picky 1.5.1 → 1.5.2
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/picky/sources/db.rb +33 -37
- data/spec/lib/sources/db_spec.rb +3 -3
- metadata +1 -1
data/lib/picky/sources/db.rb
CHANGED
@@ -40,6 +40,8 @@ module Sources
|
|
40
40
|
def create_database_adapter # :nodoc:
|
41
41
|
# TODO Do not use ActiveRecord directly.
|
42
42
|
#
|
43
|
+
# TODO Use set_table_name etc.
|
44
|
+
#
|
43
45
|
adapter_class = Class.new ActiveRecord::Base
|
44
46
|
adapter_class.abstract_class = true
|
45
47
|
adapter_class
|
@@ -76,56 +78,58 @@ module Sources
|
|
76
78
|
#
|
77
79
|
# Uses CREATE TABLE AS with the given SELECT statement to create a snapshot of the data.
|
78
80
|
#
|
79
|
-
def take_snapshot
|
81
|
+
def take_snapshot index
|
80
82
|
connect_backend
|
81
83
|
|
82
|
-
origin = snapshot_table_name
|
83
|
-
|
84
|
+
origin = snapshot_table_name index
|
84
85
|
on_database = database.connection
|
85
86
|
|
86
|
-
|
87
|
+
# Drop the table if it exists.
|
88
|
+
#
|
89
|
+
on_database.drop_table origin if on_database.table_exists?(origin)
|
90
|
+
|
91
|
+
# The adapters currently do not support this.
|
92
|
+
#
|
87
93
|
on_database.execute "CREATE TABLE #{origin} AS #{select_statement}"
|
88
|
-
|
94
|
+
|
95
|
+
# Add a column that Picky uses to traverse the table's entries.
|
89
96
|
#
|
90
|
-
|
91
|
-
on_database.execute "ALTER TABLE #{origin} ADD COLUMN #{@@traversal_id} SERIAL PRIMARY KEY"
|
92
|
-
else
|
93
|
-
on_database.execute "ALTER TABLE #{origin} ADD COLUMN #{@@traversal_id} INTEGER NOT NULL PRIMARY KEY AUTO_INCREMENT"
|
94
|
-
end
|
97
|
+
on_database.add_column origin, @@traversal_id, :primary_key, :null => :false
|
95
98
|
|
96
|
-
# Execute any special queries this type needs executed.
|
99
|
+
# Execute any special queries this index needs executed.
|
97
100
|
#
|
98
|
-
on_database.execute
|
101
|
+
on_database.execute index.after_indexing if index.after_indexing
|
99
102
|
end
|
100
103
|
|
101
104
|
# Counts all the entries that are used for the index.
|
102
105
|
#
|
103
|
-
def count
|
106
|
+
def count index
|
104
107
|
connect_backend
|
105
108
|
|
106
|
-
database.connection.select_value("SELECT COUNT(#{@@traversal_id}) FROM #{snapshot_table_name(
|
109
|
+
database.connection.select_value("SELECT COUNT(#{@@traversal_id}) FROM #{snapshot_table_name(index)}").to_i
|
107
110
|
end
|
108
111
|
|
112
|
+
# The name of the snapshot table created by Picky.
|
109
113
|
#
|
110
|
-
|
111
|
-
|
112
|
-
"#{type.name}_type_index"
|
114
|
+
def snapshot_table_name index
|
115
|
+
"picky_#{index.name}_index"
|
113
116
|
end
|
114
117
|
|
115
118
|
# Harvests the data to index in chunks.
|
116
119
|
#
|
117
|
-
def harvest
|
120
|
+
def harvest index, category, &block
|
118
121
|
connect_backend
|
119
122
|
|
120
|
-
(0..count(
|
121
|
-
get_data
|
123
|
+
(0..count(index)).step(chunksize) do |offset|
|
124
|
+
get_data index, category, offset, &block
|
122
125
|
end
|
123
126
|
end
|
124
127
|
|
125
128
|
# Gets the data from the backend.
|
126
129
|
#
|
127
|
-
def get_data
|
128
|
-
|
130
|
+
def get_data index, category, offset, &block # :nodoc:
|
131
|
+
|
132
|
+
select_statement = harvest_statement_with_offset index, category, offset
|
129
133
|
|
130
134
|
# TODO Rewrite ASAP.
|
131
135
|
#
|
@@ -134,25 +138,19 @@ module Sources
|
|
134
138
|
text_key = category.from.to_s
|
135
139
|
database.connection.execute(select_statement).each do |hash|
|
136
140
|
id, text = hash.values_at id_key, text_key
|
137
|
-
|
138
|
-
text.force_encoding 'utf-8' # TODO Still needed? Or move to backend?
|
139
|
-
yield id, text
|
141
|
+
yield id, text if text
|
140
142
|
end
|
141
143
|
else
|
142
144
|
database.connection.execute(select_statement).each do |id, text|
|
143
|
-
|
144
|
-
text.force_encoding 'utf-8' # TODO Still needed? Or move to backend?
|
145
|
-
yield id, text
|
145
|
+
yield id, text if text
|
146
146
|
end
|
147
147
|
end
|
148
148
|
end
|
149
149
|
|
150
150
|
# Builds a harvest statement for getting data to index.
|
151
151
|
#
|
152
|
-
|
153
|
-
|
154
|
-
def harvest_statement_with_offset type, category, offset # :nodoc:
|
155
|
-
statement = harvest_statement type, category
|
152
|
+
def harvest_statement_with_offset index, category, offset
|
153
|
+
statement = harvest_statement index, category
|
156
154
|
|
157
155
|
statement += statement.include?('WHERE') ? ' AND' : ' WHERE'
|
158
156
|
|
@@ -161,15 +159,13 @@ module Sources
|
|
161
159
|
|
162
160
|
# The harvest statement used to pull data from the snapshot table.
|
163
161
|
#
|
164
|
-
def harvest_statement
|
165
|
-
"SELECT id, #{category.from} FROM #{snapshot_table_name(
|
162
|
+
def harvest_statement index, category
|
163
|
+
"SELECT id, #{category.from} FROM #{snapshot_table_name(index)} st"
|
166
164
|
end
|
167
165
|
|
168
166
|
# The amount of records that are loaded each chunk.
|
169
167
|
#
|
170
|
-
def chunksize
|
171
|
-
# TODO Make parametrizable.
|
172
|
-
#
|
168
|
+
def chunksize
|
173
169
|
25_000
|
174
170
|
end
|
175
171
|
|
data/spec/lib/sources/db_spec.rb
CHANGED
@@ -30,7 +30,7 @@ describe Sources::DB do
|
|
30
30
|
|
31
31
|
@connection.should_receive(:execute).
|
32
32
|
once.
|
33
|
-
with('SELECT id, some_category FROM
|
33
|
+
with('SELECT id, some_category FROM picky_some_type_index st WHERE st.__picky_id > some_offset LIMIT 25000').
|
34
34
|
and_return []
|
35
35
|
|
36
36
|
@source.get_data @type, @category, :some_offset
|
@@ -40,7 +40,7 @@ describe Sources::DB do
|
|
40
40
|
it 'yields to the caller' do
|
41
41
|
@connection.should_receive(:execute).
|
42
42
|
any_number_of_times.
|
43
|
-
with('SELECT id, some_category FROM
|
43
|
+
with('SELECT id, some_category FROM picky_some_type_index st WHERE st.__picky_id > some_offset LIMIT 25000').
|
44
44
|
and_return [[1, 'text']]
|
45
45
|
|
46
46
|
@source.get_data @type, @category, :some_offset do |id, text|
|
@@ -86,7 +86,7 @@ describe Sources::DB do
|
|
86
86
|
end
|
87
87
|
it "should get the id count" do
|
88
88
|
result = stub(:result, :to_i => 12_345)
|
89
|
-
@connection.should_receive(:select_value).once.with("SELECT COUNT(__picky_id) FROM
|
89
|
+
@connection.should_receive(:select_value).once.with("SELECT COUNT(__picky_id) FROM picky_some_type_name_index")
|
90
90
|
|
91
91
|
@source.count @type
|
92
92
|
end
|