picky 1.5.1 → 1.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/picky/sources/db.rb +33 -37
- data/spec/lib/sources/db_spec.rb +3 -3
- metadata +1 -1
data/lib/picky/sources/db.rb
CHANGED
@@ -40,6 +40,8 @@ module Sources
|
|
40
40
|
def create_database_adapter # :nodoc:
|
41
41
|
# TODO Do not use ActiveRecord directly.
|
42
42
|
#
|
43
|
+
# TODO Use set_table_name etc.
|
44
|
+
#
|
43
45
|
adapter_class = Class.new ActiveRecord::Base
|
44
46
|
adapter_class.abstract_class = true
|
45
47
|
adapter_class
|
@@ -76,56 +78,58 @@ module Sources
|
|
76
78
|
#
|
77
79
|
# Uses CREATE TABLE AS with the given SELECT statement to create a snapshot of the data.
|
78
80
|
#
|
79
|
-
def take_snapshot
|
81
|
+
def take_snapshot index
|
80
82
|
connect_backend
|
81
83
|
|
82
|
-
origin = snapshot_table_name
|
83
|
-
|
84
|
+
origin = snapshot_table_name index
|
84
85
|
on_database = database.connection
|
85
86
|
|
86
|
-
|
87
|
+
# Drop the table if it exists.
|
88
|
+
#
|
89
|
+
on_database.drop_table origin if on_database.table_exists?(origin)
|
90
|
+
|
91
|
+
# The adapters currently do not support this.
|
92
|
+
#
|
87
93
|
on_database.execute "CREATE TABLE #{origin} AS #{select_statement}"
|
88
|
-
|
94
|
+
|
95
|
+
# Add a column that Picky uses to traverse the table's entries.
|
89
96
|
#
|
90
|
-
|
91
|
-
on_database.execute "ALTER TABLE #{origin} ADD COLUMN #{@@traversal_id} SERIAL PRIMARY KEY"
|
92
|
-
else
|
93
|
-
on_database.execute "ALTER TABLE #{origin} ADD COLUMN #{@@traversal_id} INTEGER NOT NULL PRIMARY KEY AUTO_INCREMENT"
|
94
|
-
end
|
97
|
+
on_database.add_column origin, @@traversal_id, :primary_key, :null => :false
|
95
98
|
|
96
|
-
# Execute any special queries this
|
99
|
+
# Execute any special queries this index needs executed.
|
97
100
|
#
|
98
|
-
on_database.execute
|
101
|
+
on_database.execute index.after_indexing if index.after_indexing
|
99
102
|
end
|
100
103
|
|
101
104
|
# Counts all the entries that are used for the index.
|
102
105
|
#
|
103
|
-
def count
|
106
|
+
def count index
|
104
107
|
connect_backend
|
105
108
|
|
106
|
-
database.connection.select_value("SELECT COUNT(#{@@traversal_id}) FROM #{snapshot_table_name(type)}").to_i
|
109
|
+
database.connection.select_value("SELECT COUNT(#{@@traversal_id}) FROM #{snapshot_table_name(index)}").to_i
|
107
110
|
end
|
108
111
|
|
112
|
+
# The name of the snapshot table created by Picky.
|
109
113
|
#
|
110
|
-
|
111
|
-
|
112
|
-
"#{type.name}_type_index"
|
114
|
+
def snapshot_table_name index
|
115
|
+
"picky_#{index.name}_index"
|
113
116
|
end
|
114
117
|
|
115
118
|
# Harvests the data to index in chunks.
|
116
119
|
#
|
117
|
-
def harvest
|
120
|
+
def harvest index, category, &block
|
118
121
|
connect_backend
|
119
122
|
|
120
|
-
(0..count(
|
121
|
-
get_data
|
123
|
+
(0..count(index)).step(chunksize) do |offset|
|
124
|
+
get_data index, category, offset, &block
|
122
125
|
end
|
123
126
|
end
|
124
127
|
|
125
128
|
# Gets the data from the backend.
|
126
129
|
#
|
127
|
-
def get_data
|
128
|
-
|
130
|
+
def get_data index, category, offset, &block # :nodoc:
|
131
|
+
|
132
|
+
select_statement = harvest_statement_with_offset index, category, offset
|
129
133
|
|
130
134
|
# TODO Rewrite ASAP.
|
131
135
|
#
|
@@ -134,25 +138,19 @@ module Sources
|
|
134
138
|
text_key = category.from.to_s
|
135
139
|
database.connection.execute(select_statement).each do |hash|
|
136
140
|
id, text = hash.values_at id_key, text_key
|
137
|
-
|
138
|
-
text.force_encoding 'utf-8' # TODO Still needed? Or move to backend?
|
139
|
-
yield id, text
|
141
|
+
yield id, text if text
|
140
142
|
end
|
141
143
|
else
|
142
144
|
database.connection.execute(select_statement).each do |id, text|
|
143
|
-
|
144
|
-
text.force_encoding 'utf-8' # TODO Still needed? Or move to backend?
|
145
|
-
yield id, text
|
145
|
+
yield id, text if text
|
146
146
|
end
|
147
147
|
end
|
148
148
|
end
|
149
149
|
|
150
150
|
# Builds a harvest statement for getting data to index.
|
151
151
|
#
|
152
|
-
|
153
|
-
|
154
|
-
def harvest_statement_with_offset type, category, offset # :nodoc:
|
155
|
-
statement = harvest_statement type, category
|
152
|
+
def harvest_statement_with_offset index, category, offset
|
153
|
+
statement = harvest_statement index, category
|
156
154
|
|
157
155
|
statement += statement.include?('WHERE') ? ' AND' : ' WHERE'
|
158
156
|
|
@@ -161,15 +159,13 @@ module Sources
|
|
161
159
|
|
162
160
|
# The harvest statement used to pull data from the snapshot table.
|
163
161
|
#
|
164
|
-
def harvest_statement
|
165
|
-
"SELECT id, #{category.from} FROM #{snapshot_table_name(type)} st"
|
162
|
+
def harvest_statement index, category
|
163
|
+
"SELECT id, #{category.from} FROM #{snapshot_table_name(index)} st"
|
166
164
|
end
|
167
165
|
|
168
166
|
# The amount of records that are loaded each chunk.
|
169
167
|
#
|
170
|
-
def chunksize
|
171
|
-
# TODO Make parametrizable.
|
172
|
-
#
|
168
|
+
def chunksize
|
173
169
|
25_000
|
174
170
|
end
|
175
171
|
|
data/spec/lib/sources/db_spec.rb
CHANGED
@@ -30,7 +30,7 @@ describe Sources::DB do
|
|
30
30
|
|
31
31
|
@connection.should_receive(:execute).
|
32
32
|
once.
|
33
|
-
with('SELECT id, some_category FROM
|
33
|
+
with('SELECT id, some_category FROM picky_some_type_index st WHERE st.__picky_id > some_offset LIMIT 25000').
|
34
34
|
and_return []
|
35
35
|
|
36
36
|
@source.get_data @type, @category, :some_offset
|
@@ -40,7 +40,7 @@ describe Sources::DB do
|
|
40
40
|
it 'yields to the caller' do
|
41
41
|
@connection.should_receive(:execute).
|
42
42
|
any_number_of_times.
|
43
|
-
with('SELECT id, some_category FROM
|
43
|
+
with('SELECT id, some_category FROM picky_some_type_index st WHERE st.__picky_id > some_offset LIMIT 25000').
|
44
44
|
and_return [[1, 'text']]
|
45
45
|
|
46
46
|
@source.get_data @type, @category, :some_offset do |id, text|
|
@@ -86,7 +86,7 @@ describe Sources::DB do
|
|
86
86
|
end
|
87
87
|
it "should get the id count" do
|
88
88
|
result = stub(:result, :to_i => 12_345)
|
89
|
-
@connection.should_receive(:select_value).once.with("SELECT COUNT(__picky_id) FROM
|
89
|
+
@connection.should_receive(:select_value).once.with("SELECT COUNT(__picky_id) FROM picky_some_type_name_index")
|
90
90
|
|
91
91
|
@source.count @type
|
92
92
|
end
|