searchkick 0.5.3 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/Gemfile +2 -0
- data/README.md +17 -3
- data/gemfiles/mongoid4.gemfile +1 -1
- data/lib/searchkick.rb +12 -2
- data/lib/searchkick/index.rb +67 -0
- data/lib/searchkick/model.rb +8 -4
- data/lib/searchkick/query.rb +441 -0
- data/lib/searchkick/reindex.rb +15 -23
- data/lib/searchkick/results.rb +81 -6
- data/lib/searchkick/search.rb +4 -387
- data/lib/searchkick/similar.rb +1 -1
- data/lib/searchkick/version.rb +1 -1
- data/searchkick.gemspec +5 -6
- data/test/index_test.rb +6 -3
- data/test/query_test.rb +14 -0
- data/test/sql_test.rb +16 -6
- data/test/test_helper.rb +6 -8
- metadata +24 -33
- data/lib/searchkick/logger.rb +0 -19
data/lib/searchkick/reindex.rb
CHANGED
@@ -2,36 +2,28 @@ module Searchkick
|
|
2
2
|
module Reindex
|
3
3
|
|
4
4
|
# https://gist.github.com/jarosan/3124884
|
5
|
+
# http://www.elasticsearch.org/blog/changing-mapping-with-zero-downtime/
|
5
6
|
def reindex
|
6
7
|
alias_name = searchkick_index.name
|
7
|
-
|
8
|
-
index =
|
8
|
+
new_name = alias_name + "_" + Time.now.strftime("%Y%m%d%H%M%S%L")
|
9
|
+
index = Searchkick::Index.new(new_name)
|
9
10
|
|
10
11
|
clean_indices
|
11
12
|
|
12
|
-
|
13
|
-
raise index.response.to_s if !success
|
13
|
+
index.create searchkick_index_options
|
14
14
|
|
15
|
-
if
|
15
|
+
# check if alias exists
|
16
|
+
if Searchkick.client.indices.exists_alias(name: alias_name)
|
16
17
|
searchkick_import(index) # import before swap
|
17
18
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
response = a.save
|
24
|
-
|
25
|
-
if response.success?
|
26
|
-
clean_indices
|
27
|
-
else
|
28
|
-
raise response.to_s
|
29
|
-
end
|
19
|
+
# get existing indices to remove
|
20
|
+
old_indices = Searchkick.client.indices.get_alias(name: alias_name).keys
|
21
|
+
actions = old_indices.map{|name| {remove: {index: name, alias: alias_name}} } + [{add: {index: new_name, alias: alias_name}}]
|
22
|
+
Searchkick.client.indices.update_aliases body: {actions: actions}
|
23
|
+
clean_indices
|
30
24
|
else
|
31
25
|
searchkick_index.delete if searchkick_index.exists?
|
32
|
-
|
33
|
-
raise response.to_s if !response.success?
|
34
|
-
|
26
|
+
Searchkick.client.indices.update_aliases body: {actions: [{add: {index: new_name, alias: alias_name}}]}
|
35
27
|
searchkick_import(index) # import after swap
|
36
28
|
end
|
37
29
|
|
@@ -42,10 +34,10 @@ module Searchkick
|
|
42
34
|
|
43
35
|
# remove old indices that start w/ index_name
|
44
36
|
def clean_indices
|
45
|
-
all_indices =
|
37
|
+
all_indices = Searchkick.client.indices.get_aliases
|
46
38
|
indices = all_indices.select{|k, v| v["aliases"].empty? && k =~ /\A#{Regexp.escape(searchkick_index.name)}_\d{14,17}\z/ }.keys
|
47
39
|
indices.each do |index|
|
48
|
-
|
40
|
+
Searchkick::Index.new(index).delete
|
49
41
|
end
|
50
42
|
indices
|
51
43
|
end
|
@@ -73,7 +65,7 @@ module Searchkick
|
|
73
65
|
items = []
|
74
66
|
scope.all.each do |item|
|
75
67
|
items << item if item.should_index?
|
76
|
-
if items.length
|
68
|
+
if items.length == batch_size
|
77
69
|
index.import items
|
78
70
|
items = []
|
79
71
|
end
|
data/lib/searchkick/results.rb
CHANGED
@@ -1,14 +1,47 @@
|
|
1
1
|
module Searchkick
|
2
|
-
class Results
|
2
|
+
class Results
|
3
|
+
include Enumerable
|
4
|
+
extend Forwardable
|
5
|
+
|
6
|
+
attr_reader :klass, :response, :options
|
7
|
+
|
8
|
+
def_delegators :results, :each, :empty?, :size, :slice, :[], :to_ary
|
9
|
+
|
10
|
+
def initialize(klass, response, options = {})
|
11
|
+
@klass = klass
|
12
|
+
@response = response
|
13
|
+
@options = options
|
14
|
+
end
|
15
|
+
|
16
|
+
def results
|
17
|
+
@results ||= begin
|
18
|
+
if options[:load]
|
19
|
+
hit_ids = hits.map{|hit| hit["_id"] }
|
20
|
+
records = klass
|
21
|
+
if options[:includes]
|
22
|
+
records = records.includes(options[:includes])
|
23
|
+
end
|
24
|
+
records = records.find(hit_ids)
|
25
|
+
hit_ids = hit_ids.map(&:to_s)
|
26
|
+
records.sort_by{|r| hit_ids.index(r.id.to_s) }
|
27
|
+
else
|
28
|
+
hits
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
3
32
|
|
4
33
|
def suggestions
|
5
|
-
if
|
6
|
-
|
34
|
+
if response["suggest"]
|
35
|
+
response["suggest"].values.flat_map{|v| v.first["options"] }.sort_by{|o| -o["score"] }.map{|o| o["text"] }.uniq
|
7
36
|
else
|
8
37
|
raise "Pass `suggest: true` to the search method for suggestions"
|
9
38
|
end
|
10
39
|
end
|
11
40
|
|
41
|
+
def each_with_hit(&block)
|
42
|
+
results.zip(hits).each(&block)
|
43
|
+
end
|
44
|
+
|
12
45
|
def with_details
|
13
46
|
each_with_hit.map do |model, hit|
|
14
47
|
details = {}
|
@@ -19,9 +52,51 @@ module Searchkick
|
|
19
52
|
end
|
20
53
|
end
|
21
54
|
|
22
|
-
|
23
|
-
|
24
|
-
|
55
|
+
def facets
|
56
|
+
response["facets"]
|
57
|
+
end
|
58
|
+
|
59
|
+
def model_name
|
60
|
+
klass.model_name
|
61
|
+
end
|
62
|
+
|
63
|
+
def total_count
|
64
|
+
response["hits"]["total"]
|
65
|
+
end
|
66
|
+
|
67
|
+
def current_page
|
68
|
+
options[:page]
|
25
69
|
end
|
70
|
+
|
71
|
+
def per_page
|
72
|
+
options[:per_page]
|
73
|
+
end
|
74
|
+
|
75
|
+
def total_pages
|
76
|
+
(total_count / per_page.to_f).ceil
|
77
|
+
end
|
78
|
+
|
79
|
+
def limit_value
|
80
|
+
per_page
|
81
|
+
end
|
82
|
+
|
83
|
+
def offset_value
|
84
|
+
current_page * per_page
|
85
|
+
end
|
86
|
+
|
87
|
+
def previous_page
|
88
|
+
current_page > 1 ? (current_page - 1) : nil
|
89
|
+
end
|
90
|
+
|
91
|
+
def next_page
|
92
|
+
current_page < total_pages ? (current_page + 1) : nil
|
93
|
+
end
|
94
|
+
|
95
|
+
protected
|
96
|
+
|
97
|
+
def hits
|
98
|
+
@response["hits"]["hits"]
|
99
|
+
end
|
100
|
+
|
26
101
|
end
|
27
102
|
end
|
data/lib/searchkick/search.rb
CHANGED
@@ -2,395 +2,12 @@ module Searchkick
|
|
2
2
|
module Search
|
3
3
|
|
4
4
|
def search(term, options = {})
|
5
|
-
|
6
|
-
|
7
|
-
|
5
|
+
query = Searchkick::Query.new(self, term, options)
|
6
|
+
if options[:execute] == false
|
7
|
+
query
|
8
8
|
else
|
9
|
-
|
9
|
+
query.execute
|
10
10
|
end
|
11
|
-
|
12
|
-
fields =
|
13
|
-
if options[:fields]
|
14
|
-
if options[:autocomplete]
|
15
|
-
options[:fields].map{|f| "#{f}.autocomplete" }
|
16
|
-
else
|
17
|
-
options[:fields].map do |value|
|
18
|
-
k, v = value.is_a?(Hash) ? value.to_a.first : [value, :word]
|
19
|
-
"#{k}.#{v == :word ? "analyzed" : v}"
|
20
|
-
end
|
21
|
-
end
|
22
|
-
else
|
23
|
-
if options[:autocomplete]
|
24
|
-
(searchkick_options[:autocomplete] || []).map{|f| "#{f}.autocomplete" }
|
25
|
-
else
|
26
|
-
["_all"]
|
27
|
-
end
|
28
|
-
end
|
29
|
-
|
30
|
-
operator = options[:partial] ? "or" : "and"
|
31
|
-
|
32
|
-
# model and eagar loading
|
33
|
-
load = options[:load].nil? ? true : options[:load]
|
34
|
-
load = (options[:include] ? {include: options[:include]} : true) if load
|
35
|
-
|
36
|
-
# pagination
|
37
|
-
page = [options[:page].to_i, 1].max
|
38
|
-
per_page = (options[:limit] || options[:per_page] || 100000).to_i
|
39
|
-
offset = options[:offset] || (page - 1) * per_page
|
40
|
-
index_name = options[:index_name] || searchkick_index.name
|
41
|
-
|
42
|
-
conversions_field = searchkick_options[:conversions]
|
43
|
-
personalize_field = searchkick_options[:personalize]
|
44
|
-
|
45
|
-
all = term == "*"
|
46
|
-
|
47
|
-
if options[:query]
|
48
|
-
payload = options[:query]
|
49
|
-
elsif options[:similar]
|
50
|
-
payload = {
|
51
|
-
more_like_this: {
|
52
|
-
fields: fields,
|
53
|
-
like_text: term,
|
54
|
-
min_doc_freq: 1,
|
55
|
-
min_term_freq: 1,
|
56
|
-
analyzer: "searchkick_search2"
|
57
|
-
}
|
58
|
-
}
|
59
|
-
elsif all
|
60
|
-
payload = {
|
61
|
-
match_all: {}
|
62
|
-
}
|
63
|
-
else
|
64
|
-
if options[:autocomplete]
|
65
|
-
payload = {
|
66
|
-
multi_match: {
|
67
|
-
fields: fields,
|
68
|
-
query: term,
|
69
|
-
analyzer: "searchkick_autocomplete_search"
|
70
|
-
}
|
71
|
-
}
|
72
|
-
else
|
73
|
-
queries = []
|
74
|
-
fields.each do |field|
|
75
|
-
if field == "_all" or field.end_with?(".analyzed")
|
76
|
-
shared_options = {
|
77
|
-
fields: [field],
|
78
|
-
query: term,
|
79
|
-
use_dis_max: false,
|
80
|
-
operator: operator,
|
81
|
-
cutoff_frequency: 0.001
|
82
|
-
}
|
83
|
-
queries.concat [
|
84
|
-
{multi_match: shared_options.merge(boost: 10, analyzer: "searchkick_search")},
|
85
|
-
{multi_match: shared_options.merge(boost: 10, analyzer: "searchkick_search2")}
|
86
|
-
]
|
87
|
-
if options[:misspellings] != false
|
88
|
-
distance = (options[:misspellings].is_a?(Hash) && options[:misspellings][:distance]) || 1
|
89
|
-
queries.concat [
|
90
|
-
{multi_match: shared_options.merge(fuzziness: distance, max_expansions: 3, analyzer: "searchkick_search")},
|
91
|
-
{multi_match: shared_options.merge(fuzziness: distance, max_expansions: 3, analyzer: "searchkick_search2")}
|
92
|
-
]
|
93
|
-
end
|
94
|
-
else
|
95
|
-
analyzer = field.match(/\.word_(start|middle|end)\z/) ? "searchkick_word_search" : "searchkick_autocomplete_search"
|
96
|
-
queries << {
|
97
|
-
multi_match: {
|
98
|
-
fields: [field],
|
99
|
-
query: term,
|
100
|
-
analyzer: analyzer
|
101
|
-
}
|
102
|
-
}
|
103
|
-
end
|
104
|
-
end
|
105
|
-
|
106
|
-
payload = {
|
107
|
-
dis_max: {
|
108
|
-
queries: queries
|
109
|
-
}
|
110
|
-
}
|
111
|
-
end
|
112
|
-
|
113
|
-
if conversions_field and options[:conversions] != false
|
114
|
-
# wrap payload in a bool query
|
115
|
-
payload = {
|
116
|
-
bool: {
|
117
|
-
must: payload,
|
118
|
-
should: {
|
119
|
-
nested: {
|
120
|
-
path: conversions_field,
|
121
|
-
score_mode: "total",
|
122
|
-
query: {
|
123
|
-
custom_score: {
|
124
|
-
query: {
|
125
|
-
match: {
|
126
|
-
query: term
|
127
|
-
}
|
128
|
-
},
|
129
|
-
script: "doc['count'].value"
|
130
|
-
}
|
131
|
-
}
|
132
|
-
}
|
133
|
-
}
|
134
|
-
}
|
135
|
-
}
|
136
|
-
end
|
137
|
-
end
|
138
|
-
|
139
|
-
custom_filters = []
|
140
|
-
|
141
|
-
if options[:boost]
|
142
|
-
custom_filters << {
|
143
|
-
filter: {
|
144
|
-
exists: {
|
145
|
-
field: options[:boost]
|
146
|
-
}
|
147
|
-
},
|
148
|
-
script: "log(doc['#{options[:boost]}'].value + 2.718281828)"
|
149
|
-
}
|
150
|
-
end
|
151
|
-
|
152
|
-
if options[:user_id] and personalize_field
|
153
|
-
custom_filters << {
|
154
|
-
filter: {
|
155
|
-
term: {
|
156
|
-
personalize_field => options[:user_id]
|
157
|
-
}
|
158
|
-
},
|
159
|
-
boost: 100
|
160
|
-
}
|
161
|
-
end
|
162
|
-
|
163
|
-
if options[:personalize]
|
164
|
-
custom_filters << {
|
165
|
-
filter: {
|
166
|
-
term: options[:personalize]
|
167
|
-
},
|
168
|
-
boost: 100
|
169
|
-
}
|
170
|
-
end
|
171
|
-
|
172
|
-
if custom_filters.any?
|
173
|
-
payload = {
|
174
|
-
custom_filters_score: {
|
175
|
-
query: payload,
|
176
|
-
filters: custom_filters,
|
177
|
-
score_mode: "total"
|
178
|
-
}
|
179
|
-
}
|
180
|
-
end
|
181
|
-
|
182
|
-
payload = {
|
183
|
-
query: payload,
|
184
|
-
size: per_page,
|
185
|
-
from: offset
|
186
|
-
}
|
187
|
-
payload[:explain] = options[:explain] if options[:explain]
|
188
|
-
|
189
|
-
# order
|
190
|
-
if options[:order]
|
191
|
-
order = options[:order].is_a?(Enumerable) ? options[:order] : {options[:order] => :asc}
|
192
|
-
payload[:sort] = Hash[ order.map{|k, v| [k.to_s == "id" ? :_id : k, v] } ]
|
193
|
-
end
|
194
|
-
|
195
|
-
term_filters =
|
196
|
-
proc do |field, value|
|
197
|
-
if value.is_a?(Array) # in query
|
198
|
-
if value.any?
|
199
|
-
{or: value.map{|v| term_filters.call(field, v) }}
|
200
|
-
else
|
201
|
-
{terms: {field => value}} # match nothing
|
202
|
-
end
|
203
|
-
elsif value.nil?
|
204
|
-
{missing: {"field" => field, existence: true, null_value: true}}
|
205
|
-
else
|
206
|
-
{term: {field => value}}
|
207
|
-
end
|
208
|
-
end
|
209
|
-
|
210
|
-
# where
|
211
|
-
where_filters =
|
212
|
-
proc do |where|
|
213
|
-
filters = []
|
214
|
-
(where || {}).each do |field, value|
|
215
|
-
field = :_id if field.to_s == "id"
|
216
|
-
|
217
|
-
if field == :or
|
218
|
-
value.each do |or_clause|
|
219
|
-
filters << {or: or_clause.map{|or_statement| {and: where_filters.call(or_statement)} }}
|
220
|
-
end
|
221
|
-
else
|
222
|
-
# expand ranges
|
223
|
-
if value.is_a?(Range)
|
224
|
-
value = {gte: value.first, (value.exclude_end? ? :lt : :lte) => value.last}
|
225
|
-
end
|
226
|
-
|
227
|
-
if value.is_a?(Hash)
|
228
|
-
value.each do |op, op_value|
|
229
|
-
case op
|
230
|
-
when :within, :bottom_right
|
231
|
-
# do nothing
|
232
|
-
when :near
|
233
|
-
filters << {
|
234
|
-
geo_distance: {
|
235
|
-
field => op_value.map(&:to_f).reverse,
|
236
|
-
distance: value[:within] || "50mi"
|
237
|
-
}
|
238
|
-
}
|
239
|
-
when :top_left
|
240
|
-
filters << {
|
241
|
-
geo_bounding_box: {
|
242
|
-
field => {
|
243
|
-
top_left: op_value.map(&:to_f).reverse,
|
244
|
-
bottom_right: value[:bottom_right].map(&:to_f).reverse
|
245
|
-
}
|
246
|
-
}
|
247
|
-
}
|
248
|
-
when :not # not equal
|
249
|
-
filters << {not: term_filters.call(field, op_value)}
|
250
|
-
when :all
|
251
|
-
filters << {terms: {field => op_value, execution: "and"}}
|
252
|
-
else
|
253
|
-
range_query =
|
254
|
-
case op
|
255
|
-
when :gt
|
256
|
-
{from: op_value, include_lower: false}
|
257
|
-
when :gte
|
258
|
-
{from: op_value, include_lower: true}
|
259
|
-
when :lt
|
260
|
-
{to: op_value, include_upper: false}
|
261
|
-
when :lte
|
262
|
-
{to: op_value, include_upper: true}
|
263
|
-
else
|
264
|
-
raise "Unknown where operator"
|
265
|
-
end
|
266
|
-
# issue 132
|
267
|
-
if existing = filters.find{ |f| f[:range] && f[:range][field] }
|
268
|
-
existing[:range][field].merge!(range_query)
|
269
|
-
else
|
270
|
-
filters << {range: {field => range_query}}
|
271
|
-
end
|
272
|
-
end
|
273
|
-
end
|
274
|
-
else
|
275
|
-
filters << term_filters.call(field, value)
|
276
|
-
end
|
277
|
-
end
|
278
|
-
end
|
279
|
-
filters
|
280
|
-
end
|
281
|
-
|
282
|
-
# filters
|
283
|
-
filters = where_filters.call(options[:where])
|
284
|
-
if filters.any?
|
285
|
-
payload[:filter] = {
|
286
|
-
and: filters
|
287
|
-
}
|
288
|
-
end
|
289
|
-
|
290
|
-
# facets
|
291
|
-
facet_limits = {}
|
292
|
-
if options[:facets]
|
293
|
-
facets = options[:facets] || {}
|
294
|
-
if facets.is_a?(Array) # convert to more advanced syntax
|
295
|
-
facets = Hash[ facets.map{|f| [f, {}] } ]
|
296
|
-
end
|
297
|
-
|
298
|
-
payload[:facets] = {}
|
299
|
-
facets.each do |field, facet_options|
|
300
|
-
# ask for extra facets due to
|
301
|
-
# https://github.com/elasticsearch/elasticsearch/issues/1305
|
302
|
-
|
303
|
-
if facet_options[:ranges]
|
304
|
-
payload[:facets][field] = {
|
305
|
-
range: {
|
306
|
-
field.to_sym => facet_options[:ranges]
|
307
|
-
}
|
308
|
-
}
|
309
|
-
else
|
310
|
-
payload[:facets][field] = {
|
311
|
-
terms: {
|
312
|
-
field: field,
|
313
|
-
size: facet_options[:limit] ? facet_options[:limit] + 150 : 100000
|
314
|
-
}
|
315
|
-
}
|
316
|
-
end
|
317
|
-
|
318
|
-
facet_limits[field] = facet_options[:limit] if facet_options[:limit]
|
319
|
-
|
320
|
-
# offset is not possible
|
321
|
-
# http://elasticsearch-users.115913.n3.nabble.com/Is-pagination-possible-in-termsStatsFacet-td3422943.html
|
322
|
-
|
323
|
-
facet_filters = where_filters.call(facet_options[:where])
|
324
|
-
if facet_filters.any?
|
325
|
-
payload[:facets][field][:facet_filter] = {
|
326
|
-
and: {
|
327
|
-
filters: facet_filters
|
328
|
-
}
|
329
|
-
}
|
330
|
-
end
|
331
|
-
end
|
332
|
-
end
|
333
|
-
|
334
|
-
# suggestions
|
335
|
-
if options[:suggest]
|
336
|
-
suggest_fields = (searchkick_options[:suggest] || []).map(&:to_s)
|
337
|
-
# intersection
|
338
|
-
suggest_fields = suggest_fields & options[:fields].map(&:to_s) if options[:fields]
|
339
|
-
if suggest_fields.any?
|
340
|
-
payload[:suggest] = {text: term}
|
341
|
-
suggest_fields.each do |field|
|
342
|
-
payload[:suggest][field] = {
|
343
|
-
phrase: {
|
344
|
-
field: "#{field}.suggest"
|
345
|
-
}
|
346
|
-
}
|
347
|
-
end
|
348
|
-
end
|
349
|
-
end
|
350
|
-
|
351
|
-
# highlight
|
352
|
-
if options[:highlight]
|
353
|
-
payload[:highlight] = {
|
354
|
-
fields: Hash[ fields.map{|f| [f, {}] } ]
|
355
|
-
}
|
356
|
-
if options[:highlight].is_a?(Hash) and tag = options[:highlight][:tag]
|
357
|
-
payload[:highlight][:pre_tags] = [tag]
|
358
|
-
payload[:highlight][:post_tags] = [tag.to_s.gsub(/\A</, "</")]
|
359
|
-
end
|
360
|
-
end
|
361
|
-
|
362
|
-
# An empty array will cause only the _id and _type for each hit to be returned
|
363
|
-
# http://www.elasticsearch.org/guide/reference/api/search/fields/
|
364
|
-
payload[:fields] = [] if load
|
365
|
-
|
366
|
-
tire_options = {load: load, payload: payload, size: per_page, from: offset}
|
367
|
-
if options[:type] or self != searchkick_klass
|
368
|
-
tire_options[:type] = [options[:type] || self].flatten.map(&:document_type)
|
369
|
-
end
|
370
|
-
search = Tire::Search::Search.new(index_name, tire_options)
|
371
|
-
begin
|
372
|
-
response = search.json
|
373
|
-
rescue Tire::Search::SearchRequestFailed => e
|
374
|
-
status_code = e.message[0..3].to_i
|
375
|
-
if status_code == 404
|
376
|
-
raise "Index missing - run #{searchkick_klass.name}.reindex"
|
377
|
-
elsif status_code == 500 and (e.message.include?("IllegalArgumentException[minimumSimilarity >= 1]") or e.message.include?("No query registered for [multi_match]"))
|
378
|
-
raise "Upgrade Elasticsearch to 0.90.0 or greater"
|
379
|
-
else
|
380
|
-
raise e
|
381
|
-
end
|
382
|
-
end
|
383
|
-
|
384
|
-
# apply facet limit in client due to
|
385
|
-
# https://github.com/elasticsearch/elasticsearch/issues/1305
|
386
|
-
facet_limits.each do |field, limit|
|
387
|
-
field = field.to_s
|
388
|
-
facet = response["facets"][field]
|
389
|
-
response["facets"][field]["terms"] = facet["terms"].first(limit)
|
390
|
-
response["facets"][field]["other"] = facet["total"] - facet["terms"].sum{|term| term["count"] }
|
391
|
-
end
|
392
|
-
|
393
|
-
Searchkick::Results.new(response, search.options.merge(term: term))
|
394
11
|
end
|
395
12
|
|
396
13
|
end
|