searchkick 1.3.4 → 1.3.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +3 -3
- data/CHANGELOG.md +6 -0
- data/Gemfile +1 -0
- data/README.md +31 -11
- data/lib/searchkick.rb +14 -11
- data/lib/searchkick/index.rb +10 -357
- data/lib/searchkick/index_options.rb +359 -0
- data/lib/searchkick/query.rb +44 -29
- data/lib/searchkick/version.rb +1 -1
- data/test/aggs_test.rb +8 -10
- data/test/boost_test.rb +2 -2
- data/test/errors_test.rb +1 -1
- data/test/index_test.rb +9 -1
- data/test/query_test.rb +6 -1
- data/test/sql_test.rb +32 -1
- data/test/test_helper.rb +11 -9
- data/test/where_test.rb +7 -0
- metadata +4 -4
@@ -0,0 +1,359 @@
|
|
1
|
+
module Searchkick
  # Builds the Elasticsearch index definition (settings and mappings) from
  # the searchkick options stored in @options on the including object.
  module IndexOptions
    # Returns a hash of the form {settings: ..., mappings: ...}.
    #
    # When options[:mappings] is given without options[:merge_mappings], the
    # user-supplied settings and mappings are returned untouched. Otherwise
    # the default analyzers, filters and field mappings are generated and
    # options[:settings] / options[:mappings] are deep-merged on top of them.
    #
    # Relies on Searchkick.server_below?, Searchkick.env and
    # Searchkick.wordnet_path, plus Hash#deep_merge / Hash#except.
    def index_options
      options = @options
      language = options[:language]
      # language may be given lazily as a callable
      language = language.call if language.respond_to?(:call)

      if options[:mappings] && !options[:merge_mappings]
        settings = options[:settings] || {}
        mappings = options[:mappings]
      else
        # server version switches: field types and analyzer names differ
        # before/after 5.0, ignore_above is only set on 2.2+
        below22 = Searchkick.server_below?("2.2.0")
        below50 = Searchkick.server_below?("5.0.0-alpha1")
        default_type = below50 ? "string" : "text"
        default_analyzer = below50 ? :default_index : :default
        keyword_mapping =
          if below50
            {
              type: default_type,
              index: "not_analyzed"
            }
          else
            {
              type: "keyword"
            }
          end

        keyword_mapping[:ignore_above] = 256 unless below22

        settings = {
          analysis: {
            analyzer: {
              searchkick_keyword: {
                type: "custom",
                tokenizer: "keyword",
                filter: ["lowercase"] + (options[:stem_conversions] == false ? [] : ["searchkick_stemmer"])
              },
              default_analyzer => {
                type: "custom",
                # character filters -> tokenizer -> token filters
                # https://www.elastic.co/guide/en/elasticsearch/guide/current/analysis-intro.html
                char_filter: ["ampersand"],
                tokenizer: "standard",
                # synonym should come last, after stemming and shingle
                # shingle must come before searchkick_stemmer
                filter: ["standard", "lowercase", "asciifolding", "searchkick_index_shingle", "searchkick_stemmer"]
              },
              searchkick_search: {
                type: "custom",
                char_filter: ["ampersand"],
                tokenizer: "standard",
                filter: ["standard", "lowercase", "asciifolding", "searchkick_search_shingle", "searchkick_stemmer"]
              },
              searchkick_search2: {
                type: "custom",
                char_filter: ["ampersand"],
                tokenizer: "standard",
                filter: ["standard", "lowercase", "asciifolding", "searchkick_stemmer"]
              },
              # https://github.com/leschenko/elasticsearch_autocomplete/blob/master/lib/elasticsearch_autocomplete/analyzers.rb
              searchkick_autocomplete_index: {
                type: "custom",
                tokenizer: "searchkick_autocomplete_ngram",
                filter: ["lowercase", "asciifolding"]
              },
              searchkick_autocomplete_search: {
                type: "custom",
                tokenizer: "keyword",
                filter: ["lowercase", "asciifolding"]
              },
              searchkick_word_search: {
                type: "custom",
                tokenizer: "standard",
                filter: ["lowercase", "asciifolding"]
              },
              searchkick_suggest_index: {
                type: "custom",
                tokenizer: "standard",
                filter: ["lowercase", "asciifolding", "searchkick_suggest_shingle"]
              },
              searchkick_text_start_index: {
                type: "custom",
                tokenizer: "keyword",
                filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
              },
              searchkick_text_middle_index: {
                type: "custom",
                tokenizer: "keyword",
                filter: ["lowercase", "asciifolding", "searchkick_ngram"]
              },
              searchkick_text_end_index: {
                type: "custom",
                tokenizer: "keyword",
                filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
              },
              searchkick_word_start_index: {
                type: "custom",
                tokenizer: "standard",
                filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
              },
              searchkick_word_middle_index: {
                type: "custom",
                tokenizer: "standard",
                filter: ["lowercase", "asciifolding", "searchkick_ngram"]
              },
              searchkick_word_end_index: {
                type: "custom",
                tokenizer: "standard",
                filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
              }
            },
            filter: {
              searchkick_index_shingle: {
                type: "shingle",
                token_separator: ""
              },
              # lucky find http://web.archiveorange.com/archive/v/AAfXfQ17f57FcRINsof7
              searchkick_search_shingle: {
                type: "shingle",
                token_separator: "",
                output_unigrams: false,
                output_unigrams_if_no_shingles: true
              },
              searchkick_suggest_shingle: {
                type: "shingle",
                max_shingle_size: 5
              },
              searchkick_edge_ngram: {
                type: "edgeNGram",
                min_gram: 1,
                max_gram: 50
              },
              searchkick_ngram: {
                type: "nGram",
                min_gram: 1,
                max_gram: 50
              },
              searchkick_stemmer: {
                # use stemmer if language is lowercase, snowball otherwise
                # TODO deprecate language option in favor of stemmer
                type: language == language.to_s.downcase ? "stemmer" : "snowball",
                language: language || "English"
              }
            },
            char_filter: {
              # https://www.elastic.co/guide/en/elasticsearch/guide/current/custom-analyzers.html
              # &_to_and
              ampersand: {
                type: "mapping",
                mappings: ["&=> and "]
              }
            },
            tokenizer: {
              searchkick_autocomplete_ngram: {
                type: "edgeNGram",
                min_gram: 1,
                max_gram: 50
              }
            }
          }
        }

        if Searchkick.env == "test"
          settings[:number_of_shards] = 1
          settings[:number_of_replicas] = 0
        end

        if options[:similarity]
          settings[:similarity] = {default: {type: options[:similarity]}}
        end

        # user-supplied settings win over the generated defaults
        settings.deep_merge!(options[:settings] || {})

        # synonyms
        synonyms = options[:synonyms] || []

        synonyms = synonyms.call if synonyms.respond_to?(:call)

        if synonyms.any?
          settings[:analysis][:filter][:searchkick_synonym] = {
            type: "synonym",
            # single-entry groups carry no synonym information, so drop them
            synonyms: synonyms.select { |s| s.size > 1 }.map { |s| s.join(",") }
          }
          # choosing a place for the synonym filter when stemming is not easy
          # https://groups.google.com/forum/#!topic/elasticsearch/p7qcQlgHdB8
          # TODO use a snowball stemmer on synonyms when creating the token filter

          # http://elasticsearch-users.115913.n3.nabble.com/synonym-multi-words-search-td4030811.html
          # I find the following approach effective if you are doing multi-word synonyms (synonym phrases):
          # - Only apply the synonym expansion at index time
          # - Don't have the synonym filter applied at search time
          # - Use directional synonyms where appropriate. You want to make sure that you're not injecting terms that are too general.
          # the filter is added twice on purpose: once at position 4 and once at the end of the chain
          settings[:analysis][:analyzer][default_analyzer][:filter].insert(4, "searchkick_synonym")
          settings[:analysis][:analyzer][default_analyzer][:filter] << "searchkick_synonym"

          %w(word_start word_middle word_end).each do |type|
            settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_synonym")
          end
        end

        if options[:wordnet]
          settings[:analysis][:filter][:searchkick_wordnet] = {
            type: "synonym",
            format: "wordnet",
            synonyms_path: Searchkick.wordnet_path
          }

          # same placement as the synonym filter above
          settings[:analysis][:analyzer][default_analyzer][:filter].insert(4, "searchkick_wordnet")
          settings[:analysis][:analyzer][default_analyzer][:filter] << "searchkick_wordnet"

          %w(word_start word_middle word_end).each do |type|
            settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_wordnet")
          end
        end

        if options[:special_characters] == false
          settings[:analysis][:analyzer].each do |_, analyzer_settings|
            # analyzers merged in from options[:settings] may not define a
            # filter list at all (e.g. {type: "standard"}); skip those instead
            # of calling reject! on nil
            filters = analyzer_settings[:filter]
            filters.reject! { |f| f == "asciifolding" } if filters.respond_to?(:reject!)
          end
        end

        mapping = {}

        # conversions
        Array(options[:conversions]).each do |conversions_field|
          mapping[conversions_field] = {
            type: "nested",
            properties: {
              query: {type: default_type, analyzer: "searchkick_keyword"},
              count: {type: "integer"}
            }
          }
        end

        # option name => list of field names (as strings) it applies to
        mapping_options = Hash[
          [:autocomplete, :suggest, :word, :text_start, :text_middle, :text_end, :word_start, :word_middle, :word_end, :highlight, :searchable, :filterable, :only_analyzed]
            .map { |type| [type, (options[type] || []).map(&:to_s)] }
        ]

        # whether the default "analyzed" sub-field should be generated
        word = options[:word] != false && (!options[:match] || options[:match] == :word)

        mapping_options.values.flatten.uniq.each do |field|
          fields = {}

          if mapping_options[:only_analyzed].include?(field) || (options.key?(:filterable) && !mapping_options[:filterable].include?(field))
            fields[field] = {type: default_type, index: "no"}
          else
            fields[field] = keyword_mapping
          end

          if !options[:searchable] || mapping_options[:searchable].include?(field)
            if word
              fields["analyzed"] = {type: default_type, index: "analyzed", analyzer: default_analyzer}

              if mapping_options[:highlight].include?(field)
                fields["analyzed"][:term_vector] = "with_positions_offsets"
              end
            end

            mapping_options.except(:highlight, :searchable, :filterable, :only_analyzed, :word).each do |type, f|
              if options[:match] == type || f.include?(field)
                fields[type] = {type: default_type, index: "analyzed", analyzer: "searchkick_#{type}_index"}
              end
            end
          end

          mapping[field] =
            if below50
              {
                type: "multi_field",
                fields: fields
              }
            elsif fields[field]
              # 5.0+: the main field carries the other entries as sub-fields
              fields[field].merge(fields: fields.except(field))
            end
        end

        (options[:locations] || []).map(&:to_s).each do |field|
          mapping[field] = {
            type: "geo_point"
          }
        end

        (options[:unsearchable] || []).map(&:to_s).each do |field|
          mapping[field] = {
            type: default_type,
            index: "no"
          }
        end

        routing = {}
        if options[:routing]
          routing = {required: true}
          # any value other than true is used as the routing path
          unless options[:routing] == true
            routing[:path] = options[:routing].to_s
          end
        end

        dynamic_fields = {
          # analyzed field must be the default field for include_in_all
          # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
          # however, we can include the not_analyzed field in _all
          # and the _all index analyzer will take care of it
          "{name}" => keyword_mapping.merge(include_in_all: !options[:searchable])
        }

        if options.key?(:filterable)
          dynamic_fields["{name}"] = {type: default_type, index: "no"}
        end

        dynamic_fields["{name}"][:ignore_above] = 256 unless below22

        unless options[:searchable]
          if options[:match] && options[:match] != :word
            dynamic_fields[options[:match]] = {type: default_type, index: "analyzed", analyzer: "searchkick_#{options[:match]}_index"}
          end

          if word
            dynamic_fields["analyzed"] = {type: default_type, index: "analyzed"}
          end
        end

        # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
        multi_field =
          if below50
            {
              type: "multi_field",
              fields: dynamic_fields
            }
          else
            dynamic_fields["{name}"].merge(fields: dynamic_fields.except("{name}"))
          end

        mappings = {
          _default_: {
            _all: {type: default_type, index: "analyzed", analyzer: default_analyzer},
            properties: mapping,
            _routing: routing,
            # https://gist.github.com/kimchy/2898285
            dynamic_templates: [
              {
                string_template: {
                  match: "*",
                  match_mapping_type: "string",
                  mapping: multi_field
                }
              }
            ]
          }
        }.deep_merge(options[:mappings] || {})
      end

      {
        settings: settings,
        mappings: mappings
      }
    end
  end
end
|
data/lib/searchkick/query.rb
CHANGED
@@ -10,8 +10,7 @@ module Searchkick
|
|
10
10
|
:took, :error, :model_name, :entry_name, :total_count, :total_entries,
|
11
11
|
:current_page, :per_page, :limit_value, :padding, :total_pages, :num_pages,
|
12
12
|
:offset_value, :offset, :previous_page, :prev_page, :next_page, :first_page?, :last_page?,
|
13
|
-
:out_of_range?, :hits
|
14
|
-
|
13
|
+
:out_of_range?, :hits, :response, :to_a, :first
|
15
14
|
|
16
15
|
def initialize(klass, term, options = {})
|
17
16
|
if term.is_a?(Hash)
|
@@ -30,6 +29,12 @@ module Searchkick
|
|
30
29
|
@options = options
|
31
30
|
@match_suffix = options[:match] || searchkick_options[:match] || "analyzed"
|
32
31
|
|
32
|
+
# prevent Ruby warnings
|
33
|
+
@type = nil
|
34
|
+
@routing = nil
|
35
|
+
@misspellings_below = nil
|
36
|
+
@highlighted_fields = nil
|
37
|
+
|
33
38
|
prepare
|
34
39
|
end
|
35
40
|
|
@@ -59,8 +64,9 @@ module Searchkick
|
|
59
64
|
index: index,
|
60
65
|
body: body
|
61
66
|
}
|
62
|
-
params
|
63
|
-
params
|
67
|
+
params[:type] = @type if @type
|
68
|
+
params[:routing] = @routing if @routing
|
69
|
+
params.merge!(options[:request_params]) if options[:request_params]
|
64
70
|
params
|
65
71
|
end
|
66
72
|
|
@@ -86,7 +92,7 @@ module Searchkick
|
|
86
92
|
|
87
93
|
# no easy way to tell which host the client will use
|
88
94
|
host = Searchkick.client.transport.hosts.first
|
89
|
-
credentials =
|
95
|
+
credentials = host[:user] || host[:password] ? "#{host[:user]}:#{host[:password]}@" : nil
|
90
96
|
"curl #{host[:protocol]}://#{credentials}#{host[:host]}:#{host[:port]}/#{CGI.escape(index)}#{type ? "/#{type.map { |t| CGI.escape(t) }.join(',')}" : ''}/_search?pretty -d '#{query[:body].to_json}'"
|
91
97
|
end
|
92
98
|
|
@@ -159,10 +165,10 @@ module Searchkick
|
|
159
165
|
padding = [options[:padding].to_i, 0].max
|
160
166
|
offset = options[:offset] || (page - 1) * per_page + padding
|
161
167
|
|
162
|
-
# model and
|
168
|
+
# model and eager loading
|
163
169
|
load = options[:load].nil? ? true : options[:load]
|
164
170
|
|
165
|
-
conversions_fields = Array(options[:conversions] || searchkick_options[:conversions])
|
171
|
+
conversions_fields = Array(options[:conversions] || searchkick_options[:conversions]).map(&:to_s)
|
166
172
|
personalize_field = searchkick_options[:personalize]
|
167
173
|
|
168
174
|
all = term == "*"
|
@@ -257,7 +263,7 @@ module Searchkick
|
|
257
263
|
f = field.split(".")[0..-2].join(".")
|
258
264
|
queries << {match: {f => shared_options.merge(analyzer: "keyword")}}
|
259
265
|
else
|
260
|
-
analyzer = field
|
266
|
+
analyzer = field =~ /\.word_(start|middle|end)\z/ ? "searchkick_word_search" : "searchkick_autocomplete_search"
|
261
267
|
qs << shared_options.merge(analyzer: analyzer)
|
262
268
|
end
|
263
269
|
|
@@ -286,22 +292,22 @@ module Searchkick
|
|
286
292
|
{field_value_factor: {field: "#{conversions_field}.count"}}
|
287
293
|
end
|
288
294
|
|
289
|
-
shoulds <<
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
295
|
+
shoulds << {
|
296
|
+
nested: {
|
297
|
+
path: conversions_field,
|
298
|
+
score_mode: "sum",
|
299
|
+
query: {
|
300
|
+
function_score: {
|
301
|
+
boost_mode: "replace",
|
302
|
+
query: {
|
303
|
+
match: {
|
304
|
+
"#{conversions_field}.query" => term
|
305
|
+
}
|
306
|
+
}
|
307
|
+
}.merge(script_score)
|
308
|
+
}
|
309
|
+
}
|
310
|
+
}
|
305
311
|
end
|
306
312
|
payload = {
|
307
313
|
bool: {
|
@@ -378,13 +384,22 @@ module Searchkick
|
|
378
384
|
payload[:fields] = options[:select] if options[:select] != true
|
379
385
|
elsif options[:select_v2]
|
380
386
|
if options[:select_v2] == []
|
381
|
-
|
387
|
+
# intuitively [] makes sense to return no fields, but ES by default returns all fields
|
388
|
+
if below50?
|
389
|
+
payload[:fields] = []
|
390
|
+
else
|
391
|
+
payload[:_source] = false
|
392
|
+
end
|
382
393
|
else
|
383
394
|
payload[:_source] = options[:select_v2]
|
384
395
|
end
|
385
396
|
elsif load
|
386
397
|
# don't need any fields since we're going to load them from the DB anyways
|
387
|
-
|
398
|
+
if below50?
|
399
|
+
payload[:fields] = []
|
400
|
+
else
|
401
|
+
payload[:_source] = false
|
402
|
+
end
|
388
403
|
end
|
389
404
|
|
390
405
|
if options[:type] || (klass != searchkick_klass && searchkick_index)
|
@@ -399,7 +414,7 @@ module Searchkick
|
|
399
414
|
payload = payload.deep_merge(options[:body_options]) if options[:body_options]
|
400
415
|
|
401
416
|
@body = payload
|
402
|
-
@facet_limits
|
417
|
+
@facet_limits ||= {}
|
403
418
|
@page = page
|
404
419
|
@per_page = per_page
|
405
420
|
@padding = padding
|
@@ -751,8 +766,8 @@ module Searchkick
|
|
751
766
|
filters << {bool: {must_not: term_filters(field, op_value)}}
|
752
767
|
end
|
753
768
|
when :all
|
754
|
-
op_value.each do |
|
755
|
-
filters << term_filters(field,
|
769
|
+
op_value.each do |val|
|
770
|
+
filters << term_filters(field, val)
|
756
771
|
end
|
757
772
|
when :in
|
758
773
|
filters << term_filters(field, op_value)
|