searchkick 4.4.0 → 5.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +160 -3
- data/LICENSE.txt +1 -1
- data/README.md +567 -421
- data/lib/searchkick/bulk_reindex_job.rb +12 -8
- data/lib/searchkick/controller_runtime.rb +40 -0
- data/lib/searchkick/index.rb +167 -74
- data/lib/searchkick/index_cache.rb +30 -0
- data/lib/searchkick/index_options.rb +465 -404
- data/lib/searchkick/indexer.rb +15 -8
- data/lib/searchkick/log_subscriber.rb +57 -0
- data/lib/searchkick/middleware.rb +9 -2
- data/lib/searchkick/model.rb +50 -51
- data/lib/searchkick/process_batch_job.rb +9 -25
- data/lib/searchkick/process_queue_job.rb +4 -3
- data/lib/searchkick/query.rb +106 -77
- data/lib/searchkick/record_data.rb +1 -1
- data/lib/searchkick/record_indexer.rb +136 -51
- data/lib/searchkick/reindex_queue.rb +51 -9
- data/lib/searchkick/reindex_v2_job.rb +10 -34
- data/lib/searchkick/relation.rb +247 -0
- data/lib/searchkick/relation_indexer.rb +155 -0
- data/lib/searchkick/results.rb +131 -96
- data/lib/searchkick/version.rb +1 -1
- data/lib/searchkick/where.rb +11 -0
- data/lib/searchkick.rb +202 -96
- data/lib/tasks/searchkick.rake +14 -10
- metadata +18 -85
- data/CONTRIBUTING.md +0 -53
- data/lib/searchkick/bulk_indexer.rb +0 -173
- data/lib/searchkick/logging.rb +0 -246
@@ -1,491 +1,552 @@
|
|
1
1
|
module Searchkick
|
2
|
-
|
3
|
-
|
4
|
-
options = @options
|
5
|
-
language = options[:language]
|
6
|
-
language = language.call if language.respond_to?(:call)
|
2
|
+
class IndexOptions
|
3
|
+
attr_reader :options
|
7
4
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
if below70
|
13
|
-
index_type = options[:_type]
|
14
|
-
index_type = index_type.call if index_type.respond_to?(:call)
|
15
|
-
end
|
5
|
+
def initialize(index)
|
6
|
+
@options = index.options
|
7
|
+
end
|
16
8
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
end
|
9
|
+
def index_options
|
10
|
+
# mortal symbols are garbage collected in Ruby 2.2+
|
11
|
+
custom_settings = (options[:settings] || {}).deep_symbolize_keys
|
12
|
+
custom_mappings = (options[:mappings] || {}).deep_symbolize_keys
|
22
13
|
|
23
14
|
if options[:mappings] && !options[:merge_mappings]
|
24
|
-
settings =
|
25
|
-
mappings =
|
15
|
+
settings = custom_settings
|
16
|
+
mappings = custom_mappings
|
26
17
|
else
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
tokenizer: "keyword",
|
39
|
-
filter: ["lowercase"] + (options[:stem_conversions] ? ["searchkick_stemmer"] : [])
|
40
|
-
},
|
41
|
-
default_analyzer => {
|
42
|
-
type: "custom",
|
43
|
-
# character filters -> tokenizer -> token filters
|
44
|
-
# https://www.elastic.co/guide/en/elasticsearch/guide/current/analysis-intro.html
|
45
|
-
char_filter: ["ampersand"],
|
46
|
-
tokenizer: "standard",
|
47
|
-
# synonym should come last, after stemming and shingle
|
48
|
-
# shingle must come before searchkick_stemmer
|
49
|
-
filter: ["lowercase", "asciifolding", "searchkick_index_shingle", "searchkick_stemmer"]
|
50
|
-
},
|
51
|
-
searchkick_search: {
|
52
|
-
type: "custom",
|
53
|
-
char_filter: ["ampersand"],
|
54
|
-
tokenizer: "standard",
|
55
|
-
filter: ["lowercase", "asciifolding", "searchkick_search_shingle", "searchkick_stemmer"]
|
56
|
-
},
|
57
|
-
searchkick_search2: {
|
58
|
-
type: "custom",
|
59
|
-
char_filter: ["ampersand"],
|
60
|
-
tokenizer: "standard",
|
61
|
-
filter: ["lowercase", "asciifolding", "searchkick_stemmer"]
|
62
|
-
},
|
63
|
-
# https://github.com/leschenko/elasticsearch_autocomplete/blob/master/lib/elasticsearch_autocomplete/analyzers.rb
|
64
|
-
searchkick_autocomplete_search: {
|
65
|
-
type: "custom",
|
66
|
-
tokenizer: "keyword",
|
67
|
-
filter: ["lowercase", "asciifolding"]
|
68
|
-
},
|
69
|
-
searchkick_word_search: {
|
70
|
-
type: "custom",
|
71
|
-
tokenizer: "standard",
|
72
|
-
filter: ["lowercase", "asciifolding"]
|
73
|
-
},
|
74
|
-
searchkick_suggest_index: {
|
75
|
-
type: "custom",
|
76
|
-
tokenizer: "standard",
|
77
|
-
filter: ["lowercase", "asciifolding", "searchkick_suggest_shingle"]
|
78
|
-
},
|
79
|
-
searchkick_text_start_index: {
|
80
|
-
type: "custom",
|
81
|
-
tokenizer: "keyword",
|
82
|
-
filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
|
83
|
-
},
|
84
|
-
searchkick_text_middle_index: {
|
85
|
-
type: "custom",
|
86
|
-
tokenizer: "keyword",
|
87
|
-
filter: ["lowercase", "asciifolding", "searchkick_ngram"]
|
88
|
-
},
|
89
|
-
searchkick_text_end_index: {
|
90
|
-
type: "custom",
|
91
|
-
tokenizer: "keyword",
|
92
|
-
filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
|
93
|
-
},
|
94
|
-
searchkick_word_start_index: {
|
95
|
-
type: "custom",
|
96
|
-
tokenizer: "standard",
|
97
|
-
filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
|
98
|
-
},
|
99
|
-
searchkick_word_middle_index: {
|
100
|
-
type: "custom",
|
101
|
-
tokenizer: "standard",
|
102
|
-
filter: ["lowercase", "asciifolding", "searchkick_ngram"]
|
103
|
-
},
|
104
|
-
searchkick_word_end_index: {
|
105
|
-
type: "custom",
|
106
|
-
tokenizer: "standard",
|
107
|
-
filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
|
108
|
-
}
|
109
|
-
},
|
110
|
-
filter: {
|
111
|
-
searchkick_index_shingle: {
|
112
|
-
type: "shingle",
|
113
|
-
token_separator: ""
|
114
|
-
},
|
115
|
-
# lucky find https://web.archiveorange.com/archive/v/AAfXfQ17f57FcRINsof7
|
116
|
-
searchkick_search_shingle: {
|
117
|
-
type: "shingle",
|
118
|
-
token_separator: "",
|
119
|
-
output_unigrams: false,
|
120
|
-
output_unigrams_if_no_shingles: true
|
121
|
-
},
|
122
|
-
searchkick_suggest_shingle: {
|
123
|
-
type: "shingle",
|
124
|
-
max_shingle_size: 5
|
125
|
-
},
|
126
|
-
searchkick_edge_ngram: {
|
127
|
-
type: "edge_ngram",
|
128
|
-
min_gram: 1,
|
129
|
-
max_gram: 50
|
130
|
-
},
|
131
|
-
searchkick_ngram: {
|
132
|
-
type: "ngram",
|
133
|
-
min_gram: 1,
|
134
|
-
max_gram: 50
|
135
|
-
},
|
136
|
-
searchkick_stemmer: {
|
137
|
-
# use stemmer if language is lowercase, snowball otherwise
|
138
|
-
type: language == language.to_s.downcase ? "stemmer" : "snowball",
|
139
|
-
language: language || "English"
|
140
|
-
}
|
141
|
-
},
|
142
|
-
char_filter: {
|
143
|
-
# https://www.elastic.co/guide/en/elasticsearch/guide/current/custom-analyzers.html
|
144
|
-
# &_to_and
|
145
|
-
ampersand: {
|
146
|
-
type: "mapping",
|
147
|
-
mappings: ["&=> and "]
|
148
|
-
}
|
149
|
-
}
|
150
|
-
}
|
151
|
-
}
|
18
|
+
settings = generate_settings.deep_symbolize_keys.deep_merge(custom_settings)
|
19
|
+
mappings = generate_mappings.deep_symbolize_keys.deep_merge(custom_mappings)
|
20
|
+
end
|
21
|
+
|
22
|
+
set_deep_paging(settings) if options[:deep_paging] || options[:max_result_window]
|
23
|
+
|
24
|
+
{
|
25
|
+
settings: settings,
|
26
|
+
mappings: mappings
|
27
|
+
}
|
28
|
+
end
|
152
29
|
|
153
|
-
|
30
|
+
def generate_settings
|
31
|
+
language = options[:language]
|
32
|
+
language = language.call if language.respond_to?(:call)
|
154
33
|
|
155
|
-
|
156
|
-
|
157
|
-
|
34
|
+
settings = {
|
35
|
+
analysis: {
|
36
|
+
analyzer: {
|
37
|
+
searchkick_keyword: {
|
38
|
+
type: "custom",
|
39
|
+
tokenizer: "keyword",
|
40
|
+
filter: ["lowercase"] + (options[:stem_conversions] ? ["searchkick_stemmer"] : [])
|
41
|
+
},
|
158
42
|
default_analyzer => {
|
159
|
-
type: "
|
43
|
+
type: "custom",
|
44
|
+
# character filters -> tokenizer -> token filters
|
45
|
+
# https://www.elastic.co/guide/en/elasticsearch/guide/current/analysis-intro.html
|
46
|
+
char_filter: ["ampersand"],
|
47
|
+
tokenizer: "standard",
|
48
|
+
# synonym should come last, after stemming and shingle
|
49
|
+
# shingle must come before searchkick_stemmer
|
50
|
+
filter: ["lowercase", "asciifolding", "searchkick_index_shingle", "searchkick_stemmer"]
|
160
51
|
},
|
161
52
|
searchkick_search: {
|
162
|
-
type: "
|
53
|
+
type: "custom",
|
54
|
+
char_filter: ["ampersand"],
|
55
|
+
tokenizer: "standard",
|
56
|
+
filter: ["lowercase", "asciifolding", "searchkick_search_shingle", "searchkick_stemmer"]
|
163
57
|
},
|
164
58
|
searchkick_search2: {
|
165
|
-
type: "
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
stem = false
|
170
|
-
when "chinese2", "smartcn"
|
171
|
-
settings[:analysis][:analyzer].merge!(
|
172
|
-
default_analyzer => {
|
173
|
-
type: "smartcn"
|
59
|
+
type: "custom",
|
60
|
+
char_filter: ["ampersand"],
|
61
|
+
tokenizer: "standard",
|
62
|
+
filter: ["lowercase", "asciifolding", "searchkick_stemmer"]
|
174
63
|
},
|
175
|
-
|
176
|
-
|
64
|
+
# https://github.com/leschenko/elasticsearch_autocomplete/blob/master/lib/elasticsearch_autocomplete/analyzers.rb
|
65
|
+
searchkick_autocomplete_search: {
|
66
|
+
type: "custom",
|
67
|
+
tokenizer: "keyword",
|
68
|
+
filter: ["lowercase", "asciifolding"]
|
177
69
|
},
|
178
|
-
|
179
|
-
type: "
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
stem = false
|
184
|
-
when "japanese"
|
185
|
-
settings[:analysis][:analyzer].merge!(
|
186
|
-
default_analyzer => {
|
187
|
-
type: "kuromoji"
|
70
|
+
searchkick_word_search: {
|
71
|
+
type: "custom",
|
72
|
+
tokenizer: "standard",
|
73
|
+
filter: ["lowercase", "asciifolding"]
|
188
74
|
},
|
189
|
-
|
190
|
-
type: "
|
75
|
+
searchkick_suggest_index: {
|
76
|
+
type: "custom",
|
77
|
+
tokenizer: "standard",
|
78
|
+
filter: ["lowercase", "asciifolding", "searchkick_suggest_shingle"]
|
191
79
|
},
|
192
|
-
|
193
|
-
type: "
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
stem = false
|
198
|
-
when "korean"
|
199
|
-
settings[:analysis][:analyzer].merge!(
|
200
|
-
default_analyzer => {
|
201
|
-
type: "openkoreantext-analyzer"
|
80
|
+
searchkick_text_start_index: {
|
81
|
+
type: "custom",
|
82
|
+
tokenizer: "keyword",
|
83
|
+
filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
|
202
84
|
},
|
203
|
-
|
204
|
-
type: "
|
85
|
+
searchkick_text_middle_index: {
|
86
|
+
type: "custom",
|
87
|
+
tokenizer: "keyword",
|
88
|
+
filter: ["lowercase", "asciifolding", "searchkick_ngram"]
|
205
89
|
},
|
206
|
-
|
207
|
-
type: "
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
stem = false
|
212
|
-
when "korean2"
|
213
|
-
settings[:analysis][:analyzer].merge!(
|
214
|
-
default_analyzer => {
|
215
|
-
type: "nori"
|
90
|
+
searchkick_text_end_index: {
|
91
|
+
type: "custom",
|
92
|
+
tokenizer: "keyword",
|
93
|
+
filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
|
216
94
|
},
|
217
|
-
|
218
|
-
type: "
|
95
|
+
searchkick_word_start_index: {
|
96
|
+
type: "custom",
|
97
|
+
tokenizer: "standard",
|
98
|
+
filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
|
219
99
|
},
|
220
|
-
|
221
|
-
type: "
|
100
|
+
searchkick_word_middle_index: {
|
101
|
+
type: "custom",
|
102
|
+
tokenizer: "standard",
|
103
|
+
filter: ["lowercase", "asciifolding", "searchkick_ngram"]
|
104
|
+
},
|
105
|
+
searchkick_word_end_index: {
|
106
|
+
type: "custom",
|
107
|
+
tokenizer: "standard",
|
108
|
+
filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
|
222
109
|
}
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
default_analyzer => {
|
229
|
-
type: "vi_analyzer"
|
110
|
+
},
|
111
|
+
filter: {
|
112
|
+
searchkick_index_shingle: {
|
113
|
+
type: "shingle",
|
114
|
+
token_separator: ""
|
230
115
|
},
|
231
|
-
|
232
|
-
|
116
|
+
# lucky find https://web.archiveorange.com/archive/v/AAfXfQ17f57FcRINsof7
|
117
|
+
searchkick_search_shingle: {
|
118
|
+
type: "shingle",
|
119
|
+
token_separator: "",
|
120
|
+
output_unigrams: false,
|
121
|
+
output_unigrams_if_no_shingles: true
|
233
122
|
},
|
234
|
-
|
235
|
-
type: "
|
236
|
-
|
237
|
-
)
|
238
|
-
|
239
|
-
stem = false
|
240
|
-
when "polish", "ukrainian"
|
241
|
-
settings[:analysis][:analyzer].merge!(
|
242
|
-
default_analyzer => {
|
243
|
-
type: language
|
123
|
+
searchkick_suggest_shingle: {
|
124
|
+
type: "shingle",
|
125
|
+
max_shingle_size: 5
|
244
126
|
},
|
245
|
-
|
246
|
-
type:
|
127
|
+
searchkick_edge_ngram: {
|
128
|
+
type: "edge_ngram",
|
129
|
+
min_gram: 1,
|
130
|
+
max_gram: 50
|
247
131
|
},
|
248
|
-
|
249
|
-
type:
|
132
|
+
searchkick_ngram: {
|
133
|
+
type: "ngram",
|
134
|
+
min_gram: 1,
|
135
|
+
max_gram: 50
|
136
|
+
},
|
137
|
+
searchkick_stemmer: {
|
138
|
+
# use stemmer if language is lowercase, snowball otherwise
|
139
|
+
type: language == language.to_s.downcase ? "stemmer" : "snowball",
|
140
|
+
language: language || "English"
|
141
|
+
}
|
142
|
+
},
|
143
|
+
char_filter: {
|
144
|
+
# https://www.elastic.co/guide/en/elasticsearch/guide/current/custom-analyzers.html
|
145
|
+
# &_to_and
|
146
|
+
ampersand: {
|
147
|
+
type: "mapping",
|
148
|
+
mappings: ["&=> and "]
|
250
149
|
}
|
251
|
-
|
150
|
+
}
|
151
|
+
}
|
152
|
+
}
|
252
153
|
|
253
|
-
|
254
|
-
|
154
|
+
raise ArgumentError, "Can't pass both language and stemmer" if options[:stemmer] && language
|
155
|
+
update_language(settings, language)
|
156
|
+
update_stemming(settings)
|
255
157
|
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
158
|
+
if Searchkick.env == "test"
|
159
|
+
settings[:number_of_shards] = 1
|
160
|
+
settings[:number_of_replicas] = 0
|
161
|
+
end
|
260
162
|
|
261
|
-
|
262
|
-
|
263
|
-
|
163
|
+
if options[:similarity]
|
164
|
+
settings[:similarity] = {default: {type: options[:similarity]}}
|
165
|
+
end
|
264
166
|
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
}
|
270
|
-
end
|
167
|
+
settings[:index] = {
|
168
|
+
max_ngram_diff: 49,
|
169
|
+
max_shingle_diff: 4
|
170
|
+
}
|
271
171
|
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
end
|
172
|
+
if options[:case_sensitive]
|
173
|
+
settings[:analysis][:analyzer].each do |_, analyzer|
|
174
|
+
analyzer[:filter].delete("lowercase")
|
276
175
|
end
|
176
|
+
end
|
277
177
|
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
178
|
+
add_synonyms(settings)
|
179
|
+
add_search_synonyms(settings)
|
180
|
+
|
181
|
+
if options[:special_characters] == false
|
182
|
+
settings[:analysis][:analyzer].each_value do |analyzer_settings|
|
183
|
+
analyzer_settings[:filter].reject! { |f| f == "asciifolding" }
|
283
184
|
end
|
185
|
+
end
|
284
186
|
|
285
|
-
|
187
|
+
settings
|
188
|
+
end
|
286
189
|
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
type: "
|
293
|
-
|
294
|
-
|
190
|
+
def update_language(settings, language)
|
191
|
+
case language
|
192
|
+
when "chinese"
|
193
|
+
settings[:analysis][:analyzer].merge!(
|
194
|
+
default_analyzer => {
|
195
|
+
type: "ik_smart"
|
196
|
+
},
|
197
|
+
searchkick_search: {
|
198
|
+
type: "ik_smart"
|
199
|
+
},
|
200
|
+
searchkick_search2: {
|
201
|
+
type: "ik_max_word"
|
295
202
|
}
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
203
|
+
)
|
204
|
+
when "chinese2", "smartcn"
|
205
|
+
settings[:analysis][:analyzer].merge!(
|
206
|
+
default_analyzer => {
|
207
|
+
type: "smartcn"
|
208
|
+
},
|
209
|
+
searchkick_search: {
|
210
|
+
type: "smartcn"
|
211
|
+
},
|
212
|
+
searchkick_search2: {
|
213
|
+
type: "smartcn"
|
214
|
+
}
|
215
|
+
)
|
216
|
+
when "japanese", "japanese2"
|
217
|
+
analyzer = {
|
218
|
+
type: "custom",
|
219
|
+
tokenizer: "kuromoji_tokenizer",
|
220
|
+
filter: [
|
221
|
+
"kuromoji_baseform",
|
222
|
+
"kuromoji_part_of_speech",
|
223
|
+
"cjk_width",
|
224
|
+
"ja_stop",
|
225
|
+
"searchkick_stemmer",
|
226
|
+
"lowercase"
|
227
|
+
]
|
228
|
+
}
|
229
|
+
settings[:analysis][:analyzer].merge!(
|
230
|
+
default_analyzer => analyzer.deep_dup,
|
231
|
+
searchkick_search: analyzer.deep_dup,
|
232
|
+
searchkick_search2: analyzer.deep_dup
|
233
|
+
)
|
234
|
+
settings[:analysis][:filter][:searchkick_stemmer] = {
|
235
|
+
type: "kuromoji_stemmer"
|
236
|
+
}
|
237
|
+
when "korean"
|
238
|
+
settings[:analysis][:analyzer].merge!(
|
239
|
+
default_analyzer => {
|
240
|
+
type: "openkoreantext-analyzer"
|
241
|
+
},
|
242
|
+
searchkick_search: {
|
243
|
+
type: "openkoreantext-analyzer"
|
244
|
+
},
|
245
|
+
searchkick_search2: {
|
246
|
+
type: "openkoreantext-analyzer"
|
247
|
+
}
|
248
|
+
)
|
249
|
+
when "korean2"
|
250
|
+
settings[:analysis][:analyzer].merge!(
|
251
|
+
default_analyzer => {
|
252
|
+
type: "nori"
|
253
|
+
},
|
254
|
+
searchkick_search: {
|
255
|
+
type: "nori"
|
256
|
+
},
|
257
|
+
searchkick_search2: {
|
258
|
+
type: "nori"
|
259
|
+
}
|
260
|
+
)
|
261
|
+
when "vietnamese"
|
262
|
+
settings[:analysis][:analyzer].merge!(
|
263
|
+
default_analyzer => {
|
264
|
+
type: "vi_analyzer"
|
265
|
+
},
|
266
|
+
searchkick_search: {
|
267
|
+
type: "vi_analyzer"
|
268
|
+
},
|
269
|
+
searchkick_search2: {
|
270
|
+
type: "vi_analyzer"
|
271
|
+
}
|
272
|
+
)
|
273
|
+
when "polish", "ukrainian"
|
274
|
+
settings[:analysis][:analyzer].merge!(
|
275
|
+
default_analyzer => {
|
276
|
+
type: language
|
277
|
+
},
|
278
|
+
searchkick_search: {
|
279
|
+
type: language
|
280
|
+
},
|
281
|
+
searchkick_search2: {
|
282
|
+
type: language
|
283
|
+
}
|
284
|
+
)
|
285
|
+
end
|
286
|
+
end
|
287
|
+
|
288
|
+
def update_stemming(settings)
|
289
|
+
if options[:stemmer]
|
290
|
+
stemmer = options[:stemmer]
|
291
|
+
# could also support snowball and stemmer
|
292
|
+
case stemmer[:type]
|
293
|
+
when "hunspell"
|
294
|
+
# supports all token filter options
|
295
|
+
settings[:analysis][:filter][:searchkick_stemmer] = stemmer
|
296
|
+
else
|
297
|
+
raise ArgumentError, "Unknown stemmer: #{stemmer[:type]}"
|
310
298
|
end
|
299
|
+
end
|
311
300
|
|
312
|
-
|
313
|
-
search_synonyms = search_synonyms.call if search_synonyms.respond_to?(:call)
|
314
|
-
if search_synonyms.is_a?(String) || search_synonyms.any?
|
315
|
-
if search_synonyms.is_a?(String)
|
316
|
-
synonym_graph = {
|
317
|
-
type: "synonym_graph",
|
318
|
-
synonyms_path: search_synonyms
|
319
|
-
}
|
320
|
-
synonym_graph[:updateable] = true unless below73
|
321
|
-
else
|
322
|
-
synonym_graph = {
|
323
|
-
type: "synonym_graph",
|
324
|
-
# TODO confirm this is correct
|
325
|
-
synonyms: search_synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.join(",") : s }.map(&:downcase)
|
326
|
-
}
|
327
|
-
end
|
328
|
-
settings[:analysis][:filter][:searchkick_synonym_graph] = synonym_graph
|
301
|
+
stem = options[:stem]
|
329
302
|
|
330
|
-
|
331
|
-
|
332
|
-
end
|
333
|
-
end
|
303
|
+
# language analyzer used
|
304
|
+
stem = false if settings[:analysis][:analyzer][default_analyzer][:type] != "custom"
|
334
305
|
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
306
|
+
if stem == false
|
307
|
+
settings[:analysis][:filter].delete(:searchkick_stemmer)
|
308
|
+
settings[:analysis][:analyzer].each do |_, analyzer|
|
309
|
+
analyzer[:filter].delete("searchkick_stemmer") if analyzer[:filter]
|
310
|
+
end
|
311
|
+
end
|
341
312
|
|
342
|
-
|
343
|
-
|
313
|
+
if options[:stemmer_override]
|
314
|
+
stemmer_override = {
|
315
|
+
type: "stemmer_override"
|
316
|
+
}
|
317
|
+
if options[:stemmer_override].is_a?(String)
|
318
|
+
stemmer_override[:rules_path] = options[:stemmer_override]
|
319
|
+
else
|
320
|
+
stemmer_override[:rules] = options[:stemmer_override]
|
321
|
+
end
|
322
|
+
settings[:analysis][:filter][:searchkick_stemmer_override] = stemmer_override
|
344
323
|
|
345
|
-
|
346
|
-
|
347
|
-
|
324
|
+
settings[:analysis][:analyzer].each do |_, analyzer|
|
325
|
+
stemmer_index = analyzer[:filter].index("searchkick_stemmer") if analyzer[:filter]
|
326
|
+
analyzer[:filter].insert(stemmer_index, "searchkick_stemmer_override") if stemmer_index
|
348
327
|
end
|
328
|
+
end
|
349
329
|
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
330
|
+
if options[:stem_exclusion]
|
331
|
+
settings[:analysis][:filter][:searchkick_stem_exclusion] = {
|
332
|
+
type: "keyword_marker",
|
333
|
+
keywords: options[:stem_exclusion]
|
334
|
+
}
|
335
|
+
|
336
|
+
settings[:analysis][:analyzer].each do |_, analyzer|
|
337
|
+
stemmer_index = analyzer[:filter].index("searchkick_stemmer") if analyzer[:filter]
|
338
|
+
analyzer[:filter].insert(stemmer_index, "searchkick_stem_exclusion") if stemmer_index
|
354
339
|
end
|
340
|
+
end
|
341
|
+
end
|
355
342
|
|
356
|
-
|
343
|
+
def generate_mappings
|
344
|
+
mapping = {}
|
357
345
|
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
346
|
+
keyword_mapping = {type: "keyword"}
|
347
|
+
keyword_mapping[:ignore_above] = options[:ignore_above] || 30000
|
348
|
+
|
349
|
+
# conversions
|
350
|
+
Array(options[:conversions]).each do |conversions_field|
|
351
|
+
mapping[conversions_field] = {
|
352
|
+
type: "nested",
|
353
|
+
properties: {
|
354
|
+
query: {type: default_type, analyzer: "searchkick_keyword"},
|
355
|
+
count: {type: "integer"}
|
366
356
|
}
|
367
|
-
|
357
|
+
}
|
358
|
+
end
|
368
359
|
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
]
|
360
|
+
mapping_options =
|
361
|
+
[:suggest, :word, :text_start, :text_middle, :text_end, :word_start, :word_middle, :word_end, :highlight, :searchable, :filterable]
|
362
|
+
.to_h { |type| [type, (options[type] || []).map(&:to_s)] }
|
373
363
|
|
374
|
-
|
364
|
+
word = options[:word] != false && (!options[:match] || options[:match] == :word)
|
375
365
|
|
376
|
-
|
366
|
+
mapping_options[:searchable].delete("_all")
|
377
367
|
|
378
|
-
|
368
|
+
analyzed_field_options = {type: default_type, index: true, analyzer: default_analyzer.to_s}
|
379
369
|
|
380
|
-
|
381
|
-
|
370
|
+
mapping_options.values.flatten.uniq.each do |field|
|
371
|
+
fields = {}
|
382
372
|
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
373
|
+
if options.key?(:filterable) && !mapping_options[:filterable].include?(field)
|
374
|
+
fields[field] = {type: default_type, index: false}
|
375
|
+
else
|
376
|
+
fields[field] = keyword_mapping
|
377
|
+
end
|
388
378
|
|
389
|
-
|
390
|
-
|
391
|
-
|
379
|
+
if !options[:searchable] || mapping_options[:searchable].include?(field)
|
380
|
+
if word
|
381
|
+
fields[:analyzed] = analyzed_field_options
|
392
382
|
|
393
|
-
|
394
|
-
|
395
|
-
end
|
383
|
+
if mapping_options[:highlight].include?(field)
|
384
|
+
fields[:analyzed][:term_vector] = "with_positions_offsets"
|
396
385
|
end
|
386
|
+
end
|
397
387
|
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
end
|
388
|
+
mapping_options.except(:highlight, :searchable, :filterable, :word).each do |type, f|
|
389
|
+
if options[:match] == type || f.include?(field)
|
390
|
+
fields[type] = {type: default_type, index: true, analyzer: "searchkick_#{type}_index"}
|
402
391
|
end
|
403
392
|
end
|
404
|
-
|
405
|
-
mapping[field] = fields[field].merge(fields: fields.except(field))
|
406
393
|
end
|
407
394
|
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
395
|
+
mapping[field] = fields[field].merge(fields: fields.except(field))
|
396
|
+
end
|
397
|
+
|
398
|
+
(options[:locations] || []).map(&:to_s).each do |field|
|
399
|
+
mapping[field] = {
|
400
|
+
type: "geo_point"
|
401
|
+
}
|
402
|
+
end
|
403
|
+
|
404
|
+
options[:geo_shape] = options[:geo_shape].product([{}]).to_h if options[:geo_shape].is_a?(Array)
|
405
|
+
(options[:geo_shape] || {}).each do |field, shape_options|
|
406
|
+
mapping[field] = shape_options.merge(type: "geo_shape")
|
407
|
+
end
|
413
408
|
|
414
|
-
|
415
|
-
|
416
|
-
|
409
|
+
if options[:inheritance]
|
410
|
+
mapping[:type] = keyword_mapping
|
411
|
+
end
|
412
|
+
|
413
|
+
routing = {}
|
414
|
+
if options[:routing]
|
415
|
+
routing = {required: true}
|
416
|
+
unless options[:routing] == true
|
417
|
+
routing[:path] = options[:routing].to_s
|
417
418
|
end
|
419
|
+
end
|
418
420
|
|
419
|
-
|
420
|
-
|
421
|
+
dynamic_fields = {
|
422
|
+
# analyzed field must be the default field for include_in_all
|
423
|
+
# http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
|
424
|
+
# however, we can include the not_analyzed field in _all
|
425
|
+
# and the _all index analyzer will take care of it
|
426
|
+
"{name}" => keyword_mapping
|
427
|
+
}
|
428
|
+
|
429
|
+
if options.key?(:filterable)
|
430
|
+
dynamic_fields["{name}"] = {type: default_type, index: false}
|
431
|
+
end
|
432
|
+
|
433
|
+
unless options[:searchable]
|
434
|
+
if options[:match] && options[:match] != :word
|
435
|
+
dynamic_fields[options[:match]] = {type: default_type, index: true, analyzer: "searchkick_#{options[:match]}_index"}
|
421
436
|
end
|
422
437
|
|
423
|
-
|
424
|
-
|
425
|
-
routing = {required: true}
|
426
|
-
unless options[:routing] == true
|
427
|
-
routing[:path] = options[:routing].to_s
|
428
|
-
end
|
438
|
+
if word
|
439
|
+
dynamic_fields[:analyzed] = analyzed_field_options
|
429
440
|
end
|
441
|
+
end
|
442
|
+
|
443
|
+
# http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
|
444
|
+
multi_field = dynamic_fields["{name}"].merge(fields: dynamic_fields.except("{name}"))
|
445
|
+
|
446
|
+
mappings = {
|
447
|
+
properties: mapping,
|
448
|
+
_routing: routing,
|
449
|
+
# https://gist.github.com/kimchy/2898285
|
450
|
+
dynamic_templates: [
|
451
|
+
{
|
452
|
+
string_template: {
|
453
|
+
match: "*",
|
454
|
+
match_mapping_type: "string",
|
455
|
+
mapping: multi_field
|
456
|
+
}
|
457
|
+
}
|
458
|
+
]
|
459
|
+
}
|
460
|
+
|
461
|
+
mappings
|
462
|
+
end
|
430
463
|
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
"
|
464
|
+
def add_synonyms(settings)
|
465
|
+
synonyms = options[:synonyms] || []
|
466
|
+
synonyms = synonyms.call if synonyms.respond_to?(:call)
|
467
|
+
if synonyms.any?
|
468
|
+
settings[:analysis][:filter][:searchkick_synonym] = {
|
469
|
+
type: "synonym",
|
470
|
+
# only remove a single space from synonyms so three-word synonyms will fail noisily instead of silently
|
471
|
+
synonyms: synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.map { |s2| s2.sub(/\s+/, "") }.join(",") : s }.map(&:downcase)
|
437
472
|
}
|
473
|
+
# choosing a place for the synonym filter when stemming is not easy
|
474
|
+
# https://groups.google.com/forum/#!topic/elasticsearch/p7qcQlgHdB8
|
475
|
+
# TODO use a snowball stemmer on synonyms when creating the token filter
|
476
|
+
|
477
|
+
# http://elasticsearch-users.115913.n3.nabble.com/synonym-multi-words-search-td4030811.html
|
478
|
+
# I find the following approach effective if you are doing multi-word synonyms (synonym phrases):
|
479
|
+
# - Only apply the synonym expansion at index time
|
480
|
+
# - Don't have the synonym filter applied search
|
481
|
+
# - Use directional synonyms where appropriate. You want to make sure that you're not injecting terms that are too general.
|
482
|
+
settings[:analysis][:analyzer][default_analyzer][:filter].insert(2, "searchkick_synonym")
|
483
|
+
|
484
|
+
%w(word_start word_middle word_end).each do |type|
|
485
|
+
settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_synonym")
|
486
|
+
end
|
487
|
+
end
|
488
|
+
end
|
438
489
|
|
439
|
-
|
440
|
-
|
490
|
+
def add_search_synonyms(settings)
|
491
|
+
search_synonyms = options[:search_synonyms] || []
|
492
|
+
search_synonyms = search_synonyms.call if search_synonyms.respond_to?(:call)
|
493
|
+
if search_synonyms.is_a?(String) || search_synonyms.any?
|
494
|
+
if search_synonyms.is_a?(String)
|
495
|
+
synonym_graph = {
|
496
|
+
type: "synonym_graph",
|
497
|
+
synonyms_path: search_synonyms
|
498
|
+
}
|
499
|
+
synonym_graph[:updateable] = true unless below73?
|
500
|
+
else
|
501
|
+
synonym_graph = {
|
502
|
+
type: "synonym_graph",
|
503
|
+
# TODO confirm this is correct
|
504
|
+
synonyms: search_synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.join(",") : s }.map(&:downcase)
|
505
|
+
}
|
441
506
|
end
|
507
|
+
settings[:analysis][:filter][:searchkick_synonym_graph] = synonym_graph
|
442
508
|
|
443
|
-
|
444
|
-
|
445
|
-
|
509
|
+
if ["japanese", "japanese2"].include?(options[:language])
|
510
|
+
[:searchkick_search, :searchkick_search2].each do |analyzer|
|
511
|
+
settings[:analysis][:analyzer][analyzer][:filter].insert(4, "searchkick_synonym_graph")
|
446
512
|
end
|
513
|
+
else
|
514
|
+
[:searchkick_search2, :searchkick_word_search].each do |analyzer|
|
515
|
+
unless settings[:analysis][:analyzer][analyzer].key?(:filter)
|
516
|
+
raise Error, "Search synonyms are not supported yet for language"
|
517
|
+
end
|
447
518
|
|
448
|
-
|
449
|
-
dynamic_fields[:analyzed] = analyzed_field_options
|
519
|
+
settings[:analysis][:analyzer][analyzer][:filter].insert(2, "searchkick_synonym_graph")
|
450
520
|
end
|
451
521
|
end
|
522
|
+
end
|
523
|
+
end
|
452
524
|
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
|
457
|
-
properties: mapping,
|
458
|
-
_routing: routing,
|
459
|
-
# https://gist.github.com/kimchy/2898285
|
460
|
-
dynamic_templates: [
|
461
|
-
{
|
462
|
-
string_template: {
|
463
|
-
match: "*",
|
464
|
-
match_mapping_type: "string",
|
465
|
-
mapping: multi_field
|
466
|
-
}
|
467
|
-
}
|
468
|
-
]
|
469
|
-
}
|
470
|
-
|
471
|
-
if below70
|
472
|
-
mappings = {index_type => mappings}
|
473
|
-
end
|
474
|
-
|
475
|
-
mappings = mappings.symbolize_keys.deep_merge(custom_mapping.symbolize_keys)
|
525
|
+
def set_deep_paging(settings)
|
526
|
+
if !settings.dig(:index, :max_result_window) && !settings[:"index.max_result_window"]
|
527
|
+
settings[:index] ||= {}
|
528
|
+
settings[:index][:max_result_window] = options[:max_result_window] || 1_000_000_000
|
476
529
|
end
|
530
|
+
end
|
477
531
|
|
478
|
-
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
|
532
|
+
def index_type
|
533
|
+
@index_type ||= begin
|
534
|
+
index_type = options[:_type]
|
535
|
+
index_type = index_type.call if index_type.respond_to?(:call)
|
536
|
+
index_type
|
483
537
|
end
|
538
|
+
end
|
484
539
|
|
485
|
-
|
486
|
-
|
487
|
-
|
488
|
-
|
540
|
+
def default_type
|
541
|
+
"text"
|
542
|
+
end
|
543
|
+
|
544
|
+
def default_analyzer
|
545
|
+
:searchkick_index
|
546
|
+
end
|
547
|
+
|
548
|
+
def below73?
|
549
|
+
Searchkick.server_below?("7.3.0")
|
489
550
|
end
|
490
551
|
end
|
491
552
|
end
|