searchkick 4.0.0 → 5.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +234 -96
- data/LICENSE.txt +1 -1
- data/README.md +446 -268
- data/lib/searchkick/bulk_reindex_job.rb +12 -8
- data/lib/searchkick/controller_runtime.rb +40 -0
- data/lib/searchkick/index.rb +174 -56
- data/lib/searchkick/index_cache.rb +30 -0
- data/lib/searchkick/index_options.rb +472 -349
- data/lib/searchkick/indexer.rb +15 -8
- data/lib/searchkick/log_subscriber.rb +57 -0
- data/lib/searchkick/middleware.rb +1 -1
- data/lib/searchkick/model.rb +51 -48
- data/lib/searchkick/process_batch_job.rb +10 -26
- data/lib/searchkick/process_queue_job.rb +21 -12
- data/lib/searchkick/query.rb +183 -51
- data/lib/searchkick/record_data.rb +0 -1
- data/lib/searchkick/record_indexer.rb +135 -50
- data/lib/searchkick/reindex_queue.rb +43 -6
- data/lib/searchkick/reindex_v2_job.rb +10 -34
- data/lib/searchkick/relation.rb +36 -0
- data/lib/searchkick/relation_indexer.rb +150 -0
- data/lib/searchkick/results.rb +162 -80
- data/lib/searchkick/version.rb +1 -1
- data/lib/searchkick.rb +203 -79
- data/lib/tasks/searchkick.rake +21 -11
- metadata +17 -71
- data/CONTRIBUTING.md +0 -53
- data/lib/searchkick/bulk_indexer.rb +0 -171
- data/lib/searchkick/logging.rb +0 -243
@@ -1,429 +1,552 @@
|
|
1
1
|
module Searchkick
|
2
|
-
|
2
|
+
class IndexOptions
|
3
|
+
attr_reader :options
|
4
|
+
|
5
|
+
def initialize(index)
|
6
|
+
@options = index.options
|
7
|
+
end
|
8
|
+
|
3
9
|
def index_options
|
4
|
-
|
5
|
-
|
6
|
-
|
10
|
+
# mortal symbols are garbage collected in Ruby 2.2+
|
11
|
+
custom_settings = (options[:settings] || {}).deep_symbolize_keys
|
12
|
+
custom_mappings = (options[:mappings] || {}).deep_symbolize_keys
|
7
13
|
|
8
14
|
if options[:mappings] && !options[:merge_mappings]
|
9
|
-
settings =
|
10
|
-
mappings =
|
15
|
+
settings = custom_settings
|
16
|
+
mappings = custom_mappings
|
11
17
|
else
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
keyword_mapping[:ignore_above] = options[:ignore_above] || 30000
|
24
|
-
|
25
|
-
settings = {
|
26
|
-
analysis: {
|
27
|
-
analyzer: {
|
28
|
-
searchkick_keyword: {
|
29
|
-
type: "custom",
|
30
|
-
tokenizer: "keyword",
|
31
|
-
filter: ["lowercase"] + (options[:stem_conversions] ? ["searchkick_stemmer"] : [])
|
32
|
-
},
|
33
|
-
default_analyzer => {
|
34
|
-
type: "custom",
|
35
|
-
# character filters -> tokenizer -> token filters
|
36
|
-
# https://www.elastic.co/guide/en/elasticsearch/guide/current/analysis-intro.html
|
37
|
-
char_filter: ["ampersand"],
|
38
|
-
tokenizer: "standard",
|
39
|
-
# synonym should come last, after stemming and shingle
|
40
|
-
# shingle must come before searchkick_stemmer
|
41
|
-
filter: ["lowercase", "asciifolding", "searchkick_index_shingle", "searchkick_stemmer"]
|
42
|
-
},
|
43
|
-
searchkick_search: {
|
44
|
-
type: "custom",
|
45
|
-
char_filter: ["ampersand"],
|
46
|
-
tokenizer: "standard",
|
47
|
-
filter: ["lowercase", "asciifolding", "searchkick_search_shingle", "searchkick_stemmer"]
|
48
|
-
},
|
49
|
-
searchkick_search2: {
|
50
|
-
type: "custom",
|
51
|
-
char_filter: ["ampersand"],
|
52
|
-
tokenizer: "standard",
|
53
|
-
filter: ["lowercase", "asciifolding", "searchkick_stemmer"]
|
54
|
-
},
|
55
|
-
# https://github.com/leschenko/elasticsearch_autocomplete/blob/master/lib/elasticsearch_autocomplete/analyzers.rb
|
56
|
-
searchkick_autocomplete_search: {
|
57
|
-
type: "custom",
|
58
|
-
tokenizer: "keyword",
|
59
|
-
filter: ["lowercase", "asciifolding"]
|
60
|
-
},
|
61
|
-
searchkick_word_search: {
|
62
|
-
type: "custom",
|
63
|
-
tokenizer: "standard",
|
64
|
-
filter: ["lowercase", "asciifolding"]
|
65
|
-
},
|
66
|
-
searchkick_suggest_index: {
|
67
|
-
type: "custom",
|
68
|
-
tokenizer: "standard",
|
69
|
-
filter: ["lowercase", "asciifolding", "searchkick_suggest_shingle"]
|
70
|
-
},
|
71
|
-
searchkick_text_start_index: {
|
72
|
-
type: "custom",
|
73
|
-
tokenizer: "keyword",
|
74
|
-
filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
|
75
|
-
},
|
76
|
-
searchkick_text_middle_index: {
|
77
|
-
type: "custom",
|
78
|
-
tokenizer: "keyword",
|
79
|
-
filter: ["lowercase", "asciifolding", "searchkick_ngram"]
|
80
|
-
},
|
81
|
-
searchkick_text_end_index: {
|
82
|
-
type: "custom",
|
83
|
-
tokenizer: "keyword",
|
84
|
-
filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
|
85
|
-
},
|
86
|
-
searchkick_word_start_index: {
|
87
|
-
type: "custom",
|
88
|
-
tokenizer: "standard",
|
89
|
-
filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
|
90
|
-
},
|
91
|
-
searchkick_word_middle_index: {
|
92
|
-
type: "custom",
|
93
|
-
tokenizer: "standard",
|
94
|
-
filter: ["lowercase", "asciifolding", "searchkick_ngram"]
|
95
|
-
},
|
96
|
-
searchkick_word_end_index: {
|
97
|
-
type: "custom",
|
98
|
-
tokenizer: "standard",
|
99
|
-
filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
|
100
|
-
}
|
101
|
-
},
|
102
|
-
filter: {
|
103
|
-
searchkick_index_shingle: {
|
104
|
-
type: "shingle",
|
105
|
-
token_separator: ""
|
106
|
-
},
|
107
|
-
# lucky find http://web.archiveorange.com/archive/v/AAfXfQ17f57FcRINsof7
|
108
|
-
searchkick_search_shingle: {
|
109
|
-
type: "shingle",
|
110
|
-
token_separator: "",
|
111
|
-
output_unigrams: false,
|
112
|
-
output_unigrams_if_no_shingles: true
|
113
|
-
},
|
114
|
-
searchkick_suggest_shingle: {
|
115
|
-
type: "shingle",
|
116
|
-
max_shingle_size: 5
|
117
|
-
},
|
118
|
-
searchkick_edge_ngram: {
|
119
|
-
type: "edgeNGram",
|
120
|
-
min_gram: 1,
|
121
|
-
max_gram: 50
|
122
|
-
},
|
123
|
-
searchkick_ngram: {
|
124
|
-
type: "nGram",
|
125
|
-
min_gram: 1,
|
126
|
-
max_gram: 50
|
127
|
-
},
|
128
|
-
searchkick_stemmer: {
|
129
|
-
# use stemmer if language is lowercase, snowball otherwise
|
130
|
-
type: language == language.to_s.downcase ? "stemmer" : "snowball",
|
131
|
-
language: language || "English"
|
132
|
-
}
|
133
|
-
},
|
134
|
-
char_filter: {
|
135
|
-
# https://www.elastic.co/guide/en/elasticsearch/guide/current/custom-analyzers.html
|
136
|
-
# &_to_and
|
137
|
-
ampersand: {
|
138
|
-
type: "mapping",
|
139
|
-
mappings: ["&=> and "]
|
140
|
-
}
|
141
|
-
}
|
142
|
-
}
|
143
|
-
}
|
18
|
+
settings = generate_settings.deep_symbolize_keys.deep_merge(custom_settings)
|
19
|
+
mappings = generate_mappings.deep_symbolize_keys.deep_merge(custom_mappings)
|
20
|
+
end
|
21
|
+
|
22
|
+
set_deep_paging(settings) if options[:deep_paging]
|
23
|
+
|
24
|
+
{
|
25
|
+
settings: settings,
|
26
|
+
mappings: mappings
|
27
|
+
}
|
28
|
+
end
|
144
29
|
|
145
|
-
|
30
|
+
def generate_settings
|
31
|
+
language = options[:language]
|
32
|
+
language = language.call if language.respond_to?(:call)
|
146
33
|
|
147
|
-
|
148
|
-
|
149
|
-
|
34
|
+
settings = {
|
35
|
+
analysis: {
|
36
|
+
analyzer: {
|
37
|
+
searchkick_keyword: {
|
38
|
+
type: "custom",
|
39
|
+
tokenizer: "keyword",
|
40
|
+
filter: ["lowercase"] + (options[:stem_conversions] ? ["searchkick_stemmer"] : [])
|
41
|
+
},
|
150
42
|
default_analyzer => {
|
151
|
-
type: "
|
43
|
+
type: "custom",
|
44
|
+
# character filters -> tokenizer -> token filters
|
45
|
+
# https://www.elastic.co/guide/en/elasticsearch/guide/current/analysis-intro.html
|
46
|
+
char_filter: ["ampersand"],
|
47
|
+
tokenizer: "standard",
|
48
|
+
# synonym should come last, after stemming and shingle
|
49
|
+
# shingle must come before searchkick_stemmer
|
50
|
+
filter: ["lowercase", "asciifolding", "searchkick_index_shingle", "searchkick_stemmer"]
|
152
51
|
},
|
153
52
|
searchkick_search: {
|
154
|
-
type: "
|
53
|
+
type: "custom",
|
54
|
+
char_filter: ["ampersand"],
|
55
|
+
tokenizer: "standard",
|
56
|
+
filter: ["lowercase", "asciifolding", "searchkick_search_shingle", "searchkick_stemmer"]
|
155
57
|
},
|
156
58
|
searchkick_search2: {
|
157
|
-
type: "
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
stem = false
|
162
|
-
when "japanese"
|
163
|
-
settings[:analysis][:analyzer].merge!(
|
164
|
-
default_analyzer => {
|
165
|
-
type: "kuromoji"
|
59
|
+
type: "custom",
|
60
|
+
char_filter: ["ampersand"],
|
61
|
+
tokenizer: "standard",
|
62
|
+
filter: ["lowercase", "asciifolding", "searchkick_stemmer"]
|
166
63
|
},
|
167
|
-
|
168
|
-
|
64
|
+
# https://github.com/leschenko/elasticsearch_autocomplete/blob/master/lib/elasticsearch_autocomplete/analyzers.rb
|
65
|
+
searchkick_autocomplete_search: {
|
66
|
+
type: "custom",
|
67
|
+
tokenizer: "keyword",
|
68
|
+
filter: ["lowercase", "asciifolding"]
|
169
69
|
},
|
170
|
-
|
171
|
-
type: "
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
stem = false
|
176
|
-
when "korean"
|
177
|
-
settings[:analysis][:analyzer].merge!(
|
178
|
-
default_analyzer => {
|
179
|
-
type: "openkoreantext-analyzer"
|
70
|
+
searchkick_word_search: {
|
71
|
+
type: "custom",
|
72
|
+
tokenizer: "standard",
|
73
|
+
filter: ["lowercase", "asciifolding"]
|
180
74
|
},
|
181
|
-
|
182
|
-
type: "
|
75
|
+
searchkick_suggest_index: {
|
76
|
+
type: "custom",
|
77
|
+
tokenizer: "standard",
|
78
|
+
filter: ["lowercase", "asciifolding", "searchkick_suggest_shingle"]
|
183
79
|
},
|
184
|
-
|
185
|
-
type: "
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
stem = false
|
190
|
-
when "vietnamese"
|
191
|
-
settings[:analysis][:analyzer].merge!(
|
192
|
-
default_analyzer => {
|
193
|
-
type: "vi_analyzer"
|
80
|
+
searchkick_text_start_index: {
|
81
|
+
type: "custom",
|
82
|
+
tokenizer: "keyword",
|
83
|
+
filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
|
194
84
|
},
|
195
|
-
|
196
|
-
type: "
|
85
|
+
searchkick_text_middle_index: {
|
86
|
+
type: "custom",
|
87
|
+
tokenizer: "keyword",
|
88
|
+
filter: ["lowercase", "asciifolding", "searchkick_ngram"]
|
197
89
|
},
|
198
|
-
|
199
|
-
type: "
|
90
|
+
searchkick_text_end_index: {
|
91
|
+
type: "custom",
|
92
|
+
tokenizer: "keyword",
|
93
|
+
filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
|
94
|
+
},
|
95
|
+
searchkick_word_start_index: {
|
96
|
+
type: "custom",
|
97
|
+
tokenizer: "standard",
|
98
|
+
filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
|
99
|
+
},
|
100
|
+
searchkick_word_middle_index: {
|
101
|
+
type: "custom",
|
102
|
+
tokenizer: "standard",
|
103
|
+
filter: ["lowercase", "asciifolding", "searchkick_ngram"]
|
104
|
+
},
|
105
|
+
searchkick_word_end_index: {
|
106
|
+
type: "custom",
|
107
|
+
tokenizer: "standard",
|
108
|
+
filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
|
200
109
|
}
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
default_analyzer => {
|
207
|
-
type: language
|
110
|
+
},
|
111
|
+
filter: {
|
112
|
+
searchkick_index_shingle: {
|
113
|
+
type: "shingle",
|
114
|
+
token_separator: ""
|
208
115
|
},
|
209
|
-
|
210
|
-
|
116
|
+
# lucky find https://web.archiveorange.com/archive/v/AAfXfQ17f57FcRINsof7
|
117
|
+
searchkick_search_shingle: {
|
118
|
+
type: "shingle",
|
119
|
+
token_separator: "",
|
120
|
+
output_unigrams: false,
|
121
|
+
output_unigrams_if_no_shingles: true
|
211
122
|
},
|
212
|
-
|
213
|
-
type:
|
123
|
+
searchkick_suggest_shingle: {
|
124
|
+
type: "shingle",
|
125
|
+
max_shingle_size: 5
|
126
|
+
},
|
127
|
+
searchkick_edge_ngram: {
|
128
|
+
type: "edge_ngram",
|
129
|
+
min_gram: 1,
|
130
|
+
max_gram: 50
|
131
|
+
},
|
132
|
+
searchkick_ngram: {
|
133
|
+
type: "ngram",
|
134
|
+
min_gram: 1,
|
135
|
+
max_gram: 50
|
136
|
+
},
|
137
|
+
searchkick_stemmer: {
|
138
|
+
# use stemmer if language is lowercase, snowball otherwise
|
139
|
+
type: language == language.to_s.downcase ? "stemmer" : "snowball",
|
140
|
+
language: language || "English"
|
141
|
+
}
|
142
|
+
},
|
143
|
+
char_filter: {
|
144
|
+
# https://www.elastic.co/guide/en/elasticsearch/guide/current/custom-analyzers.html
|
145
|
+
# &_to_and
|
146
|
+
ampersand: {
|
147
|
+
type: "mapping",
|
148
|
+
mappings: ["&=> and "]
|
214
149
|
}
|
215
|
-
|
150
|
+
}
|
151
|
+
}
|
152
|
+
}
|
216
153
|
|
217
|
-
|
218
|
-
|
154
|
+
raise ArgumentError, "Can't pass both language and stemmer" if options[:stemmer] && language
|
155
|
+
update_language(settings, language)
|
156
|
+
update_stemming(settings)
|
219
157
|
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
158
|
+
if Searchkick.env == "test"
|
159
|
+
settings[:number_of_shards] = 1
|
160
|
+
settings[:number_of_replicas] = 0
|
161
|
+
end
|
224
162
|
|
225
|
-
|
226
|
-
|
227
|
-
|
163
|
+
if options[:similarity]
|
164
|
+
settings[:similarity] = {default: {type: options[:similarity]}}
|
165
|
+
end
|
228
166
|
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
167
|
+
settings[:index] = {
|
168
|
+
max_ngram_diff: 49,
|
169
|
+
max_shingle_diff: 4
|
170
|
+
}
|
171
|
+
|
172
|
+
if options[:case_sensitive]
|
173
|
+
settings[:analysis][:analyzer].each do |_, analyzer|
|
174
|
+
analyzer[:filter].delete("lowercase")
|
234
175
|
end
|
176
|
+
end
|
235
177
|
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
178
|
+
add_synonyms(settings)
|
179
|
+
add_search_synonyms(settings)
|
180
|
+
|
181
|
+
if options[:special_characters] == false
|
182
|
+
settings[:analysis][:analyzer].each_value do |analyzer_settings|
|
183
|
+
analyzer_settings[:filter].reject! { |f| f == "asciifolding" }
|
240
184
|
end
|
185
|
+
end
|
241
186
|
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
187
|
+
settings
|
188
|
+
end
|
189
|
+
|
190
|
+
def update_language(settings, language)
|
191
|
+
case language
|
192
|
+
when "chinese"
|
193
|
+
settings[:analysis][:analyzer].merge!(
|
194
|
+
default_analyzer => {
|
195
|
+
type: "ik_smart"
|
196
|
+
},
|
197
|
+
searchkick_search: {
|
198
|
+
type: "ik_smart"
|
199
|
+
},
|
200
|
+
searchkick_search2: {
|
201
|
+
type: "ik_max_word"
|
202
|
+
}
|
203
|
+
)
|
204
|
+
when "chinese2", "smartcn"
|
205
|
+
settings[:analysis][:analyzer].merge!(
|
206
|
+
default_analyzer => {
|
207
|
+
type: "smartcn"
|
208
|
+
},
|
209
|
+
searchkick_search: {
|
210
|
+
type: "smartcn"
|
211
|
+
},
|
212
|
+
searchkick_search2: {
|
213
|
+
type: "smartcn"
|
214
|
+
}
|
215
|
+
)
|
216
|
+
when "japanese", "japanese2"
|
217
|
+
analyzer = {
|
218
|
+
type: "custom",
|
219
|
+
tokenizer: "kuromoji_tokenizer",
|
220
|
+
filter: [
|
221
|
+
"kuromoji_baseform",
|
222
|
+
"kuromoji_part_of_speech",
|
223
|
+
"cjk_width",
|
224
|
+
"ja_stop",
|
225
|
+
"searchkick_stemmer",
|
226
|
+
"lowercase"
|
227
|
+
]
|
228
|
+
}
|
229
|
+
settings[:analysis][:analyzer].merge!(
|
230
|
+
default_analyzer => analyzer.deep_dup,
|
231
|
+
searchkick_search: analyzer.deep_dup,
|
232
|
+
searchkick_search2: analyzer.deep_dup
|
233
|
+
)
|
234
|
+
settings[:analysis][:filter][:searchkick_stemmer] = {
|
235
|
+
type: "kuromoji_stemmer"
|
236
|
+
}
|
237
|
+
when "korean"
|
238
|
+
settings[:analysis][:analyzer].merge!(
|
239
|
+
default_analyzer => {
|
240
|
+
type: "openkoreantext-analyzer"
|
241
|
+
},
|
242
|
+
searchkick_search: {
|
243
|
+
type: "openkoreantext-analyzer"
|
244
|
+
},
|
245
|
+
searchkick_search2: {
|
246
|
+
type: "openkoreantext-analyzer"
|
247
|
+
}
|
248
|
+
)
|
249
|
+
when "korean2"
|
250
|
+
settings[:analysis][:analyzer].merge!(
|
251
|
+
default_analyzer => {
|
252
|
+
type: "nori"
|
253
|
+
},
|
254
|
+
searchkick_search: {
|
255
|
+
type: "nori"
|
256
|
+
},
|
257
|
+
searchkick_search2: {
|
258
|
+
type: "nori"
|
259
|
+
}
|
260
|
+
)
|
261
|
+
when "vietnamese"
|
262
|
+
settings[:analysis][:analyzer].merge!(
|
263
|
+
default_analyzer => {
|
264
|
+
type: "vi_analyzer"
|
265
|
+
},
|
266
|
+
searchkick_search: {
|
267
|
+
type: "vi_analyzer"
|
268
|
+
},
|
269
|
+
searchkick_search2: {
|
270
|
+
type: "vi_analyzer"
|
271
|
+
}
|
272
|
+
)
|
273
|
+
when "polish", "ukrainian"
|
274
|
+
settings[:analysis][:analyzer].merge!(
|
275
|
+
default_analyzer => {
|
276
|
+
type: language
|
277
|
+
},
|
278
|
+
searchkick_search: {
|
279
|
+
type: language
|
280
|
+
},
|
281
|
+
searchkick_search2: {
|
282
|
+
type: language
|
283
|
+
}
|
284
|
+
)
|
285
|
+
end
|
286
|
+
end
|
287
|
+
|
288
|
+
def update_stemming(settings)
|
289
|
+
if options[:stemmer]
|
290
|
+
stemmer = options[:stemmer]
|
291
|
+
# could also support snowball and stemmer
|
292
|
+
case stemmer[:type]
|
293
|
+
when "hunspell"
|
294
|
+
# supports all token filter options
|
295
|
+
settings[:analysis][:filter][:searchkick_stemmer] = stemmer
|
296
|
+
else
|
297
|
+
raise ArgumentError, "Unknown stemmer: #{stemmer[:type]}"
|
247
298
|
end
|
299
|
+
end
|
248
300
|
|
249
|
-
|
301
|
+
stem = options[:stem]
|
250
302
|
|
251
|
-
|
252
|
-
|
303
|
+
# language analyzer used
|
304
|
+
stem = false if settings[:analysis][:analyzer][default_analyzer][:type] != "custom"
|
253
305
|
|
254
|
-
|
306
|
+
if stem == false
|
307
|
+
settings[:analysis][:filter].delete(:searchkick_stemmer)
|
308
|
+
settings[:analysis][:analyzer].each do |_, analyzer|
|
309
|
+
analyzer[:filter].delete("searchkick_stemmer") if analyzer[:filter]
|
310
|
+
end
|
311
|
+
end
|
255
312
|
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
# TODO use a snowball stemmer on synonyms when creating the token filter
|
265
|
-
|
266
|
-
# http://elasticsearch-users.115913.n3.nabble.com/synonym-multi-words-search-td4030811.html
|
267
|
-
# I find the following approach effective if you are doing multi-word synonyms (synonym phrases):
|
268
|
-
# - Only apply the synonym expansion at index time
|
269
|
-
# - Don't have the synonym filter applied search
|
270
|
-
# - Use directional synonyms where appropriate. You want to make sure that you're not injecting terms that are too general.
|
271
|
-
settings[:analysis][:analyzer][default_analyzer][:filter].insert(2, "searchkick_synonym")
|
272
|
-
|
273
|
-
%w(word_start word_middle word_end).each do |type|
|
274
|
-
settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_synonym")
|
275
|
-
end
|
313
|
+
if options[:stemmer_override]
|
314
|
+
stemmer_override = {
|
315
|
+
type: "stemmer_override"
|
316
|
+
}
|
317
|
+
if options[:stemmer_override].is_a?(String)
|
318
|
+
stemmer_override[:rules_path] = options[:stemmer_override]
|
319
|
+
else
|
320
|
+
stemmer_override[:rules] = options[:stemmer_override]
|
276
321
|
end
|
322
|
+
settings[:analysis][:filter][:searchkick_stemmer_override] = stemmer_override
|
277
323
|
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
}
|
324
|
+
settings[:analysis][:analyzer].each do |_, analyzer|
|
325
|
+
stemmer_index = analyzer[:filter].index("searchkick_stemmer") if analyzer[:filter]
|
326
|
+
analyzer[:filter].insert(stemmer_index, "searchkick_stemmer_override") if stemmer_index
|
327
|
+
end
|
328
|
+
end
|
284
329
|
|
285
|
-
|
286
|
-
|
330
|
+
if options[:stem_exclusion]
|
331
|
+
settings[:analysis][:filter][:searchkick_stem_exclusion] = {
|
332
|
+
type: "keyword_marker",
|
333
|
+
keywords: options[:stem_exclusion]
|
334
|
+
}
|
287
335
|
|
288
|
-
|
289
|
-
|
290
|
-
|
336
|
+
settings[:analysis][:analyzer].each do |_, analyzer|
|
337
|
+
stemmer_index = analyzer[:filter].index("searchkick_stemmer") if analyzer[:filter]
|
338
|
+
analyzer[:filter].insert(stemmer_index, "searchkick_stem_exclusion") if stemmer_index
|
291
339
|
end
|
340
|
+
end
|
341
|
+
end
|
292
342
|
|
293
|
-
|
294
|
-
|
295
|
-
analyzer_settings[:filter].reject! { |f| f == "asciifolding" }
|
296
|
-
end
|
297
|
-
end
|
343
|
+
def generate_mappings
|
344
|
+
mapping = {}
|
298
345
|
|
299
|
-
|
346
|
+
keyword_mapping = {type: "keyword"}
|
347
|
+
keyword_mapping[:ignore_above] = options[:ignore_above] || 30000
|
300
348
|
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
}
|
349
|
+
# conversions
|
350
|
+
Array(options[:conversions]).each do |conversions_field|
|
351
|
+
mapping[conversions_field] = {
|
352
|
+
type: "nested",
|
353
|
+
properties: {
|
354
|
+
query: {type: default_type, analyzer: "searchkick_keyword"},
|
355
|
+
count: {type: "integer"}
|
309
356
|
}
|
310
|
-
|
357
|
+
}
|
358
|
+
end
|
311
359
|
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
]
|
360
|
+
mapping_options =
|
361
|
+
[:suggest, :word, :text_start, :text_middle, :text_end, :word_start, :word_middle, :word_end, :highlight, :searchable, :filterable]
|
362
|
+
.to_h { |type| [type, (options[type] || []).map(&:to_s)] }
|
316
363
|
|
317
|
-
|
364
|
+
word = options[:word] != false && (!options[:match] || options[:match] == :word)
|
318
365
|
|
319
|
-
|
366
|
+
mapping_options[:searchable].delete("_all")
|
320
367
|
|
321
|
-
|
368
|
+
analyzed_field_options = {type: default_type, index: true, analyzer: default_analyzer.to_s}
|
322
369
|
|
323
|
-
|
324
|
-
|
370
|
+
mapping_options.values.flatten.uniq.each do |field|
|
371
|
+
fields = {}
|
325
372
|
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
373
|
+
if options.key?(:filterable) && !mapping_options[:filterable].include?(field)
|
374
|
+
fields[field] = {type: default_type, index: false}
|
375
|
+
else
|
376
|
+
fields[field] = keyword_mapping
|
377
|
+
end
|
331
378
|
|
332
|
-
|
333
|
-
|
334
|
-
|
379
|
+
if !options[:searchable] || mapping_options[:searchable].include?(field)
|
380
|
+
if word
|
381
|
+
fields[:analyzed] = analyzed_field_options
|
335
382
|
|
336
|
-
|
337
|
-
|
338
|
-
end
|
383
|
+
if mapping_options[:highlight].include?(field)
|
384
|
+
fields[:analyzed][:term_vector] = "with_positions_offsets"
|
339
385
|
end
|
386
|
+
end
|
340
387
|
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
end
|
388
|
+
mapping_options.except(:highlight, :searchable, :filterable, :word).each do |type, f|
|
389
|
+
if options[:match] == type || f.include?(field)
|
390
|
+
fields[type] = {type: default_type, index: true, analyzer: "searchkick_#{type}_index"}
|
345
391
|
end
|
346
392
|
end
|
347
|
-
|
348
|
-
mapping[field] = fields[field].merge(fields: fields.except(field))
|
349
393
|
end
|
350
394
|
|
351
|
-
|
352
|
-
|
353
|
-
type: "geo_point"
|
354
|
-
}
|
355
|
-
end
|
395
|
+
mapping[field] = fields[field].merge(fields: fields.except(field))
|
396
|
+
end
|
356
397
|
|
357
|
-
|
358
|
-
|
359
|
-
|
398
|
+
(options[:locations] || []).map(&:to_s).each do |field|
|
399
|
+
mapping[field] = {
|
400
|
+
type: "geo_point"
|
401
|
+
}
|
402
|
+
end
|
403
|
+
|
404
|
+
options[:geo_shape] = options[:geo_shape].product([{}]).to_h if options[:geo_shape].is_a?(Array)
|
405
|
+
(options[:geo_shape] || {}).each do |field, shape_options|
|
406
|
+
mapping[field] = shape_options.merge(type: "geo_shape")
|
407
|
+
end
|
408
|
+
|
409
|
+
if options[:inheritance]
|
410
|
+
mapping[:type] = keyword_mapping
|
411
|
+
end
|
412
|
+
|
413
|
+
routing = {}
|
414
|
+
if options[:routing]
|
415
|
+
routing = {required: true}
|
416
|
+
unless options[:routing] == true
|
417
|
+
routing[:path] = options[:routing].to_s
|
360
418
|
end
|
419
|
+
end
|
420
|
+
|
421
|
+
dynamic_fields = {
|
422
|
+
# analyzed field must be the default field for include_in_all
|
423
|
+
# http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
|
424
|
+
# however, we can include the not_analyzed field in _all
|
425
|
+
# and the _all index analyzer will take care of it
|
426
|
+
"{name}" => keyword_mapping
|
427
|
+
}
|
428
|
+
|
429
|
+
if options.key?(:filterable)
|
430
|
+
dynamic_fields["{name}"] = {type: default_type, index: false}
|
431
|
+
end
|
361
432
|
|
362
|
-
|
363
|
-
|
433
|
+
unless options[:searchable]
|
434
|
+
if options[:match] && options[:match] != :word
|
435
|
+
dynamic_fields[options[:match]] = {type: default_type, index: true, analyzer: "searchkick_#{options[:match]}_index"}
|
364
436
|
end
|
365
437
|
|
366
|
-
|
367
|
-
|
368
|
-
routing = {required: true}
|
369
|
-
unless options[:routing] == true
|
370
|
-
routing[:path] = options[:routing].to_s
|
371
|
-
end
|
438
|
+
if word
|
439
|
+
dynamic_fields[:analyzed] = analyzed_field_options
|
372
440
|
end
|
441
|
+
end
|
442
|
+
|
443
|
+
# http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
|
444
|
+
multi_field = dynamic_fields["{name}"].merge(fields: dynamic_fields.except("{name}"))
|
445
|
+
|
446
|
+
mappings = {
|
447
|
+
properties: mapping,
|
448
|
+
_routing: routing,
|
449
|
+
# https://gist.github.com/kimchy/2898285
|
450
|
+
dynamic_templates: [
|
451
|
+
{
|
452
|
+
string_template: {
|
453
|
+
match: "*",
|
454
|
+
match_mapping_type: "string",
|
455
|
+
mapping: multi_field
|
456
|
+
}
|
457
|
+
}
|
458
|
+
]
|
459
|
+
}
|
373
460
|
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
461
|
+
mappings
|
462
|
+
end
|
463
|
+
|
464
|
+
def add_synonyms(settings)
|
465
|
+
synonyms = options[:synonyms] || []
|
466
|
+
synonyms = synonyms.call if synonyms.respond_to?(:call)
|
467
|
+
if synonyms.any?
|
468
|
+
settings[:analysis][:filter][:searchkick_synonym] = {
|
469
|
+
type: "synonym",
|
470
|
+
# only remove a single space from synonyms so three-word synonyms will fail noisily instead of silently
|
471
|
+
synonyms: synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.map { |s2| s2.sub(/\s+/, "") }.join(",") : s }.map(&:downcase)
|
380
472
|
}
|
473
|
+
# choosing a place for the synonym filter when stemming is not easy
|
474
|
+
# https://groups.google.com/forum/#!topic/elasticsearch/p7qcQlgHdB8
|
475
|
+
# TODO use a snowball stemmer on synonyms when creating the token filter
|
476
|
+
|
477
|
+
# http://elasticsearch-users.115913.n3.nabble.com/synonym-multi-words-search-td4030811.html
|
478
|
+
# I find the following approach effective if you are doing multi-word synonyms (synonym phrases):
|
479
|
+
# - Only apply the synonym expansion at index time
|
480
|
+
# - Don't have the synonym filter applied search
|
481
|
+
# - Use directional synonyms where appropriate. You want to make sure that you're not injecting terms that are too general.
|
482
|
+
settings[:analysis][:analyzer][default_analyzer][:filter].insert(2, "searchkick_synonym")
|
483
|
+
|
484
|
+
%w(word_start word_middle word_end).each do |type|
|
485
|
+
settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_synonym")
|
486
|
+
end
|
487
|
+
end
|
488
|
+
end
|
381
489
|
|
382
|
-
|
383
|
-
|
490
|
+
def add_search_synonyms(settings)
|
491
|
+
search_synonyms = options[:search_synonyms] || []
|
492
|
+
search_synonyms = search_synonyms.call if search_synonyms.respond_to?(:call)
|
493
|
+
if search_synonyms.is_a?(String) || search_synonyms.any?
|
494
|
+
if search_synonyms.is_a?(String)
|
495
|
+
synonym_graph = {
|
496
|
+
type: "synonym_graph",
|
497
|
+
synonyms_path: search_synonyms
|
498
|
+
}
|
499
|
+
synonym_graph[:updateable] = true unless below73?
|
500
|
+
else
|
501
|
+
synonym_graph = {
|
502
|
+
type: "synonym_graph",
|
503
|
+
# TODO confirm this is correct
|
504
|
+
synonyms: search_synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.join(",") : s }.map(&:downcase)
|
505
|
+
}
|
384
506
|
end
|
507
|
+
settings[:analysis][:filter][:searchkick_synonym_graph] = synonym_graph
|
385
508
|
|
386
|
-
|
387
|
-
|
388
|
-
|
509
|
+
if ["japanese", "japanese2"].include?(options[:language])
|
510
|
+
[:searchkick_search, :searchkick_search2].each do |analyzer|
|
511
|
+
settings[:analysis][:analyzer][analyzer][:filter].insert(4, "searchkick_synonym_graph")
|
389
512
|
end
|
513
|
+
else
|
514
|
+
[:searchkick_search2, :searchkick_word_search].each do |analyzer|
|
515
|
+
unless settings[:analysis][:analyzer][analyzer].key?(:filter)
|
516
|
+
raise Searchkick::Error, "Search synonyms are not supported yet for language"
|
517
|
+
end
|
390
518
|
|
391
|
-
|
392
|
-
dynamic_fields[:analyzed] = analyzed_field_options
|
519
|
+
settings[:analysis][:analyzer][analyzer][:filter].insert(2, "searchkick_synonym_graph")
|
393
520
|
end
|
394
521
|
end
|
522
|
+
end
|
523
|
+
end
|
395
524
|
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
# https://gist.github.com/kimchy/2898285
|
403
|
-
dynamic_templates: [
|
404
|
-
{
|
405
|
-
string_template: {
|
406
|
-
match: "*",
|
407
|
-
match_mapping_type: "string",
|
408
|
-
mapping: multi_field
|
409
|
-
}
|
410
|
-
}
|
411
|
-
]
|
412
|
-
}
|
413
|
-
|
414
|
-
if below70
|
415
|
-
index_type = options[:_type]
|
416
|
-
index_type = index_type.call if index_type.respond_to?(:call)
|
417
|
-
mappings = {index_type => mappings}
|
418
|
-
end
|
525
|
+
def set_deep_paging(settings)
|
526
|
+
if !settings.dig(:index, :max_result_window) && !settings[:"index.max_result_window"]
|
527
|
+
settings[:index] ||= {}
|
528
|
+
settings[:index][:max_result_window] = 1_000_000_000
|
529
|
+
end
|
530
|
+
end
|
419
531
|
|
420
|
-
|
532
|
+
def index_type
|
533
|
+
@index_type ||= begin
|
534
|
+
index_type = options[:_type]
|
535
|
+
index_type = index_type.call if index_type.respond_to?(:call)
|
536
|
+
index_type
|
421
537
|
end
|
538
|
+
end
|
422
539
|
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
|
540
|
+
def default_type
|
541
|
+
"text"
|
542
|
+
end
|
543
|
+
|
544
|
+
def default_analyzer
|
545
|
+
:searchkick_index
|
546
|
+
end
|
547
|
+
|
548
|
+
def below73?
|
549
|
+
Searchkick.server_below?("7.3.0")
|
427
550
|
end
|
428
551
|
end
|
429
552
|
end
|