searchkick 2.3.2 → 4.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/CHANGELOG.md +251 -84
- data/LICENSE.txt +1 -1
- data/README.md +552 -432
- data/lib/searchkick/bulk_indexer.rb +173 -0
- data/lib/searchkick/bulk_reindex_job.rb +2 -2
- data/lib/searchkick/hash_wrapper.rb +12 -0
- data/lib/searchkick/index.rb +187 -348
- data/lib/searchkick/index_options.rb +494 -282
- data/lib/searchkick/logging.rb +17 -13
- data/lib/searchkick/model.rb +52 -97
- data/lib/searchkick/multi_search.rb +9 -10
- data/lib/searchkick/process_batch_job.rb +17 -4
- data/lib/searchkick/process_queue_job.rb +20 -12
- data/lib/searchkick/query.rb +415 -199
- data/lib/searchkick/railtie.rb +7 -0
- data/lib/searchkick/record_data.rb +128 -0
- data/lib/searchkick/record_indexer.rb +79 -0
- data/lib/searchkick/reindex_queue.rb +1 -1
- data/lib/searchkick/reindex_v2_job.rb +14 -12
- data/lib/searchkick/results.rb +135 -41
- data/lib/searchkick/version.rb +1 -1
- data/lib/searchkick.rb +130 -61
- data/lib/tasks/searchkick.rake +34 -0
- metadata +18 -162
- data/.gitignore +0 -22
- data/.travis.yml +0 -39
- data/Gemfile +0 -16
- data/Rakefile +0 -20
- data/benchmark/Gemfile +0 -23
- data/benchmark/benchmark.rb +0 -97
- data/lib/searchkick/tasks.rb +0 -33
- data/searchkick.gemspec +0 -28
- data/test/aggs_test.rb +0 -197
- data/test/autocomplete_test.rb +0 -75
- data/test/boost_test.rb +0 -202
- data/test/callbacks_test.rb +0 -59
- data/test/ci/before_install.sh +0 -17
- data/test/errors_test.rb +0 -19
- data/test/gemfiles/activerecord31.gemfile +0 -7
- data/test/gemfiles/activerecord32.gemfile +0 -7
- data/test/gemfiles/activerecord40.gemfile +0 -8
- data/test/gemfiles/activerecord41.gemfile +0 -8
- data/test/gemfiles/activerecord42.gemfile +0 -7
- data/test/gemfiles/activerecord50.gemfile +0 -7
- data/test/gemfiles/apartment.gemfile +0 -8
- data/test/gemfiles/cequel.gemfile +0 -8
- data/test/gemfiles/mongoid2.gemfile +0 -7
- data/test/gemfiles/mongoid3.gemfile +0 -6
- data/test/gemfiles/mongoid4.gemfile +0 -7
- data/test/gemfiles/mongoid5.gemfile +0 -7
- data/test/gemfiles/mongoid6.gemfile +0 -12
- data/test/gemfiles/nobrainer.gemfile +0 -8
- data/test/gemfiles/parallel_tests.gemfile +0 -8
- data/test/geo_shape_test.rb +0 -175
- data/test/highlight_test.rb +0 -78
- data/test/index_test.rb +0 -166
- data/test/inheritance_test.rb +0 -83
- data/test/marshal_test.rb +0 -8
- data/test/match_test.rb +0 -276
- data/test/misspellings_test.rb +0 -56
- data/test/model_test.rb +0 -42
- data/test/multi_search_test.rb +0 -36
- data/test/multi_tenancy_test.rb +0 -22
- data/test/order_test.rb +0 -46
- data/test/pagination_test.rb +0 -70
- data/test/partial_reindex_test.rb +0 -58
- data/test/query_test.rb +0 -35
- data/test/records_test.rb +0 -10
- data/test/reindex_test.rb +0 -64
- data/test/reindex_v2_job_test.rb +0 -32
- data/test/routing_test.rb +0 -23
- data/test/should_index_test.rb +0 -32
- data/test/similar_test.rb +0 -28
- data/test/sql_test.rb +0 -214
- data/test/suggest_test.rb +0 -95
- data/test/support/kaminari.yml +0 -21
- data/test/synonyms_test.rb +0 -67
- data/test/test_helper.rb +0 -567
- data/test/where_test.rb +0 -223
@@ -1,346 +1,558 @@
|
|
1
1
|
module Searchkick
|
2
|
-
|
2
|
+
class IndexOptions
|
3
|
+
attr_reader :options
|
4
|
+
|
5
|
+
def initialize(index)
|
6
|
+
@options = index.options
|
7
|
+
end
|
8
|
+
|
3
9
|
def index_options
|
4
|
-
|
5
|
-
|
6
|
-
|
10
|
+
custom_mapping = options[:mappings] || {}
|
11
|
+
if below70? && custom_mapping.keys.map(&:to_sym).include?(:properties)
|
12
|
+
# add type
|
13
|
+
custom_mapping = {index_type => custom_mapping}
|
14
|
+
end
|
7
15
|
|
8
16
|
if options[:mappings] && !options[:merge_mappings]
|
9
17
|
settings = options[:settings] || {}
|
10
|
-
mappings =
|
18
|
+
mappings = custom_mapping
|
11
19
|
else
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
default_type = below50 ? "string" : "text"
|
16
|
-
default_analyzer = :searchkick_index
|
17
|
-
keyword_mapping =
|
18
|
-
if below50
|
19
|
-
{
|
20
|
-
type: default_type,
|
21
|
-
index: "not_analyzed"
|
22
|
-
}
|
23
|
-
else
|
24
|
-
{
|
25
|
-
type: "keyword"
|
26
|
-
}
|
27
|
-
end
|
20
|
+
settings = generate_settings
|
21
|
+
mappings = generate_mappings.symbolize_keys.deep_merge(custom_mapping.symbolize_keys)
|
22
|
+
end
|
28
23
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
searchkick_word_start_index: {
|
97
|
-
type: "custom",
|
98
|
-
tokenizer: "standard",
|
99
|
-
filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
|
100
|
-
},
|
101
|
-
searchkick_word_middle_index: {
|
102
|
-
type: "custom",
|
103
|
-
tokenizer: "standard",
|
104
|
-
filter: ["lowercase", "asciifolding", "searchkick_ngram"]
|
105
|
-
},
|
106
|
-
searchkick_word_end_index: {
|
107
|
-
type: "custom",
|
108
|
-
tokenizer: "standard",
|
109
|
-
filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
|
110
|
-
}
|
24
|
+
set_deep_paging(settings) if options[:deep_paging]
|
25
|
+
|
26
|
+
{
|
27
|
+
settings: settings,
|
28
|
+
mappings: mappings
|
29
|
+
}
|
30
|
+
end
|
31
|
+
|
32
|
+
def generate_settings
|
33
|
+
language = options[:language]
|
34
|
+
language = language.call if language.respond_to?(:call)
|
35
|
+
|
36
|
+
settings = {
|
37
|
+
analysis: {
|
38
|
+
analyzer: {
|
39
|
+
searchkick_keyword: {
|
40
|
+
type: "custom",
|
41
|
+
tokenizer: "keyword",
|
42
|
+
filter: ["lowercase"] + (options[:stem_conversions] ? ["searchkick_stemmer"] : [])
|
43
|
+
},
|
44
|
+
default_analyzer => {
|
45
|
+
type: "custom",
|
46
|
+
# character filters -> tokenizer -> token filters
|
47
|
+
# https://www.elastic.co/guide/en/elasticsearch/guide/current/analysis-intro.html
|
48
|
+
char_filter: ["ampersand"],
|
49
|
+
tokenizer: "standard",
|
50
|
+
# synonym should come last, after stemming and shingle
|
51
|
+
# shingle must come before searchkick_stemmer
|
52
|
+
filter: ["lowercase", "asciifolding", "searchkick_index_shingle", "searchkick_stemmer"]
|
53
|
+
},
|
54
|
+
searchkick_search: {
|
55
|
+
type: "custom",
|
56
|
+
char_filter: ["ampersand"],
|
57
|
+
tokenizer: "standard",
|
58
|
+
filter: ["lowercase", "asciifolding", "searchkick_search_shingle", "searchkick_stemmer"]
|
59
|
+
},
|
60
|
+
searchkick_search2: {
|
61
|
+
type: "custom",
|
62
|
+
char_filter: ["ampersand"],
|
63
|
+
tokenizer: "standard",
|
64
|
+
filter: ["lowercase", "asciifolding", "searchkick_stemmer"]
|
65
|
+
},
|
66
|
+
# https://github.com/leschenko/elasticsearch_autocomplete/blob/master/lib/elasticsearch_autocomplete/analyzers.rb
|
67
|
+
searchkick_autocomplete_search: {
|
68
|
+
type: "custom",
|
69
|
+
tokenizer: "keyword",
|
70
|
+
filter: ["lowercase", "asciifolding"]
|
71
|
+
},
|
72
|
+
searchkick_word_search: {
|
73
|
+
type: "custom",
|
74
|
+
tokenizer: "standard",
|
75
|
+
filter: ["lowercase", "asciifolding"]
|
76
|
+
},
|
77
|
+
searchkick_suggest_index: {
|
78
|
+
type: "custom",
|
79
|
+
tokenizer: "standard",
|
80
|
+
filter: ["lowercase", "asciifolding", "searchkick_suggest_shingle"]
|
81
|
+
},
|
82
|
+
searchkick_text_start_index: {
|
83
|
+
type: "custom",
|
84
|
+
tokenizer: "keyword",
|
85
|
+
filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
|
86
|
+
},
|
87
|
+
searchkick_text_middle_index: {
|
88
|
+
type: "custom",
|
89
|
+
tokenizer: "keyword",
|
90
|
+
filter: ["lowercase", "asciifolding", "searchkick_ngram"]
|
111
91
|
},
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
},
|
117
|
-
# lucky find http://web.archiveorange.com/archive/v/AAfXfQ17f57FcRINsof7
|
118
|
-
searchkick_search_shingle: {
|
119
|
-
type: "shingle",
|
120
|
-
token_separator: "",
|
121
|
-
output_unigrams: false,
|
122
|
-
output_unigrams_if_no_shingles: true
|
123
|
-
},
|
124
|
-
searchkick_suggest_shingle: {
|
125
|
-
type: "shingle",
|
126
|
-
max_shingle_size: 5
|
127
|
-
},
|
128
|
-
searchkick_edge_ngram: {
|
129
|
-
type: "edgeNGram",
|
130
|
-
min_gram: 1,
|
131
|
-
max_gram: 50
|
132
|
-
},
|
133
|
-
searchkick_ngram: {
|
134
|
-
type: "nGram",
|
135
|
-
min_gram: 1,
|
136
|
-
max_gram: 50
|
137
|
-
},
|
138
|
-
searchkick_stemmer: {
|
139
|
-
# use stemmer if language is lowercase, snowball otherwise
|
140
|
-
# TODO deprecate language option in favor of stemmer
|
141
|
-
type: language == language.to_s.downcase ? "stemmer" : "snowball",
|
142
|
-
language: language || "English"
|
143
|
-
}
|
92
|
+
searchkick_text_end_index: {
|
93
|
+
type: "custom",
|
94
|
+
tokenizer: "keyword",
|
95
|
+
filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
|
144
96
|
},
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
97
|
+
searchkick_word_start_index: {
|
98
|
+
type: "custom",
|
99
|
+
tokenizer: "standard",
|
100
|
+
filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
|
101
|
+
},
|
102
|
+
searchkick_word_middle_index: {
|
103
|
+
type: "custom",
|
104
|
+
tokenizer: "standard",
|
105
|
+
filter: ["lowercase", "asciifolding", "searchkick_ngram"]
|
106
|
+
},
|
107
|
+
searchkick_word_end_index: {
|
108
|
+
type: "custom",
|
109
|
+
tokenizer: "standard",
|
110
|
+
filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
|
111
|
+
}
|
112
|
+
},
|
113
|
+
filter: {
|
114
|
+
searchkick_index_shingle: {
|
115
|
+
type: "shingle",
|
116
|
+
token_separator: ""
|
117
|
+
},
|
118
|
+
# lucky find https://web.archiveorange.com/archive/v/AAfXfQ17f57FcRINsof7
|
119
|
+
searchkick_search_shingle: {
|
120
|
+
type: "shingle",
|
121
|
+
token_separator: "",
|
122
|
+
output_unigrams: false,
|
123
|
+
output_unigrams_if_no_shingles: true
|
124
|
+
},
|
125
|
+
searchkick_suggest_shingle: {
|
126
|
+
type: "shingle",
|
127
|
+
max_shingle_size: 5
|
128
|
+
},
|
129
|
+
searchkick_edge_ngram: {
|
130
|
+
type: "edge_ngram",
|
131
|
+
min_gram: 1,
|
132
|
+
max_gram: 50
|
133
|
+
},
|
134
|
+
searchkick_ngram: {
|
135
|
+
type: "ngram",
|
136
|
+
min_gram: 1,
|
137
|
+
max_gram: 50
|
138
|
+
},
|
139
|
+
searchkick_stemmer: {
|
140
|
+
# use stemmer if language is lowercase, snowball otherwise
|
141
|
+
type: language == language.to_s.downcase ? "stemmer" : "snowball",
|
142
|
+
language: language || "English"
|
143
|
+
}
|
144
|
+
},
|
145
|
+
char_filter: {
|
146
|
+
# https://www.elastic.co/guide/en/elasticsearch/guide/current/custom-analyzers.html
|
147
|
+
# &_to_and
|
148
|
+
ampersand: {
|
149
|
+
type: "mapping",
|
150
|
+
mappings: ["&=> and "]
|
152
151
|
}
|
153
152
|
}
|
154
153
|
}
|
154
|
+
}
|
155
155
|
|
156
|
-
|
157
|
-
|
158
|
-
settings[:number_of_replicas] = 0
|
159
|
-
end
|
156
|
+
update_language(settings, language)
|
157
|
+
update_stemming(settings)
|
160
158
|
|
161
|
-
|
162
|
-
|
163
|
-
|
159
|
+
if Searchkick.env == "test"
|
160
|
+
settings[:number_of_shards] = 1
|
161
|
+
settings[:number_of_replicas] = 0
|
162
|
+
end
|
163
|
+
|
164
|
+
# TODO remove in Searchkick 5 (classic no longer supported)
|
165
|
+
if options[:similarity]
|
166
|
+
settings[:similarity] = {default: {type: options[:similarity]}}
|
167
|
+
end
|
168
|
+
|
169
|
+
unless below62?
|
170
|
+
settings[:index] = {
|
171
|
+
max_ngram_diff: 49,
|
172
|
+
max_shingle_diff: 4
|
173
|
+
}
|
174
|
+
end
|
164
175
|
|
165
|
-
|
176
|
+
if options[:case_sensitive]
|
177
|
+
settings[:analysis][:analyzer].each do |_, analyzer|
|
178
|
+
analyzer[:filter].delete("lowercase")
|
179
|
+
end
|
180
|
+
end
|
166
181
|
|
167
|
-
|
168
|
-
|
182
|
+
# TODO do this last in Searchkick 5
|
183
|
+
settings = settings.symbolize_keys.deep_merge((options[:settings] || {}).symbolize_keys)
|
169
184
|
|
170
|
-
|
185
|
+
add_synonyms(settings)
|
186
|
+
add_search_synonyms(settings)
|
187
|
+
# TODO remove in Searchkick 5
|
188
|
+
add_wordnet(settings) if options[:wordnet]
|
171
189
|
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
synonyms: synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.join(",") : s }.map(&:downcase)
|
176
|
-
}
|
177
|
-
# choosing a place for the synonym filter when stemming is not easy
|
178
|
-
# https://groups.google.com/forum/#!topic/elasticsearch/p7qcQlgHdB8
|
179
|
-
# TODO use a snowball stemmer on synonyms when creating the token filter
|
180
|
-
|
181
|
-
# http://elasticsearch-users.115913.n3.nabble.com/synonym-multi-words-search-td4030811.html
|
182
|
-
# I find the following approach effective if you are doing multi-word synonyms (synonym phrases):
|
183
|
-
# - Only apply the synonym expansion at index time
|
184
|
-
# - Don't have the synonym filter applied search
|
185
|
-
# - Use directional synonyms where appropriate. You want to make sure that you're not injecting terms that are too general.
|
186
|
-
settings[:analysis][:analyzer][default_analyzer][:filter].insert(4, "searchkick_synonym") if below60
|
187
|
-
settings[:analysis][:analyzer][default_analyzer][:filter] << "searchkick_synonym"
|
188
|
-
|
189
|
-
%w(word_start word_middle word_end).each do |type|
|
190
|
-
settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_synonym")
|
191
|
-
end
|
190
|
+
if options[:special_characters] == false
|
191
|
+
settings[:analysis][:analyzer].each_value do |analyzer_settings|
|
192
|
+
analyzer_settings[:filter].reject! { |f| f == "asciifolding" }
|
192
193
|
end
|
194
|
+
end
|
193
195
|
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
196
|
+
settings
|
197
|
+
end
|
198
|
+
|
199
|
+
def update_language(settings, language)
|
200
|
+
case language
|
201
|
+
when "chinese"
|
202
|
+
settings[:analysis][:analyzer].merge!(
|
203
|
+
default_analyzer => {
|
204
|
+
type: "ik_smart"
|
205
|
+
},
|
206
|
+
searchkick_search: {
|
207
|
+
type: "ik_smart"
|
208
|
+
},
|
209
|
+
searchkick_search2: {
|
210
|
+
type: "ik_max_word"
|
211
|
+
}
|
212
|
+
)
|
213
|
+
when "chinese2", "smartcn"
|
214
|
+
settings[:analysis][:analyzer].merge!(
|
215
|
+
default_analyzer => {
|
216
|
+
type: "smartcn"
|
217
|
+
},
|
218
|
+
searchkick_search: {
|
219
|
+
type: "smartcn"
|
220
|
+
},
|
221
|
+
searchkick_search2: {
|
222
|
+
type: "smartcn"
|
223
|
+
}
|
224
|
+
)
|
225
|
+
when "japanese"
|
226
|
+
settings[:analysis][:analyzer].merge!(
|
227
|
+
default_analyzer => {
|
228
|
+
type: "kuromoji"
|
229
|
+
},
|
230
|
+
searchkick_search: {
|
231
|
+
type: "kuromoji"
|
232
|
+
},
|
233
|
+
searchkick_search2: {
|
234
|
+
type: "kuromoji"
|
199
235
|
}
|
236
|
+
)
|
237
|
+
when "korean"
|
238
|
+
settings[:analysis][:analyzer].merge!(
|
239
|
+
default_analyzer => {
|
240
|
+
type: "openkoreantext-analyzer"
|
241
|
+
},
|
242
|
+
searchkick_search: {
|
243
|
+
type: "openkoreantext-analyzer"
|
244
|
+
},
|
245
|
+
searchkick_search2: {
|
246
|
+
type: "openkoreantext-analyzer"
|
247
|
+
}
|
248
|
+
)
|
249
|
+
when "korean2"
|
250
|
+
settings[:analysis][:analyzer].merge!(
|
251
|
+
default_analyzer => {
|
252
|
+
type: "nori"
|
253
|
+
},
|
254
|
+
searchkick_search: {
|
255
|
+
type: "nori"
|
256
|
+
},
|
257
|
+
searchkick_search2: {
|
258
|
+
type: "nori"
|
259
|
+
}
|
260
|
+
)
|
261
|
+
when "vietnamese"
|
262
|
+
settings[:analysis][:analyzer].merge!(
|
263
|
+
default_analyzer => {
|
264
|
+
type: "vi_analyzer"
|
265
|
+
},
|
266
|
+
searchkick_search: {
|
267
|
+
type: "vi_analyzer"
|
268
|
+
},
|
269
|
+
searchkick_search2: {
|
270
|
+
type: "vi_analyzer"
|
271
|
+
}
|
272
|
+
)
|
273
|
+
when "polish", "ukrainian"
|
274
|
+
settings[:analysis][:analyzer].merge!(
|
275
|
+
default_analyzer => {
|
276
|
+
type: language
|
277
|
+
},
|
278
|
+
searchkick_search: {
|
279
|
+
type: language
|
280
|
+
},
|
281
|
+
searchkick_search2: {
|
282
|
+
type: language
|
283
|
+
}
|
284
|
+
)
|
285
|
+
end
|
286
|
+
end
|
200
287
|
|
201
|
-
|
202
|
-
|
288
|
+
def update_stemming(settings)
|
289
|
+
stem = options[:stem]
|
203
290
|
|
204
|
-
|
205
|
-
|
206
|
-
end
|
207
|
-
end
|
291
|
+
# language analyzer used
|
292
|
+
stem = false if settings[:analysis][:analyzer][default_analyzer][:type] != "custom"
|
208
293
|
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
294
|
+
if stem == false
|
295
|
+
settings[:analysis][:filter].delete(:searchkick_stemmer)
|
296
|
+
settings[:analysis][:analyzer].each do |_, analyzer|
|
297
|
+
analyzer[:filter].delete("searchkick_stemmer") if analyzer[:filter]
|
213
298
|
end
|
299
|
+
end
|
214
300
|
|
215
|
-
|
301
|
+
if options[:stemmer_override]
|
302
|
+
stemmer_override = {
|
303
|
+
type: "stemmer_override"
|
304
|
+
}
|
305
|
+
if options[:stemmer_override].is_a?(String)
|
306
|
+
stemmer_override[:rules_path] = options[:stemmer_override]
|
307
|
+
else
|
308
|
+
stemmer_override[:rules] = options[:stemmer_override]
|
309
|
+
end
|
310
|
+
settings[:analysis][:filter][:searchkick_stemmer_override] = stemmer_override
|
216
311
|
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
type: "nested",
|
221
|
-
properties: {
|
222
|
-
query: {type: default_type, analyzer: "searchkick_keyword"},
|
223
|
-
count: {type: "integer"}
|
224
|
-
}
|
225
|
-
}
|
312
|
+
settings[:analysis][:analyzer].each do |_, analyzer|
|
313
|
+
stemmer_index = analyzer[:filter].index("searchkick_stemmer") if analyzer[:filter]
|
314
|
+
analyzer[:filter].insert(stemmer_index, "searchkick_stemmer_override") if stemmer_index
|
226
315
|
end
|
316
|
+
end
|
227
317
|
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
318
|
+
if options[:stem_exclusion]
|
319
|
+
settings[:analysis][:filter][:searchkick_stem_exclusion] = {
|
320
|
+
type: "keyword_marker",
|
321
|
+
keywords: options[:stem_exclusion]
|
322
|
+
}
|
323
|
+
|
324
|
+
settings[:analysis][:analyzer].each do |_, analyzer|
|
325
|
+
stemmer_index = analyzer[:filter].index("searchkick_stemmer") if analyzer[:filter]
|
326
|
+
analyzer[:filter].insert(stemmer_index, "searchkick_stem_exclusion") if stemmer_index
|
327
|
+
end
|
328
|
+
end
|
329
|
+
end
|
232
330
|
|
233
|
-
|
331
|
+
def generate_mappings
|
332
|
+
mapping = {}
|
234
333
|
|
235
|
-
|
334
|
+
keyword_mapping = {type: "keyword"}
|
335
|
+
keyword_mapping[:ignore_above] = options[:ignore_above] || 30000
|
236
336
|
|
237
|
-
|
337
|
+
# conversions
|
338
|
+
Array(options[:conversions]).each do |conversions_field|
|
339
|
+
mapping[conversions_field] = {
|
340
|
+
type: "nested",
|
341
|
+
properties: {
|
342
|
+
query: {type: default_type, analyzer: "searchkick_keyword"},
|
343
|
+
count: {type: "integer"}
|
344
|
+
}
|
345
|
+
}
|
346
|
+
end
|
238
347
|
|
239
|
-
|
240
|
-
|
348
|
+
mapping_options = Hash[
|
349
|
+
[:suggest, :word, :text_start, :text_middle, :text_end, :word_start, :word_middle, :word_end, :highlight, :searchable, :filterable]
|
350
|
+
.map { |type| [type, (options[type] || []).map(&:to_s)] }
|
351
|
+
]
|
241
352
|
|
242
|
-
|
243
|
-
fields[field] = {type: default_type, index: index_false_value}
|
244
|
-
else
|
245
|
-
fields[field] = keyword_mapping
|
246
|
-
end
|
353
|
+
word = options[:word] != false && (!options[:match] || options[:match] == :word)
|
247
354
|
|
248
|
-
|
249
|
-
if word
|
250
|
-
fields["analyzed"] = analyzed_field_options
|
355
|
+
mapping_options[:searchable].delete("_all")
|
251
356
|
|
252
|
-
|
253
|
-
fields["analyzed"][:term_vector] = "with_positions_offsets"
|
254
|
-
end
|
255
|
-
end
|
357
|
+
analyzed_field_options = {type: default_type, index: true, analyzer: default_analyzer}
|
256
358
|
|
257
|
-
|
258
|
-
|
259
|
-
fields[type] = {type: default_type, index: index_true_value, analyzer: "searchkick_#{type}_index"}
|
260
|
-
end
|
261
|
-
end
|
262
|
-
end
|
359
|
+
mapping_options.values.flatten.uniq.each do |field|
|
360
|
+
fields = {}
|
263
361
|
|
264
|
-
|
362
|
+
if options.key?(:filterable) && !mapping_options[:filterable].include?(field)
|
363
|
+
fields[field] = {type: default_type, index: false}
|
364
|
+
else
|
365
|
+
fields[field] = keyword_mapping
|
265
366
|
end
|
266
367
|
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
}
|
271
|
-
end
|
368
|
+
if !options[:searchable] || mapping_options[:searchable].include?(field)
|
369
|
+
if word
|
370
|
+
fields[:analyzed] = analyzed_field_options
|
272
371
|
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
372
|
+
if mapping_options[:highlight].include?(field)
|
373
|
+
fields[:analyzed][:term_vector] = "with_positions_offsets"
|
374
|
+
end
|
375
|
+
end
|
277
376
|
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
routing[:path] = options[:routing].to_s
|
377
|
+
mapping_options.except(:highlight, :searchable, :filterable, :word).each do |type, f|
|
378
|
+
if options[:match] == type || f.include?(field)
|
379
|
+
fields[type] = {type: default_type, index: true, analyzer: "searchkick_#{type}_index"}
|
380
|
+
end
|
283
381
|
end
|
284
382
|
end
|
285
383
|
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
"
|
384
|
+
mapping[field] = fields[field].merge(fields: fields.except(field))
|
385
|
+
end
|
386
|
+
|
387
|
+
(options[:locations] || []).map(&:to_s).each do |field|
|
388
|
+
mapping[field] = {
|
389
|
+
type: "geo_point"
|
292
390
|
}
|
391
|
+
end
|
293
392
|
|
294
|
-
|
295
|
-
|
296
|
-
|
393
|
+
options[:geo_shape] = options[:geo_shape].product([{}]).to_h if options[:geo_shape].is_a?(Array)
|
394
|
+
(options[:geo_shape] || {}).each do |field, shape_options|
|
395
|
+
mapping[field] = shape_options.merge(type: "geo_shape")
|
396
|
+
end
|
397
|
+
|
398
|
+
if options[:inheritance]
|
399
|
+
mapping[:type] = keyword_mapping
|
400
|
+
end
|
297
401
|
|
298
|
-
|
299
|
-
|
402
|
+
routing = {}
|
403
|
+
if options[:routing]
|
404
|
+
routing = {required: true}
|
405
|
+
unless options[:routing] == true
|
406
|
+
routing[:path] = options[:routing].to_s
|
300
407
|
end
|
408
|
+
end
|
301
409
|
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
410
|
+
dynamic_fields = {
|
411
|
+
# analyzed field must be the default field for include_in_all
|
412
|
+
# http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
|
413
|
+
# however, we can include the not_analyzed field in _all
|
414
|
+
# and the _all index analyzer will take care of it
|
415
|
+
"{name}" => keyword_mapping
|
416
|
+
}
|
306
417
|
|
307
|
-
|
308
|
-
|
309
|
-
|
418
|
+
if options.key?(:filterable)
|
419
|
+
dynamic_fields["{name}"] = {type: default_type, index: false}
|
420
|
+
end
|
421
|
+
|
422
|
+
unless options[:searchable]
|
423
|
+
if options[:match] && options[:match] != :word
|
424
|
+
dynamic_fields[options[:match]] = {type: default_type, index: true, analyzer: "searchkick_#{options[:match]}_index"}
|
310
425
|
end
|
311
426
|
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
427
|
+
if word
|
428
|
+
dynamic_fields[:analyzed] = analyzed_field_options
|
429
|
+
end
|
430
|
+
end
|
431
|
+
|
432
|
+
# http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
|
433
|
+
multi_field = dynamic_fields["{name}"].merge(fields: dynamic_fields.except("{name}"))
|
434
|
+
|
435
|
+
mappings = {
|
436
|
+
properties: mapping,
|
437
|
+
_routing: routing,
|
438
|
+
# https://gist.github.com/kimchy/2898285
|
439
|
+
dynamic_templates: [
|
440
|
+
{
|
441
|
+
string_template: {
|
442
|
+
match: "*",
|
443
|
+
match_mapping_type: "string",
|
444
|
+
mapping: multi_field
|
445
|
+
}
|
329
446
|
}
|
447
|
+
]
|
448
|
+
}
|
449
|
+
|
450
|
+
if below70?
|
451
|
+
mappings = {index_type => mappings}
|
452
|
+
end
|
453
|
+
|
454
|
+
mappings
|
455
|
+
end
|
456
|
+
|
457
|
+
def add_synonyms(settings)
|
458
|
+
synonyms = options[:synonyms] || []
|
459
|
+
synonyms = synonyms.call if synonyms.respond_to?(:call)
|
460
|
+
if synonyms.any?
|
461
|
+
settings[:analysis][:filter][:searchkick_synonym] = {
|
462
|
+
type: "synonym",
|
463
|
+
# only remove a single space from synonyms so three-word synonyms will fail noisily instead of silently
|
464
|
+
synonyms: synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.map { |s2| s2.sub(/\s+/, "") }.join(",") : s }.map(&:downcase)
|
330
465
|
}
|
466
|
+
# choosing a place for the synonym filter when stemming is not easy
|
467
|
+
# https://groups.google.com/forum/#!topic/elasticsearch/p7qcQlgHdB8
|
468
|
+
# TODO use a snowball stemmer on synonyms when creating the token filter
|
469
|
+
|
470
|
+
# http://elasticsearch-users.115913.n3.nabble.com/synonym-multi-words-search-td4030811.html
|
471
|
+
# I find the following approach effective if you are doing multi-word synonyms (synonym phrases):
|
472
|
+
# - Only apply the synonym expansion at index time
|
473
|
+
# - Don't have the synonym filter applied search
|
474
|
+
# - Use directional synonyms where appropriate. You want to make sure that you're not injecting terms that are too general.
|
475
|
+
settings[:analysis][:analyzer][default_analyzer][:filter].insert(2, "searchkick_synonym")
|
476
|
+
|
477
|
+
%w(word_start word_middle word_end).each do |type|
|
478
|
+
settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_synonym")
|
479
|
+
end
|
480
|
+
end
|
481
|
+
end
|
331
482
|
|
332
|
-
|
333
|
-
|
334
|
-
|
483
|
+
def add_search_synonyms(settings)
|
484
|
+
search_synonyms = options[:search_synonyms] || []
|
485
|
+
search_synonyms = search_synonyms.call if search_synonyms.respond_to?(:call)
|
486
|
+
if search_synonyms.is_a?(String) || search_synonyms.any?
|
487
|
+
if search_synonyms.is_a?(String)
|
488
|
+
synonym_graph = {
|
489
|
+
type: "synonym_graph",
|
490
|
+
synonyms_path: search_synonyms
|
491
|
+
}
|
492
|
+
synonym_graph[:updateable] = true unless below73?
|
493
|
+
else
|
494
|
+
synonym_graph = {
|
495
|
+
type: "synonym_graph",
|
496
|
+
# TODO confirm this is correct
|
497
|
+
synonyms: search_synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.join(",") : s }.map(&:downcase)
|
498
|
+
}
|
335
499
|
end
|
500
|
+
settings[:analysis][:filter][:searchkick_synonym_graph] = synonym_graph
|
336
501
|
|
337
|
-
|
502
|
+
[:searchkick_search2, :searchkick_word_search].each do |analyzer|
|
503
|
+
settings[:analysis][:analyzer][analyzer][:filter].insert(2, "searchkick_synonym_graph")
|
504
|
+
end
|
338
505
|
end
|
506
|
+
end
|
339
507
|
|
340
|
-
|
341
|
-
|
342
|
-
|
508
|
+
def add_wordnet(settings)
|
509
|
+
settings[:analysis][:filter][:searchkick_wordnet] = {
|
510
|
+
type: "synonym",
|
511
|
+
format: "wordnet",
|
512
|
+
synonyms_path: Searchkick.wordnet_path
|
343
513
|
}
|
514
|
+
|
515
|
+
settings[:analysis][:analyzer][default_analyzer][:filter].insert(4, "searchkick_wordnet")
|
516
|
+
settings[:analysis][:analyzer][default_analyzer][:filter] << "searchkick_wordnet"
|
517
|
+
|
518
|
+
%w(word_start word_middle word_end).each do |type|
|
519
|
+
settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_wordnet")
|
520
|
+
end
|
521
|
+
end
|
522
|
+
|
523
|
+
def set_deep_paging(settings)
|
524
|
+
if !settings.dig(:index, :max_result_window) && !settings[:"index.max_result_window"]
|
525
|
+
settings[:index] ||= {}
|
526
|
+
settings[:index][:max_result_window] = 1_000_000_000
|
527
|
+
end
|
528
|
+
end
|
529
|
+
|
530
|
+
def index_type
|
531
|
+
@index_type ||= begin
|
532
|
+
index_type = options[:_type]
|
533
|
+
index_type = index_type.call if index_type.respond_to?(:call)
|
534
|
+
index_type
|
535
|
+
end
|
536
|
+
end
|
537
|
+
|
538
|
+
def default_type
|
539
|
+
"text"
|
540
|
+
end
|
541
|
+
|
542
|
+
def default_analyzer
|
543
|
+
:searchkick_index
|
544
|
+
end
|
545
|
+
|
546
|
+
def below62?
|
547
|
+
Searchkick.server_below?("6.2.0")
|
548
|
+
end
|
549
|
+
|
550
|
+
def below70?
|
551
|
+
Searchkick.server_below?("7.0.0")
|
552
|
+
end
|
553
|
+
|
554
|
+
def below73?
|
555
|
+
Searchkick.server_below?("7.3.0")
|
344
556
|
end
|
345
557
|
end
|
346
558
|
end
|