searchkick 2.3.2 → 4.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. checksums.yaml +5 -5
  2. data/CHANGELOG.md +251 -84
  3. data/LICENSE.txt +1 -1
  4. data/README.md +552 -432
  5. data/lib/searchkick/bulk_indexer.rb +173 -0
  6. data/lib/searchkick/bulk_reindex_job.rb +2 -2
  7. data/lib/searchkick/hash_wrapper.rb +12 -0
  8. data/lib/searchkick/index.rb +187 -348
  9. data/lib/searchkick/index_options.rb +494 -282
  10. data/lib/searchkick/logging.rb +17 -13
  11. data/lib/searchkick/model.rb +52 -97
  12. data/lib/searchkick/multi_search.rb +9 -10
  13. data/lib/searchkick/process_batch_job.rb +17 -4
  14. data/lib/searchkick/process_queue_job.rb +20 -12
  15. data/lib/searchkick/query.rb +415 -199
  16. data/lib/searchkick/railtie.rb +7 -0
  17. data/lib/searchkick/record_data.rb +128 -0
  18. data/lib/searchkick/record_indexer.rb +79 -0
  19. data/lib/searchkick/reindex_queue.rb +1 -1
  20. data/lib/searchkick/reindex_v2_job.rb +14 -12
  21. data/lib/searchkick/results.rb +135 -41
  22. data/lib/searchkick/version.rb +1 -1
  23. data/lib/searchkick.rb +130 -61
  24. data/lib/tasks/searchkick.rake +34 -0
  25. metadata +18 -162
  26. data/.gitignore +0 -22
  27. data/.travis.yml +0 -39
  28. data/Gemfile +0 -16
  29. data/Rakefile +0 -20
  30. data/benchmark/Gemfile +0 -23
  31. data/benchmark/benchmark.rb +0 -97
  32. data/lib/searchkick/tasks.rb +0 -33
  33. data/searchkick.gemspec +0 -28
  34. data/test/aggs_test.rb +0 -197
  35. data/test/autocomplete_test.rb +0 -75
  36. data/test/boost_test.rb +0 -202
  37. data/test/callbacks_test.rb +0 -59
  38. data/test/ci/before_install.sh +0 -17
  39. data/test/errors_test.rb +0 -19
  40. data/test/gemfiles/activerecord31.gemfile +0 -7
  41. data/test/gemfiles/activerecord32.gemfile +0 -7
  42. data/test/gemfiles/activerecord40.gemfile +0 -8
  43. data/test/gemfiles/activerecord41.gemfile +0 -8
  44. data/test/gemfiles/activerecord42.gemfile +0 -7
  45. data/test/gemfiles/activerecord50.gemfile +0 -7
  46. data/test/gemfiles/apartment.gemfile +0 -8
  47. data/test/gemfiles/cequel.gemfile +0 -8
  48. data/test/gemfiles/mongoid2.gemfile +0 -7
  49. data/test/gemfiles/mongoid3.gemfile +0 -6
  50. data/test/gemfiles/mongoid4.gemfile +0 -7
  51. data/test/gemfiles/mongoid5.gemfile +0 -7
  52. data/test/gemfiles/mongoid6.gemfile +0 -12
  53. data/test/gemfiles/nobrainer.gemfile +0 -8
  54. data/test/gemfiles/parallel_tests.gemfile +0 -8
  55. data/test/geo_shape_test.rb +0 -175
  56. data/test/highlight_test.rb +0 -78
  57. data/test/index_test.rb +0 -166
  58. data/test/inheritance_test.rb +0 -83
  59. data/test/marshal_test.rb +0 -8
  60. data/test/match_test.rb +0 -276
  61. data/test/misspellings_test.rb +0 -56
  62. data/test/model_test.rb +0 -42
  63. data/test/multi_search_test.rb +0 -36
  64. data/test/multi_tenancy_test.rb +0 -22
  65. data/test/order_test.rb +0 -46
  66. data/test/pagination_test.rb +0 -70
  67. data/test/partial_reindex_test.rb +0 -58
  68. data/test/query_test.rb +0 -35
  69. data/test/records_test.rb +0 -10
  70. data/test/reindex_test.rb +0 -64
  71. data/test/reindex_v2_job_test.rb +0 -32
  72. data/test/routing_test.rb +0 -23
  73. data/test/should_index_test.rb +0 -32
  74. data/test/similar_test.rb +0 -28
  75. data/test/sql_test.rb +0 -214
  76. data/test/suggest_test.rb +0 -95
  77. data/test/support/kaminari.yml +0 -21
  78. data/test/synonyms_test.rb +0 -67
  79. data/test/test_helper.rb +0 -567
  80. data/test/where_test.rb +0 -223
data/lib/searchkick/index_options.rb
@@ -1,346 +1,558 @@
1
1
  module Searchkick
2
- module IndexOptions
2
+ class IndexOptions
3
+ attr_reader :options
4
+
5
+ def initialize(index)
6
+ @options = index.options
7
+ end
8
+
3
9
  def index_options
4
- options = @options
5
- language = options[:language]
6
- language = language.call if language.respond_to?(:call)
10
+ custom_mapping = options[:mappings] || {}
11
+ if below70? && custom_mapping.keys.map(&:to_sym).include?(:properties)
12
+ # add type
13
+ custom_mapping = {index_type => custom_mapping}
14
+ end
7
15
 
8
16
  if options[:mappings] && !options[:merge_mappings]
9
17
  settings = options[:settings] || {}
10
- mappings = options[:mappings]
18
+ mappings = custom_mapping
11
19
  else
12
- below22 = Searchkick.server_below?("2.2.0")
13
- below50 = Searchkick.server_below?("5.0.0-alpha1")
14
- below60 = Searchkick.server_below?("6.0.0-alpha1")
15
- default_type = below50 ? "string" : "text"
16
- default_analyzer = :searchkick_index
17
- keyword_mapping =
18
- if below50
19
- {
20
- type: default_type,
21
- index: "not_analyzed"
22
- }
23
- else
24
- {
25
- type: "keyword"
26
- }
27
- end
20
+ settings = generate_settings
21
+ mappings = generate_mappings.symbolize_keys.deep_merge(custom_mapping.symbolize_keys)
22
+ end
28
23
 
29
- all = options.key?(:_all) ? options[:_all] : below60
30
- index_true_value = below50 ? "analyzed" : true
31
- index_false_value = below50 ? "no" : false
32
-
33
- keyword_mapping[:ignore_above] = (options[:ignore_above] || 30000) unless below22
34
-
35
- settings = {
36
- analysis: {
37
- analyzer: {
38
- searchkick_keyword: {
39
- type: "custom",
40
- tokenizer: "keyword",
41
- filter: ["lowercase"] + (options[:stem_conversions] == false ? [] : ["searchkick_stemmer"])
42
- },
43
- default_analyzer => {
44
- type: "custom",
45
- # character filters -> tokenizer -> token filters
46
- # https://www.elastic.co/guide/en/elasticsearch/guide/current/analysis-intro.html
47
- char_filter: ["ampersand"],
48
- tokenizer: "standard",
49
- # synonym should come last, after stemming and shingle
50
- # shingle must come before searchkick_stemmer
51
- filter: ["standard", "lowercase", "asciifolding", "searchkick_index_shingle", "searchkick_stemmer"]
52
- },
53
- searchkick_search: {
54
- type: "custom",
55
- char_filter: ["ampersand"],
56
- tokenizer: "standard",
57
- filter: ["standard", "lowercase", "asciifolding", "searchkick_search_shingle", "searchkick_stemmer"]
58
- },
59
- searchkick_search2: {
60
- type: "custom",
61
- char_filter: ["ampersand"],
62
- tokenizer: "standard",
63
- filter: ["standard", "lowercase", "asciifolding", "searchkick_stemmer"]
64
- },
65
- # https://github.com/leschenko/elasticsearch_autocomplete/blob/master/lib/elasticsearch_autocomplete/analyzers.rb
66
- searchkick_autocomplete_search: {
67
- type: "custom",
68
- tokenizer: "keyword",
69
- filter: ["lowercase", "asciifolding"]
70
- },
71
- searchkick_word_search: {
72
- type: "custom",
73
- tokenizer: "standard",
74
- filter: ["lowercase", "asciifolding"]
75
- },
76
- searchkick_suggest_index: {
77
- type: "custom",
78
- tokenizer: "standard",
79
- filter: ["lowercase", "asciifolding", "searchkick_suggest_shingle"]
80
- },
81
- searchkick_text_start_index: {
82
- type: "custom",
83
- tokenizer: "keyword",
84
- filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
85
- },
86
- searchkick_text_middle_index: {
87
- type: "custom",
88
- tokenizer: "keyword",
89
- filter: ["lowercase", "asciifolding", "searchkick_ngram"]
90
- },
91
- searchkick_text_end_index: {
92
- type: "custom",
93
- tokenizer: "keyword",
94
- filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
95
- },
96
- searchkick_word_start_index: {
97
- type: "custom",
98
- tokenizer: "standard",
99
- filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
100
- },
101
- searchkick_word_middle_index: {
102
- type: "custom",
103
- tokenizer: "standard",
104
- filter: ["lowercase", "asciifolding", "searchkick_ngram"]
105
- },
106
- searchkick_word_end_index: {
107
- type: "custom",
108
- tokenizer: "standard",
109
- filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
110
- }
24
+ set_deep_paging(settings) if options[:deep_paging]
25
+
26
+ {
27
+ settings: settings,
28
+ mappings: mappings
29
+ }
30
+ end
31
+
32
+ def generate_settings
33
+ language = options[:language]
34
+ language = language.call if language.respond_to?(:call)
35
+
36
+ settings = {
37
+ analysis: {
38
+ analyzer: {
39
+ searchkick_keyword: {
40
+ type: "custom",
41
+ tokenizer: "keyword",
42
+ filter: ["lowercase"] + (options[:stem_conversions] ? ["searchkick_stemmer"] : [])
43
+ },
44
+ default_analyzer => {
45
+ type: "custom",
46
+ # character filters -> tokenizer -> token filters
47
+ # https://www.elastic.co/guide/en/elasticsearch/guide/current/analysis-intro.html
48
+ char_filter: ["ampersand"],
49
+ tokenizer: "standard",
50
+ # synonym should come last, after stemming and shingle
51
+ # shingle must come before searchkick_stemmer
52
+ filter: ["lowercase", "asciifolding", "searchkick_index_shingle", "searchkick_stemmer"]
53
+ },
54
+ searchkick_search: {
55
+ type: "custom",
56
+ char_filter: ["ampersand"],
57
+ tokenizer: "standard",
58
+ filter: ["lowercase", "asciifolding", "searchkick_search_shingle", "searchkick_stemmer"]
59
+ },
60
+ searchkick_search2: {
61
+ type: "custom",
62
+ char_filter: ["ampersand"],
63
+ tokenizer: "standard",
64
+ filter: ["lowercase", "asciifolding", "searchkick_stemmer"]
65
+ },
66
+ # https://github.com/leschenko/elasticsearch_autocomplete/blob/master/lib/elasticsearch_autocomplete/analyzers.rb
67
+ searchkick_autocomplete_search: {
68
+ type: "custom",
69
+ tokenizer: "keyword",
70
+ filter: ["lowercase", "asciifolding"]
71
+ },
72
+ searchkick_word_search: {
73
+ type: "custom",
74
+ tokenizer: "standard",
75
+ filter: ["lowercase", "asciifolding"]
76
+ },
77
+ searchkick_suggest_index: {
78
+ type: "custom",
79
+ tokenizer: "standard",
80
+ filter: ["lowercase", "asciifolding", "searchkick_suggest_shingle"]
81
+ },
82
+ searchkick_text_start_index: {
83
+ type: "custom",
84
+ tokenizer: "keyword",
85
+ filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
86
+ },
87
+ searchkick_text_middle_index: {
88
+ type: "custom",
89
+ tokenizer: "keyword",
90
+ filter: ["lowercase", "asciifolding", "searchkick_ngram"]
111
91
  },
112
- filter: {
113
- searchkick_index_shingle: {
114
- type: "shingle",
115
- token_separator: ""
116
- },
117
- # lucky find http://web.archiveorange.com/archive/v/AAfXfQ17f57FcRINsof7
118
- searchkick_search_shingle: {
119
- type: "shingle",
120
- token_separator: "",
121
- output_unigrams: false,
122
- output_unigrams_if_no_shingles: true
123
- },
124
- searchkick_suggest_shingle: {
125
- type: "shingle",
126
- max_shingle_size: 5
127
- },
128
- searchkick_edge_ngram: {
129
- type: "edgeNGram",
130
- min_gram: 1,
131
- max_gram: 50
132
- },
133
- searchkick_ngram: {
134
- type: "nGram",
135
- min_gram: 1,
136
- max_gram: 50
137
- },
138
- searchkick_stemmer: {
139
- # use stemmer if language is lowercase, snowball otherwise
140
- # TODO deprecate language option in favor of stemmer
141
- type: language == language.to_s.downcase ? "stemmer" : "snowball",
142
- language: language || "English"
143
- }
92
+ searchkick_text_end_index: {
93
+ type: "custom",
94
+ tokenizer: "keyword",
95
+ filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
144
96
  },
145
- char_filter: {
146
- # https://www.elastic.co/guide/en/elasticsearch/guide/current/custom-analyzers.html
147
- # &_to_and
148
- ampersand: {
149
- type: "mapping",
150
- mappings: ["&=> and "]
151
- }
97
+ searchkick_word_start_index: {
98
+ type: "custom",
99
+ tokenizer: "standard",
100
+ filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
101
+ },
102
+ searchkick_word_middle_index: {
103
+ type: "custom",
104
+ tokenizer: "standard",
105
+ filter: ["lowercase", "asciifolding", "searchkick_ngram"]
106
+ },
107
+ searchkick_word_end_index: {
108
+ type: "custom",
109
+ tokenizer: "standard",
110
+ filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
111
+ }
112
+ },
113
+ filter: {
114
+ searchkick_index_shingle: {
115
+ type: "shingle",
116
+ token_separator: ""
117
+ },
118
+ # lucky find https://web.archiveorange.com/archive/v/AAfXfQ17f57FcRINsof7
119
+ searchkick_search_shingle: {
120
+ type: "shingle",
121
+ token_separator: "",
122
+ output_unigrams: false,
123
+ output_unigrams_if_no_shingles: true
124
+ },
125
+ searchkick_suggest_shingle: {
126
+ type: "shingle",
127
+ max_shingle_size: 5
128
+ },
129
+ searchkick_edge_ngram: {
130
+ type: "edge_ngram",
131
+ min_gram: 1,
132
+ max_gram: 50
133
+ },
134
+ searchkick_ngram: {
135
+ type: "ngram",
136
+ min_gram: 1,
137
+ max_gram: 50
138
+ },
139
+ searchkick_stemmer: {
140
+ # use stemmer if language is lowercase, snowball otherwise
141
+ type: language == language.to_s.downcase ? "stemmer" : "snowball",
142
+ language: language || "English"
143
+ }
144
+ },
145
+ char_filter: {
146
+ # https://www.elastic.co/guide/en/elasticsearch/guide/current/custom-analyzers.html
147
+ # &_to_and
148
+ ampersand: {
149
+ type: "mapping",
150
+ mappings: ["&=> and "]
152
151
  }
153
152
  }
154
153
  }
154
+ }
155
155
 
156
- if Searchkick.env == "test"
157
- settings[:number_of_shards] = 1
158
- settings[:number_of_replicas] = 0
159
- end
156
+ update_language(settings, language)
157
+ update_stemming(settings)
160
158
 
161
- if options[:similarity]
162
- settings[:similarity] = {default: {type: options[:similarity]}}
163
- end
159
+ if Searchkick.env == "test"
160
+ settings[:number_of_shards] = 1
161
+ settings[:number_of_replicas] = 0
162
+ end
163
+
164
+ # TODO remove in Searchkick 5 (classic no longer supported)
165
+ if options[:similarity]
166
+ settings[:similarity] = {default: {type: options[:similarity]}}
167
+ end
168
+
169
+ unless below62?
170
+ settings[:index] = {
171
+ max_ngram_diff: 49,
172
+ max_shingle_diff: 4
173
+ }
174
+ end
164
175
 
165
- settings.deep_merge!(options[:settings] || {})
176
+ if options[:case_sensitive]
177
+ settings[:analysis][:analyzer].each do |_, analyzer|
178
+ analyzer[:filter].delete("lowercase")
179
+ end
180
+ end
166
181
 
167
- # synonyms
168
- synonyms = options[:synonyms] || []
182
+ # TODO do this last in Searchkick 5
183
+ settings = settings.symbolize_keys.deep_merge((options[:settings] || {}).symbolize_keys)
169
184
 
170
- synonyms = synonyms.call if synonyms.respond_to?(:call)
185
+ add_synonyms(settings)
186
+ add_search_synonyms(settings)
187
+ # TODO remove in Searchkick 5
188
+ add_wordnet(settings) if options[:wordnet]
171
189
 
172
- if synonyms.any?
173
- settings[:analysis][:filter][:searchkick_synonym] = {
174
- type: "synonym",
175
- synonyms: synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.join(",") : s }.map(&:downcase)
176
- }
177
- # choosing a place for the synonym filter when stemming is not easy
178
- # https://groups.google.com/forum/#!topic/elasticsearch/p7qcQlgHdB8
179
- # TODO use a snowball stemmer on synonyms when creating the token filter
180
-
181
- # http://elasticsearch-users.115913.n3.nabble.com/synonym-multi-words-search-td4030811.html
182
- # I find the following approach effective if you are doing multi-word synonyms (synonym phrases):
183
- # - Only apply the synonym expansion at index time
184
- # - Don't have the synonym filter applied search
185
- # - Use directional synonyms where appropriate. You want to make sure that you're not injecting terms that are too general.
186
- settings[:analysis][:analyzer][default_analyzer][:filter].insert(4, "searchkick_synonym") if below60
187
- settings[:analysis][:analyzer][default_analyzer][:filter] << "searchkick_synonym"
188
-
189
- %w(word_start word_middle word_end).each do |type|
190
- settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_synonym")
191
- end
190
+ if options[:special_characters] == false
191
+ settings[:analysis][:analyzer].each_value do |analyzer_settings|
192
+ analyzer_settings[:filter].reject! { |f| f == "asciifolding" }
192
193
  end
194
+ end
193
195
 
194
- if options[:wordnet]
195
- settings[:analysis][:filter][:searchkick_wordnet] = {
196
- type: "synonym",
197
- format: "wordnet",
198
- synonyms_path: Searchkick.wordnet_path
196
+ settings
197
+ end
198
+
199
+ def update_language(settings, language)
200
+ case language
201
+ when "chinese"
202
+ settings[:analysis][:analyzer].merge!(
203
+ default_analyzer => {
204
+ type: "ik_smart"
205
+ },
206
+ searchkick_search: {
207
+ type: "ik_smart"
208
+ },
209
+ searchkick_search2: {
210
+ type: "ik_max_word"
211
+ }
212
+ )
213
+ when "chinese2", "smartcn"
214
+ settings[:analysis][:analyzer].merge!(
215
+ default_analyzer => {
216
+ type: "smartcn"
217
+ },
218
+ searchkick_search: {
219
+ type: "smartcn"
220
+ },
221
+ searchkick_search2: {
222
+ type: "smartcn"
223
+ }
224
+ )
225
+ when "japanese"
226
+ settings[:analysis][:analyzer].merge!(
227
+ default_analyzer => {
228
+ type: "kuromoji"
229
+ },
230
+ searchkick_search: {
231
+ type: "kuromoji"
232
+ },
233
+ searchkick_search2: {
234
+ type: "kuromoji"
199
235
  }
236
+ )
237
+ when "korean"
238
+ settings[:analysis][:analyzer].merge!(
239
+ default_analyzer => {
240
+ type: "openkoreantext-analyzer"
241
+ },
242
+ searchkick_search: {
243
+ type: "openkoreantext-analyzer"
244
+ },
245
+ searchkick_search2: {
246
+ type: "openkoreantext-analyzer"
247
+ }
248
+ )
249
+ when "korean2"
250
+ settings[:analysis][:analyzer].merge!(
251
+ default_analyzer => {
252
+ type: "nori"
253
+ },
254
+ searchkick_search: {
255
+ type: "nori"
256
+ },
257
+ searchkick_search2: {
258
+ type: "nori"
259
+ }
260
+ )
261
+ when "vietnamese"
262
+ settings[:analysis][:analyzer].merge!(
263
+ default_analyzer => {
264
+ type: "vi_analyzer"
265
+ },
266
+ searchkick_search: {
267
+ type: "vi_analyzer"
268
+ },
269
+ searchkick_search2: {
270
+ type: "vi_analyzer"
271
+ }
272
+ )
273
+ when "polish", "ukrainian"
274
+ settings[:analysis][:analyzer].merge!(
275
+ default_analyzer => {
276
+ type: language
277
+ },
278
+ searchkick_search: {
279
+ type: language
280
+ },
281
+ searchkick_search2: {
282
+ type: language
283
+ }
284
+ )
285
+ end
286
+ end
200
287
 
201
- settings[:analysis][:analyzer][default_analyzer][:filter].insert(4, "searchkick_wordnet")
202
- settings[:analysis][:analyzer][default_analyzer][:filter] << "searchkick_wordnet"
288
+ def update_stemming(settings)
289
+ stem = options[:stem]
203
290
 
204
- %w(word_start word_middle word_end).each do |type|
205
- settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_wordnet")
206
- end
207
- end
291
+ # language analyzer used
292
+ stem = false if settings[:analysis][:analyzer][default_analyzer][:type] != "custom"
208
293
 
209
- if options[:special_characters] == false
210
- settings[:analysis][:analyzer].each do |_, analyzer_settings|
211
- analyzer_settings[:filter].reject! { |f| f == "asciifolding" }
212
- end
294
+ if stem == false
295
+ settings[:analysis][:filter].delete(:searchkick_stemmer)
296
+ settings[:analysis][:analyzer].each do |_, analyzer|
297
+ analyzer[:filter].delete("searchkick_stemmer") if analyzer[:filter]
213
298
  end
299
+ end
214
300
 
215
- mapping = {}
301
+ if options[:stemmer_override]
302
+ stemmer_override = {
303
+ type: "stemmer_override"
304
+ }
305
+ if options[:stemmer_override].is_a?(String)
306
+ stemmer_override[:rules_path] = options[:stemmer_override]
307
+ else
308
+ stemmer_override[:rules] = options[:stemmer_override]
309
+ end
310
+ settings[:analysis][:filter][:searchkick_stemmer_override] = stemmer_override
216
311
 
217
- # conversions
218
- Array(options[:conversions]).each do |conversions_field|
219
- mapping[conversions_field] = {
220
- type: "nested",
221
- properties: {
222
- query: {type: default_type, analyzer: "searchkick_keyword"},
223
- count: {type: "integer"}
224
- }
225
- }
312
+ settings[:analysis][:analyzer].each do |_, analyzer|
313
+ stemmer_index = analyzer[:filter].index("searchkick_stemmer") if analyzer[:filter]
314
+ analyzer[:filter].insert(stemmer_index, "searchkick_stemmer_override") if stemmer_index
226
315
  end
316
+ end
227
317
 
228
- mapping_options = Hash[
229
- [:suggest, :word, :text_start, :text_middle, :text_end, :word_start, :word_middle, :word_end, :highlight, :searchable, :filterable]
230
- .map { |type| [type, (options[type] || []).map(&:to_s)] }
231
- ]
318
+ if options[:stem_exclusion]
319
+ settings[:analysis][:filter][:searchkick_stem_exclusion] = {
320
+ type: "keyword_marker",
321
+ keywords: options[:stem_exclusion]
322
+ }
323
+
324
+ settings[:analysis][:analyzer].each do |_, analyzer|
325
+ stemmer_index = analyzer[:filter].index("searchkick_stemmer") if analyzer[:filter]
326
+ analyzer[:filter].insert(stemmer_index, "searchkick_stem_exclusion") if stemmer_index
327
+ end
328
+ end
329
+ end
232
330
 
233
- word = options[:word] != false && (!options[:match] || options[:match] == :word)
331
+ def generate_mappings
332
+ mapping = {}
234
333
 
235
- mapping_options[:searchable].delete("_all")
334
+ keyword_mapping = {type: "keyword"}
335
+ keyword_mapping[:ignore_above] = options[:ignore_above] || 30000
236
336
 
237
- analyzed_field_options = {type: default_type, index: index_true_value, analyzer: default_analyzer}
337
+ # conversions
338
+ Array(options[:conversions]).each do |conversions_field|
339
+ mapping[conversions_field] = {
340
+ type: "nested",
341
+ properties: {
342
+ query: {type: default_type, analyzer: "searchkick_keyword"},
343
+ count: {type: "integer"}
344
+ }
345
+ }
346
+ end
238
347
 
239
- mapping_options.values.flatten.uniq.each do |field|
240
- fields = {}
348
+ mapping_options = Hash[
349
+ [:suggest, :word, :text_start, :text_middle, :text_end, :word_start, :word_middle, :word_end, :highlight, :searchable, :filterable]
350
+ .map { |type| [type, (options[type] || []).map(&:to_s)] }
351
+ ]
241
352
 
242
- if options.key?(:filterable) && !mapping_options[:filterable].include?(field)
243
- fields[field] = {type: default_type, index: index_false_value}
244
- else
245
- fields[field] = keyword_mapping
246
- end
353
+ word = options[:word] != false && (!options[:match] || options[:match] == :word)
247
354
 
248
- if !options[:searchable] || mapping_options[:searchable].include?(field)
249
- if word
250
- fields["analyzed"] = analyzed_field_options
355
+ mapping_options[:searchable].delete("_all")
251
356
 
252
- if mapping_options[:highlight].include?(field)
253
- fields["analyzed"][:term_vector] = "with_positions_offsets"
254
- end
255
- end
357
+ analyzed_field_options = {type: default_type, index: true, analyzer: default_analyzer}
256
358
 
257
- mapping_options.except(:highlight, :searchable, :filterable, :word).each do |type, f|
258
- if options[:match] == type || f.include?(field)
259
- fields[type] = {type: default_type, index: index_true_value, analyzer: "searchkick_#{type}_index"}
260
- end
261
- end
262
- end
359
+ mapping_options.values.flatten.uniq.each do |field|
360
+ fields = {}
263
361
 
264
- mapping[field] = fields[field].merge(fields: fields.except(field))
362
+ if options.key?(:filterable) && !mapping_options[:filterable].include?(field)
363
+ fields[field] = {type: default_type, index: false}
364
+ else
365
+ fields[field] = keyword_mapping
265
366
  end
266
367
 
267
- (options[:locations] || []).map(&:to_s).each do |field|
268
- mapping[field] = {
269
- type: "geo_point"
270
- }
271
- end
368
+ if !options[:searchable] || mapping_options[:searchable].include?(field)
369
+ if word
370
+ fields[:analyzed] = analyzed_field_options
272
371
 
273
- options[:geo_shape] = options[:geo_shape].product([{}]).to_h if options[:geo_shape].is_a?(Array)
274
- (options[:geo_shape] || {}).each do |field, shape_options|
275
- mapping[field] = shape_options.merge(type: "geo_shape")
276
- end
372
+ if mapping_options[:highlight].include?(field)
373
+ fields[:analyzed][:term_vector] = "with_positions_offsets"
374
+ end
375
+ end
277
376
 
278
- routing = {}
279
- if options[:routing]
280
- routing = {required: true}
281
- unless options[:routing] == true
282
- routing[:path] = options[:routing].to_s
377
+ mapping_options.except(:highlight, :searchable, :filterable, :word).each do |type, f|
378
+ if options[:match] == type || f.include?(field)
379
+ fields[type] = {type: default_type, index: true, analyzer: "searchkick_#{type}_index"}
380
+ end
283
381
  end
284
382
  end
285
383
 
286
- dynamic_fields = {
287
- # analyzed field must be the default field for include_in_all
288
- # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
289
- # however, we can include the not_analyzed field in _all
290
- # and the _all index analyzer will take care of it
291
- "{name}" => keyword_mapping
384
+ mapping[field] = fields[field].merge(fields: fields.except(field))
385
+ end
386
+
387
+ (options[:locations] || []).map(&:to_s).each do |field|
388
+ mapping[field] = {
389
+ type: "geo_point"
292
390
  }
391
+ end
293
392
 
294
- if below60 && all
295
- dynamic_fields["{name}"][:include_in_all] = !options[:searchable]
296
- end
393
+ options[:geo_shape] = options[:geo_shape].product([{}]).to_h if options[:geo_shape].is_a?(Array)
394
+ (options[:geo_shape] || {}).each do |field, shape_options|
395
+ mapping[field] = shape_options.merge(type: "geo_shape")
396
+ end
397
+
398
+ if options[:inheritance]
399
+ mapping[:type] = keyword_mapping
400
+ end
297
401
 
298
- if options.key?(:filterable)
299
- dynamic_fields["{name}"] = {type: default_type, index: index_false_value}
402
+ routing = {}
403
+ if options[:routing]
404
+ routing = {required: true}
405
+ unless options[:routing] == true
406
+ routing[:path] = options[:routing].to_s
300
407
  end
408
+ end
301
409
 
302
- unless options[:searchable]
303
- if options[:match] && options[:match] != :word
304
- dynamic_fields[options[:match]] = {type: default_type, index: index_true_value, analyzer: "searchkick_#{options[:match]}_index"}
305
- end
410
+ dynamic_fields = {
411
+ # analyzed field must be the default field for include_in_all
412
+ # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
413
+ # however, we can include the not_analyzed field in _all
414
+ # and the _all index analyzer will take care of it
415
+ "{name}" => keyword_mapping
416
+ }
306
417
 
307
- if word
308
- dynamic_fields["analyzed"] = analyzed_field_options
309
- end
418
+ if options.key?(:filterable)
419
+ dynamic_fields["{name}"] = {type: default_type, index: false}
420
+ end
421
+
422
+ unless options[:searchable]
423
+ if options[:match] && options[:match] != :word
424
+ dynamic_fields[options[:match]] = {type: default_type, index: true, analyzer: "searchkick_#{options[:match]}_index"}
310
425
  end
311
426
 
312
- # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
313
- multi_field = dynamic_fields["{name}"].merge(fields: dynamic_fields.except("{name}"))
314
-
315
- mappings = {
316
- _default_: {
317
- properties: mapping,
318
- _routing: routing,
319
- # https://gist.github.com/kimchy/2898285
320
- dynamic_templates: [
321
- {
322
- string_template: {
323
- match: "*",
324
- match_mapping_type: "string",
325
- mapping: multi_field
326
- }
327
- }
328
- ]
427
+ if word
428
+ dynamic_fields[:analyzed] = analyzed_field_options
429
+ end
430
+ end
431
+
432
+ # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
433
+ multi_field = dynamic_fields["{name}"].merge(fields: dynamic_fields.except("{name}"))
434
+
435
+ mappings = {
436
+ properties: mapping,
437
+ _routing: routing,
438
+ # https://gist.github.com/kimchy/2898285
439
+ dynamic_templates: [
440
+ {
441
+ string_template: {
442
+ match: "*",
443
+ match_mapping_type: "string",
444
+ mapping: multi_field
445
+ }
329
446
  }
447
+ ]
448
+ }
449
+
450
+ if below70?
451
+ mappings = {index_type => mappings}
452
+ end
453
+
454
+ mappings
455
+ end
456
+
457
+ def add_synonyms(settings)
458
+ synonyms = options[:synonyms] || []
459
+ synonyms = synonyms.call if synonyms.respond_to?(:call)
460
+ if synonyms.any?
461
+ settings[:analysis][:filter][:searchkick_synonym] = {
462
+ type: "synonym",
463
+ # only remove a single space from synonyms so three-word synonyms will fail noisily instead of silently
464
+ synonyms: synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.map { |s2| s2.sub(/\s+/, "") }.join(",") : s }.map(&:downcase)
330
465
  }
466
+ # choosing a place for the synonym filter when stemming is not easy
467
+ # https://groups.google.com/forum/#!topic/elasticsearch/p7qcQlgHdB8
468
+ # TODO use a snowball stemmer on synonyms when creating the token filter
469
+
470
+ # http://elasticsearch-users.115913.n3.nabble.com/synonym-multi-words-search-td4030811.html
471
+ # I find the following approach effective if you are doing multi-word synonyms (synonym phrases):
472
+ # - Only apply the synonym expansion at index time
473
+ # - Don't have the synonym filter applied search
474
+ # - Use directional synonyms where appropriate. You want to make sure that you're not injecting terms that are too general.
475
+ settings[:analysis][:analyzer][default_analyzer][:filter].insert(2, "searchkick_synonym")
476
+
477
+ %w(word_start word_middle word_end).each do |type|
478
+ settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_synonym")
479
+ end
480
+ end
481
+ end
331
482
 
332
- if below60
333
- all_enabled = all && (!options[:searchable] || options[:searchable].to_a.map(&:to_s).include?("_all"))
334
- mappings[:_default_][:_all] = all_enabled ? analyzed_field_options : {enabled: false}
483
+ def add_search_synonyms(settings)
484
+ search_synonyms = options[:search_synonyms] || []
485
+ search_synonyms = search_synonyms.call if search_synonyms.respond_to?(:call)
486
+ if search_synonyms.is_a?(String) || search_synonyms.any?
487
+ if search_synonyms.is_a?(String)
488
+ synonym_graph = {
489
+ type: "synonym_graph",
490
+ synonyms_path: search_synonyms
491
+ }
492
+ synonym_graph[:updateable] = true unless below73?
493
+ else
494
+ synonym_graph = {
495
+ type: "synonym_graph",
496
+ # TODO confirm this is correct
497
+ synonyms: search_synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.join(",") : s }.map(&:downcase)
498
+ }
335
499
  end
500
+ settings[:analysis][:filter][:searchkick_synonym_graph] = synonym_graph
336
501
 
337
- mappings = mappings.deep_merge(options[:mappings] || {})
502
+ [:searchkick_search2, :searchkick_word_search].each do |analyzer|
503
+ settings[:analysis][:analyzer][analyzer][:filter].insert(2, "searchkick_synonym_graph")
504
+ end
338
505
  end
506
+ end
339
507
 
340
- {
341
- settings: settings,
342
- mappings: mappings
508
+ def add_wordnet(settings)
509
+ settings[:analysis][:filter][:searchkick_wordnet] = {
510
+ type: "synonym",
511
+ format: "wordnet",
512
+ synonyms_path: Searchkick.wordnet_path
343
513
  }
514
+
515
+ settings[:analysis][:analyzer][default_analyzer][:filter].insert(4, "searchkick_wordnet")
516
+ settings[:analysis][:analyzer][default_analyzer][:filter] << "searchkick_wordnet"
517
+
518
+ %w(word_start word_middle word_end).each do |type|
519
+ settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_wordnet")
520
+ end
521
+ end
522
+
523
+ def set_deep_paging(settings)
524
+ if !settings.dig(:index, :max_result_window) && !settings[:"index.max_result_window"]
525
+ settings[:index] ||= {}
526
+ settings[:index][:max_result_window] = 1_000_000_000
527
+ end
528
+ end
529
+
530
+ def index_type
531
+ @index_type ||= begin
532
+ index_type = options[:_type]
533
+ index_type = index_type.call if index_type.respond_to?(:call)
534
+ index_type
535
+ end
536
+ end
537
+
538
+ def default_type
539
+ "text"
540
+ end
541
+
542
+ def default_analyzer
543
+ :searchkick_index
544
+ end
545
+
546
+ def below62?
547
+ Searchkick.server_below?("6.2.0")
548
+ end
549
+
550
+ def below70?
551
+ Searchkick.server_below?("7.0.0")
552
+ end
553
+
554
+ def below73?
555
+ Searchkick.server_below?("7.3.0")
344
556
  end
345
557
  end
346
558
  end