searchkick 2.3.2 → 4.4.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (80) hide show
  1. checksums.yaml +5 -5
  2. data/CHANGELOG.md +251 -84
  3. data/LICENSE.txt +1 -1
  4. data/README.md +552 -432
  5. data/lib/searchkick/bulk_indexer.rb +173 -0
  6. data/lib/searchkick/bulk_reindex_job.rb +2 -2
  7. data/lib/searchkick/hash_wrapper.rb +12 -0
  8. data/lib/searchkick/index.rb +187 -348
  9. data/lib/searchkick/index_options.rb +494 -282
  10. data/lib/searchkick/logging.rb +17 -13
  11. data/lib/searchkick/model.rb +52 -97
  12. data/lib/searchkick/multi_search.rb +9 -10
  13. data/lib/searchkick/process_batch_job.rb +17 -4
  14. data/lib/searchkick/process_queue_job.rb +20 -12
  15. data/lib/searchkick/query.rb +415 -199
  16. data/lib/searchkick/railtie.rb +7 -0
  17. data/lib/searchkick/record_data.rb +128 -0
  18. data/lib/searchkick/record_indexer.rb +79 -0
  19. data/lib/searchkick/reindex_queue.rb +1 -1
  20. data/lib/searchkick/reindex_v2_job.rb +14 -12
  21. data/lib/searchkick/results.rb +135 -41
  22. data/lib/searchkick/version.rb +1 -1
  23. data/lib/searchkick.rb +130 -61
  24. data/lib/tasks/searchkick.rake +34 -0
  25. metadata +18 -162
  26. data/.gitignore +0 -22
  27. data/.travis.yml +0 -39
  28. data/Gemfile +0 -16
  29. data/Rakefile +0 -20
  30. data/benchmark/Gemfile +0 -23
  31. data/benchmark/benchmark.rb +0 -97
  32. data/lib/searchkick/tasks.rb +0 -33
  33. data/searchkick.gemspec +0 -28
  34. data/test/aggs_test.rb +0 -197
  35. data/test/autocomplete_test.rb +0 -75
  36. data/test/boost_test.rb +0 -202
  37. data/test/callbacks_test.rb +0 -59
  38. data/test/ci/before_install.sh +0 -17
  39. data/test/errors_test.rb +0 -19
  40. data/test/gemfiles/activerecord31.gemfile +0 -7
  41. data/test/gemfiles/activerecord32.gemfile +0 -7
  42. data/test/gemfiles/activerecord40.gemfile +0 -8
  43. data/test/gemfiles/activerecord41.gemfile +0 -8
  44. data/test/gemfiles/activerecord42.gemfile +0 -7
  45. data/test/gemfiles/activerecord50.gemfile +0 -7
  46. data/test/gemfiles/apartment.gemfile +0 -8
  47. data/test/gemfiles/cequel.gemfile +0 -8
  48. data/test/gemfiles/mongoid2.gemfile +0 -7
  49. data/test/gemfiles/mongoid3.gemfile +0 -6
  50. data/test/gemfiles/mongoid4.gemfile +0 -7
  51. data/test/gemfiles/mongoid5.gemfile +0 -7
  52. data/test/gemfiles/mongoid6.gemfile +0 -12
  53. data/test/gemfiles/nobrainer.gemfile +0 -8
  54. data/test/gemfiles/parallel_tests.gemfile +0 -8
  55. data/test/geo_shape_test.rb +0 -175
  56. data/test/highlight_test.rb +0 -78
  57. data/test/index_test.rb +0 -166
  58. data/test/inheritance_test.rb +0 -83
  59. data/test/marshal_test.rb +0 -8
  60. data/test/match_test.rb +0 -276
  61. data/test/misspellings_test.rb +0 -56
  62. data/test/model_test.rb +0 -42
  63. data/test/multi_search_test.rb +0 -36
  64. data/test/multi_tenancy_test.rb +0 -22
  65. data/test/order_test.rb +0 -46
  66. data/test/pagination_test.rb +0 -70
  67. data/test/partial_reindex_test.rb +0 -58
  68. data/test/query_test.rb +0 -35
  69. data/test/records_test.rb +0 -10
  70. data/test/reindex_test.rb +0 -64
  71. data/test/reindex_v2_job_test.rb +0 -32
  72. data/test/routing_test.rb +0 -23
  73. data/test/should_index_test.rb +0 -32
  74. data/test/similar_test.rb +0 -28
  75. data/test/sql_test.rb +0 -214
  76. data/test/suggest_test.rb +0 -95
  77. data/test/support/kaminari.yml +0 -21
  78. data/test/synonyms_test.rb +0 -67
  79. data/test/test_helper.rb +0 -567
  80. data/test/where_test.rb +0 -223
@@ -1,346 +1,558 @@
1
1
  module Searchkick
2
- module IndexOptions
2
+ class IndexOptions
3
+ attr_reader :options
4
+
5
+ def initialize(index)
6
+ @options = index.options
7
+ end
8
+
3
9
  def index_options
4
- options = @options
5
- language = options[:language]
6
- language = language.call if language.respond_to?(:call)
10
+ custom_mapping = options[:mappings] || {}
11
+ if below70? && custom_mapping.keys.map(&:to_sym).include?(:properties)
12
+ # add type
13
+ custom_mapping = {index_type => custom_mapping}
14
+ end
7
15
 
8
16
  if options[:mappings] && !options[:merge_mappings]
9
17
  settings = options[:settings] || {}
10
- mappings = options[:mappings]
18
+ mappings = custom_mapping
11
19
  else
12
- below22 = Searchkick.server_below?("2.2.0")
13
- below50 = Searchkick.server_below?("5.0.0-alpha1")
14
- below60 = Searchkick.server_below?("6.0.0-alpha1")
15
- default_type = below50 ? "string" : "text"
16
- default_analyzer = :searchkick_index
17
- keyword_mapping =
18
- if below50
19
- {
20
- type: default_type,
21
- index: "not_analyzed"
22
- }
23
- else
24
- {
25
- type: "keyword"
26
- }
27
- end
20
+ settings = generate_settings
21
+ mappings = generate_mappings.symbolize_keys.deep_merge(custom_mapping.symbolize_keys)
22
+ end
28
23
 
29
- all = options.key?(:_all) ? options[:_all] : below60
30
- index_true_value = below50 ? "analyzed" : true
31
- index_false_value = below50 ? "no" : false
32
-
33
- keyword_mapping[:ignore_above] = (options[:ignore_above] || 30000) unless below22
34
-
35
- settings = {
36
- analysis: {
37
- analyzer: {
38
- searchkick_keyword: {
39
- type: "custom",
40
- tokenizer: "keyword",
41
- filter: ["lowercase"] + (options[:stem_conversions] == false ? [] : ["searchkick_stemmer"])
42
- },
43
- default_analyzer => {
44
- type: "custom",
45
- # character filters -> tokenizer -> token filters
46
- # https://www.elastic.co/guide/en/elasticsearch/guide/current/analysis-intro.html
47
- char_filter: ["ampersand"],
48
- tokenizer: "standard",
49
- # synonym should come last, after stemming and shingle
50
- # shingle must come before searchkick_stemmer
51
- filter: ["standard", "lowercase", "asciifolding", "searchkick_index_shingle", "searchkick_stemmer"]
52
- },
53
- searchkick_search: {
54
- type: "custom",
55
- char_filter: ["ampersand"],
56
- tokenizer: "standard",
57
- filter: ["standard", "lowercase", "asciifolding", "searchkick_search_shingle", "searchkick_stemmer"]
58
- },
59
- searchkick_search2: {
60
- type: "custom",
61
- char_filter: ["ampersand"],
62
- tokenizer: "standard",
63
- filter: ["standard", "lowercase", "asciifolding", "searchkick_stemmer"]
64
- },
65
- # https://github.com/leschenko/elasticsearch_autocomplete/blob/master/lib/elasticsearch_autocomplete/analyzers.rb
66
- searchkick_autocomplete_search: {
67
- type: "custom",
68
- tokenizer: "keyword",
69
- filter: ["lowercase", "asciifolding"]
70
- },
71
- searchkick_word_search: {
72
- type: "custom",
73
- tokenizer: "standard",
74
- filter: ["lowercase", "asciifolding"]
75
- },
76
- searchkick_suggest_index: {
77
- type: "custom",
78
- tokenizer: "standard",
79
- filter: ["lowercase", "asciifolding", "searchkick_suggest_shingle"]
80
- },
81
- searchkick_text_start_index: {
82
- type: "custom",
83
- tokenizer: "keyword",
84
- filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
85
- },
86
- searchkick_text_middle_index: {
87
- type: "custom",
88
- tokenizer: "keyword",
89
- filter: ["lowercase", "asciifolding", "searchkick_ngram"]
90
- },
91
- searchkick_text_end_index: {
92
- type: "custom",
93
- tokenizer: "keyword",
94
- filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
95
- },
96
- searchkick_word_start_index: {
97
- type: "custom",
98
- tokenizer: "standard",
99
- filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
100
- },
101
- searchkick_word_middle_index: {
102
- type: "custom",
103
- tokenizer: "standard",
104
- filter: ["lowercase", "asciifolding", "searchkick_ngram"]
105
- },
106
- searchkick_word_end_index: {
107
- type: "custom",
108
- tokenizer: "standard",
109
- filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
110
- }
24
+ set_deep_paging(settings) if options[:deep_paging]
25
+
26
+ {
27
+ settings: settings,
28
+ mappings: mappings
29
+ }
30
+ end
31
+
32
+ def generate_settings
33
+ language = options[:language]
34
+ language = language.call if language.respond_to?(:call)
35
+
36
+ settings = {
37
+ analysis: {
38
+ analyzer: {
39
+ searchkick_keyword: {
40
+ type: "custom",
41
+ tokenizer: "keyword",
42
+ filter: ["lowercase"] + (options[:stem_conversions] ? ["searchkick_stemmer"] : [])
43
+ },
44
+ default_analyzer => {
45
+ type: "custom",
46
+ # character filters -> tokenizer -> token filters
47
+ # https://www.elastic.co/guide/en/elasticsearch/guide/current/analysis-intro.html
48
+ char_filter: ["ampersand"],
49
+ tokenizer: "standard",
50
+ # synonym should come last, after stemming and shingle
51
+ # shingle must come before searchkick_stemmer
52
+ filter: ["lowercase", "asciifolding", "searchkick_index_shingle", "searchkick_stemmer"]
53
+ },
54
+ searchkick_search: {
55
+ type: "custom",
56
+ char_filter: ["ampersand"],
57
+ tokenizer: "standard",
58
+ filter: ["lowercase", "asciifolding", "searchkick_search_shingle", "searchkick_stemmer"]
59
+ },
60
+ searchkick_search2: {
61
+ type: "custom",
62
+ char_filter: ["ampersand"],
63
+ tokenizer: "standard",
64
+ filter: ["lowercase", "asciifolding", "searchkick_stemmer"]
65
+ },
66
+ # https://github.com/leschenko/elasticsearch_autocomplete/blob/master/lib/elasticsearch_autocomplete/analyzers.rb
67
+ searchkick_autocomplete_search: {
68
+ type: "custom",
69
+ tokenizer: "keyword",
70
+ filter: ["lowercase", "asciifolding"]
71
+ },
72
+ searchkick_word_search: {
73
+ type: "custom",
74
+ tokenizer: "standard",
75
+ filter: ["lowercase", "asciifolding"]
76
+ },
77
+ searchkick_suggest_index: {
78
+ type: "custom",
79
+ tokenizer: "standard",
80
+ filter: ["lowercase", "asciifolding", "searchkick_suggest_shingle"]
81
+ },
82
+ searchkick_text_start_index: {
83
+ type: "custom",
84
+ tokenizer: "keyword",
85
+ filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
86
+ },
87
+ searchkick_text_middle_index: {
88
+ type: "custom",
89
+ tokenizer: "keyword",
90
+ filter: ["lowercase", "asciifolding", "searchkick_ngram"]
111
91
  },
112
- filter: {
113
- searchkick_index_shingle: {
114
- type: "shingle",
115
- token_separator: ""
116
- },
117
- # lucky find http://web.archiveorange.com/archive/v/AAfXfQ17f57FcRINsof7
118
- searchkick_search_shingle: {
119
- type: "shingle",
120
- token_separator: "",
121
- output_unigrams: false,
122
- output_unigrams_if_no_shingles: true
123
- },
124
- searchkick_suggest_shingle: {
125
- type: "shingle",
126
- max_shingle_size: 5
127
- },
128
- searchkick_edge_ngram: {
129
- type: "edgeNGram",
130
- min_gram: 1,
131
- max_gram: 50
132
- },
133
- searchkick_ngram: {
134
- type: "nGram",
135
- min_gram: 1,
136
- max_gram: 50
137
- },
138
- searchkick_stemmer: {
139
- # use stemmer if language is lowercase, snowball otherwise
140
- # TODO deprecate language option in favor of stemmer
141
- type: language == language.to_s.downcase ? "stemmer" : "snowball",
142
- language: language || "English"
143
- }
92
+ searchkick_text_end_index: {
93
+ type: "custom",
94
+ tokenizer: "keyword",
95
+ filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
144
96
  },
145
- char_filter: {
146
- # https://www.elastic.co/guide/en/elasticsearch/guide/current/custom-analyzers.html
147
- # &_to_and
148
- ampersand: {
149
- type: "mapping",
150
- mappings: ["&=> and "]
151
- }
97
+ searchkick_word_start_index: {
98
+ type: "custom",
99
+ tokenizer: "standard",
100
+ filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
101
+ },
102
+ searchkick_word_middle_index: {
103
+ type: "custom",
104
+ tokenizer: "standard",
105
+ filter: ["lowercase", "asciifolding", "searchkick_ngram"]
106
+ },
107
+ searchkick_word_end_index: {
108
+ type: "custom",
109
+ tokenizer: "standard",
110
+ filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
111
+ }
112
+ },
113
+ filter: {
114
+ searchkick_index_shingle: {
115
+ type: "shingle",
116
+ token_separator: ""
117
+ },
118
+ # lucky find https://web.archiveorange.com/archive/v/AAfXfQ17f57FcRINsof7
119
+ searchkick_search_shingle: {
120
+ type: "shingle",
121
+ token_separator: "",
122
+ output_unigrams: false,
123
+ output_unigrams_if_no_shingles: true
124
+ },
125
+ searchkick_suggest_shingle: {
126
+ type: "shingle",
127
+ max_shingle_size: 5
128
+ },
129
+ searchkick_edge_ngram: {
130
+ type: "edge_ngram",
131
+ min_gram: 1,
132
+ max_gram: 50
133
+ },
134
+ searchkick_ngram: {
135
+ type: "ngram",
136
+ min_gram: 1,
137
+ max_gram: 50
138
+ },
139
+ searchkick_stemmer: {
140
+ # use stemmer if language is lowercase, snowball otherwise
141
+ type: language == language.to_s.downcase ? "stemmer" : "snowball",
142
+ language: language || "English"
143
+ }
144
+ },
145
+ char_filter: {
146
+ # https://www.elastic.co/guide/en/elasticsearch/guide/current/custom-analyzers.html
147
+ # &_to_and
148
+ ampersand: {
149
+ type: "mapping",
150
+ mappings: ["&=> and "]
152
151
  }
153
152
  }
154
153
  }
154
+ }
155
155
 
156
- if Searchkick.env == "test"
157
- settings[:number_of_shards] = 1
158
- settings[:number_of_replicas] = 0
159
- end
156
+ update_language(settings, language)
157
+ update_stemming(settings)
160
158
 
161
- if options[:similarity]
162
- settings[:similarity] = {default: {type: options[:similarity]}}
163
- end
159
+ if Searchkick.env == "test"
160
+ settings[:number_of_shards] = 1
161
+ settings[:number_of_replicas] = 0
162
+ end
163
+
164
+ # TODO remove in Searchkick 5 (classic no longer supported)
165
+ if options[:similarity]
166
+ settings[:similarity] = {default: {type: options[:similarity]}}
167
+ end
168
+
169
+ unless below62?
170
+ settings[:index] = {
171
+ max_ngram_diff: 49,
172
+ max_shingle_diff: 4
173
+ }
174
+ end
164
175
 
165
- settings.deep_merge!(options[:settings] || {})
176
+ if options[:case_sensitive]
177
+ settings[:analysis][:analyzer].each do |_, analyzer|
178
+ analyzer[:filter].delete("lowercase")
179
+ end
180
+ end
166
181
 
167
- # synonyms
168
- synonyms = options[:synonyms] || []
182
+ # TODO do this last in Searchkick 5
183
+ settings = settings.symbolize_keys.deep_merge((options[:settings] || {}).symbolize_keys)
169
184
 
170
- synonyms = synonyms.call if synonyms.respond_to?(:call)
185
+ add_synonyms(settings)
186
+ add_search_synonyms(settings)
187
+ # TODO remove in Searchkick 5
188
+ add_wordnet(settings) if options[:wordnet]
171
189
 
172
- if synonyms.any?
173
- settings[:analysis][:filter][:searchkick_synonym] = {
174
- type: "synonym",
175
- synonyms: synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.join(",") : s }.map(&:downcase)
176
- }
177
- # choosing a place for the synonym filter when stemming is not easy
178
- # https://groups.google.com/forum/#!topic/elasticsearch/p7qcQlgHdB8
179
- # TODO use a snowball stemmer on synonyms when creating the token filter
180
-
181
- # http://elasticsearch-users.115913.n3.nabble.com/synonym-multi-words-search-td4030811.html
182
- # I find the following approach effective if you are doing multi-word synonyms (synonym phrases):
183
- # - Only apply the synonym expansion at index time
184
- # - Don't have the synonym filter applied search
185
- # - Use directional synonyms where appropriate. You want to make sure that you're not injecting terms that are too general.
186
- settings[:analysis][:analyzer][default_analyzer][:filter].insert(4, "searchkick_synonym") if below60
187
- settings[:analysis][:analyzer][default_analyzer][:filter] << "searchkick_synonym"
188
-
189
- %w(word_start word_middle word_end).each do |type|
190
- settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_synonym")
191
- end
190
+ if options[:special_characters] == false
191
+ settings[:analysis][:analyzer].each_value do |analyzer_settings|
192
+ analyzer_settings[:filter].reject! { |f| f == "asciifolding" }
192
193
  end
194
+ end
193
195
 
194
- if options[:wordnet]
195
- settings[:analysis][:filter][:searchkick_wordnet] = {
196
- type: "synonym",
197
- format: "wordnet",
198
- synonyms_path: Searchkick.wordnet_path
196
+ settings
197
+ end
198
+
199
+ def update_language(settings, language)
200
+ case language
201
+ when "chinese"
202
+ settings[:analysis][:analyzer].merge!(
203
+ default_analyzer => {
204
+ type: "ik_smart"
205
+ },
206
+ searchkick_search: {
207
+ type: "ik_smart"
208
+ },
209
+ searchkick_search2: {
210
+ type: "ik_max_word"
211
+ }
212
+ )
213
+ when "chinese2", "smartcn"
214
+ settings[:analysis][:analyzer].merge!(
215
+ default_analyzer => {
216
+ type: "smartcn"
217
+ },
218
+ searchkick_search: {
219
+ type: "smartcn"
220
+ },
221
+ searchkick_search2: {
222
+ type: "smartcn"
223
+ }
224
+ )
225
+ when "japanese"
226
+ settings[:analysis][:analyzer].merge!(
227
+ default_analyzer => {
228
+ type: "kuromoji"
229
+ },
230
+ searchkick_search: {
231
+ type: "kuromoji"
232
+ },
233
+ searchkick_search2: {
234
+ type: "kuromoji"
199
235
  }
236
+ )
237
+ when "korean"
238
+ settings[:analysis][:analyzer].merge!(
239
+ default_analyzer => {
240
+ type: "openkoreantext-analyzer"
241
+ },
242
+ searchkick_search: {
243
+ type: "openkoreantext-analyzer"
244
+ },
245
+ searchkick_search2: {
246
+ type: "openkoreantext-analyzer"
247
+ }
248
+ )
249
+ when "korean2"
250
+ settings[:analysis][:analyzer].merge!(
251
+ default_analyzer => {
252
+ type: "nori"
253
+ },
254
+ searchkick_search: {
255
+ type: "nori"
256
+ },
257
+ searchkick_search2: {
258
+ type: "nori"
259
+ }
260
+ )
261
+ when "vietnamese"
262
+ settings[:analysis][:analyzer].merge!(
263
+ default_analyzer => {
264
+ type: "vi_analyzer"
265
+ },
266
+ searchkick_search: {
267
+ type: "vi_analyzer"
268
+ },
269
+ searchkick_search2: {
270
+ type: "vi_analyzer"
271
+ }
272
+ )
273
+ when "polish", "ukrainian"
274
+ settings[:analysis][:analyzer].merge!(
275
+ default_analyzer => {
276
+ type: language
277
+ },
278
+ searchkick_search: {
279
+ type: language
280
+ },
281
+ searchkick_search2: {
282
+ type: language
283
+ }
284
+ )
285
+ end
286
+ end
200
287
 
201
- settings[:analysis][:analyzer][default_analyzer][:filter].insert(4, "searchkick_wordnet")
202
- settings[:analysis][:analyzer][default_analyzer][:filter] << "searchkick_wordnet"
288
+ def update_stemming(settings)
289
+ stem = options[:stem]
203
290
 
204
- %w(word_start word_middle word_end).each do |type|
205
- settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_wordnet")
206
- end
207
- end
291
+ # language analyzer used
292
+ stem = false if settings[:analysis][:analyzer][default_analyzer][:type] != "custom"
208
293
 
209
- if options[:special_characters] == false
210
- settings[:analysis][:analyzer].each do |_, analyzer_settings|
211
- analyzer_settings[:filter].reject! { |f| f == "asciifolding" }
212
- end
294
+ if stem == false
295
+ settings[:analysis][:filter].delete(:searchkick_stemmer)
296
+ settings[:analysis][:analyzer].each do |_, analyzer|
297
+ analyzer[:filter].delete("searchkick_stemmer") if analyzer[:filter]
213
298
  end
299
+ end
214
300
 
215
- mapping = {}
301
+ if options[:stemmer_override]
302
+ stemmer_override = {
303
+ type: "stemmer_override"
304
+ }
305
+ if options[:stemmer_override].is_a?(String)
306
+ stemmer_override[:rules_path] = options[:stemmer_override]
307
+ else
308
+ stemmer_override[:rules] = options[:stemmer_override]
309
+ end
310
+ settings[:analysis][:filter][:searchkick_stemmer_override] = stemmer_override
216
311
 
217
- # conversions
218
- Array(options[:conversions]).each do |conversions_field|
219
- mapping[conversions_field] = {
220
- type: "nested",
221
- properties: {
222
- query: {type: default_type, analyzer: "searchkick_keyword"},
223
- count: {type: "integer"}
224
- }
225
- }
312
+ settings[:analysis][:analyzer].each do |_, analyzer|
313
+ stemmer_index = analyzer[:filter].index("searchkick_stemmer") if analyzer[:filter]
314
+ analyzer[:filter].insert(stemmer_index, "searchkick_stemmer_override") if stemmer_index
226
315
  end
316
+ end
227
317
 
228
- mapping_options = Hash[
229
- [:suggest, :word, :text_start, :text_middle, :text_end, :word_start, :word_middle, :word_end, :highlight, :searchable, :filterable]
230
- .map { |type| [type, (options[type] || []).map(&:to_s)] }
231
- ]
318
+ if options[:stem_exclusion]
319
+ settings[:analysis][:filter][:searchkick_stem_exclusion] = {
320
+ type: "keyword_marker",
321
+ keywords: options[:stem_exclusion]
322
+ }
323
+
324
+ settings[:analysis][:analyzer].each do |_, analyzer|
325
+ stemmer_index = analyzer[:filter].index("searchkick_stemmer") if analyzer[:filter]
326
+ analyzer[:filter].insert(stemmer_index, "searchkick_stem_exclusion") if stemmer_index
327
+ end
328
+ end
329
+ end
232
330
 
233
- word = options[:word] != false && (!options[:match] || options[:match] == :word)
331
+ def generate_mappings
332
+ mapping = {}
234
333
 
235
- mapping_options[:searchable].delete("_all")
334
+ keyword_mapping = {type: "keyword"}
335
+ keyword_mapping[:ignore_above] = options[:ignore_above] || 30000
236
336
 
237
- analyzed_field_options = {type: default_type, index: index_true_value, analyzer: default_analyzer}
337
+ # conversions
338
+ Array(options[:conversions]).each do |conversions_field|
339
+ mapping[conversions_field] = {
340
+ type: "nested",
341
+ properties: {
342
+ query: {type: default_type, analyzer: "searchkick_keyword"},
343
+ count: {type: "integer"}
344
+ }
345
+ }
346
+ end
238
347
 
239
- mapping_options.values.flatten.uniq.each do |field|
240
- fields = {}
348
+ mapping_options = Hash[
349
+ [:suggest, :word, :text_start, :text_middle, :text_end, :word_start, :word_middle, :word_end, :highlight, :searchable, :filterable]
350
+ .map { |type| [type, (options[type] || []).map(&:to_s)] }
351
+ ]
241
352
 
242
- if options.key?(:filterable) && !mapping_options[:filterable].include?(field)
243
- fields[field] = {type: default_type, index: index_false_value}
244
- else
245
- fields[field] = keyword_mapping
246
- end
353
+ word = options[:word] != false && (!options[:match] || options[:match] == :word)
247
354
 
248
- if !options[:searchable] || mapping_options[:searchable].include?(field)
249
- if word
250
- fields["analyzed"] = analyzed_field_options
355
+ mapping_options[:searchable].delete("_all")
251
356
 
252
- if mapping_options[:highlight].include?(field)
253
- fields["analyzed"][:term_vector] = "with_positions_offsets"
254
- end
255
- end
357
+ analyzed_field_options = {type: default_type, index: true, analyzer: default_analyzer}
256
358
 
257
- mapping_options.except(:highlight, :searchable, :filterable, :word).each do |type, f|
258
- if options[:match] == type || f.include?(field)
259
- fields[type] = {type: default_type, index: index_true_value, analyzer: "searchkick_#{type}_index"}
260
- end
261
- end
262
- end
359
+ mapping_options.values.flatten.uniq.each do |field|
360
+ fields = {}
263
361
 
264
- mapping[field] = fields[field].merge(fields: fields.except(field))
362
+ if options.key?(:filterable) && !mapping_options[:filterable].include?(field)
363
+ fields[field] = {type: default_type, index: false}
364
+ else
365
+ fields[field] = keyword_mapping
265
366
  end
266
367
 
267
- (options[:locations] || []).map(&:to_s).each do |field|
268
- mapping[field] = {
269
- type: "geo_point"
270
- }
271
- end
368
+ if !options[:searchable] || mapping_options[:searchable].include?(field)
369
+ if word
370
+ fields[:analyzed] = analyzed_field_options
272
371
 
273
- options[:geo_shape] = options[:geo_shape].product([{}]).to_h if options[:geo_shape].is_a?(Array)
274
- (options[:geo_shape] || {}).each do |field, shape_options|
275
- mapping[field] = shape_options.merge(type: "geo_shape")
276
- end
372
+ if mapping_options[:highlight].include?(field)
373
+ fields[:analyzed][:term_vector] = "with_positions_offsets"
374
+ end
375
+ end
277
376
 
278
- routing = {}
279
- if options[:routing]
280
- routing = {required: true}
281
- unless options[:routing] == true
282
- routing[:path] = options[:routing].to_s
377
+ mapping_options.except(:highlight, :searchable, :filterable, :word).each do |type, f|
378
+ if options[:match] == type || f.include?(field)
379
+ fields[type] = {type: default_type, index: true, analyzer: "searchkick_#{type}_index"}
380
+ end
283
381
  end
284
382
  end
285
383
 
286
- dynamic_fields = {
287
- # analyzed field must be the default field for include_in_all
288
- # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
289
- # however, we can include the not_analyzed field in _all
290
- # and the _all index analyzer will take care of it
291
- "{name}" => keyword_mapping
384
+ mapping[field] = fields[field].merge(fields: fields.except(field))
385
+ end
386
+
387
+ (options[:locations] || []).map(&:to_s).each do |field|
388
+ mapping[field] = {
389
+ type: "geo_point"
292
390
  }
391
+ end
293
392
 
294
- if below60 && all
295
- dynamic_fields["{name}"][:include_in_all] = !options[:searchable]
296
- end
393
+ options[:geo_shape] = options[:geo_shape].product([{}]).to_h if options[:geo_shape].is_a?(Array)
394
+ (options[:geo_shape] || {}).each do |field, shape_options|
395
+ mapping[field] = shape_options.merge(type: "geo_shape")
396
+ end
397
+
398
+ if options[:inheritance]
399
+ mapping[:type] = keyword_mapping
400
+ end
297
401
 
298
- if options.key?(:filterable)
299
- dynamic_fields["{name}"] = {type: default_type, index: index_false_value}
402
+ routing = {}
403
+ if options[:routing]
404
+ routing = {required: true}
405
+ unless options[:routing] == true
406
+ routing[:path] = options[:routing].to_s
300
407
  end
408
+ end
301
409
 
302
- unless options[:searchable]
303
- if options[:match] && options[:match] != :word
304
- dynamic_fields[options[:match]] = {type: default_type, index: index_true_value, analyzer: "searchkick_#{options[:match]}_index"}
305
- end
410
+ dynamic_fields = {
411
+ # analyzed field must be the default field for include_in_all
412
+ # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
413
+ # however, we can include the not_analyzed field in _all
414
+ # and the _all index analyzer will take care of it
415
+ "{name}" => keyword_mapping
416
+ }
306
417
 
307
- if word
308
- dynamic_fields["analyzed"] = analyzed_field_options
309
- end
418
+ if options.key?(:filterable)
419
+ dynamic_fields["{name}"] = {type: default_type, index: false}
420
+ end
421
+
422
+ unless options[:searchable]
423
+ if options[:match] && options[:match] != :word
424
+ dynamic_fields[options[:match]] = {type: default_type, index: true, analyzer: "searchkick_#{options[:match]}_index"}
310
425
  end
311
426
 
312
- # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
313
- multi_field = dynamic_fields["{name}"].merge(fields: dynamic_fields.except("{name}"))
314
-
315
- mappings = {
316
- _default_: {
317
- properties: mapping,
318
- _routing: routing,
319
- # https://gist.github.com/kimchy/2898285
320
- dynamic_templates: [
321
- {
322
- string_template: {
323
- match: "*",
324
- match_mapping_type: "string",
325
- mapping: multi_field
326
- }
327
- }
328
- ]
427
+ if word
428
+ dynamic_fields[:analyzed] = analyzed_field_options
429
+ end
430
+ end
431
+
432
+ # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
433
+ multi_field = dynamic_fields["{name}"].merge(fields: dynamic_fields.except("{name}"))
434
+
435
+ mappings = {
436
+ properties: mapping,
437
+ _routing: routing,
438
+ # https://gist.github.com/kimchy/2898285
439
+ dynamic_templates: [
440
+ {
441
+ string_template: {
442
+ match: "*",
443
+ match_mapping_type: "string",
444
+ mapping: multi_field
445
+ }
329
446
  }
447
+ ]
448
+ }
449
+
450
+ if below70?
451
+ mappings = {index_type => mappings}
452
+ end
453
+
454
+ mappings
455
+ end
456
+
457
+ def add_synonyms(settings)
458
+ synonyms = options[:synonyms] || []
459
+ synonyms = synonyms.call if synonyms.respond_to?(:call)
460
+ if synonyms.any?
461
+ settings[:analysis][:filter][:searchkick_synonym] = {
462
+ type: "synonym",
463
+ # only remove a single space from synonyms so three-word synonyms will fail noisily instead of silently
464
+ synonyms: synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.map { |s2| s2.sub(/\s+/, "") }.join(",") : s }.map(&:downcase)
330
465
  }
466
+ # choosing a place for the synonym filter when stemming is not easy
467
+ # https://groups.google.com/forum/#!topic/elasticsearch/p7qcQlgHdB8
468
+ # TODO use a snowball stemmer on synonyms when creating the token filter
469
+
470
+ # http://elasticsearch-users.115913.n3.nabble.com/synonym-multi-words-search-td4030811.html
471
+ # I find the following approach effective if you are doing multi-word synonyms (synonym phrases):
472
+ # - Only apply the synonym expansion at index time
473
+ # - Don't have the synonym filter applied search
474
+ # - Use directional synonyms where appropriate. You want to make sure that you're not injecting terms that are too general.
475
+ settings[:analysis][:analyzer][default_analyzer][:filter].insert(2, "searchkick_synonym")
476
+
477
+ %w(word_start word_middle word_end).each do |type|
478
+ settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_synonym")
479
+ end
480
+ end
481
+ end
331
482
 
332
- if below60
333
- all_enabled = all && (!options[:searchable] || options[:searchable].to_a.map(&:to_s).include?("_all"))
334
- mappings[:_default_][:_all] = all_enabled ? analyzed_field_options : {enabled: false}
483
+ def add_search_synonyms(settings)
484
+ search_synonyms = options[:search_synonyms] || []
485
+ search_synonyms = search_synonyms.call if search_synonyms.respond_to?(:call)
486
+ if search_synonyms.is_a?(String) || search_synonyms.any?
487
+ if search_synonyms.is_a?(String)
488
+ synonym_graph = {
489
+ type: "synonym_graph",
490
+ synonyms_path: search_synonyms
491
+ }
492
+ synonym_graph[:updateable] = true unless below73?
493
+ else
494
+ synonym_graph = {
495
+ type: "synonym_graph",
496
+ # TODO confirm this is correct
497
+ synonyms: search_synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.join(",") : s }.map(&:downcase)
498
+ }
335
499
  end
500
+ settings[:analysis][:filter][:searchkick_synonym_graph] = synonym_graph
336
501
 
337
- mappings = mappings.deep_merge(options[:mappings] || {})
502
+ [:searchkick_search2, :searchkick_word_search].each do |analyzer|
503
+ settings[:analysis][:analyzer][analyzer][:filter].insert(2, "searchkick_synonym_graph")
504
+ end
338
505
  end
506
+ end
339
507
 
340
- {
341
- settings: settings,
342
- mappings: mappings
508
+ def add_wordnet(settings)
509
+ settings[:analysis][:filter][:searchkick_wordnet] = {
510
+ type: "synonym",
511
+ format: "wordnet",
512
+ synonyms_path: Searchkick.wordnet_path
343
513
  }
514
+
515
+ settings[:analysis][:analyzer][default_analyzer][:filter].insert(4, "searchkick_wordnet")
516
+ settings[:analysis][:analyzer][default_analyzer][:filter] << "searchkick_wordnet"
517
+
518
+ %w(word_start word_middle word_end).each do |type|
519
+ settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_wordnet")
520
+ end
521
+ end
522
+
523
+ def set_deep_paging(settings)
524
+ if !settings.dig(:index, :max_result_window) && !settings[:"index.max_result_window"]
525
+ settings[:index] ||= {}
526
+ settings[:index][:max_result_window] = 1_000_000_000
527
+ end
528
+ end
529
+
530
+ def index_type
531
+ @index_type ||= begin
532
+ index_type = options[:_type]
533
+ index_type = index_type.call if index_type.respond_to?(:call)
534
+ index_type
535
+ end
536
+ end
537
+
538
+ def default_type
539
+ "text"
540
+ end
541
+
542
+ def default_analyzer
543
+ :searchkick_index
544
+ end
545
+
546
+ def below62?
547
+ Searchkick.server_below?("6.2.0")
548
+ end
549
+
550
+ def below70?
551
+ Searchkick.server_below?("7.0.0")
552
+ end
553
+
554
+ def below73?
555
+ Searchkick.server_below?("7.3.0")
344
556
  end
345
557
  end
346
558
  end