searchkick 2.3.2 → 5.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. checksums.yaml +5 -5
  2. data/CHANGELOG.md +377 -84
  3. data/LICENSE.txt +1 -1
  4. data/README.md +859 -602
  5. data/lib/searchkick/bulk_reindex_job.rb +13 -9
  6. data/lib/searchkick/controller_runtime.rb +40 -0
  7. data/lib/searchkick/hash_wrapper.rb +12 -0
  8. data/lib/searchkick/index.rb +281 -356
  9. data/lib/searchkick/index_cache.rb +30 -0
  10. data/lib/searchkick/index_options.rb +487 -281
  11. data/lib/searchkick/indexer.rb +15 -8
  12. data/lib/searchkick/log_subscriber.rb +57 -0
  13. data/lib/searchkick/middleware.rb +9 -2
  14. data/lib/searchkick/model.rb +72 -118
  15. data/lib/searchkick/multi_search.rb +9 -10
  16. data/lib/searchkick/process_batch_job.rb +12 -15
  17. data/lib/searchkick/process_queue_job.rb +22 -13
  18. data/lib/searchkick/query.rb +458 -217
  19. data/lib/searchkick/railtie.rb +7 -0
  20. data/lib/searchkick/record_data.rb +128 -0
  21. data/lib/searchkick/record_indexer.rb +164 -0
  22. data/lib/searchkick/reindex_queue.rb +51 -9
  23. data/lib/searchkick/reindex_v2_job.rb +10 -32
  24. data/lib/searchkick/relation.rb +247 -0
  25. data/lib/searchkick/relation_indexer.rb +155 -0
  26. data/lib/searchkick/results.rb +201 -82
  27. data/lib/searchkick/version.rb +1 -1
  28. data/lib/searchkick/where.rb +11 -0
  29. data/lib/searchkick.rb +269 -97
  30. data/lib/tasks/searchkick.rake +37 -0
  31. metadata +24 -178
  32. data/.gitignore +0 -22
  33. data/.travis.yml +0 -39
  34. data/Gemfile +0 -16
  35. data/Rakefile +0 -20
  36. data/benchmark/Gemfile +0 -23
  37. data/benchmark/benchmark.rb +0 -97
  38. data/lib/searchkick/logging.rb +0 -242
  39. data/lib/searchkick/tasks.rb +0 -33
  40. data/searchkick.gemspec +0 -28
  41. data/test/aggs_test.rb +0 -197
  42. data/test/autocomplete_test.rb +0 -75
  43. data/test/boost_test.rb +0 -202
  44. data/test/callbacks_test.rb +0 -59
  45. data/test/ci/before_install.sh +0 -17
  46. data/test/errors_test.rb +0 -19
  47. data/test/gemfiles/activerecord31.gemfile +0 -7
  48. data/test/gemfiles/activerecord32.gemfile +0 -7
  49. data/test/gemfiles/activerecord40.gemfile +0 -8
  50. data/test/gemfiles/activerecord41.gemfile +0 -8
  51. data/test/gemfiles/activerecord42.gemfile +0 -7
  52. data/test/gemfiles/activerecord50.gemfile +0 -7
  53. data/test/gemfiles/apartment.gemfile +0 -8
  54. data/test/gemfiles/cequel.gemfile +0 -8
  55. data/test/gemfiles/mongoid2.gemfile +0 -7
  56. data/test/gemfiles/mongoid3.gemfile +0 -6
  57. data/test/gemfiles/mongoid4.gemfile +0 -7
  58. data/test/gemfiles/mongoid5.gemfile +0 -7
  59. data/test/gemfiles/mongoid6.gemfile +0 -12
  60. data/test/gemfiles/nobrainer.gemfile +0 -8
  61. data/test/gemfiles/parallel_tests.gemfile +0 -8
  62. data/test/geo_shape_test.rb +0 -175
  63. data/test/highlight_test.rb +0 -78
  64. data/test/index_test.rb +0 -166
  65. data/test/inheritance_test.rb +0 -83
  66. data/test/marshal_test.rb +0 -8
  67. data/test/match_test.rb +0 -276
  68. data/test/misspellings_test.rb +0 -56
  69. data/test/model_test.rb +0 -42
  70. data/test/multi_search_test.rb +0 -36
  71. data/test/multi_tenancy_test.rb +0 -22
  72. data/test/order_test.rb +0 -46
  73. data/test/pagination_test.rb +0 -70
  74. data/test/partial_reindex_test.rb +0 -58
  75. data/test/query_test.rb +0 -35
  76. data/test/records_test.rb +0 -10
  77. data/test/reindex_test.rb +0 -64
  78. data/test/reindex_v2_job_test.rb +0 -32
  79. data/test/routing_test.rb +0 -23
  80. data/test/should_index_test.rb +0 -32
  81. data/test/similar_test.rb +0 -28
  82. data/test/sql_test.rb +0 -214
  83. data/test/suggest_test.rb +0 -95
  84. data/test/support/kaminari.yml +0 -21
  85. data/test/synonyms_test.rb +0 -67
  86. data/test/test_helper.rb +0 -567
  87. data/test/where_test.rb +0 -223
@@ -1,346 +1,552 @@
1
1
  module Searchkick
2
- module IndexOptions
2
+ class IndexOptions
3
+ attr_reader :options
4
+
5
+ def initialize(index)
6
+ @options = index.options
7
+ end
8
+
3
9
  def index_options
4
- options = @options
5
- language = options[:language]
6
- language = language.call if language.respond_to?(:call)
10
+ # mortal symbols are garbage collected in Ruby 2.2+
11
+ custom_settings = (options[:settings] || {}).deep_symbolize_keys
12
+ custom_mappings = (options[:mappings] || {}).deep_symbolize_keys
7
13
 
8
14
  if options[:mappings] && !options[:merge_mappings]
9
- settings = options[:settings] || {}
10
- mappings = options[:mappings]
15
+ settings = custom_settings
16
+ mappings = custom_mappings
11
17
  else
12
- below22 = Searchkick.server_below?("2.2.0")
13
- below50 = Searchkick.server_below?("5.0.0-alpha1")
14
- below60 = Searchkick.server_below?("6.0.0-alpha1")
15
- default_type = below50 ? "string" : "text"
16
- default_analyzer = :searchkick_index
17
- keyword_mapping =
18
- if below50
19
- {
20
- type: default_type,
21
- index: "not_analyzed"
22
- }
23
- else
24
- {
25
- type: "keyword"
26
- }
27
- end
18
+ settings = generate_settings.deep_symbolize_keys.deep_merge(custom_settings)
19
+ mappings = generate_mappings.deep_symbolize_keys.deep_merge(custom_mappings)
20
+ end
21
+
22
+ set_deep_paging(settings) if options[:deep_paging] || options[:max_result_window]
23
+
24
+ {
25
+ settings: settings,
26
+ mappings: mappings
27
+ }
28
+ end
29
+
30
+ def generate_settings
31
+ language = options[:language]
32
+ language = language.call if language.respond_to?(:call)
28
33
 
29
- all = options.key?(:_all) ? options[:_all] : below60
30
- index_true_value = below50 ? "analyzed" : true
31
- index_false_value = below50 ? "no" : false
32
-
33
- keyword_mapping[:ignore_above] = (options[:ignore_above] || 30000) unless below22
34
-
35
- settings = {
36
- analysis: {
37
- analyzer: {
38
- searchkick_keyword: {
39
- type: "custom",
40
- tokenizer: "keyword",
41
- filter: ["lowercase"] + (options[:stem_conversions] == false ? [] : ["searchkick_stemmer"])
42
- },
43
- default_analyzer => {
44
- type: "custom",
45
- # character filters -> tokenizer -> token filters
46
- # https://www.elastic.co/guide/en/elasticsearch/guide/current/analysis-intro.html
47
- char_filter: ["ampersand"],
48
- tokenizer: "standard",
49
- # synonym should come last, after stemming and shingle
50
- # shingle must come before searchkick_stemmer
51
- filter: ["standard", "lowercase", "asciifolding", "searchkick_index_shingle", "searchkick_stemmer"]
52
- },
53
- searchkick_search: {
54
- type: "custom",
55
- char_filter: ["ampersand"],
56
- tokenizer: "standard",
57
- filter: ["standard", "lowercase", "asciifolding", "searchkick_search_shingle", "searchkick_stemmer"]
58
- },
59
- searchkick_search2: {
60
- type: "custom",
61
- char_filter: ["ampersand"],
62
- tokenizer: "standard",
63
- filter: ["standard", "lowercase", "asciifolding", "searchkick_stemmer"]
64
- },
65
- # https://github.com/leschenko/elasticsearch_autocomplete/blob/master/lib/elasticsearch_autocomplete/analyzers.rb
66
- searchkick_autocomplete_search: {
67
- type: "custom",
68
- tokenizer: "keyword",
69
- filter: ["lowercase", "asciifolding"]
70
- },
71
- searchkick_word_search: {
72
- type: "custom",
73
- tokenizer: "standard",
74
- filter: ["lowercase", "asciifolding"]
75
- },
76
- searchkick_suggest_index: {
77
- type: "custom",
78
- tokenizer: "standard",
79
- filter: ["lowercase", "asciifolding", "searchkick_suggest_shingle"]
80
- },
81
- searchkick_text_start_index: {
82
- type: "custom",
83
- tokenizer: "keyword",
84
- filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
85
- },
86
- searchkick_text_middle_index: {
87
- type: "custom",
88
- tokenizer: "keyword",
89
- filter: ["lowercase", "asciifolding", "searchkick_ngram"]
90
- },
91
- searchkick_text_end_index: {
92
- type: "custom",
93
- tokenizer: "keyword",
94
- filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
95
- },
96
- searchkick_word_start_index: {
97
- type: "custom",
98
- tokenizer: "standard",
99
- filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
100
- },
101
- searchkick_word_middle_index: {
102
- type: "custom",
103
- tokenizer: "standard",
104
- filter: ["lowercase", "asciifolding", "searchkick_ngram"]
105
- },
106
- searchkick_word_end_index: {
107
- type: "custom",
108
- tokenizer: "standard",
109
- filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
110
- }
34
+ settings = {
35
+ analysis: {
36
+ analyzer: {
37
+ searchkick_keyword: {
38
+ type: "custom",
39
+ tokenizer: "keyword",
40
+ filter: ["lowercase"] + (options[:stem_conversions] ? ["searchkick_stemmer"] : [])
41
+ },
42
+ default_analyzer => {
43
+ type: "custom",
44
+ # character filters -> tokenizer -> token filters
45
+ # https://www.elastic.co/guide/en/elasticsearch/guide/current/analysis-intro.html
46
+ char_filter: ["ampersand"],
47
+ tokenizer: "standard",
48
+ # synonym should come last, after stemming and shingle
49
+ # shingle must come before searchkick_stemmer
50
+ filter: ["lowercase", "asciifolding", "searchkick_index_shingle", "searchkick_stemmer"]
51
+ },
52
+ searchkick_search: {
53
+ type: "custom",
54
+ char_filter: ["ampersand"],
55
+ tokenizer: "standard",
56
+ filter: ["lowercase", "asciifolding", "searchkick_search_shingle", "searchkick_stemmer"]
57
+ },
58
+ searchkick_search2: {
59
+ type: "custom",
60
+ char_filter: ["ampersand"],
61
+ tokenizer: "standard",
62
+ filter: ["lowercase", "asciifolding", "searchkick_stemmer"]
63
+ },
64
+ # https://github.com/leschenko/elasticsearch_autocomplete/blob/master/lib/elasticsearch_autocomplete/analyzers.rb
65
+ searchkick_autocomplete_search: {
66
+ type: "custom",
67
+ tokenizer: "keyword",
68
+ filter: ["lowercase", "asciifolding"]
69
+ },
70
+ searchkick_word_search: {
71
+ type: "custom",
72
+ tokenizer: "standard",
73
+ filter: ["lowercase", "asciifolding"]
74
+ },
75
+ searchkick_suggest_index: {
76
+ type: "custom",
77
+ tokenizer: "standard",
78
+ filter: ["lowercase", "asciifolding", "searchkick_suggest_shingle"]
79
+ },
80
+ searchkick_text_start_index: {
81
+ type: "custom",
82
+ tokenizer: "keyword",
83
+ filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
84
+ },
85
+ searchkick_text_middle_index: {
86
+ type: "custom",
87
+ tokenizer: "keyword",
88
+ filter: ["lowercase", "asciifolding", "searchkick_ngram"]
89
+ },
90
+ searchkick_text_end_index: {
91
+ type: "custom",
92
+ tokenizer: "keyword",
93
+ filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
94
+ },
95
+ searchkick_word_start_index: {
96
+ type: "custom",
97
+ tokenizer: "standard",
98
+ filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
99
+ },
100
+ searchkick_word_middle_index: {
101
+ type: "custom",
102
+ tokenizer: "standard",
103
+ filter: ["lowercase", "asciifolding", "searchkick_ngram"]
104
+ },
105
+ searchkick_word_end_index: {
106
+ type: "custom",
107
+ tokenizer: "standard",
108
+ filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
109
+ }
110
+ },
111
+ filter: {
112
+ searchkick_index_shingle: {
113
+ type: "shingle",
114
+ token_separator: ""
115
+ },
116
+ # lucky find https://web.archiveorange.com/archive/v/AAfXfQ17f57FcRINsof7
117
+ searchkick_search_shingle: {
118
+ type: "shingle",
119
+ token_separator: "",
120
+ output_unigrams: false,
121
+ output_unigrams_if_no_shingles: true
122
+ },
123
+ searchkick_suggest_shingle: {
124
+ type: "shingle",
125
+ max_shingle_size: 5
111
126
  },
112
- filter: {
113
- searchkick_index_shingle: {
114
- type: "shingle",
115
- token_separator: ""
116
- },
117
- # lucky find http://web.archiveorange.com/archive/v/AAfXfQ17f57FcRINsof7
118
- searchkick_search_shingle: {
119
- type: "shingle",
120
- token_separator: "",
121
- output_unigrams: false,
122
- output_unigrams_if_no_shingles: true
123
- },
124
- searchkick_suggest_shingle: {
125
- type: "shingle",
126
- max_shingle_size: 5
127
- },
128
- searchkick_edge_ngram: {
129
- type: "edgeNGram",
130
- min_gram: 1,
131
- max_gram: 50
132
- },
133
- searchkick_ngram: {
134
- type: "nGram",
135
- min_gram: 1,
136
- max_gram: 50
137
- },
138
- searchkick_stemmer: {
139
- # use stemmer if language is lowercase, snowball otherwise
140
- # TODO deprecate language option in favor of stemmer
141
- type: language == language.to_s.downcase ? "stemmer" : "snowball",
142
- language: language || "English"
143
- }
127
+ searchkick_edge_ngram: {
128
+ type: "edge_ngram",
129
+ min_gram: 1,
130
+ max_gram: 50
144
131
  },
145
- char_filter: {
146
- # https://www.elastic.co/guide/en/elasticsearch/guide/current/custom-analyzers.html
147
- # &_to_and
148
- ampersand: {
149
- type: "mapping",
150
- mappings: ["&=> and "]
151
- }
132
+ searchkick_ngram: {
133
+ type: "ngram",
134
+ min_gram: 1,
135
+ max_gram: 50
136
+ },
137
+ searchkick_stemmer: {
138
+ # use stemmer if language is lowercase, snowball otherwise
139
+ type: language == language.to_s.downcase ? "stemmer" : "snowball",
140
+ language: language || "English"
141
+ }
142
+ },
143
+ char_filter: {
144
+ # https://www.elastic.co/guide/en/elasticsearch/guide/current/custom-analyzers.html
145
+ # &_to_and
146
+ ampersand: {
147
+ type: "mapping",
148
+ mappings: ["&=> and "]
152
149
  }
153
150
  }
154
151
  }
152
+ }
155
153
 
156
- if Searchkick.env == "test"
157
- settings[:number_of_shards] = 1
158
- settings[:number_of_replicas] = 0
159
- end
154
+ raise ArgumentError, "Can't pass both language and stemmer" if options[:stemmer] && language
155
+ update_language(settings, language)
156
+ update_stemming(settings)
160
157
 
161
- if options[:similarity]
162
- settings[:similarity] = {default: {type: options[:similarity]}}
158
+ if Searchkick.env == "test"
159
+ settings[:number_of_shards] = 1
160
+ settings[:number_of_replicas] = 0
161
+ end
162
+
163
+ if options[:similarity]
164
+ settings[:similarity] = {default: {type: options[:similarity]}}
165
+ end
166
+
167
+ settings[:index] = {
168
+ max_ngram_diff: 49,
169
+ max_shingle_diff: 4
170
+ }
171
+
172
+ if options[:case_sensitive]
173
+ settings[:analysis][:analyzer].each do |_, analyzer|
174
+ analyzer[:filter].delete("lowercase")
163
175
  end
176
+ end
164
177
 
165
- settings.deep_merge!(options[:settings] || {})
178
+ add_synonyms(settings)
179
+ add_search_synonyms(settings)
166
180
 
167
- # synonyms
168
- synonyms = options[:synonyms] || []
181
+ if options[:special_characters] == false
182
+ settings[:analysis][:analyzer].each_value do |analyzer_settings|
183
+ analyzer_settings[:filter].reject! { |f| f == "asciifolding" }
184
+ end
185
+ end
169
186
 
170
- synonyms = synonyms.call if synonyms.respond_to?(:call)
187
+ settings
188
+ end
171
189
 
172
- if synonyms.any?
173
- settings[:analysis][:filter][:searchkick_synonym] = {
174
- type: "synonym",
175
- synonyms: synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.join(",") : s }.map(&:downcase)
190
+ def update_language(settings, language)
191
+ case language
192
+ when "chinese"
193
+ settings[:analysis][:analyzer].merge!(
194
+ default_analyzer => {
195
+ type: "ik_smart"
196
+ },
197
+ searchkick_search: {
198
+ type: "ik_smart"
199
+ },
200
+ searchkick_search2: {
201
+ type: "ik_max_word"
176
202
  }
177
- # choosing a place for the synonym filter when stemming is not easy
178
- # https://groups.google.com/forum/#!topic/elasticsearch/p7qcQlgHdB8
179
- # TODO use a snowball stemmer on synonyms when creating the token filter
180
-
181
- # http://elasticsearch-users.115913.n3.nabble.com/synonym-multi-words-search-td4030811.html
182
- # I find the following approach effective if you are doing multi-word synonyms (synonym phrases):
183
- # - Only apply the synonym expansion at index time
184
- # - Don't have the synonym filter applied search
185
- # - Use directional synonyms where appropriate. You want to make sure that you're not injecting terms that are too general.
186
- settings[:analysis][:analyzer][default_analyzer][:filter].insert(4, "searchkick_synonym") if below60
187
- settings[:analysis][:analyzer][default_analyzer][:filter] << "searchkick_synonym"
188
-
189
- %w(word_start word_middle word_end).each do |type|
190
- settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_synonym")
191
- end
203
+ )
204
+ when "chinese2", "smartcn"
205
+ settings[:analysis][:analyzer].merge!(
206
+ default_analyzer => {
207
+ type: "smartcn"
208
+ },
209
+ searchkick_search: {
210
+ type: "smartcn"
211
+ },
212
+ searchkick_search2: {
213
+ type: "smartcn"
214
+ }
215
+ )
216
+ when "japanese", "japanese2"
217
+ analyzer = {
218
+ type: "custom",
219
+ tokenizer: "kuromoji_tokenizer",
220
+ filter: [
221
+ "kuromoji_baseform",
222
+ "kuromoji_part_of_speech",
223
+ "cjk_width",
224
+ "ja_stop",
225
+ "searchkick_stemmer",
226
+ "lowercase"
227
+ ]
228
+ }
229
+ settings[:analysis][:analyzer].merge!(
230
+ default_analyzer => analyzer.deep_dup,
231
+ searchkick_search: analyzer.deep_dup,
232
+ searchkick_search2: analyzer.deep_dup
233
+ )
234
+ settings[:analysis][:filter][:searchkick_stemmer] = {
235
+ type: "kuromoji_stemmer"
236
+ }
237
+ when "korean"
238
+ settings[:analysis][:analyzer].merge!(
239
+ default_analyzer => {
240
+ type: "openkoreantext-analyzer"
241
+ },
242
+ searchkick_search: {
243
+ type: "openkoreantext-analyzer"
244
+ },
245
+ searchkick_search2: {
246
+ type: "openkoreantext-analyzer"
247
+ }
248
+ )
249
+ when "korean2"
250
+ settings[:analysis][:analyzer].merge!(
251
+ default_analyzer => {
252
+ type: "nori"
253
+ },
254
+ searchkick_search: {
255
+ type: "nori"
256
+ },
257
+ searchkick_search2: {
258
+ type: "nori"
259
+ }
260
+ )
261
+ when "vietnamese"
262
+ settings[:analysis][:analyzer].merge!(
263
+ default_analyzer => {
264
+ type: "vi_analyzer"
265
+ },
266
+ searchkick_search: {
267
+ type: "vi_analyzer"
268
+ },
269
+ searchkick_search2: {
270
+ type: "vi_analyzer"
271
+ }
272
+ )
273
+ when "polish", "ukrainian"
274
+ settings[:analysis][:analyzer].merge!(
275
+ default_analyzer => {
276
+ type: language
277
+ },
278
+ searchkick_search: {
279
+ type: language
280
+ },
281
+ searchkick_search2: {
282
+ type: language
283
+ }
284
+ )
285
+ end
286
+ end
287
+
288
+ def update_stemming(settings)
289
+ if options[:stemmer]
290
+ stemmer = options[:stemmer]
291
+ # could also support snowball and stemmer
292
+ case stemmer[:type]
293
+ when "hunspell"
294
+ # supports all token filter options
295
+ settings[:analysis][:filter][:searchkick_stemmer] = stemmer
296
+ else
297
+ raise ArgumentError, "Unknown stemmer: #{stemmer[:type]}"
192
298
  end
299
+ end
193
300
 
194
- if options[:wordnet]
195
- settings[:analysis][:filter][:searchkick_wordnet] = {
196
- type: "synonym",
197
- format: "wordnet",
198
- synonyms_path: Searchkick.wordnet_path
199
- }
301
+ stem = options[:stem]
200
302
 
201
- settings[:analysis][:analyzer][default_analyzer][:filter].insert(4, "searchkick_wordnet")
202
- settings[:analysis][:analyzer][default_analyzer][:filter] << "searchkick_wordnet"
303
+ # language analyzer used
304
+ stem = false if settings[:analysis][:analyzer][default_analyzer][:type] != "custom"
203
305
 
204
- %w(word_start word_middle word_end).each do |type|
205
- settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_wordnet")
206
- end
306
+ if stem == false
307
+ settings[:analysis][:filter].delete(:searchkick_stemmer)
308
+ settings[:analysis][:analyzer].each do |_, analyzer|
309
+ analyzer[:filter].delete("searchkick_stemmer") if analyzer[:filter]
207
310
  end
311
+ end
208
312
 
209
- if options[:special_characters] == false
210
- settings[:analysis][:analyzer].each do |_, analyzer_settings|
211
- analyzer_settings[:filter].reject! { |f| f == "asciifolding" }
212
- end
313
+ if options[:stemmer_override]
314
+ stemmer_override = {
315
+ type: "stemmer_override"
316
+ }
317
+ if options[:stemmer_override].is_a?(String)
318
+ stemmer_override[:rules_path] = options[:stemmer_override]
319
+ else
320
+ stemmer_override[:rules] = options[:stemmer_override]
213
321
  end
322
+ settings[:analysis][:filter][:searchkick_stemmer_override] = stemmer_override
214
323
 
215
- mapping = {}
324
+ settings[:analysis][:analyzer].each do |_, analyzer|
325
+ stemmer_index = analyzer[:filter].index("searchkick_stemmer") if analyzer[:filter]
326
+ analyzer[:filter].insert(stemmer_index, "searchkick_stemmer_override") if stemmer_index
327
+ end
328
+ end
216
329
 
217
- # conversions
218
- Array(options[:conversions]).each do |conversions_field|
219
- mapping[conversions_field] = {
220
- type: "nested",
221
- properties: {
222
- query: {type: default_type, analyzer: "searchkick_keyword"},
223
- count: {type: "integer"}
224
- }
225
- }
330
+ if options[:stem_exclusion]
331
+ settings[:analysis][:filter][:searchkick_stem_exclusion] = {
332
+ type: "keyword_marker",
333
+ keywords: options[:stem_exclusion]
334
+ }
335
+
336
+ settings[:analysis][:analyzer].each do |_, analyzer|
337
+ stemmer_index = analyzer[:filter].index("searchkick_stemmer") if analyzer[:filter]
338
+ analyzer[:filter].insert(stemmer_index, "searchkick_stem_exclusion") if stemmer_index
226
339
  end
340
+ end
341
+ end
227
342
 
228
- mapping_options = Hash[
229
- [:suggest, :word, :text_start, :text_middle, :text_end, :word_start, :word_middle, :word_end, :highlight, :searchable, :filterable]
230
- .map { |type| [type, (options[type] || []).map(&:to_s)] }
231
- ]
343
+ def generate_mappings
344
+ mapping = {}
232
345
 
233
- word = options[:word] != false && (!options[:match] || options[:match] == :word)
346
+ keyword_mapping = {type: "keyword"}
347
+ keyword_mapping[:ignore_above] = options[:ignore_above] || 30000
234
348
 
235
- mapping_options[:searchable].delete("_all")
349
+ # conversions
350
+ Array(options[:conversions]).each do |conversions_field|
351
+ mapping[conversions_field] = {
352
+ type: "nested",
353
+ properties: {
354
+ query: {type: default_type, analyzer: "searchkick_keyword"},
355
+ count: {type: "integer"}
356
+ }
357
+ }
358
+ end
236
359
 
237
- analyzed_field_options = {type: default_type, index: index_true_value, analyzer: default_analyzer}
360
+ mapping_options =
361
+ [:suggest, :word, :text_start, :text_middle, :text_end, :word_start, :word_middle, :word_end, :highlight, :searchable, :filterable]
362
+ .to_h { |type| [type, (options[type] || []).map(&:to_s)] }
238
363
 
239
- mapping_options.values.flatten.uniq.each do |field|
240
- fields = {}
364
+ word = options[:word] != false && (!options[:match] || options[:match] == :word)
241
365
 
242
- if options.key?(:filterable) && !mapping_options[:filterable].include?(field)
243
- fields[field] = {type: default_type, index: index_false_value}
244
- else
245
- fields[field] = keyword_mapping
246
- end
366
+ mapping_options[:searchable].delete("_all")
247
367
 
248
- if !options[:searchable] || mapping_options[:searchable].include?(field)
249
- if word
250
- fields["analyzed"] = analyzed_field_options
368
+ analyzed_field_options = {type: default_type, index: true, analyzer: default_analyzer.to_s}
251
369
 
252
- if mapping_options[:highlight].include?(field)
253
- fields["analyzed"][:term_vector] = "with_positions_offsets"
254
- end
255
- end
370
+ mapping_options.values.flatten.uniq.each do |field|
371
+ fields = {}
372
+
373
+ if options.key?(:filterable) && !mapping_options[:filterable].include?(field)
374
+ fields[field] = {type: default_type, index: false}
375
+ else
376
+ fields[field] = keyword_mapping
377
+ end
256
378
 
257
- mapping_options.except(:highlight, :searchable, :filterable, :word).each do |type, f|
258
- if options[:match] == type || f.include?(field)
259
- fields[type] = {type: default_type, index: index_true_value, analyzer: "searchkick_#{type}_index"}
260
- end
379
+ if !options[:searchable] || mapping_options[:searchable].include?(field)
380
+ if word
381
+ fields[:analyzed] = analyzed_field_options
382
+
383
+ if mapping_options[:highlight].include?(field)
384
+ fields[:analyzed][:term_vector] = "with_positions_offsets"
261
385
  end
262
386
  end
263
387
 
264
- mapping[field] = fields[field].merge(fields: fields.except(field))
388
+ mapping_options.except(:highlight, :searchable, :filterable, :word).each do |type, f|
389
+ if options[:match] == type || f.include?(field)
390
+ fields[type] = {type: default_type, index: true, analyzer: "searchkick_#{type}_index"}
391
+ end
392
+ end
265
393
  end
266
394
 
267
- (options[:locations] || []).map(&:to_s).each do |field|
268
- mapping[field] = {
269
- type: "geo_point"
270
- }
395
+ mapping[field] = fields[field].merge(fields: fields.except(field))
396
+ end
397
+
398
+ (options[:locations] || []).map(&:to_s).each do |field|
399
+ mapping[field] = {
400
+ type: "geo_point"
401
+ }
402
+ end
403
+
404
+ options[:geo_shape] = options[:geo_shape].product([{}]).to_h if options[:geo_shape].is_a?(Array)
405
+ (options[:geo_shape] || {}).each do |field, shape_options|
406
+ mapping[field] = shape_options.merge(type: "geo_shape")
407
+ end
408
+
409
+ if options[:inheritance]
410
+ mapping[:type] = keyword_mapping
411
+ end
412
+
413
+ routing = {}
414
+ if options[:routing]
415
+ routing = {required: true}
416
+ unless options[:routing] == true
417
+ routing[:path] = options[:routing].to_s
271
418
  end
419
+ end
420
+
421
+ dynamic_fields = {
422
+ # analyzed field must be the default field for include_in_all
423
+ # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
424
+ # however, we can include the not_analyzed field in _all
425
+ # and the _all index analyzer will take care of it
426
+ "{name}" => keyword_mapping
427
+ }
428
+
429
+ if options.key?(:filterable)
430
+ dynamic_fields["{name}"] = {type: default_type, index: false}
431
+ end
272
432
 
273
- options[:geo_shape] = options[:geo_shape].product([{}]).to_h if options[:geo_shape].is_a?(Array)
274
- (options[:geo_shape] || {}).each do |field, shape_options|
275
- mapping[field] = shape_options.merge(type: "geo_shape")
433
+ unless options[:searchable]
434
+ if options[:match] && options[:match] != :word
435
+ dynamic_fields[options[:match]] = {type: default_type, index: true, analyzer: "searchkick_#{options[:match]}_index"}
276
436
  end
277
437
 
278
- routing = {}
279
- if options[:routing]
280
- routing = {required: true}
281
- unless options[:routing] == true
282
- routing[:path] = options[:routing].to_s
283
- end
438
+ if word
439
+ dynamic_fields[:analyzed] = analyzed_field_options
284
440
  end
441
+ end
285
442
 
286
- dynamic_fields = {
287
- # analyzed field must be the default field for include_in_all
288
- # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
289
- # however, we can include the not_analyzed field in _all
290
- # and the _all index analyzer will take care of it
291
- "{name}" => keyword_mapping
292
- }
443
+ # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
444
+ multi_field = dynamic_fields["{name}"].merge(fields: dynamic_fields.except("{name}"))
445
+
446
+ mappings = {
447
+ properties: mapping,
448
+ _routing: routing,
449
+ # https://gist.github.com/kimchy/2898285
450
+ dynamic_templates: [
451
+ {
452
+ string_template: {
453
+ match: "*",
454
+ match_mapping_type: "string",
455
+ mapping: multi_field
456
+ }
457
+ }
458
+ ]
459
+ }
460
+
461
+ mappings
462
+ end
293
463
 
294
- if below60 && all
295
- dynamic_fields["{name}"][:include_in_all] = !options[:searchable]
464
+ def add_synonyms(settings)
465
+ synonyms = options[:synonyms] || []
466
+ synonyms = synonyms.call if synonyms.respond_to?(:call)
467
+ if synonyms.any?
468
+ settings[:analysis][:filter][:searchkick_synonym] = {
469
+ type: "synonym",
470
+ # only remove a single space from synonyms so three-word synonyms will fail noisily instead of silently
471
+ synonyms: synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.map { |s2| s2.sub(/\s+/, "") }.join(",") : s }.map(&:downcase)
472
+ }
473
+ # choosing a place for the synonym filter when stemming is not easy
474
+ # https://groups.google.com/forum/#!topic/elasticsearch/p7qcQlgHdB8
475
+ # TODO use a snowball stemmer on synonyms when creating the token filter
476
+
477
+ # http://elasticsearch-users.115913.n3.nabble.com/synonym-multi-words-search-td4030811.html
478
+ # I find the following approach effective if you are doing multi-word synonyms (synonym phrases):
479
+ # - Only apply the synonym expansion at index time
480
+ # - Don't have the synonym filter applied search
481
+ # - Use directional synonyms where appropriate. You want to make sure that you're not injecting terms that are too general.
482
+ settings[:analysis][:analyzer][default_analyzer][:filter].insert(2, "searchkick_synonym")
483
+
484
+ %w(word_start word_middle word_end).each do |type|
485
+ settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_synonym")
296
486
  end
487
+ end
488
+ end
297
489
 
298
- if options.key?(:filterable)
299
- dynamic_fields["{name}"] = {type: default_type, index: index_false_value}
490
+ def add_search_synonyms(settings)
491
+ search_synonyms = options[:search_synonyms] || []
492
+ search_synonyms = search_synonyms.call if search_synonyms.respond_to?(:call)
493
+ if search_synonyms.is_a?(String) || search_synonyms.any?
494
+ if search_synonyms.is_a?(String)
495
+ synonym_graph = {
496
+ type: "synonym_graph",
497
+ synonyms_path: search_synonyms
498
+ }
499
+ synonym_graph[:updateable] = true unless below73?
500
+ else
501
+ synonym_graph = {
502
+ type: "synonym_graph",
503
+ # TODO confirm this is correct
504
+ synonyms: search_synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.join(",") : s }.map(&:downcase)
505
+ }
300
506
  end
507
+ settings[:analysis][:filter][:searchkick_synonym_graph] = synonym_graph
301
508
 
302
- unless options[:searchable]
303
- if options[:match] && options[:match] != :word
304
- dynamic_fields[options[:match]] = {type: default_type, index: index_true_value, analyzer: "searchkick_#{options[:match]}_index"}
509
+ if ["japanese", "japanese2"].include?(options[:language])
510
+ [:searchkick_search, :searchkick_search2].each do |analyzer|
511
+ settings[:analysis][:analyzer][analyzer][:filter].insert(4, "searchkick_synonym_graph")
305
512
  end
513
+ else
514
+ [:searchkick_search2, :searchkick_word_search].each do |analyzer|
515
+ unless settings[:analysis][:analyzer][analyzer].key?(:filter)
516
+ raise Error, "Search synonyms are not supported yet for language"
517
+ end
306
518
 
307
- if word
308
- dynamic_fields["analyzed"] = analyzed_field_options
519
+ settings[:analysis][:analyzer][analyzer][:filter].insert(2, "searchkick_synonym_graph")
309
520
  end
310
521
  end
522
+ end
523
+ end
311
524
 
312
- # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
313
- multi_field = dynamic_fields["{name}"].merge(fields: dynamic_fields.except("{name}"))
314
-
315
- mappings = {
316
- _default_: {
317
- properties: mapping,
318
- _routing: routing,
319
- # https://gist.github.com/kimchy/2898285
320
- dynamic_templates: [
321
- {
322
- string_template: {
323
- match: "*",
324
- match_mapping_type: "string",
325
- mapping: multi_field
326
- }
327
- }
328
- ]
329
- }
330
- }
331
-
332
- if below60
333
- all_enabled = all && (!options[:searchable] || options[:searchable].to_a.map(&:to_s).include?("_all"))
334
- mappings[:_default_][:_all] = all_enabled ? analyzed_field_options : {enabled: false}
335
- end
525
+ def set_deep_paging(settings)
526
+ if !settings.dig(:index, :max_result_window) && !settings[:"index.max_result_window"]
527
+ settings[:index] ||= {}
528
+ settings[:index][:max_result_window] = options[:max_result_window] || 1_000_000_000
529
+ end
530
+ end
336
531
 
337
- mappings = mappings.deep_merge(options[:mappings] || {})
532
+ def index_type
533
+ @index_type ||= begin
534
+ index_type = options[:_type]
535
+ index_type = index_type.call if index_type.respond_to?(:call)
536
+ index_type
338
537
  end
538
+ end
339
539
 
340
- {
341
- settings: settings,
342
- mappings: mappings
343
- }
540
+ def default_type
541
+ "text"
542
+ end
543
+
544
+ def default_analyzer
545
+ :searchkick_index
546
+ end
547
+
548
+ def below73?
549
+ Searchkick.server_below?("7.3.0")
344
550
  end
345
551
  end
346
552
  end