searchkick 4.4.0 → 4.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -3
- data/README.md +211 -210
- data/lib/searchkick.rb +13 -10
- data/lib/searchkick/index.rb +9 -3
- data/lib/searchkick/index_options.rb +467 -400
- data/lib/searchkick/model.rb +1 -1
- data/lib/searchkick/results.rb +10 -0
- data/lib/searchkick/version.rb +1 -1
- data/lib/tasks/searchkick.rake +12 -11
- metadata +2 -59
- data/CONTRIBUTING.md +0 -53
data/lib/searchkick.rb
CHANGED
@@ -164,6 +164,7 @@ module Searchkick
|
|
164
164
|
|
165
165
|
def self.aws_credentials=(creds)
|
166
166
|
begin
|
167
|
+
# TODO remove in Searchkick 5 (just use aws_sigv4)
|
167
168
|
require "faraday_middleware/aws_signers_v4"
|
168
169
|
rescue LoadError
|
169
170
|
require "faraday_middleware/aws_sigv4"
|
@@ -173,17 +174,16 @@ module Searchkick
|
|
173
174
|
end
|
174
175
|
|
175
176
|
def self.reindex_status(index_name)
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
raise Searchkick::Error, "Redis not configured"
|
184
|
-
end
|
177
|
+
raise Searchkick::Error, "Redis not configured" unless redis
|
178
|
+
|
179
|
+
batches_left = Searchkick::Index.new(index_name).batches_left
|
180
|
+
{
|
181
|
+
completed: batches_left == 0,
|
182
|
+
batches_left: batches_left
|
183
|
+
}
|
185
184
|
end
|
186
185
|
|
186
|
+
# TODO use ConnectionPool::Wrapper when redis is set so this is no longer needed
|
187
187
|
def self.with_redis
|
188
188
|
if redis
|
189
189
|
if redis.respond_to?(:with)
|
@@ -267,9 +267,12 @@ module Searchkick
|
|
267
267
|
end
|
268
268
|
end
|
269
269
|
|
270
|
-
# TODO find better ActiveModel hook
|
271
270
|
require "active_model/callbacks"
|
272
271
|
ActiveModel::Callbacks.include(Searchkick::Model)
|
272
|
+
# TODO use
|
273
|
+
# ActiveSupport.on_load(:mongoid) do
|
274
|
+
# Mongoid::Document::ClassMethods.include Searchkick::Model
|
275
|
+
# end
|
273
276
|
|
274
277
|
ActiveSupport.on_load(:active_record) do
|
275
278
|
extend Searchkick::Model
|
data/lib/searchkick/index.rb
CHANGED
@@ -2,8 +2,6 @@ require "searchkick/index_options"
|
|
2
2
|
|
3
3
|
module Searchkick
|
4
4
|
class Index
|
5
|
-
include IndexOptions
|
6
|
-
|
7
5
|
attr_reader :name, :options
|
8
6
|
|
9
7
|
def initialize(name, options = {})
|
@@ -12,6 +10,10 @@ module Searchkick
|
|
12
10
|
@klass_document_type = {} # cache
|
13
11
|
end
|
14
12
|
|
13
|
+
def index_options
|
14
|
+
IndexOptions.new(self).index_options
|
15
|
+
end
|
16
|
+
|
15
17
|
def create(body = {})
|
16
18
|
client.indices.create index: name, body: body
|
17
19
|
end
|
@@ -178,7 +180,11 @@ module Searchkick
|
|
178
180
|
require "elasticsearch/xpack"
|
179
181
|
raise Error, "Requires Elasticsearch 7.3+" if Searchkick.server_below?("7.3.0")
|
180
182
|
raise Error, "Requires elasticsearch-xpack 7.8+" unless client.xpack.respond_to?(:indices)
|
181
|
-
|
183
|
+
begin
|
184
|
+
client.xpack.indices.reload_search_analyzers(index: name)
|
185
|
+
rescue Elasticsearch::Transport::Transport::Errors::MethodNotAllowed
|
186
|
+
raise Error, "Requires non-OSS version of Elasticsearch"
|
187
|
+
end
|
182
188
|
end
|
183
189
|
|
184
190
|
# queue
|
@@ -1,21 +1,14 @@
|
|
1
1
|
module Searchkick
|
2
|
-
|
3
|
-
|
4
|
-
options = @options
|
5
|
-
language = options[:language]
|
6
|
-
language = language.call if language.respond_to?(:call)
|
2
|
+
class IndexOptions
|
3
|
+
attr_reader :options
|
7
4
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
if below70
|
13
|
-
index_type = options[:_type]
|
14
|
-
index_type = index_type.call if index_type.respond_to?(:call)
|
15
|
-
end
|
5
|
+
def initialize(index)
|
6
|
+
@options = index.options
|
7
|
+
end
|
16
8
|
|
9
|
+
def index_options
|
17
10
|
custom_mapping = options[:mappings] || {}
|
18
|
-
if below70 && custom_mapping.keys.map(&:to_sym).include?(:properties)
|
11
|
+
if below70? && custom_mapping.keys.map(&:to_sym).include?(:properties)
|
19
12
|
# add type
|
20
13
|
custom_mapping = {index_type => custom_mapping}
|
21
14
|
end
|
@@ -24,468 +17,542 @@ module Searchkick
|
|
24
17
|
settings = options[:settings] || {}
|
25
18
|
mappings = custom_mapping
|
26
19
|
else
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
keyword_mapping[:ignore_above] = options[:ignore_above] || 30000
|
32
|
-
|
33
|
-
settings = {
|
34
|
-
analysis: {
|
35
|
-
analyzer: {
|
36
|
-
searchkick_keyword: {
|
37
|
-
type: "custom",
|
38
|
-
tokenizer: "keyword",
|
39
|
-
filter: ["lowercase"] + (options[:stem_conversions] ? ["searchkick_stemmer"] : [])
|
40
|
-
},
|
41
|
-
default_analyzer => {
|
42
|
-
type: "custom",
|
43
|
-
# character filters -> tokenizer -> token filters
|
44
|
-
# https://www.elastic.co/guide/en/elasticsearch/guide/current/analysis-intro.html
|
45
|
-
char_filter: ["ampersand"],
|
46
|
-
tokenizer: "standard",
|
47
|
-
# synonym should come last, after stemming and shingle
|
48
|
-
# shingle must come before searchkick_stemmer
|
49
|
-
filter: ["lowercase", "asciifolding", "searchkick_index_shingle", "searchkick_stemmer"]
|
50
|
-
},
|
51
|
-
searchkick_search: {
|
52
|
-
type: "custom",
|
53
|
-
char_filter: ["ampersand"],
|
54
|
-
tokenizer: "standard",
|
55
|
-
filter: ["lowercase", "asciifolding", "searchkick_search_shingle", "searchkick_stemmer"]
|
56
|
-
},
|
57
|
-
searchkick_search2: {
|
58
|
-
type: "custom",
|
59
|
-
char_filter: ["ampersand"],
|
60
|
-
tokenizer: "standard",
|
61
|
-
filter: ["lowercase", "asciifolding", "searchkick_stemmer"]
|
62
|
-
},
|
63
|
-
# https://github.com/leschenko/elasticsearch_autocomplete/blob/master/lib/elasticsearch_autocomplete/analyzers.rb
|
64
|
-
searchkick_autocomplete_search: {
|
65
|
-
type: "custom",
|
66
|
-
tokenizer: "keyword",
|
67
|
-
filter: ["lowercase", "asciifolding"]
|
68
|
-
},
|
69
|
-
searchkick_word_search: {
|
70
|
-
type: "custom",
|
71
|
-
tokenizer: "standard",
|
72
|
-
filter: ["lowercase", "asciifolding"]
|
73
|
-
},
|
74
|
-
searchkick_suggest_index: {
|
75
|
-
type: "custom",
|
76
|
-
tokenizer: "standard",
|
77
|
-
filter: ["lowercase", "asciifolding", "searchkick_suggest_shingle"]
|
78
|
-
},
|
79
|
-
searchkick_text_start_index: {
|
80
|
-
type: "custom",
|
81
|
-
tokenizer: "keyword",
|
82
|
-
filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
|
83
|
-
},
|
84
|
-
searchkick_text_middle_index: {
|
85
|
-
type: "custom",
|
86
|
-
tokenizer: "keyword",
|
87
|
-
filter: ["lowercase", "asciifolding", "searchkick_ngram"]
|
88
|
-
},
|
89
|
-
searchkick_text_end_index: {
|
90
|
-
type: "custom",
|
91
|
-
tokenizer: "keyword",
|
92
|
-
filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
|
93
|
-
},
|
94
|
-
searchkick_word_start_index: {
|
95
|
-
type: "custom",
|
96
|
-
tokenizer: "standard",
|
97
|
-
filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
|
98
|
-
},
|
99
|
-
searchkick_word_middle_index: {
|
100
|
-
type: "custom",
|
101
|
-
tokenizer: "standard",
|
102
|
-
filter: ["lowercase", "asciifolding", "searchkick_ngram"]
|
103
|
-
},
|
104
|
-
searchkick_word_end_index: {
|
105
|
-
type: "custom",
|
106
|
-
tokenizer: "standard",
|
107
|
-
filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
|
108
|
-
}
|
109
|
-
},
|
110
|
-
filter: {
|
111
|
-
searchkick_index_shingle: {
|
112
|
-
type: "shingle",
|
113
|
-
token_separator: ""
|
114
|
-
},
|
115
|
-
# lucky find https://web.archiveorange.com/archive/v/AAfXfQ17f57FcRINsof7
|
116
|
-
searchkick_search_shingle: {
|
117
|
-
type: "shingle",
|
118
|
-
token_separator: "",
|
119
|
-
output_unigrams: false,
|
120
|
-
output_unigrams_if_no_shingles: true
|
121
|
-
},
|
122
|
-
searchkick_suggest_shingle: {
|
123
|
-
type: "shingle",
|
124
|
-
max_shingle_size: 5
|
125
|
-
},
|
126
|
-
searchkick_edge_ngram: {
|
127
|
-
type: "edge_ngram",
|
128
|
-
min_gram: 1,
|
129
|
-
max_gram: 50
|
130
|
-
},
|
131
|
-
searchkick_ngram: {
|
132
|
-
type: "ngram",
|
133
|
-
min_gram: 1,
|
134
|
-
max_gram: 50
|
135
|
-
},
|
136
|
-
searchkick_stemmer: {
|
137
|
-
# use stemmer if language is lowercase, snowball otherwise
|
138
|
-
type: language == language.to_s.downcase ? "stemmer" : "snowball",
|
139
|
-
language: language || "English"
|
140
|
-
}
|
141
|
-
},
|
142
|
-
char_filter: {
|
143
|
-
# https://www.elastic.co/guide/en/elasticsearch/guide/current/custom-analyzers.html
|
144
|
-
# &_to_and
|
145
|
-
ampersand: {
|
146
|
-
type: "mapping",
|
147
|
-
mappings: ["&=> and "]
|
148
|
-
}
|
149
|
-
}
|
150
|
-
}
|
151
|
-
}
|
20
|
+
settings = generate_settings
|
21
|
+
mappings = generate_mappings.symbolize_keys.deep_merge(custom_mapping.symbolize_keys)
|
22
|
+
end
|
152
23
|
|
153
|
-
|
24
|
+
set_deep_paging(settings) if options[:deep_paging]
|
154
25
|
|
155
|
-
|
156
|
-
|
157
|
-
|
26
|
+
{
|
27
|
+
settings: settings,
|
28
|
+
mappings: mappings
|
29
|
+
}
|
30
|
+
end
|
31
|
+
|
32
|
+
def generate_settings
|
33
|
+
language = options[:language]
|
34
|
+
language = language.call if language.respond_to?(:call)
|
35
|
+
|
36
|
+
settings = {
|
37
|
+
analysis: {
|
38
|
+
analyzer: {
|
39
|
+
searchkick_keyword: {
|
40
|
+
type: "custom",
|
41
|
+
tokenizer: "keyword",
|
42
|
+
filter: ["lowercase"] + (options[:stem_conversions] ? ["searchkick_stemmer"] : [])
|
43
|
+
},
|
158
44
|
default_analyzer => {
|
159
|
-
type: "
|
45
|
+
type: "custom",
|
46
|
+
# character filters -> tokenizer -> token filters
|
47
|
+
# https://www.elastic.co/guide/en/elasticsearch/guide/current/analysis-intro.html
|
48
|
+
char_filter: ["ampersand"],
|
49
|
+
tokenizer: "standard",
|
50
|
+
# synonym should come last, after stemming and shingle
|
51
|
+
# shingle must come before searchkick_stemmer
|
52
|
+
filter: ["lowercase", "asciifolding", "searchkick_index_shingle", "searchkick_stemmer"]
|
160
53
|
},
|
161
54
|
searchkick_search: {
|
162
|
-
type: "
|
55
|
+
type: "custom",
|
56
|
+
char_filter: ["ampersand"],
|
57
|
+
tokenizer: "standard",
|
58
|
+
filter: ["lowercase", "asciifolding", "searchkick_search_shingle", "searchkick_stemmer"]
|
163
59
|
},
|
164
60
|
searchkick_search2: {
|
165
|
-
type: "
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
stem = false
|
170
|
-
when "chinese2", "smartcn"
|
171
|
-
settings[:analysis][:analyzer].merge!(
|
172
|
-
default_analyzer => {
|
173
|
-
type: "smartcn"
|
61
|
+
type: "custom",
|
62
|
+
char_filter: ["ampersand"],
|
63
|
+
tokenizer: "standard",
|
64
|
+
filter: ["lowercase", "asciifolding", "searchkick_stemmer"]
|
174
65
|
},
|
175
|
-
|
176
|
-
|
66
|
+
# https://github.com/leschenko/elasticsearch_autocomplete/blob/master/lib/elasticsearch_autocomplete/analyzers.rb
|
67
|
+
searchkick_autocomplete_search: {
|
68
|
+
type: "custom",
|
69
|
+
tokenizer: "keyword",
|
70
|
+
filter: ["lowercase", "asciifolding"]
|
177
71
|
},
|
178
|
-
|
179
|
-
type: "
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
stem = false
|
184
|
-
when "japanese"
|
185
|
-
settings[:analysis][:analyzer].merge!(
|
186
|
-
default_analyzer => {
|
187
|
-
type: "kuromoji"
|
72
|
+
searchkick_word_search: {
|
73
|
+
type: "custom",
|
74
|
+
tokenizer: "standard",
|
75
|
+
filter: ["lowercase", "asciifolding"]
|
188
76
|
},
|
189
|
-
|
190
|
-
type: "
|
77
|
+
searchkick_suggest_index: {
|
78
|
+
type: "custom",
|
79
|
+
tokenizer: "standard",
|
80
|
+
filter: ["lowercase", "asciifolding", "searchkick_suggest_shingle"]
|
191
81
|
},
|
192
|
-
|
193
|
-
type: "
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
stem = false
|
198
|
-
when "korean"
|
199
|
-
settings[:analysis][:analyzer].merge!(
|
200
|
-
default_analyzer => {
|
201
|
-
type: "openkoreantext-analyzer"
|
82
|
+
searchkick_text_start_index: {
|
83
|
+
type: "custom",
|
84
|
+
tokenizer: "keyword",
|
85
|
+
filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
|
202
86
|
},
|
203
|
-
|
204
|
-
type: "
|
87
|
+
searchkick_text_middle_index: {
|
88
|
+
type: "custom",
|
89
|
+
tokenizer: "keyword",
|
90
|
+
filter: ["lowercase", "asciifolding", "searchkick_ngram"]
|
205
91
|
},
|
206
|
-
|
207
|
-
type: "
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
stem = false
|
212
|
-
when "korean2"
|
213
|
-
settings[:analysis][:analyzer].merge!(
|
214
|
-
default_analyzer => {
|
215
|
-
type: "nori"
|
92
|
+
searchkick_text_end_index: {
|
93
|
+
type: "custom",
|
94
|
+
tokenizer: "keyword",
|
95
|
+
filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
|
216
96
|
},
|
217
|
-
|
218
|
-
type: "
|
97
|
+
searchkick_word_start_index: {
|
98
|
+
type: "custom",
|
99
|
+
tokenizer: "standard",
|
100
|
+
filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
|
219
101
|
},
|
220
|
-
|
221
|
-
type: "
|
102
|
+
searchkick_word_middle_index: {
|
103
|
+
type: "custom",
|
104
|
+
tokenizer: "standard",
|
105
|
+
filter: ["lowercase", "asciifolding", "searchkick_ngram"]
|
106
|
+
},
|
107
|
+
searchkick_word_end_index: {
|
108
|
+
type: "custom",
|
109
|
+
tokenizer: "standard",
|
110
|
+
filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
|
222
111
|
}
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
default_analyzer => {
|
229
|
-
type: "vi_analyzer"
|
112
|
+
},
|
113
|
+
filter: {
|
114
|
+
searchkick_index_shingle: {
|
115
|
+
type: "shingle",
|
116
|
+
token_separator: ""
|
230
117
|
},
|
231
|
-
|
232
|
-
|
118
|
+
# lucky find https://web.archiveorange.com/archive/v/AAfXfQ17f57FcRINsof7
|
119
|
+
searchkick_search_shingle: {
|
120
|
+
type: "shingle",
|
121
|
+
token_separator: "",
|
122
|
+
output_unigrams: false,
|
123
|
+
output_unigrams_if_no_shingles: true
|
233
124
|
},
|
234
|
-
|
235
|
-
type: "
|
236
|
-
|
237
|
-
)
|
238
|
-
|
239
|
-
stem = false
|
240
|
-
when "polish", "ukrainian"
|
241
|
-
settings[:analysis][:analyzer].merge!(
|
242
|
-
default_analyzer => {
|
243
|
-
type: language
|
125
|
+
searchkick_suggest_shingle: {
|
126
|
+
type: "shingle",
|
127
|
+
max_shingle_size: 5
|
244
128
|
},
|
245
|
-
|
246
|
-
type:
|
129
|
+
searchkick_edge_ngram: {
|
130
|
+
type: "edge_ngram",
|
131
|
+
min_gram: 1,
|
132
|
+
max_gram: 50
|
247
133
|
},
|
248
|
-
|
249
|
-
type:
|
134
|
+
searchkick_ngram: {
|
135
|
+
type: "ngram",
|
136
|
+
min_gram: 1,
|
137
|
+
max_gram: 50
|
138
|
+
},
|
139
|
+
searchkick_stemmer: {
|
140
|
+
# use stemmer if language is lowercase, snowball otherwise
|
141
|
+
type: language == language.to_s.downcase ? "stemmer" : "snowball",
|
142
|
+
language: language || "English"
|
250
143
|
}
|
251
|
-
|
144
|
+
},
|
145
|
+
char_filter: {
|
146
|
+
# https://www.elastic.co/guide/en/elasticsearch/guide/current/custom-analyzers.html
|
147
|
+
# &_to_and
|
148
|
+
ampersand: {
|
149
|
+
type: "mapping",
|
150
|
+
mappings: ["&=> and "]
|
151
|
+
}
|
152
|
+
}
|
153
|
+
}
|
154
|
+
}
|
252
155
|
|
253
|
-
|
254
|
-
|
156
|
+
update_language(settings, language)
|
157
|
+
update_stemming(settings)
|
255
158
|
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
159
|
+
if Searchkick.env == "test"
|
160
|
+
settings[:number_of_shards] = 1
|
161
|
+
settings[:number_of_replicas] = 0
|
162
|
+
end
|
260
163
|
|
261
|
-
|
262
|
-
|
263
|
-
|
164
|
+
# TODO remove in Searchkick 5 (classic no longer supported)
|
165
|
+
if options[:similarity]
|
166
|
+
settings[:similarity] = {default: {type: options[:similarity]}}
|
167
|
+
end
|
264
168
|
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
169
|
+
unless below62?
|
170
|
+
settings[:index] = {
|
171
|
+
max_ngram_diff: 49,
|
172
|
+
max_shingle_diff: 4
|
173
|
+
}
|
174
|
+
end
|
271
175
|
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
end
|
176
|
+
if options[:case_sensitive]
|
177
|
+
settings[:analysis][:analyzer].each do |_, analyzer|
|
178
|
+
analyzer[:filter].delete("lowercase")
|
276
179
|
end
|
180
|
+
end
|
277
181
|
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
182
|
+
# TODO do this last in Searchkick 5
|
183
|
+
settings = settings.symbolize_keys.deep_merge((options[:settings] || {}).symbolize_keys)
|
184
|
+
|
185
|
+
add_synonyms(settings)
|
186
|
+
add_search_synonyms(settings)
|
187
|
+
# TODO remove in Searchkick 5
|
188
|
+
add_wordnet(settings) if options[:wordnet]
|
189
|
+
|
190
|
+
if options[:special_characters] == false
|
191
|
+
settings[:analysis][:analyzer].each_value do |analyzer_settings|
|
192
|
+
analyzer_settings[:filter].reject! { |f| f == "asciifolding" }
|
283
193
|
end
|
194
|
+
end
|
284
195
|
|
285
|
-
|
196
|
+
settings
|
197
|
+
end
|
286
198
|
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
type: "
|
293
|
-
|
294
|
-
|
199
|
+
def update_language(settings, language)
|
200
|
+
case language
|
201
|
+
when "chinese"
|
202
|
+
settings[:analysis][:analyzer].merge!(
|
203
|
+
default_analyzer => {
|
204
|
+
type: "ik_smart"
|
205
|
+
},
|
206
|
+
searchkick_search: {
|
207
|
+
type: "ik_smart"
|
208
|
+
},
|
209
|
+
searchkick_search2: {
|
210
|
+
type: "ik_max_word"
|
295
211
|
}
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
212
|
+
)
|
213
|
+
when "chinese2", "smartcn"
|
214
|
+
settings[:analysis][:analyzer].merge!(
|
215
|
+
default_analyzer => {
|
216
|
+
type: "smartcn"
|
217
|
+
},
|
218
|
+
searchkick_search: {
|
219
|
+
type: "smartcn"
|
220
|
+
},
|
221
|
+
searchkick_search2: {
|
222
|
+
type: "smartcn"
|
223
|
+
}
|
224
|
+
)
|
225
|
+
when "japanese"
|
226
|
+
settings[:analysis][:analyzer].merge!(
|
227
|
+
default_analyzer => {
|
228
|
+
type: "kuromoji"
|
229
|
+
},
|
230
|
+
searchkick_search: {
|
231
|
+
type: "kuromoji"
|
232
|
+
},
|
233
|
+
searchkick_search2: {
|
234
|
+
type: "kuromoji"
|
235
|
+
}
|
236
|
+
)
|
237
|
+
when "korean"
|
238
|
+
settings[:analysis][:analyzer].merge!(
|
239
|
+
default_analyzer => {
|
240
|
+
type: "openkoreantext-analyzer"
|
241
|
+
},
|
242
|
+
searchkick_search: {
|
243
|
+
type: "openkoreantext-analyzer"
|
244
|
+
},
|
245
|
+
searchkick_search2: {
|
246
|
+
type: "openkoreantext-analyzer"
|
247
|
+
}
|
248
|
+
)
|
249
|
+
when "korean2"
|
250
|
+
settings[:analysis][:analyzer].merge!(
|
251
|
+
default_analyzer => {
|
252
|
+
type: "nori"
|
253
|
+
},
|
254
|
+
searchkick_search: {
|
255
|
+
type: "nori"
|
256
|
+
},
|
257
|
+
searchkick_search2: {
|
258
|
+
type: "nori"
|
259
|
+
}
|
260
|
+
)
|
261
|
+
when "vietnamese"
|
262
|
+
settings[:analysis][:analyzer].merge!(
|
263
|
+
default_analyzer => {
|
264
|
+
type: "vi_analyzer"
|
265
|
+
},
|
266
|
+
searchkick_search: {
|
267
|
+
type: "vi_analyzer"
|
268
|
+
},
|
269
|
+
searchkick_search2: {
|
270
|
+
type: "vi_analyzer"
|
271
|
+
}
|
272
|
+
)
|
273
|
+
when "polish", "ukrainian"
|
274
|
+
settings[:analysis][:analyzer].merge!(
|
275
|
+
default_analyzer => {
|
276
|
+
type: language
|
277
|
+
},
|
278
|
+
searchkick_search: {
|
279
|
+
type: language
|
280
|
+
},
|
281
|
+
searchkick_search2: {
|
282
|
+
type: language
|
283
|
+
}
|
284
|
+
)
|
285
|
+
end
|
286
|
+
end
|
311
287
|
|
312
|
-
|
313
|
-
|
314
|
-
if search_synonyms.is_a?(String) || search_synonyms.any?
|
315
|
-
if search_synonyms.is_a?(String)
|
316
|
-
synonym_graph = {
|
317
|
-
type: "synonym_graph",
|
318
|
-
synonyms_path: search_synonyms
|
319
|
-
}
|
320
|
-
synonym_graph[:updateable] = true unless below73
|
321
|
-
else
|
322
|
-
synonym_graph = {
|
323
|
-
type: "synonym_graph",
|
324
|
-
# TODO confirm this is correct
|
325
|
-
synonyms: search_synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.join(",") : s }.map(&:downcase)
|
326
|
-
}
|
327
|
-
end
|
328
|
-
settings[:analysis][:filter][:searchkick_synonym_graph] = synonym_graph
|
288
|
+
def update_stemming(settings)
|
289
|
+
stem = options[:stem]
|
329
290
|
|
330
|
-
|
331
|
-
|
332
|
-
end
|
333
|
-
end
|
291
|
+
# language analyzer used
|
292
|
+
stem = false if settings[:analysis][:analyzer][default_analyzer][:type] != "custom"
|
334
293
|
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
339
|
-
|
340
|
-
|
294
|
+
if stem == false
|
295
|
+
settings[:analysis][:filter].delete(:searchkick_stemmer)
|
296
|
+
settings[:analysis][:analyzer].each do |_, analyzer|
|
297
|
+
analyzer[:filter].delete("searchkick_stemmer") if analyzer[:filter]
|
298
|
+
end
|
299
|
+
end
|
341
300
|
|
342
|
-
|
343
|
-
|
301
|
+
if options[:stemmer_override]
|
302
|
+
stemmer_override = {
|
303
|
+
type: "stemmer_override"
|
304
|
+
}
|
305
|
+
if options[:stemmer_override].is_a?(String)
|
306
|
+
stemmer_override[:rules_path] = options[:stemmer_override]
|
307
|
+
else
|
308
|
+
stemmer_override[:rules] = options[:stemmer_override]
|
309
|
+
end
|
310
|
+
settings[:analysis][:filter][:searchkick_stemmer_override] = stemmer_override
|
344
311
|
|
345
|
-
|
346
|
-
|
347
|
-
|
312
|
+
settings[:analysis][:analyzer].each do |_, analyzer|
|
313
|
+
stemmer_index = analyzer[:filter].index("searchkick_stemmer") if analyzer[:filter]
|
314
|
+
analyzer[:filter].insert(stemmer_index, "searchkick_stemmer_override") if stemmer_index
|
348
315
|
end
|
316
|
+
end
|
349
317
|
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
318
|
+
if options[:stem_exclusion]
|
319
|
+
settings[:analysis][:filter][:searchkick_stem_exclusion] = {
|
320
|
+
type: "keyword_marker",
|
321
|
+
keywords: options[:stem_exclusion]
|
322
|
+
}
|
323
|
+
|
324
|
+
settings[:analysis][:analyzer].each do |_, analyzer|
|
325
|
+
stemmer_index = analyzer[:filter].index("searchkick_stemmer") if analyzer[:filter]
|
326
|
+
analyzer[:filter].insert(stemmer_index, "searchkick_stem_exclusion") if stemmer_index
|
354
327
|
end
|
328
|
+
end
|
329
|
+
end
|
355
330
|
|
356
|
-
|
331
|
+
def generate_mappings
|
332
|
+
mapping = {}
|
357
333
|
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
334
|
+
keyword_mapping = {type: "keyword"}
|
335
|
+
keyword_mapping[:ignore_above] = options[:ignore_above] || 30000
|
336
|
+
|
337
|
+
# conversions
|
338
|
+
Array(options[:conversions]).each do |conversions_field|
|
339
|
+
mapping[conversions_field] = {
|
340
|
+
type: "nested",
|
341
|
+
properties: {
|
342
|
+
query: {type: default_type, analyzer: "searchkick_keyword"},
|
343
|
+
count: {type: "integer"}
|
366
344
|
}
|
367
|
-
|
345
|
+
}
|
346
|
+
end
|
368
347
|
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
348
|
+
mapping_options = Hash[
|
349
|
+
[:suggest, :word, :text_start, :text_middle, :text_end, :word_start, :word_middle, :word_end, :highlight, :searchable, :filterable]
|
350
|
+
.map { |type| [type, (options[type] || []).map(&:to_s)] }
|
351
|
+
]
|
373
352
|
|
374
|
-
|
353
|
+
word = options[:word] != false && (!options[:match] || options[:match] == :word)
|
375
354
|
|
376
|
-
|
355
|
+
mapping_options[:searchable].delete("_all")
|
377
356
|
|
378
|
-
|
357
|
+
analyzed_field_options = {type: default_type, index: true, analyzer: default_analyzer}
|
379
358
|
|
380
|
-
|
381
|
-
|
359
|
+
mapping_options.values.flatten.uniq.each do |field|
|
360
|
+
fields = {}
|
382
361
|
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
362
|
+
if options.key?(:filterable) && !mapping_options[:filterable].include?(field)
|
363
|
+
fields[field] = {type: default_type, index: false}
|
364
|
+
else
|
365
|
+
fields[field] = keyword_mapping
|
366
|
+
end
|
388
367
|
|
389
|
-
|
390
|
-
|
391
|
-
|
368
|
+
if !options[:searchable] || mapping_options[:searchable].include?(field)
|
369
|
+
if word
|
370
|
+
fields[:analyzed] = analyzed_field_options
|
392
371
|
|
393
|
-
|
394
|
-
|
395
|
-
end
|
372
|
+
if mapping_options[:highlight].include?(field)
|
373
|
+
fields[:analyzed][:term_vector] = "with_positions_offsets"
|
396
374
|
end
|
375
|
+
end
|
397
376
|
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
end
|
377
|
+
mapping_options.except(:highlight, :searchable, :filterable, :word).each do |type, f|
|
378
|
+
if options[:match] == type || f.include?(field)
|
379
|
+
fields[type] = {type: default_type, index: true, analyzer: "searchkick_#{type}_index"}
|
402
380
|
end
|
403
381
|
end
|
404
|
-
|
405
|
-
mapping[field] = fields[field].merge(fields: fields.except(field))
|
406
382
|
end
|
407
383
|
|
408
|
-
|
409
|
-
|
410
|
-
type: "geo_point"
|
411
|
-
}
|
412
|
-
end
|
384
|
+
mapping[field] = fields[field].merge(fields: fields.except(field))
|
385
|
+
end
|
413
386
|
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
|
387
|
+
(options[:locations] || []).map(&:to_s).each do |field|
|
388
|
+
mapping[field] = {
|
389
|
+
type: "geo_point"
|
390
|
+
}
|
391
|
+
end
|
418
392
|
|
419
|
-
|
420
|
-
|
421
|
-
|
393
|
+
options[:geo_shape] = options[:geo_shape].product([{}]).to_h if options[:geo_shape].is_a?(Array)
|
394
|
+
(options[:geo_shape] || {}).each do |field, shape_options|
|
395
|
+
mapping[field] = shape_options.merge(type: "geo_shape")
|
396
|
+
end
|
422
397
|
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
398
|
+
if options[:inheritance]
|
399
|
+
mapping[:type] = keyword_mapping
|
400
|
+
end
|
401
|
+
|
402
|
+
routing = {}
|
403
|
+
if options[:routing]
|
404
|
+
routing = {required: true}
|
405
|
+
unless options[:routing] == true
|
406
|
+
routing[:path] = options[:routing].to_s
|
429
407
|
end
|
408
|
+
end
|
430
409
|
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
410
|
+
dynamic_fields = {
|
411
|
+
# analyzed field must be the default field for include_in_all
|
412
|
+
# http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
|
413
|
+
# however, we can include the not_analyzed field in _all
|
414
|
+
# and the _all index analyzer will take care of it
|
415
|
+
"{name}" => keyword_mapping
|
416
|
+
}
|
438
417
|
|
439
|
-
|
440
|
-
|
441
|
-
|
418
|
+
if options.key?(:filterable)
|
419
|
+
dynamic_fields["{name}"] = {type: default_type, index: false}
|
420
|
+
end
|
442
421
|
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
|
422
|
+
unless options[:searchable]
|
423
|
+
if options[:match] && options[:match] != :word
|
424
|
+
dynamic_fields[options[:match]] = {type: default_type, index: true, analyzer: "searchkick_#{options[:match]}_index"}
|
425
|
+
end
|
447
426
|
|
448
|
-
|
449
|
-
|
450
|
-
end
|
427
|
+
if word
|
428
|
+
dynamic_fields[:analyzed] = analyzed_field_options
|
451
429
|
end
|
430
|
+
end
|
452
431
|
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
}
|
432
|
+
# http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
|
433
|
+
multi_field = dynamic_fields["{name}"].merge(fields: dynamic_fields.except("{name}"))
|
434
|
+
|
435
|
+
mappings = {
|
436
|
+
properties: mapping,
|
437
|
+
_routing: routing,
|
438
|
+
# https://gist.github.com/kimchy/2898285
|
439
|
+
dynamic_templates: [
|
440
|
+
{
|
441
|
+
string_template: {
|
442
|
+
match: "*",
|
443
|
+
match_mapping_type: "string",
|
444
|
+
mapping: multi_field
|
467
445
|
}
|
468
|
-
|
469
|
-
|
446
|
+
}
|
447
|
+
]
|
448
|
+
}
|
470
449
|
|
471
|
-
|
472
|
-
|
473
|
-
|
450
|
+
if below70?
|
451
|
+
mappings = {index_type => mappings}
|
452
|
+
end
|
453
|
+
|
454
|
+
mappings
|
455
|
+
end
|
474
456
|
|
475
|
-
|
457
|
+
def add_synonyms(settings)
|
458
|
+
synonyms = options[:synonyms] || []
|
459
|
+
synonyms = synonyms.call if synonyms.respond_to?(:call)
|
460
|
+
if synonyms.any?
|
461
|
+
settings[:analysis][:filter][:searchkick_synonym] = {
|
462
|
+
type: "synonym",
|
463
|
+
# only remove a single space from synonyms so three-word synonyms will fail noisily instead of silently
|
464
|
+
synonyms: synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.map { |s2| s2.sub(/\s+/, "") }.join(",") : s }.map(&:downcase)
|
465
|
+
}
|
466
|
+
# choosing a place for the synonym filter when stemming is not easy
|
467
|
+
# https://groups.google.com/forum/#!topic/elasticsearch/p7qcQlgHdB8
|
468
|
+
# TODO use a snowball stemmer on synonyms when creating the token filter
|
469
|
+
|
470
|
+
# http://elasticsearch-users.115913.n3.nabble.com/synonym-multi-words-search-td4030811.html
|
471
|
+
# I find the following approach effective if you are doing multi-word synonyms (synonym phrases):
|
472
|
+
# - Only apply the synonym expansion at index time
|
473
|
+
# - Don't have the synonym filter applied search
|
474
|
+
# - Use directional synonyms where appropriate. You want to make sure that you're not injecting terms that are too general.
|
475
|
+
settings[:analysis][:analyzer][default_analyzer][:filter].insert(2, "searchkick_synonym")
|
476
|
+
|
477
|
+
%w(word_start word_middle word_end).each do |type|
|
478
|
+
settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_synonym")
|
479
|
+
end
|
476
480
|
end
|
481
|
+
end
|
477
482
|
|
478
|
-
|
479
|
-
|
480
|
-
|
481
|
-
|
483
|
+
def add_search_synonyms(settings)
|
484
|
+
search_synonyms = options[:search_synonyms] || []
|
485
|
+
search_synonyms = search_synonyms.call if search_synonyms.respond_to?(:call)
|
486
|
+
if search_synonyms.is_a?(String) || search_synonyms.any?
|
487
|
+
if search_synonyms.is_a?(String)
|
488
|
+
synonym_graph = {
|
489
|
+
type: "synonym_graph",
|
490
|
+
synonyms_path: search_synonyms
|
491
|
+
}
|
492
|
+
synonym_graph[:updateable] = true unless below73?
|
493
|
+
else
|
494
|
+
synonym_graph = {
|
495
|
+
type: "synonym_graph",
|
496
|
+
# TODO confirm this is correct
|
497
|
+
synonyms: search_synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.join(",") : s }.map(&:downcase)
|
498
|
+
}
|
499
|
+
end
|
500
|
+
settings[:analysis][:filter][:searchkick_synonym_graph] = synonym_graph
|
501
|
+
|
502
|
+
[:searchkick_search2, :searchkick_word_search].each do |analyzer|
|
503
|
+
settings[:analysis][:analyzer][analyzer][:filter].insert(2, "searchkick_synonym_graph")
|
482
504
|
end
|
483
505
|
end
|
506
|
+
end
|
484
507
|
|
485
|
-
|
486
|
-
|
487
|
-
|
508
|
+
def add_wordnet(settings)
|
509
|
+
settings[:analysis][:filter][:searchkick_wordnet] = {
|
510
|
+
type: "synonym",
|
511
|
+
format: "wordnet",
|
512
|
+
synonyms_path: Searchkick.wordnet_path
|
488
513
|
}
|
514
|
+
|
515
|
+
settings[:analysis][:analyzer][default_analyzer][:filter].insert(4, "searchkick_wordnet")
|
516
|
+
settings[:analysis][:analyzer][default_analyzer][:filter] << "searchkick_wordnet"
|
517
|
+
|
518
|
+
%w(word_start word_middle word_end).each do |type|
|
519
|
+
settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_wordnet")
|
520
|
+
end
|
521
|
+
end
|
522
|
+
|
523
|
+
def set_deep_paging(settings)
|
524
|
+
if !settings.dig(:index, :max_result_window) && !settings[:"index.max_result_window"]
|
525
|
+
settings[:index] ||= {}
|
526
|
+
settings[:index][:max_result_window] = 1_000_000_000
|
527
|
+
end
|
528
|
+
end
|
529
|
+
|
530
|
+
def index_type
|
531
|
+
@index_type ||= begin
|
532
|
+
index_type = options[:_type]
|
533
|
+
index_type = index_type.call if index_type.respond_to?(:call)
|
534
|
+
index_type
|
535
|
+
end
|
536
|
+
end
|
537
|
+
|
538
|
+
def default_type
|
539
|
+
"text"
|
540
|
+
end
|
541
|
+
|
542
|
+
def default_analyzer
|
543
|
+
:searchkick_index
|
544
|
+
end
|
545
|
+
|
546
|
+
def below62?
|
547
|
+
Searchkick.server_below?("6.2.0")
|
548
|
+
end
|
549
|
+
|
550
|
+
def below70?
|
551
|
+
Searchkick.server_below?("7.0.0")
|
552
|
+
end
|
553
|
+
|
554
|
+
def below73?
|
555
|
+
Searchkick.server_below?("7.3.0")
|
489
556
|
end
|
490
557
|
end
|
491
558
|
end
|