searchkick 4.2.0 → 4.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +30 -3
- data/LICENSE.txt +1 -1
- data/README.md +253 -202
- data/lib/searchkick.rb +31 -12
- data/lib/searchkick/bulk_indexer.rb +1 -1
- data/lib/searchkick/index.rb +22 -2
- data/lib/searchkick/index_options.rb +468 -382
- data/lib/searchkick/model.rb +11 -6
- data/lib/searchkick/query.rb +40 -6
- data/lib/searchkick/results.rb +10 -0
- data/lib/searchkick/version.rb +1 -1
- data/lib/tasks/searchkick.rake +12 -11
- metadata +3 -46
- data/CONTRIBUTING.md +0 -53
data/lib/searchkick.rb
CHANGED
@@ -1,8 +1,10 @@
|
|
1
|
+
# dependencies
|
1
2
|
require "active_support"
|
2
3
|
require "active_support/core_ext/hash/deep_merge"
|
3
4
|
require "elasticsearch"
|
4
5
|
require "hashie"
|
5
6
|
|
7
|
+
# modules
|
6
8
|
require "searchkick/bulk_indexer"
|
7
9
|
require "searchkick/index"
|
8
10
|
require "searchkick/indexer"
|
@@ -17,6 +19,7 @@ require "searchkick/record_indexer"
|
|
17
19
|
require "searchkick/results"
|
18
20
|
require "searchkick/version"
|
19
21
|
|
22
|
+
# integrations
|
20
23
|
require "searchkick/railtie" if defined?(Rails)
|
21
24
|
require "searchkick/logging" if defined?(ActiveSupport::Notifications)
|
22
25
|
|
@@ -27,6 +30,7 @@ module Searchkick
|
|
27
30
|
autoload :ProcessQueueJob, "searchkick/process_queue_job"
|
28
31
|
autoload :ReindexV2Job, "searchkick/reindex_v2_job"
|
29
32
|
|
33
|
+
# errors
|
30
34
|
class Error < StandardError; end
|
31
35
|
class MissingIndexError < Error; end
|
32
36
|
class UnsupportedVersionError < Error; end
|
@@ -112,7 +116,7 @@ module Searchkick
|
|
112
116
|
end
|
113
117
|
|
114
118
|
options = options.merge(block: block) if block
|
115
|
-
query = Searchkick::Query.new(klass, term, options)
|
119
|
+
query = Searchkick::Query.new(klass, term, **options)
|
116
120
|
if options[:execute] == false
|
117
121
|
query
|
118
122
|
else
|
@@ -142,7 +146,7 @@ module Searchkick
|
|
142
146
|
end
|
143
147
|
end
|
144
148
|
|
145
|
-
def self.callbacks(value)
|
149
|
+
def self.callbacks(value = nil)
|
146
150
|
if block_given?
|
147
151
|
previous_value = callbacks_value
|
148
152
|
begin
|
@@ -160,6 +164,7 @@ module Searchkick
|
|
160
164
|
|
161
165
|
def self.aws_credentials=(creds)
|
162
166
|
begin
|
167
|
+
# TODO remove in Searchkick 5 (just use aws_sigv4)
|
163
168
|
require "faraday_middleware/aws_signers_v4"
|
164
169
|
rescue LoadError
|
165
170
|
require "faraday_middleware/aws_sigv4"
|
@@ -169,17 +174,16 @@ module Searchkick
|
|
169
174
|
end
|
170
175
|
|
171
176
|
def self.reindex_status(index_name)
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
raise Searchkick::Error, "Redis not configured"
|
180
|
-
end
|
177
|
+
raise Searchkick::Error, "Redis not configured" unless redis
|
178
|
+
|
179
|
+
batches_left = Searchkick::Index.new(index_name).batches_left
|
180
|
+
{
|
181
|
+
completed: batches_left == 0,
|
182
|
+
batches_left: batches_left
|
183
|
+
}
|
181
184
|
end
|
182
185
|
|
186
|
+
# TODO use ConnectionPool::Wrapper when redis is set so this is no longer needed
|
183
187
|
def self.with_redis
|
184
188
|
if redis
|
185
189
|
if redis.respond_to?(:with)
|
@@ -249,11 +253,26 @@ module Searchkick
|
|
249
253
|
}
|
250
254
|
end
|
251
255
|
end
|
256
|
+
|
257
|
+
# private
|
258
|
+
# methods are forwarded to base class
|
259
|
+
# this check to see if scope exists on that class
|
260
|
+
# it's a bit tricky, but this seems to work
|
261
|
+
def self.relation?(klass)
|
262
|
+
if klass.respond_to?(:current_scope)
|
263
|
+
!klass.current_scope.nil?
|
264
|
+
elsif defined?(Mongoid::Threaded)
|
265
|
+
!Mongoid::Threaded.current_scope(klass).nil?
|
266
|
+
end
|
267
|
+
end
|
252
268
|
end
|
253
269
|
|
254
|
-
# TODO find better ActiveModel hook
|
255
270
|
require "active_model/callbacks"
|
256
271
|
ActiveModel::Callbacks.include(Searchkick::Model)
|
272
|
+
# TODO use
|
273
|
+
# ActiveSupport.on_load(:mongoid) do
|
274
|
+
# Mongoid::Document::ClassMethods.include Searchkick::Model
|
275
|
+
# end
|
257
276
|
|
258
277
|
ActiveSupport.on_load(:active_record) do
|
259
278
|
extend Searchkick::Model
|
@@ -141,7 +141,7 @@ module Searchkick
|
|
141
141
|
|
142
142
|
def bulk_reindex_job(scope, batch_id, options)
|
143
143
|
Searchkick.with_redis { |r| r.sadd(batches_key, batch_id) }
|
144
|
-
Searchkick::BulkReindexJob.perform_later({
|
144
|
+
Searchkick::BulkReindexJob.perform_later(**{
|
145
145
|
class_name: scope.searchkick_options[:class_name],
|
146
146
|
index_name: index.name,
|
147
147
|
batch_id: batch_id
|
data/lib/searchkick/index.rb
CHANGED
@@ -2,8 +2,6 @@ require "searchkick/index_options"
|
|
2
2
|
|
3
3
|
module Searchkick
|
4
4
|
class Index
|
5
|
-
include IndexOptions
|
6
|
-
|
7
5
|
attr_reader :name, :options
|
8
6
|
|
9
7
|
def initialize(name, options = {})
|
@@ -12,6 +10,10 @@ module Searchkick
|
|
12
10
|
@klass_document_type = {} # cache
|
13
11
|
end
|
14
12
|
|
13
|
+
def index_options
|
14
|
+
IndexOptions.new(self).index_options
|
15
|
+
end
|
16
|
+
|
15
17
|
def create(body = {})
|
16
18
|
client.indices.create index: name, body: body
|
17
19
|
end
|
@@ -174,6 +176,17 @@ module Searchkick
|
|
174
176
|
Searchkick.search(like_text, model: record.class, **options)
|
175
177
|
end
|
176
178
|
|
179
|
+
def reload_synonyms
|
180
|
+
require "elasticsearch/xpack"
|
181
|
+
raise Error, "Requires Elasticsearch 7.3+" if Searchkick.server_below?("7.3.0")
|
182
|
+
raise Error, "Requires elasticsearch-xpack 7.8+" unless client.xpack.respond_to?(:indices)
|
183
|
+
begin
|
184
|
+
client.xpack.indices.reload_search_analyzers(index: name)
|
185
|
+
rescue Elasticsearch::Transport::Transport::Errors::MethodNotAllowed
|
186
|
+
raise Error, "Requires non-OSS version of Elasticsearch"
|
187
|
+
end
|
188
|
+
end
|
189
|
+
|
177
190
|
# queue
|
178
191
|
|
179
192
|
def reindex_queue
|
@@ -184,13 +197,20 @@ module Searchkick
|
|
184
197
|
|
185
198
|
def reindex(relation, method_name, scoped:, full: false, scope: nil, **options)
|
186
199
|
refresh = options.fetch(:refresh, !scoped)
|
200
|
+
options.delete(:refresh)
|
187
201
|
|
188
202
|
if method_name
|
203
|
+
# TODO throw ArgumentError
|
204
|
+
Searchkick.warn("unsupported keywords: #{options.keys.map(&:inspect).join(", ")}") if options.any?
|
205
|
+
|
189
206
|
# update
|
190
207
|
import_scope(relation, method_name: method_name, scope: scope)
|
191
208
|
self.refresh if refresh
|
192
209
|
true
|
193
210
|
elsif scoped && !full
|
211
|
+
# TODO throw ArgumentError
|
212
|
+
Searchkick.warn("unsupported keywords: #{options.keys.map(&:inspect).join(", ")}") if options.any?
|
213
|
+
|
194
214
|
# reindex association
|
195
215
|
import_scope(relation, scope: scope)
|
196
216
|
self.refresh if refresh
|
@@ -1,20 +1,14 @@
|
|
1
1
|
module Searchkick
|
2
|
-
|
3
|
-
|
4
|
-
options = @options
|
5
|
-
language = options[:language]
|
6
|
-
language = language.call if language.respond_to?(:call)
|
2
|
+
class IndexOptions
|
3
|
+
attr_reader :options
|
7
4
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
if below70
|
12
|
-
index_type = options[:_type]
|
13
|
-
index_type = index_type.call if index_type.respond_to?(:call)
|
14
|
-
end
|
5
|
+
def initialize(index)
|
6
|
+
@options = index.options
|
7
|
+
end
|
15
8
|
|
9
|
+
def index_options
|
16
10
|
custom_mapping = options[:mappings] || {}
|
17
|
-
if below70 && custom_mapping.keys.map(&:to_sym).include?(:properties)
|
11
|
+
if below70? && custom_mapping.keys.map(&:to_sym).include?(:properties)
|
18
12
|
# add type
|
19
13
|
custom_mapping = {index_type => custom_mapping}
|
20
14
|
end
|
@@ -23,450 +17,542 @@ module Searchkick
|
|
23
17
|
settings = options[:settings] || {}
|
24
18
|
mappings = custom_mapping
|
25
19
|
else
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
analyzer: {
|
38
|
-
searchkick_keyword: {
|
39
|
-
type: "custom",
|
40
|
-
tokenizer: "keyword",
|
41
|
-
filter: ["lowercase"] + (options[:stem_conversions] ? ["searchkick_stemmer"] : [])
|
42
|
-
},
|
43
|
-
default_analyzer => {
|
44
|
-
type: "custom",
|
45
|
-
# character filters -> tokenizer -> token filters
|
46
|
-
# https://www.elastic.co/guide/en/elasticsearch/guide/current/analysis-intro.html
|
47
|
-
char_filter: ["ampersand"],
|
48
|
-
tokenizer: "standard",
|
49
|
-
# synonym should come last, after stemming and shingle
|
50
|
-
# shingle must come before searchkick_stemmer
|
51
|
-
filter: ["lowercase", "asciifolding", "searchkick_index_shingle", "searchkick_stemmer"]
|
52
|
-
},
|
53
|
-
searchkick_search: {
|
54
|
-
type: "custom",
|
55
|
-
char_filter: ["ampersand"],
|
56
|
-
tokenizer: "standard",
|
57
|
-
filter: ["lowercase", "asciifolding", "searchkick_search_shingle", "searchkick_stemmer"]
|
58
|
-
},
|
59
|
-
searchkick_search2: {
|
60
|
-
type: "custom",
|
61
|
-
char_filter: ["ampersand"],
|
62
|
-
tokenizer: "standard",
|
63
|
-
filter: ["lowercase", "asciifolding", "searchkick_stemmer"]
|
64
|
-
},
|
65
|
-
# https://github.com/leschenko/elasticsearch_autocomplete/blob/master/lib/elasticsearch_autocomplete/analyzers.rb
|
66
|
-
searchkick_autocomplete_search: {
|
67
|
-
type: "custom",
|
68
|
-
tokenizer: "keyword",
|
69
|
-
filter: ["lowercase", "asciifolding"]
|
70
|
-
},
|
71
|
-
searchkick_word_search: {
|
72
|
-
type: "custom",
|
73
|
-
tokenizer: "standard",
|
74
|
-
filter: ["lowercase", "asciifolding"]
|
75
|
-
},
|
76
|
-
searchkick_suggest_index: {
|
77
|
-
type: "custom",
|
78
|
-
tokenizer: "standard",
|
79
|
-
filter: ["lowercase", "asciifolding", "searchkick_suggest_shingle"]
|
80
|
-
},
|
81
|
-
searchkick_text_start_index: {
|
82
|
-
type: "custom",
|
83
|
-
tokenizer: "keyword",
|
84
|
-
filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
|
85
|
-
},
|
86
|
-
searchkick_text_middle_index: {
|
87
|
-
type: "custom",
|
88
|
-
tokenizer: "keyword",
|
89
|
-
filter: ["lowercase", "asciifolding", "searchkick_ngram"]
|
90
|
-
},
|
91
|
-
searchkick_text_end_index: {
|
92
|
-
type: "custom",
|
93
|
-
tokenizer: "keyword",
|
94
|
-
filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
|
95
|
-
},
|
96
|
-
searchkick_word_start_index: {
|
97
|
-
type: "custom",
|
98
|
-
tokenizer: "standard",
|
99
|
-
filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
|
100
|
-
},
|
101
|
-
searchkick_word_middle_index: {
|
102
|
-
type: "custom",
|
103
|
-
tokenizer: "standard",
|
104
|
-
filter: ["lowercase", "asciifolding", "searchkick_ngram"]
|
105
|
-
},
|
106
|
-
searchkick_word_end_index: {
|
107
|
-
type: "custom",
|
108
|
-
tokenizer: "standard",
|
109
|
-
filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
|
110
|
-
}
|
111
|
-
},
|
112
|
-
filter: {
|
113
|
-
searchkick_index_shingle: {
|
114
|
-
type: "shingle",
|
115
|
-
token_separator: ""
|
116
|
-
},
|
117
|
-
# lucky find https://web.archiveorange.com/archive/v/AAfXfQ17f57FcRINsof7
|
118
|
-
searchkick_search_shingle: {
|
119
|
-
type: "shingle",
|
120
|
-
token_separator: "",
|
121
|
-
output_unigrams: false,
|
122
|
-
output_unigrams_if_no_shingles: true
|
123
|
-
},
|
124
|
-
searchkick_suggest_shingle: {
|
125
|
-
type: "shingle",
|
126
|
-
max_shingle_size: 5
|
127
|
-
},
|
128
|
-
searchkick_edge_ngram: {
|
129
|
-
type: "edgeNGram",
|
130
|
-
min_gram: 1,
|
131
|
-
max_gram: 50
|
132
|
-
},
|
133
|
-
searchkick_ngram: {
|
134
|
-
type: "nGram",
|
135
|
-
min_gram: 1,
|
136
|
-
max_gram: 50
|
137
|
-
},
|
138
|
-
searchkick_stemmer: {
|
139
|
-
# use stemmer if language is lowercase, snowball otherwise
|
140
|
-
type: language == language.to_s.downcase ? "stemmer" : "snowball",
|
141
|
-
language: language || "English"
|
142
|
-
}
|
143
|
-
},
|
144
|
-
char_filter: {
|
145
|
-
# https://www.elastic.co/guide/en/elasticsearch/guide/current/custom-analyzers.html
|
146
|
-
# &_to_and
|
147
|
-
ampersand: {
|
148
|
-
type: "mapping",
|
149
|
-
mappings: ["&=> and "]
|
150
|
-
}
|
151
|
-
}
|
152
|
-
}
|
153
|
-
}
|
20
|
+
settings = generate_settings
|
21
|
+
mappings = generate_mappings.symbolize_keys.deep_merge(custom_mapping.symbolize_keys)
|
22
|
+
end
|
23
|
+
|
24
|
+
set_deep_paging(settings) if options[:deep_paging]
|
25
|
+
|
26
|
+
{
|
27
|
+
settings: settings,
|
28
|
+
mappings: mappings
|
29
|
+
}
|
30
|
+
end
|
154
31
|
|
155
|
-
|
32
|
+
def generate_settings
|
33
|
+
language = options[:language]
|
34
|
+
language = language.call if language.respond_to?(:call)
|
156
35
|
|
157
|
-
|
158
|
-
|
159
|
-
|
36
|
+
settings = {
|
37
|
+
analysis: {
|
38
|
+
analyzer: {
|
39
|
+
searchkick_keyword: {
|
40
|
+
type: "custom",
|
41
|
+
tokenizer: "keyword",
|
42
|
+
filter: ["lowercase"] + (options[:stem_conversions] ? ["searchkick_stemmer"] : [])
|
43
|
+
},
|
160
44
|
default_analyzer => {
|
161
|
-
type: "
|
45
|
+
type: "custom",
|
46
|
+
# character filters -> tokenizer -> token filters
|
47
|
+
# https://www.elastic.co/guide/en/elasticsearch/guide/current/analysis-intro.html
|
48
|
+
char_filter: ["ampersand"],
|
49
|
+
tokenizer: "standard",
|
50
|
+
# synonym should come last, after stemming and shingle
|
51
|
+
# shingle must come before searchkick_stemmer
|
52
|
+
filter: ["lowercase", "asciifolding", "searchkick_index_shingle", "searchkick_stemmer"]
|
162
53
|
},
|
163
54
|
searchkick_search: {
|
164
|
-
type: "
|
55
|
+
type: "custom",
|
56
|
+
char_filter: ["ampersand"],
|
57
|
+
tokenizer: "standard",
|
58
|
+
filter: ["lowercase", "asciifolding", "searchkick_search_shingle", "searchkick_stemmer"]
|
165
59
|
},
|
166
60
|
searchkick_search2: {
|
167
|
-
type: "
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
stem = false
|
172
|
-
when "chinese2", "smartcn"
|
173
|
-
settings[:analysis][:analyzer].merge!(
|
174
|
-
default_analyzer => {
|
175
|
-
type: "smartcn"
|
61
|
+
type: "custom",
|
62
|
+
char_filter: ["ampersand"],
|
63
|
+
tokenizer: "standard",
|
64
|
+
filter: ["lowercase", "asciifolding", "searchkick_stemmer"]
|
176
65
|
},
|
177
|
-
|
178
|
-
|
66
|
+
# https://github.com/leschenko/elasticsearch_autocomplete/blob/master/lib/elasticsearch_autocomplete/analyzers.rb
|
67
|
+
searchkick_autocomplete_search: {
|
68
|
+
type: "custom",
|
69
|
+
tokenizer: "keyword",
|
70
|
+
filter: ["lowercase", "asciifolding"]
|
179
71
|
},
|
180
|
-
|
181
|
-
type: "
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
stem = false
|
186
|
-
when "japanese"
|
187
|
-
settings[:analysis][:analyzer].merge!(
|
188
|
-
default_analyzer => {
|
189
|
-
type: "kuromoji"
|
72
|
+
searchkick_word_search: {
|
73
|
+
type: "custom",
|
74
|
+
tokenizer: "standard",
|
75
|
+
filter: ["lowercase", "asciifolding"]
|
190
76
|
},
|
191
|
-
|
192
|
-
type: "
|
77
|
+
searchkick_suggest_index: {
|
78
|
+
type: "custom",
|
79
|
+
tokenizer: "standard",
|
80
|
+
filter: ["lowercase", "asciifolding", "searchkick_suggest_shingle"]
|
193
81
|
},
|
194
|
-
|
195
|
-
type: "
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
stem = false
|
200
|
-
when "korean"
|
201
|
-
settings[:analysis][:analyzer].merge!(
|
202
|
-
default_analyzer => {
|
203
|
-
type: "openkoreantext-analyzer"
|
82
|
+
searchkick_text_start_index: {
|
83
|
+
type: "custom",
|
84
|
+
tokenizer: "keyword",
|
85
|
+
filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
|
204
86
|
},
|
205
|
-
|
206
|
-
type: "
|
87
|
+
searchkick_text_middle_index: {
|
88
|
+
type: "custom",
|
89
|
+
tokenizer: "keyword",
|
90
|
+
filter: ["lowercase", "asciifolding", "searchkick_ngram"]
|
207
91
|
},
|
208
|
-
|
209
|
-
type: "
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
stem = false
|
214
|
-
when "korean2"
|
215
|
-
settings[:analysis][:analyzer].merge!(
|
216
|
-
default_analyzer => {
|
217
|
-
type: "nori"
|
92
|
+
searchkick_text_end_index: {
|
93
|
+
type: "custom",
|
94
|
+
tokenizer: "keyword",
|
95
|
+
filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
|
218
96
|
},
|
219
|
-
|
220
|
-
type: "
|
97
|
+
searchkick_word_start_index: {
|
98
|
+
type: "custom",
|
99
|
+
tokenizer: "standard",
|
100
|
+
filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
|
221
101
|
},
|
222
|
-
|
223
|
-
type: "
|
102
|
+
searchkick_word_middle_index: {
|
103
|
+
type: "custom",
|
104
|
+
tokenizer: "standard",
|
105
|
+
filter: ["lowercase", "asciifolding", "searchkick_ngram"]
|
106
|
+
},
|
107
|
+
searchkick_word_end_index: {
|
108
|
+
type: "custom",
|
109
|
+
tokenizer: "standard",
|
110
|
+
filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
|
224
111
|
}
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
default_analyzer => {
|
231
|
-
type: "vi_analyzer"
|
112
|
+
},
|
113
|
+
filter: {
|
114
|
+
searchkick_index_shingle: {
|
115
|
+
type: "shingle",
|
116
|
+
token_separator: ""
|
232
117
|
},
|
233
|
-
|
234
|
-
|
118
|
+
# lucky find https://web.archiveorange.com/archive/v/AAfXfQ17f57FcRINsof7
|
119
|
+
searchkick_search_shingle: {
|
120
|
+
type: "shingle",
|
121
|
+
token_separator: "",
|
122
|
+
output_unigrams: false,
|
123
|
+
output_unigrams_if_no_shingles: true
|
235
124
|
},
|
236
|
-
|
237
|
-
type: "
|
238
|
-
|
239
|
-
)
|
240
|
-
|
241
|
-
stem = false
|
242
|
-
when "polish", "ukrainian"
|
243
|
-
settings[:analysis][:analyzer].merge!(
|
244
|
-
default_analyzer => {
|
245
|
-
type: language
|
125
|
+
searchkick_suggest_shingle: {
|
126
|
+
type: "shingle",
|
127
|
+
max_shingle_size: 5
|
246
128
|
},
|
247
|
-
|
248
|
-
type:
|
129
|
+
searchkick_edge_ngram: {
|
130
|
+
type: "edge_ngram",
|
131
|
+
min_gram: 1,
|
132
|
+
max_gram: 50
|
249
133
|
},
|
250
|
-
|
251
|
-
type:
|
134
|
+
searchkick_ngram: {
|
135
|
+
type: "ngram",
|
136
|
+
min_gram: 1,
|
137
|
+
max_gram: 50
|
138
|
+
},
|
139
|
+
searchkick_stemmer: {
|
140
|
+
# use stemmer if language is lowercase, snowball otherwise
|
141
|
+
type: language == language.to_s.downcase ? "stemmer" : "snowball",
|
142
|
+
language: language || "English"
|
143
|
+
}
|
144
|
+
},
|
145
|
+
char_filter: {
|
146
|
+
# https://www.elastic.co/guide/en/elasticsearch/guide/current/custom-analyzers.html
|
147
|
+
# &_to_and
|
148
|
+
ampersand: {
|
149
|
+
type: "mapping",
|
150
|
+
mappings: ["&=> and "]
|
252
151
|
}
|
253
|
-
|
152
|
+
}
|
153
|
+
}
|
154
|
+
}
|
254
155
|
|
255
|
-
|
256
|
-
|
156
|
+
update_language(settings, language)
|
157
|
+
update_stemming(settings)
|
257
158
|
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
159
|
+
if Searchkick.env == "test"
|
160
|
+
settings[:number_of_shards] = 1
|
161
|
+
settings[:number_of_replicas] = 0
|
162
|
+
end
|
262
163
|
|
263
|
-
|
264
|
-
|
265
|
-
|
164
|
+
# TODO remove in Searchkick 5 (classic no longer supported)
|
165
|
+
if options[:similarity]
|
166
|
+
settings[:similarity] = {default: {type: options[:similarity]}}
|
167
|
+
end
|
266
168
|
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
169
|
+
unless below62?
|
170
|
+
settings[:index] = {
|
171
|
+
max_ngram_diff: 49,
|
172
|
+
max_shingle_diff: 4
|
173
|
+
}
|
174
|
+
end
|
273
175
|
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
end
|
176
|
+
if options[:case_sensitive]
|
177
|
+
settings[:analysis][:analyzer].each do |_, analyzer|
|
178
|
+
analyzer[:filter].delete("lowercase")
|
278
179
|
end
|
180
|
+
end
|
279
181
|
|
280
|
-
|
281
|
-
|
282
|
-
settings[:analysis][:analyzer].each do |_, analyzer|
|
283
|
-
analyzer[:filter].delete("searchkick_stemmer") if analyzer[:filter]
|
284
|
-
end
|
285
|
-
end
|
182
|
+
# TODO do this last in Searchkick 5
|
183
|
+
settings = settings.symbolize_keys.deep_merge((options[:settings] || {}).symbolize_keys)
|
286
184
|
|
287
|
-
|
185
|
+
add_synonyms(settings)
|
186
|
+
add_search_synonyms(settings)
|
187
|
+
# TODO remove in Searchkick 5
|
188
|
+
add_wordnet(settings) if options[:wordnet]
|
288
189
|
|
289
|
-
|
290
|
-
|
190
|
+
if options[:special_characters] == false
|
191
|
+
settings[:analysis][:analyzer].each_value do |analyzer_settings|
|
192
|
+
analyzer_settings[:filter].reject! { |f| f == "asciifolding" }
|
193
|
+
end
|
194
|
+
end
|
291
195
|
|
292
|
-
|
196
|
+
settings
|
197
|
+
end
|
293
198
|
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
199
|
+
def update_language(settings, language)
|
200
|
+
case language
|
201
|
+
when "chinese"
|
202
|
+
settings[:analysis][:analyzer].merge!(
|
203
|
+
default_analyzer => {
|
204
|
+
type: "ik_smart"
|
205
|
+
},
|
206
|
+
searchkick_search: {
|
207
|
+
type: "ik_smart"
|
208
|
+
},
|
209
|
+
searchkick_search2: {
|
210
|
+
type: "ik_max_word"
|
299
211
|
}
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
212
|
+
)
|
213
|
+
when "chinese2", "smartcn"
|
214
|
+
settings[:analysis][:analyzer].merge!(
|
215
|
+
default_analyzer => {
|
216
|
+
type: "smartcn"
|
217
|
+
},
|
218
|
+
searchkick_search: {
|
219
|
+
type: "smartcn"
|
220
|
+
},
|
221
|
+
searchkick_search2: {
|
222
|
+
type: "smartcn"
|
223
|
+
}
|
224
|
+
)
|
225
|
+
when "japanese"
|
226
|
+
settings[:analysis][:analyzer].merge!(
|
227
|
+
default_analyzer => {
|
228
|
+
type: "kuromoji"
|
229
|
+
},
|
230
|
+
searchkick_search: {
|
231
|
+
type: "kuromoji"
|
232
|
+
},
|
233
|
+
searchkick_search2: {
|
234
|
+
type: "kuromoji"
|
235
|
+
}
|
236
|
+
)
|
237
|
+
when "korean"
|
238
|
+
settings[:analysis][:analyzer].merge!(
|
239
|
+
default_analyzer => {
|
240
|
+
type: "openkoreantext-analyzer"
|
241
|
+
},
|
242
|
+
searchkick_search: {
|
243
|
+
type: "openkoreantext-analyzer"
|
244
|
+
},
|
245
|
+
searchkick_search2: {
|
246
|
+
type: "openkoreantext-analyzer"
|
247
|
+
}
|
248
|
+
)
|
249
|
+
when "korean2"
|
250
|
+
settings[:analysis][:analyzer].merge!(
|
251
|
+
default_analyzer => {
|
252
|
+
type: "nori"
|
253
|
+
},
|
254
|
+
searchkick_search: {
|
255
|
+
type: "nori"
|
256
|
+
},
|
257
|
+
searchkick_search2: {
|
258
|
+
type: "nori"
|
321
259
|
}
|
260
|
+
)
|
261
|
+
when "vietnamese"
|
262
|
+
settings[:analysis][:analyzer].merge!(
|
263
|
+
default_analyzer => {
|
264
|
+
type: "vi_analyzer"
|
265
|
+
},
|
266
|
+
searchkick_search: {
|
267
|
+
type: "vi_analyzer"
|
268
|
+
},
|
269
|
+
searchkick_search2: {
|
270
|
+
type: "vi_analyzer"
|
271
|
+
}
|
272
|
+
)
|
273
|
+
when "polish", "ukrainian"
|
274
|
+
settings[:analysis][:analyzer].merge!(
|
275
|
+
default_analyzer => {
|
276
|
+
type: language
|
277
|
+
},
|
278
|
+
searchkick_search: {
|
279
|
+
type: language
|
280
|
+
},
|
281
|
+
searchkick_search2: {
|
282
|
+
type: language
|
283
|
+
}
|
284
|
+
)
|
285
|
+
end
|
286
|
+
end
|
322
287
|
|
323
|
-
|
324
|
-
|
288
|
+
def update_stemming(settings)
|
289
|
+
stem = options[:stem]
|
325
290
|
|
326
|
-
|
327
|
-
|
328
|
-
end
|
329
|
-
end
|
291
|
+
# language analyzer used
|
292
|
+
stem = false if settings[:analysis][:analyzer][default_analyzer][:type] != "custom"
|
330
293
|
|
331
|
-
|
332
|
-
|
333
|
-
|
334
|
-
|
294
|
+
if stem == false
|
295
|
+
settings[:analysis][:filter].delete(:searchkick_stemmer)
|
296
|
+
settings[:analysis][:analyzer].each do |_, analyzer|
|
297
|
+
analyzer[:filter].delete("searchkick_stemmer") if analyzer[:filter]
|
335
298
|
end
|
299
|
+
end
|
336
300
|
|
337
|
-
|
301
|
+
if options[:stemmer_override]
|
302
|
+
stemmer_override = {
|
303
|
+
type: "stemmer_override"
|
304
|
+
}
|
305
|
+
if options[:stemmer_override].is_a?(String)
|
306
|
+
stemmer_override[:rules_path] = options[:stemmer_override]
|
307
|
+
else
|
308
|
+
stemmer_override[:rules] = options[:stemmer_override]
|
309
|
+
end
|
310
|
+
settings[:analysis][:filter][:searchkick_stemmer_override] = stemmer_override
|
338
311
|
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
type: "nested",
|
343
|
-
properties: {
|
344
|
-
query: {type: default_type, analyzer: "searchkick_keyword"},
|
345
|
-
count: {type: "integer"}
|
346
|
-
}
|
347
|
-
}
|
312
|
+
settings[:analysis][:analyzer].each do |_, analyzer|
|
313
|
+
stemmer_index = analyzer[:filter].index("searchkick_stemmer") if analyzer[:filter]
|
314
|
+
analyzer[:filter].insert(stemmer_index, "searchkick_stemmer_override") if stemmer_index
|
348
315
|
end
|
316
|
+
end
|
349
317
|
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
318
|
+
if options[:stem_exclusion]
|
319
|
+
settings[:analysis][:filter][:searchkick_stem_exclusion] = {
|
320
|
+
type: "keyword_marker",
|
321
|
+
keywords: options[:stem_exclusion]
|
322
|
+
}
|
354
323
|
|
355
|
-
|
324
|
+
settings[:analysis][:analyzer].each do |_, analyzer|
|
325
|
+
stemmer_index = analyzer[:filter].index("searchkick_stemmer") if analyzer[:filter]
|
326
|
+
analyzer[:filter].insert(stemmer_index, "searchkick_stem_exclusion") if stemmer_index
|
327
|
+
end
|
328
|
+
end
|
329
|
+
end
|
356
330
|
|
357
|
-
|
331
|
+
def generate_mappings
|
332
|
+
mapping = {}
|
358
333
|
|
359
|
-
|
334
|
+
keyword_mapping = {type: "keyword"}
|
335
|
+
keyword_mapping[:ignore_above] = options[:ignore_above] || 30000
|
360
336
|
|
361
|
-
|
362
|
-
|
337
|
+
# conversions
|
338
|
+
Array(options[:conversions]).each do |conversions_field|
|
339
|
+
mapping[conversions_field] = {
|
340
|
+
type: "nested",
|
341
|
+
properties: {
|
342
|
+
query: {type: default_type, analyzer: "searchkick_keyword"},
|
343
|
+
count: {type: "integer"}
|
344
|
+
}
|
345
|
+
}
|
346
|
+
end
|
363
347
|
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
end
|
348
|
+
mapping_options = Hash[
|
349
|
+
[:suggest, :word, :text_start, :text_middle, :text_end, :word_start, :word_middle, :word_end, :highlight, :searchable, :filterable]
|
350
|
+
.map { |type| [type, (options[type] || []).map(&:to_s)] }
|
351
|
+
]
|
369
352
|
|
370
|
-
|
371
|
-
if word
|
372
|
-
fields[:analyzed] = analyzed_field_options
|
353
|
+
word = options[:word] != false && (!options[:match] || options[:match] == :word)
|
373
354
|
|
374
|
-
|
375
|
-
fields[:analyzed][:term_vector] = "with_positions_offsets"
|
376
|
-
end
|
377
|
-
end
|
355
|
+
mapping_options[:searchable].delete("_all")
|
378
356
|
|
379
|
-
|
380
|
-
if options[:match] == type || f.include?(field)
|
381
|
-
fields[type] = {type: default_type, index: index_true_value, analyzer: "searchkick_#{type}_index"}
|
382
|
-
end
|
383
|
-
end
|
384
|
-
end
|
357
|
+
analyzed_field_options = {type: default_type, index: true, analyzer: default_analyzer}
|
385
358
|
|
386
|
-
|
387
|
-
|
359
|
+
mapping_options.values.flatten.uniq.each do |field|
|
360
|
+
fields = {}
|
388
361
|
|
389
|
-
(
|
390
|
-
|
391
|
-
|
392
|
-
|
362
|
+
if options.key?(:filterable) && !mapping_options[:filterable].include?(field)
|
363
|
+
fields[field] = {type: default_type, index: false}
|
364
|
+
else
|
365
|
+
fields[field] = keyword_mapping
|
393
366
|
end
|
394
367
|
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
end
|
368
|
+
if !options[:searchable] || mapping_options[:searchable].include?(field)
|
369
|
+
if word
|
370
|
+
fields[:analyzed] = analyzed_field_options
|
399
371
|
|
400
|
-
|
401
|
-
|
402
|
-
|
372
|
+
if mapping_options[:highlight].include?(field)
|
373
|
+
fields[:analyzed][:term_vector] = "with_positions_offsets"
|
374
|
+
end
|
375
|
+
end
|
403
376
|
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
routing[:path] = options[:routing].to_s
|
377
|
+
mapping_options.except(:highlight, :searchable, :filterable, :word).each do |type, f|
|
378
|
+
if options[:match] == type || f.include?(field)
|
379
|
+
fields[type] = {type: default_type, index: true, analyzer: "searchkick_#{type}_index"}
|
380
|
+
end
|
409
381
|
end
|
410
382
|
end
|
411
383
|
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
"
|
384
|
+
mapping[field] = fields[field].merge(fields: fields.except(field))
|
385
|
+
end
|
386
|
+
|
387
|
+
(options[:locations] || []).map(&:to_s).each do |field|
|
388
|
+
mapping[field] = {
|
389
|
+
type: "geo_point"
|
418
390
|
}
|
391
|
+
end
|
419
392
|
|
420
|
-
|
421
|
-
|
422
|
-
|
393
|
+
options[:geo_shape] = options[:geo_shape].product([{}]).to_h if options[:geo_shape].is_a?(Array)
|
394
|
+
(options[:geo_shape] || {}).each do |field, shape_options|
|
395
|
+
mapping[field] = shape_options.merge(type: "geo_shape")
|
396
|
+
end
|
423
397
|
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
end
|
398
|
+
if options[:inheritance]
|
399
|
+
mapping[:type] = keyword_mapping
|
400
|
+
end
|
428
401
|
|
429
|
-
|
430
|
-
|
431
|
-
|
402
|
+
routing = {}
|
403
|
+
if options[:routing]
|
404
|
+
routing = {required: true}
|
405
|
+
unless options[:routing] == true
|
406
|
+
routing[:path] = options[:routing].to_s
|
432
407
|
end
|
408
|
+
end
|
433
409
|
|
410
|
+
dynamic_fields = {
|
411
|
+
# analyzed field must be the default field for include_in_all
|
434
412
|
# http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
_routing: routing,
|
440
|
-
# https://gist.github.com/kimchy/2898285
|
441
|
-
dynamic_templates: [
|
442
|
-
{
|
443
|
-
string_template: {
|
444
|
-
match: "*",
|
445
|
-
match_mapping_type: "string",
|
446
|
-
mapping: multi_field
|
447
|
-
}
|
448
|
-
}
|
449
|
-
]
|
450
|
-
}
|
413
|
+
# however, we can include the not_analyzed field in _all
|
414
|
+
# and the _all index analyzer will take care of it
|
415
|
+
"{name}" => keyword_mapping
|
416
|
+
}
|
451
417
|
|
452
|
-
|
453
|
-
|
418
|
+
if options.key?(:filterable)
|
419
|
+
dynamic_fields["{name}"] = {type: default_type, index: false}
|
420
|
+
end
|
421
|
+
|
422
|
+
unless options[:searchable]
|
423
|
+
if options[:match] && options[:match] != :word
|
424
|
+
dynamic_fields[options[:match]] = {type: default_type, index: true, analyzer: "searchkick_#{options[:match]}_index"}
|
454
425
|
end
|
455
426
|
|
456
|
-
|
427
|
+
if word
|
428
|
+
dynamic_fields[:analyzed] = analyzed_field_options
|
429
|
+
end
|
457
430
|
end
|
458
431
|
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
|
432
|
+
# http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
|
433
|
+
multi_field = dynamic_fields["{name}"].merge(fields: dynamic_fields.except("{name}"))
|
434
|
+
|
435
|
+
mappings = {
|
436
|
+
properties: mapping,
|
437
|
+
_routing: routing,
|
438
|
+
# https://gist.github.com/kimchy/2898285
|
439
|
+
dynamic_templates: [
|
440
|
+
{
|
441
|
+
string_template: {
|
442
|
+
match: "*",
|
443
|
+
match_mapping_type: "string",
|
444
|
+
mapping: multi_field
|
445
|
+
}
|
446
|
+
}
|
447
|
+
]
|
448
|
+
}
|
449
|
+
|
450
|
+
if below70?
|
451
|
+
mappings = {index_type => mappings}
|
452
|
+
end
|
453
|
+
|
454
|
+
mappings
|
455
|
+
end
|
456
|
+
|
457
|
+
def add_synonyms(settings)
|
458
|
+
synonyms = options[:synonyms] || []
|
459
|
+
synonyms = synonyms.call if synonyms.respond_to?(:call)
|
460
|
+
if synonyms.any?
|
461
|
+
settings[:analysis][:filter][:searchkick_synonym] = {
|
462
|
+
type: "synonym",
|
463
|
+
# only remove a single space from synonyms so three-word synonyms will fail noisily instead of silently
|
464
|
+
synonyms: synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.map { |s2| s2.sub(/\s+/, "") }.join(",") : s }.map(&:downcase)
|
465
|
+
}
|
466
|
+
# choosing a place for the synonym filter when stemming is not easy
|
467
|
+
# https://groups.google.com/forum/#!topic/elasticsearch/p7qcQlgHdB8
|
468
|
+
# TODO use a snowball stemmer on synonyms when creating the token filter
|
469
|
+
|
470
|
+
# http://elasticsearch-users.115913.n3.nabble.com/synonym-multi-words-search-td4030811.html
|
471
|
+
# I find the following approach effective if you are doing multi-word synonyms (synonym phrases):
|
472
|
+
# - Only apply the synonym expansion at index time
|
473
|
+
# - Don't have the synonym filter applied search
|
474
|
+
# - Use directional synonyms where appropriate. You want to make sure that you're not injecting terms that are too general.
|
475
|
+
settings[:analysis][:analyzer][default_analyzer][:filter].insert(2, "searchkick_synonym")
|
476
|
+
|
477
|
+
%w(word_start word_middle word_end).each do |type|
|
478
|
+
settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_synonym")
|
479
|
+
end
|
480
|
+
end
|
481
|
+
end
|
482
|
+
|
483
|
+
def add_search_synonyms(settings)
|
484
|
+
search_synonyms = options[:search_synonyms] || []
|
485
|
+
search_synonyms = search_synonyms.call if search_synonyms.respond_to?(:call)
|
486
|
+
if search_synonyms.is_a?(String) || search_synonyms.any?
|
487
|
+
if search_synonyms.is_a?(String)
|
488
|
+
synonym_graph = {
|
489
|
+
type: "synonym_graph",
|
490
|
+
synonyms_path: search_synonyms
|
491
|
+
}
|
492
|
+
synonym_graph[:updateable] = true unless below73?
|
493
|
+
else
|
494
|
+
synonym_graph = {
|
495
|
+
type: "synonym_graph",
|
496
|
+
# TODO confirm this is correct
|
497
|
+
synonyms: search_synonyms.select { |s| s.size > 1 }.map { |s| s.is_a?(Array) ? s.join(",") : s }.map(&:downcase)
|
498
|
+
}
|
499
|
+
end
|
500
|
+
settings[:analysis][:filter][:searchkick_synonym_graph] = synonym_graph
|
501
|
+
|
502
|
+
[:searchkick_search2, :searchkick_word_search].each do |analyzer|
|
503
|
+
settings[:analysis][:analyzer][analyzer][:filter].insert(2, "searchkick_synonym_graph")
|
463
504
|
end
|
464
505
|
end
|
506
|
+
end
|
465
507
|
|
466
|
-
|
467
|
-
|
468
|
-
|
508
|
+
def add_wordnet(settings)
|
509
|
+
settings[:analysis][:filter][:searchkick_wordnet] = {
|
510
|
+
type: "synonym",
|
511
|
+
format: "wordnet",
|
512
|
+
synonyms_path: Searchkick.wordnet_path
|
469
513
|
}
|
514
|
+
|
515
|
+
settings[:analysis][:analyzer][default_analyzer][:filter].insert(4, "searchkick_wordnet")
|
516
|
+
settings[:analysis][:analyzer][default_analyzer][:filter] << "searchkick_wordnet"
|
517
|
+
|
518
|
+
%w(word_start word_middle word_end).each do |type|
|
519
|
+
settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, "searchkick_wordnet")
|
520
|
+
end
|
521
|
+
end
|
522
|
+
|
523
|
+
def set_deep_paging(settings)
|
524
|
+
if !settings.dig(:index, :max_result_window) && !settings[:"index.max_result_window"]
|
525
|
+
settings[:index] ||= {}
|
526
|
+
settings[:index][:max_result_window] = 1_000_000_000
|
527
|
+
end
|
528
|
+
end
|
529
|
+
|
530
|
+
def index_type
|
531
|
+
@index_type ||= begin
|
532
|
+
index_type = options[:_type]
|
533
|
+
index_type = index_type.call if index_type.respond_to?(:call)
|
534
|
+
index_type
|
535
|
+
end
|
536
|
+
end
|
537
|
+
|
538
|
+
def default_type
|
539
|
+
"text"
|
540
|
+
end
|
541
|
+
|
542
|
+
def default_analyzer
|
543
|
+
:searchkick_index
|
544
|
+
end
|
545
|
+
|
546
|
+
def below62?
|
547
|
+
Searchkick.server_below?("6.2.0")
|
548
|
+
end
|
549
|
+
|
550
|
+
def below70?
|
551
|
+
Searchkick.server_below?("7.0.0")
|
552
|
+
end
|
553
|
+
|
554
|
+
def below73?
|
555
|
+
Searchkick.server_below?("7.3.0")
|
470
556
|
end
|
471
557
|
end
|
472
558
|
end
|