searchkick_bharthur 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +22 -0
- data/.travis.yml +44 -0
- data/CHANGELOG.md +360 -0
- data/Gemfile +8 -0
- data/LICENSE.txt +22 -0
- data/README.md +1443 -0
- data/Rakefile +8 -0
- data/lib/searchkick/index.rb +662 -0
- data/lib/searchkick/logging.rb +185 -0
- data/lib/searchkick/middleware.rb +12 -0
- data/lib/searchkick/model.rb +105 -0
- data/lib/searchkick/query.rb +845 -0
- data/lib/searchkick/reindex_job.rb +26 -0
- data/lib/searchkick/reindex_v2_job.rb +23 -0
- data/lib/searchkick/results.rb +211 -0
- data/lib/searchkick/tasks.rb +33 -0
- data/lib/searchkick/version.rb +3 -0
- data/lib/searchkick.rb +159 -0
- data/searchkick.gemspec +28 -0
- data/test/aggs_test.rb +115 -0
- data/test/autocomplete_test.rb +65 -0
- data/test/boost_test.rb +144 -0
- data/test/callbacks_test.rb +27 -0
- data/test/ci/before_install.sh +21 -0
- data/test/dangerous_reindex_test.rb +27 -0
- data/test/facets_test.rb +90 -0
- data/test/gemfiles/activerecord31.gemfile +7 -0
- data/test/gemfiles/activerecord32.gemfile +7 -0
- data/test/gemfiles/activerecord40.gemfile +8 -0
- data/test/gemfiles/activerecord41.gemfile +8 -0
- data/test/gemfiles/activerecord50.gemfile +7 -0
- data/test/gemfiles/apartment.gemfile +8 -0
- data/test/gemfiles/mongoid2.gemfile +7 -0
- data/test/gemfiles/mongoid3.gemfile +6 -0
- data/test/gemfiles/mongoid4.gemfile +7 -0
- data/test/gemfiles/mongoid5.gemfile +7 -0
- data/test/gemfiles/nobrainer.gemfile +6 -0
- data/test/highlight_test.rb +63 -0
- data/test/index_test.rb +120 -0
- data/test/inheritance_test.rb +78 -0
- data/test/match_test.rb +227 -0
- data/test/misspellings_test.rb +46 -0
- data/test/model_test.rb +42 -0
- data/test/multi_search_test.rb +22 -0
- data/test/multi_tenancy_test.rb +22 -0
- data/test/order_test.rb +44 -0
- data/test/pagination_test.rb +53 -0
- data/test/query_test.rb +13 -0
- data/test/records_test.rb +8 -0
- data/test/reindex_job_test.rb +31 -0
- data/test/reindex_v2_job_test.rb +32 -0
- data/test/routing_test.rb +13 -0
- data/test/should_index_test.rb +32 -0
- data/test/similar_test.rb +28 -0
- data/test/sql_test.rb +196 -0
- data/test/suggest_test.rb +80 -0
- data/test/synonyms_test.rb +54 -0
- data/test/test_helper.rb +361 -0
- data/test/where_test.rb +171 -0
- metadata +231 -0
data/lib/searchkick/index.rb
ADDED
@@ -0,0 +1,662 @@
|
|
1
|
+
module Searchkick
|
2
|
+
class Index
|
3
|
+
attr_reader :name, :options
|
4
|
+
|
5
|
+
# Wraps a single Elasticsearch index (or alias) name.
#
# name    - name passed as the index/alias in the requests made by this object
# options - configuration hash, stored as given (defaults to an empty hash)
def initialize(name, options = {})
  @name, @options = name, options
end
|
9
|
+
|
10
|
+
# Creates the index named by this object.
#
# options - hash sent as the body of the create request (defaults to {})
def create(options = {})
  client.indices.create(index: name, body: options)
end
|
13
|
+
|
14
|
+
# Deletes the index named by this object.
def delete
  client.indices.delete(index: name)
end
|
17
|
+
|
18
|
+
# Asks the client whether an index with this name exists.
def exists?
  client.indices.exists(index: name)
end
|
21
|
+
|
22
|
+
# Issues a refresh request for this index.
def refresh
  client.indices.refresh(index: name)
end
|
25
|
+
|
26
|
+
# Asks the client whether an alias with this name exists.
def alias_exists?
  client.indices.exists_alias(name: name)
end
|
29
|
+
|
30
|
+
# Fetches the mapping of this index from the client.
def mapping
  client.indices.get_mapping(index: name)
end
|
33
|
+
|
34
|
+
# Points the alias `name` at `new_name` in a single update_aliases request.
#
# Every index currently returned for the alias gets a :remove action, followed
# by one :add action for new_name.
#
# new_name - name of the index that should carry the alias afterwards
def swap(new_name)
  old_indices =
    begin
      client.indices.get_alias(name: name).keys
    rescue Elasticsearch::Transport::Transport::Errors::NotFound
      # nothing carries the alias: fall back to an empty list of names
      # (an Array, the same type as the success path)
      []
    end
  removals = old_indices.map { |old_name| {remove: {index: old_name, alias: name}} }
  actions = removals + [{add: {index: new_name, alias: name}}]
  client.indices.update_aliases(body: {actions: actions})
end
|
44
|
+
|
45
|
+
# record based
|
46
|
+
|
47
|
+
# Indexes a single record by delegating to bulk_index with a one-element list.
def store(record)
  bulk_index [record]
end
|
50
|
+
|
51
|
+
# Removes a single record by delegating to bulk_delete with a one-element list.
def remove(record)
  bulk_delete [record]
end
|
54
|
+
|
55
|
+
# Queues a :delete action for every record that has a non-blank id.
#
# records - enumerable of records; those whose id is blank are skipped
def bulk_delete(records)
  with_id = records.reject { |record| record.id.blank? }
  deletions = with_id.map { |record| {delete: record_data(record)} }
  Searchkick.queue_items(deletions)
end
|
58
|
+
|
59
|
+
# Queues an :index action for every record.
#
# Each action is the record's metadata (record_data) plus a :data entry
# holding its document body (search_data).
def bulk_index(records)
  operations = records.map do |record|
    {index: record_data(record).merge(data: search_data(record))}
  end
  Searchkick.queue_items(operations)
end
|
62
|
+
alias_method :import, :bulk_index
|
63
|
+
|
64
|
+
# Builds the bulk-action metadata for a record: :_index, :_id and :_type,
# plus :_routing when the record responds to search_routing.
def record_data(r)
  meta = {_index: name, _id: search_id(r), _type: document_type(r)}
  return meta unless r.respond_to?(:search_routing)

  meta[:_routing] = r.search_routing
  meta
end
|
73
|
+
|
74
|
+
# Fetches the stored document for a record and returns its "_source" entry.
def retrieve(record)
  response = client.get(index: name, type: document_type(record), id: search_id(record))
  response["_source"]
end
|
81
|
+
|
82
|
+
# Brings the index in line with one record: stores it when it is neither
# destroyed nor excluded by should_index?, otherwise removes it.
# A NotFound error raised while removing is ignored.
def reindex_record(record)
  unless record.destroyed? || !record.should_index?
    return store(record)
  end

  begin
    remove(record)
  rescue Elasticsearch::Transport::Transport::Errors::NotFound
    # do nothing
  end
end
|
93
|
+
|
94
|
+
# Reindexes a record in the background when Searchkick.callbacks_value is nil;
# for any other value the record is reindexed inline via reindex_record.
#
# The background job is Searchkick::ReindexV2Job when that constant is defined,
# otherwise a Searchkick::ReindexJob enqueued through Delayed::Job.
def reindex_record_async(record)
  return reindex_record(record) unless Searchkick.callbacks_value.nil?

  job_args = [record.class.name, record.id.to_s]
  if defined?(Searchkick::ReindexV2Job)
    Searchkick::ReindexV2Job.perform_later(*job_args)
  else
    Delayed::Job.enqueue Searchkick::ReindexJob.new(*job_args)
  end
end
|
105
|
+
|
106
|
+
# Searches for documents similar to a record.
#
# The search text is the record's stored document values (nil values dropped)
# joined with spaces, limited to options[:fields] when that is given.
# The record itself is excluded through where[:_id][:not].
#
# record  - record whose indexed document is used as the query text
# options - search options; :fields, :where and :per_page are read here, the
#           rest is passed through. The caller's hash is NOT modified.
#
# Returns whatever search_model returns.
def similar_record(record, options = {})
  wanted = options[:fields] && options[:fields].map(&:to_s)
  like_text = retrieve(record).to_hash
    .select { |k, _| !wanted || wanted.include?(k) }
    .values.compact.join(" ")

  # TODO deep merge method
  # build fresh hashes rather than writing into the caller's (possibly nested) options
  where = (options[:where] || {}).dup
  where[:_id] = (where[:_id] || {}).merge(not: record.id.to_s)
  options = options.merge(where: where, per_page: options[:per_page] || 10, similar: true)

  # TODO use index class instead of record class
  search_model(record.class, like_text, options)
end
|
121
|
+
|
122
|
+
# search
|
123
|
+
|
124
|
+
# Builds a Searchkick::Query and runs it.
#
# searchkick_klass - class handed to the query
# term             - search term (defaults to nil)
# options          - query options; execute: false returns the query unexecuted
# block            - optional; called with query.body before the query is
#                    executed or returned
#
# Returns the Searchkick::Query when options[:execute] is false, otherwise the
# result of query.execute.
def search_model(searchkick_klass, term = nil, options = {}, &block)
  query = Searchkick::Query.new(searchkick_klass, term, options)
  block.call(query.body) unless block.nil?
  options[:execute] == false ? query : query.execute
end
|
133
|
+
|
134
|
+
# reindex
|
135
|
+
|
136
|
+
# Creates a new, timestamp-suffixed index and returns its Searchkick::Index.
#
# The new name is "<name>_<YYYYmmddHHMMSS + milliseconds>". The new index
# object receives this object's @options.
#
# options - :index_options overrides the generated index_options as the
#           body of the create request
def create_index(options = {})
  body = options[:index_options] || index_options
  timestamp = Time.now.strftime('%Y%m%d%H%M%S%L')
  Searchkick::Index.new("#{name}_#{timestamp}", @options).tap do |index|
    index.create(body)
  end
end
|
142
|
+
|
143
|
+
# remove old indices that start w/ index_name
#
# Only indices named "<name>_<14 to 17 digits>" that carry no alias are
# deleted. Returns the list of deleted index names.
def clean_indices
  all_indices =
    begin
      client.indices.get_aliases
    rescue Elasticsearch::Transport::Transport::Errors::NotFound
      {}
    end

  stale_name = /\A#{Regexp.escape(name)}_\d{14,17}\z/
  stale = all_indices.select do |index_name, info|
    unaliased = info.empty? || info["aliases"].empty?
    unaliased && index_name =~ stale_name
  end
  indices = stale.keys

  indices.each { |index_name| Searchkick::Index.new(index_name).delete }
  indices
end
|
157
|
+
|
158
|
+
# https://gist.github.com/jarosan/3124884
# http://www.elasticsearch.org/blog/changing-mapping-with-zero-downtime/
#
# Rebuilds the index for a scope: creates a fresh timestamped index, imports
# the scope into it and points the alias at it.
#
# scope   - object providing searchkick_index_options; passed to import_scope
# options - import: false skips importing records
#
# Always returns true.
def reindex_scope(scope, options = {})
  run_import = options[:import] != false

  clean_indices

  index = create_index(index_options: scope.searchkick_index_options)

  if alias_exists?
    # alias already exists: import before swap, then drop the indices left without an alias
    index.import_scope(scope) if run_import
    swap(index.name)
    clean_indices
  else
    # no alias yet: delete a plain index that has this name, swap, import after swap
    delete if exists?
    swap(index.name)
    index.import_scope(scope) if run_import
  end

  index.refresh

  true
end
|
187
|
+
|
188
|
+
# Imports every record of a scope that passes should_index?, in batches.
#
# The batch size is @options[:batch_size], defaulting to 1000. When the scope
# responds to search_import, the scope returned by that method is used.
# Scopes responding to find_in_batches are read through it; any other scope is
# walked with scope.all.each and batched by hand.
def import_scope(scope)
  batch_size = @options[:batch_size] || 1000

  # use scope for import
  scope = scope.search_import if scope.respond_to?(:search_import)

  if scope.respond_to?(:find_in_batches)
    scope.find_in_batches(batch_size: batch_size) do |batch|
      import batch.select(&:should_index?)
    end
  else
    # https://github.com/karmi/tire/blob/master/lib/tire/model/import.rb
    # use cursor for Mongoid
    pending = []
    scope.all.each do |item|
      pending << item if item.should_index?
      next unless pending.length == batch_size

      import pending
      pending = []
    end
    # import whatever is left over (possibly an empty list)
    import pending
  end
end
|
211
|
+
|
212
|
+
# Builds the body used to create the index: {settings: ..., mappings: ...}.
#
# If @options has :mappings without :merge_mappings, the configured :settings
# (or an empty hash) and :mappings are returned as given. Otherwise the analysis
# settings and field mappings are generated from @options and deep-merged with
# any :settings / :mappings that were supplied.
def index_options
  options = @options
  language = options[:language]
  language = language.call if language.respond_to?(:call)

  # fully custom mappings: nothing is generated
  if options[:mappings] && !options[:merge_mappings]
    return {settings: options[:settings] || {}, mappings: options[:mappings]}
  end

  pre_2_2 = Searchkick.server_below?("2.2.0")
  pre_5_0 = Searchkick.server_below?("5.0.0-alpha1")
  default_type = pre_5_0 ? "string" : "text"
  default_analyzer = pre_5_0 ? :default_index : :default
  keyword_mapping = pre_5_0 ? {type: default_type, index: "not_analyzed"} : {type: "keyword"}
  keyword_mapping[:ignore_above] = 256 unless pre_2_2

  # Builds one custom analyzer definition. Callers pass freshly built filter
  # arrays because the arrays are edited in place further down.
  custom_analyzer = lambda do |tokenizer, filters, char_filters = nil|
    definition = {type: "custom"}
    definition[:char_filter] = char_filters if char_filters
    definition[:tokenizer] = tokenizer
    definition[:filter] = filters
    definition
  end
  # Builds an analyzed field mapping of the default type.
  analyzed_field = ->(extra = {}) { {type: default_type, index: "analyzed"}.merge(extra) }

  conversion_stemming = options[:stem_conversions] == false ? [] : ["searchkick_stemmer"]

  settings = {
    analysis: {
      analyzer: {
        searchkick_keyword: custom_analyzer.call("keyword", ["lowercase"] + conversion_stemming),
        # character filters -> tokenizer -> token filters
        # https://www.elastic.co/guide/en/elasticsearch/guide/current/analysis-intro.html
        # synonym should come last, after stemming and shingle
        # shingle must come before searchkick_stemmer
        default_analyzer => custom_analyzer.call(
          "standard",
          ["standard", "lowercase", "asciifolding", "searchkick_index_shingle", "searchkick_stemmer"],
          ["ampersand"]
        ),
        searchkick_search: custom_analyzer.call(
          "standard",
          ["standard", "lowercase", "asciifolding", "searchkick_search_shingle", "searchkick_stemmer"],
          ["ampersand"]
        ),
        searchkick_search2: custom_analyzer.call(
          "standard",
          ["standard", "lowercase", "asciifolding", "searchkick_stemmer"],
          ["ampersand"]
        ),
        # https://github.com/leschenko/elasticsearch_autocomplete/blob/master/lib/elasticsearch_autocomplete/analyzers.rb
        searchkick_autocomplete_index: custom_analyzer.call("searchkick_autocomplete_ngram", ["lowercase", "asciifolding"]),
        searchkick_autocomplete_search: custom_analyzer.call("keyword", ["lowercase", "asciifolding"]),
        searchkick_word_search: custom_analyzer.call("standard", ["lowercase", "asciifolding"]),
        searchkick_suggest_index: custom_analyzer.call("standard", ["lowercase", "asciifolding", "searchkick_suggest_shingle"]),
        searchkick_text_start_index: custom_analyzer.call("keyword", ["lowercase", "asciifolding", "searchkick_edge_ngram"]),
        searchkick_text_middle_index: custom_analyzer.call("keyword", ["lowercase", "asciifolding", "searchkick_ngram"]),
        searchkick_text_end_index: custom_analyzer.call("keyword", ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]),
        searchkick_word_start_index: custom_analyzer.call("standard", ["lowercase", "asciifolding", "searchkick_edge_ngram"]),
        searchkick_word_middle_index: custom_analyzer.call("standard", ["lowercase", "asciifolding", "searchkick_ngram"]),
        searchkick_word_end_index: custom_analyzer.call("standard", ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"])
      },
      filter: {
        searchkick_index_shingle: {type: "shingle", token_separator: ""},
        # lucky find http://web.archiveorange.com/archive/v/AAfXfQ17f57FcRINsof7
        searchkick_search_shingle: {
          type: "shingle",
          token_separator: "",
          output_unigrams: false,
          output_unigrams_if_no_shingles: true
        },
        searchkick_suggest_shingle: {type: "shingle", max_shingle_size: 5},
        searchkick_edge_ngram: {type: "edgeNGram", min_gram: 1, max_gram: 50},
        searchkick_ngram: {type: "nGram", min_gram: 1, max_gram: 50},
        searchkick_stemmer: {
          # use stemmer if language is lowercase, snowball otherwise
          # TODO deprecate language option in favor of stemmer
          type: language == language.to_s.downcase ? "stemmer" : "snowball",
          language: language || "English"
        }
      },
      char_filter: {
        # https://www.elastic.co/guide/en/elasticsearch/guide/current/custom-analyzers.html
        # &_to_and
        ampersand: {type: "mapping", mappings: ["&=> and "]}
      },
      tokenizer: {
        searchkick_autocomplete_ngram: {type: "edgeNGram", min_gram: 1, max_gram: 50}
      }
    }
  }

  settings.merge!(number_of_shards: 1, number_of_replicas: 0) if Searchkick.env == "test"
  settings[:similarity] = {default: {type: options[:similarity]}} if options[:similarity]

  settings.deep_merge!(options[:settings] || {})

  # Wires a synonym-type filter into the index-time analyzers: twice into the
  # default analyzer (position 4 and the end) and at position 2 of the word_* analyzers.
  add_synonym_filter = lambda do |filter_name|
    default_filters = settings[:analysis][:analyzer][default_analyzer][:filter]
    default_filters.insert(4, filter_name)
    default_filters << filter_name

    %w(word_start word_middle word_end).each do |type|
      settings[:analysis][:analyzer]["searchkick_#{type}_index".to_sym][:filter].insert(2, filter_name)
    end
  end

  # synonyms
  synonyms = options[:synonyms] || []
  synonyms = synonyms.call if synonyms.respond_to?(:call)

  if synonyms.any?
    synonym_groups = synonyms.select { |s| s.size > 1 }
    settings[:analysis][:filter][:searchkick_synonym] = {
      type: "synonym",
      synonyms: synonym_groups.map { |s| s.join(",") }
    }
    # choosing a place for the synonym filter when stemming is not easy
    # https://groups.google.com/forum/#!topic/elasticsearch/p7qcQlgHdB8
    # TODO use a snowball stemmer on synonyms when creating the token filter

    # http://elasticsearch-users.115913.n3.nabble.com/synonym-multi-words-search-td4030811.html
    # I find the following approach effective if you are doing multi-word synonyms (synonym phrases):
    # - Only apply the synonym expansion at index time
    # - Don't have the synonym filter applied search
    # - Use directional synonyms where appropriate. You want to make sure that you're not injecting terms that are too general.
    add_synonym_filter.call("searchkick_synonym")
  end

  if options[:wordnet]
    settings[:analysis][:filter][:searchkick_wordnet] = {
      type: "synonym",
      format: "wordnet",
      synonyms_path: Searchkick.wordnet_path
    }
    add_synonym_filter.call("searchkick_wordnet")
  end

  if options[:special_characters] == false
    settings[:analysis][:analyzer].each_value do |analyzer_settings|
      analyzer_settings[:filter].reject! { |f| f == "asciifolding" }
    end
  end

  mapping = {}

  # conversions
  if (conversions_field = options[:conversions])
    mapping[conversions_field] = {
      type: "nested",
      properties: {
        query: {type: default_type, analyzer: "searchkick_keyword"},
        count: {type: "integer"}
      }
    }
  end

  option_names = [:autocomplete, :suggest, :word, :text_start, :text_middle, :text_end, :word_start, :word_middle, :word_end, :highlight, :searchable, :only_analyzed]
  mapping_options = option_names.each_with_object({}) do |type, memo|
    memo[type] = (options[type] || []).map(&:to_s)
  end
  # the options that name an analyzer (everything except the three flags)
  analyzer_options = mapping_options.except(:highlight, :searchable, :only_analyzed)

  word = options[:word] != false && (!options[:match] || options[:match] == :word)

  mapping_options.values.flatten.uniq.each do |field|
    fields = {}
    fields[field] =
      if mapping_options[:only_analyzed].include?(field)
        {type: default_type, index: "no"}
      else
        keyword_mapping
      end

    if !options[:searchable] || mapping_options[:searchable].include?(field)
      if word
        fields["analyzed"] = analyzed_field.call({analyzer: default_analyzer})
        fields["analyzed"][:term_vector] = "with_positions_offsets" if mapping_options[:highlight].include?(field)
      end

      analyzer_options.each do |type, type_fields|
        next unless options[:match] == type || type_fields.include?(field)

        fields[type] = analyzed_field.call({analyzer: "searchkick_#{type}_index"})
      end
    end

    mapping[field] =
      if pre_5_0
        {type: "multi_field", fields: fields}
      elsif fields[field]
        fields[field].merge(fields: fields.except(field))
      end
  end

  (options[:locations] || []).map(&:to_s).each do |field|
    mapping[field] = {type: "geo_point"}
  end

  (options[:unsearchable] || []).map(&:to_s).each do |field|
    mapping[field] = {type: default_type, index: "no"}
  end

  routing = {}
  if options[:routing]
    routing = {required: true}
    routing[:path] = options[:routing].to_s unless options[:routing] == true
  end

  # analyzed field must be the default field for include_in_all
  # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
  # however, we can include the not_analyzed field in _all
  # and the _all index analyzer will take care of it
  dynamic_fields = {
    "{name}" => keyword_mapping.merge(include_in_all: !options[:searchable])
  }
  dynamic_fields["{name}"][:ignore_above] = 256 unless pre_2_2

  unless options[:searchable]
    match = options[:match]
    if match && match != :word
      dynamic_fields[match] = analyzed_field.call({analyzer: "searchkick_#{match}_index"})
    end

    dynamic_fields["analyzed"] = analyzed_field.call if word
  end

  # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
  multi_field =
    if pre_5_0
      {type: "multi_field", fields: dynamic_fields}
    else
      dynamic_fields["{name}"].merge(fields: dynamic_fields.except("{name}"))
    end

  generated_mappings = {
    _default_: {
      _all: analyzed_field.call({analyzer: default_analyzer}),
      properties: mapping,
      _routing: routing,
      # https://gist.github.com/kimchy/2898285
      dynamic_templates: [
        {
          string_template: {
            match: "*",
            match_mapping_type: "string",
            mapping: multi_field
          }
        }
      ]
    }
  }
  mappings = generated_mappings.deep_merge(options[:mappings] || {})

  {settings: settings, mappings: mappings}
end
|
562
|
+
|
563
|
+
# other
|
564
|
+
|
565
|
+
# Runs text through the analyze API of this index and returns the token strings.
#
# text    - text to analyze
# options - extra arguments merged into the analyze request
def tokens(text, options = {})
  request = {text: text, index: name}.merge(options)
  response = client.indices.analyze(request)
  response["tokens"].map { |entry| entry["token"] }
end
|
568
|
+
|
569
|
+
# Returns the document type for a class: klass.document_type when the class
# defines it, otherwise the underscored model name.
def klass_document_type(klass)
  return klass.document_type if klass.respond_to?(:document_type)

  klass.model_name.to_s.underscore
end
|
576
|
+
|
577
|
+
protected
|
578
|
+
|
579
|
+
def client
|
580
|
+
Searchkick.client
|
581
|
+
end
|
582
|
+
|
583
|
+
# Returns the document type for a record: its own search_document_type when it
# responds to that, otherwise the type derived from its class.
def document_type(record)
  return record.search_document_type if record.respond_to?(:search_document_type)

  klass_document_type(record.class)
end
|
590
|
+
|
591
|
+
# Returns the document id for a record: search_document_id when the record
# responds to it, otherwise id. Numeric ids are returned unchanged; anything
# else is converted with to_s.
def search_id(record)
  raw_id =
    if record.respond_to?(:search_document_id)
      record.search_document_id
    else
      record.id
    end
  return raw_id if raw_id.is_a?(Numeric)

  raw_id.to_s
end
|
595
|
+
|
596
|
+
# Builds the document body that is indexed for a record.
#
# Starting from record.search_data, this:
# - stringifies the keys and drops "_id" (search_id is used instead),
# - turns the conversions hash into a list of {query:, count:} entries,
# - makes sure every :suggest field is present (nil when missing),
# - normalizes :locations fields through location_value,
# - casts BigDecimal values to floats,
# and returns the result of as_json.
#
# The field options are read from record.class.searchkick_options.
def search_data(record)
  source = record.search_data
  options = record.class.searchkick_options

  # stringify fields
  # remove _id since search_id is used instead
  source = source.each_with_object({}) { |(k, v), memo| memo[k.to_s] = v }
  source.delete("_id")

  # conversions
  # keys were stringified above, so the field name has to be a string too;
  # otherwise a Symbol :conversions option never matches
  conversions_field = options[:conversions] && options[:conversions].to_s
  if conversions_field && source[conversions_field]
    source[conversions_field] = source[conversions_field].map { |k, v| {query: k, count: v} }
  end

  # hack to prevent generator field doesn't exist error
  (options[:suggest] || []).map(&:to_s).each do |field|
    source[field] = nil unless source[field]
  end

  # locations
  (options[:locations] || []).map(&:to_s).each do |field|
    value = source[field]
    next unless value

    # a list of locations is a non-Hash collection whose first element is itself
    # an Array or Hash; scalars such as Strings have no #first and are single values
    multiple = !value.is_a?(Hash) && value.respond_to?(:first) &&
               (value.first.is_a?(Array) || value.first.is_a?(Hash))
    source[field] = multiple ? value.map { |a| location_value(a) } : location_value(value)
  end

  cast_big_decimal(source)

  source.as_json
end
|
631
|
+
|
632
|
+
# Normalizes one location value for indexing.
#
# - Array: every element is converted to a float and the order is reversed
#   (presumably [lat, lon] in, [lon, lat] out; confirm against callers).
# - Hash: reduced to {lat:, lon:} with float values; both symbol and string
#   keys are accepted, so a string-keyed hash is no longer turned into 0.0/0.0.
# - anything else is returned unchanged.
def location_value(value)
  case value
  when Array
    value.map(&:to_f).reverse
  when Hash
    latitude = value[:lat] || value["lat"]
    longitude = value[:lon] || value["lon"]
    {lat: latitude.to_f, lon: longitude.to_f}
  else
    value
  end
end
|
641
|
+
|
642
|
+
# change all BigDecimal values to floats due to
# https://github.com/rails/rails/issues/6033
# possible loss of precision :/
#
# Hashes are updated in place and returned; other Enumerables are mapped into
# a new Array; any other object is returned as is.
def cast_big_decimal(obj)
  if obj.is_a?(BigDecimal)
    obj.to_f
  elsif obj.is_a?(Hash)
    obj.each_key { |key| obj[key] = cast_big_decimal(obj[key]) }
    obj
  elsif obj.is_a?(Enumerable)
    obj.map { |element| cast_big_decimal(element) }
  else
    obj
  end
end
|
661
|
+
end
|
662
|
+
end
|