searchkick-sinneduy 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +20 -0
- data/.travis.yml +28 -0
- data/CHANGELOG.md +272 -0
- data/Gemfile +7 -0
- data/LICENSE.txt +22 -0
- data/README.md +1109 -0
- data/Rakefile +8 -0
- data/ci/before_install.sh +14 -0
- data/gemfiles/activerecord31.gemfile +7 -0
- data/gemfiles/activerecord32.gemfile +7 -0
- data/gemfiles/activerecord40.gemfile +8 -0
- data/gemfiles/activerecord41.gemfile +8 -0
- data/gemfiles/mongoid2.gemfile +7 -0
- data/gemfiles/mongoid3.gemfile +6 -0
- data/gemfiles/mongoid4.gemfile +7 -0
- data/gemfiles/nobrainer.gemfile +6 -0
- data/lib/searchkick.rb +72 -0
- data/lib/searchkick/index.rb +550 -0
- data/lib/searchkick/logging.rb +136 -0
- data/lib/searchkick/model.rb +102 -0
- data/lib/searchkick/query.rb +567 -0
- data/lib/searchkick/reindex_job.rb +28 -0
- data/lib/searchkick/reindex_v2_job.rb +24 -0
- data/lib/searchkick/results.rb +158 -0
- data/lib/searchkick/tasks.rb +35 -0
- data/lib/searchkick/version.rb +3 -0
- data/searchkick.gemspec +28 -0
- data/test/autocomplete_test.rb +67 -0
- data/test/boost_test.rb +126 -0
- data/test/facets_test.rb +91 -0
- data/test/highlight_test.rb +58 -0
- data/test/index_test.rb +119 -0
- data/test/inheritance_test.rb +80 -0
- data/test/match_test.rb +163 -0
- data/test/model_test.rb +38 -0
- data/test/query_test.rb +14 -0
- data/test/reindex_job_test.rb +33 -0
- data/test/reindex_v2_job_test.rb +34 -0
- data/test/routing_test.rb +14 -0
- data/test/should_index_test.rb +34 -0
- data/test/similar_test.rb +20 -0
- data/test/sql_test.rb +327 -0
- data/test/suggest_test.rb +82 -0
- data/test/synonyms_test.rb +50 -0
- data/test/test_helper.rb +276 -0
- metadata +194 -0
data/ci/before_install.sh
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env bash

# Download, install and (re)start Elasticsearch 1.3.2.
wget https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-1.3.2.deb
sudo dpkg -i elasticsearch-1.3.2.deb
sudo service elasticsearch restart

# Extra setup that only runs when $NOBRAINER is non-empty:
# add the RethinkDB apt repository, install the package and start an instance.
if [ -n "$NOBRAINER" ]; then
  source /etc/lsb-release && echo "deb http://download.rethinkdb.com/apt $DISTRIB_CODENAME main" | sudo tee /etc/apt/sources.list.d/rethinkdb.list
  wget -qO- http://download.rethinkdb.com/apt/pubkey.gpg | sudo apt-key add -
  sudo apt-get update -q
  # -y answers the confirmation prompt so an unattended run cannot stall or abort on it
  sudo apt-get install -y rethinkdb
  sudo cp /etc/rethinkdb/default.conf.sample /etc/rethinkdb/instances.d/instance1.conf
  sudo service rethinkdb restart
fi
|
data/lib/searchkick.rb
ADDED
@@ -0,0 +1,72 @@
|
|
1
|
+
require "active_model"
|
2
|
+
require "elasticsearch"
|
3
|
+
require "hashie"
|
4
|
+
require "searchkick/version"
|
5
|
+
require "searchkick/index"
|
6
|
+
require "searchkick/results"
|
7
|
+
require "searchkick/query"
|
8
|
+
require "searchkick/reindex_job"
|
9
|
+
require "searchkick/model"
|
10
|
+
require "searchkick/tasks"
|
11
|
+
require "searchkick/logging" if defined?(Rails)
|
12
|
+
|
13
|
+
# background jobs
|
14
|
+
begin
|
15
|
+
require "active_job"
|
16
|
+
rescue LoadError
|
17
|
+
# do nothing
|
18
|
+
end
|
19
|
+
require "searchkick/reindex_v2_job" if defined?(ActiveJob)
|
20
|
+
|
21
|
+
# Namespace for the library: error classes, module-level settings,
# the memoized Elasticsearch client and the per-thread callback switch.
module Searchkick
  class MissingIndexError < StandardError; end
  class UnsupportedVersionError < StandardError; end
  class InvalidQueryError < Elasticsearch::Transport::Transport::Errors::BadRequest; end

  class << self
    attr_accessor :search_method_name, :wordnet_path, :timeout, :models
    attr_writer :client

    # Elasticsearch client, built on first use from ENV["ELASTICSEARCH_URL"]
    # and the configured timeout, unless one was assigned through client=.
    def client
      @client ||=
        Elasticsearch::Client.new(
          url: ENV["ELASTICSEARCH_URL"],
          transport_options: {request: {timeout: timeout}}
        )
    end

    # Version number string reported by the server's info call (memoized).
    def server_version
      @server_version ||= client.info["version"]["number"]
    end

    # Sets the callback flag to true for the current thread.
    def enable_callbacks
      Thread.current[:searchkick_callbacks_enabled] = true
    end

    # Sets the callback flag to false for the current thread.
    def disable_callbacks
      Thread.current[:searchkick_callbacks_enabled] = false
    end

    # True when the current thread's flag was never set, otherwise the flag itself.
    def callbacks?
      flag = Thread.current[:searchkick_callbacks_enabled]
      flag.nil? || flag
    end

    # Environment name: RAILS_ENV, then RACK_ENV, then "development" (memoized).
    def env
      @env ||= ENV["RAILS_ENV"] || ENV["RACK_ENV"] || "development"
    end
  end

  # defaults
  self.search_method_name = :search
  self.wordnet_path = "/var/lib/wn_s.pl"
  self.timeout = 10
  self.models = []
end
|
69
|
+
|
70
|
+
# TODO find better ActiveModel hook
|
71
|
+
ActiveModel::Callbacks.send(:include, Searchkick::Model)
|
72
|
+
ActiveRecord::Base.send(:extend, Searchkick::Model) if defined?(ActiveRecord)
|
data/lib/searchkick/index.rb
ADDED
@@ -0,0 +1,550 @@
|
|
1
|
+
module Searchkick
|
2
|
+
class Index
|
3
|
+
attr_reader :name, :options
|
4
|
+
|
5
|
+
# Remembers the index name and the options hash; both are exposed
# through the class's attr_reader.
def initialize(name, options = {})
  @options = options
  @name = name
end
|
9
|
+
|
10
|
+
# Creates the index called +name+, sending +options+ unchanged as the
# request body. Returns the client's response.
def create(options = {})
  client.indices.create(index: name, body: options)
end
|
13
|
+
|
14
|
+
# Deletes the index called +name+ and returns the client's response.
def delete
  client.indices.delete(index: name)
end
|
17
|
+
|
18
|
+
# Asks the client whether an index called +name+ exists and returns its answer.
def exists?
  client.indices.exists(index: name)
end
|
21
|
+
|
22
|
+
# Issues a refresh request for the index called +name+ and returns the
# client's response.
def refresh
  client.indices.refresh(index: name)
end
|
25
|
+
|
26
|
+
# Asks the client whether an alias called +name+ exists and returns its answer.
def alias_exists?
  client.indices.exists_alias(name: name)
end
|
29
|
+
|
30
|
+
# Points the alias +name+ at the index +new_name+.
#
# Every index the alias currently resolves to is detached and +new_name+
# is attached, all in one update_aliases request. A NotFound error from the
# alias lookup is treated as "no indices attached yet".
def swap(new_name)
  attached =
    begin
      client.indices.get_alias(name: name).keys
    rescue Elasticsearch::Transport::Transport::Errors::NotFound
      []
    end

  removals = attached.map { |index_name| {remove: {index: index_name, alias: name}} }
  addition = {add: {index: new_name, alias: name}}
  client.indices.update_aliases(body: {actions: removals + [addition]})
end
|
40
|
+
|
41
|
+
# record based
|
42
|
+
|
43
|
+
# Indexes +record+: its document type, id and search data are computed by
# the helpers of this class and sent in a single index request.
def store(record)
  type = document_type(record)
  id = search_id(record)
  body = search_data(record)
  client.index(index: name, type: type, id: id, body: body)
end
|
51
|
+
|
52
|
+
# Deletes the document for +record+ from the index.
# Does nothing (and returns nil) when the record's search id is blank.
def remove(record)
  id = search_id(record)
  return if id.blank?

  client.delete(index: name, type: document_type(record), id: id)
end
|
62
|
+
|
63
|
+
# Bulk-indexes +records+, issuing one bulk request per document type.
# Returns the records grouped by document type.
def import(records)
  records.group_by { |record| document_type(record) }.each do |type, batch|
    actions = batch.map do |record|
      {index: {_id: search_id(record), data: search_data(record)}}
    end
    client.bulk(index: name, type: type, body: actions)
  end
end
|
72
|
+
|
73
|
+
# Fetches the stored document for +record+ and returns its "_source" part.
def retrieve(record)
  response = client.get(index: name, type: document_type(record), id: search_id(record))
  response["_source"]
end
|
80
|
+
|
81
|
+
# Brings the index in line with +record+: stores it when it is still
# present and wants to be indexed, otherwise removes its document.
# A NotFound error raised while removing is ignored.
def reindex_record(record)
  indexable = !record.destroyed? && record.should_index?
  return store(record) if indexable

  begin
    remove(record)
  rescue Elasticsearch::Transport::Transport::Errors::NotFound
    # do nothing
  end
end
|
92
|
+
|
93
|
+
# Queues a background reindex of +record+, identified by its class name
# and stringified id. Uses Searchkick::ReindexV2Job when that constant is
# defined, otherwise enqueues a Searchkick::ReindexJob through Delayed::Job.
def reindex_record_async(record)
  klass_name = record.class.name
  record_id = record.id.to_s

  if defined?(Searchkick::ReindexV2Job)
    Searchkick::ReindexV2Job.perform_later(klass_name, record_id)
  else
    Delayed::Job.enqueue(Searchkick::ReindexJob.new(klass_name, record_id))
  end
end
|
100
|
+
|
101
|
+
# Searches for documents similar to +record+.
#
# The query text is the record's stored source values (nil values dropped)
# joined with spaces, restricted to +options[:fields]+ when given.
# The record itself is excluded through an _id "not" filter, :limit
# defaults to 10 and :similar is forced to true.
#
# The caller's +options+ hash is not modified: the search runs on a copy,
# with the nested :where and :_id hashes copied as well.
#
# Returns whatever search_model returns.
def similar_record(record, options = {})
  # stringify the field whitelist once instead of once per source key
  allowed = options[:fields] ? options[:fields].map(&:to_s) : nil

  like_text = retrieve(record).to_hash
    .select { |key, _value| !allowed || allowed.include?(key) }
    .values.compact.join(" ")

  # TODO deep merge method
  where = (options[:where] || {}).dup
  where[:_id] = (where[:_id] || {}).merge(not: record.id.to_s)

  search_options = options.merge(where: where, similar: true)
  search_options[:limit] ||= 10

  # TODO use index class instead of record class
  search_model(record.class, like_text, search_options)
end
|
116
|
+
|
117
|
+
# search
|
118
|
+
|
119
|
+
# Builds a Searchkick::Query for +searchkick_klass+, +term+ and +options+.
# When a block is given it is called with the query body.
# Returns the unexecuted query when options[:execute] is false,
# otherwise the result of executing it.
def search_model(searchkick_klass, term = nil, options = {}, &block)
  query = Searchkick::Query.new(searchkick_klass, term, options)
  block.call(query.body) if block
  return query if options[:execute] == false

  query.execute
end
|
130
|
+
|
131
|
+
# reindex
|
132
|
+
|
133
|
+
# Creates a new index named "<name>_<timestamp>" (timestamp down to the
# millisecond) with this index's options and the body from index_options.
# Returns the Searchkick::Index object for the new index.
def create_index
  timestamp = Time.now.strftime('%Y%m%d%H%M%S%L')
  Searchkick::Index.new("#{name}_#{timestamp}", @options).tap do |fresh|
    fresh.create(index_options)
  end
end
|
138
|
+
|
139
|
+
# remove old indices that start w/ index_name
|
140
|
+
# Deletes leftover indices: those whose name is "<name>_" followed by
# 14 to 17 digits and that have no aliases attached.
# Returns the names of the deleted indices.
def clean_indices
  stale = client.indices.get_aliases.select do |index_name, info|
    unaliased = info.empty? || info["aliases"].empty?
    unaliased && index_name =~ /\A#{Regexp.escape(name)}_\d{14,17}\z/
  end.keys

  stale.each { |index_name| Searchkick::Index.new(index_name).delete }
  stale
end
|
148
|
+
|
149
|
+
# https://gist.github.com/jarosan/3124884
|
150
|
+
# http://www.elasticsearch.org/blog/changing-mapping-with-zero-downtime/
|
151
|
+
# Rebuilds the index from +scope+ behind the alias +name+.
#
# A fresh timestamped index is created first. If the alias already exists,
# the data is imported into the new index before the alias is swapped and
# the old indices are cleaned up. If it does not, an index literally
# called +name+ is deleted when present, the alias is swapped and the
# import happens afterwards. Pass import: false to skip importing.
#
# Always returns true.
def reindex_scope(scope, options = {})
  import_wanted = options[:import] != false

  clean_indices
  index = create_index

  if alias_exists?
    # import before swap
    index.import_scope(scope) if import_wanted
    swap(index.name)
    clean_indices
  else
    delete if exists?
    swap(index.name)
    # import after swap
    index.import_scope(scope) if import_wanted
  end

  index.refresh
  true
end
|
178
|
+
|
179
|
+
# Imports every record of +scope+ that answers true to should_index?,
# in batches of options[:batch_size] (1000 when not set).
#
# The scope's search_import is used when it responds to it. Scopes that
# respond to find_in_batches are read through it; anything else is walked
# with scope.all.each and batched by hand.
def import_scope(scope)
  batch_size = @options[:batch_size] || 1000

  # use scope for import
  scope = scope.search_import if scope.respond_to?(:search_import)

  if scope.respond_to?(:find_in_batches)
    scope.find_in_batches(batch_size: batch_size) do |batch|
      import(batch.select(&:should_index?))
    end
  else
    # https://github.com/karmi/tire/blob/master/lib/tire/model/import.rb
    # use cursor for Mongoid
    pending = []
    scope.all.each do |item|
      pending << item if item.should_index?
      next unless pending.length == batch_size

      import(pending)
      pending = []
    end
    # whatever is left over (possibly nothing)
    import(pending)
  end
end
|
202
|
+
|
203
|
+
# Builds the body used to create the index: analysis settings plus type
# mappings, derived from the options this index was constructed with.
#
# When :mappings is given without :merge_mappings, the caller's :settings
# (or an empty hash) and :mappings are returned as they are. Otherwise the
# defaults below are built and :settings / :mappings are deep-merged on top.
#
# Options read here: :mappings, :merge_mappings, :settings,
# :stem_conversions, :language, :synonyms, :wordnet, :special_characters,
# :conversions, :autocomplete, :suggest, :text_start, :text_middle,
# :text_end, :word_start, :word_middle, :word_end, :highlight, :locations,
# :unsearchable and :routing.
#
# Returns a Hash of the form {settings: ..., mappings: ...}.
def index_options
  options = @options

  if options[:mappings] && !options[:merge_mappings]
    settings = options[:settings] || {}
    mappings = options[:mappings]
  else
    settings = {
      analysis: {
        analyzer: {
          searchkick_keyword: {
            type: "custom",
            tokenizer: "keyword",
            filter: ["lowercase"] + (options[:stem_conversions] == false ? [] : ["searchkick_stemmer"])
          },
          default_index: {
            type: "custom",
            tokenizer: "standard",
            # synonym should come last, after stemming and shingle
            # shingle must come before searchkick_stemmer
            filter: ["standard", "lowercase", "asciifolding", "searchkick_index_shingle", "searchkick_stemmer"]
          },
          searchkick_search: {
            type: "custom",
            tokenizer: "standard",
            filter: ["standard", "lowercase", "asciifolding", "searchkick_search_shingle", "searchkick_stemmer"]
          },
          searchkick_search2: {
            type: "custom",
            tokenizer: "standard",
            filter: ["standard", "lowercase", "asciifolding", "searchkick_stemmer"]
          },
          # https://github.com/leschenko/elasticsearch_autocomplete/blob/master/lib/elasticsearch_autocomplete/analyzers.rb
          searchkick_autocomplete_index: {
            type: "custom",
            tokenizer: "searchkick_autocomplete_ngram",
            filter: ["lowercase", "asciifolding"]
          },
          searchkick_autocomplete_search: {
            type: "custom",
            tokenizer: "keyword",
            filter: ["lowercase", "asciifolding"]
          },
          searchkick_word_search: {
            type: "custom",
            tokenizer: "standard",
            filter: ["lowercase", "asciifolding"]
          },
          searchkick_suggest_index: {
            type: "custom",
            tokenizer: "standard",
            filter: ["lowercase", "asciifolding", "searchkick_suggest_shingle"]
          },
          searchkick_text_start_index: {
            type: "custom",
            tokenizer: "keyword",
            filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
          },
          searchkick_text_middle_index: {
            type: "custom",
            tokenizer: "keyword",
            filter: ["lowercase", "asciifolding", "searchkick_ngram"]
          },
          searchkick_text_end_index: {
            type: "custom",
            tokenizer: "keyword",
            filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
          },
          searchkick_word_start_index: {
            type: "custom",
            tokenizer: "standard",
            filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
          },
          searchkick_word_middle_index: {
            type: "custom",
            tokenizer: "standard",
            filter: ["lowercase", "asciifolding", "searchkick_ngram"]
          },
          searchkick_word_end_index: {
            type: "custom",
            tokenizer: "standard",
            filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
          }
        },
        filter: {
          searchkick_index_shingle: {
            type: "shingle",
            token_separator: ""
          },
          # lucky find http://web.archiveorange.com/archive/v/AAfXfQ17f57FcRINsof7
          searchkick_search_shingle: {
            type: "shingle",
            token_separator: "",
            output_unigrams: false,
            output_unigrams_if_no_shingles: true
          },
          searchkick_suggest_shingle: {
            type: "shingle",
            max_shingle_size: 5
          },
          searchkick_edge_ngram: {
            type: "edgeNGram",
            min_gram: 1,
            max_gram: 50
          },
          searchkick_ngram: {
            type: "nGram",
            min_gram: 1,
            max_gram: 50
          },
          searchkick_stemmer: {
            type: "snowball",
            language: options[:language] || "English"
          }
        },
        tokenizer: {
          searchkick_autocomplete_ngram: {
            type: "edgeNGram",
            min_gram: 1,
            max_gram: 50
          }
        }
      }
    }

    if Searchkick.env == "test"
      settings.merge!(number_of_shards: 1, number_of_replicas: 0)
    end

    settings.deep_merge!(options[:settings] || {})

    # synonyms
    synonyms = options[:synonyms] || []
    if synonyms.any?
      settings[:analysis][:filter][:searchkick_synonym] = {
        type: "synonym",
        # single-entry groups carry no synonym information and are dropped
        synonyms: synonyms.select { |s| s.size > 1 }.map { |s| s.join(",") }
      }
      # choosing a place for the synonym filter when stemming is not easy
      # https://groups.google.com/forum/#!topic/elasticsearch/p7qcQlgHdB8
      # TODO use a snowball stemmer on synonyms when creating the token filter

      # http://elasticsearch-users.115913.n3.nabble.com/synonym-multi-words-search-td4030811.html
      # I find the following approach effective if you are doing multi-word synonyms (synonym phrases):
      # - Only apply the synonym expansion at index time
      # - Don't have the synonym filter applied search
      # - Use directional synonyms where appropriate. You want to make sure that you're not injecting terms that are too general.
      settings[:analysis][:analyzer][:default_index][:filter].insert(4, "searchkick_synonym")
      settings[:analysis][:analyzer][:default_index][:filter] << "searchkick_synonym"
    end

    if options[:wordnet]
      settings[:analysis][:filter][:searchkick_wordnet] = {
        type: "synonym",
        format: "wordnet",
        synonyms_path: Searchkick.wordnet_path
      }

      settings[:analysis][:analyzer][:default_index][:filter].insert(4, "searchkick_wordnet")
      settings[:analysis][:analyzer][:default_index][:filter] << "searchkick_wordnet"
    end

    if options[:special_characters] == false
      settings[:analysis][:analyzer].each_value do |analyzer_settings|
        # analyzers merged in from options[:settings] may not define :filter
        filters = analyzer_settings[:filter]
        filters.reject! { |f| f == "asciifolding" } if filters
      end
    end

    mapping = {}

    # conversions
    # key the nested mapping by the configured field name, which is the
    # name search_data stores the conversions under
    conversions_field = options[:conversions]
    if conversions_field
      mapping[conversions_field.to_s] = {
        type: "nested",
        properties: {
          query: {type: "string", analyzer: "searchkick_keyword"},
          count: {type: "integer"}
        }
      }
    end

    mapping_options = Hash[
      [:autocomplete, :suggest, :text_start, :text_middle, :text_end, :word_start, :word_middle, :word_end, :highlight]
        .map { |type| [type, (options[type] || []).map(&:to_s)] }
    ]

    mapping_options.values.flatten.uniq.each do |field|
      field_mapping = {
        type: "multi_field",
        fields: {
          field => {type: "string", index: "not_analyzed"},
          "analyzed" => {type: "string", index: "analyzed"}
          # term_vector: "with_positions_offsets" for fast / correct highlighting
          # http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/search-request-highlighting.html#_fast_vector_highlighter
        }
      }

      mapping_options.except(:highlight).each do |type, fields|
        if fields.include?(field)
          field_mapping[:fields][type] = {type: "string", index: "analyzed", analyzer: "searchkick_#{type}_index"}
        end
      end

      if mapping_options[:highlight].include?(field)
        field_mapping[:fields]["analyzed"][:term_vector] = "with_positions_offsets"
      end

      mapping[field] = field_mapping
    end

    (options[:locations] || []).map(&:to_s).each do |field|
      mapping[field] = {
        type: "geo_point"
      }
    end

    (options[:unsearchable] || []).map(&:to_s).each do |field|
      mapping[field] = {
        type: "string",
        index: "no"
      }
    end

    routing = {}
    if options[:routing]
      routing = {required: true, path: options[:routing].to_s}
    end

    mappings = {
      _default_: {
        properties: mapping,
        _routing: routing,
        # https://gist.github.com/kimchy/2898285
        dynamic_templates: [
          {
            string_template: {
              match: "*",
              match_mapping_type: "string",
              mapping: {
                # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
                type: "multi_field",
                fields: {
                  # analyzed field must be the default field for include_in_all
                  # http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
                  # however, we can include the not_analyzed field in _all
                  # and the _all index analyzer will take care of it
                  "{name}" => {type: "string", index: "not_analyzed"},
                  "analyzed" => {type: "string", index: "analyzed"}
                }
              }
            }
          }
        ]
      }
    }.deep_merge(options[:mappings] || {})
  end

  {
    settings: settings,
    mappings: mappings
  }
end
|
465
|
+
|
466
|
+
# other
|
467
|
+
|
468
|
+
# Runs +text+ through the analyze API of this index and returns the
# resulting token strings. +options+ is merged into the request.
def tokens(text, options = {})
  request = {text: text, index: name}.merge(options)
  response = client.indices.analyze(request)
  response["tokens"].map { |entry| entry["token"] }
end
|
471
|
+
|
472
|
+
# Document type for +klass+: its own document_type when it defines one,
# otherwise the underscored model name.
def klass_document_type(klass)
  return klass.document_type if klass.respond_to?(:document_type)

  klass.model_name.to_s.underscore
end
|
479
|
+
|
480
|
+
protected
|
481
|
+
|
482
|
+
# Shortcut to the module-level client, so every request made by this
# class goes through Searchkick.client.
def client
  Searchkick.client
end
|
485
|
+
|
486
|
+
# Document type of +record+, resolved from the record's class.
def document_type(record)
  klass_document_type(record.class)
end
|
489
|
+
|
490
|
+
# Id used for the record's document: numeric ids are passed through
# untouched, every other id is converted with to_s.
def search_id(record)
  id = record.id
  id.is_a?(Numeric) ? id : id.to_s
end
|
493
|
+
|
494
|
+
# Builds the document body for +record+ from record.search_data and the
# record class's searchkick_options:
#
# * keys are stringified and "_id" is dropped (search_id is used instead)
# * the conversions field, when configured and present, is turned from a
#   query => count collection into an array of {query:, count:} hashes
# * every :suggest field missing from the data is added with a nil value
# * every :locations value is converted to floats with each pair reversed
# * BigDecimal values are cast to floats
#
# Returns the result of as_json on the prepared hash.
def search_data(record)
  options = record.class.searchkick_options

  # stringify fields
  # remove _id since search_id is used instead
  source = record.search_data.each_with_object({}) { |(key, value), memo| memo[key.to_s] = value }
  source.delete("_id")

  # conversions
  # the option may be given as a Symbol or a String; keys were stringified
  # above, so the lookup has to use the String form
  conversions_field = options[:conversions] && options[:conversions].to_s
  if conversions_field && source[conversions_field]
    source[conversions_field] = source[conversions_field].map { |query, count| {query: query, count: count} }
  end

  # hack to prevent generator field doesn't exist error
  (options[:suggest] || []).map(&:to_s).each do |field|
    source[field] = nil unless source[field]
  end

  # locations
  (options[:locations] || []).map(&:to_s).each do |field|
    next unless source[field]

    source[field] =
      if source[field].first.is_a?(Array) # array of arrays
        source[field].map { |pair| pair.map(&:to_f).reverse }
      else
        source[field].map(&:to_f).reverse
      end
  end

  cast_big_decimal(source)

  source.as_json
end
|
528
|
+
|
529
|
+
# change all BigDecimal values to floats due to
|
530
|
+
# https://github.com/rails/rails/issues/6033
|
531
|
+
# possible loss of precision :/
|
532
|
+
# Recursively replaces BigDecimal values with floats.
#
# A BigDecimal becomes a Float. A Hash is updated in place, value by value,
# and returned. Any other Enumerable is mapped into a new Array.
# Everything else is returned untouched.
def cast_big_decimal(obj)
  if obj.is_a?(BigDecimal)
    obj.to_f
  elsif obj.is_a?(Hash)
    # Hash must be tested before Enumerable, which it also is
    obj.each { |key, value| obj[key] = cast_big_decimal(value) }
  elsif obj.is_a?(Enumerable)
    obj.map { |element| cast_big_decimal(element) }
  else
    obj
  end
end
|
548
|
+
|
549
|
+
end
|
550
|
+
end
|