searchkick 0.8.5 → 0.8.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +6 -1
- data/CHANGELOG.md +9 -0
- data/Gemfile +1 -2
- data/README.md +33 -22
- data/ci/before_install.sh +14 -0
- data/gemfiles/activerecord41.gemfile +8 -0
- data/gemfiles/nobrainer.gemfile +6 -0
- data/lib/searchkick.rb +2 -2
- data/lib/searchkick/index.rb +405 -6
- data/lib/searchkick/model.rb +57 -47
- data/lib/searchkick/query.rb +18 -10
- data/lib/searchkick/results.rb +8 -1
- data/lib/searchkick/tasks.rb +1 -1
- data/lib/searchkick/version.rb +1 -1
- data/test/boost_test.rb +13 -3
- data/test/facets_test.rb +1 -0
- data/test/sql_test.rb +1 -1
- data/test/suggest_test.rb +7 -1
- data/test/test_helper.rb +42 -1
- metadata +6 -5
- data/lib/searchkick/reindex.rb +0 -339
- data/lib/searchkick/similar.rb +0 -19
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 11adf1d76060b33e1a52d9a02f078d70e6b23396
|
4
|
+
data.tar.gz: 913a8ac5be964f35969ff71b59a10e5911d16680
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3b99448bfc3469410428abd5af56ce31ba632d0ebd0b8a9e8990a17f553e776995915925d2e6f17e173dae7576429cfe51d93a865f182322d48e2a4f0b3984a2
|
7
|
+
data.tar.gz: 4322539cd2b8e07d5eb0db03fc6af6dff1cd9f9ede8ad986df9b6717a76b9f5f102c803710fdcb453392f29e1a35cd74c5f8964b60259e1c52acb2c364a45cfa
|
data/.travis.yml
CHANGED
@@ -5,7 +5,7 @@ services:
|
|
5
5
|
- elasticsearch
|
6
6
|
- mongodb
|
7
7
|
before_install:
|
8
|
-
-
|
8
|
+
- ./ci/before_install.sh
|
9
9
|
script: bundle exec rake test
|
10
10
|
before_script:
|
11
11
|
- psql -c 'create database searchkick_test;' -U postgres
|
@@ -15,9 +15,14 @@ notifications:
|
|
15
15
|
on_failure: change
|
16
16
|
gemfile:
|
17
17
|
- Gemfile
|
18
|
+
- gemfiles/activerecord41.gemfile
|
18
19
|
- gemfiles/activerecord40.gemfile
|
19
20
|
- gemfiles/activerecord32.gemfile
|
20
21
|
- gemfiles/activerecord31.gemfile
|
21
22
|
- gemfiles/mongoid2.gemfile
|
22
23
|
- gemfiles/mongoid3.gemfile
|
23
24
|
- gemfiles/mongoid4.gemfile
|
25
|
+
matrix:
|
26
|
+
include:
|
27
|
+
- gemfile: gemfiles/nobrainer.gemfile
|
28
|
+
env: NOBRAINER=true
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,12 @@
|
|
1
|
+
## 0.8.6
|
2
|
+
|
3
|
+
- Added support for NoBrainer
|
4
|
+
- Added `stem_conversions: false` option
|
5
|
+
- Added support for multiple `boost_where` values on the same field
|
6
|
+
- Added support for array of values for `boost_where`
|
7
|
+
- Fixed suggestions with partial match boost
|
8
|
+
- Fixed redefining existing instance methods in models
|
9
|
+
|
1
10
|
## 0.8.5
|
2
11
|
|
3
12
|
- Added support for Elasticsearch 1.4
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -134,8 +134,9 @@ boost_by: {orders_count: {factor: 10}} # default factor is 1
|
|
134
134
|
Boost matching documents
|
135
135
|
|
136
136
|
```ruby
|
137
|
-
boost_where: {user_id: 1}
|
138
|
-
boost_where: {user_id: {value: 1, factor: 100}}
|
137
|
+
boost_where: {user_id: 1}
|
138
|
+
boost_where: {user_id: {value: 1, factor: 100}} # default factor is 1000
|
139
|
+
boost_where: {user_id: [{value: 1, factor: 100}, {value: 2, factor: 200}]}
|
139
140
|
```
|
140
141
|
|
141
142
|
[Conversions](#keep-getting-better) are also a great way to boost.
|
@@ -355,6 +356,22 @@ There are three strategies for keeping the index synced with your database.
|
|
355
356
|
end
|
356
357
|
```
|
357
358
|
|
359
|
+
#### Associations
|
360
|
+
|
361
|
+
Data is **not** automatically synced when an association is updated. If this is desired, add a callback to reindex:
|
362
|
+
|
363
|
+
```ruby
|
364
|
+
class Image < ActiveRecord::Base
|
365
|
+
belongs_to :product
|
366
|
+
|
367
|
+
after_commit :reindex_product
|
368
|
+
|
369
|
+
def reindex_product
|
370
|
+
product.reindex # or reindex_async
|
371
|
+
end
|
372
|
+
end
|
373
|
+
```
|
374
|
+
|
358
375
|
### Keep Getting Better
|
359
376
|
|
360
377
|
Searchkick uses conversion data to learn what users are looking for. If a user searches for “ice cream” and adds Ben & Jerry’s Chunky Monkey to the cart (our conversion metric at Instacart), that item gets a little more weight for similar searches.
|
@@ -685,7 +702,7 @@ Product.searchkick_index.tokens("dieg", analyzer: "searchkick_word_search")
|
|
685
702
|
# ["dieg"] - match!!
|
686
703
|
```
|
687
704
|
|
688
|
-
See the [complete list of analyzers](lib/searchkick/
|
705
|
+
See the [complete list of analyzers](lib/searchkick/index.rb#L209).
|
689
706
|
|
690
707
|
## Deployment
|
691
708
|
|
@@ -828,6 +845,8 @@ Reindex one record
|
|
828
845
|
```ruby
|
829
846
|
product = Product.find 10
|
830
847
|
product.reindex
|
848
|
+
# or to reindex in the background
|
849
|
+
product.reindex_async
|
831
850
|
```
|
832
851
|
|
833
852
|
Remove old indices
|
@@ -902,28 +921,17 @@ class Product < ActiveRecord::Base
|
|
902
921
|
end
|
903
922
|
```
|
904
923
|
|
905
|
-
|
924
|
+
Create index without importing
|
906
925
|
|
907
926
|
```ruby
|
908
|
-
|
909
|
-
searchkick unsearchable: [:color]
|
910
|
-
end
|
927
|
+
Product.reindex(import: false)
|
911
928
|
```
|
912
929
|
|
913
|
-
|
930
|
+
Make fields unsearchable but include in the source
|
914
931
|
|
915
932
|
```ruby
|
916
933
|
class Product < ActiveRecord::Base
|
917
|
-
searchkick
|
918
|
-
|
919
|
-
def reindex_async
|
920
|
-
# custom code to reindex
|
921
|
-
end
|
922
|
-
|
923
|
-
after_commit :reindex_async
|
924
|
-
# or for Mongoid
|
925
|
-
# after_save :reindex_async
|
926
|
-
# after_destroy :reindex_async
|
934
|
+
searchkick unsearchable: [:color]
|
927
935
|
end
|
928
936
|
```
|
929
937
|
|
@@ -1011,12 +1019,12 @@ Thanks to Karel Minarik for [Elasticsearch Ruby](https://github.com/elasticsearc
|
|
1011
1019
|
|
1012
1020
|
## Roadmap
|
1013
1021
|
|
1022
|
+
- Semantic search features
|
1014
1023
|
- Search multiple fields for different terms
|
1015
1024
|
- Search across models
|
1016
1025
|
- Search nested objects
|
1017
1026
|
- Add section on testing
|
1018
1027
|
- Much finer customization
|
1019
|
-
- More transparency into generated queries (for advanced use)
|
1020
1028
|
|
1021
1029
|
## Contributing
|
1022
1030
|
|
@@ -1029,6 +1037,9 @@ Everyone is encouraged to help improve this project. Here are a few ways you can
|
|
1029
1037
|
|
1030
1038
|
To get started with development and testing:
|
1031
1039
|
|
1032
|
-
|
1033
|
-
|
1034
|
-
|
1040
|
+
```sh
|
1041
|
+
git clone https://github.com/ankane/searchkick.git
|
1042
|
+
cd searchkick
|
1043
|
+
bundle install
|
1044
|
+
rake test
|
1045
|
+
```
|
data/ci/before_install.sh
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#!/usr/bin/env bash
|
2
|
+
|
3
|
+
wget https://download.elasticsearch.org/elasticsearch/elasticsearch/elasticsearch-1.3.2.deb
|
4
|
+
sudo dpkg -i elasticsearch-1.3.2.deb
|
5
|
+
sudo service elasticsearch restart
|
6
|
+
|
7
|
+
if [ -n "$NOBRAINER" ]; then
|
8
|
+
source /etc/lsb-release && echo "deb http://download.rethinkdb.com/apt $DISTRIB_CODENAME main" | sudo tee /etc/apt/sources.list.d/rethinkdb.list
|
9
|
+
wget -qO- http://download.rethinkdb.com/apt/pubkey.gpg | sudo apt-key add -
|
10
|
+
sudo apt-get update -q
|
11
|
+
sudo apt-get install rethinkdb
|
12
|
+
sudo cp /etc/rethinkdb/default.conf.sample /etc/rethinkdb/instances.d/instance1.conf
|
13
|
+
sudo service rethinkdb restart
|
14
|
+
fi
|
data/lib/searchkick.rb
CHANGED
@@ -3,10 +3,8 @@ require "elasticsearch"
|
|
3
3
|
require "hashie"
|
4
4
|
require "searchkick/version"
|
5
5
|
require "searchkick/index"
|
6
|
-
require "searchkick/reindex"
|
7
6
|
require "searchkick/results"
|
8
7
|
require "searchkick/query"
|
9
|
-
require "searchkick/similar"
|
10
8
|
require "searchkick/reindex_job"
|
11
9
|
require "searchkick/model"
|
12
10
|
require "searchkick/tasks"
|
@@ -30,11 +28,13 @@ module Searchkick
|
|
30
28
|
attr_accessor :search_method_name
|
31
29
|
attr_accessor :wordnet_path
|
32
30
|
attr_accessor :timeout
|
31
|
+
attr_accessor :models
|
33
32
|
end
|
34
33
|
self.callbacks = true
|
35
34
|
self.search_method_name = :search
|
36
35
|
self.wordnet_path = "/var/lib/wn_s.pl"
|
37
36
|
self.timeout = 10
|
37
|
+
self.models = []
|
38
38
|
|
39
39
|
def self.client
|
40
40
|
@client ||=
|
data/lib/searchkick/index.rb
CHANGED
@@ -1,9 +1,10 @@
|
|
1
1
|
module Searchkick
|
2
2
|
class Index
|
3
|
-
attr_reader :name
|
3
|
+
attr_reader :name, :options
|
4
4
|
|
5
|
-
def initialize(name)
|
5
|
+
def initialize(name, options = {})
|
6
6
|
@name = name
|
7
|
+
@options = options
|
7
8
|
end
|
8
9
|
|
9
10
|
def create(options = {})
|
@@ -22,6 +23,23 @@ module Searchkick
|
|
22
23
|
client.indices.refresh index: name
|
23
24
|
end
|
24
25
|
|
26
|
+
def alias_exists?
|
27
|
+
client.indices.exists_alias name: name
|
28
|
+
end
|
29
|
+
|
30
|
+
def swap(new_name)
|
31
|
+
old_indices =
|
32
|
+
begin
|
33
|
+
client.indices.get_alias(name: name).keys
|
34
|
+
rescue Elasticsearch::Transport::Transport::Errors::NotFound
|
35
|
+
[]
|
36
|
+
end
|
37
|
+
actions = old_indices.map{|old_name| {remove: {index: old_name, alias: name}} } + [{add: {index: new_name, alias: name}}]
|
38
|
+
client.indices.update_aliases body: {actions: actions}
|
39
|
+
end
|
40
|
+
|
41
|
+
# record based
|
42
|
+
|
25
43
|
def store(record)
|
26
44
|
client.index(
|
27
45
|
index: name,
|
@@ -57,18 +75,399 @@ module Searchkick
|
|
57
75
|
)["_source"]
|
58
76
|
end
|
59
77
|
|
60
|
-
def
|
61
|
-
if
|
62
|
-
|
78
|
+
def reindex_record(record)
|
79
|
+
if record.destroyed? or !record.should_index?
|
80
|
+
begin
|
81
|
+
remove(record)
|
82
|
+
rescue Elasticsearch::Transport::Transport::Errors::NotFound
|
83
|
+
# do nothing
|
84
|
+
end
|
63
85
|
else
|
64
|
-
|
86
|
+
store(record)
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
def reindex_record_async(record)
|
91
|
+
if defined?(Searchkick::ReindexV2Job)
|
92
|
+
Searchkick::ReindexV2Job.perform_later(record.class.name, record.id.to_s)
|
93
|
+
else
|
94
|
+
Delayed::Job.enqueue Searchkick::ReindexJob.new(record.class.name, record.id.to_s)
|
65
95
|
end
|
66
96
|
end
|
67
97
|
|
98
|
+
def similar_record(record, options = {})
|
99
|
+
like_text = retrieve(record).to_hash
|
100
|
+
.keep_if{|k,v| !options[:fields] || options[:fields].map(&:to_s).include?(k) }
|
101
|
+
.values.compact.join(" ")
|
102
|
+
|
103
|
+
# TODO deep merge method
|
104
|
+
options[:where] ||= {}
|
105
|
+
options[:where][:_id] ||= {}
|
106
|
+
options[:where][:_id][:not] = record.id.to_s
|
107
|
+
options[:limit] ||= 10
|
108
|
+
options[:similar] = true
|
109
|
+
|
110
|
+
# TODO use index class instead of record class
|
111
|
+
search_model(record.class, like_text, options)
|
112
|
+
end
|
113
|
+
|
114
|
+
# search
|
115
|
+
|
116
|
+
def search_model(searchkick_klass, term = nil, options = {}, &block)
|
117
|
+
query = Searchkick::Query.new(searchkick_klass, term, options)
|
118
|
+
if block
|
119
|
+
block.call(query.body)
|
120
|
+
end
|
121
|
+
if options[:execute] == false
|
122
|
+
query
|
123
|
+
else
|
124
|
+
query.execute
|
125
|
+
end
|
126
|
+
end
|
127
|
+
|
128
|
+
# reindex
|
129
|
+
|
130
|
+
def create_index
|
131
|
+
index = Searchkick::Index.new("#{name}_#{Time.now.strftime('%Y%m%d%H%M%S%L')}", @options)
|
132
|
+
index.create(index_options)
|
133
|
+
index
|
134
|
+
end
|
135
|
+
|
136
|
+
# remove old indices that start w/ index_name
|
137
|
+
def clean_indices
|
138
|
+
all_indices = client.indices.get_aliases
|
139
|
+
indices = all_indices.select{|k, v| (v.empty? || v["aliases"].empty?) && k =~ /\A#{Regexp.escape(name)}_\d{14,17}\z/ }.keys
|
140
|
+
indices.each do |index|
|
141
|
+
Searchkick::Index.new(index).delete
|
142
|
+
end
|
143
|
+
indices
|
144
|
+
end
|
145
|
+
|
146
|
+
# https://gist.github.com/jarosan/3124884
|
147
|
+
# http://www.elasticsearch.org/blog/changing-mapping-with-zero-downtime/
|
148
|
+
def reindex_scope(scope, options = {})
|
149
|
+
skip_import = options[:import] == false
|
150
|
+
|
151
|
+
clean_indices
|
152
|
+
|
153
|
+
index = create_index
|
154
|
+
|
155
|
+
# check if alias exists
|
156
|
+
if alias_exists?
|
157
|
+
# import before swap
|
158
|
+
index.import_scope(scope) unless skip_import
|
159
|
+
|
160
|
+
# get existing indices to remove
|
161
|
+
swap(index.name)
|
162
|
+
clean_indices
|
163
|
+
else
|
164
|
+
delete if exists?
|
165
|
+
swap(index.name)
|
166
|
+
|
167
|
+
# import after swap
|
168
|
+
index.import_scope(scope) unless skip_import
|
169
|
+
end
|
170
|
+
|
171
|
+
index.refresh
|
172
|
+
|
173
|
+
true
|
174
|
+
end
|
175
|
+
|
176
|
+
# Imports the indexable records of +scope+ into this index in batches.
#
# The batch size is read from @options[:batch_size] and defaults to 1000.
# When +scope+ responds to +search_import+, the scope returned by that call
# is used instead. Scopes that respond to +find_in_batches+ are read through
# it; any other scope is walked with +all.each+ and batched by hand.
# Records whose +should_index?+ is falsy are skipped in both paths.
#
# scope - object responding to +find_in_batches+ or +all+
def import_scope(scope)
  batch_size = @options[:batch_size] || 1000

  # use scope for import
  scope = scope.search_import if scope.respond_to?(:search_import)
  if scope.respond_to?(:find_in_batches)
    scope.find_in_batches batch_size: batch_size do |batch|
      import batch.select{|item| item.should_index? }
    end
  else
    # https://github.com/karmi/tire/blob/master/lib/tire/model/import.rb
    # use cursor for Mongoid
    items = []
    scope.all.each do |item|
      items << item if item.should_index?
      if items.length == batch_size
        # Import through self, like the other two import calls in this
        # method. This previously read `index.import items`, but no `index`
        # local is defined here, so a full batch raised instead of importing.
        import items
        items = []
      end
    end
    # flush the final, possibly partial (or empty), batch
    import items
  end
end
|
199
|
+
|
200
|
+
def index_options
|
201
|
+
options = @options
|
202
|
+
|
203
|
+
if options[:mappings] and !options[:merge_mappings]
|
204
|
+
settings = options[:settings] || {}
|
205
|
+
mappings = options[:mappings]
|
206
|
+
else
|
207
|
+
settings = {
|
208
|
+
analysis: {
|
209
|
+
analyzer: {
|
210
|
+
searchkick_keyword: {
|
211
|
+
type: "custom",
|
212
|
+
tokenizer: "keyword",
|
213
|
+
filter: ["lowercase"] + (options[:stem_conversions] == false ? [] : ["searchkick_stemmer"])
|
214
|
+
},
|
215
|
+
default_index: {
|
216
|
+
type: "custom",
|
217
|
+
tokenizer: "standard",
|
218
|
+
# synonym should come last, after stemming and shingle
|
219
|
+
# shingle must come before searchkick_stemmer
|
220
|
+
filter: ["standard", "lowercase", "asciifolding", "searchkick_index_shingle", "searchkick_stemmer"]
|
221
|
+
},
|
222
|
+
searchkick_search: {
|
223
|
+
type: "custom",
|
224
|
+
tokenizer: "standard",
|
225
|
+
filter: ["standard", "lowercase", "asciifolding", "searchkick_search_shingle", "searchkick_stemmer"]
|
226
|
+
},
|
227
|
+
searchkick_search2: {
|
228
|
+
type: "custom",
|
229
|
+
tokenizer: "standard",
|
230
|
+
filter: ["standard", "lowercase", "asciifolding", "searchkick_stemmer"]
|
231
|
+
},
|
232
|
+
# https://github.com/leschenko/elasticsearch_autocomplete/blob/master/lib/elasticsearch_autocomplete/analyzers.rb
|
233
|
+
searchkick_autocomplete_index: {
|
234
|
+
type: "custom",
|
235
|
+
tokenizer: "searchkick_autocomplete_ngram",
|
236
|
+
filter: ["lowercase", "asciifolding"]
|
237
|
+
},
|
238
|
+
searchkick_autocomplete_search: {
|
239
|
+
type: "custom",
|
240
|
+
tokenizer: "keyword",
|
241
|
+
filter: ["lowercase", "asciifolding"]
|
242
|
+
},
|
243
|
+
searchkick_word_search: {
|
244
|
+
type: "custom",
|
245
|
+
tokenizer: "standard",
|
246
|
+
filter: ["lowercase", "asciifolding"]
|
247
|
+
},
|
248
|
+
searchkick_suggest_index: {
|
249
|
+
type: "custom",
|
250
|
+
tokenizer: "standard",
|
251
|
+
filter: ["lowercase", "asciifolding", "searchkick_suggest_shingle"]
|
252
|
+
},
|
253
|
+
searchkick_text_start_index: {
|
254
|
+
type: "custom",
|
255
|
+
tokenizer: "keyword",
|
256
|
+
filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
|
257
|
+
},
|
258
|
+
searchkick_text_middle_index: {
|
259
|
+
type: "custom",
|
260
|
+
tokenizer: "keyword",
|
261
|
+
filter: ["lowercase", "asciifolding", "searchkick_ngram"]
|
262
|
+
},
|
263
|
+
searchkick_text_end_index: {
|
264
|
+
type: "custom",
|
265
|
+
tokenizer: "keyword",
|
266
|
+
filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
|
267
|
+
},
|
268
|
+
searchkick_word_start_index: {
|
269
|
+
type: "custom",
|
270
|
+
tokenizer: "standard",
|
271
|
+
filter: ["lowercase", "asciifolding", "searchkick_edge_ngram"]
|
272
|
+
},
|
273
|
+
searchkick_word_middle_index: {
|
274
|
+
type: "custom",
|
275
|
+
tokenizer: "standard",
|
276
|
+
filter: ["lowercase", "asciifolding", "searchkick_ngram"]
|
277
|
+
},
|
278
|
+
searchkick_word_end_index: {
|
279
|
+
type: "custom",
|
280
|
+
tokenizer: "standard",
|
281
|
+
filter: ["lowercase", "asciifolding", "reverse", "searchkick_edge_ngram", "reverse"]
|
282
|
+
}
|
283
|
+
},
|
284
|
+
filter: {
|
285
|
+
searchkick_index_shingle: {
|
286
|
+
type: "shingle",
|
287
|
+
token_separator: ""
|
288
|
+
},
|
289
|
+
# lucky find http://web.archiveorange.com/archive/v/AAfXfQ17f57FcRINsof7
|
290
|
+
searchkick_search_shingle: {
|
291
|
+
type: "shingle",
|
292
|
+
token_separator: "",
|
293
|
+
output_unigrams: false,
|
294
|
+
output_unigrams_if_no_shingles: true
|
295
|
+
},
|
296
|
+
searchkick_suggest_shingle: {
|
297
|
+
type: "shingle",
|
298
|
+
max_shingle_size: 5
|
299
|
+
},
|
300
|
+
searchkick_edge_ngram: {
|
301
|
+
type: "edgeNGram",
|
302
|
+
min_gram: 1,
|
303
|
+
max_gram: 50
|
304
|
+
},
|
305
|
+
searchkick_ngram: {
|
306
|
+
type: "nGram",
|
307
|
+
min_gram: 1,
|
308
|
+
max_gram: 50
|
309
|
+
},
|
310
|
+
searchkick_stemmer: {
|
311
|
+
type: "snowball",
|
312
|
+
language: options[:language] || "English"
|
313
|
+
}
|
314
|
+
},
|
315
|
+
tokenizer: {
|
316
|
+
searchkick_autocomplete_ngram: {
|
317
|
+
type: "edgeNGram",
|
318
|
+
min_gram: 1,
|
319
|
+
max_gram: 50
|
320
|
+
}
|
321
|
+
}
|
322
|
+
}
|
323
|
+
}
|
324
|
+
|
325
|
+
if Searchkick.env == "test"
|
326
|
+
settings.merge!(number_of_shards: 1, number_of_replicas: 0)
|
327
|
+
end
|
328
|
+
|
329
|
+
settings.deep_merge!(options[:settings] || {})
|
330
|
+
|
331
|
+
# synonyms
|
332
|
+
synonyms = options[:synonyms] || []
|
333
|
+
if synonyms.any?
|
334
|
+
settings[:analysis][:filter][:searchkick_synonym] = {
|
335
|
+
type: "synonym",
|
336
|
+
synonyms: synonyms.select{|s| s.size > 1 }.map{|s| s.join(",") }
|
337
|
+
}
|
338
|
+
# choosing a place for the synonym filter when stemming is not easy
|
339
|
+
# https://groups.google.com/forum/#!topic/elasticsearch/p7qcQlgHdB8
|
340
|
+
# TODO use a snowball stemmer on synonyms when creating the token filter
|
341
|
+
|
342
|
+
# http://elasticsearch-users.115913.n3.nabble.com/synonym-multi-words-search-td4030811.html
|
343
|
+
# I find the following approach effective if you are doing multi-word synonyms (synonym phrases):
|
344
|
+
# - Only apply the synonym expansion at index time
|
345
|
+
# - Don't have the synonym filter applied search
|
346
|
+
# - Use directional synonyms where appropriate. You want to make sure that you're not injecting terms that are too general.
|
347
|
+
settings[:analysis][:analyzer][:default_index][:filter].insert(4, "searchkick_synonym")
|
348
|
+
settings[:analysis][:analyzer][:default_index][:filter] << "searchkick_synonym"
|
349
|
+
end
|
350
|
+
|
351
|
+
if options[:wordnet]
|
352
|
+
settings[:analysis][:filter][:searchkick_wordnet] = {
|
353
|
+
type: "synonym",
|
354
|
+
format: "wordnet",
|
355
|
+
synonyms_path: Searchkick.wordnet_path
|
356
|
+
}
|
357
|
+
|
358
|
+
settings[:analysis][:analyzer][:default_index][:filter].insert(4, "searchkick_wordnet")
|
359
|
+
settings[:analysis][:analyzer][:default_index][:filter] << "searchkick_wordnet"
|
360
|
+
end
|
361
|
+
|
362
|
+
if options[:special_characters] == false
|
363
|
+
settings[:analysis][:analyzer].each do |analyzer, analyzer_settings|
|
364
|
+
analyzer_settings[:filter].reject!{|f| f == "asciifolding" }
|
365
|
+
end
|
366
|
+
end
|
367
|
+
|
368
|
+
mapping = {}
|
369
|
+
|
370
|
+
# conversions
|
371
|
+
if options[:conversions]
|
372
|
+
mapping[:conversions] = {
|
373
|
+
type: "nested",
|
374
|
+
properties: {
|
375
|
+
query: {type: "string", analyzer: "searchkick_keyword"},
|
376
|
+
count: {type: "integer"}
|
377
|
+
}
|
378
|
+
}
|
379
|
+
end
|
380
|
+
|
381
|
+
mapping_options = Hash[
|
382
|
+
[:autocomplete, :suggest, :text_start, :text_middle, :text_end, :word_start, :word_middle, :word_end, :highlight]
|
383
|
+
.map{|type| [type, (options[type] || []).map(&:to_s)] }
|
384
|
+
]
|
385
|
+
|
386
|
+
mapping_options.values.flatten.uniq.each do |field|
|
387
|
+
field_mapping = {
|
388
|
+
type: "multi_field",
|
389
|
+
fields: {
|
390
|
+
field => {type: "string", index: "not_analyzed"},
|
391
|
+
"analyzed" => {type: "string", index: "analyzed"}
|
392
|
+
# term_vector: "with_positions_offsets" for fast / correct highlighting
|
393
|
+
# http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/search-request-highlighting.html#_fast_vector_highlighter
|
394
|
+
}
|
395
|
+
}
|
396
|
+
|
397
|
+
mapping_options.except(:highlight).each do |type, fields|
|
398
|
+
if fields.include?(field)
|
399
|
+
field_mapping[:fields][type] = {type: "string", index: "analyzed", analyzer: "searchkick_#{type}_index"}
|
400
|
+
end
|
401
|
+
end
|
402
|
+
|
403
|
+
if mapping_options[:highlight].include?(field)
|
404
|
+
field_mapping[:fields]["analyzed"][:term_vector] = "with_positions_offsets"
|
405
|
+
end
|
406
|
+
|
407
|
+
mapping[field] = field_mapping
|
408
|
+
end
|
409
|
+
|
410
|
+
(options[:locations] || []).map(&:to_s).each do |field|
|
411
|
+
mapping[field] = {
|
412
|
+
type: "geo_point"
|
413
|
+
}
|
414
|
+
end
|
415
|
+
|
416
|
+
(options[:unsearchable] || []).map(&:to_s).each do |field|
|
417
|
+
mapping[field] = {
|
418
|
+
type: "string",
|
419
|
+
index: "no"
|
420
|
+
}
|
421
|
+
end
|
422
|
+
|
423
|
+
mappings = {
|
424
|
+
_default_: {
|
425
|
+
properties: mapping,
|
426
|
+
# https://gist.github.com/kimchy/2898285
|
427
|
+
dynamic_templates: [
|
428
|
+
{
|
429
|
+
string_template: {
|
430
|
+
match: "*",
|
431
|
+
match_mapping_type: "string",
|
432
|
+
mapping: {
|
433
|
+
# http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
|
434
|
+
type: "multi_field",
|
435
|
+
fields: {
|
436
|
+
# analyzed field must be the default field for include_in_all
|
437
|
+
# http://www.elasticsearch.org/guide/reference/mapping/multi-field-type/
|
438
|
+
# however, we can include the not_analyzed field in _all
|
439
|
+
# and the _all index analyzer will take care of it
|
440
|
+
"{name}" => {type: "string", index: "not_analyzed"},
|
441
|
+
"analyzed" => {type: "string", index: "analyzed"}
|
442
|
+
}
|
443
|
+
}
|
444
|
+
}
|
445
|
+
}
|
446
|
+
]
|
447
|
+
}
|
448
|
+
}.deep_merge(options[:mappings] || {})
|
449
|
+
end
|
450
|
+
|
451
|
+
{
|
452
|
+
settings: settings,
|
453
|
+
mappings: mappings
|
454
|
+
}
|
455
|
+
end
|
456
|
+
|
457
|
+
# other
|
458
|
+
|
68
459
|
def tokens(text, options = {})
|
69
460
|
client.indices.analyze({text: text, index: name}.merge(options))["tokens"].map{|t| t["token"] }
|
70
461
|
end
|
71
462
|
|
463
|
+
def klass_document_type(klass)
|
464
|
+
if klass.respond_to?(:document_type)
|
465
|
+
klass.document_type
|
466
|
+
else
|
467
|
+
klass.model_name.to_s.underscore
|
468
|
+
end
|
469
|
+
end
|
470
|
+
|
72
471
|
protected
|
73
472
|
|
74
473
|
def client
|