searchkick 0.0.2 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +179 -62
- data/lib/searchkick.rb +3 -2
- data/lib/searchkick/model.rb +20 -63
- data/lib/searchkick/reindex.rb +88 -5
- data/lib/searchkick/search.rb +109 -99
- data/lib/searchkick/tasks.rb +1 -1
- data/lib/searchkick/version.rb +1 -1
- data/test/searchkick_test.rb +40 -36
- data/test/test_helper.rb +9 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: edbfa8b338d8ddc3b4bcef4755151c98c7afeb30
|
4
|
+
data.tar.gz: aa1e1fbda8e995dfeb22cb005b5fc755020980bf
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 531313b35dae913d8d999fe3283c2a2535ada666c58c567cc385ac6574dbec0015bf65f2dbacdc1ac07b18eaca31d40aa776d968d1cb36db16f06dc34de633af
|
7
|
+
data.tar.gz: c3bd0aaee05a28b95d2884104144bc7058f26bd3b2c5eba75c68b02022c203bd3326fdfe99702139d264d386f92879467afb69f783a354662f188ead85788086
|
data/README.md
CHANGED
@@ -2,19 +2,39 @@
|
|
2
2
|
|
3
3
|
:rocket: Search made easy
|
4
4
|
|
5
|
-
Searchkick provides sensible search defaults
|
5
|
+
Searchkick provides sensible search defaults. It handles:
|
6
6
|
|
7
7
|
- stemming - `tomatoes` matches `tomato`
|
8
|
-
- special characters - `
|
8
|
+
- special characters - `jalapeno` matches `jalapeño`
|
9
9
|
- extra whitespace - `dishwasher` matches `dish washer`
|
10
10
|
- misspellings - `zuchini` matches `zucchini`
|
11
11
|
- custom synonyms - `qtip` matches `cotton swab`
|
12
12
|
|
13
|
-
|
13
|
+
Plus:
|
14
|
+
|
15
|
+
- query like SQL - no need to learn a new query language
|
16
|
+
- reindex without downtime
|
17
|
+
- continually improve results from conversions - **pretty awesome**
|
18
|
+
|
19
|
+
Powered by Elasticsearch
|
14
20
|
|
15
21
|
:tangerine: Battle-tested at [Instacart](https://www.instacart.com)
|
16
22
|
|
17
|
-
##
|
23
|
+
## Get Started
|
24
|
+
|
25
|
+
[Install Elasticsearch](http://www.elasticsearch.org/guide/reference/setup/installation/). For Homebrew, use:
|
26
|
+
|
27
|
+
```sh
|
28
|
+
brew install elasticsearch
|
29
|
+
```
|
30
|
+
|
31
|
+
Add this line to your application’s Gemfile:
|
32
|
+
|
33
|
+
```ruby
|
34
|
+
gem "searchkick"
|
35
|
+
```
|
36
|
+
|
37
|
+
Add searchkick to models you want to search.
|
18
38
|
|
19
39
|
```ruby
|
20
40
|
class Product < ActiveRecord::Base
|
@@ -22,25 +42,37 @@ class Product < ActiveRecord::Base
|
|
22
42
|
end
|
23
43
|
```
|
24
44
|
|
25
|
-
|
45
|
+
Add data to the search index.
|
26
46
|
|
27
47
|
```ruby
|
28
|
-
Product.
|
48
|
+
Product.reindex
|
29
49
|
```
|
30
50
|
|
31
|
-
|
51
|
+
And to query, use:
|
32
52
|
|
33
53
|
```ruby
|
34
|
-
Product.search "
|
54
|
+
products = Product.search "2% Milk"
|
55
|
+
products.each do |product|
|
56
|
+
puts product.name
|
57
|
+
puts product._score # added by searchkick - between 0 and 1
|
58
|
+
end
|
35
59
|
```
|
36
60
|
|
37
|
-
###
|
61
|
+
### Queries
|
62
|
+
|
63
|
+
Query like SQL
|
38
64
|
|
39
65
|
```ruby
|
40
66
|
Product.search "2% Milk", where: {in_stock: true}, limit: 10, offset: 50
|
41
67
|
```
|
42
68
|
|
43
|
-
|
69
|
+
Search specific fields
|
70
|
+
|
71
|
+
```ruby
|
72
|
+
fields: [:name, :brand]
|
73
|
+
```
|
74
|
+
|
75
|
+
Where
|
44
76
|
|
45
77
|
```ruby
|
46
78
|
where: {
|
@@ -55,28 +87,48 @@ where: {
|
|
55
87
|
}
|
56
88
|
```
|
57
89
|
|
58
|
-
|
90
|
+
Order
|
59
91
|
|
60
92
|
```ruby
|
61
93
|
order: {_score: :desc} # most relevant first - default
|
62
94
|
```
|
63
95
|
|
64
|
-
|
96
|
+
Limit / offset
|
65
97
|
|
66
98
|
```ruby
|
67
|
-
|
99
|
+
limit: 20, offset: 40
|
68
100
|
```
|
69
101
|
|
70
|
-
|
102
|
+
Boost by a field
|
71
103
|
|
72
104
|
```ruby
|
73
|
-
|
105
|
+
boost: "orders_count" # give popular documents a little boost
|
74
106
|
```
|
75
107
|
|
76
|
-
|
108
|
+
### Pagination
|
109
|
+
|
110
|
+
Plays nicely with kaminari and will_paginate.
|
77
111
|
|
78
112
|
```ruby
|
79
|
-
|
113
|
+
# controller
|
114
|
+
@products = Product.search "milk", page: params[:page], per_page: 20
|
115
|
+
|
116
|
+
# view
|
117
|
+
<%= paginate @products %>
|
118
|
+
```
|
119
|
+
|
120
|
+
### Partial Matches
|
121
|
+
|
122
|
+
By default, results must match all words in the query.
|
123
|
+
|
124
|
+
```ruby
|
125
|
+
Product.search "fresh honey" # fresh AND honey
|
126
|
+
```
|
127
|
+
|
128
|
+
To change this, use:
|
129
|
+
|
130
|
+
```ruby
|
131
|
+
Product.search "fresh honey", partial: true # fresh OR honey
|
80
132
|
```
|
81
133
|
|
82
134
|
### Synonyms
|
@@ -89,11 +141,39 @@ end
|
|
89
141
|
|
90
142
|
You must call `Product.reindex` after changing synonyms.
|
91
143
|
|
92
|
-
###
|
144
|
+
### Indexing
|
145
|
+
|
146
|
+
Choose what data is indexed.
|
147
|
+
|
148
|
+
```ruby
|
149
|
+
class Product < ActiveRecord::Base
|
150
|
+
def _source
|
151
|
+
as_json only: [:name, :active], include: {brand: {only: [:city]}}
|
152
|
+
# or equivalently
|
153
|
+
{
|
154
|
+
name: name,
|
155
|
+
active: active,
|
156
|
+
brand: {
|
157
|
+
city: brand.city
|
158
|
+
}
|
159
|
+
}
|
160
|
+
end
|
161
|
+
end
|
162
|
+
```
|
163
|
+
|
164
|
+
Searchkick uses `find_in_batches` to import documents. To eager load associations, use the `searchkick_import` scope.
|
165
|
+
|
166
|
+
```ruby
|
167
|
+
class Product < ActiveRecord::Base
|
168
|
+
scope :searchkick_import, includes(:searches)
|
169
|
+
end
|
170
|
+
```
|
171
|
+
|
172
|
+
### Continually Improve Results
|
93
173
|
|
94
|
-
|
174
|
+
Use data on search conversions to improve results. This can make a **huge** difference.
|
95
175
|
|
96
|
-
First,
|
176
|
+
First, keep track of conversions. The database works well for low volume, but feel free to use Redis or another datastore.
|
97
177
|
|
98
178
|
```ruby
|
99
179
|
class Search < ActiveRecord::Base
|
@@ -102,101 +182,138 @@ class Search < ActiveRecord::Base
|
|
102
182
|
end
|
103
183
|
```
|
104
184
|
|
105
|
-
Add
|
185
|
+
Add conversions to the index.
|
106
186
|
|
107
187
|
```ruby
|
108
188
|
class Product < ActiveRecord::Base
|
109
189
|
has_many :searches
|
110
190
|
|
111
|
-
|
112
|
-
|
113
|
-
def to_indexed_json
|
191
|
+
def _source
|
114
192
|
{
|
115
193
|
name: name,
|
116
|
-
conversions: searches.group("query").count
|
117
|
-
_boost: Math.log(orders_count) # boost more popular products a bit
|
194
|
+
conversions: searches.group("query").count
|
118
195
|
}
|
119
196
|
end
|
120
197
|
end
|
121
198
|
```
|
122
199
|
|
123
|
-
|
200
|
+
Reindex and tell the search method to use conversions.
|
124
201
|
|
125
202
|
```ruby
|
126
203
|
Product.search "Fat Free Milk", conversions: true
|
127
204
|
```
|
128
205
|
|
129
|
-
###
|
206
|
+
### Facets
|
130
207
|
|
131
208
|
```ruby
|
132
|
-
Product.
|
209
|
+
search = Product.search "2% Milk", facets: [:store_id, :aisle_id]
|
210
|
+
p search.facets
|
211
|
+
```
|
212
|
+
|
213
|
+
Advanced
|
214
|
+
|
215
|
+
```ruby
|
216
|
+
Product.search "2% Milk", facets: {store_id: {where: {in_stock: true}}}
|
133
217
|
```
|
134
218
|
|
135
|
-
|
219
|
+
## Deployment
|
220
|
+
|
221
|
+
### Bonsai on Heroku
|
222
|
+
|
223
|
+
Install the add-on:
|
224
|
+
|
225
|
+
```sh
|
226
|
+
heroku addons:add bonsai
|
227
|
+
```
|
136
228
|
|
137
|
-
|
229
|
+
And create an initializer `config/initializers/bonsai.rb` with:
|
138
230
|
|
139
231
|
```ruby
|
140
|
-
|
141
|
-
scope :searchkick_import, where(active: true).includes(:searches)
|
142
|
-
end
|
232
|
+
ENV["ELASTICSEARCH_URL"] = ENV["BONSAI_URL"]
|
143
233
|
```
|
144
234
|
|
145
|
-
|
235
|
+
Then deploy and reindex:
|
146
236
|
|
147
237
|
```sh
|
148
|
-
rake searchkick:reindex CLASS=Product
|
238
|
+
heroku run rake searchkick:reindex CLASS=Product
|
149
239
|
```
|
150
240
|
|
151
|
-
|
241
|
+
## Reference
|
152
242
|
|
153
|
-
|
243
|
+
Reindex one record
|
244
|
+
|
245
|
+
```ruby
|
246
|
+
product = Product.find 10
|
247
|
+
product.reindex
|
248
|
+
```
|
154
249
|
|
155
|
-
|
250
|
+
Use a different index name
|
156
251
|
|
157
252
|
```ruby
|
158
|
-
|
159
|
-
|
253
|
+
class Product < ActiveRecord::Base
|
254
|
+
searchkick index_name: "products_v2"
|
255
|
+
end
|
160
256
|
```
|
161
257
|
|
162
|
-
|
258
|
+
Eager load associations
|
163
259
|
|
164
260
|
```ruby
|
165
|
-
|
261
|
+
Product.search "milk", include: [:brand, :stores]
|
166
262
|
```
|
167
263
|
|
168
|
-
|
264
|
+
Do not load models
|
169
265
|
|
170
|
-
|
266
|
+
```ruby
|
267
|
+
Product.search "milk", load: false
|
268
|
+
```
|
269
|
+
|
270
|
+
## Migrating from Tire
|
271
|
+
|
272
|
+
1. Change `search` methods to `tire.search` and add index name in existing search calls
|
273
|
+
|
274
|
+
```ruby
|
275
|
+
Product.search "fruit"
|
276
|
+
```
|
277
|
+
|
278
|
+
should be replaced with
|
279
|
+
|
280
|
+
```ruby
|
281
|
+
Product.tire.search "fruit", index: "products"
|
282
|
+
```
|
283
|
+
|
284
|
+
2. Replace tire mapping w/ searchkick method
|
171
285
|
|
172
|
-
|
286
|
+
```ruby
|
287
|
+
searchkick index_name: "products_v2"
|
288
|
+
```
|
173
289
|
|
174
|
-
|
290
|
+
3. Deploy and reindex
|
175
291
|
|
176
|
-
|
292
|
+
```sh
|
293
|
+
rake searchkick:reindex CLASS=Product # or Product.reindex in the console
|
294
|
+
```
|
295
|
+
|
296
|
+
4. Once it finishes, replace search calls w/ searchkick calls
|
297
|
+
|
298
|
+
## Elasticsearch Gotchas
|
299
|
+
|
300
|
+
### Inconsistent Scores
|
301
|
+
|
302
|
+
Due to the distributed nature of Elasticsearch, you can get incorrect results when the number of documents in the index is low. You can [read more about it here](http://www.elasticsearch.org/blog/understanding-query-then-fetch-vs-dfs-query-then-fetch/). To fix this, do:
|
177
303
|
|
178
304
|
```ruby
|
179
|
-
|
305
|
+
class Product < ActiveRecord::Base
|
306
|
+
searchkick settings: {number_of_shards: 1}
|
307
|
+
end
|
180
308
|
```
|
181
309
|
|
182
|
-
|
310
|
+
## Thanks
|
183
311
|
|
184
|
-
|
185
|
-
bundle
|
186
|
-
```
|
312
|
+
Thanks to [Karel Minarik](https://github.com/karmi) for Tire and [Jaroslav Kalistsuk](https://github.com/jarosan) for zero downtime reindexing.
|
187
313
|
|
188
314
|
## TODO
|
189
315
|
|
190
|
-
- Autocomplete
|
191
|
-
- Option to turn off fuzzy matching (should this be default?)
|
192
|
-
- Exact phrase matches (in order)
|
193
|
-
- Focus on results format (load: true?)
|
194
|
-
- Test helpers - everyone should test their own search
|
195
316
|
- Built-in synonyms from WordNet
|
196
|
-
- Dashboard w/ real-time analytics?
|
197
|
-
- [Suggest API](http://www.elasticsearch.org/guide/reference/api/search/suggest/) "Did you mean?"
|
198
|
-
- Allow for "exact search" with quotes
|
199
|
-
- Make updates to old and new index while reindexing [possibly with another alias](http://www.kickstarter.com/backing-and-hacking)
|
200
317
|
|
201
318
|
## Contributing
|
202
319
|
|
data/lib/searchkick.rb
CHANGED
@@ -4,6 +4,7 @@ require "searchkick/search"
|
|
4
4
|
require "searchkick/model"
|
5
5
|
require "searchkick/tasks"
|
6
6
|
require "tire"
|
7
|
-
require "active_record" # TODO only require active_model
|
8
7
|
|
9
|
-
|
8
|
+
# TODO find better ActiveModel hook
|
9
|
+
ActiveModel::AttributeMethods::ClassMethods.send(:include, Searchkick::Model)
|
10
|
+
ActiveRecord::Base.send(:extend, Searchkick::Model) if defined?(ActiveRecord)
|
data/lib/searchkick/model.rb
CHANGED
@@ -2,79 +2,36 @@ module Searchkick
|
|
2
2
|
module Model
|
3
3
|
|
4
4
|
def searchkick(options = {})
|
5
|
-
|
6
|
-
|
7
|
-
analyzer: {
|
8
|
-
searchkick_keyword: {
|
9
|
-
type: "custom",
|
10
|
-
tokenizer: "keyword",
|
11
|
-
filter: ["lowercase", "snowball"]
|
12
|
-
},
|
13
|
-
default_index: {
|
14
|
-
type: "custom",
|
15
|
-
tokenizer: "standard",
|
16
|
-
# synonym should come last, after stemming and shingle
|
17
|
-
# shingle must come before snowball
|
18
|
-
filter: ["standard", "lowercase", "asciifolding", "stop", "snowball", "searchkick_index_shingle"]
|
19
|
-
},
|
20
|
-
searchkick_search: {
|
21
|
-
type: "custom",
|
22
|
-
tokenizer: "standard",
|
23
|
-
filter: ["standard", "lowercase", "asciifolding", "stop", "snowball", "searchkick_search_shingle"]
|
24
|
-
},
|
25
|
-
searchkick_search2: {
|
26
|
-
type: "custom",
|
27
|
-
tokenizer: "standard",
|
28
|
-
filter: ["standard", "lowercase", "asciifolding", "stop", "snowball"] #, "searchkick_search_shingle"]
|
29
|
-
}
|
30
|
-
},
|
31
|
-
filter: {
|
32
|
-
searchkick_index_shingle: {
|
33
|
-
type: "shingle",
|
34
|
-
token_separator: ""
|
35
|
-
},
|
36
|
-
# lucky find http://web.archiveorange.com/archive/v/AAfXfQ17f57FcRINsof7
|
37
|
-
searchkick_search_shingle: {
|
38
|
-
type: "shingle",
|
39
|
-
token_separator: "",
|
40
|
-
output_unigrams: false,
|
41
|
-
output_unigrams_if_no_shingles: true
|
42
|
-
}
|
43
|
-
}
|
44
|
-
}
|
45
|
-
}.merge(options[:settings] || {})
|
46
|
-
synonyms = options[:synonyms] || []
|
47
|
-
if synonyms.any?
|
48
|
-
custom_settings[:analysis][:filter][:searchkick_synonym] = {
|
49
|
-
type: "synonym",
|
50
|
-
ignore_case: true,
|
51
|
-
synonyms: synonyms.map{|s| s.join(" => ") } # TODO support more than 2 synonyms on a line
|
52
|
-
}
|
53
|
-
custom_settings[:analysis][:analyzer][:default_index][:filter] << "searchkick_synonym"
|
54
|
-
custom_settings[:analysis][:analyzer][:searchkick_search][:filter].insert(-2, "searchkick_synonym")
|
55
|
-
custom_settings[:analysis][:analyzer][:searchkick_search][:filter] << "searchkick_synonym"
|
56
|
-
custom_settings[:analysis][:analyzer][:searchkick_search2][:filter] << "searchkick_synonym"
|
57
|
-
end
|
5
|
+
@searchkick_options = options.dup
|
6
|
+
@searchkick_options[:conversions] = true if options[:conversions].nil?
|
58
7
|
|
59
8
|
class_eval do
|
60
9
|
extend Searchkick::Search
|
61
10
|
extend Searchkick::Reindex
|
62
11
|
include Tire::Model::Search
|
63
12
|
include Tire::Model::Callbacks
|
64
|
-
|
65
13
|
tire do
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
14
|
+
index_name options[:index_name] || [klass.model_name.plural, ENV["RACK_ENV"] || "development"].join("_")
|
15
|
+
end
|
16
|
+
attr_accessor :_score
|
17
|
+
|
18
|
+
def reindex
|
19
|
+
update_index
|
20
|
+
end
|
21
|
+
|
22
|
+
def _source
|
23
|
+
as_json
|
24
|
+
end
|
25
|
+
|
26
|
+
def to_indexed_json
|
27
|
+
source = _source
|
28
|
+
if self.class.instance_variable_get("@searchkick_options")[:conversions] and source[:conversions]
|
29
|
+
source[:conversions] = source[:conversions].map{|k, v| {query: k, count: v} }
|
75
30
|
end
|
31
|
+
source.to_json
|
76
32
|
end
|
77
33
|
end
|
78
34
|
end
|
35
|
+
|
79
36
|
end
|
80
37
|
end
|
data/lib/searchkick/reindex.rb
CHANGED
@@ -5,17 +5,18 @@ module Searchkick
|
|
5
5
|
def reindex
|
6
6
|
alias_name = tire.index.name
|
7
7
|
new_index = alias_name + "_" + Time.now.strftime("%Y%m%d%H%M%S")
|
8
|
-
|
9
|
-
# Rake::Task["tire:import"].invoke
|
10
8
|
index = Tire::Index.new(new_index)
|
11
|
-
|
9
|
+
|
10
|
+
index.create searchkick_index_options
|
11
|
+
|
12
|
+
# use scope for import
|
12
13
|
scope = respond_to?(:searchkick_import) ? searchkick_import : self
|
13
14
|
scope.find_in_batches do |batch|
|
14
15
|
index.import batch
|
15
16
|
end
|
16
17
|
|
17
18
|
if a = Tire::Alias.find(alias_name)
|
18
|
-
old_indices =
|
19
|
+
old_indices = a.indices.dup
|
19
20
|
old_indices.each do |index|
|
20
21
|
a.indices.delete index
|
21
22
|
end
|
@@ -25,7 +26,7 @@ module Searchkick
|
|
25
26
|
|
26
27
|
old_indices.each do |index|
|
27
28
|
i = Tire::Index.new(index)
|
28
|
-
i.delete
|
29
|
+
i.delete
|
29
30
|
end
|
30
31
|
else
|
31
32
|
i = Tire::Index.new(alias_name)
|
@@ -36,5 +37,87 @@ module Searchkick
|
|
36
37
|
true
|
37
38
|
end
|
38
39
|
|
40
|
+
private
|
41
|
+
|
42
|
+
def searchkick_index_options
|
43
|
+
options = @searchkick_options
|
44
|
+
|
45
|
+
settings = {
|
46
|
+
analysis: {
|
47
|
+
analyzer: {
|
48
|
+
searchkick_keyword: {
|
49
|
+
type: "custom",
|
50
|
+
tokenizer: "keyword",
|
51
|
+
filter: ["lowercase", "snowball"]
|
52
|
+
},
|
53
|
+
default_index: {
|
54
|
+
type: "custom",
|
55
|
+
tokenizer: "standard",
|
56
|
+
# synonym should come last, after stemming and shingle
|
57
|
+
# shingle must come before snowball
|
58
|
+
filter: ["standard", "lowercase", "asciifolding", "stop", "snowball", "searchkick_index_shingle"]
|
59
|
+
},
|
60
|
+
searchkick_search: {
|
61
|
+
type: "custom",
|
62
|
+
tokenizer: "standard",
|
63
|
+
filter: ["standard", "lowercase", "asciifolding", "stop", "snowball", "searchkick_search_shingle"]
|
64
|
+
},
|
65
|
+
searchkick_search2: {
|
66
|
+
type: "custom",
|
67
|
+
tokenizer: "standard",
|
68
|
+
filter: ["standard", "lowercase", "asciifolding", "stop", "snowball"]
|
69
|
+
}
|
70
|
+
},
|
71
|
+
filter: {
|
72
|
+
searchkick_index_shingle: {
|
73
|
+
type: "shingle",
|
74
|
+
token_separator: ""
|
75
|
+
},
|
76
|
+
# lucky find http://web.archiveorange.com/archive/v/AAfXfQ17f57FcRINsof7
|
77
|
+
searchkick_search_shingle: {
|
78
|
+
type: "shingle",
|
79
|
+
token_separator: "",
|
80
|
+
output_unigrams: false,
|
81
|
+
output_unigrams_if_no_shingles: true
|
82
|
+
}
|
83
|
+
}
|
84
|
+
}
|
85
|
+
}.merge(options[:settings] || {})
|
86
|
+
synonyms = options[:synonyms] || []
|
87
|
+
if synonyms.any?
|
88
|
+
settings[:analysis][:filter][:searchkick_synonym] = {
|
89
|
+
type: "synonym",
|
90
|
+
ignore_case: true,
|
91
|
+
synonyms: synonyms.select{|s| s.size > 1 }.map{|s| "#{s[0..-2].join(",")} => #{s[-1]}" }
|
92
|
+
}
|
93
|
+
settings[:analysis][:analyzer][:default_index][:filter] << "searchkick_synonym"
|
94
|
+
settings[:analysis][:analyzer][:searchkick_search][:filter].insert(-2, "searchkick_synonym")
|
95
|
+
settings[:analysis][:analyzer][:searchkick_search][:filter] << "searchkick_synonym"
|
96
|
+
settings[:analysis][:analyzer][:searchkick_search2][:filter] << "searchkick_synonym"
|
97
|
+
end
|
98
|
+
|
99
|
+
mapping = {}
|
100
|
+
if options[:conversions]
|
101
|
+
mapping[:conversions] = {
|
102
|
+
type: "nested",
|
103
|
+
properties: {
|
104
|
+
query: {type: "string", analyzer: "searchkick_keyword"},
|
105
|
+
count: {type: "integer"}
|
106
|
+
}
|
107
|
+
}
|
108
|
+
end
|
109
|
+
|
110
|
+
mappings = {
|
111
|
+
document_type.to_sym => {
|
112
|
+
properties: mapping
|
113
|
+
}
|
114
|
+
}
|
115
|
+
|
116
|
+
{
|
117
|
+
settings: settings,
|
118
|
+
mappings: mappings
|
119
|
+
}
|
120
|
+
end
|
121
|
+
|
39
122
|
end
|
40
123
|
end
|
data/lib/searchkick/search.rb
CHANGED
@@ -1,137 +1,147 @@
|
|
1
1
|
module Searchkick
|
2
|
-
# can't check mapping for conversions since the new index may not be built
|
3
2
|
module Search
|
4
|
-
def index_types
|
5
|
-
Hash[ (((Product.index.mapping || {})["product"] || {})["properties"] || {}).map{|k, v| [k, v["type"]] } ].reject{|k, v| k == "conversions" || k[0] == "_" }
|
6
|
-
end
|
7
3
|
|
8
4
|
def search(term, options = {})
|
5
|
+
term = term.to_s
|
9
6
|
fields = options[:fields] || ["_all"]
|
10
7
|
operator = options[:partial] ? "or" : "and"
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
8
|
+
load = options[:load].nil? ? true : options[:load]
|
9
|
+
load = (options[:include] || true) if load
|
10
|
+
|
11
|
+
collection =
|
12
|
+
tire.search load: load, page: options[:page], per_page: options[:per_page] do
|
13
|
+
query do
|
14
|
+
boolean do
|
15
|
+
must do
|
16
|
+
# TODO escape boost field
|
17
|
+
score_script = options[:boost] ? "_score * log(doc['#{options[:boost]}'].value + 2.718281828)" : "_score"
|
18
|
+
custom_score script: score_script do
|
19
|
+
dis_max do
|
20
|
+
query do
|
21
|
+
match fields, term, boost: 10, operator: operator, analyzer: "searchkick_search"
|
22
|
+
end
|
23
|
+
query do
|
24
|
+
match fields, term, boost: 10, operator: operator, analyzer: "searchkick_search2"
|
25
|
+
end
|
26
|
+
query do
|
27
|
+
match fields, term, use_dis_max: false, fuzziness: 0.7, max_expansions: 1, prefix_length: 1, operator: operator, analyzer: "searchkick_search"
|
28
|
+
end
|
29
|
+
query do
|
30
|
+
match fields, term, use_dis_max: false, fuzziness: 0.7, max_expansions: 1, prefix_length: 1, operator: operator, analyzer: "searchkick_search2"
|
31
|
+
end
|
32
|
+
end
|
27
33
|
end
|
28
34
|
end
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
35
|
+
# can't check mapping for conversions since the new index may not be built
|
36
|
+
if options[:conversions]
|
37
|
+
should do
|
38
|
+
nested path: "conversions", score_mode: "total" do
|
39
|
+
query do
|
40
|
+
custom_score script: "log(doc['count'].value)" do
|
41
|
+
match "query", term
|
42
|
+
end
|
36
43
|
end
|
37
44
|
end
|
38
45
|
end
|
39
46
|
end
|
40
47
|
end
|
41
48
|
end
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
explain options[:explain] if options[:explain]
|
49
|
+
size options[:limit] || 100000 # return all - like sql query
|
50
|
+
from options[:offset] if options[:offset]
|
51
|
+
explain options[:explain] if options[:explain]
|
46
52
|
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
53
|
+
# order
|
54
|
+
if options[:order]
|
55
|
+
sort do
|
56
|
+
options[:order].each do |k, v|
|
57
|
+
by k, v
|
58
|
+
end
|
52
59
|
end
|
53
60
|
end
|
54
|
-
end
|
55
61
|
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
end
|
62
|
+
# where
|
63
|
+
# TODO expand or
|
64
|
+
where_filters =
|
65
|
+
proc do |where|
|
66
|
+
filters = []
|
67
|
+
(where || {}).each do |field, value|
|
68
|
+
if field == :or
|
69
|
+
value.each do |or_clause|
|
70
|
+
filters << {or: or_clause.map{|or_statement| {term: or_statement} }}
|
71
|
+
end
|
72
|
+
else
|
73
|
+
# expand ranges
|
74
|
+
if value.is_a?(Range)
|
75
|
+
value = {gte: value.first, (value.exclude_end? ? :lt : :lte) => value.last}
|
76
|
+
end
|
72
77
|
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
else
|
81
|
-
filters << {not: {term: {field => op_value}}}
|
82
|
-
end
|
83
|
-
else
|
84
|
-
range_query =
|
85
|
-
case op
|
86
|
-
when :gt
|
87
|
-
{from: op_value, include_lower: false}
|
88
|
-
when :gte
|
89
|
-
{from: op_value, include_lower: true}
|
90
|
-
when :lt
|
91
|
-
{to: op_value, include_upper: false}
|
92
|
-
when :lte
|
93
|
-
{to: op_value, include_upper: true}
|
78
|
+
if value.is_a?(Array) # in query
|
79
|
+
filters << {terms: {field => value}}
|
80
|
+
elsif value.is_a?(Hash)
|
81
|
+
value.each do |op, op_value|
|
82
|
+
if op == :not # not equal
|
83
|
+
if op_value.is_a?(Array)
|
84
|
+
filters << {not: {terms: {field => op_value}}}
|
94
85
|
else
|
95
|
-
|
86
|
+
filters << {not: {term: {field => op_value}}}
|
96
87
|
end
|
97
|
-
|
88
|
+
else
|
89
|
+
range_query =
|
90
|
+
case op
|
91
|
+
when :gt
|
92
|
+
{from: op_value, include_lower: false}
|
93
|
+
when :gte
|
94
|
+
{from: op_value, include_lower: true}
|
95
|
+
when :lt
|
96
|
+
{to: op_value, include_upper: false}
|
97
|
+
when :lte
|
98
|
+
{to: op_value, include_upper: true}
|
99
|
+
else
|
100
|
+
raise "Unknown where operator"
|
101
|
+
end
|
102
|
+
filters << {range: {field => range_query}}
|
103
|
+
end
|
98
104
|
end
|
105
|
+
else
|
106
|
+
filters << {term: {field => value}}
|
99
107
|
end
|
100
|
-
else
|
101
|
-
filters << {term: {field => value}}
|
102
108
|
end
|
103
109
|
end
|
110
|
+
filters
|
104
111
|
end
|
105
|
-
filters
|
106
|
-
end
|
107
|
-
|
108
|
-
where_filters.call(options[:where]).each do |f|
|
109
|
-
type, value = f.first
|
110
|
-
filter type, value
|
111
|
-
end
|
112
112
|
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
if facets.is_a?(Array) # convert to more advanced syntax
|
117
|
-
facets = Hash[ facets.map{|f| [f, {}] } ]
|
113
|
+
where_filters.call(options[:where]).each do |f|
|
114
|
+
type, value = f.first
|
115
|
+
filter type, value
|
118
116
|
end
|
119
117
|
|
120
|
-
facets
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
118
|
+
# facets
|
119
|
+
if options[:facets]
|
120
|
+
facets = options[:facets] || {}
|
121
|
+
if facets.is_a?(Array) # convert to more advanced syntax
|
122
|
+
facets = Hash[ facets.map{|f| [f, {}] } ]
|
123
|
+
end
|
124
|
+
|
125
|
+
facets.each do |field, facet_options|
|
126
|
+
facet_filters = where_filters.call(facet_options[:where])
|
127
|
+
facet field do
|
128
|
+
terms field
|
129
|
+
if facet_filters.size == 1
|
130
|
+
type, value = facet_filters.first.first
|
131
|
+
facet_filter type, value
|
132
|
+
elsif facet_filters.size > 1
|
133
|
+
facet_filter :and, *facet_filters
|
134
|
+
end
|
129
135
|
end
|
130
136
|
end
|
131
137
|
end
|
132
138
|
end
|
133
139
|
|
140
|
+
collection.each_with_hit do |model, hit|
|
141
|
+
model._score = hit["_score"].to_f / collection.max_score
|
134
142
|
end
|
143
|
+
collection
|
135
144
|
end
|
145
|
+
|
136
146
|
end
|
137
147
|
end
|
data/lib/searchkick/tasks.rb
CHANGED
data/lib/searchkick/version.rb
CHANGED
data/test/searchkick_test.rb
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
require "test_helper"
|
2
2
|
|
3
3
|
class Product < ActiveRecord::Base
|
4
|
+
has_many :searches
|
5
|
+
|
4
6
|
searchkick \
|
5
7
|
synonyms: [
|
6
8
|
["clorox", "bleach"],
|
@@ -15,24 +17,22 @@ class Product < ActiveRecord::Base
|
|
15
17
|
},
|
16
18
|
conversions: true
|
17
19
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
# integer :orders_count
|
22
|
-
# end
|
20
|
+
def _source
|
21
|
+
as_json.merge conversions: searches.group("query").count
|
22
|
+
end
|
23
23
|
end
|
24
24
|
|
25
|
-
|
25
|
+
class Search < ActiveRecord::Base
|
26
|
+
belongs_to :product
|
27
|
+
end
|
28
|
+
|
29
|
+
Product.reindex
|
26
30
|
|
27
31
|
class TestSearchkick < Minitest::Unit::TestCase
|
28
32
|
|
29
33
|
def setup
|
30
|
-
|
31
|
-
Product.
|
32
|
-
end
|
33
|
-
|
34
|
-
def test_reindex
|
35
|
-
assert Product.reindex
|
34
|
+
Search.delete_all
|
35
|
+
Product.destroy_all
|
36
36
|
end
|
37
37
|
|
38
38
|
# exact
|
@@ -106,16 +106,16 @@ class TestSearchkick < Minitest::Unit::TestCase
|
|
106
106
|
# conversions
|
107
107
|
|
108
108
|
def test_conversions
|
109
|
-
|
110
|
-
{name: "Tomato Sauce", conversions: [{query: "tomato sauce", count: 5}, {query: "tomato", count:
|
109
|
+
store_conversions [
|
110
|
+
{name: "Tomato Sauce", conversions: [{query: "tomato sauce", count: 5}, {query: "tomato", count: 20}]},
|
111
111
|
{name: "Tomato Paste", conversions: []},
|
112
|
-
{name: "Tomatoes", conversions: [{query: "tomato", count:
|
112
|
+
{name: "Tomatoes", conversions: [{query: "tomato", count: 10}, {query: "tomato sauce", count: 2}]}
|
113
113
|
]
|
114
114
|
assert_search "tomato", ["Tomato Sauce", "Tomatoes", "Tomato Paste"]
|
115
115
|
end
|
116
116
|
|
117
117
|
def test_conversions_stemmed
|
118
|
-
|
118
|
+
store_conversions [
|
119
119
|
{name: "Tomato A", conversions: [{query: "tomato", count: 2}, {query: "tomatos", count: 2}, {query: "Tomatoes", count: 2}]},
|
120
120
|
{name: "Tomato B", conversions: [{query: "tomato", count: 4}]}
|
121
121
|
]
|
@@ -178,27 +178,17 @@ class TestSearchkick < Minitest::Unit::TestCase
|
|
178
178
|
|
179
179
|
def test_boost
|
180
180
|
store [
|
181
|
-
{name: "Organic Tomato A"
|
182
|
-
{name: "Tomato B"}
|
181
|
+
{name: "Organic Tomato A"},
|
182
|
+
{name: "Tomato B", orders_count: 10}
|
183
183
|
]
|
184
|
-
assert_search "tomato", ["Organic Tomato A",
|
184
|
+
assert_search "tomato", ["Tomato B", "Organic Tomato A"], boost: "orders_count"
|
185
185
|
end
|
186
186
|
|
187
187
|
def test_boost_zero
|
188
188
|
store [
|
189
|
-
{name: "Zero Boost",
|
189
|
+
{name: "Zero Boost", orders_count: 0}
|
190
190
|
]
|
191
|
-
assert_search "zero", ["Zero Boost"]
|
192
|
-
end
|
193
|
-
|
194
|
-
# default to 1
|
195
|
-
def test_boost_null
|
196
|
-
store [
|
197
|
-
{name: "Zero Boost A", _boost: 1.1},
|
198
|
-
{name: "Zero Boost B"},
|
199
|
-
{name: "Zero Boost C", _boost: 0.9},
|
200
|
-
]
|
201
|
-
assert_search "zero", ["Zero Boost A", "Zero Boost B", "Zero Boost C"]
|
191
|
+
assert_search "zero", ["Zero Boost"], boost: "orders_count"
|
202
192
|
end
|
203
193
|
|
204
194
|
# search method
|
@@ -216,10 +206,10 @@ class TestSearchkick < Minitest::Unit::TestCase
|
|
216
206
|
def test_where
|
217
207
|
now = Time.now
|
218
208
|
store [
|
219
|
-
{name: "Product A", store_id: 1, in_stock: true, backordered: true, created_at: now,
|
220
|
-
{name: "Product B", store_id: 2, in_stock: true, backordered: false, created_at: now - 1,
|
221
|
-
{name: "Product C", store_id: 3, in_stock: false, backordered: true, created_at: now - 2,
|
222
|
-
{name: "Product D", store_id: 4, in_stock: false, backordered: false, created_at: now - 3,
|
209
|
+
{name: "Product A", store_id: 1, in_stock: true, backordered: true, created_at: now, orders_count: 4},
|
210
|
+
{name: "Product B", store_id: 2, in_stock: true, backordered: false, created_at: now - 1, orders_count: 3},
|
211
|
+
{name: "Product C", store_id: 3, in_stock: false, backordered: true, created_at: now - 2, orders_count: 2},
|
212
|
+
{name: "Product D", store_id: 4, in_stock: false, backordered: false, created_at: now - 3, orders_count: 1},
|
223
213
|
]
|
224
214
|
assert_search "product", ["Product A", "Product B"], where: {in_stock: true}
|
225
215
|
# date
|
@@ -267,7 +257,7 @@ class TestSearchkick < Minitest::Unit::TestCase
|
|
267
257
|
|
268
258
|
def store(documents)
|
269
259
|
documents.each do |document|
|
270
|
-
Product.
|
260
|
+
Product.create!(document)
|
271
261
|
end
|
272
262
|
Product.index.refresh
|
273
263
|
end
|
@@ -276,6 +266,20 @@ class TestSearchkick < Minitest::Unit::TestCase
|
|
276
266
|
store names.map{|name| {name: name} }
|
277
267
|
end
|
278
268
|
|
269
|
+
def store_conversions(documents)
|
270
|
+
documents.each do |document|
|
271
|
+
conversions = document.delete(:conversions)
|
272
|
+
product = Product.create!(document)
|
273
|
+
conversions.each do |c|
|
274
|
+
c[:count].times do
|
275
|
+
product.searches.create!(query: c[:query])
|
276
|
+
end
|
277
|
+
end
|
278
|
+
end
|
279
|
+
Product.reindex
|
280
|
+
Product.index.refresh
|
281
|
+
end
|
282
|
+
|
279
283
|
def assert_search(term, expected, options = {})
|
280
284
|
assert_equal expected, Product.search(term, options.merge(fields: [:name], conversions: true)).map(&:name)
|
281
285
|
end
|
data/test/test_helper.rb
CHANGED
@@ -19,8 +19,17 @@ ActiveRecord::Migration.create_table :products, :force => true do |t|
|
|
19
19
|
t.integer :store_id
|
20
20
|
t.boolean :in_stock
|
21
21
|
t.boolean :backordered
|
22
|
+
t.integer :orders_count
|
23
|
+
t.string :color
|
22
24
|
t.timestamps
|
23
25
|
end
|
24
26
|
|
27
|
+
ActiveRecord::Migration.create_table :searches, :force => true do |t|
|
28
|
+
t.string :query
|
29
|
+
t.timestamp :searched_at
|
30
|
+
t.timestamp :converted_at
|
31
|
+
t.references :product
|
32
|
+
end
|
33
|
+
|
25
34
|
File.delete("elasticsearch.log") if File.exists?("elasticsearch.log")
|
26
35
|
Tire.configure { logger "elasticsearch.log", :level => "debug" }
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: searchkick
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-07-
|
11
|
+
date: 2013-07-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: tire
|