escargot 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +9 -0
- data/MIT-LICENSE +20 -0
- data/README.markdown +376 -0
- data/Rakefile +23 -0
- data/escargot.gemspec +24 -0
- data/init.rb +1 -0
- data/install.rb +1 -0
- data/lib/escargot/activerecord_ex.rb +193 -0
- data/lib/escargot/distributed_indexing.rb +58 -0
- data/lib/escargot/elasticsearch_ex.rb +63 -0
- data/lib/escargot/local_indexing.rb +19 -0
- data/lib/escargot/queue_backend/base.rb +7 -0
- data/lib/escargot/queue_backend/resque.rb +9 -0
- data/lib/escargot/version.rb +3 -0
- data/lib/escargot.rb +92 -0
- data/lib/tasks/escargot.rake +50 -0
- data/rails/init.rb +21 -0
- data/test/admin_index_test.rb +26 -0
- data/test/database.yml +21 -0
- data/test/define_index_test.rb +21 -0
- data/test/distributed_index_creation_test.rb +97 -0
- data/test/index_options_test.rb +34 -0
- data/test/indexed_content_test.rb +39 -0
- data/test/local_index_creation_test.rb +51 -0
- data/test/mappings_test.rb +38 -0
- data/test/nrt_enqueue_test.rb +87 -0
- data/test/nrt_immediate_test.rb +57 -0
- data/test/nrt_immediate_with_refresh_test.rb +49 -0
- data/test/schema.rb +20 -0
- data/test/search_multiple_models_test.rb +74 -0
- data/test/search_test.rb +84 -0
- data/test/test_helper.rb +55 -0
- data/uninstall.rb +1 -0
- metadata +133 -0
data/.gitignore
ADDED
data/MIT-LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2010 Angel Faus & vLex.com
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.markdown
ADDED
@@ -0,0 +1,376 @@
|
|
1
|
+
|
2
|
+
Escargot
|
3
|
+
============
|
4
|
+
|
5
|
+
Connects any Rails model with ElasticSearch, supports near real time updates,
|
6
|
+
distributed indexing and models that integrate data from many databases.
|
7
|
+
|
8
|
+
Requirements
|
9
|
+
============
|
10
|
+
Currently only rails 2.3 is supported. You will need ElasticSearch, the 'rubberband' gem
|
11
|
+
and (if you want to use the **optional** distributed indexing mode) Redis.
|
12
|
+
|
13
|
+
Usage
|
14
|
+
=======
|
15
|
+
|
16
|
+
First, [download and start ElasticSearch](http://www.elasticsearch.com/docs/elasticsearch/setup/installation/) (it's really simple). With the default setup of
|
17
|
+
ElasticSearch (listening to localhost and port 9200), no configuration of the plugin is
|
18
|
+
necessary.
|
19
|
+
|
20
|
+
To define an index, simply add a line to your model
|
21
|
+
|
22
|
+
class Post < ActiveRecord::Base
|
23
|
+
elastic_index
|
24
|
+
end
|
25
|
+
|
26
|
+
To create the index, execute the rake task that rebuilds all indexes:
|
27
|
+
|
28
|
+
rake escargot:index
|
29
|
+
|
30
|
+
Or restrict it to just one model
|
31
|
+
|
32
|
+
rake "escargot:index[Post]"
|
33
|
+
|
34
|
+
And you are ready to search:
|
35
|
+
|
36
|
+
Post.search "dreams OR nightmares"
|
37
|
+
|
38
|
+
Near Real Time support
|
39
|
+
=======
|
40
|
+
|
41
|
+
The default behavior is that every time you save or delete a record in an indexed model
|
42
|
+
the index will be updated to reflect the changes. You can disable this by
|
43
|
+
|
44
|
+
class Post < ActiveRecord::Base
|
45
|
+
elastic_index :updates => false
|
46
|
+
end
|
47
|
+
|
48
|
+
Please notice that when updates are enabled there may be a slight delay for the changes to appear
|
49
|
+
in search results (with the default elasticsearch settings, this delay is just 1 second). If
|
50
|
+
you absolutely need to ensure that the change is made public before returning control to the user,
|
51
|
+
the `:immediate_with_refresh` option provides this assurance.
|
52
|
+
|
53
|
+
class Post < ActiveRecord::Base
|
54
|
+
elastic_index :updates => :immediate_with_refresh
|
55
|
+
end
|
56
|
+
|
57
|
+
Enabling `:immediate_with_refresh` is not recommended. A better option is to simply call `Post.refresh_index`
|
58
|
+
when you really need the guarantee.
|
59
|
+
|
60
|
+
Choosing the indexed fields
|
61
|
+
=======
|
62
|
+
|
63
|
+
This plugin doesn't provide a DSL to define what fields you want to be indexed. Instead of that
|
64
|
+
it exposes the fact that in ElasticSearch every document is just a JSON string.
|
65
|
+
|
66
|
+
If you define an `indexed_json_document` method in your model this will be used as the JSON
|
67
|
+
representation of the document, otherwise `to_json` will be called instead.
|
68
|
+
|
69
|
+
Luckily, ActiveRecord has excellent support for JSON serialization, so it's really easy
|
70
|
+
to include associations or custom methods.
|
71
|
+
|
72
|
+
class Post < ActiveRecord::Base
|
73
|
+
elastic_index :updates => false
|
74
|
+
belongs_to :category
|
75
|
+
|
76
|
+
def indexed_json_document
|
77
|
+
to_json(:include => :category, :methods => :slug)
|
78
|
+
end
|
79
|
+
|
80
|
+
def slug
|
81
|
+
title.downcase.gsub(" ", "-")
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
See [ActiveRecord's JSON serialization documentation](http://api.rubyonrails.org/classes/ActiveModel/Serializers/JSON.html)
|
86
|
+
|
87
|
+
Search features
|
88
|
+
=======
|
89
|
+
|
90
|
+
Basic Searching
|
91
|
+
----------------
|
92
|
+
|
93
|
+
Calling `Model.search` obtains from ElasticSearch the ids of the results matching
|
94
|
+
your query and then queries your database to get the full ActiveRecord objects.
|
95
|
+
|
96
|
+
results = Post.search "dreams OR nightmares"
|
97
|
+
results.each {|r| puts r.title}
|
98
|
+
|
99
|
+
The query is parsed using lucene's [QueryParser syntax](http://lucene.apache.org/java/2_4_0/queryparsersyntax.html).
|
100
|
+
You can use boolean operators, restrict your search to a field, etc.
|
101
|
+
|
102
|
+
results = Post.search "prologue:dream OR epilogue:nightmare"
|
103
|
+
|
104
|
+
You can also guide the interpretation of the query, with the options `:default_operator` and `:df` (default_field). These two are equivalent:
|
105
|
+
|
106
|
+
results = Post.search "title:(dreams AND nightmares)"
|
107
|
+
results = Post.search "dreams nightmares" , :default_operator => 'AND', :df => 'title'
|
108
|
+
|
109
|
+
Sorting by attributes
|
110
|
+
--------
|
111
|
+
|
112
|
+
The default order is based on the relevancy of the terms in the document. You can also
|
113
|
+
sort by any other field's value.
|
114
|
+
|
115
|
+
Post.search "dreams", :order => :updated_at
|
116
|
+
Post.search "dreams", :order => 'updated_at:desc'
|
117
|
+
Post.search "dreams", :order => ['popularity:desc', 'updated_at:desc']
|
118
|
+
|
119
|
+
Sorting by an arbitrary script is possible using the Query DSL.
|
120
|
+
|
121
|
+
Pagination
|
122
|
+
----------
|
123
|
+
|
124
|
+
`search` returns a WillPaginate collection and accepts the customary *:per\_page*, and *:page* parameters.
|
125
|
+
|
126
|
+
# controller
|
127
|
+
@posts = Post.search("dreams", :page => params[:page], :per_page => 30)
|
128
|
+
|
129
|
+
# in the view:
|
130
|
+
will_paginate @posts
|
131
|
+
|
132
|
+
|
133
|
+
Query DSL
|
134
|
+
----------------
|
135
|
+
Instead of a string, you can pass a query in ElasticSearch's [Query DSL](http://www.elasticsearch.com/docs/elasticsearch/rest_api/query_dsl/)
|
136
|
+
giving you access to the full range of search features.
|
137
|
+
|
138
|
+
Bird.search(:match_all => true)
|
139
|
+
|
140
|
+
Bird.search(:fuzzy => {:name => 'oriale'})
|
141
|
+
|
142
|
+
Bird.search(:custom_score => {:query => {:match_all => true}, :script => "random()"})
|
143
|
+
|
144
|
+
Bird.search(:dis_max => {
|
145
|
+
:tie_breaker => 0.7,
|
146
|
+
:boost => 1.2,
|
147
|
+
:queries => [{:term => {:name => 'oriole'}}, {:term => {:content => 'oriole'}}]
|
148
|
+
})
|
149
|
+
|
150
|
+
Bird.search(:more_like_this => {
|
151
|
+
:like_text => "The orioles are a family of Old World passerine birds"
|
152
|
+
})
|
153
|
+
|
154
|
+
|
155
|
+
Bird.search(
|
156
|
+
:filtered => {
|
157
|
+
:query => {
|
158
|
+
:term => {:name => 'oriole'}
|
159
|
+
},
|
160
|
+
:filter => {
|
161
|
+
:term => {:suborder => 'Passeri'}
|
162
|
+
}
|
163
|
+
}
|
164
|
+
)
|
165
|
+
|
166
|
+
Facets
|
167
|
+
----------------
|
168
|
+
|
169
|
+
Term facets returning the most popular terms for a field and partial results counts are
|
170
|
+
available through the `facets` class method.
|
171
|
+
|
172
|
+
Post.facets :author_id
|
173
|
+
Post.facets :author_id, :size => 100
|
174
|
+
|
175
|
+
# restrict the facets to posts that contain 'dream'
|
176
|
+
Post.facets :author_id, :query => "dream"
|
177
|
+
Post.facets [:author_id, :category_id], :query => "dream"
|
178
|
+
|
179
|
+
This returns a Hash of the form:
|
180
|
+
|
181
|
+
{
|
182
|
+
:author_id => {
|
183
|
+
"1" => 3,
|
184
|
+
"25" => 2
|
185
|
+
},
|
186
|
+
:category_id => {
|
187
|
+
12 => 4,
|
188
|
+
42 => 7,
|
189
|
+
47 => 2
|
190
|
+
}
|
191
|
+
}
|
192
|
+
|
193
|
+
<!-- You can also combine the standard search results with the facets counts in a single request.
|
194
|
+
|
195
|
+
results = Post.search_with_facets [:author, :category], :query => "dream"
|
196
|
+
results.each{|post| puts post.title}
|
197
|
+
results.facets -->
|
198
|
+
|
199
|
+
You should be aware that this is only a very simple subset of the facets feature of ElasticSearch.
|
200
|
+
The full feature set (histograms, statistical facets, geo distance facets, etc.) is available
|
201
|
+
through the Query DSL.
|
202
|
+
|
203
|
+
|
204
|
+
Search counts
|
205
|
+
----------
|
206
|
+
|
207
|
+
Use `search_count` to count the number of matches without getting the results.
|
208
|
+
|
209
|
+
Post.search_count("dream OR nightmare")
|
210
|
+
|
211
|
+
|
212
|
+
Index Creation and Type Mapping Options
|
213
|
+
=======
|
214
|
+
|
215
|
+
Index creation options
|
216
|
+
-----------------------
|
217
|
+
|
218
|
+
Any value passed in the :index\_options argument will be sent to ElasticSearch as an index
|
219
|
+
creation option.
|
220
|
+
|
221
|
+
For example, if you want to increase the number of shards for this index:
|
222
|
+
|
223
|
+
class Post < ActiveRecord::Base
|
224
|
+
elastic_index :index_options => {:number_of_shards => 10}
|
225
|
+
end
|
226
|
+
|
227
|
+
If you want the search to be insensitive to accents and other diacritics:
|
228
|
+
|
229
|
+
class Post < ActiveRecord::Base
|
230
|
+
elastic_index :index_options => {
|
231
|
+
"analysis.analyzer.default.tokenizer" => 'standard',
|
232
|
+
"analysis.analyzer.default.filter" => ["standard", "lowercase", "stop", "asciifolding"]
|
233
|
+
}
|
234
|
+
end
|
235
|
+
|
236
|
+
The full list of available options for index creation is documented at
|
237
|
+
[http://www.elasticsearch.com/docs/elasticsearch/index_modules/](http://www.elasticsearch.com/docs/elasticsearch/index_modules/)
|
238
|
+
|
239
|
+
Mapping options
|
240
|
+
----------------
|
241
|
+
|
242
|
+
Mapping is the process of defining how a JSON document should be mapped to the Search Engine,
|
243
|
+
including its searchable characteristics.
|
244
|
+
|
245
|
+
The default (dynamic) mapping provides sane defaults, but defining your own mapping enables
|
246
|
+
powerful features such as boosting a field, using a different analyzer for one field,
|
247
|
+
enabling term vectors, etc.
|
248
|
+
|
249
|
+
Some examples:
|
250
|
+
|
251
|
+
class Post < ActiveRecord::Base
|
252
|
+
elastic_index :mapping => {
|
253
|
+
:properties => {
|
254
|
+
:category => {:type => :string, :index => :not_analyzed},
|
255
|
+
:title => {:type => :string, :index => :analyzed, :term_vector => true, :boost => 10.0},
|
256
|
+
:location => {:type => :geo_point}
|
257
|
+
}
|
258
|
+
}
|
259
|
+
end
|
260
|
+
|
261
|
+
|
262
|
+
See the [ElasticSearch Documentation](http://www.elasticsearch.com/docs/elasticsearch/mapping/) for mappings.
|
263
|
+
|
264
|
+
Distributed indexing
|
265
|
+
=======
|
266
|
+
You will need distributed indexing when there is a large amount of data to be indexed. In this
|
267
|
+
indexing mode the task of creating an index is divided between a pool of workers that can be
|
268
|
+
as large as you need. Since ElasticSearch itself provides linear indexing scalability by adding
|
269
|
+
nodes to the cluster, this means that you should, in principle, be able to make your indexing
|
270
|
+
time arbitrarily short.
|
271
|
+
|
272
|
+
Currently, the only work queue supported is [Resque](http://github.com/defunkt/resque). To enable distributed indexing you
|
273
|
+
should first install Redis and set-up Resque.
|
274
|
+
|
275
|
+
If you're on OS X and use homebrew, installing redis can be done with:
|
276
|
+
|
277
|
+
brew install redis
|
278
|
+
redis-server /usr/local/etc/redis.conf
|
279
|
+
|
280
|
+
Install the resque gem:
|
281
|
+
|
282
|
+
$ gem install resque
|
283
|
+
|
284
|
+
Include it on your application:
|
285
|
+
|
286
|
+
require 'resque'
|
287
|
+
|
288
|
+
Add this to your Rakefile:
|
289
|
+
|
290
|
+
require 'resque/tasks'
|
291
|
+
namespace :resque do
|
292
|
+
task :setup => :environment
|
293
|
+
end
|
294
|
+
|
295
|
+
And use the resque:work rake task to start a worker:
|
296
|
+
|
297
|
+
$ QUEUE=es_admin,es_nrt,es_batch rake resque:work
|
298
|
+
|
299
|
+
Once you have set up Resque and started a number of workers, you can easily create an index for your model using the distributed mode:
|
300
|
+
|
301
|
+
rake "escargot:distributed_index[Post]"
|
302
|
+
|
303
|
+
or if you want to re-create all your indexes
|
304
|
+
|
305
|
+
rake escargot:distributed_index
|
306
|
+
|
307
|
+
Be aware that, due to the distributed nature of indexing, the new index may be deployed when some workers are still performing their last
|
308
|
+
indexing job.
|
309
|
+
|
310
|
+
Setting up a resque work queue also allows you to use the *:updates => :enqueue* option
|
311
|
+
|
312
|
+
class Post < ActiveRecord::Base
|
313
|
+
elastic_index :updates => :enqueue
|
314
|
+
end
|
315
|
+
|
316
|
+
With this setting when a document is updated or deleted the task of updating the index is
|
317
|
+
added to the work queue and will be performed asynchronously by a remote agent.
|
318
|
+
|
319
|
+
Index versions
|
320
|
+
=======
|
321
|
+
In *escargot* indexes are versioned: when you create an index for the
|
322
|
+
model Post the actual index created in ElasticSearch will be named something like
|
323
|
+
'posts_1287849616.57665' with an alias 'posts' pointing to it. The second time
|
324
|
+
you run the "escargot:index" tasks a new index version will be created and the
|
325
|
+
alias will be updated only when the new index is ready.
|
326
|
+
|
327
|
+
This is useful because it makes the deployment of a new index version atomic.
|
328
|
+
|
329
|
+
When a document is saved and index updates are enabled, both the current index version
|
330
|
+
and any version that's in progress will be updated. This ensures that when the new
|
331
|
+
index is published it will include the change.
|
332
|
+
|
333
|
+
Searching multiple models
|
334
|
+
================
|
335
|
+
You can use all the same syntax to search across all indexed models in your application:
|
336
|
+
|
337
|
+
Escargot.search "dreams"
|
338
|
+
|
339
|
+
Calling `Escargot.search "dreams"` will return all objects that match, no matter what model they are from, ordered by relevance
|
340
|
+
|
341
|
+
If you want to limit global searches to a few specific models, you can do so with the `:classes` option
|
342
|
+
|
343
|
+
Escargot.search "dreams", :classes => [Post, Bird]
|
344
|
+
|
345
|
+
Global searches behave similarly to the per-model searches described in `Basic Searching` and `Search counts`.
|
346
|
+
|
347
|
+
Contributing
|
348
|
+
================
|
349
|
+
Fork on GitHub, create a test & send a pull request.
|
350
|
+
|
351
|
+
Bugs
|
352
|
+
================
|
353
|
+
Use the [Issue Tracker](http://github.com/angelf/escargot/issues)
|
354
|
+
|
355
|
+
|
356
|
+
Acknowledgements
|
357
|
+
================
|
358
|
+
* Some parts of the API plagiarize the excellent Thinking Sphinx plugin, and more will do so in the future.
|
359
|
+
* This plugin depends on rubberband for communication with ElasticSearch.
|
360
|
+
* Elastic Search rules!
|
361
|
+
|
362
|
+
Future Plans
|
363
|
+
======
|
364
|
+
|
365
|
+
* Search features:
|
366
|
+
* Field conditions and term filters
|
367
|
+
* Single-table inheritance support
|
368
|
+
* (optionally) use the _source field from ES and avoid querying the database
|
369
|
+
|
370
|
+
* Indexing features:
|
371
|
+
* Distributing the task of listing document ids
|
372
|
+
* Index partitioning
|
373
|
+
* Support for non-ActiveRecord models
|
374
|
+
* Adding other queue backends
|
375
|
+
|
376
|
+
Copyright (c) 2010 Angel Faus & vLex.com, released under the MIT license
|
data/Rakefile
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# Rake tasks for the escargot plugin: running the test suite (the default
# task) and generating the RDoc documentation.
require 'rake'
require 'rake/testtask'

# Prefer the task class shipped with RDoc itself; fall back to the legacy
# Rake::RDocTask only where 'rdoc/task' cannot be loaded (old RDoc installs).
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

desc 'Default: run unit tests.'
task :default => :test

desc 'Test the escargot plugin.'
Rake::TestTask.new(:test) do |t|
  t.libs << 'lib'
  t.libs << 'test'
  t.pattern = 'test/**/*_test.rb'
  t.verbose = true
end

desc 'Generate documentation for the escargot plugin.'
rdoc_task_class.new(:rdoc) do |rdoc|
  rdoc.rdoc_dir = 'rdoc'
  rdoc.title    = 'Escargot'
  rdoc.options << '--line-numbers' << '--inline-source'
  # the package ships README.markdown, there is no plain README file
  rdoc.rdoc_files.include('README.markdown')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
data/escargot.gemspec
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
# Gem specification for escargot. The version number is read from
# lib/escargot/version.rb and the packaged file list from `git ls-files`.
require File.expand_path("../lib/escargot/version", __FILE__)

Gem::Specification.new do |s|
  s.name        = "escargot"
  s.version     = Escargot::VERSION
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Angel Faus"]
  s.email       = ["angel@vlex.com"]
  s.homepage    = "http://github.com/angelf/escargot"
  s.summary     = "ElasticSearch connector for Rails"
  s.description = "Connects any Rails model with ElasticSearch, supports near real time updates, distributed indexing and models that integrate data from many databases."

  s.required_rubygems_version = ">= 1.3.6"
  s.rubyforge_project         = "escargot"

  s.add_development_dependency "bundler", ">= 1.0.0"

  s.add_dependency "rubberband", ">= 0.0.5"
  # lib/escargot/activerecord_ex.rb requires 'will_paginate/collection' at load time
  s.add_dependency "will_paginate"

  # ask git for the tracked files once and derive the executables from that list
  tracked_files = `git ls-files`.split("\n")
  s.files        = tracked_files
  s.executables  = tracked_files.map{|f| f =~ /^bin\/(.*)/ ? $1 : nil}.compact
  s.require_path = 'lib'
end
|
data/init.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
# Include hook code here
|
data/install.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
# Install hook code here
|
@@ -0,0 +1,193 @@
|
|
1
|
+
require 'will_paginate/collection'
|
2
|
+
|
3
|
+
module Escargot
  # Mixin that connects a model class with ElasticSearch. Including it extends
  # the class with ClassMethods; calling +elastic_index+ on the class then
  # mixes InstanceMethods into the model as well.
  module ActiveRecordExtensions

    def self.included(base)
      base.send :extend, ClassMethods
    end

    module ClassMethods
      attr_accessor :index_name
      attr_accessor :update_index_policy

      # Defines an elastic search index for the model. Valid options:
      #
      # :index_name (defaults to the underscored class name, with "/" replaced by "-")
      #
      # :updates, how to update the contents of the index when a document is changed,
      # valid options are:
      #
      #   - false: do not update the index
      #
      #   - :immediate: update the index but do not refresh it automatically.
      #     With the default settings, this means that the change may take up to 1 second
      #     to be seen by other users.
      #
      #     See: http://www.elasticsearch.com/docs/elasticsearch/index_modules/engine/robin/
      #
      #     This is the default option.
      #
      #   - :immediate_with_refresh: update the index AND ask elasticsearch to refresh it after each
      #     change. This guarantees that the changes will be seen by other users, but may affect
      #     performance.
      #
      #   - :enqueue: enqueue the document id so that a remote worker will update the index.
      #     This is the recommended option if you have set up a job queue (such as Resque)
      #
      # :index_options, passed along when a new index version is created
      # (see create_index_version). Defaults to {}.
      #
      # :mapping (also accepted as :mapping_options), the mapping sent for this
      # model's type after a new index version is created. No mapping is sent
      # when the option is absent.
      #
      # The options hash given by the caller is not modified.
      def elastic_index(options = {})
        Escargot.register_model(self)

        # work on a symbolized copy so the caller's hash is left untouched
        options = options.symbolize_keys
        send :include, InstanceMethods
        @index_name = options[:index_name] || self.name.underscore.gsub(/\//,'-')
        # an explicit :updates => false must win over the default, hence key? instead of ||
        @update_index_policy = options.key?(:updates) ? options[:updates] : :immediate

        if @update_index_policy
          after_save :update_index
          after_destroy :delete_from_index
        end
        @index_options = options[:index_options] || {}
        @mapping = options[:mapping] || options[:mapping_options] || false
      end

      # Delegates to Escargot.search, restricted to this model's index and type.
      def search(query, options={})
        Escargot.search(query, options.merge({:index => self.index_name, :type => elastic_search_type}), true)
      end

      # Delegates to Escargot.search_hits, restricted to this model's index and type.
      def search_hits(query, options = {})
        Escargot.search_hits(query, options.merge({:index => self.index_name, :type => elastic_search_type}), true)
      end

      # Delegates to Escargot.search_count, restricted to this model's index and type.
      # Without a query everything ("*") is counted.
      def search_count(query = "*", options = {})
        Escargot.search_count(query, options.merge({:index => self.index_name, :type => elastic_search_type}), true)
      end

      # Requests term facets for one field or an array of fields.
      #
      # Options:
      #   :size  - number of terms requested per field (default 10)
      #   :query - a query string or a Query DSL hash restricting the documents
      #            taken into account; everything is matched when absent
      # Any other option is sent as part of the search request.
      #
      # Returns a hash of the form {field (as a Symbol) => {term => count}}.
      # The options hash given by the caller is not modified, so it can be
      # reused for several calls.
      def facets(fields_list, options = {})
        request = options.dup
        size = request.delete(:size) || 10
        fields_list = [fields_list] unless fields_list.kind_of?(Array)

        if !request[:query]
          request[:query] = {:match_all => true}
        elsif request[:query].kind_of?(String)
          request[:query] = {:query_string => {:query => request[:query]}}
        end

        request[:facets] = {}
        fields_list.each do |field|
          request[:facets][field] = {:terms => {:field => field, :size => size}}
        end

        hits = $elastic_search_client.search(request, {:index => self.index_name, :type => elastic_search_type})
        out = {}

        fields_list.each do |field|
          out[field.to_sym] = {}
          # the response keys the facets by the field name as a string
          hits.facets[field.to_s]["terms"].each do |term|
            out[field.to_sym][term["term"]] = term["count"]
          end
        end

        out
      end

      # explicitly refresh the index, making all operations performed since the last refresh
      # available for search. Refreshes +index_version+ when given, the model's index otherwise.
      #
      # http://www.elasticsearch.com/docs/elasticsearch/rest_api/admin/indices/refresh/
      def refresh_index(index_version = nil)
        $elastic_search_client.refresh(index_version || index_name)
      end

      # creates a new index version for this model and sets the mapping options for the type.
      # Returns the new index version as given by the client.
      def create_index_version
        index_version = $elastic_search_client.create_index_version(@index_name, @index_options)
        if @mapping
          $elastic_search_client.update_mapping(@mapping, :index => index_version, :type => elastic_search_type)
        end
        index_version
      end

      # deletes all index versions for this model and the alias (if exist)
      def delete_index
        # set current version to delete alias later
        current_version = $elastic_search_client.current_index_version(index_name)

        # deletes any index version and the alias
        $elastic_search_client.index_versions(index_name).each do |index_version|
          if index_version == current_version
            $elastic_search_client.alias_index(:remove => {index_version => index_name})
          end
          $elastic_search_client.delete_index(index_version)
        end

        # and delete the index itself if it exists
        begin
          $elastic_search_client.delete_index(index_name)
        rescue ElasticSearch::RequestError
          # it's ok, this means that the index doesn't exist
        end
      end

      # Deletes the document with the given id. :index and :type default to
      # this model's index and type; the caller's options hash is not modified.
      def delete_id_from_index(id, options = {})
        options = options.dup
        options[:index] ||= self.index_name
        options[:type]  ||= elastic_search_type
        $elastic_search_client.delete(id.to_s, options)
      end

      # Asks the client to optimize this model's index.
      def optimize_index
        $elastic_search_client.optimize(index_name)
      end

      private
        # name of the document type used for this model: the underscored,
        # singularized class name with "/" replaced by "-"
        def elastic_search_type
          self.name.underscore.singularize.gsub(/\//,'-')
        end

    end

    module InstanceMethods

      # updates the index using the appropriate policy
      def update_index
        case self.class.update_index_policy
        when :immediate_with_refresh
          local_index_in_elastic_search(:refresh => true)
        when :enqueue
          Resque.enqueue(DistributedIndexing::ReIndexDocuments, self.class.to_s, [self.id])
        else
          local_index_in_elastic_search
        end
      end

      # deletes the document from the index using the appropriate policy ("simple" or "distributed")
      def delete_from_index
        case self.class.update_index_policy
        when :immediate_with_refresh
          self.class.delete_id_from_index(self.id, :refresh => true)
          # As of Oct 25 2010, :refresh => true is not working
          self.class.refresh_index()
        when :enqueue
          Resque.enqueue(DistributedIndexing::ReIndexDocuments, self.class.to_s, [self.id])
        else
          self.class.delete_id_from_index(self.id)
        end
      end

      # Indexes this record from the current process. The document is
      # +indexed_json_document+ when the model defines it, +to_json+ otherwise.
      # :index, :type and :id default to the model's index, the model's type and
      # the record id; with :refresh => true the index is refreshed afterwards.
      # The caller's options hash is not modified.
      def local_index_in_elastic_search(options = {})
        options = options.dup
        options[:index] ||= self.class.index_name
        # elastic_search_type is private on the class, hence the send
        options[:type]  ||= self.class.send(:elastic_search_type)
        options[:id]    ||= self.id.to_s

        $elastic_search_client.index(
          self.respond_to?(:indexed_json_document) ? self.indexed_json_document : self.to_json,
          options
        )

        ## !!!!! passing :refresh => true should make ES auto-refresh only the affected
        ## shards but as of Oct 25 2010 with ES 0.12 && rubberband 0.0.2 that's not the case
        if options[:refresh]
          self.class.refresh_index(options[:index])
        end

      end

    end
  end
end
|