tire 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. data/.gitignore +9 -0
  2. data/Gemfile +4 -0
  3. data/MIT-LICENSE +20 -0
  4. data/README.markdown +435 -0
  5. data/Rakefile +75 -0
  6. data/examples/dsl.rb +73 -0
  7. data/examples/rails-application-template.rb +144 -0
  8. data/examples/tire-dsl.rb +617 -0
  9. data/lib/tire.rb +35 -0
  10. data/lib/tire/client.rb +40 -0
  11. data/lib/tire/configuration.rb +29 -0
  12. data/lib/tire/dsl.rb +33 -0
  13. data/lib/tire/index.rb +209 -0
  14. data/lib/tire/logger.rb +60 -0
  15. data/lib/tire/model/callbacks.rb +23 -0
  16. data/lib/tire/model/import.rb +18 -0
  17. data/lib/tire/model/indexing.rb +50 -0
  18. data/lib/tire/model/naming.rb +30 -0
  19. data/lib/tire/model/persistence.rb +34 -0
  20. data/lib/tire/model/persistence/attributes.rb +60 -0
  21. data/lib/tire/model/persistence/finders.rb +61 -0
  22. data/lib/tire/model/persistence/storage.rb +75 -0
  23. data/lib/tire/model/search.rb +97 -0
  24. data/lib/tire/results/collection.rb +56 -0
  25. data/lib/tire/results/item.rb +39 -0
  26. data/lib/tire/results/pagination.rb +30 -0
  27. data/lib/tire/rubyext/hash.rb +3 -0
  28. data/lib/tire/rubyext/symbol.rb +11 -0
  29. data/lib/tire/search.rb +117 -0
  30. data/lib/tire/search/facet.rb +41 -0
  31. data/lib/tire/search/filter.rb +28 -0
  32. data/lib/tire/search/highlight.rb +37 -0
  33. data/lib/tire/search/query.rb +42 -0
  34. data/lib/tire/search/sort.rb +29 -0
  35. data/lib/tire/tasks.rb +88 -0
  36. data/lib/tire/version.rb +3 -0
  37. data/test/fixtures/articles/1.json +1 -0
  38. data/test/fixtures/articles/2.json +1 -0
  39. data/test/fixtures/articles/3.json +1 -0
  40. data/test/fixtures/articles/4.json +1 -0
  41. data/test/fixtures/articles/5.json +1 -0
  42. data/test/integration/active_model_searchable_test.rb +80 -0
  43. data/test/integration/active_record_searchable_test.rb +193 -0
  44. data/test/integration/facets_test.rb +65 -0
  45. data/test/integration/filters_test.rb +46 -0
  46. data/test/integration/highlight_test.rb +52 -0
  47. data/test/integration/index_mapping_test.rb +44 -0
  48. data/test/integration/index_store_test.rb +68 -0
  49. data/test/integration/persistent_model_test.rb +35 -0
  50. data/test/integration/query_string_test.rb +43 -0
  51. data/test/integration/results_test.rb +28 -0
  52. data/test/integration/sort_test.rb +36 -0
  53. data/test/models/active_model_article.rb +31 -0
  54. data/test/models/active_model_article_with_callbacks.rb +49 -0
  55. data/test/models/active_model_article_with_custom_index_name.rb +5 -0
  56. data/test/models/active_record_article.rb +12 -0
  57. data/test/models/article.rb +15 -0
  58. data/test/models/persistent_article.rb +11 -0
  59. data/test/models/persistent_articles_with_custom_index_name.rb +10 -0
  60. data/test/models/supermodel_article.rb +22 -0
  61. data/test/models/validated_model.rb +11 -0
  62. data/test/test_helper.rb +52 -0
  63. data/test/unit/active_model_lint_test.rb +17 -0
  64. data/test/unit/client_test.rb +43 -0
  65. data/test/unit/configuration_test.rb +71 -0
  66. data/test/unit/index_test.rb +390 -0
  67. data/test/unit/logger_test.rb +114 -0
  68. data/test/unit/model_callbacks_test.rb +90 -0
  69. data/test/unit/model_import_test.rb +71 -0
  70. data/test/unit/model_persistence_test.rb +400 -0
  71. data/test/unit/model_search_test.rb +289 -0
  72. data/test/unit/results_collection_test.rb +131 -0
  73. data/test/unit/results_item_test.rb +59 -0
  74. data/test/unit/rubyext_hash_test.rb +19 -0
  75. data/test/unit/search_facet_test.rb +69 -0
  76. data/test/unit/search_filter_test.rb +36 -0
  77. data/test/unit/search_highlight_test.rb +46 -0
  78. data/test/unit/search_query_test.rb +55 -0
  79. data/test/unit/search_sort_test.rb +50 -0
  80. data/test/unit/search_test.rb +204 -0
  81. data/test/unit/tire_test.rb +55 -0
  82. data/tire.gemspec +54 -0
  83. metadata +372 -0
@@ -0,0 +1,9 @@
1
+ *.gem
2
+ .bundle
3
+ Gemfile.lock
4
+ pkg/*
5
+ rdoc/
6
+ coverage/
7
+ scratch/
8
+ examples/*.html
9
+ *.log
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source "http://rubygems.org"
2
+
3
+ # Specify your gem's dependencies in tire.gemspec
4
+ gemspec
@@ -0,0 +1,20 @@
1
+ Copyright 2011 Karel Minarik
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,435 @@
1
+ Tire
2
+ =========
3
+
4
+ ![Tire](https://github.com/karmi/tire/raw/master/tire.png)
5
+
6
+ _Tire_ is a Ruby client for the [ElasticSearch](http://www.elasticsearch.org/) search engine/database.
7
+
8
+ _ElasticSearch_ is a scalable, distributed, cloud-ready, highly-available,
9
+ full-text search engine and database, communicating by JSON over RESTful HTTP,
10
+ based on [Lucene](http://lucene.apache.org/), written in Java.
11
+
12
+ This document provides just a brief overview of _Tire's_ features. Be sure to also check out
13
+ the extensive documentation at <http://karmi.github.com/tire/> if you're interested.
14
+
15
+ Installation
16
+ ------------
17
+
18
+ First, you need a running _ElasticSearch_ server. Thankfully, it's easy. Let's define easy:
19
+
20
+ $ curl -k -L -o elasticsearch-0.16.0.tar.gz http://github.com/downloads/elasticsearch/elasticsearch/elasticsearch-0.16.0.tar.gz
21
+ $ tar -zxvf elasticsearch-0.16.0.tar.gz
22
+ $ ./elasticsearch-0.16.0/bin/elasticsearch -f
23
+
24
+ OK. Easy. On a Mac, you can also use _Homebrew_:
25
+
26
+ $ brew install elasticsearch
27
+
28
+ OK. Let's install the gem via Rubygems:
29
+
30
+ $ gem install tire
31
+
32
+ Of course, you can install it from the source as well:
33
+
34
+ $ git clone git://github.com/karmi/tire.git
35
+ $ cd tire
36
+ $ rake install
37
+
38
+
39
+ Usage
40
+ -----
41
+
42
+ _Tire_ exposes an easy-to-use domain-specific language for fluent communication with _ElasticSearch_.
43
+
44
+ It also blends with your [ActiveModel](https://github.com/rails/rails/tree/master/activemodel)
45
+ classes for convenient usage in Rails applications.
46
+
47
+ To test-drive the core _ElasticSearch_ functionality, let's require the gem:
48
+
49
+ require 'rubygems'
50
+ require 'tire'
51
+
52
+ Please note that you can copy these snippets from the much more extensive and heavily annotated file
53
+ in [examples/tire-dsl.rb](http://karmi.github.com/tire/).
54
+
55
+ OK. Let's create an index named `articles` and store/index some documents:
56
+
57
+ Tire.index 'articles' do
58
+ delete
59
+ create
60
+
61
+ store :title => 'One', :tags => ['ruby']
62
+ store :title => 'Two', :tags => ['ruby', 'python']
63
+ store :title => 'Three', :tags => ['java']
64
+ store :title => 'Four', :tags => ['ruby', 'php']
65
+
66
+ refresh
67
+ end
68
+
69
+ We can also create the index with custom
70
+ [mapping](http://www.elasticsearch.org/guide/reference/api/admin-indices-create-index.html)
71
+ for a specific document type:
72
+
73
+ Tire.index 'articles' do
74
+ create :mappings => {
75
+ :article => {
76
+ :properties => {
77
+ :id => { :type => 'string', :index => 'not_analyzed', :include_in_all => false },
78
+ :title => { :type => 'string', :boost => 2.0, :analyzer => 'snowball' },
79
+ :tags => { :type => 'string', :analyzer => 'keyword' },
80
+ :content => { :type => 'string', :analyzer => 'snowball' }
81
+ }
82
+ }
83
+ }
84
+ end
85
+
86
+ Of course, we may have large amounts of data, and it may be impossible or impractical to add them to the index
87
+ one by one. We can use _ElasticSearch's_ [bulk storage](http://www.elasticsearch.org/guide/reference/api/bulk.html):
88
+
89
+ articles = [
90
+ { :id => '1', :title => 'one' },
91
+ { :id => '2', :title => 'two' },
92
+ { :id => '3', :title => 'three' }
93
+ ]
94
+
95
+ Tire.index 'bulk' do
96
+ import articles
97
+ end
98
+
99
+ We can also easily manipulate the documents before storing them in the index, by passing a block to the
100
+ `import` method:
101
+
102
+ Tire.index 'bulk' do
103
+ import articles do |documents|
104
+
105
+ documents.each { |document| document[:title].capitalize! }
106
+ end
107
+ end
108
+
109
+ OK. Now, let's go search all the data.
110
+
111
+ We will be searching for articles whose `title` begins with the letter “T”, sorted by `title` in `descending` order,
112
+ filtering them for ones tagged “ruby”, and also retrieving some [_facets_](http://www.elasticsearch.org/guide/reference/api/search/facets/)
113
+ from the database:
114
+
115
+ s = Tire.search 'articles' do
116
+ query do
117
+ string 'title:T*'
118
+ end
119
+
120
+ filter :terms, :tags => ['ruby']
121
+
122
+ sort { title 'desc' }
123
+
124
+ facet 'global-tags' do
125
+ terms :tags, :global => true
126
+ end
127
+
128
+ facet 'current-tags' do
129
+ terms :tags
130
+ end
131
+ end
132
+
133
+ (Of course, we may also page the results with `from` and `size` query options, retrieve only specific fields
134
+ or highlight content matching our query, etc.)
135
+
136
+ Let's display the results:
137
+
138
+ s.results.each do |document|
139
+ puts "* #{ document.title } [tags: #{document.tags.join(', ')}]"
140
+ end
141
+
142
+ # * Two [tags: ruby, python]
143
+
144
+ Let's display the global facets (distribution of tags across the whole database):
145
+
146
+ s.results.facets['global-tags']['terms'].each do |f|
147
+ puts "#{f['term'].ljust(10)} #{f['count']}"
148
+ end
149
+
150
+ # ruby 3
151
+ # python 1
152
+ # php 1
153
+ # java 1
154
+
155
+ Now, let's display the facets based on the current query (notice that the count for articles
156
+ tagged with 'java' is included, even though it's not returned by our query;
157
+ count for articles tagged 'php' is excluded, since they don't match the current query):
158
+
159
+ s.results.facets['current-tags']['terms'].each do |f|
160
+ puts "#{f['term'].ljust(10)} #{f['count']}"
161
+ end
162
+
163
+ # ruby 1
164
+ # python 1
165
+ # java 1
166
+
167
+ If configuring the search payload with a block somehow feels too weak for you, you can simply pass
168
+ a Ruby `Hash` (or JSON string) with the query declaration to the `search` method:
169
+
170
+ Tire.search 'articles', :query => { :fuzzy => { :title => 'Sour' } }
171
+
172
+ If this sounds like a great idea to you, you are probably able to write your application
173
+ using just `curl`, `sed` and `awk`.
174
+
175
+ We can display the full query JSON for close inspection:
176
+
177
+ puts s.to_json
178
+ # {"facets":{"current-tags":{"terms":{"field":"tags"}},"global-tags":{"global":true,"terms":{"field":"tags"}}},"query":{"query_string":{"query":"title:T*"}},"filter":{"terms":{"tags":["ruby"]}},"sort":[{"title":"desc"}]}
179
+
180
+ Or, better, we can display the corresponding `curl` command to recreate and debug the request in the terminal:
181
+
182
+ puts s.to_curl
183
+ # curl -X POST "http://localhost:9200/articles/_search?pretty=true" -d '{"facets":{"current-tags":{"terms":{"field":"tags"}},"global-tags":{"global":true,"terms":{"field":"tags"}}},"query":{"query_string":{"query":"title:T*"}},"filter":{"terms":{"tags":["ruby"]}},"sort":[{"title":"desc"}]}'
184
+
185
+ However, we can simply log every search query (and other requests) in this `curl`-friendly format:
186
+
187
+ Tire.configure { logger 'elasticsearch.log' }
188
+
189
+ When you set the log level to _debug_:
190
+
191
+ Tire.configure { logger 'elasticsearch.log', :level => 'debug' }
192
+
193
+ the JSON responses are logged as well. This is not a great idea for a production environment,
194
+ but it's priceless when you want to paste a complicated transaction to the mailing list or IRC channel.
195
+
196
+ The _Tire_ DSL tries hard to provide a strong Ruby-like API for the main _ElasticSearch_ features.
197
+
198
+ By default, _Tire_ wraps the results collection in an enumerable `Results::Collection` class,
199
+ and result items in a `Results::Item` class, which looks like a child of `Hash` and `OpenStruct`,
200
+ for smooth iterating and displaying the results.
201
+
202
+ You may wrap the result items in your own class by setting the `Tire.configuration.wrapper`
203
+ property. Your class must take a `Hash` of attributes on initialization.
204
+
205
+ If that seems like a great idea to you, there's a big chance you already have such a class, and one would bet
206
+ it's an `ActiveRecord` or `ActiveModel` class, containing a model of your Rails application.
207
+
208
+ Fortunately, _Tire_ makes blending _ElasticSearch_ features into your models trivially possible.
209
+
210
+
211
+ ActiveModel Integration
212
+ -----------------------
213
+
214
+ Let's suppose you have an `Article` class in your Rails application. To make it searchable with
215
+ _Tire_, you just `include` it:
216
+
217
+ class Article < ActiveRecord::Base
218
+ include Tire::Model::Search
219
+ include Tire::Model::Callbacks
220
+ end
221
+
222
+ When you now save a record:
223
+
224
+ Article.create :title => "I Love ElasticSearch",
225
+ :content => "...",
226
+ :author => "Captain Nemo",
227
+ :published_on => Time.now
228
+
229
+ it is automatically added into the index, because of the included callbacks. The document attributes
230
+ are indexed exactly as when you call the `Article#to_json` method.
231
+
232
+ Now you can search the records:
233
+
234
+ Article.search 'love'
235
+
236
+ OK. Often, this is where the game stops. Not here.
237
+
238
+ First of all, you may use the full query DSL, as explained above, with filters, sorting,
239
+ advanced facet aggregation, highlighting, etc:
240
+
241
+ q = 'love'
242
+ Article.search do
243
+ query { string q }
244
+ facet('timeline') { date :published_on, :interval => 'month' }
245
+ sort { published_on 'desc' }
246
+ end
247
+
248
+ Dynamic mapping is a godsend when you're prototyping.
249
+ For serious usage, though, you'll definitely want to define a custom mapping for your model:
250
+
251
+ class Article < ActiveRecord::Base
252
+ include Tire::Model::Search
253
+ include Tire::Model::Callbacks
254
+
255
+ mapping do
256
+ indexes :id, :type => 'string', :analyzed => false
257
+ indexes :title, :type => 'string', :analyzer => 'snowball', :boost => 100
258
+ indexes :content, :type => 'string', :analyzer => 'snowball'
259
+ indexes :author, :type => 'string', :analyzer => 'keyword'
260
+ indexes :published_on, :type => 'date', :include_in_all => false
261
+ end
262
+ end
263
+
264
+ In this case, _only_ the defined model attributes are indexed when adding to the index.
265
+
266
+ When you want a tight grip on how your model attributes are added to the index, just
267
+ provide the `to_indexed_json` method yourself:
268
+
269
+ class Article < ActiveRecord::Base
270
+ include Tire::Model::Search
271
+ include Tire::Model::Callbacks
272
+
273
+ def to_indexed_json
274
+ names = author.split(/\W/)
275
+ last_name = names.pop
276
+ first_name = names.join
277
+
278
+ {
279
+ :title => title,
280
+ :content => content,
281
+ :author => {
282
+ :first_name => first_name,
283
+ :last_name => last_name
284
+ }
285
+ }.to_json
286
+ end
287
+
288
+ end
289
+
290
+ Note that _Tire_-enhanced models are fully compatible with [`will_paginate`](https://github.com/mislav/will_paginate),
291
+ so you can pass any parameters to the `search` method in the controller, as usual:
292
+
293
+ @articles = Article.search params[:q], :page => (params[:page] || 1)
294
+
295
+ OK. Chances are, you have lots of records stored in the underlying database. How will you get them to _ElasticSearch_? Easy:
296
+
297
+ Article.index.import Article.all
298
+
299
+ However, this way, all your records are loaded into memory, serialized into JSON,
300
+ and sent down the wire to _ElasticSearch_. Not practical, you say? You're right.
301
+
302
+ Provided your model implements some sort of _pagination_ — and it probably does, for so much data —
303
+ you can just run:
304
+
305
+ Article.import
306
+
307
+ In this case, the `Article.paginate` method is called, and your records are sent to the index
308
+ in chunks of 1000. If that number doesn't suit you, just provide a better one:
309
+
310
+ Article.import :per_page => 100
311
+
312
+ Any other parameters you provide to the `import` method are passed down to the `paginate` method.
313
+
314
+ Are we saying you have to fiddle with this thing in a `rails console` or silly Ruby scripts? No.
315
+ Just call the included _Rake_ task on the commandline:
316
+
317
+ $ rake environment tire:import CLASS='Article'
318
+
319
+ You can also force-import the data by deleting the index first (and creating it with mapping
320
+ provided by the `mapping` block in your model):
321
+
322
+ $ rake environment tire:import CLASS='Article' FORCE=true
323
+
324
+ When you spend more time with _ElasticSearch_, you'll notice how
325
+ [index aliases](http://www.elasticsearch.org/guide/reference/api/admin-indices-aliases.html)
326
+ are the best idea since the invention of the inverted index.
327
+ You can index your data into a fresh index (and possibly update an alias if everything's fine):
328
+
329
+ $ rake environment tire:import CLASS='Article' INDEX='articles-2011-05'
330
+
331
+ If you're the type who has no time for long introductions, you can generate a fully working
332
+ example Rails application, with an `ActiveRecord` model and a search form, to play with:
333
+
334
+ $ rails new searchapp -m https://github.com/karmi/tire/raw/master/examples/rails-application-template.rb
335
+
336
+ OK. All this time we have been talking about `ActiveRecord` models, since
337
+ it is a reasonable Rails default for the storage layer.
338
+
339
+ But what if you use another database such as [MongoDB](http://www.mongodb.org/),
340
+ or another object mapping library, such as [Mongoid](http://mongoid.org/)?
341
+
342
+ Well, things stay mostly the same:
343
+
344
+ class Article
345
+ include Mongoid::Document
346
+ field :title, :type => String
347
+ field :content, :type => String
348
+
349
+ include Tire::Model::Search
350
+ include Tire::Model::Callbacks
351
+
352
+ # Let's use a different index name so stuff doesn't get mixed up
353
+ #
354
+ index_name 'mongo-articles'
355
+
356
+ # These Mongo guys sure do some funky stuff with their IDs
357
+ # in +serializable_hash+, let's fix it.
358
+ #
359
+ def to_indexed_json
360
+ self.to_json
361
+ end
362
+
363
+ end
364
+
365
+ Article.create :title => 'I Love ElasticSearch'
366
+
367
+ Article.search 'love'
368
+
369
+ That's kinda nice. But there's more.
370
+
371
+ _Tire_ implements not only _searchable_ features, but also _persistence_ features.
372
+
373
+ This means that you can use a _Tire_ model **instead of** your database, not just
374
+ for searching your database. Why would you like to do that?
375
+
376
+ Well, because you're tired of database migrations and lots of hand-holding with your
377
+ database to store stuff like `{ :name => 'Tire', :tags => [ 'ruby', 'search' ] }`.
378
+ Because what you need is to just dump a JSON-representation of your data into a database and
379
+ load it back when needed.
380
+ Because you've noticed that _searching_ your data is a much more effective way of retrieval
381
+ than constructing elaborate database query conditions.
382
+ Because you have _lots_ of data and want to use _ElasticSearch's_
383
+ advanced distributed features.
384
+
385
+ To use the persistence features, you have to include the `Tire::Model::Persistence` module
386
+ in your class and define the properties (analogous to the way you do with CouchDB- or MongoDB-based models):
387
+
388
+ class Article
389
+ include Tire::Model::Persistence
390
+ include Tire::Model::Search
391
+ include Tire::Model::Callbacks
392
+
393
+ validates_presence_of :title, :author
394
+
395
+ property :title
396
+ property :author
397
+ property :content
398
+ property :published_on
399
+
400
+ end
401
+
402
+ Of course, not all validations or `ActionPack` helpers will be available to your models,
403
+ but if you can live with that, you've just got a schema-free, highly-scalable storage
404
+ and retrieval engine for your data.
405
+
406
+ Todo, Plans & Ideas
407
+ -------------------
408
+
409
+ _Tire_ is already used in production by its authors. Nevertheless, it's not considered finished yet.
410
+
411
+ There are todos, plans and ideas, some of which are listed below, in the order of importance:
412
+
413
+ * Proper RDoc annotations for the source code
414
+ * [Histogram](http://www.elasticsearch.org/guide/reference/api/search/facets/histogram-facet.html) facets
415
+ * [Statistical](http://www.elasticsearch.org/guide/reference/api/search/facets/statistical-facet.html) facets
416
+ * [Geo Distance](http://www.elasticsearch.org/guide/reference/api/search/facets/geo-distance-facet.html) facets
417
+ * [Index aliases](http://www.elasticsearch.org/guide/reference/api/admin-indices-aliases.html) management
418
+ * [Analyze](http://www.elasticsearch.org/guide/reference/api/admin-indices-analyze.html) API support
419
+ * Embedded webserver to display statistics and to allow easy searches
420
+
421
+
422
+ Other Clients
423
+ -------------
424
+
425
+ Check out [other _ElasticSearch_ clients](http://www.elasticsearch.org/guide/appendix/clients.html).
426
+
427
+
428
+ Feedback
429
+ --------
430
+
431
+ You can send feedback via [e-mail](mailto:karmi@karmi.cz) or via [Github Issues](https://github.com/karmi/tire/issues).
432
+
433
+ -----
434
+
435
+ [Karel Minarik](http://karmi.cz)