sequel-elasticsearch 0.3.3 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9fedd83674e13bf3111df6c01a4a7c997e39cfd16906ea9849c5e22ca9944d36
4
- data.tar.gz: 3799f9f9eba3821790e96abf8d00016600dc71cb6620c439c6dd82ffb69dd09e
3
+ metadata.gz: 4b72432bc6c37b866fb0959c07d71b0bb90fa8773ad0006fe65250f62e80b48e
4
+ data.tar.gz: 7eb98c377331877a870449a1e3c1060504c70eb6e7f7cc9905a0ad767406090e
5
5
  SHA512:
6
- metadata.gz: 7219a5449f7c7a88f49a71d43ed1cce202e6312ad050cdde494171289aed5ee199628ba7d2cc36b3c28344aff5fdb76a94acea6701cb224cb15062a474aa3994
7
- data.tar.gz: 2db11be8b0cb25339a3399325511a992c69591147b9810978aa03cf2c223f97dc10f7217dbb75720ea6ae82bac2a03ec38c8c5b0451e682959b3eacd448253e3
6
+ metadata.gz: 1de1abc8e613aeaea8f256626b5f20c370e91f8bad909be2b9c76988a08868aed16caab5c10066fe70098984c49bf0c280e10398957d3b277fcf7a36c39c1297
7
+ data.tar.gz: 40005b30e9552ceed6e2d560e843eeec2666f9dd444c36007581a59bc4a6ab94d293172502daf5ed3068cb8229b6673318182c66fed1d841a40c695c37b4b62e
data/README.md CHANGED
@@ -42,11 +42,11 @@ Sequel::Model.plugin :elasticsearch
42
42
  Or per model:
43
43
 
44
44
  ```ruby
45
- Sequel::Model.plugin Sequel::Elasticsearch
45
+ Document.plugin Sequel::Elasticsearch
46
46
 
47
47
  # or
48
48
 
49
- class Node < Sequel::Model
49
+ class Document < Sequel::Model
50
50
  plugin :elasticsearch
51
51
  end
52
52
  ```
@@ -62,6 +62,49 @@ Sequel::Model.plugin :elasticsearch,
62
62
 
63
63
  And that's it! Just transact as you normally would, and your records will be created and updated in the Elasticsearch cluster.
64
64
 
65
+ ### Searching
66
+
67
+ Your model is now searchable through Elasticsearch. Just pass down a string that's parsable as a [query string query](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html).
68
+
69
+ ```ruby
70
+ Document.es('title:Sequel')
71
+ Document.es('title:Sequel AND body:Elasticsearch')
72
+ ```
73
+
74
+ The result from the `es` method is an enumerable containing `Sequel::Model` instances of your model:
75
+
76
+ ```ruby
77
+ results = Document.es('title:Sequel')
78
+ results.each { |e| p e }
79
+ # Outputs
80
+ # #<Document @values={:id=>1, :title=>"Sequel", :body=>"Document 1"}>
81
+ # #<Document @values={:id=>2, :title=>"Sequel", :body=>"Document 2"}>
82
+ ```
83
+
84
+ The result also contains the meta info about the Elasticsearch query result:
85
+
86
+ ```ruby
87
+ results = Document.es('title:Sequel')
88
+ p results.count # The number of documents included in this result
89
+ p results.total # The total number of documents in the index that match the search
90
+ p results.timed_out # If the search timed out or not
91
+ p results.took # How long, in milliseconds, the search took
92
+ ```
93
+
94
+ You can also use the scroll API to search and fetch large datasets:
95
+
96
+ ```ruby
97
+ # Get a dataset that will stay consistent for 5 minutes and extend that time with 1 minute on every iteration
98
+ scroll = Document.es('test', scroll: '5m')
99
+ p scroll.scroll_id # Outputs the scroll_id for this specific scrolling snapshot
100
+ puts "Found #{scroll.count} of #{scroll.total} documents"
101
+ scroll.each { |e| p e }
102
+ while (scroll = Document.es(scroll, scroll: '1m')) && scroll.empty? == false do
103
+ puts "Found #{scroll.count} of #{scroll.total} documents"
104
+ scroll.each { |e| p e }
105
+ end
106
+ ```
107
+
65
108
  ## Development
66
109
 
67
110
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
@@ -75,7 +118,7 @@ Bug reports and pull requests are welcome on GitHub at https://github.com/jrgns/
75
118
  Features that need to be built:
76
119
 
77
120
  - [x] An `es` method to search through the data on the cluster.
78
- - [ ] Let `es` return an enumerator of `Sequel::Model` instances.
121
+ - [x] Let `es` return an enumerator of `Sequel::Model` instances.
79
122
  - [ ] A rake task to create or suggest mappings for a table.
80
123
 
81
124
  ## License
@@ -0,0 +1 @@
1
+ theme: jekyll-theme-architect
@@ -3,7 +3,16 @@ require 'sequel/plugins/elasticsearch/result'
3
3
 
4
4
  module Sequel
5
5
  module Plugins
6
+ # The Sequel::Elasticsearch model plugin
7
+ #
8
+ # @example Simple usage
9
+ #
10
+ # require 'sequel-elasticsearch'
11
+ # Document.plugin Sequel::Elasticsearch
12
+ # Document.es('test')
13
+ #
6
14
  module Elasticsearch
15
+ # Apply the plugin to the specified model
7
16
  def self.apply(model, _opts = OPTS)
8
17
  model.instance_variable_set(:@elasticsearch_opts, {})
9
18
  model.instance_variable_set(:@elasticsearch_index, nil)
@@ -11,6 +20,7 @@ module Sequel
11
20
  model
12
21
  end
13
22
 
23
+ # Configure the plugin
14
24
  def self.configure(model, opts = OPTS)
15
25
  model.elasticsearch_opts = opts[:elasticsearch] || {}
16
26
  model.elasticsearch_index = (opts[:index] || model.table_name).to_sym
@@ -18,13 +28,21 @@ module Sequel
18
28
  model
19
29
  end
20
30
 
31
+ # The class methods that will be added to the Sequel::Model
21
32
  module ClassMethods
22
- attr_accessor :elasticsearch_opts, :elasticsearch_index, :elasticsearch_type
23
-
33
+ # The extra options that will be passed to the Elasticsearch client.
34
+ attr_accessor :elasticsearch_opts
35
+ # The Elasticsearch index to which the documents will be written.
36
+ attr_accessor :elasticsearch_index
37
+ # The Elasticsearch type to which the documents will be written.
38
+ attr_accessor :elasticsearch_type
39
+
40
+ # Return the Elasticsearch client used to communicate with the cluster.
24
41
  def es_client
25
42
  @es_client = ::Elasticsearch::Client.new elasticsearch_opts
26
43
  end
27
44
 
45
+ # Execute a search on the Model's Elasticsearch index without catching Errors.
28
46
  def es!(query = '', opts = {})
29
47
  opts = {
30
48
  index: elasticsearch_index,
@@ -34,10 +52,20 @@ module Sequel
34
52
  Result.new es_client.search(opts), self
35
53
  end
36
54
 
55
+ # Fetch the next page in a scroll without catching Errors.
56
+ def scroll!(scroll_id, duration)
57
+ scroll_id = scroll_id.scroll_id if scroll_id.is_a? Result
58
+ return nil unless scroll_id
59
+ Result.new es_client.scroll(scroll_id: scroll_id, scroll: duration), self
60
+ end
61
+
62
+ # Execute a search or a scroll on the Model's Elasticsearch index.
63
+ # This method is "safe" in that it will catch the more common Errors.
37
64
  def es(query = '', opts = {})
38
- call_es { es! query, opts }
65
+ call_es { query.is_a?(Result) ? scroll!(query, opts) : es!(query, opts) }
39
66
  end
40
67
 
68
+ # Wrapper method in which error handling is done for Elasticsearch calls.
41
69
  def call_es
42
70
  yield
43
71
  rescue ::Elasticsearch::Transport::Transport::Errors::NotFound, ::Elasticsearch::Transport::Transport::Error => e
@@ -49,34 +77,45 @@ module Sequel
49
77
  end
50
78
  end
51
79
 
80
+ # The instance methods that will be added to the Sequel::Model
52
81
  module InstanceMethods
82
+ # Sequel::Model after_create hook to add the new record to the Elasticsearch index.
83
+ # It's "safe" in that it won't raise an error if it fails.
53
84
  def after_create
54
85
  super
55
86
  self.class.call_es { index_document }
56
87
  end
57
88
 
89
+ # Sequel::Model after_destroy hook to remove the record from the Elasticsearch index.
90
+ # It's "safe" in that it won't raise an error if it fails.
58
91
  def after_destroy
59
92
  super
60
93
  self.class.call_es { destroy_document }
61
94
  end
62
95
 
96
+ # Sequel::Model after_update hook to update the record in the Elasticsearch index.
97
+ # It's "safe" in that it won't raise an error if it fails.
63
98
  def after_update
64
99
  super
65
100
  self.class.call_es { index_document }
66
101
  end
67
102
 
103
+ # Return the Elasticsearch client used to communicate with the cluster.
68
104
  def es_client
69
105
  self.class.es_client
70
106
  end
71
107
 
72
108
  private
73
109
 
110
+ # Determine the ID to be used for the document in the Elasticsearch cluster.
111
+ # It will join the values of a multi field primary key with an underscore.
74
112
  def document_id
75
113
  doc_id = pk
76
114
  doc_id = doc_id.join('_') if doc_id.is_a? Array
77
115
  doc_id
78
116
  end
79
117
 
118
+ # Determine the complete path to a document (/index/type/id) in the Elasticsearch cluster.
80
119
  def document_path
81
120
  {
82
121
  index: self.class.elasticsearch_index,
@@ -85,12 +124,14 @@ module Sequel
85
124
  }
86
125
  end
87
126
 
127
+ # Create or update the document on the Elasticsearch cluster.
88
128
  def index_document
89
129
  params = document_path
90
130
  params[:body] = values.each_key { |k| values[k] = values[k].strftime('%FT%T%:z') if values[k].is_a?(Time) }
91
131
  es_client.index params
92
132
  end
93
133
 
134
+ # Remove the document from the Elasticsearch cluster.
94
135
  def destroy_document
95
136
  es_client.delete document_path
96
137
  end
@@ -1,38 +1,59 @@
1
1
  module Sequel
2
2
  module Plugins
3
3
  module Elasticsearch
4
+ # A wrapper around Elasticsearch results to make it behave more like a Sequel Dataset.
4
5
  class Result
5
6
  include Enumerable
6
7
 
7
- attr_reader :results, :scroll_id, :total, :took, :timed_out, :model
8
+ # The original result returned from the Elasticsearch client
9
+ attr_reader :result
10
+ # The scroll id, if set, from the result
11
+ attr_reader :scroll_id
12
+ # The total number of documents in the Elasticsearch result
13
+ attr_reader :total
14
+ # The time, in milliseconds, the Elasticsearch call took to complete
15
+ attr_reader :took
16
+ # Whether the Elasticsearch call timed out or not
17
+ attr_reader :timed_out
18
+ # The model class associated with this result
19
+ attr_reader :model
8
20
 
9
- def initialize(results, model = nil)
10
- return unless results && results['hits']
11
- @results = results
12
- @scroll_id = results['_scroll_id']
13
- @total = results['hits']['total']
14
- @timed_out = results['timed_out']
15
- @took = results['took']
21
+ # Initialize the Result
22
+ #
23
+ # * +result+ The result returned from the Elasticsearch client / +.es+ call.
24
+ # * +model+ The model class on which the results should be applied.
25
+ def initialize(result, model = nil)
26
+ return unless result && result['hits']
27
+
28
+ @result = result
29
+ @scroll_id = result['_scroll_id']
30
+ @total = result['hits']['total']
31
+ @timed_out = result['timed_out']
32
+ @took = result['took']
16
33
  @model = model
34
+
35
+ result['hits']['hits'] = result['hits']['hits'].map { |h| convert(h) }
17
36
  end
18
37
 
38
+ # Each implementation for the Enumerable. Yield each element in the +result['hits']['hits']+ array.
19
39
  def each
20
- return [] unless results['hits'] && results['hits']['hits']
21
- results['hits']['hits'].each do |h|
22
- yield convert(h)
23
- end
24
- # TODO: Use the scroll id to get more if needed
25
- # We will need access to the client, somehow...
40
+ return [] unless result['hits'] && result['hits']['hits'].count.positive?
41
+ result['hits']['hits'].each { |h| yield h }
42
+ end
43
+
44
+ # Send all undefined methods to the +result['hits']['hits']+ array.
45
+ def method_missing(m, *args, &block)
46
+ respond_to_missing?(m) ? result['hits']['hits'].send(m, *args, &block) : super
26
47
  end
27
48
 
28
- def all
29
- results['hits']['hits'].map do |h|
30
- convert(h)
31
- end
49
+ # Report whether the +result['hits']['hits']+ array responds to the given method.
50
+ def respond_to_missing?(m, include_private = false)
51
+ result['hits']['hits'].respond_to?(m, include_private) || super
32
52
  end
33
53
 
34
54
  private
35
55
 
56
+ # Convert an Elasticsearch hit to a Sequel::Model
36
57
  def convert(hit)
37
58
  return hit unless model
38
59
  source = hit['_source'].each_with_object({}) { |(k, v), h| h[k.to_sym] = v }
@@ -1,5 +1,6 @@
1
1
  module Sequel
2
2
  module Elasticsearch
3
- VERSION = '0.3.3'.freeze
3
+ # The Gem's version.
4
+ VERSION = '0.4.0'.freeze
4
5
  end
5
6
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sequel-elasticsearch
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.3
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jurgens du Toit
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-02-08 00:00:00.000000000 Z
11
+ date: 2018-02-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: elasticsearch
@@ -139,6 +139,7 @@ files:
139
139
  - LICENSE.txt
140
140
  - README.md
141
141
  - Rakefile
142
+ - _config.yml
142
143
  - lib/sequel/plugins/elasticsearch.rb
143
144
  - lib/sequel/plugins/elasticsearch/result.rb
144
145
  - lib/sequel/plugins/elasticsearch/version.rb