sequel-elasticsearch 0.3.3 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9fedd83674e13bf3111df6c01a4a7c997e39cfd16906ea9849c5e22ca9944d36
4
- data.tar.gz: 3799f9f9eba3821790e96abf8d00016600dc71cb6620c439c6dd82ffb69dd09e
3
+ metadata.gz: 4b72432bc6c37b866fb0959c07d71b0bb90fa8773ad0006fe65250f62e80b48e
4
+ data.tar.gz: 7eb98c377331877a870449a1e3c1060504c70eb6e7f7cc9905a0ad767406090e
5
5
  SHA512:
6
- metadata.gz: 7219a5449f7c7a88f49a71d43ed1cce202e6312ad050cdde494171289aed5ee199628ba7d2cc36b3c28344aff5fdb76a94acea6701cb224cb15062a474aa3994
7
- data.tar.gz: 2db11be8b0cb25339a3399325511a992c69591147b9810978aa03cf2c223f97dc10f7217dbb75720ea6ae82bac2a03ec38c8c5b0451e682959b3eacd448253e3
6
+ metadata.gz: 1de1abc8e613aeaea8f256626b5f20c370e91f8bad909be2b9c76988a08868aed16caab5c10066fe70098984c49bf0c280e10398957d3b277fcf7a36c39c1297
7
+ data.tar.gz: 40005b30e9552ceed6e2d560e843eeec2666f9dd444c36007581a59bc4a6ab94d293172502daf5ed3068cb8229b6673318182c66fed1d841a40c695c37b4b62e
data/README.md CHANGED
@@ -42,11 +42,11 @@ Sequel::Model.plugin :elasticsearch
42
42
  Or per model:
43
43
 
44
44
  ```ruby
45
- Sequel::Model.plugin Sequel::Elasticsearch
45
+ Document.plugin Sequel::Elasticsearch
46
46
 
47
47
  # or
48
48
 
49
- class Node < Sequel::Model
49
+ class Document < Sequel::Model
50
50
  plugin :elasticsearch
51
51
  end
52
52
  ```
@@ -62,6 +62,49 @@ Sequel::Model.plugin :elasticsearch,
62
62
 
63
63
  And that's it! Just transact as you normally would, and your records will be created and updated in the Elasticsearch cluster.
64
64
 
65
+ ### Searching
66
+
67
+ Your model is now searchable through Elasticsearch. Just pass down a string that's parsable as a [query string query](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html).
68
+
69
+ ```ruby
70
+ Document.es('title:Sequel')
71
+ Document.es('title:Sequel AND body:Elasticsearch')
72
+ ```
73
+
74
+ The result from the `es` method is an enumerable containing `Sequel::Model` instances of your model:
75
+
76
+ ```ruby
77
+ results = Document.es('title:Sequel')
78
+ results.each { |e| p e }
79
+ # Outputs
80
+ # #<Document @values={:id=>1, :title=>"Sequel", :body=>"Document 1"}>
81
+ # #<Document @values={:id=>2, :title=>"Sequel", :body=>"Document 2"}>
82
+ ```
83
+
84
+ The result also contains the meta info about the Elasticsearch query result:
85
+
86
+ ```ruby
87
+ results = Document.es('title:Sequel')
88
+ p results.count # The number of documents included in this result
89
+ p results.total # The total number of documents in the index that match the search
90
+ p results.timed_out # If the search timed out or not
91
+ p results.took # How long, in milliseconds, the search took
92
+ ```
93
+
94
+ You can also use the scroll API to search and fetch large datasets:
95
+
96
+ ```ruby
97
+ # Get a dataset that will stay consistent for 5 minutes and extend that time with 1 minute on every iteration
98
+ scroll = Document.es('test', scroll: '5m')
99
+ p scroll_id # Outputs the scroll_id for this specific scrolling snapshot
100
+ puts "Found #{scroll.count} of #{scroll.total} documents"
101
+ scroll.each { |e| p e }
102
+ while (scroll = Document.es(scroll, scroll: '1m')) && scroll.empty? == false do
103
+ puts "Found #{scroll.count} of #{scroll.total} documents"
104
+ scroll.each { |e| p e }
105
+ end
106
+ ```
107
+
65
108
  ## Development
66
109
 
67
110
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
@@ -75,7 +118,7 @@ Bug reports and pull requests are welcome on GitHub at https://github.com/jrgns/
75
118
  Features that need to be built:
76
119
 
77
120
  - [x] An `es` method to search through the data on the cluster.
78
- - [ ] Let `es` return an enumerator of `Sequel::Model` instances.
121
+ - [x] Let `es` return an enumerator of `Sequel::Model` instances.
79
122
  - [ ] A rake task to create or suggest mappings for a table.
80
123
 
81
124
  ## License
@@ -0,0 +1 @@
1
+ theme: jekyll-theme-architect
@@ -3,7 +3,16 @@ require 'sequel/plugins/elasticsearch/result'
3
3
 
4
4
  module Sequel
5
5
  module Plugins
6
+ # The Sequel::Elasticsearch model plugin
7
+ #
8
+ # @example Simple usage
9
+ #
10
+ # require 'sequel-elasticsearch'
11
+ # Document.plugin Sequel::Elasticsearch
12
+ # Document.es('test')
13
+ #
6
14
  module Elasticsearch
15
+ # Apply the plugin to the specified model
7
16
  def self.apply(model, _opts = OPTS)
8
17
  model.instance_variable_set(:@elasticsearch_opts, {})
9
18
  model.instance_variable_set(:@elasticsearch_index, nil)
@@ -11,6 +20,7 @@ module Sequel
11
20
  model
12
21
  end
13
22
 
23
+ # Configure the plugin
14
24
  def self.configure(model, opts = OPTS)
15
25
  model.elasticsearch_opts = opts[:elasticsearch] || {}
16
26
  model.elasticsearch_index = (opts[:index] || model.table_name).to_sym
@@ -18,13 +28,21 @@ module Sequel
18
28
  model
19
29
  end
20
30
 
31
+ # The class methods that will be added to the Sequel::Model
21
32
  module ClassMethods
22
- attr_accessor :elasticsearch_opts, :elasticsearch_index, :elasticsearch_type
23
-
33
+ # The extra options that will be passed to the Elasticsearch client.
34
+ attr_accessor :elasticsearch_opts
35
+ # The Elasticsearch index to which the documents will be written.
36
+ attr_accessor :elasticsearch_index
37
+ # The Elasticsearch type to which the documents will be written.
38
+ attr_accessor :elasticsearch_type
39
+
40
+ # Return the Elasticsearch client used to communicate with the cluster.
24
41
  def es_client
25
42
  @es_client = ::Elasticsearch::Client.new elasticsearch_opts
26
43
  end
27
44
 
45
+ # Execute a search on the Model's Elasticsearch index without catching Errors.
28
46
  def es!(query = '', opts = {})
29
47
  opts = {
30
48
  index: elasticsearch_index,
@@ -34,10 +52,20 @@ module Sequel
34
52
  Result.new es_client.search(opts), self
35
53
  end
36
54
 
55
+ # Fetch the next page in a scroll without catching Errors.
56
+ def scroll!(scroll_id, duration)
57
+ scroll_id = scroll_id.scroll_id if scroll_id.is_a? Result
58
+ return nil unless scroll_id
59
+ Result.new es_client.scroll(scroll_id: scroll_id, scroll: duration), self
60
+ end
61
+
62
+ # Execute a search or a scroll on the Model's Elasticsearch index.
63
+ # This method is "safe" in that it will catch the more common Errors.
37
64
  def es(query = '', opts = {})
38
- call_es { es! query, opts }
65
+ call_es { query.is_a?(Result) ? scroll!(query, opts) : es!(query, opts) }
39
66
  end
40
67
 
68
+ # Wrapper method in which error handling is done for Elasticsearch calls.
41
69
  def call_es
42
70
  yield
43
71
  rescue ::Elasticsearch::Transport::Transport::Errors::NotFound, ::Elasticsearch::Transport::Transport::Error => e
@@ -49,34 +77,45 @@ module Sequel
49
77
  end
50
78
  end
51
79
 
80
+ # The instance methods that will be added to the Sequel::Model
52
81
  module InstanceMethods
82
+ # Sequel::Model after_create hook to add the new record to the Elasticsearch index.
83
+ # It's "safe" in that it won't raise an error if it fails.
53
84
  def after_create
54
85
  super
55
86
  self.class.call_es { index_document }
56
87
  end
57
88
 
89
+ # Sequel::Model after_destroy hook to remove the record from the Elasticsearch index.
90
+ # It's "safe" in that it won't raise an error if it fails.
58
91
  def after_destroy
59
92
  super
60
93
  self.class.call_es { destroy_document }
61
94
  end
62
95
 
96
+ # Sequel::Model after_update hook to update the record in the Elasticsearch index.
97
+ # It's "safe" in that it won't raise an error if it fails.
63
98
  def after_update
64
99
  super
65
100
  self.class.call_es { index_document }
66
101
  end
67
102
 
103
+ # Return the Elasticsearch client used to communicate with the cluster.
68
104
  def es_client
69
105
  self.class.es_client
70
106
  end
71
107
 
72
108
  private
73
109
 
110
+ # Determine the ID to be used for the document in the Elasticsearch cluster.
111
+ # It will join the values of a multi field primary key with an underscore.
74
112
  def document_id
75
113
  doc_id = pk
76
114
  doc_id = doc_id.join('_') if doc_id.is_a? Array
77
115
  doc_id
78
116
  end
79
117
 
118
+ # Determine the complete path to a document (/index/type/id) in the Elasticsearch cluster.
80
119
  def document_path
81
120
  {
82
121
  index: self.class.elasticsearch_index,
@@ -85,12 +124,14 @@ module Sequel
85
124
  }
86
125
  end
87
126
 
127
+ # Create or update the document on the Elasticsearch cluster.
88
128
  def index_document
89
129
  params = document_path
90
130
  params[:body] = values.each_key { |k| values[k] = values[k].strftime('%FT%T%:z') if values[k].is_a?(Time) }
91
131
  es_client.index params
92
132
  end
93
133
 
134
+ # Remove the document from the Elasticsearch cluster.
94
135
  def destroy_document
95
136
  es_client.delete document_path
96
137
  end
@@ -1,38 +1,59 @@
1
1
  module Sequel
2
2
  module Plugins
3
3
  module Elasticsearch
4
+ # A wrapper around Elasticsearch results to make it behave more like a Sequel Dataset.
4
5
  class Result
5
6
  include Enumerable
6
7
 
7
- attr_reader :results, :scroll_id, :total, :took, :timed_out, :model
8
+ # The original result returned from the Elasticsearch client
9
+ attr_reader :result
10
+ # The scroll id, if set, from the result
11
+ attr_reader :scroll_id
12
+ # The total number of documents in the Elasticsearch result
13
+ attr_reader :total
14
+ # The time, in milliseconds, the Elasticsearch call took to complete
15
+ attr_reader :took
16
+ # If the Elasticsearch call timed out or not
17
+ attr_reader :timed_out
18
+ # The model class associated with this result
19
+ attr_reader :model
8
20
 
9
- def initialize(results, model = nil)
10
- return unless results && results['hits']
11
- @results = results
12
- @scroll_id = results['_scroll_id']
13
- @total = results['hits']['total']
14
- @timed_out = results['timed_out']
15
- @took = results['took']
21
+ # Initialize the Result
22
+ #
23
+ # * +result+ The result returned from the Elasticsearch client / +.es+ call.
24
+ # * +model+ The model class on which the results should be applied.
25
+ def initialize(result, model = nil)
26
+ return unless result && result['hits']
27
+
28
+ @result = result
29
+ @scroll_id = result['_scroll_id']
30
+ @total = result['hits']['total']
31
+ @timed_out = result['timed_out']
32
+ @took = result['took']
16
33
  @model = model
34
+
35
+ result['hits']['hits'] = result['hits']['hits'].map { |h| convert(h) }
17
36
  end
18
37
 
38
+ # Each implementation for the Enumerable. Yield each element in the +result['hits']['hits']+ array.
19
39
  def each
20
- return [] unless results['hits'] && results['hits']['hits']
21
- results['hits']['hits'].each do |h|
22
- yield convert(h)
23
- end
24
- # TODO: Use the scroll id to get more if needed
25
- # We will need access to the client, somehow...
40
+ return [] unless result['hits'] && result['hits']['hits'].count.positive?
41
+ result['hits']['hits'].each { |h| yield h }
42
+ end
43
+
44
+ # Send all undefined methods to the +result['hits']['hits']+ array.
45
+ def method_missing(m, *args, &block)
46
+ respond_to_missing?(m) ? result['hits']['hits'].send(m, *args, &block) : super
26
47
  end
27
48
 
28
- def all
29
- results['hits']['hits'].map do |h|
30
- convert(h)
31
- end
49
+ # Report whether the +result['hits']['hits']+ array responds to the given method.
50
+ def respond_to_missing?(m, include_private = false)
51
+ result['hits']['hits'].respond_to?(m, include_private) || super
32
52
  end
33
53
 
34
54
  private
35
55
 
56
+ # Convert an Elasticsearch hit to a Sequel::Model
36
57
  def convert(hit)
37
58
  return hit unless model
38
59
  source = hit['_source'].each_with_object({}) { |(k, v), h| h[k.to_sym] = v }
@@ -1,5 +1,6 @@
1
1
  module Sequel
2
2
  module Elasticsearch
3
- VERSION = '0.3.3'.freeze
3
+ # The Gem's version.
4
+ VERSION = '0.4.0'.freeze
4
5
  end
5
6
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sequel-elasticsearch
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.3
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jurgens du Toit
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-02-08 00:00:00.000000000 Z
11
+ date: 2018-02-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: elasticsearch
@@ -139,6 +139,7 @@ files:
139
139
  - LICENSE.txt
140
140
  - README.md
141
141
  - Rakefile
142
+ - _config.yml
142
143
  - lib/sequel/plugins/elasticsearch.rb
143
144
  - lib/sequel/plugins/elasticsearch/result.rb
144
145
  - lib/sequel/plugins/elasticsearch/version.rb