sequel-elasticsearch 0.3.3 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +46 -3
- data/_config.yml +1 -0
- data/lib/sequel/plugins/elasticsearch.rb +44 -3
- data/lib/sequel/plugins/elasticsearch/result.rb +39 -18
- data/lib/sequel/plugins/elasticsearch/version.rb +2 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4b72432bc6c37b866fb0959c07d71b0bb90fa8773ad0006fe65250f62e80b48e
|
4
|
+
data.tar.gz: 7eb98c377331877a870449a1e3c1060504c70eb6e7f7cc9905a0ad767406090e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1de1abc8e613aeaea8f256626b5f20c370e91f8bad909be2b9c76988a08868aed16caab5c10066fe70098984c49bf0c280e10398957d3b277fcf7a36c39c1297
|
7
|
+
data.tar.gz: 40005b30e9552ceed6e2d560e843eeec2666f9dd444c36007581a59bc4a6ab94d293172502daf5ed3068cb8229b6673318182c66fed1d841a40c695c37b4b62e
|
data/README.md
CHANGED
@@ -42,11 +42,11 @@ Sequel::Model.plugin :elasticsearch
|
|
42
42
|
Or per model:
|
43
43
|
|
44
44
|
```ruby
|
45
|
-
|
45
|
+
Document.plugin Sequel::Elasticsearch
|
46
46
|
|
47
47
|
# or
|
48
48
|
|
49
|
-
class
|
49
|
+
class Document < Sequel::Model
|
50
50
|
plugin :elasticsearch
|
51
51
|
end
|
52
52
|
```
|
@@ -62,6 +62,49 @@ Sequel::Model.plugin :elasticsearch,
|
|
62
62
|
|
63
63
|
And that's it! Just transact as you normally would, and your records will be created and updated in the Elasticsearch cluster.
|
64
64
|
|
65
|
+
### Searching
|
66
|
+
|
67
|
+
Your model is now searchable through Elasticsearch. Just pass down a string that's parsable as a [query string query](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html).
|
68
|
+
|
69
|
+
```ruby
|
70
|
+
Document.es('title:Sequel')
|
71
|
+
Document.es('title:Sequel AND body:Elasticsearch')
|
72
|
+
```
|
73
|
+
|
74
|
+
The result from the `es` method is an enumerable containing `Sequel::Model` instances of your model:
|
75
|
+
|
76
|
+
```ruby
|
77
|
+
results = Document.es('title:Sequel')
|
78
|
+
results.each { |e| p e }
|
79
|
+
# Outputs
|
80
|
+
# #<Document @values={:id=>1, :title=>"Sequel", :body=>"Document 1"}>
|
81
|
+
# #<Document @values={:id=>2, :title=>"Sequel", :body=>"Document 2"}>
|
82
|
+
```
|
83
|
+
|
84
|
+
The result also contains the meta info about the Elasticsearch query result:
|
85
|
+
|
86
|
+
```ruby
|
87
|
+
results = Document.es('title:Sequel')
|
88
|
+
p results.count # The number of documents included in this result
|
89
|
+
p results.total # The total number of documents in the index that match the search
|
90
|
+
p results.timed_out # If the search timed out or not
|
91
|
+
p results.took # How long, in milliseconds, the search took
|
92
|
+
```
|
93
|
+
|
94
|
+
You can also use the scroll API to search and fetch large datasets:
|
95
|
+
|
96
|
+
```ruby
|
97
|
+
# Get a dataset that will stay consistent for 5 minutes and extend that time with 1 minute on every iteration
|
98
|
+
scroll = Document.es('test', scroll: '5m')
|
99
|
+
p scroll.scroll_id # Outputs the scroll_id for this specific scrolling snapshot
|
100
|
+
puts "Found #{scroll.count} of #{scroll.total} documents"
|
101
|
+
scroll.each { |e| p e }
|
102
|
+
while (scroll = Document.es(scroll, scroll: '1m')) && scroll.empty? == false do
|
103
|
+
puts "Found #{scroll.count} of #{scroll.total} documents"
|
104
|
+
scroll.each { |e| p e }
|
105
|
+
end
|
106
|
+
```
|
107
|
+
|
65
108
|
## Development
|
66
109
|
|
67
110
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
@@ -75,7 +118,7 @@ Bug reports and pull requests are welcome on GitHub at https://github.com/jrgns/
|
|
75
118
|
Features that need to be built:
|
76
119
|
|
77
120
|
- [x] An `es` method to search through the data on the cluster.
|
78
|
-
- [
|
121
|
+
- [x] Let `es` return an enumerator of `Sequel::Model` instances.
|
79
122
|
- [ ] A rake task to create or suggest mappings for a table.
|
80
123
|
|
81
124
|
## License
|
data/_config.yml
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
theme: jekyll-theme-architect
|
@@ -3,7 +3,16 @@ require 'sequel/plugins/elasticsearch/result'
|
|
3
3
|
|
4
4
|
module Sequel
|
5
5
|
module Plugins
|
6
|
+
# The Sequel::Elasticsearch model plugin
|
7
|
+
#
|
8
|
+
# @example Simple usage
|
9
|
+
#
|
10
|
+
# require 'sequel-elasticsearch'
|
11
|
+
# Document.plugin Sequel::Elasticsearch
|
12
|
+
# Document.es('test')
|
13
|
+
#
|
6
14
|
module Elasticsearch
|
15
|
+
# Apply the plugin to the specified model
|
7
16
|
def self.apply(model, _opts = OPTS)
|
8
17
|
model.instance_variable_set(:@elasticsearch_opts, {})
|
9
18
|
model.instance_variable_set(:@elasticsearch_index, nil)
|
@@ -11,6 +20,7 @@ module Sequel
|
|
11
20
|
model
|
12
21
|
end
|
13
22
|
|
23
|
+
# Configure the plugin
|
14
24
|
def self.configure(model, opts = OPTS)
|
15
25
|
model.elasticsearch_opts = opts[:elasticsearch] || {}
|
16
26
|
model.elasticsearch_index = (opts[:index] || model.table_name).to_sym
|
@@ -18,13 +28,21 @@ module Sequel
|
|
18
28
|
model
|
19
29
|
end
|
20
30
|
|
31
|
+
# The class methods that will be added to the Sequel::Model
|
21
32
|
module ClassMethods
|
22
|
-
|
23
|
-
|
33
|
+
# The extra options that will be passed to the Elasticsearch client.
|
34
|
+
attr_accessor :elasticsearch_opts
|
35
|
+
# The Elasticsearch index to which the documents will be written.
|
36
|
+
attr_accessor :elasticsearch_index
|
37
|
+
# The Elasticsearch type to which the documents will be written.
|
38
|
+
attr_accessor :elasticsearch_type
|
39
|
+
|
40
|
+
# Return the Elasticsearch client used to communicate with the cluster.
|
24
41
|
def es_client
|
25
42
|
@es_client = ::Elasticsearch::Client.new elasticsearch_opts
|
26
43
|
end
|
27
44
|
|
45
|
+
# Execute a search on the Model's Elasticsearch index without catching Errors.
|
28
46
|
def es!(query = '', opts = {})
|
29
47
|
opts = {
|
30
48
|
index: elasticsearch_index,
|
@@ -34,10 +52,20 @@ module Sequel
|
|
34
52
|
Result.new es_client.search(opts), self
|
35
53
|
end
|
36
54
|
|
55
|
+
# Fetch the next page in a scroll without catching Errors.
|
56
|
+
def scroll!(scroll_id, duration)
|
57
|
+
scroll_id = scroll_id.scroll_id if scroll_id.is_a? Result
|
58
|
+
return nil unless scroll_id
|
59
|
+
Result.new es_client.scroll(scroll_id: scroll_id, scroll: duration), self
|
60
|
+
end
|
61
|
+
|
62
|
+
# Execute a search or a scroll on the Model's Elasticsearch index.
|
63
|
+
# This method is "safe" in that it will catch the more common Errors.
|
37
64
|
def es(query = '', opts = {})
|
38
|
-
call_es { es!
|
65
|
+
call_es { query.is_a?(Result) ? scroll!(query, opts) : es!(query, opts) }
|
39
66
|
end
|
40
67
|
|
68
|
+
# Wrapper method in which error handling is done for Elasticsearch calls.
|
41
69
|
def call_es
|
42
70
|
yield
|
43
71
|
rescue ::Elasticsearch::Transport::Transport::Errors::NotFound, ::Elasticsearch::Transport::Transport::Error => e
|
@@ -49,34 +77,45 @@ module Sequel
|
|
49
77
|
end
|
50
78
|
end
|
51
79
|
|
80
|
+
# The instance methods that will be added to the Sequel::Model
|
52
81
|
module InstanceMethods
|
82
|
+
# Sequel::Model after_create hook to add the new record to the Elasticsearch index.
|
83
|
+
# It's "safe" in that it won't raise an error if it fails.
|
53
84
|
def after_create
|
54
85
|
super
|
55
86
|
self.class.call_es { index_document }
|
56
87
|
end
|
57
88
|
|
89
|
+
# Sequel::Model after_destroy hook to remove the record from the Elasticsearch index.
|
90
|
+
# It's "safe" in that it won't raise an error if it fails.
|
58
91
|
def after_destroy
|
59
92
|
super
|
60
93
|
self.class.call_es { destroy_document }
|
61
94
|
end
|
62
95
|
|
96
|
+
# Sequel::Model after_update hook to update the record in the Elasticsearch index.
|
97
|
+
# It's "safe" in that it won't raise an error if it fails.
|
63
98
|
def after_update
|
64
99
|
super
|
65
100
|
self.class.call_es { index_document }
|
66
101
|
end
|
67
102
|
|
103
|
+
# Return the Elasticsearch client used to communicate with the cluster.
|
68
104
|
def es_client
|
69
105
|
self.class.es_client
|
70
106
|
end
|
71
107
|
|
72
108
|
private
|
73
109
|
|
110
|
+
# Determine the ID to be used for the document in the Elasticsearch cluster.
|
111
|
+
# It will join the values of a multi field primary key with an underscore.
|
74
112
|
def document_id
|
75
113
|
doc_id = pk
|
76
114
|
doc_id = doc_id.join('_') if doc_id.is_a? Array
|
77
115
|
doc_id
|
78
116
|
end
|
79
117
|
|
118
|
+
# Determine the complete path to a document (/index/type/id) in the Elasticsearch cluster.
|
80
119
|
def document_path
|
81
120
|
{
|
82
121
|
index: self.class.elasticsearch_index,
|
@@ -85,12 +124,14 @@ module Sequel
|
|
85
124
|
}
|
86
125
|
end
|
87
126
|
|
127
|
+
# Create or update the document on the Elasticsearch cluster.
|
88
128
|
def index_document
|
89
129
|
params = document_path
|
90
130
|
params[:body] = values.each_key { |k| values[k] = values[k].strftime('%FT%T%:z') if values[k].is_a?(Time) }
|
91
131
|
es_client.index params
|
92
132
|
end
|
93
133
|
|
134
|
+
# Remove the document from the Elasticsearch cluster.
|
94
135
|
def destroy_document
|
95
136
|
es_client.delete document_path
|
96
137
|
end
|
@@ -1,38 +1,59 @@
|
|
1
1
|
module Sequel
|
2
2
|
module Plugins
|
3
3
|
module Elasticsearch
|
4
|
+
# A wrapper around Elasticsearch results to make it behave more like a Sequel Dataset.
|
4
5
|
class Result
|
5
6
|
include Enumerable
|
6
7
|
|
7
|
-
|
8
|
+
# The original result returned from the Elasticsearch client
|
9
|
+
attr_reader :result
|
10
|
+
# The scroll id, if set, from the result
|
11
|
+
attr_reader :scroll_id
|
12
|
+
# The total number of documents in the Elasticsearch result
|
13
|
+
attr_reader :total
|
14
|
+
# The time, in milliseconds, the Elasticsearch call took to complete
|
15
|
+
attr_reader :took
|
16
|
+
# If the Elasticsearch call timed out or not
|
17
|
+
attr_reader :timed_out
|
18
|
+
# The model class associated with this result
|
19
|
+
attr_reader :model
|
8
20
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
21
|
+
# Initialize the Result
|
22
|
+
#
|
23
|
+
# * +result+ The result returned from the Elasticsearch client / +.es+ call.
|
24
|
+
# * +model+ The model class on which the results should be applied.
|
25
|
+
def initialize(result, model = nil)
|
26
|
+
return unless result && result['hits']
|
27
|
+
|
28
|
+
@result = result
|
29
|
+
@scroll_id = result['_scroll_id']
|
30
|
+
@total = result['hits']['total']
|
31
|
+
@timed_out = result['timed_out']
|
32
|
+
@took = result['took']
|
16
33
|
@model = model
|
34
|
+
|
35
|
+
result['hits']['hits'] = result['hits']['hits'].map { |h| convert(h) }
|
17
36
|
end
|
18
37
|
|
38
|
+
# Each implementation for the Enumerable. Yield each element in the +result['hits']['hits']+ array.
|
19
39
|
def each
|
20
|
-
return [] unless
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
40
|
+
return [] unless result['hits'] && result['hits']['hits'].count.positive?
|
41
|
+
result['hits']['hits'].each { |h| yield h }
|
42
|
+
end
|
43
|
+
|
44
|
+
# Send all undefined methods to the +result['hits']['hits']+ array.
|
45
|
+
def method_missing(m, *args, &block)
|
46
|
+
respond_to_missing?(m) ? result['hits']['hits'].send(m, *args, &block) : super
|
26
47
|
end
|
27
48
|
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
end
|
49
|
+
# Respond to any method that the +result['hits']['hits']+ array responds to.
|
50
|
+
def respond_to_missing?(m, include_private = false)
|
51
|
+
result['hits']['hits'].respond_to?(m, include_private) || super
|
32
52
|
end
|
33
53
|
|
34
54
|
private
|
35
55
|
|
56
|
+
# Convert an Elasticsearch hit to a Sequel::Model
|
36
57
|
def convert(hit)
|
37
58
|
return hit unless model
|
38
59
|
source = hit['_source'].each_with_object({}) { |(k, v), h| h[k.to_sym] = v }
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sequel-elasticsearch
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jurgens du Toit
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-02-
|
11
|
+
date: 2018-02-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: elasticsearch
|
@@ -139,6 +139,7 @@ files:
|
|
139
139
|
- LICENSE.txt
|
140
140
|
- README.md
|
141
141
|
- Rakefile
|
142
|
+
- _config.yml
|
142
143
|
- lib/sequel/plugins/elasticsearch.rb
|
143
144
|
- lib/sequel/plugins/elasticsearch/result.rb
|
144
145
|
- lib/sequel/plugins/elasticsearch/version.rb
|