sequel-elasticsearch 0.3.3 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +46 -3
- data/_config.yml +1 -0
- data/lib/sequel/plugins/elasticsearch.rb +44 -3
- data/lib/sequel/plugins/elasticsearch/result.rb +39 -18
- data/lib/sequel/plugins/elasticsearch/version.rb +2 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4b72432bc6c37b866fb0959c07d71b0bb90fa8773ad0006fe65250f62e80b48e
|
4
|
+
data.tar.gz: 7eb98c377331877a870449a1e3c1060504c70eb6e7f7cc9905a0ad767406090e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1de1abc8e613aeaea8f256626b5f20c370e91f8bad909be2b9c76988a08868aed16caab5c10066fe70098984c49bf0c280e10398957d3b277fcf7a36c39c1297
|
7
|
+
data.tar.gz: 40005b30e9552ceed6e2d560e843eeec2666f9dd444c36007581a59bc4a6ab94d293172502daf5ed3068cb8229b6673318182c66fed1d841a40c695c37b4b62e
|
data/README.md
CHANGED
@@ -42,11 +42,11 @@ Sequel::Model.plugin :elasticsearch
|
|
42
42
|
Or per model:
|
43
43
|
|
44
44
|
```ruby
|
45
|
-
|
45
|
+
Document.plugin Sequel::Elasticsearch
|
46
46
|
|
47
47
|
# or
|
48
48
|
|
49
|
-
class
|
49
|
+
class Document < Sequel::Model
|
50
50
|
plugin :elasticsearch
|
51
51
|
end
|
52
52
|
```
|
@@ -62,6 +62,49 @@ Sequel::Model.plugin :elasticsearch,
|
|
62
62
|
|
63
63
|
And that's it! Just transact as you normally would, and your records will be created and updated in the Elasticsearch cluster.
|
64
64
|
|
65
|
+
### Searching
|
66
|
+
|
67
|
+
Your model is now searchable through Elasticsearch. Just pass down a string that's parsable as a [query string query](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html).
|
68
|
+
|
69
|
+
```ruby
|
70
|
+
Document.es('title:Sequel')
|
71
|
+
Document.es('title:Sequel AND body:Elasticsearch')
|
72
|
+
```
|
73
|
+
|
74
|
+
The result from the `es` method is an enumerable containing `Sequel::Model` instances of your model:
|
75
|
+
|
76
|
+
```ruby
|
77
|
+
results = Document.es('title:Sequel')
|
78
|
+
results.each { |e| p e }
|
79
|
+
# Outputs
|
80
|
+
# #<Document @values={:id=>1, :title=>"Sequel", :body=>"Document 1"}>
|
81
|
+
# #<Document @values={:id=>2, :title=>"Sequel", :body=>"Document 2"}>
|
82
|
+
```
|
83
|
+
|
84
|
+
The result also contains the meta info about the Elasticsearch query result:
|
85
|
+
|
86
|
+
```ruby
|
87
|
+
results = Document.es('title:Sequel')
|
88
|
+
p results.count # The number of documents included in this result
|
89
|
+
p results.total # The total number of documents in the index that match the search
|
90
|
+
p results.timed_out # If the search timed out or not
|
91
|
+
p results.took # How long, in milliseconds, the search took
|
92
|
+
```
|
93
|
+
|
94
|
+
You can also use the scroll API to search and fetch large datasets:
|
95
|
+
|
96
|
+
```ruby
|
97
|
+
# Get a dataset that will stay consistent for 5 minutes, and extend that time by 1 minute on every iteration
|
98
|
+
scroll = Document.es('test', scroll: '5m')
|
99
|
+
p scroll.scroll_id # Outputs the scroll_id for this specific scrolling snapshot
|
100
|
+
puts "Found #{scroll.count} of #{scroll.total} documents"
|
101
|
+
scroll.each { |e| p e }
|
102
|
+
while (scroll = Document.es(scroll, scroll: '1m')) && scroll.empty? == false do
|
103
|
+
puts "Found #{scroll.count} of #{scroll.total} documents"
|
104
|
+
scroll.each { |e| p e }
|
105
|
+
end
|
106
|
+
```
|
107
|
+
|
65
108
|
## Development
|
66
109
|
|
67
110
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
@@ -75,7 +118,7 @@ Bug reports and pull requests are welcome on GitHub at https://github.com/jrgns/
|
|
75
118
|
Features that need to be built:
|
76
119
|
|
77
120
|
- [x] An `es` method to search through the data on the cluster.
|
78
|
-
- [
|
121
|
+
- [x] Let `es` return an enumerator of `Sequel::Model` instances.
|
79
122
|
- [ ] A rake task to create or suggest mappings for a table.
|
80
123
|
|
81
124
|
## License
|
data/_config.yml
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
theme: jekyll-theme-architect
|
@@ -3,7 +3,16 @@ require 'sequel/plugins/elasticsearch/result'
|
|
3
3
|
|
4
4
|
module Sequel
|
5
5
|
module Plugins
|
6
|
+
# The Sequel::Elasticsearch model plugin
|
7
|
+
#
|
8
|
+
# @example Simple usage
|
9
|
+
#
|
10
|
+
# require 'sequel-elasticsearch'
|
11
|
+
# Document.plugin Sequel::Elasticsearch
|
12
|
+
# Document.es('test')
|
13
|
+
#
|
6
14
|
module Elasticsearch
|
15
|
+
# Apply the plugin to the specified model
|
7
16
|
def self.apply(model, _opts = OPTS)
|
8
17
|
model.instance_variable_set(:@elasticsearch_opts, {})
|
9
18
|
model.instance_variable_set(:@elasticsearch_index, nil)
|
@@ -11,6 +20,7 @@ module Sequel
|
|
11
20
|
model
|
12
21
|
end
|
13
22
|
|
23
|
+
# Configure the plugin
|
14
24
|
def self.configure(model, opts = OPTS)
|
15
25
|
model.elasticsearch_opts = opts[:elasticsearch] || {}
|
16
26
|
model.elasticsearch_index = (opts[:index] || model.table_name).to_sym
|
@@ -18,13 +28,21 @@ module Sequel
|
|
18
28
|
model
|
19
29
|
end
|
20
30
|
|
31
|
+
# The class methods that will be added to the Sequel::Model
|
21
32
|
module ClassMethods
|
22
|
-
|
23
|
-
|
33
|
+
# The extra options that will be passed to the Elasticsearch client.
|
34
|
+
attr_accessor :elasticsearch_opts
|
35
|
+
# The Elasticsearch index to which the documents will be written.
|
36
|
+
attr_accessor :elasticsearch_index
|
37
|
+
# The Elasticsearch type to which the documents will be written.
|
38
|
+
attr_accessor :elasticsearch_type
|
39
|
+
|
40
|
+
# Return the Elasticsearch client used to communicate with the cluster.
|
24
41
|
def es_client
|
25
42
|
@es_client = ::Elasticsearch::Client.new elasticsearch_opts
|
26
43
|
end
|
27
44
|
|
45
|
+
# Execute a search on the Model's Elasticsearch index without catching Errors.
|
28
46
|
def es!(query = '', opts = {})
|
29
47
|
opts = {
|
30
48
|
index: elasticsearch_index,
|
@@ -34,10 +52,20 @@ module Sequel
|
|
34
52
|
Result.new es_client.search(opts), self
|
35
53
|
end
|
36
54
|
|
55
|
+
# Fetch the next page in a scroll without catching Errors.
|
56
|
+
def scroll!(scroll_id, duration)
|
57
|
+
scroll_id = scroll_id.scroll_id if scroll_id.is_a? Result
|
58
|
+
return nil unless scroll_id
|
59
|
+
Result.new es_client.scroll(scroll_id: scroll_id, scroll: duration), self
|
60
|
+
end
|
61
|
+
|
62
|
+
# Execute a search or a scroll on the Model's Elasticsearch index.
|
63
|
+
# This method is "safe" in that it will catch the more common Errors.
|
37
64
|
def es(query = '', opts = {})
|
38
|
-
call_es { es!
|
65
|
+
call_es { query.is_a?(Result) ? scroll!(query, opts) : es!(query, opts) }
|
39
66
|
end
|
40
67
|
|
68
|
+
# Wrapper method in which error handling is done for Elasticsearch calls.
|
41
69
|
def call_es
|
42
70
|
yield
|
43
71
|
rescue ::Elasticsearch::Transport::Transport::Errors::NotFound, ::Elasticsearch::Transport::Transport::Error => e
|
@@ -49,34 +77,45 @@ module Sequel
|
|
49
77
|
end
|
50
78
|
end
|
51
79
|
|
80
|
+
# The instance methods that will be added to the Sequel::Model
|
52
81
|
module InstanceMethods
|
82
|
+
# Sequel::Model after_create hook to add the new record to the Elasticsearch index.
|
83
|
+
# It's "safe" in that it won't raise an error if it fails.
|
53
84
|
def after_create
|
54
85
|
super
|
55
86
|
self.class.call_es { index_document }
|
56
87
|
end
|
57
88
|
|
89
|
+
# Sequel::Model after_destroy hook to remove the record from the Elasticsearch index.
|
90
|
+
# It's "safe" in that it won't raise an error if it fails.
|
58
91
|
def after_destroy
|
59
92
|
super
|
60
93
|
self.class.call_es { destroy_document }
|
61
94
|
end
|
62
95
|
|
96
|
+
# Sequel::Model after_update hook to update the record in the Elasticsearch index.
|
97
|
+
# It's "safe" in that it won't raise an error if it fails.
|
63
98
|
def after_update
|
64
99
|
super
|
65
100
|
self.class.call_es { index_document }
|
66
101
|
end
|
67
102
|
|
103
|
+
# Return the Elasticsearch client used to communicate with the cluster.
|
68
104
|
def es_client
|
69
105
|
self.class.es_client
|
70
106
|
end
|
71
107
|
|
72
108
|
private
|
73
109
|
|
110
|
+
# Determine the ID to be used for the document in the Elasticsearch cluster.
|
111
|
+
# It will join the values of a multi field primary key with an underscore.
|
74
112
|
def document_id
|
75
113
|
doc_id = pk
|
76
114
|
doc_id = doc_id.join('_') if doc_id.is_a? Array
|
77
115
|
doc_id
|
78
116
|
end
|
79
117
|
|
118
|
+
# Determine the complete path to a document (/index/type/id) in the Elasticsearch cluster.
|
80
119
|
def document_path
|
81
120
|
{
|
82
121
|
index: self.class.elasticsearch_index,
|
@@ -85,12 +124,14 @@ module Sequel
|
|
85
124
|
}
|
86
125
|
end
|
87
126
|
|
127
|
+
# Create or update the document on the Elasticsearch cluster.
|
88
128
|
def index_document
|
89
129
|
params = document_path
|
90
130
|
params[:body] = values.each_key { |k| values[k] = values[k].strftime('%FT%T%:z') if values[k].is_a?(Time) }
|
91
131
|
es_client.index params
|
92
132
|
end
|
93
133
|
|
134
|
+
# Remove the document from the Elasticsearch cluster.
|
94
135
|
def destroy_document
|
95
136
|
es_client.delete document_path
|
96
137
|
end
|
@@ -1,38 +1,59 @@
|
|
1
1
|
module Sequel
|
2
2
|
module Plugins
|
3
3
|
module Elasticsearch
|
4
|
+
# A wrapper around Elasticsearch results to make it behave more like a Sequel Dataset.
|
4
5
|
class Result
|
5
6
|
include Enumerable
|
6
7
|
|
7
|
-
|
8
|
+
# The original result returned from the Elasticsearch client
|
9
|
+
attr_reader :result
|
10
|
+
# The scroll id, if set, from the result
|
11
|
+
attr_reader :scroll_id
|
12
|
+
# The total number of documents in the Elasticsearch result
|
13
|
+
attr_reader :total
|
14
|
+
# The time, in milliseconds, the Elasticsearch call took to complete
|
15
|
+
attr_reader :took
|
16
|
+
# Whether the Elasticsearch call timed out or not
|
17
|
+
attr_reader :timed_out
|
18
|
+
# The model class associated with this result
|
19
|
+
attr_reader :model
|
8
20
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
21
|
+
# Initialize the Result
|
22
|
+
#
|
23
|
+
# * +result+ The result returned from the Elasticsearch client / +.es+ call.
|
24
|
+
# * +model+ The model class on which the results should be applied.
|
25
|
+
def initialize(result, model = nil)
|
26
|
+
return unless result && result['hits']
|
27
|
+
|
28
|
+
@result = result
|
29
|
+
@scroll_id = result['_scroll_id']
|
30
|
+
@total = result['hits']['total']
|
31
|
+
@timed_out = result['timed_out']
|
32
|
+
@took = result['took']
|
16
33
|
@model = model
|
34
|
+
|
35
|
+
result['hits']['hits'] = result['hits']['hits'].map { |h| convert(h) }
|
17
36
|
end
|
18
37
|
|
38
|
+
# Each implementation for the Enumerable. Yield each element in the +result['hits']['hits']+ array.
|
19
39
|
def each
|
20
|
-
return [] unless
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
40
|
+
return [] unless result['hits'] && result['hits']['hits'].count.positive?
|
41
|
+
result['hits']['hits'].each { |h| yield h }
|
42
|
+
end
|
43
|
+
|
44
|
+
# Send all undefined methods to the +result['hits']['hits']+ array.
|
45
|
+
def method_missing(m, *args, &block)
|
46
|
+
respond_to_missing?(m) ? result['hits']['hits'].send(m, *args, &block) : super
|
26
47
|
end
|
27
48
|
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
end
|
49
|
+
# Report whether the +result['hits']['hits']+ array responds to the method, so it can be delegated.
|
50
|
+
def respond_to_missing?(m, include_private = false)
|
51
|
+
result['hits']['hits'].respond_to?(m, include_private) || super
|
32
52
|
end
|
33
53
|
|
34
54
|
private
|
35
55
|
|
56
|
+
# Convert an Elasticsearch hit to a Sequel::Model
|
36
57
|
def convert(hit)
|
37
58
|
return hit unless model
|
38
59
|
source = hit['_source'].each_with_object({}) { |(k, v), h| h[k.to_sym] = v }
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sequel-elasticsearch
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jurgens du Toit
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-02-
|
11
|
+
date: 2018-02-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: elasticsearch
|
@@ -139,6 +139,7 @@ files:
|
|
139
139
|
- LICENSE.txt
|
140
140
|
- README.md
|
141
141
|
- Rakefile
|
142
|
+
- _config.yml
|
142
143
|
- lib/sequel/plugins/elasticsearch.rb
|
143
144
|
- lib/sequel/plugins/elasticsearch/result.rb
|
144
145
|
- lib/sequel/plugins/elasticsearch/version.rb
|