sequel-elasticsearch 0.4.7 → 0.4.13
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +9 -5
- data/Gemfile +2 -0
- data/Gemfile.ci +1 -0
- data/README.md +22 -0
- data/Rakefile +2 -0
- data/lib/sequel/plugins/elasticsearch.rb +44 -33
- data/lib/sequel/plugins/elasticsearch/result.rb +7 -0
- data/lib/sequel/plugins/elasticsearch/version.rb +3 -1
- data/sequel-elasticsearch.gemspec +8 -4
- metadata +55 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 37a0e42fb1cc78e626eeb7b83b9054bd932ffe60de8bd02cbbb99f8d2997c190
|
4
|
+
data.tar.gz: a1cd9bc96071fa402aeb274269f8f9ac5aacc122153e5f6660d1317ffe0d88ec
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bed637128c79e6db4afd8feaba9d24bcd5c09386e4bed7d0a94d31e61a7a6319d0b0c51e793597b0a00857dd4663ef689815e083f4026bbfa723aa32c834bc7a
|
7
|
+
data.tar.gz: f6edecdb9f05ad6cfa920e388c3c58551370259c3c4700a31f4e9fe4ac7d4eef41fc44394ed9157cccc3fbc14189bdc87df71be9be9ec157f12be2c3f70a0d6f
|
data/.travis.yml
CHANGED
@@ -1,22 +1,26 @@
|
|
1
|
-
|
1
|
+
os:
|
2
|
+
- linux
|
3
|
+
dist: xenial
|
4
|
+
services:
|
5
|
+
- elasticsearch
|
2
6
|
language: ruby
|
3
7
|
rvm:
|
4
|
-
- 2.3
|
5
8
|
- 2.4
|
6
9
|
- 2.5
|
7
10
|
- 2.6
|
11
|
+
- 2.7
|
8
12
|
gemfile: Gemfile.ci
|
9
13
|
env:
|
10
14
|
global:
|
11
15
|
- CC_TEST_REPORTER_ID=f35fd15664b071c621a239733eb3b063caf333ac7e795d3a20690942f64caf62
|
12
16
|
before_install:
|
13
|
-
- gem install bundler
|
17
|
+
- gem install bundler
|
14
18
|
before_script:
|
15
|
-
- curl -L https://codeclimate.com/downloads/test-reporter/test-reporter-latest-linux-amd64
|
16
|
-
> ./cc-test-reporter
|
19
|
+
- curl -L https://codeclimate.com/downloads/test-reporter/test-reporter-latest-linux-amd64 > ./cc-test-reporter
|
17
20
|
- chmod +x ./cc-test-reporter
|
18
21
|
- "./cc-test-reporter before-build"
|
19
22
|
- export TZ=Africa/Johannesburg
|
23
|
+
- sleep 10
|
20
24
|
after_script:
|
21
25
|
- "./cc-test-reporter after-build --exit-code $TRAVIS_TEST_RESULT"
|
22
26
|
deploy:
|
data/Gemfile
CHANGED
data/Gemfile.ci
CHANGED
data/README.md
CHANGED
@@ -62,6 +62,12 @@ Sequel::Model.plugin :elasticsearch,
|
|
62
62
|
|
63
63
|
And that's it! Just transact as you normally would, and your records will be created and updated in the Elasticsearch cluster.
|
64
64
|
|
65
|
+
### Indexing
|
66
|
+
|
67
|
+
Ensure that you create the [index mappings](https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping.html) for your data before using this plugin; otherwise you might get unexpected results.
|
68
|
+
|
69
|
+
The records will by default be indexed using the `values` call of the model. Should you need to customize what's indexed, you can define an `indexed_values` method (or an `as_indexed_json` method if you prefer the Rails way).
|
70
|
+
|
65
71
|
### Searching
|
66
72
|
|
67
73
|
Your model is now searchable through Elasticsearch. Just pass down a string that's parsable as a [query string query](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html).
|
@@ -105,6 +111,22 @@ while (scroll = Document.es(scroll, scroll: '1m')) && scroll.empty? == false do
|
|
105
111
|
end
|
106
112
|
```
|
107
113
|
|
114
|
+
### Import
|
115
|
+
|
116
|
+
You can import the whole dataset, or specify a dataset to be imported. This will create a new, timestamped index for your dataset, and import all the records from that dataset into the index. An alias will be created (or updated) to point to the newly created index.
|
117
|
+
|
118
|
+
```ruby
|
119
|
+
Document.import! # Import all the Document records. Use the default settings.
|
120
|
+
|
121
|
+
Document.import!(dataset: Document.where(active: true)) # Import all the active Document records
|
122
|
+
|
123
|
+
Document.import!(
|
124
|
+
index: 'active-documents', # Use the active-documents index
|
125
|
+
dataset: Document.where(active: true), # Only index active documents
|
126
|
+
batch_size: 20 # Send documents to Elasticsearch in batches of 20 records
|
127
|
+
)
|
128
|
+
```
|
129
|
+
|
108
130
|
## Development
|
109
131
|
|
110
132
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
data/Rakefile
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'elasticsearch'
|
2
4
|
require 'sequel/plugins/elasticsearch/result'
|
3
5
|
|
@@ -16,17 +18,14 @@ module Sequel
|
|
16
18
|
module Elasticsearch
|
17
19
|
# Apply the plugin to the specified model
|
18
20
|
def self.apply(model, _opts = OPTS)
|
19
|
-
model.instance_variable_set(:@elasticsearch_opts, {})
|
20
|
-
model.instance_variable_set(:@elasticsearch_index, nil)
|
21
|
-
model.instance_variable_set(:@elasticsearch_type, '_doc')
|
22
21
|
model
|
23
22
|
end
|
24
23
|
|
25
24
|
# Configure the plugin
|
26
25
|
def self.configure(model, opts = OPTS)
|
27
26
|
model.elasticsearch_opts = opts[:elasticsearch] || {}
|
28
|
-
model.elasticsearch_index = (opts[:index] || model.table_name).to_sym
|
29
|
-
model.elasticsearch_type =
|
27
|
+
model.elasticsearch_index = (opts[:index] || model.table_name.to_s.downcase).to_sym
|
28
|
+
model.elasticsearch_type = opts[:type]&.to_sym
|
30
29
|
model
|
31
30
|
end
|
32
31
|
|
@@ -59,7 +58,7 @@ module Sequel
|
|
59
58
|
scroll_id = scroll_id.scroll_id if scroll_id.is_a? Result
|
60
59
|
return nil unless scroll_id
|
61
60
|
|
62
|
-
Result.new es_client.scroll(
|
61
|
+
Result.new es_client.scroll(body: scroll_id, scroll: duration), self
|
63
62
|
end
|
64
63
|
|
65
64
|
# Execute a search or a scroll on the Model's Elasticsearch index.
|
@@ -88,27 +87,30 @@ module Sequel
|
|
88
87
|
# Use the +reindex!+ method to create a completely new index and alias.
|
89
88
|
def import!(index: nil, dataset: nil, batch_size: 100)
|
90
89
|
dataset ||= self.dataset
|
91
|
-
index_name = index || last_index
|
90
|
+
index_name = index || last_index || elasticsearch_index
|
92
91
|
|
93
92
|
# Index all the documents
|
94
93
|
body = []
|
95
|
-
dataset.
|
96
|
-
body
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
data: { doc: row.indexed_values, doc_as_upsert: true }
|
102
|
-
}
|
103
|
-
}
|
104
|
-
print '.'
|
105
|
-
next unless body.count >= batch_size
|
94
|
+
dataset.each_page(batch_size) do |ds|
|
95
|
+
body = []
|
96
|
+
ds.all.each do |row|
|
97
|
+
print '.'
|
98
|
+
body << { update: import_object(index_name, row) }
|
99
|
+
end
|
106
100
|
puts '/'
|
107
|
-
|
108
101
|
es_client.bulk body: body
|
109
|
-
body =
|
102
|
+
body = nil
|
110
103
|
end
|
111
|
-
|
104
|
+
end
|
105
|
+
|
106
|
+
def import_object(idx, row)
|
107
|
+
val = {
|
108
|
+
_index: idx,
|
109
|
+
_id: row.document_id,
|
110
|
+
data: { doc: row.as_indexed_json, doc_as_upsert: true }
|
111
|
+
}
|
112
|
+
val[:_type] = elasticsearch_type if elasticsearch_type
|
113
|
+
val
|
112
114
|
end
|
113
115
|
|
114
116
|
# Creates a new index in Elasticsearch from the specified dataset, as
|
@@ -123,6 +125,7 @@ module Sequel
|
|
123
125
|
alias_index(index_name)
|
124
126
|
end
|
125
127
|
|
128
|
+
# Remove previous aliases and point the `elasticsearch_index` to the new index.
|
126
129
|
def alias_index(new_index)
|
127
130
|
es_client.indices.update_aliases body: {
|
128
131
|
actions: [
|
@@ -135,24 +138,30 @@ module Sequel
|
|
135
138
|
# Find the last created index that matches the specified index name.
|
136
139
|
def last_index
|
137
140
|
es_client.indices.get_alias(name: elasticsearch_index)&.keys&.sort&.first
|
141
|
+
rescue ::Elasticsearch::Transport::Transport::Errors::NotFound
|
142
|
+
nil
|
138
143
|
end
|
139
144
|
|
140
|
-
# Generate a timestamped index name
|
141
|
-
# This will use the
|
142
|
-
# index names like this:
|
143
|
-
#
|
144
|
-
# base-name-staging-20191004.123456 # This is a staging index
|
145
|
-
# base-name-20191005.171213 # This is a production index
|
145
|
+
# Generate a timestamped index name.
|
146
|
+
# This will use the current timestamp to construct index names like this:
|
146
147
|
#
|
148
|
+
# base-name-20191004.123456
|
147
149
|
def timestamped_index
|
148
|
-
time_str = Time.now.strftime('%Y%m%d.%H%M%S')
|
149
|
-
|
150
|
-
[elasticsearch_index, env_str, time_str].compact.join('-')
|
150
|
+
time_str = Time.now.strftime('%Y%m%d.%H%M%S') # TODO: Make the format configurable
|
151
|
+
"#{elasticsearch_index}-#{time_str}".to_sym
|
151
152
|
end
|
152
153
|
end
|
153
154
|
|
154
155
|
# The instance methods that will be added to the Sequel::Model
|
155
156
|
module InstanceMethods
|
157
|
+
def elasticsearch_index
|
158
|
+
self.class.elasticsearch_index
|
159
|
+
end
|
160
|
+
|
161
|
+
def elasticsearch_type
|
162
|
+
self.class.elasticsearch_type
|
163
|
+
end
|
164
|
+
|
156
165
|
# Sequel::Model after_create hook to add the new record to the Elasticsearch index.
|
157
166
|
# It's "safe" in that it won't raise an error if it fails.
|
158
167
|
def after_create
|
@@ -179,6 +188,8 @@ module Sequel
|
|
179
188
|
self.class.es_client
|
180
189
|
end
|
181
190
|
|
191
|
+
# Mirror the Elasticsearch Rails plugin. Use this to override what data
|
192
|
+
# is sent to Elasticsearch
|
182
193
|
def as_indexed_json
|
183
194
|
indexed_values
|
184
195
|
end
|
@@ -192,7 +203,7 @@ module Sequel
|
|
192
203
|
# Create or update the document on the Elasticsearch cluster.
|
193
204
|
def index_document(opts = {})
|
194
205
|
params = document_path(opts)
|
195
|
-
params[:body] =
|
206
|
+
params[:body] = as_indexed_json
|
196
207
|
es_client.index params
|
197
208
|
end
|
198
209
|
|
@@ -210,8 +221,8 @@ module Sequel
|
|
210
221
|
# Determine the complete path to a document (/index/type/id) in the Elasticsearch cluster.
|
211
222
|
def document_path(opts = {})
|
212
223
|
{
|
213
|
-
index: opts.delete(:index) ||
|
214
|
-
type: opts.delete(:type) ||
|
224
|
+
index: opts.delete(:index) || elasticsearch_index,
|
225
|
+
type: opts.delete(:type) || elasticsearch_type,
|
215
226
|
id: opts.delete(:id) || document_id
|
216
227
|
}
|
217
228
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Sequel
|
2
4
|
module Plugins
|
3
5
|
module Elasticsearch
|
@@ -42,6 +44,11 @@ module Sequel
|
|
42
44
|
result['hits']['hits'].each { |h| yield h }
|
43
45
|
end
|
44
46
|
|
47
|
+
# Send back the complete result set
|
48
|
+
def all
|
49
|
+
result['hits']['hits']
|
50
|
+
end
|
51
|
+
|
45
52
|
# Send all undefined methods to the +result['hits']['hits']+ array.
|
46
53
|
def method_missing(meth, *args, &block)
|
47
54
|
respond_to_missing?(meth) ? result['hits']['hits'].send(meth, *args, &block) : super
|
@@ -1,5 +1,6 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
|
2
|
-
lib = File.expand_path('
|
3
|
+
lib = File.expand_path('lib', __dir__)
|
3
4
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
5
|
require 'sequel/plugins/elasticsearch/version'
|
5
6
|
|
@@ -25,9 +26,12 @@ Gem::Specification.new do |spec|
|
|
25
26
|
spec.add_dependency 'sequel', '>= 4.0'
|
26
27
|
|
27
28
|
spec.add_development_dependency 'bundler', '>= 1.13'
|
28
|
-
spec.add_development_dependency 'rake', '~>
|
29
|
+
spec.add_development_dependency 'rake', '~> 12.3.3'
|
29
30
|
spec.add_development_dependency 'rspec', '~> 3.0'
|
30
|
-
spec.add_development_dependency 'rubocop', '
|
31
|
-
spec.add_development_dependency '
|
31
|
+
spec.add_development_dependency 'rubocop', '>= 0.52'
|
32
|
+
spec.add_development_dependency 'rubocop-rspec', '>= 1.37'
|
33
|
+
spec.add_development_dependency 'simplecov', '~> 0.15', '< 0.18'
|
34
|
+
spec.add_development_dependency 'sqlite3', '~> 1.4'
|
35
|
+
spec.add_development_dependency 'timecop', '>= 0.9'
|
32
36
|
spec.add_development_dependency 'webmock', '~> 3.2'
|
33
37
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sequel-elasticsearch
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.
|
4
|
+
version: 0.4.13
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jurgens du Toit
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-12-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: elasticsearch
|
@@ -58,14 +58,14 @@ dependencies:
|
|
58
58
|
requirements:
|
59
59
|
- - "~>"
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version:
|
61
|
+
version: 12.3.3
|
62
62
|
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version:
|
68
|
+
version: 12.3.3
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: rspec
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -84,16 +84,30 @@ dependencies:
|
|
84
84
|
name: rubocop
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
86
86
|
requirements:
|
87
|
-
- - "
|
87
|
+
- - ">="
|
88
88
|
- !ruby/object:Gem::Version
|
89
89
|
version: '0.52'
|
90
90
|
type: :development
|
91
91
|
prerelease: false
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
93
93
|
requirements:
|
94
|
-
- - "
|
94
|
+
- - ">="
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0.52'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: rubocop-rspec
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '1.37'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '1.37'
|
97
111
|
- !ruby/object:Gem::Dependency
|
98
112
|
name: simplecov
|
99
113
|
requirement: !ruby/object:Gem::Requirement
|
@@ -101,6 +115,9 @@ dependencies:
|
|
101
115
|
- - "~>"
|
102
116
|
- !ruby/object:Gem::Version
|
103
117
|
version: '0.15'
|
118
|
+
- - "<"
|
119
|
+
- !ruby/object:Gem::Version
|
120
|
+
version: '0.18'
|
104
121
|
type: :development
|
105
122
|
prerelease: false
|
106
123
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -108,6 +125,37 @@ dependencies:
|
|
108
125
|
- - "~>"
|
109
126
|
- !ruby/object:Gem::Version
|
110
127
|
version: '0.15'
|
128
|
+
- - "<"
|
129
|
+
- !ruby/object:Gem::Version
|
130
|
+
version: '0.18'
|
131
|
+
- !ruby/object:Gem::Dependency
|
132
|
+
name: sqlite3
|
133
|
+
requirement: !ruby/object:Gem::Requirement
|
134
|
+
requirements:
|
135
|
+
- - "~>"
|
136
|
+
- !ruby/object:Gem::Version
|
137
|
+
version: '1.4'
|
138
|
+
type: :development
|
139
|
+
prerelease: false
|
140
|
+
version_requirements: !ruby/object:Gem::Requirement
|
141
|
+
requirements:
|
142
|
+
- - "~>"
|
143
|
+
- !ruby/object:Gem::Version
|
144
|
+
version: '1.4'
|
145
|
+
- !ruby/object:Gem::Dependency
|
146
|
+
name: timecop
|
147
|
+
requirement: !ruby/object:Gem::Requirement
|
148
|
+
requirements:
|
149
|
+
- - ">="
|
150
|
+
- !ruby/object:Gem::Version
|
151
|
+
version: '0.9'
|
152
|
+
type: :development
|
153
|
+
prerelease: false
|
154
|
+
version_requirements: !ruby/object:Gem::Requirement
|
155
|
+
requirements:
|
156
|
+
- - ">="
|
157
|
+
- !ruby/object:Gem::Version
|
158
|
+
version: '0.9'
|
111
159
|
- !ruby/object:Gem::Dependency
|
112
160
|
name: webmock
|
113
161
|
requirement: !ruby/object:Gem::Requirement
|
@@ -163,8 +211,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
163
211
|
- !ruby/object:Gem::Version
|
164
212
|
version: '0'
|
165
213
|
requirements: []
|
166
|
-
|
167
|
-
rubygems_version: 2.7.7
|
214
|
+
rubygems_version: 3.0.8
|
168
215
|
signing_key:
|
169
216
|
specification_version: 4
|
170
217
|
summary: A plugin for the Sequel gem to sync data to Elasticsearch.
|