sequel-elasticsearch 0.4.7 → 0.4.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +9 -5
- data/Gemfile +2 -0
- data/Gemfile.ci +1 -0
- data/README.md +22 -0
- data/Rakefile +2 -0
- data/lib/sequel/plugins/elasticsearch.rb +44 -33
- data/lib/sequel/plugins/elasticsearch/result.rb +7 -0
- data/lib/sequel/plugins/elasticsearch/version.rb +3 -1
- data/sequel-elasticsearch.gemspec +8 -4
- metadata +55 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 37a0e42fb1cc78e626eeb7b83b9054bd932ffe60de8bd02cbbb99f8d2997c190
|
4
|
+
data.tar.gz: a1cd9bc96071fa402aeb274269f8f9ac5aacc122153e5f6660d1317ffe0d88ec
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bed637128c79e6db4afd8feaba9d24bcd5c09386e4bed7d0a94d31e61a7a6319d0b0c51e793597b0a00857dd4663ef689815e083f4026bbfa723aa32c834bc7a
|
7
|
+
data.tar.gz: f6edecdb9f05ad6cfa920e388c3c58551370259c3c4700a31f4e9fe4ac7d4eef41fc44394ed9157cccc3fbc14189bdc87df71be9be9ec157f12be2c3f70a0d6f
|
data/.travis.yml
CHANGED
@@ -1,22 +1,26 @@
|
|
1
|
-
|
1
|
+
os:
|
2
|
+
- linux
|
3
|
+
dist: xenial
|
4
|
+
services:
|
5
|
+
- elasticsearch
|
2
6
|
language: ruby
|
3
7
|
rvm:
|
4
|
-
- 2.3
|
5
8
|
- 2.4
|
6
9
|
- 2.5
|
7
10
|
- 2.6
|
11
|
+
- 2.7
|
8
12
|
gemfile: Gemfile.ci
|
9
13
|
env:
|
10
14
|
global:
|
11
15
|
- CC_TEST_REPORTER_ID=f35fd15664b071c621a239733eb3b063caf333ac7e795d3a20690942f64caf62
|
12
16
|
before_install:
|
13
|
-
- gem install bundler
|
17
|
+
- gem install bundler
|
14
18
|
before_script:
|
15
|
-
- curl -L https://codeclimate.com/downloads/test-reporter/test-reporter-latest-linux-amd64
|
16
|
-
> ./cc-test-reporter
|
19
|
+
- curl -L https://codeclimate.com/downloads/test-reporter/test-reporter-latest-linux-amd64 > ./cc-test-reporter
|
17
20
|
- chmod +x ./cc-test-reporter
|
18
21
|
- "./cc-test-reporter before-build"
|
19
22
|
- export TZ=Africa/Johannesburg
|
23
|
+
- sleep 10
|
20
24
|
after_script:
|
21
25
|
- "./cc-test-reporter after-build --exit-code $TRAVIS_TEST_RESULT"
|
22
26
|
deploy:
|
data/Gemfile
CHANGED
data/Gemfile.ci
CHANGED
data/README.md
CHANGED
@@ -62,6 +62,12 @@ Sequel::Model.plugin :elasticsearch,
|
|
62
62
|
|
63
63
|
And that's it! Just transact as you normally would, and your records will be created and updated in the Elasticsearch cluster.
|
64
64
|
|
65
|
+
### Indexing
|
66
|
+
|
67
|
+
Ensure that you create the [index mappings](https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping.html) for your data before using this plugin; otherwise Elasticsearch will infer the field types dynamically, which can produce unexpected results.
|
68
|
+
|
69
|
+
By default, records are indexed using the `values` call of the model. Should you need to customize what's indexed, you can define an `indexed_values` method (or an `as_indexed_json` method if you prefer the Rails way).
|
70
|
+
|
65
71
|
### Searching
|
66
72
|
|
67
73
|
Your model is now searchable through Elasticsearch. Just pass down a string that's parsable as a [query string query](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html).
|
@@ -105,6 +111,22 @@ while (scroll = Document.es(scroll, scroll: '1m')) && scroll.empty? == false do
|
|
105
111
|
end
|
106
112
|
```
|
107
113
|
|
114
|
+
### Import
|
115
|
+
|
116
|
+
You can import the whole dataset, or specify a dataset to be imported. This will create a new, timestamped index for your dataset, and import all the records from that dataset into the index. An alias will be created (or updated) to point to the newly created index.
|
117
|
+
|
118
|
+
```ruby
|
119
|
+
Document.import! # Import all the Document records. Use the default settings.
|
120
|
+
|
121
|
+
Document.import!(dataset: Document.where(active: true)) # Import all the active Document records
|
122
|
+
|
123
|
+
Document.import!(
|
124
|
+
index: 'active-documents', # Use the active-documents index
|
125
|
+
dataset: Document.where(active: true), # Only index active documents
|
126
|
+
batch_size: 20 # Send documents to Elasticsearch in batches of 20 records
|
127
|
+
)
|
128
|
+
```
|
129
|
+
|
108
130
|
## Development
|
109
131
|
|
110
132
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
data/Rakefile
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'elasticsearch'
|
2
4
|
require 'sequel/plugins/elasticsearch/result'
|
3
5
|
|
@@ -16,17 +18,14 @@ module Sequel
|
|
16
18
|
module Elasticsearch
|
17
19
|
# Apply the plugin to the specified model
|
18
20
|
def self.apply(model, _opts = OPTS)
|
19
|
-
model.instance_variable_set(:@elasticsearch_opts, {})
|
20
|
-
model.instance_variable_set(:@elasticsearch_index, nil)
|
21
|
-
model.instance_variable_set(:@elasticsearch_type, '_doc')
|
22
21
|
model
|
23
22
|
end
|
24
23
|
|
25
24
|
# Configure the plugin
|
26
25
|
def self.configure(model, opts = OPTS)
|
27
26
|
model.elasticsearch_opts = opts[:elasticsearch] || {}
|
28
|
-
model.elasticsearch_index = (opts[:index] || model.table_name).to_sym
|
29
|
-
model.elasticsearch_type =
|
27
|
+
model.elasticsearch_index = (opts[:index] || model.table_name.to_s.downcase).to_sym
|
28
|
+
model.elasticsearch_type = opts[:type]&.to_sym
|
30
29
|
model
|
31
30
|
end
|
32
31
|
|
@@ -59,7 +58,7 @@ module Sequel
|
|
59
58
|
scroll_id = scroll_id.scroll_id if scroll_id.is_a? Result
|
60
59
|
return nil unless scroll_id
|
61
60
|
|
62
|
-
Result.new es_client.scroll(
|
61
|
+
Result.new es_client.scroll(body: scroll_id, scroll: duration), self
|
63
62
|
end
|
64
63
|
|
65
64
|
# Execute a search or a scroll on the Model's Elasticsearch index.
|
@@ -88,27 +87,30 @@ module Sequel
|
|
88
87
|
# Use the +reindex!+ method to create a completely new index and alias.
|
89
88
|
def import!(index: nil, dataset: nil, batch_size: 100)
|
90
89
|
dataset ||= self.dataset
|
91
|
-
index_name = index || last_index
|
90
|
+
index_name = index || last_index || elasticsearch_index
|
92
91
|
|
93
92
|
# Index all the documents
|
94
93
|
body = []
|
95
|
-
dataset.
|
96
|
-
body
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
data: { doc: row.indexed_values, doc_as_upsert: true }
|
102
|
-
}
|
103
|
-
}
|
104
|
-
print '.'
|
105
|
-
next unless body.count >= batch_size
|
94
|
+
dataset.each_page(batch_size) do |ds|
|
95
|
+
body = []
|
96
|
+
ds.all.each do |row|
|
97
|
+
print '.'
|
98
|
+
body << { update: import_object(index_name, row) }
|
99
|
+
end
|
106
100
|
puts '/'
|
107
|
-
|
108
101
|
es_client.bulk body: body
|
109
|
-
body =
|
102
|
+
body = nil
|
110
103
|
end
|
111
|
-
|
104
|
+
end
|
105
|
+
|
106
|
+
def import_object(idx, row)
|
107
|
+
val = {
|
108
|
+
_index: idx,
|
109
|
+
_id: row.document_id,
|
110
|
+
data: { doc: row.as_indexed_json, doc_as_upsert: true }
|
111
|
+
}
|
112
|
+
val[:_type] = elasticsearch_type if elasticsearch_type
|
113
|
+
val
|
112
114
|
end
|
113
115
|
|
114
116
|
# Creates a new index in Elasticsearch from the specified dataset, as
|
@@ -123,6 +125,7 @@ module Sequel
|
|
123
125
|
alias_index(index_name)
|
124
126
|
end
|
125
127
|
|
128
|
+
# Remove previous aliases and point the `elasticsearch_index` to the new index.
|
126
129
|
def alias_index(new_index)
|
127
130
|
es_client.indices.update_aliases body: {
|
128
131
|
actions: [
|
@@ -135,24 +138,30 @@ module Sequel
|
|
135
138
|
# Find the last created index that matches the specified index name.
|
136
139
|
def last_index
|
137
140
|
es_client.indices.get_alias(name: elasticsearch_index)&.keys&.sort&.first
|
141
|
+
rescue ::Elasticsearch::Transport::Transport::Errors::NotFound
|
142
|
+
nil
|
138
143
|
end
|
139
144
|
|
140
|
-
# Generate a timestamped index name
|
141
|
-
# This will use the
|
142
|
-
# index names like this:
|
143
|
-
#
|
144
|
-
# base-name-staging-20191004.123456 # This is a staging index
|
145
|
-
# base-name-20191005.171213 # This is a production index
|
145
|
+
# Generate a timestamped index name.
|
146
|
+
# This will use the current timestamp to construct index names like this:
|
146
147
|
#
|
148
|
+
# base-name-20191004.123456
|
147
149
|
def timestamped_index
|
148
|
-
time_str = Time.now.strftime('%Y%m%d.%H%M%S')
|
149
|
-
|
150
|
-
[elasticsearch_index, env_str, time_str].compact.join('-')
|
150
|
+
time_str = Time.now.strftime('%Y%m%d.%H%M%S') # TODO: Make the format configurable
|
151
|
+
"#{elasticsearch_index}-#{time_str}".to_sym
|
151
152
|
end
|
152
153
|
end
|
153
154
|
|
154
155
|
# The instance methods that will be added to the Sequel::Model
|
155
156
|
module InstanceMethods
|
157
|
+
def elasticsearch_index
|
158
|
+
self.class.elasticsearch_index
|
159
|
+
end
|
160
|
+
|
161
|
+
def elasticsearch_type
|
162
|
+
self.class.elasticsearch_type
|
163
|
+
end
|
164
|
+
|
156
165
|
# Sequel::Model after_create hook to add the new record to the Elasticsearch index.
|
157
166
|
# It's "safe" in that it won't raise an error if it fails.
|
158
167
|
def after_create
|
@@ -179,6 +188,8 @@ module Sequel
|
|
179
188
|
self.class.es_client
|
180
189
|
end
|
181
190
|
|
191
|
+
# Mirror the Elasticsearch Rails plugin. Use this to override what data
|
192
|
+
# is sent to Elasticsearch
|
182
193
|
def as_indexed_json
|
183
194
|
indexed_values
|
184
195
|
end
|
@@ -192,7 +203,7 @@ module Sequel
|
|
192
203
|
# Create or update the document on the Elasticsearch cluster.
|
193
204
|
def index_document(opts = {})
|
194
205
|
params = document_path(opts)
|
195
|
-
params[:body] =
|
206
|
+
params[:body] = as_indexed_json
|
196
207
|
es_client.index params
|
197
208
|
end
|
198
209
|
|
@@ -210,8 +221,8 @@ module Sequel
|
|
210
221
|
# Determine the complete path to a document (/index/type/id) in the Elasticsearch cluster.
|
211
222
|
def document_path(opts = {})
|
212
223
|
{
|
213
|
-
index: opts.delete(:index) ||
|
214
|
-
type: opts.delete(:type) ||
|
224
|
+
index: opts.delete(:index) || elasticsearch_index,
|
225
|
+
type: opts.delete(:type) || elasticsearch_type,
|
215
226
|
id: opts.delete(:id) || document_id
|
216
227
|
}
|
217
228
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Sequel
|
2
4
|
module Plugins
|
3
5
|
module Elasticsearch
|
@@ -42,6 +44,11 @@ module Sequel
|
|
42
44
|
result['hits']['hits'].each { |h| yield h }
|
43
45
|
end
|
44
46
|
|
47
|
+
# Send back the complete result set
|
48
|
+
def all
|
49
|
+
result['hits']['hits']
|
50
|
+
end
|
51
|
+
|
45
52
|
# Send all undefined methods to the +result['hits']['hits']+ array.
|
46
53
|
def method_missing(meth, *args, &block)
|
47
54
|
respond_to_missing?(meth) ? result['hits']['hits'].send(meth, *args, &block) : super
|
@@ -1,5 +1,6 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
|
2
|
-
lib = File.expand_path('
|
3
|
+
lib = File.expand_path('lib', __dir__)
|
3
4
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
5
|
require 'sequel/plugins/elasticsearch/version'
|
5
6
|
|
@@ -25,9 +26,12 @@ Gem::Specification.new do |spec|
|
|
25
26
|
spec.add_dependency 'sequel', '>= 4.0'
|
26
27
|
|
27
28
|
spec.add_development_dependency 'bundler', '>= 1.13'
|
28
|
-
spec.add_development_dependency 'rake', '~>
|
29
|
+
spec.add_development_dependency 'rake', '~> 12.3.3'
|
29
30
|
spec.add_development_dependency 'rspec', '~> 3.0'
|
30
|
-
spec.add_development_dependency 'rubocop', '
|
31
|
-
spec.add_development_dependency '
|
31
|
+
spec.add_development_dependency 'rubocop', '>= 0.52'
|
32
|
+
spec.add_development_dependency 'rubocop-rspec', '>= 1.37'
|
33
|
+
spec.add_development_dependency 'simplecov', '~> 0.15', '< 0.18'
|
34
|
+
spec.add_development_dependency 'sqlite3', '~> 1.4'
|
35
|
+
spec.add_development_dependency 'timecop', '>= 0.9'
|
32
36
|
spec.add_development_dependency 'webmock', '~> 3.2'
|
33
37
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sequel-elasticsearch
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.
|
4
|
+
version: 0.4.13
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jurgens du Toit
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-12-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: elasticsearch
|
@@ -58,14 +58,14 @@ dependencies:
|
|
58
58
|
requirements:
|
59
59
|
- - "~>"
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version:
|
61
|
+
version: 12.3.3
|
62
62
|
type: :development
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version:
|
68
|
+
version: 12.3.3
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: rspec
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -84,16 +84,30 @@ dependencies:
|
|
84
84
|
name: rubocop
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
86
86
|
requirements:
|
87
|
-
- - "
|
87
|
+
- - ">="
|
88
88
|
- !ruby/object:Gem::Version
|
89
89
|
version: '0.52'
|
90
90
|
type: :development
|
91
91
|
prerelease: false
|
92
92
|
version_requirements: !ruby/object:Gem::Requirement
|
93
93
|
requirements:
|
94
|
-
- - "
|
94
|
+
- - ">="
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0.52'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: rubocop-rspec
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '1.37'
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '1.37'
|
97
111
|
- !ruby/object:Gem::Dependency
|
98
112
|
name: simplecov
|
99
113
|
requirement: !ruby/object:Gem::Requirement
|
@@ -101,6 +115,9 @@ dependencies:
|
|
101
115
|
- - "~>"
|
102
116
|
- !ruby/object:Gem::Version
|
103
117
|
version: '0.15'
|
118
|
+
- - "<"
|
119
|
+
- !ruby/object:Gem::Version
|
120
|
+
version: '0.18'
|
104
121
|
type: :development
|
105
122
|
prerelease: false
|
106
123
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -108,6 +125,37 @@ dependencies:
|
|
108
125
|
- - "~>"
|
109
126
|
- !ruby/object:Gem::Version
|
110
127
|
version: '0.15'
|
128
|
+
- - "<"
|
129
|
+
- !ruby/object:Gem::Version
|
130
|
+
version: '0.18'
|
131
|
+
- !ruby/object:Gem::Dependency
|
132
|
+
name: sqlite3
|
133
|
+
requirement: !ruby/object:Gem::Requirement
|
134
|
+
requirements:
|
135
|
+
- - "~>"
|
136
|
+
- !ruby/object:Gem::Version
|
137
|
+
version: '1.4'
|
138
|
+
type: :development
|
139
|
+
prerelease: false
|
140
|
+
version_requirements: !ruby/object:Gem::Requirement
|
141
|
+
requirements:
|
142
|
+
- - "~>"
|
143
|
+
- !ruby/object:Gem::Version
|
144
|
+
version: '1.4'
|
145
|
+
- !ruby/object:Gem::Dependency
|
146
|
+
name: timecop
|
147
|
+
requirement: !ruby/object:Gem::Requirement
|
148
|
+
requirements:
|
149
|
+
- - ">="
|
150
|
+
- !ruby/object:Gem::Version
|
151
|
+
version: '0.9'
|
152
|
+
type: :development
|
153
|
+
prerelease: false
|
154
|
+
version_requirements: !ruby/object:Gem::Requirement
|
155
|
+
requirements:
|
156
|
+
- - ">="
|
157
|
+
- !ruby/object:Gem::Version
|
158
|
+
version: '0.9'
|
111
159
|
- !ruby/object:Gem::Dependency
|
112
160
|
name: webmock
|
113
161
|
requirement: !ruby/object:Gem::Requirement
|
@@ -163,8 +211,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
163
211
|
- !ruby/object:Gem::Version
|
164
212
|
version: '0'
|
165
213
|
requirements: []
|
166
|
-
|
167
|
-
rubygems_version: 2.7.7
|
214
|
+
rubygems_version: 3.0.8
|
168
215
|
signing_key:
|
169
216
|
specification_version: 4
|
170
217
|
summary: A plugin for the Sequel gem to sync data to Elasticsearch.
|