sequel-elasticsearch 0.4.6 → 0.4.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/sequel/plugins/elasticsearch.rb +84 -16
- data/lib/sequel/plugins/elasticsearch/version.rb +1 -1
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8f6e85a7a9a92736d74000717cf12de5fb7b42f20e839a0d57611e79ca014f52
|
4
|
+
data.tar.gz: 1e049bd725b6a164747e1982d119a9adc214788dc5a1624fafcdfcd612e7d767
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a72f7253761846209b0b86f704cb44e57be9d0d2561ab78b92bdd6912799ecc8e424c4779c252bf67da61aa62003c1f5bc9dcf0043ca1e3fee618ab3de9b89b3
|
7
|
+
data.tar.gz: 14de18c7db260ad3615a732de6338f7402aaedd34721e3826ba4cb3f7faa052e615080fcafef2e5155c2977cfd44a3bfaac5628268a07b9784446e624c40185a
|
@@ -78,8 +78,76 @@ module Sequel
|
|
78
78
|
nil
|
79
79
|
end
|
80
80
|
|
81
|
-
# Import the whole dataset into Elasticsearch
|
82
|
-
|
81
|
+
# Import the whole dataset into Elasticsearch.
|
82
|
+
#
|
83
|
+
# This assumes that a template that covers all the possible index names
|
84
|
+
# has been created. See +timestamped_index+ for examples of the indices
|
85
|
+
# that will be created.
|
86
|
+
#
|
87
|
+
# This adds or updates records to the last index created by this utility.
|
88
|
+
# Use the +reindex!+ method to create a completely new index and alias.
|
89
|
+
def import!(index: nil, dataset: nil, batch_size: 100)
|
90
|
+
dataset ||= self.dataset
|
91
|
+
index_name = index || last_index
|
92
|
+
|
93
|
+
# Index all the documents
|
94
|
+
body = []
|
95
|
+
dataset.all.each do |row|
|
96
|
+
body << {
|
97
|
+
update: {
|
98
|
+
_index: index_name,
|
99
|
+
_type: elasticsearch_type,
|
100
|
+
_id: row.document_id,
|
101
|
+
data: { doc: row.indexed_values, doc_as_upsert: true }
|
102
|
+
}
|
103
|
+
}
|
104
|
+
print '.'
|
105
|
+
next unless body.count >= batch_size
|
106
|
+
puts '/'
|
107
|
+
|
108
|
+
es_client.bulk body: body
|
109
|
+
body = []
|
110
|
+
end
|
111
|
+
es_client.bulk body: body if body.count.positive?
|
112
|
+
end
|
113
|
+
|
114
|
+
# Creates a new index in Elasticsearch from the specified dataset, as
|
115
|
+
# well as an alias to the new index.
|
116
|
+
#
|
117
|
+
# See the documentation on +import!+ for more details.
|
118
|
+
def reindex!(index: nil, dataset: nil, batch_size: 100)
|
119
|
+
index_name = index || timestamped_index
|
120
|
+
import!(index: index_name, dataset: dataset, batch_size: batch_size)
|
121
|
+
|
122
|
+
# Create an alias to the newly created index
|
123
|
+
alias_index(index_name)
|
124
|
+
end
|
125
|
+
|
126
|
+
def alias_index(new_index)
|
127
|
+
es_client.indices.update_aliases body: {
|
128
|
+
actions: [
|
129
|
+
{ remove: { index: "#{elasticsearch_index}*", alias: elasticsearch_index } },
|
130
|
+
{ add: { index: new_index, alias: elasticsearch_index } }
|
131
|
+
]
|
132
|
+
}
|
133
|
+
end
|
134
|
+
|
135
|
+
# Find the last created index that matches the specified index name.
|
136
|
+
def last_index
|
137
|
+
es_client.indices.get_alias(name: elasticsearch_index)&.keys&.sort&.first
|
138
|
+
end
|
139
|
+
|
140
|
+
# Generate a timestamped index name according to the environment.
|
141
|
+
# This will use the +APP_ENV+ ENV variable and a timestamp to construct
|
142
|
+
# index names like this:
|
143
|
+
#
|
144
|
+
# base-name-staging-20191004.123456 # This is a staging index
|
145
|
+
# base-name-20191005.171213 # This is a production index
|
146
|
+
#
|
147
|
+
def timestamped_index
|
148
|
+
time_str = Time.now.strftime('%Y%m%d.%H%M%S')
|
149
|
+
env_str = ENV['APP_ENV'] == 'production' ? nil : ENV['APP_ENV']
|
150
|
+
[elasticsearch_index, env_str, time_str].compact.join('-')
|
83
151
|
end
|
84
152
|
end
|
85
153
|
|
@@ -117,39 +185,37 @@ module Sequel
|
|
117
185
|
|
118
186
|
# Internal reference for index_document. Override this for alternate
|
119
187
|
# implementations of indexing the document.
|
120
|
-
def _index_document
|
121
|
-
index_document
|
188
|
+
def _index_document(opts = {})
|
189
|
+
index_document(opts)
|
122
190
|
end
|
123
191
|
|
124
192
|
# Create or update the document on the Elasticsearch cluster.
|
125
|
-
def index_document
|
126
|
-
params = document_path
|
193
|
+
def index_document(opts = {})
|
194
|
+
params = document_path(opts)
|
127
195
|
params[:body] = indexed_values
|
128
196
|
es_client.index params
|
129
197
|
end
|
130
198
|
|
131
199
|
# Internal reference for destroy_document. Override this for alternate
|
132
200
|
# implementations of removing the document.
|
133
|
-
def _destroy_document
|
134
|
-
destroy_document
|
201
|
+
def _destroy_document(opts = {})
|
202
|
+
destroy_document(opts)
|
135
203
|
end
|
136
204
|
|
137
205
|
# Remove the document from the Elasticsearch cluster.
|
138
|
-
def destroy_document
|
139
|
-
es_client.delete document_path
|
206
|
+
def destroy_document(opts = {})
|
207
|
+
es_client.delete document_path(opts)
|
140
208
|
end
|
141
209
|
|
142
210
|
# Determine the complete path to a document (/index/type/id) in the Elasticsearch cluster.
|
143
|
-
def document_path
|
211
|
+
def document_path(opts = {})
|
144
212
|
{
|
145
|
-
index: self.class.elasticsearch_index,
|
146
|
-
type: self.class.elasticsearch_type,
|
147
|
-
id: document_id
|
213
|
+
index: opts.delete(:index) || self.class.elasticsearch_index,
|
214
|
+
type: opts.delete(:type) || self.class.elasticsearch_type,
|
215
|
+
id: opts.delete(:id) || document_id
|
148
216
|
}
|
149
217
|
end
|
150
218
|
|
151
|
-
private
|
152
|
-
|
153
219
|
# Determine the ID to be used for the document in the Elasticsearch cluster.
|
154
220
|
# It will join the values of a multi field primary key with an underscore.
|
155
221
|
def document_id
|
@@ -158,6 +224,8 @@ module Sequel
|
|
158
224
|
doc_id
|
159
225
|
end
|
160
226
|
|
227
|
+
private
|
228
|
+
|
161
229
|
# Values to be indexed
|
162
230
|
def indexed_values
|
163
231
|
# TODO: Deprecate this method in favour of as_indexed_json
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sequel-elasticsearch
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.6
|
4
|
+
version: 0.4.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jurgens du Toit
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-10-
|
11
|
+
date: 2019-10-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: elasticsearch
|
@@ -163,7 +163,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
163
163
|
- !ruby/object:Gem::Version
|
164
164
|
version: '0'
|
165
165
|
requirements: []
|
166
|
-
|
166
|
+
rubyforge_project:
|
167
|
+
rubygems_version: 2.7.7
|
167
168
|
signing_key:
|
168
169
|
specification_version: 4
|
169
170
|
summary: A plugin for the Sequel gem to sync data to Elasticsearch.
|