sequel-elasticsearch 0.4.6 → 0.4.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/sequel/plugins/elasticsearch.rb +84 -16
- data/lib/sequel/plugins/elasticsearch/version.rb +1 -1
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8f6e85a7a9a92736d74000717cf12de5fb7b42f20e839a0d57611e79ca014f52
|
4
|
+
data.tar.gz: 1e049bd725b6a164747e1982d119a9adc214788dc5a1624fafcdfcd612e7d767
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a72f7253761846209b0b86f704cb44e57be9d0d2561ab78b92bdd6912799ecc8e424c4779c252bf67da61aa62003c1f5bc9dcf0043ca1e3fee618ab3de9b89b3
|
7
|
+
data.tar.gz: 14de18c7db260ad3615a732de6338f7402aaedd34721e3826ba4cb3f7faa052e615080fcafef2e5155c2977cfd44a3bfaac5628268a07b9784446e624c40185a
|
@@ -78,8 +78,76 @@ module Sequel
|
|
78
78
|
nil
|
79
79
|
end
|
80
80
|
|
81
|
-
# Import the whole dataset into Elasticsearch
|
82
|
-
|
81
|
+
# Import the whole dataset into Elasticsearch.
|
82
|
+
#
|
83
|
+
# This assumes that a template that covers all the possible index names
|
84
|
+
# has been created. See +timestamped_index+ for examples of the indices
|
85
|
+
# that will be created.
|
86
|
+
#
|
87
|
+
# This adds or updates records to the last index created by this utility.
|
88
|
+
# Use the +reindex!+ method to create a completely new index and alias.
|
89
|
+
# Import a dataset into an existing Elasticsearch index using bulk upserts.
#
# index      - Name of the index to write to. Defaults to +last_index+ and
#              falls back to +elasticsearch_index+ when that returns nil.
# dataset    - Object responding to +all+ whose rows are imported. Defaults
#              to +self.dataset+.
# batch_size - Maximum number of rows sent per bulk request.
#
# Prints a '.' per row and a '/' line after every full batch as progress.
#
# Returns the response of the trailing partial-batch bulk request, or nil
# when the last batch was full (or there were no rows).
def import!(index: nil, dataset: nil, batch_size: 100)
  dataset ||= self.dataset
  # Without the final fallback a missing alias would leave _index as nil.
  index_name = index || last_index || elasticsearch_index
  # each_slice rejects sizes below 1; flushing every row matches what a
  # non-positive batch_size did in the hand-rolled loop.
  slice_size = [batch_size, 1].max

  leftover_response = nil
  dataset.all.each_slice(slice_size) do |rows|
    body = rows.map do |row|
      print '.'
      {
        update: {
          _index: index_name,
          _type: elasticsearch_type,
          _id: row.document_id,
          # indexed_values is defined below the instance-level `private`
          # marker, so an explicit-receiver call would raise NoMethodError.
          data: { doc: row.send(:indexed_values), doc_as_upsert: true }
        }
      }
    end

    full_batch = rows.size >= batch_size
    puts '/' if full_batch
    response = es_client.bulk body: body
    leftover_response = full_batch ? nil : response
  end
  leftover_response
end
|
113
|
+
|
114
|
+
# Creates a new index in Elasticsearch from the specified dataset, as
|
115
|
+
# well as an alias to the new index.
|
116
|
+
#
|
117
|
+
# See the documentation on +import!+ for more details.
|
118
|
+
# Build a fresh index from the dataset and point the alias at it.
#
# index      - Name of the index to create. Defaults to +timestamped_index+.
# dataset    - Passed through to +import!+.
# batch_size - Passed through to +import!+.
#
# Returns whatever +alias_index+ returns.
def reindex!(index: nil, dataset: nil, batch_size: 100)
  target = index || timestamped_index
  import!(index: target, dataset: dataset, batch_size: batch_size)

  # Switch the alias over once the import has finished.
  alias_index(target)
end
|
125
|
+
|
126
|
+
# Repoint the +elasticsearch_index+ alias at +new_index+.
#
# Issues a single update_aliases request with two actions: remove the alias
# from every index matching "<elasticsearch_index>*", then add it to
# +new_index+.
def alias_index(new_index)
  alias_name = elasticsearch_index
  drop_existing = { remove: { index: "#{alias_name}*", alias: alias_name } }
  attach_new = { add: { index: new_index, alias: alias_name } }

  es_client.indices.update_aliases body: { actions: [drop_existing, attach_new] }
end
|
134
|
+
|
135
|
+
# Find the last created index that matches the specified index name.
|
136
|
+
# Find the most recently created index that carries the
# +elasticsearch_index+ alias.
#
# Names built by +timestamped_index+ end in a sortable timestamp, so the
# greatest name is the newest one (taking the first sorted name would pick
# the oldest instead).
#
# Returns the index name, or nil when the lookup yields nil or no indices.
def last_index
  es_client.indices.get_alias(name: elasticsearch_index)&.keys&.max
end
|
139
|
+
|
140
|
+
# Generate a timestamped index name according to the environment.
|
141
|
+
# This will use the +APP_ENV+ ENV variable and a timestamp to construct
|
142
|
+
# index names like this:
|
143
|
+
#
|
144
|
+
# base-name-staging-20191004.123456 # This is a staging index
|
145
|
+
# base-name-20191005.171213 # This is a production index
|
146
|
+
#
|
147
|
+
# Build an index name from +elasticsearch_index+, the +APP_ENV+ environment
# variable and the current time (formatted as YYYYmmdd.HHMMSS).
#
# The environment segment is left out when +APP_ENV+ is unset, empty or
# "production"; an empty value would otherwise yield a double hyphen.
#
# Returns the hyphen-joined index name.
def timestamped_index
  time_str = Time.now.strftime('%Y%m%d.%H%M%S')
  app_env = ENV['APP_ENV']
  env_str = app_env unless app_env.nil? || app_env.empty? || app_env == 'production'
  [elasticsearch_index, env_str, time_str].compact.join('-')
end
|
84
152
|
end
|
85
153
|
|
@@ -117,39 +185,37 @@ module Sequel
|
|
117
185
|
|
118
186
|
# Internal reference for index_document. Override this for alternate
|
119
187
|
# implementations of indexing the document.
|
120
|
-
def _index_document
|
121
|
-
index_document
|
188
|
+
# Internal entry point for indexing; hands +opts+ straight to
# +index_document+. Override this to plug in a different indexing
# implementation.
def _index_document(opts = {})
  index_document(opts)
end
|
123
191
|
|
124
192
|
# Create or update the document on the Elasticsearch cluster.
|
125
|
-
def index_document
|
126
|
-
params = document_path
|
193
|
+
# Create or update this record's document on the Elasticsearch cluster.
#
# opts - Passed to +document_path+ to build the request parameters.
#
# The request body is the result of +indexed_values+. Returns whatever
# +es_client.index+ returns.
def index_document(opts = {})
  request = document_path(opts).tap { |path| path[:body] = indexed_values }
  es_client.index(request)
end
|
130
198
|
|
131
199
|
# Internal reference for destroy_document. Override this for alternate
|
132
200
|
# implementations of removing the document.
|
133
|
-
def _destroy_document
|
134
|
-
destroy_document
|
201
|
+
# Internal entry point for removal; hands +opts+ straight to
# +destroy_document+. Override this to plug in a different removal
# implementation.
def _destroy_document(opts = {})
  destroy_document(opts)
end
|
136
204
|
|
137
205
|
# Remove the document from the Elasticsearch cluster.
|
138
|
-
def destroy_document
|
139
|
-
es_client.delete document_path
|
206
|
+
# Delete this record's document from the Elasticsearch cluster.
#
# opts - Passed to +document_path+ to build the request parameters.
#
# Returns whatever +es_client.delete+ returns.
def destroy_document(opts = {})
  es_client.delete(document_path(opts))
end
|
141
209
|
|
142
210
|
# Determine the complete path to a document (/index/type/id) in the Elasticsearch cluster.
|
143
|
-
def document_path
|
211
|
+
# Determine the complete path to a document (/index/type/id) in the
# Elasticsearch cluster.
#
# opts - Optional overrides: +:index+, +:type+ and +:id+. Any key that is
#        missing (or falsy) falls back to the class-level
#        +elasticsearch_index+ / +elasticsearch_type+ or to +document_id+.
#
# The hash is only read, never modified, so callers can reuse it or pass a
# frozen hash.
#
# Returns a Hash with +:index+, +:type+ and +:id+ keys.
def document_path(opts = {})
  {
    index: opts[:index] || self.class.elasticsearch_index,
    type: opts[:type] || self.class.elasticsearch_type,
    id: opts[:id] || document_id
  }
end
|
150
218
|
|
151
|
-
private
|
152
|
-
|
153
219
|
# Determine the ID to be used for the document in the Elasticsearch cluster.
|
154
220
|
# It will join the values of a multi field primary key with an underscore.
|
155
221
|
def document_id
|
@@ -158,6 +224,8 @@ module Sequel
|
|
158
224
|
doc_id
|
159
225
|
end
|
160
226
|
|
227
|
+
private
|
228
|
+
|
161
229
|
# Values to be indexed
|
162
230
|
def indexed_values
|
163
231
|
# TODO: Deprecate this method in favour of as_indexed_json
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sequel-elasticsearch
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.6
|
4
|
+
version: 0.4.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jurgens du Toit
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-10-
|
11
|
+
date: 2019-10-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: elasticsearch
|
@@ -163,7 +163,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
163
163
|
- !ruby/object:Gem::Version
|
164
164
|
version: '0'
|
165
165
|
requirements: []
|
166
|
-
|
166
|
+
rubyforge_project:
|
167
|
+
rubygems_version: 2.7.7
|
167
168
|
signing_key:
|
168
169
|
specification_version: 4
|
169
170
|
summary: A plugin for the Sequel gem to sync data to Elasticsearch.
|