sequel-elasticsearch 0.4.6 → 0.4.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 40d0ec9b57158ba7cde955a7e89de398fae1cc40d766444e2955d21c627260cd
4
- data.tar.gz: 8334cdfef385e392dc2d6dd4484c150e019806c4f96492dd4402cbf3ef15c086
3
+ metadata.gz: 8f6e85a7a9a92736d74000717cf12de5fb7b42f20e839a0d57611e79ca014f52
4
+ data.tar.gz: 1e049bd725b6a164747e1982d119a9adc214788dc5a1624fafcdfcd612e7d767
5
5
  SHA512:
6
- metadata.gz: 21bdfe86e4e4b805d3883a8ae6b40fd4764f0042cfbc70e7e02e5da827c6c0d9953bcb778ef27ca0af6ec5b7ec2fcf6da71f0aed3be7487ac86a3261178a9e4b
7
- data.tar.gz: 2d29bcb6942344846e3ffb7cea2fb1fd524c58ecbca4ed36069d50f0dfe6f79ba67967ce86a6232824a0c1c6de4c781dcdc4e2e804304c13e64fe1bc4fb9820a
6
+ metadata.gz: a72f7253761846209b0b86f704cb44e57be9d0d2561ab78b92bdd6912799ecc8e424c4779c252bf67da61aa62003c1f5bc9dcf0043ca1e3fee618ab3de9b89b3
7
+ data.tar.gz: 14de18c7db260ad3615a732de6338f7402aaedd34721e3826ba4cb3f7faa052e615080fcafef2e5155c2977cfd44a3bfaac5628268a07b9784446e624c40185a
@@ -78,8 +78,76 @@ module Sequel
78
78
  nil
79
79
  end
80
80
 
81
- # Import the whole dataset into Elasticsearch
82
- def import!
81
+ # Import the whole dataset into Elasticsearch.
82
+ #
83
+ # This assumes that a template that covers all the possible index names
84
+ # has been created. See +timestamped_index+ for examples of the indices
85
+ # that will be created.
86
+ #
87
+ # This adds or updates records in the last index created by this utility.
88
+ # Use the +reindex!+ method to create a completely new index and alias.
89
+ def import!(index: nil, dataset: nil, batch_size: 100)
90
+ dataset ||= self.dataset
91
+ index_name = index || last_index
92
+
93
+ # Index all the documents
94
+ body = []
95
+ dataset.all.each do |row|
96
+ body << {
97
+ update: {
98
+ _index: index_name,
99
+ _type: elasticsearch_type,
100
+ _id: row.document_id,
101
+ data: { doc: row.indexed_values, doc_as_upsert: true }
102
+ }
103
+ }
104
+ print '.'
105
+ next unless body.count >= batch_size
106
+ puts '/'
107
+
108
+ es_client.bulk body: body
109
+ body = []
110
+ end
111
+ es_client.bulk body: body if body.count.positive?
112
+ end
113
+
114
+ # Creates a new index in Elasticsearch from the specified dataset, as
115
+ # well as an alias to the new index.
116
+ #
117
+ # See the documentation on +import!+ for more details.
118
+ def reindex!(index: nil, dataset: nil, batch_size: 100)
119
+ index_name = index || timestamped_index
120
+ import!(index: index_name, dataset: dataset, batch_size: batch_size)
121
+
122
+ # Create an alias to the newly created index
123
+ alias_index(index_name)
124
+ end
125
+
126
+ def alias_index(new_index)
127
+ es_client.indices.update_aliases body: {
128
+ actions: [
129
+ { remove: { index: "#{elasticsearch_index}*", alias: elasticsearch_index } },
130
+ { add: { index: new_index, alias: elasticsearch_index } }
131
+ ]
132
+ }
133
+ end
134
+
135
+ # Find the last created index that matches the specified index name.
136
+ def last_index
137
+ es_client.indices.get_alias(name: elasticsearch_index)&.keys&.sort&.first
138
+ end
139
+
140
+ # Generate a timestamped index name according to the environment.
141
+ # This will use the +APP_ENV+ ENV variable and a timestamp to construct
142
+ # index names like this:
143
+ #
144
+ # base-name-staging-20191004.123456 # This is a staging index
145
+ # base-name-20191005.171213 # This is a production index
146
+ #
147
+ def timestamped_index
148
+ time_str = Time.now.strftime('%Y%m%d.%H%M%S')
149
+ env_str = ENV['APP_ENV'] == 'production' ? nil : ENV['APP_ENV']
150
+ [elasticsearch_index, env_str, time_str].compact.join('-')
83
151
  end
84
152
  end
85
153
 
@@ -117,39 +185,37 @@ module Sequel
117
185
 
118
186
  # Internal reference for index_document. Override this for alternate
119
187
  # implementations of indexing the document.
120
- def _index_document
121
- index_document
188
+ def _index_document(opts = {})
189
+ index_document(opts)
122
190
  end
123
191
 
124
192
  # Create or update the document on the Elasticsearch cluster.
125
- def index_document
126
- params = document_path
193
+ def index_document(opts = {})
194
+ params = document_path(opts)
127
195
  params[:body] = indexed_values
128
196
  es_client.index params
129
197
  end
130
198
 
131
199
  # Internal reference for destroy_document. Override this for alternate
132
200
  # implementations of removing the document.
133
- def _destroy_document
134
- destroy_document
201
+ def _destroy_document(opts = {})
202
+ destroy_document(opts)
135
203
  end
136
204
 
137
205
  # Remove the document from the Elasticsearch cluster.
138
- def destroy_document
139
- es_client.delete document_path
206
+ def destroy_document(opts = {})
207
+ es_client.delete document_path(opts)
140
208
  end
141
209
 
142
210
  # Determine the complete path to a document (/index/type/id) in the Elasticsearch cluster.
143
- def document_path
211
+ def document_path(opts = {})
144
212
  {
145
- index: self.class.elasticsearch_index,
146
- type: self.class.elasticsearch_type,
147
- id: document_id
213
+ index: opts.delete(:index) || self.class.elasticsearch_index,
214
+ type: opts.delete(:type) || self.class.elasticsearch_type,
215
+ id: opts.delete(:id) || document_id
148
216
  }
149
217
  end
150
218
 
151
- private
152
-
153
219
  # Determine the ID to be used for the document in the Elasticsearch cluster.
154
220
  # It will join the values of a multi-field primary key with an underscore.
155
221
  def document_id
@@ -158,6 +224,8 @@ module Sequel
158
224
  doc_id
159
225
  end
160
226
 
227
+ private
228
+
161
229
  # Values to be indexed
162
230
  def indexed_values
163
231
  # TODO: Deprecate this method in favour of as_indexed_json
@@ -4,6 +4,6 @@ module Sequel
4
4
  # See https://jrgns.github.io/sequel-elasticsearch
5
5
  module Elasticsearch
6
6
  # The Gem's version.
7
- VERSION = '0.4.6'.freeze
7
+ VERSION = '0.4.7'.freeze
8
8
  end
9
9
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sequel-elasticsearch
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.6
4
+ version: 0.4.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jurgens du Toit
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-10-07 00:00:00.000000000 Z
11
+ date: 2019-10-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: elasticsearch
@@ -163,7 +163,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
163
163
  - !ruby/object:Gem::Version
164
164
  version: '0'
165
165
  requirements: []
166
- rubygems_version: 3.0.6
166
+ rubyforge_project:
167
+ rubygems_version: 2.7.7
167
168
  signing_key:
168
169
  specification_version: 4
169
170
  summary: A plugin for the Sequel gem to sync data to Elasticsearch.