sequel-elasticsearch 0.4.6 → 0.4.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 40d0ec9b57158ba7cde955a7e89de398fae1cc40d766444e2955d21c627260cd
4
- data.tar.gz: 8334cdfef385e392dc2d6dd4484c150e019806c4f96492dd4402cbf3ef15c086
3
+ metadata.gz: 8f6e85a7a9a92736d74000717cf12de5fb7b42f20e839a0d57611e79ca014f52
4
+ data.tar.gz: 1e049bd725b6a164747e1982d119a9adc214788dc5a1624fafcdfcd612e7d767
5
5
  SHA512:
6
- metadata.gz: 21bdfe86e4e4b805d3883a8ae6b40fd4764f0042cfbc70e7e02e5da827c6c0d9953bcb778ef27ca0af6ec5b7ec2fcf6da71f0aed3be7487ac86a3261178a9e4b
7
- data.tar.gz: 2d29bcb6942344846e3ffb7cea2fb1fd524c58ecbca4ed36069d50f0dfe6f79ba67967ce86a6232824a0c1c6de4c781dcdc4e2e804304c13e64fe1bc4fb9820a
6
+ metadata.gz: a72f7253761846209b0b86f704cb44e57be9d0d2561ab78b92bdd6912799ecc8e424c4779c252bf67da61aa62003c1f5bc9dcf0043ca1e3fee618ab3de9b89b3
7
+ data.tar.gz: 14de18c7db260ad3615a732de6338f7402aaedd34721e3826ba4cb3f7faa052e615080fcafef2e5155c2977cfd44a3bfaac5628268a07b9784446e624c40185a
@@ -78,8 +78,76 @@ module Sequel
78
78
  nil
79
79
  end
80
80
 
81
- # Import the whole dataset into Elasticsearch
82
- def import!
81
+ # Import the whole dataset into Elasticsearch.
82
+ #
83
+ # This assumes that a template that covers all the possible index names
84
+ # have been created. See +timestamped_index+ for examples of the indices
85
+ # that will be created.
86
+ #
87
+ # This adds or updates records to the last index created by this utility.
88
+ # Use the +reindex!+ method to create a completely new index and alias.
89
+ def import!(index: nil, dataset: nil, batch_size: 100)
90
+ dataset ||= self.dataset
91
+ index_name = index || last_index
92
+
93
+ # Index all the documents
94
+ body = []
95
+ dataset.all.each do |row|
96
+ body << {
97
+ update: {
98
+ _index: index_name,
99
+ _type: elasticsearch_type,
100
+ _id: row.document_id,
101
+ data: { doc: row.indexed_values, doc_as_upsert: true }
102
+ }
103
+ }
104
+ print '.'
105
+ next unless body.count >= batch_size
106
+ puts '/'
107
+
108
+ es_client.bulk body: body
109
+ body = []
110
+ end
111
+ es_client.bulk body: body if body.count.positive?
112
+ end
113
+
114
+ # Creates a new index in Elasticsearch from the specified dataset, as
115
+ # well as an alias to the new index.
116
+ #
117
+ # See the documentation on +import!+ for more details.
118
+ def reindex!(index: nil, dataset: nil, batch_size: 100)
119
+ index_name = index || timestamped_index
120
+ import!(index: index_name, dataset: dataset, batch_size: batch_size)
121
+
122
+ # Create an alias to the newly created index
123
+ alias_index(index_name)
124
+ end
125
+
126
+ def alias_index(new_index)
127
+ es_client.indices.update_aliases body: {
128
+ actions: [
129
+ { remove: { index: "#{elasticsearch_index}*", alias: elasticsearch_index } },
130
+ { add: { index: new_index, alias: elasticsearch_index } }
131
+ ]
132
+ }
133
+ end
134
+
135
+ # Find the last created index that matches the specified index name.
136
+ def last_index
137
+ es_client.indices.get_alias(name: elasticsearch_index)&.keys&.sort&.first
138
+ end
139
+
140
+ # Generate a timestamped index name according to the environment.
141
+ # This will use the +APP_ENV+ ENV variable and a timestamp to construct
142
+ # index names like this:
143
+ #
144
+ # base-name-staging-20191004.123456 # This is a staging index
145
+ # base-name-20191005.171213 # This is a production index
146
+ #
147
+ def timestamped_index
148
+ time_str = Time.now.strftime('%Y%m%d.%H%M%S')
149
+ env_str = ENV['APP_ENV'] == 'production' ? nil : ENV['APP_ENV']
150
+ [elasticsearch_index, env_str, time_str].compact.join('-')
83
151
  end
84
152
  end
85
153
 
@@ -117,39 +185,37 @@ module Sequel
117
185
 
118
186
  # Internal reference for index_document. Override this for alternate
119
187
  # implementations of indexing the document.
120
- def _index_document
121
- index_document
188
+ def _index_document(opts = {})
189
+ index_document(opts)
122
190
  end
123
191
 
124
192
  # Create or update the document on the Elasticsearch cluster.
125
- def index_document
126
- params = document_path
193
+ def index_document(opts = {})
194
+ params = document_path(opts)
127
195
  params[:body] = indexed_values
128
196
  es_client.index params
129
197
  end
130
198
 
131
199
  # Internal reference for destroy_document. Override this for alternate
132
200
  # implementations of removing the document.
133
- def _destroy_document
134
- destroy_document
201
+ def _destroy_document(opts = {})
202
+ destroy_document(opts)
135
203
  end
136
204
 
137
205
  # Remove the document from the Elasticsearch cluster.
138
- def destroy_document
139
- es_client.delete document_path
206
+ def destroy_document(opts = {})
207
+ es_client.delete document_path(opts)
140
208
  end
141
209
 
142
210
  # Determine the complete path to a document (/index/type/id) in the Elasticsearch cluster.
143
- def document_path
211
+ def document_path(opts = {})
144
212
  {
145
- index: self.class.elasticsearch_index,
146
- type: self.class.elasticsearch_type,
147
- id: document_id
213
+ index: opts.delete(:index) || self.class.elasticsearch_index,
214
+ type: opts.delete(:type) || self.class.elasticsearch_type,
215
+ id: opts.delete(:id) || document_id
148
216
  }
149
217
  end
150
218
 
151
- private
152
-
153
219
  # Determine the ID to be used for the document in the Elasticsearch cluster.
154
220
  # It will join the values of a multi field primary key with an underscore.
155
221
  def document_id
@@ -158,6 +224,8 @@ module Sequel
158
224
  doc_id
159
225
  end
160
226
 
227
+ private
228
+
161
229
  # Values to be indexed
162
230
  def indexed_values
163
231
  # TODO: Deprecate this method in favour of as_indexed_json
@@ -4,6 +4,6 @@ module Sequel
4
4
  # See https://jrgns.github.io/sequel-elasticsearch
5
5
  module Elasticsearch
6
6
  # The Gem's version.
7
- VERSION = '0.4.6'.freeze
7
+ VERSION = '0.4.7'.freeze
8
8
  end
9
9
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sequel-elasticsearch
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.6
4
+ version: 0.4.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jurgens du Toit
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-10-07 00:00:00.000000000 Z
11
+ date: 2019-10-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: elasticsearch
@@ -163,7 +163,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
163
163
  - !ruby/object:Gem::Version
164
164
  version: '0'
165
165
  requirements: []
166
- rubygems_version: 3.0.6
166
+ rubyforge_project:
167
+ rubygems_version: 2.7.7
167
168
  signing_key:
168
169
  specification_version: 4
169
170
  summary: A plugin for the Sequel gem to sync data to Elasticsearch.