RubyGems - sequel-elasticsearch - Versions diffs - 0.4.6 → 0.4.7 - Mend

sequel-elasticsearch 0.4.6 → 0.4.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4)

checksums.yaml +4 -4
data/lib/sequel/plugins/elasticsearch.rb +84 -16
data/lib/sequel/plugins/elasticsearch/version.rb +1 -1
metadata +4 -3

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 40d0ec9b57158ba7cde955a7e89de398fae1cc40d766444e2955d21c627260cd
-  data.tar.gz: 8334cdfef385e392dc2d6dd4484c150e019806c4f96492dd4402cbf3ef15c086
+  metadata.gz: 8f6e85a7a9a92736d74000717cf12de5fb7b42f20e839a0d57611e79ca014f52
+  data.tar.gz: 1e049bd725b6a164747e1982d119a9adc214788dc5a1624fafcdfcd612e7d767
 SHA512:
-  metadata.gz: 21bdfe86e4e4b805d3883a8ae6b40fd4764f0042cfbc70e7e02e5da827c6c0d9953bcb778ef27ca0af6ec5b7ec2fcf6da71f0aed3be7487ac86a3261178a9e4b
-  data.tar.gz: 2d29bcb6942344846e3ffb7cea2fb1fd524c58ecbca4ed36069d50f0dfe6f79ba67967ce86a6232824a0c1c6de4c781dcdc4e2e804304c13e64fe1bc4fb9820a
+  metadata.gz: a72f7253761846209b0b86f704cb44e57be9d0d2561ab78b92bdd6912799ecc8e424c4779c252bf67da61aa62003c1f5bc9dcf0043ca1e3fee618ab3de9b89b3
+  data.tar.gz: 14de18c7db260ad3615a732de6338f7402aaedd34721e3826ba4cb3f7faa052e615080fcafef2e5155c2977cfd44a3bfaac5628268a07b9784446e624c40185a

data/lib/sequel/plugins/elasticsearch.rb CHANGED Viewed

@@ -78,8 +78,76 @@ module Sequel
           nil
         end
-        # Import the whole dataset into Elasticsearch
-        def import!
+        # Import the whole dataset into Elasticsearch.
+        #
+        # This assumes that a template that covers all the possible index names
+        # have been created. See +timestamped_index+ for examples of the indices
+        # that will be created.
+        #
+        # This adds or updates records to the last index created by this utility.
+        # Use the +reindex!+ method to create a completely new index and alias.
+        def import!(index: nil, dataset: nil, batch_size: 100)
+          dataset ||= self.dataset
+          index_name = index || last_index
+          # Index all the documents
+          body = []
+          dataset.all.each do |row|
+            body << {
+              update: {
+                _index: index_name,
+                _type: elasticsearch_type,
+                _id: row.document_id,
+                data: { doc: row.indexed_values, doc_as_upsert: true }
+              }
+            }
+            print '.'
+            next unless body.count >= batch_size
+            puts '/'
+            es_client.bulk body: body
+            body = []
+          end
+          es_client.bulk body: body if body.count.positive?
+        end
+        # Creates a new index in Elasticsearch from the specified dataset, as
+        # well as an alias to the new index.
+        #
+        # See the documentation on +import!+ for more details.
+        def reindex!(index: nil, dataset: nil, batch_size: 100)
+          index_name = index || timestamped_index
+          import!(index: index_name, dataset: dataset, batch_size: batch_size)
+          # Create an alias to the newly created index
+          alias_index(index_name)
+        end
+        def alias_index(new_index)
+          es_client.indices.update_aliases body: {
+            actions: [
+              { remove: { index: "#{elasticsearch_index}*", alias: elasticsearch_index } },
+              { add: { index: new_index, alias: elasticsearch_index } }
+            ]
+          }
+        end
+        # Find the last created index that matches the specified index name.
+        def last_index
+          es_client.indices.get_alias(name: elasticsearch_index)&.keys&.sort&.first
+        end
+        # Generate a timestamped index name according to the environment.
+        # This will use the +APP_ENV+ ENV variable and a timestamp to construct
+        # index names like this:
+        #
+        #    base-name-staging-20191004.123456 # This is a staging index
+        #    base-name-20191005.171213 # This is a production index
+        #
+        # The environment segment is left out when +APP_ENV+ is unset, empty or
+        # 'production'.
+        def timestamped_index
+          time_str = Time.now.strftime('%Y%m%d.%H%M%S')
+          env_str = ENV['APP_ENV'].to_s
+          # An empty segment would otherwise produce a name with a double dash
+          env_str = nil if env_str.empty? || env_str == 'production'
+          [elasticsearch_index, env_str, time_str].compact.join('-')
         end
       end
@@ -117,39 +185,37 @@ module Sequel
         # Internal reference for index_document. Override this for alternate
         # implementations of indexing the document.
+        # +opts+ is handed to +index_document+ unchanged.
-        def _index_document
-          index_document
+        def _index_document(opts = {})
+          index_document(opts)
         end
         # Create or update the document on the Elasticsearch cluster.
+        # +opts+ is forwarded to +document_path+, which reads the optional
+        # +:index+, +:type+ and +:id+ overrides from it. The document body is
+        # taken from +indexed_values+.
-        def index_document
-          params = document_path
+        def index_document(opts = {})
+          params = document_path(opts)
           params[:body] = indexed_values
           es_client.index params
         end
         # Internal reference for destroy_document. Override this for alternate
         # implementations of removing the document.
+        # +opts+ is handed to +destroy_document+ unchanged.
-        def _destroy_document
-          destroy_document
+        def _destroy_document(opts = {})
+          destroy_document(opts)
         end
         # Remove the document from the Elasticsearch cluster.
+        # +opts+ is forwarded to +document_path+, which reads the optional
+        # +:index+, +:type+ and +:id+ overrides from it.
-        def destroy_document
-          es_client.delete document_path
+        def destroy_document(opts = {})
+          es_client.delete document_path(opts)
         end
         # Determine the complete path to a document (/index/type/id) in the Elasticsearch cluster.
-        def document_path
+        def document_path(opts = {})
           {
-            index: self.class.elasticsearch_index,
-            type: self.class.elasticsearch_type,
-            id: document_id
+            index: opts.delete(:index) || self.class.elasticsearch_index,
+            type: opts.delete(:type) || self.class.elasticsearch_type,
+            id: opts.delete(:id) || document_id
           }
         end
-        private
         # Determine the ID to be used for the document in the Elasticsearch cluster.
         # It will join the values of a multi field primary key with an underscore.
         def document_id
@@ -158,6 +224,8 @@ module Sequel
           doc_id
         end
+        private
         # Values to be indexed
         def indexed_values
           # TODO: Deprecate this method in favour of as_indexed_json

data/lib/sequel/plugins/elasticsearch/version.rb CHANGED Viewed

@@ -4,6 +4,6 @@ module Sequel
   # See https://jrgns.github.io/sequel-elasticsearch
   module Elasticsearch
     # The Gem's version.
-    VERSION = '0.4.6'.freeze
+    VERSION = '0.4.7'.freeze
   end
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: sequel-elasticsearch
 version: !ruby/object:Gem::Version
-  version: 0.4.6
+  version: 0.4.7
 platform: ruby
 authors:
 - Jurgens du Toit
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2019-10-07 00:00:00.000000000 Z
+date: 2019-10-30 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: elasticsearch
@@ -163,7 +163,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.0.6
+rubyforge_project:
+rubygems_version: 2.7.7
 signing_key:
 specification_version: 4
 summary: A plugin for the Sequel gem to sync data to Elasticsearch.