RubyGems - elasticsearch_record - Versions diffs - 1.5.3 → 1.7.0 - Mend

elasticsearch_record 1.5.3 → 1.7.0

Files changed (25) hide show

checksums.yaml +4 -4
data/.yardopts +1 -1
data/README.md +103 -19
data/docs/CHANGELOG.md +34 -2
data/docs/{LICENSE.txt → LICENSE} +1 -1
data/lib/active_record/connection_adapters/elasticsearch/table_statements.rb +62 -4
data/lib/active_record/connection_adapters/elasticsearch/transactions.rb +54 -0
data/lib/active_record/connection_adapters/elasticsearch/unsupported_implementation.rb +0 -7
data/lib/active_record/connection_adapters/elasticsearch_adapter.rb +17 -3
data/lib/arel/collectors/elasticsearch_query.rb +3 -0
data/lib/elasticsearch_record/core.rb +15 -2
data/lib/elasticsearch_record/gem_version.rb +2 -2
data/lib/elasticsearch_record/instrumentation/log_subscriber.rb +7 -3
data/lib/elasticsearch_record/model_api.rb +68 -10
data/lib/elasticsearch_record/model_schema.rb +2 -5
data/lib/elasticsearch_record/persistence.rb +13 -1
data/lib/elasticsearch_record/query.rb +19 -7
data/lib/elasticsearch_record/querying.rb +36 -1
data/lib/elasticsearch_record/relation/calculation_methods.rb +120 -5
data/lib/elasticsearch_record/relation/core_methods.rb +12 -0
data/lib/elasticsearch_record/relation/query_methods.rb +10 -0
data/lib/elasticsearch_record/relation/result_methods.rb +42 -5
data/lib/elasticsearch_record/result.rb +1 -1
data/lib/elasticsearch_record.rb +10 -0
metadata +4 -3

data/lib/elasticsearch_record/model_api.rb CHANGED Viewed

@@ -8,9 +8,6 @@ module ElasticsearchRecord
       @klass = klass
     end
-    # undelegated schema methods: clone rename create
-    # those should not be quick-accessible, since they might end in heavily broken index
     # delegated dangerous methods (created with exclamation mark)
     # not able to provide individual arguments - always the defaults will be used!
     #
@@ -26,6 +23,21 @@ module ElasticsearchRecord
       end
     end
+    # delegated dangerous methods with args
+    #
+    # @example
+    #   create!(:new_table_name, settings: , mappings:, alias: , ...)
+    #   clone!(:new_table_name)
+    #   rename!(:new_table_name)
+    #   backup!(to: :backup_name)
+    #   restore!(from: :backup_name)
+    #   reindex!(:new_table_name)
+    %w(create clone rename backup restore reindex).each do |method|
+      define_method("#{method}!") do |*args|
+        _connection.send("#{method}_table", _index_name, *args)
+      end
+    end
     # delegated dangerous methods with confirm parameter (created with exclamation mark)
     # a exception will be raised, if +confirm:true+ is missing.
     #
@@ -146,12 +158,51 @@ module ElasticsearchRecord
     # Shortcut for meta_exists
     # @return [Boolean]
+    # @!method create!(force: false, copy_from: nil, if_not_exists: false, **options)
+    # Shortcut for create_table
+    # @param [Boolean] force
+    # @param [nil, String] copy_from
+    # @param [Hash] options
+    # @return [Boolean] acknowledged status
+    # @!method clone!(target_name, **options)
+    # Shortcut for clone_table
+    # @param [String] target_name
+    # @param [Hash] options
+    # @return [Boolean]
+    # @!method rename!(target_name, timeout: nil, **options)
+    # Shortcut for rename_table
+    # @param [String] target_name
+    # @param [String (frozen)] timeout
+    # @param [Hash] options
+    # @!method backup!(to: nil, close: true)
+    # Shortcut for backup_table
+    # @param [String] to
+    # @param [Boolean] close
+    # @return [String] backup_name
+    # @!method restore!(from:, timeout: nil, open: true, drop_backup: false)
+    # Shortcut for restore_table
+    # @param [String] from
+    # @param [String (frozen)] timeout
+    # @param [Boolean] open
+    # @return [Boolean] acknowledged status
+    # @!method reindex!(target_name, **options)
+    # Shortcut for reindex_table
+    # @param [String] target_name
+    # @param [Hash] options
+    # @return [Hash] reindex stats
     # fast insert/update data.
+    # IMPORTANT: Any 'doc'-id must be provided with underscore '_' ( +:_id+ )
     #
     # @example
     #   index([{name: 'Hans', age: 34}, {name: 'Peter', age: 22}])
     #
-    #   index({id: 5, name: 'Georg', age: 87})
+    #   index({_id: 5, name: 'Georg', age: 87})
     #
     # @param [Array<Hash>,Hash] data
     # @param [Hash] options
@@ -160,6 +211,7 @@ module ElasticsearchRecord
     end
     # fast insert new data.
+    # IMPORTANT: Any 'doc'-id must be provided with underscore '_' ( +:_id+ )
     #
     # @example
     #   insert([{name: 'Hans', age: 34}, {name: 'Peter', age: 22}])
@@ -173,11 +225,12 @@ module ElasticsearchRecord
     end
     # fast update existing data.
+    # IMPORTANT: Any 'doc'-id must be provided with underscore '_' ( +:_id+ )
     #
     # @example
-    #   update([{id: 1, name: 'Hansi'}, {id: 2, name: 'Peter Parker', age: 42}])
+    #   update([{_id: 1, name: 'Hansi'}, {_id: 2, name: 'Peter Parker', age: 42}])
     #
-    #   update({id: 3, name: 'Georg McCain'})
+    #   update({_id: 3, name: 'Georg McCain'})
     #
     # @param [Array<Hash>,Hash] data
     # @param [Hash] options
@@ -186,13 +239,14 @@ module ElasticsearchRecord
     end
     # fast delete data.
+    # IMPORTANT: Any 'doc'-id must be provided with underscore '_' ( +:_id+ )
     #
     # @example
     #   delete([1,2,3,5])
     #
     #   delete(3)
     #
-    #   delete({id: 2})
+    #   delete({_id: 2})
     #
     # @param [Array<Hash>,Hash] data
     # @param [Hash] options
@@ -202,12 +256,12 @@ module ElasticsearchRecord
       if data[0].is_a?(Hash)
         bulk(data, :delete, **options)
       else
-        bulk(data.map { |id| { id: id } }, :delete, **options)
+        bulk(data.map { |id| { _id: id } }, :delete, **options)
       end
     end
     # bulk handle provided data (single Hash or multiple Array<Hash>).
-    # @param [Hash,Array<Hash>] data - the data to insert/update/delete ...
+    # @param [Hash,Array<Hash<Symbol=>Object>>] data - the data to insert/update/delete ...
     # @param [Symbol] operation
     # @param [Boolean, Symbol] refresh
     def bulk(data, operation = :index, refresh: true, **options)
@@ -215,7 +269,11 @@ module ElasticsearchRecord
       _connection.api(:core, :bulk, {
         index:   _index_name,
-        body:    data.map { |item| { operation => { _id: item[:id], data: item.except(:id) } } },
+        body:    if operation == :update
+                   data.map { |item| { operation => { _id: (item[:_id].presence || item['_id']), data: { doc: item.except(:_id, '_id') } } } }
+                 else
+                   data.map { |item| { operation => { _id: (item[:_id].presence || item['_id']), data: item.except(:_id, '_id') } } }
+                 end,
         refresh: refresh
       }, "BULK #{operation.to_s.upcase}", **options)
     end

data/lib/elasticsearch_record/model_schema.rb CHANGED Viewed

@@ -52,11 +52,8 @@ module ElasticsearchRecord
       # @return [Array<String>]
       def searchable_column_names
         @searchable_column_names ||= columns.select(&:enabled?).reduce([]) { |m, column|
-          m << column.name
-          m += column.field_names
-          m += column.property_names
-          m.uniq
-        }
+          m + [column.name] + column.field_names + column.property_names
+        }.uniq
       end
       # clears schema-related instance variables.

data/lib/elasticsearch_record/persistence.rb CHANGED Viewed

@@ -11,7 +11,7 @@ module ElasticsearchRecord
         # values is not a "key=>values"-Hash, but a +ActiveModel::Attribute+ - so the casted values gets resolved here
         values = values.transform_values(&:value)
-        # resolve & update a auto_increment value
+        # resolve & update an auto_increment value, if configured
         _insert_with_auto_increment(values) do |arguments|
           # build new query
           query = ElasticsearchRecord::Query.new(
@@ -68,6 +68,9 @@ module ElasticsearchRecord
         if (id = values[self.primary_key]).present?
           yield({id: id})
         elsif auto_increment?
+          # future increments: uuid (+uuidv6 ?), hex, radix(2-36), integer
+          # allocated through: primary_key_type
           ids = [
             # try to resolve the current-auto-increment value from the tables meta
             connection.table_metas(self.table_name).dig('auto_increment').to_i + 1,
@@ -88,5 +91,14 @@ module ElasticsearchRecord
         end
       end
     end
+    # overwrite to provide an Elasticsearch version:
+    # Creates a record with values matching those of the instance attributes
+    # and returns its id.
+    def _create_record(*args)
+      undelegate_id_attribute_with do
+        super
+      end
+    end
   end
 end

data/lib/elasticsearch_record/query.rb CHANGED Viewed

@@ -12,6 +12,7 @@ module ElasticsearchRecord
     TYPE_SEARCH  = :search
     TYPE_MSEARCH = :msearch
     TYPE_SQL     = :sql
+    TYPE_ESQL    = :esql
     # -- DOCUMENT TYPES ------------------------------------------------------------------------------------------------
     TYPE_CREATE          = :create
@@ -34,7 +35,7 @@ module ElasticsearchRecord
     # includes valid types only
     TYPES = [
       # -- QUERY TYPES
-      TYPE_COUNT, TYPE_SEARCH, TYPE_MSEARCH, TYPE_SQL,
+      TYPE_COUNT, TYPE_SEARCH, TYPE_MSEARCH, TYPE_SQL, TYPE_ESQL,
       # -- DOCUMENT TYPES
       TYPE_CREATE, TYPE_UPDATE, TYPE_UPDATE_BY_QUERY, TYPE_DELETE, TYPE_DELETE_BY_QUERY,
@@ -46,7 +47,7 @@ module ElasticsearchRecord
     # includes reading types only
     READ_TYPES = [
-      TYPE_COUNT, TYPE_SEARCH, TYPE_MSEARCH, TYPE_SQL
+      TYPE_COUNT, TYPE_SEARCH, TYPE_MSEARCH, TYPE_SQL, TYPE_ESQL
     ].freeze
     # defines a body to be executed if the query fails - +(none)+
@@ -60,6 +61,7 @@ module ElasticsearchRecord
     # if no special type is defined, it simply uses +[:core,self.type]+
     GATES = {
       TYPE_SQL                  => [:sql, :query],
+      TYPE_ESQL                 => [:esql, :query],
       TYPE_INDEX_CREATE         => [:indices, :create],
       TYPE_INDEX_CLONE          => [:indices, :clone],
       TYPE_INDEX_UPDATE_MAPPING => [:indices, :put_mapping],
@@ -86,9 +88,9 @@ module ElasticsearchRecord
     # @!attribute Boolean
     attr_reader :refresh
-    # defines the query body - in most cases this is a hash
-    # @!attribute Hash
-    # attr_reader :body
+    # defines the query timeout
+    # @!attribute Integer|String
+    attr_reader :timeout
     # defines the query arguments to be passed to the API
     # @!attribute Hash
@@ -98,11 +100,12 @@ module ElasticsearchRecord
     # @!attribute Array
     attr_reader :columns
-    def initialize(index: nil, type: TYPE_UNDEFINED, status: STATUS_VALID, body: nil, refresh: nil, arguments: {}, columns: [])
+    def initialize(index: nil, type: TYPE_UNDEFINED, status: STATUS_VALID, body: nil, refresh: nil, timeout: nil, arguments: {}, columns: [])
       @index     = index
       @type      = type
       @status    = status
       @refresh   = refresh
+      @timeout   = timeout
       @body      = body
       @arguments = arguments
       @columns   = columns
@@ -117,6 +120,12 @@ module ElasticsearchRecord
       self
     end
+    # returns true, if the query failed
+    # @return [Boolean]
+    def failed?
+      self.status == STATUS_FAILED
+    end
     # returns true, if the query is valid (e.g. index & type defined)
     # @return [Boolean]
     def valid?
@@ -142,7 +151,7 @@ module ElasticsearchRecord
     # failed queried will return the related +FAILED_BODIES+ or +{}+ as fallback
     # @return [Hash, nil]
     def body
-      return (FAILED_BODIES[self.type].presence || {}) if self.status == STATUS_FAILED
+      return (FAILED_BODIES[self.type].presence || {}) if failed?
       @body
     end
@@ -163,6 +172,9 @@ module ElasticsearchRecord
       # set refresh, if defined (also includes false value)
       args[:refresh] = self.refresh unless self.refresh.nil?
+      # set timeout, if present
+      args[:timeout] = self.timeout if self.timeout.present?
       args
     end

data/lib/elasticsearch_record/querying.rb CHANGED Viewed

@@ -77,7 +77,42 @@ module ElasticsearchRecord
         _load_from_sql(_query_by_sql(query), &block)
       end
-      # executes a msearch by provided +RAW+ queries
+      # ES|QL query API
+      # Returns search results for an ES|QL (Elasticsearch query language) query.
+      #
+      # @param [String] esql
+      # @param [Proc] block
+      def find_by_esql(esql, &block)
+        # build new query
+        query = ElasticsearchRecord::Query.new(
+          type: ElasticsearchRecord::Query::TYPE_ESQL,
+          body: { query: esql },
+          # IMPORTANT: Always provide all columns
+          columns: source_column_names)
+        _load_from_sql(_query_by_sql(query), &block)
+      end
+      # executes an +esql+ by provided *ES|QL* query
+      # Does NOT instantiate records.
+      # @param [String] esql
+      # @param [Boolean] async (default: false)
+      def esql(esql, async: false)
+        # build new query
+        query = ElasticsearchRecord::Query.new(
+          type: ElasticsearchRecord::Query::TYPE_ESQL,
+          body: { query: esql },
+          # IMPORTANT: Always provide all columns
+          columns: source_column_names)
+        connection.exec_query(query, "#{name} ES|QL", async: async)
+      end
+      # executes a +msearch+ by provided *RAW* queries.
+      # Does NOT instantiate records.
+      # @param [Array<String>] queries
+      # @param [Boolean] async (default: false)
       def msearch(queries, async: false)
         # build new msearch query
         query = ElasticsearchRecord::Query.new(

data/lib/elasticsearch_record/relation/calculation_methods.rb CHANGED Viewed

@@ -45,6 +45,84 @@ module ElasticsearchRecord
         end
       end
+      # A boxplot metrics aggregation that computes boxplot of numeric values extracted from the aggregated documents.
+      # These values can be generated from specific numeric or histogram fields in the documents.
+      #
+      # The boxplot aggregation returns essential information for making a box plot:
+      # *minimum*, *maximum*, *median*, *first quartile* (25th percentile) and *third quartile* (75th percentile) values.
+      #
+      #   Person.all.boxplot(:age)
+      #   > {
+      #       "min": 0.0,
+      #       "max": 990.0,
+      #       "q1": 167.5,
+      #       "q2": 445.0,
+      #       "q3": 722.5,
+      #       "lower": 0.0,
+      #       "upper": 990.0
+      #     }
+      #
+      # @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-boxplot-aggregation.html
+      #
+      # @param [Symbol, String] column_name
+      def boxplot(column_name)
+        calculate(:boxplot, column_name)
+      end
+      # A multi-value metrics aggregation that computes stats over numeric values extracted from the aggregated documents.
+      # The stats that are returned consist of: *min*, *max*, *sum*, *count* and *avg*.
+      #
+      #   Person.all.stats(:age)
+      #   > {
+      #       "count": 10,
+      #       "min": 0.0,
+      #       "max": 990.0,
+      #       "sum": 16859,
+      #       "avg": 75.5
+      #     }
+      #
+      # @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-stats-aggregation.html
+      #
+      # @param [Symbol, String] column_name
+      def stats(column_name)
+        calculate(:stats, column_name)
+      end
+      # A multi-value metrics aggregation that computes statistics over string values extracted from the aggregated documents.
+      # These values can be retrieved from specific keyword fields.
+      #
+      #   Person.all.string_stats(:name)
+      #   > {
+      #       "count": 5,
+      #       "min_length": 24,
+      #       "max_length": 30,
+      #       "avg_length": 28.8,
+      #       "entropy": 3.94617750050791
+      #     }
+      #
+      # @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-string-stats-aggregation.html
+      #
+      # @param [Symbol, String] column_name
+      def string_stats(column_name)
+        calculate(:string_stats, column_name)
+      end
+      # The matrix_stats aggregation is a numeric aggregation that computes the following statistics over a set of document fields:
+      # *count*        Number of per field samples included in the calculation.
+      # *mean*        The average value for each field.
+      # *variance*    Per field Measurement for how spread out the samples are from the mean.
+      # *skewness*    Per field measurement quantifying the asymmetric distribution around the mean.
+      # *kurtosis*    Per field measurement quantifying the shape of the distribution.
+      # *covariance*  A matrix that quantitatively describes how changes in one field are associated with another.
+      # *correlation* The covariance matrix scaled to a range of -1 to 1, inclusive. Describes the relationship between field distributions.
+      #
+      # @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-matrix-stats-aggregation.html
+      #
+      # @param [Array<Symbol|String>] column_names
+      def matrix_stats(*column_names)
+        calculate(:matrix_stats, *column_names)
+      end
       # A multi-value metrics aggregation that calculates one or more
       # percentiles over numeric values extracted from the aggregated documents.
       # Returns a hash with empty values (but keys still exists) if there is no row.
@@ -59,6 +137,9 @@ module ElasticsearchRecord
       #     "95.0" => 2021.0,
       #     "99.0" => 2022.0
       #     }
+      #
+      # @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-percentile-aggregation.html
+      #
       # @param [Symbol, String] column_name
       def percentiles(column_name)
         calculate(:percentiles, column_name, node: :values)
@@ -81,6 +162,9 @@ module ElasticsearchRecord
       #     "95.0" => 2021.0,
       #     "99.0" => 2022.0
       #     }
+      #
+      # @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-percentile-rank-aggregation.html
+      #
       # @param [Symbol, String] column_name
       # @param [Array] values
       def percentile_ranks(column_name, values)
@@ -92,6 +176,8 @@ module ElasticsearchRecord
       #   Person.all.cardinality(:age)
       #   > 12
       #
+      # @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-cardinality-aggregation.html
+      #
       # @param [Symbol, String] column_name
       def cardinality(column_name)
         calculate(:cardinality, column_name)
@@ -101,6 +187,8 @@ module ElasticsearchRecord
       #
       #   Person.all.average(:age) # => 35.8
       #
+      # @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-avg-aggregation.html
+      #
       # @param [Symbol, String] column_name
       def average(column_name)
         calculate(:avg, column_name)
@@ -112,6 +200,8 @@ module ElasticsearchRecord
       #   Person.all.minimum(:age)
       #   > 7
       #
+      # @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-min-aggregation.html
+      #
       # @param [Symbol, String] column_name
       def minimum(column_name)
         calculate(:min, column_name)
@@ -123,33 +213,58 @@ module ElasticsearchRecord
       #
       #   Person.all.maximum(:age) # => 93
       #
+      # @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-max-aggregation.html
+      #
       # @param [Symbol, String] column_name
       def maximum(column_name)
         calculate(:max, column_name)
       end
+      # This single-value aggregation approximates the median absolute deviation of its search results.
+      # Median absolute deviation is a measure of variability. It is a robust statistic,
+      # meaning that it is useful for describing data that may have outliers, or may not be normally distributed.
+      # For such data it can be more descriptive than standard deviation.
+      #
+      # It is calculated as the median of each data point’s deviation from the median of the entire sample.
+      # That is, for a random variable X, the median absolute deviation is median(|median(X) - Xi|).
+      #
+      #   Person.all.median_absolute_deviation(:age) # => 91
+      #
+      # @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-median-absolute-deviation-aggregation.html
+      #
+      # @param [Symbol, String] column_name
+      def median_absolute_deviation(column_name)
+        calculate(:median_absolute_deviation, column_name)
+      end
       # Calculates the sum of values on a given column. The value is returned
       # with the same data type of the column, +0+ if there's no row. See
       # #calculate for examples with options.
       #
       #   Person.all.sum(:age) # => 4562
       #
+      # @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-sum-aggregation.html
+      #
       # @param [Symbol, String] column_name (optional)
       def sum(column_name)
         calculate(:sum, column_name)
       end
-      # creates a aggregation with the provided metric (e.g. :sum) and column.
+      # creates an aggregation with the provided metric (e.g. :sum) and columns.
       # returns the metric node (default: :value) from the aggregations result.
       # @param [Symbol, String] metric
-      # @param [Symbol, String] column
+      # @param [Array<Symbol|String>] columns
       # @param [Hash] opts - additional arguments that get merged with the metric definition
       # @param [Symbol] node (default :value)
-      def calculate(metric, column, opts: {}, node: :value)
-        metric_key = "#{column}_#{metric}"
+      def calculate(metric, *columns, opts: {}, node: :value)
+        metric_key = "calculate_#{metric}"
         # spawn a new aggregation and return the aggs
-        response = aggregate(metric_key, { metric => { field: column }.merge(opts) }).aggregations
+        response = if columns.size == 1
+                     aggregate(metric_key, { metric => { field: columns[0] }.merge(opts) }).aggregations
+                   else
+                     aggregate(metric_key, { metric => { fields: columns }.merge(opts) }).aggregations
+                   end
         response[metric_key][node]
       end

data/lib/elasticsearch_record/relation/core_methods.rb CHANGED Viewed

@@ -125,6 +125,18 @@ module ElasticsearchRecord
           self
         end
       end
+      # overwrite original methods to provide an Elasticsearch version:
+      # checks against the +#access_id_fielddata?+ to ensure the Elasticsearch Cluster allows access on the +_id+ field.
+      def reverse_sql_order(order_query)
+        if order_query.empty?
+          return [table[primary_key].desc] if primary_key != '_id' || klass.connection.access_id_fielddata?
+          raise ActiveRecord::IrreversibleOrderError,
+                "Relation has no current order and fielddata access on the _id field is disallowed! However, you can re-enable it by updating the dynamic cluster setting: indices.id_field_data.enabled"
+        end
+        super
+      end
     end
   end
 end

data/lib/elasticsearch_record/relation/query_methods.rb CHANGED Viewed

@@ -102,6 +102,16 @@ module ElasticsearchRecord
         configure!(:__query__, refresh: value)
       end
+      # sets the query's +timeout+ value.
+      # @param [Boolean] value (default: true)
+      def timeout(value = true)
+        spawn.timeout!(value)
+      end
+      def timeout!(value = true)
+        configure!(:__query__, timeout: value)
+      end
       # add a whole query 'node' to the query.
       # @example
       #   query(:bool, {filter: ...})

data/lib/elasticsearch_record/relation/result_methods.rb CHANGED Viewed

@@ -90,7 +90,9 @@ module ElasticsearchRecord
       #
       # @param [String] keep_alive - how long to keep alive (for each single request) - default: '1m'
       # @param [Integer] batch_size - how many results per query (default: 1000 - this means at least 10 queries before reaching the +max_result_window+)
-      def pit_results(keep_alive: '1m', batch_size: 1000)
+      # @param [Boolean] ids_only - resolve ids only from results
+      # @return [Integer, Array] either returns the results-array (no block provided) or the total amount of results
+      def pit_results(keep_alive: '1m', batch_size: 1000, ids_only: false)
         raise(ArgumentError, "Batch size cannot be above the 'max_result_window' (#{klass.max_result_window}) !") if batch_size > klass.max_result_window
         # check if limit or offset values where provided
@@ -105,6 +107,9 @@ module ElasticsearchRecord
         # see @ https://www.elastic.co/guide/en/elasticsearch/reference/current/paginate-search-results.html
         relation.order!(_shard_doc: :asc) if relation.order_values.empty? && klass.connection.access_shard_doc?
+        # resolve ids only
+        relation.reselect!('_id') if ids_only
         # clear limit & offset
         relation.offset!(nil).limit!(nil)
@@ -122,10 +127,16 @@ module ElasticsearchRecord
           # resolve new data until we got all we need
           loop do
             # change pit settings & limit (spawn is required, since a +resolve+ will make the relation immutable)
-            current_response = relation.spawn.configure!(current_pit_hash).limit!(batch_size).resolve('Pit').response
+            current_response = relation.spawn.configure!(current_pit_hash).limit!(batch_size).resolve('Pit Results').response
             # resolve only data from hits->hits[{_source}]
-            current_results        = current_response['hits']['hits'].map { |result| result['_source'].merge('_id' => result['_id']) }
+            current_results        = if ids_only
+                                       current_response['hits']['hits'].map { |result| result['_id'] }
+                                       # future with helper
+                                       # current_response['hits']['hits'].map.from_hash('_id')
+                                     else
+                                       current_response['hits']['hits'].map { |result| result['_source'].merge('_id' => result['_id']) }
+                                     end
             current_results_length = current_results.length
             # check if we reached the required offset
@@ -171,12 +182,38 @@ module ElasticsearchRecord
           end
         end
-        # return results array
-        results
+        # return results array or total value
+        if block_given?
+          results_total
+        else
+          results
+        end
       end
       alias_method :total_results, :pit_results
+      # executes a delete query in a +point_in_time+ scope.
+      # this will provide the possibility to delete more than the +max_result_window+ (default: 10000) docs in a batched process.
+      # @param [String] keep_alive
+      # @param [Integer] batch_size
+      # @param [Boolean] refresh index after delete finished (default: true)
+      # @return [Integer] total amount of deleted docs
+      def pit_delete(keep_alive: '1m', batch_size: 1000, refresh: true)
+        delete_count = select('_id').pit_results(keep_alive: keep_alive, batch_size: batch_size, ids_only: true) do |ids|
+          # skip empty results
+          next unless ids.any?
+          # delete all IDs, but do not refresh index, yet
+          klass.connection.api(:core, :bulk, { index: klass.table_name, body: ids.map { |id| { delete: { _id: id } } }, refresh: false }, "#{klass} Pit Delete")
+        end
+        # refresh index
+        klass.connection.refresh_table(klass.table_name) if refresh
+        # return total count
+        delete_count
+      end
       # returns the RAW response for the current query
       # @return [Array]
       def response

data/lib/elasticsearch_record/result.rb CHANGED Viewed

@@ -49,7 +49,7 @@ module ElasticsearchRecord
     end
     # Returns the RAW +_source+ data from each hit - aka. +rows+.
-    # PLEASE NOTE: The array will only contain the RAW data from each +_source+ (meta info like '_score' is not included)
+    # PLEASE NOTE: The array will only contain the RAW data from each +_source+ (meta info like '_id' or '_score' is not included)
     # @return [Array]
     def results
       return [] unless response['hits']

data/lib/elasticsearch_record.rb CHANGED Viewed

@@ -55,6 +55,16 @@ module ElasticsearchRecord
     autoload :ElasticsearchDatabaseTasks, 'elasticsearch_record/tasks/elasticsearch_database_tasks'
   end
+  ##
+  # :singleton-method:
+  # Specifies if an exception should be raised while using transactions.
+  # Since ActiveRecord does not have any configuration option to support transactions and
+  # Elasticsearch does **NOT** support transactions, it may be risky to ignore them.
+  # By default, transactions are 'silently swallowed' to not break any existing applications...
+  # However enabling this flag will surely fail transactional tests ...
+  singleton_class.attr_accessor :error_on_transaction
+  self.error_on_transaction = false
 end
 ActiveSupport.on_load(:active_record) do