elasticsearch_record 1.5.3 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,9 +8,6 @@ module ElasticsearchRecord
8
8
  @klass = klass
9
9
  end
10
10
 
11
- # undelegated schema methods: clone rename create
12
- # those should not be quick-accessible, since they might end in heavily broken index
13
-
14
11
  # delegated dangerous methods (created with exclamation mark)
15
12
  # not able to provide individual arguments - always the defaults will be used!
16
13
  #
@@ -26,6 +23,21 @@ module ElasticsearchRecord
26
23
  end
27
24
  end
28
25
 
26
+ # delegated dangerous methods with args
27
+ #
28
+ # @example
29
+ # create!(:new_table_name, settings: , mappings:, alias: , ...)
30
+ # clone!(:new_table_name)
31
+ # rename!(:new_table_name)
32
+ # backup!(to: :backup_name)
33
+ # restore!(from: :backup_name)
34
+ # reindex!(:new_table_name)
35
+ %w(create clone rename backup restore reindex).each do |method|
36
+ define_method("#{method}!") do |*args|
37
+ _connection.send("#{method}_table", _index_name, *args)
38
+ end
39
+ end
40
+
29
41
  # delegated dangerous methods with confirm parameter (created with exclamation mark)
30
42
  # an exception will be raised if +confirm:true+ is missing.
31
43
  #
@@ -146,12 +158,51 @@ module ElasticsearchRecord
146
158
  # Shortcut for meta_exists
147
159
  # @return [Boolean]
148
160
 
161
+ # @!method create!(force: false, copy_from: nil, if_not_exists: false, **options)
162
+ # Shortcut for create_table
163
+ # @param [Boolean] force
164
+ # @param [nil, String] copy_from
165
+ # @param [Hash] options
166
+ # @return [Boolean] acknowledged status
167
+
168
+ # @!method clone!(target_name, **options)
169
+ # Shortcut for clone_table
170
+ # @param [String] target_name
171
+ # @param [Hash] options
172
+ # @return [Boolean]
173
+
174
+ # @!method rename!(target_name, timeout: nil, **options)
175
+ # Shortcut for rename_table
176
+ # @param [String] target_name
177
+ # @param [String (frozen)] timeout
178
+ # @param [Hash] options
179
+
180
+ # @!method backup!(to: nil, close: true)
181
+ # Shortcut for backup_table
182
+ # @param [String] to
183
+ # @param [Boolean] close
184
+ # @return [String] backup_name
185
+
186
+ # @!method restore!(from:, timeout: nil, open: true, drop_backup: false)
187
+ # Shortcut for restore_table
188
+ # @param [String] from
189
+ # @param [String (frozen)] timeout
190
+ # @param [Boolean] open
191
+ # @return [Boolean] acknowledged status
192
+
193
+ # @!method reindex!(target_name, **options)
194
+ # Shortcut for reindex_table
195
+ # @param [String] target_name
196
+ # @param [Hash] options
197
+ # @return [Hash] reindex stats
198
+
149
199
  # fast insert/update data.
200
+ # IMPORTANT: Any 'doc'-id must be provided with a leading underscore '_' ( +:_id+ )
150
201
  #
151
202
  # @example
152
203
  # index([{name: 'Hans', age: 34}, {name: 'Peter', age: 22}])
153
204
  #
154
- # index({id: 5, name: 'Georg', age: 87})
205
+ # index({_id: 5, name: 'Georg', age: 87})
155
206
  #
156
207
  # @param [Array<Hash>,Hash] data
157
208
  # @param [Hash] options
@@ -160,6 +211,7 @@ module ElasticsearchRecord
160
211
  end
161
212
 
162
213
  # fast insert new data.
214
+ # IMPORTANT: Any 'doc'-id must be provided with a leading underscore '_' ( +:_id+ )
163
215
  #
164
216
  # @example
165
217
  # insert([{name: 'Hans', age: 34}, {name: 'Peter', age: 22}])
@@ -173,11 +225,12 @@ module ElasticsearchRecord
173
225
  end
174
226
 
175
227
  # fast update existing data.
228
+ # IMPORTANT: Any 'doc'-id must be provided with a leading underscore '_' ( +:_id+ )
176
229
  #
177
230
  # @example
178
- # update([{id: 1, name: 'Hansi'}, {id: 2, name: 'Peter Parker', age: 42}])
231
+ # update([{_id: 1, name: 'Hansi'}, {_id: 2, name: 'Peter Parker', age: 42}])
179
232
  #
180
- # update({id: 3, name: 'Georg McCain'})
233
+ # update({_id: 3, name: 'Georg McCain'})
181
234
  #
182
235
  # @param [Array<Hash>,Hash] data
183
236
  # @param [Hash] options
@@ -186,13 +239,14 @@ module ElasticsearchRecord
186
239
  end
187
240
 
188
241
  # fast delete data.
242
+ # IMPORTANT: Any 'doc'-id must be provided with a leading underscore '_' ( +:_id+ )
189
243
  #
190
244
  # @example
191
245
  # delete([1,2,3,5])
192
246
  #
193
247
  # delete(3)
194
248
  #
195
- # delete({id: 2})
249
+ # delete({_id: 2})
196
250
  #
197
251
  # @param [Array<Hash>,Hash] data
198
252
  # @param [Hash] options
@@ -202,12 +256,12 @@ module ElasticsearchRecord
202
256
  if data[0].is_a?(Hash)
203
257
  bulk(data, :delete, **options)
204
258
  else
205
- bulk(data.map { |id| { id: id } }, :delete, **options)
259
+ bulk(data.map { |id| { _id: id } }, :delete, **options)
206
260
  end
207
261
  end
208
262
 
209
263
  # bulk handle provided data (single Hash or multiple Array<Hash>).
210
- # @param [Hash,Array<Hash>] data - the data to insert/update/delete ...
264
+ # @param [Hash,Array<Hash<Symbol=>Object>>] data - the data to insert/update/delete ...
211
265
  # @param [Symbol] operation
212
266
  # @param [Boolean, Symbol] refresh
213
267
  def bulk(data, operation = :index, refresh: true, **options)
@@ -215,7 +269,11 @@ module ElasticsearchRecord
215
269
 
216
270
  _connection.api(:core, :bulk, {
217
271
  index: _index_name,
218
- body: data.map { |item| { operation => { _id: item[:id], data: item.except(:id) } } },
272
+ body: if operation == :update
273
+ data.map { |item| { operation => { _id: (item[:_id].presence || item['_id']), data: { doc: item.except(:_id, '_id') } } } }
274
+ else
275
+ data.map { |item| { operation => { _id: (item[:_id].presence || item['_id']), data: item.except(:_id, '_id') } } }
276
+ end,
219
277
  refresh: refresh
220
278
  }, "BULK #{operation.to_s.upcase}", **options)
221
279
  end
@@ -52,11 +52,8 @@ module ElasticsearchRecord
52
52
  # @return [Array<String>]
53
53
  def searchable_column_names
54
54
  @searchable_column_names ||= columns.select(&:enabled?).reduce([]) { |m, column|
55
- m << column.name
56
- m += column.field_names
57
- m += column.property_names
58
- m.uniq
59
- }
55
+ m + [column.name] + column.field_names + column.property_names
56
+ }.uniq
60
57
  end
61
58
 
62
59
  # clears schema-related instance variables.
@@ -11,7 +11,7 @@ module ElasticsearchRecord
11
11
  # values is not a "key=>values"-Hash, but a +ActiveModel::Attribute+ - so the casted values gets resolved here
12
12
  values = values.transform_values(&:value)
13
13
 
14
- # resolve & update a auto_increment value
14
+ # resolve & update an auto_increment value, if configured
15
15
  _insert_with_auto_increment(values) do |arguments|
16
16
  # build new query
17
17
  query = ElasticsearchRecord::Query.new(
@@ -68,6 +68,9 @@ module ElasticsearchRecord
68
68
  if (id = values[self.primary_key]).present?
69
69
  yield({id: id})
70
70
  elsif auto_increment?
71
+ # future increments: uuid (+uuidv6 ?), hex, radix(2-36), integer
72
+ # allocated through: primary_key_type
73
+
71
74
  ids = [
72
75
  # try to resolve the current-auto-increment value from the tables meta
73
76
  connection.table_metas(self.table_name).dig('auto_increment').to_i + 1,
@@ -88,5 +91,14 @@ module ElasticsearchRecord
88
91
  end
89
92
  end
90
93
  end
94
+
95
+ # overwrite to provide an Elasticsearch version:
96
+ # Creates a record with values matching those of the instance attributes
97
+ # and returns its id.
98
+ def _create_record(*args)
99
+ undelegate_id_attribute_with do
100
+ super
101
+ end
102
+ end
91
103
  end
92
104
  end
@@ -12,6 +12,7 @@ module ElasticsearchRecord
12
12
  TYPE_SEARCH = :search
13
13
  TYPE_MSEARCH = :msearch
14
14
  TYPE_SQL = :sql
15
+ TYPE_ESQL = :esql
15
16
 
16
17
  # -- DOCUMENT TYPES ------------------------------------------------------------------------------------------------
17
18
  TYPE_CREATE = :create
@@ -34,7 +35,7 @@ module ElasticsearchRecord
34
35
  # includes valid types only
35
36
  TYPES = [
36
37
  # -- QUERY TYPES
37
- TYPE_COUNT, TYPE_SEARCH, TYPE_MSEARCH, TYPE_SQL,
38
+ TYPE_COUNT, TYPE_SEARCH, TYPE_MSEARCH, TYPE_SQL, TYPE_ESQL,
38
39
  # -- DOCUMENT TYPES
39
40
  TYPE_CREATE, TYPE_UPDATE, TYPE_UPDATE_BY_QUERY, TYPE_DELETE, TYPE_DELETE_BY_QUERY,
40
41
 
@@ -46,7 +47,7 @@ module ElasticsearchRecord
46
47
 
47
48
  # includes reading types only
48
49
  READ_TYPES = [
49
- TYPE_COUNT, TYPE_SEARCH, TYPE_MSEARCH, TYPE_SQL
50
+ TYPE_COUNT, TYPE_SEARCH, TYPE_MSEARCH, TYPE_SQL, TYPE_ESQL
50
51
  ].freeze
51
52
 
52
53
  # defines a body to be executed if the query fails - +(none)+
@@ -60,6 +61,7 @@ module ElasticsearchRecord
60
61
  # if no special type is defined, it simply uses +[:core,self.type]+
61
62
  GATES = {
62
63
  TYPE_SQL => [:sql, :query],
64
+ TYPE_ESQL => [:esql, :query],
63
65
  TYPE_INDEX_CREATE => [:indices, :create],
64
66
  TYPE_INDEX_CLONE => [:indices, :clone],
65
67
  TYPE_INDEX_UPDATE_MAPPING => [:indices, :put_mapping],
@@ -86,9 +88,9 @@ module ElasticsearchRecord
86
88
  # @!attribute Boolean
87
89
  attr_reader :refresh
88
90
 
89
- # defines the query body - in most cases this is a hash
90
- # @!attribute Hash
91
- # attr_reader :body
91
+ # defines the query timeout
92
+ # @!attribute Integer|String
93
+ attr_reader :timeout
92
94
 
93
95
  # defines the query arguments to be passed to the API
94
96
  # @!attribute Hash
@@ -98,11 +100,12 @@ module ElasticsearchRecord
98
100
  # @!attribute Array
99
101
  attr_reader :columns
100
102
 
101
- def initialize(index: nil, type: TYPE_UNDEFINED, status: STATUS_VALID, body: nil, refresh: nil, arguments: {}, columns: [])
103
+ def initialize(index: nil, type: TYPE_UNDEFINED, status: STATUS_VALID, body: nil, refresh: nil, timeout: nil, arguments: {}, columns: [])
102
104
  @index = index
103
105
  @type = type
104
106
  @status = status
105
107
  @refresh = refresh
108
+ @timeout = timeout
106
109
  @body = body
107
110
  @arguments = arguments
108
111
  @columns = columns
@@ -117,6 +120,12 @@ module ElasticsearchRecord
117
120
  self
118
121
  end
119
122
 
123
+ # returns true, if the query failed
124
+ # @return [Boolean]
125
+ def failed?
126
+ self.status == STATUS_FAILED
127
+ end
128
+
120
129
  # returns true, if the query is valid (e.g. index & type defined)
121
130
  # @return [Boolean]
122
131
  def valid?
@@ -142,7 +151,7 @@ module ElasticsearchRecord
142
151
  # failed queries will return the related +FAILED_BODIES+ or +{}+ as fallback
143
152
  # @return [Hash, nil]
144
153
  def body
145
- return (FAILED_BODIES[self.type].presence || {}) if self.status == STATUS_FAILED
154
+ return (FAILED_BODIES[self.type].presence || {}) if failed?
146
155
 
147
156
  @body
148
157
  end
@@ -163,6 +172,9 @@ module ElasticsearchRecord
163
172
  # set refresh, if defined (also includes false value)
164
173
  args[:refresh] = self.refresh unless self.refresh.nil?
165
174
 
175
+ # set timeout, if present
176
+ args[:timeout] = self.timeout if self.timeout.present?
177
+
166
178
  args
167
179
  end
168
180
 
@@ -77,7 +77,42 @@ module ElasticsearchRecord
77
77
  _load_from_sql(_query_by_sql(query), &block)
78
78
  end
79
79
 
80
- # executes a msearch by provided +RAW+ queries
80
+ # ES|QL query API
81
+ # Returns search results for an ES|QL (Elasticsearch query language) query.
82
+ #
83
+ # @param [String] esql
84
+ # @param [Proc] block
85
+ def find_by_esql(esql, &block)
86
+ # build new query
87
+ query = ElasticsearchRecord::Query.new(
88
+ type: ElasticsearchRecord::Query::TYPE_ESQL,
89
+ body: { query: esql },
90
+ # IMPORTANT: Always provide all columns
91
+ columns: source_column_names)
92
+
93
+ _load_from_sql(_query_by_sql(query), &block)
94
+ end
95
+
96
+ # executes an +esql+ request with the provided *ES|QL* query.
97
+ # Does NOT instantiate records.
98
+ # @param [String] esql
99
+ # @param [Boolean] async (default: false)
100
+ def esql(esql, async: false)
101
+ # build new query
102
+ query = ElasticsearchRecord::Query.new(
103
+ type: ElasticsearchRecord::Query::TYPE_ESQL,
104
+ body: { query: esql },
105
+ # IMPORTANT: Always provide all columns
106
+ columns: source_column_names)
107
+
108
+ connection.exec_query(query, "#{name} ES|QL", async: async)
109
+ end
110
+
111
+
112
+ # executes a +msearch+ by provided *RAW* queries.
113
+ # Does NOT instantiate records.
114
+ # @param [Array<String>] queries
115
+ # @param [Boolean] async (default: false)
81
116
  def msearch(queries, async: false)
82
117
  # build new msearch query
83
118
  query = ElasticsearchRecord::Query.new(
@@ -45,6 +45,84 @@ module ElasticsearchRecord
45
45
  end
46
46
  end
47
47
 
48
+ # A boxplot metrics aggregation that computes boxplot of numeric values extracted from the aggregated documents.
49
+ # These values can be generated from specific numeric or histogram fields in the documents.
50
+ #
51
+ # The boxplot aggregation returns essential information for making a box plot:
52
+ # *minimum*, *maximum*, *median*, *first quartile* (25th percentile) and *third quartile* (75th percentile) values.
53
+ #
54
+ # Person.all.boxplot(:age)
55
+ # > {
56
+ # "min": 0.0,
57
+ # "max": 990.0,
58
+ # "q1": 167.5,
59
+ # "q2": 445.0,
60
+ # "q3": 722.5,
61
+ # "lower": 0.0,
62
+ # "upper": 990.0
63
+ # }
64
+ #
65
+ # @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-boxplot-aggregation.html
66
+ #
67
+ # @param [Symbol, String] column_name
68
+ def boxplot(column_name)
69
+ calculate(:boxplot, column_name)
70
+ end
71
+
72
+ # A multi-value metrics aggregation that computes stats over numeric values extracted from the aggregated documents.
73
+ # The stats that are returned consist of: *min*, *max*, *sum*, *count* and *avg*.
74
+ #
75
+ # Person.all.stats(:age)
76
+ # > {
77
+ # "count": 10,
78
+ # "min": 0.0,
79
+ # "max": 990.0,
80
+ # "sum": 16859,
81
+ # "avg": 75.5
82
+ # }
83
+ #
84
+ # @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-stats-aggregation.html
85
+ #
86
+ # @param [Symbol, String] column_name
87
+ def stats(column_name)
88
+ calculate(:stats, column_name)
89
+ end
90
+
91
+ # A multi-value metrics aggregation that computes statistics over string values extracted from the aggregated documents.
92
+ # These values can be retrieved from specific keyword fields.
93
+ #
94
+ # Person.all.string_stats(:name)
95
+ # > {
96
+ # "count": 5,
97
+ # "min_length": 24,
98
+ # "max_length": 30,
99
+ # "avg_length": 28.8,
100
+ # "entropy": 3.94617750050791
101
+ # }
102
+ #
103
+ # @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-string-stats-aggregation.html
104
+ #
105
+ # @param [Symbol, String] column_name
106
+ def string_stats(column_name)
107
+ calculate(:string_stats, column_name)
108
+ end
109
+
110
+ # The matrix_stats aggregation is a numeric aggregation that computes the following statistics over a set of document fields:
111
+ # *count* Number of per field samples included in the calculation.
112
+ # *mean* The average value for each field.
113
+ # *variance* Per field Measurement for how spread out the samples are from the mean.
114
+ # *skewness* Per field measurement quantifying the asymmetric distribution around the mean.
115
+ # *kurtosis* Per field measurement quantifying the shape of the distribution.
116
+ # *covariance* A matrix that quantitatively describes how changes in one field are associated with another.
117
+ # *correlation* The covariance matrix scaled to a range of -1 to 1, inclusive. Describes the relationship between field distributions.
118
+ #
119
+ # @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-matrix-stats-aggregation.html
120
+ #
121
+ # @param [Array<Symbol|String>] column_names
122
+ def matrix_stats(*column_names)
123
+ calculate(:matrix_stats, *column_names)
124
+ end
125
+
48
126
  # A multi-value metrics aggregation that calculates one or more
49
127
  # percentiles over numeric values extracted from the aggregated documents.
50
128
  # Returns a hash with empty values (but keys still exists) if there is no row.
@@ -59,6 +137,9 @@ module ElasticsearchRecord
59
137
  # "95.0" => 2021.0,
60
138
  # "99.0" => 2022.0
61
139
  # }
140
+ #
141
+ # @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-percentile-aggregation.html
142
+ #
62
143
  # @param [Symbol, String] column_name
63
144
  def percentiles(column_name)
64
145
  calculate(:percentiles, column_name, node: :values)
@@ -81,6 +162,9 @@ module ElasticsearchRecord
81
162
  # "95.0" => 2021.0,
82
163
  # "99.0" => 2022.0
83
164
  # }
165
+ #
166
+ # @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-percentile-rank-aggregation.html
167
+ #
84
168
  # @param [Symbol, String] column_name
85
169
  # @param [Array] values
86
170
  def percentile_ranks(column_name, values)
@@ -92,6 +176,8 @@ module ElasticsearchRecord
92
176
  # Person.all.cardinality(:age)
93
177
  # > 12
94
178
  #
179
+ # @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-cardinality-aggregation.html
180
+ #
95
181
  # @param [Symbol, String] column_name
96
182
  def cardinality(column_name)
97
183
  calculate(:cardinality, column_name)
@@ -101,6 +187,8 @@ module ElasticsearchRecord
101
187
  #
102
188
  # Person.all.average(:age) # => 35.8
103
189
  #
190
+ # @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-avg-aggregation.html
191
+ #
104
192
  # @param [Symbol, String] column_name
105
193
  def average(column_name)
106
194
  calculate(:avg, column_name)
@@ -112,6 +200,8 @@ module ElasticsearchRecord
112
200
  # Person.all.minimum(:age)
113
201
  # > 7
114
202
  #
203
+ # @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-min-aggregation.html
204
+ #
115
205
  # @param [Symbol, String] column_name
116
206
  def minimum(column_name)
117
207
  calculate(:min, column_name)
@@ -123,33 +213,58 @@ module ElasticsearchRecord
123
213
  #
124
214
  # Person.all.maximum(:age) # => 93
125
215
  #
216
+ # @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-max-aggregation.html
217
+ #
126
218
  # @param [Symbol, String] column_name
127
219
  def maximum(column_name)
128
220
  calculate(:max, column_name)
129
221
  end
130
222
 
223
+ # This single-value aggregation approximates the median absolute deviation of its search results.
224
+ # Median absolute deviation is a measure of variability. It is a robust statistic,
225
+ # meaning that it is useful for describing data that may have outliers, or may not be normally distributed.
226
+ # For such data it can be more descriptive than standard deviation.
227
+ #
228
+ # It is calculated as the median of each data point’s deviation from the median of the entire sample.
229
+ # That is, for a random variable X, the median absolute deviation is median(|median(X) - Xi|).
230
+ #
231
+ # Person.all.median_absolute_deviation(:age) # => 91
232
+ #
233
+ # @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-median-absolute-deviation-aggregation.html
234
+ #
235
+ # @param [Symbol, String] column_name
236
+ def median_absolute_deviation(column_name)
237
+ calculate(:median_absolute_deviation, column_name)
238
+ end
239
+
131
240
  # Calculates the sum of values on a given column. The value is returned
132
241
  # with the same data type of the column, +0+ if there's no row. See
133
242
  # #calculate for examples with options.
134
243
  #
135
244
  # Person.all.sum(:age) # => 4562
136
245
  #
246
+ # @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-sum-aggregation.html
247
+ #
137
248
  # @param [Symbol, String] column_name (optional)
138
249
  def sum(column_name)
139
250
  calculate(:sum, column_name)
140
251
  end
141
252
 
142
- # creates a aggregation with the provided metric (e.g. :sum) and column.
253
+ # creates an aggregation with the provided metric (e.g. :sum) and columns.
143
254
  # returns the metric node (default: :value) from the aggregations result.
144
255
  # @param [Symbol, String] metric
145
- # @param [Symbol, String] column
256
+ # @param [Array<Symbol|String>] columns
146
257
  # @param [Hash] opts - additional arguments that get merged with the metric definition
147
258
  # @param [Symbol] node (default :value)
148
- def calculate(metric, column, opts: {}, node: :value)
149
- metric_key = "#{column}_#{metric}"
259
+ def calculate(metric, *columns, opts: {}, node: :value)
260
+ metric_key = "calculate_#{metric}"
150
261
 
151
262
  # spawn a new aggregation and return the aggs
152
- response = aggregate(metric_key, { metric => { field: column }.merge(opts) }).aggregations
263
+ response = if columns.size == 1
264
+ aggregate(metric_key, { metric => { field: columns[0] }.merge(opts) }).aggregations
265
+ else
266
+ aggregate(metric_key, { metric => { fields: columns }.merge(opts) }).aggregations
267
+ end
153
268
 
154
269
  response[metric_key][node]
155
270
  end
@@ -125,6 +125,18 @@ module ElasticsearchRecord
125
125
  self
126
126
  end
127
127
  end
128
+
129
+ # overwrite original methods to provide an Elasticsearch version:
130
+ # checks against the +#access_id_fielddata?+ to ensure the Elasticsearch Cluster allows access on the +_id+ field.
131
+ def reverse_sql_order(order_query)
132
+ if order_query.empty?
133
+ return [table[primary_key].desc] if primary_key != '_id' || klass.connection.access_id_fielddata?
134
+ raise ActiveRecord::IrreversibleOrderError,
135
+ "Relation has no current order and fielddata access on the _id field is disallowed! However, you can re-enable it by updating the dynamic cluster setting: indices.id_field_data.enabled"
136
+ end
137
+
138
+ super
139
+ end
128
140
  end
129
141
  end
130
142
  end
@@ -102,6 +102,16 @@ module ElasticsearchRecord
102
102
  configure!(:__query__, refresh: value)
103
103
  end
104
104
 
105
+ # sets the query's +timeout+ value.
106
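+ # @param [Integer, String] value - the timeout to apply (NOTE(review): the code default is +true+, which looks copied from #refresh - confirm)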
107
+ def timeout(value = true)
108
+ spawn.timeout!(value)
109
+ end
110
+
111
+ def timeout!(value = true)
112
+ configure!(:__query__, timeout: value)
113
+ end
114
+
105
115
  # add a whole query 'node' to the query.
106
116
  # @example
107
117
  # query(:bool, {filter: ...})
@@ -90,7 +90,9 @@ module ElasticsearchRecord
90
90
  #
91
91
  # @param [String] keep_alive - how long to keep alive (for each single request) - default: '1m'
92
92
  # @param [Integer] batch_size - how many results per query (default: 1000 - this means at least 10 queries before reaching the +max_result_window+)
93
- def pit_results(keep_alive: '1m', batch_size: 1000)
93
+ # @param [Boolean] ids_only - resolve ids only from results
94
+ # @return [Integer, Array] either returns the results-array (no block provided) or the total amount of results
95
+ def pit_results(keep_alive: '1m', batch_size: 1000, ids_only: false)
94
96
  raise(ArgumentError, "Batch size cannot be above the 'max_result_window' (#{klass.max_result_window}) !") if batch_size > klass.max_result_window
95
97
 
96
98
  # check if limit or offset values were provided
@@ -105,6 +107,9 @@ module ElasticsearchRecord
105
107
  # see @ https://www.elastic.co/guide/en/elasticsearch/reference/current/paginate-search-results.html
106
108
  relation.order!(_shard_doc: :asc) if relation.order_values.empty? && klass.connection.access_shard_doc?
107
109
 
110
+ # resolve ids only
111
+ relation.reselect!('_id') if ids_only
112
+
108
113
  # clear limit & offset
109
114
  relation.offset!(nil).limit!(nil)
110
115
 
@@ -122,10 +127,16 @@ module ElasticsearchRecord
122
127
  # resolve new data until we got all we need
123
128
  loop do
124
129
  # change pit settings & limit (spawn is required, since a +resolve+ will make the relation immutable)
125
- current_response = relation.spawn.configure!(current_pit_hash).limit!(batch_size).resolve('Pit').response
130
+ current_response = relation.spawn.configure!(current_pit_hash).limit!(batch_size).resolve('Pit Results').response
126
131
 
127
132
  # resolve only data from hits->hits[{_source}]
128
- current_results = current_response['hits']['hits'].map { |result| result['_source'].merge('_id' => result['_id']) }
133
+ current_results = if ids_only
134
+ current_response['hits']['hits'].map { |result| result['_id'] }
135
+ # future with helper
136
+ # current_response['hits']['hits'].map.from_hash('_id')
137
+ else
138
+ current_response['hits']['hits'].map { |result| result['_source'].merge('_id' => result['_id']) }
139
+ end
129
140
  current_results_length = current_results.length
130
141
 
131
142
  # check if we reached the required offset
@@ -171,12 +182,38 @@ module ElasticsearchRecord
171
182
  end
172
183
  end
173
184
 
174
- # return results array
175
- results
185
+ # return results array or total value
186
+ if block_given?
187
+ results_total
188
+ else
189
+ results
190
+ end
176
191
  end
177
192
 
178
193
  alias_method :total_results, :pit_results
179
194
 
195
+ # executes a delete query in a +point_in_time+ scope.
196
+ # this will provide the possibility to delete more than the +max_result_window+ (default: 10000) docs in a batched process.
197
+ # @param [String] keep_alive
198
+ # @param [Integer] batch_size
199
+ # @param [Boolean] refresh index after delete finished (default: true)
200
+ # @return [Integer] total amount of deleted docs
201
+ def pit_delete(keep_alive: '1m', batch_size: 1000, refresh: true)
202
+ delete_count = select('_id').pit_results(keep_alive: keep_alive, batch_size: batch_size, ids_only: true) do |ids|
203
+ # skip empty results
204
+ next unless ids.any?
205
+
206
+ # delete all IDs, but do not refresh index, yet
207
+ klass.connection.api(:core, :bulk, { index: klass.table_name, body: ids.map { |id| { delete: { _id: id } } }, refresh: false }, "#{klass} Pit Delete")
208
+ end
209
+
210
+ # refresh index
211
+ klass.connection.refresh_table(klass.table_name) if refresh
212
+
213
+ # return total count
214
+ delete_count
215
+ end
216
+
180
217
  # returns the RAW response for the current query
181
218
  # @return [Array]
182
219
  def response
@@ -49,7 +49,7 @@ module ElasticsearchRecord
49
49
  end
50
50
 
51
51
  # Returns the RAW +_source+ data from each hit - aka. +rows+.
52
- # PLEASE NOTE: The array will only contain the RAW data from each +_source+ (meta info like '_score' is not included)
52
+ # PLEASE NOTE: The array will only contain the RAW data from each +_source+ (meta info like '_id' or '_score' is not included)
53
53
  # @return [Array]
54
54
  def results
55
55
  return [] unless response['hits']
@@ -55,6 +55,16 @@ module ElasticsearchRecord
55
55
 
56
56
  autoload :ElasticsearchDatabaseTasks, 'elasticsearch_record/tasks/elasticsearch_database_tasks'
57
57
  end
58
+
59
+ ##
60
+ # :singleton-method:
61
+ # Specifies if an exception should be raised while using transactions.
62
+ # Since ActiveRecord does not have any configuration option to support transactions and
63
+ # Elasticsearch does **NOT** support transactions, it may be risky to ignore them.
64
+ # By default, transactions are 'silently swallowed' to not break any existing applications...
65
+ # However enabling this flag will surely fail transactional tests ...
66
+ singleton_class.attr_accessor :error_on_transaction
67
+ self.error_on_transaction = false
58
68
  end
59
69
 
60
70
  ActiveSupport.on_load(:active_record) do