elasticsearch_record 1.5.3 → 1.7.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -8,9 +8,6 @@ module ElasticsearchRecord
8
8
  @klass = klass
9
9
  end
10
10
 
11
- # undelegated schema methods: clone rename create
12
- # those should not be quick-accessible, since they might end in heavily broken index
13
-
14
11
  # delegated dangerous methods (created with exclamation mark)
15
12
  # not able to provide individual arguments - always the defaults will be used!
16
13
  #
@@ -26,6 +23,21 @@ module ElasticsearchRecord
26
23
  end
27
24
  end
28
25
 
26
+ # delegated dangerous methods with args
27
+ #
28
+ # @example
29
+ # create!(:new_table_name, settings: , mappings:, alias: , ...)
30
+ # clone!(:new_table_name)
31
+ # rename!(:new_table_name)
32
+ # backup!(to: :backup_name)
33
+ # restore!(from: :backup_name)
34
+ # reindex!(:new_table_name)
35
+ %w(create clone rename backup restore reindex).each do |method|
36
+ define_method("#{method}!") do |*args|
37
+ _connection.send("#{method}_table", _index_name, *args)
38
+ end
39
+ end
40
+
29
41
  # delegated dangerous methods with confirm parameter (created with exclamation mark)
30
42
  # an exception will be raised, if +confirm:true+ is missing.
31
43
  #
@@ -146,12 +158,51 @@ module ElasticsearchRecord
146
158
  # Shortcut for meta_exists
147
159
  # @return [Boolean]
148
160
 
161
+ # @!method create!(force: false, copy_from: nil, if_not_exists: false, **options)
162
+ # Shortcut for create_table
163
+ # @param [Boolean] force
164
+ # @param [nil, String] copy_from
165
+ # @param [Hash] options
166
+ # @return [Boolean] acknowledged status
167
+
168
+ # @!method clone!(target_name, **options)
169
+ # Shortcut for clone_table
170
+ # @param [String] target_name
171
+ # @param [Hash] options
172
+ # @return [Boolean]
173
+
174
+ # @!method rename!(target_name, timeout: nil, **options)
175
+ # Shortcut for rename_table
176
+ # @param [String] target_name
177
+ # @param [String (frozen)] timeout
178
+ # @param [Hash] options
179
+
180
+ # @!method backup!(to: nil, close: true)
181
+ # Shortcut for backup_table
182
+ # @param [String] to
183
+ # @param [Boolean] close
184
+ # @return [String] backup_name
185
+
186
+ # @!method restore!(from:, timeout: nil, open: true, drop_backup: false)
187
+ # Shortcut for restore_table
188
+ # @param [String] from
189
+ # @param [String (frozen)] timeout
190
+ # @param [Boolean] open
191
+ # @return [Boolean] acknowledged status
192
+
193
+ # @!method reindex!(target_name, **options)
194
+ # Shortcut for reindex_table
195
+ # @param [String] target_name
196
+ # @param [Hash] options
197
+ # @return [Hash] reindex stats
198
+
149
199
  # fast insert/update data.
200
+ # IMPORTANT: Any 'doc'-id must be provided with underscore '_' ( +:_id+ )
150
201
  #
151
202
  # @example
152
203
  # index([{name: 'Hans', age: 34}, {name: 'Peter', age: 22}])
153
204
  #
154
- # index({id: 5, name: 'Georg', age: 87})
205
+ # index({_id: 5, name: 'Georg', age: 87})
155
206
  #
156
207
  # @param [Array<Hash>,Hash] data
157
208
  # @param [Hash] options
@@ -160,6 +211,7 @@ module ElasticsearchRecord
160
211
  end
161
212
 
162
213
  # fast insert new data.
214
+ # IMPORTANT: Any 'doc'-id must be provided with underscore '_' ( +:_id+ )
163
215
  #
164
216
  # @example
165
217
  # insert([{name: 'Hans', age: 34}, {name: 'Peter', age: 22}])
@@ -173,11 +225,12 @@ module ElasticsearchRecord
173
225
  end
174
226
 
175
227
  # fast update existing data.
228
+ # IMPORTANT: Any 'doc'-id must be provided with underscore '_' ( +:_id+ )
176
229
  #
177
230
  # @example
178
- # update([{id: 1, name: 'Hansi'}, {id: 2, name: 'Peter Parker', age: 42}])
231
+ # update([{_id: 1, name: 'Hansi'}, {_id: 2, name: 'Peter Parker', age: 42}])
179
232
  #
180
- # update({id: 3, name: 'Georg McCain'})
233
+ # update({_id: 3, name: 'Georg McCain'})
181
234
  #
182
235
  # @param [Array<Hash>,Hash] data
183
236
  # @param [Hash] options
@@ -186,13 +239,14 @@ module ElasticsearchRecord
186
239
  end
187
240
 
188
241
  # fast delete data.
242
+ # IMPORTANT: Any 'doc'-id must be provided with underscore '_' ( +:_id+ )
189
243
  #
190
244
  # @example
191
245
  # delete([1,2,3,5])
192
246
  #
193
247
  # delete(3)
194
248
  #
195
- # delete({id: 2})
249
+ # delete({_id: 2})
196
250
  #
197
251
  # @param [Array<Hash>,Hash] data
198
252
  # @param [Hash] options
@@ -202,12 +256,12 @@ module ElasticsearchRecord
202
256
  if data[0].is_a?(Hash)
203
257
  bulk(data, :delete, **options)
204
258
  else
205
- bulk(data.map { |id| { id: id } }, :delete, **options)
259
+ bulk(data.map { |id| { _id: id } }, :delete, **options)
206
260
  end
207
261
  end
208
262
 
209
263
  # bulk handle provided data (single Hash or multiple Array<Hash>).
210
- # @param [Hash,Array<Hash>] data - the data to insert/update/delete ...
264
+ # @param [Hash,Array<Hash<Symbol=>Object>>] data - the data to insert/update/delete ...
211
265
  # @param [Symbol] operation
212
266
  # @param [Boolean, Symbol] refresh
213
267
  def bulk(data, operation = :index, refresh: true, **options)
@@ -215,7 +269,11 @@ module ElasticsearchRecord
215
269
 
216
270
  _connection.api(:core, :bulk, {
217
271
  index: _index_name,
218
- body: data.map { |item| { operation => { _id: item[:id], data: item.except(:id) } } },
272
+ body: if operation == :update
273
+ data.map { |item| { operation => { _id: (item[:_id].presence || item['_id']), data: { doc: item.except(:_id, '_id') } } } }
274
+ else
275
+ data.map { |item| { operation => { _id: (item[:_id].presence || item['_id']), data: item.except(:_id, '_id') } } }
276
+ end,
219
277
  refresh: refresh
220
278
  }, "BULK #{operation.to_s.upcase}", **options)
221
279
  end
@@ -52,11 +52,8 @@ module ElasticsearchRecord
52
52
  # @return [Array<String>]
53
53
  def searchable_column_names
54
54
  @searchable_column_names ||= columns.select(&:enabled?).reduce([]) { |m, column|
55
- m << column.name
56
- m += column.field_names
57
- m += column.property_names
58
- m.uniq
59
- }
55
+ m + [column.name] + column.field_names + column.property_names
56
+ }.uniq
60
57
  end
61
58
 
62
59
  # clears schema-related instance variables.
@@ -11,7 +11,7 @@ module ElasticsearchRecord
11
11
  # values is not a "key=>values"-Hash, but a +ActiveModel::Attribute+ - so the casted values gets resolved here
12
12
  values = values.transform_values(&:value)
13
13
 
14
- # resolve & update a auto_increment value
14
+ # resolve & update an auto_increment value, if configured
15
15
  _insert_with_auto_increment(values) do |arguments|
16
16
  # build new query
17
17
  query = ElasticsearchRecord::Query.new(
@@ -68,6 +68,9 @@ module ElasticsearchRecord
68
68
  if (id = values[self.primary_key]).present?
69
69
  yield({id: id})
70
70
  elsif auto_increment?
71
+ # future increments: uuid (+uuidv6 ?), hex, radix(2-36), integer
72
+ # allocated through: primary_key_type
73
+
71
74
  ids = [
72
75
  # try to resolve the current-auto-increment value from the tables meta
73
76
  connection.table_metas(self.table_name).dig('auto_increment').to_i + 1,
@@ -88,5 +91,14 @@ module ElasticsearchRecord
88
91
  end
89
92
  end
90
93
  end
94
+
95
+ # overwrite to provide an Elasticsearch version:
96
+ # Creates a record with values matching those of the instance attributes
97
+ # and returns its id.
98
+ def _create_record(*args)
99
+ undelegate_id_attribute_with do
100
+ super
101
+ end
102
+ end
91
103
  end
92
104
  end
@@ -12,6 +12,7 @@ module ElasticsearchRecord
12
12
  TYPE_SEARCH = :search
13
13
  TYPE_MSEARCH = :msearch
14
14
  TYPE_SQL = :sql
15
+ TYPE_ESQL = :esql
15
16
 
16
17
  # -- DOCUMENT TYPES ------------------------------------------------------------------------------------------------
17
18
  TYPE_CREATE = :create
@@ -34,7 +35,7 @@ module ElasticsearchRecord
34
35
  # includes valid types only
35
36
  TYPES = [
36
37
  # -- QUERY TYPES
37
- TYPE_COUNT, TYPE_SEARCH, TYPE_MSEARCH, TYPE_SQL,
38
+ TYPE_COUNT, TYPE_SEARCH, TYPE_MSEARCH, TYPE_SQL, TYPE_ESQL,
38
39
  # -- DOCUMENT TYPES
39
40
  TYPE_CREATE, TYPE_UPDATE, TYPE_UPDATE_BY_QUERY, TYPE_DELETE, TYPE_DELETE_BY_QUERY,
40
41
 
@@ -46,7 +47,7 @@ module ElasticsearchRecord
46
47
 
47
48
  # includes reading types only
48
49
  READ_TYPES = [
49
- TYPE_COUNT, TYPE_SEARCH, TYPE_MSEARCH, TYPE_SQL
50
+ TYPE_COUNT, TYPE_SEARCH, TYPE_MSEARCH, TYPE_SQL, TYPE_ESQL
50
51
  ].freeze
51
52
 
52
53
  # defines a body to be executed if the query fails - +(none)+
@@ -60,6 +61,7 @@ module ElasticsearchRecord
60
61
  # if no special type is defined, it simply uses +[:core,self.type]+
61
62
  GATES = {
62
63
  TYPE_SQL => [:sql, :query],
64
+ TYPE_ESQL => [:esql, :query],
63
65
  TYPE_INDEX_CREATE => [:indices, :create],
64
66
  TYPE_INDEX_CLONE => [:indices, :clone],
65
67
  TYPE_INDEX_UPDATE_MAPPING => [:indices, :put_mapping],
@@ -86,9 +88,9 @@ module ElasticsearchRecord
86
88
  # @!attribute Boolean
87
89
  attr_reader :refresh
88
90
 
89
- # defines the query body - in most cases this is a hash
90
- # @!attribute Hash
91
- # attr_reader :body
91
+ # defines the query timeout
92
+ # @!attribute Integer|String
93
+ attr_reader :timeout
92
94
 
93
95
  # defines the query arguments to be passed to the API
94
96
  # @!attribute Hash
@@ -98,11 +100,12 @@ module ElasticsearchRecord
98
100
  # @!attribute Array
99
101
  attr_reader :columns
100
102
 
101
- def initialize(index: nil, type: TYPE_UNDEFINED, status: STATUS_VALID, body: nil, refresh: nil, arguments: {}, columns: [])
103
+ def initialize(index: nil, type: TYPE_UNDEFINED, status: STATUS_VALID, body: nil, refresh: nil, timeout: nil, arguments: {}, columns: [])
102
104
  @index = index
103
105
  @type = type
104
106
  @status = status
105
107
  @refresh = refresh
108
+ @timeout = timeout
106
109
  @body = body
107
110
  @arguments = arguments
108
111
  @columns = columns
@@ -117,6 +120,12 @@ module ElasticsearchRecord
117
120
  self
118
121
  end
119
122
 
123
+ # returns true, if the query failed
124
+ # @return [Boolean]
125
+ def failed?
126
+ self.status == STATUS_FAILED
127
+ end
128
+
120
129
  # returns true, if the query is valid (e.g. index & type defined)
121
130
  # @return [Boolean]
122
131
  def valid?
@@ -142,7 +151,7 @@ module ElasticsearchRecord
142
151
  # failed queries will return the related +FAILED_BODIES+ or +{}+ as fallback
143
152
  # @return [Hash, nil]
144
153
  def body
145
- return (FAILED_BODIES[self.type].presence || {}) if self.status == STATUS_FAILED
154
+ return (FAILED_BODIES[self.type].presence || {}) if failed?
146
155
 
147
156
  @body
148
157
  end
@@ -163,6 +172,9 @@ module ElasticsearchRecord
163
172
  # set refresh, if defined (also includes false value)
164
173
  args[:refresh] = self.refresh unless self.refresh.nil?
165
174
 
175
+ # set timeout, if present
176
+ args[:timeout] = self.timeout if self.timeout.present?
177
+
166
178
  args
167
179
  end
168
180
 
@@ -77,7 +77,42 @@ module ElasticsearchRecord
77
77
  _load_from_sql(_query_by_sql(query), &block)
78
78
  end
79
79
 
80
- # executes a msearch by provided +RAW+ queries
80
+ # ES|QL query API
81
+ # Returns search results for an ES|QL (Elasticsearch query language) query.
82
+ #
83
+ # @param [String] esql
84
+ # @param [Proc] block
85
+ def find_by_esql(esql, &block)
86
+ # build new query
87
+ query = ElasticsearchRecord::Query.new(
88
+ type: ElasticsearchRecord::Query::TYPE_ESQL,
89
+ body: { query: esql },
90
+ # IMPORTANT: Always provide all columns
91
+ columns: source_column_names)
92
+
93
+ _load_from_sql(_query_by_sql(query), &block)
94
+ end
95
+
96
+ # executes an +esql+ by provided *ES|QL* query
97
+ # Does NOT instantiate records.
98
+ # @param [String] esql
99
+ # @param [Boolean] async (default: false)
100
+ def esql(esql, async: false)
101
+ # build new query
102
+ query = ElasticsearchRecord::Query.new(
103
+ type: ElasticsearchRecord::Query::TYPE_ESQL,
104
+ body: { query: esql },
105
+ # IMPORTANT: Always provide all columns
106
+ columns: source_column_names)
107
+
108
+ connection.exec_query(query, "#{name} ES|QL", async: async)
109
+ end
110
+
111
+
112
+ # executes a +msearch+ by provided *RAW* queries.
113
+ # Does NOT instantiate records.
114
+ # @param [Array<String>] queries
115
+ # @param [Boolean] async (default: false)
81
116
  def msearch(queries, async: false)
82
117
  # build new msearch query
83
118
  query = ElasticsearchRecord::Query.new(
@@ -45,6 +45,84 @@ module ElasticsearchRecord
45
45
  end
46
46
  end
47
47
 
48
+ # A boxplot metrics aggregation that computes boxplot of numeric values extracted from the aggregated documents.
49
+ # These values can be generated from specific numeric or histogram fields in the documents.
50
+ #
51
+ # The boxplot aggregation returns essential information for making a box plot:
52
+ # *minimum*, *maximum*, *median*, *first quartile* (25th percentile) and *third quartile* (75th percentile) values.
53
+ #
54
+ # Person.all.boxplot(:age)
55
+ # > {
56
+ # "min": 0.0,
57
+ # "max": 990.0,
58
+ # "q1": 167.5,
59
+ # "q2": 445.0,
60
+ # "q3": 722.5,
61
+ # "lower": 0.0,
62
+ # "upper": 990.0
63
+ # }
64
+ #
65
+ # @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-boxplot-aggregation.html
66
+ #
67
+ # @param [Symbol, String] column_name
68
+ def boxplot(column_name)
69
+ calculate(:boxplot, column_name)
70
+ end
71
+
72
+ # A multi-value metrics aggregation that computes stats over numeric values extracted from the aggregated documents.
73
+ # The stats that are returned consist of: *min*, *max*, *sum*, *count* and *avg*.
74
+ #
75
+ # Person.all.stats(:age)
76
+ # > {
77
+ # "count": 10,
78
+ # "min": 0.0,
79
+ # "max": 990.0,
80
+ # "sum": 16859,
81
+ # "avg": 75.5
82
+ # }
83
+ #
84
+ # @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-stats-aggregation.html
85
+ #
86
+ # @param [Symbol, String] column_name
87
+ def stats(column_name)
88
+ calculate(:stats, column_name)
89
+ end
90
+
91
+ # A multi-value metrics aggregation that computes statistics over string values extracted from the aggregated documents.
92
+ # These values can be retrieved from specific keyword fields.
93
+ #
94
+ # Person.all.string_stats(:name)
95
+ # > {
96
+ # "count": 5,
97
+ # "min_length": 24,
98
+ # "max_length": 30,
99
+ # "avg_length": 28.8,
100
+ # "entropy": 3.94617750050791
101
+ # }
102
+ #
103
+ # @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-string-stats-aggregation.html
104
+ #
105
+ # @param [Symbol, String] column_name
106
+ def string_stats(column_name)
107
+ calculate(:string_stats, column_name)
108
+ end
109
+
110
+ # The matrix_stats aggregation is a numeric aggregation that computes the following statistics over a set of document fields:
111
+ # *count* Number of per field samples included in the calculation.
112
+ # *mean* The average value for each field.
113
+ # *variance* Per field Measurement for how spread out the samples are from the mean.
114
+ # *skewness* Per field measurement quantifying the asymmetric distribution around the mean.
115
+ # *kurtosis* Per field measurement quantifying the shape of the distribution.
116
+ # *covariance* A matrix that quantitatively describes how changes in one field are associated with another.
117
+ # *correlation* The covariance matrix scaled to a range of -1 to 1, inclusive. Describes the relationship between field distributions.
118
+ #
119
+ # @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-matrix-stats-aggregation.html
120
+ #
121
+ # @param [Array<Symbol|String>] column_names
122
+ def matrix_stats(*column_names)
123
+ calculate(:matrix_stats, *column_names)
124
+ end
125
+
48
126
  # A multi-value metrics aggregation that calculates one or more
49
127
  # percentiles over numeric values extracted from the aggregated documents.
50
128
  # Returns a hash with empty values (but keys still exists) if there is no row.
@@ -59,6 +137,9 @@ module ElasticsearchRecord
59
137
  # "95.0" => 2021.0,
60
138
  # "99.0" => 2022.0
61
139
  # }
140
+ #
141
+ # @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-percentile-aggregation.html
142
+ #
62
143
  # @param [Symbol, String] column_name
63
144
  def percentiles(column_name)
64
145
  calculate(:percentiles, column_name, node: :values)
@@ -81,6 +162,9 @@ module ElasticsearchRecord
81
162
  # "95.0" => 2021.0,
82
163
  # "99.0" => 2022.0
83
164
  # }
165
+ #
166
+ # @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-percentile-rank-aggregation.html
167
+ #
84
168
  # @param [Symbol, String] column_name
85
169
  # @param [Array] values
86
170
  def percentile_ranks(column_name, values)
@@ -92,6 +176,8 @@ module ElasticsearchRecord
92
176
  # Person.all.cardinality(:age)
93
177
  # > 12
94
178
  #
179
+ # @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-cardinality-aggregation.html
180
+ #
95
181
  # @param [Symbol, String] column_name
96
182
  def cardinality(column_name)
97
183
  calculate(:cardinality, column_name)
@@ -101,6 +187,8 @@ module ElasticsearchRecord
101
187
  #
102
188
  # Person.all.average(:age) # => 35.8
103
189
  #
190
+ # @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-avg-aggregation.html
191
+ #
104
192
  # @param [Symbol, String] column_name
105
193
  def average(column_name)
106
194
  calculate(:avg, column_name)
@@ -112,6 +200,8 @@ module ElasticsearchRecord
112
200
  # Person.all.minimum(:age)
113
201
  # > 7
114
202
  #
203
+ # @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-min-aggregation.html
204
+ #
115
205
  # @param [Symbol, String] column_name
116
206
  def minimum(column_name)
117
207
  calculate(:min, column_name)
@@ -123,33 +213,58 @@ module ElasticsearchRecord
123
213
  #
124
214
  # Person.all.maximum(:age) # => 93
125
215
  #
216
+ # @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-max-aggregation.html
217
+ #
126
218
  # @param [Symbol, String] column_name
127
219
  def maximum(column_name)
128
220
  calculate(:max, column_name)
129
221
  end
130
222
 
223
+ # This single-value aggregation approximates the median absolute deviation of its search results.
224
+ # Median absolute deviation is a measure of variability. It is a robust statistic,
225
+ # meaning that it is useful for describing data that may have outliers, or may not be normally distributed.
226
+ # For such data it can be more descriptive than standard deviation.
227
+ #
228
+ # It is calculated as the median of each data point’s deviation from the median of the entire sample.
229
+ # That is, for a random variable X, the median absolute deviation is median(|median(X) - Xi|).
230
+ #
231
+ # Person.all.median_absolute_deviation(:age) # => 91
232
+ #
233
+ # @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-median-absolute-deviation-aggregation.html
234
+ #
235
+ # @param [Symbol, String] column_name
236
+ def median_absolute_deviation(column_name)
237
+ calculate(:median_absolute_deviation, column_name)
238
+ end
239
+
131
240
  # Calculates the sum of values on a given column. The value is returned
132
241
  # with the same data type of the column, +0+ if there's no row. See
133
242
  # #calculate for examples with options.
134
243
  #
135
244
  # Person.all.sum(:age) # => 4562
136
245
  #
246
+ # @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-sum-aggregation.html
247
+ #
137
248
  # @param [Symbol, String] column_name (optional)
138
249
  def sum(column_name)
139
250
  calculate(:sum, column_name)
140
251
  end
141
252
 
142
- # creates a aggregation with the provided metric (e.g. :sum) and column.
253
+ # creates an aggregation with the provided metric (e.g. :sum) and columns.
143
254
  # returns the metric node (default: :value) from the aggregations result.
144
255
  # @param [Symbol, String] metric
145
- # @param [Symbol, String] column
256
+ # @param [Array<Symbol|String>] columns
146
257
  # @param [Hash] opts - additional arguments that get merged with the metric definition
147
258
  # @param [Symbol] node (default :value)
148
- def calculate(metric, column, opts: {}, node: :value)
149
- metric_key = "#{column}_#{metric}"
259
+ def calculate(metric, *columns, opts: {}, node: :value)
260
+ metric_key = "calculate_#{metric}"
150
261
 
151
262
  # spawn a new aggregation and return the aggs
152
- response = aggregate(metric_key, { metric => { field: column }.merge(opts) }).aggregations
263
+ response = if columns.size == 1
264
+ aggregate(metric_key, { metric => { field: columns[0] }.merge(opts) }).aggregations
265
+ else
266
+ aggregate(metric_key, { metric => { fields: columns }.merge(opts) }).aggregations
267
+ end
153
268
 
154
269
  response[metric_key][node]
155
270
  end
@@ -125,6 +125,18 @@ module ElasticsearchRecord
125
125
  self
126
126
  end
127
127
  end
128
+
129
+ # overwrite original methods to provide an Elasticsearch version:
130
+ # checks against the +#access_id_fielddata?+ to ensure the Elasticsearch Cluster allows access on the +_id+ field.
131
+ def reverse_sql_order(order_query)
132
+ if order_query.empty?
133
+ return [table[primary_key].desc] if primary_key != '_id' || klass.connection.access_id_fielddata?
134
+ raise ActiveRecord::IrreversibleOrderError,
135
+ "Relation has no current order and fielddata access on the _id field is disallowed! However, you can re-enable it by updating the dynamic cluster setting: indices.id_field_data.enabled"
136
+ end
137
+
138
+ super
139
+ end
128
140
  end
129
141
  end
130
142
  end
@@ -102,6 +102,16 @@ module ElasticsearchRecord
102
102
  configure!(:__query__, refresh: value)
103
103
  end
104
104
 
105
+ # sets the query's +timeout+ value.
106
+ # @param [Integer, String] value - the query timeout (NOTE: the +true+ default mirrors #refresh - confirm it is intended)
107
+ def timeout(value = true)
108
+ spawn.timeout!(value)
109
+ end
110
+
111
+ def timeout!(value = true)
112
+ configure!(:__query__, timeout: value)
113
+ end
114
+
105
115
  # add a whole query 'node' to the query.
106
116
  # @example
107
117
  # query(:bool, {filter: ...})
@@ -90,7 +90,9 @@ module ElasticsearchRecord
90
90
  #
91
91
  # @param [String] keep_alive - how long to keep alive (for each single request) - default: '1m'
92
92
  # @param [Integer] batch_size - how many results per query (default: 1000 - this means at least 10 queries before reaching the +max_result_window+)
93
- def pit_results(keep_alive: '1m', batch_size: 1000)
93
+ # @param [Boolean] ids_only - resolve ids only from results
94
+ # @return [Integer, Array] either returns the results-array (no block provided) or the total amount of results
95
+ def pit_results(keep_alive: '1m', batch_size: 1000, ids_only: false)
94
96
  raise(ArgumentError, "Batch size cannot be above the 'max_result_window' (#{klass.max_result_window}) !") if batch_size > klass.max_result_window
95
97
 
96
98
  # check if limit or offset values were provided
@@ -105,6 +107,9 @@ module ElasticsearchRecord
105
107
  # see @ https://www.elastic.co/guide/en/elasticsearch/reference/current/paginate-search-results.html
106
108
  relation.order!(_shard_doc: :asc) if relation.order_values.empty? && klass.connection.access_shard_doc?
107
109
 
110
+ # resolve ids only
111
+ relation.reselect!('_id') if ids_only
112
+
108
113
  # clear limit & offset
109
114
  relation.offset!(nil).limit!(nil)
110
115
 
@@ -122,10 +127,16 @@ module ElasticsearchRecord
122
127
  # resolve new data until we got all we need
123
128
  loop do
124
129
  # change pit settings & limit (spawn is required, since a +resolve+ will make the relation immutable)
125
- current_response = relation.spawn.configure!(current_pit_hash).limit!(batch_size).resolve('Pit').response
130
+ current_response = relation.spawn.configure!(current_pit_hash).limit!(batch_size).resolve('Pit Results').response
126
131
 
127
132
  # resolve only data from hits->hits[{_source}]
128
- current_results = current_response['hits']['hits'].map { |result| result['_source'].merge('_id' => result['_id']) }
133
+ current_results = if ids_only
134
+ current_response['hits']['hits'].map { |result| result['_id'] }
135
+ # future with helper
136
+ # current_response['hits']['hits'].map.from_hash('_id')
137
+ else
138
+ current_response['hits']['hits'].map { |result| result['_source'].merge('_id' => result['_id']) }
139
+ end
129
140
  current_results_length = current_results.length
130
141
 
131
142
  # check if we reached the required offset
@@ -171,12 +182,38 @@ module ElasticsearchRecord
171
182
  end
172
183
  end
173
184
 
174
- # return results array
175
- results
185
+ # return results array or total value
186
+ if block_given?
187
+ results_total
188
+ else
189
+ results
190
+ end
176
191
  end
177
192
 
178
193
  alias_method :total_results, :pit_results
179
194
 
195
+ # executes a delete query in a +point_in_time+ scope.
196
+ # this will provide the possibility to delete more than the +max_result_window+ (default: 10000) docs in a batched process.
197
+ # @param [String] keep_alive
198
+ # @param [Integer] batch_size
199
+ # @param [Boolean] refresh - refresh the index after the delete finished (default: true)
200
+ # @return [Integer] total amount of deleted docs
201
+ def pit_delete(keep_alive: '1m', batch_size: 1000, refresh: true)
202
+ delete_count = select('_id').pit_results(keep_alive: keep_alive, batch_size: batch_size, ids_only: true) do |ids|
203
+ # skip empty results
204
+ next unless ids.any?
205
+
206
+ # delete all IDs, but do not refresh index, yet
207
+ klass.connection.api(:core, :bulk, { index: klass.table_name, body: ids.map { |id| { delete: { _id: id } } }, refresh: false }, "#{klass} Pit Delete")
208
+ end
209
+
210
+ # refresh index
211
+ klass.connection.refresh_table(klass.table_name) if refresh
212
+
213
+ # return total count
214
+ delete_count
215
+ end
216
+
180
217
  # returns the RAW response for the current query
181
218
  # @return [Array]
182
219
  def response
@@ -49,7 +49,7 @@ module ElasticsearchRecord
49
49
  end
50
50
 
51
51
  # Returns the RAW +_source+ data from each hit - aka. +rows+.
52
- # PLEASE NOTE: The array will only contain the RAW data from each +_source+ (meta info like '_score' is not included)
52
+ # PLEASE NOTE: The array will only contain the RAW data from each +_source+ (meta info like '_id' or '_score' is not included)
53
53
  # @return [Array]
54
54
  def results
55
55
  return [] unless response['hits']
@@ -55,6 +55,16 @@ module ElasticsearchRecord
55
55
 
56
56
  autoload :ElasticsearchDatabaseTasks, 'elasticsearch_record/tasks/elasticsearch_database_tasks'
57
57
  end
58
+
59
+ ##
60
+ # :singleton-method:
61
+ # Specifies if an exception should be raised while using transactions.
62
+ # Since ActiveRecord does not have any configuration option to support transactions and
63
+ # Elasticsearch does **NOT** support transactions, it may be risky to ignore them.
64
+ # By default, transactions are 'silently swallowed' to not break any existing applications...
65
+ # However enabling this flag will surely fail transactional tests ...
66
+ singleton_class.attr_accessor :error_on_transaction
67
+ self.error_on_transaction = false
58
68
  end
59
69
 
60
70
  ActiveSupport.on_load(:active_record) do