elasticsearch_record 1.5.3 → 1.7.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.yardopts +1 -1
- data/README.md +103 -19
- data/docs/CHANGELOG.md +34 -2
- data/docs/{LICENSE.txt → LICENSE} +1 -1
- data/lib/active_record/connection_adapters/elasticsearch/table_statements.rb +62 -4
- data/lib/active_record/connection_adapters/elasticsearch/transactions.rb +54 -0
- data/lib/active_record/connection_adapters/elasticsearch/unsupported_implementation.rb +0 -7
- data/lib/active_record/connection_adapters/elasticsearch_adapter.rb +17 -3
- data/lib/arel/collectors/elasticsearch_query.rb +3 -0
- data/lib/elasticsearch_record/core.rb +15 -2
- data/lib/elasticsearch_record/gem_version.rb +2 -2
- data/lib/elasticsearch_record/instrumentation/log_subscriber.rb +7 -3
- data/lib/elasticsearch_record/model_api.rb +68 -10
- data/lib/elasticsearch_record/model_schema.rb +2 -5
- data/lib/elasticsearch_record/persistence.rb +13 -1
- data/lib/elasticsearch_record/query.rb +19 -7
- data/lib/elasticsearch_record/querying.rb +36 -1
- data/lib/elasticsearch_record/relation/calculation_methods.rb +120 -5
- data/lib/elasticsearch_record/relation/core_methods.rb +12 -0
- data/lib/elasticsearch_record/relation/query_methods.rb +10 -0
- data/lib/elasticsearch_record/relation/result_methods.rb +42 -5
- data/lib/elasticsearch_record/result.rb +1 -1
- data/lib/elasticsearch_record.rb +10 -0
- metadata +4 -3
@@ -8,9 +8,6 @@ module ElasticsearchRecord
|
|
8
8
|
@klass = klass
|
9
9
|
end
|
10
10
|
|
11
|
-
# undelegated schema methods: clone rename create
|
12
|
-
# those should not be quick-accessible, since they might end in heavily broken index
|
13
|
-
|
14
11
|
# delegated dangerous methods (created with exclamation mark)
|
15
12
|
# not able to provide individual arguments - always the defaults will be used!
|
16
13
|
#
|
@@ -26,6 +23,21 @@ module ElasticsearchRecord
|
|
26
23
|
end
|
27
24
|
end
|
28
25
|
|
26
|
+
# delegated dangerous methods with args
|
27
|
+
#
|
28
|
+
# @example
|
29
|
+
# create!(:new_table_name, settings: , mappings:, alias: , ...)
|
30
|
+
# clone!(:new_table_name)
|
31
|
+
# rename!(:new_table_name)
|
32
|
+
# backup!(to: :backup_name)
|
33
|
+
# restore!(from: :backup_name)
|
34
|
+
# reindex!(:new_table_name)
|
35
|
+
%w(create clone rename backup restore reindex).each do |method|
|
36
|
+
define_method("#{method}!") do |*args|
|
37
|
+
_connection.send("#{method}_table", _index_name, *args)
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
29
41
|
# delegated dangerous methods with confirm parameter (created with exclamation mark)
|
30
42
|
# an exception will be raised, if +confirm:true+ is missing.
|
31
43
|
#
|
@@ -146,12 +158,51 @@ module ElasticsearchRecord
|
|
146
158
|
# Shortcut for meta_exists
|
147
159
|
# @return [Boolean]
|
148
160
|
|
161
|
+
# @!method create!(force: false, copy_from: nil, if_not_exists: false, **options)
|
162
|
+
# Shortcut for create_table
|
163
|
+
# @param [Boolean] force
|
164
|
+
# @param [nil, String] copy_from
|
165
|
+
# @param [Hash] options
|
166
|
+
# @return [Boolean] acknowledged status
|
167
|
+
|
168
|
+
# @!method clone!(target_name, **options)
|
169
|
+
# Shortcut for clone_table
|
170
|
+
# @param [String] target_name
|
171
|
+
# @param [Hash] options
|
172
|
+
# @return [Boolean]
|
173
|
+
|
174
|
+
# @!method rename!(target_name, timeout: nil, **options)
|
175
|
+
# Shortcut for rename_table
|
176
|
+
# @param [String] target_name
|
177
|
+
# @param [String (frozen)] timeout
|
178
|
+
# @param [Hash] options
|
179
|
+
|
180
|
+
# @!method backup!(to: nil, close: true)
|
181
|
+
# Shortcut for backup_table
|
182
|
+
# @param [String] to
|
183
|
+
# @param [Boolean] close
|
184
|
+
# @return [String] backup_name
|
185
|
+
|
186
|
+
# @!method restore!(from:, timeout: nil, open: true, drop_backup: false)
|
187
|
+
# Shortcut for restore_table
|
188
|
+
# @param [String] from
|
189
|
+
# @param [String (frozen)] timeout
|
190
|
+
# @param [Boolean] open
|
191
|
+
# @return [Boolean] acknowledged status
|
192
|
+
|
193
|
+
# @!method reindex!(target_name, **options)
|
194
|
+
# Shortcut for reindex_table
|
195
|
+
# @param [String] target_name
|
196
|
+
# @param [Hash] options
|
197
|
+
# @return [Hash] reindex stats
|
198
|
+
|
149
199
|
# fast insert/update data.
|
200
|
+
# IMPORTANT: Any 'doc'-id must be provided with underscore '_' ( +:_id+ )
|
150
201
|
#
|
151
202
|
# @example
|
152
203
|
# index([{name: 'Hans', age: 34}, {name: 'Peter', age: 22}])
|
153
204
|
#
|
154
|
-
# index({
|
205
|
+
# index({_id: 5, name: 'Georg', age: 87})
|
155
206
|
#
|
156
207
|
# @param [Array<Hash>,Hash] data
|
157
208
|
# @param [Hash] options
|
@@ -160,6 +211,7 @@ module ElasticsearchRecord
|
|
160
211
|
end
|
161
212
|
|
162
213
|
# fast insert new data.
|
214
|
+
# IMPORTANT: Any 'doc'-id must be provided with underscore '_' ( +:_id+ )
|
163
215
|
#
|
164
216
|
# @example
|
165
217
|
# insert([{name: 'Hans', age: 34}, {name: 'Peter', age: 22}])
|
@@ -173,11 +225,12 @@ module ElasticsearchRecord
|
|
173
225
|
end
|
174
226
|
|
175
227
|
# fast update existing data.
|
228
|
+
# IMPORTANT: Any 'doc'-id must be provided with underscore '_' ( +:_id+ )
|
176
229
|
#
|
177
230
|
# @example
|
178
|
-
# update([{
|
231
|
+
# update([{_id: 1, name: 'Hansi'}, {_id: 2, name: 'Peter Parker', age: 42}])
|
179
232
|
#
|
180
|
-
# update({
|
233
|
+
# update({_id: 3, name: 'Georg McCain'})
|
181
234
|
#
|
182
235
|
# @param [Array<Hash>,Hash] data
|
183
236
|
# @param [Hash] options
|
@@ -186,13 +239,14 @@ module ElasticsearchRecord
|
|
186
239
|
end
|
187
240
|
|
188
241
|
# fast delete data.
|
242
|
+
# IMPORTANT: Any 'doc'-id must be provided with underscore '_' ( +:_id+ )
|
189
243
|
#
|
190
244
|
# @example
|
191
245
|
# delete([1,2,3,5])
|
192
246
|
#
|
193
247
|
# delete(3)
|
194
248
|
#
|
195
|
-
# delete({
|
249
|
+
# delete({_id: 2})
|
196
250
|
#
|
197
251
|
# @param [Array<Hash>,Hash] data
|
198
252
|
# @param [Hash] options
|
@@ -202,12 +256,12 @@ module ElasticsearchRecord
|
|
202
256
|
if data[0].is_a?(Hash)
|
203
257
|
bulk(data, :delete, **options)
|
204
258
|
else
|
205
|
-
bulk(data.map { |id| {
|
259
|
+
bulk(data.map { |id| { _id: id } }, :delete, **options)
|
206
260
|
end
|
207
261
|
end
|
208
262
|
|
209
263
|
# bulk handle provided data (single Hash or multiple Array<Hash>).
|
210
|
-
# @param [Hash,Array<Hash
|
264
|
+
# @param [Hash,Array<Hash<Symbol=>Object>>] data - the data to insert/update/delete ...
|
211
265
|
# @param [Symbol] operation
|
212
266
|
# @param [Boolean, Symbol] refresh
|
213
267
|
def bulk(data, operation = :index, refresh: true, **options)
|
@@ -215,7 +269,11 @@ module ElasticsearchRecord
|
|
215
269
|
|
216
270
|
_connection.api(:core, :bulk, {
|
217
271
|
index: _index_name,
|
218
|
-
body:
|
272
|
+
body: if operation == :update
|
273
|
+
data.map { |item| { operation => { _id: (item[:_id].presence || item['_id']), data: { doc: item.except(:_id, '_id') } } } }
|
274
|
+
else
|
275
|
+
data.map { |item| { operation => { _id: (item[:_id].presence || item['_id']), data: item.except(:_id, '_id') } } }
|
276
|
+
end,
|
219
277
|
refresh: refresh
|
220
278
|
}, "BULK #{operation.to_s.upcase}", **options)
|
221
279
|
end
|
@@ -52,11 +52,8 @@ module ElasticsearchRecord
|
|
52
52
|
# @return [Array<String>]
|
53
53
|
def searchable_column_names
|
54
54
|
@searchable_column_names ||= columns.select(&:enabled?).reduce([]) { |m, column|
|
55
|
-
m
|
56
|
-
|
57
|
-
m += column.property_names
|
58
|
-
m.uniq
|
59
|
-
}
|
55
|
+
m + [column.name] + column.field_names + column.property_names
|
56
|
+
}.uniq
|
60
57
|
end
|
61
58
|
|
62
59
|
# clears schema-related instance variables.
|
@@ -11,7 +11,7 @@ module ElasticsearchRecord
|
|
11
11
|
# values is not a "key=>values"-Hash, but a +ActiveModel::Attribute+ - so the casted values gets resolved here
|
12
12
|
values = values.transform_values(&:value)
|
13
13
|
|
14
|
-
# resolve & update a auto_increment value
|
14
|
+
# resolve & update a auto_increment value, if configured
|
15
15
|
_insert_with_auto_increment(values) do |arguments|
|
16
16
|
# build new query
|
17
17
|
query = ElasticsearchRecord::Query.new(
|
@@ -68,6 +68,9 @@ module ElasticsearchRecord
|
|
68
68
|
if (id = values[self.primary_key]).present?
|
69
69
|
yield({id: id})
|
70
70
|
elsif auto_increment?
|
71
|
+
# future increments: uuid (+uuidv6 ?), hex, radix(2-36), integer
|
72
|
+
# allocated through: primary_key_type
|
73
|
+
|
71
74
|
ids = [
|
72
75
|
# try to resolve the current-auto-increment value from the tables meta
|
73
76
|
connection.table_metas(self.table_name).dig('auto_increment').to_i + 1,
|
@@ -88,5 +91,14 @@ module ElasticsearchRecord
|
|
88
91
|
end
|
89
92
|
end
|
90
93
|
end
|
94
|
+
|
95
|
+
# overwrite to provide a Elasticsearch version:
|
96
|
+
# Creates a record with values matching those of the instance attributes
|
97
|
+
# and returns its id.
|
98
|
+
def _create_record(*args)
|
99
|
+
undelegate_id_attribute_with do
|
100
|
+
super
|
101
|
+
end
|
102
|
+
end
|
91
103
|
end
|
92
104
|
end
|
@@ -12,6 +12,7 @@ module ElasticsearchRecord
|
|
12
12
|
TYPE_SEARCH = :search
|
13
13
|
TYPE_MSEARCH = :msearch
|
14
14
|
TYPE_SQL = :sql
|
15
|
+
TYPE_ESQL = :esql
|
15
16
|
|
16
17
|
# -- DOCUMENT TYPES ------------------------------------------------------------------------------------------------
|
17
18
|
TYPE_CREATE = :create
|
@@ -34,7 +35,7 @@ module ElasticsearchRecord
|
|
34
35
|
# includes valid types only
|
35
36
|
TYPES = [
|
36
37
|
# -- QUERY TYPES
|
37
|
-
TYPE_COUNT, TYPE_SEARCH, TYPE_MSEARCH, TYPE_SQL,
|
38
|
+
TYPE_COUNT, TYPE_SEARCH, TYPE_MSEARCH, TYPE_SQL, TYPE_ESQL,
|
38
39
|
# -- DOCUMENT TYPES
|
39
40
|
TYPE_CREATE, TYPE_UPDATE, TYPE_UPDATE_BY_QUERY, TYPE_DELETE, TYPE_DELETE_BY_QUERY,
|
40
41
|
|
@@ -46,7 +47,7 @@ module ElasticsearchRecord
|
|
46
47
|
|
47
48
|
# includes reading types only
|
48
49
|
READ_TYPES = [
|
49
|
-
TYPE_COUNT, TYPE_SEARCH, TYPE_MSEARCH, TYPE_SQL
|
50
|
+
TYPE_COUNT, TYPE_SEARCH, TYPE_MSEARCH, TYPE_SQL, TYPE_ESQL
|
50
51
|
].freeze
|
51
52
|
|
52
53
|
# defines a body to be executed if the query fails - +(none)+
|
@@ -60,6 +61,7 @@ module ElasticsearchRecord
|
|
60
61
|
# if no special type is defined, it simply uses +[:core,self.type]+
|
61
62
|
GATES = {
|
62
63
|
TYPE_SQL => [:sql, :query],
|
64
|
+
TYPE_ESQL => [:esql, :query],
|
63
65
|
TYPE_INDEX_CREATE => [:indices, :create],
|
64
66
|
TYPE_INDEX_CLONE => [:indices, :clone],
|
65
67
|
TYPE_INDEX_UPDATE_MAPPING => [:indices, :put_mapping],
|
@@ -86,9 +88,9 @@ module ElasticsearchRecord
|
|
86
88
|
# @!attribute Boolean
|
87
89
|
attr_reader :refresh
|
88
90
|
|
89
|
-
# defines the query
|
90
|
-
# @!attribute
|
91
|
-
|
91
|
+
# defines the query timeout
|
92
|
+
# @!attribute Integer|String
|
93
|
+
attr_reader :timeout
|
92
94
|
|
93
95
|
# defines the query arguments to be passed to the API
|
94
96
|
# @!attribute Hash
|
@@ -98,11 +100,12 @@ module ElasticsearchRecord
|
|
98
100
|
# @!attribute Array
|
99
101
|
attr_reader :columns
|
100
102
|
|
101
|
-
def initialize(index: nil, type: TYPE_UNDEFINED, status: STATUS_VALID, body: nil, refresh: nil, arguments: {}, columns: [])
|
103
|
+
def initialize(index: nil, type: TYPE_UNDEFINED, status: STATUS_VALID, body: nil, refresh: nil, timeout: nil, arguments: {}, columns: [])
|
102
104
|
@index = index
|
103
105
|
@type = type
|
104
106
|
@status = status
|
105
107
|
@refresh = refresh
|
108
|
+
@timeout = timeout
|
106
109
|
@body = body
|
107
110
|
@arguments = arguments
|
108
111
|
@columns = columns
|
@@ -117,6 +120,12 @@ module ElasticsearchRecord
|
|
117
120
|
self
|
118
121
|
end
|
119
122
|
|
123
|
+
# returns true, if the query failed
|
124
|
+
# @return [Boolean]
|
125
|
+
def failed?
|
126
|
+
self.status == STATUS_FAILED
|
127
|
+
end
|
128
|
+
|
120
129
|
# returns true, if the query is valid (e.g. index & type defined)
|
121
130
|
# @return [Boolean]
|
122
131
|
def valid?
|
@@ -142,7 +151,7 @@ module ElasticsearchRecord
|
|
142
151
|
# failed queries will return the related +FAILED_BODIES+ or +{}+ as fallback
|
143
152
|
# @return [Hash, nil]
|
144
153
|
def body
|
145
|
-
return (FAILED_BODIES[self.type].presence || {}) if
|
154
|
+
return (FAILED_BODIES[self.type].presence || {}) if failed?
|
146
155
|
|
147
156
|
@body
|
148
157
|
end
|
@@ -163,6 +172,9 @@ module ElasticsearchRecord
|
|
163
172
|
# set refresh, if defined (also includes false value)
|
164
173
|
args[:refresh] = self.refresh unless self.refresh.nil?
|
165
174
|
|
175
|
+
# set timeout, if present
|
176
|
+
args[:timeout] = self.timeout if self.timeout.present?
|
177
|
+
|
166
178
|
args
|
167
179
|
end
|
168
180
|
|
@@ -77,7 +77,42 @@ module ElasticsearchRecord
|
|
77
77
|
_load_from_sql(_query_by_sql(query), &block)
|
78
78
|
end
|
79
79
|
|
80
|
-
#
|
80
|
+
# ES|QL query API
|
81
|
+
# Returns search results for an ES|QL (Elasticsearch query language) query.
|
82
|
+
#
|
83
|
+
# @param [String] esql
|
84
|
+
# @param [Proc] block
|
85
|
+
def find_by_esql(esql, &block)
|
86
|
+
# build new query
|
87
|
+
query = ElasticsearchRecord::Query.new(
|
88
|
+
type: ElasticsearchRecord::Query::TYPE_ESQL,
|
89
|
+
body: { query: esql },
|
90
|
+
# IMPORTANT: Always provide all columns
|
91
|
+
columns: source_column_names)
|
92
|
+
|
93
|
+
_load_from_sql(_query_by_sql(query), &block)
|
94
|
+
end
|
95
|
+
|
96
|
+
# executes a +esql+ by provided *ES|QL* query
|
97
|
+
# Does NOT instantiate records.
|
98
|
+
# @param [String] esql
|
99
|
+
# @param [Boolean] async (default: false)
|
100
|
+
def esql(esql, async: false)
|
101
|
+
# build new query
|
102
|
+
query = ElasticsearchRecord::Query.new(
|
103
|
+
type: ElasticsearchRecord::Query::TYPE_ESQL,
|
104
|
+
body: { query: esql },
|
105
|
+
# IMPORTANT: Always provide all columns
|
106
|
+
columns: source_column_names)
|
107
|
+
|
108
|
+
connection.exec_query(query, "#{name} ES|QL", async: async)
|
109
|
+
end
|
110
|
+
|
111
|
+
|
112
|
+
# executes a +msearch+ by provided *RAW* queries.
|
113
|
+
# Does NOT instantiate records.
|
114
|
+
# @param [Array<String>] queries
|
115
|
+
# @param [Boolean] async (default: false)
|
81
116
|
def msearch(queries, async: false)
|
82
117
|
# build new msearch query
|
83
118
|
query = ElasticsearchRecord::Query.new(
|
@@ -45,6 +45,84 @@ module ElasticsearchRecord
|
|
45
45
|
end
|
46
46
|
end
|
47
47
|
|
48
|
+
# A boxplot metrics aggregation that computes boxplot of numeric values extracted from the aggregated documents.
|
49
|
+
# These values can be generated from specific numeric or histogram fields in the documents.
|
50
|
+
#
|
51
|
+
# The boxplot aggregation returns essential information for making a box plot:
|
52
|
+
# *minimum*, *maximum*, *median*, *first quartile* (25th percentile) and *third quartile* (75th percentile) values.
|
53
|
+
#
|
54
|
+
# Person.all.boxplot(:age)
|
55
|
+
# > {
|
56
|
+
# "min": 0.0,
|
57
|
+
# "max": 990.0,
|
58
|
+
# "q1": 167.5,
|
59
|
+
# "q2": 445.0,
|
60
|
+
# "q3": 722.5,
|
61
|
+
# "lower": 0.0,
|
62
|
+
# "upper": 990.0
|
63
|
+
# }
|
64
|
+
#
|
65
|
+
# @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-boxplot-aggregation.html
|
66
|
+
#
|
67
|
+
# @param [Symbol, String] column_name
|
68
|
+
def boxplot(column_name)
|
69
|
+
calculate(:boxplot, column_name)
|
70
|
+
end
|
71
|
+
|
72
|
+
# A multi-value metrics aggregation that computes stats over numeric values extracted from the aggregated documents.
|
73
|
+
# The stats that are returned consist of: *min*, *max*, *sum*, *count* and *avg*.
|
74
|
+
#
|
75
|
+
# Person.all.stats(:age)
|
76
|
+
# > {
|
77
|
+
# "count": 10,
|
78
|
+
# "min": 0.0,
|
79
|
+
# "max": 990.0,
|
80
|
+
# "sum": 16859,
|
81
|
+
# "avg": 75.5
|
82
|
+
# }
|
83
|
+
#
|
84
|
+
# @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-stats-aggregation.html
|
85
|
+
#
|
86
|
+
# @param [Symbol, String] column_name
|
87
|
+
def stats(column_name)
|
88
|
+
calculate(:stats, column_name)
|
89
|
+
end
|
90
|
+
|
91
|
+
# A multi-value metrics aggregation that computes statistics over string values extracted from the aggregated documents.
|
92
|
+
# These values can be retrieved either from specific keyword fields.
|
93
|
+
#
|
94
|
+
# Person.all.string_stats(:name)
|
95
|
+
# > {
|
96
|
+
# "count": 5,
|
97
|
+
# "min_length": 24,
|
98
|
+
# "max_length": 30,
|
99
|
+
# "avg_length": 28.8,
|
100
|
+
# "entropy": 3.94617750050791
|
101
|
+
# }
|
102
|
+
#
|
103
|
+
# @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-string-stats-aggregation.html
|
104
|
+
#
|
105
|
+
# @param [Symbol, String] column_name
|
106
|
+
def string_stats(column_name)
|
107
|
+
calculate(:string_stats, column_name)
|
108
|
+
end
|
109
|
+
|
110
|
+
# The matrix_stats aggregation is a numeric aggregation that computes the following statistics over a set of document fields:
|
111
|
+
# *count* Number of per field samples included in the calculation.
|
112
|
+
# *mean* The average value for each field.
|
113
|
+
# *variance* Per field Measurement for how spread out the samples are from the mean.
|
114
|
+
# *skewness* Per field measurement quantifying the asymmetric distribution around the mean.
|
115
|
+
# *kurtosis* Per field measurement quantifying the shape of the distribution.
|
116
|
+
# *covariance* A matrix that quantitatively describes how changes in one field are associated with another.
|
117
|
+
# *correlation* The covariance matrix scaled to a range of -1 to 1, inclusive. Describes the relationship between field distributions.
|
118
|
+
#
|
119
|
+
# @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-matrix-stats-aggregation.html
|
120
|
+
#
|
121
|
+
# @param [Array<Symbol|String>] column_names
|
122
|
+
def matrix_stats(*column_names)
|
123
|
+
calculate(:matrix_stats, *column_names)
|
124
|
+
end
|
125
|
+
|
48
126
|
# A multi-value metrics aggregation that calculates one or more
|
49
127
|
# percentiles over numeric values extracted from the aggregated documents.
|
50
128
|
# Returns a hash with empty values (but keys still exists) if there is no row.
|
@@ -59,6 +137,9 @@ module ElasticsearchRecord
|
|
59
137
|
# "95.0" => 2021.0,
|
60
138
|
# "99.0" => 2022.0
|
61
139
|
# }
|
140
|
+
#
|
141
|
+
# @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-percentile-aggregation.html
|
142
|
+
#
|
62
143
|
# @param [Symbol, String] column_name
|
63
144
|
def percentiles(column_name)
|
64
145
|
calculate(:percentiles, column_name, node: :values)
|
@@ -81,6 +162,9 @@ module ElasticsearchRecord
|
|
81
162
|
# "95.0" => 2021.0,
|
82
163
|
# "99.0" => 2022.0
|
83
164
|
# }
|
165
|
+
#
|
166
|
+
# @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-percentile-rank-aggregation.html
|
167
|
+
#
|
84
168
|
# @param [Symbol, String] column_name
|
85
169
|
# @param [Array] values
|
86
170
|
def percentile_ranks(column_name, values)
|
@@ -92,6 +176,8 @@ module ElasticsearchRecord
|
|
92
176
|
# Person.all.cardinality(:age)
|
93
177
|
# > 12
|
94
178
|
#
|
179
|
+
# @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-cardinality-aggregation.html
|
180
|
+
#
|
95
181
|
# @param [Symbol, String] column_name
|
96
182
|
def cardinality(column_name)
|
97
183
|
calculate(:cardinality, column_name)
|
@@ -101,6 +187,8 @@ module ElasticsearchRecord
|
|
101
187
|
#
|
102
188
|
# Person.all.average(:age) # => 35.8
|
103
189
|
#
|
190
|
+
# @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-avg-aggregation.html
|
191
|
+
#
|
104
192
|
# @param [Symbol, String] column_name
|
105
193
|
def average(column_name)
|
106
194
|
calculate(:avg, column_name)
|
@@ -112,6 +200,8 @@ module ElasticsearchRecord
|
|
112
200
|
# Person.all.minimum(:age)
|
113
201
|
# > 7
|
114
202
|
#
|
203
|
+
# @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-min-aggregation.html
|
204
|
+
#
|
115
205
|
# @param [Symbol, String] column_name
|
116
206
|
def minimum(column_name)
|
117
207
|
calculate(:min, column_name)
|
@@ -123,33 +213,58 @@ module ElasticsearchRecord
|
|
123
213
|
#
|
124
214
|
# Person.all.maximum(:age) # => 93
|
125
215
|
#
|
216
|
+
# @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-max-aggregation.html
|
217
|
+
#
|
126
218
|
# @param [Symbol, String] column_name
|
127
219
|
def maximum(column_name)
|
128
220
|
calculate(:max, column_name)
|
129
221
|
end
|
130
222
|
|
223
|
+
# This single-value aggregation approximates the median absolute deviation of its search results.
|
224
|
+
# Median absolute deviation is a measure of variability. It is a robust statistic,
|
225
|
+
# meaning that it is useful for describing data that may have outliers, or may not be normally distributed.
|
226
|
+
# For such data it can be more descriptive than standard deviation.
|
227
|
+
#
|
228
|
+
# It is calculated as the median of each data point’s deviation from the median of the entire sample.
|
229
|
+
# That is, for a random variable X, the median absolute deviation is median(|median(X) - Xi|).
|
230
|
+
#
|
231
|
+
# Person.all.median_absolute_deviation(:age) # => 91
|
232
|
+
#
|
233
|
+
# @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-median-absolute-deviation-aggregation.html
|
234
|
+
#
|
235
|
+
# @param [Symbol, String] column_name
|
236
|
+
def median_absolute_deviation(column_name)
|
237
|
+
calculate(:median_absolute_deviation, column_name)
|
238
|
+
end
|
239
|
+
|
131
240
|
# Calculates the sum of values on a given column. The value is returned
|
132
241
|
# with the same data type of the column, +0+ if there's no row. See
|
133
242
|
# #calculate for examples with options.
|
134
243
|
#
|
135
244
|
# Person.all.sum(:age) # => 4562
|
136
245
|
#
|
246
|
+
# @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-sum-aggregation.html
|
247
|
+
#
|
137
248
|
# @param [Symbol, String] column_name (optional)
|
138
249
|
def sum(column_name)
|
139
250
|
calculate(:sum, column_name)
|
140
251
|
end
|
141
252
|
|
142
|
-
# creates a aggregation with the provided metric (e.g. :sum) and
|
253
|
+
# creates an aggregation with the provided metric (e.g. :sum) and columns.
|
143
254
|
# returns the metric node (default: :value) from the aggregations result.
|
144
255
|
# @param [Symbol, String] metric
|
145
|
-
# @param [Symbol
|
256
|
+
# @param [Array<Symbol|String>] columns
|
146
257
|
# @param [Hash] opts - additional arguments that get merged with the metric definition
|
147
258
|
# @param [Symbol] node (default :value)
|
148
|
-
def calculate(metric,
|
149
|
-
metric_key = "#{
|
259
|
+
def calculate(metric, *columns, opts: {}, node: :value)
|
260
|
+
metric_key = "calculate_#{metric}"
|
150
261
|
|
151
262
|
# spawn a new aggregation and return the aggs
|
152
|
-
response =
|
263
|
+
response = if columns.size == 1
|
264
|
+
aggregate(metric_key, { metric => { field: columns[0] }.merge(opts) }).aggregations
|
265
|
+
else
|
266
|
+
aggregate(metric_key, { metric => { fields: columns }.merge(opts) }).aggregations
|
267
|
+
end
|
153
268
|
|
154
269
|
response[metric_key][node]
|
155
270
|
end
|
@@ -125,6 +125,18 @@ module ElasticsearchRecord
|
|
125
125
|
self
|
126
126
|
end
|
127
127
|
end
|
128
|
+
|
129
|
+
# overwrite original methods to provide a elasticsearch version:
|
130
|
+
# checks against the +#access_id_fielddata?+ to ensure the Elasticsearch Cluster allows access on the +_id+ field.
|
131
|
+
def reverse_sql_order(order_query)
|
132
|
+
if order_query.empty?
|
133
|
+
return [table[primary_key].desc] if primary_key != '_id' || klass.connection.access_id_fielddata?
|
134
|
+
raise ActiveRecord::IrreversibleOrderError,
|
135
|
+
"Relation has no current order and fielddata access on the _id field is disallowed! However, you can re-enable it by updating the dynamic cluster setting: indices.id_field_data.enabled"
|
136
|
+
end
|
137
|
+
|
138
|
+
super
|
139
|
+
end
|
128
140
|
end
|
129
141
|
end
|
130
142
|
end
|
@@ -102,6 +102,16 @@ module ElasticsearchRecord
|
|
102
102
|
configure!(:__query__, refresh: value)
|
103
103
|
end
|
104
104
|
|
105
|
+
# sets the query's +timeout+ value.
|
106
|
+
# @param [Boolean] value (default: true)
|
107
|
+
def timeout(value = true)
|
108
|
+
spawn.timeout!(value)
|
109
|
+
end
|
110
|
+
|
111
|
+
def timeout!(value = true)
|
112
|
+
configure!(:__query__, timeout: value)
|
113
|
+
end
|
114
|
+
|
105
115
|
# add a whole query 'node' to the query.
|
106
116
|
# @example
|
107
117
|
# query(:bool, {filter: ...})
|
@@ -90,7 +90,9 @@ module ElasticsearchRecord
|
|
90
90
|
#
|
91
91
|
# @param [String] keep_alive - how long to keep alive (for each single request) - default: '1m'
|
92
92
|
# @param [Integer] batch_size - how many results per query (default: 1000 - this means at least 10 queries before reaching the +max_result_window+)
|
93
|
-
|
93
|
+
# @param [Boolean] ids_only - resolve ids only from results
|
94
|
+
# @return [Integer, Array] either returns the results-array (no block provided) or the total amount of results
|
95
|
+
def pit_results(keep_alive: '1m', batch_size: 1000, ids_only: false)
|
94
96
|
raise(ArgumentError, "Batch size cannot be above the 'max_result_window' (#{klass.max_result_window}) !") if batch_size > klass.max_result_window
|
95
97
|
|
96
98
|
# check if limit or offset values were provided
|
@@ -105,6 +107,9 @@ module ElasticsearchRecord
|
|
105
107
|
# see @ https://www.elastic.co/guide/en/elasticsearch/reference/current/paginate-search-results.html
|
106
108
|
relation.order!(_shard_doc: :asc) if relation.order_values.empty? && klass.connection.access_shard_doc?
|
107
109
|
|
110
|
+
# resolve ids only
|
111
|
+
relation.reselect!('_id') if ids_only
|
112
|
+
|
108
113
|
# clear limit & offset
|
109
114
|
relation.offset!(nil).limit!(nil)
|
110
115
|
|
@@ -122,10 +127,16 @@ module ElasticsearchRecord
|
|
122
127
|
# resolve new data until we got all we need
|
123
128
|
loop do
|
124
129
|
# change pit settings & limit (spawn is required, since a +resolve+ will make the relation immutable)
|
125
|
-
current_response = relation.spawn.configure!(current_pit_hash).limit!(batch_size).resolve('Pit').response
|
130
|
+
current_response = relation.spawn.configure!(current_pit_hash).limit!(batch_size).resolve('Pit Results').response
|
126
131
|
|
127
132
|
# resolve only data from hits->hits[{_source}]
|
128
|
-
current_results =
|
133
|
+
current_results = if ids_only
|
134
|
+
current_response['hits']['hits'].map { |result| result['_id'] }
|
135
|
+
# future with helper
|
136
|
+
# current_response['hits']['hits'].map.from_hash('_id')
|
137
|
+
else
|
138
|
+
current_response['hits']['hits'].map { |result| result['_source'].merge('_id' => result['_id']) }
|
139
|
+
end
|
129
140
|
current_results_length = current_results.length
|
130
141
|
|
131
142
|
# check if we reached the required offset
|
@@ -171,12 +182,38 @@ module ElasticsearchRecord
|
|
171
182
|
end
|
172
183
|
end
|
173
184
|
|
174
|
-
# return results array
|
175
|
-
|
185
|
+
# return results array or total value
|
186
|
+
if block_given?
|
187
|
+
results_total
|
188
|
+
else
|
189
|
+
results
|
190
|
+
end
|
176
191
|
end
|
177
192
|
|
178
193
|
alias_method :total_results, :pit_results
|
179
194
|
|
195
|
+
# executes a delete query in a +point_in_time+ scope.
|
196
|
+
# this will provide the possibility to delete more than the +max_result_window+ (default: 10000) docs in a batched process.
|
197
|
+
# @param [String] keep_alive
|
198
|
+
# @param [Integer] batch_size
|
199
|
+
# @param [Boolean] refresh index after delete finished (default: true)
|
200
|
+
# @return [Integer] total amount of deleted docs
|
201
|
+
def pit_delete(keep_alive: '1m', batch_size: 1000, refresh: true)
|
202
|
+
delete_count = select('_id').pit_results(keep_alive: keep_alive, batch_size: batch_size, ids_only: true) do |ids|
|
203
|
+
# skip empty results
|
204
|
+
next unless ids.any?
|
205
|
+
|
206
|
+
# delete all IDs, but do not refresh index, yet
|
207
|
+
klass.connection.api(:core, :bulk, { index: klass.table_name, body: ids.map { |id| { delete: { _id: id } } }, refresh: false }, "#{klass} Pit Delete")
|
208
|
+
end
|
209
|
+
|
210
|
+
# refresh index
|
211
|
+
klass.connection.refresh_table(klass.table_name) if refresh
|
212
|
+
|
213
|
+
# return total count
|
214
|
+
delete_count
|
215
|
+
end
|
216
|
+
|
180
217
|
# returns the RAW response for the current query
|
181
218
|
# @return [Array]
|
182
219
|
def response
|
@@ -49,7 +49,7 @@ module ElasticsearchRecord
|
|
49
49
|
end
|
50
50
|
|
51
51
|
# Returns the RAW +_source+ data from each hit - aka. +rows+.
|
52
|
-
# PLEASE NOTE: The array will only contain the RAW data from each +_source+ (meta info like '_score'
|
52
|
+
# PLEASE NOTE: The array will only contain the RAW data from each +_source+ (meta info like '_id' or '_score' are not included)
|
53
53
|
# @return [Array]
|
54
54
|
def results
|
55
55
|
return [] unless response['hits']
|
data/lib/elasticsearch_record.rb
CHANGED
@@ -55,6 +55,16 @@ module ElasticsearchRecord
|
|
55
55
|
|
56
56
|
autoload :ElasticsearchDatabaseTasks, 'elasticsearch_record/tasks/elasticsearch_database_tasks'
|
57
57
|
end
|
58
|
+
|
59
|
+
##
|
60
|
+
# :singleton-method:
|
61
|
+
# Specifies if an exception should be raised while using transactions.
|
62
|
+
# Since ActiveRecord does not have any configuration option to support transactions and
|
63
|
+
# Elasticsearch does **NOT** support transactions, it may be risky to ignore them.
|
64
|
+
# By default, transactions are 'silently swallowed' to not break any existing applications...
|
65
|
+
# However enabling this flag will surely fail transactional tests ...
|
66
|
+
singleton_class.attr_accessor :error_on_transaction
|
67
|
+
self.error_on_transaction = false
|
58
68
|
end
|
59
69
|
|
60
70
|
ActiveSupport.on_load(:active_record) do
|