elasticsearch_record 1.5.3 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.yardopts +1 -1
- data/README.md +103 -19
- data/docs/CHANGELOG.md +34 -2
- data/docs/{LICENSE.txt → LICENSE} +1 -1
- data/lib/active_record/connection_adapters/elasticsearch/table_statements.rb +62 -4
- data/lib/active_record/connection_adapters/elasticsearch/transactions.rb +54 -0
- data/lib/active_record/connection_adapters/elasticsearch/unsupported_implementation.rb +0 -7
- data/lib/active_record/connection_adapters/elasticsearch_adapter.rb +17 -3
- data/lib/arel/collectors/elasticsearch_query.rb +3 -0
- data/lib/elasticsearch_record/core.rb +15 -2
- data/lib/elasticsearch_record/gem_version.rb +2 -2
- data/lib/elasticsearch_record/instrumentation/log_subscriber.rb +7 -3
- data/lib/elasticsearch_record/model_api.rb +68 -10
- data/lib/elasticsearch_record/model_schema.rb +2 -5
- data/lib/elasticsearch_record/persistence.rb +13 -1
- data/lib/elasticsearch_record/query.rb +19 -7
- data/lib/elasticsearch_record/querying.rb +36 -1
- data/lib/elasticsearch_record/relation/calculation_methods.rb +120 -5
- data/lib/elasticsearch_record/relation/core_methods.rb +12 -0
- data/lib/elasticsearch_record/relation/query_methods.rb +10 -0
- data/lib/elasticsearch_record/relation/result_methods.rb +42 -5
- data/lib/elasticsearch_record/result.rb +1 -1
- data/lib/elasticsearch_record.rb +10 -0
- metadata +4 -3
@@ -8,9 +8,6 @@ module ElasticsearchRecord
|
|
8
8
|
@klass = klass
|
9
9
|
end
|
10
10
|
|
11
|
-
# undelegated schema methods: clone rename create
|
12
|
-
# those should not be quick-accessible, since they might end in heavily broken index
|
13
|
-
|
14
11
|
# delegated dangerous methods (created with exclamation mark)
|
15
12
|
# not able to provide individual arguments - always the defaults will be used!
|
16
13
|
#
|
@@ -26,6 +23,21 @@ module ElasticsearchRecord
|
|
26
23
|
end
|
27
24
|
end
|
28
25
|
|
26
|
+
# delegated dangerous methods with args
|
27
|
+
#
|
28
|
+
# @example
|
29
|
+
# create!(:new_table_name, settings: , mappings:, alias: , ...)
|
30
|
+
# clone!(:new_table_name)
|
31
|
+
# rename!(:new_table_name)
|
32
|
+
# backup!(to: :backup_name)
|
33
|
+
# restore!(from: :backup_name)
|
34
|
+
# reindex!(:new_table_name)
|
35
|
+
%w(create clone rename backup restore reindex).each do |method|
|
36
|
+
define_method("#{method}!") do |*args|
|
37
|
+
_connection.send("#{method}_table", _index_name, *args)
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
29
41
|
# delegated dangerous methods with confirm parameter (created with exclamation mark)
|
30
42
|
# an exception will be raised, if +confirm:true+ is missing.
|
31
43
|
#
|
@@ -146,12 +158,51 @@ module ElasticsearchRecord
|
|
146
158
|
# Shortcut for meta_exists
|
147
159
|
# @return [Boolean]
|
148
160
|
|
161
|
+
# @!method create!(force: false, copy_from: nil, if_not_exists: false, **options)
|
162
|
+
# Shortcut for create_table
|
163
|
+
# @param [Boolean] force
|
164
|
+
# @param [nil, String] copy_from
|
165
|
+
# @param [Hash] options
|
166
|
+
# @return [Boolean] acknowledged status
|
167
|
+
|
168
|
+
# @!method clone!(target_name, **options)
|
169
|
+
# Shortcut for clone_table
|
170
|
+
# @param [String] target_name
|
171
|
+
# @param [Hash] options
|
172
|
+
# @return [Boolean]
|
173
|
+
|
174
|
+
# @!method rename!(target_name, timeout: nil, **options)
|
175
|
+
# Shortcut for rename_table
|
176
|
+
# @param [String] target_name
|
177
|
+
# @param [String (frozen)] timeout
|
178
|
+
# @param [Hash] options
|
179
|
+
|
180
|
+
# @!method backup!(to: nil, close: true)
|
181
|
+
# Shortcut for backup_table
|
182
|
+
# @param [String] to
|
183
|
+
# @param [Boolean] close
|
184
|
+
# @return [String] backup_name
|
185
|
+
|
186
|
+
# @!method restore!(from:, timeout: nil, open: true, drop_backup: false)
|
187
|
+
# Shortcut for restore_table
|
188
|
+
# @param [String] from
|
189
|
+
# @param [String (frozen)] timeout
|
190
|
+
# @param [Boolean] open
|
191
|
+
# @return [Boolean] acknowledged status
|
192
|
+
|
193
|
+
# @!method reindex!(target_name, **options)
|
194
|
+
# Shortcut for reindex_table
|
195
|
+
# @param [String] target_name
|
196
|
+
# @param [Hash] options
|
197
|
+
# @return [Hash] reindex stats
|
198
|
+
|
149
199
|
# fast insert/update data.
|
200
|
+
# IMPORTANT: Any 'doc'-id must be provided with underscore '_' ( +:_id+ )
|
150
201
|
#
|
151
202
|
# @example
|
152
203
|
# index([{name: 'Hans', age: 34}, {name: 'Peter', age: 22}])
|
153
204
|
#
|
154
|
-
# index({
|
205
|
+
# index({_id: 5, name: 'Georg', age: 87})
|
155
206
|
#
|
156
207
|
# @param [Array<Hash>,Hash] data
|
157
208
|
# @param [Hash] options
|
@@ -160,6 +211,7 @@ module ElasticsearchRecord
|
|
160
211
|
end
|
161
212
|
|
162
213
|
# fast insert new data.
|
214
|
+
# IMPORTANT: Any 'doc'-id must be provided with underscore '_' ( +:_id+ )
|
163
215
|
#
|
164
216
|
# @example
|
165
217
|
# insert([{name: 'Hans', age: 34}, {name: 'Peter', age: 22}])
|
@@ -173,11 +225,12 @@ module ElasticsearchRecord
|
|
173
225
|
end
|
174
226
|
|
175
227
|
# fast update existing data.
|
228
|
+
# IMPORTANT: Any 'doc'-id must be provided with underscore '_' ( +:_id+ )
|
176
229
|
#
|
177
230
|
# @example
|
178
|
-
# update([{
|
231
|
+
# update([{_id: 1, name: 'Hansi'}, {_id: 2, name: 'Peter Parker', age: 42}])
|
179
232
|
#
|
180
|
-
# update({
|
233
|
+
# update({_id: 3, name: 'Georg McCain'})
|
181
234
|
#
|
182
235
|
# @param [Array<Hash>,Hash] data
|
183
236
|
# @param [Hash] options
|
@@ -186,13 +239,14 @@ module ElasticsearchRecord
|
|
186
239
|
end
|
187
240
|
|
188
241
|
# fast delete data.
|
242
|
+
# IMPORTANT: Any 'doc'-id must be provided with underscore '_' ( +:_id+ )
|
189
243
|
#
|
190
244
|
# @example
|
191
245
|
# delete([1,2,3,5])
|
192
246
|
#
|
193
247
|
# delete(3)
|
194
248
|
#
|
195
|
-
# delete({
|
249
|
+
# delete({_id: 2})
|
196
250
|
#
|
197
251
|
# @param [Array<Hash>,Hash] data
|
198
252
|
# @param [Hash] options
|
@@ -202,12 +256,12 @@ module ElasticsearchRecord
|
|
202
256
|
if data[0].is_a?(Hash)
|
203
257
|
bulk(data, :delete, **options)
|
204
258
|
else
|
205
|
-
bulk(data.map { |id| {
|
259
|
+
bulk(data.map { |id| { _id: id } }, :delete, **options)
|
206
260
|
end
|
207
261
|
end
|
208
262
|
|
209
263
|
# bulk handle provided data (single Hash or multiple Array<Hash>).
|
210
|
-
# @param [Hash,Array<Hash
|
264
|
+
# @param [Hash,Array<Hash<Symbol=>Object>>] data - the data to insert/update/delete ...
|
211
265
|
# @param [Symbol] operation
|
212
266
|
# @param [Boolean, Symbol] refresh
|
213
267
|
def bulk(data, operation = :index, refresh: true, **options)
|
@@ -215,7 +269,11 @@ module ElasticsearchRecord
|
|
215
269
|
|
216
270
|
_connection.api(:core, :bulk, {
|
217
271
|
index: _index_name,
|
218
|
-
body:
|
272
|
+
body: if operation == :update
|
273
|
+
data.map { |item| { operation => { _id: (item[:_id].presence || item['_id']), data: { doc: item.except(:_id, '_id') } } } }
|
274
|
+
else
|
275
|
+
data.map { |item| { operation => { _id: (item[:_id].presence || item['_id']), data: item.except(:_id, '_id') } } }
|
276
|
+
end,
|
219
277
|
refresh: refresh
|
220
278
|
}, "BULK #{operation.to_s.upcase}", **options)
|
221
279
|
end
|
@@ -52,11 +52,8 @@ module ElasticsearchRecord
|
|
52
52
|
# @return [Array<String>]
|
53
53
|
def searchable_column_names
|
54
54
|
@searchable_column_names ||= columns.select(&:enabled?).reduce([]) { |m, column|
|
55
|
-
m
|
56
|
-
|
57
|
-
m += column.property_names
|
58
|
-
m.uniq
|
59
|
-
}
|
55
|
+
m + [column.name] + column.field_names + column.property_names
|
56
|
+
}.uniq
|
60
57
|
end
|
61
58
|
|
62
59
|
# clears schema-related instance variables.
|
@@ -11,7 +11,7 @@ module ElasticsearchRecord
|
|
11
11
|
# values is not a "key=>values"-Hash, but a +ActiveModel::Attribute+ - so the casted values gets resolved here
|
12
12
|
values = values.transform_values(&:value)
|
13
13
|
|
14
|
-
# resolve & update a auto_increment value
|
14
|
+
# resolve & update a auto_increment value, if configured
|
15
15
|
_insert_with_auto_increment(values) do |arguments|
|
16
16
|
# build new query
|
17
17
|
query = ElasticsearchRecord::Query.new(
|
@@ -68,6 +68,9 @@ module ElasticsearchRecord
|
|
68
68
|
if (id = values[self.primary_key]).present?
|
69
69
|
yield({id: id})
|
70
70
|
elsif auto_increment?
|
71
|
+
# future increments: uuid (+uuidv6 ?), hex, radix(2-36), integer
|
72
|
+
# allocated through: primary_key_type
|
73
|
+
|
71
74
|
ids = [
|
72
75
|
# try to resolve the current-auto-increment value from the tables meta
|
73
76
|
connection.table_metas(self.table_name).dig('auto_increment').to_i + 1,
|
@@ -88,5 +91,14 @@ module ElasticsearchRecord
|
|
88
91
|
end
|
89
92
|
end
|
90
93
|
end
|
94
|
+
|
95
|
+
# overwrite to provide a Elasticsearch version:
|
96
|
+
# Creates a record with values matching those of the instance attributes
|
97
|
+
# and returns its id.
|
98
|
+
def _create_record(*args)
|
99
|
+
undelegate_id_attribute_with do
|
100
|
+
super
|
101
|
+
end
|
102
|
+
end
|
91
103
|
end
|
92
104
|
end
|
@@ -12,6 +12,7 @@ module ElasticsearchRecord
|
|
12
12
|
TYPE_SEARCH = :search
|
13
13
|
TYPE_MSEARCH = :msearch
|
14
14
|
TYPE_SQL = :sql
|
15
|
+
TYPE_ESQL = :esql
|
15
16
|
|
16
17
|
# -- DOCUMENT TYPES ------------------------------------------------------------------------------------------------
|
17
18
|
TYPE_CREATE = :create
|
@@ -34,7 +35,7 @@ module ElasticsearchRecord
|
|
34
35
|
# includes valid types only
|
35
36
|
TYPES = [
|
36
37
|
# -- QUERY TYPES
|
37
|
-
TYPE_COUNT, TYPE_SEARCH, TYPE_MSEARCH, TYPE_SQL,
|
38
|
+
TYPE_COUNT, TYPE_SEARCH, TYPE_MSEARCH, TYPE_SQL, TYPE_ESQL,
|
38
39
|
# -- DOCUMENT TYPES
|
39
40
|
TYPE_CREATE, TYPE_UPDATE, TYPE_UPDATE_BY_QUERY, TYPE_DELETE, TYPE_DELETE_BY_QUERY,
|
40
41
|
|
@@ -46,7 +47,7 @@ module ElasticsearchRecord
|
|
46
47
|
|
47
48
|
# includes reading types only
|
48
49
|
READ_TYPES = [
|
49
|
-
TYPE_COUNT, TYPE_SEARCH, TYPE_MSEARCH, TYPE_SQL
|
50
|
+
TYPE_COUNT, TYPE_SEARCH, TYPE_MSEARCH, TYPE_SQL, TYPE_ESQL
|
50
51
|
].freeze
|
51
52
|
|
52
53
|
# defines a body to be executed if the query fails - +(none)+
|
@@ -60,6 +61,7 @@ module ElasticsearchRecord
|
|
60
61
|
# if no special type is defined, it simply uses +[:core,self.type]+
|
61
62
|
GATES = {
|
62
63
|
TYPE_SQL => [:sql, :query],
|
64
|
+
TYPE_ESQL => [:esql, :query],
|
63
65
|
TYPE_INDEX_CREATE => [:indices, :create],
|
64
66
|
TYPE_INDEX_CLONE => [:indices, :clone],
|
65
67
|
TYPE_INDEX_UPDATE_MAPPING => [:indices, :put_mapping],
|
@@ -86,9 +88,9 @@ module ElasticsearchRecord
|
|
86
88
|
# @!attribute Boolean
|
87
89
|
attr_reader :refresh
|
88
90
|
|
89
|
-
# defines the query
|
90
|
-
# @!attribute
|
91
|
-
|
91
|
+
# defines the query timeout
|
92
|
+
# @!attribute Integer|String
|
93
|
+
attr_reader :timeout
|
92
94
|
|
93
95
|
# defines the query arguments to be passed to the API
|
94
96
|
# @!attribute Hash
|
@@ -98,11 +100,12 @@ module ElasticsearchRecord
|
|
98
100
|
# @!attribute Array
|
99
101
|
attr_reader :columns
|
100
102
|
|
101
|
-
def initialize(index: nil, type: TYPE_UNDEFINED, status: STATUS_VALID, body: nil, refresh: nil, arguments: {}, columns: [])
|
103
|
+
def initialize(index: nil, type: TYPE_UNDEFINED, status: STATUS_VALID, body: nil, refresh: nil, timeout: nil, arguments: {}, columns: [])
|
102
104
|
@index = index
|
103
105
|
@type = type
|
104
106
|
@status = status
|
105
107
|
@refresh = refresh
|
108
|
+
@timeout = timeout
|
106
109
|
@body = body
|
107
110
|
@arguments = arguments
|
108
111
|
@columns = columns
|
@@ -117,6 +120,12 @@ module ElasticsearchRecord
|
|
117
120
|
self
|
118
121
|
end
|
119
122
|
|
123
|
+
# returns true, if the query failed
|
124
|
+
# @return [Boolean]
|
125
|
+
def failed?
|
126
|
+
self.status == STATUS_FAILED
|
127
|
+
end
|
128
|
+
|
120
129
|
# returns true, if the query is valid (e.g. index & type defined)
|
121
130
|
# @return [Boolean]
|
122
131
|
def valid?
|
@@ -142,7 +151,7 @@ module ElasticsearchRecord
|
|
142
151
|
# failed queries will return the related +FAILED_BODIES+ or +{}+ as fallback
|
143
152
|
# @return [Hash, nil]
|
144
153
|
def body
|
145
|
-
return (FAILED_BODIES[self.type].presence || {}) if
|
154
|
+
return (FAILED_BODIES[self.type].presence || {}) if failed?
|
146
155
|
|
147
156
|
@body
|
148
157
|
end
|
@@ -163,6 +172,9 @@ module ElasticsearchRecord
|
|
163
172
|
# set refresh, if defined (also includes false value)
|
164
173
|
args[:refresh] = self.refresh unless self.refresh.nil?
|
165
174
|
|
175
|
+
# set timeout, if present
|
176
|
+
args[:timeout] = self.timeout if self.timeout.present?
|
177
|
+
|
166
178
|
args
|
167
179
|
end
|
168
180
|
|
@@ -77,7 +77,42 @@ module ElasticsearchRecord
|
|
77
77
|
_load_from_sql(_query_by_sql(query), &block)
|
78
78
|
end
|
79
79
|
|
80
|
-
#
|
80
|
+
# ES|QL query API
|
81
|
+
# Returns search results for an ES|QL (Elasticsearch query language) query.
|
82
|
+
#
|
83
|
+
# @param [String] esql
|
84
|
+
# @param [Proc] block
|
85
|
+
def find_by_esql(esql, &block)
|
86
|
+
# build new query
|
87
|
+
query = ElasticsearchRecord::Query.new(
|
88
|
+
type: ElasticsearchRecord::Query::TYPE_ESQL,
|
89
|
+
body: { query: esql },
|
90
|
+
# IMPORTANT: Always provide all columns
|
91
|
+
columns: source_column_names)
|
92
|
+
|
93
|
+
_load_from_sql(_query_by_sql(query), &block)
|
94
|
+
end
|
95
|
+
|
96
|
+
# executes a +esql+ by provided *ES|QL* query
|
97
|
+
# Does NOT instantiate records.
|
98
|
+
# @param [String] esql
|
99
|
+
# @param [Boolean] async (default: false)
|
100
|
+
def esql(esql, async: false)
|
101
|
+
# build new query
|
102
|
+
query = ElasticsearchRecord::Query.new(
|
103
|
+
type: ElasticsearchRecord::Query::TYPE_ESQL,
|
104
|
+
body: { query: esql },
|
105
|
+
# IMPORTANT: Always provide all columns
|
106
|
+
columns: source_column_names)
|
107
|
+
|
108
|
+
connection.exec_query(query, "#{name} ES|QL", async: async)
|
109
|
+
end
|
110
|
+
|
111
|
+
|
112
|
+
# executes a +msearch+ by provided *RAW* queries.
|
113
|
+
# Does NOT instantiate records.
|
114
|
+
# @param [Array<String>] queries
|
115
|
+
# @param [Boolean] async (default: false)
|
81
116
|
def msearch(queries, async: false)
|
82
117
|
# build new msearch query
|
83
118
|
query = ElasticsearchRecord::Query.new(
|
@@ -45,6 +45,84 @@ module ElasticsearchRecord
|
|
45
45
|
end
|
46
46
|
end
|
47
47
|
|
48
|
+
# A boxplot metrics aggregation that computes boxplot of numeric values extracted from the aggregated documents.
|
49
|
+
# These values can be generated from specific numeric or histogram fields in the documents.
|
50
|
+
#
|
51
|
+
# The boxplot aggregation returns essential information for making a box plot:
|
52
|
+
# *minimum*, *maximum*, *median*, *first quartile* (25th percentile) and *third quartile* (75th percentile) values.
|
53
|
+
#
|
54
|
+
# Person.all.boxplot(:age)
|
55
|
+
# > {
|
56
|
+
# "min": 0.0,
|
57
|
+
# "max": 990.0,
|
58
|
+
# "q1": 167.5,
|
59
|
+
# "q2": 445.0,
|
60
|
+
# "q3": 722.5,
|
61
|
+
# "lower": 0.0,
|
62
|
+
# "upper": 990.0
|
63
|
+
# }
|
64
|
+
#
|
65
|
+
# @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-boxplot-aggregation.html
|
66
|
+
#
|
67
|
+
# @param [Symbol, String] column_name
|
68
|
+
def boxplot(column_name)
|
69
|
+
calculate(:boxplot, column_name)
|
70
|
+
end
|
71
|
+
|
72
|
+
# A multi-value metrics aggregation that computes stats over numeric values extracted from the aggregated documents.
|
73
|
+
# The stats that are returned consist of: *min*, *max*, *sum*, *count* and *avg*.
|
74
|
+
#
|
75
|
+
# Person.all.stats(:age)
|
76
|
+
# > {
|
77
|
+
# "count": 10,
|
78
|
+
# "min": 0.0,
|
79
|
+
# "max": 990.0,
|
80
|
+
# "sum": 16859,
|
81
|
+
# "avg": 75.5
|
82
|
+
# }
|
83
|
+
#
|
84
|
+
# @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-stats-aggregation.html
|
85
|
+
#
|
86
|
+
# @param [Symbol, String] column_name
|
87
|
+
def stats(column_name)
|
88
|
+
calculate(:stats, column_name)
|
89
|
+
end
|
90
|
+
|
91
|
+
# A multi-value metrics aggregation that computes statistics over string values extracted from the aggregated documents.
|
92
|
+
# These values can be retrieved from specific keyword fields.
|
93
|
+
#
|
94
|
+
# Person.all.string_stats(:name)
|
95
|
+
# > {
|
96
|
+
# "count": 5,
|
97
|
+
# "min_length": 24,
|
98
|
+
# "max_length": 30,
|
99
|
+
# "avg_length": 28.8,
|
100
|
+
# "entropy": 3.94617750050791
|
101
|
+
# }
|
102
|
+
#
|
103
|
+
# @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-string-stats-aggregation.html
|
104
|
+
#
|
105
|
+
# @param [Symbol, String] column_name
|
106
|
+
def string_stats(column_name)
|
107
|
+
calculate(:string_stats, column_name)
|
108
|
+
end
|
109
|
+
|
110
|
+
# The matrix_stats aggregation is a numeric aggregation that computes the following statistics over a set of document fields:
|
111
|
+
# *count* Number of per field samples included in the calculation.
|
112
|
+
# *mean* The average value for each field.
|
113
|
+
# *variance* Per field Measurement for how spread out the samples are from the mean.
|
114
|
+
# *skewness* Per field measurement quantifying the asymmetric distribution around the mean.
|
115
|
+
# *kurtosis* Per field measurement quantifying the shape of the distribution.
|
116
|
+
# *covariance* A matrix that quantitatively describes how changes in one field are associated with another.
|
117
|
+
# *correlation* The covariance matrix scaled to a range of -1 to 1, inclusive. Describes the relationship between field distributions.
|
118
|
+
#
|
119
|
+
# @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-matrix-stats-aggregation.html
|
120
|
+
#
|
121
|
+
# @param [Array<Symbol|String>] column_names
|
122
|
+
def matrix_stats(*column_names)
|
123
|
+
calculate(:matrix_stats, *column_names)
|
124
|
+
end
|
125
|
+
|
48
126
|
# A multi-value metrics aggregation that calculates one or more
|
49
127
|
# percentiles over numeric values extracted from the aggregated documents.
|
50
128
|
# Returns a hash with empty values (but keys still exists) if there is no row.
|
@@ -59,6 +137,9 @@ module ElasticsearchRecord
|
|
59
137
|
# "95.0" => 2021.0,
|
60
138
|
# "99.0" => 2022.0
|
61
139
|
# }
|
140
|
+
#
|
141
|
+
# @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-percentile-aggregation.html
|
142
|
+
#
|
62
143
|
# @param [Symbol, String] column_name
|
63
144
|
def percentiles(column_name)
|
64
145
|
calculate(:percentiles, column_name, node: :values)
|
@@ -81,6 +162,9 @@ module ElasticsearchRecord
|
|
81
162
|
# "95.0" => 2021.0,
|
82
163
|
# "99.0" => 2022.0
|
83
164
|
# }
|
165
|
+
#
|
166
|
+
# @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-percentile-rank-aggregation.html
|
167
|
+
#
|
84
168
|
# @param [Symbol, String] column_name
|
85
169
|
# @param [Array] values
|
86
170
|
def percentile_ranks(column_name, values)
|
@@ -92,6 +176,8 @@ module ElasticsearchRecord
|
|
92
176
|
# Person.all.cardinality(:age)
|
93
177
|
# > 12
|
94
178
|
#
|
179
|
+
# @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-cardinality-aggregation.html
|
180
|
+
#
|
95
181
|
# @param [Symbol, String] column_name
|
96
182
|
def cardinality(column_name)
|
97
183
|
calculate(:cardinality, column_name)
|
@@ -101,6 +187,8 @@ module ElasticsearchRecord
|
|
101
187
|
#
|
102
188
|
# Person.all.average(:age) # => 35.8
|
103
189
|
#
|
190
|
+
# @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-avg-aggregation.html
|
191
|
+
#
|
104
192
|
# @param [Symbol, String] column_name
|
105
193
|
def average(column_name)
|
106
194
|
calculate(:avg, column_name)
|
@@ -112,6 +200,8 @@ module ElasticsearchRecord
|
|
112
200
|
# Person.all.minimum(:age)
|
113
201
|
# > 7
|
114
202
|
#
|
203
|
+
# @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-min-aggregation.html
|
204
|
+
#
|
115
205
|
# @param [Symbol, String] column_name
|
116
206
|
def minimum(column_name)
|
117
207
|
calculate(:min, column_name)
|
@@ -123,33 +213,58 @@ module ElasticsearchRecord
|
|
123
213
|
#
|
124
214
|
# Person.all.maximum(:age) # => 93
|
125
215
|
#
|
216
|
+
# @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-max-aggregation.html
|
217
|
+
#
|
126
218
|
# @param [Symbol, String] column_name
|
127
219
|
def maximum(column_name)
|
128
220
|
calculate(:max, column_name)
|
129
221
|
end
|
130
222
|
|
223
|
+
# This single-value aggregation approximates the median absolute deviation of its search results.
|
224
|
+
# Median absolute deviation is a measure of variability. It is a robust statistic,
|
225
|
+
# meaning that it is useful for describing data that may have outliers, or may not be normally distributed.
|
226
|
+
# For such data it can be more descriptive than standard deviation.
|
227
|
+
#
|
228
|
+
# It is calculated as the median of each data point’s deviation from the median of the entire sample.
|
229
|
+
# That is, for a random variable X, the median absolute deviation is median(|median(X) - Xi|).
|
230
|
+
#
|
231
|
+
# Person.all.median_absolute_deviation(:age) # => 91
|
232
|
+
#
|
233
|
+
# @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-median-absolute-deviation-aggregation.html
|
234
|
+
#
|
235
|
+
# @param [Symbol, String] column_name
|
236
|
+
def median_absolute_deviation(column_name)
|
237
|
+
calculate(:median_absolute_deviation, column_name)
|
238
|
+
end
|
239
|
+
|
131
240
|
# Calculates the sum of values on a given column. The value is returned
|
132
241
|
# with the same data type of the column, +0+ if there's no row. See
|
133
242
|
# #calculate for examples with options.
|
134
243
|
#
|
135
244
|
# Person.all.sum(:age) # => 4562
|
136
245
|
#
|
246
|
+
# @see https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-sum-aggregation.html
|
247
|
+
#
|
137
248
|
# @param [Symbol, String] column_name (optional)
|
138
249
|
def sum(column_name)
|
139
250
|
calculate(:sum, column_name)
|
140
251
|
end
|
141
252
|
|
142
|
-
# creates a aggregation with the provided metric (e.g. :sum) and
|
253
|
+
# creates an aggregation with the provided metric (e.g. :sum) and columns.
|
143
254
|
# returns the metric node (default: :value) from the aggregations result.
|
144
255
|
# @param [Symbol, String] metric
|
145
|
-
# @param [Symbol
|
256
|
+
# @param [Array<Symbol|String>] columns
|
146
257
|
# @param [Hash] opts - additional arguments that get merged with the metric definition
|
147
258
|
# @param [Symbol] node (default :value)
|
148
|
-
def calculate(metric,
|
149
|
-
metric_key = "#{
|
259
|
+
def calculate(metric, *columns, opts: {}, node: :value)
|
260
|
+
metric_key = "calculate_#{metric}"
|
150
261
|
|
151
262
|
# spawn a new aggregation and return the aggs
|
152
|
-
response =
|
263
|
+
response = if columns.size == 1
|
264
|
+
aggregate(metric_key, { metric => { field: columns[0] }.merge(opts) }).aggregations
|
265
|
+
else
|
266
|
+
aggregate(metric_key, { metric => { fields: columns }.merge(opts) }).aggregations
|
267
|
+
end
|
153
268
|
|
154
269
|
response[metric_key][node]
|
155
270
|
end
|
@@ -125,6 +125,18 @@ module ElasticsearchRecord
|
|
125
125
|
self
|
126
126
|
end
|
127
127
|
end
|
128
|
+
|
129
|
+
# overwrite original methods to provide a elasticsearch version:
|
130
|
+
# checks against the +#access_id_fielddata?+ to ensure the Elasticsearch Cluster allows access on the +_id+ field.
|
131
|
+
def reverse_sql_order(order_query)
|
132
|
+
if order_query.empty?
|
133
|
+
return [table[primary_key].desc] if primary_key != '_id' || klass.connection.access_id_fielddata?
|
134
|
+
raise ActiveRecord::IrreversibleOrderError,
|
135
|
+
"Relation has no current order and fielddata access on the _id field is disallowed! However, you can re-enable it by updating the dynamic cluster setting: indices.id_field_data.enabled"
|
136
|
+
end
|
137
|
+
|
138
|
+
super
|
139
|
+
end
|
128
140
|
end
|
129
141
|
end
|
130
142
|
end
|
@@ -102,6 +102,16 @@ module ElasticsearchRecord
|
|
102
102
|
configure!(:__query__, refresh: value)
|
103
103
|
end
|
104
104
|
|
105
|
+
# sets the query's +timeout+ value.
|
106
|
+
# @param [Boolean] value (default: true)
|
107
|
+
def timeout(value = true)
|
108
|
+
spawn.timeout!(value)
|
109
|
+
end
|
110
|
+
|
111
|
+
def timeout!(value = true)
|
112
|
+
configure!(:__query__, timeout: value)
|
113
|
+
end
|
114
|
+
|
105
115
|
# add a whole query 'node' to the query.
|
106
116
|
# @example
|
107
117
|
# query(:bool, {filter: ...})
|
@@ -90,7 +90,9 @@ module ElasticsearchRecord
|
|
90
90
|
#
|
91
91
|
# @param [String] keep_alive - how long to keep alive (for each single request) - default: '1m'
|
92
92
|
# @param [Integer] batch_size - how many results per query (default: 1000 - this means at least 10 queries before reaching the +max_result_window+)
|
93
|
-
|
93
|
+
# @param [Boolean] ids_only - resolve ids only from results
|
94
|
+
# @return [Integer, Array] either returns the results-array (no block provided) or the total amount of results
|
95
|
+
def pit_results(keep_alive: '1m', batch_size: 1000, ids_only: false)
|
94
96
|
raise(ArgumentError, "Batch size cannot be above the 'max_result_window' (#{klass.max_result_window}) !") if batch_size > klass.max_result_window
|
95
97
|
|
96
98
|
# check if limit or offset values were provided
|
@@ -105,6 +107,9 @@ module ElasticsearchRecord
|
|
105
107
|
# see @ https://www.elastic.co/guide/en/elasticsearch/reference/current/paginate-search-results.html
|
106
108
|
relation.order!(_shard_doc: :asc) if relation.order_values.empty? && klass.connection.access_shard_doc?
|
107
109
|
|
110
|
+
# resolve ids only
|
111
|
+
relation.reselect!('_id') if ids_only
|
112
|
+
|
108
113
|
# clear limit & offset
|
109
114
|
relation.offset!(nil).limit!(nil)
|
110
115
|
|
@@ -122,10 +127,16 @@ module ElasticsearchRecord
|
|
122
127
|
# resolve new data until we got all we need
|
123
128
|
loop do
|
124
129
|
# change pit settings & limit (spawn is required, since a +resolve+ will make the relation immutable)
|
125
|
-
current_response = relation.spawn.configure!(current_pit_hash).limit!(batch_size).resolve('Pit').response
|
130
|
+
current_response = relation.spawn.configure!(current_pit_hash).limit!(batch_size).resolve('Pit Results').response
|
126
131
|
|
127
132
|
# resolve only data from hits->hits[{_source}]
|
128
|
-
current_results =
|
133
|
+
current_results = if ids_only
|
134
|
+
current_response['hits']['hits'].map { |result| result['_id'] }
|
135
|
+
# future with helper
|
136
|
+
# current_response['hits']['hits'].map.from_hash('_id')
|
137
|
+
else
|
138
|
+
current_response['hits']['hits'].map { |result| result['_source'].merge('_id' => result['_id']) }
|
139
|
+
end
|
129
140
|
current_results_length = current_results.length
|
130
141
|
|
131
142
|
# check if we reached the required offset
|
@@ -171,12 +182,38 @@ module ElasticsearchRecord
|
|
171
182
|
end
|
172
183
|
end
|
173
184
|
|
174
|
-
# return results array
|
175
|
-
|
185
|
+
# return results array or total value
|
186
|
+
if block_given?
|
187
|
+
results_total
|
188
|
+
else
|
189
|
+
results
|
190
|
+
end
|
176
191
|
end
|
177
192
|
|
178
193
|
alias_method :total_results, :pit_results
|
179
194
|
|
195
|
+
# executes a delete query in a +point_in_time+ scope.
|
196
|
+
# this will provide the possibility to delete more than the +max_result_window+ (default: 10000) docs in a batched process.
|
197
|
+
# @param [String] keep_alive
|
198
|
+
# @param [Integer] batch_size
|
199
|
+
# @param [Boolean] refresh index after delete finished (default: true)
|
200
|
+
# @return [Integer] total amount of deleted docs
|
201
|
+
def pit_delete(keep_alive: '1m', batch_size: 1000, refresh: true)
|
202
|
+
delete_count = select('_id').pit_results(keep_alive: keep_alive, batch_size: batch_size, ids_only: true) do |ids|
|
203
|
+
# skip empty results
|
204
|
+
next unless ids.any?
|
205
|
+
|
206
|
+
# delete all IDs, but do not refresh index, yet
|
207
|
+
klass.connection.api(:core, :bulk, { index: klass.table_name, body: ids.map { |id| { delete: { _id: id } } }, refresh: false }, "#{klass} Pit Delete")
|
208
|
+
end
|
209
|
+
|
210
|
+
# refresh index
|
211
|
+
klass.connection.refresh_table(klass.table_name) if refresh
|
212
|
+
|
213
|
+
# return total count
|
214
|
+
delete_count
|
215
|
+
end
|
216
|
+
|
180
217
|
# returns the RAW response for the current query
|
181
218
|
# @return [Array]
|
182
219
|
def response
|
@@ -49,7 +49,7 @@ module ElasticsearchRecord
|
|
49
49
|
end
|
50
50
|
|
51
51
|
# Returns the RAW +_source+ data from each hit - aka. +rows+.
|
52
|
-
# PLEASE NOTE: The array will only contain the RAW data from each +_source+ (meta info like '_score'
|
52
|
+
# PLEASE NOTE: The array will only contain the RAW data from each +_source+ (meta info like '_id' or '_score' are not included)
|
53
53
|
# @return [Array]
|
54
54
|
def results
|
55
55
|
return [] unless response['hits']
|
data/lib/elasticsearch_record.rb
CHANGED
@@ -55,6 +55,16 @@ module ElasticsearchRecord
|
|
55
55
|
|
56
56
|
autoload :ElasticsearchDatabaseTasks, 'elasticsearch_record/tasks/elasticsearch_database_tasks'
|
57
57
|
end
|
58
|
+
|
59
|
+
##
|
60
|
+
# :singleton-method:
|
61
|
+
# Specifies if an exception should be raised while using transactions.
|
62
|
+
# Since ActiveRecord does not have any configuration option to support transactions and
|
63
|
+
# Elasticsearch does **NOT** support transactions, it may be risky to ignore them.
|
64
|
+
# By default, transactions are 'silently swallowed' to not break any existing applications...
|
65
|
+
# However enabling this flag will surely fail transactional tests ...
|
66
|
+
singleton_class.attr_accessor :error_on_transaction
|
67
|
+
self.error_on_transaction = false
|
58
68
|
end
|
59
69
|
|
60
70
|
ActiveSupport.on_load(:active_record) do
|