elasticsearch_record 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.ruby-version +1 -0
  4. data/Gemfile +6 -0
  5. data/Gemfile.lock +74 -0
  6. data/README.md +216 -0
  7. data/Rakefile +8 -0
  8. data/docs/CHANGELOG.md +44 -0
  9. data/docs/CODE_OF_CONDUCT.md +84 -0
  10. data/docs/LICENSE.txt +21 -0
  11. data/lib/active_record/connection_adapters/elasticsearch/column.rb +32 -0
  12. data/lib/active_record/connection_adapters/elasticsearch/database_statements.rb +149 -0
  13. data/lib/active_record/connection_adapters/elasticsearch/quoting.rb +38 -0
  14. data/lib/active_record/connection_adapters/elasticsearch/schema_statements.rb +134 -0
  15. data/lib/active_record/connection_adapters/elasticsearch/type/format_string.rb +28 -0
  16. data/lib/active_record/connection_adapters/elasticsearch/type/multicast_value.rb +52 -0
  17. data/lib/active_record/connection_adapters/elasticsearch/type/object.rb +44 -0
  18. data/lib/active_record/connection_adapters/elasticsearch/type/range.rb +42 -0
  19. data/lib/active_record/connection_adapters/elasticsearch/type.rb +16 -0
  20. data/lib/active_record/connection_adapters/elasticsearch_adapter.rb +197 -0
  21. data/lib/arel/collectors/elasticsearch_query.rb +112 -0
  22. data/lib/arel/nodes/select_agg.rb +22 -0
  23. data/lib/arel/nodes/select_configure.rb +9 -0
  24. data/lib/arel/nodes/select_kind.rb +9 -0
  25. data/lib/arel/nodes/select_query.rb +20 -0
  26. data/lib/arel/visitors/elasticsearch.rb +589 -0
  27. data/lib/elasticsearch_record/base.rb +14 -0
  28. data/lib/elasticsearch_record/core.rb +59 -0
  29. data/lib/elasticsearch_record/extensions/relation.rb +15 -0
  30. data/lib/elasticsearch_record/gem_version.rb +17 -0
  31. data/lib/elasticsearch_record/instrumentation/controller_runtime.rb +39 -0
  32. data/lib/elasticsearch_record/instrumentation/log_subscriber.rb +70 -0
  33. data/lib/elasticsearch_record/instrumentation/railtie.rb +16 -0
  34. data/lib/elasticsearch_record/instrumentation.rb +17 -0
  35. data/lib/elasticsearch_record/model_schema.rb +43 -0
  36. data/lib/elasticsearch_record/patches/active_record/relation_merger_patch.rb +85 -0
  37. data/lib/elasticsearch_record/patches/arel/select_core_patch.rb +64 -0
  38. data/lib/elasticsearch_record/patches/arel/select_manager_patch.rb +91 -0
  39. data/lib/elasticsearch_record/patches/arel/select_statement_patch.rb +41 -0
  40. data/lib/elasticsearch_record/patches/arel/update_manager_patch.rb +46 -0
  41. data/lib/elasticsearch_record/patches/arel/update_statement_patch.rb +60 -0
  42. data/lib/elasticsearch_record/persistence.rb +80 -0
  43. data/lib/elasticsearch_record/query.rb +129 -0
  44. data/lib/elasticsearch_record/querying.rb +90 -0
  45. data/lib/elasticsearch_record/relation/calculation_methods.rb +155 -0
  46. data/lib/elasticsearch_record/relation/core_methods.rb +64 -0
  47. data/lib/elasticsearch_record/relation/query_clause.rb +43 -0
  48. data/lib/elasticsearch_record/relation/query_clause_tree.rb +94 -0
  49. data/lib/elasticsearch_record/relation/query_methods.rb +276 -0
  50. data/lib/elasticsearch_record/relation/result_methods.rb +222 -0
  51. data/lib/elasticsearch_record/relation/value_methods.rb +54 -0
  52. data/lib/elasticsearch_record/result.rb +236 -0
  53. data/lib/elasticsearch_record/statement_cache.rb +87 -0
  54. data/lib/elasticsearch_record/version.rb +10 -0
  55. data/lib/elasticsearch_record.rb +60 -0
  56. data/sig/elasticsearch_record.rbs +4 -0
  57. metadata +175 -0
@@ -0,0 +1,276 @@
1
module ElasticsearchRecord
  module Relation
    # Relation methods to build Elasticsearch queries: the query kind, additional query
    # arguments (configure), aggregations and the conditional clauses
    # (query, filter, must, must_not, should, where).
    module QueryMethods

      # unsupported method
      # @raise [ActiveRecord::StatementInvalid] always
      def joins(*)
        raise ActiveRecord::StatementInvalid, 'Unsupported method "joins"'
      end

      # unsupported method
      # def includes(*)
      #   raise ActiveRecord::StatementInvalid, 'Unsupported method "includes"'
      # end

      # sets or overwrites the query kind (e.g. compound queries -> :bool, :boosting, :constant_score, ...).
      # Also other query kinds like :intervals, :match, ... are allowed.
      # As an alternative you can also call the #query(<kind>,{argument}) method.
      # @param [String, Symbol] value - the kind
      def kind(value)
        spawn.kind!(value)
      end

      # same as +#kind+, but on the same relation (no spawn)
      # @param [String, Symbol] value - the kind
      # @return [self]
      def kind!(value)
        # :nodoc:
        self.kind_value = value
        self
      end

      # sets or overwrites additional arguments for the query (not the :query-node, the whole query).
      # You can also force an overwrite of previously defined arguments, like +size+ or +from+.
      # This is useful to force the removal of keys.
      #
      # @example
      #   # adds {refresh true} to the query
      #   configure(:refresh, true)
      #
      #   # overwrites or sets {from: 50} but removes the :sort key
      #   configure({from: 50, sort: nil})
      # @param [Array] args
      def configure(*args)
        spawn.configure!(*args)
      end

      # same as +#configure+, but on the same relation (no spawn)
      #
      # Accepted argument shapes:
      # * a single Hash            -> merged into the current configure value
      # * +:__claim__+ and a value -> the value is appended to the list stored under +:__claim__+
      # * a key and a value        -> merged as +{key => value}+
      # Any other shape leaves the configure value untouched.
      # @param [Array] args
      # @return [self]
      def configure!(*args)
        check_if_method_has_arguments!(__callee__, args)

        if args.length == 1 && args.first.is_a?(Hash)
          self.configure_value = configure_value.merge(args[0])
        elsif args.length == 2 && args[0] == :__claim__
          # build a NEW array instead of appending in place: the stored array may be shared
          # with other relations holding the same configure hash, which must not see this claim.
          claims = (configure_value[:__claim__] || []) + [args[1]]
          self.configure_value = configure_value.merge(:__claim__ => claims)
        elsif args.length == 2
          self.configure_value = configure_value.merge(args[0] => args[1])
        end

        self
      end

      # adds aggregations to a new relation - see +#aggregate!+ for the accepted arguments.
      # @param [Array] args
      def aggregate(*args)
        check_if_method_has_arguments!(__callee__, args)
        spawn.aggregate!(*args)
      end

      alias_method :aggs, :aggregate

      # same as +#aggregate+, but on the same relation (no spawn)
      # @param [Symbol, String, Hash] opts - a single aggregation name (its definition is taken from +rest+),
      #   or a Hash of +{name => definition}+ pairs
      # @param [Array] rest
      # @raise [ArgumentError] if +opts+ is neither a Symbol, String nor Hash
      # @return [self]
      def aggregate!(opts, *rest)
        case opts
        when Symbol, String
          self.aggs_clause += build_query_clause(opts, rest)
        when Hash
          opts.each do |key, value|
            self.aggs_clause += build_query_clause(key, value)
          end
        else
          raise ArgumentError, "Unsupported argument type for aggregate: #{opts}"
        end

        self
      end

      # sets the query kind and adds a query clause on a new relation.
      # @param [Array] args - forwarded to +#query!+
      def query(*args)
        check_if_method_has_arguments!(__callee__, args)
        spawn.query!(*args)
      end

      # same as +#query+, but on the same relation (no spawn)
      # @param [String, Symbol] kind - the query kind
      # @param [Hash] opts - only the FIRST key/value pair is used to build the clause
      # @param [Array] rest
      # @return [self]
      def query!(kind, opts, *rest)
        kind!(kind)
        self.query_clause += build_query_clause(opts.keys[0], opts.values[0], rest)
        self
      end

      # adds a +:filter+ clause on a new relation.
      # @param [Array] args
      def filter(*args)
        check_if_method_has_arguments!(__callee__, args)
        spawn.filter!(*args)
      end

      # same as +#filter+, but on the same relation (no spawn)
      # @return [self]
      def filter!(opts, *rest)
        # :nodoc:
        set_default_kind!
        self.query_clause += build_query_clause(:filter, opts, rest)
        self
      end

      # adds a +:must_not+ clause on a new relation.
      # @param [Array] args
      def must_not(*args)
        check_if_method_has_arguments!(__callee__, args)
        spawn.must_not!(*args)
      end

      # same as +#must_not+, but on the same relation (no spawn)
      # @return [self]
      def must_not!(opts, *rest)
        # :nodoc:
        set_default_kind!
        self.query_clause += build_query_clause(:must_not, opts, rest)
        self
      end

      # adds a +:must+ clause on a new relation.
      # @param [Array] args
      def must(*args)
        check_if_method_has_arguments!(__callee__, args)
        spawn.must!(*args)
      end

      # same as +#must+, but on the same relation (no spawn)
      # @return [self]
      def must!(opts, *rest)
        # :nodoc:
        set_default_kind!
        self.query_clause += build_query_clause(:must, opts, rest)
        self
      end

      # adds a +:should+ clause on a new relation.
      # @param [Array] args
      def should(*args)
        check_if_method_has_arguments!(__callee__, args)
        spawn.should!(*args)
      end

      # same as +#should+, but on the same relation (no spawn)
      # @return [self]
      def should!(opts, *rest)
        # :nodoc:
        set_default_kind!
        self.query_clause += build_query_clause(:should, opts, rest)
        self
      end

      # creates a condition on the relation.
      # There are several possibilities to call this method.
      #
      #   # create a simple 'term' condition on the query[:filter] param
      #   where({name: 'hans'})
      #   > query[:filter] << { term: { name: 'hans' } }
      #
      #   # create a simple 'terms' condition on the query[:filter] param
      #   where({name: ['hans','peter']})
      #   > query[:filter] << { terms: { name: ['hans','peter'] } }
      #
      #   where(:must_not, term: {name: 'horst'})
      #   where(:query_string, "(new york OR dublin)", fields: ['name','description'])
      #
      #   # nested array
      #   where([ [:filter, {...}], [:must_not, {...}]])
      def where(*args)
        return none if args[0] == :none

        super
      end

      # same as +#where+, but on the same relation (no spawn)
      # @param [Symbol, Array, String, Hash] opts
      # @param [Array] rest
      # @raise [ArgumentError] if a Symbol other than :filter, :must, :must_not or :should is provided
      # @raise [ActiveRecord::UnknownAttributeReference] if a Hash contains keys that are not searchable columns
      # @return [self]
      def where!(opts, *rest)
        case opts
          # check the first provided parameter +opts+ and validate, if this is an alias for "must, must_not, should or filter"
          # if true, we expect the rest[0] to be a hash.
          # For this correlation we forward this as RAW-data without check & manipulation
        when Symbol
          case opts
          when :filter, :must, :must_not, :should
            send("#{opts}!", *rest)
          else
            raise ArgumentError, "Unsupported argument type for where: #{opts}"
          end
        when Array
          # check if this is a nested array of multiple [<kind>,<data>]
          if opts[0].is_a?(Array)
            opts.each { |item|
              where!(*item)
            }
          else
            where!(*opts, *rest)
          end
        when String
          # fallback to ActiveRecords +#where_clause+
          # currently NOT supported
          # (+rest+ is splatted, so the parent receives the bind values exactly as they were passed in)
          super(opts, *rest)
        else
          # hash -> {name: 'hans'}
          # protects against forwarding params directly to where ...
          # User.where(params) <- will never work
          # User.where(params.permit(:user)) <- ok
          opts = sanitize_forbidden_attributes(opts)

          # resolve possible aliases
          opts = opts.transform_keys do |key|
            key = key.to_s
            klass.attribute_aliases[key] || key
          end

          # check if we have keys without Elasticsearch fields
          if (invalid = (opts.keys - klass.searchable_column_names)).present?
            raise(ActiveRecord::UnknownAttributeReference,
                  "Unable to build query with unknown searchable attributes: #{invalid.map(&:inspect).join(", ")}. " \
                    "If you want to build a custom query you should use one of those methods: 'filter, must, must_not, should'. " \
                    "#{klass.name}.filter('#{invalid[0]}' => '...')"
            )
          end

          # force set default kind
          set_default_kind!

          # builds predicates from opts (transforms this in a more unreadable way but is required for nested assignment & binds ...)
          parts = predicate_builder.build_from_hash(opts) do |table_name|
            lookup_table_klass_from_join_dependencies(table_name)
          end

          self.where_clause += ::ActiveRecord::Relation::WhereClause.new(parts)
        end

        self
      end

      # removes previously defined scopes from the same relation.
      # A Symbol removes the whole value (must be one of +_valid_unscoping_values+),
      # a Hash removes the matching query clauses.
      # @param [Array<Symbol, Hash>] args
      # @raise [ArgumentError] for unknown symbols or unsupported argument types
      # @return [self]
      def unscope!(*args)
        # :nodoc:
        self.unscope_values += args

        args.each do |scope|
          case scope
          when Symbol
            unless _valid_unscoping_values.include?(scope)
              raise ArgumentError, "Called unscope() with invalid unscoping argument ':#{scope}'. Valid arguments are :#{_valid_unscoping_values.to_a.join(", :")}."
            end
            assert_mutability!
            @values.delete(scope)
          when Hash
            scope.each do |key, target_value|
              target_query_clause = build_query_clause(key, target_value)
              self.query_clause -= target_query_clause
            end
          else
            raise ArgumentError, "Unrecognized scoping: #{args.inspect}. Use .unscope(where: :attribute_name) or .unscope(:order), for example."
          end
        end

        self
      end

      private

      # builds a new query clause for the provided kind.
      # +data+ is wrapped into an array, trailing options are extracted from +rest+.
      # @param [Symbol, String] kind
      # @param [Object] data
      # @param [Array] rest
      # @return [ElasticsearchRecord::Relation::QueryClause]
      def build_query_clause(kind, data, rest = [])
        ElasticsearchRecord::Relation::QueryClause.new(kind, Array.wrap(data), rest.extract_options!)
      end

      # sets the default kind if no kind value was defined.
      # this is called by all conditional methods (where, not, filter, must, must_not & should)
      def set_default_kind!
        self.kind_value ||= :bool
      end

      # overwrite default method to add additional values for kind, query, aggs, ...
      def build_arel(*args)
        arel = super(*args)

        arel.kind(kind_value) if kind_value
        arel.query(query_clause.ast) unless query_clause.empty?
        arel.aggs(aggs_clause.ast) unless aggs_clause.empty?
        arel.configure(configure_value) if configure_value.present?

        arel
      end
    end
  end
end
@@ -0,0 +1,222 @@
1
module ElasticsearchRecord
  module Relation
    # Relation methods to resolve results: aggregation helpers, point-in-time handling
    # and access to the raw response / hits / aggregations / total of the current query.
    module ResultMethods
      # aggregate pluck provided columns.
      # returns a hash of values for each provided column
      #
      #   Person.agg_pluck(:name)
      #   => {"name" => ['David', 'Jeremy', 'Jose']}
      #
      #   Person.agg_pluck(:id, :name)
      #   => {"id" => ['11', '2', '5'], "name" => ['David', 'Jeremy', 'Jose']}
      #
      # The +terms+ size is taken from the relation's limit and falls back to 10.
      #
      # @param [Array] column_names
      # @return [Hash]
      def agg_pluck(*column_names)
        scope = self.spawn

        column_names.each do |column_name|
          scope.aggregate!(column_name, { terms: { field: column_name, size: limit_value || 10 } })
        end

        scope.aggregations.each_with_object({}) do |(name, aggregation), plucked|
          plucked[name.to_s] = aggregation[:buckets].map { |bucket| bucket[:key] }
        end
      end

      # A multi-bucket aggregation that creates composite buckets from different sources.
      # PLEASE NOTE: The composite aggregation is expensive. Load test your application
      # before deploying a composite aggregation in production!
      #
      # For a single column_name a hash with the distinct key and the +doc_count+ as value is returned.
      # For multiple column_names a hash with the distinct keys (as hash) and the +doc_count+ as value is returned.
      #
      #   Person.composite(:name)
      #   => {"David" => 10, "Jeremy" => 1, "Jose" => 24}
      #
      #   Person.composite(:name, :age)
      #   => {
      #     {name: "David", age: "16"} => 3,
      #     {name: "David", age: "18"} => 6,
      #     {name: "David", age: "20"} => 1,
      #     {name: "Jeremy", age: "20"} => 1,
      #     {name: "Jose", age: "6"} => 2,
      #     ...
      #   }
      # @param [Array] column_names
      # @return [Hash]
      def composite(*column_names)
        scope = self.spawn
        sources = column_names.map { |column_name| { column_name => { terms: { field: column_name } } } }
        scope.aggregate!(:composite_bucket, { composite: { size: limit_value || 10, sources: sources } })

        buckets = scope.aggregations[:composite_bucket][:buckets]

        if column_names.size == 1
          column_name = column_names[0]
          buckets.each_with_object({}) { |bucket, counts| counts[bucket[:key][column_name]] = bucket[:doc_count] }
        else
          buckets.each_with_object({}) { |bucket, counts| counts[bucket[:key]] = bucket[:doc_count] }
        end
      end

      # creates and returns a new point in time id.
      # optionally yields the provided block and closes the pit afterwards.
      # The pit is also closed if the block raises or is left early.
      # @param [String] keep_alive (default: '1m')
      # @return [nil, String] - either returns the pit_id (no block given) or nil
      def point_in_time(keep_alive: '1m')
        # resolve a initial PIT id
        initial_pit_id = klass.connection.api(:core, :open_point_in_time, { index: klass.table_name, keep_alive: keep_alive }, "#{klass} Open Pit").dig('id')

        return initial_pit_id unless block_given?

        begin
          # block provided, so yield with id
          yield initial_pit_id
        ensure
          # close PIT - in any case, otherwise a failing block would leave the pit open
          klass.connection.api(:core, :close_point_in_time, { body: { id: initial_pit_id } }, "#{klass} Close Pit")
        end

        # return nil if everything was ok
        nil
      end

      alias_method :pit, :point_in_time

      # executes the current query in a +point_in_time+ scope.
      # this will provide the possibility to resolve more than the +max_result_window+ (default: 10000) hits.
      # resolves results (hits->hits) from the search but uses the pit query instead to resolve more than 10000 entries.
      #
      # If a block was provided it'll yield the results array per batch size.
      #
      # @param [String] keep_alive - how long to keep alive (for each single request) - default: '1m'
      # @param [Integer] batch_size - how many results per query (default: 1000 - this means at least 10 queries before reaching the +max_result_window+)
      # @return [Array] the collected +_source+ entries (stays empty if a block was provided)
      def pit_results(keep_alive: '1m', batch_size: 1000)
        # check if a limit or offset values was provided
        results_limit  = limit_value ? limit_value : Float::INFINITY
        results_offset = offset_value ? offset_value : 0

        # search_after requires a order - we resolve a order either from provided value or by default ...
        # NOTE(review): ActiveRecord's +ordered_relation+ can hand back the receiver itself when an
        # order is already present, so we work on a spawned copy to keep the receiver untouched.
        relation = ordered_relation.spawn

        # clear limit & offset
        relation.offset!(nil).limit!(nil)

        # remove the 'index' from the query arguments (pit doesn't like that)
        relation.configure!(:__claim__, { index: nil })

        # we store the results in this array
        results       = []
        results_total = 0

        # resolve a new pit and auto-close after we finished
        point_in_time(keep_alive: keep_alive) do |pit_id|
          current_pit_hash = { pit: { id: pit_id, keep_alive: keep_alive } }

          # resolve new data until we got all we need
          loop do
            # change pit settings & limit (spawn is required, since a +resolve+ will make the relation immutable)
            current_response = relation.spawn.configure!(current_pit_hash).limit!(batch_size).resolve('Pit').response

            # resolve only data from hits->hits[{_source}]
            current_results        = current_response['hits']['hits'].map { |result| result['_source'] }
            current_results_length = current_results.length

            # check if we reached the required offset
            if results_offset < current_results_length
              # check for parts
              # (maybe a offset 6300 was provided but the batch size is 1000 - so we need to skip a part ...)
              results_from   = results_offset > 0 ? results_offset : 0
              ranged_results = current_results.drop(results_from)

              # cut the batch down to what is still missing to reach the limit (also covers a limit of 0)
              remaining      = results_limit - results_total
              ranged_results = ranged_results.first(remaining) if remaining < ranged_results.length

              if block_given?
                yield ranged_results
              else
                # plain append: a set-union would silently drop hits with an identical +_source+
                results.concat(ranged_results)
              end

              # add to total
              results_total += ranged_results.length
            end

            # -------- BREAK conditions --------

            # we reached our maximum value
            break if results_total >= results_limit

            # we ran out of data
            break if current_results_length < batch_size

            # additional security - required?
            # break if current_pit_hash[:search_after] == current_response['hits']['hits'][-1]['sort']

            # -------- NEXT LOOP changes --------

            # reduce the offset
            results_offset -= current_results_length

            # assign new pit
            current_pit_hash = { search_after: current_response['hits']['hits'][-1]['sort'], pit: { id: current_response['pit_id'], keep_alive: keep_alive } }

            # we need to justify the +batch_size+ if the query will reach over the limit
            batch_size = results_limit - results_total if results_offset < batch_size && (results_total + batch_size) > results_limit
          end
        end

        # return results array
        results
      end

      alias_method :total_results, :pit_results

      # returns the RAW response for the current query
      # @return [Hash] (presumably - +#pit_results+ reads it through string keys like 'hits')
      def response
        spawn.hits_only!.resolve('Response').response
      end

      # returns the RAW aggregations for the current query
      # @return [Hash]
      def aggregations
        spawn.aggs_only!.resolve('Aggregations').aggregations
      end

      # returns the RAW hits for the current query
      # @return [Array]
      def hits
        spawn.hits_only!.resolve('Hits').hits
      end

      # returns the results for the current query
      # @return [Array]
      def results
        spawn.hits_only!.resolve('Results').results
      end

      # returns the total value
      # (uses the stored +@total+ if the relation is already loaded, otherwise runs a total-only query)
      def total
        loaded? ? @total : spawn.total_only!.resolve('Total').total
      end

      # sets query as "hits"-only query (drops the aggs from the query)
      # @return [self]
      def hits_only!
        configure!({ aggs: nil })

        self
      end

      # sets query as "aggs"-only query (drops the size & sort options - so no hits will return)
      # @return [self]
      def aggs_only!
        configure!({ size: 0, from: nil, sort: nil, _source: false })

        self
      end

      # sets query as "total"-only query (drops the size, sort & aggs options - so no hits & aggs will be returned)
      # @return [self]
      def total_only!
        configure!({ size: 0, from: nil, aggs: nil, sort: nil, _source: false })

        self
      end
    end
  end
end
@@ -0,0 +1,54 @@
1
+ module ElasticsearchRecord
2
+ module Relation
3
+ module ValueMethods
4
+ # holds the query kind
5
+ def kind_value
6
+ @values.fetch(:kind, nil)
7
+ end
8
+
9
+ def kind_value=(value)
10
+ # checks if records are already loaded - in this case we cannot mutate the query anymore
11
+ assert_mutability!
12
+
13
+ @values[:kind] = value.to_sym
14
+ end
15
+
16
+ def configure_value
17
+ @values.fetch(:configure, {})
18
+ end
19
+
20
+ def configure_value=(value)
21
+ assert_mutability!
22
+
23
+ @values[:configure] = value
24
+ end
25
+
26
+ def query_clause
27
+ @values.fetch(:query, ElasticsearchRecord::Relation::QueryClauseTree.empty)
28
+ end
29
+
30
+ def query_clause=(value)
31
+ assert_mutability!
32
+
33
+ @values[:query] = value
34
+ end
35
+
36
+ def aggs_clause
37
+ @values.fetch(:aggs, ElasticsearchRecord::Relation::QueryClauseTree.empty)
38
+ end
39
+
40
+ def aggs_clause=(value)
41
+ assert_mutability!
42
+
43
+ @values[:aggs] = value
44
+ end
45
+
46
+ private
47
+
48
+ # alternative method to avoid redefining the const +VALID_UNSCOPING_VALUES+
49
+ def _valid_unscoping_values
50
+ Set.new(ActiveRecord::Relation::VALID_UNSCOPING_VALUES.to_a + [:kind, :configure, :query, :aggs])
51
+ end
52
+ end
53
+ end
54
+ end