esse 0.4.0.rc4 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1b78836cea90af6c95e5bad1fec44c734733e21dab660cbb338a79546f2539d5
4
- data.tar.gz: 0d1e073bbcc3ab1576d134ab4c70e2231767b836f59652f2f41b56e5943cc585
3
+ metadata.gz: dc6a554b787bfb51e5568bd590790171cc88ad8c5ce432e1de2c3ef68a5d1def
4
+ data.tar.gz: cac82a25d6e8dd6dad3667a734d58be098f4b7020db76472359f7bdb4fe2067c
5
5
  SHA512:
6
- metadata.gz: 4118d2d6d2d163456f09c487b391abfb7522d4c1900e1517d94a8414b3f14587abf7e3fbb8292321933c767cb21b981c03f829ec2bd12f4cbc7327aa34cbe3e4
7
- data.tar.gz: 80cdb7b8ccdb6ca158ca95207b0f9363b3edd19edddc6e9d29c168cb03ae04d520f110c752dd604b19665891cde2ce787db9eb18ca0119d155b90f1150527d38
6
+ metadata.gz: f26afad34715ed37cec3f1d1cad11ca6e05d2921a6f31884d9ea9f1136ef239ba7661e29796c37abf69ea19aa5f5407884cf88b108bbfa211e5639e371d492ed
7
+ data.tar.gz: 13b1921d85723d3c33bacc54c7bcf95885a196c3a62c095a5ea3be5a5d5061b8cd4eb8fbb1bdbf1bb91c141ed4457f9a1baab5ec121dd7e4f4dc1c1c423277d2
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'base_operation'
4
+
5
+ module Esse
6
+ module CLI
7
+ class Index::UpdateLazyAttributes < Index::BaseOperation
8
+ attr_reader :attributes
9
+
10
+ def initialize(indices:, attributes: nil, **options)
11
+ super(indices: indices, **options)
12
+ @attributes = Array(attributes)
13
+ end
14
+
15
+ def run
16
+ validate_options!
17
+ indices.each do |index|
18
+ repos = if (repo = @options[:repo])
19
+ [index.repo(repo)]
20
+ else
21
+ index.repo_hash.values
22
+ end
23
+
24
+ repos.each do |repo|
25
+ attrs = repo_attributes(repo)
26
+ next unless attrs.any?
27
+
28
+ repo.send(:each_batch_ids, **context_options) do |ids|
29
+ attrs.each do |attribute|
30
+ repo.update_documents_attribute(attribute, ids, bulk_options)
31
+ end
32
+ end
33
+ end
34
+ end
35
+ end
36
+
37
+ private
38
+
39
+ def bulk_options
40
+ @bulk_options ||= (@options[:bulk_options] || {}).transform_values do |value|
41
+ value.is_a?(String) ? Hstring.new(value).coerce_type : value
42
+ end
43
+ end
44
+
45
+ def context_options
46
+ @context_options ||= (@options[:context] || {}).transform_values do |value|
47
+ value.is_a?(String) ? Hstring.new(value).coerce_type : value
48
+ end
49
+ end
50
+
51
+ def validate_options!
52
+ validate_indices_option!
53
+ end
54
+
55
+ def repo_attributes(repo)
56
+ return repo.lazy_document_attribute_names(true) if attributes.empty?
57
+
58
+ repo.lazy_document_attribute_names(attributes)
59
+ end
60
+ end
61
+ end
62
+ end
@@ -19,6 +19,10 @@ module Esse
19
19
  option :reindex, desc: 'Use _reindex API to import documents from the old index to the new index'
20
20
  option :optimize, type: :boolean, default: true, desc: 'Optimize index before import documents by disabling refresh_interval and setting number_of_replicas to 0'
21
21
  option :settings, type: :hash, default: nil, desc: 'List of settings to pass to the index class. Example: --settings=refresh_interval:1s,number_of_replicas:0'
22
+
23
+ option :preload_lazy_attributes, type: :string, default: nil, desc: 'Command separated list of lazy document attributes to preload using search API before the bulk import. Or pass `true` to preload all lazy attributes'
24
+ option :eager_load_lazy_attributes, type: :string, default: nil, desc: 'Comma separated list of lazy document attributes to include to the bulk index request. Or pass `true` to include all lazy attributes'
25
+ option :update_lazy_attributes, type: :string, default: nil, desc: 'Comma separated list of lazy document attributes to bulk update after the bulk index request Or pass `true` to include all lazy attributes'
22
26
  def reset(*index_classes)
23
27
  require_relative 'index/reset'
24
28
  opts = HashUtils.deep_transform_keys(options.to_h, &:to_sym)
@@ -27,6 +31,22 @@ module Esse
27
31
  if opts[:import] && opts[:reindex]
28
32
  raise ArgumentError, 'You cannot use --import and --reindex together'
29
33
  end
34
+
35
+ %i[preload_lazy_attributes eager_load_lazy_attributes update_lazy_attributes].each do |key|
36
+ val = opts.delete(key)
37
+ val = 'true' if val == key.to_s
38
+ next if val.nil? || val == 'false'
39
+
40
+ if opts[:reindex]
41
+ raise ArgumentError, "You cannot use --#{key}=#{val} with --reindex"
42
+ elsif opts[:import] == false
43
+ raise ArgumentError, "You cannot use --#{key}=#{val} with --import=false"
44
+ end
45
+
46
+ opts[:import] = {} if opts[:import] == true
47
+ opts[:import][key] = (val == 'true') ? true : val.split(',')
48
+ end
49
+
30
50
  Reset.new(indices: index_classes, **opts).run
31
51
  end
32
52
 
@@ -39,7 +59,7 @@ module Esse
39
59
  DESC
40
60
  option :suffix, type: :string, default: nil, aliases: '-s', desc: 'Suffix to append to index name'
41
61
  option :alias, type: :boolean, default: false, aliases: '-a', desc: 'Update alias after create index'
42
- option :settings, type: :hash, default: nil, desc: 'List of settings to pass to the index class. Example: --settings=index.refresh_interval:-1,index.number_of_replicas:0'
62
+ option :settings, type: :hash, default: nil, desc: 'List of settings to pass to the index class. Example: --settings=index.refresh_interval:-1 index.number_of_replicas:0'
43
63
  def create(*index_classes)
44
64
  require_relative 'index/create'
45
65
  opts = HashUtils.deep_transform_keys(options.to_h, &:to_sym)
@@ -66,7 +86,7 @@ module Esse
66
86
  desc 'update_settings *INDEX_CLASS', 'Closes the index for read/write operations, updates the index settings, and open it again'
67
87
  option :suffix, type: :string, default: nil, aliases: '-s', desc: 'Suffix to append to index name'
68
88
  option :type, type: :string, default: nil, aliases: '-t', desc: 'Document Type to update mapping for'
69
- option :settings, type: :hash, default: nil, desc: 'List of settings to pass to the index class. Example: --settings=index.refresh_interval:-1,index.number_of_replicas:0'
89
+ option :settings, type: :hash, default: nil, desc: 'List of settings to pass to the index class. Example: --settings=index.refresh_interval:-1 index.number_of_replicas:0'
70
90
  def update_settings(*index_classes)
71
91
  require_relative 'index/update_settings'
72
92
  opts = HashUtils.deep_transform_keys(options.to_h, &:to_sym)
@@ -107,11 +127,23 @@ module Esse
107
127
  opts = HashUtils.deep_transform_keys(options.to_h, &:to_sym)
108
128
  %i[preload_lazy_attributes eager_load_lazy_attributes update_lazy_attributes].each do |key|
109
129
  if (val = opts.delete(key)) && val != 'false'
130
+ val = 'true' if val == key.to_s
110
131
  opts[key] = (val == 'true') ? true : val.split(',')
111
132
  end
112
133
  end
113
134
  Import.new(indices: index_classes, **opts).run
114
135
  end
136
+
137
+ desc "update_lazy_attributes INDEX_CLASS", "Async update lazy attributes for the given index"
138
+ option :repo, type: :string, default: nil, alias: "-r", desc: "Repository to use for import"
139
+ option :suffix, type: :string, default: nil, aliases: "-s", desc: "Suffix to append to index name"
140
+ option :context, type: :hash, default: {}, required: true, desc: "List of options to pass to the index class"
141
+ option :bulk_options, type: :hash, default: nil, desc: 'List of options to pass to the bulk update request. Example: --bulk-options=timeout:30s refresh:true retry_on_conflict:3'
142
+ def update_lazy_attributes(index_class, *attributes)
143
+ require_relative "index/update_lazy_attributes"
144
+ opts = HashUtils.deep_transform_keys(options.to_h, &:to_sym)
145
+ UpdateLazyAttributes.new(indices: [index_class], attributes: attributes, **opts).run
146
+ end
115
147
  end
116
148
  end
117
149
  end
@@ -14,5 +14,11 @@ module Esse
14
14
  def each
15
15
  raise NotImplementedError, 'Override this method to iterate over the collection'
16
16
  end
17
+
18
+ # @yield [<Array>] A batch of document IDs to be processed.
19
+ # @abstract Override this method to yield each chunk of document IDs
20
+ def each_batch_ids
21
+ raise NotImplementedError, 'Override this method to iterate over the collection in batches of IDs'
22
+ end
17
23
  end
18
24
  end
data/lib/esse/document.rb CHANGED
@@ -1,7 +1,23 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ # Esse::Document is the base class for all documents in Esse.
4
+ # It provides methods to access the document ID, type, routing, meta, and source.
5
+ # It also provides methods to convert the document to a hash or bulk format.
6
+ #
7
+ # @example
8
+ # class UserDocument < Esse::Document
9
+ # def id
10
+ # object.id
11
+ # end
12
+ #
13
+ # def source
14
+ # { name: object.name, email: object.email }
15
+ # end
16
+ # end
17
+
3
18
  module Esse
4
19
  class Document
20
+
5
21
  MUTATIONS_FALLBACK = {}.freeze
6
22
 
7
23
  attr_reader :object, :options
@@ -39,11 +55,6 @@ module Esse
39
55
  !routing.nil?
40
56
  end
41
57
 
42
- # @TODO allow import, index, bulk to accept a suffix to tell which index to use
43
- # def index_suffix
44
- # nil
45
- # end
46
-
47
58
  # @return [Hash] the document meta
48
59
  # @abstract Override this method to return the document meta
49
60
  def meta
@@ -108,7 +119,7 @@ module Esse
108
119
  DocumentForPartialUpdate.new(self, source: source)
109
120
  end
110
121
 
111
- def inspect
122
+ def to_s
112
123
  attributes = {id: :id, routing: :routing, source: :memoized_source}.map do |attr_name, attr_src|
113
124
  value = send(attr_src)
114
125
  next unless value
@@ -92,9 +92,14 @@ module Esse
92
92
 
93
93
  # @api private
94
94
  def process(event_id, payload)
95
+ # Build the event once and reuse for all listeners. Previously, a new
96
+ # Event was created per listener via Event#payload which does
97
+ # @payload.merge(data) — a full hash copy each time. With 24+
98
+ # esse-rails subscribers, each search was creating 24+ Event objects
99
+ # with 24+ hash merges of the full payload (including Response with
100
+ # the entire OpenSearch JSON). Now it's 1 Event + 1 merge total.
101
+ event = events[event_id].payload(payload)
95
102
  listeners[event_id].each do |listener|
96
- event = events[event_id].payload(payload)
97
-
98
103
  yield(event, listener)
99
104
  end
100
105
  end
@@ -60,6 +60,11 @@ module Esse
60
60
  if publish_event
61
61
  payload[:runtime] ||= Time.now - payload.delete(:__started_at__) if payload[:__started_at__]
62
62
  __bus__.publish(event_id, payload)
63
+ # Release references to large objects (Query, Response with full
64
+ # OpenSearch JSON) so they become GC-eligible immediately after
65
+ # event dispatch, rather than being held until the caller's stack
66
+ # frame unwinds.
67
+ payload.clear
63
68
  end
64
69
  end
65
70
 
data/lib/esse/events.rb CHANGED
@@ -56,6 +56,7 @@ module Esse
56
56
  register_event 'elasticsearch.exist'
57
57
  register_event 'elasticsearch.count'
58
58
  register_event 'elasticsearch.get'
59
+ register_event 'elasticsearch.mget'
59
60
  register_event 'elasticsearch.reindex'
60
61
  register_event 'elasticsearch.update_by_query'
61
62
  register_event 'elasticsearch.delete_by_query'
@@ -1,30 +1,6 @@
1
1
  module Esse
2
2
  module Import
3
3
  class Bulk
4
- def self.build_from_documents(type: nil, index: nil, delete: nil, create: nil, update: nil)
5
- index = Array(index).select(&Esse.method(:document?)).reject(&:ignore_on_index?).map do |doc|
6
- value = doc.to_bulk
7
- value[:_type] ||= type if type
8
- value
9
- end
10
- create = Array(create).select(&Esse.method(:document?)).reject(&:ignore_on_index?).map do |doc|
11
- value = doc.to_bulk
12
- value[:_type] ||= type if type
13
- value
14
- end
15
- update = Array(update).select(&Esse.method(:document?)).reject(&:ignore_on_index?).map do |doc|
16
- value = doc.to_bulk(operation: :update)
17
- value[:_type] ||= type if type
18
- value
19
- end
20
- delete = Array(delete).select(&Esse.method(:document?)).reject(&:ignore_on_delete?).map do |doc|
21
- value = doc.to_bulk(data: false)
22
- value[:_type] ||= type if type
23
- value
24
- end
25
- new(index: index, delete: delete, create: create, update: update)
26
- end
27
-
28
4
  def initialize(index: nil, delete: nil, create: nil, update: nil)
29
5
  @index = Esse::ArrayUtils.wrap(index).map { |payload| { index: payload } }
30
6
  @create = Esse::ArrayUtils.wrap(create).map { |payload| { create: payload } }
@@ -29,6 +29,37 @@ module Esse
29
29
  cluster.api.get(**options)
30
30
  end
31
31
 
32
+ # Retrieves multiple JSON documents by ID from an index.
33
+ #
34
+ # UsersIndex.mget(ids: [1, 2, 3])
35
+ # UsersIndex.mget(ids: [Esse::HashDocument.new(id: 1), Esse::HashDocument.new(id: 2)])
36
+ #
37
+ # @param ids [Array<Esse::Document, Hash, String, Integer>] documents, hashes, or IDs to retrieve
38
+ # @param options [Hash] Hash of parameters that will be passed along to the elasticsearch request
39
+ # @option [String, nil] :suffix The index suffix. Defaults to nil.
40
+ # @return [Hash] The elasticsearch response with 'docs' array
41
+ #
42
+ # @see https://www.elastic.co/guide/en/elasticsearch/reference/7.5/docs-multi-get.html
43
+ def mget(ids:, suffix: nil, **options)
44
+ options[:body] = {
45
+ docs: ids.map do |doc|
46
+ if document?(doc)
47
+ datum = { _id: doc.id }
48
+ datum[:_type] = doc.type if doc.type?
49
+ datum[:routing] = doc.routing if doc.routing?
50
+ datum
51
+ elsif doc.is_a?(Hash)
52
+ doc
53
+ else
54
+ { _id: doc }
55
+ end
56
+ end,
57
+ }
58
+ options[:index] = index_name(suffix: suffix)
59
+ cluster.may_update_type!(options)
60
+ cluster.api.mget(**options)
61
+ end
62
+
32
63
  # Check if a JSON document exists
33
64
  #
34
65
  # UsersIndex.exist?(id: 1) # true
@@ -88,6 +119,7 @@ module Esse
88
119
  # @see https://www.elastic.co/guide/en/elasticsearch/reference/7.5/docs-delete.html
89
120
  def delete(doc = nil, suffix: nil, **options)
90
121
  if document?(doc)
122
+ options = request_params_for(:delete, doc).merge(options) if request_params_for?(:delete)
91
123
  options[:id] = doc.id
92
124
  options[:type] = doc.type if doc.type?
93
125
  options[:routing] = doc.routing if doc.routing?
@@ -113,6 +145,7 @@ module Esse
113
145
  # @see https://www.elastic.co/guide/en/elasticsearch/reference/7.5/docs-update.html
114
146
  def update(doc = nil, suffix: nil, **options)
115
147
  if document?(doc)
148
+ options = request_params_for(:update, doc).merge(options) if request_params_for?(:update)
116
149
  options[:id] = doc.id
117
150
  options[:body] = { doc: doc.mutated_source }
118
151
  options[:type] = doc.type if doc.type?
@@ -139,6 +172,7 @@ module Esse
139
172
  # @see https://www.elastic.co/guide/en/elasticsearch/reference/7.5/docs-index_.html
140
173
  def index(doc = nil, suffix: nil, **options)
141
174
  if document?(doc)
175
+ options = request_params_for(:index, doc).merge(options) if request_params_for?(:index)
142
176
  options[:id] = doc.id
143
177
  options[:body] = doc.mutated_source
144
178
  options[:type] = doc.type if doc.type?
@@ -181,6 +215,7 @@ module Esse
181
215
  elsif Esse.document?(doc) && !doc.ignore_on_index?
182
216
  hash = doc.to_bulk
183
217
  hash[:_type] ||= type if type
218
+ hash = request_params_for(:index, doc, bulk: true).merge(hash) if request_params_for?(:index)
184
219
  to_index << hash
185
220
  end
186
221
  end
@@ -190,6 +225,7 @@ module Esse
190
225
  elsif Esse.document?(doc) && !doc.ignore_on_index?
191
226
  hash = doc.to_bulk
192
227
  hash[:_type] ||= type if type
228
+ hash = request_params_for(:create, doc, bulk: true).merge(hash) if request_params_for?(:create)
193
229
  to_create << hash
194
230
  end
195
231
  end
@@ -199,6 +235,7 @@ module Esse
199
235
  elsif Esse.document?(doc) && !doc.ignore_on_index?
200
236
  hash = doc.to_bulk(operation: :update)
201
237
  hash[:_type] ||= type if type
238
+ hash = request_params_for(:update, doc, bulk: true).merge(hash) if request_params_for?(:update)
202
239
  to_update << hash
203
240
  end
204
241
  end
@@ -208,6 +245,7 @@ module Esse
208
245
  elsif Esse.document?(doc) && !doc.ignore_on_delete?
209
246
  hash = doc.to_bulk(data: false)
210
247
  hash[:_type] ||= type if type
248
+ hash = request_params_for(:delete, doc, bulk: true).merge(hash) if request_params_for?(:delete)
211
249
  to_delete << hash
212
250
  end
213
251
  end
@@ -30,12 +30,23 @@ module Esse
30
30
  options = CREATE_INDEX_RESERVED_KEYWORDS.merge(options)
31
31
  name = build_real_index_name(suffix)
32
32
  definition = body || [settings_hash(settings: settings), mappings_hash].reduce(&:merge)
33
+ index_alias = options.delete(:alias)
33
34
 
34
- if options.delete(:alias) && name != index_name
35
+ if index_alias && name != index_name
35
36
  definition[:aliases] = { index_name => {} }
36
37
  end
37
38
 
38
- cluster.api.create_index(index: name, body: definition, **options)
39
+ retried = false
40
+ begin
41
+ cluster.api.create_index(index: name, body: definition, **options)
42
+ rescue Esse::Transport::BadRequestError => e
43
+ if retried == false && e.message.include?('exists with the same name') && index_alias == :force
44
+ cluster.api.delete_index(index: index_name)
45
+ retried = true
46
+ retry
47
+ end
48
+ raise
49
+ end
39
50
  end
40
51
 
41
52
  # Deletes, creates and imports data to the index. Performs zero-downtime index resetting.
@@ -59,8 +70,8 @@ module Esse
59
70
  optimized_creation = optimize && syncronous_import && (import || reindex)
60
71
  if optimized_creation
61
72
  definition = [settings_hash(settings: settings), mappings_hash].reduce(&:merge)
62
- number_of_replicas = definition.dig(Esse::SETTING_ROOT_KEY, :index, :number_of_replicas)
63
- refresh_interval = definition.dig(Esse::SETTING_ROOT_KEY, :index, :refresh_interval)
73
+ number_of_replicas = definition.dig(Esse::SETTING_ROOT_KEY, :index, :number_of_replicas) || 1
74
+ refresh_interval = definition.dig(Esse::SETTING_ROOT_KEY, :index, :refresh_interval) || '1s'
64
75
  new_number_of_replicas = ((definition[Esse::SETTING_ROOT_KEY] ||= {})[:index] ||= {})[:number_of_replicas] = 0
65
76
  new_refresh_interval = ((definition[Esse::SETTING_ROOT_KEY] ||= {})[:index] ||= {})[:refresh_interval] = '-1'
66
77
  create_index(**options, suffix: suffix, alias: false, body: definition)
@@ -88,6 +99,10 @@ module Esse
88
99
  end
89
100
 
90
101
  if optimized_creation && number_of_replicas != new_number_of_replicas || refresh_interval != new_refresh_interval
102
+ settings ||= {}
103
+ settings[:index] ||= {}
104
+ settings[:index][:number_of_replicas] = number_of_replicas
105
+ settings[:index][:refresh_interval] = refresh_interval
91
106
  update_settings(suffix: suffix, settings: settings)
92
107
  refresh(suffix: suffix)
93
108
  end
@@ -39,6 +39,7 @@ module Esse
39
39
  :@mapping => nil,
40
40
  :@cluster_id => :dup,
41
41
  :@plugins => :dup,
42
+ :@request_params => :dup,
42
43
  }
43
44
  end
44
45
  end
@@ -0,0 +1,103 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Esse
4
+ class Index
5
+ module RequestConfigurable
6
+ OPERATIONS = %i[index create update delete].freeze
7
+ BULK_OPERATIONS_AND_PARAMS = {
8
+ index: %i[_index _type routing if_primary_term if_seq_no version version_type dynamic_templates pipeline require_alias],
9
+ create: %i[_index _type routing if_primary_term if_seq_no version version_type dynamic_templates pipeline require_alias],
10
+ update: %i[_index _type routing if_primary_term if_seq_no version version_type require_alias retry_on_conflict],
11
+ delete: %i[_index _type routing if_primary_term if_seq_no version version_type],
12
+ }.freeze
13
+
14
+ def self.extended(base)
15
+ base.extend DSL
16
+ end
17
+
18
+ class RequestParams
19
+ attr_reader :operation, :hash, :block
20
+
21
+ def initialize(operation, hash = {}, &block)
22
+ @operation = operation
23
+ @hash = hash.transform_keys(&:to_sym)
24
+ @block = block
25
+ end
26
+
27
+ # @param doc [Esse::Document] the document to apply the request parameters to
28
+ # @return [Hash] the request parameters for the operation
29
+ # @raise [ArgumentError] if the result of the block is not a Hash
30
+ def call(doc)
31
+ return hash unless block
32
+
33
+ result = block.call(doc) || {}
34
+ raise ArgumentError, "Expected a Hash, got #{result.class}" unless result.is_a?(Hash)
35
+
36
+ hash.merge(result.transform_keys(&:to_sym))
37
+ end
38
+ end
39
+
40
+ class Container
41
+ def initialize
42
+ @mutex = Mutex.new
43
+ @entries = {}.freeze
44
+ end
45
+
46
+ def add(operation, entry)
47
+ @mutex.synchronize do
48
+ hash = @entries.dup
49
+ arr = (hash[operation] || []).dup
50
+ arr << entry
51
+ hash[operation] = arr.freeze
52
+ @entries = hash.freeze
53
+ end
54
+ end
55
+
56
+ def key?(operation)
57
+ @entries.key?(operation)
58
+ end
59
+
60
+ def retrieve(operation, doc)
61
+ return {} unless @entries[operation]
62
+
63
+ @entries[operation].each_with_object({}) do |entry, hash|
64
+ hash.merge!(entry.call(doc))
65
+ end
66
+ end
67
+ end
68
+
69
+ module DSL
70
+ def request_params(*operations, **params, &block)
71
+ operations.each do |operation|
72
+ raise ArgumentError, "Invalid operation: #{operation}" unless OPERATIONS.include?(operation)
73
+
74
+ @request_params ||= Container.new
75
+ @request_params.add(operation, RequestParams.new(operation, params, &block))
76
+ end
77
+
78
+ self
79
+ end
80
+
81
+ def request_params_for(operation, doc, bulk: false)
82
+ return {} unless request_params_for?(operation)
83
+
84
+ params = @request_params.retrieve(operation, doc)
85
+
86
+ if bulk && BULK_OPERATIONS_AND_PARAMS.key?(operation)
87
+ params.slice(*BULK_OPERATIONS_AND_PARAMS[operation])
88
+ else
89
+ params
90
+ end
91
+ end
92
+
93
+ def request_params_for?(operation)
94
+ return false unless @request_params
95
+
96
+ @request_params.key?(operation)
97
+ end
98
+ end
99
+ end
100
+
101
+ extend RequestConfigurable
102
+ end
103
+ end
@@ -11,6 +11,7 @@ module Esse
11
11
  number_of_shards
12
12
  number_of_replicas
13
13
  refresh_interval
14
+ mapping
14
15
  ].freeze
15
16
 
16
17
  def settings_hash(settings: nil)
@@ -22,9 +23,17 @@ module Esse
22
23
  end
23
24
  INDEX_SIMPLIFIED_SETTINGS.each do |key|
24
25
  next unless values.key?(key)
26
+ value = values.delete(key)
27
+ next if value.nil?
25
28
 
26
- (values[:index] ||= {}).merge!(key => values.delete(key))
29
+ (values[:index] ||= {}).merge!(key => value)
27
30
  end
31
+
32
+ if values[:index].is_a?(Hash)
33
+ INDEX_SIMPLIFIED_SETTINGS.each { |key| values[:index].delete(key) if values[:index][key].nil? }
34
+ values.delete(:index) if values[:index].empty?
35
+ end
36
+
28
37
  { Esse::SETTING_ROOT_KEY => values }
29
38
  end
30
39
 
data/lib/esse/index.rb CHANGED
@@ -23,5 +23,6 @@ module Esse
23
23
  require_relative 'index/indices'
24
24
  require_relative 'index/search'
25
25
  require_relative 'index/documents'
26
+ require_relative 'index/request_configurable'
26
27
  end
27
28
  end
@@ -82,5 +82,19 @@ module Esse
82
82
  @value
83
83
  end
84
84
  def_conventional :presence!
85
+
86
+ def coerce_type
87
+ if @value =~ /\A-?\d+\z/
88
+ return @value.to_i
89
+ elsif @value =~ /\A-?\d+\.\d+\z/
90
+ return @value.to_f
91
+ elsif @value == 'true'
92
+ return true
93
+ elsif @value == 'false'
94
+ return false
95
+ end
96
+
97
+ @value
98
+ end
85
99
  end
86
100
  end
@@ -10,6 +10,7 @@ module Esse
10
10
  def update_documents_attribute(name, ids_or_doc_headers = [], kwargs = {})
11
11
  batch = documents_for_lazy_attribute(name, ids_or_doc_headers)
12
12
  return if batch.empty?
13
+
13
14
  kwargs = kwargs.transform_keys(&:to_sym)
14
15
 
15
16
  if kwargs.delete(:index_on_missing) { true }
@@ -157,6 +157,66 @@ module Esse
157
157
  @document_proc.call(model, **kwargs)
158
158
  end
159
159
 
160
+ # Used to fetch batches of ids from a collection that implements the `each_batch_ids` method.
161
+ #
162
+ # @param [Hash] kwargs The context
163
+ # @yield [Array] A batch of document IDs to be processed.
164
+ # @raise [NotImplementedError] if the collection does not implement the `each_batch_ids` method.
165
+ # @raise [NotImplementedError] if the collection is not defined.
166
+ # @return [Enumerator] The enumerator
167
+ # @example
168
+ # each_batch_ids(active: true) do |ids|
169
+ # puts ids.size
170
+ # end
171
+ def each_batch_ids(*args, **kwargs)
172
+ if @collection_proc.nil?
173
+ raise NotImplementedError, format('there is no %<t>p collection defined for the %<k>p index', t: repo_name, k: index.to_s)
174
+ end
175
+
176
+ if @collection_proc.is_a?(Class) && @collection_proc.method_defined?(:each_batch_ids)
177
+ colection_instance = @collection_proc.new(*args, **kwargs)
178
+ if block_given?
179
+ colection_instance.each_batch_ids { |ids| yield ids }
180
+ else
181
+ Enumerator.new do |yielder|
182
+ colection_instance.each_batch_ids { |ids| yielder.yield ids }
183
+ end
184
+ end
185
+ else
186
+ Kernel.warn(<<~MSG)
187
+ The public `#each_batch_ids' method is not available for the collection defined in the #{repo_name} index.
188
+
189
+ The `#each' method will be used instead, which may lead to performance degradation because it serializes the entire document
190
+ to only obtain the IDs. Consider implementing a public `#each_batch_ids' method in your collection class for better performance.
191
+
192
+ Example implementation taking into account you are dealing with an ActiveRecord model:
193
+ class UserCollection < Esse::Collection
194
+ # ....
195
+
196
+ def each_batch_ids
197
+ user_query.except(:includes, :preload, :eager_load).in_batches do |batch|
198
+ yield batch.pluck(:id)
199
+ end
200
+ end
201
+ end
202
+ MSG
203
+
204
+ enumerator = Enumerator.new do |yielder|
205
+ each_batch(*args, **kwargs) do |*batch_args|
206
+ batch, collection_context = batch_args
207
+ collection_context ||= {}
208
+ ids = [*batch].map { |entry| serialize(entry, **collection_context)&.id }.compact
209
+ yielder.yield(ids) if ids.any?
210
+ end
211
+ end
212
+ return enumerator unless block_given?
213
+
214
+ enumerator.each { |ids| yield ids }
215
+ end
216
+ rescue LocalJumpError
217
+ raise(SyntaxError, 'block must be explicitly declared in the collection definition')
218
+ end
219
+
160
220
  protected
161
221
 
162
222
  def coerce_to_document(value)
@@ -40,7 +40,7 @@ module Esse
40
40
 
41
41
  def scroll_hits(batch_size: 1_000, scroll: '1m')
42
42
  response = execute_search_query!(size: batch_size, scroll: scroll)
43
- scroll_id = nil
43
+ scroll_id = response.raw_response['scroll_id'] || response.raw_response['_scroll_id']
44
44
  fetched = 0
45
45
  total = response.total
46
46
 
@@ -48,9 +48,9 @@ module Esse
48
48
  fetched += response.hits.size
49
49
  yield(response.hits) if response.hits.any?
50
50
  break if fetched >= total
51
- scroll_id = response.raw_response['scroll_id'] || response.raw_response['_scroll_id']
52
51
  break unless scroll_id
53
52
  response = execute_scroll_query(scroll: scroll, scroll_id: scroll_id)
53
+ scroll_id = response.raw_response['scroll_id'] || response.raw_response['_scroll_id']
54
54
  end
55
55
  ensure
56
56
  begin
@@ -59,12 +59,40 @@ module Esse
59
59
  end
60
60
  end
61
61
 
62
+ def search_after_hits(batch_size: 1_000)
63
+ body = HashUtils.deep_dup(definition.fetch(:body, {}))
64
+ body[:size] = batch_size
65
+ body.delete(:from)
66
+ body.delete('from')
67
+
68
+ unless body.key?(:sort) || body.key?('sort')
69
+ raise ArgumentError, 'The query body must include a :sort to use search_after'
70
+ end
71
+
72
+ loop do
73
+ response = execute_search_query!(body: body)
74
+ break if response.hits.empty?
75
+
76
+ yield(response.hits)
77
+
78
+ last_sort = response.hits.last['sort']
79
+ break unless last_sort
80
+ break if response.hits.size < batch_size
81
+
82
+ body[:search_after] = last_sort
83
+ end
84
+ end
85
+
86
+ def reset!
87
+ @response = nil
88
+ end
89
+
62
90
  private
63
91
 
64
92
  def execute_search_query!(**execution_options)
65
93
  resp, err = nil
66
94
  Esse::Events.instrument('elasticsearch.execute_search_query') do |payload|
67
- payload[:query] = self
95
+ payload[:query_definition] = definition
68
96
  begin
69
97
  resp = Response.new(self, transport.search(**definition, **execution_options))
70
98
  rescue => e
@@ -81,7 +109,7 @@ module Esse
81
109
  def execute_scroll_query(scroll:, scroll_id:)
82
110
  resp, err = nil
83
111
  Esse::Events.instrument('elasticsearch.execute_search_query') do |payload|
84
- payload[:query] = self
112
+ payload[:query_definition] = definition
85
113
  begin
86
114
  resp = Response.new(self, transport.scroll(scroll: scroll, body: { scroll_id: scroll_id }))
87
115
  rescue => e
@@ -94,10 +122,6 @@ module Esse
94
122
 
95
123
  resp
96
124
  end
97
-
98
- def reset!
99
- @response = nil
100
- end
101
125
  end
102
126
  end
103
127
  end
@@ -7,13 +7,13 @@ module Esse
7
7
  extend Forwardable
8
8
 
9
9
  def_delegators :hits, :each, :size, :empty?
10
- attr_reader :query, :raw_response, :options
10
+ attr_reader :query_definition, :raw_response, :options
11
11
 
12
- # @param [Esse::Search::Query] query The search query
12
+ # @param [Esse::Search::Query, Hash] query The search query or its definition hash
13
13
  # @param [Hash] raw_response The raw response from Elasticsearch
14
14
  # @param [Hash] options The options passed to the search
15
15
  def initialize(query, raw_response, **options)
16
- @query = query
16
+ @query_definition = query.is_a?(Hash) ? query : query.definition
17
17
  @raw_response = raw_response
18
18
  @options = options
19
19
  end
@@ -29,6 +29,30 @@ module Esse
29
29
  end
30
30
  end
31
31
 
32
+ # Retrieves multiple documents in a single request.
33
+ #
34
+ # @option [String] :index The name of the index
35
+ # @option [Hash] :body Document identifiers; can be either `docs` (specifying full document information)
36
+ # or `ids` (when index is provided). (*Required*)
37
+ # @option [String] :preference Specify the node or shard the operation should be performed on (default: random)
38
+ # @option [Boolean] :realtime Specify whether to perform the operation in realtime or search mode
39
+ # @option [Boolean] :refresh Refresh the shard containing the document before performing the operation
40
+ # @option [String] :routing Specific routing value
41
+ # @option [List] :stored_fields A comma-separated list of stored fields to return in the response
42
+ # @option [List] :_source True or false to return the _source field or not, or a list of fields to return
43
+ # @option [List] :_source_excludes A list of fields to exclude from the returned _source field
44
+ # @option [List] :_source_includes A list of fields to extract and return from the _source field
45
+ # @option [Hash] :headers Custom HTTP headers
46
+ #
47
+ # @see https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-multi-get.html
48
+ #
49
+ def mget(index:, body:, **options)
50
+ Esse::Events.instrument('elasticsearch.mget') do |payload|
51
+ payload[:request] = opts = options.merge(index: index, body: body)
52
+ payload[:response] = coerce_exception { client.mget(**opts) }
53
+ end
54
+ end
55
+
32
56
  # Returns information about whether a document exists in an index.
33
57
  #
34
58
  # @option [String] :id The document ID
data/lib/esse/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Esse
4
- VERSION = '0.4.0.rc4'
4
+ VERSION = '0.4.0'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: esse
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0.rc4
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Marcos G. Zimmermann
8
8
  autorequire:
9
9
  bindir: exec
10
10
  cert_chain: []
11
- date: 2024-10-07 00:00:00.000000000 Z
11
+ date: 2026-03-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: multi_json
@@ -226,6 +226,7 @@ files:
226
226
  - lib/esse/cli/index/open.rb
227
227
  - lib/esse/cli/index/reset.rb
228
228
  - lib/esse/cli/index/update_aliases.rb
229
+ - lib/esse/cli/index/update_lazy_attributes.rb
229
230
  - lib/esse/cli/index/update_mapping.rb
230
231
  - lib/esse/cli/index/update_settings.rb
231
232
  - lib/esse/cli/parser/bool_or_hash.rb
@@ -272,6 +273,7 @@ files:
272
273
  - lib/esse/index/mappings.rb
273
274
  - lib/esse/index/object_document_mapper.rb
274
275
  - lib/esse/index/plugins.rb
276
+ - lib/esse/index/request_configurable.rb
275
277
  - lib/esse/index/search.rb
276
278
  - lib/esse/index/settings.rb
277
279
  - lib/esse/index/type.rb
@@ -317,14 +319,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
317
319
  requirements:
318
320
  - - ">="
319
321
  - !ruby/object:Gem::Version
320
- version: 2.3.0
322
+ version: '2.7'
321
323
  required_rubygems_version: !ruby/object:Gem::Requirement
322
324
  requirements:
323
- - - ">"
325
+ - - ">="
324
326
  - !ruby/object:Gem::Version
325
- version: 1.3.1
327
+ version: '0'
326
328
  requirements: []
327
- rubygems_version: 3.0.3.1
329
+ rubygems_version: 3.4.10
328
330
  signing_key:
329
331
  specification_version: 4
330
332
  summary: Pure Ruby and framework-agnostic ElasticSearch/OpenSearch toolkit for building