esse 0.4.0.rc5 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/esse/events/bus.rb +7 -2
- data/lib/esse/events/publisher.rb +5 -0
- data/lib/esse/events.rb +1 -0
- data/lib/esse/index/documents.rb +31 -0
- data/lib/esse/index/settings.rb +12 -20
- data/lib/esse/index_setting.rb +43 -0
- data/lib/esse/search/query.rb +32 -8
- data/lib/esse/search/response.rb +3 -3
- data/lib/esse/transport/documents.rb +24 -0
- data/lib/esse/version.rb +1 -1
- metadata +6 -6
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 9fc6f63594da661b35bc22ab0c83655ded02b8ebe2e20ec5612a0927934156e9
|
|
4
|
+
data.tar.gz: c46f9a70e262d40a9c60c7ce72be3e3b816d18c536f6e3fe770bd06c06c5f57b
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 8525a1ba74d9452163e87e1acf1bf621654f2bb8b9a7811e6eca94fc452c47f9255808b2c45be701112be085f8d3176243a685fcfb6647a8e9845699a727911d
|
|
7
|
+
data.tar.gz: 3d0be64e631a47c07758341b34fc86eedffd76bbd12c4734016d9303c256d9c8470929fc564bdace28bb21f072e44d0193cbee06e8e808675a695b46aac92c38
|
data/lib/esse/events/bus.rb
CHANGED
|
@@ -92,9 +92,14 @@ module Esse
|
|
|
92
92
|
|
|
93
93
|
# @api private
|
|
94
94
|
def process(event_id, payload)
|
|
95
|
+
# Build the event once and reuse for all listeners. Previously, a new
|
|
96
|
+
# Event was created per listener via Event#payload which does
|
|
97
|
+
# @payload.merge(data) — a full hash copy each time. With 24+
|
|
98
|
+
# esse-rails subscribers, each search was creating 24+ Event objects
|
|
99
|
+
# with 24+ hash merges of the full payload (including Response with
|
|
100
|
+
# the entire OpenSearch JSON). Now it's 1 Event + 1 merge total.
|
|
101
|
+
event = events[event_id].payload(payload)
|
|
95
102
|
listeners[event_id].each do |listener|
|
|
96
|
-
event = events[event_id].payload(payload)
|
|
97
|
-
|
|
98
103
|
yield(event, listener)
|
|
99
104
|
end
|
|
100
105
|
end
|
data/lib/esse/events/publisher.rb
CHANGED
|
@@ -60,6 +60,11 @@ module Esse
|
|
|
60
60
|
if publish_event
|
|
61
61
|
payload[:runtime] ||= Time.now - payload.delete(:__started_at__) if payload[:__started_at__]
|
|
62
62
|
__bus__.publish(event_id, payload)
|
|
63
|
+
# Release references to large objects (Query, Response with full
|
|
64
|
+
# OpenSearch JSON) so they become GC-eligible immediately after
|
|
65
|
+
# event dispatch, rather than being held until the caller's stack
|
|
66
|
+
# frame unwinds.
|
|
67
|
+
payload.clear
|
|
63
68
|
end
|
|
64
69
|
end
|
|
65
70
|
|
data/lib/esse/events.rb
CHANGED
|
@@ -56,6 +56,7 @@ module Esse
|
|
|
56
56
|
register_event 'elasticsearch.exist'
|
|
57
57
|
register_event 'elasticsearch.count'
|
|
58
58
|
register_event 'elasticsearch.get'
|
|
59
|
+
register_event 'elasticsearch.mget'
|
|
59
60
|
register_event 'elasticsearch.reindex'
|
|
60
61
|
register_event 'elasticsearch.update_by_query'
|
|
61
62
|
register_event 'elasticsearch.delete_by_query'
|
data/lib/esse/index/documents.rb
CHANGED
|
@@ -29,6 +29,37 @@ module Esse
|
|
|
29
29
|
cluster.api.get(**options)
|
|
30
30
|
end
|
|
31
31
|
|
|
32
|
+
# Retrieves multiple JSON documents by ID from an index.
|
|
33
|
+
#
|
|
34
|
+
# UsersIndex.mget(ids: [1, 2, 3])
|
|
35
|
+
# UsersIndex.mget(ids: [Esse::HashDocument.new(id: 1), Esse::HashDocument.new(id: 2)])
|
|
36
|
+
#
|
|
37
|
+
# @param ids [Array<Esse::Document, Hash, String, Integer>] documents, hashes, or IDs to retrieve
|
|
38
|
+
# @param options [Hash] Hash of paramenters that will be passed along to elasticsearch request
|
|
39
|
+
# @option [String, nil] :suffix The index suffix. Defaults to the nil.
|
|
40
|
+
# @return [Hash] The elasticsearch response with 'docs' array
|
|
41
|
+
#
|
|
42
|
+
# @see https://www.elastic.co/guide/en/elasticsearch/reference/7.5/docs-multi-get.html
|
|
43
|
+
def mget(ids:, suffix: nil, **options)
|
|
44
|
+
options[:body] = {
|
|
45
|
+
docs: ids.map do |doc|
|
|
46
|
+
if document?(doc)
|
|
47
|
+
datum = { _id: doc.id }
|
|
48
|
+
datum[:_type] = doc.type if doc.type?
|
|
49
|
+
datum[:routing] = doc.routing if doc.routing?
|
|
50
|
+
datum
|
|
51
|
+
elsif doc.is_a?(Hash)
|
|
52
|
+
doc
|
|
53
|
+
else
|
|
54
|
+
{ _id: doc }
|
|
55
|
+
end
|
|
56
|
+
end,
|
|
57
|
+
}
|
|
58
|
+
options[:index] = index_name(suffix: suffix)
|
|
59
|
+
cluster.may_update_type!(options)
|
|
60
|
+
cluster.api.mget(**options)
|
|
61
|
+
end
|
|
62
|
+
|
|
32
63
|
# Check if a JSON document exists
|
|
33
64
|
#
|
|
34
65
|
# UsersIndex.exist?(id: 1) # true
|
data/lib/esse/index/settings.rb
CHANGED
|
@@ -4,29 +4,21 @@ module Esse
|
|
|
4
4
|
# https://github.com/elastic/elasticsearch-ruby/blob/master/elasticsearch-api/lib/elasticsearch/api/actions/indices/put_settings.rb
|
|
5
5
|
class Index
|
|
6
6
|
module ClassMethods
|
|
7
|
-
#
|
|
8
|
-
#
|
|
9
|
-
#
|
|
10
|
-
INDEX_SIMPLIFIED_SETTINGS =
|
|
11
|
-
number_of_shards
|
|
12
|
-
number_of_replicas
|
|
13
|
-
refresh_interval
|
|
14
|
-
mapping
|
|
15
|
-
].freeze
|
|
7
|
+
# Backwards-compatible alias. The canonical list now lives on
|
|
8
|
+
# +Esse::IndexSetting::INDEX_SIMPLIFIED_SETTINGS+ so that the merge
|
|
9
|
+
# logic and the simplified-key promotion stay in sync.
|
|
10
|
+
INDEX_SIMPLIFIED_SETTINGS = Esse::IndexSetting::INDEX_SIMPLIFIED_SETTINGS
|
|
16
11
|
|
|
17
12
|
def settings_hash(settings: nil)
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
INDEX_SIMPLIFIED_SETTINGS.each do |key|
|
|
25
|
-
next unless values.key?(key)
|
|
26
|
-
value = values.delete(key)
|
|
27
|
-
next if value.nil?
|
|
13
|
+
# Normalize each side (global vs local) separately before merging so
|
|
14
|
+
# a flat global key (e.g. top-level :number_of_shards) cannot clobber
|
|
15
|
+
# an explicit nested local value (e.g. :index => { :number_of_shards => 8 }).
|
|
16
|
+
global = Esse::IndexSetting.normalize(setting.globals)
|
|
17
|
+
local = Esse::IndexSetting.normalize(setting.to_h)
|
|
18
|
+
values = HashUtils.deep_merge(global, local)
|
|
28
19
|
|
|
29
|
-
|
|
20
|
+
if settings.is_a?(Hash)
|
|
21
|
+
values = HashUtils.deep_merge(values, Esse::IndexSetting.normalize(settings))
|
|
30
22
|
end
|
|
31
23
|
|
|
32
24
|
if values[:index].is_a?(Hash)
|
data/lib/esse/index_setting.rb
CHANGED
|
@@ -3,6 +3,17 @@
|
|
|
3
3
|
module Esse
|
|
4
4
|
# https://www.elastic.co/guide/en/elasticsearch/reference/1.7/indices.html
|
|
5
5
|
class IndexSetting
|
|
6
|
+
# Top-level keys that Elasticsearch/OpenSearch accept either flat or nested
|
|
7
|
+
# under `index:`. We always promote them to the nested form so that values
|
|
8
|
+
# from different sources (cluster globals vs per-index template) merge
|
|
9
|
+
# predictably regardless of which form each side was authored in.
|
|
10
|
+
INDEX_SIMPLIFIED_SETTINGS = %i[
|
|
11
|
+
number_of_shards
|
|
12
|
+
number_of_replicas
|
|
13
|
+
refresh_interval
|
|
14
|
+
mapping
|
|
15
|
+
].freeze
|
|
16
|
+
|
|
6
17
|
# @param [Hash] options
|
|
7
18
|
# @option options [Proc] :globals A proc that will be called to load global settings
|
|
8
19
|
# @option options [Array] :paths A list of paths to load settings from
|
|
@@ -35,6 +46,38 @@ module Esse
|
|
|
35
46
|
HashUtils.deep_merge(global, local)
|
|
36
47
|
end
|
|
37
48
|
|
|
49
|
+
# Returns the raw (unsymbolized) global settings as supplied by the
|
|
50
|
+
# +globals+ proc. Public so that callers like
|
|
51
|
+
# +Esse::Index.settings_hash+ can normalize it independently before
|
|
52
|
+
# merging it with the local template — preventing a flat global value
|
|
53
|
+
# from clobbering a nested local value once both are merged.
|
|
54
|
+
def globals
|
|
55
|
+
@globals.call || {}
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
# Normalize a settings hash by:
|
|
59
|
+
# * symbolizing keys
|
|
60
|
+
# * stripping the `:settings` root if present
|
|
61
|
+
# * exploding dotted keys ('index.number_of_replicas' -> { index: { number_of_replicas: ... } })
|
|
62
|
+
# * promoting simplified flat keys (number_of_shards, etc.) into the
|
|
63
|
+
# nested `:index` form, preserving any value already present under
|
|
64
|
+
# `:index` (we never overwrite an explicit nested setting with a
|
|
65
|
+
# flat value from the same source).
|
|
66
|
+
def self.normalize(hash)
|
|
67
|
+
values = HashUtils.deep_transform_keys(hash || {}, &:to_sym)
|
|
68
|
+
values = values[Esse::SETTING_ROOT_KEY] if values.key?(Esse::SETTING_ROOT_KEY)
|
|
69
|
+
values = HashUtils.explode_keys(values)
|
|
70
|
+
INDEX_SIMPLIFIED_SETTINGS.each do |key|
|
|
71
|
+
next unless values.key?(key)
|
|
72
|
+
value = values.delete(key)
|
|
73
|
+
next if value.nil?
|
|
74
|
+
|
|
75
|
+
values[:index] ||= {}
|
|
76
|
+
values[:index][key] = value unless values[:index].key?(key)
|
|
77
|
+
end
|
|
78
|
+
values
|
|
79
|
+
end
|
|
80
|
+
|
|
38
81
|
protected
|
|
39
82
|
|
|
40
83
|
def from_template
|
data/lib/esse/search/query.rb
CHANGED
|
@@ -40,7 +40,7 @@ module Esse
|
|
|
40
40
|
|
|
41
41
|
def scroll_hits(batch_size: 1_000, scroll: '1m')
|
|
42
42
|
response = execute_search_query!(size: batch_size, scroll: scroll)
|
|
43
|
-
scroll_id =
|
|
43
|
+
scroll_id = response.raw_response['scroll_id'] || response.raw_response['_scroll_id']
|
|
44
44
|
fetched = 0
|
|
45
45
|
total = response.total
|
|
46
46
|
|
|
@@ -48,9 +48,9 @@ module Esse
|
|
|
48
48
|
fetched += response.hits.size
|
|
49
49
|
yield(response.hits) if response.hits.any?
|
|
50
50
|
break if fetched >= total
|
|
51
|
-
scroll_id = response.raw_response['scroll_id'] || response.raw_response['_scroll_id']
|
|
52
51
|
break unless scroll_id
|
|
53
52
|
response = execute_scroll_query(scroll: scroll, scroll_id: scroll_id)
|
|
53
|
+
scroll_id = response.raw_response['scroll_id'] || response.raw_response['_scroll_id']
|
|
54
54
|
end
|
|
55
55
|
ensure
|
|
56
56
|
begin
|
|
@@ -59,12 +59,40 @@ module Esse
|
|
|
59
59
|
end
|
|
60
60
|
end
|
|
61
61
|
|
|
62
|
+
def search_after_hits(batch_size: 1_000)
|
|
63
|
+
body = HashUtils.deep_dup(definition.fetch(:body, {}))
|
|
64
|
+
body[:size] = batch_size
|
|
65
|
+
body.delete(:from)
|
|
66
|
+
body.delete('from')
|
|
67
|
+
|
|
68
|
+
unless body.key?(:sort) || body.key?('sort')
|
|
69
|
+
raise ArgumentError, 'The query body must include a :sort to use search_after'
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
loop do
|
|
73
|
+
response = execute_search_query!(body: body)
|
|
74
|
+
break if response.hits.empty?
|
|
75
|
+
|
|
76
|
+
yield(response.hits)
|
|
77
|
+
|
|
78
|
+
last_sort = response.hits.last['sort']
|
|
79
|
+
break unless last_sort
|
|
80
|
+
break if response.hits.size < batch_size
|
|
81
|
+
|
|
82
|
+
body[:search_after] = last_sort
|
|
83
|
+
end
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
def reset!
|
|
87
|
+
@response = nil
|
|
88
|
+
end
|
|
89
|
+
|
|
62
90
|
private
|
|
63
91
|
|
|
64
92
|
def execute_search_query!(**execution_options)
|
|
65
93
|
resp, err = nil
|
|
66
94
|
Esse::Events.instrument('elasticsearch.execute_search_query') do |payload|
|
|
67
|
-
payload[:
|
|
95
|
+
payload[:query_definition] = definition
|
|
68
96
|
begin
|
|
69
97
|
resp = Response.new(self, transport.search(**definition, **execution_options))
|
|
70
98
|
rescue => e
|
|
@@ -81,7 +109,7 @@ module Esse
|
|
|
81
109
|
def execute_scroll_query(scroll:, scroll_id:)
|
|
82
110
|
resp, err = nil
|
|
83
111
|
Esse::Events.instrument('elasticsearch.execute_search_query') do |payload|
|
|
84
|
-
payload[:
|
|
112
|
+
payload[:query_definition] = definition
|
|
85
113
|
begin
|
|
86
114
|
resp = Response.new(self, transport.scroll(scroll: scroll, body: { scroll_id: scroll_id }))
|
|
87
115
|
rescue => e
|
|
@@ -94,10 +122,6 @@ module Esse
|
|
|
94
122
|
|
|
95
123
|
resp
|
|
96
124
|
end
|
|
97
|
-
|
|
98
|
-
def reset!
|
|
99
|
-
@response = nil
|
|
100
|
-
end
|
|
101
125
|
end
|
|
102
126
|
end
|
|
103
127
|
end
|
data/lib/esse/search/response.rb
CHANGED
|
@@ -7,13 +7,13 @@ module Esse
|
|
|
7
7
|
extend Forwardable
|
|
8
8
|
|
|
9
9
|
def_delegators :hits, :each, :size, :empty?
|
|
10
|
-
attr_reader :
|
|
10
|
+
attr_reader :query_definition, :raw_response, :options
|
|
11
11
|
|
|
12
|
-
# @param [Esse::Search::Query] query The search query
|
|
12
|
+
# @param [Esse::Search::Query, Hash] query The search query or its definition hash
|
|
13
13
|
# @param [Hash] raw_response The raw response from Elasticsearch
|
|
14
14
|
# @param [Hash] options The options passed to the search
|
|
15
15
|
def initialize(query, raw_response, **options)
|
|
16
|
-
@
|
|
16
|
+
@query_definition = query.is_a?(Hash) ? query : query.definition
|
|
17
17
|
@raw_response = raw_response
|
|
18
18
|
@options = options
|
|
19
19
|
end
|
data/lib/esse/transport/documents.rb
CHANGED
|
@@ -29,6 +29,30 @@ module Esse
|
|
|
29
29
|
end
|
|
30
30
|
end
|
|
31
31
|
|
|
32
|
+
# Allows to get multiple documents in a single request.
|
|
33
|
+
#
|
|
34
|
+
# @option [String] :index The name of the index
|
|
35
|
+
# @option [Hash] :body Document identifiers; can be either `docs` (specifying full document information)
|
|
36
|
+
# or `ids` (when index is provided). (*Required*)
|
|
37
|
+
# @option [String] :preference Specify the node or shard the operation should be performed on (default: random)
|
|
38
|
+
# @option [Boolean] :realtime Specify whether to perform the operation in realtime or search mode
|
|
39
|
+
# @option [Boolean] :refresh Refresh the shard containing the document before performing the operation
|
|
40
|
+
# @option [String] :routing Specific routing value
|
|
41
|
+
# @option [List] :stored_fields A comma-separated list of stored fields to return in the response
|
|
42
|
+
# @option [List] :_source True or false to return the _source field or not, or a list of fields to return
|
|
43
|
+
# @option [List] :_source_excludes A list of fields to exclude from the returned _source field
|
|
44
|
+
# @option [List] :_source_includes A list of fields to extract and return from the _source field
|
|
45
|
+
# @option [Hash] :headers Custom HTTP headers
|
|
46
|
+
#
|
|
47
|
+
# @see https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-multi-get.html
|
|
48
|
+
#
|
|
49
|
+
def mget(index:, body:, **options)
|
|
50
|
+
Esse::Events.instrument('elasticsearch.mget') do |payload|
|
|
51
|
+
payload[:request] = opts = options.merge(index: index, body: body)
|
|
52
|
+
payload[:response] = coerce_exception { client.mget(**opts) }
|
|
53
|
+
end
|
|
54
|
+
end
|
|
55
|
+
|
|
32
56
|
# Returns information about whether a document exists in an index.
|
|
33
57
|
#
|
|
34
58
|
# @option [String] :id The document ID
|
data/lib/esse/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: esse
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.4.0.rc5
|
|
4
|
+
version: 0.4.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Marcos G. Zimmermann
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exec
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2026-04-29 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: multi_json
|
|
@@ -319,14 +319,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
319
319
|
requirements:
|
|
320
320
|
- - ">="
|
|
321
321
|
- !ruby/object:Gem::Version
|
|
322
|
-
version: 2.
|
|
322
|
+
version: '2.7'
|
|
323
323
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
324
324
|
requirements:
|
|
325
|
-
- - "
|
|
325
|
+
- - ">="
|
|
326
326
|
- !ruby/object:Gem::Version
|
|
327
|
-
version:
|
|
327
|
+
version: '0'
|
|
328
328
|
requirements: []
|
|
329
|
-
rubygems_version: 3.
|
|
329
|
+
rubygems_version: 3.4.10
|
|
330
330
|
signing_key:
|
|
331
331
|
specification_version: 4
|
|
332
332
|
summary: Pure Ruby and framework-agnostic ElasticSearch/OpenSearch toolkit for building
|