elasticgraph-indexer 0.18.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +1 -0
- data/elasticgraph-indexer.gemspec +24 -0
- data/lib/elastic_graph/indexer/config.rb +48 -0
- data/lib/elastic_graph/indexer/datastore_indexing_router.rb +408 -0
- data/lib/elastic_graph/indexer/event_id.rb +32 -0
- data/lib/elastic_graph/indexer/failed_event_error.rb +83 -0
- data/lib/elastic_graph/indexer/hash_differ.rb +37 -0
- data/lib/elastic_graph/indexer/indexing_failures_error.rb +28 -0
- data/lib/elastic_graph/indexer/indexing_preparers/integer.rb +41 -0
- data/lib/elastic_graph/indexer/indexing_preparers/no_op.rb +19 -0
- data/lib/elastic_graph/indexer/indexing_preparers/untyped.rb +22 -0
- data/lib/elastic_graph/indexer/operation/count_accumulator.rb +166 -0
- data/lib/elastic_graph/indexer/operation/factory.rb +226 -0
- data/lib/elastic_graph/indexer/operation/result.rb +76 -0
- data/lib/elastic_graph/indexer/operation/update.rb +160 -0
- data/lib/elastic_graph/indexer/operation/upsert.rb +71 -0
- data/lib/elastic_graph/indexer/processor.rb +137 -0
- data/lib/elastic_graph/indexer/record_preparer.rb +163 -0
- data/lib/elastic_graph/indexer/spec_support/event_matcher.rb +44 -0
- data/lib/elastic_graph/indexer/test_support/converters.rb +36 -0
- data/lib/elastic_graph/indexer.rb +98 -0
- metadata +454 -0
# Copyright 2024 Block, Inc.
#
# Use of this source code is governed by an MIT-style
# license that can be found in the LICENSE file or at
# https://opensource.org/licenses/MIT.
#
# frozen_string_literal: true

require "elastic_graph/error"

module ElasticGraph
  class Indexer
    # Raised when one or more events could not be indexed.
    class IndexingFailuresError < Error
      # Builds an `IndexingFailuresError` whose message summarizes the given array of
      # `FailedEventError` instances, numbering each failure and (when available)
      # listing the distinct message ids the failures came from.
      def self.for(failures:, events:)
        summary = "Got #{failures.size} failure(s) from #{events.size} event(s):"
        numbered_failures = failures.each_with_index.map { |failure, index| "#{index + 1}) #{failure.message}" }

        unique_message_ids = failures.filter_map { |f| f.event["message_id"] }.uniq
        message_details =
          unless unique_message_ids.empty?
            "These failures came from #{unique_message_ids.size} message(s): #{unique_message_ids.join(", ")}."
          end

        new([summary, numbered_failures, message_details].compact.join("\n\n"))
      end
    end
  end
end
|
# Copyright 2024 Block, Inc.
#
# Use of this source code is governed by an MIT-style
# license that can be found in the LICENSE file or at
# https://opensource.org/licenses/MIT.
#
# frozen_string_literal: true

module ElasticGraph
  class Indexer
    module IndexingPreparers
      class Integer
        # Coerces an integer-valued number (e.g. the float `3.0`) to a true integer (`3`),
        # raising if the coercion would lose information.
        #
        # This matters because:
        #
        # 1. A field mapped as an integer in the datastore will reject a float value,
        #    even when that float is integer-valued.
        # 2. JSON schema validation (which runs before this) cannot reliably guarantee
        #    that `integer` fields arrive as true integers.
        #
        # As https://json-schema.org/understanding-json-schema/reference/numeric.html#integer explains:
        #
        # > **Warning**
        # >
        # > The precise treatment of the “integer” type may depend on the implementation of your
        # > JSON Schema validator. JavaScript (and thus also JSON) does not have distinct types
        # > for integers and floating-point values. Therefore, JSON Schema can not use type alone
        # > to distinguish between integers and non-integers. The JSON Schema specification
        # > recommends, but does not require, that validators use the mathematical value to
        # > determine whether a number is an integer, and not the type alone. Therefore, there
        # > is some disagreement between validators on this point. For example, a JavaScript-based
        # > validator may accept 1.0 as an integer, whereas the Python-based jsonschema does not.
        def self.prepare_for_indexing(value)
          as_integer = value.to_i

          unless value == as_integer
            raise IndexOperationError, "Cannot safely coerce `#{value.inspect}` to an integer"
          end

          as_integer
        end
      end
    end
  end
end
|
# Copyright 2024 Block, Inc.
#
# Use of this source code is governed by an MIT-style
# license that can be found in the LICENSE file or at
# https://opensource.org/licenses/MIT.
#
# frozen_string_literal: true

module ElasticGraph
  class Indexer
    module IndexingPreparers
      # Preparer for types needing no transformation: returns the value untouched.
      class NoOp
        def self.prepare_for_indexing(value) = value
      end
    end
  end
end
|
# Copyright 2024 Block, Inc.
#
# Use of this source code is governed by an MIT-style
# license that can be found in the LICENSE file or at
# https://opensource.org/licenses/MIT.
#
# frozen_string_literal: true

require "elastic_graph/support/untyped_encoder"

module ElasticGraph
  class Indexer
    module IndexingPreparers
      class Untyped
        # Encodes the given untyped value as a String (via `Support::UntypedEncoder`)
        # so it can be indexed in a `keyword` field.
        def self.prepare_for_indexing(value) = Support::UntypedEncoder.encode(value)
      end
    end
  end
end
|
# Copyright 2024 Block, Inc.
#
# Use of this source code is governed by an MIT-style
# license that can be found in the LICENSE file or at
# https://opensource.org/licenses/MIT.
#
# frozen_string_literal: true

require "elastic_graph/constants"

module ElasticGraph
  class Indexer
    module Operation
      # Responsible for maintaining state and accumulating list counts while we traverse the `data` we are preparing
      # to update in the index. Much of the complexity here is due to the fact that we have 3 kinds of list fields:
      # scalar lists, embedded object lists, and `nested` object lists.
      #
      # The Elasticsearch/OpenSearch `nested` type[^1] indexes objects of this type as separate hidden documents. As a result,
      # each `nested` object type gets its own `__counts` field. In contrast, embedded object lists get flattened into separate
      # entries (one per field path) in a flat map (with `dot_separated_path: values_at_path` entries) at the document root.
      #
      # We mirror this structure with our `__counts`: each document (either a root document, or a hidden `nested` document)
      # gets its own `__counts` field, so we essentially have multiple "count parents". Each `__counts` field is a map,
      # keyed by field paths, and containing the number of list elements at that field path after the flattening has
      # occurred.
      #
      # The index mapping defines where the `__counts` fields go. This abstraction uses the mapping to determine when
      # it needs to create a new "count parent".
      #
      # Note: instances of this class are "shallow immutable" (none of the attributes of an instance can be reassigned)
      # but the `counts` attribute is itself a mutable hash--we use it to accumulate the list counts as we traverse the
      # structure.
      #
      # [^1]: https://www.elastic.co/guide/en/elasticsearch/reference/8.9/nested.html
      CountAccumulator = ::Data.define(
        # Hash containing the counts we have accumulated so far. This hash gets mutated as we accumulate,
        # and multiple accumulator instances share the same hash instance. However, a new `counts` hash will
        # be created when we reach a new parent.
        :counts,
        # String describing our current location in the traversed structure relative to the current parent.
        # This gets replaced on new accumulator instances as we traverse the data structure.
        :path_from_parent,
        # String describing our current location in the traversed structure relative to the overall document root.
        # This gets replaced on new accumulator instances as we traverse the data structure.
        :path_from_root,
        # The index mapping at the current level of the structure when this accumulator instance was created.
        # As we traverse new levels of the data structure, new `CountAccumulator` instances will be created with
        # the `mapping` updated to reflect the new level of the structure we are at.
        :mapping,
        # Set of field paths to subfields of `LIST_COUNTS_FIELD` for the current source relationship.
        # This will be used to determine which subfields of the `LIST_COUNTS_FIELD` are populated.
        :list_counts_field_paths_for_source,
        # Indicates if our current path is underneath a list; if so, `maybe_increment` will increment when called.
        :has_list_ancestor
      ) do
        # @implements CountAccumulator

        # Entry point: computes list counts for `params["data"]` and merges them into `params`.
        # The root `__counts` entry is "promoted" out of `data` into a root-level parameter
        # (see the comment below for why).
        def self.merge_list_counts_into(params, mapping:, list_counts_field_paths_for_source:)
          # Here we compute the counts of our list elements so that we can index it.
          data = compute_list_counts_of(params.fetch("data"), CountAccumulator.new_parent(
            # We merge in `type: nested` since the `nested` type indicates a new count accumulator parent and we want that applied at the root.
            mapping.merge("type" => "nested"),
            list_counts_field_paths_for_source
          ))

          # The root `__counts` field needs special handling due to our `sourced_from` feature. Anything in `data`
          # will overwrite what's in the specified fields when the script executes, but since there could be list
          # fields from multiple sources, we need `__counts` to get merged properly. So here we "promote" it from
          # `data.__counts` to being a root-level parameter.
          params.merge(
            "data" => data.except(LIST_COUNTS_FIELD),
            LIST_COUNTS_FIELD => data[LIST_COUNTS_FIELD]
          )
        end

        # Recursively walks `value`, incrementing counts for every non-nil leaf/object that sits
        # under a list, and returning `value` with any applicable `__counts` entries merged in.
        # Note the mutual recursion with `process_hash`/`process_list`, which manage parent scoping.
        def self.compute_list_counts_of(value, parent_accumulator)
          case value
          when nil
            # `nil` is not counted as a list element; return it untouched.
            value
          when ::Hash
            parent_accumulator.maybe_increment
            parent_accumulator.process_hash(value) do |key, subvalue, accumulator|
              [key, compute_list_counts_of(subvalue, accumulator[key])]
            end
          when ::Array
            parent_accumulator.process_list(value) do |element, accumulator|
              compute_list_counts_of(element, accumulator)
            end
          else
            # A scalar leaf: counts as one element if we are under a list.
            parent_accumulator.maybe_increment
            value
          end
        end

        # Creates an initially empty accumulator instance for a new parent (either at the overall document
        # root or at the root of a `nested` object). Seeds a zero count for every `__counts` subfield
        # declared in the mapping that this source relationship populates.
        def self.new_parent(mapping, list_counts_field_paths_for_source, path_from_root: nil)
          count_field_prefix = path_from_root ? "#{path_from_root}.#{LIST_COUNTS_FIELD}." : "#{LIST_COUNTS_FIELD}."

          initial_counts = (mapping.dig("properties", LIST_COUNTS_FIELD, "properties") || {}).filter_map do |field, _|
            [field, 0] if list_counts_field_paths_for_source.include?(count_field_prefix + field)
          end.to_h

          new(initial_counts, nil, path_from_root, mapping, list_counts_field_paths_for_source, false)
        end

        # Processes the given hash, beginning a new parent if needed. A new parent is needed if the
        # current mapping has a `__counts` field.
        #
        # Yields repeatedly (once per hash entry). We yield the entry key/value, and an accumulator
        # instance (either the current `self` or a new parent).
        #
        # Afterwards, merges the resulting `__counts` into the hash before it's returned, as needed.
        def process_hash(hash)
          mapping_type = mapping["type"]

          # As we traverse through the JSON object structure, we also have to traverse through the
          # condensed mapping. Doing this requires that the `properties` of the index mapping
          # match the fields of the JSON data structure. However, Elasticsearch/OpenSearch have a number of field
          # types which can be represented as a JSON object in an indexing call, but which have no
          # `properties` in the mapping. We can't successfully traverse through the JSON data and the
          # mapping when we encounter these field types (since the mapping has no record of the
          # subfields) so we must treat these types as a special case; we can't proceed, and we won't
          # have any lists to count, anyway.
          return hash if DATASTORE_PROPERTYLESS_OBJECT_TYPES.include?(mapping_type)

          # The `nested` type indicates a new document level, so if it's not `nested`, we should process the hash without making a new parent.
          return hash.to_h { |key, value| yield key, value, self } unless mapping_type == "nested"

          # ...but otherwise, we should make a new parent.
          new_parent = CountAccumulator.new_parent(mapping, list_counts_field_paths_for_source, path_from_root: path_from_root)
          updated_hash = hash.to_h { |key, value| yield key, value, new_parent }

          # If we have a LIST_COUNTS_FIELD at this level of our mapping, we should merge in the counts hash from the new parent.
          if mapping.dig("properties", LIST_COUNTS_FIELD)
            updated_hash.merge(LIST_COUNTS_FIELD => new_parent.counts)
          else
            updated_hash
          end
        end

        # Processes the given list, tracking the fact that subpaths have a list ancestor.
        # All elements share one child accumulator (and therefore one shared `counts` hash).
        def process_list(list)
          child_accumulator = with(has_list_ancestor: true)
          list.map { |value| yield value, child_accumulator }
        end

        # Increments the count at the current `path_from_parent` in the current parent's counts hash if we are under a list.
        # Uses `counts.fetch` (not `[]`) so an unexpected path fails loudly instead of silently starting from nil.
        def maybe_increment
          return unless has_list_ancestor

          key = path_from_parent.to_s
          counts[key] = counts.fetch(key) + 1
        end

        # Creates a "child" accumulator at the given subpath. Should be used as we traverse the data structure.
        # Note the shared mutable `counts` hash is intentionally carried over to the child.
        def [](subpath)
          with(
            path_from_parent: path_from_parent ? "#{path_from_parent}#{LIST_COUNTS_FIELD_PATH_KEY_SEPARATOR}#{subpath}" : subpath,
            path_from_root: path_from_root ? "#{path_from_root}.#{subpath}" : subpath,
            mapping: mapping.fetch("properties").fetch(subpath)
          )
        end
      end
    end
  end
end
|
# Copyright 2024 Block, Inc.
#
# Use of this source code is governed by an MIT-style
# license that can be found in the LICENSE file or at
# https://opensource.org/licenses/MIT.
#
# frozen_string_literal: true

require "elastic_graph/constants"
require "elastic_graph/indexer/event_id"
require "elastic_graph/indexer/failed_event_error"
require "elastic_graph/indexer/operation/update"
require "elastic_graph/indexer/operation/upsert"
require "elastic_graph/indexer/record_preparer"
require "elastic_graph/json_schema/validator_factory"
require "elastic_graph/support/memoizable_data"

module ElasticGraph
  class Indexer
    module Operation
      # Turns a single incoming event into the datastore operations (upserts and updates)
      # needed to index it, validating the event against the appropriate JSON schema version
      # along the way. Invalid events produce a failure `BuildResult` instead of raising.
      class Factory < Support::MemoizableData.define(
        :schema_artifacts,
        :index_definitions_by_graphql_type,
        :record_preparer_factory,
        :logger,
        :skip_derived_indexing_type_updates,
        :configure_record_validator
      )
        # Builds operations for `event`, returning a `BuildResult` (see bottom of this class).
        def build(event)
          event = prepare_event(event)

          # `select_json_schema_version` yields a failure `BuildResult` instead of returning it;
          # the block's non-local `return` propagates that failure straight out of `build`.
          selected_json_schema_version = select_json_schema_version(event) { |failure| return failure }

          # Because the `select_json_schema_version` picks the closest-matching json schema version, the incoming
          # event might not match the expected json_schema_version value in the json schema (which is a `const` field).
          # This is by design, since we're picking a schema based on best-effort, so to avoid that by-design validation error,
          # performing the envelope validation on a "patched" version of the event.
          event_with_patched_envelope = event.merge({JSON_SCHEMA_VERSION_KEY => selected_json_schema_version})

          if (error_message = validator(EVENT_ENVELOPE_JSON_SCHEMA_NAME, selected_json_schema_version).validate_with_error_message(event_with_patched_envelope))
            return build_failed_result(event, "event payload", error_message)
          end

          failed_result = validate_record_returning_failure(event, selected_json_schema_version)
          failed_result || BuildResult.success(build_all_operations_for(
            event,
            record_preparer_factory.for_json_schema_version(selected_json_schema_version)
          ))
        end

        private

        # Picks the best-available JSON schema version for the event. Yields a failure
        # `BuildResult` (rather than returning it) when the event's requested version is
        # missing/invalid or when no version can be selected; callers pass a block that
        # performs a non-local return with the failure.
        def select_json_schema_version(event)
          available_json_schema_versions = schema_artifacts.available_json_schema_versions

          requested_json_schema_version = event[JSON_SCHEMA_VERSION_KEY]

          # First check that a valid value has been requested (a positive integer)
          if !event.key?(JSON_SCHEMA_VERSION_KEY)
            yield build_failed_result(event, JSON_SCHEMA_VERSION_KEY, "Event lacks a `#{JSON_SCHEMA_VERSION_KEY}`")
          elsif !requested_json_schema_version.is_a?(Integer) || requested_json_schema_version < 1
            yield build_failed_result(event, JSON_SCHEMA_VERSION_KEY, "#{JSON_SCHEMA_VERSION_KEY} (#{requested_json_schema_version}) must be a positive integer.")
          end

          # The requested version might not necessarily be available (if the publisher is deployed ahead of the indexer, or an old schema
          # version is removed prematurely, or an indexer deployment is rolled back). So the behavior is to always pick the closest-available
          # version. If there's an exact match, great. Even if not an exact match, if the incoming event payload conforms to the closest match,
          # the event can still be indexed.
          #
          # This min_by block will take the closest version in the list. If a tie occurs, the first value in the list wins. The desired
          # behavior is in the event of a tie (highly unlikely, there shouldn't be a gap in available json schema versions), the higher version
          # should be selected. So to get that behavior, the list is sorted in descending order.
          #
          selected_json_schema_version = available_json_schema_versions.sort.reverse.min_by { |it| (requested_json_schema_version - it).abs }

          # Log when we had to fall back to a different version than the one requested
          # (including the `nil` case, which then yields a failure below).
          if selected_json_schema_version != requested_json_schema_version
            logger.info({
              "message_type" => "ElasticGraphMissingJSONSchemaVersion",
              "message_id" => event["message_id"],
              "event_id" => EventID.from_event(event),
              "event_type" => event["type"],
              "requested_json_schema_version" => requested_json_schema_version,
              "selected_json_schema_version" => selected_json_schema_version
            })
          end

          # `min_by` returns nil when there are no available versions at all.
          if selected_json_schema_version.nil?
            yield build_failed_result(
              event, JSON_SCHEMA_VERSION_KEY,
              "Failed to select json schema version. Requested version: #{event[JSON_SCHEMA_VERSION_KEY]}. \
Available json schema versions: #{available_json_schema_versions.sort.join(", ")}"
            )
          end

          selected_json_schema_version
        end

        # Returns the JSON schema validator for `type` at the given schema version.
        def validator(type, selected_json_schema_version)
          factory = validator_factories_by_version[selected_json_schema_version]
          factory.validator_for(type)
        end

        # Memoized map of schema version => configured `ValidatorFactory`. Uses a `Hash.new`
        # block so each version's factory is built (and optionally customized via
        # `configure_record_validator`) only on first access, then cached in the hash.
        def validator_factories_by_version
          @validator_factories_by_version ||= ::Hash.new do |hash, json_schema_version|
            factory = JSONSchema::ValidatorFactory.new(
              schema: schema_artifacts.json_schemas_for(json_schema_version),
              sanitize_pii: true
            )
            factory = configure_record_validator.call(factory) if configure_record_validator
            hash[json_schema_version] = factory
          end
        end

        # This copies the `id` from event into the actual record
        # This is necessary because we want to index `id` as part of the record so that the datastore will include `id` in returned search payloads.
        def prepare_event(event)
          # Leave the event untouched unless it has both a `record` hash and an `id` to copy in.
          return event unless event["record"].is_a?(::Hash) && event["id"]
          event.merge("record" => event["record"].merge("id" => event.fetch("id")))
        end

        # Validates the event's `record` against its type's JSON schema, returning a failure
        # `BuildResult` on error and `nil` when the record is valid.
        def validate_record_returning_failure(event, selected_json_schema_version)
          record = event.fetch("record")
          graphql_type_name = event.fetch("type")
          validator = validator(graphql_type_name, selected_json_schema_version)

          if (error_message = validator.validate_with_error_message(record))
            build_failed_result(event, "#{graphql_type_name} record", error_message)
          end
        end

        # Builds a failure `BuildResult` wrapping a `FailedEventError`, still attaching the
        # operations the event would have produced (useful for diagnostics/dead-lettering).
        def build_failed_result(event, payload_description, validation_message)
          message = "Malformed #{payload_description}. #{validation_message}"

          # Here we use the `RecordPreparer::Identity` record preparer because we may not have a valid JSON schema
          # version number in this case (which is usually required to get a `RecordPreparer` from the factory), and
          # we won't wind up using the record preparer for real on these operations, anyway.
          operations = build_all_operations_for(event, RecordPreparer::Identity)

          BuildResult.failure(FailedEventError.new(event: event, operations: operations.to_set, main_message: message))
        end

        # All operations for an event: plain upserts plus derived-index updates.
        def build_all_operations_for(event, record_preparer)
          upsert_operations(event, record_preparer) + update_operations(event, record_preparer)
        end

        # Builds one `Upsert` per index definition of the event's type that does not
        # use update-based indexing.
        def upsert_operations(event, record_preparer)
          type = event.fetch("type") do
            # This key should only be missing on invalid events. We still want to build operations
            # for the event (to put it in the `FailedEventError`) but in this case we can't build
            # any because we don't know what indices to target.
            return []
          end

          index_definitions_for(type).reject(&:use_updates_for_indexing?).map do |index_definition|
            Upsert.new(event, index_definition, record_preparer)
          end
        end

        # Builds `Update` operations for every update target of the event's type, skipping
        # (and logging) any doc ids configured in `skip_derived_indexing_type_updates`.
        def update_operations(event, record_preparer)
          # If `type` is missing or is not a known type (as indicated by `runtime_metadata` being nil)
          # then we can't build a derived indexing type update operation. That case will only happen when we build
          # operations for an `FailedEventError` rather than to execute.
          return [] unless (type = event["type"])
          return [] unless (runtime_metadata = schema_artifacts.runtime_metadata.object_types_by_name[type])

          runtime_metadata.update_targets.flat_map do |update_target|
            ids_to_skip = skip_derived_indexing_type_updates.fetch(update_target.type, ::Set.new)

            index_definitions_for(update_target.type).flat_map do |destination_index_def|
              operations = Update.operations_for(
                event: event,
                destination_index_def: destination_index_def,
                record_preparer: record_preparer,
                update_target: update_target,
                destination_index_mapping: schema_artifacts.index_mappings_by_index_def_name.fetch(destination_index_def.name)
              )

              operations.reject do |op|
                # `tap` lets us log the skip as a side effect while still returning the boolean to `reject`.
                ids_to_skip.include?(op.doc_id).tap do |skipped|
                  if skipped
                    logger.info({
                      "message_type" => "SkippingUpdate",
                      "message_id" => event["message_id"],
                      "update_target" => update_target.type,
                      "id" => op.doc_id,
                      "event_id" => EventID.from_event(event).to_s
                    })
                  end
                end
              end
            end
          end
        end

        # Index definitions for a GraphQL type name, or `[]` for unknown/missing types.
        def index_definitions_for(type)
          # If `type` is missing or is not a known type (as indicated by not being in this hash)
          # then we return an empty list. That case will only happen when we build
          # operations for an `FailedEventError` rather than to execute.
          index_definitions_by_graphql_type[type] || []
        end

        # :nocov: -- this should not be called. Instead, it exists to guard against wrongly raising an error from this class.
        def raise(*args)
          super("`raise` was called on `Operation::Factory`, but should not. Instead, use " \
            "`yield build_failed_result(...)` so that we can accumulate all invalid events and allow " \
            "the valid events to still be processed.")
        end
        # :nocov:

        # Return value from `build` that indicates what happened.
        # - If it was successful, `operations` will be a non-empty array of operations and `failed_event_error` will be nil.
        # - If there was a validation issue, `operations` will be an empty array and `failed_event_error` will be non-nil.
        BuildResult = ::Data.define(:operations, :failed_event_error) do
          # @implements BuildResult
          def self.success(operations)
            new(operations, nil)
          end

          def self.failure(failed_event_error)
            new([], failed_event_error)
          end
        end
      end
    end
  end
end
|
# Copyright 2024 Block, Inc.
#
# Use of this source code is governed by an MIT-style
# license that can be found in the LICENSE file or at
# https://opensource.org/licenses/MIT.
#
# frozen_string_literal: true

require "elastic_graph/indexer/event_id"

module ElasticGraph
  class Indexer
    module Operation
      # Immutable value object describing the outcome of executing an operation.
      # `category` is one of `:success`, `:noop`, or `:failure`; `description` carries
      # extra detail for the latter two and is `nil` for successes.
      Result = ::Data.define(:category, :operation, :description) do
        # @implements Result

        # Builds a success result (no description).
        def self.success_of(operation)
          new(:success, operation, nil)
        end

        # Builds a no-op result with an explanation of why nothing happened.
        def self.noop_of(operation, description)
          new(:noop, operation, description)
        end

        # Builds a failure result with an explanation of what went wrong.
        def self.failure_of(operation, description)
          new(:failure, operation, description)
        end

        # Delegates to the underlying operation's type.
        def operation_type = operation.type

        # The event the underlying operation was built from.
        def event = operation.event

        # The `EventID` identifying the source event.
        def event_id = EventID.from_event(event)

        # One-line human-readable summary of this result.
        def summary
          # :nocov: -- `description == nil` case is not covered; not simple to test.
          suffix = description ? "--#{description}" : nil
          # :nocov:
          "<#{operation.description} #{event_id} #{category}#{suffix}>"
        end

        def inspect
          "#<#{[self.class.name, operation_type.inspect, category.inspect, event_id, description].compact.join(" ")}>"
        end
        alias_method :to_s, :inspect
      end
    end
  end
end
|