elasticgraph-indexer 0.18.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +1 -0
- data/elasticgraph-indexer.gemspec +24 -0
- data/lib/elastic_graph/indexer/config.rb +48 -0
- data/lib/elastic_graph/indexer/datastore_indexing_router.rb +408 -0
- data/lib/elastic_graph/indexer/event_id.rb +32 -0
- data/lib/elastic_graph/indexer/failed_event_error.rb +83 -0
- data/lib/elastic_graph/indexer/hash_differ.rb +37 -0
- data/lib/elastic_graph/indexer/indexing_failures_error.rb +28 -0
- data/lib/elastic_graph/indexer/indexing_preparers/integer.rb +41 -0
- data/lib/elastic_graph/indexer/indexing_preparers/no_op.rb +19 -0
- data/lib/elastic_graph/indexer/indexing_preparers/untyped.rb +22 -0
- data/lib/elastic_graph/indexer/operation/count_accumulator.rb +166 -0
- data/lib/elastic_graph/indexer/operation/factory.rb +226 -0
- data/lib/elastic_graph/indexer/operation/result.rb +76 -0
- data/lib/elastic_graph/indexer/operation/update.rb +160 -0
- data/lib/elastic_graph/indexer/operation/upsert.rb +71 -0
- data/lib/elastic_graph/indexer/processor.rb +137 -0
- data/lib/elastic_graph/indexer/record_preparer.rb +163 -0
- data/lib/elastic_graph/indexer/spec_support/event_matcher.rb +44 -0
- data/lib/elastic_graph/indexer/test_support/converters.rb +36 -0
- data/lib/elastic_graph/indexer.rb +98 -0
- metadata +454 -0
@@ -0,0 +1,28 @@
|
|
1
|
+
# Copyright 2024 Block, Inc.
|
2
|
+
#
|
3
|
+
# Use of this source code is governed by an MIT-style
|
4
|
+
# license that can be found in the LICENSE file or at
|
5
|
+
# https://opensource.org/licenses/MIT.
|
6
|
+
#
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require "elastic_graph/error"
|
10
|
+
|
11
|
+
module ElasticGraph
  class Indexer
    # Raised when one or more events could not be indexed.
    class IndexingFailuresError < Error
      # Builds an `IndexingFailuresError` with a nicely formatted message for the given array of `FailedEventError`.
      def self.for(failures:, events:)
        summary = "Got #{failures.size} failure(s) from #{events.size} event(s):"
        numbered_failures = failures.each_with_index.map { |failure, i| "#{i + 1}) #{failure.message}" }

        # Surface the originating message ids (when present on the events) to aid debugging.
        message_ids = failures.filter_map { |f| f.event["message_id"] }.uniq
        unless message_ids.empty?
          message_details = "These failures came from #{message_ids.size} message(s): #{message_ids.join(", ")}."
        end

        # `Array#join` flattens the nested `numbered_failures` array; `compact` drops
        # `message_details` when no message ids were present.
        new([summary, numbered_failures, message_details].compact.join("\n\n"))
      end
    end
  end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
# Copyright 2024 Block, Inc.
|
2
|
+
#
|
3
|
+
# Use of this source code is governed by an MIT-style
|
4
|
+
# license that can be found in the LICENSE file or at
|
5
|
+
# https://opensource.org/licenses/MIT.
|
6
|
+
#
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
module ElasticGraph
  class Indexer
    module IndexingPreparers
      class Integer
        # Coerces an integer-valued float like `3.0` to a true integer (e.g. `3`).
        # This is necessary because:
        #
        # 1. If a field is an integer in the datastore mapping, it does not tolerate it coming in
        #    as a float, even if it is integer-valued.
        # 2. While we use JSON schema to validate event payloads before we get here, JSON schema
        #    cannot consistently enforce that we receive true integers for int fields.
        #
        # As https://json-schema.org/understanding-json-schema/reference/numeric.html#integer explains:
        #
        # > **Warning**
        # >
        # > The precise treatment of the “integer” type may depend on the implementation of your
        # > JSON Schema validator. JavaScript (and thus also JSON) does not have distinct types
        # > for integers and floating-point values. Therefore, JSON Schema can not use type alone
        # > to distinguish between integers and non-integers. The JSON Schema specification
        # > recommends, but does not require, that validators use the mathematical value to
        # > determine whether a number is an integer, and not the type alone. Therefore, there
        # > is some disagreement between validators on this point. For example, a JavaScript-based
        # > validator may accept 1.0 as an integer, whereas the Python-based jsonschema does not.
        #
        # Raises `IndexOperationError` when the value is not numerically equal to its
        # integer coercion (e.g. `3.5` or a string), since silently truncating would corrupt data.
        def self.prepare_for_indexing(value)
          coerced = value.to_i
          unless value == coerced
            raise IndexOperationError, "Cannot safely coerce `#{value.inspect}` to an integer"
          end
          coerced
        end
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# Copyright 2024 Block, Inc.
|
2
|
+
#
|
3
|
+
# Use of this source code is governed by an MIT-style
|
4
|
+
# license that can be found in the LICENSE file or at
|
5
|
+
# https://opensource.org/licenses/MIT.
|
6
|
+
#
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
module ElasticGraph
  class Indexer
    module IndexingPreparers
      # Identity preparer: used for field types that need no transformation
      # before being indexed. Returns the given value untouched.
      class NoOp
        def self.prepare_for_indexing(value) = value
      end
    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# Copyright 2024 Block, Inc.
|
2
|
+
#
|
3
|
+
# Use of this source code is governed by an MIT-style
|
4
|
+
# license that can be found in the LICENSE file or at
|
5
|
+
# https://opensource.org/licenses/MIT.
|
6
|
+
#
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require "elastic_graph/support/untyped_encoder"
|
10
|
+
|
11
|
+
module ElasticGraph
  class Indexer
    module IndexingPreparers
      class Untyped
        # Converts the given untyped value to a String (via `Support::UntypedEncoder`)
        # so it can be indexed in a `keyword` field.
        def self.prepare_for_indexing(value) = Support::UntypedEncoder.encode(value)
      end
    end
  end
end
|
@@ -0,0 +1,166 @@
|
|
1
|
+
# Copyright 2024 Block, Inc.
|
2
|
+
#
|
3
|
+
# Use of this source code is governed by an MIT-style
|
4
|
+
# license that can be found in the LICENSE file or at
|
5
|
+
# https://opensource.org/licenses/MIT.
|
6
|
+
#
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require "elastic_graph/constants"
|
10
|
+
|
11
|
+
module ElasticGraph
  class Indexer
    module Operation
      # Responsible for maintaining state and accumulating list counts while we traverse the `data` we are preparing
      # to update in the index. Much of the complexity here is due to the fact that we have 3 kinds of list fields:
      # scalar lists, embedded object lists, and `nested` object lists.
      #
      # The Elasticsearch/OpenSearch `nested` type[^1] indexes objects of this type as separate hidden documents. As a result,
      # each `nested` object type gets its own `__counts` field. In contrast, embedded object lists get flattened into separate
      # entries (one per field path) in a flat map (with `dot_separated_path: values_at_path` entries) at the document root.
      #
      # We mirror this structure with our `__counts`: each document (either a root document, or a hidden `nested` document)
      # gets its own `__counts` field, so we essentially have multiple "count parents". Each `__counts` field is a map,
      # keyed by field paths, and containing the number of list elements at that field path after the flattening has
      # occurred.
      #
      # The index mapping defines where the `__counts` fields go. This abstraction uses the mapping to determine when
      # it needs to create a new "count parent".
      #
      # Note: instances of this class are "shallow immutable" (none of the attributes of an instance can be reassigned)
      # but the `counts` attribute is itself a mutable hash--we use it to accumulate the list counts as we traverse the
      # structure.
      #
      # [^1]: https://www.elastic.co/guide/en/elasticsearch/reference/8.9/nested.html
      CountAccumulator = ::Data.define(
        # Hash containing the counts we have accumulated so far. This hash gets mutated as we accumulate,
        # and multiple accumulator instances share the same hash instance. However, a new `counts` hash will
        # be created when we reach a new parent.
        :counts,
        # String describing our current location in the traversed structure relative to the current parent.
        # This gets replaced on new accumulator instances as we traverse the data structure.
        :path_from_parent,
        # String describing our current location in the traversed structure relative to the overall document root.
        # This gets replaced on new accumulator instances as we traverse the data structure.
        :path_from_root,
        # The index mapping at the current level of the structure when this accumulator instance was created.
        # As we traverse new levels of the data structure, new `CountAccumulator` instances will be created with
        # the `mapping` updated to reflect the new level of the structure we are at.
        :mapping,
        # Set of field paths to subfields of `LIST_COUNTS_FIELD` for the current source relationship.
        # This will be used to determine which subfields of the `LIST_COUNTS_FIELD` are populated.
        :list_counts_field_paths_for_source,
        # Indicates if our current path is underneath a list; if so, `maybe_increment` will increment when called.
        :has_list_ancestor
      ) do
        # @implements CountAccumulator

        # Entry point: computes list counts for `params["data"]` and merges them into the
        # returned params hash (with the root `__counts` promoted to a root-level parameter).
        def self.merge_list_counts_into(params, mapping:, list_counts_field_paths_for_source:)
          # Here we compute the counts of our list elements so that we can index it.
          data = compute_list_counts_of(params.fetch("data"), CountAccumulator.new_parent(
            # We merge in `type: nested` since the `nested` type indicates a new count accumulator parent and we want that applied at the root.
            mapping.merge("type" => "nested"),
            list_counts_field_paths_for_source
          ))

          # The root `__counts` field needs special handling due to our `sourced_from` feature. Anything in `data`
          # will overwrite what's in the specified fields when the script executes, but since there could be list
          # fields from multiple sources, we need `__counts` to get merged properly. So here we "promote" it from
          # `data.__counts` to being a root-level parameter.
          params.merge(
            "data" => data.except(LIST_COUNTS_FIELD),
            LIST_COUNTS_FIELD => data[LIST_COUNTS_FIELD]
          )
        end

        # Recursively walks `value`, incrementing counts for every element found under a list,
        # and returning a copy of `value` with `__counts` hashes merged in where the mapping calls for them.
        def self.compute_list_counts_of(value, parent_accumulator)
          case value
          when nil
            value
          when ::Hash
            parent_accumulator.maybe_increment
            parent_accumulator.process_hash(value) do |key, subvalue, accumulator|
              [key, compute_list_counts_of(subvalue, accumulator[key])]
            end
          when ::Array
            parent_accumulator.process_list(value) do |element, accumulator|
              compute_list_counts_of(element, accumulator)
            end
          else
            # A scalar leaf value: counts one element if we are under a list.
            parent_accumulator.maybe_increment
            value
          end
        end

        # Creates an initially empty accumulator instance for a new parent (either at the overall document
        # root or at the root of a `nested` object).
        def self.new_parent(mapping, list_counts_field_paths_for_source, path_from_root: nil)
          count_field_prefix = path_from_root ? "#{path_from_root}.#{LIST_COUNTS_FIELD}." : "#{LIST_COUNTS_FIELD}."

          # Seed a 0 count for each `__counts` subfield (from the mapping) that belongs to the current source,
          # so that untouched list fields still get an explicit count of 0.
          initial_counts = (mapping.dig("properties", LIST_COUNTS_FIELD, "properties") || {}).filter_map do |field, _|
            [field, 0] if list_counts_field_paths_for_source.include?(count_field_prefix + field)
          end.to_h

          # Positional args match the `::Data.define` attribute order above.
          new(initial_counts, nil, path_from_root, mapping, list_counts_field_paths_for_source, false)
        end

        # Processes the given hash, beginning a new parent if needed. A new parent is needed if the
        # current mapping has a `__counts` field.
        #
        # Yields repeatedly (once per hash entry). We yield the entry key/value, and an accumulator
        # instance (either the current `self` or a new parent).
        #
        # Afterwards, merges the resulting `__counts` into the hash before it's returned, as needed.
        def process_hash(hash)
          mapping_type = mapping["type"]

          # As we traverse through the JSON object structure, we also have to traverse through the
          # condensed mapping. Doing this requires that the `properties` of the index mapping
          # match the fields of the JSON data structure. However, Elasticsearch/OpenSearch have a number of field
          # types which can be represented as a JSON object in an indexing call, but which have no
          # `properties` in the mapping. We can't successfully traverse through the JSON data and the
          # mapping when we encounter these field types (since the mapping has no record of the
          # subfields) so we must treat these types as a special case; we can't proceed, and we won't
          # have any lists to count, anyway.
          return hash if DATASTORE_PROPERTYLESS_OBJECT_TYPES.include?(mapping_type)

          # The `nested` type indicates a new document level, so if it's not `nested`, we should process the hash without making a new parent.
          return hash.to_h { |key, value| yield key, value, self } unless mapping_type == "nested"

          # ...but otherwise, we should make a new parent.
          new_parent = CountAccumulator.new_parent(mapping, list_counts_field_paths_for_source, path_from_root: path_from_root)
          updated_hash = hash.to_h { |key, value| yield key, value, new_parent }

          # If we have a LIST_COUNTS_FIELD at this level of our mapping, we should merge in the counts hash from the new parent.
          if mapping.dig("properties", LIST_COUNTS_FIELD)
            updated_hash.merge(LIST_COUNTS_FIELD => new_parent.counts)
          else
            updated_hash
          end
        end

        # Processes the given list, tracking the fact that subpaths have a list ancestor.
        def process_list(list)
          child_accumulator = with(has_list_ancestor: true)
          list.map { |value| yield value, child_accumulator }
        end

        # Increments the count at the current `path_from_parent` in the current parent's counts hash if we are under a list.
        def maybe_increment
          return unless has_list_ancestor

          key = path_from_parent.to_s
          # `fetch` (rather than `[]`) so a missing key raises: counts are pre-seeded in `new_parent`.
          counts[key] = counts.fetch(key) + 1
        end

        # Creates a "child" accumulator at the given subpath. Should be used as we traverse the data structure.
        def [](subpath)
          with(
            path_from_parent: path_from_parent ? "#{path_from_parent}#{LIST_COUNTS_FIELD_PATH_KEY_SEPARATOR}#{subpath}" : subpath,
            path_from_root: path_from_root ? "#{path_from_root}.#{subpath}" : subpath,
            mapping: mapping.fetch("properties").fetch(subpath)
          )
        end
      end
    end
  end
end
|
@@ -0,0 +1,226 @@
|
|
1
|
+
# Copyright 2024 Block, Inc.
|
2
|
+
#
|
3
|
+
# Use of this source code is governed by an MIT-style
|
4
|
+
# license that can be found in the LICENSE file or at
|
5
|
+
# https://opensource.org/licenses/MIT.
|
6
|
+
#
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require "elastic_graph/constants"
|
10
|
+
require "elastic_graph/indexer/event_id"
|
11
|
+
require "elastic_graph/indexer/failed_event_error"
|
12
|
+
require "elastic_graph/indexer/operation/update"
|
13
|
+
require "elastic_graph/indexer/operation/upsert"
|
14
|
+
require "elastic_graph/indexer/record_preparer"
|
15
|
+
require "elastic_graph/json_schema/validator_factory"
|
16
|
+
require "elastic_graph/support/memoizable_data"
|
17
|
+
|
18
|
+
module ElasticGraph
  class Indexer
    module Operation
      # Builds datastore operations (upserts and updates) from incoming events, validating
      # each event against the event envelope JSON schema and the record's type schema first.
      class Factory < Support::MemoizableData.define(
        :schema_artifacts,
        :index_definitions_by_graphql_type,
        :record_preparer_factory,
        :logger,
        :skip_derived_indexing_type_updates,
        :configure_record_validator
      )
        # Builds the operations for the given event, returning a `BuildResult`.
        # Validation failures are returned (as a failure `BuildResult`), never raised,
        # so that one bad event does not prevent sibling events from being processed.
        def build(event)
          event = prepare_event(event)

          # The block gives `select_json_schema_version` a way to perform a non-local return
          # out of `build` when it cannot produce a usable version.
          selected_json_schema_version = select_json_schema_version(event) { |failure| return failure }

          # Because the `select_json_schema_version` picks the closest-matching json schema version, the incoming
          # event might not match the expected json_schema_version value in the json schema (which is a `const` field).
          # This is by design, since we're picking a schema based on best-effort, so to avoid that by-design validation error,
          # performing the envelope validation on a "patched" version of the event.
          event_with_patched_envelope = event.merge({JSON_SCHEMA_VERSION_KEY => selected_json_schema_version})

          if (error_message = validator(EVENT_ENVELOPE_JSON_SCHEMA_NAME, selected_json_schema_version).validate_with_error_message(event_with_patched_envelope))
            return build_failed_result(event, "event payload", error_message)
          end

          failed_result = validate_record_returning_failure(event, selected_json_schema_version)
          failed_result || BuildResult.success(build_all_operations_for(
            event,
            record_preparer_factory.for_json_schema_version(selected_json_schema_version)
          ))
        end

        private

        # Picks the JSON schema version to validate against. Yields a failure `BuildResult`
        # (expecting the caller's block to return out of `build`) when the requested version
        # is missing/invalid or no version could be selected.
        def select_json_schema_version(event)
          available_json_schema_versions = schema_artifacts.available_json_schema_versions

          requested_json_schema_version = event[JSON_SCHEMA_VERSION_KEY]

          # First check that a valid value has been requested (a positive integer)
          if !event.key?(JSON_SCHEMA_VERSION_KEY)
            yield build_failed_result(event, JSON_SCHEMA_VERSION_KEY, "Event lacks a `#{JSON_SCHEMA_VERSION_KEY}`")
          elsif !requested_json_schema_version.is_a?(Integer) || requested_json_schema_version < 1
            yield build_failed_result(event, JSON_SCHEMA_VERSION_KEY, "#{JSON_SCHEMA_VERSION_KEY} (#{requested_json_schema_version}) must be a positive integer.")
          end

          # The requested version might not necessarily be available (if the publisher is deployed ahead of the indexer, or an old schema
          # version is removed prematurely, or an indexer deployment is rolled back). So the behavior is to always pick the closest-available
          # version. If there's an exact match, great. Even if not an exact match, if the incoming event payload conforms to the closest match,
          # the event can still be indexed.
          #
          # This min_by block will take the closest version in the list. If a tie occurs, the first value in the list wins. The desired
          # behavior is in the event of a tie (highly unlikely, there shouldn't be a gap in available json schema versions), the higher version
          # should be selected. So to get that behavior, the list is sorted in descending order.
          #
          selected_json_schema_version = available_json_schema_versions.sort.reverse.min_by { |it| (requested_json_schema_version - it).abs }

          if selected_json_schema_version != requested_json_schema_version
            logger.info({
              "message_type" => "ElasticGraphMissingJSONSchemaVersion",
              "message_id" => event["message_id"],
              "event_id" => EventID.from_event(event),
              "event_type" => event["type"],
              "requested_json_schema_version" => requested_json_schema_version,
              "selected_json_schema_version" => selected_json_schema_version
            })
          end

          # `selected_json_schema_version` is nil when there are no available versions at all.
          if selected_json_schema_version.nil?
            yield build_failed_result(
              event, JSON_SCHEMA_VERSION_KEY,
              "Failed to select json schema version. Requested version: #{event[JSON_SCHEMA_VERSION_KEY]}. \
              Available json schema versions: #{available_json_schema_versions.sort.join(", ")}"
            )
          end

          selected_json_schema_version
        end

        # Returns the validator for the named schema `type` at the given schema version.
        def validator(type, selected_json_schema_version)
          factory = validator_factories_by_version[selected_json_schema_version]
          factory.validator_for(type)
        end

        # Memoized map of schema version -> validator factory. The `::Hash.new` block builds
        # (and caches, via `hash[json_schema_version] = factory`) a factory lazily per version.
        def validator_factories_by_version
          @validator_factories_by_version ||= ::Hash.new do |hash, json_schema_version|
            factory = JSONSchema::ValidatorFactory.new(
              schema: schema_artifacts.json_schemas_for(json_schema_version),
              sanitize_pii: true
            )
            factory = configure_record_validator.call(factory) if configure_record_validator
            hash[json_schema_version] = factory
          end
        end

        # This copies the `id` from event into the actual record
        # This is necessary because we want to index `id` as part of the record so that the datastore will include `id` in returned search payloads.
        def prepare_event(event)
          return event unless event["record"].is_a?(::Hash) && event["id"]
          event.merge("record" => event["record"].merge("id" => event.fetch("id")))
        end

        # Validates the event's record against its type's JSON schema; returns a failure
        # `BuildResult` when invalid, or nil when the record is valid.
        def validate_record_returning_failure(event, selected_json_schema_version)
          record = event.fetch("record")
          graphql_type_name = event.fetch("type")
          validator = validator(graphql_type_name, selected_json_schema_version)

          if (error_message = validator.validate_with_error_message(record))
            build_failed_result(event, "#{graphql_type_name} record", error_message)
          end
        end

        # Wraps a validation failure in a `BuildResult.failure` carrying a `FailedEventError`.
        def build_failed_result(event, payload_description, validation_message)
          message = "Malformed #{payload_description}. #{validation_message}"

          # Here we use the `RecordPreparer::Identity` record preparer because we may not have a valid JSON schema
          # version number in this case (which is usually required to get a `RecordPreparer` from the factory), and
          # we won't wind up using the record preparer for real on these operations, anyway.
          operations = build_all_operations_for(event, RecordPreparer::Identity)

          BuildResult.failure(FailedEventError.new(event: event, operations: operations.to_set, main_message: message))
        end

        # Combines the upsert and update operations for the event.
        def build_all_operations_for(event, record_preparer)
          upsert_operations(event, record_preparer) + update_operations(event, record_preparer)
        end

        # Builds `Upsert` operations for each index of the event's type that does not
        # use update-based indexing.
        def upsert_operations(event, record_preparer)
          type = event.fetch("type") do
            # This key should only be missing on invalid events. We still want to build operations
            # for the event (to put it in the `FailedEventError`) but in this case we can't build
            # any because we don't know what indices to target.
            return []
          end

          index_definitions_for(type).reject(&:use_updates_for_indexing?).map do |index_definition|
            Upsert.new(event, index_definition, record_preparer)
          end
        end

        # Builds `Update` operations for each of the type's update targets, skipping any
        # document ids configured in `skip_derived_indexing_type_updates`.
        def update_operations(event, record_preparer)
          # If `type` is missing or is not a known type (as indicated by `runtime_metadata` being nil)
          # then we can't build a derived indexing type update operation. That case will only happen when we build
          # operations for an `FailedEventError` rather than to execute.
          return [] unless (type = event["type"])
          return [] unless (runtime_metadata = schema_artifacts.runtime_metadata.object_types_by_name[type])

          runtime_metadata.update_targets.flat_map do |update_target|
            ids_to_skip = skip_derived_indexing_type_updates.fetch(update_target.type, ::Set.new)

            index_definitions_for(update_target.type).flat_map do |destination_index_def|
              operations = Update.operations_for(
                event: event,
                destination_index_def: destination_index_def,
                record_preparer: record_preparer,
                update_target: update_target,
                destination_index_mapping: schema_artifacts.index_mappings_by_index_def_name.fetch(destination_index_def.name)
              )

              # `tap` lets us log each skipped id while still returning the inclusion check to `reject`.
              operations.reject do |op|
                ids_to_skip.include?(op.doc_id).tap do |skipped|
                  if skipped
                    logger.info({
                      "message_type" => "SkippingUpdate",
                      "message_id" => event["message_id"],
                      "update_target" => update_target.type,
                      "id" => op.doc_id,
                      "event_id" => EventID.from_event(event).to_s
                    })
                  end
                end
              end
            end
          end
        end

        def index_definitions_for(type)
          # If `type` is missing or is not a known type (as indicated by not being in this hash)
          # then we return an empty list. That case will only happen when we build
          # operations for an `FailedEventError` rather than to execute.
          index_definitions_by_graphql_type[type] || []
        end

        # :nocov: -- this should not be called. Instead, it exists to guard against wrongly raising an error from this class.
        def raise(*args)
          super("`raise` was called on `Operation::Factory`, but should not. Instead, use " \
            "`yield build_failed_result(...)` so that we can accumulate all invalid events and allow " \
            "the valid events to still be processed.")
        end
        # :nocov:

        # Return value from `build` that indicates what happened.
        # - If it was successful, `operations` will be a non-empty array of operations and `failed_event_error` will be nil.
        # - If there was a validation issue, `operations` will be an empty array and `failed_event_error` will be non-nil.
        BuildResult = ::Data.define(:operations, :failed_event_error) do
          # @implements BuildResult
          def self.success(operations)
            new(operations, nil)
          end

          def self.failure(failed_event_error)
            new([], failed_event_error)
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
# Copyright 2024 Block, Inc.
|
2
|
+
#
|
3
|
+
# Use of this source code is governed by an MIT-style
|
4
|
+
# license that can be found in the LICENSE file or at
|
5
|
+
# https://opensource.org/licenses/MIT.
|
6
|
+
#
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require "elastic_graph/indexer/event_id"
|
10
|
+
|
11
|
+
module ElasticGraph
  class Indexer
    module Operation
      # Describes the result of an operation.
      # :category value will be one of: [:success, :noop, :failure]
      Result = ::Data.define(:category, :operation, :description) do
        # @implements Result

        # A successful result; no description is needed.
        def self.success_of(operation)
          new(category: :success, operation: operation, description: nil)
        end

        # A no-op result; `description` explains why nothing was done.
        def self.noop_of(operation, description)
          new(category: :noop, operation: operation, description: description)
        end

        # A failed result; `description` explains what went wrong.
        def self.failure_of(operation, description)
          new(category: :failure, operation: operation, description: description)
        end

        # Delegates to the underlying operation's type.
        def operation_type = operation.type

        # The event the underlying operation was built from.
        def event = operation.event

        def event_id
          EventID.from_event(event)
        end

        # Short human-readable summary, e.g. `<op-description event-id category--description>`.
        def summary
          # :nocov: -- `description == nil` case is not covered; not simple to test.
          suffix = description ? "--#{description}" : nil
          # :nocov:
          "<#{operation.description} #{event_id} #{category}#{suffix}>"
        end

        def inspect
          "#<#{[
            self.class.name,
            operation_type.inspect,
            category.inspect,
            event_id,
            description
          ].compact.join(" ")}>"
        end
        alias_method :to_s, :inspect
      end
    end
  end
end
|