elasticgraph-indexer 0.18.0.0
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +1 -0
- data/elasticgraph-indexer.gemspec +24 -0
- data/lib/elastic_graph/indexer/config.rb +48 -0
- data/lib/elastic_graph/indexer/datastore_indexing_router.rb +408 -0
- data/lib/elastic_graph/indexer/event_id.rb +32 -0
- data/lib/elastic_graph/indexer/failed_event_error.rb +83 -0
- data/lib/elastic_graph/indexer/hash_differ.rb +37 -0
- data/lib/elastic_graph/indexer/indexing_failures_error.rb +28 -0
- data/lib/elastic_graph/indexer/indexing_preparers/integer.rb +41 -0
- data/lib/elastic_graph/indexer/indexing_preparers/no_op.rb +19 -0
- data/lib/elastic_graph/indexer/indexing_preparers/untyped.rb +22 -0
- data/lib/elastic_graph/indexer/operation/count_accumulator.rb +166 -0
- data/lib/elastic_graph/indexer/operation/factory.rb +226 -0
- data/lib/elastic_graph/indexer/operation/result.rb +76 -0
- data/lib/elastic_graph/indexer/operation/update.rb +160 -0
- data/lib/elastic_graph/indexer/operation/upsert.rb +71 -0
- data/lib/elastic_graph/indexer/processor.rb +137 -0
- data/lib/elastic_graph/indexer/record_preparer.rb +163 -0
- data/lib/elastic_graph/indexer/spec_support/event_matcher.rb +44 -0
- data/lib/elastic_graph/indexer/test_support/converters.rb +36 -0
- data/lib/elastic_graph/indexer.rb +98 -0
- metadata +454 -0
@@ -0,0 +1,160 @@ data/lib/elastic_graph/indexer/operation/update.rb
# Copyright 2024 Block, Inc.
#
# Use of this source code is governed by an MIT-style
# license that can be found in the LICENSE file or at
# https://opensource.org/licenses/MIT.
#
# frozen_string_literal: true

require "elastic_graph/constants"
require "elastic_graph/error"
require "elastic_graph/indexer/event_id"
require "elastic_graph/indexer/operation/count_accumulator"
require "elastic_graph/indexer/operation/result"
require "elastic_graph/support/hash_util"
require "elastic_graph/support/memoizable_data"

module ElasticGraph
  class Indexer
    module Operation
      class Update < Support::MemoizableData.define(:event, :prepared_record, :destination_index_def, :update_target, :doc_id, :destination_index_mapping)
        # @dynamic event, destination_index_def, doc_id

        def self.operations_for(
          event:,
          destination_index_def:,
          record_preparer:,
          update_target:,
          destination_index_mapping:
        )
          return [] if update_target.for_normal_indexing? && !destination_index_def.use_updates_for_indexing?

          prepared_record = record_preparer.prepare_for_index(event["type"], event["record"] || {"id" => event["id"]})

          Support::HashUtil
            .fetch_leaf_values_at_path(prepared_record, update_target.id_source)
            .reject { |id| id.to_s.strip.empty? }
            .uniq
            .map { |doc_id| new(event, prepared_record, destination_index_def, update_target, doc_id, destination_index_mapping) }
        end

        def to_datastore_bulk
          @to_datastore_bulk ||= [{update: metadata}, update_request]
        end

        def categorize(response)
          update = response.fetch("update")
          status = update.fetch("status")

          if noop_result?(response)
            noop_error_message = message_from_thrown_painless_exception(update)
              &.delete_prefix(UPDATE_WAS_NOOP_MESSAGE_PREAMBLE)

            Result.noop_of(self, noop_error_message)
          elsif (200..299).cover?(status)
            Result.success_of(self)
          else
            error = update.fetch("error")

            further_detail =
              if (more_detail = error["caused_by"])
                # Usually the type/reason details are nested an extra level (`caused_by.caused_by`), but sometimes
                # they're not. I think it's nested when the script itself throws an exception, whereas it's unnested
                # when the datastore is unable to run the script.
                more_detail = more_detail["caused_by"] if more_detail.key?("caused_by")
                " (#{more_detail["type"]}: #{more_detail["reason"]})"
              else
                "; full response: #{::JSON.pretty_generate(response)}"
              end

            Result.failure_of(self, "#{update_target.script_id}(applied to `#{doc_id}`): #{error.fetch("reason")}#{further_detail}")
          end
        end

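        # Editorial note (not part of the gem): the bulk `update` item response that
        # `categorize` handles looks roughly like the sketch below; a Painless script's
        # thrown message surfaces at `caused_by.caused_by.reason`, which is what
        # `message_from_thrown_painless_exception` digs for.
        #
        #   {
        #     "update" => {
        #       "status" => 400,
        #       "error" => {
        #         "reason" => "failed to execute script",
        #         "caused_by" => {
        #           "caused_by" => {"type" => "illegal_argument_exception", "reason" => "..."}
        #         }
        #       }
        #     }
        #   }
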
        def type
          :update
        end

        def description
          if update_target.type == event.fetch("type")
            "#{update_target.type} update"
          else
            "#{update_target.type} update (from #{event.fetch("type")})"
          end
        end

        def inspect
          "#<#{self.class.name} event=#{EventID.from_event(event)} target=#{update_target.type}>"
        end
        alias_method :to_s, :inspect

        def versioned?
          # We do not track source event versions when applying derived indexing updates, but we do for
          # normal indexing updates, so if the update target is for normal indexing it's a versioned operation.
          update_target.for_normal_indexing?
        end

        private

        # The number of retries of the update script we'll have the datastore attempt on concurrent modification conflicts.
        CONFLICT_RETRIES = 5

        def metadata
          {
            _index: destination_index_def.index_name_for_writes(prepared_record, timestamp_field_path: update_target.rollover_timestamp_value_source),
            _id: doc_id,
            routing: destination_index_def.routing_value_for_prepared_record(
              prepared_record,
              route_with_path: update_target.routing_value_source,
              id_path: update_target.id_source
            ),
            retry_on_conflict: CONFLICT_RETRIES
          }.compact
        end

        def update_request
          {
            script: {id: update_target.script_id, params: script_params},
            # We use a scripted upsert instead of formatting an upsert document because it makes
            # for simpler code. To create the upsert document, we'd have to convert the param
            # values to their "upsert form"--for example, for an `append_only_set` field, the param
            # value is generally a single scalar value while in an upsert document it would need to
            # be a list. By using `scripted_upsert`, we can always just pass the params in a consistent
            # way, and rely on the script to handle the case where it is creating a brand new document.
            scripted_upsert: true,
            upsert: {}
          }
        end

        def noop_result?(response)
          update = response.fetch("update")
          error_message = message_from_thrown_painless_exception(update).to_s
          error_message.start_with?(UPDATE_WAS_NOOP_MESSAGE_PREAMBLE) || update["result"] == "noop"
        end

        def message_from_thrown_painless_exception(update)
          update.dig("error", "caused_by", "caused_by", "reason")
        end

        def script_params
          initial_params = update_target.params_for(
            doc_id: doc_id,
            event: event,
            prepared_record: prepared_record
          )

          # The normal indexing script uses `__counts`. Other indexing scripts (e.g. the ones generated
          # for derived indexing) do not use `__counts`, so there's no point in spending effort on computing
          # it. Plus, the logic below raises an exception in that case, so it's important we avoid it.
          return initial_params unless update_target.for_normal_indexing?

          CountAccumulator.merge_list_counts_into(
            initial_params,
            mapping: destination_index_mapping,
            list_counts_field_paths_for_source: destination_index_def.list_counts_field_paths_for_source(update_target.relationship.to_s)
          )
        end
      end
    end
  end
end
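
As a quick editorial illustration (not from the gem itself), the two-element tuple `Update#to_datastore_bulk` yields would look roughly like this, with a hypothetical index name, document id, script id, and params; the keys mirror the `metadata` and `update_request` methods above:

    [
      {update: {_index: "widgets", _id: "abc123", routing: "abc123", retry_on_conflict: 5}},
      {
        script: {id: "update_Widget_from_Widget", params: {"id" => "abc123"}},
        scripted_upsert: true,
        upsert: {}
      }
    ]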
@@ -0,0 +1,71 @@ data/lib/elastic_graph/indexer/operation/upsert.rb
# Copyright 2024 Block, Inc.
#
# Use of this source code is governed by an MIT-style
# license that can be found in the LICENSE file or at
# https://opensource.org/licenses/MIT.
#
# frozen_string_literal: true

require "elastic_graph/indexer/operation/result"
require "elastic_graph/support/hash_util"
require "elastic_graph/support/memoizable_data"

module ElasticGraph
  class Indexer
    module Operation
      Upsert = Support::MemoizableData.define(:event, :destination_index_def, :record_preparer) do
        # @implements Upsert

        def to_datastore_bulk
          @to_datastore_bulk ||= [{index: metadata}, prepared_record]
        end

        def categorize(response)
          index = response.fetch("index")
          status = index.fetch("status")

          case status
          when 200..299
            Result.success_of(self)
          when 409
            Result.noop_of(self, index.fetch("error").fetch("reason"))
          else
            Result.failure_of(self, index.fetch("error").fetch("reason"))
          end
        end

        def doc_id
          @doc_id ||= event.fetch("id")
        end

        def type
          :upsert
        end

        def description
          "#{event.fetch("type")} upsert"
        end

        def versioned?
          true
        end

        private

        def metadata
          @metadata ||= {
            _index: destination_index_def.index_name_for_writes(prepared_record),
            _id: doc_id,
            version: event.fetch("version"),
            version_type: "external",
            routing: destination_index_def.routing_value_for_prepared_record(prepared_record)
          }.compact
        end

        def prepared_record
          @prepared_record ||= record_preparer.prepare_for_index(event.fetch("type"), event.fetch("record"))
        end
      end
    end
  end
end
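
For comparison, a sketch (hypothetical values) of the pair `Upsert#to_datastore_bulk` produces. The external `version`/`version_type` is what lets the datastore reject a stale write with a 409 conflict, which `categorize` reports as a noop rather than a failure:

    [
      {index: {_index: "widgets", _id: "abc123", version: 4, version_type: "external", routing: "abc123"}},
      {"id" => "abc123", "name" => "Thingamajig"}
    ]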
@@ -0,0 +1,137 @@ data/lib/elastic_graph/indexer/processor.rb
# Copyright 2024 Block, Inc.
#
# Use of this source code is governed by an MIT-style
# license that can be found in the LICENSE file or at
# https://opensource.org/licenses/MIT.
#
# frozen_string_literal: true

require "elastic_graph/error"
require "elastic_graph/indexer/event_id"
require "elastic_graph/indexer/indexing_failures_error"
require "time"

module ElasticGraph
  class Indexer
    class Processor
      def initialize(
        datastore_router:,
        operation_factory:,
        logger:,
        indexing_latency_slo_thresholds_by_timestamp_in_ms:,
        clock: ::Time
      )
        @datastore_router = datastore_router
        @operation_factory = operation_factory
        @clock = clock
        @logger = logger
        @indexing_latency_slo_thresholds_by_timestamp_in_ms = indexing_latency_slo_thresholds_by_timestamp_in_ms
      end

      # Processes the given events, writing them to the datastore. If any events are invalid, an
      # exception will be raised indicating why the events were invalid, but the valid events will
      # still be written to the datastore. No attempt is made to provide atomic "all or nothing"
      # behavior.
      def process(events, refresh_indices: false)
        failures = process_returning_failures(events, refresh_indices: refresh_indices)
        return if failures.empty?
        raise IndexingFailuresError.for(failures: failures, events: events)
      end

      # Like `process`, but returns failures instead of raising an exception.
      # The caller is responsible for handling the failures.
      def process_returning_failures(events, refresh_indices: false)
        factory_results_by_event = events.to_h { |event| [event, @operation_factory.build(event)] }

        factory_results = factory_results_by_event.values

        bulk_result = @datastore_router.bulk(factory_results.flat_map(&:operations), refresh: refresh_indices)
        successful_operations = bulk_result.successful_operations(check_failures: false)

        calculate_latency_metrics(successful_operations, bulk_result.noop_results)

        all_failures =
          factory_results.map(&:failed_event_error).compact +
          bulk_result.failure_results.map do |result|
            all_operations_for_event = factory_results_by_event.fetch(result.event).operations
            FailedEventError.from_failed_operation_result(result, all_operations_for_event.to_set)
          end

        categorize_failures(all_failures, events)
      end

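      # Editorial usage sketch (not part of the gem): the event shape mirrors the fields this
      # file reads, and in practice the processor is built by the ElasticGraph application
      # wiring rather than by hand.
      #
      #   processor.process(
      #     [{
      #       "op" => "upsert",
      #       "id" => "abc123",
      #       "type" => "Widget",
      #       "version" => 1,
      #       "record" => {"id" => "abc123"}
      #     }],
      #     refresh_indices: true
      #   )
      #
      # `process` raises `IndexingFailuresError` if any event remains failed; use
      # `process_returning_failures` to inspect and handle failures yourself.
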
      private

      def categorize_failures(failures, events)
        source_event_versions_by_cluster_by_op = @datastore_router.source_event_versions_in_index(
          failures.flat_map { |f| f.versioned_operations.to_a }
        )

        superseded_failures, outstanding_failures = failures.partition do |failure|
          failure.versioned_operations.size > 0 && failure.versioned_operations.all? do |op|
            # Under normal conditions, we expect to get back only one version per operation per cluster.
            # However, when a field used for routing or index rollover has mutated, we can wind up with
            # multiple copies of the document in different indexes or shards. `source_event_versions_in_index`
            # returns a list of found versions.
            #
            # We only need to consider the largest version when deciding if a failure has been superseded or not.
            # An event with a larger version is considered to be a full replacement for an earlier event for the
            # same entity, so if we've processed an event for the same entity with a larger version, we can consider
            # the failure superseded.
            max_version_per_cluster = source_event_versions_by_cluster_by_op.fetch(op).values.map(&:max)

            # We only consider an event to be superseded if the document version in the datastore
            # for all its versioned operations is greater than the version of the failing event.
            max_version_per_cluster.all? { |v| v && v > failure.version }
          end
        end

        if superseded_failures.any?
          superseded_ids = superseded_failures.map { |f| EventID.from_event(f.event).to_s }
          @logger.warn(
            "Ignoring #{superseded_ids.size} malformed event(s) because they have been superseded " \
            "by corrected events targeting the same id: #{superseded_ids.join(", ")}."
          )
        end

        outstanding_failures
      end

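      # A concrete illustration of the check above (hypothetical numbers): if a failing event
      # has version 3 and `source_event_versions_in_index` reports versions [4] on one cluster
      # and [5] on another, every cluster's max exceeds 3, so the failure is superseded and
      # merely logged. If any cluster reports no version (nil) or a max of 3 or lower, the
      # failure remains outstanding and is returned to the caller.
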
      def calculate_latency_metrics(successful_operations, noop_results)
        current_time = @clock.now
        successful_events = successful_operations.map(&:event).to_set
        noop_events = noop_results.map(&:event).to_set
        all_operations_events = successful_events + noop_events

        all_operations_events.each do |event|
          latencies_in_ms_from = {} # : Hash[String, Integer]
          slo_results = {} # : Hash[String, String]

          latency_timestamps = event.fetch("latency_timestamps", _ = {})
          latency_timestamps.each do |ts_name, ts_value|
            metric_value = ((current_time - Time.iso8601(ts_value)) * 1000).round

            latencies_in_ms_from[ts_name] = metric_value

            if (threshold = @indexing_latency_slo_thresholds_by_timestamp_in_ms[ts_name])
              slo_results[ts_name] = (metric_value >= threshold) ? "bad" : "good"
            end
          end

          result = successful_events.include?(event) ? "success" : "noop"

          @logger.info({
            "message_type" => "ElasticGraphIndexingLatencies",
            "message_id" => event["message_id"],
            "event_type" => event.fetch("type"),
            "event_id" => EventID.from_event(event).to_s,
            JSON_SCHEMA_VERSION_KEY => event.fetch(JSON_SCHEMA_VERSION_KEY),
            "latencies_in_ms_from" => latencies_in_ms_from,
            "slo_results" => slo_results,
            "result" => result
          })
        end
      end
    end
  end
end
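
For reference, one such `ElasticGraphIndexingLatencies` log entry might look roughly like this (made-up values; the timestamp name `ingested_at` is hypothetical, the `event_id` format is whatever `EventID#to_s` produces, and `JSON_SCHEMA_VERSION_KEY` is assumed to resolve to `"json_schema_version"`):

    {
      "message_type" => "ElasticGraphIndexingLatencies",
      "message_id" => "msg-42",
      "event_type" => "Widget",
      "event_id" => "Widget:abc123@1",
      "json_schema_version" => 1,
      "latencies_in_ms_from" => {"ingested_at" => 250},
      "slo_results" => {"ingested_at" => "good"},
      "result" => "success"
    }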
@@ -0,0 +1,163 @@ data/lib/elastic_graph/indexer/record_preparer.rb
# Copyright 2024 Block, Inc.
#
# Use of this source code is governed by an MIT-style
# license that can be found in the LICENSE file or at
# https://opensource.org/licenses/MIT.
#
# frozen_string_literal: true

require "elastic_graph/error"

module ElasticGraph
  class Indexer
    class RecordPreparer
      # Provides the ability to get a `RecordPreparer` for a specific JSON schema version.
      class Factory
        def initialize(schema_artifacts)
          @schema_artifacts = schema_artifacts

          scalar_types_by_name = schema_artifacts.runtime_metadata.scalar_types_by_name
          indexing_preparer_by_scalar_type_name = ::Hash.new do |hash, type_name|
            hash[type_name] = scalar_types_by_name[type_name]&.load_indexing_preparer&.extension_class
          end

          @preparers_by_json_schema_version = ::Hash.new do |hash, version|
            hash[version] = RecordPreparer.new(
              indexing_preparer_by_scalar_type_name,
              build_type_metas_from(@schema_artifacts.json_schemas_for(version))
            )
          end
        end

        # Gets the `RecordPreparer` for the given JSON schema version.
        def for_json_schema_version(json_schema_version)
          @preparers_by_json_schema_version[json_schema_version]
        end

        # Gets the `RecordPreparer` for the latest JSON schema version. Intended primarily
        # for use in tests for convenience.
        def for_latest_json_schema_version
          for_json_schema_version(@schema_artifacts.latest_json_schema_version)
        end

        private

        def build_type_metas_from(json_schemas)
          json_schemas.fetch("$defs").filter_map do |type, type_def|
            next if type == EVENT_ENVELOPE_JSON_SCHEMA_NAME

            properties = type_def.fetch("properties") do
              {} # : ::Hash[::String, untyped]
            end # : ::Hash[::String, untyped]

            required_fields = type_def.fetch("required") do
              [] # : ::Array[::String]
            end # : ::Array[::String]

            eg_meta_by_field_name = properties.filter_map do |prop_name, prop|
              eg_meta = prop["ElasticGraph"]
              [prop_name, eg_meta] if eg_meta
            end.to_h

            TypeMetadata.new(
              name: type,
              requires_typename: required_fields.include?("__typename"),
              eg_meta_by_field_name: eg_meta_by_field_name
            )
          end
        end
      end

      # An alternate `RecordPreparer` implementation that implements the identity function:
      # it just echoes back the record it is given.
      #
      # This is intended only for use where a `RecordPreparer` is required but the data is not
      # ultimately going to be sent to the datastore. For example, when an event is invalid, we
      # still build operations for it, and the operations require a `RecordPreparer`, but we do
      # not send them to the datastore.
      module Identity
        def self.prepare_for_index(type_name, record)
          record
        end
      end

      def initialize(indexing_preparer_by_scalar_type_name, type_metas)
        @indexing_preparer_by_scalar_type_name = indexing_preparer_by_scalar_type_name
        @eg_meta_by_field_name_by_concrete_type = type_metas.to_h do |meta|
          [meta.name, meta.eg_meta_by_field_name]
        end

        @types_requiring_typename = type_metas.filter_map do |meta|
          meta.name if meta.requires_typename
        end.to_set
      end

      # Prepares the given payload for being indexed into the named index.
      # This allows any value or field name conversion to happen before we index
      # the data, to support the few cases where we expect differences between
      # the payload received by the ElasticGraph indexer and the payload we
      # send to the datastore.
      #
      # As part of preparing the data, we also drop any `record` fields that
      # are not defined in our schema. This allows us to handle events that target
      # multiple indices (e.g. v1 and v2) for the same type. The event can contain
      # the set union of fields, and this will take care of dropping any unsupported
      # fields before we attempt to index the record.
      #
      # Note: this method does not mutate the given `record`. Instead it returns a
      # copy with any updates applied to it.
      def prepare_for_index(type_name, record)
        prepare_value_for_indexing(record, type_name)
      end

      private

      def prepare_value_for_indexing(value, type_name)
        type_name = type_name.delete_suffix("!")

        return nil if value.nil?

        if (preparer = @indexing_preparer_by_scalar_type_name[type_name])
          return (_ = preparer).prepare_for_indexing(value)
        end

        case value
        when ::Array
          element_type_name = type_name.delete_prefix("[").delete_suffix("]")
          value.map { |v| prepare_value_for_indexing(v, element_type_name) }
        when ::Hash
          # `@eg_meta_by_field_name_by_concrete_type` does not have abstract types in it (e.g. type unions).
          # Instead, it'll have each concrete subtype in it.
          #
          # If `type_name` is an abstract type, we need to look at the `__typename` field to see
          # what the concrete subtype is; `__typename` is required on abstract types for exactly this reason.
          eg_meta_by_field_name = @eg_meta_by_field_name_by_concrete_type.fetch(value["__typename"] || type_name)

          value.filter_map do |field_name, field_value|
            if field_name == "__typename"
              # We only want to include `__typename` if we're dealing with a type that requires it.
              # (That's the case for an abstract type, where it tells us which subtype we have.)
              [field_name, field_value] if @types_requiring_typename.include?(type_name)
            elsif (eg_meta = eg_meta_by_field_name[field_name])
              [eg_meta.fetch("nameInIndex"), prepare_value_for_indexing(field_value, eg_meta.fetch("type"))]
            end
          end.to_h
        else
          # We won't have a registered preparer for enum types, since those aren't dumped in
          # runtime metadata `scalar_types_by_name`, and we can just return the value as-is in
          # this case.
          value
        end
      end

      TypeMetadata = ::Data.define(
        # The name of the type this metadata object is for.
        :name,
        # Indicates if this type requires a `__typename` field.
        :requires_typename,
        # The per-field ElasticGraph metadata, keyed by field name.
        :eg_meta_by_field_name
      )
    end
  end
end
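
To make the renaming and dropping concrete, an editorial sketch (all names hypothetical): given a schema where `Widget.fullName` carries `"nameInIndex" => "full_name"` in its ElasticGraph metadata, preparation would behave roughly like:

    preparer = record_preparer_factory.for_latest_json_schema_version
    preparer.prepare_for_index("Widget", {
      "id" => "abc123",
      "fullName" => "Thingamajig",
      "legacyField" => "anything"  # not defined in the schema, so it is dropped
    })
    # => {"id" => "abc123", "full_name" => "Thingamajig"}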
@@ -0,0 +1,44 @@ data/lib/elastic_graph/indexer/spec_support/event_matcher.rb
# Copyright 2024 Block, Inc.
#
# Use of this source code is governed by an MIT-style
# license that can be found in the LICENSE file or at
# https://opensource.org/licenses/MIT.
#
# frozen_string_literal: true

require "json"

# Defines an RSpec matcher that can be used to validate ElasticGraph events.
::RSpec::Matchers.define :be_a_valid_elastic_graph_event do |for_indexer:|
  match do |event|
    result = for_indexer
      .operation_factory
      .with(configure_record_validator: block_arg)
      .build(event)

    @validation_failure = result.failed_event_error
    !@validation_failure
  end

  description do
    "be a valid ElasticGraph event"
  end

  failure_message do |event|
    <<~EOS
      expected the event[1] to #{description}, but it was invalid[2].

      [1] #{::JSON.pretty_generate(event)}

      [2] #{@validation_failure.message}
    EOS
  end

  failure_message_when_negated do |event|
    <<~EOS
      expected the event[1] not to #{description}, but it was valid.

      [1] #{::JSON.pretty_generate(event)}
    EOS
  end
end
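
A usage sketch for the matcher (the `indexer` and `record` here are assumed to come from your test setup; an optional block is forwarded to the operation factory as `configure_record_validator`):

    event = ElasticGraph::Indexer::TestSupport::Converters.upsert_event_for(record)
    expect(event).to be_a_valid_elastic_graph_event(for_indexer: indexer)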
@@ -0,0 +1,36 @@ data/lib/elastic_graph/indexer/test_support/converters.rb
# Copyright 2024 Block, Inc.
#
# Use of this source code is governed by an MIT-style
# license that can be found in the LICENSE file or at
# https://opensource.org/licenses/MIT.
#
# frozen_string_literal: true

require "elastic_graph/constants"
require "elastic_graph/support/hash_util"
require "json"

module ElasticGraph
  class Indexer
    module TestSupport
      module Converters
        # Helper method, used in tests and fake data generation, to convert a factory record into an event.
        def self.upsert_event_for(record)
          {
            "op" => "upsert",
            "id" => record.fetch("id"),
            "type" => record.fetch("__typename"),
            "version" => record.fetch("__version"),
            "record" => record.except("__typename", "__version", "__json_schema_version"),
            JSON_SCHEMA_VERSION_KEY => record.fetch("__json_schema_version")
          }
        end

        # Helper method to create an array of events given an array of records.
        def self.upsert_events_for_records(records)
          records.map { |record| upsert_event_for(Support::HashUtil.stringify_keys(record)) }
        end
      end
    end
  end
end
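
For example (hypothetical record), `upsert_event_for` moves the `__`-prefixed factory metadata into the event envelope and keeps the rest as the `record`, assuming `JSON_SCHEMA_VERSION_KEY` resolves to `"json_schema_version"`:

    ElasticGraph::Indexer::TestSupport::Converters.upsert_event_for({
      "__typename" => "Widget",
      "__version" => 1,
      "__json_schema_version" => 1,
      "id" => "abc123",
      "name" => "Thingamajig"
    })
    # => {
    #   "op" => "upsert",
    #   "id" => "abc123",
    #   "type" => "Widget",
    #   "version" => 1,
    #   "record" => {"id" => "abc123", "name" => "Thingamajig"},
    #   "json_schema_version" => 1
    # }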