elasticgraph-schema_definition 0.18.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +7 -0
- data/elasticgraph-schema_definition.gemspec +26 -0
- data/lib/elastic_graph/schema_definition/api.rb +359 -0
- data/lib/elastic_graph/schema_definition/factory.rb +506 -0
- data/lib/elastic_graph/schema_definition/indexing/derived_fields/append_only_set.rb +79 -0
- data/lib/elastic_graph/schema_definition/indexing/derived_fields/field_initializer_support.rb +59 -0
- data/lib/elastic_graph/schema_definition/indexing/derived_fields/immutable_value.rb +99 -0
- data/lib/elastic_graph/schema_definition/indexing/derived_fields/min_or_max_value.rb +62 -0
- data/lib/elastic_graph/schema_definition/indexing/derived_indexed_type.rb +346 -0
- data/lib/elastic_graph/schema_definition/indexing/event_envelope.rb +74 -0
- data/lib/elastic_graph/schema_definition/indexing/field.rb +181 -0
- data/lib/elastic_graph/schema_definition/indexing/field_reference.rb +51 -0
- data/lib/elastic_graph/schema_definition/indexing/field_type/enum.rb +65 -0
- data/lib/elastic_graph/schema_definition/indexing/field_type/object.rb +113 -0
- data/lib/elastic_graph/schema_definition/indexing/field_type/scalar.rb +51 -0
- data/lib/elastic_graph/schema_definition/indexing/field_type/union.rb +70 -0
- data/lib/elastic_graph/schema_definition/indexing/index.rb +318 -0
- data/lib/elastic_graph/schema_definition/indexing/json_schema_field_metadata.rb +34 -0
- data/lib/elastic_graph/schema_definition/indexing/json_schema_with_metadata.rb +234 -0
- data/lib/elastic_graph/schema_definition/indexing/list_counts_mapping.rb +53 -0
- data/lib/elastic_graph/schema_definition/indexing/relationship_resolver.rb +96 -0
- data/lib/elastic_graph/schema_definition/indexing/rollover_config.rb +25 -0
- data/lib/elastic_graph/schema_definition/indexing/update_target_factory.rb +54 -0
- data/lib/elastic_graph/schema_definition/indexing/update_target_resolver.rb +195 -0
- data/lib/elastic_graph/schema_definition/json_schema_pruner.rb +61 -0
- data/lib/elastic_graph/schema_definition/mixins/can_be_graphql_only.rb +31 -0
- data/lib/elastic_graph/schema_definition/mixins/has_derived_graphql_type_customizations.rb +119 -0
- data/lib/elastic_graph/schema_definition/mixins/has_directives.rb +65 -0
- data/lib/elastic_graph/schema_definition/mixins/has_documentation.rb +74 -0
- data/lib/elastic_graph/schema_definition/mixins/has_indices.rb +281 -0
- data/lib/elastic_graph/schema_definition/mixins/has_readable_to_s_and_inspect.rb +46 -0
- data/lib/elastic_graph/schema_definition/mixins/has_subtypes.rb +116 -0
- data/lib/elastic_graph/schema_definition/mixins/has_type_info.rb +181 -0
- data/lib/elastic_graph/schema_definition/mixins/implements_interfaces.rb +122 -0
- data/lib/elastic_graph/schema_definition/mixins/supports_default_value.rb +47 -0
- data/lib/elastic_graph/schema_definition/mixins/supports_filtering_and_aggregation.rb +267 -0
- data/lib/elastic_graph/schema_definition/mixins/verifies_graphql_name.rb +38 -0
- data/lib/elastic_graph/schema_definition/rake_tasks.rb +190 -0
- data/lib/elastic_graph/schema_definition/results.rb +404 -0
- data/lib/elastic_graph/schema_definition/schema_artifact_manager.rb +482 -0
- data/lib/elastic_graph/schema_definition/schema_elements/argument.rb +56 -0
- data/lib/elastic_graph/schema_definition/schema_elements/built_in_types.rb +1541 -0
- data/lib/elastic_graph/schema_definition/schema_elements/deprecated_element.rb +21 -0
- data/lib/elastic_graph/schema_definition/schema_elements/directive.rb +40 -0
- data/lib/elastic_graph/schema_definition/schema_elements/enum_type.rb +189 -0
- data/lib/elastic_graph/schema_definition/schema_elements/enum_value.rb +73 -0
- data/lib/elastic_graph/schema_definition/schema_elements/enum_value_namer.rb +89 -0
- data/lib/elastic_graph/schema_definition/schema_elements/enums_for_indexed_types.rb +82 -0
- data/lib/elastic_graph/schema_definition/schema_elements/field.rb +1085 -0
- data/lib/elastic_graph/schema_definition/schema_elements/field_path.rb +112 -0
- data/lib/elastic_graph/schema_definition/schema_elements/field_source.rb +16 -0
- data/lib/elastic_graph/schema_definition/schema_elements/graphql_sdl_enumerator.rb +113 -0
- data/lib/elastic_graph/schema_definition/schema_elements/input_field.rb +31 -0
- data/lib/elastic_graph/schema_definition/schema_elements/input_type.rb +60 -0
- data/lib/elastic_graph/schema_definition/schema_elements/interface_type.rb +72 -0
- data/lib/elastic_graph/schema_definition/schema_elements/list_counts_state.rb +40 -0
- data/lib/elastic_graph/schema_definition/schema_elements/object_type.rb +53 -0
- data/lib/elastic_graph/schema_definition/schema_elements/relationship.rb +218 -0
- data/lib/elastic_graph/schema_definition/schema_elements/scalar_type.rb +310 -0
- data/lib/elastic_graph/schema_definition/schema_elements/sort_order_enum_value.rb +36 -0
- data/lib/elastic_graph/schema_definition/schema_elements/sub_aggregation_path.rb +66 -0
- data/lib/elastic_graph/schema_definition/schema_elements/type_namer.rb +237 -0
- data/lib/elastic_graph/schema_definition/schema_elements/type_reference.rb +353 -0
- data/lib/elastic_graph/schema_definition/schema_elements/type_with_subfields.rb +579 -0
- data/lib/elastic_graph/schema_definition/schema_elements/union_type.rb +157 -0
- data/lib/elastic_graph/schema_definition/scripting/file_system_repository.rb +77 -0
- data/lib/elastic_graph/schema_definition/scripting/script.rb +48 -0
- data/lib/elastic_graph/schema_definition/scripting/scripts/field/as_day_of_week.painless +24 -0
- data/lib/elastic_graph/schema_definition/scripting/scripts/field/as_time_of_day.painless +41 -0
- data/lib/elastic_graph/schema_definition/scripting/scripts/filter/by_time_of_day.painless +22 -0
- data/lib/elastic_graph/schema_definition/scripting/scripts/update/index_data.painless +93 -0
- data/lib/elastic_graph/schema_definition/state.rb +212 -0
- data/lib/elastic_graph/schema_definition/test_support.rb +113 -0
- metadata +513 -0
@@ -0,0 +1,99 @@
|
|
1
|
+
# Copyright 2024 Block, Inc.
|
2
|
+
#
|
3
|
+
# Use of this source code is governed by an MIT-style
|
4
|
+
# license that can be found in the LICENSE file or at
|
5
|
+
# https://opensource.org/licenses/MIT.
|
6
|
+
#
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
module ElasticGraph
|
10
|
+
module SchemaDefinition
|
11
|
+
module Indexing
|
12
|
+
module DerivedFields
|
13
|
+
# Responsible for providing bits of the painless script specific to a {DerivedIndexedType#immutable_value} field.
|
14
|
+
#
|
15
|
+
# @api private
|
16
|
+
class ImmutableValue < ::Data.define(:destination_field, :source_field, :nullable, :can_change_from_null)
|
17
|
+
# `Data.define` provides the following methods:
|
18
|
+
# @dynamic destination_field, source_field
|
19
|
+
|
20
|
+
# Builds the painless call expression that manages this immutable value field.
#
# The dotted `destination_field` path is split: the last segment is passed as the field name, and the
# leading segments (prefixed with `ctx._source`) form the expression for the parent object.
#
# @return [String] a line of painless code to manage an immutable value field and return a boolean indicating if it was updated.
def apply_operation_returning_update_status
  *parent_parts, field = destination_field.split(".")
  parent_parts = ["ctx", "_source"] + parent_parts

  %{immutableValue_idempotentlyUpdateValue(scriptErrors, data["#{source_field}"], #{parent_parts.join(".")}, "#{destination_field}", "#{field}", #{nullable}, #{can_change_from_null})}
end
|
27
|
+
|
28
|
+
# Delegates to `FieldInitializerSupport.build_empty_value_initializers` for `destination_field`, passing
# `leaf_value: :leave_unset`.
# NOTE(review): presumably this initializes only the parent objects along the path and leaves the leaf
# field itself unset -- confirm against `FieldInitializerSupport`.
#
# @return [Array<String>] a list of painless statements that must be called at the top of the script to set things up.
def setup_statements
  FieldInitializerSupport.build_empty_value_initializers(destination_field, leaf_value: :leave_unset)
end
|
32
|
+
|
33
|
+
# Every immutable value field relies on the same single painless function, so the result does not depend
# on any attribute of this instance.
#
# @return [Array<String>] painless functions required by `immutable_value`.
def function_definitions
  [IDEMPOTENTLY_SET_VALUE]
end
|
37
|
+
|
38
|
+
private
|
39
|
+
|
40
|
+
# Painless function which manages an `immutable_value` field.
#
# It returns `true` when it wrote a value to the parent object and `false` otherwise. Attempts to change an
# already-set value are reported by appending a message to `scriptErrors` rather than by throwing.
#
# NOTE(review): a bare `private` keyword does not apply to constants in Ruby, so this constant is still
# reachable from outside the class; use `private_constant` if it is meant to be hidden.
IDEMPOTENTLY_SET_VALUE = <<~EOS
  boolean immutableValue_idempotentlyUpdateValue(List scriptErrors, List values, def parentObject, String fullPath, String fieldName, boolean nullable, boolean canChangeFromNull) {
    boolean fieldAlreadySet = parentObject.containsKey(fieldName);

    // `values` is always passed to us as a `List` (the indexer normalizes to a list, wrapping single
    // values in a list as needed) but we only ever expect at most 1 element.
    def newValueCandidate = values.isEmpty() ? null : values[0];

    if (fieldAlreadySet) {
      def currentValue = parentObject[fieldName];

      // Usually we do not allow `immutable_value` fields to ever change values. However, we make
      // a special case for `null`, but only when `can_change_from_null: true` has been configured.
      // This can be important when deriving a field that has not always existed on the source events.
      // On early events, the value may be `null`, and, when this is enabled, we do not want that to
      // interfere with our ability to set the value to the correct non-null value based on a different
      // event which has a value for the source field.
      if (canChangeFromNull) {
        if (currentValue == null) {
          parentObject[fieldName] = newValueCandidate;
          return true;
        }

        // When `can_change_from_null: true` is enabled we also need to ignore NEW `null` values that we
        // see _after_ a non-null value. This is necessary because an ElasticGraph invariant is that events
        // can be processed in any order. So we might process an old event (predating the existence of the
        // source field) after we've already set the field to a non-null value. We must always "converge"
        // on the same indexed state regardless, of the order events are seen, so here we just ignore it.
        if (newValueCandidate == null) {
          return false;
        }
      }

      // Otherwise, if the values differ, it means we are attempting to mutate the immutable value field, which we cannot allow.
      if (currentValue != newValueCandidate) {
        if (currentValue == null) {
          scriptErrors.add("Field `" + fullPath + "` cannot be changed (" + currentValue + " => " + newValueCandidate + "). Set `can_change_from_null: true` on the `immutable_value` definition to allow this.");
        } else {
          scriptErrors.add("Field `" + fullPath + "` cannot be changed (" + currentValue + " => " + newValueCandidate + ").");
        }
      }

      return false;
    }

    if (newValueCandidate == null && !nullable) {
      scriptErrors.add("Field `" + fullPath + "` cannot be set to `null`, but the source event contains no value for it. Remove `nullable: false` from the `immutable_value` definition to allow this.");
      return false;
    }

    parentObject[fieldName] = newValueCandidate;
    return true;
  }
EOS
|
95
|
+
end
|
96
|
+
end
|
97
|
+
end
|
98
|
+
end
|
99
|
+
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
# Copyright 2024 Block, Inc.
|
2
|
+
#
|
3
|
+
# Use of this source code is governed by an MIT-style
|
4
|
+
# license that can be found in the LICENSE file or at
|
5
|
+
# https://opensource.org/licenses/MIT.
|
6
|
+
#
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
module ElasticGraph
|
10
|
+
module SchemaDefinition
|
11
|
+
module Indexing
|
12
|
+
module DerivedFields
|
13
|
+
# Responsible for providing bits of the painless script specific to a {DerivedIndexedType#min_value} or
|
14
|
+
# {DerivedIndexedType#max_value} field.
|
15
|
+
#
|
16
|
+
# @api private
|
17
|
+
class MinOrMaxValue < ::Data.define(:destination_field, :source_field, :min_or_max)
|
18
|
+
# `Data.define` provides the following methods:
|
19
|
+
# @dynamic destination_field, source_field, min_or_max
|
20
|
+
|
21
|
+
# Builds the painless call expression that manages this min or max value field.
#
# The function name is prefixed with `min_or_max`. The dotted `destination_field` path is split: the last
# segment is passed as the field name, and the leading segments (prefixed with `ctx._source`) form the
# expression for the parent object.
#
# @return [String] a line of painless code to manage a min or max value field and return a boolean indicating if it was updated.
def apply_operation_returning_update_status
  *parent_parts, field = destination_field.split(".")
  parent_parts = ["ctx", "_source"] + parent_parts

  %{#{min_or_max}Value_idempotentlyUpdateValue(data["#{source_field}"], #{parent_parts.join(".")}, "#{field}")}
end
|
28
|
+
|
29
|
+
# Delegates to `FieldInitializerSupport.build_empty_value_initializers` for `destination_field`, passing
# `leaf_value: :leave_unset`.
# NOTE(review): presumably this initializes only the parent objects along the path and leaves the leaf
# field itself unset -- confirm against `FieldInitializerSupport`.
#
# @return [Array<String>] a list of painless statements that must be called at the top of the script to set things up.
def setup_statements
  FieldInitializerSupport.build_empty_value_initializers(destination_field, leaf_value: :leave_unset)
end
|
33
|
+
|
34
|
+
# Returns the single painless function matching this field's `min_or_max` kind, as generated by
# `MinOrMaxValue.function_def`.
#
# @return [Array<String>] painless functions required by a min or max value field.
def function_definitions
  [MinOrMaxValue.function_def(min_or_max)]
end
|
38
|
+
|
39
|
+
# Generates the painless function that keeps a field at the minimum or maximum of the values seen.
#
# The generated function overwrites the field (and returns `true`) when no current value is present, or when
# the new candidate is non-null and compares as smaller (for `:min`) or larger (for `:max`) than the current
# value. Otherwise it leaves the field alone and returns `false`.
#
# @param min_or_max [:min, :max] which type of function to generate.
# @return [String] painless function for managing a min or max field.
def self.function_def(min_or_max)
  # Any value other than `:min` selects the "greater than" comparison.
  operator = (min_or_max == :min) ? "<" : ">"

  <<~EOS
    boolean #{min_or_max}Value_idempotentlyUpdateValue(List values, def parentObject, String fieldName) {
      def currentFieldValue = parentObject[fieldName];
      def #{min_or_max}NewValue = values.isEmpty() ? null : Collections.#{min_or_max}(values);

      if (currentFieldValue == null || (#{min_or_max}NewValue != null && #{min_or_max}NewValue.compareTo(currentFieldValue) #{operator} 0)) {
        parentObject[fieldName] = #{min_or_max}NewValue;
        return true;
      }

      return false;
    }
  EOS
end
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
62
|
+
end
|
@@ -0,0 +1,346 @@
|
|
1
|
+
# Copyright 2024 Block, Inc.
|
2
|
+
#
|
3
|
+
# Use of this source code is governed by an MIT-style
|
4
|
+
# license that can be found in the LICENSE file or at
|
5
|
+
# https://opensource.org/licenses/MIT.
|
6
|
+
#
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require "elastic_graph/schema_artifacts/runtime_metadata/update_target"
|
10
|
+
require "elastic_graph/schema_definition/indexing/derived_fields/append_only_set"
|
11
|
+
require "elastic_graph/schema_definition/indexing/derived_fields/immutable_value"
|
12
|
+
require "elastic_graph/schema_definition/indexing/derived_fields/min_or_max_value"
|
13
|
+
require "elastic_graph/schema_definition/scripting/script"
|
14
|
+
|
15
|
+
module ElasticGraph
|
16
|
+
module SchemaDefinition
|
17
|
+
module Indexing
|
18
|
+
# Used to configure the derivation of a derived indexed type from a source type.
|
19
|
+
# This type is yielded from {Mixins::HasIndices#derive_indexed_type_fields}.
|
20
|
+
#
|
21
|
+
# @example Derive a `Course` type from `StudentCourseEnrollment` events
|
22
|
+
# ElasticGraph.define_schema do |schema|
|
23
|
+
# # `StudentCourseEnrollment` is a directly indexed type.
|
24
|
+
# schema.object_type "StudentCourseEnrollment" do |t|
|
25
|
+
# t.field "id", "ID"
|
26
|
+
# t.field "courseId", "ID"
|
27
|
+
# t.field "courseName", "String"
|
28
|
+
# t.field "studentName", "String"
|
29
|
+
# t.field "courseStartDate", "Date"
|
30
|
+
#
|
31
|
+
# t.index "student_course_enrollments"
|
32
|
+
#
|
33
|
+
# # Here we define how the `Course` indexed type is derived when we index `StudentCourseEnrollment` events.
|
34
|
+
# t.derive_indexed_type_fields "Course", from_id: "courseId" do |derive|
|
35
|
+
# # `derive` is an instance of `DerivedIndexedType`.
|
36
|
+
# derive.immutable_value "name", from: "courseName"
|
37
|
+
# derive.append_only_set "students", from: "studentName"
|
38
|
+
# derive.min_value "firstOfferedDate", from: "courseStartDate"
|
39
|
+
# derive.max_value "mostRecentlyOfferedDate", from: "courseStartDate"
|
40
|
+
# end
|
41
|
+
# end
|
42
|
+
#
|
43
|
+
# # `Course` is an indexed type that is derived entirely from `StudentCourseEnrollment` events.
|
44
|
+
# schema.object_type "Course" do |t|
|
45
|
+
# t.field "id", "ID"
|
46
|
+
# t.field "name", "String"
|
47
|
+
# t.field "students", "[String!]!"
|
48
|
+
# t.field "firstOfferedDate", "Date"
|
49
|
+
# t.field "mostRecentlyOfferedDate", "Date"
|
50
|
+
#
|
51
|
+
# t.index "courses"
|
52
|
+
# end
|
53
|
+
# end
|
54
|
+
#
|
55
|
+
# @!attribute source_type
|
56
|
+
# @return [SchemaElements::ObjectType] the type used as a source for this derive type
|
57
|
+
# @!attribute destination_type_ref
|
58
|
+
# @private
|
59
|
+
# @!attribute id_source
|
60
|
+
# @return [String] path to field on the source type used as `id` on the derived type
|
61
|
+
# @!attribute routing_value_source
|
62
|
+
# @return [String, nil] path to field on the source type used for shard routing
|
63
|
+
# @!attribute rollover_timestamp_value_source
|
64
|
+
# @return [String, nil] path to field on the source type used as the timestamp field for rollover
|
65
|
+
# @!attribute fields
|
66
|
+
# @return [Array<DerivedFields::AppendOnlySet, DerivedFields::ImmutableValue, DerivedFields::MinOrMaxValue>] derived field definitions
|
67
|
+
class DerivedIndexedType < ::Struct.new(
|
68
|
+
:source_type,
|
69
|
+
:destination_type_ref,
|
70
|
+
:id_source,
|
71
|
+
:routing_value_source,
|
72
|
+
:rollover_timestamp_value_source,
|
73
|
+
:fields
|
74
|
+
)
|
75
|
+
# Builds the struct with an empty `fields` list and then yields the new instance so the caller's block can
# register derived fields on it.
#
# @param source_type [SchemaElements::ObjectType] the type used as a source for this derive type
# @param destination_type_ref [SchemaElements::TypeReference] the derived type
# @param id_source [String] path to field on the source type used as `id` on the derived type
# @param routing_value_source [String, nil] path to field on the source type used for shard routing
# @param rollover_timestamp_value_source [String, nil] path to field on the source type used as the timestamp field for rollover
# @yield [DerivedIndexedType] the `DerivedIndexedType` instance
# @api private
def initialize(
  source_type:,
  destination_type_ref:,
  id_source:,
  routing_value_source:,
  rollover_timestamp_value_source:
)
  # `fields` is not a constructor argument: every instance starts with its own empty list.
  fields = [] # : ::Array[_DerivedField]
  super(
    source_type: source_type,
    destination_type_ref: destination_type_ref,
    id_source: id_source,
    routing_value_source: routing_value_source,
    rollover_timestamp_value_source: rollover_timestamp_value_source,
    fields: fields
  )
  # The block is required: a bare `yield` raises `LocalJumpError` when no block is given.
  yield self
end
|
100
|
+
|
101
|
+
# Configures `field_name` (on the derived indexing type) to contain the set union of all values from
|
102
|
+
# the `from` field on the source type. Values are only ever appended to the set, so the field will
|
103
|
+
# act as an append-only set.
|
104
|
+
#
|
105
|
+
# @param field_name [String] name of field on the derived indexing type to store the derived set
|
106
|
+
# @param from [String] path to field on the source type to source values from
|
107
|
+
# @return [DerivedFields::AppendOnlySet]
|
108
|
+
#
|
109
|
+
# @example
|
110
|
+
# ElasticGraph.define_schema do |schema|
|
111
|
+
# schema.object_type "StudentCourseEnrollment" do |t|
|
112
|
+
# t.field "id", "ID"
|
113
|
+
# t.field "courseId", "ID"
|
114
|
+
# t.field "studentName", "String"
|
115
|
+
#
|
116
|
+
# t.index "student_course_enrollments"
|
117
|
+
#
|
118
|
+
# t.derive_indexed_type_fields "Course", from_id: "courseId" do |derive|
|
119
|
+
# derive.append_only_set "students", from: "studentName"
|
120
|
+
# end
|
121
|
+
# end
|
122
|
+
#
|
123
|
+
# schema.object_type "Course" do |t|
|
124
|
+
# t.field "id", "ID"
|
125
|
+
# t.field "students", "[String!]!"
|
126
|
+
#
|
127
|
+
# t.index "courses"
|
128
|
+
# end
|
129
|
+
# end
|
130
|
+
def append_only_set(field_name, from:)
  # Registers the definition on `fields` (destination first, then source). `Array#<<` returns its
  # receiver, so the value returned here is the `fields` list.
  fields << DerivedFields::AppendOnlySet.new(field_name, from)
end
|
133
|
+
|
134
|
+
# Configures `field_name` (on the derived indexing type) to contain a single immutable value from the
|
135
|
+
# `from` field on the source type. Immutability is enforced by triggering an indexing failure with a
|
136
|
+
# clear error if any event's source value is different from the value already indexed on this field.
|
137
|
+
#
|
138
|
+
# @param field_name [String] name of field on the derived indexing type to store the derived value
|
139
|
+
# @param from [String] path to field on the source type to source values from
|
140
|
+
# @param nullable [Boolean] whether the field is allowed to be set to `null`. When set to false, events
|
141
|
+
# that contain a `null` value in the `from` field will be rejected instead of setting the field’s value
|
142
|
+
# to `null`.
|
143
|
+
# @param can_change_from_null [Boolean] whether a one-time mutation of the field value is allowed from
|
144
|
+
# `null` to a non-`null` value. This can be useful when dealing with a field that may not have a value
|
145
|
+
# on all source events. For example, if the source field was not initially part of the schema of your
|
146
|
+
# source dataset, you may have old records that lack a value for this field. When set, this option
|
147
|
+
# allows a one-time mutation of the field value from `null` to a non-`null` value. Once set to a
|
148
|
+
# non-`null` value, any additional `null` values that are encountered will be ignored (ensuring that
|
149
|
+
# the indexed data converges on the same state regardless of the order the events are ingested in).
|
150
|
+
# Note: this option cannot be enabled when `nullable: false` has been set.
|
151
|
+
# @return [DerivedFields::ImmutableValue]
|
152
|
+
#
|
153
|
+
# @example
|
154
|
+
# ElasticGraph.define_schema do |schema|
|
155
|
+
# schema.object_type "StudentCourseEnrollment" do |t|
|
156
|
+
# t.field "id", "ID"
|
157
|
+
# t.field "courseId", "ID"
|
158
|
+
# t.field "courseName", "String"
|
159
|
+
#
|
160
|
+
# t.index "student_course_enrollments"
|
161
|
+
#
|
162
|
+
# t.derive_indexed_type_fields "Course", from_id: "courseId" do |derive|
|
163
|
+
# derive.immutable_value "name", from: "courseName"
|
164
|
+
# end
|
165
|
+
# end
|
166
|
+
#
|
167
|
+
# schema.object_type "Course" do |t|
|
168
|
+
# t.field "id", "ID"
|
169
|
+
# t.field "name", "String"
|
170
|
+
#
|
171
|
+
# t.index "courses"
|
172
|
+
# end
|
173
|
+
# end
|
174
|
+
def immutable_value(field_name, from:, nullable: true, can_change_from_null: false)
  # `can_change_from_null` only has meaning when `null` can be stored in the first place, so the
  # contradictory combination is rejected before anything is registered.
  if !nullable && can_change_from_null
    raise SchemaError, "`can_change_from_null: true` is not allowed with `nullable: false` (as there would be no `null` values to change from)."
  end

  # Registers the definition on `fields`. `Array#<<` returns its receiver, so the value returned here
  # is the `fields` list.
  fields << DerivedFields::ImmutableValue.new(
    destination_field: field_name,
    source_field: from,
    nullable: nullable,
    can_change_from_null: can_change_from_null
  )
end
|
186
|
+
|
187
|
+
# Configures `field_name` (on the derived indexing type) to contain the minimum of all values from the `from`
|
188
|
+
# field on the source type.
|
189
|
+
#
|
190
|
+
# @param field_name [String] name of field on the derived indexing type to store the derived value
|
191
|
+
# @param from [String] path to field on the source type to source values from
|
192
|
+
# @return [DerivedFields::MinOrMaxValue]
|
193
|
+
#
|
194
|
+
# @example
|
195
|
+
# ElasticGraph.define_schema do |schema|
|
196
|
+
# schema.object_type "StudentCourseEnrollment" do |t|
|
197
|
+
# t.field "id", "ID"
|
198
|
+
# t.field "courseId", "ID"
|
199
|
+
# t.field "courseStartDate", "Date"
|
200
|
+
#
|
201
|
+
# t.index "student_course_enrollments"
|
202
|
+
#
|
203
|
+
# t.derive_indexed_type_fields "Course", from_id: "courseId" do |derive|
|
204
|
+
# derive.min_value "firstOfferedDate", from: "courseStartDate"
|
205
|
+
# end
|
206
|
+
# end
|
207
|
+
#
|
208
|
+
# schema.object_type "Course" do |t|
|
209
|
+
# t.field "id", "ID"
|
210
|
+
# t.field "firstOfferedDate", "Date"
|
211
|
+
#
|
212
|
+
# t.index "courses"
|
213
|
+
# end
|
214
|
+
# end
|
215
|
+
def min_value(field_name, from:)
  # Registers a `:min` definition on `fields` (destination, source, kind). `Array#<<` returns its
  # receiver, so the value returned here is the `fields` list.
  fields << DerivedFields::MinOrMaxValue.new(field_name, from, :min)
end
|
218
|
+
|
219
|
+
# Configures `field_name` (on the derived indexing type) to contain the maximum of all values from the `from`
|
220
|
+
# field on the source type.
|
221
|
+
#
|
222
|
+
# @param field_name [String] name of field on the derived indexing type to store the derived value
|
223
|
+
# @param from [String] path to field on the source type to source values from
|
224
|
+
# @return [DerivedFields::MinOrMaxValue]
|
225
|
+
#
|
226
|
+
# @example
|
227
|
+
# ElasticGraph.define_schema do |schema|
|
228
|
+
# schema.object_type "StudentCourseEnrollment" do |t|
|
229
|
+
# t.field "id", "ID"
|
230
|
+
# t.field "courseId", "ID"
|
231
|
+
# t.field "courseStartDate", "Date"
|
232
|
+
#
|
233
|
+
# t.index "student_course_enrollments"
|
234
|
+
#
|
235
|
+
# t.derive_indexed_type_fields "Course", from_id: "courseId" do |derive|
|
236
|
+
# derive.max_value "mostRecentlyOfferedDate", from: "courseStartDate"
|
237
|
+
# end
|
238
|
+
# end
|
239
|
+
#
|
240
|
+
# schema.object_type "Course" do |t|
|
241
|
+
# t.field "id", "ID"
|
242
|
+
# t.field "mostRecentlyOfferedDate", "Date"
|
243
|
+
#
|
244
|
+
# t.index "courses"
|
245
|
+
# end
|
246
|
+
# end
|
247
|
+
def max_value(field_name, from:)
  # Registers a `:max` definition on `fields` (destination, source, kind). `Array#<<` returns its
  # receiver, so the value returned here is the `fields` list.
  fields << DerivedFields::MinOrMaxValue.new(field_name, from, :max)
end
|
250
|
+
|
251
|
+
# Wraps the generated script source in a `Scripting::Script` for the painless `update` context. The script
# is named after the destination and source types, and the generated source is stripped of surrounding
# whitespace.
#
# @return [Scripting::Script] Painless script that will maintain the derived fields
# @api private
def painless_script
  Scripting::Script.new(
    source: generate_script.strip,
    name: "#{destination_type_ref}_from_#{source_type.name}",
    language: "painless",
    context: "update"
  )
end
|
261
|
+
|
262
|
+
# Describes how events of the source type update the derived type: which script to run, where the id,
# routing and rollover values come from, and which source fields are passed to the script as data params.
#
# @return [SchemaArtifacts::RuntimeMetadata::UpdateTarget] runtime metadata for the source type
# @api private
def runtime_metadata_for_source_type
  SchemaArtifacts::RuntimeMetadata::UpdateTarget.new(
    type: destination_type_ref.name,
    relationship: nil,
    script_id: painless_script.id,
    id_source: id_source,
    routing_value_source: routing_value_source,
    rollover_timestamp_value_source: rollover_timestamp_value_source,
    metadata_params: {},
    # One param per distinct source field, keyed by the source path itself. Source fields shared by
    # several derived fields collapse into a single hash entry.
    data_params: fields.map(&:source_field).to_h do |f|
      [f, SchemaArtifacts::RuntimeMetadata::DynamicParam.new(source_path: f, cardinality: :many)]
    end
  )
end
|
278
|
+
|
279
|
+
private
|
280
|
+
|
281
|
+
def generate_script
  # Fail fast: without any derived field definitions there is nothing to build a script from.
  if fields.empty?
    raise SchemaError, "`derive_indexed_type_fields` definition for #{destination_type_ref} (from #{source_type.name}) " \
      "has no derived field definitions."
  end

  # Sorting by destination field makes the generated script independent of the order in which the
  # derived fields were defined.
  sorted_fields = fields.sort_by(&:destination_field)

  # We use `uniq` here to avoid re-doing the same setup multiple times, since multiple fields can sometimes
  # need the same setup (such as initializing a common parent field to an empty map).
  function_defs = sorted_fields.flat_map(&:function_definitions).uniq.map(&:strip).sort

  setup_statements = [STATIC_SETUP_STATEMENTS] + sorted_fields.flat_map(&:setup_statements).uniq.map(&:strip)

  apply_update_statements = sorted_fields.map { |f| apply_update_statement(f).strip }

  # Note: comments in the script are effectively "free" since:
  #
  # - The compiler will strip them out.
  # - We only send the script to the datastore once (when configuring the cluster), and later
  #   reference it only by id--so we don't pay for the larger payload on each indexing request.
  <<~EOS
    #{function_defs.join("\n\n")}

    #{setup_statements.join("\n")}

    #{apply_update_statements.join("\n")}

    if (!#{SCRIPT_ERRORS_VAR}.isEmpty()) {
      throw new IllegalArgumentException("#{DERIVED_INDEX_FAILURE_MESSAGE_PREAMBLE}: " + #{SCRIPT_ERRORS_VAR}.join(" "));
    }

    // For records with no new values to index, only skip the update if the document itself doesn't already exist.
    // Otherwise create an (empty) document to reflect the fact that the id has been seen.
    if (ctx._source.id != null && #{sorted_fields.map { |f| was_noop_variable(f) }.join(" && ")}) {
      ctx.op = 'none';
    } else {
      // Here we set `_source.id` because if we don't, it'll never be set, making these docs subtly
      // different from docs indexed the normal way.
      //
      // Note also that we MUST use `params.id` instead of `ctx._id`. The latter works on an update
      // of an existing document, but is unavailable when we are inserting the document for the first time.
      ctx._source.id = params.id;
    }
  EOS
end
|
327
|
+
|
328
|
+
def apply_update_statement(field)
  # The field's operation reports whether it updated the document; the declared painless variable holds
  # the negation, i.e. whether this field was a no-op.
  "boolean #{was_noop_variable(field)} = !#{field.apply_operation_returning_update_status};"
end
|
331
|
+
|
332
|
+
def was_noop_variable(field)
  # Each `.` in a nested field path is replaced with `__` so the whole path can be used in a painless
  # variable name.
  "#{field.destination_field.gsub(".", "__")}_was_noop"
end
|
335
|
+
|
336
|
+
# Name of the painless variable that accumulates error messages while the script runs.
SCRIPT_ERRORS_VAR = "scriptErrors"

# Painless statements placed ahead of the per-field setup statements in every generated script: they bind
# `data` to the script params and declare the error accumulator.
STATIC_SETUP_STATEMENTS = <<~EOS.strip
  Map data = params.data;
  // A variable to accumulate script errors so that we can surface _all_ issues and not just the first.
  List #{SCRIPT_ERRORS_VAR} = new ArrayList();
EOS
|
343
|
+
end
|
344
|
+
end
|
345
|
+
end
|
346
|
+
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
# Copyright 2024 Block, Inc.
|
2
|
+
#
|
3
|
+
# Use of this source code is governed by an MIT-style
|
4
|
+
# license that can be found in the LICENSE file or at
|
5
|
+
# https://opensource.org/licenses/MIT.
|
6
|
+
#
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require "elastic_graph/constants"
|
10
|
+
|
11
|
+
module ElasticGraph
|
12
|
+
module SchemaDefinition
|
13
|
+
module Indexing
|
14
|
+
# Contains logic related to "event envelope"--the layer of metadata that wraps all indexing events.
#
# @api private
module EventEnvelope
  # Builds the envelope schema. `op`, `type`, `id`, `version` and the JSON schema version key are always
  # required; `record` is additionally required when `op` is `upsert` (currently the only allowed `op`).
  #
  # @param indexed_type_names [Array<String>] names of the indexed types
  # @param json_schema_version [Integer] the version of the JSON schema
  # @return [Hash<String, Object>] the JSON schema for the ElasticGraph event envelope for the given `indexed_type_names`.
  def self.json_schema(indexed_type_names, json_schema_version)
    {
      "type" => "object",
      "properties" => {
        "op" => {
          "type" => "string",
          "enum" => %w[upsert]
        },
        "type" => {
          "type" => "string",
          # Sorting doesn't really matter here, but it's nice for the output in the schema artifact to be consistent.
          "enum" => indexed_type_names.sort
        },
        "id" => {
          "type" => "string",
          "maxLength" => DEFAULT_MAX_KEYWORD_LENGTH
        },
        "version" => {
          "type" => "integer",
          "minimum" => 0,
          # Largest value representable as a signed 64-bit integer.
          "maximum" => (2**63) - 1
        },
        "record" => {
          "type" => "object"
        },
        "latency_timestamps" => {
          "type" => "object",
          # Only keys matching the pattern below are accepted.
          "additionalProperties" => false,
          "patternProperties" => {
            "^\\w+_at$" => {"type" => "string", "format" => "date-time"}
          }
        },
        JSON_SCHEMA_VERSION_KEY => {
          "const" => json_schema_version
        },
        "message_id" => {
          "type" => "string",
          "description" => "The optional ID of the message containing this event from whatever messaging system is being used between the publisher and the ElasticGraph indexer."
        }
      },
      "additionalProperties" => false,
      "required" => ["op", "type", "id", "version", JSON_SCHEMA_VERSION_KEY],
      "if" => {
        "properties" => {
          "op" => {"const" => "upsert"}
        }
      },
      "then" => {"required" => ["record"]}
    }
  end
end
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|