elasticgraph-schema_definition 0.18.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +7 -0
- data/elasticgraph-schema_definition.gemspec +26 -0
- data/lib/elastic_graph/schema_definition/api.rb +359 -0
- data/lib/elastic_graph/schema_definition/factory.rb +506 -0
- data/lib/elastic_graph/schema_definition/indexing/derived_fields/append_only_set.rb +79 -0
- data/lib/elastic_graph/schema_definition/indexing/derived_fields/field_initializer_support.rb +59 -0
- data/lib/elastic_graph/schema_definition/indexing/derived_fields/immutable_value.rb +99 -0
- data/lib/elastic_graph/schema_definition/indexing/derived_fields/min_or_max_value.rb +62 -0
- data/lib/elastic_graph/schema_definition/indexing/derived_indexed_type.rb +346 -0
- data/lib/elastic_graph/schema_definition/indexing/event_envelope.rb +74 -0
- data/lib/elastic_graph/schema_definition/indexing/field.rb +181 -0
- data/lib/elastic_graph/schema_definition/indexing/field_reference.rb +51 -0
- data/lib/elastic_graph/schema_definition/indexing/field_type/enum.rb +65 -0
- data/lib/elastic_graph/schema_definition/indexing/field_type/object.rb +113 -0
- data/lib/elastic_graph/schema_definition/indexing/field_type/scalar.rb +51 -0
- data/lib/elastic_graph/schema_definition/indexing/field_type/union.rb +70 -0
- data/lib/elastic_graph/schema_definition/indexing/index.rb +318 -0
- data/lib/elastic_graph/schema_definition/indexing/json_schema_field_metadata.rb +34 -0
- data/lib/elastic_graph/schema_definition/indexing/json_schema_with_metadata.rb +234 -0
- data/lib/elastic_graph/schema_definition/indexing/list_counts_mapping.rb +53 -0
- data/lib/elastic_graph/schema_definition/indexing/relationship_resolver.rb +96 -0
- data/lib/elastic_graph/schema_definition/indexing/rollover_config.rb +25 -0
- data/lib/elastic_graph/schema_definition/indexing/update_target_factory.rb +54 -0
- data/lib/elastic_graph/schema_definition/indexing/update_target_resolver.rb +195 -0
- data/lib/elastic_graph/schema_definition/json_schema_pruner.rb +61 -0
- data/lib/elastic_graph/schema_definition/mixins/can_be_graphql_only.rb +31 -0
- data/lib/elastic_graph/schema_definition/mixins/has_derived_graphql_type_customizations.rb +119 -0
- data/lib/elastic_graph/schema_definition/mixins/has_directives.rb +65 -0
- data/lib/elastic_graph/schema_definition/mixins/has_documentation.rb +74 -0
- data/lib/elastic_graph/schema_definition/mixins/has_indices.rb +281 -0
- data/lib/elastic_graph/schema_definition/mixins/has_readable_to_s_and_inspect.rb +46 -0
- data/lib/elastic_graph/schema_definition/mixins/has_subtypes.rb +116 -0
- data/lib/elastic_graph/schema_definition/mixins/has_type_info.rb +181 -0
- data/lib/elastic_graph/schema_definition/mixins/implements_interfaces.rb +122 -0
- data/lib/elastic_graph/schema_definition/mixins/supports_default_value.rb +47 -0
- data/lib/elastic_graph/schema_definition/mixins/supports_filtering_and_aggregation.rb +267 -0
- data/lib/elastic_graph/schema_definition/mixins/verifies_graphql_name.rb +38 -0
- data/lib/elastic_graph/schema_definition/rake_tasks.rb +190 -0
- data/lib/elastic_graph/schema_definition/results.rb +404 -0
- data/lib/elastic_graph/schema_definition/schema_artifact_manager.rb +482 -0
- data/lib/elastic_graph/schema_definition/schema_elements/argument.rb +56 -0
- data/lib/elastic_graph/schema_definition/schema_elements/built_in_types.rb +1541 -0
- data/lib/elastic_graph/schema_definition/schema_elements/deprecated_element.rb +21 -0
- data/lib/elastic_graph/schema_definition/schema_elements/directive.rb +40 -0
- data/lib/elastic_graph/schema_definition/schema_elements/enum_type.rb +189 -0
- data/lib/elastic_graph/schema_definition/schema_elements/enum_value.rb +73 -0
- data/lib/elastic_graph/schema_definition/schema_elements/enum_value_namer.rb +89 -0
- data/lib/elastic_graph/schema_definition/schema_elements/enums_for_indexed_types.rb +82 -0
- data/lib/elastic_graph/schema_definition/schema_elements/field.rb +1085 -0
- data/lib/elastic_graph/schema_definition/schema_elements/field_path.rb +112 -0
- data/lib/elastic_graph/schema_definition/schema_elements/field_source.rb +16 -0
- data/lib/elastic_graph/schema_definition/schema_elements/graphql_sdl_enumerator.rb +113 -0
- data/lib/elastic_graph/schema_definition/schema_elements/input_field.rb +31 -0
- data/lib/elastic_graph/schema_definition/schema_elements/input_type.rb +60 -0
- data/lib/elastic_graph/schema_definition/schema_elements/interface_type.rb +72 -0
- data/lib/elastic_graph/schema_definition/schema_elements/list_counts_state.rb +40 -0
- data/lib/elastic_graph/schema_definition/schema_elements/object_type.rb +53 -0
- data/lib/elastic_graph/schema_definition/schema_elements/relationship.rb +218 -0
- data/lib/elastic_graph/schema_definition/schema_elements/scalar_type.rb +310 -0
- data/lib/elastic_graph/schema_definition/schema_elements/sort_order_enum_value.rb +36 -0
- data/lib/elastic_graph/schema_definition/schema_elements/sub_aggregation_path.rb +66 -0
- data/lib/elastic_graph/schema_definition/schema_elements/type_namer.rb +237 -0
- data/lib/elastic_graph/schema_definition/schema_elements/type_reference.rb +353 -0
- data/lib/elastic_graph/schema_definition/schema_elements/type_with_subfields.rb +579 -0
- data/lib/elastic_graph/schema_definition/schema_elements/union_type.rb +157 -0
- data/lib/elastic_graph/schema_definition/scripting/file_system_repository.rb +77 -0
- data/lib/elastic_graph/schema_definition/scripting/script.rb +48 -0
- data/lib/elastic_graph/schema_definition/scripting/scripts/field/as_day_of_week.painless +24 -0
- data/lib/elastic_graph/schema_definition/scripting/scripts/field/as_time_of_day.painless +41 -0
- data/lib/elastic_graph/schema_definition/scripting/scripts/filter/by_time_of_day.painless +22 -0
- data/lib/elastic_graph/schema_definition/scripting/scripts/update/index_data.painless +93 -0
- data/lib/elastic_graph/schema_definition/state.rb +212 -0
- data/lib/elastic_graph/schema_definition/test_support.rb +113 -0
- metadata +513 -0
@@ -0,0 +1,99 @@
|
|
1
|
+
# Copyright 2024 Block, Inc.
|
2
|
+
#
|
3
|
+
# Use of this source code is governed by an MIT-style
|
4
|
+
# license that can be found in the LICENSE file or at
|
5
|
+
# https://opensource.org/licenses/MIT.
|
6
|
+
#
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
module ElasticGraph
  module SchemaDefinition
    module Indexing
      module DerivedFields
        # Responsible for providing bits of the painless script specific to a {DerivedIndexedType#immutable_value} field.
        #
        # Each instance describes a single derived field:
        #
        # - `destination_field`: dot-separated path (under `ctx._source`) of the field that is written.
        # - `source_field`: key used to look up the incoming values in the script's `data` map.
        # - `nullable` / `can_change_from_null`: interpolated into the generated call as painless boolean arguments.
        #
        # @api private
        class ImmutableValue < ::Data.define(:destination_field, :source_field, :nullable, :can_change_from_null)
          # `Data.define` provides the following methods:
          # @dynamic destination_field, source_field

          # @return [String] a line of painless code to manage an immutable value field and return a boolean indicating if it was updated.
          def apply_operation_returning_update_status
            # Split the destination path into its parent path and leaf name, e.g. "a.b.c" => ["a", "b"] and "c".
            *parent_parts, field = destination_field.split(".")
            parent_parts = ["ctx", "_source"] + parent_parts

            %{immutableValue_idempotentlyUpdateValue(scriptErrors, data["#{source_field}"], #{parent_parts.join(".")}, "#{destination_field}", "#{field}", #{nullable}, #{can_change_from_null})}
          end

          # @return [Array<String>] a list of painless statements that must be called at the top of the script to set things up.
          def setup_statements
            FieldInitializerSupport.build_empty_value_initializers(destination_field, leaf_value: :leave_unset)
          end

          # @return [Array<String>] painless functions required by `immutable_value`.
          def function_definitions
            [IDEMPOTENTLY_SET_VALUE]
          end

          private

          # Painless function which manages an `immutable_value` field.
          #
          # NOTE: a bare `private` only affects methods defined after it, so this constant remains publicly readable.
          IDEMPOTENTLY_SET_VALUE = <<~EOS
            boolean immutableValue_idempotentlyUpdateValue(List scriptErrors, List values, def parentObject, String fullPath, String fieldName, boolean nullable, boolean canChangeFromNull) {
              boolean fieldAlreadySet = parentObject.containsKey(fieldName);

              // `values` is always passed to us as a `List` (the indexer normalizes to a list, wrapping single
              // values in a list as needed) but we only ever expect at most 1 element.
              def newValueCandidate = values.isEmpty() ? null : values[0];

              if (fieldAlreadySet) {
                def currentValue = parentObject[fieldName];

                // Usually we do not allow `immutable_value` fields to ever change values. However, we make
                // a special case for `null`, but only when `can_change_from_null: true` has been configured.
                // This can be important when deriving a field that has not always existed on the source events.
                // On early events, the value may be `null`, and, when this is enabled, we do not want that to
                // interfere with our ability to set the value to the correct non-null value based on a different
                // event which has a value for the source field.
                if (canChangeFromNull) {
                  if (currentValue == null) {
                    parentObject[fieldName] = newValueCandidate;
                    return true;
                  }

                  // When `can_change_from_null: true` is enabled we also need to ignore NEW `null` values that we
                  // see _after_ a non-null value. This is necessary because an ElasticGraph invariant is that events
                  // can be processed in any order. So we might process an old event (predating the existence of the
                  // source field) after we've already set the field to a non-null value. We must always "converge"
                  // on the same indexed state regardless, of the order events are seen, so here we just ignore it.
                  if (newValueCandidate == null) {
                    return false;
                  }
                }

                // Otherwise, if the values differ, it means we are attempting to mutate the immutable value field, which we cannot allow.
                if (currentValue != newValueCandidate) {
                  if (currentValue == null) {
                    scriptErrors.add("Field `" + fullPath + "` cannot be changed (" + currentValue + " => " + newValueCandidate + "). Set `can_change_from_null: true` on the `immutable_value` definition to allow this.");
                  } else {
                    scriptErrors.add("Field `" + fullPath + "` cannot be changed (" + currentValue + " => " + newValueCandidate + ").");
                  }
                }

                return false;
              }

              if (newValueCandidate == null && !nullable) {
                scriptErrors.add("Field `" + fullPath + "` cannot be set to `null`, but the source event contains no value for it. Remove `nullable: false` from the `immutable_value` definition to allow this.");
                return false;
              }

              parentObject[fieldName] = newValueCandidate;
              return true;
            }
          EOS
        end
      end
    end
  end
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
# Copyright 2024 Block, Inc.
|
2
|
+
#
|
3
|
+
# Use of this source code is governed by an MIT-style
|
4
|
+
# license that can be found in the LICENSE file or at
|
5
|
+
# https://opensource.org/licenses/MIT.
|
6
|
+
#
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
module ElasticGraph
  module SchemaDefinition
    module Indexing
      module DerivedFields
        # Responsible for providing bits of the painless script specific to a {DerivedIndexedType#min_value} or
        # {DerivedIndexedType#max_value} field.
        #
        # `destination_field` is the dot-separated path (under `ctx._source`) of the field that is written,
        # `source_field` is the key used to look up the incoming values in the script's `data` map, and
        # `min_or_max` (`:min` or `:max`) selects which painless function is generated and called.
        #
        # @api private
        class MinOrMaxValue < ::Data.define(:destination_field, :source_field, :min_or_max)
          # `Data.define` provides the following methods:
          # @dynamic destination_field, source_field, min_or_max

          # @return [String] a line of painless code to manage a min or max value field and return a boolean indicating if it was updated.
          def apply_operation_returning_update_status
            # Split the destination path into its parent path and leaf name, e.g. "a.b.c" => ["a", "b"] and "c".
            *parent_parts, field = destination_field.split(".")
            parent_parts = ["ctx", "_source"] + parent_parts

            %{#{min_or_max}Value_idempotentlyUpdateValue(data["#{source_field}"], #{parent_parts.join(".")}, "#{field}")}
          end

          # @return [Array<String>] a list of painless statements that must be called at the top of the script to set things up.
          def setup_statements
            FieldInitializerSupport.build_empty_value_initializers(destination_field, leaf_value: :leave_unset)
          end

          # @return [Array<String>] painless functions required by a min or max value field.
          def function_definitions
            [MinOrMaxValue.function_def(min_or_max)]
          end

          # @param min_or_max [:min, :max] which type of function to generate.
          # @return [String] painless function for managing a min or max field.
          # @raise [ArgumentError] if `min_or_max` is neither `:min` nor `:max`
          def self.function_def(min_or_max)
            # Be explicit about the two supported values: silently falling back to `>` for anything that is
            # not `:min` (e.g. the string "min") would generate a function whose name and comparison disagree.
            operator =
              case min_or_max
              when :min then "<"
              when :max then ">"
              else raise ArgumentError, "`min_or_max` must be `:min` or `:max`, but got: #{min_or_max.inspect}"
              end

            <<~EOS
              boolean #{min_or_max}Value_idempotentlyUpdateValue(List values, def parentObject, String fieldName) {
                def currentFieldValue = parentObject[fieldName];
                def #{min_or_max}NewValue = values.isEmpty() ? null : Collections.#{min_or_max}(values);

                if (currentFieldValue == null || (#{min_or_max}NewValue != null && #{min_or_max}NewValue.compareTo(currentFieldValue) #{operator} 0)) {
                  parentObject[fieldName] = #{min_or_max}NewValue;
                  return true;
                }

                return false;
              }
            EOS
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,346 @@
|
|
1
|
+
# Copyright 2024 Block, Inc.
|
2
|
+
#
|
3
|
+
# Use of this source code is governed by an MIT-style
|
4
|
+
# license that can be found in the LICENSE file or at
|
5
|
+
# https://opensource.org/licenses/MIT.
|
6
|
+
#
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require "elastic_graph/schema_artifacts/runtime_metadata/update_target"
|
10
|
+
require "elastic_graph/schema_definition/indexing/derived_fields/append_only_set"
|
11
|
+
require "elastic_graph/schema_definition/indexing/derived_fields/immutable_value"
|
12
|
+
require "elastic_graph/schema_definition/indexing/derived_fields/min_or_max_value"
|
13
|
+
require "elastic_graph/schema_definition/scripting/script"
|
14
|
+
|
15
|
+
module ElasticGraph
  module SchemaDefinition
    module Indexing
      # Used to configure the derivation of a derived indexed type from a source type.
      # This type is yielded from {Mixins::HasIndices#derive_indexed_type_fields}.
      #
      # @example Derive a `Course` type from `StudentCourseEnrollment` events
      #   ElasticGraph.define_schema do |schema|
      #     # `StudentCourseEnrollment` is a directly indexed type.
      #     schema.object_type "StudentCourseEnrollment" do |t|
      #       t.field "id", "ID"
      #       t.field "courseId", "ID"
      #       t.field "courseName", "String"
      #       t.field "studentName", "String"
      #       t.field "courseStartDate", "Date"
      #
      #       t.index "student_course_enrollments"
      #
      #       # Here we define how the `Course` indexed type is derived when we index `StudentCourseEnrollment` events.
      #       t.derive_indexed_type_fields "Course", from_id: "courseId" do |derive|
      #         # `derive` is an instance of `DerivedIndexedType`.
      #         derive.immutable_value "name", from: "courseName"
      #         derive.append_only_set "students", from: "studentName"
      #         derive.min_value "firstOfferedDate", from: "courseStartDate"
      #         derive.max_value "mostRecentlyOfferedDate", from: "courseStartDate"
      #       end
      #     end
      #
      #     # `Course` is an indexed type that is derived entirely from `StudentCourseEnrollment` events.
      #     schema.object_type "Course" do |t|
      #       t.field "id", "ID"
      #       t.field "name", "String"
      #       t.field "students", "[String!]!"
      #       t.field "firstOfferedDate", "Date"
      #       t.field "mostRecentlyOfferedDate", "Date"
      #
      #       t.index "courses"
      #     end
      #   end
      #
      # @!attribute source_type
      #   @return [SchemaElements::ObjectType] the type used as a source for this derive type
      # @!attribute destination_type_ref
      #   @private
      # @!attribute id_source
      #   @return [String] path to field on the source type used as `id` on the derived type
      # @!attribute routing_value_source
      #   @return [String, nil] path to field on the source type used for shard routing
      # @!attribute rollover_timestamp_value_source
      #   @return [String, nil] path to field on the source type used as the timestamp field for rollover
      # @!attribute fields
      #   @return [Array<DerivedFields::AppendOnlySet, DerivedFields::ImmutableValue, DerivedFields::MinOrMaxValue>] derived field definitions
      class DerivedIndexedType < ::Struct.new(
        :source_type,
        :destination_type_ref,
        :id_source,
        :routing_value_source,
        :rollover_timestamp_value_source,
        :fields
      )
        # Builds the instance with an empty `fields` list and then yields it so that the caller's block can
        # register derived fields (via {#append_only_set}, {#immutable_value}, {#min_value} and {#max_value}).
        #
        # @param source_type [SchemaElements::ObjectType] the type used as a source for this derive type
        # @param destination_type_ref [SchemaElements::TypeReference] the derived type
        # @param id_source [String] path to field on the source type used as `id` on the derived type
        # @param routing_value_source [String, nil] path to field on the source type used for shard routing
        # @param rollover_timestamp_value_source [String, nil] path to field on the source type used as the timestamp field for rollover
        # @yield [DerivedIndexedType] the `DerivedIndexedType` instance
        # @api private
        def initialize(
          source_type:,
          destination_type_ref:,
          id_source:,
          routing_value_source:,
          rollover_timestamp_value_source:
        )
          fields = [] # : ::Array[_DerivedField]
          super(
            source_type: source_type,
            destination_type_ref: destination_type_ref,
            id_source: id_source,
            routing_value_source: routing_value_source,
            rollover_timestamp_value_source: rollover_timestamp_value_source,
            fields: fields
          )
          yield self
        end

        # Configures `field_name` (on the derived indexing type) to contain the set union of all values from
        # the `from` field on the source type. Values are only ever appended to the set, so the field will
        # act as an append-only set.
        #
        # @param field_name [String] name of field on the derived indexing type to store the derived set
        # @param from [String] path to field on the source type to source values from
        # @return [Array<DerivedFields::AppendOnlySet, DerivedFields::ImmutableValue, DerivedFields::MinOrMaxValue>]
        #   the updated `fields` list (the result of `Array#<<`)
        #
        # @example
        #   ElasticGraph.define_schema do |schema|
        #     schema.object_type "StudentCourseEnrollment" do |t|
        #       t.field "id", "ID"
        #       t.field "courseId", "ID"
        #       t.field "studentName", "String"
        #
        #       t.index "student_course_enrollments"
        #
        #       t.derive_indexed_type_fields "Course", from_id: "courseId" do |derive|
        #         derive.append_only_set "students", from: "studentName"
        #       end
        #     end
        #
        #     schema.object_type "Course" do |t|
        #       t.field "id", "ID"
        #       t.field "students", "[String!]!"
        #
        #       t.index "courses"
        #     end
        #   end
        def append_only_set(field_name, from:)
          fields << DerivedFields::AppendOnlySet.new(field_name, from)
        end

        # Configures `field_name` (on the derived indexing type) to contain a single immutable value from the
        # `from` field on the source type. Immutability is enforced by triggering an indexing failure with a
        # clear error if any event's source value is different from the value already indexed on this field.
        #
        # @param field_name [String] name of field on the derived indexing type to store the derived value
        # @param from [String] path to field on the source type to source values from
        # @param nullable [Boolean] whether the field is allowed to be set to `null`. When set to false, events
        #   that contain a `null` value in the `from` field will be rejected instead of setting the field's value
        #   to `null`.
        # @param can_change_from_null [Boolean] whether a one-time mutation of the field value is allowed from
        #   `null` to a non-`null` value. This can be useful when dealing with a field that may not have a value
        #   on all source events. For example, if the source field was not initially part of the schema of your
        #   source dataset, you may have old records that lack a value for this field. When set, this option
        #   allows a one-time mutation of the field value from `null` to a non-`null` value. Once set to a
        #   non-`null` value, any additional `null` values that are encountered will be ignored (ensuring that
        #   the indexed data converges on the same state regardless of the order the events are ingested in).
        #   Note: this option cannot be enabled when `nullable: false` has been set.
        # @return [Array<DerivedFields::AppendOnlySet, DerivedFields::ImmutableValue, DerivedFields::MinOrMaxValue>]
        #   the updated `fields` list (the result of `Array#<<`)
        # @raise [SchemaError] when `can_change_from_null: true` is combined with `nullable: false`
        #
        # @example
        #   ElasticGraph.define_schema do |schema|
        #     schema.object_type "StudentCourseEnrollment" do |t|
        #       t.field "id", "ID"
        #       t.field "courseId", "ID"
        #       t.field "courseName", "String"
        #
        #       t.index "student_course_enrollments"
        #
        #       t.derive_indexed_type_fields "Course", from_id: "courseId" do |derive|
        #         derive.immutable_value "name", from: "courseName"
        #       end
        #     end
        #
        #     schema.object_type "Course" do |t|
        #       t.field "id", "ID"
        #       t.field "name", "String"
        #
        #       t.index "courses"
        #     end
        #   end
        def immutable_value(field_name, from:, nullable: true, can_change_from_null: false)
          if !nullable && can_change_from_null
            raise SchemaError, "`can_change_from_null: true` is not allowed with `nullable: false` (as there would be no `null` values to change from)."
          end

          fields << DerivedFields::ImmutableValue.new(
            destination_field: field_name,
            source_field: from,
            nullable: nullable,
            can_change_from_null: can_change_from_null
          )
        end

        # Configures `field_name` (on the derived indexing type) to contain the minimum of all values from the `from`
        # field on the source type.
        #
        # @param field_name [String] name of field on the derived indexing type to store the derived value
        # @param from [String] path to field on the source type to source values from
        # @return [Array<DerivedFields::AppendOnlySet, DerivedFields::ImmutableValue, DerivedFields::MinOrMaxValue>]
        #   the updated `fields` list (the result of `Array#<<`)
        #
        # @example
        #   ElasticGraph.define_schema do |schema|
        #     schema.object_type "StudentCourseEnrollment" do |t|
        #       t.field "id", "ID"
        #       t.field "courseId", "ID"
        #       t.field "courseStartDate", "Date"
        #
        #       t.index "student_course_enrollments"
        #
        #       t.derive_indexed_type_fields "Course", from_id: "courseId" do |derive|
        #         derive.min_value "firstOfferedDate", from: "courseStartDate"
        #       end
        #     end
        #
        #     schema.object_type "Course" do |t|
        #       t.field "id", "ID"
        #       t.field "firstOfferedDate", "Date"
        #
        #       t.index "courses"
        #     end
        #   end
        def min_value(field_name, from:)
          fields << DerivedFields::MinOrMaxValue.new(field_name, from, :min)
        end

        # Configures `field_name` (on the derived indexing type) to contain the maximum of all values from the `from`
        # field on the source type.
        #
        # @param field_name [String] name of field on the derived indexing type to store the derived value
        # @param from [String] path to field on the source type to source values from
        # @return [Array<DerivedFields::AppendOnlySet, DerivedFields::ImmutableValue, DerivedFields::MinOrMaxValue>]
        #   the updated `fields` list (the result of `Array#<<`)
        #
        # @example
        #   ElasticGraph.define_schema do |schema|
        #     schema.object_type "StudentCourseEnrollment" do |t|
        #       t.field "id", "ID"
        #       t.field "courseId", "ID"
        #       t.field "courseStartDate", "Date"
        #
        #       t.index "student_course_enrollments"
        #
        #       t.derive_indexed_type_fields "Course", from_id: "courseId" do |derive|
        #         derive.max_value "mostRecentlyOfferedDate", from: "courseStartDate"
        #       end
        #     end
        #
        #     schema.object_type "Course" do |t|
        #       t.field "id", "ID"
        #       t.field "mostRecentlyOfferedDate", "Date"
        #
        #       t.index "courses"
        #     end
        #   end
        def max_value(field_name, from:)
          fields << DerivedFields::MinOrMaxValue.new(field_name, from, :max)
        end

        # @return [Scripting::Script] Painless script that will maintain the derived fields
        # @api private
        def painless_script
          Scripting::Script.new(
            source: generate_script.strip,
            name: "#{destination_type_ref}_from_#{source_type.name}",
            language: "painless",
            context: "update"
          )
        end

        # @return [SchemaArtifacts::RuntimeMetadata::UpdateTarget] runtime metadata for the source type
        # @api private
        def runtime_metadata_for_source_type
          SchemaArtifacts::RuntimeMetadata::UpdateTarget.new(
            type: destination_type_ref.name,
            relationship: nil,
            script_id: painless_script.id,
            id_source: id_source,
            routing_value_source: routing_value_source,
            rollover_timestamp_value_source: rollover_timestamp_value_source,
            metadata_params: {},
            # One `data` param per source field, keyed by (and read from) the source field path.
            data_params: fields.map(&:source_field).to_h do |f|
              [f, SchemaArtifacts::RuntimeMetadata::DynamicParam.new(source_path: f, cardinality: :many)]
            end
          )
        end

        private

        # Assembles the full painless script source: function definitions, setup statements, one update
        # statement per derived field, the error check, and the final noop/`id` handling.
        #
        # @return [String] the (unstripped) painless script source
        # @raise [SchemaError] when no derived fields have been defined
        def generate_script
          if fields.empty?
            raise SchemaError, "`derive_indexed_type_fields` definition for #{destination_type_ref} (from #{source_type.name}) " \
              "has no derived field definitions."
          end

          # Sort by destination field so that the order of the generated statements does not depend on definition order.
          sorted_fields = fields.sort_by(&:destination_field)

          # Multiple fields can share the same painless function, so we use `uniq` to define each function only once.
          function_defs = sorted_fields.flat_map(&:function_definitions).uniq.map(&:strip).sort

          # We use `uniq` here to avoid re-doing the same setup multiple times, since multiple fields can sometimes
          # need the same setup (such as initializing a common parent field to an empty map).
          setup_statements = [STATIC_SETUP_STATEMENTS] + sorted_fields.flat_map(&:setup_statements).uniq.map(&:strip)

          apply_update_statements = sorted_fields.map { |f| apply_update_statement(f).strip }

          # Note: comments in the script are effectively "free" since:
          #
          # - The compiler will strip them out.
          # - We only send the script to the datastore once (when configuring the cluster), and later
          #   reference it only by id--so we don't pay for the larger payload on each indexing request.
          <<~EOS
            #{function_defs.join("\n\n")}

            #{setup_statements.join("\n")}

            #{apply_update_statements.join("\n")}

            if (!#{SCRIPT_ERRORS_VAR}.isEmpty()) {
              throw new IllegalArgumentException("#{DERIVED_INDEX_FAILURE_MESSAGE_PREAMBLE}: " + #{SCRIPT_ERRORS_VAR}.join(" "));
            }

            // For records with no new values to index, only skip the update if the document itself doesn't already exist.
            // Otherwise create an (empty) document to reflect the fact that the id has been seen.
            if (ctx._source.id != null && #{sorted_fields.map { |f| was_noop_variable(f) }.join(" && ")}) {
              ctx.op = 'none';
            } else {
              // Here we set `_source.id` because if we don't, it'll never be set, making these docs subtly
              // different from docs indexed the normal way.
              //
              // Note also that we MUST use `params.id` instead of `ctx._id`. The latter works on an update
              // of an existing document, but is unavailable when we are inserting the document for the first time.
              ctx._source.id = params.id;
            }
          EOS
        end

        # @param field [#destination_field, #apply_operation_returning_update_status] a derived field definition
        # @return [String] painless statement that applies the field's update and records whether it was a noop
        def apply_update_statement(field)
          "boolean #{was_noop_variable(field)} = !#{field.apply_operation_returning_update_status};"
        end

        # @param field [#destination_field] a derived field definition
        # @return [String] name of the painless boolean variable tracking whether the field's update was a noop.
        #   Dots in the destination path are replaced with `__` to build the variable name.
        def was_noop_variable(field)
          "#{field.destination_field.gsub(".", "__")}_was_noop"
        end

        # Name of the painless variable used to accumulate script errors.
        SCRIPT_ERRORS_VAR = "scriptErrors"

        # Painless statements emitted at the top of every generated script, before any per-field setup.
        STATIC_SETUP_STATEMENTS = <<~EOS.strip
          Map data = params.data;
          // A variable to accumulate script errors so that we can surface _all_ issues and not just the first.
          List #{SCRIPT_ERRORS_VAR} = new ArrayList();
        EOS
      end
    end
  end
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
# Copyright 2024 Block, Inc.
|
2
|
+
#
|
3
|
+
# Use of this source code is governed by an MIT-style
|
4
|
+
# license that can be found in the LICENSE file or at
|
5
|
+
# https://opensource.org/licenses/MIT.
|
6
|
+
#
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require "elastic_graph/constants"
|
10
|
+
|
11
|
+
module ElasticGraph
  module SchemaDefinition
    module Indexing
      # Contains logic related to "event envelope"--the layer of metadata that wraps all indexing events.
      #
      # @api private
      module EventEnvelope
        # Builds the JSON schema describing the envelope fields (`op`, `type`, `id`, `version`, `record`, etc.)
        # that wrap an indexing event.
        #
        # @param indexed_type_names [Array<String>] names of the indexed types
        # @param json_schema_version [Integer] the version of the JSON schema
        # @return [Hash<String, Object>] the JSON schema for the ElasticGraph event envelope for the given `indexed_type_names`.
        def self.json_schema(indexed_type_names, json_schema_version)
          {
            "type" => "object",
            "properties" => {
              "op" => {
                "type" => "string",
                "enum" => %w[upsert]
              },
              "type" => {
                "type" => "string",
                # Sorting doesn't really matter here, but it's nice for the output in the schema artifact to be consistent.
                "enum" => indexed_type_names.sort
              },
              "id" => {
                "type" => "string",
                "maxLength" => DEFAULT_MAX_KEYWORD_LENGTH
              },
              "version" => {
                "type" => "integer",
                "minimum" => 0,
                # 2^63 - 1, the maximum signed 64-bit integer value.
                "maximum" => (2**63) - 1
              },
              "record" => {
                "type" => "object"
              },
              "latency_timestamps" => {
                "type" => "object",
                "additionalProperties" => false,
                # Only keys ending in `_at` are allowed, and each value must be a date-time string.
                "patternProperties" => {
                  "^\\w+_at$" => {"type" => "string", "format" => "date-time"}
                }
              },
              JSON_SCHEMA_VERSION_KEY => {
                "const" => json_schema_version
              },
              "message_id" => {
                "type" => "string",
                "description" => "The optional ID of the message containing this event from whatever messaging system is being used between the publisher and the ElasticGraph indexer."
              }
            },
            "additionalProperties" => false,
            "required" => ["op", "type", "id", "version", JSON_SCHEMA_VERSION_KEY],
            # `record` is only required when the event's `op` is `upsert`.
            "if" => {
              "properties" => {
                "op" => {"const" => "upsert"}
              }
            },
            "then" => {"required" => ["record"]}
          }
        end
      end
    end
  end
end
|