elasticgraph-schema_definition 0.18.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (76) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +21 -0
  3. data/README.md +7 -0
  4. data/elasticgraph-schema_definition.gemspec +26 -0
  5. data/lib/elastic_graph/schema_definition/api.rb +359 -0
  6. data/lib/elastic_graph/schema_definition/factory.rb +506 -0
  7. data/lib/elastic_graph/schema_definition/indexing/derived_fields/append_only_set.rb +79 -0
  8. data/lib/elastic_graph/schema_definition/indexing/derived_fields/field_initializer_support.rb +59 -0
  9. data/lib/elastic_graph/schema_definition/indexing/derived_fields/immutable_value.rb +99 -0
  10. data/lib/elastic_graph/schema_definition/indexing/derived_fields/min_or_max_value.rb +62 -0
  11. data/lib/elastic_graph/schema_definition/indexing/derived_indexed_type.rb +346 -0
  12. data/lib/elastic_graph/schema_definition/indexing/event_envelope.rb +74 -0
  13. data/lib/elastic_graph/schema_definition/indexing/field.rb +181 -0
  14. data/lib/elastic_graph/schema_definition/indexing/field_reference.rb +51 -0
  15. data/lib/elastic_graph/schema_definition/indexing/field_type/enum.rb +65 -0
  16. data/lib/elastic_graph/schema_definition/indexing/field_type/object.rb +113 -0
  17. data/lib/elastic_graph/schema_definition/indexing/field_type/scalar.rb +51 -0
  18. data/lib/elastic_graph/schema_definition/indexing/field_type/union.rb +70 -0
  19. data/lib/elastic_graph/schema_definition/indexing/index.rb +318 -0
  20. data/lib/elastic_graph/schema_definition/indexing/json_schema_field_metadata.rb +34 -0
  21. data/lib/elastic_graph/schema_definition/indexing/json_schema_with_metadata.rb +234 -0
  22. data/lib/elastic_graph/schema_definition/indexing/list_counts_mapping.rb +53 -0
  23. data/lib/elastic_graph/schema_definition/indexing/relationship_resolver.rb +96 -0
  24. data/lib/elastic_graph/schema_definition/indexing/rollover_config.rb +25 -0
  25. data/lib/elastic_graph/schema_definition/indexing/update_target_factory.rb +54 -0
  26. data/lib/elastic_graph/schema_definition/indexing/update_target_resolver.rb +195 -0
  27. data/lib/elastic_graph/schema_definition/json_schema_pruner.rb +61 -0
  28. data/lib/elastic_graph/schema_definition/mixins/can_be_graphql_only.rb +31 -0
  29. data/lib/elastic_graph/schema_definition/mixins/has_derived_graphql_type_customizations.rb +119 -0
  30. data/lib/elastic_graph/schema_definition/mixins/has_directives.rb +65 -0
  31. data/lib/elastic_graph/schema_definition/mixins/has_documentation.rb +74 -0
  32. data/lib/elastic_graph/schema_definition/mixins/has_indices.rb +281 -0
  33. data/lib/elastic_graph/schema_definition/mixins/has_readable_to_s_and_inspect.rb +46 -0
  34. data/lib/elastic_graph/schema_definition/mixins/has_subtypes.rb +116 -0
  35. data/lib/elastic_graph/schema_definition/mixins/has_type_info.rb +181 -0
  36. data/lib/elastic_graph/schema_definition/mixins/implements_interfaces.rb +122 -0
  37. data/lib/elastic_graph/schema_definition/mixins/supports_default_value.rb +47 -0
  38. data/lib/elastic_graph/schema_definition/mixins/supports_filtering_and_aggregation.rb +267 -0
  39. data/lib/elastic_graph/schema_definition/mixins/verifies_graphql_name.rb +38 -0
  40. data/lib/elastic_graph/schema_definition/rake_tasks.rb +190 -0
  41. data/lib/elastic_graph/schema_definition/results.rb +404 -0
  42. data/lib/elastic_graph/schema_definition/schema_artifact_manager.rb +482 -0
  43. data/lib/elastic_graph/schema_definition/schema_elements/argument.rb +56 -0
  44. data/lib/elastic_graph/schema_definition/schema_elements/built_in_types.rb +1541 -0
  45. data/lib/elastic_graph/schema_definition/schema_elements/deprecated_element.rb +21 -0
  46. data/lib/elastic_graph/schema_definition/schema_elements/directive.rb +40 -0
  47. data/lib/elastic_graph/schema_definition/schema_elements/enum_type.rb +189 -0
  48. data/lib/elastic_graph/schema_definition/schema_elements/enum_value.rb +73 -0
  49. data/lib/elastic_graph/schema_definition/schema_elements/enum_value_namer.rb +89 -0
  50. data/lib/elastic_graph/schema_definition/schema_elements/enums_for_indexed_types.rb +82 -0
  51. data/lib/elastic_graph/schema_definition/schema_elements/field.rb +1085 -0
  52. data/lib/elastic_graph/schema_definition/schema_elements/field_path.rb +112 -0
  53. data/lib/elastic_graph/schema_definition/schema_elements/field_source.rb +16 -0
  54. data/lib/elastic_graph/schema_definition/schema_elements/graphql_sdl_enumerator.rb +113 -0
  55. data/lib/elastic_graph/schema_definition/schema_elements/input_field.rb +31 -0
  56. data/lib/elastic_graph/schema_definition/schema_elements/input_type.rb +60 -0
  57. data/lib/elastic_graph/schema_definition/schema_elements/interface_type.rb +72 -0
  58. data/lib/elastic_graph/schema_definition/schema_elements/list_counts_state.rb +40 -0
  59. data/lib/elastic_graph/schema_definition/schema_elements/object_type.rb +53 -0
  60. data/lib/elastic_graph/schema_definition/schema_elements/relationship.rb +218 -0
  61. data/lib/elastic_graph/schema_definition/schema_elements/scalar_type.rb +310 -0
  62. data/lib/elastic_graph/schema_definition/schema_elements/sort_order_enum_value.rb +36 -0
  63. data/lib/elastic_graph/schema_definition/schema_elements/sub_aggregation_path.rb +66 -0
  64. data/lib/elastic_graph/schema_definition/schema_elements/type_namer.rb +237 -0
  65. data/lib/elastic_graph/schema_definition/schema_elements/type_reference.rb +353 -0
  66. data/lib/elastic_graph/schema_definition/schema_elements/type_with_subfields.rb +579 -0
  67. data/lib/elastic_graph/schema_definition/schema_elements/union_type.rb +157 -0
  68. data/lib/elastic_graph/schema_definition/scripting/file_system_repository.rb +77 -0
  69. data/lib/elastic_graph/schema_definition/scripting/script.rb +48 -0
  70. data/lib/elastic_graph/schema_definition/scripting/scripts/field/as_day_of_week.painless +24 -0
  71. data/lib/elastic_graph/schema_definition/scripting/scripts/field/as_time_of_day.painless +41 -0
  72. data/lib/elastic_graph/schema_definition/scripting/scripts/filter/by_time_of_day.painless +22 -0
  73. data/lib/elastic_graph/schema_definition/scripting/scripts/update/index_data.painless +93 -0
  74. data/lib/elastic_graph/schema_definition/state.rb +212 -0
  75. data/lib/elastic_graph/schema_definition/test_support.rb +113 -0
  76. metadata +513 -0
@@ -0,0 +1,99 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ module ElasticGraph
10
+ module SchemaDefinition
11
+ module Indexing
12
+ module DerivedFields
13
+ # Responsible for providing bits of the painless script specific to a {DerivedIndexedType#immutable_value} field.
14
+ #
15
+ # @api private
16
+ class ImmutableValue < ::Data.define(:destination_field, :source_field, :nullable, :can_change_from_null)
17
+ # `Data.define` provides the following methods:
18
+ # @dynamic destination_field, source_field, nullable, can_change_from_null
19
+
20
+ # @return [String] a line of painless code to manage an immutable value field and return a boolean indicating if it was updated.
21
+ def apply_operation_returning_update_status
22
+ *parent_parts, field = destination_field.split(".")
23
+ parent_parts = ["ctx", "_source"] + parent_parts
24
+
25
+ %{immutableValue_idempotentlyUpdateValue(scriptErrors, data["#{source_field}"], #{parent_parts.join(".")}, "#{destination_field}", "#{field}", #{nullable}, #{can_change_from_null})}
26
+ end
27
+
28
+ # @return [Array<String>] a list of painless statements that must be called at the top of the script to set things up.
29
+ def setup_statements
30
+ FieldInitializerSupport.build_empty_value_initializers(destination_field, leaf_value: :leave_unset)
31
+ end
32
+
33
+ # @return [Array<String>] painless functions required by `immutable_value`.
34
+ def function_definitions
35
+ [IDEMPOTENTLY_SET_VALUE]
36
+ end
37
+
38
+ private
39
+
40
+ # Painless function which manages an `immutable_value` field.
41
+ IDEMPOTENTLY_SET_VALUE = <<~EOS
42
+ boolean immutableValue_idempotentlyUpdateValue(List scriptErrors, List values, def parentObject, String fullPath, String fieldName, boolean nullable, boolean canChangeFromNull) {
43
+ boolean fieldAlreadySet = parentObject.containsKey(fieldName);
44
+
45
+ // `values` is always passed to us as a `List` (the indexer normalizes to a list, wrapping single
46
+ // values in a list as needed) but we only ever expect at most 1 element.
47
+ def newValueCandidate = values.isEmpty() ? null : values[0];
48
+
49
+ if (fieldAlreadySet) {
50
+ def currentValue = parentObject[fieldName];
51
+
52
+ // Usually we do not allow `immutable_value` fields to ever change values. However, we make
53
+ // a special case for `null`, but only when `can_change_from_null: true` has been configured.
54
+ // This can be important when deriving a field that has not always existed on the source events.
55
+ // On early events, the value may be `null`, and, when this is enabled, we do not want that to
56
+ // interfere with our ability to set the value to the correct non-null value based on a different
57
+ // event which has a value for the source field.
58
+ if (canChangeFromNull) {
59
+ if (currentValue == null) {
60
+ parentObject[fieldName] = newValueCandidate;
61
+ return true;
62
+ }
63
+
64
+ // When `can_change_from_null: true` is enabled we also need to ignore NEW `null` values that we
65
+ // see _after_ a non-null value. This is necessary because an ElasticGraph invariant is that events
66
+ // can be processed in any order. So we might process an old event (predating the existence of the
67
+ // source field) after we've already set the field to a non-null value. We must always "converge"
68
+ // on the same indexed state regardless, of the order events are seen, so here we just ignore it.
69
+ if (newValueCandidate == null) {
70
+ return false;
71
+ }
72
+ }
73
+
74
+ // Otherwise, if the values differ, it means we are attempting to mutate the immutable value field, which we cannot allow.
75
+ if (currentValue != newValueCandidate) {
76
+ if (currentValue == null) {
77
+ scriptErrors.add("Field `" + fullPath + "` cannot be changed (" + currentValue + " => " + newValueCandidate + "). Set `can_change_from_null: true` on the `immutable_value` definition to allow this.");
78
+ } else {
79
+ scriptErrors.add("Field `" + fullPath + "` cannot be changed (" + currentValue + " => " + newValueCandidate + ").");
80
+ }
81
+ }
82
+
83
+ return false;
84
+ }
85
+
86
+ if (newValueCandidate == null && !nullable) {
87
+ scriptErrors.add("Field `" + fullPath + "` cannot be set to `null`, but the source event contains no value for it. Remove `nullable: false` from the `immutable_value` definition to allow this.");
88
+ return false;
89
+ }
90
+
91
+ parentObject[fieldName] = newValueCandidate;
92
+ return true;
93
+ }
94
+ EOS
95
+ end
96
+ end
97
+ end
98
+ end
99
+ end
@@ -0,0 +1,62 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ module ElasticGraph
10
+ module SchemaDefinition
11
+ module Indexing
12
+ module DerivedFields
13
+ # Responsible for providing bits of the painless script specific to a {DerivedIndexedType#min_value} or
14
+ # {DerivedIndexedType#max_value} field.
15
+ #
16
+ # @api private
17
+ class MinOrMaxValue < ::Data.define(:destination_field, :source_field, :min_or_max)
18
+ # `Data.define` provides the following methods:
19
+ # @dynamic destination_field, source_field, min_or_max
20
+
21
+ # @return [String] a line of painless code to manage a min or max value field and return a boolean indicating if it was updated.
22
+ def apply_operation_returning_update_status
23
+ *parent_parts, field = destination_field.split(".")
24
+ parent_parts = ["ctx", "_source"] + parent_parts
25
+
26
+ %{#{min_or_max}Value_idempotentlyUpdateValue(data["#{source_field}"], #{parent_parts.join(".")}, "#{field}")}
27
+ end
28
+
29
+ # @return [Array<String>] a list of painless statements that must be called at the top of the script to set things up.
30
+ def setup_statements
31
+ FieldInitializerSupport.build_empty_value_initializers(destination_field, leaf_value: :leave_unset)
32
+ end
33
+
34
+ # @return [Array<String>] painless functions required by a min or max value field.
35
+ def function_definitions
36
+ [MinOrMaxValue.function_def(min_or_max)]
37
+ end
38
+
39
+ # @param min_or_max [:min, :max] which type of function to generate.
40
+ # @return [String] painless function for managing a min or max field.
41
+ def self.function_def(min_or_max)
42
+ operator = (min_or_max == :min) ? "<" : ">"
43
+
44
+ <<~EOS
45
+ boolean #{min_or_max}Value_idempotentlyUpdateValue(List values, def parentObject, String fieldName) {
46
+ def currentFieldValue = parentObject[fieldName];
47
+ def #{min_or_max}NewValue = values.isEmpty() ? null : Collections.#{min_or_max}(values);
48
+
49
+ if (currentFieldValue == null || (#{min_or_max}NewValue != null && #{min_or_max}NewValue.compareTo(currentFieldValue) #{operator} 0)) {
50
+ parentObject[fieldName] = #{min_or_max}NewValue;
51
+ return true;
52
+ }
53
+
54
+ return false;
55
+ }
56
+ EOS
57
+ end
58
+ end
59
+ end
60
+ end
61
+ end
62
+ end
@@ -0,0 +1,346 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/schema_artifacts/runtime_metadata/update_target"
10
+ require "elastic_graph/schema_definition/indexing/derived_fields/append_only_set"
11
+ require "elastic_graph/schema_definition/indexing/derived_fields/immutable_value"
12
+ require "elastic_graph/schema_definition/indexing/derived_fields/min_or_max_value"
13
+ require "elastic_graph/schema_definition/scripting/script"
14
+
15
+ module ElasticGraph
16
+ module SchemaDefinition
17
+ module Indexing
18
+ # Used to configure the derivation of a derived indexed type from a source type.
19
+ # This type is yielded from {Mixins::HasIndices#derive_indexed_type_fields}.
20
+ #
21
+ # @example Derive a `Course` type from `StudentCourseEnrollment` events
22
+ # ElasticGraph.define_schema do |schema|
23
+ # # `StudentCourseEnrollment` is a directly indexed type.
24
+ # schema.object_type "StudentCourseEnrollment" do |t|
25
+ # t.field "id", "ID"
26
+ # t.field "courseId", "ID"
27
+ # t.field "courseName", "String"
28
+ # t.field "studentName", "String"
29
+ # t.field "courseStartDate", "Date"
30
+ #
31
+ # t.index "student_course_enrollments"
32
+ #
33
+ # # Here we define how the `Course` indexed type is derived when we index `StudentCourseEnrollment` events.
34
+ # t.derive_indexed_type_fields "Course", from_id: "courseId" do |derive|
35
+ # # `derive` is an instance of `DerivedIndexedType`.
36
+ # derive.immutable_value "name", from: "courseName"
37
+ # derive.append_only_set "students", from: "studentName"
38
+ # derive.min_value "firstOfferedDate", from: "courseStartDate"
39
+ # derive.max_value "mostRecentlyOfferedDate", from: "courseStartDate"
40
+ # end
41
+ # end
42
+ #
43
+ # # `Course` is an indexed type that is derived entirely from `StudentCourseEnrollment` events.
44
+ # schema.object_type "Course" do |t|
45
+ # t.field "id", "ID"
46
+ # t.field "name", "String"
47
+ # t.field "students", "[String!]!"
48
+ # t.field "firstOfferedDate", "Date"
49
+ # t.field "mostRecentlyOfferedDate", "Date"
50
+ #
51
+ # t.index "courses"
52
+ # end
53
+ # end
54
+ #
55
+ # @!attribute source_type
56
+ # @return [SchemaElements::ObjectType] the type used as a source for this derived type
57
+ # @!attribute destination_type_ref
58
+ # @private
59
+ # @!attribute id_source
60
+ # @return [String] path to field on the source type used as `id` on the derived type
61
+ # @!attribute routing_value_source
62
+ # @return [String, nil] path to field on the source type used for shard routing
63
+ # @!attribute rollover_timestamp_value_source
64
+ # @return [String, nil] path to field on the source type used as the timestamp field for rollover
65
+ # @!attribute fields
66
+ # @return [Array<DerivedFields::AppendOnlySet, DerivedFields::ImmutableValue, DerivedFields::MinOrMaxValue>] derived field definitions
67
+ class DerivedIndexedType < ::Struct.new(
68
+ :source_type,
69
+ :destination_type_ref,
70
+ :id_source,
71
+ :routing_value_source,
72
+ :rollover_timestamp_value_source,
73
+ :fields
74
+ )
75
+ # @param source_type [SchemaElements::ObjectType] the type used as a source for this derived type
76
+ # @param destination_type_ref [SchemaElements::TypeReference] the derived type
77
+ # @param id_source [String] path to field on the source type used as `id` on the derived type
78
+ # @param routing_value_source [String, nil] path to field on the source type used for shard routing
79
+ # @param rollover_timestamp_value_source [String, nil] path to field on the source type used as the timestamp field for rollover
80
+ # @yield [DerivedIndexedType] the `DerivedIndexedType` instance
81
+ # @api private
82
+ def initialize(
83
+ source_type:,
84
+ destination_type_ref:,
85
+ id_source:,
86
+ routing_value_source:,
87
+ rollover_timestamp_value_source:
88
+ )
89
+ fields = [] # : ::Array[_DerivedField]
90
+ super(
91
+ source_type: source_type,
92
+ destination_type_ref: destination_type_ref,
93
+ id_source: id_source,
94
+ routing_value_source: routing_value_source,
95
+ rollover_timestamp_value_source: rollover_timestamp_value_source,
96
+ fields: fields
97
+ )
98
+ yield self
99
+ end
100
+
101
+ # Configures `field_name` (on the derived indexing type) to contain the set union of all values from
102
+ # the `from` field on the source type. Values are only ever appended to the set, so the field will
103
+ # act as an append-only set.
104
+ #
105
+ # @param field_name [String] name of field on the derived indexing type to store the derived set
106
+ # @param from [String] path to field on the source type to source values from
107
+ # @return [Array<DerivedFields::AppendOnlySet, DerivedFields::ImmutableValue, DerivedFields::MinOrMaxValue>] the updated `fields` list
108
+ #
109
+ # @example
110
+ # ElasticGraph.define_schema do |schema|
111
+ # schema.object_type "StudentCourseEnrollment" do |t|
112
+ # t.field "id", "ID"
113
+ # t.field "courseId", "ID"
114
+ # t.field "studentName", "String"
115
+ #
116
+ # t.index "student_course_enrollments"
117
+ #
118
+ # t.derive_indexed_type_fields "Course", from_id: "courseId" do |derive|
119
+ # derive.append_only_set "students", from: "studentName"
120
+ # end
121
+ # end
122
+ #
123
+ # schema.object_type "Course" do |t|
124
+ # t.field "id", "ID"
125
+ # t.field "students", "[String!]!"
126
+ #
127
+ # t.index "courses"
128
+ # end
129
+ # end
130
+ def append_only_set(field_name, from:)
131
+ fields << DerivedFields::AppendOnlySet.new(field_name, from)
132
+ end
133
+
134
+ # Configures `field_name` (on the derived indexing type) to contain a single immutable value from the
135
+ # `from` field on the source type. Immutability is enforced by triggering an indexing failure with a
136
+ # clear error if any event's source value is different from the value already indexed on this field.
137
+ #
138
+ # @param field_name [String] name of field on the derived indexing type to store the derived value
139
+ # @param from [String] path to field on the source type to source values from
140
+ # @param nullable [Boolean] whether the field is allowed to be set to `null`. When set to false, events
141
+ # that contain a `null` value in the `from` field will be rejected instead of setting the field’s value
142
+ # to `null`.
143
+ # @param can_change_from_null [Boolean] whether a one-time mutation of the field value is allowed from
144
+ # `null` to a non-`null` value. This can be useful when dealing with a field that may not have a value
145
+ # on all source events. For example, if the source field was not initially part of the schema of your
146
+ # source dataset, you may have old records that lack a value for this field. When set, this option
147
+ # allows a one-time mutation of the field value from `null` to a non-`null` value. Once set to a
148
+ # non-`null` value, any additional `null` values that are encountered will be ignored (ensuring that
149
+ # the indexed data converges on the same state regardless of the order the events are ingested in).
150
+ # Note: this option cannot be enabled when `nullable: false` has been set.
151
+ # @return [Array<DerivedFields::AppendOnlySet, DerivedFields::ImmutableValue, DerivedFields::MinOrMaxValue>] the updated `fields` list
152
+ #
153
+ # @example
154
+ # ElasticGraph.define_schema do |schema|
155
+ # schema.object_type "StudentCourseEnrollment" do |t|
156
+ # t.field "id", "ID"
157
+ # t.field "courseId", "ID"
158
+ # t.field "courseName", "String"
159
+ #
160
+ # t.index "student_course_enrollments"
161
+ #
162
+ # t.derive_indexed_type_fields "Course", from_id: "courseId" do |derive|
163
+ # derive.immutable_value "name", from: "courseName"
164
+ # end
165
+ # end
166
+ #
167
+ # schema.object_type "Course" do |t|
168
+ # t.field "id", "ID"
169
+ # t.field "name", "String"
170
+ #
171
+ # t.index "courses"
172
+ # end
173
+ # end
174
+ def immutable_value(field_name, from:, nullable: true, can_change_from_null: false)
175
+ if !nullable && can_change_from_null
176
+ raise SchemaError, "`can_change_from_null: true` is not allowed with `nullable: false` (as there would be no `null` values to change from)."
177
+ end
178
+
179
+ fields << DerivedFields::ImmutableValue.new(
180
+ destination_field: field_name,
181
+ source_field: from,
182
+ nullable: nullable,
183
+ can_change_from_null: can_change_from_null
184
+ )
185
+ end
186
+
187
+ # Configures `field_name` (on the derived indexing type) to contain the minimum of all values from the `from`
188
+ # field on the source type.
189
+ #
190
+ # @param field_name [String] name of field on the derived indexing type to store the derived value
191
+ # @param from [String] path to field on the source type to source values from
192
+ # @return [Array<DerivedFields::AppendOnlySet, DerivedFields::ImmutableValue, DerivedFields::MinOrMaxValue>] the updated `fields` list
193
+ #
194
+ # @example
195
+ # ElasticGraph.define_schema do |schema|
196
+ # schema.object_type "StudentCourseEnrollment" do |t|
197
+ # t.field "id", "ID"
198
+ # t.field "courseId", "ID"
199
+ # t.field "courseStartDate", "Date"
200
+ #
201
+ # t.index "student_course_enrollments"
202
+ #
203
+ # t.derive_indexed_type_fields "Course", from_id: "courseId" do |derive|
204
+ # derive.min_value "firstOfferedDate", from: "courseStartDate"
205
+ # end
206
+ # end
207
+ #
208
+ # schema.object_type "Course" do |t|
209
+ # t.field "id", "ID"
210
+ # t.field "firstOfferedDate", "Date"
211
+ #
212
+ # t.index "courses"
213
+ # end
214
+ # end
215
+ def min_value(field_name, from:)
216
+ fields << DerivedFields::MinOrMaxValue.new(field_name, from, :min)
217
+ end
218
+
219
+ # Configures `field_name` (on the derived indexing type) to contain the maximum of all values from the `from`
220
+ # field on the source type.
221
+ #
222
+ # @param field_name [String] name of field on the derived indexing type to store the derived value
223
+ # @param from [String] path to field on the source type to source values from
224
+ # @return [Array<DerivedFields::AppendOnlySet, DerivedFields::ImmutableValue, DerivedFields::MinOrMaxValue>] the updated `fields` list
225
+ #
226
+ # @example
227
+ # ElasticGraph.define_schema do |schema|
228
+ # schema.object_type "StudentCourseEnrollment" do |t|
229
+ # t.field "id", "ID"
230
+ # t.field "courseId", "ID"
231
+ # t.field "courseStartDate", "Date"
232
+ #
233
+ # t.index "student_course_enrollments"
234
+ #
235
+ # t.derive_indexed_type_fields "Course", from_id: "courseId" do |derive|
236
+ # derive.max_value "mostRecentlyOfferedDate", from: "courseStartDate"
237
+ # end
238
+ # end
239
+ #
240
+ # schema.object_type "Course" do |t|
241
+ # t.field "id", "ID"
242
+ # t.field "mostRecentlyOfferedDate", "Date"
243
+ #
244
+ # t.index "courses"
245
+ # end
246
+ # end
247
+ def max_value(field_name, from:)
248
+ fields << DerivedFields::MinOrMaxValue.new(field_name, from, :max)
249
+ end
250
+
251
+ # @return [Scripting::Script] Painless script that will maintain the derived fields
252
+ # @api private
253
+ def painless_script
254
+ Scripting::Script.new(
255
+ source: generate_script.strip,
256
+ name: "#{destination_type_ref}_from_#{source_type.name}",
257
+ language: "painless",
258
+ context: "update"
259
+ )
260
+ end
261
+
262
+ # @return [SchemaArtifacts::RuntimeMetadata::UpdateTarget] runtime metadata for the source type
263
+ # @api private
264
+ def runtime_metadata_for_source_type
265
+ SchemaArtifacts::RuntimeMetadata::UpdateTarget.new(
266
+ type: destination_type_ref.name,
267
+ relationship: nil,
268
+ script_id: painless_script.id,
269
+ id_source: id_source,
270
+ routing_value_source: routing_value_source,
271
+ rollover_timestamp_value_source: rollover_timestamp_value_source,
272
+ metadata_params: {},
273
+ data_params: fields.map(&:source_field).to_h do |f|
274
+ [f, SchemaArtifacts::RuntimeMetadata::DynamicParam.new(source_path: f, cardinality: :many)]
275
+ end
276
+ )
277
+ end
278
+
279
+ private
280
+
281
+ def generate_script
282
+ if fields.empty?
283
+ raise SchemaError, "`derive_indexed_type_fields` definition for #{destination_type_ref} (from #{source_type.name}) " \
284
+ "has no derived field definitions."
285
+ end
286
+
287
+ sorted_fields = fields.sort_by(&:destination_field)
288
+
289
+ # We use `uniq` below to avoid emitting the same function definition or setup statement multiple times, since
290
+ # multiple fields can need the same ones (such as initializing a common parent field to an empty map).
291
+ function_defs = sorted_fields.flat_map(&:function_definitions).uniq.map(&:strip).sort
292
+
293
+ setup_statements = [STATIC_SETUP_STATEMENTS] + sorted_fields.flat_map(&:setup_statements).uniq.map(&:strip)
294
+
295
+ apply_update_statements = sorted_fields.map { |f| apply_update_statement(f).strip }
296
+
297
+ # Note: comments in the script are effectively "free" since:
298
+ #
299
+ # - The compiler will strip them out.
300
+ # - We only send the script to the datastore once (when configuring the cluster), and later
301
+ # reference it only by id--so we don't pay for the larger payload on each indexing request.
302
+ <<~EOS
303
+ #{function_defs.join("\n\n")}
304
+
305
+ #{setup_statements.join("\n")}
306
+
307
+ #{apply_update_statements.join("\n")}
308
+
309
+ if (!#{SCRIPT_ERRORS_VAR}.isEmpty()) {
310
+ throw new IllegalArgumentException("#{DERIVED_INDEX_FAILURE_MESSAGE_PREAMBLE}: " + #{SCRIPT_ERRORS_VAR}.join(" "));
311
+ }
312
+
313
+ // For records with no new values to index, only skip the update if the document itself doesn't already exist.
314
+ // Otherwise create an (empty) document to reflect the fact that the id has been seen.
315
+ if (ctx._source.id != null && #{sorted_fields.map { |f| was_noop_variable(f) }.join(" && ")}) {
316
+ ctx.op = 'none';
317
+ } else {
318
+ // Here we set `_source.id` because if we don't, it'll never be set, making these docs subtly
319
+ // different from docs indexed the normal way.
320
+ //
321
+ // Note also that we MUST use `params.id` instead of `ctx._id`. The latter works on an update
322
+ // of an existing document, but is unavailable when we are inserting the document for the first time.
323
+ ctx._source.id = params.id;
324
+ }
325
+ EOS
326
+ end
327
+
328
+ def apply_update_statement(field)
329
+ "boolean #{was_noop_variable(field)} = !#{field.apply_operation_returning_update_status};"
330
+ end
331
+
332
+ def was_noop_variable(field)
333
+ "#{field.destination_field.gsub(".", "__")}_was_noop"
334
+ end
335
+
336
+ SCRIPT_ERRORS_VAR = "scriptErrors"
337
+
338
+ STATIC_SETUP_STATEMENTS = <<~EOS.strip
339
+ Map data = params.data;
340
+ // A variable to accumulate script errors so that we can surface _all_ issues and not just the first.
341
+ List #{SCRIPT_ERRORS_VAR} = new ArrayList();
342
+ EOS
343
+ end
344
+ end
345
+ end
346
+ end
@@ -0,0 +1,74 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/constants"
10
+
11
+ module ElasticGraph
12
+ module SchemaDefinition
13
+ module Indexing
14
+ # Contains logic related to "event envelope"--the layer of metadata that wraps all indexing events.
15
+ #
16
+ # @api private
17
+ module EventEnvelope
18
+ # @param indexed_type_names [Array<String>] names of the indexed types
19
+ # @param json_schema_version [Integer] the version of the JSON schema
20
+ # @return [Hash<String, Object>] the JSON schema for the ElasticGraph event envelope for the given `indexed_type_names`.
21
+ def self.json_schema(indexed_type_names, json_schema_version)
22
+ {
23
+ "type" => "object",
24
+ "properties" => {
25
+ "op" => {
26
+ "type" => "string",
27
+ "enum" => %w[upsert]
28
+ },
29
+ "type" => {
30
+ "type" => "string",
31
+ # Sorting doesn't really matter here, but it's nice for the output in the schema artifact to be consistent.
32
+ "enum" => indexed_type_names.sort
33
+ },
34
+ "id" => {
35
+ "type" => "string",
36
+ "maxLength" => DEFAULT_MAX_KEYWORD_LENGTH
37
+ },
38
+ "version" => {
39
+ "type" => "integer",
40
+ "minimum" => 0,
41
+ "maximum" => (2**63) - 1
42
+ },
43
+ "record" => {
44
+ "type" => "object"
45
+ },
46
+ "latency_timestamps" => {
47
+ "type" => "object",
48
+ "additionalProperties" => false,
49
+ "patternProperties" => {
50
+ "^\\w+_at$" => {"type" => "string", "format" => "date-time"}
51
+ }
52
+ },
53
+ JSON_SCHEMA_VERSION_KEY => {
54
+ "const" => json_schema_version
55
+ },
56
+ "message_id" => {
57
+ "type" => "string",
58
+ "description" => "The optional ID of the message containing this event from whatever messaging system is being used between the publisher and the ElasticGraph indexer."
59
+ }
60
+ },
61
+ "additionalProperties" => false,
62
+ "required" => ["op", "type", "id", "version", JSON_SCHEMA_VERSION_KEY],
63
+ "if" => {
64
+ "properties" => {
65
+ "op" => {"const" => "upsert"}
66
+ }
67
+ },
68
+ "then" => {"required" => ["record"]}
69
+ }
70
+ end
71
+ end
72
+ end
73
+ end
74
+ end