elasticgraph-schema_definition 0.18.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +21 -0
  3. data/README.md +7 -0
  4. data/elasticgraph-schema_definition.gemspec +26 -0
  5. data/lib/elastic_graph/schema_definition/api.rb +359 -0
  6. data/lib/elastic_graph/schema_definition/factory.rb +506 -0
  7. data/lib/elastic_graph/schema_definition/indexing/derived_fields/append_only_set.rb +79 -0
  8. data/lib/elastic_graph/schema_definition/indexing/derived_fields/field_initializer_support.rb +59 -0
  9. data/lib/elastic_graph/schema_definition/indexing/derived_fields/immutable_value.rb +99 -0
  10. data/lib/elastic_graph/schema_definition/indexing/derived_fields/min_or_max_value.rb +62 -0
  11. data/lib/elastic_graph/schema_definition/indexing/derived_indexed_type.rb +346 -0
  12. data/lib/elastic_graph/schema_definition/indexing/event_envelope.rb +74 -0
  13. data/lib/elastic_graph/schema_definition/indexing/field.rb +181 -0
  14. data/lib/elastic_graph/schema_definition/indexing/field_reference.rb +51 -0
  15. data/lib/elastic_graph/schema_definition/indexing/field_type/enum.rb +65 -0
  16. data/lib/elastic_graph/schema_definition/indexing/field_type/object.rb +113 -0
  17. data/lib/elastic_graph/schema_definition/indexing/field_type/scalar.rb +51 -0
  18. data/lib/elastic_graph/schema_definition/indexing/field_type/union.rb +70 -0
  19. data/lib/elastic_graph/schema_definition/indexing/index.rb +318 -0
  20. data/lib/elastic_graph/schema_definition/indexing/json_schema_field_metadata.rb +34 -0
  21. data/lib/elastic_graph/schema_definition/indexing/json_schema_with_metadata.rb +234 -0
  22. data/lib/elastic_graph/schema_definition/indexing/list_counts_mapping.rb +53 -0
  23. data/lib/elastic_graph/schema_definition/indexing/relationship_resolver.rb +96 -0
  24. data/lib/elastic_graph/schema_definition/indexing/rollover_config.rb +25 -0
  25. data/lib/elastic_graph/schema_definition/indexing/update_target_factory.rb +54 -0
  26. data/lib/elastic_graph/schema_definition/indexing/update_target_resolver.rb +195 -0
  27. data/lib/elastic_graph/schema_definition/json_schema_pruner.rb +61 -0
  28. data/lib/elastic_graph/schema_definition/mixins/can_be_graphql_only.rb +31 -0
  29. data/lib/elastic_graph/schema_definition/mixins/has_derived_graphql_type_customizations.rb +119 -0
  30. data/lib/elastic_graph/schema_definition/mixins/has_directives.rb +65 -0
  31. data/lib/elastic_graph/schema_definition/mixins/has_documentation.rb +74 -0
  32. data/lib/elastic_graph/schema_definition/mixins/has_indices.rb +281 -0
  33. data/lib/elastic_graph/schema_definition/mixins/has_readable_to_s_and_inspect.rb +46 -0
  34. data/lib/elastic_graph/schema_definition/mixins/has_subtypes.rb +116 -0
  35. data/lib/elastic_graph/schema_definition/mixins/has_type_info.rb +181 -0
  36. data/lib/elastic_graph/schema_definition/mixins/implements_interfaces.rb +122 -0
  37. data/lib/elastic_graph/schema_definition/mixins/supports_default_value.rb +47 -0
  38. data/lib/elastic_graph/schema_definition/mixins/supports_filtering_and_aggregation.rb +267 -0
  39. data/lib/elastic_graph/schema_definition/mixins/verifies_graphql_name.rb +38 -0
  40. data/lib/elastic_graph/schema_definition/rake_tasks.rb +190 -0
  41. data/lib/elastic_graph/schema_definition/results.rb +404 -0
  42. data/lib/elastic_graph/schema_definition/schema_artifact_manager.rb +482 -0
  43. data/lib/elastic_graph/schema_definition/schema_elements/argument.rb +56 -0
  44. data/lib/elastic_graph/schema_definition/schema_elements/built_in_types.rb +1541 -0
  45. data/lib/elastic_graph/schema_definition/schema_elements/deprecated_element.rb +21 -0
  46. data/lib/elastic_graph/schema_definition/schema_elements/directive.rb +40 -0
  47. data/lib/elastic_graph/schema_definition/schema_elements/enum_type.rb +189 -0
  48. data/lib/elastic_graph/schema_definition/schema_elements/enum_value.rb +73 -0
  49. data/lib/elastic_graph/schema_definition/schema_elements/enum_value_namer.rb +89 -0
  50. data/lib/elastic_graph/schema_definition/schema_elements/enums_for_indexed_types.rb +82 -0
  51. data/lib/elastic_graph/schema_definition/schema_elements/field.rb +1085 -0
  52. data/lib/elastic_graph/schema_definition/schema_elements/field_path.rb +112 -0
  53. data/lib/elastic_graph/schema_definition/schema_elements/field_source.rb +16 -0
  54. data/lib/elastic_graph/schema_definition/schema_elements/graphql_sdl_enumerator.rb +113 -0
  55. data/lib/elastic_graph/schema_definition/schema_elements/input_field.rb +31 -0
  56. data/lib/elastic_graph/schema_definition/schema_elements/input_type.rb +60 -0
  57. data/lib/elastic_graph/schema_definition/schema_elements/interface_type.rb +72 -0
  58. data/lib/elastic_graph/schema_definition/schema_elements/list_counts_state.rb +40 -0
  59. data/lib/elastic_graph/schema_definition/schema_elements/object_type.rb +53 -0
  60. data/lib/elastic_graph/schema_definition/schema_elements/relationship.rb +218 -0
  61. data/lib/elastic_graph/schema_definition/schema_elements/scalar_type.rb +310 -0
  62. data/lib/elastic_graph/schema_definition/schema_elements/sort_order_enum_value.rb +36 -0
  63. data/lib/elastic_graph/schema_definition/schema_elements/sub_aggregation_path.rb +66 -0
  64. data/lib/elastic_graph/schema_definition/schema_elements/type_namer.rb +237 -0
  65. data/lib/elastic_graph/schema_definition/schema_elements/type_reference.rb +353 -0
  66. data/lib/elastic_graph/schema_definition/schema_elements/type_with_subfields.rb +579 -0
  67. data/lib/elastic_graph/schema_definition/schema_elements/union_type.rb +157 -0
  68. data/lib/elastic_graph/schema_definition/scripting/file_system_repository.rb +77 -0
  69. data/lib/elastic_graph/schema_definition/scripting/script.rb +48 -0
  70. data/lib/elastic_graph/schema_definition/scripting/scripts/field/as_day_of_week.painless +24 -0
  71. data/lib/elastic_graph/schema_definition/scripting/scripts/field/as_time_of_day.painless +41 -0
  72. data/lib/elastic_graph/schema_definition/scripting/scripts/filter/by_time_of_day.painless +22 -0
  73. data/lib/elastic_graph/schema_definition/scripting/scripts/update/index_data.painless +93 -0
  74. data/lib/elastic_graph/schema_definition/state.rb +212 -0
  75. data/lib/elastic_graph/schema_definition/test_support.rb +113 -0
  76. metadata +513 -0
@@ -0,0 +1,99 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ module ElasticGraph
10
+ module SchemaDefinition
11
+ module Indexing
12
+ module DerivedFields
13
+ # Responsible for providing bits of the painless script specific to a {DerivedIndexedType#immutable_value} field.
14
+ #
15
+ # @api private
16
class ImmutableValue < ::Data.define(:destination_field, :source_field, :nullable, :can_change_from_null)
  # `Data.define` provides the following methods:
  # @dynamic destination_field, source_field, nullable, can_change_from_null

  # Builds the painless call that applies this field's update. The last dot-separated segment of
  # `destination_field` is the field name; any preceding segments are appended to `ctx._source`
  # to address the parent object.
  #
  # @return [String] a line of painless code to manage an immutable value field and return a boolean indicating if it was updated.
  def apply_operation_returning_update_status
    path_segments = destination_field.split(".")
    leaf_name = path_segments.last
    parent_path = ["ctx", "_source", *path_segments[0...-1]].join(".")

    # Arguments are listed in the order of the parameters declared by `IDEMPOTENTLY_SET_VALUE`.
    arguments = [
      "scriptErrors",
      %(data["#{source_field}"]),
      parent_path,
      %("#{destination_field}"),
      %("#{leaf_name}"),
      nullable,
      can_change_from_null
    ]

    "immutableValue_idempotentlyUpdateValue(#{arguments.join(", ")})"
  end

  # Delegates to `FieldInitializerSupport`, asking it to leave the leaf field itself unset.
  #
  # @return [Array<String>] a list of painless statements that must be called at the top of the script to set things up.
  def setup_statements
    FieldInitializerSupport.build_empty_value_initializers(
      destination_field,
      leaf_value: :leave_unset
    )
  end

  # @return [Array<String>] painless functions required by `immutable_value`.
  def function_definitions
    [IDEMPOTENTLY_SET_VALUE]
  end

  # Note: a bare `private` only affects methods defined after it; it does not change the
  # visibility of the constant below.
  private

  # Painless function which manages an `immutable_value` field.
  IDEMPOTENTLY_SET_VALUE = <<~EOS
    boolean immutableValue_idempotentlyUpdateValue(List scriptErrors, List values, def parentObject, String fullPath, String fieldName, boolean nullable, boolean canChangeFromNull) {
      boolean fieldAlreadySet = parentObject.containsKey(fieldName);

      // `values` is always passed to us as a `List` (the indexer normalizes to a list, wrapping single
      // values in a list as needed) but we only ever expect at most 1 element.
      def newValueCandidate = values.isEmpty() ? null : values[0];

      if (fieldAlreadySet) {
        def currentValue = parentObject[fieldName];

        // Usually we do not allow `immutable_value` fields to ever change values. However, we make
        // a special case for `null`, but only when `can_change_from_null: true` has been configured.
        // This can be important when deriving a field that has not always existed on the source events.
        // On early events, the value may be `null`, and, when this is enabled, we do not want that to
        // interfere with our ability to set the value to the correct non-null value based on a different
        // event which has a value for the source field.
        if (canChangeFromNull) {
          if (currentValue == null) {
            parentObject[fieldName] = newValueCandidate;
            return true;
          }

          // When `can_change_from_null: true` is enabled we also need to ignore NEW `null` values that we
          // see _after_ a non-null value. This is necessary because an ElasticGraph invariant is that events
          // can be processed in any order. So we might process an old event (predating the existence of the
          // source field) after we've already set the field to a non-null value. We must always "converge"
          // on the same indexed state regardless, of the order events are seen, so here we just ignore it.
          if (newValueCandidate == null) {
            return false;
          }
        }

        // Otherwise, if the values differ, it means we are attempting to mutate the immutable value field, which we cannot allow.
        if (currentValue != newValueCandidate) {
          if (currentValue == null) {
            scriptErrors.add("Field `" + fullPath + "` cannot be changed (" + currentValue + " => " + newValueCandidate + "). Set `can_change_from_null: true` on the `immutable_value` definition to allow this.");
          } else {
            scriptErrors.add("Field `" + fullPath + "` cannot be changed (" + currentValue + " => " + newValueCandidate + ").");
          }
        }

        return false;
      }

      if (newValueCandidate == null && !nullable) {
        scriptErrors.add("Field `" + fullPath + "` cannot be set to `null`, but the source event contains no value for it. Remove `nullable: false` from the `immutable_value` definition to allow this.");
        return false;
      }

      parentObject[fieldName] = newValueCandidate;
      return true;
    }
  EOS
end
96
+ end
97
+ end
98
+ end
99
+ end
@@ -0,0 +1,62 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ module ElasticGraph
10
+ module SchemaDefinition
11
+ module Indexing
12
+ module DerivedFields
13
+ # Responsible for providing bits of the painless script specific to a {DerivedIndexedType#min_value} or
14
+ # {DerivedIndexedType#max_value} field.
15
+ #
16
+ # @api private
17
class MinOrMaxValue < ::Data.define(:destination_field, :source_field, :min_or_max)
  # `Data.define` provides the following methods:
  # @dynamic destination_field, source_field, min_or_max

  # Comparison operator used by the generated painless function for each supported kind:
  # a candidate replaces the current value when `candidate.compareTo(current) <operator> 0`.
  COMPARISON_OPERATORS = {min: "<", max: ">"}.freeze

  # Builds the painless call that applies this field's update. The last dot-separated segment of
  # `destination_field` is the field name; any preceding segments are appended to `ctx._source`
  # to address the parent object.
  #
  # @return [String] a line of painless code to manage a min or max value field and return a boolean indicating if it was updated.
  def apply_operation_returning_update_status
    *parent_parts, field = destination_field.split(".")
    parent_path = ["ctx", "_source", *parent_parts].join(".")

    %{#{min_or_max}Value_idempotentlyUpdateValue(data["#{source_field}"], #{parent_path}, "#{field}")}
  end

  # Delegates to `FieldInitializerSupport`, asking it to leave the leaf field itself unset.
  #
  # @return [Array<String>] a list of painless statements that must be called at the top of the script to set things up.
  def setup_statements
    FieldInitializerSupport.build_empty_value_initializers(destination_field, leaf_value: :leave_unset)
  end

  # @return [Array<String>] painless functions required by a min or max value field.
  def function_definitions
    [self.class.function_def(min_or_max)]
  end

  # Generates the painless function for one kind of field. The function takes the list of new
  # values, picks their min/max via `Collections.min`/`Collections.max`, and overwrites the current
  # field value when that value is `null` or when the candidate compares as smaller/larger.
  #
  # The kind is normalized to a symbol and validated up front. Previously anything other than
  # exactly `:min` (including the string `"min"`) silently got the `>` operator, which produced a
  # function whose name and comparison disagreed.
  #
  # @param min_or_max [:min, :max] which type of function to generate.
  # @return [String] painless function for managing a min or max field.
  # @raise [ArgumentError] if `min_or_max` is neither `:min` nor `:max` (nor the equivalent string).
  def self.function_def(min_or_max)
    kind = min_or_max.respond_to?(:to_sym) ? min_or_max.to_sym : min_or_max
    operator = COMPARISON_OPERATORS.fetch(kind) do
      raise ArgumentError, "`min_or_max` must be `:min` or `:max`, but got: #{min_or_max.inspect}"
    end

    <<~EOS
      boolean #{kind}Value_idempotentlyUpdateValue(List values, def parentObject, String fieldName) {
        def currentFieldValue = parentObject[fieldName];
        def #{kind}NewValue = values.isEmpty() ? null : Collections.#{kind}(values);

        if (currentFieldValue == null || (#{kind}NewValue != null && #{kind}NewValue.compareTo(currentFieldValue) #{operator} 0)) {
          parentObject[fieldName] = #{kind}NewValue;
          return true;
        }

        return false;
      }
    EOS
  end
end
59
+ end
60
+ end
61
+ end
62
+ end
@@ -0,0 +1,346 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/schema_artifacts/runtime_metadata/update_target"
10
+ require "elastic_graph/schema_definition/indexing/derived_fields/append_only_set"
11
+ require "elastic_graph/schema_definition/indexing/derived_fields/immutable_value"
12
+ require "elastic_graph/schema_definition/indexing/derived_fields/min_or_max_value"
13
+ require "elastic_graph/schema_definition/scripting/script"
14
+
15
+ module ElasticGraph
16
+ module SchemaDefinition
17
+ module Indexing
18
+ # Used to configure the derivation of a derived indexed type from a source type.
19
+ # This type is yielded from {Mixins::HasIndices#derive_indexed_type_fields}.
20
+ #
21
+ # @example Derive a `Course` type from `StudentCourseEnrollment` events
22
+ # ElasticGraph.define_schema do |schema|
23
+ # # `StudentCourseEnrollment` is a directly indexed type.
24
+ # schema.object_type "StudentCourseEnrollment" do |t|
25
+ # t.field "id", "ID"
26
+ # t.field "courseId", "ID"
27
+ # t.field "courseName", "String"
28
+ # t.field "studentName", "String"
29
+ # t.field "courseStartDate", "Date"
30
+ #
31
+ # t.index "student_course_enrollments"
32
+ #
33
+ # # Here we define how the `Course` indexed type is derived when we index `StudentCourseEnrollment` events.
34
+ # t.derive_indexed_type_fields "Course", from_id: "courseId" do |derive|
35
+ # # `derive` is an instance of `DerivedIndexedType`.
36
+ # derive.immutable_value "name", from: "courseName"
37
+ # derive.append_only_set "students", from: "studentName"
38
+ # derive.min_value "firstOfferedDate", from: "courseStartDate"
39
+ # derive.max_value "mostRecentlyOfferedDate", from: "courseStartDate"
40
+ # end
41
+ # end
42
+ #
43
+ # # `Course` is an indexed type that is derived entirely from `StudentCourseEnrollment` events.
44
+ # schema.object_type "Course" do |t|
45
+ # t.field "id", "ID"
46
+ # t.field "name", "String"
47
+ # t.field "students", "[String!]!"
48
+ # t.field "firstOfferedDate", "Date"
49
+ # t.field "mostRecentlyOfferedDate", "Date"
50
+ #
51
+ # t.index "courses"
52
+ # end
53
+ # end
54
+ #
55
+ # @!attribute source_type
56
+ # @return [SchemaElements::ObjectType] the type used as a source for this derive type
57
+ # @!attribute destination_type_ref
58
+ # @private
59
+ # @!attribute id_source
60
+ # @return [String] path to field on the source type used as `id` on the derived type
61
+ # @!attribute routing_value_source
62
+ # @return [String, nil] path to field on the source type used for shard routing
63
+ # @!attribute rollover_timestamp_value_source
64
+ # @return [String, nil] path to field on the source type used as the timestamp field for rollover
65
+ # @!attribute fields
66
+ # @return [Array<DerivedFields::AppendOnlySet, DerivedFields::ImmutableValue, DerivedFields::MinOrMaxValue>] derived field definitions
67
+ class DerivedIndexedType < ::Struct.new(
68
+ :source_type,
69
+ :destination_type_ref,
70
+ :id_source,
71
+ :routing_value_source,
72
+ :rollover_timestamp_value_source,
73
+ :fields
74
+ )
75
+ # @param source_type [SchemaElements::ObjectType] the type used as a source for this derive type
76
+ # @param destination_type_ref [SchemaElements::TypeReference] the derived type
77
+ # @param id_source [String] path to field on the source type used as `id` on the derived type
78
+ # @param routing_value_source [String, nil] path to field on the source type used for shard routing
79
+ # @param rollover_timestamp_value_source [String, nil] path to field on the source type used as the timestamp field for rollover
80
+ # @yield [DerivedIndexedType] the `DerivedIndexedType` instance
81
+ # @api private
82
def initialize(
  source_type:,
  destination_type_ref:,
  id_source:,
  routing_value_source:,
  rollover_timestamp_value_source:
)
  # Starts empty; `append_only_set`, `immutable_value`, `min_value` and `max_value` append to it.
  derived_fields = [] # : ::Array[_DerivedField]

  super(
    source_type:,
    destination_type_ref:,
    id_source:,
    routing_value_source:,
    rollover_timestamp_value_source:,
    fields: derived_fields
  )

  # Hand the fully-initialized instance to the caller's block.
  yield self
end
100
+
101
+ # Configures `field_name` (on the derived indexing type) to contain the set union of all values from
102
+ # the `from` field on the source type. Values are only ever appended to the set, so the field will
103
+ # act as an append-only set.
104
+ #
105
+ # @param field_name [String] name of field on the derived indexing type to store the derived set
106
+ # @param from [String] path to field on the source type to source values from
107
+ # @return [Array] the updated `fields` list, with the new DerivedFields::AppendOnlySet appended
108
+ #
109
+ # @example
110
+ # ElasticGraph.define_schema do |schema|
111
+ # schema.object_type "StudentCourseEnrollment" do |t|
112
+ # t.field "id", "ID"
113
+ # t.field "courseId", "ID"
114
+ # t.field "studentName", "String"
115
+ #
116
+ # t.index "student_course_enrollments"
117
+ #
118
+ # t.derive_indexed_type_fields "Course", from_id: "courseId" do |derive|
119
+ # derive.append_only_set "students", from: "studentName"
120
+ # end
121
+ # end
122
+ #
123
+ # schema.object_type "Course" do |t|
124
+ # t.field "id", "ID"
125
+ # t.field "students", "[String!]!"
126
+ #
127
+ # t.index "courses"
128
+ # end
129
+ # end
130
def append_only_set(field_name, from:)
  # Register the definition; like `<<`, `push` evaluates to the `fields` array itself.
  set_definition = DerivedFields::AppendOnlySet.new(field_name, from)
  fields.push(set_definition)
end
133
+
134
+ # Configures `field_name` (on the derived indexing type) to contain a single immutable value from the
135
+ # `from` field on the source type. Immutability is enforced by triggering an indexing failure with a
136
+ # clear error if any event's source value is different from the value already indexed on this field.
137
+ #
138
+ # @param field_name [String] name of field on the derived indexing type to store the derived value
139
+ # @param from [String] path to field on the source type to source values from
140
+ # @param nullable [Boolean] whether the field is allowed to be set to `null`. When set to false, events
141
+ # that contain a `null` value in the `from` field will be rejected instead of setting the field’s value
142
+ # to `null`.
143
+ # @param can_change_from_null [Boolean] whether a one-time mutation of the field value is allowed from
144
+ # `null` to a non-`null` value. This can be useful when dealing with a field that may not have a value
145
+ # on all source events. For example, if the source field was not initially part of the schema of your
146
+ # source dataset, you may have old records that lack a value for this field. When set, this option
147
+ # allows a one-time mutation of the field value from `null` to a non-`null` value. Once set to a
148
+ # non-`null` value, any additional `null` values that are encountered will be ignored (ensuring that
149
+ # the indexed data converges on the same state regardless of the order the events are ingested in).
150
+ # Note: this option cannot be enabled when `nullable: false` has been set.
151
+ # @return [Array] the updated `fields` list, with the new DerivedFields::ImmutableValue appended
152
+ #
153
+ # @example
154
+ # ElasticGraph.define_schema do |schema|
155
+ # schema.object_type "StudentCourseEnrollment" do |t|
156
+ # t.field "id", "ID"
157
+ # t.field "courseId", "ID"
158
+ # t.field "courseName", "String"
159
+ #
160
+ # t.index "student_course_enrollments"
161
+ #
162
+ # t.derive_indexed_type_fields "Course", from_id: "courseId" do |derive|
163
+ # derive.immutable_value "name", from: "courseName"
164
+ # end
165
+ # end
166
+ #
167
+ # schema.object_type "Course" do |t|
168
+ # t.field "id", "ID"
169
+ # t.field "name", "String"
170
+ #
171
+ # t.index "courses"
172
+ # end
173
+ # end
174
def immutable_value(field_name, from:, nullable: true, can_change_from_null: false)
  # The two options contradict each other: a non-nullable field never holds a `null` to change from.
  if can_change_from_null && !nullable
    raise SchemaError, "`can_change_from_null: true` is not allowed with `nullable: false` (as there would be no `null` values to change from)."
  end

  immutable_definition = DerivedFields::ImmutableValue.new(
    destination_field: field_name,
    source_field: from,
    nullable:,
    can_change_from_null:
  )

  # Register the definition; like `<<`, `push` evaluates to the `fields` array itself.
  fields.push(immutable_definition)
end
186
+
187
+ # Configures `field_name` (on the derived indexing type) to contain the minimum of all values from the `from`
188
+ # field on the source type.
189
+ #
190
+ # @param field_name [String] name of field on the derived indexing type to store the derived value
191
+ # @param from [String] path to field on the source type to source values from
192
+ # @return [Array] the updated `fields` list, with the new DerivedFields::MinOrMaxValue appended
193
+ #
194
+ # @example
195
+ # ElasticGraph.define_schema do |schema|
196
+ # schema.object_type "StudentCourseEnrollment" do |t|
197
+ # t.field "id", "ID"
198
+ # t.field "courseId", "ID"
199
+ # t.field "courseStartDate", "Date"
200
+ #
201
+ # t.index "student_course_enrollments"
202
+ #
203
+ # t.derive_indexed_type_fields "Course", from_id: "courseId" do |derive|
204
+ # derive.min_value "firstOfferedDate", from: "courseStartDate"
205
+ # end
206
+ # end
207
+ #
208
+ # schema.object_type "Course" do |t|
209
+ # t.field "id", "ID"
210
+ # t.field "firstOfferedDate", "Date"
211
+ #
212
+ # t.index "courses"
213
+ # end
214
+ # end
215
def min_value(field_name, from:)
  # Register a `:min` definition; like `<<`, `push` evaluates to the `fields` array itself.
  min_definition = DerivedFields::MinOrMaxValue.new(field_name, from, :min)
  fields.push(min_definition)
end
218
+
219
+ # Configures `field_name` (on the derived indexing type) to contain the maximum of all values from the `from`
220
+ # field on the source type.
221
+ #
222
+ # @param field_name [String] name of field on the derived indexing type to store the derived value
223
+ # @param from [String] path to field on the source type to source values from
224
+ # @return [Array] the updated `fields` list, with the new DerivedFields::MinOrMaxValue appended
225
+ #
226
+ # @example
227
+ # ElasticGraph.define_schema do |schema|
228
+ # schema.object_type "StudentCourseEnrollment" do |t|
229
+ # t.field "id", "ID"
230
+ # t.field "courseId", "ID"
231
+ # t.field "courseStartDate", "Date"
232
+ #
233
+ # t.index "student_course_enrollments"
234
+ #
235
+ # t.derive_indexed_type_fields "Course", from_id: "courseId" do |derive|
236
+ # derive.max_value "mostRecentlyOfferedDate", from: "courseStartDate"
237
+ # end
238
+ # end
239
+ #
240
+ # schema.object_type "Course" do |t|
241
+ # t.field "id", "ID"
242
+ # t.field "mostRecentlyOfferedDate", "Date"
243
+ #
244
+ # t.index "courses"
245
+ # end
246
+ # end
247
def max_value(field_name, from:)
  # Register a `:max` definition; like `<<`, `push` evaluates to the `fields` array itself.
  max_definition = DerivedFields::MinOrMaxValue.new(field_name, from, :max)
  fields.push(max_definition)
end
250
+
251
+ # @return [Scripting::Script] Painless script that will maintain the derived fields
252
+ # @api private
253
def painless_script
  # Generate the source first so that a definition with no derived fields fails before anything else is built.
  script_source = generate_script.strip
  script_name = "#{destination_type_ref}_from_#{source_type.name}"

  Scripting::Script.new(
    source: script_source,
    name: script_name,
    language: "painless",
    context: "update"
  )
end
261
+
262
+ # @return [SchemaArtifacts::RuntimeMetadata::UpdateTarget] runtime metadata for the source type
263
+ # @api private
264
def runtime_metadata_for_source_type
  # One `:many`-cardinality param per distinct source field, keyed by the source field path.
  data_params = fields.each_with_object({}) do |field, params|
    source_path = field.source_field
    params[source_path] = SchemaArtifacts::RuntimeMetadata::DynamicParam.new(
      source_path: source_path,
      cardinality: :many
    )
  end

  SchemaArtifacts::RuntimeMetadata::UpdateTarget.new(
    type: destination_type_ref.name,
    relationship: nil,
    script_id: painless_script.id,
    id_source: id_source,
    routing_value_source: routing_value_source,
    rollover_timestamp_value_source: rollover_timestamp_value_source,
    metadata_params: {},
    data_params: data_params
  )
end
278
+
279
+ private
280
+
281
def generate_script
  if fields.empty?
    message = "`derive_indexed_type_fields` definition for #{destination_type_ref} (from #{source_type.name}) " \
      "has no derived field definitions."
    raise SchemaError, message
  end

  # Sorting by destination field gives the statements a stable order.
  ordered_fields = fields.sort_by(&:destination_field)

  # `uniq` drops duplicates in both lists, since multiple fields can need the same function or the
  # same setup (such as initializing a common parent field to an empty map).
  painless_functions = ordered_fields.flat_map(&:function_definitions).uniq.map(&:strip).sort
  field_setup = ordered_fields.flat_map(&:setup_statements).uniq.map(&:strip)
  all_setup = [STATIC_SETUP_STATEMENTS, *field_setup]

  update_lines = ordered_fields.map { |field| apply_update_statement(field).strip }
  all_noop_condition = ordered_fields.map { |field| was_noop_variable(field) }.join(" && ")

  # Note: comments in the script are effectively "free" since:
  #
  # - The compiler will strip them out.
  # - We only send the script to the datastore once (when configuring the cluster), and later
  #   reference it only by id--so we don't pay for the larger payload on each indexing request.
  <<~EOS
    #{painless_functions.join("\n\n")}

    #{all_setup.join("\n")}

    #{update_lines.join("\n")}

    if (!#{SCRIPT_ERRORS_VAR}.isEmpty()) {
      throw new IllegalArgumentException("#{DERIVED_INDEX_FAILURE_MESSAGE_PREAMBLE}: " + #{SCRIPT_ERRORS_VAR}.join(" "));
    }

    // For records with no new values to index, only skip the update if the document itself doesn't already exist.
    // Otherwise create an (empty) document to reflect the fact that the id has been seen.
    if (ctx._source.id != null && #{all_noop_condition}) {
      ctx.op = 'none';
    } else {
      // Here we set `_source.id` because if we don't, it'll never be set, making these docs subtly
      // different from docs indexed the normal way.
      //
      // Note also that we MUST use `params.id` instead of `ctx._id`. The latter works on an update
      // of an existing document, but is unavailable when we are inserting the document for the first time.
      ctx._source.id = params.id;
    }
  EOS
end
327
+
328
# Builds the painless statement that applies one field's update and records, in a per-field
# boolean variable, whether that update was a no-op (the negation of the returned update status).
def apply_update_statement(field)
  noop_flag = was_noop_variable(field)
  painless_call = field.apply_operation_returning_update_status

  "boolean #{noop_flag} = !#{painless_call};"
end
331
+
332
# Name of the painless boolean variable for `field`: the destination field path with each `.`
# replaced by `__`, followed by `_was_noop`.
def was_noop_variable(field)
  flattened_path = field.destination_field.gsub(".", "__")
  flattened_path + "_was_noop"
end
335
+
336
# Name of the painless variable that accumulates error messages in the generated script.
SCRIPT_ERRORS_VAR = "scriptErrors"

# Statements placed ahead of the field-specific setup statements in every generated script.
STATIC_SETUP_STATEMENTS = [
  "Map data = params.data;",
  "// A variable to accumulate script errors so that we can surface _all_ issues and not just the first.",
  "List #{SCRIPT_ERRORS_VAR} = new ArrayList();"
].join("\n")
343
+ end
344
+ end
345
+ end
346
+ end
@@ -0,0 +1,74 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/constants"
10
+
11
+ module ElasticGraph
12
+ module SchemaDefinition
13
+ module Indexing
14
+ # Contains logic related to "event envelope"--the layer of metadata that wraps all indexing events.
15
+ #
16
+ # @api private
17
module EventEnvelope
  # Builds the JSON schema describing the envelope that wraps every indexing event.
  #
  # @param indexed_type_names [Array<String>] names of the indexed types
  # @param json_schema_version [Integer] the version of the JSON schema
  # @return [Hash<String, Object>] the JSON schema for the ElasticGraph event envelope for the given `indexed_type_names`.
  def self.json_schema(indexed_type_names, json_schema_version)
    largest_version = (2**63) - 1

    envelope_properties = {
      "op" => {"type" => "string", "enum" => %w[upsert]},
      # Sorting doesn't really matter here, but it's nice for the output in the schema artifact to be consistent.
      "type" => {"type" => "string", "enum" => indexed_type_names.sort},
      "id" => {"type" => "string", "maxLength" => DEFAULT_MAX_KEYWORD_LENGTH},
      "version" => {"type" => "integer", "minimum" => 0, "maximum" => largest_version},
      "record" => {"type" => "object"},
      # Only keys ending in `_at` are permitted, and each must be a date-time string.
      "latency_timestamps" => {
        "type" => "object",
        "additionalProperties" => false,
        "patternProperties" => {
          "^\\w+_at$" => {"type" => "string", "format" => "date-time"}
        }
      },
      JSON_SCHEMA_VERSION_KEY => {"const" => json_schema_version},
      "message_id" => {
        "type" => "string",
        "description" => "The optional ID of the message containing this event from whatever messaging system is being used between the publisher and the ElasticGraph indexer."
      }
    }

    {
      "type" => "object",
      "properties" => envelope_properties,
      "additionalProperties" => false,
      "required" => ["op", "type", "id", "version", JSON_SCHEMA_VERSION_KEY],
      # `record` becomes mandatory when the operation is an upsert.
      "if" => {"properties" => {"op" => {"const" => "upsert"}}},
      "then" => {"required" => ["record"]}
    }
  end
end
72
+ end
73
+ end
74
+ end