elasticgraph-schema_definition 0.18.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +21 -0
  3. data/README.md +7 -0
  4. data/elasticgraph-schema_definition.gemspec +26 -0
  5. data/lib/elastic_graph/schema_definition/api.rb +359 -0
  6. data/lib/elastic_graph/schema_definition/factory.rb +506 -0
  7. data/lib/elastic_graph/schema_definition/indexing/derived_fields/append_only_set.rb +79 -0
  8. data/lib/elastic_graph/schema_definition/indexing/derived_fields/field_initializer_support.rb +59 -0
  9. data/lib/elastic_graph/schema_definition/indexing/derived_fields/immutable_value.rb +99 -0
  10. data/lib/elastic_graph/schema_definition/indexing/derived_fields/min_or_max_value.rb +62 -0
  11. data/lib/elastic_graph/schema_definition/indexing/derived_indexed_type.rb +346 -0
  12. data/lib/elastic_graph/schema_definition/indexing/event_envelope.rb +74 -0
  13. data/lib/elastic_graph/schema_definition/indexing/field.rb +181 -0
  14. data/lib/elastic_graph/schema_definition/indexing/field_reference.rb +51 -0
  15. data/lib/elastic_graph/schema_definition/indexing/field_type/enum.rb +65 -0
  16. data/lib/elastic_graph/schema_definition/indexing/field_type/object.rb +113 -0
  17. data/lib/elastic_graph/schema_definition/indexing/field_type/scalar.rb +51 -0
  18. data/lib/elastic_graph/schema_definition/indexing/field_type/union.rb +70 -0
  19. data/lib/elastic_graph/schema_definition/indexing/index.rb +318 -0
  20. data/lib/elastic_graph/schema_definition/indexing/json_schema_field_metadata.rb +34 -0
  21. data/lib/elastic_graph/schema_definition/indexing/json_schema_with_metadata.rb +234 -0
  22. data/lib/elastic_graph/schema_definition/indexing/list_counts_mapping.rb +53 -0
  23. data/lib/elastic_graph/schema_definition/indexing/relationship_resolver.rb +96 -0
  24. data/lib/elastic_graph/schema_definition/indexing/rollover_config.rb +25 -0
  25. data/lib/elastic_graph/schema_definition/indexing/update_target_factory.rb +54 -0
  26. data/lib/elastic_graph/schema_definition/indexing/update_target_resolver.rb +195 -0
  27. data/lib/elastic_graph/schema_definition/json_schema_pruner.rb +61 -0
  28. data/lib/elastic_graph/schema_definition/mixins/can_be_graphql_only.rb +31 -0
  29. data/lib/elastic_graph/schema_definition/mixins/has_derived_graphql_type_customizations.rb +119 -0
  30. data/lib/elastic_graph/schema_definition/mixins/has_directives.rb +65 -0
  31. data/lib/elastic_graph/schema_definition/mixins/has_documentation.rb +74 -0
  32. data/lib/elastic_graph/schema_definition/mixins/has_indices.rb +281 -0
  33. data/lib/elastic_graph/schema_definition/mixins/has_readable_to_s_and_inspect.rb +46 -0
  34. data/lib/elastic_graph/schema_definition/mixins/has_subtypes.rb +116 -0
  35. data/lib/elastic_graph/schema_definition/mixins/has_type_info.rb +181 -0
  36. data/lib/elastic_graph/schema_definition/mixins/implements_interfaces.rb +122 -0
  37. data/lib/elastic_graph/schema_definition/mixins/supports_default_value.rb +47 -0
  38. data/lib/elastic_graph/schema_definition/mixins/supports_filtering_and_aggregation.rb +267 -0
  39. data/lib/elastic_graph/schema_definition/mixins/verifies_graphql_name.rb +38 -0
  40. data/lib/elastic_graph/schema_definition/rake_tasks.rb +190 -0
  41. data/lib/elastic_graph/schema_definition/results.rb +404 -0
  42. data/lib/elastic_graph/schema_definition/schema_artifact_manager.rb +482 -0
  43. data/lib/elastic_graph/schema_definition/schema_elements/argument.rb +56 -0
  44. data/lib/elastic_graph/schema_definition/schema_elements/built_in_types.rb +1541 -0
  45. data/lib/elastic_graph/schema_definition/schema_elements/deprecated_element.rb +21 -0
  46. data/lib/elastic_graph/schema_definition/schema_elements/directive.rb +40 -0
  47. data/lib/elastic_graph/schema_definition/schema_elements/enum_type.rb +189 -0
  48. data/lib/elastic_graph/schema_definition/schema_elements/enum_value.rb +73 -0
  49. data/lib/elastic_graph/schema_definition/schema_elements/enum_value_namer.rb +89 -0
  50. data/lib/elastic_graph/schema_definition/schema_elements/enums_for_indexed_types.rb +82 -0
  51. data/lib/elastic_graph/schema_definition/schema_elements/field.rb +1085 -0
  52. data/lib/elastic_graph/schema_definition/schema_elements/field_path.rb +112 -0
  53. data/lib/elastic_graph/schema_definition/schema_elements/field_source.rb +16 -0
  54. data/lib/elastic_graph/schema_definition/schema_elements/graphql_sdl_enumerator.rb +113 -0
  55. data/lib/elastic_graph/schema_definition/schema_elements/input_field.rb +31 -0
  56. data/lib/elastic_graph/schema_definition/schema_elements/input_type.rb +60 -0
  57. data/lib/elastic_graph/schema_definition/schema_elements/interface_type.rb +72 -0
  58. data/lib/elastic_graph/schema_definition/schema_elements/list_counts_state.rb +40 -0
  59. data/lib/elastic_graph/schema_definition/schema_elements/object_type.rb +53 -0
  60. data/lib/elastic_graph/schema_definition/schema_elements/relationship.rb +218 -0
  61. data/lib/elastic_graph/schema_definition/schema_elements/scalar_type.rb +310 -0
  62. data/lib/elastic_graph/schema_definition/schema_elements/sort_order_enum_value.rb +36 -0
  63. data/lib/elastic_graph/schema_definition/schema_elements/sub_aggregation_path.rb +66 -0
  64. data/lib/elastic_graph/schema_definition/schema_elements/type_namer.rb +237 -0
  65. data/lib/elastic_graph/schema_definition/schema_elements/type_reference.rb +353 -0
  66. data/lib/elastic_graph/schema_definition/schema_elements/type_with_subfields.rb +579 -0
  67. data/lib/elastic_graph/schema_definition/schema_elements/union_type.rb +157 -0
  68. data/lib/elastic_graph/schema_definition/scripting/file_system_repository.rb +77 -0
  69. data/lib/elastic_graph/schema_definition/scripting/script.rb +48 -0
  70. data/lib/elastic_graph/schema_definition/scripting/scripts/field/as_day_of_week.painless +24 -0
  71. data/lib/elastic_graph/schema_definition/scripting/scripts/field/as_time_of_day.painless +41 -0
  72. data/lib/elastic_graph/schema_definition/scripting/scripts/filter/by_time_of_day.painless +22 -0
  73. data/lib/elastic_graph/schema_definition/scripting/scripts/update/index_data.painless +93 -0
  74. data/lib/elastic_graph/schema_definition/state.rb +212 -0
  75. data/lib/elastic_graph/schema_definition/test_support.rb +113 -0
  76. metadata +513 -0
@@ -0,0 +1,157 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/schema_definition/indexing/index"
10
+ require "elastic_graph/schema_definition/mixins/can_be_graphql_only"
11
+ require "elastic_graph/schema_definition/mixins/has_derived_graphql_type_customizations"
12
+ require "elastic_graph/schema_definition/mixins/has_directives"
13
+ require "elastic_graph/schema_definition/mixins/has_documentation"
14
+ require "elastic_graph/schema_definition/mixins/has_indices"
15
+ require "elastic_graph/schema_definition/mixins/has_readable_to_s_and_inspect"
16
+ require "elastic_graph/schema_definition/mixins/has_subtypes"
17
+ require "elastic_graph/schema_definition/mixins/supports_filtering_and_aggregation"
18
+ require "elastic_graph/schema_definition/mixins/verifies_graphql_name"
19
+
20
+ module ElasticGraph
21
+ module SchemaDefinition
22
+ module SchemaElements
23
+ # {include:API#union_type}
24
+ #
25
+ # @example Define a union type
26
+ # ElasticGraph.define_schema do |schema|
27
+ # schema.object_type "Card" do |t|
28
+ # # ...
29
+ # end
30
+ #
31
+ # schema.object_type "BankAccount" do |t|
32
+ # # ...
33
+ # end
34
+ #
35
+ # schema.object_type "BitcoinWallet" do |t|
36
+ # # ...
37
+ # end
38
+ #
39
+ # schema.union_type "FundingSource" do |t|
40
+ # t.subtype "Card"
41
+ # t.subtypes "BankAccount", "BitcoinWallet"
42
+ # end
43
+ # end
44
+ #
45
+ # @!attribute [r] schema_def_state
46
+ # @return [State] state of the schema
47
+ # @!attribute [rw] type_ref
48
+ # @private
49
+ # @!attribute [rw] subtype_refs
50
+ # @private
51
+ class UnionType < Struct.new(:schema_def_state, :type_ref, :subtype_refs)
52
+ prepend Mixins::VerifiesGraphQLName
53
+ include Mixins::CanBeGraphQLOnly
54
+ include Mixins::HasDocumentation
55
+ include Mixins::HasDirectives
56
+ include Mixins::SupportsFilteringAndAggregation
57
+ include Mixins::HasIndices
58
+ include Mixins::HasSubtypes
59
+ include Mixins::HasDerivedGraphQLTypeCustomizations
60
+ include Mixins::HasReadableToSAndInspect.new { |t| t.name }
61
+
62
+ # @private
63
+ def initialize(schema_def_state, name)
64
+ super(schema_def_state, schema_def_state.type_ref(name).to_final_form, Set.new) do
65
+ yield self
66
+ end
67
+ end
68
+
69
+ # @return [String] the name of the union type
70
+ def name
71
+ type_ref.name
72
+ end
73
+
74
+ # Defines a subtype of this union type.
75
+ #
76
+ # @param name [String] the name of an object type which is a member of this union type
77
+ # @return [void]
78
+ #
79
+ # @example
80
+ # ElasticGraph.define_schema do |schema|
81
+ # schema.object_type "Card" do |t|
82
+ # # ...
83
+ # end
84
+ #
85
+ # schema.union_type "FundingSource" do |t|
86
+ # t.subtype "Card"
87
+ # end
88
+ # end
89
def subtype(name)
  # Named `subtype_ref` rather than `type_ref` so the local does not shadow this struct's
  # own `type_ref` member (which identifies the union type itself, not one of its members).
  subtype_ref = schema_def_state.type_ref(name.to_s).to_final_form

  # `subtype_refs` is treated as a set of members: adding the same subtype twice raises a
  # `SchemaError` instead of being silently ignored.
  if subtype_refs.include?(subtype_ref)
    raise SchemaError, "Duplicate subtype on UnionType #{self.name}: #{name}"
  end

  subtype_refs << subtype_ref
end
98
+
99
+ # Defines multiple subtypes of this union type.
100
+ #
101
+ # @param names [Array<String>] names of object types which are members of this union type
102
+ # @return [void]
103
+ #
104
+ # @example Define a union type
105
+ # ElasticGraph.define_schema do |schema|
106
+ # schema.object_type "BankAccount" do |t|
107
+ # # ...
108
+ # end
109
+ #
110
+ # schema.object_type "BitcoinWallet" do |t|
111
+ # # ...
112
+ # end
113
+ #
114
+ # schema.union_type "FundingSource" do |t|
115
+ # t.subtypes "BankAccount", "BitcoinWallet"
116
+ # end
117
+ # end
118
+ def subtypes(*names)
119
+ names.flatten.each { |n| subtype(n) }
120
+ end
121
+
122
+ # @return [String] the formatted GraphQL SDL of the union type
123
def to_sdl
  # A union must have at least one member, so refuse to render an empty one.
  if subtype_refs.empty?
    raise SchemaError, "UnionType type #{name} has no subtypes, but must have at least one."
  end

  # `map` already returns an Array, so no extra `to_a` conversion is needed before joining.
  member_names = subtype_refs.map(&:name).join(" | ")

  "#{formatted_documentation}union #{name} #{directives_sdl(suffix_with: " ")}= #{member_names}"
end
130
+
131
+ # @private
132
+ def verify_graphql_correctness!
133
+ # Nothing to verify. `verify_graphql_correctness!` will be called on each subtype automatically.
134
+ end
135
+
136
+ # Various things check `mapping_options` on indexed types (usually object types, but can also happen on union types).
137
+ # We need to implement `mapping_options` here to satisfy those method calls, but we will never use custom mapping on
138
+ # a union type so we hardcode it to return an empty hash.
139
+ #
140
+ # @private
141
+ def mapping_options
142
+ {}
143
+ end
144
+
145
+ private
146
+
147
+ def resolve_subtypes
148
+ subtype_refs.map do |ref|
149
+ ref.as_object_type || raise(
150
+ SchemaError, "The subtype `#{ref}` of the UnionType `#{name}` is not a defined object type."
151
+ )
152
+ end
153
+ end
154
+ end
155
+ end
156
+ end
157
+ end
@@ -0,0 +1,77 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/error"
10
+ require "elastic_graph/schema_definition/scripting/script"
11
+ require "elastic_graph/support/memoizable_data"
12
+ require "pathname"
13
+
14
+ module ElasticGraph
15
+ module SchemaDefinition
16
+ # @private
17
+ module Scripting
18
+ # A simple abstraction that supports loading static scripts off of disk. The given directory
19
+ # is expected to have a sub-directory per script context, with individual scripts under the
20
+ # context sub-directories. The language is inferred from the script file extensions.
21
+ #
22
+ # @private
23
+ class FileSystemRepository < Support::MemoizableData.define(:dir)
24
+ # Based on https://www.elastic.co/guide/en/elasticsearch/reference/8.5/modules-scripting.html
25
# Maps a script file extension (including the leading dot) to the datastore script language name.
# Frozen so this shared lookup table cannot be mutated at runtime.
SUPPORTED_LANGUAGES_BY_EXTENSION = {
  ".painless" => "painless",
  ".expression" => "expression",
  ".mustache" => "mustache",
  ".java" => "java"
}.freeze
31
+
32
+ # The `Script` objects available in this file system repository.
33
+ def scripts
34
+ @scripts ||= ::Pathname.new(dir).children.sort.flat_map do |context_dir|
35
+ unless context_dir.directory?
36
+ raise InvalidScriptDirectoryError, "`#{dir}` has a file (#{context_dir}) that is not a context directory as expected."
37
+ end
38
+
39
+ context_dir.children.sort.map do |script_file|
40
+ unless script_file.file?
41
+ raise InvalidScriptDirectoryError, "`#{dir}` has extra directory nesting (#{script_file}) that is unexpected."
42
+ end
43
+
44
+ language = SUPPORTED_LANGUAGES_BY_EXTENSION[script_file.extname] || raise(
45
+ InvalidScriptDirectoryError, "`#{dir}` has a file (`#{script_file}`) that has an unrecognized file extension: #{script_file.extname}."
46
+ )
47
+
48
+ Script.new(
49
+ name: script_file.basename.sub_ext("").to_s,
50
+ source: script_file.read.strip,
51
+ language: language,
52
+ context: context_dir.basename.to_s
53
+ )
54
+ end
55
+ end.tap { |all_scripts| verify_no_duplicates!(all_scripts) }
56
+ end
57
+
58
+ # Map of script ids keyed by the `scoped_name` to allow easy lookup of the ids.
59
+ def script_ids_by_scoped_name
60
+ @script_ids_by_scoped_name ||= scripts.to_h { |s| [s.scoped_name, s.id] }
61
+ end
62
+
63
+ private
64
+
65
# Raises `InvalidScriptDirectoryError` if two or more of the given scripts share a `scoped_name`.
# Returns `nil` when every scoped name is unique.
def verify_no_duplicates!(scripts)
  # Only the group sizes matter here, so the key parameter is deliberately unused (`_` prefix).
  duplicate_scoped_names = scripts
    .group_by(&:scoped_name)
    .select { |_scoped_name, scripts_with_scoped_name| scripts_with_scoped_name.size > 1 }
    .keys

  return if duplicate_scoped_names.empty?

  raise InvalidScriptDirectoryError, "`#{dir}` has multiple scripts with the same scoped name, which is not allowed: #{duplicate_scoped_names.join(", ")}."
end
74
+ end
75
+ end
76
+ end
77
+ end
@@ -0,0 +1,48 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "digest/md5"
10
+ require "elastic_graph/support/memoizable_data"
11
+
12
+ module ElasticGraph
13
+ module SchemaDefinition
14
+ module Scripting
15
+ # @private
16
+ class Script < Support::MemoizableData.define(:name, :source, :language, :context)
17
+ # The id we use when storing the script in the datastore. The id is based partially on a hash of
18
+ # the source code to make script safely evolveable: when the source code of a script changes, its
19
+ # id changes, and the old and new versions continue to be accessible in the datastore, allowing
20
+ # old and new versions of the deployed ElasticGraph application to be running at the same time
21
+ # (as happens during a zero-downtime rolled-out deploy). Scripts are invoked by their id, so we
22
+ # can trust that when the code tries to use a specific version of a script, it'll definitely use
23
+ # that version.
24
+ def id
25
+ @id ||= "#{context}_#{name}_#{::Digest::MD5.hexdigest(source)}"
26
+ end
27
+
28
+ # The `name` scoped with the `context`. Due to how we structure static scripts on
29
+ # the file system (nested under a directory that names the `context`), a given `name`
30
+ # is only guaranteed to be unique within the scope of a given `context`. The `scoped_name`
31
+ # is how we will refer to a script from elsewhere in the code when we want to use it.
32
+ def scoped_name
33
+ @scoped_name ||= "#{context}/#{name}"
34
+ end
35
+
36
+ def to_artifact_payload
37
+ {
38
+ "context" => context,
39
+ "script" => {
40
+ "lang" => language,
41
+ "source" => source
42
+ }
43
+ }
44
+ end
45
+ end
46
+ end
47
+ end
48
+ end
@@ -0,0 +1,24 @@
1
+ // Check if required params are missing
2
+ if (params.offset_ms == null) {
3
+ throw new IllegalArgumentException("Missing required parameter: offset_ms");
4
+ }
5
+ if (params.time_zone == null) {
6
+ throw new IllegalArgumentException("Missing required parameter: time_zone");
7
+ }
8
+
9
+ // Set variables used in the loop
10
+ ZoneId zoneId = ZoneId.of(params.time_zone);
11
+ List results = new ArrayList();
12
+
13
+ for (ZonedDateTime timestamp : doc[params.field]) {
14
+ // Convert the timestamp to the specified time zone
15
+ ZonedDateTime zonedTimestamp = timestamp.withZoneSameInstant(zoneId);
16
+
17
+ // Adjust the timestamp based on the offset_ms parameter
18
+ ZonedDateTime adjustedTimestamp = zonedTimestamp.plus(params.offset_ms, ChronoUnit.MILLIS);
19
+
20
+ // Format and add the result to the list
21
+ results.add(adjustedTimestamp.getDayOfWeek().name());
22
+ }
23
+
24
+ return results;
@@ -0,0 +1,41 @@
1
+ // Check if required params are missing
2
+ if (params.offset_ms == null) {
3
+ throw new IllegalArgumentException("Missing required parameter: offset_ms");
4
+ }
5
+ if (params.time_zone == null) {
6
+ throw new IllegalArgumentException("Missing required parameter: time_zone");
7
+ }
8
+ if (params.interval == null) {
9
+ throw new IllegalArgumentException("Missing required parameter: interval");
10
+ }
11
+
12
+ // Set variables used in the loop
13
+ ZoneId zoneId = ZoneId.of(params.time_zone);
14
+ ChronoUnit intervalUnit;
15
+ if (params.interval == "hour") {
16
+ intervalUnit = ChronoUnit.HOURS;
17
+ } else if (params.interval == "minute") {
18
+ intervalUnit = ChronoUnit.MINUTES;
19
+ } else if (params.interval == "second") {
20
+ intervalUnit = ChronoUnit.SECONDS;
21
+ } else {
22
+ throw new IllegalArgumentException("Invalid interval value: " + params.interval);
23
+ }
24
+ DateTimeFormatter formatter = DateTimeFormatter.ISO_LOCAL_TIME;
25
+ List results = new ArrayList();
26
+
27
+ for (ZonedDateTime timestamp : doc[params.field]) {
28
+ // Convert the timestamp to the specified time zone
29
+ ZonedDateTime zonedTimestamp = timestamp.withZoneSameInstant(zoneId);
30
+
31
+ // Adjust the timestamp based on the offset_ms parameter
32
+ ZonedDateTime adjustedTimestamp = zonedTimestamp.plus(params.offset_ms, ChronoUnit.MILLIS);
33
+
34
+ // Truncate the timestamp to the specified interval
35
+ adjustedTimestamp = adjustedTimestamp.truncatedTo(intervalUnit);
36
+
37
+ // Format and add the result to the list
38
+ results.add(adjustedTimestamp.format(formatter));
39
+ }
40
+
41
+ return results;
@@ -0,0 +1,22 @@
1
+ ZoneId zoneId = ZoneId.of(params.time_zone);
2
+
3
+ for (ZonedDateTime timestamp : doc[params.field]) {
4
+ long docValue = timestamp
5
+ .withZoneSameInstant(zoneId)
6
+ .toLocalTime()
7
+ .toNanoOfDay();
8
+
9
+ // Perform comparisons based on whichever params are set.
10
+ // ElasticGraph takes care of passing us param values as nano-of-day so that we
11
+ // can directly and efficiently compare against `docValue`.
12
+ if ((params.gte == null || docValue >= params.gte) &&
13
+ (params.gt == null || docValue > params.gt) &&
14
+ (params.lte == null || docValue <= params.lte) &&
15
+ (params.lt == null || docValue < params.lt) &&
16
+ (params.equal_to_any_of == null || params.equal_to_any_of.contains(docValue))) {
17
+ return true;
18
+ }
19
+ }
20
+
21
+ // No timestamp values matched the params, so return `false`.
22
+ return false;
@@ -0,0 +1,93 @@
1
+ Map source = ctx._source;
2
+ String sourceId = params.sourceId;
3
+ String relationship = params.relationship;
4
+
5
+ // Numbers in JSON appear to be parsed as doubles, but we want the version stored as a long, so we need to cast it here.
6
+ long eventVersion = (long) params.version;
7
+
8
+ if (source.__sources == null) {
9
+ source.__sources = [];
10
+ }
11
+
12
+ if (source.__versions == null) {
13
+ source.__versions = [:];
14
+ }
15
+
16
+ if (source.__versions[relationship] == null) {
17
+ source.__versions[relationship] = [:];
18
+ }
19
+
20
+ Map relationshipVersionsMap = source.__versions.get(relationship);
21
+ List previousSourceIdsForRelationship = relationshipVersionsMap.keySet().stream().filter(id -> id != sourceId).collect(Collectors.toList());
22
+
23
// If versions are already recorded under this relationship for a different source id, the related
// document has apparently changed. Reject the update: the message below explains that such
// mutations are unsupported because they could break out-of-order processing guarantees.
if (previousSourceIdsForRelationship.size() > 0) {
  throw new IllegalArgumentException(
    "Cannot update document " + params.id + " " +
    "with data from related " + relationship + " " + sourceId + " " +
    "because the related " + relationship + " has apparently changed (was: " + previousSourceIdsForRelationship + "), " +
    "but mutations of relationships used with `sourced_from` are not supported because " +
    "allowing it could break ElasticGraph's out-of-order processing guarantees."
  );
}
33
+
34
+ // While the version in `__versions` is going to be used for the doc version in the future, for now
35
+ // we need to continue getting it from `__sourceVersions`. Both our old version and this version of this
36
+ // script keep the value in `__sourceVersions` up-to-date, whereas the old script only writes it to
37
+ // `__sourceVersions`. Until we have completely migrated off of the old script for all ElasticGraph
38
+ // clusters, we need to keep using it.
39
+ //
40
+ // Later, after the old script is no longer used by any clusters, we'll stop using `__sourceVersions`.
41
+ // TODO: switch to `__versions` when we no longer need to maintain compatibility with the old version of the script.
42
+ Number _versionForSourceType = source.get("__sourceVersions")?.get(params.sourceType)?.get(sourceId);
43
+ Number _versionForRelationship = relationshipVersionsMap.get(sourceId);
44
+
45
+ // Our JSON schema requires event versions to be non-negative, so we can safely use Long.MIN_VALUE as a stand-in when the value is null.
46
+ long versionForSourceType = _versionForSourceType == null ? Long.MIN_VALUE : _versionForSourceType.longValue();
47
+ long versionForRelationship = _versionForRelationship == null ? Long.MIN_VALUE : _versionForRelationship.longValue();
48
+
49
+ // Pick the larger of the two versions as our doc version. Note that `Math.max` didn't work for me here for
50
+ // reasons I don't understand, but a simple ternary works fine.
51
+ //
52
+ // In theory, we could just use `versionForSourceType` as the `docVersion` (and not even check `__versions` at all)
53
+ // since both the old version and this version maintain the doc version in `__sourceVersions`. However, that would
54
+ // prevent this version of the script from being forward-compatible with the planned next version of this script.
55
+ // In the next version, we plan to stop writing to `__sourceVersions`, and as we can't deploy that change atomically,
56
+ // this version of the script will continue to run after that has begun to be used. So this version of the script
57
+ // must consider which version is greater here, and not simply trust either version value.
58
+ long docVersion = versionForSourceType > versionForRelationship ? versionForSourceType : versionForRelationship;
59
+
60
+ if (docVersion >= eventVersion) {
61
+ throw new IllegalArgumentException("ElasticGraph update was a no-op: [" +
62
+ params.id + "]: version conflict, current version [" +
63
+ docVersion + "] is higher or equal to the one provided [" +
64
+ eventVersion + "]");
65
+ } else {
66
+ source.putAll(params.data);
67
+ Map __counts = params.__counts;
68
+
69
+ if (__counts != null) {
70
+ if (source.__counts == null) {
71
+ source.__counts = [:];
72
+ }
73
+
74
+ source.__counts.putAll(__counts);
75
+ }
76
+
77
+ source.id = params.id;
78
+ source.__versions[relationship][sourceId] = eventVersion;
79
+
80
+ // Record the relationship in `__sources` if it's not already there. We maintain it as an append-only set using a sorted list.
81
+ // This ensures deterministic ordering of its elements regardless of event ingestion order, and lets us check membership in O(log N) time.
82
+ //
83
+ // As per https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/util/Collections.html#binarySearch(java.util.List,java.lang.Object):
84
+ //
85
+ // > Returns the index of the search key, if it is contained in the list; otherwise, (-(insertion point) - 1).
86
+ // > The insertion point is defined as the point at which the key would be inserted into the list: the index
87
+ // > of the first element greater than the key, or list.size() if all elements in the list are less than the
88
+ // > specified key. Note that this guarantees that the return value will be >= 0 if and only if the key is found.
89
+ int sourceBinarySearchResult = Collections.binarySearch(source.__sources, relationship);
90
+ if (sourceBinarySearchResult < 0) {
91
+ source.__sources.add(-sourceBinarySearchResult - 1, relationship);
92
+ }
93
+ }
@@ -0,0 +1,212 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/constants"
10
+ require "elastic_graph/error"
11
+ require "elastic_graph/schema_definition/factory"
12
+ require "elastic_graph/schema_definition/mixins/has_readable_to_s_and_inspect"
13
+ require "elastic_graph/schema_definition/schema_elements/enum_value_namer"
14
+ require "elastic_graph/schema_definition/schema_elements/type_namer"
15
+ require "elastic_graph/schema_definition/schema_elements/sub_aggregation_path"
16
+
17
+ module ElasticGraph
18
+ module SchemaDefinition
19
+ # Encapsulates all state that needs to be managed while a schema is defined.
20
+ # This is separated from `API` to make it easy to expose some state management
21
+ # helper methods to our internal code without needing to expose it as part of
22
+ # the public API.
23
+ #
24
+ # @private
25
+ class State < Struct.new(
26
+ :api,
27
+ :schema_elements,
28
+ :index_document_sizes,
29
+ :types_by_name,
30
+ :object_types_by_name,
31
+ :scalar_types_by_name,
32
+ :enum_types_by_name,
33
+ :implementations_by_interface_ref,
34
+ :sdl_parts,
35
+ :paginated_collection_element_types,
36
+ :user_defined_fields,
37
+ :renamed_types_by_old_name,
38
+ :deleted_types_by_old_name,
39
+ :renamed_fields_by_type_name_and_old_field_name,
40
+ :deleted_fields_by_type_name_and_old_field_name,
41
+ :json_schema_version,
42
+ :json_schema_version_setter_location,
43
+ :graphql_extension_modules,
44
+ :initially_registered_built_in_types,
45
+ :built_in_types_customization_blocks,
46
+ :user_definition_complete,
47
+ :sub_aggregation_paths_by_type,
48
+ :type_refs_by_name,
49
+ :output,
50
+ :type_namer,
51
+ :enum_value_namer
52
+ )
53
+ include Mixins::HasReadableToSAndInspect.new
54
+
55
+ def self.with(
56
+ api:,
57
+ schema_elements:,
58
+ index_document_sizes:,
59
+ derived_type_name_formats:,
60
+ type_name_overrides:,
61
+ enum_value_overrides_by_type:,
62
+ output: $stdout
63
+ )
64
+ # @type var types_by_name: SchemaElements::typesByNameHash
65
+ types_by_name = {}
66
+
67
+ new(
68
+ api: api,
69
+ schema_elements: schema_elements,
70
+ index_document_sizes: index_document_sizes,
71
+ types_by_name: types_by_name,
72
+ object_types_by_name: {},
73
+ scalar_types_by_name: {},
74
+ enum_types_by_name: {},
75
+ implementations_by_interface_ref: ::Hash.new { |h, k| h[k] = ::Set.new },
76
+ sdl_parts: [],
77
+ paginated_collection_element_types: ::Set.new,
78
+ user_defined_fields: ::Set.new,
79
+ renamed_types_by_old_name: {},
80
+ deleted_types_by_old_name: {},
81
+ renamed_fields_by_type_name_and_old_field_name: ::Hash.new { |h, k| h[k] = {} },
82
+ deleted_fields_by_type_name_and_old_field_name: ::Hash.new { |h, k| h[k] = {} },
83
+ json_schema_version_setter_location: nil,
84
+ json_schema_version: nil,
85
+ graphql_extension_modules: [],
86
+ initially_registered_built_in_types: ::Set.new,
87
+ built_in_types_customization_blocks: [],
88
+ user_definition_complete: false,
89
+ sub_aggregation_paths_by_type: {},
90
+ type_refs_by_name: {},
91
+ type_namer: SchemaElements::TypeNamer.new(
92
+ format_overrides: derived_type_name_formats,
93
+ name_overrides: type_name_overrides
94
+ ),
95
+ enum_value_namer: SchemaElements::EnumValueNamer.new(enum_value_overrides_by_type),
96
+ output: output
97
+ )
98
+ end
99
+
100
+ # @dynamic index_document_sizes?
101
+ alias_method :index_document_sizes?, :index_document_sizes
102
+
103
+ def type_ref(name)
104
+ # Type references are immutable and can be safely cached. Here we cache them because we've observed
105
+ # it having a noticeable impact on our test suite runtime.
106
+ type_refs_by_name[name] ||= factory.new_type_reference(name)
107
+ end
108
+
109
+ def register_object_interface_or_union_type(type)
110
+ register_type(type, object_types_by_name)
111
+ end
112
+
113
+ def register_enum_type(type)
114
+ register_type(type, enum_types_by_name)
115
+ end
116
+
117
+ def register_scalar_type(type)
118
+ register_type(type, scalar_types_by_name)
119
+ end
120
+
121
+ def register_input_type(type)
122
+ register_type(type)
123
+ end
124
+
125
+ def register_renamed_type(type_name, from:, defined_at:, defined_via:)
126
+ renamed_types_by_old_name[from] = factory.new_deprecated_element(
127
+ type_name,
128
+ defined_at: defined_at,
129
+ defined_via: defined_via
130
+ )
131
+ end
132
+
133
+ def register_deleted_type(type_name, defined_at:, defined_via:)
134
+ deleted_types_by_old_name[type_name] = factory.new_deprecated_element(
135
+ type_name,
136
+ defined_at: defined_at,
137
+ defined_via: defined_via
138
+ )
139
+ end
140
+
141
+ def register_renamed_field(type_name, from:, to:, defined_at:, defined_via:)
142
+ renamed_fields_by_type_name_and_old_field_name[type_name][from] = factory.new_deprecated_element(
143
+ to,
144
+ defined_at: defined_at,
145
+ defined_via: defined_via
146
+ )
147
+ end
148
+
149
+ def register_deleted_field(type_name, field_name, defined_at:, defined_via:)
150
+ deleted_fields_by_type_name_and_old_field_name[type_name][field_name] = factory.new_deprecated_element(
151
+ field_name,
152
+ defined_at: defined_at,
153
+ defined_via: defined_via
154
+ )
155
+ end
156
+
157
+ # Registers the given `field` as a user-defined field. This method does not itself check `user_definition_complete`.
158
+ def register_user_defined_field(field)
159
+ user_defined_fields << field
160
+ end
161
+
162
# Returns the user-defined fields grouped by the name of their fully unwrapped type.
# The result is memoized on first successful access.
#
# Raises `SchemaError` if called before `user_definition_complete` is truthy.
def user_defined_field_references_by_type_name
  @user_defined_field_references_by_type_name ||= begin
    # When this raises, nothing is assigned, so a later call (after completion) still computes the result.
    unless user_definition_complete
      raise SchemaError, "Cannot access `user_defined_field_references_by_type_name` until the schema definition is complete."
    end

    # The outer `||=` already memoizes the value of this `begin` block, so no inner assignment is needed.
    user_defined_fields.group_by { |f| f.type.fully_unwrapped.name }
  end
end
172
+
173
+ def factory
174
+ @factory ||= Factory.new(self)
175
+ end
176
+
177
+ def enums_for_indexed_types
178
+ @enums_for_indexed_types ||= factory.new_enums_for_indexed_types
179
+ end
180
+
181
+ def sub_aggregation_paths_for(type)
182
+ sub_aggregation_paths_by_type.fetch(type) do
183
+ SchemaElements::SubAggregationPath.paths_for(type, schema_def_state: self).uniq.tap do |paths|
184
+ # Cache our results if the user has finished their schema definition. Otherwise, it's not safe to cache.
185
+ # :nocov: -- we never execute this with `user_definition_complete == false`
186
+ sub_aggregation_paths_by_type[type] = paths if user_definition_complete
187
+ # :nocov:
188
+ end
189
+ end
190
+ end
191
+
192
+ private
193
+
194
+ RESERVED_TYPE_NAMES = [EVENT_ENVELOPE_JSON_SCHEMA_NAME].to_set
195
+
196
+ def register_type(type, additional_type_index = nil)
197
+ name = (_ = type).name
198
+
199
+ if RESERVED_TYPE_NAMES.include?(name)
200
+ raise SchemaError, "`#{name}` cannot be used as a schema type because it is a reserved name."
201
+ end
202
+
203
+ if types_by_name.key?(name)
204
+ raise SchemaError, "Duplicate definition for type #{name} detected. Each type can only be defined once."
205
+ end
206
+
207
+ additional_type_index[name] = type if additional_type_index
208
+ types_by_name[name] = type
209
+ end
210
+ end
211
+ end
212
+ end