elasticgraph-schema_definition 0.18.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (76) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +21 -0
  3. data/README.md +7 -0
  4. data/elasticgraph-schema_definition.gemspec +26 -0
  5. data/lib/elastic_graph/schema_definition/api.rb +359 -0
  6. data/lib/elastic_graph/schema_definition/factory.rb +506 -0
  7. data/lib/elastic_graph/schema_definition/indexing/derived_fields/append_only_set.rb +79 -0
  8. data/lib/elastic_graph/schema_definition/indexing/derived_fields/field_initializer_support.rb +59 -0
  9. data/lib/elastic_graph/schema_definition/indexing/derived_fields/immutable_value.rb +99 -0
  10. data/lib/elastic_graph/schema_definition/indexing/derived_fields/min_or_max_value.rb +62 -0
  11. data/lib/elastic_graph/schema_definition/indexing/derived_indexed_type.rb +346 -0
  12. data/lib/elastic_graph/schema_definition/indexing/event_envelope.rb +74 -0
  13. data/lib/elastic_graph/schema_definition/indexing/field.rb +181 -0
  14. data/lib/elastic_graph/schema_definition/indexing/field_reference.rb +51 -0
  15. data/lib/elastic_graph/schema_definition/indexing/field_type/enum.rb +65 -0
  16. data/lib/elastic_graph/schema_definition/indexing/field_type/object.rb +113 -0
  17. data/lib/elastic_graph/schema_definition/indexing/field_type/scalar.rb +51 -0
  18. data/lib/elastic_graph/schema_definition/indexing/field_type/union.rb +70 -0
  19. data/lib/elastic_graph/schema_definition/indexing/index.rb +318 -0
  20. data/lib/elastic_graph/schema_definition/indexing/json_schema_field_metadata.rb +34 -0
  21. data/lib/elastic_graph/schema_definition/indexing/json_schema_with_metadata.rb +234 -0
  22. data/lib/elastic_graph/schema_definition/indexing/list_counts_mapping.rb +53 -0
  23. data/lib/elastic_graph/schema_definition/indexing/relationship_resolver.rb +96 -0
  24. data/lib/elastic_graph/schema_definition/indexing/rollover_config.rb +25 -0
  25. data/lib/elastic_graph/schema_definition/indexing/update_target_factory.rb +54 -0
  26. data/lib/elastic_graph/schema_definition/indexing/update_target_resolver.rb +195 -0
  27. data/lib/elastic_graph/schema_definition/json_schema_pruner.rb +61 -0
  28. data/lib/elastic_graph/schema_definition/mixins/can_be_graphql_only.rb +31 -0
  29. data/lib/elastic_graph/schema_definition/mixins/has_derived_graphql_type_customizations.rb +119 -0
  30. data/lib/elastic_graph/schema_definition/mixins/has_directives.rb +65 -0
  31. data/lib/elastic_graph/schema_definition/mixins/has_documentation.rb +74 -0
  32. data/lib/elastic_graph/schema_definition/mixins/has_indices.rb +281 -0
  33. data/lib/elastic_graph/schema_definition/mixins/has_readable_to_s_and_inspect.rb +46 -0
  34. data/lib/elastic_graph/schema_definition/mixins/has_subtypes.rb +116 -0
  35. data/lib/elastic_graph/schema_definition/mixins/has_type_info.rb +181 -0
  36. data/lib/elastic_graph/schema_definition/mixins/implements_interfaces.rb +122 -0
  37. data/lib/elastic_graph/schema_definition/mixins/supports_default_value.rb +47 -0
  38. data/lib/elastic_graph/schema_definition/mixins/supports_filtering_and_aggregation.rb +267 -0
  39. data/lib/elastic_graph/schema_definition/mixins/verifies_graphql_name.rb +38 -0
  40. data/lib/elastic_graph/schema_definition/rake_tasks.rb +190 -0
  41. data/lib/elastic_graph/schema_definition/results.rb +404 -0
  42. data/lib/elastic_graph/schema_definition/schema_artifact_manager.rb +482 -0
  43. data/lib/elastic_graph/schema_definition/schema_elements/argument.rb +56 -0
  44. data/lib/elastic_graph/schema_definition/schema_elements/built_in_types.rb +1541 -0
  45. data/lib/elastic_graph/schema_definition/schema_elements/deprecated_element.rb +21 -0
  46. data/lib/elastic_graph/schema_definition/schema_elements/directive.rb +40 -0
  47. data/lib/elastic_graph/schema_definition/schema_elements/enum_type.rb +189 -0
  48. data/lib/elastic_graph/schema_definition/schema_elements/enum_value.rb +73 -0
  49. data/lib/elastic_graph/schema_definition/schema_elements/enum_value_namer.rb +89 -0
  50. data/lib/elastic_graph/schema_definition/schema_elements/enums_for_indexed_types.rb +82 -0
  51. data/lib/elastic_graph/schema_definition/schema_elements/field.rb +1085 -0
  52. data/lib/elastic_graph/schema_definition/schema_elements/field_path.rb +112 -0
  53. data/lib/elastic_graph/schema_definition/schema_elements/field_source.rb +16 -0
  54. data/lib/elastic_graph/schema_definition/schema_elements/graphql_sdl_enumerator.rb +113 -0
  55. data/lib/elastic_graph/schema_definition/schema_elements/input_field.rb +31 -0
  56. data/lib/elastic_graph/schema_definition/schema_elements/input_type.rb +60 -0
  57. data/lib/elastic_graph/schema_definition/schema_elements/interface_type.rb +72 -0
  58. data/lib/elastic_graph/schema_definition/schema_elements/list_counts_state.rb +40 -0
  59. data/lib/elastic_graph/schema_definition/schema_elements/object_type.rb +53 -0
  60. data/lib/elastic_graph/schema_definition/schema_elements/relationship.rb +218 -0
  61. data/lib/elastic_graph/schema_definition/schema_elements/scalar_type.rb +310 -0
  62. data/lib/elastic_graph/schema_definition/schema_elements/sort_order_enum_value.rb +36 -0
  63. data/lib/elastic_graph/schema_definition/schema_elements/sub_aggregation_path.rb +66 -0
  64. data/lib/elastic_graph/schema_definition/schema_elements/type_namer.rb +237 -0
  65. data/lib/elastic_graph/schema_definition/schema_elements/type_reference.rb +353 -0
  66. data/lib/elastic_graph/schema_definition/schema_elements/type_with_subfields.rb +579 -0
  67. data/lib/elastic_graph/schema_definition/schema_elements/union_type.rb +157 -0
  68. data/lib/elastic_graph/schema_definition/scripting/file_system_repository.rb +77 -0
  69. data/lib/elastic_graph/schema_definition/scripting/script.rb +48 -0
  70. data/lib/elastic_graph/schema_definition/scripting/scripts/field/as_day_of_week.painless +24 -0
  71. data/lib/elastic_graph/schema_definition/scripting/scripts/field/as_time_of_day.painless +41 -0
  72. data/lib/elastic_graph/schema_definition/scripting/scripts/filter/by_time_of_day.painless +22 -0
  73. data/lib/elastic_graph/schema_definition/scripting/scripts/update/index_data.painless +93 -0
  74. data/lib/elastic_graph/schema_definition/state.rb +212 -0
  75. data/lib/elastic_graph/schema_definition/test_support.rb +113 -0
  76. metadata +513 -0
@@ -0,0 +1,157 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/schema_definition/indexing/index"
10
+ require "elastic_graph/schema_definition/mixins/can_be_graphql_only"
11
+ require "elastic_graph/schema_definition/mixins/has_derived_graphql_type_customizations"
12
+ require "elastic_graph/schema_definition/mixins/has_directives"
13
+ require "elastic_graph/schema_definition/mixins/has_documentation"
14
+ require "elastic_graph/schema_definition/mixins/has_indices"
15
+ require "elastic_graph/schema_definition/mixins/has_readable_to_s_and_inspect"
16
+ require "elastic_graph/schema_definition/mixins/has_subtypes"
17
+ require "elastic_graph/schema_definition/mixins/supports_filtering_and_aggregation"
18
+ require "elastic_graph/schema_definition/mixins/verifies_graphql_name"
19
+
20
+ module ElasticGraph
21
+ module SchemaDefinition
22
+ module SchemaElements
23
+ # {include:API#union_type}
24
+ #
25
+ # @example Define a union type
26
+ # ElasticGraph.define_schema do |schema|
27
+ # schema.object_type "Card" do |t|
28
+ # # ...
29
+ # end
30
+ #
31
+ # schema.object_type "BankAccount" do |t|
32
+ # # ...
33
+ # end
34
+ #
35
+ # schema.object_type "BitcoinWallet" do |t|
36
+ # # ...
37
+ # end
38
+ #
39
+ # schema.union_type "FundingSource" do |t|
40
+ # t.subtype "Card"
41
+ # t.subtypes "BankAccount", "BitcoinWallet"
42
+ # end
43
+ # end
44
+ #
45
+ # @!attribute [r] schema_def_state
46
+ # @return [State] state of the schema
47
+ # @!attribute [rw] type_ref
48
+ # @private
49
+ # @!attribute [rw] subtype_refs
50
+ # @private
51
class UnionType < Struct.new(:schema_def_state, :type_ref, :subtype_refs)
  prepend Mixins::VerifiesGraphQLName
  include Mixins::CanBeGraphQLOnly
  include Mixins::HasDocumentation
  include Mixins::HasDirectives
  include Mixins::SupportsFilteringAndAggregation
  include Mixins::HasIndices
  include Mixins::HasSubtypes
  include Mixins::HasDerivedGraphQLTypeCustomizations
  include Mixins::HasReadableToSAndInspect.new { |t| t.name }

  # Resolves `name` to its final-form type reference and starts with an empty set of subtypes.
  # The block handed to `super` yields this instance to the caller's block.
  #
  # @param schema_def_state [State] state of the schema
  # @param name [String] the name of the union type
  # @private
  def initialize(schema_def_state, name)
    final_type_ref = schema_def_state.type_ref(name).to_final_form
    super(schema_def_state, final_type_ref, Set.new) { yield self }
  end

  # @return [String] the name of the union type
  def name
    type_ref.name
  end

  # Defines a subtype of this union type.
  #
  # @param name [String] the name of an object type which is a member of this union type
  # @return [void]
  # @raise [SchemaError] when the same subtype has already been added to this union type
  #
  # @example
  #   ElasticGraph.define_schema do |schema|
  #     schema.object_type "Card" do |t|
  #       # ...
  #     end
  #
  #     schema.union_type "FundingSource" do |t|
  #       t.subtype "Card"
  #     end
  #   end
  def subtype(name)
    # Named `member_ref` (rather than `type_ref`) so the union's own `type_ref` member is not shadowed.
    member_ref = schema_def_state.type_ref(name.to_s).to_final_form

    # `self.name` is required here: the bare `name` is the method argument.
    raise SchemaError, "Duplicate subtype on UnionType #{self.name}: #{name}" if subtype_refs.include?(member_ref)

    subtype_refs << member_ref
  end

  # Defines multiple subtypes of this union type.
  #
  # @param names [Array<String>] names of object types which are members of this union type
  # @return [void]
  #
  # @example Define a union type
  #   ElasticGraph.define_schema do |schema|
  #     schema.object_type "BankAccount" do |t|
  #       # ...
  #     end
  #
  #     schema.object_type "BitcoinWallet" do |t|
  #       # ...
  #     end
  #
  #     schema.union_type "FundingSource" do |t|
  #       t.subtypes "BankAccount", "BitcoinWallet"
  #     end
  #   end
  def subtypes(*names)
    # `flatten` lets callers pass either a list of names or a single array of names.
    names.flatten.each { |subtype_name| subtype(subtype_name) }
  end

  # @return [String] the formatted GraphQL SDL of the union type
  # @raise [SchemaError] when no subtypes have been defined
  def to_sdl
    if subtype_refs.empty?
      raise SchemaError, "UnionType type #{name} has no subtypes, but must have at least one."
    end

    documentation = formatted_documentation
    directives = directives_sdl(suffix_with: " ")
    members = subtype_refs.map(&:name).join(" | ")

    "#{documentation}union #{name} #{directives}= #{members}"
  end

  # @private
  def verify_graphql_correctness!
    # Nothing to verify. `verify_graphql_correctness!` will be called on each subtype automatically.
  end

  # Various things check `mapping_options` on indexed types (usually object types, but can also happen on union types).
  # We need to implement `mapping_options` here to satisfy those method calls, but we will never use custom mapping on
  # a union type so we hardcode it to return an empty hash.
  #
  # @private
  def mapping_options
    {}
  end

  private

  # Maps each subtype reference to its object type, failing on any reference
  # for which `as_object_type` returns nothing.
  def resolve_subtypes
    subtype_refs.map do |ref|
      object_type = ref.as_object_type
      next object_type if object_type

      raise SchemaError, "The subtype `#{ref}` of the UnionType `#{name}` is not a defined object type."
    end
  end
end
155
+ end
156
+ end
157
+ end
@@ -0,0 +1,77 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/error"
10
+ require "elastic_graph/schema_definition/scripting/script"
11
+ require "elastic_graph/support/memoizable_data"
12
+ require "pathname"
13
+
14
+ module ElasticGraph
15
+ module SchemaDefinition
16
+ # @private
17
+ module Scripting
18
+ # A simple abstraction that supports loading static scripts off of disk. The given directory
19
+ # is expected to have a sub-directory per script context, with individual scripts under the
20
+ # context sub-directories. The language is inferred from the script file extensions.
21
+ #
22
+ # @private
23
class FileSystemRepository < Support::MemoizableData.define(:dir)
  # Maps a script file extension to the datastore scripting language it denotes.
  # Based on https://www.elastic.co/guide/en/elasticsearch/reference/8.5/modules-scripting.html
  #
  # Frozen so that the lookup table cannot be mutated at runtime.
  SUPPORTED_LANGUAGES_BY_EXTENSION = {
    ".painless" => "painless",
    ".expression" => "expression",
    ".mustache" => "mustache",
    ".java" => "java"
  }.freeze

  # The `Script` objects available in this file system repository.
  #
  # Every child of `dir` must be a context directory, and every child of a context
  # directory must be a script file with a recognized extension. Children are visited
  # in sorted order, and the result is memoized.
  #
  # @return [Array<Script>] all scripts found under `dir`
  # @raise [InvalidScriptDirectoryError] when the directory layout is not as expected, when a file
  #   has an unrecognized extension, or when two scripts share a scoped name
  def scripts
    @scripts ||= ::Pathname.new(dir).children.sort.flat_map do |context_dir|
      unless context_dir.directory?
        raise InvalidScriptDirectoryError, "`#{dir}` has a file (#{context_dir}) that is not a context directory as expected."
      end

      context_dir.children.sort.map { |script_file| load_script(context_dir, script_file) }
    end.tap { |all_scripts| verify_no_duplicates!(all_scripts) }
  end

  # Map of script ids keyed by the `scoped_name` to allow easy lookup of the ids.
  #
  # @return [Hash<String, String>]
  def script_ids_by_scoped_name
    @script_ids_by_scoped_name ||= scripts.to_h { |s| [s.scoped_name, s.id] }
  end

  private

  # Builds the `Script` for a single file found directly under a context directory.
  def load_script(context_dir, script_file)
    unless script_file.file?
      raise InvalidScriptDirectoryError, "`#{dir}` has extra directory nesting (#{script_file}) that is unexpected."
    end

    language = SUPPORTED_LANGUAGES_BY_EXTENSION[script_file.extname] || raise(
      InvalidScriptDirectoryError, "`#{dir}` has a file (`#{script_file}`) that has an unrecognized file extension: #{script_file.extname}."
    )

    Script.new(
      # The script name is the file name without its extension.
      name: script_file.basename.sub_ext("").to_s,
      source: script_file.read.strip,
      language: language,
      # The context is the name of the directory the file lives in.
      context: context_dir.basename.to_s
    )
  end

  # Raises if any two of the given scripts share a `scoped_name`.
  def verify_no_duplicates!(scripts)
    duplicate_scoped_names = scripts.group_by(&:scoped_name).filter_map do |scoped_name, scripts_with_scoped_name|
      scoped_name if scripts_with_scoped_name.size > 1
    end

    return if duplicate_scoped_names.empty?

    raise InvalidScriptDirectoryError, "`#{dir}` has multiple scripts with the same scoped name, which is not allowed: #{duplicate_scoped_names.join(", ")}."
  end
end
75
+ end
76
+ end
77
+ end
@@ -0,0 +1,48 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "digest/md5"
10
+ require "elastic_graph/support/memoizable_data"
11
+
12
+ module ElasticGraph
13
+ module SchemaDefinition
14
+ module Scripting
15
+ # @private
16
class Script < Support::MemoizableData.define(:name, :source, :language, :context)
  # The id we use when storing the script in the datastore. The id is based partially on a hash of
  # the source code to make scripts safely evolvable: when the source code of a script changes, its
  # id changes, and the old and new versions continue to be accessible in the datastore, allowing
  # old and new versions of the deployed ElasticGraph application to be running at the same time
  # (as happens during a zero-downtime rolled-out deploy). Scripts are invoked by their id, so we
  # can trust that when the code tries to use a specific version of a script, it'll definitely use
  # that version.
  #
  # @return [String] `<context>_<name>_<MD5 hex digest of the source>`
  def id
    @id ||= begin
      source_digest = ::Digest::MD5.hexdigest(source)
      "#{context}_#{name}_#{source_digest}"
    end
  end

  # The `name` scoped with the `context`. Due to how we structure static scripts on
  # the file system (nested under a directory that names the `context`), a given `name`
  # is only guaranteed to be unique within the scope of a given `context`. The `scoped_name`
  # is how we will refer to a script from elsewhere in the code when we want to use it.
  #
  # @return [String] `<context>/<name>`
  def scoped_name
    @scoped_name ||= "#{context}/#{name}"
  end

  # @return [Hash<String, Object>] the script's context plus its language and source, keyed by strings
  def to_artifact_payload
    script_body = {
      "lang" => language,
      "source" => source
    }

    {
      "context" => context,
      "script" => script_body
    }
  end
end
46
+ end
47
+ end
48
+ end
@@ -0,0 +1,24 @@
1
// `offset_ms` and `time_zone` are both mandatory. They are checked in this order so that
// a request missing both reports `offset_ms` first.
if (params.offset_ms == null) {
  throw new IllegalArgumentException("Missing required parameter: offset_ms");
}
if (params.time_zone == null) {
  throw new IllegalArgumentException("Missing required parameter: time_zone");
}

// Resolved once, outside the loop.
ZoneId targetZone = ZoneId.of(params.time_zone);
List dayNames = new ArrayList();

for (ZonedDateTime value : doc[params.field]) {
  // Re-express the instant in the requested time zone, then shift it by `offset_ms`.
  ZonedDateTime shifted = value
    .withZoneSameInstant(targetZone)
    .plus(params.offset_ms, ChronoUnit.MILLIS);

  // Record the name of the day of week of the shifted timestamp.
  dayNames.add(shifted.getDayOfWeek().name());
}

return dayNames;
@@ -0,0 +1,41 @@
1
// `offset_ms`, `time_zone` and `interval` are all mandatory. They are checked in this order
// so that the first missing one (in that order) is the one reported.
if (params.offset_ms == null) {
  throw new IllegalArgumentException("Missing required parameter: offset_ms");
}
if (params.time_zone == null) {
  throw new IllegalArgumentException("Missing required parameter: time_zone");
}
if (params.interval == null) {
  throw new IllegalArgumentException("Missing required parameter: interval");
}

// Everything below is resolved once, outside the loop.
ZoneId targetZone = ZoneId.of(params.time_zone);

// Translate the `interval` param into the unit we truncate to; anything else is rejected.
ChronoUnit truncationUnit;
if (params.interval == "hour") {
  truncationUnit = ChronoUnit.HOURS;
} else if (params.interval == "minute") {
  truncationUnit = ChronoUnit.MINUTES;
} else if (params.interval == "second") {
  truncationUnit = ChronoUnit.SECONDS;
} else {
  throw new IllegalArgumentException("Invalid interval value: " + params.interval);
}

DateTimeFormatter timeFormatter = DateTimeFormatter.ISO_LOCAL_TIME;
List formattedTimes = new ArrayList();

for (ZonedDateTime value : doc[params.field]) {
  // Re-express the instant in the requested time zone, shift it by `offset_ms`,
  // and then truncate it to the requested interval.
  ZonedDateTime truncated = value
    .withZoneSameInstant(targetZone)
    .plus(params.offset_ms, ChronoUnit.MILLIS)
    .truncatedTo(truncationUnit);

  // Format the local time portion and add it to the results.
  formattedTimes.add(truncated.format(timeFormatter));
}

return formattedTimes;
@@ -0,0 +1,22 @@
1
ZoneId targetZone = ZoneId.of(params.time_zone);

for (ZonedDateTime value : doc[params.field]) {
  // The local time-of-day of this value in the requested zone, as nanoseconds since midnight.
  long nanoOfDay = value.withZoneSameInstant(targetZone).toLocalTime().toNanoOfDay();

  // Each bound only applies when its param is set. ElasticGraph takes care of passing us
  // param values as nano-of-day so that we can directly and efficiently compare against
  // `nanoOfDay`. A value that violates any set bound is skipped.
  if (params.gte != null && nanoOfDay < params.gte) {
    continue;
  }
  if (params.gt != null && nanoOfDay <= params.gt) {
    continue;
  }
  if (params.lte != null && nanoOfDay > params.lte) {
    continue;
  }
  if (params.lt != null && nanoOfDay >= params.lt) {
    continue;
  }
  if (params.equal_to_any_of != null && !params.equal_to_any_of.contains(nanoOfDay)) {
    continue;
  }

  // This value satisfied every bound that was set, so the document matches.
  return true;
}

// No timestamp values matched the params, so return `false`.
return false;
@@ -0,0 +1,93 @@
1
Map source = ctx._source;
String sourceId = params.sourceId;
String relationship = params.relationship;

// Numbers in JSON appear to be parsed as doubles, but we want the version stored as a long, so we need to cast it here.
long eventVersion = (long) params.version;

// Lazily initialize the bookkeeping fields on documents that don't have them yet.
if (source.__sources == null) {
  source.__sources = [];
}

if (source.__versions == null) {
  source.__versions = [:];
}

if (source.__versions[relationship] == null) {
  source.__versions[relationship] = [:];
}

// Any id already recorded for this relationship other than `sourceId` means the related document has changed.
Map relationshipVersionsMap = source.__versions.get(relationship);
List previousSourceIdsForRelationship = relationshipVersionsMap.keySet().stream().filter(id -> id != sourceId).collect(Collectors.toList());

if (previousSourceIdsForRelationship.size() > 0) {
  // Show a lone previous id bare, and multiple previous ids as a list.
  String previousIdDescription = previousSourceIdsForRelationship.size() == 1 ? previousSourceIdsForRelationship.get(0) : previousSourceIdsForRelationship.toString();
  throw new IllegalArgumentException(
    "Cannot update document " + params.id + " " +
    "with data from related " + relationship + " " + sourceId + " " +
    "because the related " + relationship + " has apparently changed (was: " + previousIdDescription + "), " +
    "but mutations of relationships used with `sourced_from` are not supported because " +
    "allowing it could break ElasticGraph's out-of-order processing guarantees."
  );
}

// While the version in `__versions` is going to be used for the doc version in the future, for now
// we need to continue getting it from `__sourceVersions`. Both our old version and this version of this
// script keep the value in `__sourceVersions` up-to-date, whereas the old script only writes it to
// `__sourceVersions`. Until we have completely migrated off of the old script for all ElasticGraph
// clusters, we need to keep using it.
//
// Later, after the old script is no longer used by any clusters, we'll stop using `__sourceVersions`.
// TODO: switch to `__versions` when we no longer need to maintain compatibility with the old version of the script.
Number _versionForSourceType = source.get("__sourceVersions")?.get(params.sourceType)?.get(sourceId);
Number _versionForRelationship = relationshipVersionsMap.get(sourceId);

// Our JSON schema requires event versions to be non-negative, so we can safely use Long.MIN_VALUE as a stand-in when the value is null.
long versionForSourceType = _versionForSourceType == null ? Long.MIN_VALUE : _versionForSourceType.longValue();
long versionForRelationship = _versionForRelationship == null ? Long.MIN_VALUE : _versionForRelationship.longValue();

// Pick the larger of the two versions as our doc version. Note that `Math.max` didn't work for me here for
// reasons I don't understand, but a simple ternary works fine.
//
// In theory, we could just use `versionForSourceType` as the `docVersion` (and not even check `__versions` at all)
// since both the old version and this version maintain the doc version in `__sourceVersions`. However, that would
// prevent this version of the script from being forward-compatible with the planned next version of this script.
// In the next version, we plan to stop writing to `__sourceVersions`, and as we can't deploy that change atomically,
// this version of the script will continue to run after that has begun to be used. So this version of the script
// must consider which version is greater here, and not simply trust either version value.
long docVersion = versionForSourceType > versionForRelationship ? versionForSourceType : versionForRelationship;

if (docVersion >= eventVersion) {
  // The stored data is at least as new as this event, so reject the update.
  throw new IllegalArgumentException("ElasticGraph update was a no-op: [" +
    params.id + "]: version conflict, current version [" +
    docVersion + "] is higher or equal to the one provided [" +
    eventVersion + "]");
} else {
  source.putAll(params.data);
  Map __counts = params.__counts;

  // Merge the incoming counts (when given) into any counts already on the document.
  if (__counts != null) {
    if (source.__counts == null) {
      source.__counts = [:];
    }

    source.__counts.putAll(__counts);
  }

  source.id = params.id;
  source.__versions[relationship][sourceId] = eventVersion;

  // Record the relationship in `__sources` if it's not already there. We maintain it as an append-only set using a sorted list.
  // This ensures deterministic ordering of its elements regardless of event ingestion order, and lets us check membership in O(log N) time.
  //
  // As per https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/util/Collections.html#binarySearch(java.util.List,java.lang.Object):
  //
  // > Returns the index of the search key, if it is contained in the list; otherwise, (-(insertion point) - 1).
  // > The insertion point is defined as the point at which the key would be inserted into the list: the index
  // > of the first element greater than the key, or list.size() if all elements in the list are less than the
  // > specified key. Note that this guarantees that the return value will be >= 0 if and only if the key is found.
  int sourceBinarySearchResult = Collections.binarySearch(source.__sources, relationship);
  if (sourceBinarySearchResult < 0) {
    // A negative result encodes the insertion point; decode it and insert there to keep the list sorted.
    source.__sources.add(-sourceBinarySearchResult - 1, relationship);
  }
}
@@ -0,0 +1,212 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/constants"
10
+ require "elastic_graph/error"
11
+ require "elastic_graph/schema_definition/factory"
12
+ require "elastic_graph/schema_definition/mixins/has_readable_to_s_and_inspect"
13
+ require "elastic_graph/schema_definition/schema_elements/enum_value_namer"
14
+ require "elastic_graph/schema_definition/schema_elements/type_namer"
15
+ require "elastic_graph/schema_definition/schema_elements/sub_aggregation_path"
16
+
17
+ module ElasticGraph
18
+ module SchemaDefinition
19
+ # Encapsulates all state that needs to be managed while a schema is defined.
20
+ # This is separated from `API` to make it easy to expose some state management
21
+ # helper methods to our internal code without needing to expose it as part of
22
+ # the public API.
23
+ #
24
+ # @private
25
class State < Struct.new(
  :api,
  :schema_elements,
  :index_document_sizes,
  :types_by_name,
  :object_types_by_name,
  :scalar_types_by_name,
  :enum_types_by_name,
  :implementations_by_interface_ref,
  :sdl_parts,
  :paginated_collection_element_types,
  :user_defined_fields,
  :renamed_types_by_old_name,
  :deleted_types_by_old_name,
  :renamed_fields_by_type_name_and_old_field_name,
  :deleted_fields_by_type_name_and_old_field_name,
  :json_schema_version,
  :json_schema_version_setter_location,
  :graphql_extension_modules,
  :initially_registered_built_in_types,
  :built_in_types_customization_blocks,
  :user_definition_complete,
  :sub_aggregation_paths_by_type,
  :type_refs_by_name,
  :output,
  :type_namer,
  :enum_value_namer
)
  include Mixins::HasReadableToSAndInspect.new

  # Builds a `State` from the handful of caller-supplied settings, starting every registry
  # (type indexes, rename/delete records, caches, etc.) out empty and `user_definition_complete`
  # as `false`. The namers are built from the given format/name/enum-value overrides.
  def self.with(
    api:,
    schema_elements:,
    index_document_sizes:,
    derived_type_name_formats:,
    type_name_overrides:,
    enum_value_overrides_by_type:,
    output: $stdout
  )
    # @type var types_by_name: SchemaElements::typesByNameHash
    types_by_name = {}

    new(
      api: api,
      schema_elements: schema_elements,
      index_document_sizes: index_document_sizes,
      types_by_name: types_by_name,
      object_types_by_name: {},
      scalar_types_by_name: {},
      enum_types_by_name: {},
      implementations_by_interface_ref: ::Hash.new { |h, k| h[k] = ::Set.new },
      sdl_parts: [],
      paginated_collection_element_types: ::Set.new,
      user_defined_fields: ::Set.new,
      renamed_types_by_old_name: {},
      deleted_types_by_old_name: {},
      renamed_fields_by_type_name_and_old_field_name: ::Hash.new { |h, k| h[k] = {} },
      deleted_fields_by_type_name_and_old_field_name: ::Hash.new { |h, k| h[k] = {} },
      json_schema_version_setter_location: nil,
      json_schema_version: nil,
      graphql_extension_modules: [],
      initially_registered_built_in_types: ::Set.new,
      built_in_types_customization_blocks: [],
      user_definition_complete: false,
      sub_aggregation_paths_by_type: {},
      type_refs_by_name: {},
      type_namer: SchemaElements::TypeNamer.new(
        format_overrides: derived_type_name_formats,
        name_overrides: type_name_overrides
      ),
      enum_value_namer: SchemaElements::EnumValueNamer.new(enum_value_overrides_by_type),
      output: output
    )
  end

  # @dynamic index_document_sizes?
  alias_method :index_document_sizes?, :index_document_sizes

  # Returns the type reference for `name`, building it via the factory on first use.
  def type_ref(name)
    # Type references are immutable and can be safely cached. Here we cache them because we've observed
    # it having a noticeable impact on our test suite runtime.
    type_refs_by_name[name] ||= factory.new_type_reference(name)
  end

  # Registers `type` in `types_by_name` and in `object_types_by_name`.
  def register_object_interface_or_union_type(type)
    register_type(type, object_types_by_name)
  end

  # Registers `type` in `types_by_name` and in `enum_types_by_name`.
  def register_enum_type(type)
    register_type(type, enum_types_by_name)
  end

  # Registers `type` in `types_by_name` and in `scalar_types_by_name`.
  def register_scalar_type(type)
    register_type(type, scalar_types_by_name)
  end

  # Registers `type` in `types_by_name` only.
  def register_input_type(type)
    register_type(type)
  end

  # Records that the type formerly named `from` is now named `type_name`.
  def register_renamed_type(type_name, from:, defined_at:, defined_via:)
    renamed_types_by_old_name[from] = factory.new_deprecated_element(
      type_name,
      defined_at: defined_at,
      defined_via: defined_via
    )
  end

  # Records that the type named `type_name` has been deleted.
  def register_deleted_type(type_name, defined_at:, defined_via:)
    deleted_types_by_old_name[type_name] = factory.new_deprecated_element(
      type_name,
      defined_at: defined_at,
      defined_via: defined_via
    )
  end

  # Records that, on the type named `type_name`, the field formerly named `from` is now named `to`.
  def register_renamed_field(type_name, from:, to:, defined_at:, defined_via:)
    renamed_fields_by_type_name_and_old_field_name[type_name][from] = factory.new_deprecated_element(
      to,
      defined_at: defined_at,
      defined_via: defined_via
    )
  end

  # Records that the field named `field_name` has been deleted from the type named `type_name`.
  def register_deleted_field(type_name, field_name, defined_at:, defined_via:)
    deleted_fields_by_type_name_and_old_field_name[type_name][field_name] = factory.new_deprecated_element(
      field_name,
      defined_at: defined_at,
      defined_via: defined_via
    )
  end

  # Adds the given `field` to `user_defined_fields`.
  #
  # NOTE(review): this comment previously said the field is registered "unless the user definitions
  # are complete", but nothing here checks `user_definition_complete` -- confirm whether callers
  # are expected to guard this call.
  def register_user_defined_field(field)
    user_defined_fields << field
  end

  # The user-defined fields grouped by the name of their fully-unwrapped type (memoized).
  # Raises a `SchemaError` if called before the schema definition is complete.
  def user_defined_field_references_by_type_name
    @user_defined_field_references_by_type_name ||= begin
      unless user_definition_complete
        raise SchemaError, "Cannot access `user_defined_field_references_by_type_name` until the schema definition is complete."
      end

      user_defined_fields.group_by { |f| f.type.fully_unwrapped.name }
    end
  end

  # The (memoized) factory bound to this state.
  def factory
    @factory ||= Factory.new(self)
  end

  # Memoized result of `factory.new_enums_for_indexed_types`.
  def enums_for_indexed_types
    @enums_for_indexed_types ||= factory.new_enums_for_indexed_types
  end

  # The unique sub-aggregation paths for `type`, served from the cache when present.
  def sub_aggregation_paths_for(type)
    sub_aggregation_paths_by_type.fetch(type) do
      paths = SchemaElements::SubAggregationPath.paths_for(type, schema_def_state: self).uniq

      # Cache our results if the user has finished their schema definition. Otherwise, it's not safe to cache.
      # :nocov: -- we never execute this with `user_definition_complete == false`
      sub_aggregation_paths_by_type[type] = paths if user_definition_complete
      # :nocov:

      paths
    end
  end

  private

  # Names that may not be used for schema types.
  # Note: `private` does not apply to constants, so this is still reachable as `State::RESERVED_TYPE_NAMES`.
  RESERVED_TYPE_NAMES = [EVENT_ENVELOPE_JSON_SCHEMA_NAME].to_set

  # Registers `type` under its name in `types_by_name` and, when given, in `additional_type_index`.
  # Raises a `SchemaError` if the name is reserved or has already been registered.
  def register_type(type, additional_type_index = nil)
    name = (_ = type).name

    raise SchemaError, "`#{name}` cannot be used as a schema type because it is a reserved name." if RESERVED_TYPE_NAMES.include?(name)
    raise SchemaError, "Duplicate definition for type #{name} detected. Each type can only be defined once." if types_by_name.key?(name)

    additional_type_index[name] = type if additional_type_index
    types_by_name[name] = type
  end
end
211
+ end
212
+ end