elasticgraph-schema_definition 1.0.1 → 1.0.3.rc1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70):
  1. checksums.yaml +4 -4
  2. data/LICENSE.txt +1 -1
  3. data/lib/elastic_graph/schema_definition/api.rb +34 -2
  4. data/lib/elastic_graph/schema_definition/factory.rb +31 -1
  5. data/lib/elastic_graph/schema_definition/indexing/derived_fields/append_only_set.rb +1 -1
  6. data/lib/elastic_graph/schema_definition/indexing/derived_fields/field_initializer_support.rb +1 -1
  7. data/lib/elastic_graph/schema_definition/indexing/derived_fields/immutable_value.rb +1 -1
  8. data/lib/elastic_graph/schema_definition/indexing/derived_fields/min_or_max_value.rb +1 -1
  9. data/lib/elastic_graph/schema_definition/indexing/derived_indexed_type.rb +1 -1
  10. data/lib/elastic_graph/schema_definition/indexing/event_envelope.rb +1 -1
  11. data/lib/elastic_graph/schema_definition/indexing/field.rb +8 -2
  12. data/lib/elastic_graph/schema_definition/indexing/field_reference.rb +6 -4
  13. data/lib/elastic_graph/schema_definition/indexing/field_type/enum.rb +1 -1
  14. data/lib/elastic_graph/schema_definition/indexing/field_type/object.rb +10 -4
  15. data/lib/elastic_graph/schema_definition/indexing/field_type/scalar.rb +1 -1
  16. data/lib/elastic_graph/schema_definition/indexing/field_type/union.rb +1 -1
  17. data/lib/elastic_graph/schema_definition/indexing/index.rb +39 -4
  18. data/lib/elastic_graph/schema_definition/indexing/json_schema_field_metadata.rb +1 -1
  19. data/lib/elastic_graph/schema_definition/indexing/json_schema_with_metadata.rb +4 -4
  20. data/lib/elastic_graph/schema_definition/indexing/list_counts_mapping.rb +1 -1
  21. data/lib/elastic_graph/schema_definition/indexing/relationship_resolver.rb +1 -1
  22. data/lib/elastic_graph/schema_definition/indexing/rollover_config.rb +1 -1
  23. data/lib/elastic_graph/schema_definition/indexing/update_target_factory.rb +1 -1
  24. data/lib/elastic_graph/schema_definition/indexing/update_target_resolver.rb +3 -4
  25. data/lib/elastic_graph/schema_definition/json_schema_pruner.rb +1 -1
  26. data/lib/elastic_graph/schema_definition/mixins/can_be_graphql_only.rb +1 -1
  27. data/lib/elastic_graph/schema_definition/mixins/has_derived_graphql_type_customizations.rb +1 -1
  28. data/lib/elastic_graph/schema_definition/mixins/has_directives.rb +1 -1
  29. data/lib/elastic_graph/schema_definition/mixins/has_documentation.rb +1 -1
  30. data/lib/elastic_graph/schema_definition/mixins/has_indices.rb +20 -14
  31. data/lib/elastic_graph/schema_definition/mixins/has_readable_to_s_and_inspect.rb +1 -1
  32. data/lib/elastic_graph/schema_definition/mixins/has_subtypes.rb +1 -1
  33. data/lib/elastic_graph/schema_definition/mixins/has_type_info.rb +1 -1
  34. data/lib/elastic_graph/schema_definition/mixins/implements_interfaces.rb +1 -1
  35. data/lib/elastic_graph/schema_definition/mixins/supports_default_value.rb +1 -1
  36. data/lib/elastic_graph/schema_definition/mixins/supports_filtering_and_aggregation.rb +1 -1
  37. data/lib/elastic_graph/schema_definition/mixins/verifies_graphql_name.rb +1 -1
  38. data/lib/elastic_graph/schema_definition/rake_tasks.rb +6 -6
  39. data/lib/elastic_graph/schema_definition/results.rb +16 -5
  40. data/lib/elastic_graph/schema_definition/schema_artifact_manager.rb +39 -29
  41. data/lib/elastic_graph/schema_definition/schema_elements/argument.rb +1 -1
  42. data/lib/elastic_graph/schema_definition/schema_elements/built_in_types.rb +53 -1
  43. data/lib/elastic_graph/schema_definition/schema_elements/deprecated_element.rb +1 -1
  44. data/lib/elastic_graph/schema_definition/schema_elements/directive.rb +1 -1
  45. data/lib/elastic_graph/schema_definition/schema_elements/enum_type.rb +1 -1
  46. data/lib/elastic_graph/schema_definition/schema_elements/enum_value.rb +1 -1
  47. data/lib/elastic_graph/schema_definition/schema_elements/enum_value_namer.rb +1 -1
  48. data/lib/elastic_graph/schema_definition/schema_elements/enums_for_indexed_types.rb +1 -1
  49. data/lib/elastic_graph/schema_definition/schema_elements/field.rb +7 -4
  50. data/lib/elastic_graph/schema_definition/schema_elements/field_path.rb +1 -1
  51. data/lib/elastic_graph/schema_definition/schema_elements/field_source.rb +1 -1
  52. data/lib/elastic_graph/schema_definition/schema_elements/graphql_sdl_enumerator.rb +2 -1
  53. data/lib/elastic_graph/schema_definition/schema_elements/input_field.rb +1 -1
  54. data/lib/elastic_graph/schema_definition/schema_elements/input_type.rb +1 -1
  55. data/lib/elastic_graph/schema_definition/schema_elements/interface_type.rb +1 -1
  56. data/lib/elastic_graph/schema_definition/schema_elements/list_counts_state.rb +1 -1
  57. data/lib/elastic_graph/schema_definition/schema_elements/object_type.rb +1 -1
  58. data/lib/elastic_graph/schema_definition/schema_elements/relationship.rb +3 -2
  59. data/lib/elastic_graph/schema_definition/schema_elements/scalar_type.rb +82 -4
  60. data/lib/elastic_graph/schema_definition/schema_elements/sort_order_enum_value.rb +1 -1
  61. data/lib/elastic_graph/schema_definition/schema_elements/sub_aggregation_path.rb +1 -1
  62. data/lib/elastic_graph/schema_definition/schema_elements/type_namer.rb +1 -1
  63. data/lib/elastic_graph/schema_definition/schema_elements/type_reference.rb +2 -1
  64. data/lib/elastic_graph/schema_definition/schema_elements/type_with_subfields.rb +5 -3
  65. data/lib/elastic_graph/schema_definition/schema_elements/union_type.rb +1 -1
  66. data/lib/elastic_graph/schema_definition/scripting/file_system_repository.rb +1 -1
  67. data/lib/elastic_graph/schema_definition/scripting/script.rb +1 -1
  68. data/lib/elastic_graph/schema_definition/state.rb +7 -3
  69. data/lib/elastic_graph/schema_definition/test_support.rb +2 -3
  70. metadata +30 -24
@@ -1,4 +1,4 @@
1
- # Copyright 2024 - 2025 Block, Inc.
1
+ # Copyright 2024 - 2026 Block, Inc.
2
2
  #
3
3
  # Use of this source code is governed by an MIT-style
4
4
  # license that can be found in the LICENSE file or at
@@ -155,14 +155,14 @@ module ElasticGraph
155
155
  end
156
156
 
157
157
  def schema_artifact_manager
158
- require "elastic_graph/schema_definition/schema_artifact_manager"
158
+ require "elastic_graph/schema_definition/api"
159
159
 
160
160
  # :nocov: -- tests don't cover the `VERBOSE` side
161
161
  max_diff_lines = ENV["VERBOSE"] ? 999999999 : 50
162
162
  # :nocov:
163
163
 
164
- SchemaArtifactManager.new(
165
- schema_definition_results: schema_definition_results,
164
+ schema_def_api.factory.new_schema_artifact_manager(
165
+ schema_definition_results: schema_def_api.results,
166
166
  schema_artifacts_directory: @schema_artifacts_directory.to_s,
167
167
  enforce_json_schema_version: @enforce_json_schema_version,
168
168
  output: @output,
@@ -170,7 +170,7 @@ module ElasticGraph
170
170
  )
171
171
  end
172
172
 
173
- def schema_definition_results
173
+ def schema_def_api
174
174
  require "elastic_graph/schema_definition/api"
175
175
 
176
176
  API.new(
@@ -183,7 +183,7 @@ module ElasticGraph
183
183
  output: @output
184
184
  ).tap do |api|
185
185
  api.as_active_instance { load @path_to_schema.to_s }
186
- end.results
186
+ end
187
187
  end
188
188
  end
189
189
  end
@@ -1,4 +1,4 @@
1
- # Copyright 2024 - 2025 Block, Inc.
1
+ # Copyright 2024 - 2026 Block, Inc.
2
2
  #
3
3
  # Use of this source code is governed by an MIT-style
4
4
  # license that can be found in the LICENSE file or at
@@ -18,6 +18,7 @@ require "elastic_graph/schema_definition/mixins/has_readable_to_s_and_inspect"
18
18
  require "elastic_graph/schema_definition/schema_elements/field_path"
19
19
  require "elastic_graph/schema_definition/scripting/file_system_repository"
20
20
  require "elastic_graph/support/memoizable_data"
21
+ require "elastic_graph/version"
21
22
 
22
23
  module ElasticGraph
23
24
  module SchemaDefinition
@@ -180,7 +181,7 @@ module ElasticGraph
180
181
  check_for_circular_dependencies!
181
182
 
182
183
  index_templates, indices = state.object_types_by_name.values
183
- .flat_map(&:indices)
184
+ .filter_map(&:index_def)
184
185
  .sort_by(&:name)
185
186
  .partition(&:rollover_config)
186
187
 
@@ -220,11 +221,12 @@ module ElasticGraph
220
221
  .to_h { |t| [t.name, t.runtime_metadata] }
221
222
  .merge(indexed_enum_types_by_name)
222
223
 
223
- index_definitions_by_name = state.object_types_by_name.values.flat_map(&:indices).to_h do |index|
224
+ index_definitions_by_name = state.object_types_by_name.values.filter_map(&:index_def).to_h do |index|
224
225
  [index.name, index.runtime_metadata]
225
226
  end
226
227
 
227
228
  SchemaArtifacts::RuntimeMetadata::Schema.new(
229
+ elasticgraph_version: ElasticGraph::VERSION,
228
230
  object_types_by_name: object_types_by_name,
229
231
  scalar_types_by_name: scalar_types_by_name,
230
232
  enum_types_by_name: enum_types_by_name,
@@ -246,7 +248,7 @@ module ElasticGraph
246
248
  ::Hash.new { |h, k| h[k] = [] } # : ::Hash[untyped, ::Array[SchemaArtifacts::RuntimeMetadata::UpdateTarget]]
247
249
  ) do |object_type, accum|
248
250
  fields_with_sources_by_relationship_name =
249
- if object_type.indices.empty?
251
+ if object_type.index_def.nil?
250
252
  # only indexed types can have `sourced_from` fields, and resolving `fields_with_sources` on an unindexed union type
251
253
  # such as `_Entity` when we are using apollo can lead to exceptions when multiple entity types have the same field name
252
254
  # that use different mapping types.
@@ -274,7 +276,7 @@ module ElasticGraph
274
276
  resolved_relationship, relationship_error = relationship_resolver.resolve
275
277
  relationship_errors << relationship_error if relationship_error
276
278
 
277
- if object_type.indices.any? && resolved_relationship && sourced_fields.any?
279
+ if object_type.index_def && resolved_relationship && sourced_fields.any?
278
280
  update_target_resolver = Indexing::UpdateTargetResolver.new(
279
281
  object_type: object_type,
280
282
  resolved_relationship: resolved_relationship,
@@ -285,6 +287,15 @@ module ElasticGraph
285
287
  update_target, errors = update_target_resolver.resolve
286
288
  accum[resolved_relationship.related_type.name] << update_target if update_target
287
289
  sourced_field_errors.concat(errors)
290
+
291
+ # Validate that has_had_multiple_sources! has been called when sourced_from is used
292
+ if (index_def = object_type.index_def) && !index_def.has_had_multiple_sources_flag
293
+ sourced_field_errors << "Type `#{object_type.name}` uses `sourced_from` fields but its index `#{index_def.name}` " \
294
+ "has not been configured with `has_had_multiple_sources!`. To resolve this, add `i.has_had_multiple_sources!` within the " \
295
+ "`t.index \"#{index_def.name}\"` block. This flag is required because indices with multiple sources can contain " \
296
+ "incomplete documents, and ElasticGraph needs to know this to apply proper filtering. Once set, this flag should remain even " \
297
+ "if you later remove all `sourced_from` fields, as the index may still contain historical incomplete documents."
298
+ end
288
299
  end
289
300
  end
290
301
  end.tap do
@@ -1,4 +1,4 @@
1
- # Copyright 2024 - 2025 Block, Inc.
1
+ # Copyright 2024 - 2026 Block, Inc.
2
2
  #
3
3
  # Use of this source code is governed by an MIT-style
4
4
  # license that can be found in the LICENSE file or at
@@ -45,28 +45,6 @@ module ElasticGraph
45
45
  "they can perform code generation and event validation."
46
46
  ]
47
47
  )
48
-
49
- # Here we round-trip the SDL string through the GraphQL gem's formatting logic. This provides
50
- # nice, consistent formatting (alphabetical order, consistent spacing, etc) and also prunes out
51
- # any "orphaned" schema types (that is, types that are defined but never referenced).
52
- # We also prepend a line break so there's a blank line between the comment block and the
53
- # schema elements.
54
- graphql_schema = ::GraphQL::Schema.from_definition(schema_definition_results.graphql_schema_string).to_definition.chomp
55
-
56
- unversioned_artifacts = [
57
- new_yaml_artifact(DATASTORE_CONFIG_FILE, schema_definition_results.datastore_config),
58
- new_yaml_artifact(RUNTIME_METADATA_FILE, pruned_runtime_metadata(graphql_schema).to_dumpable_hash),
59
- @json_schemas_artifact,
60
- new_raw_artifact(GRAPHQL_SCHEMA_FILE, "\n" + graphql_schema)
61
- ]
62
-
63
- versioned_artifacts = build_desired_versioned_json_schemas(@json_schemas_artifact.desired_contents).values.map do |versioned_schema|
64
- new_versioned_json_schema_artifact(versioned_schema)
65
- end
66
-
67
- @artifacts = (unversioned_artifacts + versioned_artifacts).sort_by(&:file_name)
68
- notify_about_unused_type_name_overrides
69
- notify_about_unused_enum_value_overrides
70
48
  end
71
49
 
72
50
  # Dumps all the schema artifacts to disk.
@@ -80,7 +58,7 @@ module ElasticGraph
80
58
  setter_location_path = ::Pathname.new(setter_location.absolute_path.to_s).relative_path_from(::Dir.pwd)
81
59
 
82
60
  abort "A change has been attempted to `json_schemas.yaml`, but the `json_schema_version` has not been correspondingly incremented. Please " \
83
- "increase the schema's version, and then run the `schema_artifacts:dump` command again.\n\n" \
61
+ "increase the schema's version, and then run the `bundle exec rake schema_artifacts:dump` command again.\n\n" \
84
62
  "To update the schema version to the expected version, change line #{setter_location.lineno} at `#{setter_location_path}` to:\n" \
85
63
  " `schema.json_schema_version #{recommended_json_schema_version}`\n\n" \
86
64
  "Alternately, pass `enforce_json_schema_version: false` to `ElasticGraph::SchemaDefinition::RakeTasks.new` to allow the JSON schemas " \
@@ -94,15 +72,15 @@ module ElasticGraph
94
72
  end
95
73
 
96
74
  ::FileUtils.mkdir_p(@schema_artifacts_directory)
97
- @artifacts.each { |artifact| artifact.dump(@output) }
75
+ artifacts.each { |artifact| artifact.dump(@output) }
98
76
  end
99
77
 
100
78
  # Checks that all schema artifacts are up-to-date, raising an exception if not.
101
79
  def check_artifacts
102
- out_of_date_artifacts = @artifacts.select(&:out_of_date?)
80
+ out_of_date_artifacts = artifacts.select(&:out_of_date?)
103
81
 
104
82
  if out_of_date_artifacts.empty?
105
- descriptions = @artifacts.map.with_index(1) { |art, i| "#{i}. #{art.file_name}" }
83
+ descriptions = artifacts.map.with_index(1) { |art, i| "#{i}. #{art.file_name}" }
106
84
  @output.puts <<~EOS
107
85
  Your schema artifacts are all up to date:
108
86
  #{descriptions.join("\n")}
@@ -115,6 +93,38 @@ module ElasticGraph
115
93
 
116
94
  private
117
95
 
96
+ def artifacts
97
+ @artifacts ||= artifacts_from_schema_def.sort_by(&:file_name).tap do
98
+ # This must be deferred until artifacts are generated, as we can't fully detect
99
+ # unused things until after we've used things to generate artifacts.
100
+ notify_about_unused_type_name_overrides
101
+ notify_about_unused_enum_value_overrides
102
+ end
103
+ end
104
+
105
+ # Defined to offer a convenient method to override in an extension in order to add a new schema artifact.
106
+ def artifacts_from_schema_def
107
+ # Here we round-trip the SDL string through the GraphQL gem's formatting logic. This provides
108
+ # nice, consistent formatting (alphabetical order, consistent spacing, etc) and also prunes out
109
+ # any "orphaned" schema types (that is, types that are defined but never referenced).
110
+ # We also prepend a line break so there's a blank line between the comment block and the
111
+ # schema elements.
112
+ graphql_schema = ::GraphQL::Schema.from_definition(schema_definition_results.graphql_schema_string).to_definition.chomp
113
+
114
+ unversioned_artifacts = [
115
+ new_yaml_artifact(DATASTORE_CONFIG_FILE, schema_definition_results.datastore_config),
116
+ new_yaml_artifact(RUNTIME_METADATA_FILE, pruned_runtime_metadata(graphql_schema).to_dumpable_hash),
117
+ @json_schemas_artifact,
118
+ new_raw_artifact(GRAPHQL_SCHEMA_FILE, "\n" + graphql_schema)
119
+ ]
120
+
121
+ versioned_artifacts = build_desired_versioned_json_schemas(@json_schemas_artifact.desired_contents).values.map do |versioned_schema|
122
+ new_versioned_json_schema_artifact(versioned_schema)
123
+ end
124
+
125
+ unversioned_artifacts + versioned_artifacts
126
+ end
127
+
118
128
  def notify_about_unused_type_name_overrides
119
129
  type_namer = @schema_definition_results.state.type_namer
120
130
  return if (unused_overrides = type_namer.unused_name_overrides).empty?
@@ -338,7 +348,7 @@ module ElasticGraph
338
348
  end
339
349
 
340
350
  <<~EOS.strip
341
- #{out_of_date_artifacts.size} schema artifact(s) are out of date. Run `rake schema_artifacts:dump` to update the following artifact(s):
351
+ #{out_of_date_artifacts.size} schema artifact(s) are out of date. Run `bundle exec rake schema_artifacts:dump` to update the following artifact(s):
342
352
 
343
353
  #{descriptions.join("\n")}
344
354
 
@@ -471,7 +481,7 @@ module ElasticGraph
471
481
 
472
482
  def comment_preamble
473
483
  lines = [
474
- "Generated by `rake schema_artifacts:dump`.",
484
+ "Generated by `bundle exec rake schema_artifacts:dump`.",
475
485
  "DO NOT EDIT BY HAND. Any edits will be lost the next time the rake task is run."
476
486
  ]
477
487
 
@@ -1,4 +1,4 @@
1
- # Copyright 2024 - 2025 Block, Inc.
1
+ # Copyright 2024 - 2026 Block, Inc.
2
2
  #
3
3
  # Use of this source code is governed by an MIT-style
4
4
  # license that can be found in the LICENSE file or at
@@ -1,4 +1,4 @@
1
- # Copyright 2024 - 2025 Block, Inc.
1
+ # Copyright 2024 - 2026 Block, Inc.
2
2
  #
3
3
  # Use of this source code is governed by an MIT-style
4
4
  # license that can be found in the LICENSE file or at
@@ -222,6 +222,15 @@ module ElasticGraph
222
222
  When `null` is passed, matches all documents.
223
223
  EOS
224
224
  end
225
+
226
+ t.field names.matches_query_with_prefix, schema_def_state.type_ref("MatchesQueryWithPrefix").as_filter_input.name do |f|
227
+ f.documentation <<~EOS
228
+ Matches records where the field value matches the provided query with the last term treated as a prefix.
229
+ Similar to `#{names.matches_query}`, but allows prefix matching on the last term.
230
+
231
+ When `null` is passed, matches all documents.
232
+ EOS
233
+ end
225
234
  end.each do |input_type|
226
235
  field_type = input_type.type_ref.list_filter_input? ? "[String]" : "String"
227
236
  input_type.documentation <<~EOS
@@ -287,6 +296,42 @@ module ElasticGraph
287
296
  remove_any_of_and_all_of_and_not_filter_operators_on(t)
288
297
  end
289
298
 
299
+ register_filter "MatchesQueryWithPrefix" do |t|
300
+ t.documentation <<~EOS
301
+ Input type used to specify parameters for the `#{names.matches_query_with_prefix}` filtering operator.
302
+
303
+ When `null` is passed, matches all documents.
304
+ EOS
305
+
306
+ t.field names.query_with_prefix, "String!" do |f|
307
+ f.documentation "The input query to search for, with the last term treated as a prefix."
308
+ end
309
+
310
+ t.field names.allowed_edits_per_term, "MatchesQueryAllowedEditsPerTerm!" do |f|
311
+ f.documentation <<~EOS
312
+ Number of allowed modifications per term to arrive at a match. For example, if set to 'ONE', the input
313
+ term 'glue' would match 'blue' but not 'clued', since the latter requires two modifications.
314
+ EOS
315
+
316
+ f.default "DYNAMIC"
317
+ end
318
+
319
+ t.field names.require_all_terms, "Boolean!" do |f|
320
+ f.documentation <<~EOS
321
+ Set to `true` to match only if all terms in `#{names.query_with_prefix}` are found, or
322
+ `false` to only require one term to be found.
323
+ EOS
324
+
325
+ f.default false
326
+ end
327
+
328
+ # any_of/all_of/not don't really make sense on this filter because it doesn't make sense
329
+ # to apply an OR operator or negation to the fields of this type since they are all an
330
+ # indivisible part of a single filter operation on a specific field. So we remove them
331
+ # here.
332
+ remove_any_of_and_all_of_and_not_filter_operators_on(t)
333
+ end
334
+
290
335
  register_filter "StringContains" do |t|
291
336
  t.documentation <<~EOS
292
337
  Input type used to specify parameters for the `#{names.contains}` string filtering operator.
@@ -674,6 +719,11 @@ module ElasticGraph
674
719
  t.prepare_for_indexing_with "ElasticGraph::Indexer::IndexingPreparers::Integer",
675
720
  defined_at: "elastic_graph/indexer/indexing_preparers/integer"
676
721
 
722
+ # The GraphQL gem automatically coerces Int values, so we can safely use MISSING_NUMERIC_PLACEHOLDER
723
+ # as the grouping missing value placeholder even though we don't override the default (no-op) ElasticGraph
724
+ # scalar coercion adapter.
725
+ t.grouping_missing_value_placeholder MISSING_NUMERIC_PLACEHOLDER
726
+
677
727
  define_integral_aggregated_values_for(t)
678
728
  end
679
729
 
@@ -744,6 +794,8 @@ module ElasticGraph
744
794
  t.json_schema type: "string", format: "date-time"
745
795
  t.coerce_with "ElasticGraph::GraphQL::ScalarCoercionAdapters::DateTime",
746
796
  defined_at: "elastic_graph/graphql/scalar_coercion_adapters/date_time"
797
+ t.prepare_for_indexing_with "ElasticGraph::Indexer::IndexingPreparers::DateTime",
798
+ defined_at: "elastic_graph/indexer/indexing_preparers/date_time"
747
799
 
748
800
  t.documentation <<~EOS
749
801
  A timestamp, represented as an [ISO 8601 time string](https://en.wikipedia.org/wiki/ISO_8601).
@@ -1,4 +1,4 @@
1
- # Copyright 2024 - 2025 Block, Inc.
1
+ # Copyright 2024 - 2026 Block, Inc.
2
2
  #
3
3
  # Use of this source code is governed by an MIT-style
4
4
  # license that can be found in the LICENSE file or at
@@ -1,4 +1,4 @@
1
- # Copyright 2024 - 2025 Block, Inc.
1
+ # Copyright 2024 - 2026 Block, Inc.
2
2
  #
3
3
  # Use of this source code is governed by an MIT-style
4
4
  # license that can be found in the LICENSE file or at
@@ -1,4 +1,4 @@
1
- # Copyright 2024 - 2025 Block, Inc.
1
+ # Copyright 2024 - 2026 Block, Inc.
2
2
  #
3
3
  # Use of this source code is governed by an MIT-style
4
4
  # license that can be found in the LICENSE file or at
@@ -1,4 +1,4 @@
1
- # Copyright 2024 - 2025 Block, Inc.
1
+ # Copyright 2024 - 2026 Block, Inc.
2
2
  #
3
3
  # Use of this source code is governed by an MIT-style
4
4
  # license that can be found in the LICENSE file or at
@@ -1,4 +1,4 @@
1
- # Copyright 2024 - 2025 Block, Inc.
1
+ # Copyright 2024 - 2026 Block, Inc.
2
2
  #
3
3
  # Use of this source code is governed by an MIT-style
4
4
  # license that can be found in the LICENSE file or at
@@ -1,4 +1,4 @@
1
- # Copyright 2024 - 2025 Block, Inc.
1
+ # Copyright 2024 - 2026 Block, Inc.
2
2
  #
3
3
  # Use of this source code is governed by an MIT-style
4
4
  # license that can be found in the LICENSE file or at
@@ -1,4 +1,4 @@
1
- # Copyright 2024 - 2025 Block, Inc.
1
+ # Copyright 2024 - 2026 Block, Inc.
2
2
  #
3
3
  # Use of this source code is governed by an MIT-style
4
4
  # license that can be found in the LICENSE file or at
@@ -515,7 +515,9 @@ module ElasticGraph
515
515
  # f.sourced_from "capitalOf", "currency"
516
516
  # end
517
517
  #
518
- # t.index "cities"
518
+ # t.index "cities" do |i|
519
+ # i.has_had_multiple_sources!
520
+ # end
519
521
  # end
520
522
  # end
521
523
  def sourced_from(relationship, field_path)
@@ -968,7 +970,8 @@ module ElasticGraph
968
970
  json_schema_options: json_schema_options,
969
971
  accuracy_confidence: accuracy_confidence,
970
972
  source: source,
971
- runtime_field_script: runtime_field_script
973
+ runtime_field_script: runtime_field_script,
974
+ doc_comment: doc_comment
972
975
  )
973
976
  end
974
977
 
@@ -1045,7 +1048,7 @@ module ElasticGraph
1045
1048
  # are exactly equal (in which case we can return either).
1046
1049
  #
1047
1050
  # @private
1048
- def self.pick_most_accurate_from(field1, field2, to_comparable: ->(it) { it })
1051
+ def self.pick_most_accurate_from(field1, field2, to_comparable: ->(value) { value })
1049
1052
  return field1 if to_comparable.call(field1) == to_comparable.call(field2)
1050
1053
  yield if field1.accuracy_confidence == field2.accuracy_confidence
1051
1054
  # Array#max_by can return nil (when called on an empty array), but our steep type is non-nil.
@@ -1,4 +1,4 @@
1
- # Copyright 2024 - 2025 Block, Inc.
1
+ # Copyright 2024 - 2026 Block, Inc.
2
2
  #
3
3
  # Use of this source code is governed by an MIT-style
4
4
  # license that can be found in the LICENSE file or at
@@ -1,4 +1,4 @@
1
- # Copyright 2024 - 2025 Block, Inc.
1
+ # Copyright 2024 - 2026 Block, Inc.
2
2
  #
3
3
  # Use of this source code is governed by an MIT-style
4
4
  # license that can be found in the LICENSE file or at
@@ -1,4 +1,4 @@
1
- # Copyright 2024 - 2025 Block, Inc.
1
+ # Copyright 2024 - 2026 Block, Inc.
2
2
  #
3
3
  # Use of this source code is governed by an MIT-style
4
4
  # license that can be found in the LICENSE file or at
@@ -14,6 +14,7 @@ module ElasticGraph
14
14
  # @private
15
15
  class GraphQLSDLEnumerator
16
16
  include ::Enumerable
17
+
17
18
  # @dynamic schema_def_state
18
19
  attr_reader :schema_def_state
19
20
 
@@ -1,4 +1,4 @@
1
- # Copyright 2024 - 2025 Block, Inc.
1
+ # Copyright 2024 - 2026 Block, Inc.
2
2
  #
3
3
  # Use of this source code is governed by an MIT-style
4
4
  # license that can be found in the LICENSE file or at
@@ -1,4 +1,4 @@
1
- # Copyright 2024 - 2025 Block, Inc.
1
+ # Copyright 2024 - 2026 Block, Inc.
2
2
  #
3
3
  # Use of this source code is governed by an MIT-style
4
4
  # license that can be found in the LICENSE file or at
@@ -1,4 +1,4 @@
1
- # Copyright 2024 - 2025 Block, Inc.
1
+ # Copyright 2024 - 2026 Block, Inc.
2
2
  #
3
3
  # Use of this source code is governed by an MIT-style
4
4
  # license that can be found in the LICENSE file or at
@@ -1,4 +1,4 @@
1
- # Copyright 2024 - 2025 Block, Inc.
1
+ # Copyright 2024 - 2026 Block, Inc.
2
2
  #
3
3
  # Use of this source code is governed by an MIT-style
4
4
  # license that can be found in the LICENSE file or at
@@ -1,4 +1,4 @@
1
- # Copyright 2024 - 2025 Block, Inc.
1
+ # Copyright 2024 - 2026 Block, Inc.
2
2
  #
3
3
  # Use of this source code is governed by an MIT-style
4
4
  # license that can be found in the LICENSE file or at
@@ -1,4 +1,4 @@
1
- # Copyright 2024 - 2025 Block, Inc.
1
+ # Copyright 2024 - 2026 Block, Inc.
2
2
  #
3
3
  # Use of this source code is governed by an MIT-style
4
4
  # license that can be found in the LICENSE file or at
@@ -112,8 +112,9 @@ module ElasticGraph
112
112
  # f.sourced_from "launchPlan", "launchDate"
113
113
  # end
114
114
  #
115
- # t.index "campaigns"do |i|
115
+ # t.index "campaigns" do |i|
116
116
  # i.rollover :yearly, "createdAt"
117
+ # i.has_had_multiple_sources!
117
118
  # end
118
119
  # end
119
120
  #
@@ -1,4 +1,4 @@
1
- # Copyright 2024 - 2025 Block, Inc.
1
+ # Copyright 2024 - 2026 Block, Inc.
2
2
  #
3
3
  # Use of this source code is governed by an MIT-style
4
4
  # license that can be found in the LICENSE file or at
@@ -44,6 +44,7 @@ module ElasticGraph
44
44
  class ScalarType < Struct.new(
45
45
  :schema_def_state,
46
46
  :type_ref,
47
+ :grouping_missing_value_placeholder_overridden,
47
48
  :mapping_type,
48
49
  :runtime_metadata,
49
50
  :aggregated_values_customizations,
@@ -66,12 +67,13 @@ module ElasticGraph
66
67
 
67
68
  # @private
68
69
  def initialize(schema_def_state, name)
69
- super(schema_def_state, schema_def_state.type_ref(name).to_final_form)
70
+ super(schema_def_state, schema_def_state.type_ref(name).to_final_form, false)
70
71
 
71
72
  # Default the runtime metadata before yielding, so it can be overridden as needed.
72
73
  self.runtime_metadata = SchemaArtifacts::RuntimeMetadata::ScalarType.new(
73
74
  coercion_adapter_ref: SchemaArtifacts::RuntimeMetadata::ScalarType::DEFAULT_COERCION_ADAPTER_REF,
74
- indexing_preparer_ref: SchemaArtifacts::RuntimeMetadata::ScalarType::DEFAULT_INDEXING_PREPARER_REF
75
+ indexing_preparer_ref: SchemaArtifacts::RuntimeMetadata::ScalarType::DEFAULT_INDEXING_PREPARER_REF,
76
+ grouping_missing_value_placeholder: nil
75
77
  )
76
78
 
77
79
  yield self
@@ -84,6 +86,10 @@ module ElasticGraph
84
86
  if missing.any?
85
87
  raise Errors::SchemaError, "Scalar types require `mapping` and `json_schema` to be configured, but `#{name}` lacks #{missing.join(" and ")}."
86
88
  end
89
+
90
+ if (placeholder = inferred_grouping_missing_value_placeholder)
91
+ self.runtime_metadata = runtime_metadata.with(grouping_missing_value_placeholder: placeholder)
92
+ end
87
93
  end
88
94
 
89
95
  # @return [String] name of the scalar type
@@ -154,6 +160,31 @@ module ElasticGraph
154
160
  }).tap(&:load_indexing_preparer) # verify the preparer is valid.
155
161
  end
156
162
 
163
+ # Specifies a placeholder value to use for missing values when grouping by this scalar type.
164
+ # This optimization allows ElasticGraph to use a single terms aggregation instead of separate
165
+ # terms and missing aggregations, reducing the exponential explosion of subaggregations when
166
+ # grouping by multiple fields.
167
+ #
168
+ # @param placeholder [String, Numeric] the placeholder value to use for missing/null values
169
+ # @return [void]
170
+ #
171
+ # @example Define a grouping missing value placeholder
172
+ # ElasticGraph.define_schema do |schema|
173
+ # schema.scalar_type "BigInt" do |t|
174
+ # t.mapping type: "long"
175
+ # t.json_schema type: "integer", minimum: -(2**53) + 1, maximum: (2**53) - 1
176
+ # t.grouping_missing_value_placeholder "NaN"
177
+ # end
178
+ # end
179
+ def grouping_missing_value_placeholder(placeholder)
180
+ unless placeholder.nil? || placeholder.is_a?(String) || placeholder.is_a?(Numeric)
181
+ raise Errors::SchemaError, "grouping_missing_value_placeholder must be a String or Numeric value, but got #{placeholder.class}: #{placeholder.inspect}"
182
+ end
183
+
184
+ self.grouping_missing_value_placeholder_overridden = true
185
+ self.runtime_metadata = runtime_metadata.with(grouping_missing_value_placeholder: placeholder)
186
+ end
187
+
157
188
  # @return [String] the GraphQL SDL form of this scalar
158
189
  def to_sdl
159
190
  "#{formatted_documentation}scalar #{name} #{directives_sdl}"
@@ -310,9 +341,56 @@ module ElasticGraph
310
341
  schema_def_state.factory.new_aggregated_values_type_for_index_leaf_type(name, &customization_block)
311
342
  end
312
343
 
344
+ def inferred_grouping_missing_value_placeholder
345
+ return nil if grouping_missing_value_placeholder_overridden || mapping_type.nil?
346
+
347
+ if STRING_TYPES.include?(mapping_type)
348
+ MISSING_STRING_PLACEHOLDER
349
+ elsif FLOAT_TYPES.include?(mapping_type)
350
+ MISSING_NUMERIC_PLACEHOLDER
351
+ elsif mapping_type == "long"
352
+ # It is only safe to use NaN for a long when the long's range is safe to coerce to a float
353
+ # without loss of precision. This is because using NaN as the missing value will cause
354
+ # the datastore to coerce the other bucket keys to float.
355
+ # JSON schema min/max only constrains newly indexed values, not existing data that may fall outside the range before the constraints were added.
356
+ # This is an edge case where the long range may exceed safe float precision.
357
+ # In this case, users can set grouping_missing_value_placeholder to nil.
358
+ if (json_schema_options[:minimum] || LONG_STRING_MIN) >= JSON_SAFE_LONG_MIN &&
359
+ (json_schema_options[:maximum] || LONG_STRING_MAX) <= JSON_SAFE_LONG_MAX
360
+ inferred_numeric_placeholder_for_integer_type
361
+ end
362
+ elsif mapping_type == "unsigned_long"
363
+ # Similar to the checks above for long except we only need to check the max
364
+ # (since the min is zero even if not specified)
365
+ if (json_schema_options[:maximum] || LONG_STRING_MAX) <= JSON_SAFE_LONG_MAX
366
+ inferred_numeric_placeholder_for_integer_type
367
+ end
368
+ elsif INTEGER_TYPES.include?(mapping_type)
369
+ # All other integer types can safely be coerced to float without loss of precision
370
+ inferred_numeric_placeholder_for_integer_type
371
+ end
372
+ end
373
+
374
+ def inferred_numeric_placeholder_for_integer_type
375
+ # Using NaN as the missing value placeholder causes the datastore to coerce all bucket keys to float.
376
+ # If using the default coercion adapter (which is a no-op), the values won't be coerced back to integers,
377
+ # causing a type change in the returned values. Only use NaN if a custom coercion adapter is configured.
378
+ if runtime_metadata.coercion_adapter_ref == SchemaArtifacts::RuntimeMetadata::ScalarType::DEFAULT_COERCION_ADAPTER_REF
379
+ nil
380
+ else
381
+ MISSING_NUMERIC_PLACEHOLDER
382
+ end
383
+ end
384
+
313
385
  # https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-types.html
314
386
  # https://www.elastic.co/guide/en/elasticsearch/reference/7.13/number.html#number
315
- NUMERIC_TYPES = %w[long integer short byte double float half_float scaled_float unsigned_long].to_set
387
+ FLOAT_TYPES = %w[double float half_float scaled_float].to_set
388
+ INTEGER_TYPES = %w[long integer short byte unsigned_long].to_set
389
+ NUMERIC_TYPES = FLOAT_TYPES | INTEGER_TYPES
390
+ # https://www.elastic.co/docs/reference/elasticsearch/mapping-reference/keyword
391
+ # https://www.elastic.co/docs/reference/elasticsearch/mapping-reference/text-type-family
392
+ # https://docs.opensearch.org/latest/mappings/supported-field-types/index/#string-based-field-types
393
+ STRING_TYPES = %w[keyword constant_keyword wildcard text match_only_text pattern_text semantic_text].to_set
316
394
  DATE_TYPES = %w[date date_nanos].to_set
317
395
  # The Elasticsearch/OpenSearch docs do not exhaustively give a list of types on which range queries are efficient,
318
396
  # but the docs are clear that it is efficient on numeric and date types, and is inefficient on string
@@ -1,4 +1,4 @@
1
- # Copyright 2024 - 2025 Block, Inc.
1
+ # Copyright 2024 - 2026 Block, Inc.
2
2
  #
3
3
  # Use of this source code is governed by an MIT-style
4
4
  # license that can be found in the LICENSE file or at
@@ -1,4 +1,4 @@
1
- # Copyright 2024 - 2025 Block, Inc.
1
+ # Copyright 2024 - 2026 Block, Inc.
2
2
  #
3
3
  # Use of this source code is governed by an MIT-style
4
4
  # license that can be found in the LICENSE file or at
@@ -1,4 +1,4 @@
1
- # Copyright 2024 - 2025 Block, Inc.
1
+ # Copyright 2024 - 2026 Block, Inc.
2
2
  #
3
3
  # Use of this source code is governed by an MIT-style
4
4
  # license that can be found in the LICENSE file or at
@@ -1,4 +1,4 @@
1
- # Copyright 2024 - 2025 Block, Inc.
1
+ # Copyright 2024 - 2026 Block, Inc.
2
2
  #
3
3
  # Use of this source code is governed by an MIT-style
4
4
  # license that can be found in the LICENSE file or at
@@ -28,6 +28,7 @@ module ElasticGraph
28
28
  # @private
29
29
  class TypeReference < Support::MemoizableData.define(:name, :schema_def_state)
30
30
  extend Forwardable
31
+
31
32
  # @dynamic type_namer
32
33
  def_delegator :schema_def_state, :type_namer
33
34