elasticgraph-schema_definition 0.18.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +21 -0
  3. data/README.md +7 -0
  4. data/elasticgraph-schema_definition.gemspec +26 -0
  5. data/lib/elastic_graph/schema_definition/api.rb +359 -0
  6. data/lib/elastic_graph/schema_definition/factory.rb +506 -0
  7. data/lib/elastic_graph/schema_definition/indexing/derived_fields/append_only_set.rb +79 -0
  8. data/lib/elastic_graph/schema_definition/indexing/derived_fields/field_initializer_support.rb +59 -0
  9. data/lib/elastic_graph/schema_definition/indexing/derived_fields/immutable_value.rb +99 -0
  10. data/lib/elastic_graph/schema_definition/indexing/derived_fields/min_or_max_value.rb +62 -0
  11. data/lib/elastic_graph/schema_definition/indexing/derived_indexed_type.rb +346 -0
  12. data/lib/elastic_graph/schema_definition/indexing/event_envelope.rb +74 -0
  13. data/lib/elastic_graph/schema_definition/indexing/field.rb +181 -0
  14. data/lib/elastic_graph/schema_definition/indexing/field_reference.rb +51 -0
  15. data/lib/elastic_graph/schema_definition/indexing/field_type/enum.rb +65 -0
  16. data/lib/elastic_graph/schema_definition/indexing/field_type/object.rb +113 -0
  17. data/lib/elastic_graph/schema_definition/indexing/field_type/scalar.rb +51 -0
  18. data/lib/elastic_graph/schema_definition/indexing/field_type/union.rb +70 -0
  19. data/lib/elastic_graph/schema_definition/indexing/index.rb +318 -0
  20. data/lib/elastic_graph/schema_definition/indexing/json_schema_field_metadata.rb +34 -0
  21. data/lib/elastic_graph/schema_definition/indexing/json_schema_with_metadata.rb +234 -0
  22. data/lib/elastic_graph/schema_definition/indexing/list_counts_mapping.rb +53 -0
  23. data/lib/elastic_graph/schema_definition/indexing/relationship_resolver.rb +96 -0
  24. data/lib/elastic_graph/schema_definition/indexing/rollover_config.rb +25 -0
  25. data/lib/elastic_graph/schema_definition/indexing/update_target_factory.rb +54 -0
  26. data/lib/elastic_graph/schema_definition/indexing/update_target_resolver.rb +195 -0
  27. data/lib/elastic_graph/schema_definition/json_schema_pruner.rb +61 -0
  28. data/lib/elastic_graph/schema_definition/mixins/can_be_graphql_only.rb +31 -0
  29. data/lib/elastic_graph/schema_definition/mixins/has_derived_graphql_type_customizations.rb +119 -0
  30. data/lib/elastic_graph/schema_definition/mixins/has_directives.rb +65 -0
  31. data/lib/elastic_graph/schema_definition/mixins/has_documentation.rb +74 -0
  32. data/lib/elastic_graph/schema_definition/mixins/has_indices.rb +281 -0
  33. data/lib/elastic_graph/schema_definition/mixins/has_readable_to_s_and_inspect.rb +46 -0
  34. data/lib/elastic_graph/schema_definition/mixins/has_subtypes.rb +116 -0
  35. data/lib/elastic_graph/schema_definition/mixins/has_type_info.rb +181 -0
  36. data/lib/elastic_graph/schema_definition/mixins/implements_interfaces.rb +122 -0
  37. data/lib/elastic_graph/schema_definition/mixins/supports_default_value.rb +47 -0
  38. data/lib/elastic_graph/schema_definition/mixins/supports_filtering_and_aggregation.rb +267 -0
  39. data/lib/elastic_graph/schema_definition/mixins/verifies_graphql_name.rb +38 -0
  40. data/lib/elastic_graph/schema_definition/rake_tasks.rb +190 -0
  41. data/lib/elastic_graph/schema_definition/results.rb +404 -0
  42. data/lib/elastic_graph/schema_definition/schema_artifact_manager.rb +482 -0
  43. data/lib/elastic_graph/schema_definition/schema_elements/argument.rb +56 -0
  44. data/lib/elastic_graph/schema_definition/schema_elements/built_in_types.rb +1541 -0
  45. data/lib/elastic_graph/schema_definition/schema_elements/deprecated_element.rb +21 -0
  46. data/lib/elastic_graph/schema_definition/schema_elements/directive.rb +40 -0
  47. data/lib/elastic_graph/schema_definition/schema_elements/enum_type.rb +189 -0
  48. data/lib/elastic_graph/schema_definition/schema_elements/enum_value.rb +73 -0
  49. data/lib/elastic_graph/schema_definition/schema_elements/enum_value_namer.rb +89 -0
  50. data/lib/elastic_graph/schema_definition/schema_elements/enums_for_indexed_types.rb +82 -0
  51. data/lib/elastic_graph/schema_definition/schema_elements/field.rb +1085 -0
  52. data/lib/elastic_graph/schema_definition/schema_elements/field_path.rb +112 -0
  53. data/lib/elastic_graph/schema_definition/schema_elements/field_source.rb +16 -0
  54. data/lib/elastic_graph/schema_definition/schema_elements/graphql_sdl_enumerator.rb +113 -0
  55. data/lib/elastic_graph/schema_definition/schema_elements/input_field.rb +31 -0
  56. data/lib/elastic_graph/schema_definition/schema_elements/input_type.rb +60 -0
  57. data/lib/elastic_graph/schema_definition/schema_elements/interface_type.rb +72 -0
  58. data/lib/elastic_graph/schema_definition/schema_elements/list_counts_state.rb +40 -0
  59. data/lib/elastic_graph/schema_definition/schema_elements/object_type.rb +53 -0
  60. data/lib/elastic_graph/schema_definition/schema_elements/relationship.rb +218 -0
  61. data/lib/elastic_graph/schema_definition/schema_elements/scalar_type.rb +310 -0
  62. data/lib/elastic_graph/schema_definition/schema_elements/sort_order_enum_value.rb +36 -0
  63. data/lib/elastic_graph/schema_definition/schema_elements/sub_aggregation_path.rb +66 -0
  64. data/lib/elastic_graph/schema_definition/schema_elements/type_namer.rb +237 -0
  65. data/lib/elastic_graph/schema_definition/schema_elements/type_reference.rb +353 -0
  66. data/lib/elastic_graph/schema_definition/schema_elements/type_with_subfields.rb +579 -0
  67. data/lib/elastic_graph/schema_definition/schema_elements/union_type.rb +157 -0
  68. data/lib/elastic_graph/schema_definition/scripting/file_system_repository.rb +77 -0
  69. data/lib/elastic_graph/schema_definition/scripting/script.rb +48 -0
  70. data/lib/elastic_graph/schema_definition/scripting/scripts/field/as_day_of_week.painless +24 -0
  71. data/lib/elastic_graph/schema_definition/scripting/scripts/field/as_time_of_day.painless +41 -0
  72. data/lib/elastic_graph/schema_definition/scripting/scripts/filter/by_time_of_day.painless +22 -0
  73. data/lib/elastic_graph/schema_definition/scripting/scripts/update/index_data.painless +93 -0
  74. data/lib/elastic_graph/schema_definition/state.rb +212 -0
  75. data/lib/elastic_graph/schema_definition/test_support.rb +113 -0
  76. metadata +513 -0
@@ -0,0 +1,506 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/constants"
10
+ require "elastic_graph/schema_definition/mixins/has_readable_to_s_and_inspect"
11
+ require "elastic_graph/schema_definition/schema_elements/argument"
12
+ require "elastic_graph/schema_definition/schema_elements/built_in_types"
13
+ require "elastic_graph/schema_definition/schema_elements/deprecated_element"
14
+ require "elastic_graph/schema_definition/schema_elements/directive"
15
+ require "elastic_graph/schema_definition/schema_elements/enum_type"
16
+ require "elastic_graph/schema_definition/schema_elements/enum_value"
17
+ require "elastic_graph/schema_definition/schema_elements/enums_for_indexed_types"
18
+ require "elastic_graph/schema_definition/schema_elements/field"
19
+ require "elastic_graph/schema_definition/schema_elements/field_source"
20
+ require "elastic_graph/schema_definition/schema_elements/graphql_sdl_enumerator"
21
+ require "elastic_graph/schema_definition/schema_elements/input_field"
22
+ require "elastic_graph/schema_definition/schema_elements/input_type"
23
+ require "elastic_graph/schema_definition/schema_elements/interface_type"
24
+ require "elastic_graph/schema_definition/schema_elements/object_type"
25
+ require "elastic_graph/schema_definition/schema_elements/relationship"
26
+ require "elastic_graph/schema_definition/schema_elements/scalar_type"
27
+ require "elastic_graph/schema_definition/schema_elements/sort_order_enum_value"
28
+ require "elastic_graph/schema_definition/schema_elements/type_reference"
29
+ require "elastic_graph/schema_definition/schema_elements/type_with_subfields"
30
+ require "elastic_graph/schema_definition/schema_elements/union_type"
31
+
32
+ module ElasticGraph
33
+ module SchemaDefinition
34
+ # A class responsible for instantiating all schema elements. We want all schema element instantiation
35
+ # to go through this one class to support extension libraries. ElasticGraph supports extension libraries
36
+ # that provide modules that get extended onto specific instances of ElasticGraph framework classes. We
37
+ # prefer this approach rather than having extension library modules applied via `include` or `prepend`,
38
+ # because they _permanently modify_ the host classes. ElasticGraph is designed to avoid all mutable
39
+ # global state, and that includes mutations to ElasticGraph class ancestor chains from extension libraries.
40
+ #
41
+ # Concretely, if we included or prepended extension library modules, we'd have a hard time keeping our
42
+ # tests order-independent and deterministic while running all the ElasticGraph test suites in the same
43
+ # Ruby process. A test using an extension library could cause a core ElasticGraph class to get mutated
44
+ # in a way that impacts a test that runs in the same process later. Instead, we expect extension libraries
45
+ # to hook into ElasticGraph using `extend` on particular object instances.
46
+ #
47
+ # But that creates a bit of a problem: how can an extension library extend a module onto every instance
48
+ # of a specific type of schema element while it is in use? The answer is this factory class:
49
+ #
50
+ # - An extension library can extend a module onto `schema.factory`.
51
+ # - That module can in turn override any of these factory methods and extend another module onto the schema
52
+ # element instances.
53
+ #
54
+ # @private
55
+ class Factory
56
+ include Mixins::HasReadableToSAndInspect.new
57
+
58
# Stores the schema definition state. The factory methods below pass it along to the
# schema elements they instantiate.
#
# @param state the schema definition state, kept in `@state`
def initialize(state)
  @state = state
end
61
+
62
# Enforces the invariant that schema elements are only ever instantiated through this factory.
# Captures the class's `new` as a `Method` object and then undefines `new` on the class's
# singleton class, so the captured method object is the only remaining way to build an instance.
#
# @param klass [Class] the schema element class to lock down
# @return [Method] the captured `new` method, for use by the matching factory method
def self.prevent_non_factory_instantiation_of(klass)
  captured_new = klass.method(:new)
  klass.singleton_class.undef_method(:new)
  captured_new
end
71
+
72
# Builds a `SchemaElements::DeprecatedElement`, supplying the factory's state as `schema_def_state`
# alongside the given name and definition details.
def new_deprecated_element(name, defined_at:, defined_via:)
  @@deprecated_element_new.call(
    schema_def_state: @state,
    name: name,
    defined_at: defined_at,
    defined_via: defined_via
  )
end
@@deprecated_element_new = prevent_non_factory_instantiation_of(SchemaElements::DeprecatedElement)
76
+
77
# Builds a `SchemaElements::Argument` for `field`. When a block is given, the new argument is
# yielded to it before being returned.
def new_argument(field, name, value_type)
  built_argument = @@argument_new.call(@state, field, name, value_type)
  yield built_argument if block_given?
  built_argument
end
@@argument_new = prevent_non_factory_instantiation_of(SchemaElements::Argument)
83
+
84
# Builds a `SchemaElements::BuiltInTypes` from the given `api` and the factory's state.
def new_built_in_types(api)
  @@built_in_types_new.call(api, @state)
end
@@built_in_types_new = prevent_non_factory_instantiation_of(SchemaElements::BuiltInTypes)
88
+
89
# Builds a `SchemaElements::Directive` from `name` and `arguments`. Unlike most factory methods,
# the factory's state is not passed along.
def new_directive(name, arguments)
  @@directive_new.call(name, arguments)
end
@@directive_new = prevent_non_factory_instantiation_of(SchemaElements::Directive)
93
+
94
# Builds a `SchemaElements::EnumType` named `name`, forwarding the given block to its constructor.
def new_enum_type(name, &block)
  # NOTE(review): `_ = block` looks like a type-checker workaround; it does not change what is passed.
  @@enum_type_new.call(@state, name, &(_ = block))
end
@@enum_type_new = prevent_non_factory_instantiation_of(SchemaElements::EnumType)
98
+
99
# Builds a `SchemaElements::EnumValue`. The constructor always receives a block; that block
# hands the enum value on to the caller's block only when one was given.
def new_enum_value(name, original_name)
  @@enum_value_new.call(@state, name, original_name) { |value| yield value if block_given? }
end
@@enum_value_new = prevent_non_factory_instantiation_of(SchemaElements::EnumValue)
105
+
106
# Builds a `SchemaElements::EnumsForIndexedTypes` from the factory's state.
def new_enums_for_indexed_types
  @@enums_for_indexed_types_new.call(@state)
end
@@enums_for_indexed_types_new = prevent_non_factory_instantiation_of(SchemaElements::EnumsForIndexedTypes)
110
+
111
# Builds a `SchemaElements::Field`. All keyword arguments are forwarded as given, with the
# factory's state added as `schema_def_state`; the block (if any) is forwarded as well.
#
# Hard to type check this.
# @dynamic new_field
__skip__ = def new_field(**kwargs, &block)
  @@field_new.call(schema_def_state: @state, **kwargs, &block)
end
@@field_new = prevent_non_factory_instantiation_of(SchemaElements::Field)
117
+
118
# Builds a `SchemaElements::GraphQLSDLEnumerator` from the factory's state and the given types
# (all types except the root query type, per the parameter name).
def new_graphql_sdl_enumerator(all_types_except_root_query_type)
  @@graphql_sdl_enumerator_new.call(@state, all_types_except_root_query_type)
end
@@graphql_sdl_enumerator_new = prevent_non_factory_instantiation_of(SchemaElements::GraphQLSDLEnumerator)
122
+
123
# Builds a `SchemaElements::InputField` wrapping a field created via `new_field` with
# `as_input: true` plus the given keyword arguments. A block is required: the input field is
# yielded to it unconditionally and then returned.
#
# Hard to type check this.
# @dynamic new_input_field
__skip__ = def new_input_field(**kwargs)
  @@input_field_new.call(new_field(as_input: true, **kwargs)).tap do |built_input_field|
    yield built_input_field
  end
end
@@input_field_new = prevent_non_factory_instantiation_of(SchemaElements::InputField)
131
+
132
# Builds a `SchemaElements::InputType` named `name`. The constructor is given a block that
# passes the input type on to the caller's (required) block.
def new_input_type(name)
  @@input_type_new.call(@state, name) { |built_type| yield built_type }
end
@@input_type_new = prevent_non_factory_instantiation_of(SchemaElements::InputType)
138
+
139
# Builds an input type used to filter on `source_type`. The type name is derived from
# `name_prefix` via `as_static_derived_type(category)`. Each filter input type is given the
# `any_of` and `not` operators first; the (required) block then receives the type so the
# caller can define the type-specific filter fields.
def new_filter_input_type(source_type, name_prefix: source_type, category: :filter_input)
  filter_type_name = @state.type_ref(name_prefix).as_static_derived_type(category).name

  new_input_type(filter_type_name) do |filter_type|
    filter_type.documentation <<~EOS
      Input type used to specify filters on `#{source_type}` fields.

      Will be ignored if passed as an empty object (or as `null`).
    EOS

    # `any_of` takes a list of this same filter type.
    filter_type.field @state.schema_elements.any_of, "[#{filter_type.name}!]" do |any_of_field|
      any_of_field.documentation <<~EOS
        Matches records where any of the provided sub-filters evaluate to true.
        This works just like an OR operator in SQL.

        Will be ignored when `null` is passed. When an empty list is passed, will cause this
        part of the filter to match no documents.
      EOS
    end

    # `not` takes a single instance of this same filter type.
    filter_type.field @state.schema_elements.not, filter_type.name do |not_field|
      not_field.documentation <<~EOS
        Matches records where the provided sub-filter does not evaluate to true.
        This works just like a NOT operator in SQL.

        Will be ignored when `null` or an empty object is passed.
      EOS
    end

    yield filter_type
  end
end
169
+
170
# Builds the standard set of filter input types for types which are indexing leaf types.
#
# All GraphQL leaf types (enums and scalars) are indexing leaf types, but some GraphQL object types are
# as well. For example, `GeoLocation` is an object type in GraphQL (with separate lat/long fields) but is
# an indexing leaf type because we use the datastore `geo_point` type for it.
#
# @return [Array] the single-value filter, the list filter, and the list element filter, in that order
def build_standard_filter_input_types_for_index_leaf_type(source_type, name_prefix: source_type, &define_filter_fields)
  [
    new_filter_input_type(source_type, name_prefix: name_prefix, &define_filter_fields),
    new_list_filter_input_type(source_type, name_prefix: name_prefix, any_satisfy_type_category: :list_element_filter_input),
    new_list_element_filter_input_type(source_type, name_prefix: name_prefix, &define_filter_fields)
  ]
end
182
+
183
# Builds the standard set of filter input types for types which are indexing object types.
#
# Most GraphQL object types are indexing object types as well, but not all.
# For example, `GeoLocation` is an object type in GraphQL (with separate lat/long fields) but is
# an indexing leaf type because we use the datastore `geo_point` type for it.
#
# @return [Array] the single-value filter, the list filter, and the fields-list filter, in that order
def build_standard_filter_input_types_for_index_object_type(source_type, name_prefix: source_type, &define_filter_fields)
  [
    new_filter_input_type(source_type, name_prefix: name_prefix, &define_filter_fields),
    new_list_filter_input_type(source_type, name_prefix: name_prefix, any_satisfy_type_category: :filter_input),
    new_fields_list_filter_input_type(source_type, name_prefix: name_prefix)
  ]
end
195
+
196
# Builds the relay pagination types for `type_name`: the edge type (only when
# `support_pagination` is true) followed by the connection type. The optional block is
# forwarded to `connection_type_for` so callers can customize the connection type.
#
# @return [Array] the generated types, with any `nil` entries removed
def build_relay_pagination_types(type_name, include_total_edge_count: false, derived_indexed_types: [], support_pagination: true, &customize_connection)
  pagination_types = []
  pagination_types << edge_type_for(type_name) if support_pagination
  pagination_types << connection_type_for(type_name, include_total_edge_count, derived_indexed_types, support_pagination, &customize_connection)
  pagination_types.compact
end
202
+
203
# Builds a `SchemaElements::InterfaceType`; `name` is converted with `to_s` first. The
# constructor is given a block that passes the interface type on to the caller's (required) block.
def new_interface_type(name)
  @@interface_type_new.call(@state, name.to_s) { |built_type| yield built_type }
end
@@interface_type_new = prevent_non_factory_instantiation_of(SchemaElements::InterfaceType)
209
+
210
# Builds a `SchemaElements::ObjectType`; `name` is converted with `to_s` first. The constructor
# always receives a block, which hands the object type to the caller's block only when one was given.
def new_object_type(name)
  @@object_type_new.call(@state, name.to_s) { |built_type| yield built_type if block_given? }
end
@@object_type_new = prevent_non_factory_instantiation_of(SchemaElements::ObjectType)
216
+
217
# Builds a `SchemaElements::ScalarType`; `name` is converted with `to_s` first. The constructor
# is given a block that passes the scalar type on to the caller's (required) block.
def new_scalar_type(name)
  @@scalar_type_new.call(@state, name.to_s) { |built_type| yield built_type }
end
@@scalar_type_new = prevent_non_factory_instantiation_of(SchemaElements::ScalarType)
223
+
224
# Builds a `SchemaElements::SortOrderEnumValue` from an enum value and the field path it sorts by.
# The factory's state is not passed along.
def new_sort_order_enum_value(enum_value, sort_order_field_path)
  @@sort_order_enum_value_new.call(enum_value, sort_order_field_path)
end
@@sort_order_enum_value_new = prevent_non_factory_instantiation_of(SchemaElements::SortOrderEnumValue)
228
+
229
# Builds a `SchemaElements::TypeReference` for `name`. Note the argument order: the name comes
# first and the factory's state second.
def new_type_reference(name)
  @@type_reference_new.call(name, @state)
end
@@type_reference_new = prevent_non_factory_instantiation_of(SchemaElements::TypeReference)
233
+
234
# Builds a `SchemaElements::TypeWithSubfields` of the given `schema_kind`, forwarding
# `wrapping_type` and `field_factory` as keyword arguments. The constructor is given a block
# that passes the new type on to the caller's (required) block.
def new_type_with_subfields(schema_kind, name, wrapping_type:, field_factory:)
  @@type_with_subfields_new.call(
    schema_kind,
    @state,
    name,
    wrapping_type: wrapping_type,
    field_factory: field_factory
  ) { |built_type| yield built_type }
end
@@type_with_subfields_new = prevent_non_factory_instantiation_of(SchemaElements::TypeWithSubfields)
240
+
241
# Builds a `SchemaElements::UnionType`; `name` is converted with `to_s` first. The constructor
# is given a block that passes the union type on to the caller's (required) block.
def new_union_type(name)
  @@union_type_new.call(@state, name.to_s) { |built_type| yield built_type }
end
@@union_type_new = prevent_non_factory_instantiation_of(SchemaElements::UnionType)
247
+
248
# Builds a `SchemaElements::FieldSource`. The keyword arguments accepted here are handed to the
# constructor positionally (relationship name first, then field path).
def new_field_source(relationship_name:, field_path:)
  @@field_source_new.call(relationship_name, field_path)
end
@@field_source_new = prevent_non_factory_instantiation_of(SchemaElements::FieldSource)
252
+
253
# Builds a `SchemaElements::Relationship` for `field`, forwarding every keyword argument unchanged.
# The factory's state is not passed along.
def new_relationship(field, cardinality:, related_type:, foreign_key:, direction:)
  relationship_options = {
    cardinality: cardinality,
    related_type: related_type,
    foreign_key: foreign_key,
    direction: direction
  }

  @@relationship_new.call(field, **relationship_options)
end
@@relationship_new = prevent_non_factory_instantiation_of(SchemaElements::Relationship)
263
+
264
# Responsible for creating a new `*AggregatedValues` type for an index leaf type.
#
# An index leaf type is a scalar, enum, or object type that is backed by a single, indivisible
# field in the index. All scalar and enum types are index leaf types, and object types
# rarely (but sometimes) are. For example, the `GeoLocation` object type has two subfields
# (`latitude` and `longitude`) but is backed by a single `geo_point` field in the index,
# so it is an index leaf type.
#
# The generated type is GraphQL-only and always gets an approximate distinct value count field.
# The (required) block receives the type so the caller can define further fields.
def new_aggregated_values_type_for_index_leaf_type(index_leaf_type)
  aggregated_values_type_name = @state.type_ref(index_leaf_type).as_aggregated_values.name

  new_object_type aggregated_values_type_name do |agg_type|
    agg_type.graphql_only true
    agg_type.documentation "A return type used from aggregations to provided aggregated values over `#{index_leaf_type}` fields."
    agg_type.runtime_metadata_overrides = {elasticgraph_category: :scalar_aggregated_values}

    agg_type.field @state.schema_elements.approximate_distinct_value_count, "JsonSafeLong", graphql_only: true do |distinct_count_field|
      # Note: the 1-6% accuracy figure comes from the Elasticsearch docs:
      # https://www.elastic.co/guide/en/elasticsearch/reference/8.10/search-aggregations-metrics-cardinality-aggregation.html#_counts_are_approximate
      distinct_count_field.documentation <<~EOS
        An approximation of the number of unique values for this field within this grouping.

        The approximation uses the HyperLogLog++ algorithm from the [HyperLogLog in Practice](https://research.google.com/pubs/archive/40671.pdf)
        paper. The accuracy of the returned value varies based on the specific dataset, but
        it usually differs from the true distinct value count by less than 7%.
      EOS

      # Record how this field is computed: a `cardinality` function, with 0 used for empty buckets.
      existing_metadata = distinct_count_field.runtime_metadata_graphql_field
      distinct_count_field.runtime_metadata_graphql_field = existing_metadata.with_computation_detail(
        empty_bucket_value: 0,
        function: :cardinality
      )
    end

    yield agg_type
  end
end
297
+
298
+ private
299
+
300
# Builds the filter input type for list fields whose elements are `source_type`. On top of what
# `new_filter_input_type` defines, it adds `any_satisfy` (whose sub-filter type is derived from
# `name_prefix` using `any_satisfy_type_category`), `all_of`, and the list count filter field.
def new_list_filter_input_type(source_type, name_prefix:, any_satisfy_type_category:)
  any_satisfy_name = @state.schema_elements.any_satisfy
  all_of_name = @state.schema_elements.all_of

  new_filter_input_type "[#{source_type}]", name_prefix: name_prefix, category: :list_filter_input do |list_filter|
    element_filter_name = @state.type_ref(name_prefix).as_static_derived_type(any_satisfy_type_category).name

    list_filter.field any_satisfy_name, element_filter_name do |any_satisfy_field|
      any_satisfy_field.documentation <<~EOS
        Matches records where any of the list elements match the provided sub-filter.

        Will be ignored when `null` or an empty object is passed.
      EOS
    end

    list_filter.field all_of_name, "[#{list_filter.name}!]" do |all_of_field|
      all_of_field.documentation <<~EOS
        Matches records where all of the provided sub-filters evaluate to true. This works just like an AND operator in SQL.

        Note: multiple filters are automatically ANDed together. This is only needed when you have multiple filters that can't
        be provided on a single `#{list_filter.name}` input because of collisions between key names. For example, if you want to provide
        multiple `#{any_satisfy_name}: ...` filters, you could do `#{all_of_name}: [{#{any_satisfy_name}: ...}, {#{any_satisfy_name}: ...}]`.

        Will be ignored when `null` is passed or an empty list is passed.
      EOS
    end

    define_list_counts_filter_field_on(list_filter)
  end
end
328
+
329
# Generates a filter type used on elements of a list. Referenced from a `#{type}ListFilterInput` input
# (which is referenced from `any_satisfy`).
#
# The type is built via `new_filter_input_type`; its documentation is then replaced and its `not`
# field removed before the (required) block receives it to define the element filter fields.
def new_list_element_filter_input_type(source_type, name_prefix:)
  new_filter_input_type source_type, name_prefix: name_prefix, category: :list_element_filter_input do |element_filter|
    element_filter.documentation <<~EOS
      Input type used to specify filters on elements of a `[#{source_type}]` field.

      Will be ignored if passed as an empty object (or as `null`).
    EOS

    # While we support `not: {any_satisfy: ...}` we do not support `any_satisfy: {not ...}` at this time.
    # Since `any_satisfy` does not have a node in the datastore query expression, the naive way we'd
    # generate the datastore filter would be the same for both cases. However, they should have different
    # semantics.
    #
    # For example, if we have these documents:
    #
    # - d1: {tags: ["a", "b"]}
    # - d2: {tags: ["b", "c"]}
    # - d3: {tags: []}
    # - d4: {tags: ["a"]}
    #
    # Then `not: {any_satisfy: {equal_to_any_of: ["a"]}}` should (and does) match d2 and d3.
    # But `any_satisfy: {not: {equal_to_any_of: ["a"]}}` should match d1 and d2 (both have a tag that is not equal to "a").
    # However, Elasticsearch and OpenSearch do not allow us to express that.
    #
    # Technically, we could probably get it to work if we implemented negations of all our filter operators.
    # For example, `gt` negated is `lte`, `lt` negated is `gte`, etc. But for some operators that's not easy.
    # There is no available negation of `equal_to_any_of`, but we could maybe get it to work by using a regex
    # operator that matches any term EXCEPT the provided value, but that's non-trivial to implement and could
    # be quite expensive. So for now we just don't support this.
    #
    # ...therefore, we need to omit `not` from the generated filter here.
    not_field_name = @state.schema_elements.not
    element_filter.graphql_fields_by_name.delete(not_field_name)

    yield element_filter
  end
end
367
+
368
# Generates a filter type used for objects within a list (either at a parent or some ancestor level)
# when the `nested` type is not used. The datastore indexes each leaf field as its own flattened list
# of values. We mirror that structure with this filter type, only offering `any_satisfy` on leaf fields.
#
# The source object type is looked up by name with `fetch`, so an unregistered `source_type_name`
# raises a `KeyError`.
def new_fields_list_filter_input_type(source_type_name, name_prefix:)
  source_object_type = @state.object_types_by_name.fetch(source_type_name)

  new_filter_input_type source_type_name, name_prefix: name_prefix, category: :fields_list_filter_input do |fields_list_filter|
    fields_list_filter.documentation <<~EOS
      Input type used to specify filters on a `#{source_type_name}` object referenced directly
      or transitively from a list field that has been configured to index each leaf field as
      its own flattened list of values.

      Will be ignored if passed as an empty object (or as `null`).
    EOS

    # Mirror every filterable field of the source type onto this filter type.
    source_object_type.graphql_fields_by_name.each do |field_name, source_field|
      if source_field.filterable?
        fields_list_filter.graphql_fields_by_name[field_name] = source_field.to_filter_field(
          parent_type: fields_list_filter,
          # We are never filtering on single values in this context (since we are already
          # within a list that isn't using the `nested` mapping type).
          for_single_value: false
        )
      end
    end

    # We want to add a `count` field so that clients can filter on the count of elements of this list field.
    # However, if the object type of this field has a user-defined `count` field then we cannot do that, as that
    # would create a conflict. So we omit it in that case. Users will still be able to filter on the count of
    # the leaf fields if they spell out the full filter path to a leaf field.
    count_name = @state.schema_elements.count
    has_conflicting_count_field = fields_list_filter.graphql_fields_by_name.key?(count_name)

    if has_conflicting_count_field
      @state.output.puts <<~EOS
        WARNING: Since a `#{source_type_name}.#{count_name}` field exists, ElasticGraph is not able to
        define its typical `#{fields_list_filter.name}.#{count_name}` field, which allows clients to filter on the count
        of values for a `[#{source_object_type.name}]` field. Clients will still be able to filter on the `#{count_name}`
        at a leaf field path. However, there are a couple ways this naming conflict can be avoided if desired:

        1. Pick a different name for the `#{source_type_name}.#{count_name}` field.
        2. Change the name used by ElasticGraph for this field. To do that, pass a
        `schema_element_name_overrides: {#{count_name.inspect} => "alt_name"}` option alongside
        `schema_element_name_form: ...` when defining `ElasticGraph::SchemaDefinition::RakeTasks`
        (typically in the `Rakefile`).
      EOS
    else
      define_list_counts_filter_field_on(fields_list_filter)
    end
  end
end
416
+
417
# Defines the list count filter field on the given filter input type.
def define_list_counts_filter_field_on(type)
  # Note: we use `IntFilterInput` (instead of `JsonSafeLongFilterInput` or similar...) to align with the
  # `integer` mapping type we use for the `__counts` field. If we ever change that
  # in `list_counts_mapping.rb`, we'll want to consider changing this as well.
  count_filter_type_name = @state.type_ref("Int").as_filter_input.name

  # We use `name_in_index: __counts` because we need to indicate that it's the list `count` operator
  # rather than a schema field named "counts". Our filter interpreter logic relies on that name.
  # We can count on `__counts` not being used by a real schema field because the GraphQL spec reserves
  # the `__` prefix for its own use.
  type.field @state.schema_elements.count, count_filter_type_name, name_in_index: LIST_COUNTS_FIELD do |count_field|
    count_field.documentation <<~EOS
      Used to filter on the number of non-null elements in this list field.

      Will be ignored when `null` or an empty object is passed.
    EOS
  end
end
434
+
435
# Builds the relay edge object type for `type_name`. The type is flagged as a relay pagination
# type, categorized as `:relay_edge`, and given `node` and `cursor` fields.
def edge_type_for(type_name)
  ref = @state.type_ref(type_name)

  new_object_type ref.as_edge.name do |edge_type|
    edge_type.relay_pagination_type = true
    edge_type.runtime_metadata_overrides = {elasticgraph_category: :relay_edge}

    edge_type.documentation <<~EOS
      Represents a specific `#{type_name}` in the context of a `#{ref.as_connection.name}`,
      providing access to both the `#{type_name}` and a pagination `Cursor`.

      See the [Relay GraphQL Cursor Connections
      Specification](https://relay.dev/graphql/connections.htm#sec-Edge-Types) for more info.
    EOS

    edge_type.field @state.schema_elements.node, type_name do |node_field|
      node_field.documentation "The `#{type_name}` of this edge."
    end

    edge_type.field @state.schema_elements.cursor, "Cursor" do |cursor_field|
      cursor_field.documentation <<~EOS
        The `Cursor` of this `#{type_name}`. This can be passed in the next query as
        a `before` or `after` argument to continue paginating from this `#{type_name}`.
      EOS
    end
  end
end
461
+
462
# Builds the relay connection object type for `type_name`. Fields are defined in this order:
# `edges` (paginated only), `nodes`, `page_info` (paginated only), and the total edge count
# (only when `include_total_edge_count` is true). When a block is given, the finished type is
# yielded to it for further customization.
#
# NOTE(review): `derived_indexed_types` is accepted but not referenced in this method's body.
def connection_type_for(type_name, include_total_edge_count, derived_indexed_types, support_pagination)
  ref = @state.type_ref(type_name)

  new_object_type ref.as_connection.name do |connection_type|
    connection_type.relay_pagination_type = true
    connection_type.runtime_metadata_overrides = {elasticgraph_category: :relay_connection}

    connection_docs =
      if support_pagination
        <<~EOS
          Represents a paginated collection of `#{type_name}` results.

          See the [Relay GraphQL Cursor Connections
          Specification](https://relay.dev/graphql/connections.htm#sec-Connection-Types) for more info.
        EOS
      else
        "Represents a collection of `#{type_name}` results."
      end
    connection_type.documentation connection_docs

    if support_pagination
      connection_type.field @state.schema_elements.edges, "[#{ref.as_edge.name}!]!" do |edges_field|
        edges_field.documentation "Wraps a specific `#{type_name}` to pair it with its pagination cursor."
      end
    end

    connection_type.field @state.schema_elements.nodes, "[#{type_name}!]!" do |nodes_field|
      nodes_field.documentation "The list of `#{type_name}` results."
    end

    if support_pagination
      connection_type.field @state.schema_elements.page_info, "PageInfo!" do |page_info_field|
        page_info_field.documentation "Provides pagination-related information."
      end
    end

    if include_total_edge_count
      connection_type.field @state.schema_elements.total_edge_count, "JsonSafeLong!" do |total_field|
        total_field.documentation "The total number of edges available in this connection to paginate over."
      end
    end

    yield connection_type if block_given?
  end
end
504
+ end
505
+ end
506
+ end
@@ -0,0 +1,79 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/schema_definition/indexing/derived_fields/field_initializer_support"
10
+
11
+ module ElasticGraph
12
+ module SchemaDefinition
13
+ module Indexing
14
+ module DerivedFields
15
+ # Responsible for providing bits of the painless script specific to a {DerivedIndexedType#append_only_set} field.
16
+ #
17
+ # @api private
18
class AppendOnlySet < ::Data.define(:destination_field, :source_field)
  # `Data.define` provides the following methods:
  # @dynamic destination_field, source_field

  # Painless source for the list-level helper. It feeds each incoming value through
  # `appendOnlySet_idempotentlyInsertValue` and reports whether any insert happened.
  # Intended as an implementation detail of this class.
  IDEMPOTENTLY_INSERT_VALUES = <<~EOS
    // Wrapper around `idempotentlyInsertValue` that handles a list of values.
    // Returns `true` if the list field was updated.
    boolean appendOnlySet_idempotentlyInsertValues(List values, List sortedList) {
      boolean listUpdated = false;

      for (def value : values) {
        listUpdated = appendOnlySet_idempotentlyInsertValue(value, sortedList) || listUpdated;
      }

      return listUpdated;
    }
  EOS

  # Painless source for the single-value helper. It binary-searches the sorted list and
  # inserts the value at its insertion point only when it is not already present.
  # Intended as an implementation detail of this class.
  IDEMPOTENTLY_INSERT_VALUE = <<~EOS
    // Idempotently inserts the given value in the `sortedList`, returning `true` if the list was updated.
    boolean appendOnlySet_idempotentlyInsertValue(def value, List sortedList) {
      // As per https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/util/Collections.html#binarySearch(java.util.List,java.lang.Object):
      //
      // > Returns the index of the search key, if it is contained in the list; otherwise, (-(insertion point) - 1).
      // > The insertion point is defined as the point at which the key would be inserted into the list: the index
      // > of the first element greater than the key, or list.size() if all elements in the list are less than the
      // > specified key. Note that this guarantees that the return value will be >= 0 if and only if the key is found.
      int binarySearchResult = Collections.binarySearch(sortedList, value);

      if (binarySearchResult < 0) {
        sortedList.add(-binarySearchResult - 1, value);
        return true;
      } else {
        return false;
      }
    }
  EOS

  # @return [Array<String>] the painless function sources `append_only_set` depends on:
  #   the list-level wrapper first, then the single-value helper.
  def function_definitions
    [IDEMPOTENTLY_INSERT_VALUES, IDEMPOTENTLY_INSERT_VALUE]
  end

  # @return [String] a painless expression that inserts the values found under `source_field`
  #   in `data` into the `destination_field` list on `ctx._source`; the painless function it
  #   calls returns a boolean indicating whether the list was updated.
  def apply_operation_returning_update_status
    incoming_values = %{data["#{source_field}"]}
    target_list = "ctx._source.#{destination_field}"

    "appendOnlySet_idempotentlyInsertValues(#{incoming_values}, #{target_list})"
  end

  # Produces the statements that default the destination field to an empty painless list when
  # it is null. This primarily happens when the document does not already exist, but can also
  # happen when a new derived field is added to an existing type.
  #
  # @return [Array<String>] painless statements that must be called at the top of the script.
  def setup_statements
    FieldInitializerSupport.build_empty_value_initializers(
      destination_field,
      leaf_value: FieldInitializerSupport::EMPTY_PAINLESS_LIST
    )
  end
end
76
+ end
77
+ end
78
+ end
79
+ end
@@ -0,0 +1,59 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ module ElasticGraph
10
+ module SchemaDefinition
11
+ module Indexing
12
+ # Contains implementation logic for the different kinds of derived fields.
13
+ #
14
+ # @api private
15
+ module DerivedFields
16
+ # Contains helper logic for field initialization applicable to all types of derived fields.
17
+ #
18
+ # @api private
19
module FieldInitializerSupport
  # Painless literal for an empty list, from [the docs](https://www.elastic.co/guide/en/elasticsearch/painless/8.15/painless-operators-reference.html#list-initialization-operator).
  EMPTY_PAINLESS_LIST = "[]"

  # Painless literal for an empty map, from [the docs](https://www.elastic.co/guide/en/elasticsearch/painless/8.15/painless-operators-reference.html#map-initialization-operator).
  EMPTY_PAINLESS_MAP = "[:]"

  # Builds one null-guarded initializer per segment of the dotted `destination_field` path,
  # outermost segment first. Every parent segment defaults to an empty painless map; the
  # leaf segment defaults to `leaf_value`, or gets no statement at all when `leaf_value`
  # is `:leave_unset`.
  #
  # @param destination_field [String] dotted path of the field under `ctx._source`
  # @param leaf_value [String, Symbol] painless literal for the leaf, or `:leave_unset`
  # @return [Array<String>] a list of painless statements that will initialize a given `destination_field` path to an empty value.
  def self.build_empty_value_initializers(destination_field, leaf_value:)
    segments = destination_field.split(".")

    segments.each_index.filter_map do |index|
      prefix_path = segments[0..index].join(".")

      if prefix_path == destination_field
        # Leaf segment: the caller may ask for it to be left alone entirely.
        next if leaf_value == :leave_unset

        default_source_field_to_empty(prefix_path, leaf_value.to_s)
      else
        # Parent segments must be maps so that the nested path can be assigned into.
        default_source_field_to_empty(prefix_path, EMPTY_PAINLESS_MAP.to_s)
      end
    end
  end

  # @param field_path [String] dotted path of the field under `ctx._source`
  # @param empty_value [String] painless literal assigned when the field is null
  # @return [String] a painless statement that will default a single field to an empty value.
  def self.default_source_field_to_empty(field_path, empty_value)
    [
      "if (ctx._source.#{field_path} == null) {",
      "  ctx._source.#{field_path} = #{empty_value};",
      "}"
    ].join("\n")
  end
end
56
+ end
57
+ end
58
+ end
59
+ end