elasticgraph-schema_definition 0.18.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (76) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +21 -0
  3. data/README.md +7 -0
  4. data/elasticgraph-schema_definition.gemspec +26 -0
  5. data/lib/elastic_graph/schema_definition/api.rb +359 -0
  6. data/lib/elastic_graph/schema_definition/factory.rb +506 -0
  7. data/lib/elastic_graph/schema_definition/indexing/derived_fields/append_only_set.rb +79 -0
  8. data/lib/elastic_graph/schema_definition/indexing/derived_fields/field_initializer_support.rb +59 -0
  9. data/lib/elastic_graph/schema_definition/indexing/derived_fields/immutable_value.rb +99 -0
  10. data/lib/elastic_graph/schema_definition/indexing/derived_fields/min_or_max_value.rb +62 -0
  11. data/lib/elastic_graph/schema_definition/indexing/derived_indexed_type.rb +346 -0
  12. data/lib/elastic_graph/schema_definition/indexing/event_envelope.rb +74 -0
  13. data/lib/elastic_graph/schema_definition/indexing/field.rb +181 -0
  14. data/lib/elastic_graph/schema_definition/indexing/field_reference.rb +51 -0
  15. data/lib/elastic_graph/schema_definition/indexing/field_type/enum.rb +65 -0
  16. data/lib/elastic_graph/schema_definition/indexing/field_type/object.rb +113 -0
  17. data/lib/elastic_graph/schema_definition/indexing/field_type/scalar.rb +51 -0
  18. data/lib/elastic_graph/schema_definition/indexing/field_type/union.rb +70 -0
  19. data/lib/elastic_graph/schema_definition/indexing/index.rb +318 -0
  20. data/lib/elastic_graph/schema_definition/indexing/json_schema_field_metadata.rb +34 -0
  21. data/lib/elastic_graph/schema_definition/indexing/json_schema_with_metadata.rb +234 -0
  22. data/lib/elastic_graph/schema_definition/indexing/list_counts_mapping.rb +53 -0
  23. data/lib/elastic_graph/schema_definition/indexing/relationship_resolver.rb +96 -0
  24. data/lib/elastic_graph/schema_definition/indexing/rollover_config.rb +25 -0
  25. data/lib/elastic_graph/schema_definition/indexing/update_target_factory.rb +54 -0
  26. data/lib/elastic_graph/schema_definition/indexing/update_target_resolver.rb +195 -0
  27. data/lib/elastic_graph/schema_definition/json_schema_pruner.rb +61 -0
  28. data/lib/elastic_graph/schema_definition/mixins/can_be_graphql_only.rb +31 -0
  29. data/lib/elastic_graph/schema_definition/mixins/has_derived_graphql_type_customizations.rb +119 -0
  30. data/lib/elastic_graph/schema_definition/mixins/has_directives.rb +65 -0
  31. data/lib/elastic_graph/schema_definition/mixins/has_documentation.rb +74 -0
  32. data/lib/elastic_graph/schema_definition/mixins/has_indices.rb +281 -0
  33. data/lib/elastic_graph/schema_definition/mixins/has_readable_to_s_and_inspect.rb +46 -0
  34. data/lib/elastic_graph/schema_definition/mixins/has_subtypes.rb +116 -0
  35. data/lib/elastic_graph/schema_definition/mixins/has_type_info.rb +181 -0
  36. data/lib/elastic_graph/schema_definition/mixins/implements_interfaces.rb +122 -0
  37. data/lib/elastic_graph/schema_definition/mixins/supports_default_value.rb +47 -0
  38. data/lib/elastic_graph/schema_definition/mixins/supports_filtering_and_aggregation.rb +267 -0
  39. data/lib/elastic_graph/schema_definition/mixins/verifies_graphql_name.rb +38 -0
  40. data/lib/elastic_graph/schema_definition/rake_tasks.rb +190 -0
  41. data/lib/elastic_graph/schema_definition/results.rb +404 -0
  42. data/lib/elastic_graph/schema_definition/schema_artifact_manager.rb +482 -0
  43. data/lib/elastic_graph/schema_definition/schema_elements/argument.rb +56 -0
  44. data/lib/elastic_graph/schema_definition/schema_elements/built_in_types.rb +1541 -0
  45. data/lib/elastic_graph/schema_definition/schema_elements/deprecated_element.rb +21 -0
  46. data/lib/elastic_graph/schema_definition/schema_elements/directive.rb +40 -0
  47. data/lib/elastic_graph/schema_definition/schema_elements/enum_type.rb +189 -0
  48. data/lib/elastic_graph/schema_definition/schema_elements/enum_value.rb +73 -0
  49. data/lib/elastic_graph/schema_definition/schema_elements/enum_value_namer.rb +89 -0
  50. data/lib/elastic_graph/schema_definition/schema_elements/enums_for_indexed_types.rb +82 -0
  51. data/lib/elastic_graph/schema_definition/schema_elements/field.rb +1085 -0
  52. data/lib/elastic_graph/schema_definition/schema_elements/field_path.rb +112 -0
  53. data/lib/elastic_graph/schema_definition/schema_elements/field_source.rb +16 -0
  54. data/lib/elastic_graph/schema_definition/schema_elements/graphql_sdl_enumerator.rb +113 -0
  55. data/lib/elastic_graph/schema_definition/schema_elements/input_field.rb +31 -0
  56. data/lib/elastic_graph/schema_definition/schema_elements/input_type.rb +60 -0
  57. data/lib/elastic_graph/schema_definition/schema_elements/interface_type.rb +72 -0
  58. data/lib/elastic_graph/schema_definition/schema_elements/list_counts_state.rb +40 -0
  59. data/lib/elastic_graph/schema_definition/schema_elements/object_type.rb +53 -0
  60. data/lib/elastic_graph/schema_definition/schema_elements/relationship.rb +218 -0
  61. data/lib/elastic_graph/schema_definition/schema_elements/scalar_type.rb +310 -0
  62. data/lib/elastic_graph/schema_definition/schema_elements/sort_order_enum_value.rb +36 -0
  63. data/lib/elastic_graph/schema_definition/schema_elements/sub_aggregation_path.rb +66 -0
  64. data/lib/elastic_graph/schema_definition/schema_elements/type_namer.rb +237 -0
  65. data/lib/elastic_graph/schema_definition/schema_elements/type_reference.rb +353 -0
  66. data/lib/elastic_graph/schema_definition/schema_elements/type_with_subfields.rb +579 -0
  67. data/lib/elastic_graph/schema_definition/schema_elements/union_type.rb +157 -0
  68. data/lib/elastic_graph/schema_definition/scripting/file_system_repository.rb +77 -0
  69. data/lib/elastic_graph/schema_definition/scripting/script.rb +48 -0
  70. data/lib/elastic_graph/schema_definition/scripting/scripts/field/as_day_of_week.painless +24 -0
  71. data/lib/elastic_graph/schema_definition/scripting/scripts/field/as_time_of_day.painless +41 -0
  72. data/lib/elastic_graph/schema_definition/scripting/scripts/filter/by_time_of_day.painless +22 -0
  73. data/lib/elastic_graph/schema_definition/scripting/scripts/update/index_data.painless +93 -0
  74. data/lib/elastic_graph/schema_definition/state.rb +212 -0
  75. data/lib/elastic_graph/schema_definition/test_support.rb +113 -0
  76. metadata +513 -0
@@ -0,0 +1,506 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/constants"
10
+ require "elastic_graph/schema_definition/mixins/has_readable_to_s_and_inspect"
11
+ require "elastic_graph/schema_definition/schema_elements/argument"
12
+ require "elastic_graph/schema_definition/schema_elements/built_in_types"
13
+ require "elastic_graph/schema_definition/schema_elements/deprecated_element"
14
+ require "elastic_graph/schema_definition/schema_elements/directive"
15
+ require "elastic_graph/schema_definition/schema_elements/enum_type"
16
+ require "elastic_graph/schema_definition/schema_elements/enum_value"
17
+ require "elastic_graph/schema_definition/schema_elements/enums_for_indexed_types"
18
+ require "elastic_graph/schema_definition/schema_elements/field"
19
+ require "elastic_graph/schema_definition/schema_elements/field_source"
20
+ require "elastic_graph/schema_definition/schema_elements/graphql_sdl_enumerator"
21
+ require "elastic_graph/schema_definition/schema_elements/input_field"
22
+ require "elastic_graph/schema_definition/schema_elements/input_type"
23
+ require "elastic_graph/schema_definition/schema_elements/interface_type"
24
+ require "elastic_graph/schema_definition/schema_elements/object_type"
25
+ require "elastic_graph/schema_definition/schema_elements/relationship"
26
+ require "elastic_graph/schema_definition/schema_elements/scalar_type"
27
+ require "elastic_graph/schema_definition/schema_elements/sort_order_enum_value"
28
+ require "elastic_graph/schema_definition/schema_elements/type_reference"
29
+ require "elastic_graph/schema_definition/schema_elements/type_with_subfields"
30
+ require "elastic_graph/schema_definition/schema_elements/union_type"
31
+
32
+ module ElasticGraph
33
+ module SchemaDefinition
34
+ # A class responsible for instantiating all schema elements. We want all schema element instantiation
35
+ # to go through this one class to support extension libraries. ElasticGraph supports extension libraries
36
+ # that provide modules that get extended onto specific instances of ElasticGraph framework classes. We
37
+ # prefer this approach rather than having extension library modules applied via `include` or `prepend`,
38
+ # because they _permanently modify_ the host classes. ElasticGraph is designed to avoid all mutable
39
+ # global state, and that includes mutations to ElasticGraph class ancestor chains from extension libraries.
40
+ #
41
+ # Concretely, if we included or prepended extension library modules, we'd have a hard time keeping our
42
+ # tests order-independent and deterministic while running all the ElasticGraph test suites in the same
43
+ # Ruby process. A test using an extension library could cause a core ElasticGraph class to get mutated
44
+ # in a way that impacts a test that runs in the same process later. Instead, we expect extension libraries
45
+ # to hook into ElasticGraph using `extend` on particular object instances.
46
+ #
47
+ # But that creates a bit of a problem: how can an extension library extend a module onto every instance
48
+ # of a specific type of schema element while it is in use? The answer is this factory class:
49
+ #
50
+ # - An extension library can extend a module onto `schema.factory`.
51
+ # - That module can in turn override any of these factory methods and extend another module onto the schema
52
+ # element instances.
53
+ #
54
+ # @private
55
+ class Factory
56
+ include Mixins::HasReadableToSAndInspect.new
57
+
58
+ def initialize(state)
59
+ @state = state
60
+ end
61
+
62
+ # Helper method to help enforce our desired invariant: we want _every_ instantiation of these schema
63
+ # element classes to happen via the factory methods provided here. To enforce that, this helper returns
64
+ # the `new` method (as a `Method` object) after removing it from the given class. That makes it impossible
65
+ # for `new` to be called by anyone except from the factory using the captured method object.
66
+ def self.prevent_non_factory_instantiation_of(klass)
67
+ klass.method(:new).tap do
68
+ klass.singleton_class.undef_method :new
69
+ end
70
+ end
71
+
72
+ def new_deprecated_element(name, defined_at:, defined_via:)
73
+ @@deprecated_element_new.call(schema_def_state: @state, name: name, defined_at: defined_at, defined_via: defined_via)
74
+ end
75
+ @@deprecated_element_new = prevent_non_factory_instantiation_of(SchemaElements::DeprecatedElement)
76
+
77
+ def new_argument(field, name, value_type)
78
+ @@argument_new.call(@state, field, name, value_type).tap do |argument|
79
+ yield argument if block_given?
80
+ end
81
+ end
82
+ @@argument_new = prevent_non_factory_instantiation_of(SchemaElements::Argument)
83
+
84
+ def new_built_in_types(api)
85
+ @@built_in_types_new.call(api, @state)
86
+ end
87
+ @@built_in_types_new = prevent_non_factory_instantiation_of(SchemaElements::BuiltInTypes)
88
+
89
+ def new_directive(name, arguments)
90
+ @@directive_new.call(name, arguments)
91
+ end
92
+ @@directive_new = prevent_non_factory_instantiation_of(SchemaElements::Directive)
93
+
94
+ def new_enum_type(name, &block)
95
+ @@enum_type_new.call(@state, name, &(_ = block))
96
+ end
97
+ @@enum_type_new = prevent_non_factory_instantiation_of(SchemaElements::EnumType)
98
+
99
+ def new_enum_value(name, original_name)
100
+ @@enum_value_new.call(@state, name, original_name) do |enum_value|
101
+ yield enum_value if block_given?
102
+ end
103
+ end
104
+ @@enum_value_new = prevent_non_factory_instantiation_of(SchemaElements::EnumValue)
105
+
106
+ def new_enums_for_indexed_types
107
+ @@enums_for_indexed_types_new.call(@state)
108
+ end
109
+ @@enums_for_indexed_types_new = prevent_non_factory_instantiation_of(SchemaElements::EnumsForIndexedTypes)
110
+
111
+ # Hard to type check this.
112
+ # @dynamic new_field
113
+ __skip__ = def new_field(**kwargs, &block)
114
+ @@field_new.call(schema_def_state: @state, **kwargs, &block)
115
+ end
116
+ @@field_new = prevent_non_factory_instantiation_of(SchemaElements::Field)
117
+
118
+ def new_graphql_sdl_enumerator(all_types_except_root_query_type)
119
+ @@graphql_sdl_enumerator_new.call(@state, all_types_except_root_query_type)
120
+ end
121
+ @@graphql_sdl_enumerator_new = prevent_non_factory_instantiation_of(SchemaElements::GraphQLSDLEnumerator)
122
+
123
+ # Hard to type check this.
124
+ # @dynamic new_input_field
125
+ __skip__ = def new_input_field(**kwargs)
126
+ input_field = @@input_field_new.call(new_field(as_input: true, **kwargs))
127
+ yield input_field
128
+ input_field
129
+ end
130
+ @@input_field_new = prevent_non_factory_instantiation_of(SchemaElements::InputField)
131
+
132
+ def new_input_type(name)
133
+ @@input_type_new.call(@state, name) do |input_type|
134
+ yield input_type
135
+ end
136
+ end
137
+ @@input_type_new = prevent_non_factory_instantiation_of(SchemaElements::InputType)
138
+
139
+ def new_filter_input_type(source_type, name_prefix: source_type, category: :filter_input)
140
+ new_input_type(@state.type_ref(name_prefix).as_static_derived_type(category).name) do |t|
141
+ t.documentation <<~EOS
142
+ Input type used to specify filters on `#{source_type}` fields.
143
+
144
+ Will be ignored if passed as an empty object (or as `null`).
145
+ EOS
146
+
147
+ t.field @state.schema_elements.any_of, "[#{t.name}!]" do |f|
148
+ f.documentation <<~EOS
149
+ Matches records where any of the provided sub-filters evaluate to true.
150
+ This works just like an OR operator in SQL.
151
+
152
+ Will be ignored when `null` is passed. When an empty list is passed, will cause this
153
+ part of the filter to match no documents.
154
+ EOS
155
+ end
156
+
157
+ t.field @state.schema_elements.not, t.name do |f|
158
+ f.documentation <<~EOS
159
+ Matches records where the provided sub-filter does not evaluate to true.
160
+ This works just like a NOT operator in SQL.
161
+
162
+ Will be ignored when `null` or an empty object is passed.
163
+ EOS
164
+ end
165
+
166
+ yield t
167
+ end
168
+ end
169
+
170
+ # Builds the standard set of filter input types for types which are indexing leaf types.
171
+ #
172
+ # All GraphQL leaf types (enums and scalars) are indexing leaf types, but some GraphQL object types are
173
+ # as well. For example, `GeoLocation` is an object type in GraphQL (with separate lat/long fields) but is
174
+ # an indexing leaf type because we use the datastore `geo_point` type for it.
175
+ def build_standard_filter_input_types_for_index_leaf_type(source_type, name_prefix: source_type, &define_filter_fields)
176
+ single_value_filter = new_filter_input_type(source_type, name_prefix: name_prefix, &define_filter_fields)
177
+ list_filter = new_list_filter_input_type(source_type, name_prefix: name_prefix, any_satisfy_type_category: :list_element_filter_input)
178
+ list_element_filter = new_list_element_filter_input_type(source_type, name_prefix: name_prefix, &define_filter_fields)
179
+
180
+ [single_value_filter, list_filter, list_element_filter]
181
+ end
182
+
183
+ # Builds the standard set of filter input types for types which are indexing object types.
184
+ #
185
+ # Most GraphQL object types are indexing object types as well, but not all.
186
+ # For example, `GeoLocation` is an object type in GraphQL (with separate lat/long fields) but is
187
+ # an indexing leaf type because we use the datastore `geo_point` type for it.
188
+ def build_standard_filter_input_types_for_index_object_type(source_type, name_prefix: source_type, &define_filter_fields)
189
+ single_value_filter = new_filter_input_type(source_type, name_prefix: name_prefix, &define_filter_fields)
190
+ list_filter = new_list_filter_input_type(source_type, name_prefix: name_prefix, any_satisfy_type_category: :filter_input)
191
+ fields_list_filter = new_fields_list_filter_input_type(source_type, name_prefix: name_prefix)
192
+
193
+ [single_value_filter, list_filter, fields_list_filter]
194
+ end
195
+
196
+ def build_relay_pagination_types(type_name, include_total_edge_count: false, derived_indexed_types: [], support_pagination: true, &customize_connection)
197
+ [
198
+ (edge_type_for(type_name) if support_pagination),
199
+ connection_type_for(type_name, include_total_edge_count, derived_indexed_types, support_pagination, &customize_connection)
200
+ ].compact
201
+ end
202
+
203
+ def new_interface_type(name)
204
+ @@interface_type_new.call(@state, name.to_s) do |interface_type|
205
+ yield interface_type
206
+ end
207
+ end
208
+ @@interface_type_new = prevent_non_factory_instantiation_of(SchemaElements::InterfaceType)
209
+
210
+ def new_object_type(name)
211
+ @@object_type_new.call(@state, name.to_s) do |object_type|
212
+ yield object_type if block_given?
213
+ end
214
+ end
215
+ @@object_type_new = prevent_non_factory_instantiation_of(SchemaElements::ObjectType)
216
+
217
+ def new_scalar_type(name)
218
+ @@scalar_type_new.call(@state, name.to_s) do |scalar_type|
219
+ yield scalar_type
220
+ end
221
+ end
222
+ @@scalar_type_new = prevent_non_factory_instantiation_of(SchemaElements::ScalarType)
223
+
224
+ def new_sort_order_enum_value(enum_value, sort_order_field_path)
225
+ @@sort_order_enum_value_new.call(enum_value, sort_order_field_path)
226
+ end
227
+ @@sort_order_enum_value_new = prevent_non_factory_instantiation_of(SchemaElements::SortOrderEnumValue)
228
+
229
+ def new_type_reference(name)
230
+ @@type_reference_new.call(name, @state)
231
+ end
232
+ @@type_reference_new = prevent_non_factory_instantiation_of(SchemaElements::TypeReference)
233
+
234
+ def new_type_with_subfields(schema_kind, name, wrapping_type:, field_factory:)
235
+ @@type_with_subfields_new.call(schema_kind, @state, name, wrapping_type: wrapping_type, field_factory: field_factory) do |type_with_subfields|
236
+ yield type_with_subfields
237
+ end
238
+ end
239
+ @@type_with_subfields_new = prevent_non_factory_instantiation_of(SchemaElements::TypeWithSubfields)
240
+
241
+ def new_union_type(name)
242
+ @@union_type_new.call(@state, name.to_s) do |union_type|
243
+ yield union_type
244
+ end
245
+ end
246
+ @@union_type_new = prevent_non_factory_instantiation_of(SchemaElements::UnionType)
247
+
248
+ def new_field_source(relationship_name:, field_path:)
249
+ @@field_source_new.call(relationship_name, field_path)
250
+ end
251
+ @@field_source_new = prevent_non_factory_instantiation_of(SchemaElements::FieldSource)
252
+
253
+ def new_relationship(field, cardinality:, related_type:, foreign_key:, direction:)
254
+ @@relationship_new.call(
255
+ field,
256
+ cardinality: cardinality,
257
+ related_type: related_type,
258
+ foreign_key: foreign_key,
259
+ direction: direction
260
+ )
261
+ end
262
+ @@relationship_new = prevent_non_factory_instantiation_of(SchemaElements::Relationship)
263
+
264
+ # Responsible for creating a new `*AggregatedValues` type for an index leaf type.
265
+ #
266
+ # An index leaf type is a scalar, enum, or object type that is backed by a single, indivisible
267
+ # field in the index. All scalar and enum types are index leaf types, and object types
268
+ # rarely (but sometimes) are. For example, the `GeoLocation` object type has two subfields
269
+ # (`latitude` and `longitude`) but is backed by a single `geo_point` field in the index,
270
+ # so it is an index leaf type.
271
+ def new_aggregated_values_type_for_index_leaf_type(index_leaf_type)
272
+ new_object_type @state.type_ref(index_leaf_type).as_aggregated_values.name do |type|
273
+ type.graphql_only true
274
+ type.documentation "A return type used from aggregations to provided aggregated values over `#{index_leaf_type}` fields."
275
+ type.runtime_metadata_overrides = {elasticgraph_category: :scalar_aggregated_values}
276
+
277
+ type.field @state.schema_elements.approximate_distinct_value_count, "JsonSafeLong", graphql_only: true do |f|
278
+ # Note: the 1-6% accuracy figure comes from the Elasticsearch docs:
279
+ # https://www.elastic.co/guide/en/elasticsearch/reference/8.10/search-aggregations-metrics-cardinality-aggregation.html#_counts_are_approximate
280
+ f.documentation <<~EOS
281
+ An approximation of the number of unique values for this field within this grouping.
282
+
283
+ The approximation uses the HyperLogLog++ algorithm from the [HyperLogLog in Practice](https://research.google.com/pubs/archive/40671.pdf)
284
+ paper. The accuracy of the returned value varies based on the specific dataset, but
285
+ it usually differs from the true distinct value count by less than 7%.
286
+ EOS
287
+
288
+ f.runtime_metadata_graphql_field = f.runtime_metadata_graphql_field.with_computation_detail(
289
+ empty_bucket_value: 0,
290
+ function: :cardinality
291
+ )
292
+ end
293
+
294
+ yield type
295
+ end
296
+ end
297
+
298
+ private
299
+
300
+ def new_list_filter_input_type(source_type, name_prefix:, any_satisfy_type_category:)
301
+ any_satisfy = @state.schema_elements.any_satisfy
302
+ all_of = @state.schema_elements.all_of
303
+
304
+ new_filter_input_type "[#{source_type}]", name_prefix: name_prefix, category: :list_filter_input do |t|
305
+ t.field any_satisfy, @state.type_ref(name_prefix).as_static_derived_type(any_satisfy_type_category).name do |f|
306
+ f.documentation <<~EOS
307
+ Matches records where any of the list elements match the provided sub-filter.
308
+
309
+ Will be ignored when `null` or an empty object is passed.
310
+ EOS
311
+ end
312
+
313
+ t.field all_of, "[#{t.name}!]" do |f|
314
+ f.documentation <<~EOS
315
+ Matches records where all of the provided sub-filters evaluate to true. This works just like an AND operator in SQL.
316
+
317
+ Note: multiple filters are automatically ANDed together. This is only needed when you have multiple filters that can't
318
+ be provided on a single `#{t.name}` input because of collisions between key names. For example, if you want to provide
319
+ multiple `#{any_satisfy}: ...` filters, you could do `#{all_of}: [{#{any_satisfy}: ...}, {#{any_satisfy}: ...}]`.
320
+
321
+ Will be ignored when `null` is passed or an empty list is passed.
322
+ EOS
323
+ end
324
+
325
+ define_list_counts_filter_field_on(t)
326
+ end
327
+ end
328
+
329
+ # Generates a filter type used on elements of a list. Referenced from the `any_satisfy` field of a
329
+ # `#{type}ListFilterInput` input.
331
+ def new_list_element_filter_input_type(source_type, name_prefix:)
332
+ new_filter_input_type source_type, name_prefix: name_prefix, category: :list_element_filter_input do |t|
333
+ t.documentation <<~EOS
334
+ Input type used to specify filters on elements of a `[#{source_type}]` field.
335
+
336
+ Will be ignored if passed as an empty object (or as `null`).
337
+ EOS
338
+
339
+ # While we support `not: {any_satisfy: ...}` we do not support `any_satisfy: {not: ...}` at this time.
340
+ # Since `any_satisfy` does not have a node in the datastore query expression, the naive way we'd
341
+ # generate the datastore filter would be the same for both cases. However, they should have different
342
+ # semantics.
343
+ #
344
+ # For example, if we have these documents:
345
+ #
346
+ # - d1: {tags: ["a", "b"]}
347
+ # - d2: {tags: ["b", "c"]}
348
+ # - d3: {tags: []}
349
+ # - d4: {tags: ["a"]}
350
+ #
351
+ # Then `not: {any_satisfy: {equal_to_any_of: ["a"]}}` should (and does) match d2 and d3.
352
+ # But `any_satisfy: {not: {equal_to_any_of: ["a"]}}` should match d1 and d2 (both have a tag that is not equal to "a").
353
+ # However, Elasticsearch and OpenSearch do not allow us to express that.
354
+ #
355
+ # Technically, we could probably get it to work if we implemented negations of all our filter operators.
356
+ # For example, `gt` negated is `lte`, `lt` negated is `gte`, etc. But for some operators that's not easy.
357
+ # There is no available negation of `equal_to_any_of`, but we could maybe get it to work by using a regex
358
+ # operator that matches any term EXCEPT the provided value, but that's non-trivial to implement and could
359
+ # be quite expensive. So for now we just don't support this.
360
+ #
361
+ # ...therefore, we need to omit `not` from the generated filter here.
362
+ t.graphql_fields_by_name.delete(@state.schema_elements.not)
363
+
364
+ yield t
365
+ end
366
+ end
367
+
368
+ # Generates a filter type used for objects within a list (either at a parent or some ancestor level)
369
+ # when the `nested` type is not used. The datastore indexes each leaf field as its own flattened list
370
+ # of values. We mirror that structure with this filter type, only offering `any_satisfy` on leaf fields.
371
+ def new_fields_list_filter_input_type(source_type_name, name_prefix:)
372
+ source_type = @state.object_types_by_name.fetch(source_type_name)
373
+
374
+ new_filter_input_type source_type_name, name_prefix: name_prefix, category: :fields_list_filter_input do |t|
375
+ t.documentation <<~EOS
376
+ Input type used to specify filters on a `#{source_type_name}` object referenced directly
377
+ or transitively from a list field that has been configured to index each leaf field as
378
+ its own flattened list of values.
379
+
380
+ Will be ignored if passed as an empty object (or as `null`).
381
+ EOS
382
+
383
+ source_type.graphql_fields_by_name.each do |field_name, field|
384
+ next unless field.filterable?
385
+ t.graphql_fields_by_name[field_name] = field.to_filter_field(
386
+ parent_type: t,
387
+ # We are never filtering on single values in this context (since we are already
388
+ # within a list that isn't using the `nested` mapping type).
389
+ for_single_value: false
390
+ )
391
+ end
392
+
393
+ # We want to add a `count` field so that clients can filter on the count of elements of this list field.
394
+ # However, if the object type of this field has a user-defined `count` field then we cannot do that, as that
395
+ # would create a conflict. So we omit it in that case. Users will still be able to filter on the count of
396
+ # the leaf fields if they spell out the full filter path to a leaf field.
397
+ count_field_name = @state.schema_elements.count
398
+ if t.graphql_fields_by_name.key?(count_field_name)
399
+ @state.output.puts <<~EOS
400
+ WARNING: Since a `#{source_type_name}.#{count_field_name}` field exists, ElasticGraph is not able to
401
+ define its typical `#{t.name}.#{count_field_name}` field, which allows clients to filter on the count
402
+ of values for a `[#{source_type.name}]` field. Clients will still be able to filter on the `#{count_field_name}`
403
+ at a leaf field path. However, there are a couple ways this naming conflict can be avoided if desired:
404
+
405
+ 1. Pick a different name for the `#{source_type_name}.#{count_field_name}` field.
406
+ 2. Change the name used by ElasticGraph for this field. To do that, pass a
407
+ `schema_element_name_overrides: {#{count_field_name.inspect} => "alt_name"}` option alongside
408
+ `schema_element_name_form: ...` when defining `ElasticGraph::SchemaDefinition::RakeTasks`
409
+ (typically in the `Rakefile`).
410
+ EOS
411
+ else
412
+ define_list_counts_filter_field_on(t)
413
+ end
414
+ end
415
+ end
416
+
417
+ def define_list_counts_filter_field_on(type)
418
+ # Note: we use `IntFilterInput` (instead of `JsonSafeLongFilterInput` or similar...) to align with the
419
+ # `integer` mapping type we use for the `__counts` field. If we ever change that
420
+ # in `list_counts_mapping.rb`, we'll want to consider changing this as well.
421
+ #
422
+ # We use `name_in_index: __counts` because we need to indicate that it's the list `count` operator
423
+ # rather than a schema field named "counts". Our filter interpreter logic relies on that name.
424
+ # We can count on `__counts` not being used by a real schema field because the GraphQL spec reserves
425
+ # the `__` prefix for its own use.
426
+ type.field @state.schema_elements.count, @state.type_ref("Int").as_filter_input.name, name_in_index: LIST_COUNTS_FIELD do |f|
427
+ f.documentation <<~EOS
428
+ Used to filter on the number of non-null elements in this list field.
429
+
430
+ Will be ignored when `null` or an empty object is passed.
431
+ EOS
432
+ end
433
+ end
434
+
435
+ def edge_type_for(type_name)
436
+ type_ref = @state.type_ref(type_name)
437
+ new_object_type type_ref.as_edge.name do |t|
438
+ t.relay_pagination_type = true
439
+ t.runtime_metadata_overrides = {elasticgraph_category: :relay_edge}
440
+
441
+ t.documentation <<~EOS
442
+ Represents a specific `#{type_name}` in the context of a `#{type_ref.as_connection.name}`,
443
+ providing access to both the `#{type_name}` and a pagination `Cursor`.
444
+
445
+ See the [Relay GraphQL Cursor Connections
446
+ Specification](https://relay.dev/graphql/connections.htm#sec-Edge-Types) for more info.
447
+ EOS
448
+
449
+ t.field @state.schema_elements.node, type_name do |f|
450
+ f.documentation "The `#{type_name}` of this edge."
451
+ end
452
+
453
+ t.field @state.schema_elements.cursor, "Cursor" do |f|
454
+ f.documentation <<~EOS
455
+ The `Cursor` of this `#{type_name}`. This can be passed in the next query as
456
+ a `before` or `after` argument to continue paginating from this `#{type_name}`.
457
+ EOS
458
+ end
459
+ end
460
+ end
461
+
462
+ def connection_type_for(type_name, include_total_edge_count, derived_indexed_types, support_pagination)
463
+ type_ref = @state.type_ref(type_name)
464
+ new_object_type type_ref.as_connection.name do |t|
465
+ t.relay_pagination_type = true
466
+ t.runtime_metadata_overrides = {elasticgraph_category: :relay_connection}
467
+
468
+ if support_pagination
469
+ t.documentation <<~EOS
470
+ Represents a paginated collection of `#{type_name}` results.
471
+
472
+ See the [Relay GraphQL Cursor Connections
473
+ Specification](https://relay.dev/graphql/connections.htm#sec-Connection-Types) for more info.
474
+ EOS
475
+ else
476
+ t.documentation "Represents a collection of `#{type_name}` results."
477
+ end
478
+
479
+ if support_pagination
480
+ t.field @state.schema_elements.edges, "[#{type_ref.as_edge.name}!]!" do |f|
481
+ f.documentation "Wraps a specific `#{type_name}` to pair it with its pagination cursor."
482
+ end
483
+ end
484
+
485
+ t.field @state.schema_elements.nodes, "[#{type_name}!]!" do |f|
486
+ f.documentation "The list of `#{type_name}` results."
487
+ end
488
+
489
+ if support_pagination
490
+ t.field @state.schema_elements.page_info, "PageInfo!" do |f|
491
+ f.documentation "Provides pagination-related information."
492
+ end
493
+ end
494
+
495
+ if include_total_edge_count
496
+ t.field @state.schema_elements.total_edge_count, "JsonSafeLong!" do |f|
497
+ f.documentation "The total number of edges available in this connection to paginate over."
498
+ end
499
+ end
500
+
501
+ yield t if block_given?
502
+ end
503
+ end
504
+ end
505
+ end
506
+ end
@@ -0,0 +1,79 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/schema_definition/indexing/derived_fields/field_initializer_support"
10
+
11
+ module ElasticGraph
12
+ module SchemaDefinition
13
+ module Indexing
14
+ module DerivedFields
15
# Supplies the pieces of the painless script that are specific to a {DerivedIndexedType#append_only_set} field.
#
# @api private
class AppendOnlySet < ::Data.define(:destination_field, :source_field)
  # `Data.define` provides the following methods:
  # @dynamic destination_field, source_field

  # Painless function that inserts each of a list of values via `appendOnlySet_idempotentlyInsertValue`.
  # Internal to this class.
  IDEMPOTENTLY_INSERT_VALUES = <<~EOS
    // Wrapper around `idempotentlyInsertValue` that handles a list of values.
    // Returns `true` if the list field was updated.
    boolean appendOnlySet_idempotentlyInsertValues(List values, List sortedList) {
    boolean listUpdated = false;

    for (def value : values) {
    listUpdated = appendOnlySet_idempotentlyInsertValue(value, sortedList) || listUpdated;
    }

    return listUpdated;
    }
  EOS

  # Painless function that inserts a single value at its binary-search insertion point when absent.
  # Internal to this class.
  IDEMPOTENTLY_INSERT_VALUE = <<~EOS
    // Idempotently inserts the given value in the `sortedList`, returning `true` if the list was updated.
    boolean appendOnlySet_idempotentlyInsertValue(def value, List sortedList) {
    // As per https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/util/Collections.html#binarySearch(java.util.List,java.lang.Object):
    //
    // > Returns the index of the search key, if it is contained in the list; otherwise, (-(insertion point) - 1).
    // > The insertion point is defined as the point at which the key would be inserted into the list: the index
    // > of the first element greater than the key, or list.size() if all elements in the list are less than the
    // > specified key. Note that this guarantees that the return value will be >= 0 if and only if the key is found.
    int binarySearchResult = Collections.binarySearch(sortedList, value);

    if (binarySearchResult < 0) {
    sortedList.add(-binarySearchResult - 1, value);
    return true;
    } else {
    return false;
    }
    }
  EOS

  # @return [Array<String>] painless functions required by `append_only_set`.
  def function_definitions
    [IDEMPOTENTLY_INSERT_VALUES, IDEMPOTENTLY_INSERT_VALUE]
  end

  # @return [String] a line of painless code to append a value to the set and return a boolean indicating if the set was updated.
  def apply_operation_returning_update_status
    incoming_values = %(data["#{source_field}"])
    target_list = "ctx._source.#{destination_field}"

    "appendOnlySet_idempotentlyInsertValues(#{incoming_values}, #{target_list})"
  end

  # Produces statements that default the destination field to an empty list when it is null. That is mainly the
  # case when the document does not exist yet, but it also occurs when a new derived field is added to an
  # existing type.
  #
  # @return [Array<String>] a list of painless statements that must be called at the top of the script to set things up.
  def setup_statements
    FieldInitializerSupport.build_empty_value_initializers(
      destination_field,
      leaf_value: FieldInitializerSupport::EMPTY_PAINLESS_LIST
    )
  end
end
76
+ end
77
+ end
78
+ end
79
+ end
@@ -0,0 +1,59 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ module ElasticGraph
10
+ module SchemaDefinition
11
+ module Indexing
12
+ # Contains implementation logic for the different kinds of derived fields.
13
+ #
14
+ # @api private
15
+ module DerivedFields
16
# Helper logic for field initialization that is shared by every kind of derived field.
#
# @api private
module FieldInitializerSupport
  # Painless literal for an empty list, from [the docs](https://www.elastic.co/guide/en/elasticsearch/painless/8.15/painless-operators-reference.html#list-initialization-operator).
  EMPTY_PAINLESS_LIST = "[]"

  # Painless literal for an empty map, from [the docs](https://www.elastic.co/guide/en/elasticsearch/painless/8.15/painless-operators-reference.html#map-initialization-operator).
  EMPTY_PAINLESS_MAP = "[:]"

  # Walks the dot-separated `destination_field` path and emits one null-guarded initializer per path prefix.
  # Every parent prefix is defaulted to an empty map. The full path is defaulted to `leaf_value`, unless
  # `leaf_value` is `:leave_unset`, in which case no statement is emitted for the full path.
  #
  # @return [Array<String>] a list of painless statements that will initialize a given `destination_field` path to an empty value.
  def self.build_empty_value_initializers(destination_field, leaf_value:)
    segments = destination_field.split(".")

    segments.each_index.filter_map do |index|
      prefix = segments[0..index].join(".")
      leaf = prefix == destination_field
      next if leaf && leaf_value == :leave_unset

      # The empty value of all parent fields must be an empty painless map, but for a leaf field it can be different.
      default_source_field_to_empty(prefix, (leaf ? leaf_value : EMPTY_PAINLESS_MAP).to_s)
    end
  end

  # @return [String] a painless statement that will default a single field to an empty value.
  def self.default_source_field_to_empty(field_path, empty_value)
    [
      "if (ctx._source.#{field_path} == null) {",
      "ctx._source.#{field_path} = #{empty_value};",
      "}"
    ].join("\n")
  end
end
56
+ end
57
+ end
58
+ end
59
+ end