elasticgraph-graphql 0.18.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +21 -0
  3. data/README.md +3 -0
  4. data/elasticgraph-graphql.gemspec +23 -0
  5. data/lib/elastic_graph/graphql/aggregation/composite_grouping_adapter.rb +79 -0
  6. data/lib/elastic_graph/graphql/aggregation/computation.rb +39 -0
  7. data/lib/elastic_graph/graphql/aggregation/date_histogram_grouping.rb +83 -0
  8. data/lib/elastic_graph/graphql/aggregation/field_path_encoder.rb +47 -0
  9. data/lib/elastic_graph/graphql/aggregation/field_term_grouping.rb +26 -0
  10. data/lib/elastic_graph/graphql/aggregation/key.rb +87 -0
  11. data/lib/elastic_graph/graphql/aggregation/nested_sub_aggregation.rb +37 -0
  12. data/lib/elastic_graph/graphql/aggregation/non_composite_grouping_adapter.rb +129 -0
  13. data/lib/elastic_graph/graphql/aggregation/path_segment.rb +31 -0
  14. data/lib/elastic_graph/graphql/aggregation/query.rb +172 -0
  15. data/lib/elastic_graph/graphql/aggregation/query_adapter.rb +345 -0
  16. data/lib/elastic_graph/graphql/aggregation/query_optimizer.rb +187 -0
  17. data/lib/elastic_graph/graphql/aggregation/resolvers/aggregated_values.rb +41 -0
  18. data/lib/elastic_graph/graphql/aggregation/resolvers/count_detail.rb +44 -0
  19. data/lib/elastic_graph/graphql/aggregation/resolvers/grouped_by.rb +30 -0
  20. data/lib/elastic_graph/graphql/aggregation/resolvers/node.rb +64 -0
  21. data/lib/elastic_graph/graphql/aggregation/resolvers/relay_connection_builder.rb +83 -0
  22. data/lib/elastic_graph/graphql/aggregation/resolvers/sub_aggregations.rb +82 -0
  23. data/lib/elastic_graph/graphql/aggregation/script_term_grouping.rb +32 -0
  24. data/lib/elastic_graph/graphql/aggregation/term_grouping.rb +118 -0
  25. data/lib/elastic_graph/graphql/client.rb +43 -0
  26. data/lib/elastic_graph/graphql/config.rb +81 -0
  27. data/lib/elastic_graph/graphql/datastore_query/document_paginator.rb +100 -0
  28. data/lib/elastic_graph/graphql/datastore_query/index_expression_builder.rb +142 -0
  29. data/lib/elastic_graph/graphql/datastore_query/paginator.rb +199 -0
  30. data/lib/elastic_graph/graphql/datastore_query/routing_picker.rb +239 -0
  31. data/lib/elastic_graph/graphql/datastore_query.rb +372 -0
  32. data/lib/elastic_graph/graphql/datastore_response/document.rb +78 -0
  33. data/lib/elastic_graph/graphql/datastore_response/search_response.rb +79 -0
  34. data/lib/elastic_graph/graphql/datastore_search_router.rb +151 -0
  35. data/lib/elastic_graph/graphql/decoded_cursor.rb +120 -0
  36. data/lib/elastic_graph/graphql/filtering/boolean_query.rb +45 -0
  37. data/lib/elastic_graph/graphql/filtering/field_path.rb +81 -0
  38. data/lib/elastic_graph/graphql/filtering/filter_args_translator.rb +58 -0
  39. data/lib/elastic_graph/graphql/filtering/filter_interpreter.rb +526 -0
  40. data/lib/elastic_graph/graphql/filtering/filter_value_set_extractor.rb +148 -0
  41. data/lib/elastic_graph/graphql/filtering/range_query.rb +56 -0
  42. data/lib/elastic_graph/graphql/http_endpoint.rb +229 -0
  43. data/lib/elastic_graph/graphql/monkey_patches/schema_field.rb +56 -0
  44. data/lib/elastic_graph/graphql/monkey_patches/schema_object.rb +48 -0
  45. data/lib/elastic_graph/graphql/query_adapter/filters.rb +161 -0
  46. data/lib/elastic_graph/graphql/query_adapter/pagination.rb +27 -0
  47. data/lib/elastic_graph/graphql/query_adapter/requested_fields.rb +124 -0
  48. data/lib/elastic_graph/graphql/query_adapter/sort.rb +32 -0
  49. data/lib/elastic_graph/graphql/query_details_tracker.rb +60 -0
  50. data/lib/elastic_graph/graphql/query_executor.rb +200 -0
  51. data/lib/elastic_graph/graphql/resolvers/get_record_field_value.rb +49 -0
  52. data/lib/elastic_graph/graphql/resolvers/graphql_adapter.rb +114 -0
  53. data/lib/elastic_graph/graphql/resolvers/list_records.rb +29 -0
  54. data/lib/elastic_graph/graphql/resolvers/nested_relationships.rb +74 -0
  55. data/lib/elastic_graph/graphql/resolvers/query_adapter.rb +85 -0
  56. data/lib/elastic_graph/graphql/resolvers/query_source.rb +46 -0
  57. data/lib/elastic_graph/graphql/resolvers/relay_connection/array_adapter.rb +71 -0
  58. data/lib/elastic_graph/graphql/resolvers/relay_connection/generic_adapter.rb +65 -0
  59. data/lib/elastic_graph/graphql/resolvers/relay_connection/page_info.rb +82 -0
  60. data/lib/elastic_graph/graphql/resolvers/relay_connection/search_response_adapter_builder.rb +40 -0
  61. data/lib/elastic_graph/graphql/resolvers/relay_connection.rb +42 -0
  62. data/lib/elastic_graph/graphql/resolvers/resolvable_value.rb +56 -0
  63. data/lib/elastic_graph/graphql/scalar_coercion_adapters/cursor.rb +35 -0
  64. data/lib/elastic_graph/graphql/scalar_coercion_adapters/date.rb +64 -0
  65. data/lib/elastic_graph/graphql/scalar_coercion_adapters/date_time.rb +60 -0
  66. data/lib/elastic_graph/graphql/scalar_coercion_adapters/local_time.rb +30 -0
  67. data/lib/elastic_graph/graphql/scalar_coercion_adapters/longs.rb +47 -0
  68. data/lib/elastic_graph/graphql/scalar_coercion_adapters/no_op.rb +24 -0
  69. data/lib/elastic_graph/graphql/scalar_coercion_adapters/time_zone.rb +44 -0
  70. data/lib/elastic_graph/graphql/scalar_coercion_adapters/untyped.rb +32 -0
  71. data/lib/elastic_graph/graphql/scalar_coercion_adapters/valid_time_zones.rb +634 -0
  72. data/lib/elastic_graph/graphql/schema/arguments.rb +78 -0
  73. data/lib/elastic_graph/graphql/schema/enum_value.rb +30 -0
  74. data/lib/elastic_graph/graphql/schema/field.rb +147 -0
  75. data/lib/elastic_graph/graphql/schema/relation_join.rb +103 -0
  76. data/lib/elastic_graph/graphql/schema/type.rb +263 -0
  77. data/lib/elastic_graph/graphql/schema.rb +164 -0
  78. data/lib/elastic_graph/graphql.rb +253 -0
  79. data/script/dump_time_zones +81 -0
  80. data/script/dump_time_zones.java +17 -0
  81. metadata +503 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: b19df00bc750f39aa2cbccab58af18c6cb2a2329017e9d0aa8f889dce5b1c377
4
+ data.tar.gz: 8308fce167bd7cf625bb7b82f2f717c174ffa12655b1df77e58e37369b8d6294
5
+ SHA512:
6
+ metadata.gz: c983f82a5da8da47c2f8146337b9425623d1ebfacf0e2f93db706cd53e465725f12cc43fe4e49d2b7885422a428e7744ae61642264941d3d497eb4d4fd3bc9f6
7
+ data.tar.gz: 277958b0fee144f62342f2e3081f4ebbb9a7529456e62a2b5183ad8fe93f2cca7fd5013cbe9bf461fceee46d5f42cea8149d40b624410fbb9498267e78d1dedb
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2024 Block, Inc.
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,3 @@
1
+ # ElasticGraph::GraphQL
2
+
3
+ Provides the ElasticGraph GraphQL query engine.
@@ -0,0 +1,23 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require_relative "../gemspec_helper"
10
+
11
+ ElasticGraphGemspecHelper.define_elasticgraph_gem(gemspec_file: __FILE__, category: :core) do |spec, eg_version|
12
+ spec.summary = "The ElasticGraph GraphQL query engine."
13
+
14
+ spec.add_dependency "elasticgraph-datastore_core", eg_version
15
+ spec.add_dependency "elasticgraph-schema_artifacts", eg_version
16
+ spec.add_dependency "graphql", ">= 2.3.7", "< 2.4"
17
+
18
+ spec.add_development_dependency "elasticgraph-admin", eg_version
19
+ spec.add_development_dependency "elasticgraph-elasticsearch", eg_version
20
+ spec.add_development_dependency "elasticgraph-opensearch", eg_version
21
+ spec.add_development_dependency "elasticgraph-indexer", eg_version
22
+ spec.add_development_dependency "elasticgraph-schema_definition", eg_version
23
+ end
@@ -0,0 +1,79 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ module ElasticGraph
10
+ class GraphQL
11
+ module Aggregation
12
+ # Grouping adapter that uses a `composite` aggregation.
13
+ #
14
+ # For now, only used for the outermost "root" aggregations but may be used for sub-aggregations in the future.
15
+ module CompositeGroupingAdapter
16
+ class << self
17
+ def meta_name
18
+ "comp"
19
+ end
20
+
21
+ def grouping_detail_for(query)
22
+ sources = build_sources(query)
23
+
24
+ inner_clauses = yield
25
+ inner_clauses = nil if inner_clauses.empty?
26
+
27
+ return AggregationDetail.new(inner_clauses, {}) if sources.empty?
28
+
29
+ clauses = {
30
+ query.name => {
31
+ "composite" => {
32
+ "size" => query.paginator.requested_page_size,
33
+ "sources" => sources,
34
+ "after" => composite_after(query)
35
+ }.compact,
36
+ "aggs" => inner_clauses
37
+ }.compact
38
+ }
39
+
40
+ AggregationDetail.new(clauses, {"buckets_path" => [query.name]})
41
+ end
42
+
43
+ def prepare_response_buckets(sub_agg, buckets_path, meta)
44
+ sub_agg.dig(*buckets_path).fetch("buckets").map do |bucket|
45
+ bucket.merge({"doc_count_error_upper_bound" => 0})
46
+ end
47
+ end
48
+
49
+ private
50
+
51
+ def composite_after(query)
52
+ return unless (cursor = query.paginator.search_after)
53
+ expected_keys = query.groupings.map(&:key)
54
+
55
+ if cursor.sort_values.keys.sort == expected_keys.sort
56
+ cursor.sort_values
57
+ else
58
+ raise ::GraphQL::ExecutionError, "`#{cursor.encode}` is not a valid cursor for the current groupings."
59
+ end
60
+ end
61
+
62
+ def build_sources(query)
63
+ # We don't want documents that have no value for a grouping field to be omitted, so we set `missing_bucket: true`.
64
+ # https://www.elastic.co/guide/en/elasticsearch/reference/8.11/search-aggregations-bucket-composite-aggregation.html#_missing_bucket
65
+ grouping_options = if query.paginator.search_in_reverse?
66
+ {"order" => "desc", "missing_bucket" => true}
67
+ else
68
+ {"missing_bucket" => true}
69
+ end
70
+
71
+ query.groupings.map do |grouping|
72
+ {grouping.key => grouping.composite_clause(grouping_options: grouping_options)}
73
+ end
74
+ end
75
+ end
76
+ end
77
+ end
78
+ end
79
+ end
@@ -0,0 +1,39 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/graphql/aggregation/key"
10
+ require "elastic_graph/graphql/aggregation/field_path_encoder"
11
+
12
+ module ElasticGraph
13
+ class GraphQL
14
+ module Aggregation
15
+ # Represents some sort of aggregation computation (min, max, avg, sum, etc) on a field.
16
+ # For the relevant Elasticsearch docs, see:
17
+ # https://www.elastic.co/guide/en/elasticsearch/reference/7.12/search-aggregations-metrics-avg-aggregation.html
18
+ # https://www.elastic.co/guide/en/elasticsearch/reference/7.12/search-aggregations-metrics-max-aggregation.html
19
+ # https://www.elastic.co/guide/en/elasticsearch/reference/7.12/search-aggregations-metrics-min-aggregation.html
20
+ # https://www.elastic.co/guide/en/elasticsearch/reference/7.12/search-aggregations-metrics-sum-aggregation.html
21
+ Computation = ::Data.define(:source_field_path, :computed_index_field_name, :detail) do
22
+ # @implements Computation
23
+
24
+ def key(aggregation_name:)
25
+ Key::AggregatedValue.new(
26
+ aggregation_name: aggregation_name,
27
+ field_path: source_field_path.map(&:name_in_graphql_query),
28
+ function_name: computed_index_field_name
29
+ ).encode
30
+ end
31
+
32
+ def clause
33
+ encoded_path = FieldPathEncoder.join(source_field_path.filter_map(&:name_in_index))
34
+ {detail.function.to_s => {"field" => encoded_path}}
35
+ end
36
+ end
37
+ end
38
+ end
39
+ end
@@ -0,0 +1,83 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/constants"
10
+ require "elastic_graph/graphql/aggregation/field_path_encoder"
11
+ require "elastic_graph/support/memoizable_data"
12
+
13
+ module ElasticGraph
14
+ class GraphQL
15
+ module Aggregation
16
+ # Represents a grouping of a timestamp field into a date histogram.
17
+ # For the relevant Elasticsearch docs, see:
18
+ # https://www.elastic.co/guide/en/elasticsearch/reference/7.12/search-aggregations-bucket-datehistogram-aggregation.html
19
+ # https://www.elastic.co/guide/en/elasticsearch/reference/7.12/search-aggregations-bucket-composite-aggregation.html#_date_histogram
20
+ class DateHistogramGrouping < Support::MemoizableData.define(:field_path, :interval, :time_zone, :offset)
21
+ def key
22
+ @key ||= FieldPathEncoder.encode(field_path.map(&:name_in_graphql_query))
23
+ end
24
+
25
+ def encoded_index_field_path
26
+ @encoded_index_field_path ||= FieldPathEncoder.join(field_path.filter_map(&:name_in_index))
27
+ end
28
+
29
+ def composite_clause(grouping_options: {})
30
+ interval_options = INTERVAL_OPTIONS_BY_NAME.fetch(interval) do
31
+ raise ArgumentError, "#{interval.inspect} is an unsupported interval. Valid values: #{INTERVAL_OPTIONS_BY_NAME.keys.inspect}."
32
+ end
33
+
34
+ inner_hash = interval_options.merge(grouping_options).merge({
35
+ "field" => encoded_index_field_path,
36
+ "format" => DATASTORE_DATE_TIME_FORMAT,
37
+ "offset" => offset,
38
+ "time_zone" => time_zone
39
+ }.compact)
40
+
41
+ {"date_histogram" => inner_hash}
42
+ end
43
+
44
+ def non_composite_clause_for(query)
45
+ # `min_doc_count: 1` is important so we don't have excess buckets when there is a large gap
46
+ # between document dates. For example, if you group on a field at the year truncation unit, and
47
+ # a one-off rogue document has an incorrect timestamp for hundreds of years ago, you'll wind
48
+ # up with a bucket for each intervening year. `min_doc_count: 1` excludes those empty buckets.
49
+ composite_clause(grouping_options: {"min_doc_count" => 1})
50
+ end
51
+
52
+ def inner_meta
53
+ INNER_META
54
+ end
55
+
56
+ INNER_META = {
57
+ # On a date histogram aggregation, the `key` is formatted as a number (milliseconds since epoch). We
58
+ # need it formatted as a string, which `key_as_string` provides.
59
+ "key_path" => ["key_as_string"],
60
+ # Date histogram aggregations do not have any doc count error. Our resolver is generic and expects
61
+ # there to always be a `doc_count_error_upper_bound`. So we want to tell it to merge an error of `0`
62
+ # into each bucket.
63
+ "merge_into_bucket" => {"doc_count_error_upper_bound" => 0}
64
+ }
65
+
66
+ INTERVAL_OPTIONS_BY_NAME = {
67
+ # These intervals have only fixed intervals...
68
+ "millisecond" => {"fixed_interval" => "1ms"},
69
+ "second" => {"fixed_interval" => "1s"},
70
+ # ...but the rest have calendar intervals, which we prefer.
71
+ "minute" => {"calendar_interval" => "minute"},
72
+ "hour" => {"calendar_interval" => "hour"},
73
+ "day" => {"calendar_interval" => "day"},
74
+ "week" => {"calendar_interval" => "week"},
75
+ "month" => {"calendar_interval" => "month"},
76
+ "quarter" => {"calendar_interval" => "quarter"},
77
+ "year" => {"calendar_interval" => "year"}
78
+ }
79
+ private_constant :INTERVAL_OPTIONS_BY_NAME
80
+ end
81
+ end
82
+ end
83
+ end
@@ -0,0 +1,47 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/error"
10
+
11
+ module ElasticGraph
12
+ class GraphQL
13
+ module Aggregation
14
+ module FieldPathEncoder
15
+ # Embedded fields need to be specified with dot separators.
16
+ DELIMITER = "."
17
+
18
+ # Takes a list of field names (e.g., ["amountMoney", "amount"])
19
+ # and returns a single field name path string (e.g., "amountMoney.amount").
20
+ def self.encode(field_names)
21
+ field_names.each do |str|
22
+ verify_delimiters(str)
23
+ end
24
+
25
+ join(field_names)
26
+ end
27
+
28
+ # Joins together a list of encoded paths.
29
+ def self.join(encoded_paths)
30
+ encoded_paths.join(DELIMITER)
31
+ end
32
+
33
+ # Takes a field path (e.g., "amountMoney.amount") and returns the field name parts
34
+ # (["amountMoney", "amount"]).
35
+ def self.decode(field_path)
36
+ field_path.split(DELIMITER)
37
+ end
38
+
39
+ private_class_method def self.verify_delimiters(str)
40
+ if str.to_s.include?(DELIMITER)
41
+ raise InvalidArgumentValueError, %("#{str}" contains delimiter: "#{DELIMITER}")
42
+ end
43
+ end
44
+ end
45
+ end
46
+ end
47
+ end
@@ -0,0 +1,26 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/graphql/aggregation/term_grouping"
10
+
11
+ module ElasticGraph
12
+ class GraphQL
13
+ module Aggregation
14
+ class FieldTermGrouping < Support::MemoizableData.define(:field_path)
15
+ # @dynamic field_path
16
+ include TermGrouping
17
+
18
+ private
19
+
20
+ def terms_subclause
21
+ {"field" => encoded_index_field_path}
22
+ end
23
+ end
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,87 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/error"
10
+ require "elastic_graph/graphql/aggregation/field_path_encoder"
11
+
12
+ module ElasticGraph
13
+ class GraphQL
14
+ module Aggregation
15
+ module Key
16
+ # The datastore only gives us an "aggregation key" (or name) to tie response values back to the part of
17
+ # the request they came from. We use this delimiter to encode and decode aggregation keys.
18
+ DELIMITER = ":"
19
+
20
+ # Aggregation key implementation used when we're dealing with `aggregated_values`.
21
+ class AggregatedValue < ::Data.define(
22
+ # The name of the aggregation encoded into this key.
23
+ :aggregation_name,
24
+ # The path to the field used by this aggregation (encoded as a string)
25
+ :encoded_field_path,
26
+ # The name of the aggregation function, such as "sum".
27
+ :function_name
28
+ )
29
+ # We encode the field path as part of initialization to enforce an invariant that all `AggregatedValue`
30
+ # instances have valid values for all attributes. `FieldPathEncoder.encode` will raise an exception if
31
+ # the field path is invalid.
32
+ def initialize(aggregation_name:, function_name:, field_path: [], encoded_field_path: FieldPathEncoder.encode(field_path))
33
+ Key.verify_no_delimiter_in(aggregation_name, function_name, *field_path)
34
+
35
+ super(
36
+ aggregation_name: aggregation_name,
37
+ encoded_field_path: encoded_field_path,
38
+ function_name: function_name
39
+ )
40
+ end
41
+
42
+ def encode
43
+ Key.encode([aggregation_name, encoded_field_path, function_name])
44
+ end
45
+
46
+ def field_path
47
+ FieldPathEncoder.decode(encoded_field_path)
48
+ end
49
+ end
50
+
51
+ # Encodes the key used for a `missing` aggregation used to provide a bucket for
52
+ # documents that are missing a value for the field being grouped on.
53
+ def self.missing_value_bucket_key(base_key)
54
+ Key.encode([base_key, "m"])
55
+ end
56
+
57
+ # Extracts an aggregation name from a string that could either already be an aggregation name, or could
58
+ # be an encoded key. We need this for dealing with the multiple forms that aggregation responses take:
59
+ #
60
+ # - When we use `grouped_by`, we run a composite aggregation that has the aggregation name, and
61
+ # that shows up as a key directly under `aggregations` in the datastore response.
62
+ # - For aggregations with no `grouped_by`, we encode the aggregation name in the key, and the keys
63
+ # directly under `aggregations` in the datastore response will take a form like:
64
+ # `[agg_name]:[field_path]:[function]`.
65
+ #
66
+ # It's also possible for these two forms to be mixed under `aggregations` on a datastore response,
67
+ # where some hash keys are in one form and some are in the other form. This can happen when we run
68
+ # multiple aggregations (some with `grouped_by`, some without) in the same query.
69
+ def self.extract_aggregation_name_from(agg_name_or_key)
70
+ agg_name_or_key.split(DELIMITER, 2).first || agg_name_or_key
71
+ end
72
+
73
+ def self.encode(parts)
74
+ parts.join(DELIMITER)
75
+ end
76
+
77
+ def self.verify_no_delimiter_in(*parts)
78
+ parts.each do |part|
79
+ if part.to_s.include?(DELIMITER)
80
+ raise InvalidArgumentValueError, %("#{part}" contains delimiter: "#{DELIMITER}")
81
+ end
82
+ end
83
+ end
84
+ end
85
+ end
86
+ end
87
+ end
@@ -0,0 +1,37 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/graphql/aggregation/field_path_encoder"
10
+
11
+ module ElasticGraph
12
+ class GraphQL
13
+ module Aggregation
14
+ # Represents a sub-aggregation on a `nested` field.
15
+ # For the relevant Elasticsearch docs, see:
16
+ # https://www.elastic.co/guide/en/elasticsearch/reference/8.10/search-aggregations-bucket-nested-aggregation.html
17
+ class NestedSubAggregation < ::Data.define(:nested_path, :query)
18
+ def build_agg_hash(filter_interpreter, parent_queries:)
19
+ detail = query.build_agg_detail(filter_interpreter, field_path: nested_path, parent_queries: parent_queries)
20
+ return {} if detail.nil?
21
+
22
+ query_names = parent_queries.map(&:name) + [query.name]
23
+ {
24
+ Key.encode(query_names) => {
25
+ "nested" => {"path" => FieldPathEncoder.encode(nested_path.filter_map(&:name_in_index))},
26
+ "aggs" => detail.clauses,
27
+ "meta" => detail.meta.merge({
28
+ "size" => query.paginator.desired_page_size,
29
+ "adapter" => query.grouping_adapter.meta_name
30
+ })
31
+ }.compact
32
+ }
33
+ end
34
+ end
35
+ end
36
+ end
37
+ end
@@ -0,0 +1,129 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ module ElasticGraph
10
+ class GraphQL
11
+ module Aggregation
12
+ # Grouping adapter that avoids using a `composite` aggregation, due to limitations with Elasticsearch/OpenSearch.
13
+ module NonCompositeGroupingAdapter
14
+ class << self
15
+ def meta_name
16
+ "non_comp"
17
+ end
18
+
19
+ def grouping_detail_for(query)
20
+ date_groupings, term_groupings = query.groupings.partition do |grouping|
21
+ grouping.is_a?(DateHistogramGrouping)
22
+ end
23
+
24
+ grouping_detail(date_groupings, query) do
25
+ # We want term groupings inside date groupings so that, when our bucket aggregations might produce
26
+ # inaccurate doc counts, the innermost grouping aggregation has `doc_count_error_upper_bound` on
27
+ # its buckets allowing us to expose information about the accuracy.
28
+ #
29
+ # Date histogram aggregations do not include `doc_count_error_upper_bound` because, on their own, they are
30
+ # always accurate, but they may not be accurate when used as a sub-aggregation of a `terms` aggregation.
31
+ #
32
+ # For more detail on the issue this ordering is designed to avoid, see:
33
+ # https://discuss.elastic.co/t/accuracy-of-date-histogram-sub-aggregation-doc-count-under-terms-aggregation/348685
34
+ grouping_detail(term_groupings, query) do
35
+ inner_clauses = yield
36
+ inner_clauses = nil if inner_clauses.empty?
37
+ AggregationDetail.new(inner_clauses, {})
38
+ end
39
+ end
40
+ end
41
+
42
+ def prepare_response_buckets(sub_agg, buckets_path, meta)
43
+ sort_and_truncate_buckets(format_buckets(sub_agg, buckets_path), meta.fetch("size"))
44
+ end
45
+
46
+ private
47
+
48
+ def grouping_detail(groupings, query)
49
+ # Our `reduce` here builds the grouping clauses from the inside out (since each reduction step
50
+ # wraps the prior step's result in an outer `aggs` hash). The natural result of that is a nested set of
51
+ # grouping clauses that "feels" inside-out compared to what you would naturally expect.
52
+ #
53
+ # While that causes no concrete issue, it's nice to avoid. Here we use `reverse` to correct for that.
54
+ groupings.reverse.reduce(yield) do |inner_detail, grouping|
55
+ inner_detail.wrap_with_grouping(grouping, query: query)
56
+ end
57
+ end
58
+
59
+ # Formats the result of a bucket aggregation into a format that we can easily resolve. There are two things
60
+ # this accomplishes:
61
+ #
62
+ # - Converts bucket keys into hashes that can be used to resolve `grouped_by` fields.
63
+ # - Recursively flattens multiple levels of aggregations (which happens when we need to mix multiple kinds of
64
+ # bucket aggregations to group in the way the client requested) into a single flat list.
65
+ def format_buckets(sub_agg, buckets_path, parent_key_fields: {}, parent_key_values: [])
66
+ agg_with_buckets = sub_agg.dig(*buckets_path)
67
+
68
+ missing_bucket = {
69
+ # Doc counts in missing value buckets are always perfectly accurate.
70
+ "doc_count_error_upper_bound" => 0
71
+ }.merge(sub_agg.dig(*missing_bucket_path_from(buckets_path))) # : ::Hash[::String, untyped]
72
+
73
+ meta = agg_with_buckets.fetch("meta")
74
+
75
+ grouping_field_names = meta.fetch("grouping_fields") # provides the names of the fields being grouped on
76
+ key_path = meta.fetch("key_path") # indicates whether we want to get the key values from `key` or `key_as_string`.
77
+ sub_buckets_path = meta["buckets_path"] # buckets_path is optional, so we don't use fetch.
78
+ merge_into_bucket = meta.fetch("merge_into_bucket")
79
+
80
+ raw_buckets = agg_with_buckets.fetch("buckets") # : ::Array[::Hash[::String, untyped]]
81
+
82
+ # If the missing bucket is non-empty, include it. This matches the behavior of composite aggregations
83
+ # when the `missing_bucket` option is used.
84
+ raw_buckets += [missing_bucket] if missing_bucket.fetch("doc_count") > 0
85
+
86
+ raw_buckets.flat_map do |raw_bucket|
87
+ # The key will either be a single value (e.g. `47`) if we used a `terms`/`date_histogram` aggregation,
88
+ # or a tuple of values (e.g. `[47, "abc"]`) if we used a `multi_terms` aggregation. Here we convert it
89
+ # to the form needed for resolving `grouped_by` fields: a hash like `{"size" => 47, "tag" => "abc"}`.
90
+ key_values = Array(raw_bucket.dig(*key_path))
91
+ key_fields_hash = grouping_field_names.zip(key_values).to_h
92
+
93
+ # If we have multiple levels of aggregations, we need to merge the key fields hash with the key fields from the parent levels.
94
+ key_fields = parent_key_fields.merge(key_fields_hash)
95
+ key_values = parent_key_values + key_values
96
+
97
+ # If there's another level of aggregations, `buckets_path` will provide us with the path to that next level.
98
+ # We can use it to recurse as we build a flat list of buckets.
99
+ if sub_buckets_path
100
+ format_buckets(raw_bucket, sub_buckets_path, parent_key_fields: key_fields, parent_key_values: key_values)
101
+ else
102
+ [raw_bucket.merge(merge_into_bucket).merge({"key" => key_fields, "key_values" => key_values})]
103
+ end
104
+ end
105
+ end
106
+
107
+ # A `terms` or `multi_terms` sub-aggregation is automatically sorted by `doc_count` and we pass
108
+ # `size` to the datastore to limit the number of returned buckets.
109
+ #
110
+ # A `date_histogram` sub-aggregation is sorted ascending by the date, and we don't limit the buckets
111
+ # in any way (there's no `size` parameter).
112
+ #
113
+ # To honor the requested page size and return buckets in a consistent order, we sort the buckets here
114
+ # (by doc count descending, then by the key values ascending), and then take only first `size`.
115
+ def sort_and_truncate_buckets(buckets, size)
116
+ buckets
117
+ .sort_by { |b| [-b.fetch("doc_count"), b.fetch("key_values")] }
118
+ .first(size)
119
+ end
120
+
121
+ def missing_bucket_path_from(buckets_path)
122
+ *all_but_last, last = buckets_path
123
+ all_but_last + [Key.missing_value_bucket_key(last.to_s)]
124
+ end
125
+ end
126
+ end
127
+ end
128
+ end
129
+ end
@@ -0,0 +1,31 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ module ElasticGraph
10
+ class GraphQL
11
+ module Aggregation
12
+ PathSegment = ::Data.define(
13
+ # The name of this segment's field in the GraphQL query. If it's an aliased field, this
14
+ # will be the alias name.
15
+ :name_in_graphql_query,
16
+ # The name of this segment's field in the datastore index.
17
+ :name_in_index
18
+ ) do
19
+ # Factory method that aids in building a `PathSegment` for a given `field` and `lookahead` node.
20
+ def self.for(lookahead:, field: nil)
21
+ ast_node = lookahead.ast_nodes.first # : ::GraphQL::Language::Nodes::Field
22
+
23
+ new(
24
+ name_in_graphql_query: ast_node.alias || ast_node.name,
25
+ name_in_index: field&.name_in_index&.to_s
26
+ )
27
+ end
28
+ end
29
+ end
30
+ end
31
+ end