elasticgraph-graphql 0.18.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (81) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +21 -0
  3. data/README.md +3 -0
  4. data/elasticgraph-graphql.gemspec +23 -0
  5. data/lib/elastic_graph/graphql/aggregation/composite_grouping_adapter.rb +79 -0
  6. data/lib/elastic_graph/graphql/aggregation/computation.rb +39 -0
  7. data/lib/elastic_graph/graphql/aggregation/date_histogram_grouping.rb +83 -0
  8. data/lib/elastic_graph/graphql/aggregation/field_path_encoder.rb +47 -0
  9. data/lib/elastic_graph/graphql/aggregation/field_term_grouping.rb +26 -0
  10. data/lib/elastic_graph/graphql/aggregation/key.rb +87 -0
  11. data/lib/elastic_graph/graphql/aggregation/nested_sub_aggregation.rb +37 -0
  12. data/lib/elastic_graph/graphql/aggregation/non_composite_grouping_adapter.rb +129 -0
  13. data/lib/elastic_graph/graphql/aggregation/path_segment.rb +31 -0
  14. data/lib/elastic_graph/graphql/aggregation/query.rb +172 -0
  15. data/lib/elastic_graph/graphql/aggregation/query_adapter.rb +345 -0
  16. data/lib/elastic_graph/graphql/aggregation/query_optimizer.rb +187 -0
  17. data/lib/elastic_graph/graphql/aggregation/resolvers/aggregated_values.rb +41 -0
  18. data/lib/elastic_graph/graphql/aggregation/resolvers/count_detail.rb +44 -0
  19. data/lib/elastic_graph/graphql/aggregation/resolvers/grouped_by.rb +30 -0
  20. data/lib/elastic_graph/graphql/aggregation/resolvers/node.rb +64 -0
  21. data/lib/elastic_graph/graphql/aggregation/resolvers/relay_connection_builder.rb +83 -0
  22. data/lib/elastic_graph/graphql/aggregation/resolvers/sub_aggregations.rb +82 -0
  23. data/lib/elastic_graph/graphql/aggregation/script_term_grouping.rb +32 -0
  24. data/lib/elastic_graph/graphql/aggregation/term_grouping.rb +118 -0
  25. data/lib/elastic_graph/graphql/client.rb +43 -0
  26. data/lib/elastic_graph/graphql/config.rb +81 -0
  27. data/lib/elastic_graph/graphql/datastore_query/document_paginator.rb +100 -0
  28. data/lib/elastic_graph/graphql/datastore_query/index_expression_builder.rb +142 -0
  29. data/lib/elastic_graph/graphql/datastore_query/paginator.rb +199 -0
  30. data/lib/elastic_graph/graphql/datastore_query/routing_picker.rb +239 -0
  31. data/lib/elastic_graph/graphql/datastore_query.rb +372 -0
  32. data/lib/elastic_graph/graphql/datastore_response/document.rb +78 -0
  33. data/lib/elastic_graph/graphql/datastore_response/search_response.rb +79 -0
  34. data/lib/elastic_graph/graphql/datastore_search_router.rb +151 -0
  35. data/lib/elastic_graph/graphql/decoded_cursor.rb +120 -0
  36. data/lib/elastic_graph/graphql/filtering/boolean_query.rb +45 -0
  37. data/lib/elastic_graph/graphql/filtering/field_path.rb +81 -0
  38. data/lib/elastic_graph/graphql/filtering/filter_args_translator.rb +58 -0
  39. data/lib/elastic_graph/graphql/filtering/filter_interpreter.rb +526 -0
  40. data/lib/elastic_graph/graphql/filtering/filter_value_set_extractor.rb +148 -0
  41. data/lib/elastic_graph/graphql/filtering/range_query.rb +56 -0
  42. data/lib/elastic_graph/graphql/http_endpoint.rb +229 -0
  43. data/lib/elastic_graph/graphql/monkey_patches/schema_field.rb +56 -0
  44. data/lib/elastic_graph/graphql/monkey_patches/schema_object.rb +48 -0
  45. data/lib/elastic_graph/graphql/query_adapter/filters.rb +161 -0
  46. data/lib/elastic_graph/graphql/query_adapter/pagination.rb +27 -0
  47. data/lib/elastic_graph/graphql/query_adapter/requested_fields.rb +124 -0
  48. data/lib/elastic_graph/graphql/query_adapter/sort.rb +32 -0
  49. data/lib/elastic_graph/graphql/query_details_tracker.rb +60 -0
  50. data/lib/elastic_graph/graphql/query_executor.rb +200 -0
  51. data/lib/elastic_graph/graphql/resolvers/get_record_field_value.rb +49 -0
  52. data/lib/elastic_graph/graphql/resolvers/graphql_adapter.rb +114 -0
  53. data/lib/elastic_graph/graphql/resolvers/list_records.rb +29 -0
  54. data/lib/elastic_graph/graphql/resolvers/nested_relationships.rb +74 -0
  55. data/lib/elastic_graph/graphql/resolvers/query_adapter.rb +85 -0
  56. data/lib/elastic_graph/graphql/resolvers/query_source.rb +46 -0
  57. data/lib/elastic_graph/graphql/resolvers/relay_connection/array_adapter.rb +71 -0
  58. data/lib/elastic_graph/graphql/resolvers/relay_connection/generic_adapter.rb +65 -0
  59. data/lib/elastic_graph/graphql/resolvers/relay_connection/page_info.rb +82 -0
  60. data/lib/elastic_graph/graphql/resolvers/relay_connection/search_response_adapter_builder.rb +40 -0
  61. data/lib/elastic_graph/graphql/resolvers/relay_connection.rb +42 -0
  62. data/lib/elastic_graph/graphql/resolvers/resolvable_value.rb +56 -0
  63. data/lib/elastic_graph/graphql/scalar_coercion_adapters/cursor.rb +35 -0
  64. data/lib/elastic_graph/graphql/scalar_coercion_adapters/date.rb +64 -0
  65. data/lib/elastic_graph/graphql/scalar_coercion_adapters/date_time.rb +60 -0
  66. data/lib/elastic_graph/graphql/scalar_coercion_adapters/local_time.rb +30 -0
  67. data/lib/elastic_graph/graphql/scalar_coercion_adapters/longs.rb +47 -0
  68. data/lib/elastic_graph/graphql/scalar_coercion_adapters/no_op.rb +24 -0
  69. data/lib/elastic_graph/graphql/scalar_coercion_adapters/time_zone.rb +44 -0
  70. data/lib/elastic_graph/graphql/scalar_coercion_adapters/untyped.rb +32 -0
  71. data/lib/elastic_graph/graphql/scalar_coercion_adapters/valid_time_zones.rb +634 -0
  72. data/lib/elastic_graph/graphql/schema/arguments.rb +78 -0
  73. data/lib/elastic_graph/graphql/schema/enum_value.rb +30 -0
  74. data/lib/elastic_graph/graphql/schema/field.rb +147 -0
  75. data/lib/elastic_graph/graphql/schema/relation_join.rb +103 -0
  76. data/lib/elastic_graph/graphql/schema/type.rb +263 -0
  77. data/lib/elastic_graph/graphql/schema.rb +164 -0
  78. data/lib/elastic_graph/graphql.rb +253 -0
  79. data/script/dump_time_zones +81 -0
  80. data/script/dump_time_zones.java +17 -0
  81. metadata +503 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: b19df00bc750f39aa2cbccab58af18c6cb2a2329017e9d0aa8f889dce5b1c377
4
+ data.tar.gz: 8308fce167bd7cf625bb7b82f2f717c174ffa12655b1df77e58e37369b8d6294
5
+ SHA512:
6
+ metadata.gz: c983f82a5da8da47c2f8146337b9425623d1ebfacf0e2f93db706cd53e465725f12cc43fe4e49d2b7885422a428e7744ae61642264941d3d497eb4d4fd3bc9f6
7
+ data.tar.gz: 277958b0fee144f62342f2e3081f4ebbb9a7529456e62a2b5183ad8fe93f2cca7fd5013cbe9bf461fceee46d5f42cea8149d40b624410fbb9498267e78d1dedb
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2024 Block, Inc.
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,3 @@
1
+ # ElasticGraph::GraphQL
2
+
3
+ Provides the ElasticGraph GraphQL query engine.
@@ -0,0 +1,23 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require_relative "../gemspec_helper"
10
+
11
+ ElasticGraphGemspecHelper.define_elasticgraph_gem(gemspec_file: __FILE__, category: :core) do |spec, eg_version|
12
+ spec.summary = "The ElasticGraph GraphQL query engine."
13
+
14
+ spec.add_dependency "elasticgraph-datastore_core", eg_version
15
+ spec.add_dependency "elasticgraph-schema_artifacts", eg_version
16
+ spec.add_dependency "graphql", ">= 2.3.7", "< 2.4"
17
+
18
+ spec.add_development_dependency "elasticgraph-admin", eg_version
19
+ spec.add_development_dependency "elasticgraph-elasticsearch", eg_version
20
+ spec.add_development_dependency "elasticgraph-opensearch", eg_version
21
+ spec.add_development_dependency "elasticgraph-indexer", eg_version
22
+ spec.add_development_dependency "elasticgraph-schema_definition", eg_version
23
+ end
@@ -0,0 +1,79 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ module ElasticGraph
10
+ class GraphQL
11
+ module Aggregation
12
+ # Grouping adapter that uses a `composite` aggregation.
13
+ #
14
+ # For now, only used for the outermost "root" aggregations but may be used for sub-aggregations in the future.
15
+ module CompositeGroupingAdapter
16
+ class << self
17
+ def meta_name
18
+ "comp"
19
+ end
20
+
21
+ def grouping_detail_for(query)
22
+ sources = build_sources(query)
23
+
24
+ inner_clauses = yield
25
+ inner_clauses = nil if inner_clauses.empty?
26
+
27
+ return AggregationDetail.new(inner_clauses, {}) if sources.empty?
28
+
29
+ clauses = {
30
+ query.name => {
31
+ "composite" => {
32
+ "size" => query.paginator.requested_page_size,
33
+ "sources" => sources,
34
+ "after" => composite_after(query)
35
+ }.compact,
36
+ "aggs" => inner_clauses
37
+ }.compact
38
+ }
39
+
40
+ AggregationDetail.new(clauses, {"buckets_path" => [query.name]})
41
+ end
42
+
43
+ def prepare_response_buckets(sub_agg, buckets_path, meta)
44
+ sub_agg.dig(*buckets_path).fetch("buckets").map do |bucket|
45
+ bucket.merge({"doc_count_error_upper_bound" => 0})
46
+ end
47
+ end
48
+
49
+ private
50
+
51
+ def composite_after(query)
52
+ return unless (cursor = query.paginator.search_after)
53
+ expected_keys = query.groupings.map(&:key)
54
+
55
+ if cursor.sort_values.keys.sort == expected_keys.sort
56
+ cursor.sort_values
57
+ else
58
+ raise ::GraphQL::ExecutionError, "`#{cursor.encode}` is not a valid cursor for the current groupings."
59
+ end
60
+ end
61
+
62
+ def build_sources(query)
63
+ # We don't want documents that have no value for a grouping field to be omitted, so we set `missing_bucket: true`.
64
+ # https://www.elastic.co/guide/en/elasticsearch/reference/8.11/search-aggregations-bucket-composite-aggregation.html#_missing_bucket
65
+ grouping_options = if query.paginator.search_in_reverse?
66
+ {"order" => "desc", "missing_bucket" => true}
67
+ else
68
+ {"missing_bucket" => true}
69
+ end
70
+
71
+ query.groupings.map do |grouping|
72
+ {grouping.key => grouping.composite_clause(grouping_options: grouping_options)}
73
+ end
74
+ end
75
+ end
76
+ end
77
+ end
78
+ end
79
+ end
@@ -0,0 +1,39 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/graphql/aggregation/key"
10
+ require "elastic_graph/graphql/aggregation/field_path_encoder"
11
+
12
+ module ElasticGraph
13
+ class GraphQL
14
+ module Aggregation
15
+ # Represents some sort of aggregation computation (min, max, avg, sum, etc) on a field.
16
+ # For the relevant Elasticsearch docs, see:
17
+ # https://www.elastic.co/guide/en/elasticsearch/reference/7.12/search-aggregations-metrics-avg-aggregation.html
18
+ # https://www.elastic.co/guide/en/elasticsearch/reference/7.12/search-aggregations-metrics-max-aggregation.html
19
+ # https://www.elastic.co/guide/en/elasticsearch/reference/7.12/search-aggregations-metrics-min-aggregation.html
20
+ # https://www.elastic.co/guide/en/elasticsearch/reference/7.12/search-aggregations-metrics-sum-aggregation.html
21
+ Computation = ::Data.define(:source_field_path, :computed_index_field_name, :detail) do
22
+ # @implements Computation
23
+
24
+ def key(aggregation_name:)
25
+ Key::AggregatedValue.new(
26
+ aggregation_name: aggregation_name,
27
+ field_path: source_field_path.map(&:name_in_graphql_query),
28
+ function_name: computed_index_field_name
29
+ ).encode
30
+ end
31
+
32
+ def clause
33
+ encoded_path = FieldPathEncoder.join(source_field_path.filter_map(&:name_in_index))
34
+ {detail.function.to_s => {"field" => encoded_path}}
35
+ end
36
+ end
37
+ end
38
+ end
39
+ end
@@ -0,0 +1,83 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/constants"
10
+ require "elastic_graph/graphql/aggregation/field_path_encoder"
11
+ require "elastic_graph/support/memoizable_data"
12
+
13
+ module ElasticGraph
14
+ class GraphQL
15
+ module Aggregation
16
+ # Represents a grouping of a timestamp field into a date histogram.
17
+ # For the relevant Elasticsearch docs, see:
18
+ # https://www.elastic.co/guide/en/elasticsearch/reference/7.12/search-aggregations-bucket-datehistogram-aggregation.html
19
+ # https://www.elastic.co/guide/en/elasticsearch/reference/7.12/search-aggregations-bucket-composite-aggregation.html#_date_histogram
20
+ class DateHistogramGrouping < Support::MemoizableData.define(:field_path, :interval, :time_zone, :offset)
21
+ def key
22
+ @key ||= FieldPathEncoder.encode(field_path.map(&:name_in_graphql_query))
23
+ end
24
+
25
+ def encoded_index_field_path
26
+ @encoded_index_field_path ||= FieldPathEncoder.join(field_path.filter_map(&:name_in_index))
27
+ end
28
+
29
+ def composite_clause(grouping_options: {})
30
+ interval_options = INTERVAL_OPTIONS_BY_NAME.fetch(interval) do
31
+ raise ArgumentError, "#{interval.inspect} is an unsupported interval. Valid values: #{INTERVAL_OPTIONS_BY_NAME.keys.inspect}."
32
+ end
33
+
34
+ inner_hash = interval_options.merge(grouping_options).merge({
35
+ "field" => encoded_index_field_path,
36
+ "format" => DATASTORE_DATE_TIME_FORMAT,
37
+ "offset" => offset,
38
+ "time_zone" => time_zone
39
+ }.compact)
40
+
41
+ {"date_histogram" => inner_hash}
42
+ end
43
+
44
+ def non_composite_clause_for(query)
45
+ # `min_doc_count: 1` is important so we don't have excess buckets when there is a large gap
46
+ # between document dates. For example, if you group on a field at the year truncation unit, and
47
+ # a one-off rogue document has an incorrect timestamp for hundreds of years ago, you'll wind
48
+ # up with a bucket for each intervening year. `min_doc_count: 1` excludes those empty buckets.
49
+ composite_clause(grouping_options: {"min_doc_count" => 1})
50
+ end
51
+
52
+ def inner_meta
53
+ INNER_META
54
+ end
55
+
56
+ INNER_META = {
57
+ # On a date histogram aggregation, the `key` is formatted as a number (milliseconds since epoch). We
58
+ # need it formatted as a string, which `key_as_string` provides.
59
+ "key_path" => ["key_as_string"],
60
+ # Date histogram aggregations do not have any doc count error. Our resolver is generic and expects
61
+ # there to always be a `doc_count_error_upper_bound`. So we want to tell it to merge an error of `0`
62
+ # into each bucket.
63
+ "merge_into_bucket" => {"doc_count_error_upper_bound" => 0}
64
+ }
65
+
66
+ INTERVAL_OPTIONS_BY_NAME = {
67
+ # These intervals have only fixed intervals...
68
+ "millisecond" => {"fixed_interval" => "1ms"},
69
+ "second" => {"fixed_interval" => "1s"},
70
+ # ...but the rest have calendar intervals, which we prefer.
71
+ "minute" => {"calendar_interval" => "minute"},
72
+ "hour" => {"calendar_interval" => "hour"},
73
+ "day" => {"calendar_interval" => "day"},
74
+ "week" => {"calendar_interval" => "week"},
75
+ "month" => {"calendar_interval" => "month"},
76
+ "quarter" => {"calendar_interval" => "quarter"},
77
+ "year" => {"calendar_interval" => "year"}
78
+ }
79
+ private_constant :INTERVAL_OPTIONS_BY_NAME
80
+ end
81
+ end
82
+ end
83
+ end
@@ -0,0 +1,47 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/error"
10
+
11
+ module ElasticGraph
12
+ class GraphQL
13
+ module Aggregation
14
+ module FieldPathEncoder
15
+ # Embedded fields need to be specified with dot separators.
16
+ DELIMITER = "."
17
+
18
+ # Takes a list of field names (e.g., ["amountMoney", "amount"])
19
+ # and returns a single field name path string (e.g., "amountMoney.amount").
20
+ def self.encode(field_names)
21
+ field_names.each do |str|
22
+ verify_delimiters(str)
23
+ end
24
+
25
+ join(field_names)
26
+ end
27
+
28
+ # Joins together a list of encoded paths.
29
+ def self.join(encoded_paths)
30
+ encoded_paths.join(DELIMITER)
31
+ end
32
+
33
+ # Takes a field path (e.g., "amountMoney.amount") and returns the field name parts
34
+ # (["amountMoney", "amount"]).
35
+ def self.decode(field_path)
36
+ field_path.split(DELIMITER)
37
+ end
38
+
39
+ private_class_method def self.verify_delimiters(str)
40
+ if str.to_s.include?(DELIMITER)
41
+ raise InvalidArgumentValueError, %("#{str}" contains delimiter: "#{DELIMITER}")
42
+ end
43
+ end
44
+ end
45
+ end
46
+ end
47
+ end
@@ -0,0 +1,26 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/graphql/aggregation/term_grouping"
10
+
11
+ module ElasticGraph
12
+ class GraphQL
13
+ module Aggregation
14
+ class FieldTermGrouping < Support::MemoizableData.define(:field_path)
15
+ # @dynamic field_path
16
+ include TermGrouping
17
+
18
+ private
19
+
20
+ def terms_subclause
21
+ {"field" => encoded_index_field_path}
22
+ end
23
+ end
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,87 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/error"
10
+ require "elastic_graph/graphql/aggregation/field_path_encoder"
11
+
12
+ module ElasticGraph
13
+ class GraphQL
14
+ module Aggregation
15
+ module Key
16
+ # The datastore only gives us an "aggregation key" (or name) to tie response values back to the part of
17
+ # the request they came from. We use this delimiter to encode and decode aggregation keys.
18
+ DELIMITER = ":"
19
+
20
+ # Aggregation key implementation used when we're dealing with `aggregated_values`.
21
+ class AggregatedValue < ::Data.define(
22
+ # The name of the aggregation encoded into this key.
23
+ :aggregation_name,
24
+ # The path to the field used by this aggregation (encoded as a string)
25
+ :encoded_field_path,
26
+ # The name of the aggregation function, such as "sum".
27
+ :function_name
28
+ )
29
+ # We encode the field path as part of initialization to enforce an invariant that all `AggregatedValue`
30
+ # instances have valid values for all attributes. `FieldPathEncoder.encode` will raise an exception if
31
+ # the field path is invalid.
32
+ def initialize(aggregation_name:, function_name:, field_path: [], encoded_field_path: FieldPathEncoder.encode(field_path))
33
+ Key.verify_no_delimiter_in(aggregation_name, function_name, *field_path)
34
+
35
+ super(
36
+ aggregation_name: aggregation_name,
37
+ encoded_field_path: encoded_field_path,
38
+ function_name: function_name
39
+ )
40
+ end
41
+
42
+ def encode
43
+ Key.encode([aggregation_name, encoded_field_path, function_name])
44
+ end
45
+
46
+ def field_path
47
+ FieldPathEncoder.decode(encoded_field_path)
48
+ end
49
+ end
50
+
51
+ # Encodes the key used for a `missing` aggregation used to provide a bucket for
52
+ # documents that are missing a value for the field being grouped on.
53
+ def self.missing_value_bucket_key(base_key)
54
+ Key.encode([base_key, "m"])
55
+ end
56
+
57
+ # Extracts an aggregation name from a string that could either already be an aggregation name, or could
58
+ # be an encoded key. We need this for dealing with the multiple forms that aggregation responses take:
59
+ #
60
+ # - When we use `grouped_by`, we run a composite aggregation that has the aggregation name, and
61
+ # that shows up as a key directly under `aggregations` in the datastore response.
62
+ # - For aggregations with no `grouped_by`, we encode the aggregation name in the key, and the keys
63
+ # directly under `aggregations` in the datastore response will take a form like:
64
+ # `[agg_name]:[field_path]:[function]`.
65
+ #
66
+ # It's also possible for these two forms to be mixed under `aggregations` on a datastore response,
67
+ # where some hash keys are in one form and some are in the other form. This can happen when we run
68
+ # multiple aggregations (some with `grouped_by`, some without) in the same query.
69
+ def self.extract_aggregation_name_from(agg_name_or_key)
70
+ agg_name_or_key.split(DELIMITER, 2).first || agg_name_or_key
71
+ end
72
+
73
+ def self.encode(parts)
74
+ parts.join(DELIMITER)
75
+ end
76
+
77
+ def self.verify_no_delimiter_in(*parts)
78
+ parts.each do |part|
79
+ if part.to_s.include?(DELIMITER)
80
+ raise InvalidArgumentValueError, %("#{part}" contains delimiter: "#{DELIMITER}")
81
+ end
82
+ end
83
+ end
84
+ end
85
+ end
86
+ end
87
+ end
@@ -0,0 +1,37 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/graphql/aggregation/field_path_encoder"
10
+
11
+ module ElasticGraph
12
+ class GraphQL
13
+ module Aggregation
14
+ # Represents a sub-aggregation on a `nested` field.
15
+ # For the relevant Elasticsearch docs, see:
16
+ # https://www.elastic.co/guide/en/elasticsearch/reference/8.10/search-aggregations-bucket-nested-aggregation.html
17
+ class NestedSubAggregation < ::Data.define(:nested_path, :query)
18
+ def build_agg_hash(filter_interpreter, parent_queries:)
19
+ detail = query.build_agg_detail(filter_interpreter, field_path: nested_path, parent_queries: parent_queries)
20
+ return {} if detail.nil?
21
+
22
+ query_names = parent_queries.map(&:name) + [query.name]
23
+ {
24
+ Key.encode(query_names) => {
25
+ "nested" => {"path" => FieldPathEncoder.encode(nested_path.filter_map(&:name_in_index))},
26
+ "aggs" => detail.clauses,
27
+ "meta" => detail.meta.merge({
28
+ "size" => query.paginator.desired_page_size,
29
+ "adapter" => query.grouping_adapter.meta_name
30
+ })
31
+ }.compact
32
+ }
33
+ end
34
+ end
35
+ end
36
+ end
37
+ end
@@ -0,0 +1,129 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ module ElasticGraph
10
+ class GraphQL
11
+ module Aggregation
12
+ # Grouping adapter that avoids using a `composite` aggregation, due to limitations with Elasticsearch/OpenSearch.
13
+ module NonCompositeGroupingAdapter
14
+ class << self
15
+ def meta_name
16
+ "non_comp"
17
+ end
18
+
19
+ def grouping_detail_for(query)
20
+ date_groupings, term_groupings = query.groupings.partition do |grouping|
21
+ grouping.is_a?(DateHistogramGrouping)
22
+ end
23
+
24
+ grouping_detail(date_groupings, query) do
25
+ # We want term groupings inside date groupings so that, when our bucket aggregations might produce
26
+ # inaccurate doc counts, the innermost grouping aggregation has `doc_count_error_upper_bound` on
27
+ # its buckets allowing us to expose information about the accuracy.
28
+ #
29
+ # Date histogram aggregations do not include `doc_count_error_upper_bound` because, on their own, they are
30
+ # always accurate, but they may not be accurate when used as a sub-aggregation of a `terms` aggregation.
31
+ #
32
+ # For more detail on the issue this ordering is designed to avoid, see:
33
+ # https://discuss.elastic.co/t/accuracy-of-date-histogram-sub-aggregation-doc-count-under-terms-aggregation/348685
34
+ grouping_detail(term_groupings, query) do
35
+ inner_clauses = yield
36
+ inner_clauses = nil if inner_clauses.empty?
37
+ AggregationDetail.new(inner_clauses, {})
38
+ end
39
+ end
40
+ end
41
+
42
+ def prepare_response_buckets(sub_agg, buckets_path, meta)
43
+ sort_and_truncate_buckets(format_buckets(sub_agg, buckets_path), meta.fetch("size"))
44
+ end
45
+
46
+ private
47
+
48
+ def grouping_detail(groupings, query)
49
+ # Our `reduce` here builds the grouping clauses from the inside out (since each reduction step
50
+ # wraps the prior step's result in an outer `aggs` hash). The natural result of that is a nested set of
51
+ # grouping clauses that "feels" inside-out compared to what you would naturally expect.
52
+ #
53
+ # While that causes no concrete issue, it's nice to avoid. Here we use `reverse` to correct for that.
54
+ groupings.reverse.reduce(yield) do |inner_detail, grouping|
55
+ inner_detail.wrap_with_grouping(grouping, query: query)
56
+ end
57
+ end
58
+
59
+ # Formats the result of a bucket aggregation into a format that we can easily resolve. There are two things
60
+ # this accomplishes:
61
+ #
62
+ # - Converts bucket keys into hashes that can be used to resolve `grouped_by` fields.
63
+ # - Recursively flattens multiple levels of aggregations (which happens when we need to mix multiple kinds of
64
+ # bucket aggregations to group in the way the client requested) into a single flat list.
65
+ def format_buckets(sub_agg, buckets_path, parent_key_fields: {}, parent_key_values: [])
66
+ agg_with_buckets = sub_agg.dig(*buckets_path)
67
+
68
+ missing_bucket = {
69
+ # Doc counts in missing value buckets are always perfectly accurate.
70
+ "doc_count_error_upper_bound" => 0
71
+ }.merge(sub_agg.dig(*missing_bucket_path_from(buckets_path))) # : ::Hash[::String, untyped]
72
+
73
+ meta = agg_with_buckets.fetch("meta")
74
+
75
+ grouping_field_names = meta.fetch("grouping_fields") # provides the names of the fields being grouped on
76
+ key_path = meta.fetch("key_path") # indicates whether we want to get the key values from `key` or `key_as_string`.
77
+ sub_buckets_path = meta["buckets_path"] # buckets_path is optional, so we don't use fetch.
78
+ merge_into_bucket = meta.fetch("merge_into_bucket")
79
+
80
+ raw_buckets = agg_with_buckets.fetch("buckets") # : ::Array[::Hash[::String, untyped]]
81
+
82
+ # If the missing bucket is non-empty, include it. This matches the behavior of composite aggregations
83
+ # when the `missing_bucket` option is used.
84
+ raw_buckets += [missing_bucket] if missing_bucket.fetch("doc_count") > 0
85
+
86
+ raw_buckets.flat_map do |raw_bucket|
87
+ # The key will either be a single value (e.g. `47`) if we used a `terms`/`date_histogram` aggregation,
88
+ # or a tuple of values (e.g. `[47, "abc"]`) if we used a `multi_terms` aggregation. Here we convert it
89
+ # to the form needed for resolving `grouped_by` fields: a hash like `{"size" => 47, "tag" => "abc"}`.
90
+ key_values = Array(raw_bucket.dig(*key_path))
91
+ key_fields_hash = grouping_field_names.zip(key_values).to_h
92
+
93
+ # If we have multiple levels of aggregations, we need to merge the key fields hash with the key fields from the parent levels.
94
+ key_fields = parent_key_fields.merge(key_fields_hash)
95
+ key_values = parent_key_values + key_values
96
+
97
+ # If there's another level of aggregations, `buckets_path` will provide us with the path to that next level.
98
+ # We can use it to recurse as we build a flat list of buckets.
99
+ if sub_buckets_path
100
+ format_buckets(raw_bucket, sub_buckets_path, parent_key_fields: key_fields, parent_key_values: key_values)
101
+ else
102
+ [raw_bucket.merge(merge_into_bucket).merge({"key" => key_fields, "key_values" => key_values})]
103
+ end
104
+ end
105
+ end
106
+
107
+ # A `terms` or `multi_terms` sub-aggregation is automatically sorted by `doc_count` and we pass
108
+ # `size` to the datastore to limit the number of returned buckets.
109
+ #
110
+ # A `date_histogram` sub-aggregation is sorted ascending by the date, and we don't limit the buckets
111
+ # in any way (there's no `size` parameter).
112
+ #
113
+ # To honor the requested page size and return buckets in a consistent order, we sort the buckets here
114
+ # (by doc count descending, then by the key values ascending), and then take only the first `size`.
115
+ def sort_and_truncate_buckets(buckets, size)
116
+ buckets
117
+ .sort_by { |b| [-b.fetch("doc_count"), b.fetch("key_values")] }
118
+ .first(size)
119
+ end
120
+
121
+ def missing_bucket_path_from(buckets_path)
122
+ *all_but_last, last = buckets_path
123
+ all_but_last + [Key.missing_value_bucket_key(last.to_s)]
124
+ end
125
+ end
126
+ end
127
+ end
128
+ end
129
+ end
@@ -0,0 +1,31 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ module ElasticGraph
10
+ class GraphQL
11
+ module Aggregation
12
+ PathSegment = ::Data.define(
13
+ # The name of this segment's field in the GraphQL query. If it's an aliased field, this
14
+ # will be the alias name.
15
+ :name_in_graphql_query,
16
+ # The name of this segment's field in the datastore index.
17
+ :name_in_index
18
+ ) do
19
+ # Factory method that aids in building a `PathSegment` for a given `field` and `lookahead` node.
20
+ def self.for(lookahead:, field: nil)
21
+ ast_node = lookahead.ast_nodes.first # : ::GraphQL::Language::Nodes::Field
22
+
23
+ new(
24
+ name_in_graphql_query: ast_node.alias || ast_node.name,
25
+ name_in_index: field&.name_in_index&.to_s
26
+ )
27
+ end
28
+ end
29
+ end
30
+ end
31
+ end