elasticgraph-graphql 0.18.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +3 -0
- data/elasticgraph-graphql.gemspec +23 -0
- data/lib/elastic_graph/graphql/aggregation/composite_grouping_adapter.rb +79 -0
- data/lib/elastic_graph/graphql/aggregation/computation.rb +39 -0
- data/lib/elastic_graph/graphql/aggregation/date_histogram_grouping.rb +83 -0
- data/lib/elastic_graph/graphql/aggregation/field_path_encoder.rb +47 -0
- data/lib/elastic_graph/graphql/aggregation/field_term_grouping.rb +26 -0
- data/lib/elastic_graph/graphql/aggregation/key.rb +87 -0
- data/lib/elastic_graph/graphql/aggregation/nested_sub_aggregation.rb +37 -0
- data/lib/elastic_graph/graphql/aggregation/non_composite_grouping_adapter.rb +129 -0
- data/lib/elastic_graph/graphql/aggregation/path_segment.rb +31 -0
- data/lib/elastic_graph/graphql/aggregation/query.rb +172 -0
- data/lib/elastic_graph/graphql/aggregation/query_adapter.rb +345 -0
- data/lib/elastic_graph/graphql/aggregation/query_optimizer.rb +187 -0
- data/lib/elastic_graph/graphql/aggregation/resolvers/aggregated_values.rb +41 -0
- data/lib/elastic_graph/graphql/aggregation/resolvers/count_detail.rb +44 -0
- data/lib/elastic_graph/graphql/aggregation/resolvers/grouped_by.rb +30 -0
- data/lib/elastic_graph/graphql/aggregation/resolvers/node.rb +64 -0
- data/lib/elastic_graph/graphql/aggregation/resolvers/relay_connection_builder.rb +83 -0
- data/lib/elastic_graph/graphql/aggregation/resolvers/sub_aggregations.rb +82 -0
- data/lib/elastic_graph/graphql/aggregation/script_term_grouping.rb +32 -0
- data/lib/elastic_graph/graphql/aggregation/term_grouping.rb +118 -0
- data/lib/elastic_graph/graphql/client.rb +43 -0
- data/lib/elastic_graph/graphql/config.rb +81 -0
- data/lib/elastic_graph/graphql/datastore_query/document_paginator.rb +100 -0
- data/lib/elastic_graph/graphql/datastore_query/index_expression_builder.rb +142 -0
- data/lib/elastic_graph/graphql/datastore_query/paginator.rb +199 -0
- data/lib/elastic_graph/graphql/datastore_query/routing_picker.rb +239 -0
- data/lib/elastic_graph/graphql/datastore_query.rb +372 -0
- data/lib/elastic_graph/graphql/datastore_response/document.rb +78 -0
- data/lib/elastic_graph/graphql/datastore_response/search_response.rb +79 -0
- data/lib/elastic_graph/graphql/datastore_search_router.rb +151 -0
- data/lib/elastic_graph/graphql/decoded_cursor.rb +120 -0
- data/lib/elastic_graph/graphql/filtering/boolean_query.rb +45 -0
- data/lib/elastic_graph/graphql/filtering/field_path.rb +81 -0
- data/lib/elastic_graph/graphql/filtering/filter_args_translator.rb +58 -0
- data/lib/elastic_graph/graphql/filtering/filter_interpreter.rb +526 -0
- data/lib/elastic_graph/graphql/filtering/filter_value_set_extractor.rb +148 -0
- data/lib/elastic_graph/graphql/filtering/range_query.rb +56 -0
- data/lib/elastic_graph/graphql/http_endpoint.rb +229 -0
- data/lib/elastic_graph/graphql/monkey_patches/schema_field.rb +56 -0
- data/lib/elastic_graph/graphql/monkey_patches/schema_object.rb +48 -0
- data/lib/elastic_graph/graphql/query_adapter/filters.rb +161 -0
- data/lib/elastic_graph/graphql/query_adapter/pagination.rb +27 -0
- data/lib/elastic_graph/graphql/query_adapter/requested_fields.rb +124 -0
- data/lib/elastic_graph/graphql/query_adapter/sort.rb +32 -0
- data/lib/elastic_graph/graphql/query_details_tracker.rb +60 -0
- data/lib/elastic_graph/graphql/query_executor.rb +200 -0
- data/lib/elastic_graph/graphql/resolvers/get_record_field_value.rb +49 -0
- data/lib/elastic_graph/graphql/resolvers/graphql_adapter.rb +114 -0
- data/lib/elastic_graph/graphql/resolvers/list_records.rb +29 -0
- data/lib/elastic_graph/graphql/resolvers/nested_relationships.rb +74 -0
- data/lib/elastic_graph/graphql/resolvers/query_adapter.rb +85 -0
- data/lib/elastic_graph/graphql/resolvers/query_source.rb +46 -0
- data/lib/elastic_graph/graphql/resolvers/relay_connection/array_adapter.rb +71 -0
- data/lib/elastic_graph/graphql/resolvers/relay_connection/generic_adapter.rb +65 -0
- data/lib/elastic_graph/graphql/resolvers/relay_connection/page_info.rb +82 -0
- data/lib/elastic_graph/graphql/resolvers/relay_connection/search_response_adapter_builder.rb +40 -0
- data/lib/elastic_graph/graphql/resolvers/relay_connection.rb +42 -0
- data/lib/elastic_graph/graphql/resolvers/resolvable_value.rb +56 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/cursor.rb +35 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/date.rb +64 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/date_time.rb +60 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/local_time.rb +30 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/longs.rb +47 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/no_op.rb +24 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/time_zone.rb +44 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/untyped.rb +32 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/valid_time_zones.rb +634 -0
- data/lib/elastic_graph/graphql/schema/arguments.rb +78 -0
- data/lib/elastic_graph/graphql/schema/enum_value.rb +30 -0
- data/lib/elastic_graph/graphql/schema/field.rb +147 -0
- data/lib/elastic_graph/graphql/schema/relation_join.rb +103 -0
- data/lib/elastic_graph/graphql/schema/type.rb +263 -0
- data/lib/elastic_graph/graphql/schema.rb +164 -0
- data/lib/elastic_graph/graphql.rb +253 -0
- data/script/dump_time_zones +81 -0
- data/script/dump_time_zones.java +17 -0
- metadata +503 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: b19df00bc750f39aa2cbccab58af18c6cb2a2329017e9d0aa8f889dce5b1c377
|
|
4
|
+
data.tar.gz: 8308fce167bd7cf625bb7b82f2f717c174ffa12655b1df77e58e37369b8d6294
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: c983f82a5da8da47c2f8146337b9425623d1ebfacf0e2f93db706cd53e465725f12cc43fe4e49d2b7885422a428e7744ae61642264941d3d497eb4d4fd3bc9f6
|
|
7
|
+
data.tar.gz: 277958b0fee144f62342f2e3081f4ebbb9a7529456e62a2b5183ad8fe93f2cca7fd5013cbe9bf461fceee46d5f42cea8149d40b624410fbb9498267e78d1dedb
|
data/LICENSE.txt
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
The MIT License (MIT)
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 Block, Inc.
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
|
13
|
+
all copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# Copyright 2024 Block, Inc.
|
|
2
|
+
#
|
|
3
|
+
# Use of this source code is governed by an MIT-style
|
|
4
|
+
# license that can be found in the LICENSE file or at
|
|
5
|
+
# https://opensource.org/licenses/MIT.
|
|
6
|
+
#
|
|
7
|
+
# frozen_string_literal: true
|
|
8
|
+
|
|
9
|
+
require_relative "../gemspec_helper"
|
|
10
|
+
|
|
11
|
+
ElasticGraphGemspecHelper.define_elasticgraph_gem(gemspec_file: __FILE__, category: :core) do |spec, eg_version|
|
|
12
|
+
spec.summary = "The ElasticGraph GraphQL query engine."
|
|
13
|
+
|
|
14
|
+
spec.add_dependency "elasticgraph-datastore_core", eg_version
|
|
15
|
+
spec.add_dependency "elasticgraph-schema_artifacts", eg_version
|
|
16
|
+
spec.add_dependency "graphql", ">= 2.3.7", "< 2.4"
|
|
17
|
+
|
|
18
|
+
spec.add_development_dependency "elasticgraph-admin", eg_version
|
|
19
|
+
spec.add_development_dependency "elasticgraph-elasticsearch", eg_version
|
|
20
|
+
spec.add_development_dependency "elasticgraph-opensearch", eg_version
|
|
21
|
+
spec.add_development_dependency "elasticgraph-indexer", eg_version
|
|
22
|
+
spec.add_development_dependency "elasticgraph-schema_definition", eg_version
|
|
23
|
+
end
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
# Copyright 2024 Block, Inc.
|
|
2
|
+
#
|
|
3
|
+
# Use of this source code is governed by an MIT-style
|
|
4
|
+
# license that can be found in the LICENSE file or at
|
|
5
|
+
# https://opensource.org/licenses/MIT.
|
|
6
|
+
#
|
|
7
|
+
# frozen_string_literal: true
|
|
8
|
+
|
|
9
|
+
module ElasticGraph
|
|
10
|
+
class GraphQL
|
|
11
|
+
module Aggregation
|
|
12
|
+
# Grouping adapter that uses a `composite` aggregation.
|
|
13
|
+
#
|
|
14
|
+
# For now, only used for the outermost "root" aggregations but may be used for sub-aggregations in the future.
|
|
15
|
+
module CompositeGroupingAdapter
  class << self
    # Short identifier for this grouping adapter.
    def meta_name = "comp"

    # Builds the aggregation detail for `query` using a `composite` aggregation.
    #
    # The given block must return the hash of inner (sub-aggregation) clauses. An empty
    # hash is treated as "no inner clauses". When the query has no groupings, the inner
    # clauses are returned on their own without any `composite` wrapper.
    def grouping_detail_for(query)
      sources = build_sources(query)

      sub_clauses = yield
      sub_clauses = nil if sub_clauses.empty?

      return AggregationDetail.new(sub_clauses, {}) if sources.empty?

      # `compact` drops `after` when there is no cursor, and `aggs` when there are no inner clauses.
      composite = {
        "size" => query.paginator.requested_page_size,
        "sources" => sources,
        "after" => composite_after(query)
      }.compact
      aggregation = {"composite" => composite, "aggs" => sub_clauses}.compact

      AggregationDetail.new({query.name => aggregation}, {"buckets_path" => [query.name]})
    end

    # Returns the buckets found at `buckets_path` within `sub_agg`, each with a
    # `doc_count_error_upper_bound` of `0` merged in. `meta` is accepted but not used here.
    def prepare_response_buckets(sub_agg, buckets_path, meta)
      raw_buckets = sub_agg.dig(*buckets_path).fetch("buckets")
      raw_buckets.map { |raw_bucket| raw_bucket.merge("doc_count_error_upper_bound" => 0) }
    end

    private

    # Returns the cursor's sort values for use as the composite `after` clause, or `nil` when
    # the paginator has no cursor. Raises a GraphQL execution error when the cursor's keys
    # do not match the keys of the query's groupings.
    def composite_after(query)
      cursor = query.paginator.search_after
      return unless cursor

      grouping_keys = query.groupings.map(&:key)
      unless cursor.sort_values.keys.sort == grouping_keys.sort
        raise ::GraphQL::ExecutionError, "`#{cursor.encode}` is not a valid cursor for the current groupings."
      end

      cursor.sort_values
    end

    # Builds one composite source per grouping of `query`.
    def build_sources(query)
      # We don't want documents that have no value for a grouping field to be omitted, so we set `missing_bucket: true`.
      # https://www.elastic.co/guide/en/elasticsearch/reference/8.11/search-aggregations-bucket-composite-aggregation.html#_missing_bucket
      options = {"missing_bucket" => true}
      options = {"order" => "desc"}.merge(options) if query.paginator.search_in_reverse?

      query.groupings.map do |grouping|
        {grouping.key => grouping.composite_clause(grouping_options: options)}
      end
    end
  end
end
|
|
77
|
+
end
|
|
78
|
+
end
|
|
79
|
+
end
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# Copyright 2024 Block, Inc.
|
|
2
|
+
#
|
|
3
|
+
# Use of this source code is governed by an MIT-style
|
|
4
|
+
# license that can be found in the LICENSE file or at
|
|
5
|
+
# https://opensource.org/licenses/MIT.
|
|
6
|
+
#
|
|
7
|
+
# frozen_string_literal: true
|
|
8
|
+
|
|
9
|
+
require "elastic_graph/graphql/aggregation/key"
|
|
10
|
+
require "elastic_graph/graphql/aggregation/field_path_encoder"
|
|
11
|
+
|
|
12
|
+
module ElasticGraph
|
|
13
|
+
class GraphQL
|
|
14
|
+
module Aggregation
|
|
15
|
+
# Represents some sort of aggregation computation (min, max, avg, sum, etc) on a field.
|
|
16
|
+
# For the relevant Elasticsearch docs, see:
|
|
17
|
+
# https://www.elastic.co/guide/en/elasticsearch/reference/7.12/search-aggregations-metrics-avg-aggregation.html
|
|
18
|
+
# https://www.elastic.co/guide/en/elasticsearch/reference/7.12/search-aggregations-metrics-max-aggregation.html
|
|
19
|
+
# https://www.elastic.co/guide/en/elasticsearch/reference/7.12/search-aggregations-metrics-min-aggregation.html
|
|
20
|
+
# https://www.elastic.co/guide/en/elasticsearch/reference/7.12/search-aggregations-metrics-sum-aggregation.html
|
|
21
|
+
Computation = ::Data.define(:source_field_path, :computed_index_field_name, :detail) do
  # @implements Computation

  # Returns the encoded aggregation key for this computation within the aggregation named
  # `aggregation_name`, built from the GraphQL-side names of the source field path.
  def key(aggregation_name:)
    graphql_path = source_field_path.map { |segment| segment.name_in_graphql_query }

    aggregated_value = Key::AggregatedValue.new(
      aggregation_name: aggregation_name,
      field_path: graphql_path,
      function_name: computed_index_field_name
    )
    aggregated_value.encode
  end

  # Returns the datastore aggregation clause: the function name mapped to the index field it
  # operates on. Path segments without an index name are skipped.
  def clause
    index_names = source_field_path.filter_map { |segment| segment.name_in_index }
    function = detail.function.to_s

    {function => {"field" => FieldPathEncoder.join(index_names)}}
  end
end
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
end
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
# Copyright 2024 Block, Inc.
|
|
2
|
+
#
|
|
3
|
+
# Use of this source code is governed by an MIT-style
|
|
4
|
+
# license that can be found in the LICENSE file or at
|
|
5
|
+
# https://opensource.org/licenses/MIT.
|
|
6
|
+
#
|
|
7
|
+
# frozen_string_literal: true
|
|
8
|
+
|
|
9
|
+
require "elastic_graph/constants"
|
|
10
|
+
require "elastic_graph/graphql/aggregation/field_path_encoder"
|
|
11
|
+
require "elastic_graph/support/memoizable_data"
|
|
12
|
+
|
|
13
|
+
module ElasticGraph
|
|
14
|
+
class GraphQL
|
|
15
|
+
module Aggregation
|
|
16
|
+
# Represents a grouping of a timestamp field into a date histogram.
|
|
17
|
+
# For the relevant Elasticsearch docs, see:
|
|
18
|
+
# https://www.elastic.co/guide/en/elasticsearch/reference/7.12/search-aggregations-bucket-datehistogram-aggregation.html
|
|
19
|
+
# https://www.elastic.co/guide/en/elasticsearch/reference/7.12/search-aggregations-bucket-composite-aggregation.html#_date_histogram
|
|
20
|
+
class DateHistogramGrouping < Support::MemoizableData.define(:field_path, :interval, :time_zone, :offset)
  # The grouping key: the GraphQL-side names of the field path, encoded as a single string.
  def key
    @key ||= FieldPathEncoder.encode(field_path.map(&:name_in_graphql_query))
  end

  # The index-side field path joined into a single string. Path segments that have no
  # index name are skipped.
  def encoded_index_field_path
    @encoded_index_field_path ||= FieldPathEncoder.join(field_path.filter_map(&:name_in_index))
  end

  # Builds the `date_histogram` clause for this grouping.
  #
  # `grouping_options` are merged on top of the interval options. `offset` and `time_zone`
  # are omitted from the clause when they are `nil`.
  #
  # Raises `ArgumentError` when `interval` is not one of the supported interval names.
  def composite_clause(grouping_options: {})
    interval_options = INTERVAL_OPTIONS_BY_NAME.fetch(interval) do
      raise ArgumentError, "#{interval.inspect} is an unsupported interval. Valid values: #{INTERVAL_OPTIONS_BY_NAME.keys.inspect}."
    end

    # `merge` returns a new hash, so the frozen interval options are never mutated.
    inner_hash = interval_options.merge(grouping_options).merge({
      "field" => encoded_index_field_path,
      "format" => DATASTORE_DATE_TIME_FORMAT,
      "offset" => offset,
      "time_zone" => time_zone
    }.compact)

    {"date_histogram" => inner_hash}
  end

  def non_composite_clause_for(query)
    # `min_doc_count: 1` is important so we don't have excess buckets when there is a large gap
    # between document dates. For example, if you group on a field at the year truncation unit, and
    # a one-off rogue document has an incorrect timestamp for hundreds of years ago, you'll wind
    # up with a bucket for each intervening year. `min_doc_count: 1` excludes those empty buckets.
    composite_clause(grouping_options: {"min_doc_count" => 1})
  end

  # Returns the shared (frozen) metadata hash describing how to read this grouping's buckets.
  def inner_meta
    INNER_META
  end

  # Frozen (including nested values) because `inner_meta` returns this same object to every caller.
  INNER_META = {
    # On a date histogram aggregation, the `key` is formatted as a number (milliseconds since epoch). We
    # need it formatted as a string, which `key_as_string` provides.
    "key_path" => ["key_as_string"].freeze,
    # Date histogram aggregations do not have any doc count error. Our resolver is generic and expects
    # there to always be a `doc_count_error_upper_bound`. So we want to tell it to merge an error of `0`
    # into each bucket.
    "merge_into_bucket" => {"doc_count_error_upper_bound" => 0}.freeze
  }.freeze

  INTERVAL_OPTIONS_BY_NAME = {
    # These intervals have only fixed intervals...
    "millisecond" => {"fixed_interval" => "1ms"},
    "second" => {"fixed_interval" => "1s"},
    # ...but the rest have calendar intervals, which we prefer.
    "minute" => {"calendar_interval" => "minute"},
    "hour" => {"calendar_interval" => "hour"},
    "day" => {"calendar_interval" => "day"},
    "week" => {"calendar_interval" => "week"},
    "month" => {"calendar_interval" => "month"},
    "quarter" => {"calendar_interval" => "quarter"},
    "year" => {"calendar_interval" => "year"}
  }.each_value(&:freeze).freeze
  private_constant :INTERVAL_OPTIONS_BY_NAME
end
|
|
81
|
+
end
|
|
82
|
+
end
|
|
83
|
+
end
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# Copyright 2024 Block, Inc.
|
|
2
|
+
#
|
|
3
|
+
# Use of this source code is governed by an MIT-style
|
|
4
|
+
# license that can be found in the LICENSE file or at
|
|
5
|
+
# https://opensource.org/licenses/MIT.
|
|
6
|
+
#
|
|
7
|
+
# frozen_string_literal: true
|
|
8
|
+
|
|
9
|
+
require "elastic_graph/error"
|
|
10
|
+
|
|
11
|
+
module ElasticGraph
|
|
12
|
+
class GraphQL
|
|
13
|
+
module Aggregation
|
|
14
|
+
module FieldPathEncoder
  # Embedded fields need to be specified with dot separators.
  DELIMITER = "."

  # Encodes a list of field names (e.g., ["amountMoney", "amount"]) as a single
  # path string (e.g., "amountMoney.amount"). Raises if any name already
  # contains the delimiter.
  def self.encode(field_names)
    field_names.each { |field_name| verify_delimiters(field_name) }
    join(field_names)
  end

  # Joins already-encoded paths with the delimiter, without validating them.
  def self.join(encoded_paths)
    encoded_paths.join(DELIMITER)
  end

  # Splits a field path (e.g., "amountMoney.amount") back into its field name
  # parts (["amountMoney", "amount"]).
  def self.decode(field_path)
    field_path.split(DELIMITER)
  end

  # Raises `InvalidArgumentValueError` when the string form of `str` contains the delimiter.
  def self.verify_delimiters(str)
    return unless str.to_s.include?(DELIMITER)

    raise InvalidArgumentValueError, %("#{str}" contains delimiter: "#{DELIMITER}")
  end
  private_class_method :verify_delimiters
end
|
|
45
|
+
end
|
|
46
|
+
end
|
|
47
|
+
end
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# Copyright 2024 Block, Inc.
|
|
2
|
+
#
|
|
3
|
+
# Use of this source code is governed by an MIT-style
|
|
4
|
+
# license that can be found in the LICENSE file or at
|
|
5
|
+
# https://opensource.org/licenses/MIT.
|
|
6
|
+
#
|
|
7
|
+
# frozen_string_literal: true
|
|
8
|
+
|
|
9
|
+
require "elastic_graph/graphql/aggregation/term_grouping"
|
|
10
|
+
|
|
11
|
+
module ElasticGraph
|
|
12
|
+
class GraphQL
|
|
13
|
+
module Aggregation
|
|
14
|
+
class FieldTermGrouping < Support::MemoizableData.define(:field_path)
  # @dynamic field_path
  include TermGrouping

  private

  # The terms sub-clause for this grouping: it targets the encoded index field path.
  def terms_subclause = {"field" => encoded_index_field_path}
end
|
|
24
|
+
end
|
|
25
|
+
end
|
|
26
|
+
end
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
# Copyright 2024 Block, Inc.
|
|
2
|
+
#
|
|
3
|
+
# Use of this source code is governed by an MIT-style
|
|
4
|
+
# license that can be found in the LICENSE file or at
|
|
5
|
+
# https://opensource.org/licenses/MIT.
|
|
6
|
+
#
|
|
7
|
+
# frozen_string_literal: true
|
|
8
|
+
|
|
9
|
+
require "elastic_graph/error"
|
|
10
|
+
require "elastic_graph/graphql/aggregation/field_path_encoder"
|
|
11
|
+
|
|
12
|
+
module ElasticGraph
|
|
13
|
+
class GraphQL
|
|
14
|
+
module Aggregation
|
|
15
|
+
module Key
  # The datastore only gives us an "aggregation key" (or name) to tie response values back to the part of
  # the request they came from. We use this delimiter to encode and decode aggregation keys.
  DELIMITER = ":"

  # Aggregation key implementation used when we're dealing with `aggregated_values`.
  class AggregatedValue < ::Data.define(
    # The name of the aggregation encoded into this key.
    :aggregation_name,
    # The path to the field used by this aggregation (encoded as a string)
    :encoded_field_path,
    # The name of the aggregation function, such as "sum".
    :function_name
  )
    # The field path is encoded during initialization so that every `AggregatedValue` instance
    # holds valid values for all attributes: `FieldPathEncoder.encode` raises if the field
    # path is invalid.
    def initialize(aggregation_name:, function_name:, field_path: [], encoded_field_path: FieldPathEncoder.encode(field_path))
      Key.verify_no_delimiter_in(aggregation_name, function_name, *field_path)

      super(aggregation_name:, encoded_field_path:, function_name:)
    end

    # Encodes this key as `[agg_name]:[field_path]:[function]`.
    def encode
      parts = [aggregation_name, encoded_field_path, function_name]
      Key.encode(parts)
    end

    # Decodes the encoded field path back into its list of field names.
    def field_path
      FieldPathEncoder.decode(encoded_field_path)
    end
  end

  # Encodes the key used for a `missing` aggregation used to provide a bucket for
  # documents that are missing a value for the field being grouped on.
  def self.missing_value_bucket_key(base_key)
    encode([base_key, "m"])
  end

  # Extracts an aggregation name from a string that could either already be an aggregation name, or could
  # be an encoded key. We need this for dealing with the multiple forms that aggregation responses take:
  #
  # - When we use `grouped_by`, we run a composite aggregation that has the aggregation name, and
  #   that shows up as a key directly under `aggregations` in the datastore response.
  # - For aggregations with no `grouped_by`, we encode the aggregation name in the key, and the keys
  #   directly under `aggregations` in the datastore response will take a form like:
  #   `[agg_name]:[field_path]:[function]`.
  #
  # It's also possible for these two forms to be mixed under `aggregations` on a datastore response,
  # where some hash keys are in one form and some are in the other form. This can happen when we run
  # multiple aggregations (some with `grouped_by`, some without) in the same query.
  def self.extract_aggregation_name_from(agg_name_or_key)
    # Splitting an empty string yields no parts, in which case we fall back to the input itself.
    aggregation_name, = agg_name_or_key.split(DELIMITER, 2)
    aggregation_name || agg_name_or_key
  end

  # Joins the given parts into a single key using the delimiter.
  def self.encode(parts)
    parts.join(DELIMITER)
  end

  # Raises `InvalidArgumentValueError` for the first part whose string form contains the
  # delimiter. Returns the parts when none do.
  def self.verify_no_delimiter_in(*parts)
    offending_part = parts.find { |part| part.to_s.include?(DELIMITER) }
    return parts if offending_part.nil?

    raise InvalidArgumentValueError, %("#{offending_part}" contains delimiter: "#{DELIMITER}")
  end
end
|
|
85
|
+
end
|
|
86
|
+
end
|
|
87
|
+
end
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# Copyright 2024 Block, Inc.
|
|
2
|
+
#
|
|
3
|
+
# Use of this source code is governed by an MIT-style
|
|
4
|
+
# license that can be found in the LICENSE file or at
|
|
5
|
+
# https://opensource.org/licenses/MIT.
|
|
6
|
+
#
|
|
7
|
+
# frozen_string_literal: true
|
|
8
|
+
|
|
9
|
+
require "elastic_graph/graphql/aggregation/field_path_encoder"
|
|
10
|
+
|
|
11
|
+
module ElasticGraph
|
|
12
|
+
class GraphQL
|
|
13
|
+
module Aggregation
|
|
14
|
+
# Represents a sub-aggregation on a `nested` field.
|
|
15
|
+
# For the relevant Elasticsearch docs, see:
|
|
16
|
+
# https://www.elastic.co/guide/en/elasticsearch/reference/8.10/search-aggregations-bucket-nested-aggregation.html
|
|
17
|
+
class NestedSubAggregation < ::Data.define(:nested_path, :query)
  # Builds the `nested` aggregation hash for this sub-aggregation.
  #
  # Returns an empty hash when the query yields no aggregation detail. Otherwise returns a
  # single-entry hash keyed by the encoded names of the parent queries followed by this query.
  def build_agg_hash(filter_interpreter, parent_queries:)
    detail = query.build_agg_detail(filter_interpreter, field_path: nested_path, parent_queries: parent_queries)
    return {} if detail.nil?

    agg_key = Key.encode([*parent_queries.map(&:name), query.name])
    index_path = FieldPathEncoder.encode(nested_path.filter_map(&:name_in_index))

    meta = detail.meta.merge(
      "size" => query.paginator.desired_page_size,
      "adapter" => query.grouping_adapter.meta_name
    )

    # `compact` drops `aggs` when the detail has no clauses.
    body = {
      "nested" => {"path" => index_path},
      "aggs" => detail.clauses,
      "meta" => meta
    }.compact

    {agg_key => body}
  end
end
|
|
35
|
+
end
|
|
36
|
+
end
|
|
37
|
+
end
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
# Copyright 2024 Block, Inc.
|
|
2
|
+
#
|
|
3
|
+
# Use of this source code is governed by an MIT-style
|
|
4
|
+
# license that can be found in the LICENSE file or at
|
|
5
|
+
# https://opensource.org/licenses/MIT.
|
|
6
|
+
#
|
|
7
|
+
# frozen_string_literal: true
|
|
8
|
+
|
|
9
|
+
module ElasticGraph
|
|
10
|
+
class GraphQL
|
|
11
|
+
module Aggregation
|
|
12
|
+
# Grouping adapter that avoids using a `composite` aggregation, due to limitations with Elasticsearch/OpenSearch.
|
|
13
|
+
module NonCompositeGroupingAdapter
  class << self
    # Short identifier for this grouping adapter.
    def meta_name
      "non_comp"
    end

    # Builds the aggregation detail for `query` by nesting one bucket aggregation per grouping.
    # The given block must return the hash of inner (sub-aggregation) clauses; an empty hash is
    # treated as "no inner clauses".
    def grouping_detail_for(query)
      date_groupings, term_groupings = query.groupings.partition do |grouping|
        grouping.is_a?(DateHistogramGrouping)
      end

      grouping_detail(date_groupings, query) do
        # We want term groupings inside date groupings so that, when our bucket aggregations might produce
        # inaccurate doc counts, the innermost grouping aggregation has `doc_count_error_upper_bound` on
        # its buckets allowing us to expose information about the accuracy.
        #
        # Date histogram aggregations do not include `doc_count_error_upper_bound` because, on their own, they are
        # always accurate, but they may not be accurate when used as a sub-aggregation of a `terms` aggregation.
        #
        # For more detail on the issue this ordering is designed to avoid, see:
        # https://discuss.elastic.co/t/accuracy-of-date-histogram-sub-aggregation-doc-count-under-terms-aggregation/348685
        grouping_detail(term_groupings, query) do
          inner_clauses = yield
          inner_clauses = nil if inner_clauses.empty?
          AggregationDetail.new(inner_clauses, {})
        end
      end
    end

    # Flattens the (possibly multi-level) buckets found at `buckets_path` within `sub_agg` into a
    # single list, sorts them, and keeps only the first `meta["size"]` of them.
    def prepare_response_buckets(sub_agg, buckets_path, meta)
      sort_and_truncate_buckets(format_buckets(sub_agg, buckets_path), meta.fetch("size"))
    end

    private

    def grouping_detail(groupings, query)
      # Our `reduce` here builds the date grouping clauses from the inside out (since each reduction step
      # wraps the prior step's result in an outer `aggs` hash). The natural result of that is a nested set of
      # date grouping clauses that "feels" inside-out compared to what you would naturally expect.
      #
      # While that causes no concrete issue, it's nice to avoid. Here we use `reverse` to correct for that.
      groupings.reverse.reduce(yield) do |inner_detail, grouping|
        inner_detail.wrap_with_grouping(grouping, query: query)
      end
    end

    # Formats the result of a bucket aggregation into a format that we can easily resolve. There are two things
    # this accomplishes:
    #
    # - Converts bucket keys into hashes that can be used to resolve `grouped_by` fields.
    # - Recursively flattens multiple levels of aggregations (which happens when we need to mix multiple kinds of
    #   bucket aggregations to group in the way the client requested) into a single flat list.
    def format_buckets(sub_agg, buckets_path, parent_key_fields: {}, parent_key_values: [])
      agg_with_buckets = sub_agg.dig(*buckets_path)

      missing_bucket = {
        # Doc counts in missing value buckets are always perfectly accurate.
        "doc_count_error_upper_bound" => 0
      }.merge(sub_agg.dig(*missing_bucket_path_from(buckets_path))) # : ::Hash[::String, untyped]

      meta = agg_with_buckets.fetch("meta")

      grouping_field_names = meta.fetch("grouping_fields") # provides the names of the fields being grouped on
      key_path = meta.fetch("key_path") # indicates whether we want to get the key values from `key` or `key_as_string`.
      sub_buckets_path = meta["buckets_path"] # buckets_path is optional, so we don't use fetch.
      merge_into_bucket = meta.fetch("merge_into_bucket")

      raw_buckets = agg_with_buckets.fetch("buckets") # : ::Array[::Hash[::String, untyped]]

      # If the missing bucket is non-empty, include it. This matches the behavior of composite aggregations
      # when the `missing_bucket` option is used.
      raw_buckets += [missing_bucket] if missing_bucket.fetch("doc_count") > 0

      raw_buckets.flat_map do |raw_bucket|
        # The key will either be a single value (e.g. `47`) if we used a `terms`/`date_histogram` aggregation,
        # or a tuple of values (e.g. `[47, "abc"]`) if we used a `multi_terms` aggregation. Here we convert it
        # to the form needed for resolving `grouped_by` fields: a hash like `{"size" => 47, "tag" => "abc"}`.
        key_values = Array(raw_bucket.dig(*key_path))
        key_fields_hash = grouping_field_names.zip(key_values).to_h

        # If we have multiple levels of aggregations, we need to merge the key fields hash with the key fields from the parent levels.
        key_fields = parent_key_fields.merge(key_fields_hash)
        key_values = parent_key_values + key_values

        # If there's another level of aggregations, `buckets_path` will provide us with the path to that next level.
        # We can use it to recurse as we build a flat list of buckets.
        if sub_buckets_path
          format_buckets(raw_bucket, sub_buckets_path, parent_key_fields: key_fields, parent_key_values: key_values)
        else
          [raw_bucket.merge(merge_into_bucket).merge({"key" => key_fields, "key_values" => key_values})]
        end
      end
    end

    # A `terms` or `multi_terms` sub-aggregation is automatically sorted by `doc_count` and we pass
    # `size` to the datastore to limit the number of returned buckets.
    #
    # A `date_histogram` sub-aggregation is sorted ascending by the date, and we don't limit the buckets
    # in any way (there's no `size` parameter).
    #
    # To honor the requested page size and return buckets in a consistent order, we sort the buckets here
    # (by doc count descending, then by the key values ascending), and then take only first `size`.
    #
    # Buckets that tie on doc count can have key values that Ruby cannot compare with each other (for
    # example `true` vs `false`, or an Integer vs a String when a missing-value bucket makes one tuple
    # shorter). An explicit comparator is used so that such ties fall back to a string comparison
    # instead of raising.
    def sort_and_truncate_buckets(buckets, size)
      buckets
        .sort do |bucket_a, bucket_b|
          by_doc_count = bucket_b.fetch("doc_count") <=> bucket_a.fetch("doc_count")

          if by_doc_count.zero?
            compare_key_values(bucket_a.fetch("key_values"), bucket_b.fetch("key_values"))
          else
            by_doc_count
          end
        end
        .first(size)
    end

    # Compares two key value tuples. The native array comparison is used whenever it is defined, which
    # keeps numeric and string keys in their natural order. When it is undefined (`nil`), the tuples
    # are compared by the string form of their elements.
    def compare_key_values(left, right)
      (left <=> right) || (left.map(&:to_s) <=> right.map(&:to_s))
    end

    # Returns the path to the missing-value bucket that sits alongside the aggregation at `buckets_path`:
    # the same path with its last segment replaced by the corresponding missing-value bucket key.
    def missing_bucket_path_from(buckets_path)
      *all_but_last, last = buckets_path
      all_but_last + [Key.missing_value_bucket_key(last.to_s)]
    end
  end
end
|
|
127
|
+
end
|
|
128
|
+
end
|
|
129
|
+
end
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# Copyright 2024 Block, Inc.
|
|
2
|
+
#
|
|
3
|
+
# Use of this source code is governed by an MIT-style
|
|
4
|
+
# license that can be found in the LICENSE file or at
|
|
5
|
+
# https://opensource.org/licenses/MIT.
|
|
6
|
+
#
|
|
7
|
+
# frozen_string_literal: true
|
|
8
|
+
|
|
9
|
+
module ElasticGraph
|
|
10
|
+
class GraphQL
|
|
11
|
+
module Aggregation
|
|
12
|
+
PathSegment = ::Data.define(
  # The name of this segment's field in the GraphQL query. If it's an aliased field, this
  # will be the alias name.
  :name_in_graphql_query,
  # The name of this segment's field in the datastore index.
  :name_in_index
) do
  # Factory method that builds a `PathSegment` from a `lookahead` node and an optional `field`.
  #
  # The GraphQL name is taken from the first AST node of the lookahead, preferring its alias.
  # The index name is the string form of the field's `name_in_index`, or `nil` when no field
  # (or no index name) is available.
  def self.for(lookahead:, field: nil)
    first_node = lookahead.ast_nodes.first # : ::GraphQL::Language::Nodes::Field
    graphql_name = first_node.alias || first_node.name
    index_name = field&.name_in_index&.to_s

    new(name_in_graphql_query: graphql_name, name_in_index: index_name)
  end
end
|
|
29
|
+
end
|
|
30
|
+
end
|
|
31
|
+
end
|