elasticgraph-graphql 0.18.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE.txt +21 -0
- data/README.md +3 -0
- data/elasticgraph-graphql.gemspec +23 -0
- data/lib/elastic_graph/graphql/aggregation/composite_grouping_adapter.rb +79 -0
- data/lib/elastic_graph/graphql/aggregation/computation.rb +39 -0
- data/lib/elastic_graph/graphql/aggregation/date_histogram_grouping.rb +83 -0
- data/lib/elastic_graph/graphql/aggregation/field_path_encoder.rb +47 -0
- data/lib/elastic_graph/graphql/aggregation/field_term_grouping.rb +26 -0
- data/lib/elastic_graph/graphql/aggregation/key.rb +87 -0
- data/lib/elastic_graph/graphql/aggregation/nested_sub_aggregation.rb +37 -0
- data/lib/elastic_graph/graphql/aggregation/non_composite_grouping_adapter.rb +129 -0
- data/lib/elastic_graph/graphql/aggregation/path_segment.rb +31 -0
- data/lib/elastic_graph/graphql/aggregation/query.rb +172 -0
- data/lib/elastic_graph/graphql/aggregation/query_adapter.rb +345 -0
- data/lib/elastic_graph/graphql/aggregation/query_optimizer.rb +187 -0
- data/lib/elastic_graph/graphql/aggregation/resolvers/aggregated_values.rb +41 -0
- data/lib/elastic_graph/graphql/aggregation/resolvers/count_detail.rb +44 -0
- data/lib/elastic_graph/graphql/aggregation/resolvers/grouped_by.rb +30 -0
- data/lib/elastic_graph/graphql/aggregation/resolvers/node.rb +64 -0
- data/lib/elastic_graph/graphql/aggregation/resolvers/relay_connection_builder.rb +83 -0
- data/lib/elastic_graph/graphql/aggregation/resolvers/sub_aggregations.rb +82 -0
- data/lib/elastic_graph/graphql/aggregation/script_term_grouping.rb +32 -0
- data/lib/elastic_graph/graphql/aggregation/term_grouping.rb +118 -0
- data/lib/elastic_graph/graphql/client.rb +43 -0
- data/lib/elastic_graph/graphql/config.rb +81 -0
- data/lib/elastic_graph/graphql/datastore_query/document_paginator.rb +100 -0
- data/lib/elastic_graph/graphql/datastore_query/index_expression_builder.rb +142 -0
- data/lib/elastic_graph/graphql/datastore_query/paginator.rb +199 -0
- data/lib/elastic_graph/graphql/datastore_query/routing_picker.rb +239 -0
- data/lib/elastic_graph/graphql/datastore_query.rb +372 -0
- data/lib/elastic_graph/graphql/datastore_response/document.rb +78 -0
- data/lib/elastic_graph/graphql/datastore_response/search_response.rb +79 -0
- data/lib/elastic_graph/graphql/datastore_search_router.rb +151 -0
- data/lib/elastic_graph/graphql/decoded_cursor.rb +120 -0
- data/lib/elastic_graph/graphql/filtering/boolean_query.rb +45 -0
- data/lib/elastic_graph/graphql/filtering/field_path.rb +81 -0
- data/lib/elastic_graph/graphql/filtering/filter_args_translator.rb +58 -0
- data/lib/elastic_graph/graphql/filtering/filter_interpreter.rb +526 -0
- data/lib/elastic_graph/graphql/filtering/filter_value_set_extractor.rb +148 -0
- data/lib/elastic_graph/graphql/filtering/range_query.rb +56 -0
- data/lib/elastic_graph/graphql/http_endpoint.rb +229 -0
- data/lib/elastic_graph/graphql/monkey_patches/schema_field.rb +56 -0
- data/lib/elastic_graph/graphql/monkey_patches/schema_object.rb +48 -0
- data/lib/elastic_graph/graphql/query_adapter/filters.rb +161 -0
- data/lib/elastic_graph/graphql/query_adapter/pagination.rb +27 -0
- data/lib/elastic_graph/graphql/query_adapter/requested_fields.rb +124 -0
- data/lib/elastic_graph/graphql/query_adapter/sort.rb +32 -0
- data/lib/elastic_graph/graphql/query_details_tracker.rb +60 -0
- data/lib/elastic_graph/graphql/query_executor.rb +200 -0
- data/lib/elastic_graph/graphql/resolvers/get_record_field_value.rb +49 -0
- data/lib/elastic_graph/graphql/resolvers/graphql_adapter.rb +114 -0
- data/lib/elastic_graph/graphql/resolvers/list_records.rb +29 -0
- data/lib/elastic_graph/graphql/resolvers/nested_relationships.rb +74 -0
- data/lib/elastic_graph/graphql/resolvers/query_adapter.rb +85 -0
- data/lib/elastic_graph/graphql/resolvers/query_source.rb +46 -0
- data/lib/elastic_graph/graphql/resolvers/relay_connection/array_adapter.rb +71 -0
- data/lib/elastic_graph/graphql/resolvers/relay_connection/generic_adapter.rb +65 -0
- data/lib/elastic_graph/graphql/resolvers/relay_connection/page_info.rb +82 -0
- data/lib/elastic_graph/graphql/resolvers/relay_connection/search_response_adapter_builder.rb +40 -0
- data/lib/elastic_graph/graphql/resolvers/relay_connection.rb +42 -0
- data/lib/elastic_graph/graphql/resolvers/resolvable_value.rb +56 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/cursor.rb +35 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/date.rb +64 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/date_time.rb +60 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/local_time.rb +30 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/longs.rb +47 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/no_op.rb +24 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/time_zone.rb +44 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/untyped.rb +32 -0
- data/lib/elastic_graph/graphql/scalar_coercion_adapters/valid_time_zones.rb +634 -0
- data/lib/elastic_graph/graphql/schema/arguments.rb +78 -0
- data/lib/elastic_graph/graphql/schema/enum_value.rb +30 -0
- data/lib/elastic_graph/graphql/schema/field.rb +147 -0
- data/lib/elastic_graph/graphql/schema/relation_join.rb +103 -0
- data/lib/elastic_graph/graphql/schema/type.rb +263 -0
- data/lib/elastic_graph/graphql/schema.rb +164 -0
- data/lib/elastic_graph/graphql.rb +253 -0
- data/script/dump_time_zones +81 -0
- data/script/dump_time_zones.java +17 -0
- metadata +503 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: b19df00bc750f39aa2cbccab58af18c6cb2a2329017e9d0aa8f889dce5b1c377
|
4
|
+
data.tar.gz: 8308fce167bd7cf625bb7b82f2f717c174ffa12655b1df77e58e37369b8d6294
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: c983f82a5da8da47c2f8146337b9425623d1ebfacf0e2f93db706cd53e465725f12cc43fe4e49d2b7885422a428e7744ae61642264941d3d497eb4d4fd3bc9f6
|
7
|
+
data.tar.gz: 277958b0fee144f62342f2e3081f4ebbb9a7529456e62a2b5183ad8fe93f2cca7fd5013cbe9bf461fceee46d5f42cea8149d40b624410fbb9498267e78d1dedb
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2024 Block, Inc.
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# Copyright 2024 Block, Inc.
|
2
|
+
#
|
3
|
+
# Use of this source code is governed by an MIT-style
|
4
|
+
# license that can be found in the LICENSE file or at
|
5
|
+
# https://opensource.org/licenses/MIT.
|
6
|
+
#
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require_relative "../gemspec_helper"
|
10
|
+
|
11
|
+
ElasticGraphGemspecHelper.define_elasticgraph_gem(gemspec_file: __FILE__, category: :core) do |spec, eg_version|
  spec.summary = "The ElasticGraph GraphQL query engine."

  # Runtime dependencies on sibling ElasticGraph gems are pinned to the same ElasticGraph version.
  %w[
    elasticgraph-datastore_core
    elasticgraph-schema_artifacts
  ].each do |gem_name|
    spec.add_dependency gem_name, eg_version
  end

  # The `graphql` gem is constrained to the 2.3.x series (2.3.7 or later).
  spec.add_dependency "graphql", ">= 2.3.7", "< 2.4"

  # Development-only dependencies, also pinned to the same ElasticGraph version.
  %w[
    elasticgraph-admin
    elasticgraph-elasticsearch
    elasticgraph-opensearch
    elasticgraph-indexer
    elasticgraph-schema_definition
  ].each do |gem_name|
    spec.add_development_dependency gem_name, eg_version
  end
end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
# Copyright 2024 Block, Inc.
|
2
|
+
#
|
3
|
+
# Use of this source code is governed by an MIT-style
|
4
|
+
# license that can be found in the LICENSE file or at
|
5
|
+
# https://opensource.org/licenses/MIT.
|
6
|
+
#
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
module ElasticGraph
  class GraphQL
    module Aggregation
      # Grouping adapter built on top of the datastore's `composite` aggregation.
      #
      # At present it is only applied to the outermost "root" aggregations, although it may be
      # used for sub-aggregations in the future.
      module CompositeGroupingAdapter
        class << self
          # Short identifier for this adapter.
          def meta_name
            "comp"
          end

          # Builds the `AggregationDetail` for `query`. The given block must return the inner
          # aggregation clauses (a hash) to be nested under the grouping; an empty hash is
          # treated as "no inner clauses".
          def grouping_detail_for(query)
            sources = build_sources(query)

            sub_clauses = yield
            sub_clauses = nil if sub_clauses.empty?

            if sources.empty?
              # Nothing to group on: expose the inner clauses directly, without a composite wrapper.
              AggregationDetail.new(sub_clauses, {})
            else
              agg_name = query.name

              composite_options = {
                "size" => query.paginator.requested_page_size,
                "sources" => sources,
                "after" => composite_after(query)
              }.compact

              agg_body = {"composite" => composite_options, "aggs" => sub_clauses}.compact

              AggregationDetail.new({agg_name => agg_body}, {"buckets_path" => [agg_name]})
            end
          end

          # Extracts the buckets found at `buckets_path` within `sub_agg`, merging a
          # `doc_count_error_upper_bound` of `0` into each one. `meta` is not used by this adapter.
          def prepare_response_buckets(sub_agg, buckets_path, meta)
            zero_error = {"doc_count_error_upper_bound" => 0}
            sub_agg.dig(*buckets_path).fetch("buckets").map { |bucket| bucket.merge(zero_error) }
          end

          private

          # Returns the `after` value for the composite aggregation (the cursor's sort values), or
          # `nil` when the paginator has no `search_after` cursor. Raises a GraphQL execution error
          # when the cursor's keys do not match the keys of the query's groupings.
          def composite_after(query)
            cursor = query.paginator.search_after
            return unless cursor

            expected_keys = query.groupings.map(&:key)
            return cursor.sort_values if cursor.sort_values.keys.sort == expected_keys.sort

            raise ::GraphQL::ExecutionError, "`#{cursor.encode}` is not a valid cursor for the current groupings."
          end

          # Builds the list of composite `sources`, one single-entry hash per grouping.
          def build_sources(query)
            # We don't want documents that have no value for a grouping field to be omitted, so we set `missing_bucket: true`.
            # https://www.elastic.co/guide/en/elasticsearch/reference/8.11/search-aggregations-bucket-composite-aggregation.html#_missing_bucket
            grouping_options = {"missing_bucket" => true}
            grouping_options = {"order" => "desc"}.merge(grouping_options) if query.paginator.search_in_reverse?

            query.groupings.map { |grouping| {grouping.key => grouping.composite_clause(grouping_options: grouping_options)} }
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# Copyright 2024 Block, Inc.
|
2
|
+
#
|
3
|
+
# Use of this source code is governed by an MIT-style
|
4
|
+
# license that can be found in the LICENSE file or at
|
5
|
+
# https://opensource.org/licenses/MIT.
|
6
|
+
#
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require "elastic_graph/graphql/aggregation/key"
|
10
|
+
require "elastic_graph/graphql/aggregation/field_path_encoder"
|
11
|
+
|
12
|
+
module ElasticGraph
  class GraphQL
    module Aggregation
      # Models an aggregation computation (min, max, avg, sum, etc) applied to a field.
      # Relevant Elasticsearch documentation:
      # https://www.elastic.co/guide/en/elasticsearch/reference/7.12/search-aggregations-metrics-avg-aggregation.html
      # https://www.elastic.co/guide/en/elasticsearch/reference/7.12/search-aggregations-metrics-max-aggregation.html
      # https://www.elastic.co/guide/en/elasticsearch/reference/7.12/search-aggregations-metrics-min-aggregation.html
      # https://www.elastic.co/guide/en/elasticsearch/reference/7.12/search-aggregations-metrics-sum-aggregation.html
      Computation = ::Data.define(:source_field_path, :computed_index_field_name, :detail) do
        # @implements Computation

        # Returns the encoded aggregation key for this computation under `aggregation_name`. The
        # key is built from the GraphQL-query names of the path segments.
        def key(aggregation_name:)
          graphql_field_path = source_field_path.map(&:name_in_graphql_query)

          aggregated_value_key = Key::AggregatedValue.new(
            aggregation_name: aggregation_name,
            field_path: graphql_field_path,
            function_name: computed_index_field_name
          )

          aggregated_value_key.encode
        end

        # Returns the datastore clause for this computation: the function name mapped to the
        # index field path. Path segments without a `name_in_index` are skipped.
        def clause
          index_field_names = source_field_path.filter_map(&:name_in_index)
          encoded_path = FieldPathEncoder.join(index_field_names)
          function_name = detail.function.to_s

          {function_name => {"field" => encoded_path}}
        end
      end
    end
  end
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
# Copyright 2024 Block, Inc.
|
2
|
+
#
|
3
|
+
# Use of this source code is governed by an MIT-style
|
4
|
+
# license that can be found in the LICENSE file or at
|
5
|
+
# https://opensource.org/licenses/MIT.
|
6
|
+
#
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require "elastic_graph/constants"
|
10
|
+
require "elastic_graph/graphql/aggregation/field_path_encoder"
|
11
|
+
require "elastic_graph/support/memoizable_data"
|
12
|
+
|
13
|
+
module ElasticGraph
  class GraphQL
    module Aggregation
      # Models the grouping of a timestamp field into a date histogram.
      # Relevant Elasticsearch documentation:
      # https://www.elastic.co/guide/en/elasticsearch/reference/7.12/search-aggregations-bucket-datehistogram-aggregation.html
      # https://www.elastic.co/guide/en/elasticsearch/reference/7.12/search-aggregations-bucket-composite-aggregation.html#_date_histogram
      class DateHistogramGrouping < Support::MemoizableData.define(:field_path, :interval, :time_zone, :offset)
        INNER_META = {
          # On a date histogram aggregation, the `key` is formatted as a number (milliseconds since epoch). We
          # need it formatted as a string, which `key_as_string` provides.
          "key_path" => ["key_as_string"],
          # Date histogram aggregations do not have any doc count error. Our resolver is generic and expects
          # there to always be a `doc_count_error_upper_bound`. So we want to tell it to merge an error of `0`
          # into each bucket.
          "merge_into_bucket" => {"doc_count_error_upper_bound" => 0}
        }

        INTERVAL_OPTIONS_BY_NAME = {
          # These intervals have only fixed intervals...
          "millisecond" => {"fixed_interval" => "1ms"},
          "second" => {"fixed_interval" => "1s"},
          # ...but the rest have calendar intervals, which we prefer.
          "minute" => {"calendar_interval" => "minute"},
          "hour" => {"calendar_interval" => "hour"},
          "day" => {"calendar_interval" => "day"},
          "week" => {"calendar_interval" => "week"},
          "month" => {"calendar_interval" => "month"},
          "quarter" => {"calendar_interval" => "quarter"},
          "year" => {"calendar_interval" => "year"}
        }
        private_constant :INTERVAL_OPTIONS_BY_NAME

        # The grouping key: the encoded path of GraphQL-query field names (memoized).
        def key
          @key ||= FieldPathEncoder.encode(field_path.map(&:name_in_graphql_query))
        end

        # The joined path of index field names (memoized). Segments lacking a `name_in_index` are skipped.
        def encoded_index_field_path
          @encoded_index_field_path ||= FieldPathEncoder.join(field_path.filter_map(&:name_in_index))
        end

        # Builds the `date_histogram` clause. `grouping_options` are layered over the interval options,
        # and the field/format/offset/time_zone settings (those which are non-nil) are layered over both.
        # Raises `ArgumentError` when `interval` is not a supported interval name.
        def composite_clause(grouping_options: {})
          unless INTERVAL_OPTIONS_BY_NAME.key?(interval)
            raise ArgumentError, "#{interval.inspect} is an unsupported interval. Valid values: #{INTERVAL_OPTIONS_BY_NAME.keys.inspect}."
          end

          field_settings = {
            "field" => encoded_index_field_path,
            "format" => DATASTORE_DATE_TIME_FORMAT,
            "offset" => offset,
            "time_zone" => time_zone
          }.compact

          {"date_histogram" => INTERVAL_OPTIONS_BY_NAME.fetch(interval).merge(grouping_options, field_settings)}
        end

        # Builds the clause used when this grouping is not part of a composite aggregation.
        # `query` is accepted but not used here.
        def non_composite_clause_for(query)
          # `min_doc_count: 1` is important so we don't have excess buckets when there is a large gap
          # between document dates. For example, if you group on a field at the year truncation unit, and
          # a one-off rogue document has an incorrect timestamp for hundreds of years ago, you'll wind
          # up with a bucket for each intervening year. `min_doc_count: 1` excludes those empty buckets.
          composite_clause(grouping_options: {"min_doc_count" => 1})
        end

        # Metadata describing how buckets produced by this grouping should be interpreted.
        def inner_meta
          INNER_META
        end
      end
    end
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
# Copyright 2024 Block, Inc.
|
2
|
+
#
|
3
|
+
# Use of this source code is governed by an MIT-style
|
4
|
+
# license that can be found in the LICENSE file or at
|
5
|
+
# https://opensource.org/licenses/MIT.
|
6
|
+
#
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require "elastic_graph/error"
|
10
|
+
|
11
|
+
module ElasticGraph
  class GraphQL
    module Aggregation
      # Encodes lists of field names into delimited path strings, and decodes them back.
      module FieldPathEncoder
        # Embedded fields need to be specified with dot separators.
        DELIMITER = "."

        class << self
          # Converts a list of field names (e.g., ["amountMoney", "amount"]) into a single
          # field path string (e.g., "amountMoney.amount"). Raises `InvalidArgumentValueError`
          # if any of the names already contains the delimiter.
          def encode(field_names)
            field_names.each { |name| verify_delimiters(name) }
            join(field_names)
          end

          # Joins a list of already-encoded paths with the delimiter. No validation is applied.
          def join(encoded_paths)
            encoded_paths.join(DELIMITER)
          end

          # Splits a field path (e.g., "amountMoney.amount") into its field name parts
          # (["amountMoney", "amount"]).
          def decode(field_path)
            field_path.split(DELIMITER)
          end

          private

          # Raises when the string form of `str` contains the delimiter.
          def verify_delimiters(str)
            return unless str.to_s.include?(DELIMITER)

            raise InvalidArgumentValueError, %("#{str}" contains delimiter: "#{DELIMITER}")
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# Copyright 2024 Block, Inc.
|
2
|
+
#
|
3
|
+
# Use of this source code is governed by an MIT-style
|
4
|
+
# license that can be found in the LICENSE file or at
|
5
|
+
# https://opensource.org/licenses/MIT.
|
6
|
+
#
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require "elastic_graph/graphql/aggregation/term_grouping"
|
10
|
+
|
11
|
+
module ElasticGraph
  class GraphQL
    module Aggregation
      # A term grouping that groups directly on an index field.
      class FieldTermGrouping < Support::MemoizableData.define(:field_path)
        # @dynamic field_path
        include TermGrouping

        private

        # The sub-clause identifying which field to group on.
        # NOTE(review): `encoded_index_field_path` is presumably supplied by `TermGrouping` -- confirm.
        def terms_subclause
          index_field = encoded_index_field_path
          {"field" => index_field}
        end
      end
    end
  end
end
|
@@ -0,0 +1,87 @@
|
|
1
|
+
# Copyright 2024 Block, Inc.
|
2
|
+
#
|
3
|
+
# Use of this source code is governed by an MIT-style
|
4
|
+
# license that can be found in the LICENSE file or at
|
5
|
+
# https://opensource.org/licenses/MIT.
|
6
|
+
#
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require "elastic_graph/error"
|
10
|
+
require "elastic_graph/graphql/aggregation/field_path_encoder"
|
11
|
+
|
12
|
+
module ElasticGraph
  class GraphQL
    module Aggregation
      # Helpers for building and parsing aggregation keys.
      module Key
        # The datastore only gives us an "aggregation key" (or name) to tie response values back to the part of
        # the request they came from. We use this delimiter to encode and decode aggregation keys.
        DELIMITER = ":"

        # Aggregation key implementation used when we're dealing with `aggregated_values`.
        class AggregatedValue < ::Data.define(
          # The name of the aggregation encoded into this key.
          :aggregation_name,
          # The path to the field used by this aggregation (encoded as a string)
          :encoded_field_path,
          # The name of the aggregation function, such as "sum".
          :function_name
        )
          # The field path is encoded during initialization so that every `AggregatedValue` instance is
          # guaranteed to hold valid values for all attributes: `FieldPathEncoder.encode` is expected to
          # raise for an invalid field path, and the name parts are checked for the key delimiter here.
          def initialize(aggregation_name:, function_name:, field_path: [], encoded_field_path: FieldPathEncoder.encode(field_path))
            Key.verify_no_delimiter_in(aggregation_name, function_name, *field_path)

            attributes = {
              aggregation_name: aggregation_name,
              encoded_field_path: encoded_field_path,
              function_name: function_name
            }
            super(**attributes)
          end

          # The full encoded key: aggregation name, encoded field path and function name.
          def encode
            Key.encode([aggregation_name, encoded_field_path, function_name])
          end

          # The field path as a list of field names, decoded from `encoded_field_path`.
          def field_path
            FieldPathEncoder.decode(encoded_field_path)
          end
        end

        class << self
          # Encodes the key used for a `missing` aggregation used to provide a bucket for
          # documents that are missing a value for the field being grouped on.
          def missing_value_bucket_key(base_key)
            encode([base_key, "m"])
          end

          # Extracts an aggregation name from a string that may be either a bare aggregation name or an
          # encoded key. This is needed because aggregation responses come in more than one form:
          #
          # - When we use `grouped_by`, we run a composite aggregation that has the aggregation name, and
          #   that shows up as a key directly under `aggregations` in the datastore response.
          # - For aggregations with no `grouped_by`, we encode the aggregation name in the key, and the keys
          #   directly under `aggregations` in the datastore response will take a form like:
          #   `[agg_name]:[field_path]:[function]`.
          #
          # The two forms can also be mixed under `aggregations` in a single datastore response (some hash
          # keys in one form, some in the other), which can happen when multiple aggregations (some with
          # `grouped_by`, some without) run in the same query.
          def extract_aggregation_name_from(agg_name_or_key)
            leading_part, _remainder = agg_name_or_key.split(DELIMITER, 2)
            leading_part || agg_name_or_key
          end

          # Joins the given parts into an encoded key.
          def encode(parts)
            parts.join(DELIMITER)
          end

          # Raises `InvalidArgumentValueError` if the string form of any part contains the delimiter.
          def verify_no_delimiter_in(*parts)
            parts.each do |part|
              next unless part.to_s.include?(DELIMITER)

              raise InvalidArgumentValueError, %("#{part}" contains delimiter: "#{DELIMITER}")
            end
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# Copyright 2024 Block, Inc.
|
2
|
+
#
|
3
|
+
# Use of this source code is governed by an MIT-style
|
4
|
+
# license that can be found in the LICENSE file or at
|
5
|
+
# https://opensource.org/licenses/MIT.
|
6
|
+
#
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
require "elastic_graph/graphql/aggregation/field_path_encoder"
|
10
|
+
|
11
|
+
module ElasticGraph
  class GraphQL
    module Aggregation
      # Models a sub-aggregation on a `nested` field.
      # Relevant Elasticsearch documentation:
      # https://www.elastic.co/guide/en/elasticsearch/reference/8.10/search-aggregations-bucket-nested-aggregation.html
      class NestedSubAggregation < ::Data.define(:nested_path, :query)
        # Builds the datastore aggregation hash for this sub-aggregation, keyed by the encoded names of
        # the parent queries followed by this query's name. Returns an empty hash when the query
        # produces no aggregation detail.
        def build_agg_hash(filter_interpreter, parent_queries:)
          detail = query.build_agg_detail(filter_interpreter, field_path: nested_path, parent_queries: parent_queries)
          return {} if detail.nil?

          encoded_key = Key.encode([*parent_queries.map(&:name), query.name])
          index_path = FieldPathEncoder.encode(nested_path.filter_map(&:name_in_index))
          inner_clauses = detail.clauses

          # The page size and grouping adapter name are added to the detail's own meta.
          meta = detail.meta.merge({
            "size" => query.paginator.desired_page_size,
            "adapter" => query.grouping_adapter.meta_name
          })

          # `compact` drops the `aggs` entry when there are no inner clauses.
          agg_body = {
            "nested" => {"path" => index_path},
            "aggs" => inner_clauses,
            "meta" => meta
          }.compact

          {encoded_key => agg_body}
        end
      end
    end
  end
end
|
@@ -0,0 +1,129 @@
|
|
1
|
+
# Copyright 2024 Block, Inc.
|
2
|
+
#
|
3
|
+
# Use of this source code is governed by an MIT-style
|
4
|
+
# license that can be found in the LICENSE file or at
|
5
|
+
# https://opensource.org/licenses/MIT.
|
6
|
+
#
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
module ElasticGraph
  class GraphQL
    module Aggregation
      # Grouping adapter that avoids using a `composite` aggregation, due to limitations with Elasticsearch/OpenSearch.
      module NonCompositeGroupingAdapter
        class << self
          # Short identifier for this adapter.
          def meta_name
            "non_comp"
          end

          # Builds the aggregation detail for `query`, nesting term groupings inside date groupings.
          # The given block must return the inner aggregation clauses (a hash); an empty hash is
          # treated as "no inner clauses".
          def grouping_detail_for(query)
            date_groupings, term_groupings = query.groupings.partition do |grouping|
              grouping.is_a?(DateHistogramGrouping)
            end

            grouping_detail(date_groupings, query) do
              # We want term groupings inside date groupings so that, when our bucket aggregations might produce
              # inaccurate doc counts, the innermost grouping aggregation has `doc_count_error_upper_bound` on
              # its buckets allowing us to expose information about the accuracy.
              #
              # Date histogram aggregations do not include `doc_count_error_upper_bound` because, on their own, they are
              # always accurate, but they may not be accurate when used as a sub-aggregation of a `terms` aggregation.
              #
              # For more detail on the issue this ordering is designed to avoid, see:
              # https://discuss.elastic.co/t/accuracy-of-date-histogram-sub-aggregation-doc-count-under-terms-aggregation/348685
              grouping_detail(term_groupings, query) do
                inner_clauses = yield
                inner_clauses = nil if inner_clauses.empty?
                AggregationDetail.new(inner_clauses, {})
              end
            end
          end

          # Flattens the (possibly multi-level) buckets found at `buckets_path` within `sub_agg` into a
          # single list, sorts them, and keeps at most `meta.fetch("size")` of them.
          def prepare_response_buckets(sub_agg, buckets_path, meta)
            sort_and_truncate_buckets(format_buckets(sub_agg, buckets_path), meta.fetch("size"))
          end

          private

          # Wraps the detail returned by the block with each of the given groupings in turn.
          def grouping_detail(groupings, query)
            # Our `reduce` here builds the date grouping clauses from the inside out (since each reduction step
            # wraps the prior step's result in an outer `aggs` hash). The natural result of that is a nested set of
            # date grouping clauses that "feels" inside-out compared to what you would naturally expect.
            #
            # While that causes no concrete issue, it's nice to avoid. Here we use `reverse` to correct for that.
            groupings.reverse.reduce(yield) do |inner_detail, grouping|
              inner_detail.wrap_with_grouping(grouping, query: query)
            end
          end

          # Formats the result of a bucket aggregation into a format that we can easily resolve. There are two things
          # this accomplishes:
          #
          # - Converts bucket keys into hashes that can be used to resolve `grouped_by` fields.
          # - Recursively flattens multiple levels of aggregations (which happens when we need to mix multiple kinds of
          #   bucket aggregations to group in the way the client requested) into a single flat list.
          def format_buckets(sub_agg, buckets_path, parent_key_fields: {}, parent_key_values: [])
            agg_with_buckets = sub_agg.dig(*buckets_path)

            missing_bucket = {
              # Doc counts in missing value buckets are always perfectly accurate.
              "doc_count_error_upper_bound" => 0
            }.merge(sub_agg.dig(*missing_bucket_path_from(buckets_path))) # : ::Hash[::String, untyped]

            meta = agg_with_buckets.fetch("meta")

            grouping_field_names = meta.fetch("grouping_fields") # provides the names of the fields being grouped on
            key_path = meta.fetch("key_path") # indicates whether we want to get the key values from `key` or `key_as_string`.
            sub_buckets_path = meta["buckets_path"] # buckets_path is optional, so we don't use fetch.
            merge_into_bucket = meta.fetch("merge_into_bucket")

            raw_buckets = agg_with_buckets.fetch("buckets") # : ::Array[::Hash[::String, untyped]]

            # If the missing bucket is non-empty, include it. This matches the behavior of composite aggregations
            # when the `missing_bucket` option is used.
            raw_buckets += [missing_bucket] if missing_bucket.fetch("doc_count") > 0

            raw_buckets.flat_map do |raw_bucket|
              # The key will either be a single value (e.g. `47`) if we used a `terms`/`date_histogram` aggregation,
              # or a tuple of values (e.g. `[47, "abc"]`) if we used a `multi_terms` aggregation. Here we convert it
              # to the form needed for resolving `grouped_by` fields: a hash like `{"size" => 47, "tag" => "abc"}`.
              key_values = Array(raw_bucket.dig(*key_path))
              key_fields_hash = grouping_field_names.zip(key_values).to_h

              # If we have multiple levels of aggregations, we need to merge the key fields hash with the key fields from the parent levels.
              key_fields = parent_key_fields.merge(key_fields_hash)
              key_values = parent_key_values + key_values

              # If there's another level of aggregations, `buckets_path` will provide us with the path to that next level.
              # We can use it to recurse as we build a flat list of buckets.
              if sub_buckets_path
                format_buckets(raw_bucket, sub_buckets_path, parent_key_fields: key_fields, parent_key_values: key_values)
              else
                [raw_bucket.merge(merge_into_bucket).merge({"key" => key_fields, "key_values" => key_values})]
              end
            end
          end

          # A `terms` or `multi_terms` sub-aggregation is automatically sorted by `doc_count` and we pass
          # `size` to the datastore to limit the number of returned buckets.
          #
          # A `date_histogram` sub-aggregation is sorted ascending by the date, and we don't limit the buckets
          # in any way (there's no `size` parameter).
          #
          # To honor the requested page size and return buckets in a consistent order, we sort the buckets here
          # (by doc count descending, then by the key values ascending), and then take only first `size`.
          #
          # The key values are not compared directly: when two buckets tie on doc count, comparing raw key
          # values of different types (e.g. `nil` vs a string, or a number vs a string) would make the array
          # comparison return `nil` and the sort raise `ArgumentError`. We compare a type-ranked form instead.
          def sort_and_truncate_buckets(buckets, size)
            buckets
              .sort_by { |b| [-b.fetch("doc_count"), comparable_key_values(b.fetch("key_values"))] }
              .first(size)
          end

          # Converts each key value into a `[type_rank, value]` pair so that any two key value lists can
          # be compared. `nil` sorts first, then numbers (by numeric order), then everything else (by the
          # order of its string form). Lists made up only of numbers, or only of strings, keep exactly the
          # same relative order they would have if compared directly.
          def comparable_key_values(key_values)
            key_values.map do |value|
              case value
              when nil then [0, ""]
              when ::Numeric then [1, value]
              else [2, value.to_s]
              end
            end
          end

          # Derives the path of the `missing` bucket aggregation that accompanies the aggregation at
          # `buckets_path`: the same path with its final segment replaced by the missing-value bucket key.
          def missing_bucket_path_from(buckets_path)
            *all_but_last, last = buckets_path
            all_but_last + [Key.missing_value_bucket_key(last.to_s)]
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# Copyright 2024 Block, Inc.
|
2
|
+
#
|
3
|
+
# Use of this source code is governed by an MIT-style
|
4
|
+
# license that can be found in the LICENSE file or at
|
5
|
+
# https://opensource.org/licenses/MIT.
|
6
|
+
#
|
7
|
+
# frozen_string_literal: true
|
8
|
+
|
9
|
+
module ElasticGraph
  class GraphQL
    module Aggregation
      # One segment of a field path, carrying both its GraphQL-query name and its index name.
      PathSegment = ::Data.define(
        # The name of this segment's field in the GraphQL query. If it's an aliased field, this
        # will be the alias name.
        :name_in_graphql_query,
        # The name of this segment's field in the datastore index.
        :name_in_index
      ) do
        # Factory method that builds a `PathSegment` from a `lookahead` node and an optional `field`.
        # The query name comes from the lookahead's first AST node (its alias, when it has one,
        # otherwise its name). The index name is the string form of `field.name_in_index`, or `nil`
        # when `field` (or its index name) is `nil`.
        def self.for(lookahead:, field: nil)
          first_node = lookahead.ast_nodes.first # : ::GraphQL::Language::Nodes::Field
          query_name = first_node.alias || first_node.name
          index_name = field&.name_in_index&.to_s

          new(name_in_graphql_query: query_name, name_in_index: index_name)
        end
      end
    end
  end
end
|