elasticgraph-query_registry 0.18.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,195 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/graphql"
10
+ require "elastic_graph/support/from_yaml_file"
11
+ require "pathname"
12
+ require "rake/tasklib"
13
+
14
+ module ElasticGraph
15
+ module QueryRegistry
16
+ class RakeTasks < ::Rake::TaskLib
17
+ # @dynamic self.from_yaml_file
18
+ extend Support::FromYamlFile::ForRakeTasks.new(ElasticGraph::GraphQL)
19
+
20
# Stores the task configuration and then defines the rake tasks right away.
#
# - `registered_queries_by_client_dir`: directory containing the registered queries
#   (wrapped in a `Pathname`).
# - `require_eg_latency_slo_directive`: passed through to the query validator.
# - `output`: object that receives progress messages via `puts` (defaults to `$stdout`).
# - `load_graphql`: block that is called lazily to obtain the GraphQL instance.
def initialize(registered_queries_by_client_dir, require_eg_latency_slo_directive: false, output: $stdout, &load_graphql)
  @load_graphql = load_graphql
  @output = output
  @require_eg_latency_slo_directive = require_eg_latency_slo_directive
  @registered_queries_by_client_dir = Pathname.new(registered_queries_by_client_dir)

  define_tasks
end
28
+
29
+ private
30
+
31
# Defines the `query_registry:*` rake tasks. Each task body only delegates to a private
# helper of this class.
def define_tasks
  namespace :query_registry do
    desc "Validates the queries registered in `#{@registered_queries_by_client_dir}`"
    task(:validate_queries) { perform_query_validation }

    desc "Updates the registered information about query variables for a specific client (and optionally, a specific query)."
    task :dump_variables, :client, :query do |_task, args|
      client = args.fetch(:client)
      # Without an explicit query argument, a wildcard selects every query of the client.
      query = args.fetch(:query, "*")
      dump_variables("#{client}/#{query}.graphql")
    end

    namespace :dump_variables do
      desc "Updates the registered information about query variables for all clients."
      task(:all) { dump_variables("*/*.graphql") }
    end
  end
end
51
+
52
# Dumps the variables of every registered query file matching `query_glob` (a glob evaluated
# relative to the registry directory) into a sibling `.variables.yaml` file, and reports each
# written file on `@output`.
def dump_variables(query_glob)
  # These requires are intentionally deferred until the task runs: rake tasks should only
  # load their dependencies when executed, which keeps `rake -T` snappy and avoids loading
  # the dependencies of unrelated tasks.
  require "elastic_graph/query_registry/variable_dumper"
  require "yaml"

  dumper = VariableDumper.new(graphql.schema.graphql_schema)

  @registered_queries_by_client_dir.glob(query_glob) do |query_file|
    variables = dumper.dump_variables_for_query(query_file.read)
    target_path = variable_file_name_for(query_file.to_s)
    header = variable_file_docs(target_path)

    ::File.write(target_path, header + ::YAML.dump(variables))
    @output.puts "- Dumped `#{target_path}`."
  end
end
69
+
70
# Maps the name of a query file (`<name>.graphql`) to the name of the file that holds its
# dumped variables (`<name>.variables.yaml`).
def variable_file_name_for(query_file_name)
  base_name = query_file_name.delete_suffix(".graphql")
  "#{base_name}.variables.yaml"
end
73
+
74
# Builds the explanatory comment header written at the top of a dumped variables file.
#
# `file_name` is the path of a `<client>/<query>.variables.yaml` file. The client name is
# taken from the file's parent directory and the query name from the file's base name
# (without the `.variables.yaml` suffix). Deriving the names from the path components
# (rather than requiring a `/` in front of the client directory) means this also works for
# relative paths that start directly at the client directory, e.g. `client/Query.variables.yaml`.
#
# Returns the header as a multi-line string, each line being a YAML comment.
def variable_file_docs(file_name)
  path = file_name.to_s
  client_name = ::File.basename(::File.dirname(path))
  query_name = ::File.basename(path, ".variables.yaml")

  <<~EOS
    # Generated by `rake "query_registry:dump_variables[#{client_name}, #{query_name}]"`.
    # DO NOT EDIT BY HAND. Any edits will be lost the next time the rake task is run.
    #
    # This file exists to allow `elasticgraph-query_registry` to track the structure of
    # the variables for the `#{client_name}/#{query_name}` query, so that we can detect
    # when the schema structure of an object or enum variable changes in a way that might
    # break the client.
  EOS
end
88
+
89
# Validates the registered queries of every entry under the registry directory (in sorted
# order), printing a report per client to `@output`. Raises a `RuntimeError` stating the
# total number of validation errors if any were found.
def perform_query_validation
  # These requires are intentionally deferred until the task runs: rake tasks should only
  # load their dependencies when executed, which keeps `rake -T` snappy and avoids loading
  # the dependencies of unrelated tasks.
  require "elastic_graph/query_registry/query_validator"
  require "json"
  require "yaml"

  validator = QueryValidator.new(
    graphql.schema,
    require_eg_latency_slo_directive: @require_eg_latency_slo_directive
  )

  client_dirs = @registered_queries_by_client_dir.children.sort

  all_errors = client_dirs.flat_map do |client_dir|
    @output.puts "For client `#{client_dir.basename}`:"
    client_errors = validate_client_queries(validator, client_dir)
    @output.puts # blank separator line after each client's report
    client_errors
  end

  return if all_errors.empty?

  raise "Found #{count_description(all_errors, "validation error")} total across all queries."
end
114
+
115
# Validates every `*.graphql` file directly inside `client_dir` (in sorted order) using
# `validator`, printing a per-file and per-operation report to `@output`.
#
# An operation name that was already seen in an earlier file of the same client gets an
# additional "conflicting operation name" error.
#
# Returns a flat array with all errors found for the client.
def validate_client_queries(validator, client_dir)
  # Tracks the first file in which each operation name was seen.
  # @type var file_name_by_operation_name: ::Hash[::String, ::Pathname]
  file_name_by_operation_name = {}

  query_files = client_dir.glob("*.graphql").sort

  query_files.flat_map do |query_file|
    previously_dumped_variables = previously_dumped_variables_for(query_file.to_s)
    errors_by_operation_name = validator.validate(
      query_file.read,
      client_name: client_dir.basename.to_s,
      query_name: query_file.basename.to_s.delete_suffix(".graphql"),
      previously_dumped_variables: previously_dumped_variables
    )

    operation_count = count_description(errors_by_operation_name, "operation")
    @output.puts " - #{query_file.basename} (#{operation_count}):"

    errors_by_operation_name.flat_map do |op_name, op_errors|
      registry_key = op_name.to_s
      earlier_file = file_name_by_operation_name[registry_key]

      if earlier_file
        op_errors += [conflicting_operation_name_error(client_dir, op_name, earlier_file)]
      else
        file_name_by_operation_name[registry_key] = query_file
      end

      display_name = op_name || "(no operation name)"

      if op_errors.empty?
        @output.puts " - #{display_name}: ✅"
      else
        @output.puts " - #{display_name}: 🛑. Got #{count_description(op_errors, "validation error")}:\n"

        op_errors.each.with_index do |error, position|
          @output.puts format_error(query_file, position, error)
        end
      end

      op_errors
    end
  end
end
152
+
153
# Loads the previously dumped variables belonging to the given query file. Returns `nil`
# when the corresponding variables file does not exist.
def previously_dumped_variables_for(query_file_name)
  variables_path = variable_file_name_for(query_file_name)
  ::YAML.safe_load_file(variables_path) if ::File.exist?(variables_path)
end
158
+
159
# Builds the error hash reported when `operation_name` has already been defined by another
# query file (`conflicting_file_name`) of the same client. Only the base names of the client
# directory and of the conflicting file appear in the message.
def conflicting_operation_name_error(client_dir, operation_name, conflicting_file_name)
  client = client_dir.basename
  existing_file = conflicting_file_name.basename

  {
    "message" => "A `#{operation_name}` query already exists for `#{client}` in " \
      "`#{existing_file}`. Each query operation must have a unique name."
  }
end
165
+
166
# Renders one validation error hash as a block of text: a numbered headline with the
# message, followed by optional `path`, `source` (one per location) and detail lines.
# Detail lines come from the `extensions` hash merged with any other keys of the error.
#
# `index` is zero-based; the number shown is one-based. The result ends with a blank line.
def format_error(file_name, index, error_hash)
  lines = [" #{index + 1}) #{error_hash["message"]}"]

  path_segments = error_hash["path"]
  lines << " path: #{path_segments.join(".")}" unless path_segments.nil?

  (error_hash["locations"] || []).each do |location|
    lines << " source: #{file_name}:#{location["line"]}:#{location["column"]}"
  end

  # Keys without dedicated rendering are shown as details; on a key clash they take
  # precedence over the entry from `extensions`.
  extra_details = error_hash.except("message", "locations", "path", "extensions")
  (error_hash["extensions"] || {}).merge(extra_details).each do |key, value|
    lines << " #{key}: #{value}"
  end

  "#{lines.join("\n ")}\n\n"
end
184
+
185
# Describes how many items `collection` has, using `noun` in singular form when there is
# exactly one item and with an appended "s" otherwise (including zero).
def count_description(collection, noun)
  count = collection.size
  (count == 1) ? "1 #{noun}" : "#{count} #{noun}s"
end
189
+
190
# Returns the GraphQL instance, obtaining it from the `load_graphql` block on first use and
# reusing the cached value afterwards.
def graphql
  return @graphql if @graphql

  @graphql = @load_graphql.call
end
193
+ end
194
+ end
195
+ end
@@ -0,0 +1,101 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/query_registry/query_validators/for_registered_client"
10
+ require "elastic_graph/query_registry/query_validators/for_unregistered_client"
11
+ require "graphql"
12
+ require "pathname"
13
+
14
+ module ElasticGraph
15
+ module QueryRegistry
16
+ # An abstraction that implements a registry of GraphQL queries. Callers should use
17
+ # `build_and_validate_query` to get `GraphQL::Query` objects so that clients are properly
18
+ # limited in what queries we execute on their behalf.
19
+ #
20
+ # Note that this class is designed to be as efficient as possible:
21
+ #
22
+ # - Registered GraphQL queries are only parsed once, and then the parsed form is used
23
+ # each time that query is submitted. In our benchmarking, parsing of large queries
24
+ # can be significant, taking ~10ms or so.
25
+ # - We delay parsing a registered client's queries until the first time that client
26
+ # sends us a query. That way, we don't have to pay any parsing cost for queries
27
+ # that were registered by an old client that no longer sends us requests.
28
+ # - Likewise, we defer reading a client's registered query strings off of disk until
29
+ # the first time it submits a request.
30
+ #
31
+ # In addition, it's worth noting that we support some basic "fuzzy" matching of query
32
+ # strings, based on the query canonicalization performed by the GraphQL gem. Semantically
33
+ # insignificant changes to the query string from a registered query (such as whitespace
34
+ # differences, or comments) are tolerated.
35
+ class Registry
36
+ # Public factory method, which builds a `Registry` instance from the given directory.
37
+ # Subdirectories are treated as client names, and the files in them are treated as
38
+ # individually registered queries.
39
def self.build_from_directory(schema, directory, allow_unregistered_clients:, allow_any_query_for_clients:)
  registry_root = Pathname.new(directory)
  # Every entry directly under the registry root is treated as a client name.
  client_names = registry_root.children.map { |entry| entry.basename.to_s }

  new(
    schema,
    client_names: client_names,
    allow_unregistered_clients: allow_unregistered_clients,
    allow_any_query_for_clients: allow_any_query_for_clients
  ) do |client_name|
    # Query files are only read from disk when this block is called for a client.
    (registry_root + client_name).glob("*.graphql").map(&:read)
  end
end
52
+
53
+ # Builds a `GraphQL::Query` object for the given query string, and validates that it is
54
+ # an allowed query. Returns a list of registry validation errors in addition to the built
55
+ # query object. The list of validation errors will be empty if the query should be allowed.
56
+ # A query can be allowed either by virtue of being registered for usage by the given client,
57
+ # or by being for a completely unregistered client (if `allow_unregistered_clients` is `true`).
58
+ #
59
+ # This is also tolerant of some minimal differences in the query string (such as comments
60
+ # and whitespace). If the query differs in a significant way from a registered query, it
61
+ # will not be recognized as registered.
62
def build_and_validate_query(query_string, client:, variables: {}, operation_name: nil, context: {})
  registered = @registered_client_validator.applies_to?(client)
  validator = registered ? @registered_client_validator : @unregistered_client_validator

  validator.build_and_validate_query(
    query_string,
    client: client,
    variables: variables,
    operation_name: operation_name,
    context: context
  ) do
    # The chosen validator decides if and when to call this block to build the query object.
    ::GraphQL::Query.new(
      @graphql_schema,
      query_string,
      variables: variables,
      operation_name: operation_name,
      context: context
    )
  end
end
80
+
81
+ private
82
+
83
# Builds the two validators this registry dispatches to.
#
# - `schema`: object whose `graphql_schema` is used when building queries.
# - `client_names`: names of the registered clients.
# - `allow_unregistered_clients` / `allow_any_query_for_clients`: passed on to the validators
#   (the latter converted to a set first).
# - `provide_query_strings_for_client`: block handed to the registered-client validator.
def initialize(schema, client_names:, allow_unregistered_clients:, allow_any_query_for_clients:, &provide_query_strings_for_client)
  @graphql_schema = schema.graphql_schema

  # Converted once; both validators are given the same set instance.
  unrestricted_clients = allow_any_query_for_clients.to_set

  @registered_client_validator = QueryValidators::ForRegisteredClient.new(
    schema: schema,
    client_names: client_names,
    allow_any_query_for_clients: unrestricted_clients,
    provide_query_strings_for_client: provide_query_strings_for_client
  )

  @unregistered_client_validator = QueryValidators::ForUnregisteredClient.new(
    allow_unregistered_clients: allow_unregistered_clients,
    allow_any_query_for_clients: unrestricted_clients
  )
end
99
+ end
100
+ end
101
+ end
@@ -0,0 +1,104 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ module ElasticGraph
10
+ module QueryRegistry
11
+ # Responsible for comparing old and new variable type info to see if any changes are backwards
12
+ # incompatible (and thus may break the client). Incompatibilities are identified by path and described.
13
class VariableBackwardIncompatibilityDetector
  # Entry point. Compares the old variables of an operation (`old_op_vars`) with the new ones
  # (`new_op_vars`) and returns an array of `Incompatibility` objects, one for each backward
  # incompatible change. Variable paths start with `$`.
  def detect(old_op_vars:, new_op_vars:)
    detect_incompatibilities(old_op_vars, new_op_vars, "$", "variable")
  end

  private

  # Compares an `old` and a `new` hash (either hashes of variables or hashes of object fields)
  # and describes the incompatibilities between them. `path` is prepended to each entry name and
  # `entry_type` ("variable" or "field") is used in the wording of explanations.
  #
  # The result lists removals first, then changes to entries present in both hashes, then
  # newly added required entries.
  def detect_incompatibilities(old, new, path, entry_type)
    removed_names = old.keys - new.keys
    shared_names = old.keys & new.keys
    added_names = new.keys - old.keys

    # Every removal is incompatible, since the client might pass a value for the variable or field.
    removals = removed_names.map { |name| Incompatibility.new("#{path}#{name}", "removed") }

    changes = shared_names.flat_map do |name|
      incompatibilities_for("#{path}#{name}", normalize_type_info(old[name]), normalize_type_info(new[name]))
    end

    # An addition is only incompatible when it is required (non-nullable).
    required_additions = added_names
      .select { |name| normalize_type_info(new[name]).fetch("type").end_with?("!") }
      .map { |name| Incompatibility.new("#{path}#{name}", "new required #{entry_type}") }

    removals + changes + required_additions
  end

  # Describes the incompatibilities between the old and new type info of a single entry:
  # type signature changes, removed enum values, and (recursively) object field changes.
  def incompatibilities_for(path, old_type_info, new_type_info)
    [
      type_incompatibilities(path, old_type_info.fetch("type"), new_type_info.fetch("type")),
      enum_value_incompatibilities(path, old_type_info["values"], new_type_info["values"]),
      object_field_incompatibilities(path, old_type_info["fields"], new_type_info["fields"])
    ].flatten(1)
  end

  # Describes the incompatibilities between the old and new type names.
  def type_incompatibilities(path, old_type, new_type)
    explanation =
      if new_type == "#{old_type}!"
        # The variable or field became required: a client that does not pass a value for it
        # could be broken by this change.
        "required for the first time"
      elsif new_type == old_type || old_type == "#{new_type}!"
        # Either nothing changed, or nullability was relaxed (which cannot break the client).
        nil
      else
        # The type name changed. Some type name changes are compatible (e.g. from `ID` to
        # `String`), but no attempt is made to figure things out at that granularity.
        "type changed from `#{old_type}` to `#{new_type}`"
      end

    explanation ? [Incompatibility.new(path, explanation)] : []
  end

  # Describes the incompatibilities between the old and new enum values for a field or variable.
  # Nothing is reported unless enum values are available on both sides.
  def enum_value_incompatibilities(path, old_enum_values, new_enum_values)
    removed_values = (old_enum_values && new_enum_values) ? old_enum_values - new_enum_values : []

    if removed_values.empty?
      []
    else
      # A removed enum value could break the client if it ever passes that value in a query.
      [Incompatibility.new(path, "removed enum values: #{removed_values.join(", ")}")]
    end
  end

  # Describes the incompatibilities between old and new object fields via recursion.
  # Nothing is reported unless fields are available on both sides.
  def object_field_incompatibilities(path, old_fields, new_fields)
    if old_fields && new_fields
      detect_incompatibilities(old_fields, new_fields, "#{path}.", "field")
    else
      []
    end
  end

  # A `type_info` can be a plain type signature string; this wraps such strings in a hash so
  # that every type info can be treated uniformly as a hash with a `type` entry.
  def normalize_type_info(type_info)
    case type_info
    when ::String then {"type" => type_info}
    else _ = type_info
    end
  end

  # Represents a single incompatibility: the `path` of the affected variable or field, and an
  # `explanation` of what changed.
  Incompatibility = ::Data.define(:path, :explanation) do
    # @implements Incompatibility
    def description
      "#{path} (#{explanation})"
    end
  end
end
103
+ end
104
+ end
@@ -0,0 +1,110 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "graphql"
10
+
11
+ module ElasticGraph
12
+ module QueryRegistry
13
+ # Responsible for dumping structural information about query variables.
14
+ #
15
+ # This is necessary for the query registry to be able to support object and enum variables.
16
+ # To understand why, consider what happens when a field is removed from an input object
17
+ # variable used by a client's query. Whether or not that will break the client depends
18
+ # on which fields of the input object the client populates when sending the query to
19
+ # ElasticGraph. Similarly, if an enum value is removed from an enum value variable used by
20
+ # a client, it could be a breaking change (but only if the client ever passes the removed
21
+ # enum value).
22
+ #
23
+ # To detect this situation, we use this to dump the structural information about all variables.
24
+ # When the structure of variables changes, we can then tell the engineer that they need to verify
25
+ # that it won't break the client.
26
class VariableDumper
  # `graphql_schema` is the GraphQL schema used to parse query strings and to resolve the
  # types referenced by query variables.
  def initialize(graphql_schema)
    @graphql_schema = graphql_schema
  end

  # Returns a hash of operations from the given query string. For each operation, the value
  # is a hash of variables.
  def dump_variables_for_query(query_string)
    query = ::GraphQL::Query.new(@graphql_schema, query_string, validate: false)

    if query.document.nil?
      # If the query was unparsable, we don't know anything about the variables and must just return an empty hash.
      {}
    else
      # @type var operations: ::Array[::GraphQL::Language::Nodes::OperationDefinition]
      operations = _ = query.document.definitions.grep(::GraphQL::Language::Nodes::OperationDefinition)
      dump_variables_for_operations(operations)
    end
  end

  # Returns a hash containing the variables for each operation, keyed by operation name.
  # Operations without a name are keyed by a placeholder that includes their 1-based position.
  def dump_variables_for_operations(operations)
    operations.each_with_index.to_h do |operation, index|
      [operation.name || "(Anonymous operation #{index + 1})", variables_for_op(operation)]
    end
  end

  private

  # Returns a hash of variables (sorted by name) for the given GraphQL operation.
  def variables_for_op(operation)
    operation.variables.sort_by(&:name).to_h do |variable|
      type_info =
        if (type = @graphql_schema.type_from_ast(variable.type))
          type_info(type)
        else
          # We should only get here if a variable references a type that is undefined. Since we
          # don't know anything about the type other than the name, that's all we can return.
          variable.type.to_query_string
        end

      [variable.name, type_info]
    end
  end

  # Returns information about the given type.
  #
  # Note that this is optimized for human readability over data structure consistency.
  #
  # For scalars (and lists-of-scalars) the *only* meaningful structural information
  # is the type signature (e.g. `[ID!]`). On the other hand, we need the `fields` for
  # an input object, and the `values` for an enum (along with the type signature for
  # those, to distinguish list vs not and nullable vs not).
  #
  # So, while we return a hash for object/enum variables, for all others we just return
  # the type signature string.
  #
  # `expanding` is the chain of input object types whose fields are currently being dumped
  # (outermost first). It is passed along to `fields_for` so that reference cycles between
  # input types can be detected.
  def type_info(type, expanding = [])
    unwrapped_type = type.unwrap

    if unwrapped_type.kind.input_object?
      {"type" => type.to_type_signature, "fields" => fields_for(_ = unwrapped_type, expanding)}
    elsif unwrapped_type.kind.enum?
      {"type" => type.to_type_signature, "values" => (_ = unwrapped_type).values.keys.sort}
    else
      type.to_type_signature
    end
  end

  # Returns a hash of input object fields (sorted by name) for the given type.
  #
  # A field whose type is `variable_type` itself, or any input type in the `expanding` chain
  # that led here, is dumped as a plain type signature instead of being expanded. Checking the
  # whole chain (not just `variable_type`) guarantees termination for mutually recursive input
  # types (e.g. `A` has a field of type `B`, and `B` has a field of type `A`).
  def fields_for(variable_type, expanding = [])
    in_progress = expanding + [variable_type]

    variable_type.arguments.values.sort_by(&:name).to_h do |arg|
      if in_progress.include?(arg.type.unwrap)
        # Don't recurse (it would never terminate); just dump a reference to the type.
        [arg.name, arg.type.to_type_signature]
      else
        [arg.name, type_info(arg.type, in_progress)]
      end
    end
  end
end
109
+ end
110
+ end