elasticgraph-query_registry 0.18.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,195 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/graphql"
10
+ require "elastic_graph/support/from_yaml_file"
11
+ require "pathname"
12
+ require "rake/tasklib"
13
+
14
+ module ElasticGraph
15
+ module QueryRegistry
16
+ class RakeTasks < ::Rake::TaskLib
17
+ # @dynamic self.from_yaml_file
18
+ extend Support::FromYamlFile::ForRakeTasks.new(ElasticGraph::GraphQL)
19
+
20
# Stores the task configuration and immediately registers the rake tasks.
#
# @param registered_queries_by_client_dir [String, Pathname] directory whose children are treated as per-client directories
# @param require_eg_latency_slo_directive [Boolean] passed through to the query validator
# @param output [#puts] destination for progress messages (defaults to `$stdout`)
# @yield called (and memoized) the first time a task needs the GraphQL instance
def initialize(registered_queries_by_client_dir, require_eg_latency_slo_directive: false, output: $stdout, &load_graphql)
  @load_graphql = load_graphql
  @output = output
  @require_eg_latency_slo_directive = require_eg_latency_slo_directive
  @registered_queries_by_client_dir = Pathname.new(registered_queries_by_client_dir)

  define_tasks
end
28
+
29
+ private
30
+
31
# Registers the `query_registry:*` rake tasks. Each task body simply delegates to a
# private method of this class.
def define_tasks
  namespace(:query_registry) do
    desc "Validates the queries registered in `#{@registered_queries_by_client_dir}`"
    task(:validate_queries) { perform_query_validation }

    desc "Updates the registered information about query variables for a specific client (and optionally, a specific query)."
    task(:dump_variables, :client, :query) do |_task, task_args|
      # `:client` is mandatory (`fetch` raises when it is missing); `:query` defaults to every query.
      client = task_args.fetch(:client)
      query = task_args.fetch(:query, "*")
      dump_variables("#{client}/#{query}.graphql")
    end

    namespace(:dump_variables) do
      desc "Updates the registered information about query variables for all clients."
      task(:all) { dump_variables("*/*.graphql") }
    end
  end
end
51
+
52
# Writes a `.variables.yaml` file next to every registered query file matching `query_glob`
# (a glob relative to the registered queries directory).
def dump_variables(query_glob)
  # These requires are intentionally deferred until the task runs: rake tasks should only
  # load their dependencies when invoked, which keeps `rake -T` fast and avoids loading the
  # dependencies of every task just to run one of them.
  require "elastic_graph/query_registry/variable_dumper"
  require "yaml"

  dumper = VariableDumper.new(graphql.schema.graphql_schema)

  @registered_queries_by_client_dir.glob(query_glob) do |query_file|
    variables = dumper.dump_variables_for_query(query_file.read)
    target = variable_file_name_for(query_file.to_s)
    contents = variable_file_docs(target) + ::YAML.dump(variables)

    ::File.write(target, contents)
    @output.puts "- Dumped `#{target}`."
  end
end
69
+
70
# Maps a query file name (`….graphql`) to the name of its dumped variables file
# (`….variables.yaml`). A name without the `.graphql` suffix just gets the new suffix appended.
def variable_file_name_for(query_file_name)
  "#{query_file_name.delete_suffix(".graphql")}.variables.yaml"
end
73
+
74
# Builds the explanatory comment header written at the top of a dumped variables file.
#
# The client name is the name of the directory containing the file, and the query name is
# the file's base name without the `.variables.yaml` suffix. Both are derived with `Pathname`
# rather than a regex that demands a `/` before the client directory, so relative paths such
# as `client/Query.variables.yaml` produce a correct header too.
#
# @param file_name [String] path of the variables file about to be written
# @return [String] a block of `#`-prefixed comment lines, ending in a newline
def variable_file_docs(file_name)
  variables_path = Pathname.new(file_name)
  client_name = variables_path.dirname.basename.to_s
  query_name = variables_path.basename.to_s.delete_suffix(".variables.yaml")

  <<~EOS
    # Generated by `rake "query_registry:dump_variables[#{client_name}, #{query_name}]"`.
    # DO NOT EDIT BY HAND. Any edits will be lost the next time the rake task is run.
    #
    # This file exists to allow `elasticgraph-query_registry` to track the structure of
    # the variables for the `#{client_name}/#{query_name}` query, so that we can detect
    # when the schema structure of an object or enum variable changes in a way that might
    # break the client.
  EOS
end
88
+
89
# Validates the registered queries of every client directory (in sorted order), reporting
# progress to `@output`, and raises if any validation error was found.
#
# @raise [RuntimeError] when at least one validation error was found across all queries
def perform_query_validation
  # These requires are intentionally deferred until the task runs: rake tasks should only
  # load their dependencies when invoked, which keeps `rake -T` fast and avoids loading the
  # dependencies of every task just to run one of them.
  require "elastic_graph/query_registry/query_validator"
  require "json"
  require "yaml"

  validator = QueryValidator.new(
    graphql.schema,
    require_eg_latency_slo_directive: @require_eg_latency_slo_directive
  )

  all_errors = []
  @registered_queries_by_client_dir.children.sort.each do |client_dir|
    @output.puts "For client `#{client_dir.basename}`:"
    all_errors.concat(validate_client_queries(validator, client_dir))
    @output.puts # blank line separating one client's report from the next
  end

  return if all_errors.empty?

  raise "Found #{count_description(all_errors, "validation error")} total across all queries."
end
114
+
115
# Validates every `*.graphql` file directly inside `client_dir` (in sorted order), printing a
# per-file and per-operation report to `@output`.
#
# In addition to the validator's own errors, an operation whose name was already seen in an
# earlier file of the same client gets a "conflicting operation name" error.
#
# @return [Array<Hash>] all validation errors found for this client
def validate_client_queries(validator, client_dir)
  # @type var file_name_by_operation_name: ::Hash[::String, ::Pathname]
  file_name_by_operation_name = {}
  client_name = client_dir.basename.to_s

  client_dir.glob("*.graphql").sort.flat_map do |query_file|
    dumped_variables = previously_dumped_variables_for(query_file.to_s)
    errors_by_operation_name = validator.validate(
      query_file.read,
      client_name: client_name,
      query_name: query_file.basename.to_s.delete_suffix(".graphql"),
      previously_dumped_variables: dumped_variables
    )

    @output.puts " - #{query_file.basename} (#{count_description(errors_by_operation_name, "operation")}):"

    errors_by_operation_name.flat_map do |op_name, op_errors|
      # `op_name` may be nil (see the fallback label below), so the lookup key is its string form.
      lookup_key = op_name.to_s
      earlier_file = file_name_by_operation_name[lookup_key]

      if earlier_file
        # Build a new array rather than appending in place, so the validator's array is untouched.
        op_errors = op_errors + [conflicting_operation_name_error(client_dir, op_name, earlier_file)]
      else
        file_name_by_operation_name[lookup_key] = query_file
      end

      label = op_name || "(no operation name)"
      if op_errors.empty?
        @output.puts " - #{label}: ✅"
      else
        @output.puts " - #{label}: 🛑. Got #{count_description(op_errors, "validation error")}:\n"
        op_errors.each_with_index { |error, index| @output.puts format_error(query_file, index, error) }
      end

      op_errors
    end
  end
end
152
+
153
# Loads the previously dumped variables for the given query file, or returns `nil` when no
# variables file exists for it yet. Relies on `yaml` having been required by the caller.
def previously_dumped_variables_for(query_file_name)
  variables_file = variable_file_name_for(query_file_name)
  ::YAML.safe_load_file(variables_file) if ::File.exist?(variables_file)
end
158
+
159
# Builds the error hash reported when an operation name is used by more than one of a
# client's query files. Only the base names of the client directory and of the conflicting
# file appear in the message.
def conflicting_operation_name_error(client_dir, operation_name, conflicting_file_name)
  client = client_dir.basename
  existing_file = conflicting_file_name.basename

  {
    "message" => "A `#{operation_name}` query already exists for `#{client}` in " \
      "`#{existing_file}`. Each query operation must have a unique name."
  }
end
165
+
166
# Renders one validation error as a multi-line string for the report.
#
# The lines appear in this order: the numbered message, the dotted `path` (when present), one
# `source:` line per entry of `locations`, then one line per entry of `extensions` merged with
# any other keys of the error hash. The result always ends with a blank line.
#
# @param file_name [#to_s] the query file, used as the prefix of `source:` lines
# @param index [Integer] zero-based position of the error (displayed one-based)
# @param error_hash [Hash] the error, keyed by strings
# @return [String]
def format_error(file_name, index, error_hash)
  lines = [" #{index + 1}) #{error_hash["message"]}"]

  path_segments = error_hash["path"]
  lines << " path: #{path_segments.join(".")}" unless path_segments.nil?

  (error_hash["locations"] || []).each do |location|
    lines << " source: #{file_name}:#{location["line"]}:#{location["column"]}"
  end

  # Keys without dedicated handling are reported alongside the extensions (and win on conflict).
  extras = error_hash.except("message", "locations", "path", "extensions")
  (error_hash["extensions"] || {}).merge(extras).each do |key, value|
    lines << " #{key}: #{value}"
  end

  "#{lines.join("\n ")}\n\n"
end
184
+
185
# Describes the size of `collection` in words, e.g. "1 operation" or "3 operations".
# Pluralization simply appends "s" to `noun` for every size other than 1.
def count_description(collection, noun)
  count = collection.size
  (count == 1) ? "1 #{noun}" : "#{count} #{noun}s"
end
189
+
190
# Returns the GraphQL instance, invoking the block given to the constructor on first use
# and reusing its (truthy) result afterwards.
def graphql
  return @graphql if @graphql

  @graphql = @load_graphql.call
end
193
+ end
194
+ end
195
+ end
@@ -0,0 +1,101 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "elastic_graph/query_registry/query_validators/for_registered_client"
10
+ require "elastic_graph/query_registry/query_validators/for_unregistered_client"
11
+ require "graphql"
12
+ require "pathname"
13
+
14
+ module ElasticGraph
15
+ module QueryRegistry
16
+ # An abstraction that implements a registry of GraphQL queries. Callers should use
17
+ # `build_and_validate_query` to get `GraphQL::Query` objects so that clients are properly
18
+ # limited in what queries we execute on their behalf.
19
+ #
20
+ # Note that this class is designed to be as efficient as possible:
21
+ #
22
+ # - Registered GraphQL queries are only parsed once, and then the parsed form is used
23
+ # each time that query is submitted. In our benchmarking, parsing of large queries
24
+ # can be significant, taking ~10ms or so.
25
+ # - We delay parsing a registered client's queries until the first time that client
26
+ # sends us a query. That way, we don't have to pay any parsing cost for queries
27
+ # that were registered by an old client that no longer sends us requests.
28
+ # - Likewise, we defer reading a client's registered query strings off of disk until
29
+ # the first time it submits a request.
30
+ #
31
+ # In addition, it's worth noting that we support some basic "fuzzy" matching of query
32
+ # strings, based on the query canonicalization performed by the GraphQL gem. Semantically
33
+ # insignificant changes to the query string from a registered query (such as whitespace
34
+ # differences, or comments) are tolerated.
35
+ class Registry
36
+ # Public factory method, which builds a `Registry` instance from the given directory.
37
+ # Subdirectories are treated as client names, and the files in them are treated as
38
+ # individually registered queries.
39
def self.build_from_directory(schema, directory, allow_unregistered_clients:, allow_any_query_for_clients:)
  registry_dir = Pathname.new(directory)
  # Every child of the directory is treated as a client, named after the child's base name.
  client_names = registry_dir.children.map { |child| child.basename.to_s }

  new(
    schema,
    client_names: client_names,
    allow_unregistered_clients: allow_unregistered_clients,
    allow_any_query_for_clients: allow_any_query_for_clients
  ) do |client_name|
    # Reading the query files is deferred to this block, so it only happens when the
    # registry asks for a given client's query strings.
    (registry_dir + client_name).glob("*.graphql").map { |query_file| ::File.read(query_file.to_s) }
  end
end
52
+
53
+ # Builds a `GraphQL::Query` object for the given query string, and validates that it is
54
+ # an allowed query. Returns a list of registry validation errors in addition to the built
55
+ # query object. The list of validation errors will be empty if the query should be allowed.
56
+ # A query can be allowed either by virtue of being registered for usage by the given client,
57
+ # or by being for a completely unregistered client (if `allow_unregistered_clients` is `true`).
58
+ #
59
+ # This is also tolerant of some minimal differences in the query string (such as comments
60
+ # and whitespace). If the query differs in a significant way from a registered query, it
61
+ # will not be recognized as registered.
62
def build_and_validate_query(query_string, client:, variables: {}, operation_name: nil, context: {})
  # Registered clients are checked against their registered queries; everyone else falls
  # through to the unregistered-client validator.
  validator = @registered_client_validator.applies_to?(client) ? @registered_client_validator : @unregistered_client_validator

  # The same options are handed to the validator and to the query built on its behalf.
  query_options = {variables: variables, operation_name: operation_name, context: context}

  validator.build_and_validate_query(query_string, client: client, **query_options) do
    ::GraphQL::Query.new(@graphql_schema, query_string, **query_options)
  end
end
80
+
81
+ private
82
+
83
# Wires up the two validators used by `build_and_validate_query`.
#
# @param schema the ElasticGraph schema (must respond to `graphql_schema`)
# @param client_names [Array<String>] names of the registered clients
# @param allow_unregistered_clients [Boolean] passed to the unregistered-client validator
# @param allow_any_query_for_clients [Enumerable<String>] converted to a set and shared by both validators
# @yield [client_name] provides the registered query strings for the given client
def initialize(schema, client_names:, allow_unregistered_clients:, allow_any_query_for_clients:, &provide_query_strings_for_client)
  @graphql_schema = schema.graphql_schema
  unrestricted_clients = allow_any_query_for_clients.to_set

  @registered_client_validator = QueryValidators::ForRegisteredClient.new(
    schema: schema,
    client_names: client_names,
    allow_any_query_for_clients: unrestricted_clients,
    provide_query_strings_for_client: provide_query_strings_for_client
  )

  @unregistered_client_validator = QueryValidators::ForUnregisteredClient.new(
    allow_unregistered_clients: allow_unregistered_clients,
    allow_any_query_for_clients: unrestricted_clients
  )
end
99
+ end
100
+ end
101
+ end
@@ -0,0 +1,104 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ module ElasticGraph
10
+ module QueryRegistry
11
+ # Responsible for comparing old and new variable type info to see if any changes are backwards
12
+ # incompatible (and thus may break the client). Incompatibilities are identified by path and described.
13
class VariableBackwardIncompatibilityDetector
  # A single incompatibility: the `path` of the affected variable or field, plus an
  # `explanation` of what changed.
  Incompatibility = ::Data.define(:path, :explanation) do
    # @implements Incompatibility
    def description
      "#{path} (#{explanation})"
    end
  end

  # Entry point. Compares the old and new variables of an operation and returns the
  # backward incompatibilities between them (an empty array when there are none).
  # Variable paths are prefixed with `$`; nested object fields are joined with `.`.
  def detect(old_op_vars:, new_op_vars:)
    detect_incompatibilities(old_op_vars, new_op_vars, "$", "variable")
  end

  private

  # Compares an `old` and `new` hash (either hashes of variables or hashes of object fields)
  # and describes the incompatibilities in them: removals first, then changes to entries
  # present in both, then newly added required entries.
  def detect_incompatibilities(old, new, path, entry_type)
    old_names = old.keys
    new_names = new.keys

    # Every removal is incompatible: the client might pass a value for the variable or field.
    removed = (old_names - new_names).map do |name|
      Incompatibility.new("#{path}#{name}", "removed")
    end

    changed = (old_names & new_names).flat_map do |name|
      incompatibilities_for("#{path}#{name}", normalize_type_info(old[name]), normalize_type_info(new[name]))
    end

    # An addition is only incompatible when it is required (non-nullable).
    newly_required = (new_names - old_names).filter_map do |name|
      next unless normalize_type_info(new[name]).fetch("type").end_with?("!")

      Incompatibility.new("#{path}#{name}", "new required #{entry_type}")
    end

    removed + changed + newly_required
  end

  # Describes the incompatibilities between the old and new type info of one entry:
  # type signature changes, then removed enum values, then nested object field changes.
  def incompatibilities_for(path, old_type_info, new_type_info)
    [
      type_incompatibilities(path, old_type_info.fetch("type"), new_type_info.fetch("type")),
      enum_value_incompatibilities(path, old_type_info["values"], new_type_info["values"]),
      object_field_incompatibilities(path, old_type_info["fields"], new_type_info["fields"])
    ].flatten(1)
  end

  # Describes the incompatibilities between the old and new type names.
  def type_incompatibilities(path, old_type, new_type)
    # Unchanged type: nothing to report.
    return [] if new_type == old_type

    # Nullability was relaxed. That can't break the client, so it's fine.
    return [] if old_type == "#{new_type}!"

    if new_type == "#{old_type}!"
      # The variable or field is newly required; a client that doesn't pass a value for it
      # could be broken by this change.
      return [Incompatibility.new(path, "required for the first time")]
    end

    # The type name changed. Some type name changes are compatible (e.g. from `ID` to
    # `String`), but no attempt is made to figure things out at that granularity.
    [Incompatibility.new(path, "type changed from `#{old_type}` to `#{new_type}`")]
  end

  # Describes the incompatibilities between the old and new enum values for a field or
  # variable. Only applies when both sides list enum values.
  def enum_value_incompatibilities(path, old_enum_values, new_enum_values)
    removed_values = (old_enum_values && new_enum_values) ? old_enum_values - new_enum_values : []
    return [] if removed_values.empty?

    # A removed enum value could break the client if it ever passes that value in a query.
    [Incompatibility.new(path, "removed enum values: #{removed_values.join(", ")}")]
  end

  # Describes the incompatibilities between old and new object fields via recursion.
  # Only applies when both sides list fields.
  def object_field_incompatibilities(path, old_fields, new_fields)
    if old_fields && new_fields
      detect_incompatibilities(old_fields, new_fields, "#{path}.", "field")
    else
      []
    end
  end

  # `type_info` can be a bare type signature string; this wraps such a string in a hash so
  # that every type info can be treated as a hash with a `type` entry.
  def normalize_type_info(type_info)
    case type_info
    when ::String then {"type" => type_info}
    else _ = type_info
    end
  end
end
103
+ end
104
+ end
@@ -0,0 +1,110 @@
1
+ # Copyright 2024 Block, Inc.
2
+ #
3
+ # Use of this source code is governed by an MIT-style
4
+ # license that can be found in the LICENSE file or at
5
+ # https://opensource.org/licenses/MIT.
6
+ #
7
+ # frozen_string_literal: true
8
+
9
+ require "graphql"
10
+
11
+ module ElasticGraph
12
+ module QueryRegistry
13
+ # Responsible for dumping structural information about query variables.
14
+ #
15
+ # This is necessary for the query registry to be able to support object and enum variables.
16
+ # To understand why, consider what happens when a field is removed from an input object
17
+ # variable used by a client's query. Whether or not that will break the client depends
18
+ # on which fields of the input object the client populates when sending the query to
19
+ # ElasticGraph. Similarly, if an enum value is removed from an enum value variable used by
20
+ # a client, it could be a breaking change (but only if the client ever passes the removed
21
+ # enum value).
22
+ #
23
+ # To detect this situation, we use this to dump the structural information about all variables.
24
+ # When the structure of variables changes, we can then tell the engineer that they need to verify
25
+ # that it won't break the client.
26
class VariableDumper
  # @param graphql_schema the GraphQL schema used to resolve the types of query variables
  def initialize(graphql_schema)
    @graphql_schema = graphql_schema
  end

  # Returns a hash of operations from the given query string. For each operation, the value
  # is a hash of variables. An unparsable query string yields an empty hash.
  def dump_variables_for_query(query_string)
    query = ::GraphQL::Query.new(@graphql_schema, query_string, validate: false)

    if query.document.nil?
      # If the query was unparsable, we don't know anything about the variables and must just return an empty hash.
      {}
    else
      # @type var operations: ::Array[::GraphQL::Language::Nodes::OperationDefinition]
      operations = _ = query.document.definitions.grep(::GraphQL::Language::Nodes::OperationDefinition)
      dump_variables_for_operations(operations)
    end
  end

  # Returns a hash containing the variables for each operation, keyed by operation name.
  # Unnamed operations are keyed by a placeholder based on their (one-based) position.
  def dump_variables_for_operations(operations)
    operations.each_with_index.to_h do |operation, index|
      [operation.name || "(Anonymous operation #{index + 1})", variables_for_op(operation)]
    end
  end

  private

  # Returns a hash of variables (sorted by name) for the given GraphQL operation.
  def variables_for_op(operation)
    operation.variables.sort_by(&:name).to_h do |variable|
      type_info =
        if (type = @graphql_schema.type_from_ast(variable.type))
          type_info(type)
        else
          # We should only get here if a variable references a type that is undefined. Since we
          # don't know anything about the type other than the name, that's all we can return.
          variable.type.to_query_string
        end

      [variable.name, type_info]
    end
  end

  # Returns information about the given type.
  #
  # The result is optimized for human readability over data structure consistency:
  #
  # - For input objects: a hash with the type signature and the `fields` of the object.
  # - For enums: a hash with the type signature and the sorted `values` of the enum.
  # - For everything else (e.g. scalars and lists-of-scalars) the only meaningful structural
  #   information is the type signature (e.g. `[ID!]`), so just that string is returned.
  #
  # `ancestors` is the chain of input object types currently being expanded. It is threaded
  # through to `fields_for` so that cyclic references can be detected.
  #
  # NOTE(review): `ancestors` is a new optional parameter; if this project keeps separate type
  # signatures for this class, they need the same addition.
  def type_info(type, ancestors = [])
    unwrapped_type = type.unwrap
    signature = type.to_type_signature

    if unwrapped_type.kind.input_object?
      {"type" => signature, "fields" => fields_for(_ = unwrapped_type, ancestors)}
    elsif unwrapped_type.kind.enum?
      {"type" => signature, "values" => (_ = unwrapped_type).values.keys.sort}
    else
      signature
    end
  end

  # Returns a hash of input object fields (sorted by name) for the given type.
  #
  # A field whose type is `variable_type` itself, or any input object type already being
  # expanded further up the chain (`ancestors`), is dumped as a bare type signature instead of
  # being expanded again. Without that, self-referential and mutually recursive input types
  # would recurse forever.
  def fields_for(variable_type, ancestors = [])
    expanding = ancestors + [variable_type]

    variable_type.arguments.values.sort_by(&:name).to_h do |arg|
      if expanding.include?(arg.type.unwrap)
        # Don't recurse (it would never terminate); just dump a reference to the type.
        [arg.name, arg.type.to_type_signature]
      else
        [arg.name, type_info(arg.type, expanding)]
      end
    end
  end
end
109
+ end
110
+ end