RubyGems - elasticgraph-graphql - Versions diffs - 0.18.0.0 - Mend

elasticgraph-graphql 0.18.0.0

Files changed (81) hide show

data/lib/elastic_graph/graphql/filtering/range_query.rb ADDED Viewed

@@ -0,0 +1,56 @@
+# Copyright 2024 Block, Inc.
+#
+# Use of this source code is governed by an MIT-style
+# license that can be found in the LICENSE file or at
+# https://opensource.org/licenses/MIT.
+#
+# frozen_string_literal: true
+require "elastic_graph/support/hash_util"
+module ElasticGraph
+  class GraphQL
+    module Filtering
+      # Alternate `BooleanQuery` implementation for range queries. When we get a filter like this:
+      #
+      #     {some_field: {gt: 10, lt: 100}}
+      #
+      # ...we independently build a range query for each predicate. The datastore query structure would look like this:
+      #
+      #     {filter: [
+      #       {range: {some_field: {gt: 10}}},
+      #       {range: {some_field: {lt: 100}}}
+      #     ]}
+      #
+      # However, the `range` query allows these to be combined, like so:
+      #
+      #     {filter: [
+      #       {range: {some_field: {gt: 10, lt: 100}}}
+      #     ]}
+      #
+      # While we haven't measured it, it's likely to be more efficient (certainly not _less_ efficient!),
+      # and it's essential that we combine them when we are using `any_satisfy`. Consider this filter:
+      #
+      #     {some_field: {any_satisfy: {gt: 10, lt: 100}}}
+      #
+      # This should match a document with `some_field: [5, 45, 200]` (since 45 is between 10 and 100),
+      # and not match a document with `some_field: [5, 200]` (since `some_field` has no value between 10 and 100).
+      # However, if we keep the range clauses separate, this document would match, because `some_field` has
+      # a value > 10 and a value < 100 (even though no single value satisfies both parts!). When we combine
+      # the clauses into a single `range` query then the filtering works like we expect.
+      class RangeQuery < ::Data.define(:field_name, :operator, :value)
+        def merge_into(bool_node)
+          existing_range_index = bool_node[:filter].find_index { |clause| clause.dig(:range, field_name) }
+          new_range_clause = {range: {field_name => {operator => value}}}
+          if existing_range_index
+            existing_range_clause = bool_node[:filter][existing_range_index]
+            bool_node[:filter][existing_range_index] = Support::HashUtil.deep_merge(existing_range_clause, new_range_clause)
+          else
+            bool_node[:filter] << new_range_clause
+          end
+        end
+      end
+    end
+  end
+end

data/lib/elastic_graph/graphql/http_endpoint.rb ADDED Viewed

@@ -0,0 +1,229 @@
+# Copyright 2024 Block, Inc.
+#
+# Use of this source code is governed by an MIT-style
+# license that can be found in the LICENSE file or at
+# https://opensource.org/licenses/MIT.
+#
+# frozen_string_literal: true
+require "elastic_graph/graphql/client"
+require "elastic_graph/support/memoizable_data"
+require "json"
+require "uri"
+module ElasticGraph
+  class GraphQL
+    # Handles HTTP concerns for when ElasticGraph is served via HTTP. The logic here
+    # is based on the graphql.org recommendations:
+    #
+    # https://graphql.org/learn/serving-over-http/#http-methods-headers-and-body
+    #
+    # As that recommends, we support queries in 3 different HTTP forms:
+    #
+    # - A standard POST request as application/json with query/operationName/variables in the body.
+    # - A GET request with `query`, `operationName` and `variables` query params in the URL.
+    # - A POST as application/graphql with a query string in the body.
+    #
+    # Note that this is designed to be agnostic to what the calling HTTP context is (for example,
+    # AWS Lambda, or Rails, or Rack...). Instead, this uses simple Request/Response value objects
+    # that the calling context can easily translate to/from to use this in any HTTP context.
+    class HTTPEndpoint
+      APPLICATION_JSON = "application/json"
+      APPLICATION_GRAPHQL = "application/graphql"
+      def initialize(query_executor:, monotonic_clock:, client_resolver:)
+        @query_executor = query_executor
+        @monotonic_clock = monotonic_clock
+        @client_resolver = client_resolver
+      end
+      # Processes the given HTTP request, returning an HTTP response.
+      #
+      # `max_timeout_in_ms` is not a property of the HTTP request (the
+      # calling application will determine it instead!) so it is a separate argument.
+      #
+      # Note that this method does _not_ convert exceptions to 500 responses. It's up to
+      # the calling application to do that if it wants to (and to determine how much of the
+      # exception to return in the HTTP response...).
+      def process(request, max_timeout_in_ms: nil, start_time_in_ms: @monotonic_clock.now_in_ms)
+        resolved = @client_resolver.resolve(request)
+        if resolved.is_a?(HTTPResponse)
+          # The resolver produced a response itself; hand it back without executing anything.
+          resolved
+        else
+          with_parsed_request(request, max_timeout_in_ms: max_timeout_in_ms) do |parsed_request|
+            execution_options = {
+              variables: parsed_request.variables,
+              operation_name: parsed_request.operation_name,
+              client: resolved,
+              timeout_in_ms: parsed_request.timeout_in_ms,
+              context: parsed_request.context,
+              start_time_in_ms: start_time_in_ms
+            }
+            query_result = @query_executor.execute(parsed_request.query_string, **execution_options)
+            HTTPResponse.json(200, query_result.to_h)
+          end
+        end
+      rescue RequestExceededDeadlineError
+        # A blown deadline is reported as a gateway timeout rather than propagated.
+        HTTPResponse.error(504, "Search exceeded requested timeout.")
+      end
+      private
+      # Helper method that converts `HTTPRequest` to a parsed form we can work with.
+      # If the request can be successfully parsed, a `ParsedRequest` will be yielded;
+      # otherwise an `HTTPResponse` will be returned with an error.
+      def with_parsed_request(request, max_timeout_in_ms:)
+        with_request_params(request) do |params|
+          with_timeout(request, max_timeout_in_ms: max_timeout_in_ms) do |timeout_in_ms|
+            with_context(request) do |context|
+              yield ParsedRequest.new(
+                query_string: params["query"],
+                variables: params["variables"] || {},
+                operation_name: params["operationName"],
+                timeout_in_ms: timeout_in_ms,
+                context: context
+              )
+            end
+          end
+        end
+      end
+      # Responsible for handling the 3 types of requests we need to handle:
+      #
+      # - A standard POST request as application/json with query/operationName/variables in the body.
+      # - A GET request with `query`, `operationName` and `variables` query params in the URL.
+      # - A POST as application/graphql with a query string in the body.
+      #
+      # This yields a hash containing the query/operationName/variables if successful; otherwise
+      # it returns an `HTTPResponse` with an error.
+      def with_request_params(request)
+        params =
+          # POST with application/json is the most common form requests take, so we have it as the first branch here.
+          if request.http_method == :post && request.content_type == APPLICATION_JSON
+            begin
+              ::JSON.parse(request.body.to_s)
+            rescue ::JSON::ParserError
+              # standard:disable Lint/NoReturnInBeginEndBlocks
+              return HTTPResponse.error(400, "Request body is invalid JSON.")
+              # standard:enable Lint/NoReturnInBeginEndBlocks
+            end
+          elsif request.http_method == :post && request.content_type == APPLICATION_GRAPHQL
+            {"query" => request.body}
+          elsif request.http_method == :post
+            return HTTPResponse.error(415, "`#{request.content_type}` is not a supported content type. Only `#{APPLICATION_JSON}` and `#{APPLICATION_GRAPHQL}` are supported.")
+          elsif request.http_method == :get
+            ::URI.decode_www_form(::URI.parse(request.url).query.to_s).to_h.tap do |hash|
+              # Variables must come in as JSON, even if in the URL. express-graphql does it this way,
+              # which is a bit of a canonical implementation, as it is referenced from graphql.org:
+              # https://github.com/graphql/express-graphql/blob/v0.12.0/src/index.ts#L492-L497
+              hash["variables"] &&= ::JSON.parse(hash["variables"])
+            rescue ::JSON::ParserError
+              return HTTPResponse.error(400, "Variables are invalid JSON.")
+            end
+          else
+            return HTTPResponse.error(405, "GraphQL only supports GET and POST requests.")
+          end
+        # Ignore an empty string operationName.
+        params = params.merge("operationName" => nil) if params["operationName"] && params["operationName"].empty?
+        yield params
+      end
+      # Responsible for figuring out the timeout, based on a header and a provided max.
+      # If successful, yields the timeout value; otherwise will return an `HTTPResponse` with
+      # an error.
+      def with_timeout(request, max_timeout_in_ms:)
+        requested_timeout_in_ms =
+          if (timeout_in_ms_str = request.normalized_headers[HTTPRequest.normalize_header_name(TIMEOUT_MS_HEADER)])
+            begin
+              Integer(timeout_in_ms_str)
+            rescue ::ArgumentError
+              # standard:disable Lint/NoReturnInBeginEndBlocks
+              return HTTPResponse.error(400, "`#{TIMEOUT_MS_HEADER}` header value of #{timeout_in_ms_str.inspect} is invalid")
+              # standard:enable Lint/NoReturnInBeginEndBlocks
+            end
+          end
+        yield [max_timeout_in_ms, requested_timeout_in_ms].compact.min
+      end
+      # Responsible for determining any `context` values to pass down into the `query_executor`,
+      # which in turn will make the values available to the GraphQL resolvers.
+      #
+      # By default, our only context value is the HTTP request. This method exists to provide an extension
+      # point so that ElasticGraph extensions can add `context` values based on the `request` as desired.
+      #
+      # Extensions can return an `HTTPResponse` with an error if the `request` is invalid according
+      # to their requirements. Otherwise, they must call `super` (to delegate to this and any other
+      # extensions) with a block. In the block, they must merge in their `context` values and then `yield`.
+      def with_context(request)
+        yield({http_request: request})
+      end
+      ParsedRequest = Data.define(:query_string, :variables, :operation_name, :timeout_in_ms, :context)
+    end
+    # Represents an HTTP request, containing:
+    #
+    # - http_method: a symbol like :get or :post.
+    # - url: a string containing the full URL.
+    # - headers: a hash with string keys and values containing HTTP headers. The headers can
+    #   be in any form like `Content-Type`, `content-type`, `CONTENT-TYPE`, `CONTENT_TYPE`, etc.
+    # - body: a string containing the request body, if there was one.
+    HTTPRequest = Support::MemoizableData.define(:http_method, :url, :headers, :body) do
+      # @implements HTTPRequest
+      # HTTP headers are intended to be case-insensitive, and different Web frameworks treat them differently.
+      # For example, Rack uppercases them with `_` in place of `-`.  With AWS Lambda proxy integrations API
+      # gateway HTTP APIs, header names are lowercased:
+      # https://docs.aws.amazon.com/apigateway/latest/developerguide/http-api-develop-integrations-lambda.html
+      #
+      # ...but for integration with API gateway REST APIs, header names are provided as-is:
+      # https://docs.aws.amazon.com/apigateway/latest/developerguide/set-up-lambda-proxy-integrations.html#api-gateway-simple-proxy-for-lambda-input-format
+      #
+      # To be maximally compatible here, this normalizes to uppercase form with dashes in place of underscores.
+      def normalized_headers
+        @normalized_headers ||= headers.transform_keys do |key|
+          HTTPRequest.normalize_header_name(key)
+        end
+      end
+      def content_type
+        @content_type ||= normalized_headers["CONTENT-TYPE"]
+      end
+      def self.normalize_header_name(header)
+        header.upcase.tr("_", "-")
+      end
+    end
+    # Represents an HTTP response, containing:
+    #
+    # - status_code: an integer like 200.
+    # - headers: a hash with string keys and values containing HTTP response headers.
+    # - body: a string containing the response body.
+    HTTPResponse = Data.define(:status_code, :headers, :body) do
+      # @implements HTTPResponse
+      # Helper method for building a JSON response.
+      def self.json(status_code, body)
+        new(status_code, {"Content-Type" => HTTPEndpoint::APPLICATION_JSON}, ::JSON.generate(body))
+      end
+      # Helper method for building an error response.
+      def self.error(status_code, message)
+        json(status_code, {"errors" => [{"message" => message}]})
+      end
+    end
+    # Steep weirdly expects them here...
+    # @dynamic initialize, config, logger, runtime_metadata, graphql_schema_string, datastore_core, clock
+    # @dynamic graphql_http_endpoint, graphql_query_executor, schema, datastore_search_router, filter_interpreter
+    # @dynamic datastore_query_builder, graphql_gem_plugins, graphql_resolvers, datastore_query_adapters, monotonic_clock
+    # @dynamic load_dependencies_eagerly, self.from_parsed_yaml, filter_args_translator, sub_aggregation_grouping_adapter
+  end
+end

data/lib/elastic_graph/graphql/monkey_patches/schema_field.rb ADDED Viewed

@@ -0,0 +1,56 @@
+# Copyright 2024 Block, Inc.
+#
+# Use of this source code is governed by an MIT-style
+# license that can be found in the LICENSE file or at
+# https://opensource.org/licenses/MIT.
+#
+# frozen_string_literal: true
+require "graphql"
+module ElasticGraph
+  class GraphQL
+    module MonkeyPatches
+      # This module is designed to monkey patch `GraphQL::Schema::Field`, but to do so in a
+      # conservative, safe way:
+      #
+      # - It defines no new methods.
+      # - It delegates to the original implementation with `super` unless we are sure that a field should be hidden.
+      # - It only changes the behavior for ElasticGraph schemas (as indicated by `:elastic_graph_schema` in the `context`).
+      module SchemaFieldVisibilityDecorator
+        def visible?(context)
+          # `DynamicFields` and `EntryPoints` are built-in introspection types that `field_named` below doesn't support:
+          # https://github.com/rmosolgo/graphql-ruby/blob/0df187995c971b399ed7cc1fbdcbd958af6c4ade/lib/graphql/introspection/entry_points.rb
+          # https://github.com/rmosolgo/graphql-ruby/blob/0df187995c971b399ed7cc1fbdcbd958af6c4ade/lib/graphql/introspection/dynamic_fields.rb
+          #
+          # ...so if the owner is one of those we just return `super` here.
+          return super if %w[DynamicFields EntryPoints].include?(owner.graphql_name)
+          if context[:elastic_graph_schema]&.field_named(owner.graphql_name, graphql_name)&.hidden_from_queries?
+            return false
+          end
+          super
+        end
+      end
+    end
+  end
+end
+# As per https://graphql-ruby.org/authorization/visibility.html, the public API
+# provided by the GraphQL gem to control visibility of fields is to define
+# a `visible?` instance method on a custom subclass of `GraphQL::Schema::Field`.
+# However, because we load our schema from an SDL definition rather than defining
+# classes for each schema type, we don't have a way to register a custom subclass
+# to be used for fields.
+#
+# So, here we solve this a slightly different way: we prepend a module onto
+# the `GraphQL::Schema::Field` class. This allows our module to act like a
+# decorator and intercept calls to `visible?` so that it can hide fields as needed.
+# `GraphQL::Schema::Field` is already loaded by the `require "graphql"` above, so the decorator
+# can be prepended directly instead of reopening each enclosing namespace.
+::GraphQL::Schema::Field.prepend(::ElasticGraph::GraphQL::MonkeyPatches::SchemaFieldVisibilityDecorator)

data/lib/elastic_graph/graphql/monkey_patches/schema_object.rb ADDED Viewed

@@ -0,0 +1,48 @@
+# Copyright 2024 Block, Inc.
+#
+# Use of this source code is governed by an MIT-style
+# license that can be found in the LICENSE file or at
+# https://opensource.org/licenses/MIT.
+#
+# frozen_string_literal: true
+require "graphql"
+module ElasticGraph
+  class GraphQL
+    module MonkeyPatches
+      # This module is designed to monkey patch `GraphQL::Schema::Object`, but to do so in a
+      # conservative, safe way:
+      #
+      # - It defines no new methods.
+      # - It delegates to the original implementation with `super` unless we are sure that a type should be hidden.
+      # - It only changes the behavior for ElasticGraph schemas (as indicated by `:elastic_graph_schema` in the `context`).
+      module SchemaObjectVisibilityDecorator
+        def visible?(context)
+          if context[:elastic_graph_schema]&.type_named(graphql_name)&.hidden_from_queries?
+            context[:elastic_graph_query_tracker].record_hidden_type(graphql_name)
+            return false
+          end
+          super
+        end
+      end
+    end
+  end
+end
+# As per https://graphql-ruby.org/authorization/visibility.html, the public API
+# provided by the GraphQL gem to control visibility of object types is to define
+# a `visible?` class method on each of your type classes. However, because we load
+# our schema from an SDL definition rather than defining classes for each schema
+# type, we don't have a way to define the `visible?` on each of our type classes.
+#
+# So, here we solve this a slightly different way: we prepend a module onto
+# the `GraphQL::Schema::Object` singleton class. This allows our module to
+# act like a decorator and intercept calls to `visible?` so that it can hide
+# types as needed. This works because all types must be defined as subclasses
+# of `GraphQL::Schema::Object`, and in fact the GraphQL gem defines anonymous
+# subclasses for each type in our SDL schema, as you can see here:
+#
+# https://github.com/rmosolgo/graphql-ruby/blob/v1.12.16/lib/graphql/schema/build_from_definition.rb#L312
+GraphQL::Schema::Object.singleton_class.prepend ElasticGraph::GraphQL::MonkeyPatches::SchemaObjectVisibilityDecorator

data/lib/elastic_graph/graphql/query_adapter/filters.rb ADDED Viewed

@@ -0,0 +1,161 @@
+# Copyright 2024 Block, Inc.
+#
+# Use of this source code is governed by an MIT-style
+# license that can be found in the LICENSE file or at
+# https://opensource.org/licenses/MIT.
+#
+# frozen_string_literal: true
+require "elastic_graph/constants"
+require "elastic_graph/graphql/filtering/filter_value_set_extractor"
+require "elastic_graph/support/memoizable_data"
+module ElasticGraph
+  class GraphQL
+    class QueryAdapter
+      class Filters < Support::MemoizableData.define(:schema_element_names, :filter_args_translator)
+        def call(field:, query:, args:, lookahead:, context:)
+          filter_from_args = filter_args_translator.translate_filter_args(field: field, args: args)
+          automatic_filter = build_automatic_filter(filter_from_args: filter_from_args, query: query)
+          filters = [filter_from_args, automatic_filter].compact
+          return query if filters.empty?
+          query.merge_with(filters: filters)
+        end
+        private
+        def build_automatic_filter(filter_from_args:, query:)
+          # If an incomplete document could be hit by a search with our filters against any of the
+          # index definitions, we must add a filter that will exclude incomplete documents.
+          exclude_incomplete_docs_filter if query
+            .search_index_definitions
+            .any? { |index_def| search_could_hit_incomplete_docs?(index_def, filter_from_args || {}) }
+        end
+        def exclude_incomplete_docs_filter
+          {"__sources" => {schema_element_names.equal_to_any_of => [SELF_RELATIONSHIP_NAME]}}
+        end
+        # Indicates if a search against the given `index_def` using the given `filter_from_args`
+        # could hit an incomplete document.
+        def search_could_hit_incomplete_docs?(index_def, filter_from_args)
+          # If the index definition doesn't allow any searches to hit incomplete documents, we
+          # can immediately return `false` without checking the filters.
+          return false unless index_def.searches_could_hit_incomplete_docs?
+          # ...otherwise, we have to look at how we are filtering. An incomplete document will have `null`
+          # values for all fields with a `SELF_RELATIONSHIP_NAME` source. Therefore, if we filter on a
+          # self-sourced field in a way that excludes documents with a `null` value, the search cannot
+          # hit incomplete documents. However, when in doubt we'd rather return `true` as that's the safer
+          # value to return (no bugs will result from returning `true` when we could have returned `false`,
+          # but the query may not be as efficient as we'd like).
+          #
+          # Here we determine what field paths we need to check (e.g. only those field paths that are against
+          # self-sourced fields).
+          paths_to_check = determine_paths_to_check(filter_from_args, index_def.fields_by_path)
+          # If we have no paths to check, then our filters don't exclude incomplete documents and we must return `true`.
+          return true if paths_to_check.empty?
+          # Finally, we look over each path. If all our filters allow the search to match documents that have `nil`
+          # at that path, then the search can hit incomplete documents. But if even one path excludes documents
+          # that have a `null` value for the field, we can safely return `false` for a more efficient query.
+          paths_to_check.all? { |path| can_match_nil_values_at?(path, filter_from_args) }
+        end
+        # Figures out which field paths we need to check to see if a filter on it could match an incomplete document.
+        # This method returns the set intersection of:
+        #
+        # - The field paths we are filtering on.
+        # - The field paths that are sourced from `SELF_RELATIONSHIP_NAME`.
+        def determine_paths_to_check(filter, index_fields_by_path, parent_path: "")
+          filter.compact.flat_map do |field_name, value|
+            path = parent_path + field_name
+            if (index_field = index_fields_by_path[path])
+              # We've recursed down to a field path. We want that path to be returned if the
+              # field is sourced from SELF_RELATIONSHIP_NAME.
+              (index_field.source == SELF_RELATIONSHIP_NAME) ? [path] : []
+            elsif field_name == schema_element_names.any_of
+              # `any_of` represents an OR and the value will be an array, so we have to flat map over it.
+              value.flat_map do |sub_filter|
+                determine_paths_to_check(sub_filter, index_fields_by_path, parent_path: parent_path)
+              end
+            elsif field_name == schema_element_names.not
+              # While `not` represents negation, we don't have to negate anything here because the negation
+              # is handled later (when we use `filter_value_set_extractor`). Here we are just determining the
+              # paths to check. We want to recurse without adding `not` to the `parent_path` since it's not
+              # part of the field path.
+              determine_paths_to_check(value, index_fields_by_path, parent_path: parent_path)
+            else
+              # ...otherwise, `field_name` is a parent field and we need to recurse down through the children.
+              determine_paths_to_check(value, index_fields_by_path, parent_path: "#{path}.")
+            end
+          end
+        end
+        # Indicates if the given `filter` can match `nil` values at the given `path`. We rely
+        # on `filter_value_set_extractor` to determine it, since it understands the semantics
+        # of `any_of`, `not`, etc.
+        def can_match_nil_values_at?(path, filter)
+          filter_value_set_extractor.extract_filter_value_set([filter], [path]).includes_nil?
+        end
+        def filter_value_set_extractor
+          @filter_value_set_extractor ||=
+            Filtering::FilterValueSetExtractor.new(schema_element_names, IncludesNilSet) do |operator, filter_value|
+              if operator == :equal_to_any_of && filter_value.include?(nil)
+                IncludesNilSet
+              else
+                ExcludesNilSet
+              end
+            end
+        end
+        # Mixin for use with our set implementations that only care about if `nil` is an included value or not.
+        module NilFocusedSet
+          def union(other)
+            (includes_nil? || other.includes_nil?) ? IncludesNilSet : ExcludesNilSet
+          end
+          def intersection(other)
+            (includes_nil? && other.includes_nil?) ? IncludesNilSet : ExcludesNilSet
+          end
+        end
+        # A representation of a set that includes `nil`.
+        module IncludesNilSet
+          extend NilFocusedSet
+          # Methods provided by `extend NilFocusedSet`
+          # @dynamic self.union, self.intersection
+          def self.negate
+            ExcludesNilSet
+          end
+          def self.includes_nil?
+            true
+          end
+        end
+        # A representation of a set that excludes `nil`.
+        module ExcludesNilSet
+          extend NilFocusedSet
+          # Methods provided by `extend NilFocusedSet`
+          # @dynamic self.union, self.intersection
+          def self.negate
+            IncludesNilSet
+          end
+          def self.includes_nil?
+            false
+          end
+        end
+      end
+    end
+  end
+end

data/lib/elastic_graph/graphql/query_adapter/pagination.rb ADDED Viewed

@@ -0,0 +1,27 @@
+# Copyright 2024 Block, Inc.
+#
+# Use of this source code is governed by an MIT-style
+# license that can be found in the LICENSE file or at
+# https://opensource.org/licenses/MIT.
+#
+# frozen_string_literal: true
+module ElasticGraph
+  class GraphQL
+    class QueryAdapter
+      # Note: This class is not tested directly but indirectly through specs on `QueryAdapter`
+      Pagination = Data.define(:schema_element_names) do
+        # @implements Pagination
+        def call(query:, args:, lookahead:, field:, context:)
+          return query unless field.type.unwrap_fully.indexed_document?
+          document_pagination = [:first, :before, :last, :after].to_h do |key|
+            [key, args[schema_element_names.public_send(key)]]
+          end
+          query.merge_with(document_pagination: document_pagination)
+        end
+      end
+    end
+  end
+end