graphql 2.0.27 → 2.2.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (130) hide show
  1. checksums.yaml +4 -4
  2. data/lib/generators/graphql/install/templates/base_mutation.erb +2 -0
  3. data/lib/generators/graphql/install/templates/mutation_type.erb +2 -0
  4. data/lib/generators/graphql/install_generator.rb +3 -0
  5. data/lib/generators/graphql/templates/base_argument.erb +2 -0
  6. data/lib/generators/graphql/templates/base_connection.erb +2 -0
  7. data/lib/generators/graphql/templates/base_edge.erb +2 -0
  8. data/lib/generators/graphql/templates/base_enum.erb +2 -0
  9. data/lib/generators/graphql/templates/base_field.erb +2 -0
  10. data/lib/generators/graphql/templates/base_input_object.erb +2 -0
  11. data/lib/generators/graphql/templates/base_interface.erb +2 -0
  12. data/lib/generators/graphql/templates/base_object.erb +2 -0
  13. data/lib/generators/graphql/templates/base_resolver.erb +6 -0
  14. data/lib/generators/graphql/templates/base_scalar.erb +2 -0
  15. data/lib/generators/graphql/templates/base_union.erb +2 -0
  16. data/lib/generators/graphql/templates/graphql_controller.erb +2 -0
  17. data/lib/generators/graphql/templates/loader.erb +2 -0
  18. data/lib/generators/graphql/templates/mutation.erb +2 -0
  19. data/lib/generators/graphql/templates/node_type.erb +2 -0
  20. data/lib/generators/graphql/templates/query_type.erb +2 -0
  21. data/lib/generators/graphql/templates/schema.erb +2 -0
  22. data/lib/graphql/analysis/ast/analyzer.rb +7 -0
  23. data/lib/graphql/analysis/ast/field_usage.rb +32 -7
  24. data/lib/graphql/analysis/ast/query_complexity.rb +80 -128
  25. data/lib/graphql/analysis/ast/query_depth.rb +7 -2
  26. data/lib/graphql/analysis/ast/visitor.rb +2 -2
  27. data/lib/graphql/analysis/ast.rb +21 -11
  28. data/lib/graphql/backtrace/trace.rb +12 -15
  29. data/lib/graphql/coercion_error.rb +1 -9
  30. data/lib/graphql/dataloader/async_dataloader.rb +85 -0
  31. data/lib/graphql/dataloader/source.rb +11 -3
  32. data/lib/graphql/dataloader.rb +109 -142
  33. data/lib/graphql/duration_encoding_error.rb +16 -0
  34. data/lib/graphql/execution/interpreter/runtime/graphql_result.rb +170 -0
  35. data/lib/graphql/execution/interpreter/runtime.rb +70 -248
  36. data/lib/graphql/execution/interpreter.rb +91 -157
  37. data/lib/graphql/execution/lookahead.rb +88 -21
  38. data/lib/graphql/introspection/dynamic_fields.rb +1 -1
  39. data/lib/graphql/introspection/entry_points.rb +11 -5
  40. data/lib/graphql/introspection/schema_type.rb +3 -1
  41. data/lib/graphql/language/block_string.rb +34 -18
  42. data/lib/graphql/language/definition_slice.rb +1 -1
  43. data/lib/graphql/language/document_from_schema_definition.rb +37 -37
  44. data/lib/graphql/language/lexer.rb +271 -177
  45. data/lib/graphql/language/nodes.rb +74 -56
  46. data/lib/graphql/language/parser.rb +697 -1986
  47. data/lib/graphql/language/printer.rb +299 -146
  48. data/lib/graphql/language/sanitized_printer.rb +20 -22
  49. data/lib/graphql/language/static_visitor.rb +167 -0
  50. data/lib/graphql/language/visitor.rb +20 -81
  51. data/lib/graphql/language.rb +1 -0
  52. data/lib/graphql/load_application_object_failed_error.rb +5 -1
  53. data/lib/graphql/pagination/array_connection.rb +3 -3
  54. data/lib/graphql/pagination/connection.rb +28 -1
  55. data/lib/graphql/pagination/mongoid_relation_connection.rb +1 -2
  56. data/lib/graphql/pagination/relation_connection.rb +3 -3
  57. data/lib/graphql/query/context/scoped_context.rb +101 -0
  58. data/lib/graphql/query/context.rb +36 -98
  59. data/lib/graphql/query/null_context.rb +4 -11
  60. data/lib/graphql/query/validation_pipeline.rb +2 -2
  61. data/lib/graphql/query/variables.rb +3 -3
  62. data/lib/graphql/query.rb +13 -22
  63. data/lib/graphql/railtie.rb +9 -6
  64. data/lib/graphql/rake_task.rb +3 -12
  65. data/lib/graphql/schema/argument.rb +6 -1
  66. data/lib/graphql/schema/build_from_definition.rb +0 -11
  67. data/lib/graphql/schema/directive/one_of.rb +12 -0
  68. data/lib/graphql/schema/directive/specified_by.rb +14 -0
  69. data/lib/graphql/schema/directive.rb +1 -1
  70. data/lib/graphql/schema/enum.rb +3 -3
  71. data/lib/graphql/schema/field/connection_extension.rb +1 -15
  72. data/lib/graphql/schema/field/scope_extension.rb +8 -1
  73. data/lib/graphql/schema/field.rb +8 -5
  74. data/lib/graphql/schema/has_single_input_argument.rb +156 -0
  75. data/lib/graphql/schema/input_object.rb +2 -2
  76. data/lib/graphql/schema/interface.rb +10 -10
  77. data/lib/graphql/schema/introspection_system.rb +2 -0
  78. data/lib/graphql/schema/loader.rb +0 -2
  79. data/lib/graphql/schema/member/base_dsl_methods.rb +2 -1
  80. data/lib/graphql/schema/member/has_arguments.rb +61 -38
  81. data/lib/graphql/schema/member/has_fields.rb +8 -5
  82. data/lib/graphql/schema/member/has_interfaces.rb +23 -9
  83. data/lib/graphql/schema/member/scoped.rb +19 -0
  84. data/lib/graphql/schema/member/validates_input.rb +3 -3
  85. data/lib/graphql/schema/object.rb +8 -0
  86. data/lib/graphql/schema/printer.rb +8 -7
  87. data/lib/graphql/schema/relay_classic_mutation.rb +6 -128
  88. data/lib/graphql/schema/resolver.rb +7 -3
  89. data/lib/graphql/schema/scalar.rb +3 -3
  90. data/lib/graphql/schema/subscription.rb +11 -4
  91. data/lib/graphql/schema/union.rb +1 -1
  92. data/lib/graphql/schema/warden.rb +96 -94
  93. data/lib/graphql/schema.rb +219 -72
  94. data/lib/graphql/static_validation/all_rules.rb +1 -1
  95. data/lib/graphql/static_validation/base_visitor.rb +1 -1
  96. data/lib/graphql/static_validation/literal_validator.rb +1 -1
  97. data/lib/graphql/static_validation/rules/fields_will_merge.rb +1 -1
  98. data/lib/graphql/static_validation/rules/required_arguments_are_present.rb +1 -1
  99. data/lib/graphql/static_validation/rules/required_input_object_attributes_are_present.rb +1 -1
  100. data/lib/graphql/static_validation/validation_context.rb +5 -5
  101. data/lib/graphql/static_validation/validator.rb +3 -0
  102. data/lib/graphql/static_validation.rb +0 -1
  103. data/lib/graphql/subscriptions/action_cable_subscriptions.rb +3 -2
  104. data/lib/graphql/subscriptions/event.rb +8 -2
  105. data/lib/graphql/subscriptions.rb +14 -12
  106. data/lib/graphql/testing/helpers.rb +125 -0
  107. data/lib/graphql/testing.rb +2 -0
  108. data/lib/graphql/tracing/appoptics_trace.rb +2 -2
  109. data/lib/graphql/tracing/appoptics_tracing.rb +2 -2
  110. data/lib/graphql/tracing/data_dog_trace.rb +21 -34
  111. data/lib/graphql/tracing/data_dog_tracing.rb +7 -21
  112. data/lib/graphql/tracing/legacy_hooks_trace.rb +74 -0
  113. data/lib/graphql/tracing/platform_tracing.rb +2 -0
  114. data/lib/graphql/tracing/{prometheus_tracing → prometheus_trace}/graphql_collector.rb +3 -1
  115. data/lib/graphql/tracing/sentry_trace.rb +94 -0
  116. data/lib/graphql/tracing/trace.rb +1 -0
  117. data/lib/graphql/tracing.rb +3 -1
  118. data/lib/graphql/types/iso_8601_duration.rb +77 -0
  119. data/lib/graphql/types/relay/connection_behaviors.rb +32 -2
  120. data/lib/graphql/types/relay/edge_behaviors.rb +7 -0
  121. data/lib/graphql/types.rb +1 -0
  122. data/lib/graphql/version.rb +1 -1
  123. data/lib/graphql.rb +3 -3
  124. data/readme.md +12 -2
  125. metadata +33 -25
  126. data/lib/graphql/deprecation.rb +0 -9
  127. data/lib/graphql/filter.rb +0 -59
  128. data/lib/graphql/language/parser.y +0 -560
  129. data/lib/graphql/static_validation/type_stack.rb +0 -216
  130. data/lib/graphql/subscriptions/instrumentation.rb +0 -28
@@ -14,7 +14,7 @@ module GraphQL
14
14
  # @param include_built_in_directives [Boolean] Whether or not to include built in directives in the AST
15
15
  class DocumentFromSchemaDefinition
16
16
  def initialize(
17
- schema, context: nil, only: nil, except: nil, include_introspection_types: false,
17
+ schema, context: nil, include_introspection_types: false,
18
18
  include_built_in_directives: false, include_built_in_scalars: false, always_include_schema: false
19
19
  )
20
20
  @schema = schema
@@ -26,21 +26,11 @@ module GraphQL
26
26
 
27
27
  schema_context = schema.context_class.new(query: nil, object: nil, schema: schema, values: context)
28
28
 
29
- @warden = if only || except
30
- filter = GraphQL::Filter
31
- .new(only: only, except: except)
32
- .merge(only: @schema.method(:visible?))
33
- GraphQL::Schema::Warden.new(
34
- filter,
35
- schema: @schema,
36
- context: schema_context,
37
- )
38
- else
39
- @schema.warden_class.new(
40
- schema: @schema,
41
- context: schema_context,
42
- )
43
- end
29
+
30
+ @warden = @schema.warden_class.new(
31
+ schema: @schema,
32
+ context: schema_context,
33
+ )
44
34
 
45
35
  schema_context.warden = @warden
46
36
  end
@@ -52,24 +42,30 @@ module GraphQL
52
42
  end
53
43
 
54
44
  def build_schema_node
55
- schema_options = {
56
- # `@schema.directives` is covered by `build_definition_nodes`
57
- directives: definition_directives(@schema, :schema_directives),
58
- }
59
45
  if !schema_respects_root_name_conventions?(@schema)
60
- schema_options.merge!({
46
+ GraphQL::Language::Nodes::SchemaDefinition.new(
61
47
  query: (q = warden.root_type_for_operation("query")) && q.graphql_name,
62
48
  mutation: (m = warden.root_type_for_operation("mutation")) && m.graphql_name,
63
49
  subscription: (s = warden.root_type_for_operation("subscription")) && s.graphql_name,
64
- })
50
+ directives: definition_directives(@schema, :schema_directives)
51
+ )
52
+ else
53
+ # A plain `schema ...` _must_ include root type definitions.
54
+ # If the only difference is directives, then you have to use `extend schema`
55
+ GraphQL::Language::Nodes::SchemaExtension.new(directives: definition_directives(@schema, :schema_directives))
65
56
  end
66
- GraphQL::Language::Nodes::SchemaDefinition.new(schema_options)
67
57
  end
68
58
 
69
59
  def build_object_type_node(object_type)
60
+ ints = warden.interfaces(object_type)
61
+ if ints.any?
62
+ ints.sort_by!(&:graphql_name)
63
+ ints.map! { |iface| build_type_name_node(iface) }
64
+ end
65
+
70
66
  GraphQL::Language::Nodes::ObjectTypeDefinition.new(
71
67
  name: object_type.graphql_name,
72
- interfaces: warden.interfaces(object_type).sort_by(&:graphql_name).map { |iface| build_type_name_node(iface) },
68
+ interfaces: ints,
73
69
  fields: build_field_nodes(warden.fields(object_type)),
74
70
  description: object_type.description,
75
71
  directives: directives(object_type),
@@ -190,7 +186,8 @@ module GraphQL
190
186
  of_type: build_type_name_node(type.of_type)
191
187
  )
192
188
  else
193
- GraphQL::Language::Nodes::TypeName.new(name: type.graphql_name)
189
+ @cached_type_name_nodes ||= {}
190
+ @cached_type_name_nodes[type.graphql_name] ||= GraphQL::Language::Nodes::TypeName.new(name: type.graphql_name)
194
191
  end
195
192
  end
196
193
 
@@ -247,9 +244,13 @@ module GraphQL
247
244
  end
248
245
 
249
246
  def build_argument_nodes(arguments)
250
- arguments
251
- .map { |arg| build_argument_node(arg) }
252
- .sort_by(&:name)
247
+ if arguments.any?
248
+ nodes = arguments.map { |arg| build_argument_node(arg) }
249
+ nodes.sort_by!(&:name)
250
+ nodes
251
+ else
252
+ arguments
253
+ end
253
254
  end
254
255
 
255
256
  def build_directive_nodes(directives)
@@ -263,16 +264,15 @@ module GraphQL
263
264
  if !include_built_in_directives
264
265
  dirs_to_build = dirs_to_build.reject { |directive| directive.default_directive? }
265
266
  end
266
- dir_nodes = build_directive_nodes(dirs_to_build)
267
-
268
- type_nodes = build_type_definition_nodes(warden.reachable_types)
267
+ definitions = build_directive_nodes(dirs_to_build)
269
268
 
269
+ type_nodes = build_type_definition_nodes(warden.reachable_types + schema.extra_types)
270
270
  if @include_one_of
271
271
  # This may have been set to true when iterating over all types
272
- dir_nodes.concat(build_directive_nodes([GraphQL::Schema::Directive::OneOf]))
272
+ definitions.concat(build_directive_nodes([GraphQL::Schema::Directive::OneOf]))
273
273
  end
274
274
 
275
- definitions = [*dir_nodes, *type_nodes]
275
+ definitions.concat(type_nodes)
276
276
  if include_schema_node?
277
277
  definitions.unshift(build_schema_node)
278
278
  end
@@ -295,9 +295,9 @@ module GraphQL
295
295
  end
296
296
 
297
297
  def build_field_nodes(fields)
298
- fields
299
- .map { |field| build_field_node(field) }
300
- .sort_by(&:name)
298
+ f_nodes = fields.map { |field| build_field_node(field) }
299
+ f_nodes.sort_by!(&:name)
300
+ f_nodes
301
301
  end
302
302
 
303
303
  private
@@ -320,7 +320,7 @@ module GraphQL
320
320
 
321
321
  def definition_directives(member, directives_method)
322
322
  dirs = if !member.respond_to?(directives_method) || member.directives.empty?
323
- []
323
+ EmptyObjects::EMPTY_ARRAY
324
324
  else
325
325
  member.public_send(directives_method).map do |dir|
326
326
  args = []
@@ -1,39 +1,228 @@
1
1
  # frozen_string_literal: true
2
-
3
- require "strscan"
4
-
5
2
  module GraphQL
6
3
  module Language
4
+
7
5
  class Lexer
8
- IDENTIFIER = /[_A-Za-z][_0-9A-Za-z]*/
9
- NEWLINE = /[\c\r\n]/
10
- BLANK = /[, \t]+/
11
- COMMENT = /#[^\n\r]*/
12
- INT = /[-]?(?:[0]|[1-9][0-9]*)/
13
- FLOAT_DECIMAL = /[.][0-9]+/
14
- FLOAT_EXP = /[eE][+-]?[0-9]+/
15
- FLOAT = /#{INT}(#{FLOAT_DECIMAL}#{FLOAT_EXP}|#{FLOAT_DECIMAL}|#{FLOAT_EXP})/
16
-
17
- module Literals
18
- ON = /on\b/
19
- FRAGMENT = /fragment\b/
20
- TRUE = /true\b/
21
- FALSE = /false\b/
22
- NULL = /null\b/
23
- QUERY = /query\b/
24
- MUTATION = /mutation\b/
25
- SUBSCRIPTION = /subscription\b/
26
- SCHEMA = /schema\b/
27
- SCALAR = /scalar\b/
28
- TYPE = /type\b/
29
- EXTEND = /extend\b/
30
- IMPLEMENTS = /implements\b/
31
- INTERFACE = /interface\b/
32
- UNION = /union\b/
33
- ENUM = /enum\b/
34
- INPUT = /input\b/
35
- DIRECTIVE = /directive\b/
36
- REPEATABLE = /repeatable\b/
6
+ def initialize(graphql_str, filename: nil)
7
+ if !(graphql_str.encoding == Encoding::UTF_8 || graphql_str.ascii_only?)
8
+ graphql_str = graphql_str.dup.force_encoding(Encoding::UTF_8)
9
+ end
10
+ @string = graphql_str
11
+ @filename = filename
12
+ @scanner = StringScanner.new(graphql_str)
13
+ @pos = nil
14
+ end
15
+
16
+ def eos?
17
+ @scanner.eos?
18
+ end
19
+
20
+ attr_reader :pos
21
+
22
+ def advance
23
+ @scanner.skip(IGNORE_REGEXP)
24
+ return false if @scanner.eos?
25
+ @pos = @scanner.pos
26
+ next_byte = @string.getbyte(@pos)
27
+ next_byte_is_for = FIRST_BYTES[next_byte]
28
+ case next_byte_is_for
29
+ when ByteFor::PUNCTUATION
30
+ @scanner.pos += 1
31
+ PUNCTUATION_NAME_FOR_BYTE[next_byte]
32
+ when ByteFor::NAME
33
+ if len = @scanner.skip(KEYWORD_REGEXP)
34
+ case len
35
+ when 2
36
+ :ON
37
+ when 12
38
+ :SUBSCRIPTION
39
+ else
40
+ pos = @pos
41
+
42
+ # Use bytes 2 and 3 as a unique identifier for this keyword
43
+ bytes = (@string.getbyte(pos + 2) << 8) | @string.getbyte(pos + 1)
44
+ KEYWORD_BY_TWO_BYTES[_hash(bytes)]
45
+ end
46
+ else
47
+ @scanner.skip(IDENTIFIER_REGEXP)
48
+ :IDENTIFIER
49
+ end
50
+ when ByteFor::IDENTIFIER
51
+ @scanner.skip(IDENTIFIER_REGEXP)
52
+ :IDENTIFIER
53
+ when ByteFor::NUMBER
54
+ @scanner.skip(NUMERIC_REGEXP)
55
+ # Check for a matched decimal:
56
+ @scanner[1] ? :FLOAT : :INT
57
+ when ByteFor::ELLIPSIS
58
+ if @string.getbyte(@pos + 1) != 46 || @string.getbyte(@pos + 2) != 46
59
+ raise_parse_error("Expected `...`, actual: #{@string[@pos..@pos + 2].inspect}")
60
+ end
61
+ @scanner.pos += 3
62
+ :ELLIPSIS
63
+ when ByteFor::STRING
64
+ if @scanner.skip(BLOCK_STRING_REGEXP) || @scanner.skip(QUOTED_STRING_REGEXP)
65
+ :STRING
66
+ else
67
+ raise_parse_error("Expected string or block string, but it was malformed")
68
+ end
69
+ else
70
+ @scanner.pos += 1
71
+ :UNKNOWN_CHAR
72
+ end
73
+ rescue ArgumentError => err
74
+ if err.message == "invalid byte sequence in UTF-8"
75
+ raise_parse_error("Parse error on bad Unicode escape sequence", nil, nil)
76
+ end
77
+ end
78
+
79
+ def token_value
80
+ @string.byteslice(@scanner.pos - @scanner.matched_size, @scanner.matched_size)
81
+ rescue StandardError => err
82
+ raise GraphQL::Error, "(token_value failed: #{err.class}: #{err.message})"
83
+ end
84
+
85
+ def debug_token_value(token_name)
86
+ if token_name && Lexer::Punctuation.const_defined?(token_name)
87
+ Lexer::Punctuation.const_get(token_name)
88
+ elsif token_name == :ELLIPSIS
89
+ "..."
90
+ elsif token_name == :STRING
91
+ string_value
92
+ else
93
+ token_value
94
+ end
95
+ end
96
+
97
+ ESCAPES = /\\["\\\/bfnrt]/
98
+ ESCAPES_REPLACE = {
99
+ '\\"' => '"',
100
+ "\\\\" => "\\",
101
+ "\\/" => '/',
102
+ "\\b" => "\b",
103
+ "\\f" => "\f",
104
+ "\\n" => "\n",
105
+ "\\r" => "\r",
106
+ "\\t" => "\t",
107
+ }
108
+ UTF_8 = /\\u(?:([\dAa-f]{4})|\{([\da-f]{4,})\})(?:\\u([\dAa-f]{4}))?/i
109
+ VALID_STRING = /\A(?:[^\\]|#{ESCAPES}|#{UTF_8})*\z/o
110
+
111
+ def string_value
112
+ str = token_value
113
+ is_block = str.start_with?('"""')
114
+ if is_block
115
+ str.gsub!(/\A"""|"""\z/, '')
116
+ else
117
+ str.gsub!(/\A"|"\z/, '')
118
+ end
119
+
120
+ if is_block
121
+ str = Language::BlockString.trim_whitespace(str)
122
+ end
123
+
124
+ if !str.valid_encoding? || !str.match?(VALID_STRING)
125
+ raise_parse_error("Bad unicode escape in #{str.inspect}")
126
+ else
127
+ Lexer.replace_escaped_characters_in_place(str)
128
+
129
+ if !str.valid_encoding?
130
+ raise_parse_error("Bad unicode escape in #{str.inspect}")
131
+ else
132
+ str
133
+ end
134
+ end
135
+ end
136
+
137
+ def line_number
138
+ @scanner.string[0..@pos].count("\n") + 1
139
+ end
140
+
141
+ def column_number
142
+ @scanner.string[0..@pos].split("\n").last.length
143
+ end
144
+
145
+ def raise_parse_error(message, line = line_number, col = column_number)
146
+ raise GraphQL::ParseError.new(message, line, col, @string, filename: @filename)
147
+ end
148
+
149
+ IGNORE_REGEXP = %r{
150
+ (?:
151
+ [, \c\r\n\t]+ |
152
+ \#.*$
153
+ )*
154
+ }x
155
+ IDENTIFIER_REGEXP = /[_A-Za-z][_0-9A-Za-z]*/
156
+ INT_REGEXP = /-?(?:[0]|[1-9][0-9]*)/
157
+ FLOAT_DECIMAL_REGEXP = /[.][0-9]+/
158
+ FLOAT_EXP_REGEXP = /[eE][+-]?[0-9]+/
159
+ NUMERIC_REGEXP = /#{INT_REGEXP}(#{FLOAT_DECIMAL_REGEXP}#{FLOAT_EXP_REGEXP}|#{FLOAT_DECIMAL_REGEXP}|#{FLOAT_EXP_REGEXP})?/
160
+
161
+ KEYWORDS = [
162
+ "on",
163
+ "fragment",
164
+ "true",
165
+ "false",
166
+ "null",
167
+ "query",
168
+ "mutation",
169
+ "subscription",
170
+ "schema",
171
+ "scalar",
172
+ "type",
173
+ "extend",
174
+ "implements",
175
+ "interface",
176
+ "union",
177
+ "enum",
178
+ "input",
179
+ "directive",
180
+ "repeatable"
181
+ ].freeze
182
+
183
+ KEYWORD_REGEXP = /#{Regexp.union(KEYWORDS.sort)}\b/
184
+ KEYWORD_BY_TWO_BYTES = [
185
+ :INTERFACE,
186
+ :MUTATION,
187
+ :EXTEND,
188
+ :FALSE,
189
+ :ENUM,
190
+ :TRUE,
191
+ :NULL,
192
+ nil,
193
+ nil,
194
+ nil,
195
+ nil,
196
+ nil,
197
+ nil,
198
+ nil,
199
+ :QUERY,
200
+ nil,
201
+ nil,
202
+ :REPEATABLE,
203
+ :IMPLEMENTS,
204
+ :INPUT,
205
+ :TYPE,
206
+ :SCHEMA,
207
+ nil,
208
+ nil,
209
+ nil,
210
+ :DIRECTIVE,
211
+ :UNION,
212
+ nil,
213
+ nil,
214
+ :SCALAR,
215
+ nil,
216
+ :FRAGMENT
217
+ ]
218
+
219
+ # This produces a unique integer for bytes 2 and 3 of each keyword string
220
+ # See https://tenderlovemaking.com/2023/09/02/fast-tokenizers-with-stringscanner.html
221
+ def _hash key
222
+ (key * 18592990) >> 27 & 0x1f
223
+ end
224
+
225
+ module Punctuation
37
226
  LCURLY = '{'
38
227
  RCURLY = '}'
39
228
  LPAREN = '('
@@ -43,36 +232,31 @@ module GraphQL
43
232
  COLON = ':'
44
233
  VAR_SIGN = '$'
45
234
  DIR_SIGN = '@'
46
- ELLIPSIS = '...'
47
235
  EQUALS = '='
48
236
  BANG = '!'
49
237
  PIPE = '|'
50
238
  AMP = '&'
51
239
  end
52
240
 
53
- include Literals
241
+ # A sparse array mapping the bytes for each punctuation
242
+ # to a symbol name for that punctuation
243
+ PUNCTUATION_NAME_FOR_BYTE = Punctuation.constants.each_with_object([]) { |name, arr|
244
+ punct = Punctuation.const_get(name)
245
+ arr[punct.ord] = name
246
+ }
54
247
 
55
248
  QUOTE = '"'
56
249
  UNICODE_DIGIT = /[0-9A-Za-z]/
57
250
  FOUR_DIGIT_UNICODE = /#{UNICODE_DIGIT}{4}/
58
- N_DIGIT_UNICODE = %r{#{LCURLY}#{UNICODE_DIGIT}{4,}#{RCURLY}}x
251
+ N_DIGIT_UNICODE = %r{#{Punctuation::LCURLY}#{UNICODE_DIGIT}{4,}#{Punctuation::RCURLY}}x
59
252
  UNICODE_ESCAPE = %r{\\u(?:#{FOUR_DIGIT_UNICODE}|#{N_DIGIT_UNICODE})}
60
- # # https://graphql.github.io/graphql-spec/June2018/#sec-String-Value
253
+ # # https://graphql.github.io/graphql-spec/June2018/#sec-String-Value
61
254
  STRING_ESCAPE = %r{[\\][\\/bfnrt]}
62
255
  BLOCK_QUOTE = '"""'
63
256
  ESCAPED_QUOTE = /\\"/;
64
257
  STRING_CHAR = /#{ESCAPED_QUOTE}|[^"\\]|#{UNICODE_ESCAPE}|#{STRING_ESCAPE}/
65
-
66
- LIT_NAME_LUT = Literals.constants.each_with_object({}) { |n, o|
67
- key = Literals.const_get(n)
68
- key = key.is_a?(Regexp) ? key.source.gsub(/(\\b|\\)/, '') : key
69
- o[key] = n
70
- }
71
-
72
- LIT = Regexp.union(Literals.constants.map { |n| Literals.const_get(n) })
73
-
74
- QUOTED_STRING = %r{#{QUOTE} (?:#{STRING_CHAR})* #{QUOTE}}x
75
- BLOCK_STRING = %r{
258
+ QUOTED_STRING_REGEXP = %r{#{QUOTE} (?:#{STRING_CHAR})* #{QUOTE}}x
259
+ BLOCK_STRING_REGEXP = %r{
76
260
  #{BLOCK_QUOTE}
77
261
  (?: [^"\\] | # Any characters that aren't a quote or slash
78
262
  (?<!") ["]{1,2} (?!") | # Any quotes that don't have quotes next to them
@@ -84,85 +268,33 @@ module GraphQL
84
268
  #{BLOCK_QUOTE}
85
269
  }xm
86
270
 
87
- # # catch-all for anything else. must be at the bottom for precedence.
88
- UNKNOWN_CHAR = /./
89
-
90
- def initialize(value)
91
- @line = 1
92
- @col = 1
93
- @previous_token = nil
94
-
95
- @scan = scanner value
271
+ # Use this array to check, for a given byte that will start a token,
272
+ # what kind of token might it start?
273
+ FIRST_BYTES = Array.new(255)
274
+
275
+ module ByteFor
276
+ NUMBER = 0 # int or float
277
+ NAME = 1 # identifier or keyword
278
+ STRING = 2
279
+ ELLIPSIS = 3
280
+ IDENTIFIER = 4 # identifier, *not* a keyword
281
+ PUNCTUATION = 5
96
282
  end
97
283
 
98
- class BadEncoding < Lexer # :nodoc:
99
- def scanner(value)
100
- [emit(:BAD_UNICODE_ESCAPE, 0, 0, value)]
101
- end
102
-
103
- def next_token
104
- @scan.pop
105
- end
284
+ (0..9).each { |i| FIRST_BYTES[i.to_s.ord] = ByteFor::NUMBER }
285
+ FIRST_BYTES["-".ord] = ByteFor::NUMBER
286
+ # Some of these may be overwritten below, if keywords start with the same character
287
+ ("A".."Z").each { |char| FIRST_BYTES[char.ord] = ByteFor::IDENTIFIER }
288
+ ("a".."z").each { |char| FIRST_BYTES[char.ord] = ByteFor::IDENTIFIER }
289
+ FIRST_BYTES['_'.ord] = ByteFor::IDENTIFIER
290
+ FIRST_BYTES['.'.ord] = ByteFor::ELLIPSIS
291
+ FIRST_BYTES['"'.ord] = ByteFor::STRING
292
+ KEYWORDS.each { |kw| FIRST_BYTES[kw.getbyte(0)] = ByteFor::NAME }
293
+ Punctuation.constants.each do |punct_name|
294
+ punct = Punctuation.const_get(punct_name)
295
+ FIRST_BYTES[punct.ord] = ByteFor::PUNCTUATION
106
296
  end
107
297
 
108
- def self.tokenize(string)
109
- value = string.dup.force_encoding(Encoding::UTF_8)
110
-
111
- scanner = if value.valid_encoding?
112
- new value
113
- else
114
- BadEncoding.new value
115
- end
116
-
117
- toks = []
118
-
119
- while tok = scanner.next_token
120
- toks << tok
121
- end
122
-
123
- toks
124
- end
125
-
126
- def next_token
127
- return if @scan.eos?
128
-
129
- pos = @scan.pos
130
-
131
- case
132
- when str = @scan.scan(FLOAT) then emit(:FLOAT, pos, @scan.pos, str)
133
- when str = @scan.scan(INT) then emit(:INT, pos, @scan.pos, str)
134
- when str = @scan.scan(LIT) then emit(LIT_NAME_LUT[str], pos, @scan.pos, -str)
135
- when str = @scan.scan(IDENTIFIER) then emit(:IDENTIFIER, pos, @scan.pos, str)
136
- when str = @scan.scan(BLOCK_STRING) then emit_block(pos, @scan.pos, str.gsub(/\A#{BLOCK_QUOTE}|#{BLOCK_QUOTE}\z/, ''))
137
- when str = @scan.scan(QUOTED_STRING) then emit_string(pos, @scan.pos, str.gsub(/^"|"$/, ''))
138
- when str = @scan.scan(COMMENT) then record_comment(pos, @scan.pos, str)
139
- when str = @scan.scan(NEWLINE)
140
- @line += 1
141
- @col = 1
142
- next_token
143
- when @scan.scan(BLANK)
144
- @col += @scan.pos - pos
145
- next_token
146
- when str = @scan.scan(UNKNOWN_CHAR) then emit(:UNKNOWN_CHAR, pos, @scan.pos, str)
147
- else
148
- # This should never happen since `UNKNOWN_CHAR` ensures we make progress
149
- raise "Unknown string?"
150
- end
151
- end
152
-
153
- def emit(token_name, ts, te, token_value)
154
- token = [
155
- token_name,
156
- @line,
157
- @col,
158
- token_value,
159
- @previous_token,
160
- ]
161
- @previous_token = token
162
- # Bump the column counter for the next token
163
- @col += te - ts
164
- token
165
- end
166
298
 
167
299
  # Replace any escaped unicode or whitespace with the _actual_ characters
168
300
  # To avoid allocating more strings, this modifies the string passed into it
@@ -190,63 +322,25 @@ module GraphQL
190
322
  nil
191
323
  end
192
324
 
193
- def record_comment(ts, te, str)
194
- token = [
195
- :COMMENT,
196
- @line,
197
- @col,
198
- str,
199
- @previous_token,
200
- ]
201
-
202
- @previous_token = token
203
-
204
- @col += te - ts
205
- next_token
206
- end
207
-
208
- ESCAPES = /\\["\\\/bfnrt]/
209
- ESCAPES_REPLACE = {
210
- '\\"' => '"',
211
- "\\\\" => "\\",
212
- "\\/" => '/',
213
- "\\b" => "\b",
214
- "\\f" => "\f",
215
- "\\n" => "\n",
216
- "\\r" => "\r",
217
- "\\t" => "\t",
218
- }
219
- UTF_8 = /\\u(?:([\dAa-f]{4})|\{([\da-f]{4,})\})(?:\\u([\dAa-f]{4}))?/i
220
- VALID_STRING = /\A(?:[^\\]|#{ESCAPES}|#{UTF_8})*\z/o
221
-
222
- def emit_block(ts, te, value)
223
- line_incr = value.count("\n")
224
- value = GraphQL::Language::BlockString.trim_whitespace(value)
225
- tok = emit_string(ts, te, value)
226
- @line += line_incr
227
- tok
228
- end
229
-
230
- def emit_string(ts, te, value)
231
- if !value.valid_encoding? || !value.match?(VALID_STRING)
232
- emit(:BAD_UNICODE_ESCAPE, ts, te, value)
233
- else
234
- self.class.replace_escaped_characters_in_place(value)
235
-
236
- if !value.valid_encoding?
237
- emit(:BAD_UNICODE_ESCAPE, ts, te, value)
238
- else
239
- emit(:STRING, ts, te, value)
240
- end
325
+ # This is not used during parsing because the parser
326
+ # doesn't actually need tokens.
327
+ def self.tokenize(string)
328
+ lexer = GraphQL::Language::Lexer.new(string)
329
+ tokens = []
330
+ prev_token = nil
331
+ while (token_name = lexer.advance)
332
+ new_token = [
333
+ token_name,
334
+ lexer.line_number,
335
+ lexer.column_number,
336
+ lexer.debug_token_value(token_name),
337
+ prev_token,
338
+ ]
339
+ tokens << new_token
340
+ prev_token = new_token
241
341
  end
342
+ tokens
242
343
  end
243
-
244
- private
245
-
246
- def scanner(value)
247
- StringScanner.new value
248
- end
249
-
250
344
  end
251
345
  end
252
346
  end