tinygql 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,256 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "strscan"
4
+
5
+ module TinyGQL
6
+ class Lexer
7
+ IDENTIFIER = /[_A-Za-z][_0-9A-Za-z]*/
8
+ IGNORE = %r{
9
+ (?:
10
+ [, \c\r\n\t]+ |
11
+ \#.*$
12
+ )
13
+ }x
14
+ INT = /[-]?(?:[0]|[1-9][0-9]*)/
15
+ FLOAT_DECIMAL = /[.][0-9]+/
16
+ FLOAT_EXP = /[eE][+-]?[0-9]+/
17
+ FLOAT = /#{INT}(#{FLOAT_DECIMAL}#{FLOAT_EXP}|#{FLOAT_DECIMAL}|#{FLOAT_EXP})/
18
+
19
+ module Literals
20
+ ON = /on\b/
21
+ FRAGMENT = /fragment\b/
22
+ TRUE = /true\b/
23
+ FALSE = /false\b/
24
+ NULL = /null\b/
25
+ QUERY = /query\b/
26
+ MUTATION = /mutation\b/
27
+ SUBSCRIPTION = /subscription\b/
28
+ SCHEMA = /schema\b/
29
+ SCALAR = /scalar\b/
30
+ TYPE = /type\b/
31
+ EXTEND = /extend\b/
32
+ IMPLEMENTS = /implements\b/
33
+ INTERFACE = /interface\b/
34
+ UNION = /union\b/
35
+ ENUM = /enum\b/
36
+ INPUT = /input\b/
37
+ DIRECTIVE = /directive\b/
38
+ REPEATABLE = /repeatable\b/
39
+ LCURLY = '{'
40
+ RCURLY = '}'
41
+ LPAREN = '('
42
+ RPAREN = ')'
43
+ LBRACKET = '['
44
+ RBRACKET = ']'
45
+ COLON = ':'
46
+ VAR_SIGN = '$'
47
+ DIR_SIGN = '@'
48
+ ELLIPSIS = '...'
49
+ EQUALS = '='
50
+ BANG = '!'
51
+ PIPE = '|'
52
+ AMP = '&'
53
+ end
54
+
55
+ include Literals
56
+
57
+ QUOTE = '"'
58
+ UNICODE_DIGIT = /[0-9A-Za-z]/
59
+ FOUR_DIGIT_UNICODE = /#{UNICODE_DIGIT}{4}/
60
+ N_DIGIT_UNICODE = %r{#{LCURLY}#{UNICODE_DIGIT}{4,}#{RCURLY}}x
61
+ UNICODE_ESCAPE = %r{\\u(?:#{FOUR_DIGIT_UNICODE}|#{N_DIGIT_UNICODE})}
62
+ # # https://graphql.github.io/graphql-spec/June2018/#sec-String-Value
63
+ STRING_ESCAPE = %r{[\\][\\/bfnrt]}
64
+ BLOCK_QUOTE = '"""'
65
+ ESCAPED_QUOTE = /\\"/;
66
+ STRING_CHAR = /#{ESCAPED_QUOTE}|[^"\\]|#{UNICODE_ESCAPE}|#{STRING_ESCAPE}/
67
+
68
+ LIT_NAME_LUT = Literals.constants.each_with_object({}) { |n, o|
69
+ key = Literals.const_get(n)
70
+ key = key.is_a?(Regexp) ? key.source.gsub(/(\\b|\\)/, '') : key
71
+ o[key] = n
72
+ }
73
+
74
+ LIT = Regexp.union(Literals.constants.map { |n| Literals.const_get(n) })
75
+
76
+ QUOTED_STRING = %r{#{QUOTE} ((?:#{STRING_CHAR})*) #{QUOTE}}x
77
+ BLOCK_STRING = %r{
78
+ #{BLOCK_QUOTE}
79
+ ((?: [^"\\] | # Any characters that aren't a quote or slash
80
+ (?<!") ["]{1,2} (?!") | # Any quotes that don't have quotes next to them
81
+ \\"{0,3}(?!") | # A slash followed by <= 3 quotes that aren't followed by a quote
82
+ \\ | # A slash
83
+ "{1,2}(?!") # 1 or 2 " followed by something that isn't a quote
84
+ )*
85
+ (?:"")?)
86
+ #{BLOCK_QUOTE}
87
+ }xm
88
+
89
+ # # catch-all for anything else. must be at the bottom for precedence.
90
+ UNKNOWN_CHAR = /./
91
+
92
+ def initialize string
93
+ raise unless string.valid_encoding?
94
+
95
+ @scan = StringScanner.new string
96
+ @token_name = nil
97
+ @token_value = nil
98
+ end
99
+
100
+ def line
101
+ @scan.string[0, @scan.pos].count("\n")
102
+ end
103
+
104
+ def done?
105
+ @scan.eos?
106
+ end
107
+
108
+ def advance
109
+ while true
110
+ if @scan.eos?
111
+ emit nil, nil
112
+ return false
113
+ end
114
+
115
+ case
116
+ when @scan.skip(IGNORE) then redo
117
+ when str = @scan.scan(FLOAT) then return emit(:FLOAT, str)
118
+ when str = @scan.scan(INT) then return emit(:INT, str)
119
+ when str = @scan.scan(LIT) then return emit(LIT_NAME_LUT[str], str)
120
+ when str = @scan.scan(IDENTIFIER) then return emit(:IDENTIFIER, str)
121
+ when @scan.skip(BLOCK_STRING) then return emit_block(@scan[1])
122
+ when @scan.skip(QUOTED_STRING) then return emit_string(@scan[1])
123
+ when str = @scan.scan(UNKNOWN_CHAR) then return emit(:UNKNOWN_CHAR, str)
124
+ else
125
+ # This should never happen since `UNKNOWN_CHAR` ensures we make progress
126
+ raise "Unknown string?"
127
+ end
128
+ end
129
+ end
130
+
131
+ attr_reader :token_name, :token_value
132
+
133
+ def emit token_name, token_value
134
+ @token_name = token_name
135
+ @token_value = token_value
136
+ true
137
+ end
138
+
139
+ def next_token
140
+ advance && [@token_name, @token_value]
141
+ end
142
+
143
+ # Replace any escaped unicode or whitespace with the _actual_ characters
144
+ # To avoid allocating more strings, this modifies the string passed into it
145
+ def replace_escaped_characters_in_place(raw_string)
146
+ raw_string.gsub!(ESCAPES, ESCAPES_REPLACE)
147
+ raw_string.gsub!(UTF_8) do |_matched_str|
148
+ codepoint_1 = ($1 || $2).to_i(16)
149
+ codepoint_2 = $3
150
+
151
+ if codepoint_2
152
+ codepoint_2 = codepoint_2.to_i(16)
153
+ if (codepoint_1 >= 0xD800 && codepoint_1 <= 0xDBFF) && # leading surrogate
154
+ (codepoint_2 >= 0xDC00 && codepoint_2 <= 0xDFFF) # trailing surrogate
155
+ # A surrogate pair
156
+ combined = ((codepoint_1 - 0xD800) * 0x400) + (codepoint_2 - 0xDC00) + 0x10000
157
+ [combined].pack('U'.freeze)
158
+ else
159
+ # Two separate code points
160
+ [codepoint_1].pack('U'.freeze) + [codepoint_2].pack('U'.freeze)
161
+ end
162
+ else
163
+ [codepoint_1].pack('U'.freeze)
164
+ end
165
+ end
166
+ nil
167
+ end
168
+
169
+ ESCAPES = /\\["\\\/bfnrt]/
170
+ ESCAPES_REPLACE = {
171
+ '\\"' => '"',
172
+ "\\\\" => "\\",
173
+ "\\/" => '/',
174
+ "\\b" => "\b",
175
+ "\\f" => "\f",
176
+ "\\n" => "\n",
177
+ "\\r" => "\r",
178
+ "\\t" => "\t",
179
+ }
180
+ UTF_8 = /\\u(?:([\dAa-f]{4})|\{([\da-f]{4,})\})(?:\\u([\dAa-f]{4}))?/i
181
+ VALID_STRING = /\A(?:[^\\]|#{ESCAPES}|#{UTF_8})*\z/o
182
+
183
+ def emit_block(value)
184
+ value = trim_whitespace(value)
185
+ emit_string(value)
186
+ end
187
+
188
+ def emit_string(value)
189
+ if !value.valid_encoding? || !value.match?(VALID_STRING)
190
+ emit(:BAD_UNICODE_ESCAPE, value)
191
+ else
192
+ replace_escaped_characters_in_place(value)
193
+
194
+ if !value.valid_encoding?
195
+ emit(:BAD_UNICODE_ESCAPE, value)
196
+ else
197
+ emit(:STRING, value)
198
+ end
199
+ end
200
+ end
201
+
202
+ def trim_whitespace(str)
203
+ # Early return for the most common cases:
204
+ if str == ""
205
+ return "".dup
206
+ elsif !(has_newline = str.include?("\n")) && !(str.start_with?(" "))
207
+ return str
208
+ end
209
+
210
+ lines = has_newline ? str.split("\n") : [str]
211
+ common_indent = nil
212
+
213
+ # find the common whitespace
214
+ lines.each_with_index do |line, idx|
215
+ if idx == 0
216
+ next
217
+ end
218
+ line_length = line.size
219
+ line_indent = if line.match?(/\A [^ ]/)
220
+ 2
221
+ elsif line.match?(/\A [^ ]/)
222
+ 4
223
+ elsif line.match?(/\A[^ ]/)
224
+ 0
225
+ else
226
+ line[/\A */].size
227
+ end
228
+ if line_indent < line_length && (common_indent.nil? || line_indent < common_indent)
229
+ common_indent = line_indent
230
+ end
231
+ end
232
+
233
+ # Remove the common whitespace
234
+ if common_indent && common_indent > 0
235
+ lines.each_with_index do |line, idx|
236
+ if idx == 0
237
+ next
238
+ else
239
+ line.slice!(0, common_indent)
240
+ end
241
+ end
242
+ end
243
+
244
+ # Remove leading & trailing blank lines
245
+ while lines.size > 0 && lines[0].empty?
246
+ lines.shift
247
+ end
248
+ while lines.size > 0 && lines[-1].empty?
249
+ lines.pop
250
+ end
251
+
252
+ # Rebuild the string
253
+ lines.size > 1 ? lines.join("\n") : (lines.first || "".dup)
254
+ end
255
+ end
256
+ end