tinygql 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,256 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "strscan"
4
+
5
module TinyGQL
  # A small GraphQL lexer built on top of StringScanner.
  #
  # The lexer is pull-based: every call to #advance (or #next_token) consumes
  # one token from the source and exposes it through #token_name and
  # #token_value.
  #
  #   lexer = TinyGQL::Lexer.new("{ me }")
  #   while (name, value = lexer.next_token)
  #     # [:LCURLY, "{"], [:IDENTIFIER, "me"], [:RCURLY, "}"]
  #   end
  class Lexer
    IDENTIFIER = /[_A-Za-z][_0-9A-Za-z]*/

    # Separators (commas and whitespace) and `#` comments. Never matches the
    # empty string, which #advance relies on to make progress.
    IGNORE = %r{
      (?:
        [, \c\r\n\t]+ |
        \#.*$
      )
    }x

    INT = /[-]?(?:[0]|[1-9][0-9]*)/
    FLOAT_DECIMAL = /[.][0-9]+/
    FLOAT_EXP = /[eE][+-]?[0-9]+/
    FLOAT = /#{INT}(#{FLOAT_DECIMAL}#{FLOAT_EXP}|#{FLOAT_DECIMAL}|#{FLOAT_EXP})/

    # Keywords and punctuators. The constant name doubles as the token name
    # (see LIT_NAME_LUT), and the definition order is the matching order.
    module Literals
      ON = /on\b/
      FRAGMENT = /fragment\b/
      TRUE = /true\b/
      FALSE = /false\b/
      NULL = /null\b/
      QUERY = /query\b/
      MUTATION = /mutation\b/
      SUBSCRIPTION = /subscription\b/
      SCHEMA = /schema\b/
      SCALAR = /scalar\b/
      TYPE = /type\b/
      EXTEND = /extend\b/
      IMPLEMENTS = /implements\b/
      INTERFACE = /interface\b/
      UNION = /union\b/
      ENUM = /enum\b/
      INPUT = /input\b/
      DIRECTIVE = /directive\b/
      REPEATABLE = /repeatable\b/
      LCURLY = '{'
      RCURLY = '}'
      LPAREN = '('
      RPAREN = ')'
      LBRACKET = '['
      RBRACKET = ']'
      COLON = ':'
      VAR_SIGN = '$'
      DIR_SIGN = '@'
      ELLIPSIS = '...'
      EQUALS = '='
      BANG = '!'
      PIPE = '|'
      AMP = '&'
    end

    include Literals

    QUOTE = '"'
    # Deliberately permissive: the scanner accepts any alphanumeric "digit" so
    # that a malformed escape is still consumed as part of the string and can
    # be reported as :BAD_UNICODE_ESCAPE (VALID_STRING does the strict check).
    UNICODE_DIGIT = /[0-9A-Za-z]/
    FOUR_DIGIT_UNICODE = /#{UNICODE_DIGIT}{4}/
    N_DIGIT_UNICODE = %r{#{LCURLY}#{UNICODE_DIGIT}{4,}#{RCURLY}}x
    UNICODE_ESCAPE = %r{\\u(?:#{FOUR_DIGIT_UNICODE}|#{N_DIGIT_UNICODE})}
    # https://graphql.github.io/graphql-spec/June2018/#sec-String-Value
    STRING_ESCAPE = %r{[\\][\\/bfnrt]}
    BLOCK_QUOTE = '"""'
    ESCAPED_QUOTE = /\\"/
    STRING_CHAR = /#{ESCAPED_QUOTE}|[^"\\]|#{UNICODE_ESCAPE}|#{STRING_ESCAPE}/

    # Maps the matched literal text (e.g. "query", "{") to its token name
    # (e.g. :QUERY, :LCURLY).
    LIT_NAME_LUT = Literals.constants.each_with_object({}) { |n, o|
      key = Literals.const_get(n)
      key = key.is_a?(Regexp) ? key.source.gsub(/(\\b|\\)/, '') : key
      o[key] = n
    }.freeze

    LIT = Regexp.union(Literals.constants.map { |n| Literals.const_get(n) })

    QUOTED_STRING = %r{#{QUOTE} ((?:#{STRING_CHAR})*) #{QUOTE}}x
    BLOCK_STRING = %r{
      #{BLOCK_QUOTE}
      ((?: [^"\\]               |  # Any characters that aren't a quote or slash
         (?<!") ["]{1,2} (?!")  |  # Any quotes that don't have quotes next to them
         \\"{0,3}(?!")          |  # A slash followed by <= 3 quotes that aren't followed by a quote
         \\                     |  # A slash
         "{1,2}(?!")               # 1 or 2 " followed by something that isn't a quote
       )*
       (?:"")?)
      #{BLOCK_QUOTE}
    }xm

    # Catch-all for anything else. Must be tried last for precedence.
    UNKNOWN_CHAR = /./

    # Simple backslash escapes and their replacement characters.
    ESCAPES = /\\["\\\/bfnrt]/
    ESCAPES_REPLACE = {
      '\\"' => '"',
      "\\\\" => "\\",
      "\\/" => '/',
      "\\b" => "\b",
      "\\f" => "\f",
      "\\n" => "\n",
      "\\r" => "\r",
      "\\t" => "\t",
    }.freeze
    # A \uXXXX or \u{X...} escape, optionally followed by a second \uXXXX so
    # that surrogate pairs can be combined in a single substitution.
    UTF_8 = /\\u(?:([\dAa-f]{4})|\{([\da-f]{4,})\})(?:\\u([\dAa-f]{4}))?/i
    VALID_STRING = /\A(?:[^\\]|#{ESCAPES}|#{UTF_8})*\z/o

    attr_reader :token_name, :token_value

    # @param string [String] the GraphQL document to tokenize
    # @raise [RuntimeError] if +string+ is not validly encoded
    def initialize(string)
      # An explicit message is required here: a bare `raise` re-raises `$!`
      # when the lexer happens to be constructed inside a `rescue` clause.
      raise "GraphQL source string has an invalid encoding" unless string.valid_encoding?

      @scan = StringScanner.new string
      @token_name = nil
      @token_value = nil
    end

    # Number of newlines consumed so far, i.e. the zero-based line index of
    # the scanner position.
    #
    # StringScanner#pos is a *byte* offset, so the consumed prefix has to be
    # taken with #byteslice; a character-indexed slice would run past the
    # scanner position whenever multibyte characters precede it.
    #
    # @return [Integer]
    def line
      @scan.string.byteslice(0, @scan.pos).count("\n")
    end

    # @return [Boolean] true once the whole source has been consumed
    def done?
      @scan.eos?
    end

    # Consumes the next token and stores it in #token_name / #token_value.
    #
    # @return [Boolean] true when a token was read; false at end of input
    #   (in which case both accessors are reset to nil)
    def advance
      # Drop separators and comments. IGNORE cannot match the empty string,
      # so this loop always terminates.
      nil while @scan.skip(IGNORE)

      if @scan.eos?
        emit(nil, nil)
        return false
      end

      # Order matters: FLOAT before INT, literals before identifiers, block
      # strings before quoted strings, and the catch-all last.
      case
      when (str = @scan.scan(FLOAT)) then emit(:FLOAT, str)
      when (str = @scan.scan(INT)) then emit(:INT, str)
      when (str = @scan.scan(LIT)) then emit(LIT_NAME_LUT[str], str)
      when (str = @scan.scan(IDENTIFIER)) then emit(:IDENTIFIER, str)
      when @scan.skip(BLOCK_STRING) then emit_block(@scan[1])
      when @scan.skip(QUOTED_STRING) then emit_string(@scan[1])
      when (str = @scan.scan(UNKNOWN_CHAR)) then emit(:UNKNOWN_CHAR, str)
      else
        # This should never happen since `UNKNOWN_CHAR` ensures we make progress
        raise "Unknown string?"
      end
    end

    # Records the current token.
    #
    # @return [true]
    def emit(token_name, token_value)
      @token_name = token_name
      @token_value = token_value
      true
    end

    # @return [Array(Symbol, String), false] the next `[name, value]` pair, or
    #   false at end of input
    def next_token
      advance && [@token_name, @token_value]
    end

    # Replace any escaped unicode or whitespace with the _actual_ characters.
    # To avoid allocating more strings, this modifies the string passed into it.
    #
    # @param raw_string [String] unfrozen string body, mutated in place
    # @return [nil]
    # @raise [RangeError] if an escape names a code point Array#pack cannot encode
    def replace_escaped_characters_in_place(raw_string)
      raw_string.gsub!(ESCAPES, ESCAPES_REPLACE)
      raw_string.gsub!(UTF_8) do
        lead = (Regexp.last_match(1) || Regexp.last_match(2)).to_i(16)
        trail_hex = Regexp.last_match(3)

        if trail_hex.nil?
          [lead].pack('U')
        else
          trail = trail_hex.to_i(16)
          if lead.between?(0xD800, 0xDBFF) && trail.between?(0xDC00, 0xDFFF)
            # A leading + trailing surrogate: combine into one code point
            [((lead - 0xD800) * 0x400) + (trail - 0xDC00) + 0x10000].pack('U')
          else
            # Two separate code points
            [lead].pack('U') + [trail].pack('U')
          end
        end
      end
      nil
    end

    # Emits a block string (`"""..."""`) after removing its common indentation.
    #
    # @param value [String] raw text between the block quotes
    # @return [true]
    def emit_block(value)
      emit_string(trim_whitespace(value))
    end

    # Emits +value+ as :STRING after unescaping it, or as :BAD_UNICODE_ESCAPE
    # when it contains an escape sequence that cannot be decoded.
    #
    # @param value [String] raw string body (mutated in place)
    # @return [true]
    def emit_string(value)
      unless value.valid_encoding? && value.match?(VALID_STRING)
        return emit(:BAD_UNICODE_ESCAPE, value)
      end

      begin
        replace_escaped_characters_in_place(value)
      rescue RangeError
        # e.g. "\u{ffffffffff}": syntactically fine, but far beyond what
        # Array#pack('U') can encode. Report it instead of crashing.
        return emit(:BAD_UNICODE_ESCAPE, value)
      end

      # Lone surrogates and other out-of-range code points decode to bytes
      # that are not valid in the string's encoding.
      emit(value.valid_encoding? ? :STRING : :BAD_UNICODE_ESCAPE, value)
    end

    # Strips the common leading indentation (spaces only) from every line but
    # the first, then drops leading and trailing empty lines.
    #
    # @param str [String] raw block string contents
    # @return [String]
    def trim_whitespace(str)
      # Early returns for the most common cases:
      return "".dup if str.empty?

      has_newline = str.include?("\n")
      return str unless has_newline || str.start_with?(" ")

      lines = has_newline ? str.split("\n") : [str]

      # Find the smallest indentation among lines that contain something other
      # than spaces. The first line never takes part in this computation.
      common_indent = nil
      lines.each_with_index do |line, idx|
        next if idx.zero?

        indent = line[/\A */].size
        next unless indent < line.size

        common_indent = indent if common_indent.nil? || indent < common_indent
      end

      # Remove the common indentation (again, never from the first line)
      if common_indent && common_indent > 0
        lines.each_with_index do |line, idx|
          line.slice!(0, common_indent) unless idx.zero?
        end
      end

      # Remove leading & trailing blank lines
      lines.shift while !lines.empty? && lines.first.empty?
      lines.pop while !lines.empty? && lines.last.empty?

      # Rebuild the string
      lines.size > 1 ? lines.join("\n") : (lines.first || "".dup)
    end
  end
end