graphql-c_parser 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,6 @@
1
+ #ifndef Graphql_lexer_h
2
+ #define Graphql_lexer_h
3
+ #include <ruby.h>
4
+ VALUE tokenize(VALUE query_rbstr);
5
+ void setup_static_token_variables();
6
+ #endif
@@ -0,0 +1,403 @@
1
%%{
  machine graphql_c_lexer;

  # ---- Names, whitespace and comments ----
  IDENTIFIER =      [_A-Za-z][_0-9A-Za-z]*;
  # NOTE(review): backslash-c is not one of the escapes listed in the Ragel
  # guide, so this class presumably also contains a literal c. That would be
  # harmless because IDENTIFIER is listed before NEWLINE in the scanner below,
  # but confirm before changing the class.
  NEWLINE =         [\c\r\n];
  BLANK =           [, \t]+;
  COMMENT =         '#' [^\n\r]*;

  # ---- Numbers ----
  INT =             '-'? ('0'|[1-9][0-9]*);
  # An exponent must start with e or E. With an optional indicator, inputs
  # such as 1-2 or 01 were consumed as a single FLOAT token.
  # FLOAT can still match a bare INT; INT wins that tie because it is listed
  # first in the scanner.
  FLOAT =           INT ('.'[0-9]+)? (('e' | 'E')('+' | '-')?[0-9]+)?;

  # ---- Keywords ----
  ON =              'on';
  FRAGMENT =        'fragment';
  TRUE_LITERAL =    'true';
  FALSE_LITERAL =   'false';
  NULL_LITERAL =    'null';
  QUERY =           'query';
  MUTATION =        'mutation';
  SUBSCRIPTION =    'subscription';
  SCHEMA =          'schema';
  SCALAR =          'scalar';
  TYPE_LITERAL =    'type';
  EXTEND =          'extend';
  IMPLEMENTS =      'implements';
  INTERFACE =       'interface';
  UNION =           'union';
  ENUM =            'enum';
  INPUT =           'input';
  DIRECTIVE =       'directive';
  REPEATABLE =      'repeatable';

  # ---- Punctuation ----
  LCURLY =          '{';
  RCURLY =          '}';
  LPAREN =          '(';
  RPAREN =          ')';
  LBRACKET =        '[';
  RBRACKET =        ']';
  COLON =           ':';
  # Could limit to hex here, but “bad unicode escape” on 0XXF is probably a
  # more helpful error than “unknown char”
  UNICODE_ESCAPE =  "\\u" ([0-9A-Za-z]{4} | LCURLY [0-9A-Za-z]{4,} RCURLY);
  VAR_SIGN =        '$';
  DIR_SIGN =        '@';
  ELLIPSIS =        '...';
  EQUALS =          '=';
  BANG =            '!';
  PIPE =            '|';
  AMP =             '&';

  # ---- Strings ----
  QUOTED_STRING =   ('"' ((('\\"' | ^'"') - "\\") | UNICODE_ESCAPE | '\\' [\\/bfnrt])* '"');
  # catch-all for anything else. must be at the bottom for precedence.
  UNKNOWN_CHAR =    /./;

  BLOCK_STRING =    ('"""' ('\\"""' | ^'"' | '"'{1,2} ^'"')* '"'{0,2} '"""');

  # Scanner: the longest match wins, and the earlier pattern wins a tie, so
  # the order of the entries below is significant.
  main := |*
    INT           => { emit(INT, ts, te, meta); };
    FLOAT         => { emit(FLOAT, ts, te, meta); };
    ON            => { emit(ON, ts, te, meta); };
    FRAGMENT      => { emit(FRAGMENT, ts, te, meta); };
    TRUE_LITERAL  => { emit(TRUE_LITERAL, ts, te, meta); };
    FALSE_LITERAL => { emit(FALSE_LITERAL, ts, te, meta); };
    NULL_LITERAL  => { emit(NULL_LITERAL, ts, te, meta); };
    QUERY         => { emit(QUERY, ts, te, meta); };
    MUTATION      => { emit(MUTATION, ts, te, meta); };
    SUBSCRIPTION  => { emit(SUBSCRIPTION, ts, te, meta); };
    SCHEMA        => { emit(SCHEMA, ts, te, meta); };
    SCALAR        => { emit(SCALAR, ts, te, meta); };
    TYPE_LITERAL  => { emit(TYPE_LITERAL, ts, te, meta); };
    EXTEND        => { emit(EXTEND, ts, te, meta); };
    IMPLEMENTS    => { emit(IMPLEMENTS, ts, te, meta); };
    INTERFACE     => { emit(INTERFACE, ts, te, meta); };
    UNION         => { emit(UNION, ts, te, meta); };
    ENUM          => { emit(ENUM, ts, te, meta); };
    INPUT         => { emit(INPUT, ts, te, meta); };
    DIRECTIVE     => { emit(DIRECTIVE, ts, te, meta); };
    REPEATABLE    => { emit(REPEATABLE, ts, te, meta); };
    RCURLY        => { emit(RCURLY, ts, te, meta); };
    LCURLY        => { emit(LCURLY, ts, te, meta); };
    RPAREN        => { emit(RPAREN, ts, te, meta); };
    LPAREN        => { emit(LPAREN, ts, te, meta); };
    RBRACKET      => { emit(RBRACKET, ts, te, meta); };
    LBRACKET      => { emit(LBRACKET, ts, te, meta); };
    COLON         => { emit(COLON, ts, te, meta); };
    BLOCK_STRING  => { emit(BLOCK_STRING, ts, te, meta); };
    QUOTED_STRING => { emit(QUOTED_STRING, ts, te, meta); };
    VAR_SIGN      => { emit(VAR_SIGN, ts, te, meta); };
    DIR_SIGN      => { emit(DIR_SIGN, ts, te, meta); };
    ELLIPSIS      => { emit(ELLIPSIS, ts, te, meta); };
    EQUALS        => { emit(EQUALS, ts, te, meta); };
    BANG          => { emit(BANG, ts, te, meta); };
    PIPE          => { emit(PIPE, ts, te, meta); };
    AMP           => { emit(AMP, ts, te, meta); };
    IDENTIFIER    => { emit(IDENTIFIER, ts, te, meta); };
    COMMENT       => { emit(COMMENT, ts, te, meta); };

    # Line terminators produce no token; they only move the position.
    NEWLINE => {
      meta->line += 1;
      meta->col = 1;
    };

    # Blanks (spaces, tabs, commas) produce no token; they only advance the column.
    BLANK => { meta->col += te - ts; };

    UNKNOWN_CHAR  => { emit(UNKNOWN_CHAR, ts, te, meta); };
  *|;
}%%
103
+
104
+ %% write data;
105
+
106
+ #include <ruby.h>
107
+
108
+ #define INIT_STATIC_TOKEN_VARIABLE(token_name) \
109
+ static VALUE GraphQLTokenString##token_name;
110
+
111
+ INIT_STATIC_TOKEN_VARIABLE(ON)
112
+ INIT_STATIC_TOKEN_VARIABLE(FRAGMENT)
113
+ INIT_STATIC_TOKEN_VARIABLE(QUERY)
114
+ INIT_STATIC_TOKEN_VARIABLE(MUTATION)
115
+ INIT_STATIC_TOKEN_VARIABLE(SUBSCRIPTION)
116
+ INIT_STATIC_TOKEN_VARIABLE(REPEATABLE)
117
+ INIT_STATIC_TOKEN_VARIABLE(RCURLY)
118
+ INIT_STATIC_TOKEN_VARIABLE(LCURLY)
119
+ INIT_STATIC_TOKEN_VARIABLE(RBRACKET)
120
+ INIT_STATIC_TOKEN_VARIABLE(LBRACKET)
121
+ INIT_STATIC_TOKEN_VARIABLE(RPAREN)
122
+ INIT_STATIC_TOKEN_VARIABLE(LPAREN)
123
+ INIT_STATIC_TOKEN_VARIABLE(COLON)
124
+ INIT_STATIC_TOKEN_VARIABLE(VAR_SIGN)
125
+ INIT_STATIC_TOKEN_VARIABLE(DIR_SIGN)
126
+ INIT_STATIC_TOKEN_VARIABLE(ELLIPSIS)
127
+ INIT_STATIC_TOKEN_VARIABLE(EQUALS)
128
+ INIT_STATIC_TOKEN_VARIABLE(BANG)
129
+ INIT_STATIC_TOKEN_VARIABLE(PIPE)
130
+ INIT_STATIC_TOKEN_VARIABLE(AMP)
131
+ INIT_STATIC_TOKEN_VARIABLE(SCHEMA)
132
+ INIT_STATIC_TOKEN_VARIABLE(SCALAR)
133
+ INIT_STATIC_TOKEN_VARIABLE(EXTEND)
134
+ INIT_STATIC_TOKEN_VARIABLE(IMPLEMENTS)
135
+ INIT_STATIC_TOKEN_VARIABLE(INTERFACE)
136
+ INIT_STATIC_TOKEN_VARIABLE(UNION)
137
+ INIT_STATIC_TOKEN_VARIABLE(ENUM)
138
+ INIT_STATIC_TOKEN_VARIABLE(DIRECTIVE)
139
+ INIT_STATIC_TOKEN_VARIABLE(INPUT)
140
+
141
+ static VALUE GraphQL_type_str;
142
+ static VALUE GraphQL_true_str;
143
+ static VALUE GraphQL_false_str;
144
+ static VALUE GraphQL_null_str;
145
/*
 * Kinds of token the lexer distinguishes.
 *
 * emit() reports each kind to Ruby as the integer 200 + <enum value>, so
 * inserting, removing or reordering enumerators changes those codes. The
 * ordinal of each enumerator is noted beside it.
 */
typedef enum TokenType {
  /*  0 */ AMP = 0,
  /*  1 */ BANG,
  /*  2 */ COLON,
  /*  3 */ DIRECTIVE,
  /*  4 */ DIR_SIGN,
  /*  5 */ ENUM,
  /*  6 */ ELLIPSIS,
  /*  7 */ EQUALS,
  /*  8 */ EXTEND,
  /*  9 */ FALSE_LITERAL,
  /* 10 */ FLOAT,
  /* 11 */ FRAGMENT,
  /* 12 */ IDENTIFIER,
  /* 13 */ INPUT,
  /* 14 */ IMPLEMENTS,
  /* 15 */ INT,
  /* 16 */ INTERFACE,
  /* 17 */ LBRACKET,
  /* 18 */ LCURLY,
  /* 19 */ LPAREN,
  /* 20 */ MUTATION,
  /* 21 */ NULL_LITERAL,
  /* 22 */ ON,
  /* 23 */ PIPE,
  /* 24 */ QUERY,
  /* 25 */ RBRACKET,
  /* 26 */ RCURLY,
  /* 27 */ REPEATABLE,
  /* 28 */ RPAREN,
  /* 29 */ SCALAR,
  /* 30 */ SCHEMA,
  /* 31 */ STRING,        /* reported for both BLOCK_STRING and QUOTED_STRING */
  /* 32 */ SUBSCRIPTION,
  /* 33 */ TRUE_LITERAL,
  /* 34 */ TYPE_LITERAL,
  /* 35 */ UNION,
  /* 36 */ VAR_SIGN,
  /* 37 */ BLOCK_STRING,  /* lexer-only: triple-quoted string */
  /* 38 */ QUOTED_STRING, /* lexer-only: single-quoted string */
  /* 39 */ UNKNOWN_CHAR,
  /* 40 */ COMMENT
} TokenType;
188
+
189
+ typedef struct Meta {
190
+ int line;
191
+ int col;
192
+ char *query_cstr;
193
+ char *pe;
194
+ VALUE tokens;
195
+ VALUE previous_token;
196
+ } Meta;
197
+
198
/*
 * One `case` arm of the switch in emit() for a token whose text never
 * varies. The symbol is the enumerator's own name and the text is the
 * shared string GraphQLTokenString<kind>. The second argument is not used
 * by the expansion; call sites pass the literal text there.
 * Relies on the locals `token_sym` and `token_content` of emit().
 */
#define STATIC_VALUE_TOKEN(kind, text) \
  case kind: \
    token_content = GraphQLTokenString##kind; \
    token_sym = ID2SYM(rb_intern(#kind)); \
    break;
203
+
204
/*
 * One `case` arm of the switch in emit() for a token whose text comes from
 * the input. The symbol is the enumerator's own name and the text is a new
 * UTF-8 Ruby string copied from the bytes between `ts` and `te`.
 * Relies on the locals `token_sym`, `token_content`, `ts` and `te` of emit().
 */
#define DYNAMIC_VALUE_TOKEN(kind) \
  case kind: \
    token_content = rb_utf8_str_new(ts, te - ts); \
    token_sym = ID2SYM(rb_intern(#kind)); \
    break;
209
+
210
+ void emit(TokenType tt, char *ts, char *te, Meta *meta) {
211
+ int quotes_length = 0; // set by string tokens below
212
+ int line_incr = 0;
213
+ VALUE token_sym = Qnil;
214
+ VALUE token_content = Qnil;
215
+
216
+ switch(tt) {
217
+ STATIC_VALUE_TOKEN(ON, "on")
218
+ STATIC_VALUE_TOKEN(FRAGMENT, "fragment")
219
+ STATIC_VALUE_TOKEN(QUERY, "query")
220
+ STATIC_VALUE_TOKEN(MUTATION, "mutation")
221
+ STATIC_VALUE_TOKEN(SUBSCRIPTION, "subscription")
222
+ STATIC_VALUE_TOKEN(REPEATABLE, "repeatable")
223
+ STATIC_VALUE_TOKEN(RCURLY, "}")
224
+ STATIC_VALUE_TOKEN(LCURLY, "{")
225
+ STATIC_VALUE_TOKEN(RBRACKET, "]")
226
+ STATIC_VALUE_TOKEN(LBRACKET, "[")
227
+ STATIC_VALUE_TOKEN(RPAREN, ")")
228
+ STATIC_VALUE_TOKEN(LPAREN, "(")
229
+ STATIC_VALUE_TOKEN(COLON, ":")
230
+ STATIC_VALUE_TOKEN(VAR_SIGN, "$")
231
+ STATIC_VALUE_TOKEN(DIR_SIGN, "@")
232
+ STATIC_VALUE_TOKEN(ELLIPSIS, "...")
233
+ STATIC_VALUE_TOKEN(EQUALS, "=")
234
+ STATIC_VALUE_TOKEN(BANG, "!")
235
+ STATIC_VALUE_TOKEN(PIPE, "|")
236
+ STATIC_VALUE_TOKEN(AMP, "&")
237
+ STATIC_VALUE_TOKEN(SCHEMA, "schema")
238
+ STATIC_VALUE_TOKEN(SCALAR, "scalar")
239
+ STATIC_VALUE_TOKEN(EXTEND, "extend")
240
+ STATIC_VALUE_TOKEN(IMPLEMENTS, "implements")
241
+ STATIC_VALUE_TOKEN(INTERFACE, "interface")
242
+ STATIC_VALUE_TOKEN(UNION, "union")
243
+ STATIC_VALUE_TOKEN(ENUM, "enum")
244
+ STATIC_VALUE_TOKEN(DIRECTIVE, "directive")
245
+ STATIC_VALUE_TOKEN(INPUT, "input")
246
+ // For these, the enum name doesn't match the symbol name:
247
+ case TYPE_LITERAL:
248
+ token_sym = ID2SYM(rb_intern("TYPE"));
249
+ token_content = GraphQL_type_str;
250
+ break;
251
+ case TRUE_LITERAL:
252
+ token_sym = ID2SYM(rb_intern("TRUE"));
253
+ token_content = GraphQL_true_str;
254
+ break;
255
+ case FALSE_LITERAL:
256
+ token_sym = ID2SYM(rb_intern("FALSE"));
257
+ token_content = GraphQL_false_str;
258
+ break;
259
+ case NULL_LITERAL:
260
+ token_sym = ID2SYM(rb_intern("NULL"));
261
+ token_content = GraphQL_null_str;
262
+ break;
263
+ DYNAMIC_VALUE_TOKEN(IDENTIFIER)
264
+ DYNAMIC_VALUE_TOKEN(INT)
265
+ DYNAMIC_VALUE_TOKEN(FLOAT)
266
+ DYNAMIC_VALUE_TOKEN(COMMENT)
267
+ case UNKNOWN_CHAR:
268
+ if (ts[0] == '\0') {
269
+ return;
270
+ } else {
271
+ token_content = rb_utf8_str_new(ts, te - ts);
272
+ token_sym = ID2SYM(rb_intern("UNKNOWN_CHAR"));
273
+ break;
274
+ }
275
+ case QUOTED_STRING:
276
+ quotes_length = 1;
277
+ token_content = rb_utf8_str_new(ts + quotes_length, (te - ts - (2 * quotes_length)));
278
+ token_sym = ID2SYM(rb_intern("STRING"));
279
+ break;
280
+ case BLOCK_STRING:
281
+ token_sym = ID2SYM(rb_intern("STRING"));
282
+ quotes_length = 3;
283
+ token_content = rb_utf8_str_new(ts + quotes_length, (te - ts - (2 * quotes_length)));
284
+ line_incr = FIX2INT(rb_funcall(token_content, rb_intern("count"), 1, rb_str_new_cstr("\n")));
285
+ break;
286
+ case STRING:
287
+ // This is used only by the parser, this is never reached
288
+ break;
289
+ }
290
+
291
+ if (token_sym != Qnil) {
292
+ if (tt == BLOCK_STRING || tt == QUOTED_STRING) {
293
+ VALUE mGraphQL = rb_const_get_at(rb_cObject, rb_intern("GraphQL"));
294
+ VALUE mGraphQLLanguage = rb_const_get_at(mGraphQL, rb_intern("Language"));
295
+ VALUE mGraphQLLanguageLexer = rb_const_get_at(mGraphQLLanguage, rb_intern("Lexer"));
296
+ VALUE valid_string_pattern = rb_const_get_at(mGraphQLLanguageLexer, rb_intern("VALID_STRING"));
297
+ if (tt == BLOCK_STRING) {
298
+ VALUE mGraphQLLanguageBlockString = rb_const_get_at(mGraphQLLanguage, rb_intern("BlockString"));
299
+ token_content = rb_funcall(mGraphQLLanguageBlockString, rb_intern("trim_whitespace"), 1, token_content);
300
+ }
301
+
302
+ if (
303
+ RB_TEST(rb_funcall(token_content, rb_intern("valid_encoding?"), 0)) &&
304
+ RB_TEST(rb_funcall(token_content, rb_intern("match?"), 1, valid_string_pattern))
305
+ ) {
306
+ rb_funcall(mGraphQLLanguageLexer, rb_intern("replace_escaped_characters_in_place"), 1, token_content);
307
+ if (!RB_TEST(rb_funcall(token_content, rb_intern("valid_encoding?"), 0))) {
308
+ token_sym = ID2SYM(rb_intern("BAD_UNICODE_ESCAPE"));
309
+ }
310
+
311
+
312
+ } else {
313
+ token_sym = ID2SYM(rb_intern("BAD_UNICODE_ESCAPE"));
314
+ }
315
+ // The parser doesn't distinguish between these:
316
+ tt = STRING;
317
+ }
318
+
319
+ VALUE token = rb_ary_new_from_args(6,
320
+ token_sym,
321
+ rb_int2inum(meta->line),
322
+ rb_int2inum(meta->col),
323
+ token_content,
324
+ meta->previous_token,
325
+ INT2FIX(200 + (int)tt)
326
+ );
327
+
328
+ // COMMENTs are retained as `previous_token` but aren't pushed to the normal token list
329
+ if (tt != COMMENT) {
330
+ rb_ary_push(meta->tokens, token);
331
+ }
332
+ meta->previous_token = token;
333
+ }
334
+ // Bump the column counter for the next token
335
+ meta->col += te - ts;
336
+ meta->line += line_incr;
337
+ }
338
+
339
+ VALUE tokenize(VALUE query_rbstr) {
340
+ int cs = 0;
341
+ int act = 0;
342
+ char *p = StringValueCStr(query_rbstr);
343
+ char *pe = p + strlen(p);
344
+ char *eof = pe;
345
+ char *ts = 0;
346
+ char *te = 0;
347
+ VALUE tokens = rb_ary_new();
348
+ struct Meta meta_s = {1, 1, p, pe, tokens, Qnil};
349
+ Meta *meta = &meta_s;
350
+
351
+ %% write init;
352
+ %% write exec;
353
+
354
+ return tokens;
355
+ }
356
+
357
+
358
/*
 * Initialize GraphQLTokenString<token_name> with the text `token_content`.
 *
 * - The variable's address is registered with the GC before a string is
 *   stored in it, so the string is never held only by an unregistered
 *   global.
 * - The string is created as UTF-8, matching the strings emit() builds for
 *   tokens whose text comes from the input.
 * - The variable keeps the result of String#-@ (the frozen, deduplicated
 *   string). Calling -@ and discarding its result would leave the
 *   variable pointing at the string it was called on.
 *
 * The expansion ends with its own semicolon; call sites use none.
 */
#define SETUP_STATIC_TOKEN_VARIABLE(token_name, token_content) \
  rb_global_variable(&GraphQLTokenString##token_name); \
  GraphQLTokenString##token_name = \
    rb_funcall(rb_utf8_str_new_cstr(token_content), rb_intern("-@"), 0);
362
+
363
/*
 * Initialize the static VALUE `var_name` with a frozen string containing
 * `str_content`.
 *
 * The variable's address is registered with the GC before the string is
 * stored in it, and the string is created as UTF-8 so that it has the same
 * encoding as the strings emit() builds for tokens whose text comes from
 * the input.
 *
 * The expansion ends with its own semicolon; call sites use none.
 */
#define SETUP_STATIC_STRING(var_name, str_content) \
  rb_global_variable(&var_name); \
  var_name = rb_utf8_str_new_cstr(str_content); \
  rb_str_freeze(var_name);
367
+
368
+ void setup_static_token_variables() {
369
+ SETUP_STATIC_TOKEN_VARIABLE(ON, "on")
370
+ SETUP_STATIC_TOKEN_VARIABLE(FRAGMENT, "fragment")
371
+ SETUP_STATIC_TOKEN_VARIABLE(QUERY, "query")
372
+ SETUP_STATIC_TOKEN_VARIABLE(MUTATION, "mutation")
373
+ SETUP_STATIC_TOKEN_VARIABLE(SUBSCRIPTION, "subscription")
374
+ SETUP_STATIC_TOKEN_VARIABLE(REPEATABLE, "repeatable")
375
+ SETUP_STATIC_TOKEN_VARIABLE(RCURLY, "}")
376
+ SETUP_STATIC_TOKEN_VARIABLE(LCURLY, "{")
377
+ SETUP_STATIC_TOKEN_VARIABLE(RBRACKET, "]")
378
+ SETUP_STATIC_TOKEN_VARIABLE(LBRACKET, "[")
379
+ SETUP_STATIC_TOKEN_VARIABLE(RPAREN, ")")
380
+ SETUP_STATIC_TOKEN_VARIABLE(LPAREN, "(")
381
+ SETUP_STATIC_TOKEN_VARIABLE(COLON, ":")
382
+ SETUP_STATIC_TOKEN_VARIABLE(VAR_SIGN, "$")
383
+ SETUP_STATIC_TOKEN_VARIABLE(DIR_SIGN, "@")
384
+ SETUP_STATIC_TOKEN_VARIABLE(ELLIPSIS, "...")
385
+ SETUP_STATIC_TOKEN_VARIABLE(EQUALS, "=")
386
+ SETUP_STATIC_TOKEN_VARIABLE(BANG, "!")
387
+ SETUP_STATIC_TOKEN_VARIABLE(PIPE, "|")
388
+ SETUP_STATIC_TOKEN_VARIABLE(AMP, "&")
389
+ SETUP_STATIC_TOKEN_VARIABLE(SCHEMA, "schema")
390
+ SETUP_STATIC_TOKEN_VARIABLE(SCALAR, "scalar")
391
+ SETUP_STATIC_TOKEN_VARIABLE(EXTEND, "extend")
392
+ SETUP_STATIC_TOKEN_VARIABLE(IMPLEMENTS, "implements")
393
+ SETUP_STATIC_TOKEN_VARIABLE(INTERFACE, "interface")
394
+ SETUP_STATIC_TOKEN_VARIABLE(UNION, "union")
395
+ SETUP_STATIC_TOKEN_VARIABLE(ENUM, "enum")
396
+ SETUP_STATIC_TOKEN_VARIABLE(DIRECTIVE, "directive")
397
+ SETUP_STATIC_TOKEN_VARIABLE(INPUT, "input")
398
+
399
+ SETUP_STATIC_STRING(GraphQL_type_str, "type")
400
+ SETUP_STATIC_STRING(GraphQL_true_str, "true")
401
+ SETUP_STATIC_STRING(GraphQL_false_str, "false")
402
+ SETUP_STATIC_STRING(GraphQL_null_str, "null")
403
+ }