c_lexer 2.5.1.0.pre1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,62 @@
1
+ #ifndef LITERAL_H
2
+ #define LITERAL_H
3
+
4
+ #include "literal/type.h"
5
+ typedef struct literal literal;
6
+ #include "lexer.h"
7
+
8
+ struct literal {
9
+ struct lexer_state *lexer;
10
+ VALUE buffer;
11
+ long buffer_s;
12
+ long buffer_e;
13
+
14
+ int nesting;
15
+
16
+ str_type str_type;
17
+ VALUE start_tok;
18
+ int interpolate;
19
+
20
+ VALUE start_delim;
21
+ VALUE end_delim;
22
+ VALUE delimiter;
23
+
24
+ long heredoc_e;
25
+ long str_s;
26
+ long herebody_s;
27
+
28
+ int indent;
29
+ int label_allowed;
30
+ int interp_braces;
31
+ int space_emitted;
32
+ int monolithic;
33
+
34
+ int dedent_body;
35
+ int dedent_level;
36
+ };
37
+
38
+ static void literal_init(literal*, lexer_state*, VALUE, VALUE, long, long, int, int, int);
39
+ static str_type literal_string_to_str_type(VALUE);
40
+ static VALUE literal_str_type_to_string(str_type);
41
+ static void literal_set_start_tok_and_interpolate(literal*, str_type);
42
+ static VALUE literal_get_start_delim(VALUE);
43
+ static VALUE literal_get_end_delim(VALUE);
44
+ static int literal_munge_escape_p(literal*, VALUE);
45
+ static int literal_nest_and_try_closing(literal*, VALUE, long, long, VALUE);
46
+ static void literal_emit_start_tok(literal*);
47
+ static void literal_start_interp_brace(literal*);
48
+ static int literal_end_interp_brace_and_try_closing(literal*);
49
+ static void literal_extend_string(literal*, VALUE, long, long);
50
+ static void literal_flush_string(literal*);
51
+ static void literal_extend_content(literal*);
52
+ static void literal_extend_space(literal*, long, long);
53
+ static int literal_words_p(literal*);
54
+ static int literal_regexp_p(literal*);
55
+ static int literal_heredoc_p(literal*);
56
+ static int literal_squiggly_heredoc_p(literal*);
57
+ static int newline_char_p(VALUE);
58
+ static void literal_infer_indent_level(literal*, VALUE);
59
+ static int next_state_for_literal(literal*);
60
+ static void literal_clear_buffer(literal*);
61
+
62
+ #endif
@@ -0,0 +1,371 @@
1
+ static void literal_init(literal *self, lexer_state *lexer, VALUE str_type,
2
+ VALUE delimiter, long str_s, long heredoc_e, int indent,
3
+ int dedent_body, int label_allowed)
4
+ {
5
+ self->lexer = lexer;
6
+ self->nesting = 1;
7
+ self->str_type = literal_string_to_str_type(str_type);
8
+
9
+ if (self->str_type == INVALID) {
10
+ VALUE hash = rb_hash_new();
11
+ rb_hash_aset(hash, ID2SYM(rb_intern("type")), str_type);
12
+ diagnostic(lexer, severity_error, unexpected_percent_str, hash,
13
+ range(lexer, str_s, str_s + 2), empty_array);
14
+ }
15
+
16
+ literal_set_start_tok_and_interpolate(self, self->str_type);
17
+
18
+ self->str_s = str_s;
19
+
20
+ self->start_delim = literal_get_start_delim(delimiter);
21
+ self->end_delim = literal_get_end_delim(delimiter);
22
+ self->delimiter = delimiter;
23
+
24
+ self->herebody_s = 0;
25
+ self->heredoc_e = heredoc_e;
26
+ self->indent = indent;
27
+ self->label_allowed = label_allowed;
28
+ self->dedent_body = dedent_body;
29
+ self->dedent_level = -1;
30
+ self->interp_braces = 0;
31
+ self->space_emitted = 1;
32
+ self->monolithic = (self->start_tok == tSTRING_BEG &&
33
+ (self->str_type == SINGLE_QUOTE ||
34
+ self->str_type == DOUBLE_QUOTE) &&
35
+ !heredoc_e);
36
+
37
+ literal_clear_buffer(self);
38
+
39
+ if (!self->monolithic) {
40
+ literal_emit_start_tok(self);
41
+ }
42
+ }
43
+
44
+ static str_type literal_string_to_str_type(VALUE str)
45
+ {
46
+ char *p = RSTRING_PTR(str);
47
+ switch (*p) {
48
+ case '%':
49
+ switch (*++p) {
50
+ case 'q': return PERCENT_Q;
51
+ case 'Q': return BIG_PERCENT_Q;
52
+ case '\0': return BARE_PERCENT;
53
+ case 'w': return PERCENT_W;
54
+ case 'W': return BIG_PERCENT_W;
55
+ case 'i': return PERCENT_I;
56
+ case 'I': return BIG_PERCENT_I;
57
+ case 's': return PERCENT_S;
58
+ case 'r': return PERCENT_R;
59
+ case 'x': return PERCENT_X;
60
+ default: return INVALID;
61
+ }
62
+ break;
63
+ case '"': return DOUBLE_QUOTE;
64
+ case '\'': return SINGLE_QUOTE;
65
+ case '/': return SLASH;
66
+ case ':':
67
+ switch (*++p) {
68
+ case '"': return SYM_DOUBLE_QUOT;
69
+ case '\'': return SYM_SINGLE_QUOT;
70
+ default: return INVALID;
71
+ }
72
+ break;
73
+ case '`': return BACKTICK;
74
+ case '<':
75
+ if (*++p != '<')
76
+ return INVALID;
77
+ switch (*++p) {
78
+ case '"': return LSHFT_DOUBLE_QUOT;
79
+ case '\'': return LSHFT_SINGLE_QUOT;
80
+ case '`': return LSHFT_BACKTICK;
81
+ default: return INVALID;
82
+ }
83
+ default: return INVALID;
84
+ }
85
+ }
86
+
87
+ static VALUE literal_str_type_to_string(str_type stype)
88
+ {
89
+ switch (stype) {
90
+ case SINGLE_QUOTE: return rb_str_new2("'");
91
+ case DOUBLE_QUOTE: return rb_str_new2("\"");
92
+ case PERCENT_Q: return rb_str_new2("%q");
93
+ case BIG_PERCENT_Q: return rb_str_new2("%Q");
94
+ case LSHFT_SINGLE_QUOT: return rb_str_new2("<<'");
95
+ case LSHFT_DOUBLE_QUOT: return rb_str_new2("<<\"");
96
+ case BARE_PERCENT: return rb_str_new2("%");
97
+ case PERCENT_W: return rb_str_new2("%w");
98
+ case BIG_PERCENT_W: return rb_str_new2("%W");
99
+ case PERCENT_I: return rb_str_new2("%i");
100
+ case BIG_PERCENT_I: return rb_str_new2("%I");
101
+ case SYM_SINGLE_QUOT: return rb_str_new2(":'");
102
+ case SYM_DOUBLE_QUOT: return rb_str_new2(":\"");
103
+ case PERCENT_S: return rb_str_new2("%s");
104
+ case SLASH: return rb_str_new2("/");
105
+ case PERCENT_R: return rb_str_new2("%r");
106
+ case PERCENT_X: return rb_str_new2("%x");
107
+ case BACKTICK: return rb_str_new2("`");
108
+ case LSHFT_BACKTICK: return rb_str_new2("<<`");
109
+ default: return Qnil;
110
+ }
111
+ }
112
+
113
+ static void literal_set_start_tok_and_interpolate(literal *lit, str_type stype)
114
+ {
115
+ switch(stype) {
116
+ case SINGLE_QUOTE:
117
+ case PERCENT_Q:
118
+ case LSHFT_SINGLE_QUOT:
119
+ lit->start_tok = tSTRING_BEG;
120
+ lit->interpolate = 0;
121
+ break;
122
+ case DOUBLE_QUOTE:
123
+ case BIG_PERCENT_Q:
124
+ case BARE_PERCENT:
125
+ case LSHFT_DOUBLE_QUOT:
126
+ lit->start_tok = tSTRING_BEG;
127
+ lit->interpolate = 1;
128
+ break;
129
+ case PERCENT_W:
130
+ lit->start_tok = tQWORDS_BEG;
131
+ lit->interpolate = 0;
132
+ break;
133
+ case BIG_PERCENT_W:
134
+ lit->start_tok = tWORDS_BEG;
135
+ lit->interpolate = 1;
136
+ break;
137
+ case PERCENT_I:
138
+ lit->start_tok = tQSYMBOLS_BEG;
139
+ lit->interpolate = 0;
140
+ break;
141
+ case BIG_PERCENT_I:
142
+ lit->start_tok = tSYMBOLS_BEG;
143
+ lit->interpolate = 1;
144
+ break;
145
+ case SYM_SINGLE_QUOT:
146
+ case PERCENT_S:
147
+ lit->start_tok = tSYMBEG;
148
+ lit->interpolate = 0;
149
+ break;
150
+ case SYM_DOUBLE_QUOT:
151
+ lit->start_tok = tSYMBEG;
152
+ lit->interpolate = 1;
153
+ break;
154
+ case SLASH:
155
+ case PERCENT_R:
156
+ lit->start_tok = tREGEXP_BEG;
157
+ lit->interpolate = 1;
158
+ break;
159
+ case PERCENT_X:
160
+ case BACKTICK:
161
+ case LSHFT_BACKTICK:
162
+ lit->start_tok = tXSTRING_BEG;
163
+ lit->interpolate = 1;
164
+ break;
165
+ default:
166
+ lit->start_tok = Qnil;
167
+ break;
168
+ }
169
+ }
170
+
171
+ static VALUE literal_get_start_delim(VALUE str)
172
+ {
173
+ VALUE end_delim = literal_get_end_delim(str);
174
+ if (end_delim == str)
175
+ return Qnil;
176
+ else
177
+ return str;
178
+ }
179
+
180
+ static VALUE literal_get_end_delim(VALUE str)
181
+ {
182
+ char *p = RSTRING_PTR(str);
183
+
184
+ switch (*p) {
185
+ case '(': return rb_str_new2(")");
186
+ case '[': return rb_str_new2("]");
187
+ case '{': return rb_str_new2("}");
188
+ case '<': return rb_str_new2(">");
189
+ default: return str;
190
+ }
191
+ }
192
+
193
+ static int literal_words_p(literal *lit)
194
+ {
195
+ return lit->start_tok == tWORDS_BEG || lit->start_tok == tQWORDS_BEG ||
196
+ lit->start_tok == tSYMBOLS_BEG || lit->start_tok == tQSYMBOLS_BEG;
197
+ }
198
+
199
+ static int literal_regexp_p(literal *lit)
200
+ {
201
+ return lit->start_tok == tREGEXP_BEG;
202
+ }
203
+
204
+ static int literal_heredoc_p(literal *lit)
205
+ {
206
+ return lit->heredoc_e ? 1 : 0;
207
+ }
208
+
209
+ static int literal_squiggly_heredoc_p(literal *lit)
210
+ {
211
+ return literal_heredoc_p(lit) && lit->dedent_body;
212
+ }
213
+
214
+ static int literal_backslash_delimited_p(literal *lit)
215
+ {
216
+ return *RSTRING_PTR(lit->end_delim) == '\\';
217
+ }
218
+
219
+ static int literal_munge_escape_p(literal *lit, VALUE character)
220
+ {
221
+ char *p = RSTRING_PTR(character);
222
+
223
+ if (literal_words_p(lit) && (*p == ' ' || *p == '\t' || *p == '\n' ||
224
+ *p == '\r' || *p == '\v' || *p == '\f')) {
225
+ return 1;
226
+ } else if (*p == '\\' ||
227
+ rb_equal(character, lit->start_delim) ||
228
+ rb_equal(character, lit->end_delim)) {
229
+ return 1;
230
+ } else {
231
+ return 0;
232
+ }
233
+ }
234
+
235
+ static int literal_nest_and_try_closing(literal *lit, VALUE delimiter, long ts, long te, VALUE lookahead)
236
+ {
237
+ if (lit->start_delim != Qnil && rb_equal(lit->start_delim, delimiter)) {
238
+ lit->nesting += 1;
239
+ } else if (lit->indent && rb_equal(lit->end_delim, rb_funcall(delimiter, rb_intern("lstrip"), 0))) {
240
+ lit->nesting -= 1;
241
+ } else if (!lit->indent && rb_equal(lit->end_delim, delimiter)) {
242
+ lit->nesting -= 1;
243
+ }
244
+
245
+ if (lit->nesting == 0) {
246
+ if (literal_words_p(lit)) {
247
+ literal_extend_space(lit, ts, ts);
248
+ }
249
+
250
+ int quoted_label = 0;
251
+
252
+ if (lookahead != Qnil) {
253
+ char *p = RSTRING_PTR(lookahead);
254
+ if (p[0] == ':' && p[1] != ':' && lit->label_allowed && lit->start_tok == tSTRING_BEG)
255
+ quoted_label = 1;
256
+ }
257
+
258
+ if (quoted_label) {
259
+ literal_flush_string(lit);
260
+ emit_token(lit->lexer, tLABEL_END, lit->end_delim, ts, te + 1);
261
+ } else if (lit->monolithic) {
262
+ emit_token(lit->lexer, tSTRING, lit->buffer, lit->str_s, te);
263
+ } else {
264
+ if (!lit->heredoc_e) {
265
+ literal_flush_string(lit);
266
+ }
267
+ emit_token(lit->lexer, tSTRING_END, lit->end_delim, ts, te);
268
+ }
269
+ return 1;
270
+ } else {
271
+ return 0;
272
+ }
273
+ }
274
+
275
+ static void literal_infer_indent_level(literal *lit, VALUE line)
276
+ {
277
+ if (!lit->dedent_body)
278
+ return;
279
+
280
+ char *p = RSTRING_PTR(line);
281
+ int indent_level = 0;
282
+
283
+ while (*p) {
284
+ if (*p == ' ') {
285
+ indent_level += 1;
286
+ } else if (*p == '\t') {
287
+ indent_level += (8 - (indent_level % 8));
288
+ } else {
289
+ if (lit->dedent_level == -1 || lit->dedent_level > indent_level)
290
+ lit->dedent_level = indent_level;
291
+ break;
292
+ }
293
+ p++;
294
+ }
295
+ }
296
+
297
+ static void literal_start_interp_brace(literal *lit)
298
+ {
299
+ lit->interp_braces += 1;
300
+ }
301
+
302
+ static int literal_end_interp_brace_and_try_closing(literal *lit)
303
+ {
304
+ return --lit->interp_braces == 0;
305
+ }
306
+
307
+ static void literal_extend_string(literal *lit, VALUE str, long ts, long te)
308
+ {
309
+ if (!lit->buffer_s)
310
+ lit->buffer_s = ts;
311
+
312
+ lit->buffer_e = te;
313
+ rb_str_concat(lit->buffer, str);
314
+ }
315
+
316
+ static void literal_flush_string(literal *lit)
317
+ {
318
+ if (lit->monolithic) {
319
+ literal_emit_start_tok(lit);
320
+ lit->monolithic = 0;
321
+ }
322
+
323
+ if (RSTRING_LEN(lit->buffer) > 0) {
324
+ emit_token(lit->lexer, tSTRING_CONTENT, lit->buffer, lit->buffer_s, lit->buffer_e);
325
+ literal_clear_buffer(lit);
326
+ lit->space_emitted = 0;
327
+ }
328
+ }
329
+
330
+ static void literal_extend_content(literal *lit)
331
+ {
332
+ lit->space_emitted = 0;
333
+ }
334
+
335
+ static void literal_extend_space(literal *lit, long ts, long te)
336
+ {
337
+ literal_flush_string(lit);
338
+
339
+ if (!lit->space_emitted) {
340
+ emit_token(lit->lexer, tSPACE, Qnil, ts, te);
341
+ lit->space_emitted = 1;
342
+ }
343
+ }
344
+
345
+ static void literal_clear_buffer(literal *lit)
346
+ {
347
+ lit->buffer = rb_str_new2("");
348
+ force_encoding(lit->buffer, lit->lexer->encoding);
349
+
350
+ lit->buffer_s = 0;
351
+ lit->buffer_e = 0;
352
+ }
353
+
354
+ static void literal_emit_start_tok(literal *lit)
355
+ {
356
+ VALUE str_type = literal_str_type_to_string(lit->str_type);
357
+
358
+ if (*RSTRING_PTR(str_type) == '%')
359
+ rb_str_concat(str_type, lit->delimiter);
360
+
361
+ long str_e = lit->heredoc_e;
362
+ if (str_e == 0)
363
+ str_e = lit->str_s + NUM2INT(rb_str_length(str_type));
364
+
365
+ emit_token(lit->lexer, lit->start_tok, str_type, lit->str_s, str_e);
366
+ }
367
+
368
+ inline int newline_char_p(VALUE str)
369
+ {
370
+ return RTEST(rb_str_equal(str, rb_str_new2("\n")));
371
+ }
@@ -0,0 +1,28 @@
1
/*
 * Kinds of string-like literal the lexer recognises, named after the text
 * that opens them. INVALID marks an opener with no known kind.
 */
typedef enum {
  /* plain strings */
  SINGLE_QUOTE,      /* '   */
  DOUBLE_QUOTE,      /* "   */
  PERCENT_Q,         /* %q  */
  BIG_PERCENT_Q,     /* %Q  */
  LSHFT_SINGLE_QUOT, /* <<' */
  LSHFT_DOUBLE_QUOT, /* <<" */
  BARE_PERCENT,      /* %   */

  /* word lists */
  PERCENT_W,         /* %w  */
  BIG_PERCENT_W,     /* %W  */

  /* symbol lists */
  PERCENT_I,         /* %i  */
  BIG_PERCENT_I,     /* %I  */

  /* symbols */
  SYM_SINGLE_QUOT,   /* :'  */
  SYM_DOUBLE_QUOT,   /* :"  */
  PERCENT_S,         /* %s  */

  /* regular expressions */
  SLASH,             /* /   */
  PERCENT_R,         /* %r  */

  /* xstr (command) strings */
  PERCENT_X,         /* %x  */
  BACKTICK,          /* `   */
  LSHFT_BACKTICK,    /* <<` */

  INVALID
} str_type;