clj 0.0.6 → 0.0.6.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -19,7 +19,6 @@ static VALUE cParser;
19
19
  static VALUE UNICODE_REGEX;
20
20
  static VALUE OCTAL_REGEX;
21
21
 
22
- #define _INSIDE_PARSER
23
22
  typedef enum {
24
23
  NODE_METADATA,
25
24
  NODE_NUMBER,
@@ -36,9 +35,430 @@ typedef enum {
36
35
  NODE_REGEXP
37
36
  } NodeType;
38
37
 
39
- #include "string_parser.c"
40
- #include "io_parser.c"
41
- #undef _INSIDE_PARSER
38
+ #define CALL(what) (what(self, string, position))
39
+ #define STATE VALUE self, char* string, size_t* position
40
+ #define IS_EOF (string[*position] == '\0')
41
+ #define IS_EOF_AFTER(n) (string[*position + (n)] == '\0')
42
+ #define CURRENT (string[*position])
43
+ #define CURRENT_PTR (&string[*position])
44
+ #define AFTER(n) (string[*position + (n)])
45
+ #define AFTER_PTR(n) (&string[*position + (n)])
46
+ #define BEFORE(n) (string[*position - (n)])
47
+ #define BEFORE_PTR(n) (&string[*position - (n)])
48
+ #define SEEK(n) (*position += (n))
49
+ #define IS_NOT_EOF_UP_TO(n) (is_not_eof_up_to(string, position, n))
50
+ #define IS_EQUAL_UP_TO(str, n) (strncmp(CURRENT_PTR, str, (n)) == 0)
51
+ #define IS_EQUAL(str) IS_EQUAL_UP_TO(str, strlen(str))
52
+ #define IS_IGNORED(ch) (isspace(ch) || ch == ',')
53
+ #define IS_BOTH(ch) (ch == ' ' || ch == ',' || ch == '"' || ch == '{' || ch == '}' || ch == '(' || ch == ')' || ch == '[' || ch == ']' || ch == '#' || ch == ':' || ch == '\n' || ch == '\r' || ch == '\t')
54
+ #define IS_KEYWORD(ch) (ch == ' ' || ch == ',' || ch == '"' || ch == '{' || ch == '}' || ch == '(' || ch == ')' || ch == '[' || ch == ']' || ch == '#' || ch == ':' || ch == '\'' || ch == '^' || ch == '@' || ch == '`' || ch == '~' || ch == '\\' || ch == ';' || ch == '\n' || ch == '\r' || ch == '\t')
55
+
56
+ static VALUE read_next (VALUE self, char* string, size_t* position);
57
+
58
+ static inline bool is_not_eof_up_to (char* string, size_t* position, size_t n)
59
+ {
60
+ for (size_t i = 0; i < n; i++) {
61
+ if (IS_EOF_AFTER(i)) {
62
+ return false;
63
+ }
64
+ }
65
+
66
+ return true;
67
+ }
68
+
69
+ static void ignore (STATE)
70
+ {
71
+ while (!IS_EOF && IS_IGNORED(CURRENT)) {
72
+ SEEK(1);
73
+ }
74
+ }
75
+
76
+ static NodeType next_type (STATE)
77
+ {
78
+ if (isdigit(CURRENT) || CURRENT == '-' || CURRENT == '+') {
79
+ return NODE_NUMBER;
80
+ }
81
+
82
+ switch (CURRENT) {
83
+ case '^': return NODE_METADATA;
84
+ case 't': case 'f': return NODE_BOOLEAN;
85
+ case 'n': return NODE_NIL;
86
+ case '\\': return NODE_CHAR;
87
+ case ':': return NODE_KEYWORD;
88
+ case '"': return NODE_STRING;
89
+ case '{': return NODE_MAP;
90
+ case '(': return NODE_LIST;
91
+ case '[': return NODE_VECTOR;
92
+ }
93
+
94
+ if (CURRENT == '#') {
95
+ if (IS_EOF_AFTER(1)) {
96
+ rb_raise(rb_eSyntaxError, "unexpected EOF");
97
+ }
98
+
99
+ switch (AFTER(1)) {
100
+ case 'i': return NODE_INSTANT;
101
+ case '{': return NODE_SET;
102
+ case '"': return NODE_REGEXP;
103
+ }
104
+ }
105
+
106
+ rb_raise(rb_eSyntaxError, "unknown type");
107
+ }
108
+
109
+ static VALUE read_metadata (STATE)
110
+ {
111
+ VALUE result;
112
+ VALUE* metadatas = NULL;
113
+ size_t length = 0;
114
+
115
+ while (CURRENT == '^') {
116
+ metadatas = realloc(metadatas, ++length * sizeof(VALUE));
117
+
118
+ SEEK(1);
119
+
120
+ metadatas[length - 1] = CALL(read_next);
121
+ }
122
+
123
+ result = CALL(read_next);
124
+
125
+ if (!rb_respond_to(result, rb_intern("metadata="))) {
126
+ free(metadatas);
127
+
128
+ rb_raise(rb_eSyntaxError, "the object cannot hold metadata");
129
+ }
130
+
131
+ // FIXME: this could lead to a memleak if #metadata= raises
132
+ for (size_t i = 0; i < length; i++) {
133
+ rb_funcall(result, rb_intern("metadata="), 1, metadatas[i]);
134
+ }
135
+
136
+ free(metadatas);
137
+
138
+ return result;
139
+ }
140
+
141
+ static VALUE read_nil (STATE)
142
+ {
143
+ if (!IS_NOT_EOF_UP_TO(3)) {
144
+ rb_raise(rb_eSyntaxError, "unexpected EOF");
145
+ }
146
+
147
+ if (!IS_EQUAL_UP_TO("nil", 3)) {
148
+ rb_raise(rb_eSyntaxError, "expected nil, got n%c%c", AFTER(1), AFTER(2));
149
+ }
150
+
151
+ SEEK(3);
152
+
153
+ return Qnil;
154
+ }
155
+
156
+ static VALUE read_boolean (STATE)
157
+ {
158
+ if (CURRENT == 't') {
159
+ if (!IS_NOT_EOF_UP_TO(4)) {
160
+ rb_raise(rb_eSyntaxError, "unexpected EOF");
161
+ }
162
+
163
+ if (!IS_EQUAL_UP_TO("true", 4)) {
164
+ rb_raise(rb_eSyntaxError, "expected true, got t%c%c%c", AFTER(1), AFTER(2), AFTER(3));
165
+ }
166
+
167
+ SEEK(4);
168
+
169
+ return Qtrue;
170
+ }
171
+ else {
172
+ if (!IS_NOT_EOF_UP_TO(5)) {
173
+ rb_raise(rb_eSyntaxError, "unexpected EOF");
174
+ }
175
+
176
+ if (!IS_EQUAL_UP_TO("false", 5)) {
177
+ rb_raise(rb_eSyntaxError, "expected false, got f%c%c%c%c", AFTER(1), AFTER(2), AFTER(3), AFTER(4));
178
+ }
179
+
180
+ SEEK(5);
181
+
182
+ return Qfalse;
183
+ }
184
+ }
185
+
186
+ static VALUE read_number (STATE)
187
+ {
188
+ size_t length = 0;
189
+ VALUE rbPiece;
190
+ char* cPiece;
191
+ char* tmp;
192
+
193
+ while (!IS_EOF_AFTER(length) && !IS_BOTH(AFTER(length))) {
194
+ length++;
195
+ }
196
+
197
+ SEEK(length);
198
+
199
+ rbPiece = rb_str_new(BEFORE_PTR(length), length);
200
+ cPiece = StringValueCStr(rbPiece);
201
+
202
+ if (strchr(cPiece, '/')) {
203
+ return rb_funcall(rb_cObject, rb_intern("Rational"), 1, rbPiece);
204
+ }
205
+ else if ((tmp = strchr(cPiece, 'r')) || (tmp = strchr(cPiece, 'R'))) {
206
+ return rb_funcall(rb_str_new2(tmp + 1), rb_intern("to_i"), 1,
207
+ rb_funcall(rb_str_new(cPiece, tmp - cPiece), rb_intern("to_i"), 0));
208
+ }
209
+ else if (strchr(cPiece, '.') || strchr(cPiece, 'e') || strchr(cPiece, 'E') || cPiece[length - 1] == 'M') {
210
+ if (cPiece[length - 1] == 'M') {
211
+ return rb_funcall(rb_cObject, rb_intern("BigDecimal"), 1, rbPiece);
212
+ }
213
+ else {
214
+ return rb_funcall(rb_cObject, rb_intern("Float"), 1, rbPiece);
215
+ }
216
+ }
217
+ else {
218
+ if (cPiece[length - 1] == 'N') {
219
+ rb_str_set_len(rbPiece, length - 1);
220
+ }
221
+
222
+ return rb_funcall(rb_cObject, rb_intern("Integer"), 1, rbPiece);
223
+ }
224
+ }
225
+
226
+ static VALUE read_char (STATE)
227
+ {
228
+ SEEK(1);
229
+
230
+ if (IS_EOF_AFTER(1) || IS_BOTH(AFTER(1))) {
231
+ SEEK(1); return rb_str_new(BEFORE_PTR(1), 1);
232
+ }
233
+ else if (IS_NOT_EOF_UP_TO(7) && IS_EQUAL_UP_TO("newline", 7) && (IS_EOF_AFTER(7) || IS_BOTH(AFTER(7)))) {
234
+ SEEK(7); return rb_str_new2("\n");
235
+ }
236
+ else if (IS_NOT_EOF_UP_TO(5) && IS_EQUAL_UP_TO("space", 5) && (IS_EOF_AFTER(5) || IS_BOTH(AFTER(5)))) {
237
+ SEEK(5); return rb_str_new2(" ");
238
+ }
239
+ else if (IS_NOT_EOF_UP_TO(3) && IS_EQUAL_UP_TO("tab", 3) && (IS_EOF_AFTER(3) || IS_BOTH(AFTER(3)))) {
240
+ SEEK(3); return rb_str_new2("\t");
241
+ }
242
+ else if (IS_NOT_EOF_UP_TO(9) && IS_EQUAL_UP_TO("backspace", 9) && (IS_EOF_AFTER(9) || IS_BOTH(AFTER(9)))) {
243
+ SEEK(9); return rb_str_new2("\b");
244
+ }
245
+ else if (IS_NOT_EOF_UP_TO(8) && IS_EQUAL_UP_TO("formfeed", 8) && (IS_EOF_AFTER(8) || IS_BOTH(AFTER(8)))) {
246
+ SEEK(8); return rb_str_new2("\f");
247
+ }
248
+ else if (IS_NOT_EOF_UP_TO(6) && IS_EQUAL_UP_TO("return", 6) && (IS_EOF_AFTER(6) || IS_BOTH(AFTER(6)))) {
249
+ SEEK(6); return rb_str_new2("\r");
250
+ }
251
+ else if (CURRENT == 'u' && IS_NOT_EOF_UP_TO(5) && !NIL_P(rb_funcall(rb_str_new(AFTER_PTR(1), 4), rb_intern("=~"), 1, UNICODE_REGEX)) && (IS_EOF_AFTER(5) || IS_BOTH(AFTER(5)))) {
252
+ SEEK(5); return rb_funcall(rb_ary_new3(1, rb_funcall(rb_str_new(BEFORE_PTR(4), 4), rb_intern("to_i"), 1, INT2FIX(16))),
253
+ rb_intern("pack"), 1, rb_str_new2("U"));
254
+ }
255
+ else if (CURRENT == 'o') {
256
+ size_t length = 1;
257
+
258
+ for (size_t i = 1; i < 5; i++) {
259
+ if (IS_EOF_AFTER(i) || IS_BOTH(AFTER(i))) {
260
+ break;
261
+ }
262
+
263
+ length++;
264
+ }
265
+
266
+ if (length > 1 && !NIL_P(rb_funcall(rb_str_new(AFTER_PTR(1), length - 1), rb_intern("=~"), 1, OCTAL_REGEX)) && (IS_EOF_AFTER(length) || IS_BOTH(AFTER(length)))) {
267
+ SEEK(length); return rb_funcall(rb_funcall(rb_str_new(BEFORE_PTR(length - 1), length - 1), rb_intern("to_i"), 1, INT2FIX(8)),
268
+ rb_intern("chr"), 0);
269
+ }
270
+ }
271
+
272
+ // TODO: add unicode and octal chars support
273
+
274
+ rb_raise(rb_eSyntaxError, "unknown character type");
275
+ }
276
+
277
+ static VALUE read_keyword (STATE)
278
+ {
279
+ size_t length = 0;
280
+
281
+ SEEK(1);
282
+
283
+ while (!IS_EOF_AFTER(length) && !IS_KEYWORD(AFTER(length))) {
284
+ length++;
285
+ }
286
+
287
+ SEEK(length);
288
+
289
+ return rb_funcall(rb_str_new(BEFORE_PTR(length), length), rb_intern("to_sym"), 0);
290
+ }
291
+
292
+ static VALUE read_string (STATE)
293
+ {
294
+ size_t length = 0;
295
+
296
+ SEEK(1);
297
+
298
+ while (AFTER(length) != '"') {
299
+ if (IS_EOF_AFTER(length)) {
300
+ rb_raise(rb_eSyntaxError, "unexpected EOF");
301
+ }
302
+
303
+ if (AFTER(length) == '\\') {
304
+ length++;
305
+ }
306
+
307
+ length++;
308
+ }
309
+
310
+ SEEK(length + 1);
311
+
312
+ // TODO: make the escapes work properly
313
+
314
+ return rb_funcall(cClojure, rb_intern("unescape"), 1, rb_str_new(BEFORE_PTR(length + 1), length));
315
+ }
316
+
317
+ static VALUE read_regexp (STATE)
318
+ {
319
+ size_t length = 0;
320
+ VALUE args[] = { Qnil };
321
+
322
+ SEEK(2);
323
+
324
+ while (AFTER(length) != '"') {
325
+ if (IS_EOF_AFTER(length)) {
326
+ rb_raise(rb_eSyntaxError, "unexpected EOF");
327
+ }
328
+
329
+ if (AFTER(length) == '\\') {
330
+ length++;
331
+ }
332
+
333
+ length++;
334
+ }
335
+
336
+ SEEK(length + 1);
337
+
338
+ args[0] = rb_str_new(BEFORE_PTR(length + 1), length);
339
+
340
+ return rb_class_new_instance(1, args, rb_cRegexp);
341
+ }
342
+
343
+ static VALUE read_instant (STATE)
344
+ {
345
+ SEEK(1);
346
+
347
+ if (!IS_NOT_EOF_UP_TO(4)) {
348
+ rb_raise(rb_eSyntaxError, "unexpected EOF");
349
+ }
350
+
351
+ if (!IS_EQUAL_UP_TO("inst", 4)) {
352
+ rb_raise(rb_eSyntaxError, "expected inst, got %c%c%c%c", AFTER(0), AFTER(1), AFTER(2), AFTER(3));
353
+ }
354
+
355
+ SEEK(4);
356
+
357
+ CALL(ignore);
358
+
359
+ return rb_funcall(rb_const_get(rb_cObject, rb_intern("DateTime")), rb_intern("rfc3339"), 1, CALL(read_string));
360
+ }
361
+
362
+ static VALUE read_list (STATE)
363
+ {
364
+ VALUE result = rb_class_new_instance(0, NULL, rb_iv_get(self, "@list_class"));
365
+
366
+ SEEK(1); CALL(ignore);
367
+
368
+ while (CURRENT != ')') {
369
+ rb_funcall(result, rb_intern("<<"), 1, CALL(read_next));
370
+
371
+ CALL(ignore);
372
+ }
373
+
374
+ SEEK(1);
375
+
376
+ return result;
377
+ }
378
+
379
+ static VALUE read_vector (STATE)
380
+ {
381
+ VALUE result = rb_class_new_instance(0, NULL, rb_iv_get(self, "@vector_class"));
382
+
383
+ SEEK(1); CALL(ignore);
384
+
385
+ while (CURRENT != ']') {
386
+ rb_funcall(result, rb_intern("<<"), 1, CALL(read_next));
387
+
388
+ CALL(ignore);
389
+ }
390
+
391
+ SEEK(1);
392
+
393
+ return result;
394
+ }
395
+
396
+ static VALUE read_set (STATE)
397
+ {
398
+ VALUE result = rb_class_new_instance(0, NULL, rb_iv_get(self, "@set_class"));
399
+
400
+ SEEK(2); CALL(ignore);
401
+
402
+ while (CURRENT != '}') {
403
+ rb_funcall(result, rb_intern("<<"), 1, CALL(read_next));
404
+
405
+ CALL(ignore);
406
+ }
407
+
408
+ SEEK(1);
409
+
410
+ if (!NIL_P(rb_funcall(result, rb_intern("uniq!"), 0))) {
411
+ rb_raise(rb_eSyntaxError, "the set contains non unique values");
412
+ }
413
+
414
+ return result;
415
+ }
416
+
417
+ static VALUE read_map (STATE)
418
+ {
419
+ VALUE result = rb_class_new_instance(0, NULL, rb_iv_get(self, "@map_class"));
420
+ VALUE key;
421
+ VALUE value;
422
+
423
+ SEEK(1); CALL(ignore);
424
+
425
+ while (CURRENT != '}') {
426
+ key = CALL(read_next);
427
+ CALL(ignore);
428
+ value = CALL(read_next);
429
+
430
+ rb_funcall(result, rb_intern("[]="), 2, key, value);
431
+ }
432
+
433
+ SEEK(1);
434
+
435
+ return result;
436
+ }
437
+
438
+ static VALUE read_next (STATE)
439
+ {
440
+ CALL(ignore);
441
+
442
+ if (IS_EOF) {
443
+ rb_raise(rb_eSyntaxError, "unexpected EOF");
444
+ }
445
+
446
+ switch (CALL(next_type)) {
447
+ case NODE_METADATA: return CALL(read_metadata);
448
+ case NODE_NUMBER: return CALL(read_number);
449
+ case NODE_BOOLEAN: return CALL(read_boolean);
450
+ case NODE_NIL: return CALL(read_nil);
451
+ case NODE_CHAR: return CALL(read_char);
452
+ case NODE_KEYWORD: return CALL(read_keyword);
453
+ case NODE_STRING: return CALL(read_string);
454
+ case NODE_MAP: return CALL(read_map);
455
+ case NODE_LIST: return CALL(read_list);
456
+ case NODE_VECTOR: return CALL(read_vector);
457
+ case NODE_INSTANT: return CALL(read_instant);
458
+ case NODE_SET: return CALL(read_set);
459
+ case NODE_REGEXP: return CALL(read_regexp);
460
+ }
461
+ }
42
462
 
43
463
  static VALUE t_init (int argc, VALUE* argv, VALUE self)
44
464
  {
@@ -96,14 +516,19 @@ static VALUE t_init (int argc, VALUE* argv, VALUE self)
96
516
 
97
517
  static VALUE t_parse (VALUE self)
98
518
  {
99
- VALUE source = rb_iv_get(self, "@source");
519
+ size_t position = 0;
520
+ VALUE source = rb_iv_get(self, "@source");
100
521
 
101
- if (rb_obj_is_kind_of(source, rb_cString)) {
102
- return string_parse(self);
103
- }
104
- else if (rb_obj_is_kind_of(source, rb_cIO)) {
105
- return io_parse(self);
522
+ if (!rb_obj_is_kind_of(source, rb_cString)) {
523
+ if (rb_obj_is_kind_of(source, rb_cIO)) {
524
+ source = rb_funcall(source, rb_intern("read"), 0);
525
+ }
526
+ else {
527
+ source = rb_funcall(source, rb_intern("to_str"), 0);
528
+ }
106
529
  }
530
+
531
+ return read_next(self, StringValueCStr(source), &position);
107
532
  }
108
533
 
109
534
  void
data/lib/clj.rb CHANGED
@@ -66,7 +66,6 @@ module Clojure
66
66
  bytes
67
67
  end
68
68
  }
69
-
70
69
  end
71
70
  end
72
71
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: clj
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
4
+ version: 0.0.6.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -13,7 +13,7 @@ date: 2012-03-01 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
16
- requirement: &12555040 !ruby/object:Gem::Requirement
16
+ requirement: &7244020 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *12555040
24
+ version_requirements: *7244020
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: rspec
27
- requirement: &12553860 !ruby/object:Gem::Requirement
27
+ requirement: &7243040 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,7 +32,7 @@ dependencies:
32
32
  version: '0'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *12553860
35
+ version_requirements: *7243040
36
36
  description:
37
37
  email: meh@paranoici.org
38
38
  executables: []
@@ -41,8 +41,6 @@ extensions:
41
41
  extra_rdoc_files: []
42
42
  files:
43
43
  - ext/clj/parser.c
44
- - ext/clj/io_parser.c
45
- - ext/clj/string_parser.c
46
44
  - ext/clj/extconf.rb
47
45
  - lib/clj.rb
48
46
  - lib/clj/types.rb
@@ -1,18 +0,0 @@
1
- /**
2
- * DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
3
- * Version 2, December 2004
4
- *
5
- * DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
6
- * TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
7
- *
8
- * 0. You just DO WHAT THE FUCK YOU WANT TO.
9
- **/
10
-
11
- #ifdef _INSIDE_PARSER
12
-
13
- static VALUE io_parse (VALUE self)
14
- {
15
- return Qnil;
16
- }
17
-
18
- #endif
@@ -1,460 +0,0 @@
1
- /**
2
- * DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
3
- * Version 2, December 2004
4
- *
5
- * DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
6
- * TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
7
- *
8
- * 0. You just DO WHAT THE FUCK YOU WANT TO.
9
- **/
10
-
11
- #ifdef _INSIDE_PARSER
12
- #define IS_EOF (string[*position] == '\0')
13
- #define IS_EOF_AFTER(n) (string[*position + (n)] == '\0')
14
- #define CURRENT (string[*position])
15
- #define CURRENT_PTR (&string[*position])
16
- #define AFTER(n) (string[*position + (n)])
17
- #define AFTER_PTR(n) (&string[*position + (n)])
18
- #define BEFORE(n) (string[*position - (n)])
19
- #define BEFORE_PTR(n) (&string[*position - (n)])
20
- #define SEEK(n) (*position += (n))
21
- #define IS_IGNORED(ch) (isspace(ch) || ch == ',')
22
- #define IS_BOTH(ch) (ch == ' ' || ch == ',' || ch == '"' || ch == '{' || ch == '}' || ch == '(' || ch == ')' || ch == '[' || ch == ']' || ch == '#' || ch == ':' || ch == '\n' || ch == '\r' || ch == '\t')
23
- #define IS_KEYWORD(ch) (ch == ' ' || ch == ',' || ch == '"' || ch == '{' || ch == '}' || ch == '(' || ch == ')' || ch == '[' || ch == ']' || ch == '#' || ch == ':' || ch == '\'' || ch == '^' || ch == '@' || ch == '`' || ch == '~' || ch == '\\' || ch == ';' || ch == '\n' || ch == '\r' || ch == '\t')
24
- #define IS_NOT_EOF_UP_TO(n) (is_not_eof_up_to(string, position, n))
25
- #define IS_EQUAL_UP_TO(str, n) (strncmp(CURRENT_PTR, str, (n)) == 0)
26
- #define IS_EQUAL(str) IS_EQUAL_UP_TO(str, strlen(str))
27
- #define CALL(what) (what(self, string, position))
28
-
29
- static VALUE string_read_next (VALUE self, char* string, size_t* position);
30
-
31
- static inline bool is_not_eof_up_to (char* string, size_t* position, size_t n)
32
- {
33
- for (size_t i = 0; i < n; i++) {
34
- if (IS_EOF_AFTER(i)) {
35
- return false;
36
- }
37
- }
38
-
39
- return true;
40
- }
41
-
42
- static void string_ignore (VALUE self, char* string, size_t* position)
43
- {
44
- while (!IS_EOF && IS_IGNORED(CURRENT)) {
45
- SEEK(1);
46
- }
47
- }
48
-
49
- static NodeType string_next_type (VALUE self, char* string, size_t* position)
50
- {
51
- if (isdigit(CURRENT) || CURRENT == '-' || CURRENT == '+') {
52
- return NODE_NUMBER;
53
- }
54
-
55
- switch (CURRENT) {
56
- case '^': return NODE_METADATA;
57
- case 't': case 'f': return NODE_BOOLEAN;
58
- case 'n': return NODE_NIL;
59
- case '\\': return NODE_CHAR;
60
- case ':': return NODE_KEYWORD;
61
- case '"': return NODE_STRING;
62
- case '{': return NODE_MAP;
63
- case '(': return NODE_LIST;
64
- case '[': return NODE_VECTOR;
65
- }
66
-
67
- if (CURRENT == '#') {
68
- if (IS_EOF_AFTER(1)) {
69
- rb_raise(rb_eSyntaxError, "unexpected EOF");
70
- }
71
-
72
- switch (AFTER(1)) {
73
- case 'i': return NODE_INSTANT;
74
- case '{': return NODE_SET;
75
- case '"': return NODE_REGEXP;
76
- }
77
- }
78
-
79
- rb_raise(rb_eSyntaxError, "unknown type");
80
- }
81
-
82
- static VALUE string_read_metadata (VALUE self, char* string, size_t* position)
83
- {
84
- VALUE result;
85
- VALUE* metadatas = NULL;
86
- size_t length = 0;
87
-
88
- while (CURRENT == '^') {
89
- metadatas = realloc(metadatas, ++length * sizeof(VALUE));
90
-
91
- SEEK(1);
92
-
93
- metadatas[length - 1] = CALL(string_read_next);
94
- }
95
-
96
- result = CALL(string_read_next);
97
-
98
- if (!rb_respond_to(result, rb_intern("metadata="))) {
99
- free(metadatas);
100
-
101
- rb_raise(rb_eSyntaxError, "the object cannot hold metadata");
102
- }
103
-
104
- // FIXME: this could lead to a memleak if #metadata= raises
105
- for (size_t i = 0; i < length; i++) {
106
- rb_funcall(result, rb_intern("metadata="), 1, metadatas[i]);
107
- }
108
-
109
- free(metadatas);
110
-
111
- return result;
112
- }
113
-
114
- static VALUE string_read_nil (VALUE self, char* string, size_t* position)
115
- {
116
- if (!IS_NOT_EOF_UP_TO(3)) {
117
- rb_raise(rb_eSyntaxError, "unexpected EOF");
118
- }
119
-
120
- if (!IS_EQUAL_UP_TO("nil", 3)) {
121
- rb_raise(rb_eSyntaxError, "expected nil, got n%c%c", AFTER(1), AFTER(2));
122
- }
123
-
124
- SEEK(3);
125
-
126
- return Qnil;
127
- }
128
-
129
- static VALUE string_read_boolean (VALUE self, char* string, size_t* position)
130
- {
131
- if (CURRENT == 't') {
132
- if (!IS_NOT_EOF_UP_TO(4)) {
133
- rb_raise(rb_eSyntaxError, "unexpected EOF");
134
- }
135
-
136
- if (!IS_EQUAL_UP_TO("true", 4)) {
137
- rb_raise(rb_eSyntaxError, "expected true, got t%c%c%c", AFTER(1), AFTER(2), AFTER(3));
138
- }
139
-
140
- SEEK(4);
141
-
142
- return Qtrue;
143
- }
144
- else {
145
- if (!IS_NOT_EOF_UP_TO(5)) {
146
- rb_raise(rb_eSyntaxError, "unexpected EOF");
147
- }
148
-
149
- if (!IS_EQUAL_UP_TO("false", 5)) {
150
- rb_raise(rb_eSyntaxError, "expected false, got f%c%c%c%c", AFTER(1), AFTER(2), AFTER(3), AFTER(4));
151
- }
152
-
153
- SEEK(5);
154
-
155
- return Qfalse;
156
- }
157
- }
158
-
159
- static VALUE string_read_number (VALUE self, char* string, size_t* position)
160
- {
161
- size_t length = 0;
162
- VALUE rbPiece;
163
- char* cPiece;
164
- char* tmp;
165
-
166
- while (!IS_EOF_AFTER(length) && !IS_BOTH(AFTER(length))) {
167
- length++;
168
- }
169
-
170
- SEEK(length);
171
-
172
- rbPiece = rb_str_new(BEFORE_PTR(length), length);
173
- cPiece = StringValueCStr(rbPiece);
174
-
175
- if (strchr(cPiece, '/')) {
176
- return rb_funcall(rb_cObject, rb_intern("Rational"), 1, rbPiece);
177
- }
178
- else if ((tmp = strchr(cPiece, 'r')) || (tmp = strchr(cPiece, 'R'))) {
179
- return rb_funcall(rb_str_new2(tmp + 1), rb_intern("to_i"), 1,
180
- rb_funcall(rb_str_new(cPiece, tmp - cPiece), rb_intern("to_i"), 0));
181
- }
182
- else if (strchr(cPiece, '.') || strchr(cPiece, 'e') || strchr(cPiece, 'E') || cPiece[length - 1] == 'M') {
183
- if (cPiece[length - 1] == 'M') {
184
- return rb_funcall(rb_cObject, rb_intern("BigDecimal"), 1, rbPiece);
185
- }
186
- else {
187
- return rb_funcall(rb_cObject, rb_intern("Float"), 1, rbPiece);
188
- }
189
- }
190
- else {
191
- if (cPiece[length - 1] == 'N') {
192
- rb_str_set_len(rbPiece, length - 1);
193
- }
194
-
195
- return rb_funcall(rb_cObject, rb_intern("Integer"), 1, rbPiece);
196
- }
197
- }
198
-
199
- static VALUE string_read_char (VALUE self, char* string, size_t* position)
200
- {
201
- SEEK(1);
202
-
203
- if (IS_EOF_AFTER(1) || IS_BOTH(AFTER(1))) {
204
- SEEK(1); return rb_str_new(BEFORE_PTR(1), 1);
205
- }
206
- else if (IS_NOT_EOF_UP_TO(7) && IS_EQUAL_UP_TO("newline", 7) && (IS_EOF_AFTER(7) || IS_BOTH(AFTER(7)))) {
207
- SEEK(7); return rb_str_new2("\n");
208
- }
209
- else if (IS_NOT_EOF_UP_TO(5) && IS_EQUAL_UP_TO("space", 5) && (IS_EOF_AFTER(5) || IS_BOTH(AFTER(5)))) {
210
- SEEK(5); return rb_str_new2(" ");
211
- }
212
- else if (IS_NOT_EOF_UP_TO(3) && IS_EQUAL_UP_TO("tab", 3) && (IS_EOF_AFTER(3) || IS_BOTH(AFTER(3)))) {
213
- SEEK(3); return rb_str_new2("\t");
214
- }
215
- else if (IS_NOT_EOF_UP_TO(9) && IS_EQUAL_UP_TO("backspace", 9) && (IS_EOF_AFTER(9) || IS_BOTH(AFTER(9)))) {
216
- SEEK(9); return rb_str_new2("\b");
217
- }
218
- else if (IS_NOT_EOF_UP_TO(8) && IS_EQUAL_UP_TO("formfeed", 8) && (IS_EOF_AFTER(8) || IS_BOTH(AFTER(8)))) {
219
- SEEK(8); return rb_str_new2("\f");
220
- }
221
- else if (IS_NOT_EOF_UP_TO(6) && IS_EQUAL_UP_TO("return", 6) && (IS_EOF_AFTER(6) || IS_BOTH(AFTER(6)))) {
222
- SEEK(6); return rb_str_new2("\r");
223
- }
224
- else if (CURRENT == 'u' && IS_NOT_EOF_UP_TO(5) && !NIL_P(rb_funcall(rb_str_new(AFTER_PTR(1), 4), rb_intern("=~"), 1, UNICODE_REGEX)) && (IS_EOF_AFTER(5) || IS_BOTH(AFTER(5)))) {
225
- SEEK(5); return rb_funcall(rb_ary_new3(1, rb_funcall(rb_str_new(BEFORE_PTR(4), 4), rb_intern("to_i"), 1, INT2FIX(16))),
226
- rb_intern("pack"), 1, rb_str_new2("U"));
227
- }
228
- else if (CURRENT == 'o') {
229
- size_t length = 1;
230
-
231
- for (size_t i = 1; i < 5; i++) {
232
- if (IS_EOF_AFTER(i) || IS_BOTH(AFTER(i))) {
233
- break;
234
- }
235
-
236
- length++;
237
- }
238
-
239
- if (length > 1 && !NIL_P(rb_funcall(rb_str_new(AFTER_PTR(1), length - 1), rb_intern("=~"), 1, OCTAL_REGEX)) && (IS_EOF_AFTER(length) || IS_BOTH(AFTER(length)))) {
240
- SEEK(length); return rb_funcall(rb_funcall(rb_str_new(BEFORE_PTR(length - 1), length - 1), rb_intern("to_i"), 1, INT2FIX(8)),
241
- rb_intern("chr"), 0);
242
- }
243
- }
244
-
245
- // TODO: add unicode and octal chars support
246
-
247
- rb_raise(rb_eSyntaxError, "unknown character type");
248
- }
249
-
250
- static VALUE string_read_keyword (VALUE self, char* string, size_t* position)
251
- {
252
- size_t length = 0;
253
-
254
- SEEK(1);
255
-
256
- while (!IS_EOF_AFTER(length) && !IS_KEYWORD(AFTER(length))) {
257
- length++;
258
- }
259
-
260
- SEEK(length);
261
-
262
- return rb_funcall(rb_str_new(BEFORE_PTR(length), length), rb_intern("to_sym"), 0);
263
- }
264
-
265
- static VALUE string_read_string (VALUE self, char* string, size_t* position)
266
- {
267
- size_t length = 0;
268
-
269
- SEEK(1);
270
-
271
- while (AFTER(length) != '"') {
272
- if (IS_EOF_AFTER(length)) {
273
- rb_raise(rb_eSyntaxError, "unexpected EOF");
274
- }
275
-
276
- if (AFTER(length) == '\\') {
277
- length++;
278
- }
279
-
280
- length++;
281
- }
282
-
283
- SEEK(length + 1);
284
-
285
- // TODO: make the escapes work properly
286
-
287
- return rb_funcall(cClojure, rb_intern("unescape"), 1, rb_str_new(BEFORE_PTR(length + 1), length));
288
- }
289
-
290
- static VALUE string_read_regexp (VALUE self, char* string, size_t* position)
291
- {
292
- size_t length = 0;
293
- VALUE args[] = { Qnil };
294
-
295
- SEEK(2);
296
-
297
- while (AFTER(length) != '"') {
298
- if (IS_EOF_AFTER(length)) {
299
- rb_raise(rb_eSyntaxError, "unexpected EOF");
300
- }
301
-
302
- if (AFTER(length) == '\\') {
303
- length++;
304
- }
305
-
306
- length++;
307
- }
308
-
309
- SEEK(length + 1);
310
-
311
- args[0] = rb_str_new(BEFORE_PTR(length + 1), length);
312
-
313
- return rb_class_new_instance(1, args, rb_cRegexp);
314
- }
315
-
316
- static VALUE string_read_instant (VALUE self, char* string, size_t* position)
317
- {
318
- SEEK(1);
319
-
320
- if (!IS_NOT_EOF_UP_TO(4)) {
321
- rb_raise(rb_eSyntaxError, "unexpected EOF");
322
- }
323
-
324
- if (!IS_EQUAL_UP_TO("inst", 4)) {
325
- rb_raise(rb_eSyntaxError, "expected inst, got %c%c%c%c", AFTER(0), AFTER(1), AFTER(2), AFTER(3));
326
- }
327
-
328
- SEEK(4);
329
-
330
- CALL(string_ignore);
331
-
332
- return rb_funcall(rb_const_get(rb_cObject, rb_intern("DateTime")), rb_intern("rfc3339"), 1, CALL(string_read_string));
333
- }
334
-
335
- static VALUE string_read_list (VALUE self, char* string, size_t* position)
336
- {
337
- VALUE result = rb_class_new_instance(0, NULL, rb_iv_get(self, "@list_class"));
338
-
339
- SEEK(1); CALL(string_ignore);
340
-
341
- while (CURRENT != ')') {
342
- rb_funcall(result, rb_intern("<<"), 1, CALL(string_read_next));
343
-
344
- CALL(string_ignore);
345
- }
346
-
347
- SEEK(1);
348
-
349
- return result;
350
- }
351
-
352
- static VALUE string_read_vector (VALUE self, char* string, size_t* position)
353
- {
354
- VALUE result = rb_class_new_instance(0, NULL, rb_iv_get(self, "@vector_class"));
355
-
356
- SEEK(1); CALL(string_ignore);
357
-
358
- while (CURRENT != ']') {
359
- rb_funcall(result, rb_intern("<<"), 1, CALL(string_read_next));
360
-
361
- CALL(string_ignore);
362
- }
363
-
364
- SEEK(1);
365
-
366
- return result;
367
- }
368
-
369
- static VALUE string_read_set (VALUE self, char* string, size_t* position)
370
- {
371
- VALUE result = rb_class_new_instance(0, NULL, rb_iv_get(self, "@set_class"));
372
-
373
- SEEK(2); CALL(string_ignore);
374
-
375
- while (CURRENT != '}') {
376
- rb_funcall(result, rb_intern("<<"), 1, CALL(string_read_next));
377
-
378
- CALL(string_ignore);
379
- }
380
-
381
- SEEK(1);
382
-
383
- if (!NIL_P(rb_funcall(result, rb_intern("uniq!"), 0))) {
384
- rb_raise(rb_eSyntaxError, "the set contains non unique values");
385
- }
386
-
387
- return result;
388
- }
389
-
390
- static VALUE string_read_map (VALUE self, char* string, size_t* position)
391
- {
392
- VALUE result = rb_class_new_instance(0, NULL, rb_iv_get(self, "@map_class"));
393
- VALUE key;
394
- VALUE value;
395
-
396
- SEEK(1); CALL(string_ignore);
397
-
398
- while (CURRENT != '}') {
399
- key = CALL(string_read_next);
400
- CALL(string_ignore);
401
- value = CALL(string_read_next);
402
-
403
- rb_funcall(result, rb_intern("[]="), 2, key, value);
404
- }
405
-
406
- SEEK(1);
407
-
408
- return result;
409
- }
410
-
411
- static VALUE string_read_next (VALUE self, char* string, size_t* position)
412
- {
413
- CALL(string_ignore);
414
-
415
- if (IS_EOF) {
416
- rb_raise(rb_eSyntaxError, "unexpected EOF");
417
- }
418
-
419
- switch (CALL(string_next_type)) {
420
- case NODE_METADATA: return CALL(string_read_metadata);
421
- case NODE_NUMBER: return CALL(string_read_number);
422
- case NODE_BOOLEAN: return CALL(string_read_boolean);
423
- case NODE_NIL: return CALL(string_read_nil);
424
- case NODE_CHAR: return CALL(string_read_char);
425
- case NODE_KEYWORD: return CALL(string_read_keyword);
426
- case NODE_STRING: return CALL(string_read_string);
427
- case NODE_MAP: return CALL(string_read_map);
428
- case NODE_LIST: return CALL(string_read_list);
429
- case NODE_VECTOR: return CALL(string_read_vector);
430
- case NODE_INSTANT: return CALL(string_read_instant);
431
- case NODE_SET: return CALL(string_read_set);
432
- case NODE_REGEXP: return CALL(string_read_regexp);
433
- }
434
- }
435
-
436
- static VALUE string_parse (VALUE self)
437
- {
438
- size_t position = 0;
439
- VALUE source = rb_iv_get(self, "@source");
440
-
441
- return string_read_next(self, StringValueCStr(source), &position);
442
- }
443
-
444
- #undef IS_EOF
445
- #undef IS_EOF_AFTER
446
- #undef CURRENT
447
- #undef CURRENT_PTR
448
- #undef AFTER
449
- #undef AFTER_PTR
450
- #undef BEFORE
451
- #undef BEFORE_PTR
452
- #undef SEEK
453
- #undef IS_IGNORED
454
- #undef IS_BOTH
455
- #undef IS_KEYWORD
456
- #undef IS_NOT_EOF_UP_TO
457
- #undef IS_EQUAL_UP_TO
458
- #undef IS_EQUAL
459
- #undef CALL
460
- #endif