clj 0.0.6 → 0.0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -19,7 +19,6 @@ static VALUE cParser;
19
19
  static VALUE UNICODE_REGEX;
20
20
  static VALUE OCTAL_REGEX;
21
21
 
22
- #define _INSIDE_PARSER
23
22
  typedef enum {
24
23
  NODE_METADATA,
25
24
  NODE_NUMBER,
@@ -36,9 +35,430 @@ typedef enum {
36
35
  NODE_REGEXP
37
36
  } NodeType;
38
37
 
39
- #include "string_parser.c"
40
- #include "io_parser.c"
41
- #undef _INSIDE_PARSER
38
+ #define CALL(what) (what(self, string, position))
39
+ #define STATE VALUE self, char* string, size_t* position
40
+ #define IS_EOF (string[*position] == '\0')
41
+ #define IS_EOF_AFTER(n) (string[*position + (n)] == '\0')
42
+ #define CURRENT (string[*position])
43
+ #define CURRENT_PTR (&string[*position])
44
+ #define AFTER(n) (string[*position + (n)])
45
+ #define AFTER_PTR(n) (&string[*position + (n)])
46
+ #define BEFORE(n) (string[*position - (n)])
47
+ #define BEFORE_PTR(n) (&string[*position - (n)])
48
+ #define SEEK(n) (*position += (n))
49
+ #define IS_NOT_EOF_UP_TO(n) (is_not_eof_up_to(string, position, n))
50
+ #define IS_EQUAL_UP_TO(str, n) (strncmp(CURRENT_PTR, str, (n)) == 0)
51
+ #define IS_EQUAL(str) IS_EQUAL_UP_TO(str, strlen(str))
52
+ #define IS_IGNORED(ch) (isspace(ch) || ch == ',')
53
+ #define IS_BOTH(ch) (ch == ' ' || ch == ',' || ch == '"' || ch == '{' || ch == '}' || ch == '(' || ch == ')' || ch == '[' || ch == ']' || ch == '#' || ch == ':' || ch == '\n' || ch == '\r' || ch == '\t')
54
+ #define IS_KEYWORD(ch) (ch == ' ' || ch == ',' || ch == '"' || ch == '{' || ch == '}' || ch == '(' || ch == ')' || ch == '[' || ch == ']' || ch == '#' || ch == ':' || ch == '\'' || ch == '^' || ch == '@' || ch == '`' || ch == '~' || ch == '\\' || ch == ';' || ch == '\n' || ch == '\r' || ch == '\t')
55
+
56
+ static VALUE read_next (VALUE self, char* string, size_t* position);
57
+
58
+ static inline bool is_not_eof_up_to (char* string, size_t* position, size_t n)
59
+ {
60
+ for (size_t i = 0; i < n; i++) {
61
+ if (IS_EOF_AFTER(i)) {
62
+ return false;
63
+ }
64
+ }
65
+
66
+ return true;
67
+ }
68
+
69
+ static void ignore (STATE)
70
+ {
71
+ while (!IS_EOF && IS_IGNORED(CURRENT)) {
72
+ SEEK(1);
73
+ }
74
+ }
75
+
76
+ static NodeType next_type (STATE)
77
+ {
78
+ if (isdigit(CURRENT) || CURRENT == '-' || CURRENT == '+') {
79
+ return NODE_NUMBER;
80
+ }
81
+
82
+ switch (CURRENT) {
83
+ case '^': return NODE_METADATA;
84
+ case 't': case 'f': return NODE_BOOLEAN;
85
+ case 'n': return NODE_NIL;
86
+ case '\\': return NODE_CHAR;
87
+ case ':': return NODE_KEYWORD;
88
+ case '"': return NODE_STRING;
89
+ case '{': return NODE_MAP;
90
+ case '(': return NODE_LIST;
91
+ case '[': return NODE_VECTOR;
92
+ }
93
+
94
+ if (CURRENT == '#') {
95
+ if (IS_EOF_AFTER(1)) {
96
+ rb_raise(rb_eSyntaxError, "unexpected EOF");
97
+ }
98
+
99
+ switch (AFTER(1)) {
100
+ case 'i': return NODE_INSTANT;
101
+ case '{': return NODE_SET;
102
+ case '"': return NODE_REGEXP;
103
+ }
104
+ }
105
+
106
+ rb_raise(rb_eSyntaxError, "unknown type");
107
+ }
108
+
109
+ static VALUE read_metadata (STATE)
110
+ {
111
+ VALUE result;
112
+ VALUE* metadatas = NULL;
113
+ size_t length = 0;
114
+
115
+ while (CURRENT == '^') {
116
+ metadatas = realloc(metadatas, ++length * sizeof(VALUE));
117
+
118
+ SEEK(1);
119
+
120
+ metadatas[length - 1] = CALL(read_next);
121
+ }
122
+
123
+ result = CALL(read_next);
124
+
125
+ if (!rb_respond_to(result, rb_intern("metadata="))) {
126
+ free(metadatas);
127
+
128
+ rb_raise(rb_eSyntaxError, "the object cannot hold metadata");
129
+ }
130
+
131
+ // FIXME: this could lead to a memleak if #metadata= raises
132
+ for (size_t i = 0; i < length; i++) {
133
+ rb_funcall(result, rb_intern("metadata="), 1, metadatas[i]);
134
+ }
135
+
136
+ free(metadatas);
137
+
138
+ return result;
139
+ }
140
+
141
+ static VALUE read_nil (STATE)
142
+ {
143
+ if (!IS_NOT_EOF_UP_TO(3)) {
144
+ rb_raise(rb_eSyntaxError, "unexpected EOF");
145
+ }
146
+
147
+ if (!IS_EQUAL_UP_TO("nil", 3)) {
148
+ rb_raise(rb_eSyntaxError, "expected nil, got n%c%c", AFTER(1), AFTER(2));
149
+ }
150
+
151
+ SEEK(3);
152
+
153
+ return Qnil;
154
+ }
155
+
156
+ static VALUE read_boolean (STATE)
157
+ {
158
+ if (CURRENT == 't') {
159
+ if (!IS_NOT_EOF_UP_TO(4)) {
160
+ rb_raise(rb_eSyntaxError, "unexpected EOF");
161
+ }
162
+
163
+ if (!IS_EQUAL_UP_TO("true", 4)) {
164
+ rb_raise(rb_eSyntaxError, "expected true, got t%c%c%c", AFTER(1), AFTER(2), AFTER(3));
165
+ }
166
+
167
+ SEEK(4);
168
+
169
+ return Qtrue;
170
+ }
171
+ else {
172
+ if (!IS_NOT_EOF_UP_TO(5)) {
173
+ rb_raise(rb_eSyntaxError, "unexpected EOF");
174
+ }
175
+
176
+ if (!IS_EQUAL_UP_TO("false", 5)) {
177
+ rb_raise(rb_eSyntaxError, "expected false, got f%c%c%c%c", AFTER(1), AFTER(2), AFTER(3), AFTER(4));
178
+ }
179
+
180
+ SEEK(5);
181
+
182
+ return Qfalse;
183
+ }
184
+ }
185
+
186
+ static VALUE read_number (STATE)
187
+ {
188
+ size_t length = 0;
189
+ VALUE rbPiece;
190
+ char* cPiece;
191
+ char* tmp;
192
+
193
+ while (!IS_EOF_AFTER(length) && !IS_BOTH(AFTER(length))) {
194
+ length++;
195
+ }
196
+
197
+ SEEK(length);
198
+
199
+ rbPiece = rb_str_new(BEFORE_PTR(length), length);
200
+ cPiece = StringValueCStr(rbPiece);
201
+
202
+ if (strchr(cPiece, '/')) {
203
+ return rb_funcall(rb_cObject, rb_intern("Rational"), 1, rbPiece);
204
+ }
205
+ else if ((tmp = strchr(cPiece, 'r')) || (tmp = strchr(cPiece, 'R'))) {
206
+ return rb_funcall(rb_str_new2(tmp + 1), rb_intern("to_i"), 1,
207
+ rb_funcall(rb_str_new(cPiece, tmp - cPiece), rb_intern("to_i"), 0));
208
+ }
209
+ else if (strchr(cPiece, '.') || strchr(cPiece, 'e') || strchr(cPiece, 'E') || cPiece[length - 1] == 'M') {
210
+ if (cPiece[length - 1] == 'M') {
211
+ return rb_funcall(rb_cObject, rb_intern("BigDecimal"), 1, rbPiece);
212
+ }
213
+ else {
214
+ return rb_funcall(rb_cObject, rb_intern("Float"), 1, rbPiece);
215
+ }
216
+ }
217
+ else {
218
+ if (cPiece[length - 1] == 'N') {
219
+ rb_str_set_len(rbPiece, length - 1);
220
+ }
221
+
222
+ return rb_funcall(rb_cObject, rb_intern("Integer"), 1, rbPiece);
223
+ }
224
+ }
225
+
226
+ static VALUE read_char (STATE)
227
+ {
228
+ SEEK(1);
229
+
230
+ if (IS_EOF_AFTER(1) || IS_BOTH(AFTER(1))) {
231
+ SEEK(1); return rb_str_new(BEFORE_PTR(1), 1);
232
+ }
233
+ else if (IS_NOT_EOF_UP_TO(7) && IS_EQUAL_UP_TO("newline", 7) && (IS_EOF_AFTER(7) || IS_BOTH(AFTER(7)))) {
234
+ SEEK(7); return rb_str_new2("\n");
235
+ }
236
+ else if (IS_NOT_EOF_UP_TO(5) && IS_EQUAL_UP_TO("space", 5) && (IS_EOF_AFTER(5) || IS_BOTH(AFTER(5)))) {
237
+ SEEK(5); return rb_str_new2(" ");
238
+ }
239
+ else if (IS_NOT_EOF_UP_TO(3) && IS_EQUAL_UP_TO("tab", 3) && (IS_EOF_AFTER(3) || IS_BOTH(AFTER(3)))) {
240
+ SEEK(3); return rb_str_new2("\t");
241
+ }
242
+ else if (IS_NOT_EOF_UP_TO(9) && IS_EQUAL_UP_TO("backspace", 9) && (IS_EOF_AFTER(9) || IS_BOTH(AFTER(9)))) {
243
+ SEEK(9); return rb_str_new2("\b");
244
+ }
245
+ else if (IS_NOT_EOF_UP_TO(8) && IS_EQUAL_UP_TO("formfeed", 8) && (IS_EOF_AFTER(8) || IS_BOTH(AFTER(8)))) {
246
+ SEEK(8); return rb_str_new2("\f");
247
+ }
248
+ else if (IS_NOT_EOF_UP_TO(6) && IS_EQUAL_UP_TO("return", 6) && (IS_EOF_AFTER(6) || IS_BOTH(AFTER(6)))) {
249
+ SEEK(6); return rb_str_new2("\r");
250
+ }
251
+ else if (CURRENT == 'u' && IS_NOT_EOF_UP_TO(5) && !NIL_P(rb_funcall(rb_str_new(AFTER_PTR(1), 4), rb_intern("=~"), 1, UNICODE_REGEX)) && (IS_EOF_AFTER(5) || IS_BOTH(AFTER(5)))) {
252
+ SEEK(5); return rb_funcall(rb_ary_new3(1, rb_funcall(rb_str_new(BEFORE_PTR(4), 4), rb_intern("to_i"), 1, INT2FIX(16))),
253
+ rb_intern("pack"), 1, rb_str_new2("U"));
254
+ }
255
+ else if (CURRENT == 'o') {
256
+ size_t length = 1;
257
+
258
+ for (size_t i = 1; i < 5; i++) {
259
+ if (IS_EOF_AFTER(i) || IS_BOTH(AFTER(i))) {
260
+ break;
261
+ }
262
+
263
+ length++;
264
+ }
265
+
266
+ if (length > 1 && !NIL_P(rb_funcall(rb_str_new(AFTER_PTR(1), length - 1), rb_intern("=~"), 1, OCTAL_REGEX)) && (IS_EOF_AFTER(length) || IS_BOTH(AFTER(length)))) {
267
+ SEEK(length); return rb_funcall(rb_funcall(rb_str_new(BEFORE_PTR(length - 1), length - 1), rb_intern("to_i"), 1, INT2FIX(8)),
268
+ rb_intern("chr"), 0);
269
+ }
270
+ }
271
+
272
+ // TODO: add unicode and octal chars support
273
+
274
+ rb_raise(rb_eSyntaxError, "unknown character type");
275
+ }
276
+
277
+ static VALUE read_keyword (STATE)
278
+ {
279
+ size_t length = 0;
280
+
281
+ SEEK(1);
282
+
283
+ while (!IS_EOF_AFTER(length) && !IS_KEYWORD(AFTER(length))) {
284
+ length++;
285
+ }
286
+
287
+ SEEK(length);
288
+
289
+ return rb_funcall(rb_str_new(BEFORE_PTR(length), length), rb_intern("to_sym"), 0);
290
+ }
291
+
292
+ static VALUE read_string (STATE)
293
+ {
294
+ size_t length = 0;
295
+
296
+ SEEK(1);
297
+
298
+ while (AFTER(length) != '"') {
299
+ if (IS_EOF_AFTER(length)) {
300
+ rb_raise(rb_eSyntaxError, "unexpected EOF");
301
+ }
302
+
303
+ if (AFTER(length) == '\\') {
304
+ length++;
305
+ }
306
+
307
+ length++;
308
+ }
309
+
310
+ SEEK(length + 1);
311
+
312
+ // TODO: make the escapes work properly
313
+
314
+ return rb_funcall(cClojure, rb_intern("unescape"), 1, rb_str_new(BEFORE_PTR(length + 1), length));
315
+ }
316
+
317
+ static VALUE read_regexp (STATE)
318
+ {
319
+ size_t length = 0;
320
+ VALUE args[] = { Qnil };
321
+
322
+ SEEK(2);
323
+
324
+ while (AFTER(length) != '"') {
325
+ if (IS_EOF_AFTER(length)) {
326
+ rb_raise(rb_eSyntaxError, "unexpected EOF");
327
+ }
328
+
329
+ if (AFTER(length) == '\\') {
330
+ length++;
331
+ }
332
+
333
+ length++;
334
+ }
335
+
336
+ SEEK(length + 1);
337
+
338
+ args[0] = rb_str_new(BEFORE_PTR(length + 1), length);
339
+
340
+ return rb_class_new_instance(1, args, rb_cRegexp);
341
+ }
342
+
343
+ static VALUE read_instant (STATE)
344
+ {
345
+ SEEK(1);
346
+
347
+ if (!IS_NOT_EOF_UP_TO(4)) {
348
+ rb_raise(rb_eSyntaxError, "unexpected EOF");
349
+ }
350
+
351
+ if (!IS_EQUAL_UP_TO("inst", 4)) {
352
+ rb_raise(rb_eSyntaxError, "expected inst, got %c%c%c%c", AFTER(0), AFTER(1), AFTER(2), AFTER(3));
353
+ }
354
+
355
+ SEEK(4);
356
+
357
+ CALL(ignore);
358
+
359
+ return rb_funcall(rb_const_get(rb_cObject, rb_intern("DateTime")), rb_intern("rfc3339"), 1, CALL(read_string));
360
+ }
361
+
362
+ static VALUE read_list (STATE)
363
+ {
364
+ VALUE result = rb_class_new_instance(0, NULL, rb_iv_get(self, "@list_class"));
365
+
366
+ SEEK(1); CALL(ignore);
367
+
368
+ while (CURRENT != ')') {
369
+ rb_funcall(result, rb_intern("<<"), 1, CALL(read_next));
370
+
371
+ CALL(ignore);
372
+ }
373
+
374
+ SEEK(1);
375
+
376
+ return result;
377
+ }
378
+
379
+ static VALUE read_vector (STATE)
380
+ {
381
+ VALUE result = rb_class_new_instance(0, NULL, rb_iv_get(self, "@vector_class"));
382
+
383
+ SEEK(1); CALL(ignore);
384
+
385
+ while (CURRENT != ']') {
386
+ rb_funcall(result, rb_intern("<<"), 1, CALL(read_next));
387
+
388
+ CALL(ignore);
389
+ }
390
+
391
+ SEEK(1);
392
+
393
+ return result;
394
+ }
395
+
396
+ static VALUE read_set (STATE)
397
+ {
398
+ VALUE result = rb_class_new_instance(0, NULL, rb_iv_get(self, "@set_class"));
399
+
400
+ SEEK(2); CALL(ignore);
401
+
402
+ while (CURRENT != '}') {
403
+ rb_funcall(result, rb_intern("<<"), 1, CALL(read_next));
404
+
405
+ CALL(ignore);
406
+ }
407
+
408
+ SEEK(1);
409
+
410
+ if (!NIL_P(rb_funcall(result, rb_intern("uniq!"), 0))) {
411
+ rb_raise(rb_eSyntaxError, "the set contains non unique values");
412
+ }
413
+
414
+ return result;
415
+ }
416
+
417
+ static VALUE read_map (STATE)
418
+ {
419
+ VALUE result = rb_class_new_instance(0, NULL, rb_iv_get(self, "@map_class"));
420
+ VALUE key;
421
+ VALUE value;
422
+
423
+ SEEK(1); CALL(ignore);
424
+
425
+ while (CURRENT != '}') {
426
+ key = CALL(read_next);
427
+ CALL(ignore);
428
+ value = CALL(read_next);
429
+
430
+ rb_funcall(result, rb_intern("[]="), 2, key, value);
431
+ }
432
+
433
+ SEEK(1);
434
+
435
+ return result;
436
+ }
437
+
438
+ static VALUE read_next (STATE)
439
+ {
440
+ CALL(ignore);
441
+
442
+ if (IS_EOF) {
443
+ rb_raise(rb_eSyntaxError, "unexpected EOF");
444
+ }
445
+
446
+ switch (CALL(next_type)) {
447
+ case NODE_METADATA: return CALL(read_metadata);
448
+ case NODE_NUMBER: return CALL(read_number);
449
+ case NODE_BOOLEAN: return CALL(read_boolean);
450
+ case NODE_NIL: return CALL(read_nil);
451
+ case NODE_CHAR: return CALL(read_char);
452
+ case NODE_KEYWORD: return CALL(read_keyword);
453
+ case NODE_STRING: return CALL(read_string);
454
+ case NODE_MAP: return CALL(read_map);
455
+ case NODE_LIST: return CALL(read_list);
456
+ case NODE_VECTOR: return CALL(read_vector);
457
+ case NODE_INSTANT: return CALL(read_instant);
458
+ case NODE_SET: return CALL(read_set);
459
+ case NODE_REGEXP: return CALL(read_regexp);
460
+ }
461
+ }
42
462
 
43
463
  static VALUE t_init (int argc, VALUE* argv, VALUE self)
44
464
  {
@@ -96,14 +516,19 @@ static VALUE t_init (int argc, VALUE* argv, VALUE self)
96
516
 
97
517
  static VALUE t_parse (VALUE self)
98
518
  {
99
- VALUE source = rb_iv_get(self, "@source");
519
+ size_t position = 0;
520
+ VALUE source = rb_iv_get(self, "@source");
100
521
 
101
- if (rb_obj_is_kind_of(source, rb_cString)) {
102
- return string_parse(self);
103
- }
104
- else if (rb_obj_is_kind_of(source, rb_cIO)) {
105
- return io_parse(self);
522
+ if (!rb_obj_is_kind_of(source, rb_cString)) {
523
+ if (rb_obj_is_kind_of(source, rb_cIO)) {
524
+ source = rb_funcall(source, rb_intern("read"), 0);
525
+ }
526
+ else {
527
+ source = rb_funcall(source, rb_intern("to_str"), 0);
528
+ }
106
529
  }
530
+
531
+ return read_next(self, StringValueCStr(source), &position);
107
532
  }
108
533
 
109
534
  void
data/lib/clj.rb CHANGED
@@ -66,7 +66,6 @@ module Clojure
66
66
  bytes
67
67
  end
68
68
  }
69
-
70
69
  end
71
70
  end
72
71
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: clj
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
4
+ version: 0.0.6.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -13,7 +13,7 @@ date: 2012-03-01 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
16
- requirement: &12555040 !ruby/object:Gem::Requirement
16
+ requirement: &7244020 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *12555040
24
+ version_requirements: *7244020
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: rspec
27
- requirement: &12553860 !ruby/object:Gem::Requirement
27
+ requirement: &7243040 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,7 +32,7 @@ dependencies:
32
32
  version: '0'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *12553860
35
+ version_requirements: *7243040
36
36
  description:
37
37
  email: meh@paranoici.org
38
38
  executables: []
@@ -41,8 +41,6 @@ extensions:
41
41
  extra_rdoc_files: []
42
42
  files:
43
43
  - ext/clj/parser.c
44
- - ext/clj/io_parser.c
45
- - ext/clj/string_parser.c
46
44
  - ext/clj/extconf.rb
47
45
  - lib/clj.rb
48
46
  - lib/clj/types.rb
@@ -1,18 +0,0 @@
1
- /**
2
- * DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
3
- * Version 2, December 2004
4
- *
5
- * DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
6
- * TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
7
- *
8
- * 0. You just DO WHAT THE FUCK YOU WANT TO.
9
- **/
10
-
11
- #ifdef _INSIDE_PARSER
12
-
13
- static VALUE io_parse (VALUE self)
14
- {
15
- return Qnil;
16
- }
17
-
18
- #endif
@@ -1,460 +0,0 @@
1
- /**
2
- * DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
3
- * Version 2, December 2004
4
- *
5
- * DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
6
- * TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
7
- *
8
- * 0. You just DO WHAT THE FUCK YOU WANT TO.
9
- **/
10
-
11
- #ifdef _INSIDE_PARSER
12
- #define IS_EOF (string[*position] == '\0')
13
- #define IS_EOF_AFTER(n) (string[*position + (n)] == '\0')
14
- #define CURRENT (string[*position])
15
- #define CURRENT_PTR (&string[*position])
16
- #define AFTER(n) (string[*position + (n)])
17
- #define AFTER_PTR(n) (&string[*position + (n)])
18
- #define BEFORE(n) (string[*position - (n)])
19
- #define BEFORE_PTR(n) (&string[*position - (n)])
20
- #define SEEK(n) (*position += (n))
21
- #define IS_IGNORED(ch) (isspace(ch) || ch == ',')
22
- #define IS_BOTH(ch) (ch == ' ' || ch == ',' || ch == '"' || ch == '{' || ch == '}' || ch == '(' || ch == ')' || ch == '[' || ch == ']' || ch == '#' || ch == ':' || ch == '\n' || ch == '\r' || ch == '\t')
23
- #define IS_KEYWORD(ch) (ch == ' ' || ch == ',' || ch == '"' || ch == '{' || ch == '}' || ch == '(' || ch == ')' || ch == '[' || ch == ']' || ch == '#' || ch == ':' || ch == '\'' || ch == '^' || ch == '@' || ch == '`' || ch == '~' || ch == '\\' || ch == ';' || ch == '\n' || ch == '\r' || ch == '\t')
24
- #define IS_NOT_EOF_UP_TO(n) (is_not_eof_up_to(string, position, n))
25
- #define IS_EQUAL_UP_TO(str, n) (strncmp(CURRENT_PTR, str, (n)) == 0)
26
- #define IS_EQUAL(str) IS_EQUAL_UP_TO(str, strlen(str))
27
- #define CALL(what) (what(self, string, position))
28
-
29
- static VALUE string_read_next (VALUE self, char* string, size_t* position);
30
-
31
- static inline bool is_not_eof_up_to (char* string, size_t* position, size_t n)
32
- {
33
- for (size_t i = 0; i < n; i++) {
34
- if (IS_EOF_AFTER(i)) {
35
- return false;
36
- }
37
- }
38
-
39
- return true;
40
- }
41
-
42
- static void string_ignore (VALUE self, char* string, size_t* position)
43
- {
44
- while (!IS_EOF && IS_IGNORED(CURRENT)) {
45
- SEEK(1);
46
- }
47
- }
48
-
49
- static NodeType string_next_type (VALUE self, char* string, size_t* position)
50
- {
51
- if (isdigit(CURRENT) || CURRENT == '-' || CURRENT == '+') {
52
- return NODE_NUMBER;
53
- }
54
-
55
- switch (CURRENT) {
56
- case '^': return NODE_METADATA;
57
- case 't': case 'f': return NODE_BOOLEAN;
58
- case 'n': return NODE_NIL;
59
- case '\\': return NODE_CHAR;
60
- case ':': return NODE_KEYWORD;
61
- case '"': return NODE_STRING;
62
- case '{': return NODE_MAP;
63
- case '(': return NODE_LIST;
64
- case '[': return NODE_VECTOR;
65
- }
66
-
67
- if (CURRENT == '#') {
68
- if (IS_EOF_AFTER(1)) {
69
- rb_raise(rb_eSyntaxError, "unexpected EOF");
70
- }
71
-
72
- switch (AFTER(1)) {
73
- case 'i': return NODE_INSTANT;
74
- case '{': return NODE_SET;
75
- case '"': return NODE_REGEXP;
76
- }
77
- }
78
-
79
- rb_raise(rb_eSyntaxError, "unknown type");
80
- }
81
-
82
- static VALUE string_read_metadata (VALUE self, char* string, size_t* position)
83
- {
84
- VALUE result;
85
- VALUE* metadatas = NULL;
86
- size_t length = 0;
87
-
88
- while (CURRENT == '^') {
89
- metadatas = realloc(metadatas, ++length * sizeof(VALUE));
90
-
91
- SEEK(1);
92
-
93
- metadatas[length - 1] = CALL(string_read_next);
94
- }
95
-
96
- result = CALL(string_read_next);
97
-
98
- if (!rb_respond_to(result, rb_intern("metadata="))) {
99
- free(metadatas);
100
-
101
- rb_raise(rb_eSyntaxError, "the object cannot hold metadata");
102
- }
103
-
104
- // FIXME: this could lead to a memleak if #metadata= raises
105
- for (size_t i = 0; i < length; i++) {
106
- rb_funcall(result, rb_intern("metadata="), 1, metadatas[i]);
107
- }
108
-
109
- free(metadatas);
110
-
111
- return result;
112
- }
113
-
114
- static VALUE string_read_nil (VALUE self, char* string, size_t* position)
115
- {
116
- if (!IS_NOT_EOF_UP_TO(3)) {
117
- rb_raise(rb_eSyntaxError, "unexpected EOF");
118
- }
119
-
120
- if (!IS_EQUAL_UP_TO("nil", 3)) {
121
- rb_raise(rb_eSyntaxError, "expected nil, got n%c%c", AFTER(1), AFTER(2));
122
- }
123
-
124
- SEEK(3);
125
-
126
- return Qnil;
127
- }
128
-
129
- static VALUE string_read_boolean (VALUE self, char* string, size_t* position)
130
- {
131
- if (CURRENT == 't') {
132
- if (!IS_NOT_EOF_UP_TO(4)) {
133
- rb_raise(rb_eSyntaxError, "unexpected EOF");
134
- }
135
-
136
- if (!IS_EQUAL_UP_TO("true", 4)) {
137
- rb_raise(rb_eSyntaxError, "expected true, got t%c%c%c", AFTER(1), AFTER(2), AFTER(3));
138
- }
139
-
140
- SEEK(4);
141
-
142
- return Qtrue;
143
- }
144
- else {
145
- if (!IS_NOT_EOF_UP_TO(5)) {
146
- rb_raise(rb_eSyntaxError, "unexpected EOF");
147
- }
148
-
149
- if (!IS_EQUAL_UP_TO("false", 5)) {
150
- rb_raise(rb_eSyntaxError, "expected false, got f%c%c%c%c", AFTER(1), AFTER(2), AFTER(3), AFTER(4));
151
- }
152
-
153
- SEEK(5);
154
-
155
- return Qfalse;
156
- }
157
- }
158
-
159
- static VALUE string_read_number (VALUE self, char* string, size_t* position)
160
- {
161
- size_t length = 0;
162
- VALUE rbPiece;
163
- char* cPiece;
164
- char* tmp;
165
-
166
- while (!IS_EOF_AFTER(length) && !IS_BOTH(AFTER(length))) {
167
- length++;
168
- }
169
-
170
- SEEK(length);
171
-
172
- rbPiece = rb_str_new(BEFORE_PTR(length), length);
173
- cPiece = StringValueCStr(rbPiece);
174
-
175
- if (strchr(cPiece, '/')) {
176
- return rb_funcall(rb_cObject, rb_intern("Rational"), 1, rbPiece);
177
- }
178
- else if ((tmp = strchr(cPiece, 'r')) || (tmp = strchr(cPiece, 'R'))) {
179
- return rb_funcall(rb_str_new2(tmp + 1), rb_intern("to_i"), 1,
180
- rb_funcall(rb_str_new(cPiece, tmp - cPiece), rb_intern("to_i"), 0));
181
- }
182
- else if (strchr(cPiece, '.') || strchr(cPiece, 'e') || strchr(cPiece, 'E') || cPiece[length - 1] == 'M') {
183
- if (cPiece[length - 1] == 'M') {
184
- return rb_funcall(rb_cObject, rb_intern("BigDecimal"), 1, rbPiece);
185
- }
186
- else {
187
- return rb_funcall(rb_cObject, rb_intern("Float"), 1, rbPiece);
188
- }
189
- }
190
- else {
191
- if (cPiece[length - 1] == 'N') {
192
- rb_str_set_len(rbPiece, length - 1);
193
- }
194
-
195
- return rb_funcall(rb_cObject, rb_intern("Integer"), 1, rbPiece);
196
- }
197
- }
198
-
199
- static VALUE string_read_char (VALUE self, char* string, size_t* position)
200
- {
201
- SEEK(1);
202
-
203
- if (IS_EOF_AFTER(1) || IS_BOTH(AFTER(1))) {
204
- SEEK(1); return rb_str_new(BEFORE_PTR(1), 1);
205
- }
206
- else if (IS_NOT_EOF_UP_TO(7) && IS_EQUAL_UP_TO("newline", 7) && (IS_EOF_AFTER(7) || IS_BOTH(AFTER(7)))) {
207
- SEEK(7); return rb_str_new2("\n");
208
- }
209
- else if (IS_NOT_EOF_UP_TO(5) && IS_EQUAL_UP_TO("space", 5) && (IS_EOF_AFTER(5) || IS_BOTH(AFTER(5)))) {
210
- SEEK(5); return rb_str_new2(" ");
211
- }
212
- else if (IS_NOT_EOF_UP_TO(3) && IS_EQUAL_UP_TO("tab", 3) && (IS_EOF_AFTER(3) || IS_BOTH(AFTER(3)))) {
213
- SEEK(3); return rb_str_new2("\t");
214
- }
215
- else if (IS_NOT_EOF_UP_TO(9) && IS_EQUAL_UP_TO("backspace", 9) && (IS_EOF_AFTER(9) || IS_BOTH(AFTER(9)))) {
216
- SEEK(9); return rb_str_new2("\b");
217
- }
218
- else if (IS_NOT_EOF_UP_TO(8) && IS_EQUAL_UP_TO("formfeed", 8) && (IS_EOF_AFTER(8) || IS_BOTH(AFTER(8)))) {
219
- SEEK(8); return rb_str_new2("\f");
220
- }
221
- else if (IS_NOT_EOF_UP_TO(6) && IS_EQUAL_UP_TO("return", 6) && (IS_EOF_AFTER(6) || IS_BOTH(AFTER(6)))) {
222
- SEEK(6); return rb_str_new2("\r");
223
- }
224
- else if (CURRENT == 'u' && IS_NOT_EOF_UP_TO(5) && !NIL_P(rb_funcall(rb_str_new(AFTER_PTR(1), 4), rb_intern("=~"), 1, UNICODE_REGEX)) && (IS_EOF_AFTER(5) || IS_BOTH(AFTER(5)))) {
225
- SEEK(5); return rb_funcall(rb_ary_new3(1, rb_funcall(rb_str_new(BEFORE_PTR(4), 4), rb_intern("to_i"), 1, INT2FIX(16))),
226
- rb_intern("pack"), 1, rb_str_new2("U"));
227
- }
228
- else if (CURRENT == 'o') {
229
- size_t length = 1;
230
-
231
- for (size_t i = 1; i < 5; i++) {
232
- if (IS_EOF_AFTER(i) || IS_BOTH(AFTER(i))) {
233
- break;
234
- }
235
-
236
- length++;
237
- }
238
-
239
- if (length > 1 && !NIL_P(rb_funcall(rb_str_new(AFTER_PTR(1), length - 1), rb_intern("=~"), 1, OCTAL_REGEX)) && (IS_EOF_AFTER(length) || IS_BOTH(AFTER(length)))) {
240
- SEEK(length); return rb_funcall(rb_funcall(rb_str_new(BEFORE_PTR(length - 1), length - 1), rb_intern("to_i"), 1, INT2FIX(8)),
241
- rb_intern("chr"), 0);
242
- }
243
- }
244
-
245
- // TODO: add unicode and octal chars support
246
-
247
- rb_raise(rb_eSyntaxError, "unknown character type");
248
- }
249
-
250
- static VALUE string_read_keyword (VALUE self, char* string, size_t* position)
251
- {
252
- size_t length = 0;
253
-
254
- SEEK(1);
255
-
256
- while (!IS_EOF_AFTER(length) && !IS_KEYWORD(AFTER(length))) {
257
- length++;
258
- }
259
-
260
- SEEK(length);
261
-
262
- return rb_funcall(rb_str_new(BEFORE_PTR(length), length), rb_intern("to_sym"), 0);
263
- }
264
-
265
- static VALUE string_read_string (VALUE self, char* string, size_t* position)
266
- {
267
- size_t length = 0;
268
-
269
- SEEK(1);
270
-
271
- while (AFTER(length) != '"') {
272
- if (IS_EOF_AFTER(length)) {
273
- rb_raise(rb_eSyntaxError, "unexpected EOF");
274
- }
275
-
276
- if (AFTER(length) == '\\') {
277
- length++;
278
- }
279
-
280
- length++;
281
- }
282
-
283
- SEEK(length + 1);
284
-
285
- // TODO: make the escapes work properly
286
-
287
- return rb_funcall(cClojure, rb_intern("unescape"), 1, rb_str_new(BEFORE_PTR(length + 1), length));
288
- }
289
-
290
- static VALUE string_read_regexp (VALUE self, char* string, size_t* position)
291
- {
292
- size_t length = 0;
293
- VALUE args[] = { Qnil };
294
-
295
- SEEK(2);
296
-
297
- while (AFTER(length) != '"') {
298
- if (IS_EOF_AFTER(length)) {
299
- rb_raise(rb_eSyntaxError, "unexpected EOF");
300
- }
301
-
302
- if (AFTER(length) == '\\') {
303
- length++;
304
- }
305
-
306
- length++;
307
- }
308
-
309
- SEEK(length + 1);
310
-
311
- args[0] = rb_str_new(BEFORE_PTR(length + 1), length);
312
-
313
- return rb_class_new_instance(1, args, rb_cRegexp);
314
- }
315
-
316
- static VALUE string_read_instant (VALUE self, char* string, size_t* position)
317
- {
318
- SEEK(1);
319
-
320
- if (!IS_NOT_EOF_UP_TO(4)) {
321
- rb_raise(rb_eSyntaxError, "unexpected EOF");
322
- }
323
-
324
- if (!IS_EQUAL_UP_TO("inst", 4)) {
325
- rb_raise(rb_eSyntaxError, "expected inst, got %c%c%c%c", AFTER(0), AFTER(1), AFTER(2), AFTER(3));
326
- }
327
-
328
- SEEK(4);
329
-
330
- CALL(string_ignore);
331
-
332
- return rb_funcall(rb_const_get(rb_cObject, rb_intern("DateTime")), rb_intern("rfc3339"), 1, CALL(string_read_string));
333
- }
334
-
335
- static VALUE string_read_list (VALUE self, char* string, size_t* position)
336
- {
337
- VALUE result = rb_class_new_instance(0, NULL, rb_iv_get(self, "@list_class"));
338
-
339
- SEEK(1); CALL(string_ignore);
340
-
341
- while (CURRENT != ')') {
342
- rb_funcall(result, rb_intern("<<"), 1, CALL(string_read_next));
343
-
344
- CALL(string_ignore);
345
- }
346
-
347
- SEEK(1);
348
-
349
- return result;
350
- }
351
-
352
- static VALUE string_read_vector (VALUE self, char* string, size_t* position)
353
- {
354
- VALUE result = rb_class_new_instance(0, NULL, rb_iv_get(self, "@vector_class"));
355
-
356
- SEEK(1); CALL(string_ignore);
357
-
358
- while (CURRENT != ']') {
359
- rb_funcall(result, rb_intern("<<"), 1, CALL(string_read_next));
360
-
361
- CALL(string_ignore);
362
- }
363
-
364
- SEEK(1);
365
-
366
- return result;
367
- }
368
-
369
- static VALUE string_read_set (VALUE self, char* string, size_t* position)
370
- {
371
- VALUE result = rb_class_new_instance(0, NULL, rb_iv_get(self, "@set_class"));
372
-
373
- SEEK(2); CALL(string_ignore);
374
-
375
- while (CURRENT != '}') {
376
- rb_funcall(result, rb_intern("<<"), 1, CALL(string_read_next));
377
-
378
- CALL(string_ignore);
379
- }
380
-
381
- SEEK(1);
382
-
383
- if (!NIL_P(rb_funcall(result, rb_intern("uniq!"), 0))) {
384
- rb_raise(rb_eSyntaxError, "the set contains non unique values");
385
- }
386
-
387
- return result;
388
- }
389
-
390
- static VALUE string_read_map (VALUE self, char* string, size_t* position)
391
- {
392
- VALUE result = rb_class_new_instance(0, NULL, rb_iv_get(self, "@map_class"));
393
- VALUE key;
394
- VALUE value;
395
-
396
- SEEK(1); CALL(string_ignore);
397
-
398
- while (CURRENT != '}') {
399
- key = CALL(string_read_next);
400
- CALL(string_ignore);
401
- value = CALL(string_read_next);
402
-
403
- rb_funcall(result, rb_intern("[]="), 2, key, value);
404
- }
405
-
406
- SEEK(1);
407
-
408
- return result;
409
- }
410
-
411
- static VALUE string_read_next (VALUE self, char* string, size_t* position)
412
- {
413
- CALL(string_ignore);
414
-
415
- if (IS_EOF) {
416
- rb_raise(rb_eSyntaxError, "unexpected EOF");
417
- }
418
-
419
- switch (CALL(string_next_type)) {
420
- case NODE_METADATA: return CALL(string_read_metadata);
421
- case NODE_NUMBER: return CALL(string_read_number);
422
- case NODE_BOOLEAN: return CALL(string_read_boolean);
423
- case NODE_NIL: return CALL(string_read_nil);
424
- case NODE_CHAR: return CALL(string_read_char);
425
- case NODE_KEYWORD: return CALL(string_read_keyword);
426
- case NODE_STRING: return CALL(string_read_string);
427
- case NODE_MAP: return CALL(string_read_map);
428
- case NODE_LIST: return CALL(string_read_list);
429
- case NODE_VECTOR: return CALL(string_read_vector);
430
- case NODE_INSTANT: return CALL(string_read_instant);
431
- case NODE_SET: return CALL(string_read_set);
432
- case NODE_REGEXP: return CALL(string_read_regexp);
433
- }
434
- }
435
-
436
- static VALUE string_parse (VALUE self)
437
- {
438
- size_t position = 0;
439
- VALUE source = rb_iv_get(self, "@source");
440
-
441
- return string_read_next(self, StringValueCStr(source), &position);
442
- }
443
-
444
- #undef IS_EOF
445
- #undef IS_EOF_AFTER
446
- #undef CURRENT
447
- #undef CURRENT_PTR
448
- #undef AFTER
449
- #undef AFTER_PTR
450
- #undef BEFORE
451
- #undef BEFORE_PTR
452
- #undef SEEK
453
- #undef IS_IGNORED
454
- #undef IS_BOTH
455
- #undef IS_KEYWORD
456
- #undef IS_NOT_EOF_UP_TO
457
- #undef IS_EQUAL_UP_TO
458
- #undef IS_EQUAL
459
- #undef CALL
460
- #endif