clj 0.0.5.6 → 0.0.6

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,15 @@
1
require 'mkmf'

# Build script for the clj/parser_ext C extension.
#
# Any optimisation level already present in $CFLAGS is rewritten to -O3;
# when none is present, -O3 is appended instead.
replaced = $CFLAGS.gsub!(/ -O[\dsz]?/, ' -O3')
$CFLAGS << ' -O3' if replaced.nil?

# The extra flags below are only added when the configured compiler is gcc.
if CONFIG['CC'] =~ /gcc/
  $CFLAGS << ' -Wall' << ' -std=c99'

  # Running with -d ($DEBUG) switches to an unoptimised build with gdb symbols.
  if $DEBUG
    downgraded = $CFLAGS.gsub!(/ -O[\dsz]?/, ' -O0 -ggdb')
    $CFLAGS << ' -O0 -ggdb' unless downgraded
  end
end

create_makefile 'clj/parser_ext'
@@ -0,0 +1,18 @@
1
+ /**
2
+ * DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
3
+ * Version 2, December 2004
4
+ *
5
+ * DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
6
+ * TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
7
+ *
8
+ * 0. You just DO WHAT THE FUCK YOU WANT TO.
9
+ **/
10
+
11
+ #ifdef _INSIDE_PARSER
12
+
13
+ static VALUE io_parse (VALUE self)
14
+ {
15
+ return Qnil;
16
+ }
17
+
18
+ #endif
@@ -0,0 +1,127 @@
1
+ /**
2
+ * DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
3
+ * Version 2, December 2004
4
+ *
5
+ * DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
6
+ * TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
7
+ *
8
+ * 0. You just DO WHAT THE FUCK YOU WANT TO.
9
+ **/
10
+
11
+ #include <stdbool.h>
12
+ #include <ctype.h>
13
+
14
+ #include "ruby.h"
15
+
16
+ static VALUE cClojure;
17
+ static VALUE cParser;
18
+
19
+ static VALUE UNICODE_REGEX;
20
+ static VALUE OCTAL_REGEX;
21
+
22
+ #define _INSIDE_PARSER
23
/* Kinds of value the parser can recognise from the upcoming input. */
typedef enum {
	NODE_METADATA, /* ^ prefix */
	NODE_NUMBER,   /* digit, '-' or '+' */
	NODE_BOOLEAN,  /* 't' or 'f' */
	NODE_NIL,      /* 'n' */
	NODE_CHAR,     /* backslash */
	NODE_KEYWORD,  /* ':' */
	NODE_STRING,   /* '"' */
	NODE_MAP,      /* '{' */
	NODE_LIST,     /* '(' */
	NODE_VECTOR,   /* '[' */
	NODE_INSTANT,  /* "#i" */
	NODE_SET,      /* "#{" */
	NODE_REGEXP    /* '#' followed by '"' */
} NodeType;
38
+
39
+ #include "string_parser.c"
40
+ #include "io_parser.c"
41
+ #undef _INSIDE_PARSER
42
+
43
+ static VALUE t_init (int argc, VALUE* argv, VALUE self)
44
+ {
45
+ VALUE tmp;
46
+ VALUE source;
47
+ VALUE options;
48
+
49
+ if (argc < 1) {
50
+ rb_raise(rb_eArgError, "wrong number of arguments (0 for 1)");
51
+ }
52
+ else if (argc > 2) {
53
+ rb_raise(rb_eArgError, "wrong number of arguments (%d for 2)", argc);
54
+ }
55
+
56
+ if (!rb_obj_is_kind_of(argv[0], rb_cString) && !rb_obj_is_kind_of(argv[0], rb_cIO)) {
57
+ rb_raise(rb_eArgError, "you have to pass a String or an IO");
58
+ }
59
+
60
+ source = argv[0];
61
+ options = argc == 2 ? argv[1] : rb_hash_new();
62
+
63
+ rb_iv_set(self, "@source", source);
64
+ rb_iv_set(self, "@options", options);
65
+
66
+ if (!NIL_P(tmp = rb_hash_aref(options, rb_intern("map_class")))) {
67
+ rb_iv_set(self, "@map_class", tmp);
68
+ }
69
+ else {
70
+ rb_iv_set(self, "@map_class", rb_const_get(cClojure, rb_intern("Map")));
71
+ }
72
+
73
+ if (!NIL_P(tmp = rb_hash_aref(options, rb_intern("vector_class")))) {
74
+ rb_iv_set(self, "@vector_class", tmp);
75
+ }
76
+ else {
77
+ rb_iv_set(self, "@vector_class", rb_const_get(cClojure, rb_intern("Vector")));
78
+ }
79
+
80
+ if (!NIL_P(tmp = rb_hash_aref(options, rb_intern("list_class")))) {
81
+ rb_iv_set(self, "@list_class", tmp);
82
+ }
83
+ else {
84
+ rb_iv_set(self, "@list_class", rb_const_get(cClojure, rb_intern("Vector")));
85
+ }
86
+
87
+ if (!NIL_P(tmp = rb_hash_aref(options, rb_intern("set_class")))) {
88
+ rb_iv_set(self, "@set_class", tmp);
89
+ }
90
+ else {
91
+ rb_iv_set(self, "@set_class", rb_const_get(cClojure, rb_intern("Vector")));
92
+ }
93
+
94
+ return self;
95
+ }
96
+
97
+ static VALUE t_parse (VALUE self)
98
+ {
99
+ VALUE source = rb_iv_get(self, "@source");
100
+
101
+ if (rb_obj_is_kind_of(source, rb_cString)) {
102
+ return string_parse(self);
103
+ }
104
+ else if (rb_obj_is_kind_of(source, rb_cIO)) {
105
+ return io_parse(self);
106
+ }
107
+ }
108
+
109
+ void
110
+ Init_parser_ext (void)
111
+ {
112
+ cClojure = rb_const_get(rb_cObject, rb_intern("Clojure"));
113
+ cParser = rb_define_class_under(cClojure, "Parser", rb_cObject);
114
+
115
+ rb_define_method(cParser, "initialize", t_init, -1);
116
+ rb_define_method(cParser, "parse", t_parse, 0);
117
+
118
+ VALUE args[] = { Qnil };
119
+
120
+ args[0] = rb_str_new2("[0-9|a-f|A-F]{4}");
121
+ UNICODE_REGEX = rb_class_new_instance(1, args, rb_cRegexp);
122
+ rb_define_const(cClojure, "UNICODE_REGEX", UNICODE_REGEX);
123
+
124
+ args[0] = rb_str_new2("[0-3]?[0-7]?[0-7]");
125
+ OCTAL_REGEX = rb_class_new_instance(1, args, rb_cRegexp);
126
+ rb_define_const(cClojure, "OCTAL_REGEX", OCTAL_REGEX);
127
+ }
@@ -0,0 +1,460 @@
1
+ /**
2
+ * DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
3
+ * Version 2, December 2004
4
+ *
5
+ * DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
6
+ * TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
7
+ *
8
+ * 0. You just DO WHAT THE FUCK YOU WANT TO.
9
+ **/
10
+
11
+ #ifdef _INSIDE_PARSER
12
/*
 * Cursor helpers. They all expect `self`, `string` (the NUL-terminated input)
 * and `position` (pointer to the current offset) to be in scope, which is why
 * every reader function shares the same parameter names.
 *
 * Macro arguments are parenthesized so that expressions can be passed safely,
 * and isspace() receives an unsigned char because passing a negative char
 * (any byte above 0x7f where char is signed) is undefined behaviour.
 */
#define IS_EOF (string[*position] == '\0')
#define IS_EOF_AFTER(n) (string[*position + (n)] == '\0')
#define CURRENT (string[*position])
#define CURRENT_PTR (&string[*position])
#define AFTER(n) (string[*position + (n)])
#define AFTER_PTR(n) (&string[*position + (n)])
#define BEFORE(n) (string[*position - (n)])
#define BEFORE_PTR(n) (&string[*position - (n)])
#define SEEK(n) (*position += (n))
#define IS_IGNORED(ch) (isspace((unsigned char) (ch)) || (ch) == ',')
#define IS_BOTH(ch) ((ch) == ' ' || (ch) == ',' || (ch) == '"' || (ch) == '{' || (ch) == '}' || (ch) == '(' || (ch) == ')' || (ch) == '[' || (ch) == ']' || (ch) == '#' || (ch) == ':' || (ch) == '\n' || (ch) == '\r' || (ch) == '\t')
#define IS_KEYWORD(ch) ((ch) == ' ' || (ch) == ',' || (ch) == '"' || (ch) == '{' || (ch) == '}' || (ch) == '(' || (ch) == ')' || (ch) == '[' || (ch) == ']' || (ch) == '#' || (ch) == ':' || (ch) == '\'' || (ch) == '^' || (ch) == '@' || (ch) == '`' || (ch) == '~' || (ch) == '\\' || (ch) == ';' || (ch) == '\n' || (ch) == '\r' || (ch) == '\t')
#define IS_NOT_EOF_UP_TO(n) (is_not_eof_up_to(string, position, (n)))
#define IS_EQUAL_UP_TO(str, n) (strncmp(CURRENT_PTR, (str), (n)) == 0)
#define IS_EQUAL(str) IS_EQUAL_UP_TO((str), strlen(str))
#define CALL(what) (what(self, string, position))
28
+
29
+ static VALUE string_read_next (VALUE self, char* string, size_t* position);
30
+
31
/*
 * Returns true when none of the next `n` bytes, starting at the current
 * position, is the NUL terminator; false as soon as one of them is.
 */
static inline bool is_not_eof_up_to (char* string, size_t* position, size_t n)
{
	size_t offset = 0;

	while (offset < n) {
		if (IS_EOF_AFTER(offset)) {
			return false;
		}

		offset++;
	}

	return true;
}
41
+
42
+ static void string_ignore (VALUE self, char* string, size_t* position)
43
+ {
44
+ while (!IS_EOF && IS_IGNORED(CURRENT)) {
45
+ SEEK(1);
46
+ }
47
+ }
48
+
49
+ static NodeType string_next_type (VALUE self, char* string, size_t* position)
50
+ {
51
+ if (isdigit(CURRENT) || CURRENT == '-' || CURRENT == '+') {
52
+ return NODE_NUMBER;
53
+ }
54
+
55
+ switch (CURRENT) {
56
+ case '^': return NODE_METADATA;
57
+ case 't': case 'f': return NODE_BOOLEAN;
58
+ case 'n': return NODE_NIL;
59
+ case '\\': return NODE_CHAR;
60
+ case ':': return NODE_KEYWORD;
61
+ case '"': return NODE_STRING;
62
+ case '{': return NODE_MAP;
63
+ case '(': return NODE_LIST;
64
+ case '[': return NODE_VECTOR;
65
+ }
66
+
67
+ if (CURRENT == '#') {
68
+ if (IS_EOF_AFTER(1)) {
69
+ rb_raise(rb_eSyntaxError, "unexpected EOF");
70
+ }
71
+
72
+ switch (AFTER(1)) {
73
+ case 'i': return NODE_INSTANT;
74
+ case '{': return NODE_SET;
75
+ case '"': return NODE_REGEXP;
76
+ }
77
+ }
78
+
79
+ rb_raise(rb_eSyntaxError, "unknown type");
80
+ }
81
+
82
+ static VALUE string_read_metadata (VALUE self, char* string, size_t* position)
83
+ {
84
+ VALUE result;
85
+ VALUE* metadatas = NULL;
86
+ size_t length = 0;
87
+
88
+ while (CURRENT == '^') {
89
+ metadatas = realloc(metadatas, ++length * sizeof(VALUE));
90
+
91
+ SEEK(1);
92
+
93
+ metadatas[length - 1] = CALL(string_read_next);
94
+ }
95
+
96
+ result = CALL(string_read_next);
97
+
98
+ if (!rb_respond_to(result, rb_intern("metadata="))) {
99
+ free(metadatas);
100
+
101
+ rb_raise(rb_eSyntaxError, "the object cannot hold metadata");
102
+ }
103
+
104
+ // FIXME: this could lead to a memleak if #metadata= raises
105
+ for (size_t i = 0; i < length; i++) {
106
+ rb_funcall(result, rb_intern("metadata="), 1, metadatas[i]);
107
+ }
108
+
109
+ free(metadatas);
110
+
111
+ return result;
112
+ }
113
+
114
+ static VALUE string_read_nil (VALUE self, char* string, size_t* position)
115
+ {
116
+ if (!IS_NOT_EOF_UP_TO(3)) {
117
+ rb_raise(rb_eSyntaxError, "unexpected EOF");
118
+ }
119
+
120
+ if (!IS_EQUAL_UP_TO("nil", 3)) {
121
+ rb_raise(rb_eSyntaxError, "expected nil, got n%c%c", AFTER(1), AFTER(2));
122
+ }
123
+
124
+ SEEK(3);
125
+
126
+ return Qnil;
127
+ }
128
+
129
+ static VALUE string_read_boolean (VALUE self, char* string, size_t* position)
130
+ {
131
+ if (CURRENT == 't') {
132
+ if (!IS_NOT_EOF_UP_TO(4)) {
133
+ rb_raise(rb_eSyntaxError, "unexpected EOF");
134
+ }
135
+
136
+ if (!IS_EQUAL_UP_TO("true", 4)) {
137
+ rb_raise(rb_eSyntaxError, "expected true, got t%c%c%c", AFTER(1), AFTER(2), AFTER(3));
138
+ }
139
+
140
+ SEEK(4);
141
+
142
+ return Qtrue;
143
+ }
144
+ else {
145
+ if (!IS_NOT_EOF_UP_TO(5)) {
146
+ rb_raise(rb_eSyntaxError, "unexpected EOF");
147
+ }
148
+
149
+ if (!IS_EQUAL_UP_TO("false", 5)) {
150
+ rb_raise(rb_eSyntaxError, "expected false, got f%c%c%c%c", AFTER(1), AFTER(2), AFTER(3), AFTER(4));
151
+ }
152
+
153
+ SEEK(5);
154
+
155
+ return Qfalse;
156
+ }
157
+ }
158
+
159
+ static VALUE string_read_number (VALUE self, char* string, size_t* position)
160
+ {
161
+ size_t length = 0;
162
+ VALUE rbPiece;
163
+ char* cPiece;
164
+ char* tmp;
165
+
166
+ while (!IS_EOF_AFTER(length) && !IS_BOTH(AFTER(length))) {
167
+ length++;
168
+ }
169
+
170
+ SEEK(length);
171
+
172
+ rbPiece = rb_str_new(BEFORE_PTR(length), length);
173
+ cPiece = StringValueCStr(rbPiece);
174
+
175
+ if (strchr(cPiece, '/')) {
176
+ return rb_funcall(rb_cObject, rb_intern("Rational"), 1, rbPiece);
177
+ }
178
+ else if ((tmp = strchr(cPiece, 'r')) || (tmp = strchr(cPiece, 'R'))) {
179
+ return rb_funcall(rb_str_new2(tmp + 1), rb_intern("to_i"), 1,
180
+ rb_funcall(rb_str_new(cPiece, tmp - cPiece), rb_intern("to_i"), 0));
181
+ }
182
+ else if (strchr(cPiece, '.') || strchr(cPiece, 'e') || strchr(cPiece, 'E') || cPiece[length - 1] == 'M') {
183
+ if (cPiece[length - 1] == 'M') {
184
+ return rb_funcall(rb_cObject, rb_intern("BigDecimal"), 1, rbPiece);
185
+ }
186
+ else {
187
+ return rb_funcall(rb_cObject, rb_intern("Float"), 1, rbPiece);
188
+ }
189
+ }
190
+ else {
191
+ if (cPiece[length - 1] == 'N') {
192
+ rb_str_set_len(rbPiece, length - 1);
193
+ }
194
+
195
+ return rb_funcall(rb_cObject, rb_intern("Integer"), 1, rbPiece);
196
+ }
197
+ }
198
+
199
+ static VALUE string_read_char (VALUE self, char* string, size_t* position)
200
+ {
201
+ SEEK(1);
202
+
203
+ if (IS_EOF_AFTER(1) || IS_BOTH(AFTER(1))) {
204
+ SEEK(1); return rb_str_new(BEFORE_PTR(1), 1);
205
+ }
206
+ else if (IS_NOT_EOF_UP_TO(7) && IS_EQUAL_UP_TO("newline", 7) && (IS_EOF_AFTER(7) || IS_BOTH(AFTER(7)))) {
207
+ SEEK(7); return rb_str_new2("\n");
208
+ }
209
+ else if (IS_NOT_EOF_UP_TO(5) && IS_EQUAL_UP_TO("space", 5) && (IS_EOF_AFTER(5) || IS_BOTH(AFTER(5)))) {
210
+ SEEK(5); return rb_str_new2(" ");
211
+ }
212
+ else if (IS_NOT_EOF_UP_TO(3) && IS_EQUAL_UP_TO("tab", 3) && (IS_EOF_AFTER(3) || IS_BOTH(AFTER(3)))) {
213
+ SEEK(3); return rb_str_new2("\t");
214
+ }
215
+ else if (IS_NOT_EOF_UP_TO(9) && IS_EQUAL_UP_TO("backspace", 9) && (IS_EOF_AFTER(9) || IS_BOTH(AFTER(9)))) {
216
+ SEEK(9); return rb_str_new2("\b");
217
+ }
218
+ else if (IS_NOT_EOF_UP_TO(8) && IS_EQUAL_UP_TO("formfeed", 8) && (IS_EOF_AFTER(8) || IS_BOTH(AFTER(8)))) {
219
+ SEEK(8); return rb_str_new2("\f");
220
+ }
221
+ else if (IS_NOT_EOF_UP_TO(6) && IS_EQUAL_UP_TO("return", 6) && (IS_EOF_AFTER(6) || IS_BOTH(AFTER(6)))) {
222
+ SEEK(6); return rb_str_new2("\r");
223
+ }
224
+ else if (CURRENT == 'u' && IS_NOT_EOF_UP_TO(5) && !NIL_P(rb_funcall(rb_str_new(AFTER_PTR(1), 4), rb_intern("=~"), 1, UNICODE_REGEX)) && (IS_EOF_AFTER(5) || IS_BOTH(AFTER(5)))) {
225
+ SEEK(5); return rb_funcall(rb_ary_new3(1, rb_funcall(rb_str_new(BEFORE_PTR(4), 4), rb_intern("to_i"), 1, INT2FIX(16))),
226
+ rb_intern("pack"), 1, rb_str_new2("U"));
227
+ }
228
+ else if (CURRENT == 'o') {
229
+ size_t length = 1;
230
+
231
+ for (size_t i = 1; i < 5; i++) {
232
+ if (IS_EOF_AFTER(i) || IS_BOTH(AFTER(i))) {
233
+ break;
234
+ }
235
+
236
+ length++;
237
+ }
238
+
239
+ if (length > 1 && !NIL_P(rb_funcall(rb_str_new(AFTER_PTR(1), length - 1), rb_intern("=~"), 1, OCTAL_REGEX)) && (IS_EOF_AFTER(length) || IS_BOTH(AFTER(length)))) {
240
+ SEEK(length); return rb_funcall(rb_funcall(rb_str_new(BEFORE_PTR(length - 1), length - 1), rb_intern("to_i"), 1, INT2FIX(8)),
241
+ rb_intern("chr"), 0);
242
+ }
243
+ }
244
+
245
+ // TODO: add unicode and octal chars support
246
+
247
+ rb_raise(rb_eSyntaxError, "unknown character type");
248
+ }
249
+
250
+ static VALUE string_read_keyword (VALUE self, char* string, size_t* position)
251
+ {
252
+ size_t length = 0;
253
+
254
+ SEEK(1);
255
+
256
+ while (!IS_EOF_AFTER(length) && !IS_KEYWORD(AFTER(length))) {
257
+ length++;
258
+ }
259
+
260
+ SEEK(length);
261
+
262
+ return rb_funcall(rb_str_new(BEFORE_PTR(length), length), rb_intern("to_sym"), 0);
263
+ }
264
+
265
+ static VALUE string_read_string (VALUE self, char* string, size_t* position)
266
+ {
267
+ size_t length = 0;
268
+
269
+ SEEK(1);
270
+
271
+ while (AFTER(length) != '"') {
272
+ if (IS_EOF_AFTER(length)) {
273
+ rb_raise(rb_eSyntaxError, "unexpected EOF");
274
+ }
275
+
276
+ if (AFTER(length) == '\\') {
277
+ length++;
278
+ }
279
+
280
+ length++;
281
+ }
282
+
283
+ SEEK(length + 1);
284
+
285
+ // TODO: make the escapes work properly
286
+
287
+ return rb_funcall(cClojure, rb_intern("unescape"), 1, rb_str_new(BEFORE_PTR(length + 1), length));
288
+ }
289
+
290
+ static VALUE string_read_regexp (VALUE self, char* string, size_t* position)
291
+ {
292
+ size_t length = 0;
293
+ VALUE args[] = { Qnil };
294
+
295
+ SEEK(2);
296
+
297
+ while (AFTER(length) != '"') {
298
+ if (IS_EOF_AFTER(length)) {
299
+ rb_raise(rb_eSyntaxError, "unexpected EOF");
300
+ }
301
+
302
+ if (AFTER(length) == '\\') {
303
+ length++;
304
+ }
305
+
306
+ length++;
307
+ }
308
+
309
+ SEEK(length + 1);
310
+
311
+ args[0] = rb_str_new(BEFORE_PTR(length + 1), length);
312
+
313
+ return rb_class_new_instance(1, args, rb_cRegexp);
314
+ }
315
+
316
+ static VALUE string_read_instant (VALUE self, char* string, size_t* position)
317
+ {
318
+ SEEK(1);
319
+
320
+ if (!IS_NOT_EOF_UP_TO(4)) {
321
+ rb_raise(rb_eSyntaxError, "unexpected EOF");
322
+ }
323
+
324
+ if (!IS_EQUAL_UP_TO("inst", 4)) {
325
+ rb_raise(rb_eSyntaxError, "expected inst, got %c%c%c%c", AFTER(0), AFTER(1), AFTER(2), AFTER(3));
326
+ }
327
+
328
+ SEEK(4);
329
+
330
+ CALL(string_ignore);
331
+
332
+ return rb_funcall(rb_const_get(rb_cObject, rb_intern("DateTime")), rb_intern("rfc3339"), 1, CALL(string_read_string));
333
+ }
334
+
335
+ static VALUE string_read_list (VALUE self, char* string, size_t* position)
336
+ {
337
+ VALUE result = rb_class_new_instance(0, NULL, rb_iv_get(self, "@list_class"));
338
+
339
+ SEEK(1); CALL(string_ignore);
340
+
341
+ while (CURRENT != ')') {
342
+ rb_funcall(result, rb_intern("<<"), 1, CALL(string_read_next));
343
+
344
+ CALL(string_ignore);
345
+ }
346
+
347
+ SEEK(1);
348
+
349
+ return result;
350
+ }
351
+
352
+ static VALUE string_read_vector (VALUE self, char* string, size_t* position)
353
+ {
354
+ VALUE result = rb_class_new_instance(0, NULL, rb_iv_get(self, "@vector_class"));
355
+
356
+ SEEK(1); CALL(string_ignore);
357
+
358
+ while (CURRENT != ']') {
359
+ rb_funcall(result, rb_intern("<<"), 1, CALL(string_read_next));
360
+
361
+ CALL(string_ignore);
362
+ }
363
+
364
+ SEEK(1);
365
+
366
+ return result;
367
+ }
368
+
369
+ static VALUE string_read_set (VALUE self, char* string, size_t* position)
370
+ {
371
+ VALUE result = rb_class_new_instance(0, NULL, rb_iv_get(self, "@set_class"));
372
+
373
+ SEEK(2); CALL(string_ignore);
374
+
375
+ while (CURRENT != '}') {
376
+ rb_funcall(result, rb_intern("<<"), 1, CALL(string_read_next));
377
+
378
+ CALL(string_ignore);
379
+ }
380
+
381
+ SEEK(1);
382
+
383
+ if (!NIL_P(rb_funcall(result, rb_intern("uniq!"), 0))) {
384
+ rb_raise(rb_eSyntaxError, "the set contains non unique values");
385
+ }
386
+
387
+ return result;
388
+ }
389
+
390
+ static VALUE string_read_map (VALUE self, char* string, size_t* position)
391
+ {
392
+ VALUE result = rb_class_new_instance(0, NULL, rb_iv_get(self, "@map_class"));
393
+ VALUE key;
394
+ VALUE value;
395
+
396
+ SEEK(1); CALL(string_ignore);
397
+
398
+ while (CURRENT != '}') {
399
+ key = CALL(string_read_next);
400
+ CALL(string_ignore);
401
+ value = CALL(string_read_next);
402
+
403
+ rb_funcall(result, rb_intern("[]="), 2, key, value);
404
+ }
405
+
406
+ SEEK(1);
407
+
408
+ return result;
409
+ }
410
+
411
+ static VALUE string_read_next (VALUE self, char* string, size_t* position)
412
+ {
413
+ CALL(string_ignore);
414
+
415
+ if (IS_EOF) {
416
+ rb_raise(rb_eSyntaxError, "unexpected EOF");
417
+ }
418
+
419
+ switch (CALL(string_next_type)) {
420
+ case NODE_METADATA: return CALL(string_read_metadata);
421
+ case NODE_NUMBER: return CALL(string_read_number);
422
+ case NODE_BOOLEAN: return CALL(string_read_boolean);
423
+ case NODE_NIL: return CALL(string_read_nil);
424
+ case NODE_CHAR: return CALL(string_read_char);
425
+ case NODE_KEYWORD: return CALL(string_read_keyword);
426
+ case NODE_STRING: return CALL(string_read_string);
427
+ case NODE_MAP: return CALL(string_read_map);
428
+ case NODE_LIST: return CALL(string_read_list);
429
+ case NODE_VECTOR: return CALL(string_read_vector);
430
+ case NODE_INSTANT: return CALL(string_read_instant);
431
+ case NODE_SET: return CALL(string_read_set);
432
+ case NODE_REGEXP: return CALL(string_read_regexp);
433
+ }
434
+ }
435
+
436
+ static VALUE string_parse (VALUE self)
437
+ {
438
+ size_t position = 0;
439
+ VALUE source = rb_iv_get(self, "@source");
440
+
441
+ return string_read_next(self, StringValueCStr(source), &position);
442
+ }
443
+
444
+ #undef IS_EOF
445
+ #undef IS_EOF_AFTER
446
+ #undef CURRENT
447
+ #undef CURRENT_PTR
448
+ #undef AFTER
449
+ #undef AFTER_PTR
450
+ #undef BEFORE
451
+ #undef BEFORE_PTR
452
+ #undef SEEK
453
+ #undef IS_IGNORED
454
+ #undef IS_BOTH
455
+ #undef IS_KEYWORD
456
+ #undef IS_NOT_EOF_UP_TO
457
+ #undef IS_EQUAL_UP_TO
458
+ #undef IS_EQUAL
459
+ #undef CALL
460
+ #endif
data/lib/clj.rb CHANGED
@@ -11,10 +11,7 @@
11
11
  require 'date'
12
12
  require 'bigdecimal'
13
13
 
14
- require 'clj/parser'
15
- require 'clj/types'
16
-
17
- class Clojure
14
+ module Clojure
18
15
  def self.parse (*args)
19
16
  Clojure::Parser.new(*args).parse
20
17
  end
@@ -24,4 +21,59 @@ class Clojure
24
21
 
25
22
  what.to_clj(options)
26
23
  end
24
+
25
# Matches one escape sequence: a backslash followed by a known escape
# character, a run of \uXXXX sequences, or a backslash followed by any other
# character in the \x20-\xff range.
UNESCAPE_REGEX = %r((?:\\[\\bfnrt"/]|(?:\\u(?:[A-Fa-f\d]{4}))+|\\[\x20-\xff]))n

# Maps the character following the backslash to its replacement. Unknown
# characters map to themselves; `u` maps to nil so that the \uXXXX handling
# in ::unescape takes over.
UNESCAPE_MAP = Hash.new { |table, char| table[char] = char.chr }
UNESCAPE_MAP.update(
  ?"  => '"',
  ?\\ => '\\',
  ?/  => '/',
  ?b  => "\b",
  ?f  => "\f",
  ?n  => "\n",
  ?r  => "\r",
  ?t  => "\t",
  ?u  => nil
)

# Binary-encoded empty string used as the seed for decoding \uXXXX runs.
EMPTY_8BIT_STRING = ''
EMPTY_8BIT_STRING.force_encoding(Encoding::ASCII_8BIT) if EMPTY_8BIT_STRING.respond_to?(:force_encoding)

# Returns a copy of +string+ with every escape sequence replaced by the
# character it stands for.
def self.unescape (string)
  string.gsub(UNESCAPE_REGEX) do |escape|
    simple = UNESCAPE_MAP[escape[1]]
    next simple if simple

    # Only \uXXXX runs get here: collect the two bytes of every code unit.
    raw    = EMPTY_8BIT_STRING.dup
    offset = 0

    while escape[offset] == ?\\ && escape[offset + 1] == ?u
      raw << escape[offset + 2, 2].to_i(16) << escape[offset + 4, 2].to_i(16)
      offset += 6
    end

    if raw.respond_to?(:force_encoding)
      # the collected bytes are UTF-16 big endian code units
      raw.force_encoding('UTF-16be')
      raw.encode('UTF-8')
    else
      raw
    end
  end
end
71
+ end
72
+
73
+ require 'clj/types'
74
+
75
+ if RUBY_ENGINE == 'ruby' || RUBY_ENGINE == 'rbx'
76
+ require 'clj/parser_ext'
77
+ else
78
+ require 'clj/parser'
27
79
  end
@@ -10,43 +10,22 @@
10
10
 
11
11
  require 'stringio'
12
12
 
13
- class Clojure
13
+ module Clojure
14
14
 
15
15
  class Parser
16
16
  NUMBERS = '0' .. '9'
17
17
 
18
- STRING_REGEX = %r((?:\\[\\bfnrt"/]|(?:\\u(?:[A-Fa-f\d]{4}))+|\\[\x20-\xff]))n
19
- UNICODE_REGEX = /u([0-9|a-f|A-F]{4})/
20
- OCTAL_REGEX = /o([0-3][0-7]?[0-7]?)/
21
-
22
- # Unescape characters in strings.
23
- UNESCAPE_MAP = Hash.new { |h, k| h[k] = k.chr }
24
- UNESCAPE_MAP.merge!(
25
- ?" => '"',
26
- ?\\ => '\\',
27
- ?/ => '/',
28
- ?b => "\b",
29
- ?f => "\f",
30
- ?n => "\n",
31
- ?r => "\r",
32
- ?t => "\t",
33
- ?u => nil
34
- )
35
-
36
- EMPTY_8BIT_STRING = ''
37
-
38
- if EMPTY_8BIT_STRING.respond_to? :force_encoding
39
- EMPTY_8BIT_STRING.force_encoding Encoding::ASCII_8BIT
40
- end
18
+ UNICODE_REGEX = /[0-9|a-f|A-F]{4}/
19
+ OCTAL_REGEX = /[0-3]?[0-7]?[0-7]/
41
20
 
42
21
  def initialize (source, options = {})
43
22
  @source = source.is_a?(String) ? StringIO.new(source) : source
44
23
  @options = options
45
24
 
46
- @map_class = options[:map_class] || Hash
47
- @vector_class = options[:vector_class] || Array
48
- @list_class = options[:list_class] || Array
49
- @set_class = options[:set_class] || Array
25
+ @map_class = options[:map_class] || Clojure::Map
26
+ @vector_class = options[:vector_class] || Clojure::Vector
27
+ @list_class = options[:list_class] || Clojure::List
28
+ @set_class = options[:set_class] || Clojure::Set
50
29
  end
51
30
 
52
31
  def parse
@@ -56,6 +35,7 @@ class Parser
56
35
  private
57
36
  def next_type (ch)
58
37
  case ch
38
+ when '^' then :metadata
59
39
  when NUMBERS, '-', '+' then :number
60
40
  when 't', 'f' then :boolean
61
41
  when 'n' then :nil
@@ -82,6 +62,26 @@ private
82
62
  __send__ "read_#{next_type ch}", ch
83
63
  end
84
64
 
65
# Reads the metadata that follows a '^', any further '^' forms directly
# after it, and finally the value they all apply to. Every metadata is
# assigned to the value through #metadata=, in reading order, and the value
# is returned.
#
# Raises SyntaxError when the source ends unexpectedly or when the value
# does not respond to #metadata=.
def read_metadata (ch)
  pending = [read_next]

  while lookahead(1) == '^'
    raise SyntaxError, 'unexpected EOF' unless @source.read(1)

    pending << read_next
  end

  target = read_next

  raise SyntaxError, 'the object cannot hold metadata' unless target.respond_to?(:metadata=)

  pending.each { |entry| target.metadata = entry }

  target
end
84
+
85
85
  def read_nil (ch)
86
86
  check = @source.read(2)
87
87
 
@@ -135,8 +135,6 @@ private
135
135
  number.to_i(base.to_i)
136
136
  elsif piece.include? '.' or piece.include? 'e' or piece.include? 'E' or piece.end_with? 'M'
137
137
  if piece.end_with? 'M'
138
- piece[-1] = ''
139
-
140
138
  BigDecimal(piece)
141
139
  else
142
140
  Float(piece)
@@ -165,13 +163,15 @@ private
165
163
  @source.read(8) and "\f"
166
164
  elsif (ahead = lookahead(7)) && ahead[0, 6] == 'return' && (!ahead[6] || both?(ahead[6]))
167
165
  @source.read(6) and "\r"
168
- elsif (ahead = lookahead(6)) && ahead[0, 5] =~ UNICODE_REGEX && (!ahead[5] || both?(ahead[5]))
166
+ elsif (ahead = lookahead(6)) && ahead[0] == 'u' && ahead[1, 5] =~ UNICODE_REGEX && (!ahead[5] || both?(ahead[5]))
169
167
  [@source.read(5)[1, 4].to_i(16)].pack('U')
170
- elsif (ahead = lookahead(5)) && ahead[0, 4] =~ OCTAL_REGEX && (!ahead[4] || both?(ahead[4]))
171
- @source.read(4)[1, 3].to_i(8).chr
172
- else
173
- raise SyntaxError, 'unknown character type'
174
- end
168
+ elsif (ahead = lookahead(5)) && ahead[0] == 'o' && matches = ahead[1, 3].match(OCTAL_REGEX)
169
+ length = matches[0].length + 1
170
+
171
+ if !ahead[length] || both?(ahead[length])
172
+ @source.read(length)[1, 3].to_i(8).chr
173
+ end
174
+ end or raise SyntaxError, 'unknown character type'
175
175
  end
176
176
 
177
177
  def read_keyword (ch)
@@ -199,31 +199,17 @@ private
199
199
  end
200
200
  end
201
201
 
202
- result.gsub(STRING_REGEX) {|escape|
203
- if u = UNESCAPE_MAP[$&[1]]
204
- next u
205
- end
206
-
207
- bytes = EMPTY_8BIT_STRING.dup
208
-
209
- i = 0
210
- while escape[6 * i] == ?\\ && escape[6 * i + 1] == ?u
211
- bytes << escape[6 * i + 2, 2].to_i(16) << escape[6 * i + 4, 2].to_i(16)
212
-
213
- i += 1
214
- end
215
-
216
- if bytes.respond_to? :force_encoding
217
- bytes.force_encoding 'UTF-16be'
218
- bytes.encode 'UTF-8'
219
- else
220
- bytes
221
- end
222
- }
202
+ Clojure.unescape(result)
223
203
  end
224
204
 
225
205
  def read_instant (ch)
226
- @source.read(3)
206
+ check = @source.read(3)
207
+
208
+ if check.length != 3
209
+ raise SyntaxError, 'unexpected EOF'
210
+ elsif check != 'nst'
211
+ raise SyntaxError, "expected inst, found i#{check}"
212
+ end
227
213
 
228
214
  DateTime.rfc3339(read_string(ignore(false)))
229
215
  end
@@ -311,14 +297,12 @@ private
311
297
  result
312
298
  end
313
299
 
314
- def unescape (string)
315
- string
316
- end
317
-
318
300
  def lookahead (length)
319
301
  result = @source.read(length)
320
302
 
321
- @source.seek(-result.length, IO::SEEK_CUR)
303
+ if result
304
+ @source.seek(-result.length, IO::SEEK_CUR)
305
+ end
322
306
 
323
307
  result
324
308
  end
@@ -334,27 +318,15 @@ private
334
318
  end
335
319
 
336
320
  def ignore? (ch)
337
- if ch == ' ' || ch == ',' || ch == "\n" || ch == "\r" || ch == "\t"
338
- true
339
- else
340
- false
341
- end
321
+ ch == ' ' || ch == ',' || ch == "\n" || ch == "\r" || ch == "\t"
342
322
  end
343
323
 
344
324
  def both? (ch)
345
- if ch == ' ' || ch == ',' || ch == '"' || ch == '{' || ch == '}' || ch == '(' || ch == ')' || ch == '[' || ch == ']' || ch == '#' || ch == ':' || ch == "\n" || ch == "\r" || ch == "\t"
346
- true
347
- else
348
- false
349
- end
325
+ ch == ' ' || ch == ',' || ch == '"' || ch == '{' || ch == '}' || ch == '(' || ch == ')' || ch == '[' || ch == ']' || ch == '#' || ch == ':' || ch == "\n" || ch == "\r" || ch == "\t"
350
326
  end
351
327
 
352
328
  def keyword? (ch)
353
- if ch == ' ' || ch == ',' || ch == '"' || ch == '{' || ch == '}' || ch == '(' || ch == ')' || ch == '[' || ch == ']' || ch == '#' || ch == ':' || ch == "'" || ch == '^' || ch == '@' || ch == '`' || ch == '~' || ch == '\\' || ch == ';' || ch == "\n" || ch == "\r" || ch == "\t"
354
- true
355
- else
356
- false
357
- end
329
+ ch == ' ' || ch == ',' || ch == '"' || ch == '{' || ch == '}' || ch == '(' || ch == ')' || ch == '[' || ch == ']' || ch == '#' || ch == ':' || ch == "'" || ch == '^' || ch == '@' || ch == '`' || ch == '~' || ch == '\\' || ch == ';' || ch == "\n" || ch == "\r" || ch == "\t"
358
330
  end
359
331
  end
360
332
 
@@ -8,6 +8,73 @@
8
8
  # 0. You just DO WHAT THE FUCK YOU WANT TO.
9
9
  #++
10
10
 
11
module Clojure
  # Mixin giving an object a lazily created metadata map and the means to
  # print it in front of the object's own Clojure representation.
  module Metadata
    # The metadata map, created empty on first access.
    def metadata
      @metadata ||= Clojure::Map.new
    end

    # Merges +value+ into the metadata: a Hash is merged as is, a Symbol
    # becomes a true flag and a String becomes the :tag entry. Anything else
    # raises ArgumentError.
    def metadata= (value)
      current = metadata

      addition =
        case value
        when Hash   then value
        when Symbol then { value => true }
        when String then { :tag => value }
        else raise ArgumentError, 'the passed value is not suitable as metadata'
        end

      current.merge!(addition)
    end

    # The '^...' prefix, trailing space included, or an empty string when
    # there is no metadata or options[:metadata] is false. A single flag or
    # a single String :tag is printed in its short form.
    def metadata_to_clj (options = {})
      return '' if options[:metadata] == false || !@metadata || @metadata.empty?

      printed =
        if @metadata.length == 1
          key, val = @metadata.first

          if key.is_a?(Symbol) && val == true
            key.to_clj(options)
          elsif key == :tag && val.is_a?(String)
            val.to_clj(options)
          else
            @metadata.to_clj(options)
          end
        else
          @metadata.to_clj(options)
        end

      ['^', printed, ' '].join
    end
  end

  # Hash printed as a Clojure map.
  class Map < Hash
    include Clojure::Metadata

    def to_clj (options = {})
      prefix = metadata_to_clj(options)
      pairs  = map { |k, v| k.to_clj(options) + ' ' + v.to_clj(options) }

      [prefix, '{', pairs.join(' '), '}'].join
    end
  end

  # Array printed as a Clojure vector.
  class Vector < Array
    include Clojure::Metadata

    def to_clj (options = {})
      prefix = metadata_to_clj(options)
      items  = map { |o| o.to_clj(options) }

      [prefix, '[', items.join(' '), ']'].join
    end
  end

  # Array printed as a Clojure list.
  class List < Array
    include Clojure::Metadata

    def to_clj (options = {})
      prefix = metadata_to_clj(options)
      items  = map { |o| o.to_clj(options) }

      [prefix, '(', items.join(' '), ')'].join
    end
  end

  # Array printed as a Clojure set; duplicates are dropped when printing.
  class Set < Array
    include Clojure::Metadata

    def to_clj (options = {})
      prefix = metadata_to_clj(options)
      items  = uniq.map { |o| o.to_clj(options) }

      [prefix, '#{', items.join(' '), '}'].join
    end
  end
end
77
+
11
78
  [Numeric, TrueClass, FalseClass, NilClass].each {|klass|
12
79
  klass.instance_eval {
13
80
  define_method :to_clj do |*|
@@ -57,11 +124,7 @@ end
57
124
 
58
125
  class DateTime
59
126
  def to_clj (options = {})
60
- if options[:alpha]
61
- '#inst "' + rfc3339 + '"'
62
- else
63
- to_time.to_i.to_s
64
- end
127
+ options[:alpha] ? '#inst "' + rfc3339 + '"' : to_time.to_i.to_s
65
128
  end
66
129
  end
67
130
 
@@ -91,12 +154,28 @@ end
91
154
 
92
155
  class Array
93
156
  def to_clj (options = {})
94
- '[' + map { |o| o.to_clj(options) }.join(' ') + ']'
157
+ to_vector.to_clj(options)
158
+ end
159
+
160
+ def to_set
161
+ Clojure::Set.new(self)
162
+ end
163
+
164
+ def to_vector
165
+ Clojure::Vector.new(self)
166
+ end
167
+
168
+ def to_list
169
+ Clojure::List.new(self)
95
170
  end
96
171
  end
97
172
 
98
173
  class Hash
99
174
  def to_clj (options = {})
100
- '{' + map { |k, v| k.to_clj(options) + ' ' + v.to_clj(options) }.join(' ') + '}'
175
+ to_map.to_clj(options)
176
+ end
177
+
178
+ def to_map
179
+ Clojure::Map[self]
101
180
  end
102
181
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: clj
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5.6
4
+ version: 0.0.6
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-02-16 00:00:00.000000000 Z
12
+ date: 2012-03-01 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
16
- requirement: &4605840 !ruby/object:Gem::Requirement
16
+ requirement: &12555040 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *4605840
24
+ version_requirements: *12555040
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: rspec
27
- requirement: &4605160 !ruby/object:Gem::Requirement
27
+ requirement: &12553860 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,13 +32,18 @@ dependencies:
32
32
  version: '0'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *4605160
35
+ version_requirements: *12553860
36
36
  description:
37
37
  email: meh@paranoici.org
38
38
  executables: []
39
- extensions: []
39
+ extensions:
40
+ - ext/clj/extconf.rb
40
41
  extra_rdoc_files: []
41
42
  files:
43
+ - ext/clj/parser.c
44
+ - ext/clj/io_parser.c
45
+ - ext/clj/string_parser.c
46
+ - ext/clj/extconf.rb
42
47
  - lib/clj.rb
43
48
  - lib/clj/types.rb
44
49
  - lib/clj/parser.rb
@@ -62,7 +67,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
62
67
  version: '0'
63
68
  requirements: []
64
69
  rubyforge_project:
65
- rubygems_version: 1.8.15
70
+ rubygems_version: 1.8.16
66
71
  signing_key:
67
72
  specification_version: 3
68
73
  summary: Like json, but with clojure sexps.