clj 0.0.5.6 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,15 @@
1
require 'mkmf'

# Matches any optimisation level already present in the compiler flags.
optimization = / -O[\dsz]?/

# Build optimised by default: rewrite an existing -O flag, or append one.
$CFLAGS << ' -O3' if $CFLAGS.gsub!(optimization, ' -O3').nil?

if CONFIG['CC'] =~ /gcc/
	$CFLAGS << ' -Wall' << ' -std=c99'

	# Running extconf with ruby -d switches to an unoptimised build with
	# gdb debugging information.
	if $DEBUG
		$CFLAGS << ' -O0 -ggdb' if $CFLAGS.gsub!(optimization, ' -O0 -ggdb').nil?
	end
end

create_makefile 'clj/parser_ext'
@@ -0,0 +1,18 @@
1
+ /**
2
+ * DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
3
+ * Version 2, December 2004
4
+ *
5
+ * DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
6
+ * TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
7
+ *
8
+ * 0. You just DO WHAT THE FUCK YOU WANT TO.
9
+ **/
10
+
11
+ #ifdef _INSIDE_PARSER
12
+
13
+ static VALUE io_parse (VALUE self)
14
+ {
15
+ return Qnil;
16
+ }
17
+
18
+ #endif
@@ -0,0 +1,127 @@
1
+ /**
2
+ * DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
3
+ * Version 2, December 2004
4
+ *
5
+ * DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
6
+ * TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
7
+ *
8
+ * 0. You just DO WHAT THE FUCK YOU WANT TO.
9
+ **/
10
+
11
+ #include <stdbool.h>
12
+ #include <ctype.h>
13
+
14
+ #include "ruby.h"
15
+
16
+ static VALUE cClojure;
17
+ static VALUE cParser;
18
+
19
+ static VALUE UNICODE_REGEX;
20
+ static VALUE OCTAL_REGEX;
21
+
22
+ #define _INSIDE_PARSER
23
+ typedef enum {
24
+ NODE_METADATA,
25
+ NODE_NUMBER,
26
+ NODE_BOOLEAN,
27
+ NODE_NIL,
28
+ NODE_CHAR,
29
+ NODE_KEYWORD,
30
+ NODE_STRING,
31
+ NODE_MAP,
32
+ NODE_LIST,
33
+ NODE_VECTOR,
34
+ NODE_INSTANT,
35
+ NODE_SET,
36
+ NODE_REGEXP
37
+ } NodeType;
38
+
39
+ #include "string_parser.c"
40
+ #include "io_parser.c"
41
+ #undef _INSIDE_PARSER
42
+
43
+ static VALUE t_init (int argc, VALUE* argv, VALUE self)
44
+ {
45
+ VALUE tmp;
46
+ VALUE source;
47
+ VALUE options;
48
+
49
+ if (argc < 1) {
50
+ rb_raise(rb_eArgError, "wrong number of arguments (0 for 1)");
51
+ }
52
+ else if (argc > 2) {
53
+ rb_raise(rb_eArgError, "wrong number of arguments (%d for 2)", argc);
54
+ }
55
+
56
+ if (!rb_obj_is_kind_of(argv[0], rb_cString) && !rb_obj_is_kind_of(argv[0], rb_cIO)) {
57
+ rb_raise(rb_eArgError, "you have to pass a String or an IO");
58
+ }
59
+
60
+ source = argv[0];
61
+ options = argc == 2 ? argv[1] : rb_hash_new();
62
+
63
+ rb_iv_set(self, "@source", source);
64
+ rb_iv_set(self, "@options", options);
65
+
66
+ if (!NIL_P(tmp = rb_hash_aref(options, rb_intern("map_class")))) {
67
+ rb_iv_set(self, "@map_class", tmp);
68
+ }
69
+ else {
70
+ rb_iv_set(self, "@map_class", rb_const_get(cClojure, rb_intern("Map")));
71
+ }
72
+
73
+ if (!NIL_P(tmp = rb_hash_aref(options, rb_intern("vector_class")))) {
74
+ rb_iv_set(self, "@vector_class", tmp);
75
+ }
76
+ else {
77
+ rb_iv_set(self, "@vector_class", rb_const_get(cClojure, rb_intern("Vector")));
78
+ }
79
+
80
+ if (!NIL_P(tmp = rb_hash_aref(options, rb_intern("list_class")))) {
81
+ rb_iv_set(self, "@list_class", tmp);
82
+ }
83
+ else {
84
+ rb_iv_set(self, "@list_class", rb_const_get(cClojure, rb_intern("Vector")));
85
+ }
86
+
87
+ if (!NIL_P(tmp = rb_hash_aref(options, rb_intern("set_class")))) {
88
+ rb_iv_set(self, "@set_class", tmp);
89
+ }
90
+ else {
91
+ rb_iv_set(self, "@set_class", rb_const_get(cClojure, rb_intern("Vector")));
92
+ }
93
+
94
+ return self;
95
+ }
96
+
97
+ static VALUE t_parse (VALUE self)
98
+ {
99
+ VALUE source = rb_iv_get(self, "@source");
100
+
101
+ if (rb_obj_is_kind_of(source, rb_cString)) {
102
+ return string_parse(self);
103
+ }
104
+ else if (rb_obj_is_kind_of(source, rb_cIO)) {
105
+ return io_parse(self);
106
+ }
107
+ }
108
+
109
+ void
110
+ Init_parser_ext (void)
111
+ {
112
+ cClojure = rb_const_get(rb_cObject, rb_intern("Clojure"));
113
+ cParser = rb_define_class_under(cClojure, "Parser", rb_cObject);
114
+
115
+ rb_define_method(cParser, "initialize", t_init, -1);
116
+ rb_define_method(cParser, "parse", t_parse, 0);
117
+
118
+ VALUE args[] = { Qnil };
119
+
120
+ args[0] = rb_str_new2("[0-9|a-f|A-F]{4}");
121
+ UNICODE_REGEX = rb_class_new_instance(1, args, rb_cRegexp);
122
+ rb_define_const(cClojure, "UNICODE_REGEX", UNICODE_REGEX);
123
+
124
+ args[0] = rb_str_new2("[0-3]?[0-7]?[0-7]");
125
+ OCTAL_REGEX = rb_class_new_instance(1, args, rb_cRegexp);
126
+ rb_define_const(cClojure, "OCTAL_REGEX", OCTAL_REGEX);
127
+ }
@@ -0,0 +1,460 @@
1
+ /**
2
+ * DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
3
+ * Version 2, December 2004
4
+ *
5
+ * DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE
6
+ * TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
7
+ *
8
+ * 0. You just DO WHAT THE FUCK YOU WANT TO.
9
+ **/
10
+
11
+ #ifdef _INSIDE_PARSER
12
+ #define IS_EOF (string[*position] == '\0')
13
+ #define IS_EOF_AFTER(n) (string[*position + (n)] == '\0')
14
+ #define CURRENT (string[*position])
15
+ #define CURRENT_PTR (&string[*position])
16
+ #define AFTER(n) (string[*position + (n)])
17
+ #define AFTER_PTR(n) (&string[*position + (n)])
18
+ #define BEFORE(n) (string[*position - (n)])
19
+ #define BEFORE_PTR(n) (&string[*position - (n)])
20
+ #define SEEK(n) (*position += (n))
21
+ #define IS_IGNORED(ch) (isspace(ch) || ch == ',')
22
+ #define IS_BOTH(ch) (ch == ' ' || ch == ',' || ch == '"' || ch == '{' || ch == '}' || ch == '(' || ch == ')' || ch == '[' || ch == ']' || ch == '#' || ch == ':' || ch == '\n' || ch == '\r' || ch == '\t')
23
+ #define IS_KEYWORD(ch) (ch == ' ' || ch == ',' || ch == '"' || ch == '{' || ch == '}' || ch == '(' || ch == ')' || ch == '[' || ch == ']' || ch == '#' || ch == ':' || ch == '\'' || ch == '^' || ch == '@' || ch == '`' || ch == '~' || ch == '\\' || ch == ';' || ch == '\n' || ch == '\r' || ch == '\t')
24
+ #define IS_NOT_EOF_UP_TO(n) (is_not_eof_up_to(string, position, n))
25
+ #define IS_EQUAL_UP_TO(str, n) (strncmp(CURRENT_PTR, str, (n)) == 0)
26
+ #define IS_EQUAL(str) IS_EQUAL_UP_TO(str, strlen(str))
27
+ #define CALL(what) (what(self, string, position))
28
+
29
+ static VALUE string_read_next (VALUE self, char* string, size_t* position);
30
+
31
/**
 * Tell whether the next `n` characters, starting at the current position,
 * are all real characters, i.e. none of them is the terminating NUL.
 *
 * Scanning stops at the first NUL, so nothing past the terminator is read.
 */
static inline bool is_not_eof_up_to (char* string, size_t* position, size_t n)
{
	size_t offset = 0;

	while (offset < n && !IS_EOF_AFTER(offset)) {
		offset++;
	}

	// The walk only covers all n characters when no terminator was met.
	return offset == n;
}
41
+
42
+ static void string_ignore (VALUE self, char* string, size_t* position)
43
+ {
44
+ while (!IS_EOF && IS_IGNORED(CURRENT)) {
45
+ SEEK(1);
46
+ }
47
+ }
48
+
49
+ static NodeType string_next_type (VALUE self, char* string, size_t* position)
50
+ {
51
+ if (isdigit(CURRENT) || CURRENT == '-' || CURRENT == '+') {
52
+ return NODE_NUMBER;
53
+ }
54
+
55
+ switch (CURRENT) {
56
+ case '^': return NODE_METADATA;
57
+ case 't': case 'f': return NODE_BOOLEAN;
58
+ case 'n': return NODE_NIL;
59
+ case '\\': return NODE_CHAR;
60
+ case ':': return NODE_KEYWORD;
61
+ case '"': return NODE_STRING;
62
+ case '{': return NODE_MAP;
63
+ case '(': return NODE_LIST;
64
+ case '[': return NODE_VECTOR;
65
+ }
66
+
67
+ if (CURRENT == '#') {
68
+ if (IS_EOF_AFTER(1)) {
69
+ rb_raise(rb_eSyntaxError, "unexpected EOF");
70
+ }
71
+
72
+ switch (AFTER(1)) {
73
+ case 'i': return NODE_INSTANT;
74
+ case '{': return NODE_SET;
75
+ case '"': return NODE_REGEXP;
76
+ }
77
+ }
78
+
79
+ rb_raise(rb_eSyntaxError, "unknown type");
80
+ }
81
+
82
+ static VALUE string_read_metadata (VALUE self, char* string, size_t* position)
83
+ {
84
+ VALUE result;
85
+ VALUE* metadatas = NULL;
86
+ size_t length = 0;
87
+
88
+ while (CURRENT == '^') {
89
+ metadatas = realloc(metadatas, ++length * sizeof(VALUE));
90
+
91
+ SEEK(1);
92
+
93
+ metadatas[length - 1] = CALL(string_read_next);
94
+ }
95
+
96
+ result = CALL(string_read_next);
97
+
98
+ if (!rb_respond_to(result, rb_intern("metadata="))) {
99
+ free(metadatas);
100
+
101
+ rb_raise(rb_eSyntaxError, "the object cannot hold metadata");
102
+ }
103
+
104
+ // FIXME: this could lead to a memleak if #metadata= raises
105
+ for (size_t i = 0; i < length; i++) {
106
+ rb_funcall(result, rb_intern("metadata="), 1, metadatas[i]);
107
+ }
108
+
109
+ free(metadatas);
110
+
111
+ return result;
112
+ }
113
+
114
+ static VALUE string_read_nil (VALUE self, char* string, size_t* position)
115
+ {
116
+ if (!IS_NOT_EOF_UP_TO(3)) {
117
+ rb_raise(rb_eSyntaxError, "unexpected EOF");
118
+ }
119
+
120
+ if (!IS_EQUAL_UP_TO("nil", 3)) {
121
+ rb_raise(rb_eSyntaxError, "expected nil, got n%c%c", AFTER(1), AFTER(2));
122
+ }
123
+
124
+ SEEK(3);
125
+
126
+ return Qnil;
127
+ }
128
+
129
+ static VALUE string_read_boolean (VALUE self, char* string, size_t* position)
130
+ {
131
+ if (CURRENT == 't') {
132
+ if (!IS_NOT_EOF_UP_TO(4)) {
133
+ rb_raise(rb_eSyntaxError, "unexpected EOF");
134
+ }
135
+
136
+ if (!IS_EQUAL_UP_TO("true", 4)) {
137
+ rb_raise(rb_eSyntaxError, "expected true, got t%c%c%c", AFTER(1), AFTER(2), AFTER(3));
138
+ }
139
+
140
+ SEEK(4);
141
+
142
+ return Qtrue;
143
+ }
144
+ else {
145
+ if (!IS_NOT_EOF_UP_TO(5)) {
146
+ rb_raise(rb_eSyntaxError, "unexpected EOF");
147
+ }
148
+
149
+ if (!IS_EQUAL_UP_TO("false", 5)) {
150
+ rb_raise(rb_eSyntaxError, "expected false, got f%c%c%c%c", AFTER(1), AFTER(2), AFTER(3), AFTER(4));
151
+ }
152
+
153
+ SEEK(5);
154
+
155
+ return Qfalse;
156
+ }
157
+ }
158
+
159
+ static VALUE string_read_number (VALUE self, char* string, size_t* position)
160
+ {
161
+ size_t length = 0;
162
+ VALUE rbPiece;
163
+ char* cPiece;
164
+ char* tmp;
165
+
166
+ while (!IS_EOF_AFTER(length) && !IS_BOTH(AFTER(length))) {
167
+ length++;
168
+ }
169
+
170
+ SEEK(length);
171
+
172
+ rbPiece = rb_str_new(BEFORE_PTR(length), length);
173
+ cPiece = StringValueCStr(rbPiece);
174
+
175
+ if (strchr(cPiece, '/')) {
176
+ return rb_funcall(rb_cObject, rb_intern("Rational"), 1, rbPiece);
177
+ }
178
+ else if ((tmp = strchr(cPiece, 'r')) || (tmp = strchr(cPiece, 'R'))) {
179
+ return rb_funcall(rb_str_new2(tmp + 1), rb_intern("to_i"), 1,
180
+ rb_funcall(rb_str_new(cPiece, tmp - cPiece), rb_intern("to_i"), 0));
181
+ }
182
+ else if (strchr(cPiece, '.') || strchr(cPiece, 'e') || strchr(cPiece, 'E') || cPiece[length - 1] == 'M') {
183
+ if (cPiece[length - 1] == 'M') {
184
+ return rb_funcall(rb_cObject, rb_intern("BigDecimal"), 1, rbPiece);
185
+ }
186
+ else {
187
+ return rb_funcall(rb_cObject, rb_intern("Float"), 1, rbPiece);
188
+ }
189
+ }
190
+ else {
191
+ if (cPiece[length - 1] == 'N') {
192
+ rb_str_set_len(rbPiece, length - 1);
193
+ }
194
+
195
+ return rb_funcall(rb_cObject, rb_intern("Integer"), 1, rbPiece);
196
+ }
197
+ }
198
+
199
+ static VALUE string_read_char (VALUE self, char* string, size_t* position)
200
+ {
201
+ SEEK(1);
202
+
203
+ if (IS_EOF_AFTER(1) || IS_BOTH(AFTER(1))) {
204
+ SEEK(1); return rb_str_new(BEFORE_PTR(1), 1);
205
+ }
206
+ else if (IS_NOT_EOF_UP_TO(7) && IS_EQUAL_UP_TO("newline", 7) && (IS_EOF_AFTER(7) || IS_BOTH(AFTER(7)))) {
207
+ SEEK(7); return rb_str_new2("\n");
208
+ }
209
+ else if (IS_NOT_EOF_UP_TO(5) && IS_EQUAL_UP_TO("space", 5) && (IS_EOF_AFTER(5) || IS_BOTH(AFTER(5)))) {
210
+ SEEK(5); return rb_str_new2(" ");
211
+ }
212
+ else if (IS_NOT_EOF_UP_TO(3) && IS_EQUAL_UP_TO("tab", 3) && (IS_EOF_AFTER(3) || IS_BOTH(AFTER(3)))) {
213
+ SEEK(3); return rb_str_new2("\t");
214
+ }
215
+ else if (IS_NOT_EOF_UP_TO(9) && IS_EQUAL_UP_TO("backspace", 9) && (IS_EOF_AFTER(9) || IS_BOTH(AFTER(9)))) {
216
+ SEEK(9); return rb_str_new2("\b");
217
+ }
218
+ else if (IS_NOT_EOF_UP_TO(8) && IS_EQUAL_UP_TO("formfeed", 8) && (IS_EOF_AFTER(8) || IS_BOTH(AFTER(8)))) {
219
+ SEEK(8); return rb_str_new2("\f");
220
+ }
221
+ else if (IS_NOT_EOF_UP_TO(6) && IS_EQUAL_UP_TO("return", 6) && (IS_EOF_AFTER(6) || IS_BOTH(AFTER(6)))) {
222
+ SEEK(6); return rb_str_new2("\r");
223
+ }
224
+ else if (CURRENT == 'u' && IS_NOT_EOF_UP_TO(5) && !NIL_P(rb_funcall(rb_str_new(AFTER_PTR(1), 4), rb_intern("=~"), 1, UNICODE_REGEX)) && (IS_EOF_AFTER(5) || IS_BOTH(AFTER(5)))) {
225
+ SEEK(5); return rb_funcall(rb_ary_new3(1, rb_funcall(rb_str_new(BEFORE_PTR(4), 4), rb_intern("to_i"), 1, INT2FIX(16))),
226
+ rb_intern("pack"), 1, rb_str_new2("U"));
227
+ }
228
+ else if (CURRENT == 'o') {
229
+ size_t length = 1;
230
+
231
+ for (size_t i = 1; i < 5; i++) {
232
+ if (IS_EOF_AFTER(i) || IS_BOTH(AFTER(i))) {
233
+ break;
234
+ }
235
+
236
+ length++;
237
+ }
238
+
239
+ if (length > 1 && !NIL_P(rb_funcall(rb_str_new(AFTER_PTR(1), length - 1), rb_intern("=~"), 1, OCTAL_REGEX)) && (IS_EOF_AFTER(length) || IS_BOTH(AFTER(length)))) {
240
+ SEEK(length); return rb_funcall(rb_funcall(rb_str_new(BEFORE_PTR(length - 1), length - 1), rb_intern("to_i"), 1, INT2FIX(8)),
241
+ rb_intern("chr"), 0);
242
+ }
243
+ }
244
+
245
+ // TODO: add unicode and octal chars support
246
+
247
+ rb_raise(rb_eSyntaxError, "unknown character type");
248
+ }
249
+
250
+ static VALUE string_read_keyword (VALUE self, char* string, size_t* position)
251
+ {
252
+ size_t length = 0;
253
+
254
+ SEEK(1);
255
+
256
+ while (!IS_EOF_AFTER(length) && !IS_KEYWORD(AFTER(length))) {
257
+ length++;
258
+ }
259
+
260
+ SEEK(length);
261
+
262
+ return rb_funcall(rb_str_new(BEFORE_PTR(length), length), rb_intern("to_sym"), 0);
263
+ }
264
+
265
+ static VALUE string_read_string (VALUE self, char* string, size_t* position)
266
+ {
267
+ size_t length = 0;
268
+
269
+ SEEK(1);
270
+
271
+ while (AFTER(length) != '"') {
272
+ if (IS_EOF_AFTER(length)) {
273
+ rb_raise(rb_eSyntaxError, "unexpected EOF");
274
+ }
275
+
276
+ if (AFTER(length) == '\\') {
277
+ length++;
278
+ }
279
+
280
+ length++;
281
+ }
282
+
283
+ SEEK(length + 1);
284
+
285
+ // TODO: make the escapes work properly
286
+
287
+ return rb_funcall(cClojure, rb_intern("unescape"), 1, rb_str_new(BEFORE_PTR(length + 1), length));
288
+ }
289
+
290
+ static VALUE string_read_regexp (VALUE self, char* string, size_t* position)
291
+ {
292
+ size_t length = 0;
293
+ VALUE args[] = { Qnil };
294
+
295
+ SEEK(2);
296
+
297
+ while (AFTER(length) != '"') {
298
+ if (IS_EOF_AFTER(length)) {
299
+ rb_raise(rb_eSyntaxError, "unexpected EOF");
300
+ }
301
+
302
+ if (AFTER(length) == '\\') {
303
+ length++;
304
+ }
305
+
306
+ length++;
307
+ }
308
+
309
+ SEEK(length + 1);
310
+
311
+ args[0] = rb_str_new(BEFORE_PTR(length + 1), length);
312
+
313
+ return rb_class_new_instance(1, args, rb_cRegexp);
314
+ }
315
+
316
+ static VALUE string_read_instant (VALUE self, char* string, size_t* position)
317
+ {
318
+ SEEK(1);
319
+
320
+ if (!IS_NOT_EOF_UP_TO(4)) {
321
+ rb_raise(rb_eSyntaxError, "unexpected EOF");
322
+ }
323
+
324
+ if (!IS_EQUAL_UP_TO("inst", 4)) {
325
+ rb_raise(rb_eSyntaxError, "expected inst, got %c%c%c%c", AFTER(0), AFTER(1), AFTER(2), AFTER(3));
326
+ }
327
+
328
+ SEEK(4);
329
+
330
+ CALL(string_ignore);
331
+
332
+ return rb_funcall(rb_const_get(rb_cObject, rb_intern("DateTime")), rb_intern("rfc3339"), 1, CALL(string_read_string));
333
+ }
334
+
335
+ static VALUE string_read_list (VALUE self, char* string, size_t* position)
336
+ {
337
+ VALUE result = rb_class_new_instance(0, NULL, rb_iv_get(self, "@list_class"));
338
+
339
+ SEEK(1); CALL(string_ignore);
340
+
341
+ while (CURRENT != ')') {
342
+ rb_funcall(result, rb_intern("<<"), 1, CALL(string_read_next));
343
+
344
+ CALL(string_ignore);
345
+ }
346
+
347
+ SEEK(1);
348
+
349
+ return result;
350
+ }
351
+
352
+ static VALUE string_read_vector (VALUE self, char* string, size_t* position)
353
+ {
354
+ VALUE result = rb_class_new_instance(0, NULL, rb_iv_get(self, "@vector_class"));
355
+
356
+ SEEK(1); CALL(string_ignore);
357
+
358
+ while (CURRENT != ']') {
359
+ rb_funcall(result, rb_intern("<<"), 1, CALL(string_read_next));
360
+
361
+ CALL(string_ignore);
362
+ }
363
+
364
+ SEEK(1);
365
+
366
+ return result;
367
+ }
368
+
369
+ static VALUE string_read_set (VALUE self, char* string, size_t* position)
370
+ {
371
+ VALUE result = rb_class_new_instance(0, NULL, rb_iv_get(self, "@set_class"));
372
+
373
+ SEEK(2); CALL(string_ignore);
374
+
375
+ while (CURRENT != '}') {
376
+ rb_funcall(result, rb_intern("<<"), 1, CALL(string_read_next));
377
+
378
+ CALL(string_ignore);
379
+ }
380
+
381
+ SEEK(1);
382
+
383
+ if (!NIL_P(rb_funcall(result, rb_intern("uniq!"), 0))) {
384
+ rb_raise(rb_eSyntaxError, "the set contains non unique values");
385
+ }
386
+
387
+ return result;
388
+ }
389
+
390
+ static VALUE string_read_map (VALUE self, char* string, size_t* position)
391
+ {
392
+ VALUE result = rb_class_new_instance(0, NULL, rb_iv_get(self, "@map_class"));
393
+ VALUE key;
394
+ VALUE value;
395
+
396
+ SEEK(1); CALL(string_ignore);
397
+
398
+ while (CURRENT != '}') {
399
+ key = CALL(string_read_next);
400
+ CALL(string_ignore);
401
+ value = CALL(string_read_next);
402
+
403
+ rb_funcall(result, rb_intern("[]="), 2, key, value);
404
+ }
405
+
406
+ SEEK(1);
407
+
408
+ return result;
409
+ }
410
+
411
+ static VALUE string_read_next (VALUE self, char* string, size_t* position)
412
+ {
413
+ CALL(string_ignore);
414
+
415
+ if (IS_EOF) {
416
+ rb_raise(rb_eSyntaxError, "unexpected EOF");
417
+ }
418
+
419
+ switch (CALL(string_next_type)) {
420
+ case NODE_METADATA: return CALL(string_read_metadata);
421
+ case NODE_NUMBER: return CALL(string_read_number);
422
+ case NODE_BOOLEAN: return CALL(string_read_boolean);
423
+ case NODE_NIL: return CALL(string_read_nil);
424
+ case NODE_CHAR: return CALL(string_read_char);
425
+ case NODE_KEYWORD: return CALL(string_read_keyword);
426
+ case NODE_STRING: return CALL(string_read_string);
427
+ case NODE_MAP: return CALL(string_read_map);
428
+ case NODE_LIST: return CALL(string_read_list);
429
+ case NODE_VECTOR: return CALL(string_read_vector);
430
+ case NODE_INSTANT: return CALL(string_read_instant);
431
+ case NODE_SET: return CALL(string_read_set);
432
+ case NODE_REGEXP: return CALL(string_read_regexp);
433
+ }
434
+ }
435
+
436
+ static VALUE string_parse (VALUE self)
437
+ {
438
+ size_t position = 0;
439
+ VALUE source = rb_iv_get(self, "@source");
440
+
441
+ return string_read_next(self, StringValueCStr(source), &position);
442
+ }
443
+
444
+ #undef IS_EOF
445
+ #undef IS_EOF_AFTER
446
+ #undef CURRENT
447
+ #undef CURRENT_PTR
448
+ #undef AFTER
449
+ #undef AFTER_PTR
450
+ #undef BEFORE
451
+ #undef BEFORE_PTR
452
+ #undef SEEK
453
+ #undef IS_IGNORED
454
+ #undef IS_BOTH
455
+ #undef IS_KEYWORD
456
+ #undef IS_NOT_EOF_UP_TO
457
+ #undef IS_EQUAL_UP_TO
458
+ #undef IS_EQUAL
459
+ #undef CALL
460
+ #endif
data/lib/clj.rb CHANGED
@@ -11,10 +11,7 @@
11
11
  require 'date'
12
12
  require 'bigdecimal'
13
13
 
14
- require 'clj/parser'
15
- require 'clj/types'
16
-
17
- class Clojure
14
+ module Clojure
18
15
  def self.parse (*args)
19
16
  Clojure::Parser.new(*args).parse
20
17
  end
@@ -24,4 +21,59 @@ class Clojure
24
21
 
25
22
  what.to_clj(options)
26
23
  end
24
+
25
+ UNESCAPE_REGEX = %r((?:\\[\\bfnrt"/]|(?:\\u(?:[A-Fa-f\d]{4}))+|\\[\x20-\xff]))n
26
+
27
+ # Unescape characters in strings.
28
+ UNESCAPE_MAP = Hash.new { |h, k| h[k] = k.chr }
29
+ UNESCAPE_MAP.merge!(
30
+ ?" => '"',
31
+ ?\\ => '\\',
32
+ ?/ => '/',
33
+ ?b => "\b",
34
+ ?f => "\f",
35
+ ?n => "\n",
36
+ ?r => "\r",
37
+ ?t => "\t",
38
+ ?u => nil
39
+ )
40
+
41
+ EMPTY_8BIT_STRING = ''
42
+
43
+ if EMPTY_8BIT_STRING.respond_to? :force_encoding
44
+ EMPTY_8BIT_STRING.force_encoding Encoding::ASCII_8BIT
45
+ end
46
+
47
# Return a copy of string where every escape sequence matched by
# UNESCAPE_REGEX is replaced: sequences known to UNESCAPE_MAP become the
# mapped character, runs of \uXXXX are decoded as UTF-16BE code units and
# converted to UTF-8 where encodings are available.
def self.unescape (string)
	string.gsub(UNESCAPE_REGEX) {|escape|
		simple = UNESCAPE_MAP[escape[1]]

		next simple if simple

		bytes = EMPTY_8BIT_STRING.dup

		# Every \uXXXX takes six characters and yields two bytes, high byte
		# first.
		offset = 0

		while escape[offset] == ?\\ && escape[offset + 1] == ?u
			bytes << escape[offset + 2, 2].to_i(16) << escape[offset + 4, 2].to_i(16)

			offset += 6
		end

		if bytes.respond_to? :force_encoding
			bytes.force_encoding 'UTF-16be'
			bytes.encode 'UTF-8'
		else
			bytes
		end
	}
end
71
+ end
72
+
73
+ require 'clj/types'
74
+
75
+ if RUBY_ENGINE == 'ruby' || RUBY_ENGINE == 'rbx'
76
+ require 'clj/parser_ext'
77
+ else
78
+ require 'clj/parser'
27
79
  end
@@ -10,43 +10,22 @@
10
10
 
11
11
  require 'stringio'
12
12
 
13
- class Clojure
13
+ module Clojure
14
14
 
15
15
  class Parser
16
16
  NUMBERS = '0' .. '9'
17
17
 
18
- STRING_REGEX = %r((?:\\[\\bfnrt"/]|(?:\\u(?:[A-Fa-f\d]{4}))+|\\[\x20-\xff]))n
19
- UNICODE_REGEX = /u([0-9|a-f|A-F]{4})/
20
- OCTAL_REGEX = /o([0-3][0-7]?[0-7]?)/
21
-
22
- # Unescape characters in strings.
23
- UNESCAPE_MAP = Hash.new { |h, k| h[k] = k.chr }
24
- UNESCAPE_MAP.merge!(
25
- ?" => '"',
26
- ?\\ => '\\',
27
- ?/ => '/',
28
- ?b => "\b",
29
- ?f => "\f",
30
- ?n => "\n",
31
- ?r => "\r",
32
- ?t => "\t",
33
- ?u => nil
34
- )
35
-
36
- EMPTY_8BIT_STRING = ''
37
-
38
- if EMPTY_8BIT_STRING.respond_to? :force_encoding
39
- EMPTY_8BIT_STRING.force_encoding Encoding::ASCII_8BIT
40
- end
18
+ UNICODE_REGEX = /[0-9|a-f|A-F]{4}/
19
+ OCTAL_REGEX = /[0-3]?[0-7]?[0-7]/
41
20
 
42
21
  def initialize (source, options = {})
43
22
  @source = source.is_a?(String) ? StringIO.new(source) : source
44
23
  @options = options
45
24
 
46
- @map_class = options[:map_class] || Hash
47
- @vector_class = options[:vector_class] || Array
48
- @list_class = options[:list_class] || Array
49
- @set_class = options[:set_class] || Array
25
+ @map_class = options[:map_class] || Clojure::Map
26
+ @vector_class = options[:vector_class] || Clojure::Vector
27
+ @list_class = options[:list_class] || Clojure::List
28
+ @set_class = options[:set_class] || Clojure::Set
50
29
  end
51
30
 
52
31
  def parse
@@ -56,6 +35,7 @@ class Parser
56
35
  private
57
36
  def next_type (ch)
58
37
  case ch
38
+ when '^' then :metadata
59
39
  when NUMBERS, '-', '+' then :number
60
40
  when 't', 'f' then :boolean
61
41
  when 'n' then :nil
@@ -82,6 +62,26 @@ private
82
62
  __send__ "read_#{next_type ch}", ch
83
63
  end
84
64
 
65
+ def read_metadata (ch)
66
+ metadatas = [read_next]
67
+
68
+ while lookahead(1) == '^'
69
+ raise SyntaxError, 'unexpected EOF' unless @source.read(1)
70
+
71
+ metadatas.push(read_next)
72
+ end
73
+
74
+ value = read_next
75
+
76
+ unless value.respond_to? :metadata=
77
+ raise SyntaxError, 'the object cannot hold metadata'
78
+ end
79
+
80
+ metadatas.each { |m| value.metadata = m }
81
+
82
+ value
83
+ end
84
+
85
85
  def read_nil (ch)
86
86
  check = @source.read(2)
87
87
 
@@ -135,8 +135,6 @@ private
135
135
  number.to_i(base.to_i)
136
136
  elsif piece.include? '.' or piece.include? 'e' or piece.include? 'E' or piece.end_with? 'M'
137
137
  if piece.end_with? 'M'
138
- piece[-1] = ''
139
-
140
138
  BigDecimal(piece)
141
139
  else
142
140
  Float(piece)
@@ -165,13 +163,15 @@ private
165
163
  @source.read(8) and "\f"
166
164
  elsif (ahead = lookahead(7)) && ahead[0, 6] == 'return' && (!ahead[6] || both?(ahead[6]))
167
165
  @source.read(6) and "\r"
168
- elsif (ahead = lookahead(6)) && ahead[0, 5] =~ UNICODE_REGEX && (!ahead[5] || both?(ahead[5]))
166
+ elsif (ahead = lookahead(6)) && ahead[0] == 'u' && ahead[1, 5] =~ UNICODE_REGEX && (!ahead[5] || both?(ahead[5]))
169
167
  [@source.read(5)[1, 4].to_i(16)].pack('U')
170
- elsif (ahead = lookahead(5)) && ahead[0, 4] =~ OCTAL_REGEX && (!ahead[4] || both?(ahead[4]))
171
- @source.read(4)[1, 3].to_i(8).chr
172
- else
173
- raise SyntaxError, 'unknown character type'
174
- end
168
+ elsif (ahead = lookahead(5)) && ahead[0] == 'o' && matches = ahead[1, 3].match(OCTAL_REGEX)
169
+ length = matches[0].length + 1
170
+
171
+ if !ahead[length] || both?(ahead[length])
172
+ @source.read(length)[1, 3].to_i(8).chr
173
+ end
174
+ end or raise SyntaxError, 'unknown character type'
175
175
  end
176
176
 
177
177
  def read_keyword (ch)
@@ -199,31 +199,17 @@ private
199
199
  end
200
200
  end
201
201
 
202
- result.gsub(STRING_REGEX) {|escape|
203
- if u = UNESCAPE_MAP[$&[1]]
204
- next u
205
- end
206
-
207
- bytes = EMPTY_8BIT_STRING.dup
208
-
209
- i = 0
210
- while escape[6 * i] == ?\\ && escape[6 * i + 1] == ?u
211
- bytes << escape[6 * i + 2, 2].to_i(16) << escape[6 * i + 4, 2].to_i(16)
212
-
213
- i += 1
214
- end
215
-
216
- if bytes.respond_to? :force_encoding
217
- bytes.force_encoding 'UTF-16be'
218
- bytes.encode 'UTF-8'
219
- else
220
- bytes
221
- end
222
- }
202
+ Clojure.unescape(result)
223
203
  end
224
204
 
225
205
  def read_instant (ch)
226
- @source.read(3)
206
+ check = @source.read(3)
207
+
208
+ if check.length != 3
209
+ raise SyntaxError, 'unexpected EOF'
210
+ elsif check != 'nst'
211
+ raise SyntaxError, "expected inst, found i#{check}"
212
+ end
227
213
 
228
214
  DateTime.rfc3339(read_string(ignore(false)))
229
215
  end
@@ -311,14 +297,12 @@ private
311
297
  result
312
298
  end
313
299
 
314
- def unescape (string)
315
- string
316
- end
317
-
318
300
  def lookahead (length)
319
301
  result = @source.read(length)
320
302
 
321
- @source.seek(-result.length, IO::SEEK_CUR)
303
+ if result
304
+ @source.seek(-result.length, IO::SEEK_CUR)
305
+ end
322
306
 
323
307
  result
324
308
  end
@@ -334,27 +318,15 @@ private
334
318
  end
335
319
 
336
320
  def ignore? (ch)
337
- if ch == ' ' || ch == ',' || ch == "\n" || ch == "\r" || ch == "\t"
338
- true
339
- else
340
- false
341
- end
321
+ ch == ' ' || ch == ',' || ch == "\n" || ch == "\r" || ch == "\t"
342
322
  end
343
323
 
344
324
  def both? (ch)
345
- if ch == ' ' || ch == ',' || ch == '"' || ch == '{' || ch == '}' || ch == '(' || ch == ')' || ch == '[' || ch == ']' || ch == '#' || ch == ':' || ch == "\n" || ch == "\r" || ch == "\t"
346
- true
347
- else
348
- false
349
- end
325
+ ch == ' ' || ch == ',' || ch == '"' || ch == '{' || ch == '}' || ch == '(' || ch == ')' || ch == '[' || ch == ']' || ch == '#' || ch == ':' || ch == "\n" || ch == "\r" || ch == "\t"
350
326
  end
351
327
 
352
328
  def keyword? (ch)
353
- if ch == ' ' || ch == ',' || ch == '"' || ch == '{' || ch == '}' || ch == '(' || ch == ')' || ch == '[' || ch == ']' || ch == '#' || ch == ':' || ch == "'" || ch == '^' || ch == '@' || ch == '`' || ch == '~' || ch == '\\' || ch == ';' || ch == "\n" || ch == "\r" || ch == "\t"
354
- true
355
- else
356
- false
357
- end
329
+ ch == ' ' || ch == ',' || ch == '"' || ch == '{' || ch == '}' || ch == '(' || ch == ')' || ch == '[' || ch == ']' || ch == '#' || ch == ':' || ch == "'" || ch == '^' || ch == '@' || ch == '`' || ch == '~' || ch == '\\' || ch == ';' || ch == "\n" || ch == "\r" || ch == "\t"
358
330
  end
359
331
  end
360
332
 
@@ -8,6 +8,73 @@
8
8
  # 0. You just DO WHAT THE FUCK YOU WANT TO.
9
9
  #++
10
10
 
11
module Clojure
	# Gives an object a metadata map and the means to emit it in front of the
	# object's own Clojure representation.
	module Metadata
		# The metadata map, created empty the first time it is asked for.
		def metadata
			@metadata ||= Clojure::Map.new
		end

		# Merge value into the metadata: a Hash is merged as is, a Symbol is
		# turned into a flag set to true and a String is stored under :tag.
		# Anything else raises ArgumentError.
		def metadata= (value)
			target = metadata

			addition = case value
				when Hash   then value
				when Symbol then { value => true }
				when String then { :tag => value }
				else raise ArgumentError, 'the passed value is not suitable as metadata'
			end

			target.merge! addition
		end

		# The metadata rendered as a '^... ' prefix, or an empty string when
		# there is none or when the :metadata option is false. A lone flag or
		# a lone tag uses the short form, everything else the map form.
		def metadata_to_clj (options = {})
			return '' if options[:metadata] == false
			return '' if !@metadata || @metadata.empty?

			rendered = nil

			if @metadata.length == 1
				key, value = @metadata.first

				if key.is_a?(Symbol) && value == true
					rendered = key.to_clj(options)
				elsif key == :tag && value.is_a?(String)
					rendered = value.to_clj(options)
				end
			end

			rendered ||= @metadata.to_clj(options)

			'^' + rendered + ' '
		end
	end

	# A Hash emitted as a Clojure map.
	class Map < Hash
		include Clojure::Metadata

		def to_clj (options = {})
			pairs = map { |k, v| k.to_clj(options) + ' ' + v.to_clj(options) }

			metadata_to_clj(options) + '{' + pairs.join(' ') + '}'
		end
	end

	# An Array emitted as a Clojure vector.
	class Vector < Array
		include Clojure::Metadata

		def to_clj (options = {})
			items = map { |o| o.to_clj(options) }

			metadata_to_clj(options) + '[' + items.join(' ') + ']'
		end
	end

	# An Array emitted as a Clojure list.
	class List < Array
		include Clojure::Metadata

		def to_clj (options = {})
			items = map { |o| o.to_clj(options) }

			metadata_to_clj(options) + '(' + items.join(' ') + ')'
		end
	end

	# An Array emitted as a Clojure set, duplicates being left out.
	class Set < Array
		include Clojure::Metadata

		def to_clj (options = {})
			items = uniq.map { |o| o.to_clj(options) }

			metadata_to_clj(options) + '#{' + items.join(' ') + '}'
		end
	end
end
77
+
11
78
  [Numeric, TrueClass, FalseClass, NilClass].each {|klass|
12
79
  klass.instance_eval {
13
80
  define_method :to_clj do |*|
@@ -57,11 +124,7 @@ end
57
124
 
58
125
  class DateTime
59
126
  def to_clj (options = {})
60
- if options[:alpha]
61
- '#inst "' + rfc3339 + '"'
62
- else
63
- to_time.to_i.to_s
64
- end
127
+ options[:alpha] ? '#inst "' + rfc3339 + '"' : to_time.to_i.to_s
65
128
  end
66
129
  end
67
130
 
@@ -91,12 +154,28 @@ end
91
154
 
92
155
  class Array
93
156
  def to_clj (options = {})
94
- '[' + map { |o| o.to_clj(options) }.join(' ') + ']'
157
+ to_vector.to_clj(options)
158
+ end
159
+
160
+ def to_set
161
+ Clojure::Set.new(self)
162
+ end
163
+
164
+ def to_vector
165
+ Clojure::Vector.new(self)
166
+ end
167
+
168
+ def to_list
169
+ Clojure::List.new(self)
95
170
  end
96
171
  end
97
172
 
98
173
  class Hash
99
174
  def to_clj (options = {})
100
- '{' + map { |k, v| k.to_clj(options) + ' ' + v.to_clj(options) }.join(' ') + '}'
175
+ to_map.to_clj(options)
176
+ end
177
+
178
+ def to_map
179
+ Clojure::Map[self]
101
180
  end
102
181
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: clj
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5.6
4
+ version: 0.0.6
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-02-16 00:00:00.000000000 Z
12
+ date: 2012-03-01 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
16
- requirement: &4605840 !ruby/object:Gem::Requirement
16
+ requirement: &12555040 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,10 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *4605840
24
+ version_requirements: *12555040
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: rspec
27
- requirement: &4605160 !ruby/object:Gem::Requirement
27
+ requirement: &12553860 !ruby/object:Gem::Requirement
28
28
  none: false
29
29
  requirements:
30
30
  - - ! '>='
@@ -32,13 +32,18 @@ dependencies:
32
32
  version: '0'
33
33
  type: :development
34
34
  prerelease: false
35
- version_requirements: *4605160
35
+ version_requirements: *12553860
36
36
  description:
37
37
  email: meh@paranoici.org
38
38
  executables: []
39
- extensions: []
39
+ extensions:
40
+ - ext/clj/extconf.rb
40
41
  extra_rdoc_files: []
41
42
  files:
43
+ - ext/clj/parser.c
44
+ - ext/clj/io_parser.c
45
+ - ext/clj/string_parser.c
46
+ - ext/clj/extconf.rb
42
47
  - lib/clj.rb
43
48
  - lib/clj/types.rb
44
49
  - lib/clj/parser.rb
@@ -62,7 +67,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
62
67
  version: '0'
63
68
  requirements: []
64
69
  rubyforge_project:
65
- rubygems_version: 1.8.15
70
+ rubygems_version: 1.8.16
66
71
  signing_key:
67
72
  specification_version: 3
68
73
  summary: Like json, but with clojure sexps.