yajl-ruby 1.3.1 → 1.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of yajl-ruby might be problematic. Click here for more details.
- checksums.yaml +5 -5
- data/.github/workflows/ci.yml +26 -0
- data/README.md +1 -1
- data/ext/yajl/api/yajl_common.h +6 -0
- data/ext/yajl/api/yajl_gen.h +7 -2
- data/ext/yajl/api/yajl_parse.h +3 -1
- data/ext/yajl/extconf.rb +6 -1
- data/ext/yajl/yajl.c +17 -1
- data/ext/yajl/yajl_buf.c +83 -2
- data/ext/yajl/yajl_buf.h +9 -0
- data/ext/yajl/yajl_bytestack.h +28 -11
- data/ext/yajl/yajl_encode.c +39 -3
- data/ext/yajl/yajl_ext.c +512 -31
- data/ext/yajl/yajl_ext.h +2 -2
- data/ext/yajl/yajl_gen.c +35 -1
- data/ext/yajl/yajl_lex.c +20 -10
- data/ext/yajl/yajl_lex.h +18 -16
- data/ext/yajl/yajl_parser.c +16 -2
- data/lib/yajl/version.rb +1 -1
- data/lib/yajl.rb +7 -0
- data/spec/encoding/encoding_spec.rb +26 -0
- data/spec/projection/project_file.rb +41 -0
- data/spec/projection/projection.rb +498 -0
- data/yajl-ruby.gemspec +5 -5
- metadata +37 -20
- data/.travis.yml +0 -9
data/ext/yajl/yajl_ext.c
CHANGED
@@ -22,6 +22,12 @@
|
|
22
22
|
*/
|
23
23
|
|
24
24
|
#include "yajl_ext.h"
|
25
|
+
#include "yajl_lex.h"
|
26
|
+
#include "yajl_alloc.h"
|
27
|
+
#include "yajl_buf.h"
|
28
|
+
#include "yajl_encode.h"
|
29
|
+
#include "api/yajl_common.h"
|
30
|
+
#include "assert.h"
|
25
31
|
|
26
32
|
#define YAJL_RB_TO_JSON \
|
27
33
|
VALUE rb_encoder, cls; \
|
@@ -32,6 +38,25 @@
|
|
32
38
|
} \
|
33
39
|
return rb_yajl_encoder_encode(1, &self, rb_encoder); \
|
34
40
|
|
41
|
+
/*
 * yajl allocation hook backed by Ruby's xmalloc.
 *
 * ctx - allocator context supplied by yajl; not used here.
 * sz  - number of bytes requested.
 *
 * Returns whatever xmalloc returns for sz bytes.
 */
static void *rb_internal_malloc(void *ctx, unsigned int sz) {
    void *mem = xmalloc(sz);
    return mem;
}
|
44
|
+
|
45
|
+
/*
 * yajl reallocation hook backed by Ruby's xrealloc.
 *
 * ctx      - allocator context supplied by yajl; not used here.
 * previous - block to resize.
 * sz       - new size in bytes.
 *
 * Returns whatever xrealloc returns for the resized block.
 */
static void *rb_internal_realloc(void *ctx, void *previous, unsigned int sz) {
    void *resized = xrealloc(previous, sz);
    return resized;
}
|
48
|
+
|
49
|
+
/*
 * yajl release hook backed by Ruby's xfree.
 *
 * ctx - allocator context supplied by yajl; not used here.
 * ptr - block to release.
 */
static void rb_internal_free(void *ctx, void *ptr) {
    (void)ctx;
    xfree(ptr);
}
|
52
|
+
|
53
|
+
static yajl_alloc_funcs rb_alloc_funcs = {
|
54
|
+
rb_internal_malloc,
|
55
|
+
rb_internal_realloc,
|
56
|
+
rb_internal_free,
|
57
|
+
NULL
|
58
|
+
};
|
59
|
+
|
35
60
|
/* Helpers for building objects */
|
36
61
|
static void yajl_check_and_fire_callback(void * ctx) {
|
37
62
|
yajl_parser_wrapper * wrapper;
|
@@ -39,12 +64,12 @@ static void yajl_check_and_fire_callback(void * ctx) {
|
|
39
64
|
|
40
65
|
/* No need to do any of this if the callback isn't even setup */
|
41
66
|
if (wrapper->parse_complete_callback != Qnil) {
|
42
|
-
|
67
|
+
long len = RARRAY_LEN(wrapper->builderStack);
|
43
68
|
if (len == 1 && wrapper->nestedArrayLevel == 0 && wrapper->nestedHashLevel == 0) {
|
44
69
|
rb_funcall(wrapper->parse_complete_callback, intern_call, 1, rb_ary_pop(wrapper->builderStack));
|
45
70
|
}
|
46
71
|
} else {
|
47
|
-
|
72
|
+
long len = RARRAY_LEN(wrapper->builderStack);
|
48
73
|
if (len == 1 && wrapper->nestedArrayLevel == 0 && wrapper->nestedHashLevel == 0) {
|
49
74
|
wrapper->objectsFound++;
|
50
75
|
if (wrapper->objectsFound > 1) {
|
@@ -68,7 +93,11 @@ static char *yajl_raise_encode_error_for_status(yajl_gen_status status, VALUE ob
|
|
68
93
|
rb_raise(cEncodeError, "Invalid number: cannot encode Infinity, -Infinity, or NaN");
|
69
94
|
case yajl_gen_no_buf:
|
70
95
|
rb_raise(cEncodeError, "YAJL internal error: yajl_gen_get_buf was called, but a print callback was specified, so no internal buffer is available");
|
96
|
+
case yajl_gen_alloc_error:
|
97
|
+
rb_raise(cEncodeError, "YAJL internal error: failed to allocate memory");
|
71
98
|
default:
|
99
|
+
// fixme: why wasn't this already here??
|
100
|
+
rb_raise(cEncodeError, "Encountered unknown YAJL status %d during JSON generation", status);
|
72
101
|
return NULL;
|
73
102
|
}
|
74
103
|
}
|
@@ -76,7 +105,7 @@ static char *yajl_raise_encode_error_for_status(yajl_gen_status status, VALUE ob
|
|
76
105
|
static void yajl_set_static_value(void * ctx, VALUE val) {
|
77
106
|
yajl_parser_wrapper * wrapper;
|
78
107
|
VALUE lastEntry, hash;
|
79
|
-
|
108
|
+
long len;
|
80
109
|
|
81
110
|
GetParser((VALUE)ctx, wrapper);
|
82
111
|
|
@@ -130,18 +159,47 @@ static void yajl_encoder_wrapper_mark(void * wrapper) {
|
|
130
159
|
}
|
131
160
|
}
|
132
161
|
|
162
|
+
/*
 * Coerce a hash key into a Ruby String suitable for use as a JSON key.
 *
 * Strings are returned untouched, Symbols go through rb_sym2str, and
 * every other object is converted by calling its #to_s.
 */
static VALUE yajl_key_to_string(VALUE obj) {
    int kind = TYPE(obj);

    if (kind == T_STRING) {
        return obj;
    }
    if (kind == T_SYMBOL) {
        return rb_sym2str(obj);
    }
    return rb_funcall(obj, intern_to_s, 0);
}
|
172
|
+
|
173
|
+
void yajl_encode_part(void * wrapper, VALUE obj, VALUE io);
|
174
|
+
struct yajl_encode_hash_iter {
|
175
|
+
void *w;
|
176
|
+
VALUE io;
|
177
|
+
};
|
178
|
+
|
179
|
+
/*
 * rb_hash_foreach callback: encodes one key/value pair of a Hash.
 *
 * key, val - the current entry.
 * iter_v   - a struct yajl_encode_hash_iter pointer smuggled through VALUE.
 *
 * Always returns ST_CONTINUE so iteration visits every entry.
 */
static int yajl_encode_part_hash_i(VALUE key, VALUE val, VALUE iter_v) {
    struct yajl_encode_hash_iter *state = (struct yajl_encode_hash_iter *)iter_v;

    /* the key is coerced to a String before being emitted */
    VALUE json_key = yajl_key_to_string(key);
    yajl_encode_part(state->w, json_key, state->io);

    /* then the value that belongs to it */
    yajl_encode_part(state->w, val, state->io);

    return ST_CONTINUE;
}
|
191
|
+
|
133
192
|
#define CHECK_STATUS(call) \
|
134
193
|
if ((status = (call)) != yajl_gen_status_ok) { break; }
|
135
194
|
|
136
195
|
void yajl_encode_part(void * wrapper, VALUE obj, VALUE io) {
|
137
|
-
VALUE str, outBuff
|
196
|
+
VALUE str, outBuff;
|
138
197
|
yajl_encoder_wrapper * w = wrapper;
|
139
198
|
yajl_gen_status status;
|
140
199
|
int idx = 0;
|
141
200
|
const unsigned char * buffer;
|
142
201
|
const char * cptr;
|
143
202
|
unsigned int len;
|
144
|
-
VALUE keys, entry, keyStr;
|
145
203
|
|
146
204
|
if (io != Qnil || w->on_progress_callback != Qnil) {
|
147
205
|
status = yajl_gen_get_buf(w->encoder, &buffer, &len);
|
@@ -163,24 +221,19 @@ void yajl_encode_part(void * wrapper, VALUE obj, VALUE io) {
|
|
163
221
|
case T_HASH:
|
164
222
|
CHECK_STATUS(yajl_gen_map_open(w->encoder));
|
165
223
|
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
keyStr = rb_funcall(entry, intern_to_s, 0); /* key must be a string */
|
171
|
-
/* the key */
|
172
|
-
yajl_encode_part(w, keyStr, io);
|
173
|
-
/* the value */
|
174
|
-
yajl_encode_part(w, rb_hash_aref(obj, entry), io);
|
175
|
-
}
|
224
|
+
struct yajl_encode_hash_iter iter;
|
225
|
+
iter.w = w;
|
226
|
+
iter.io = io;
|
227
|
+
rb_hash_foreach(obj, yajl_encode_part_hash_i, (VALUE)&iter);
|
176
228
|
|
177
229
|
CHECK_STATUS(yajl_gen_map_close(w->encoder));
|
178
230
|
break;
|
179
231
|
case T_ARRAY:
|
180
232
|
CHECK_STATUS(yajl_gen_array_open(w->encoder));
|
233
|
+
|
234
|
+
VALUE *ptr = RARRAY_PTR(obj);
|
181
235
|
for(idx=0; idx<RARRAY_LEN(obj); idx++) {
|
182
|
-
|
183
|
-
yajl_encode_part(w, otherObj, io);
|
236
|
+
yajl_encode_part(w, ptr[idx], io);
|
184
237
|
}
|
185
238
|
CHECK_STATUS(yajl_gen_array_close(w->encoder));
|
186
239
|
break;
|
@@ -194,11 +247,13 @@ void yajl_encode_part(void * wrapper, VALUE obj, VALUE io) {
|
|
194
247
|
CHECK_STATUS(yajl_gen_bool(w->encoder, 0));
|
195
248
|
break;
|
196
249
|
case T_FIXNUM:
|
250
|
+
CHECK_STATUS(yajl_gen_long(w->encoder, FIX2LONG(obj)));
|
251
|
+
break;
|
197
252
|
case T_FLOAT:
|
198
253
|
case T_BIGNUM:
|
199
254
|
str = rb_funcall(obj, intern_to_s, 0);
|
200
255
|
cptr = RSTRING_PTR(str);
|
201
|
-
len = RSTRING_LEN(str);
|
256
|
+
len = (unsigned int)RSTRING_LEN(str);
|
202
257
|
if (memcmp(cptr, "NaN", 3) == 0 || memcmp(cptr, "Infinity", 8) == 0 || memcmp(cptr, "-Infinity", 9) == 0) {
|
203
258
|
rb_raise(cEncodeError, "'%s' is an invalid number", cptr);
|
204
259
|
}
|
@@ -206,7 +261,13 @@ void yajl_encode_part(void * wrapper, VALUE obj, VALUE io) {
|
|
206
261
|
break;
|
207
262
|
case T_STRING:
|
208
263
|
cptr = RSTRING_PTR(obj);
|
209
|
-
len = RSTRING_LEN(obj);
|
264
|
+
len = (unsigned int)RSTRING_LEN(obj);
|
265
|
+
CHECK_STATUS(yajl_gen_string(w->encoder, (const unsigned char *)cptr, len));
|
266
|
+
break;
|
267
|
+
case T_SYMBOL:
|
268
|
+
str = rb_sym2str(obj);
|
269
|
+
cptr = RSTRING_PTR(str);
|
270
|
+
len = (unsigned int)RSTRING_LEN(str);
|
210
271
|
CHECK_STATUS(yajl_gen_string(w->encoder, (const unsigned char *)cptr, len));
|
211
272
|
break;
|
212
273
|
default:
|
@@ -214,13 +275,13 @@ void yajl_encode_part(void * wrapper, VALUE obj, VALUE io) {
|
|
214
275
|
str = rb_funcall(obj, intern_to_json, 0);
|
215
276
|
Check_Type(str, T_STRING);
|
216
277
|
cptr = RSTRING_PTR(str);
|
217
|
-
len = RSTRING_LEN(str);
|
278
|
+
len = (unsigned int)RSTRING_LEN(str);
|
218
279
|
CHECK_STATUS(yajl_gen_number(w->encoder, cptr, len));
|
219
280
|
} else {
|
220
281
|
str = rb_funcall(obj, intern_to_s, 0);
|
221
282
|
Check_Type(str, T_STRING);
|
222
283
|
cptr = RSTRING_PTR(str);
|
223
|
-
len = RSTRING_LEN(str);
|
284
|
+
len = (unsigned int)RSTRING_LEN(str);
|
224
285
|
CHECK_STATUS(yajl_gen_string(w->encoder, (const unsigned char *)cptr, len));
|
225
286
|
}
|
226
287
|
break;
|
@@ -253,11 +314,17 @@ void yajl_parse_chunk(const unsigned char * chunk, unsigned int len, yajl_handle
|
|
253
314
|
|
254
315
|
stat = yajl_parse(parser, chunk, len);
|
255
316
|
|
256
|
-
if (stat
|
317
|
+
if (stat == yajl_status_ok || stat == yajl_status_insufficient_data) {
|
318
|
+
// success
|
319
|
+
} else if (stat == yajl_status_error) {
|
257
320
|
unsigned char * str = yajl_get_error(parser, 1, chunk, len);
|
258
321
|
VALUE errobj = rb_exc_new2(cParseError, (const char*) str);
|
259
322
|
yajl_free_error(parser, str);
|
260
323
|
rb_exc_raise(errobj);
|
324
|
+
} else {
|
325
|
+
const char * str = yajl_status_to_string(stat);
|
326
|
+
VALUE errobj = rb_exc_new2(cParseError, (const char*) str);
|
327
|
+
rb_exc_raise(errobj);
|
261
328
|
}
|
262
329
|
}
|
263
330
|
|
@@ -420,7 +487,7 @@ static VALUE rb_yajl_parser_new(int argc, VALUE * argv, VALUE klass) {
|
|
420
487
|
cfg = (yajl_parser_config){allowComments, checkUTF8};
|
421
488
|
|
422
489
|
obj = Data_Make_Struct(klass, yajl_parser_wrapper, yajl_parser_wrapper_mark, yajl_parser_wrapper_free, wrapper);
|
423
|
-
wrapper->parser = yajl_alloc(&callbacks, &cfg,
|
490
|
+
wrapper->parser = yajl_alloc(&callbacks, &cfg, &rb_alloc_funcs, (void *)obj);
|
424
491
|
wrapper->nestedArrayLevel = 0;
|
425
492
|
wrapper->nestedHashLevel = 0;
|
426
493
|
wrapper->objectsFound = 0;
|
@@ -450,13 +517,13 @@ static VALUE rb_yajl_parser_init(int argc, VALUE * argv, VALUE self) {
|
|
450
517
|
* Document-method: parse
|
451
518
|
*
|
452
519
|
* call-seq:
|
453
|
-
* parse(input, buffer_size=
|
454
|
-
* parse(input, buffer_size=
|
520
|
+
* parse(input, buffer_size=8192)
|
521
|
+
* parse(input, buffer_size=8192) { |obj| ... }
|
455
522
|
*
|
456
523
|
* +input+ can either be a string or an IO to parse JSON from
|
457
524
|
*
|
458
525
|
* +buffer_size+ is the size of chunk that will be parsed off the input (if it's an IO) for each loop of the parsing process.
|
459
|
-
*
|
526
|
+
* 8192 is a good balance between the different types of streams (off disk, off a socket, etc...), but this option
|
460
527
|
* is here so the caller can better tune their parsing depending on the type of stream being passed.
|
461
528
|
* A larger read buffer will perform better for files off disk, where as a smaller size may be more efficient for
|
462
529
|
* reading off of a socket directly.
|
@@ -489,13 +556,13 @@ static VALUE rb_yajl_parser_parse(int argc, VALUE * argv, VALUE self) {
|
|
489
556
|
|
490
557
|
if (TYPE(input) == T_STRING) {
|
491
558
|
cptr = RSTRING_PTR(input);
|
492
|
-
len = RSTRING_LEN(input);
|
559
|
+
len = (unsigned int)RSTRING_LEN(input);
|
493
560
|
yajl_parse_chunk((const unsigned char*)cptr, len, wrapper->parser);
|
494
561
|
} else if (rb_respond_to(input, intern_io_read)) {
|
495
562
|
VALUE parsed = rb_str_new(0, FIX2LONG(rbufsize));
|
496
563
|
while (rb_funcall(input, intern_io_read, 2, rbufsize, parsed) != Qnil) {
|
497
564
|
cptr = RSTRING_PTR(parsed);
|
498
|
-
len = RSTRING_LEN(parsed);
|
565
|
+
len = (unsigned int)RSTRING_LEN(parsed);
|
499
566
|
yajl_parse_chunk((const unsigned char*)cptr, len, wrapper->parser);
|
500
567
|
}
|
501
568
|
} else {
|
@@ -535,7 +602,7 @@ static VALUE rb_yajl_parser_parse_chunk(VALUE self, VALUE chunk) {
|
|
535
602
|
|
536
603
|
if (wrapper->parse_complete_callback != Qnil) {
|
537
604
|
const char * cptr = RSTRING_PTR(chunk);
|
538
|
-
len = RSTRING_LEN(chunk);
|
605
|
+
len = (unsigned int)RSTRING_LEN(chunk);
|
539
606
|
yajl_parse_chunk((const unsigned char*)cptr, len, wrapper->parser);
|
540
607
|
} else {
|
541
608
|
rb_raise(cParseError, "The on_parse_complete callback isn't setup, parsing useless.");
|
@@ -560,6 +627,405 @@ static VALUE rb_yajl_parser_set_complete_cb(VALUE self, VALUE callback) {
|
|
560
627
|
return Qnil;
|
561
628
|
}
|
562
629
|
|
630
|
+
/*
|
631
|
+
* An event stream pulls data off the IO source into the buffer,
|
632
|
+
* then runs the lexer over that stream.
|
633
|
+
*/
|
634
|
+
struct yajl_event_stream_s {
|
635
|
+
yajl_alloc_funcs *funcs;
|
636
|
+
|
637
|
+
VALUE stream; // source
|
638
|
+
|
639
|
+
VALUE buffer;
|
640
|
+
unsigned int offset;
|
641
|
+
|
642
|
+
yajl_lexer lexer; // event source
|
643
|
+
};
|
644
|
+
|
645
|
+
typedef struct yajl_event_stream_s *yajl_event_stream_t;
|
646
|
+
|
647
|
+
struct yajl_event_s {
|
648
|
+
yajl_tok token;
|
649
|
+
const char *buf;
|
650
|
+
unsigned int len;
|
651
|
+
};
|
652
|
+
typedef struct yajl_event_s yajl_event_t;
|
653
|
+
|
654
|
+
/*
 * Produce the next lexer event from the stream.
 *
 * parser - event stream state (source IO, chunk buffer, offset, lexer).
 * pop    - non-zero: consume the token via yajl_lex_lex, advancing
 *          parser->offset and filling in the event's buf/len.
 *          zero: only peek via yajl_lex_peek; the offset is left alone
 *          and the event carries just the token kind (buf is NULL, len 0).
 *
 * Whenever the offset has reached the end of the buffer, the buffer is
 * refilled by calling read(current_buffer_length, buffer) on the source.
 * An empty buffer after that read is reported as a yajl_tok_eof event.
 * A yajl_tok_eof coming from the lexer itself only means the current
 * chunk is exhausted, so the loop goes around and refills.
 */
static yajl_event_t yajl_event_stream_next(yajl_event_stream_t parser, int pop) {
    assert(parser->stream);
    assert(parser->buffer);

    while (1) {
        if (parser->offset >= RSTRING_LEN(parser->buffer)) {
            // Refill the buffer
            rb_funcall(parser->stream, intern_io_read, 2, INT2FIX(RSTRING_LEN(parser->buffer)), parser->buffer);
            if (RSTRING_LEN(parser->buffer) == 0) {
                yajl_event_t eof_event = {
                    .token = yajl_tok_eof,
                };
                return eof_event;
            }

            parser->offset = 0;
        }

        // Start from a fully defined event so the peek path never hands
        // indeterminate buf/len values back to the caller.
        yajl_event_t event = {
            .buf = NULL,
            .len = 0,
        };

        yajl_tok token;
        if (pop == 0) {
            token = yajl_lex_peek(parser->lexer, (const unsigned char *)RSTRING_PTR(parser->buffer), (unsigned int)RSTRING_LEN(parser->buffer), parser->offset);

            if (token == yajl_tok_eof) {
                // Nothing more in this chunk: mark it consumed to force a refill
                parser->offset = (unsigned int)RSTRING_LEN(parser->buffer);
                continue;
            }

            event.token = token;

            return event;
        }

        token = yajl_lex_lex(parser->lexer, (const unsigned char *)RSTRING_PTR(parser->buffer), (unsigned int)RSTRING_LEN(parser->buffer), &parser->offset, (const unsigned char **)&event.buf, &event.len);

        if (token == yajl_tok_eof) {
            continue;
        }

        event.token = token;

        return event;
    }

    // Not reachable: the loop above only exits through return.
    return (yajl_event_t){ .buf = NULL, .len = 0 };
}
|
708
|
+
|
709
|
+
static VALUE rb_yajl_projector_filter_array_subtree(yajl_event_stream_t parser, VALUE schema, yajl_event_t event);
|
710
|
+
static VALUE rb_yajl_projector_filter_object_subtree(yajl_event_stream_t parser, VALUE schema, yajl_event_t event);
|
711
|
+
static void rb_yajl_projector_ignore_value(yajl_event_stream_t parser);
|
712
|
+
static void rb_yajl_projector_ignore_container(yajl_event_stream_t parser);
|
713
|
+
static VALUE rb_yajl_projector_build_simple_value(yajl_event_stream_t parser, yajl_event_t event);
|
714
|
+
static VALUE rb_yajl_projector_build_string(yajl_event_stream_t parser, yajl_event_t event);
|
715
|
+
|
716
|
+
/*
 * Build the Ruby value that starts at `event`, applying `schema`.
 *
 * Dispatches on the already-consumed opening token: a left_brace token
 * is handed to the array builder, a left_bracket token to the object
 * builder, and anything else is treated as a simple (scalar) value.
 */
static VALUE rb_yajl_projector_filter(yajl_event_stream_t parser, VALUE schema, yajl_event_t event) {
    assert(parser->stream);

    if (event.token == yajl_tok_left_brace) {
        return rb_yajl_projector_filter_array_subtree(parser, schema, event);
    }

    if (event.token == yajl_tok_left_bracket) {
        return rb_yajl_projector_filter_object_subtree(parser, schema, event);
    }

    return rb_yajl_projector_build_simple_value(parser, event);
}
|
730
|
+
|
731
|
+
/*
 * Build a Ruby Array from the stream, given that the opening token
 * (left_brace) has already been consumed and is passed in as `event`.
 *
 * Every element is built through rb_yajl_projector_filter with the same
 * `schema`. After each element the next token is peeked: a comma must be
 * followed by the start of another value, anything else must be the
 * closing right_brace.
 *
 * Raises Yajl::ParseError on a dangling comma or a missing separator.
 */
static VALUE rb_yajl_projector_filter_array_subtree(yajl_event_stream_t parser, VALUE schema, yajl_event_t event) {
    assert(event.token == yajl_tok_left_brace);

    VALUE ary = rb_ary_new();

    while (1) {
        event = yajl_event_stream_next(parser, 1);

        if (event.token == yajl_tok_right_brace) {
            break;
        }

        VALUE val = rb_yajl_projector_filter(parser, schema, event);
        rb_ary_push(ary, val);

        event = yajl_event_stream_next(parser, 0);
        if (event.token == yajl_tok_comma) {
            event = yajl_event_stream_next(parser, 1);
            assert(event.token == yajl_tok_comma);

            event = yajl_event_stream_next(parser, 0);
            switch (event.token) {
                case yajl_tok_string:
                /* escaped strings are values too; the value builder and the
                 * object path already accept this token */
                case yajl_tok_string_with_escapes:
                case yajl_tok_integer:
                case yajl_tok_double:
                case yajl_tok_null:
                case yajl_tok_bool:
                case yajl_tok_left_bracket:
                case yajl_tok_left_brace:
                    break;
                default:
                    rb_raise(cParseError, "read a comma, expected a value to follow, actually read %s", yajl_tok_name(event.token));
            }
        } else if (event.token != yajl_tok_right_brace) {
            rb_raise(cParseError, "didn't read a comma, expected closing array, actually read %s", yajl_tok_name(event.token));
        }
    }

    return ary;
}
|
762
|
+
|
763
|
+
/*
 * Build a Ruby Hash from the stream, given that the opening token
 * (left_bracket) has already been consumed and is passed in as `event`.
 *
 * For every key: when `schema` is nil, or schema.key?(key) is true, the
 * value is built with schema[key] (or nil) as its own schema and stored
 * under the frozen key; otherwise the value is read and discarded.
 *
 * Raises Yajl::ParseError when a key is not a string, a colon is
 * missing, a comma is not followed by a key, or the object is not
 * properly closed.
 */
static VALUE rb_yajl_projector_filter_object_subtree(yajl_event_stream_t parser, VALUE schema, yajl_event_t event) {
    assert(event.token == yajl_tok_left_bracket);

    VALUE hsh = rb_hash_new();

    for (;;) {
        event = yajl_event_stream_next(parser, 1);

        if (event.token == yajl_tok_right_bracket) {
            break;
        }

        if (event.token != yajl_tok_string && event.token != yajl_tok_string_with_escapes) {
            rb_raise(cParseError, "Expected string, unexpected stream event %s", yajl_tok_name(event.token));
        }

        VALUE key = rb_yajl_projector_build_string(parser, event);

        event = yajl_event_stream_next(parser, 1);
        if (event.token != yajl_tok_colon) {
            rb_raise(cParseError, "Expected colon, unexpected stream event %s", yajl_tok_name(event.token));
        }

        /* a nil schema keeps the whole subtree; otherwise the key must be listed */
        int wanted = (schema == Qnil || rb_funcall(schema, rb_intern("key?"), 1, key) == Qtrue);

        if (wanted) {
            yajl_event_t value_event = yajl_event_stream_next(parser, 1);

            VALUE key_schema = (schema == Qnil) ? Qnil : rb_hash_aref(schema, key);

            VALUE val = rb_yajl_projector_filter(parser, key_schema, value_event);

            rb_str_freeze(key);
            rb_hash_aset(hsh, key, val);
        } else {
            rb_yajl_projector_ignore_value(parser);
        }

        /* look at what follows the value without consuming it */
        event = yajl_event_stream_next(parser, 0);
        if (event.token == yajl_tok_comma) {
            event = yajl_event_stream_next(parser, 1);
            assert(event.token == yajl_tok_comma);

            event = yajl_event_stream_next(parser, 0);
            if (event.token != yajl_tok_string && event.token != yajl_tok_string_with_escapes) {
                rb_raise(cParseError, "read a comma, expected a key to follow, actually read %s", yajl_tok_name(event.token));
            }
        } else if (event.token != yajl_tok_right_bracket) {
            rb_raise(cParseError, "read a value without tailing comma, expected closing bracket, actually read %s", yajl_tok_name(event.token));
        }
    }

    return hsh;
}
|
826
|
+
|
827
|
+
/*
|
828
|
+
# After reading a key if we know we are not interested in the next value,
|
829
|
+
# read and discard all its stream events.
|
830
|
+
#
|
831
|
+
# Values can be simple (string, numeric, boolean, null) or compound (object
|
832
|
+
# or array).
|
833
|
+
#
|
834
|
+
# Returns nothing.
|
835
|
+
*/
|
836
|
+
static void rb_yajl_projector_ignore_value(yajl_event_stream_t parser) {
|
837
|
+
yajl_event_t value_event = yajl_event_stream_next(parser, 1);
|
838
|
+
|
839
|
+
switch (value_event.token) {
|
840
|
+
case yajl_tok_null:
|
841
|
+
case yajl_tok_bool:
|
842
|
+
case yajl_tok_integer:
|
843
|
+
case yajl_tok_double:
|
844
|
+
case yajl_tok_string:
|
845
|
+
case yajl_tok_string_with_escapes:
|
846
|
+
return;
|
847
|
+
default:
|
848
|
+
break;
|
849
|
+
}
|
850
|
+
|
851
|
+
if (value_event.token == yajl_tok_left_brace || value_event.token == yajl_tok_left_bracket) {
|
852
|
+
rb_yajl_projector_ignore_container(parser);
|
853
|
+
return;
|
854
|
+
}
|
855
|
+
|
856
|
+
rb_raise(cParseError, "unknown value type to ignore %s", yajl_tok_name(value_event.token));
|
857
|
+
}
|
858
|
+
|
859
|
+
/*
|
860
|
+
# Given the start of an array or object, read until the closing event.
|
861
|
+
# Object structures can nest and this is considered.
|
862
|
+
#
|
863
|
+
# Returns nothing.
|
864
|
+
*/
|
865
|
+
static void rb_yajl_projector_ignore_container(yajl_event_stream_t parser) {
|
866
|
+
int depth = 1;
|
867
|
+
|
868
|
+
while (depth > 0) {
|
869
|
+
yajl_event_t event = yajl_event_stream_next(parser, 1);
|
870
|
+
|
871
|
+
if (event.token == yajl_tok_eof) {
|
872
|
+
return;
|
873
|
+
}
|
874
|
+
|
875
|
+
if (event.token == yajl_tok_left_bracket || event.token == yajl_tok_left_brace) {
|
876
|
+
depth += 1;
|
877
|
+
} else if (event.token == yajl_tok_right_bracket || event.token == yajl_tok_right_brace) {
|
878
|
+
depth -= 1;
|
879
|
+
}
|
880
|
+
}
|
881
|
+
}
|
882
|
+
|
883
|
+
/*
 * Convert a single scalar token into its Ruby value.
 *
 * null    -> nil
 * bool    -> true / false
 * integer -> Integer via rb_cstr2inum (base 10)
 * double  -> Float via strtod; a numeric token containing '.', 'e' or
 *            'E' is treated as a float regardless of its token kind
 * string  -> String via rb_yajl_projector_build_string
 *
 * Raises Yajl::ParseError for eof, comma or colon tokens, StandardError
 * for an unrecognised boolean spelling, and aborts via rb_bug for any
 * other token.
 */
static VALUE rb_yajl_projector_build_simple_value(yajl_event_stream_t parser, yajl_event_t event) {
    assert(parser->stream);

    switch (event.token) {
        case yajl_tok_null:
            return Qnil;

        case yajl_tok_bool:
            if (event.len == 4 && memcmp(event.buf, "true", 4) == 0) {
                return Qtrue;
            }
            if (event.len == 5 && memcmp(event.buf, "false", 5) == 0) {
                return Qfalse;
            }
            /* event.buf is a span, not a C string: bound the print by len */
            rb_raise(cStandardError, "unknown boolean token %.*s", (int)event.len, event.buf);

        case yajl_tok_integer:
        case yajl_tok_double: {
            /* strtod / rb_cstr2inum need a NUL-terminated copy of the span.
             * xmalloc raises NoMemoryError instead of returning NULL. */
            char *buf = xmalloc((size_t)event.len + 1);
            memcpy(buf, event.buf, event.len);
            buf[event.len] = '\0';

            VALUE val;
            if (memchr(buf, '.', event.len) ||
                memchr(buf, 'e', event.len) ||
                memchr(buf, 'E', event.len)) {
                val = rb_float_new(strtod(buf, NULL));
            } else {
                val = rb_cstr2inum(buf, 10);
            }
            xfree(buf);

            return val;
        }

        case yajl_tok_string:
        case yajl_tok_string_with_escapes:
            return rb_yajl_projector_build_string(parser, event);

        case yajl_tok_eof:
            rb_raise(cParseError, "unexpected eof while constructing value");

        case yajl_tok_comma:
            rb_raise(cParseError, "unexpected comma while constructing value");

        case yajl_tok_colon:
            rb_raise(cParseError, "unexpected colon while constructing value");

        default:
            rb_bug("we should never get here");
    }
}
|
932
|
+
|
933
|
+
/*
 * Tag a freshly built string as UTF-8 and, when Ruby has a default
 * internal encoding configured, export it to that encoding.
 */
static VALUE rb_yajl_projector_finish_string(VALUE str) {
    rb_enc_associate(str, utf8Encoding);

    rb_encoding *default_internal_enc = rb_default_internal_encoding();
    if (default_internal_enc) {
        str = rb_str_export_to_enc(str, default_internal_enc);
    }

    return str;
}

/*
 * Convert a string token into a Ruby String.
 *
 * Plain strings are copied straight from the token span. Strings with
 * escapes are first decoded into a temporary yajl buffer allocated with
 * the parser's allocator; that buffer is released on both the success
 * and the error path.
 *
 * Raises Yajl::ParseError when the decode buffer reports an error and
 * aborts via rb_bug for any non-string token.
 */
static VALUE rb_yajl_projector_build_string(yajl_event_stream_t parser, yajl_event_t event) {
    switch (event.token) {
        case yajl_tok_string: {
            VALUE str = rb_str_new(event.buf, event.len);
            return rb_yajl_projector_finish_string(str);
        }

        case yajl_tok_string_with_escapes: {
            yajl_buf strBuf = yajl_buf_alloc(parser->funcs);
            yajl_string_decode(strBuf, (const unsigned char *)event.buf, event.len);
            if (yajl_buf_err(strBuf)) {
                /* release the scratch buffer before unwinding */
                yajl_buf_free(strBuf);
                rb_raise(cParseError, "YAJL internal error: failed to allocate memory");
            }

            VALUE str = rb_str_new((const char *)yajl_buf_data(strBuf), yajl_buf_len(strBuf));

            /* the bytes now live in the Ruby string; the scratch buffer can go */
            yajl_buf_free(strBuf);

            return rb_yajl_projector_finish_string(str);
        }

        default: {
            rb_bug("we should never get here");
        }
    }
}
|
974
|
+
|
975
|
+
/*
 * rb_protect trampoline around rb_yajl_projector_filter.
 *
 * pointer - a VALUE[3] smuggled through VALUE:
 *           [0] the event stream, [1] the schema, [2] address of the
 *           first event.
 */
static VALUE rb_protected_yajl_projector_filter(VALUE pointer) {
    VALUE *packed = (VALUE *)pointer;

    struct yajl_event_stream_s *event_stream = (struct yajl_event_stream_s *)packed[0];
    VALUE schema = packed[1];
    yajl_event_t *first_event = (yajl_event_t *)packed[2];

    return rb_yajl_projector_filter(event_stream, schema, *first_event);
}
|
981
|
+
|
982
|
+
/*
|
983
|
+
* Document-method: project
|
984
|
+
*/
|
985
|
+
static VALUE rb_yajl_projector_project(VALUE self, VALUE schema) {
|
986
|
+
VALUE stream = rb_iv_get(self, "@stream");
|
987
|
+
|
988
|
+
long buffer_size = FIX2LONG(rb_iv_get(self, "@buffer_size"));
|
989
|
+
VALUE buffer = rb_str_new(0, buffer_size);
|
990
|
+
|
991
|
+
struct yajl_event_stream_s parser = {
|
992
|
+
.funcs = &rb_alloc_funcs,
|
993
|
+
|
994
|
+
.stream = stream,
|
995
|
+
|
996
|
+
.buffer = buffer,
|
997
|
+
.offset = (unsigned int)buffer_size,
|
998
|
+
|
999
|
+
.lexer = yajl_lex_alloc(&rb_alloc_funcs, 0, 1),
|
1000
|
+
};
|
1001
|
+
|
1002
|
+
yajl_event_t event = yajl_event_stream_next(&parser, 1);
|
1003
|
+
|
1004
|
+
RB_GC_GUARD(stream);
|
1005
|
+
RB_GC_GUARD(buffer);
|
1006
|
+
|
1007
|
+
VALUE result;
|
1008
|
+
int state = 0;
|
1009
|
+
|
1010
|
+
if (event.token == yajl_tok_left_brace || event.token == yajl_tok_left_bracket) {
|
1011
|
+
VALUE args[3];
|
1012
|
+
args[0] = (VALUE)&parser;
|
1013
|
+
args[1] = schema;
|
1014
|
+
args[2] = (VALUE)&event;
|
1015
|
+
result = rb_protect(rb_protected_yajl_projector_filter,
|
1016
|
+
(VALUE)args,
|
1017
|
+
&state);
|
1018
|
+
} else {
|
1019
|
+
yajl_lex_free(parser.lexer);
|
1020
|
+
rb_raise(cParseError, "expected left bracket or brace, actually read %s", yajl_tok_name(event.token));
|
1021
|
+
}
|
1022
|
+
|
1023
|
+
yajl_lex_free(parser.lexer);
|
1024
|
+
if (state) rb_jump_tag(state);
|
1025
|
+
|
1026
|
+
return result;
|
1027
|
+
}
|
1028
|
+
|
563
1029
|
/*
|
564
1030
|
* Document-class: Yajl::Encoder
|
565
1031
|
*
|
@@ -609,9 +1075,14 @@ static VALUE rb_yajl_encoder_new(int argc, VALUE * argv, VALUE klass) {
|
|
609
1075
|
actualIndent = indentString;
|
610
1076
|
}
|
611
1077
|
}
|
1078
|
+
|
612
1079
|
if (rb_hash_aref(opts, sym_html_safe) == Qtrue) {
|
613
1080
|
htmlSafe = 1;
|
614
1081
|
}
|
1082
|
+
|
1083
|
+
if (rb_hash_aref(opts, sym_entities) == Qtrue) {
|
1084
|
+
htmlSafe = 2;
|
1085
|
+
}
|
615
1086
|
}
|
616
1087
|
if (!indentString) {
|
617
1088
|
indentString = defaultIndentString;
|
@@ -620,7 +1091,7 @@ static VALUE rb_yajl_encoder_new(int argc, VALUE * argv, VALUE klass) {
|
|
620
1091
|
|
621
1092
|
obj = Data_Make_Struct(klass, yajl_encoder_wrapper, yajl_encoder_wrapper_mark, yajl_encoder_wrapper_free, wrapper);
|
622
1093
|
wrapper->indentString = actualIndent;
|
623
|
-
wrapper->encoder = yajl_gen_alloc(&cfg,
|
1094
|
+
wrapper->encoder = yajl_gen_alloc(&cfg, &rb_alloc_funcs);
|
624
1095
|
wrapper->on_progress_callback = Qnil;
|
625
1096
|
if (opts != Qnil && rb_funcall(opts, intern_has_key, 1, sym_terminator) == Qtrue) {
|
626
1097
|
wrapper->terminator = rb_hash_aref(opts, sym_terminator);
|
@@ -676,6 +1147,7 @@ static VALUE rb_yajl_encoder_encode(int argc, VALUE * argv, VALUE self) {
|
|
676
1147
|
const unsigned char * buffer;
|
677
1148
|
unsigned int len;
|
678
1149
|
VALUE obj, io, blk, outBuff;
|
1150
|
+
yajl_gen_status status;
|
679
1151
|
|
680
1152
|
GetEncoder(self, wrapper);
|
681
1153
|
|
@@ -689,7 +1161,11 @@ static VALUE rb_yajl_encoder_encode(int argc, VALUE * argv, VALUE self) {
|
|
689
1161
|
yajl_encode_part(wrapper, obj, io);
|
690
1162
|
|
691
1163
|
/* just make sure we output the remaining buffer */
|
692
|
-
yajl_gen_get_buf(wrapper->encoder, &buffer, &len);
|
1164
|
+
status = yajl_gen_get_buf(wrapper->encoder, &buffer, &len);
|
1165
|
+
if (status != yajl_gen_status_ok) {
|
1166
|
+
yajl_raise_encode_error_for_status(status, obj);
|
1167
|
+
}
|
1168
|
+
|
693
1169
|
outBuff = rb_str_new((const char *)buffer, len);
|
694
1170
|
#ifdef HAVE_RUBY_ENCODING_H
|
695
1171
|
rb_enc_associate(outBuff, utf8Encoding);
|
@@ -900,6 +1376,7 @@ void Init_yajl() {
|
|
900
1376
|
|
901
1377
|
cParseError = rb_define_class_under(mYajl, "ParseError", rb_eStandardError);
|
902
1378
|
cEncodeError = rb_define_class_under(mYajl, "EncodeError", rb_eStandardError);
|
1379
|
+
cStandardError = rb_const_get(rb_cObject, rb_intern("StandardError"));
|
903
1380
|
|
904
1381
|
cParser = rb_define_class_under(mYajl, "Parser", rb_cObject);
|
905
1382
|
rb_define_singleton_method(cParser, "new", rb_yajl_parser_new, -1);
|
@@ -909,6 +1386,9 @@ void Init_yajl() {
|
|
909
1386
|
rb_define_method(cParser, "<<", rb_yajl_parser_parse_chunk, 1);
|
910
1387
|
rb_define_method(cParser, "on_parse_complete=", rb_yajl_parser_set_complete_cb, 1);
|
911
1388
|
|
1389
|
+
cProjector = rb_define_class_under(mYajl, "Projector", rb_cObject);
|
1390
|
+
rb_define_method(cProjector, "project", rb_yajl_projector_project, 1);
|
1391
|
+
|
912
1392
|
cEncoder = rb_define_class_under(mYajl, "Encoder", rb_cObject);
|
913
1393
|
rb_define_singleton_method(cEncoder, "new", rb_yajl_encoder_new, -1);
|
914
1394
|
rb_define_method(cEncoder, "initialize", rb_yajl_encoder_init, -1);
|
@@ -931,6 +1411,7 @@ void Init_yajl() {
|
|
931
1411
|
sym_pretty = ID2SYM(rb_intern("pretty"));
|
932
1412
|
sym_indent = ID2SYM(rb_intern("indent"));
|
933
1413
|
sym_html_safe = ID2SYM(rb_intern("html_safe"));
|
1414
|
+
sym_entities = ID2SYM(rb_intern("entities"));
|
934
1415
|
sym_terminator = ID2SYM(rb_intern("terminator"));
|
935
1416
|
sym_symbolize_keys = ID2SYM(rb_intern("symbolize_keys"));
|
936
1417
|
sym_symbolize_names = ID2SYM(rb_intern("symbolize_names"));
|