yajl-ruby 1.3.0 → 1.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of yajl-ruby might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/.travis.yml +3 -8
- data/ext/yajl/extconf.rb +1 -1
- data/ext/yajl/yajl_encode.c +41 -5
- data/ext/yajl/yajl_ext.c +443 -12
- data/ext/yajl/yajl_ext.h +2 -2
- data/ext/yajl/yajl_lex.c +9 -9
- data/ext/yajl/yajl_lex.h +16 -15
- data/lib/yajl.rb +7 -0
- data/lib/yajl/bzip2.rb +2 -2
- data/lib/yajl/http_stream.rb +3 -2
- data/lib/yajl/version.rb +1 -1
- data/spec/encoding/encoding_spec.rb +16 -0
- data/spec/parsing/one_off_spec.rb +7 -0
- data/spec/projection/project_file.rb +41 -0
- data/spec/projection/projection.rb +498 -0
- data/yajl-ruby.gemspec +4 -4
- metadata +31 -13
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c99601133192fb2ea1f0586ac1b4a83dc465d9a8
|
4
|
+
data.tar.gz: cbfce4619e6c2ccd5b761bf8d6005dbee6ce043e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 78c816a3a6368d3b0520d27b5987da10a40478357424e305aa121a6359141ef51c8ee9a3caa4bc3dde159156d5cb6f9af6d5cfd3c576c543b5fd9178ad2993e6
|
7
|
+
data.tar.gz: fc968516c955984c03c184d11a349c82c109adcce8c590a7537bd291a315bb2a8ac0be92ae927b9458e10dee07b564ebf15806bc591b89b0d09df89b5d6ce173
|
data/.travis.yml
CHANGED
data/ext/yajl/extconf.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
require 'mkmf'
|
2
2
|
require 'rbconfig'
|
3
3
|
|
4
|
-
$CFLAGS << ' -Wall -funroll-loops'
|
4
|
+
$CFLAGS << ' -Wall -funroll-loops -Wno-declaration-after-statement'
|
5
5
|
$CFLAGS << ' -Werror-implicit-function-declaration -Wextra -O0 -ggdb3' if ENV['DEBUG']
|
6
6
|
|
7
7
|
create_makefile('yajl/yajl')
|
data/ext/yajl/yajl_encode.c
CHANGED
@@ -59,12 +59,18 @@ yajl_string_encode2(const yajl_print_t print,
|
|
59
59
|
unsigned int htmlSafe)
|
60
60
|
{
|
61
61
|
unsigned int beg = 0;
|
62
|
-
unsigned int end = 0;
|
62
|
+
unsigned int end = 0;
|
63
|
+
unsigned int increment = 0;
|
63
64
|
char hexBuf[7];
|
65
|
+
char entityBuffer[7];
|
64
66
|
hexBuf[0] = '\\'; hexBuf[1] = 'u'; hexBuf[2] = '0'; hexBuf[3] = '0';
|
65
67
|
hexBuf[6] = 0;
|
66
68
|
|
69
|
+
entityBuffer[0] = '\\'; entityBuffer[1] = 'u'; entityBuffer[2] = '2'; entityBuffer[3] = '0';
|
70
|
+
entityBuffer[6] = 0;
|
71
|
+
|
67
72
|
while (end < len) {
|
73
|
+
increment = 1;
|
68
74
|
const char * escaped = NULL;
|
69
75
|
switch (str[end]) {
|
70
76
|
case '\r': escaped = "\\r"; break;
|
@@ -76,10 +82,39 @@ yajl_string_encode2(const yajl_print_t print,
|
|
76
82
|
case '\b': escaped = "\\b"; break;
|
77
83
|
case '\t': escaped = "\\t"; break;
|
78
84
|
case '/':
|
79
|
-
if (htmlSafe) {
|
85
|
+
if (htmlSafe == 1 || htmlSafe == 2) {
|
80
86
|
escaped = "\\/";
|
81
87
|
}
|
82
88
|
break;
|
89
|
+
/* When htmlSafe == 2, escape U+2028 and U+2029 (UTF-8 bytes e2 80 a8 and e2 80 a9) as \u2028 and \u2029 */
|
90
|
+
case 0xe2:
|
91
|
+
if (htmlSafe == 2) {
|
92
|
+
if (len - end >= 2 && str[end + 1] == 0x80) {
|
93
|
+
if (str[end + 2] == 0xa8) {
|
94
|
+
increment = 3;
|
95
|
+
entityBuffer[4] = '2';
|
96
|
+
entityBuffer[5] = '8';
|
97
|
+
escaped = entityBuffer;
|
98
|
+
break;
|
99
|
+
}
|
100
|
+
|
101
|
+
if (str[end + 2] == 0xa9) {
|
102
|
+
increment = 3;
|
103
|
+
entityBuffer[4] = '2';
|
104
|
+
entityBuffer[5] = '9';
|
105
|
+
escaped = entityBuffer;
|
106
|
+
break;
|
107
|
+
}
|
108
|
+
}
|
109
|
+
}
|
110
|
+
case '<':
|
111
|
+
case '>':
|
112
|
+
case '&':
|
113
|
+
if (htmlSafe == 2) {
|
114
|
+
CharToHex(str[end], hexBuf + 4);
|
115
|
+
escaped = hexBuf;
|
116
|
+
}
|
117
|
+
break;
|
83
118
|
default:
|
84
119
|
if ((unsigned char) str[end] < 32) {
|
85
120
|
CharToHex(str[end], hexBuf + 4);
|
@@ -90,7 +125,8 @@ yajl_string_encode2(const yajl_print_t print,
|
|
90
125
|
if (escaped != NULL) {
|
91
126
|
print(ctx, (const char *) (str + beg), end - beg);
|
92
127
|
print(ctx, escaped, (unsigned int)strlen(escaped));
|
93
|
-
|
128
|
+
end += increment;
|
129
|
+
beg = end;
|
94
130
|
} else {
|
95
131
|
++end;
|
96
132
|
}
|
@@ -162,8 +198,8 @@ void yajl_string_decode(yajl_buf buf, const unsigned char * str,
|
|
162
198
|
end+=3;
|
163
199
|
/* check if this is a surrogate */
|
164
200
|
if ((codepoint & 0xFC00) == 0xD800) {
|
165
|
-
end
|
166
|
-
|
201
|
+
if (end + 2 < len && str[end + 1] == '\\' && str[end + 2] == 'u') {
|
202
|
+
end++;
|
167
203
|
unsigned int surrogate = 0;
|
168
204
|
hexToDigit(&surrogate, str + end + 2);
|
169
205
|
codepoint =
|
data/ext/yajl/yajl_ext.c
CHANGED
@@ -22,6 +22,12 @@
|
|
22
22
|
*/
|
23
23
|
|
24
24
|
#include "yajl_ext.h"
|
25
|
+
#include "yajl_lex.h"
|
26
|
+
#include "yajl_alloc.h"
|
27
|
+
#include "yajl_buf.h"
|
28
|
+
#include "yajl_encode.h"
|
29
|
+
#include "api/yajl_common.h"
|
30
|
+
#include "assert.h"
|
25
31
|
|
26
32
|
#define YAJL_RB_TO_JSON \
|
27
33
|
VALUE rb_encoder, cls; \
|
@@ -32,6 +38,25 @@
|
|
32
38
|
} \
|
33
39
|
return rb_yajl_encoder_encode(1, &self, rb_encoder); \
|
34
40
|
|
41
|
+
static void *rb_internal_malloc(void *ctx, unsigned int sz) {
|
42
|
+
return xmalloc(sz);
|
43
|
+
}
|
44
|
+
|
45
|
+
static void *rb_internal_realloc(void *ctx, void *previous, unsigned int sz) {
|
46
|
+
return xrealloc(previous, sz);
|
47
|
+
}
|
48
|
+
|
49
|
+
static void rb_internal_free(void *ctx, void *ptr) {
|
50
|
+
xfree(ptr);
|
51
|
+
}
|
52
|
+
|
53
|
+
static yajl_alloc_funcs rb_alloc_funcs = {
|
54
|
+
rb_internal_malloc,
|
55
|
+
rb_internal_realloc,
|
56
|
+
rb_internal_free,
|
57
|
+
NULL
|
58
|
+
};
|
59
|
+
|
35
60
|
/* Helpers for building objects */
|
36
61
|
static void yajl_check_and_fire_callback(void * ctx) {
|
37
62
|
yajl_parser_wrapper * wrapper;
|
@@ -39,12 +64,12 @@ static void yajl_check_and_fire_callback(void * ctx) {
|
|
39
64
|
|
40
65
|
/* No need to do any of this if the callback isn't even setup */
|
41
66
|
if (wrapper->parse_complete_callback != Qnil) {
|
42
|
-
|
67
|
+
long len = RARRAY_LEN(wrapper->builderStack);
|
43
68
|
if (len == 1 && wrapper->nestedArrayLevel == 0 && wrapper->nestedHashLevel == 0) {
|
44
69
|
rb_funcall(wrapper->parse_complete_callback, intern_call, 1, rb_ary_pop(wrapper->builderStack));
|
45
70
|
}
|
46
71
|
} else {
|
47
|
-
|
72
|
+
long len = RARRAY_LEN(wrapper->builderStack);
|
48
73
|
if (len == 1 && wrapper->nestedArrayLevel == 0 && wrapper->nestedHashLevel == 0) {
|
49
74
|
wrapper->objectsFound++;
|
50
75
|
if (wrapper->objectsFound > 1) {
|
@@ -76,7 +101,7 @@ static char *yajl_raise_encode_error_for_status(yajl_gen_status status, VALUE ob
|
|
76
101
|
static void yajl_set_static_value(void * ctx, VALUE val) {
|
77
102
|
yajl_parser_wrapper * wrapper;
|
78
103
|
VALUE lastEntry, hash;
|
79
|
-
|
104
|
+
long len;
|
80
105
|
|
81
106
|
GetParser((VALUE)ctx, wrapper);
|
82
107
|
|
@@ -198,7 +223,7 @@ void yajl_encode_part(void * wrapper, VALUE obj, VALUE io) {
|
|
198
223
|
case T_BIGNUM:
|
199
224
|
str = rb_funcall(obj, intern_to_s, 0);
|
200
225
|
cptr = RSTRING_PTR(str);
|
201
|
-
len = RSTRING_LEN(str);
|
226
|
+
len = (unsigned int)RSTRING_LEN(str);
|
202
227
|
if (memcmp(cptr, "NaN", 3) == 0 || memcmp(cptr, "Infinity", 8) == 0 || memcmp(cptr, "-Infinity", 9) == 0) {
|
203
228
|
rb_raise(cEncodeError, "'%s' is an invalid number", cptr);
|
204
229
|
}
|
@@ -206,7 +231,7 @@ void yajl_encode_part(void * wrapper, VALUE obj, VALUE io) {
|
|
206
231
|
break;
|
207
232
|
case T_STRING:
|
208
233
|
cptr = RSTRING_PTR(obj);
|
209
|
-
len = RSTRING_LEN(obj);
|
234
|
+
len = (unsigned int)RSTRING_LEN(obj);
|
210
235
|
CHECK_STATUS(yajl_gen_string(w->encoder, (const unsigned char *)cptr, len));
|
211
236
|
break;
|
212
237
|
default:
|
@@ -214,13 +239,13 @@ void yajl_encode_part(void * wrapper, VALUE obj, VALUE io) {
|
|
214
239
|
str = rb_funcall(obj, intern_to_json, 0);
|
215
240
|
Check_Type(str, T_STRING);
|
216
241
|
cptr = RSTRING_PTR(str);
|
217
|
-
len = RSTRING_LEN(str);
|
242
|
+
len = (unsigned int)RSTRING_LEN(str);
|
218
243
|
CHECK_STATUS(yajl_gen_number(w->encoder, cptr, len));
|
219
244
|
} else {
|
220
245
|
str = rb_funcall(obj, intern_to_s, 0);
|
221
246
|
Check_Type(str, T_STRING);
|
222
247
|
cptr = RSTRING_PTR(str);
|
223
|
-
len = RSTRING_LEN(str);
|
248
|
+
len = (unsigned int)RSTRING_LEN(str);
|
224
249
|
CHECK_STATUS(yajl_gen_string(w->encoder, (const unsigned char *)cptr, len));
|
225
250
|
}
|
226
251
|
break;
|
@@ -420,7 +445,7 @@ static VALUE rb_yajl_parser_new(int argc, VALUE * argv, VALUE klass) {
|
|
420
445
|
cfg = (yajl_parser_config){allowComments, checkUTF8};
|
421
446
|
|
422
447
|
obj = Data_Make_Struct(klass, yajl_parser_wrapper, yajl_parser_wrapper_mark, yajl_parser_wrapper_free, wrapper);
|
423
|
-
wrapper->parser = yajl_alloc(&callbacks, &cfg,
|
448
|
+
wrapper->parser = yajl_alloc(&callbacks, &cfg, &rb_alloc_funcs, (void *)obj);
|
424
449
|
wrapper->nestedArrayLevel = 0;
|
425
450
|
wrapper->nestedHashLevel = 0;
|
426
451
|
wrapper->objectsFound = 0;
|
@@ -489,13 +514,13 @@ static VALUE rb_yajl_parser_parse(int argc, VALUE * argv, VALUE self) {
|
|
489
514
|
|
490
515
|
if (TYPE(input) == T_STRING) {
|
491
516
|
cptr = RSTRING_PTR(input);
|
492
|
-
len = RSTRING_LEN(input);
|
517
|
+
len = (unsigned int)RSTRING_LEN(input);
|
493
518
|
yajl_parse_chunk((const unsigned char*)cptr, len, wrapper->parser);
|
494
519
|
} else if (rb_respond_to(input, intern_io_read)) {
|
495
520
|
VALUE parsed = rb_str_new(0, FIX2LONG(rbufsize));
|
496
521
|
while (rb_funcall(input, intern_io_read, 2, rbufsize, parsed) != Qnil) {
|
497
522
|
cptr = RSTRING_PTR(parsed);
|
498
|
-
len = RSTRING_LEN(parsed);
|
523
|
+
len = (unsigned int)RSTRING_LEN(parsed);
|
499
524
|
yajl_parse_chunk((const unsigned char*)cptr, len, wrapper->parser);
|
500
525
|
}
|
501
526
|
} else {
|
@@ -535,7 +560,7 @@ static VALUE rb_yajl_parser_parse_chunk(VALUE self, VALUE chunk) {
|
|
535
560
|
|
536
561
|
if (wrapper->parse_complete_callback != Qnil) {
|
537
562
|
const char * cptr = RSTRING_PTR(chunk);
|
538
|
-
len = RSTRING_LEN(chunk);
|
563
|
+
len = (unsigned int)RSTRING_LEN(chunk);
|
539
564
|
yajl_parse_chunk((const unsigned char*)cptr, len, wrapper->parser);
|
540
565
|
} else {
|
541
566
|
rb_raise(cParseError, "The on_parse_complete callback isn't setup, parsing useless.");
|
@@ -560,6 +585,402 @@ static VALUE rb_yajl_parser_set_complete_cb(VALUE self, VALUE callback) {
|
|
560
585
|
return Qnil;
|
561
586
|
}
|
562
587
|
|
588
|
+
/*
|
589
|
+
* An event stream pulls data off the IO source into the buffer,
|
590
|
+
* then runs the lexer over that stream.
|
591
|
+
*/
|
592
|
+
struct yajl_event_stream_s {
|
593
|
+
yajl_alloc_funcs *funcs;
|
594
|
+
|
595
|
+
VALUE stream; // source
|
596
|
+
|
597
|
+
VALUE buffer;
|
598
|
+
unsigned int offset;
|
599
|
+
|
600
|
+
yajl_lexer lexer; // event source
|
601
|
+
};
|
602
|
+
|
603
|
+
typedef struct yajl_event_stream_s *yajl_event_stream_t;
|
604
|
+
|
605
|
+
struct yajl_event_s {
|
606
|
+
yajl_tok token;
|
607
|
+
const char *buf;
|
608
|
+
unsigned int len;
|
609
|
+
};
|
610
|
+
typedef struct yajl_event_s yajl_event_t;
|
611
|
+
|
612
|
+
static yajl_event_t yajl_event_stream_next(yajl_event_stream_t parser, int pop) {
|
613
|
+
assert(parser->stream);
|
614
|
+
assert(parser->buffer);
|
615
|
+
|
616
|
+
while (1) {
|
617
|
+
if (parser->offset >= RSTRING_LEN(parser->buffer)) {
|
618
|
+
//printf("reading offset %d size %ld\n", parser->offset, RSTRING_LEN(parser->buffer));
|
619
|
+
|
620
|
+
// Refill the buffer
|
621
|
+
rb_funcall(parser->stream, intern_io_read, 2, INT2FIX(RSTRING_LEN(parser->buffer)), parser->buffer);
|
622
|
+
if (RSTRING_LEN(parser->buffer) == 0) {
|
623
|
+
yajl_event_t event = {
|
624
|
+
.token = yajl_tok_eof,
|
625
|
+
};
|
626
|
+
return event;
|
627
|
+
}
|
628
|
+
|
629
|
+
parser->offset = 0;
|
630
|
+
}
|
631
|
+
|
632
|
+
// Try to pull an event off the lexer
|
633
|
+
yajl_event_t event;
|
634
|
+
|
635
|
+
yajl_tok token;
|
636
|
+
if (pop == 0) {
|
637
|
+
//printf("peeking %p %ld %d\n", RSTRING_PTR(parser->buffer), RSTRING_LEN(parser->buffer), parser->offset);
|
638
|
+
token = yajl_lex_peek(parser->lexer, (const unsigned char *)RSTRING_PTR(parser->buffer), (unsigned int)RSTRING_LEN(parser->buffer), parser->offset);
|
639
|
+
//printf("peeked event %d\n", token);
|
640
|
+
|
641
|
+
if (token == yajl_tok_eof) {
|
642
|
+
parser->offset = (unsigned int)RSTRING_LEN(parser->buffer);
|
643
|
+
continue;
|
644
|
+
}
|
645
|
+
|
646
|
+
event.token = token;
|
647
|
+
|
648
|
+
return event;
|
649
|
+
}
|
650
|
+
|
651
|
+
//printf("popping\n");
|
652
|
+
token = yajl_lex_lex(parser->lexer, (const unsigned char *)RSTRING_PTR(parser->buffer), (unsigned int)RSTRING_LEN(parser->buffer), &parser->offset, (const unsigned char **)&event.buf, &event.len);
|
653
|
+
//printf("popped event %d\n", token);
|
654
|
+
|
655
|
+
if (token == yajl_tok_eof) {
|
656
|
+
continue;
|
657
|
+
}
|
658
|
+
|
659
|
+
event.token = token;
|
660
|
+
|
661
|
+
return event;
|
662
|
+
}
|
663
|
+
|
664
|
+
return (yajl_event_t){};
|
665
|
+
}
|
666
|
+
|
667
|
+
static VALUE rb_yajl_projector_filter_array_subtree(yajl_event_stream_t parser, VALUE schema, yajl_event_t event);
|
668
|
+
static VALUE rb_yajl_projector_filter_object_subtree(yajl_event_stream_t parser, VALUE schema, yajl_event_t event);
|
669
|
+
static void rb_yajl_projector_ignore_value(yajl_event_stream_t parser);
|
670
|
+
static void rb_yajl_projector_ignore_container(yajl_event_stream_t parser);
|
671
|
+
static VALUE rb_yajl_projector_build_simple_value(yajl_event_stream_t parser, yajl_event_t event);
|
672
|
+
static VALUE rb_yajl_projector_build_string(yajl_event_stream_t parser, yajl_event_t event);
|
673
|
+
|
674
|
+
static VALUE rb_yajl_projector_filter(yajl_event_stream_t parser, VALUE schema, yajl_event_t event) {
|
675
|
+
assert(parser->stream);
|
676
|
+
|
677
|
+
switch(event.token) {
|
678
|
+
case yajl_tok_left_brace:
|
679
|
+
return rb_yajl_projector_filter_array_subtree(parser, schema, event);
|
680
|
+
break;
|
681
|
+
case yajl_tok_left_bracket:
|
682
|
+
return rb_yajl_projector_filter_object_subtree(parser, schema, event);
|
683
|
+
break;
|
684
|
+
default:
|
685
|
+
return rb_yajl_projector_build_simple_value(parser, event);
|
686
|
+
}
|
687
|
+
}
|
688
|
+
|
689
|
+
static VALUE rb_yajl_projector_filter_array_subtree(yajl_event_stream_t parser, VALUE schema, yajl_event_t event) {
|
690
|
+
assert(event.token == yajl_tok_left_brace);
|
691
|
+
|
692
|
+
VALUE ary = rb_ary_new();
|
693
|
+
|
694
|
+
while (1) {
|
695
|
+
event = yajl_event_stream_next(parser, 1);
|
696
|
+
|
697
|
+
if (event.token == yajl_tok_right_brace) {
|
698
|
+
break;
|
699
|
+
}
|
700
|
+
|
701
|
+
VALUE val = rb_yajl_projector_filter(parser, schema, event);
|
702
|
+
rb_ary_push(ary, val);
|
703
|
+
|
704
|
+
event = yajl_event_stream_next(parser, 0);
|
705
|
+
if (event.token == yajl_tok_comma) {
|
706
|
+
event = yajl_event_stream_next(parser, 1);
|
707
|
+
assert(event.token == yajl_tok_comma);
|
708
|
+
|
709
|
+
event = yajl_event_stream_next(parser, 0);
|
710
|
+
if (!(event.token == yajl_tok_string || event.token == yajl_tok_integer || event.token == yajl_tok_double || event.token == yajl_tok_null || event.token == yajl_tok_bool || event.token == yajl_tok_left_bracket || event.token == yajl_tok_left_brace)) {
|
711
|
+
rb_raise(cParseError, "read a comma, expected a value to follow, actually read %s", yajl_tok_name(event.token));
|
712
|
+
}
|
713
|
+
} else if (event.token != yajl_tok_right_brace) {
|
714
|
+
rb_raise(cParseError, "didn't read a comma, expected closing array, actually read %s", yajl_tok_name(event.token));
|
715
|
+
}
|
716
|
+
}
|
717
|
+
|
718
|
+
return ary;
|
719
|
+
}
|
720
|
+
|
721
|
+
static VALUE rb_yajl_projector_filter_object_subtree(yajl_event_stream_t parser, VALUE schema, yajl_event_t event) {
|
722
|
+
assert(event.token == yajl_tok_left_bracket);
|
723
|
+
|
724
|
+
VALUE hsh = rb_hash_new();
|
725
|
+
|
726
|
+
while (1) {
|
727
|
+
event = yajl_event_stream_next(parser, 1);
|
728
|
+
|
729
|
+
if (event.token == yajl_tok_right_bracket) {
|
730
|
+
break;
|
731
|
+
}
|
732
|
+
|
733
|
+
if (!(event.token == yajl_tok_string || event.token == yajl_tok_string_with_escapes)) {
|
734
|
+
rb_raise(cParseError, "Expected string, unexpected stream event %s", yajl_tok_name(event.token));
|
735
|
+
}
|
736
|
+
|
737
|
+
VALUE key = rb_yajl_projector_build_string(parser, event);
|
738
|
+
|
739
|
+
event = yajl_event_stream_next(parser, 1);
|
740
|
+
if (!(event.token == yajl_tok_colon)) {
|
741
|
+
rb_raise(cParseError, "Expected colon, unexpected stream event %s", yajl_tok_name(event.token));
|
742
|
+
}
|
743
|
+
|
744
|
+
// nil schema means reify the subtree from here on
|
745
|
+
// otherwise if the schema has a key for this we want it
|
746
|
+
int interesting = (schema == Qnil || rb_funcall(schema, rb_intern("key?"), 1, key) == Qtrue);
|
747
|
+
if (!interesting) {
|
748
|
+
rb_yajl_projector_ignore_value(parser);
|
749
|
+
goto peek_comma;
|
750
|
+
}
|
751
|
+
|
752
|
+
yajl_event_t value_event = yajl_event_stream_next(parser, 1);
|
753
|
+
|
754
|
+
VALUE key_schema;
|
755
|
+
if (schema == Qnil) {
|
756
|
+
key_schema = Qnil;
|
757
|
+
} else {
|
758
|
+
key_schema = rb_hash_aref(schema, key);
|
759
|
+
}
|
760
|
+
|
761
|
+
VALUE val = rb_yajl_projector_filter(parser, key_schema, value_event);
|
762
|
+
|
763
|
+
rb_str_freeze(key);
|
764
|
+
rb_hash_aset(hsh, key, val);
|
765
|
+
|
766
|
+
peek_comma:
|
767
|
+
|
768
|
+
event = yajl_event_stream_next(parser, 0);
|
769
|
+
if (event.token == yajl_tok_comma) {
|
770
|
+
event = yajl_event_stream_next(parser, 1);
|
771
|
+
assert(event.token == yajl_tok_comma);
|
772
|
+
|
773
|
+
event = yajl_event_stream_next(parser, 0);
|
774
|
+
if (!(event.token == yajl_tok_string || event.token == yajl_tok_string_with_escapes)) {
|
775
|
+
rb_raise(cParseError, "read a comma, expected a key to follow, actually read %s", yajl_tok_name(event.token));
|
776
|
+
}
|
777
|
+
} else if (event.token != yajl_tok_right_bracket) {
|
778
|
+
rb_raise(cParseError, "read a value without tailing comma, expected closing bracket, actually read %s", yajl_tok_name(event.token));
|
779
|
+
}
|
780
|
+
}
|
781
|
+
|
782
|
+
return hsh;
|
783
|
+
}
|
784
|
+
|
785
|
+
/*
|
786
|
+
# After reading a key, if we know we are not interested in the next value,
|
787
|
+
# read and discard all its stream events.
|
788
|
+
#
|
789
|
+
# Values can be simple (string, numeric, boolean, null) or compound (object
|
790
|
+
# or array).
|
791
|
+
#
|
792
|
+
# Returns nothing.
|
793
|
+
*/
|
794
|
+
static void rb_yajl_projector_ignore_value(yajl_event_stream_t parser) {
|
795
|
+
yajl_event_t value_event = yajl_event_stream_next(parser, 1);
|
796
|
+
|
797
|
+
switch (value_event.token) {
|
798
|
+
case yajl_tok_null:
|
799
|
+
case yajl_tok_bool:
|
800
|
+
case yajl_tok_integer:
|
801
|
+
case yajl_tok_double:
|
802
|
+
case yajl_tok_string:
|
803
|
+
case yajl_tok_string_with_escapes:
|
804
|
+
return;
|
805
|
+
default:
|
806
|
+
break;
|
807
|
+
}
|
808
|
+
|
809
|
+
if (value_event.token == yajl_tok_left_brace || value_event.token == yajl_tok_left_bracket) {
|
810
|
+
rb_yajl_projector_ignore_container(parser);
|
811
|
+
return;
|
812
|
+
}
|
813
|
+
|
814
|
+
rb_raise(cParseError, "unknown value type to ignore %s", yajl_tok_name(value_event.token));
|
815
|
+
}
|
816
|
+
|
817
|
+
/*
|
818
|
+
# Given the start of an array or object, read until the closing event.
|
819
|
+
# Containers can nest; a depth counter tracks this so reading stops at the matching close.
|
820
|
+
#
|
821
|
+
# Returns nothing.
|
822
|
+
*/
|
823
|
+
static void rb_yajl_projector_ignore_container(yajl_event_stream_t parser) {
|
824
|
+
int depth = 1;
|
825
|
+
|
826
|
+
while (depth > 0) {
|
827
|
+
yajl_event_t event = yajl_event_stream_next(parser, 1);
|
828
|
+
|
829
|
+
if (event.token == yajl_tok_eof) {
|
830
|
+
return;
|
831
|
+
}
|
832
|
+
|
833
|
+
if (event.token == yajl_tok_left_bracket || event.token == yajl_tok_left_brace) {
|
834
|
+
depth += 1;
|
835
|
+
} else if (event.token == yajl_tok_right_bracket || event.token == yajl_tok_right_brace) {
|
836
|
+
depth -= 1;
|
837
|
+
}
|
838
|
+
}
|
839
|
+
}
|
840
|
+
|
841
|
+
static VALUE rb_yajl_projector_build_simple_value(yajl_event_stream_t parser, yajl_event_t event) {
|
842
|
+
assert(parser->stream);
|
843
|
+
|
844
|
+
switch (event.token) {
|
845
|
+
case yajl_tok_null:;
|
846
|
+
return Qnil;
|
847
|
+
case yajl_tok_bool:;
|
848
|
+
if (memcmp(event.buf, "true", 4) == 0) {
|
849
|
+
return Qtrue;
|
850
|
+
} else if (memcmp(event.buf, "false", 4) == 0) {
|
851
|
+
return Qfalse;
|
852
|
+
} else {
|
853
|
+
rb_raise(cStandardError, "unknown boolean token %s", event.buf);
|
854
|
+
}
|
855
|
+
case yajl_tok_integer:;
|
856
|
+
case yajl_tok_double:;
|
857
|
+
char *buf = (char *)malloc(event.len + 1);
|
858
|
+
buf[event.len] = 0;
|
859
|
+
memcpy(buf, event.buf, event.len);
|
860
|
+
|
861
|
+
VALUE val;
|
862
|
+
if (memchr(buf, '.', event.len) ||
|
863
|
+
memchr(buf, 'e', event.len) ||
|
864
|
+
memchr(buf, 'E', event.len)) {
|
865
|
+
val = rb_float_new(strtod(buf, NULL));
|
866
|
+
} else {
|
867
|
+
val = rb_cstr2inum(buf, 10);
|
868
|
+
}
|
869
|
+
free(buf);
|
870
|
+
|
871
|
+
return val;
|
872
|
+
|
873
|
+
case yajl_tok_string:;
|
874
|
+
case yajl_tok_string_with_escapes:;
|
875
|
+
return rb_yajl_projector_build_string(parser, event);
|
876
|
+
|
877
|
+
case yajl_tok_eof:;
|
878
|
+
rb_raise(cParseError, "unexpected eof while constructing value");
|
879
|
+
|
880
|
+
case yajl_tok_comma:
|
881
|
+
rb_raise(cParseError, "unexpected comma while constructing value");
|
882
|
+
|
883
|
+
case yajl_tok_colon:
|
884
|
+
rb_raise(cParseError, "unexpected colon while constructing value");
|
885
|
+
|
886
|
+
default:;
|
887
|
+
assert(0);
|
888
|
+
}
|
889
|
+
}
|
890
|
+
|
891
|
+
static VALUE rb_yajl_projector_build_string(yajl_event_stream_t parser, yajl_event_t event) {
|
892
|
+
switch (event.token) {
|
893
|
+
case yajl_tok_string:; {
|
894
|
+
VALUE str = rb_str_new(event.buf, event.len);
|
895
|
+
rb_enc_associate(str, utf8Encoding);
|
896
|
+
|
897
|
+
rb_encoding *default_internal_enc = rb_default_internal_encoding();
|
898
|
+
if (default_internal_enc) {
|
899
|
+
str = rb_str_export_to_enc(str, default_internal_enc);
|
900
|
+
}
|
901
|
+
|
902
|
+
return str;
|
903
|
+
}
|
904
|
+
|
905
|
+
case yajl_tok_string_with_escapes:; {
|
906
|
+
//printf("decoding string with escapes\n");
|
907
|
+
|
908
|
+
yajl_buf strBuf = yajl_buf_alloc(parser->funcs);
|
909
|
+
yajl_string_decode(strBuf, (const unsigned char *)event.buf, event.len);
|
910
|
+
|
911
|
+
VALUE str = rb_str_new((const char *)yajl_buf_data(strBuf), yajl_buf_len(strBuf));
|
912
|
+
rb_enc_associate(str, utf8Encoding);
|
913
|
+
|
914
|
+
yajl_buf_free(strBuf);
|
915
|
+
|
916
|
+
rb_encoding *default_internal_enc = rb_default_internal_encoding();
|
917
|
+
if (default_internal_enc) {
|
918
|
+
str = rb_str_export_to_enc(str, default_internal_enc);
|
919
|
+
}
|
920
|
+
|
921
|
+
return str;
|
922
|
+
}
|
923
|
+
|
924
|
+
default:; {
|
925
|
+
assert(0);
|
926
|
+
}
|
927
|
+
}
|
928
|
+
}
|
929
|
+
|
930
|
+
static VALUE rb_protected_yajl_projector_filter(VALUE pointer) {
|
931
|
+
VALUE *args = (VALUE *)pointer;
|
932
|
+
return rb_yajl_projector_filter((struct yajl_event_stream_s *)args[0],
|
933
|
+
args[1],
|
934
|
+
*(yajl_event_t *)args[2]);
|
935
|
+
}
|
936
|
+
|
937
|
+
/*
|
938
|
+
* Document-method: project
|
939
|
+
*/
|
940
|
+
static VALUE rb_yajl_projector_project(VALUE self, VALUE schema) {
|
941
|
+
VALUE stream = rb_iv_get(self, "@stream");
|
942
|
+
|
943
|
+
long buffer_size = FIX2LONG(rb_iv_get(self, "@buffer_size"));
|
944
|
+
VALUE buffer = rb_str_new(0, buffer_size);
|
945
|
+
|
946
|
+
struct yajl_event_stream_s parser = {
|
947
|
+
.funcs = &rb_alloc_funcs,
|
948
|
+
|
949
|
+
.stream = stream,
|
950
|
+
|
951
|
+
.buffer = buffer,
|
952
|
+
.offset = (unsigned int)buffer_size,
|
953
|
+
|
954
|
+
.lexer = yajl_lex_alloc(&rb_alloc_funcs, 0, 1),
|
955
|
+
};
|
956
|
+
|
957
|
+
yajl_event_t event = yajl_event_stream_next(&parser, 1);
|
958
|
+
|
959
|
+
RB_GC_GUARD(stream);
|
960
|
+
RB_GC_GUARD(buffer);
|
961
|
+
|
962
|
+
VALUE result;
|
963
|
+
int state = 0;
|
964
|
+
|
965
|
+
if (event.token == yajl_tok_left_brace || event.token == yajl_tok_left_bracket) {
|
966
|
+
VALUE args[3];
|
967
|
+
args[0] = (VALUE)&parser;
|
968
|
+
args[1] = schema;
|
969
|
+
args[2] = (VALUE)&event;
|
970
|
+
result = rb_protect(rb_protected_yajl_projector_filter,
|
971
|
+
(VALUE)args,
|
972
|
+
&state);
|
973
|
+
} else {
|
974
|
+
yajl_lex_free(parser.lexer);
|
975
|
+
rb_raise(cParseError, "expected left bracket or brace, actually read %s", yajl_tok_name(event.token));
|
976
|
+
}
|
977
|
+
|
978
|
+
yajl_lex_free(parser.lexer);
|
979
|
+
if (state) rb_jump_tag(state);
|
980
|
+
|
981
|
+
return result;
|
982
|
+
}
|
983
|
+
|
563
984
|
/*
|
564
985
|
* Document-class: Yajl::Encoder
|
565
986
|
*
|
@@ -609,9 +1030,14 @@ static VALUE rb_yajl_encoder_new(int argc, VALUE * argv, VALUE klass) {
|
|
609
1030
|
actualIndent = indentString;
|
610
1031
|
}
|
611
1032
|
}
|
1033
|
+
|
612
1034
|
if (rb_hash_aref(opts, sym_html_safe) == Qtrue) {
|
613
1035
|
htmlSafe = 1;
|
614
1036
|
}
|
1037
|
+
|
1038
|
+
if (rb_hash_aref(opts, sym_entities) == Qtrue) {
|
1039
|
+
htmlSafe = 2;
|
1040
|
+
}
|
615
1041
|
}
|
616
1042
|
if (!indentString) {
|
617
1043
|
indentString = defaultIndentString;
|
@@ -620,7 +1046,7 @@ static VALUE rb_yajl_encoder_new(int argc, VALUE * argv, VALUE klass) {
|
|
620
1046
|
|
621
1047
|
obj = Data_Make_Struct(klass, yajl_encoder_wrapper, yajl_encoder_wrapper_mark, yajl_encoder_wrapper_free, wrapper);
|
622
1048
|
wrapper->indentString = actualIndent;
|
623
|
-
wrapper->encoder = yajl_gen_alloc(&cfg,
|
1049
|
+
wrapper->encoder = yajl_gen_alloc(&cfg, &rb_alloc_funcs);
|
624
1050
|
wrapper->on_progress_callback = Qnil;
|
625
1051
|
if (opts != Qnil && rb_funcall(opts, intern_has_key, 1, sym_terminator) == Qtrue) {
|
626
1052
|
wrapper->terminator = rb_hash_aref(opts, sym_terminator);
|
@@ -900,6 +1326,7 @@ void Init_yajl() {
|
|
900
1326
|
|
901
1327
|
cParseError = rb_define_class_under(mYajl, "ParseError", rb_eStandardError);
|
902
1328
|
cEncodeError = rb_define_class_under(mYajl, "EncodeError", rb_eStandardError);
|
1329
|
+
cStandardError = rb_const_get(rb_cObject, rb_intern("StandardError"));
|
903
1330
|
|
904
1331
|
cParser = rb_define_class_under(mYajl, "Parser", rb_cObject);
|
905
1332
|
rb_define_singleton_method(cParser, "new", rb_yajl_parser_new, -1);
|
@@ -909,6 +1336,9 @@ void Init_yajl() {
|
|
909
1336
|
rb_define_method(cParser, "<<", rb_yajl_parser_parse_chunk, 1);
|
910
1337
|
rb_define_method(cParser, "on_parse_complete=", rb_yajl_parser_set_complete_cb, 1);
|
911
1338
|
|
1339
|
+
cProjector = rb_define_class_under(mYajl, "Projector", rb_cObject);
|
1340
|
+
rb_define_method(cProjector, "project", rb_yajl_projector_project, 1);
|
1341
|
+
|
912
1342
|
cEncoder = rb_define_class_under(mYajl, "Encoder", rb_cObject);
|
913
1343
|
rb_define_singleton_method(cEncoder, "new", rb_yajl_encoder_new, -1);
|
914
1344
|
rb_define_method(cEncoder, "initialize", rb_yajl_encoder_init, -1);
|
@@ -931,6 +1361,7 @@ void Init_yajl() {
|
|
931
1361
|
sym_pretty = ID2SYM(rb_intern("pretty"));
|
932
1362
|
sym_indent = ID2SYM(rb_intern("indent"));
|
933
1363
|
sym_html_safe = ID2SYM(rb_intern("html_safe"));
|
1364
|
+
sym_entities = ID2SYM(rb_intern("entities"));
|
934
1365
|
sym_terminator = ID2SYM(rb_intern("terminator"));
|
935
1366
|
sym_symbolize_keys = ID2SYM(rb_intern("symbolize_keys"));
|
936
1367
|
sym_symbolize_names = ID2SYM(rb_intern("symbolize_names"));
|