yajl-ruby 1.3.1 → 1.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of yajl-ruby might be problematic. Click here for more details.

data/ext/yajl/yajl_ext.c CHANGED
@@ -22,6 +22,12 @@
22
22
  */
23
23
 
24
24
  #include "yajl_ext.h"
25
+ #include "yajl_lex.h"
26
+ #include "yajl_alloc.h"
27
+ #include "yajl_buf.h"
28
+ #include "yajl_encode.h"
29
+ #include "api/yajl_common.h"
30
+ #include "assert.h"
25
31
 
26
32
  #define YAJL_RB_TO_JSON \
27
33
  VALUE rb_encoder, cls; \
@@ -32,6 +38,25 @@
32
38
  } \
33
39
  return rb_yajl_encoder_encode(1, &self, rb_encoder); \
34
40
 
41
/* yajl allocation hook: serve the request from Ruby's xmalloc. ctx is unused. */
static void *rb_internal_malloc(void *ctx, unsigned int sz) {
    (void)ctx;
    void *mem = xmalloc(sz);
    return mem;
}
44
+
45
/* yajl reallocation hook: resize `previous` through Ruby's xrealloc. ctx is unused. */
static void *rb_internal_realloc(void *ctx, void *previous, unsigned int sz) {
    (void)ctx;
    void *resized = xrealloc(previous, sz);
    return resized;
}
48
+
49
/* yajl release hook: return `ptr` to Ruby's allocator via xfree. ctx is unused. */
static void rb_internal_free(void *ctx, void *ptr) {
    (void)ctx;
    xfree(ptr);
}
52
+
53
+ static yajl_alloc_funcs rb_alloc_funcs = {
54
+ rb_internal_malloc,
55
+ rb_internal_realloc,
56
+ rb_internal_free,
57
+ NULL
58
+ };
59
+
35
60
  /* Helpers for building objects */
36
61
  static void yajl_check_and_fire_callback(void * ctx) {
37
62
  yajl_parser_wrapper * wrapper;
@@ -39,12 +64,12 @@ static void yajl_check_and_fire_callback(void * ctx) {
39
64
 
40
65
  /* No need to do any of this if the callback isn't even setup */
41
66
  if (wrapper->parse_complete_callback != Qnil) {
42
- int len = RARRAY_LEN(wrapper->builderStack);
67
+ long len = RARRAY_LEN(wrapper->builderStack);
43
68
  if (len == 1 && wrapper->nestedArrayLevel == 0 && wrapper->nestedHashLevel == 0) {
44
69
  rb_funcall(wrapper->parse_complete_callback, intern_call, 1, rb_ary_pop(wrapper->builderStack));
45
70
  }
46
71
  } else {
47
- int len = RARRAY_LEN(wrapper->builderStack);
72
+ long len = RARRAY_LEN(wrapper->builderStack);
48
73
  if (len == 1 && wrapper->nestedArrayLevel == 0 && wrapper->nestedHashLevel == 0) {
49
74
  wrapper->objectsFound++;
50
75
  if (wrapper->objectsFound > 1) {
@@ -68,7 +93,11 @@ static char *yajl_raise_encode_error_for_status(yajl_gen_status status, VALUE ob
68
93
  rb_raise(cEncodeError, "Invalid number: cannot encode Infinity, -Infinity, or NaN");
69
94
  case yajl_gen_no_buf:
70
95
  rb_raise(cEncodeError, "YAJL internal error: yajl_gen_get_buf was called, but a print callback was specified, so no internal buffer is available");
96
+ case yajl_gen_alloc_error:
97
+ rb_raise(cEncodeError, "YAJL internal error: failed to allocate memory");
71
98
  default:
99
+ // fixme: why wasn't this already here??
100
+ rb_raise(cEncodeError, "Encountered unknown YAJL status %d during JSON generation", status);
72
101
  return NULL;
73
102
  }
74
103
  }
@@ -76,7 +105,7 @@ static char *yajl_raise_encode_error_for_status(yajl_gen_status status, VALUE ob
76
105
  static void yajl_set_static_value(void * ctx, VALUE val) {
77
106
  yajl_parser_wrapper * wrapper;
78
107
  VALUE lastEntry, hash;
79
- int len;
108
+ long len;
80
109
 
81
110
  GetParser((VALUE)ctx, wrapper);
82
111
 
@@ -130,18 +159,47 @@ static void yajl_encoder_wrapper_mark(void * wrapper) {
130
159
  }
131
160
  }
132
161
 
162
+ static VALUE yajl_key_to_string(VALUE obj) {
163
+ switch (TYPE(obj)) {
164
+ case T_STRING:
165
+ return obj;
166
+ case T_SYMBOL:
167
+ return rb_sym2str(obj);
168
+ default:
169
+ return rb_funcall(obj, intern_to_s, 0);
170
+ }
171
+ }
172
+
173
+ void yajl_encode_part(void * wrapper, VALUE obj, VALUE io);
174
+ struct yajl_encode_hash_iter {
175
+ void *w;
176
+ VALUE io;
177
+ };
178
+
179
+ static int yajl_encode_part_hash_i(VALUE key, VALUE val, VALUE iter_v) {
180
+ struct yajl_encode_hash_iter *iter = (struct yajl_encode_hash_iter *)iter_v;
181
+ /* key must be a string */
182
+ VALUE keyStr = yajl_key_to_string(key);
183
+
184
+ /* the key */
185
+ yajl_encode_part(iter->w, keyStr, iter->io);
186
+ /* the value */
187
+ yajl_encode_part(iter->w, val, iter->io);
188
+
189
+ return ST_CONTINUE;
190
+ }
191
+
133
192
  #define CHECK_STATUS(call) \
134
193
  if ((status = (call)) != yajl_gen_status_ok) { break; }
135
194
 
136
195
  void yajl_encode_part(void * wrapper, VALUE obj, VALUE io) {
137
- VALUE str, outBuff, otherObj;
196
+ VALUE str, outBuff;
138
197
  yajl_encoder_wrapper * w = wrapper;
139
198
  yajl_gen_status status;
140
199
  int idx = 0;
141
200
  const unsigned char * buffer;
142
201
  const char * cptr;
143
202
  unsigned int len;
144
- VALUE keys, entry, keyStr;
145
203
 
146
204
  if (io != Qnil || w->on_progress_callback != Qnil) {
147
205
  status = yajl_gen_get_buf(w->encoder, &buffer, &len);
@@ -163,24 +221,19 @@ void yajl_encode_part(void * wrapper, VALUE obj, VALUE io) {
163
221
  case T_HASH:
164
222
  CHECK_STATUS(yajl_gen_map_open(w->encoder));
165
223
 
166
- /* TODO: itterate through keys in the hash */
167
- keys = rb_funcall(obj, intern_keys, 0);
168
- for(idx=0; idx<RARRAY_LEN(keys); idx++) {
169
- entry = rb_ary_entry(keys, idx);
170
- keyStr = rb_funcall(entry, intern_to_s, 0); /* key must be a string */
171
- /* the key */
172
- yajl_encode_part(w, keyStr, io);
173
- /* the value */
174
- yajl_encode_part(w, rb_hash_aref(obj, entry), io);
175
- }
224
+ struct yajl_encode_hash_iter iter;
225
+ iter.w = w;
226
+ iter.io = io;
227
+ rb_hash_foreach(obj, yajl_encode_part_hash_i, (VALUE)&iter);
176
228
 
177
229
  CHECK_STATUS(yajl_gen_map_close(w->encoder));
178
230
  break;
179
231
  case T_ARRAY:
180
232
  CHECK_STATUS(yajl_gen_array_open(w->encoder));
233
+
234
+ VALUE *ptr = RARRAY_PTR(obj);
181
235
  for(idx=0; idx<RARRAY_LEN(obj); idx++) {
182
- otherObj = rb_ary_entry(obj, idx);
183
- yajl_encode_part(w, otherObj, io);
236
+ yajl_encode_part(w, ptr[idx], io);
184
237
  }
185
238
  CHECK_STATUS(yajl_gen_array_close(w->encoder));
186
239
  break;
@@ -194,11 +247,13 @@ void yajl_encode_part(void * wrapper, VALUE obj, VALUE io) {
194
247
  CHECK_STATUS(yajl_gen_bool(w->encoder, 0));
195
248
  break;
196
249
  case T_FIXNUM:
250
+ CHECK_STATUS(yajl_gen_long(w->encoder, FIX2LONG(obj)));
251
+ break;
197
252
  case T_FLOAT:
198
253
  case T_BIGNUM:
199
254
  str = rb_funcall(obj, intern_to_s, 0);
200
255
  cptr = RSTRING_PTR(str);
201
- len = RSTRING_LEN(str);
256
+ len = (unsigned int)RSTRING_LEN(str);
202
257
  if (memcmp(cptr, "NaN", 3) == 0 || memcmp(cptr, "Infinity", 8) == 0 || memcmp(cptr, "-Infinity", 9) == 0) {
203
258
  rb_raise(cEncodeError, "'%s' is an invalid number", cptr);
204
259
  }
@@ -206,7 +261,13 @@ void yajl_encode_part(void * wrapper, VALUE obj, VALUE io) {
206
261
  break;
207
262
  case T_STRING:
208
263
  cptr = RSTRING_PTR(obj);
209
- len = RSTRING_LEN(obj);
264
+ len = (unsigned int)RSTRING_LEN(obj);
265
+ CHECK_STATUS(yajl_gen_string(w->encoder, (const unsigned char *)cptr, len));
266
+ break;
267
+ case T_SYMBOL:
268
+ str = rb_sym2str(obj);
269
+ cptr = RSTRING_PTR(str);
270
+ len = (unsigned int)RSTRING_LEN(str);
210
271
  CHECK_STATUS(yajl_gen_string(w->encoder, (const unsigned char *)cptr, len));
211
272
  break;
212
273
  default:
@@ -214,13 +275,13 @@ void yajl_encode_part(void * wrapper, VALUE obj, VALUE io) {
214
275
  str = rb_funcall(obj, intern_to_json, 0);
215
276
  Check_Type(str, T_STRING);
216
277
  cptr = RSTRING_PTR(str);
217
- len = RSTRING_LEN(str);
278
+ len = (unsigned int)RSTRING_LEN(str);
218
279
  CHECK_STATUS(yajl_gen_number(w->encoder, cptr, len));
219
280
  } else {
220
281
  str = rb_funcall(obj, intern_to_s, 0);
221
282
  Check_Type(str, T_STRING);
222
283
  cptr = RSTRING_PTR(str);
223
- len = RSTRING_LEN(str);
284
+ len = (unsigned int)RSTRING_LEN(str);
224
285
  CHECK_STATUS(yajl_gen_string(w->encoder, (const unsigned char *)cptr, len));
225
286
  }
226
287
  break;
@@ -253,11 +314,17 @@ void yajl_parse_chunk(const unsigned char * chunk, unsigned int len, yajl_handle
253
314
 
254
315
  stat = yajl_parse(parser, chunk, len);
255
316
 
256
- if (stat != yajl_status_ok && stat != yajl_status_insufficient_data) {
317
+ if (stat == yajl_status_ok || stat == yajl_status_insufficient_data) {
318
+ // success
319
+ } else if (stat == yajl_status_error) {
257
320
  unsigned char * str = yajl_get_error(parser, 1, chunk, len);
258
321
  VALUE errobj = rb_exc_new2(cParseError, (const char*) str);
259
322
  yajl_free_error(parser, str);
260
323
  rb_exc_raise(errobj);
324
+ } else {
325
+ const char * str = yajl_status_to_string(stat);
326
+ VALUE errobj = rb_exc_new2(cParseError, (const char*) str);
327
+ rb_exc_raise(errobj);
261
328
  }
262
329
  }
263
330
 
@@ -420,7 +487,7 @@ static VALUE rb_yajl_parser_new(int argc, VALUE * argv, VALUE klass) {
420
487
  cfg = (yajl_parser_config){allowComments, checkUTF8};
421
488
 
422
489
  obj = Data_Make_Struct(klass, yajl_parser_wrapper, yajl_parser_wrapper_mark, yajl_parser_wrapper_free, wrapper);
423
- wrapper->parser = yajl_alloc(&callbacks, &cfg, NULL, (void *)obj);
490
+ wrapper->parser = yajl_alloc(&callbacks, &cfg, &rb_alloc_funcs, (void *)obj);
424
491
  wrapper->nestedArrayLevel = 0;
425
492
  wrapper->nestedHashLevel = 0;
426
493
  wrapper->objectsFound = 0;
@@ -450,13 +517,13 @@ static VALUE rb_yajl_parser_init(int argc, VALUE * argv, VALUE self) {
450
517
  * Document-method: parse
451
518
  *
452
519
  * call-seq:
453
- * parse(input, buffer_size=8092)
454
- * parse(input, buffer_size=8092) { |obj| ... }
520
+ * parse(input, buffer_size=8192)
521
+ * parse(input, buffer_size=8192) { |obj| ... }
455
522
  *
456
523
  * +input+ can either be a string or an IO to parse JSON from
457
524
  *
458
525
  * +buffer_size+ is the size of chunk that will be parsed off the input (if it's an IO) for each loop of the parsing process.
459
- * 8092 is a good balance between the different types of streams (off disk, off a socket, etc...), but this option
526
+ * 8192 is a good balance between the different types of streams (off disk, off a socket, etc...), but this option
460
527
  * is here so the caller can better tune their parsing depending on the type of stream being passed.
461
528
  * A larger read buffer will perform better for files off disk, whereas a smaller size may be more efficient for
462
529
  * reading off of a socket directly.
@@ -489,13 +556,13 @@ static VALUE rb_yajl_parser_parse(int argc, VALUE * argv, VALUE self) {
489
556
 
490
557
  if (TYPE(input) == T_STRING) {
491
558
  cptr = RSTRING_PTR(input);
492
- len = RSTRING_LEN(input);
559
+ len = (unsigned int)RSTRING_LEN(input);
493
560
  yajl_parse_chunk((const unsigned char*)cptr, len, wrapper->parser);
494
561
  } else if (rb_respond_to(input, intern_io_read)) {
495
562
  VALUE parsed = rb_str_new(0, FIX2LONG(rbufsize));
496
563
  while (rb_funcall(input, intern_io_read, 2, rbufsize, parsed) != Qnil) {
497
564
  cptr = RSTRING_PTR(parsed);
498
- len = RSTRING_LEN(parsed);
565
+ len = (unsigned int)RSTRING_LEN(parsed);
499
566
  yajl_parse_chunk((const unsigned char*)cptr, len, wrapper->parser);
500
567
  }
501
568
  } else {
@@ -535,7 +602,7 @@ static VALUE rb_yajl_parser_parse_chunk(VALUE self, VALUE chunk) {
535
602
 
536
603
  if (wrapper->parse_complete_callback != Qnil) {
537
604
  const char * cptr = RSTRING_PTR(chunk);
538
- len = RSTRING_LEN(chunk);
605
+ len = (unsigned int)RSTRING_LEN(chunk);
539
606
  yajl_parse_chunk((const unsigned char*)cptr, len, wrapper->parser);
540
607
  } else {
541
608
  rb_raise(cParseError, "The on_parse_complete callback isn't setup, parsing useless.");
@@ -560,6 +627,405 @@ static VALUE rb_yajl_parser_set_complete_cb(VALUE self, VALUE callback) {
560
627
  return Qnil;
561
628
  }
562
629
 
630
+ /*
631
+ * An event stream pulls data off the IO source into the buffer,
632
+ * then runs the lexer over that stream.
633
+ */
634
+ struct yajl_event_stream_s {
635
+ yajl_alloc_funcs *funcs;
636
+
637
+ VALUE stream; // source
638
+
639
+ VALUE buffer;
640
+ unsigned int offset;
641
+
642
+ yajl_lexer lexer; // event source
643
+ };
644
+
645
+ typedef struct yajl_event_stream_s *yajl_event_stream_t;
646
+
647
+ struct yajl_event_s {
648
+ yajl_tok token;
649
+ const char *buf;
650
+ unsigned int len;
651
+ };
652
+ typedef struct yajl_event_s yajl_event_t;
653
+
654
+ static yajl_event_t yajl_event_stream_next(yajl_event_stream_t parser, int pop) {
655
+ assert(parser->stream);
656
+ assert(parser->buffer);
657
+
658
+ while (1) {
659
+ if (parser->offset >= RSTRING_LEN(parser->buffer)) {
660
+ //printf("reading offset %d size %ld\n", parser->offset, RSTRING_LEN(parser->buffer));
661
+
662
+ // Refill the buffer
663
+ rb_funcall(parser->stream, intern_io_read, 2, INT2FIX(RSTRING_LEN(parser->buffer)), parser->buffer);
664
+ if (RSTRING_LEN(parser->buffer) == 0) {
665
+ yajl_event_t event = {
666
+ .token = yajl_tok_eof,
667
+ };
668
+ return event;
669
+ }
670
+
671
+ parser->offset = 0;
672
+ }
673
+
674
+ // Try to pull an event off the lexer
675
+ yajl_event_t event;
676
+
677
+ yajl_tok token;
678
+ if (pop == 0) {
679
+ //printf("peeking %p %ld %d\n", RSTRING_PTR(parser->buffer), RSTRING_LEN(parser->buffer), parser->offset);
680
+ token = yajl_lex_peek(parser->lexer, (const unsigned char *)RSTRING_PTR(parser->buffer), (unsigned int)RSTRING_LEN(parser->buffer), parser->offset);
681
+ //printf("peeked event %d\n", token);
682
+
683
+ if (token == yajl_tok_eof) {
684
+ parser->offset = (unsigned int)RSTRING_LEN(parser->buffer);
685
+ continue;
686
+ }
687
+
688
+ event.token = token;
689
+
690
+ return event;
691
+ }
692
+
693
+ //printf("popping\n");
694
+ token = yajl_lex_lex(parser->lexer, (const unsigned char *)RSTRING_PTR(parser->buffer), (unsigned int)RSTRING_LEN(parser->buffer), &parser->offset, (const unsigned char **)&event.buf, &event.len);
695
+ //printf("popped event %d\n", token);
696
+
697
+ if (token == yajl_tok_eof) {
698
+ continue;
699
+ }
700
+
701
+ event.token = token;
702
+
703
+ return event;
704
+ }
705
+
706
+ return (yajl_event_t){};
707
+ }
708
+
709
+ static VALUE rb_yajl_projector_filter_array_subtree(yajl_event_stream_t parser, VALUE schema, yajl_event_t event);
710
+ static VALUE rb_yajl_projector_filter_object_subtree(yajl_event_stream_t parser, VALUE schema, yajl_event_t event);
711
+ static void rb_yajl_projector_ignore_value(yajl_event_stream_t parser);
712
+ static void rb_yajl_projector_ignore_container(yajl_event_stream_t parser);
713
+ static VALUE rb_yajl_projector_build_simple_value(yajl_event_stream_t parser, yajl_event_t event);
714
+ static VALUE rb_yajl_projector_build_string(yajl_event_stream_t parser, yajl_event_t event);
715
+
716
+ static VALUE rb_yajl_projector_filter(yajl_event_stream_t parser, VALUE schema, yajl_event_t event) {
717
+ assert(parser->stream);
718
+
719
+ switch(event.token) {
720
+ case yajl_tok_left_brace:
721
+ return rb_yajl_projector_filter_array_subtree(parser, schema, event);
722
+ break;
723
+ case yajl_tok_left_bracket:
724
+ return rb_yajl_projector_filter_object_subtree(parser, schema, event);
725
+ break;
726
+ default:
727
+ return rb_yajl_projector_build_simple_value(parser, event);
728
+ }
729
+ }
730
+
731
+ static VALUE rb_yajl_projector_filter_array_subtree(yajl_event_stream_t parser, VALUE schema, yajl_event_t event) {
732
+ assert(event.token == yajl_tok_left_brace);
733
+
734
+ VALUE ary = rb_ary_new();
735
+
736
+ while (1) {
737
+ event = yajl_event_stream_next(parser, 1);
738
+
739
+ if (event.token == yajl_tok_right_brace) {
740
+ break;
741
+ }
742
+
743
+ VALUE val = rb_yajl_projector_filter(parser, schema, event);
744
+ rb_ary_push(ary, val);
745
+
746
+ event = yajl_event_stream_next(parser, 0);
747
+ if (event.token == yajl_tok_comma) {
748
+ event = yajl_event_stream_next(parser, 1);
749
+ assert(event.token == yajl_tok_comma);
750
+
751
+ event = yajl_event_stream_next(parser, 0);
752
+ if (!(event.token == yajl_tok_string || event.token == yajl_tok_integer || event.token == yajl_tok_double || event.token == yajl_tok_null || event.token == yajl_tok_bool || event.token == yajl_tok_left_bracket || event.token == yajl_tok_left_brace)) {
753
+ rb_raise(cParseError, "read a comma, expected a value to follow, actually read %s", yajl_tok_name(event.token));
754
+ }
755
+ } else if (event.token != yajl_tok_right_brace) {
756
+ rb_raise(cParseError, "didn't read a comma, expected closing array, actually read %s", yajl_tok_name(event.token));
757
+ }
758
+ }
759
+
760
+ return ary;
761
+ }
762
+
763
+ static VALUE rb_yajl_projector_filter_object_subtree(yajl_event_stream_t parser, VALUE schema, yajl_event_t event) {
764
+ assert(event.token == yajl_tok_left_bracket);
765
+
766
+ VALUE hsh = rb_hash_new();
767
+
768
+ while (1) {
769
+ event = yajl_event_stream_next(parser, 1);
770
+
771
+ if (event.token == yajl_tok_right_bracket) {
772
+ break;
773
+ }
774
+
775
+ if (!(event.token == yajl_tok_string || event.token == yajl_tok_string_with_escapes)) {
776
+ rb_raise(cParseError, "Expected string, unexpected stream event %s", yajl_tok_name(event.token));
777
+ }
778
+
779
+ VALUE key = rb_yajl_projector_build_string(parser, event);
780
+
781
+ event = yajl_event_stream_next(parser, 1);
782
+ if (!(event.token == yajl_tok_colon)) {
783
+ rb_raise(cParseError, "Expected colon, unexpected stream event %s", yajl_tok_name(event.token));
784
+ }
785
+
786
+ // nil schema means reify the subtree from here on
787
+ // otherwise if the schema has a key for this we want it
788
+ int interesting = (schema == Qnil || rb_funcall(schema, rb_intern("key?"), 1, key) == Qtrue);
789
+ if (!interesting) {
790
+ rb_yajl_projector_ignore_value(parser);
791
+ goto peek_comma;
792
+ }
793
+
794
+ yajl_event_t value_event = yajl_event_stream_next(parser, 1);
795
+
796
+ VALUE key_schema;
797
+ if (schema == Qnil) {
798
+ key_schema = Qnil;
799
+ } else {
800
+ key_schema = rb_hash_aref(schema, key);
801
+ }
802
+
803
+ VALUE val = rb_yajl_projector_filter(parser, key_schema, value_event);
804
+
805
+ rb_str_freeze(key);
806
+ rb_hash_aset(hsh, key, val);
807
+
808
+ peek_comma:
809
+
810
+ event = yajl_event_stream_next(parser, 0);
811
+ if (event.token == yajl_tok_comma) {
812
+ event = yajl_event_stream_next(parser, 1);
813
+ assert(event.token == yajl_tok_comma);
814
+
815
+ event = yajl_event_stream_next(parser, 0);
816
+ if (!(event.token == yajl_tok_string || event.token == yajl_tok_string_with_escapes)) {
817
+ rb_raise(cParseError, "read a comma, expected a key to follow, actually read %s", yajl_tok_name(event.token));
818
+ }
819
+ } else if (event.token != yajl_tok_right_bracket) {
820
+ rb_raise(cParseError, "read a value without tailing comma, expected closing bracket, actually read %s", yajl_tok_name(event.token));
821
+ }
822
+ }
823
+
824
+ return hsh;
825
+ }
826
+
827
+ /*
828
+ # After reading a key if we know we are not interested in the next value,
829
+ # read and discard all its stream events.
830
+ #
831
+ # Values can be simple (string, numeric, boolean, null) or compound (object
832
+ # or array).
833
+ #
834
+ # Returns nothing.
835
+ */
836
+ static void rb_yajl_projector_ignore_value(yajl_event_stream_t parser) {
837
+ yajl_event_t value_event = yajl_event_stream_next(parser, 1);
838
+
839
+ switch (value_event.token) {
840
+ case yajl_tok_null:
841
+ case yajl_tok_bool:
842
+ case yajl_tok_integer:
843
+ case yajl_tok_double:
844
+ case yajl_tok_string:
845
+ case yajl_tok_string_with_escapes:
846
+ return;
847
+ default:
848
+ break;
849
+ }
850
+
851
+ if (value_event.token == yajl_tok_left_brace || value_event.token == yajl_tok_left_bracket) {
852
+ rb_yajl_projector_ignore_container(parser);
853
+ return;
854
+ }
855
+
856
+ rb_raise(cParseError, "unknown value type to ignore %s", yajl_tok_name(value_event.token));
857
+ }
858
+
859
+ /*
860
+ # Given the start of an array or object, read until the closing event.
861
+ # Object structures can nest and this is considered.
862
+ #
863
+ # Returns nothing.
864
+ */
865
+ static void rb_yajl_projector_ignore_container(yajl_event_stream_t parser) {
866
+ int depth = 1;
867
+
868
+ while (depth > 0) {
869
+ yajl_event_t event = yajl_event_stream_next(parser, 1);
870
+
871
+ if (event.token == yajl_tok_eof) {
872
+ return;
873
+ }
874
+
875
+ if (event.token == yajl_tok_left_bracket || event.token == yajl_tok_left_brace) {
876
+ depth += 1;
877
+ } else if (event.token == yajl_tok_right_bracket || event.token == yajl_tok_right_brace) {
878
+ depth -= 1;
879
+ }
880
+ }
881
+ }
882
+
883
+ static VALUE rb_yajl_projector_build_simple_value(yajl_event_stream_t parser, yajl_event_t event) {
884
+ assert(parser->stream);
885
+
886
+ switch (event.token) {
887
+ case yajl_tok_null:;
888
+ return Qnil;
889
+ case yajl_tok_bool:;
890
+ if (memcmp(event.buf, "true", 4) == 0) {
891
+ return Qtrue;
892
+ } else if (memcmp(event.buf, "false", 5) == 0) {
893
+ return Qfalse;
894
+ } else {
895
+ rb_raise(cStandardError, "unknown boolean token %s", event.buf);
896
+ }
897
+ case yajl_tok_integer:;
898
+ case yajl_tok_double:;
899
+ char *buf = (char *)malloc(event.len + 1);
900
+ buf[event.len] = 0;
901
+ memcpy(buf, event.buf, event.len);
902
+
903
+ VALUE val;
904
+ if (memchr(buf, '.', event.len) ||
905
+ memchr(buf, 'e', event.len) ||
906
+ memchr(buf, 'E', event.len)) {
907
+ val = rb_float_new(strtod(buf, NULL));
908
+ } else {
909
+ val = rb_cstr2inum(buf, 10);
910
+ }
911
+ free(buf);
912
+
913
+ return val;
914
+
915
+ case yajl_tok_string:;
916
+ case yajl_tok_string_with_escapes:;
917
+ return rb_yajl_projector_build_string(parser, event);
918
+
919
+ case yajl_tok_eof:;
920
+ rb_raise(cParseError, "unexpected eof while constructing value");
921
+
922
+ case yajl_tok_comma:
923
+ rb_raise(cParseError, "unexpected comma while constructing value");
924
+
925
+ case yajl_tok_colon:
926
+ rb_raise(cParseError, "unexpected colon while constructing value");
927
+
928
+ default:;
929
+ rb_bug("we should never get here");
930
+ }
931
+ }
932
+
933
+ static VALUE rb_yajl_projector_build_string(yajl_event_stream_t parser, yajl_event_t event) {
934
+ switch (event.token) {
935
+ case yajl_tok_string:; {
936
+ VALUE str = rb_str_new(event.buf, event.len);
937
+ rb_enc_associate(str, utf8Encoding);
938
+
939
+ rb_encoding *default_internal_enc = rb_default_internal_encoding();
940
+ if (default_internal_enc) {
941
+ str = rb_str_export_to_enc(str, default_internal_enc);
942
+ }
943
+
944
+ return str;
945
+ }
946
+
947
+ case yajl_tok_string_with_escapes:; {
948
+ //printf("decoding string with escapes\n");
949
+
950
+ yajl_buf strBuf = yajl_buf_alloc(parser->funcs);
951
+ yajl_string_decode(strBuf, (const unsigned char *)event.buf, event.len);
952
+ if (yajl_buf_err(strBuf)) {
953
+ rb_raise(cParseError, "YAJL internal error: failed to allocate memory");
954
+ }
955
+
956
+ VALUE str = rb_str_new((const char *)yajl_buf_data(strBuf), yajl_buf_len(strBuf));
957
+ rb_enc_associate(str, utf8Encoding);
958
+
959
+ yajl_buf_free(strBuf);
960
+
961
+ rb_encoding *default_internal_enc = rb_default_internal_encoding();
962
+ if (default_internal_enc) {
963
+ str = rb_str_export_to_enc(str, default_internal_enc);
964
+ }
965
+
966
+ return str;
967
+ }
968
+
969
+ default:; {
970
+ rb_bug("we should never get here");
971
+ }
972
+ }
973
+ }
974
+
975
+ static VALUE rb_protected_yajl_projector_filter(VALUE pointer) {
976
+ VALUE *args = (VALUE *)pointer;
977
+ return rb_yajl_projector_filter((struct yajl_event_stream_s *)args[0],
978
+ args[1],
979
+ *(yajl_event_t *)args[2]);
980
+ }
981
+
982
+ /*
983
+ * Document-method: project
984
+ */
985
+ static VALUE rb_yajl_projector_project(VALUE self, VALUE schema) {
986
+ VALUE stream = rb_iv_get(self, "@stream");
987
+
988
+ long buffer_size = FIX2LONG(rb_iv_get(self, "@buffer_size"));
989
+ VALUE buffer = rb_str_new(0, buffer_size);
990
+
991
+ struct yajl_event_stream_s parser = {
992
+ .funcs = &rb_alloc_funcs,
993
+
994
+ .stream = stream,
995
+
996
+ .buffer = buffer,
997
+ .offset = (unsigned int)buffer_size,
998
+
999
+ .lexer = yajl_lex_alloc(&rb_alloc_funcs, 0, 1),
1000
+ };
1001
+
1002
+ yajl_event_t event = yajl_event_stream_next(&parser, 1);
1003
+
1004
+ RB_GC_GUARD(stream);
1005
+ RB_GC_GUARD(buffer);
1006
+
1007
+ VALUE result;
1008
+ int state = 0;
1009
+
1010
+ if (event.token == yajl_tok_left_brace || event.token == yajl_tok_left_bracket) {
1011
+ VALUE args[3];
1012
+ args[0] = (VALUE)&parser;
1013
+ args[1] = schema;
1014
+ args[2] = (VALUE)&event;
1015
+ result = rb_protect(rb_protected_yajl_projector_filter,
1016
+ (VALUE)args,
1017
+ &state);
1018
+ } else {
1019
+ yajl_lex_free(parser.lexer);
1020
+ rb_raise(cParseError, "expected left bracket or brace, actually read %s", yajl_tok_name(event.token));
1021
+ }
1022
+
1023
+ yajl_lex_free(parser.lexer);
1024
+ if (state) rb_jump_tag(state);
1025
+
1026
+ return result;
1027
+ }
1028
+
563
1029
  /*
564
1030
  * Document-class: Yajl::Encoder
565
1031
  *
@@ -609,9 +1075,14 @@ static VALUE rb_yajl_encoder_new(int argc, VALUE * argv, VALUE klass) {
609
1075
  actualIndent = indentString;
610
1076
  }
611
1077
  }
1078
+
612
1079
  if (rb_hash_aref(opts, sym_html_safe) == Qtrue) {
613
1080
  htmlSafe = 1;
614
1081
  }
1082
+
1083
+ if (rb_hash_aref(opts, sym_entities) == Qtrue) {
1084
+ htmlSafe = 2;
1085
+ }
615
1086
  }
616
1087
  if (!indentString) {
617
1088
  indentString = defaultIndentString;
@@ -620,7 +1091,7 @@ static VALUE rb_yajl_encoder_new(int argc, VALUE * argv, VALUE klass) {
620
1091
 
621
1092
  obj = Data_Make_Struct(klass, yajl_encoder_wrapper, yajl_encoder_wrapper_mark, yajl_encoder_wrapper_free, wrapper);
622
1093
  wrapper->indentString = actualIndent;
623
- wrapper->encoder = yajl_gen_alloc(&cfg, NULL);
1094
+ wrapper->encoder = yajl_gen_alloc(&cfg, &rb_alloc_funcs);
624
1095
  wrapper->on_progress_callback = Qnil;
625
1096
  if (opts != Qnil && rb_funcall(opts, intern_has_key, 1, sym_terminator) == Qtrue) {
626
1097
  wrapper->terminator = rb_hash_aref(opts, sym_terminator);
@@ -676,6 +1147,7 @@ static VALUE rb_yajl_encoder_encode(int argc, VALUE * argv, VALUE self) {
676
1147
  const unsigned char * buffer;
677
1148
  unsigned int len;
678
1149
  VALUE obj, io, blk, outBuff;
1150
+ yajl_gen_status status;
679
1151
 
680
1152
  GetEncoder(self, wrapper);
681
1153
 
@@ -689,7 +1161,11 @@ static VALUE rb_yajl_encoder_encode(int argc, VALUE * argv, VALUE self) {
689
1161
  yajl_encode_part(wrapper, obj, io);
690
1162
 
691
1163
  /* just make sure we output the remaining buffer */
692
- yajl_gen_get_buf(wrapper->encoder, &buffer, &len);
1164
+ status = yajl_gen_get_buf(wrapper->encoder, &buffer, &len);
1165
+ if (status != yajl_gen_status_ok) {
1166
+ yajl_raise_encode_error_for_status(status, obj);
1167
+ }
1168
+
693
1169
  outBuff = rb_str_new((const char *)buffer, len);
694
1170
  #ifdef HAVE_RUBY_ENCODING_H
695
1171
  rb_enc_associate(outBuff, utf8Encoding);
@@ -900,6 +1376,7 @@ void Init_yajl() {
900
1376
 
901
1377
  cParseError = rb_define_class_under(mYajl, "ParseError", rb_eStandardError);
902
1378
  cEncodeError = rb_define_class_under(mYajl, "EncodeError", rb_eStandardError);
1379
+ cStandardError = rb_const_get(rb_cObject, rb_intern("StandardError"));
903
1380
 
904
1381
  cParser = rb_define_class_under(mYajl, "Parser", rb_cObject);
905
1382
  rb_define_singleton_method(cParser, "new", rb_yajl_parser_new, -1);
@@ -909,6 +1386,9 @@ void Init_yajl() {
909
1386
  rb_define_method(cParser, "<<", rb_yajl_parser_parse_chunk, 1);
910
1387
  rb_define_method(cParser, "on_parse_complete=", rb_yajl_parser_set_complete_cb, 1);
911
1388
 
1389
+ cProjector = rb_define_class_under(mYajl, "Projector", rb_cObject);
1390
+ rb_define_method(cProjector, "project", rb_yajl_projector_project, 1);
1391
+
912
1392
  cEncoder = rb_define_class_under(mYajl, "Encoder", rb_cObject);
913
1393
  rb_define_singleton_method(cEncoder, "new", rb_yajl_encoder_new, -1);
914
1394
  rb_define_method(cEncoder, "initialize", rb_yajl_encoder_init, -1);
@@ -931,6 +1411,7 @@ void Init_yajl() {
931
1411
  sym_pretty = ID2SYM(rb_intern("pretty"));
932
1412
  sym_indent = ID2SYM(rb_intern("indent"));
933
1413
  sym_html_safe = ID2SYM(rb_intern("html_safe"));
1414
+ sym_entities = ID2SYM(rb_intern("entities"));
934
1415
  sym_terminator = ID2SYM(rb_intern("terminator"));
935
1416
  sym_symbolize_keys = ID2SYM(rb_intern("symbolize_keys"));
936
1417
  sym_symbolize_names = ID2SYM(rb_intern("symbolize_names"));