json_scanner 0.3.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9c1ddff519827bc802cdcacb5b048402706544b0882c8ac91cd1aa414c4b57e0
4
- data.tar.gz: d1c4f41dbd71ed08a488c2f9647194fd2692c91d522fee35d28d7060bf80321c
3
+ metadata.gz: 59b0c3b3bbb9680bf3cb26983b6d3185a3af14ce45e538e953857a2a453d9391
4
+ data.tar.gz: d86a61eead87fb858fbc8bf997d6a71d578229426ba4cf36a04ecbfdbd6cf3b1
5
5
  SHA512:
6
- metadata.gz: 57bf59cc9495f46675bb98d2fc7545bdc3b8392631c443ad2b89595b22be054c8f8bb268a798c5f104d1e38b73d577662f96637fd9311260c9b0a45b55044265
7
- data.tar.gz: '055432559a23dbf34e679aac7be4967ea163684fda718b433e978c34feb73f298f1346b00629fada8279a9e49e1267b990f73d2d57c8403330faf42ce4086bb8'
6
+ metadata.gz: 638696fd097025ce8aa23eef8095b231d7e9c51905800a6f476dcf2e13d314620302b2cc6988e32f012d08cb7986e150c5bb7369b6ba0e407f933bba0a42489a
7
+ data.tar.gz: 4a54c918f1dc4136e3b5cc93cbc94a98bc225fdb1611f8ab666f926a3834e0b45d6b12c584b762d6a3fc207af4f459df97c0614af910e3b58215b57c3ba4df86
@@ -7,7 +7,13 @@ require "mkmf"
7
7
  # selectively, or entirely remove this flag.
8
8
  append_cflags("-fvisibility=hidden")
9
9
 
10
- dir_config("yajl", "", "")
10
+ idefault, ldefault = if with_config("libyajl2-gem")
11
+ require "libyajl2"
12
+ [Libyajl2.include_path, Libyajl2.opt_path]
13
+ else
14
+ ["", ""]
15
+ end
16
+ dir_config("yajl", idefault, ldefault)
11
17
 
12
18
  unless have_library("yajl") && have_header("yajl/yajl_parse.h") && have_header("yajl/yajl_gen.h")
13
19
  abort "yajl library not found"
@@ -1,11 +1,12 @@
1
1
  #include "json_scanner.h"
2
2
 
3
3
  VALUE rb_mJsonScanner;
4
- VALUE rb_cJsonScannerConfig;
4
+ VALUE rb_cJsonScannerSelector;
5
+ VALUE rb_cJsonScannerOptions;
5
6
  VALUE rb_eJsonScannerParseError;
6
7
  #define BYTES_CONSUMED "bytes_consumed"
7
8
  ID rb_iv_bytes_consumed;
8
- #define SCAN_KWARGS_SIZE 8
9
+ #define SCAN_KWARGS_SIZE 9
9
10
  ID scan_kwargs_table[SCAN_KWARGS_SIZE];
10
11
 
11
12
  VALUE null_sym;
@@ -85,6 +86,7 @@ typedef struct
85
86
  // Easier to use a Ruby array for result than convert later
86
87
  // must be supplied by the caller and RB_GC_GUARD-ed if it isn't on the stack
87
88
  VALUE points_list;
89
+ VALUE roots_info_list;
88
90
  // by depth
89
91
  size_t *starts;
90
92
  // VALUE rb_err;
@@ -92,12 +94,68 @@ typedef struct
92
94
  size_t yajl_bytes_consumed;
93
95
  } scan_ctx;
94
96
 
95
- inline size_t scan_ctx_get_bytes_consumed(scan_ctx *ctx)
97
+ typedef struct
98
+ {
99
+ int with_path;
100
+ int verbose_error;
101
+ int allow_comments;
102
+ int dont_validate_strings;
103
+ int allow_trailing_garbage;
104
+ int allow_multiple_values;
105
+ int allow_partial_values;
106
+ int symbolize_path_keys;
107
+ int with_roots_info;
108
+ } scan_options;
109
+ #define SCAN_OPTION_VALUE_MASK 1
110
+ #define SCAN_OPTION_SET_MASK (1 << 1)
111
+ #define SCAN_OPTION(options, field) ((options)->field & SCAN_OPTION_VALUE_MASK)
112
+ #define SCAN_OPTION_IS_SET(options, field) ((options)->field & SCAN_OPTION_SET_MASK)
113
+ #define SCAN_OPTION_SET(options, field, value) ((options)->field = ((value) & SCAN_OPTION_VALUE_MASK) | SCAN_OPTION_SET_MASK)
114
+ #define SCAN_OPTION_FALSE(options, field) \
115
+ (!SCAN_OPTION(options, field) && ((options)->field & SCAN_OPTION_SET_MASK))
116
+
117
+ static void scan_options_init(scan_options *options, VALUE kwargs)
118
+ {
119
+ options->with_path = 0;
120
+ options->verbose_error = 0;
121
+ options->allow_comments = 0;
122
+ options->dont_validate_strings = 0;
123
+ options->allow_trailing_garbage = 0;
124
+ options->allow_multiple_values = 0;
125
+ options->allow_partial_values = 0;
126
+ options->symbolize_path_keys = 0;
127
+ options->with_roots_info = 0;
128
+ if (kwargs != Qnil)
129
+ {
130
+ VALUE kwargs_values[SCAN_KWARGS_SIZE];
131
+ rb_get_kwargs(kwargs, scan_kwargs_table, 0, SCAN_KWARGS_SIZE, kwargs_values);
132
+ if (kwargs_values[0] != Qundef)
133
+ SCAN_OPTION_SET(options, with_path, RTEST(kwargs_values[0]));
134
+ if (kwargs_values[1] != Qundef)
135
+ SCAN_OPTION_SET(options, verbose_error, RTEST(kwargs_values[1]));
136
+ if (kwargs_values[2] != Qundef)
137
+ SCAN_OPTION_SET(options, allow_comments, RTEST(kwargs_values[2]));
138
+ if (kwargs_values[3] != Qundef)
139
+ SCAN_OPTION_SET(options, dont_validate_strings, RTEST(kwargs_values[3]));
140
+ if (kwargs_values[4] != Qundef)
141
+ SCAN_OPTION_SET(options, allow_trailing_garbage, RTEST(kwargs_values[4]));
142
+ if (kwargs_values[5] != Qundef)
143
+ SCAN_OPTION_SET(options, allow_multiple_values, RTEST(kwargs_values[5]));
144
+ if (kwargs_values[6] != Qundef)
145
+ SCAN_OPTION_SET(options, allow_partial_values, RTEST(kwargs_values[6]));
146
+ if (kwargs_values[7] != Qundef)
147
+ SCAN_OPTION_SET(options, symbolize_path_keys, RTEST(kwargs_values[7]));
148
+ if (kwargs_values[8] != Qundef)
149
+ SCAN_OPTION_SET(options, with_roots_info, RTEST(kwargs_values[8]));
150
+ }
151
+ }
152
+
153
+ static inline size_t scan_ctx_get_bytes_consumed(scan_ctx *ctx)
96
154
  {
97
155
  return ctx->yajl_bytes_consumed + yajl_get_bytes_consumed(ctx->handle);
98
156
  }
99
157
 
100
- inline void scan_ctx_update_bytes_consumed(scan_ctx *ctx)
158
+ static inline void scan_ctx_save_bytes_consumed(scan_ctx *ctx)
101
159
  {
102
160
  ctx->yajl_bytes_consumed += yajl_get_bytes_consumed(ctx->handle);
103
161
  }
@@ -175,7 +233,7 @@ void scan_ctx_debug(scan_ctx *ctx)
175
233
 
176
234
  // FIXME: This will cause memory leak if ruby_xmalloc raises
177
235
  // path_ary must be RB_GC_GUARD-ed by the caller
178
- VALUE scan_ctx_init(scan_ctx *ctx, VALUE path_ary, VALUE string_keys)
236
+ static VALUE scan_ctx_init(scan_ctx *ctx, VALUE path_ary, VALUE string_keys)
179
237
  {
180
238
  int path_ary_len;
181
239
  paths_t *paths;
@@ -254,7 +312,8 @@ VALUE scan_ctx_init(scan_ctx *ctx, VALUE path_ary, VALUE string_keys)
254
312
  if (string_keys != Qundef)
255
313
  {
256
314
  // If string_keys is provided, we need to duplicate the string
257
- // to avoid use-after-free issues and to add the newly created string to the string_keys array
315
+ // to avoid use-after-free issues and to add the newly created string to the string_keys array.
316
+ // In Ruby 2.2 and newer symbols can be GC-ed, so we need to duplicate them as well.
258
317
  entry = rb_str_dup(entry);
259
318
  rb_ary_push(string_keys, entry);
260
319
  }
@@ -310,8 +369,8 @@ VALUE scan_ctx_init(scan_ctx *ctx, VALUE path_ary, VALUE string_keys)
310
369
  return Qundef; // no error
311
370
  }
312
371
 
313
- // resets temporary values in the config
314
- void scan_ctx_reset(scan_ctx *ctx, VALUE points_list, int with_path, int symbolize_path_keys)
372
+ // resets temporary values in the selector
373
+ static void scan_ctx_reset(scan_ctx *ctx, VALUE points_list, VALUE roots_info_list, int with_path, int symbolize_path_keys)
315
374
  {
316
375
  // TODO: reset matched_depth if implemented
317
376
  ctx->current_path_len = 0;
@@ -319,11 +378,12 @@ void scan_ctx_reset(scan_ctx *ctx, VALUE points_list, int with_path, int symboli
319
378
  ctx->handle = NULL;
320
379
  ctx->yajl_bytes_consumed = 0;
321
380
  ctx->points_list = points_list;
381
+ ctx->roots_info_list = roots_info_list;
322
382
  ctx->with_path = with_path;
323
383
  ctx->symbolize_path_keys = symbolize_path_keys;
324
384
  }
325
385
 
326
- void scan_ctx_free(scan_ctx *ctx)
386
+ static void scan_ctx_free(scan_ctx *ctx)
327
387
  {
328
388
  // fprintf(stderr, "scan_ctx_free\n");
329
389
  if (!ctx)
@@ -340,7 +400,7 @@ void scan_ctx_free(scan_ctx *ctx)
340
400
  }
341
401
 
342
402
  // noexcept
343
- inline void increment_arr_index(scan_ctx *sctx)
403
+ static inline void increment_arr_index(scan_ctx *sctx)
344
404
  {
345
405
  // remember - any value can be root
346
406
  // TODO: Maybe make current_path_len 1 shorter and get rid of -1; need to change all compares
@@ -361,7 +421,7 @@ typedef enum
361
421
  } value_type;
362
422
 
363
423
  // noexcept
364
- VALUE create_point(scan_ctx *sctx, value_type type, size_t length)
424
+ static VALUE create_point(scan_ctx *sctx, value_type type, size_t length)
365
425
  {
366
426
  VALUE values[3], point;
367
427
  size_t curr_pos = scan_ctx_get_bytes_consumed(sctx);
@@ -402,7 +462,7 @@ VALUE create_point(scan_ctx *sctx, value_type type, size_t length)
402
462
  }
403
463
 
404
464
  // noexcept
405
- VALUE create_path(scan_ctx *sctx)
465
+ static VALUE create_path(scan_ctx *sctx)
406
466
  {
407
467
  VALUE path = rb_ary_new_capa(sctx->current_path_len);
408
468
  for (int i = 0; i < sctx->current_path_len; i++)
@@ -428,7 +488,16 @@ VALUE create_path(scan_ctx *sctx)
428
488
  }
429
489
 
430
490
  // noexcept
431
- void save_point(scan_ctx *sctx, value_type type, size_t length)
491
+ static inline void save_root_info(scan_ctx *sctx, VALUE type, size_t len)
492
+ {
493
+ if (sctx->roots_info_list != Qundef && sctx->current_path_len == 0)
494
+ {
495
+ rb_ary_push(sctx->roots_info_list, rb_ary_new_from_args(2, type, ULL2NUM(scan_ctx_get_bytes_consumed(sctx) - len)));
496
+ }
497
+ }
498
+
499
+ // noexcept
500
+ static void save_point(scan_ctx *sctx, value_type type, size_t length)
432
501
  {
433
502
  // TODO: Abort parsing if all paths are matched and no more matches are possible: only trivial key/index matchers at the current level
434
503
  // TODO: Don't re-compare already matched prefixes; hard to invalidate, though
@@ -489,9 +558,10 @@ void save_point(scan_ctx *sctx, value_type type, size_t length)
489
558
  }
490
559
 
491
560
  // noexcept
492
- int scan_on_null(void *ctx)
561
+ static int scan_on_null(void *ctx)
493
562
  {
494
563
  scan_ctx *sctx = (scan_ctx *)ctx;
564
+ save_root_info(sctx, null_sym, 4);
495
565
  if (sctx->current_path_len > sctx->max_path_len)
496
566
  return true;
497
567
  increment_arr_index(sctx);
@@ -500,9 +570,10 @@ int scan_on_null(void *ctx)
500
570
  }
501
571
 
502
572
  // noexcept
503
- int scan_on_boolean(void *ctx, int bool_val)
573
+ static int scan_on_boolean(void *ctx, int bool_val)
504
574
  {
505
575
  scan_ctx *sctx = (scan_ctx *)ctx;
576
+ save_root_info(sctx, boolean_sym, bool_val ? 4 : 5);
506
577
  if (sctx->current_path_len > sctx->max_path_len)
507
578
  return true;
508
579
  increment_arr_index(sctx);
@@ -511,9 +582,10 @@ int scan_on_boolean(void *ctx, int bool_val)
511
582
  }
512
583
 
513
584
  // noexcept
514
- int scan_on_number(void *ctx, const char *val, size_t len)
585
+ static int scan_on_number(void *ctx, const char *val, size_t len)
515
586
  {
516
587
  scan_ctx *sctx = (scan_ctx *)ctx;
588
+ save_root_info(sctx, number_sym, len);
517
589
  if (sctx->current_path_len > sctx->max_path_len)
518
590
  return true;
519
591
  increment_arr_index(sctx);
@@ -522,9 +594,10 @@ int scan_on_number(void *ctx, const char *val, size_t len)
522
594
  }
523
595
 
524
596
  // noexcept
525
- int scan_on_string(void *ctx, const unsigned char *val, size_t len)
597
+ static int scan_on_string(void *ctx, const unsigned char *val, size_t len)
526
598
  {
527
599
  scan_ctx *sctx = (scan_ctx *)ctx;
600
+ save_root_info(sctx, string_sym, len + 2);
528
601
  if (sctx->current_path_len > sctx->max_path_len)
529
602
  return true;
530
603
  increment_arr_index(sctx);
@@ -533,9 +606,11 @@ int scan_on_string(void *ctx, const unsigned char *val, size_t len)
533
606
  }
534
607
 
535
608
  // noexcept
536
- int scan_on_start_object(void *ctx)
609
+ static int scan_on_start_object(void *ctx)
537
610
  {
538
611
  scan_ctx *sctx = (scan_ctx *)ctx;
612
+ // Save in the beginning in case of a partial value
613
+ save_root_info(sctx, object_sym, 1);
539
614
  if (sctx->current_path_len > sctx->max_path_len)
540
615
  {
541
616
  sctx->current_path_len++;
@@ -550,7 +625,7 @@ int scan_on_start_object(void *ctx)
550
625
  }
551
626
 
552
627
  // noexcept
553
- int scan_on_key(void *ctx, const unsigned char *key, size_t len)
628
+ static int scan_on_key(void *ctx, const unsigned char *key, size_t len)
554
629
  {
555
630
  scan_ctx *sctx = (scan_ctx *)ctx;
556
631
  if (sctx->current_path_len > sctx->max_path_len)
@@ -563,7 +638,7 @@ int scan_on_key(void *ctx, const unsigned char *key, size_t len)
563
638
  }
564
639
 
565
640
  // noexcept
566
- int scan_on_end_object(void *ctx)
641
+ static int scan_on_end_object(void *ctx)
567
642
  {
568
643
  scan_ctx *sctx = (scan_ctx *)ctx;
569
644
  sctx->current_path_len--;
@@ -573,9 +648,11 @@ int scan_on_end_object(void *ctx)
573
648
  }
574
649
 
575
650
  // noexcept
576
- int scan_on_start_array(void *ctx)
651
+ static int scan_on_start_array(void *ctx)
577
652
  {
578
653
  scan_ctx *sctx = (scan_ctx *)ctx;
654
+ // Save in the beginning in case of a partial value
655
+ save_root_info(sctx, array_sym, 1);
579
656
  if (sctx->current_path_len > sctx->max_path_len)
580
657
  {
581
658
  sctx->current_path_len++;
@@ -593,7 +670,7 @@ int scan_on_start_array(void *ctx)
593
670
  }
594
671
 
595
672
  // noexcept
596
- int scan_on_end_array(void *ctx)
673
+ static int scan_on_end_array(void *ctx)
597
674
  {
598
675
  scan_ctx *sctx = (scan_ctx *)ctx;
599
676
  sctx->current_path_len--;
@@ -602,13 +679,13 @@ int scan_on_end_array(void *ctx)
602
679
  return true;
603
680
  }
604
681
 
605
- void config_free(void *data)
682
+ static void selector_free(void *data)
606
683
  {
607
684
  scan_ctx_free((scan_ctx *)data);
608
685
  ruby_xfree(data);
609
686
  }
610
687
 
611
- size_t config_size(const void *data)
688
+ static size_t selector_size(const void *data)
612
689
  {
613
690
  // see ObjectSpace.memsize_of
614
691
  scan_ctx *ctx = (scan_ctx *)data;
@@ -630,16 +707,16 @@ size_t config_size(const void *data)
630
707
  return res;
631
708
  }
632
709
 
633
- static const rb_data_type_t config_type = {
634
- .wrap_struct_name = "json_scanner_config",
710
+ static const rb_data_type_t selector_type = {
711
+ .wrap_struct_name = "json_scanner_selector",
635
712
  .function = {
636
- .dfree = config_free,
637
- .dsize = config_size,
713
+ .dfree = selector_free,
714
+ .dsize = selector_size,
638
715
  },
639
716
  .flags = RUBY_TYPED_FREE_IMMEDIATELY,
640
717
  };
641
718
 
642
- VALUE config_alloc(VALUE self)
719
+ static VALUE selector_alloc(VALUE self)
643
720
  {
644
721
  scan_ctx *ctx = ruby_xmalloc(sizeof(scan_ctx));
645
722
  ctx->paths = NULL;
@@ -647,15 +724,15 @@ VALUE config_alloc(VALUE self)
647
724
  ctx->current_path = NULL;
648
725
  ctx->max_path_len = 0;
649
726
  ctx->starts = NULL;
650
- scan_ctx_reset(ctx, Qundef, false, false);
651
- return TypedData_Wrap_Struct(self, &config_type, ctx);
727
+ scan_ctx_reset(ctx, Qundef, Qundef, false, false);
728
+ return TypedData_Wrap_Struct(self, &selector_type, ctx);
652
729
  }
653
730
 
654
- VALUE config_m_initialize(VALUE self, VALUE path_ary)
731
+ static VALUE selector_m_initialize(VALUE self, VALUE path_ary)
655
732
  {
656
733
  scan_ctx *ctx;
657
734
  VALUE scan_ctx_init_err, string_keys;
658
- TypedData_Get_Struct(self, scan_ctx, &config_type, ctx);
735
+ TypedData_Get_Struct(self, scan_ctx, &selector_type, ctx);
659
736
  string_keys = rb_ary_new();
660
737
  scan_ctx_init_err = scan_ctx_init(ctx, path_ary, string_keys);
661
738
  if (scan_ctx_init_err != Qundef)
@@ -666,15 +743,15 @@ VALUE config_m_initialize(VALUE self, VALUE path_ary)
666
743
  return self;
667
744
  }
668
745
 
669
- VALUE config_m_inspect(VALUE self)
746
+ static VALUE selector_m_inspect(VALUE self)
670
747
  {
671
748
  scan_ctx *ctx;
672
749
  VALUE res;
673
- TypedData_Get_Struct(self, scan_ctx, &config_type, ctx);
750
+ TypedData_Get_Struct(self, scan_ctx, &selector_type, ctx);
674
751
  res = rb_sprintf("#<%" PRIsVALUE " [", rb_class_name(CLASS_OF(self)));
675
752
  for (int i = 0; ctx->paths && i < ctx->paths_len; i++)
676
753
  {
677
- rb_str_cat_cstr(res, "[");
754
+ rb_str_buf_cat_ascii(res, "[");
678
755
  for (int j = 0; j < ctx->paths[i].len; j++)
679
756
  {
680
757
  switch (ctx->paths[i].elems[j].type)
@@ -686,20 +763,92 @@ VALUE config_m_inspect(VALUE self)
686
763
  rb_str_catf(res, "%ld", ctx->paths[i].elems[j].value.index);
687
764
  break;
688
765
  case MATCHER_INDEX_RANGE:
689
- rb_str_catf(res, "(%ld..%ld)", ctx->paths[i].elems[j].value.range.start, ctx->paths[i].elems[j].value.range.end);
766
+ rb_str_catf(res, "(%ld..%ld)", ctx->paths[i].elems[j].value.range.start, ctx->paths[i].elems[j].value.range.end == LONG_MAX ? -1L : ctx->paths[i].elems[j].value.range.end);
690
767
  break;
691
768
  case MATCHER_ANY_KEY:
692
- rb_str_cat_cstr(res, "('*'..'*')");
769
+ rb_str_buf_cat_ascii(res, "('*'..'*')");
693
770
  break;
694
771
  }
695
772
  if (j < ctx->paths[i].len - 1)
696
- rb_str_cat_cstr(res, ", ");
773
+ rb_str_buf_cat_ascii(res, ", ");
697
774
  }
698
- rb_str_cat_cstr(res, "]");
775
+ rb_str_buf_cat_ascii(res, "]");
699
776
  if (i < ctx->paths_len - 1)
700
- rb_str_cat_cstr(res, ", ");
777
+ rb_str_buf_cat_ascii(res, ", ");
701
778
  }
702
- rb_str_cat_cstr(res, "]>");
779
+ rb_str_buf_cat_ascii(res, "]>");
780
+ return res;
781
+ }
782
+
783
+ static VALUE selector_m_length(VALUE self)
784
+ {
785
+ scan_ctx *ctx;
786
+ TypedData_Get_Struct(self, scan_ctx, &selector_type, ctx);
787
+ return INT2FIX(ctx->paths_len);
788
+ }
789
+
790
+ static size_t options_size(const void *data)
791
+ {
792
+ return sizeof(scan_options);
793
+ }
794
+
795
+ static const rb_data_type_t options_type = {
796
+ .wrap_struct_name = "json_scanner_options",
797
+ .function = {
798
+ .dfree = RUBY_DEFAULT_FREE,
799
+ .dsize = options_size,
800
+ },
801
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY,
802
+ };
803
+
804
+ static VALUE options_alloc(VALUE self)
805
+ {
806
+ // NOT INITIALIZED
807
+ scan_options *options;
808
+ return TypedData_Make_Struct(self, scan_options, &options_type, options);
809
+ }
810
+
811
+ static VALUE options_m_initialize(int argc, VALUE *argv, VALUE self)
812
+ {
813
+ VALUE kwargs;
814
+ scan_options *options;
815
+ TypedData_Get_Struct(self, scan_options, &options_type, options);
816
+ #if RUBY_API_VERSION_MAJOR > 2 || (RUBY_API_VERSION_MAJOR == 2 && RUBY_API_VERSION_MINOR >= 7)
817
+ rb_scan_args_kw(RB_SCAN_ARGS_LAST_HASH_KEYWORDS, argc, argv, "0:", &kwargs);
818
+ #else
819
+ rb_scan_args(argc, argv, "0:", &kwargs);
820
+ #endif
821
+ scan_options_init(options, kwargs);
822
+ return self;
823
+ }
824
+
825
+ static VALUE options_m_inspect(VALUE self)
826
+ {
827
+ VALUE res;
828
+ scan_options *options;
829
+ TypedData_Get_Struct(self, scan_options, &options_type, options);
830
+ res = rb_sprintf("#<%" PRIsVALUE " {", rb_class_name(CLASS_OF(self)));
831
+ if (SCAN_OPTION_IS_SET(options, with_path))
832
+ rb_str_catf(res, "with_path: %s, ", SCAN_OPTION(options, with_path) ? "true" : "false");
833
+ if (SCAN_OPTION_IS_SET(options, verbose_error))
834
+ rb_str_catf(res, "verbose_error: %s, ", SCAN_OPTION(options, verbose_error) ? "true" : "false");
835
+ if (SCAN_OPTION_IS_SET(options, allow_comments))
836
+ rb_str_catf(res, "allow_comments: %s, ", SCAN_OPTION(options, allow_comments) ? "true" : "false");
837
+ if (SCAN_OPTION_IS_SET(options, dont_validate_strings))
838
+ rb_str_catf(res, "dont_validate_strings: %s, ", SCAN_OPTION(options, dont_validate_strings) ? "true" : "false");
839
+ if (SCAN_OPTION_IS_SET(options, allow_trailing_garbage))
840
+ rb_str_catf(res, "allow_trailing_garbage: %s, ", SCAN_OPTION(options, allow_trailing_garbage) ? "true" : "false");
841
+ if (SCAN_OPTION_IS_SET(options, allow_multiple_values))
842
+ rb_str_catf(res, "allow_multiple_values: %s, ", SCAN_OPTION(options, allow_multiple_values) ? "true" : "false");
843
+ if (SCAN_OPTION_IS_SET(options, allow_partial_values))
844
+ rb_str_catf(res, "allow_partial_values: %s, ", SCAN_OPTION(options, allow_partial_values) ? "true" : "false");
845
+ if (SCAN_OPTION_IS_SET(options, symbolize_path_keys))
846
+ rb_str_catf(res, "symbolize_path_keys: %s, ", SCAN_OPTION(options, symbolize_path_keys) ? "true" : "false");
847
+ if (SCAN_OPTION_IS_SET(options, with_roots_info))
848
+ rb_str_catf(res, "with_roots_info: %s, ", SCAN_OPTION(options, with_roots_info) ? "true" : "false");
849
+ if (RSTRING_END(res)[-1] == ' ')
850
+ rb_str_resize(res, RSTRING_LEN(res) - 2);
851
+ rb_str_buf_cat_ascii(res, "}>");
703
852
  return res;
704
853
  }
705
854
 
@@ -718,52 +867,60 @@ static yajl_callbacks scan_callbacks = {
718
867
 
719
868
  // def scan(json_str, path_arr, opts)
720
869
  // opts
721
- // with_path: false, verbose_error: false,
870
+ // with_path: false, verbose_error: false, symbolize_path_keys: false, with_roots_info: false
722
871
  // the following opts converted to bool and passed to yajl_config if provided, ignored if not provided
723
872
  // allow_comments, dont_validate_strings, allow_trailing_garbage, allow_multiple_values, allow_partial_values
724
- VALUE scan(int argc, VALUE *argv, VALUE self)
873
+ static VALUE scan(int argc, VALUE *argv, VALUE self)
725
874
  {
726
- VALUE json_str, path_ary, with_path_flag, kwargs;
727
- VALUE kwargs_values[SCAN_KWARGS_SIZE];
875
+ VALUE json_str, path_ary, rb_options;
876
+ scan_options options;
728
877
 
729
- int with_path = false, verbose_error = false, symbolize_path_keys = false;
730
878
  char *json_text;
731
879
  size_t json_text_len;
732
880
  yajl_handle handle;
733
881
  yajl_status stat;
734
882
  scan_ctx *ctx;
735
883
  int free_ctx = true;
736
- VALUE err_msg = Qnil, bytes_consumed, err, result;
884
+ VALUE err_msg = Qnil, bytes_consumed = Qnil, result, roots_info_result = Qundef;
737
885
  // Turned out callbacks can't raise exceptions
738
886
  // VALUE callback_err;
739
- #if RUBY_API_VERSION_MAJOR > 2 || (RUBY_API_VERSION_MAJOR == 2 && RUBY_API_VERSION_MINOR >= 7)
740
- rb_scan_args_kw(RB_SCAN_ARGS_LAST_HASH_KEYWORDS, argc, argv, "21:", &json_str, &path_ary, &with_path_flag, &kwargs);
741
- #else
742
- rb_scan_args(argc, argv, "21:", &json_str, &path_ary, &with_path_flag, &kwargs);
743
- #endif
887
+ rb_scan_args(argc, argv, "21", &json_str, &path_ary, &rb_options);
888
+ rb_check_type(json_str, T_STRING);
744
889
  // rb_io_write(rb_stderr, rb_sprintf("with_path_flag: %" PRIsVALUE " \n", with_path_flag));
745
- with_path = RTEST(with_path_flag);
746
- if (kwargs != Qnil)
890
+ switch (TYPE(rb_options))
747
891
  {
748
- rb_get_kwargs(kwargs, scan_kwargs_table, 0, SCAN_KWARGS_SIZE, kwargs_values);
749
- if (kwargs_values[0] != Qundef)
750
- with_path = RTEST(kwargs_values[0]);
751
- if (kwargs_values[1] != Qundef)
752
- verbose_error = RTEST(kwargs_values[1]);
753
- if (kwargs_values[7] != Qundef)
754
- symbolize_path_keys = RTEST(kwargs_values[7]);
892
+ case T_HASH:
893
+ case T_NIL:
894
+ scan_options_init(&options, rb_options);
895
+ break;
896
+ case T_DATA:
897
+ if (rb_obj_is_kind_of(rb_options, rb_cJsonScannerOptions))
898
+ {
899
+ scan_options *ptr;
900
+ TypedData_Get_Struct(rb_options, scan_options, &options_type, ptr);
901
+ options = *ptr;
902
+ }
903
+ else
904
+ {
905
+ rb_raise(rb_eTypeError, "Expected a Hash or %" PRIsVALUE ", got %" PRIsVALUE, rb_cJsonScannerOptions, rb_obj_class(rb_options));
906
+ }
907
+ break;
908
+ default:
909
+ rb_raise(rb_eTypeError, "Expected a Hash or %" PRIsVALUE ", got %" PRIsVALUE, rb_cJsonScannerOptions, rb_obj_class(rb_options));
910
+ break;
755
911
  }
756
- rb_check_type(json_str, T_STRING);
912
+ if (SCAN_OPTION(&options, with_roots_info))
913
+ roots_info_result = rb_ary_new();
757
914
  json_text = RSTRING_PTR(json_str);
758
915
  #if LONG_MAX > SIZE_MAX
759
916
  json_text_len = RSTRING_LENINT(json_str);
760
917
  #else
761
918
  json_text_len = RSTRING_LEN(json_str);
762
919
  #endif
763
- if (rb_obj_is_kind_of(path_ary, rb_cJsonScannerConfig))
920
+ if (rb_obj_is_kind_of(path_ary, rb_cJsonScannerSelector))
764
921
  {
765
922
  free_ctx = false;
766
- TypedData_Get_Struct(path_ary, scan_ctx, &config_type, ctx);
923
+ TypedData_Get_Struct(path_ary, scan_ctx, &selector_type, ctx);
767
924
  }
768
925
  else
769
926
  {
@@ -782,38 +939,33 @@ VALUE scan(int argc, VALUE *argv, VALUE self)
782
939
  {
783
940
  rb_ary_push(result, rb_ary_new());
784
941
  }
785
- scan_ctx_reset(ctx, result, with_path, symbolize_path_keys);
942
+ scan_ctx_reset(ctx, result, roots_info_result, SCAN_OPTION(&options, with_path), SCAN_OPTION(&options, symbolize_path_keys));
786
943
  // scan_ctx_debug(ctx);
787
944
 
788
945
  handle = yajl_alloc(&scan_callbacks, NULL, (void *)ctx);
789
- if (kwargs != Qnil) // it's safe to read kwargs_values only if rb_get_kwargs was called
790
- {
791
- if (kwargs_values[2] != Qundef)
792
- yajl_config(handle, yajl_allow_comments, RTEST(kwargs_values[2]));
793
- if (kwargs_values[3] != Qundef)
794
- yajl_config(handle, yajl_dont_validate_strings, RTEST(kwargs_values[3]));
795
- if (kwargs_values[4] != Qundef)
796
- yajl_config(handle, yajl_allow_trailing_garbage, RTEST(kwargs_values[4]));
797
- if (kwargs_values[5] != Qundef)
798
- yajl_config(handle, yajl_allow_multiple_values, RTEST(kwargs_values[5]));
799
- if (kwargs_values[6] != Qundef)
800
- yajl_config(handle, yajl_allow_partial_values, RTEST(kwargs_values[6]));
801
- }
946
+ if (SCAN_OPTION_IS_SET(&options, allow_comments))
947
+ yajl_config(handle, yajl_allow_comments, SCAN_OPTION(&options, allow_comments));
948
+ if (SCAN_OPTION_IS_SET(&options, dont_validate_strings))
949
+ yajl_config(handle, yajl_dont_validate_strings, SCAN_OPTION(&options, dont_validate_strings));
950
+ if (SCAN_OPTION_IS_SET(&options, allow_trailing_garbage))
951
+ yajl_config(handle, yajl_allow_trailing_garbage, SCAN_OPTION(&options, allow_trailing_garbage));
952
+ if (SCAN_OPTION_IS_SET(&options, allow_multiple_values))
953
+ yajl_config(handle, yajl_allow_multiple_values, SCAN_OPTION(&options, allow_multiple_values));
954
+ if (SCAN_OPTION_IS_SET(&options, allow_partial_values))
955
+ yajl_config(handle, yajl_allow_partial_values, SCAN_OPTION(&options, allow_partial_values));
802
956
  ctx->handle = handle;
803
957
  stat = yajl_parse(handle, (unsigned char *)json_text, json_text_len);
804
- scan_ctx_update_bytes_consumed(ctx);
805
958
  if (stat == yajl_status_ok)
806
959
  {
960
+ scan_ctx_save_bytes_consumed(ctx);
807
961
  stat = yajl_complete_parse(handle);
808
- scan_ctx_update_bytes_consumed(ctx);
809
962
  }
810
963
 
811
964
  if (stat != yajl_status_ok)
812
965
  {
813
- char *str = (char *)yajl_get_error(handle, verbose_error, (unsigned char *)json_text, json_text_len);
966
+ char *str = (char *)yajl_get_error(handle, SCAN_OPTION(&options, verbose_error), (unsigned char *)json_text, json_text_len);
814
967
  err_msg = rb_utf8_str_new_cstr(str);
815
- // TODO: maybe use scan_ctx_get_bytes_consumed here too? But it makes difference in premature EOF
816
- bytes_consumed = ULL2NUM(yajl_get_bytes_consumed(handle));
968
+ bytes_consumed = ULL2NUM(scan_ctx_get_bytes_consumed(ctx));
817
969
  yajl_free_error(handle, (unsigned char *)str);
818
970
  }
819
971
  // // Needed when yajl_allow_partial_values is set
@@ -844,12 +996,16 @@ VALUE scan(int argc, VALUE *argv, VALUE self)
844
996
  yajl_free(handle);
845
997
  if (err_msg != Qnil)
846
998
  {
847
- err = rb_exc_new_str(rb_eJsonScannerParseError, err_msg);
999
+ VALUE err = rb_exc_new_str(rb_eJsonScannerParseError, err_msg);
848
1000
  rb_ivar_set(err, rb_iv_bytes_consumed, bytes_consumed);
849
1001
  rb_exc_raise(err);
850
1002
  }
851
1003
  // if (callback_err != Qnil)
852
1004
  // rb_exc_raise(callback_err);
1005
+ if (roots_info_result != Qundef)
1006
+ {
1007
+ result = rb_ary_new_from_args(2, result, roots_info_result);
1008
+ }
853
1009
  return result;
854
1010
  }
855
1011
 
@@ -857,10 +1013,16 @@ RUBY_FUNC_EXPORTED void
857
1013
  Init_json_scanner(void)
858
1014
  {
859
1015
  rb_mJsonScanner = rb_define_module("JsonScanner");
860
- rb_cJsonScannerConfig = rb_define_class_under(rb_mJsonScanner, "Config", rb_cObject);
861
- rb_define_alloc_func(rb_cJsonScannerConfig, config_alloc);
862
- rb_define_method(rb_cJsonScannerConfig, "initialize", config_m_initialize, 1);
863
- rb_define_method(rb_cJsonScannerConfig, "inspect", config_m_inspect, 0);
1016
+ rb_cJsonScannerSelector = rb_define_class_under(rb_mJsonScanner, "Selector", rb_cObject);
1017
+ rb_define_alloc_func(rb_cJsonScannerSelector, selector_alloc);
1018
+ rb_define_method(rb_cJsonScannerSelector, "initialize", selector_m_initialize, 1);
1019
+ rb_define_method(rb_cJsonScannerSelector, "inspect", selector_m_inspect, 0);
1020
+ rb_define_method(rb_cJsonScannerSelector, "length", selector_m_length, 0);
1021
+ rb_define_alias(rb_cJsonScannerSelector, "size", "length");
1022
+ rb_cJsonScannerOptions = rb_define_class_under(rb_mJsonScanner, "Options", rb_cObject);
1023
+ rb_define_alloc_func(rb_cJsonScannerOptions, options_alloc);
1024
+ rb_define_method(rb_cJsonScannerOptions, "initialize", options_m_initialize, -1);
1025
+ rb_define_method(rb_cJsonScannerOptions, "inspect", options_m_inspect, 0);
864
1026
  rb_define_const(rb_mJsonScanner, "ANY_INDEX", rb_range_new(INT2FIX(0), INT2FIX(-1), false));
865
1027
  any_key_sym = rb_id2sym(rb_intern("*"));
866
1028
  rb_define_const(rb_mJsonScanner, "ANY_KEY", rb_range_new(any_key_sym, any_key_sym, false));
@@ -882,4 +1044,5 @@ Init_json_scanner(void)
882
1044
  scan_kwargs_table[5] = rb_intern("allow_multiple_values");
883
1045
  scan_kwargs_table[6] = rb_intern("allow_partial_values");
884
1046
  scan_kwargs_table[7] = rb_intern("symbolize_path_keys");
1047
+ scan_kwargs_table[8] = rb_intern("with_roots_info");
885
1048
  }
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module JsonScanner
4
- VERSION = "0.3.0"
4
+ VERSION = "1.0.0"
5
5
  end