json_scanner 0.3.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/json_scanner/extconf.rb +7 -1
- data/ext/json_scanner/json_scanner.c +252 -89
- data/lib/json_scanner/version.rb +1 -1
- data/lib/json_scanner.rb +86 -1
- metadata +17 -8
- data/README.md +0 -122
- data/spec/extensiontesttask.rb +0 -128
- data/spec/json_scanner_spec.c +0 -0
- data/spec/json_scanner_spec.rb +0 -352
- data/spec/spec_helper.rb +0 -15
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 59b0c3b3bbb9680bf3cb26983b6d3185a3af14ce45e538e953857a2a453d9391
|
4
|
+
data.tar.gz: d86a61eead87fb858fbc8bf997d6a71d578229426ba4cf36a04ecbfdbd6cf3b1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 638696fd097025ce8aa23eef8095b231d7e9c51905800a6f476dcf2e13d314620302b2cc6988e32f012d08cb7986e150c5bb7369b6ba0e407f933bba0a42489a
|
7
|
+
data.tar.gz: 4a54c918f1dc4136e3b5cc93cbc94a98bc225fdb1611f8ab666f926a3834e0b45d6b12c584b762d6a3fc207af4f459df97c0614af910e3b58215b57c3ba4df86
|
data/ext/json_scanner/extconf.rb
CHANGED
@@ -7,7 +7,13 @@ require "mkmf"
|
|
7
7
|
# selectively, or entirely remove this flag.
|
8
8
|
append_cflags("-fvisibility=hidden")
|
9
9
|
|
10
|
-
|
10
|
+
idefault, ldefault = if with_config("libyajl2-gem")
|
11
|
+
require "libyajl2"
|
12
|
+
[Libyajl2.include_path, Libyajl2.opt_path]
|
13
|
+
else
|
14
|
+
["", ""]
|
15
|
+
end
|
16
|
+
dir_config("yajl", idefault, ldefault)
|
11
17
|
|
12
18
|
unless have_library("yajl") && have_header("yajl/yajl_parse.h") && have_header("yajl/yajl_gen.h")
|
13
19
|
abort "yajl library not found"
|
@@ -1,11 +1,12 @@
|
|
1
1
|
#include "json_scanner.h"
|
2
2
|
|
3
3
|
VALUE rb_mJsonScanner;
|
4
|
-
VALUE
|
4
|
+
VALUE rb_cJsonScannerSelector;
|
5
|
+
VALUE rb_cJsonScannerOptions;
|
5
6
|
VALUE rb_eJsonScannerParseError;
|
6
7
|
#define BYTES_CONSUMED "bytes_consumed"
|
7
8
|
ID rb_iv_bytes_consumed;
|
8
|
-
#define SCAN_KWARGS_SIZE
|
9
|
+
#define SCAN_KWARGS_SIZE 9
|
9
10
|
ID scan_kwargs_table[SCAN_KWARGS_SIZE];
|
10
11
|
|
11
12
|
VALUE null_sym;
|
@@ -85,6 +86,7 @@ typedef struct
|
|
85
86
|
// Easier to use a Ruby array for result than convert later
|
86
87
|
// must be supplied by the caller and RB_GC_GUARD-ed if it isn't on the stack
|
87
88
|
VALUE points_list;
|
89
|
+
VALUE roots_info_list;
|
88
90
|
// by depth
|
89
91
|
size_t *starts;
|
90
92
|
// VALUE rb_err;
|
@@ -92,12 +94,68 @@ typedef struct
|
|
92
94
|
size_t yajl_bytes_consumed;
|
93
95
|
} scan_ctx;
|
94
96
|
|
95
|
-
|
97
|
+
typedef struct
|
98
|
+
{
|
99
|
+
int with_path;
|
100
|
+
int verbose_error;
|
101
|
+
int allow_comments;
|
102
|
+
int dont_validate_strings;
|
103
|
+
int allow_trailing_garbage;
|
104
|
+
int allow_multiple_values;
|
105
|
+
int allow_partial_values;
|
106
|
+
int symbolize_path_keys;
|
107
|
+
int with_roots_info;
|
108
|
+
} scan_options;
|
109
|
+
#define SCAN_OPTION_VALUE_MASK 1
|
110
|
+
#define SCAN_OPTION_SET_MASK (1 << 1)
|
111
|
+
#define SCAN_OPTION(options, field) ((options)->field & SCAN_OPTION_VALUE_MASK)
|
112
|
+
#define SCAN_OPTION_IS_SET(options, field) ((options)->field & SCAN_OPTION_SET_MASK)
|
113
|
+
#define SCAN_OPTION_SET(options, field, value) ((options)->field = ((value) & SCAN_OPTION_VALUE_MASK) | SCAN_OPTION_SET_MASK)
|
114
|
+
#define SCAN_OPTION_FALSE(options, field) \
|
115
|
+
(!SCAN_OPTION(options, field) && ((options)->field & SCAN_OPTION_SET_MASK))
|
116
|
+
|
117
|
+
// Fills *options from the keyword hash accepted by scan / Options#initialize.
//
// options - destination; every field is first reset to 0 ("not supplied").
//           For each key present in kwargs the field is stored through
//           SCAN_OPTION_SET, i.e. bit 0 carries the truthiness of the value
//           and SCAN_OPTION_SET_MASK records that the caller supplied it.
// kwargs  - Qnil (leave everything unset) or a Hash of keyword arguments.
//
// The numeric indices below must stay in the same order as the IDs stored in
// scan_kwargs_table by Init_json_scanner.
static void scan_options_init(scan_options *options, VALUE kwargs)
{
  VALUE kwargs_values[SCAN_KWARGS_SIZE];

  *options = (scan_options){0};
  if (kwargs == Qnil)
  {
    return;
  }

  rb_get_kwargs(kwargs, scan_kwargs_table, 0, SCAN_KWARGS_SIZE, kwargs_values);

// Ties the kwargs index and the struct field together in one place so the
// "is it present" test and the value read can never use different slots.
// Qundef marks a key that was not passed and must leave the field untouched.
#define SCAN_OPTION_FROM_KWARG(idx, field)                               \
  do                                                                     \
  {                                                                      \
    if (kwargs_values[(idx)] != Qundef)                                  \
      SCAN_OPTION_SET(options, field, RTEST(kwargs_values[(idx)]));      \
  } while (0)

  SCAN_OPTION_FROM_KWARG(0, with_path);
  SCAN_OPTION_FROM_KWARG(1, verbose_error);
  SCAN_OPTION_FROM_KWARG(2, allow_comments);
  SCAN_OPTION_FROM_KWARG(3, dont_validate_strings);
  SCAN_OPTION_FROM_KWARG(4, allow_trailing_garbage);
  SCAN_OPTION_FROM_KWARG(5, allow_multiple_values);
  SCAN_OPTION_FROM_KWARG(6, allow_partial_values);
  // Previously read slot 8 (with_roots_info) here, which either evaluated
  // Qundef or copied the wrong option's value.
  SCAN_OPTION_FROM_KWARG(7, symbolize_path_keys);
  SCAN_OPTION_FROM_KWARG(8, with_roots_info);

#undef SCAN_OPTION_FROM_KWARG
}
|
152
|
+
|
153
|
+
static inline size_t scan_ctx_get_bytes_consumed(scan_ctx *ctx)
|
96
154
|
{
|
97
155
|
return ctx->yajl_bytes_consumed + yajl_get_bytes_consumed(ctx->handle);
|
98
156
|
}
|
99
157
|
|
100
|
-
inline void
|
158
|
+
static inline void scan_ctx_save_bytes_consumed(scan_ctx *ctx)
|
101
159
|
{
|
102
160
|
ctx->yajl_bytes_consumed += yajl_get_bytes_consumed(ctx->handle);
|
103
161
|
}
|
@@ -175,7 +233,7 @@ void scan_ctx_debug(scan_ctx *ctx)
|
|
175
233
|
|
176
234
|
// FIXME: This will cause a memory leak if ruby_xmalloc raises
|
177
235
|
// path_ary must be RB_GC_GUARD-ed by the caller
|
178
|
-
VALUE scan_ctx_init(scan_ctx *ctx, VALUE path_ary, VALUE string_keys)
|
236
|
+
static VALUE scan_ctx_init(scan_ctx *ctx, VALUE path_ary, VALUE string_keys)
|
179
237
|
{
|
180
238
|
int path_ary_len;
|
181
239
|
paths_t *paths;
|
@@ -254,7 +312,8 @@ VALUE scan_ctx_init(scan_ctx *ctx, VALUE path_ary, VALUE string_keys)
|
|
254
312
|
if (string_keys != Qundef)
|
255
313
|
{
|
256
314
|
// If string_keys is provided, we need to duplicate the string
|
257
|
-
// to avoid use-after-free issues and to add the newly created string to the string_keys array
|
315
|
+
// to avoid use-after-free issues and to add the newly created string to the string_keys array.
|
316
|
+
// In Ruby 2.2 and newer symbols can be GC-ed, so we need to duplicate them as well.
|
258
317
|
entry = rb_str_dup(entry);
|
259
318
|
rb_ary_push(string_keys, entry);
|
260
319
|
}
|
@@ -310,8 +369,8 @@ VALUE scan_ctx_init(scan_ctx *ctx, VALUE path_ary, VALUE string_keys)
|
|
310
369
|
return Qundef; // no error
|
311
370
|
}
|
312
371
|
|
313
|
-
// resets temporary values in the
|
314
|
-
void scan_ctx_reset(scan_ctx *ctx, VALUE points_list, int with_path, int symbolize_path_keys)
|
372
|
+
// resets temporary values in the selector
|
373
|
+
static void scan_ctx_reset(scan_ctx *ctx, VALUE points_list, VALUE roots_info_list, int with_path, int symbolize_path_keys)
|
315
374
|
{
|
316
375
|
// TODO: reset matched_depth if implemented
|
317
376
|
ctx->current_path_len = 0;
|
@@ -319,11 +378,12 @@ void scan_ctx_reset(scan_ctx *ctx, VALUE points_list, int with_path, int symboli
|
|
319
378
|
ctx->handle = NULL;
|
320
379
|
ctx->yajl_bytes_consumed = 0;
|
321
380
|
ctx->points_list = points_list;
|
381
|
+
ctx->roots_info_list = roots_info_list;
|
322
382
|
ctx->with_path = with_path;
|
323
383
|
ctx->symbolize_path_keys = symbolize_path_keys;
|
324
384
|
}
|
325
385
|
|
326
|
-
void scan_ctx_free(scan_ctx *ctx)
|
386
|
+
static void scan_ctx_free(scan_ctx *ctx)
|
327
387
|
{
|
328
388
|
// fprintf(stderr, "scan_ctx_free\n");
|
329
389
|
if (!ctx)
|
@@ -340,7 +400,7 @@ void scan_ctx_free(scan_ctx *ctx)
|
|
340
400
|
}
|
341
401
|
|
342
402
|
// noexcept
|
343
|
-
inline void increment_arr_index(scan_ctx *sctx)
|
403
|
+
static inline void increment_arr_index(scan_ctx *sctx)
|
344
404
|
{
|
345
405
|
// remember - any value can be root
|
346
406
|
// TODO: Maybe make current_path_len 1 shorter and get rid of -1; need to change all compares
|
@@ -361,7 +421,7 @@ typedef enum
|
|
361
421
|
} value_type;
|
362
422
|
|
363
423
|
// noexcept
|
364
|
-
VALUE create_point(scan_ctx *sctx, value_type type, size_t length)
|
424
|
+
static VALUE create_point(scan_ctx *sctx, value_type type, size_t length)
|
365
425
|
{
|
366
426
|
VALUE values[3], point;
|
367
427
|
size_t curr_pos = scan_ctx_get_bytes_consumed(sctx);
|
@@ -402,7 +462,7 @@ VALUE create_point(scan_ctx *sctx, value_type type, size_t length)
|
|
402
462
|
}
|
403
463
|
|
404
464
|
// noexcept
|
405
|
-
VALUE create_path(scan_ctx *sctx)
|
465
|
+
static VALUE create_path(scan_ctx *sctx)
|
406
466
|
{
|
407
467
|
VALUE path = rb_ary_new_capa(sctx->current_path_len);
|
408
468
|
for (int i = 0; i < sctx->current_path_len; i++)
|
@@ -428,7 +488,16 @@ VALUE create_path(scan_ctx *sctx)
|
|
428
488
|
}
|
429
489
|
|
430
490
|
// noexcept
|
431
|
-
void
|
491
|
+
static inline void save_root_info(scan_ctx *sctx, VALUE type, size_t len)
|
492
|
+
{
|
493
|
+
if (sctx->roots_info_list != Qundef && sctx->current_path_len == 0)
|
494
|
+
{
|
495
|
+
rb_ary_push(sctx->roots_info_list, rb_ary_new_from_args(2, type, ULL2NUM(scan_ctx_get_bytes_consumed(sctx) - len)));
|
496
|
+
}
|
497
|
+
}
|
498
|
+
|
499
|
+
// noexcept
|
500
|
+
static void save_point(scan_ctx *sctx, value_type type, size_t length)
|
432
501
|
{
|
433
502
|
// TODO: Abort parsing if all paths are matched and no more matches are possible: only trivial key/index matchers at the current level
|
434
503
|
// TODO: Don't re-compare already matched prefixes; hard to invalidate, though
|
@@ -489,9 +558,10 @@ void save_point(scan_ctx *sctx, value_type type, size_t length)
|
|
489
558
|
}
|
490
559
|
|
491
560
|
// noexcept
|
492
|
-
int scan_on_null(void *ctx)
|
561
|
+
static int scan_on_null(void *ctx)
|
493
562
|
{
|
494
563
|
scan_ctx *sctx = (scan_ctx *)ctx;
|
564
|
+
save_root_info(sctx, null_sym, 4);
|
495
565
|
if (sctx->current_path_len > sctx->max_path_len)
|
496
566
|
return true;
|
497
567
|
increment_arr_index(sctx);
|
@@ -500,9 +570,10 @@ int scan_on_null(void *ctx)
|
|
500
570
|
}
|
501
571
|
|
502
572
|
// noexcept
|
503
|
-
int scan_on_boolean(void *ctx, int bool_val)
|
573
|
+
static int scan_on_boolean(void *ctx, int bool_val)
|
504
574
|
{
|
505
575
|
scan_ctx *sctx = (scan_ctx *)ctx;
|
576
|
+
save_root_info(sctx, boolean_sym, bool_val ? 4 : 5);
|
506
577
|
if (sctx->current_path_len > sctx->max_path_len)
|
507
578
|
return true;
|
508
579
|
increment_arr_index(sctx);
|
@@ -511,9 +582,10 @@ int scan_on_boolean(void *ctx, int bool_val)
|
|
511
582
|
}
|
512
583
|
|
513
584
|
// noexcept
|
514
|
-
int scan_on_number(void *ctx, const char *val, size_t len)
|
585
|
+
static int scan_on_number(void *ctx, const char *val, size_t len)
|
515
586
|
{
|
516
587
|
scan_ctx *sctx = (scan_ctx *)ctx;
|
588
|
+
save_root_info(sctx, number_sym, len);
|
517
589
|
if (sctx->current_path_len > sctx->max_path_len)
|
518
590
|
return true;
|
519
591
|
increment_arr_index(sctx);
|
@@ -522,9 +594,10 @@ int scan_on_number(void *ctx, const char *val, size_t len)
|
|
522
594
|
}
|
523
595
|
|
524
596
|
// noexcept
|
525
|
-
int scan_on_string(void *ctx, const unsigned char *val, size_t len)
|
597
|
+
static int scan_on_string(void *ctx, const unsigned char *val, size_t len)
|
526
598
|
{
|
527
599
|
scan_ctx *sctx = (scan_ctx *)ctx;
|
600
|
+
save_root_info(sctx, string_sym, len + 2);
|
528
601
|
if (sctx->current_path_len > sctx->max_path_len)
|
529
602
|
return true;
|
530
603
|
increment_arr_index(sctx);
|
@@ -533,9 +606,11 @@ int scan_on_string(void *ctx, const unsigned char *val, size_t len)
|
|
533
606
|
}
|
534
607
|
|
535
608
|
// noexcept
|
536
|
-
int scan_on_start_object(void *ctx)
|
609
|
+
static int scan_on_start_object(void *ctx)
|
537
610
|
{
|
538
611
|
scan_ctx *sctx = (scan_ctx *)ctx;
|
612
|
+
// Save in the beginning in case of a partial value
|
613
|
+
save_root_info(sctx, object_sym, 1);
|
539
614
|
if (sctx->current_path_len > sctx->max_path_len)
|
540
615
|
{
|
541
616
|
sctx->current_path_len++;
|
@@ -550,7 +625,7 @@ int scan_on_start_object(void *ctx)
|
|
550
625
|
}
|
551
626
|
|
552
627
|
// noexcept
|
553
|
-
int scan_on_key(void *ctx, const unsigned char *key, size_t len)
|
628
|
+
static int scan_on_key(void *ctx, const unsigned char *key, size_t len)
|
554
629
|
{
|
555
630
|
scan_ctx *sctx = (scan_ctx *)ctx;
|
556
631
|
if (sctx->current_path_len > sctx->max_path_len)
|
@@ -563,7 +638,7 @@ int scan_on_key(void *ctx, const unsigned char *key, size_t len)
|
|
563
638
|
}
|
564
639
|
|
565
640
|
// noexcept
|
566
|
-
int scan_on_end_object(void *ctx)
|
641
|
+
static int scan_on_end_object(void *ctx)
|
567
642
|
{
|
568
643
|
scan_ctx *sctx = (scan_ctx *)ctx;
|
569
644
|
sctx->current_path_len--;
|
@@ -573,9 +648,11 @@ int scan_on_end_object(void *ctx)
|
|
573
648
|
}
|
574
649
|
|
575
650
|
// noexcept
|
576
|
-
int scan_on_start_array(void *ctx)
|
651
|
+
static int scan_on_start_array(void *ctx)
|
577
652
|
{
|
578
653
|
scan_ctx *sctx = (scan_ctx *)ctx;
|
654
|
+
// Save in the beginning in case of a partial value
|
655
|
+
save_root_info(sctx, array_sym, 1);
|
579
656
|
if (sctx->current_path_len > sctx->max_path_len)
|
580
657
|
{
|
581
658
|
sctx->current_path_len++;
|
@@ -593,7 +670,7 @@ int scan_on_start_array(void *ctx)
|
|
593
670
|
}
|
594
671
|
|
595
672
|
// noexcept
|
596
|
-
int scan_on_end_array(void *ctx)
|
673
|
+
static int scan_on_end_array(void *ctx)
|
597
674
|
{
|
598
675
|
scan_ctx *sctx = (scan_ctx *)ctx;
|
599
676
|
sctx->current_path_len--;
|
@@ -602,13 +679,13 @@ int scan_on_end_array(void *ctx)
|
|
602
679
|
return true;
|
603
680
|
}
|
604
681
|
|
605
|
-
void
|
682
|
+
static void selector_free(void *data)
|
606
683
|
{
|
607
684
|
scan_ctx_free((scan_ctx *)data);
|
608
685
|
ruby_xfree(data);
|
609
686
|
}
|
610
687
|
|
611
|
-
size_t
|
688
|
+
static size_t selector_size(const void *data)
|
612
689
|
{
|
613
690
|
// see ObjectSpace.memsize_of
|
614
691
|
scan_ctx *ctx = (scan_ctx *)data;
|
@@ -630,16 +707,16 @@ size_t config_size(const void *data)
|
|
630
707
|
return res;
|
631
708
|
}
|
632
709
|
|
633
|
-
static const rb_data_type_t
|
634
|
-
.wrap_struct_name = "
|
710
|
+
static const rb_data_type_t selector_type = {
|
711
|
+
.wrap_struct_name = "json_scanner_selector",
|
635
712
|
.function = {
|
636
|
-
.dfree =
|
637
|
-
.dsize =
|
713
|
+
.dfree = selector_free,
|
714
|
+
.dsize = selector_size,
|
638
715
|
},
|
639
716
|
.flags = RUBY_TYPED_FREE_IMMEDIATELY,
|
640
717
|
};
|
641
718
|
|
642
|
-
VALUE
|
719
|
+
static VALUE selector_alloc(VALUE self)
|
643
720
|
{
|
644
721
|
scan_ctx *ctx = ruby_xmalloc(sizeof(scan_ctx));
|
645
722
|
ctx->paths = NULL;
|
@@ -647,15 +724,15 @@ VALUE config_alloc(VALUE self)
|
|
647
724
|
ctx->current_path = NULL;
|
648
725
|
ctx->max_path_len = 0;
|
649
726
|
ctx->starts = NULL;
|
650
|
-
scan_ctx_reset(ctx, Qundef, false, false);
|
651
|
-
return TypedData_Wrap_Struct(self, &
|
727
|
+
scan_ctx_reset(ctx, Qundef, Qundef, false, false);
|
728
|
+
return TypedData_Wrap_Struct(self, &selector_type, ctx);
|
652
729
|
}
|
653
730
|
|
654
|
-
VALUE
|
731
|
+
static VALUE selector_m_initialize(VALUE self, VALUE path_ary)
|
655
732
|
{
|
656
733
|
scan_ctx *ctx;
|
657
734
|
VALUE scan_ctx_init_err, string_keys;
|
658
|
-
TypedData_Get_Struct(self, scan_ctx, &
|
735
|
+
TypedData_Get_Struct(self, scan_ctx, &selector_type, ctx);
|
659
736
|
string_keys = rb_ary_new();
|
660
737
|
scan_ctx_init_err = scan_ctx_init(ctx, path_ary, string_keys);
|
661
738
|
if (scan_ctx_init_err != Qundef)
|
@@ -666,15 +743,15 @@ VALUE config_m_initialize(VALUE self, VALUE path_ary)
|
|
666
743
|
return self;
|
667
744
|
}
|
668
745
|
|
669
|
-
VALUE
|
746
|
+
static VALUE selector_m_inspect(VALUE self)
|
670
747
|
{
|
671
748
|
scan_ctx *ctx;
|
672
749
|
VALUE res;
|
673
|
-
TypedData_Get_Struct(self, scan_ctx, &
|
750
|
+
TypedData_Get_Struct(self, scan_ctx, &selector_type, ctx);
|
674
751
|
res = rb_sprintf("#<%" PRIsVALUE " [", rb_class_name(CLASS_OF(self)));
|
675
752
|
for (int i = 0; ctx->paths && i < ctx->paths_len; i++)
|
676
753
|
{
|
677
|
-
|
754
|
+
rb_str_buf_cat_ascii(res, "[");
|
678
755
|
for (int j = 0; j < ctx->paths[i].len; j++)
|
679
756
|
{
|
680
757
|
switch (ctx->paths[i].elems[j].type)
|
@@ -686,20 +763,92 @@ VALUE config_m_inspect(VALUE self)
|
|
686
763
|
rb_str_catf(res, "%ld", ctx->paths[i].elems[j].value.index);
|
687
764
|
break;
|
688
765
|
case MATCHER_INDEX_RANGE:
|
689
|
-
rb_str_catf(res, "(%ld..%ld)", ctx->paths[i].elems[j].value.range.start, ctx->paths[i].elems[j].value.range.end);
|
766
|
+
rb_str_catf(res, "(%ld..%ld)", ctx->paths[i].elems[j].value.range.start, ctx->paths[i].elems[j].value.range.end == LONG_MAX ? -1L : ctx->paths[i].elems[j].value.range.end);
|
690
767
|
break;
|
691
768
|
case MATCHER_ANY_KEY:
|
692
|
-
|
769
|
+
rb_str_buf_cat_ascii(res, "('*'..'*')");
|
693
770
|
break;
|
694
771
|
}
|
695
772
|
if (j < ctx->paths[i].len - 1)
|
696
|
-
|
773
|
+
rb_str_buf_cat_ascii(res, ", ");
|
697
774
|
}
|
698
|
-
|
775
|
+
rb_str_buf_cat_ascii(res, "]");
|
699
776
|
if (i < ctx->paths_len - 1)
|
700
|
-
|
777
|
+
rb_str_buf_cat_ascii(res, ", ");
|
701
778
|
}
|
702
|
-
|
779
|
+
rb_str_buf_cat_ascii(res, "]>");
|
780
|
+
return res;
|
781
|
+
}
|
782
|
+
|
783
|
+
static VALUE selector_m_length(VALUE self)
|
784
|
+
{
|
785
|
+
scan_ctx *ctx;
|
786
|
+
TypedData_Get_Struct(self, scan_ctx, &selector_type, ctx);
|
787
|
+
return INT2FIX(ctx->paths_len);
|
788
|
+
}
|
789
|
+
|
790
|
+
static size_t options_size(const void *data)
|
791
|
+
{
|
792
|
+
return sizeof(scan_options);
|
793
|
+
}
|
794
|
+
|
795
|
+
static const rb_data_type_t options_type = {
|
796
|
+
.wrap_struct_name = "json_scanner_options",
|
797
|
+
.function = {
|
798
|
+
.dfree = RUBY_DEFAULT_FREE,
|
799
|
+
.dsize = options_size,
|
800
|
+
},
|
801
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY,
|
802
|
+
};
|
803
|
+
|
804
|
+
static VALUE options_alloc(VALUE self)
|
805
|
+
{
|
806
|
+
// NOT INITIALIZED
|
807
|
+
scan_options *options;
|
808
|
+
return TypedData_Make_Struct(self, scan_options, &options_type, options);
|
809
|
+
}
|
810
|
+
|
811
|
+
static VALUE options_m_initialize(int argc, VALUE *argv, VALUE self)
|
812
|
+
{
|
813
|
+
VALUE kwargs;
|
814
|
+
scan_options *options;
|
815
|
+
TypedData_Get_Struct(self, scan_options, &options_type, options);
|
816
|
+
#if RUBY_API_VERSION_MAJOR > 2 || (RUBY_API_VERSION_MAJOR == 2 && RUBY_API_VERSION_MINOR >= 7)
|
817
|
+
rb_scan_args_kw(RB_SCAN_ARGS_LAST_HASH_KEYWORDS, argc, argv, "0:", &kwargs);
|
818
|
+
#else
|
819
|
+
rb_scan_args(argc, argv, "0:", &kwargs);
|
820
|
+
#endif
|
821
|
+
scan_options_init(options, kwargs);
|
822
|
+
return self;
|
823
|
+
}
|
824
|
+
|
825
|
+
static VALUE options_m_inspect(VALUE self)
|
826
|
+
{
|
827
|
+
VALUE res;
|
828
|
+
scan_options *options;
|
829
|
+
TypedData_Get_Struct(self, scan_options, &options_type, options);
|
830
|
+
res = rb_sprintf("#<%" PRIsVALUE " {", rb_class_name(CLASS_OF(self)));
|
831
|
+
if (SCAN_OPTION_IS_SET(options, with_path))
|
832
|
+
rb_str_catf(res, "with_path: %s, ", SCAN_OPTION(options, with_path) ? "true" : "false");
|
833
|
+
if (SCAN_OPTION_IS_SET(options, verbose_error))
|
834
|
+
rb_str_catf(res, "verbose_error: %s, ", SCAN_OPTION(options, verbose_error) ? "true" : "false");
|
835
|
+
if (SCAN_OPTION_IS_SET(options, allow_comments))
|
836
|
+
rb_str_catf(res, "allow_comments: %s, ", SCAN_OPTION(options, allow_comments) ? "true" : "false");
|
837
|
+
if (SCAN_OPTION_IS_SET(options, dont_validate_strings))
|
838
|
+
rb_str_catf(res, "dont_validate_strings: %s, ", SCAN_OPTION(options, dont_validate_strings) ? "true" : "false");
|
839
|
+
if (SCAN_OPTION_IS_SET(options, allow_trailing_garbage))
|
840
|
+
rb_str_catf(res, "allow_trailing_garbage: %s, ", SCAN_OPTION(options, allow_trailing_garbage) ? "true" : "false");
|
841
|
+
if (SCAN_OPTION_IS_SET(options, allow_multiple_values))
|
842
|
+
rb_str_catf(res, "allow_multiple_values: %s, ", SCAN_OPTION(options, allow_multiple_values) ? "true" : "false");
|
843
|
+
if (SCAN_OPTION_IS_SET(options, allow_partial_values))
|
844
|
+
rb_str_catf(res, "allow_partial_values: %s, ", SCAN_OPTION(options, allow_partial_values) ? "true" : "false");
|
845
|
+
if (SCAN_OPTION_IS_SET(options, symbolize_path_keys))
|
846
|
+
rb_str_catf(res, "symbolize_path_keys: %s, ", SCAN_OPTION(options, symbolize_path_keys) ? "true" : "false");
|
847
|
+
if (SCAN_OPTION_IS_SET(options, with_roots_info))
|
848
|
+
rb_str_catf(res, "with_roots_info: %s, ", SCAN_OPTION(options, with_roots_info) ? "true" : "false");
|
849
|
+
if (RSTRING_END(res)[-1] == ' ')
|
850
|
+
rb_str_resize(res, RSTRING_LEN(res) - 2);
|
851
|
+
rb_str_buf_cat_ascii(res, "}>");
|
703
852
|
return res;
|
704
853
|
}
|
705
854
|
|
@@ -718,52 +867,60 @@ static yajl_callbacks scan_callbacks = {
|
|
718
867
|
|
719
868
|
// def scan(json_str, path_arr, opts)
|
720
869
|
// opts
|
721
|
-
// with_path: false, verbose_error: false,
|
870
|
+
// with_path: false, verbose_error: false, symbolize_path_keys: false, with_roots_info: false
|
722
871
|
// the following opts converted to bool and passed to yajl_config if provided, ignored if not provided
|
723
872
|
// allow_comments, dont_validate_strings, allow_trailing_garbage, allow_multiple_values, allow_partial_values
|
724
|
-
VALUE scan(int argc, VALUE *argv, VALUE self)
|
873
|
+
static VALUE scan(int argc, VALUE *argv, VALUE self)
|
725
874
|
{
|
726
|
-
VALUE json_str, path_ary,
|
727
|
-
|
875
|
+
VALUE json_str, path_ary, rb_options;
|
876
|
+
scan_options options;
|
728
877
|
|
729
|
-
int with_path = false, verbose_error = false, symbolize_path_keys = false;
|
730
878
|
char *json_text;
|
731
879
|
size_t json_text_len;
|
732
880
|
yajl_handle handle;
|
733
881
|
yajl_status stat;
|
734
882
|
scan_ctx *ctx;
|
735
883
|
int free_ctx = true;
|
736
|
-
VALUE err_msg = Qnil, bytes_consumed,
|
884
|
+
VALUE err_msg = Qnil, bytes_consumed = Qnil, result, roots_info_result = Qundef;
|
737
885
|
// Turned out callbacks can't raise exceptions
|
738
886
|
// VALUE callback_err;
|
739
|
-
|
740
|
-
|
741
|
-
#else
|
742
|
-
rb_scan_args(argc, argv, "21:", &json_str, &path_ary, &with_path_flag, &kwargs);
|
743
|
-
#endif
|
887
|
+
rb_scan_args(argc, argv, "21", &json_str, &path_ary, &rb_options);
|
888
|
+
rb_check_type(json_str, T_STRING);
|
744
889
|
// rb_io_write(rb_stderr, rb_sprintf("with_path_flag: %" PRIsVALUE " \n", with_path_flag));
|
745
|
-
|
746
|
-
if (kwargs != Qnil)
|
890
|
+
switch (TYPE(rb_options))
|
747
891
|
{
|
748
|
-
|
749
|
-
|
750
|
-
|
751
|
-
|
752
|
-
|
753
|
-
if (
|
754
|
-
|
892
|
+
case T_HASH:
|
893
|
+
case T_NIL:
|
894
|
+
scan_options_init(&options, rb_options);
|
895
|
+
break;
|
896
|
+
case T_DATA:
|
897
|
+
if (rb_obj_is_kind_of(rb_options, rb_cJsonScannerOptions))
|
898
|
+
{
|
899
|
+
scan_options *ptr;
|
900
|
+
TypedData_Get_Struct(rb_options, scan_options, &options_type, ptr);
|
901
|
+
options = *ptr;
|
902
|
+
}
|
903
|
+
else
|
904
|
+
{
|
905
|
+
rb_raise(rb_eTypeError, "Expected a Hash or %" PRIsVALUE ", got %" PRIsVALUE, rb_cJsonScannerOptions, rb_obj_class(rb_options));
|
906
|
+
}
|
907
|
+
break;
|
908
|
+
default:
|
909
|
+
rb_raise(rb_eTypeError, "Expected a Hash or %" PRIsVALUE ", got %" PRIsVALUE, rb_cJsonScannerOptions, rb_obj_class(rb_options));
|
910
|
+
break;
|
755
911
|
}
|
756
|
-
|
912
|
+
if (SCAN_OPTION(&options, with_roots_info))
|
913
|
+
roots_info_result = rb_ary_new();
|
757
914
|
json_text = RSTRING_PTR(json_str);
|
758
915
|
#if LONG_MAX > SIZE_MAX
|
759
916
|
json_text_len = RSTRING_LENINT(json_str);
|
760
917
|
#else
|
761
918
|
json_text_len = RSTRING_LEN(json_str);
|
762
919
|
#endif
|
763
|
-
if (rb_obj_is_kind_of(path_ary,
|
920
|
+
if (rb_obj_is_kind_of(path_ary, rb_cJsonScannerSelector))
|
764
921
|
{
|
765
922
|
free_ctx = false;
|
766
|
-
TypedData_Get_Struct(path_ary, scan_ctx, &
|
923
|
+
TypedData_Get_Struct(path_ary, scan_ctx, &selector_type, ctx);
|
767
924
|
}
|
768
925
|
else
|
769
926
|
{
|
@@ -782,38 +939,33 @@ VALUE scan(int argc, VALUE *argv, VALUE self)
|
|
782
939
|
{
|
783
940
|
rb_ary_push(result, rb_ary_new());
|
784
941
|
}
|
785
|
-
scan_ctx_reset(ctx, result, with_path, symbolize_path_keys);
|
942
|
+
scan_ctx_reset(ctx, result, roots_info_result, SCAN_OPTION(&options, with_path), SCAN_OPTION(&options, symbolize_path_keys));
|
786
943
|
// scan_ctx_debug(ctx);
|
787
944
|
|
788
945
|
handle = yajl_alloc(&scan_callbacks, NULL, (void *)ctx);
|
789
|
-
if (
|
790
|
-
|
791
|
-
|
792
|
-
|
793
|
-
|
794
|
-
|
795
|
-
|
796
|
-
|
797
|
-
|
798
|
-
|
799
|
-
if (kwargs_values[6] != Qundef)
|
800
|
-
yajl_config(handle, yajl_allow_partial_values, RTEST(kwargs_values[6]));
|
801
|
-
}
|
946
|
+
if (SCAN_OPTION_IS_SET(&options, allow_comments))
|
947
|
+
yajl_config(handle, yajl_allow_comments, SCAN_OPTION(&options, allow_comments));
|
948
|
+
if (SCAN_OPTION_IS_SET(&options, dont_validate_strings))
|
949
|
+
yajl_config(handle, yajl_dont_validate_strings, SCAN_OPTION(&options, dont_validate_strings));
|
950
|
+
if (SCAN_OPTION_IS_SET(&options, allow_trailing_garbage))
|
951
|
+
yajl_config(handle, yajl_allow_trailing_garbage, SCAN_OPTION(&options, allow_trailing_garbage));
|
952
|
+
if (SCAN_OPTION_IS_SET(&options, allow_multiple_values))
|
953
|
+
yajl_config(handle, yajl_allow_multiple_values, SCAN_OPTION(&options, allow_multiple_values));
|
954
|
+
if (SCAN_OPTION_IS_SET(&options, allow_partial_values))
|
955
|
+
yajl_config(handle, yajl_allow_partial_values, SCAN_OPTION(&options, allow_partial_values));
|
802
956
|
ctx->handle = handle;
|
803
957
|
stat = yajl_parse(handle, (unsigned char *)json_text, json_text_len);
|
804
|
-
scan_ctx_update_bytes_consumed(ctx);
|
805
958
|
if (stat == yajl_status_ok)
|
806
959
|
{
|
960
|
+
scan_ctx_save_bytes_consumed(ctx);
|
807
961
|
stat = yajl_complete_parse(handle);
|
808
|
-
scan_ctx_update_bytes_consumed(ctx);
|
809
962
|
}
|
810
963
|
|
811
964
|
if (stat != yajl_status_ok)
|
812
965
|
{
|
813
|
-
char *str = (char *)yajl_get_error(handle, verbose_error, (unsigned char *)json_text, json_text_len);
|
966
|
+
char *str = (char *)yajl_get_error(handle, SCAN_OPTION(&options, verbose_error), (unsigned char *)json_text, json_text_len);
|
814
967
|
err_msg = rb_utf8_str_new_cstr(str);
|
815
|
-
|
816
|
-
bytes_consumed = ULL2NUM(yajl_get_bytes_consumed(handle));
|
968
|
+
bytes_consumed = ULL2NUM(scan_ctx_get_bytes_consumed(ctx));
|
817
969
|
yajl_free_error(handle, (unsigned char *)str);
|
818
970
|
}
|
819
971
|
// // Needed when yajl_allow_partial_values is set
|
@@ -844,12 +996,16 @@ VALUE scan(int argc, VALUE *argv, VALUE self)
|
|
844
996
|
yajl_free(handle);
|
845
997
|
if (err_msg != Qnil)
|
846
998
|
{
|
847
|
-
err = rb_exc_new_str(rb_eJsonScannerParseError, err_msg);
|
999
|
+
VALUE err = rb_exc_new_str(rb_eJsonScannerParseError, err_msg);
|
848
1000
|
rb_ivar_set(err, rb_iv_bytes_consumed, bytes_consumed);
|
849
1001
|
rb_exc_raise(err);
|
850
1002
|
}
|
851
1003
|
// if (callback_err != Qnil)
|
852
1004
|
// rb_exc_raise(callback_err);
|
1005
|
+
if (roots_info_result != Qundef)
|
1006
|
+
{
|
1007
|
+
result = rb_ary_new_from_args(2, result, roots_info_result);
|
1008
|
+
}
|
853
1009
|
return result;
|
854
1010
|
}
|
855
1011
|
|
@@ -857,10 +1013,16 @@ RUBY_FUNC_EXPORTED void
|
|
857
1013
|
Init_json_scanner(void)
|
858
1014
|
{
|
859
1015
|
rb_mJsonScanner = rb_define_module("JsonScanner");
|
860
|
-
|
861
|
-
rb_define_alloc_func(
|
862
|
-
rb_define_method(
|
863
|
-
rb_define_method(
|
1016
|
+
rb_cJsonScannerSelector = rb_define_class_under(rb_mJsonScanner, "Selector", rb_cObject);
|
1017
|
+
rb_define_alloc_func(rb_cJsonScannerSelector, selector_alloc);
|
1018
|
+
rb_define_method(rb_cJsonScannerSelector, "initialize", selector_m_initialize, 1);
|
1019
|
+
rb_define_method(rb_cJsonScannerSelector, "inspect", selector_m_inspect, 0);
|
1020
|
+
rb_define_method(rb_cJsonScannerSelector, "length", selector_m_length, 0);
|
1021
|
+
rb_define_alias(rb_cJsonScannerSelector, "size", "length");
|
1022
|
+
rb_cJsonScannerOptions = rb_define_class_under(rb_mJsonScanner, "Options", rb_cObject);
|
1023
|
+
rb_define_alloc_func(rb_cJsonScannerOptions, options_alloc);
|
1024
|
+
rb_define_method(rb_cJsonScannerOptions, "initialize", options_m_initialize, -1);
|
1025
|
+
rb_define_method(rb_cJsonScannerOptions, "inspect", options_m_inspect, 0);
|
864
1026
|
rb_define_const(rb_mJsonScanner, "ANY_INDEX", rb_range_new(INT2FIX(0), INT2FIX(-1), false));
|
865
1027
|
any_key_sym = rb_id2sym(rb_intern("*"));
|
866
1028
|
rb_define_const(rb_mJsonScanner, "ANY_KEY", rb_range_new(any_key_sym, any_key_sym, false));
|
@@ -882,4 +1044,5 @@ Init_json_scanner(void)
|
|
882
1044
|
scan_kwargs_table[5] = rb_intern("allow_multiple_values");
|
883
1045
|
scan_kwargs_table[6] = rb_intern("allow_partial_values");
|
884
1046
|
scan_kwargs_table[7] = rb_intern("symbolize_path_keys");
|
1047
|
+
scan_kwargs_table[8] = rb_intern("with_roots_info");
|
885
1048
|
}
|
data/lib/json_scanner/version.rb
CHANGED