json_scanner 0.1.1 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/json_scanner/json_scanner.c +91 -34
- data/ext/json_scanner/json_scanner.h +1 -0
- data/lib/json_scanner/version.rb +1 -1
- data/spec/json_scanner_spec.rb +144 -16
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f34237d4ceab009f685b82a4e480247f23c804db96bded6d1bacea5ddd4a0946
|
4
|
+
data.tar.gz: 87484e4cbab84666b41ddb67553b0c985ef6dc29d8f1154a458173deade08587
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5f6ae7f8d7afc88fee60e88eb8efe776b7ae0dffe25ecffa5ffc61241629eb4c3ec86a9ab1b1e76d49b4bd12498ee15625677afbc997039f9295e65e510a32df
|
7
|
+
data.tar.gz: b5e95df2d53c0a224f6a089a55a6a0c502adb7e4dce276f2f85c48a063182f65f1425eff1f97cf409de46c815b45bbfa0626e54d76036a5c66c13eefa4146648
|
@@ -1,8 +1,8 @@
|
|
1
1
|
#include "json_scanner.h"
|
2
2
|
|
3
3
|
VALUE rb_mJsonScanner;
|
4
|
-
VALUE rb_mJsonScannerOptions;
|
5
4
|
VALUE rb_eJsonScannerParseError;
|
5
|
+
ID scan_kwargs_table[7];
|
6
6
|
|
7
7
|
VALUE null_sym;
|
8
8
|
VALUE boolean_sym;
|
@@ -84,7 +84,7 @@ typedef struct
|
|
84
84
|
} scan_ctx;
|
85
85
|
|
86
86
|
// FIXME: This will cause memory leak if ruby_xmalloc raises
|
87
|
-
scan_ctx *scan_ctx_init(VALUE path_ary,
|
87
|
+
scan_ctx *scan_ctx_init(VALUE path_ary, int with_path)
|
88
88
|
{
|
89
89
|
int path_ary_len;
|
90
90
|
scan_ctx *ctx;
|
@@ -134,7 +134,7 @@ scan_ctx *scan_ctx_init(VALUE path_ary, VALUE with_path)
|
|
134
134
|
|
135
135
|
ctx = ruby_xmalloc(sizeof(scan_ctx));
|
136
136
|
|
137
|
-
ctx->with_path =
|
137
|
+
ctx->with_path = with_path;
|
138
138
|
ctx->max_path_len = 0;
|
139
139
|
|
140
140
|
paths = ruby_xmalloc(sizeof(paths_t) * path_ary_len);
|
@@ -196,7 +196,7 @@ scan_ctx *scan_ctx_init(VALUE path_ary, VALUE with_path)
|
|
196
196
|
rb_ary_push(ctx->points_list, rb_ary_new());
|
197
197
|
}
|
198
198
|
|
199
|
-
ctx->starts = ruby_xmalloc2(sizeof(size_t), ctx->max_path_len);
|
199
|
+
ctx->starts = ruby_xmalloc2(sizeof(size_t), ctx->max_path_len + 1);
|
200
200
|
// ctx->rb_err = Qnil;
|
201
201
|
ctx->handle = NULL;
|
202
202
|
|
@@ -239,10 +239,10 @@ typedef enum
|
|
239
239
|
} value_type;
|
240
240
|
|
241
241
|
// noexcept
|
242
|
-
|
242
|
+
VALUE create_point(scan_ctx *sctx, value_type type, size_t length, size_t curr_pos)
|
243
243
|
{
|
244
244
|
VALUE values[3];
|
245
|
-
|
245
|
+
VALUE point = rb_ary_new_capa(3);
|
246
246
|
// noexcept
|
247
247
|
values[1] = RB_ULONG2NUM(curr_pos);
|
248
248
|
switch (type)
|
@@ -274,7 +274,31 @@ void create_point(VALUE *point, scan_ctx *sctx, value_type type, size_t length,
|
|
274
274
|
break;
|
275
275
|
}
|
276
276
|
// rb_ary_cat raises only in case of a frozen array or if len is too long
|
277
|
-
rb_ary_cat(
|
277
|
+
rb_ary_cat(point, values, 3);
|
278
|
+
return point;
|
279
|
+
}
|
280
|
+
|
281
|
+
// noexcept
|
282
|
+
VALUE create_path(scan_ctx *sctx)
|
283
|
+
{
|
284
|
+
VALUE path = rb_ary_new_capa(sctx->current_path_len);
|
285
|
+
for (int i = 0; i < sctx->current_path_len; i++)
|
286
|
+
{
|
287
|
+
VALUE entry;
|
288
|
+
switch (sctx->current_path[i].type)
|
289
|
+
{
|
290
|
+
case PATH_KEY:
|
291
|
+
entry = rb_str_new(sctx->current_path[i].value.key.val, sctx->current_path[i].value.key.len);
|
292
|
+
break;
|
293
|
+
case PATH_INDEX:
|
294
|
+
entry = RB_ULONG2NUM(sctx->current_path[i].value.index);
|
295
|
+
break;
|
296
|
+
default:
|
297
|
+
entry = Qnil;
|
298
|
+
}
|
299
|
+
rb_ary_push(path, entry);
|
300
|
+
}
|
301
|
+
return path;
|
278
302
|
}
|
279
303
|
|
280
304
|
// noexcept
|
@@ -282,6 +306,7 @@ void save_point(scan_ctx *sctx, value_type type, size_t length)
|
|
282
306
|
{
|
283
307
|
// TODO: Abort parsing if all paths are matched and no more matches are possible: only trivial key/index matchers at the current level
|
284
308
|
// TODO: Don't re-compare already matched prefixes; hard to invalidate, though
|
309
|
+
// TODO: Might fail in case of no memory
|
285
310
|
VALUE point = Qundef;
|
286
311
|
int match;
|
287
312
|
for (int i = 0; i < sctx->paths_len; i++)
|
@@ -319,7 +344,11 @@ void save_point(scan_ctx *sctx, value_type type, size_t length)
|
|
319
344
|
{
|
320
345
|
if (point == Qundef)
|
321
346
|
{
|
322
|
-
create_point(
|
347
|
+
point = create_point(sctx, type, length, yajl_get_bytes_consumed(sctx->handle));
|
348
|
+
if (sctx->with_path)
|
349
|
+
{
|
350
|
+
point = rb_ary_new_from_args(2, create_path(sctx), point);
|
351
|
+
}
|
323
352
|
}
|
324
353
|
// rb_ary_push raises only in case of a frozen array, which is not the case
|
325
354
|
// rb_ary_entry is safe
|
@@ -382,11 +411,9 @@ int scan_on_start_object(void *ctx)
|
|
382
411
|
return true;
|
383
412
|
}
|
384
413
|
increment_arr_index(sctx);
|
414
|
+
sctx->starts[sctx->current_path_len] = yajl_get_bytes_consumed(sctx->handle) - 1;
|
385
415
|
if (sctx->current_path_len < sctx->max_path_len)
|
386
|
-
{
|
387
|
-
sctx->starts[sctx->current_path_len] = yajl_get_bytes_consumed(sctx->handle) - 1;
|
388
416
|
sctx->current_path[sctx->current_path_len].type = PATH_KEY;
|
389
|
-
}
|
390
417
|
sctx->current_path_len++;
|
391
418
|
return true;
|
392
419
|
}
|
@@ -409,9 +436,8 @@ int scan_on_end_object(void *ctx)
|
|
409
436
|
{
|
410
437
|
scan_ctx *sctx = (scan_ctx *)ctx;
|
411
438
|
sctx->current_path_len--;
|
412
|
-
if (sctx->current_path_len
|
413
|
-
|
414
|
-
save_point(sctx, object_value, 0);
|
439
|
+
if (sctx->current_path_len <= sctx->max_path_len)
|
440
|
+
save_point(sctx, object_value, 0);
|
415
441
|
return true;
|
416
442
|
}
|
417
443
|
|
@@ -425,9 +451,9 @@ int scan_on_start_array(void *ctx)
|
|
425
451
|
return true;
|
426
452
|
}
|
427
453
|
increment_arr_index(sctx);
|
454
|
+
sctx->starts[sctx->current_path_len] = yajl_get_bytes_consumed(sctx->handle) - 1;
|
428
455
|
if (sctx->current_path_len < sctx->max_path_len)
|
429
456
|
{
|
430
|
-
sctx->starts[sctx->current_path_len] = yajl_get_bytes_consumed(sctx->handle) - 1;
|
431
457
|
sctx->current_path[sctx->current_path_len].type = PATH_INDEX;
|
432
458
|
sctx->current_path[sctx->current_path_len].value.index = -1;
|
433
459
|
}
|
@@ -440,9 +466,8 @@ int scan_on_end_array(void *ctx)
|
|
440
466
|
{
|
441
467
|
scan_ctx *sctx = (scan_ctx *)ctx;
|
442
468
|
sctx->current_path_len--;
|
443
|
-
if (sctx->current_path_len
|
444
|
-
|
445
|
-
save_point(sctx, array_value, 0);
|
469
|
+
if (sctx->current_path_len <= sctx->max_path_len)
|
470
|
+
save_point(sctx, array_value, 0);
|
446
471
|
return true;
|
447
472
|
}
|
448
473
|
|
@@ -459,9 +484,17 @@ static yajl_callbacks scan_callbacks = {
|
|
459
484
|
scan_on_start_array,
|
460
485
|
scan_on_end_array};
|
461
486
|
|
462
|
-
//
|
463
|
-
|
487
|
+
// def scan(json_str, path_arr, opts)
|
488
|
+
// opts
|
489
|
+
// with_path: false, verbose_error: false,
|
490
|
+
// the following opts converted to bool and passed to yajl_config if provided, ignored if not provided
|
491
|
+
// allow_comments, dont_validate_strings, allow_trailing_garbage, allow_multiple_values, allow_partial_values
|
492
|
+
VALUE scan(int argc, VALUE *argv, VALUE self)
|
464
493
|
{
|
494
|
+
VALUE json_str, path_ary, with_path_flag, kwargs;
|
495
|
+
VALUE kwargs_values[7];
|
496
|
+
|
497
|
+
int with_path = false, verbose_error = false;
|
465
498
|
char *json_text;
|
466
499
|
size_t json_text_len;
|
467
500
|
yajl_handle handle;
|
@@ -470,8 +503,21 @@ VALUE scan(VALUE self, VALUE json_str, VALUE path_ary, VALUE with_path)
|
|
470
503
|
VALUE err = Qnil, result;
|
471
504
|
// Turned out callbacks can't raise exceptions
|
472
505
|
// VALUE callback_err;
|
473
|
-
|
474
|
-
|
506
|
+
#if RUBY_API_VERSION_MAJOR > 2 || (RUBY_API_VERSION_MAJOR == 2 && RUBY_API_VERSION_MINOR >= 7)
|
507
|
+
rb_scan_args_kw(RB_SCAN_ARGS_LAST_HASH_KEYWORDS, argc, argv, "21:", &json_str, &path_ary, &with_path_flag, &kwargs);
|
508
|
+
#else
|
509
|
+
rb_scan_args(argc, argv, "21:", &json_str, &path_ary, &with_path_flag, &kwargs);
|
510
|
+
#endif
|
511
|
+
// rb_io_write(rb_stderr, rb_sprintf("with_path_flag: %" PRIsVALUE " \n", with_path_flag));
|
512
|
+
with_path = RTEST(with_path_flag);
|
513
|
+
if (kwargs != Qnil)
|
514
|
+
{
|
515
|
+
rb_get_kwargs(kwargs, scan_kwargs_table, 0, 7, kwargs_values);
|
516
|
+
if (kwargs_values[0] != Qundef)
|
517
|
+
with_path = RTEST(kwargs_values[0]);
|
518
|
+
if (kwargs_values[1] != Qundef)
|
519
|
+
verbose_error = RTEST(kwargs_values[1]);
|
520
|
+
}
|
475
521
|
rb_check_type(json_str, T_STRING);
|
476
522
|
json_text = RSTRING_PTR(json_str);
|
477
523
|
#if LONG_MAX > SIZE_MAX
|
@@ -482,18 +528,28 @@ VALUE scan(VALUE self, VALUE json_str, VALUE path_ary, VALUE with_path)
|
|
482
528
|
ctx = scan_ctx_init(path_ary, with_path);
|
483
529
|
|
484
530
|
handle = yajl_alloc(&scan_callbacks, NULL, (void *)ctx);
|
531
|
+
if (kwargs != Qnil) // it's safe to read kwargs_values only if rb_get_kwargs was called
|
532
|
+
{
|
533
|
+
if (kwargs_values[2] != Qundef)
|
534
|
+
yajl_config(handle, yajl_allow_comments, RTEST(kwargs_values[2]));
|
535
|
+
if (kwargs_values[3] != Qundef)
|
536
|
+
yajl_config(handle, yajl_dont_validate_strings, RTEST(kwargs_values[3]));
|
537
|
+
if (kwargs_values[4] != Qundef)
|
538
|
+
yajl_config(handle, yajl_allow_trailing_garbage, RTEST(kwargs_values[4]));
|
539
|
+
if (kwargs_values[5] != Qundef)
|
540
|
+
yajl_config(handle, yajl_allow_multiple_values, RTEST(kwargs_values[5]));
|
541
|
+
if (kwargs_values[6] != Qundef)
|
542
|
+
yajl_config(handle, yajl_allow_partial_values, RTEST(kwargs_values[6]));
|
543
|
+
}
|
485
544
|
ctx->handle = handle;
|
486
|
-
// TODO: make it configurable
|
487
|
-
// yajl_config(handle, yajl_allow_comments, true);
|
488
|
-
// yajl_config(handle, yajl_allow_trailing_garbage, true);
|
489
545
|
stat = yajl_parse(handle, (unsigned char *)json_text, json_text_len);
|
490
546
|
if (stat == yajl_status_ok)
|
491
547
|
stat = yajl_complete_parse(handle);
|
492
548
|
|
493
549
|
if (stat != yajl_status_ok)
|
494
550
|
{
|
495
|
-
char *str = (char *)yajl_get_error(handle,
|
496
|
-
err =
|
551
|
+
char *str = (char *)yajl_get_error(handle, verbose_error, (unsigned char *)json_text, json_text_len);
|
552
|
+
err = rb_utf8_str_new_cstr(str);
|
497
553
|
yajl_free_error(handle, (unsigned char *)str);
|
498
554
|
}
|
499
555
|
// callback_err = ctx->rb_err;
|
@@ -513,18 +569,19 @@ Init_json_scanner(void)
|
|
513
569
|
{
|
514
570
|
rb_mJsonScanner = rb_define_module("JsonScanner");
|
515
571
|
rb_define_const(rb_mJsonScanner, "ANY_INDEX", rb_range_new(INT2FIX(0), INT2FIX(-1), false));
|
516
|
-
rb_mJsonScannerOptions = rb_define_module_under(rb_mJsonScanner, "Options");
|
517
572
|
rb_eJsonScannerParseError = rb_define_class_under(rb_mJsonScanner, "ParseError", rb_eRuntimeError);
|
518
|
-
|
519
|
-
rb_define_const(rb_mJsonScannerOptions, "DONT_VALIDATE_STRINGS", INT2FIX(yajl_dont_validate_strings));
|
520
|
-
rb_define_const(rb_mJsonScannerOptions, "ALLOW_TRAILING_GARBAGE", INT2FIX(yajl_allow_trailing_garbage));
|
521
|
-
rb_define_const(rb_mJsonScannerOptions, "ALLOW_MULTIPLE_VALUES", INT2FIX(yajl_allow_multiple_values));
|
522
|
-
rb_define_const(rb_mJsonScannerOptions, "ALLOW_PARTIAL_VALUES", INT2FIX(yajl_allow_partial_values));
|
523
|
-
rb_define_module_function(rb_mJsonScanner, "scan", scan, 3);
|
573
|
+
rb_define_module_function(rb_mJsonScanner, "scan", scan, -1);
|
524
574
|
null_sym = rb_id2sym(rb_intern("null"));
|
525
575
|
boolean_sym = rb_id2sym(rb_intern("boolean"));
|
526
576
|
number_sym = rb_id2sym(rb_intern("number"));
|
527
577
|
string_sym = rb_id2sym(rb_intern("string"));
|
528
578
|
object_sym = rb_id2sym(rb_intern("object"));
|
529
579
|
array_sym = rb_id2sym(rb_intern("array"));
|
580
|
+
scan_kwargs_table[0] = rb_intern("with_path");
|
581
|
+
scan_kwargs_table[1] = rb_intern("verbose_error");
|
582
|
+
scan_kwargs_table[2] = rb_intern("allow_comments");
|
583
|
+
scan_kwargs_table[3] = rb_intern("dont_validate_strings");
|
584
|
+
scan_kwargs_table[4] = rb_intern("allow_trailing_garbage");
|
585
|
+
scan_kwargs_table[5] = rb_intern("allow_multiple_values");
|
586
|
+
scan_kwargs_table[6] = rb_intern("allow_partial_values");
|
530
587
|
}
|
data/lib/json_scanner/version.rb
CHANGED
data/spec/json_scanner_spec.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require_relative "spec_helper"
|
4
|
+
require "json"
|
4
5
|
|
5
6
|
RSpec.describe JsonScanner do
|
6
7
|
it "has a version number" do
|
@@ -8,19 +9,35 @@ RSpec.describe JsonScanner do
|
|
8
9
|
end
|
9
10
|
|
10
11
|
it "scans json" do
|
11
|
-
result = described_class.scan('["1", {"a": 2}]', [[0], [1, "a"], []]
|
12
|
+
result = described_class.scan('["1", {"a": 2}]', [[0], [1, "a"], []])
|
12
13
|
expect(result).to eq([[[1, 4, :string]], [[12, 13, :number]], [[0, 15, :array]]])
|
13
|
-
expect(described_class.scan('"2"', [[]]
|
14
|
+
expect(described_class.scan('"2"', [[]])).to eq([[[0, 3, :string]]])
|
14
15
|
expect(
|
15
|
-
described_class.scan("[0,1,2,3,4,5,6,7]", [[(0..2)], [(4...6)]],
|
16
|
+
described_class.scan("[0,1,2,3,4,5,6,7]", [[(0..2)], [(4...6)]]),
|
16
17
|
).to eq(
|
17
|
-
[[[1, 2, :number], [3, 4, :number], [5, 6, :number]], [[9, 10, :number], [11, 12, :number]]]
|
18
|
+
[[[1, 2, :number], [3, 4, :number], [5, 6, :number]], [[9, 10, :number], [11, 12, :number]]],
|
18
19
|
)
|
19
|
-
expect(described_class.scan('{"a": 1}', [["a"], []]
|
20
|
-
[[[6, 7, :number]], [[0, 8, :object]]]
|
20
|
+
expect(described_class.scan('{"a": 1}', [["a"], []])).to eq(
|
21
|
+
[[[6, 7, :number]], [[0, 8, :object]]],
|
21
22
|
)
|
22
23
|
end
|
23
24
|
|
25
|
+
it "works with max path len correctly" do
|
26
|
+
expect(
|
27
|
+
described_class.scan('{"a": [1]}', [[], ["a"]]),
|
28
|
+
).to eq(
|
29
|
+
[[[0, 10, :object]], [[6, 9, :array]]],
|
30
|
+
)
|
31
|
+
expect(
|
32
|
+
described_class.scan('{"a": {"b": 1}}', [[], ["a"]]),
|
33
|
+
).to eq(
|
34
|
+
[[[0, 15, :object]], [[6, 14, :object]]],
|
35
|
+
)
|
36
|
+
expect(described_class.scan('{"a": 1}', [[]])).to eq([[[0, 8, :object]]])
|
37
|
+
expect(described_class.scan("[[1]]", [[]])).to eq([[[0, 5, :array]]])
|
38
|
+
expect(described_class.scan("[[1]]", [[0]])).to eq([[[1, 4, :array]]])
|
39
|
+
end
|
40
|
+
|
24
41
|
it "raises on invalid json" do
|
25
42
|
expect do
|
26
43
|
begin
|
@@ -28,10 +45,10 @@ RSpec.describe JsonScanner do
|
|
28
45
|
# TODO: investigate
|
29
46
|
# got "munmap_chunk(): invalid pointer" in the console once after
|
30
47
|
# JsonScanner.scan '[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[]]]]]]]]]]]]]]]]]]]]]]', [[0,0,0,0,0,0,0]], true + Ctrl+D
|
31
|
-
# (last arg wasn't handled at the time)
|
32
|
-
# but I don't think it's a problem of
|
48
|
+
# (last arg wasn't handled at the time and was intended for with_path kwarg)
|
49
|
+
# but I don't think it's a problem of the extension or libyajl, it happened at exit and I free everything before
|
33
50
|
# `JsonScanner.scan` returns
|
34
|
-
described_class.scan "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[]]]]]]]]]]]]]]]]]]]]]]", [[0, 0, 0, 0, 0, 0, 0]]
|
51
|
+
described_class.scan "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[]]]]]]]]]]]]]]]]]]]]]]", [[0, 0, 0, 0, 0, 0, 0]]
|
35
52
|
ensure
|
36
53
|
GC.stress = false
|
37
54
|
end
|
@@ -40,26 +57,137 @@ RSpec.describe JsonScanner do
|
|
40
57
|
|
41
58
|
it "allows to select ranges" do
|
42
59
|
expect(
|
43
|
-
described_class.scan("[[1,2],[3,4]]", [[described_class::ANY_INDEX, described_class::ANY_INDEX]],
|
60
|
+
described_class.scan("[[1,2],[3,4]]", [[described_class::ANY_INDEX, described_class::ANY_INDEX]]),
|
44
61
|
).to eq(
|
45
|
-
[[[2, 3, :number], [4, 5, :number], [8, 9, :number], [10, 11, :number]]]
|
62
|
+
[[[2, 3, :number], [4, 5, :number], [8, 9, :number], [10, 11, :number]]],
|
46
63
|
)
|
47
64
|
expect(
|
48
|
-
described_class.scan("[[1,2],[3,4]]", [[described_class::ANY_INDEX, (0...1)]],
|
65
|
+
described_class.scan("[[1,2],[3,4]]", [[described_class::ANY_INDEX, (0...1)]]),
|
49
66
|
).to eq(
|
50
|
-
[[[2, 3, :number], [8, 9, :number]]]
|
67
|
+
[[[2, 3, :number], [8, 9, :number]]],
|
51
68
|
)
|
52
69
|
end
|
53
70
|
|
54
71
|
it "allows only positive or -1 values" do
|
55
72
|
expect do
|
56
|
-
described_class.scan("[[1,2],[3,4]]", [[(0...-1)]]
|
73
|
+
described_class.scan("[[1,2],[3,4]]", [[(0...-1)]])
|
57
74
|
end.to raise_error ArgumentError
|
58
75
|
expect do
|
59
|
-
described_class.scan("[[1,2],[3,4]]", [[(0..-2)]]
|
76
|
+
described_class.scan("[[1,2],[3,4]]", [[(0..-2)]])
|
60
77
|
end.to raise_error ArgumentError
|
61
78
|
expect do
|
62
|
-
described_class.scan("[[1,2],[3,4]]", [[(-42..1)]]
|
79
|
+
described_class.scan("[[1,2],[3,4]]", [[(-42..1)]])
|
63
80
|
end.to raise_error ArgumentError
|
64
81
|
end
|
82
|
+
|
83
|
+
it "allows to configure error messages" do
|
84
|
+
expect do
|
85
|
+
described_class.scan "{1}", []
|
86
|
+
end.to raise_error described_class::ParseError, /invalid object key(?!.*\(right here\))/m
|
87
|
+
expect do
|
88
|
+
described_class.scan "{1}", [], verbose_error: false
|
89
|
+
end.to raise_error described_class::ParseError, /invalid object key(?!.*\(right here\))/m
|
90
|
+
expect do
|
91
|
+
described_class.scan "{1}", [], verbose_error: true
|
92
|
+
end.to raise_error described_class::ParseError, /invalid object key(?=.*\(right here\))/m
|
93
|
+
end
|
94
|
+
|
95
|
+
it "allows to return an actual path to the element" do
|
96
|
+
with_path_expected_res = [
|
97
|
+
# result for the first matcher; each element is an array of two items:
|
98
|
+
# array of path elements and 3-element array start,end,type
|
99
|
+
[[[0], [1, 6, :array]], [[1], [7, 12, :array]]],
|
100
|
+
[
|
101
|
+
[[0, 0], [2, 3, :number]], [[0, 1], [4, 5, :number]],
|
102
|
+
[[1, 0], [8, 9, :number]], [[1, 1], [10, 11, :number]],
|
103
|
+
],
|
104
|
+
]
|
105
|
+
params = [
|
106
|
+
"[[1,2],[3,4]]",
|
107
|
+
[
|
108
|
+
[described_class::ANY_INDEX],
|
109
|
+
[described_class::ANY_INDEX, described_class::ANY_INDEX],
|
110
|
+
],
|
111
|
+
]
|
112
|
+
expect(described_class.scan(*params, with_path: true)).to eq(with_path_expected_res)
|
113
|
+
expect(described_class.scan(*params, true)).to eq(with_path_expected_res)
|
114
|
+
expect(
|
115
|
+
described_class.scan(*params, false, with_path: true),
|
116
|
+
).to eq(with_path_expected_res)
|
117
|
+
end
|
118
|
+
|
119
|
+
it "ignores reqular flag if kwarg is given" do
|
120
|
+
expect(
|
121
|
+
described_class.scan(
|
122
|
+
"[[1,2],[3,4]]",
|
123
|
+
[
|
124
|
+
[described_class::ANY_INDEX],
|
125
|
+
[described_class::ANY_INDEX, described_class::ANY_INDEX],
|
126
|
+
],
|
127
|
+
true, with_path: false,
|
128
|
+
),
|
129
|
+
).to eq(
|
130
|
+
[
|
131
|
+
# result for the first matcher; each element is a 3-element array: start, end, type
|
132
|
+
[[1, 6, :array], [7, 12, :array]],
|
133
|
+
[
|
134
|
+
[2, 3, :number], [4, 5, :number],
|
135
|
+
[8, 9, :number], [10, 11, :number],
|
136
|
+
],
|
137
|
+
],
|
138
|
+
)
|
139
|
+
end
|
140
|
+
|
141
|
+
it "allows to pass config as a hash" do
|
142
|
+
expect(
|
143
|
+
described_class.scan("[1]", [[0]], { with_path: true }),
|
144
|
+
).to eq(
|
145
|
+
[
|
146
|
+
[[[0], [1, 2, :number]]],
|
147
|
+
],
|
148
|
+
)
|
149
|
+
end
|
150
|
+
|
151
|
+
it "allows to configure yajl" do
|
152
|
+
expect(
|
153
|
+
described_class.scan("[1]____________", [[0]], { allow_trailing_garbage: true }),
|
154
|
+
).to eq([[[1, 2, :number]]])
|
155
|
+
expect(
|
156
|
+
described_class.scan(
|
157
|
+
'["1", {"a": /* comment */ 2}]____________', [[1, "a"]],
|
158
|
+
{ allow_trailing_garbage: true, allow_comments: true },
|
159
|
+
),
|
160
|
+
).to eq([[[26, 27, :number]]])
|
161
|
+
expect(
|
162
|
+
described_class.scan(
|
163
|
+
'[{"a": /* comment */ 1}]_________', [[]],
|
164
|
+
{ allow_comments: true, allow_trailing_garbage: true },
|
165
|
+
),
|
166
|
+
).to eq([[[0, 24, :array]]])
|
167
|
+
end
|
168
|
+
|
169
|
+
it "works with utf-8" do
|
170
|
+
json = '{"ルビー": ["Руби"]}'.encode(Encoding::UTF_8)
|
171
|
+
expect(described_class.scan(json, [[]])).to eq([[[0, json.bytesize, :object]]])
|
172
|
+
res = described_class.scan(json, [["ルビー", 0]])
|
173
|
+
expect(res).to eq([[[15, 25, :string]]])
|
174
|
+
elem = res.first.first
|
175
|
+
expect(JSON.parse(json.byteslice(elem[0]...elem[1]), quirks_mode: true)).to eq("Руби")
|
176
|
+
end
|
177
|
+
|
178
|
+
it "raises exceptions in utf-8" do
|
179
|
+
bad_json = '{"ルビー": ["Руби" 1]}'.encode(Encoding::UTF_8)
|
180
|
+
expect do
|
181
|
+
described_class.scan(bad_json, [[]], verbose_error: true)
|
182
|
+
# Checks encoding
|
183
|
+
end.to raise_error(described_class::ParseError, Regexp.new(Regexp.escape(bad_json)))
|
184
|
+
end
|
185
|
+
|
186
|
+
it "works with different encodings" do
|
187
|
+
# TODO: encoding validation
|
188
|
+
json = '{"a": 1}'.encode(Encoding::UTF_32LE)
|
189
|
+
expect do
|
190
|
+
described_class.scan(json, [[]])
|
191
|
+
end.to raise_error(described_class::ParseError)
|
192
|
+
end
|
65
193
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: json_scanner
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- uvlad7
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-12-
|
11
|
+
date: 2024-12-27 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: This gem uses yajl lib to scan a json string and allows you to parse
|
14
14
|
pieces of it
|