json_scanner 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1ffddd81459c088040c2ddee0006b1c9172e8a15aa8bab987dc59743e697f49f
4
- data.tar.gz: 73b69bbcddaaf6711b2563787c4cbde1d258bf1d28b1e694714fd782c42d4c2c
3
+ metadata.gz: f34237d4ceab009f685b82a4e480247f23c804db96bded6d1bacea5ddd4a0946
4
+ data.tar.gz: 87484e4cbab84666b41ddb67553b0c985ef6dc29d8f1154a458173deade08587
5
5
  SHA512:
6
- metadata.gz: 44a153e578da606f67399a09387cf26d63b1528ad9e6bb258083435202f094ebc95e3a2c895a83334eb2aeef4ea9bfbe0f0c38374ac76f5772beea6bc3910a0f
7
- data.tar.gz: ff9bcbc934fafc4857faf926bca5f5c6c82ec357107b8d6e5f2c0a7e9e622afe3a19d4b0a3f4a987d2e965432c73615203afd1c3e1ac6a52965555b20ef2f377
6
+ metadata.gz: 5f6ae7f8d7afc88fee60e88eb8efe776b7ae0dffe25ecffa5ffc61241629eb4c3ec86a9ab1b1e76d49b4bd12498ee15625677afbc997039f9295e65e510a32df
7
+ data.tar.gz: b5e95df2d53c0a224f6a089a55a6a0c502adb7e4dce276f2f85c48a063182f65f1425eff1f97cf409de46c815b45bbfa0626e54d76036a5c66c13eefa4146648
@@ -1,8 +1,8 @@
1
1
  #include "json_scanner.h"
2
2
 
3
3
  VALUE rb_mJsonScanner;
4
- VALUE rb_mJsonScannerOptions;
5
4
  VALUE rb_eJsonScannerParseError;
5
+ ID scan_kwargs_table[7];
6
6
 
7
7
  VALUE null_sym;
8
8
  VALUE boolean_sym;
@@ -84,7 +84,7 @@ typedef struct
84
84
  } scan_ctx;
85
85
 
86
86
  // FIXME: This will cause memory leak if ruby_xmalloc raises
87
- scan_ctx *scan_ctx_init(VALUE path_ary, VALUE with_path)
87
+ scan_ctx *scan_ctx_init(VALUE path_ary, int with_path)
88
88
  {
89
89
  int path_ary_len;
90
90
  scan_ctx *ctx;
@@ -134,7 +134,7 @@ scan_ctx *scan_ctx_init(VALUE path_ary, VALUE with_path)
134
134
 
135
135
  ctx = ruby_xmalloc(sizeof(scan_ctx));
136
136
 
137
- ctx->with_path = RTEST(with_path);
137
+ ctx->with_path = with_path;
138
138
  ctx->max_path_len = 0;
139
139
 
140
140
  paths = ruby_xmalloc(sizeof(paths_t) * path_ary_len);
@@ -196,7 +196,7 @@ scan_ctx *scan_ctx_init(VALUE path_ary, VALUE with_path)
196
196
  rb_ary_push(ctx->points_list, rb_ary_new());
197
197
  }
198
198
 
199
- ctx->starts = ruby_xmalloc2(sizeof(size_t), ctx->max_path_len);
199
+ ctx->starts = ruby_xmalloc2(sizeof(size_t), ctx->max_path_len + 1);
200
200
  // ctx->rb_err = Qnil;
201
201
  ctx->handle = NULL;
202
202
 
@@ -239,10 +239,10 @@ typedef enum
239
239
  } value_type;
240
240
 
241
241
  // noexcept
242
- void create_point(VALUE *point, scan_ctx *sctx, value_type type, size_t length, size_t curr_pos)
242
+ VALUE create_point(scan_ctx *sctx, value_type type, size_t length, size_t curr_pos)
243
243
  {
244
244
  VALUE values[3];
245
- *point = rb_ary_new_capa(3);
245
+ VALUE point = rb_ary_new_capa(3);
246
246
  // noexcept
247
247
  values[1] = RB_ULONG2NUM(curr_pos);
248
248
  switch (type)
@@ -274,7 +274,31 @@ void create_point(VALUE *point, scan_ctx *sctx, value_type type, size_t length,
274
274
  break;
275
275
  }
276
276
  // rb_ary_cat raise only in case of a frozen array or if len is too long
277
- rb_ary_cat(*point, values, 3);
277
+ rb_ary_cat(point, values, 3);
278
+ return point;
279
+ }
280
+
281
+ // noexcept
282
+ VALUE create_path(scan_ctx *sctx)
283
+ {
284
+ VALUE path = rb_ary_new_capa(sctx->current_path_len);
285
+ for (int i = 0; i < sctx->current_path_len; i++)
286
+ {
287
+ VALUE entry;
288
+ switch (sctx->current_path[i].type)
289
+ {
290
+ case PATH_KEY:
291
+ entry = rb_str_new(sctx->current_path[i].value.key.val, sctx->current_path[i].value.key.len);
292
+ break;
293
+ case PATH_INDEX:
294
+ entry = RB_ULONG2NUM(sctx->current_path[i].value.index);
295
+ break;
296
+ default:
297
+ entry = Qnil;
298
+ }
299
+ rb_ary_push(path, entry);
300
+ }
301
+ return path;
278
302
  }
279
303
 
280
304
  // noexcept
@@ -282,6 +306,7 @@ void save_point(scan_ctx *sctx, value_type type, size_t length)
282
306
  {
283
307
  // TODO: Abort parsing if all paths are matched and no more matches are possible: only trivial key/index matchers at the current level
284
308
  // TODO: Don't re-compare already matched prefixes; hard to invalidate, though
309
+ // TODO: Might fail in case of no memory
285
310
  VALUE point = Qundef;
286
311
  int match;
287
312
  for (int i = 0; i < sctx->paths_len; i++)
@@ -319,7 +344,11 @@ void save_point(scan_ctx *sctx, value_type type, size_t length)
319
344
  {
320
345
  if (point == Qundef)
321
346
  {
322
- create_point(&point, sctx, type, length, yajl_get_bytes_consumed(sctx->handle));
347
+ point = create_point(sctx, type, length, yajl_get_bytes_consumed(sctx->handle));
348
+ if (sctx->with_path)
349
+ {
350
+ point = rb_ary_new_from_args(2, create_path(sctx), point);
351
+ }
323
352
  }
324
353
  // rb_ary_push raises only in case of a frozen array, which is not the case
325
354
  // rb_ary_entry is safe
@@ -382,11 +411,9 @@ int scan_on_start_object(void *ctx)
382
411
  return true;
383
412
  }
384
413
  increment_arr_index(sctx);
414
+ sctx->starts[sctx->current_path_len] = yajl_get_bytes_consumed(sctx->handle) - 1;
385
415
  if (sctx->current_path_len < sctx->max_path_len)
386
- {
387
- sctx->starts[sctx->current_path_len] = yajl_get_bytes_consumed(sctx->handle) - 1;
388
416
  sctx->current_path[sctx->current_path_len].type = PATH_KEY;
389
- }
390
417
  sctx->current_path_len++;
391
418
  return true;
392
419
  }
@@ -409,9 +436,8 @@ int scan_on_end_object(void *ctx)
409
436
  {
410
437
  scan_ctx *sctx = (scan_ctx *)ctx;
411
438
  sctx->current_path_len--;
412
- if (sctx->current_path_len >= sctx->max_path_len)
413
- return true;
414
- save_point(sctx, object_value, 0);
439
+ if (sctx->current_path_len <= sctx->max_path_len)
440
+ save_point(sctx, object_value, 0);
415
441
  return true;
416
442
  }
417
443
 
@@ -425,9 +451,9 @@ int scan_on_start_array(void *ctx)
425
451
  return true;
426
452
  }
427
453
  increment_arr_index(sctx);
454
+ sctx->starts[sctx->current_path_len] = yajl_get_bytes_consumed(sctx->handle) - 1;
428
455
  if (sctx->current_path_len < sctx->max_path_len)
429
456
  {
430
- sctx->starts[sctx->current_path_len] = yajl_get_bytes_consumed(sctx->handle) - 1;
431
457
  sctx->current_path[sctx->current_path_len].type = PATH_INDEX;
432
458
  sctx->current_path[sctx->current_path_len].value.index = -1;
433
459
  }
@@ -440,9 +466,8 @@ int scan_on_end_array(void *ctx)
440
466
  {
441
467
  scan_ctx *sctx = (scan_ctx *)ctx;
442
468
  sctx->current_path_len--;
443
- if (sctx->current_path_len >= sctx->max_path_len)
444
- return true;
445
- save_point(sctx, array_value, 0);
469
+ if (sctx->current_path_len <= sctx->max_path_len)
470
+ save_point(sctx, array_value, 0);
446
471
  return true;
447
472
  }
448
473
 
@@ -459,9 +484,17 @@ static yajl_callbacks scan_callbacks = {
459
484
  scan_on_start_array,
460
485
  scan_on_end_array};
461
486
 
462
- // TODO: make with_path optional kw: `with_path: false`
463
- VALUE scan(VALUE self, VALUE json_str, VALUE path_ary, VALUE with_path)
487
+ // def scan(json_str, path_arr, opts)
488
+ // opts
489
+ // with_path: false, verbose_error: false,
490
+ // the following opts converted to bool and passed to yajl_config if provided, ignored if not provided
491
+ // allow_comments, dont_validate_strings, allow_trailing_garbage, allow_multiple_values, allow_partial_values
492
+ VALUE scan(int argc, VALUE *argv, VALUE self)
464
493
  {
494
+ VALUE json_str, path_ary, with_path_flag, kwargs;
495
+ VALUE kwargs_values[7];
496
+
497
+ int with_path = false, verbose_error = false;
465
498
  char *json_text;
466
499
  size_t json_text_len;
467
500
  yajl_handle handle;
@@ -470,8 +503,21 @@ VALUE scan(VALUE self, VALUE json_str, VALUE path_ary, VALUE with_path)
470
503
  VALUE err = Qnil, result;
471
504
  // Turned out callbacks can't raise exceptions
472
505
  // VALUE callback_err;
473
- // TODO
474
- int opt_verbose_error = 0;
506
+ #if RUBY_API_VERSION_MAJOR > 2 || (RUBY_API_VERSION_MAJOR == 2 && RUBY_API_VERSION_MINOR >= 7)
507
+ rb_scan_args_kw(RB_SCAN_ARGS_LAST_HASH_KEYWORDS, argc, argv, "21:", &json_str, &path_ary, &with_path_flag, &kwargs);
508
+ #else
509
+ rb_scan_args(argc, argv, "21:", &json_str, &path_ary, &with_path_flag, &kwargs);
510
+ #endif
511
+ // rb_io_write(rb_stderr, rb_sprintf("with_path_flag: %" PRIsVALUE " \n", with_path_flag));
512
+ with_path = RTEST(with_path_flag);
513
+ if (kwargs != Qnil)
514
+ {
515
+ rb_get_kwargs(kwargs, scan_kwargs_table, 0, 7, kwargs_values);
516
+ if (kwargs_values[0] != Qundef)
517
+ with_path = RTEST(kwargs_values[0]);
518
+ if (kwargs_values[1] != Qundef)
519
+ verbose_error = RTEST(kwargs_values[1]);
520
+ }
475
521
  rb_check_type(json_str, T_STRING);
476
522
  json_text = RSTRING_PTR(json_str);
477
523
  #if LONG_MAX > SIZE_MAX
@@ -482,18 +528,28 @@ VALUE scan(VALUE self, VALUE json_str, VALUE path_ary, VALUE with_path)
482
528
  ctx = scan_ctx_init(path_ary, with_path);
483
529
 
484
530
  handle = yajl_alloc(&scan_callbacks, NULL, (void *)ctx);
531
+ if (kwargs != Qnil) // it's safe to read kwargs_values only if rb_get_kwargs was called
532
+ {
533
+ if (kwargs_values[2] != Qundef)
534
+ yajl_config(handle, yajl_allow_comments, RTEST(kwargs_values[2]));
535
+ if (kwargs_values[3] != Qundef)
536
+ yajl_config(handle, yajl_dont_validate_strings, RTEST(kwargs_values[3]));
537
+ if (kwargs_values[4] != Qundef)
538
+ yajl_config(handle, yajl_allow_trailing_garbage, RTEST(kwargs_values[4]));
539
+ if (kwargs_values[5] != Qundef)
540
+ yajl_config(handle, yajl_allow_multiple_values, RTEST(kwargs_values[5]));
541
+ if (kwargs_values[6] != Qundef)
542
+ yajl_config(handle, yajl_allow_partial_values, RTEST(kwargs_values[6]));
543
+ }
485
544
  ctx->handle = handle;
486
- // TODO: make it configurable
487
- // yajl_config(handle, yajl_allow_comments, true);
488
- // yajl_config(handle, yajl_allow_trailing_garbage, true);
489
545
  stat = yajl_parse(handle, (unsigned char *)json_text, json_text_len);
490
546
  if (stat == yajl_status_ok)
491
547
  stat = yajl_complete_parse(handle);
492
548
 
493
549
  if (stat != yajl_status_ok)
494
550
  {
495
- char *str = (char *)yajl_get_error(handle, opt_verbose_error, (unsigned char *)json_text, json_text_len);
496
- err = rb_str_new_cstr(str);
551
+ char *str = (char *)yajl_get_error(handle, verbose_error, (unsigned char *)json_text, json_text_len);
552
+ err = rb_utf8_str_new_cstr(str);
497
553
  yajl_free_error(handle, (unsigned char *)str);
498
554
  }
499
555
  // callback_err = ctx->rb_err;
@@ -513,18 +569,19 @@ Init_json_scanner(void)
513
569
  {
514
570
  rb_mJsonScanner = rb_define_module("JsonScanner");
515
571
  rb_define_const(rb_mJsonScanner, "ANY_INDEX", rb_range_new(INT2FIX(0), INT2FIX(-1), false));
516
- rb_mJsonScannerOptions = rb_define_module_under(rb_mJsonScanner, "Options");
517
572
  rb_eJsonScannerParseError = rb_define_class_under(rb_mJsonScanner, "ParseError", rb_eRuntimeError);
518
- rb_define_const(rb_mJsonScannerOptions, "ALLOW_COMMENTS", INT2FIX(yajl_allow_comments));
519
- rb_define_const(rb_mJsonScannerOptions, "DONT_VALIDATE_STRINGS", INT2FIX(yajl_dont_validate_strings));
520
- rb_define_const(rb_mJsonScannerOptions, "ALLOW_TRAILING_GARBAGE", INT2FIX(yajl_allow_trailing_garbage));
521
- rb_define_const(rb_mJsonScannerOptions, "ALLOW_MULTIPLE_VALUES", INT2FIX(yajl_allow_multiple_values));
522
- rb_define_const(rb_mJsonScannerOptions, "ALLOW_PARTIAL_VALUES", INT2FIX(yajl_allow_partial_values));
523
- rb_define_module_function(rb_mJsonScanner, "scan", scan, 3);
573
+ rb_define_module_function(rb_mJsonScanner, "scan", scan, -1);
524
574
  null_sym = rb_id2sym(rb_intern("null"));
525
575
  boolean_sym = rb_id2sym(rb_intern("boolean"));
526
576
  number_sym = rb_id2sym(rb_intern("number"));
527
577
  string_sym = rb_id2sym(rb_intern("string"));
528
578
  object_sym = rb_id2sym(rb_intern("object"));
529
579
  array_sym = rb_id2sym(rb_intern("array"));
580
+ scan_kwargs_table[0] = rb_intern("with_path");
581
+ scan_kwargs_table[1] = rb_intern("verbose_error");
582
+ scan_kwargs_table[2] = rb_intern("allow_comments");
583
+ scan_kwargs_table[3] = rb_intern("dont_validate_strings");
584
+ scan_kwargs_table[4] = rb_intern("allow_trailing_garbage");
585
+ scan_kwargs_table[5] = rb_intern("allow_multiple_values");
586
+ scan_kwargs_table[6] = rb_intern("allow_partial_values");
530
587
  }
@@ -3,6 +3,7 @@
3
3
 
4
4
  #include "ruby.h"
5
5
  #include "ruby/intern.h"
6
+ #include "ruby/version.h"
6
7
  #include <yajl/yajl_parse.h>
7
8
  #include <yajl/yajl_gen.h>
8
9
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module JsonScanner
4
- VERSION = "0.1.1"
4
+ VERSION = "0.2.0"
5
5
  end
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative "spec_helper"
4
+ require "json"
4
5
 
5
6
  RSpec.describe JsonScanner do
6
7
  it "has a version number" do
@@ -8,19 +9,35 @@ RSpec.describe JsonScanner do
8
9
  end
9
10
 
10
11
  it "scans json" do
11
- result = described_class.scan('["1", {"a": 2}]', [[0], [1, "a"], []], false)
12
+ result = described_class.scan('["1", {"a": 2}]', [[0], [1, "a"], []])
12
13
  expect(result).to eq([[[1, 4, :string]], [[12, 13, :number]], [[0, 15, :array]]])
13
- expect(described_class.scan('"2"', [[]], false)).to eq([[[0, 3, :string]]])
14
+ expect(described_class.scan('"2"', [[]])).to eq([[[0, 3, :string]]])
14
15
  expect(
15
- described_class.scan("[0,1,2,3,4,5,6,7]", [[(0..2)], [(4...6)]], false)
16
+ described_class.scan("[0,1,2,3,4,5,6,7]", [[(0..2)], [(4...6)]]),
16
17
  ).to eq(
17
- [[[1, 2, :number], [3, 4, :number], [5, 6, :number]], [[9, 10, :number], [11, 12, :number]]]
18
+ [[[1, 2, :number], [3, 4, :number], [5, 6, :number]], [[9, 10, :number], [11, 12, :number]]],
18
19
  )
19
- expect(described_class.scan('{"a": 1}', [["a"], []], false)).to eq(
20
- [[[6, 7, :number]], [[0, 8, :object]]]
20
+ expect(described_class.scan('{"a": 1}', [["a"], []])).to eq(
21
+ [[[6, 7, :number]], [[0, 8, :object]]],
21
22
  )
22
23
  end
23
24
 
25
+ it "works with max path len correctly" do
26
+ expect(
27
+ described_class.scan('{"a": [1]}', [[], ["a"]]),
28
+ ).to eq(
29
+ [[[0, 10, :object]], [[6, 9, :array]]],
30
+ )
31
+ expect(
32
+ described_class.scan('{"a": {"b": 1}}', [[], ["a"]]),
33
+ ).to eq(
34
+ [[[0, 15, :object]], [[6, 14, :object]]],
35
+ )
36
+ expect(described_class.scan('{"a": 1}', [[]])).to eq([[[0, 8, :object]]])
37
+ expect(described_class.scan("[[1]]", [[]])).to eq([[[0, 5, :array]]])
38
+ expect(described_class.scan("[[1]]", [[0]])).to eq([[[1, 4, :array]]])
39
+ end
40
+
24
41
  it "raises on invalid json" do
25
42
  expect do
26
43
  begin
@@ -28,10 +45,10 @@ RSpec.describe JsonScanner do
28
45
  # TODO: investigate
29
46
  # got "munmap_chunk(): invalid pointer" in the console once after
30
47
  # JsonScanner.scan '[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[]]]]]]]]]]]]]]]]]]]]]]', [[0,0,0,0,0,0,0]], true + Ctrl+D
31
- # (last arg wasn't handled at the time)
32
- # but I don't think it's a problem of tht extension or libyajl, it happened at exit and I free everything before
48
+ # (last arg wasn't handled at the time and was intended for with_path kwarg)
49
+ # but I don't think it's a problem of the extension or libyajl, it happened at exit and I free everything before
33
50
  # `JsonScanner.scan` returns
34
- described_class.scan "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[]]]]]]]]]]]]]]]]]]]]]]", [[0, 0, 0, 0, 0, 0, 0]], false
51
+ described_class.scan "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[]]]]]]]]]]]]]]]]]]]]]]", [[0, 0, 0, 0, 0, 0, 0]]
35
52
  ensure
36
53
  GC.stress = false
37
54
  end
@@ -40,26 +57,137 @@ RSpec.describe JsonScanner do
40
57
 
41
58
  it "allows to select ranges" do
42
59
  expect(
43
- described_class.scan("[[1,2],[3,4]]", [[described_class::ANY_INDEX, described_class::ANY_INDEX]], false)
60
+ described_class.scan("[[1,2],[3,4]]", [[described_class::ANY_INDEX, described_class::ANY_INDEX]]),
44
61
  ).to eq(
45
- [[[2, 3, :number], [4, 5, :number], [8, 9, :number], [10, 11, :number]]]
62
+ [[[2, 3, :number], [4, 5, :number], [8, 9, :number], [10, 11, :number]]],
46
63
  )
47
64
  expect(
48
- described_class.scan("[[1,2],[3,4]]", [[described_class::ANY_INDEX, (0...1)]], false)
65
+ described_class.scan("[[1,2],[3,4]]", [[described_class::ANY_INDEX, (0...1)]]),
49
66
  ).to eq(
50
- [[[2, 3, :number], [8, 9, :number]]]
67
+ [[[2, 3, :number], [8, 9, :number]]],
51
68
  )
52
69
  end
53
70
 
54
71
  it "allows only positive or -1 values" do
55
72
  expect do
56
- described_class.scan("[[1,2],[3,4]]", [[(0...-1)]], false)
73
+ described_class.scan("[[1,2],[3,4]]", [[(0...-1)]])
57
74
  end.to raise_error ArgumentError
58
75
  expect do
59
- described_class.scan("[[1,2],[3,4]]", [[(0..-2)]], false)
76
+ described_class.scan("[[1,2],[3,4]]", [[(0..-2)]])
60
77
  end.to raise_error ArgumentError
61
78
  expect do
62
- described_class.scan("[[1,2],[3,4]]", [[(-42..1)]], false)
79
+ described_class.scan("[[1,2],[3,4]]", [[(-42..1)]])
63
80
  end.to raise_error ArgumentError
64
81
  end
82
+
83
+ it "allows to configure error messages" do
84
+ expect do
85
+ described_class.scan "{1}", []
86
+ end.to raise_error described_class::ParseError, /invalid object key(?!.*\(right here\))/m
87
+ expect do
88
+ described_class.scan "{1}", [], verbose_error: false
89
+ end.to raise_error described_class::ParseError, /invalid object key(?!.*\(right here\))/m
90
+ expect do
91
+ described_class.scan "{1}", [], verbose_error: true
92
+ end.to raise_error described_class::ParseError, /invalid object key(?=.*\(right here\))/m
93
+ end
94
+
95
+ it "allows to return an actual path to the element" do
96
+ with_path_expected_res = [
97
+ # result for the first matcher; each element is an array of two items:
98
+ # array of path elements and 3-element array start,end,type
99
+ [[[0], [1, 6, :array]], [[1], [7, 12, :array]]],
100
+ [
101
+ [[0, 0], [2, 3, :number]], [[0, 1], [4, 5, :number]],
102
+ [[1, 0], [8, 9, :number]], [[1, 1], [10, 11, :number]],
103
+ ],
104
+ ]
105
+ params = [
106
+ "[[1,2],[3,4]]",
107
+ [
108
+ [described_class::ANY_INDEX],
109
+ [described_class::ANY_INDEX, described_class::ANY_INDEX],
110
+ ],
111
+ ]
112
+ expect(described_class.scan(*params, with_path: true)).to eq(with_path_expected_res)
113
+ expect(described_class.scan(*params, true)).to eq(with_path_expected_res)
114
+ expect(
115
+ described_class.scan(*params, false, with_path: true),
116
+ ).to eq(with_path_expected_res)
117
+ end
118
+
119
+ it "ignores reqular flag if kwarg is given" do
120
+ expect(
121
+ described_class.scan(
122
+ "[[1,2],[3,4]]",
123
+ [
124
+ [described_class::ANY_INDEX],
125
+ [described_class::ANY_INDEX, described_class::ANY_INDEX],
126
+ ],
127
+ true, with_path: false,
128
+ ),
129
+ ).to eq(
130
+ [
131
+ # result for the first matcher; each element is a 3-element array: start, end, type
132
+ [[1, 6, :array], [7, 12, :array]],
133
+ [
134
+ [2, 3, :number], [4, 5, :number],
135
+ [8, 9, :number], [10, 11, :number],
136
+ ],
137
+ ],
138
+ )
139
+ end
140
+
141
+ it "allows to pass config as a hash" do
142
+ expect(
143
+ described_class.scan("[1]", [[0]], { with_path: true }),
144
+ ).to eq(
145
+ [
146
+ [[[0], [1, 2, :number]]],
147
+ ],
148
+ )
149
+ end
150
+
151
+ it "allows to configure yajl" do
152
+ expect(
153
+ described_class.scan("[1]____________", [[0]], { allow_trailing_garbage: true }),
154
+ ).to eq([[[1, 2, :number]]])
155
+ expect(
156
+ described_class.scan(
157
+ '["1", {"a": /* comment */ 2}]____________', [[1, "a"]],
158
+ { allow_trailing_garbage: true, allow_comments: true },
159
+ ),
160
+ ).to eq([[[26, 27, :number]]])
161
+ expect(
162
+ described_class.scan(
163
+ '[{"a": /* comment */ 1}]_________', [[]],
164
+ { allow_comments: true, allow_trailing_garbage: true },
165
+ ),
166
+ ).to eq([[[0, 24, :array]]])
167
+ end
168
+
169
+ it "works with utf-8" do
170
+ json = '{"ルビー": ["Руби"]}'.encode(Encoding::UTF_8)
171
+ expect(described_class.scan(json, [[]])).to eq([[[0, json.bytesize, :object]]])
172
+ res = described_class.scan(json, [["ルビー", 0]])
173
+ expect(res).to eq([[[15, 25, :string]]])
174
+ elem = res.first.first
175
+ expect(JSON.parse(json.byteslice(elem[0]...elem[1]), quirks_mode: true)).to eq("Руби")
176
+ end
177
+
178
+ it "raises exceptions in utf-8" do
179
+ bad_json = '{"ルビー": ["Руби" 1]}'.encode(Encoding::UTF_8)
180
+ expect do
181
+ described_class.scan(bad_json, [[]], verbose_error: true)
182
+ # Checks encoding
183
+ end.to raise_error(described_class::ParseError, Regexp.new(Regexp.escape(bad_json)))
184
+ end
185
+
186
+ it "works with different encodings" do
187
+ # TODO: encoding validation
188
+ json = '{"a": 1}'.encode(Encoding::UTF_32LE)
189
+ expect do
190
+ described_class.scan(json, [[]])
191
+ end.to raise_error(described_class::ParseError)
192
+ end
65
193
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: json_scanner
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - uvlad7
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-12-16 00:00:00.000000000 Z
11
+ date: 2024-12-27 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: This gem uses yajl lib to scan a json string and allows you to parse
14
14
  pieces of it