json_scanner 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d0af7c4c2fce9ca74ec96c00e8972088b34c99a005b8fc966d1a0e9ae7d75dcb
4
- data.tar.gz: 70f2365add4838ef7409d3ff9568ab59d1a3771ac38d6168757bf8de1de71b1d
3
+ metadata.gz: f34237d4ceab009f685b82a4e480247f23c804db96bded6d1bacea5ddd4a0946
4
+ data.tar.gz: 87484e4cbab84666b41ddb67553b0c985ef6dc29d8f1154a458173deade08587
5
5
  SHA512:
6
- metadata.gz: 96958c94108fafca33f68f091dcea150e549e1fa61c02aaf62790f0d2f77c8762abe7b702a4cf95b9d4e28929a3dd1ce77681bb7cc0f6e7e8fdd22d32f74f378
7
- data.tar.gz: 3da8a8713b1f1994d50ee3032d450b5d34070c843e7c2625db1f2945c5a6c1cdc223d9b957c0386562ea1fdf68d2d03c6fc985ca2f0ddda8a7f3b83ff2c19b36
6
+ metadata.gz: 5f6ae7f8d7afc88fee60e88eb8efe776b7ae0dffe25ecffa5ffc61241629eb4c3ec86a9ab1b1e76d49b4bd12498ee15625677afbc997039f9295e65e510a32df
7
+ data.tar.gz: b5e95df2d53c0a224f6a089a55a6a0c502adb7e4dce276f2f85c48a063182f65f1425eff1f97cf409de46c815b45bbfa0626e54d76036a5c66c13eefa4146648
data/README.md CHANGED
@@ -32,7 +32,12 @@ emoji_json = '{"grin": "😁", "heart": "😍", "rofl": "🤣"}'
32
32
  begin_pos, end_pos, = JsonScanner.scan(emoji_json, [["heart"]], false).first.first
33
33
  emoji_json.byteslice(begin_pos...end_pos)
34
34
  # => "\"😍\""
35
- emoji_json.force_encoding(Encoding::BINARY)[begin_pos...end_pos].force_encoding(Encoding::UTF_8)
35
+ # Note: you most likely don't need the `quirks_mode` option unless you are using an old Ruby
36
+ # with the stdlib version of the json gem, or an old version of that gem. In newer versions `quirks_mode` is the default.
37
+ JSON.parse(emoji_json.byteslice(begin_pos...end_pos), quirks_mode: true)
38
+ # => "😍"
39
+ # You can also do this
40
+ # emoji_json.force_encoding(Encoding::BINARY)[begin_pos...end_pos].force_encoding(Encoding::UTF_8)
36
41
  # => "\"😍\""
37
42
  ```
38
43
 
@@ -1,8 +1,8 @@
1
1
  #include "json_scanner.h"
2
2
 
3
3
  VALUE rb_mJsonScanner;
4
- VALUE rb_mJsonScannerOptions;
5
4
  VALUE rb_eJsonScannerParseError;
5
+ ID scan_kwargs_table[7];
6
6
 
7
7
  VALUE null_sym;
8
8
  VALUE boolean_sym;
@@ -16,9 +16,9 @@ enum matcher_type
16
16
  MATCHER_KEY,
17
17
  MATCHER_INDEX,
18
18
  // MATCHER_ANY_KEY,
19
- // MATCHER_ANY_INDEX,
20
19
  MATCHER_INDEX_RANGE,
21
20
  // MATCHER_KEYS_LIST,
21
+ // MATCHER_KEY_REGEX,
22
22
  };
23
23
 
24
24
  enum path_type
@@ -84,18 +84,22 @@ typedef struct
84
84
  } scan_ctx;
85
85
 
86
86
  // FIXME: This will cause memory leak if ruby_xmalloc raises
87
- scan_ctx *scan_ctx_init(VALUE path_ary, VALUE with_path)
87
+ scan_ctx *scan_ctx_init(VALUE path_ary, int with_path)
88
88
  {
89
+ int path_ary_len;
90
+ scan_ctx *ctx;
91
+ paths_t *paths;
89
92
  // TODO: Allow to_ary and sized enumerables
90
93
  rb_check_type(path_ary, T_ARRAY);
91
- int path_ary_len = rb_long2int(rb_array_len(path_ary));
94
+ path_ary_len = rb_long2int(rb_array_len(path_ary));
92
95
  // Check types early before any allocations, so exception is ok
93
96
  // TODO: Fix this, just handle errors
94
97
  for (int i = 0; i < path_ary_len; i++)
95
98
  {
99
+ int path_len;
96
100
  VALUE path = rb_ary_entry(path_ary, i);
97
101
  rb_check_type(path, T_ARRAY);
98
- int path_len = rb_long2int(rb_array_len(path));
102
+ path_len = rb_long2int(rb_array_len(path));
99
103
  for (int j = 0; j < path_len; j++)
100
104
  {
101
105
  VALUE entry = rb_ary_entry(path, j);
@@ -113,25 +117,32 @@ scan_ctx *scan_ctx_init(VALUE path_ary, VALUE with_path)
113
117
  else
114
118
  {
115
119
  VALUE range_beg, range_end;
120
+ long end_val;
116
121
  int open_ended;
117
122
  if (rb_range_values(entry, &range_beg, &range_end, &open_ended) != Qtrue)
118
123
  rb_raise(rb_eArgError, "path elements must be strings, integers, or ranges");
119
- RB_NUM2LONG(range_beg);
120
- RB_NUM2LONG(range_end);
124
+ if (RB_NUM2LONG(range_beg) < 0L)
125
+ rb_raise(rb_eArgError, "range start must be positive");
126
+ end_val = RB_NUM2LONG(range_end);
127
+ if (end_val < -1L)
128
+ rb_raise(rb_eArgError, "range end must be positive or -1");
129
+ if (end_val == -1L && open_ended)
130
+ rb_raise(rb_eArgError, "range with -1 end must be closed");
121
131
  }
122
132
  }
123
133
  }
124
134
 
125
- scan_ctx *ctx = ruby_xmalloc(sizeof(scan_ctx));
135
+ ctx = ruby_xmalloc(sizeof(scan_ctx));
126
136
 
127
- ctx->with_path = RB_TEST(with_path);
137
+ ctx->with_path = with_path;
128
138
  ctx->max_path_len = 0;
129
139
 
130
- paths_t *paths = ruby_xmalloc(sizeof(paths_t) * path_ary_len);
140
+ paths = ruby_xmalloc(sizeof(paths_t) * path_ary_len);
131
141
  for (int i = 0; i < path_ary_len; i++)
132
142
  {
143
+ int path_len;
133
144
  VALUE path = rb_ary_entry(path_ary, i);
134
- int path_len = rb_long2int(rb_array_len(path));
145
+ path_len = rb_long2int(rb_array_len(path));
135
146
  if (path_len > ctx->max_path_len)
136
147
  ctx->max_path_len = path_len;
137
148
  paths[i].elems = ruby_xmalloc2(sizeof(path_matcher_elem_t), path_len);
@@ -156,12 +167,16 @@ scan_ctx *scan_ctx_init(VALUE path_ary, VALUE with_path)
156
167
  }
157
168
  else
158
169
  {
159
- paths[i].elems[j].type = MATCHER_INDEX_RANGE;
160
170
  VALUE range_beg, range_end;
161
171
  int open_ended;
172
+ paths[i].elems[j].type = MATCHER_INDEX_RANGE;
162
173
  rb_range_values(entry, &range_beg, &range_end, &open_ended);
163
174
  paths[i].elems[j].value.range.start = RB_NUM2LONG(range_beg);
164
175
  paths[i].elems[j].value.range.end = RB_NUM2LONG(range_end);
176
+ // (value..-1) works as expected, (value...-1) is forbidden above
177
+ if (paths[i].elems[j].value.range.end == -1L)
178
+ paths[i].elems[j].value.range.end = LONG_MAX;
179
+ // -1 here is fine, so, (0...0) works just as expected - doesn't match anything
165
180
  if (open_ended)
166
181
  paths[i].elems[j].value.range.end--;
167
182
  }
@@ -181,7 +196,7 @@ scan_ctx *scan_ctx_init(VALUE path_ary, VALUE with_path)
181
196
  rb_ary_push(ctx->points_list, rb_ary_new());
182
197
  }
183
198
 
184
- ctx->starts = ruby_xmalloc2(sizeof(size_t), ctx->max_path_len);
199
+ ctx->starts = ruby_xmalloc2(sizeof(size_t), ctx->max_path_len + 1);
185
200
  // ctx->rb_err = Qnil;
186
201
  ctx->handle = NULL;
187
202
 
@@ -224,10 +239,10 @@ typedef enum
224
239
  } value_type;
225
240
 
226
241
  // noexcept
227
- void create_point(VALUE *point, scan_ctx *sctx, value_type type, size_t length, size_t curr_pos)
242
+ VALUE create_point(scan_ctx *sctx, value_type type, size_t length, size_t curr_pos)
228
243
  {
229
- *point = rb_ary_new_capa(3);
230
244
  VALUE values[3];
245
+ VALUE point = rb_ary_new_capa(3);
231
246
  // noexcept
232
247
  values[1] = RB_ULONG2NUM(curr_pos);
233
248
  switch (type)
@@ -259,7 +274,31 @@ void create_point(VALUE *point, scan_ctx *sctx, value_type type, size_t length,
259
274
  break;
260
275
  }
261
276
  // rb_ary_cat raise only in case of a frozen array or if len is too long
262
- rb_ary_cat(*point, values, 3);
277
+ rb_ary_cat(point, values, 3);
278
+ return point;
279
+ }
280
+
281
+ // noexcept
282
+ VALUE create_path(scan_ctx *sctx)
283
+ {
284
+ VALUE path = rb_ary_new_capa(sctx->current_path_len);
285
+ for (int i = 0; i < sctx->current_path_len; i++)
286
+ {
287
+ VALUE entry;
288
+ switch (sctx->current_path[i].type)
289
+ {
290
+ case PATH_KEY:
291
+ entry = rb_str_new(sctx->current_path[i].value.key.val, sctx->current_path[i].value.key.len);
292
+ break;
293
+ case PATH_INDEX:
294
+ entry = RB_ULONG2NUM(sctx->current_path[i].value.index);
295
+ break;
296
+ default:
297
+ entry = Qnil;
298
+ }
299
+ rb_ary_push(path, entry);
300
+ }
301
+ return path;
263
302
  }
264
303
 
265
304
  // noexcept
@@ -267,13 +306,15 @@ void save_point(scan_ctx *sctx, value_type type, size_t length)
267
306
  {
268
307
  // TODO: Abort parsing if all paths are matched and no more matches are possible: only trivial key/index matchers at the current level
269
308
  // TODO: Don't re-compare already matched prefixes; hard to invalidate, though
309
+ // TODO: Might fail in case of no memory
270
310
  VALUE point = Qundef;
311
+ int match;
271
312
  for (int i = 0; i < sctx->paths_len; i++)
272
313
  {
273
314
  if (sctx->paths[i].len != sctx->current_path_len)
274
315
  continue;
275
316
 
276
- int match = true;
317
+ match = true;
277
318
  for (int j = 0; j < sctx->current_path_len; j++)
278
319
  {
279
320
  switch (sctx->paths[i].elems[j].type)
@@ -303,7 +344,11 @@ void save_point(scan_ctx *sctx, value_type type, size_t length)
303
344
  {
304
345
  if (point == Qundef)
305
346
  {
306
- create_point(&point, sctx, type, length, yajl_get_bytes_consumed(sctx->handle));
347
+ point = create_point(sctx, type, length, yajl_get_bytes_consumed(sctx->handle));
348
+ if (sctx->with_path)
349
+ {
350
+ point = rb_ary_new_from_args(2, create_path(sctx), point);
351
+ }
307
352
  }
308
353
  // rb_ary_push raises only in case of a frozen array, which is not the case
309
354
  // rb_ary_entry is safe
@@ -366,11 +411,9 @@ int scan_on_start_object(void *ctx)
366
411
  return true;
367
412
  }
368
413
  increment_arr_index(sctx);
414
+ sctx->starts[sctx->current_path_len] = yajl_get_bytes_consumed(sctx->handle) - 1;
369
415
  if (sctx->current_path_len < sctx->max_path_len)
370
- {
371
- sctx->starts[sctx->current_path_len] = yajl_get_bytes_consumed(sctx->handle) - 1;
372
416
  sctx->current_path[sctx->current_path_len].type = PATH_KEY;
373
- }
374
417
  sctx->current_path_len++;
375
418
  return true;
376
419
  }
@@ -383,7 +426,7 @@ int scan_on_key(void *ctx, const unsigned char *key, size_t len)
383
426
  return true;
384
427
  // Can't be called without scan_on_start_object being called before
385
428
  // So current_path_len at least 1 and key.type is set to PATH_KEY;
386
- sctx->current_path[sctx->current_path_len - 1].value.key.val = (char *) key;
429
+ sctx->current_path[sctx->current_path_len - 1].value.key.val = (char *)key;
387
430
  sctx->current_path[sctx->current_path_len - 1].value.key.len = len;
388
431
  return true;
389
432
  }
@@ -393,9 +436,8 @@ int scan_on_end_object(void *ctx)
393
436
  {
394
437
  scan_ctx *sctx = (scan_ctx *)ctx;
395
438
  sctx->current_path_len--;
396
- if (sctx->current_path_len >= sctx->max_path_len)
397
- return true;
398
- save_point(sctx, object_value, 0);
439
+ if (sctx->current_path_len <= sctx->max_path_len)
440
+ save_point(sctx, object_value, 0);
399
441
  return true;
400
442
  }
401
443
 
@@ -409,9 +451,9 @@ int scan_on_start_array(void *ctx)
409
451
  return true;
410
452
  }
411
453
  increment_arr_index(sctx);
454
+ sctx->starts[sctx->current_path_len] = yajl_get_bytes_consumed(sctx->handle) - 1;
412
455
  if (sctx->current_path_len < sctx->max_path_len)
413
456
  {
414
- sctx->starts[sctx->current_path_len] = yajl_get_bytes_consumed(sctx->handle) - 1;
415
457
  sctx->current_path[sctx->current_path_len].type = PATH_INDEX;
416
458
  sctx->current_path[sctx->current_path_len].value.index = -1;
417
459
  }
@@ -424,9 +466,8 @@ int scan_on_end_array(void *ctx)
424
466
  {
425
467
  scan_ctx *sctx = (scan_ctx *)ctx;
426
468
  sctx->current_path_len--;
427
- if (sctx->current_path_len >= sctx->max_path_len)
428
- return true;
429
- save_point(sctx, array_value, 0);
469
+ if (sctx->current_path_len <= sctx->max_path_len)
470
+ save_point(sctx, array_value, 0);
430
471
  return true;
431
472
  }
432
473
 
@@ -443,40 +484,73 @@ static yajl_callbacks scan_callbacks = {
443
484
  scan_on_start_array,
444
485
  scan_on_end_array};
445
486
 
446
- // TODO: make with_path optional kw: `with_path: false`
447
- VALUE scan(VALUE self, VALUE json_str, VALUE path_ary, VALUE with_path)
487
+ // def scan(json_str, path_arr, opts)
488
+ // opts
489
+ // with_path: false, verbose_error: false,
490
+ // the following opts converted to bool and passed to yajl_config if provided, ignored if not provided
491
+ // allow_comments, dont_validate_strings, allow_trailing_garbage, allow_multiple_values, allow_partial_values
492
+ VALUE scan(int argc, VALUE *argv, VALUE self)
448
493
  {
449
- rb_check_type(json_str, T_STRING);
450
- char *json_text = RSTRING_PTR(json_str);
451
- #if LONG_MAX > SIZE_MAX
452
- size_t json_text_len = RSTRING_LENINT(json_str);
453
- #else
454
- size_t json_text_len = RSTRING_LEN(json_str);
455
- #endif
494
+ VALUE json_str, path_ary, with_path_flag, kwargs;
495
+ VALUE kwargs_values[7];
496
+
497
+ int with_path = false, verbose_error = false;
498
+ char *json_text;
499
+ size_t json_text_len;
456
500
  yajl_handle handle;
457
- // TODO
458
- int opt_verbose_error = 0;
459
501
  yajl_status stat;
460
- scan_ctx *ctx = scan_ctx_init(path_ary, with_path);
461
- VALUE err = Qnil;
462
- VALUE result;
502
+ scan_ctx *ctx;
503
+ VALUE err = Qnil, result;
463
504
  // Turned out callbacks can't raise exceptions
464
505
  // VALUE callback_err;
506
+ #if RUBY_API_VERSION_MAJOR > 2 || (RUBY_API_VERSION_MAJOR == 2 && RUBY_API_VERSION_MINOR >= 7)
507
+ rb_scan_args_kw(RB_SCAN_ARGS_LAST_HASH_KEYWORDS, argc, argv, "21:", &json_str, &path_ary, &with_path_flag, &kwargs);
508
+ #else
509
+ rb_scan_args(argc, argv, "21:", &json_str, &path_ary, &with_path_flag, &kwargs);
510
+ #endif
511
+ // rb_io_write(rb_stderr, rb_sprintf("with_path_flag: %" PRIsVALUE " \n", with_path_flag));
512
+ with_path = RTEST(with_path_flag);
513
+ if (kwargs != Qnil)
514
+ {
515
+ rb_get_kwargs(kwargs, scan_kwargs_table, 0, 7, kwargs_values);
516
+ if (kwargs_values[0] != Qundef)
517
+ with_path = RTEST(kwargs_values[0]);
518
+ if (kwargs_values[1] != Qundef)
519
+ verbose_error = RTEST(kwargs_values[1]);
520
+ }
521
+ rb_check_type(json_str, T_STRING);
522
+ json_text = RSTRING_PTR(json_str);
523
+ #if LONG_MAX > SIZE_MAX
524
+ json_text_len = RSTRING_LENINT(json_str);
525
+ #else
526
+ json_text_len = RSTRING_LEN(json_str);
527
+ #endif
528
+ ctx = scan_ctx_init(path_ary, with_path);
465
529
 
466
530
  handle = yajl_alloc(&scan_callbacks, NULL, (void *)ctx);
531
+ if (kwargs != Qnil) // it's safe to read kwargs_values only if rb_get_kwargs was called
532
+ {
533
+ if (kwargs_values[2] != Qundef)
534
+ yajl_config(handle, yajl_allow_comments, RTEST(kwargs_values[2]));
535
+ if (kwargs_values[3] != Qundef)
536
+ yajl_config(handle, yajl_dont_validate_strings, RTEST(kwargs_values[3]));
537
+ if (kwargs_values[4] != Qundef)
538
+ yajl_config(handle, yajl_allow_trailing_garbage, RTEST(kwargs_values[4]));
539
+ if (kwargs_values[5] != Qundef)
540
+ yajl_config(handle, yajl_allow_multiple_values, RTEST(kwargs_values[5]));
541
+ if (kwargs_values[6] != Qundef)
542
+ yajl_config(handle, yajl_allow_partial_values, RTEST(kwargs_values[6]));
543
+ }
467
544
  ctx->handle = handle;
468
- // TODO: make it configurable
469
- // yajl_config(handle, yajl_allow_comments, true);
470
- // yajl_config(handle, yajl_allow_trailing_garbage, true);
471
- stat = yajl_parse(handle, (unsigned char *) json_text, json_text_len);
545
+ stat = yajl_parse(handle, (unsigned char *)json_text, json_text_len);
472
546
  if (stat == yajl_status_ok)
473
547
  stat = yajl_complete_parse(handle);
474
548
 
475
549
  if (stat != yajl_status_ok)
476
550
  {
477
- char *str = (char *) yajl_get_error(handle, opt_verbose_error, (unsigned char *) json_text, json_text_len);
478
- err = rb_str_new_cstr(str);
479
- yajl_free_error(handle, (unsigned char *) str);
551
+ char *str = (char *)yajl_get_error(handle, verbose_error, (unsigned char *)json_text, json_text_len);
552
+ err = rb_utf8_str_new_cstr(str);
553
+ yajl_free_error(handle, (unsigned char *)str);
480
554
  }
481
555
  // callback_err = ctx->rb_err;
482
556
  result = ctx->points_list;
@@ -494,19 +568,20 @@ RUBY_FUNC_EXPORTED void
494
568
  Init_json_scanner(void)
495
569
  {
496
570
  rb_mJsonScanner = rb_define_module("JsonScanner");
497
- rb_define_const(rb_mJsonScanner, "ALL", rb_range_new(INT2FIX(0), INT2FIX(-1), false));
498
- rb_mJsonScannerOptions = rb_define_module_under(rb_mJsonScanner, "Options");
571
+ rb_define_const(rb_mJsonScanner, "ANY_INDEX", rb_range_new(INT2FIX(0), INT2FIX(-1), false));
499
572
  rb_eJsonScannerParseError = rb_define_class_under(rb_mJsonScanner, "ParseError", rb_eRuntimeError);
500
- rb_define_const(rb_mJsonScannerOptions, "ALLOW_COMMENTS", INT2FIX(yajl_allow_comments));
501
- rb_define_const(rb_mJsonScannerOptions, "DONT_VALIDATE_STRINGS", INT2FIX(yajl_dont_validate_strings));
502
- rb_define_const(rb_mJsonScannerOptions, "ALLOW_TRAILING_GARBAGE", INT2FIX(yajl_allow_trailing_garbage));
503
- rb_define_const(rb_mJsonScannerOptions, "ALLOW_MULTIPLE_VALUES", INT2FIX(yajl_allow_multiple_values));
504
- rb_define_const(rb_mJsonScannerOptions, "ALLOW_PARTIAL_VALUES", INT2FIX(yajl_allow_partial_values));
505
- rb_define_module_function(rb_mJsonScanner, "scan", scan, 3);
573
+ rb_define_module_function(rb_mJsonScanner, "scan", scan, -1);
506
574
  null_sym = rb_id2sym(rb_intern("null"));
507
575
  boolean_sym = rb_id2sym(rb_intern("boolean"));
508
576
  number_sym = rb_id2sym(rb_intern("number"));
509
577
  string_sym = rb_id2sym(rb_intern("string"));
510
578
  object_sym = rb_id2sym(rb_intern("object"));
511
579
  array_sym = rb_id2sym(rb_intern("array"));
580
+ scan_kwargs_table[0] = rb_intern("with_path");
581
+ scan_kwargs_table[1] = rb_intern("verbose_error");
582
+ scan_kwargs_table[2] = rb_intern("allow_comments");
583
+ scan_kwargs_table[3] = rb_intern("dont_validate_strings");
584
+ scan_kwargs_table[4] = rb_intern("allow_trailing_garbage");
585
+ scan_kwargs_table[5] = rb_intern("allow_multiple_values");
586
+ scan_kwargs_table[6] = rb_intern("allow_partial_values");
512
587
  }
@@ -3,6 +3,7 @@
3
3
 
4
4
  #include "ruby.h"
5
5
  #include "ruby/intern.h"
6
+ #include "ruby/version.h"
6
7
  #include <yajl/yajl_parse.h>
7
8
  #include <yajl/yajl_gen.h>
8
9
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module JsonScanner
4
- VERSION = "0.1.0"
4
+ VERSION = "0.2.0"
5
5
  end
@@ -1,37 +1,193 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative "spec_helper"
4
+ require "json"
4
5
 
5
6
  RSpec.describe JsonScanner do
6
7
  it "has a version number" do
7
- expect(described_class::VERSION).not_to be nil
8
+ expect(described_class::VERSION).not_to be_nil
8
9
  end
9
10
 
10
11
  it "scans json" do
11
- result = described_class.scan('["1", {"a": 2}]', [[0], [1, "a"], []], false)
12
+ result = described_class.scan('["1", {"a": 2}]', [[0], [1, "a"], []])
12
13
  expect(result).to eq([[[1, 4, :string]], [[12, 13, :number]], [[0, 15, :array]]])
13
- expect(described_class.scan('"2"', [[]], false)).to eq([[[0, 3, :string]]])
14
+ expect(described_class.scan('"2"', [[]])).to eq([[[0, 3, :string]]])
14
15
  expect(
15
- described_class.scan("[0,1,2,3,4,5,6,7]", [[(0..2)], [(4...6)]], false)
16
+ described_class.scan("[0,1,2,3,4,5,6,7]", [[(0..2)], [(4...6)]]),
16
17
  ).to eq(
17
- [[[1, 2, :number], [3, 4, :number], [5, 6, :number]], [[9, 10, :number], [11, 12, :number]]]
18
+ [[[1, 2, :number], [3, 4, :number], [5, 6, :number]], [[9, 10, :number], [11, 12, :number]]],
18
19
  )
19
- expect(described_class.scan('{"a": 1}', [["a"], []], false)).to eq(
20
- [[[6, 7, :number]], [[0, 8, :object]]]
20
+ expect(described_class.scan('{"a": 1}', [["a"], []])).to eq(
21
+ [[[6, 7, :number]], [[0, 8, :object]]],
21
22
  )
23
+ end
24
+
25
+ it "works with max path len correctly" do
26
+ expect(
27
+ described_class.scan('{"a": [1]}', [[], ["a"]]),
28
+ ).to eq(
29
+ [[[0, 10, :object]], [[6, 9, :array]]],
30
+ )
31
+ expect(
32
+ described_class.scan('{"a": {"b": 1}}', [[], ["a"]]),
33
+ ).to eq(
34
+ [[[0, 15, :object]], [[6, 14, :object]]],
35
+ )
36
+ expect(described_class.scan('{"a": 1}', [[]])).to eq([[[0, 8, :object]]])
37
+ expect(described_class.scan("[[1]]", [[]])).to eq([[[0, 5, :array]]])
38
+ expect(described_class.scan("[[1]]", [[0]])).to eq([[[1, 4, :array]]])
39
+ end
40
+
41
+ it "raises on invalid json" do
22
42
  expect do
23
43
  begin
24
44
  GC.stress = true
25
45
  # TODO: investigate
26
46
  # got "munmap_chunk(): invalid pointer" in in console once after
27
47
  # JsonScanner.scan '[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[]]]]]]]]]]]]]]]]]]]]]]', [[0,0,0,0,0,0,0]], true + Ctrl+D
28
- # (last arg wasn't handled at the time)
29
- # but I don't think it's a problem of tht extension or libyajl, it happened at exit and I free everything before
48
+ # (last arg wasn't handled at the time and was intended for with_path kwarg)
49
+ # but I don't think it's a problem of the extension or libyajl, it happened at exit and I free everything before
30
50
  # `JsonScanner.scan` returns
31
- described_class.scan "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[]]]]]]]]]]]]]]]]]]]]]]", [[0, 0, 0, 0, 0, 0, 0]], false
51
+ described_class.scan "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[]]]]]]]]]]]]]]]]]]]]]]", [[0, 0, 0, 0, 0, 0, 0]]
32
52
  ensure
33
53
  GC.stress = false
34
54
  end
35
55
  end.to raise_error described_class::ParseError
36
56
  end
57
+
58
+ it "allows to select ranges" do
59
+ expect(
60
+ described_class.scan("[[1,2],[3,4]]", [[described_class::ANY_INDEX, described_class::ANY_INDEX]]),
61
+ ).to eq(
62
+ [[[2, 3, :number], [4, 5, :number], [8, 9, :number], [10, 11, :number]]],
63
+ )
64
+ expect(
65
+ described_class.scan("[[1,2],[3,4]]", [[described_class::ANY_INDEX, (0...1)]]),
66
+ ).to eq(
67
+ [[[2, 3, :number], [8, 9, :number]]],
68
+ )
69
+ end
70
+
71
+ it "allows only positive or -1 values" do
72
+ expect do
73
+ described_class.scan("[[1,2],[3,4]]", [[(0...-1)]])
74
+ end.to raise_error ArgumentError
75
+ expect do
76
+ described_class.scan("[[1,2],[3,4]]", [[(0..-2)]])
77
+ end.to raise_error ArgumentError
78
+ expect do
79
+ described_class.scan("[[1,2],[3,4]]", [[(-42..1)]])
80
+ end.to raise_error ArgumentError
81
+ end
82
+
83
+ it "allows to configure error messages" do
84
+ expect do
85
+ described_class.scan "{1}", []
86
+ end.to raise_error described_class::ParseError, /invalid object key(?!.*\(right here\))/m
87
+ expect do
88
+ described_class.scan "{1}", [], verbose_error: false
89
+ end.to raise_error described_class::ParseError, /invalid object key(?!.*\(right here\))/m
90
+ expect do
91
+ described_class.scan "{1}", [], verbose_error: true
92
+ end.to raise_error described_class::ParseError, /invalid object key(?=.*\(right here\))/m
93
+ end
94
+
95
+ it "allows to return an actual path to the element" do
96
+ with_path_expected_res = [
97
+ # result for the first matcher; each element is an array of two items:
98
+ # array of path elements and 3-element array start,end,type
99
+ [[[0], [1, 6, :array]], [[1], [7, 12, :array]]],
100
+ [
101
+ [[0, 0], [2, 3, :number]], [[0, 1], [4, 5, :number]],
102
+ [[1, 0], [8, 9, :number]], [[1, 1], [10, 11, :number]],
103
+ ],
104
+ ]
105
+ params = [
106
+ "[[1,2],[3,4]]",
107
+ [
108
+ [described_class::ANY_INDEX],
109
+ [described_class::ANY_INDEX, described_class::ANY_INDEX],
110
+ ],
111
+ ]
112
+ expect(described_class.scan(*params, with_path: true)).to eq(with_path_expected_res)
113
+ expect(described_class.scan(*params, true)).to eq(with_path_expected_res)
114
+ expect(
115
+ described_class.scan(*params, false, with_path: true),
116
+ ).to eq(with_path_expected_res)
117
+ end
118
+
119
+ it "ignores reqular flag if kwarg is given" do
120
+ expect(
121
+ described_class.scan(
122
+ "[[1,2],[3,4]]",
123
+ [
124
+ [described_class::ANY_INDEX],
125
+ [described_class::ANY_INDEX, described_class::ANY_INDEX],
126
+ ],
127
+ true, with_path: false,
128
+ ),
129
+ ).to eq(
130
+ [
131
+ # result for the first matcher; each element is a 3-element array: start, end, type
132
+ [[1, 6, :array], [7, 12, :array]],
133
+ [
134
+ [2, 3, :number], [4, 5, :number],
135
+ [8, 9, :number], [10, 11, :number],
136
+ ],
137
+ ],
138
+ )
139
+ end
140
+
141
+ it "allows to pass config as a hash" do
142
+ expect(
143
+ described_class.scan("[1]", [[0]], { with_path: true }),
144
+ ).to eq(
145
+ [
146
+ [[[0], [1, 2, :number]]],
147
+ ],
148
+ )
149
+ end
150
+
151
+ it "allows to configure yajl" do
152
+ expect(
153
+ described_class.scan("[1]____________", [[0]], { allow_trailing_garbage: true }),
154
+ ).to eq([[[1, 2, :number]]])
155
+ expect(
156
+ described_class.scan(
157
+ '["1", {"a": /* comment */ 2}]____________', [[1, "a"]],
158
+ { allow_trailing_garbage: true, allow_comments: true },
159
+ ),
160
+ ).to eq([[[26, 27, :number]]])
161
+ expect(
162
+ described_class.scan(
163
+ '[{"a": /* comment */ 1}]_________', [[]],
164
+ { allow_comments: true, allow_trailing_garbage: true },
165
+ ),
166
+ ).to eq([[[0, 24, :array]]])
167
+ end
168
+
169
+ it "works with utf-8" do
170
+ json = '{"ルビー": ["Руби"]}'.encode(Encoding::UTF_8)
171
+ expect(described_class.scan(json, [[]])).to eq([[[0, json.bytesize, :object]]])
172
+ res = described_class.scan(json, [["ルビー", 0]])
173
+ expect(res).to eq([[[15, 25, :string]]])
174
+ elem = res.first.first
175
+ expect(JSON.parse(json.byteslice(elem[0]...elem[1]), quirks_mode: true)).to eq("Руби")
176
+ end
177
+
178
+ it "raises exceptions in utf-8" do
179
+ bad_json = '{"ルビー": ["Руби" 1]}'.encode(Encoding::UTF_8)
180
+ expect do
181
+ described_class.scan(bad_json, [[]], verbose_error: true)
182
+ # Checks encoding
183
+ end.to raise_error(described_class::ParseError, Regexp.new(Regexp.escape(bad_json)))
184
+ end
185
+
186
+ it "works with different encodings" do
187
+ # TODO: encoding validation
188
+ json = '{"a": 1}'.encode(Encoding::UTF_32LE)
189
+ expect do
190
+ described_class.scan(json, [[]])
191
+ end.to raise_error(described_class::ParseError)
192
+ end
37
193
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: json_scanner
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - uvlad7
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-12-15 00:00:00.000000000 Z
11
+ date: 2024-12-27 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: This gem uses yajl lib to scan a json string and allows you to parse
14
14
  pieces of it
@@ -53,7 +53,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
53
53
  requirements:
54
54
  - libyajl2, v2.1
55
55
  - libyajl-dev, v2.1
56
- rubygems_version: 3.5.7
56
+ rubygems_version: 3.4.20
57
57
  signing_key:
58
58
  specification_version: 4
59
59
  summary: Extract values from JSON without full parsing