json_scanner 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d0af7c4c2fce9ca74ec96c00e8972088b34c99a005b8fc966d1a0e9ae7d75dcb
4
- data.tar.gz: 70f2365add4838ef7409d3ff9568ab59d1a3771ac38d6168757bf8de1de71b1d
3
+ metadata.gz: f34237d4ceab009f685b82a4e480247f23c804db96bded6d1bacea5ddd4a0946
4
+ data.tar.gz: 87484e4cbab84666b41ddb67553b0c985ef6dc29d8f1154a458173deade08587
5
5
  SHA512:
6
- metadata.gz: 96958c94108fafca33f68f091dcea150e549e1fa61c02aaf62790f0d2f77c8762abe7b702a4cf95b9d4e28929a3dd1ce77681bb7cc0f6e7e8fdd22d32f74f378
7
- data.tar.gz: 3da8a8713b1f1994d50ee3032d450b5d34070c843e7c2625db1f2945c5a6c1cdc223d9b957c0386562ea1fdf68d2d03c6fc985ca2f0ddda8a7f3b83ff2c19b36
6
+ metadata.gz: 5f6ae7f8d7afc88fee60e88eb8efe776b7ae0dffe25ecffa5ffc61241629eb4c3ec86a9ab1b1e76d49b4bd12498ee15625677afbc997039f9295e65e510a32df
7
+ data.tar.gz: b5e95df2d53c0a224f6a089a55a6a0c502adb7e4dce276f2f85c48a063182f65f1425eff1f97cf409de46c815b45bbfa0626e54d76036a5c66c13eefa4146648
data/README.md CHANGED
@@ -32,7 +32,12 @@ emoji_json = '{"grin": "😁", "heart": "😍", "rofl": "🤣"}'
32
32
  begin_pos, end_pos, = JsonScanner.scan(emoji_json, [["heart"]], false).first.first
33
33
  emoji_json.byteslice(begin_pos...end_pos)
34
34
  # => "\"😍\""
35
- emoji_json.force_encoding(Encoding::BINARY)[begin_pos...end_pos].force_encoding(Encoding::UTF_8)
35
+ # Note: you most likely don't need the `quirks_mode` option unless you are using an old Ruby
36
+ # with the stdlib version of the json gem, or an old version of that gem. In newer versions `quirks_mode` is the default
37
+ JSON.parse(emoji_json.byteslice(begin_pos...end_pos), quirks_mode: true)
38
+ # => "😍"
39
+ # You can also do this
40
+ # emoji_json.force_encoding(Encoding::BINARY)[begin_pos...end_pos].force_encoding(Encoding::UTF_8)
36
41
  # => "\"😍\""
37
42
  ```
38
43
 
@@ -1,8 +1,8 @@
1
1
  #include "json_scanner.h"
2
2
 
3
3
  VALUE rb_mJsonScanner;
4
- VALUE rb_mJsonScannerOptions;
5
4
  VALUE rb_eJsonScannerParseError;
5
+ ID scan_kwargs_table[7];
6
6
 
7
7
  VALUE null_sym;
8
8
  VALUE boolean_sym;
@@ -16,9 +16,9 @@ enum matcher_type
16
16
  MATCHER_KEY,
17
17
  MATCHER_INDEX,
18
18
  // MATCHER_ANY_KEY,
19
- // MATCHER_ANY_INDEX,
20
19
  MATCHER_INDEX_RANGE,
21
20
  // MATCHER_KEYS_LIST,
21
+ // MATCHER_KEY_REGEX,
22
22
  };
23
23
 
24
24
  enum path_type
@@ -84,18 +84,22 @@ typedef struct
84
84
  } scan_ctx;
85
85
 
86
86
  // FIXME: This will cause memory leak if ruby_xmalloc raises
87
- scan_ctx *scan_ctx_init(VALUE path_ary, VALUE with_path)
87
+ scan_ctx *scan_ctx_init(VALUE path_ary, int with_path)
88
88
  {
89
+ int path_ary_len;
90
+ scan_ctx *ctx;
91
+ paths_t *paths;
89
92
  // TODO: Allow to_ary and sized enumerables
90
93
  rb_check_type(path_ary, T_ARRAY);
91
- int path_ary_len = rb_long2int(rb_array_len(path_ary));
94
+ path_ary_len = rb_long2int(rb_array_len(path_ary));
92
95
  // Check types early before any allocations, so exception is ok
93
96
  // TODO: Fix this, just handle errors
94
97
  for (int i = 0; i < path_ary_len; i++)
95
98
  {
99
+ int path_len;
96
100
  VALUE path = rb_ary_entry(path_ary, i);
97
101
  rb_check_type(path, T_ARRAY);
98
- int path_len = rb_long2int(rb_array_len(path));
102
+ path_len = rb_long2int(rb_array_len(path));
99
103
  for (int j = 0; j < path_len; j++)
100
104
  {
101
105
  VALUE entry = rb_ary_entry(path, j);
@@ -113,25 +117,32 @@ scan_ctx *scan_ctx_init(VALUE path_ary, VALUE with_path)
113
117
  else
114
118
  {
115
119
  VALUE range_beg, range_end;
120
+ long end_val;
116
121
  int open_ended;
117
122
  if (rb_range_values(entry, &range_beg, &range_end, &open_ended) != Qtrue)
118
123
  rb_raise(rb_eArgError, "path elements must be strings, integers, or ranges");
119
- RB_NUM2LONG(range_beg);
120
- RB_NUM2LONG(range_end);
124
+ if (RB_NUM2LONG(range_beg) < 0L)
125
+ rb_raise(rb_eArgError, "range start must be positive");
126
+ end_val = RB_NUM2LONG(range_end);
127
+ if (end_val < -1L)
128
+ rb_raise(rb_eArgError, "range end must be positive or -1");
129
+ if (end_val == -1L && open_ended)
130
+ rb_raise(rb_eArgError, "range with -1 end must be closed");
121
131
  }
122
132
  }
123
133
  }
124
134
 
125
- scan_ctx *ctx = ruby_xmalloc(sizeof(scan_ctx));
135
+ ctx = ruby_xmalloc(sizeof(scan_ctx));
126
136
 
127
- ctx->with_path = RB_TEST(with_path);
137
+ ctx->with_path = with_path;
128
138
  ctx->max_path_len = 0;
129
139
 
130
- paths_t *paths = ruby_xmalloc(sizeof(paths_t) * path_ary_len);
140
+ paths = ruby_xmalloc(sizeof(paths_t) * path_ary_len);
131
141
  for (int i = 0; i < path_ary_len; i++)
132
142
  {
143
+ int path_len;
133
144
  VALUE path = rb_ary_entry(path_ary, i);
134
- int path_len = rb_long2int(rb_array_len(path));
145
+ path_len = rb_long2int(rb_array_len(path));
135
146
  if (path_len > ctx->max_path_len)
136
147
  ctx->max_path_len = path_len;
137
148
  paths[i].elems = ruby_xmalloc2(sizeof(path_matcher_elem_t), path_len);
@@ -156,12 +167,16 @@ scan_ctx *scan_ctx_init(VALUE path_ary, VALUE with_path)
156
167
  }
157
168
  else
158
169
  {
159
- paths[i].elems[j].type = MATCHER_INDEX_RANGE;
160
170
  VALUE range_beg, range_end;
161
171
  int open_ended;
172
+ paths[i].elems[j].type = MATCHER_INDEX_RANGE;
162
173
  rb_range_values(entry, &range_beg, &range_end, &open_ended);
163
174
  paths[i].elems[j].value.range.start = RB_NUM2LONG(range_beg);
164
175
  paths[i].elems[j].value.range.end = RB_NUM2LONG(range_end);
176
+ // (value..-1) works as expected, (value...-1) is forbidden above
177
+ if (paths[i].elems[j].value.range.end == -1L)
178
+ paths[i].elems[j].value.range.end = LONG_MAX;
179
+ // -1 here is fine, so, (0...0) works just as expected - doesn't match anything
165
180
  if (open_ended)
166
181
  paths[i].elems[j].value.range.end--;
167
182
  }
@@ -181,7 +196,7 @@ scan_ctx *scan_ctx_init(VALUE path_ary, VALUE with_path)
181
196
  rb_ary_push(ctx->points_list, rb_ary_new());
182
197
  }
183
198
 
184
- ctx->starts = ruby_xmalloc2(sizeof(size_t), ctx->max_path_len);
199
+ ctx->starts = ruby_xmalloc2(sizeof(size_t), ctx->max_path_len + 1);
185
200
  // ctx->rb_err = Qnil;
186
201
  ctx->handle = NULL;
187
202
 
@@ -224,10 +239,10 @@ typedef enum
224
239
  } value_type;
225
240
 
226
241
  // noexcept
227
- void create_point(VALUE *point, scan_ctx *sctx, value_type type, size_t length, size_t curr_pos)
242
+ VALUE create_point(scan_ctx *sctx, value_type type, size_t length, size_t curr_pos)
228
243
  {
229
- *point = rb_ary_new_capa(3);
230
244
  VALUE values[3];
245
+ VALUE point = rb_ary_new_capa(3);
231
246
  // noexcept
232
247
  values[1] = RB_ULONG2NUM(curr_pos);
233
248
  switch (type)
@@ -259,7 +274,31 @@ void create_point(VALUE *point, scan_ctx *sctx, value_type type, size_t length,
259
274
  break;
260
275
  }
261
276
  // rb_ary_cat raise only in case of a frozen array or if len is too long
262
- rb_ary_cat(*point, values, 3);
277
+ rb_ary_cat(point, values, 3);
278
+ return point;
279
+ }
280
+
281
+ // noexcept
282
+ VALUE create_path(scan_ctx *sctx)
283
+ {
284
+ VALUE path = rb_ary_new_capa(sctx->current_path_len);
285
+ for (int i = 0; i < sctx->current_path_len; i++)
286
+ {
287
+ VALUE entry;
288
+ switch (sctx->current_path[i].type)
289
+ {
290
+ case PATH_KEY:
291
+ entry = rb_str_new(sctx->current_path[i].value.key.val, sctx->current_path[i].value.key.len);
292
+ break;
293
+ case PATH_INDEX:
294
+ entry = RB_ULONG2NUM(sctx->current_path[i].value.index);
295
+ break;
296
+ default:
297
+ entry = Qnil;
298
+ }
299
+ rb_ary_push(path, entry);
300
+ }
301
+ return path;
263
302
  }
264
303
 
265
304
  // noexcept
@@ -267,13 +306,15 @@ void save_point(scan_ctx *sctx, value_type type, size_t length)
267
306
  {
268
307
  // TODO: Abort parsing if all paths are matched and no more mathces are possible: only trivial key/index matchers at the current level
269
308
  // TODO: Don't re-compare already matched prefixes; hard to invalidate, though
309
+ // TODO: Might fail in case of no memory
270
310
  VALUE point = Qundef;
311
+ int match;
271
312
  for (int i = 0; i < sctx->paths_len; i++)
272
313
  {
273
314
  if (sctx->paths[i].len != sctx->current_path_len)
274
315
  continue;
275
316
 
276
- int match = true;
317
+ match = true;
277
318
  for (int j = 0; j < sctx->current_path_len; j++)
278
319
  {
279
320
  switch (sctx->paths[i].elems[j].type)
@@ -303,7 +344,11 @@ void save_point(scan_ctx *sctx, value_type type, size_t length)
303
344
  {
304
345
  if (point == Qundef)
305
346
  {
306
- create_point(&point, sctx, type, length, yajl_get_bytes_consumed(sctx->handle));
347
+ point = create_point(sctx, type, length, yajl_get_bytes_consumed(sctx->handle));
348
+ if (sctx->with_path)
349
+ {
350
+ point = rb_ary_new_from_args(2, create_path(sctx), point);
351
+ }
307
352
  }
308
353
  // rb_ary_push raises only in case of a frozen array, which is not the case
309
354
  // rb_ary_entry is safe
@@ -366,11 +411,9 @@ int scan_on_start_object(void *ctx)
366
411
  return true;
367
412
  }
368
413
  increment_arr_index(sctx);
414
+ sctx->starts[sctx->current_path_len] = yajl_get_bytes_consumed(sctx->handle) - 1;
369
415
  if (sctx->current_path_len < sctx->max_path_len)
370
- {
371
- sctx->starts[sctx->current_path_len] = yajl_get_bytes_consumed(sctx->handle) - 1;
372
416
  sctx->current_path[sctx->current_path_len].type = PATH_KEY;
373
- }
374
417
  sctx->current_path_len++;
375
418
  return true;
376
419
  }
@@ -383,7 +426,7 @@ int scan_on_key(void *ctx, const unsigned char *key, size_t len)
383
426
  return true;
384
427
  // Can't be called without scan_on_start_object being called before
385
428
  // So current_path_len at least 1 and key.type is set to PATH_KEY;
386
- sctx->current_path[sctx->current_path_len - 1].value.key.val = (char *) key;
429
+ sctx->current_path[sctx->current_path_len - 1].value.key.val = (char *)key;
387
430
  sctx->current_path[sctx->current_path_len - 1].value.key.len = len;
388
431
  return true;
389
432
  }
@@ -393,9 +436,8 @@ int scan_on_end_object(void *ctx)
393
436
  {
394
437
  scan_ctx *sctx = (scan_ctx *)ctx;
395
438
  sctx->current_path_len--;
396
- if (sctx->current_path_len >= sctx->max_path_len)
397
- return true;
398
- save_point(sctx, object_value, 0);
439
+ if (sctx->current_path_len <= sctx->max_path_len)
440
+ save_point(sctx, object_value, 0);
399
441
  return true;
400
442
  }
401
443
 
@@ -409,9 +451,9 @@ int scan_on_start_array(void *ctx)
409
451
  return true;
410
452
  }
411
453
  increment_arr_index(sctx);
454
+ sctx->starts[sctx->current_path_len] = yajl_get_bytes_consumed(sctx->handle) - 1;
412
455
  if (sctx->current_path_len < sctx->max_path_len)
413
456
  {
414
- sctx->starts[sctx->current_path_len] = yajl_get_bytes_consumed(sctx->handle) - 1;
415
457
  sctx->current_path[sctx->current_path_len].type = PATH_INDEX;
416
458
  sctx->current_path[sctx->current_path_len].value.index = -1;
417
459
  }
@@ -424,9 +466,8 @@ int scan_on_end_array(void *ctx)
424
466
  {
425
467
  scan_ctx *sctx = (scan_ctx *)ctx;
426
468
  sctx->current_path_len--;
427
- if (sctx->current_path_len >= sctx->max_path_len)
428
- return true;
429
- save_point(sctx, array_value, 0);
469
+ if (sctx->current_path_len <= sctx->max_path_len)
470
+ save_point(sctx, array_value, 0);
430
471
  return true;
431
472
  }
432
473
 
@@ -443,40 +484,73 @@ static yajl_callbacks scan_callbacks = {
443
484
  scan_on_start_array,
444
485
  scan_on_end_array};
445
486
 
446
- // TODO: make with_path optional kw: `with_path: false`
447
- VALUE scan(VALUE self, VALUE json_str, VALUE path_ary, VALUE with_path)
487
+ // def scan(json_str, path_arr, opts)
488
+ // opts
489
+ // with_path: false, verbose_error: false,
490
+ // the following opts are converted to bool and passed to yajl_config if provided, ignored if not provided
491
+ // allow_comments, dont_validate_strings, allow_trailing_garbage, allow_multiple_values, allow_partial_values
492
+ VALUE scan(int argc, VALUE *argv, VALUE self)
448
493
  {
449
- rb_check_type(json_str, T_STRING);
450
- char *json_text = RSTRING_PTR(json_str);
451
- #if LONG_MAX > SIZE_MAX
452
- size_t json_text_len = RSTRING_LENINT(json_str);
453
- #else
454
- size_t json_text_len = RSTRING_LEN(json_str);
455
- #endif
494
+ VALUE json_str, path_ary, with_path_flag, kwargs;
495
+ VALUE kwargs_values[7];
496
+
497
+ int with_path = false, verbose_error = false;
498
+ char *json_text;
499
+ size_t json_text_len;
456
500
  yajl_handle handle;
457
- // TODO
458
- int opt_verbose_error = 0;
459
501
  yajl_status stat;
460
- scan_ctx *ctx = scan_ctx_init(path_ary, with_path);
461
- VALUE err = Qnil;
462
- VALUE result;
502
+ scan_ctx *ctx;
503
+ VALUE err = Qnil, result;
463
504
  // Turned out callbacks can't raise exceptions
464
505
  // VALUE callback_err;
506
+ #if RUBY_API_VERSION_MAJOR > 2 || (RUBY_API_VERSION_MAJOR == 2 && RUBY_API_VERSION_MINOR >= 7)
507
+ rb_scan_args_kw(RB_SCAN_ARGS_LAST_HASH_KEYWORDS, argc, argv, "21:", &json_str, &path_ary, &with_path_flag, &kwargs);
508
+ #else
509
+ rb_scan_args(argc, argv, "21:", &json_str, &path_ary, &with_path_flag, &kwargs);
510
+ #endif
511
+ // rb_io_write(rb_stderr, rb_sprintf("with_path_flag: %" PRIsVALUE " \n", with_path_flag));
512
+ with_path = RTEST(with_path_flag);
513
+ if (kwargs != Qnil)
514
+ {
515
+ rb_get_kwargs(kwargs, scan_kwargs_table, 0, 7, kwargs_values);
516
+ if (kwargs_values[0] != Qundef)
517
+ with_path = RTEST(kwargs_values[0]);
518
+ if (kwargs_values[1] != Qundef)
519
+ verbose_error = RTEST(kwargs_values[1]);
520
+ }
521
+ rb_check_type(json_str, T_STRING);
522
+ json_text = RSTRING_PTR(json_str);
523
+ #if LONG_MAX > SIZE_MAX
524
+ json_text_len = RSTRING_LENINT(json_str);
525
+ #else
526
+ json_text_len = RSTRING_LEN(json_str);
527
+ #endif
528
+ ctx = scan_ctx_init(path_ary, with_path);
465
529
 
466
530
  handle = yajl_alloc(&scan_callbacks, NULL, (void *)ctx);
531
+ if (kwargs != Qnil) // it's safe to read kwargs_values only if rb_get_kwargs was called
532
+ {
533
+ if (kwargs_values[2] != Qundef)
534
+ yajl_config(handle, yajl_allow_comments, RTEST(kwargs_values[2]));
535
+ if (kwargs_values[3] != Qundef)
536
+ yajl_config(handle, yajl_dont_validate_strings, RTEST(kwargs_values[3]));
537
+ if (kwargs_values[4] != Qundef)
538
+ yajl_config(handle, yajl_allow_trailing_garbage, RTEST(kwargs_values[4]));
539
+ if (kwargs_values[5] != Qundef)
540
+ yajl_config(handle, yajl_allow_multiple_values, RTEST(kwargs_values[5]));
541
+ if (kwargs_values[6] != Qundef)
542
+ yajl_config(handle, yajl_allow_partial_values, RTEST(kwargs_values[6]));
543
+ }
467
544
  ctx->handle = handle;
468
- // TODO: make it configurable
469
- // yajl_config(handle, yajl_allow_comments, true);
470
- // yajl_config(handle, yajl_allow_trailing_garbage, true);
471
- stat = yajl_parse(handle, (unsigned char *) json_text, json_text_len);
545
+ stat = yajl_parse(handle, (unsigned char *)json_text, json_text_len);
472
546
  if (stat == yajl_status_ok)
473
547
  stat = yajl_complete_parse(handle);
474
548
 
475
549
  if (stat != yajl_status_ok)
476
550
  {
477
- char *str = (char *) yajl_get_error(handle, opt_verbose_error, (unsigned char *) json_text, json_text_len);
478
- err = rb_str_new_cstr(str);
479
- yajl_free_error(handle, (unsigned char *) str);
551
+ char *str = (char *)yajl_get_error(handle, verbose_error, (unsigned char *)json_text, json_text_len);
552
+ err = rb_utf8_str_new_cstr(str);
553
+ yajl_free_error(handle, (unsigned char *)str);
480
554
  }
481
555
  // callback_err = ctx->rb_err;
482
556
  result = ctx->points_list;
@@ -494,19 +568,20 @@ RUBY_FUNC_EXPORTED void
494
568
  Init_json_scanner(void)
495
569
  {
496
570
  rb_mJsonScanner = rb_define_module("JsonScanner");
497
- rb_define_const(rb_mJsonScanner, "ALL", rb_range_new(INT2FIX(0), INT2FIX(-1), false));
498
- rb_mJsonScannerOptions = rb_define_module_under(rb_mJsonScanner, "Options");
571
+ rb_define_const(rb_mJsonScanner, "ANY_INDEX", rb_range_new(INT2FIX(0), INT2FIX(-1), false));
499
572
  rb_eJsonScannerParseError = rb_define_class_under(rb_mJsonScanner, "ParseError", rb_eRuntimeError);
500
- rb_define_const(rb_mJsonScannerOptions, "ALLOW_COMMENTS", INT2FIX(yajl_allow_comments));
501
- rb_define_const(rb_mJsonScannerOptions, "DONT_VALIDATE_STRINGS", INT2FIX(yajl_dont_validate_strings));
502
- rb_define_const(rb_mJsonScannerOptions, "ALLOW_TRAILING_GARBAGE", INT2FIX(yajl_allow_trailing_garbage));
503
- rb_define_const(rb_mJsonScannerOptions, "ALLOW_MULTIPLE_VALUES", INT2FIX(yajl_allow_multiple_values));
504
- rb_define_const(rb_mJsonScannerOptions, "ALLOW_PARTIAL_VALUES", INT2FIX(yajl_allow_partial_values));
505
- rb_define_module_function(rb_mJsonScanner, "scan", scan, 3);
573
+ rb_define_module_function(rb_mJsonScanner, "scan", scan, -1);
506
574
  null_sym = rb_id2sym(rb_intern("null"));
507
575
  boolean_sym = rb_id2sym(rb_intern("boolean"));
508
576
  number_sym = rb_id2sym(rb_intern("number"));
509
577
  string_sym = rb_id2sym(rb_intern("string"));
510
578
  object_sym = rb_id2sym(rb_intern("object"));
511
579
  array_sym = rb_id2sym(rb_intern("array"));
580
+ scan_kwargs_table[0] = rb_intern("with_path");
581
+ scan_kwargs_table[1] = rb_intern("verbose_error");
582
+ scan_kwargs_table[2] = rb_intern("allow_comments");
583
+ scan_kwargs_table[3] = rb_intern("dont_validate_strings");
584
+ scan_kwargs_table[4] = rb_intern("allow_trailing_garbage");
585
+ scan_kwargs_table[5] = rb_intern("allow_multiple_values");
586
+ scan_kwargs_table[6] = rb_intern("allow_partial_values");
512
587
  }
@@ -3,6 +3,7 @@
3
3
 
4
4
  #include "ruby.h"
5
5
  #include "ruby/intern.h"
6
+ #include "ruby/version.h"
6
7
  #include <yajl/yajl_parse.h>
7
8
  #include <yajl/yajl_gen.h>
8
9
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module JsonScanner
4
- VERSION = "0.1.0"
4
+ VERSION = "0.2.0"
5
5
  end
@@ -1,37 +1,193 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative "spec_helper"
4
+ require "json"
4
5
 
5
6
  RSpec.describe JsonScanner do
6
7
  it "has a version number" do
7
- expect(described_class::VERSION).not_to be nil
8
+ expect(described_class::VERSION).not_to be_nil
8
9
  end
9
10
 
10
11
  it "scans json" do
11
- result = described_class.scan('["1", {"a": 2}]', [[0], [1, "a"], []], false)
12
+ result = described_class.scan('["1", {"a": 2}]', [[0], [1, "a"], []])
12
13
  expect(result).to eq([[[1, 4, :string]], [[12, 13, :number]], [[0, 15, :array]]])
13
- expect(described_class.scan('"2"', [[]], false)).to eq([[[0, 3, :string]]])
14
+ expect(described_class.scan('"2"', [[]])).to eq([[[0, 3, :string]]])
14
15
  expect(
15
- described_class.scan("[0,1,2,3,4,5,6,7]", [[(0..2)], [(4...6)]], false)
16
+ described_class.scan("[0,1,2,3,4,5,6,7]", [[(0..2)], [(4...6)]]),
16
17
  ).to eq(
17
- [[[1, 2, :number], [3, 4, :number], [5, 6, :number]], [[9, 10, :number], [11, 12, :number]]]
18
+ [[[1, 2, :number], [3, 4, :number], [5, 6, :number]], [[9, 10, :number], [11, 12, :number]]],
18
19
  )
19
- expect(described_class.scan('{"a": 1}', [["a"], []], false)).to eq(
20
- [[[6, 7, :number]], [[0, 8, :object]]]
20
+ expect(described_class.scan('{"a": 1}', [["a"], []])).to eq(
21
+ [[[6, 7, :number]], [[0, 8, :object]]],
21
22
  )
23
+ end
24
+
25
+ it "works with max path len correctly" do
26
+ expect(
27
+ described_class.scan('{"a": [1]}', [[], ["a"]]),
28
+ ).to eq(
29
+ [[[0, 10, :object]], [[6, 9, :array]]],
30
+ )
31
+ expect(
32
+ described_class.scan('{"a": {"b": 1}}', [[], ["a"]]),
33
+ ).to eq(
34
+ [[[0, 15, :object]], [[6, 14, :object]]],
35
+ )
36
+ expect(described_class.scan('{"a": 1}', [[]])).to eq([[[0, 8, :object]]])
37
+ expect(described_class.scan("[[1]]", [[]])).to eq([[[0, 5, :array]]])
38
+ expect(described_class.scan("[[1]]", [[0]])).to eq([[[1, 4, :array]]])
39
+ end
40
+
41
+ it "raises on invalid json" do
22
42
  expect do
23
43
  begin
24
44
  GC.stress = true
25
45
  # TODO: investigate
26
46
  # got "munmap_chunk(): invalid pointer" in in console once after
27
47
  # JsonScanner.scan '[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[]]]]]]]]]]]]]]]]]]]]]]', [[0,0,0,0,0,0,0]], true + Ctrl+D
28
- # (last arg wasn't handled at the time)
29
- # but I don't think it's a problem of tht extension or libyajl, it happened at exit and I free everything before
48
+ # (last arg wasn't handled at the time and was intended for with_path kwarg)
49
+ # but I don't think it's a problem of the extension or libyajl, it happened at exit and I free everything before
30
50
  # `JsonScanner.scan` returns
31
- described_class.scan "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[]]]]]]]]]]]]]]]]]]]]]]", [[0, 0, 0, 0, 0, 0, 0]], false
51
+ described_class.scan "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[]]]]]]]]]]]]]]]]]]]]]]", [[0, 0, 0, 0, 0, 0, 0]]
32
52
  ensure
33
53
  GC.stress = false
34
54
  end
35
55
  end.to raise_error described_class::ParseError
36
56
  end
57
+
58
+ it "allows to select ranges" do
59
+ expect(
60
+ described_class.scan("[[1,2],[3,4]]", [[described_class::ANY_INDEX, described_class::ANY_INDEX]]),
61
+ ).to eq(
62
+ [[[2, 3, :number], [4, 5, :number], [8, 9, :number], [10, 11, :number]]],
63
+ )
64
+ expect(
65
+ described_class.scan("[[1,2],[3,4]]", [[described_class::ANY_INDEX, (0...1)]]),
66
+ ).to eq(
67
+ [[[2, 3, :number], [8, 9, :number]]],
68
+ )
69
+ end
70
+
71
+ it "allows only positive or -1 values" do
72
+ expect do
73
+ described_class.scan("[[1,2],[3,4]]", [[(0...-1)]])
74
+ end.to raise_error ArgumentError
75
+ expect do
76
+ described_class.scan("[[1,2],[3,4]]", [[(0..-2)]])
77
+ end.to raise_error ArgumentError
78
+ expect do
79
+ described_class.scan("[[1,2],[3,4]]", [[(-42..1)]])
80
+ end.to raise_error ArgumentError
81
+ end
82
+
83
+ it "allows to configure error messages" do
84
+ expect do
85
+ described_class.scan "{1}", []
86
+ end.to raise_error described_class::ParseError, /invalid object key(?!.*\(right here\))/m
87
+ expect do
88
+ described_class.scan "{1}", [], verbose_error: false
89
+ end.to raise_error described_class::ParseError, /invalid object key(?!.*\(right here\))/m
90
+ expect do
91
+ described_class.scan "{1}", [], verbose_error: true
92
+ end.to raise_error described_class::ParseError, /invalid object key(?=.*\(right here\))/m
93
+ end
94
+
95
+ it "allows to return an actual path to the element" do
96
+ with_path_expected_res = [
97
+ # result for the first matcher; each element is an array of two items:
98
+ # array of path elements and 3-element array start,end,type
99
+ [[[0], [1, 6, :array]], [[1], [7, 12, :array]]],
100
+ [
101
+ [[0, 0], [2, 3, :number]], [[0, 1], [4, 5, :number]],
102
+ [[1, 0], [8, 9, :number]], [[1, 1], [10, 11, :number]],
103
+ ],
104
+ ]
105
+ params = [
106
+ "[[1,2],[3,4]]",
107
+ [
108
+ [described_class::ANY_INDEX],
109
+ [described_class::ANY_INDEX, described_class::ANY_INDEX],
110
+ ],
111
+ ]
112
+ expect(described_class.scan(*params, with_path: true)).to eq(with_path_expected_res)
113
+ expect(described_class.scan(*params, true)).to eq(with_path_expected_res)
114
+ expect(
115
+ described_class.scan(*params, false, with_path: true),
116
+ ).to eq(with_path_expected_res)
117
+ end
118
+
119
+ it "ignores reqular flag if kwarg is given" do
120
+ expect(
121
+ described_class.scan(
122
+ "[[1,2],[3,4]]",
123
+ [
124
+ [described_class::ANY_INDEX],
125
+ [described_class::ANY_INDEX, described_class::ANY_INDEX],
126
+ ],
127
+ true, with_path: false,
128
+ ),
129
+ ).to eq(
130
+ [
131
+ # result for the first matcher; each element is a 3-element array: start, end, type
132
+ [[1, 6, :array], [7, 12, :array]],
133
+ [
134
+ [2, 3, :number], [4, 5, :number],
135
+ [8, 9, :number], [10, 11, :number],
136
+ ],
137
+ ],
138
+ )
139
+ end
140
+
141
+ it "allows to pass config as a hash" do
142
+ expect(
143
+ described_class.scan("[1]", [[0]], { with_path: true }),
144
+ ).to eq(
145
+ [
146
+ [[[0], [1, 2, :number]]],
147
+ ],
148
+ )
149
+ end
150
+
151
+ it "allows to configure yajl" do
152
+ expect(
153
+ described_class.scan("[1]____________", [[0]], { allow_trailing_garbage: true }),
154
+ ).to eq([[[1, 2, :number]]])
155
+ expect(
156
+ described_class.scan(
157
+ '["1", {"a": /* comment */ 2}]____________', [[1, "a"]],
158
+ { allow_trailing_garbage: true, allow_comments: true },
159
+ ),
160
+ ).to eq([[[26, 27, :number]]])
161
+ expect(
162
+ described_class.scan(
163
+ '[{"a": /* comment */ 1}]_________', [[]],
164
+ { allow_comments: true, allow_trailing_garbage: true },
165
+ ),
166
+ ).to eq([[[0, 24, :array]]])
167
+ end
168
+
169
+ it "works with utf-8" do
170
+ json = '{"ルビー": ["Руби"]}'.encode(Encoding::UTF_8)
171
+ expect(described_class.scan(json, [[]])).to eq([[[0, json.bytesize, :object]]])
172
+ res = described_class.scan(json, [["ルビー", 0]])
173
+ expect(res).to eq([[[15, 25, :string]]])
174
+ elem = res.first.first
175
+ expect(JSON.parse(json.byteslice(elem[0]...elem[1]), quirks_mode: true)).to eq("Руби")
176
+ end
177
+
178
+ it "raises exceptions in utf-8" do
179
+ bad_json = '{"ルビー": ["Руби" 1]}'.encode(Encoding::UTF_8)
180
+ expect do
181
+ described_class.scan(bad_json, [[]], verbose_error: true)
182
+ # Checks encoding
183
+ end.to raise_error(described_class::ParseError, Regexp.new(Regexp.escape(bad_json)))
184
+ end
185
+
186
+ it "works with different encodings" do
187
+ # TODO: encoding validation
188
+ json = '{"a": 1}'.encode(Encoding::UTF_32LE)
189
+ expect do
190
+ described_class.scan(json, [[]])
191
+ end.to raise_error(described_class::ParseError)
192
+ end
37
193
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: json_scanner
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - uvlad7
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-12-15 00:00:00.000000000 Z
11
+ date: 2024-12-27 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: This gem uses yajl lib to scan a json string and allows you to parse
14
14
  pieces of it
@@ -53,7 +53,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
53
53
  requirements:
54
54
  - libyajl2, v2.1
55
55
  - libyajl-dev, v2.1
56
- rubygems_version: 3.5.7
56
+ rubygems_version: 3.4.20
57
57
  signing_key:
58
58
  specification_version: 4
59
59
  summary: Extract values from JSON without full parsing