json_scanner 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +6 -1
- data/ext/json_scanner/json_scanner.c +133 -58
- data/ext/json_scanner/json_scanner.h +1 -0
- data/lib/json_scanner/version.rb +1 -1
- data/spec/json_scanner_spec.rb +166 -10
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f34237d4ceab009f685b82a4e480247f23c804db96bded6d1bacea5ddd4a0946
|
4
|
+
data.tar.gz: 87484e4cbab84666b41ddb67553b0c985ef6dc29d8f1154a458173deade08587
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5f6ae7f8d7afc88fee60e88eb8efe776b7ae0dffe25ecffa5ffc61241629eb4c3ec86a9ab1b1e76d49b4bd12498ee15625677afbc997039f9295e65e510a32df
|
7
|
+
data.tar.gz: b5e95df2d53c0a224f6a089a55a6a0c502adb7e4dce276f2f85c48a063182f65f1425eff1f97cf409de46c815b45bbfa0626e54d76036a5c66c13eefa4146648
|
data/README.md
CHANGED
@@ -32,7 +32,12 @@ emoji_json = '{"grin": "😁", "heart": "😍", "rofl": "🤣"}'
|
|
32
32
|
begin_pos, end_pos, = JsonScanner.scan(emoji_json, [["heart"]], false).first.first
|
33
33
|
emoji_json.byteslice(begin_pos...end_pos)
|
34
34
|
# => "\"😍\""
|
35
|
-
|
35
|
+
# Note: you most likely don't need the `quirks_mode` option unless you are using an old Ruby
|
36
|
+
# with the stdlib version of the json gem, or an old version of the gem. In newer versions `quirks_mode` is the default.
|
37
|
+
JSON.parse(emoji_json.byteslice(begin_pos...end_pos), quirks_mode: true)
|
38
|
+
# => "😍"
|
39
|
+
# You can also do this
|
40
|
+
# emoji_json.force_encoding(Encoding::BINARY)[begin_pos...end_pos].force_encoding(Encoding::UTF_8)
|
36
41
|
# => "\"😍\""
|
37
42
|
```
|
38
43
|
|
data/ext/json_scanner/json_scanner.c
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
#include "json_scanner.h"
|
2
2
|
|
3
3
|
VALUE rb_mJsonScanner;
|
4
|
-
VALUE rb_mJsonScannerOptions;
|
5
4
|
VALUE rb_eJsonScannerParseError;
|
5
|
+
ID scan_kwargs_table[7];
|
6
6
|
|
7
7
|
VALUE null_sym;
|
8
8
|
VALUE boolean_sym;
|
@@ -16,9 +16,9 @@ enum matcher_type
|
|
16
16
|
MATCHER_KEY,
|
17
17
|
MATCHER_INDEX,
|
18
18
|
// MATCHER_ANY_KEY,
|
19
|
-
// MATCHER_ANY_INDEX,
|
20
19
|
MATCHER_INDEX_RANGE,
|
21
20
|
// MATCHER_KEYS_LIST,
|
21
|
+
// MATCHER_KEY_REGEX,
|
22
22
|
};
|
23
23
|
|
24
24
|
enum path_type
|
@@ -84,18 +84,22 @@ typedef struct
|
|
84
84
|
} scan_ctx;
|
85
85
|
|
86
86
|
// FIXME: This will cause memory leak if ruby_xmalloc raises
|
87
|
-
scan_ctx *scan_ctx_init(VALUE path_ary, VALUE with_path)
|
87
|
+
scan_ctx *scan_ctx_init(VALUE path_ary, int with_path)
|
88
88
|
{
|
89
|
+
int path_ary_len;
|
90
|
+
scan_ctx *ctx;
|
91
|
+
paths_t *paths;
|
89
92
|
// TODO: Allow to_ary and sized enumerables
|
90
93
|
rb_check_type(path_ary, T_ARRAY);
|
91
|
-
|
94
|
+
path_ary_len = rb_long2int(rb_array_len(path_ary));
|
92
95
|
// Check types early before any allocations, so exception is ok
|
93
96
|
// TODO: Fix this, just handle errors
|
94
97
|
for (int i = 0; i < path_ary_len; i++)
|
95
98
|
{
|
99
|
+
int path_len;
|
96
100
|
VALUE path = rb_ary_entry(path_ary, i);
|
97
101
|
rb_check_type(path, T_ARRAY);
|
98
|
-
|
102
|
+
path_len = rb_long2int(rb_array_len(path));
|
99
103
|
for (int j = 0; j < path_len; j++)
|
100
104
|
{
|
101
105
|
VALUE entry = rb_ary_entry(path, j);
|
@@ -113,25 +117,32 @@ scan_ctx *scan_ctx_init(VALUE path_ary, VALUE with_path)
|
|
113
117
|
else
|
114
118
|
{
|
115
119
|
VALUE range_beg, range_end;
|
120
|
+
long end_val;
|
116
121
|
int open_ended;
|
117
122
|
if (rb_range_values(entry, &range_beg, &range_end, &open_ended) != Qtrue)
|
118
123
|
rb_raise(rb_eArgError, "path elements must be strings, integers, or ranges");
|
119
|
-
RB_NUM2LONG(range_beg)
|
120
|
-
|
124
|
+
if (RB_NUM2LONG(range_beg) < 0L)
|
125
|
+
rb_raise(rb_eArgError, "range start must be positive");
|
126
|
+
end_val = RB_NUM2LONG(range_end);
|
127
|
+
if (end_val < -1L)
|
128
|
+
rb_raise(rb_eArgError, "range end must be positive or -1");
|
129
|
+
if (end_val == -1L && open_ended)
|
130
|
+
rb_raise(rb_eArgError, "range with -1 end must be closed");
|
121
131
|
}
|
122
132
|
}
|
123
133
|
}
|
124
134
|
|
125
|
-
|
135
|
+
ctx = ruby_xmalloc(sizeof(scan_ctx));
|
126
136
|
|
127
|
-
ctx->with_path =
|
137
|
+
ctx->with_path = with_path;
|
128
138
|
ctx->max_path_len = 0;
|
129
139
|
|
130
|
-
|
140
|
+
paths = ruby_xmalloc(sizeof(paths_t) * path_ary_len);
|
131
141
|
for (int i = 0; i < path_ary_len; i++)
|
132
142
|
{
|
143
|
+
int path_len;
|
133
144
|
VALUE path = rb_ary_entry(path_ary, i);
|
134
|
-
|
145
|
+
path_len = rb_long2int(rb_array_len(path));
|
135
146
|
if (path_len > ctx->max_path_len)
|
136
147
|
ctx->max_path_len = path_len;
|
137
148
|
paths[i].elems = ruby_xmalloc2(sizeof(path_matcher_elem_t), path_len);
|
@@ -156,12 +167,16 @@ scan_ctx *scan_ctx_init(VALUE path_ary, VALUE with_path)
|
|
156
167
|
}
|
157
168
|
else
|
158
169
|
{
|
159
|
-
paths[i].elems[j].type = MATCHER_INDEX_RANGE;
|
160
170
|
VALUE range_beg, range_end;
|
161
171
|
int open_ended;
|
172
|
+
paths[i].elems[j].type = MATCHER_INDEX_RANGE;
|
162
173
|
rb_range_values(entry, &range_beg, &range_end, &open_ended);
|
163
174
|
paths[i].elems[j].value.range.start = RB_NUM2LONG(range_beg);
|
164
175
|
paths[i].elems[j].value.range.end = RB_NUM2LONG(range_end);
|
176
|
+
// (value..-1) works as expected, (value...-1) is forbidden above
|
177
|
+
if (paths[i].elems[j].value.range.end == -1L)
|
178
|
+
paths[i].elems[j].value.range.end = LONG_MAX;
|
179
|
+
// -1 here is fine, so, (0...0) works just as expected - doesn't match anything
|
165
180
|
if (open_ended)
|
166
181
|
paths[i].elems[j].value.range.end--;
|
167
182
|
}
|
@@ -181,7 +196,7 @@ scan_ctx *scan_ctx_init(VALUE path_ary, VALUE with_path)
|
|
181
196
|
rb_ary_push(ctx->points_list, rb_ary_new());
|
182
197
|
}
|
183
198
|
|
184
|
-
ctx->starts = ruby_xmalloc2(sizeof(size_t), ctx->max_path_len);
|
199
|
+
ctx->starts = ruby_xmalloc2(sizeof(size_t), ctx->max_path_len + 1);
|
185
200
|
// ctx->rb_err = Qnil;
|
186
201
|
ctx->handle = NULL;
|
187
202
|
|
@@ -224,10 +239,10 @@ typedef enum
|
|
224
239
|
} value_type;
|
225
240
|
|
226
241
|
// noexcept
|
227
|
-
|
242
|
+
VALUE create_point(scan_ctx *sctx, value_type type, size_t length, size_t curr_pos)
|
228
243
|
{
|
229
|
-
*point = rb_ary_new_capa(3);
|
230
244
|
VALUE values[3];
|
245
|
+
VALUE point = rb_ary_new_capa(3);
|
231
246
|
// noexcept
|
232
247
|
values[1] = RB_ULONG2NUM(curr_pos);
|
233
248
|
switch (type)
|
@@ -259,7 +274,31 @@ void create_point(VALUE *point, scan_ctx *sctx, value_type type, size_t length,
|
|
259
274
|
break;
|
260
275
|
}
|
261
276
|
// rb_ary_cat raise only in case of a frozen array or if len is too long
|
262
|
-
rb_ary_cat(
|
277
|
+
rb_ary_cat(point, values, 3);
|
278
|
+
return point;
|
279
|
+
}
|
280
|
+
|
281
|
+
// noexcept
|
282
|
+
VALUE create_path(scan_ctx *sctx)
|
283
|
+
{
|
284
|
+
VALUE path = rb_ary_new_capa(sctx->current_path_len);
|
285
|
+
for (int i = 0; i < sctx->current_path_len; i++)
|
286
|
+
{
|
287
|
+
VALUE entry;
|
288
|
+
switch (sctx->current_path[i].type)
|
289
|
+
{
|
290
|
+
case PATH_KEY:
|
291
|
+
entry = rb_str_new(sctx->current_path[i].value.key.val, sctx->current_path[i].value.key.len);
|
292
|
+
break;
|
293
|
+
case PATH_INDEX:
|
294
|
+
entry = RB_ULONG2NUM(sctx->current_path[i].value.index);
|
295
|
+
break;
|
296
|
+
default:
|
297
|
+
entry = Qnil;
|
298
|
+
}
|
299
|
+
rb_ary_push(path, entry);
|
300
|
+
}
|
301
|
+
return path;
|
263
302
|
}
|
264
303
|
|
265
304
|
// noexcept
|
@@ -267,13 +306,15 @@ void save_point(scan_ctx *sctx, value_type type, size_t length)
|
|
267
306
|
{
|
268
307
|
// TODO: Abort parsing if all paths are matched and no more matches are possible: only trivial key/index matchers at the current level
|
269
308
|
// TODO: Don't re-compare already matched prefixes; hard to invalidate, though
|
309
|
+
// TODO: Might fail in case of no memory
|
270
310
|
VALUE point = Qundef;
|
311
|
+
int match;
|
271
312
|
for (int i = 0; i < sctx->paths_len; i++)
|
272
313
|
{
|
273
314
|
if (sctx->paths[i].len != sctx->current_path_len)
|
274
315
|
continue;
|
275
316
|
|
276
|
-
|
317
|
+
match = true;
|
277
318
|
for (int j = 0; j < sctx->current_path_len; j++)
|
278
319
|
{
|
279
320
|
switch (sctx->paths[i].elems[j].type)
|
@@ -303,7 +344,11 @@ void save_point(scan_ctx *sctx, value_type type, size_t length)
|
|
303
344
|
{
|
304
345
|
if (point == Qundef)
|
305
346
|
{
|
306
|
-
create_point(
|
347
|
+
point = create_point(sctx, type, length, yajl_get_bytes_consumed(sctx->handle));
|
348
|
+
if (sctx->with_path)
|
349
|
+
{
|
350
|
+
point = rb_ary_new_from_args(2, create_path(sctx), point);
|
351
|
+
}
|
307
352
|
}
|
308
353
|
// rb_ary_push raises only in case of a frozen array, which is not the case
|
309
354
|
// rb_ary_entry is safe
|
@@ -366,11 +411,9 @@ int scan_on_start_object(void *ctx)
|
|
366
411
|
return true;
|
367
412
|
}
|
368
413
|
increment_arr_index(sctx);
|
414
|
+
sctx->starts[sctx->current_path_len] = yajl_get_bytes_consumed(sctx->handle) - 1;
|
369
415
|
if (sctx->current_path_len < sctx->max_path_len)
|
370
|
-
{
|
371
|
-
sctx->starts[sctx->current_path_len] = yajl_get_bytes_consumed(sctx->handle) - 1;
|
372
416
|
sctx->current_path[sctx->current_path_len].type = PATH_KEY;
|
373
|
-
}
|
374
417
|
sctx->current_path_len++;
|
375
418
|
return true;
|
376
419
|
}
|
@@ -383,7 +426,7 @@ int scan_on_key(void *ctx, const unsigned char *key, size_t len)
|
|
383
426
|
return true;
|
384
427
|
// Can't be called without scan_on_start_object being called before
|
385
428
|
// So current_path_len at least 1 and key.type is set to PATH_KEY;
|
386
|
-
sctx->current_path[sctx->current_path_len - 1].value.key.val = (char *)
|
429
|
+
sctx->current_path[sctx->current_path_len - 1].value.key.val = (char *)key;
|
387
430
|
sctx->current_path[sctx->current_path_len - 1].value.key.len = len;
|
388
431
|
return true;
|
389
432
|
}
|
@@ -393,9 +436,8 @@ int scan_on_end_object(void *ctx)
|
|
393
436
|
{
|
394
437
|
scan_ctx *sctx = (scan_ctx *)ctx;
|
395
438
|
sctx->current_path_len--;
|
396
|
-
if (sctx->current_path_len
|
397
|
-
|
398
|
-
save_point(sctx, object_value, 0);
|
439
|
+
if (sctx->current_path_len <= sctx->max_path_len)
|
440
|
+
save_point(sctx, object_value, 0);
|
399
441
|
return true;
|
400
442
|
}
|
401
443
|
|
@@ -409,9 +451,9 @@ int scan_on_start_array(void *ctx)
|
|
409
451
|
return true;
|
410
452
|
}
|
411
453
|
increment_arr_index(sctx);
|
454
|
+
sctx->starts[sctx->current_path_len] = yajl_get_bytes_consumed(sctx->handle) - 1;
|
412
455
|
if (sctx->current_path_len < sctx->max_path_len)
|
413
456
|
{
|
414
|
-
sctx->starts[sctx->current_path_len] = yajl_get_bytes_consumed(sctx->handle) - 1;
|
415
457
|
sctx->current_path[sctx->current_path_len].type = PATH_INDEX;
|
416
458
|
sctx->current_path[sctx->current_path_len].value.index = -1;
|
417
459
|
}
|
@@ -424,9 +466,8 @@ int scan_on_end_array(void *ctx)
|
|
424
466
|
{
|
425
467
|
scan_ctx *sctx = (scan_ctx *)ctx;
|
426
468
|
sctx->current_path_len--;
|
427
|
-
if (sctx->current_path_len
|
428
|
-
|
429
|
-
save_point(sctx, array_value, 0);
|
469
|
+
if (sctx->current_path_len <= sctx->max_path_len)
|
470
|
+
save_point(sctx, array_value, 0);
|
430
471
|
return true;
|
431
472
|
}
|
432
473
|
|
@@ -443,40 +484,73 @@ static yajl_callbacks scan_callbacks = {
|
|
443
484
|
scan_on_start_array,
|
444
485
|
scan_on_end_array};
|
445
486
|
|
446
|
-
//
|
447
|
-
|
487
|
+
// def scan(json_str, path_arr, opts)
|
488
|
+
// opts
|
489
|
+
// with_path: false, verbose_error: false,
|
490
|
+
// the following opts converted to bool and passed to yajl_config if provided, ignored if not provided
|
491
|
+
// allow_comments, dont_validate_strings, allow_trailing_garbage, allow_multiple_values, allow_partial_values
|
492
|
+
VALUE scan(int argc, VALUE *argv, VALUE self)
|
448
493
|
{
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
size_t json_text_len
|
455
|
-
#endif
|
494
|
+
VALUE json_str, path_ary, with_path_flag, kwargs;
|
495
|
+
VALUE kwargs_values[7];
|
496
|
+
|
497
|
+
int with_path = false, verbose_error = false;
|
498
|
+
char *json_text;
|
499
|
+
size_t json_text_len;
|
456
500
|
yajl_handle handle;
|
457
|
-
// TODO
|
458
|
-
int opt_verbose_error = 0;
|
459
501
|
yajl_status stat;
|
460
|
-
scan_ctx *ctx
|
461
|
-
VALUE err = Qnil;
|
462
|
-
VALUE result;
|
502
|
+
scan_ctx *ctx;
|
503
|
+
VALUE err = Qnil, result;
|
463
504
|
// Turned out callbacks can't raise exceptions
|
464
505
|
// VALUE callback_err;
|
506
|
+
#if RUBY_API_VERSION_MAJOR > 2 || (RUBY_API_VERSION_MAJOR == 2 && RUBY_API_VERSION_MINOR >= 7)
|
507
|
+
rb_scan_args_kw(RB_SCAN_ARGS_LAST_HASH_KEYWORDS, argc, argv, "21:", &json_str, &path_ary, &with_path_flag, &kwargs);
|
508
|
+
#else
|
509
|
+
rb_scan_args(argc, argv, "21:", &json_str, &path_ary, &with_path_flag, &kwargs);
|
510
|
+
#endif
|
511
|
+
// rb_io_write(rb_stderr, rb_sprintf("with_path_flag: %" PRIsVALUE " \n", with_path_flag));
|
512
|
+
with_path = RTEST(with_path_flag);
|
513
|
+
if (kwargs != Qnil)
|
514
|
+
{
|
515
|
+
rb_get_kwargs(kwargs, scan_kwargs_table, 0, 7, kwargs_values);
|
516
|
+
if (kwargs_values[0] != Qundef)
|
517
|
+
with_path = RTEST(kwargs_values[0]);
|
518
|
+
if (kwargs_values[1] != Qundef)
|
519
|
+
verbose_error = RTEST(kwargs_values[1]);
|
520
|
+
}
|
521
|
+
rb_check_type(json_str, T_STRING);
|
522
|
+
json_text = RSTRING_PTR(json_str);
|
523
|
+
#if LONG_MAX > SIZE_MAX
|
524
|
+
json_text_len = RSTRING_LENINT(json_str);
|
525
|
+
#else
|
526
|
+
json_text_len = RSTRING_LEN(json_str);
|
527
|
+
#endif
|
528
|
+
ctx = scan_ctx_init(path_ary, with_path);
|
465
529
|
|
466
530
|
handle = yajl_alloc(&scan_callbacks, NULL, (void *)ctx);
|
531
|
+
if (kwargs != Qnil) // it's safe to read kwargs_values only if rb_get_kwargs was called
|
532
|
+
{
|
533
|
+
if (kwargs_values[2] != Qundef)
|
534
|
+
yajl_config(handle, yajl_allow_comments, RTEST(kwargs_values[2]));
|
535
|
+
if (kwargs_values[3] != Qundef)
|
536
|
+
yajl_config(handle, yajl_dont_validate_strings, RTEST(kwargs_values[3]));
|
537
|
+
if (kwargs_values[4] != Qundef)
|
538
|
+
yajl_config(handle, yajl_allow_trailing_garbage, RTEST(kwargs_values[4]));
|
539
|
+
if (kwargs_values[5] != Qundef)
|
540
|
+
yajl_config(handle, yajl_allow_multiple_values, RTEST(kwargs_values[5]));
|
541
|
+
if (kwargs_values[6] != Qundef)
|
542
|
+
yajl_config(handle, yajl_allow_partial_values, RTEST(kwargs_values[6]));
|
543
|
+
}
|
467
544
|
ctx->handle = handle;
|
468
|
-
|
469
|
-
// yajl_config(handle, yajl_allow_comments, true);
|
470
|
-
// yajl_config(handle, yajl_allow_trailing_garbage, true);
|
471
|
-
stat = yajl_parse(handle, (unsigned char *) json_text, json_text_len);
|
545
|
+
stat = yajl_parse(handle, (unsigned char *)json_text, json_text_len);
|
472
546
|
if (stat == yajl_status_ok)
|
473
547
|
stat = yajl_complete_parse(handle);
|
474
548
|
|
475
549
|
if (stat != yajl_status_ok)
|
476
550
|
{
|
477
|
-
char *str = (char *)
|
478
|
-
err =
|
479
|
-
yajl_free_error(handle, (unsigned char *)
|
551
|
+
char *str = (char *)yajl_get_error(handle, verbose_error, (unsigned char *)json_text, json_text_len);
|
552
|
+
err = rb_utf8_str_new_cstr(str);
|
553
|
+
yajl_free_error(handle, (unsigned char *)str);
|
480
554
|
}
|
481
555
|
// callback_err = ctx->rb_err;
|
482
556
|
result = ctx->points_list;
|
@@ -494,19 +568,20 @@ RUBY_FUNC_EXPORTED void
|
|
494
568
|
Init_json_scanner(void)
|
495
569
|
{
|
496
570
|
rb_mJsonScanner = rb_define_module("JsonScanner");
|
497
|
-
rb_define_const(rb_mJsonScanner, "
|
498
|
-
rb_mJsonScannerOptions = rb_define_module_under(rb_mJsonScanner, "Options");
|
571
|
+
rb_define_const(rb_mJsonScanner, "ANY_INDEX", rb_range_new(INT2FIX(0), INT2FIX(-1), false));
|
499
572
|
rb_eJsonScannerParseError = rb_define_class_under(rb_mJsonScanner, "ParseError", rb_eRuntimeError);
|
500
|
-
|
501
|
-
rb_define_const(rb_mJsonScannerOptions, "DONT_VALIDATE_STRINGS", INT2FIX(yajl_dont_validate_strings));
|
502
|
-
rb_define_const(rb_mJsonScannerOptions, "ALLOW_TRAILING_GARBAGE", INT2FIX(yajl_allow_trailing_garbage));
|
503
|
-
rb_define_const(rb_mJsonScannerOptions, "ALLOW_MULTIPLE_VALUES", INT2FIX(yajl_allow_multiple_values));
|
504
|
-
rb_define_const(rb_mJsonScannerOptions, "ALLOW_PARTIAL_VALUES", INT2FIX(yajl_allow_partial_values));
|
505
|
-
rb_define_module_function(rb_mJsonScanner, "scan", scan, 3);
|
573
|
+
rb_define_module_function(rb_mJsonScanner, "scan", scan, -1);
|
506
574
|
null_sym = rb_id2sym(rb_intern("null"));
|
507
575
|
boolean_sym = rb_id2sym(rb_intern("boolean"));
|
508
576
|
number_sym = rb_id2sym(rb_intern("number"));
|
509
577
|
string_sym = rb_id2sym(rb_intern("string"));
|
510
578
|
object_sym = rb_id2sym(rb_intern("object"));
|
511
579
|
array_sym = rb_id2sym(rb_intern("array"));
|
580
|
+
scan_kwargs_table[0] = rb_intern("with_path");
|
581
|
+
scan_kwargs_table[1] = rb_intern("verbose_error");
|
582
|
+
scan_kwargs_table[2] = rb_intern("allow_comments");
|
583
|
+
scan_kwargs_table[3] = rb_intern("dont_validate_strings");
|
584
|
+
scan_kwargs_table[4] = rb_intern("allow_trailing_garbage");
|
585
|
+
scan_kwargs_table[5] = rb_intern("allow_multiple_values");
|
586
|
+
scan_kwargs_table[6] = rb_intern("allow_partial_values");
|
512
587
|
}
|
data/lib/json_scanner/version.rb
CHANGED
data/spec/json_scanner_spec.rb
CHANGED
@@ -1,37 +1,193 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require_relative "spec_helper"
|
4
|
+
require "json"
|
4
5
|
|
5
6
|
RSpec.describe JsonScanner do
|
6
7
|
it "has a version number" do
|
7
|
-
expect(described_class::VERSION).not_to
|
8
|
+
expect(described_class::VERSION).not_to be_nil
|
8
9
|
end
|
9
10
|
|
10
11
|
it "scans json" do
|
11
|
-
result = described_class.scan('["1", {"a": 2}]', [[0], [1, "a"], []]
|
12
|
+
result = described_class.scan('["1", {"a": 2}]', [[0], [1, "a"], []])
|
12
13
|
expect(result).to eq([[[1, 4, :string]], [[12, 13, :number]], [[0, 15, :array]]])
|
13
|
-
expect(described_class.scan('"2"', [[]]
|
14
|
+
expect(described_class.scan('"2"', [[]])).to eq([[[0, 3, :string]]])
|
14
15
|
expect(
|
15
|
-
described_class.scan("[0,1,2,3,4,5,6,7]", [[(0..2)], [(4...6)]],
|
16
|
+
described_class.scan("[0,1,2,3,4,5,6,7]", [[(0..2)], [(4...6)]]),
|
16
17
|
).to eq(
|
17
|
-
[[[1, 2, :number], [3, 4, :number], [5, 6, :number]], [[9, 10, :number], [11, 12, :number]]]
|
18
|
+
[[[1, 2, :number], [3, 4, :number], [5, 6, :number]], [[9, 10, :number], [11, 12, :number]]],
|
18
19
|
)
|
19
|
-
expect(described_class.scan('{"a": 1}', [["a"], []]
|
20
|
-
[[[6, 7, :number]], [[0, 8, :object]]]
|
20
|
+
expect(described_class.scan('{"a": 1}', [["a"], []])).to eq(
|
21
|
+
[[[6, 7, :number]], [[0, 8, :object]]],
|
21
22
|
)
|
23
|
+
end
|
24
|
+
|
25
|
+
it "works with max path len correctly" do
|
26
|
+
expect(
|
27
|
+
described_class.scan('{"a": [1]}', [[], ["a"]]),
|
28
|
+
).to eq(
|
29
|
+
[[[0, 10, :object]], [[6, 9, :array]]],
|
30
|
+
)
|
31
|
+
expect(
|
32
|
+
described_class.scan('{"a": {"b": 1}}', [[], ["a"]]),
|
33
|
+
).to eq(
|
34
|
+
[[[0, 15, :object]], [[6, 14, :object]]],
|
35
|
+
)
|
36
|
+
expect(described_class.scan('{"a": 1}', [[]])).to eq([[[0, 8, :object]]])
|
37
|
+
expect(described_class.scan("[[1]]", [[]])).to eq([[[0, 5, :array]]])
|
38
|
+
expect(described_class.scan("[[1]]", [[0]])).to eq([[[1, 4, :array]]])
|
39
|
+
end
|
40
|
+
|
41
|
+
it "raises on invalid json" do
|
22
42
|
expect do
|
23
43
|
begin
|
24
44
|
GC.stress = true
|
25
45
|
# TODO: investigate
|
26
46
|
# got "munmap_chunk(): invalid pointer" in the console once after
|
27
47
|
# JsonScanner.scan '[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[]]]]]]]]]]]]]]]]]]]]]]', [[0,0,0,0,0,0,0]], true + Ctrl+D
|
28
|
-
# (last arg wasn't handled at the time)
|
29
|
-
# but I don't think it's a problem of
|
48
|
+
# (last arg wasn't handled at the time and was intended for with_path kwarg)
|
49
|
+
# but I don't think it's a problem of the extension or libyajl, it happened at exit and I free everything before
|
30
50
|
# `JsonScanner.scan` returns
|
31
|
-
described_class.scan "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[]]]]]]]]]]]]]]]]]]]]]]", [[0, 0, 0, 0, 0, 0, 0]]
|
51
|
+
described_class.scan "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[]]]]]]]]]]]]]]]]]]]]]]", [[0, 0, 0, 0, 0, 0, 0]]
|
32
52
|
ensure
|
33
53
|
GC.stress = false
|
34
54
|
end
|
35
55
|
end.to raise_error described_class::ParseError
|
36
56
|
end
|
57
|
+
|
58
|
+
it "allows to select ranges" do
|
59
|
+
expect(
|
60
|
+
described_class.scan("[[1,2],[3,4]]", [[described_class::ANY_INDEX, described_class::ANY_INDEX]]),
|
61
|
+
).to eq(
|
62
|
+
[[[2, 3, :number], [4, 5, :number], [8, 9, :number], [10, 11, :number]]],
|
63
|
+
)
|
64
|
+
expect(
|
65
|
+
described_class.scan("[[1,2],[3,4]]", [[described_class::ANY_INDEX, (0...1)]]),
|
66
|
+
).to eq(
|
67
|
+
[[[2, 3, :number], [8, 9, :number]]],
|
68
|
+
)
|
69
|
+
end
|
70
|
+
|
71
|
+
it "allows only positive or -1 values" do
|
72
|
+
expect do
|
73
|
+
described_class.scan("[[1,2],[3,4]]", [[(0...-1)]])
|
74
|
+
end.to raise_error ArgumentError
|
75
|
+
expect do
|
76
|
+
described_class.scan("[[1,2],[3,4]]", [[(0..-2)]])
|
77
|
+
end.to raise_error ArgumentError
|
78
|
+
expect do
|
79
|
+
described_class.scan("[[1,2],[3,4]]", [[(-42..1)]])
|
80
|
+
end.to raise_error ArgumentError
|
81
|
+
end
|
82
|
+
|
83
|
+
it "allows to configure error messages" do
|
84
|
+
expect do
|
85
|
+
described_class.scan "{1}", []
|
86
|
+
end.to raise_error described_class::ParseError, /invalid object key(?!.*\(right here\))/m
|
87
|
+
expect do
|
88
|
+
described_class.scan "{1}", [], verbose_error: false
|
89
|
+
end.to raise_error described_class::ParseError, /invalid object key(?!.*\(right here\))/m
|
90
|
+
expect do
|
91
|
+
described_class.scan "{1}", [], verbose_error: true
|
92
|
+
end.to raise_error described_class::ParseError, /invalid object key(?=.*\(right here\))/m
|
93
|
+
end
|
94
|
+
|
95
|
+
it "allows to return an actual path to the element" do
|
96
|
+
with_path_expected_res = [
|
97
|
+
# result for the first matcher; each element is an array of two items:
|
98
|
+
# array of path elements and 3-element array start,end,type
|
99
|
+
[[[0], [1, 6, :array]], [[1], [7, 12, :array]]],
|
100
|
+
[
|
101
|
+
[[0, 0], [2, 3, :number]], [[0, 1], [4, 5, :number]],
|
102
|
+
[[1, 0], [8, 9, :number]], [[1, 1], [10, 11, :number]],
|
103
|
+
],
|
104
|
+
]
|
105
|
+
params = [
|
106
|
+
"[[1,2],[3,4]]",
|
107
|
+
[
|
108
|
+
[described_class::ANY_INDEX],
|
109
|
+
[described_class::ANY_INDEX, described_class::ANY_INDEX],
|
110
|
+
],
|
111
|
+
]
|
112
|
+
expect(described_class.scan(*params, with_path: true)).to eq(with_path_expected_res)
|
113
|
+
expect(described_class.scan(*params, true)).to eq(with_path_expected_res)
|
114
|
+
expect(
|
115
|
+
described_class.scan(*params, false, with_path: true),
|
116
|
+
).to eq(with_path_expected_res)
|
117
|
+
end
|
118
|
+
|
119
|
+
it "ignores reqular flag if kwarg is given" do
|
120
|
+
expect(
|
121
|
+
described_class.scan(
|
122
|
+
"[[1,2],[3,4]]",
|
123
|
+
[
|
124
|
+
[described_class::ANY_INDEX],
|
125
|
+
[described_class::ANY_INDEX, described_class::ANY_INDEX],
|
126
|
+
],
|
127
|
+
true, with_path: false,
|
128
|
+
),
|
129
|
+
).to eq(
|
130
|
+
[
|
131
|
+
# result for the first matcher; each element is a 3-element array: start, end, type
|
132
|
+
[[1, 6, :array], [7, 12, :array]],
|
133
|
+
[
|
134
|
+
[2, 3, :number], [4, 5, :number],
|
135
|
+
[8, 9, :number], [10, 11, :number],
|
136
|
+
],
|
137
|
+
],
|
138
|
+
)
|
139
|
+
end
|
140
|
+
|
141
|
+
it "allows to pass config as a hash" do
|
142
|
+
expect(
|
143
|
+
described_class.scan("[1]", [[0]], { with_path: true }),
|
144
|
+
).to eq(
|
145
|
+
[
|
146
|
+
[[[0], [1, 2, :number]]],
|
147
|
+
],
|
148
|
+
)
|
149
|
+
end
|
150
|
+
|
151
|
+
it "allows to configure yajl" do
|
152
|
+
expect(
|
153
|
+
described_class.scan("[1]____________", [[0]], { allow_trailing_garbage: true }),
|
154
|
+
).to eq([[[1, 2, :number]]])
|
155
|
+
expect(
|
156
|
+
described_class.scan(
|
157
|
+
'["1", {"a": /* comment */ 2}]____________', [[1, "a"]],
|
158
|
+
{ allow_trailing_garbage: true, allow_comments: true },
|
159
|
+
),
|
160
|
+
).to eq([[[26, 27, :number]]])
|
161
|
+
expect(
|
162
|
+
described_class.scan(
|
163
|
+
'[{"a": /* comment */ 1}]_________', [[]],
|
164
|
+
{ allow_comments: true, allow_trailing_garbage: true },
|
165
|
+
),
|
166
|
+
).to eq([[[0, 24, :array]]])
|
167
|
+
end
|
168
|
+
|
169
|
+
it "works with utf-8" do
|
170
|
+
json = '{"ルビー": ["Руби"]}'.encode(Encoding::UTF_8)
|
171
|
+
expect(described_class.scan(json, [[]])).to eq([[[0, json.bytesize, :object]]])
|
172
|
+
res = described_class.scan(json, [["ルビー", 0]])
|
173
|
+
expect(res).to eq([[[15, 25, :string]]])
|
174
|
+
elem = res.first.first
|
175
|
+
expect(JSON.parse(json.byteslice(elem[0]...elem[1]), quirks_mode: true)).to eq("Руби")
|
176
|
+
end
|
177
|
+
|
178
|
+
it "raises exceptions in utf-8" do
|
179
|
+
bad_json = '{"ルビー": ["Руби" 1]}'.encode(Encoding::UTF_8)
|
180
|
+
expect do
|
181
|
+
described_class.scan(bad_json, [[]], verbose_error: true)
|
182
|
+
# Checks encoding
|
183
|
+
end.to raise_error(described_class::ParseError, Regexp.new(Regexp.escape(bad_json)))
|
184
|
+
end
|
185
|
+
|
186
|
+
it "works with different encodings" do
|
187
|
+
# TODO: encoding validation
|
188
|
+
json = '{"a": 1}'.encode(Encoding::UTF_32LE)
|
189
|
+
expect do
|
190
|
+
described_class.scan(json, [[]])
|
191
|
+
end.to raise_error(described_class::ParseError)
|
192
|
+
end
|
37
193
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: json_scanner
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- uvlad7
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-12-
|
11
|
+
date: 2024-12-27 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: This gem uses yajl lib to scan a json string and allows you to parse
|
14
14
|
pieces of it
|
@@ -53,7 +53,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
53
53
|
requirements:
|
54
54
|
- libyajl2, v2.1
|
55
55
|
- libyajl-dev, v2.1
|
56
|
-
rubygems_version: 3.
|
56
|
+
rubygems_version: 3.4.20
|
57
57
|
signing_key:
|
58
58
|
specification_version: 4
|
59
59
|
summary: Extract values from JSON without full parsing
|