json_scanner 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +6 -1
- data/ext/json_scanner/json_scanner.c +133 -58
- data/ext/json_scanner/json_scanner.h +1 -0
- data/lib/json_scanner/version.rb +1 -1
- data/spec/json_scanner_spec.rb +166 -10
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f34237d4ceab009f685b82a4e480247f23c804db96bded6d1bacea5ddd4a0946
|
4
|
+
data.tar.gz: 87484e4cbab84666b41ddb67553b0c985ef6dc29d8f1154a458173deade08587
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5f6ae7f8d7afc88fee60e88eb8efe776b7ae0dffe25ecffa5ffc61241629eb4c3ec86a9ab1b1e76d49b4bd12498ee15625677afbc997039f9295e65e510a32df
|
7
|
+
data.tar.gz: b5e95df2d53c0a224f6a089a55a6a0c502adb7e4dce276f2f85c48a063182f65f1425eff1f97cf409de46c815b45bbfa0626e54d76036a5c66c13eefa4146648
|
data/README.md
CHANGED
@@ -32,7 +32,12 @@ emoji_json = '{"grin": "😁", "heart": "😍", "rofl": "🤣"}'
|
|
32
32
|
begin_pos, end_pos, = JsonScanner.scan(emoji_json, [["heart"]], false).first.first
|
33
33
|
emoji_json.byteslice(begin_pos...end_pos)
|
34
34
|
# => "\"😍\""
|
35
|
-
|
35
|
+
# Note: you most likely don't need the `quirks_mode` option, unless you are using an old Ruby
|
36
|
+
# with the stdlib version of the json gem, or an old version of the gem. In newer versions `quirks_mode` behavior is the default
|
37
|
+
JSON.parse(emoji_json.byteslice(begin_pos...end_pos), quirks_mode: true)
|
38
|
+
# => "😍"
|
39
|
+
# You can also do this
|
40
|
+
# emoji_json.force_encoding(Encoding::BINARY)[begin_pos...end_pos].force_encoding(Encoding::UTF_8)
|
36
41
|
# => "\"😍\""
|
37
42
|
```
|
38
43
|
|
@@ -1,8 +1,8 @@
|
|
1
1
|
#include "json_scanner.h"
|
2
2
|
|
3
3
|
VALUE rb_mJsonScanner;
|
4
|
-
VALUE rb_mJsonScannerOptions;
|
5
4
|
VALUE rb_eJsonScannerParseError;
|
5
|
+
ID scan_kwargs_table[7];
|
6
6
|
|
7
7
|
VALUE null_sym;
|
8
8
|
VALUE boolean_sym;
|
@@ -16,9 +16,9 @@ enum matcher_type
|
|
16
16
|
MATCHER_KEY,
|
17
17
|
MATCHER_INDEX,
|
18
18
|
// MATCHER_ANY_KEY,
|
19
|
-
// MATCHER_ANY_INDEX,
|
20
19
|
MATCHER_INDEX_RANGE,
|
21
20
|
// MATCHER_KEYS_LIST,
|
21
|
+
// MATCHER_KEY_REGEX,
|
22
22
|
};
|
23
23
|
|
24
24
|
enum path_type
|
@@ -84,18 +84,22 @@ typedef struct
|
|
84
84
|
} scan_ctx;
|
85
85
|
|
86
86
|
// FIXME: This will cause a memory leak if ruby_xmalloc raises
|
87
|
-
scan_ctx *scan_ctx_init(VALUE path_ary,
|
87
|
+
scan_ctx *scan_ctx_init(VALUE path_ary, int with_path)
|
88
88
|
{
|
89
|
+
int path_ary_len;
|
90
|
+
scan_ctx *ctx;
|
91
|
+
paths_t *paths;
|
89
92
|
// TODO: Allow to_ary and sized enumerables
|
90
93
|
rb_check_type(path_ary, T_ARRAY);
|
91
|
-
|
94
|
+
path_ary_len = rb_long2int(rb_array_len(path_ary));
|
92
95
|
// Check types early before any allocations, so exception is ok
|
93
96
|
// TODO: Fix this, just handle errors
|
94
97
|
for (int i = 0; i < path_ary_len; i++)
|
95
98
|
{
|
99
|
+
int path_len;
|
96
100
|
VALUE path = rb_ary_entry(path_ary, i);
|
97
101
|
rb_check_type(path, T_ARRAY);
|
98
|
-
|
102
|
+
path_len = rb_long2int(rb_array_len(path));
|
99
103
|
for (int j = 0; j < path_len; j++)
|
100
104
|
{
|
101
105
|
VALUE entry = rb_ary_entry(path, j);
|
@@ -113,25 +117,32 @@ scan_ctx *scan_ctx_init(VALUE path_ary, VALUE with_path)
|
|
113
117
|
else
|
114
118
|
{
|
115
119
|
VALUE range_beg, range_end;
|
120
|
+
long end_val;
|
116
121
|
int open_ended;
|
117
122
|
if (rb_range_values(entry, &range_beg, &range_end, &open_ended) != Qtrue)
|
118
123
|
rb_raise(rb_eArgError, "path elements must be strings, integers, or ranges");
|
119
|
-
RB_NUM2LONG(range_beg)
|
120
|
-
|
124
|
+
if (RB_NUM2LONG(range_beg) < 0L)
|
125
|
+
rb_raise(rb_eArgError, "range start must be positive");
|
126
|
+
end_val = RB_NUM2LONG(range_end);
|
127
|
+
if (end_val < -1L)
|
128
|
+
rb_raise(rb_eArgError, "range end must be positive or -1");
|
129
|
+
if (end_val == -1L && open_ended)
|
130
|
+
rb_raise(rb_eArgError, "range with -1 end must be closed");
|
121
131
|
}
|
122
132
|
}
|
123
133
|
}
|
124
134
|
|
125
|
-
|
135
|
+
ctx = ruby_xmalloc(sizeof(scan_ctx));
|
126
136
|
|
127
|
-
ctx->with_path =
|
137
|
+
ctx->with_path = with_path;
|
128
138
|
ctx->max_path_len = 0;
|
129
139
|
|
130
|
-
|
140
|
+
paths = ruby_xmalloc(sizeof(paths_t) * path_ary_len);
|
131
141
|
for (int i = 0; i < path_ary_len; i++)
|
132
142
|
{
|
143
|
+
int path_len;
|
133
144
|
VALUE path = rb_ary_entry(path_ary, i);
|
134
|
-
|
145
|
+
path_len = rb_long2int(rb_array_len(path));
|
135
146
|
if (path_len > ctx->max_path_len)
|
136
147
|
ctx->max_path_len = path_len;
|
137
148
|
paths[i].elems = ruby_xmalloc2(sizeof(path_matcher_elem_t), path_len);
|
@@ -156,12 +167,16 @@ scan_ctx *scan_ctx_init(VALUE path_ary, VALUE with_path)
|
|
156
167
|
}
|
157
168
|
else
|
158
169
|
{
|
159
|
-
paths[i].elems[j].type = MATCHER_INDEX_RANGE;
|
160
170
|
VALUE range_beg, range_end;
|
161
171
|
int open_ended;
|
172
|
+
paths[i].elems[j].type = MATCHER_INDEX_RANGE;
|
162
173
|
rb_range_values(entry, &range_beg, &range_end, &open_ended);
|
163
174
|
paths[i].elems[j].value.range.start = RB_NUM2LONG(range_beg);
|
164
175
|
paths[i].elems[j].value.range.end = RB_NUM2LONG(range_end);
|
176
|
+
// (value..-1) works as expected, (value...-1) is forbidden above
|
177
|
+
if (paths[i].elems[j].value.range.end == -1L)
|
178
|
+
paths[i].elems[j].value.range.end = LONG_MAX;
|
179
|
+
// -1 here is fine, so, (0...0) works just as expected - doesn't match anything
|
165
180
|
if (open_ended)
|
166
181
|
paths[i].elems[j].value.range.end--;
|
167
182
|
}
|
@@ -181,7 +196,7 @@ scan_ctx *scan_ctx_init(VALUE path_ary, VALUE with_path)
|
|
181
196
|
rb_ary_push(ctx->points_list, rb_ary_new());
|
182
197
|
}
|
183
198
|
|
184
|
-
ctx->starts = ruby_xmalloc2(sizeof(size_t), ctx->max_path_len);
|
199
|
+
ctx->starts = ruby_xmalloc2(sizeof(size_t), ctx->max_path_len + 1);
|
185
200
|
// ctx->rb_err = Qnil;
|
186
201
|
ctx->handle = NULL;
|
187
202
|
|
@@ -224,10 +239,10 @@ typedef enum
|
|
224
239
|
} value_type;
|
225
240
|
|
226
241
|
// noexcept
|
227
|
-
|
242
|
+
VALUE create_point(scan_ctx *sctx, value_type type, size_t length, size_t curr_pos)
|
228
243
|
{
|
229
|
-
*point = rb_ary_new_capa(3);
|
230
244
|
VALUE values[3];
|
245
|
+
VALUE point = rb_ary_new_capa(3);
|
231
246
|
// noexcept
|
232
247
|
values[1] = RB_ULONG2NUM(curr_pos);
|
233
248
|
switch (type)
|
@@ -259,7 +274,31 @@ void create_point(VALUE *point, scan_ctx *sctx, value_type type, size_t length,
|
|
259
274
|
break;
|
260
275
|
}
|
261
276
|
// rb_ary_cat raise only in case of a frozen array or if len is too long
|
262
|
-
rb_ary_cat(
|
277
|
+
rb_ary_cat(point, values, 3);
|
278
|
+
return point;
|
279
|
+
}
|
280
|
+
|
281
|
+
// noexcept
|
282
|
+
VALUE create_path(scan_ctx *sctx)
|
283
|
+
{
|
284
|
+
VALUE path = rb_ary_new_capa(sctx->current_path_len);
|
285
|
+
for (int i = 0; i < sctx->current_path_len; i++)
|
286
|
+
{
|
287
|
+
VALUE entry;
|
288
|
+
switch (sctx->current_path[i].type)
|
289
|
+
{
|
290
|
+
case PATH_KEY:
|
291
|
+
entry = rb_str_new(sctx->current_path[i].value.key.val, sctx->current_path[i].value.key.len);
|
292
|
+
break;
|
293
|
+
case PATH_INDEX:
|
294
|
+
entry = RB_ULONG2NUM(sctx->current_path[i].value.index);
|
295
|
+
break;
|
296
|
+
default:
|
297
|
+
entry = Qnil;
|
298
|
+
}
|
299
|
+
rb_ary_push(path, entry);
|
300
|
+
}
|
301
|
+
return path;
|
263
302
|
}
|
264
303
|
|
265
304
|
// noexcept
|
@@ -267,13 +306,15 @@ void save_point(scan_ctx *sctx, value_type type, size_t length)
|
|
267
306
|
{
|
268
307
|
// TODO: Abort parsing if all paths are matched and no more matches are possible: only trivial key/index matchers at the current level
|
269
308
|
// TODO: Don't re-compare already matched prefixes; hard to invalidate, though
|
309
|
+
// TODO: Might fail in case of no memory
|
270
310
|
VALUE point = Qundef;
|
311
|
+
int match;
|
271
312
|
for (int i = 0; i < sctx->paths_len; i++)
|
272
313
|
{
|
273
314
|
if (sctx->paths[i].len != sctx->current_path_len)
|
274
315
|
continue;
|
275
316
|
|
276
|
-
|
317
|
+
match = true;
|
277
318
|
for (int j = 0; j < sctx->current_path_len; j++)
|
278
319
|
{
|
279
320
|
switch (sctx->paths[i].elems[j].type)
|
@@ -303,7 +344,11 @@ void save_point(scan_ctx *sctx, value_type type, size_t length)
|
|
303
344
|
{
|
304
345
|
if (point == Qundef)
|
305
346
|
{
|
306
|
-
create_point(
|
347
|
+
point = create_point(sctx, type, length, yajl_get_bytes_consumed(sctx->handle));
|
348
|
+
if (sctx->with_path)
|
349
|
+
{
|
350
|
+
point = rb_ary_new_from_args(2, create_path(sctx), point);
|
351
|
+
}
|
307
352
|
}
|
308
353
|
// rb_ary_push raises only in case of a frozen array, which is not the case
|
309
354
|
// rb_ary_entry is safe
|
@@ -366,11 +411,9 @@ int scan_on_start_object(void *ctx)
|
|
366
411
|
return true;
|
367
412
|
}
|
368
413
|
increment_arr_index(sctx);
|
414
|
+
sctx->starts[sctx->current_path_len] = yajl_get_bytes_consumed(sctx->handle) - 1;
|
369
415
|
if (sctx->current_path_len < sctx->max_path_len)
|
370
|
-
{
|
371
|
-
sctx->starts[sctx->current_path_len] = yajl_get_bytes_consumed(sctx->handle) - 1;
|
372
416
|
sctx->current_path[sctx->current_path_len].type = PATH_KEY;
|
373
|
-
}
|
374
417
|
sctx->current_path_len++;
|
375
418
|
return true;
|
376
419
|
}
|
@@ -383,7 +426,7 @@ int scan_on_key(void *ctx, const unsigned char *key, size_t len)
|
|
383
426
|
return true;
|
384
427
|
// Can't be called without scan_on_start_object being called before
|
385
428
|
// So current_path_len is at least 1 and key.type is set to PATH_KEY;
|
386
|
-
sctx->current_path[sctx->current_path_len - 1].value.key.val = (char *)
|
429
|
+
sctx->current_path[sctx->current_path_len - 1].value.key.val = (char *)key;
|
387
430
|
sctx->current_path[sctx->current_path_len - 1].value.key.len = len;
|
388
431
|
return true;
|
389
432
|
}
|
@@ -393,9 +436,8 @@ int scan_on_end_object(void *ctx)
|
|
393
436
|
{
|
394
437
|
scan_ctx *sctx = (scan_ctx *)ctx;
|
395
438
|
sctx->current_path_len--;
|
396
|
-
if (sctx->current_path_len
|
397
|
-
|
398
|
-
save_point(sctx, object_value, 0);
|
439
|
+
if (sctx->current_path_len <= sctx->max_path_len)
|
440
|
+
save_point(sctx, object_value, 0);
|
399
441
|
return true;
|
400
442
|
}
|
401
443
|
|
@@ -409,9 +451,9 @@ int scan_on_start_array(void *ctx)
|
|
409
451
|
return true;
|
410
452
|
}
|
411
453
|
increment_arr_index(sctx);
|
454
|
+
sctx->starts[sctx->current_path_len] = yajl_get_bytes_consumed(sctx->handle) - 1;
|
412
455
|
if (sctx->current_path_len < sctx->max_path_len)
|
413
456
|
{
|
414
|
-
sctx->starts[sctx->current_path_len] = yajl_get_bytes_consumed(sctx->handle) - 1;
|
415
457
|
sctx->current_path[sctx->current_path_len].type = PATH_INDEX;
|
416
458
|
sctx->current_path[sctx->current_path_len].value.index = -1;
|
417
459
|
}
|
@@ -424,9 +466,8 @@ int scan_on_end_array(void *ctx)
|
|
424
466
|
{
|
425
467
|
scan_ctx *sctx = (scan_ctx *)ctx;
|
426
468
|
sctx->current_path_len--;
|
427
|
-
if (sctx->current_path_len
|
428
|
-
|
429
|
-
save_point(sctx, array_value, 0);
|
469
|
+
if (sctx->current_path_len <= sctx->max_path_len)
|
470
|
+
save_point(sctx, array_value, 0);
|
430
471
|
return true;
|
431
472
|
}
|
432
473
|
|
@@ -443,40 +484,73 @@ static yajl_callbacks scan_callbacks = {
|
|
443
484
|
scan_on_start_array,
|
444
485
|
scan_on_end_array};
|
445
486
|
|
446
|
-
//
|
447
|
-
|
487
|
+
// def scan(json_str, path_arr, opts)
|
488
|
+
// opts
|
489
|
+
// with_path: false, verbose_error: false,
|
490
|
+
// the following opts converted to bool and passed to yajl_config if provided, ignored if not provided
|
491
|
+
// allow_comments, dont_validate_strings, allow_trailing_garbage, allow_multiple_values, allow_partial_values
|
492
|
+
VALUE scan(int argc, VALUE *argv, VALUE self)
|
448
493
|
{
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
size_t json_text_len
|
455
|
-
#endif
|
494
|
+
VALUE json_str, path_ary, with_path_flag, kwargs;
|
495
|
+
VALUE kwargs_values[7];
|
496
|
+
|
497
|
+
int with_path = false, verbose_error = false;
|
498
|
+
char *json_text;
|
499
|
+
size_t json_text_len;
|
456
500
|
yajl_handle handle;
|
457
|
-
// TODO
|
458
|
-
int opt_verbose_error = 0;
|
459
501
|
yajl_status stat;
|
460
|
-
scan_ctx *ctx
|
461
|
-
VALUE err = Qnil;
|
462
|
-
VALUE result;
|
502
|
+
scan_ctx *ctx;
|
503
|
+
VALUE err = Qnil, result;
|
463
504
|
// Turned out callbacks can't raise exceptions
|
464
505
|
// VALUE callback_err;
|
506
|
+
#if RUBY_API_VERSION_MAJOR > 2 || (RUBY_API_VERSION_MAJOR == 2 && RUBY_API_VERSION_MINOR >= 7)
|
507
|
+
rb_scan_args_kw(RB_SCAN_ARGS_LAST_HASH_KEYWORDS, argc, argv, "21:", &json_str, &path_ary, &with_path_flag, &kwargs);
|
508
|
+
#else
|
509
|
+
rb_scan_args(argc, argv, "21:", &json_str, &path_ary, &with_path_flag, &kwargs);
|
510
|
+
#endif
|
511
|
+
// rb_io_write(rb_stderr, rb_sprintf("with_path_flag: %" PRIsVALUE " \n", with_path_flag));
|
512
|
+
with_path = RTEST(with_path_flag);
|
513
|
+
if (kwargs != Qnil)
|
514
|
+
{
|
515
|
+
rb_get_kwargs(kwargs, scan_kwargs_table, 0, 7, kwargs_values);
|
516
|
+
if (kwargs_values[0] != Qundef)
|
517
|
+
with_path = RTEST(kwargs_values[0]);
|
518
|
+
if (kwargs_values[1] != Qundef)
|
519
|
+
verbose_error = RTEST(kwargs_values[1]);
|
520
|
+
}
|
521
|
+
rb_check_type(json_str, T_STRING);
|
522
|
+
json_text = RSTRING_PTR(json_str);
|
523
|
+
#if LONG_MAX > SIZE_MAX
|
524
|
+
json_text_len = RSTRING_LENINT(json_str);
|
525
|
+
#else
|
526
|
+
json_text_len = RSTRING_LEN(json_str);
|
527
|
+
#endif
|
528
|
+
ctx = scan_ctx_init(path_ary, with_path);
|
465
529
|
|
466
530
|
handle = yajl_alloc(&scan_callbacks, NULL, (void *)ctx);
|
531
|
+
if (kwargs != Qnil) // it's safe to read kwargs_values only if rb_get_kwargs was called
|
532
|
+
{
|
533
|
+
if (kwargs_values[2] != Qundef)
|
534
|
+
yajl_config(handle, yajl_allow_comments, RTEST(kwargs_values[2]));
|
535
|
+
if (kwargs_values[3] != Qundef)
|
536
|
+
yajl_config(handle, yajl_dont_validate_strings, RTEST(kwargs_values[3]));
|
537
|
+
if (kwargs_values[4] != Qundef)
|
538
|
+
yajl_config(handle, yajl_allow_trailing_garbage, RTEST(kwargs_values[4]));
|
539
|
+
if (kwargs_values[5] != Qundef)
|
540
|
+
yajl_config(handle, yajl_allow_multiple_values, RTEST(kwargs_values[5]));
|
541
|
+
if (kwargs_values[6] != Qundef)
|
542
|
+
yajl_config(handle, yajl_allow_partial_values, RTEST(kwargs_values[6]));
|
543
|
+
}
|
467
544
|
ctx->handle = handle;
|
468
|
-
|
469
|
-
// yajl_config(handle, yajl_allow_comments, true);
|
470
|
-
// yajl_config(handle, yajl_allow_trailing_garbage, true);
|
471
|
-
stat = yajl_parse(handle, (unsigned char *) json_text, json_text_len);
|
545
|
+
stat = yajl_parse(handle, (unsigned char *)json_text, json_text_len);
|
472
546
|
if (stat == yajl_status_ok)
|
473
547
|
stat = yajl_complete_parse(handle);
|
474
548
|
|
475
549
|
if (stat != yajl_status_ok)
|
476
550
|
{
|
477
|
-
char *str = (char *)
|
478
|
-
err =
|
479
|
-
yajl_free_error(handle, (unsigned char *)
|
551
|
+
char *str = (char *)yajl_get_error(handle, verbose_error, (unsigned char *)json_text, json_text_len);
|
552
|
+
err = rb_utf8_str_new_cstr(str);
|
553
|
+
yajl_free_error(handle, (unsigned char *)str);
|
480
554
|
}
|
481
555
|
// callback_err = ctx->rb_err;
|
482
556
|
result = ctx->points_list;
|
@@ -494,19 +568,20 @@ RUBY_FUNC_EXPORTED void
|
|
494
568
|
Init_json_scanner(void)
|
495
569
|
{
|
496
570
|
rb_mJsonScanner = rb_define_module("JsonScanner");
|
497
|
-
rb_define_const(rb_mJsonScanner, "
|
498
|
-
rb_mJsonScannerOptions = rb_define_module_under(rb_mJsonScanner, "Options");
|
571
|
+
rb_define_const(rb_mJsonScanner, "ANY_INDEX", rb_range_new(INT2FIX(0), INT2FIX(-1), false));
|
499
572
|
rb_eJsonScannerParseError = rb_define_class_under(rb_mJsonScanner, "ParseError", rb_eRuntimeError);
|
500
|
-
|
501
|
-
rb_define_const(rb_mJsonScannerOptions, "DONT_VALIDATE_STRINGS", INT2FIX(yajl_dont_validate_strings));
|
502
|
-
rb_define_const(rb_mJsonScannerOptions, "ALLOW_TRAILING_GARBAGE", INT2FIX(yajl_allow_trailing_garbage));
|
503
|
-
rb_define_const(rb_mJsonScannerOptions, "ALLOW_MULTIPLE_VALUES", INT2FIX(yajl_allow_multiple_values));
|
504
|
-
rb_define_const(rb_mJsonScannerOptions, "ALLOW_PARTIAL_VALUES", INT2FIX(yajl_allow_partial_values));
|
505
|
-
rb_define_module_function(rb_mJsonScanner, "scan", scan, 3);
|
573
|
+
rb_define_module_function(rb_mJsonScanner, "scan", scan, -1);
|
506
574
|
null_sym = rb_id2sym(rb_intern("null"));
|
507
575
|
boolean_sym = rb_id2sym(rb_intern("boolean"));
|
508
576
|
number_sym = rb_id2sym(rb_intern("number"));
|
509
577
|
string_sym = rb_id2sym(rb_intern("string"));
|
510
578
|
object_sym = rb_id2sym(rb_intern("object"));
|
511
579
|
array_sym = rb_id2sym(rb_intern("array"));
|
580
|
+
scan_kwargs_table[0] = rb_intern("with_path");
|
581
|
+
scan_kwargs_table[1] = rb_intern("verbose_error");
|
582
|
+
scan_kwargs_table[2] = rb_intern("allow_comments");
|
583
|
+
scan_kwargs_table[3] = rb_intern("dont_validate_strings");
|
584
|
+
scan_kwargs_table[4] = rb_intern("allow_trailing_garbage");
|
585
|
+
scan_kwargs_table[5] = rb_intern("allow_multiple_values");
|
586
|
+
scan_kwargs_table[6] = rb_intern("allow_partial_values");
|
512
587
|
}
|
data/lib/json_scanner/version.rb
CHANGED
data/spec/json_scanner_spec.rb
CHANGED
@@ -1,37 +1,193 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require_relative "spec_helper"
|
4
|
+
require "json"
|
4
5
|
|
5
6
|
RSpec.describe JsonScanner do
|
6
7
|
it "has a version number" do
|
7
|
-
expect(described_class::VERSION).not_to
|
8
|
+
expect(described_class::VERSION).not_to be_nil
|
8
9
|
end
|
9
10
|
|
10
11
|
it "scans json" do
|
11
|
-
result = described_class.scan('["1", {"a": 2}]', [[0], [1, "a"], []]
|
12
|
+
result = described_class.scan('["1", {"a": 2}]', [[0], [1, "a"], []])
|
12
13
|
expect(result).to eq([[[1, 4, :string]], [[12, 13, :number]], [[0, 15, :array]]])
|
13
|
-
expect(described_class.scan('"2"', [[]]
|
14
|
+
expect(described_class.scan('"2"', [[]])).to eq([[[0, 3, :string]]])
|
14
15
|
expect(
|
15
|
-
described_class.scan("[0,1,2,3,4,5,6,7]", [[(0..2)], [(4...6)]],
|
16
|
+
described_class.scan("[0,1,2,3,4,5,6,7]", [[(0..2)], [(4...6)]]),
|
16
17
|
).to eq(
|
17
|
-
[[[1, 2, :number], [3, 4, :number], [5, 6, :number]], [[9, 10, :number], [11, 12, :number]]]
|
18
|
+
[[[1, 2, :number], [3, 4, :number], [5, 6, :number]], [[9, 10, :number], [11, 12, :number]]],
|
18
19
|
)
|
19
|
-
expect(described_class.scan('{"a": 1}', [["a"], []]
|
20
|
-
[[[6, 7, :number]], [[0, 8, :object]]]
|
20
|
+
expect(described_class.scan('{"a": 1}', [["a"], []])).to eq(
|
21
|
+
[[[6, 7, :number]], [[0, 8, :object]]],
|
21
22
|
)
|
23
|
+
end
|
24
|
+
|
25
|
+
it "works with max path len correctly" do
|
26
|
+
expect(
|
27
|
+
described_class.scan('{"a": [1]}', [[], ["a"]]),
|
28
|
+
).to eq(
|
29
|
+
[[[0, 10, :object]], [[6, 9, :array]]],
|
30
|
+
)
|
31
|
+
expect(
|
32
|
+
described_class.scan('{"a": {"b": 1}}', [[], ["a"]]),
|
33
|
+
).to eq(
|
34
|
+
[[[0, 15, :object]], [[6, 14, :object]]],
|
35
|
+
)
|
36
|
+
expect(described_class.scan('{"a": 1}', [[]])).to eq([[[0, 8, :object]]])
|
37
|
+
expect(described_class.scan("[[1]]", [[]])).to eq([[[0, 5, :array]]])
|
38
|
+
expect(described_class.scan("[[1]]", [[0]])).to eq([[[1, 4, :array]]])
|
39
|
+
end
|
40
|
+
|
41
|
+
it "raises on invalid json" do
|
22
42
|
expect do
|
23
43
|
begin
|
24
44
|
GC.stress = true
|
25
45
|
# TODO: investigate
|
26
46
|
# got "munmap_chunk(): invalid pointer" in the console once after
|
27
47
|
# JsonScanner.scan '[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[]]]]]]]]]]]]]]]]]]]]]]', [[0,0,0,0,0,0,0]], true + Ctrl+D
|
28
|
-
# (last arg wasn't handled at the time)
|
29
|
-
# but I don't think it's a problem of
|
48
|
+
# (last arg wasn't handled at the time and was intended for with_path kwarg)
|
49
|
+
# but I don't think it's a problem of the extension or libyajl, it happened at exit and I free everything before
|
30
50
|
# `JsonScanner.scan` returns
|
31
|
-
described_class.scan "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[]]]]]]]]]]]]]]]]]]]]]]", [[0, 0, 0, 0, 0, 0, 0]]
|
51
|
+
described_class.scan "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[]]]]]]]]]]]]]]]]]]]]]]", [[0, 0, 0, 0, 0, 0, 0]]
|
32
52
|
ensure
|
33
53
|
GC.stress = false
|
34
54
|
end
|
35
55
|
end.to raise_error described_class::ParseError
|
36
56
|
end
|
57
|
+
|
58
|
+
it "allows to select ranges" do
|
59
|
+
expect(
|
60
|
+
described_class.scan("[[1,2],[3,4]]", [[described_class::ANY_INDEX, described_class::ANY_INDEX]]),
|
61
|
+
).to eq(
|
62
|
+
[[[2, 3, :number], [4, 5, :number], [8, 9, :number], [10, 11, :number]]],
|
63
|
+
)
|
64
|
+
expect(
|
65
|
+
described_class.scan("[[1,2],[3,4]]", [[described_class::ANY_INDEX, (0...1)]]),
|
66
|
+
).to eq(
|
67
|
+
[[[2, 3, :number], [8, 9, :number]]],
|
68
|
+
)
|
69
|
+
end
|
70
|
+
|
71
|
+
it "allows only positive or -1 values" do
|
72
|
+
expect do
|
73
|
+
described_class.scan("[[1,2],[3,4]]", [[(0...-1)]])
|
74
|
+
end.to raise_error ArgumentError
|
75
|
+
expect do
|
76
|
+
described_class.scan("[[1,2],[3,4]]", [[(0..-2)]])
|
77
|
+
end.to raise_error ArgumentError
|
78
|
+
expect do
|
79
|
+
described_class.scan("[[1,2],[3,4]]", [[(-42..1)]])
|
80
|
+
end.to raise_error ArgumentError
|
81
|
+
end
|
82
|
+
|
83
|
+
it "allows to configure error messages" do
|
84
|
+
expect do
|
85
|
+
described_class.scan "{1}", []
|
86
|
+
end.to raise_error described_class::ParseError, /invalid object key(?!.*\(right here\))/m
|
87
|
+
expect do
|
88
|
+
described_class.scan "{1}", [], verbose_error: false
|
89
|
+
end.to raise_error described_class::ParseError, /invalid object key(?!.*\(right here\))/m
|
90
|
+
expect do
|
91
|
+
described_class.scan "{1}", [], verbose_error: true
|
92
|
+
end.to raise_error described_class::ParseError, /invalid object key(?=.*\(right here\))/m
|
93
|
+
end
|
94
|
+
|
95
|
+
it "allows to return an actual path to the element" do
|
96
|
+
with_path_expected_res = [
|
97
|
+
# result for first matcher, each element array of two items:
|
98
|
+
# array of path elements and 3-element array start,end,type
|
99
|
+
[[[0], [1, 6, :array]], [[1], [7, 12, :array]]],
|
100
|
+
[
|
101
|
+
[[0, 0], [2, 3, :number]], [[0, 1], [4, 5, :number]],
|
102
|
+
[[1, 0], [8, 9, :number]], [[1, 1], [10, 11, :number]],
|
103
|
+
],
|
104
|
+
]
|
105
|
+
params = [
|
106
|
+
"[[1,2],[3,4]]",
|
107
|
+
[
|
108
|
+
[described_class::ANY_INDEX],
|
109
|
+
[described_class::ANY_INDEX, described_class::ANY_INDEX],
|
110
|
+
],
|
111
|
+
]
|
112
|
+
expect(described_class.scan(*params, with_path: true)).to eq(with_path_expected_res)
|
113
|
+
expect(described_class.scan(*params, true)).to eq(with_path_expected_res)
|
114
|
+
expect(
|
115
|
+
described_class.scan(*params, false, with_path: true),
|
116
|
+
).to eq(with_path_expected_res)
|
117
|
+
end
|
118
|
+
|
119
|
+
it "ignores reqular flag if kwarg is given" do
|
120
|
+
expect(
|
121
|
+
described_class.scan(
|
122
|
+
"[[1,2],[3,4]]",
|
123
|
+
[
|
124
|
+
[described_class::ANY_INDEX],
|
125
|
+
[described_class::ANY_INDEX, described_class::ANY_INDEX],
|
126
|
+
],
|
127
|
+
true, with_path: false,
|
128
|
+
),
|
129
|
+
).to eq(
|
130
|
+
[
|
131
|
+
# result for first matcher, each element 3-element array start,end,type
|
132
|
+
[[1, 6, :array], [7, 12, :array]],
|
133
|
+
[
|
134
|
+
[2, 3, :number], [4, 5, :number],
|
135
|
+
[8, 9, :number], [10, 11, :number],
|
136
|
+
],
|
137
|
+
],
|
138
|
+
)
|
139
|
+
end
|
140
|
+
|
141
|
+
it "allows to pass config as a hash" do
|
142
|
+
expect(
|
143
|
+
described_class.scan("[1]", [[0]], { with_path: true }),
|
144
|
+
).to eq(
|
145
|
+
[
|
146
|
+
[[[0], [1, 2, :number]]],
|
147
|
+
],
|
148
|
+
)
|
149
|
+
end
|
150
|
+
|
151
|
+
it "allows to configure yajl" do
|
152
|
+
expect(
|
153
|
+
described_class.scan("[1]____________", [[0]], { allow_trailing_garbage: true }),
|
154
|
+
).to eq([[[1, 2, :number]]])
|
155
|
+
expect(
|
156
|
+
described_class.scan(
|
157
|
+
'["1", {"a": /* comment */ 2}]____________', [[1, "a"]],
|
158
|
+
{ allow_trailing_garbage: true, allow_comments: true },
|
159
|
+
),
|
160
|
+
).to eq([[[26, 27, :number]]])
|
161
|
+
expect(
|
162
|
+
described_class.scan(
|
163
|
+
'[{"a": /* comment */ 1}]_________', [[]],
|
164
|
+
{ allow_comments: true, allow_trailing_garbage: true },
|
165
|
+
),
|
166
|
+
).to eq([[[0, 24, :array]]])
|
167
|
+
end
|
168
|
+
|
169
|
+
it "works with utf-8" do
|
170
|
+
json = '{"ルビー": ["Руби"]}'.encode(Encoding::UTF_8)
|
171
|
+
expect(described_class.scan(json, [[]])).to eq([[[0, json.bytesize, :object]]])
|
172
|
+
res = described_class.scan(json, [["ルビー", 0]])
|
173
|
+
expect(res).to eq([[[15, 25, :string]]])
|
174
|
+
elem = res.first.first
|
175
|
+
expect(JSON.parse(json.byteslice(elem[0]...elem[1]), quirks_mode: true)).to eq("Руби")
|
176
|
+
end
|
177
|
+
|
178
|
+
it "raises exceptions in utf-8" do
|
179
|
+
bad_json = '{"ルビー": ["Руби" 1]}'.encode(Encoding::UTF_8)
|
180
|
+
expect do
|
181
|
+
described_class.scan(bad_json, [[]], verbose_error: true)
|
182
|
+
# Checks encoding
|
183
|
+
end.to raise_error(described_class::ParseError, Regexp.new(Regexp.escape(bad_json)))
|
184
|
+
end
|
185
|
+
|
186
|
+
it "works with different encodings" do
|
187
|
+
# TODO: encoding validation
|
188
|
+
json = '{"a": 1}'.encode(Encoding::UTF_32LE)
|
189
|
+
expect do
|
190
|
+
described_class.scan(json, [[]])
|
191
|
+
end.to raise_error(described_class::ParseError)
|
192
|
+
end
|
37
193
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: json_scanner
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- uvlad7
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-12-
|
11
|
+
date: 2024-12-27 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: This gem uses yajl lib to scan a json string and allows you to parse
|
14
14
|
pieces of it
|
@@ -53,7 +53,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
53
53
|
requirements:
|
54
54
|
- libyajl2, v2.1
|
55
55
|
- libyajl-dev, v2.1
|
56
|
-
rubygems_version: 3.
|
56
|
+
rubygems_version: 3.4.20
|
57
57
|
signing_key:
|
58
58
|
specification_version: 4
|
59
59
|
summary: Extract values from JSON without full parsing
|