json_scanner 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +6 -1
- data/ext/json_scanner/json_scanner.c +47 -29
- data/lib/json_scanner/version.rb +1 -1
- data/spec/json_scanner_spec.rb +29 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1ffddd81459c088040c2ddee0006b1c9172e8a15aa8bab987dc59743e697f49f
|
4
|
+
data.tar.gz: 73b69bbcddaaf6711b2563787c4cbde1d258bf1d28b1e694714fd782c42d4c2c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 44a153e578da606f67399a09387cf26d63b1528ad9e6bb258083435202f094ebc95e3a2c895a83334eb2aeef4ea9bfbe0f0c38374ac76f5772beea6bc3910a0f
|
7
|
+
data.tar.gz: ff9bcbc934fafc4857faf926bca5f5c6c82ec357107b8d6e5f2c0a7e9e622afe3a19d4b0a3f4a987d2e965432c73615203afd1c3e1ac6a52965555b20ef2f377
|
data/README.md
CHANGED
@@ -32,7 +32,12 @@ emoji_json = '{"grin": "😁", "heart": "😍", "rofl": "🤣"}'
|
|
32
32
|
begin_pos, end_pos, = JsonScanner.scan(emoji_json, [["heart"]], false).first.first
|
33
33
|
emoji_json.byteslice(begin_pos...end_pos)
|
34
34
|
# => "\"😍\""
|
35
|
-
|
35
|
+
# Note: you most likely don't need the `quirks_mode` option unless you are using an old Ruby
|
36
|
+
# with the stdlib version of the json gem, or an old version of the gem. In new versions `quirks_mode` is the default
|
37
|
+
JSON.parse(emoji_json.byteslice(begin_pos...end_pos), quirks_mode: true)
|
38
|
+
# => "😍"
|
39
|
+
# You can also do this
|
40
|
+
# emoji_json.force_encoding(Encoding::BINARY)[begin_pos...end_pos].force_encoding(Encoding::UTF_8)
|
36
41
|
# => "\"😍\""
|
37
42
|
```
|
38
43
|
|
@@ -16,9 +16,9 @@ enum matcher_type
|
|
16
16
|
MATCHER_KEY,
|
17
17
|
MATCHER_INDEX,
|
18
18
|
// MATCHER_ANY_KEY,
|
19
|
-
// MATCHER_ANY_INDEX,
|
20
19
|
MATCHER_INDEX_RANGE,
|
21
20
|
// MATCHER_KEYS_LIST,
|
21
|
+
// MATCHER_KEY_REGEX,
|
22
22
|
};
|
23
23
|
|
24
24
|
enum path_type
|
@@ -86,16 +86,20 @@ typedef struct
|
|
86
86
|
// FIXME: This will cause a memory leak if ruby_xmalloc raises
|
87
87
|
scan_ctx *scan_ctx_init(VALUE path_ary, VALUE with_path)
|
88
88
|
{
|
89
|
+
int path_ary_len;
|
90
|
+
scan_ctx *ctx;
|
91
|
+
paths_t *paths;
|
89
92
|
// TODO: Allow to_ary and sized enumerables
|
90
93
|
rb_check_type(path_ary, T_ARRAY);
|
91
|
-
|
94
|
+
path_ary_len = rb_long2int(rb_array_len(path_ary));
|
92
95
|
// Check types early before any allocations, so exception is ok
|
93
96
|
// TODO: Fix this, just handle errors
|
94
97
|
for (int i = 0; i < path_ary_len; i++)
|
95
98
|
{
|
99
|
+
int path_len;
|
96
100
|
VALUE path = rb_ary_entry(path_ary, i);
|
97
101
|
rb_check_type(path, T_ARRAY);
|
98
|
-
|
102
|
+
path_len = rb_long2int(rb_array_len(path));
|
99
103
|
for (int j = 0; j < path_len; j++)
|
100
104
|
{
|
101
105
|
VALUE entry = rb_ary_entry(path, j);
|
@@ -113,25 +117,32 @@ scan_ctx *scan_ctx_init(VALUE path_ary, VALUE with_path)
|
|
113
117
|
else
|
114
118
|
{
|
115
119
|
VALUE range_beg, range_end;
|
120
|
+
long end_val;
|
116
121
|
int open_ended;
|
117
122
|
if (rb_range_values(entry, &range_beg, &range_end, &open_ended) != Qtrue)
|
118
123
|
rb_raise(rb_eArgError, "path elements must be strings, integers, or ranges");
|
119
|
-
RB_NUM2LONG(range_beg)
|
120
|
-
|
124
|
+
if (RB_NUM2LONG(range_beg) < 0L)
|
125
|
+
rb_raise(rb_eArgError, "range start must be positive");
|
126
|
+
end_val = RB_NUM2LONG(range_end);
|
127
|
+
if (end_val < -1L)
|
128
|
+
rb_raise(rb_eArgError, "range end must be positive or -1");
|
129
|
+
if (end_val == -1L && open_ended)
|
130
|
+
rb_raise(rb_eArgError, "range with -1 end must be closed");
|
121
131
|
}
|
122
132
|
}
|
123
133
|
}
|
124
134
|
|
125
|
-
|
135
|
+
ctx = ruby_xmalloc(sizeof(scan_ctx));
|
126
136
|
|
127
|
-
ctx->with_path =
|
137
|
+
ctx->with_path = RTEST(with_path);
|
128
138
|
ctx->max_path_len = 0;
|
129
139
|
|
130
|
-
|
140
|
+
paths = ruby_xmalloc(sizeof(paths_t) * path_ary_len);
|
131
141
|
for (int i = 0; i < path_ary_len; i++)
|
132
142
|
{
|
143
|
+
int path_len;
|
133
144
|
VALUE path = rb_ary_entry(path_ary, i);
|
134
|
-
|
145
|
+
path_len = rb_long2int(rb_array_len(path));
|
135
146
|
if (path_len > ctx->max_path_len)
|
136
147
|
ctx->max_path_len = path_len;
|
137
148
|
paths[i].elems = ruby_xmalloc2(sizeof(path_matcher_elem_t), path_len);
|
@@ -156,12 +167,16 @@ scan_ctx *scan_ctx_init(VALUE path_ary, VALUE with_path)
|
|
156
167
|
}
|
157
168
|
else
|
158
169
|
{
|
159
|
-
paths[i].elems[j].type = MATCHER_INDEX_RANGE;
|
160
170
|
VALUE range_beg, range_end;
|
161
171
|
int open_ended;
|
172
|
+
paths[i].elems[j].type = MATCHER_INDEX_RANGE;
|
162
173
|
rb_range_values(entry, &range_beg, &range_end, &open_ended);
|
163
174
|
paths[i].elems[j].value.range.start = RB_NUM2LONG(range_beg);
|
164
175
|
paths[i].elems[j].value.range.end = RB_NUM2LONG(range_end);
|
176
|
+
// (value..-1) works as expected, (value...-1) is forbidden above
|
177
|
+
if (paths[i].elems[j].value.range.end == -1L)
|
178
|
+
paths[i].elems[j].value.range.end = LONG_MAX;
|
179
|
+
// -1 here is fine, so, (0...0) works just as expected - doesn't match anything
|
165
180
|
if (open_ended)
|
166
181
|
paths[i].elems[j].value.range.end--;
|
167
182
|
}
|
@@ -226,8 +241,8 @@ typedef enum
|
|
226
241
|
// noexcept
|
227
242
|
void create_point(VALUE *point, scan_ctx *sctx, value_type type, size_t length, size_t curr_pos)
|
228
243
|
{
|
229
|
-
*point = rb_ary_new_capa(3);
|
230
244
|
VALUE values[3];
|
245
|
+
*point = rb_ary_new_capa(3);
|
231
246
|
// noexcept
|
232
247
|
values[1] = RB_ULONG2NUM(curr_pos);
|
233
248
|
switch (type)
|
@@ -268,12 +283,13 @@ void save_point(scan_ctx *sctx, value_type type, size_t length)
|
|
268
283
|
// TODO: Abort parsing if all paths are matched and no more matches are possible: only trivial key/index matchers at the current level
|
269
284
|
// TODO: Don't re-compare already matched prefixes; hard to invalidate, though
|
270
285
|
VALUE point = Qundef;
|
286
|
+
int match;
|
271
287
|
for (int i = 0; i < sctx->paths_len; i++)
|
272
288
|
{
|
273
289
|
if (sctx->paths[i].len != sctx->current_path_len)
|
274
290
|
continue;
|
275
291
|
|
276
|
-
|
292
|
+
match = true;
|
277
293
|
for (int j = 0; j < sctx->current_path_len; j++)
|
278
294
|
{
|
279
295
|
switch (sctx->paths[i].elems[j].type)
|
@@ -383,7 +399,7 @@ int scan_on_key(void *ctx, const unsigned char *key, size_t len)
|
|
383
399
|
return true;
|
384
400
|
// Can't be called without scan_on_start_object being called before
|
385
401
|
// So current_path_len at least 1 and key.type is set to PATH_KEY;
|
386
|
-
sctx->current_path[sctx->current_path_len - 1].value.key.val = (char *)
|
402
|
+
sctx->current_path[sctx->current_path_len - 1].value.key.val = (char *)key;
|
387
403
|
sctx->current_path[sctx->current_path_len - 1].value.key.len = len;
|
388
404
|
return true;
|
389
405
|
}
|
@@ -446,37 +462,39 @@ static yajl_callbacks scan_callbacks = {
|
|
446
462
|
// TODO: make with_path optional kw: `with_path: false`
|
447
463
|
VALUE scan(VALUE self, VALUE json_str, VALUE path_ary, VALUE with_path)
|
448
464
|
{
|
449
|
-
|
450
|
-
|
451
|
-
#if LONG_MAX > SIZE_MAX
|
452
|
-
size_t json_text_len = RSTRING_LENINT(json_str);
|
453
|
-
#else
|
454
|
-
size_t json_text_len = RSTRING_LEN(json_str);
|
455
|
-
#endif
|
465
|
+
char *json_text;
|
466
|
+
size_t json_text_len;
|
456
467
|
yajl_handle handle;
|
457
|
-
// TODO
|
458
|
-
int opt_verbose_error = 0;
|
459
468
|
yajl_status stat;
|
460
|
-
scan_ctx *ctx
|
461
|
-
VALUE err = Qnil;
|
462
|
-
VALUE result;
|
469
|
+
scan_ctx *ctx;
|
470
|
+
VALUE err = Qnil, result;
|
463
471
|
// Turned out callbacks can't raise exceptions
|
464
472
|
// VALUE callback_err;
|
473
|
+
// TODO
|
474
|
+
int opt_verbose_error = 0;
|
475
|
+
rb_check_type(json_str, T_STRING);
|
476
|
+
json_text = RSTRING_PTR(json_str);
|
477
|
+
#if LONG_MAX > SIZE_MAX
|
478
|
+
json_text_len = RSTRING_LENINT(json_str);
|
479
|
+
#else
|
480
|
+
json_text_len = RSTRING_LEN(json_str);
|
481
|
+
#endif
|
482
|
+
ctx = scan_ctx_init(path_ary, with_path);
|
465
483
|
|
466
484
|
handle = yajl_alloc(&scan_callbacks, NULL, (void *)ctx);
|
467
485
|
ctx->handle = handle;
|
468
486
|
// TODO: make it configurable
|
469
487
|
// yajl_config(handle, yajl_allow_comments, true);
|
470
488
|
// yajl_config(handle, yajl_allow_trailing_garbage, true);
|
471
|
-
stat = yajl_parse(handle, (unsigned char *)
|
489
|
+
stat = yajl_parse(handle, (unsigned char *)json_text, json_text_len);
|
472
490
|
if (stat == yajl_status_ok)
|
473
491
|
stat = yajl_complete_parse(handle);
|
474
492
|
|
475
493
|
if (stat != yajl_status_ok)
|
476
494
|
{
|
477
|
-
char *str = (char *)
|
495
|
+
char *str = (char *)yajl_get_error(handle, opt_verbose_error, (unsigned char *)json_text, json_text_len);
|
478
496
|
err = rb_str_new_cstr(str);
|
479
|
-
yajl_free_error(handle, (unsigned char *)
|
497
|
+
yajl_free_error(handle, (unsigned char *)str);
|
480
498
|
}
|
481
499
|
// callback_err = ctx->rb_err;
|
482
500
|
result = ctx->points_list;
|
@@ -494,7 +512,7 @@ RUBY_FUNC_EXPORTED void
|
|
494
512
|
Init_json_scanner(void)
|
495
513
|
{
|
496
514
|
rb_mJsonScanner = rb_define_module("JsonScanner");
|
497
|
-
rb_define_const(rb_mJsonScanner, "
|
515
|
+
rb_define_const(rb_mJsonScanner, "ANY_INDEX", rb_range_new(INT2FIX(0), INT2FIX(-1), false));
|
498
516
|
rb_mJsonScannerOptions = rb_define_module_under(rb_mJsonScanner, "Options");
|
499
517
|
rb_eJsonScannerParseError = rb_define_class_under(rb_mJsonScanner, "ParseError", rb_eRuntimeError);
|
500
518
|
rb_define_const(rb_mJsonScannerOptions, "ALLOW_COMMENTS", INT2FIX(yajl_allow_comments));
|
data/lib/json_scanner/version.rb
CHANGED
data/spec/json_scanner_spec.rb
CHANGED
@@ -4,7 +4,7 @@ require_relative "spec_helper"
|
|
4
4
|
|
5
5
|
RSpec.describe JsonScanner do
|
6
6
|
it "has a version number" do
|
7
|
-
expect(described_class::VERSION).not_to
|
7
|
+
expect(described_class::VERSION).not_to be_nil
|
8
8
|
end
|
9
9
|
|
10
10
|
it "scans json" do
|
@@ -19,6 +19,9 @@ RSpec.describe JsonScanner do
|
|
19
19
|
expect(described_class.scan('{"a": 1}', [["a"], []], false)).to eq(
|
20
20
|
[[[6, 7, :number]], [[0, 8, :object]]]
|
21
21
|
)
|
22
|
+
end
|
23
|
+
|
24
|
+
it "raises on invalid json" do
|
22
25
|
expect do
|
23
26
|
begin
|
24
27
|
GC.stress = true
|
@@ -34,4 +37,29 @@ RSpec.describe JsonScanner do
|
|
34
37
|
end
|
35
38
|
end.to raise_error described_class::ParseError
|
36
39
|
end
|
40
|
+
|
41
|
+
it "allows to select ranges" do
|
42
|
+
expect(
|
43
|
+
described_class.scan("[[1,2],[3,4]]", [[described_class::ANY_INDEX, described_class::ANY_INDEX]], false)
|
44
|
+
).to eq(
|
45
|
+
[[[2, 3, :number], [4, 5, :number], [8, 9, :number], [10, 11, :number]]]
|
46
|
+
)
|
47
|
+
expect(
|
48
|
+
described_class.scan("[[1,2],[3,4]]", [[described_class::ANY_INDEX, (0...1)]], false)
|
49
|
+
).to eq(
|
50
|
+
[[[2, 3, :number], [8, 9, :number]]]
|
51
|
+
)
|
52
|
+
end
|
53
|
+
|
54
|
+
it "allows only positive or -1 values" do
|
55
|
+
expect do
|
56
|
+
described_class.scan("[[1,2],[3,4]]", [[(0...-1)]], false)
|
57
|
+
end.to raise_error ArgumentError
|
58
|
+
expect do
|
59
|
+
described_class.scan("[[1,2],[3,4]]", [[(0..-2)]], false)
|
60
|
+
end.to raise_error ArgumentError
|
61
|
+
expect do
|
62
|
+
described_class.scan("[[1,2],[3,4]]", [[(-42..1)]], false)
|
63
|
+
end.to raise_error ArgumentError
|
64
|
+
end
|
37
65
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: json_scanner
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- uvlad7
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-12-
|
11
|
+
date: 2024-12-16 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: This gem uses yajl lib to scan a json string and allows you to parse
|
14
14
|
pieces of it
|
@@ -53,7 +53,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
53
53
|
requirements:
|
54
54
|
- libyajl2, v2.1
|
55
55
|
- libyajl-dev, v2.1
|
56
|
-
rubygems_version: 3.
|
56
|
+
rubygems_version: 3.4.20
|
57
57
|
signing_key:
|
58
58
|
specification_version: 4
|
59
59
|
summary: Extract values from JSON without full parsing
|