json_scanner 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +6 -1
- data/ext/json_scanner/json_scanner.c +47 -29
- data/lib/json_scanner/version.rb +1 -1
- data/spec/json_scanner_spec.rb +29 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1ffddd81459c088040c2ddee0006b1c9172e8a15aa8bab987dc59743e697f49f
|
4
|
+
data.tar.gz: 73b69bbcddaaf6711b2563787c4cbde1d258bf1d28b1e694714fd782c42d4c2c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 44a153e578da606f67399a09387cf26d63b1528ad9e6bb258083435202f094ebc95e3a2c895a83334eb2aeef4ea9bfbe0f0c38374ac76f5772beea6bc3910a0f
|
7
|
+
data.tar.gz: ff9bcbc934fafc4857faf926bca5f5c6c82ec357107b8d6e5f2c0a7e9e622afe3a19d4b0a3f4a987d2e965432c73615203afd1c3e1ac6a52965555b20ef2f377
|
data/README.md
CHANGED
@@ -32,7 +32,12 @@ emoji_json = '{"grin": "😁", "heart": "😍", "rofl": "🤣"}'
|
|
32
32
|
begin_pos, end_pos, = JsonScanner.scan(emoji_json, [["heart"]], false).first.first
|
33
33
|
emoji_json.byteslice(begin_pos...end_pos)
|
34
34
|
# => "\"😍\""
|
35
|
-
|
35
|
+
# Note: you most likely don't need the `quirks_mode` option, unless you are using an old Ruby
|
36
|
+
# with the stdlib version of the json gem or an old version of it. In new versions `quirks_mode` is the default
|
37
|
+
JSON.parse(emoji_json.byteslice(begin_pos...end_pos), quirks_mode: true)
|
38
|
+
# => "😍"
|
39
|
+
# You can also do this
|
40
|
+
# emoji_json.force_encoding(Encoding::BINARY)[begin_pos...end_pos].force_encoding(Encoding::UTF_8)
|
36
41
|
# => "\"😍\""
|
37
42
|
```
|
38
43
|
|
@@ -16,9 +16,9 @@ enum matcher_type
|
|
16
16
|
MATCHER_KEY,
|
17
17
|
MATCHER_INDEX,
|
18
18
|
// MATCHER_ANY_KEY,
|
19
|
-
// MATCHER_ANY_INDEX,
|
20
19
|
MATCHER_INDEX_RANGE,
|
21
20
|
// MATCHER_KEYS_LIST,
|
21
|
+
// MATCHER_KEY_REGEX,
|
22
22
|
};
|
23
23
|
|
24
24
|
enum path_type
|
@@ -86,16 +86,20 @@ typedef struct
|
|
86
86
|
// FIXME: This will cause a memory leak if ruby_xmalloc raises
|
87
87
|
scan_ctx *scan_ctx_init(VALUE path_ary, VALUE with_path)
|
88
88
|
{
|
89
|
+
int path_ary_len;
|
90
|
+
scan_ctx *ctx;
|
91
|
+
paths_t *paths;
|
89
92
|
// TODO: Allow to_ary and sized enumerables
|
90
93
|
rb_check_type(path_ary, T_ARRAY);
|
91
|
-
|
94
|
+
path_ary_len = rb_long2int(rb_array_len(path_ary));
|
92
95
|
// Check types early before any allocations, so exception is ok
|
93
96
|
// TODO: Fix this, just handle errors
|
94
97
|
for (int i = 0; i < path_ary_len; i++)
|
95
98
|
{
|
99
|
+
int path_len;
|
96
100
|
VALUE path = rb_ary_entry(path_ary, i);
|
97
101
|
rb_check_type(path, T_ARRAY);
|
98
|
-
|
102
|
+
path_len = rb_long2int(rb_array_len(path));
|
99
103
|
for (int j = 0; j < path_len; j++)
|
100
104
|
{
|
101
105
|
VALUE entry = rb_ary_entry(path, j);
|
@@ -113,25 +117,32 @@ scan_ctx *scan_ctx_init(VALUE path_ary, VALUE with_path)
|
|
113
117
|
else
|
114
118
|
{
|
115
119
|
VALUE range_beg, range_end;
|
120
|
+
long end_val;
|
116
121
|
int open_ended;
|
117
122
|
if (rb_range_values(entry, &range_beg, &range_end, &open_ended) != Qtrue)
|
118
123
|
rb_raise(rb_eArgError, "path elements must be strings, integers, or ranges");
|
119
|
-
RB_NUM2LONG(range_beg)
|
120
|
-
|
124
|
+
if (RB_NUM2LONG(range_beg) < 0L)
|
125
|
+
rb_raise(rb_eArgError, "range start must be positive");
|
126
|
+
end_val = RB_NUM2LONG(range_end);
|
127
|
+
if (end_val < -1L)
|
128
|
+
rb_raise(rb_eArgError, "range end must be positive or -1");
|
129
|
+
if (end_val == -1L && open_ended)
|
130
|
+
rb_raise(rb_eArgError, "range with -1 end must be closed");
|
121
131
|
}
|
122
132
|
}
|
123
133
|
}
|
124
134
|
|
125
|
-
|
135
|
+
ctx = ruby_xmalloc(sizeof(scan_ctx));
|
126
136
|
|
127
|
-
ctx->with_path =
|
137
|
+
ctx->with_path = RTEST(with_path);
|
128
138
|
ctx->max_path_len = 0;
|
129
139
|
|
130
|
-
|
140
|
+
paths = ruby_xmalloc(sizeof(paths_t) * path_ary_len);
|
131
141
|
for (int i = 0; i < path_ary_len; i++)
|
132
142
|
{
|
143
|
+
int path_len;
|
133
144
|
VALUE path = rb_ary_entry(path_ary, i);
|
134
|
-
|
145
|
+
path_len = rb_long2int(rb_array_len(path));
|
135
146
|
if (path_len > ctx->max_path_len)
|
136
147
|
ctx->max_path_len = path_len;
|
137
148
|
paths[i].elems = ruby_xmalloc2(sizeof(path_matcher_elem_t), path_len);
|
@@ -156,12 +167,16 @@ scan_ctx *scan_ctx_init(VALUE path_ary, VALUE with_path)
|
|
156
167
|
}
|
157
168
|
else
|
158
169
|
{
|
159
|
-
paths[i].elems[j].type = MATCHER_INDEX_RANGE;
|
160
170
|
VALUE range_beg, range_end;
|
161
171
|
int open_ended;
|
172
|
+
paths[i].elems[j].type = MATCHER_INDEX_RANGE;
|
162
173
|
rb_range_values(entry, &range_beg, &range_end, &open_ended);
|
163
174
|
paths[i].elems[j].value.range.start = RB_NUM2LONG(range_beg);
|
164
175
|
paths[i].elems[j].value.range.end = RB_NUM2LONG(range_end);
|
176
|
+
// (value..-1) works as expected, (value...-1) is forbidden above
|
177
|
+
if (paths[i].elems[j].value.range.end == -1L)
|
178
|
+
paths[i].elems[j].value.range.end = LONG_MAX;
|
179
|
+
// -1 here is fine, so, (0...0) works just as expected - doesn't match anything
|
165
180
|
if (open_ended)
|
166
181
|
paths[i].elems[j].value.range.end--;
|
167
182
|
}
|
@@ -226,8 +241,8 @@ typedef enum
|
|
226
241
|
// noexcept
|
227
242
|
void create_point(VALUE *point, scan_ctx *sctx, value_type type, size_t length, size_t curr_pos)
|
228
243
|
{
|
229
|
-
*point = rb_ary_new_capa(3);
|
230
244
|
VALUE values[3];
|
245
|
+
*point = rb_ary_new_capa(3);
|
231
246
|
// noexcept
|
232
247
|
values[1] = RB_ULONG2NUM(curr_pos);
|
233
248
|
switch (type)
|
@@ -268,12 +283,13 @@ void save_point(scan_ctx *sctx, value_type type, size_t length)
|
|
268
283
|
// TODO: Abort parsing if all paths are matched and no more matches are possible: only trivial key/index matchers at the current level
|
269
284
|
// TODO: Don't re-compare already matched prefixes; hard to invalidate, though
|
270
285
|
VALUE point = Qundef;
|
286
|
+
int match;
|
271
287
|
for (int i = 0; i < sctx->paths_len; i++)
|
272
288
|
{
|
273
289
|
if (sctx->paths[i].len != sctx->current_path_len)
|
274
290
|
continue;
|
275
291
|
|
276
|
-
|
292
|
+
match = true;
|
277
293
|
for (int j = 0; j < sctx->current_path_len; j++)
|
278
294
|
{
|
279
295
|
switch (sctx->paths[i].elems[j].type)
|
@@ -383,7 +399,7 @@ int scan_on_key(void *ctx, const unsigned char *key, size_t len)
|
|
383
399
|
return true;
|
384
400
|
// Can't be called without scan_on_start_object being called before
|
385
401
|
// So current_path_len is at least 1 and key.type is set to PATH_KEY;
|
386
|
-
sctx->current_path[sctx->current_path_len - 1].value.key.val = (char *)
|
402
|
+
sctx->current_path[sctx->current_path_len - 1].value.key.val = (char *)key;
|
387
403
|
sctx->current_path[sctx->current_path_len - 1].value.key.len = len;
|
388
404
|
return true;
|
389
405
|
}
|
@@ -446,37 +462,39 @@ static yajl_callbacks scan_callbacks = {
|
|
446
462
|
// TODO: make with_path optional kw: `with_path: false`
|
447
463
|
VALUE scan(VALUE self, VALUE json_str, VALUE path_ary, VALUE with_path)
|
448
464
|
{
|
449
|
-
|
450
|
-
|
451
|
-
#if LONG_MAX > SIZE_MAX
|
452
|
-
size_t json_text_len = RSTRING_LENINT(json_str);
|
453
|
-
#else
|
454
|
-
size_t json_text_len = RSTRING_LEN(json_str);
|
455
|
-
#endif
|
465
|
+
char *json_text;
|
466
|
+
size_t json_text_len;
|
456
467
|
yajl_handle handle;
|
457
|
-
// TODO
|
458
|
-
int opt_verbose_error = 0;
|
459
468
|
yajl_status stat;
|
460
|
-
scan_ctx *ctx
|
461
|
-
VALUE err = Qnil;
|
462
|
-
VALUE result;
|
469
|
+
scan_ctx *ctx;
|
470
|
+
VALUE err = Qnil, result;
|
463
471
|
// Turned out callbacks can't raise exceptions
|
464
472
|
// VALUE callback_err;
|
473
|
+
// TODO
|
474
|
+
int opt_verbose_error = 0;
|
475
|
+
rb_check_type(json_str, T_STRING);
|
476
|
+
json_text = RSTRING_PTR(json_str);
|
477
|
+
#if LONG_MAX > SIZE_MAX
|
478
|
+
json_text_len = RSTRING_LENINT(json_str);
|
479
|
+
#else
|
480
|
+
json_text_len = RSTRING_LEN(json_str);
|
481
|
+
#endif
|
482
|
+
ctx = scan_ctx_init(path_ary, with_path);
|
465
483
|
|
466
484
|
handle = yajl_alloc(&scan_callbacks, NULL, (void *)ctx);
|
467
485
|
ctx->handle = handle;
|
468
486
|
// TODO: make it configurable
|
469
487
|
// yajl_config(handle, yajl_allow_comments, true);
|
470
488
|
// yajl_config(handle, yajl_allow_trailing_garbage, true);
|
471
|
-
stat = yajl_parse(handle, (unsigned char *)
|
489
|
+
stat = yajl_parse(handle, (unsigned char *)json_text, json_text_len);
|
472
490
|
if (stat == yajl_status_ok)
|
473
491
|
stat = yajl_complete_parse(handle);
|
474
492
|
|
475
493
|
if (stat != yajl_status_ok)
|
476
494
|
{
|
477
|
-
char *str = (char *)
|
495
|
+
char *str = (char *)yajl_get_error(handle, opt_verbose_error, (unsigned char *)json_text, json_text_len);
|
478
496
|
err = rb_str_new_cstr(str);
|
479
|
-
yajl_free_error(handle, (unsigned char *)
|
497
|
+
yajl_free_error(handle, (unsigned char *)str);
|
480
498
|
}
|
481
499
|
// callback_err = ctx->rb_err;
|
482
500
|
result = ctx->points_list;
|
@@ -494,7 +512,7 @@ RUBY_FUNC_EXPORTED void
|
|
494
512
|
Init_json_scanner(void)
|
495
513
|
{
|
496
514
|
rb_mJsonScanner = rb_define_module("JsonScanner");
|
497
|
-
rb_define_const(rb_mJsonScanner, "
|
515
|
+
rb_define_const(rb_mJsonScanner, "ANY_INDEX", rb_range_new(INT2FIX(0), INT2FIX(-1), false));
|
498
516
|
rb_mJsonScannerOptions = rb_define_module_under(rb_mJsonScanner, "Options");
|
499
517
|
rb_eJsonScannerParseError = rb_define_class_under(rb_mJsonScanner, "ParseError", rb_eRuntimeError);
|
500
518
|
rb_define_const(rb_mJsonScannerOptions, "ALLOW_COMMENTS", INT2FIX(yajl_allow_comments));
|
data/lib/json_scanner/version.rb
CHANGED
data/spec/json_scanner_spec.rb
CHANGED
@@ -4,7 +4,7 @@ require_relative "spec_helper"
|
|
4
4
|
|
5
5
|
RSpec.describe JsonScanner do
|
6
6
|
it "has a version number" do
|
7
|
-
expect(described_class::VERSION).not_to
|
7
|
+
expect(described_class::VERSION).not_to be_nil
|
8
8
|
end
|
9
9
|
|
10
10
|
it "scans json" do
|
@@ -19,6 +19,9 @@ RSpec.describe JsonScanner do
|
|
19
19
|
expect(described_class.scan('{"a": 1}', [["a"], []], false)).to eq(
|
20
20
|
[[[6, 7, :number]], [[0, 8, :object]]]
|
21
21
|
)
|
22
|
+
end
|
23
|
+
|
24
|
+
it "raises on invalid json" do
|
22
25
|
expect do
|
23
26
|
begin
|
24
27
|
GC.stress = true
|
@@ -34,4 +37,29 @@ RSpec.describe JsonScanner do
|
|
34
37
|
end
|
35
38
|
end.to raise_error described_class::ParseError
|
36
39
|
end
|
40
|
+
|
41
|
+
it "allows to select ranges" do
|
42
|
+
expect(
|
43
|
+
described_class.scan("[[1,2],[3,4]]", [[described_class::ANY_INDEX, described_class::ANY_INDEX]], false)
|
44
|
+
).to eq(
|
45
|
+
[[[2, 3, :number], [4, 5, :number], [8, 9, :number], [10, 11, :number]]]
|
46
|
+
)
|
47
|
+
expect(
|
48
|
+
described_class.scan("[[1,2],[3,4]]", [[described_class::ANY_INDEX, (0...1)]], false)
|
49
|
+
).to eq(
|
50
|
+
[[[2, 3, :number], [8, 9, :number]]]
|
51
|
+
)
|
52
|
+
end
|
53
|
+
|
54
|
+
it "allows only positive or -1 values" do
|
55
|
+
expect do
|
56
|
+
described_class.scan("[[1,2],[3,4]]", [[(0...-1)]], false)
|
57
|
+
end.to raise_error ArgumentError
|
58
|
+
expect do
|
59
|
+
described_class.scan("[[1,2],[3,4]]", [[(0..-2)]], false)
|
60
|
+
end.to raise_error ArgumentError
|
61
|
+
expect do
|
62
|
+
described_class.scan("[[1,2],[3,4]]", [[(-42..1)]], false)
|
63
|
+
end.to raise_error ArgumentError
|
64
|
+
end
|
37
65
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: json_scanner
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- uvlad7
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-12-
|
11
|
+
date: 2024-12-16 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: This gem uses yajl lib to scan a json string and allows you to parse
|
14
14
|
pieces of it
|
@@ -53,7 +53,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
53
53
|
requirements:
|
54
54
|
- libyajl2, v2.1
|
55
55
|
- libyajl-dev, v2.1
|
56
|
-
rubygems_version: 3.
|
56
|
+
rubygems_version: 3.4.20
|
57
57
|
signing_key:
|
58
58
|
specification_version: 4
|
59
59
|
summary: Extract values from JSON without full parsing
|