json_scanner 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +69 -3
- data/ext/json_scanner/json_scanner.c +374 -76
- data/lib/json_scanner/version.rb +1 -1
- data/spec/extensiontesttask.rb +128 -0
- data/spec/json_scanner_spec.c +0 -0
- data/spec/json_scanner_spec.rb +161 -2
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9c1ddff519827bc802cdcacb5b048402706544b0882c8ac91cd1aa414c4b57e0
|
4
|
+
data.tar.gz: d1c4f41dbd71ed08a488c2f9647194fd2692c91d522fee35d28d7060bf80321c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 57bf59cc9495f46675bb98d2fc7545bdc3b8392631c443ad2b89595b22be054c8f8bb268a798c5f104d1e38b73d577662f96637fd9311260c9b0a45b55044265
|
7
|
+
data.tar.gz: '055432559a23dbf34e679aac7be4967ea163684fda718b433e978c34feb73f298f1346b00629fada8279a9e49e1267b990f73d2d57c8403330faf42ce4086bb8'
|
data/README.md
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
# JsonScanner
|
4
4
|
|
5
|
-
Extract values from JSON without full parsing. This gem uses yajl
|
5
|
+
Extract values from JSON without full parsing. This gem uses the `yajl` library to scan a JSON string and allows you to parse pieces of it.
|
6
6
|
|
7
7
|
## Installation
|
8
8
|
|
@@ -16,6 +16,8 @@ If bundler is not being used to manage dependencies, install the gem by executin
|
|
16
16
|
|
17
17
|
## Usage
|
18
18
|
|
19
|
+
Basic usage
|
20
|
+
|
19
21
|
```ruby
|
20
22
|
require "json"
|
21
23
|
require "json_scanner"
|
@@ -32,13 +34,77 @@ emoji_json = '{"grin": "😁", "heart": "😍", "rofl": "🤣"}'
|
|
32
34
|
begin_pos, end_pos, = JsonScanner.scan(emoji_json, [["heart"]], false).first.first
|
33
35
|
emoji_json.byteslice(begin_pos...end_pos)
|
34
36
|
# => "\"😍\""
|
35
|
-
# Note: most likely don't need `quirks_mode` option
|
36
|
-
#
|
37
|
+
# Note: You most likely don't need the `quirks_mode` option unless you are using an older version
|
38
|
+
# of Ruby with the bundled - or an otherwise old - version of the json gem. In newer versions, `quirks_mode` is enabled by default.
|
37
39
|
JSON.parse(emoji_json.byteslice(begin_pos...end_pos), quirks_mode: true)
|
38
40
|
# => "😍"
|
39
41
|
# You can also do this
|
40
42
|
# emoji_json.force_encoding(Encoding::BINARY)[begin_pos...end_pos].force_encoding(Encoding::UTF_8)
|
41
43
|
# => "\"😍\""
|
44
|
+
|
45
|
+
# Ranges are supported as matchers for indexes with the following restrictions:
|
46
|
+
# - the start of a range must be non-negative
|
47
|
+
# - the end of a range must be non-negative or -1
|
48
|
+
# - a range with -1 end must be closed, e.g. (0..-1) works, but (0...-1) is forbidden
|
49
|
+
JsonScanner.scan('[0, 42, 0]', [[(1..-1)]])
|
50
|
+
# => [[[4, 6, :number], [8, 9, :number]]]
|
51
|
+
JsonScanner.scan('[0, 42, 0]', [[JsonScanner::ANY_INDEX]])
|
52
|
+
# => [[[1, 2, :number], [4, 6, :number], [8, 9, :number]]]
|
53
|
+
|
54
|
+
# Special matcher JsonScanner::ANY_KEY is supported for object keys
|
55
|
+
JsonScanner.scan('{"a": 1, "b": 2}', [[JsonScanner::ANY_KEY]], with_path: true)
|
56
|
+
# => [[[["a"], [6, 7, :number]], [["b"], [14, 15, :number]]]]
|
57
|
+
```
|
58
|
+
|
59
|
+
It supports multiple options
|
60
|
+
|
61
|
+
```ruby
|
62
|
+
JsonScanner.scan('[0, 42, 0]', [[(1..-1)]], with_path: true)
|
63
|
+
# => [[[[1], [4, 6, :number]], [[2], [8, 9, :number]]]]
|
64
|
+
JsonScanner.scan('[0, 42,', [[(1..-1)]], verbose_error: true)
|
65
|
+
# JsonScanner::ParseError (parse error: premature EOF)
|
66
|
+
# [0, 42,
|
67
|
+
# (right here) ------^
|
68
|
+
JsonScanner.scan('[0, /* answer */ 42, 0]', [[(1..-1)]], allow_comments: true)
|
69
|
+
# => [[[17, 19, :number], [21, 22, :number]]]
|
70
|
+
JsonScanner.scan("\"\x81\x83\"", [[]], dont_validate_strings: true)
|
71
|
+
# => [[[0, 4, :string]]]
|
72
|
+
JsonScanner.scan("{\"\x81\x83\": 42}", [[JsonScanner::ANY_KEY]], dont_validate_strings: true, with_path: true)
|
73
|
+
# => [[[["\x81\x83"], [7, 9, :number]]]]
|
74
|
+
JsonScanner.scan('[0, 42, 0]garbage', [[(1..-1)]], allow_trailing_garbage: true)
|
75
|
+
# => [[[4, 6, :number], [8, 9, :number]]]
|
76
|
+
JsonScanner.scan('[0, 42, 0] [0, 34]', [[(1..-1)]], allow_multiple_values: true)
|
77
|
+
# => [[[4, 6, :number], [8, 9, :number], [16, 18, :number]]]
|
78
|
+
JsonScanner.scan('[0, 42, 0', [[(1..-1)]], allow_partial_values: true)
|
79
|
+
# => [[[4, 6, :number], [8, 9, :number]]]
|
80
|
+
JsonScanner.scan('{"a": 1}', [[JsonScanner::ANY_KEY]], with_path: true, symbolize_path_keys: true)
|
81
|
+
# => [[[[:a], [6, 7, :number]]]]
|
82
|
+
```
|
83
|
+
|
84
|
+
Note that the standard `JSON` library supports comments, so you may want to enable them in `JsonScanner` as well:
|
85
|
+
```ruby
|
86
|
+
json_str = '{"answer": {"value": 42 /* the Ultimate Question of Life, the Universe, and Everything */ }}'
|
87
|
+
JsonScanner.scan(json_str, [["answer"]], allow_comments: true).first.map do |begin_pos, end_pos, _type|
|
88
|
+
JSON.parse(json_str.byteslice(begin_pos...end_pos), quirks_mode: true)
|
89
|
+
end
|
90
|
+
# => [{"value"=>42}]
|
91
|
+
```
|
92
|
+
|
93
|
+
You can also create a config and reuse it
|
94
|
+
|
95
|
+
```ruby
|
96
|
+
require "json_scanner"
|
97
|
+
|
98
|
+
config = JsonScanner::Config.new([[], ["key"], [(0..-1)]])
|
99
|
+
# => #<JsonScanner::Config [[], ['key'], [(0..9223372036854775807)]]>
|
100
|
+
JsonScanner.scan('{"key": "42"}', config)
|
101
|
+
# => [[[0, 13, :object]], [[8, 12, :string]], []]
|
102
|
+
JsonScanner.scan('{"key": "42"}', config, with_path: true)
|
103
|
+
# => [[[[], [0, 13, :object]]], [[["key"], [8, 12, :string]]], []]
|
104
|
+
JsonScanner.scan('[0, 42]', config)
|
105
|
+
# => [[[0, 7, :array]], [], [[1, 2, :number], [4, 6, :number]]]
|
106
|
+
JsonScanner.scan('[0, 42]', config, with_path: true)
|
107
|
+
# => [[[[], [0, 7, :array]]], [], [[[0], [1, 2, :number]], [[1], [4, 6, :number]]]]
|
42
108
|
```
|
43
109
|
|
44
110
|
## Development
|
@@ -1,8 +1,12 @@
|
|
1
1
|
#include "json_scanner.h"
|
2
2
|
|
3
3
|
VALUE rb_mJsonScanner;
|
4
|
+
VALUE rb_cJsonScannerConfig;
|
4
5
|
VALUE rb_eJsonScannerParseError;
|
5
|
-
|
6
|
+
#define BYTES_CONSUMED "bytes_consumed"
|
7
|
+
ID rb_iv_bytes_consumed;
|
8
|
+
#define SCAN_KWARGS_SIZE 8
|
9
|
+
ID scan_kwargs_table[SCAN_KWARGS_SIZE];
|
6
10
|
|
7
11
|
VALUE null_sym;
|
8
12
|
VALUE boolean_sym;
|
@@ -11,11 +15,13 @@ VALUE string_sym;
|
|
11
15
|
VALUE object_sym;
|
12
16
|
VALUE array_sym;
|
13
17
|
|
18
|
+
VALUE any_key_sym;
|
19
|
+
|
14
20
|
enum matcher_type
|
15
21
|
{
|
16
22
|
MATCHER_KEY,
|
17
23
|
MATCHER_INDEX,
|
18
|
-
|
24
|
+
MATCHER_ANY_KEY,
|
19
25
|
MATCHER_INDEX_RANGE,
|
20
26
|
// MATCHER_KEYS_LIST,
|
21
27
|
// MATCHER_KEY_REGEX,
|
@@ -70,30 +76,115 @@ typedef struct
|
|
70
76
|
// Scanner state handed to the yajl callbacks: the configured path matchers plus
// the per-scan bookkeeping that scan_ctx_reset() re-initializes before each scan.
typedef struct
{
  // when true, every saved result is a [path, point] pair instead of a bare point
  int with_path;
  // when true, object keys in reported paths are created as symbols instead of strings
  int symbolize_path_keys;
  // number of entries in `paths`
  int paths_len;
  // the configured path matchers, one entry per requested path
  paths_t *paths;
  // current nesting depth; it keeps growing past max_path_len, but levels beyond
  // max_path_len are not stored in current_path
  int current_path_len;
  int max_path_len;
  // allocated with max_path_len elements
  path_elem_t *current_path;
  // Easier to use a Ruby array for result than convert later
  // must be supplied by the caller and RB_GC_GUARD-ed if it isn't on the stack
  VALUE points_list;
  // by depth
  // allocated with max_path_len + 1 elements; holds start offsets of the objects/arrays being scanned
  size_t *starts;
  // VALUE rb_err;
  yajl_handle handle;
  // bytes consumed by already finished yajl calls, see scan_ctx_update_bytes_consumed()
  size_t yajl_bytes_consumed;
} scan_ctx;
|
85
94
|
|
95
|
+
// Returns the absolute input offset reached so far: the bytes accumulated in
// ctx->yajl_bytes_consumed plus what the current yajl handle reports.
//
// Declared `static inline`: a plain `inline` definition in C99 does not emit an
// external symbol, so a call the compiler decides not to inline (e.g. at -O0)
// would end up as an unresolved reference when the extension is loaded.
static inline size_t scan_ctx_get_bytes_consumed(scan_ctx *ctx)
{
  return ctx->yajl_bytes_consumed + yajl_get_bytes_consumed(ctx->handle);
}
|
99
|
+
|
100
|
+
// Adds the number of bytes reported by the current yajl handle to the running
// total kept in ctx->yajl_bytes_consumed.
//
// Declared `static inline`: a plain `inline` definition in C99 does not emit an
// external symbol, so a call the compiler decides not to inline (e.g. at -O0)
// would end up as an unresolved reference when the extension is loaded.
static inline void scan_ctx_update_bytes_consumed(scan_ctx *ctx)
{
  ctx->yajl_bytes_consumed += yajl_get_bytes_consumed(ctx->handle);
}
|
104
|
+
|
105
|
+
// Debug helper: dumps every field of the scan context to stderr.
// Safe to call on a context that has only been allocated (NULL paths/starts)
// and on a context whose nesting depth currently exceeds max_path_len.
void scan_ctx_debug(scan_ctx *ctx)
{
  // actually might have been cleared by GC already, be careful, debug only when in valid state
  VALUE points_list_inspect = ctx->points_list == Qundef ? rb_str_new_cstr("undef") : rb_inspect(ctx->points_list);
  // current_path stores at most max_path_len elements even when current_path_len is larger,
  // so never read past that bound
  int stored_path_len = ctx->current_path_len < ctx->max_path_len ? ctx->current_path_len : ctx->max_path_len;

  fprintf(stderr, "\nscan_ctx {\n");
  fprintf(stderr, "  with_path: %s,\n", ctx->with_path ? "true" : "false");
  fprintf(stderr, "  symbolize_path_keys: %s,\n", ctx->symbolize_path_keys ? "true" : "false");
  fprintf(stderr, "  paths_len: %d,\n", ctx->paths_len);

  fprintf(stderr, "  paths: [\n");
  for (int i = 0; ctx->paths && i < ctx->paths_len; i++)
  {
    paths_t *path = &ctx->paths[i];
    fprintf(stderr, "    [");
    for (int j = 0; j < path->len; j++)
    {
      switch (path->elems[j].type)
      {
      case MATCHER_KEY:
        fprintf(stderr, "'%.*s'", (int)path->elems[j].value.key.len, path->elems[j].value.key.val);
        break;
      case MATCHER_INDEX:
        fprintf(stderr, "%ld", path->elems[j].value.index);
        break;
      case MATCHER_INDEX_RANGE:
        fprintf(stderr, "(%ld..%ld)", path->elems[j].value.range.start, path->elems[j].value.range.end);
        break;
      case MATCHER_ANY_KEY:
        fprintf(stderr, "('*'..'*')");
        break;
      }
      if (j < path->len - 1)
        fprintf(stderr, ", ");
    }
    fprintf(stderr, "],\n");
  }
  fprintf(stderr, "  ],\n");

  fprintf(stderr, "  current_path_len: %d,\n", ctx->current_path_len);
  fprintf(stderr, "  max_path_len: %d,\n", ctx->max_path_len);
  fprintf(stderr, "  current_path: [");
  for (int i = 0; ctx->current_path && i < stored_path_len; i++)
  {
    path_elem_t *elem = &ctx->current_path[i];
    switch (elem->type)
    {
    case PATH_KEY:
      fprintf(stderr, "'%.*s'", (int)elem->value.key.len, elem->value.key.val);
      break;
    case PATH_INDEX:
      fprintf(stderr, "%ld", elem->value.index);
      break;
    }
    if (i < stored_path_len - 1)
      fprintf(stderr, ", ");
  }
  fprintf(stderr, "],\n");

  fprintf(stderr, "  points_list: %.*s,\n", RSTRING_LENINT(points_list_inspect), RSTRING_PTR(points_list_inspect));
  fprintf(stderr, "  starts: [");
  // starts is NULL until the context has been initialized with paths
  for (int i = 0; ctx->starts && i <= ctx->max_path_len; i++)
  {
    // size_t needs %zu, %ld is wrong where long and size_t differ in width
    fprintf(stderr, "%zu", ctx->starts[i]);
    if (i < ctx->max_path_len)
      fprintf(stderr, ", ");
  }
  fprintf(stderr, "],\n");

  // %p requires a void pointer argument
  fprintf(stderr, "  handle: %p,\n", (void *)ctx->handle);
  fprintf(stderr, "  yajl_bytes_consumed: %zu,\n", ctx->yajl_bytes_consumed);
  fprintf(stderr, "}\n\n\n");
}
|
175
|
+
|
86
176
|
// FIXME: This will cause memory leak if ruby_xmalloc raises
|
87
|
-
|
177
|
+
// path_ary must be RB_GC_GUARD-ed by the caller
|
178
|
+
VALUE scan_ctx_init(scan_ctx *ctx, VALUE path_ary, VALUE string_keys)
|
88
179
|
{
|
89
180
|
int path_ary_len;
|
90
|
-
scan_ctx *ctx;
|
91
181
|
paths_t *paths;
|
92
182
|
// TODO: Allow to_ary and sized enumerables
|
93
183
|
rb_check_type(path_ary, T_ARRAY);
|
94
184
|
path_ary_len = rb_long2int(rb_array_len(path_ary));
|
95
185
|
// Check types early before any allocations, so exception is ok
|
96
186
|
// TODO: Fix this, just handle errors
|
187
|
+
// It's not possible that another Ruby thread changes path_ary items between these two loops, because C call holds GVL
|
97
188
|
for (int i = 0; i < path_ary_len; i++)
|
98
189
|
{
|
99
190
|
int path_len;
|
@@ -103,38 +194,42 @@ scan_ctx *scan_ctx_init(VALUE path_ary, int with_path)
|
|
103
194
|
for (int j = 0; j < path_len; j++)
|
104
195
|
{
|
105
196
|
VALUE entry = rb_ary_entry(path, j);
|
106
|
-
|
107
|
-
if (type == T_STRING)
|
197
|
+
switch (TYPE(entry))
|
108
198
|
{
|
199
|
+
case T_SYMBOL:
|
200
|
+
entry = rb_sym2str(entry);
|
201
|
+
/* fall through */
|
202
|
+
case T_STRING:
|
109
203
|
#if LONG_MAX > SIZE_MAX
|
110
204
|
RSTRING_LENINT(entry);
|
111
205
|
#endif
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
206
|
+
break;
|
207
|
+
case T_FIXNUM:
|
208
|
+
case T_BIGNUM:
|
209
|
+
NUM2LONG(entry);
|
210
|
+
break;
|
211
|
+
default:
|
118
212
|
{
|
119
213
|
VALUE range_beg, range_end;
|
120
214
|
long end_val;
|
121
215
|
int open_ended;
|
122
216
|
if (rb_range_values(entry, &range_beg, &range_end, &open_ended) != Qtrue)
|
123
|
-
|
124
|
-
if (
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
217
|
+
return rb_exc_new_cstr(rb_eArgError, "path elements must be strings, integers, or ranges");
|
218
|
+
if (range_beg != any_key_sym || range_end != any_key_sym)
|
219
|
+
{
|
220
|
+
if (NUM2LONG(range_beg) < 0L)
|
221
|
+
return rb_exc_new_cstr(rb_eArgError, "range start must be positive");
|
222
|
+
end_val = NUM2LONG(range_end);
|
223
|
+
if (end_val < -1L)
|
224
|
+
return rb_exc_new_cstr(rb_eArgError, "range end must be positive or -1");
|
225
|
+
if (end_val == -1L && open_ended)
|
226
|
+
return rb_exc_new_cstr(rb_eArgError, "range with -1 end must be closed");
|
227
|
+
}
|
228
|
+
}
|
131
229
|
}
|
132
230
|
}
|
133
231
|
}
|
134
232
|
|
135
|
-
ctx = ruby_xmalloc(sizeof(scan_ctx));
|
136
|
-
|
137
|
-
ctx->with_path = with_path;
|
138
233
|
ctx->max_path_len = 0;
|
139
234
|
|
140
235
|
paths = ruby_xmalloc(sizeof(paths_t) * path_ary_len);
|
@@ -149,9 +244,20 @@ scan_ctx *scan_ctx_init(VALUE path_ary, int with_path)
|
|
149
244
|
for (int j = 0; j < path_len; j++)
|
150
245
|
{
|
151
246
|
VALUE entry = rb_ary_entry(path, j);
|
152
|
-
|
153
|
-
if (type == T_STRING)
|
247
|
+
switch (TYPE(entry))
|
154
248
|
{
|
249
|
+
case T_SYMBOL:
|
250
|
+
entry = rb_sym2str(entry);
|
251
|
+
/* fall through */
|
252
|
+
case T_STRING:
|
253
|
+
{
|
254
|
+
if (string_keys != Qundef)
|
255
|
+
{
|
256
|
+
// If string_keys is provided, we need to duplicate the string
|
257
|
+
// to avoid use-after-free issues and to add the newly created string to the string_keys array
|
258
|
+
entry = rb_str_dup(entry);
|
259
|
+
rb_ary_push(string_keys, entry);
|
260
|
+
}
|
155
261
|
paths[i].elems[j].type = MATCHER_KEY;
|
156
262
|
paths[i].elems[j].value.key.val = RSTRING_PTR(entry);
|
157
263
|
#if LONG_MAX > SIZE_MAX
|
@@ -160,25 +266,36 @@ scan_ctx *scan_ctx_init(VALUE path_ary, int with_path)
|
|
160
266
|
paths[i].elems[j].value.key.len = RSTRING_LEN(entry);
|
161
267
|
#endif
|
162
268
|
}
|
163
|
-
|
269
|
+
break;
|
270
|
+
case T_FIXNUM:
|
271
|
+
case T_BIGNUM:
|
164
272
|
{
|
165
273
|
paths[i].elems[j].type = MATCHER_INDEX;
|
166
274
|
paths[i].elems[j].value.index = FIX2LONG(entry);
|
167
275
|
}
|
168
|
-
|
276
|
+
break;
|
277
|
+
default:
|
169
278
|
{
|
170
279
|
VALUE range_beg, range_end;
|
171
280
|
int open_ended;
|
172
|
-
paths[i].elems[j].type = MATCHER_INDEX_RANGE;
|
173
281
|
rb_range_values(entry, &range_beg, &range_end, &open_ended);
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
paths[i].elems[j].value.range.
|
282
|
+
if (range_beg == any_key_sym && range_end == any_key_sym)
|
283
|
+
{
|
284
|
+
paths[i].elems[j].type = MATCHER_ANY_KEY;
|
285
|
+
}
|
286
|
+
else
|
287
|
+
{
|
288
|
+
paths[i].elems[j].type = MATCHER_INDEX_RANGE;
|
289
|
+
paths[i].elems[j].value.range.start = NUM2LONG(range_beg);
|
290
|
+
paths[i].elems[j].value.range.end = NUM2LONG(range_end);
|
291
|
+
// (value..-1) works as expected, (value...-1) is forbidden above
|
292
|
+
if (paths[i].elems[j].value.range.end == -1L)
|
293
|
+
paths[i].elems[j].value.range.end = LONG_MAX;
|
294
|
+
// -1 here is fine, so, (0...0) works just as expected - doesn't match anything
|
295
|
+
if (open_ended)
|
296
|
+
paths[i].elems[j].value.range.end--;
|
297
|
+
}
|
298
|
+
}
|
182
299
|
}
|
183
300
|
}
|
184
301
|
paths[i].len = path_len;
|
@@ -189,32 +306,37 @@ scan_ctx *scan_ctx_init(VALUE path_ary, int with_path)
|
|
189
306
|
ctx->paths_len = path_ary_len;
|
190
307
|
ctx->current_path = ruby_xmalloc2(sizeof(path_elem_t), ctx->max_path_len);
|
191
308
|
|
192
|
-
ctx->current_path_len = 0;
|
193
|
-
ctx->points_list = rb_ary_new_capa(path_ary_len);
|
194
|
-
for (int i = 0; i < path_ary_len; i++)
|
195
|
-
{
|
196
|
-
rb_ary_push(ctx->points_list, rb_ary_new());
|
197
|
-
}
|
198
|
-
|
199
309
|
ctx->starts = ruby_xmalloc2(sizeof(size_t), ctx->max_path_len + 1);
|
310
|
+
return Qundef; // no error
|
311
|
+
}
|
312
|
+
|
313
|
+
// resets temporary values in the config
|
314
|
+
// Prepares a context for a new scan: stores the per-call options and the result
// array, and clears the state left over from a previous scan. The configured
// paths and their buffers are left untouched.
void scan_ctx_reset(scan_ctx *ctx, VALUE points_list, int with_path, int symbolize_path_keys)
{
  // per-call settings supplied by the caller
  ctx->symbolize_path_keys = symbolize_path_keys;
  ctx->with_path = with_path;
  ctx->points_list = points_list;

  // transient scanning state
  // TODO: reset matched_depth if implemented
  // ctx->rb_err = Qnil;
  ctx->yajl_bytes_consumed = 0;
  ctx->handle = NULL;
  ctx->current_path_len = 0;
}
|
205
325
|
|
206
326
|
// Releases the buffers owned by the context (starts, current_path, and every
// path's element array plus the paths array itself). The context struct itself
// is NOT freed here; that is left to the caller. A NULL ctx is accepted.
void scan_ctx_free(scan_ctx *ctx)
{
  // fprintf(stderr, "scan_ctx_free\n");
  if (ctx == NULL)
    return;

  ruby_xfree(ctx->starts);
  ruby_xfree(ctx->current_path);

  if (ctx->paths != NULL)
  {
    int remaining = ctx->paths_len;
    for (paths_t *path = ctx->paths; remaining > 0; path++, remaining--)
      ruby_xfree(path->elems);
    ruby_xfree(ctx->paths);
  }
}
|
219
341
|
|
220
342
|
// noexcept
|
@@ -239,37 +361,38 @@ typedef enum
|
|
239
361
|
} value_type;
|
240
362
|
|
241
363
|
// noexcept
|
242
|
-
VALUE create_point(scan_ctx *sctx, value_type type, size_t length
|
364
|
+
VALUE create_point(scan_ctx *sctx, value_type type, size_t length)
|
243
365
|
{
|
244
|
-
VALUE values[3];
|
245
|
-
|
366
|
+
VALUE values[3], point;
|
367
|
+
size_t curr_pos = scan_ctx_get_bytes_consumed(sctx);
|
368
|
+
point = rb_ary_new_capa(3);
|
246
369
|
// noexcept
|
247
|
-
values[1] =
|
370
|
+
values[1] = ULL2NUM(curr_pos);
|
248
371
|
switch (type)
|
249
372
|
{
|
250
373
|
// FIXME: size_t can be longer than ulong
|
251
374
|
case null_value:
|
252
|
-
values[0] =
|
375
|
+
values[0] = ULL2NUM(curr_pos - length);
|
253
376
|
values[2] = null_sym;
|
254
377
|
break;
|
255
378
|
case boolean_value:
|
256
|
-
values[0] =
|
379
|
+
values[0] = ULL2NUM(curr_pos - length);
|
257
380
|
values[2] = boolean_sym;
|
258
381
|
break;
|
259
382
|
case number_value:
|
260
|
-
values[0] =
|
383
|
+
values[0] = ULL2NUM(curr_pos - length);
|
261
384
|
values[2] = number_sym;
|
262
385
|
break;
|
263
386
|
case string_value:
|
264
|
-
values[0] =
|
387
|
+
values[0] = ULL2NUM(curr_pos - length);
|
265
388
|
values[2] = string_sym;
|
266
389
|
break;
|
267
390
|
case object_value:
|
268
|
-
values[0] =
|
391
|
+
values[0] = ULL2NUM(sctx->starts[sctx->current_path_len]);
|
269
392
|
values[2] = object_sym;
|
270
393
|
break;
|
271
394
|
case array_value:
|
272
|
-
values[0] =
|
395
|
+
values[0] = ULL2NUM(sctx->starts[sctx->current_path_len]);
|
273
396
|
values[2] = array_sym;
|
274
397
|
break;
|
275
398
|
}
|
@@ -288,10 +411,13 @@ VALUE create_path(scan_ctx *sctx)
|
|
288
411
|
switch (sctx->current_path[i].type)
|
289
412
|
{
|
290
413
|
case PATH_KEY:
|
291
|
-
|
414
|
+
if (sctx->symbolize_path_keys)
|
415
|
+
entry = rb_id2sym(rb_intern2(sctx->current_path[i].value.key.val, sctx->current_path[i].value.key.len));
|
416
|
+
else
|
417
|
+
entry = rb_str_new(sctx->current_path[i].value.key.val, sctx->current_path[i].value.key.len);
|
292
418
|
break;
|
293
419
|
case PATH_INDEX:
|
294
|
-
entry =
|
420
|
+
entry = LONG2NUM(sctx->current_path[i].value.index);
|
295
421
|
break;
|
296
422
|
default:
|
297
423
|
entry = Qnil;
|
@@ -307,7 +433,7 @@ void save_point(scan_ctx *sctx, value_type type, size_t length)
|
|
307
433
|
// TODO: Abort parsing if all paths are matched and no more matches are possible: only trivial key/index matchers at the current level
|
308
434
|
// TODO: Don't re-compare already matched prefixes; hard to invalidate, though
|
309
435
|
// TODO: Might fail in case of no memory
|
310
|
-
VALUE point = Qundef;
|
436
|
+
VALUE point = Qundef, path;
|
311
437
|
int match;
|
312
438
|
for (int i = 0; i < sctx->paths_len; i++)
|
313
439
|
{
|
@@ -319,6 +445,10 @@ void save_point(scan_ctx *sctx, value_type type, size_t length)
|
|
319
445
|
{
|
320
446
|
switch (sctx->paths[i].elems[j].type)
|
321
447
|
{
|
448
|
+
case MATCHER_ANY_KEY:
|
449
|
+
if (sctx->current_path[j].type != PATH_KEY)
|
450
|
+
match = false;
|
451
|
+
break;
|
322
452
|
case MATCHER_KEY:
|
323
453
|
if (sctx->current_path[j].type != PATH_KEY ||
|
324
454
|
sctx->current_path[j].value.key.len != sctx->paths[i].elems[j].value.key.len ||
|
@@ -344,10 +474,11 @@ void save_point(scan_ctx *sctx, value_type type, size_t length)
|
|
344
474
|
{
|
345
475
|
if (point == Qundef)
|
346
476
|
{
|
347
|
-
point = create_point(sctx, type, length
|
477
|
+
point = create_point(sctx, type, length);
|
348
478
|
if (sctx->with_path)
|
349
479
|
{
|
350
|
-
|
480
|
+
path = create_path(sctx);
|
481
|
+
point = rb_ary_new_from_args(2, path, point);
|
351
482
|
}
|
352
483
|
}
|
353
484
|
// rb_ary_push raises only in case of a frozen array, which is not the case
|
@@ -411,7 +542,7 @@ int scan_on_start_object(void *ctx)
|
|
411
542
|
return true;
|
412
543
|
}
|
413
544
|
increment_arr_index(sctx);
|
414
|
-
sctx->starts[sctx->current_path_len] =
|
545
|
+
sctx->starts[sctx->current_path_len] = scan_ctx_get_bytes_consumed(sctx) - 1;
|
415
546
|
if (sctx->current_path_len < sctx->max_path_len)
|
416
547
|
sctx->current_path[sctx->current_path_len].type = PATH_KEY;
|
417
548
|
sctx->current_path_len++;
|
@@ -451,7 +582,7 @@ int scan_on_start_array(void *ctx)
|
|
451
582
|
return true;
|
452
583
|
}
|
453
584
|
increment_arr_index(sctx);
|
454
|
-
sctx->starts[sctx->current_path_len] =
|
585
|
+
sctx->starts[sctx->current_path_len] = scan_ctx_get_bytes_consumed(sctx) - 1;
|
455
586
|
if (sctx->current_path_len < sctx->max_path_len)
|
456
587
|
{
|
457
588
|
sctx->current_path[sctx->current_path_len].type = PATH_INDEX;
|
@@ -471,6 +602,107 @@ int scan_on_end_array(void *ctx)
|
|
471
602
|
return true;
|
472
603
|
}
|
473
604
|
|
605
|
+
void config_free(void *data)
|
606
|
+
{
|
607
|
+
scan_ctx_free((scan_ctx *)data);
|
608
|
+
ruby_xfree(data);
|
609
|
+
}
|
610
|
+
|
611
|
+
size_t config_size(const void *data)
|
612
|
+
{
|
613
|
+
// see ObjectSpace.memsize_of
|
614
|
+
scan_ctx *ctx = (scan_ctx *)data;
|
615
|
+
size_t res = sizeof(scan_ctx);
|
616
|
+
// current_path
|
617
|
+
if (ctx->current_path != NULL)
|
618
|
+
res += ctx->max_path_len * sizeof(path_elem_t);
|
619
|
+
// starts
|
620
|
+
if (ctx->starts != NULL)
|
621
|
+
res += ctx->max_path_len * sizeof(size_t);
|
622
|
+
if (ctx->paths != NULL)
|
623
|
+
{
|
624
|
+
res += ctx->paths_len * sizeof(paths_t);
|
625
|
+
for (int i = 0; i < ctx->paths_len; i++)
|
626
|
+
{
|
627
|
+
res += ctx->paths[i].len * sizeof(path_matcher_elem_t);
|
628
|
+
}
|
629
|
+
}
|
630
|
+
return res;
|
631
|
+
}
|
632
|
+
|
633
|
+
static const rb_data_type_t config_type = {
|
634
|
+
.wrap_struct_name = "json_scanner_config",
|
635
|
+
.function = {
|
636
|
+
.dfree = config_free,
|
637
|
+
.dsize = config_size,
|
638
|
+
},
|
639
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY,
|
640
|
+
};
|
641
|
+
|
642
|
+
// Allocation function of the Config class: creates the Ruby object together with an
// empty scan_ctx (no paths yet); the paths are filled in later by #initialize.
//
// TypedData_Make_Struct allocates the zero-filled struct and wraps it in one step, so
// there is no window in which the struct exists but is not yet owned by a Ruby object
// (with ruby_xmalloc + TypedData_Wrap_Struct it leaked if wrapping raised), and no
// field is ever left uninitialized.
VALUE config_alloc(VALUE self)
{
  scan_ctx *ctx;
  VALUE obj = TypedData_Make_Struct(self, scan_ctx, &config_type, ctx);
  // zero-filled memory already gives paths/current_path/starts == NULL and all lengths == 0,
  // the explicit assignments just document the "empty config" state
  ctx->paths = NULL;
  ctx->paths_len = 0;
  ctx->current_path = NULL;
  ctx->max_path_len = 0;
  ctx->starts = NULL;
  scan_ctx_reset(ctx, Qundef, false, false);
  return obj;
}
|
653
|
+
|
654
|
+
// Config#initialize(path_ary): compiles path_ary into the wrapped scan_ctx.
// Raises the exception returned by scan_ctx_init when path_ary is invalid.
VALUE config_m_initialize(VALUE self, VALUE path_ary)
{
  scan_ctx *ctx;
  VALUE scan_ctx_init_err, string_keys;
  TypedData_Get_Struct(self, scan_ctx, &config_type, ctx);
  // #initialize may be invoked more than once on the same object; release whatever a
  // previous call allocated, otherwise the old buffers would be overwritten and leaked.
  // On a freshly allocated config all pointers are NULL and this is a no-op.
  scan_ctx_free(ctx);
  ctx->starts = NULL;
  ctx->current_path = NULL;
  ctx->paths = NULL;
  ctx->paths_len = 0;
  ctx->max_path_len = 0;
  // collects the duplicated key strings whose buffers the matchers point into
  string_keys = rb_ary_new();
  scan_ctx_init_err = scan_ctx_init(ctx, path_ary, string_keys);
  // scan_ctx_init requires the caller to keep path_ary alive for the whole call
  RB_GC_GUARD(path_ary);
  if (scan_ctx_init_err != Qundef)
  {
    rb_exc_raise(scan_ctx_init_err);
  }
  // keep the key strings referenced for as long as the config object lives
  rb_iv_set(self, "string_keys", string_keys);
  return self;
}
|
668
|
+
|
669
|
+
// Config#inspect: returns a string of the form "#<ClassName [[matcher, ...], ...]>"
// listing every configured path. Keys are printed in single quotes, indexes as
// integers, index ranges as (start..end) and the any-key matcher as ('*'..'*').
VALUE config_m_inspect(VALUE self)
{
  scan_ctx *ctx;
  VALUE res;
  TypedData_Get_Struct(self, scan_ctx, &config_type, ctx);
  res = rb_sprintf("#<%" PRIsVALUE " [", rb_class_name(CLASS_OF(self)));
  for (int i = 0; ctx->paths && i < ctx->paths_len; i++)
  {
    rb_str_cat_cstr(res, "[");
    for (int j = 0; j < ctx->paths[i].len; j++)
    {
      switch (ctx->paths[i].elems[j].type)
      {
      case MATCHER_KEY:
        // append the raw bytes with an explicit length: a "%.*s" format would stop at an
        // embedded NUL byte and forces the length through an int
        rb_str_cat_cstr(res, "'");
        rb_str_cat(res, ctx->paths[i].elems[j].value.key.val, (long)ctx->paths[i].elems[j].value.key.len);
        rb_str_cat_cstr(res, "'");
        break;
      case MATCHER_INDEX:
        rb_str_catf(res, "%ld", ctx->paths[i].elems[j].value.index);
        break;
      case MATCHER_INDEX_RANGE:
        rb_str_catf(res, "(%ld..%ld)", ctx->paths[i].elems[j].value.range.start, ctx->paths[i].elems[j].value.range.end);
        break;
      case MATCHER_ANY_KEY:
        rb_str_cat_cstr(res, "('*'..'*')");
        break;
      }
      if (j < ctx->paths[i].len - 1)
        rb_str_cat_cstr(res, ", ");
    }
    rb_str_cat_cstr(res, "]");
    if (i < ctx->paths_len - 1)
      rb_str_cat_cstr(res, ", ");
  }
  rb_str_cat_cstr(res, "]>");
  return res;
}
|
705
|
+
|
474
706
|
static yajl_callbacks scan_callbacks = {
|
475
707
|
scan_on_null,
|
476
708
|
scan_on_boolean,
|
@@ -492,15 +724,16 @@ static yajl_callbacks scan_callbacks = {
|
|
492
724
|
VALUE scan(int argc, VALUE *argv, VALUE self)
|
493
725
|
{
|
494
726
|
VALUE json_str, path_ary, with_path_flag, kwargs;
|
495
|
-
VALUE kwargs_values[
|
727
|
+
VALUE kwargs_values[SCAN_KWARGS_SIZE];
|
496
728
|
|
497
|
-
int with_path = false, verbose_error = false;
|
729
|
+
int with_path = false, verbose_error = false, symbolize_path_keys = false;
|
498
730
|
char *json_text;
|
499
731
|
size_t json_text_len;
|
500
732
|
yajl_handle handle;
|
501
733
|
yajl_status stat;
|
502
734
|
scan_ctx *ctx;
|
503
|
-
|
735
|
+
int free_ctx = true;
|
736
|
+
VALUE err_msg = Qnil, bytes_consumed, err, result;
|
504
737
|
// Turned out callbacks can't raise exceptions
|
505
738
|
// VALUE callback_err;
|
506
739
|
#if RUBY_API_VERSION_MAJOR > 2 || (RUBY_API_VERSION_MAJOR == 2 && RUBY_API_VERSION_MINOR >= 7)
|
@@ -512,11 +745,13 @@ VALUE scan(int argc, VALUE *argv, VALUE self)
|
|
512
745
|
with_path = RTEST(with_path_flag);
|
513
746
|
if (kwargs != Qnil)
|
514
747
|
{
|
515
|
-
rb_get_kwargs(kwargs, scan_kwargs_table, 0,
|
748
|
+
rb_get_kwargs(kwargs, scan_kwargs_table, 0, SCAN_KWARGS_SIZE, kwargs_values);
|
516
749
|
if (kwargs_values[0] != Qundef)
|
517
750
|
with_path = RTEST(kwargs_values[0]);
|
518
751
|
if (kwargs_values[1] != Qundef)
|
519
752
|
verbose_error = RTEST(kwargs_values[1]);
|
753
|
+
if (kwargs_values[7] != Qundef)
|
754
|
+
symbolize_path_keys = RTEST(kwargs_values[7]);
|
520
755
|
}
|
521
756
|
rb_check_type(json_str, T_STRING);
|
522
757
|
json_text = RSTRING_PTR(json_str);
|
@@ -525,7 +760,30 @@ VALUE scan(int argc, VALUE *argv, VALUE self)
|
|
525
760
|
#else
|
526
761
|
json_text_len = RSTRING_LEN(json_str);
|
527
762
|
#endif
|
528
|
-
|
763
|
+
if (rb_obj_is_kind_of(path_ary, rb_cJsonScannerConfig))
|
764
|
+
{
|
765
|
+
free_ctx = false;
|
766
|
+
TypedData_Get_Struct(path_ary, scan_ctx, &config_type, ctx);
|
767
|
+
}
|
768
|
+
else
|
769
|
+
{
|
770
|
+
VALUE scan_ctx_init_err;
|
771
|
+
ctx = ruby_xmalloc(sizeof(scan_ctx));
|
772
|
+
scan_ctx_init_err = scan_ctx_init(ctx, path_ary, Qundef);
|
773
|
+
if (scan_ctx_init_err != Qundef)
|
774
|
+
{
|
775
|
+
ruby_xfree(ctx);
|
776
|
+
rb_exc_raise(scan_ctx_init_err);
|
777
|
+
}
|
778
|
+
}
|
779
|
+
// Need to keep a ref to result array on the stack to prevent it from being GC-ed
|
780
|
+
result = rb_ary_new_capa(ctx->paths_len);
|
781
|
+
for (int i = 0; i < ctx->paths_len; i++)
|
782
|
+
{
|
783
|
+
rb_ary_push(result, rb_ary_new());
|
784
|
+
}
|
785
|
+
scan_ctx_reset(ctx, result, with_path, symbolize_path_keys);
|
786
|
+
// scan_ctx_debug(ctx);
|
529
787
|
|
530
788
|
handle = yajl_alloc(&scan_callbacks, NULL, (void *)ctx);
|
531
789
|
if (kwargs != Qnil) // it's safe to read kwargs_values only if rb_get_kwargs was called
|
@@ -543,24 +801,55 @@ VALUE scan(int argc, VALUE *argv, VALUE self)
|
|
543
801
|
}
|
544
802
|
ctx->handle = handle;
|
545
803
|
stat = yajl_parse(handle, (unsigned char *)json_text, json_text_len);
|
804
|
+
scan_ctx_update_bytes_consumed(ctx);
|
546
805
|
if (stat == yajl_status_ok)
|
806
|
+
{
|
547
807
|
stat = yajl_complete_parse(handle);
|
808
|
+
scan_ctx_update_bytes_consumed(ctx);
|
809
|
+
}
|
548
810
|
|
549
811
|
if (stat != yajl_status_ok)
|
550
812
|
{
|
551
813
|
char *str = (char *)yajl_get_error(handle, verbose_error, (unsigned char *)json_text, json_text_len);
|
552
|
-
|
814
|
+
err_msg = rb_utf8_str_new_cstr(str);
|
815
|
+
// TODO: maybe use scan_ctx_get_bytes_consumed here too? But it makes difference in premature EOF
|
816
|
+
bytes_consumed = ULL2NUM(yajl_get_bytes_consumed(handle));
|
553
817
|
yajl_free_error(handle, (unsigned char *)str);
|
554
818
|
}
|
819
|
+
// // Needed when yajl_allow_partial_values is set
|
820
|
+
// if (ctx->current_path_len > 0)
|
821
|
+
// {
|
822
|
+
// if (ctx->current_path_len > ctx->max_path_len)
|
823
|
+
// ctx->current_path_len = ctx->max_path_len;
|
824
|
+
// for (int i = ctx->current_path_len - 1; i > 0; i--)
|
825
|
+
// {
|
826
|
+
// switch (ctx->current_path[i].type)
|
827
|
+
// {
|
828
|
+
// case PATH_KEY:
|
829
|
+
// scan_on_end_object(ctx);
|
830
|
+
// break;
|
831
|
+
// case PATH_INDEX:
|
832
|
+
// scan_on_end_array(ctx);
|
833
|
+
// break;
|
834
|
+
// }
|
835
|
+
// }
|
836
|
+
// }
|
555
837
|
// callback_err = ctx->rb_err;
|
556
|
-
|
557
|
-
|
838
|
+
if (free_ctx)
|
839
|
+
{
|
840
|
+
// fprintf(stderr, "free_ctx\n");
|
841
|
+
scan_ctx_free(ctx);
|
842
|
+
ruby_xfree(ctx);
|
843
|
+
}
|
558
844
|
yajl_free(handle);
|
559
|
-
if (
|
560
|
-
|
845
|
+
if (err_msg != Qnil)
|
846
|
+
{
|
847
|
+
err = rb_exc_new_str(rb_eJsonScannerParseError, err_msg);
|
848
|
+
rb_ivar_set(err, rb_iv_bytes_consumed, bytes_consumed);
|
849
|
+
rb_exc_raise(err);
|
850
|
+
}
|
561
851
|
// if (callback_err != Qnil)
|
562
852
|
// rb_exc_raise(callback_err);
|
563
|
-
// TODO: report yajl_get_bytes_consumed(handle)
|
564
853
|
return result;
|
565
854
|
}
|
566
855
|
|
@@ -568,8 +857,16 @@ RUBY_FUNC_EXPORTED void
|
|
568
857
|
Init_json_scanner(void)
|
569
858
|
{
|
570
859
|
rb_mJsonScanner = rb_define_module("JsonScanner");
|
860
|
+
rb_cJsonScannerConfig = rb_define_class_under(rb_mJsonScanner, "Config", rb_cObject);
|
861
|
+
rb_define_alloc_func(rb_cJsonScannerConfig, config_alloc);
|
862
|
+
rb_define_method(rb_cJsonScannerConfig, "initialize", config_m_initialize, 1);
|
863
|
+
rb_define_method(rb_cJsonScannerConfig, "inspect", config_m_inspect, 0);
|
571
864
|
rb_define_const(rb_mJsonScanner, "ANY_INDEX", rb_range_new(INT2FIX(0), INT2FIX(-1), false));
|
865
|
+
any_key_sym = rb_id2sym(rb_intern("*"));
|
866
|
+
rb_define_const(rb_mJsonScanner, "ANY_KEY", rb_range_new(any_key_sym, any_key_sym, false));
|
572
867
|
rb_eJsonScannerParseError = rb_define_class_under(rb_mJsonScanner, "ParseError", rb_eRuntimeError);
|
868
|
+
rb_define_attr(rb_eJsonScannerParseError, BYTES_CONSUMED, true, false);
|
869
|
+
rb_iv_bytes_consumed = rb_intern("@" BYTES_CONSUMED);
|
573
870
|
rb_define_module_function(rb_mJsonScanner, "scan", scan, -1);
|
574
871
|
null_sym = rb_id2sym(rb_intern("null"));
|
575
872
|
boolean_sym = rb_id2sym(rb_intern("boolean"));
|
@@ -584,4 +881,5 @@ Init_json_scanner(void)
|
|
584
881
|
scan_kwargs_table[4] = rb_intern("allow_trailing_garbage");
|
585
882
|
scan_kwargs_table[5] = rb_intern("allow_multiple_values");
|
586
883
|
scan_kwargs_table[6] = rb_intern("allow_partial_values");
|
884
|
+
scan_kwargs_table[7] = rb_intern("symbolize_path_keys");
|
587
885
|
}
|
data/lib/json_scanner/version.rb
CHANGED
data/spec/extensiontesttask.rb
ADDED
@@ -0,0 +1,128 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "rake/clean"
|
4
|
+
require "rake/extensiontask"
|
5
|
+
|
6
|
+
module Rake
  #
  # Extension task that additionally compiles C spec files against the
  # objects of an already compiled extension and defines tasks to run the
  # resulting executables directly, under valgrind, or under gdb.
  #
  # Defined tasks (+name+ is the extension name):
  #
  #   compile:name:test       - compile and link the C specs
  #   spec:c:name[test]       - run one spec executable, or all when omitted
  #   spec:valgrind:name[test] - run one spec executable under valgrind
  #   spec:gdb:name[test]     - run one spec executable under gdb
  #   spec:c                  - run the C specs
  #
  class ExtensionTestTask < ExtensionTask
    #
    # The C files to compile.
    #
    attr_accessor :c_spec_files

    #
    # The folders where includes for the test files are.
    #
    # Default: %w{/usr/include /usr/include/google}
    #
    attr_accessor :test_includes

    #
    # The libraries to link against.
    #
    # Default: %w{cmockery}
    #
    attr_accessor :test_libraries

    #
    # The folders where the libraries are
    #
    # Default: %w{/usr/lib}
    #
    attr_accessor :test_lib_folders

    #
    # Accepts the same arguments and configuration block as ExtensionTask.
    #
    def initialize(*args, &block)
      super
      # rake-compiler's constructor yields to the configuration block before
      # returning, so by now the caller may already have set these
      # attributes. Only fill in defaults for the ones still unset instead
      # of overwriting the caller's configuration.
      @c_spec_files ||= []
      @test_includes ||= %w[/usr/include /usr/include/google]
      @test_libraries ||= %w[cmockery]
      @test_lib_folders ||= %w[/usr/lib]
      init_test_tasks(
        "#{@tmp_dir}/test", "compile:#{@name}:test",
        "spec:c:#{@name}", "spec:valgrind:#{@name}", "spec:gdb:#{@name}",
      )
    end

    private

    # Compiler "-I" flags: the configured test include folders, the
    # extension sources, and the Ruby headers.
    def includes
      @includes ||= (@test_includes + [
        ".",
        "../../#{@ext_dir}",
        "/usr/include/ruby-#{RUBY_VERSION}",
        "/usr/include/ruby-#{RUBY_VERSION}/#{RUBY_PLATFORM}",
        # The hard-coded guesses above only match some system installs;
        # RbConfig knows where the running interpreter's headers really are.
        RbConfig::CONFIG["rubyhdrdir"],
        RbConfig::CONFIG["rubyarchhdrdir"],
      ]).compact.uniq.map { |dir| "-I#{dir}" }.join(" ")
    end

    # Linker "-l" flags for the configured test libraries plus the
    # libraries the original task always linked (ruby, pthread, crypto).
    def libraries
      @libraries ||= (@test_libraries + %w[ruby pthread crypto]).map { |lib| "-l#{lib}" }.join(" ")
    end

    # Linker "-L" flags for the configured library folders.
    def lib_folders
      @lib_folders ||= (@test_lib_folders + %w[/usr/lib .]).map { |dir| "-L#{dir}" }.join(" ")
    end

    # Compiles every C file in the current directory and links each resulting
    # object, together with the extension's objects, into an executable named
    # after the object file without its extension.
    def compile_tests
      # compile the test sources
      FileList["*.c"].each do |cfile|
        sh "gcc -g #{includes} -c #{cfile}"
      end

      source_objects = FileList["../#{RUBY_PLATFORM}/#{@name}/#{RUBY_VERSION}/*.o"]
      # link the executables; libraries go after the objects that reference
      # them because the linker resolves symbols in command-line order
      FileList["*.o"].each do |ofile|
        sh "gcc -g #{source_objects} #{ofile} #{lib_folders} #{libraries} -o #{ofile.ext}"
      end
    end

    # Defines the task that copies the spec sources into +compile_dir+ and
    # builds them there.
    def init_compile_task(compile_dir, compile_task)
      directory compile_dir
      desc "Compile #{@name} tests"
      task compile_task => ["compile:#{@name}", compile_dir] do
        # copy the test files into the compilation folder
        @c_spec_files.each { |file| cp file, compile_dir }

        # start compilation
        chdir(compile_dir) { compile_tests }
      end
    end

    # Path of the spec executable selected through the +test+ task argument.
    #
    # Raises ArgumentError when no test was given, because the tools using
    # this path cannot do anything useful with the bare directory.
    def selected_test(compile_dir, args, task_name)
      test = args.test
      raise ArgumentError, "no test given, run it as: rake \"#{task_name}[TEST_NAME]\"" unless test

      "#{compile_dir}/#{test}"
    end

    # Defines the task running a single spec executable under valgrind.
    def init_valgrind_task(compile_dir, compile_task, valgrind_task)
      desc "Execute valgrind for a #{@name} test"
      # :test must be declared, otherwise args.test is always nil
      task valgrind_task, [:test] => [compile_task] do |_t, args|
        sh "valgrind --num-callers=50 --error-limit=no --partial-loads-ok=yes --undef-value-errors=no " \
           "--leak-check=full #{selected_test(compile_dir, args, valgrind_task)}"
      end
    end

    # Defines the task opening a single spec executable in gdb.
    def init_gdb_task(compile_dir, compile_task, gdb_task)
      desc "Execute gdb for a #{@name} test"
      task gdb_task, [:test] => [compile_task] do |_t, args|
        sh "gdb #{selected_test(compile_dir, args, gdb_task)}"
      end
    end

    # Defines the task running one spec executable when the +test+ argument
    # is given, or every built spec executable otherwise.
    def init_test_task(compile_dir, compile_task, test_task)
      desc "Test #{@name}"
      task test_task, [:test] => [compile_task] do |_t, args|
        if args.test
          sh "#{compile_dir}/#{args.test}"
        else
          FileList["#{compile_dir}/*.o"].each do |ofile|
            sh ofile.ext.to_s
          end
        end
      end
    end

    # Wires up all test related tasks and hooks the test task into the
    # aggregate "spec:c" task.
    def init_test_tasks(compile_dir, compile_task, test_task, valgrind_task, gdb_task)
      init_compile_task(compile_dir, compile_task)
      init_valgrind_task(compile_dir, compile_task, valgrind_task)
      init_gdb_task(compile_dir, compile_task, gdb_task)
      init_test_task(compile_dir, compile_task, test_task)

      desc "Test all C extensions"
      task "spec:c" => [test_task]
    end
  end
end
|
data/spec/json_scanner_spec.c
ADDED
File without changes
|
data/spec/json_scanner_spec.rb
CHANGED
@@ -22,6 +22,34 @@ RSpec.describe JsonScanner do
|
|
22
22
|
)
|
23
23
|
end
|
24
24
|
|
25
|
+
it "supports 'symbolize_path_keys'" do
|
26
|
+
expect(
|
27
|
+
described_class.scan('{"a": {"b": 1}}', [[:a, "b"]], with_path: true),
|
28
|
+
).to eq([[[%w[a b], [12, 13, :number]]]])
|
29
|
+
expect(
|
30
|
+
described_class.scan('{"a": {"b": 1}}', [[:a, "b"]], with_path: true, symbolize_path_keys: true),
|
31
|
+
).to eq([[[%i[a b], [12, 13, :number]]]])
|
32
|
+
end
|
33
|
+
|
34
|
+
it "supports any key selector" do
|
35
|
+
expect(
|
36
|
+
described_class.scan(
|
37
|
+
'[{"a":1,"b":2},{"c":3,"d":4},[5]]',
|
38
|
+
[[described_class::ANY_INDEX, described_class::ANY_KEY]],
|
39
|
+
),
|
40
|
+
).to eq(
|
41
|
+
[[[6, 7, :number], [12, 13, :number], [20, 21, :number], [26, 27, :number]]],
|
42
|
+
)
|
43
|
+
expect(
|
44
|
+
described_class.scan(
|
45
|
+
'{"a":[1,2],"b":{"c":3}}',
|
46
|
+
[[described_class::ANY_KEY, described_class::ANY_INDEX]],
|
47
|
+
),
|
48
|
+
).to eq(
|
49
|
+
[[[6, 7, :number], [8, 9, :number]]],
|
50
|
+
)
|
51
|
+
end
|
52
|
+
|
25
53
|
it "works with max path len correctly" do
|
26
54
|
expect(
|
27
55
|
described_class.scan('{"a": [1]}', [[], ["a"]]),
|
@@ -90,11 +118,24 @@ RSpec.describe JsonScanner do
|
|
90
118
|
expect do
|
91
119
|
described_class.scan "{1}", [], verbose_error: true
|
92
120
|
end.to raise_error described_class::ParseError, /invalid object key(?=.*\(right here\))/m
|
121
|
+
expect do
|
122
|
+
described_class.scan("[0, 42,", [[(1..-1)]], verbose_error: true)
|
123
|
+
end.to raise_error described_class::ParseError, /parse error: premature EOF.*\[0, 42,.*\(right here\) ------\^/m
|
124
|
+
end
|
125
|
+
|
126
|
+
it "includes bytes consumed in the exception" do
|
127
|
+
expect do
|
128
|
+
described_class.scan("[[1,2],,[3,4]]", [])
|
129
|
+
end.to(
|
130
|
+
raise_error(described_class::ParseError) do |exc|
|
131
|
+
expect(exc.bytes_consumed).to eq(8)
|
132
|
+
end,
|
133
|
+
)
|
93
134
|
end
|
94
135
|
|
95
136
|
it "allows to return an actual path to the element" do
|
96
137
|
with_path_expected_res = [
|
97
|
-
# result for first
|
138
|
+
# result for first matcher, each element array of two items:
|
98
139
|
# array of path elements and 3-element array start,end,type
|
99
140
|
[[[0], [1, 6, :array]], [[1], [7, 12, :array]]],
|
100
141
|
[
|
@@ -128,7 +169,7 @@ RSpec.describe JsonScanner do
|
|
128
169
|
),
|
129
170
|
).to eq(
|
130
171
|
[
|
131
|
-
# result for first
|
172
|
+
# result for first matcher, each element 3-element array start,end,type
|
132
173
|
[[1, 6, :array], [7, 12, :array]],
|
133
174
|
[
|
134
175
|
[2, 3, :number], [4, 5, :number],
|
@@ -190,4 +231,122 @@ RSpec.describe JsonScanner do
|
|
190
231
|
described_class.scan(json, [[]])
|
191
232
|
end.to raise_error(described_class::ParseError)
|
192
233
|
end
|
234
|
+
|
235
|
+
context "with yajl params" do
|
236
|
+
it "supports 'allow_comments'" do
|
237
|
+
params = ["[0, /* answer */ 42, 0]", [[(1..-1)]]]
|
238
|
+
expect(described_class.scan(*params, allow_comments: true)).to eq(
|
239
|
+
[[[17, 19, :number], [21, 22, :number]]],
|
240
|
+
)
|
241
|
+
expect do
|
242
|
+
described_class.scan(*params)
|
243
|
+
end.to raise_error(described_class::ParseError)
|
244
|
+
end
|
245
|
+
|
246
|
+
it "supports 'dont_validate_strings'" do
|
247
|
+
params = ["\"\x81\x83\"", [[]]]
|
248
|
+
expect(described_class.scan(*params, dont_validate_strings: true)).to eq(
|
249
|
+
[[[0, 4, :string]]],
|
250
|
+
)
|
251
|
+
expect do
|
252
|
+
described_class.scan(*params)
|
253
|
+
end.to raise_error(described_class::ParseError)
|
254
|
+
params = ["{\"\x81\x83\": 42}", [[JsonScanner::ANY_KEY]]]
|
255
|
+
expect(described_class.scan(*params, dont_validate_strings: true, with_path: true)).to eq(
|
256
|
+
[[[["\x81\x83".dup.force_encoding(Encoding::BINARY)], [7, 9, :number]]]],
|
257
|
+
)
|
258
|
+
expect do
|
259
|
+
described_class.scan(*params, with_path: true)
|
260
|
+
end.to raise_error(described_class::ParseError)
|
261
|
+
end
|
262
|
+
|
263
|
+
it "supports 'allow_trailing_garbage'" do
|
264
|
+
params = ["[0, 42, 0]garbage", [[(1..-1)]]]
|
265
|
+
expect(described_class.scan(*params, allow_trailing_garbage: true)).to eq(
|
266
|
+
[[[4, 6, :number], [8, 9, :number]]],
|
267
|
+
)
|
268
|
+
expect do
|
269
|
+
described_class.scan(*params)
|
270
|
+
end.to raise_error(described_class::ParseError)
|
271
|
+
end
|
272
|
+
|
273
|
+
it "supports 'allow_multiple_values'" do
|
274
|
+
params = ["[0, 42, 0] [0, 34]", [[(1..-1)]]]
|
275
|
+
expect(described_class.scan(*params, allow_multiple_values: true)).to eq(
|
276
|
+
[[[4, 6, :number], [8, 9, :number], [16, 18, :number]]],
|
277
|
+
)
|
278
|
+
expect do
|
279
|
+
described_class.scan(*params)
|
280
|
+
end.to raise_error(described_class::ParseError)
|
281
|
+
end
|
282
|
+
|
283
|
+
it "handles multiple top-level values correctly with 'allow_multiple_values'" do
|
284
|
+
expect(described_class.scan("[0, 42, 0] [0, 34]", [[]], allow_multiple_values: true)).to eq(
|
285
|
+
[[[0, 10, :array], [12, 19, :array]]],
|
286
|
+
)
|
287
|
+
expect(described_class.scan('{"42": 34} [0, 34]', [[]], allow_multiple_values: true)).to eq(
|
288
|
+
[[[0, 10, :object], [12, 19, :array]]],
|
289
|
+
)
|
290
|
+
expect(described_class.scan('[0, 42, 0] {"42": 34}', [[]], allow_multiple_values: true)).to eq(
|
291
|
+
[[[0, 10, :array], [12, 22, :object]]],
|
292
|
+
)
|
293
|
+
expect(described_class.scan('{"42": 34} {"0": 34}', [[]], allow_multiple_values: true)).to eq(
|
294
|
+
[[[0, 10, :object], [12, 21, :object]]],
|
295
|
+
)
|
296
|
+
end
|
297
|
+
|
298
|
+
it "supports 'allow_partial_values'" do
|
299
|
+
params = ["[0, 42, 0,", [[(1..-1)]]]
|
300
|
+
expect(described_class.scan(*params, allow_partial_values: true)).to eq(
|
301
|
+
[[[4, 6, :number], [8, 9, :number]]],
|
302
|
+
)
|
303
|
+
expect do
|
304
|
+
described_class.scan(*params)
|
305
|
+
end.to raise_error(described_class::ParseError)
|
306
|
+
expect(described_class.scan("[0, 42, 0", [[(1..-1)]], allow_partial_values: true)).to eq(
|
307
|
+
[[[4, 6, :number], [8, 9, :number]]],
|
308
|
+
)
|
309
|
+
expect(described_class.scan("[0, 42, true", [[(1..-1)]], allow_partial_values: true)).to eq(
|
310
|
+
[[[4, 6, :number], [8, 12, :boolean]]],
|
311
|
+
)
|
312
|
+
end
|
313
|
+
end
|
314
|
+
|
315
|
+
describe described_class::Config do
|
316
|
+
it "saves state" do
|
317
|
+
key = "abracadabra".dup
|
318
|
+
conf = described_class.new [[], [key]]
|
319
|
+
key["cad"] = 0.chr
|
320
|
+
key = nil # rubocop:disable Lint/UselessAssignment
|
321
|
+
GC.start
|
322
|
+
expect(
|
323
|
+
10.times.map do
|
324
|
+
JsonScanner.scan '{"abracadabra": 10}', conf, with_path: true
|
325
|
+
end.uniq,
|
326
|
+
).to eq([[[[[], [0, 19, :object]]], [[["abracadabra"], [16, 18, :number]]]]])
|
327
|
+
expect(
|
328
|
+
10.times.map do
|
329
|
+
JsonScanner.scan '{"abracadabra": 10}', conf
|
330
|
+
end.uniq,
|
331
|
+
).to eq([[[[0, 19, :object]], [[16, 18, :number]]]])
|
332
|
+
end
|
333
|
+
|
334
|
+
it "re-raises exceptions" do
|
335
|
+
expect do
|
336
|
+
described_class.new [[(0...-1)]]
|
337
|
+
end.to raise_error ArgumentError
|
338
|
+
expect do
|
339
|
+
described_class.new [[(0..-2)]]
|
340
|
+
end.to raise_error ArgumentError
|
341
|
+
expect do
|
342
|
+
described_class.new [[(-42..1)]]
|
343
|
+
end.to raise_error ArgumentError
|
344
|
+
end
|
345
|
+
|
346
|
+
it "supports inspect" do
|
347
|
+
expect(
|
348
|
+
described_class.new([[], ["abracadabra", JsonScanner::ANY_INDEX], [42, JsonScanner::ANY_KEY]]).inspect,
|
349
|
+
).to eq("#<JsonScanner::Config [[], ['abracadabra', (0..9223372036854775807)], [42, ('*'..'*')]]>")
|
350
|
+
end
|
351
|
+
end
|
193
352
|
end
|
metadata
CHANGED
@@ -1,16 +1,16 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: json_scanner
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- uvlad7
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2025-08-14 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
|
-
description: This gem uses yajl lib to scan a
|
13
|
+
description: This gem uses the yajl lib to scan a JSON string and allows you to parse
|
14
14
|
pieces of it
|
15
15
|
email:
|
16
16
|
- uvlad7@gmail.com
|
@@ -26,6 +26,8 @@ files:
|
|
26
26
|
- lib/json_scanner.rb
|
27
27
|
- lib/json_scanner/version.rb
|
28
28
|
- sig/json_scanner.rbs
|
29
|
+
- spec/extensiontesttask.rb
|
30
|
+
- spec/json_scanner_spec.c
|
29
31
|
- spec/json_scanner_spec.rb
|
30
32
|
- spec/spec_helper.rb
|
31
33
|
homepage: https://github.com/uvlad7/json_scanner
|