json_scanner 0.2.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f34237d4ceab009f685b82a4e480247f23c804db96bded6d1bacea5ddd4a0946
4
- data.tar.gz: 87484e4cbab84666b41ddb67553b0c985ef6dc29d8f1154a458173deade08587
3
+ metadata.gz: ca9d160b389a5b605a37baeebd8e9d704a9b42712b9c8de9d0667ffa7e6b3d35
4
+ data.tar.gz: f2c0362e6bb4484e47fd5dbdda4afd7332ce42d266b4d0ebe681f24e18c31b0a
5
5
  SHA512:
6
- metadata.gz: 5f6ae7f8d7afc88fee60e88eb8efe776b7ae0dffe25ecffa5ffc61241629eb4c3ec86a9ab1b1e76d49b4bd12498ee15625677afbc997039f9295e65e510a32df
7
- data.tar.gz: b5e95df2d53c0a224f6a089a55a6a0c502adb7e4dce276f2f85c48a063182f65f1425eff1f97cf409de46c815b45bbfa0626e54d76036a5c66c13eefa4146648
6
+ metadata.gz: 91d190291d0e2c16db38c523a58fbbff23cde136481b2c4a19446d25169a77390eee4792979cd3a22724bdf2c89132b5dc3ccc78c1553c17fe21227a70e1bf3c
7
+ data.tar.gz: dc704ac0ba609f209883d5af50f3f9cabcde04a0eb2c3f5ac04f964010bf866fd727c5c175dd40917526b693f13bc4b532f7a4fabce9859c0b44238cf3fa421b
data/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  # JsonScanner
4
4
 
5
- Extract values from JSON without full parsing. This gem uses yajl lib to scan a json string and allows you to parse pieces of it.
5
+ Extract values from JSON without full parsing. This gem uses the `yajl` library to scan a JSON string and allows you to parse pieces of it.
6
6
 
7
7
  ## Installation
8
8
 
@@ -16,6 +16,8 @@ If bundler is not being used to manage dependencies, install the gem by executin
16
16
 
17
17
  ## Usage
18
18
 
19
+ Basic usage
20
+
19
21
  ```ruby
20
22
  require "json"
21
23
  require "json_scanner"
@@ -32,15 +34,123 @@ emoji_json = '{"grin": "😁", "heart": "😍", "rofl": "🤣"}'
32
34
  begin_pos, end_pos, = JsonScanner.scan(emoji_json, [["heart"]], false).first.first
33
35
  emoji_json.byteslice(begin_pos...end_pos)
34
36
  # => "\"😍\""
35
- # Note: most likely don't need `quirks_mode` option, unless you are using some old ruby
36
- # with stdlib version of json gem or its old version. In new versions `quirks_mode` is default
37
+ # Note: You most likely don't need the `quirks_mode` option unless you are using an older version
38
+ # of Ruby with the stdlib (or an otherwise outdated) version of the json gem. In newer versions, `quirks_mode` is enabled by default.
37
39
  JSON.parse(emoji_json.byteslice(begin_pos...end_pos), quirks_mode: true)
38
40
  # => "😍"
39
41
  # You can also do this
40
42
  # emoji_json.force_encoding(Encoding::BINARY)[begin_pos...end_pos].force_encoding(Encoding::UTF_8)
41
43
  # => "\"😍\""
44
+
45
+ # Ranges are supported as matchers for indexes with the following restrictions:
46
+ # - the start of a range must be non-negative
47
+ # - the end of a range must be non-negative or -1
48
+ # - a range with -1 end must be closed, e.g. (0..-1) works, but (0...-1) is forbidden
49
+ JsonScanner.scan('[0, 42, 0]', [[(1..-1)]])
50
+ # => [[[4, 6, :number], [8, 9, :number]]]
51
+ JsonScanner.scan('[0, 42, 0]', [[JsonScanner::ANY_INDEX]])
52
+ # => [[[1, 2, :number], [4, 6, :number], [8, 9, :number]]]
53
+
54
+ # Special matcher JsonScanner::ANY_KEY is supported for object keys
55
+ JsonScanner.scan('{"a": 1, "b": 2}', [[JsonScanner::ANY_KEY]], with_path: true)
56
+ # => [[[["a"], [6, 7, :number]], [["b"], [14, 15, :number]]]]
57
+ # Regex matchers aren't supported yet, but you can simulate them using the `with_path` option
58
+ JsonScanner.scan(
59
+ '{"question1": 1, "answer": 42, "question2": 2}',
60
+ [[JsonScanner::ANY_KEY]], with_path: true,
61
+ ).map do |res|
62
+ res.map do |path, (begin_pos, end_pos, type)|
63
+ [begin_pos, end_pos, type] if path[0] =~ /\Aquestion/
64
+ end.compact
65
+ end
66
+ # => [[[14, 15, :number], [44, 45, :number]]]
67
+ ```
68
+
69
+ ## Options
70
+
71
+ `JsonScanner` supports multiple options
72
+
73
+ ```ruby
74
+ JsonScanner.scan('[0, 42, 0]', [[(1..-1)]], with_path: true)
75
+ # => [[[[1], [4, 6, :number]], [[2], [8, 9, :number]]]]
76
+ JsonScanner.scan('[0, 42],', [[(1..-1)]], verbose_error: true)
77
+ # JsonScanner::ParseError (parse error: trailing garbage)
78
+ # [0, 42],
79
+ # (right here) ------^
80
+ # Note: the 'right here' pointer is wrong in case of a premature EOF error; this is a bug in libyajl
81
+ JsonScanner.scan('[0, 42,', [[(1..-1)]], verbose_error: true)
82
+ # JsonScanner::ParseError (parse error: premature EOF)
83
+ # [0, 42,
84
+ # (right here) ------^
85
+ JsonScanner.scan('[0, /* answer */ 42, 0]', [[(1..-1)]], allow_comments: true)
86
+ # => [[[17, 19, :number], [21, 22, :number]]]
87
+ JsonScanner.scan("\"\x81\x83\"", [[]], dont_validate_strings: true)
88
+ # => [[[0, 4, :string]]]
89
+ JsonScanner.scan("{\"\x81\x83\": 42}", [[JsonScanner::ANY_KEY]], dont_validate_strings: true, with_path: true)
90
+ # => [[[["\x81\x83"], [7, 9, :number]]]]
91
+ JsonScanner.scan('[0, 42, 0]garbage', [[(1..-1)]], allow_trailing_garbage: true)
92
+ # => [[[4, 6, :number], [8, 9, :number]]]
93
+ JsonScanner.scan('[0, 42, 0] [0, 34]', [[(1..-1)]], allow_multiple_values: true)
94
+ # => [[[4, 6, :number], [8, 9, :number], [16, 18, :number]]]
95
+ JsonScanner.scan('[0, 42, 0', [[(1..-1)]], allow_partial_values: true)
96
+ # => [[[4, 6, :number], [8, 9, :number]]]
97
+ JsonScanner.scan('{"a": 1}', [[JsonScanner::ANY_KEY]], with_path: true, symbolize_path_keys: true)
98
+ # => [[[[:a], [6, 7, :number]]]]
42
99
  ```
43
100
 
101
+ ### Comments in the JSON
102
+
103
+ Note that the standard `JSON` library supports comments, so you may want to enable them in `JsonScanner` as well:
104
+ ```ruby
105
+ json_str = '{"answer": {"value": 42 /* the Ultimate Question of Life, the Universe, and Everything */ }}'
106
+ JsonScanner.scan(json_str, [["answer"]], allow_comments: true).first.map do |begin_pos, end_pos, _type|
107
+ JSON.parse(json_str.byteslice(begin_pos...end_pos), quirks_mode: true)
108
+ end
109
+ # => [{"value"=>42}]
110
+ ```
111
+
112
+ ### Find the end of a JSON string
113
+
114
+ The `allow_trailing_garbage` option may come in handy if you want to extract a JSON string from JavaScript text:
115
+ ```ruby
116
+ script_text = <<~'JS'
117
+ <script>window.__APOLLO_STATE__={"ContentItem:0":{"__typename":"ContentItem","id":0, "configurationType":"NO_CONFIGURATION","replacementPartsUrl":null,"relatedCategories":[{"__ref":"Category:109450"},{"__ref":"Category:82044355"},{"__ref":"Category:109441"},{"__ref":"Category:109442"},{"__ref":"Category:109449"},{"__ref":"Category:109444"},{"__ref":"Category:82043730"}],"recommendedOptions":[]}};window.__APPVERSION__=7018;window.__CONFIG_ENV__={value: 'PRODUCTION'};</script>
118
+ JS
119
+ json_with_trailing_garbage = script_text[/__APOLLO_STATE__\s*=\s*({.+)/, 1]
120
+ json_end_pos = JsonScanner.scan(json_with_trailing_garbage, [[]], allow_trailing_garbage: true).first.first[1]
121
+ apollo_state = JSON.parse(json_with_trailing_garbage[0...json_end_pos])
122
+ ```
123
+
124
+ ## Reuse configuration
125
+
126
+ You can create a `JsonScanner::Config` instance and reuse it between `JsonScanner.scan` calls
127
+
128
+ ```ruby
129
+ require "json_scanner"
130
+
131
+ config = JsonScanner::Config.new([[], ["key"], [(0..-1)]])
132
+ # => #<JsonScanner::Config [[], ['key'], [(0..9223372036854775807)]]>
133
+ JsonScanner.scan('{"key": "42"}', config)
134
+ # => [[[0, 13, :object]], [[8, 12, :string]], []]
135
+ JsonScanner.scan('{"key": "42"}', config, with_path: true)
136
+ # => [[[[], [0, 13, :object]]], [[["key"], [8, 12, :string]]], []]
137
+ JsonScanner.scan('[0, 42]', config)
138
+ # => [[[0, 7, :array]], [], [[1, 2, :number], [4, 6, :number]]]
139
+ JsonScanner.scan('[0, 42]', config, with_path: true)
140
+ # => [[[[], [0, 7, :array]]], [], [[[0], [1, 2, :number]], [[1], [4, 6, :number]]]]
141
+ ```
142
+
143
+ Options can be passed as a hash, even on Ruby 3
144
+ ```ruby
145
+ options = { allow_trailing_garbage: true, allow_partial_values: true }
146
+ JsonScanner.scan('[0, 42', [[1]], options) == JsonScanner.scan('[0, 42]_', [[1]], options)
147
+ # => true
148
+ ```
149
+
150
+ ## Streaming mode
151
+
152
+ Streaming mode isn't supported yet, as it's harder to implement and to use. I plan to add it in the future; its API is still open to discussion. If you have suggestions, use cases, or preferences for how it should behave, I’d love to hear from you!
153
+
44
154
  ## Development
45
155
 
46
156
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
@@ -1,8 +1,12 @@
1
1
  #include "json_scanner.h"
2
2
 
3
3
  VALUE rb_mJsonScanner;
4
+ VALUE rb_cJsonScannerConfig;
4
5
  VALUE rb_eJsonScannerParseError;
5
- ID scan_kwargs_table[7];
6
+ #define BYTES_CONSUMED "bytes_consumed"
7
+ ID rb_iv_bytes_consumed;
8
+ #define SCAN_KWARGS_SIZE 8
9
+ ID scan_kwargs_table[SCAN_KWARGS_SIZE];
6
10
 
7
11
  VALUE null_sym;
8
12
  VALUE boolean_sym;
@@ -11,11 +15,13 @@ VALUE string_sym;
11
15
  VALUE object_sym;
12
16
  VALUE array_sym;
13
17
 
18
+ VALUE any_key_sym;
19
+
14
20
  enum matcher_type
15
21
  {
16
22
  MATCHER_KEY,
17
23
  MATCHER_INDEX,
18
- // MATCHER_ANY_KEY,
24
+ MATCHER_ANY_KEY,
19
25
  MATCHER_INDEX_RANGE,
20
26
  // MATCHER_KEYS_LIST,
21
27
  // MATCHER_KEY_REGEX,
@@ -70,30 +76,115 @@ typedef struct
70
76
  typedef struct
71
77
  {
72
78
  int with_path;
73
- paths_t *paths;
79
+ int symbolize_path_keys;
74
80
  int paths_len;
75
- path_elem_t *current_path;
81
+ paths_t *paths;
76
82
  int current_path_len;
77
83
  int max_path_len;
84
+ path_elem_t *current_path;
78
85
  // Easier to use a Ruby array for result than convert later
86
+ // must be supplied by the caller and RB_GC_GUARD-ed if it isn't on the stack
79
87
  VALUE points_list;
80
88
  // by depth
81
89
  size_t *starts;
82
90
  // VALUE rb_err;
83
91
  yajl_handle handle;
92
+ size_t yajl_bytes_consumed;
84
93
  } scan_ctx;
85
94
 
95
+ inline size_t scan_ctx_get_bytes_consumed(scan_ctx *ctx)
96
+ {
97
+ return ctx->yajl_bytes_consumed + yajl_get_bytes_consumed(ctx->handle);
98
+ }
99
+
100
+ inline void scan_ctx_save_bytes_consumed(scan_ctx *ctx)
101
+ {
102
+ ctx->yajl_bytes_consumed += yajl_get_bytes_consumed(ctx->handle);
103
+ }
104
+
105
+ void scan_ctx_debug(scan_ctx *ctx)
106
+ {
107
+ // actually might have been cleared by GC already, be careful, debug only when in valid state
108
+ VALUE points_list_inspect = ctx->points_list == Qundef ? rb_str_new_cstr("undef") : rb_sprintf("%" PRIsVALUE, rb_inspect(ctx->points_list));
109
+ fprintf(stderr, "\nscan_ctx {\n");
110
+ fprintf(stderr, " with_path: %s,\n", ctx->with_path ? "true" : "false");
111
+ fprintf(stderr, " symbolize_path_keys: %s,\n", ctx->symbolize_path_keys ? "true" : "false");
112
+ fprintf(stderr, " paths_len: %d,\n", ctx->paths_len);
113
+
114
+ fprintf(stderr, " paths: [\n");
115
+ for (int i = 0; ctx->paths && i < ctx->paths_len; i++)
116
+ {
117
+ fprintf(stderr, " [");
118
+ for (int j = 0; j < ctx->paths[i].len; j++)
119
+ {
120
+ switch (ctx->paths[i].elems[j].type)
121
+ {
122
+ case MATCHER_KEY:
123
+ fprintf(stderr, "'%.*s'", (int)ctx->paths[i].elems[j].value.key.len, ctx->paths[i].elems[j].value.key.val);
124
+ break;
125
+ case MATCHER_INDEX:
126
+ fprintf(stderr, "%ld", ctx->paths[i].elems[j].value.index);
127
+ break;
128
+ case MATCHER_INDEX_RANGE:
129
+ fprintf(stderr, "(%ld..%ld)", ctx->paths[i].elems[j].value.range.start, ctx->paths[i].elems[j].value.range.end);
130
+ break;
131
+ case MATCHER_ANY_KEY:
132
+ fprintf(stderr, "('*'..'*')");
133
+ break;
134
+ }
135
+ if (j < ctx->paths[i].len - 1)
136
+ fprintf(stderr, ", ");
137
+ }
138
+ fprintf(stderr, "],\n");
139
+ }
140
+ fprintf(stderr, " ],\n");
141
+
142
+ fprintf(stderr, " current_path_len: %d,\n", ctx->current_path_len);
143
+ fprintf(stderr, " max_path_len: %d,\n", ctx->max_path_len);
144
+ fprintf(stderr, " current_path: [");
145
+ for (int i = 0; i < ctx->current_path_len; i++)
146
+ {
147
+ switch (ctx->current_path[i].type)
148
+ {
149
+ case PATH_KEY:
150
+ fprintf(stderr, "'%.*s'", (int)ctx->current_path[i].value.key.len, ctx->current_path[i].value.key.val);
151
+ break;
152
+ case PATH_INDEX:
153
+ fprintf(stderr, "%ld", ctx->current_path[i].value.index);
154
+ break;
155
+ }
156
+ if (i < ctx->current_path_len - 1)
157
+ fprintf(stderr, ", ");
158
+ }
159
+ fprintf(stderr, "],\n");
160
+
161
+ fprintf(stderr, " points_list: %.*s,\n", RSTRING_LENINT(points_list_inspect), RSTRING_PTR(points_list_inspect));
162
+ fprintf(stderr, " starts: [");
163
+ for (int i = 0; i <= ctx->max_path_len; i++)
164
+ {
165
+ fprintf(stderr, "%ld", ctx->starts[i]);
166
+ if (i < ctx->max_path_len)
167
+ fprintf(stderr, ", ");
168
+ }
169
+ fprintf(stderr, "],\n");
170
+
171
+ fprintf(stderr, " handle: %p,\n", ctx->handle);
172
+ fprintf(stderr, " yajl_bytes_consumed: %ld,\n", ctx->yajl_bytes_consumed);
173
+ fprintf(stderr, "}\n\n\n");
174
+ }
175
+
86
176
  // FIXME: This will cause memory leak if ruby_xmalloc raises
87
- scan_ctx *scan_ctx_init(VALUE path_ary, int with_path)
177
+ // path_ary must be RB_GC_GUARD-ed by the caller
178
+ VALUE scan_ctx_init(scan_ctx *ctx, VALUE path_ary, VALUE string_keys)
88
179
  {
89
180
  int path_ary_len;
90
- scan_ctx *ctx;
91
181
  paths_t *paths;
92
182
  // TODO: Allow to_ary and sized enumerables
93
183
  rb_check_type(path_ary, T_ARRAY);
94
184
  path_ary_len = rb_long2int(rb_array_len(path_ary));
95
185
  // Check types early before any allocations, so exception is ok
96
186
  // TODO: Fix this, just handle errors
187
+ // It's not possible that another Ruby thread changes path_ary items between these two loops, because C call holds GVL
97
188
  for (int i = 0; i < path_ary_len; i++)
98
189
  {
99
190
  int path_len;
@@ -103,38 +194,42 @@ scan_ctx *scan_ctx_init(VALUE path_ary, int with_path)
103
194
  for (int j = 0; j < path_len; j++)
104
195
  {
105
196
  VALUE entry = rb_ary_entry(path, j);
106
- int type = TYPE(entry);
107
- if (type == T_STRING)
197
+ switch (TYPE(entry))
108
198
  {
199
+ case T_SYMBOL:
200
+ entry = rb_sym2str(entry);
201
+ /* fall through */
202
+ case T_STRING:
109
203
  #if LONG_MAX > SIZE_MAX
110
204
  RSTRING_LENINT(entry);
111
205
  #endif
112
- }
113
- else if (type == T_FIXNUM || type == T_BIGNUM)
114
- {
115
- RB_NUM2LONG(entry);
116
- }
117
- else
206
+ break;
207
+ case T_FIXNUM:
208
+ case T_BIGNUM:
209
+ NUM2LONG(entry);
210
+ break;
211
+ default:
118
212
  {
119
213
  VALUE range_beg, range_end;
120
214
  long end_val;
121
215
  int open_ended;
122
216
  if (rb_range_values(entry, &range_beg, &range_end, &open_ended) != Qtrue)
123
- rb_raise(rb_eArgError, "path elements must be strings, integers, or ranges");
124
- if (RB_NUM2LONG(range_beg) < 0L)
125
- rb_raise(rb_eArgError, "range start must be positive");
126
- end_val = RB_NUM2LONG(range_end);
127
- if (end_val < -1L)
128
- rb_raise(rb_eArgError, "range end must be positive or -1");
129
- if (end_val == -1L && open_ended)
130
- rb_raise(rb_eArgError, "range with -1 end must be closed");
217
+ return rb_exc_new_cstr(rb_eArgError, "path elements must be strings, integers, or ranges");
218
+ if (range_beg != any_key_sym || range_end != any_key_sym)
219
+ {
220
+ if (NUM2LONG(range_beg) < 0L)
221
+ return rb_exc_new_cstr(rb_eArgError, "range start must be positive");
222
+ end_val = NUM2LONG(range_end);
223
+ if (end_val < -1L)
224
+ return rb_exc_new_cstr(rb_eArgError, "range end must be positive or -1");
225
+ if (end_val == -1L && open_ended)
226
+ return rb_exc_new_cstr(rb_eArgError, "range with -1 end must be closed");
227
+ }
228
+ }
131
229
  }
132
230
  }
133
231
  }
134
232
 
135
- ctx = ruby_xmalloc(sizeof(scan_ctx));
136
-
137
- ctx->with_path = with_path;
138
233
  ctx->max_path_len = 0;
139
234
 
140
235
  paths = ruby_xmalloc(sizeof(paths_t) * path_ary_len);
@@ -149,9 +244,21 @@ scan_ctx *scan_ctx_init(VALUE path_ary, int with_path)
149
244
  for (int j = 0; j < path_len; j++)
150
245
  {
151
246
  VALUE entry = rb_ary_entry(path, j);
152
- int type = TYPE(entry);
153
- if (type == T_STRING)
247
+ switch (TYPE(entry))
154
248
  {
249
+ case T_SYMBOL:
250
+ entry = rb_sym2str(entry);
251
+ /* fall through */
252
+ case T_STRING:
253
+ {
254
+ if (string_keys != Qundef)
255
+ {
256
+ // If string_keys is provided, we need to duplicate the string
257
+ // to avoid use-after-free issues and to add the newly created string to the string_keys array.
258
+ // In Ruby 2.2 and newer symbols can be GC-ed, so we need to duplicate them as well.
259
+ entry = rb_str_dup(entry);
260
+ rb_ary_push(string_keys, entry);
261
+ }
155
262
  paths[i].elems[j].type = MATCHER_KEY;
156
263
  paths[i].elems[j].value.key.val = RSTRING_PTR(entry);
157
264
  #if LONG_MAX > SIZE_MAX
@@ -160,25 +267,36 @@ scan_ctx *scan_ctx_init(VALUE path_ary, int with_path)
160
267
  paths[i].elems[j].value.key.len = RSTRING_LEN(entry);
161
268
  #endif
162
269
  }
163
- else if (type == T_FIXNUM || type == T_BIGNUM)
270
+ break;
271
+ case T_FIXNUM:
272
+ case T_BIGNUM:
164
273
  {
165
274
  paths[i].elems[j].type = MATCHER_INDEX;
166
275
  paths[i].elems[j].value.index = FIX2LONG(entry);
167
276
  }
168
- else
277
+ break;
278
+ default:
169
279
  {
170
280
  VALUE range_beg, range_end;
171
281
  int open_ended;
172
- paths[i].elems[j].type = MATCHER_INDEX_RANGE;
173
282
  rb_range_values(entry, &range_beg, &range_end, &open_ended);
174
- paths[i].elems[j].value.range.start = RB_NUM2LONG(range_beg);
175
- paths[i].elems[j].value.range.end = RB_NUM2LONG(range_end);
176
- // (value..-1) works as expected, (value...-1) is forbidden above
177
- if (paths[i].elems[j].value.range.end == -1L)
178
- paths[i].elems[j].value.range.end = LONG_MAX;
179
- // -1 here is fine, so, (0...0) works just as expected - doesn't match anything
180
- if (open_ended)
181
- paths[i].elems[j].value.range.end--;
283
+ if (range_beg == any_key_sym && range_end == any_key_sym)
284
+ {
285
+ paths[i].elems[j].type = MATCHER_ANY_KEY;
286
+ }
287
+ else
288
+ {
289
+ paths[i].elems[j].type = MATCHER_INDEX_RANGE;
290
+ paths[i].elems[j].value.range.start = NUM2LONG(range_beg);
291
+ paths[i].elems[j].value.range.end = NUM2LONG(range_end);
292
+ // (value..-1) works as expected, (value...-1) is forbidden above
293
+ if (paths[i].elems[j].value.range.end == -1L)
294
+ paths[i].elems[j].value.range.end = LONG_MAX;
295
+ // -1 here is fine, so, (0...0) works just as expected - doesn't match anything
296
+ if (open_ended)
297
+ paths[i].elems[j].value.range.end--;
298
+ }
299
+ }
182
300
  }
183
301
  }
184
302
  paths[i].len = path_len;
@@ -189,32 +307,37 @@ scan_ctx *scan_ctx_init(VALUE path_ary, int with_path)
189
307
  ctx->paths_len = path_ary_len;
190
308
  ctx->current_path = ruby_xmalloc2(sizeof(path_elem_t), ctx->max_path_len);
191
309
 
192
- ctx->current_path_len = 0;
193
- ctx->points_list = rb_ary_new_capa(path_ary_len);
194
- for (int i = 0; i < path_ary_len; i++)
195
- {
196
- rb_ary_push(ctx->points_list, rb_ary_new());
197
- }
198
-
199
310
  ctx->starts = ruby_xmalloc2(sizeof(size_t), ctx->max_path_len + 1);
311
+ return Qundef; // no error
312
+ }
313
+
314
+ // resets temporary values in the config
315
+ void scan_ctx_reset(scan_ctx *ctx, VALUE points_list, int with_path, int symbolize_path_keys)
316
+ {
317
+ // TODO: reset matched_depth if implemented
318
+ ctx->current_path_len = 0;
200
319
  // ctx->rb_err = Qnil;
201
320
  ctx->handle = NULL;
202
-
203
- return ctx;
321
+ ctx->yajl_bytes_consumed = 0;
322
+ ctx->points_list = points_list;
323
+ ctx->with_path = with_path;
324
+ ctx->symbolize_path_keys = symbolize_path_keys;
204
325
  }
205
326
 
206
327
  void scan_ctx_free(scan_ctx *ctx)
207
328
  {
329
+ // fprintf(stderr, "scan_ctx_free\n");
208
330
  if (!ctx)
209
331
  return;
210
332
  ruby_xfree(ctx->starts);
211
333
  ruby_xfree(ctx->current_path);
334
+ if (!ctx->paths)
335
+ return;
212
336
  for (int i = 0; i < ctx->paths_len; i++)
213
337
  {
214
338
  ruby_xfree(ctx->paths[i].elems);
215
339
  }
216
340
  ruby_xfree(ctx->paths);
217
- ruby_xfree(ctx);
218
341
  }
219
342
 
220
343
  // noexcept
@@ -239,37 +362,38 @@ typedef enum
239
362
  } value_type;
240
363
 
241
364
  // noexcept
242
- VALUE create_point(scan_ctx *sctx, value_type type, size_t length, size_t curr_pos)
365
+ VALUE create_point(scan_ctx *sctx, value_type type, size_t length)
243
366
  {
244
- VALUE values[3];
245
- VALUE point = rb_ary_new_capa(3);
367
+ VALUE values[3], point;
368
+ size_t curr_pos = scan_ctx_get_bytes_consumed(sctx);
369
+ point = rb_ary_new_capa(3);
246
370
  // noexcept
247
- values[1] = RB_ULONG2NUM(curr_pos);
371
+ values[1] = ULL2NUM(curr_pos);
248
372
  switch (type)
249
373
  {
250
374
  // FIXME: size_t can be longer than ulong
251
375
  case null_value:
252
- values[0] = RB_ULONG2NUM(curr_pos - length);
376
+ values[0] = ULL2NUM(curr_pos - length);
253
377
  values[2] = null_sym;
254
378
  break;
255
379
  case boolean_value:
256
- values[0] = RB_ULONG2NUM(curr_pos - length);
380
+ values[0] = ULL2NUM(curr_pos - length);
257
381
  values[2] = boolean_sym;
258
382
  break;
259
383
  case number_value:
260
- values[0] = RB_ULONG2NUM(curr_pos - length);
384
+ values[0] = ULL2NUM(curr_pos - length);
261
385
  values[2] = number_sym;
262
386
  break;
263
387
  case string_value:
264
- values[0] = RB_ULONG2NUM(curr_pos - length);
388
+ values[0] = ULL2NUM(curr_pos - length);
265
389
  values[2] = string_sym;
266
390
  break;
267
391
  case object_value:
268
- values[0] = RB_ULONG2NUM(sctx->starts[sctx->current_path_len]);
392
+ values[0] = ULL2NUM(sctx->starts[sctx->current_path_len]);
269
393
  values[2] = object_sym;
270
394
  break;
271
395
  case array_value:
272
- values[0] = RB_ULONG2NUM(sctx->starts[sctx->current_path_len]);
396
+ values[0] = ULL2NUM(sctx->starts[sctx->current_path_len]);
273
397
  values[2] = array_sym;
274
398
  break;
275
399
  }
@@ -288,10 +412,13 @@ VALUE create_path(scan_ctx *sctx)
288
412
  switch (sctx->current_path[i].type)
289
413
  {
290
414
  case PATH_KEY:
291
- entry = rb_str_new(sctx->current_path[i].value.key.val, sctx->current_path[i].value.key.len);
415
+ if (sctx->symbolize_path_keys)
416
+ entry = rb_id2sym(rb_intern2(sctx->current_path[i].value.key.val, sctx->current_path[i].value.key.len));
417
+ else
418
+ entry = rb_str_new(sctx->current_path[i].value.key.val, sctx->current_path[i].value.key.len);
292
419
  break;
293
420
  case PATH_INDEX:
294
- entry = RB_ULONG2NUM(sctx->current_path[i].value.index);
421
+ entry = LONG2NUM(sctx->current_path[i].value.index);
295
422
  break;
296
423
  default:
297
424
  entry = Qnil;
@@ -307,7 +434,7 @@ void save_point(scan_ctx *sctx, value_type type, size_t length)
307
434
  // TODO: Abort parsing if all paths are matched and no more mathces are possible: only trivial key/index matchers at the current level
308
435
  // TODO: Don't re-compare already matched prefixes; hard to invalidate, though
309
436
  // TODO: Might fail in case of no memory
310
- VALUE point = Qundef;
437
+ VALUE point = Qundef, path;
311
438
  int match;
312
439
  for (int i = 0; i < sctx->paths_len; i++)
313
440
  {
@@ -319,6 +446,10 @@ void save_point(scan_ctx *sctx, value_type type, size_t length)
319
446
  {
320
447
  switch (sctx->paths[i].elems[j].type)
321
448
  {
449
+ case MATCHER_ANY_KEY:
450
+ if (sctx->current_path[j].type != PATH_KEY)
451
+ match = false;
452
+ break;
322
453
  case MATCHER_KEY:
323
454
  if (sctx->current_path[j].type != PATH_KEY ||
324
455
  sctx->current_path[j].value.key.len != sctx->paths[i].elems[j].value.key.len ||
@@ -344,10 +475,11 @@ void save_point(scan_ctx *sctx, value_type type, size_t length)
344
475
  {
345
476
  if (point == Qundef)
346
477
  {
347
- point = create_point(sctx, type, length, yajl_get_bytes_consumed(sctx->handle));
478
+ point = create_point(sctx, type, length);
348
479
  if (sctx->with_path)
349
480
  {
350
- point = rb_ary_new_from_args(2, create_path(sctx), point);
481
+ path = create_path(sctx);
482
+ point = rb_ary_new_from_args(2, path, point);
351
483
  }
352
484
  }
353
485
  // rb_ary_push raises only in case of a frozen array, which is not the case
@@ -411,7 +543,7 @@ int scan_on_start_object(void *ctx)
411
543
  return true;
412
544
  }
413
545
  increment_arr_index(sctx);
414
- sctx->starts[sctx->current_path_len] = yajl_get_bytes_consumed(sctx->handle) - 1;
546
+ sctx->starts[sctx->current_path_len] = scan_ctx_get_bytes_consumed(sctx) - 1;
415
547
  if (sctx->current_path_len < sctx->max_path_len)
416
548
  sctx->current_path[sctx->current_path_len].type = PATH_KEY;
417
549
  sctx->current_path_len++;
@@ -451,7 +583,7 @@ int scan_on_start_array(void *ctx)
451
583
  return true;
452
584
  }
453
585
  increment_arr_index(sctx);
454
- sctx->starts[sctx->current_path_len] = yajl_get_bytes_consumed(sctx->handle) - 1;
586
+ sctx->starts[sctx->current_path_len] = scan_ctx_get_bytes_consumed(sctx) - 1;
455
587
  if (sctx->current_path_len < sctx->max_path_len)
456
588
  {
457
589
  sctx->current_path[sctx->current_path_len].type = PATH_INDEX;
@@ -471,6 +603,107 @@ int scan_on_end_array(void *ctx)
471
603
  return true;
472
604
  }
473
605
 
606
+ void config_free(void *data)
607
+ {
608
+ scan_ctx_free((scan_ctx *)data);
609
+ ruby_xfree(data);
610
+ }
611
+
612
+ size_t config_size(const void *data)
613
+ {
614
+ // see ObjectSpace.memsize_of
615
+ scan_ctx *ctx = (scan_ctx *)data;
616
+ size_t res = sizeof(scan_ctx);
617
+ // current_path
618
+ if (ctx->current_path != NULL)
619
+ res += ctx->max_path_len * sizeof(path_elem_t);
620
+ // starts
621
+ if (ctx->starts != NULL)
622
+ res += ctx->max_path_len * sizeof(size_t);
623
+ if (ctx->paths != NULL)
624
+ {
625
+ res += ctx->paths_len * sizeof(paths_t);
626
+ for (int i = 0; i < ctx->paths_len; i++)
627
+ {
628
+ res += ctx->paths[i].len * sizeof(path_matcher_elem_t);
629
+ }
630
+ }
631
+ return res;
632
+ }
633
+
634
+ static const rb_data_type_t config_type = {
635
+ .wrap_struct_name = "json_scanner_config",
636
+ .function = {
637
+ .dfree = config_free,
638
+ .dsize = config_size,
639
+ },
640
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY,
641
+ };
642
+
643
+ VALUE config_alloc(VALUE self)
644
+ {
645
+ scan_ctx *ctx = ruby_xmalloc(sizeof(scan_ctx));
646
+ ctx->paths = NULL;
647
+ ctx->paths_len = 0;
648
+ ctx->current_path = NULL;
649
+ ctx->max_path_len = 0;
650
+ ctx->starts = NULL;
651
+ scan_ctx_reset(ctx, Qundef, false, false);
652
+ return TypedData_Wrap_Struct(self, &config_type, ctx);
653
+ }
654
+
655
+ VALUE config_m_initialize(VALUE self, VALUE path_ary)
656
+ {
657
+ scan_ctx *ctx;
658
+ VALUE scan_ctx_init_err, string_keys;
659
+ TypedData_Get_Struct(self, scan_ctx, &config_type, ctx);
660
+ string_keys = rb_ary_new();
661
+ scan_ctx_init_err = scan_ctx_init(ctx, path_ary, string_keys);
662
+ if (scan_ctx_init_err != Qundef)
663
+ {
664
+ rb_exc_raise(scan_ctx_init_err);
665
+ }
666
+ rb_iv_set(self, "string_keys", string_keys);
667
+ return self;
668
+ }
669
+
670
+ VALUE config_m_inspect(VALUE self)
671
+ {
672
+ scan_ctx *ctx;
673
+ VALUE res;
674
+ TypedData_Get_Struct(self, scan_ctx, &config_type, ctx);
675
+ res = rb_sprintf("#<%" PRIsVALUE " [", rb_class_name(CLASS_OF(self)));
676
+ for (int i = 0; ctx->paths && i < ctx->paths_len; i++)
677
+ {
678
+ rb_str_cat_cstr(res, "[");
679
+ for (int j = 0; j < ctx->paths[i].len; j++)
680
+ {
681
+ switch (ctx->paths[i].elems[j].type)
682
+ {
683
+ case MATCHER_KEY:
684
+ rb_str_catf(res, "'%.*s'", (int)ctx->paths[i].elems[j].value.key.len, ctx->paths[i].elems[j].value.key.val);
685
+ break;
686
+ case MATCHER_INDEX:
687
+ rb_str_catf(res, "%ld", ctx->paths[i].elems[j].value.index);
688
+ break;
689
+ case MATCHER_INDEX_RANGE:
690
+ rb_str_catf(res, "(%ld..%ld)", ctx->paths[i].elems[j].value.range.start, ctx->paths[i].elems[j].value.range.end);
691
+ break;
692
+ case MATCHER_ANY_KEY:
693
+ rb_str_cat_cstr(res, "('*'..'*')");
694
+ break;
695
+ }
696
+ if (j < ctx->paths[i].len - 1)
697
+ rb_str_cat_cstr(res, ", ");
698
+ }
699
+ rb_str_cat_cstr(res, "]");
700
+ if (i < ctx->paths_len - 1)
701
+ rb_str_cat_cstr(res, ", ");
702
+ }
703
+ rb_str_cat_cstr(res, "]>");
704
+ return res;
705
+ }
706
+
474
707
  static yajl_callbacks scan_callbacks = {
475
708
  scan_on_null,
476
709
  scan_on_boolean,
@@ -492,15 +725,16 @@ static yajl_callbacks scan_callbacks = {
492
725
  VALUE scan(int argc, VALUE *argv, VALUE self)
493
726
  {
494
727
  VALUE json_str, path_ary, with_path_flag, kwargs;
495
- VALUE kwargs_values[7];
728
+ VALUE kwargs_values[SCAN_KWARGS_SIZE];
496
729
 
497
- int with_path = false, verbose_error = false;
730
+ int with_path = false, verbose_error = false, symbolize_path_keys = false;
498
731
  char *json_text;
499
732
  size_t json_text_len;
500
733
  yajl_handle handle;
501
734
  yajl_status stat;
502
735
  scan_ctx *ctx;
503
- VALUE err = Qnil, result;
736
+ int free_ctx = true;
737
+ VALUE err_msg = Qnil, bytes_consumed, result;
504
738
  // Turned out callbacks can't raise exceptions
505
739
  // VALUE callback_err;
506
740
  #if RUBY_API_VERSION_MAJOR > 2 || (RUBY_API_VERSION_MAJOR == 2 && RUBY_API_VERSION_MINOR >= 7)
@@ -512,11 +746,13 @@ VALUE scan(int argc, VALUE *argv, VALUE self)
512
746
  with_path = RTEST(with_path_flag);
513
747
  if (kwargs != Qnil)
514
748
  {
515
- rb_get_kwargs(kwargs, scan_kwargs_table, 0, 7, kwargs_values);
749
+ rb_get_kwargs(kwargs, scan_kwargs_table, 0, SCAN_KWARGS_SIZE, kwargs_values);
516
750
  if (kwargs_values[0] != Qundef)
517
751
  with_path = RTEST(kwargs_values[0]);
518
752
  if (kwargs_values[1] != Qundef)
519
753
  verbose_error = RTEST(kwargs_values[1]);
754
+ if (kwargs_values[7] != Qundef)
755
+ symbolize_path_keys = RTEST(kwargs_values[7]);
520
756
  }
521
757
  rb_check_type(json_str, T_STRING);
522
758
  json_text = RSTRING_PTR(json_str);
@@ -525,7 +761,30 @@ VALUE scan(int argc, VALUE *argv, VALUE self)
525
761
  #else
526
762
  json_text_len = RSTRING_LEN(json_str);
527
763
  #endif
528
- ctx = scan_ctx_init(path_ary, with_path);
764
+ if (rb_obj_is_kind_of(path_ary, rb_cJsonScannerConfig))
765
+ {
766
+ free_ctx = false;
767
+ TypedData_Get_Struct(path_ary, scan_ctx, &config_type, ctx);
768
+ }
769
+ else
770
+ {
771
+ VALUE scan_ctx_init_err;
772
+ ctx = ruby_xmalloc(sizeof(scan_ctx));
773
+ scan_ctx_init_err = scan_ctx_init(ctx, path_ary, Qundef);
774
+ if (scan_ctx_init_err != Qundef)
775
+ {
776
+ ruby_xfree(ctx);
777
+ rb_exc_raise(scan_ctx_init_err);
778
+ }
779
+ }
780
+ // Need to keep a ref to result array on the stack to prevent it from being GC-ed
781
+ result = rb_ary_new_capa(ctx->paths_len);
782
+ for (int i = 0; i < ctx->paths_len; i++)
783
+ {
784
+ rb_ary_push(result, rb_ary_new());
785
+ }
786
+ scan_ctx_reset(ctx, result, with_path, symbolize_path_keys);
787
+ // scan_ctx_debug(ctx);
529
788
 
530
789
  handle = yajl_alloc(&scan_callbacks, NULL, (void *)ctx);
531
790
  if (kwargs != Qnil) // it's safe to read kwargs_values only if rb_get_kwargs was called
@@ -544,23 +803,52 @@ VALUE scan(int argc, VALUE *argv, VALUE self)
544
803
  ctx->handle = handle;
545
804
  stat = yajl_parse(handle, (unsigned char *)json_text, json_text_len);
546
805
  if (stat == yajl_status_ok)
806
+ {
807
+ scan_ctx_save_bytes_consumed(ctx);
547
808
  stat = yajl_complete_parse(handle);
809
+ }
548
810
 
549
811
  if (stat != yajl_status_ok)
550
812
  {
551
813
  char *str = (char *)yajl_get_error(handle, verbose_error, (unsigned char *)json_text, json_text_len);
552
- err = rb_utf8_str_new_cstr(str);
814
+ err_msg = rb_utf8_str_new_cstr(str);
815
+ bytes_consumed = ULL2NUM(scan_ctx_get_bytes_consumed(ctx));
553
816
  yajl_free_error(handle, (unsigned char *)str);
554
817
  }
818
+ // // Needed when yajl_allow_partial_values is set
819
+ // if (ctx->current_path_len > 0)
820
+ // {
821
+ // if (ctx->current_path_len > ctx->max_path_len)
822
+ // ctx->current_path_len = ctx->max_path_len;
823
+ // for (int i = ctx->current_path_len - 1; i > 0; i--)
824
+ // {
825
+ // switch (ctx->current_path[i].type)
826
+ // {
827
+ // case PATH_KEY:
828
+ // scan_on_end_object(ctx);
829
+ // break;
830
+ // case PATH_INDEX:
831
+ // scan_on_end_array(ctx);
832
+ // break;
833
+ // }
834
+ // }
835
+ // }
555
836
  // callback_err = ctx->rb_err;
556
- result = ctx->points_list;
557
- scan_ctx_free(ctx);
837
+ if (free_ctx)
838
+ {
839
+ // fprintf(stderr, "free_ctx\n");
840
+ scan_ctx_free(ctx);
841
+ ruby_xfree(ctx);
842
+ }
558
843
  yajl_free(handle);
559
- if (err != Qnil)
560
- rb_exc_raise(rb_exc_new_str(rb_eJsonScannerParseError, err));
844
+ if (err_msg != Qnil)
845
+ {
846
+ VALUE err = rb_exc_new_str(rb_eJsonScannerParseError, err_msg);
847
+ rb_ivar_set(err, rb_iv_bytes_consumed, bytes_consumed);
848
+ rb_exc_raise(err);
849
+ }
561
850
  // if (callback_err != Qnil)
562
851
  // rb_exc_raise(callback_err);
563
- // TODO: report yajl_get_bytes_consumed(handle)
564
852
  return result;
565
853
  }
566
854
 
@@ -568,8 +856,16 @@ RUBY_FUNC_EXPORTED void
568
856
  Init_json_scanner(void)
569
857
  {
570
858
  rb_mJsonScanner = rb_define_module("JsonScanner");
859
+ rb_cJsonScannerConfig = rb_define_class_under(rb_mJsonScanner, "Config", rb_cObject);
860
+ rb_define_alloc_func(rb_cJsonScannerConfig, config_alloc);
861
+ rb_define_method(rb_cJsonScannerConfig, "initialize", config_m_initialize, 1);
862
+ rb_define_method(rb_cJsonScannerConfig, "inspect", config_m_inspect, 0);
571
863
  rb_define_const(rb_mJsonScanner, "ANY_INDEX", rb_range_new(INT2FIX(0), INT2FIX(-1), false));
864
+ any_key_sym = rb_id2sym(rb_intern("*"));
865
+ rb_define_const(rb_mJsonScanner, "ANY_KEY", rb_range_new(any_key_sym, any_key_sym, false));
572
866
  rb_eJsonScannerParseError = rb_define_class_under(rb_mJsonScanner, "ParseError", rb_eRuntimeError);
867
+ rb_define_attr(rb_eJsonScannerParseError, BYTES_CONSUMED, true, false);
868
+ rb_iv_bytes_consumed = rb_intern("@" BYTES_CONSUMED);
573
869
  rb_define_module_function(rb_mJsonScanner, "scan", scan, -1);
574
870
  null_sym = rb_id2sym(rb_intern("null"));
575
871
  boolean_sym = rb_id2sym(rb_intern("boolean"));
@@ -584,4 +880,5 @@ Init_json_scanner(void)
584
880
  scan_kwargs_table[4] = rb_intern("allow_trailing_garbage");
585
881
  scan_kwargs_table[5] = rb_intern("allow_multiple_values");
586
882
  scan_kwargs_table[6] = rb_intern("allow_partial_values");
883
+ scan_kwargs_table[7] = rb_intern("symbolize_path_keys");
587
884
  }
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module JsonScanner
4
- VERSION = "0.2.0"
4
+ VERSION = "0.3.1"
5
5
  end
@@ -0,0 +1,128 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "rake/clean"
4
+ require "rake/extensiontask"
5
+
6
+ module Rake
7
+ class ExtensionTestTask < ExtensionTask
8
+ #
9
+ # The C files to compile.
10
+ #
11
+ attr_accessor :c_spec_files
12
+
13
+ #
14
+ # The folders containing the includes for the test files.
15
+ #
16
+ # Default: %w{/usr/include /usr/include/google}
17
+ #
18
+ attr_accessor :test_includes
19
+
20
+ #
21
+ # The libraries to link against.
22
+ #
23
+ # Default: %w{cmockery}
24
+ #
25
+ attr_accessor :test_libraries
26
+
27
+ #
28
+ # The folders where the libraries are located.
29
+ #
30
+ # Default: %w{/usr/lib}
31
+ #
32
+ attr_accessor :test_lib_folders
33
+
34
+ def initialize(*args, &block)
35
+ super
36
+ @c_spec_files = []
37
+ @test_includes = %w[/usr/include /usr/include/google]
38
+ @test_libraries = %w[cmockery]
39
+ @test_lib_folders = %w[/usr/lib]
40
+ init_test_tasks(
41
+ "#{@tmp_dir}/test", "compile:#{@name}:test",
42
+ "spec:c:#{@name}", "spec:valgrind:#{@name}", "spec:gdb:#{@name}",
43
+ )
44
+ end
45
+
46
+ private
47
+
48
+ def includes
49
+ @includes ||= (@test_includes + [
50
+ ".",
51
+ "../../#{@ext_dir}",
52
+ "/usr/include/ruby-#{RUBY_VERSION}",
53
+ "/usr/include/ruby-#{RUBY_VERSION}/#{RUBY_PLATFORM}",
54
+ ]).map { |l| "-I#{l}" }.join(" ")
55
+ end
56
+
57
+ def libraries
58
+ @libraries ||= (@test_libraries + %w[ruby pthread crypto]).map { |l| "-l#{l}" }.join(" ")
59
+ end
60
+
61
+ def lib_folders
62
+ @lib_folders ||= (@test_lib_folders + %w[/usr/lib .]).map { |l| "-L#{l}" }.join(" ")
63
+ end
64
+
65
+ def compile_tests
66
+ # compile the test sources
67
+ FileList["*.c"].each do |cfile|
68
+ sh "gcc -g #{includes} -c #{cfile}"
69
+ end
70
+
71
+ source_objects = FileList["../#{RUBY_PLATFORM}/#{@name}/#{RUBY_VERSION}/*.o"]
72
+ # link the executables
73
+ FileList["*.o"].each do |ofile|
74
+ sh "gcc -g #{lib_folders} #{libraries} #{source_objects} #{ofile} -o #{ofile.ext}"
75
+ end
76
+ end
77
+
78
+ def init_compile_task(compile_dir, compile_task)
79
+ directory compile_dir
80
+ desc "Compile #{@name} tests"
81
+ task compile_task => ["compile:#{@name}", compile_dir] do
82
+ # copy the test files into the compilation folder
83
+ @c_spec_files.each { |file| cp file, compile_dir }
84
+
85
+ # start compilation
86
+ chdir(compile_dir) { compile_tests }
87
+ end
88
+ end
89
+
90
+ def init_valgrind_task(compile_dir, compile_task, valgrind_task)
91
+ desc "Execute valgrind for a #{@name} test"
92
+ task valgrind_task => [compile_task] do |_t, args|
93
+ sh "valgrind --num-callers=50 --error-limit=no --partial-loads-ok=yes --undef-value-errors=no " \
94
+ "--leak-check=full #{compile_dir}/#{args.test}"
95
+ end
96
+ end
97
+
98
+ def init_gdb_task(compile_dir, compile_task, gdb_task)
99
+ desc "Execute gdb for a #{@name} test"
100
+ task gdb_task => [compile_task] do |_t, args|
101
+ sh "gdb #{compile_dir}/#{args.test}"
102
+ end
103
+ end
104
+
105
+ def init_test_task(compile_dir, compile_task, test_task)
106
+ desc "Test #{@name}"
107
+ task test_task => [compile_task] do |_t, args|
108
+ if args.test
109
+ sh "#{compile_dir}/#{args.test}"
110
+ else
111
+ FileList["#{compile_dir}/*.o"].each do |ofile|
112
+ sh ofile.ext.to_s
113
+ end
114
+ end
115
+ end
116
+ end
117
+
118
+ def init_test_tasks(compile_dir, compile_task, test_task, valgrind_task, gdb_task)
119
+ init_compile_task(compile_dir, compile_task)
120
+ init_valgrind_task(compile_dir, compile_task, valgrind_task)
121
+ init_gdb_task(compile_dir, compile_task, gdb_task)
122
+ init_test_task(compile_dir, compile_task, test_task)
123
+
124
+ desc "Test all C extensions"
125
+ task "spec:c" => [test_task]
126
+ end
127
+ end
128
+ end
File without changes
@@ -22,6 +22,34 @@ RSpec.describe JsonScanner do
22
22
  )
23
23
  end
24
24
 
25
+ it "supports 'symbolize_path_keys'" do
26
+ expect(
27
+ described_class.scan('{"a": {"b": 1}}', [[:a, "b"]], with_path: true),
28
+ ).to eq([[[%w[a b], [12, 13, :number]]]])
29
+ expect(
30
+ described_class.scan('{"a": {"b": 1}}', [[:a, "b"]], with_path: true, symbolize_path_keys: true),
31
+ ).to eq([[[%i[a b], [12, 13, :number]]]])
32
+ end
33
+
34
+ it "supports any key selector" do
35
+ expect(
36
+ described_class.scan(
37
+ '[{"a":1,"b":2},{"c":3,"d":4},[5]]',
38
+ [[described_class::ANY_INDEX, described_class::ANY_KEY]],
39
+ ),
40
+ ).to eq(
41
+ [[[6, 7, :number], [12, 13, :number], [20, 21, :number], [26, 27, :number]]],
42
+ )
43
+ expect(
44
+ described_class.scan(
45
+ '{"a":[1,2],"b":{"c":3}}',
46
+ [[described_class::ANY_KEY, described_class::ANY_INDEX]],
47
+ ),
48
+ ).to eq(
49
+ [[[6, 7, :number], [8, 9, :number]]],
50
+ )
51
+ end
52
+
25
53
  it "works with max path len correctly" do
26
54
  expect(
27
55
  described_class.scan('{"a": [1]}', [[], ["a"]]),
@@ -90,11 +118,32 @@ RSpec.describe JsonScanner do
90
118
  expect do
91
119
  described_class.scan "{1}", [], verbose_error: true
92
120
  end.to raise_error described_class::ParseError, /invalid object key(?=.*\(right here\))/m
121
+ expect do
122
+ described_class.scan("[0, 42,", [[(1..-1)]], verbose_error: true)
123
+ end.to raise_error described_class::ParseError, /parse error: premature EOF.*\[0, 42,.*\(right here\) ------\^/m
124
+ end
125
+
126
+ it "includes bytes consumed in the exception" do
127
+ expect do
128
+ described_class.scan("[[1,2],,[3,4]]", [])
129
+ end.to(
130
+ raise_error(described_class::ParseError) do |exc|
131
+ expect(exc.bytes_consumed).to eq(8)
132
+ end,
133
+ )
134
+ expect do
135
+ described_class.scan("[[1,2", [])
136
+ end.to(
137
+ raise_error(described_class::ParseError) do |exc|
138
+ # 6 (not 5, the input length) because of the final " " chunk - that's how yajl works
139
+ expect(exc.bytes_consumed).to eq(6)
140
+ end,
141
+ )
93
142
  end
94
143
 
95
144
  it "allows to return an actual path to the element" do
96
145
  with_path_expected_res = [
97
- # result for first mathcer, each element array of two items:
146
+ # result for first matcher, each element array of two items:
98
147
  # array of path elements and 3-element array start,end,type
99
148
  [[[0], [1, 6, :array]], [[1], [7, 12, :array]]],
100
149
  [
@@ -128,7 +177,7 @@ RSpec.describe JsonScanner do
128
177
  ),
129
178
  ).to eq(
130
179
  [
131
- # result for first mathcer, each element 3-element array start,end,type
180
+ # result for first matcher, each element 3-element array start,end,type
132
181
  [[1, 6, :array], [7, 12, :array]],
133
182
  [
134
183
  [2, 3, :number], [4, 5, :number],
@@ -190,4 +239,122 @@ RSpec.describe JsonScanner do
190
239
  described_class.scan(json, [[]])
191
240
  end.to raise_error(described_class::ParseError)
192
241
  end
242
+
243
+ context "with yajl params" do
244
+ it "supports 'allow_comments'" do
245
+ params = ["[0, /* answer */ 42, 0]", [[(1..-1)]]]
246
+ expect(described_class.scan(*params, allow_comments: true)).to eq(
247
+ [[[17, 19, :number], [21, 22, :number]]],
248
+ )
249
+ expect do
250
+ described_class.scan(*params)
251
+ end.to raise_error(described_class::ParseError)
252
+ end
253
+
254
+ it "supports 'dont_validate_strings'" do
255
+ params = ["\"\x81\x83\"", [[]]]
256
+ expect(described_class.scan(*params, dont_validate_strings: true)).to eq(
257
+ [[[0, 4, :string]]],
258
+ )
259
+ expect do
260
+ described_class.scan(*params)
261
+ end.to raise_error(described_class::ParseError)
262
+ params = ["{\"\x81\x83\": 42}", [[JsonScanner::ANY_KEY]]]
263
+ expect(described_class.scan(*params, dont_validate_strings: true, with_path: true)).to eq(
264
+ [[[["\x81\x83".dup.force_encoding(Encoding::BINARY)], [7, 9, :number]]]],
265
+ )
266
+ expect do
267
+ described_class.scan(*params, with_path: true)
268
+ end.to raise_error(described_class::ParseError)
269
+ end
270
+
271
+ it "supports 'allow_trailing_garbage'" do
272
+ params = ["[0, 42, 0]garbage", [[(1..-1)]]]
273
+ expect(described_class.scan(*params, allow_trailing_garbage: true)).to eq(
274
+ [[[4, 6, :number], [8, 9, :number]]],
275
+ )
276
+ expect do
277
+ described_class.scan(*params)
278
+ end.to raise_error(described_class::ParseError)
279
+ end
280
+
281
+ it "supports 'allow_multiple_values'" do
282
+ params = ["[0, 42, 0] [0, 34]", [[(1..-1)]]]
283
+ expect(described_class.scan(*params, allow_multiple_values: true)).to eq(
284
+ [[[4, 6, :number], [8, 9, :number], [16, 18, :number]]],
285
+ )
286
+ expect do
287
+ described_class.scan(*params)
288
+ end.to raise_error(described_class::ParseError)
289
+ end
290
+
291
+ it "handles multiple top-level values correctly with 'allow_multiple_values'" do
292
+ expect(described_class.scan("[0, 42, 0] [0, 34]", [[]], allow_multiple_values: true)).to eq(
293
+ [[[0, 10, :array], [12, 19, :array]]],
294
+ )
295
+ expect(described_class.scan('{"42": 34} [0, 34]', [[]], allow_multiple_values: true)).to eq(
296
+ [[[0, 10, :object], [12, 19, :array]]],
297
+ )
298
+ expect(described_class.scan('[0, 42, 0] {"42": 34}', [[]], allow_multiple_values: true)).to eq(
299
+ [[[0, 10, :array], [12, 22, :object]]],
300
+ )
301
+ expect(described_class.scan('{"42": 34} {"0": 34}', [[]], allow_multiple_values: true)).to eq(
302
+ [[[0, 10, :object], [12, 21, :object]]],
303
+ )
304
+ end
305
+
306
+ it "supports 'allow_partial_values'" do
307
+ params = ["[0, 42, 0,", [[(1..-1)]]]
308
+ expect(described_class.scan(*params, allow_partial_values: true)).to eq(
309
+ [[[4, 6, :number], [8, 9, :number]]],
310
+ )
311
+ expect do
312
+ described_class.scan(*params)
313
+ end.to raise_error(described_class::ParseError)
314
+ expect(described_class.scan("[0, 42, 0", [[(1..-1)]], allow_partial_values: true)).to eq(
315
+ [[[4, 6, :number], [8, 9, :number]]],
316
+ )
317
+ expect(described_class.scan("[0, 42, true", [[(1..-1)]], allow_partial_values: true)).to eq(
318
+ [[[4, 6, :number], [8, 12, :boolean]]],
319
+ )
320
+ end
321
+ end
322
+
323
+ describe described_class::Config do
324
+ it "saves state" do
325
+ key = "abracadabra".dup
326
+ conf = described_class.new [[], [key]]
327
+ key["cad"] = 0.chr
328
+ key = nil # rubocop:disable Lint/UselessAssignment
329
+ GC.start
330
+ expect(
331
+ 10.times.map do
332
+ JsonScanner.scan '{"abracadabra": 10}', conf, with_path: true
333
+ end.uniq,
334
+ ).to eq([[[[[], [0, 19, :object]]], [[["abracadabra"], [16, 18, :number]]]]])
335
+ expect(
336
+ 10.times.map do
337
+ JsonScanner.scan '{"abracadabra": 10}', conf
338
+ end.uniq,
339
+ ).to eq([[[[0, 19, :object]], [[16, 18, :number]]]])
340
+ end
341
+
342
+ it "re-raises exceptions" do
343
+ expect do
344
+ described_class.new [[(0...-1)]]
345
+ end.to raise_error ArgumentError
346
+ expect do
347
+ described_class.new [[(0..-2)]]
348
+ end.to raise_error ArgumentError
349
+ expect do
350
+ described_class.new [[(-42..1)]]
351
+ end.to raise_error ArgumentError
352
+ end
353
+
354
+ it "supports inspect" do
355
+ expect(
356
+ described_class.new([[], ["abracadabra", JsonScanner::ANY_INDEX], [42, JsonScanner::ANY_KEY]]).inspect,
357
+ ).to eq("#<JsonScanner::Config [[], ['abracadabra', (0..9223372036854775807)], [42, ('*'..'*')]]>")
358
+ end
359
+ end
193
360
  end
metadata CHANGED
@@ -1,16 +1,16 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: json_scanner
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - uvlad7
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-12-27 00:00:00.000000000 Z
11
+ date: 2025-08-14 00:00:00.000000000 Z
12
12
  dependencies: []
13
- description: This gem uses yajl lib to scan a json string and allows you to parse
13
+ description: This gem uses the yajl lib to scan a JSON string and allows you to parse
14
14
  pieces of it
15
15
  email:
16
16
  - uvlad7@gmail.com
@@ -26,6 +26,8 @@ files:
26
26
  - lib/json_scanner.rb
27
27
  - lib/json_scanner/version.rb
28
28
  - sig/json_scanner.rbs
29
+ - spec/extensiontesttask.rb
30
+ - spec/json_scanner_spec.c
29
31
  - spec/json_scanner_spec.rb
30
32
  - spec/spec_helper.rb
31
33
  homepage: https://github.com/uvlad7/json_scanner