json_scanner 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f34237d4ceab009f685b82a4e480247f23c804db96bded6d1bacea5ddd4a0946
4
- data.tar.gz: 87484e4cbab84666b41ddb67553b0c985ef6dc29d8f1154a458173deade08587
3
+ metadata.gz: 9c1ddff519827bc802cdcacb5b048402706544b0882c8ac91cd1aa414c4b57e0
4
+ data.tar.gz: d1c4f41dbd71ed08a488c2f9647194fd2692c91d522fee35d28d7060bf80321c
5
5
  SHA512:
6
- metadata.gz: 5f6ae7f8d7afc88fee60e88eb8efe776b7ae0dffe25ecffa5ffc61241629eb4c3ec86a9ab1b1e76d49b4bd12498ee15625677afbc997039f9295e65e510a32df
7
- data.tar.gz: b5e95df2d53c0a224f6a089a55a6a0c502adb7e4dce276f2f85c48a063182f65f1425eff1f97cf409de46c815b45bbfa0626e54d76036a5c66c13eefa4146648
6
+ metadata.gz: 57bf59cc9495f46675bb98d2fc7545bdc3b8392631c443ad2b89595b22be054c8f8bb268a798c5f104d1e38b73d577662f96637fd9311260c9b0a45b55044265
7
+ data.tar.gz: '055432559a23dbf34e679aac7be4967ea163684fda718b433e978c34feb73f298f1346b00629fada8279a9e49e1267b990f73d2d57c8403330faf42ce4086bb8'
data/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  # JsonScanner
4
4
 
5
- Extract values from JSON without full parsing. This gem uses yajl lib to scan a json string and allows you to parse pieces of it.
5
+ Extract values from JSON without full parsing. This gem uses the `yajl` library to scan a JSON string and allows you to parse pieces of it.
6
6
 
7
7
  ## Installation
8
8
 
@@ -16,6 +16,8 @@ If bundler is not being used to manage dependencies, install the gem by executin
16
16
 
17
17
  ## Usage
18
18
 
19
+ Basic usage
20
+
19
21
  ```ruby
20
22
  require "json"
21
23
  require "json_scanner"
@@ -32,13 +34,77 @@ emoji_json = '{"grin": "😁", "heart": "😍", "rofl": "🤣"}'
32
34
  begin_pos, end_pos, = JsonScanner.scan(emoji_json, [["heart"]], false).first.first
33
35
  emoji_json.byteslice(begin_pos...end_pos)
34
36
  # => "\"😍\""
35
- # Note: most likely don't need `quirks_mode` option, unless you are using some old ruby
36
- # with stdlib version of json gem or its old version. In new versions `quirks_mode` is default
37
+ # Note: You most likely don't need the `quirks_mode` option unless you are using an older version
38
+ # of Ruby with its bundled (stdlib) json gem, or an old standalone version of the json gem. In newer versions, `quirks_mode` is enabled by default.
37
39
  JSON.parse(emoji_json.byteslice(begin_pos...end_pos), quirks_mode: true)
38
40
  # => "😍"
39
41
  # You can also do this
40
42
  # emoji_json.force_encoding(Encoding::BINARY)[begin_pos...end_pos].force_encoding(Encoding::UTF_8)
41
43
  # => "\"😍\""
44
+
45
+ # Ranges are supported as matchers for indexes with the following restrictions:
46
+ # - the start of a range must be non-negative (0 or greater)
47
+ # - the end of a range must be non-negative or -1
48
+ # - a range with -1 end must be closed, e.g. (0..-1) works, but (0...-1) is forbidden
49
+ JsonScanner.scan('[0, 42, 0]', [[(1..-1)]])
50
+ # => [[[4, 6, :number], [8, 9, :number]]]
51
+ JsonScanner.scan('[0, 42, 0]', [[JsonScanner::ANY_INDEX]])
52
+ # => [[[1, 2, :number], [4, 6, :number], [8, 9, :number]]]
53
+
54
+ # Special matcher JsonScanner::ANY_KEY is supported for object keys
55
+ JsonScanner.scan('{"a": 1, "b": 2}', [[JsonScanner::ANY_KEY]], with_path: true)
56
+ # => [[[["a"], [6, 7, :number]], [["b"], [14, 15, :number]]]]
57
+ ```
58
+
59
+ It supports multiple options
60
+
61
+ ```ruby
62
+ JsonScanner.scan('[0, 42, 0]', [[(1..-1)]], with_path: true)
63
+ # => [[[[1], [4, 6, :number]], [[2], [8, 9, :number]]]]
64
+ JsonScanner.scan('[0, 42,', [[(1..-1)]], verbose_error: true)
65
+ # JsonScanner::ParseError (parse error: premature EOF)
66
+ # [0, 42,
67
+ # (right here) ------^
68
+ JsonScanner.scan('[0, /* answer */ 42, 0]', [[(1..-1)]], allow_comments: true)
69
+ # => [[[17, 19, :number], [21, 22, :number]]]
70
+ JsonScanner.scan("\"\x81\x83\"", [[]], dont_validate_strings: true)
71
+ # => [[[0, 4, :string]]]
72
+ JsonScanner.scan("{\"\x81\x83\": 42}", [[JsonScanner::ANY_KEY]], dont_validate_strings: true, with_path: true)
73
+ # => [[[["\x81\x83"], [7, 9, :number]]]]
74
+ JsonScanner.scan('[0, 42, 0]garbage', [[(1..-1)]], allow_trailing_garbage: true)
75
+ # => [[[4, 6, :number], [8, 9, :number]]]
76
+ JsonScanner.scan('[0, 42, 0] [0, 34]', [[(1..-1)]], allow_multiple_values: true)
77
+ # => [[[4, 6, :number], [8, 9, :number], [16, 18, :number]]]
78
+ JsonScanner.scan('[0, 42, 0', [[(1..-1)]], allow_partial_values: true)
79
+ # => [[[4, 6, :number], [8, 9, :number]]]
80
+ JsonScanner.scan('{"a": 1}', [[JsonScanner::ANY_KEY]], with_path: true, symbolize_path_keys: true)
81
+ # => [[[[:a], [6, 7, :number]]]]
82
+ ```
83
+
84
+ Note that the standard `JSON` library supports comments, so you may want to enable them in `JsonScanner` as well (with `allow_comments: true`):
85
+ ```ruby
86
+ json_str = '{"answer": {"value": 42 /* the Ultimate Question of Life, the Universe, and Everything */ }}'
87
+ JsonScanner.scan(json_str, [["answer"]], allow_comments: true).first.map do |begin_pos, end_pos, _type|
88
+ JSON.parse(json_str.byteslice(begin_pos...end_pos), quirks_mode: true)
89
+ end
90
+ # => [{"value"=>42}]
91
+ ```
92
+
93
+ You can also create a config and reuse it
94
+
95
+ ```ruby
96
+ require "json_scanner"
97
+
98
+ config = JsonScanner::Config.new([[], ["key"], [(0..-1)]])
99
+ # => #<JsonScanner::Config [[], ['key'], [(0..9223372036854775807)]]>
100
+ JsonScanner.scan('{"key": "42"}', config)
101
+ # => [[[0, 13, :object]], [[8, 12, :string]], []]
102
+ JsonScanner.scan('{"key": "42"}', config, with_path: true)
103
+ # => [[[[], [0, 13, :object]]], [[["key"], [8, 12, :string]]], []]
104
+ JsonScanner.scan('[0, 42]', config)
105
+ # => [[[0, 7, :array]], [], [[1, 2, :number], [4, 6, :number]]]
106
+ JsonScanner.scan('[0, 42]', config, with_path: true)
107
+ # => [[[[], [0, 7, :array]]], [], [[[0], [1, 2, :number]], [[1], [4, 6, :number]]]]
42
108
  ```
43
109
 
44
110
  ## Development
@@ -1,8 +1,12 @@
1
1
  #include "json_scanner.h"
2
2
 
3
3
  VALUE rb_mJsonScanner;
4
+ VALUE rb_cJsonScannerConfig;
4
5
  VALUE rb_eJsonScannerParseError;
5
- ID scan_kwargs_table[7];
6
+ #define BYTES_CONSUMED "bytes_consumed"
7
+ ID rb_iv_bytes_consumed;
8
+ #define SCAN_KWARGS_SIZE 8
9
+ ID scan_kwargs_table[SCAN_KWARGS_SIZE];
6
10
 
7
11
  VALUE null_sym;
8
12
  VALUE boolean_sym;
@@ -11,11 +15,13 @@ VALUE string_sym;
11
15
  VALUE object_sym;
12
16
  VALUE array_sym;
13
17
 
18
+ VALUE any_key_sym;
19
+
14
20
  enum matcher_type
15
21
  {
16
22
  MATCHER_KEY,
17
23
  MATCHER_INDEX,
18
- // MATCHER_ANY_KEY,
24
+ MATCHER_ANY_KEY,
19
25
  MATCHER_INDEX_RANGE,
20
26
  // MATCHER_KEYS_LIST,
21
27
  // MATCHER_KEY_REGEX,
@@ -70,30 +76,115 @@ typedef struct
70
76
  typedef struct
71
77
  {
72
78
  int with_path;
73
- paths_t *paths;
79
+ int symbolize_path_keys;
74
80
  int paths_len;
75
- path_elem_t *current_path;
81
+ paths_t *paths;
76
82
  int current_path_len;
77
83
  int max_path_len;
84
+ path_elem_t *current_path;
78
85
  // Easier to use a Ruby array for result than convert later
86
+ // must be supplied by the caller and RB_GC_GUARD-ed if it isn't on the stack
79
87
  VALUE points_list;
80
88
  // by depth
81
89
  size_t *starts;
82
90
  // VALUE rb_err;
83
91
  yajl_handle handle;
92
+ size_t yajl_bytes_consumed;
84
93
  } scan_ctx;
85
94
 
95
+ inline size_t scan_ctx_get_bytes_consumed(scan_ctx *ctx)
96
+ {
97
+ return ctx->yajl_bytes_consumed + yajl_get_bytes_consumed(ctx->handle);
98
+ }
99
+
100
+ inline void scan_ctx_update_bytes_consumed(scan_ctx *ctx)
101
+ {
102
+ ctx->yajl_bytes_consumed += yajl_get_bytes_consumed(ctx->handle);
103
+ }
104
+
105
+ void scan_ctx_debug(scan_ctx *ctx)
106
+ {
107
+ // actually might have been cleared by GC already, be careful, debug only when in valid state
108
+ VALUE points_list_inspect = ctx->points_list == Qundef ? rb_str_new_cstr("undef") : rb_sprintf("%" PRIsVALUE, rb_inspect(ctx->points_list));
109
+ fprintf(stderr, "\nscan_ctx {\n");
110
+ fprintf(stderr, " with_path: %s,\n", ctx->with_path ? "true" : "false");
111
+ fprintf(stderr, " symbolize_path_keys: %s,\n", ctx->symbolize_path_keys ? "true" : "false");
112
+ fprintf(stderr, " paths_len: %d,\n", ctx->paths_len);
113
+
114
+ fprintf(stderr, " paths: [\n");
115
+ for (int i = 0; ctx->paths && i < ctx->paths_len; i++)
116
+ {
117
+ fprintf(stderr, " [");
118
+ for (int j = 0; j < ctx->paths[i].len; j++)
119
+ {
120
+ switch (ctx->paths[i].elems[j].type)
121
+ {
122
+ case MATCHER_KEY:
123
+ fprintf(stderr, "'%.*s'", (int)ctx->paths[i].elems[j].value.key.len, ctx->paths[i].elems[j].value.key.val);
124
+ break;
125
+ case MATCHER_INDEX:
126
+ fprintf(stderr, "%ld", ctx->paths[i].elems[j].value.index);
127
+ break;
128
+ case MATCHER_INDEX_RANGE:
129
+ fprintf(stderr, "(%ld..%ld)", ctx->paths[i].elems[j].value.range.start, ctx->paths[i].elems[j].value.range.end);
130
+ break;
131
+ case MATCHER_ANY_KEY:
132
+ fprintf(stderr, "('*'..'*')");
133
+ break;
134
+ }
135
+ if (j < ctx->paths[i].len - 1)
136
+ fprintf(stderr, ", ");
137
+ }
138
+ fprintf(stderr, "],\n");
139
+ }
140
+ fprintf(stderr, " ],\n");
141
+
142
+ fprintf(stderr, " current_path_len: %d,\n", ctx->current_path_len);
143
+ fprintf(stderr, " max_path_len: %d,\n", ctx->max_path_len);
144
+ fprintf(stderr, " current_path: [");
145
+ for (int i = 0; i < ctx->current_path_len; i++)
146
+ {
147
+ switch (ctx->current_path[i].type)
148
+ {
149
+ case PATH_KEY:
150
+ fprintf(stderr, "'%.*s'", (int)ctx->current_path[i].value.key.len, ctx->current_path[i].value.key.val);
151
+ break;
152
+ case PATH_INDEX:
153
+ fprintf(stderr, "%ld", ctx->current_path[i].value.index);
154
+ break;
155
+ }
156
+ if (i < ctx->current_path_len - 1)
157
+ fprintf(stderr, ", ");
158
+ }
159
+ fprintf(stderr, "],\n");
160
+
161
+ fprintf(stderr, " points_list: %.*s,\n", RSTRING_LENINT(points_list_inspect), RSTRING_PTR(points_list_inspect));
162
+ fprintf(stderr, " starts: [");
163
+ for (int i = 0; i <= ctx->max_path_len; i++)
164
+ {
165
+ fprintf(stderr, "%ld", ctx->starts[i]);
166
+ if (i < ctx->max_path_len)
167
+ fprintf(stderr, ", ");
168
+ }
169
+ fprintf(stderr, "],\n");
170
+
171
+ fprintf(stderr, " handle: %p,\n", ctx->handle);
172
+ fprintf(stderr, " yajl_bytes_consumed: %ld,\n", ctx->yajl_bytes_consumed);
173
+ fprintf(stderr, "}\n\n\n");
174
+ }
175
+
86
176
  // FIXME: This will cause memory leak if ruby_xmalloc raises
87
- scan_ctx *scan_ctx_init(VALUE path_ary, int with_path)
177
+ // path_ary must be RB_GC_GUARD-ed by the caller
178
+ VALUE scan_ctx_init(scan_ctx *ctx, VALUE path_ary, VALUE string_keys)
88
179
  {
89
180
  int path_ary_len;
90
- scan_ctx *ctx;
91
181
  paths_t *paths;
92
182
  // TODO: Allow to_ary and sized enumerables
93
183
  rb_check_type(path_ary, T_ARRAY);
94
184
  path_ary_len = rb_long2int(rb_array_len(path_ary));
95
185
  // Check types early before any allocations, so exception is ok
96
186
  // TODO: Fix this, just handle errors
187
+ // It's not possible that another Ruby thread changes path_ary items between these two loops, because C call holds GVL
97
188
  for (int i = 0; i < path_ary_len; i++)
98
189
  {
99
190
  int path_len;
@@ -103,38 +194,42 @@ scan_ctx *scan_ctx_init(VALUE path_ary, int with_path)
103
194
  for (int j = 0; j < path_len; j++)
104
195
  {
105
196
  VALUE entry = rb_ary_entry(path, j);
106
- int type = TYPE(entry);
107
- if (type == T_STRING)
197
+ switch (TYPE(entry))
108
198
  {
199
+ case T_SYMBOL:
200
+ entry = rb_sym2str(entry);
201
+ /* fall through */
202
+ case T_STRING:
109
203
  #if LONG_MAX > SIZE_MAX
110
204
  RSTRING_LENINT(entry);
111
205
  #endif
112
- }
113
- else if (type == T_FIXNUM || type == T_BIGNUM)
114
- {
115
- RB_NUM2LONG(entry);
116
- }
117
- else
206
+ break;
207
+ case T_FIXNUM:
208
+ case T_BIGNUM:
209
+ NUM2LONG(entry);
210
+ break;
211
+ default:
118
212
  {
119
213
  VALUE range_beg, range_end;
120
214
  long end_val;
121
215
  int open_ended;
122
216
  if (rb_range_values(entry, &range_beg, &range_end, &open_ended) != Qtrue)
123
- rb_raise(rb_eArgError, "path elements must be strings, integers, or ranges");
124
- if (RB_NUM2LONG(range_beg) < 0L)
125
- rb_raise(rb_eArgError, "range start must be positive");
126
- end_val = RB_NUM2LONG(range_end);
127
- if (end_val < -1L)
128
- rb_raise(rb_eArgError, "range end must be positive or -1");
129
- if (end_val == -1L && open_ended)
130
- rb_raise(rb_eArgError, "range with -1 end must be closed");
217
+ return rb_exc_new_cstr(rb_eArgError, "path elements must be strings, integers, or ranges");
218
+ if (range_beg != any_key_sym || range_end != any_key_sym)
219
+ {
220
+ if (NUM2LONG(range_beg) < 0L)
221
+ return rb_exc_new_cstr(rb_eArgError, "range start must be positive");
222
+ end_val = NUM2LONG(range_end);
223
+ if (end_val < -1L)
224
+ return rb_exc_new_cstr(rb_eArgError, "range end must be positive or -1");
225
+ if (end_val == -1L && open_ended)
226
+ return rb_exc_new_cstr(rb_eArgError, "range with -1 end must be closed");
227
+ }
228
+ }
131
229
  }
132
230
  }
133
231
  }
134
232
 
135
- ctx = ruby_xmalloc(sizeof(scan_ctx));
136
-
137
- ctx->with_path = with_path;
138
233
  ctx->max_path_len = 0;
139
234
 
140
235
  paths = ruby_xmalloc(sizeof(paths_t) * path_ary_len);
@@ -149,9 +244,20 @@ scan_ctx *scan_ctx_init(VALUE path_ary, int with_path)
149
244
  for (int j = 0; j < path_len; j++)
150
245
  {
151
246
  VALUE entry = rb_ary_entry(path, j);
152
- int type = TYPE(entry);
153
- if (type == T_STRING)
247
+ switch (TYPE(entry))
154
248
  {
249
+ case T_SYMBOL:
250
+ entry = rb_sym2str(entry);
251
+ /* fall through */
252
+ case T_STRING:
253
+ {
254
+ if (string_keys != Qundef)
255
+ {
256
+ // If string_keys is provided, we need to duplicate the string
257
+ // to avoid use-after-free issues and to add the newly created string to the string_keys array
258
+ entry = rb_str_dup(entry);
259
+ rb_ary_push(string_keys, entry);
260
+ }
155
261
  paths[i].elems[j].type = MATCHER_KEY;
156
262
  paths[i].elems[j].value.key.val = RSTRING_PTR(entry);
157
263
  #if LONG_MAX > SIZE_MAX
@@ -160,25 +266,36 @@ scan_ctx *scan_ctx_init(VALUE path_ary, int with_path)
160
266
  paths[i].elems[j].value.key.len = RSTRING_LEN(entry);
161
267
  #endif
162
268
  }
163
- else if (type == T_FIXNUM || type == T_BIGNUM)
269
+ break;
270
+ case T_FIXNUM:
271
+ case T_BIGNUM:
164
272
  {
165
273
  paths[i].elems[j].type = MATCHER_INDEX;
166
274
  paths[i].elems[j].value.index = FIX2LONG(entry);
167
275
  }
168
- else
276
+ break;
277
+ default:
169
278
  {
170
279
  VALUE range_beg, range_end;
171
280
  int open_ended;
172
- paths[i].elems[j].type = MATCHER_INDEX_RANGE;
173
281
  rb_range_values(entry, &range_beg, &range_end, &open_ended);
174
- paths[i].elems[j].value.range.start = RB_NUM2LONG(range_beg);
175
- paths[i].elems[j].value.range.end = RB_NUM2LONG(range_end);
176
- // (value..-1) works as expected, (value...-1) is forbidden above
177
- if (paths[i].elems[j].value.range.end == -1L)
178
- paths[i].elems[j].value.range.end = LONG_MAX;
179
- // -1 here is fine, so, (0...0) works just as expected - doesn't match anything
180
- if (open_ended)
181
- paths[i].elems[j].value.range.end--;
282
+ if (range_beg == any_key_sym && range_end == any_key_sym)
283
+ {
284
+ paths[i].elems[j].type = MATCHER_ANY_KEY;
285
+ }
286
+ else
287
+ {
288
+ paths[i].elems[j].type = MATCHER_INDEX_RANGE;
289
+ paths[i].elems[j].value.range.start = NUM2LONG(range_beg);
290
+ paths[i].elems[j].value.range.end = NUM2LONG(range_end);
291
+ // (value..-1) works as expected, (value...-1) is forbidden above
292
+ if (paths[i].elems[j].value.range.end == -1L)
293
+ paths[i].elems[j].value.range.end = LONG_MAX;
294
+ // -1 here is fine, so, (0...0) works just as expected - doesn't match anything
295
+ if (open_ended)
296
+ paths[i].elems[j].value.range.end--;
297
+ }
298
+ }
182
299
  }
183
300
  }
184
301
  paths[i].len = path_len;
@@ -189,32 +306,37 @@ scan_ctx *scan_ctx_init(VALUE path_ary, int with_path)
189
306
  ctx->paths_len = path_ary_len;
190
307
  ctx->current_path = ruby_xmalloc2(sizeof(path_elem_t), ctx->max_path_len);
191
308
 
192
- ctx->current_path_len = 0;
193
- ctx->points_list = rb_ary_new_capa(path_ary_len);
194
- for (int i = 0; i < path_ary_len; i++)
195
- {
196
- rb_ary_push(ctx->points_list, rb_ary_new());
197
- }
198
-
199
309
  ctx->starts = ruby_xmalloc2(sizeof(size_t), ctx->max_path_len + 1);
310
+ return Qundef; // no error
311
+ }
312
+
313
+ // resets temporary values in the config
314
+ void scan_ctx_reset(scan_ctx *ctx, VALUE points_list, int with_path, int symbolize_path_keys)
315
+ {
316
+ // TODO: reset matched_depth if implemented
317
+ ctx->current_path_len = 0;
200
318
  // ctx->rb_err = Qnil;
201
319
  ctx->handle = NULL;
202
-
203
- return ctx;
320
+ ctx->yajl_bytes_consumed = 0;
321
+ ctx->points_list = points_list;
322
+ ctx->with_path = with_path;
323
+ ctx->symbolize_path_keys = symbolize_path_keys;
204
324
  }
205
325
 
206
326
  void scan_ctx_free(scan_ctx *ctx)
207
327
  {
328
+ // fprintf(stderr, "scan_ctx_free\n");
208
329
  if (!ctx)
209
330
  return;
210
331
  ruby_xfree(ctx->starts);
211
332
  ruby_xfree(ctx->current_path);
333
+ if (!ctx->paths)
334
+ return;
212
335
  for (int i = 0; i < ctx->paths_len; i++)
213
336
  {
214
337
  ruby_xfree(ctx->paths[i].elems);
215
338
  }
216
339
  ruby_xfree(ctx->paths);
217
- ruby_xfree(ctx);
218
340
  }
219
341
 
220
342
  // noexcept
@@ -239,37 +361,38 @@ typedef enum
239
361
  } value_type;
240
362
 
241
363
  // noexcept
242
- VALUE create_point(scan_ctx *sctx, value_type type, size_t length, size_t curr_pos)
364
+ VALUE create_point(scan_ctx *sctx, value_type type, size_t length)
243
365
  {
244
- VALUE values[3];
245
- VALUE point = rb_ary_new_capa(3);
366
+ VALUE values[3], point;
367
+ size_t curr_pos = scan_ctx_get_bytes_consumed(sctx);
368
+ point = rb_ary_new_capa(3);
246
369
  // noexcept
247
- values[1] = RB_ULONG2NUM(curr_pos);
370
+ values[1] = ULL2NUM(curr_pos);
248
371
  switch (type)
249
372
  {
250
373
  // FIXME: size_t can be longer than ulong
251
374
  case null_value:
252
- values[0] = RB_ULONG2NUM(curr_pos - length);
375
+ values[0] = ULL2NUM(curr_pos - length);
253
376
  values[2] = null_sym;
254
377
  break;
255
378
  case boolean_value:
256
- values[0] = RB_ULONG2NUM(curr_pos - length);
379
+ values[0] = ULL2NUM(curr_pos - length);
257
380
  values[2] = boolean_sym;
258
381
  break;
259
382
  case number_value:
260
- values[0] = RB_ULONG2NUM(curr_pos - length);
383
+ values[0] = ULL2NUM(curr_pos - length);
261
384
  values[2] = number_sym;
262
385
  break;
263
386
  case string_value:
264
- values[0] = RB_ULONG2NUM(curr_pos - length);
387
+ values[0] = ULL2NUM(curr_pos - length);
265
388
  values[2] = string_sym;
266
389
  break;
267
390
  case object_value:
268
- values[0] = RB_ULONG2NUM(sctx->starts[sctx->current_path_len]);
391
+ values[0] = ULL2NUM(sctx->starts[sctx->current_path_len]);
269
392
  values[2] = object_sym;
270
393
  break;
271
394
  case array_value:
272
- values[0] = RB_ULONG2NUM(sctx->starts[sctx->current_path_len]);
395
+ values[0] = ULL2NUM(sctx->starts[sctx->current_path_len]);
273
396
  values[2] = array_sym;
274
397
  break;
275
398
  }
@@ -288,10 +411,13 @@ VALUE create_path(scan_ctx *sctx)
288
411
  switch (sctx->current_path[i].type)
289
412
  {
290
413
  case PATH_KEY:
291
- entry = rb_str_new(sctx->current_path[i].value.key.val, sctx->current_path[i].value.key.len);
414
+ if (sctx->symbolize_path_keys)
415
+ entry = rb_id2sym(rb_intern2(sctx->current_path[i].value.key.val, sctx->current_path[i].value.key.len));
416
+ else
417
+ entry = rb_str_new(sctx->current_path[i].value.key.val, sctx->current_path[i].value.key.len);
292
418
  break;
293
419
  case PATH_INDEX:
294
- entry = RB_ULONG2NUM(sctx->current_path[i].value.index);
420
+ entry = LONG2NUM(sctx->current_path[i].value.index);
295
421
  break;
296
422
  default:
297
423
  entry = Qnil;
@@ -307,7 +433,7 @@ void save_point(scan_ctx *sctx, value_type type, size_t length)
307
433
  // TODO: Abort parsing if all paths are matched and no more mathces are possible: only trivial key/index matchers at the current level
308
434
  // TODO: Don't re-compare already matched prefixes; hard to invalidate, though
309
435
  // TODO: Might fail in case of no memory
310
- VALUE point = Qundef;
436
+ VALUE point = Qundef, path;
311
437
  int match;
312
438
  for (int i = 0; i < sctx->paths_len; i++)
313
439
  {
@@ -319,6 +445,10 @@ void save_point(scan_ctx *sctx, value_type type, size_t length)
319
445
  {
320
446
  switch (sctx->paths[i].elems[j].type)
321
447
  {
448
+ case MATCHER_ANY_KEY:
449
+ if (sctx->current_path[j].type != PATH_KEY)
450
+ match = false;
451
+ break;
322
452
  case MATCHER_KEY:
323
453
  if (sctx->current_path[j].type != PATH_KEY ||
324
454
  sctx->current_path[j].value.key.len != sctx->paths[i].elems[j].value.key.len ||
@@ -344,10 +474,11 @@ void save_point(scan_ctx *sctx, value_type type, size_t length)
344
474
  {
345
475
  if (point == Qundef)
346
476
  {
347
- point = create_point(sctx, type, length, yajl_get_bytes_consumed(sctx->handle));
477
+ point = create_point(sctx, type, length);
348
478
  if (sctx->with_path)
349
479
  {
350
- point = rb_ary_new_from_args(2, create_path(sctx), point);
480
+ path = create_path(sctx);
481
+ point = rb_ary_new_from_args(2, path, point);
351
482
  }
352
483
  }
353
484
  // rb_ary_push raises only in case of a frozen array, which is not the case
@@ -411,7 +542,7 @@ int scan_on_start_object(void *ctx)
411
542
  return true;
412
543
  }
413
544
  increment_arr_index(sctx);
414
- sctx->starts[sctx->current_path_len] = yajl_get_bytes_consumed(sctx->handle) - 1;
545
+ sctx->starts[sctx->current_path_len] = scan_ctx_get_bytes_consumed(sctx) - 1;
415
546
  if (sctx->current_path_len < sctx->max_path_len)
416
547
  sctx->current_path[sctx->current_path_len].type = PATH_KEY;
417
548
  sctx->current_path_len++;
@@ -451,7 +582,7 @@ int scan_on_start_array(void *ctx)
451
582
  return true;
452
583
  }
453
584
  increment_arr_index(sctx);
454
- sctx->starts[sctx->current_path_len] = yajl_get_bytes_consumed(sctx->handle) - 1;
585
+ sctx->starts[sctx->current_path_len] = scan_ctx_get_bytes_consumed(sctx) - 1;
455
586
  if (sctx->current_path_len < sctx->max_path_len)
456
587
  {
457
588
  sctx->current_path[sctx->current_path_len].type = PATH_INDEX;
@@ -471,6 +602,107 @@ int scan_on_end_array(void *ctx)
471
602
  return true;
472
603
  }
473
604
 
605
+ void config_free(void *data)
606
+ {
607
+ scan_ctx_free((scan_ctx *)data);
608
+ ruby_xfree(data);
609
+ }
610
+
611
+ size_t config_size(const void *data)
612
+ {
613
+ // see ObjectSpace.memsize_of
614
+ scan_ctx *ctx = (scan_ctx *)data;
615
+ size_t res = sizeof(scan_ctx);
616
+ // current_path
617
+ if (ctx->current_path != NULL)
618
+ res += ctx->max_path_len * sizeof(path_elem_t);
619
+ // starts
620
+ if (ctx->starts != NULL)
621
+ res += ctx->max_path_len * sizeof(size_t);
622
+ if (ctx->paths != NULL)
623
+ {
624
+ res += ctx->paths_len * sizeof(paths_t);
625
+ for (int i = 0; i < ctx->paths_len; i++)
626
+ {
627
+ res += ctx->paths[i].len * sizeof(path_matcher_elem_t);
628
+ }
629
+ }
630
+ return res;
631
+ }
632
+
633
+ static const rb_data_type_t config_type = {
634
+ .wrap_struct_name = "json_scanner_config",
635
+ .function = {
636
+ .dfree = config_free,
637
+ .dsize = config_size,
638
+ },
639
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY,
640
+ };
641
+
642
+ VALUE config_alloc(VALUE self)
643
+ {
644
+ scan_ctx *ctx = ruby_xmalloc(sizeof(scan_ctx));
645
+ ctx->paths = NULL;
646
+ ctx->paths_len = 0;
647
+ ctx->current_path = NULL;
648
+ ctx->max_path_len = 0;
649
+ ctx->starts = NULL;
650
+ scan_ctx_reset(ctx, Qundef, false, false);
651
+ return TypedData_Wrap_Struct(self, &config_type, ctx);
652
+ }
653
+
654
+ VALUE config_m_initialize(VALUE self, VALUE path_ary)
655
+ {
656
+ scan_ctx *ctx;
657
+ VALUE scan_ctx_init_err, string_keys;
658
+ TypedData_Get_Struct(self, scan_ctx, &config_type, ctx);
659
+ string_keys = rb_ary_new();
660
+ scan_ctx_init_err = scan_ctx_init(ctx, path_ary, string_keys);
661
+ if (scan_ctx_init_err != Qundef)
662
+ {
663
+ rb_exc_raise(scan_ctx_init_err);
664
+ }
665
+ rb_iv_set(self, "string_keys", string_keys);
666
+ return self;
667
+ }
668
+
669
+ VALUE config_m_inspect(VALUE self)
670
+ {
671
+ scan_ctx *ctx;
672
+ VALUE res;
673
+ TypedData_Get_Struct(self, scan_ctx, &config_type, ctx);
674
+ res = rb_sprintf("#<%" PRIsVALUE " [", rb_class_name(CLASS_OF(self)));
675
+ for (int i = 0; ctx->paths && i < ctx->paths_len; i++)
676
+ {
677
+ rb_str_cat_cstr(res, "[");
678
+ for (int j = 0; j < ctx->paths[i].len; j++)
679
+ {
680
+ switch (ctx->paths[i].elems[j].type)
681
+ {
682
+ case MATCHER_KEY:
683
+ rb_str_catf(res, "'%.*s'", (int)ctx->paths[i].elems[j].value.key.len, ctx->paths[i].elems[j].value.key.val);
684
+ break;
685
+ case MATCHER_INDEX:
686
+ rb_str_catf(res, "%ld", ctx->paths[i].elems[j].value.index);
687
+ break;
688
+ case MATCHER_INDEX_RANGE:
689
+ rb_str_catf(res, "(%ld..%ld)", ctx->paths[i].elems[j].value.range.start, ctx->paths[i].elems[j].value.range.end);
690
+ break;
691
+ case MATCHER_ANY_KEY:
692
+ rb_str_cat_cstr(res, "('*'..'*')");
693
+ break;
694
+ }
695
+ if (j < ctx->paths[i].len - 1)
696
+ rb_str_cat_cstr(res, ", ");
697
+ }
698
+ rb_str_cat_cstr(res, "]");
699
+ if (i < ctx->paths_len - 1)
700
+ rb_str_cat_cstr(res, ", ");
701
+ }
702
+ rb_str_cat_cstr(res, "]>");
703
+ return res;
704
+ }
705
+
474
706
  static yajl_callbacks scan_callbacks = {
475
707
  scan_on_null,
476
708
  scan_on_boolean,
@@ -492,15 +724,16 @@ static yajl_callbacks scan_callbacks = {
492
724
  VALUE scan(int argc, VALUE *argv, VALUE self)
493
725
  {
494
726
  VALUE json_str, path_ary, with_path_flag, kwargs;
495
- VALUE kwargs_values[7];
727
+ VALUE kwargs_values[SCAN_KWARGS_SIZE];
496
728
 
497
- int with_path = false, verbose_error = false;
729
+ int with_path = false, verbose_error = false, symbolize_path_keys = false;
498
730
  char *json_text;
499
731
  size_t json_text_len;
500
732
  yajl_handle handle;
501
733
  yajl_status stat;
502
734
  scan_ctx *ctx;
503
- VALUE err = Qnil, result;
735
+ int free_ctx = true;
736
+ VALUE err_msg = Qnil, bytes_consumed, err, result;
504
737
  // Turned out callbacks can't raise exceptions
505
738
  // VALUE callback_err;
506
739
  #if RUBY_API_VERSION_MAJOR > 2 || (RUBY_API_VERSION_MAJOR == 2 && RUBY_API_VERSION_MINOR >= 7)
@@ -512,11 +745,13 @@ VALUE scan(int argc, VALUE *argv, VALUE self)
512
745
  with_path = RTEST(with_path_flag);
513
746
  if (kwargs != Qnil)
514
747
  {
515
- rb_get_kwargs(kwargs, scan_kwargs_table, 0, 7, kwargs_values);
748
+ rb_get_kwargs(kwargs, scan_kwargs_table, 0, SCAN_KWARGS_SIZE, kwargs_values);
516
749
  if (kwargs_values[0] != Qundef)
517
750
  with_path = RTEST(kwargs_values[0]);
518
751
  if (kwargs_values[1] != Qundef)
519
752
  verbose_error = RTEST(kwargs_values[1]);
753
+ if (kwargs_values[7] != Qundef)
754
+ symbolize_path_keys = RTEST(kwargs_values[7]);
520
755
  }
521
756
  rb_check_type(json_str, T_STRING);
522
757
  json_text = RSTRING_PTR(json_str);
@@ -525,7 +760,30 @@ VALUE scan(int argc, VALUE *argv, VALUE self)
525
760
  #else
526
761
  json_text_len = RSTRING_LEN(json_str);
527
762
  #endif
528
- ctx = scan_ctx_init(path_ary, with_path);
763
+ if (rb_obj_is_kind_of(path_ary, rb_cJsonScannerConfig))
764
+ {
765
+ free_ctx = false;
766
+ TypedData_Get_Struct(path_ary, scan_ctx, &config_type, ctx);
767
+ }
768
+ else
769
+ {
770
+ VALUE scan_ctx_init_err;
771
+ ctx = ruby_xmalloc(sizeof(scan_ctx));
772
+ scan_ctx_init_err = scan_ctx_init(ctx, path_ary, Qundef);
773
+ if (scan_ctx_init_err != Qundef)
774
+ {
775
+ ruby_xfree(ctx);
776
+ rb_exc_raise(scan_ctx_init_err);
777
+ }
778
+ }
779
+ // Need to keep a ref to result array on the stack to prevent it from being GC-ed
780
+ result = rb_ary_new_capa(ctx->paths_len);
781
+ for (int i = 0; i < ctx->paths_len; i++)
782
+ {
783
+ rb_ary_push(result, rb_ary_new());
784
+ }
785
+ scan_ctx_reset(ctx, result, with_path, symbolize_path_keys);
786
+ // scan_ctx_debug(ctx);
529
787
 
530
788
  handle = yajl_alloc(&scan_callbacks, NULL, (void *)ctx);
531
789
  if (kwargs != Qnil) // it's safe to read kwargs_values only if rb_get_kwargs was called
@@ -543,24 +801,55 @@ VALUE scan(int argc, VALUE *argv, VALUE self)
543
801
  }
544
802
  ctx->handle = handle;
545
803
  stat = yajl_parse(handle, (unsigned char *)json_text, json_text_len);
804
+ scan_ctx_update_bytes_consumed(ctx);
546
805
  if (stat == yajl_status_ok)
806
+ {
547
807
  stat = yajl_complete_parse(handle);
808
+ scan_ctx_update_bytes_consumed(ctx);
809
+ }
548
810
 
549
811
  if (stat != yajl_status_ok)
550
812
  {
551
813
  char *str = (char *)yajl_get_error(handle, verbose_error, (unsigned char *)json_text, json_text_len);
552
- err = rb_utf8_str_new_cstr(str);
814
+ err_msg = rb_utf8_str_new_cstr(str);
815
+ // TODO: maybe use scan_ctx_get_bytes_consumed here too? But it makes a difference in the premature EOF case
816
+ bytes_consumed = ULL2NUM(yajl_get_bytes_consumed(handle));
553
817
  yajl_free_error(handle, (unsigned char *)str);
554
818
  }
819
+ // // Needed when yajl_allow_partial_values is set
820
+ // if (ctx->current_path_len > 0)
821
+ // {
822
+ // if (ctx->current_path_len > ctx->max_path_len)
823
+ // ctx->current_path_len = ctx->max_path_len;
824
+ // for (int i = ctx->current_path_len - 1; i > 0; i--)
825
+ // {
826
+ // switch (ctx->current_path[i].type)
827
+ // {
828
+ // case PATH_KEY:
829
+ // scan_on_end_object(ctx);
830
+ // break;
831
+ // case PATH_INDEX:
832
+ // scan_on_end_array(ctx);
833
+ // break;
834
+ // }
835
+ // }
836
+ // }
555
837
  // callback_err = ctx->rb_err;
556
- result = ctx->points_list;
557
- scan_ctx_free(ctx);
838
+ if (free_ctx)
839
+ {
840
+ // fprintf(stderr, "free_ctx\n");
841
+ scan_ctx_free(ctx);
842
+ ruby_xfree(ctx);
843
+ }
558
844
  yajl_free(handle);
559
- if (err != Qnil)
560
- rb_exc_raise(rb_exc_new_str(rb_eJsonScannerParseError, err));
845
+ if (err_msg != Qnil)
846
+ {
847
+ err = rb_exc_new_str(rb_eJsonScannerParseError, err_msg);
848
+ rb_ivar_set(err, rb_iv_bytes_consumed, bytes_consumed);
849
+ rb_exc_raise(err);
850
+ }
561
851
  // if (callback_err != Qnil)
562
852
  // rb_exc_raise(callback_err);
563
- // TODO: report yajl_get_bytes_consumed(handle)
564
853
  return result;
565
854
  }
566
855
 
@@ -568,8 +857,16 @@ RUBY_FUNC_EXPORTED void
568
857
  Init_json_scanner(void)
569
858
  {
570
859
  rb_mJsonScanner = rb_define_module("JsonScanner");
860
+ rb_cJsonScannerConfig = rb_define_class_under(rb_mJsonScanner, "Config", rb_cObject);
861
+ rb_define_alloc_func(rb_cJsonScannerConfig, config_alloc);
862
+ rb_define_method(rb_cJsonScannerConfig, "initialize", config_m_initialize, 1);
863
+ rb_define_method(rb_cJsonScannerConfig, "inspect", config_m_inspect, 0);
571
864
  rb_define_const(rb_mJsonScanner, "ANY_INDEX", rb_range_new(INT2FIX(0), INT2FIX(-1), false));
865
+ any_key_sym = rb_id2sym(rb_intern("*"));
866
+ rb_define_const(rb_mJsonScanner, "ANY_KEY", rb_range_new(any_key_sym, any_key_sym, false));
572
867
  rb_eJsonScannerParseError = rb_define_class_under(rb_mJsonScanner, "ParseError", rb_eRuntimeError);
868
+ rb_define_attr(rb_eJsonScannerParseError, BYTES_CONSUMED, true, false);
869
+ rb_iv_bytes_consumed = rb_intern("@" BYTES_CONSUMED);
573
870
  rb_define_module_function(rb_mJsonScanner, "scan", scan, -1);
574
871
  null_sym = rb_id2sym(rb_intern("null"));
575
872
  boolean_sym = rb_id2sym(rb_intern("boolean"));
@@ -584,4 +881,5 @@ Init_json_scanner(void)
584
881
  scan_kwargs_table[4] = rb_intern("allow_trailing_garbage");
585
882
  scan_kwargs_table[5] = rb_intern("allow_multiple_values");
586
883
  scan_kwargs_table[6] = rb_intern("allow_partial_values");
884
+ scan_kwargs_table[7] = rb_intern("symbolize_path_keys");
587
885
  }
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module JsonScanner
4
- VERSION = "0.2.0"
4
+ VERSION = "0.3.0"
5
5
  end
@@ -0,0 +1,128 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "rake/clean"
4
+ require "rake/extensiontask"
5
+
6
+ module Rake
7
+ class ExtensionTestTask < ExtensionTask
8
+ #
9
+ # The C files to compile.
10
+ #
11
+ attr_accessor :c_spec_files
12
+
13
+ #
14
+ # The folders where includes for the test files are.
15
+ #
16
+ # Default: %w{/usr/include /usr/include/google}
17
+ #
18
+ attr_accessor :test_includes
19
+
20
+ #
21
+ # The libraries to link against.
22
+ #
23
+ # Default: %w{cmockery}
24
+ #
25
+ attr_accessor :test_libraries
26
+
27
+ #
28
+ # The folders where the libraries are
29
+ #
30
+ # Default: %w{/usr/lib}
31
+ #
32
+ attr_accessor :test_lib_folders
33
+
34
+ def initialize(*args, &block)
35
+ super
36
+ @c_spec_files = []
37
+ @test_includes = %w[/usr/include /usr/include/google]
38
+ @test_libraries = %w[cmockery]
39
+ @test_lib_folders = %w[/usr/lib]
40
+ init_test_tasks(
41
+ "#{@tmp_dir}/test", "compile:#{@name}:test",
42
+ "spec:c:#{@name}", "spec:valgrind:#{@name}", "spec:gdb:#{@name}",
43
+ )
44
+ end
45
+
46
+ private
47
+
48
+ def includes
49
+ @includes ||= (@test_includes + [
50
+ ".",
51
+ "../../#{@ext_dir}",
52
+ "/usr/include/ruby-#{RUBY_VERSION}",
53
+ "/usr/include/ruby-#{RUBY_VERSION}/#{RUBY_PLATFORM}",
54
+ ]).map { |l| "-I#{l}" }.join(" ")
55
+ end
56
+
57
+ def libraries
58
+ @libraries ||= (@test_libraries + %w[ruby pthread crypto]).map { |l| "-l#{l}" }.join(" ")
59
+ end
60
+
61
+ def lib_folders
62
+ @lib_folders ||= (@test_lib_folders + %w[/usr/lib .]).map { |l| "-L#{l}" }.join(" ")
63
+ end
64
+
65
+ def compile_tests
66
+ # compile the test sources
67
+ FileList["*.c"].each do |cfile|
68
+ sh "gcc -g #{includes} -c #{cfile}"
69
+ end
70
+
71
+ source_objects = FileList["../#{RUBY_PLATFORM}/#{@name}/#{RUBY_VERSION}/*.o"]
72
+ # link the executables
73
+ FileList["*.o"].each do |ofile|
74
+ sh "gcc -g #{lib_folders} #{libraries} #{source_objects} #{ofile} -o #{ofile.ext}"
75
+ end
76
+ end
77
+
78
+ def init_compile_task(compile_dir, compile_task)
79
+ directory compile_dir
80
+ desc "Compile #{@name} tests"
81
+ task compile_task => ["compile:#{@name}", compile_dir] do
82
+ # copy the test files into the compilation folder
83
+ @c_spec_files.each { |file| cp file, compile_dir }
84
+
85
+ # start compilation
86
+ chdir(compile_dir) { compile_tests }
87
+ end
88
+ end
89
+
90
+ def init_valgrind_task(compile_dir, compile_task, valgrind_task)
91
+ desc "Execute valgrind for a #{@name} test"
92
+ task valgrind_task => [compile_task] do |_t, args|
93
+ sh "valgrind --num-callers=50 --error-limit=no --partial-loads-ok=yes --undef-value-errors=no " \
94
+ "--leak-check=full #{compile_dir}/#{args.test}"
95
+ end
96
+ end
97
+
98
+ def init_gdb_task(compile_dir, compile_task, gdb_task)
99
+ desc "Execute gdb for a #{@name} test"
100
+ task gdb_task => [compile_task] do |_t, args|
101
+ sh "gdb #{compile_dir}/#{args.test}"
102
+ end
103
+ end
104
+
105
+ def init_test_task(compile_dir, compile_task, test_task)
106
+ desc "Test #{@name}"
107
+ task test_task => [compile_task] do |_t, args|
108
+ if args.test
109
+ sh "#{compile_dir}/#{args.test}"
110
+ else
111
+ FileList["#{compile_dir}/*.o"].each do |ofile|
112
+ sh ofile.ext.to_s
113
+ end
114
+ end
115
+ end
116
+ end
117
+
118
+ def init_test_tasks(compile_dir, compile_task, test_task, valgrind_task, gdb_task)
119
+ init_compile_task(compile_dir, compile_task)
120
+ init_valgrind_task(compile_dir, compile_task, valgrind_task)
121
+ init_gdb_task(compile_dir, compile_task, gdb_task)
122
+ init_test_task(compile_dir, compile_task, test_task)
123
+
124
+ desc "Test all C extensions"
125
+ task "spec:c" => [test_task]
126
+ end
127
+ end
128
+ end
File without changes
@@ -22,6 +22,34 @@ RSpec.describe JsonScanner do
22
22
  )
23
23
  end
24
24
 
25
+ it "supports 'symbolize_path_keys'" do
26
+ expect(
27
+ described_class.scan('{"a": {"b": 1}}', [[:a, "b"]], with_path: true),
28
+ ).to eq([[[%w[a b], [12, 13, :number]]]])
29
+ expect(
30
+ described_class.scan('{"a": {"b": 1}}', [[:a, "b"]], with_path: true, symbolize_path_keys: true),
31
+ ).to eq([[[%i[a b], [12, 13, :number]]]])
32
+ end
33
+
34
+ it "supports any key selector" do
35
+ expect(
36
+ described_class.scan(
37
+ '[{"a":1,"b":2},{"c":3,"d":4},[5]]',
38
+ [[described_class::ANY_INDEX, described_class::ANY_KEY]],
39
+ ),
40
+ ).to eq(
41
+ [[[6, 7, :number], [12, 13, :number], [20, 21, :number], [26, 27, :number]]],
42
+ )
43
+ expect(
44
+ described_class.scan(
45
+ '{"a":[1,2],"b":{"c":3}}',
46
+ [[described_class::ANY_KEY, described_class::ANY_INDEX]],
47
+ ),
48
+ ).to eq(
49
+ [[[6, 7, :number], [8, 9, :number]]],
50
+ )
51
+ end
52
+
25
53
  it "works with max path len correctly" do
26
54
  expect(
27
55
  described_class.scan('{"a": [1]}', [[], ["a"]]),
@@ -90,11 +118,24 @@ RSpec.describe JsonScanner do
90
118
  expect do
91
119
  described_class.scan "{1}", [], verbose_error: true
92
120
  end.to raise_error described_class::ParseError, /invalid object key(?=.*\(right here\))/m
121
+ expect do
122
+ described_class.scan("[0, 42,", [[(1..-1)]], verbose_error: true)
123
+ end.to raise_error described_class::ParseError, /parse error: premature EOF.*\[0, 42,.*\(right here\) ------\^/m
124
+ end
125
+
126
+ it "includes bytes consumed in the exception" do
127
+ expect do
128
+ described_class.scan("[[1,2],,[3,4]]", [])
129
+ end.to(
130
+ raise_error(described_class::ParseError) do |exc|
131
+ expect(exc.bytes_consumed).to eq(8)
132
+ end,
133
+ )
93
134
  end
94
135
 
95
136
  it "allows to return an actual path to the element" do
96
137
  with_path_expected_res = [
97
- # result for first mathcer, each element array of two items:
138
+ # result for first matcher, each element array of two items:
98
139
  # array of path elements and 3-element array start,end,type
99
140
  [[[0], [1, 6, :array]], [[1], [7, 12, :array]]],
100
141
  [
@@ -128,7 +169,7 @@ RSpec.describe JsonScanner do
128
169
  ),
129
170
  ).to eq(
130
171
  [
131
- # result for first mathcer, each element 3-element array start,end,type
172
+ # result for first matcher, each element 3-element array start,end,type
132
173
  [[1, 6, :array], [7, 12, :array]],
133
174
  [
134
175
  [2, 3, :number], [4, 5, :number],
@@ -190,4 +231,122 @@ RSpec.describe JsonScanner do
190
231
  described_class.scan(json, [[]])
191
232
  end.to raise_error(described_class::ParseError)
192
233
  end
234
+
235
+ context "with yajl params" do
236
+ it "supports 'allow_comments'" do
237
+ params = ["[0, /* answer */ 42, 0]", [[(1..-1)]]]
238
+ expect(described_class.scan(*params, allow_comments: true)).to eq(
239
+ [[[17, 19, :number], [21, 22, :number]]],
240
+ )
241
+ expect do
242
+ described_class.scan(*params)
243
+ end.to raise_error(described_class::ParseError)
244
+ end
245
+
246
+ it "supports 'dont_validate_strings'" do
247
+ params = ["\"\x81\x83\"", [[]]]
248
+ expect(described_class.scan(*params, dont_validate_strings: true)).to eq(
249
+ [[[0, 4, :string]]],
250
+ )
251
+ expect do
252
+ described_class.scan(*params)
253
+ end.to raise_error(described_class::ParseError)
254
+ params = ["{\"\x81\x83\": 42}", [[JsonScanner::ANY_KEY]]]
255
+ expect(described_class.scan(*params, dont_validate_strings: true, with_path: true)).to eq(
256
+ [[[["\x81\x83".dup.force_encoding(Encoding::BINARY)], [7, 9, :number]]]],
257
+ )
258
+ expect do
259
+ described_class.scan(*params, with_path: true)
260
+ end.to raise_error(described_class::ParseError)
261
+ end
262
+
263
+ it "supports 'allow_trailing_garbage'" do
264
+ params = ["[0, 42, 0]garbage", [[(1..-1)]]]
265
+ expect(described_class.scan(*params, allow_trailing_garbage: true)).to eq(
266
+ [[[4, 6, :number], [8, 9, :number]]],
267
+ )
268
+ expect do
269
+ described_class.scan(*params)
270
+ end.to raise_error(described_class::ParseError)
271
+ end
272
+
273
+ it "supports 'allow_multiple_values'" do
274
+ params = ["[0, 42, 0] [0, 34]", [[(1..-1)]]]
275
+ expect(described_class.scan(*params, allow_multiple_values: true)).to eq(
276
+ [[[4, 6, :number], [8, 9, :number], [16, 18, :number]]],
277
+ )
278
+ expect do
279
+ described_class.scan(*params)
280
+ end.to raise_error(described_class::ParseError)
281
+ end
282
+
283
+ it "handles multiple top-level values correctly with 'allow_multiple_values'" do
284
+ expect(described_class.scan("[0, 42, 0] [0, 34]", [[]], allow_multiple_values: true)).to eq(
285
+ [[[0, 10, :array], [12, 19, :array]]],
286
+ )
287
+ expect(described_class.scan('{"42": 34} [0, 34]', [[]], allow_multiple_values: true)).to eq(
288
+ [[[0, 10, :object], [12, 19, :array]]],
289
+ )
290
+ expect(described_class.scan('[0, 42, 0] {"42": 34}', [[]], allow_multiple_values: true)).to eq(
291
+ [[[0, 10, :array], [12, 22, :object]]],
292
+ )
293
+ expect(described_class.scan('{"42": 34} {"0": 34}', [[]], allow_multiple_values: true)).to eq(
294
+ [[[0, 10, :object], [12, 21, :object]]],
295
+ )
296
+ end
297
+
298
+ it "supports 'allow_partial_values'" do
299
+ params = ["[0, 42, 0,", [[(1..-1)]]]
300
+ expect(described_class.scan(*params, allow_partial_values: true)).to eq(
301
+ [[[4, 6, :number], [8, 9, :number]]],
302
+ )
303
+ expect do
304
+ described_class.scan(*params)
305
+ end.to raise_error(described_class::ParseError)
306
+ expect(described_class.scan("[0, 42, 0", [[(1..-1)]], allow_partial_values: true)).to eq(
307
+ [[[4, 6, :number], [8, 9, :number]]],
308
+ )
309
+ expect(described_class.scan("[0, 42, true", [[(1..-1)]], allow_partial_values: true)).to eq(
310
+ [[[4, 6, :number], [8, 12, :boolean]]],
311
+ )
312
+ end
313
+ end
314
+
315
+ describe described_class::Config do
316
+ it "saves state" do
317
+ key = "abracadabra".dup
318
+ conf = described_class.new [[], [key]]
319
+ key["cad"] = 0.chr
320
+ key = nil # rubocop:disable Lint/UselessAssignment
321
+ GC.start
322
+ expect(
323
+ 10.times.map do
324
+ JsonScanner.scan '{"abracadabra": 10}', conf, with_path: true
325
+ end.uniq,
326
+ ).to eq([[[[[], [0, 19, :object]]], [[["abracadabra"], [16, 18, :number]]]]])
327
+ expect(
328
+ 10.times.map do
329
+ JsonScanner.scan '{"abracadabra": 10}', conf
330
+ end.uniq,
331
+ ).to eq([[[[0, 19, :object]], [[16, 18, :number]]]])
332
+ end
333
+
334
+ it "re-raises exceptions" do
335
+ expect do
336
+ described_class.new [[(0...-1)]]
337
+ end.to raise_error ArgumentError
338
+ expect do
339
+ described_class.new [[(0..-2)]]
340
+ end.to raise_error ArgumentError
341
+ expect do
342
+ described_class.new [[(-42..1)]]
343
+ end.to raise_error ArgumentError
344
+ end
345
+
346
+ it "supports inspect" do
347
+ expect(
348
+ described_class.new([[], ["abracadabra", JsonScanner::ANY_INDEX], [42, JsonScanner::ANY_KEY]]).inspect,
349
+ ).to eq("#<JsonScanner::Config [[], ['abracadabra', (0..9223372036854775807)], [42, ('*'..'*')]]>")
350
+ end
351
+ end
193
352
  end
metadata CHANGED
@@ -1,16 +1,16 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: json_scanner
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - uvlad7
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-12-27 00:00:00.000000000 Z
11
+ date: 2025-08-14 00:00:00.000000000 Z
12
12
  dependencies: []
13
- description: This gem uses yajl lib to scan a json string and allows you to parse
13
+ description: This gem uses the yajl lib to scan a JSON string and allows you to parse
14
14
  pieces of it
15
15
  email:
16
16
  - uvlad7@gmail.com
@@ -26,6 +26,8 @@ files:
26
26
  - lib/json_scanner.rb
27
27
  - lib/json_scanner/version.rb
28
28
  - sig/json_scanner.rbs
29
+ - spec/extensiontesttask.rb
30
+ - spec/json_scanner_spec.c
29
31
  - spec/json_scanner_spec.rb
30
32
  - spec/spec_helper.rb
31
33
  homepage: https://github.com/uvlad7/json_scanner