json_scanner 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: d0af7c4c2fce9ca74ec96c00e8972088b34c99a005b8fc966d1a0e9ae7d75dcb
4
+ data.tar.gz: 70f2365add4838ef7409d3ff9568ab59d1a3771ac38d6168757bf8de1de71b1d
5
+ SHA512:
6
+ metadata.gz: 96958c94108fafca33f68f091dcea150e549e1fa61c02aaf62790f0d2f77c8762abe7b702a4cf95b9d4e28929a3dd1ce77681bb7cc0f6e7e8fdd22d32f74f378
7
+ data.tar.gz: 3da8a8713b1f1994d50ee3032d450b5d34070c843e7c2625db1f2945c5a6c1cdc223d9b957c0386562ea1fdf68d2d03c6fc985ca2f0ddda8a7f3b83ff2c19b36
data/README.md ADDED
@@ -0,0 +1,51 @@
1
+ [![Tests](https://github.com/uvlad7/json_scanner/actions/workflows/main.yml/badge.svg)](https://github.com/uvlad7/json_scanner/actions/workflows/main.yml)
2
+
3
+ # JsonScanner
4
+
5
+ Extract values from JSON without full parsing. This gem uses the yajl library to scan a JSON string and lets you parse only the pieces of it that you need.
6
+
7
+ ## Installation
8
+
9
+ Install the gem and add it to the application's Gemfile by executing:
10
+
11
+ $ bundle add json_scanner
12
+
13
+ If bundler is not being used to manage dependencies, install the gem by executing:
14
+
15
+ $ gem install json_scanner
16
+
17
+ ## Usage
18
+
19
+ ```ruby
20
+ require "json"
21
+ require "json_scanner"
22
+
23
+ large_json = "[#{"4," * 100_000}42#{",2" * 100_000}]"
24
+ where_is_42 = JsonScanner.scan(large_json, [[100_000]], false).first
25
+ # => [[200001, 200003, :number]]
26
+ where_is_42.map do |begin_pos, end_pos, _type|
27
+ JSON.parse(large_json.byteslice(begin_pos...end_pos), quirks_mode: true)
28
+ end
29
+ # => [42]
30
+
31
+ emoji_json = '{"grin": "😁", "heart": "😍", "rofl": "🤣"}'
32
+ begin_pos, end_pos, = JsonScanner.scan(emoji_json, [["heart"]], false).first.first
33
+ emoji_json.byteslice(begin_pos...end_pos)
34
+ # => "\"😍\""
35
+ emoji_json.force_encoding(Encoding::BINARY)[begin_pos...end_pos].force_encoding(Encoding::UTF_8)
36
+ # => "\"😍\""
37
+ ```
38
+
39
+ ## Development
40
+
41
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
42
+
43
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and the created tag, and push the `.gem` file to [rubygems.org](https://rubygems.org).
44
+
45
+ ## Contributing
46
+
47
+ Bug reports and pull requests are welcome on [GitHub](https://github.com/uvlad7/json_scanner).
48
+
49
+ ## License
50
+
51
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "mkmf" # Ruby's Makefile generator for C extensions
4
+
5
+ # Makes all symbols private by default to avoid unintended conflict
6
+ # with other gems. To explicitly export symbols you can use RUBY_FUNC_EXPORTED
7
+ # selectively, or entirely remove this flag.
8
+ append_cflags("-fvisibility=hidden")
9
+
10
+ dir_config("yajl", "", "") # lets the yajl location be overridden with --with-yajl-dir / -include / -lib
11
+
12
+ unless have_library("yajl") && have_header("yajl/yajl_parse.h") && have_header("yajl/yajl_gen.h") # libyajl and both headers are required
13
+ abort "yajl library not found"
14
+ end
15
+
16
+ create_makefile("json_scanner/json_scanner") # builds the extension loaded as json_scanner/json_scanner
@@ -0,0 +1,512 @@
1
+ #include "json_scanner.h"
2
+
3
+ VALUE rb_mJsonScanner;
4
+ VALUE rb_mJsonScannerOptions;
5
+ VALUE rb_eJsonScannerParseError;
6
+
7
+ VALUE null_sym;
8
+ VALUE boolean_sym;
9
+ VALUE number_sym;
10
+ VALUE string_sym;
11
+ VALUE object_sym;
12
+ VALUE array_sym;
13
+
14
+ enum matcher_type // kind of one element of a user-supplied path (see scan_ctx_init)
15
+ {
16
+ MATCHER_KEY, // built from a Ruby String: compared against an object key
17
+ MATCHER_INDEX, // built from a Ruby Integer: compared against one array index
18
+ // MATCHER_ANY_KEY,
19
+ // MATCHER_ANY_INDEX,
20
+ MATCHER_INDEX_RANGE, // built from a Ruby range: compared against a span of array indices
21
+ // MATCHER_KEYS_LIST,
22
+ };
23
+
24
+ enum path_type // kind of one element of the position currently being parsed
25
+ {
26
+ PATH_KEY, // set by scan_on_start_object: the enclosing container is an object
27
+ PATH_INDEX, // set by scan_on_start_array: the enclosing container is an array
28
+ };
29
+
30
+ typedef struct // object key as a pointer/length pair
31
+ {
32
+ const char *val; // key bytes; assigned from RSTRING_PTR or the yajl key callback without copying
33
+ size_t len; // number of bytes at val
34
+ } hashkey_t;
35
+
36
+ typedef struct // span of array indices used by MATCHER_INDEX_RANGE
37
+ {
38
+ long start; // first matching index
39
+ long end; // last matching index, inclusive (scan_ctx_init decrements it for exclusive ranges)
40
+ } range_t;
41
+
42
+ typedef struct // one element of a user-supplied path
43
+ {
44
+ enum matcher_type type; // selects which member of value is valid
45
+ union
46
+ {
47
+ hashkey_t key; // MATCHER_KEY
48
+ long index; // MATCHER_INDEX
49
+ range_t range; // MATCHER_INDEX_RANGE
50
+ } value;
51
+ } path_matcher_elem_t;
52
+
53
+ typedef struct // one element of the position currently being parsed
54
+ {
55
+ enum path_type type; // selects which member of value is valid
56
+ union
57
+ {
58
+ hashkey_t key; // PATH_KEY: most recent key seen in this object (set by scan_on_key)
59
+ long index; // PATH_INDEX: index of the current array element; starts at -1
60
+ } value;
61
+ } path_elem_t;
62
+
63
+ typedef struct // one compiled user path
64
+ {
65
+ path_matcher_elem_t *elems; // array of len matchers; released in scan_ctx_free
66
+ int len; // number of elements in elems
67
+ int matched_depth; // initialised to 0 in scan_ctx_init; not read by the code in this file
68
+ } paths_t;
69
+
70
+ typedef struct // state shared by all yajl callbacks during one scan
71
+ {
72
+ int with_path; // truthiness of the with_path argument; stored but not read in this file
73
+ paths_t *paths; // compiled user paths
74
+ int paths_len; // number of entries in paths
75
+ path_elem_t *current_path; // position being parsed; allocated with max_path_len entries
76
+ int current_path_len; // current nesting depth; may exceed max_path_len
77
+ int max_path_len; // length of the longest user path
78
+ // Easier to use a Ruby array for result than convert later
79
+ VALUE points_list; // one Ruby Array of matches per user path
80
+ // by depth
81
+ size_t *starts; // byte offset at which the container open at each depth started
82
+ // VALUE rb_err;
83
+ yajl_handle handle; // parser handle, used to query yajl_get_bytes_consumed
84
+ } scan_ctx;
85
+
86
+ // FIXME: This will cause memory leak if ruby_xmalloc raises
87
+ scan_ctx *scan_ctx_init(VALUE path_ary, VALUE with_path)
88
+ {
89
+ // TODO: Allow to_ary and sized enumerables
90
+ rb_check_type(path_ary, T_ARRAY);
91
+ int path_ary_len = rb_long2int(rb_array_len(path_ary));
92
+ // Check types early before any allocations, so exception is ok
93
+ // TODO: Fix this, just handle errors
94
+ for (int i = 0; i < path_ary_len; i++)
95
+ {
96
+ VALUE path = rb_ary_entry(path_ary, i);
97
+ rb_check_type(path, T_ARRAY);
98
+ int path_len = rb_long2int(rb_array_len(path));
99
+ for (int j = 0; j < path_len; j++)
100
+ {
101
+ VALUE entry = rb_ary_entry(path, j);
102
+ int type = TYPE(entry);
103
+ if (type == T_STRING)
104
+ {
105
+ #if LONG_MAX > SIZE_MAX
106
+ RSTRING_LENINT(entry);
107
+ #endif
108
+ }
109
+ else if (type == T_FIXNUM || type == T_BIGNUM)
110
+ {
111
+ RB_NUM2LONG(entry);
112
+ }
113
+ else
114
+ {
115
+ VALUE range_beg, range_end;
116
+ int open_ended;
117
+ if (rb_range_values(entry, &range_beg, &range_end, &open_ended) != Qtrue)
118
+ rb_raise(rb_eArgError, "path elements must be strings, integers, or ranges");
119
+ RB_NUM2LONG(range_beg);
120
+ RB_NUM2LONG(range_end);
121
+ }
122
+ }
123
+ }
124
+
125
+ scan_ctx *ctx = ruby_xmalloc(sizeof(scan_ctx));
126
+
127
+ ctx->with_path = RB_TEST(with_path);
128
+ ctx->max_path_len = 0;
129
+
130
+ paths_t *paths = ruby_xmalloc(sizeof(paths_t) * path_ary_len);
131
+ for (int i = 0; i < path_ary_len; i++)
132
+ {
133
+ VALUE path = rb_ary_entry(path_ary, i);
134
+ int path_len = rb_long2int(rb_array_len(path));
135
+ if (path_len > ctx->max_path_len)
136
+ ctx->max_path_len = path_len;
137
+ paths[i].elems = ruby_xmalloc2(sizeof(path_matcher_elem_t), path_len);
138
+ for (int j = 0; j < path_len; j++)
139
+ {
140
+ VALUE entry = rb_ary_entry(path, j);
141
+ int type = TYPE(entry);
142
+ if (type == T_STRING)
143
+ {
144
+ paths[i].elems[j].type = MATCHER_KEY;
145
+ paths[i].elems[j].value.key.val = RSTRING_PTR(entry);
146
+ #if LONG_MAX > SIZE_MAX
147
+ paths[i].elems[j].value.key.len = RSTRING_LENINT(entry);
148
+ #else
149
+ paths[i].elems[j].value.key.len = RSTRING_LEN(entry);
150
+ #endif
151
+ }
152
+ else if (type == T_FIXNUM || type == T_BIGNUM)
153
+ {
154
+ paths[i].elems[j].type = MATCHER_INDEX;
155
+ paths[i].elems[j].value.index = FIX2LONG(entry);
156
+ }
157
+ else
158
+ {
159
+ paths[i].elems[j].type = MATCHER_INDEX_RANGE;
160
+ VALUE range_beg, range_end;
161
+ int open_ended;
162
+ rb_range_values(entry, &range_beg, &range_end, &open_ended);
163
+ paths[i].elems[j].value.range.start = RB_NUM2LONG(range_beg);
164
+ paths[i].elems[j].value.range.end = RB_NUM2LONG(range_end);
165
+ if (open_ended)
166
+ paths[i].elems[j].value.range.end--;
167
+ }
168
+ }
169
+ paths[i].len = path_len;
170
+ paths[i].matched_depth = 0;
171
+ }
172
+
173
+ ctx->paths = paths;
174
+ ctx->paths_len = path_ary_len;
175
+ ctx->current_path = ruby_xmalloc2(sizeof(path_elem_t), ctx->max_path_len);
176
+
177
+ ctx->current_path_len = 0;
178
+ ctx->points_list = rb_ary_new_capa(path_ary_len);
179
+ for (int i = 0; i < path_ary_len; i++)
180
+ {
181
+ rb_ary_push(ctx->points_list, rb_ary_new());
182
+ }
183
+
184
+ ctx->starts = ruby_xmalloc2(sizeof(size_t), ctx->max_path_len);
185
+ // ctx->rb_err = Qnil;
186
+ ctx->handle = NULL;
187
+
188
+ return ctx;
189
+ }
190
+
191
+ void scan_ctx_free(scan_ctx *ctx)
192
+ {
193
+ if (!ctx)
194
+ return;
195
+ ruby_xfree(ctx->starts);
196
+ ruby_xfree(ctx->current_path);
197
+ for (int i = 0; i < ctx->paths_len; i++)
198
+ {
199
+ ruby_xfree(ctx->paths[i].elems);
200
+ }
201
+ ruby_xfree(ctx->paths);
202
+ ruby_xfree(ctx);
203
+ }
204
+
205
+ // noexcept
206
+ inline void increment_arr_index(scan_ctx *sctx)
207
+ {
208
+ // remember - any value can be root
209
+ // TODO: Maybe make current_path_len 1 shorter and get rid of -1; need to change all compares
210
+ if (sctx->current_path_len && sctx->current_path[sctx->current_path_len - 1].type == PATH_INDEX)
211
+ {
212
+ sctx->current_path[sctx->current_path_len - 1].value.index++;
213
+ }
214
+ }
215
+
216
+ typedef enum // JSON value kinds reported by the callbacks; create_point maps each to a Ruby symbol
217
+ {
218
+ null_value, // reported as null_sym
219
+ boolean_value, // reported as boolean_sym
220
+ number_value, // reported as number_sym
221
+ string_value, // reported as string_sym
222
+ object_value, // reported as object_sym; start offset taken from scan_ctx.starts
223
+ array_value, // reported as array_sym; start offset taken from scan_ctx.starts
224
+ } value_type;
225
+
226
+ // noexcept
227
+ void create_point(VALUE *point, scan_ctx *sctx, value_type type, size_t length, size_t curr_pos)
228
+ {
229
+ *point = rb_ary_new_capa(3);
230
+ VALUE values[3];
231
+ // noexcept
232
+ values[1] = RB_ULONG2NUM(curr_pos);
233
+ switch (type)
234
+ {
235
+ // FIXME: size_t can be longer than ulong
236
+ case null_value:
237
+ values[0] = RB_ULONG2NUM(curr_pos - length);
238
+ values[2] = null_sym;
239
+ break;
240
+ case boolean_value:
241
+ values[0] = RB_ULONG2NUM(curr_pos - length);
242
+ values[2] = boolean_sym;
243
+ break;
244
+ case number_value:
245
+ values[0] = RB_ULONG2NUM(curr_pos - length);
246
+ values[2] = number_sym;
247
+ break;
248
+ case string_value:
249
+ values[0] = RB_ULONG2NUM(curr_pos - length);
250
+ values[2] = string_sym;
251
+ break;
252
+ case object_value:
253
+ values[0] = RB_ULONG2NUM(sctx->starts[sctx->current_path_len]);
254
+ values[2] = object_sym;
255
+ break;
256
+ case array_value:
257
+ values[0] = RB_ULONG2NUM(sctx->starts[sctx->current_path_len]);
258
+ values[2] = array_sym;
259
+ break;
260
+ }
261
+ // rb_ary_cat raise only in case of a frozen array or if len is too long
262
+ rb_ary_cat(*point, values, 3);
263
+ }
264
+
265
+ // noexcept
266
+ void save_point(scan_ctx *sctx, value_type type, size_t length)
267
+ {
268
+ // TODO: Abort parsing if all paths are matched and no more mathces are possible: only trivial key/index matchers at the current level
269
+ // TODO: Don't re-compare already matched prefixes; hard to invalidate, though
270
+ VALUE point = Qundef;
271
+ for (int i = 0; i < sctx->paths_len; i++)
272
+ {
273
+ if (sctx->paths[i].len != sctx->current_path_len)
274
+ continue;
275
+
276
+ int match = true;
277
+ for (int j = 0; j < sctx->current_path_len; j++)
278
+ {
279
+ switch (sctx->paths[i].elems[j].type)
280
+ {
281
+ case MATCHER_KEY:
282
+ if (sctx->current_path[j].type != PATH_KEY ||
283
+ sctx->current_path[j].value.key.len != sctx->paths[i].elems[j].value.key.len ||
284
+ strncmp(sctx->current_path[j].value.key.val, sctx->paths[i].elems[j].value.key.val, sctx->current_path[j].value.key.len))
285
+ match = false;
286
+ break;
287
+ case MATCHER_INDEX:
288
+ if (sctx->current_path[j].type != PATH_INDEX ||
289
+ sctx->current_path[j].value.index != sctx->paths[i].elems[j].value.index)
290
+ match = false;
291
+ break;
292
+ case MATCHER_INDEX_RANGE:
293
+ if (sctx->current_path[j].type != PATH_INDEX ||
294
+ sctx->current_path[j].value.index < sctx->paths[i].elems[j].value.range.start ||
295
+ sctx->current_path[j].value.index > sctx->paths[i].elems[j].value.range.end)
296
+ match = false;
297
+ break;
298
+ }
299
+ if (!match)
300
+ break;
301
+ }
302
+ if (match)
303
+ {
304
+ if (point == Qundef)
305
+ {
306
+ create_point(&point, sctx, type, length, yajl_get_bytes_consumed(sctx->handle));
307
+ }
308
+ // rb_ary_push raises only in case of a frozen array, which is not the case
309
+ // rb_ary_entry is safe
310
+ rb_ary_push(rb_ary_entry(sctx->points_list, i), point);
311
+ }
312
+ }
313
+ }
314
+
315
+ // noexcept
316
+ int scan_on_null(void *ctx)
317
+ {
318
+ scan_ctx *sctx = (scan_ctx *)ctx;
319
+ if (sctx->current_path_len > sctx->max_path_len)
320
+ return true;
321
+ increment_arr_index(sctx);
322
+ save_point(sctx, null_value, 4);
323
+ return true;
324
+ }
325
+
326
+ // noexcept
327
+ int scan_on_boolean(void *ctx, int bool_val)
328
+ {
329
+ scan_ctx *sctx = (scan_ctx *)ctx;
330
+ if (sctx->current_path_len > sctx->max_path_len)
331
+ return true;
332
+ increment_arr_index(sctx);
333
+ save_point(sctx, boolean_value, bool_val ? 4 : 5);
334
+ return true;
335
+ }
336
+
337
+ // noexcept
338
+ int scan_on_number(void *ctx, const char *val, size_t len)
339
+ {
340
+ scan_ctx *sctx = (scan_ctx *)ctx;
341
+ if (sctx->current_path_len > sctx->max_path_len)
342
+ return true;
343
+ increment_arr_index(sctx);
344
+ save_point(sctx, number_value, len);
345
+ return true;
346
+ }
347
+
348
+ // noexcept
349
+ int scan_on_string(void *ctx, const unsigned char *val, size_t len)
350
+ {
351
+ scan_ctx *sctx = (scan_ctx *)ctx;
352
+ if (sctx->current_path_len > sctx->max_path_len)
353
+ return true;
354
+ increment_arr_index(sctx);
355
+ save_point(sctx, string_value, len + 2);
356
+ return true;
357
+ }
358
+
359
+ // noexcept
360
+ int scan_on_start_object(void *ctx)
361
+ {
362
+ scan_ctx *sctx = (scan_ctx *)ctx;
363
+ if (sctx->current_path_len > sctx->max_path_len)
364
+ {
365
+ sctx->current_path_len++;
366
+ return true;
367
+ }
368
+ increment_arr_index(sctx);
369
+ if (sctx->current_path_len < sctx->max_path_len)
370
+ {
371
+ sctx->starts[sctx->current_path_len] = yajl_get_bytes_consumed(sctx->handle) - 1;
372
+ sctx->current_path[sctx->current_path_len].type = PATH_KEY;
373
+ }
374
+ sctx->current_path_len++;
375
+ return true;
376
+ }
377
+
378
+ // noexcept
379
+ int scan_on_key(void *ctx, const unsigned char *key, size_t len)
380
+ {
381
+ scan_ctx *sctx = (scan_ctx *)ctx;
382
+ if (sctx->current_path_len > sctx->max_path_len)
383
+ return true;
384
+ // Can't be called without scan_on_start_object being called before
385
+ // So current_path_len at least 1 and key.type is set to PATH_KEY;
386
+ sctx->current_path[sctx->current_path_len - 1].value.key.val = (char *) key;
387
+ sctx->current_path[sctx->current_path_len - 1].value.key.len = len;
388
+ return true;
389
+ }
390
+
391
+ // noexcept
392
+ int scan_on_end_object(void *ctx)
393
+ {
394
+ scan_ctx *sctx = (scan_ctx *)ctx;
395
+ sctx->current_path_len--;
396
+ if (sctx->current_path_len >= sctx->max_path_len)
397
+ return true;
398
+ save_point(sctx, object_value, 0);
399
+ return true;
400
+ }
401
+
402
+ // noexcept
403
+ int scan_on_start_array(void *ctx)
404
+ {
405
+ scan_ctx *sctx = (scan_ctx *)ctx;
406
+ if (sctx->current_path_len > sctx->max_path_len)
407
+ {
408
+ sctx->current_path_len++;
409
+ return true;
410
+ }
411
+ increment_arr_index(sctx);
412
+ if (sctx->current_path_len < sctx->max_path_len)
413
+ {
414
+ sctx->starts[sctx->current_path_len] = yajl_get_bytes_consumed(sctx->handle) - 1;
415
+ sctx->current_path[sctx->current_path_len].type = PATH_INDEX;
416
+ sctx->current_path[sctx->current_path_len].value.index = -1;
417
+ }
418
+ sctx->current_path_len++;
419
+ return true;
420
+ }
421
+
422
+ // noexcept
423
+ int scan_on_end_array(void *ctx)
424
+ {
425
+ scan_ctx *sctx = (scan_ctx *)ctx;
426
+ sctx->current_path_len--;
427
+ if (sctx->current_path_len >= sctx->max_path_len)
428
+ return true;
429
+ save_point(sctx, array_value, 0);
430
+ return true;
431
+ }
432
+
433
+ static yajl_callbacks scan_callbacks = {
434
+ scan_on_null,
435
+ scan_on_boolean,
436
+ NULL,
437
+ NULL,
438
+ scan_on_number,
439
+ scan_on_string,
440
+ scan_on_start_object,
441
+ scan_on_key,
442
+ scan_on_end_object,
443
+ scan_on_start_array,
444
+ scan_on_end_array};
445
+
446
+ // TODO: make with_path optional kw: `with_path: false`
447
+ VALUE scan(VALUE self, VALUE json_str, VALUE path_ary, VALUE with_path)
448
+ {
449
+ rb_check_type(json_str, T_STRING);
450
+ char *json_text = RSTRING_PTR(json_str);
451
+ #if LONG_MAX > SIZE_MAX
452
+ size_t json_text_len = RSTRING_LENINT(json_str);
453
+ #else
454
+ size_t json_text_len = RSTRING_LEN(json_str);
455
+ #endif
456
+ yajl_handle handle;
457
+ // TODO
458
+ int opt_verbose_error = 0;
459
+ yajl_status stat;
460
+ scan_ctx *ctx = scan_ctx_init(path_ary, with_path);
461
+ VALUE err = Qnil;
462
+ VALUE result;
463
+ // Turned out callbacks can't raise exceptions
464
+ // VALUE callback_err;
465
+
466
+ handle = yajl_alloc(&scan_callbacks, NULL, (void *)ctx);
467
+ ctx->handle = handle;
468
+ // TODO: make it configurable
469
+ // yajl_config(handle, yajl_allow_comments, true);
470
+ // yajl_config(handle, yajl_allow_trailing_garbage, true);
471
+ stat = yajl_parse(handle, (unsigned char *) json_text, json_text_len);
472
+ if (stat == yajl_status_ok)
473
+ stat = yajl_complete_parse(handle);
474
+
475
+ if (stat != yajl_status_ok)
476
+ {
477
+ char *str = (char *) yajl_get_error(handle, opt_verbose_error, (unsigned char *) json_text, json_text_len);
478
+ err = rb_str_new_cstr(str);
479
+ yajl_free_error(handle, (unsigned char *) str);
480
+ }
481
+ // callback_err = ctx->rb_err;
482
+ result = ctx->points_list;
483
+ scan_ctx_free(ctx);
484
+ yajl_free(handle);
485
+ if (err != Qnil)
486
+ rb_exc_raise(rb_exc_new_str(rb_eJsonScannerParseError, err));
487
+ // if (callback_err != Qnil)
488
+ // rb_exc_raise(callback_err);
489
+ // TODO: report yajl_get_bytes_consumed(handle)
490
+ return result;
491
+ }
492
+
493
+ RUBY_FUNC_EXPORTED void
494
+ Init_json_scanner(void)
495
+ {
496
+ rb_mJsonScanner = rb_define_module("JsonScanner");
497
+ rb_define_const(rb_mJsonScanner, "ALL", rb_range_new(INT2FIX(0), INT2FIX(-1), false));
498
+ rb_mJsonScannerOptions = rb_define_module_under(rb_mJsonScanner, "Options");
499
+ rb_eJsonScannerParseError = rb_define_class_under(rb_mJsonScanner, "ParseError", rb_eRuntimeError);
500
+ rb_define_const(rb_mJsonScannerOptions, "ALLOW_COMMENTS", INT2FIX(yajl_allow_comments));
501
+ rb_define_const(rb_mJsonScannerOptions, "DONT_VALIDATE_STRINGS", INT2FIX(yajl_dont_validate_strings));
502
+ rb_define_const(rb_mJsonScannerOptions, "ALLOW_TRAILING_GARBAGE", INT2FIX(yajl_allow_trailing_garbage));
503
+ rb_define_const(rb_mJsonScannerOptions, "ALLOW_MULTIPLE_VALUES", INT2FIX(yajl_allow_multiple_values));
504
+ rb_define_const(rb_mJsonScannerOptions, "ALLOW_PARTIAL_VALUES", INT2FIX(yajl_allow_partial_values));
505
+ rb_define_module_function(rb_mJsonScanner, "scan", scan, 3);
506
+ null_sym = rb_id2sym(rb_intern("null"));
507
+ boolean_sym = rb_id2sym(rb_intern("boolean"));
508
+ number_sym = rb_id2sym(rb_intern("number"));
509
+ string_sym = rb_id2sym(rb_intern("string"));
510
+ object_sym = rb_id2sym(rb_intern("object"));
511
+ array_sym = rb_id2sym(rb_intern("array"));
512
+ }
@@ -0,0 +1,12 @@
1
+ #ifndef JSON_SCANNER_H
2
+ #define JSON_SCANNER_H 1
3
+
4
+ #include "ruby.h"
5
+ #include "ruby/intern.h"
6
+ #include <yajl/yajl_parse.h>
7
+ #include <yajl/yajl_gen.h>
8
+
9
+ #define true 1
10
+ #define false 0
11
+
12
+ #endif /* JSON_SCANNER_H */
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ module JsonScanner
4
+ VERSION = "0.1.0" # version of the json_scanner gem
5
+ end
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "json_scanner/version"
4
+ require_relative "json_scanner/json_scanner"
5
+
6
+ module JsonScanner
7
+ class Error < StandardError; end
8
+ # JsonScanner.scan, JsonScanner::ALL, JsonScanner::Options and JsonScanner::ParseError (a RuntimeError) come from the native extension required above.
9
+ end
@@ -0,0 +1,4 @@
1
+ module JsonScanner
2
+ VERSION: String
3
+ # TODO: declare the rest of the API here. See the writing guide of rbs: https://github.com/ruby/rbs#guides
4
+ end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "spec_helper"
4
+
5
+ RSpec.describe JsonScanner do
6
+ it "has a version number" do
7
+ expect(described_class::VERSION).not_to be nil
8
+ end
9
+
10
+ it "scans json" do
11
+ result = described_class.scan('["1", {"a": 2}]', [[0], [1, "a"], []], false)
12
+ expect(result).to eq([[[1, 4, :string]], [[12, 13, :number]], [[0, 15, :array]]])
13
+ expect(described_class.scan('"2"', [[]], false)).to eq([[[0, 3, :string]]])
14
+ expect(
15
+ described_class.scan("[0,1,2,3,4,5,6,7]", [[(0..2)], [(4...6)]], false)
16
+ ).to eq(
17
+ [[[1, 2, :number], [3, 4, :number], [5, 6, :number]], [[9, 10, :number], [11, 12, :number]]]
18
+ )
19
+ expect(described_class.scan('{"a": 1}', [["a"], []], false)).to eq(
20
+ [[[6, 7, :number]], [[0, 8, :object]]]
21
+ )
22
+ expect do
23
+ begin
24
+ GC.stress = true
25
+ # TODO: investigate
26
+ # got "munmap_chunk(): invalid pointer" in the console once after
27
+ # JsonScanner.scan '[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[]]]]]]]]]]]]]]]]]]]]]]', [[0,0,0,0,0,0,0]], true + Ctrl+D
28
+ # (last arg wasn't handled at the time)
29
+ # but I don't think it's a problem of the extension or libyajl, it happened at exit and I free everything before
30
+ # `JsonScanner.scan` returns
31
+ described_class.scan "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[]]]]]]]]]]]]]]]]]]]]]]", [[0, 0, 0, 0, 0, 0, 0]], false
32
+ ensure
33
+ GC.stress = false
34
+ end
35
+ end.to raise_error described_class::ParseError
36
+ end
37
+ end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json_scanner"
4
+
5
+ RSpec.configure do |config|
6
+ # Enable flags like --only-failures and --next-failure
7
+ config.example_status_persistence_file_path = ".rspec_status"
8
+
9
+ # Disable RSpec exposing methods globally on `Module` and `main`
10
+ config.disable_monkey_patching!
11
+
12
+ config.expect_with :rspec do |c|
13
+ c.syntax = :expect # only the expect(...) syntax is enabled
14
+ end
15
+ end
metadata ADDED
@@ -0,0 +1,60 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: json_scanner
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - uvlad7
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2024-12-15 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: This gem uses yajl lib to scan a json string and allows you to parse
14
+ pieces of it
15
+ email:
16
+ - uvlad7@gmail.com
17
+ executables: []
18
+ extensions:
19
+ - ext/json_scanner/extconf.rb
20
+ extra_rdoc_files: []
21
+ files:
22
+ - README.md
23
+ - ext/json_scanner/extconf.rb
24
+ - ext/json_scanner/json_scanner.c
25
+ - ext/json_scanner/json_scanner.h
26
+ - lib/json_scanner.rb
27
+ - lib/json_scanner/version.rb
28
+ - sig/json_scanner.rbs
29
+ - spec/json_scanner_spec.rb
30
+ - spec/spec_helper.rb
31
+ homepage: https://github.com/uvlad7/json_scanner
32
+ licenses:
33
+ - MIT
34
+ metadata:
35
+ homepage_uri: https://github.com/uvlad7/json_scanner
36
+ source_code_uri: https://github.com/uvlad7/json_scanner
37
+ changelog_uri: https://github.com/uvlad7/json_scanner/CHANGELOG.md
38
+ rubygems_mfa_required: 'true'
39
+ post_install_message:
40
+ rdoc_options: []
41
+ require_paths:
42
+ - lib
43
+ required_ruby_version: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: 2.3.8
48
+ required_rubygems_version: !ruby/object:Gem::Requirement
49
+ requirements:
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: '0'
53
+ requirements:
54
+ - libyajl2, v2.1
55
+ - libyajl-dev, v2.1
56
+ rubygems_version: 3.5.7
57
+ signing_key:
58
+ specification_version: 4
59
+ summary: Extract values from JSON without full parsing
60
+ test_files: []