json_scanner 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: d0af7c4c2fce9ca74ec96c00e8972088b34c99a005b8fc966d1a0e9ae7d75dcb
4
+ data.tar.gz: 70f2365add4838ef7409d3ff9568ab59d1a3771ac38d6168757bf8de1de71b1d
5
+ SHA512:
6
+ metadata.gz: 96958c94108fafca33f68f091dcea150e549e1fa61c02aaf62790f0d2f77c8762abe7b702a4cf95b9d4e28929a3dd1ce77681bb7cc0f6e7e8fdd22d32f74f378
7
+ data.tar.gz: 3da8a8713b1f1994d50ee3032d450b5d34070c843e7c2625db1f2945c5a6c1cdc223d9b957c0386562ea1fdf68d2d03c6fc985ca2f0ddda8a7f3b83ff2c19b36
data/README.md ADDED
@@ -0,0 +1,51 @@
1
+ [![Tests](https://github.com/uvlad7/json_scanner/actions/workflows/main.yml/badge.svg)](https://github.com/uvlad7/json_scanner/actions/workflows/main.yml)
2
+
3
+ # JsonScanner
4
+
5
+ Extract values from JSON without full parsing. This gem uses the yajl library to scan a JSON string and report the byte positions of the requested values, so that you can parse only those pieces.
6
+
7
+ ## Installation
8
+
9
+ Install the gem and add to the application's Gemfile by executing:
10
+
11
+ $ bundle add json_scanner
12
+
13
+ If bundler is not being used to manage dependencies, install the gem by executing:
14
+
15
+ $ gem install json_scanner
16
+
17
+ ## Usage
18
+
19
+ ```ruby
20
+ require "json"
21
+ require "json_scanner"
22
+
23
+ large_json = "[#{"4," * 100_000}42#{",2" * 100_000}]"
24
+ where_is_42 = JsonScanner.scan(large_json, [[100_000]], false).first
25
+ # => [[200001, 200003, :number]]
26
+ where_is_42.map do |begin_pos, end_pos, _type|
27
+ JSON.parse(large_json.byteslice(begin_pos...end_pos), quirks_mode: true)
28
+ end
29
+ # => [42]
30
+
31
+ emoji_json = '{"grin": "😁", "heart": "😍", "rofl": "🤣"}'
32
+ begin_pos, end_pos, = JsonScanner.scan(emoji_json, [["heart"]], false).first.first
33
+ emoji_json.byteslice(begin_pos...end_pos)
34
+ # => "\"😍\""
35
+ emoji_json.force_encoding(Encoding::BINARY)[begin_pos...end_pos].force_encoding(Encoding::UTF_8)
36
+ # => "\"😍\""
37
+ ```
38
+
39
+ ## Development
40
+
41
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
42
+
43
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and the created tag, and push the `.gem` file to [rubygems.org](https://rubygems.org).
44
+
45
+ ## Contributing
46
+
47
+ Bug reports and pull requests are welcome on [GitHub](https://github.com/uvlad7/json_scanner).
48
+
49
+ ## License
50
+
51
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "mkmf"
4
+
5
+ # Makes all symbols private by default to avoid unintended conflict
6
+ # with other gems. To explicitly export symbols you can use RUBY_FUNC_EXPORTED
7
+ # selectively, or entirely remove this flag.
8
+ append_cflags("-fvisibility=hidden")
9
+
10
+ dir_config("yajl", "", "")
11
+
12
+ unless have_library("yajl") && have_header("yajl/yajl_parse.h") && have_header("yajl/yajl_gen.h")
13
+ abort "yajl library not found"
14
+ end
15
+
16
+ create_makefile("json_scanner/json_scanner")
@@ -0,0 +1,512 @@
1
+ #include "json_scanner.h"
2
+
3
+ VALUE rb_mJsonScanner;
4
+ VALUE rb_mJsonScannerOptions;
5
+ VALUE rb_eJsonScannerParseError;
6
+
7
+ VALUE null_sym;
8
+ VALUE boolean_sym;
9
+ VALUE number_sym;
10
+ VALUE string_sym;
11
+ VALUE object_sym;
12
+ VALUE array_sym;
13
+
// Kind of a single element of a user-requested path.
// Planned but not implemented yet: MATCHER_ANY_KEY, MATCHER_ANY_INDEX,
// MATCHER_KEYS_LIST.
enum matcher_type
{
  MATCHER_KEY,         // exact object key
  MATCHER_INDEX,       // exact array index
  MATCHER_INDEX_RANGE, // array index inside an inclusive [start, end] range
};
23
+
// Kind of a single element of the path to the value currently being parsed.
enum path_type
{
  PATH_KEY,   // inside an object; the element carries the current key
  PATH_INDEX, // inside an array; the element carries the current index
};
29
+
// Object key as a (pointer, length) pair. The bytes are borrowed, not owned:
// they point either into a Ruby String or into the buffer yajl hands to the
// key callback.
typedef struct
{
  const char *val; // first byte of the key
  size_t len;      // key length in bytes
} hashkey_t;
35
+
// Range of array indexes; both bounds are inclusive when matching.
typedef struct
{
  long start; // first matching index
  long end;   // last matching index
} range_t;
41
+
42
+ typedef struct
43
+ {
44
+ enum matcher_type type;
45
+ union
46
+ {
47
+ hashkey_t key;
48
+ long index;
49
+ range_t range;
50
+ } value;
51
+ } path_matcher_elem_t;
52
+
53
+ typedef struct
54
+ {
55
+ enum path_type type;
56
+ union
57
+ {
58
+ hashkey_t key;
59
+ long index;
60
+ } value;
61
+ } path_elem_t;
62
+
63
+ typedef struct
64
+ {
65
+ path_matcher_elem_t *elems;
66
+ int len;
67
+ int matched_depth;
68
+ } paths_t;
69
+
70
+ typedef struct
71
+ {
72
+ int with_path;
73
+ paths_t *paths;
74
+ int paths_len;
75
+ path_elem_t *current_path;
76
+ int current_path_len;
77
+ int max_path_len;
78
+ // Easier to use a Ruby array for result than convert later
79
+ VALUE points_list;
80
+ // by depth
81
+ size_t *starts;
82
+ // VALUE rb_err;
83
+ yajl_handle handle;
84
+ } scan_ctx;
85
+
86
+ // FIXME: This will cause memory leak if ruby_xmalloc raises
87
+ scan_ctx *scan_ctx_init(VALUE path_ary, VALUE with_path)
88
+ {
89
+ // TODO: Allow to_ary and sized enumerables
90
+ rb_check_type(path_ary, T_ARRAY);
91
+ int path_ary_len = rb_long2int(rb_array_len(path_ary));
92
+ // Check types early before any allocations, so exception is ok
93
+ // TODO: Fix this, just handle errors
94
+ for (int i = 0; i < path_ary_len; i++)
95
+ {
96
+ VALUE path = rb_ary_entry(path_ary, i);
97
+ rb_check_type(path, T_ARRAY);
98
+ int path_len = rb_long2int(rb_array_len(path));
99
+ for (int j = 0; j < path_len; j++)
100
+ {
101
+ VALUE entry = rb_ary_entry(path, j);
102
+ int type = TYPE(entry);
103
+ if (type == T_STRING)
104
+ {
105
+ #if LONG_MAX > SIZE_MAX
106
+ RSTRING_LENINT(entry);
107
+ #endif
108
+ }
109
+ else if (type == T_FIXNUM || type == T_BIGNUM)
110
+ {
111
+ RB_NUM2LONG(entry);
112
+ }
113
+ else
114
+ {
115
+ VALUE range_beg, range_end;
116
+ int open_ended;
117
+ if (rb_range_values(entry, &range_beg, &range_end, &open_ended) != Qtrue)
118
+ rb_raise(rb_eArgError, "path elements must be strings, integers, or ranges");
119
+ RB_NUM2LONG(range_beg);
120
+ RB_NUM2LONG(range_end);
121
+ }
122
+ }
123
+ }
124
+
125
+ scan_ctx *ctx = ruby_xmalloc(sizeof(scan_ctx));
126
+
127
+ ctx->with_path = RB_TEST(with_path);
128
+ ctx->max_path_len = 0;
129
+
130
+ paths_t *paths = ruby_xmalloc(sizeof(paths_t) * path_ary_len);
131
+ for (int i = 0; i < path_ary_len; i++)
132
+ {
133
+ VALUE path = rb_ary_entry(path_ary, i);
134
+ int path_len = rb_long2int(rb_array_len(path));
135
+ if (path_len > ctx->max_path_len)
136
+ ctx->max_path_len = path_len;
137
+ paths[i].elems = ruby_xmalloc2(sizeof(path_matcher_elem_t), path_len);
138
+ for (int j = 0; j < path_len; j++)
139
+ {
140
+ VALUE entry = rb_ary_entry(path, j);
141
+ int type = TYPE(entry);
142
+ if (type == T_STRING)
143
+ {
144
+ paths[i].elems[j].type = MATCHER_KEY;
145
+ paths[i].elems[j].value.key.val = RSTRING_PTR(entry);
146
+ #if LONG_MAX > SIZE_MAX
147
+ paths[i].elems[j].value.key.len = RSTRING_LENINT(entry);
148
+ #else
149
+ paths[i].elems[j].value.key.len = RSTRING_LEN(entry);
150
+ #endif
151
+ }
152
+ else if (type == T_FIXNUM || type == T_BIGNUM)
153
+ {
154
+ paths[i].elems[j].type = MATCHER_INDEX;
155
+ paths[i].elems[j].value.index = FIX2LONG(entry);
156
+ }
157
+ else
158
+ {
159
+ paths[i].elems[j].type = MATCHER_INDEX_RANGE;
160
+ VALUE range_beg, range_end;
161
+ int open_ended;
162
+ rb_range_values(entry, &range_beg, &range_end, &open_ended);
163
+ paths[i].elems[j].value.range.start = RB_NUM2LONG(range_beg);
164
+ paths[i].elems[j].value.range.end = RB_NUM2LONG(range_end);
165
+ if (open_ended)
166
+ paths[i].elems[j].value.range.end--;
167
+ }
168
+ }
169
+ paths[i].len = path_len;
170
+ paths[i].matched_depth = 0;
171
+ }
172
+
173
+ ctx->paths = paths;
174
+ ctx->paths_len = path_ary_len;
175
+ ctx->current_path = ruby_xmalloc2(sizeof(path_elem_t), ctx->max_path_len);
176
+
177
+ ctx->current_path_len = 0;
178
+ ctx->points_list = rb_ary_new_capa(path_ary_len);
179
+ for (int i = 0; i < path_ary_len; i++)
180
+ {
181
+ rb_ary_push(ctx->points_list, rb_ary_new());
182
+ }
183
+
184
+ ctx->starts = ruby_xmalloc2(sizeof(size_t), ctx->max_path_len);
185
+ // ctx->rb_err = Qnil;
186
+ ctx->handle = NULL;
187
+
188
+ return ctx;
189
+ }
190
+
191
+ void scan_ctx_free(scan_ctx *ctx)
192
+ {
193
+ if (!ctx)
194
+ return;
195
+ ruby_xfree(ctx->starts);
196
+ ruby_xfree(ctx->current_path);
197
+ for (int i = 0; i < ctx->paths_len; i++)
198
+ {
199
+ ruby_xfree(ctx->paths[i].elems);
200
+ }
201
+ ruby_xfree(ctx->paths);
202
+ ruby_xfree(ctx);
203
+ }
204
+
205
+ // noexcept
206
+ inline void increment_arr_index(scan_ctx *sctx)
207
+ {
208
+ // remember - any value can be root
209
+ // TODO: Maybe make current_path_len 1 shorter and get rid of -1; need to change all compares
210
+ if (sctx->current_path_len && sctx->current_path[sctx->current_path_len - 1].type == PATH_INDEX)
211
+ {
212
+ sctx->current_path[sctx->current_path_len - 1].value.index++;
213
+ }
214
+ }
215
+
// JSON value kinds reported by the callbacks; create_point maps each of them
// to the matching Ruby symbol.
typedef enum
{
  null_value,
  boolean_value,
  number_value,
  string_value,
  object_value,
  array_value,
} value_type;
225
+
226
+ // noexcept
227
+ void create_point(VALUE *point, scan_ctx *sctx, value_type type, size_t length, size_t curr_pos)
228
+ {
229
+ *point = rb_ary_new_capa(3);
230
+ VALUE values[3];
231
+ // noexcept
232
+ values[1] = RB_ULONG2NUM(curr_pos);
233
+ switch (type)
234
+ {
235
+ // FIXME: size_t can be longer than ulong
236
+ case null_value:
237
+ values[0] = RB_ULONG2NUM(curr_pos - length);
238
+ values[2] = null_sym;
239
+ break;
240
+ case boolean_value:
241
+ values[0] = RB_ULONG2NUM(curr_pos - length);
242
+ values[2] = boolean_sym;
243
+ break;
244
+ case number_value:
245
+ values[0] = RB_ULONG2NUM(curr_pos - length);
246
+ values[2] = number_sym;
247
+ break;
248
+ case string_value:
249
+ values[0] = RB_ULONG2NUM(curr_pos - length);
250
+ values[2] = string_sym;
251
+ break;
252
+ case object_value:
253
+ values[0] = RB_ULONG2NUM(sctx->starts[sctx->current_path_len]);
254
+ values[2] = object_sym;
255
+ break;
256
+ case array_value:
257
+ values[0] = RB_ULONG2NUM(sctx->starts[sctx->current_path_len]);
258
+ values[2] = array_sym;
259
+ break;
260
+ }
261
+ // rb_ary_cat raise only in case of a frozen array or if len is too long
262
+ rb_ary_cat(*point, values, 3);
263
+ }
264
+
265
+ // noexcept
266
+ void save_point(scan_ctx *sctx, value_type type, size_t length)
267
+ {
268
+ // TODO: Abort parsing if all paths are matched and no more mathces are possible: only trivial key/index matchers at the current level
269
+ // TODO: Don't re-compare already matched prefixes; hard to invalidate, though
270
+ VALUE point = Qundef;
271
+ for (int i = 0; i < sctx->paths_len; i++)
272
+ {
273
+ if (sctx->paths[i].len != sctx->current_path_len)
274
+ continue;
275
+
276
+ int match = true;
277
+ for (int j = 0; j < sctx->current_path_len; j++)
278
+ {
279
+ switch (sctx->paths[i].elems[j].type)
280
+ {
281
+ case MATCHER_KEY:
282
+ if (sctx->current_path[j].type != PATH_KEY ||
283
+ sctx->current_path[j].value.key.len != sctx->paths[i].elems[j].value.key.len ||
284
+ strncmp(sctx->current_path[j].value.key.val, sctx->paths[i].elems[j].value.key.val, sctx->current_path[j].value.key.len))
285
+ match = false;
286
+ break;
287
+ case MATCHER_INDEX:
288
+ if (sctx->current_path[j].type != PATH_INDEX ||
289
+ sctx->current_path[j].value.index != sctx->paths[i].elems[j].value.index)
290
+ match = false;
291
+ break;
292
+ case MATCHER_INDEX_RANGE:
293
+ if (sctx->current_path[j].type != PATH_INDEX ||
294
+ sctx->current_path[j].value.index < sctx->paths[i].elems[j].value.range.start ||
295
+ sctx->current_path[j].value.index > sctx->paths[i].elems[j].value.range.end)
296
+ match = false;
297
+ break;
298
+ }
299
+ if (!match)
300
+ break;
301
+ }
302
+ if (match)
303
+ {
304
+ if (point == Qundef)
305
+ {
306
+ create_point(&point, sctx, type, length, yajl_get_bytes_consumed(sctx->handle));
307
+ }
308
+ // rb_ary_push raises only in case of a frozen array, which is not the case
309
+ // rb_ary_entry is safe
310
+ rb_ary_push(rb_ary_entry(sctx->points_list, i), point);
311
+ }
312
+ }
313
+ }
314
+
315
+ // noexcept
316
+ int scan_on_null(void *ctx)
317
+ {
318
+ scan_ctx *sctx = (scan_ctx *)ctx;
319
+ if (sctx->current_path_len > sctx->max_path_len)
320
+ return true;
321
+ increment_arr_index(sctx);
322
+ save_point(sctx, null_value, 4);
323
+ return true;
324
+ }
325
+
326
+ // noexcept
327
+ int scan_on_boolean(void *ctx, int bool_val)
328
+ {
329
+ scan_ctx *sctx = (scan_ctx *)ctx;
330
+ if (sctx->current_path_len > sctx->max_path_len)
331
+ return true;
332
+ increment_arr_index(sctx);
333
+ save_point(sctx, boolean_value, bool_val ? 4 : 5);
334
+ return true;
335
+ }
336
+
337
+ // noexcept
338
+ int scan_on_number(void *ctx, const char *val, size_t len)
339
+ {
340
+ scan_ctx *sctx = (scan_ctx *)ctx;
341
+ if (sctx->current_path_len > sctx->max_path_len)
342
+ return true;
343
+ increment_arr_index(sctx);
344
+ save_point(sctx, number_value, len);
345
+ return true;
346
+ }
347
+
348
+ // noexcept
349
+ int scan_on_string(void *ctx, const unsigned char *val, size_t len)
350
+ {
351
+ scan_ctx *sctx = (scan_ctx *)ctx;
352
+ if (sctx->current_path_len > sctx->max_path_len)
353
+ return true;
354
+ increment_arr_index(sctx);
355
+ save_point(sctx, string_value, len + 2);
356
+ return true;
357
+ }
358
+
359
+ // noexcept
360
+ int scan_on_start_object(void *ctx)
361
+ {
362
+ scan_ctx *sctx = (scan_ctx *)ctx;
363
+ if (sctx->current_path_len > sctx->max_path_len)
364
+ {
365
+ sctx->current_path_len++;
366
+ return true;
367
+ }
368
+ increment_arr_index(sctx);
369
+ if (sctx->current_path_len < sctx->max_path_len)
370
+ {
371
+ sctx->starts[sctx->current_path_len] = yajl_get_bytes_consumed(sctx->handle) - 1;
372
+ sctx->current_path[sctx->current_path_len].type = PATH_KEY;
373
+ }
374
+ sctx->current_path_len++;
375
+ return true;
376
+ }
377
+
378
+ // noexcept
379
+ int scan_on_key(void *ctx, const unsigned char *key, size_t len)
380
+ {
381
+ scan_ctx *sctx = (scan_ctx *)ctx;
382
+ if (sctx->current_path_len > sctx->max_path_len)
383
+ return true;
384
+ // Can't be called without scan_on_start_object being called before
385
+ // So current_path_len at least 1 and key.type is set to PATH_KEY;
386
+ sctx->current_path[sctx->current_path_len - 1].value.key.val = (char *) key;
387
+ sctx->current_path[sctx->current_path_len - 1].value.key.len = len;
388
+ return true;
389
+ }
390
+
391
+ // noexcept
392
+ int scan_on_end_object(void *ctx)
393
+ {
394
+ scan_ctx *sctx = (scan_ctx *)ctx;
395
+ sctx->current_path_len--;
396
+ if (sctx->current_path_len >= sctx->max_path_len)
397
+ return true;
398
+ save_point(sctx, object_value, 0);
399
+ return true;
400
+ }
401
+
402
+ // noexcept
403
+ int scan_on_start_array(void *ctx)
404
+ {
405
+ scan_ctx *sctx = (scan_ctx *)ctx;
406
+ if (sctx->current_path_len > sctx->max_path_len)
407
+ {
408
+ sctx->current_path_len++;
409
+ return true;
410
+ }
411
+ increment_arr_index(sctx);
412
+ if (sctx->current_path_len < sctx->max_path_len)
413
+ {
414
+ sctx->starts[sctx->current_path_len] = yajl_get_bytes_consumed(sctx->handle) - 1;
415
+ sctx->current_path[sctx->current_path_len].type = PATH_INDEX;
416
+ sctx->current_path[sctx->current_path_len].value.index = -1;
417
+ }
418
+ sctx->current_path_len++;
419
+ return true;
420
+ }
421
+
422
+ // noexcept
423
+ int scan_on_end_array(void *ctx)
424
+ {
425
+ scan_ctx *sctx = (scan_ctx *)ctx;
426
+ sctx->current_path_len--;
427
+ if (sctx->current_path_len >= sctx->max_path_len)
428
+ return true;
429
+ save_point(sctx, array_value, 0);
430
+ return true;
431
+ }
432
+
433
+ static yajl_callbacks scan_callbacks = {
434
+ scan_on_null,
435
+ scan_on_boolean,
436
+ NULL,
437
+ NULL,
438
+ scan_on_number,
439
+ scan_on_string,
440
+ scan_on_start_object,
441
+ scan_on_key,
442
+ scan_on_end_object,
443
+ scan_on_start_array,
444
+ scan_on_end_array};
445
+
446
+ // TODO: make with_path optional kw: `with_path: false`
447
+ VALUE scan(VALUE self, VALUE json_str, VALUE path_ary, VALUE with_path)
448
+ {
449
+ rb_check_type(json_str, T_STRING);
450
+ char *json_text = RSTRING_PTR(json_str);
451
+ #if LONG_MAX > SIZE_MAX
452
+ size_t json_text_len = RSTRING_LENINT(json_str);
453
+ #else
454
+ size_t json_text_len = RSTRING_LEN(json_str);
455
+ #endif
456
+ yajl_handle handle;
457
+ // TODO
458
+ int opt_verbose_error = 0;
459
+ yajl_status stat;
460
+ scan_ctx *ctx = scan_ctx_init(path_ary, with_path);
461
+ VALUE err = Qnil;
462
+ VALUE result;
463
+ // Turned out callbacks can't raise exceptions
464
+ // VALUE callback_err;
465
+
466
+ handle = yajl_alloc(&scan_callbacks, NULL, (void *)ctx);
467
+ ctx->handle = handle;
468
+ // TODO: make it configurable
469
+ // yajl_config(handle, yajl_allow_comments, true);
470
+ // yajl_config(handle, yajl_allow_trailing_garbage, true);
471
+ stat = yajl_parse(handle, (unsigned char *) json_text, json_text_len);
472
+ if (stat == yajl_status_ok)
473
+ stat = yajl_complete_parse(handle);
474
+
475
+ if (stat != yajl_status_ok)
476
+ {
477
+ char *str = (char *) yajl_get_error(handle, opt_verbose_error, (unsigned char *) json_text, json_text_len);
478
+ err = rb_str_new_cstr(str);
479
+ yajl_free_error(handle, (unsigned char *) str);
480
+ }
481
+ // callback_err = ctx->rb_err;
482
+ result = ctx->points_list;
483
+ scan_ctx_free(ctx);
484
+ yajl_free(handle);
485
+ if (err != Qnil)
486
+ rb_exc_raise(rb_exc_new_str(rb_eJsonScannerParseError, err));
487
+ // if (callback_err != Qnil)
488
+ // rb_exc_raise(callback_err);
489
+ // TODO: report yajl_get_bytes_consumed(handle)
490
+ return result;
491
+ }
492
+
493
+ RUBY_FUNC_EXPORTED void
494
+ Init_json_scanner(void)
495
+ {
496
+ rb_mJsonScanner = rb_define_module("JsonScanner");
497
+ rb_define_const(rb_mJsonScanner, "ALL", rb_range_new(INT2FIX(0), INT2FIX(-1), false));
498
+ rb_mJsonScannerOptions = rb_define_module_under(rb_mJsonScanner, "Options");
499
+ rb_eJsonScannerParseError = rb_define_class_under(rb_mJsonScanner, "ParseError", rb_eRuntimeError);
500
+ rb_define_const(rb_mJsonScannerOptions, "ALLOW_COMMENTS", INT2FIX(yajl_allow_comments));
501
+ rb_define_const(rb_mJsonScannerOptions, "DONT_VALIDATE_STRINGS", INT2FIX(yajl_dont_validate_strings));
502
+ rb_define_const(rb_mJsonScannerOptions, "ALLOW_TRAILING_GARBAGE", INT2FIX(yajl_allow_trailing_garbage));
503
+ rb_define_const(rb_mJsonScannerOptions, "ALLOW_MULTIPLE_VALUES", INT2FIX(yajl_allow_multiple_values));
504
+ rb_define_const(rb_mJsonScannerOptions, "ALLOW_PARTIAL_VALUES", INT2FIX(yajl_allow_partial_values));
505
+ rb_define_module_function(rb_mJsonScanner, "scan", scan, 3);
506
+ null_sym = rb_id2sym(rb_intern("null"));
507
+ boolean_sym = rb_id2sym(rb_intern("boolean"));
508
+ number_sym = rb_id2sym(rb_intern("number"));
509
+ string_sym = rb_id2sym(rb_intern("string"));
510
+ object_sym = rb_id2sym(rb_intern("object"));
511
+ array_sym = rb_id2sym(rb_intern("array"));
512
+ }
@@ -0,0 +1,12 @@
1
+ #ifndef JSON_SCANNER_H
2
+ #define JSON_SCANNER_H 1
3
+
4
+ #include "ruby.h"
5
+ #include "ruby/intern.h"
6
+ #include <yajl/yajl_parse.h>
7
+ #include <yajl/yajl_gen.h>
8
+
9
+ #define true 1
10
+ #define false 0
11
+
12
+ #endif /* JSON_SCANNER_H */
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
+ module JsonScanner
4
+ VERSION = "0.1.0"
5
+ end
@@ -0,0 +1,9 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "json_scanner/version"
4
+ require_relative "json_scanner/json_scanner"
5
+
6
+ module JsonScanner
7
+ class Error < StandardError; end
8
+ # Your code goes here...
9
+ end
@@ -0,0 +1,4 @@
1
+ module JsonScanner
2
+ VERSION: String
3
+ # See the writing guide of rbs: https://github.com/ruby/rbs#guides
4
+ end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "spec_helper"
4
+
5
+ RSpec.describe JsonScanner do
6
+ it "has a version number" do
7
+ expect(described_class::VERSION).not_to be nil
8
+ end
9
+
10
+ it "scans json" do
11
+ result = described_class.scan('["1", {"a": 2}]', [[0], [1, "a"], []], false)
12
+ expect(result).to eq([[[1, 4, :string]], [[12, 13, :number]], [[0, 15, :array]]])
13
+ expect(described_class.scan('"2"', [[]], false)).to eq([[[0, 3, :string]]])
14
+ expect(
15
+ described_class.scan("[0,1,2,3,4,5,6,7]", [[(0..2)], [(4...6)]], false)
16
+ ).to eq(
17
+ [[[1, 2, :number], [3, 4, :number], [5, 6, :number]], [[9, 10, :number], [11, 12, :number]]]
18
+ )
19
+ expect(described_class.scan('{"a": 1}', [["a"], []], false)).to eq(
20
+ [[[6, 7, :number]], [[0, 8, :object]]]
21
+ )
22
+ expect do
23
+ begin
24
+ GC.stress = true
25
+ # TODO: investigate
26
+ # got "munmap_chunk(): invalid pointer" in the console once after
27
+ # JsonScanner.scan '[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[]]]]]]]]]]]]]]]]]]]]]]', [[0,0,0,0,0,0,0]], true + Ctrl+D
28
+ # (last arg wasn't handled at the time)
29
+ # but I don't think it's a problem of this extension or libyajl, it happened at exit and I free everything before
30
+ # `JsonScanner.scan` returns
31
+ described_class.scan "[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[]]]]]]]]]]]]]]]]]]]]]]", [[0, 0, 0, 0, 0, 0, 0]], false
32
+ ensure
33
+ GC.stress = false
34
+ end
35
+ end.to raise_error described_class::ParseError
36
+ end
37
+ end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json_scanner"
4
+
5
+ RSpec.configure do |config|
6
+ # Enable flags like --only-failures and --next-failure
7
+ config.example_status_persistence_file_path = ".rspec_status"
8
+
9
+ # Disable RSpec exposing methods globally on `Module` and `main`
10
+ config.disable_monkey_patching!
11
+
12
+ config.expect_with :rspec do |c|
13
+ c.syntax = :expect
14
+ end
15
+ end
metadata ADDED
@@ -0,0 +1,60 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: json_scanner
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - uvlad7
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2024-12-15 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: This gem uses yajl lib to scan a json string and allows you to parse
14
+ pieces of it
15
+ email:
16
+ - uvlad7@gmail.com
17
+ executables: []
18
+ extensions:
19
+ - ext/json_scanner/extconf.rb
20
+ extra_rdoc_files: []
21
+ files:
22
+ - README.md
23
+ - ext/json_scanner/extconf.rb
24
+ - ext/json_scanner/json_scanner.c
25
+ - ext/json_scanner/json_scanner.h
26
+ - lib/json_scanner.rb
27
+ - lib/json_scanner/version.rb
28
+ - sig/json_scanner.rbs
29
+ - spec/json_scanner_spec.rb
30
+ - spec/spec_helper.rb
31
+ homepage: https://github.com/uvlad7/json_scanner
32
+ licenses:
33
+ - MIT
34
+ metadata:
35
+ homepage_uri: https://github.com/uvlad7/json_scanner
36
+ source_code_uri: https://github.com/uvlad7/json_scanner
37
+ changelog_uri: https://github.com/uvlad7/json_scanner/CHANGELOG.md
38
+ rubygems_mfa_required: 'true'
39
+ post_install_message:
40
+ rdoc_options: []
41
+ require_paths:
42
+ - lib
43
+ required_ruby_version: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: 2.3.8
48
+ required_rubygems_version: !ruby/object:Gem::Requirement
49
+ requirements:
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: '0'
53
+ requirements:
54
+ - libyajl2, v2.1
55
+ - libyajl-dev, v2.1
56
+ rubygems_version: 3.5.7
57
+ signing_key:
58
+ specification_version: 4
59
+ summary: Extract values from JSON without full parsing
60
+ test_files: []