json_scanner 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d0af7c4c2fce9ca74ec96c00e8972088b34c99a005b8fc966d1a0e9ae7d75dcb
4
- data.tar.gz: 70f2365add4838ef7409d3ff9568ab59d1a3771ac38d6168757bf8de1de71b1d
3
+ metadata.gz: 1ffddd81459c088040c2ddee0006b1c9172e8a15aa8bab987dc59743e697f49f
4
+ data.tar.gz: 73b69bbcddaaf6711b2563787c4cbde1d258bf1d28b1e694714fd782c42d4c2c
5
5
  SHA512:
6
- metadata.gz: 96958c94108fafca33f68f091dcea150e549e1fa61c02aaf62790f0d2f77c8762abe7b702a4cf95b9d4e28929a3dd1ce77681bb7cc0f6e7e8fdd22d32f74f378
7
- data.tar.gz: 3da8a8713b1f1994d50ee3032d450b5d34070c843e7c2625db1f2945c5a6c1cdc223d9b957c0386562ea1fdf68d2d03c6fc985ca2f0ddda8a7f3b83ff2c19b36
6
+ metadata.gz: 44a153e578da606f67399a09387cf26d63b1528ad9e6bb258083435202f094ebc95e3a2c895a83334eb2aeef4ea9bfbe0f0c38374ac76f5772beea6bc3910a0f
7
+ data.tar.gz: ff9bcbc934fafc4857faf926bca5f5c6c82ec357107b8d6e5f2c0a7e9e622afe3a19d4b0a3f4a987d2e965432c73615203afd1c3e1ac6a52965555b20ef2f377
data/README.md CHANGED
@@ -32,7 +32,12 @@ emoji_json = '{"grin": "😁", "heart": "😍", "rofl": "🤣"}'
32
32
  begin_pos, end_pos, = JsonScanner.scan(emoji_json, [["heart"]], false).first.first
33
33
  emoji_json.byteslice(begin_pos...end_pos)
34
34
  # => "\"😍\""
35
- emoji_json.force_encoding(Encoding::BINARY)[begin_pos...end_pos].force_encoding(Encoding::UTF_8)
35
+ # Note: you most likely don't need the `quirks_mode` option, unless you are using an old Ruby
36
+ # with the stdlib version of the json gem or an old version of it. In new versions `quirks_mode` is the default
37
+ JSON.parse(emoji_json.byteslice(begin_pos...end_pos), quirks_mode: true)
38
+ # => "😍"
39
+ # You can also do this
40
+ # emoji_json.force_encoding(Encoding::BINARY)[begin_pos...end_pos].force_encoding(Encoding::UTF_8)
36
41
  # => "\"😍\""
37
42
  ```
38
43
 
@@ -16,9 +16,9 @@ enum matcher_type
16
16
  MATCHER_KEY,
17
17
  MATCHER_INDEX,
18
18
  // MATCHER_ANY_KEY,
19
- // MATCHER_ANY_INDEX,
20
19
  MATCHER_INDEX_RANGE,
21
20
  // MATCHER_KEYS_LIST,
21
+ // MATCHER_KEY_REGEX,
22
22
  };
23
23
 
24
24
  enum path_type
@@ -86,16 +86,20 @@ typedef struct
86
86
  // FIXME: This will cause a memory leak if ruby_xmalloc raises
87
87
  scan_ctx *scan_ctx_init(VALUE path_ary, VALUE with_path)
88
88
  {
89
+ int path_ary_len;
90
+ scan_ctx *ctx;
91
+ paths_t *paths;
89
92
  // TODO: Allow to_ary and sized enumerables
90
93
  rb_check_type(path_ary, T_ARRAY);
91
- int path_ary_len = rb_long2int(rb_array_len(path_ary));
94
+ path_ary_len = rb_long2int(rb_array_len(path_ary));
92
95
  // Check types early before any allocations, so exception is ok
93
96
  // TODO: Fix this, just handle errors
94
97
  for (int i = 0; i < path_ary_len; i++)
95
98
  {
99
+ int path_len;
96
100
  VALUE path = rb_ary_entry(path_ary, i);
97
101
  rb_check_type(path, T_ARRAY);
98
- int path_len = rb_long2int(rb_array_len(path));
102
+ path_len = rb_long2int(rb_array_len(path));
99
103
  for (int j = 0; j < path_len; j++)
100
104
  {
101
105
  VALUE entry = rb_ary_entry(path, j);
@@ -113,25 +117,32 @@ scan_ctx *scan_ctx_init(VALUE path_ary, VALUE with_path)
113
117
  else
114
118
  {
115
119
  VALUE range_beg, range_end;
120
+ long end_val;
116
121
  int open_ended;
117
122
  if (rb_range_values(entry, &range_beg, &range_end, &open_ended) != Qtrue)
118
123
  rb_raise(rb_eArgError, "path elements must be strings, integers, or ranges");
119
- RB_NUM2LONG(range_beg);
120
- RB_NUM2LONG(range_end);
124
+ if (RB_NUM2LONG(range_beg) < 0L)
125
+ rb_raise(rb_eArgError, "range start must be positive");
126
+ end_val = RB_NUM2LONG(range_end);
127
+ if (end_val < -1L)
128
+ rb_raise(rb_eArgError, "range end must be positive or -1");
129
+ if (end_val == -1L && open_ended)
130
+ rb_raise(rb_eArgError, "range with -1 end must be closed");
121
131
  }
122
132
  }
123
133
  }
124
134
 
125
- scan_ctx *ctx = ruby_xmalloc(sizeof(scan_ctx));
135
+ ctx = ruby_xmalloc(sizeof(scan_ctx));
126
136
 
127
- ctx->with_path = RB_TEST(with_path);
137
+ ctx->with_path = RTEST(with_path);
128
138
  ctx->max_path_len = 0;
129
139
 
130
- paths_t *paths = ruby_xmalloc(sizeof(paths_t) * path_ary_len);
140
+ paths = ruby_xmalloc(sizeof(paths_t) * path_ary_len);
131
141
  for (int i = 0; i < path_ary_len; i++)
132
142
  {
143
+ int path_len;
133
144
  VALUE path = rb_ary_entry(path_ary, i);
134
- int path_len = rb_long2int(rb_array_len(path));
145
+ path_len = rb_long2int(rb_array_len(path));
135
146
  if (path_len > ctx->max_path_len)
136
147
  ctx->max_path_len = path_len;
137
148
  paths[i].elems = ruby_xmalloc2(sizeof(path_matcher_elem_t), path_len);
@@ -156,12 +167,16 @@ scan_ctx *scan_ctx_init(VALUE path_ary, VALUE with_path)
156
167
  }
157
168
  else
158
169
  {
159
- paths[i].elems[j].type = MATCHER_INDEX_RANGE;
160
170
  VALUE range_beg, range_end;
161
171
  int open_ended;
172
+ paths[i].elems[j].type = MATCHER_INDEX_RANGE;
162
173
  rb_range_values(entry, &range_beg, &range_end, &open_ended);
163
174
  paths[i].elems[j].value.range.start = RB_NUM2LONG(range_beg);
164
175
  paths[i].elems[j].value.range.end = RB_NUM2LONG(range_end);
176
+ // (value..-1) works as expected, (value...-1) is forbidden above
177
+ if (paths[i].elems[j].value.range.end == -1L)
178
+ paths[i].elems[j].value.range.end = LONG_MAX;
179
+ // -1 here is fine, so, (0...0) works just as expected - doesn't match anything
165
180
  if (open_ended)
166
181
  paths[i].elems[j].value.range.end--;
167
182
  }
@@ -226,8 +241,8 @@ typedef enum
226
241
  // noexcept
227
242
  void create_point(VALUE *point, scan_ctx *sctx, value_type type, size_t length, size_t curr_pos)
228
243
  {
229
- *point = rb_ary_new_capa(3);
230
244
  VALUE values[3];
245
+ *point = rb_ary_new_capa(3);
231
246
  // noexcept
232
247
  values[1] = RB_ULONG2NUM(curr_pos);
233
248
  switch (type)
@@ -268,12 +283,13 @@ void save_point(scan_ctx *sctx, value_type type, size_t length)
268
283
  // TODO: Abort parsing if all paths are matched and no more matches are possible: only trivial key/index matchers at the current level
269
284
  // TODO: Don't re-compare already matched prefixes; hard to invalidate, though
270
285
  VALUE point = Qundef;
286
+ int match;
271
287
  for (int i = 0; i < sctx->paths_len; i++)
272
288
  {
273
289
  if (sctx->paths[i].len != sctx->current_path_len)
274
290
  continue;
275
291
 
276
- int match = true;
292
+ match = true;
277
293
  for (int j = 0; j < sctx->current_path_len; j++)
278
294
  {
279
295
  switch (sctx->paths[i].elems[j].type)
@@ -383,7 +399,7 @@ int scan_on_key(void *ctx, const unsigned char *key, size_t len)
383
399
  return true;
384
400
  // Can't be called without scan_on_start_object being called before
385
401
  // So current_path_len at least 1 and key.type is set to PATH_KEY;
386
- sctx->current_path[sctx->current_path_len - 1].value.key.val = (char *) key;
402
+ sctx->current_path[sctx->current_path_len - 1].value.key.val = (char *)key;
387
403
  sctx->current_path[sctx->current_path_len - 1].value.key.len = len;
388
404
  return true;
389
405
  }
@@ -446,37 +462,39 @@ static yajl_callbacks scan_callbacks = {
446
462
  // TODO: make with_path optional kw: `with_path: false`
447
463
  VALUE scan(VALUE self, VALUE json_str, VALUE path_ary, VALUE with_path)
448
464
  {
449
- rb_check_type(json_str, T_STRING);
450
- char *json_text = RSTRING_PTR(json_str);
451
- #if LONG_MAX > SIZE_MAX
452
- size_t json_text_len = RSTRING_LENINT(json_str);
453
- #else
454
- size_t json_text_len = RSTRING_LEN(json_str);
455
- #endif
465
+ char *json_text;
466
+ size_t json_text_len;
456
467
  yajl_handle handle;
457
- // TODO
458
- int opt_verbose_error = 0;
459
468
  yajl_status stat;
460
- scan_ctx *ctx = scan_ctx_init(path_ary, with_path);
461
- VALUE err = Qnil;
462
- VALUE result;
469
+ scan_ctx *ctx;
470
+ VALUE err = Qnil, result;
463
471
  // Turned out callbacks can't raise exceptions
464
472
  // VALUE callback_err;
473
+ // TODO
474
+ int opt_verbose_error = 0;
475
+ rb_check_type(json_str, T_STRING);
476
+ json_text = RSTRING_PTR(json_str);
477
+ #if LONG_MAX > SIZE_MAX
478
+ json_text_len = RSTRING_LENINT(json_str);
479
+ #else
480
+ json_text_len = RSTRING_LEN(json_str);
481
+ #endif
482
+ ctx = scan_ctx_init(path_ary, with_path);
465
483
 
466
484
  handle = yajl_alloc(&scan_callbacks, NULL, (void *)ctx);
467
485
  ctx->handle = handle;
468
486
  // TODO: make it configurable
469
487
  // yajl_config(handle, yajl_allow_comments, true);
470
488
  // yajl_config(handle, yajl_allow_trailing_garbage, true);
471
- stat = yajl_parse(handle, (unsigned char *) json_text, json_text_len);
489
+ stat = yajl_parse(handle, (unsigned char *)json_text, json_text_len);
472
490
  if (stat == yajl_status_ok)
473
491
  stat = yajl_complete_parse(handle);
474
492
 
475
493
  if (stat != yajl_status_ok)
476
494
  {
477
- char *str = (char *) yajl_get_error(handle, opt_verbose_error, (unsigned char *) json_text, json_text_len);
495
+ char *str = (char *)yajl_get_error(handle, opt_verbose_error, (unsigned char *)json_text, json_text_len);
478
496
  err = rb_str_new_cstr(str);
479
- yajl_free_error(handle, (unsigned char *) str);
497
+ yajl_free_error(handle, (unsigned char *)str);
480
498
  }
481
499
  // callback_err = ctx->rb_err;
482
500
  result = ctx->points_list;
@@ -494,7 +512,7 @@ RUBY_FUNC_EXPORTED void
494
512
  Init_json_scanner(void)
495
513
  {
496
514
  rb_mJsonScanner = rb_define_module("JsonScanner");
497
- rb_define_const(rb_mJsonScanner, "ALL", rb_range_new(INT2FIX(0), INT2FIX(-1), false));
515
+ rb_define_const(rb_mJsonScanner, "ANY_INDEX", rb_range_new(INT2FIX(0), INT2FIX(-1), false));
498
516
  rb_mJsonScannerOptions = rb_define_module_under(rb_mJsonScanner, "Options");
499
517
  rb_eJsonScannerParseError = rb_define_class_under(rb_mJsonScanner, "ParseError", rb_eRuntimeError);
500
518
  rb_define_const(rb_mJsonScannerOptions, "ALLOW_COMMENTS", INT2FIX(yajl_allow_comments));
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module JsonScanner
4
- VERSION = "0.1.0"
4
+ VERSION = "0.1.1"
5
5
  end
@@ -4,7 +4,7 @@ require_relative "spec_helper"
4
4
 
5
5
  RSpec.describe JsonScanner do
6
6
  it "has a version number" do
7
- expect(described_class::VERSION).not_to be nil
7
+ expect(described_class::VERSION).not_to be_nil
8
8
  end
9
9
 
10
10
  it "scans json" do
@@ -19,6 +19,9 @@ RSpec.describe JsonScanner do
19
19
  expect(described_class.scan('{"a": 1}', [["a"], []], false)).to eq(
20
20
  [[[6, 7, :number]], [[0, 8, :object]]]
21
21
  )
22
+ end
23
+
24
+ it "raises on invalid json" do
22
25
  expect do
23
26
  begin
24
27
  GC.stress = true
@@ -34,4 +37,29 @@ RSpec.describe JsonScanner do
34
37
  end
35
38
  end.to raise_error described_class::ParseError
36
39
  end
40
+
41
+ it "allows to select ranges" do
42
+ expect(
43
+ described_class.scan("[[1,2],[3,4]]", [[described_class::ANY_INDEX, described_class::ANY_INDEX]], false)
44
+ ).to eq(
45
+ [[[2, 3, :number], [4, 5, :number], [8, 9, :number], [10, 11, :number]]]
46
+ )
47
+ expect(
48
+ described_class.scan("[[1,2],[3,4]]", [[described_class::ANY_INDEX, (0...1)]], false)
49
+ ).to eq(
50
+ [[[2, 3, :number], [8, 9, :number]]]
51
+ )
52
+ end
53
+
54
+ it "allows only positive or -1 values" do
55
+ expect do
56
+ described_class.scan("[[1,2],[3,4]]", [[(0...-1)]], false)
57
+ end.to raise_error ArgumentError
58
+ expect do
59
+ described_class.scan("[[1,2],[3,4]]", [[(0..-2)]], false)
60
+ end.to raise_error ArgumentError
61
+ expect do
62
+ described_class.scan("[[1,2],[3,4]]", [[(-42..1)]], false)
63
+ end.to raise_error ArgumentError
64
+ end
37
65
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: json_scanner
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - uvlad7
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-12-15 00:00:00.000000000 Z
11
+ date: 2024-12-16 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: This gem uses yajl lib to scan a json string and allows you to parse
14
14
  pieces of it
@@ -53,7 +53,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
53
53
  requirements:
54
54
  - libyajl2, v2.1
55
55
  - libyajl-dev, v2.1
56
- rubygems_version: 3.5.7
56
+ rubygems_version: 3.4.20
57
57
  signing_key:
58
58
  specification_version: 4
59
59
  summary: Extract values from JSON without full parsing