json_scanner 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d0af7c4c2fce9ca74ec96c00e8972088b34c99a005b8fc966d1a0e9ae7d75dcb
4
- data.tar.gz: 70f2365add4838ef7409d3ff9568ab59d1a3771ac38d6168757bf8de1de71b1d
3
+ metadata.gz: 1ffddd81459c088040c2ddee0006b1c9172e8a15aa8bab987dc59743e697f49f
4
+ data.tar.gz: 73b69bbcddaaf6711b2563787c4cbde1d258bf1d28b1e694714fd782c42d4c2c
5
5
  SHA512:
6
- metadata.gz: 96958c94108fafca33f68f091dcea150e549e1fa61c02aaf62790f0d2f77c8762abe7b702a4cf95b9d4e28929a3dd1ce77681bb7cc0f6e7e8fdd22d32f74f378
7
- data.tar.gz: 3da8a8713b1f1994d50ee3032d450b5d34070c843e7c2625db1f2945c5a6c1cdc223d9b957c0386562ea1fdf68d2d03c6fc985ca2f0ddda8a7f3b83ff2c19b36
6
+ metadata.gz: 44a153e578da606f67399a09387cf26d63b1528ad9e6bb258083435202f094ebc95e3a2c895a83334eb2aeef4ea9bfbe0f0c38374ac76f5772beea6bc3910a0f
7
+ data.tar.gz: ff9bcbc934fafc4857faf926bca5f5c6c82ec357107b8d6e5f2c0a7e9e622afe3a19d4b0a3f4a987d2e965432c73615203afd1c3e1ac6a52965555b20ef2f377
data/README.md CHANGED
@@ -32,7 +32,12 @@ emoji_json = '{"grin": "😁", "heart": "😍", "rofl": "🤣"}'
32
32
  begin_pos, end_pos, = JsonScanner.scan(emoji_json, [["heart"]], false).first.first
33
33
  emoji_json.byteslice(begin_pos...end_pos)
34
34
  # => "\"😍\""
35
- emoji_json.force_encoding(Encoding::BINARY)[begin_pos...end_pos].force_encoding(Encoding::UTF_8)
35
+ # Note: you most likely don't need the `quirks_mode` option, unless you are using an old Ruby
36
+ # with the stdlib version of the json gem or an old version of it. In newer versions `quirks_mode` is the default
37
+ JSON.parse(emoji_json.byteslice(begin_pos...end_pos), quirks_mode: true)
38
+ # => "😍"
39
+ # You can also do this
40
+ # emoji_json.force_encoding(Encoding::BINARY)[begin_pos...end_pos].force_encoding(Encoding::UTF_8)
36
41
  # => "\"😍\""
37
42
  ```
38
43
 
@@ -16,9 +16,9 @@ enum matcher_type
16
16
  MATCHER_KEY,
17
17
  MATCHER_INDEX,
18
18
  // MATCHER_ANY_KEY,
19
- // MATCHER_ANY_INDEX,
20
19
  MATCHER_INDEX_RANGE,
21
20
  // MATCHER_KEYS_LIST,
21
+ // MATCHER_KEY_REGEX,
22
22
  };
23
23
 
24
24
  enum path_type
@@ -86,16 +86,20 @@ typedef struct
86
86
  // FIXME: This will cause memory leak if ruby_xmalloc raises
87
87
  scan_ctx *scan_ctx_init(VALUE path_ary, VALUE with_path)
88
88
  {
89
+ int path_ary_len;
90
+ scan_ctx *ctx;
91
+ paths_t *paths;
89
92
  // TODO: Allow to_ary and sized enumerables
90
93
  rb_check_type(path_ary, T_ARRAY);
91
- int path_ary_len = rb_long2int(rb_array_len(path_ary));
94
+ path_ary_len = rb_long2int(rb_array_len(path_ary));
92
95
  // Check types early before any allocations, so exception is ok
93
96
  // TODO: Fix this, just handle errors
94
97
  for (int i = 0; i < path_ary_len; i++)
95
98
  {
99
+ int path_len;
96
100
  VALUE path = rb_ary_entry(path_ary, i);
97
101
  rb_check_type(path, T_ARRAY);
98
- int path_len = rb_long2int(rb_array_len(path));
102
+ path_len = rb_long2int(rb_array_len(path));
99
103
  for (int j = 0; j < path_len; j++)
100
104
  {
101
105
  VALUE entry = rb_ary_entry(path, j);
@@ -113,25 +117,32 @@ scan_ctx *scan_ctx_init(VALUE path_ary, VALUE with_path)
113
117
  else
114
118
  {
115
119
  VALUE range_beg, range_end;
120
+ long end_val;
116
121
  int open_ended;
117
122
  if (rb_range_values(entry, &range_beg, &range_end, &open_ended) != Qtrue)
118
123
  rb_raise(rb_eArgError, "path elements must be strings, integers, or ranges");
119
- RB_NUM2LONG(range_beg);
120
- RB_NUM2LONG(range_end);
124
+ if (RB_NUM2LONG(range_beg) < 0L)
125
+ rb_raise(rb_eArgError, "range start must be positive");
126
+ end_val = RB_NUM2LONG(range_end);
127
+ if (end_val < -1L)
128
+ rb_raise(rb_eArgError, "range end must be positive or -1");
129
+ if (end_val == -1L && open_ended)
130
+ rb_raise(rb_eArgError, "range with -1 end must be closed");
121
131
  }
122
132
  }
123
133
  }
124
134
 
125
- scan_ctx *ctx = ruby_xmalloc(sizeof(scan_ctx));
135
+ ctx = ruby_xmalloc(sizeof(scan_ctx));
126
136
 
127
- ctx->with_path = RB_TEST(with_path);
137
+ ctx->with_path = RTEST(with_path);
128
138
  ctx->max_path_len = 0;
129
139
 
130
- paths_t *paths = ruby_xmalloc(sizeof(paths_t) * path_ary_len);
140
+ paths = ruby_xmalloc(sizeof(paths_t) * path_ary_len);
131
141
  for (int i = 0; i < path_ary_len; i++)
132
142
  {
143
+ int path_len;
133
144
  VALUE path = rb_ary_entry(path_ary, i);
134
- int path_len = rb_long2int(rb_array_len(path));
145
+ path_len = rb_long2int(rb_array_len(path));
135
146
  if (path_len > ctx->max_path_len)
136
147
  ctx->max_path_len = path_len;
137
148
  paths[i].elems = ruby_xmalloc2(sizeof(path_matcher_elem_t), path_len);
@@ -156,12 +167,16 @@ scan_ctx *scan_ctx_init(VALUE path_ary, VALUE with_path)
156
167
  }
157
168
  else
158
169
  {
159
- paths[i].elems[j].type = MATCHER_INDEX_RANGE;
160
170
  VALUE range_beg, range_end;
161
171
  int open_ended;
172
+ paths[i].elems[j].type = MATCHER_INDEX_RANGE;
162
173
  rb_range_values(entry, &range_beg, &range_end, &open_ended);
163
174
  paths[i].elems[j].value.range.start = RB_NUM2LONG(range_beg);
164
175
  paths[i].elems[j].value.range.end = RB_NUM2LONG(range_end);
176
+ // (value..-1) works as expected, (value...-1) is forbidden above
177
+ if (paths[i].elems[j].value.range.end == -1L)
178
+ paths[i].elems[j].value.range.end = LONG_MAX;
179
+ // -1 here is fine, so, (0...0) works just as expected - doesn't match anything
165
180
  if (open_ended)
166
181
  paths[i].elems[j].value.range.end--;
167
182
  }
@@ -226,8 +241,8 @@ typedef enum
226
241
  // noexcept
227
242
  void create_point(VALUE *point, scan_ctx *sctx, value_type type, size_t length, size_t curr_pos)
228
243
  {
229
- *point = rb_ary_new_capa(3);
230
244
  VALUE values[3];
245
+ *point = rb_ary_new_capa(3);
231
246
  // noexcept
232
247
  values[1] = RB_ULONG2NUM(curr_pos);
233
248
  switch (type)
@@ -268,12 +283,13 @@ void save_point(scan_ctx *sctx, value_type type, size_t length)
268
283
  // TODO: Abort parsing if all paths are matched and no more matches are possible: only trivial key/index matchers at the current level
269
284
  // TODO: Don't re-compare already matched prefixes; hard to invalidate, though
270
285
  VALUE point = Qundef;
286
+ int match;
271
287
  for (int i = 0; i < sctx->paths_len; i++)
272
288
  {
273
289
  if (sctx->paths[i].len != sctx->current_path_len)
274
290
  continue;
275
291
 
276
- int match = true;
292
+ match = true;
277
293
  for (int j = 0; j < sctx->current_path_len; j++)
278
294
  {
279
295
  switch (sctx->paths[i].elems[j].type)
@@ -383,7 +399,7 @@ int scan_on_key(void *ctx, const unsigned char *key, size_t len)
383
399
  return true;
384
400
  // Can't be called without scan_on_start_object being called before
385
401
  // So current_path_len at least 1 and key.type is set to PATH_KEY;
386
- sctx->current_path[sctx->current_path_len - 1].value.key.val = (char *) key;
402
+ sctx->current_path[sctx->current_path_len - 1].value.key.val = (char *)key;
387
403
  sctx->current_path[sctx->current_path_len - 1].value.key.len = len;
388
404
  return true;
389
405
  }
@@ -446,37 +462,39 @@ static yajl_callbacks scan_callbacks = {
446
462
  // TODO: make with_path optional kw: `with_path: false`
447
463
  VALUE scan(VALUE self, VALUE json_str, VALUE path_ary, VALUE with_path)
448
464
  {
449
- rb_check_type(json_str, T_STRING);
450
- char *json_text = RSTRING_PTR(json_str);
451
- #if LONG_MAX > SIZE_MAX
452
- size_t json_text_len = RSTRING_LENINT(json_str);
453
- #else
454
- size_t json_text_len = RSTRING_LEN(json_str);
455
- #endif
465
+ char *json_text;
466
+ size_t json_text_len;
456
467
  yajl_handle handle;
457
- // TODO
458
- int opt_verbose_error = 0;
459
468
  yajl_status stat;
460
- scan_ctx *ctx = scan_ctx_init(path_ary, with_path);
461
- VALUE err = Qnil;
462
- VALUE result;
469
+ scan_ctx *ctx;
470
+ VALUE err = Qnil, result;
463
471
  // Turned out callbacks can't raise exceptions
464
472
  // VALUE callback_err;
473
+ // TODO
474
+ int opt_verbose_error = 0;
475
+ rb_check_type(json_str, T_STRING);
476
+ json_text = RSTRING_PTR(json_str);
477
+ #if LONG_MAX > SIZE_MAX
478
+ json_text_len = RSTRING_LENINT(json_str);
479
+ #else
480
+ json_text_len = RSTRING_LEN(json_str);
481
+ #endif
482
+ ctx = scan_ctx_init(path_ary, with_path);
465
483
 
466
484
  handle = yajl_alloc(&scan_callbacks, NULL, (void *)ctx);
467
485
  ctx->handle = handle;
468
486
  // TODO: make it configurable
469
487
  // yajl_config(handle, yajl_allow_comments, true);
470
488
  // yajl_config(handle, yajl_allow_trailing_garbage, true);
471
- stat = yajl_parse(handle, (unsigned char *) json_text, json_text_len);
489
+ stat = yajl_parse(handle, (unsigned char *)json_text, json_text_len);
472
490
  if (stat == yajl_status_ok)
473
491
  stat = yajl_complete_parse(handle);
474
492
 
475
493
  if (stat != yajl_status_ok)
476
494
  {
477
- char *str = (char *) yajl_get_error(handle, opt_verbose_error, (unsigned char *) json_text, json_text_len);
495
+ char *str = (char *)yajl_get_error(handle, opt_verbose_error, (unsigned char *)json_text, json_text_len);
478
496
  err = rb_str_new_cstr(str);
479
- yajl_free_error(handle, (unsigned char *) str);
497
+ yajl_free_error(handle, (unsigned char *)str);
480
498
  }
481
499
  // callback_err = ctx->rb_err;
482
500
  result = ctx->points_list;
@@ -494,7 +512,7 @@ RUBY_FUNC_EXPORTED void
494
512
  Init_json_scanner(void)
495
513
  {
496
514
  rb_mJsonScanner = rb_define_module("JsonScanner");
497
- rb_define_const(rb_mJsonScanner, "ALL", rb_range_new(INT2FIX(0), INT2FIX(-1), false));
515
+ rb_define_const(rb_mJsonScanner, "ANY_INDEX", rb_range_new(INT2FIX(0), INT2FIX(-1), false));
498
516
  rb_mJsonScannerOptions = rb_define_module_under(rb_mJsonScanner, "Options");
499
517
  rb_eJsonScannerParseError = rb_define_class_under(rb_mJsonScanner, "ParseError", rb_eRuntimeError);
500
518
  rb_define_const(rb_mJsonScannerOptions, "ALLOW_COMMENTS", INT2FIX(yajl_allow_comments));
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module JsonScanner
4
- VERSION = "0.1.0"
4
+ VERSION = "0.1.1"
5
5
  end
@@ -4,7 +4,7 @@ require_relative "spec_helper"
4
4
 
5
5
  RSpec.describe JsonScanner do
6
6
  it "has a version number" do
7
- expect(described_class::VERSION).not_to be nil
7
+ expect(described_class::VERSION).not_to be_nil
8
8
  end
9
9
 
10
10
  it "scans json" do
@@ -19,6 +19,9 @@ RSpec.describe JsonScanner do
19
19
  expect(described_class.scan('{"a": 1}', [["a"], []], false)).to eq(
20
20
  [[[6, 7, :number]], [[0, 8, :object]]]
21
21
  )
22
+ end
23
+
24
+ it "raises on invalid json" do
22
25
  expect do
23
26
  begin
24
27
  GC.stress = true
@@ -34,4 +37,29 @@ RSpec.describe JsonScanner do
34
37
  end
35
38
  end.to raise_error described_class::ParseError
36
39
  end
40
+
41
+ it "allows to select ranges" do
42
+ expect(
43
+ described_class.scan("[[1,2],[3,4]]", [[described_class::ANY_INDEX, described_class::ANY_INDEX]], false)
44
+ ).to eq(
45
+ [[[2, 3, :number], [4, 5, :number], [8, 9, :number], [10, 11, :number]]]
46
+ )
47
+ expect(
48
+ described_class.scan("[[1,2],[3,4]]", [[described_class::ANY_INDEX, (0...1)]], false)
49
+ ).to eq(
50
+ [[[2, 3, :number], [8, 9, :number]]]
51
+ )
52
+ end
53
+
54
+ it "allows only positive or -1 values" do
55
+ expect do
56
+ described_class.scan("[[1,2],[3,4]]", [[(0...-1)]], false)
57
+ end.to raise_error ArgumentError
58
+ expect do
59
+ described_class.scan("[[1,2],[3,4]]", [[(0..-2)]], false)
60
+ end.to raise_error ArgumentError
61
+ expect do
62
+ described_class.scan("[[1,2],[3,4]]", [[(-42..1)]], false)
63
+ end.to raise_error ArgumentError
64
+ end
37
65
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: json_scanner
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - uvlad7
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-12-15 00:00:00.000000000 Z
11
+ date: 2024-12-16 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: This gem uses yajl lib to scan a json string and allows you to parse
14
14
  pieces of it
@@ -53,7 +53,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
53
53
  requirements:
54
54
  - libyajl2, v2.1
55
55
  - libyajl-dev, v2.1
56
- rubygems_version: 3.5.7
56
+ rubygems_version: 3.4.20
57
57
  signing_key:
58
58
  specification_version: 4
59
59
  summary: Extract values from JSON without full parsing