json_scanner 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9c1ddff519827bc802cdcacb5b048402706544b0882c8ac91cd1aa414c4b57e0
4
- data.tar.gz: d1c4f41dbd71ed08a488c2f9647194fd2692c91d522fee35d28d7060bf80321c
3
+ metadata.gz: ca9d160b389a5b605a37baeebd8e9d704a9b42712b9c8de9d0667ffa7e6b3d35
4
+ data.tar.gz: f2c0362e6bb4484e47fd5dbdda4afd7332ce42d266b4d0ebe681f24e18c31b0a
5
5
  SHA512:
6
- metadata.gz: 57bf59cc9495f46675bb98d2fc7545bdc3b8392631c443ad2b89595b22be054c8f8bb268a798c5f104d1e38b73d577662f96637fd9311260c9b0a45b55044265
7
- data.tar.gz: '055432559a23dbf34e679aac7be4967ea163684fda718b433e978c34feb73f298f1346b00629fada8279a9e49e1267b990f73d2d57c8403330faf42ce4086bb8'
6
+ metadata.gz: 91d190291d0e2c16db38c523a58fbbff23cde136481b2c4a19446d25169a77390eee4792979cd3a22724bdf2c89132b5dc3ccc78c1553c17fe21227a70e1bf3c
7
+ data.tar.gz: dc704ac0ba609f209883d5af50f3f9cabcde04a0eb2c3f5ac04f964010bf866fd727c5c175dd40917526b693f13bc4b532f7a4fabce9859c0b44238cf3fa421b
data/README.md CHANGED
@@ -54,13 +54,30 @@ JsonScanner.scan('[0, 42, 0]', [[JsonScanner::ANY_INDEX]])
54
54
  # Special matcher JsonScanner::ANY_KEY is supported for object keys
55
55
  JsonScanner.scan('{"a": 1, "b": 2}', [[JsonScanner::ANY_KEY]], with_path: true)
56
56
  # => [[[["a"], [6, 7, :number]], [["b"], [14, 15, :number]]]]
57
+ # Regex matchers aren't supported yet, but you can simulate them using the `with_path` option
58
+ JsonScanner.scan(
59
+ '{"question1": 1, "answer": 42, "question2": 2}',
60
+ [[JsonScanner::ANY_KEY]], with_path: true,
61
+ ).map do |res|
62
+ res.map do |path, (begin_pos, end_pos, type)|
63
+ [begin_pos, end_pos, type] if path[0] =~ /\Aquestion/
64
+ end.compact
65
+ end
66
+ # => [[[14, 15, :number], [44, 45, :number]]]
57
67
  ```
58
68
 
59
- It supports multiple options
69
+ ## Options
70
+
71
+ `JsonScanner` supports multiple options
60
72
 
61
73
  ```ruby
62
74
  JsonScanner.scan('[0, 42, 0]', [[(1..-1)]], with_path: true)
63
75
  # => [[[[1], [4, 6, :number]], [[2], [8, 9, :number]]]]
76
+ JsonScanner.scan('[0, 42],', [[(1..-1)]], verbose_error: true)
77
+ # JsonScanner::ParseError (parse error: trailing garbage)
78
+ # [0, 42],
79
+ # (right here) ------^
80
+ # Note: the 'right here' pointer is wrong in case of a premature EOF error; this is a bug in libyajl
64
81
  JsonScanner.scan('[0, 42,', [[(1..-1)]], verbose_error: true)
65
82
  # JsonScanner::ParseError (parse error: premature EOF)
66
83
  # [0, 42,
@@ -81,6 +98,8 @@ JsonScanner.scan('{"a": 1}', [[JsonScanner::ANY_KEY]], with_path: true, symboliz
81
98
  # => [[[[:a], [6, 7, :number]]]]
82
99
  ```
83
100
 
101
+ ### Comments in the JSON
102
+
84
103
  Note that the standard `JSON` library supports comments, so you may want to enable it in the `JsonScanner` as well
85
104
  ```ruby
86
105
  json_str = '{"answer": {"value": 42 /* the Ultimate Question of Life, the Universe, and Everything */ }}'
@@ -90,7 +109,21 @@ end
90
109
  # => [{"value"=>42}]
91
110
  ```
92
111
 
93
- You can also create a config and reuse it
112
+ ### Find the end of a JSON string
113
+
114
+ The `allow_trailing_garbage` option may come in handy if you want to extract a JSON string from JS text
115
+ ```ruby
116
+ script_text = <<~'JS'
117
+ <script>window.__APOLLO_STATE__={"ContentItem:0":{"__typename":"ContentItem","id":0, "configurationType":"NO_CONFIGURATION","replacementPartsUrl":null,"relatedCategories":[{"__ref":"Category:109450"},{"__ref":"Category:82044355"},{"__ref":"Category:109441"},{"__ref":"Category:109442"},{"__ref":"Category:109449"},{"__ref":"Category:109444"},{"__ref":"Category:82043730"}],"recommendedOptions":[]}};window.__APPVERSION__=7018;window.__CONFIG_ENV__={value: 'PRODUCTION'};</script>
118
+ JS
119
+ json_with_trailing_garbage = script_text[/__APOLLO_STATE__\s*=\s*({.+)/, 1]
120
+ json_end_pos = JsonScanner.scan(json_with_trailing_garbage, [[]], allow_trailing_garbage: true).first.first[1]
121
+ apollo_state = JSON.parse(json_with_trailing_garbage[0...json_end_pos])
122
+ ```
123
+
124
+ ## Reuse configuration
125
+
126
+ You can create a `JsonScanner::Config` instance and reuse it between `JsonScanner.scan` calls
94
127
 
95
128
  ```ruby
96
129
  require "json_scanner"
@@ -107,6 +140,17 @@ JsonScanner.scan('[0, 42]', config, with_path: true)
107
140
  # => [[[[], [0, 7, :array]]], [], [[[0], [1, 2, :number]], [[1], [4, 6, :number]]]]
108
141
  ```
109
142
 
143
+ Options can be passed as a hash, even on Ruby 3
144
+ ```ruby
145
+ options = { allow_trailing_garbage: true, allow_partial_values: true }
146
+ JsonScanner.scan('[0, 42', [[1]], options) == JsonScanner.scan('[0, 42]_', [[1]], options)
147
+ # => true
148
+ ```
149
+
150
+ ## Streaming mode
151
+
152
+ Streaming mode isn't supported yet, as it's harder to implement and to use. I plan to add it in the future; its API is subject to discussion. If you have suggestions, use cases, or preferences for how it should behave, I’d love to hear from you!
153
+
110
154
  ## Development
111
155
 
112
156
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
@@ -97,7 +97,7 @@ inline size_t scan_ctx_get_bytes_consumed(scan_ctx *ctx)
97
97
  return ctx->yajl_bytes_consumed + yajl_get_bytes_consumed(ctx->handle);
98
98
  }
99
99
 
100
- inline void scan_ctx_update_bytes_consumed(scan_ctx *ctx)
100
+ inline void scan_ctx_save_bytes_consumed(scan_ctx *ctx)
101
101
  {
102
102
  ctx->yajl_bytes_consumed += yajl_get_bytes_consumed(ctx->handle);
103
103
  }
@@ -254,7 +254,8 @@ VALUE scan_ctx_init(scan_ctx *ctx, VALUE path_ary, VALUE string_keys)
254
254
  if (string_keys != Qundef)
255
255
  {
256
256
  // If string_keys is provided, we need to duplicate the string
257
- // to avoid use-after-free issues and to add the newly created string to the string_keys array
257
+ // to avoid use-after-free issues and to add the newly created string to the string_keys array.
258
+ // In Ruby 2.2 and newer symbols can be GC-ed, so we need to duplicate them as well.
258
259
  entry = rb_str_dup(entry);
259
260
  rb_ary_push(string_keys, entry);
260
261
  }
@@ -733,7 +734,7 @@ VALUE scan(int argc, VALUE *argv, VALUE self)
733
734
  yajl_status stat;
734
735
  scan_ctx *ctx;
735
736
  int free_ctx = true;
736
- VALUE err_msg = Qnil, bytes_consumed, err, result;
737
+ VALUE err_msg = Qnil, bytes_consumed, result;
737
738
  // Turned out callbacks can't raise exceptions
738
739
  // VALUE callback_err;
739
740
  #if RUBY_API_VERSION_MAJOR > 2 || (RUBY_API_VERSION_MAJOR == 2 && RUBY_API_VERSION_MINOR >= 7)
@@ -801,19 +802,17 @@ VALUE scan(int argc, VALUE *argv, VALUE self)
801
802
  }
802
803
  ctx->handle = handle;
803
804
  stat = yajl_parse(handle, (unsigned char *)json_text, json_text_len);
804
- scan_ctx_update_bytes_consumed(ctx);
805
805
  if (stat == yajl_status_ok)
806
806
  {
807
+ scan_ctx_save_bytes_consumed(ctx);
807
808
  stat = yajl_complete_parse(handle);
808
- scan_ctx_update_bytes_consumed(ctx);
809
809
  }
810
810
 
811
811
  if (stat != yajl_status_ok)
812
812
  {
813
813
  char *str = (char *)yajl_get_error(handle, verbose_error, (unsigned char *)json_text, json_text_len);
814
814
  err_msg = rb_utf8_str_new_cstr(str);
815
- // TODO: maybe use scan_ctx_get_bytes_consumed here too? But it makes difference in premature EOF
816
- bytes_consumed = ULL2NUM(yajl_get_bytes_consumed(handle));
815
+ bytes_consumed = ULL2NUM(scan_ctx_get_bytes_consumed(ctx));
817
816
  yajl_free_error(handle, (unsigned char *)str);
818
817
  }
819
818
  // // Needed when yajl_allow_partial_values is set
@@ -844,7 +843,7 @@ VALUE scan(int argc, VALUE *argv, VALUE self)
844
843
  yajl_free(handle);
845
844
  if (err_msg != Qnil)
846
845
  {
847
- err = rb_exc_new_str(rb_eJsonScannerParseError, err_msg);
846
+ VALUE err = rb_exc_new_str(rb_eJsonScannerParseError, err_msg);
848
847
  rb_ivar_set(err, rb_iv_bytes_consumed, bytes_consumed);
849
848
  rb_exc_raise(err);
850
849
  }
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module JsonScanner
4
- VERSION = "0.3.0"
4
+ VERSION = "0.3.1"
5
5
  end
@@ -131,6 +131,14 @@ RSpec.describe JsonScanner do
131
131
  expect(exc.bytes_consumed).to eq(8)
132
132
  end,
133
133
  )
134
+ expect do
135
+ described_class.scan("[[1,2", [])
136
+ end.to(
137
+ raise_error(described_class::ParseError) do |exc|
138
+ # 6 because of the final " " chunk - that's how yajl works
139
+ expect(exc.bytes_consumed).to eq(6)
140
+ end,
141
+ )
134
142
  end
135
143
 
136
144
  it "allows to return an actual path to the element" do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: json_scanner
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - uvlad7