json_scanner 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +46 -2
- data/ext/json_scanner/json_scanner.c +7 -8
- data/lib/json_scanner/version.rb +1 -1
- data/spec/json_scanner_spec.rb +8 -0
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ca9d160b389a5b605a37baeebd8e9d704a9b42712b9c8de9d0667ffa7e6b3d35
|
4
|
+
data.tar.gz: f2c0362e6bb4484e47fd5dbdda4afd7332ce42d266b4d0ebe681f24e18c31b0a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 91d190291d0e2c16db38c523a58fbbff23cde136481b2c4a19446d25169a77390eee4792979cd3a22724bdf2c89132b5dc3ccc78c1553c17fe21227a70e1bf3c
|
7
|
+
data.tar.gz: dc704ac0ba609f209883d5af50f3f9cabcde04a0eb2c3f5ac04f964010bf866fd727c5c175dd40917526b693f13bc4b532f7a4fabce9859c0b44238cf3fa421b
|
data/README.md
CHANGED
@@ -54,13 +54,30 @@ JsonScanner.scan('[0, 42, 0]', [[JsonScanner::ANY_INDEX]])
|
|
54
54
|
# Special matcher JsonScanner::ANY_KEY is supported for object keys
|
55
55
|
JsonScanner.scan('{"a": 1, "b": 2}', [[JsonScanner::ANY_KEY]], with_path: true)
|
56
56
|
# => [[[["a"], [6, 7, :number]], [["b"], [14, 15, :number]]]]
|
57
|
+
# Regex matchers aren't supported yet, but you can simulate them using the `with_path` option
|
58
|
+
JsonScanner.scan(
|
59
|
+
'{"question1": 1, "answer": 42, "question2": 2}',
|
60
|
+
[[JsonScanner::ANY_KEY]], with_path: true,
|
61
|
+
).map do |res|
|
62
|
+
res.map do |path, (begin_pos, end_pos, type)|
|
63
|
+
[begin_pos, end_pos, type] if path[0] =~ /\Aquestion/
|
64
|
+
end.compact
|
65
|
+
end
|
66
|
+
# => [[[14, 15, :number], [44, 45, :number]]]
|
57
67
|
```
|
58
68
|
|
59
|
-
|
69
|
+
## Options
|
70
|
+
|
71
|
+
`JsonScanner` supports multiple options
|
60
72
|
|
61
73
|
```ruby
|
62
74
|
JsonScanner.scan('[0, 42, 0]', [[(1..-1)]], with_path: true)
|
63
75
|
# => [[[[1], [4, 6, :number]], [[2], [8, 9, :number]]]]
|
76
|
+
JsonScanner.scan('[0, 42],', [[(1..-1)]], verbose_error: true)
|
77
|
+
# JsonScanner::ParseError (parse error: trailing garbage)
|
78
|
+
# [0, 42],
|
79
|
+
# (right here) ------^
|
80
|
+
# Note: the 'right here' pointer is wrong in case of a premature EOF error; this is a bug in libyajl
|
64
81
|
JsonScanner.scan('[0, 42,', [[(1..-1)]], verbose_error: true)
|
65
82
|
# JsonScanner::ParseError (parse error: premature EOF)
|
66
83
|
# [0, 42,
|
@@ -81,6 +98,8 @@ JsonScanner.scan('{"a": 1}', [[JsonScanner::ANY_KEY]], with_path: true, symboliz
|
|
81
98
|
# => [[[[:a], [6, 7, :number]]]]
|
82
99
|
```
|
83
100
|
|
101
|
+
### Comments in the JSON
|
102
|
+
|
84
103
|
Note that the standard `JSON` library supports comments, so you may want to enable them in `JsonScanner` as well
|
85
104
|
```ruby
|
86
105
|
json_str = '{"answer": {"value": 42 /* the Ultimate Question of Life, the Universe, and Everything */ }}'
|
@@ -90,7 +109,21 @@ end
|
|
90
109
|
# => [{"value"=>42}]
|
91
110
|
```
|
92
111
|
|
93
|
-
|
112
|
+
### Find the end of a JSON string
|
113
|
+
|
114
|
+
The `allow_trailing_garbage` option may come in handy if you want to extract a JSON string from JS text
|
115
|
+
```ruby
|
116
|
+
script_text = <<~'JS'
|
117
|
+
<script>window.__APOLLO_STATE__={"ContentItem:0":{"__typename":"ContentItem","id":0, "configurationType":"NO_CONFIGURATION","replacementPartsUrl":null,"relatedCategories":[{"__ref":"Category:109450"},{"__ref":"Category:82044355"},{"__ref":"Category:109441"},{"__ref":"Category:109442"},{"__ref":"Category:109449"},{"__ref":"Category:109444"},{"__ref":"Category:82043730"}],"recommendedOptions":[]}};window.__APPVERSION__=7018;window.__CONFIG_ENV__={value: 'PRODUCTION'};</script>
|
118
|
+
JS
|
119
|
+
json_with_trailing_garbage = script_text[/__APOLLO_STATE__\s*=\s*({.+)/, 1]
|
120
|
+
json_end_pos = JsonScanner.scan(json_with_trailing_garbage, [[]], allow_trailing_garbage: true).first.first[1]
|
121
|
+
apollo_state = JSON.parse(json_with_trailing_garbage[0...json_end_pos])
|
122
|
+
```
|
123
|
+
|
124
|
+
## Reuse configuration
|
125
|
+
|
126
|
+
You can create a `JsonScanner::Config` instance and reuse it between `JsonScanner.scan` calls
|
94
127
|
|
95
128
|
```ruby
|
96
129
|
require "json_scanner"
|
@@ -107,6 +140,17 @@ JsonScanner.scan('[0, 42]', config, with_path: true)
|
|
107
140
|
# => [[[[], [0, 7, :array]]], [], [[[0], [1, 2, :number]], [[1], [4, 6, :number]]]]
|
108
141
|
```
|
109
142
|
|
143
|
+
Options can be passed as a hash, even on Ruby 3
|
144
|
+
```ruby
|
145
|
+
options = { allow_trailing_garbage: true, allow_partial_values: true }
|
146
|
+
JsonScanner.scan('[0, 42', [[1]], options) == JsonScanner.scan('[0, 42]_', [[1]], options)
|
147
|
+
# => true
|
148
|
+
```
|
149
|
+
|
150
|
+
## Streaming mode
|
151
|
+
|
152
|
+
Streaming mode isn't supported yet, as it's harder to implement and to use. I plan to add it in the future; its API is subject to discussion. If you have suggestions, use cases, or preferences for how it should behave, I’d love to hear from you!
|
153
|
+
|
110
154
|
## Development
|
111
155
|
|
112
156
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
@@ -97,7 +97,7 @@ inline size_t scan_ctx_get_bytes_consumed(scan_ctx *ctx)
|
|
97
97
|
return ctx->yajl_bytes_consumed + yajl_get_bytes_consumed(ctx->handle);
|
98
98
|
}
|
99
99
|
|
100
|
-
inline void
|
100
|
+
inline void scan_ctx_save_bytes_consumed(scan_ctx *ctx)
|
101
101
|
{
|
102
102
|
ctx->yajl_bytes_consumed += yajl_get_bytes_consumed(ctx->handle);
|
103
103
|
}
|
@@ -254,7 +254,8 @@ VALUE scan_ctx_init(scan_ctx *ctx, VALUE path_ary, VALUE string_keys)
|
|
254
254
|
if (string_keys != Qundef)
|
255
255
|
{
|
256
256
|
// If string_keys is provided, we need to duplicate the string
|
257
|
-
// to avoid use-after-free issues and to add the newly created string to the string_keys array
|
257
|
+
// to avoid use-after-free issues and to add the newly created string to the string_keys array.
|
258
|
+
// In Ruby 2.2 and newer symbols can be GC-ed, so we need to duplicate them as well.
|
258
259
|
entry = rb_str_dup(entry);
|
259
260
|
rb_ary_push(string_keys, entry);
|
260
261
|
}
|
@@ -733,7 +734,7 @@ VALUE scan(int argc, VALUE *argv, VALUE self)
|
|
733
734
|
yajl_status stat;
|
734
735
|
scan_ctx *ctx;
|
735
736
|
int free_ctx = true;
|
736
|
-
VALUE err_msg = Qnil, bytes_consumed,
|
737
|
+
VALUE err_msg = Qnil, bytes_consumed, result;
|
737
738
|
// Turned out callbacks can't raise exceptions
|
738
739
|
// VALUE callback_err;
|
739
740
|
#if RUBY_API_VERSION_MAJOR > 2 || (RUBY_API_VERSION_MAJOR == 2 && RUBY_API_VERSION_MINOR >= 7)
|
@@ -801,19 +802,17 @@ VALUE scan(int argc, VALUE *argv, VALUE self)
|
|
801
802
|
}
|
802
803
|
ctx->handle = handle;
|
803
804
|
stat = yajl_parse(handle, (unsigned char *)json_text, json_text_len);
|
804
|
-
scan_ctx_update_bytes_consumed(ctx);
|
805
805
|
if (stat == yajl_status_ok)
|
806
806
|
{
|
807
|
+
scan_ctx_save_bytes_consumed(ctx);
|
807
808
|
stat = yajl_complete_parse(handle);
|
808
|
-
scan_ctx_update_bytes_consumed(ctx);
|
809
809
|
}
|
810
810
|
|
811
811
|
if (stat != yajl_status_ok)
|
812
812
|
{
|
813
813
|
char *str = (char *)yajl_get_error(handle, verbose_error, (unsigned char *)json_text, json_text_len);
|
814
814
|
err_msg = rb_utf8_str_new_cstr(str);
|
815
|
-
|
816
|
-
bytes_consumed = ULL2NUM(yajl_get_bytes_consumed(handle));
|
815
|
+
bytes_consumed = ULL2NUM(scan_ctx_get_bytes_consumed(ctx));
|
817
816
|
yajl_free_error(handle, (unsigned char *)str);
|
818
817
|
}
|
819
818
|
// // Needed when yajl_allow_partial_values is set
|
@@ -844,7 +843,7 @@ VALUE scan(int argc, VALUE *argv, VALUE self)
|
|
844
843
|
yajl_free(handle);
|
845
844
|
if (err_msg != Qnil)
|
846
845
|
{
|
847
|
-
err = rb_exc_new_str(rb_eJsonScannerParseError, err_msg);
|
846
|
+
VALUE err = rb_exc_new_str(rb_eJsonScannerParseError, err_msg);
|
848
847
|
rb_ivar_set(err, rb_iv_bytes_consumed, bytes_consumed);
|
849
848
|
rb_exc_raise(err);
|
850
849
|
}
|
data/lib/json_scanner/version.rb
CHANGED
data/spec/json_scanner_spec.rb
CHANGED
@@ -131,6 +131,14 @@ RSpec.describe JsonScanner do
|
|
131
131
|
expect(exc.bytes_consumed).to eq(8)
|
132
132
|
end,
|
133
133
|
)
|
134
|
+
expect do
|
135
|
+
described_class.scan("[[1,2", [])
|
136
|
+
end.to(
|
137
|
+
raise_error(described_class::ParseError) do |exc|
|
138
|
+
# 6 because of the final " " chunk - that's how yajl works
|
139
|
+
expect(exc.bytes_consumed).to eq(6)
|
140
|
+
end,
|
141
|
+
)
|
134
142
|
end
|
135
143
|
|
136
144
|
it "allows to return an actual path to the element" do
|