json_scanner 0.3.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/json_scanner/extconf.rb +7 -1
- data/ext/json_scanner/json_scanner.c +252 -89
- data/lib/json_scanner/version.rb +1 -1
- data/lib/json_scanner.rb +86 -1
- metadata +17 -8
- data/README.md +0 -122
- data/spec/extensiontesttask.rb +0 -128
- data/spec/json_scanner_spec.c +0 -0
- data/spec/json_scanner_spec.rb +0 -352
- data/spec/spec_helper.rb +0 -15
data/lib/json_scanner.rb
CHANGED
@@ -3,7 +3,92 @@
|
|
3
3
|
require_relative "json_scanner/version"
|
4
4
|
require_relative "json_scanner/json_scanner"
|
5
5
|
|
6
|
+
require "json"
|
7
|
+
|
8
|
+
# Extract values from JSON without full parsing. This gem uses the +yajl+ library
|
9
|
+
# to scan a JSON string and allows you to parse pieces of it.
|
6
10
|
module JsonScanner
|
7
11
|
class Error < StandardError; end
|
8
|
-
|
12
|
+
|
13
|
+
ALLOWED_OPTS = %i[verbose_error allow_comments dont_validate_strings allow_multiple_values
|
14
|
+
allow_trailing_garbage allow_partial_values symbolize_path_keys symbolize_names].freeze
|
15
|
+
private_constant :ALLOWED_OPTS
|
16
|
+
STUB = :stub
|
17
|
+
private_constant :STUB
|
18
|
+
SCAN_OPTS = { with_path: true, with_roots_info: true }.freeze
|
19
|
+
private_constant :SCAN_OPTS
|
20
|
+
SCAN_OPTIONS = Options.new(SCAN_OPTS)
|
21
|
+
private_constant :SCAN_OPTIONS
|
22
|
+
|
23
|
+
def self.parse(json_str, config_or_path_ary, **opts)
|
24
|
+
# with_path and with_roots_info are set here
|
25
|
+
unless (extra_opts = opts.keys - ALLOWED_OPTS).empty?
|
26
|
+
raise ArgumentError, "unknown keyword#{"s" if extra_opts.size > 1}: #{extra_opts.map(&:inspect).join(", ")}"
|
27
|
+
end
|
28
|
+
|
29
|
+
opts[:symbolize_path_keys] = opts.delete(:symbolize_names) if opts.key?(:symbolize_names)
|
30
|
+
results, roots = if opts.empty?
|
31
|
+
scan(json_str, config_or_path_ary, SCAN_OPTIONS)
|
32
|
+
else
|
33
|
+
scan(json_str, config_or_path_ary, **opts, **SCAN_OPTS)
|
34
|
+
end
|
35
|
+
|
36
|
+
res = process_results(json_str, results, roots, opts[:symbolize_path_keys])
|
37
|
+
|
38
|
+
opts[:allow_multiple_values] ? res : res.first
|
39
|
+
end
|
40
|
+
|
41
|
+
def self.process_results(json_str, results, roots, symbolize_names)
|
42
|
+
# stubs are symbols, so they can be distinguished from real values
|
43
|
+
res = roots.map(&:first)
|
44
|
+
# results for different path matchers can overlap, in that case we will simply parse more than one time,
|
45
|
+
# but there shouldn't be any surprises in the behavior
|
46
|
+
results.each do |result|
|
47
|
+
process_result(res, result, roots, json_str, symbolize_names)
|
48
|
+
end
|
49
|
+
res
|
50
|
+
end
|
51
|
+
|
52
|
+
private_class_method :process_results
|
53
|
+
|
54
|
+
def self.process_result(res, result, roots, json_str, symbolize_names)
|
55
|
+
current_root_index = 0
|
56
|
+
next_root = roots[1]
|
57
|
+
result.each do |path, (begin_pos, end_pos, _type)|
|
58
|
+
while next_root && begin_pos >= next_root[1]
|
59
|
+
current_root_index += 1
|
60
|
+
next_root = roots[current_root_index + 1]
|
61
|
+
end
|
62
|
+
|
63
|
+
# for 'res[index]' check inside insert_value
|
64
|
+
res[current_root_index] = nil if res[current_root_index].is_a?(Symbol)
|
65
|
+
insert_value(res, parse_value(json_str, begin_pos, end_pos, symbolize_names), current_root_index, path)
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
private_class_method :process_result
|
70
|
+
|
71
|
+
def self.parse_value(json_str, begin_pos, end_pos, symbolize_names)
|
72
|
+
# TODO: opts for JSON.parse
|
73
|
+
JSON.parse(
|
74
|
+
json_str.byteslice(begin_pos...end_pos),
|
75
|
+
quirks_mode: true, symbolize_names: symbolize_names,
|
76
|
+
)
|
77
|
+
end
|
78
|
+
|
79
|
+
private_class_method :parse_value
|
80
|
+
|
81
|
+
def self.insert_value(res, parsed_value, index, path)
|
82
|
+
until path.empty?
|
83
|
+
new_index = path.shift
|
84
|
+
res[index] ||= new_index.is_a?(Integer) ? [] : {}
|
85
|
+
res = res[index]
|
86
|
+
index = new_index
|
87
|
+
end
|
88
|
+
|
89
|
+
(index - res.size).times { res.push(STUB) } if res.is_a?(Array) && res.size < index
|
90
|
+
res[index] = parsed_value
|
91
|
+
end
|
92
|
+
|
93
|
+
private_class_method :insert_value
|
9
94
|
end
|
metadata
CHANGED
@@ -1,15 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: json_scanner
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- uvlad7
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-
|
12
|
-
dependencies:
|
11
|
+
date: 2025-10-10 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: json
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 1.8.3.1
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 1.8.3.1
|
13
27
|
description: This gem uses the yajl lib to scan a JSON string and allows you to parse
|
14
28
|
pieces of it
|
15
29
|
email:
|
@@ -19,17 +33,12 @@ extensions:
|
|
19
33
|
- ext/json_scanner/extconf.rb
|
20
34
|
extra_rdoc_files: []
|
21
35
|
files:
|
22
|
-
- README.md
|
23
36
|
- ext/json_scanner/extconf.rb
|
24
37
|
- ext/json_scanner/json_scanner.c
|
25
38
|
- ext/json_scanner/json_scanner.h
|
26
39
|
- lib/json_scanner.rb
|
27
40
|
- lib/json_scanner/version.rb
|
28
41
|
- sig/json_scanner.rbs
|
29
|
-
- spec/extensiontesttask.rb
|
30
|
-
- spec/json_scanner_spec.c
|
31
|
-
- spec/json_scanner_spec.rb
|
32
|
-
- spec/spec_helper.rb
|
33
42
|
homepage: https://github.com/uvlad7/json_scanner
|
34
43
|
licenses:
|
35
44
|
- MIT
|
data/README.md
DELETED
@@ -1,122 +0,0 @@
|
|
1
|
-
[![Build Status](https://github.com/uvlad7/json_scanner/actions/workflows/main.yml/badge.svg)](https://github.com/uvlad7/json_scanner/actions/workflows/main.yml)
|
2
|
-
|
3
|
-
# JsonScanner
|
4
|
-
|
5
|
-
Extract values from JSON without full parsing. This gem uses the `yajl` library to scan a JSON string and allows you to parse pieces of it.
|
6
|
-
|
7
|
-
## Installation
|
8
|
-
|
9
|
-
Install the gem and add to the application's Gemfile by executing:
|
10
|
-
|
11
|
-
$ bundle add json_scanner
|
12
|
-
|
13
|
-
If bundler is not being used to manage dependencies, install the gem by executing:
|
14
|
-
|
15
|
-
$ gem install json_scanner
|
16
|
-
|
17
|
-
## Usage
|
18
|
-
|
19
|
-
Basic usage
|
20
|
-
|
21
|
-
```ruby
|
22
|
-
require "json"
|
23
|
-
require "json_scanner"
|
24
|
-
|
25
|
-
large_json = "[#{"4," * 100_000}42#{",2" * 100_000}]"
|
26
|
-
where_is_42 = JsonScanner.scan(large_json, [[100_000]], false).first
|
27
|
-
# => [[200001, 200003, :number]]
|
28
|
-
where_is_42.map do |begin_pos, end_pos, _type|
|
29
|
-
JSON.parse(large_json.byteslice(begin_pos...end_pos), quirks_mode: true)
|
30
|
-
end
|
31
|
-
# => [42]
|
32
|
-
|
33
|
-
emoji_json = '{"grin": "😁", "heart": "😍", "rofl": "🤣"}'
|
34
|
-
begin_pos, end_pos, = JsonScanner.scan(emoji_json, [["heart"]], false).first.first
|
35
|
-
emoji_json.byteslice(begin_pos...end_pos)
|
36
|
-
# => "\"😍\""
|
37
|
-
# Note: You most likely don't need the `quirks_mode` option unless you are using an older version
|
38
|
-
# of Ruby with the stdlib version - or an equally old version - of the json gem. In newer versions, `quirks_mode` is enabled by default.
|
39
|
-
JSON.parse(emoji_json.byteslice(begin_pos...end_pos), quirks_mode: true)
|
40
|
-
# => "😍"
|
41
|
-
# You can also do this
|
42
|
-
# emoji_json.force_encoding(Encoding::BINARY)[begin_pos...end_pos].force_encoding(Encoding::UTF_8)
|
43
|
-
# => "\"😍\""
|
44
|
-
|
45
|
-
# Ranges are supported as matchers for indexes with the following restrictions:
|
46
|
-
# - the start of a range must be non-negative
|
47
|
-
# - the end of a range must be positive or -1
|
48
|
-
# - a range with -1 end must be closed, e.g. (0..-1) works, but (0...-1) is forbidden
|
49
|
-
JsonScanner.scan('[0, 42, 0]', [[(1..-1)]])
|
50
|
-
# => [[[4, 6, :number], [8, 9, :number]]]
|
51
|
-
JsonScanner.scan('[0, 42, 0]', [[JsonScanner::ANY_INDEX]])
|
52
|
-
# => [[[1, 2, :number], [4, 6, :number], [8, 9, :number]]]
|
53
|
-
|
54
|
-
# Special matcher JsonScanner::ANY_KEY is supported for object keys
|
55
|
-
JsonScanner.scan('{"a": 1, "b": 2}', [[JsonScanner::ANY_KEY]], with_path: true)
|
56
|
-
# => [[[["a"], [6, 7, :number]], [["b"], [14, 15, :number]]]]
|
57
|
-
```
|
58
|
-
|
59
|
-
It supports multiple options
|
60
|
-
|
61
|
-
```ruby
|
62
|
-
JsonScanner.scan('[0, 42, 0]', [[(1..-1)]], with_path: true)
|
63
|
-
# => [[[[1], [4, 6, :number]], [[2], [8, 9, :number]]]]
|
64
|
-
JsonScanner.scan('[0, 42,', [[(1..-1)]], verbose_error: true)
|
65
|
-
# JsonScanner::ParseError (parse error: premature EOF)
|
66
|
-
# [0, 42,
|
67
|
-
# (right here) ------^
|
68
|
-
JsonScanner.scan('[0, /* answer */ 42, 0]', [[(1..-1)]], allow_comments: true)
|
69
|
-
# => [[[17, 19, :number], [21, 22, :number]]]
|
70
|
-
JsonScanner.scan("\"\x81\x83\"", [[]], dont_validate_strings: true)
|
71
|
-
# => [[[0, 4, :string]]]
|
72
|
-
JsonScanner.scan("{\"\x81\x83\": 42}", [[JsonScanner::ANY_KEY]], dont_validate_strings: true, with_path: true)
|
73
|
-
# => [[[["\x81\x83"], [7, 9, :number]]]]
|
74
|
-
JsonScanner.scan('[0, 42, 0]garbage', [[(1..-1)]], allow_trailing_garbage: true)
|
75
|
-
# => [[[4, 6, :number], [8, 9, :number]]]
|
76
|
-
JsonScanner.scan('[0, 42, 0] [0, 34]', [[(1..-1)]], allow_multiple_values: true)
|
77
|
-
# => [[[4, 6, :number], [8, 9, :number], [16, 18, :number]]]
|
78
|
-
JsonScanner.scan('[0, 42, 0', [[(1..-1)]], allow_partial_values: true)
|
79
|
-
# => [[[4, 6, :number], [8, 9, :number]]]
|
80
|
-
JsonScanner.scan('{"a": 1}', [[JsonScanner::ANY_KEY]], with_path: true, symbolize_path_keys: true)
|
81
|
-
# => [[[[:a], [6, 7, :number]]]]
|
82
|
-
```
|
83
|
-
|
84
|
-
Note that the standard `JSON` library supports comments, so you may want to enable it in the `JsonScanner` as well
|
85
|
-
```ruby
|
86
|
-
json_str = '{"answer": {"value": 42 /* the Ultimate Question of Life, the Universe, and Everything */ }}'
|
87
|
-
JsonScanner.scan(json_str, [["answer"]], allow_comments: true).first.map do |begin_pos, end_pos, _type|
|
88
|
-
JSON.parse(json_str.byteslice(begin_pos...end_pos), quirks_mode: true)
|
89
|
-
end
|
90
|
-
# => [{"value"=>42}]
|
91
|
-
```
|
92
|
-
|
93
|
-
You can also create a config and reuse it
|
94
|
-
|
95
|
-
```ruby
|
96
|
-
require "json_scanner"
|
97
|
-
|
98
|
-
config = JsonScanner::Config.new([[], ["key"], [(0..-1)]])
|
99
|
-
# => #<JsonScanner::Config [[], ['key'], [(0..9223372036854775807)]]>
|
100
|
-
JsonScanner.scan('{"key": "42"}', config)
|
101
|
-
# => [[[0, 13, :object]], [[8, 12, :string]], []]
|
102
|
-
JsonScanner.scan('{"key": "42"}', config, with_path: true)
|
103
|
-
# => [[[[], [0, 13, :object]]], [[["key"], [8, 12, :string]]], []]
|
104
|
-
JsonScanner.scan('[0, 42]', config)
|
105
|
-
# => [[[0, 7, :array]], [], [[1, 2, :number], [4, 6, :number]]]
|
106
|
-
JsonScanner.scan('[0, 42]', config, with_path: true)
|
107
|
-
# => [[[[], [0, 7, :array]]], [], [[[0], [1, 2, :number]], [[1], [4, 6, :number]]]]
|
108
|
-
```
|
109
|
-
|
110
|
-
## Development
|
111
|
-
|
112
|
-
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
113
|
-
|
114
|
-
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and the created tag, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
115
|
-
|
116
|
-
## Contributing
|
117
|
-
|
118
|
-
Bug reports and pull requests are welcome on GitHub at [uvlad7/json_scanner](https://github.com/uvlad7/json_scanner).
|
119
|
-
|
120
|
-
## License
|
121
|
-
|
122
|
-
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
data/spec/extensiontesttask.rb
DELETED
@@ -1,128 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require "rake/clean"
|
4
|
-
require "rake/extensiontask"
|
5
|
-
|
6
|
-
module Rake
|
7
|
-
class ExtensionTestTask < ExtensionTask
|
8
|
-
#
|
9
|
-
# The C files to compile.
|
10
|
-
#
|
11
|
-
attr_accessor :c_spec_files
|
12
|
-
|
13
|
-
#
|
14
|
-
# The folders where includes for the test files are.
|
15
|
-
#
|
16
|
-
# Default: %w{/usr/include /usr/include/google}
|
17
|
-
#
|
18
|
-
attr_accessor :test_includes
|
19
|
-
|
20
|
-
#
|
21
|
-
# The libraries to link against.
|
22
|
-
#
|
23
|
-
# Default: %w{cmockery}
|
24
|
-
#
|
25
|
-
attr_accessor :test_libraries
|
26
|
-
|
27
|
-
#
|
28
|
-
# The folders where the libraries are
|
29
|
-
#
|
30
|
-
# Default: %w{/usr/lib}
|
31
|
-
#
|
32
|
-
attr_accessor :test_lib_folders
|
33
|
-
|
34
|
-
def initialize(*args, &block)
|
35
|
-
super
|
36
|
-
@c_spec_files = []
|
37
|
-
@test_includes = %w[/usr/include /usr/include/google]
|
38
|
-
@test_libraries = %w[cmockery]
|
39
|
-
@test_lib_folders = %w[/usr/lib]
|
40
|
-
init_test_tasks(
|
41
|
-
"#{@tmp_dir}/test", "compile:#{@name}:test",
|
42
|
-
"spec:c:#{@name}", "spec:valgrind:#{@name}", "spec:gdb:#{@name}",
|
43
|
-
)
|
44
|
-
end
|
45
|
-
|
46
|
-
private
|
47
|
-
|
48
|
-
def includes
|
49
|
-
@includes ||= (@test_includes + [
|
50
|
-
".",
|
51
|
-
"../../#{@ext_dir}",
|
52
|
-
"/usr/include/ruby-#{RUBY_VERSION}",
|
53
|
-
"/usr/include/ruby-#{RUBY_VERSION}/#{RUBY_PLATFORM}",
|
54
|
-
]).map { |l| "-I#{l}" }.join(" ")
|
55
|
-
end
|
56
|
-
|
57
|
-
def libraries
|
58
|
-
@libraries ||= (@test_libraries + %w[ruby pthread crypto]).map { |l| "-l#{l}" }.join(" ")
|
59
|
-
end
|
60
|
-
|
61
|
-
def lib_folders
|
62
|
-
@lib_folders ||= (@test_lib_folders + %w[/usr/lib .]).map { |l| "-L#{l}" }.join(" ")
|
63
|
-
end
|
64
|
-
|
65
|
-
def compile_tests
|
66
|
-
# compile the test sources
|
67
|
-
FileList["*.c"].each do |cfile|
|
68
|
-
sh "gcc -g #{includes} -c #{cfile}"
|
69
|
-
end
|
70
|
-
|
71
|
-
source_objects = FileList["../#{RUBY_PLATFORM}/#{@name}/#{RUBY_VERSION}/*.o"]
|
72
|
-
# link the executables
|
73
|
-
FileList["*.o"].each do |ofile|
|
74
|
-
sh "gcc -g #{lib_folders} #{libraries} #{source_objects} #{ofile} -o #{ofile.ext}"
|
75
|
-
end
|
76
|
-
end
|
77
|
-
|
78
|
-
def init_compile_task(compile_dir, compile_task)
|
79
|
-
directory compile_dir
|
80
|
-
desc "Compile #{@name} tests"
|
81
|
-
task compile_task => ["compile:#{@name}", compile_dir] do
|
82
|
-
# copy the test files into the compilation folder
|
83
|
-
@c_spec_files.each { |file| cp file, compile_dir }
|
84
|
-
|
85
|
-
# start compilation
|
86
|
-
chdir(compile_dir) { compile_tests }
|
87
|
-
end
|
88
|
-
end
|
89
|
-
|
90
|
-
def init_valgrind_task(compile_dir, compile_task, valgrind_task)
|
91
|
-
desc "Execute valgrind for a #{@name} test"
|
92
|
-
task valgrind_task => [compile_task] do |_t, args|
|
93
|
-
sh "valgrind --num-callers=50 --error-limit=no --partial-loads-ok=yes --undef-value-errors=no " \
|
94
|
-
"--leak-check=full #{compile_dir}/#{args.test}"
|
95
|
-
end
|
96
|
-
end
|
97
|
-
|
98
|
-
def init_gdb_task(compile_dir, compile_task, gdb_task)
|
99
|
-
desc "Execute gdb for a #{@name} test"
|
100
|
-
task gdb_task => [compile_task] do |_t, args|
|
101
|
-
sh "gdb #{compile_dir}/#{args.test}"
|
102
|
-
end
|
103
|
-
end
|
104
|
-
|
105
|
-
def init_test_task(compile_dir, compile_task, test_task)
|
106
|
-
desc "Test #{@name}"
|
107
|
-
task test_task => [compile_task] do |_t, args|
|
108
|
-
if args.test
|
109
|
-
sh "#{compile_dir}/#{args.test}"
|
110
|
-
else
|
111
|
-
FileList["#{compile_dir}/*.o"].each do |ofile|
|
112
|
-
sh ofile.ext.to_s
|
113
|
-
end
|
114
|
-
end
|
115
|
-
end
|
116
|
-
end
|
117
|
-
|
118
|
-
def init_test_tasks(compile_dir, compile_task, test_task, valgrind_task, gdb_task)
|
119
|
-
init_compile_task(compile_dir, compile_task)
|
120
|
-
init_valgrind_task(compile_dir, compile_task, valgrind_task)
|
121
|
-
init_gdb_task(compile_dir, compile_task, gdb_task)
|
122
|
-
init_test_task(compile_dir, compile_task, test_task)
|
123
|
-
|
124
|
-
desc "Test all C extensions"
|
125
|
-
task "spec:c" => [test_task]
|
126
|
-
end
|
127
|
-
end
|
128
|
-
end
|
data/spec/json_scanner_spec.c
DELETED
File without changes
|