json_scanner 0.3.1 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/json_scanner.rb CHANGED
@@ -3,7 +3,92 @@
3
3
  require_relative "json_scanner/version"
4
4
  require_relative "json_scanner/json_scanner"
5
5
 
6
+ require "json"
7
+
8
+ # Extract values from JSON without full parsing. This gem uses the +yajl+ library
9
+ # to scan a JSON string and allows you to parse pieces of it.
6
10
  module JsonScanner
7
11
  class Error < StandardError; end
8
- # Your code goes here...
12
+
13
+ ALLOWED_OPTS = %i[verbose_error allow_comments dont_validate_strings allow_multiple_values
14
+ allow_trailing_garbage allow_partial_values symbolize_path_keys symbolize_names].freeze
15
+ private_constant :ALLOWED_OPTS
16
+ STUB = :stub
17
+ private_constant :STUB
18
+ SCAN_OPTS = { with_path: true, with_roots_info: true }.freeze
19
+ private_constant :SCAN_OPTS
20
+ SCAN_OPTIONS = Options.new(SCAN_OPTS)
21
+ private_constant :SCAN_OPTIONS
22
+
23
+ def self.parse(json_str, config_or_path_ary, **opts)
24
+ # with_path and with_roots_info are set here
25
+ unless (extra_opts = opts.keys - ALLOWED_OPTS).empty?
26
+ raise ArgumentError, "unknown keyword#{"s" if extra_opts.size > 1}: #{extra_opts.map(&:inspect).join(", ")}"
27
+ end
28
+
29
+ opts[:symbolize_path_keys] = opts.delete(:symbolize_names) if opts.key?(:symbolize_names)
30
+ results, roots = if opts.empty?
31
+ scan(json_str, config_or_path_ary, SCAN_OPTIONS)
32
+ else
33
+ scan(json_str, config_or_path_ary, **opts, **SCAN_OPTS)
34
+ end
35
+
36
+ res = process_results(json_str, results, roots, opts[:symbolize_path_keys])
37
+
38
+ opts[:allow_multiple_values] ? res : res.first
39
+ end
40
+
41
+ def self.process_results(json_str, results, roots, symbolize_names)
42
+ # stubs are symbols, so they can be distinguished from real values
43
+ res = roots.map(&:first)
44
+ # results for different path matchers can overlap, in that case we will simply parse more than one time,
45
+ # but there shouldn't be any surprises in the behavior
46
+ results.each do |result|
47
+ process_result(res, result, roots, json_str, symbolize_names)
48
+ end
49
+ res
50
+ end
51
+
52
+ private_class_method :process_results
53
+
54
+ def self.process_result(res, result, roots, json_str, symbolize_names)
55
+ current_root_index = 0
56
+ next_root = roots[1]
57
+ result.each do |path, (begin_pos, end_pos, _type)|
58
+ while next_root && begin_pos >= next_root[1]
59
+ current_root_index += 1
60
+ next_root = roots[current_root_index + 1]
61
+ end
62
+
63
+ # for 'res[index]' check inside insert_value
64
+ res[current_root_index] = nil if res[current_root_index].is_a?(Symbol)
65
+ insert_value(res, parse_value(json_str, begin_pos, end_pos, symbolize_names), current_root_index, path)
66
+ end
67
+ end
68
+
69
+ private_class_method :process_result
70
+
71
+ def self.parse_value(json_str, begin_pos, end_pos, symbolize_names)
72
+ # TODO: opts for JSON.parse
73
+ JSON.parse(
74
+ json_str.byteslice(begin_pos...end_pos),
75
+ quirks_mode: true, symbolize_names: symbolize_names,
76
+ )
77
+ end
78
+
79
+ private_class_method :parse_value
80
+
81
+ def self.insert_value(res, parsed_value, index, path)
82
+ until path.empty?
83
+ new_index = path.shift
84
+ res[index] ||= new_index.is_a?(Integer) ? [] : {}
85
+ res = res[index]
86
+ index = new_index
87
+ end
88
+
89
+ (index - res.size).times { res.push(STUB) } if res.is_a?(Array) && res.size < index
90
+ res[index] = parsed_value
91
+ end
92
+
93
+ private_class_method :insert_value
9
94
  end
metadata CHANGED
@@ -1,15 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: json_scanner
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - uvlad7
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-08-14 00:00:00.000000000 Z
12
- dependencies: []
11
+ date: 2025-10-10 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: json
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: 1.8.3.1
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: 1.8.3.1
13
27
  description: This gem uses the yajl lib to scan a JSON string and allows you to parse
14
28
  pieces of it
15
29
  email:
@@ -19,17 +33,12 @@ extensions:
19
33
  - ext/json_scanner/extconf.rb
20
34
  extra_rdoc_files: []
21
35
  files:
22
- - README.md
23
36
  - ext/json_scanner/extconf.rb
24
37
  - ext/json_scanner/json_scanner.c
25
38
  - ext/json_scanner/json_scanner.h
26
39
  - lib/json_scanner.rb
27
40
  - lib/json_scanner/version.rb
28
41
  - sig/json_scanner.rbs
29
- - spec/extensiontesttask.rb
30
- - spec/json_scanner_spec.c
31
- - spec/json_scanner_spec.rb
32
- - spec/spec_helper.rb
33
42
  homepage: https://github.com/uvlad7/json_scanner
34
43
  licenses:
35
44
  - MIT
data/README.md DELETED
@@ -1,166 +0,0 @@
1
- [![Tests](https://github.com/uvlad7/json_scanner/actions/workflows/main.yml/badge.svg)](https://github.com/uvlad7/json_scanner/actions/workflows/main.yml)
2
-
3
- # JsonScanner
4
-
5
- Extract values from JSON without full parsing. This gem uses the `yajl` library to scan a JSON string and allows you to parse pieces of it.
6
-
7
- ## Installation
8
-
9
- Install the gem and add to the application's Gemfile by executing:
10
-
11
- $ bundle add json_scanner
12
-
13
- If bundler is not being used to manage dependencies, install the gem by executing:
14
-
15
- $ gem install json_scanner
16
-
17
- ## Usage
18
-
19
- Basic usage
20
-
21
- ```ruby
22
- require "json"
23
- require "json_scanner"
24
-
25
- large_json = "[#{"4," * 100_000}42#{",2" * 100_000}]"
26
- where_is_42 = JsonScanner.scan(large_json, [[100_000]], false).first
27
- # => [[200001, 200003, :number]]
28
- where_is_42.map do |begin_pos, end_pos, _type|
29
- JSON.parse(large_json.byteslice(begin_pos...end_pos), quirks_mode: true)
30
- end
31
- # => [42]
32
-
33
- emoji_json = '{"grin": "😁", "heart": "😍", "rofl": "🤣"}'
34
- begin_pos, end_pos, = JsonScanner.scan(emoji_json, [["heart"]], false).first.first
35
- emoji_json.byteslice(begin_pos...end_pos)
36
- # => "\"😍\""
37
- # Note: You most likely don't need the `quirks_mode` option unless you are using an older version
38
- # of Ruby with the stdlib (or an equally old) version of the json gem. In newer versions, `quirks_mode` is enabled by default.
39
- JSON.parse(emoji_json.byteslice(begin_pos...end_pos), quirks_mode: true)
40
- # => "😍"
41
- # You can also do this
42
- # emoji_json.force_encoding(Encoding::BINARY)[begin_pos...end_pos].force_encoding(Encoding::UTF_8)
43
- # => "\"😍\""
44
-
45
- # Ranges are supported as matchers for indexes with the following restrictions:
46
- # - the start of a range must be non-negative
47
- # - the end of a range must be positive or -1
48
- # - a range with -1 end must be closed, e.g. (0..-1) works, but (0...-1) is forbidden
49
- JsonScanner.scan('[0, 42, 0]', [[(1..-1)]])
50
- # => [[[4, 6, :number], [8, 9, :number]]]
51
- JsonScanner.scan('[0, 42, 0]', [[JsonScanner::ANY_INDEX]])
52
- # => [[[1, 2, :number], [4, 6, :number], [8, 9, :number]]]
53
-
54
- # Special matcher JsonScanner::ANY_KEY is supported for object keys
55
- JsonScanner.scan('{"a": 1, "b": 2}', [[JsonScanner::ANY_KEY]], with_path: true)
56
- # => [[[["a"], [6, 7, :number]], [["b"], [14, 15, :number]]]]
57
- # Regex matchers aren't supported yet, but you can simulate them using the `with_path` option
58
- JsonScanner.scan(
59
- '{"question1": 1, "answer": 42, "question2": 2}',
60
- [[JsonScanner::ANY_KEY]], with_path: true,
61
- ).map do |res|
62
- res.map do |path, (begin_pos, end_pos, type)|
63
- [begin_pos, end_pos, type] if path[0] =~ /\Aquestion/
64
- end.compact
65
- end
66
- # => [[[14, 15, :number], [44, 45, :number]]]
67
- ```
68
-
69
- ## Options
70
-
71
- `JsonScanner` supports multiple options
72
-
73
- ```ruby
74
- JsonScanner.scan('[0, 42, 0]', [[(1..-1)]], with_path: true)
75
- # => [[[[1], [4, 6, :number]], [[2], [8, 9, :number]]]]
76
- JsonScanner.scan('[0, 42],', [[(1..-1)]], verbose_error: true)
77
- # JsonScanner::ParseError (parse error: trailing garbage)
78
- # [0, 42],
79
- # (right here) ------^
80
- # Note: the 'right here' pointer is wrong in case of a premature EOF error; this is a bug in libyajl
81
- JsonScanner.scan('[0, 42,', [[(1..-1)]], verbose_error: true)
82
- # JsonScanner::ParseError (parse error: premature EOF)
83
- # [0, 42,
84
- # (right here) ------^
85
- JsonScanner.scan('[0, /* answer */ 42, 0]', [[(1..-1)]], allow_comments: true)
86
- # => [[[17, 19, :number], [21, 22, :number]]]
87
- JsonScanner.scan("\"\x81\x83\"", [[]], dont_validate_strings: true)
88
- # => [[[0, 4, :string]]]
89
- JsonScanner.scan("{\"\x81\x83\": 42}", [[JsonScanner::ANY_KEY]], dont_validate_strings: true, with_path: true)
90
- # => [[[["\x81\x83"], [7, 9, :number]]]]
91
- JsonScanner.scan('[0, 42, 0]garbage', [[(1..-1)]], allow_trailing_garbage: true)
92
- # => [[[4, 6, :number], [8, 9, :number]]]
93
- JsonScanner.scan('[0, 42, 0] [0, 34]', [[(1..-1)]], allow_multiple_values: true)
94
- # => [[[4, 6, :number], [8, 9, :number], [16, 18, :number]]]
95
- JsonScanner.scan('[0, 42, 0', [[(1..-1)]], allow_partial_values: true)
96
- # => [[[4, 6, :number], [8, 9, :number]]]
97
- JsonScanner.scan('{"a": 1}', [[JsonScanner::ANY_KEY]], with_path: true, symbolize_path_keys: true)
98
- # => [[[[:a], [6, 7, :number]]]]
99
- ```
100
-
101
- ### Comments in the JSON
102
-
103
- Note that the standard `JSON` library supports comments, so you may want to enable them in `JsonScanner` as well
104
- ```ruby
105
- json_str = '{"answer": {"value": 42 /* the Ultimate Question of Life, the Universe, and Everything */ }}'
106
- JsonScanner.scan(json_str, [["answer"]], allow_comments: true).first.map do |begin_pos, end_pos, _type|
107
- JSON.parse(json_str.byteslice(begin_pos...end_pos), quirks_mode: true)
108
- end
109
- # => [{"value"=>42}]
110
- ```
111
-
112
- ### Find the end of a JSON string
113
-
114
- `allow_trailing_garbage` option may come in handy if you want to extract a JSON string from a JS text
115
- ```ruby
116
- script_text = <<~'JS'
117
- <script>window.__APOLLO_STATE__={"ContentItem:0":{"__typename":"ContentItem","id":0, "configurationType":"NO_CONFIGURATION","replacementPartsUrl":null,"relatedCategories":[{"__ref":"Category:109450"},{"__ref":"Category:82044355"},{"__ref":"Category:109441"},{"__ref":"Category:109442"},{"__ref":"Category:109449"},{"__ref":"Category:109444"},{"__ref":"Category:82043730"}],"recommendedOptions":[]}};window.__APPVERSION__=7018;window.__CONFIG_ENV__={value: 'PRODUCTION'};</script>
118
- JS
119
- json_with_trailing_garbage = script_text[/__APOLLO_STATE__\s*=\s*({.+)/, 1]
120
- json_end_pos = JsonScanner.scan(json_with_trailing_garbage, [[]], allow_trailing_garbage: true).first.first[1]
121
- apollo_state = JSON.parse(json_with_trailing_garbage[0...json_end_pos])
122
- ```
123
-
124
- ## Reuse configuration
125
-
126
- You can create a `JsonScanner::Config` instance and reuse it between `JsonScanner.scan` calls
127
-
128
- ```ruby
129
- require "json_scanner"
130
-
131
- config = JsonScanner::Config.new([[], ["key"], [(0..-1)]])
132
- # => #<JsonScanner::Config [[], ['key'], [(0..9223372036854775807)]]>
133
- JsonScanner.scan('{"key": "42"}', config)
134
- # => [[[0, 13, :object]], [[8, 12, :string]], []]
135
- JsonScanner.scan('{"key": "42"}', config, with_path: true)
136
- # => [[[[], [0, 13, :object]]], [[["key"], [8, 12, :string]]], []]
137
- JsonScanner.scan('[0, 42]', config)
138
- # => [[[0, 7, :array]], [], [[1, 2, :number], [4, 6, :number]]]
139
- JsonScanner.scan('[0, 42]', config, with_path: true)
140
- # => [[[[], [0, 7, :array]]], [], [[[0], [1, 2, :number]], [[1], [4, 6, :number]]]]
141
- ```
142
-
143
- Options can be passed as a hash, even on Ruby 3
144
- ```ruby
145
- options = { allow_trailing_garbage: true, allow_partial_values: true }
146
- JsonScanner.scan('[0, 42', [[1]], options) == JsonScanner.scan('[0, 42]_', [[1]], options)
147
- # => true
148
- ```
149
-
150
- ## Streaming mode
151
-
152
- Streaming mode isn't supported yet, as it's harder to implement and to use. I plan to add it in the future; its API is subject to discussion. If you have suggestions, use cases, or preferences for how it should behave, I’d love to hear from you!
153
-
154
- ## Development
155
-
156
- After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
157
-
158
- To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and the created tag, and push the `.gem` file to [rubygems.org](https://rubygems.org).
159
-
160
- ## Contributing
161
-
162
- Bug reports and pull requests are welcome on GitHub at [github](https://github.com/uvlad7/json_scanner).
163
-
164
- ## License
165
-
166
- The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
@@ -1,128 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "rake/clean"
4
- require "rake/extensiontask"
5
-
6
- module Rake
7
- class ExtensionTestTask < ExtensionTask
8
- #
9
- # The C files to compile.
10
- #
11
- attr_accessor :c_spec_files
12
-
13
- #
14
- # The folders where includes for the test files are.
15
- #
16
- # Default: %w{/usr/include /usr/include/google}
17
- #
18
- attr_accessor :test_includes
19
-
20
- #
21
- # The libraries to link against.
22
- #
23
- # Default: %w{cmockery}
24
- #
25
- attr_accessor :test_libraries
26
-
27
- #
28
- # The folders where the libraries are
29
- #
30
- # Default: %w{/usr/lib}
31
- #
32
- attr_accessor :test_lib_folders
33
-
34
- def initialize(*args, &block)
35
- super
36
- @c_spec_files = []
37
- @test_includes = %w[/usr/include /usr/include/google]
38
- @test_libraries = %w[cmockery]
39
- @test_lib_folders = %w[/usr/lib]
40
- init_test_tasks(
41
- "#{@tmp_dir}/test", "compile:#{@name}:test",
42
- "spec:c:#{@name}", "spec:valgrind:#{@name}", "spec:gdb:#{@name}",
43
- )
44
- end
45
-
46
- private
47
-
48
- def includes
49
- @includes ||= (@test_includes + [
50
- ".",
51
- "../../#{@ext_dir}",
52
- "/usr/include/ruby-#{RUBY_VERSION}",
53
- "/usr/include/ruby-#{RUBY_VERSION}/#{RUBY_PLATFORM}",
54
- ]).map { |l| "-I#{l}" }.join(" ")
55
- end
56
-
57
- def libraries
58
- @libraries ||= (@test_libraries + %w[ruby pthread crypto]).map { |l| "-l#{l}" }.join(" ")
59
- end
60
-
61
- def lib_folders
62
- @lib_folders ||= (@test_lib_folders + %w[/usr/lib .]).map { |l| "-L#{l}" }.join(" ")
63
- end
64
-
65
- def compile_tests
66
- # compile the test sources
67
- FileList["*.c"].each do |cfile|
68
- sh "gcc -g #{includes} -c #{cfile}"
69
- end
70
-
71
- source_objects = FileList["../#{RUBY_PLATFORM}/#{@name}/#{RUBY_VERSION}/*.o"]
72
- # link the executables
73
- FileList["*.o"].each do |ofile|
74
- sh "gcc -g #{lib_folders} #{libraries} #{source_objects} #{ofile} -o #{ofile.ext}"
75
- end
76
- end
77
-
78
- def init_compile_task(compile_dir, compile_task)
79
- directory compile_dir
80
- desc "Compile #{@name} tests"
81
- task compile_task => ["compile:#{@name}", compile_dir] do
82
- # copy the test files into the compilation folder
83
- @c_spec_files.each { |file| cp file, compile_dir }
84
-
85
- # start compilation
86
- chdir(compile_dir) { compile_tests }
87
- end
88
- end
89
-
90
- def init_valgrind_task(compile_dir, compile_task, valgrind_task)
91
- desc "Execute valgrind for a #{@name} test"
92
- task valgrind_task => [compile_task] do |_t, args|
93
- sh "valgrind --num-callers=50 --error-limit=no --partial-loads-ok=yes --undef-value-errors=no " \
94
- "--leak-check=full #{compile_dir}/#{args.test}"
95
- end
96
- end
97
-
98
- def init_gdb_task(compile_dir, compile_task, gdb_task)
99
- desc "Execute gdb for a #{@name} test"
100
- task gdb_task => [compile_task] do |_t, args|
101
- sh "gdb #{compile_dir}/#{args.test}"
102
- end
103
- end
104
-
105
- def init_test_task(compile_dir, compile_task, test_task)
106
- desc "Test #{@name}"
107
- task test_task => [compile_task] do |_t, args|
108
- if args.test
109
- sh "#{compile_dir}/#{args.test}"
110
- else
111
- FileList["#{compile_dir}/*.o"].each do |ofile|
112
- sh ofile.ext.to_s
113
- end
114
- end
115
- end
116
- end
117
-
118
- def init_test_tasks(compile_dir, compile_task, test_task, valgrind_task, gdb_task)
119
- init_compile_task(compile_dir, compile_task)
120
- init_valgrind_task(compile_dir, compile_task, valgrind_task)
121
- init_gdb_task(compile_dir, compile_task, gdb_task)
122
- init_test_task(compile_dir, compile_task, test_task)
123
-
124
- desc "Test all C extensions"
125
- task "spec:c" => [test_task]
126
- end
127
- end
128
- end
File without changes