smarter_json 0.9.2 → 0.9.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/CHANGELOG.md +77 -54
- data/README.md +215 -72
- data/docs/_introduction.md +6 -12
- data/docs/basic_read_api.md +29 -19
- data/docs/basic_write_api.md +2 -2
- data/docs/examples.md +32 -23
- data/docs/options.md +14 -14
- data/ext/smarter_json/smarter_json.c +223 -89
- data/ext/smarter_json/vendor/LICENSE-fast_float-MIT +27 -0
- data/ext/smarter_json/vendor/eisel_lemire.h +117 -0
- data/ext/smarter_json/vendor/eisel_lemire.md +29 -0
- data/ext/smarter_json/vendor/eisel_lemire_powers.h +663 -0
- data/lib/smarter_json/backports.rb +28 -0
- data/lib/smarter_json/options.rb +52 -0
- data/lib/smarter_json/parser.rb +400 -139
- data/lib/smarter_json/version.rb +1 -1
- data/lib/smarter_json.rb +3 -1
- metadata +9 -5
- data/ext/smarter_json/vendor/ryu.h +0 -819
- data/ext/smarter_json/vendor/ryu.md +0 -22
data/lib/smarter_json/parser.rb
CHANGED
|
@@ -1,5 +1,9 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
# Array#filter_map (used in Recovery#extract_payloads) is Ruby 2.7+; on Ruby < 2.7
|
|
4
|
+
# activate the scoped refinement backport (no-op on 2.7+, which uses native filter_map).
|
|
5
|
+
using SmarterJSON::Backports if Gem::Version.new(RUBY_VERSION) < Gem::Version.new("2.7")
|
|
6
|
+
|
|
3
7
|
module SmarterJSON
|
|
4
8
|
# ParseError / EncodingError live in errors.rb (loaded first) so they can inherit
|
|
5
9
|
# from the shared SmarterJSON::Error base.
|
|
@@ -12,15 +16,20 @@ module SmarterJSON
|
|
|
12
16
|
# is always content, never a filename — use process_file for paths.) The values
|
|
13
17
|
# in `options` override Parser::DEFAULT_OPTIONS.
|
|
14
18
|
#
|
|
15
|
-
# Without a block: returns
|
|
16
|
-
#
|
|
17
|
-
#
|
|
18
|
-
#
|
|
19
|
+
# Without a block: always returns an Array of the documents found — [] for none,
|
|
20
|
+
# [doc] for one, [d1, d2, …] for several (NDJSON / JSONL / concatenated). A
|
|
21
|
+
# top-level value must be a recognized JSON value (number / literal / quoted
|
|
22
|
+
# string / object / array) or an implicit-root object, else it raises. For the
|
|
23
|
+
# single-document case use SmarterJSON.process_one (returns the bare value).
|
|
24
|
+
# :acceleration (default true) selects the C extension when compiled and loaded
|
|
25
|
+
# (SmarterJSON::HAS_ACCELERATION); otherwise the pure-Ruby parser.
|
|
19
26
|
#
|
|
20
|
-
# With a block: yields each top-level document as it is parsed, and returns
|
|
21
|
-
# For an IO this streams document-by-document in bounded memory —
|
|
22
|
-
# stream as newline-delimited documents (NDJSON / JSONL), one per
|
|
27
|
+
# With a block: yields each top-level document as it is parsed, and returns the
|
|
28
|
+
# document count. For an IO this streams document-by-document in bounded memory —
|
|
29
|
+
# it reads the stream as newline-delimited documents (NDJSON / JSONL), one per
|
|
30
|
+
# line.
|
|
23
31
|
def process(input, options = {}, &block)
|
|
32
|
+
options = Options.process_options(options)
|
|
24
33
|
if input.is_a?(String)
|
|
25
34
|
Recovery.process_string(input, options, &block)
|
|
26
35
|
elsif input.respond_to?(:read)
|
|
@@ -39,7 +48,8 @@ module SmarterJSON
|
|
|
39
48
|
# loading the whole file); the documents are read as newline-delimited
|
|
40
49
|
# (NDJSON / JSONL), one per line.
|
|
41
50
|
def process_file(path, options = {}, &block)
|
|
42
|
-
|
|
51
|
+
options = Options.process_options(options)
|
|
52
|
+
encoding = options[:encoding] || "UTF-8"
|
|
43
53
|
if block
|
|
44
54
|
File.open(path, "r:#{encoding}") { |io| stream_io(io, options, &block) }
|
|
45
55
|
else
|
|
@@ -47,8 +57,44 @@ module SmarterJSON
|
|
|
47
57
|
end
|
|
48
58
|
end
|
|
49
59
|
|
|
50
|
-
#
|
|
51
|
-
#
|
|
60
|
+
# SmarterJSON.process_one(input, options = {}) — the single-document accessor.
|
|
61
|
+
#
|
|
62
|
+
# Returns the first document's value (or nil when the input holds no documents).
|
|
63
|
+
# When the input holds MORE than one document it returns the first and warns once
|
|
64
|
+
# — it never raises, since an extra document is valid data; the warning goes to
|
|
65
|
+
# on_warning if set, else Rails.logger.warn when Rails is loaded, else Kernel#warn.
|
|
66
|
+
# For an IO this is bounded memory: it parses just the first document and stops as
|
|
67
|
+
# soon as a second is seen, instead of materialising the whole stream the way
|
|
68
|
+
# process(io).first would. (process(input).first and process(input)[0] silently
|
|
69
|
+
# drop documents 2+ — a footgun; use process_one instead.)
|
|
70
|
+
def process_one(input, options = {})
|
|
71
|
+
options = Options.process_options(options)
|
|
72
|
+
|
|
73
|
+
# IO: bounded memory — parse just the first document and stop once a second is
|
|
74
|
+
# seen (peek-to-warn). A String is already in memory, so use the plain no-block
|
|
75
|
+
# path: it returns the full (wrapper-recovered, de-duplicated) Array in one pass,
|
|
76
|
+
# which also avoids the reactive-recovery double-yield the block path would hit.
|
|
77
|
+
unless input.respond_to?(:read)
|
|
78
|
+
docs = process(input, options)
|
|
79
|
+
warn_extra_documents(options) if docs.length > 1
|
|
80
|
+
return docs.first
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
first = nil
|
|
84
|
+
count = 0
|
|
85
|
+
catch(:smarter_json_first_document) do
|
|
86
|
+
process(input, options) do |doc|
|
|
87
|
+
count += 1
|
|
88
|
+
first = doc if count == 1
|
|
89
|
+
throw(:smarter_json_first_document) if count > 1
|
|
90
|
+
end
|
|
91
|
+
end
|
|
92
|
+
warn_extra_documents(options) if count > 1
|
|
93
|
+
first
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
# Parse a String of JSON content (the in-memory path). Returns an Array of the
|
|
97
|
+
# documents found (empty for none); the C extension is used when available.
|
|
52
98
|
def process_content(input, options, &block)
|
|
53
99
|
if block
|
|
54
100
|
if options.fetch(:acceleration, true) && HAS_ACCELERATION
|
|
@@ -66,11 +112,33 @@ module SmarterJSON
|
|
|
66
112
|
# Stream documents from an IO incrementally, yielding each recovered top-level
|
|
67
113
|
# document without slurping the whole input into memory first.
|
|
68
114
|
def stream_io(io, options, &block)
|
|
69
|
-
|
|
70
|
-
|
|
115
|
+
count = 0
|
|
116
|
+
Framer.each_document(io) do |doc|
|
|
117
|
+
# Recovery.process_string yields each value and returns how many it yielded;
|
|
118
|
+
# blank / comment-only framed segments yield none, so count tracks actual
|
|
119
|
+
# documents (== values yielded), not raw framed segments.
|
|
120
|
+
count += Recovery.process_string(doc, options, &block)
|
|
121
|
+
end
|
|
122
|
+
count
|
|
123
|
+
end
|
|
124
|
+
|
|
125
|
+
# process_one's "more than one document" notice — routed to on_warning if the caller
|
|
126
|
+
# gave one, else Rails.logger when Rails is loaded, else Kernel#warn. Never silent,
|
|
127
|
+
# never raised.
|
|
128
|
+
def warn_extra_documents(options)
|
|
129
|
+
message = "SmarterJSON.process_one: input has more than one document — returning the first and " \
|
|
130
|
+
"dropping the rest. Use SmarterJSON.process to get every document."
|
|
131
|
+
handler = options[:on_warning]
|
|
132
|
+
if handler
|
|
133
|
+
handler.call(Warning.new(:extra_documents, message, nil, nil))
|
|
134
|
+
elsif defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
|
|
135
|
+
Rails.logger.warn(message)
|
|
136
|
+
else
|
|
137
|
+
Kernel.warn(message)
|
|
138
|
+
end
|
|
71
139
|
end
|
|
72
140
|
|
|
73
|
-
private_class_method :process_content, :stream_io
|
|
141
|
+
private_class_method :process_content, :stream_io, :warn_extra_documents
|
|
74
142
|
|
|
75
143
|
# Named byte values, shared by the Parser FSM and the Framer / Recovery byte
|
|
76
144
|
# scanners so none of them spell out raw hex. Included where needed.
|
|
@@ -119,7 +187,7 @@ module SmarterJSON
|
|
|
119
187
|
|
|
120
188
|
module_function
|
|
121
189
|
|
|
122
|
-
def each_document(io
|
|
190
|
+
def each_document(io)
|
|
123
191
|
buffer = +""
|
|
124
192
|
scan = 0
|
|
125
193
|
doc_start = nil
|
|
@@ -385,15 +453,23 @@ module SmarterJSON
|
|
|
385
453
|
handler = options[:on_warning]
|
|
386
454
|
emit_wrapper_warnings(payloads, handler)
|
|
387
455
|
|
|
388
|
-
|
|
389
|
-
|
|
456
|
+
if block_given?
|
|
457
|
+
count = 0
|
|
458
|
+
payloads.each do |payload|
|
|
459
|
+
SmarterJSON.send(:process_content, payload[:slice], options) do |doc|
|
|
460
|
+
block.call(doc)
|
|
461
|
+
count += 1
|
|
462
|
+
end
|
|
463
|
+
end
|
|
464
|
+
return count
|
|
390
465
|
end
|
|
391
466
|
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
467
|
+
# Each payload's process_content now returns an Array of its documents; flatten
|
|
468
|
+
# so several recovered payloads yield one flat Array<doc> (the always-array
|
|
469
|
+
# contract), not an Array of Arrays.
|
|
470
|
+
payloads.flat_map do |payload|
|
|
471
|
+
SmarterJSON.send(:process_content, payload[:slice], options)
|
|
472
|
+
end
|
|
397
473
|
end
|
|
398
474
|
|
|
399
475
|
def emit_wrapper_warnings(payloads, handler)
|
|
@@ -613,18 +689,22 @@ module SmarterJSON
|
|
|
613
689
|
# followed by a digit ("5.", "5.e3"). Matches iff normalize_for_bigdecimal
|
|
614
690
|
# would change the string — so when it doesn't match, we skip normalization.
|
|
615
691
|
NEEDS_DECIMAL_FIXUP = /\A[+-]?\.|\.(?:[eE]|\z)/.freeze
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
#
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
692
|
+
|
|
693
|
+
# parse_string scans to the next closing-quote-or-backslash. byteindex (Ruby 3.2+,
|
|
694
|
+
# MRI) does that jump at C speed; the getbyte loop in scan_string_delimiter is the
|
|
695
|
+
# portable fallback (JRuby / TruffleRuby / older MRI). Both find the same byte.
|
|
696
|
+
BYTEINDEX_AVAILABLE = "".respond_to?(:byteindex)
|
|
697
|
+
DQUOTE_OR_BACKSLASH = /["\\]/.freeze
|
|
698
|
+
SQUOTE_OR_BACKSLASH = /['\\]/.freeze
|
|
699
|
+
|
|
700
|
+
# scan_quoteless_run's fast path jumps (in C) to the first structural terminator
|
|
701
|
+
# (',' '}' ']' '{' '[') OR any whitespace ([[:space:]] covers ASCII + Unicode space,
|
|
702
|
+
# incl. LF/CR which also terminate). Stopping at a terminator/EOF means the run had no
|
|
703
|
+
# interior whitespace, so there's nothing to trim and no comment marker can apply.
|
|
704
|
+
QL_BREAK = /[,{}\[\]]|[[:space:]]/.freeze
|
|
705
|
+
|
|
706
|
+
# The defaults live centrally in SmarterJSON::Options (lib/smarter_json/options.rb).
|
|
707
|
+
DEFAULT_OPTIONS = Options::DEFAULT_OPTIONS
|
|
628
708
|
|
|
629
709
|
def initialize(input, options = {})
|
|
630
710
|
raise ArgumentError, "input must be a String" unless input.is_a?(String)
|
|
@@ -632,8 +712,13 @@ module SmarterJSON
|
|
|
632
712
|
opts = DEFAULT_OPTIONS.merge(options)
|
|
633
713
|
@symbolize_keys = opts[:symbolize_keys]
|
|
634
714
|
@duplicate_key = opts[:duplicate_key]
|
|
635
|
-
@
|
|
636
|
-
@on_warning
|
|
715
|
+
@decimal_precision = opts[:decimal_precision]
|
|
716
|
+
@on_warning = opts[:on_warning]
|
|
717
|
+
# store_member only needs the (per-member) Hash#key? duplicate lookup when a
|
|
718
|
+
# repeat would change behavior: a warning must fire, or :first_wins must keep the
|
|
719
|
+
# first. With the default (:last_wins, no handler) a duplicate just overwrites,
|
|
720
|
+
# which `hash[k] = value` already does — so skip the lookup entirely.
|
|
721
|
+
@check_duplicates = !@on_warning.nil? || @duplicate_key == :first_wins
|
|
637
722
|
|
|
638
723
|
encoding = opts[:encoding]
|
|
639
724
|
@input = encoding ? input.dup.force_encoding(encoding) : input
|
|
@@ -642,8 +727,6 @@ module SmarterJSON
|
|
|
642
727
|
@bytesize = @input.bytesize
|
|
643
728
|
# Skip a UTF-8 BOM (EF BB BF) at the start of input.
|
|
644
729
|
@pos = @input.getbyte(0) == 0xEF && @input.getbyte(1) == 0xBB && @input.getbyte(2) == 0xBF ? 3 : 0
|
|
645
|
-
@line = 1
|
|
646
|
-
@col = 1
|
|
647
730
|
end
|
|
648
731
|
|
|
649
732
|
# No block: auto-detect the document count for free (the same "is there
|
|
@@ -653,17 +736,14 @@ module SmarterJSON
|
|
|
653
736
|
# value. Commas DO separate top-level documents: skip_document_separators collapses
|
|
654
737
|
# them together with whitespace and comments, so `1, 2, 3` parses as three documents.
|
|
655
738
|
def parse
|
|
656
|
-
|
|
657
|
-
return nil if eof?
|
|
658
|
-
|
|
659
|
-
value = parse_document
|
|
660
|
-
skip_whitespace_and_comments
|
|
661
|
-
return value if eof?
|
|
662
|
-
|
|
663
|
-
results = [value]
|
|
739
|
+
results = []
|
|
664
740
|
until eof?
|
|
665
|
-
|
|
666
|
-
|
|
741
|
+
skip_document_separators
|
|
742
|
+
break if eof?
|
|
743
|
+
|
|
744
|
+
value = parse_document
|
|
745
|
+
enforce_scalar_boundary(value)
|
|
746
|
+
results << value
|
|
667
747
|
end
|
|
668
748
|
results
|
|
669
749
|
end
|
|
@@ -671,13 +751,17 @@ module SmarterJSON
|
|
|
671
751
|
# Yield each top-level value until EOF (JSONL / NDJSON / concatenated /
|
|
672
752
|
# whitespace-separated). Used by the block form of SmarterJSON.process.
|
|
673
753
|
def each_value
|
|
674
|
-
|
|
675
|
-
|
|
754
|
+
count = 0
|
|
755
|
+
until eof?
|
|
756
|
+
skip_document_separators
|
|
676
757
|
break if eof?
|
|
677
758
|
|
|
678
|
-
|
|
759
|
+
value = parse_document
|
|
760
|
+
enforce_scalar_boundary(value)
|
|
761
|
+
yield value
|
|
762
|
+
count += 1
|
|
679
763
|
end
|
|
680
|
-
|
|
764
|
+
count
|
|
681
765
|
end
|
|
682
766
|
|
|
683
767
|
private
|
|
@@ -688,6 +772,48 @@ module SmarterJSON
|
|
|
688
772
|
parse_iter(implicit_root_object_ahead?)
|
|
689
773
|
end
|
|
690
774
|
|
|
775
|
+
# Between top-level documents, whitespace, comments, AND commas all separate
|
|
776
|
+
# (commas collapse like the in-container lenient-comma rule). A space alone never
|
|
777
|
+
# separates — a bare top-level scalar followed by a space and more content raises
|
|
778
|
+
# (see enforce_scalar_boundary), so `1 2 3` is an error while `1, 2, 3` is three documents.
|
|
779
|
+
def skip_document_separators
|
|
780
|
+
skip_whitespace_and_comments
|
|
781
|
+
while byte == COMMA
|
|
782
|
+
advance(1)
|
|
783
|
+
skip_whitespace_and_comments
|
|
784
|
+
end
|
|
785
|
+
end
|
|
786
|
+
|
|
787
|
+
# After a top-level value: a self-delimiting value (object / array / quoted string)
|
|
788
|
+
# may be followed by anything (the next document self-delimits), but a bare scalar
|
|
789
|
+
# (number / keyword) must be followed by a real separator — a newline, ',', a
|
|
790
|
+
# comment, or EOF. A space is NOT a separator, so `1 2 3` and `42 "x" true` raise
|
|
791
|
+
# rather than silently splitting; bare top-level words raise in parse_value itself.
|
|
792
|
+
def enforce_scalar_boundary(value)
|
|
793
|
+
return if value.is_a?(String) || value.is_a?(Hash) || value.is_a?(Array)
|
|
794
|
+
|
|
795
|
+
skip_horizontal_whitespace
|
|
796
|
+
b = byte
|
|
797
|
+
return if b.nil? || b == LF || b == CR || b == COMMA
|
|
798
|
+
return if b == HASH || (b == SLASH && ((c = byte_at(1)) == SLASH || c == STAR))
|
|
799
|
+
|
|
800
|
+
raise error("a top-level number or keyword must be followed by a newline, ',', or end of input")
|
|
801
|
+
end
|
|
802
|
+
|
|
803
|
+
# Skip horizontal whitespace only (space / tab / VT / FF) — NOT newlines, which are
|
|
804
|
+
# document separators. Used by the scalar-boundary check above.
|
|
805
|
+
def skip_horizontal_whitespace
|
|
806
|
+
while (b = byte)
|
|
807
|
+
if b == SPACE || b == TAB || b == 0x0B || b == 0x0C
|
|
808
|
+
advance(1)
|
|
809
|
+
elsif b >= 0x80 && (n = multibyte_ws_len(@pos)).positive?
|
|
810
|
+
@pos += n # multibyte horizontal whitespace (NBSP, U+2000–200A, …)
|
|
811
|
+
else
|
|
812
|
+
break
|
|
813
|
+
end
|
|
814
|
+
end
|
|
815
|
+
end
|
|
816
|
+
|
|
691
817
|
# Iterative container parser — explicit stack, NO Ruby recursion, so nesting
|
|
692
818
|
# is bounded only by memory (like Oj and the C extension's fj_parse_iter),
|
|
693
819
|
# never by the call stack. Mirrors the C driver to keep the two paths in
|
|
@@ -708,9 +834,10 @@ module SmarterJSON
|
|
|
708
834
|
end
|
|
709
835
|
|
|
710
836
|
vss = false # warnings: has a value landed in the current container since the last separator?
|
|
711
|
-
|
|
837
|
+
input = @input # hoisted: @input never changes mid-parse; byte reads inline as input.getbyte(@pos)
|
|
838
|
+
while true
|
|
712
839
|
skip_whitespace_and_comments
|
|
713
|
-
b =
|
|
840
|
+
b = input.getbyte(@pos)
|
|
714
841
|
if at_top
|
|
715
842
|
if b == LBRACE
|
|
716
843
|
advance(1)
|
|
@@ -729,8 +856,17 @@ module SmarterJSON
|
|
|
729
856
|
at_top = false
|
|
730
857
|
vss = false
|
|
731
858
|
elsif b.nil?
|
|
859
|
+
# Defensive guard: parse / each_value check eof? before calling parse_iter,
|
|
860
|
+
# so `at_top` never meets end-of-input here. Kept to mirror the C driver.
|
|
861
|
+
# :nocov:
|
|
732
862
|
raise error("unexpected end of input")
|
|
863
|
+
# :nocov:
|
|
733
864
|
else
|
|
865
|
+
# Top-level scalar: must be a recognized JSON value (number / literal /
|
|
866
|
+
# quoted string). A bare word raises — there are no top-level quoteless
|
|
867
|
+
# strings (Decision 2 = B-broad). In-container quoteless still uses
|
|
868
|
+
# parse_member_value; the scalar-vs-separator boundary is enforced by the
|
|
869
|
+
# parse / each_value loop via enforce_scalar_boundary.
|
|
734
870
|
return parse_value
|
|
735
871
|
end
|
|
736
872
|
elsif b == COMMA
|
|
@@ -758,12 +894,12 @@ module SmarterJSON
|
|
|
758
894
|
else
|
|
759
895
|
key = parse_object_key
|
|
760
896
|
skip_whitespace_and_comments
|
|
761
|
-
raise error("expected ':' after key #{key.inspect}") unless
|
|
897
|
+
raise error("expected ':' after key #{key.inspect}") unless input.getbyte(@pos) == COLON
|
|
762
898
|
|
|
763
899
|
advance(1)
|
|
764
900
|
skip_whitespace_and_comments
|
|
765
|
-
b =
|
|
766
|
-
if
|
|
901
|
+
b = input.getbyte(@pos)
|
|
902
|
+
if b == LBRACE || b == LBRACKET
|
|
767
903
|
child = b == LBRACE ? {} : []
|
|
768
904
|
advance(1) # consume { or [
|
|
769
905
|
store_member(cur, key, child)
|
|
@@ -771,7 +907,7 @@ module SmarterJSON
|
|
|
771
907
|
cur = child
|
|
772
908
|
cur_obj = (b == LBRACE)
|
|
773
909
|
vss = false
|
|
774
|
-
elsif
|
|
910
|
+
elsif b == RBRACE || b == COMMA
|
|
775
911
|
# key with a colon but no value -> null (don't consume } or ,; the loop does)
|
|
776
912
|
store_member(cur, key, nil)
|
|
777
913
|
warn(:empty_value, "key #{key.inspect} had no value — used null") if @on_warning
|
|
@@ -796,7 +932,7 @@ module SmarterJSON
|
|
|
796
932
|
raise error("unterminated array")
|
|
797
933
|
elsif b == RBRACE
|
|
798
934
|
raise error("unexpected '}' — expected ']' or a value")
|
|
799
|
-
elsif
|
|
935
|
+
elsif b == LBRACE || b == LBRACKET
|
|
800
936
|
child = b == LBRACE ? {} : []
|
|
801
937
|
advance(1) # consume { or [
|
|
802
938
|
cur.push(child)
|
|
@@ -818,11 +954,11 @@ module SmarterJSON
|
|
|
818
954
|
b = byte
|
|
819
955
|
return false unless b && key_start_byte?(b)
|
|
820
956
|
|
|
821
|
-
saved =
|
|
957
|
+
saved = @pos
|
|
822
958
|
advance(1) while (c = byte) && key_continue_byte?(c)
|
|
823
959
|
skip_pure_whitespace
|
|
824
960
|
result = (byte == COLON)
|
|
825
|
-
@pos
|
|
961
|
+
@pos = saved
|
|
826
962
|
result
|
|
827
963
|
end
|
|
828
964
|
|
|
@@ -840,46 +976,72 @@ module SmarterJSON
|
|
|
840
976
|
@pos >= @bytesize
|
|
841
977
|
end
|
|
842
978
|
|
|
979
|
+
# Advance the byte cursor by n (clamped to EOF). No line/col bookkeeping — that
|
|
980
|
+
# is computed lazily in line_col_at only when an error/warning is built. This is
|
|
981
|
+
# the hot-path primitive every consumed byte goes through, so it stays O(1) with
|
|
982
|
+
# no block, no re-read, and no per-byte branching. Mirrors the C fj_advance.
|
|
843
983
|
def advance(n = 1)
|
|
844
|
-
n
|
|
845
|
-
|
|
846
|
-
|
|
984
|
+
@pos += n
|
|
985
|
+
@pos = @bytesize if @pos > @bytesize
|
|
986
|
+
end
|
|
847
987
|
|
|
988
|
+
# Line and 1-based BYTE column at byte position `pos`, computed lazily by scanning
|
|
989
|
+
# from the start of the buffer — only on the cold path (error / warning / triple-quote
|
|
990
|
+
# indent), never per byte. CR, LF, and CRLF each count as one newline; the column is
|
|
991
|
+
# the byte offset within the line. Mirrors the C extension's fj_line_col so both paths
|
|
992
|
+
# report identical positions.
|
|
993
|
+
def line_col_at(pos = @pos)
|
|
994
|
+
limit = pos < @bytesize ? pos : @bytesize
|
|
995
|
+
line = 1
|
|
996
|
+
col = 1
|
|
997
|
+
i = 0
|
|
998
|
+
while i < limit
|
|
999
|
+
b = @input.getbyte(i)
|
|
848
1000
|
if b == LF
|
|
849
|
-
|
|
850
|
-
|
|
851
|
-
@pos += 1
|
|
1001
|
+
line += 1
|
|
1002
|
+
col = 1
|
|
852
1003
|
elsif b == CR
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
@pos += 1 if @input.getbyte(@pos) == LF
|
|
1004
|
+
line += 1
|
|
1005
|
+
col = 1
|
|
1006
|
+
i += 1 if i + 1 < @bytesize && @input.getbyte(i + 1) == LF
|
|
857
1007
|
else
|
|
858
|
-
|
|
859
|
-
@pos += 1
|
|
1008
|
+
col += 1
|
|
860
1009
|
end
|
|
1010
|
+
i += 1
|
|
861
1011
|
end
|
|
1012
|
+
[line, col]
|
|
1013
|
+
end
|
|
1014
|
+
|
|
1015
|
+
# 1-based byte column at `pos` (bytes since the last line start). Used for
|
|
1016
|
+
# triple-quoted-string indentation stripping. Mirrors the C fj_column.
|
|
1017
|
+
def column_at(pos = @pos)
|
|
1018
|
+
c = 1
|
|
1019
|
+
i = pos - 1
|
|
1020
|
+
while i >= 0 && (b = @input.getbyte(i)) != LF && b != CR
|
|
1021
|
+
c += 1
|
|
1022
|
+
i -= 1
|
|
1023
|
+
end
|
|
1024
|
+
c
|
|
862
1025
|
end
|
|
863
1026
|
|
|
864
1027
|
# --- whitespace (Unicode [[:space:]] / Rails blank?; see smarter_json.md §4.7) ---
|
|
865
1028
|
|
|
866
1029
|
def skip_pure_whitespace
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
1030
|
+
input = @input
|
|
1031
|
+
pos = @pos
|
|
1032
|
+
while (b = input.getbyte(pos))
|
|
871
1033
|
if b == SPACE || (b >= TAB && b <= CR) # 0x20, or 0x09..0x0D
|
|
872
|
-
|
|
1034
|
+
pos += 1
|
|
873
1035
|
elsif b >= 0x80
|
|
874
|
-
n = multibyte_ws_len(
|
|
1036
|
+
n = multibyte_ws_len(pos)
|
|
875
1037
|
break if n.zero?
|
|
876
1038
|
|
|
877
|
-
|
|
878
|
-
@col += 1
|
|
1039
|
+
pos += n
|
|
879
1040
|
else
|
|
880
1041
|
break
|
|
881
1042
|
end
|
|
882
1043
|
end
|
|
1044
|
+
@pos = pos
|
|
883
1045
|
end
|
|
884
1046
|
|
|
885
1047
|
# Number of bytes of the Unicode-whitespace char starting at pos, or 0.
|
|
@@ -913,19 +1075,20 @@ module SmarterJSON
|
|
|
913
1075
|
# A '#', '//', or '/*' starts a comment only when preceded by whitespace
|
|
914
1076
|
# or at the very start of input (the comment-marker rule).
|
|
915
1077
|
def skip_whitespace_and_comments
|
|
916
|
-
|
|
1078
|
+
while true
|
|
917
1079
|
skip_pure_whitespace
|
|
918
1080
|
b = byte
|
|
919
|
-
|
|
1081
|
+
if b == HASH
|
|
1082
|
+
break unless preceded_by_ws_or_start?
|
|
920
1083
|
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
1084
|
+
skip_to_eol
|
|
1085
|
+
elsif b == SLASH
|
|
1086
|
+
c = byte_at(1)
|
|
1087
|
+
break unless (c == SLASH || c == STAR) && preceded_by_ws_or_start?
|
|
924
1088
|
|
|
925
|
-
|
|
926
|
-
skip_block_comment
|
|
1089
|
+
c == STAR ? skip_block_comment : skip_to_eol
|
|
927
1090
|
else
|
|
928
|
-
|
|
1091
|
+
break
|
|
929
1092
|
end
|
|
930
1093
|
end
|
|
931
1094
|
end
|
|
@@ -965,8 +1128,9 @@ module SmarterJSON
|
|
|
965
1128
|
# --- values ---
|
|
966
1129
|
|
|
967
1130
|
# Top-level / strict value: no quoteless fallback.
|
|
1131
|
+
# Precondition: callers (parse_iter) have already run skip_whitespace_and_comments,
|
|
1132
|
+
# so @pos is at the value's first byte — no leading skip needed here.
|
|
968
1133
|
def parse_value
|
|
969
|
-
skip_whitespace_and_comments
|
|
970
1134
|
raise error("unexpected end of input") if eof?
|
|
971
1135
|
|
|
972
1136
|
b = byte
|
|
@@ -999,8 +1163,9 @@ module SmarterJSON
|
|
|
999
1163
|
end
|
|
1000
1164
|
|
|
1001
1165
|
# Value in object-value or array-element position: quoteless allowed.
|
|
1166
|
+
# Precondition: callers (parse_iter) have already run skip_whitespace_and_comments,
|
|
1167
|
+
# so @pos is at the value's first byte — no leading skip needed here.
|
|
1002
1168
|
def parse_member_value
|
|
1003
|
-
skip_whitespace_and_comments
|
|
1004
1169
|
raise error("unexpected end of input") if eof?
|
|
1005
1170
|
|
|
1006
1171
|
b = byte
|
|
@@ -1033,7 +1198,7 @@ module SmarterJSON
|
|
|
1033
1198
|
until eof?
|
|
1034
1199
|
if @input.getbyte(@pos) == 0xE2 && @input.getbyte(@pos + 1) == 0x80 &&
|
|
1035
1200
|
closers.include?(@input.getbyte(@pos + 2))
|
|
1036
|
-
result = @input.byteslice(start, @pos - start)
|
|
1201
|
+
result = @input.byteslice(start, @pos - start) # byteslice preserves @input's encoding
|
|
1037
1202
|
advance(3)
|
|
1038
1203
|
return result
|
|
1039
1204
|
end
|
|
@@ -1044,9 +1209,7 @@ module SmarterJSON
|
|
|
1044
1209
|
|
|
1045
1210
|
def store_member(hash, key, value)
|
|
1046
1211
|
k = @symbolize_keys ? key.to_sym : key
|
|
1047
|
-
if hash.key?(k)
|
|
1048
|
-
raise error("duplicate key #{k.inspect}") if @duplicate_key == :raise
|
|
1049
|
-
|
|
1212
|
+
if @check_duplicates && hash.key?(k)
|
|
1050
1213
|
warn(:duplicate_key, "duplicate key #{k.inspect} — #{@duplicate_key}") if @on_warning
|
|
1051
1214
|
return if @duplicate_key == :first_wins
|
|
1052
1215
|
end
|
|
@@ -1077,51 +1240,77 @@ module SmarterJSON
|
|
|
1077
1240
|
start = @pos
|
|
1078
1241
|
advance(1)
|
|
1079
1242
|
advance(1) while (b = byte) && key_continue_byte?(b)
|
|
1080
|
-
@input.byteslice(start, @pos - start)
|
|
1243
|
+
@input.byteslice(start, @pos - start) # byteslice preserves @input's encoding
|
|
1081
1244
|
end
|
|
1082
1245
|
|
|
1083
1246
|
# --- quoteless strings & literal classification ---
|
|
1084
1247
|
|
|
1085
1248
|
def parse_quoteless_or_literal
|
|
1086
1249
|
start = @pos
|
|
1087
|
-
scan_quoteless_run
|
|
1250
|
+
value_end = scan_quoteless_run
|
|
1088
1251
|
# A quoteless run must consume at least one byte. If the first byte is a
|
|
1089
1252
|
# delimiter (',' '}' ']'), the run is empty and @pos didn't move — returning
|
|
1090
1253
|
# here would make the caller's `result << parse_member_value` loop forever.
|
|
1091
1254
|
# Raise instead (correct today: the Lenient Commas Option is not adopted).
|
|
1092
1255
|
raise error("expected a value") if @pos == start
|
|
1093
1256
|
|
|
1094
|
-
|
|
1095
|
-
|
|
1257
|
+
# value_end is the end of the last non-whitespace char in the run; slicing to it
|
|
1258
|
+
# drops trailing whitespace without a regex (the caller already skipped leading
|
|
1259
|
+
# whitespace, so there is none to trim at the front). Equivalent to the old
|
|
1260
|
+
# trim_blank(raw) but with no per-scalar String#sub allocations.
|
|
1261
|
+
raw = @input.byteslice(start, value_end - start) # byteslice preserves @input's encoding
|
|
1262
|
+
classify_quoteless(raw)
|
|
1096
1263
|
end
|
|
1097
1264
|
|
|
1098
1265
|
# Advance to the end of a quoteless run. Stops at structural punctuation
|
|
1099
|
-
# (',' '}' ']'
|
|
1100
|
-
#
|
|
1266
|
+
# (',' '{' '}' '[' ']' — openers terminate symmetrically with closers, so a
|
|
1267
|
+
# self-delimiting value starts fresh: `localhost {"a":1}` -> ["localhost", {...}]),
|
|
1268
|
+
# a newline, EOF, or a comment marker that is preceded by whitespace. Spaces by
|
|
1269
|
+
# themselves are not delimiters.
|
|
1270
|
+
# Advance @pos to the end of the quoteless run (including any trailing whitespace,
|
|
1271
|
+
# so the parser resumes correctly after the value). Returns value_end: the byte
|
|
1272
|
+
# offset just past the last NON-whitespace char, so the caller can slice off
|
|
1273
|
+
# trailing whitespace without a regex.
|
|
1101
1274
|
def scan_quoteless_run
|
|
1275
|
+
input = @input
|
|
1276
|
+
pos = @pos
|
|
1277
|
+
# Fast path: one C-level byteindex jumps to the first structural terminator or
|
|
1278
|
+
# whitespace. If it lands on a terminator (or EOF) the run had no interior whitespace,
|
|
1279
|
+
# so [pos, hit) is the whole value — value_end == hit (no trailing trim) and no comment
|
|
1280
|
+
# marker can apply (those only break after whitespace). This is the common case
|
|
1281
|
+
# (numbers and simple tokens). Anything with whitespace falls to the byte-by-byte loop.
|
|
1282
|
+
if BYTEINDEX_AVAILABLE
|
|
1283
|
+
hit = input.byteindex(QL_BREAK, pos) || @bytesize
|
|
1284
|
+
b = hit < @bytesize ? input.getbyte(hit) : nil
|
|
1285
|
+
if b.nil? || b == COMMA || b == RBRACE || b == RBRACKET || b == LBRACE || b == LBRACKET || b == LF || b == CR
|
|
1286
|
+
@pos = hit
|
|
1287
|
+
return hit
|
|
1288
|
+
end
|
|
1289
|
+
end
|
|
1290
|
+
|
|
1291
|
+
# Slow path: the run contains whitespace — scan byte by byte to honor interior
|
|
1292
|
+
# whitespace, trailing-whitespace trimming (value_end is the end of the last
|
|
1293
|
+
# non-whitespace char), and the comment-marker-after-whitespace rule.
|
|
1294
|
+
value_end = pos
|
|
1102
1295
|
prev_ws = false
|
|
1103
|
-
|
|
1104
|
-
b
|
|
1105
|
-
break if b.
|
|
1106
|
-
break if [COMMA, RBRACE, RBRACKET, LF, CR].include?(b)
|
|
1107
|
-
break if prev_ws && (b == HASH || (b == SLASH && [SLASH, STAR].include?(byte_at(1))))
|
|
1296
|
+
while (b = input.getbyte(pos))
|
|
1297
|
+
break if b == COMMA || b == RBRACE || b == RBRACKET || b == LBRACE || b == LBRACKET || b == LF || b == CR
|
|
1298
|
+
break if prev_ws && (b == HASH || (b == SLASH && ((c = input.getbyte(pos + 1)) == SLASH || c == STAR)))
|
|
1108
1299
|
|
|
1109
1300
|
if b == SPACE || (b >= TAB && b <= CR) # tab/VT/FF/space (LF/CR already broke)
|
|
1110
1301
|
prev_ws = true
|
|
1111
|
-
|
|
1112
|
-
elsif b >= 0x80 && (n = multibyte_ws_len(
|
|
1302
|
+
pos += 1
|
|
1303
|
+
elsif b >= 0x80 && (n = multibyte_ws_len(pos)).positive?
|
|
1113
1304
|
prev_ws = true
|
|
1114
|
-
|
|
1115
|
-
@col += 1
|
|
1305
|
+
pos += n
|
|
1116
1306
|
else
|
|
1117
1307
|
prev_ws = false
|
|
1118
|
-
|
|
1308
|
+
pos += 1
|
|
1309
|
+
value_end = pos
|
|
1119
1310
|
end
|
|
1120
1311
|
end
|
|
1121
|
-
|
|
1122
|
-
|
|
1123
|
-
def trim_blank(str)
|
|
1124
|
-
str.sub(BLANK_HEAD, "").sub(BLANK_TAIL, "")
|
|
1312
|
+
@pos = pos
|
|
1313
|
+
value_end
|
|
1125
1314
|
end
|
|
1126
1315
|
|
|
1127
1316
|
def classify_quoteless(str)
|
|
@@ -1132,7 +1321,7 @@ module SmarterJSON
|
|
|
1132
1321
|
when "undefined" then return nil
|
|
1133
1322
|
when "NaN" then return Float::NAN
|
|
1134
1323
|
when "Infinity", "+Infinity" then return Float::INFINITY
|
|
1135
|
-
when "-Infinity" then return
|
|
1324
|
+
when "-Infinity" then return -Float::INFINITY
|
|
1136
1325
|
end
|
|
1137
1326
|
num = numeric_value(str)
|
|
1138
1327
|
num.equal?(NOT_NUMERIC) ? str : num
|
|
@@ -1140,31 +1329,73 @@ module SmarterJSON
|
|
|
1140
1329
|
|
|
1141
1330
|
# Returns an Integer/Float, or NOT_NUMERIC if the whole token isn't a number.
|
|
1142
1331
|
def numeric_value(str)
|
|
1143
|
-
|
|
1144
|
-
|
|
1332
|
+
# Cheap hex gate: only invoke HEX_RE when the token actually looks like [+-]?0x… .
|
|
1333
|
+
# A Regexp#match? has real per-call cost; almost no number is hex, so the 1–3 byte
|
|
1334
|
+
# check skips that call on the common path (measured +21% on long-token decimals).
|
|
1335
|
+
if hex_prefix?(str) && HEX_RE.match?(str)
|
|
1336
|
+
neg = str.getbyte(0) == MINUS
|
|
1145
1337
|
body = str.sub(/\A[-+]/, "").delete("_") # "0x...."
|
|
1146
1338
|
v = body[2..-1].to_i(16)
|
|
1147
1339
|
return neg ? -v : v
|
|
1148
1340
|
end
|
|
1149
1341
|
return NOT_NUMERIC unless DEC_RE.match?(str) && str.match?(/[0-9]/)
|
|
1150
1342
|
|
|
1151
|
-
|
|
1343
|
+
# delete("_") allocates a fresh string even when there is nothing to delete; on long
|
|
1344
|
+
# number tokens that is a real per-value allocation. Underscores are rare, so only
|
|
1345
|
+
# pay it when the token actually contains one (measured +27% on long-token decimals).
|
|
1346
|
+
body = str.include?("_") ? str.delete("_") : str
|
|
1152
1347
|
body.match?(/[.eE]/) ? decimal_value(body) : body.to_i
|
|
1153
1348
|
end
|
|
1154
1349
|
|
|
1155
|
-
#
|
|
1350
|
+
# True when the token starts with [+-]?0[xX] — the only shape HEX_RE can match.
|
|
1351
|
+
def hex_prefix?(str)
  # Byte-level pre-check for the shape [+-]?0[xX].
  #
  # str - the token to inspect (may be empty).
  #
  # Returns true when an optional sign is followed by "0" and then "x"/"X",
  # false otherwise. It looks at no more than the first three bytes.
  first = str.getbyte(0)
  # A leading sign shifts the "0x" marker one byte to the right.
  zero_at = (first == MINUS || first == PLUS) ? 1 : 0
  return false unless str.getbyte(zero_at) == ZERO

  marker = str.getbyte(zero_at + 1) # nil when the token ends right after the "0"
  marker == LOWER_X || marker == UPPER_X
end
|
|
1362
|
+
|
|
1363
|
+
# A decimal (has '.' or exponent). decimal_precision: :float -> Float,
|
|
1156
1364
|
# :bigdecimal -> BigDecimal, :auto -> BigDecimal when the mantissa has more
|
|
1157
1365
|
# than 16 significant digits (Oj's DEC_MAX threshold), else Float.
|
|
1158
1366
|
def decimal_value(body)
  # Converts a decimal token (one with '.' or an exponent) according to
  # @decimal_precision.
  #
  # body - the token text with underscores already removed by the caller.
  #
  # Returns body.to_f for :float, to_big_decimal(body) for :bigdecimal, and for
  # any other setting picks to_big_decimal when the mantissa has more than 16
  # significant digits, else body.to_f.
  #
  # NOTE(review): confirm how String#to_f treats a dot that is not followed by
  # a digit (e.g. "5.e3") if DEC_RE admits such tokens — the BigDecimal path
  # normalizes them, the Float path does not.
  case @decimal_precision
  when :float
    body.to_f
  when :bigdecimal
    to_big_decimal(body)
  else
    significant_digits(body) > 16 ? to_big_decimal(body) : body.to_f
  end
end
|
|
1165
1373
|
|
|
1374
|
+
# Count significant mantissa digits (leading zeros excluded, exponent ignored) to pick
|
|
1375
|
+
# Float vs BigDecimal in :auto mode. A single byte-scan — the old three-regex version
|
|
1376
|
+
# (strip exponent, strip non-digits, strip leading zeros, .length) ran on every float
|
|
1377
|
+
# and dominated the number path's cost. body is a DEC_RE-validated token (digits, at most
|
|
1378
|
+
# one '.', optional sign, optional e/E exponent), underscores already removed.
|
|
1166
1379
|
def significant_digits(body)
  # Counts the significant digits of a decimal token's mantissa.
  #
  # body - the token text; the scan treats any byte outside '0'..'9' (sign,
  #        decimal point) as a separator and stops at the first 'e'/'E'.
  #
  # Returns an Integer: the number of digits from the first non-zero digit up
  # to the exponent marker or the end of the token. Leading zeros (including
  # those after the decimal point) are not counted; trailing zeros are.
  #
  # Deliberately a single index-based byte scan: this runs once per decimal
  # value, so it avoids intermediate strings and block calls.
  count = 0
  seen_nonzero = false
  i = 0
  n = body.bytesize
  while i < n
    b = body.getbyte(i)
    i += 1
    break if b == LOWER_E || b == UPPER_E # exponent digits are not significant
    next if b < ZERO || b > NINE          # sign or decimal point

    # A zero only counts once some non-zero digit has been seen.
    if seen_nonzero || b != ZERO
      seen_nonzero = true
      count += 1
    end
  end
  count
end
|
|
1169
1400
|
|
|
1170
1401
|
def to_big_decimal(body)
|
|
@@ -1175,7 +1406,11 @@ module SmarterJSON
|
|
|
1175
1406
|
body = normalize_for_bigdecimal(body) if NEEDS_DECIMAL_FIXUP.match?(body)
|
|
1176
1407
|
BigDecimal(body)
|
|
1177
1408
|
rescue ArgumentError
|
|
1409
|
+
# Defensive: BigDecimal() does not reject a DEC_RE-validated, normalized token,
|
|
1410
|
+
# so this fallback is unreachable from valid input. Kept as a safety net.
|
|
1411
|
+
# :nocov:
|
|
1178
1412
|
body.to_f
|
|
1413
|
+
# :nocov:
|
|
1179
1414
|
end
|
|
1180
1415
|
|
|
1181
1416
|
# BigDecimal() rejects a bare leading/trailing dot (".5", "5.", "5.e3").
|
|
@@ -1194,7 +1429,7 @@ module SmarterJSON
|
|
|
1194
1429
|
end
|
|
1195
1430
|
|
|
1196
1431
|
def parse_triple_quoted
|
|
1197
|
-
indent = @
|
|
1432
|
+
indent = column_at(@pos) - 1
|
|
1198
1433
|
advance(3)
|
|
1199
1434
|
raw_start = @pos
|
|
1200
1435
|
until eof?
|
|
@@ -1204,7 +1439,7 @@ module SmarterJSON
|
|
|
1204
1439
|
end
|
|
1205
1440
|
raise error("unterminated triple-quoted string") if eof?
|
|
1206
1441
|
|
|
1207
|
-
raw = @input.byteslice(raw_start, @pos - raw_start)
|
|
1442
|
+
raw = @input.byteslice(raw_start, @pos - raw_start) # byteslice preserves @input's encoding
|
|
1208
1443
|
advance(3)
|
|
1209
1444
|
strip_triple(raw, indent)
|
|
1210
1445
|
end
|
|
@@ -1234,20 +1469,30 @@ module SmarterJSON
|
|
|
1234
1469
|
def parse_string(quote)
|
|
1235
1470
|
advance(1)
|
|
1236
1471
|
start = @pos
|
|
1237
|
-
|
|
1472
|
+
# Fast path (the common case — a string with no escapes): jump straight to the
|
|
1473
|
+
# closing quote with byteindex. It is called only here, from `start`, which is
|
|
1474
|
+
# always a character boundary, so byteindex never sees a mid-char offset.
|
|
1475
|
+
hit = scan_string_delimiter(quote)
|
|
1476
|
+
raise error("unterminated string") if hit.nil?
|
|
1477
|
+
|
|
1478
|
+
if @input.getbyte(hit) == quote
|
|
1479
|
+
@pos = hit
|
|
1480
|
+
result = @input.byteslice(start, @pos - start) # byteslice preserves @input's encoding
|
|
1481
|
+
advance(1)
|
|
1482
|
+
return result
|
|
1483
|
+
end
|
|
1484
|
+
|
|
1485
|
+
# Escape path: a backslash precedes the closing quote. Scan byte by byte from
|
|
1486
|
+
# here — byteindex can't be used past a backslash (a lenient \<multibyte> would
|
|
1487
|
+
# leave @pos mid-character), and this lets the decoder flag invalid escapes
|
|
1488
|
+
# exactly as before. decode_string_with_escapes handles the whole [start, finish].
|
|
1489
|
+
@pos = hit
|
|
1238
1490
|
while (b = byte)
|
|
1239
1491
|
if b == quote
|
|
1240
|
-
|
|
1241
|
-
|
|
1242
|
-
|
|
1243
|
-
return decoded
|
|
1244
|
-
else
|
|
1245
|
-
result = @input.byteslice(start, @pos - start).force_encoding(@input.encoding)
|
|
1246
|
-
advance(1)
|
|
1247
|
-
return result
|
|
1248
|
-
end
|
|
1492
|
+
decoded = decode_string_with_escapes(start, @pos, quote)
|
|
1493
|
+
advance(1)
|
|
1494
|
+
return decoded
|
|
1249
1495
|
elsif b == BACKSLASH
|
|
1250
|
-
has_escape = true
|
|
1251
1496
|
advance(1)
|
|
1252
1497
|
raise error("unterminated string escape") if eof?
|
|
1253
1498
|
|
|
@@ -1259,6 +1504,20 @@ module SmarterJSON
|
|
|
1259
1504
|
raise error("unterminated string")
|
|
1260
1505
|
end
|
|
1261
1506
|
|
|
1507
|
+
# Byte index of the next closing quote or backslash at/after @pos, or nil if
|
|
1508
|
+
# neither occurs before EOF. byteindex scans inside MRI's C; the fallback is a
|
|
1509
|
+
# tight getbyte loop (the ASCII delimiters never alias UTF-8 continuation bytes,
|
|
1510
|
+
# so byte scanning is correct for UTF-8 string content).
|
|
1511
|
+
def scan_string_delimiter(quote)
  # Finds the next string delimiter at or after @pos.
  #
  # quote - byte value of the string's quote character.
  #
  # Returns the byte index of the first `quote` or backslash byte in @input
  # starting from @pos, or nil when neither occurs before the end of input.
  # Does not move @pos.
  if BYTEINDEX_AVAILABLE
    pattern = quote == DQUOTE ? DQUOTE_OR_BACKSLASH : SQUOTE_OR_BACKSLASH
    return @input.byteindex(pattern, @pos)
  end

  # Fallback when String#byteindex is unavailable: plain byte scan.
  i = @pos
  while i < @bytesize
    b = @input.getbyte(i)
    return i if b == quote || b == BACKSLASH

    i += 1
  end
  nil
end
|
|
1520
|
+
|
|
1262
1521
|
def decode_string_with_escapes(start, finish, _quote)
|
|
1263
1522
|
buf = String.new(encoding: Encoding::ASCII_8BIT)
|
|
1264
1523
|
i = start
|
|
@@ -1350,7 +1609,7 @@ module SmarterJSON
|
|
|
1350
1609
|
|
|
1351
1610
|
if byte == ZERO
|
|
1352
1611
|
advance(1)
|
|
1353
|
-
if
|
|
1612
|
+
if (x = byte) == LOWER_X || x == UPPER_X
|
|
1354
1613
|
advance(1)
|
|
1355
1614
|
hex_start = @pos
|
|
1356
1615
|
advance(1) while (b = byte) && (hex_digit?(b) || b == UNDERSCORE)
|
|
@@ -1375,10 +1634,10 @@ module SmarterJSON
|
|
|
1375
1634
|
advance(1) while (b = byte) && ((b >= ZERO && b <= NINE) || b == UNDERSCORE)
|
|
1376
1635
|
end
|
|
1377
1636
|
|
|
1378
|
-
if
|
|
1637
|
+
if (e = byte) == LOWER_E || e == UPPER_E
|
|
1379
1638
|
is_float = true
|
|
1380
1639
|
advance(1)
|
|
1381
|
-
advance(1) if
|
|
1640
|
+
advance(1) if (s = byte) == PLUS || s == MINUS
|
|
1382
1641
|
raise error("invalid number: expected digits in exponent") unless byte && byte >= ZERO && byte <= NINE
|
|
1383
1642
|
|
|
1384
1643
|
advance(1) while (b = byte) && ((b >= ZERO && b <= NINE) || b == UNDERSCORE)
|
|
@@ -1414,11 +1673,13 @@ module SmarterJSON
|
|
|
1414
1673
|
def warn(type, message)
  # Reports a non-fatal condition to the @on_warning callback, if one is set.
  #
  # type    - warning category, passed through to Warning.new unchanged.
  # message - human-readable description.
  #
  # Does nothing (returns nil) when @on_warning is not set. Otherwise builds a
  # Warning carrying the line/column that line_col_at reports for @pos and
  # returns whatever the callback returns.
  return unless @on_warning

  line, col = line_col_at(@pos)
  warning = Warning.new(type, message, line, col)
  @on_warning.call(warning)
end
|
|
1419
1679
|
|
|
1420
1680
|
def error(message)
  # Builds (does not raise) a ParseError for the current position.
  #
  # message - description of the problem.
  #
  # Returns ParseError.new(message, line, col), where line/col come from
  # line_col_at(@pos). Callers raise the result themselves.
  line, col = line_col_at(@pos)
  ParseError.new(message, line, col)
end
|
|
1423
1684
|
|
|
1424
1685
|
def display_byte(b)
|