json-repair 0.11.3 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +31 -0
- data/lib/json/repair/version.rb +1 -1
- data/lib/json/repairer.rb +29 -7
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: aef10e86ea82fb56d9666ad7470317cf751f00730ef51910094ce8b2ee876a53
|
|
4
|
+
data.tar.gz: 682f0cdacc02896687e6c39e534f92f0beda52110679da8b7b7f43721aa6c2a4
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 572225e5c09ac6ab7795d21d179e9ac07bc2c3bffedb5f1df48afa5a33ab8a73923af90429730532a822f3a78004181f3a9d2d7a2bddf17b9b649819b169ec65
|
|
7
|
+
data.tar.gz: a62311d2002b538b81132f6efb8525e6c6801baaa5a8f9eac020abbc2726e5ed783ee2719aeed9dc77301492f869f79deb807b9eee74f3a56e0a37d4712b7279
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,36 @@
|
|
|
1
1
|
# Changes
|
|
2
2
|
|
|
3
|
+
### 2026-06-12 (0.12.0)
|
|
4
|
+
|
|
5
|
+
* Repair the three known input families that raised `Internal error:
|
|
6
|
+
repaired output is not valid JSON` — cases where upstream
|
|
7
|
+
[jsonrepair](https://github.com/josdejong/jsonrepair) (v3.14.0, still
|
|
8
|
+
its latest release) emits invalid JSON and this gem's canonical
|
|
9
|
+
re-serialize guard caught it but blamed the Repairer. All three are
|
|
10
|
+
deliberate divergences from upstream, commented at each site:
|
|
11
|
+
* A stray `e`/`E` with no mantissa is now an unquoted string instead
|
|
12
|
+
of an empty-mantissa exponent: `[e]` → `["e"]`, `[e5]` → `["e5"]`,
|
|
13
|
+
`[truee]` → `[true,"e"]`, `{"k": e}` → `{"k":"e"}` (upstream emits
|
|
14
|
+
`e0` / raw `e5`). Numbers truncated at a real exponent (`[2e]` →
|
|
15
|
+
`[2.0]`) are unchanged.
|
|
16
|
+
* Negative leading-zero numbers are quoted like positive ones:
|
|
17
|
+
`{"n": -05}` → `{"n":"-05"}`, matching the existing `{"n": 05}` →
|
|
18
|
+
`{"n":"05"}` (upstream emits `-05` unrepaired). The same rule now
|
|
19
|
+
also covers the truncated-number repair, which bypassed it:
|
|
20
|
+
`[05e]` → `["05e0"]`, `00.` → `"00.0"` (upstream emits `05e0` /
|
|
21
|
+
`00.0` unrepaired). Valid `-0` / `-0.5` / `0e` / `0.` are
|
|
22
|
+
unchanged.
|
|
23
|
+
* The trailing-comma repair no longer strips a comma belonging to the
|
|
24
|
+
enclosing container when an inner object/array fails on its first
|
|
25
|
+
key or value: `[{{]` → `[{},{}]`, `[1,[}]` → `[1,[]]`,
|
|
26
|
+
`{"a": 1, "b": [}` → `{"a":1,"b":[]}` (upstream emits `[{}{}]`,
|
|
27
|
+
`[1[]]`, `{"a": 1 "b": []}`).
|
|
28
|
+
Validated by differential testing against upstream over a 270-input
|
|
29
|
+
grid of these shapes in every container context: the only behavior
|
|
30
|
+
changes vs 0.11.3 are the 123 previously-`Internal error` inputs now
|
|
31
|
+
repairing (or, for `e+` shapes where upstream emits invalid `e+0`,
|
|
32
|
+
raising a clean position-bearing error). Benchmarks flat.
|
|
33
|
+
|
|
3
34
|
### 2026-06-12 (0.11.3)
|
|
4
35
|
|
|
5
36
|
* Fix infinite recursion (`SystemStackError`) on a quoted string
|
data/lib/json/repair/version.rb
CHANGED
data/lib/json/repairer.rb
CHANGED
|
@@ -237,6 +237,7 @@ module JSON
|
|
|
237
237
|
|
|
238
238
|
initial = true
|
|
239
239
|
while @index < @json.length && @json[@index] != CLOSING_BRACE
|
|
240
|
+
first_pair = initial
|
|
240
241
|
if initial
|
|
241
242
|
initial = false
|
|
242
243
|
else
|
|
@@ -255,8 +256,12 @@ module JSON
|
|
|
255
256
|
if @json[@index] == CLOSING_BRACE || @json[@index] == OPENING_BRACE ||
|
|
256
257
|
@json[@index] == CLOSING_BRACKET || @json[@index] == OPENING_BRACKET ||
|
|
257
258
|
@json[@index].nil?
|
|
258
|
-
# repair trailing comma
|
|
259
|
-
|
|
259
|
+
# repair trailing comma — but only the one this object's own loop
|
|
260
|
+
# emitted or inserted; on the first pair the buffer's last
|
|
261
|
+
# comma belongs to the enclosing container, like in [{{] or
|
|
262
|
+
# {"a": 1, "b": {] (divergence from upstream, which strips
|
|
263
|
+
# the parent's comma and emits invalid JSON like [{}{}])
|
|
264
|
+
@output = strip_last_occurrence(@output, ',') unless first_pair
|
|
260
265
|
else
|
|
261
266
|
throw_object_key_expected
|
|
262
267
|
end
|
|
@@ -738,7 +743,13 @@ module JSON
|
|
|
738
743
|
@index += 1 while digit?(@json[@index])
|
|
739
744
|
end
|
|
740
745
|
|
|
741
|
-
|
|
746
|
+
# Divergence from upstream: only enter the exponent branch when a
|
|
747
|
+
# mantissa was consumed — at this point @index > start implies at
|
|
748
|
+
# least one digit (the '-' and '.' paths reset otherwise). Upstream
|
|
749
|
+
# accepts a bare "e"/"E" here and emits invalid JSON like `e0` or
|
|
750
|
+
# raw `e5`; declining lets the token fall through to
|
|
751
|
+
# parse_unquoted_string, matching how "-e5" already becomes "-e5".
|
|
752
|
+
if @index > start && @json[@index] && @json[@index].downcase == 'e'
|
|
742
753
|
@index += 1
|
|
743
754
|
@index += 1 if ['-', '+'].include?(@json[@index])
|
|
744
755
|
if at_end_of_number?
|
|
@@ -761,7 +772,9 @@ module JSON
|
|
|
761
772
|
if @index > start
|
|
762
773
|
# repair a number with leading zeros like "00789"
|
|
763
774
|
num = @json[start...@index]
|
|
764
|
-
|
|
775
|
+
# the optional sign quotes "-05" like "05" (divergence from
|
|
776
|
+
# upstream, whose unsigned check lets "-05" through unrepaired)
|
|
777
|
+
has_invalid_leading_zero = num.match?(/^-?0\d/)
|
|
765
778
|
|
|
766
779
|
@output << (has_invalid_leading_zero ? "\"#{num}\"" : repair_leading_dot_number(num))
|
|
767
780
|
return true
|
|
@@ -786,6 +799,7 @@ module JSON
|
|
|
786
799
|
|
|
787
800
|
initial = true
|
|
788
801
|
while @index < @json.length && @json[@index] != CLOSING_BRACKET
|
|
802
|
+
first_item = initial
|
|
789
803
|
if initial
|
|
790
804
|
initial = false
|
|
791
805
|
else
|
|
@@ -799,8 +813,12 @@ module JSON
|
|
|
799
813
|
processed_value = parse_value
|
|
800
814
|
next if processed_value
|
|
801
815
|
|
|
802
|
-
# repair trailing comma
|
|
803
|
-
|
|
816
|
+
# repair trailing comma — but only the one this array's own loop
|
|
817
|
+
# emitted or inserted; on the first item the buffer's last
|
|
818
|
+
# comma belongs to the enclosing container, like in [1,[}] or
|
|
819
|
+
# {"a": 1, "b": [} (divergence from upstream, which strips
|
|
820
|
+
# the parent's comma and emits invalid JSON like [1[]])
|
|
821
|
+
@output = strip_last_occurrence(@output, ',') unless first_item
|
|
804
822
|
break
|
|
805
823
|
end
|
|
806
824
|
|
|
@@ -859,7 +877,11 @@ module JSON
|
|
|
859
877
|
# repair numbers cut off at the end
|
|
860
878
|
# this will only be called when we end after a '.', '-', or 'e' and does not
|
|
861
879
|
# change the number more than it needs to make it valid JSON
|
|
862
|
-
|
|
880
|
+
num = "#{@json[start...@index]}0"
|
|
881
|
+
# quote a padded token that has an invalid leading zero, like "05e" ->
|
|
882
|
+
# "05e0", applying the same rule as the end of parse_number (divergence
|
|
883
|
+
# from upstream, which emits the invalid number raw)
|
|
884
|
+
@output << (num.match?(/^-?0\d/) ? "\"#{num}\"" : repair_leading_dot_number(num))
|
|
863
885
|
end
|
|
864
886
|
|
|
865
887
|
# Repair a number missing its digit before the decimal point, like ".5"
|