json-repair 0.11.3 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 69085d74f416811c4ac11ca7cfe2e9545a6cecdaeb32de96532932c99ab4aaf3
4
- data.tar.gz: 4deee8e6715200ae693144a2c8cab914b9e8c78c3b539671f7632ce39d7b77f3
3
+ metadata.gz: aef10e86ea82fb56d9666ad7470317cf751f00730ef51910094ce8b2ee876a53
4
+ data.tar.gz: 682f0cdacc02896687e6c39e534f92f0beda52110679da8b7b7f43721aa6c2a4
5
5
  SHA512:
6
- metadata.gz: 31242bd165c070b1836d85a3fca120b5853a6e1ed715dbaeb75aa026563e033a7af491e318c119f3f3f899c4a7bb55382fc998372d7b55953358dc95d9c526be
7
- data.tar.gz: b8a5a58a36d1c2b36922205f2b3b8d33e89e570fc215ed02ef288577a8d1325d96fa21cda9267ca78f44754b00a3150e5f6d149d6087c9f020e23090d47a13f2
6
+ metadata.gz: 572225e5c09ac6ab7795d21d179e9ac07bc2c3bffedb5f1df48afa5a33ab8a73923af90429730532a822f3a78004181f3a9d2d7a2bddf17b9b649819b169ec65
7
+ data.tar.gz: a62311d2002b538b81132f6efb8525e6c6801baaa5a8f9eac020abbc2726e5ed783ee2719aeed9dc77301492f869f79deb807b9eee74f3a56e0a37d4712b7279
data/CHANGELOG.md CHANGED
@@ -1,5 +1,36 @@
1
1
  # Changes
2
2
 
3
+ ### 2026-06-12 (0.12.0)
4
+
5
+ * Repair the three known input families that raised `Internal error:
6
+ repaired output is not valid JSON` — cases where upstream
7
+ [jsonrepair](https://github.com/josdejong/jsonrepair) (v3.14.0, still
8
+ its latest release) emits invalid JSON and this gem's canonical
9
+ re-serialize guard caught it but blamed the Repairer. All three are
10
+ deliberate divergences from upstream, commented at each site:
11
+ * A stray `e`/`E` with no mantissa is now an unquoted string instead
12
+ of an empty-mantissa exponent: `[e]` → `["e"]`, `[e5]` → `["e5"]`,
13
+ `[truee]` → `[true,"e"]`, `{"k": e}` → `{"k":"e"}` (upstream emits
14
+ `e0` / raw `e5`). Numbers truncated at a real exponent (`[2e]` →
15
+ `[2.0]`) are unchanged.
16
+ * Negative leading-zero numbers are quoted like positive ones:
17
+ `{"n": -05}` → `{"n":"-05"}`, matching the existing `{"n": 05}` →
18
+ `{"n":"05"}` (upstream emits `-05` unrepaired). The same rule now
19
+ also covers the truncated-number repair, which bypassed it:
20
+ `[05e]` → `["05e0"]`, `00.` → `"00.0"` (upstream emits `05e0` /
21
+ `00.0` unrepaired). Single-zero forms `-0` / `-0.5` / `0e` / `0.` are
22
+ unchanged.
23
+ * The trailing-comma repair no longer strips a comma belonging to the
24
+ enclosing container when an inner object/array fails on its first
25
+ key or value: `[{{]` → `[{},{}]`, `[1,[}]` → `[1,[]]`,
26
+ `{"a": 1, "b": [}` → `{"a":1,"b":[]}` (upstream emits `[{}{}]`,
27
+ `[1[]]`, `{"a": 1 "b": []}`).
28
+ Validated by differential testing against upstream over a 270-input
29
+ grid of these shapes in every container context: the only behavior
30
+ changes vs 0.11.3 are the 123 previously-`Internal error` inputs now
31
+ repairing (or, for `e+` shapes where upstream emits invalid `e+0`,
32
+ raising a clean position-bearing error). Benchmarks flat.
33
+
3
34
  ### 2026-06-12 (0.11.3)
4
35
 
5
36
  * Fix infinite recursion (`SystemStackError`) on a quoted string
@@ -2,6 +2,6 @@
2
2
 
3
3
  module JSON
4
4
  module Repair
5
- VERSION = '0.11.3'
5
+ VERSION = '0.12.0'
6
6
  end
7
7
  end
data/lib/json/repairer.rb CHANGED
@@ -237,6 +237,7 @@ module JSON
237
237
 
238
238
  initial = true
239
239
  while @index < @json.length && @json[@index] != CLOSING_BRACE
240
+ first_pair = initial
240
241
  if initial
241
242
  initial = false
242
243
  else
@@ -255,8 +256,12 @@ module JSON
255
256
  if @json[@index] == CLOSING_BRACE || @json[@index] == OPENING_BRACE ||
256
257
  @json[@index] == CLOSING_BRACKET || @json[@index] == OPENING_BRACKET ||
257
258
  @json[@index].nil?
258
- # repair trailing comma
259
- @output = strip_last_occurrence(@output, ',')
259
+ # repair trailing comma — but only the one this object's own loop
260
+ # emitted or inserted; on the first pair the buffer's last
261
+ # comma belongs to the enclosing container, like in [{{] or
262
+ # {"a": 1, "b": {] (divergence from upstream, which strips
263
+ # the parent's comma and emits invalid JSON like [{}{}])
264
+ @output = strip_last_occurrence(@output, ',') unless first_pair
260
265
  else
261
266
  throw_object_key_expected
262
267
  end
@@ -738,7 +743,13 @@ module JSON
738
743
  @index += 1 while digit?(@json[@index])
739
744
  end
740
745
 
741
- if @json[@index] && @json[@index].downcase == 'e'
746
+ # Divergence from upstream: only enter the exponent branch when a
747
+ # mantissa was consumed — at this point @index > start implies at
748
+ # least one digit (the '-' and '.' paths reset otherwise). Upstream
749
+ # accepts a bare "e"/"E" here and emits invalid JSON like `e0` or
750
+ # raw `e5`; declining lets the token fall through to
751
+ # parse_unquoted_string, matching how a bare -e5 already becomes the string "-e5".
752
+ if @index > start && @json[@index] && @json[@index].downcase == 'e'
742
753
  @index += 1
743
754
  @index += 1 if ['-', '+'].include?(@json[@index])
744
755
  if at_end_of_number?
@@ -761,7 +772,9 @@ module JSON
761
772
  if @index > start
762
773
  # repair a number with leading zeros like "00789"
763
774
  num = @json[start...@index]
764
- has_invalid_leading_zero = num.match?(/^0\d/)
775
+ # the optional sign quotes "-05" like "05" (divergence from
776
+ # upstream, whose unsigned check lets "-05" through unrepaired)
777
+ has_invalid_leading_zero = num.match?(/^-?0\d/)
765
778
 
766
779
  @output << (has_invalid_leading_zero ? "\"#{num}\"" : repair_leading_dot_number(num))
767
780
  return true
@@ -786,6 +799,7 @@ module JSON
786
799
 
787
800
  initial = true
788
801
  while @index < @json.length && @json[@index] != CLOSING_BRACKET
802
+ first_item = initial
789
803
  if initial
790
804
  initial = false
791
805
  else
@@ -799,8 +813,12 @@ module JSON
799
813
  processed_value = parse_value
800
814
  next if processed_value
801
815
 
802
- # repair trailing comma
803
- @output = strip_last_occurrence(@output, ',')
816
+ # repair trailing comma — but only the one this array's own loop
817
+ # emitted or inserted; on the first item the buffer's last
818
+ # comma belongs to the enclosing container, like in [1,[}] or
819
+ # {"a": 1, "b": [} (divergence from upstream, which strips
820
+ # the parent's comma and emits invalid JSON like [1[]])
821
+ @output = strip_last_occurrence(@output, ',') unless first_item
804
822
  break
805
823
  end
806
824
 
@@ -859,7 +877,11 @@ module JSON
859
877
  # repair numbers cut off at the end
860
878
  # this will only be called when we end after a '.', '-', or 'e' and does not
861
879
  # change the number more than it needs to make it valid JSON
862
- @output << repair_leading_dot_number("#{@json[start...@index]}0")
880
+ num = "#{@json[start...@index]}0"
881
+ # quote a padded token that has an invalid leading zero, like "05e" ->
882
+ # "05e0", applying the same rule as the end of parse_number (divergence
883
+ # from upstream, which emits the invalid number raw)
884
+ @output << (num.match?(/^-?0\d/) ? "\"#{num}\"" : repair_leading_dot_number(num))
863
885
  end
864
886
 
865
887
  # Repair a number missing its digit before the decimal point, like ".5"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: json-repair
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.11.3
4
+ version: 0.12.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Aleksandr Zykov