json-repair 0.12.0 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +22 -0
- data/lib/json/repair/version.rb +1 -1
- data/lib/json/repairer.rb +40 -9
- data/sig/json/repairer.rbs +4 -2
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: cf5d06053ce264da6b60de4beef4a5718fd3482c81d295deea6dbf90fc63c098
|
|
4
|
+
data.tar.gz: 9d343f549cf34414e0de45618cb19bd4a9b58b12e53c2f52da0380fc73b4d95a
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 269f7de75ec6cadde857d2f7c10186b1d0c63a19aa3e863744957c95b98db8d0cef790deef8814d532ca62c0405d97cfc5f441c37be9f1c4e6c7f5cfe07eaa67
|
|
7
|
+
data.tar.gz: 9cbc099462defe41b8bf96f590833d61de90f79c8e5b0f2d8a7d0ae04ee72dcc7e512c6f3886f67cb0e700dfb1046abd143b2adbde560c502d25700035e69834
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,27 @@
|
|
|
1
1
|
# Changes
|
|
2
2
|
|
|
3
|
+
### 2026-06-12 (0.13.0)
|
|
4
|
+
|
|
5
|
+
* Repair `#` hash line comments, like in Python, YAML, or Hjson:
|
|
6
|
+
`{"a": 1 # comment\n}` → `{"a":1}`, `{ # note\n "a": 1}` →
|
|
7
|
+
`{"a":1}`, `# lead\n{"a": 1}` → `{"a":1}`. Divergence from upstream
|
|
8
|
+
[jsonrepair](https://github.com/josdejong/jsonrepair) (v3.14.0
|
|
9
|
+
raises on all of these), documented in a code comment at the site.
|
|
10
|
+
context-aware so unquoted values starting with `#` keep repairing
|
|
11
|
+
into strings — `{"color": #ff0000}` → `{"color":"#ff0000"}`,
|
|
12
|
+
`{#tag: 1}` → `{"#tag":1}`, `#standalone` → `"#standalone"` — where
|
|
13
|
+
Python's `json_repair` silently loses them (`{"color": ""}`).
|
|
14
|
+
Where a value or key is expected, a `#` token reaching a structural
|
|
15
|
+
delimiter (`,` `}` `]` `:`) before any whitespace, or running to
|
|
16
|
+
end-of-input without any whitespace, stays a value; anything else is a
|
|
17
|
+
comment stripped to the end of the line. The tradeoff: a `#` token
|
|
18
|
+
followed by whitespace or a newline at a value position now reads
|
|
19
|
+
as a comment — `{"a": #b c}` → `{"a":null}` and `{"a": #tag\n}` →
|
|
20
|
+
`{"a":null}`, where 0.12.0 kept them as strings (Python drops them
|
|
21
|
+
too), and a comment-only document now raises like `// only a
|
|
22
|
+
comment` always has. Pinned in the spec suite as conscious
|
|
23
|
+
decisions.
|
|
24
|
+
|
|
3
25
|
### 2026-06-12 (0.12.0)
|
|
4
26
|
|
|
5
27
|
* Repair the three known input families that raised `Internal error:
|
data/lib/json/repair/version.rb
CHANGED
data/lib/json/repairer.rb
CHANGED
|
@@ -71,7 +71,7 @@ module JSON
|
|
|
71
71
|
# repair redundant closing braces and brackets
|
|
72
72
|
while [CLOSING_BRACE, CLOSING_BRACKET].include?(@json[@index])
|
|
73
73
|
@index += 1
|
|
74
|
-
parse_whitespace_and_skip_comments
|
|
74
|
+
parse_whitespace_and_skip_comments(value_expected: false)
|
|
75
75
|
end
|
|
76
76
|
|
|
77
77
|
if @index >= @json.length
|
|
@@ -93,17 +93,17 @@ module JSON
|
|
|
93
93
|
parse_keywords ||
|
|
94
94
|
parse_unquoted_string(false) ||
|
|
95
95
|
parse_regex
|
|
96
|
-
parse_whitespace_and_skip_comments
|
|
96
|
+
parse_whitespace_and_skip_comments(value_expected: false)
|
|
97
97
|
|
|
98
98
|
process
|
|
99
99
|
end
|
|
100
100
|
|
|
101
|
-
def parse_whitespace_and_skip_comments(skip_newline: true)
|
|
101
|
+
def parse_whitespace_and_skip_comments(skip_newline: true, value_expected: true)
|
|
102
102
|
start = @index
|
|
103
103
|
|
|
104
104
|
changed = parse_whitespace(skip_newline: skip_newline)
|
|
105
105
|
loop do
|
|
106
|
-
changed = parse_comment
|
|
106
|
+
changed = parse_comment(value_expected: value_expected)
|
|
107
107
|
changed = parse_whitespace(skip_newline: skip_newline) if changed
|
|
108
108
|
break unless changed
|
|
109
109
|
end
|
|
@@ -131,7 +131,7 @@ module JSON
|
|
|
131
131
|
false
|
|
132
132
|
end
|
|
133
133
|
|
|
134
|
-
def parse_comment
|
|
134
|
+
def parse_comment(value_expected: true)
|
|
135
135
|
if @json[@index] == '/' && @json[@index + 1] == '*'
|
|
136
136
|
# Block comment
|
|
137
137
|
@index += 2
|
|
@@ -143,11 +143,42 @@ module JSON
|
|
|
143
143
|
@index += 2
|
|
144
144
|
@index += 1 until @json[@index].nil? || @json[@index] == "\n"
|
|
145
145
|
true
|
|
146
|
+
elsif @json[@index] == '#' && hash_comment?(value_expected)
|
|
147
|
+
# Hash line comment, like in Python, YAML, or Hjson (divergence
|
|
148
|
+
# from upstream, which raises on `#` as of v3.14.0)
|
|
149
|
+
@index += 1
|
|
150
|
+
@index += 1 until @json[@index].nil? || @json[@index] == "\n"
|
|
151
|
+
true
|
|
146
152
|
else
|
|
147
153
|
false
|
|
148
154
|
end
|
|
149
155
|
end
|
|
150
156
|
|
|
157
|
+
# Decide whether the `#` at @index starts a line comment or an
|
|
158
|
+
# unquoted value like {"color": #ff0000}, {#tag: 1}, or a root
|
|
159
|
+
# #hashtag (which Python's json_repair eats as comments, losing
|
|
160
|
+
# data). Where no value or key is expected an unquoted token would
|
|
161
|
+
# be junk anyway, so `#` is always a comment, exactly like `//`.
|
|
162
|
+
# Where one is expected, scan the rest of the line: a structural
|
|
163
|
+
# delimiter (`,` `}` `]` `:`) before any whitespace means the token
|
|
164
|
+
# reads as a value in context; whitespace (including the newline
|
|
165
|
+
# itself) first means comment prose; reaching EOF without whitespace
|
|
166
|
+
# keeps the token a value, so truncated input like {"a": #tag is
|
|
167
|
+
# repaired, not dropped. Divergence from upstream, as above.
|
|
168
|
+
def hash_comment?(value_expected)
|
|
169
|
+
return true unless value_expected
|
|
170
|
+
|
|
171
|
+
i = @index + 1
|
|
172
|
+
while (char = @json[i])
|
|
173
|
+
return true if whitespace_or_special?(char)
|
|
174
|
+
return false if [COMMA, COLON, CLOSING_BRACE, CLOSING_BRACKET].include?(char)
|
|
175
|
+
|
|
176
|
+
i += 1
|
|
177
|
+
end
|
|
178
|
+
|
|
179
|
+
false
|
|
180
|
+
end
|
|
181
|
+
|
|
151
182
|
# Find and skip over a Markdown fenced code block:
|
|
152
183
|
# ``` ... ```
|
|
153
184
|
# or
|
|
@@ -268,7 +299,7 @@ module JSON
|
|
|
268
299
|
break
|
|
269
300
|
end
|
|
270
301
|
|
|
271
|
-
parse_whitespace_and_skip_comments
|
|
302
|
+
parse_whitespace_and_skip_comments(value_expected: false)
|
|
272
303
|
processed_colon = parse_character(COLON)
|
|
273
304
|
truncated_text = @index >= @json.length
|
|
274
305
|
unless processed_colon
|
|
@@ -491,7 +522,7 @@ module JSON
|
|
|
491
522
|
@index += 1
|
|
492
523
|
@output << str
|
|
493
524
|
|
|
494
|
-
parse_whitespace_and_skip_comments(skip_newline: false)
|
|
525
|
+
parse_whitespace_and_skip_comments(skip_newline: false, value_expected: false)
|
|
495
526
|
|
|
496
527
|
if stop_at_delimiter ||
|
|
497
528
|
@index >= @json.length ||
|
|
@@ -846,11 +877,11 @@ module JSON
|
|
|
846
877
|
def parse_concatenated_string
|
|
847
878
|
processed = false
|
|
848
879
|
|
|
849
|
-
parse_whitespace_and_skip_comments
|
|
880
|
+
parse_whitespace_and_skip_comments(value_expected: false)
|
|
850
881
|
while @json[@index] == PLUS
|
|
851
882
|
processed = true
|
|
852
883
|
@index += 1
|
|
853
|
-
parse_whitespace_and_skip_comments
|
|
884
|
+
parse_whitespace_and_skip_comments(value_expected: false)
|
|
854
885
|
|
|
855
886
|
# repair: remove the end quote of the first string
|
|
856
887
|
@output = strip_last_occurrence(@output, '"', strip_remaining_text: true)
|
data/sig/json/repairer.rbs
CHANGED
|
@@ -37,7 +37,9 @@ module JSON
|
|
|
37
37
|
|
|
38
38
|
def parse_whitespace: (?skip_newline: bool) -> bool
|
|
39
39
|
|
|
40
|
-
def parse_comment: () -> bool
|
|
40
|
+
def parse_comment: (?value_expected: bool) -> bool
|
|
41
|
+
|
|
42
|
+
def hash_comment?: (bool value_expected) -> bool
|
|
41
43
|
|
|
42
44
|
# Find and skip over a Markdown fenced code block
|
|
43
45
|
def parse_markdown_code_block: (::Array[::String] blocks) -> bool
|
|
@@ -87,7 +89,7 @@ module JSON
|
|
|
87
89
|
|
|
88
90
|
def parse_character: (::String char) -> bool
|
|
89
91
|
|
|
90
|
-
def parse_whitespace_and_skip_comments: (?skip_newline: bool) -> bool
|
|
92
|
+
def parse_whitespace_and_skip_comments: (?skip_newline: bool, ?value_expected: bool) -> bool
|
|
91
93
|
|
|
92
94
|
# Parse a number like 2.4 or 2.4e6
|
|
93
95
|
def parse_number: () -> bool
|