json-repair 0.12.0 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: aef10e86ea82fb56d9666ad7470317cf751f00730ef51910094ce8b2ee876a53
4
- data.tar.gz: 682f0cdacc02896687e6c39e534f92f0beda52110679da8b7b7f43721aa6c2a4
3
+ metadata.gz: cf5d06053ce264da6b60de4beef4a5718fd3482c81d295deea6dbf90fc63c098
4
+ data.tar.gz: 9d343f549cf34414e0de45618cb19bd4a9b58b12e53c2f52da0380fc73b4d95a
5
5
  SHA512:
6
- metadata.gz: 572225e5c09ac6ab7795d21d179e9ac07bc2c3bffedb5f1df48afa5a33ab8a73923af90429730532a822f3a78004181f3a9d2d7a2bddf17b9b649819b169ec65
7
- data.tar.gz: a62311d2002b538b81132f6efb8525e6c6801baaa5a8f9eac020abbc2726e5ed783ee2719aeed9dc77301492f869f79deb807b9eee74f3a56e0a37d4712b7279
6
+ metadata.gz: 269f7de75ec6cadde857d2f7c10186b1d0c63a19aa3e863744957c95b98db8d0cef790deef8814d532ca62c0405d97cfc5f441c37be9f1c4e6c7f5cfe07eaa67
7
+ data.tar.gz: 9cbc099462defe41b8bf96f590833d61de90f79c8e5b0f2d8a7d0ae04ee72dcc7e512c6f3886f67cb0e700dfb1046abd143b2adbde560c502d25700035e69834
data/CHANGELOG.md CHANGED
@@ -1,5 +1,27 @@
1
1
  # Changes
2
2
 
3
+ ### 2026-06-12 (0.13.0)
4
+
5
+ * Repair `#` hash line comments, like in Python, YAML, or Hjson:
6
+ `{"a": 1 # comment\n}` → `{"a":1}`, `{ # note\n "a": 1}` →
7
+ `{"a":1}`, `# lead\n{"a": 1}` → `{"a":1}`. Divergence from upstream
8
+ [jsonrepair](https://github.com/josdejong/jsonrepair) (v3.14.0
9
+ raises on all of these), noted in a code comment where it occurs. Recognition is
10
+ context-aware so unquoted values starting with `#` keep repairing
11
+ into strings — `{"color": #ff0000}` → `{"color":"#ff0000"}`,
12
+ `{#tag: 1}` → `{"#tag":1}`, `#standalone` → `"#standalone"` — where
13
+ Python's `json_repair` silently loses them (`{"color": ""}`).
14
+ Where a value or key is expected, a `#` token reaching a structural
15
+ delimiter (`,` `}` `]` `:`) before any whitespace, or running to
16
+ end-of-input without a newline, stays a value; anything else is a
17
+ comment stripped to the end of the line. The tradeoff: a `#` token
18
+ followed by whitespace or a newline at a value position now reads
19
+ as a comment — `{"a": #b c}` → `{"a":null}` and `{"a": #tag\n}` →
20
+ `{"a":null}`, where 0.12.0 kept them as strings (Python drops them
21
+ too), and a comment-only document now raises like `// only a
22
+ comment` always has. Pinned in the spec suite as conscious
23
+ decisions.
24
+
3
25
  ### 2026-06-12 (0.12.0)
4
26
 
5
27
  * Repair the three known input families that raised `Internal error:
@@ -2,6 +2,6 @@
2
2
 
3
3
  module JSON
4
4
  module Repair
5
- VERSION = '0.12.0'
5
+ VERSION = '0.13.0'
6
6
  end
7
7
  end
data/lib/json/repairer.rb CHANGED
@@ -71,7 +71,7 @@ module JSON
71
71
  # repair redundant closing braces and brackets
72
72
  while [CLOSING_BRACE, CLOSING_BRACKET].include?(@json[@index])
73
73
  @index += 1
74
- parse_whitespace_and_skip_comments
74
+ parse_whitespace_and_skip_comments(value_expected: false)
75
75
  end
76
76
 
77
77
  if @index >= @json.length
@@ -93,17 +93,17 @@ module JSON
93
93
  parse_keywords ||
94
94
  parse_unquoted_string(false) ||
95
95
  parse_regex
96
- parse_whitespace_and_skip_comments
96
+ parse_whitespace_and_skip_comments(value_expected: false)
97
97
 
98
98
  process
99
99
  end
100
100
 
101
- def parse_whitespace_and_skip_comments(skip_newline: true)
101
+ def parse_whitespace_and_skip_comments(skip_newline: true, value_expected: true)
102
102
  start = @index
103
103
 
104
104
  changed = parse_whitespace(skip_newline: skip_newline)
105
105
  loop do
106
- changed = parse_comment
106
+ changed = parse_comment(value_expected: value_expected)
107
107
  changed = parse_whitespace(skip_newline: skip_newline) if changed
108
108
  break unless changed
109
109
  end
@@ -131,7 +131,7 @@ module JSON
131
131
  false
132
132
  end
133
133
 
134
- def parse_comment
134
+ def parse_comment(value_expected: true)
135
135
  if @json[@index] == '/' && @json[@index + 1] == '*'
136
136
  # Block comment
137
137
  @index += 2
@@ -143,11 +143,42 @@ module JSON
143
143
  @index += 2
144
144
  @index += 1 until @json[@index].nil? || @json[@index] == "\n"
145
145
  true
146
+ elsif @json[@index] == '#' && hash_comment?(value_expected)
147
+ # Hash line comment, like in Python, YAML, or Hjson (divergence
148
+ # from upstream, which raises on `#` as of v3.14.0)
149
+ @index += 1
150
+ @index += 1 until @json[@index].nil? || @json[@index] == "\n"
151
+ true
146
152
  else
147
153
  false
148
154
  end
149
155
  end
150
156
 
157
+ # Decide whether the `#` at @index starts a line comment or an
158
+ # unquoted value like {"color": #ff0000}, {#tag: 1}, or a root
159
+ # #hashtag (which Python's json_repair eats as comments, losing
160
+ # data). Where no value or key is expected an unquoted token would
161
+ # be junk anyway, so `#` is always a comment, exactly like `//`.
162
+ # Where one is expected, scan the rest of the line: a structural
163
+ # delimiter (`,` `}` `]` `:`) before any whitespace means the token
164
+ # reads as a value in context; whitespace (including the newline
165
+ # itself) first means comment prose; reaching EOF without a newline
166
+ # keeps the token a value, so truncated input like {"a": #tag is
167
+ # repaired, not dropped. Divergence from upstream, as above.
168
+ def hash_comment?(value_expected)
169
+ return true unless value_expected
170
+
171
+ i = @index + 1
172
+ while (char = @json[i])
173
+ return true if whitespace_or_special?(char)
174
+ return false if [COMMA, COLON, CLOSING_BRACE, CLOSING_BRACKET].include?(char)
175
+
176
+ i += 1
177
+ end
178
+
179
+ false
180
+ end
181
+
151
182
  # Find and skip over a Markdown fenced code block:
152
183
  # ``` ... ```
153
184
  # or
@@ -268,7 +299,7 @@ module JSON
268
299
  break
269
300
  end
270
301
 
271
- parse_whitespace_and_skip_comments
302
+ parse_whitespace_and_skip_comments(value_expected: false)
272
303
  processed_colon = parse_character(COLON)
273
304
  truncated_text = @index >= @json.length
274
305
  unless processed_colon
@@ -491,7 +522,7 @@ module JSON
491
522
  @index += 1
492
523
  @output << str
493
524
 
494
- parse_whitespace_and_skip_comments(skip_newline: false)
525
+ parse_whitespace_and_skip_comments(skip_newline: false, value_expected: false)
495
526
 
496
527
  if stop_at_delimiter ||
497
528
  @index >= @json.length ||
@@ -846,11 +877,11 @@ module JSON
846
877
  def parse_concatenated_string
847
878
  processed = false
848
879
 
849
- parse_whitespace_and_skip_comments
880
+ parse_whitespace_and_skip_comments(value_expected: false)
850
881
  while @json[@index] == PLUS
851
882
  processed = true
852
883
  @index += 1
853
- parse_whitespace_and_skip_comments
884
+ parse_whitespace_and_skip_comments(value_expected: false)
854
885
 
855
886
  # repair: remove the end quote of the first string
856
887
  @output = strip_last_occurrence(@output, '"', strip_remaining_text: true)
@@ -37,7 +37,9 @@ module JSON
37
37
 
38
38
  def parse_whitespace: (?skip_newline: bool) -> bool
39
39
 
40
- def parse_comment: () -> bool
40
+ def parse_comment: (?value_expected: bool) -> bool
41
+
42
+ def hash_comment?: (bool value_expected) -> bool
41
43
 
42
44
  # Find and skip over a Markdown fenced code block
43
45
  def parse_markdown_code_block: (::Array[::String] blocks) -> bool
@@ -87,7 +89,7 @@ module JSON
87
89
 
88
90
  def parse_character: (::String char) -> bool
89
91
 
90
- def parse_whitespace_and_skip_comments: (?skip_newline: bool) -> bool
92
+ def parse_whitespace_and_skip_comments: (?skip_newline: bool, ?value_expected: bool) -> bool
91
93
 
92
94
  # Parse a number like 2.4 or 2.4e6
93
95
  def parse_number: () -> bool
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: json-repair
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.12.0
4
+ version: 0.13.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Aleksandr Zykov