json_mend 0.1.7 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +6 -3
- data/.tool-versions +1 -1
- data/README.md +13 -1
- data/lib/json_mend/parser.rb +186 -98
- data/lib/json_mend/version.rb +1 -1
- data/lib/json_mend.rb +10 -4
- metadata +4 -4
- /data/sig/{manifest.yaml → manifest.yml} +0 -0
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 9a3bfc54ceae164d2837fbcdf3751e79359397b735693ec990954fee19bfa60e
|
|
4
|
+
data.tar.gz: 2dfea1b0a6ada799891385ec4c35a83153005f5b344e23b7a9921c726a94c220
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: '0592fea3e3859aaafbc6b2508b03de3a54eefd17e4520375ce1163e929ed24be10ad16b2936db2175ec79d9f9f4dd7acbd82ba72b50c3df3ea347ffe8436ded6'
|
|
7
|
+
data.tar.gz: 6708504ef9b2c1f68f0bdc1da889c254580a169066d37db99993468f78bcb9c3f8ca351dfaafecbc62780d319c12e51a4536b6da70a589d450ead3f407ed8ea7
|
data/.rubocop.yml
CHANGED
|
@@ -7,10 +7,10 @@ AllCops:
|
|
|
7
7
|
SuggestExtensions: false
|
|
8
8
|
|
|
9
9
|
Metrics/AbcSize:
|
|
10
|
-
Max:
|
|
10
|
+
Max: 70
|
|
11
11
|
|
|
12
12
|
Metrics/ClassLength:
|
|
13
|
-
Max:
|
|
13
|
+
Max: 900
|
|
14
14
|
|
|
15
15
|
Metrics/CyclomaticComplexity:
|
|
16
16
|
Max: 35
|
|
@@ -18,11 +18,14 @@ Metrics/CyclomaticComplexity:
|
|
|
18
18
|
Metrics/MethodLength:
|
|
19
19
|
Max: 80
|
|
20
20
|
|
|
21
|
+
Metrics/BlockLength:
|
|
22
|
+
Max: 40
|
|
23
|
+
|
|
21
24
|
Metrics/PerceivedComplexity:
|
|
22
25
|
Max: 35
|
|
23
26
|
|
|
24
27
|
Metrics/BlockNesting:
|
|
25
|
-
Max:
|
|
28
|
+
Max: 8
|
|
26
29
|
|
|
27
30
|
Naming/PredicateMethod:
|
|
28
31
|
Enabled: false
|
data/.tool-versions
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
ruby
|
|
1
|
+
ruby 4.0.2
|
data/README.md
CHANGED
|
@@ -1,6 +1,18 @@
|
|
|
1
1
|
# JsonMend [![CI](https://github.com/le0pard/json_mend/actions/workflows/main.yml/badge.svg)](https://github.com/le0pard/json_mend/actions/workflows/main.yml)
|
|
2
2
|
|
|
3
|
-
`JsonMend` is a robust Ruby gem designed to repair broken or malformed JSON strings. It is specifically optimized to handle common errors found in JSON generated by Large Language Models (LLMs), such as missing quotes, trailing commas, unescaped characters, and stray comments
|
|
3
|
+
`JsonMend` is a robust Ruby gem designed to repair broken or malformed JSON strings. It is specifically optimized to handle common errors found in JSON generated by Large Language Models (LLMs), such as missing quotes, trailing commas, unescaped characters, and stray comments
|
|
4
|
+
|
|
5
|
+
# Why?
|
|
6
|
+
|
|
7
|
+
Integrating Large Language Models (LLMs) into software workflows often requires structured data output. While prompting an LLM to "return JSON" is a common pattern, models are probabilistic text generators, not strict serialization engines. They frequently treat JSON syntax as a loose suggestion rather than a rigid standard.
|
|
8
|
+
|
|
9
|
+
Standard `JSON.parse` is fragile when facing the chaotic output of an LLM. Common failure modes include:
|
|
10
|
+
|
|
11
|
+
- **Hallucinated Syntax**: LLMs often include trailing commas, code comments (`//` or `#`), single quotes, or Python-style literals (`True`, `False`) that break standard JSON parsers
|
|
12
|
+
- **"Chatty" Wrappers**: Models frequently wrap JSON in Markdown code fences (```` ```json ... ``` ````) or include conversational preambles (`Here is the data you requested: ...`), turning otherwise valid data into syntax errors
|
|
13
|
+
- **Truncation**: JSON is verbose. Output limits often cut off the response mid-stream, leaving unclosed brackets and braces
|
|
14
|
+
|
|
15
|
+
`JsonMend` acts as a middleware layer between the messy text output of an LLM and your Ruby application. It aggressively parses, cleans, and repairs the raw string—handling truncation, stripping garbage text, and normalizing syntax—to ensure you get usable structured data instead of a `JSON::ParserError`
|
|
4
16
|
|
|
5
17
|
## Features
|
|
6
18
|
|
data/lib/json_mend/parser.rb
CHANGED
|
@@ -1,15 +1,23 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require 'strscan'
|
|
4
|
-
require 'set'
|
|
5
4
|
|
|
6
5
|
# Root module
|
|
7
6
|
module JsonMend
|
|
8
7
|
# The core parser that does the heavy lifting of fixing the JSON
|
|
9
8
|
class Parser
|
|
9
|
+
MAX_ALLOWED_DEPTH = 100
|
|
10
10
|
COMMENT_DELIMETERS = ['#', '/'].freeze
|
|
11
11
|
NUMBER_CHARS = Set.new('0123456789-.eE/,_'.chars).freeze
|
|
12
12
|
STRING_DELIMITERS = ['"', "'", '“', '”'].freeze
|
|
13
|
+
SKIP_CHARS_REGEX_CACHE = {
|
|
14
|
+
'"' => /"/,
|
|
15
|
+
"'" => /'/,
|
|
16
|
+
'“' => /“/,
|
|
17
|
+
'”' => /”/,
|
|
18
|
+
':' => /:/,
|
|
19
|
+
'}' => /\}/
|
|
20
|
+
}.freeze
|
|
13
21
|
ESCAPE_MAPPING = {
|
|
14
22
|
't' => "\t",
|
|
15
23
|
'n' => "\n",
|
|
@@ -40,6 +48,7 @@ module JsonMend
|
|
|
40
48
|
def initialize(json_string)
|
|
41
49
|
@scanner = StringScanner.new(json_string)
|
|
42
50
|
@context = []
|
|
51
|
+
@depth = 0
|
|
43
52
|
end
|
|
44
53
|
|
|
45
54
|
# Kicks off the parsing process. This is a direct port of the robust Python logic
|
|
@@ -63,8 +72,11 @@ module JsonMend
|
|
|
63
72
|
# Ignore strings that look like closing braces garbage (e.g. "}", " ] ")
|
|
64
73
|
next if new_json.is_a?(String) && new_json.strip.match?(/^[}\]]+$/)
|
|
65
74
|
|
|
66
|
-
|
|
67
|
-
|
|
75
|
+
if both_hash?(json.last, new_json)
|
|
76
|
+
deep_merge_hashes!(json.last, new_json)
|
|
77
|
+
else
|
|
78
|
+
json << new_json
|
|
79
|
+
end
|
|
68
80
|
end
|
|
69
81
|
end
|
|
70
82
|
|
|
@@ -76,6 +88,33 @@ module JsonMend
|
|
|
76
88
|
|
|
77
89
|
private
|
|
78
90
|
|
|
91
|
+
def with_depth_check
|
|
92
|
+
@depth += 1
|
|
93
|
+
raise JSON::NestingError, "nesting of #{@depth} is too deep" if @depth > MAX_ALLOWED_DEPTH
|
|
94
|
+
|
|
95
|
+
yield
|
|
96
|
+
ensure
|
|
97
|
+
@depth -= 1
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
def deep_merge_hashes!(target, source)
|
|
101
|
+
source.each do |key, new_val|
|
|
102
|
+
if target.key?(key)
|
|
103
|
+
old_val = target[key]
|
|
104
|
+
if old_val.is_a?(Hash) && new_val.is_a?(Hash)
|
|
105
|
+
deep_merge_hashes!(old_val, new_val)
|
|
106
|
+
elsif old_val.is_a?(Array) && new_val.is_a?(Array)
|
|
107
|
+
target[key] = old_val + new_val
|
|
108
|
+
else
|
|
109
|
+
target[key] = new_val
|
|
110
|
+
end
|
|
111
|
+
else
|
|
112
|
+
target[key] = new_val
|
|
113
|
+
end
|
|
114
|
+
end
|
|
115
|
+
target
|
|
116
|
+
end
|
|
117
|
+
|
|
79
118
|
def parse_json
|
|
80
119
|
until @scanner.eos?
|
|
81
120
|
char = peek_char
|
|
@@ -123,51 +162,53 @@ module JsonMend
|
|
|
123
162
|
|
|
124
163
|
# Parses a JSON object.
|
|
125
164
|
def parse_object
|
|
126
|
-
|
|
165
|
+
with_depth_check do
|
|
166
|
+
object = {}
|
|
127
167
|
|
|
128
|
-
|
|
129
|
-
|
|
168
|
+
loop do
|
|
169
|
+
skip_whitespaces
|
|
130
170
|
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
171
|
+
# Explicitly consume comments to ensure they don't hide separators (like commas)
|
|
172
|
+
# or get parsed as part of the next key.
|
|
173
|
+
if COMMENT_DELIMETERS.include?(peek_char)
|
|
174
|
+
parse_comment
|
|
175
|
+
next
|
|
176
|
+
end
|
|
137
177
|
|
|
138
|
-
|
|
139
|
-
|
|
178
|
+
# >> PRIMARY EXIT: End of object or end of string.
|
|
179
|
+
break if @scanner.eos? || @scanner.scan('}') || peek_char == ']'
|
|
140
180
|
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
181
|
+
# Leniently consume any leading junk characters (stray commas and whitespace)
|
|
182
|
+
# that might appear before a key.
|
|
183
|
+
@scanner.skip(/[,\s]+/)
|
|
144
184
|
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
185
|
+
# --- Delegate to a helper to parse the next Key-Value pair ---
|
|
186
|
+
key, value, colon_found = parse_object_pair(object)
|
|
187
|
+
next if SKIPPED_KEYS.include?(key)
|
|
148
188
|
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
189
|
+
# If the helper returns nil for the key, it signals that we should
|
|
190
|
+
# stop parsing this object (e.g. a duplicate key was found,
|
|
191
|
+
# indicating the start of a new object).
|
|
192
|
+
if key.nil?
|
|
193
|
+
@scanner.scan('}')
|
|
194
|
+
break
|
|
195
|
+
end
|
|
156
196
|
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
197
|
+
# Assign the parsed pair to our object, avoiding empty keys.
|
|
198
|
+
# But only if we didn't firmly establish the key with a colon already.
|
|
199
|
+
skip_whitespaces
|
|
200
|
+
if peek_char == ':' && !colon_found
|
|
201
|
+
key = value.to_s
|
|
202
|
+
@scanner.getch # consume ':'
|
|
203
|
+
value = parse_object_value
|
|
204
|
+
end
|
|
205
|
+
|
|
206
|
+
# Assign the parsed pair to our object.
|
|
207
|
+
object[key] = value
|
|
164
208
|
end
|
|
165
209
|
|
|
166
|
-
|
|
167
|
-
object[key] = value
|
|
210
|
+
object
|
|
168
211
|
end
|
|
169
|
-
|
|
170
|
-
object
|
|
171
212
|
end
|
|
172
213
|
|
|
173
214
|
# Attempts to parse a single key-value pair.
|
|
@@ -294,60 +335,62 @@ module JsonMend
|
|
|
294
335
|
# Assumes the opening '[' has already been consumed by the caller.
|
|
295
336
|
# This is a lenient parser designed to handle malformed JSON.
|
|
296
337
|
def parse_array
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
# Stop when you find the closing bracket or an invalid character like '}'
|
|
301
|
-
while !@scanner.eos? && !TERMINATORS_ARRAY.include?(char)
|
|
302
|
-
skip_whitespaces
|
|
338
|
+
with_depth_check do
|
|
339
|
+
arr = []
|
|
340
|
+
@context.push(:array)
|
|
303
341
|
char = peek_char
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
parse_comment
|
|
342
|
+
# Stop when you find the closing bracket or an invalid character like '}'
|
|
343
|
+
while !@scanner.eos? && !TERMINATORS_ARRAY.include?(char)
|
|
344
|
+
skip_whitespaces
|
|
308
345
|
char = peek_char
|
|
309
|
-
next
|
|
310
|
-
end
|
|
311
346
|
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
i = skip_to_character(char, start_idx: i)
|
|
319
|
-
i = skip_whitespaces_at(start_idx: i + 1)
|
|
320
|
-
value = (peek_char(i) == ':' ? parse_object : parse_string)
|
|
321
|
-
else
|
|
322
|
-
value = parse_json
|
|
323
|
-
end
|
|
347
|
+
# Check for comments explicitly inside array to avoid recursion or garbage consumption issues
|
|
348
|
+
if COMMENT_DELIMETERS.include?(char)
|
|
349
|
+
parse_comment
|
|
350
|
+
char = peek_char
|
|
351
|
+
next
|
|
352
|
+
end
|
|
324
353
|
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
354
|
+
value = ''
|
|
355
|
+
if STRING_DELIMITERS.include?(char)
|
|
356
|
+
# Sometimes it can happen that LLMs forget to start an object and then you think it's a string in an array
|
|
357
|
+
# So we are going to check if this string is followed by a : or not
|
|
358
|
+
# And either parse the string or parse the object
|
|
359
|
+
i = 1
|
|
360
|
+
i = skip_to_character(char, start_idx: i)
|
|
361
|
+
i = skip_whitespaces_at(start_idx: i + 1)
|
|
362
|
+
value = (peek_char(i) == ':' ? parse_object : parse_string)
|
|
363
|
+
else
|
|
364
|
+
value = parse_json
|
|
365
|
+
end
|
|
366
|
+
|
|
367
|
+
# Handle JSON_STOP_TOKEN from parse_json (EOS or consumed terminator)
|
|
368
|
+
if value == JSON_STOP_TOKEN
|
|
369
|
+
# Do nothing, just skipped garbage
|
|
370
|
+
elsif strictly_empty?(value)
|
|
371
|
+
# Only consume if we didn't just hit a terminator that parse_json successfully respected
|
|
372
|
+
@scanner.getch unless value.nil? && TERMINATORS_ARRAY.include?(peek_char)
|
|
373
|
+
elsif value == '...' && @scanner.string.getbyte(@scanner.pos - 1) == 46
|
|
374
|
+
# just skip if the previous byte was a dot (46)
|
|
375
|
+
else
|
|
376
|
+
arr << value
|
|
377
|
+
end
|
|
336
378
|
|
|
337
|
-
char = peek_char
|
|
338
|
-
while char && char != ']' && (char.match?(/\s/) || char == ',')
|
|
339
|
-
@scanner.getch
|
|
340
379
|
char = peek_char
|
|
380
|
+
while char && char != ']' && (char.match?(/\s/) || char == ',')
|
|
381
|
+
@scanner.getch
|
|
382
|
+
char = peek_char
|
|
383
|
+
end
|
|
341
384
|
end
|
|
342
|
-
end
|
|
343
385
|
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
386
|
+
# Handle a potentially missing closing bracket, a common LLM error.
|
|
387
|
+
unless @scanner.scan(']')
|
|
388
|
+
@scanner.scan('}') # Consume } if it was the closer
|
|
389
|
+
end
|
|
390
|
+
@context.pop
|
|
349
391
|
|
|
350
|
-
|
|
392
|
+
arr
|
|
393
|
+
end
|
|
351
394
|
end
|
|
352
395
|
|
|
353
396
|
# Parses a JSON string. This is a very lenient parser designed to handle
|
|
@@ -744,7 +787,9 @@ module JsonMend
|
|
|
744
787
|
bk = 1
|
|
745
788
|
slashes = 0
|
|
746
789
|
# Look back in the string buffer directly for speed
|
|
747
|
-
while (
|
|
790
|
+
while (@scanner.pos - 1 - bk >= 0) &&
|
|
791
|
+
(char_code = @scanner.string.getbyte(@scanner.pos - 1 - bk)) &&
|
|
792
|
+
char_code == 92 # 92 is backslash
|
|
748
793
|
slashes += 1
|
|
749
794
|
bk += 1
|
|
750
795
|
end
|
|
@@ -902,7 +947,37 @@ module JsonMend
|
|
|
902
947
|
# Validate valid hex digits
|
|
903
948
|
if hex_parts.length == num_chars && hex_parts.all? { |c| c.match?(/[0-9a-fA-F]/) }
|
|
904
949
|
string_parts.pop
|
|
905
|
-
|
|
950
|
+
hex_val = hex_parts.join.to_i(16)
|
|
951
|
+
|
|
952
|
+
if char == 'u' && hex_val.between?(0xD800, 0xDBFF)
|
|
953
|
+
# Handle high surrogate pair
|
|
954
|
+
saved_pos = @scanner.pos
|
|
955
|
+
if @scanner.scan(/\\u([0-9a-fA-F]{4})/)
|
|
956
|
+
low_surrogate = @scanner[1].to_i(16)
|
|
957
|
+
if low_surrogate.between?(0xDC00, 0xDFFF)
|
|
958
|
+
# Combine surrogates into a valid UTF-8 character
|
|
959
|
+
code_point = 0x10000 + ((hex_val - 0xD800) * 0x400) + (low_surrogate - 0xDC00)
|
|
960
|
+
string_parts << code_point.chr('UTF-8')
|
|
961
|
+
else
|
|
962
|
+
# Invalid low surrogate: backtrack and use replacement char
|
|
963
|
+
@scanner.pos = saved_pos
|
|
964
|
+
string_parts << "\uFFFD"
|
|
965
|
+
end
|
|
966
|
+
else
|
|
967
|
+
# Missing low surrogate
|
|
968
|
+
string_parts << "\uFFFD"
|
|
969
|
+
end
|
|
970
|
+
elsif char == 'u' && hex_val.between?(0xDC00, 0xDFFF)
|
|
971
|
+
# Unpaired low surrogate
|
|
972
|
+
string_parts << "\uFFFD"
|
|
973
|
+
else
|
|
974
|
+
# Regular code point or hex escape
|
|
975
|
+
begin
|
|
976
|
+
string_parts << hex_val.chr('UTF-8')
|
|
977
|
+
rescue RangeError
|
|
978
|
+
string_parts << "\uFFFD"
|
|
979
|
+
end
|
|
980
|
+
end
|
|
906
981
|
|
|
907
982
|
# Scanner is already advanced past digits
|
|
908
983
|
char = peek_char
|
|
@@ -1014,7 +1089,18 @@ module JsonMend
|
|
|
1014
1089
|
if scanned_str.end_with?('.')
|
|
1015
1090
|
Float(scanned_str[0...-1])
|
|
1016
1091
|
elsif scanned_str.include?(',')
|
|
1017
|
-
|
|
1092
|
+
# Check if commas are being used as thousands separators (e.g., 1,234 or 1,234,567.89)
|
|
1093
|
+
if scanned_str.count(',') > 1 || scanned_str.match?(/,\d{3}(?:\.\d+)?$/)
|
|
1094
|
+
cleaned = scanned_str.delete(',')
|
|
1095
|
+
if cleaned.match?(/[.eE]/)
|
|
1096
|
+
Float(cleaned)
|
|
1097
|
+
else
|
|
1098
|
+
Integer(cleaned, 10)
|
|
1099
|
+
end
|
|
1100
|
+
else
|
|
1101
|
+
# Treat single comma as a decimal point (European style, e.g., 1,5 -> 1.5)
|
|
1102
|
+
Float(scanned_str.tr(',', '.'))
|
|
1103
|
+
end
|
|
1018
1104
|
elsif scanned_str.match?(/[.eE]/)
|
|
1019
1105
|
Float(scanned_str)
|
|
1020
1106
|
else
|
|
@@ -1060,20 +1146,24 @@ module JsonMend
|
|
|
1060
1146
|
|
|
1061
1147
|
if context_contain?(:object_key)
|
|
1062
1148
|
# If parsing a key, we must stop at ':' and structural closers
|
|
1063
|
-
@scanner.scan_until(/(?=[\n\r:}\]])/)
|
|
1149
|
+
@scanner.scan_until(/(?=[\n\r:}\]]|\\n|\\r)/) || @scanner.terminate
|
|
1064
1150
|
elsif in_array && in_object
|
|
1065
1151
|
# Nested ambiguity, stop at any closer
|
|
1066
|
-
@scanner.scan_until(/(?=[\n\r}\]])/)
|
|
1152
|
+
@scanner.scan_until(/(?=[\n\r}\]]|\\n|\\r)/) || @scanner.terminate
|
|
1067
1153
|
elsif in_array
|
|
1068
1154
|
# Inside array, stop at ']'
|
|
1069
|
-
@scanner.scan_until(/(?=[\n\r\]])/)
|
|
1155
|
+
@scanner.scan_until(/(?=[\n\r\]]|\\n|\\r)/) || @scanner.terminate
|
|
1070
1156
|
elsif in_object
|
|
1071
1157
|
# Inside object value, stop at '}'
|
|
1072
|
-
@scanner.scan_until(/(?=[\n\r}])/)
|
|
1158
|
+
@scanner.scan_until(/(?=[\n\r}]|\\n|\\r)/) || @scanner.terminate
|
|
1073
1159
|
else
|
|
1074
1160
|
# Top level or neutral, stop at newline
|
|
1075
|
-
@scanner.scan_until(/(?=[\n\r])/)
|
|
1161
|
+
@scanner.scan_until(/(?=[\n\r]|\\n|\\r)/) || @scanner.terminate
|
|
1076
1162
|
end
|
|
1163
|
+
|
|
1164
|
+
# Consume literal escaped newlines so they don't break subsequent parsing.
|
|
1165
|
+
# (Real newlines will be left alone here and consumed normally by skip_whitespaces).
|
|
1166
|
+
@scanner.skip(/\\n|\\r/)
|
|
1077
1167
|
else
|
|
1078
1168
|
# The character at the current position (likely '/') is not the start of a
|
|
1079
1169
|
# valid comment. To prevent an infinite loop in the calling parser, we must
|
|
@@ -1088,13 +1178,11 @@ module JsonMend
|
|
|
1088
1178
|
# It quickly iterates to find a character, handling escaped characters, and
|
|
1089
1179
|
# returns the index (offset) from the scanner
|
|
1090
1180
|
def skip_to_character(characters, start_idx: 0)
|
|
1091
|
-
pattern =
|
|
1092
|
-
|
|
1093
|
-
|
|
1094
|
-
|
|
1095
|
-
|
|
1096
|
-
Regexp.new(chars.join('|'))
|
|
1097
|
-
end
|
|
1181
|
+
pattern = SKIP_CHARS_REGEX_CACHE.fetch(characters, nil)
|
|
1182
|
+
if pattern.nil?
|
|
1183
|
+
chars = Array(characters).map { |c| Regexp.escape(c.to_s) }
|
|
1184
|
+
pattern = Regexp.new(chars.join('|'))
|
|
1185
|
+
end
|
|
1098
1186
|
|
|
1099
1187
|
saved_pos = @scanner.pos
|
|
1100
1188
|
# Skip start_idx
|
data/lib/json_mend/version.rb
CHANGED
data/lib/json_mend.rb
CHANGED
|
@@ -16,12 +16,18 @@ module JsonMend
|
|
|
16
16
|
def repair(json_string, return_objects: false)
|
|
17
17
|
# First, attempt to parse the string with the standard library.
|
|
18
18
|
repaired_json = begin
|
|
19
|
-
JSON.parse(
|
|
19
|
+
parsed = JSON.parse(
|
|
20
20
|
json_string,
|
|
21
21
|
allow_trailing_comma: true,
|
|
22
22
|
allow_control_characters: true
|
|
23
23
|
)
|
|
24
|
-
|
|
24
|
+
|
|
25
|
+
# Verify the native parser didn't produce invalid UTF-8 (like unpaired surrogates)
|
|
26
|
+
# by ensuring it can safely dump its own output.
|
|
27
|
+
JSON.dump(parsed)
|
|
28
|
+
|
|
29
|
+
parsed
|
|
30
|
+
rescue JSON::ParserError, JSON::GeneratorError
|
|
25
31
|
parser = Parser.new(json_string)
|
|
26
32
|
parser.parse
|
|
27
33
|
end
|
|
@@ -29,8 +35,8 @@ module JsonMend
|
|
|
29
35
|
# Avoids returning `null` for empty results, returns the object directly
|
|
30
36
|
return repaired_json if return_objects
|
|
31
37
|
|
|
32
|
-
#
|
|
33
|
-
|
|
38
|
+
# Always return a valid JSON string. For unparseable input, `nil` dumps to "null".
|
|
39
|
+
JSON.dump(repaired_json)
|
|
34
40
|
end
|
|
35
41
|
end
|
|
36
42
|
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: json_mend
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1
|
|
4
|
+
version: 0.2.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Oleksii Vasyliev
|
|
@@ -59,7 +59,7 @@ files:
|
|
|
59
59
|
- lib/json_mend/parser.rb
|
|
60
60
|
- lib/json_mend/version.rb
|
|
61
61
|
- sig/json_mend.rbs
|
|
62
|
-
- sig/manifest.
|
|
62
|
+
- sig/manifest.yml
|
|
63
63
|
homepage: https://github.com/le0pard/json_mend
|
|
64
64
|
licenses:
|
|
65
65
|
- MIT
|
|
@@ -77,14 +77,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
77
77
|
requirements:
|
|
78
78
|
- - ">="
|
|
79
79
|
- !ruby/object:Gem::Version
|
|
80
|
-
version: 3.
|
|
80
|
+
version: 3.2.0
|
|
81
81
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
82
82
|
requirements:
|
|
83
83
|
- - ">="
|
|
84
84
|
- !ruby/object:Gem::Version
|
|
85
85
|
version: '0'
|
|
86
86
|
requirements: []
|
|
87
|
-
rubygems_version:
|
|
87
|
+
rubygems_version: 4.0.6
|
|
88
88
|
specification_version: 4
|
|
89
89
|
summary: Repair broken JSON
|
|
90
90
|
test_files: []
|
|
File without changes
|