json_mend 0.3.1 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/json_mend/parser.rb +15 -32
- data/lib/json_mend/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: ae2e6bb67d6227598226e35e852e4b4ae7f681e6ce8bcb94d11bd5e61c74a989
|
|
4
|
+
data.tar.gz: 0fbd0b9cc00caa626a794e2024d34d0f31d9bf407f31e7ce8cbb0e1b78c9773d
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: afd9daaec420a7bdb29ed2bf5d6438e5b335d2990eeae24503ab0fc10e6221a299dbcf63f4d0b09e093a7a03a523f5fa90db5ff46fc7cbec40670cac71f4955a
|
|
7
|
+
data.tar.gz: 730768fa1d1954c626c88c17b8afc3ec7e4726abcd2b4602072453e35d910da0cd6e1b92c5841e571abcd22c6bc7e02df65de7b8dfab45e9cd1210de24ca6f94
|
data/lib/json_mend/parser.rb
CHANGED
|
@@ -70,7 +70,7 @@ module JsonMend
|
|
|
70
70
|
break
|
|
71
71
|
else
|
|
72
72
|
# Ignore strings that look like closing braces garbage (e.g. "}", " ] ")
|
|
73
|
-
next if new_json.is_a?(String) && new_json.
|
|
73
|
+
next if new_json.is_a?(String) && new_json.match?(/\A\s*[}\]]+\s*\z/)
|
|
74
74
|
|
|
75
75
|
if both_hash?(json.last, new_json)
|
|
76
76
|
json[-1] = deep_merge_hashes(json.last, new_json)
|
|
@@ -265,20 +265,14 @@ module JsonMend
|
|
|
265
265
|
|
|
266
266
|
if value == :inferred_true
|
|
267
267
|
if %w[true false null].include?(key.downcase)
|
|
268
|
-
|
|
269
|
-
#
|
|
270
|
-
|
|
271
|
-
prev_byte
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
prev_byte.between?(97, 122) || # a-z
|
|
277
|
-
[36, 45, 95].include?(prev_byte) # $, -, _
|
|
278
|
-
)
|
|
279
|
-
else
|
|
280
|
-
is_concatenated = false
|
|
281
|
-
end
|
|
268
|
+
prev_byte = @scanner.string.getbyte(pos_before_key - 1)
|
|
269
|
+
# Check ASCII byte ranges for a-z, A-Z, 0-9, $, -, and _
|
|
270
|
+
is_concatenated = prev_byte && (
|
|
271
|
+
prev_byte.between?(48, 57) || # 0-9
|
|
272
|
+
prev_byte.between?(65, 90) || # A-Z
|
|
273
|
+
prev_byte.between?(97, 122) || # a-z
|
|
274
|
+
[36, 45, 95].include?(prev_byte) # $, -, _
|
|
275
|
+
)
|
|
282
276
|
|
|
283
277
|
return [nil, nil, false] unless is_concatenated
|
|
284
278
|
end
|
|
@@ -335,7 +329,7 @@ module JsonMend
|
|
|
335
329
|
@context.pop
|
|
336
330
|
|
|
337
331
|
# If parse_json returned JSON_STOP_TOKEN (nothing found due to garbage->terminator),
|
|
338
|
-
# treat it as
|
|
332
|
+
# treat it as empty string for object values to be safe.
|
|
339
333
|
value == JSON_STOP_TOKEN ? '' : value
|
|
340
334
|
end
|
|
341
335
|
|
|
@@ -426,10 +420,10 @@ module JsonMend
|
|
|
426
420
|
# many common errors found in LLM-generated JSON, such as missing quotes,
|
|
427
421
|
# incorrect escape sequences, and ambiguous string terminators
|
|
428
422
|
def parse_string
|
|
429
|
-
char =
|
|
423
|
+
char = peek_char
|
|
430
424
|
|
|
431
425
|
# A valid string can only start with a valid quote or, in our case, with a literal
|
|
432
|
-
while !@scanner.eos? && !STRING_DELIMITERS.include?(char) && !char
|
|
426
|
+
while !@scanner.eos? && !STRING_DELIMITERS.include?(char) && !char&.match?(/[\p{L}0-9$_-]/)
|
|
433
427
|
return '' if TERMINATORS_STRING_GUESSED.include?(char)
|
|
434
428
|
|
|
435
429
|
@scanner.getch
|
|
@@ -485,18 +479,6 @@ module JsonMend
|
|
|
485
479
|
|
|
486
480
|
# string helper methods
|
|
487
481
|
|
|
488
|
-
def prepare_string_parsing
|
|
489
|
-
char = peek_char
|
|
490
|
-
|
|
491
|
-
# Consume comments that appear before the string starts
|
|
492
|
-
while COMMENT_DELIMETERS.include?(char)
|
|
493
|
-
parse_comment
|
|
494
|
-
char = peek_char
|
|
495
|
-
end
|
|
496
|
-
|
|
497
|
-
char
|
|
498
|
-
end
|
|
499
|
-
|
|
500
482
|
def determine_delimiters(char:)
|
|
501
483
|
missing_quotes = false
|
|
502
484
|
lstring_delimiter = rstring_delimiter = '"'
|
|
@@ -1251,10 +1233,12 @@ module JsonMend
|
|
|
1251
1233
|
# returns the index (offset) from the scanner
|
|
1252
1234
|
def skip_to_character(characters, start_idx: 0)
|
|
1253
1235
|
pattern = SKIP_CHARS_REGEX_CACHE.fetch(characters, nil)
|
|
1236
|
+
# :nocov:
|
|
1254
1237
|
if pattern.nil?
|
|
1255
1238
|
chars = Array(characters).map { |c| Regexp.escape(c.to_s) }
|
|
1256
1239
|
pattern = Regexp.new(chars.join('|'))
|
|
1257
1240
|
end
|
|
1241
|
+
# :nocov:
|
|
1258
1242
|
|
|
1259
1243
|
saved_pos = @scanner.pos
|
|
1260
1244
|
# Skip start_idx
|
|
@@ -1315,7 +1299,7 @@ module JsonMend
|
|
|
1315
1299
|
(matched.length - 1) + start_idx
|
|
1316
1300
|
else
|
|
1317
1301
|
# No non-space found.
|
|
1318
|
-
@scanner.
|
|
1302
|
+
(@scanner.string.length - @scanner.charpos) + start_idx
|
|
1319
1303
|
end
|
|
1320
1304
|
|
|
1321
1305
|
@scanner.pos = saved_pos
|
|
@@ -1350,7 +1334,6 @@ module JsonMend
|
|
|
1350
1334
|
return nil if byte_str.empty?
|
|
1351
1335
|
|
|
1352
1336
|
# Fast path: If it's a standard ASCII char (0-127), return it directly.
|
|
1353
|
-
# This avoids the regex overhead for standard JSON characters ({, [, ", etc).
|
|
1354
1337
|
return byte_str if byte_str.getbyte(0) < 128
|
|
1355
1338
|
|
|
1356
1339
|
# Slow path: If it's a multibyte char (e.g. “), use regex to match the full character.
|
data/lib/json_mend/version.rb
CHANGED