json_mend 0.3.1 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/json_mend/parser.rb +19 -35
- data/lib/json_mend/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 923008e3c63e24de16c3ee6b26097cf4064a32a16bac749c5501e313996238f1
|
|
4
|
+
data.tar.gz: c6c6040f9d54fe7604ae7126402886af159aae5e001dca93cbceebf254839a55
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: c869c17b06f5ed0e46e3f74ccf59c8c374af1431b43ee7d9d59be2170e5ba88e10f83753db36af1e4ebba2cf519273b5f5c32c42fa386978f6a96c22095b063d
|
|
7
|
+
data.tar.gz: 770eb1238b3f73261a2130b2a900dfff0bd2770cf0f6b3b38ecee30cafdbfb09e59b87e4df4dd8aba722c5dbbd14bb1b35df509fdc92efbe081d8342ed7e435b
|
data/lib/json_mend/parser.rb
CHANGED
|
@@ -70,7 +70,7 @@ module JsonMend
|
|
|
70
70
|
break
|
|
71
71
|
else
|
|
72
72
|
# Ignore strings that look like closing braces garbage (e.g. "}", " ] ")
|
|
73
|
-
next if new_json.is_a?(String) && new_json.
|
|
73
|
+
next if new_json.is_a?(String) && new_json.match?(/\A\s*[}\]]+\s*\z/)
|
|
74
74
|
|
|
75
75
|
if both_hash?(json.last, new_json)
|
|
76
76
|
json[-1] = deep_merge_hashes(json.last, new_json)
|
|
@@ -265,20 +265,14 @@ module JsonMend
|
|
|
265
265
|
|
|
266
266
|
if value == :inferred_true
|
|
267
267
|
if %w[true false null].include?(key.downcase)
|
|
268
|
-
|
|
269
|
-
#
|
|
270
|
-
|
|
271
|
-
prev_byte
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
prev_byte.between?(97, 122) || # a-z
|
|
277
|
-
[36, 45, 95].include?(prev_byte) # $, -, _
|
|
278
|
-
)
|
|
279
|
-
else
|
|
280
|
-
is_concatenated = false
|
|
281
|
-
end
|
|
268
|
+
prev_byte = @scanner.string.getbyte(pos_before_key - 1)
|
|
269
|
+
# Check ASCII byte ranges for a-z, A-Z, 0-9, $, -, and _
|
|
270
|
+
is_concatenated = prev_byte && (
|
|
271
|
+
prev_byte.between?(48, 57) || # 0-9
|
|
272
|
+
prev_byte.between?(65, 90) || # A-Z
|
|
273
|
+
prev_byte.between?(97, 122) || # a-z
|
|
274
|
+
[36, 45, 95].include?(prev_byte) # $, -, _
|
|
275
|
+
)
|
|
282
276
|
|
|
283
277
|
return [nil, nil, false] unless is_concatenated
|
|
284
278
|
end
|
|
@@ -335,7 +329,7 @@ module JsonMend
|
|
|
335
329
|
@context.pop
|
|
336
330
|
|
|
337
331
|
# If parse_json returned JSON_STOP_TOKEN (nothing found due to garbage->terminator),
|
|
338
|
-
# treat it as
|
|
332
|
+
# treat it as empty string for object values to be safe.
|
|
339
333
|
value == JSON_STOP_TOKEN ? '' : value
|
|
340
334
|
end
|
|
341
335
|
|
|
@@ -426,10 +420,10 @@ module JsonMend
|
|
|
426
420
|
# many common errors found in LLM-generated JSON, such as missing quotes,
|
|
427
421
|
# incorrect escape sequences, and ambiguous string terminators
|
|
428
422
|
def parse_string
|
|
429
|
-
char =
|
|
423
|
+
char = peek_char
|
|
430
424
|
|
|
431
425
|
# A valid string can only start with a valid quote or, in our case, with a literal
|
|
432
|
-
while !@scanner.eos? && !STRING_DELIMITERS.include?(char) && !char
|
|
426
|
+
while !@scanner.eos? && !STRING_DELIMITERS.include?(char) && !char&.match?(/[\p{L}0-9$_-]/)
|
|
433
427
|
return '' if TERMINATORS_STRING_GUESSED.include?(char)
|
|
434
428
|
|
|
435
429
|
@scanner.getch
|
|
@@ -485,18 +479,6 @@ module JsonMend
|
|
|
485
479
|
|
|
486
480
|
# string helper methods
|
|
487
481
|
|
|
488
|
-
def prepare_string_parsing
|
|
489
|
-
char = peek_char
|
|
490
|
-
|
|
491
|
-
# Consume comments that appear before the string starts
|
|
492
|
-
while COMMENT_DELIMETERS.include?(char)
|
|
493
|
-
parse_comment
|
|
494
|
-
char = peek_char
|
|
495
|
-
end
|
|
496
|
-
|
|
497
|
-
char
|
|
498
|
-
end
|
|
499
|
-
|
|
500
482
|
def determine_delimiters(char:)
|
|
501
483
|
missing_quotes = false
|
|
502
484
|
lstring_delimiter = rstring_delimiter = '"'
|
|
@@ -1251,10 +1233,12 @@ module JsonMend
|
|
|
1251
1233
|
# returns the index (offset) from the scanner
|
|
1252
1234
|
def skip_to_character(characters, start_idx: 0)
|
|
1253
1235
|
pattern = SKIP_CHARS_REGEX_CACHE.fetch(characters, nil)
|
|
1236
|
+
# :nocov:
|
|
1254
1237
|
if pattern.nil?
|
|
1255
1238
|
chars = Array(characters).map { |c| Regexp.escape(c.to_s) }
|
|
1256
1239
|
pattern = Regexp.new(chars.join('|'))
|
|
1257
1240
|
end
|
|
1241
|
+
# :nocov:
|
|
1258
1242
|
|
|
1259
1243
|
saved_pos = @scanner.pos
|
|
1260
1244
|
# Skip start_idx
|
|
@@ -1315,7 +1299,7 @@ module JsonMend
|
|
|
1315
1299
|
(matched.length - 1) + start_idx
|
|
1316
1300
|
else
|
|
1317
1301
|
# No non-space found.
|
|
1318
|
-
@scanner.
|
|
1302
|
+
(@scanner.string.length - @scanner.charpos) + start_idx
|
|
1319
1303
|
end
|
|
1320
1304
|
|
|
1321
1305
|
@scanner.pos = saved_pos
|
|
@@ -1346,12 +1330,12 @@ module JsonMend
|
|
|
1346
1330
|
# Handle the common 0-offset case
|
|
1347
1331
|
if offset.zero?
|
|
1348
1332
|
# peek(1) returns the next BYTE, not character
|
|
1349
|
-
|
|
1350
|
-
return nil
|
|
1333
|
+
byte = @scanner.string.getbyte(@scanner.pos)
|
|
1334
|
+
return nil unless byte
|
|
1351
1335
|
|
|
1352
1336
|
# Fast path: If it's a standard ASCII char (0-127), return it directly.
|
|
1353
|
-
#
|
|
1354
|
-
return
|
|
1337
|
+
# Enforcing UTF-8 ensures we don't mix US-ASCII and UTF-8 strings later.
|
|
1338
|
+
return byte.chr(Encoding::UTF_8) if byte < 128
|
|
1355
1339
|
|
|
1356
1340
|
# Slow path: If it's a multibyte char (e.g. “), use regex to match the full character.
|
|
1357
1341
|
return @scanner.check(/./m)
|
data/lib/json_mend/version.rb
CHANGED