json_mend 0.3.0 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +2 -2
- data/lib/json_mend/parser.rb +42 -64
- data/lib/json_mend/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 923008e3c63e24de16c3ee6b26097cf4064a32a16bac749c5501e313996238f1
|
|
4
|
+
data.tar.gz: c6c6040f9d54fe7604ae7126402886af159aae5e001dca93cbceebf254839a55
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: c869c17b06f5ed0e46e3f74ccf59c8c374af1431b43ee7d9d59be2170e5ba88e10f83753db36af1e4ebba2cf519273b5f5c32c42fa386978f6a96c22095b063d
|
|
7
|
+
data.tar.gz: 770eb1238b3f73261a2130b2a900dfff0bd2770cf0f6b3b38ecee30cafdbfb09e59b87e4df4dd8aba722c5dbbd14bb1b35df509fdc92efbe081d8342ed7e435b
|
data/.rubocop.yml
CHANGED
data/lib/json_mend/parser.rb
CHANGED
|
@@ -70,7 +70,7 @@ module JsonMend
|
|
|
70
70
|
break
|
|
71
71
|
else
|
|
72
72
|
# Ignore strings that look like closing braces garbage (e.g. "}", " ] ")
|
|
73
|
-
next if new_json.is_a?(String) && new_json.
|
|
73
|
+
next if new_json.is_a?(String) && new_json.match?(/\A\s*[}\]]+\s*\z/)
|
|
74
74
|
|
|
75
75
|
if both_hash?(json.last, new_json)
|
|
76
76
|
json[-1] = deep_merge_hashes(json.last, new_json)
|
|
@@ -265,20 +265,14 @@ module JsonMend
|
|
|
265
265
|
|
|
266
266
|
if value == :inferred_true
|
|
267
267
|
if %w[true false null].include?(key.downcase)
|
|
268
|
-
|
|
269
|
-
#
|
|
270
|
-
|
|
271
|
-
prev_byte
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
prev_byte.between?(97, 122) || # a-z
|
|
277
|
-
[36, 45, 95].include?(prev_byte) # $, -, _
|
|
278
|
-
)
|
|
279
|
-
else
|
|
280
|
-
is_concatenated = false
|
|
281
|
-
end
|
|
268
|
+
prev_byte = @scanner.string.getbyte(pos_before_key - 1)
|
|
269
|
+
# Check ASCII byte ranges for a-z, A-Z, 0-9, $, -, and _
|
|
270
|
+
is_concatenated = prev_byte && (
|
|
271
|
+
prev_byte.between?(48, 57) || # 0-9
|
|
272
|
+
prev_byte.between?(65, 90) || # A-Z
|
|
273
|
+
prev_byte.between?(97, 122) || # a-z
|
|
274
|
+
[36, 45, 95].include?(prev_byte) # $, -, _
|
|
275
|
+
)
|
|
282
276
|
|
|
283
277
|
return [nil, nil, false] unless is_concatenated
|
|
284
278
|
end
|
|
@@ -335,7 +329,7 @@ module JsonMend
|
|
|
335
329
|
@context.pop
|
|
336
330
|
|
|
337
331
|
# If parse_json returned JSON_STOP_TOKEN (nothing found due to garbage->terminator),
|
|
338
|
-
# treat it as
|
|
332
|
+
# treat it as empty string for object values to be safe.
|
|
339
333
|
value == JSON_STOP_TOKEN ? '' : value
|
|
340
334
|
end
|
|
341
335
|
|
|
@@ -426,10 +420,10 @@ module JsonMend
|
|
|
426
420
|
# many common errors found in LLM-generated JSON, such as missing quotes,
|
|
427
421
|
# incorrect escape sequences, and ambiguous string terminators
|
|
428
422
|
def parse_string
|
|
429
|
-
char =
|
|
423
|
+
char = peek_char
|
|
430
424
|
|
|
431
425
|
# A valid string can only start with a valid quote or, in our case, with a literal
|
|
432
|
-
while !@scanner.eos? && !STRING_DELIMITERS.include?(char) && !char
|
|
426
|
+
while !@scanner.eos? && !STRING_DELIMITERS.include?(char) && !char&.match?(/[\p{L}0-9$_-]/)
|
|
433
427
|
return '' if TERMINATORS_STRING_GUESSED.include?(char)
|
|
434
428
|
|
|
435
429
|
@scanner.getch
|
|
@@ -485,18 +479,6 @@ module JsonMend
|
|
|
485
479
|
|
|
486
480
|
# string helper methods
|
|
487
481
|
|
|
488
|
-
def prepare_string_parsing
|
|
489
|
-
char = peek_char
|
|
490
|
-
|
|
491
|
-
# Consume comments that appear before the string starts
|
|
492
|
-
while COMMENT_DELIMETERS.include?(char)
|
|
493
|
-
parse_comment
|
|
494
|
-
char = peek_char
|
|
495
|
-
end
|
|
496
|
-
|
|
497
|
-
char
|
|
498
|
-
end
|
|
499
|
-
|
|
500
482
|
def determine_delimiters(char:)
|
|
501
483
|
missing_quotes = false
|
|
502
484
|
lstring_delimiter = rstring_delimiter = '"'
|
|
@@ -581,7 +563,6 @@ module JsonMend
|
|
|
581
563
|
)
|
|
582
564
|
char = peek_char
|
|
583
565
|
unmatched_delimiter = false
|
|
584
|
-
safe_string_until = -1 # Fast-forward pointer to safely bypass O(N^2) lookaheads
|
|
585
566
|
# --- Main Parsing Loop ---
|
|
586
567
|
while !@scanner.eos? && char != rstring_delimiter
|
|
587
568
|
# Fast-path for unquoted keys (e.g. { key: val })
|
|
@@ -600,33 +581,30 @@ module JsonMend
|
|
|
600
581
|
missing_quotes:
|
|
601
582
|
)
|
|
602
583
|
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
if current_context?(:object_value) && TERMINATORS_OBJECT_VALUE.include?(char) &&
|
|
606
|
-
(string_parts.empty? || string_parts.last != rstring_delimiter)
|
|
607
|
-
|
|
608
|
-
is_break = check_rstring_delimiter_missing(
|
|
609
|
-
string_parts:,
|
|
610
|
-
lstring_delimiter:,
|
|
611
|
-
rstring_delimiter:,
|
|
612
|
-
missing_quotes:
|
|
613
|
-
)
|
|
614
|
-
break if is_break
|
|
615
|
-
end
|
|
584
|
+
if current_context?(:object_value) && TERMINATORS_OBJECT_VALUE.include?(char) &&
|
|
585
|
+
(string_parts.empty? || string_parts.last != rstring_delimiter)
|
|
616
586
|
|
|
617
|
-
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
587
|
+
is_break = check_rstring_delimiter_missing(
|
|
588
|
+
string_parts:,
|
|
589
|
+
lstring_delimiter:,
|
|
590
|
+
rstring_delimiter:,
|
|
591
|
+
missing_quotes:
|
|
592
|
+
)
|
|
593
|
+
break if is_break
|
|
594
|
+
end
|
|
622
595
|
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
596
|
+
if char == ']' && context_contain?(:array) && string_parts.last != rstring_delimiter
|
|
597
|
+
i = skip_to_character(rstring_delimiter)
|
|
598
|
+
# No delimiter found
|
|
599
|
+
break unless peek_char(i)
|
|
600
|
+
end
|
|
601
|
+
|
|
602
|
+
if current_context?(:object_value) && char == '}'
|
|
603
|
+
# We found the end of an object while parsing a value
|
|
604
|
+
# Check if the object is really over, to avoid doubling the closing brace
|
|
605
|
+
i = skip_whitespaces_at(start_idx: 1)
|
|
606
|
+
next_c = peek_char(i)
|
|
607
|
+
break unless next_c
|
|
630
608
|
end
|
|
631
609
|
|
|
632
610
|
string_parts << char
|
|
@@ -644,7 +622,7 @@ module JsonMend
|
|
|
644
622
|
end
|
|
645
623
|
|
|
646
624
|
# If we are in object key context and we find a colon, it could be a missing right quote
|
|
647
|
-
if
|
|
625
|
+
if char == ':' && !missing_quotes && current_context?(:object_key)
|
|
648
626
|
is_break = handle_missing_quotes_termination(
|
|
649
627
|
lstring_delimiter:,
|
|
650
628
|
rstring_delimiter:
|
|
@@ -670,8 +648,6 @@ module JsonMend
|
|
|
670
648
|
string_parts << char.to_s
|
|
671
649
|
@scanner.getch
|
|
672
650
|
char = peek_char
|
|
673
|
-
|
|
674
|
-
safe_string_until = @scanner.pos + skip_to_character(rstring_delimiter)
|
|
675
651
|
end
|
|
676
652
|
end
|
|
677
653
|
end
|
|
@@ -1257,10 +1233,12 @@ module JsonMend
|
|
|
1257
1233
|
# returns the index (offset) from the scanner
|
|
1258
1234
|
def skip_to_character(characters, start_idx: 0)
|
|
1259
1235
|
pattern = SKIP_CHARS_REGEX_CACHE.fetch(characters, nil)
|
|
1236
|
+
# :nocov:
|
|
1260
1237
|
if pattern.nil?
|
|
1261
1238
|
chars = Array(characters).map { |c| Regexp.escape(c.to_s) }
|
|
1262
1239
|
pattern = Regexp.new(chars.join('|'))
|
|
1263
1240
|
end
|
|
1241
|
+
# :nocov:
|
|
1264
1242
|
|
|
1265
1243
|
saved_pos = @scanner.pos
|
|
1266
1244
|
# Skip start_idx
|
|
@@ -1321,7 +1299,7 @@ module JsonMend
|
|
|
1321
1299
|
(matched.length - 1) + start_idx
|
|
1322
1300
|
else
|
|
1323
1301
|
# No non-space found.
|
|
1324
|
-
@scanner.
|
|
1302
|
+
(@scanner.string.length - @scanner.charpos) + start_idx
|
|
1325
1303
|
end
|
|
1326
1304
|
|
|
1327
1305
|
@scanner.pos = saved_pos
|
|
@@ -1352,12 +1330,12 @@ module JsonMend
|
|
|
1352
1330
|
# Handle the common 0-offset case
|
|
1353
1331
|
if offset.zero?
|
|
1354
1332
|
# peek(1) returns the next BYTE, not character
|
|
1355
|
-
|
|
1356
|
-
return nil
|
|
1333
|
+
byte = @scanner.string.getbyte(@scanner.pos)
|
|
1334
|
+
return nil unless byte
|
|
1357
1335
|
|
|
1358
1336
|
# Fast path: If it's a standard ASCII char (0-127), return it directly.
|
|
1359
|
-
#
|
|
1360
|
-
return
|
|
1337
|
+
# Enforcing UTF-8 ensures we don't mix US-ASCII and UTF-8 strings later.
|
|
1338
|
+
return byte.chr(Encoding::UTF_8) if byte < 128
|
|
1361
1339
|
|
|
1362
1340
|
# Slow path: If it's a multibyte char (e.g. “), use regex to match the full character.
|
|
1363
1341
|
return @scanner.check(/./m)
|
data/lib/json_mend/version.rb
CHANGED