json_mend 0.3.3 → 0.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/json_mend/parser.rb +20 -47
- data/lib/json_mend/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 7dcc378c148dc0514753b966693e0afe286bf145c283d069f01c0b9c74f59a75
|
|
4
|
+
data.tar.gz: 37fa52116d57bc80b168c20feba1fd7b33edab73d1f157759bc71f4a3362802b
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: c94ff3c9d0c2e3602b6a125e74fdc72896709b311221eeceeecd3fbf96b0aef8dc82edfed136512443b5ccc403d512d7beb3476c59732f1a04280a71bbf7de5b
|
|
7
|
+
data.tar.gz: f3603aa1d247686329ec0effe5d1819f4feec47cb43adbff520ed7695831cc30649fa99b4e21a51d4e460754af3d825480335cea10ffbb79feea773fabfa1cd9
|
data/lib/json_mend/parser.rb
CHANGED
|
@@ -44,6 +44,11 @@ module JsonMend
|
|
|
44
44
|
# Pre-compile regexes for performance
|
|
45
45
|
NUMBER_REGEX = /[#{Regexp.escape(NUMBER_CHARS.to_a.join)}]+/
|
|
46
46
|
NUMBER_NO_COMMA_REGEX = /[#{Regexp.escape(NUMBER_CHARS.dup.tap { |s| s.delete(',') }.to_a.join)}]+/
|
|
47
|
+
INVALID_NUMBER_TRAILERS_REGEX = /[#{Regexp.union(*INVALID_NUMBER_TRAILERS)}]+\z/
|
|
48
|
+
HEX_ESCAPE_REGEXES = {
|
|
49
|
+
'u' => /[0-9a-fA-F]{4}/,
|
|
50
|
+
'x' => /[0-9a-fA-F]{2}/
|
|
51
|
+
}.freeze
|
|
47
52
|
|
|
48
53
|
def initialize(json_string)
|
|
49
54
|
@scanner = StringScanner.new(json_string)
|
|
@@ -72,7 +77,7 @@ module JsonMend
|
|
|
72
77
|
# Ignore strings that look like closing braces garbage (e.g. "}", " ] ")
|
|
73
78
|
next if new_json.is_a?(String) && new_json.match?(/\A\s*[}\]]+\s*\z/)
|
|
74
79
|
|
|
75
|
-
if
|
|
80
|
+
if json.last.is_a?(Hash) && new_json.is_a?(Hash)
|
|
76
81
|
json[-1] = deep_merge_hashes(json.last, new_json)
|
|
77
82
|
else
|
|
78
83
|
json << new_json
|
|
@@ -390,20 +395,14 @@ module JsonMend
|
|
|
390
395
|
# Handle JSON_STOP_TOKEN from parse_json (EOS or consumed terminator)
|
|
391
396
|
if value == JSON_STOP_TOKEN
|
|
392
397
|
# Do nothing, just skipped garbage
|
|
393
|
-
elsif strictly_empty?(value)
|
|
394
|
-
# Only consume if we didn't just hit a terminator that parse_json successfully respected
|
|
395
|
-
@scanner.getch unless value.nil? && TERMINATORS_ARRAY.include?(peek_char)
|
|
396
398
|
elsif value == '...' && @scanner.string.getbyte(@scanner.pos - 1) == 46
|
|
397
399
|
# just skip if the previous byte was a dot (46)
|
|
398
400
|
else
|
|
399
401
|
arr << value
|
|
400
402
|
end
|
|
401
403
|
|
|
404
|
+
@scanner.skip(/[\s,]+/)
|
|
402
405
|
char = peek_char
|
|
403
|
-
while char && char != ']' && (char.match?(/\s/) || char == ',')
|
|
404
|
-
@scanner.getch
|
|
405
|
-
char = peek_char
|
|
406
|
-
end
|
|
407
406
|
end
|
|
408
407
|
|
|
409
408
|
# Handle a potentially missing closing bracket, a common LLM error.
|
|
@@ -922,13 +921,7 @@ module JsonMend
|
|
|
922
921
|
rstring_delimiter_missing = false
|
|
923
922
|
elsif peek_char(j)
|
|
924
923
|
# Check for an unmatched opening brace in string_parts
|
|
925
|
-
string_parts.
|
|
926
|
-
next unless c == '{'
|
|
927
|
-
|
|
928
|
-
# Ok then this is part of the string
|
|
929
|
-
rstring_delimiter_missing = false
|
|
930
|
-
break
|
|
931
|
-
end
|
|
924
|
+
rstring_delimiter_missing = false if string_parts.include?('{')
|
|
932
925
|
end
|
|
933
926
|
|
|
934
927
|
end
|
|
@@ -962,21 +955,12 @@ module JsonMend
|
|
|
962
955
|
entry_pos = @scanner.pos
|
|
963
956
|
@scanner.getch # consume 'u' or 'x'
|
|
964
957
|
|
|
965
|
-
|
|
966
|
-
hex_parts = []
|
|
967
|
-
|
|
968
|
-
# Use getch in loop to correctly extract chars (handling multibyte)
|
|
969
|
-
num_chars.times do
|
|
970
|
-
c = @scanner.getch
|
|
971
|
-
break unless c
|
|
972
|
-
|
|
973
|
-
hex_parts << c
|
|
974
|
-
end
|
|
958
|
+
hex_regex = HEX_ESCAPE_REGEXES.fetch(char)
|
|
975
959
|
|
|
976
960
|
# Validate valid hex digits
|
|
977
|
-
if
|
|
961
|
+
if (hex_str = @scanner.scan(hex_regex))
|
|
978
962
|
string_parts.pop
|
|
979
|
-
hex_val =
|
|
963
|
+
hex_val = hex_str.to_i(16)
|
|
980
964
|
|
|
981
965
|
if char == 'u' && hex_val.between?(0xD800, 0xDBFF)
|
|
982
966
|
# Handle high surrogate pair
|
|
@@ -1111,18 +1095,21 @@ module JsonMend
|
|
|
1111
1095
|
# Save the original length so we can safely roll back if it's completely invalid
|
|
1112
1096
|
original_length = scanned_str.bytesize
|
|
1113
1097
|
|
|
1114
|
-
# Handle cases where the number ends with an invalid character.
|
|
1115
|
-
if !scanned_str.empty? && INVALID_NUMBER_TRAILERS.include?(scanned_str[-1])
|
|
1116
|
-
# Do not rewind scanner, simply discard the invalid trailing char (garbage)
|
|
1117
|
-
scanned_str = scanned_str[0...-1]
|
|
1118
1098
|
# Handle cases where what looked like a number is actually a string.
|
|
1119
|
-
# e.g. "123-abc"
|
|
1120
|
-
|
|
1099
|
+
# e.g. "123-abc" or "-Infinity". We exclude strings ending in a comma
|
|
1100
|
+
# to preserve comma recovery logic (e.g. `105,next_key`).
|
|
1101
|
+
if peek_char&.match?(/\p{L}/) && !scanned_str.end_with?(',')
|
|
1121
1102
|
# Roll back the entire scan and re-parse as a string.
|
|
1122
1103
|
@scanner.pos -= original_length
|
|
1123
1104
|
return parse_string
|
|
1124
1105
|
end
|
|
1125
1106
|
|
|
1107
|
+
# Handle cases where the number ends with one or more invalid characters.
|
|
1108
|
+
if !scanned_str.empty? && scanned_str.match?(INVALID_NUMBER_TRAILERS_REGEX)
|
|
1109
|
+
# Do not rewind scanner, simply discard the invalid trailing chars (garbage)
|
|
1110
|
+
scanned_str.sub!(INVALID_NUMBER_TRAILERS_REGEX, '')
|
|
1111
|
+
end
|
|
1112
|
+
|
|
1126
1113
|
# Reject non-numbers (e.g., stray periods "." or dashes "-" from LLM conversational text)
|
|
1127
1114
|
unless scanned_str.match?(/\d/)
|
|
1128
1115
|
@scanner.pos -= original_length
|
|
@@ -1306,20 +1293,6 @@ module JsonMend
|
|
|
1306
1293
|
res
|
|
1307
1294
|
end
|
|
1308
1295
|
|
|
1309
|
-
def both_hash?(obj1, obj2)
|
|
1310
|
-
obj1.is_a?(Hash) && obj2.is_a?(Hash)
|
|
1311
|
-
end
|
|
1312
|
-
|
|
1313
|
-
def strictly_empty?(value)
|
|
1314
|
-
# Check if the value is a container AND if it's empty.
|
|
1315
|
-
case value
|
|
1316
|
-
when String, Array, Hash, Set
|
|
1317
|
-
value.empty?
|
|
1318
|
-
else
|
|
1319
|
-
false
|
|
1320
|
-
end
|
|
1321
|
-
end
|
|
1322
|
-
|
|
1323
1296
|
# Skips whitespaces
|
|
1324
1297
|
def skip_whitespaces
|
|
1325
1298
|
@scanner.skip(/\s+/)
|
data/lib/json_mend/version.rb
CHANGED