json_mend 0.3.4 → 0.3.5
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/json_mend/parser.rb +13 -30
- data/lib/json_mend/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 7dcc378c148dc0514753b966693e0afe286bf145c283d069f01c0b9c74f59a75
|
|
4
|
+
data.tar.gz: 37fa52116d57bc80b168c20feba1fd7b33edab73d1f157759bc71f4a3362802b
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: c94ff3c9d0c2e3602b6a125e74fdc72896709b311221eeceeecd3fbf96b0aef8dc82edfed136512443b5ccc403d512d7beb3476c59732f1a04280a71bbf7de5b
|
|
7
|
+
data.tar.gz: f3603aa1d247686329ec0effe5d1819f4feec47cb43adbff520ed7695831cc30649fa99b4e21a51d4e460754af3d825480335cea10ffbb79feea773fabfa1cd9
|
data/lib/json_mend/parser.rb
CHANGED
|
@@ -44,6 +44,11 @@ module JsonMend
|
|
|
44
44
|
# Pre-compile regexes for performance
|
|
45
45
|
NUMBER_REGEX = /[#{Regexp.escape(NUMBER_CHARS.to_a.join)}]+/
|
|
46
46
|
NUMBER_NO_COMMA_REGEX = /[#{Regexp.escape(NUMBER_CHARS.dup.tap { |s| s.delete(',') }.to_a.join)}]+/
|
|
47
|
+
INVALID_NUMBER_TRAILERS_REGEX = /[#{Regexp.union(*INVALID_NUMBER_TRAILERS)}]+\z/
|
|
48
|
+
HEX_ESCAPE_REGEXES = {
|
|
49
|
+
'u' => /[0-9a-fA-F]{4}/,
|
|
50
|
+
'x' => /[0-9a-fA-F]{2}/
|
|
51
|
+
}.freeze
|
|
47
52
|
|
|
48
53
|
def initialize(json_string)
|
|
49
54
|
@scanner = StringScanner.new(json_string)
|
|
@@ -72,7 +77,7 @@ module JsonMend
|
|
|
72
77
|
# Ignore strings that look like closing braces garbage (e.g. "}", " ] ")
|
|
73
78
|
next if new_json.is_a?(String) && new_json.match?(/\A\s*[}\]]+\s*\z/)
|
|
74
79
|
|
|
75
|
-
if both_hash?(json.last, new_json)
|
|
80
|
+
if json.last.is_a?(Hash) && new_json.is_a?(Hash)
|
|
76
81
|
json[-1] = deep_merge_hashes(json.last, new_json)
|
|
77
82
|
else
|
|
78
83
|
json << new_json
|
|
@@ -396,11 +401,8 @@ module JsonMend
|
|
|
396
401
|
arr << value
|
|
397
402
|
end
|
|
398
403
|
|
|
404
|
+
@scanner.skip(/[\s,]+/)
|
|
399
405
|
char = peek_char
|
|
400
|
-
while char && char != ']' && (char.match?(/\s/) || char == ',')
|
|
401
|
-
@scanner.getch
|
|
402
|
-
char = peek_char
|
|
403
|
-
end
|
|
404
406
|
end
|
|
405
407
|
|
|
406
408
|
# Handle a potentially missing closing bracket, a common LLM error.
|
|
@@ -919,13 +921,7 @@ module JsonMend
|
|
|
919
921
|
rstring_delimiter_missing = false
|
|
920
922
|
elsif peek_char(j)
|
|
921
923
|
# Check for an unmatched opening brace in string_parts
|
|
922
|
-
string_parts.
|
|
923
|
-
next unless c == '{'
|
|
924
|
-
|
|
925
|
-
# Ok then this is part of the string
|
|
926
|
-
rstring_delimiter_missing = false
|
|
927
|
-
break
|
|
928
|
-
end
|
|
924
|
+
rstring_delimiter_missing = false if string_parts.include?('{')
|
|
929
925
|
end
|
|
930
926
|
|
|
931
927
|
end
|
|
@@ -959,21 +955,12 @@ module JsonMend
|
|
|
959
955
|
entry_pos = @scanner.pos
|
|
960
956
|
@scanner.getch # consume 'u' or 'x'
|
|
961
957
|
|
|
962
|
-
|
|
963
|
-
hex_parts = []
|
|
964
|
-
|
|
965
|
-
# Use getch in loop to correctly extract chars (handling multibyte)
|
|
966
|
-
num_chars.times do
|
|
967
|
-
c = @scanner.getch
|
|
968
|
-
break unless c
|
|
969
|
-
|
|
970
|
-
hex_parts << c
|
|
971
|
-
end
|
|
958
|
+
hex_regex = HEX_ESCAPE_REGEXES.fetch(char)
|
|
972
959
|
|
|
973
960
|
# Validate valid hex digits
|
|
974
|
-
if
|
|
961
|
+
if (hex_str = @scanner.scan(hex_regex))
|
|
975
962
|
string_parts.pop
|
|
976
|
-
hex_val =
|
|
963
|
+
hex_val = hex_str.to_i(16)
|
|
977
964
|
|
|
978
965
|
if char == 'u' && hex_val.between?(0xD800, 0xDBFF)
|
|
979
966
|
# Handle high surrogate pair
|
|
@@ -1118,9 +1105,9 @@ module JsonMend
|
|
|
1118
1105
|
end
|
|
1119
1106
|
|
|
1120
1107
|
# Handle cases where the number ends with one or more invalid characters.
|
|
1121
|
-
if !scanned_str.empty? &&
|
|
1108
|
+
if !scanned_str.empty? && scanned_str.match?(INVALID_NUMBER_TRAILERS_REGEX)
|
|
1122
1109
|
# Do not rewind scanner, simply discard the invalid trailing chars (garbage)
|
|
1123
|
-
scanned_str
|
|
1110
|
+
scanned_str.sub!(INVALID_NUMBER_TRAILERS_REGEX, '')
|
|
1124
1111
|
end
|
|
1125
1112
|
|
|
1126
1113
|
# Reject non-numbers (e.g., stray periods "." or dashes "-" from LLM conversational text)
|
|
@@ -1306,10 +1293,6 @@ module JsonMend
|
|
|
1306
1293
|
res
|
|
1307
1294
|
end
|
|
1308
1295
|
|
|
1309
|
-
def both_hash?(obj1, obj2)
|
|
1310
|
-
obj1.is_a?(Hash) && obj2.is_a?(Hash)
|
|
1311
|
-
end
|
|
1312
|
-
|
|
1313
1296
|
# Skips whitespaces
|
|
1314
1297
|
def skip_whitespaces
|
|
1315
1298
|
@scanner.skip(/\s+/)
|
data/lib/json_mend/version.rb
CHANGED