json_mend 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +5 -5
- data/lib/json_mend/parser.rb +103 -49
- data/lib/json_mend/version.rb +1 -1
- metadata +6 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 96e37cada4ab9945473f6a49b1d57a24c7822dceb0e6402847d129827686202a
|
|
4
|
+
data.tar.gz: b497489826f674239f601203e237ee7358c7305475a2f1a43d37e465d04ce875
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 641ef79051d205ed61c16b9d6deb5f755d41bfb74f3958ac5b1910e2770ac3df2ccdf7e971ba6d44222e7a71de51078b3964da957b44bf70898db13a798ad239
|
|
7
|
+
data.tar.gz: 605ec12dc272c8b1ce7526c8dcf53748e148a58957e6eca9ebf13133ac7f56db46a56161db03227790ddf5c064a582440a3dc02d33bb117f808e536d910f55fe
|
data/.rubocop.yml
CHANGED
|
@@ -7,19 +7,19 @@ AllCops:
|
|
|
7
7
|
SuggestExtensions: false
|
|
8
8
|
|
|
9
9
|
Metrics/AbcSize:
|
|
10
|
-
Max:
|
|
10
|
+
Max: 65
|
|
11
11
|
|
|
12
12
|
Metrics/ClassLength:
|
|
13
|
-
Max:
|
|
13
|
+
Max: 820
|
|
14
14
|
|
|
15
15
|
Metrics/CyclomaticComplexity:
|
|
16
|
-
Max:
|
|
16
|
+
Max: 35
|
|
17
17
|
|
|
18
18
|
Metrics/MethodLength:
|
|
19
|
-
Max:
|
|
19
|
+
Max: 80
|
|
20
20
|
|
|
21
21
|
Metrics/PerceivedComplexity:
|
|
22
|
-
Max:
|
|
22
|
+
Max: 35
|
|
23
23
|
|
|
24
24
|
Metrics/BlockNesting:
|
|
25
25
|
Max: 5
|
data/lib/json_mend/parser.rb
CHANGED
|
@@ -190,7 +190,7 @@ module JsonMend
|
|
|
190
190
|
end
|
|
191
191
|
|
|
192
192
|
# If we get an empty key and the next character is a closing brace, we're done.
|
|
193
|
-
return [nil, nil, false] if key.empty? && (peek_char.nil? || peek_char == '}')
|
|
193
|
+
return [nil, nil, false] if key.empty? && (peek_char.nil? || peek_char == '}' || @scanner.pos == pos_before_key)
|
|
194
194
|
|
|
195
195
|
# --- 2. Handle Duplicate Keys (Safer Method) ---
|
|
196
196
|
# This is a critical repair for lists of objects missing a comma separator.
|
|
@@ -222,8 +222,10 @@ module JsonMend
|
|
|
222
222
|
# Parses the key of an object, including the special logic for merging dangling arrays.
|
|
223
223
|
# Returns [key, was_array_merged_flag, is_bracketed_flag]
|
|
224
224
|
def parse_object_key(object)
|
|
225
|
+
char = peek_char
|
|
226
|
+
|
|
225
227
|
# First, check for and handle the dangling array merge logic.
|
|
226
|
-
if try_to_merge_dangling_array(object)
|
|
228
|
+
if char == '[' && try_to_merge_dangling_array(object)
|
|
227
229
|
return [nil, true, false] # Signal that an array was merged.
|
|
228
230
|
end
|
|
229
231
|
|
|
@@ -231,7 +233,7 @@ module JsonMend
|
|
|
231
233
|
@context.push(:object_key)
|
|
232
234
|
is_bracketed = false
|
|
233
235
|
|
|
234
|
-
if
|
|
236
|
+
if char == '['
|
|
235
237
|
@scanner.getch # Consume '['
|
|
236
238
|
arr = parse_array
|
|
237
239
|
key = arr.first.to_s
|
|
@@ -242,7 +244,7 @@ module JsonMend
|
|
|
242
244
|
@context.pop
|
|
243
245
|
|
|
244
246
|
# If the key is empty, consume one stray character (unless it is ':', '{', '[', '}' or ']') to prevent infinite loops.
|
|
245
|
-
@scanner.getch if key.empty? && !@scanner.check(/[:}]/) && !@scanner.eos?
|
|
247
|
+
@scanner.getch if key.empty? && !@scanner.check(/[:{\[}\]]/) && !@scanner.eos?
|
|
246
248
|
|
|
247
249
|
[key, false, is_bracketed] # Signal that a key was parsed.
|
|
248
250
|
end
|
|
@@ -355,7 +357,7 @@ module JsonMend
|
|
|
355
357
|
char = prepare_string_parsing
|
|
356
358
|
|
|
357
359
|
# A valid string can only start with a valid quote or, in our case, with a literal
|
|
358
|
-
while !@scanner.eos? && !STRING_DELIMITERS.include?(char) && !char.match?(/[\p{L}0-9]/)
|
|
360
|
+
while !@scanner.eos? && !STRING_DELIMITERS.include?(char) && !char.match?(/[\p{L}0-9$_-]/)
|
|
359
361
|
return '' if TERMINATORS_STRING_GUESSED.include?(char)
|
|
360
362
|
|
|
361
363
|
@scanner.getch
|
|
@@ -434,7 +436,7 @@ module JsonMend
|
|
|
434
436
|
when '“'
|
|
435
437
|
lstring_delimiter = '“'
|
|
436
438
|
rstring_delimiter = '”'
|
|
437
|
-
when /[\p{L}0-9]/
|
|
439
|
+
when /[\p{L}0-9$_-]/
|
|
438
440
|
# Could be a boolean/null, but not if it's an object key.
|
|
439
441
|
if BOOLEAN_OR_NULL_CHARS.include?(char.downcase) && !current_context?(:object_key)
|
|
440
442
|
# parse_literal is non-destructive if it fails to match.
|
|
@@ -509,6 +511,17 @@ module JsonMend
|
|
|
509
511
|
unmatched_delimiter = false
|
|
510
512
|
# --- Main Parsing Loop ---
|
|
511
513
|
while !@scanner.eos? && char != rstring_delimiter
|
|
514
|
+
# Fast-path for unquoted keys (e.g. { key: val })
|
|
515
|
+
# consumes a chunk of valid identifier characters at once
|
|
516
|
+
if missing_quotes && current_context?(:object_key)
|
|
517
|
+
chunk = @scanner.scan(/[a-zA-Z0-9_$-]+/)
|
|
518
|
+
if chunk
|
|
519
|
+
string_parts << chunk
|
|
520
|
+
char = peek_char
|
|
521
|
+
next
|
|
522
|
+
end
|
|
523
|
+
end
|
|
524
|
+
|
|
512
525
|
break if context_termination_reached?(
|
|
513
526
|
char:,
|
|
514
527
|
missing_quotes:
|
|
@@ -715,37 +728,52 @@ module JsonMend
|
|
|
715
728
|
end
|
|
716
729
|
|
|
717
730
|
def check_unmatched_in_array(rstring_delimiter:)
|
|
718
|
-
|
|
719
|
-
#
|
|
720
|
-
|
|
731
|
+
saved_pos = @scanner.pos
|
|
732
|
+
@scanner.getch # Skip the current char (the potential closer)
|
|
733
|
+
|
|
721
734
|
found_next = false
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
slashes = 0
|
|
727
|
-
while j - bk >= 0 && peek_char(j - bk) == '\\'
|
|
728
|
-
slashes += 1
|
|
729
|
-
bk += 1
|
|
730
|
-
end
|
|
731
|
-
if slashes.even?
|
|
732
|
-
found_next = true
|
|
733
|
-
break
|
|
734
|
-
end
|
|
735
|
-
end
|
|
735
|
+
j = 1
|
|
736
|
+
|
|
737
|
+
# Scan forward linearly
|
|
738
|
+
while (c = @scanner.getch)
|
|
736
739
|
j += 1
|
|
740
|
+
next if c != rstring_delimiter
|
|
741
|
+
|
|
742
|
+
# Check if escaped (count preceding backslashes)
|
|
743
|
+
# We need to look behind from the current scanner position
|
|
744
|
+
bk = 1
|
|
745
|
+
slashes = 0
|
|
746
|
+
# Look back in the string buffer directly for speed
|
|
747
|
+
while (char_code = @scanner.string.getbyte(@scanner.pos - 1 - bk)) && char_code == 92 # 92 is backslash
|
|
748
|
+
slashes += 1
|
|
749
|
+
bk += 1
|
|
750
|
+
end
|
|
751
|
+
|
|
752
|
+
if slashes.even?
|
|
753
|
+
found_next = true
|
|
754
|
+
break
|
|
755
|
+
end
|
|
737
756
|
end
|
|
738
757
|
|
|
739
|
-
#
|
|
740
|
-
|
|
741
|
-
|
|
758
|
+
# Reset position immediately after scanning
|
|
759
|
+
@scanner.pos = saved_pos
|
|
760
|
+
|
|
761
|
+
# Check conditions to STOP (treat as closing quote):
|
|
762
|
+
# a) Strictly whitespace between quotes
|
|
763
|
+
# We can check this by examining the substring we just scanned
|
|
764
|
+
substring_between = @scanner.string.byteslice(saved_pos + 1, j - 2)
|
|
765
|
+
is_whitespace = substring_between&.match?(/\A\s*\z/)
|
|
742
766
|
|
|
743
|
-
#
|
|
767
|
+
# b) Next quote is followed by a separator
|
|
744
768
|
is_next_closer = false
|
|
745
769
|
if found_next
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
770
|
+
# We need to peek ahead from where we found the next quote.
|
|
771
|
+
# Since we reset the scanner, we can use peek_char with the calculated offset `j`
|
|
772
|
+
# OR better, temporarily move scanner to `saved_pos + j`
|
|
773
|
+
@scanner.pos = saved_pos + j
|
|
774
|
+
@scanner.skip(/\s+/)
|
|
775
|
+
is_next_closer = TERMINATORS_VALUE.include?(@scanner.check(/./))
|
|
776
|
+
@scanner.pos = saved_pos
|
|
749
777
|
end
|
|
750
778
|
|
|
751
779
|
return [true, true] unless is_whitespace || is_next_closer
|
|
@@ -1027,18 +1055,25 @@ module JsonMend
|
|
|
1027
1055
|
|
|
1028
1056
|
# Check for a line comment `//...` or `#...`
|
|
1029
1057
|
elsif @scanner.scan(%r{//|#})
|
|
1030
|
-
|
|
1031
|
-
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
|
|
1035
|
-
|
|
1036
|
-
|
|
1037
|
-
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
|
|
1058
|
+
in_array = context_contain?(:array)
|
|
1059
|
+
in_object = context_contain?(:object_value)
|
|
1060
|
+
|
|
1061
|
+
if context_contain?(:object_key)
|
|
1062
|
+
# If parsing a key, we must stop at ':' and structural closers
|
|
1063
|
+
@scanner.scan_until(/(?=[\n\r:}\]])/)
|
|
1064
|
+
elsif in_array && in_object
|
|
1065
|
+
# Nested ambiguity, stop at any closer
|
|
1066
|
+
@scanner.scan_until(/(?=[\n\r}\]])/)
|
|
1067
|
+
elsif in_array
|
|
1068
|
+
# Inside array, stop at ']'
|
|
1069
|
+
@scanner.scan_until(/(?=[\n\r\]])/)
|
|
1070
|
+
elsif in_object
|
|
1071
|
+
# Inside object value, stop at '}'
|
|
1072
|
+
@scanner.scan_until(/(?=[\n\r}])/)
|
|
1073
|
+
else
|
|
1074
|
+
# Top level or neutral, stop at newline
|
|
1075
|
+
@scanner.scan_until(/(?=[\n\r])/)
|
|
1076
|
+
end
|
|
1042
1077
|
else
|
|
1043
1078
|
# The character at the current position (likely '/') is not the start of a
|
|
1044
1079
|
# valid comment. To prevent an infinite loop in the calling parser, we must
|
|
@@ -1053,7 +1088,13 @@ module JsonMend
|
|
|
1053
1088
|
# It quickly iterates to find a character, handling escaped characters, and
|
|
1054
1089
|
# returns the index (offset) from the scanner
|
|
1055
1090
|
def skip_to_character(characters, start_idx: 0)
|
|
1056
|
-
pattern = characters.is_a?(
|
|
1091
|
+
pattern = if characters.is_a?(Regexp)
|
|
1092
|
+
characters
|
|
1093
|
+
else
|
|
1094
|
+
# Escape if it's a string, join if it's an array
|
|
1095
|
+
chars = Array(characters).map { |c| Regexp.escape(c.to_s) }
|
|
1096
|
+
Regexp.new(chars.join('|'))
|
|
1097
|
+
end
|
|
1057
1098
|
|
|
1058
1099
|
saved_pos = @scanner.pos
|
|
1059
1100
|
# Skip start_idx
|
|
@@ -1142,16 +1183,29 @@ module JsonMend
|
|
|
1142
1183
|
|
|
1143
1184
|
# Peeks the next character without advancing the scanner
|
|
1144
1185
|
def peek_char(offset = 0)
|
|
1145
|
-
|
|
1186
|
+
# Handle the common 0-offset case
|
|
1187
|
+
if offset.zero?
|
|
1188
|
+
# peek(1) returns the next BYTE, not character
|
|
1189
|
+
byte_str = @scanner.peek(1)
|
|
1190
|
+
return nil if byte_str.empty?
|
|
1191
|
+
|
|
1192
|
+
# Fast path: If it's a standard ASCII char (0-127), return it directly.
|
|
1193
|
+
# This avoids the regex overhead for standard JSON characters ({, [, ", etc).
|
|
1194
|
+
return byte_str if byte_str.getbyte(0) < 128
|
|
1195
|
+
|
|
1196
|
+
# Slow path: If it's a multibyte char (e.g. “), use regex to match the full character.
|
|
1197
|
+
return @scanner.check(/./m)
|
|
1198
|
+
end
|
|
1146
1199
|
|
|
1200
|
+
# For offsets > 0, we must scan to skip correctly (as characters can be variable width)
|
|
1147
1201
|
saved_pos = @scanner.pos
|
|
1148
|
-
|
|
1202
|
+
res = nil
|
|
1149
1203
|
(offset + 1).times do
|
|
1150
|
-
|
|
1151
|
-
break if
|
|
1204
|
+
res = @scanner.getch
|
|
1205
|
+
break if res.nil?
|
|
1152
1206
|
end
|
|
1153
1207
|
@scanner.pos = saved_pos
|
|
1154
|
-
|
|
1208
|
+
res
|
|
1155
1209
|
end
|
|
1156
1210
|
|
|
1157
1211
|
def current_context?(value)
|
|
@@ -1164,7 +1218,7 @@ module JsonMend
|
|
|
1164
1218
|
|
|
1165
1219
|
# Checks if the character signifies the start of a string or literal
|
|
1166
1220
|
def string_start?(char)
|
|
1167
|
-
STRING_DELIMITERS.include?(char) || char&.match?(
|
|
1221
|
+
STRING_DELIMITERS.include?(char) || char&.match?(/[\p{L}$_]/)
|
|
1168
1222
|
end
|
|
1169
1223
|
|
|
1170
1224
|
# Checks if the character signifies the start of a number
|
data/lib/json_mend/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: json_mend
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.
|
|
4
|
+
version: 0.1.3
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
|
-
-
|
|
7
|
+
- Oleksii Vasyliev
|
|
8
8
|
bindir: exe
|
|
9
9
|
cert_chain: []
|
|
10
10
|
date: 1980-01-02 00:00:00.000000000 Z
|
|
@@ -37,7 +37,10 @@ dependencies:
|
|
|
37
37
|
- - ">="
|
|
38
38
|
- !ruby/object:Gem::Version
|
|
39
39
|
version: '0'
|
|
40
|
-
description:
|
|
40
|
+
description: JsonMend is a robust Ruby gem designed to repair broken or malformed
|
|
41
|
+
JSON strings. It is specifically optimized to handle common errors found in JSON
|
|
42
|
+
generated by Large Language Models (LLMs), such as missing quotes, trailing commas,
|
|
43
|
+
unescaped characters, and stray comments
|
|
41
44
|
email:
|
|
42
45
|
- leopard.not.a@gmail.com
|
|
43
46
|
executables: []
|