json_mend 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/json_mend/parser.rb +67 -39
- data/lib/json_mend/version.rb +1 -1
- metadata +6 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 85624e37a002e82e9edcb6d7fffe16799429e75389662d8059e9e89f8b6f14d8
|
|
4
|
+
data.tar.gz: 19309bc419b6f4481193e140eea31c2d24153ee87265cf2f4e2ecca1f0421bf4
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 85aa783092d768f3ff9543e2de2b59b0bbfe7a285e7d5db91bfb3d5c12dab233d2735f6f605ede6cab701b5085bc740332e91d93fddb90f9361b1033e0edf57e
|
|
7
|
+
data.tar.gz: 75d3dc3b22f72748fe5b21f916ba930a04bd03aaea0d9aca8126ce38b2fdf9cfae9ad38c33f35a7c4e0c39ee7784b79df06d60772bcdd062e529372cc1bbd733
|
data/lib/json_mend/parser.rb
CHANGED
|
@@ -190,7 +190,7 @@ module JsonMend
|
|
|
190
190
|
end
|
|
191
191
|
|
|
192
192
|
# If we get an empty key and the next character is a closing brace, we're done.
|
|
193
|
-
return [nil, nil, false] if key.empty? && (peek_char.nil? || peek_char == '}')
|
|
193
|
+
return [nil, nil, false] if key.empty? && (peek_char.nil? || peek_char == '}' || @scanner.pos == pos_before_key)
|
|
194
194
|
|
|
195
195
|
# --- 2. Handle Duplicate Keys (Safer Method) ---
|
|
196
196
|
# This is a critical repair for lists of objects missing a comma separator.
|
|
@@ -242,7 +242,7 @@ module JsonMend
|
|
|
242
242
|
@context.pop
|
|
243
243
|
|
|
244
244
|
# If the key is empty, consume any stray characters to prevent infinite loops.
|
|
245
|
-
@scanner.getch if key.empty? && !@scanner.check(/[:}]/) && !@scanner.eos?
|
|
245
|
+
@scanner.getch if key.empty? && !@scanner.check(/[:{\[}\]]/) && !@scanner.eos?
|
|
246
246
|
|
|
247
247
|
[key, false, is_bracketed] # Signal that a key was parsed.
|
|
248
248
|
end
|
|
@@ -715,37 +715,52 @@ module JsonMend
|
|
|
715
715
|
end
|
|
716
716
|
|
|
717
717
|
def check_unmatched_in_array(rstring_delimiter:)
|
|
718
|
-
|
|
719
|
-
#
|
|
720
|
-
|
|
718
|
+
saved_pos = @scanner.pos
|
|
719
|
+
@scanner.getch # Skip the current char (the potential closer)
|
|
720
|
+
|
|
721
721
|
found_next = false
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
slashes = 0
|
|
727
|
-
while j - bk >= 0 && peek_char(j - bk) == '\\'
|
|
728
|
-
slashes += 1
|
|
729
|
-
bk += 1
|
|
730
|
-
end
|
|
731
|
-
if slashes.even?
|
|
732
|
-
found_next = true
|
|
733
|
-
break
|
|
734
|
-
end
|
|
735
|
-
end
|
|
722
|
+
j = 1
|
|
723
|
+
|
|
724
|
+
# Scan forward linearly
|
|
725
|
+
while (c = @scanner.getch)
|
|
736
726
|
j += 1
|
|
727
|
+
next if c != rstring_delimiter
|
|
728
|
+
|
|
729
|
+
# Check if escaped (count preceding backslashes)
|
|
730
|
+
# We need to look behind from the current scanner position
|
|
731
|
+
bk = 1
|
|
732
|
+
slashes = 0
|
|
733
|
+
# Look back in the string buffer directly for speed
|
|
734
|
+
while (char_code = @scanner.string.getbyte(@scanner.pos - 1 - bk)) && char_code == 92 # 92 is backslash
|
|
735
|
+
slashes += 1
|
|
736
|
+
bk += 1
|
|
737
|
+
end
|
|
738
|
+
|
|
739
|
+
if slashes.even?
|
|
740
|
+
found_next = true
|
|
741
|
+
break
|
|
742
|
+
end
|
|
737
743
|
end
|
|
738
744
|
|
|
739
|
-
#
|
|
740
|
-
|
|
741
|
-
|
|
745
|
+
# Reset position immediately after scanning
|
|
746
|
+
@scanner.pos = saved_pos
|
|
747
|
+
|
|
748
|
+
# Check conditions to STOP (treat as closing quote):
|
|
749
|
+
# a) Strictly whitespace between quotes
|
|
750
|
+
# We can check this by examining the substring we just scanned
|
|
751
|
+
substring_between = @scanner.string.byteslice(saved_pos + 1, j - 2)
|
|
752
|
+
is_whitespace = substring_between&.match?(/\A\s*\z/)
|
|
742
753
|
|
|
743
|
-
#
|
|
754
|
+
# b) Next quote is followed by a separator
|
|
744
755
|
is_next_closer = false
|
|
745
756
|
if found_next
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
757
|
+
# We need to peek ahead from where we found the next quote.
|
|
758
|
+
# The scanner was reset to `saved_pos` above. Instead of using peek_char with the calculated offset `j`,
|
|
759
|
+
# temporarily move the scanner to `saved_pos + j`, inspect what follows, and restore it afterwards.
|
|
760
|
+
@scanner.pos = saved_pos + j
|
|
761
|
+
@scanner.skip(/\s+/)
|
|
762
|
+
is_next_closer = TERMINATORS_VALUE.include?(@scanner.check(/./))
|
|
763
|
+
@scanner.pos = saved_pos
|
|
749
764
|
end
|
|
750
765
|
|
|
751
766
|
return [true, true] unless is_whitespace || is_next_closer
|
|
@@ -1027,18 +1042,25 @@ module JsonMend
|
|
|
1027
1042
|
|
|
1028
1043
|
# Check for a line comment `//...` or `#...`
|
|
1029
1044
|
elsif @scanner.scan(%r{//|#})
|
|
1030
|
-
|
|
1031
|
-
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
|
|
1035
|
-
|
|
1036
|
-
|
|
1037
|
-
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
|
|
1045
|
+
in_array = context_contain?(:array)
|
|
1046
|
+
in_object = context_contain?(:object_value)
|
|
1047
|
+
|
|
1048
|
+
if context_contain?(:object_key)
|
|
1049
|
+
# If parsing a key, we must stop at ':' and structural closers
|
|
1050
|
+
@scanner.scan_until(/(?=[\n\r:}\]])/)
|
|
1051
|
+
elsif in_array && in_object
|
|
1052
|
+
# Nested ambiguity, stop at any closer
|
|
1053
|
+
@scanner.scan_until(/(?=[\n\r}\]])/)
|
|
1054
|
+
elsif in_array
|
|
1055
|
+
# Inside array, stop at ']'
|
|
1056
|
+
@scanner.scan_until(/(?=[\n\r\]])/)
|
|
1057
|
+
elsif in_object
|
|
1058
|
+
# Inside object value, stop at '}'
|
|
1059
|
+
@scanner.scan_until(/(?=[\n\r}])/)
|
|
1060
|
+
else
|
|
1061
|
+
# Top level or neutral, stop at newline
|
|
1062
|
+
@scanner.scan_until(/(?=[\n\r])/)
|
|
1063
|
+
end
|
|
1042
1064
|
else
|
|
1043
1065
|
# The character at the current position (likely '/') is not the start of a
|
|
1044
1066
|
# valid comment. To prevent an infinite loop in the calling parser, we must
|
|
@@ -1053,7 +1075,13 @@ module JsonMend
|
|
|
1053
1075
|
# It quickly iterates to find a character, handling escaped characters, and
|
|
1054
1076
|
# returns the index (offset) from the scanner
|
|
1055
1077
|
def skip_to_character(characters, start_idx: 0)
|
|
1056
|
-
pattern = characters.is_a?(
|
|
1078
|
+
pattern = if characters.is_a?(Regexp)
|
|
1079
|
+
characters
|
|
1080
|
+
else
|
|
1081
|
+
# Escape if it's a string, join if it's an array
|
|
1082
|
+
chars = Array(characters).map { |c| Regexp.escape(c.to_s) }
|
|
1083
|
+
Regexp.new(chars.join('|'))
|
|
1084
|
+
end
|
|
1057
1085
|
|
|
1058
1086
|
saved_pos = @scanner.pos
|
|
1059
1087
|
# Skip start_idx
|
data/lib/json_mend/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: json_mend
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.
|
|
4
|
+
version: 0.1.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
|
-
-
|
|
7
|
+
- Oleksii Vasyliev
|
|
8
8
|
bindir: exe
|
|
9
9
|
cert_chain: []
|
|
10
10
|
date: 1980-01-02 00:00:00.000000000 Z
|
|
@@ -37,7 +37,10 @@ dependencies:
|
|
|
37
37
|
- - ">="
|
|
38
38
|
- !ruby/object:Gem::Version
|
|
39
39
|
version: '0'
|
|
40
|
-
description:
|
|
40
|
+
description: JsonMend is a robust Ruby gem designed to repair broken or malformed
|
|
41
|
+
JSON strings. It is specifically optimized to handle common errors found in JSON
|
|
42
|
+
generated by Large Language Models (LLMs), such as missing quotes, trailing commas,
|
|
43
|
+
unescaped characters, and stray comments
|
|
41
44
|
email:
|
|
42
45
|
- leopard.not.a@gmail.com
|
|
43
46
|
executables: []
|