json_mend 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ab406c9f47e6e3f844c34b50f87396328ed8c050ab1173099fb2d0f07a86dfa7
4
- data.tar.gz: 650ed3e990b93dbb53e609c7f247ecf2a275a6c58eed7998c96a2896d8a72e36
3
+ metadata.gz: 85624e37a002e82e9edcb6d7fffe16799429e75389662d8059e9e89f8b6f14d8
4
+ data.tar.gz: 19309bc419b6f4481193e140eea31c2d24153ee87265cf2f4e2ecca1f0421bf4
5
5
  SHA512:
6
- metadata.gz: 851479fd3d7315fb2ffb241618567ca471bff5a58abf66b23a41e8cf313cc355178ca1c052dbbc56aa8def7749aaec3660428ec4e303e34b56d69a62d9326618
7
- data.tar.gz: da87b0f02f6538584cd2740e1cfd71cfb29781f44f10621a7c818c3c09f22f92754f109330333344098305956f9143e8cadedb27b422c3fa89cef5a572a2a23a
6
+ metadata.gz: 85aa783092d768f3ff9543e2de2b59b0bbfe7a285e7d5db91bfb3d5c12dab233d2735f6f605ede6cab701b5085bc740332e91d93fddb90f9361b1033e0edf57e
7
+ data.tar.gz: 75d3dc3b22f72748fe5b21f916ba930a04bd03aaea0d9aca8126ce38b2fdf9cfae9ad38c33f35a7c4e0c39ee7784b79df06d60772bcdd062e529372cc1bbd733
@@ -190,7 +190,7 @@ module JsonMend
190
190
  end
191
191
 
192
192
  # If we get an empty key and the next character is a closing brace, we're done.
193
- return [nil, nil, false] if key.empty? && (peek_char.nil? || peek_char == '}')
193
+ return [nil, nil, false] if key.empty? && (peek_char.nil? || peek_char == '}' || @scanner.pos == pos_before_key)
194
194
 
195
195
  # --- 2. Handle Duplicate Keys (Safer Method) ---
196
196
  # This is a critical repair for lists of objects missing a comma separator.
@@ -242,7 +242,7 @@ module JsonMend
242
242
  @context.pop
243
243
 
244
244
  # If the key is empty, consume any stray characters to prevent infinite loops.
245
- @scanner.getch if key.empty? && !@scanner.check(/[:}]/) && !@scanner.eos?
245
+ @scanner.getch if key.empty? && !@scanner.check(/[:{\[}\]]/) && !@scanner.eos?
246
246
 
247
247
  [key, false, is_bracketed] # Signal that a key was parsed.
248
248
  end
@@ -715,37 +715,52 @@ module JsonMend
715
715
  end
716
716
 
717
717
  def check_unmatched_in_array(rstring_delimiter:)
718
- # Heuristic: Check if this quote is a closer or internal.
719
- # 1. Find the NEXT delimiter (quote) index `j`.
720
- j = 1
718
+ saved_pos = @scanner.pos
719
+ @scanner.getch # Skip the current char (the potential closer)
720
+
721
721
  found_next = false
722
- while (c = peek_char(j))
723
- if c == rstring_delimiter
724
- # Check if escaped (count preceding backslashes)
725
- bk = 1
726
- slashes = 0
727
- while j - bk >= 0 && peek_char(j - bk) == '\\'
728
- slashes += 1
729
- bk += 1
730
- end
731
- if slashes.even?
732
- found_next = true
733
- break
734
- end
735
- end
722
+ j = 1
723
+
724
+ # Scan forward linearly
725
+ while (c = @scanner.getch)
736
726
  j += 1
727
+ next if c != rstring_delimiter
728
+
729
+ # Check if escaped (count preceding backslashes)
730
+ # We need to look behind from the current scanner position
731
+ bk = 1
732
+ slashes = 0
733
+ # Look back in the string buffer directly for speed
734
+ while (char_code = @scanner.string.getbyte(@scanner.pos - 1 - bk)) && char_code == 92 # 92 is backslash
735
+ slashes += 1
736
+ bk += 1
737
+ end
738
+
739
+ if slashes.even?
740
+ found_next = true
741
+ break
742
+ end
737
743
  end
738
744
 
739
- # 2. Check conditions to STOP (treat as closing quote):
740
- # a) Strictly whitespace between quotes: ["a" "b"]
741
- is_whitespace = (1...j).all? { |k| peek_char(k).match?(/\s/) }
745
+ # Reset position immediately after scanning
746
+ @scanner.pos = saved_pos
747
+
748
+ # Check conditions to STOP (treat as closing quote):
749
+ # a) Strictly whitespace between quotes
750
+ # We can check this by examining the substring we just scanned
751
+ substring_between = @scanner.string.byteslice(saved_pos + 1, j - 2)
752
+ is_whitespace = substring_between&.match?(/\A\s*\z/)
742
753
 
743
- # b) Next quote is followed by a separator: ["val1" val2",]
754
+ # b) Next quote is followed by a separator
744
755
  is_next_closer = false
745
756
  if found_next
746
- k = j + 1
747
- k += 1 while peek_char(k)&.match?(/\s/) # skip whitespaces
748
- is_next_closer = TERMINATORS_VALUE.include?(peek_char(k))
757
+ # We need to peek ahead from where we found the next quote.
758
+ # Since we reset the scanner, we can use peek_char with the calculated offset `j`
759
+ # OR better, temporarily move scanner to `saved_pos + j`
760
+ @scanner.pos = saved_pos + j
761
+ @scanner.skip(/\s+/)
762
+ is_next_closer = TERMINATORS_VALUE.include?(@scanner.check(/./))
763
+ @scanner.pos = saved_pos
749
764
  end
750
765
 
751
766
  return [true, true] unless is_whitespace || is_next_closer
@@ -1027,18 +1042,25 @@ module JsonMend
1027
1042
 
1028
1043
  # Check for a line comment `//...` or `#...`
1029
1044
  elsif @scanner.scan(%r{//|#})
1030
- # Determine valid line comment termination characters based on context.
1031
- termination_chars = ["\n", "\r"]
1032
- termination_chars << ']' if context_contain?(:array)
1033
- termination_chars << '}' if context_contain?(:object_value)
1034
- termination_chars << ':' if context_contain?(:object_key)
1035
-
1036
- # Create a regex that will scan until it hits one of the terminators.
1037
- # The terminators are positive lookaheads, so they aren't consumed by the scan.
1038
- terminator_regex = Regexp.new("(?=#{termination_chars.map { |c| Regexp.escape(c) }.join('|')})")
1039
-
1040
- # Scan until the end of the comment.
1041
- @scanner.scan_until(terminator_regex)
1045
+ in_array = context_contain?(:array)
1046
+ in_object = context_contain?(:object_value)
1047
+
1048
+ if context_contain?(:object_key)
1049
+ # If parsing a key, we must stop at ':' and structural closers
1050
+ @scanner.scan_until(/(?=[\n\r:}\]])/)
1051
+ elsif in_array && in_object
1052
+ # Nested ambiguity, stop at any closer
1053
+ @scanner.scan_until(/(?=[\n\r}\]])/)
1054
+ elsif in_array
1055
+ # Inside array, stop at ']'
1056
+ @scanner.scan_until(/(?=[\n\r\]])/)
1057
+ elsif in_object
1058
+ # Inside object value, stop at '}'
1059
+ @scanner.scan_until(/(?=[\n\r}])/)
1060
+ else
1061
+ # Top level or neutral, stop at newline
1062
+ @scanner.scan_until(/(?=[\n\r])/)
1063
+ end
1042
1064
  else
1043
1065
  # The character at the current position (likely '/') is not the start of a
1044
1066
  # valid comment. To prevent an infinite loop in the calling parser, we must
@@ -1053,7 +1075,13 @@ module JsonMend
1053
1075
  # It quickly iterates to find a character, handling escaped characters, and
1054
1076
  # returns the index (offset) from the scanner
1055
1077
  def skip_to_character(characters, start_idx: 0)
1056
- pattern = characters.is_a?(Array) ? Regexp.union(characters) : characters
1078
+ pattern = if characters.is_a?(Regexp)
1079
+ characters
1080
+ else
1081
+ # Escape if it's a string, join if it's an array
1082
+ chars = Array(characters).map { |c| Regexp.escape(c.to_s) }
1083
+ Regexp.new(chars.join('|'))
1084
+ end
1057
1085
 
1058
1086
  saved_pos = @scanner.pos
1059
1087
  # Skip start_idx
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module JsonMend
4
- VERSION = '0.1.1'
4
+ VERSION = '0.1.2'
5
5
  end
metadata CHANGED
@@ -1,10 +1,10 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: json_mend
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
- - Alexey Vasiliev
7
+ - Oleksii Vasyliev
8
8
  bindir: exe
9
9
  cert_chain: []
10
10
  date: 1980-01-02 00:00:00.000000000 Z
@@ -37,7 +37,10 @@ dependencies:
37
37
  - - ">="
38
38
  - !ruby/object:Gem::Version
39
39
  version: '0'
40
- description: Repair broken JSON
40
+ description: JsonMend is a robust Ruby gem designed to repair broken or malformed
41
+ JSON strings. It is specifically optimized to handle common errors found in JSON
42
+ generated by Large Language Models (LLMs), such as missing quotes, trailing commas,
43
+ unescaped characters, and stray comments
41
44
  email:
42
45
  - leopard.not.a@gmail.com
43
46
  executables: []