json_mend 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ab406c9f47e6e3f844c34b50f87396328ed8c050ab1173099fb2d0f07a86dfa7
4
- data.tar.gz: 650ed3e990b93dbb53e609c7f247ecf2a275a6c58eed7998c96a2896d8a72e36
3
+ metadata.gz: 96e37cada4ab9945473f6a49b1d57a24c7822dceb0e6402847d129827686202a
4
+ data.tar.gz: b497489826f674239f601203e237ee7358c7305475a2f1a43d37e465d04ce875
5
5
  SHA512:
6
- metadata.gz: 851479fd3d7315fb2ffb241618567ca471bff5a58abf66b23a41e8cf313cc355178ca1c052dbbc56aa8def7749aaec3660428ec4e303e34b56d69a62d9326618
7
- data.tar.gz: da87b0f02f6538584cd2740e1cfd71cfb29781f44f10621a7c818c3c09f22f92754f109330333344098305956f9143e8cadedb27b422c3fa89cef5a572a2a23a
6
+ metadata.gz: 641ef79051d205ed61c16b9d6deb5f755d41bfb74f3958ac5b1910e2770ac3df2ccdf7e971ba6d44222e7a71de51078b3964da957b44bf70898db13a798ad239
7
+ data.tar.gz: 605ec12dc272c8b1ce7526c8dcf53748e148a58957e6eca9ebf13133ac7f56db46a56161db03227790ddf5c064a582440a3dc02d33bb117f808e536d910f55fe
data/.rubocop.yml CHANGED
@@ -7,19 +7,19 @@ AllCops:
7
7
  SuggestExtensions: false
8
8
 
9
9
  Metrics/AbcSize:
10
- Max: 60
10
+ Max: 65
11
11
 
12
12
  Metrics/ClassLength:
13
- Max: 800
13
+ Max: 820
14
14
 
15
15
  Metrics/CyclomaticComplexity:
16
- Max: 30
16
+ Max: 35
17
17
 
18
18
  Metrics/MethodLength:
19
- Max: 70
19
+ Max: 80
20
20
 
21
21
  Metrics/PerceivedComplexity:
22
- Max: 32
22
+ Max: 35
23
23
 
24
24
  Metrics/BlockNesting:
25
25
  Max: 5
@@ -190,7 +190,7 @@ module JsonMend
190
190
  end
191
191
 
192
192
  # If we get an empty key and the next character is a closing brace, we're done.
193
- return [nil, nil, false] if key.empty? && (peek_char.nil? || peek_char == '}')
193
+ return [nil, nil, false] if key.empty? && (peek_char.nil? || peek_char == '}' || @scanner.pos == pos_before_key)
194
194
 
195
195
  # --- 2. Handle Duplicate Keys (Safer Method) ---
196
196
  # This is a critical repair for lists of objects missing a comma separator.
@@ -222,8 +222,10 @@ module JsonMend
222
222
  # Parses the key of an object, including the special logic for merging dangling arrays.
223
223
  # Returns [key, was_array_merged_flag]
224
224
  def parse_object_key(object)
225
+ char = peek_char
226
+
225
227
  # First, check for and handle the dangling array merge logic.
226
- if try_to_merge_dangling_array(object)
228
+ if char == '[' && try_to_merge_dangling_array(object)
227
229
  return [nil, true, false] # Signal that an array was merged.
228
230
  end
229
231
 
@@ -231,7 +233,7 @@ module JsonMend
231
233
  @context.push(:object_key)
232
234
  is_bracketed = false
233
235
 
234
- if peek_char == '['
236
+ if char == '['
235
237
  @scanner.getch # Consume '['
236
238
  arr = parse_array
237
239
  key = arr.first.to_s
@@ -242,7 +244,7 @@ module JsonMend
242
244
  @context.pop
243
245
 
244
246
  # If the key is empty, consume any stray characters to prevent infinite loops.
245
- @scanner.getch if key.empty? && !@scanner.check(/[:}]/) && !@scanner.eos?
247
+ @scanner.getch if key.empty? && !@scanner.check(/[:{\[}\]]/) && !@scanner.eos?
246
248
 
247
249
  [key, false, is_bracketed] # Signal that a key was parsed.
248
250
  end
@@ -355,7 +357,7 @@ module JsonMend
355
357
  char = prepare_string_parsing
356
358
 
357
359
  # A valid string can only start with a valid quote or, in our case, with a literal
358
- while !@scanner.eos? && !STRING_DELIMITERS.include?(char) && !char.match?(/[\p{L}0-9]/)
360
+ while !@scanner.eos? && !STRING_DELIMITERS.include?(char) && !char.match?(/[\p{L}0-9$_-]/)
359
361
  return '' if TERMINATORS_STRING_GUESSED.include?(char)
360
362
 
361
363
  @scanner.getch
@@ -434,7 +436,7 @@ module JsonMend
434
436
  when '“'
435
437
  lstring_delimiter = '“'
436
438
  rstring_delimiter = '”'
437
- when /[\p{L}0-9]/
439
+ when /[\p{L}0-9$_-]/
438
440
  # Could be a boolean/null, but not if it's an object key.
439
441
  if BOOLEAN_OR_NULL_CHARS.include?(char.downcase) && !current_context?(:object_key)
440
442
  # parse_literal is non-destructive if it fails to match.
@@ -509,6 +511,17 @@ module JsonMend
509
511
  unmatched_delimiter = false
510
512
  # --- Main Parsing Loop ---
511
513
  while !@scanner.eos? && char != rstring_delimiter
514
+ # Fast-path for unquoted keys (e.g. { key: val })
515
+ # consumes a chunk of valid identifier characters at once
516
+ if missing_quotes && current_context?(:object_key)
517
+ chunk = @scanner.scan(/[a-zA-Z0-9_$-]+/)
518
+ if chunk
519
+ string_parts << chunk
520
+ char = peek_char
521
+ next
522
+ end
523
+ end
524
+
512
525
  break if context_termination_reached?(
513
526
  char:,
514
527
  missing_quotes:
@@ -715,37 +728,52 @@ module JsonMend
715
728
  end
716
729
 
717
730
  def check_unmatched_in_array(rstring_delimiter:)
718
- # Heuristic: Check if this quote is a closer or internal.
719
- # 1. Find the NEXT delimiter (quote) index `j`.
720
- j = 1
731
+ saved_pos = @scanner.pos
732
+ @scanner.getch # Skip the current char (the potential closer)
733
+
721
734
  found_next = false
722
- while (c = peek_char(j))
723
- if c == rstring_delimiter
724
- # Check if escaped (count preceding backslashes)
725
- bk = 1
726
- slashes = 0
727
- while j - bk >= 0 && peek_char(j - bk) == '\\'
728
- slashes += 1
729
- bk += 1
730
- end
731
- if slashes.even?
732
- found_next = true
733
- break
734
- end
735
- end
735
+ j = 1
736
+
737
+ # Scan forward linearly
738
+ while (c = @scanner.getch)
736
739
  j += 1
740
+ next if c != rstring_delimiter
741
+
742
+ # Check if escaped (count preceding backslashes)
743
+ # We need to look behind from the current scanner position
744
+ bk = 1
745
+ slashes = 0
746
+ # Look back in the string buffer directly for speed
747
+ while (char_code = @scanner.string.getbyte(@scanner.pos - 1 - bk)) && char_code == 92 # 92 is backslash
748
+ slashes += 1
749
+ bk += 1
750
+ end
751
+
752
+ if slashes.even?
753
+ found_next = true
754
+ break
755
+ end
737
756
  end
738
757
 
739
- # 2. Check conditions to STOP (treat as closing quote):
740
- # a) Strictly whitespace between quotes: ["a" "b"]
741
- is_whitespace = (1...j).all? { |k| peek_char(k).match?(/\s/) }
758
+ # Reset position immediately after scanning
759
+ @scanner.pos = saved_pos
760
+
761
+ # Check conditions to STOP (treat as closing quote):
762
+ # a) Strictly whitespace between quotes
763
+ # We can check this by examining the substring we just scanned
764
+ substring_between = @scanner.string.byteslice(saved_pos + 1, j - 2)
765
+ is_whitespace = substring_between&.match?(/\A\s*\z/)
742
766
 
743
- # b) Next quote is followed by a separator: ["val1" val2",]
767
+ # b) Next quote is followed by a separator
744
768
  is_next_closer = false
745
769
  if found_next
746
- k = j + 1
747
- k += 1 while peek_char(k)&.match?(/\s/) # skip whitespaces
748
- is_next_closer = TERMINATORS_VALUE.include?(peek_char(k))
770
+ # We need to peek ahead from where we found the next quote.
771
+ # Since we reset the scanner, we can use peek_char with the calculated offset `j`
772
+ # OR better, temporarily move scanner to `saved_pos + j`
773
+ @scanner.pos = saved_pos + j
774
+ @scanner.skip(/\s+/)
775
+ is_next_closer = TERMINATORS_VALUE.include?(@scanner.check(/./))
776
+ @scanner.pos = saved_pos
749
777
  end
750
778
 
751
779
  return [true, true] unless is_whitespace || is_next_closer
@@ -1027,18 +1055,25 @@ module JsonMend
1027
1055
 
1028
1056
  # Check for a line comment `//...` or `#...`
1029
1057
  elsif @scanner.scan(%r{//|#})
1030
- # Determine valid line comment termination characters based on context.
1031
- termination_chars = ["\n", "\r"]
1032
- termination_chars << ']' if context_contain?(:array)
1033
- termination_chars << '}' if context_contain?(:object_value)
1034
- termination_chars << ':' if context_contain?(:object_key)
1035
-
1036
- # Create a regex that will scan until it hits one of the terminators.
1037
- # The terminators are positive lookaheads, so they aren't consumed by the scan.
1038
- terminator_regex = Regexp.new("(?=#{termination_chars.map { |c| Regexp.escape(c) }.join('|')})")
1039
-
1040
- # Scan until the end of the comment.
1041
- @scanner.scan_until(terminator_regex)
1058
+ in_array = context_contain?(:array)
1059
+ in_object = context_contain?(:object_value)
1060
+
1061
+ if context_contain?(:object_key)
1062
+ # If parsing a key, we must stop at ':' and structural closers
1063
+ @scanner.scan_until(/(?=[\n\r:}\]])/)
1064
+ elsif in_array && in_object
1065
+ # Nested ambiguity, stop at any closer
1066
+ @scanner.scan_until(/(?=[\n\r}\]])/)
1067
+ elsif in_array
1068
+ # Inside array, stop at ']'
1069
+ @scanner.scan_until(/(?=[\n\r\]])/)
1070
+ elsif in_object
1071
+ # Inside object value, stop at '}'
1072
+ @scanner.scan_until(/(?=[\n\r}])/)
1073
+ else
1074
+ # Top level or neutral, stop at newline
1075
+ @scanner.scan_until(/(?=[\n\r])/)
1076
+ end
1042
1077
  else
1043
1078
  # The character at the current position (likely '/') is not the start of a
1044
1079
  # valid comment. To prevent an infinite loop in the calling parser, we must
@@ -1053,7 +1088,13 @@ module JsonMend
1053
1088
  # It quickly iterates to find a character, handling escaped characters, and
1054
1089
  # returns the index (offset) from the scanner
1055
1090
  def skip_to_character(characters, start_idx: 0)
1056
- pattern = characters.is_a?(Array) ? Regexp.union(characters) : characters
1091
+ pattern = if characters.is_a?(Regexp)
1092
+ characters
1093
+ else
1094
+ # Escape if it's a string, join if it's an array
1095
+ chars = Array(characters).map { |c| Regexp.escape(c.to_s) }
1096
+ Regexp.new(chars.join('|'))
1097
+ end
1057
1098
 
1058
1099
  saved_pos = @scanner.pos
1059
1100
  # Skip start_idx
@@ -1142,16 +1183,29 @@ module JsonMend
1142
1183
 
1143
1184
  # Peeks the next character without advancing the scanner
1144
1185
  def peek_char(offset = 0)
1145
- return @scanner.check(/./m) if offset.zero?
1186
+ # Handle the common 0-offset case
1187
+ if offset.zero?
1188
+ # peek(1) returns the next BYTE, not character
1189
+ byte_str = @scanner.peek(1)
1190
+ return nil if byte_str.empty?
1191
+
1192
+ # Fast path: If it's a standard ASCII char (0-127), return it directly.
1193
+ # This avoids the regex overhead for standard JSON characters ({, [, ", etc).
1194
+ return byte_str if byte_str.getbyte(0) < 128
1195
+
1196
+ # Slow path: If it's a multibyte char (e.g. “), use regex to match the full character.
1197
+ return @scanner.check(/./m)
1198
+ end
1146
1199
 
1200
+ # For offsets > 0, we must scan to skip correctly (as characters can be variable width)
1147
1201
  saved_pos = @scanner.pos
1148
- c = nil
1202
+ res = nil
1149
1203
  (offset + 1).times do
1150
- c = @scanner.getch
1151
- break if c.nil?
1204
+ res = @scanner.getch
1205
+ break if res.nil?
1152
1206
  end
1153
1207
  @scanner.pos = saved_pos
1154
- c
1208
+ res
1155
1209
  end
1156
1210
 
1157
1211
  def current_context?(value)
@@ -1164,7 +1218,7 @@ module JsonMend
1164
1218
 
1165
1219
  # Checks if the character signifies the start of a string or literal
1166
1220
  def string_start?(char)
1167
- STRING_DELIMITERS.include?(char) || char&.match?(/\p{L}/)
1221
+ STRING_DELIMITERS.include?(char) || char&.match?(/[\p{L}$_]/)
1168
1222
  end
1169
1223
 
1170
1224
  # Checks if the character signifies the start of a number
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module JsonMend
4
- VERSION = '0.1.1'
4
+ VERSION = '0.1.3'
5
5
  end
metadata CHANGED
@@ -1,10 +1,10 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: json_mend
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.3
5
5
  platform: ruby
6
6
  authors:
7
- - Alexey Vasiliev
7
+ - Oleksii Vasyliev
8
8
  bindir: exe
9
9
  cert_chain: []
10
10
  date: 1980-01-02 00:00:00.000000000 Z
@@ -37,7 +37,10 @@ dependencies:
37
37
  - - ">="
38
38
  - !ruby/object:Gem::Version
39
39
  version: '0'
40
- description: Repair broken JSON
40
+ description: JsonMend is a robust Ruby gem designed to repair broken or malformed
41
+ JSON strings. It is specifically optimized to handle common errors found in JSON
42
+ generated by Large Language Models (LLMs), such as missing quotes, trailing commas,
43
+ unescaped characters, and stray comments
41
44
  email:
42
45
  - leopard.not.a@gmail.com
43
46
  executables: []