json_mend 0.3.3 → 0.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ae2e6bb67d6227598226e35e852e4b4ae7f681e6ce8bcb94d11bd5e61c74a989
4
- data.tar.gz: 0fbd0b9cc00caa626a794e2024d34d0f31d9bf407f31e7ce8cbb0e1b78c9773d
3
+ metadata.gz: 7dcc378c148dc0514753b966693e0afe286bf145c283d069f01c0b9c74f59a75
4
+ data.tar.gz: 37fa52116d57bc80b168c20feba1fd7b33edab73d1f157759bc71f4a3362802b
5
5
  SHA512:
6
- metadata.gz: afd9daaec420a7bdb29ed2bf5d6438e5b335d2990eeae24503ab0fc10e6221a299dbcf63f4d0b09e093a7a03a523f5fa90db5ff46fc7cbec40670cac71f4955a
7
- data.tar.gz: 730768fa1d1954c626c88c17b8afc3ec7e4726abcd2b4602072453e35d910da0cd6e1b92c5841e571abcd22c6bc7e02df65de7b8dfab45e9cd1210de24ca6f94
6
+ metadata.gz: c94ff3c9d0c2e3602b6a125e74fdc72896709b311221eeceeecd3fbf96b0aef8dc82edfed136512443b5ccc403d512d7beb3476c59732f1a04280a71bbf7de5b
7
+ data.tar.gz: f3603aa1d247686329ec0effe5d1819f4feec47cb43adbff520ed7695831cc30649fa99b4e21a51d4e460754af3d825480335cea10ffbb79feea773fabfa1cd9
@@ -44,6 +44,11 @@ module JsonMend
44
44
  # Pre-compile regexes for performance
45
45
  NUMBER_REGEX = /[#{Regexp.escape(NUMBER_CHARS.to_a.join)}]+/
46
46
  NUMBER_NO_COMMA_REGEX = /[#{Regexp.escape(NUMBER_CHARS.dup.tap { |s| s.delete(',') }.to_a.join)}]+/
47
+ INVALID_NUMBER_TRAILERS_REGEX = /[#{Regexp.union(*INVALID_NUMBER_TRAILERS)}]+\z/
48
+ HEX_ESCAPE_REGEXES = {
49
+ 'u' => /[0-9a-fA-F]{4}/,
50
+ 'x' => /[0-9a-fA-F]{2}/
51
+ }.freeze
47
52
 
48
53
  def initialize(json_string)
49
54
  @scanner = StringScanner.new(json_string)
@@ -72,7 +77,7 @@ module JsonMend
72
77
  # Ignore strings that look like closing braces garbage (e.g. "}", " ] ")
73
78
  next if new_json.is_a?(String) && new_json.match?(/\A\s*[}\]]+\s*\z/)
74
79
 
75
- if both_hash?(json.last, new_json)
80
+ if json.last.is_a?(Hash) && new_json.is_a?(Hash)
76
81
  json[-1] = deep_merge_hashes(json.last, new_json)
77
82
  else
78
83
  json << new_json
@@ -390,20 +395,14 @@ module JsonMend
390
395
  # Handle JSON_STOP_TOKEN from parse_json (EOS or consumed terminator)
391
396
  if value == JSON_STOP_TOKEN
392
397
  # Do nothing, just skipped garbage
393
- elsif strictly_empty?(value)
394
- # Only consume if we didn't just hit a terminator that parse_json successfully respected
395
- @scanner.getch unless value.nil? && TERMINATORS_ARRAY.include?(peek_char)
396
398
  elsif value == '...' && @scanner.string.getbyte(@scanner.pos - 1) == 46
397
399
  # just skip if the previous byte was a dot (46)
398
400
  else
399
401
  arr << value
400
402
  end
401
403
 
404
+ @scanner.skip(/[\s,]+/)
402
405
  char = peek_char
403
- while char && char != ']' && (char.match?(/\s/) || char == ',')
404
- @scanner.getch
405
- char = peek_char
406
- end
407
406
  end
408
407
 
409
408
  # Handle a potentially missing closing bracket, a common LLM error.
@@ -922,13 +921,7 @@ module JsonMend
922
921
  rstring_delimiter_missing = false
923
922
  elsif peek_char(j)
924
923
  # Check for an unmatched opening brace in string_parts
925
- string_parts.reverse_each do |c|
926
- next unless c == '{'
927
-
928
- # Ok then this is part of the string
929
- rstring_delimiter_missing = false
930
- break
931
- end
924
+ rstring_delimiter_missing = false if string_parts.include?('{')
932
925
  end
933
926
 
934
927
  end
@@ -962,21 +955,12 @@ module JsonMend
962
955
  entry_pos = @scanner.pos
963
956
  @scanner.getch # consume 'u' or 'x'
964
957
 
965
- num_chars = (char == 'u' ? 4 : 2)
966
- hex_parts = []
967
-
968
- # Use getch in loop to correctly extract chars (handling multibyte)
969
- num_chars.times do
970
- c = @scanner.getch
971
- break unless c
972
-
973
- hex_parts << c
974
- end
958
+ hex_regex = HEX_ESCAPE_REGEXES.fetch(char)
975
959
 
976
960
  # Validate valid hex digits
977
- if hex_parts.length == num_chars && hex_parts.all? { |c| c.match?(/[0-9a-fA-F]/) }
961
+ if (hex_str = @scanner.scan(hex_regex))
978
962
  string_parts.pop
979
- hex_val = hex_parts.join.to_i(16)
963
+ hex_val = hex_str.to_i(16)
980
964
 
981
965
  if char == 'u' && hex_val.between?(0xD800, 0xDBFF)
982
966
  # Handle high surrogate pair
@@ -1111,18 +1095,21 @@ module JsonMend
1111
1095
  # Save the original length so we can safely roll back if it's completely invalid
1112
1096
  original_length = scanned_str.bytesize
1113
1097
 
1114
- # Handle cases where the number ends with an invalid character.
1115
- if !scanned_str.empty? && INVALID_NUMBER_TRAILERS.include?(scanned_str[-1])
1116
- # Do not rewind scanner, simply discard the invalid trailing char (garbage)
1117
- scanned_str = scanned_str[0...-1]
1118
1098
  # Handle cases where what looked like a number is actually a string.
1119
- # e.g. "123-abc"
1120
- elsif peek_char&.match?(/\p{L}/)
1099
+ # e.g. "123-abc" or "-Infinity". We exclude strings ending in a comma
1100
+ # to preserve comma recovery logic (e.g. `105,next_key`).
1101
+ if peek_char&.match?(/\p{L}/) && !scanned_str.end_with?(',')
1121
1102
  # Roll back the entire scan and re-parse as a string.
1122
1103
  @scanner.pos -= original_length
1123
1104
  return parse_string
1124
1105
  end
1125
1106
 
1107
+ # Handle cases where the number ends with one or more invalid characters.
1108
+ if !scanned_str.empty? && scanned_str.match?(INVALID_NUMBER_TRAILERS_REGEX)
1109
+ # Do not rewind scanner, simply discard the invalid trailing chars (garbage)
1110
+ scanned_str.sub!(INVALID_NUMBER_TRAILERS_REGEX, '')
1111
+ end
1112
+
1126
1113
  # Reject non-numbers (e.g., stray periods "." or dashes "-" from LLM conversational text)
1127
1114
  unless scanned_str.match?(/\d/)
1128
1115
  @scanner.pos -= original_length
@@ -1306,20 +1293,6 @@ module JsonMend
1306
1293
  res
1307
1294
  end
1308
1295
 
1309
- def both_hash?(obj1, obj2)
1310
- obj1.is_a?(Hash) && obj2.is_a?(Hash)
1311
- end
1312
-
1313
- def strictly_empty?(value)
1314
- # Check if the value is a container AND if it's empty.
1315
- case value
1316
- when String, Array, Hash, Set
1317
- value.empty?
1318
- else
1319
- false
1320
- end
1321
- end
1322
-
1323
1296
  # Skips whitespaces
1324
1297
  def skip_whitespaces
1325
1298
  @scanner.skip(/\s+/)
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module JsonMend
4
- VERSION = '0.3.3'
4
+ VERSION = '0.3.5'
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: json_mend
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.3
4
+ version: 0.3.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Oleksii Vasyliev