json_mend 0.3.0 → 0.3.2

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1c8f01b52f6eed3640e4be622b142646276e1253a33ec1404bc0c87415e01948
4
- data.tar.gz: bd40c8087d7a94795daff1a89b2cc3aab79df833bc35daf59d130e84ebd0af70
3
+ metadata.gz: 923008e3c63e24de16c3ee6b26097cf4064a32a16bac749c5501e313996238f1
4
+ data.tar.gz: c6c6040f9d54fe7604ae7126402886af159aae5e001dca93cbceebf254839a55
5
5
  SHA512:
6
- metadata.gz: 9f7ea495a91444ff58ae0f2dfa082c7b3ee6bc110bad0c2cef758f93c844edabfaff8622e15306f28b4fba8f440abc926f47728d9df6c0267c1039fab879b789
7
- data.tar.gz: 37493919d5d08baf3ae589070fb74105097c541363e6864e1b848d289642848f0e763ffd3d4a7ded437d2ccae793f933ce94e437fc3d90feb51d593c62425d46
6
+ metadata.gz: c869c17b06f5ed0e46e3f74ccf59c8c374af1431b43ee7d9d59be2170e5ba88e10f83753db36af1e4ebba2cf519273b5f5c32c42fa386978f6a96c22095b063d
7
+ data.tar.gz: 770eb1238b3f73261a2130b2a900dfff0bd2770cf0f6b3b38ecee30cafdbfb09e59b87e4df4dd8aba722c5dbbd14bb1b35df509fdc92efbe081d8342ed7e435b
data/.rubocop.yml CHANGED
@@ -7,13 +7,13 @@ AllCops:
7
7
  SuggestExtensions: false
8
8
 
9
9
  Metrics/AbcSize:
10
- Max: 75
10
+ Max: 78
11
11
 
12
12
  Metrics/ClassLength:
13
13
  Max: 950
14
14
 
15
15
  Metrics/CyclomaticComplexity:
16
- Max: 35
16
+ Max: 36
17
17
 
18
18
  Metrics/MethodLength:
19
19
  Max: 85
@@ -70,7 +70,7 @@ module JsonMend
70
70
  break
71
71
  else
72
72
  # Ignore strings that look like closing braces garbage (e.g. "}", " ] ")
73
- next if new_json.is_a?(String) && new_json.strip.match?(/^[}\]]+$/)
73
+ next if new_json.is_a?(String) && new_json.match?(/\A\s*[}\]]+\s*\z/)
74
74
 
75
75
  if both_hash?(json.last, new_json)
76
76
  json[-1] = deep_merge_hashes(json.last, new_json)
@@ -265,20 +265,14 @@ module JsonMend
265
265
 
266
266
  if value == :inferred_true
267
267
  if %w[true false null].include?(key.downcase)
268
- # Look back: If it's concatenated to the previous value (like falsetrue), keep it.
269
- # If it's separated by space/delimiters, it's trailing garbage, so drop it.
270
- if pos_before_key.positive?
271
- prev_byte = @scanner.string.getbyte(pos_before_key - 1)
272
- # Check ASCII byte ranges for a-z, A-Z, 0-9, $, -, and _
273
- is_concatenated = prev_byte && (
274
- prev_byte.between?(48, 57) || # 0-9
275
- prev_byte.between?(65, 90) || # A-Z
276
- prev_byte.between?(97, 122) || # a-z
277
- [36, 45, 95].include?(prev_byte) # $, -, _
278
- )
279
- else
280
- is_concatenated = false
281
- end
268
+ prev_byte = @scanner.string.getbyte(pos_before_key - 1)
269
+ # Check ASCII byte ranges for a-z, A-Z, 0-9, $, -, and _
270
+ is_concatenated = prev_byte && (
271
+ prev_byte.between?(48, 57) || # 0-9
272
+ prev_byte.between?(65, 90) || # A-Z
273
+ prev_byte.between?(97, 122) || # a-z
274
+ [36, 45, 95].include?(prev_byte) # $, -, _
275
+ )
282
276
 
283
277
  return [nil, nil, false] unless is_concatenated
284
278
  end
@@ -335,7 +329,7 @@ module JsonMend
335
329
  @context.pop
336
330
 
337
331
  # If parse_json returned JSON_STOP_TOKEN (nothing found due to garbage->terminator),
338
- # treat it as nil (null) for object values to be safe.
332
+ # treat it as empty string for object values to be safe.
339
333
  value == JSON_STOP_TOKEN ? '' : value
340
334
  end
341
335
 
@@ -426,10 +420,10 @@ module JsonMend
426
420
  # many common errors found in LLM-generated JSON, such as missing quotes,
427
421
  # incorrect escape sequences, and ambiguous string terminators
428
422
  def parse_string
429
- char = prepare_string_parsing
423
+ char = peek_char
430
424
 
431
425
  # A valid string can only start with a valid quote or, in our case, with a literal
432
- while !@scanner.eos? && !STRING_DELIMITERS.include?(char) && !char.match?(/[\p{L}0-9$_-]/)
426
+ while !@scanner.eos? && !STRING_DELIMITERS.include?(char) && !char&.match?(/[\p{L}0-9$_-]/)
433
427
  return '' if TERMINATORS_STRING_GUESSED.include?(char)
434
428
 
435
429
  @scanner.getch
@@ -485,18 +479,6 @@ module JsonMend
485
479
 
486
480
  # string helper methods
487
481
 
488
- def prepare_string_parsing
489
- char = peek_char
490
-
491
- # Consume comments that appear before the string starts
492
- while COMMENT_DELIMETERS.include?(char)
493
- parse_comment
494
- char = peek_char
495
- end
496
-
497
- char
498
- end
499
-
500
482
  def determine_delimiters(char:)
501
483
  missing_quotes = false
502
484
  lstring_delimiter = rstring_delimiter = '"'
@@ -581,7 +563,6 @@ module JsonMend
581
563
  )
582
564
  char = peek_char
583
565
  unmatched_delimiter = false
584
- safe_string_until = -1 # Fast-forward pointer to safely bypass O(N^2) lookaheads
585
566
  # --- Main Parsing Loop ---
586
567
  while !@scanner.eos? && char != rstring_delimiter
587
568
  # Fast-path for unquoted keys (e.g. { key: val })
@@ -600,33 +581,30 @@ module JsonMend
600
581
  missing_quotes:
601
582
  )
602
583
 
603
- # Bypass expensive comma/bracket checks because we already validated this segment extends to the next quote
604
- if @scanner.pos > safe_string_until
605
- if current_context?(:object_value) && TERMINATORS_OBJECT_VALUE.include?(char) &&
606
- (string_parts.empty? || string_parts.last != rstring_delimiter)
607
-
608
- is_break = check_rstring_delimiter_missing(
609
- string_parts:,
610
- lstring_delimiter:,
611
- rstring_delimiter:,
612
- missing_quotes:
613
- )
614
- break if is_break
615
- end
584
+ if current_context?(:object_value) && TERMINATORS_OBJECT_VALUE.include?(char) &&
585
+ (string_parts.empty? || string_parts.last != rstring_delimiter)
616
586
 
617
- if char == ']' && context_contain?(:array) && string_parts.last != rstring_delimiter
618
- i = skip_to_character(rstring_delimiter)
619
- # No delimiter found
620
- break unless peek_char(i)
621
- end
587
+ is_break = check_rstring_delimiter_missing(
588
+ string_parts:,
589
+ lstring_delimiter:,
590
+ rstring_delimiter:,
591
+ missing_quotes:
592
+ )
593
+ break if is_break
594
+ end
622
595
 
623
- if current_context?(:object_value) && char == '}'
624
- # We found the end of an object while parsing a value
625
- # Check if the object is really over, to avoid doubling the closing brace
626
- i = skip_whitespaces_at(start_idx: 1)
627
- next_c = peek_char(i)
628
- break unless next_c
629
- end
596
+ if char == ']' && context_contain?(:array) && string_parts.last != rstring_delimiter
597
+ i = skip_to_character(rstring_delimiter)
598
+ # No delimiter found
599
+ break unless peek_char(i)
600
+ end
601
+
602
+ if current_context?(:object_value) && char == '}'
603
+ # We found the end of an object while parsing a value
604
+ # Check if the object is really over, to avoid doubling the closing brace
605
+ i = skip_whitespaces_at(start_idx: 1)
606
+ next_c = peek_char(i)
607
+ break unless next_c
630
608
  end
631
609
 
632
610
  string_parts << char
@@ -644,7 +622,7 @@ module JsonMend
644
622
  end
645
623
 
646
624
  # If we are in object key context and we find a colon, it could be a missing right quote
647
- if @scanner.pos > safe_string_until && char == ':' && !missing_quotes && current_context?(:object_key)
625
+ if char == ':' && !missing_quotes && current_context?(:object_key)
648
626
  is_break = handle_missing_quotes_termination(
649
627
  lstring_delimiter:,
650
628
  rstring_delimiter:
@@ -670,8 +648,6 @@ module JsonMend
670
648
  string_parts << char.to_s
671
649
  @scanner.getch
672
650
  char = peek_char
673
-
674
- safe_string_until = @scanner.pos + skip_to_character(rstring_delimiter)
675
651
  end
676
652
  end
677
653
  end
@@ -1257,10 +1233,12 @@ module JsonMend
1257
1233
  # returns the index (offset) from the scanner
1258
1234
  def skip_to_character(characters, start_idx: 0)
1259
1235
  pattern = SKIP_CHARS_REGEX_CACHE.fetch(characters, nil)
1236
+ # :nocov:
1260
1237
  if pattern.nil?
1261
1238
  chars = Array(characters).map { |c| Regexp.escape(c.to_s) }
1262
1239
  pattern = Regexp.new(chars.join('|'))
1263
1240
  end
1241
+ # :nocov:
1264
1242
 
1265
1243
  saved_pos = @scanner.pos
1266
1244
  # Skip start_idx
@@ -1321,7 +1299,7 @@ module JsonMend
1321
1299
  (matched.length - 1) + start_idx
1322
1300
  else
1323
1301
  # No non-space found.
1324
- @scanner.rest.length + start_idx
1302
+ (@scanner.string.length - @scanner.charpos) + start_idx
1325
1303
  end
1326
1304
 
1327
1305
  @scanner.pos = saved_pos
@@ -1352,12 +1330,12 @@ module JsonMend
1352
1330
  # Handle the common 0-offset case
1353
1331
  if offset.zero?
1354
1332
  # peek(1) returns the next BYTE, not character
1355
- byte_str = @scanner.peek(1)
1356
- return nil if byte_str.empty?
1333
+ byte = @scanner.string.getbyte(@scanner.pos)
1334
+ return nil unless byte
1357
1335
 
1358
1336
  # Fast path: If it's a standard ASCII char (0-127), return it directly.
1359
- # This avoids the regex overhead for standard JSON characters ({, [, ", etc).
1360
- return byte_str if byte_str.getbyte(0) < 128
1337
+ # Enforcing UTF-8 ensures we don't mix US-ASCII and UTF-8 strings later.
1338
+ return byte.chr(Encoding::UTF_8) if byte < 128
1361
1339
 
1362
1340
  # Slow path: If it's a multibyte char (e.g. “), use regex to match the full character.
1363
1341
  return @scanner.check(/./m)
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module JsonMend
4
- VERSION = '0.3.0'
4
+ VERSION = '0.3.2'
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: json_mend
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Oleksii Vasyliev