json_mend 0.2.2 → 0.3.1

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 132fa0fa5489f3d26c01bbb256ed9abbc2af84bf5e39da35d3ef2aa62d610154
4
- data.tar.gz: 3dbf1c1d09ebfdd61a07e51465e3239d7f069de57853c26a540349638fdce4b6
3
+ metadata.gz: cef80671ecf0296d0b0742bb94342c93a9cdf52e5def2ee4592a11f473ed6d6b
4
+ data.tar.gz: 831e6b821e39dff4adcf605e4f0beb53500ca846ac02a96e8c49e9822c7b1475
5
5
  SHA512:
6
- metadata.gz: ce3494e2d0a6c35de7828051050a35a022ecba2659d93d814b455cab38f6bc7c3128ba2751789663e66bd19ac7a8598f63681489705578aa101d9ef543d3fa17
7
- data.tar.gz: d469f040577069840fb06cfad141793c4a17d6e4113705205e3045ebe3d75669e0b5d190a7221fcc3a8909a5a4217cbb3ad37f9fd0a53d8a4fe855e744fcf528
6
+ metadata.gz: a43f2089605d476414103b4333fe7d248856f27d0a8589041d7826905c611619a18fe45841dcf8e97bdfae90cfb6718d39323d0839c8fe97dbb584a54db4712f
7
+ data.tar.gz: 65c72349a8a6b721ef0f1a077bb69f82fe36fb499aa30da57f36fbc0dddfd4a7129082ffc32f940569af876fa8f43d4f626507652f5480b06fc7717b648cf418
data/.rubocop.yml CHANGED
@@ -7,22 +7,22 @@ AllCops:
7
7
  SuggestExtensions: false
8
8
 
9
9
  Metrics/AbcSize:
10
- Max: 70
10
+ Max: 78
11
11
 
12
12
  Metrics/ClassLength:
13
- Max: 900
13
+ Max: 950
14
14
 
15
15
  Metrics/CyclomaticComplexity:
16
- Max: 35
16
+ Max: 36
17
17
 
18
18
  Metrics/MethodLength:
19
- Max: 80
19
+ Max: 85
20
20
 
21
21
  Metrics/BlockLength:
22
22
  Max: 40
23
23
 
24
24
  Metrics/PerceivedComplexity:
25
- Max: 35
25
+ Max: 37
26
26
 
27
27
  Metrics/BlockNesting:
28
28
  Max: 8
@@ -73,7 +73,7 @@ module JsonMend
73
73
  next if new_json.is_a?(String) && new_json.strip.match?(/^[}\]]+$/)
74
74
 
75
75
  if both_hash?(json.last, new_json)
76
- deep_merge_hashes!(json.last, new_json)
76
+ json[-1] = deep_merge_hashes(json.last, new_json)
77
77
  else
78
78
  json << new_json
79
79
  end
@@ -97,22 +97,30 @@ module JsonMend
97
97
  @depth -= 1
98
98
  end
99
99
 
100
- def deep_merge_hashes!(target, source)
100
+ def deep_merge_hashes(target, source, current_depth = 0)
101
+ raise JSON::NestingError, "merge nesting of #{current_depth} is too deep" if current_depth > MAX_ALLOWED_DEPTH
102
+
103
+ result = target.dup
101
104
  source.each do |key, new_val|
102
- if target.key?(key)
103
- old_val = target[key]
104
- if old_val.is_a?(Hash) && new_val.is_a?(Hash)
105
- deep_merge_hashes!(old_val, new_val)
106
- elsif old_val.is_a?(Array) && new_val.is_a?(Array)
107
- target[key] = old_val + new_val
108
- else
109
- target[key] = new_val
110
- end
105
+ if result.key?(key)
106
+ old_val = result[key]
107
+ result[key] = if old_val.is_a?(Hash) && new_val.is_a?(Hash)
108
+ deep_merge_hashes(old_val, new_val, current_depth + 1)
109
+ elsif old_val.is_a?(Array) && new_val.is_a?(Array)
110
+ old_val + new_val
111
+ elsif old_val.is_a?(Array)
112
+ old_val + [new_val]
113
+ elsif new_val.is_a?(Array)
114
+ [old_val] + new_val
115
+ else
116
+ # If primitives collide, preserve both in an array unless identical
117
+ old_val == new_val ? old_val : [old_val, new_val]
118
+ end
111
119
  else
112
- target[key] = new_val
120
+ result[key] = new_val
113
121
  end
114
122
  end
115
- target
123
+ result
116
124
  end
117
125
 
118
126
  def parse_json
@@ -148,7 +156,7 @@ module JsonMend
148
156
  else
149
157
  # Stop if we hit a terminator for the current context to avoid consuming it as garbage
150
158
  if (current_context?(:array) && char == ']') ||
151
- (current_context?(:object_value) && char == '}') ||
159
+ (current_context?(:object_value) && TERMINATORS_OBJECT_VALUE.include?(char)) ||
152
160
  (current_context?(:object_key) && char == '}')
153
161
  return JSON_STOP_TOKEN
154
162
  end
@@ -165,6 +173,8 @@ module JsonMend
165
173
  with_depth_check do
166
174
  object = {}
167
175
 
176
+ @context.push(:object)
177
+
168
178
  loop do
169
179
  skip_whitespaces
170
180
 
@@ -207,6 +217,8 @@ module JsonMend
207
217
  object[key] = value
208
218
  end
209
219
 
220
+ @context.pop
221
+
210
222
  object
211
223
  end
212
224
  end
@@ -252,7 +264,24 @@ module JsonMend
252
264
  value = parse_object_value(colon_found: colon_found || is_bracketed)
253
265
 
254
266
  if value == :inferred_true
255
- return [nil, nil, false] if %w[true false null].include?(key.downcase)
267
+ if %w[true false null].include?(key.downcase)
268
+ # Look back: If it's concatenated to the previous value (like falsetrue), keep it.
269
+ # If it's separated by space/delimiters, it's trailing garbage, so drop it.
270
+ if pos_before_key.positive?
271
+ prev_byte = @scanner.string.getbyte(pos_before_key - 1)
272
+ # Check ASCII byte ranges for a-z, A-Z, 0-9, $, -, and _
273
+ is_concatenated = prev_byte && (
274
+ prev_byte.between?(48, 57) || # 0-9
275
+ prev_byte.between?(65, 90) || # A-Z
276
+ prev_byte.between?(97, 122) || # a-z
277
+ [36, 45, 95].include?(prev_byte) # $, -, _
278
+ )
279
+ else
280
+ is_concatenated = false
281
+ end
282
+
283
+ return [nil, nil, false] unless is_concatenated
284
+ end
256
285
 
257
286
  value = true
258
287
  end
@@ -307,7 +336,7 @@ module JsonMend
307
336
 
308
337
  # If parse_json returned JSON_STOP_TOKEN (nothing found due to garbage->terminator),
309
338
  # treat it as nil (null) for object values to be safe.
310
- value == JSON_STOP_TOKEN ? nil : value
339
+ value == JSON_STOP_TOKEN ? '' : value
311
340
  end
312
341
 
313
342
  # Encapsulates the logic for merging an array that appears without a key.
@@ -660,12 +689,17 @@ module JsonMend
660
689
  return false unless missing_quotes && current_context?(:object_value)
661
690
 
662
691
  i = 1
663
- next_c = peek_char(i)
664
- while next_c && ![rstring_delimiter, lstring_delimiter].include?(next_c)
692
+ saved_pos = @scanner.pos
693
+ @scanner.getch # Skip char at offset 0
694
+
695
+ while (next_c = @scanner.getch)
696
+ break if [rstring_delimiter, lstring_delimiter].include?(next_c)
697
+
665
698
  i += 1
666
- next_c = peek_char(i)
667
699
  end
668
700
 
701
+ @scanner.pos = saved_pos
702
+
669
703
  return false unless next_c
670
704
 
671
705
  # We found a quote, now let's make sure there's a ":" following
@@ -674,27 +708,28 @@ module JsonMend
674
708
  i = skip_whitespaces_at(start_idx: i)
675
709
  next_c = peek_char(i)
676
710
 
677
- if next_c && next_c == ':'
678
- @scanner.pos -= 1
679
- return true
680
- end
711
+ return true if next_c && next_c == ':'
681
712
 
682
713
  false
683
714
  end
684
715
 
685
716
  def determine_complex_delimiter_action(lstring_delimiter, rstring_delimiter)
717
+ saved_pos = @scanner.pos
718
+ @scanner.getch # Skip char at offset 0
719
+
686
720
  i = 1
687
- next_c = peek_char(i)
688
721
  check_comma_in_object_value = true
689
722
 
690
723
  # Check if eventually there is a rstring delimiter, otherwise we bail
691
- while next_c && ![rstring_delimiter, lstring_delimiter].include?(next_c)
724
+ while (next_c = @scanner.getch)
725
+ break if [rstring_delimiter, lstring_delimiter].include?(next_c)
726
+
692
727
  # This is a bit of a weird workaround, essentially in object_value context we don't always break on commas
693
728
  # This is because the routine after will make sure to correct any bad guess and this solves a corner case
694
729
  check_comma_in_object_value = false if check_comma_in_object_value && next_c.match?(/\p{L}/)
730
+
695
731
  # If we are in an object context, let's check for the right delimiters
696
- if (context_contain?(:object_key) && TERMINATORS_OBJECT_KEY.include?(next_c)) ||
697
- (context_contain?(:object_value) && TERMINATORS_OBJECT_KEY.include?(next_c)) ||
732
+ if (context_contain?(:object) && TERMINATORS_OBJECT_KEY.include?(next_c)) ||
698
733
  (context_contain?(:array) && TERMINATORS_ARRAY_ITEM.include?(next_c)) ||
699
734
  (
700
735
  check_comma_in_object_value &&
@@ -705,9 +740,11 @@ module JsonMend
705
740
  end
706
741
 
707
742
  i += 1
708
- next_c = peek_char(i)
709
743
  end
710
744
 
745
+ @scanner.pos = saved_pos
746
+ next_c = peek_char(i)
747
+
711
748
  # If we stopped for a comma in object_value context, let's check if find a "} at the end of the string
712
749
  if next_c == ',' && current_context?(:object_value)
713
750
  i += 1
@@ -719,8 +756,8 @@ module JsonMend
719
756
  next_c = peek_char(i)
720
757
  return [true, false] if TERMINATORS_OBJECT_VALUE.include?(next_c)
721
758
  elsif next_c == rstring_delimiter && peek_char(i - 1) != '\\'
722
- # Check if self.index:self.index+i is only whitespaces, break if that's the case
723
- return [false, false] if (1..i).all? { |j| peek_char(j).to_s.match(/\s/) }
759
+ # Check if self.index:self.index+i is only whitespaces
760
+ return [false, false] if skip_whitespaces_at(start_idx: 1) >= i
724
761
 
725
762
  if current_context?(:object_value)
726
763
  return check_unmatched_in_object_value(index: i, lstring_delimiter:, rstring_delimiter:)
@@ -747,23 +784,30 @@ module JsonMend
747
784
  next_c = peek_char(index)
748
785
  return [true, false] if next_c == ':'
749
786
  end
787
+
750
788
  # We found a delimiter and we need to check if this is a key
751
789
  # so find a rstring_delimiter and a colon after
752
790
  index = skip_to_character(rstring_delimiter, start_idx: index + 1)
753
791
  index += 1
754
- next_c = peek_char(index)
755
- while next_c && next_c != ':'
756
- if TERMINATORS_VALUE.include?(next_c) || (
757
- next_c == rstring_delimiter &&
758
- peek_char(index - 1) != '\\'
759
- )
760
- break
761
- end
792
+
793
+ saved_pos = @scanner.pos
794
+ index.times { @scanner.getch } # Advance to starting index safely
795
+
796
+ while (next_c = @scanner.getch)
797
+ break if next_c == ':'
798
+
799
+ # Safely determine if the previous character was a backslash, guarding against multibyte characters
800
+ prev_byte_idx = @scanner.pos - next_c.bytesize - 1
801
+ is_escaped = prev_byte_idx >= 0 && @scanner.string.getbyte(prev_byte_idx) == 92 # 92 is backslash
802
+
803
+ break if TERMINATORS_VALUE.include?(next_c) || (next_c == rstring_delimiter && !is_escaped)
762
804
 
763
805
  index += 1
764
- next_c = peek_char(index)
765
806
  end
766
807
 
808
+ @scanner.pos = saved_pos
809
+ next_c = peek_char(index)
810
+
767
811
  # Only if we fail to find a ':' then we know this is misplaced quote
768
812
  return [true, true] if next_c != ':'
769
813
 
@@ -772,21 +816,19 @@ module JsonMend
772
816
 
773
817
  def check_unmatched_in_array(rstring_delimiter:)
774
818
  saved_pos = @scanner.pos
819
+
775
820
  @scanner.getch # Skip the current char (the potential closer)
821
+ pos_after_first_quote = @scanner.pos # Safely records offset even if quote was a multibyte smart quote
776
822
 
777
823
  found_next = false
778
- j = 1
779
824
 
780
825
  # Scan forward linearly
781
826
  while (c = @scanner.getch)
782
- j += 1
783
827
  next if c != rstring_delimiter
784
828
 
785
829
  # Check if escaped (count preceding backslashes)
786
- # We need to look behind from the current scanner position
787
830
  bk = 1
788
831
  slashes = 0
789
- # Look back in the string buffer directly for speed
790
832
  while (@scanner.pos - 1 - bk >= 0) &&
791
833
  (char_code = @scanner.string.getbyte(@scanner.pos - 1 - bk)) &&
792
834
  char_code == 92 # 92 is backslash
@@ -800,22 +842,26 @@ module JsonMend
800
842
  end
801
843
  end
802
844
 
845
+ # Record exact byte position after we found the next valid quote
846
+ pos_after_second_quote = @scanner.pos
847
+ pos_before_second_quote = found_next ? pos_after_second_quote - rstring_delimiter.bytesize : @scanner.pos
848
+
803
849
  # Reset position immediately after scanning
804
850
  @scanner.pos = saved_pos
805
851
 
806
852
  # Check conditions to STOP (treat as closing quote):
807
853
  # a) Strictly whitespace between quotes
808
- # We can check this by examining the substring we just scanned
809
- substring_between = @scanner.string.byteslice(saved_pos + 1, j - 2)
854
+ byte_length = pos_before_second_quote - pos_after_first_quote
855
+ byte_length = 0 if byte_length.negative?
856
+
857
+ substring_between = @scanner.string.byteslice(pos_after_first_quote, byte_length)
810
858
  is_whitespace = substring_between&.match?(/\A\s*\z/)
811
859
 
812
860
  # b) Next quote is followed by a separator
813
861
  is_next_closer = false
814
862
  if found_next
815
- # We need to peek ahead from where we found the next quote.
816
- # Since we reset the scanner, we can use peek_char with the calculated offset `j`
817
- # OR better, temporarily move scanner to `saved_pos + j`
818
- @scanner.pos = saved_pos + j
863
+ # Jump directly to the exact byte offset after the second quote!
864
+ @scanner.pos = pos_after_second_quote
819
865
  @scanner.skip(/\s+/)
820
866
  is_next_closer = TERMINATORS_VALUE.include?(@scanner.check(/./))
821
867
  @scanner.pos = saved_pos
@@ -844,7 +890,8 @@ module JsonMend
844
890
  next_c = peek_char(i)
845
891
 
846
892
  is_gap_clean = true
847
- is_gap_clean = (1...i).all? { |k| peek_char(k)&.match?(/\s/) } if missing_quotes && next_c
893
+ is_gap_clean = skip_whitespaces_at(start_idx: 1) >= i if missing_quotes && next_c
894
+
848
895
  if next_c && is_gap_clean
849
896
  i += 1
850
897
  # found a delimiter, now we need to check that is followed strictly by a comma or brace
@@ -1047,8 +1094,22 @@ module JsonMend
1047
1094
  missing_quotes:
1048
1095
  )
1049
1096
  return false unless missing_quotes
1050
- return true if current_context?(:object_key) && (char == ':' || char.match?(/\s/))
1051
- return true if current_context?(:object_key) && TERMINATORS_ARRAY.include?(char)
1097
+
1098
+ if current_context?(:object_key)
1099
+ return true if char == ':' || char.match?(/\s/) || TERMINATORS_ARRAY.include?(char)
1100
+
1101
+ if char == ','
1102
+ # Break on comma UNLESS it looks like part of a number format (e.g., 105,12)
1103
+ # We check if the comma is flanked by digits on both sides
1104
+ prev_byte = @scanner.pos.positive? ? @scanner.string.getbyte(@scanner.pos - 1) : nil
1105
+ next_char = peek_char(1)
1106
+ # Check if the previous byte is ASCII '0' to '9' (bytes 48 to 57)
1107
+ is_number_comma = prev_byte&.between?(48, 57) && next_char&.match?(/\d/)
1108
+
1109
+ return true unless is_number_comma
1110
+ end
1111
+ end
1112
+
1052
1113
  return true if current_context?(:array) && TERMINATORS_ARRAY_ITEM.include?(char)
1053
1114
 
1054
1115
  false
@@ -1090,34 +1151,33 @@ module JsonMend
1090
1151
  @scanner.getch if peek_char == '"'
1091
1152
 
1092
1153
  # Attempt to convert the string to the appropriate number type.
1093
- # Use rescue to handle conversion errors gracefully, returning the original string.
1094
- begin
1095
- # Fix for Ruby < 3.4: "1." is not a valid float.
1096
- # If it ends with '.', we strip the dot and force Float conversion
1097
- # to ensure "1." becomes 1.0 (Float) instead of 1 (Integer).
1098
- if scanned_str.end_with?('.')
1099
- Float(scanned_str[0...-1])
1100
- elsif scanned_str.include?(',')
1101
- # Check if commas are being used as thousands separators (e.g., 1,234 or 1,234,567.89)
1102
- if scanned_str.count(',') > 1 || scanned_str.match?(/,\d{3}(?:\.\d+)?$/)
1103
- cleaned = scanned_str.delete(',')
1104
- if cleaned.match?(/[.eE]/)
1105
- Float(cleaned)
1106
- else
1107
- Integer(cleaned, 10)
1108
- end
1109
- else
1110
- # Treat single comma as a decimal point (European style, e.g., 1,5 -> 1.5)
1111
- Float(scanned_str.tr(',', '.'))
1112
- end
1113
- elsif scanned_str.match?(/[.eE]/)
1114
- Float(scanned_str)
1115
- else
1116
- Integer(scanned_str, 10)
1117
- end
1118
- rescue ArgumentError
1119
- scanned_str
1120
- end
1154
+ # Fix for Ruby < 3.4: "1." is not a valid float.
1155
+ # If it ends with '.', we strip the dot and force Float conversion
1156
+ # to ensure "1." becomes 1.0 (Float) instead of 1 (Integer).
1157
+ result = if scanned_str.end_with?('.')
1158
+ Float(scanned_str[0...-1], exception: false)
1159
+ elsif scanned_str.include?(',')
1160
+ # Check if commas are being used as thousands separators (e.g., 1,234 or 1,234,567.89)
1161
+ if scanned_str.count(',') > 1 || scanned_str.match?(/,\d{3}(?:\.\d+)?$/)
1162
+ cleaned = scanned_str.delete(',')
1163
+ if cleaned.match?(/[.eE]/)
1164
+ Float(cleaned, exception: false)
1165
+ else
1166
+ Integer(cleaned, 10, exception: false)
1167
+ end
1168
+ else
1169
+ # Treat single comma as a decimal point (European style, e.g., 1,5 -> 1.5)
1170
+ Float(scanned_str.tr(',', '.'), exception: false)
1171
+ end
1172
+ elsif scanned_str.match?(/[.eE]/)
1173
+ Float(scanned_str, exception: false)
1174
+ else
1175
+ Integer(scanned_str, 10, exception: false)
1176
+ end
1177
+
1178
+ return scanned_str if result.is_a?(Float) && (result.infinite? || result.nan?)
1179
+
1180
+ result || scanned_str
1121
1181
  end
1122
1182
 
1123
1183
  # Parses the JSON literals `true`, `false`, or `null`.
@@ -1151,23 +1211,26 @@ module JsonMend
1151
1211
  # Check for a line comment `//...` or `#...`
1152
1212
  elsif @scanner.scan(%r{//|#})
1153
1213
  in_array = context_contain?(:array)
1154
- in_object = context_contain?(:object_value)
1155
-
1156
- if context_contain?(:object_key)
1157
- # If parsing a key, we must stop at ':' and structural closers
1158
- @scanner.scan_until(/(?=[\n\r:}\]]|\\n|\\r)/) || @scanner.terminate
1159
- elsif in_array && in_object
1160
- # Nested ambiguity, stop at any closer
1161
- @scanner.scan_until(/(?=[\n\r}\]]|\\n|\\r)/) || @scanner.terminate
1162
- elsif in_array
1163
- # Inside array, stop at ']'
1164
- @scanner.scan_until(/(?=[\n\r\]]|\\n|\\r)/) || @scanner.terminate
1165
- elsif in_object
1166
- # Inside object value, stop at '}'
1167
- @scanner.scan_until(/(?=[\n\r}]|\\n|\\r)/) || @scanner.terminate
1214
+ in_object = context_contain?(:object)
1215
+
1216
+ pattern = if context_contain?(:object_key)
1217
+ /[\n\r:}\]]|\\n|\\r/
1218
+ elsif in_array && in_object
1219
+ /[\n\r}\]]|\\n|\\r/
1220
+ elsif in_array
1221
+ /[\n\r\]]|\\n|\\r/
1222
+ elsif in_object
1223
+ /[\n\r}]|\\n|\\r/
1224
+ else
1225
+ /[\n\r]|\\n|\\r/
1226
+ end
1227
+
1228
+ if (text = @scanner.scan_until(pattern))
1229
+ # Un-consume the terminator so it can be handled structurally
1230
+ terminator_size = text.end_with?('\\n', '\\r') ? 2 : 1
1231
+ @scanner.pos -= terminator_size
1168
1232
  else
1169
- # Top level or neutral, stop at newline
1170
- @scanner.scan_until(/(?=[\n\r]|\\n|\\r)/) || @scanner.terminate
1233
+ @scanner.terminate
1171
1234
  end
1172
1235
 
1173
1236
  # Consume literal escaped newlines so they don't break subsequent parsing.
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module JsonMend
4
- VERSION = '0.2.2'
4
+ VERSION = '0.3.1'
5
5
  end
data/lib/json_mend.rb CHANGED
@@ -24,7 +24,7 @@ module JsonMend
24
24
 
25
25
  # Verify the native parser didn't produce invalid UTF-8 (like unpaired surrogates)
26
26
  # by ensuring it can safely dump its own output.
27
- JSON.dump(parsed)
27
+ JSON.generate(parsed)
28
28
 
29
29
  parsed
30
30
  rescue JSON::ParserError, JSON::GeneratorError
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: json_mend
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Oleksii Vasyliev