json_mend 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +4 -4
- data/lib/json_mend/parser.rb +198 -120
- data/lib/json_mend/version.rb +1 -1
- data/lib/json_mend.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 1c8f01b52f6eed3640e4be622b142646276e1253a33ec1404bc0c87415e01948
|
|
4
|
+
data.tar.gz: bd40c8087d7a94795daff1a89b2cc3aab79df833bc35daf59d130e84ebd0af70
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 9f7ea495a91444ff58ae0f2dfa082c7b3ee6bc110bad0c2cef758f93c844edabfaff8622e15306f28b4fba8f440abc926f47728d9df6c0267c1039fab879b789
|
|
7
|
+
data.tar.gz: 37493919d5d08baf3ae589070fb74105097c541363e6864e1b848d289642848f0e763ffd3d4a7ded437d2ccae793f933ce94e437fc3d90feb51d593c62425d46
|
data/.rubocop.yml
CHANGED
|
@@ -7,22 +7,22 @@ AllCops:
|
|
|
7
7
|
SuggestExtensions: false
|
|
8
8
|
|
|
9
9
|
Metrics/AbcSize:
|
|
10
|
-
Max:
|
|
10
|
+
Max: 75
|
|
11
11
|
|
|
12
12
|
Metrics/ClassLength:
|
|
13
|
-
Max:
|
|
13
|
+
Max: 950
|
|
14
14
|
|
|
15
15
|
Metrics/CyclomaticComplexity:
|
|
16
16
|
Max: 35
|
|
17
17
|
|
|
18
18
|
Metrics/MethodLength:
|
|
19
|
-
Max:
|
|
19
|
+
Max: 85
|
|
20
20
|
|
|
21
21
|
Metrics/BlockLength:
|
|
22
22
|
Max: 40
|
|
23
23
|
|
|
24
24
|
Metrics/PerceivedComplexity:
|
|
25
|
-
Max:
|
|
25
|
+
Max: 37
|
|
26
26
|
|
|
27
27
|
Metrics/BlockNesting:
|
|
28
28
|
Max: 8
|
data/lib/json_mend/parser.rb
CHANGED
|
@@ -73,7 +73,7 @@ module JsonMend
|
|
|
73
73
|
next if new_json.is_a?(String) && new_json.strip.match?(/^[}\]]+$/)
|
|
74
74
|
|
|
75
75
|
if both_hash?(json.last, new_json)
|
|
76
|
-
deep_merge_hashes
|
|
76
|
+
json[-1] = deep_merge_hashes(json.last, new_json)
|
|
77
77
|
else
|
|
78
78
|
json << new_json
|
|
79
79
|
end
|
|
@@ -97,22 +97,30 @@ module JsonMend
|
|
|
97
97
|
@depth -= 1
|
|
98
98
|
end
|
|
99
99
|
|
|
100
|
-
def deep_merge_hashes
|
|
100
|
+
def deep_merge_hashes(target, source, current_depth = 0)
|
|
101
|
+
raise JSON::NestingError, "merge nesting of #{current_depth} is too deep" if current_depth > MAX_ALLOWED_DEPTH
|
|
102
|
+
|
|
103
|
+
result = target.dup
|
|
101
104
|
source.each do |key, new_val|
|
|
102
|
-
if
|
|
103
|
-
old_val =
|
|
104
|
-
if old_val.is_a?(Hash) && new_val.is_a?(Hash)
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
105
|
+
if result.key?(key)
|
|
106
|
+
old_val = result[key]
|
|
107
|
+
result[key] = if old_val.is_a?(Hash) && new_val.is_a?(Hash)
|
|
108
|
+
deep_merge_hashes(old_val, new_val, current_depth + 1)
|
|
109
|
+
elsif old_val.is_a?(Array) && new_val.is_a?(Array)
|
|
110
|
+
old_val + new_val
|
|
111
|
+
elsif old_val.is_a?(Array)
|
|
112
|
+
old_val + [new_val]
|
|
113
|
+
elsif new_val.is_a?(Array)
|
|
114
|
+
[old_val] + new_val
|
|
115
|
+
else
|
|
116
|
+
# If primitives collide, preserve both in an array unless identical
|
|
117
|
+
old_val == new_val ? old_val : [old_val, new_val]
|
|
118
|
+
end
|
|
111
119
|
else
|
|
112
|
-
|
|
120
|
+
result[key] = new_val
|
|
113
121
|
end
|
|
114
122
|
end
|
|
115
|
-
|
|
123
|
+
result
|
|
116
124
|
end
|
|
117
125
|
|
|
118
126
|
def parse_json
|
|
@@ -148,7 +156,7 @@ module JsonMend
|
|
|
148
156
|
else
|
|
149
157
|
# Stop if we hit a terminator for the current context to avoid consuming it as garbage
|
|
150
158
|
if (current_context?(:array) && char == ']') ||
|
|
151
|
-
(current_context?(:object_value) && char
|
|
159
|
+
(current_context?(:object_value) && TERMINATORS_OBJECT_VALUE.include?(char)) ||
|
|
152
160
|
(current_context?(:object_key) && char == '}')
|
|
153
161
|
return JSON_STOP_TOKEN
|
|
154
162
|
end
|
|
@@ -165,6 +173,8 @@ module JsonMend
|
|
|
165
173
|
with_depth_check do
|
|
166
174
|
object = {}
|
|
167
175
|
|
|
176
|
+
@context.push(:object)
|
|
177
|
+
|
|
168
178
|
loop do
|
|
169
179
|
skip_whitespaces
|
|
170
180
|
|
|
@@ -207,6 +217,8 @@ module JsonMend
|
|
|
207
217
|
object[key] = value
|
|
208
218
|
end
|
|
209
219
|
|
|
220
|
+
@context.pop
|
|
221
|
+
|
|
210
222
|
object
|
|
211
223
|
end
|
|
212
224
|
end
|
|
@@ -252,7 +264,24 @@ module JsonMend
|
|
|
252
264
|
value = parse_object_value(colon_found: colon_found || is_bracketed)
|
|
253
265
|
|
|
254
266
|
if value == :inferred_true
|
|
255
|
-
|
|
267
|
+
if %w[true false null].include?(key.downcase)
|
|
268
|
+
# Look back: If it's concatenated to the previous value (like falsetrue), keep it.
|
|
269
|
+
# If it's separated by space/delimiters, it's trailing garbage, so drop it.
|
|
270
|
+
if pos_before_key.positive?
|
|
271
|
+
prev_byte = @scanner.string.getbyte(pos_before_key - 1)
|
|
272
|
+
# Check ASCII byte ranges for a-z, A-Z, 0-9, $, -, and _
|
|
273
|
+
is_concatenated = prev_byte && (
|
|
274
|
+
prev_byte.between?(48, 57) || # 0-9
|
|
275
|
+
prev_byte.between?(65, 90) || # A-Z
|
|
276
|
+
prev_byte.between?(97, 122) || # a-z
|
|
277
|
+
[36, 45, 95].include?(prev_byte) # $, -, _
|
|
278
|
+
)
|
|
279
|
+
else
|
|
280
|
+
is_concatenated = false
|
|
281
|
+
end
|
|
282
|
+
|
|
283
|
+
return [nil, nil, false] unless is_concatenated
|
|
284
|
+
end
|
|
256
285
|
|
|
257
286
|
value = true
|
|
258
287
|
end
|
|
@@ -307,7 +336,7 @@ module JsonMend
|
|
|
307
336
|
|
|
308
337
|
# If parse_json returned JSON_STOP_TOKEN (nothing found due to garbage->terminator),
|
|
309
338
|
# treat it as nil (null) for object values to be safe.
|
|
310
|
-
value == JSON_STOP_TOKEN ?
|
|
339
|
+
value == JSON_STOP_TOKEN ? '' : value
|
|
311
340
|
end
|
|
312
341
|
|
|
313
342
|
# Encapsulates the logic for merging an array that appears without a key.
|
|
@@ -552,6 +581,7 @@ module JsonMend
|
|
|
552
581
|
)
|
|
553
582
|
char = peek_char
|
|
554
583
|
unmatched_delimiter = false
|
|
584
|
+
safe_string_until = -1 # Fast-forward pointer to safely bypass O(N^2) lookaheads
|
|
555
585
|
# --- Main Parsing Loop ---
|
|
556
586
|
while !@scanner.eos? && char != rstring_delimiter
|
|
557
587
|
# Fast-path for unquoted keys (e.g. { key: val })
|
|
@@ -570,30 +600,33 @@ module JsonMend
|
|
|
570
600
|
missing_quotes:
|
|
571
601
|
)
|
|
572
602
|
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
603
|
+
# Bypass expensive comma/bracket checks because we already validated this segment extends to the next quote
|
|
604
|
+
if @scanner.pos > safe_string_until
|
|
605
|
+
if current_context?(:object_value) && TERMINATORS_OBJECT_VALUE.include?(char) &&
|
|
606
|
+
(string_parts.empty? || string_parts.last != rstring_delimiter)
|
|
607
|
+
|
|
608
|
+
is_break = check_rstring_delimiter_missing(
|
|
609
|
+
string_parts:,
|
|
610
|
+
lstring_delimiter:,
|
|
611
|
+
rstring_delimiter:,
|
|
612
|
+
missing_quotes:
|
|
613
|
+
)
|
|
614
|
+
break if is_break
|
|
615
|
+
end
|
|
584
616
|
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
617
|
+
if char == ']' && context_contain?(:array) && string_parts.last != rstring_delimiter
|
|
618
|
+
i = skip_to_character(rstring_delimiter)
|
|
619
|
+
# No delimiter found
|
|
620
|
+
break unless peek_char(i)
|
|
621
|
+
end
|
|
590
622
|
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
623
|
+
if current_context?(:object_value) && char == '}'
|
|
624
|
+
# We found the end of an object while parsing a value
|
|
625
|
+
# Check if the object is really over, to avoid doubling the closing brace
|
|
626
|
+
i = skip_whitespaces_at(start_idx: 1)
|
|
627
|
+
next_c = peek_char(i)
|
|
628
|
+
break unless next_c
|
|
629
|
+
end
|
|
597
630
|
end
|
|
598
631
|
|
|
599
632
|
string_parts << char
|
|
@@ -611,7 +644,7 @@ module JsonMend
|
|
|
611
644
|
end
|
|
612
645
|
|
|
613
646
|
# If we are in object key context and we find a colon, it could be a missing right quote
|
|
614
|
-
if char == ':' && !missing_quotes && current_context?(:object_key)
|
|
647
|
+
if @scanner.pos > safe_string_until && char == ':' && !missing_quotes && current_context?(:object_key)
|
|
615
648
|
is_break = handle_missing_quotes_termination(
|
|
616
649
|
lstring_delimiter:,
|
|
617
650
|
rstring_delimiter:
|
|
@@ -637,6 +670,8 @@ module JsonMend
|
|
|
637
670
|
string_parts << char.to_s
|
|
638
671
|
@scanner.getch
|
|
639
672
|
char = peek_char
|
|
673
|
+
|
|
674
|
+
safe_string_until = @scanner.pos + skip_to_character(rstring_delimiter)
|
|
640
675
|
end
|
|
641
676
|
end
|
|
642
677
|
end
|
|
@@ -660,12 +695,17 @@ module JsonMend
|
|
|
660
695
|
return false unless missing_quotes && current_context?(:object_value)
|
|
661
696
|
|
|
662
697
|
i = 1
|
|
663
|
-
|
|
664
|
-
|
|
698
|
+
saved_pos = @scanner.pos
|
|
699
|
+
@scanner.getch # Skip char at offset 0
|
|
700
|
+
|
|
701
|
+
while (next_c = @scanner.getch)
|
|
702
|
+
break if [rstring_delimiter, lstring_delimiter].include?(next_c)
|
|
703
|
+
|
|
665
704
|
i += 1
|
|
666
|
-
next_c = peek_char(i)
|
|
667
705
|
end
|
|
668
706
|
|
|
707
|
+
@scanner.pos = saved_pos
|
|
708
|
+
|
|
669
709
|
return false unless next_c
|
|
670
710
|
|
|
671
711
|
# We found a quote, now let's make sure there's a ":" following
|
|
@@ -674,27 +714,28 @@ module JsonMend
|
|
|
674
714
|
i = skip_whitespaces_at(start_idx: i)
|
|
675
715
|
next_c = peek_char(i)
|
|
676
716
|
|
|
677
|
-
if next_c && next_c == ':'
|
|
678
|
-
@scanner.pos -= 1
|
|
679
|
-
return true
|
|
680
|
-
end
|
|
717
|
+
return true if next_c && next_c == ':'
|
|
681
718
|
|
|
682
719
|
false
|
|
683
720
|
end
|
|
684
721
|
|
|
685
722
|
def determine_complex_delimiter_action(lstring_delimiter, rstring_delimiter)
|
|
723
|
+
saved_pos = @scanner.pos
|
|
724
|
+
@scanner.getch # Skip char at offset 0
|
|
725
|
+
|
|
686
726
|
i = 1
|
|
687
|
-
next_c = peek_char(i)
|
|
688
727
|
check_comma_in_object_value = true
|
|
689
728
|
|
|
690
729
|
# Check if eventually there is a rstring delimiter, otherwise we bail
|
|
691
|
-
while next_c
|
|
730
|
+
while (next_c = @scanner.getch)
|
|
731
|
+
break if [rstring_delimiter, lstring_delimiter].include?(next_c)
|
|
732
|
+
|
|
692
733
|
# This is a bit of a weird workaround, essentially in object_value context we don't always break on commas
|
|
693
734
|
# This is because the routine after will make sure to correct any bad guess and this solves a corner case
|
|
694
735
|
check_comma_in_object_value = false if check_comma_in_object_value && next_c.match?(/\p{L}/)
|
|
736
|
+
|
|
695
737
|
# If we are in an object context, let's check for the right delimiters
|
|
696
|
-
if (context_contain?(:
|
|
697
|
-
(context_contain?(:object_value) && TERMINATORS_OBJECT_KEY.include?(next_c)) ||
|
|
738
|
+
if (context_contain?(:object) && TERMINATORS_OBJECT_KEY.include?(next_c)) ||
|
|
698
739
|
(context_contain?(:array) && TERMINATORS_ARRAY_ITEM.include?(next_c)) ||
|
|
699
740
|
(
|
|
700
741
|
check_comma_in_object_value &&
|
|
@@ -705,9 +746,11 @@ module JsonMend
|
|
|
705
746
|
end
|
|
706
747
|
|
|
707
748
|
i += 1
|
|
708
|
-
next_c = peek_char(i)
|
|
709
749
|
end
|
|
710
750
|
|
|
751
|
+
@scanner.pos = saved_pos
|
|
752
|
+
next_c = peek_char(i)
|
|
753
|
+
|
|
711
754
|
# If we stopped for a comma in object_value context, let's check if find a "} at the end of the string
|
|
712
755
|
if next_c == ',' && current_context?(:object_value)
|
|
713
756
|
i += 1
|
|
@@ -719,8 +762,8 @@ module JsonMend
|
|
|
719
762
|
next_c = peek_char(i)
|
|
720
763
|
return [true, false] if TERMINATORS_OBJECT_VALUE.include?(next_c)
|
|
721
764
|
elsif next_c == rstring_delimiter && peek_char(i - 1) != '\\'
|
|
722
|
-
# Check if self.index:self.index+i is only whitespaces
|
|
723
|
-
return [false, false] if (1
|
|
765
|
+
# Check if self.index:self.index+i is only whitespaces
|
|
766
|
+
return [false, false] if skip_whitespaces_at(start_idx: 1) >= i
|
|
724
767
|
|
|
725
768
|
if current_context?(:object_value)
|
|
726
769
|
return check_unmatched_in_object_value(index: i, lstring_delimiter:, rstring_delimiter:)
|
|
@@ -747,23 +790,30 @@ module JsonMend
|
|
|
747
790
|
next_c = peek_char(index)
|
|
748
791
|
return [true, false] if next_c == ':'
|
|
749
792
|
end
|
|
793
|
+
|
|
750
794
|
# We found a delimiter and we need to check if this is a key
|
|
751
795
|
# so find a rstring_delimiter and a colon after
|
|
752
796
|
index = skip_to_character(rstring_delimiter, start_idx: index + 1)
|
|
753
797
|
index += 1
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
798
|
+
|
|
799
|
+
saved_pos = @scanner.pos
|
|
800
|
+
index.times { @scanner.getch } # Advance to starting index safely
|
|
801
|
+
|
|
802
|
+
while (next_c = @scanner.getch)
|
|
803
|
+
break if next_c == ':'
|
|
804
|
+
|
|
805
|
+
# Safely determine if the previous character was a backslash, guarding against multibyte characters
|
|
806
|
+
prev_byte_idx = @scanner.pos - next_c.bytesize - 1
|
|
807
|
+
is_escaped = prev_byte_idx >= 0 && @scanner.string.getbyte(prev_byte_idx) == 92 # 92 is backslash
|
|
808
|
+
|
|
809
|
+
break if TERMINATORS_VALUE.include?(next_c) || (next_c == rstring_delimiter && !is_escaped)
|
|
762
810
|
|
|
763
811
|
index += 1
|
|
764
|
-
next_c = peek_char(index)
|
|
765
812
|
end
|
|
766
813
|
|
|
814
|
+
@scanner.pos = saved_pos
|
|
815
|
+
next_c = peek_char(index)
|
|
816
|
+
|
|
767
817
|
# Only if we fail to find a ':' then we know this is misplaced quote
|
|
768
818
|
return [true, true] if next_c != ':'
|
|
769
819
|
|
|
@@ -772,21 +822,19 @@ module JsonMend
|
|
|
772
822
|
|
|
773
823
|
def check_unmatched_in_array(rstring_delimiter:)
|
|
774
824
|
saved_pos = @scanner.pos
|
|
825
|
+
|
|
775
826
|
@scanner.getch # Skip the current char (the potential closer)
|
|
827
|
+
pos_after_first_quote = @scanner.pos # Safely records offset even if quote was a multibyte smart quote
|
|
776
828
|
|
|
777
829
|
found_next = false
|
|
778
|
-
j = 1
|
|
779
830
|
|
|
780
831
|
# Scan forward linearly
|
|
781
832
|
while (c = @scanner.getch)
|
|
782
|
-
j += 1
|
|
783
833
|
next if c != rstring_delimiter
|
|
784
834
|
|
|
785
835
|
# Check if escaped (count preceding backslashes)
|
|
786
|
-
# We need to look behind from the current scanner position
|
|
787
836
|
bk = 1
|
|
788
837
|
slashes = 0
|
|
789
|
-
# Look back in the string buffer directly for speed
|
|
790
838
|
while (@scanner.pos - 1 - bk >= 0) &&
|
|
791
839
|
(char_code = @scanner.string.getbyte(@scanner.pos - 1 - bk)) &&
|
|
792
840
|
char_code == 92 # 92 is backslash
|
|
@@ -800,22 +848,26 @@ module JsonMend
|
|
|
800
848
|
end
|
|
801
849
|
end
|
|
802
850
|
|
|
851
|
+
# Record exact byte position after we found the next valid quote
|
|
852
|
+
pos_after_second_quote = @scanner.pos
|
|
853
|
+
pos_before_second_quote = found_next ? pos_after_second_quote - rstring_delimiter.bytesize : @scanner.pos
|
|
854
|
+
|
|
803
855
|
# Reset position immediately after scanning
|
|
804
856
|
@scanner.pos = saved_pos
|
|
805
857
|
|
|
806
858
|
# Check conditions to STOP (treat as closing quote):
|
|
807
859
|
# a) Strictly whitespace between quotes
|
|
808
|
-
|
|
809
|
-
|
|
860
|
+
byte_length = pos_before_second_quote - pos_after_first_quote
|
|
861
|
+
byte_length = 0 if byte_length.negative?
|
|
862
|
+
|
|
863
|
+
substring_between = @scanner.string.byteslice(pos_after_first_quote, byte_length)
|
|
810
864
|
is_whitespace = substring_between&.match?(/\A\s*\z/)
|
|
811
865
|
|
|
812
866
|
# b) Next quote is followed by a separator
|
|
813
867
|
is_next_closer = false
|
|
814
868
|
if found_next
|
|
815
|
-
#
|
|
816
|
-
|
|
817
|
-
# OR better, temporarily move scanner to `saved_pos + j`
|
|
818
|
-
@scanner.pos = saved_pos + j
|
|
869
|
+
# Jump directly to the exact byte offset after the second quote!
|
|
870
|
+
@scanner.pos = pos_after_second_quote
|
|
819
871
|
@scanner.skip(/\s+/)
|
|
820
872
|
is_next_closer = TERMINATORS_VALUE.include?(@scanner.check(/./))
|
|
821
873
|
@scanner.pos = saved_pos
|
|
@@ -844,7 +896,8 @@ module JsonMend
|
|
|
844
896
|
next_c = peek_char(i)
|
|
845
897
|
|
|
846
898
|
is_gap_clean = true
|
|
847
|
-
is_gap_clean = (1
|
|
899
|
+
is_gap_clean = skip_whitespaces_at(start_idx: 1) >= i if missing_quotes && next_c
|
|
900
|
+
|
|
848
901
|
if next_c && is_gap_clean
|
|
849
902
|
i += 1
|
|
850
903
|
# found a delimiter, now we need to check that is followed strictly by a comma or brace
|
|
@@ -1047,8 +1100,22 @@ module JsonMend
|
|
|
1047
1100
|
missing_quotes:
|
|
1048
1101
|
)
|
|
1049
1102
|
return false unless missing_quotes
|
|
1050
|
-
|
|
1051
|
-
|
|
1103
|
+
|
|
1104
|
+
if current_context?(:object_key)
|
|
1105
|
+
return true if char == ':' || char.match?(/\s/) || TERMINATORS_ARRAY.include?(char)
|
|
1106
|
+
|
|
1107
|
+
if char == ','
|
|
1108
|
+
# Break on comma UNLESS it looks like part of a number format (e.g., 105,12)
|
|
1109
|
+
# We check if the comma is flanked by digits on both sides
|
|
1110
|
+
prev_byte = @scanner.pos.positive? ? @scanner.string.getbyte(@scanner.pos - 1) : nil
|
|
1111
|
+
next_char = peek_char(1)
|
|
1112
|
+
# Check if the previous byte is ASCII '0' to '9' (bytes 48 to 57)
|
|
1113
|
+
is_number_comma = prev_byte&.between?(48, 57) && next_char&.match?(/\d/)
|
|
1114
|
+
|
|
1115
|
+
return true unless is_number_comma
|
|
1116
|
+
end
|
|
1117
|
+
end
|
|
1118
|
+
|
|
1052
1119
|
return true if current_context?(:array) && TERMINATORS_ARRAY_ITEM.include?(char)
|
|
1053
1120
|
|
|
1054
1121
|
false
|
|
@@ -1065,6 +1132,9 @@ module JsonMend
|
|
|
1065
1132
|
scanned_str = @scanner.scan(regex)
|
|
1066
1133
|
return nil unless scanned_str
|
|
1067
1134
|
|
|
1135
|
+
# Save the original length so we can safely roll back if it's completely invalid
|
|
1136
|
+
original_length = scanned_str.bytesize
|
|
1137
|
+
|
|
1068
1138
|
# Handle cases where the number ends with an invalid character.
|
|
1069
1139
|
if !scanned_str.empty? && INVALID_NUMBER_TRAILERS.include?(scanned_str[-1])
|
|
1070
1140
|
# Do not rewind scanner, simply discard the invalid trailing char (garbage)
|
|
@@ -1073,42 +1143,47 @@ module JsonMend
|
|
|
1073
1143
|
# e.g. "123-abc"
|
|
1074
1144
|
elsif peek_char&.match?(/\p{L}/)
|
|
1075
1145
|
# Roll back the entire scan and re-parse as a string.
|
|
1076
|
-
@scanner.pos -=
|
|
1146
|
+
@scanner.pos -= original_length
|
|
1077
1147
|
return parse_string
|
|
1078
1148
|
end
|
|
1079
1149
|
|
|
1150
|
+
# Reject non-numbers (e.g., stray periods "." or dashes "-" from LLM conversational text)
|
|
1151
|
+
unless scanned_str.match?(/\d/)
|
|
1152
|
+
@scanner.pos -= original_length
|
|
1153
|
+
return ''
|
|
1154
|
+
end
|
|
1155
|
+
|
|
1080
1156
|
# Sometimes numbers are followed by a quote, which is garbage
|
|
1081
1157
|
@scanner.getch if peek_char == '"'
|
|
1082
1158
|
|
|
1083
1159
|
# Attempt to convert the string to the appropriate number type.
|
|
1084
|
-
#
|
|
1085
|
-
|
|
1086
|
-
|
|
1087
|
-
|
|
1088
|
-
|
|
1089
|
-
|
|
1090
|
-
|
|
1091
|
-
|
|
1092
|
-
|
|
1093
|
-
|
|
1094
|
-
|
|
1095
|
-
|
|
1096
|
-
|
|
1097
|
-
|
|
1098
|
-
|
|
1099
|
-
|
|
1100
|
-
|
|
1101
|
-
|
|
1102
|
-
|
|
1103
|
-
|
|
1104
|
-
|
|
1105
|
-
|
|
1106
|
-
|
|
1107
|
-
|
|
1108
|
-
|
|
1109
|
-
|
|
1110
|
-
|
|
1111
|
-
end
|
|
1160
|
+
# Fix for Ruby < 3.4: "1." is not a valid float.
|
|
1161
|
+
# If it ends with '.', we strip the dot and force Float conversion
|
|
1162
|
+
# to ensure "1." becomes 1.0 (Float) instead of 1 (Integer).
|
|
1163
|
+
result = if scanned_str.end_with?('.')
|
|
1164
|
+
Float(scanned_str[0...-1], exception: false)
|
|
1165
|
+
elsif scanned_str.include?(',')
|
|
1166
|
+
# Check if commas are being used as thousands separators (e.g., 1,234 or 1,234,567.89)
|
|
1167
|
+
if scanned_str.count(',') > 1 || scanned_str.match?(/,\d{3}(?:\.\d+)?$/)
|
|
1168
|
+
cleaned = scanned_str.delete(',')
|
|
1169
|
+
if cleaned.match?(/[.eE]/)
|
|
1170
|
+
Float(cleaned, exception: false)
|
|
1171
|
+
else
|
|
1172
|
+
Integer(cleaned, 10, exception: false)
|
|
1173
|
+
end
|
|
1174
|
+
else
|
|
1175
|
+
# Treat single comma as a decimal point (European style, e.g., 1,5 -> 1.5)
|
|
1176
|
+
Float(scanned_str.tr(',', '.'), exception: false)
|
|
1177
|
+
end
|
|
1178
|
+
elsif scanned_str.match?(/[.eE]/)
|
|
1179
|
+
Float(scanned_str, exception: false)
|
|
1180
|
+
else
|
|
1181
|
+
Integer(scanned_str, 10, exception: false)
|
|
1182
|
+
end
|
|
1183
|
+
|
|
1184
|
+
return scanned_str if result.is_a?(Float) && (result.infinite? || result.nan?)
|
|
1185
|
+
|
|
1186
|
+
result || scanned_str
|
|
1112
1187
|
end
|
|
1113
1188
|
|
|
1114
1189
|
# Parses the JSON literals `true`, `false`, or `null`.
|
|
@@ -1142,23 +1217,26 @@ module JsonMend
|
|
|
1142
1217
|
# Check for a line comment `//...` or `#...`
|
|
1143
1218
|
elsif @scanner.scan(%r{//|#})
|
|
1144
1219
|
in_array = context_contain?(:array)
|
|
1145
|
-
in_object = context_contain?(:
|
|
1146
|
-
|
|
1147
|
-
if context_contain?(:object_key)
|
|
1148
|
-
|
|
1149
|
-
|
|
1150
|
-
|
|
1151
|
-
|
|
1152
|
-
|
|
1153
|
-
|
|
1154
|
-
|
|
1155
|
-
|
|
1156
|
-
|
|
1157
|
-
|
|
1158
|
-
|
|
1220
|
+
in_object = context_contain?(:object)
|
|
1221
|
+
|
|
1222
|
+
pattern = if context_contain?(:object_key)
|
|
1223
|
+
/[\n\r:}\]]|\\n|\\r/
|
|
1224
|
+
elsif in_array && in_object
|
|
1225
|
+
/[\n\r}\]]|\\n|\\r/
|
|
1226
|
+
elsif in_array
|
|
1227
|
+
/[\n\r\]]|\\n|\\r/
|
|
1228
|
+
elsif in_object
|
|
1229
|
+
/[\n\r}]|\\n|\\r/
|
|
1230
|
+
else
|
|
1231
|
+
/[\n\r]|\\n|\\r/
|
|
1232
|
+
end
|
|
1233
|
+
|
|
1234
|
+
if (text = @scanner.scan_until(pattern))
|
|
1235
|
+
# Un-consume the terminator so it can be handled structurally
|
|
1236
|
+
terminator_size = text.end_with?('\\n', '\\r') ? 2 : 1
|
|
1237
|
+
@scanner.pos -= terminator_size
|
|
1159
1238
|
else
|
|
1160
|
-
|
|
1161
|
-
@scanner.scan_until(/(?=[\n\r]|\\n|\\r)/) || @scanner.terminate
|
|
1239
|
+
@scanner.terminate
|
|
1162
1240
|
end
|
|
1163
1241
|
|
|
1164
1242
|
# Consume literal escaped newlines so they don't break subsequent parsing.
|
data/lib/json_mend/version.rb
CHANGED
data/lib/json_mend.rb
CHANGED
|
@@ -24,7 +24,7 @@ module JsonMend
|
|
|
24
24
|
|
|
25
25
|
# Verify the native parser didn't produce invalid UTF-8 (like unpaired surrogates)
|
|
26
26
|
# by ensuring it can safely dump its own output.
|
|
27
|
-
JSON.
|
|
27
|
+
JSON.generate(parsed)
|
|
28
28
|
|
|
29
29
|
parsed
|
|
30
30
|
rescue JSON::ParserError, JSON::GeneratorError
|