RubyGems - json_mend - Versions diffs - 0.2.2 → 0.3.1 - Mend

json_mend 0.2.2 → 0.3.1

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 132fa0fa5489f3d26c01bbb256ed9abbc2af84bf5e39da35d3ef2aa62d610154
-  data.tar.gz: 3dbf1c1d09ebfdd61a07e51465e3239d7f069de57853c26a540349638fdce4b6
+  metadata.gz: cef80671ecf0296d0b0742bb94342c93a9cdf52e5def2ee4592a11f473ed6d6b
+  data.tar.gz: 831e6b821e39dff4adcf605e4f0beb53500ca846ac02a96e8c49e9822c7b1475
 SHA512:
-  metadata.gz: ce3494e2d0a6c35de7828051050a35a022ecba2659d93d814b455cab38f6bc7c3128ba2751789663e66bd19ac7a8598f63681489705578aa101d9ef543d3fa17
-  data.tar.gz: d469f040577069840fb06cfad141793c4a17d6e4113705205e3045ebe3d75669e0b5d190a7221fcc3a8909a5a4217cbb3ad37f9fd0a53d8a4fe855e744fcf528
+  metadata.gz: a43f2089605d476414103b4333fe7d248856f27d0a8589041d7826905c611619a18fe45841dcf8e97bdfae90cfb6718d39323d0839c8fe97dbb584a54db4712f
+  data.tar.gz: 65c72349a8a6b721ef0f1a077bb69f82fe36fb499aa30da57f36fbc0dddfd4a7129082ffc32f940569af876fa8f43d4f626507652f5480b06fc7717b648cf418

data/.rubocop.yml CHANGED Viewed

@@ -7,22 +7,22 @@ AllCops:
   SuggestExtensions: false
 Metrics/AbcSize:
-  Max: 70
+  Max: 78
 Metrics/ClassLength:
-  Max: 900
+  Max: 950
 Metrics/CyclomaticComplexity:
-  Max: 35
+  Max: 36
 Metrics/MethodLength:
-  Max: 80
+  Max: 85
 Metrics/BlockLength:
   Max: 40
 Metrics/PerceivedComplexity:
-  Max: 35
+  Max: 37
 Metrics/BlockNesting:
   Max: 8

data/lib/json_mend/parser.rb CHANGED Viewed

@@ -73,7 +73,7 @@ module JsonMend
             next if new_json.is_a?(String) && new_json.strip.match?(/^[}\]]+$/)
             if both_hash?(json.last, new_json)
-              deep_merge_hashes!(json.last, new_json)
+              json[-1] = deep_merge_hashes(json.last, new_json)
             else
               json << new_json
             end
@@ -97,22 +97,30 @@ module JsonMend
       @depth -= 1
     end
-    def deep_merge_hashes!(target, source)
+    def deep_merge_hashes(target, source, current_depth = 0)
+      raise JSON::NestingError, "merge nesting of #{current_depth} is too deep" if current_depth > MAX_ALLOWED_DEPTH
+      result = target.dup
       source.each do |key, new_val|
-        if target.key?(key)
-          old_val = target[key]
-          if old_val.is_a?(Hash) && new_val.is_a?(Hash)
-            deep_merge_hashes!(old_val, new_val)
-          elsif old_val.is_a?(Array) && new_val.is_a?(Array)
-            target[key] = old_val + new_val
-          else
-            target[key] = new_val
-          end
+        if result.key?(key)
+          old_val = result[key]
+          result[key] = if old_val.is_a?(Hash) && new_val.is_a?(Hash)
+                          deep_merge_hashes(old_val, new_val, current_depth + 1)
+                        elsif old_val.is_a?(Array) && new_val.is_a?(Array)
+                          old_val + new_val
+                        elsif old_val.is_a?(Array)
+                          old_val + [new_val]
+                        elsif new_val.is_a?(Array)
+                          [old_val] + new_val
+                        else
+                          # If primitives collide, preserve both in an array unless identical
+                          old_val == new_val ? old_val : [old_val, new_val]
+                        end
         else
-          target[key] = new_val
+          result[key] = new_val
         end
       end
-      target
+      result
     end
     def parse_json
@@ -148,7 +156,7 @@ module JsonMend
           else
             # Stop if we hit a terminator for the current context to avoid consuming it as garbage
             if (current_context?(:array) && char == ']') ||
-               (current_context?(:object_value) && char == '}') ||
+               (current_context?(:object_value) && TERMINATORS_OBJECT_VALUE.include?(char)) ||
                (current_context?(:object_key) && char == '}')
               return JSON_STOP_TOKEN
             end
@@ -165,6 +173,8 @@ module JsonMend
       with_depth_check do
         object = {}
+        @context.push(:object)
         loop do
           skip_whitespaces
@@ -207,6 +217,8 @@ module JsonMend
           object[key] = value
         end
+        @context.pop
         object
       end
     end
@@ -252,7 +264,24 @@ module JsonMend
       value = parse_object_value(colon_found: colon_found || is_bracketed)
       if value == :inferred_true
-        return [nil, nil, false] if %w[true false null].include?(key.downcase)
+        if %w[true false null].include?(key.downcase)
+          # Look back: If it's concatenated to the previous value (like falsetrue), keep it.
+          # If it's separated by space/delimiters, it's trailing garbage, so drop it.
+          if pos_before_key.positive?
+            prev_byte = @scanner.string.getbyte(pos_before_key - 1)
+            # Check ASCII byte ranges for a-z, A-Z, 0-9, $, -, and _
+            is_concatenated = prev_byte && (
+              prev_byte.between?(48, 57)  || # 0-9
+              prev_byte.between?(65, 90)  || # A-Z
+              prev_byte.between?(97, 122) || # a-z
+              [36, 45, 95].include?(prev_byte) # $, -, _
+            )
+          else
+            is_concatenated = false
+          end
+          return [nil, nil, false] unless is_concatenated
+        end
         value = true
       end
@@ -307,7 +336,7 @@ module JsonMend
       # If parse_json returned JSON_STOP_TOKEN (nothing found due to garbage->terminator),
       # treat it as nil (null) for object values to be safe.
-      value == JSON_STOP_TOKEN ? nil : value
+      value == JSON_STOP_TOKEN ? '' : value
     end
     # Encapsulates the logic for merging an array that appears without a key.
@@ -660,12 +689,17 @@ module JsonMend
       return false unless missing_quotes && current_context?(:object_value)
       i = 1
-      next_c = peek_char(i)
-      while next_c && ![rstring_delimiter, lstring_delimiter].include?(next_c)
+      saved_pos = @scanner.pos
+      @scanner.getch # Skip char at offset 0
+      while (next_c = @scanner.getch)
+        break if [rstring_delimiter, lstring_delimiter].include?(next_c)
         i += 1
-        next_c = peek_char(i)
       end
+      @scanner.pos = saved_pos
       return false unless next_c
       # We found a quote, now let's make sure there's a ":" following
@@ -674,27 +708,28 @@ module JsonMend
       i = skip_whitespaces_at(start_idx: i)
       next_c = peek_char(i)
-      if next_c && next_c == ':'
-        @scanner.pos -= 1
-        return true
-      end
+      return true if next_c && next_c == ':'
       false
     end
     def determine_complex_delimiter_action(lstring_delimiter, rstring_delimiter)
+      saved_pos = @scanner.pos
+      @scanner.getch # Skip char at offset 0
       i = 1
-      next_c = peek_char(i)
       check_comma_in_object_value = true
       # Check if eventually there is a rstring delimiter, otherwise we bail
-      while next_c && ![rstring_delimiter, lstring_delimiter].include?(next_c)
+      while (next_c = @scanner.getch)
+        break if [rstring_delimiter, lstring_delimiter].include?(next_c)
         # This is a bit of a weird workaround, essentially in object_value context we don't always break on commas
         # This is because the routine after will make sure to correct any bad guess and this solves a corner case
         check_comma_in_object_value = false if check_comma_in_object_value && next_c.match?(/\p{L}/)
         # If we are in an object context, let's check for the right delimiters
-        if (context_contain?(:object_key) && TERMINATORS_OBJECT_KEY.include?(next_c)) ||
-           (context_contain?(:object_value) && TERMINATORS_OBJECT_KEY.include?(next_c)) ||
+        if (context_contain?(:object) && TERMINATORS_OBJECT_KEY.include?(next_c)) ||
            (context_contain?(:array) && TERMINATORS_ARRAY_ITEM.include?(next_c)) ||
            (
              check_comma_in_object_value &&
@@ -705,9 +740,11 @@ module JsonMend
         end
         i += 1
-        next_c = peek_char(i)
       end
+      @scanner.pos = saved_pos
+      next_c = peek_char(i)
       # If we stopped for a comma in object_value context, let's check if find a "} at the end of the string
       if next_c == ',' && current_context?(:object_value)
         i += 1
@@ -719,8 +756,8 @@ module JsonMend
         next_c = peek_char(i)
         return [true, false] if TERMINATORS_OBJECT_VALUE.include?(next_c)
       elsif next_c == rstring_delimiter && peek_char(i - 1) != '\\'
-        # Check if self.index:self.index+i is only whitespaces, break if that's the case
-        return [false, false] if (1..i).all? { |j| peek_char(j).to_s.match(/\s/) }
+        # Check if self.index:self.index+i is only whitespaces
+        return [false, false] if skip_whitespaces_at(start_idx: 1) >= i
         if current_context?(:object_value)
           return check_unmatched_in_object_value(index: i, lstring_delimiter:, rstring_delimiter:)
@@ -747,23 +784,30 @@ module JsonMend
         next_c = peek_char(index)
         return [true, false] if next_c == ':'
       end
       # We found a delimiter and we need to check if this is a key
       # so find a rstring_delimiter and a colon after
       index = skip_to_character(rstring_delimiter, start_idx: index + 1)
       index += 1
-      next_c = peek_char(index)
-      while next_c && next_c != ':'
-        if TERMINATORS_VALUE.include?(next_c) || (
-          next_c == rstring_delimiter &&
-          peek_char(index - 1) != '\\'
-        )
-          break
-        end
+      saved_pos = @scanner.pos
+      index.times { @scanner.getch } # Advance to starting index safely
+      while (next_c = @scanner.getch)
+        break if next_c == ':'
+        # Safely determine if the previous character was a backslash, guarding against multibyte characters
+        prev_byte_idx = @scanner.pos - next_c.bytesize - 1
+        is_escaped = prev_byte_idx >= 0 && @scanner.string.getbyte(prev_byte_idx) == 92 # 92 is backslash
+        break if TERMINATORS_VALUE.include?(next_c) || (next_c == rstring_delimiter && !is_escaped)
         index += 1
-        next_c = peek_char(index)
       end
+      @scanner.pos = saved_pos
+      next_c = peek_char(index)
       # Only if we fail to find a ':' then we know this is misplaced quote
       return [true, true] if next_c != ':'
@@ -772,21 +816,19 @@ module JsonMend
     def check_unmatched_in_array(rstring_delimiter:)
       saved_pos = @scanner.pos
       @scanner.getch # Skip the current char (the potential closer)
+      pos_after_first_quote = @scanner.pos # Safely records offset even if quote was a multibyte smart quote
       found_next = false
-      j = 1
       # Scan forward linearly
       while (c = @scanner.getch)
-        j += 1
         next if c != rstring_delimiter
         # Check if escaped (count preceding backslashes)
-        # We need to look behind from the current scanner position
         bk = 1
         slashes = 0
-        # Look back in the string buffer directly for speed
         while (@scanner.pos - 1 - bk >= 0) &&
               (char_code = @scanner.string.getbyte(@scanner.pos - 1 - bk)) &&
               char_code == 92 # 92 is backslash
@@ -800,22 +842,26 @@ module JsonMend
         end
       end
+      # Record exact byte position after we found the next valid quote
+      pos_after_second_quote = @scanner.pos
+      pos_before_second_quote = found_next ? pos_after_second_quote - rstring_delimiter.bytesize : @scanner.pos
       # Reset position immediately after scanning
       @scanner.pos = saved_pos
       # Check conditions to STOP (treat as closing quote):
       # a) Strictly whitespace between quotes
-      # We can check this by examining the substring we just scanned
-      substring_between = @scanner.string.byteslice(saved_pos + 1, j - 2)
+      byte_length = pos_before_second_quote - pos_after_first_quote
+      byte_length = 0 if byte_length.negative?
+      substring_between = @scanner.string.byteslice(pos_after_first_quote, byte_length)
       is_whitespace = substring_between&.match?(/\A\s*\z/)
       # b) Next quote is followed by a separator
       is_next_closer = false
       if found_next
-        # We need to peek ahead from where we found the next quote.
-        # Since we reset the scanner, we can use peek_char with the calculated offset `j`
-        # OR better, temporarily move scanner to `saved_pos + j`
-        @scanner.pos = saved_pos + j
+        # Jump directly to the exact byte offset after the second quote!
+        @scanner.pos = pos_after_second_quote
         @scanner.skip(/\s+/)
         is_next_closer = TERMINATORS_VALUE.include?(@scanner.check(/./))
         @scanner.pos = saved_pos
@@ -844,7 +890,8 @@ module JsonMend
       next_c = peek_char(i)
       is_gap_clean = true
-      is_gap_clean = (1...i).all? { |k| peek_char(k)&.match?(/\s/) } if missing_quotes && next_c
+      is_gap_clean = skip_whitespaces_at(start_idx: 1) >= i if missing_quotes && next_c
       if next_c && is_gap_clean
         i += 1
         # found a delimiter, now we need to check that is followed strictly by a comma or brace
@@ -1047,8 +1094,22 @@ module JsonMend
       missing_quotes:
     )
       return false unless missing_quotes
-      return true if current_context?(:object_key) && (char == ':' || char.match?(/\s/))
-      return true if current_context?(:object_key) && TERMINATORS_ARRAY.include?(char)
+      if current_context?(:object_key)
+        return true if char == ':' || char.match?(/\s/) || TERMINATORS_ARRAY.include?(char)
+        if char == ','
+          # Break on comma UNLESS it looks like part of a number format (e.g., 105,12)
+          # We check if the comma is flanked by digits on both sides
+          prev_byte = @scanner.pos.positive? ? @scanner.string.getbyte(@scanner.pos - 1) : nil
+          next_char = peek_char(1)
+          # Check if the previous byte is ASCII '0' to '9' (bytes 48 to 57)
+          is_number_comma = prev_byte&.between?(48, 57) && next_char&.match?(/\d/)
+          return true unless is_number_comma
+        end
+      end
       return true if current_context?(:array) && TERMINATORS_ARRAY_ITEM.include?(char)
       false
@@ -1090,34 +1151,33 @@ module JsonMend
       @scanner.getch if peek_char == '"'
       # Attempt to convert the string to the appropriate number type.
-      # Use rescue to handle conversion errors gracefully, returning the original string.
-      begin
-        # Fix for Ruby < 3.4: "1." is not a valid float.
-        # If it ends with '.', we strip the dot and force Float conversion
-        # to ensure "1." becomes 1.0 (Float) instead of 1 (Integer).
-        if scanned_str.end_with?('.')
-          Float(scanned_str[0...-1])
-        elsif scanned_str.include?(',')
-          # Check if commas are being used as thousands separators (e.g., 1,234 or 1,234,567.89)
-          if scanned_str.count(',') > 1 || scanned_str.match?(/,\d{3}(?:\.\d+)?$/)
-            cleaned = scanned_str.delete(',')
-            if cleaned.match?(/[.eE]/)
-              Float(cleaned)
-            else
-              Integer(cleaned, 10)
-            end
-          else
-            # Treat single comma as a decimal point (European style, e.g., 1,5 -> 1.5)
-            Float(scanned_str.tr(',', '.'))
-          end
-        elsif scanned_str.match?(/[.eE]/)
-          Float(scanned_str)
-        else
-          Integer(scanned_str, 10)
-        end
-      rescue ArgumentError
-        scanned_str
-      end
+      # Fix for Ruby < 3.4: "1." is not a valid float.
+      # If it ends with '.', we strip the dot and force Float conversion
+      # to ensure "1." becomes 1.0 (Float) instead of 1 (Integer).
+      result = if scanned_str.end_with?('.')
+                 Float(scanned_str[0...-1], exception: false)
+               elsif scanned_str.include?(',')
+                 # Check if commas are being used as thousands separators (e.g., 1,234 or 1,234,567.89)
+                 if scanned_str.count(',') > 1 || scanned_str.match?(/,\d{3}(?:\.\d+)?$/)
+                   cleaned = scanned_str.delete(',')
+                   if cleaned.match?(/[.eE]/)
+                     Float(cleaned, exception: false)
+                   else
+                     Integer(cleaned, 10, exception: false)
+                   end
+                 else
+                   # Treat single comma as a decimal point (European style, e.g., 1,5 -> 1.5)
+                   Float(scanned_str.tr(',', '.'), exception: false)
+                 end
+               elsif scanned_str.match?(/[.eE]/)
+                 Float(scanned_str, exception: false)
+               else
+                 Integer(scanned_str, 10, exception: false)
+               end
+      return scanned_str if result.is_a?(Float) && (result.infinite? || result.nan?)
+      result || scanned_str
     end
     # Parses the JSON literals `true`, `false`, or `null`.
@@ -1151,23 +1211,26 @@ module JsonMend
       # Check for a line comment `//...` or `#...`
       elsif @scanner.scan(%r{//|#})
         in_array = context_contain?(:array)
-        in_object = context_contain?(:object_value)
-        if context_contain?(:object_key)
-          # If parsing a key, we must stop at ':' and structural closers
-          @scanner.scan_until(/(?=[\n\r:}\]]|\\n|\\r)/) || @scanner.terminate
-        elsif in_array && in_object
-          # Nested ambiguity, stop at any closer
-          @scanner.scan_until(/(?=[\n\r}\]]|\\n|\\r)/) || @scanner.terminate
-        elsif in_array
-          # Inside array, stop at ']'
-          @scanner.scan_until(/(?=[\n\r\]]|\\n|\\r)/) || @scanner.terminate
-        elsif in_object
-          # Inside object value, stop at '}'
-          @scanner.scan_until(/(?=[\n\r}]|\\n|\\r)/) || @scanner.terminate
+        in_object = context_contain?(:object)
+        pattern = if context_contain?(:object_key)
+                    /[\n\r:}\]]|\\n|\\r/
+                  elsif in_array && in_object
+                    /[\n\r}\]]|\\n|\\r/
+                  elsif in_array
+                    /[\n\r\]]|\\n|\\r/
+                  elsif in_object
+                    /[\n\r}]|\\n|\\r/
+                  else
+                    /[\n\r]|\\n|\\r/
+                  end
+        if (text = @scanner.scan_until(pattern))
+          # Un-consume the terminator so it can be handled structurally
+          terminator_size = text.end_with?('\\n', '\\r') ? 2 : 1
+          @scanner.pos -= terminator_size
         else
-          # Top level or neutral, stop at newline
-          @scanner.scan_until(/(?=[\n\r]|\\n|\\r)/) || @scanner.terminate
+          @scanner.terminate
         end
         # Consume literal escaped newlines so they don't break subsequent parsing.

data/lib/json_mend/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module JsonMend
-  VERSION = '0.2.2'
+  VERSION = '0.3.1'
 end

data/lib/json_mend.rb CHANGED Viewed

@@ -24,7 +24,7 @@ module JsonMend
         # Verify the native parser didn't produce invalid UTF-8 (like unpaired surrogates)
         # by ensuring it can safely dump its own output.
-        JSON.dump(parsed)
+        JSON.generate(parsed)
         parsed
       rescue JSON::ParserError, JSON::GeneratorError

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: json_mend
 version: !ruby/object:Gem::Version
-  version: 0.2.2
+  version: 0.3.1
 platform: ruby
 authors:
 - Oleksii Vasyliev