RubyGems - encoded_id - Versions diffs - 1.0.0.rc5 → 1.0.0.rc7 - Mend

encoded_id 1.0.0.rc5 → 1.0.0.rc7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +99 -3
data/README.md +86 -329
data/context/encoded_id.md +437 -0
data/lib/encoded_id/alphabet.rb +34 -3
data/lib/encoded_id/blocklist.rb +100 -0
data/lib/encoded_id/encoders/base_configuration.rb +154 -0
data/lib/encoded_id/encoders/hashid.rb +527 -0
data/lib/encoded_id/encoders/hashid_configuration.rb +40 -0
data/lib/encoded_id/encoders/hashid_consistent_shuffle.rb +110 -0
data/lib/encoded_id/encoders/hashid_ordinal_alphabet_separator_guards.rb +244 -0
data/lib/encoded_id/encoders/hashid_salt.rb +51 -0
data/lib/encoded_id/encoders/my_sqids.rb +454 -0
data/lib/encoded_id/encoders/sqids.rb +59 -0
data/lib/encoded_id/encoders/sqids_configuration.rb +22 -0
data/lib/encoded_id/encoders/sqids_with_blocklist_mode.rb +54 -0
data/lib/encoded_id/hex_representation.rb +29 -14
data/lib/encoded_id/reversible_id.rb +115 -82
data/lib/encoded_id/version.rb +3 -1
data/lib/encoded_id.rb +34 -4
metadata +34 -26
data/.devcontainer/Dockerfile +0 -9
data/.devcontainer/compose.yml +0 -8
data/.devcontainer/devcontainer.json +0 -8
data/.standard.yml +0 -2
data/Gemfile +0 -36
data/Rakefile +0 -20
data/Steepfile +0 -5
data/ext/encoded_id/extconf.rb +0 -3
data/ext/encoded_id/extension.c +0 -123
data/ext/encoded_id/hashids.c +0 -939
data/ext/encoded_id/hashids.h +0 -139
data/lib/encoded_id/hash_id.rb +0 -227
data/lib/encoded_id/hash_id_consistent_shuffle.rb +0 -27
data/lib/encoded_id/hash_id_salt.rb +0 -15
data/lib/encoded_id/ordinal_alphabet_separator_guards.rb +0 -90
data/rbs_collection.yaml +0 -24
data/sig/encoded_id.rbs +0 -189

data/lib/encoded_id/encoders/hashid_configuration.rb ADDED Viewed

@@ -0,0 +1,40 @@
+# frozen_string_literal: true
+# rbs_inline: enabled
+module EncodedId
+  module Encoders
+    # Configuration for the Hashids encoder.
+    # Requires a salt, which must be a String longer than 3 characters.
+    class HashidConfiguration < BaseConfiguration
+      # @rbs @salt: String
+      attr_reader :salt
+      # @rbs (salt: String, **untyped options) -> void
+      def initialize(salt:, **options)
+        @salt = validate_salt(salt)
+        super(**options)
+      end
+      # @rbs () -> Symbol
+      def encoder_type
+        :hashids
+      end
+      # Build a Hashid encoder from the salt and the configured length, alphabet and blocklist settings
+      # @rbs () -> Hashid
+      def create_encoder
+        Hashid.new(salt, min_length, alphabet, blocklist, blocklist_mode, blocklist_max_length)
+      end
+      private
+      # @rbs (String salt) -> String
+      def validate_salt(salt)
+        return salt if salt.is_a?(String) && salt.size > 3
+        raise InvalidConfigurationError, "salt must be a string longer than 3 characters"
+      end
+    end
+  end
+end

data/lib/encoded_id/encoders/hashid_consistent_shuffle.rb ADDED Viewed

@@ -0,0 +1,110 @@
+# frozen_string_literal: true
+# rbs_inline: enabled
+module EncodedId
+  module Encoders
+    # Implements a deterministic, salt-based shuffle algorithm for Hashids.
+    #
+    # This is the core obfuscation mechanism that makes Hashids non-sequential.
+    # The algorithm has several critical properties:
+    #
+    # 1. **Deterministic**: Same input + same salt = same output (always)
+    # 2. **Reversible**: The shuffle can be undone if needed
+    # 3. **Salt-dependent**: Different salts produce different permutations
+    # 4. **Consistent**: Multiple calls with the same salt produce the same shuffle
+    #
+    # == Algorithm Overview:
+    #
+    # The shuffle works by:
+    # - Walking backwards through the collection (from last to second element)
+    # - For each position i, selecting a swap partner j using the salt
+    # - The swap position is calculated from: (salt_char + index + running_total) % i
+    # - Cycling through salt characters, wrapping when we reach the end
+    #
+    # This is similar to a Fisher-Yates shuffle, but with deterministic swap positions
+    # derived from the salt rather than random numbers.
+    #
+    # == Why Two Salt Parts?
+    #
+    # The algorithm accepts salt in two parts (salt_part_1 and salt_part_2) to support
+    # scenarios where the salt is constructed from multiple sources:
+    # - salt_part_1: Primary salt (e.g., lottery + user salt)
+    # - salt_part_2: Secondary salt (e.g., pre-shuffle alphabet copy)
+    #
+    # When cycling through salt characters, it reads from salt_part_1 first, then
+    # salt_part_2 if the index exceeds salt_part_1's length.
+    #
+    # == Example:
+    #
+    # Input: [1, 2, 3, 4], salt: [65, 66, 67] (ABC)
+    # Step 1: i=3, salt[0]=65, ord_total=65  → swap positions 3 and ((65+0+65)%3=1)  → [1,4,3,2]
+    # Step 2: i=2, salt[1]=66, ord_total=131 → swap positions 2 and ((66+1+131)%2=0) → [3,4,1,2]
+    # Step 3: i=1, salt[2]=67, ord_total=198 → swap positions 1 and ((67+2+198)%1=0) → [4,3,1,2]
+    # Result: [4, 3, 1, 2]
+    #
+    module HashidConsistentShuffle
+      # Deterministically shuffle a collection based on a salt.
+      #
+      # Shuffles the collection in place using a salt-based algorithm that produces
+      # consistent results for the same inputs.
+      #
+      # @param collection_to_shuffle [Array<Integer>] Array to shuffle (modified in place)
+      # @param salt_part_1 [Array<Integer>] Primary salt characters (as ordinals)
+      # @param salt_part_2 [Array<Integer>?] Optional secondary salt characters
+      # @param max_salt_length [Integer] Maximum salt length to use (for cycling)
+      # @return [Array<Integer>] The shuffled array (same object as input)
+      # @raise [SaltError] If salt_part_1 is shorter than max_salt_length and salt_part_2 is nil
+      #
+      # @rbs (Array[Integer] collection_to_shuffle, Array[Integer] salt_part_1, Array[Integer]? salt_part_2, Integer max_salt_length) -> Array[Integer]
+      def consistent_shuffle!(collection_to_shuffle, salt_part_1, salt_part_2, max_salt_length)
+        salt_part_1_length = salt_part_1.length
+        # Validate we have enough salt. If max_salt_length exceeds salt_part_1,
+        # we need salt_part_2 to provide the additional characters.
+        raise SaltError, "Salt is too short in shuffle" if salt_part_1_length < max_salt_length && salt_part_2.nil?
+        # Short-circuit if there's nothing to shuffle. NOTE(review): the salt_part_1.nil? test looks unreachable (.length was already called on it above).
+        return collection_to_shuffle if collection_to_shuffle.empty? || max_salt_length == 0 || salt_part_1.nil? || salt_part_1_length == 0
+        # idx: Current position in the salt (cycles through 0..max_salt_length-1)
+        # ord_total: Running sum of salt character ordinals (affects swap positions)
+        idx = ord_total = 0
+        # Walk backwards through the collection from last to second element.
+        # The loop stops at i=1: index 0 has no lower position to swap with (it can still be picked as j).
+        i = collection_to_shuffle.length - 1
+        while i >= 1
+          # Get the current salt character ordinal.
+          # If we've exceeded salt_part_1, read from salt_part_2.
+          n = if idx >= salt_part_1_length
+            raise SaltError, "Salt shuffle has failed" unless salt_part_2
+            salt_part_2[idx - salt_part_1_length]
+          else
+            salt_part_1[idx]
+          end
+          # Update running total with current salt character.
+          ord_total += n
+          # Calculate swap position deterministically from:
+          # - n: Current salt character ordinal
+          # - idx: Current position in salt
+          # - ord_total: Running sum of all salt characters used so far
+          # - i: Current position in collection (modulo i keeps j in 0...i, i.e. always below i)
+          j = (n + idx + ord_total) % i
+          # Swap elements at positions i and j.
+          collection_to_shuffle[i], collection_to_shuffle[j] = collection_to_shuffle[j], collection_to_shuffle[i]
+          # Move to next salt character (wrapping around if needed).
+          idx = (idx + 1) % max_salt_length
+          i -= 1
+        end
+        collection_to_shuffle
+      end
+    end
+  end
+end

data/lib/encoded_id/encoders/hashid_ordinal_alphabet_separator_guards.rb ADDED Viewed

@@ -0,0 +1,244 @@
+# frozen_string_literal: true
+# rbs_inline: enabled
+module EncodedId
+  module Encoders
+    # Prepares and partitions the character sets for HashID encoding.
+    #
+    # This class is responsible for splitting a single input alphabet into three disjoint sets:
+    # 1. **Alphabet**: Main characters used to encode numbers
+    # 2. **Separators (seps)**: Characters that separate encoded numbers in the hash
+    # 3. **Guards**: Characters added at boundaries to meet minimum length requirements
+    #
+    # == Initialization Process:
+    #
+    # Step 1: Start with default separators ("cfhistuCFHISTU")
+    # Step 2: Ensure separators and alphabet are disjoint (remove overlaps)
+    # Step 3: Shuffle separators using the salt
+    # Step 4: Balance alphabet-to-separator ratio (target ≈ 3.5:1)
+    # Step 5: Shuffle alphabet using the salt
+    # Step 6: Create guards from alphabet or separators (target ≈ 12:1 alphabet-to-guards)
+    #
+    # == Character Set Ratios:
+    #
+    # The algorithm maintains specific ratios between the character sets:
+    # - Alphabet : Separators ≈ 3.5 : 1 (SEP_DIV)
+    # - Alphabet : Guards ≈ 12 : 1 (GUARD_DIV)
+    #
+    # These ratios ensure:
+    # - Enough separators to avoid patterns in multi-number hashes
+    # - Guards are rare enough to not waste space but common enough to be useful
+    # - Alphabet is large enough for efficient encoding (shorter hashes)
+    #
+    # == Why Ordinals?
+    #
+    # All characters are stored as integer ordinals (Unicode codepoints) rather than strings.
+    # This provides:
+    # - Faster comparisons and lookups
+    # - More efficient memory usage
+    # - Direct array indexing without string allocations
+    #
+    class HashidOrdinalAlphabetSeparatorGuards
+      include HashidConsistentShuffle
+      # Target ratio of alphabet to separators (alphabet.length / seps.length ≈ 3.5)
+      SEP_DIV = 3.5
+      # Default separator characters - chosen to be visually distinct and common in many fonts
+      DEFAULT_SEPS = "cfhistuCFHISTU".chars.map(&:ord).freeze
+      # Target ratio of alphabet to guards (alphabet.length / guards.length ≈ 12)
+      GUARD_DIV = 12.0
+      # Space character ordinal - used as a placeholder when removing characters
+      SPACE_CHAR = " ".ord
+      # @rbs @alphabet: Array[Integer]
+      # @rbs @salt: Array[Integer]
+      # @rbs @seps: Array[Integer]
+      # @rbs @guards: Array[Integer]
+      # @rbs @seps_tr_selector: String
+      # @rbs @guards_tr_selector: String
+      # Initialize and partition the character sets.
+      #
+      # Takes an alphabet and salt, then:
+      # 1. Converts all characters to ordinals (integer codepoints)
+      # 2. Partitions the alphabet into separators, guards, and the remaining alphabet
+      # 3. Shuffles the separators and the alphabet deterministically using the salt
+      # 4. Balances the ratios between the sets
+      # 5. Creates escaped versions for use with String#tr
+      #
+      # The alphabet, seps and guards arrays are frozen after setup (@salt is not frozen here).
+      #
+      # @param alphabet [Alphabet] The character set to partition
+      # @param salt [String] The salt used for shuffling
+      #
+      # @rbs (Alphabet alphabet, String salt) -> void
+      def initialize(alphabet, salt)
+        @alphabet = alphabet.characters.chars.map(&:ord)
+        @salt = salt.chars.map(&:ord)
+        setup_seps
+        setup_guards
+        # Pre-compute escaped versions for String#tr operations during decode.
+        # This escapes '-', '\\', and '^', which have special meaning in tr() character sets.
+        # NOTE(review): Integer#chr without an encoding raises RangeError for ordinals above 255 unless Encoding.default_internal is set — confirm the alphabet is limited accordingly.
+        @seps_tr_selector = escape_characters_string_for_tr(@seps.map(&:chr))
+        @guards_tr_selector = escape_characters_string_for_tr(@guards.map(&:chr))
+        @alphabet.freeze
+        @seps.freeze
+        @guards.freeze
+      end
+      attr_reader :salt #: Array[Integer]
+      attr_reader :alphabet #: Array[Integer]
+      attr_reader :seps #: Array[Integer]
+      attr_reader :guards #: Array[Integer]
+      attr_reader :seps_tr_selector #: String
+      attr_reader :guards_tr_selector #: String
+      private
+      # Escape special characters for safe use in String#tr.
+      #
+      # String#tr treats certain characters specially:
+      # - '-' : Defines character ranges (e.g., 'a-z')
+      # - '\\' : Escape character
+      # - '^' : Negation when at the start
+      #
+      # This method escapes these characters so they're treated literally.
+      #
+      # Example: ['a', '-', 'z'] → "a\\-z" (not a range from a to z)
+      #
+      # @param chars [Array<String>] Characters to join and escape
+      # @return [String] Escaped string safe for use in tr()
+      #
+      # @rbs (Array[String] chars) -> String
+      def escape_characters_string_for_tr(chars)
+        chars.join.gsub(/([-\\^])/) { "\\#{$1}" }
+      end
+      # Setup and partition separators from the alphabet.
+      #
+      # This method:
+      # 1. Starts with default separators ("cfhistuCFHISTU")
+      # 2. Makes alphabet and separators disjoint (removes overlaps)
+      # 3. Removes any space characters that may have been introduced
+      # 4. Shuffles separators using the salt
+      # 5. Balances the alphabet-to-separator ratio to approximately 3.5:1
+      # 6. Shuffles the final alphabet using the salt
+      #
+      # The ratio balancing ensures:
+      # - If there are too few separators, take some from the alphabet
+      # - If there are too many separators, trim the excess
+      # - Minimum of 2 separators is maintained
+      #
+      # @rbs () -> void
+      def setup_seps
+        @seps = DEFAULT_SEPS.dup
+        # Make alphabet and separators disjoint: a default separator found in the alphabet is blanked out of
+        # the alphabet (and kept as a separator); one not found in the alphabet is blanked out of the separators.
+        @seps.length.times do |sep_index|
+          if (alphabet_index = @alphabet.index(@seps[sep_index]))
+            @alphabet = remove_character_at(@alphabet, alphabet_index)
+          else
+            @seps = remove_character_at(@seps, sep_index)
+          end
+        end
+        # Remove space placeholders introduced by remove_character_at (Array#delete drops every SPACE_CHAR entry)
+        @alphabet.delete(SPACE_CHAR)
+        @seps.delete(SPACE_CHAR)
+        salt_length = @salt.length
+        consistent_shuffle!(@seps, @salt, nil, salt_length)
+        # Balance the alphabet-to-separator ratio to approximately SEP_DIV (3.5:1)
+        alphabet_length = @alphabet.length
+        seps_count = @seps.length
+        if seps_count == 0 || (alphabet_length / seps_count.to_f) > SEP_DIV
+          seps_target_count = (alphabet_length / SEP_DIV).ceil
+          seps_target_count = 2 if seps_target_count == 1 # Minimum 2 separators
+          if seps_target_count > seps_count
+            # Not enough separators - take some from the alphabet.
+            diff = seps_target_count - seps_count
+            # NOTE(review): assumes diff <= @alphabet.length so the slices below are non-nil — presumably ensured upstream; verify
+            additonal_seps = @alphabet[0, diff] #: Array[Integer]
+            @seps += additonal_seps
+            @alphabet = @alphabet[diff..] #: Array[Integer]
+          else
+            # Too many separators - trim to target length.
+            @seps = @seps[0, seps_target_count] #: Array[Integer]
+          end
+        end
+        consistent_shuffle!(@alphabet, @salt, nil, salt_length)
+      end
+      # Setup guards by extracting them from separators or alphabet.
+      #
+      # Guards are special boundary characters used for minimum length padding.
+      # They're chosen from either the separator set or alphabet based on alphabet size:
+      #
+      # - If alphabet is very small (< 3 characters): Take guards from separators
+      # - Otherwise: Take guards from alphabet
+      #
+      # The number of guards is calculated to maintain approximately a 12:1 ratio
+      # with the alphabet (alphabet.length / GUARD_DIV).
+      #
+      # Why this matters:
+      # - Guards don't encode data, so we want them to be rare
+      # - But we need enough variety to avoid patterns in minimum-length hashes
+      # - Taking from separators when alphabet is small preserves encoding characters
+      #
+      # @rbs () -> void
+      def setup_guards
+        alphabet_length = @alphabet.length
+        gc = (alphabet_length / GUARD_DIV).ceil
+        if alphabet_length < 3
+          # Very small alphabet - take guards from separators to preserve alphabet.
+          @guards = @seps[0, gc] #: Array[Integer]
+          @seps = @seps[gc..] || [] #: Array[Integer]
+        else
+          # Normal case - take guards from alphabet.
+          @guards = @alphabet[0, gc] #: Array[Integer]
+          @alphabet = @alphabet[gc..] || [] #: Array[Integer]
+        end
+      end
+      # Remove a character from an array by replacing it with a space placeholder.
+      #
+      # This is used during the separator/alphabet disjoint operation.
+      # Instead of mutating the array in place, it creates a new array with:
+      # - All characters before the index
+      # - A SPACE_CHAR placeholder
+      # - All characters after the index
+      #
+      # The space acts as a placeholder that gets removed later by Array#delete.
+      # This approach maintains array indices during iteration.
+      #
+      # Example:
+      #   remove_character_at([97, 98, 99], 1) → [97, 32, 99]  # [a, space, c]
+      #
+      # @param array [Array<Integer>] The array to remove from
+      # @param index [Integer] The index of the character to remove
+      # @return [Array<Integer>] New array with character replaced by space
+      #
+      # @rbs (Array[Integer] array, Integer index) -> Array[Integer]
+      def remove_character_at(array, index)
+        tail = array[index + 1..]
+        head = array[0, index] || []
+        head << SPACE_CHAR
+        tail ? head + tail : head
+      end
+    end
+  end
+end

data/lib/encoded_id/encoders/hashid_salt.rb ADDED Viewed

@@ -0,0 +1,51 @@
+# frozen_string_literal: true
+# rbs_inline: enabled
+module EncodedId
+  module Encoders
+    # Simple wrapper class for HashID salt values.
+    #
+    # This class encapsulates the salt string and provides convenient access to:
+    # - The original salt string
+    # - The salt as an array of individual characters
+    #
+    # Both representations are frozen to prevent accidental modification.
+    #
+    # == Security Note:
+    #
+    # The salt is the 'secret' that makes your Hashids unique. Without knowing the
+    # salt, it is harder to reverse-engineer the encoding scheme or predict hash
+    # values. However, Hashids is not a secure encryption technique: it should only
+    # be used to obfuscate values that are not sensitive (values you would simply
+    # prefer the average person not see).
+    #
+    class HashidSalt
+      # @rbs @salt: String
+      # @rbs @chars: Array[String]
+      # Initialize a new salt wrapper.
+      #
+      # @param salt [String] The salt string (can be empty but must be a String)
+      # @raise [SaltError] If salt is not a String
+      #
+      # @rbs (String salt) -> void
+      def initialize(salt)
+        unless salt.is_a?(String)
+          raise SaltError, "The salt must be a String"
+        end
+        # Freeze both representations to prevent later modification.
+        # Note: String#freeze freezes the caller's string object itself; no copy is made.
+        @salt = salt.freeze
+        @chars = salt.chars.freeze
+      end
+      # The original salt string (frozen)
+      attr_reader :salt #: String
+      # The salt as an array of individual characters (the array is frozen; its elements are not)
+      attr_reader :chars #: Array[String]
+    end
+  end
+end