encoded_id 1.0.0.rc5 → 1.0.0.rc7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +99 -3
  3. data/README.md +86 -329
  4. data/context/encoded_id.md +437 -0
  5. data/lib/encoded_id/alphabet.rb +34 -3
  6. data/lib/encoded_id/blocklist.rb +100 -0
  7. data/lib/encoded_id/encoders/base_configuration.rb +154 -0
  8. data/lib/encoded_id/encoders/hashid.rb +527 -0
  9. data/lib/encoded_id/encoders/hashid_configuration.rb +40 -0
  10. data/lib/encoded_id/encoders/hashid_consistent_shuffle.rb +110 -0
  11. data/lib/encoded_id/encoders/hashid_ordinal_alphabet_separator_guards.rb +244 -0
  12. data/lib/encoded_id/encoders/hashid_salt.rb +51 -0
  13. data/lib/encoded_id/encoders/my_sqids.rb +454 -0
  14. data/lib/encoded_id/encoders/sqids.rb +59 -0
  15. data/lib/encoded_id/encoders/sqids_configuration.rb +22 -0
  16. data/lib/encoded_id/encoders/sqids_with_blocklist_mode.rb +54 -0
  17. data/lib/encoded_id/hex_representation.rb +29 -14
  18. data/lib/encoded_id/reversible_id.rb +115 -82
  19. data/lib/encoded_id/version.rb +3 -1
  20. data/lib/encoded_id.rb +34 -4
  21. metadata +34 -26
  22. data/.devcontainer/Dockerfile +0 -9
  23. data/.devcontainer/compose.yml +0 -8
  24. data/.devcontainer/devcontainer.json +0 -8
  25. data/.standard.yml +0 -2
  26. data/Gemfile +0 -36
  27. data/Rakefile +0 -20
  28. data/Steepfile +0 -5
  29. data/ext/encoded_id/extconf.rb +0 -3
  30. data/ext/encoded_id/extension.c +0 -123
  31. data/ext/encoded_id/hashids.c +0 -939
  32. data/ext/encoded_id/hashids.h +0 -139
  33. data/lib/encoded_id/hash_id.rb +0 -227
  34. data/lib/encoded_id/hash_id_consistent_shuffle.rb +0 -27
  35. data/lib/encoded_id/hash_id_salt.rb +0 -15
  36. data/lib/encoded_id/ordinal_alphabet_separator_guards.rb +0 -90
  37. data/rbs_collection.yaml +0 -24
  38. data/sig/encoded_id.rbs +0 -189
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
+ # rbs_inline: enabled
4
+
5
module EncodedId
  module Encoders
    # Settings object for the Hashids-based encoder.
    #
    # In addition to the options forwarded to BaseConfiguration, it holds the
    # salt that the Hashid encoder is constructed with.
    class HashidConfiguration < BaseConfiguration
      # @rbs @salt: String

      attr_reader :salt

      # Validates and stores the salt, then forwards every remaining option
      # to the superclass initializer.
      # @rbs (salt: String, **untyped options) -> void
      def initialize(salt:, **options)
        @salt = validate_salt(salt)
        super(**options)
      end

      # Symbolic name of the encoder this configuration describes.
      # @rbs () -> Symbol
      def encoder_type
        :hashids
      end

      # Build a Hashid encoder from the values held by this configuration.
      # @rbs () -> Hashid
      def create_encoder
        encoder_arguments = [salt, min_length, alphabet, blocklist, blocklist_mode, blocklist_max_length]
        Hashid.new(*encoder_arguments)
      end

      private

      # Returns the salt unchanged when it is a String of more than 3 characters;
      # raises InvalidConfigurationError otherwise.
      # @rbs (String salt) -> String
      def validate_salt(salt)
        acceptable = salt.is_a?(String) && salt.size > 3
        raise InvalidConfigurationError, "salt must be a string longer than 3 characters" unless acceptable

        salt
      end
    end
  end
end
@@ -0,0 +1,110 @@
1
+ # frozen_string_literal: true
2
+
3
+ # rbs_inline: enabled
4
+
5
module EncodedId
  module Encoders
    # Implements a deterministic, salt-based shuffle algorithm for Hashids.
    #
    # This is the core obfuscation mechanism that makes Hashids non-sequential.
    # The algorithm has several critical properties:
    #
    # 1. **Deterministic**: Same input + same salt = same output (always)
    # 2. **Reversible**: The shuffle can be undone if needed
    # 3. **Salt-dependent**: Different salts produce different permutations
    # 4. **Consistent**: Multiple calls with the same salt produce the same shuffle
    #
    # == Algorithm Overview:
    #
    # The shuffle works by:
    # - Walking backwards through the collection (from last to second element)
    # - For each position i, selecting a swap partner j using the salt
    # - The swap position is calculated from: (salt_char + index + running_total) % i,
    #   where running_total already includes the current salt_char
    # - Cycling through salt characters, wrapping when we reach the end
    #
    # This is similar to a Fisher-Yates shuffle, but with deterministic swap positions
    # derived from the salt rather than random numbers.
    #
    # == Why Two Salt Parts?
    #
    # The algorithm accepts salt in two parts (salt_part_1 and salt_part_2) to support
    # scenarios where the salt is constructed from multiple sources:
    # - salt_part_1: Primary salt (e.g., lottery + user salt)
    # - salt_part_2: Secondary salt (e.g., pre-shuffle alphabet copy)
    #
    # When cycling through salt characters, it reads from salt_part_1 first, then
    # salt_part_2 if the index exceeds salt_part_1's length.
    #
    # == Example:
    #
    #   Input: [1, 2, 3, 4], salt: [65, 66, 67] (ABC), max_salt_length: 3
    #   Step 1: i=3, idx=0, n=65, ord_total=65  → j = (65+0+65)  % 3 = 1 → swap 3 and 1 → [1, 4, 3, 2]
    #   Step 2: i=2, idx=1, n=66, ord_total=131 → j = (66+1+131) % 2 = 0 → swap 2 and 0 → [3, 4, 1, 2]
    #   Step 3: i=1, idx=2, n=67, ord_total=198 → j = (67+2+198) % 1 = 0 → swap 1 and 0 → [4, 3, 1, 2]
    #   Result: [4, 3, 1, 2]
    #
    module HashidConsistentShuffle
      # Deterministically shuffle a collection based on a salt.
      #
      # Shuffles the collection in place using a salt-based algorithm that produces
      # consistent results for the same inputs. The collection is returned untouched
      # when it is empty, when max_salt_length is 0, or when salt_part_1 is nil/empty.
      #
      # @param collection_to_shuffle [Array<Integer>] Array to shuffle (modified in place)
      # @param salt_part_1 [Array<Integer>] Primary salt characters (as ordinals)
      # @param salt_part_2 [Array<Integer>?] Optional secondary salt characters
      # @param max_salt_length [Integer] Maximum salt length to use (for cycling)
      # @return [Array<Integer>] The shuffled array (same object as input)
      # @raise [SaltError] If salt is too short or shuffle fails
      #
      # @rbs (Array[Integer] collection_to_shuffle, Array[Integer] salt_part_1, Array[Integer]? salt_part_2, Integer max_salt_length) -> Array[Integer]
      def consistent_shuffle!(collection_to_shuffle, salt_part_1, salt_part_2, max_salt_length)
        # A missing primary salt counts as an empty one, so the guards below
        # handle it instead of failing on nil.length.
        salt_part_1_length = salt_part_1 ? salt_part_1.length : 0

        # Validate we have enough salt. If max_salt_length exceeds salt_part_1,
        # we need salt_part_2 to provide the additional characters.
        raise SaltError, "Salt is too short in shuffle" if salt_part_1_length < max_salt_length && salt_part_2.nil?

        # Short-circuit if there's nothing to shuffle (or nothing to shuffle with).
        return collection_to_shuffle if collection_to_shuffle.empty? || max_salt_length == 0 || salt_part_1_length == 0

        # idx: Current position in the salt (cycles through 0..max_salt_length-1)
        # ord_total: Running sum of salt character ordinals (affects swap positions)
        idx = ord_total = 0

        # Walk backwards through the collection from last to second element.
        # The first element (i=0) is skipped as a pivot: it has no lower index to swap with.
        i = collection_to_shuffle.length - 1
        while i >= 1
          # Get the current salt character ordinal.
          # If we've exceeded salt_part_1, read from salt_part_2.
          n = if idx >= salt_part_1_length
            raise SaltError, "Salt shuffle has failed" unless salt_part_2

            salt_part_2[idx - salt_part_1_length]
          else
            salt_part_1[idx]
          end

          # Update running total with current salt character. This happens BEFORE
          # the swap position is computed, so ord_total includes n.
          ord_total += n

          # Calculate swap position deterministically from:
          # - n: Current salt character ordinal
          # - idx: Current position in salt
          # - ord_total: Running sum of all salt characters used so far (including n)
          # - i: Current position in collection (modulo to ensure valid index)
          j = (n + idx + ord_total) % i

          # Swap elements at positions i and j.
          collection_to_shuffle[i], collection_to_shuffle[j] = collection_to_shuffle[j], collection_to_shuffle[i]

          # Move to next salt character (wrapping around if needed).
          idx = (idx + 1) % max_salt_length
          i -= 1
        end

        collection_to_shuffle
      end
    end
  end
end
@@ -0,0 +1,244 @@
1
+ # frozen_string_literal: true
2
+
3
+ # rbs_inline: enabled
4
+
5
module EncodedId
  module Encoders
    # Prepares and partitions the character sets for HashID encoding.
    #
    # This class is responsible for splitting a single input alphabet into three disjoint sets:
    # 1. **Alphabet**: Main characters used to encode numbers
    # 2. **Separators (seps)**: Characters that separate encoded numbers in the hash
    # 3. **Guards**: Characters added at boundaries to meet minimum length requirements
    #
    # == Initialization Process:
    #
    # Step 1: Start with default separators ("cfhistuCFHISTU")
    # Step 2: Ensure separators and alphabet are disjoint (remove overlaps)
    # Step 3: Shuffle separators using the salt
    # Step 4: Balance alphabet-to-separator ratio (target ≈ 3.5:1)
    # Step 5: Shuffle alphabet using the salt
    # Step 6: Create guards from alphabet or separators (target ≈ 12:1 alphabet-to-guards)
    #
    # == Character Set Ratios:
    #
    # The algorithm maintains specific ratios between the character sets:
    # - Alphabet : Separators ≈ 3.5 : 1 (SEP_DIV)
    # - Alphabet : Guards ≈ 12 : 1 (GUARD_DIV)
    #
    # These ratios ensure:
    # - Enough separators to avoid patterns in multi-number hashes
    # - Guards are rare enough to not waste space but common enough to be useful
    # - Alphabet is large enough for efficient encoding (shorter hashes)
    #
    # == Why Ordinals?
    #
    # All characters are stored as integer ordinals (Unicode codepoints) rather than strings.
    # This provides:
    # - Faster comparisons and lookups
    # - More efficient memory usage
    # - Direct array indexing without string allocations
    #
    class HashidOrdinalAlphabetSeparatorGuards
      include HashidConsistentShuffle

      # Target ratio of alphabet to separators (alphabet.length / seps.length ≈ 3.5)
      SEP_DIV = 3.5

      # Default separator characters, stored as ordinals.
      # NOTE(review): this is the standard Hashids separator set; upstream documents
      # these letters as chosen to reduce the chance of generating common English
      # profanity - confirm against the Hashids documentation.
      DEFAULT_SEPS = "cfhistuCFHISTU".chars.map(&:ord).freeze

      # Target ratio of alphabet to guards (alphabet.length / guards.length ≈ 12)
      GUARD_DIV = 12.0

      # Space character ordinal - used as a placeholder when removing characters
      SPACE_CHAR = " ".ord

      # @rbs @alphabet: Array[Integer]
      # @rbs @salt: Array[Integer]
      # @rbs @seps: Array[Integer]
      # @rbs @guards: Array[Integer]
      # @rbs @seps_tr_selector: String
      # @rbs @guards_tr_selector: String

      # Initialize and partition the character sets.
      #
      # Takes an alphabet and salt, then:
      # 1. Converts all characters to ordinals (integer codepoints)
      # 2. Partitions the alphabet into separators, guards, and the remaining alphabet
      # 3. Shuffles the separators and the alphabet deterministically using the salt
      # 4. Balances the ratios between the sets
      # 5. Creates escaped versions for use with String#tr
      #
      # The alphabet, separator and guard arrays are frozen after setup to prevent
      # accidental modification (the salt ordinals array is not frozen).
      #
      # @param alphabet [Alphabet] The character set to partition
      # @param salt [String] The salt used for shuffling
      #
      # @rbs (Alphabet alphabet, String salt) -> void
      def initialize(alphabet, salt)
        @alphabet = alphabet.characters.chars.map(&:ord)
        @salt = salt.chars.map(&:ord)

        setup_seps
        setup_guards

        # Pre-compute escaped versions for String#tr operations during decode.
        # This escapes special characters like '-', '\\', and '^' that have
        # special meaning in tr() character ranges.
        #
        # Ordinals are converted with an explicit UTF-8 encoding: a bare Integer#chr
        # raises RangeError for codepoints above 255 (unless Encoding.default_internal
        # is set) and yields binary strings for 128..255.
        @seps_tr_selector = escape_characters_string_for_tr(@seps.map { |ordinal| ordinal.chr(Encoding::UTF_8) })
        @guards_tr_selector = escape_characters_string_for_tr(@guards.map { |ordinal| ordinal.chr(Encoding::UTF_8) })

        @alphabet.freeze
        @seps.freeze
        @guards.freeze
      end

      attr_reader :salt #: Array[Integer]
      attr_reader :alphabet #: Array[Integer]
      attr_reader :seps #: Array[Integer]
      attr_reader :guards #: Array[Integer]
      attr_reader :seps_tr_selector #: String
      attr_reader :guards_tr_selector #: String

      private

      # Escape special characters for safe use in String#tr.
      #
      # String#tr treats certain characters specially:
      # - '-' : Defines character ranges (e.g., 'a-z')
      # - '\\' : Escape character
      # - '^' : Negation when at the start
      #
      # This method escapes these characters so they're treated literally.
      #
      # Example: ['a', '-', 'z'] → "a\\-z" (not a range from a to z)
      #
      # @param chars [Array<String>] Characters to join and escape
      # @return [String] Escaped string safe for use in tr()
      #
      # @rbs (Array[String] chars) -> String
      def escape_characters_string_for_tr(chars)
        chars.join.gsub(/([-\\^])/) { "\\#{$1}" }
      end

      # Setup and partition separators from the alphabet.
      #
      # This method:
      # 1. Starts with default separators ("cfhistuCFHISTU")
      # 2. Makes alphabet and separators disjoint (removes overlaps)
      # 3. Removes any space characters that may have been introduced
      # 4. Shuffles separators using the salt
      # 5. Balances the alphabet-to-separator ratio to approximately 3.5:1
      # 6. Shuffles the final alphabet using the salt
      #
      # The ratio balancing ensures:
      # - If there are too few separators, take some from the alphabet
      # - If there are too many separators, trim the excess
      # - Minimum of 2 separators is maintained
      #
      # @rbs () -> void
      def setup_seps
        @seps = DEFAULT_SEPS.dup

        # Make alphabet and separators disjoint: keep separator if it exists in alphabet,
        # otherwise remove it. This ensures separators only contains characters from the original alphabet.
        @seps.length.times do |sep_index|
          if (alphabet_index = @alphabet.index(@seps[sep_index]))
            @alphabet = remove_character_at(@alphabet, alphabet_index)
          else
            @seps = remove_character_at(@seps, sep_index)
          end
        end

        # Remove space placeholders introduced by remove_character_at
        @alphabet.delete(SPACE_CHAR)
        @seps.delete(SPACE_CHAR)

        salt_length = @salt.length
        consistent_shuffle!(@seps, @salt, nil, salt_length)

        # Balance the alphabet-to-separator ratio to approximately SEP_DIV (3.5:1)
        alphabet_length = @alphabet.length
        seps_count = @seps.length
        if seps_count == 0 || (alphabet_length / seps_count.to_f) > SEP_DIV
          seps_target_count = (alphabet_length / SEP_DIV).ceil
          seps_target_count = 2 if seps_target_count == 1 # Minimum 2 separators

          if seps_target_count > seps_count
            # Not enough separators - take some from the alphabet.
            diff = seps_target_count - seps_count

            # These are safe: diff > 0 and @alphabet has enough elements by design
            additional_seps = @alphabet[0, diff] #: Array[Integer]
            @seps += additional_seps
            @alphabet = @alphabet[diff..] #: Array[Integer]
          else
            # Too many separators - trim to target length.
            @seps = @seps[0, seps_target_count] #: Array[Integer]
          end
        end

        consistent_shuffle!(@alphabet, @salt, nil, salt_length)
      end

      # Setup guards by extracting them from separators or alphabet.
      #
      # Guards are special boundary characters used for minimum length padding.
      # They're chosen from either the separator set or alphabet based on alphabet size:
      #
      # - If alphabet is very small (< 3 characters): Take guards from separators
      # - Otherwise: Take guards from alphabet
      #
      # The number of guards is calculated to maintain approximately a 12:1 ratio
      # with the alphabet (alphabet.length / GUARD_DIV, rounded up).
      #
      # Why this matters:
      # - Guards don't encode data, so we want them to be rare
      # - But we need enough variety to avoid patterns in minimum-length hashes
      # - Taking from separators when alphabet is small preserves encoding characters
      #
      # @rbs () -> void
      def setup_guards
        alphabet_length = @alphabet.length
        gc = (alphabet_length / GUARD_DIV).ceil

        if alphabet_length < 3
          # Very small alphabet - take guards from separators to preserve alphabet.
          @guards = @seps[0, gc] #: Array[Integer]
          @seps = @seps[gc..] || [] #: Array[Integer]
        else
          # Normal case - take guards from alphabet.
          @guards = @alphabet[0, gc] #: Array[Integer]
          @alphabet = @alphabet[gc..] || [] #: Array[Integer]
        end
      end

      # Remove a character from an array by replacing it with a space placeholder.
      #
      # This is used during the separator/alphabet disjoint operation.
      # Instead of mutating the array in place, it creates a new array with:
      # - All characters before the index
      # - A SPACE_CHAR placeholder
      # - All characters after the index
      #
      # The space acts as a placeholder that gets removed later by Array#delete.
      # This approach maintains array indices during iteration.
      #
      # Example:
      #   remove_character_at([97, 98, 99], 1) → [97, 32, 99]  # [a, space, c]
      #
      # @param array [Array<Integer>] The array to remove from
      # @param index [Integer] The index of the character to remove
      # @return [Array<Integer>] New array with character replaced by space
      #
      # @rbs (Array[Integer] array, Integer index) -> Array[Integer]
      def remove_character_at(array, index)
        tail = array[index + 1..]
        head = array[0, index] || []
        head << SPACE_CHAR
        tail ? head + tail : head
      end
    end
  end
end
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+
3
+ # rbs_inline: enabled
4
+
5
module EncodedId
  module Encoders
    # Simple wrapper class for HashID salt values.
    #
    # This class encapsulates the salt string and provides convenient access to:
    # - The original salt string
    # - The salt as an array of individual characters
    #
    # Both representations are frozen to prevent accidental modification.
    #
    # == Security Note:
    #
    # The salt is the 'secret' that makes your Hashids unique. Without knowing the
    # salt, it's harder to reverse-engineer the encoding scheme or predict hash
    # values, BUT Hashids is not a secure encryption technique. It should only be
    # used to obfuscate values that are not sensitive (values you would simply
    # prefer the average person not see).
    #
    class HashidSalt
      # @rbs @salt: String
      # @rbs @chars: Array[String]

      # Initialize a new salt wrapper.
      #
      # The caller's String is never frozen as a side effect: an unfrozen salt is
      # copied and the copy is frozen; an already-frozen salt is stored as is.
      #
      # @param salt [String] The salt string (can be empty but must be a String)
      # @raise [SaltError] If salt is not a String
      #
      # @rbs (String salt) -> void
      def initialize(salt)
        unless salt.is_a?(String)
          raise SaltError, "The salt must be a String"
        end

        # Keep a frozen private copy rather than freezing the argument in place,
        # so later mutation of the caller's string neither fails nor leaks in here.
        @salt = salt.frozen? ? salt : salt.dup.freeze
        @chars = @salt.chars.freeze
      end

      # The salt string (frozen)
      attr_reader :salt #: String

      # The salt as an array of individual characters (the array is frozen)
      attr_reader :chars #: Array[String]
    end
  end
end