encoded_id 1.0.0.rc4 → 1.0.0.rc6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,270 @@
1
+ # frozen_string_literal: true
2
+
3
+ # rbs_inline: enabled
4
+
5
+ module EncodedId
6
+ module Encoders
7
+ # Prepares and partitions the character sets for HashID encoding.
8
+ #
9
+ # This class is responsible for splitting a single input alphabet into three disjoint sets:
10
+ # 1. **Alphabet**: Main characters used to encode numbers
11
+ # 2. **Separators (seps)**: Characters that separate encoded numbers in the hash
12
+ # 3. **Guards**: Characters added at boundaries to meet minimum length requirements
13
+ #
14
+ # == Initialization Process:
15
+ #
16
+ # Step 1: Start with default separators ("cfhistuCFHISTU")
17
+ # Step 2: Ensure separators and alphabet are disjoint (remove overlaps)
18
+ # Step 3: Shuffle separators using the salt
19
+ # Step 4: Balance alphabet-to-separator ratio (target ≈ 3.5:1)
20
+     # Step 5: Shuffle alphabet using the salt
21
+     # Step 6: Create guards from alphabet or separators (target ≈ 12:1 alphabet-to-guards)
22
+ #
23
+ # == Character Set Ratios:
24
+ #
25
+ # The algorithm maintains specific ratios between the character sets:
26
+ # - Alphabet : Separators ≈ 3.5 : 1 (SEP_DIV)
27
+ # - Alphabet : Guards ≈ 12 : 1 (GUARD_DIV)
28
+ #
29
+ # These ratios ensure:
30
+ # - Enough separators to avoid patterns in multi-number hashes
31
+ # - Guards are rare enough to not waste space but common enough to be useful
32
+ # - Alphabet is large enough for efficient encoding (shorter hashes)
33
+ #
34
+ # == Why Ordinals?
35
+ #
36
+ # All characters are stored as integer ordinals (Unicode codepoints) rather than strings.
37
+ # This provides:
38
+ # - Faster comparisons and lookups
39
+ # - More efficient memory usage
40
+ # - Direct array indexing without string allocations
41
+ #
42
+ class HashIdOrdinalAlphabetSeparatorGuards
43
+ # Target ratio of alphabet to separators (alphabet.length / seps.length ≈ 3.5)
44
+ SEP_DIV = 3.5
45
+
46
+     # Default separator characters - the stock Hashids set; keeping these letters out of the encoding alphabet makes common English curse words less likely in generated IDs
47
+ DEFAULT_SEPS = "cfhistuCFHISTU".chars.map(&:ord).freeze
48
+
49
+ # Target ratio of alphabet to guards (alphabet.length / guards.length ≈ 12)
50
+ GUARD_DIV = 12.0
51
+
52
+ # Space character ordinal - used as a placeholder when removing characters
53
+ SPACE_CHAR = " ".ord
54
+
55
+ # @rbs @alphabet: Array[Integer]
56
+ # @rbs @salt: Array[Integer]
57
+ # @rbs @seps: Array[Integer]
58
+ # @rbs @guards: Array[Integer]
59
+ # @rbs @seps_tr_selector: String
60
+ # @rbs @guards_tr_selector: String
61
+
62
+ # Initialize and partition the character sets.
63
+ #
64
+ # Takes an alphabet and salt, then:
65
+ # 1. Converts all characters to ordinals (integer codepoints)
66
+ # 2. Partitions the alphabet into separators, guards, and the remaining alphabet
67
+       # 3. Shuffles the separators and the alphabet deterministically using the salt
68
+ # 4. Balances the ratios between the sets
69
+ # 5. Creates escaped versions for use with String#tr
70
+ #
71
+       # The alphabet, separator and guard arrays are frozen after setup to prevent accidental modification (the salt array is not frozen).
72
+ #
73
+ # @param alphabet [Alphabet] The character set to partition
74
+ # @param salt [String] The salt used for shuffling
75
+ #
76
+ # @rbs (Alphabet alphabet, String salt) -> void
77
+ def initialize(alphabet, salt)
78
+ # Convert alphabet and salt to arrays of ordinals (integer codepoints).
79
+ @alphabet = alphabet.characters.chars.map(&:ord)
80
+ @salt = salt.chars.map(&:ord)
81
+
82
+ # Partition the alphabet into separators and alphabet.
83
+ # This ensures they're disjoint and properly balanced.
84
+ setup_seps
85
+
86
+ # Extract guards from either separators or alphabet.
87
+ # Guards are boundary markers used for minimum length padding.
88
+ setup_guards
89
+
90
+ # Pre-compute escaped versions for String#tr operations during decode.
91
+ # This escapes special characters like '-', '\\', and '^' that have
92
+ # special meaning in tr() character ranges.
93
+ @seps_tr_selector = escape_characters_string_for_tr(@seps.map(&:chr))
94
+ @guards_tr_selector = escape_characters_string_for_tr(@guards.map(&:chr))
95
+
96
+ # Freeze all arrays to prevent accidental modification.
97
+ @alphabet.freeze
98
+ @seps.freeze
99
+ @guards.freeze
100
+ end
101
+
102
+ attr_reader :salt #: Array[Integer]
103
+ attr_reader :alphabet #: Array[Integer]
104
+ attr_reader :seps #: Array[Integer]
105
+ attr_reader :guards #: Array[Integer]
106
+ attr_reader :seps_tr_selector #: String
107
+ attr_reader :guards_tr_selector #: String
108
+
109
+ private
110
+
111
+ # Escape special characters for safe use in String#tr.
112
+ #
113
+ # String#tr treats certain characters specially:
114
+ # - '-' : Defines character ranges (e.g., 'a-z')
115
+ # - '\\' : Escape character
116
+ # - '^' : Negation when at the start
117
+ #
118
+ # This method escapes these characters so they're treated literally.
119
+ #
120
+ # Example: ['a', '-', 'z'] → "a\\-z" (not a range from a to z)
121
+ #
122
+ # @param chars [Array<String>] Characters to join and escape
123
+ # @return [String] Escaped string safe for use in tr()
124
+ #
125
+ # @rbs (Array[String] chars) -> String
126
+ def escape_characters_string_for_tr(chars)
127
+ chars.join.gsub(/([-\\^])/) { "\\#{$1}" }
128
+ end
129
+
130
+ # Setup and partition separators from the alphabet.
131
+ #
132
+ # This method:
133
+ # 1. Starts with default separators ("cfhistuCFHISTU")
134
+ # 2. Makes alphabet and separators disjoint (removes overlaps)
135
+ # 3. Removes any space characters that may have been introduced
136
+ # 4. Shuffles separators using the salt
137
+ # 5. Balances the alphabet-to-separator ratio to approximately 3.5:1
138
+ # 6. Shuffles the final alphabet using the salt
139
+ #
140
+ # The ratio balancing ensures:
141
+ # - If there are too few separators, take some from the alphabet
142
+ # - If there are too many separators, trim the excess
143
+       # - A computed target of exactly 1 separator is raised to 2
144
+ #
145
+ # @rbs () -> void
146
+ def setup_seps
147
+ @seps = DEFAULT_SEPS.dup
148
+
149
+ # Make alphabet and separators disjoint.
150
+ # For each separator:
151
+ # - If it exists in the alphabet, remove it from the alphabet
152
+ # - If it doesn't exist in the alphabet, remove it from separators
153
+ # This ensures separators only contains characters from the original alphabet.
154
+ @seps.length.times do |i|
155
+ if (j = @alphabet.index(@seps[i]))
156
+ # Separator exists in alphabet - remove it from alphabet.
157
+ @alphabet = pick_characters(@alphabet, j)
158
+ else
159
+ # Separator doesn't exist in alphabet - remove it from separators.
160
+ @seps = pick_characters(@seps, i)
161
+ end
162
+ end
163
+
164
+ # Remove any space characters introduced by pick_characters.
165
+ # Spaces are placeholders and shouldn't appear in the final sets.
166
+ @alphabet.delete(SPACE_CHAR)
167
+ @seps.delete(SPACE_CHAR)
168
+
169
+ # Shuffle separators deterministically using the salt.
170
+ consistent_shuffle!(@seps, @salt, nil, @salt.length)
171
+
172
+ # Balance the alphabet-to-separator ratio to approximately SEP_DIV (3.5:1).
173
+ # This ensures we have enough separators for good distribution in multi-number hashes.
174
+ if @seps.length == 0 || (@alphabet.length / @seps.length.to_f) > SEP_DIV
175
+ # Calculate target separator count based on alphabet size.
176
+ seps_length = (@alphabet.length / SEP_DIV).ceil
177
+ seps_length = 2 if seps_length == 1 # Minimum 2 separators
178
+
179
+ if seps_length > @seps.length
180
+ # Not enough separators - take some from the alphabet.
181
+ diff = seps_length - @seps.length
182
+
183
+ # These are safe: diff > 0 and @alphabet has enough elements by design
184
+ additonal_seps = @alphabet[0, diff] #: Array[Integer]
185
+ @seps += additonal_seps
186
+ @alphabet = @alphabet[diff..] #: Array[Integer]
187
+ else
188
+ # Too many separators - trim to target length.
189
+ @seps = @seps[0, seps_length] #: Array[Integer]
190
+ end
191
+ end
192
+
193
+ # Shuffle the final alphabet deterministically using the salt.
194
+ # This ensures different salts produce different alphabet orderings.
195
+ consistent_shuffle!(@alphabet, @salt, nil, @salt.length)
196
+ end
197
+
198
+ # Setup guards by extracting them from separators or alphabet.
199
+ #
200
+ # Guards are special boundary characters used for minimum length padding.
201
+ # They're chosen from either the separator set or alphabet based on alphabet size:
202
+ #
203
+ # - If alphabet is very small (< 3 characters): Take guards from separators
204
+ # - Otherwise: Take guards from alphabet
205
+ #
206
+ # The number of guards is calculated to maintain approximately a 12:1 ratio
207
+ # with the alphabet (alphabet.length / GUARD_DIV).
208
+ #
209
+ # Why this matters:
210
+ # - Guards don't encode data, so we want them to be rare
211
+ # - But we need enough variety to avoid patterns in minimum-length hashes
212
+ # - Taking from separators when alphabet is small preserves encoding characters
213
+ #
214
+ # @rbs () -> void
215
+ def setup_guards
216
+ # Calculate target guard count: approximately 1/12th of alphabet length.
217
+ gc = (@alphabet.length / GUARD_DIV).ceil
218
+
219
+ if @alphabet.length < 3
220
+ # Very small alphabet - take guards from separators to preserve alphabet.
221
+ @guards = @seps[0, gc] #: Array[Integer]
222
+ @seps = @seps[gc..] || [] #: Array[Integer]
223
+ else
224
+ # Normal case - take guards from alphabet.
225
+ @guards = @alphabet[0, gc] #: Array[Integer]
226
+ @alphabet = @alphabet[gc..] || [] #: Array[Integer]
227
+ end
228
+ end
229
+
230
+ # Remove a character from an array by replacing it with a space.
231
+ #
232
+ # This is used during the separator/alphabet disjoint operation.
233
+ # Instead of mutating the array in place, it creates a new array with:
234
+ # - All characters before the index
235
+ # - A SPACE_CHAR placeholder
236
+ # - All characters after the index
237
+ #
238
+ # The space acts as a placeholder that gets removed later by Array#delete.
239
+ # This approach maintains array indices during iteration.
240
+ #
241
+ # Example:
242
+ # pick_characters([97, 98, 99], 1) → [97, 32, 99] # [a, space, c]
243
+ #
244
+ # @param array [Array<Integer>] The array to remove from
245
+ # @param index [Integer] The index of the character to remove
246
+ # @return [Array<Integer>] New array with character replaced by space
247
+ #
248
+ # @rbs (Array[Integer] array, Integer index) -> Array[Integer]
249
+ def pick_characters(array, index)
250
+ tail = array[index + 1..]
251
+ head = array[0, index] || []
252
+ head << SPACE_CHAR
253
+ tail ? head + tail : head
254
+ end
255
+
256
+ # Delegate to the consistent shuffle algorithm.
257
+ #
258
+ # @param collection_to_shuffle [Array<Integer>] The array to shuffle (modified in place)
259
+ # @param salt_part_1 [Array<Integer>] The salt to use for shuffling
260
+ # @param salt_part_2 [Array<Integer>?] Optional second salt part (unused here)
261
+ # @param max_salt_length [Integer] Maximum salt length to use
262
+ # @return [Array<Integer>] The shuffled array
263
+ #
264
+ # @rbs (Array[Integer] collection_to_shuffle, Array[Integer] salt_part_1, Array[Integer]? salt_part_2, Integer max_salt_length) -> Array[Integer]
265
+       def consistent_shuffle!(collection_to_shuffle, salt_part_1, salt_part_2, max_salt_length)
266
+         HashIdConsistentShuffle.shuffle!(collection_to_shuffle, salt_part_1, salt_part_2, max_salt_length) # all four arguments are forwarded unchanged and the call's result is returned
267
+       end
268
+ end
269
+ end
270
+ end
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+
3
+ # rbs_inline: enabled
4
+
5
+ module EncodedId
6
+ module Encoders
7
+ # Simple wrapper class for HashID salt values.
8
+ #
9
+ # This class encapsulates the salt string and provides convenient access to:
10
+ # - The original salt string
11
+ # - The salt as an array of individual characters
12
+ #
13
+ # Both representations are frozen to prevent accidental modification.
14
+ #
15
+ # == Security Note:
16
+ #
17
+     # The salt is the 'secret' that makes your HashIDs unique. Without knowing the
18
+     # salt, it's harder to reverse-engineer the encoding scheme or predict hash
19
+     # values, BUT HashIDs is not a secure encryption technique. It should only
20
+     # be used to obfuscate values that are not sensitive (values you would
21
+     # simply prefer the average person not to see).
22
+ #
23
+ class HashIdSalt
24
+ # @rbs @salt: String
25
+ # @rbs @chars: Array[String]
26
+
27
+ # Initialize a new salt wrapper.
28
+ #
29
+ # @param salt [String] The salt string (can be empty but must be a String)
30
+ # @raise [SaltError] If salt is not a String
31
+ #
32
+ # @rbs (String salt) -> void
33
+ def initialize(salt)
34
+ unless salt.is_a?(String)
35
+ raise SaltError, "The salt must be a String"
36
+ end
37
+
38
+ # Freeze both representations to prevent modification.
39
+ # This ensures the salt remains constant and thread-safe.
40
+ @salt = salt.freeze
41
+ @chars = salt.chars.freeze
42
+ end
43
+
44
+ # The original salt string (frozen)
45
+ attr_reader :salt #: String
46
+
47
+ # The salt as an array of individual characters (frozen)
48
+ attr_reader :chars #: Array[String]
49
+ end
50
+ end
51
+ end