encoded_id 1.0.0.rc4 → 1.0.0.rc6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,531 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This implementation based on https://github.com/peterhellberg/hashids.rb
4
+ # --------------------------------------------------------------------------
5
+ # Original Hashids implementation is MIT licensed:
6
+ #
7
+ # Copyright (c) 2013-2017 Peter Hellberg
8
+ #
9
+ # MIT License
10
+ #
11
+ # Permission is hereby granted, free of charge, to any person obtaining
12
+ # a copy of this software and associated documentation files (the
13
+ # "Software"), to deal in the Software without restriction, including
14
+ # without limitation the rights to use, copy, modify, merge, publish,
15
+ # distribute, sublicense, and/or sell copies of the Software, and to
16
+ # permit persons to whom the Software is furnished to do so, subject to
17
+ # the following conditions:
18
+ #
19
+ # The above copyright notice and this permission notice shall be
20
+ # included in all copies or substantial portions of the Software.
21
+ #
22
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
23
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
24
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
25
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
26
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
27
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
28
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29
+ # --------------------------------------------------------------------------
30
+ #
31
+ # This version also MIT licensed (Stephen Ierodiaconou 2023-2025):
32
+ # see LICENSE.txt file
33
+ # rbs_inline: enabled
34
+
35
+ # == HashID Algorithm Overview
36
+ #
37
+ # Hashids is a small library that generates short, unique, non-sequential IDs from numbers.
38
+ # The algorithm has several key properties:
39
+ #
40
+ # 1. **Deterministic**: Same input numbers always produce the same hash
41
+ # 2. **Reversible**: You can decode the hash back to the original numbers
42
+ # 3. **Non-sequential**: Sequential numbers don't produce sequential hashes
43
+ # 4. **Customizable**: Uses a salt, minimum length, alphabet, and optional blocklist
44
+ #
45
+ # === Core Algorithm Concepts:
46
+ #
47
+ # The algorithm works by:
48
+ # - Converting each integer to a custom base-N representation using a shuffled alphabet
49
+ # - The alphabet permutation is deterministic based on a "lottery" character and salt
50
+ # - A lottery character is chosen based on a hash of the input numbers
51
+ # - Each number is encoded with a different alphabet permutation (for obfuscation)
52
+ # - Separators divide encoded numbers, and guards are added for minimum length
53
+ # - The decode process reverses this by extracting the lottery, splitting on separators,
54
+ # and converting each segment back from the custom base-N representation
55
+ #
56
+ # === Character Sets:
57
+ #
58
+ # - **Alphabet**: Main characters used to encode numbers (after setup, doesn't include separators/guards)
59
+ # - **Separators**: Characters that separate encoded number segments within a hash
60
+ # - **Guards**: Special characters added at boundaries to meet minimum length requirements
61
+ # - All three sets are disjoint (no overlap) after initialization
62
+ #
63
+ # === Why This Design?
64
+ #
65
+ # The shuffling and lottery system ensures that:
66
+ # - Similar numbers produce very different hashes (no sequential patterns)
67
+ # - Each position in a multi-number sequence uses a different encoding
68
+ # - The hash obfuscates the inputs if the salt is unknown
69
+ # - The same numbers always produce the same hash (deterministic)
70
+
71
+ module EncodedId
72
+ module Encoders
73
+ class HashId < Base
74
+ # @rbs @separators_and_guards: HashIdOrdinalAlphabetSeparatorGuards
75
+ # @rbs @alphabet_ordinals: Array[Integer]
76
+ # @rbs @separator_ordinals: Array[Integer]
77
+ # @rbs @guard_ordinals: Array[Integer]
78
+ # @rbs @salt_ordinals: Array[Integer]
79
+ # @rbs @escaped_separator_selector: String
80
+ # @rbs @escaped_guards_selector: String
81
+
82
# Build a HashId encoder for the given salt, minimum length, alphabet and blocklist.
#
# The bare `super` forwards all four arguments to the superclass initializer
# (Base, defined elsewhere). After validating the minimum length, the character
# sets are taken from a HashIdOrdinalAlphabetSeparatorGuards instance built from
# the alphabet and salt; each set is stored as an array of ordinals (codepoints).
# How that helper shuffles and balances the sets is not visible in this file.
#
# @param salt [String] Secret salt handed to the character-set helper
# @param min_hash_length [Integer] Minimum length of generated hashes (0 for no minimum)
# @param alphabet [Alphabet] Character set to use for encoding
# @param blocklist [Blocklist?] Optional list of words that shouldn't appear in hashes
# @raise [ArgumentError] If min_hash_length is not an Integer >= 0
#
# @rbs (String salt, ?Integer min_hash_length, ?Alphabet alphabet, ?Blocklist? blocklist) -> void
def initialize(salt, min_hash_length = 0, alphabet = Alphabet.alphanum, blocklist = nil)
  super

  length_is_valid = min_hash_length.is_a?(Integer) && min_hash_length >= 0
  raise ArgumentError, "The min length must be a Integer and greater than or equal to 0" unless length_is_valid

  @min_hash_length = min_hash_length

  character_sets = HashIdOrdinalAlphabetSeparatorGuards.new(alphabet, salt)
  @separators_and_guards = character_sets
  @alphabet_ordinals = character_sets.alphabet
  @separator_ordinals = character_sets.seps
  @guard_ordinals = character_sets.guards
  @salt_ordinals = character_sets.salt

  # Selector strings passed as the first argument to String#tr while decoding,
  # so separators and guards can be turned into spaces in one pass.
  @escaped_separator_selector = character_sets.seps_tr_selector
  @escaped_guards_selector = character_sets.guards_tr_selector
end
115
+
116
+ attr_reader :alphabet_ordinals #: Array[Integer]
117
+ attr_reader :separator_ordinals #: Array[Integer]
118
+ attr_reader :guard_ordinals #: Array[Integer]
119
+ attr_reader :salt_ordinals #: Array[Integer]
120
+
121
# Turn an array of non-negative integers into a hash string.
#
# Flow:
# 1. Every element is run through Integer() purely so that non-numeric input raises;
#    the converted values are discarded and the original elements are used.
# 2. An empty array, or any element below zero, short-circuits to "".
# 3. internal_encode builds the hash string.
# 4. When a non-empty blocklist is configured, the result is checked against it.
#
# @param numbers [Array<Integer>] Array of non-negative integers to encode
# @return [String] The encoded hash string (empty if input is empty or contains negatives)
# @raise [ArgumentError, TypeError] From Integer() when an element is not integer-like
# @raise [BlocklistError] If the generated hash contains a blocklisted word
#
# @rbs (Array[Integer] numbers) -> String
def encode(numbers)
  # Validation only: Integer() raises if conversion fails.
  numbers.each { |candidate| Integer(candidate) }

  return "" if numbers.empty?
  return "" if numbers.any? { |candidate| candidate < 0 }

  encoded = internal_encode(numbers)

  # No blocklist configured (or an empty one): nothing further to check.
  return encoded if !blocklist || blocklist.empty?

  blocked_word = contains_blocklisted_word?(encoded)
  raise EncodedId::BlocklistError, "Generated ID contains blocklisted word: '#{blocked_word}'" if blocked_word

  encoded
end
151
+
152
# Decode a hash string back into the integers it was built from.
#
# nil and "" are answered immediately with an empty array; everything else is
# delegated to internal_decode, which strips guards, splits on separators,
# converts each segment back to an integer and finally re-encodes the result to
# confirm it reproduces the given string.
#
# Note that the delegate does not only return []: unhash raises InvalidInputError
# for a character outside the alphabet, and the re-encoding step goes through
# #encode, which raises BlocklistError when a blocklisted word is present.
#
# @param hash [String] The hash string to decode
# @return [Array<Integer>] Decoded integers ([] for nil/empty input or when re-encoding does not match)
#
# @rbs (String hash) -> Array[Integer]
def decode(hash)
  nothing_to_decode = hash.nil? || hash.empty?

  nothing_to_decode ? [] : internal_decode(hash)
end
173
+
174
# Decode a hash whose numbers represent chunks of a hexadecimal string.
#
# The hash is decoded with #decode; each resulting integer is rendered in base 16
# and its first hex digit is dropped. The pieces are then concatenated and
# upcased. (The original comment describes the dropped digit as a leading '1'
# added during hex encoding; that encoder is not part of this file.)
#
# @param hash [String] The hash string to decode
# @return [String] The reassembled hexadecimal string (uppercase, "" when nothing decodes)
#
# @rbs (String hash) -> String
def decode_hex(hash)
  hex_chunks = decode(hash).map { |number| number.to_s(16)[1..] }

  hex_chunks.join.upcase
end
193
+
194
+ private
195
+
196
# Core encoder: turns the (already validated) numbers into the hash string.
#
# Step 1 - lottery
#   hash_int is the sum of `number % (position + 100)` over all inputs. The
#   alphabet entry at `hash_int % alphabet_length` is the lottery; it is the first
#   ordinal of the output and the first element of the shuffle key ("seasoning").
#
# Step 2 - encode each number
#   Before each number the working alphabet is reshuffled in place, keyed by
#   seasoning (lottery + salt) followed by a copy of the alphabet as it was before
#   this shuffle. The number is then written in base alphabet_length via
#   hash_one_number. Between numbers a separator is appended, chosen from
#   `number % (leading_ordinal + position)`.
#
# Step 3 - guards (only when shorter than @min_hash_length)
#   A guard selected by (hash_int + first ordinal) is prepended; if the result is
#   still too short, a guard selected by (hash_int + ordinal at index 2) is appended.
#
# Step 4 - padding (only while still shorter than @min_hash_length)
#   The alphabet is shuffled using a copy of itself as the key, its second half is
#   put in front and its first half behind, and any overshoot is trimmed so that
#   exactly @min_hash_length ordinals remain.
#
# Resulting layout: [padding] [guard] lottery num1 sep num2 sep ... [guard] [padding]
#
# @param numbers [Array<Integer>] Non-negative integers to encode
# @return [String] The encoded hash string
#
# @rbs (Array[Integer] numbers) -> String
def internal_encode(numbers)
  working_alphabet = @alphabet_ordinals.dup
  separators = @separator_ordinals
  guards = @guard_ordinals

  alphabet_length = working_alphabet.length
  count = numbers.length

  # Step 1: position-dependent checksum of the inputs.
  hash_int = 0
  numbers.each_with_index { |number, position| hash_int += number % (position + 100) }

  lottery = working_alphabet[hash_int % alphabet_length]

  # Output buffer of ordinals (codepoints), seeded with the lottery.
  # @type var encoded_ordinals: Array[Integer]
  encoded_ordinals = []
  encoded_ordinals << lottery

  # Shuffle key shared by every per-number shuffle: lottery followed by the salt.
  seasoning = [lottery, *@salt_ordinals]

  # Scratch copy that holds the alphabet as it was before each shuffle; reused
  # across iterations instead of allocating a new array every time.
  pre_shuffle_alphabet = working_alphabet.dup

  # Step 2: one reshuffle + base conversion per number.
  numbers.each_with_index do |number, position|
    pre_shuffle_alphabet.replace(working_alphabet)
    consistent_shuffle!(working_alphabet, seasoning, pre_shuffle_alphabet, alphabet_length)

    # hash_one_number appends the digits and hands back the leading (most
    # significant) digit's ordinal, which feeds the separator choice below.
    leading_ordinal = hash_one_number(encoded_ordinals, number, working_alphabet, alphabet_length)

    # No separator after the final number.
    next unless (position + 1) < count

    separator_seed = number % (leading_ordinal + position)
    encoded_ordinals << separators[separator_seed % separators.length]
  end

  # Step 3: guards.
  if encoded_ordinals.length < @min_hash_length
    first_ordinal = encoded_ordinals[0] #: Integer
    encoded_ordinals.unshift(guards[(hash_int + first_ordinal) % guards.length])

    if encoded_ordinals.length < @min_hash_length
      # The buffer now holds guard + lottery + at least one digit, so index 2 is
      # expected to exist; the fallback only keeps the lookup nil-safe.
      third_ordinal = encoded_ordinals[2]
      guard_seed = third_ordinal ? hash_int + third_ordinal : hash_int
      encoded_ordinals << guards[guard_seed % guards.length]
    end
  end

  # Step 4: padding.
  half_length = working_alphabet.length.div(2)

  while encoded_ordinals.length < @min_hash_length
    # The key is a copy because the shuffle mutates its first argument in place.
    consistent_shuffle!(working_alphabet, working_alphabet.dup, nil, working_alphabet.length)

    back_half = working_alphabet[half_length..] #: Array[Integer]
    front_half = working_alphabet[0, half_length] #: Array[Integer]
    encoded_ordinals = back_half + encoded_ordinals + front_half

    # Keep the window of @min_hash_length ordinals that starts overshoot / 2 in.
    overshoot = encoded_ordinals.length - @min_hash_length
    if overshoot > 0
      encoded_ordinals = encoded_ordinals[overshoot / 2, @min_hash_length] #: Array[Integer]
    end
  end

  # Ordinals -> UTF-8 string.
  encoded_ordinals.pack("U*")
end
334
+
335
# Core decoder: turns a hash string back into the numbers it encodes.
#
# Step 1 - isolate the payload
#   Guard characters are replaced by spaces and the string is split on
#   whitespace. String#split(" ") ignores leading/trailing whitespace, so guards
#   sitting at the very ends leave a single piece. Two or three pieces appear
#   when other characters (the padding internal_encode wraps around the guards)
#   lie outside a guard; in that case the payload is the piece at index 1,
#   otherwise the piece at index 0.
#
# Step 2 - lottery and segments
#   The payload's first character is the lottery. In the remainder, separators
#   are replaced by spaces and the string is split into one segment per number.
#
# Step 3 - decode each segment
#   The alphabet is reshuffled exactly as in internal_encode (key: lottery + salt,
#   then the pre-shuffle alphabet) and the segment is converted with unhash.
#
# Step 4 - verify
#   The numbers are re-encoded with #encode and must reproduce the input string
#   exactly; otherwise [] is returned.
#
# NOTE(review): verification calls #encode rather than internal_encode, so a hash
# containing a blocklisted word raises BlocklistError while decoding - confirm
# this is intended.
#
# @param hash [String] The hash string to decode
# @return [Array<Integer>] Decoded integers ([] when there is no payload or verification fails)
# @raise [InvalidInputError] From unhash when a segment has a character outside the alphabet
#
# @rbs (String hash) -> Array[Integer]
def internal_decode(hash)
  working_alphabet = @alphabet_ordinals.dup

  # Step 1: guards -> spaces, then split.
  pieces = hash.tr(@escaped_guards_selector, " ").split(" ")
  payload_index = (pieces.length == 2 || pieces.length == 3) ? 1 : 0
  payload = pieces[payload_index]

  # Nothing left once guards are removed.
  return [] unless payload

  # Step 2: lottery + separator-delimited segments.
  lottery = payload[0] #: String
  remainder = payload[1..] || "" #: String
  segments = remainder.tr(@escaped_separator_selector, " ").split(" ")

  # Same shuffle key as the encoder: lottery followed by the salt.
  seasoning = [lottery.ord, *@salt_ordinals]

  # Step 3: reshuffle before every segment, then convert it back to an integer.
  # @type var numbers: Array[Integer]
  numbers = segments.map do |segment|
    consistent_shuffle!(working_alphabet, seasoning, working_alphabet.dup, working_alphabet.length)
    unhash(segment, working_alphabet)
  end

  # Step 4: only an exact round trip counts as a valid hash.
  (encode(numbers) == hash) ? numbers : []
end
421
+
422
# Append one number, written in base alphabet_length, to the end of hash_code.
#
# Digits are produced least-significant first (repeated `% alphabet_length` and
# `/ alphabet_length`), but each new digit is inserted in front of the digits
# already written for this number, so the finished segment reads
# most-significant digit first.
#
# Example with alphabet a..j (base 10) and num = 123:
#   123 % 10 = 3 -> 'd'   segment: d
#    12 % 10 = 2 -> 'c'   segment: cd
#     1 % 10 = 1 -> 'b'   segment: bcd
#   "bcd" is appended to hash_code and 'b' (its ordinal) is returned.
#
# @param hash_code [Array<Integer>] Output buffer of ordinals (modified in place)
# @param num [Integer] The number to convert
# @param alphabet [Array<Integer>] The alphabet ordinals to use as digits
# @param alphabet_length [Integer] Length of the alphabet (passed in by the caller)
# @return [Integer] Ordinal of the most significant digit, i.e. the first character of
#   the segment (it is the last one to be inserted)
#
# @rbs (Array[Integer] hash_code, Integer num, Array[Integer] alphabet, Integer alphabet_length) -> Integer
def hash_one_number(hash_code, num, alphabet, alphabet_length)
  digit_ordinal = 0 #: Integer

  # Negative insert positions count from the end: -1 appends, -2 lands just
  # before the previously inserted digit, and so on.
  position_from_end = -1

  loop do
    digit_ordinal = alphabet[num % alphabet_length] || 0
    hash_code.insert(position_from_end, digit_ordinal)
    position_from_end -= 1

    num /= alphabet_length
    break unless num > 0
  end

  # Last digit computed == leading digit of the segment.
  digit_ordinal
end
459
+
460
# Convert a base-N encoded segment back to an integer (inverse of hash_one_number).
#
# Each character's index in `alphabet` is its digit value and N is alphabet.length.
# Characters are read left to right, most significant first, and accumulated with
# Horner's rule (value = value * N + digit). This yields the same result as
# summing digit * N**exponent but avoids a big-integer exponentiation for every
# character.
#
# Example: "bcd" with alphabet a..j (base 10)
#   start      0
#   'b' (1) -> 0 * 10 + 1 = 1
#   'c' (2) -> 1 * 10 + 2 = 12
#   'd' (3) -> 12 * 10 + 3 = 123
#
# @param input [String] The encoded segment to decode
# @param alphabet [Array<Integer>] The alphabet ordinals used for encoding
# @return [Integer] The decoded number (0 for an empty string)
# @raise [InvalidInputError] If input contains characters not in the alphabet
#
# @rbs (String input, Array[Integer] alphabet) -> Integer
def unhash(input, alphabet)
  base = alphabet.length
  value = 0 #: Integer

  input.each_codepoint do |codepoint|
    digit = alphabet.index(codepoint)
    raise InvalidInputError, "unable to unhash" unless digit

    value = value * base + digit
  end

  value
end
498
+
499
+ # Thin wrapper: forwards all four arguments, unchanged, to HashIdConsistentShuffle.shuffle!.
500
+ #
501
+ # The shuffle is deterministic: the swap positions are computed only from the salt ordinals,
502
+ # so the same collection and the same salt parts always give the same permutation.
503
+ #
504
+ # @param collection_to_shuffle [Array<Integer>] The array to shuffle (modified in place)
505
+ # @param salt_part_1 [Array<Integer>] First part of the salt (callers pass lottery + salt, or an alphabet copy)
506
+ # @param salt_part_2 [Array<Integer>?] Second part of the salt (callers pass a pre-shuffle alphabet copy, or nil)
507
+ # @param max_salt_length [Integer] Number of salt positions cycled through (callers pass the alphabet length)
508
+ # @return [Array<Integer>] The shuffled array (same object as collection_to_shuffle)
509
+ # @raise [SaltError] Raised by HashIdConsistentShuffle.shuffle! when salt_part_1 is shorter than max_salt_length and salt_part_2 is nil
510
+ # @rbs (Array[Integer] collection_to_shuffle, Array[Integer] salt_part_1, Array[Integer]? salt_part_2, Integer max_salt_length) -> Array[Integer]
511
+ def consistent_shuffle!(collection_to_shuffle, salt_part_1, salt_part_2, max_salt_length)
512
+ HashIdConsistentShuffle.shuffle!(collection_to_shuffle, salt_part_1, salt_part_2, max_salt_length)
513
+ end
514
+
515
# Look for a blocklisted word inside an encoded string.
#
# Without a blocklist, or with an empty one, the answer is always false.
# Otherwise the blocklist's #blocks? result is returned when it is truthy and
# false is returned in its place when it is not.
#
# @param encoded_string [String] The encoded hash to check
# @return [String, false] The blocklisted word if found, false otherwise
#
# @rbs (String encoded_string) -> (String | false)
def contains_blocklisted_word?(encoded_string)
  active_blocklist = @blocklist
  return false if !active_blocklist || active_blocklist.empty?

  # Normalise a falsy "no match" answer to a literal false.
  active_blocklist.blocks?(encoded_string) || false
end
529
+ end
530
+ end
531
+ end
@@ -0,0 +1,110 @@
1
+ # frozen_string_literal: true
2
+
3
+ # rbs_inline: enabled
4
+
5
+ module EncodedId
6
+ module Encoders
7
# Deterministic, salt-driven in-place shuffle used by the HashId encoder.
#
# Properties that follow from the implementation below:
#
# 1. **Deterministic**: swap positions depend only on the salt ordinals, so the
#    same collection and salt always give the same permutation.
# 2. **A permutation**: elements are only ever swapped, never added or dropped.
# 3. **Salt-dependent**: different salts generally produce different permutations.
#
# == Algorithm
#
# - Walk i from the last index down to 1.
# - Take the next salt ordinal n (salt position idx) and add it to the running
#   total ord_total *before* computing the swap partner.
# - Swap element i with element j = (n + idx + ord_total) % i, so j is always < i.
# - Advance idx, wrapping at max_salt_length.
#
# The loop has the shape of a Fisher-Yates pass, with the swap index derived from
# the salt instead of a random number generator.
#
# == Two salt parts
#
# The salt is read as salt_part_1 followed by salt_part_2: positions below
# salt_part_1.length come from salt_part_1, later positions from salt_part_2.
# This lets callers use "lottery + salt, then alphabet" as the key without
# building a concatenated array.
#
# == Example (max_salt_length = 3)
#
#   Input: [1, 2, 3, 4], salt_part_1: [65, 66, 67] ("ABC"), salt_part_2: nil
#   i=3: n=65, idx=0, ord_total=65  -> j = (65 + 0 + 65)  % 3 = 1 -> [1, 4, 3, 2]
#   i=2: n=66, idx=1, ord_total=131 -> j = (66 + 1 + 131) % 2 = 0 -> [3, 4, 1, 2]
#   i=1: n=67, idx=2, ord_total=198 -> j = (67 + 2 + 198) % 1 = 0 -> [4, 3, 1, 2]
#   Result: [4, 3, 1, 2]
#
class HashIdConsistentShuffle
  # Shuffle a collection in place, keyed by a two-part salt.
  #
  # @param collection_to_shuffle [Array<Integer>] Array to shuffle (modified in place)
  # @param salt_part_1 [Array<Integer>] Primary salt ordinals (nil is treated like an empty salt)
  # @param salt_part_2 [Array<Integer>?] Optional secondary salt ordinals
  # @param max_salt_length [Integer] Number of salt positions to cycle through
  # @return [Array<Integer>] The shuffled array (same object as input)
  # @raise [SaltError] If salt_part_1 is shorter than max_salt_length and there is no
  #   salt_part_2, or if no salt ordinal is available for a position during the shuffle
  #
  # @rbs (Array[Integer] collection_to_shuffle, Array[Integer] salt_part_1, Array[Integer]? salt_part_2, Integer max_salt_length) -> Array[Integer]
  def self.shuffle!(collection_to_shuffle, salt_part_1, salt_part_2, max_salt_length)
    # Treat a missing primary salt as empty so the guards below can handle it
    # (calling #length on nil would raise before any guard ran).
    salt_part_1_length = salt_part_1.nil? ? 0 : salt_part_1.length

    # If max_salt_length reaches past salt_part_1, salt_part_2 has to supply the rest.
    raise SaltError, "Salt is too short in shuffle" if salt_part_1_length < max_salt_length && salt_part_2.nil?

    # Nothing to shuffle, or nothing to shuffle with.
    return collection_to_shuffle if collection_to_shuffle.empty? || max_salt_length == 0 || salt_part_1_length == 0

    # idx: current salt position (cycles through 0..max_salt_length-1)
    # ord_total: running sum of the salt ordinals consumed so far
    idx = ord_total = 0

    # Index 0 is never visited: it has no lower index to swap with.
    i = collection_to_shuffle.length - 1
    while i >= 1
      # Positions past salt_part_1 are served by salt_part_2.
      n = if idx >= salt_part_1_length
        raise SaltError, "Salt shuffle has failed" unless salt_part_2

        salt_part_2[idx - salt_part_1_length]
      else
        salt_part_1[idx]
      end

      # A nil here means the combined salt is shorter than max_salt_length; fail
      # with the documented error instead of a TypeError from the addition below.
      raise SaltError, "Salt shuffle has failed" unless n

      # The running total is updated first, so n effectively counts twice in j.
      ord_total += n

      # `% i` keeps the partner strictly below i.
      j = (n + idx + ord_total) % i

      collection_to_shuffle[i], collection_to_shuffle[j] = collection_to_shuffle[j], collection_to_shuffle[i]

      idx = (idx + 1) % max_salt_length
      i -= 1
    end

    collection_to_shuffle
  end
end
109
+ end
110
+ end