encoded_id 1.0.0.rc5 → 1.0.0.rc7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +99 -3
  3. data/README.md +86 -329
  4. data/context/encoded_id.md +437 -0
  5. data/lib/encoded_id/alphabet.rb +34 -3
  6. data/lib/encoded_id/blocklist.rb +100 -0
  7. data/lib/encoded_id/encoders/base_configuration.rb +154 -0
  8. data/lib/encoded_id/encoders/hashid.rb +527 -0
  9. data/lib/encoded_id/encoders/hashid_configuration.rb +40 -0
  10. data/lib/encoded_id/encoders/hashid_consistent_shuffle.rb +110 -0
  11. data/lib/encoded_id/encoders/hashid_ordinal_alphabet_separator_guards.rb +244 -0
  12. data/lib/encoded_id/encoders/hashid_salt.rb +51 -0
  13. data/lib/encoded_id/encoders/my_sqids.rb +454 -0
  14. data/lib/encoded_id/encoders/sqids.rb +59 -0
  15. data/lib/encoded_id/encoders/sqids_configuration.rb +22 -0
  16. data/lib/encoded_id/encoders/sqids_with_blocklist_mode.rb +54 -0
  17. data/lib/encoded_id/hex_representation.rb +29 -14
  18. data/lib/encoded_id/reversible_id.rb +115 -82
  19. data/lib/encoded_id/version.rb +3 -1
  20. data/lib/encoded_id.rb +34 -4
  21. metadata +34 -26
  22. data/.devcontainer/Dockerfile +0 -9
  23. data/.devcontainer/compose.yml +0 -8
  24. data/.devcontainer/devcontainer.json +0 -8
  25. data/.standard.yml +0 -2
  26. data/Gemfile +0 -36
  27. data/Rakefile +0 -20
  28. data/Steepfile +0 -5
  29. data/ext/encoded_id/extconf.rb +0 -3
  30. data/ext/encoded_id/extension.c +0 -123
  31. data/ext/encoded_id/hashids.c +0 -939
  32. data/ext/encoded_id/hashids.h +0 -139
  33. data/lib/encoded_id/hash_id.rb +0 -227
  34. data/lib/encoded_id/hash_id_consistent_shuffle.rb +0 -27
  35. data/lib/encoded_id/hash_id_salt.rb +0 -15
  36. data/lib/encoded_id/ordinal_alphabet_separator_guards.rb +0 -90
  37. data/rbs_collection.yaml +0 -24
  38. data/sig/encoded_id.rbs +0 -189
@@ -0,0 +1,154 @@
1
+ # frozen_string_literal: true
2
+
3
+ # rbs_inline: enabled
4
+
5
+ module EncodedId
6
+ module Encoders
7
+ # Base configuration class for encoder-specific settings
8
+ # This provides common parameters shared across all encoders
9
+ class BaseConfiguration
10
+ # @rbs @min_length: Integer
11
+ # @rbs @alphabet: Alphabet
12
+ # @rbs @split_at: Integer?
13
+ # @rbs @split_with: String?
14
+ # @rbs @hex_digit_encoding_group_size: Integer
15
+ # @rbs @max_length: Integer?
16
+ # @rbs @max_inputs_per_id: Integer
17
+ # @rbs @blocklist: Blocklist
18
+ # @rbs @blocklist_mode: Symbol
19
+ # @rbs @blocklist_max_length: Integer
20
+
21
+ attr_reader :min_length, :alphabet, :split_at, :split_with,
22
+ :hex_digit_encoding_group_size, :max_length,
23
+ :max_inputs_per_id, :blocklist, :blocklist_mode, :blocklist_max_length
24
+
25
# Validates and stores the options shared by every encoder configuration.
#
# Options are validated in the order written below, so the first invalid
# option raises and nothing after it is evaluated. The alphabet is validated
# before +split_with+ and before the blocklist is filtered because both steps
# read @alphabet. The :raise_if_likely collision check runs last because it
# reads the already-assigned instance variables.
#
# @param min_length [Integer] positive integer
# @param alphabet [Alphabet] must be an Alphabet instance
# @param split_at [Integer, nil] positive integer, or nil
# @param split_with [String, nil] string that is not part of the alphabet, or nil
# @param hex_digit_encoding_group_size [Integer] stored as given (not validated in this method)
# @param max_length [Integer, nil] positive integer, or nil
# @param max_inputs_per_id [Integer] positive integer
# @param blocklist [Blocklist, Array<String>, Set<String>, nil] nil becomes an empty Blocklist
# @param blocklist_mode [Symbol] one of :always, :length_threshold, :raise_if_likely
# @param blocklist_max_length [Integer] positive integer
# @raise [InvalidAlphabetError] when +alphabet+ is not an Alphabet
# @raise [InvalidConfigurationError] when any other validated option is rejected
#
# @rbs (?min_length: Integer, ?alphabet: Alphabet, ?split_at: Integer?, ?split_with: String?, ?hex_digit_encoding_group_size: Integer, ?max_length: Integer?, ?max_inputs_per_id: Integer, ?blocklist: Blocklist | Array[String] | Set[String] | nil, ?blocklist_mode: Symbol, ?blocklist_max_length: Integer) -> void
def initialize(
  min_length: 8,
  alphabet: Alphabet.modified_crockford,
  split_at: 4,
  split_with: "-",
  hex_digit_encoding_group_size: 4,
  max_length: 128,
  max_inputs_per_id: 32,
  blocklist: Blocklist.empty,
  blocklist_mode: :length_threshold,
  blocklist_max_length: 32
)
  @min_length = validate_min_length(min_length)
  @alphabet = validate_alphabet(alphabet)
  @split_at = validate_split_at(split_at)
  @split_with = validate_split_with(split_with, @alphabet)
  @hex_digit_encoding_group_size = hex_digit_encoding_group_size
  @max_length = validate_max_length(max_length)
  @max_inputs_per_id = validate_max_inputs_per_id(max_inputs_per_id)

  # An empty blocklist is kept as-is; a non-empty one is narrowed via
  # Blocklist#filter_for_alphabet (defined elsewhere; presumably drops entries
  # that cannot occur with this alphabet - confirm against Blocklist).
  normalized_blocklist = validate_blocklist(blocklist)
  @blocklist = normalized_blocklist.empty? ? normalized_blocklist : normalized_blocklist.filter_for_alphabet(@alphabet)

  @blocklist_mode = validate_blocklist_mode(blocklist_mode)
  @blocklist_max_length = validate_blocklist_max_length(blocklist_max_length)
  validate_blocklist_collision_risk
end
51
+
52
# Abstract hook: concrete configurations return the Symbol naming their encoder.
#
# @return [Symbol]
# @raise [NotImplementedError] always, in this base class
#
# @rbs () -> Symbol
def encoder_type
  message = "Subclasses must implement encoder_type"
  raise NotImplementedError, message
end
56
+
57
# Abstract hook: concrete configurations build and return their encoder here.
#
# @raise [NotImplementedError] always, in this base class
#
# @rbs () -> untyped
def create_encoder
  message = "Subclasses must implement create_encoder"
  raise NotImplementedError, message
end
61
+
62
+ private
63
+
64
# Returns +alphabet+ unchanged when it is an Alphabet; rejects anything else.
#
# @raise [InvalidAlphabetError] when +alphabet+ is not an Alphabet instance
#
# @rbs (Alphabet alphabet) -> Alphabet
def validate_alphabet(alphabet)
  unless alphabet.is_a?(Alphabet)
    raise InvalidAlphabetError, "alphabet must be an instance of Alphabet"
  end

  alphabet
end
69
+
70
# Returns +min_length+ when it is a positive Integer.
#
# @raise [InvalidConfigurationError] for non-integers, zero and negatives
#
# @rbs (Integer min_length) -> Integer
def validate_min_length(min_length)
  unless valid_integer_option?(min_length)
    raise InvalidConfigurationError, "min_length must be an integer greater than 0"
  end

  min_length
end
75
+
76
# Returns +max_length+ when it is nil (no limit configured) or a positive Integer.
#
# @raise [InvalidConfigurationError] for any other value
#
# @rbs (Integer? max_length) -> Integer?
def validate_max_length(max_length)
  acceptable = max_length.nil? || valid_integer_option?(max_length)
  raise InvalidConfigurationError, "max_length must be an integer greater than 0 or nil" unless acceptable

  max_length
end
81
+
82
# Returns +max_inputs_per_id+ when it is a positive Integer.
#
# @raise [InvalidConfigurationError] for non-integers, zero and negatives
#
# @rbs (Integer max_inputs_per_id) -> Integer
def validate_max_inputs_per_id(max_inputs_per_id)
  unless valid_integer_option?(max_inputs_per_id)
    raise InvalidConfigurationError, "max_inputs_per_id must be an integer greater than 0"
  end

  max_inputs_per_id
end
87
+
88
# Returns +split_at+ when it is nil or a positive Integer.
#
# @raise [InvalidConfigurationError] for any other value
#
# @rbs (Integer? split_at) -> Integer?
def validate_split_at(split_at)
  acceptable = split_at.nil? || valid_integer_option?(split_at)
  raise InvalidConfigurationError, "split_at must be an integer greater than 0 or nil" unless acceptable

  split_at
end
93
+
94
# Returns +split_with+ when it is nil, or a String for which
# +alphabet.characters.include?(split_with)+ is false.
#
# @param split_with [String, nil] candidate separator
# @param alphabet [Alphabet] alphabet whose +characters+ the separator is checked against
# @raise [InvalidConfigurationError] for non-strings and for strings found in the alphabet
#
# @rbs (String? split_with, Alphabet alphabet) -> String?
def validate_split_with(split_with, alphabet)
  return split_with if split_with.nil?

  usable = split_with.is_a?(String) && !alphabet.characters.include?(split_with)
  raise InvalidConfigurationError, "split_with must be a string not part of the alphabet, or nil" unless usable

  split_with
end
99
+
100
# Shared predicate for the numeric options: true only for Integers above zero.
#
# @rbs (Integer? value) -> bool
def valid_integer_option?(value)
  return false unless value.is_a?(Integer)

  value > 0
end
104
+
105
# Normalizes the +blocklist+ option to a Blocklist instance.
#
# - a Blocklist is returned unchanged
# - nil becomes Blocklist.empty
# - an Array or Set is wrapped with Blocklist.new
#
# @raise [InvalidConfigurationError] for any other type
#
# @rbs (Blocklist | Array[String] | Set[String] | nil blocklist) -> Blocklist
def validate_blocklist(blocklist)
  case blocklist
  when Blocklist
    blocklist
  when nil
    Blocklist.empty
  when Array, Set
    Blocklist.new(blocklist)
  else
    raise InvalidConfigurationError, "blocklist must be a Blocklist, Set, or Array of strings"
  end
end
113
+
114
# Returns +blocklist_mode+ when it is one of the supported mode symbols.
#
# @raise [InvalidConfigurationError] listing the supported modes and the rejected value
#
# @rbs (Symbol blocklist_mode) -> Symbol
def validate_blocklist_mode(blocklist_mode)
  supported = %i[always length_threshold raise_if_likely]
  unless supported.include?(blocklist_mode)
    raise InvalidConfigurationError, "blocklist_mode must be one of #{supported.inspect}, got #{blocklist_mode.inspect}"
  end

  blocklist_mode
end
121
+
122
# Returns +blocklist_max_length+ when it is a positive Integer.
#
# @raise [InvalidConfigurationError] for non-integers, zero and negatives
#
# @rbs (Integer blocklist_max_length) -> Integer
def validate_blocklist_max_length(blocklist_max_length)
  unless valid_integer_option?(blocklist_max_length)
    raise InvalidConfigurationError, "blocklist_max_length must be an integer greater than 0"
  end

  blocklist_max_length
end
128
+
129
# Configuration-time guard for the :raise_if_likely blocklist mode.
#
# Does nothing when the blocklist is empty or another mode is selected.
# Otherwise it rejects two settings that this class treats as signs of long IDs:
# - @min_length greater than @blocklist_max_length
# - @max_inputs_per_id above 100 (a rough heuristic, not a computed probability)
#
# Reads only instance variables, so it must run after they are all assigned.
#
# @raise [InvalidConfigurationError] when either condition holds
#
# @rbs () -> void
def validate_blocklist_collision_risk
  return if @blocklist.empty? || @blocklist_mode != :raise_if_likely

  # A minimum length beyond the blocklist threshold means every ID is "long".
  if @min_length > @blocklist_max_length
    message = "blocklist_mode is :raise_if_likely and min_length (#{@min_length}) exceeds blocklist_max_length (#{@blocklist_max_length}). " \
      "Long IDs have high collision probability with blocklists. " \
      "Use blocklist_mode: :length_threshold or remove the blocklist."
    raise InvalidConfigurationError, message
  end

  # Allowing more than 100 inputs per ID is treated as likely to produce long IDs.
  if @max_inputs_per_id > 100
    message = "blocklist_mode is :raise_if_likely and max_inputs_per_id (#{@max_inputs_per_id}) is very high. " \
      "Encoding many inputs typically results in long IDs with high blocklist collision probability. " \
      "Use blocklist_mode: :length_threshold or remove the blocklist."
    raise InvalidConfigurationError, message
  end
end
152
+ end
153
+ end
154
+ end
@@ -0,0 +1,527 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This implementation based on https://github.com/peterhellberg/hashids.rb
4
+ # --------------------------------------------------------------------------
5
+ # Original Hashids implementation is MIT licensed:
6
+ #
7
+ # Copyright (c) 2013-2017 Peter Hellberg
8
+ #
9
+ # MIT License
10
+ #
11
+ # Permission is hereby granted, free of charge, to any person obtaining
12
+ # a copy of this software and associated documentation files (the
13
+ # "Software"), to deal in the Software without restriction, including
14
+ # without limitation the rights to use, copy, modify, merge, publish,
15
+ # distribute, sublicense, and/or sell copies of the Software, and to
16
+ # permit persons to whom the Software is furnished to do so, subject to
17
+ # the following conditions:
18
+ #
19
+ # The above copyright notice and this permission notice shall be
20
+ # included in all copies or substantial portions of the Software.
21
+ #
22
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
23
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
24
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
25
+ # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
26
+ # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
27
+ # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
28
+ # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29
+ # --------------------------------------------------------------------------
30
+ #
31
+ # This version also MIT licensed (Stephen Ierodiaconou 2023-2025):
32
+ # see LICENSE.txt file
33
+ # rbs_inline: enabled
34
+
35
+ # == HashID Algorithm Overview
36
+ #
37
+ # Hashids is a small library that generates short, unique, non-sequential IDs from numbers.
38
+ # The algorithm has several key properties:
39
+ #
40
+ # 1. **Deterministic**: Same input numbers always produce the same hash
41
+ # 2. **Reversible**: You can decode the hash back to the original numbers
42
+ # 3. **Non-sequential**: Sequential numbers don't produce sequential hashes
43
+ # 4. **Customizable**: Uses a salt, minimum length, alphabet, and optional blocklist
44
+ #
45
+ # === Core Algorithm Concepts:
46
+ #
47
+ # The algorithm works by:
48
+ # - Converting each integer to a custom base-N representation using a shuffled alphabet
49
+ # - The alphabet permutation is deterministic based on a "lottery" character and salt
50
+ # - A lottery character is chosen based on a hash of the input numbers
51
+ # - Each number is encoded with a different alphabet permutation (for obfuscation)
52
+ # - Separators divide encoded numbers, and guards are added for minimum length
53
+ # - The decode process reverses this by extracting the lottery, splitting on separators,
54
+ # and converting each segment back from the custom base-N representation
55
+ #
56
+ # === Character Sets:
57
+ #
58
+ # - **Alphabet**: Main characters used to encode numbers (after setup, doesn't include separators/guards)
59
+ # - **Separators**: Characters that separate encoded number segments within a hash
60
+ # - **Guards**: Special characters added at boundaries to meet minimum length requirements
61
+ # - All three sets are disjoint (no overlap) after initialization
62
+ #
63
+ # === Why This Design?
64
+ #
65
+ # The shuffling and lottery system ensures that:
66
+ # - Similar numbers produce very different hashes (no sequential patterns)
67
+ # - Each position in a multi-number sequence uses a different encoding
68
+ # - The hash obfuscates the inputs if the salt is unknown
69
+ # - The same numbers always produce the same hash (deterministic)
70
+
71
+ module EncodedId
72
+ module Encoders
73
+ # Implementation of HashId, optimised and adapted from the original `hashid.rb` gem
74
+ class Hashid
75
+ include HashidConsistentShuffle
76
+
77
+ # @rbs @separators_and_guards: HashidOrdinalAlphabetSeparatorGuards
78
+ # @rbs @alphabet_ordinals: Array[Integer]
79
+ # @rbs @separator_ordinals: Array[Integer]
80
+ # @rbs @guard_ordinals: Array[Integer]
81
+ # @rbs @salt_ordinals: Array[Integer]
82
+ # @rbs @escaped_separator_selector: String
83
+ # @rbs @escaped_guards_selector: String
84
+ # @rbs @blocklist_mode: Symbol
85
+ # @rbs @blocklist_max_length: Integer
86
+
87
# Build a HashId encoder.
#
# Only +min_hash_length+ is validated here; every other argument is stored as
# given. Preparing the character sets (alphabet, separators, guards and salt
# as arrays of ordinals) is delegated to HashidOrdinalAlphabetSeparatorGuards,
# and this method caches what that object exposes so encode/decode can read
# plain instance variables.
#
# @param salt [String] Salt handed to HashidOrdinalAlphabetSeparatorGuards and later mixed into every shuffle key
# @param min_hash_length [Integer] Minimum length of generated hashes (0 for no minimum)
# @param alphabet [Alphabet] Character set to use for encoding
# @param blocklist [Blocklist?] Optional list of words that shouldn't appear in hashes
# @param blocklist_mode [Symbol] Mode for blocklist checking (:always, :length_threshold, :raise_if_likely)
# @param blocklist_max_length [Integer] Maximum ID length for blocklist checking (when mode is :length_threshold)
# @raise [ArgumentError] if +min_hash_length+ is not an Integer >= 0
#
# @rbs (String salt, ?Integer min_hash_length, ?Alphabet alphabet, ?Blocklist? blocklist, ?Symbol blocklist_mode, ?Integer blocklist_max_length) -> void
def initialize(salt, min_hash_length = 0, alphabet = Alphabet.alphanum, blocklist = nil, blocklist_mode = :length_threshold, blocklist_max_length = 32)
  length_ok = min_hash_length.is_a?(Integer) && min_hash_length >= 0
  raise ArgumentError, "The min length must be a Integer and greater than or equal to 0" unless length_ok

  @salt = salt
  @alphabet = alphabet
  @min_hash_length = min_hash_length
  @blocklist = blocklist
  @blocklist_mode = blocklist_mode
  @blocklist_max_length = blocklist_max_length

  character_sets = HashidOrdinalAlphabetSeparatorGuards.new(alphabet, salt)
  @separators_and_guards = character_sets
  @alphabet_ordinals = character_sets.alphabet
  @separator_ordinals = character_sets.seps
  @guard_ordinals = character_sets.guards
  @salt_ordinals = character_sets.salt

  # Selector strings used with String#tr while decoding. String#tr gives
  # special meaning to '-', '\\' and '^' (tr syntax, not regex), so the
  # selectors are expected to arrive already escaped from the helper.
  @escaped_separator_selector = character_sets.seps_tr_selector
  @escaped_guards_selector = character_sets.guards_tr_selector
end
125
+
126
+ attr_reader :alphabet_ordinals #: Array[Integer]
127
+ attr_reader :separator_ordinals #: Array[Integer]
128
+ attr_reader :guard_ordinals #: Array[Integer]
129
+ attr_reader :salt_ordinals #: Array[Integer]
130
+ attr_reader :salt #: String
131
+ attr_reader :alphabet #: Alphabet
132
+ attr_reader :blocklist #: Blocklist?
133
+ attr_reader :min_hash_length #: Integer
134
+
135
# Encode an array of non-negative integers into a hash string.
#
# Steps:
# 1. Every element is passed through Kernel#Integer, which raises for values
#    that cannot be converted (the converted values are not kept).
# 2. An empty array, or any negative element, yields "".
# 3. The numbers are encoded by internal_encode.
# 4. When check_blocklist? says the result should be screened, a blocklisted
#    word in the result raises instead of returning.
#
# @param numbers [Array<Integer>] Array of non-negative integers to encode
# @return [String] The encoded hash string (empty if input is empty or contains negatives)
# @raise [BlocklistError] If the generated hash contains a blocklisted word
#
# @rbs (Array[Integer] numbers) -> String
def encode(numbers)
  # Run purely for the exception Integer() raises on unconvertible input.
  numbers.each { |n| Integer(n) }

  return "" if numbers.empty?
  return "" if numbers.any? { |n| n < 0 }

  encoded = internal_encode(numbers)
  blocked_word = check_blocklist?(encoded) && contains_blocklisted_word?(encoded)
  raise EncodedId::BlocklistError, "Generated ID '#{encoded}' contains blocklisted word: '#{blocked_word}'" if blocked_word

  encoded
end
165
+
166
# Decode a hash string back into an array of integers.
#
# nil and "" short-circuit to an empty array; everything else is handed to
# internal_decode, which strips guards, splits on separators, converts each
# segment back to an integer and finally re-encodes the result to confirm it
# reproduces +hash+ exactly (so arbitrary strings decode to []).
#
# @param hash [String] The hash string to decode
# @return [Array<Integer>] Array of decoded integers (empty if hash is invalid)
#
# @rbs (String hash) -> Array[Integer]
def decode(hash)
  blank = hash.nil? || hash.empty?
  blank ? [] : internal_decode(hash)
end
187
+
188
+ private
189
+
190
# Internal encoding implementation - converts numbers to a hash string.
#
# Phases:
#
# 1. Lottery: hash_int is the sum of numbers[i] % (i + 100). The alphabet entry
#    at hash_int % alphabet_length becomes the first output character and the
#    first element of the shuffle key ("seasoning" = lottery + salt ordinals).
#
# 2. Per-number encoding: before each number the working alphabet is reshuffled
#    in place with the seasoning (the pre-shuffle state is passed along as the
#    third argument of consistent_shuffle!), then hash_one_number appends the
#    number's digits. Between numbers a separator is appended, picked from
#    (number % (leading_digit + index)) % separator_count.
#
# 3. Guards: if the result is shorter than @min_hash_length a guard is
#    prepended (picked from hash_int + first character); if still too short a
#    second guard is appended (picked from hash_int + third character, or
#    hash_int alone when there is no third character).
#
# 4. Padding: while still too short, the alphabet is shuffled with a copy of
#    itself as the key and wrapped around the result (second half in front,
#    first half behind); any overshoot is trimmed evenly from both ends.
#
# The manual while loops are kept deliberately: this is the hot path and the
# original avoids per-iteration block and array allocations.
#
# @param numbers [Array<Integer>] Non-negative integers to encode
# @return [String] The encoded hash string
#
# @rbs (Array[Integer] numbers) -> String
def internal_encode(numbers)
  working_alphabet = @alphabet_ordinals.dup
  base = working_alphabet.length
  count = numbers.length
  final_index = count - 1

  # Phase 1: position-weighted checksum of the inputs.
  hash_int = 0
  position = 0
  while position < count
    hash_int += numbers[position] % (position + 100)
    position += 1
  end

  lottery = working_alphabet[hash_int % base]

  # Output accumulator, as character ordinals (codepoints).
  # @type var hashid_code: Array[Integer]
  hashid_code = [lottery]

  # Shuffle key shared by every per-number shuffle.
  seasoning = [lottery, *@salt_ordinals]

  # Scratch copy of the alphabet as it was before each shuffle; reused to
  # avoid allocating a new array per number.
  pre_shuffle = working_alphabet.dup

  # Phase 2: encode each number with a freshly reshuffled alphabet.
  position = 0
  while position < count
    number = numbers[position]

    pre_shuffle.replace(working_alphabet)
    consistent_shuffle!(working_alphabet, seasoning, pre_shuffle, base)

    leading_digit = hash_one_number(hashid_code, number, working_alphabet, base)

    # Separator only between numbers, never after the last one.
    if position < final_index
      separator_pick = (number % (leading_digit + position)) % @separator_ordinals.length
      hashid_code << @separator_ordinals[separator_pick]
    end

    position += 1
  end

  # Phase 3: guards.
  if hashid_code.length < @min_hash_length
    guard_count = @guard_ordinals.length
    first_char = hashid_code[0] #: Integer
    hashid_code.unshift(@guard_ordinals[(hash_int + first_char) % guard_count])

    if hashid_code.length < @min_hash_length
      # Index 2 may not exist yet (guard + lottery + a single digit is the
      # minimum here); a missing third character contributes 0.
      third_char = hashid_code[2]
      hashid_code << @guard_ordinals[(hash_int + (third_char || 0)) % guard_count]
    end
  end

  # Phase 4: alphabet padding.
  half = base.div(2)
  until hashid_code.length >= @min_hash_length
    consistent_shuffle!(working_alphabet, working_alphabet.dup, nil, base)

    back_half = working_alphabet[half..] #: Array[Integer]
    front_half = working_alphabet[0, half] #: Array[Integer]
    hashid_code.unshift(*back_half)
    hashid_code.concat(front_half)

    overshoot = hashid_code.length - @min_hash_length
    hashid_code = hashid_code[overshoot / 2, @min_hash_length] if overshoot > 0 #: Array[Integer]
  end

  # Ordinals -> UTF-8 string.
  hashid_code.pack("U*")
end
327
+
328
# Internal decoding implementation - converts a hash string back to numbers.
#
# Phases:
#
# 1. Guards are turned into spaces (String#tr with the guard selector) and the
#    string is split on whitespace. With 2 or 3 pieces the payload is piece
#    [1]; with any other count piece [0] is used. No pieces at all -> [].
#
# 2. The payload's first character is the lottery. The rest has its separators
#    turned into spaces and is split into one segment per encoded number.
#
# 3. For each segment the working alphabet is reshuffled with the same key
#    used when encoding (lottery ordinal + salt ordinals) and the segment is
#    converted back to an integer with unhash.
#
# 4. The decoded numbers are re-encoded with encode and compared with +hash+;
#    any difference yields []. This is what stops arbitrary strings from
#    decoding to numbers.
#
# NOTE(review): the verification goes through the public encode, which also
# applies the blocklist check, so a BlocklistError can presumably surface from
# decoding a string whose canonical form is blocklisted - confirm this is intended.
#
# @param hash [String] The hash string to decode
# @return [Array<Integer>] Decoded integers (empty if hash is invalid)
# @raise [InvalidInputError] propagated from unhash when a segment contains a
#   character that is not in the alphabet
#
# @rbs (String hash) -> Array[Integer]
def internal_decode(hash)
  # Phase 1: isolate the payload from guards and padding.
  pieces = hash.tr(@escaped_guards_selector, " ").split(" ")
  payload = pieces[pieces.length.between?(2, 3) ? 1 : 0]
  return [] unless payload

  # Phase 2: lottery character plus one segment per number.
  lottery = payload[0] #: String
  encoded_numbers = (payload[1..] || "").tr(@escaped_separator_selector, " ").split(" ")

  # Same shuffle key the encoder used.
  seasoning = [lottery.ord].concat(@salt_ordinals)

  # Phase 3: replay the encoder's shuffles, one per segment.
  working_alphabet = @alphabet_ordinals.dup
  # @type var decoded: Array[Integer]
  decoded = []
  encoded_numbers.each do |segment|
    consistent_shuffle!(working_alphabet, seasoning, working_alphabet.dup, working_alphabet.length)
    decoded << unhash(segment, working_alphabet)
  end

  # Phase 4: only accept input that is exactly what encode would produce.
  (encode(decoded) == hash) ? decoded : []
end
412
+
413
# Append one integer to +hash_code+ as digits of a custom base-N system.
#
# This is ordinary positional notation (like decimal -> hex), except that the
# digit symbols come from +alphabet+ and N is +alphabet_length+. Digits are
# produced least significant first but end up in +hash_code+ most significant
# first, i.e. in normal reading order. Zero is encoded as the single digit
# alphabet[0].
#
# Example: 123 in base 10 with alphabet ['a','b','c','d','e','f','g','h','i','j']
# - 123 % 10 = 3 → 'd'
# - 12 % 10 = 2 → 'c'
# - 1 % 10 = 1 → 'b'
# - appended to hash_code as "bcd"; the return value is the ordinal of 'b'
#
# @param hash_code [Array<Integer>] The array to append characters to (modified in place)
# @param num [Integer] The number to convert
# @param alphabet [Array<Integer>] The alphabet ordinals to use for encoding
# @param alphabet_length [Integer] Length of the alphabet (cached for performance)
# @return [Integer] The ordinal of the last digit produced, which is the most
#   significant digit and therefore the first character of this number's encoding
#
# @rbs (Array[Integer] hash_code, Integer num, Array[Integer] alphabet, Integer alphabet_length) -> Integer
def hash_one_number(hash_code, num, alphabet, alphabet_length)
  # Digits of this number only, kept most significant first.
  # @type var digits: Array[Integer]
  digits = []

  # At least one pass so that 0 still yields a digit.
  loop do
    digits.unshift(alphabet[num % alphabet_length] || 0)
    num /= alphabet_length
    break unless num > 0
  end

  hash_code.concat(digits)
  digits.fetch(0)
end
449
+
450
# Convert a custom base-N encoded string back to an integer.
#
# This is the inverse of hash_one_number: each character's index in +alphabet+
# is its digit value, and the leftmost character is the most significant digit.
# The value is accumulated with Horner's rule (value = value * N + digit), so
# each character costs one multiply-add instead of a fresh exponentiation.
#
# Example: Decoding "bcd" with alphabet ['a','b','c','d','e','f','g','h','i','j'] (base-10)
# - 'b' → digit 1: 0 × 10 + 1 = 1
# - 'c' → digit 2: 1 × 10 + 2 = 12
# - 'd' → digit 3: 12 × 10 + 3 = 123
#
# An empty +input+ decodes to 0.
#
# @param input [String] The encoded string to decode
# @param alphabet [Array<Integer>] The alphabet ordinals used for encoding
# @return [Integer] The decoded number
# @raise [InvalidInputError] If input contains characters not in the alphabet
#
# @rbs (String input, Array[Integer] alphabet) -> Integer
def unhash(input, alphabet)
  base = alphabet.length
  num = 0 #: Integer

  # Left to right: most significant digit first.
  input.each_char do |char|
    digit = alphabet.index(char.ord)
    raise InvalidInputError, "unable to unhash" unless digit

    num = (num * base) + digit
  end

  num
end
488
+
489
# Decide whether +encoded_string+ should be screened against the blocklist.
#
# This only answers "should we look?"; the actual word search is done by
# contains_blocklisted_word?.
#
# - no blocklist, or an empty one: never check
# - :length_threshold: check only IDs no longer than @blocklist_max_length
# - :always, :raise_if_likely and any other mode value: always check
#   (:raise_if_likely configurations are vetted at configuration time, so an
#   encoder that exists in that mode simply checks every ID)
#
# @param encoded_string [String] The encoded ID to check
# @return [Boolean] True if blocklist should be checked
#
# @rbs (String encoded_string) -> bool
def check_blocklist?(encoded_string)
  return false unless blocklist && !blocklist.empty?
  return true unless @blocklist_mode == :length_threshold

  encoded_string.length <= @blocklist_max_length
end
512
+
513
# Look for a blocklisted word inside +encoded_string+.
#
# Delegates the search to Blocklist#blocks? and normalizes any falsy result
# (including a missing or empty blocklist) to +false+.
#
# @param encoded_string [String] The encoded hash to check
# @return [String, false] The blocklisted word if found, false otherwise
#
# @rbs (String encoded_string) -> (String | false)
def contains_blocklisted_word?(encoded_string)
  return false unless blocklist && !blocklist.empty?

  blocklist.blocks?(encoded_string) || false
end
525
+ end
526
+ end
527
+ end