encoded_id 1.0.0.rc4 → 1.0.0.rc6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +84 -1
- data/LICENSE.txt +1 -1
- data/README.md +79 -286
- data/context/encoded_id.md +283 -0
- data/lib/encoded_id/alphabet.rb +36 -3
- data/lib/encoded_id/blocklist.rb +90 -0
- data/lib/encoded_id/encoders/base.rb +71 -0
- data/lib/encoded_id/encoders/hash_id.rb +531 -0
- data/lib/encoded_id/encoders/hash_id_consistent_shuffle.rb +110 -0
- data/lib/encoded_id/encoders/hash_id_ordinal_alphabet_separator_guards.rb +270 -0
- data/lib/encoded_id/encoders/hash_id_salt.rb +51 -0
- data/lib/encoded_id/encoders/my_sqids.rb +465 -0
- data/lib/encoded_id/encoders/sqids.rb +42 -0
- data/lib/encoded_id/hex_representation.rb +23 -5
- data/lib/encoded_id/reversible_id.rb +110 -26
- data/lib/encoded_id/version.rb +4 -1
- data/lib/encoded_id.rb +41 -0
- metadata +17 -31
- data/Gemfile +0 -24
- data/Rakefile +0 -14
- data/Steepfile +0 -5
- data/rbs_collection.yaml +0 -24
- data/sig/encoded_id.rbs +0 -117
- data/sig/hash_ids.rbs +0 -70
|
@@ -0,0 +1,531 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# This implementation based on https://github.com/peterhellberg/hashids.rb
|
|
4
|
+
# --------------------------------------------------------------------------
|
|
5
|
+
# Original Hashids implementation is MIT licensed:
|
|
6
|
+
#
|
|
7
|
+
# Copyright (c) 2013-2017 Peter Hellberg
|
|
8
|
+
#
|
|
9
|
+
# MIT License
|
|
10
|
+
#
|
|
11
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
|
12
|
+
# a copy of this software and associated documentation files (the
|
|
13
|
+
# "Software"), to deal in the Software without restriction, including
|
|
14
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
|
15
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
|
16
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
|
17
|
+
# the following conditions:
|
|
18
|
+
#
|
|
19
|
+
# The above copyright notice and this permission notice shall be
|
|
20
|
+
# included in all copies or substantial portions of the Software.
|
|
21
|
+
#
|
|
22
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
23
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
24
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
25
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
|
26
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
|
27
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
|
28
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
29
|
+
# --------------------------------------------------------------------------
|
|
30
|
+
#
|
|
31
|
+
# This version also MIT licensed (Stephen Ierodiaconou 2023-2025):
|
|
32
|
+
# see LICENSE.txt file
|
|
33
|
+
# rbs_inline: enabled
|
|
34
|
+
|
|
35
|
+
# == HashID Algorithm Overview
|
|
36
|
+
#
|
|
37
|
+
# Hashids is a small library that generates short, unique, non-sequential IDs from numbers.
|
|
38
|
+
# The algorithm has several key properties:
|
|
39
|
+
#
|
|
40
|
+
# 1. **Deterministic**: Same input numbers always produce the same hash
|
|
41
|
+
# 2. **Reversible**: You can decode the hash back to the original numbers
|
|
42
|
+
# 3. **Non-sequential**: Sequential numbers don't produce sequential hashes
|
|
43
|
+
# 4. **Customizable**: Uses a salt, minimum length, alphabet, and optional blocklist
|
|
44
|
+
#
|
|
45
|
+
# === Core Algorithm Concepts:
|
|
46
|
+
#
|
|
47
|
+
# The algorithm works by:
|
|
48
|
+
# - Converting each integer to a custom base-N representation using a shuffled alphabet
|
|
49
|
+
# - The alphabet permutation is deterministic based on a "lottery" character and salt
|
|
50
|
+
# - A lottery character is chosen based on a hash of the input numbers
|
|
51
|
+
# - Each number is encoded with a different alphabet permutation (for obfuscation)
|
|
52
|
+
# - Separators divide encoded numbers, and guards are added for minimum length
|
|
53
|
+
# - The decode process reverses this by extracting the lottery, splitting on separators,
|
|
54
|
+
# and converting each segment back from the custom base-N representation
|
|
55
|
+
#
|
|
56
|
+
# === Character Sets:
|
|
57
|
+
#
|
|
58
|
+
# - **Alphabet**: Main characters used to encode numbers (after setup, doesn't include separators/guards)
|
|
59
|
+
# - **Separators**: Characters that separate encoded number segments within a hash
|
|
60
|
+
# - **Guards**: Special characters added at boundaries to meet minimum length requirements
|
|
61
|
+
# - All three sets are disjoint (no overlap) after initialization
|
|
62
|
+
#
|
|
63
|
+
# === Why This Design?
|
|
64
|
+
#
|
|
65
|
+
# The shuffling and lottery system ensures that:
|
|
66
|
+
# - Similar numbers produce very different hashes (no sequential patterns)
|
|
67
|
+
# - Each position in a multi-number sequence uses a different encoding
|
|
68
|
+
# - The hash obfuscates the inputs if the salt is unknown
|
|
69
|
+
# - The same numbers always produce the same hash (deterministic)
|
|
70
|
+
|
|
71
|
+
module EncodedId
|
|
72
|
+
module Encoders
|
|
73
|
+
class HashId < Base
|
|
74
|
+
# @rbs @separators_and_guards: HashIdOrdinalAlphabetSeparatorGuards
|
|
75
|
+
# @rbs @alphabet_ordinals: Array[Integer]
|
|
76
|
+
# @rbs @separator_ordinals: Array[Integer]
|
|
77
|
+
# @rbs @guard_ordinals: Array[Integer]
|
|
78
|
+
# @rbs @salt_ordinals: Array[Integer]
|
|
79
|
+
# @rbs @escaped_separator_selector: String
|
|
80
|
+
# @rbs @escaped_guards_selector: String
|
|
81
|
+
|
|
82
|
+
# Initialize a new HashId encoder with custom parameters.
|
|
83
|
+
#
|
|
84
|
+
# The initialization process sets up the character sets (alphabet, separators, guards)
|
|
85
|
+
# that will be used for encoding and decoding. These character sets are:
|
|
86
|
+
# 1. Shuffled based on the salt for uniqueness
|
|
87
|
+
# 2. Balanced in ratios (alphabet:separators ≈ 3.5:1, alphabet:guards ≈ 12:1)
|
|
88
|
+
# 3. Made disjoint (no character appears in multiple sets)
|
|
89
|
+
#
|
|
90
|
+
# @param salt [String] Secret salt used to shuffle the alphabet (empty string is valid)
|
|
91
|
+
# @param min_hash_length [Integer] Minimum length of generated hashes (0 for no minimum)
|
|
92
|
+
# @param alphabet [Alphabet] Character set to use for encoding
|
|
93
|
+
# @param blocklist [Blocklist?] Optional list of words that shouldn't appear in hashes
|
|
94
|
+
#
|
|
95
|
+
# @rbs (String salt, ?Integer min_hash_length, ?Alphabet alphabet, ?Blocklist? blocklist) -> void
|
|
96
|
+
def initialize(salt, min_hash_length = 0, alphabet = Alphabet.alphanum, blocklist = nil)
  # Bare `super` forwards all four arguments, unchanged, to the superclass initializer
  # (Base is defined outside this file).
  super

  # Only a non-negative Integer is accepted as the minimum length.
  acceptable_min_length = min_hash_length.is_a?(Integer) && min_hash_length >= 0
  raise ArgumentError, "The min length must be a Integer and greater than or equal to 0" unless acceptable_min_length

  @min_hash_length = min_hash_length

  # A single helper object derives every character set from the alphabet and the salt.
  # The sets are kept as arrays of codepoints (see the @rbs declarations for this class).
  character_sets = HashIdOrdinalAlphabetSeparatorGuards.new(alphabet, salt)
  @separators_and_guards = character_sets
  @alphabet_ordinals = character_sets.alphabet
  @separator_ordinals = character_sets.seps
  @guard_ordinals = character_sets.guards
  @salt_ordinals = character_sets.salt

  # Selector strings handed to String#tr while decoding. String#tr takes a character
  # selector, not a regex; presumably the helper has already escaped the characters that
  # are special to tr ('-', '\\' and '^') -- confirm in HashIdOrdinalAlphabetSeparatorGuards.
  @escaped_separator_selector = character_sets.seps_tr_selector
  @escaped_guards_selector = character_sets.guards_tr_selector
end
|
|
115
|
+
|
|
116
|
+
# Read-only access to the codepoint arrays assigned in #initialize.
attr_reader :alphabet_ordinals #: Array[Integer]
attr_reader :separator_ordinals #: Array[Integer]
attr_reader :guard_ordinals #: Array[Integer]
attr_reader :salt_ordinals #: Array[Integer]
|
|
120
|
+
|
|
121
|
+
# Encode an array of non-negative integers into a hash string.
|
|
122
|
+
#
|
|
123
|
+
# The encoding process:
|
|
124
|
+
# 1. Validates all numbers are integers and non-negative
|
|
125
|
+
# 2. Calculates a "lottery" character based on the input numbers
|
|
126
|
+
# 3. For each number, shuffles the alphabet and encodes the number in that custom base
|
|
127
|
+
# 4. Inserts separator characters between encoded numbers
|
|
128
|
+
# 5. Adds guards and padding if needed to meet minimum length
|
|
129
|
+
# 6. Validates the result doesn't contain blocklisted words
|
|
130
|
+
#
|
|
131
|
+
# @param numbers [Array<Integer>] Array of non-negative integers to encode
|
|
132
|
+
# @return [String] The encoded hash string (empty if input is empty or contains negatives)
|
|
133
|
+
# @raise [BlocklistError] If the generated hash contains a blocklisted word
|
|
134
|
+
#
|
|
135
|
+
# @rbs (Array[Integer] numbers) -> String
|
|
136
|
+
def encode(numbers)
  # Integer() raises for anything it cannot convert; the converted values are discarded,
  # this pass exists only for that raise.
  numbers.each { |candidate| Integer(candidate) }

  # An empty list, or any negative entry, is answered with an empty string rather than an error.
  return "" if numbers.empty? || numbers.any? { |candidate| candidate < 0 }

  hashid = internal_encode(numbers)

  # Without a configured, non-empty blocklist there is nothing left to check.
  return hashid unless blocklist && !blocklist.empty?

  offending_word = contains_blocklisted_word?(hashid)
  raise EncodedId::BlocklistError, "Generated ID contains blocklisted word: '#{offending_word}'" if offending_word

  hashid
end
|
|
151
|
+
|
|
152
|
+
# Decode a hash string back into an array of integers.
|
|
153
|
+
#
|
|
154
|
+
# The decoding process:
|
|
155
|
+
# 1. Removes guards by replacing them with spaces and splitting
|
|
156
|
+
# 2. Extracts the lottery character (first character after guard removal)
|
|
157
|
+
# 3. Splits on separators to get individual encoded number segments
|
|
158
|
+
# 4. For each segment, shuffles the alphabet the same way as encoding and decodes
|
|
159
|
+
# 5. Verifies by re-encoding the result and comparing to the original hash
|
|
160
|
+
#
|
|
161
|
+
# This verification step is critical for valid decoding: it ensures that random strings
|
|
162
|
+
# won't decode to valid numbers. Only properly encoded hashes will pass.
|
|
163
|
+
#
|
|
164
|
+
# @param hash [String] The hash string to decode
|
|
165
|
+
# @return [Array<Integer>] Array of decoded integers (empty if hash is invalid)
|
|
166
|
+
#
|
|
167
|
+
# @rbs (String hash) -> Array[Integer]
|
|
168
|
+
def decode(hash)
  # nil and "" cannot carry any numbers, so the decoding machinery is skipped for them.
  # Nothing is rescued here: whatever internal_decode raises propagates to the caller.
  if hash.nil? || hash.empty?
    []
  else
    internal_decode(hash)
  end
end
|
|
173
|
+
|
|
174
|
+
# Decode a hash that was encoded from hexadecimal numbers.
|
|
175
|
+
#
|
|
176
|
+
# This is a specialized variant for hashes created from hex strings.
|
|
177
|
+
# It decodes the hash to integers, then converts each integer back to hex
|
|
178
|
+
# (skipping the leading '1' that was added during hex encoding).
|
|
179
|
+
#
|
|
180
|
+
# @param hash [String] The hash string to decode
|
|
181
|
+
# @return [String] The original hexadecimal string (uppercase)
|
|
182
|
+
#
|
|
183
|
+
# @rbs (String hash) -> String
|
|
184
|
+
def decode_hex(hash)
  # Render each decoded integer in base 16 and drop its first hex digit with [1..]
  # (the method's doc comment describes that digit as the '1' added during hex encoding).
  hex_pieces = decode(hash).map { |number| number.to_s(16)[1..] }

  # The pieces are concatenated in order and returned in upper case.
  hex_pieces.join.upcase
end
|
|
193
|
+
|
|
194
|
+
private
|
|
195
|
+
|
|
196
|
+
# Internal encoding implementation - converts numbers to a hash string.
|
|
197
|
+
#
|
|
198
|
+
# Algorithm steps:
|
|
199
|
+
#
|
|
200
|
+
# Step 1: Calculate the "lottery" character
|
|
201
|
+
# - Create a hash_int from the input numbers (weighted sum: num % (index + 100))
|
|
202
|
+
# - Use hash_int to pick a lottery character from the alphabet
|
|
203
|
+
# - The lottery becomes the first character and seeds all alphabet shuffles
|
|
204
|
+
#
|
|
205
|
+
# Step 2: Encode each number
|
|
206
|
+
# - For each number:
|
|
207
|
+
# a. Shuffle alphabet using (lottery + salt) as the shuffle key
|
|
208
|
+
# b. Convert number to custom base-N using shuffled alphabet (via hash_one_number)
|
|
209
|
+
# c. Insert a separator character between numbers (chosen deterministically)
|
|
210
|
+
# - Each number gets a different alphabet permutation due to the shuffle
|
|
211
|
+
#
|
|
212
|
+
# Step 3: Add guards if below minimum length
|
|
213
|
+
# - Guards are special boundary characters that don't encode data
|
|
214
|
+
# - First guard is prepended based on (hash_int + first_char)
|
|
215
|
+
# - Second guard is appended based on (hash_int + third_char)
|
|
216
|
+
#
|
|
217
|
+
# Step 4: Pad with alphabet if still below minimum length
|
|
218
|
+
# - Shuffle the alphabet using itself as the key
|
|
219
|
+
# - Wrap the hash with the shuffled alphabet (second half + hash + first half)
|
|
220
|
+
# - Trim excess from the middle if we overshoot the target length
|
|
221
|
+
#
|
|
222
|
+
# The result is a string where:
|
|
223
|
+
# - Structure: [padding?] [guard?] lottery encoded_num1 sep encoded_num2 sep ... encoded_numN [guard?] [padding?]
|
|
224
|
+
# - Each component is deterministic based on the input numbers and salt
|
|
225
|
+
# - Similar inputs produce very different outputs due to the lottery system
|
|
226
|
+
#
|
|
227
|
+
# @param numbers [Array<Integer>] Non-negative integers to encode
|
|
228
|
+
# @return [String] The encoded hash string
|
|
229
|
+
#
|
|
230
|
+
# @rbs (Array[Integer] numbers) -> String
|
|
231
|
+
def internal_encode(numbers)
  # Work on a private copy: the alphabet is reshuffled in place several times below.
  alphabet = @alphabet_ordinals.dup
  alphabet_size = alphabet.length
  separators = @separator_ordinals
  guards = @guard_ordinals
  min_length = @min_hash_length
  last_position = numbers.length - 1

  # Step 1: fold the inputs into hash_int. Each number contributes its remainder modulo
  # (position + 100), so the same value counts differently depending on where it sits.
  hash_int = 0
  numbers.each_with_index { |number, position| hash_int += number % (position + 100) }

  # hash_int selects the lottery codepoint from the not-yet-shuffled alphabet. It opens the
  # output and, together with the salt, forms the key ("seasoning") for every shuffle below.
  lottery = alphabet[hash_int % alphabet_size]

  # @type var code: Array[Integer]
  code = [lottery]
  seasoning = [lottery].concat(@salt_ordinals)

  # Scratch array holding the alphabet as it was before each shuffle; reused across
  # iterations so the loop does not allocate a new copy per number.
  pre_shuffle = alphabet.dup

  # Step 2: encode every number with a freshly shuffled alphabet.
  numbers.each_with_index do |number, position|
    # The key is the seasoning followed by the pre-shuffle alphabet, capped at alphabet_size.
    pre_shuffle.replace(alphabet)
    consistent_shuffle!(alphabet, seasoning, pre_shuffle, alphabet_size)

    # Appends the number's digits to `code`; the return value is the codepoint of the
    # digit inserted last, which is the first character of this number's segment.
    leading_ord = hash_one_number(code, number, alphabet, alphabet_size)

    # No separator follows the final number.
    next unless position < last_position

    separator_seed = number % (leading_ord + position)
    code << separators[separator_seed % separators.length]
  end

  # Step 3: when too short, put one guard in front and, if still too short, one behind.
  if code.length < min_length
    # The front guard is chosen from hash_int plus the current first codepoint (the lottery).
    code.unshift(guards[(hash_int + code[0]) % guards.length])

    if code.length < min_length
      # After the unshift, index 2 is the first digit of the first number; if it were
      # missing, hash_int alone selects the guard.
      third = code[2]
      guard_seed = third ? hash_int + third : hash_int
      code.push(guards[guard_seed % guards.length])
    end
  end

  # Step 4: pad with halves of a re-shuffled alphabet until the minimum length is met.
  half = alphabet.length.div(2)

  until code.length >= min_length
    # The alphabet is shuffled using a copy of itself as the key.
    consistent_shuffle!(alphabet, alphabet.dup, nil, alphabet.length)

    # The second half goes in front of the code, the first half behind it.
    code = alphabet[half..] + code + alphabet[0, half]

    # On overshoot keep a window of exactly min_length, starting half the excess in.
    overshoot = code.length - min_length
    code = code[overshoot / 2, min_length] if overshoot > 0
  end

  # Turn the codepoints into a UTF-8 string.
  code.pack("U*")
end
|
|
334
|
+
|
|
335
|
+
# Internal decoding implementation - converts a hash string back to numbers.
|
|
336
|
+
#
|
|
337
|
+
# Algorithm steps:
|
|
338
|
+
#
|
|
339
|
+
# Step 1: Remove guards
|
|
340
|
+
# - Replace all guard characters with spaces and split
|
|
341
|
+
# - Guards can appear at positions [0] or [0] and [-1]
|
|
342
|
+
# - If array has 2 or 3 elements, the middle one contains the actual hash
|
|
343
|
+
# - Otherwise, element [0] contains the hash
|
|
344
|
+
#
|
|
345
|
+
# Step 2: Extract lottery and split on separators
|
|
346
|
+
# - First character is the lottery (same as during encoding)
|
|
347
|
+
# - Replace separator characters with spaces and split
|
|
348
|
+
# - Each segment is an encoded number
|
|
349
|
+
#
|
|
350
|
+
# Step 3: Decode each number
|
|
351
|
+
# - For each segment:
|
|
352
|
+
# a. Shuffle alphabet using (lottery + salt) - same as encoding
|
|
353
|
+
# b. Convert from custom base-N back to integer (via unhash)
|
|
354
|
+
# - The alphabet shuffles must match the encoding shuffles exactly
|
|
355
|
+
#
|
|
356
|
+
# Step 4: Verify the result
|
|
357
|
+
# - Re-encode the decoded numbers and compare to original hash
|
|
358
|
+
# - If they don't match, return empty array
|
|
359
|
+
# - This prevents random strings from decoding to valid numbers
|
|
360
|
+
#
|
|
361
|
+
# @param hash [String] The hash string to decode
|
|
362
|
+
# @return [Array<Integer>] Decoded integers (empty if hash is invalid)
|
|
363
|
+
#
|
|
364
|
+
# @rbs (String hash) -> Array[Integer]
|
|
365
|
+
def internal_decode(hash)
  # @type var decoded: Array[Integer]
  decoded = []
  alphabet = @alphabet_ordinals.dup

  # Step 1: blank out every guard character and split on whitespace. String#split(" ")
  # ignores leading and trailing whitespace, so a guard at either end of the string does
  # not produce an empty piece.
  pieces = hash.tr(@escaped_guards_selector, " ").split(" ")

  # With two or three pieces the payload is read from index 1 (the piece in front of it is
  # padding placed before the first guard); with any other count it is read from index 0.
  payload = pieces[(pieces.length == 2 || pieces.length == 3) ? 1 : 0]

  # A string consisting only of guard characters leaves nothing to decode.
  return decoded unless payload

  # Step 2: the first character is the lottery; the rest holds the numbers, separator-delimited.
  lottery = payload[0] #: String
  body = payload[1..] || "" #: String
  encoded_numbers = body.tr(@escaped_separator_selector, " ").split(" ")

  # Same shuffle key as in encoding: lottery codepoint followed by the salt.
  seasoning = [lottery.ord].concat(@salt_ordinals)

  # Step 3: replay the per-number shuffles in order and convert each segment back.
  encoded_numbers.each do |segment|
    consistent_shuffle!(alphabet, seasoning, alphabet.dup, alphabet.length)
    decoded << unhash(segment, alphabet)
  end

  # Step 4: accept the result only if re-encoding reproduces the input exactly.
  # NOTE(review): this goes through the public #encode, which raises BlocklistError when
  # the re-encoded ID contains a blocklisted word -- so that error can surface from a
  # decode call. Confirm this is intended.
  (encode(decoded) == hash) ? decoded : []
end
|
|
421
|
+
|
|
422
|
+
# Convert a single integer to its representation in a custom base-N system.
|
|
423
|
+
#
|
|
424
|
+
# This is similar to converting a decimal number to binary, hex, etc., but:
|
|
425
|
+
# - Uses a custom alphabet instead of 0-9 or 0-9A-F
|
|
426
|
+
# - The alphabet can be any length (base-N where N = alphabet.length)
|
|
427
|
+
# - Digits are produced least-significant first; each one is inserted in front of the digits already added, so the most significant digit ends up first
|
|
428
|
+
#
|
|
429
|
+
# Example: Converting 123 to base-10 with alphabet ['a','b','c','d','e','f','g','h','i','j']
|
|
430
|
+
# - 123 % 10 = 3 → 'd' (index 3)
|
|
431
|
+
# - 12 % 10 = 2 → 'c' (index 2)
|
|
432
|
+
# - 1 % 10 = 1 → 'b' (index 1)
|
|
433
|
+
# - Result: 'd', 'c', 'b' are produced in that order, each inserted before the previous one, so "bcd" is what gets appended to hash_code
|
|
434
|
+
#
|
|
435
|
+
# @param hash_code [Array<Integer>] The array to append characters to (modified in place)
|
|
436
|
+
# @param num [Integer] The number to convert
|
|
437
|
+
# @param alphabet [Array<Integer>] The alphabet ordinals to use for encoding
|
|
438
|
+
# @param alphabet_length [Integer] Length of the alphabet (cached for performance)
|
|
439
|
+
# @return [Integer] The ordinal of the last character added
|
|
440
|
+
#
|
|
441
|
+
# @rbs (Array[Integer] hash_code, Integer num, Array[Integer] alphabet, Integer alphabet_length) -> Integer
|
|
442
|
+
def hash_one_number(hash_code, num, alphabet, alphabet_length)
  # Every digit is inserted at this one index (the end of hash_code on entry). Digits are
  # produced least-significant first, so each insertion pushes the earlier ones to the
  # right and the segment reads most-significant first.
  segment_start = hash_code.length
  digit_ord = 0 #: Integer

  loop do
    # The `|| 0` fallback only applies if the alphabet lookup yields nil.
    digit_ord = alphabet[num % alphabet_length] || 0
    hash_code.insert(segment_start, digit_ord)
    num /= alphabet_length
    break unless num > 0
  end

  # The digit inserted last is the most significant one, i.e. the segment's first character.
  digit_ord
end
|
|
459
|
+
|
|
460
|
+
# Convert a custom base-N encoded string back to an integer.
|
|
461
|
+
#
|
|
462
|
+
# This is the inverse of hash_one_number. It treats the input string as a number
|
|
463
|
+
# in a custom base where each character's position in the alphabet represents its digit value.
|
|
464
|
+
#
|
|
465
|
+
# Example: Decoding "bcd" with alphabet ['a','b','c','d','e','f','g','h','i','j'] (base-10)
|
|
466
|
+
# - 'b' at position 1: 1 × 10² = 100
|
|
467
|
+
# - 'c' at position 2: 2 × 10¹ = 20
|
|
468
|
+
# - 'd' at position 3: 3 × 10⁰ = 3
|
|
469
|
+
# - Result: 100 + 20 + 3 = 123
|
|
470
|
+
#
|
|
471
|
+
# @param input [String] The encoded string to decode
|
|
472
|
+
# @param alphabet [Array<Integer>] The alphabet ordinals used for encoding
|
|
473
|
+
# @return [Integer] The decoded number
|
|
474
|
+
# @raise [InvalidInputError] If input contains characters not in the alphabet
|
|
475
|
+
#
|
|
476
|
+
# @rbs (String input, Array[Integer] alphabet) -> Integer
|
|
477
|
+
def unhash(input, alphabet)
  alphabet_length = alphabet.length

  # Horner's rule: walk the characters from most to least significant, multiplying the
  # running value by the base and adding the digit. This gives the same result as summing
  # digit * base**exponent, without an exponentiation or a positional string lookup per
  # character. An empty input yields 0.
  input.each_char.reduce(0) do |num, char|
    # A character's digit value is its index in the (shuffled) alphabet.
    digit = alphabet.index(char.ord)
    raise InvalidInputError, "unable to unhash" unless digit

    (num * alphabet_length) + digit
  end
end
|
|
498
|
+
|
|
499
|
+
# Delegate to the consistent shuffle algorithm.
|
|
500
|
+
#
|
|
501
|
+
# This deterministic shuffle is the heart of the HashID algorithm's obfuscation.
|
|
502
|
+
# It ensures that the same salt always produces the same permutation of the alphabet.
|
|
503
|
+
#
|
|
504
|
+
# @param collection_to_shuffle [Array<Integer>] The array to shuffle (modified in place)
|
|
505
|
+
# @param salt_part_1 [Array<Integer>] First part of the salt (lottery + salt, or alphabet)
|
|
506
|
+
# @param salt_part_2 [Array<Integer>?] Second part of the salt (pre-shuffle alphabet copy)
|
|
507
|
+
# @param max_salt_length [Integer] Maximum length to use from combined salt
|
|
508
|
+
# @return [Array<Integer>] The shuffled array (same object as collection_to_shuffle)
|
|
509
|
+
#
|
|
510
|
+
# @rbs (Array[Integer] collection_to_shuffle, Array[Integer] salt_part_1, Array[Integer]? salt_part_2, Integer max_salt_length) -> Array[Integer]
|
|
511
|
+
def consistent_shuffle!(collection_to_shuffle, salt_part_1, salt_part_2, max_salt_length)
  # Instance-level entry point only: the arguments are handed over unchanged and the
  # class method's return value is returned as-is.
  HashIdConsistentShuffle.shuffle!(
    collection_to_shuffle,
    salt_part_1,
    salt_part_2,
    max_salt_length
  )
end
|
|
514
|
+
|
|
515
|
+
# Check if the encoded string contains any blocklisted words.
|
|
516
|
+
#
|
|
517
|
+
# @param encoded_string [String] The encoded hash to check
|
|
518
|
+
# @return [String, false] The blocklisted word if found, false otherwise
|
|
519
|
+
#
|
|
520
|
+
# @rbs (String encoded_string) -> (String | false)
|
|
521
|
+
def contains_blocklisted_word?(encoded_string)
  # No blocklist configured, or an empty one: nothing can match.
  return false if !@blocklist || @blocklist.empty?

  # blocks? hands back the offending word when there is one; any falsy answer is
  # normalised to `false` so the return value is always String or false.
  @blocklist.blocks?(encoded_string) || false
end
|
|
529
|
+
end
|
|
530
|
+
end
|
|
531
|
+
end
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# rbs_inline: enabled
|
|
4
|
+
|
|
5
|
+
module EncodedId
|
|
6
|
+
module Encoders
|
|
7
|
+
# Implements a deterministic, salt-based shuffle algorithm for HashIDs.
|
|
8
|
+
#
|
|
9
|
+
# This is the core obfuscation mechanism that makes HashIDs non-sequential.
|
|
10
|
+
# The algorithm has several critical properties:
|
|
11
|
+
#
|
|
12
|
+
# 1. **Deterministic**: Same input + same salt = same output (always)
|
|
13
|
+
# 2. **Reversible**: The shuffle can be undone if needed
|
|
14
|
+
# 3. **Salt-dependent**: Different salts produce different permutations
|
|
15
|
+
# 4. **Consistent**: Multiple calls with the same salt produce the same shuffle
|
|
16
|
+
#
|
|
17
|
+
# == Algorithm Overview:
|
|
18
|
+
#
|
|
19
|
+
# The shuffle works by:
|
|
20
|
+
# - Walking backwards through the collection (from last to second element)
|
|
21
|
+
# - For each position i, selecting a swap partner j using the salt
|
|
22
|
+
# - The swap position is calculated from: (salt_char + index + running_total) % i
|
|
23
|
+
# - Cycling through salt characters, wrapping when we reach the end
|
|
24
|
+
#
|
|
25
|
+
# This is similar to a Fisher-Yates shuffle, but with deterministic swap positions
|
|
26
|
+
# derived from the salt rather than random numbers.
|
|
27
|
+
#
|
|
28
|
+
# == Why Two Salt Parts?
|
|
29
|
+
#
|
|
30
|
+
# The algorithm accepts salt in two parts (salt_part_1 and salt_part_2) to support
|
|
31
|
+
# scenarios where the salt is constructed from multiple sources:
|
|
32
|
+
# - salt_part_1: Primary salt (e.g., lottery + user salt)
|
|
33
|
+
# - salt_part_2: Secondary salt (e.g., pre-shuffle alphabet copy)
|
|
34
|
+
#
|
|
35
|
+
# When cycling through salt characters, it reads from salt_part_1 first, then
|
|
36
|
+
# salt_part_2 if the index exceeds salt_part_1's length.
|
|
37
|
+
#
|
|
38
|
+
# == Example:
|
|
39
|
+
#
|
|
40
|
+
# Input: [1, 2, 3, 4], salt: [65, 66, 67] (ABC)
|
|
41
|
+
# Step 1: i=3, salt[0]=65, ord_total=65 → swap positions 3 and ((65+0+65)%3=1) → [1,4,3,2]
|
|
42
|
+
# Step 2: i=2, salt[1]=66, ord_total=131 → swap positions 2 and ((66+1+131)%2=0) → [3,4,1,2]
|
|
43
|
+
# Step 3: i=1, salt[2]=67, ord_total=198 → swap positions 1 and ((67+2+198)%1=0) → [4,3,1,2]
|
|
44
|
+
# Result: [4, 3, 1, 2]
|
|
45
|
+
#
|
|
46
|
+
class HashIdConsistentShuffle
  # Deterministically shuffle a collection in place, using salt codepoints as the
  # source of swap positions.
  #
  # The salt is read as salt_part_1 followed by salt_part_2, and only its first
  # max_salt_length entries are used; the read position wraps back to 0 after that.
  # The collection is returned untouched when it is empty, when max_salt_length is 0,
  # or when salt_part_1 is nil or empty.
  #
  # @param collection_to_shuffle [Array<Integer>] Array to shuffle (modified in place)
  # @param salt_part_1 [Array<Integer>] Primary salt characters (as ordinals)
  # @param salt_part_2 [Array<Integer>?] Optional secondary salt characters
  # @param max_salt_length [Integer] Number of salt entries to cycle through
  # @return [Array<Integer>] The shuffled array (same object as input)
  # @raise [SaltError] If salt_part_1 is shorter than max_salt_length and there is no
  #   salt_part_2, or if the combined salt runs out before max_salt_length entries
  #
  # @rbs (Array[Integer] collection_to_shuffle, Array[Integer] salt_part_1, Array[Integer]? salt_part_2, Integer max_salt_length) -> Array[Integer]
  def self.shuffle!(collection_to_shuffle, salt_part_1, salt_part_2, max_salt_length)
    # A nil primary salt counts as length 0, so the short-circuit below can handle it
    # instead of failing with NoMethodError on nil.length.
    salt_part_1_length = salt_part_1.nil? ? 0 : salt_part_1.length

    # If max_salt_length reaches past salt_part_1, salt_part_2 has to supply the rest.
    raise SaltError, "Salt is too short in shuffle" if salt_part_1_length < max_salt_length && salt_part_2.nil?

    # Nothing to shuffle, or nothing to shuffle with.
    return collection_to_shuffle if collection_to_shuffle.empty? || max_salt_length == 0 || salt_part_1_length == 0

    # idx: current read position in the combined salt (0...max_salt_length)
    # ord_total: running sum of every salt ordinal read so far, including the current one
    idx = ord_total = 0

    # Walk from the last element down to index 1; index 0 only ever takes part as a swap target.
    i = collection_to_shuffle.length - 1
    while i >= 1
      # Read from salt_part_1 first, then continue into salt_part_2.
      n = if idx >= salt_part_1_length
        raise SaltError, "Salt shuffle has failed" unless salt_part_2

        salt_part_2[idx - salt_part_1_length]
      else
        salt_part_1[idx]
      end

      # The combined salt is shorter than max_salt_length: report it as a salt problem
      # rather than letting `ord_total += nil` fail with a TypeError.
      raise SaltError, "Salt shuffle has failed" if n.nil?

      # The running total is updated BEFORE the swap index is computed, so the current
      # ordinal contributes twice (once as n, once inside ord_total).
      ord_total += n

      # The modulo keeps j in 0...i, so position i swaps with a strictly lower index.
      j = (n + idx + ord_total) % i

      collection_to_shuffle[i], collection_to_shuffle[j] = collection_to_shuffle[j], collection_to_shuffle[i]

      # Advance through the salt, wrapping after max_salt_length entries.
      idx = (idx + 1) % max_salt_length
      i -= 1
    end

    collection_to_shuffle
  end
end
|
|
109
|
+
end
|
|
110
|
+
end
|