encoded_id 1.0.0.rc6 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +29 -4
- data/README.md +44 -33
- data/context/encoded_id.md +229 -75
- data/lib/encoded_id/alphabet.rb +2 -0
- data/lib/encoded_id/blocklist.rb +17 -7
- data/lib/encoded_id/encoders/base_configuration.rb +145 -0
- data/lib/encoded_id/encoders/{hash_id.rb → hashid.rb} +53 -57
- data/lib/encoded_id/encoders/hashid_configuration.rb +33 -0
- data/lib/encoded_id/encoders/{hash_id_consistent_shuffle.rb → hashid_consistent_shuffle.rb} +4 -4
- data/lib/encoded_id/encoders/{hash_id_ordinal_alphabet_separator_guards.rb → hashid_ordinal_alphabet_separator_guards.rb} +28 -54
- data/lib/encoded_id/encoders/{hash_id_salt.rb → hashid_salt.rb} +3 -3
- data/lib/encoded_id/encoders/my_sqids.rb +5 -16
- data/lib/encoded_id/encoders/sqids.rb +25 -11
- data/lib/encoded_id/encoders/sqids_configuration.rb +17 -0
- data/lib/encoded_id/encoders/sqids_with_blocklist_mode.rb +52 -0
- data/lib/encoded_id/hex_representation.rb +6 -9
- data/lib/encoded_id/reversible_id.rb +56 -138
- data/lib/encoded_id/version.rb +1 -2
- data/lib/encoded_id.rb +16 -19
- metadata +27 -10
- data/lib/encoded_id/encoders/base.rb +0 -71
data/lib/encoded_id/blocklist.rb
CHANGED
|
@@ -3,23 +3,18 @@
|
|
|
3
3
|
# rbs_inline: enabled
|
|
4
4
|
|
|
5
5
|
module EncodedId
|
|
6
|
+
# A blocklist of words that should not appear in encoded IDs.
|
|
6
7
|
class Blocklist
|
|
7
8
|
include Enumerable #[String]
|
|
8
9
|
|
|
9
10
|
# @rbs @words: Set[String]
|
|
10
|
-
|
|
11
|
-
# Class instance variables for memoization
|
|
12
11
|
# @rbs self.@empty: Blocklist
|
|
13
12
|
# @rbs self.@minimal: Blocklist
|
|
14
13
|
|
|
15
14
|
class << self
|
|
16
15
|
# @rbs () -> Blocklist
|
|
17
16
|
def sqids_blocklist
|
|
18
|
-
|
|
19
|
-
new(::Sqids::DEFAULT_BLOCKLIST)
|
|
20
|
-
else
|
|
21
|
-
empty
|
|
22
|
-
end
|
|
17
|
+
new(::Sqids::DEFAULT_BLOCKLIST)
|
|
23
18
|
end
|
|
24
19
|
|
|
25
20
|
# @rbs () -> Blocklist
|
|
@@ -86,5 +81,20 @@ module EncodedId
|
|
|
86
81
|
# Build a new blocklist holding this list's words followed by the words of
# +other_blocklist+. Neither receiver nor argument is modified.
def merge(other_blocklist)
  combined_words = to_a + other_blocklist.to_a
  self.class.new(combined_words)
end
|
|
84
|
+
|
|
85
|
+
# Filters the blocklist to only include words that can be formed from the given alphabet.
|
|
86
|
+
# Only keeps words where ALL characters exist in the alphabet (case-insensitive).
|
|
87
|
+
# Maintains minimum 3-character length requirement.
|
|
88
|
+
#
|
|
89
|
+
# @rbs (Alphabet | String alphabet) -> Blocklist
|
|
90
|
+
def filter_for_alphabet(alphabet)
  source_chars = alphabet.is_a?(Alphabet) ? alphabet.unique_characters : alphabet.to_s.chars

  # Case-fold the alphabet once so the membership test below is
  # case-insensitive, as the method's contract states.
  allowed_chars = Set.new(source_chars, &:downcase)

  # Keep only words of at least 3 characters whose every (case-folded)
  # character is available in the alphabet. Words are kept as stored.
  usable_words = @words.select do |word|
    word.length >= 3 && word.downcase.each_char.all? { |char| allowed_chars.include?(char) }
  end

  self.class.new(usable_words)
end
|
|
89
99
|
end
|
|
90
100
|
end
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# rbs_inline: enabled
|
|
4
|
+
|
|
5
|
+
module EncodedId
|
|
6
|
+
module Encoders
|
|
7
|
+
# Base configuration class for encoder-specific settings
|
|
8
|
+
# This provides common parameters shared across all encoders
|
|
9
|
+
# Base configuration shared by the encoder-specific configuration classes.
#
# Validates and stores the options common to every encoder. Invalid values
# raise InvalidConfigurationError (or InvalidAlphabetError for the alphabet)
# from the constructor. Subclasses must implement +create_encoder+.
class BaseConfiguration
  # Accepted values for +blocklist_mode+.
  VALID_BLOCKLIST_MODES = [:always, :length_threshold, :raise_if_likely].freeze #: Array[Symbol]

  attr_reader :min_length #: Integer
  attr_reader :alphabet #: Alphabet
  attr_reader :split_at #: Integer?
  attr_reader :split_with #: String?
  attr_reader :hex_digit_encoding_group_size #: Integer
  attr_reader :max_length #: Integer?
  attr_reader :max_inputs_per_id #: Integer
  attr_reader :blocklist #: Blocklist
  attr_reader :blocklist_mode #: Symbol
  attr_reader :blocklist_max_length #: Integer

  # Validate every option and store it. A non-empty blocklist is narrowed to
  # the words that can be formed from the configured alphabet.
  #
  # @raise [InvalidAlphabetError] if alphabet is not an Alphabet
  # @raise [InvalidConfigurationError] if any other option is invalid, or if
  #   blocklist_mode is :raise_if_likely and the settings suggest long IDs
  #
  # @rbs (?min_length: Integer, ?alphabet: Alphabet, ?split_at: Integer?, ?split_with: String?, ?hex_digit_encoding_group_size: Integer, ?max_length: Integer?, ?max_inputs_per_id: Integer, ?blocklist: Blocklist | Array[String] | Set[String] | nil, ?blocklist_mode: Symbol, ?blocklist_max_length: Integer) -> void
  def initialize(
    min_length: 8,
    alphabet: Alphabet.modified_crockford,
    split_at: 4,
    split_with: "-",
    hex_digit_encoding_group_size: 4,
    max_length: 128,
    max_inputs_per_id: 32,
    blocklist: Blocklist.empty,
    blocklist_mode: :length_threshold,
    blocklist_max_length: 32
  )
    @min_length = validate_min_length(min_length)
    @alphabet = validate_alphabet(alphabet)
    @split_at = validate_split_at(split_at)
    @split_with = validate_split_with(split_with, @alphabet)
    @hex_digit_encoding_group_size = validate_hex_digit_encoding_group_size(hex_digit_encoding_group_size)
    @max_length = validate_max_length(max_length)
    @max_inputs_per_id = validate_max_inputs_per_id(max_inputs_per_id)
    @blocklist = validate_blocklist(blocklist)
    # Drop words that cannot be produced with this alphabet.
    @blocklist = @blocklist.filter_for_alphabet(@alphabet) unless @blocklist.empty?
    @blocklist_mode = validate_blocklist_mode(blocklist_mode)
    @blocklist_max_length = validate_blocklist_max_length(blocklist_max_length)
    # Must run last: it reads the validated min_length, max_inputs_per_id,
    # blocklist, blocklist_mode and blocklist_max_length.
    validate_blocklist_collision_risk
  end

  # Build the encoder for this configuration. Abstract in the base class.
  #
  # @raise [NotImplementedError] always, unless overridden by a subclass
  #
  # @rbs () -> (Hashid | Sqids)
  def create_encoder
    raise NotImplementedError, "Subclasses must implement create_encoder"
  end

  private

  # @rbs (Alphabet alphabet) -> Alphabet
  def validate_alphabet(alphabet)
    return alphabet if alphabet.is_a?(Alphabet)
    raise InvalidAlphabetError, "alphabet must be an instance of Alphabet"
  end

  # @rbs (Integer min_length) -> Integer
  def validate_min_length(min_length)
    return min_length if valid_integer_option?(min_length)
    raise InvalidConfigurationError, "min_length must be an integer greater than 0"
  end

  # @rbs (Integer? max_length) -> Integer?
  def validate_max_length(max_length)
    return max_length if valid_integer_option?(max_length) || max_length.nil?
    raise InvalidConfigurationError, "max_length must be an integer greater than 0 or nil"
  end

  # @rbs (Integer max_inputs_per_id) -> Integer
  def validate_max_inputs_per_id(max_inputs_per_id)
    return max_inputs_per_id if valid_integer_option?(max_inputs_per_id)
    raise InvalidConfigurationError, "max_inputs_per_id must be an integer greater than 0"
  end

  # @rbs (Integer? split_at) -> Integer?
  def validate_split_at(split_at)
    return split_at if valid_integer_option?(split_at) || split_at.nil?
    raise InvalidConfigurationError, "split_at must be an integer greater than 0 or nil"
  end

  # @rbs (String? split_with, Alphabet alphabet) -> String?
  def validate_split_with(split_with, alphabet)
    return split_with if split_with.nil? || (split_with.is_a?(String) && !alphabet.characters.include?(split_with))
    raise InvalidConfigurationError, "split_with must be a string not part of the alphabet, or nil"
  end

  # Validated like the other integer options so a bad value fails here, at
  # configuration time, instead of being stored unchecked.
  #
  # @rbs (Integer hex_digit_encoding_group_size) -> Integer
  def validate_hex_digit_encoding_group_size(hex_digit_encoding_group_size)
    return hex_digit_encoding_group_size if valid_integer_option?(hex_digit_encoding_group_size)
    raise InvalidConfigurationError, "hex_digit_encoding_group_size must be an integer greater than 0"
  end

  # True when +value+ is an Integer strictly greater than zero.
  #
  # @rbs (Integer? value) -> bool
  def valid_integer_option?(value)
    value.is_a?(Integer) && value > 0
  end

  # Coerce the blocklist option to a Blocklist: nil becomes the empty
  # blocklist, an Array or Set is wrapped, a Blocklist is returned as is.
  #
  # @rbs (Blocklist | Array[String] | Set[String] | nil blocklist) -> Blocklist
  def validate_blocklist(blocklist)
    return blocklist if blocklist.is_a?(Blocklist)
    return Blocklist.empty if blocklist.nil?
    return Blocklist.new(blocklist) if blocklist.is_a?(Array) || blocklist.is_a?(Set)

    raise InvalidConfigurationError, "blocklist must be a Blocklist, Set, or Array of strings"
  end

  # @rbs (Symbol blocklist_mode) -> Symbol
  def validate_blocklist_mode(blocklist_mode)
    return blocklist_mode if VALID_BLOCKLIST_MODES.include?(blocklist_mode)

    raise InvalidConfigurationError, "blocklist_mode must be one of #{VALID_BLOCKLIST_MODES.inspect}, got #{blocklist_mode.inspect}"
  end

  # @rbs (Integer blocklist_max_length) -> Integer
  def validate_blocklist_max_length(blocklist_max_length)
    return blocklist_max_length if valid_integer_option?(blocklist_max_length)

    raise InvalidConfigurationError, "blocklist_max_length must be an integer greater than 0"
  end

  # Validates configuration for :raise_if_likely mode.
  # Does nothing when the blocklist is empty or another mode is selected.
  #
  # @rbs () -> void
  def validate_blocklist_collision_risk
    return if @blocklist.empty?
    return unless @blocklist_mode == :raise_if_likely

    # Check if min_length suggests long IDs
    if @min_length > @blocklist_max_length
      raise InvalidConfigurationError,
        "blocklist_mode is :raise_if_likely and min_length (#{@min_length}) exceeds blocklist_max_length (#{@blocklist_max_length}). " \
        "Long IDs have high collision probability with blocklists. " \
        "Use blocklist_mode: :length_threshold or remove the blocklist."
    end

    # Check if max_inputs_per_id suggests long IDs
    # Rough heuristic: encoding 100+ inputs typically results in long IDs
    if @max_inputs_per_id > 100
      raise InvalidConfigurationError,
        "blocklist_mode is :raise_if_likely and max_inputs_per_id (#{@max_inputs_per_id}) is very high. " \
        "Encoding many inputs typically results in long IDs with high blocklist collision probability. " \
        "Use blocklist_mode: :length_threshold or remove the blocklist."
    end
  end
end
|
|
144
|
+
end
|
|
145
|
+
end
|
|
@@ -70,14 +70,19 @@
|
|
|
70
70
|
|
|
71
71
|
module EncodedId
|
|
72
72
|
module Encoders
|
|
73
|
-
|
|
74
|
-
|
|
73
|
+
# Implementation of HashId, optimised and adapted from the original `hashid.rb` gem
|
|
74
|
+
class Hashid
|
|
75
|
+
include HashidConsistentShuffle
|
|
76
|
+
|
|
77
|
+
# @rbs @separators_and_guards: HashidOrdinalAlphabetSeparatorGuards
|
|
75
78
|
# @rbs @alphabet_ordinals: Array[Integer]
|
|
76
79
|
# @rbs @separator_ordinals: Array[Integer]
|
|
77
80
|
# @rbs @guard_ordinals: Array[Integer]
|
|
78
81
|
# @rbs @salt_ordinals: Array[Integer]
|
|
79
82
|
# @rbs @escaped_separator_selector: String
|
|
80
83
|
# @rbs @escaped_guards_selector: String
|
|
84
|
+
# @rbs @blocklist_mode: Symbol
|
|
85
|
+
# @rbs @blocklist_max_length: Integer
|
|
81
86
|
|
|
82
87
|
# Initialize a new HashId encoder with custom parameters.
|
|
83
88
|
#
|
|
@@ -91,17 +96,22 @@ module EncodedId
|
|
|
91
96
|
# @param min_hash_length [Integer] Minimum length of generated hashes (0 for no minimum)
|
|
92
97
|
# @param alphabet [Alphabet] Character set to use for encoding
|
|
93
98
|
# @param blocklist [Blocklist?] Optional list of words that shouldn't appear in hashes
|
|
99
|
+
# @param blocklist_mode [Symbol] Mode for blocklist checking (:always, :length_threshold, :raise_if_likely)
|
|
100
|
+
# @param blocklist_max_length [Integer] Maximum ID length for blocklist checking (when mode is :length_threshold)
|
|
94
101
|
#
|
|
95
|
-
# @rbs (String salt, ?Integer min_hash_length, ?Alphabet alphabet, ?Blocklist? blocklist) -> void
|
|
96
|
-
def initialize(salt, min_hash_length = 0, alphabet = Alphabet.alphanum, blocklist = nil)
|
|
97
|
-
super
|
|
98
|
-
|
|
102
|
+
# @rbs (String salt, ?Integer min_hash_length, ?Alphabet alphabet, ?Blocklist? blocklist, ?Symbol blocklist_mode, ?Integer blocklist_max_length) -> void
|
|
103
|
+
def initialize(salt, min_hash_length = 0, alphabet = Alphabet.alphanum, blocklist = nil, blocklist_mode = :length_threshold, blocklist_max_length = 32)
|
|
99
104
|
unless min_hash_length.is_a?(Integer) && min_hash_length >= 0
|
|
100
105
|
raise ArgumentError, "The min length must be a Integer and greater than or equal to 0"
|
|
101
106
|
end
|
|
102
107
|
@min_hash_length = min_hash_length
|
|
108
|
+
@salt = salt
|
|
109
|
+
@alphabet = alphabet
|
|
110
|
+
@blocklist = blocklist
|
|
111
|
+
@blocklist_mode = blocklist_mode
|
|
112
|
+
@blocklist_max_length = blocklist_max_length
|
|
103
113
|
|
|
104
|
-
@separators_and_guards =
|
|
114
|
+
@separators_and_guards = HashidOrdinalAlphabetSeparatorGuards.new(alphabet, salt)
|
|
105
115
|
@alphabet_ordinals = @separators_and_guards.alphabet
|
|
106
116
|
@separator_ordinals = @separators_and_guards.seps
|
|
107
117
|
@guard_ordinals = @separators_and_guards.guards
|
|
@@ -117,6 +127,10 @@ module EncodedId
|
|
|
117
127
|
attr_reader :separator_ordinals #: Array[Integer]
|
|
118
128
|
attr_reader :guard_ordinals #: Array[Integer]
|
|
119
129
|
attr_reader :salt_ordinals #: Array[Integer]
|
|
130
|
+
attr_reader :salt #: String
|
|
131
|
+
attr_reader :alphabet #: Alphabet
|
|
132
|
+
attr_reader :blocklist #: Blocklist?
|
|
133
|
+
attr_reader :min_hash_length #: Integer
|
|
120
134
|
|
|
121
135
|
# Encode an array of non-negative integers into a hash string.
|
|
122
136
|
#
|
|
@@ -134,15 +148,15 @@ module EncodedId
|
|
|
134
148
|
#
|
|
135
149
|
# @rbs (Array[Integer] numbers) -> String
|
|
136
150
|
def encode(numbers)
|
|
137
|
-
numbers.all? { |n| Integer(n) }
|
|
151
|
+
numbers.all? { |n| Integer(n) }
|
|
138
152
|
|
|
139
153
|
return "" if numbers.empty? || numbers.any? { |n| n < 0 }
|
|
140
154
|
|
|
141
155
|
encoded = internal_encode(numbers)
|
|
142
|
-
if
|
|
156
|
+
if check_blocklist?(encoded)
|
|
143
157
|
blocked_word = contains_blocklisted_word?(encoded)
|
|
144
158
|
if blocked_word
|
|
145
|
-
raise EncodedId::BlocklistError, "Generated ID contains blocklisted word: '#{blocked_word}'"
|
|
159
|
+
raise EncodedId::BlocklistError, "Generated ID '#{encoded}' contains blocklisted word: '#{blocked_word}'"
|
|
146
160
|
end
|
|
147
161
|
end
|
|
148
162
|
|
|
@@ -171,26 +185,6 @@ module EncodedId
|
|
|
171
185
|
internal_decode(hash)
|
|
172
186
|
end
|
|
173
187
|
|
|
174
|
-
# Decode a hash that was encoded from hexadecimal numbers.
|
|
175
|
-
#
|
|
176
|
-
# This is a specialized variant for hashes created from hex strings.
|
|
177
|
-
# It decodes the hash to integers, then converts each integer back to hex
|
|
178
|
-
# (skipping the leading '1' that was added during hex encoding).
|
|
179
|
-
#
|
|
180
|
-
# @param hash [String] The hash string to decode
|
|
181
|
-
# @return [String] The original hexadecimal string (uppercase)
|
|
182
|
-
#
|
|
183
|
-
# @rbs (String hash) -> String
|
|
184
|
-
def decode_hex(hash)
|
|
185
|
-
numbers = decode(hash)
|
|
186
|
-
|
|
187
|
-
ret = numbers.map do |n|
|
|
188
|
-
n.to_s(16)[1..]
|
|
189
|
-
end
|
|
190
|
-
|
|
191
|
-
ret.join.upcase
|
|
192
|
-
end
|
|
193
|
-
|
|
194
188
|
private
|
|
195
189
|
|
|
196
190
|
# Internal encoding implementation - converts numbers to a hash string.
|
|
@@ -251,7 +245,6 @@ module EncodedId
|
|
|
251
245
|
lottery = current_alphabet[hash_int % alphabet_length]
|
|
252
246
|
|
|
253
247
|
# This array will hold the final hash as character ordinals (codepoints).
|
|
254
|
-
# Start with the lottery character.
|
|
255
248
|
# @type var hashid_code: Array[Integer]
|
|
256
249
|
hashid_code = []
|
|
257
250
|
hashid_code << lottery
|
|
@@ -275,7 +268,6 @@ module EncodedId
|
|
|
275
268
|
consistent_shuffle!(current_alphabet, seasoning, alphabet_buffer, alphabet_length)
|
|
276
269
|
|
|
277
270
|
# Convert this number to base-N using the current shuffled alphabet.
|
|
278
|
-
# Returns the last character added (used for separator selection).
|
|
279
271
|
last_char_ord = hash_one_number(hashid_code, num, current_alphabet, alphabet_length)
|
|
280
272
|
|
|
281
273
|
# Add a separator between numbers (but not after the last number).
|
|
@@ -292,28 +284,29 @@ module EncodedId
|
|
|
292
284
|
# Guards are boundary markers chosen deterministically from the guard set.
|
|
293
285
|
if hashid_code.length < @min_hash_length
|
|
294
286
|
# Prepend first guard based on hash_int and the lottery character.
|
|
287
|
+
guard_count = guard_ordinals.length
|
|
295
288
|
first_char = hashid_code[0] #: Integer
|
|
296
|
-
hashid_code.prepend(guard_ordinals[(hash_int + first_char) %
|
|
289
|
+
hashid_code.prepend(guard_ordinals[(hash_int + first_char) % guard_count])
|
|
297
290
|
|
|
298
291
|
# If still too short, append second guard based on hash_int and third character.
|
|
299
292
|
if hashid_code.length < @min_hash_length
|
|
300
293
|
# At this point hashid_code has at least 2 elements (lottery + guard), check for 3rd
|
|
301
294
|
third_char = hashid_code[2]
|
|
302
295
|
hashid_code << if third_char
|
|
303
|
-
guard_ordinals[(hash_int + third_char) %
|
|
296
|
+
guard_ordinals[(hash_int + third_char) % guard_count]
|
|
304
297
|
else
|
|
305
298
|
# If no third character exists, use 0 as default
|
|
306
|
-
guard_ordinals[hash_int %
|
|
299
|
+
guard_ordinals[hash_int % guard_count]
|
|
307
300
|
end
|
|
308
301
|
end
|
|
309
302
|
end
|
|
310
303
|
|
|
311
304
|
# Step 4: Pad with shuffled alphabet if still below minimum length.
|
|
312
|
-
half_length =
|
|
305
|
+
half_length = alphabet_length.div(2)
|
|
313
306
|
|
|
314
307
|
while hashid_code.length < @min_hash_length
|
|
315
308
|
# Shuffle the alphabet using itself as the key (creates a new permutation).
|
|
316
|
-
consistent_shuffle!(current_alphabet, current_alphabet.dup, nil,
|
|
309
|
+
consistent_shuffle!(current_alphabet, current_alphabet.dup, nil, alphabet_length)
|
|
317
310
|
|
|
318
311
|
# Wrap the hash: second_half + hash + first_half
|
|
319
312
|
second_half = current_alphabet[half_length..] #: Array[Integer]
|
|
@@ -381,7 +374,6 @@ module EncodedId
|
|
|
381
374
|
|
|
382
375
|
if (breakdown = array[i])
|
|
383
376
|
# Step 2: Extract the lottery character (first char) and the rest.
|
|
384
|
-
# Check if breakdown is not empty
|
|
385
377
|
lottery = breakdown[0] #: String
|
|
386
378
|
remainder = breakdown[1..] || "" #: String
|
|
387
379
|
|
|
@@ -409,7 +401,6 @@ module EncodedId
|
|
|
409
401
|
|
|
410
402
|
# Step 4: Verify by re-encoding and comparing.
|
|
411
403
|
# This is critical for validity: it ensures only valid hashes decode successfully.
|
|
412
|
-
# Random strings will fail this check and return an empty array.
|
|
413
404
|
if encode(ret) != hash
|
|
414
405
|
# @type var ret: Array[Integer]
|
|
415
406
|
ret = []
|
|
@@ -453,7 +444,6 @@ module EncodedId
|
|
|
453
444
|
break unless num > 0
|
|
454
445
|
end
|
|
455
446
|
|
|
456
|
-
# Return the last character added (used for separator selection).
|
|
457
447
|
char
|
|
458
448
|
end
|
|
459
449
|
|
|
@@ -496,32 +486,38 @@ module EncodedId
|
|
|
496
486
|
num
|
|
497
487
|
end
|
|
498
488
|
|
|
499
|
-
#
|
|
500
|
-
#
|
|
501
|
-
# This deterministic shuffle is the heart of the HashID algorithm's obfuscation.
|
|
502
|
-
# It ensures that the same salt always produces the same permutation of the alphabet.
|
|
503
|
-
#
|
|
504
|
-
# @param collection_to_shuffle [Array<Integer>] The array to shuffle (modified in place)
|
|
505
|
-
# @param salt_part_1 [Array<Integer>] First part of the salt (lottery + salt, or alphabet)
|
|
506
|
-
# @param salt_part_2 [Array<Integer>?] Second part of the salt (pre-shuffle alphabet copy)
|
|
507
|
-
# @param max_salt_length [Integer] Maximum length to use from combined salt
|
|
508
|
-
# @return [Array<Integer>] The shuffled array (same object as collection_to_shuffle)
|
|
489
|
+
# Decide whether the blocklist check should run for a given encoded ID.
|
|
509
490
|
#
|
|
510
|
-
#
|
|
511
|
-
|
|
512
|
-
|
|
491
|
+
# Determines if blocklist checking should be performed based on mode and ID length
|
|
492
|
+
#
|
|
493
|
+
# @param encoded_string [String] The encoded ID to check
|
|
494
|
+
# @return [Boolean] True if blocklist should be checked
|
|
495
|
+
#
|
|
496
|
+
# @rbs (String encoded_string) -> bool
|
|
497
|
+
def check_blocklist?(encoded_string)
  # Nothing to check against without a (non-empty) blocklist.
  return false unless blocklist
  return false if blocklist.empty?

  # Only :length_threshold depends on the ID length. :always, :raise_if_likely
  # (already vetted at configuration time) and any other mode always check.
  return true unless @blocklist_mode == :length_threshold

  encoded_string.length <= @blocklist_max_length
end
|
|
514
512
|
|
|
515
|
-
# Check if the encoded string contains any blocklisted words.
|
|
516
|
-
#
|
|
517
513
|
# @param encoded_string [String] The encoded hash to check
|
|
518
514
|
# @return [String, false] The blocklisted word if found, false otherwise
|
|
519
515
|
#
|
|
520
516
|
# @rbs (String encoded_string) -> (String | false)
|
|
521
517
|
def contains_blocklisted_word?(encoded_string)
|
|
522
|
-
return false
|
|
518
|
+
return false if !blocklist || blocklist.empty?
|
|
523
519
|
|
|
524
|
-
blocked_word =
|
|
520
|
+
blocked_word = blocklist.blocks?(encoded_string)
|
|
525
521
|
return blocked_word if blocked_word
|
|
526
522
|
|
|
527
523
|
false
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# rbs_inline: enabled
|
|
4
|
+
|
|
5
|
+
module EncodedId
|
|
6
|
+
module Encoders
|
|
7
|
+
# Configuration for Hashids encoder
|
|
8
|
+
# Hashids requires a salt for encoding/decoding
|
|
9
|
+
# Encoder configuration for Hashids: the shared base options plus the salt
# that the Hashid encoder requires.
class HashidConfiguration < BaseConfiguration
  attr_reader :salt #: String

  # Validate and store the salt, then hand the remaining options to the base
  # configuration.
  #
  # @rbs (salt: String, **untyped options) -> void
  def initialize(salt:, **options)
    @salt = validate_salt(salt)
    super(**options)
  end

  # Build a Hashid encoder from this configuration's settings.
  # @rbs () -> Hashid
  def create_encoder
    encoder_arguments = [salt, min_length, alphabet, blocklist, blocklist_mode, blocklist_max_length]
    Hashid.new(*encoder_arguments)
  end

  private

  # Accept only a String of more than 3 characters.
  #
  # @rbs (String salt) -> String
  def validate_salt(salt)
    unless salt.is_a?(String) && salt.size > 3
      raise InvalidConfigurationError, "salt must be a string longer than 3 characters"
    end

    salt
  end
end
|
|
32
|
+
end
|
|
33
|
+
end
|
|
@@ -4,9 +4,9 @@
|
|
|
4
4
|
|
|
5
5
|
module EncodedId
|
|
6
6
|
module Encoders
|
|
7
|
-
# Implements a deterministic, salt-based shuffle algorithm for
|
|
7
|
+
# Implements a deterministic, salt-based shuffle algorithm for Hashids.
|
|
8
8
|
#
|
|
9
|
-
# This is the core obfuscation mechanism that makes
|
|
9
|
+
# This is the core obfuscation mechanism that makes Hashids non-sequential.
|
|
10
10
|
# The algorithm has several critical properties:
|
|
11
11
|
#
|
|
12
12
|
# 1. **Deterministic**: Same input + same salt = same output (always)
|
|
@@ -43,7 +43,7 @@ module EncodedId
|
|
|
43
43
|
# Step 3: i=1, salt[2]=67, ord_total=131 → swap positions 1 and ((67+2+131)%1=0)→ [4,2,1,3]
|
|
44
44
|
# Result: [4, 2, 1, 3]
|
|
45
45
|
#
|
|
46
|
-
|
|
46
|
+
module HashidConsistentShuffle
|
|
47
47
|
# Deterministically shuffle a collection based on a salt.
|
|
48
48
|
#
|
|
49
49
|
# Shuffles the collection in place using a salt-based algorithm that produces
|
|
@@ -57,7 +57,7 @@ module EncodedId
|
|
|
57
57
|
# @raise [SaltError] If salt is too short or shuffle fails
|
|
58
58
|
#
|
|
59
59
|
# @rbs (Array[Integer] collection_to_shuffle, Array[Integer] salt_part_1, Array[Integer]? salt_part_2, Integer max_salt_length) -> Array[Integer]
|
|
60
|
-
def
|
|
60
|
+
def consistent_shuffle!(collection_to_shuffle, salt_part_1, salt_part_2, max_salt_length)
|
|
61
61
|
salt_part_1_length = salt_part_1.length
|
|
62
62
|
|
|
63
63
|
# Validate we have enough salt. If max_salt_length exceeds salt_part_1,
|
|
@@ -39,7 +39,9 @@ module EncodedId
|
|
|
39
39
|
# - More efficient memory usage
|
|
40
40
|
# - Direct array indexing without string allocations
|
|
41
41
|
#
|
|
42
|
-
class
|
|
42
|
+
class HashidOrdinalAlphabetSeparatorGuards
|
|
43
|
+
include HashidConsistentShuffle
|
|
44
|
+
|
|
43
45
|
# Target ratio of alphabet to separators (alphabet.length / seps.length ≈ 3.5)
|
|
44
46
|
SEP_DIV = 3.5
|
|
45
47
|
|
|
@@ -75,16 +77,10 @@ module EncodedId
|
|
|
75
77
|
#
|
|
76
78
|
# @rbs (Alphabet alphabet, String salt) -> void
|
|
77
79
|
def initialize(alphabet, salt)
|
|
78
|
-
# Convert alphabet and salt to arrays of ordinals (integer codepoints).
|
|
79
80
|
@alphabet = alphabet.characters.chars.map(&:ord)
|
|
80
81
|
@salt = salt.chars.map(&:ord)
|
|
81
82
|
|
|
82
|
-
# Partition the alphabet into separators and alphabet.
|
|
83
|
-
# This ensures they're disjoint and properly balanced.
|
|
84
83
|
setup_seps
|
|
85
|
-
|
|
86
|
-
# Extract guards from either separators or alphabet.
|
|
87
|
-
# Guards are boundary markers used for minimum length padding.
|
|
88
84
|
setup_guards
|
|
89
85
|
|
|
90
86
|
# Pre-compute escaped versions for String#tr operations during decode.
|
|
@@ -93,7 +89,6 @@ module EncodedId
|
|
|
93
89
|
@seps_tr_selector = escape_characters_string_for_tr(@seps.map(&:chr))
|
|
94
90
|
@guards_tr_selector = escape_characters_string_for_tr(@guards.map(&:chr))
|
|
95
91
|
|
|
96
|
-
# Freeze all arrays to prevent accidental modification.
|
|
97
92
|
@alphabet.freeze
|
|
98
93
|
@seps.freeze
|
|
99
94
|
@guards.freeze
|
|
@@ -146,39 +141,33 @@ module EncodedId
|
|
|
146
141
|
def setup_seps
|
|
147
142
|
@seps = DEFAULT_SEPS.dup
|
|
148
143
|
|
|
149
|
-
# Make alphabet and separators disjoint
|
|
150
|
-
#
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
@seps.length.times do |i|
|
|
155
|
-
if (j = @alphabet.index(@seps[i]))
|
|
156
|
-
# Separator exists in alphabet - remove it from alphabet.
|
|
157
|
-
@alphabet = pick_characters(@alphabet, j)
|
|
144
|
+
# Make alphabet and separators disjoint: keep separator if it exists in alphabet,
|
|
145
|
+
# otherwise remove it. This ensures separators only contain characters from the original alphabet.
|
|
146
|
+
@seps.length.times do |sep_index|
|
|
147
|
+
if (alphabet_index = @alphabet.index(@seps[sep_index]))
|
|
148
|
+
@alphabet = remove_character_at(@alphabet, alphabet_index)
|
|
158
149
|
else
|
|
159
|
-
|
|
160
|
-
@seps = pick_characters(@seps, i)
|
|
150
|
+
@seps = remove_character_at(@seps, sep_index)
|
|
161
151
|
end
|
|
162
152
|
end
|
|
163
153
|
|
|
164
|
-
# Remove
|
|
165
|
-
# Spaces are placeholders and shouldn't appear in the final sets.
|
|
154
|
+
# Remove space placeholders introduced by remove_character_at
|
|
166
155
|
@alphabet.delete(SPACE_CHAR)
|
|
167
156
|
@seps.delete(SPACE_CHAR)
|
|
168
157
|
|
|
169
|
-
|
|
170
|
-
consistent_shuffle!(@seps, @salt, nil,
|
|
158
|
+
salt_length = @salt.length
|
|
159
|
+
consistent_shuffle!(@seps, @salt, nil, salt_length)
|
|
171
160
|
|
|
172
|
-
# Balance the alphabet-to-separator ratio to approximately SEP_DIV (3.5:1)
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
161
|
+
# Balance the alphabet-to-separator ratio to approximately SEP_DIV (3.5:1)
|
|
162
|
+
alphabet_length = @alphabet.length
|
|
163
|
+
seps_count = @seps.length
|
|
164
|
+
if seps_count == 0 || (alphabet_length / seps_count.to_f) > SEP_DIV
|
|
165
|
+
seps_target_count = (alphabet_length / SEP_DIV).ceil
|
|
166
|
+
seps_target_count = 2 if seps_target_count == 1 # Minimum 2 separators
|
|
178
167
|
|
|
179
|
-
if
|
|
168
|
+
if seps_target_count > seps_count
|
|
180
169
|
# Not enough separators - take some from the alphabet.
|
|
181
|
-
diff =
|
|
170
|
+
diff = seps_target_count - seps_count
|
|
182
171
|
|
|
183
172
|
# These are safe: diff > 0 and @alphabet has enough elements by design
|
|
184
173
|
additonal_seps = @alphabet[0, diff] #: Array[Integer]
|
|
@@ -186,13 +175,11 @@ module EncodedId
|
|
|
186
175
|
@alphabet = @alphabet[diff..] #: Array[Integer]
|
|
187
176
|
else
|
|
188
177
|
# Too many separators - trim to target length.
|
|
189
|
-
@seps = @seps[0,
|
|
178
|
+
@seps = @seps[0, seps_target_count] #: Array[Integer]
|
|
190
179
|
end
|
|
191
180
|
end
|
|
192
181
|
|
|
193
|
-
|
|
194
|
-
# This ensures different salts produce different alphabet orderings.
|
|
195
|
-
consistent_shuffle!(@alphabet, @salt, nil, @salt.length)
|
|
182
|
+
consistent_shuffle!(@alphabet, @salt, nil, salt_length)
|
|
196
183
|
end
|
|
197
184
|
|
|
198
185
|
# Setup guards by extracting them from separators or alphabet.
|
|
@@ -213,10 +200,10 @@ module EncodedId
|
|
|
213
200
|
#
|
|
214
201
|
# @rbs () -> void
|
|
215
202
|
def setup_guards
|
|
216
|
-
|
|
217
|
-
gc = (
|
|
203
|
+
alphabet_length = @alphabet.length
|
|
204
|
+
gc = (alphabet_length / GUARD_DIV).ceil
|
|
218
205
|
|
|
219
|
-
if
|
|
206
|
+
if alphabet_length < 3
|
|
220
207
|
# Very small alphabet - take guards from separators to preserve alphabet.
|
|
221
208
|
@guards = @seps[0, gc] #: Array[Integer]
|
|
222
209
|
@seps = @seps[gc..] || [] #: Array[Integer]
|
|
@@ -227,7 +214,7 @@ module EncodedId
|
|
|
227
214
|
end
|
|
228
215
|
end
|
|
229
216
|
|
|
230
|
-
# Remove a character from an array by replacing it with a space.
|
|
217
|
+
# Remove a character from an array by replacing it with a space placeholder.
|
|
231
218
|
#
|
|
232
219
|
# This is used during the separator/alphabet disjoint operation.
|
|
233
220
|
# Instead of mutating the array in place, it creates a new array with:
|
|
@@ -239,32 +226,19 @@ module EncodedId
|
|
|
239
226
|
# This approach maintains array indices during iteration.
|
|
240
227
|
#
|
|
241
228
|
# Example:
|
|
242
|
-
#
|
|
229
|
+
# remove_character_at([97, 98, 99], 1) → [97, 32, 99] # [a, space, c]
|
|
243
230
|
#
|
|
244
231
|
# @param array [Array<Integer>] The array to remove from
|
|
245
232
|
# @param index [Integer] The index of the character to remove
|
|
246
233
|
# @return [Array<Integer>] New array with character replaced by space
|
|
247
234
|
#
|
|
248
235
|
# @rbs (Array[Integer] array, Integer index) -> Array[Integer]
|
|
249
|
-
def
|
|
236
|
+
def remove_character_at(array, index)
  # Fresh copy of everything before the index (empty when the slice is nil).
  before = array[0, index] || []
  # Everything after the index; nil when the index is past the end.
  after = array[(index + 1)..]

  # Put the placeholder where the removed character was, so the positions of
  # the remaining elements are unchanged.
  with_placeholder = before << SPACE_CHAR
  after ? with_placeholder + after : with_placeholder
end
|
|
255
|
-
|
|
256
|
-
# Delegate to the consistent shuffle algorithm.
|
|
257
|
-
#
|
|
258
|
-
# @param collection_to_shuffle [Array<Integer>] The array to shuffle (modified in place)
|
|
259
|
-
# @param salt_part_1 [Array<Integer>] The salt to use for shuffling
|
|
260
|
-
# @param salt_part_2 [Array<Integer>?] Optional second salt part (unused here)
|
|
261
|
-
# @param max_salt_length [Integer] Maximum salt length to use
|
|
262
|
-
# @return [Array<Integer>] The shuffled array
|
|
263
|
-
#
|
|
264
|
-
# @rbs (Array[Integer] collection_to_shuffle, Array[Integer] salt_part_1, Array[Integer]? salt_part_2, Integer max_salt_length) -> Array[Integer]
|
|
265
|
-
def consistent_shuffle!(collection_to_shuffle, salt_part_1, salt_part_2, max_salt_length)
|
|
266
|
-
HashIdConsistentShuffle.shuffle!(collection_to_shuffle, salt_part_1, salt_part_2, max_salt_length)
|
|
267
|
-
end
|
|
268
242
|
end
|
|
269
243
|
end
|
|
270
244
|
end
|
|
@@ -14,13 +14,13 @@ module EncodedId
|
|
|
14
14
|
#
|
|
15
15
|
# == Security Note:
|
|
16
16
|
#
|
|
17
|
-
# The salt is the 'secret' that makes your
|
|
17
|
+
# The salt is the 'secret' that makes your Hashids unique. Without knowing the
|
|
18
18
|
# salt, it's harder to reverse-engineer the encoding scheme
|
|
19
|
-
# or predict hash values BUT
|
|
19
|
+
# or predict hash values BUT Hashids is not a secure encryption technique. It
|
|
20
20
|
# is only to be used to obfuscate values which are not secure (you would just
|
|
21
21
|
# prefer the average person cannot see them).
|
|
22
22
|
#
|
|
23
|
-
class
|
|
23
|
+
class HashidSalt
|
|
24
24
|
# @rbs @salt: String
|
|
25
25
|
# @rbs @chars: Array[String]
|
|
26
26
|
|