encoded_id 1.0.0.rc5 → 1.0.0.rc7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +99 -3
- data/README.md +86 -329
- data/context/encoded_id.md +437 -0
- data/lib/encoded_id/alphabet.rb +34 -3
- data/lib/encoded_id/blocklist.rb +100 -0
- data/lib/encoded_id/encoders/base_configuration.rb +154 -0
- data/lib/encoded_id/encoders/hashid.rb +527 -0
- data/lib/encoded_id/encoders/hashid_configuration.rb +40 -0
- data/lib/encoded_id/encoders/hashid_consistent_shuffle.rb +110 -0
- data/lib/encoded_id/encoders/hashid_ordinal_alphabet_separator_guards.rb +244 -0
- data/lib/encoded_id/encoders/hashid_salt.rb +51 -0
- data/lib/encoded_id/encoders/my_sqids.rb +454 -0
- data/lib/encoded_id/encoders/sqids.rb +59 -0
- data/lib/encoded_id/encoders/sqids_configuration.rb +22 -0
- data/lib/encoded_id/encoders/sqids_with_blocklist_mode.rb +54 -0
- data/lib/encoded_id/hex_representation.rb +29 -14
- data/lib/encoded_id/reversible_id.rb +115 -82
- data/lib/encoded_id/version.rb +3 -1
- data/lib/encoded_id.rb +34 -4
- metadata +34 -26
- data/.devcontainer/Dockerfile +0 -9
- data/.devcontainer/compose.yml +0 -8
- data/.devcontainer/devcontainer.json +0 -8
- data/.standard.yml +0 -2
- data/Gemfile +0 -36
- data/Rakefile +0 -20
- data/Steepfile +0 -5
- data/ext/encoded_id/extconf.rb +0 -3
- data/ext/encoded_id/extension.c +0 -123
- data/ext/encoded_id/hashids.c +0 -939
- data/ext/encoded_id/hashids.h +0 -139
- data/lib/encoded_id/hash_id.rb +0 -227
- data/lib/encoded_id/hash_id_consistent_shuffle.rb +0 -27
- data/lib/encoded_id/hash_id_salt.rb +0 -15
- data/lib/encoded_id/ordinal_alphabet_separator_guards.rb +0 -90
- data/rbs_collection.yaml +0 -24
- data/sig/encoded_id.rbs +0 -189
|
@@ -0,0 +1,454 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# rbs_inline: enabled
|
|
4
|
+
|
|
5
|
+
# Sqids (pronounced "squids") is a library that generates short, unique, non-sequential IDs
|
|
6
|
+
# from numbers. It's useful for obfuscating database IDs, creating URL-friendly identifiers,
|
|
7
|
+
# and generating human-readable codes.
|
|
8
|
+
#
|
|
9
|
+
# Key features:
|
|
10
|
+
# - Reversible: encoded IDs can be decoded back to the original numbers
|
|
11
|
+
# - Customizable: supports custom alphabets, minimum lengths, and blocklists
|
|
12
|
+
# - Collision-free: same input always produces the same output
|
|
13
|
+
# - Blocklist filtering: automatically regenerates IDs that contain blocked words
|
|
14
|
+
#
|
|
15
|
+
# The algorithm uses a shuffling mechanism based on the input numbers to select characters
|
|
16
|
+
# from a customized alphabet, ensuring that sequential numbers produce non-sequential IDs.
|
|
17
|
+
#
|
|
18
|
+
class MySqids
|
|
19
|
+
# @rbs @alphabet: Array[Integer]
|
|
20
|
+
# @rbs @min_length: Integer
|
|
21
|
+
# @rbs @blocklist: (Array[String] | Set[String])
|
|
22
|
+
|
|
23
|
+
# Alphabet used when no :alphabet option is supplied: a-z, A-Z, then 0-9 (62 characters)
DEFAULT_ALPHABET = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"

# Default minimum length of 0 means no padding is applied to generated IDs
DEFAULT_MIN_LENGTH = 0
|
|
27
|
+
# rubocop:disable Metrics/CollectionLiteralLength, Layout/LineLength
|
|
28
|
+
# Default blocklist containing words that should not appear in generated IDs
|
|
29
|
+
# The blocklist prevents offensive or inappropriate words from appearing in IDs by
|
|
30
|
+
# regenerating IDs that contain these patterns.
|
|
31
|
+
DEFAULT_BLOCKLIST = %w[0rgasm 1d10t 1d1ot 1di0t 1diot 1eccacu10 1eccacu1o 1eccacul0
|
|
32
|
+
1eccaculo 1mbec11e 1mbec1le 1mbeci1e 1mbecile a11upat0 a11upato a1lupat0 a1lupato aand ah01e ah0le aho1e ahole al1upat0 al1upato allupat0 allupato ana1 ana1e anal anale anus arrapat0 arrapato arsch arse ass b00b b00be b01ata b0ceta b0iata b0ob b0obe b0sta b1tch b1te b1tte ba1atkar balatkar bastard0 bastardo batt0na battona bitch bite bitte bo0b bo0be bo1ata boceta boiata boob boobe bosta bran1age bran1er bran1ette bran1eur bran1euse branlage branler branlette branleur branleuse c0ck c0g110ne c0g11one c0g1i0ne c0g1ione c0gl10ne c0gl1one c0gli0ne c0glione c0na c0nnard c0nnasse c0nne c0u111es c0u11les c0u1l1es c0u1lles c0ui11es c0ui1les c0uil1es c0uilles c11t c11t0 c11to c1it c1it0 c1ito cabr0n cabra0 cabrao cabron caca cacca cacete cagante cagar cagare cagna cara1h0 cara1ho caracu10 caracu1o caracul0 caraculo caralh0 caralho cazz0 cazz1mma cazzata cazzimma cazzo ch00t1a ch00t1ya ch00tia ch00tiya ch0d ch0ot1a ch0ot1ya ch0otia ch0otiya ch1asse ch1avata ch1er ch1ng0 ch1ngadaz0s ch1ngadazos ch1ngader1ta ch1ngaderita ch1ngar ch1ngo ch1ngues ch1nk chatte chiasse chiavata chier ching0 chingadaz0s chingadazos chingader1ta chingaderita chingar chingo chingues chink cho0t1a cho0t1ya cho0tia cho0tiya chod choot1a choot1ya chootia chootiya cl1t cl1t0 cl1to clit clit0 clito cock cog110ne cog11one cog1i0ne cog1ione cogl10ne cogl1one cogli0ne coglione cona connard connasse conne cou111es cou11les cou1l1es cou1lles coui11es coui1les couil1es couilles cracker crap cu10 cu1att0ne cu1attone cu1er0 cu1ero cu1o cul0 culatt0ne culattone culer0 culero culo cum cunt d11d0 d11do d1ck d1ld0 d1ldo damn de1ch deich depp di1d0 di1do dick dild0 dildo dyke encu1e encule enema enf01re enf0ire enfo1re enfoire estup1d0 estup1do estupid0 estupido etr0n etron f0da f0der f0ttere f0tters1 f0ttersi f0tze f0utre f1ca f1cker f1ga fag fica ficker figa foda foder fottere fotters1 fottersi fotze foutre fr0c10 fr0c1o fr0ci0 fr0cio fr0sc10 fr0sc1o fr0sci0 fr0scio froc10 froc1o froci0 frocio frosc10 frosc1o 
frosci0 froscio fuck g00 g0o g0u1ne g0uine gandu go0 goo gou1ne gouine gr0gnasse grognasse haram1 harami haramzade hund1n hundin id10t id1ot idi0t idiot imbec11e imbec1le imbeci1e imbecile j1zz jerk jizz k1ke kam1ne kamine kike leccacu10 leccacu1o leccacul0 leccaculo m1erda m1gn0tta m1gnotta m1nch1a m1nchia m1st mam0n mamahuev0 mamahuevo mamon masturbat10n masturbat1on masturbate masturbati0n masturbation merd0s0 merd0so merda merde merdos0 merdoso mierda mign0tta mignotta minch1a minchia mist musch1 muschi n1gger neger negr0 negre negro nerch1a nerchia nigger orgasm p00p p011a p01la p0l1a p0lla p0mp1n0 p0mp1no p0mpin0 p0mpino p0op p0rca p0rn p0rra p0uff1asse p0uffiasse p1p1 p1pi p1r1a p1rla p1sc10 p1sc1o p1sci0 p1scio p1sser pa11e pa1le pal1e palle pane1e1r0 pane1e1ro pane1eir0 pane1eiro panele1r0 panele1ro paneleir0 paneleiro patakha pec0r1na pec0rina pecor1na pecorina pen1s pendej0 pendejo penis pip1 pipi pir1a pirla pisc10 pisc1o pisci0 piscio pisser po0p po11a po1la pol1a polla pomp1n0 pomp1no pompin0 pompino poop porca porn porra pouff1asse pouffiasse pr1ck prick pussy put1za puta puta1n putain pute putiza puttana queca r0mp1ba11e r0mp1ba1le r0mp1bal1e r0mp1balle r0mpiba11e r0mpiba1le r0mpibal1e r0mpiballe rand1 randi rape recch10ne recch1one recchi0ne recchione retard romp1ba11e romp1ba1le romp1bal1e romp1balle rompiba11e rompiba1le rompibal1e rompiballe ruff1an0 ruff1ano ruffian0 ruffiano s1ut sa10pe sa1aud sa1ope sacanagem sal0pe salaud salope saugnapf sb0rr0ne sb0rra sb0rrone sbattere sbatters1 sbattersi sborr0ne sborra sborrone sc0pare sc0pata sch1ampe sche1se sche1sse scheise scheisse schlampe schwachs1nn1g schwachs1nnig schwachsinn1g schwachsinnig schwanz scopare scopata sexy sh1t shit slut sp0mp1nare sp0mpinare spomp1nare spompinare str0nz0 str0nza str0nzo stronz0 stronza stronzo stup1d stupid succh1am1 succh1ami succhiam1 succhiami sucker t0pa tapette test1c1e test1cle testic1e testicle tette topa tr01a tr0ia tr0mbare tr1ng1er tr1ngler tring1er 
tringler tro1a troia trombare turd twat vaffancu10 vaffancu1o vaffancul0 vaffanculo vag1na vagina verdammt verga w1chsen wank wichsen x0ch0ta x0chota xana xoch0ta xochota z0cc01a z0cc0la z0cco1a z0ccola z1z1 z1zi ziz1 zizi zocc01a zocc0la zocco1a zoccola].freeze
|
|
33
|
+
# rubocop:enable Metrics/CollectionLiteralLength, Layout/LineLength
|
|
34
|
+
|
|
35
|
+
# Maximum integer value that can be encoded.
# Uses Integer::MAX when that constant is defined; otherwise falls back to
# 2**(word_bits - 2) - 1, where word_bits is `0.size * 8` (the byte size that
# Integer#size reports for 0, times 8).
# NOTE(review): core Ruby does not appear to define Integer::MAX, so the fallback
# is presumably the value used in practice - confirm.
MAX_INT = defined?(Integer::MAX) ? Integer::MAX : ((2**((0.size * 8) - 2)) - 1)
|
|
39
|
+
|
|
40
|
+
# Returns the maximum integer value that can be safely encoded
|
|
41
|
+
# @rbs () -> Integer
|
|
42
|
+
def self.max_value
  # Exposes MAX_INT through a class-level reader; #encode rejects anything above it.
  MAX_INT
end
|
|
45
|
+
|
|
46
|
+
# Initializes a new MySqids encoder with custom options
|
|
47
|
+
#
|
|
48
|
+
# @param options [Hash] Configuration options
|
|
49
|
+
# @option options [String, Array<String>] :alphabet Custom alphabet to use for encoding
|
|
50
|
+
# (default: DEFAULT_ALPHABET). Must be at least 3 characters and contain only single-byte chars.
|
|
51
|
+
# @option options [Integer] :min_length Minimum length for generated IDs (default: 0).
|
|
52
|
+
# IDs shorter than this will be padded. Must be between 0 and 255.
|
|
53
|
+
# @option options [Array<String>, Set<String>] :blocklist Words to exclude from generated IDs
|
|
54
|
+
# (default: DEFAULT_BLOCKLIST). Words must be at least 3 characters long.
|
|
55
|
+
#
|
|
56
|
+
# @raise [ArgumentError] If alphabet contains multibyte characters
|
|
57
|
+
# @raise [ArgumentError] If alphabet is shorter than 3 characters
|
|
58
|
+
# @raise [ArgumentError] If alphabet contains duplicate characters
|
|
59
|
+
# @raise [TypeError] If min_length is not between 0 and 255
|
|
60
|
+
#
|
|
61
|
+
# @rbs (?Hash[Symbol, untyped] options) -> void
|
|
62
|
+
def initialize(options = {})
  # Remember what the caller actually supplied: the blocklist is only filtered
  # when either the alphabet or the blocklist has been customised.
  custom_alphabet = options[:alphabet]
  custom_blocklist = options[:blocklist]

  alphabet = custom_alphabet || DEFAULT_ALPHABET
  min_length = options[:min_length] || DEFAULT_MIN_LENGTH
  blocklist = custom_blocklist || DEFAULT_BLOCKLIST

  raise ArgumentError, "Alphabet cannot contain multibyte characters" if contains_multibyte_chars?(alphabet)
  raise ArgumentError, "Alphabet length must be at least 3" if alphabet.length < 3

  # From here on the alphabet is always an array of one-character strings.
  symbols = alphabet.is_a?(Array) ? alphabet : alphabet.chars
  raise ArgumentError, "Alphabet must contain unique characters" unless symbols.uniq.size == symbols.length

  max_min_length = 255
  length_ok = min_length.is_a?(Integer) && min_length >= 0 && min_length <= max_min_length
  raise TypeError, "Minimum length has to be between 0 and #{max_min_length}" unless length_ok

  @blocklist =
    if custom_blocklist.nil? && custom_alphabet.nil?
      # Pure defaults: use the default list as-is.
      blocklist
    else
      # Keep only words of 3+ characters that can be spelled with this alphabet
      # (case-insensitively), stored downcased in a Set.
      allowed = symbols.map(&:downcase)
      usable = blocklist.select do |word|
        word.length >= 3 && (word.downcase.chars - allowed).empty?
      end
      usable.to_set(&:downcase)
    end

  @min_length = min_length
  # The working alphabet is kept as shuffled codepoints, not strings.
  @alphabet = shuffle(symbols.map(&:ord))
end
|
|
96
|
+
|
|
97
|
+
# Encodes an array of integers into a single Sqids string
|
|
98
|
+
#
|
|
99
|
+
# The encoding process:
|
|
100
|
+
# 1. Validates all numbers are in valid range (0 to MAX_INT)
|
|
101
|
+
# 2. Generates a prefix character based on the numbers and alphabet
|
|
102
|
+
# 3. Encodes each number using a shuffled alphabet
|
|
103
|
+
# 4. Separates encoded numbers with the first character of the shuffled alphabet
|
|
104
|
+
# 5. Pads the result if it's shorter than min_length
|
|
105
|
+
# 6. Regenerates if the result contains blocklisted words
|
|
106
|
+
#
|
|
107
|
+
# @param numbers [Array<Integer>] Array of non-negative integers to encode
|
|
108
|
+
# @return [String] The encoded Sqids string
|
|
109
|
+
# @raise [ArgumentError] If any number is outside the valid range (0 to MAX_INT)
|
|
110
|
+
#
|
|
111
|
+
# @example
|
|
112
|
+
# sqids.encode([1, 2, 3]) # => "86Rf07"
|
|
113
|
+
# sqids.encode([]) # => ""
|
|
114
|
+
#
|
|
115
|
+
# @rbs (Array[Integer] numbers) -> String
|
|
116
|
+
def encode(numbers)
  return "" if numbers.empty?

  # Coerce every entry with #to_i first, then insist that each coerced value
  # lies within 0..MAX_INT before any encoding work is done.
  integers = numbers.map(&:to_i)
  unless integers.all? { |value| value.between?(0, MAX_INT) }
    raise ArgumentError,
      "Encoding supports numbers between 0 and #{MAX_INT}"
  end

  encode_numbers(integers)
end
|
|
131
|
+
|
|
132
|
+
# Decodes a Sqids string back into the original array of integers
|
|
133
|
+
#
|
|
134
|
+
# The decoding process mirrors the encoding:
|
|
135
|
+
# 1. Validates all characters exist in the alphabet
|
|
136
|
+
# 2. Extracts the prefix to determine the alphabet offset
|
|
137
|
+
# 3. Rotates and reverses the alphabet based on the offset
|
|
138
|
+
# 4. Splits the ID by separator characters (first char of shuffled alphabet)
|
|
139
|
+
# 5. Converts each chunk back to its original number
|
|
140
|
+
# 6. Re-shuffles the alphabet between chunks
|
|
141
|
+
#
|
|
142
|
+
# @param id [String] The Sqids string to decode
|
|
143
|
+
# @return [Array<Integer>] Array of integers that were encoded, or empty array if invalid
|
|
144
|
+
#
|
|
145
|
+
# @example
|
|
146
|
+
# sqids.decode("86Rf07") # => [1, 2, 3]
|
|
147
|
+
# sqids.decode("") # => []
|
|
148
|
+
# sqids.decode("xyz") # => [] (if 'xyz' contains invalid chars)
|
|
149
|
+
#
|
|
150
|
+
# @rbs (String id) -> Array[Integer]
|
|
151
|
+
def decode(id)
  numbers = [] #: Array[Integer]
  return numbers if id.empty?

  codepoints = id.codepoints

  # Any character outside the alphabet makes the whole ID invalid.
  return numbers unless codepoints.all? { |codepoint| @alphabet.include?(codepoint) }

  # The first character is the prefix; its position fixes the alphabet rotation.
  offset = @alphabet.index(codepoints.first)
  return [] if offset.nil?

  working = rotate_and_reverse_alphabet(@alphabet, offset)
  remaining = codepoints.drop(1)

  until remaining.empty?
    # The current separator is always the first symbol of the working alphabet.
    # `tail` is nil when no separator was found (last segment).
    head, tail = split_array(remaining, working.first)

    # An empty segment means the ID is malformed (or padding began); stop with what we have.
    return numbers if head.empty?

    numbers << to_number(head, working)
    # The encoder reshuffles after each separator, so mirror that here.
    working = shuffle(working) unless tail.nil?
    remaining = tail || []
  end

  numbers
end
|
|
196
|
+
|
|
197
|
+
private
|
|
198
|
+
|
|
199
|
+
# Splits an array into two parts at the first occurrence of a separator
|
|
200
|
+
#
|
|
201
|
+
# This is used during decoding to split the encoded ID at separator characters,
|
|
202
|
+
# which mark the boundaries between encoded numbers.
|
|
203
|
+
#
|
|
204
|
+
# @param arr [Array<Integer>] The array to split (character codepoints)
|
|
205
|
+
# @param separator [Integer] The separator character codepoint to split on
|
|
206
|
+
# @return [Array<Array<Integer>>] An array containing the left part (before separator)
|
|
207
|
+
# and right part (after separator). If separator not found, returns [arr].
|
|
208
|
+
#
|
|
209
|
+
# @example
|
|
210
|
+
# split_array([1, 2, 3, 4, 5], 3) # => [[1, 2], [4, 5]]
|
|
211
|
+
# split_array([1, 2, 3], 9) # => [[1, 2, 3]]
|
|
212
|
+
#
|
|
213
|
+
# @rbs (Array[Integer] arr, Integer separator) -> Array[Array[Integer]]
|
|
214
|
+
def split_array(arr, separator)
  cut = arr.index(separator)
  # No separator: the whole array is the only part.
  return [arr] unless cut

  # Everything before the separator, and everything after it (separator dropped).
  [arr.take(cut), arr.drop(cut + 1)]
end
|
|
223
|
+
|
|
224
|
+
# Shuffles an array of character codepoints using a consistent, deterministic algorithm
|
|
225
|
+
#
|
|
226
|
+
# This is a key part of the Sqids algorithm. The shuffle is deterministic and reversible,
|
|
227
|
+
# meaning the same input always produces the same output. The algorithm performs a series
|
|
228
|
+
# of swaps based on the current index and character values.
|
|
229
|
+
#
|
|
230
|
+
# The shuffle ensures that:
|
|
231
|
+
# - Sequential numbers don't produce sequential IDs
|
|
232
|
+
# - The same alphabet configuration always produces the same shuffle
|
|
233
|
+
# - The transformation is reversible
|
|
234
|
+
#
|
|
235
|
+
# @param chars [Array<Integer>] Array of character codepoints to shuffle
|
|
236
|
+
# @return [Array<Integer>] The shuffled array (modifies in place and returns)
|
|
237
|
+
#
|
|
238
|
+
# @rbs (Array[Integer] chars) -> Array[Integer]
|
|
239
|
+
def shuffle(chars)
  size = chars.length

  # Walk one cursor up from the front while its mirror walks down from the back;
  # the mirror never reaches index 0, so size - 1 swaps are performed.
  (size - 1).downto(1) do |back|
    front = size - 1 - back
    target = ((front * back) + chars[front] + chars[back]) % size
    chars[front], chars[target] = chars[target], chars[front]
  end

  # The array is shuffled in place and handed back for convenience.
  chars
end
|
|
252
|
+
|
|
253
|
+
# Internal method to encode an array of numbers into a Sqids string
|
|
254
|
+
#
|
|
255
|
+
# This is the core encoding logic. The process:
|
|
256
|
+
# 1. Calculates an offset based on the numbers and alphabet (ensures uniqueness)
|
|
257
|
+
# 2. Applies an increment if this is a retry (for blocklist filtering)
|
|
258
|
+
# 3. Selects a prefix character from the alphabet at the offset position
|
|
259
|
+
# 4. Rotates and reverses the alphabet based on the offset
|
|
260
|
+
# 5. Encodes each number and separates them with the first shuffled alphabet character
|
|
261
|
+
# 6. Pads to minimum length if needed
|
|
262
|
+
# 7. Checks against blocklist and retries with incremented offset if needed
|
|
263
|
+
#
|
|
264
|
+
# @param numbers [Array<Integer>] Array of integers to encode
|
|
265
|
+
# @param increment [Integer] Retry counter for blocklist collision avoidance (default: 0)
|
|
266
|
+
# @return [String] The encoded Sqids string
|
|
267
|
+
# @raise [ArgumentError] If max retry attempts (alphabet length) is exceeded
|
|
268
|
+
#
|
|
269
|
+
# @rbs (Array[Integer] numbers, ?increment: Integer) -> String
|
|
270
|
+
def encode_numbers(numbers, increment: 0)
  base_size = @alphabet.length
  raise ArgumentError, "Reached max attempts to re-generate the ID" if increment > base_size

  # Derive the starting offset from the count, the values and their positions,
  # then nudge it by the retry counter.
  seed = numbers.each_with_index.sum(numbers.length) do |value, position|
    @alphabet[value % base_size] + position
  end
  offset = ((seed % base_size) + increment) % base_size

  # The prefix records the offset so the decoder can rebuild this alphabet.
  working = rotate_and_reverse_alphabet(@alphabet, offset)
  codepoints = [@alphabet[offset]]
  final_position = numbers.length - 1

  numbers.each_with_index do |value, position|
    to_id(codepoints, value, working)
    next if position == final_position

    # Between numbers: emit the separator, then reshuffle for the next number.
    codepoints << working[0]
    working = shuffle(working)
  end

  if codepoints.length < @min_length
    # One separator marks the start of padding; then fill from reshuffled alphabets.
    codepoints << working[0]

    while codepoints.length < @min_length
      working = shuffle(working)
      codepoints.concat(working.first(@min_length - codepoints.length))
    end
  end

  candidate = codepoints.pack("U*")

  # A blocked result is regenerated with the next offset.
  blocked_id?(candidate) ? encode_numbers(numbers, increment: increment + 1) : candidate
end
|
|
317
|
+
|
|
318
|
+
# Converts a single number into its encoded representation and appends to the ID
|
|
319
|
+
#
|
|
320
|
+
# This implements a base conversion algorithm where:
|
|
321
|
+
# - The base is (alphabet_length - 1) because the first character is reserved as separator
|
|
322
|
+
# - Characters are added at the start_index position (building the number representation)
|
|
323
|
+
# - The process continues until the number is fully converted
|
|
324
|
+
#
|
|
325
|
+
# The algorithm repeatedly:
|
|
326
|
+
# 1. Takes the remainder (mod alphabet_length - 1) to get the next character index
|
|
327
|
+
# 2. Adds 1 to skip the first character (reserved as separator)
|
|
328
|
+
# 3. Inserts the character into the ID
|
|
329
|
+
# 4. Divides the number by the base to continue with the quotient
|
|
330
|
+
#
|
|
331
|
+
# @param id [Array<Integer>] The ID array being built (modified in place)
|
|
332
|
+
# @param num [Integer] The number to encode
|
|
333
|
+
# @param alphabet [Array<Integer>] The alphabet to use for encoding
|
|
334
|
+
# @return [void] Modifies id in place
|
|
335
|
+
#
|
|
336
|
+
# @rbs (Array[Integer] id, Integer num, Array[Integer] alphabet) -> void
|
|
337
|
+
def to_id(id, num, alphabet)
  # Every digit is inserted at the same spot, so later (more significant)
  # digits end up in front of earlier ones.
  insert_at = id.length
  # alphabet[0] is reserved as the separator: the usable base is one smaller
  # and digit values are shifted up by one when indexing.
  base = alphabet.length - 1
  remaining = num

  loop do
    digit = remaining % base
    id.insert(insert_at, alphabet[digit + 1])
    remaining /= base
    break if remaining <= 0
  end
end
|
|
350
|
+
|
|
351
|
+
# Converts an encoded ID chunk back into its original number
|
|
352
|
+
#
|
|
353
|
+
# This is the inverse of to_id, implementing base conversion from the custom alphabet
|
|
354
|
+
# back to a decimal integer. It processes each character in the ID chunk, treating it
|
|
355
|
+
# as a digit in a positional number system with base (alphabet_length - 1).
|
|
356
|
+
#
|
|
357
|
+
# The algorithm:
|
|
358
|
+
# 1. Finds each character's index in the alphabet
|
|
359
|
+
# 2. Subtracts 1 (because we added 1 during encoding to skip separator)
|
|
360
|
+
# 3. Multiplies accumulator by base and adds the digit value
|
|
361
|
+
#
|
|
362
|
+
# @param id [Array<Integer>] The encoded ID chunk (character codepoints)
|
|
363
|
+
# @param alphabet [Array<Integer>] The alphabet used during encoding
|
|
364
|
+
# @return [Integer] The decoded number
|
|
365
|
+
# @raise [RuntimeError] If a character is not found in the alphabet
|
|
366
|
+
#
|
|
367
|
+
# @rbs (Array[Integer] id, Array[Integer] alphabet) -> Integer
|
|
368
|
+
def to_number(id, alphabet)
  # alphabet[0] is the separator, so the numeric base is one less than the
  # alphabet size and each digit is its index minus one.
  base = alphabet.length - 1
  total = 0

  id.each do |codepoint|
    position = alphabet.index(codepoint)
    raise "Character #{codepoint} not found in alphabet" if position.nil?

    total = (total * base) + position - 1
  end

  total
end
|
|
377
|
+
|
|
378
|
+
# Checks if a generated ID contains any blocklisted words
|
|
379
|
+
#
|
|
380
|
+
# The filtering rules vary by word and ID length:
|
|
381
|
+
# - For very short IDs/words (≤3 chars): requires exact match
|
|
382
|
+
# - For words containing digits: checks if ID starts or ends with the word
|
|
383
|
+
# - For other words: checks if word appears anywhere in the ID
|
|
384
|
+
#
|
|
385
|
+
# This helps prevent offensive or inappropriate words from appearing in generated IDs
|
|
386
|
+
# while minimizing false positives.
|
|
387
|
+
#
|
|
388
|
+
# @param id [String] The generated ID to check
|
|
389
|
+
# @return [Boolean] true if the ID contains a blocklisted word, false otherwise
|
|
390
|
+
#
|
|
391
|
+
# @rbs (String id) -> bool
|
|
392
|
+
def blocked_id?(id)
  candidate = id.downcase

  @blocklist.any? do |word|
    # A word longer than the ID can never match.
    next false if word.length > candidate.length

    if candidate.length <= 3 || word.length <= 3
      # Short IDs or short words: only an exact match counts.
      candidate == word
    elsif word.match?(/\d/)
      # Words containing digits are only matched at either end of the ID.
      candidate.start_with?(word) || candidate.end_with?(word)
    else
      candidate.include?(word)
    end
  end
end
|
|
407
|
+
|
|
408
|
+
# Checks if a string contains any multibyte (non-ASCII) characters
|
|
409
|
+
#
|
|
410
|
+
# Sqids requires single-byte characters only because:
|
|
411
|
+
# - The algorithm uses character codepoints (ord) for shuffling and encoding
|
|
412
|
+
# - Multibyte characters would complicate the mathematical operations
|
|
413
|
+
# - Single-byte ensures consistent behavior across different Ruby versions/platforms
|
|
414
|
+
#
|
|
415
|
+
# @param input_str [String] The string to check
|
|
416
|
+
# @return [Boolean] true if any character requires multiple bytes, false otherwise
|
|
417
|
+
#
|
|
418
|
+
# @rbs (String input_str) -> bool
|
|
419
|
+
def contains_multibyte_chars?(input_str)
  # #initialize accepts the alphabet either as a String or as an Array of
  # one-character strings, and calls this check before normalising it.
  # An Array has no #each_char, so its elements are inspected directly.
  # An Array element longer than one byte (multibyte or multi-character)
  # is reported as true, since it is not a single-byte symbol.
  symbols = input_str.is_a?(Array) ? input_str : input_str.each_char
  symbols.any? { |char| char.bytesize > 1 }
end
|
|
426
|
+
|
|
427
|
+
# Rotates and reverses the alphabet based on an offset
|
|
428
|
+
#
|
|
429
|
+
# This transformation is a crucial part of the Sqids algorithm:
|
|
430
|
+
# - Rotation: moves elements from the start to the end by 'offset' positions
|
|
431
|
+
# - Reversal: reverses the entire array order
|
|
432
|
+
#
|
|
433
|
+
# These operations ensure that:
|
|
434
|
+
# - Different input numbers produce different alphabet arrangements
|
|
435
|
+
# - The transformation is deterministic and reproducible during decoding
|
|
436
|
+
# - Sequential numbers don't produce predictable patterns
|
|
437
|
+
#
|
|
438
|
+
# Both encoder and decoder use this to synchronize their alphabet state.
|
|
439
|
+
#
|
|
440
|
+
# @param alphabet [Array<Integer>] The alphabet to transform (character codepoints)
|
|
441
|
+
# @param offset [Integer] Number of positions to rotate
|
|
442
|
+
# @return [Array<Integer>] A new rotated and reversed alphabet
|
|
443
|
+
#
|
|
444
|
+
# @example
|
|
445
|
+
# rotate_and_reverse_alphabet([1,2,3,4,5], 2)
|
|
446
|
+
# # => [2, 1, 5, 4, 3] (rotated by 2: [3,4,5,1,2], then reversed)
|
|
447
|
+
#
|
|
448
|
+
# @rbs (Array[Integer] alphabet, Integer offset) -> Array[Integer]
|
|
449
|
+
def rotate_and_reverse_alphabet(alphabet, offset)
  # Array#rotate returns a fresh array, so the caller's alphabet is left
  # untouched; that copy is then reversed in place and returned.
  alphabet.rotate(offset).reverse!
end
|
|
454
|
+
end
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# rbs_inline: enabled
|
|
4
|
+
|
|
5
|
+
module EncodedId
  module Encoders
    # Encoder implementation using the Sqids algorithm for encoding/decoding IDs.
    #
    # Wraps a SqidsWithBlocklistMode instance; failures while constructing it or
    # while decoding are re-raised as InvalidInputError.
    class Sqids
      # @rbs @sqids: untyped
      # @rbs @min_hash_length: Integer
      # @rbs @alphabet: Alphabet
      # @rbs @blocklist: Blocklist
      # @rbs @blocklist_mode: Symbol
      # @rbs @blocklist_max_length: Integer

      # Stores the configuration and builds the underlying Sqids instance.
      #
      # @raise [InvalidInputError] if the underlying instance rejects the options
      #   with a TypeError or ArgumentError
      #
      # @rbs (?Integer min_hash_length, ?Alphabet alphabet, ?Blocklist blocklist, ?Symbol blocklist_mode, ?Integer blocklist_max_length) -> void
      def initialize(min_hash_length = 0, alphabet = Alphabet.alphanum, blocklist = Blocklist.empty, blocklist_mode = :length_threshold, blocklist_max_length = 32)
        @min_hash_length = min_hash_length
        @alphabet = alphabet
        @blocklist = blocklist
        @blocklist_mode = blocklist_mode
        @blocklist_max_length = blocklist_max_length

        @sqids = ::SqidsWithBlocklistMode.new(
          {
            min_length: min_hash_length,
            alphabet: alphabet.characters,
            blocklist: blocklist,
            blocklist_mode: blocklist_mode,
            blocklist_max_length: blocklist_max_length
          }
        )
      rescue TypeError, ArgumentError => error
        raise InvalidInputError, "unable to create sqids instance: #{error.message}"
      end

      attr_reader :min_hash_length #: Integer
      attr_reader :alphabet #: Alphabet
      attr_reader :blocklist #: Blocklist

      # Encodes a list of non-negative integers into a single ID string.
      #
      # Every entry is converted with Kernel#Integer, which raises if a value
      # cannot be converted. The converted integers (not the raw inputs) are used
      # for the negativity check and for encoding, so convertible non-Integer
      # entries such as "5" no longer fail on #negative?.
      #
      # @return [String] the encoded ID, or "" for an empty list or any negative number
      #
      # @rbs (Array[Integer] numbers) -> String
      def encode(numbers)
        integers = numbers.map { Integer(_1) } # raises if conversion fails
        return "" if integers.empty? || integers.any?(&:negative?)

        @sqids.encode(integers)
      end

      # Decodes an ID string back into its list of integers.
      #
      # @return [Array<Integer>] the decoded numbers; [] for nil or empty input
      # @raise [InvalidInputError] if the underlying decoder raises a StandardError
      #
      # @rbs (String hash) -> Array[Integer]
      def decode(hash)
        return [] if hash.nil? || hash.empty?

        @sqids.decode(hash)
      rescue
        raise InvalidInputError, "unable to unhash"
      end
    end
  end
end
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# rbs_inline: enabled
|
|
4
|
+
|
|
5
|
+
module EncodedId
  module Encoders
    # Configuration for the Sqids encoder.
    # Sqids does not use a salt - it shuffles the alphabet deterministically.
    class SqidsConfiguration < BaseConfiguration
      # Symbol identifying the encoder this configuration is for.
      # @rbs () -> Symbol
      def encoder_type
        :sqids
      end

      # Builds a Sqids encoder from this configuration's current settings.
      # @rbs () -> Sqids
      def create_encoder
        encoder_arguments = [
          min_length,
          alphabet,
          blocklist,
          blocklist_mode,
          blocklist_max_length
        ]

        Sqids.new(*encoder_arguments)
      end
    end
  end
end
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# rbs_inline: enabled
|
|
4
|
+
|
|
5
|
+
# Extension of MySqids (vendored Sqids) that adds blocklist mode support.
|
|
6
|
+
# This subclass overrides blocklist checking to support different modes
|
|
7
|
+
# without modifying the vendored library.
|
|
8
|
+
# In the future, the base class can be changed from MySqids to ::Sqids::Sqids
|
|
9
|
+
# once we use the official gem.
|
|
10
|
+
class SqidsWithBlocklistMode < MySqids
  # @rbs @blocklist_mode: Symbol
  # @rbs @blocklist_max_length: Integer

  # Accepts the parent's options plus :blocklist_mode (default :length_threshold)
  # and :blocklist_max_length (default 32).
  #
  # @rbs (?Hash[Symbol, untyped] options) -> void
  def initialize(options = {})
    @blocklist_mode = options[:blocklist_mode] || :length_threshold
    @blocklist_max_length = options[:blocklist_max_length] || 32

    # The parent only receives the options that are not specific to this subclass.
    own_keys = [:blocklist_mode, :blocklist_max_length]
    super(options.reject { |key, _value| own_keys.include?(key) })
  end

  private

  # Runs the parent's blocklist check only when the current mode calls for it.
  # @rbs (String id) -> bool
  def blocked_id?(id)
    check_blocklist?(id) && super
  end

  # Decides whether the blocklist should be consulted for this ID.
  #
  # An empty blocklist never needs checking. In :length_threshold mode only IDs
  # of at most @blocklist_max_length characters are checked. Every other mode
  # (:always, :raise_if_likely, or anything unrecognised) always checks.
  #
  # @rbs (String id) -> bool
  def check_blocklist?(id)
    return false if @blocklist.empty?
    return id.length <= @blocklist_max_length if @blocklist_mode == :length_threshold

    true
  end
end
|