encoded_id 1.0.0.rc5 → 1.0.0.rc7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +99 -3
  3. data/README.md +86 -329
  4. data/context/encoded_id.md +437 -0
  5. data/lib/encoded_id/alphabet.rb +34 -3
  6. data/lib/encoded_id/blocklist.rb +100 -0
  7. data/lib/encoded_id/encoders/base_configuration.rb +154 -0
  8. data/lib/encoded_id/encoders/hashid.rb +527 -0
  9. data/lib/encoded_id/encoders/hashid_configuration.rb +40 -0
  10. data/lib/encoded_id/encoders/hashid_consistent_shuffle.rb +110 -0
  11. data/lib/encoded_id/encoders/hashid_ordinal_alphabet_separator_guards.rb +244 -0
  12. data/lib/encoded_id/encoders/hashid_salt.rb +51 -0
  13. data/lib/encoded_id/encoders/my_sqids.rb +454 -0
  14. data/lib/encoded_id/encoders/sqids.rb +59 -0
  15. data/lib/encoded_id/encoders/sqids_configuration.rb +22 -0
  16. data/lib/encoded_id/encoders/sqids_with_blocklist_mode.rb +54 -0
  17. data/lib/encoded_id/hex_representation.rb +29 -14
  18. data/lib/encoded_id/reversible_id.rb +115 -82
  19. data/lib/encoded_id/version.rb +3 -1
  20. data/lib/encoded_id.rb +34 -4
  21. metadata +34 -26
  22. data/.devcontainer/Dockerfile +0 -9
  23. data/.devcontainer/compose.yml +0 -8
  24. data/.devcontainer/devcontainer.json +0 -8
  25. data/.standard.yml +0 -2
  26. data/Gemfile +0 -36
  27. data/Rakefile +0 -20
  28. data/Steepfile +0 -5
  29. data/ext/encoded_id/extconf.rb +0 -3
  30. data/ext/encoded_id/extension.c +0 -123
  31. data/ext/encoded_id/hashids.c +0 -939
  32. data/ext/encoded_id/hashids.h +0 -139
  33. data/lib/encoded_id/hash_id.rb +0 -227
  34. data/lib/encoded_id/hash_id_consistent_shuffle.rb +0 -27
  35. data/lib/encoded_id/hash_id_salt.rb +0 -15
  36. data/lib/encoded_id/ordinal_alphabet_separator_guards.rb +0 -90
  37. data/rbs_collection.yaml +0 -24
  38. data/sig/encoded_id.rbs +0 -189
@@ -0,0 +1,454 @@
1
+ # frozen_string_literal: true
2
+
3
+ # rbs_inline: enabled
4
+
5
+ # Sqids (pronounced "squids") is a library that generates short, unique, non-sequential IDs
6
+ # from numbers. It's useful for obfuscating database IDs, creating URL-friendly identifiers,
7
+ # and generating human-readable codes.
8
+ #
9
+ # Key features:
10
+ # - Reversible: encoded IDs can be decoded back to the original numbers
11
+ # - Customizable: supports custom alphabets, minimum lengths, and blocklists
12
+ # - Collision-free: same input always produces the same output
13
+ # - Blocklist filtering: automatically regenerates IDs that contain blocked words
14
+ #
15
+ # The algorithm uses a shuffling mechanism based on the input numbers to select characters
16
+ # from a customized alphabet, ensuring that sequential numbers produce non-sequential IDs.
17
+ #
18
+ class MySqids
19
+ # @rbs @alphabet: Array[Integer]
20
+ # @rbs @min_length: Integer
21
+ # @rbs @blocklist: (Array[String] | Set[String])
22
+
23
+ DEFAULT_ALPHABET = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
24
+
25
+ # Default minimum length of 0 means no padding is applied to generated IDs
26
+ DEFAULT_MIN_LENGTH = 0
27
+ # rubocop:disable Metrics/CollectionLiteralLength, Layout/LineLength
28
+ # Default blocklist containing words that should not appear in generated IDs
29
+ # The blocklist prevents offensive or inappropriate words from appearing in IDs by
30
+ # regenerating IDs that contain these patterns.
31
+ DEFAULT_BLOCKLIST = %w[0rgasm 1d10t 1d1ot 1di0t 1diot 1eccacu10 1eccacu1o 1eccacul0
32
+ 1eccaculo 1mbec11e 1mbec1le 1mbeci1e 1mbecile a11upat0 a11upato a1lupat0 a1lupato aand ah01e ah0le aho1e ahole al1upat0 al1upato allupat0 allupato ana1 ana1e anal anale anus arrapat0 arrapato arsch arse ass b00b b00be b01ata b0ceta b0iata b0ob b0obe b0sta b1tch b1te b1tte ba1atkar balatkar bastard0 bastardo batt0na battona bitch bite bitte bo0b bo0be bo1ata boceta boiata boob boobe bosta bran1age bran1er bran1ette bran1eur bran1euse branlage branler branlette branleur branleuse c0ck c0g110ne c0g11one c0g1i0ne c0g1ione c0gl10ne c0gl1one c0gli0ne c0glione c0na c0nnard c0nnasse c0nne c0u111es c0u11les c0u1l1es c0u1lles c0ui11es c0ui1les c0uil1es c0uilles c11t c11t0 c11to c1it c1it0 c1ito cabr0n cabra0 cabrao cabron caca cacca cacete cagante cagar cagare cagna cara1h0 cara1ho caracu10 caracu1o caracul0 caraculo caralh0 caralho cazz0 cazz1mma cazzata cazzimma cazzo ch00t1a ch00t1ya ch00tia ch00tiya ch0d ch0ot1a ch0ot1ya ch0otia ch0otiya ch1asse ch1avata ch1er ch1ng0 ch1ngadaz0s ch1ngadazos ch1ngader1ta ch1ngaderita ch1ngar ch1ngo ch1ngues ch1nk chatte chiasse chiavata chier ching0 chingadaz0s chingadazos chingader1ta chingaderita chingar chingo chingues chink cho0t1a cho0t1ya cho0tia cho0tiya chod choot1a choot1ya chootia chootiya cl1t cl1t0 cl1to clit clit0 clito cock cog110ne cog11one cog1i0ne cog1ione cogl10ne cogl1one cogli0ne coglione cona connard connasse conne cou111es cou11les cou1l1es cou1lles coui11es coui1les couil1es couilles cracker crap cu10 cu1att0ne cu1attone cu1er0 cu1ero cu1o cul0 culatt0ne culattone culer0 culero culo cum cunt d11d0 d11do d1ck d1ld0 d1ldo damn de1ch deich depp di1d0 di1do dick dild0 dildo dyke encu1e encule enema enf01re enf0ire enfo1re enfoire estup1d0 estup1do estupid0 estupido etr0n etron f0da f0der f0ttere f0tters1 f0ttersi f0tze f0utre f1ca f1cker f1ga fag fica ficker figa foda foder fottere fotters1 fottersi fotze foutre fr0c10 fr0c1o fr0ci0 fr0cio fr0sc10 fr0sc1o fr0sci0 fr0scio froc10 froc1o froci0 frocio frosc10 frosc1o 
frosci0 froscio fuck g00 g0o g0u1ne g0uine gandu go0 goo gou1ne gouine gr0gnasse grognasse haram1 harami haramzade hund1n hundin id10t id1ot idi0t idiot imbec11e imbec1le imbeci1e imbecile j1zz jerk jizz k1ke kam1ne kamine kike leccacu10 leccacu1o leccacul0 leccaculo m1erda m1gn0tta m1gnotta m1nch1a m1nchia m1st mam0n mamahuev0 mamahuevo mamon masturbat10n masturbat1on masturbate masturbati0n masturbation merd0s0 merd0so merda merde merdos0 merdoso mierda mign0tta mignotta minch1a minchia mist musch1 muschi n1gger neger negr0 negre negro nerch1a nerchia nigger orgasm p00p p011a p01la p0l1a p0lla p0mp1n0 p0mp1no p0mpin0 p0mpino p0op p0rca p0rn p0rra p0uff1asse p0uffiasse p1p1 p1pi p1r1a p1rla p1sc10 p1sc1o p1sci0 p1scio p1sser pa11e pa1le pal1e palle pane1e1r0 pane1e1ro pane1eir0 pane1eiro panele1r0 panele1ro paneleir0 paneleiro patakha pec0r1na pec0rina pecor1na pecorina pen1s pendej0 pendejo penis pip1 pipi pir1a pirla pisc10 pisc1o pisci0 piscio pisser po0p po11a po1la pol1a polla pomp1n0 pomp1no pompin0 pompino poop porca porn porra pouff1asse pouffiasse pr1ck prick pussy put1za puta puta1n putain pute putiza puttana queca r0mp1ba11e r0mp1ba1le r0mp1bal1e r0mp1balle r0mpiba11e r0mpiba1le r0mpibal1e r0mpiballe rand1 randi rape recch10ne recch1one recchi0ne recchione retard romp1ba11e romp1ba1le romp1bal1e romp1balle rompiba11e rompiba1le rompibal1e rompiballe ruff1an0 ruff1ano ruffian0 ruffiano s1ut sa10pe sa1aud sa1ope sacanagem sal0pe salaud salope saugnapf sb0rr0ne sb0rra sb0rrone sbattere sbatters1 sbattersi sborr0ne sborra sborrone sc0pare sc0pata sch1ampe sche1se sche1sse scheise scheisse schlampe schwachs1nn1g schwachs1nnig schwachsinn1g schwachsinnig schwanz scopare scopata sexy sh1t shit slut sp0mp1nare sp0mpinare spomp1nare spompinare str0nz0 str0nza str0nzo stronz0 stronza stronzo stup1d stupid succh1am1 succh1ami succhiam1 succhiami sucker t0pa tapette test1c1e test1cle testic1e testicle tette topa tr01a tr0ia tr0mbare tr1ng1er tr1ngler tring1er 
tringler tro1a troia trombare turd twat vaffancu10 vaffancu1o vaffancul0 vaffanculo vag1na vagina verdammt verga w1chsen wank wichsen x0ch0ta x0chota xana xoch0ta xochota z0cc01a z0cc0la z0cco1a z0ccola z1z1 z1zi ziz1 zizi zocc01a zocc0la zocco1a zoccola].freeze
33
+ # rubocop:enable Metrics/CollectionLiteralLength, Layout/LineLength
34
+
35
+ # Maximum integer value that can be encoded
36
+ # Core Ruby defines no Integer::MAX, so unless something else defines it the fallback is used:
37
+ # the largest Fixnum-style value for the platform's word size (2**62 - 1 on 64-bit builds)
38
+ MAX_INT = defined?(Integer::MAX) ? Integer::MAX : ((2**((0.size * 8) - 2)) - 1)
39
+
40
+ # Returns MAX_INT, the largest value #encode accepts for any single number
41
+ # @rbs () -> Integer
42
+ def self.max_value
43
+ MAX_INT
44
+ end
45
+
46
+ # Initializes a new MySqids encoder with custom options
47
+ #
48
+ # @param options [Hash] Configuration options
49
+ # @option options [String, Array<String>] :alphabet Custom alphabet to use for encoding
50
+ # (default: DEFAULT_ALPHABET). Must be at least 3 characters and contain only single-byte chars.
51
+ # @option options [Integer] :min_length Minimum length for generated IDs (default: 0).
52
+ # IDs shorter than this will be padded. Must be between 0 and 255.
53
+ # @option options [Array<String>, Set<String>] :blocklist Words to exclude from generated IDs
54
+ # (default: DEFAULT_BLOCKLIST). Words must be at least 3 characters long.
55
+ #
56
+ # @raise [ArgumentError] If alphabet contains multibyte characters
57
+ # @raise [ArgumentError] If alphabet is shorter than 3 characters
58
+ # @raise [ArgumentError] If alphabet contains duplicate characters
59
+ # @raise [TypeError] If min_length is not between 0 and 255
60
+ #
61
+ # @rbs (?Hash[Symbol, untyped] options) -> void
62
# Builds an encoder from an options hash (:alphabet, :min_length, :blocklist).
#
# The alphabet is normalised to an array of characters *before* it is validated, so both
# documented input forms (String and Array<String>) go through the same checks. Previously
# the multibyte check ran on the raw value and an Array alphabet failed with NoMethodError.
#
# @param options [Hash] configuration; missing keys fall back to the DEFAULT_* constants
# @raise [ArgumentError] if an alphabet entry is longer than one byte, the alphabet has fewer
#   than 3 entries, or it contains duplicates
# @raise [TypeError] if min_length is not an Integer between 0 and 255
#
# @rbs (?Hash[Symbol, untyped] options) -> void
def initialize(options = {})
  alphabet = options[:alphabet] || DEFAULT_ALPHABET
  min_length = options[:min_length] || DEFAULT_MIN_LENGTH
  blocklist = options[:blocklist] || DEFAULT_BLOCKLIST

  # Normalise first: every later step (validation, downcasing, ord) works per character.
  alphabet = alphabet.chars unless alphabet.is_a?(Array)

  # An entry wider than one byte is either a multibyte character or a multi-character string;
  # both would be silently truncated by `ord` below, so reject them here.
  raise ArgumentError, "Alphabet cannot contain multibyte characters" if alphabet.any? { |char| char.bytesize > 1 }
  raise ArgumentError, "Alphabet length must be at least 3" if alphabet.length < 3
  raise ArgumentError, "Alphabet must contain unique characters" if alphabet.uniq.size != alphabet.length

  min_length_limit = 255
  unless min_length.is_a?(Integer) && min_length.between?(0, min_length_limit)
    raise TypeError, "Minimum length has to be between 0 and #{min_length_limit}"
  end

  # With both defaults in play the default blocklist is used as-is; otherwise keep only words
  # that are at least 3 characters long and spellable with the (case-insensitive) alphabet.
  filtered_blocklist = if options[:blocklist].nil? && options[:alphabet].nil?
    blocklist
  else
    downcased_alphabet = alphabet.map(&:downcase)
    blocklist.select do |word|
      word.length >= 3 && (word.downcase.chars - downcased_alphabet).empty?
    end.to_set(&:downcase)
  end

  # The working alphabet is stored as shuffled codepoints.
  @alphabet = shuffle(alphabet.map(&:ord))
  @min_length = min_length
  @blocklist = filtered_blocklist
end
96
+
97
+ # Encodes an array of integers into a single Sqids string
98
+ #
99
+ # The encoding process:
100
+ # 1. Validates all numbers are in valid range (0 to MAX_INT)
101
+ # 2. Generates a prefix character based on the numbers and alphabet
102
+ # 3. Encodes each number using a shuffled alphabet
103
+ # 4. Separates encoded numbers with the first character of the shuffled alphabet
104
+ # 5. Pads the result if it's shorter than min_length
105
+ # 6. Regenerates if the result contains blocklisted words
106
+ #
107
+ # @param numbers [Array<Integer>] Array of non-negative integers to encode
108
+ # @return [String] The encoded Sqids string
109
+ # @raise [ArgumentError] If any number is outside the valid range (0 to MAX_INT)
110
+ #
111
+ # @example
112
+ # sqids.encode([1, 2, 3]) # => "86Rf07"
113
+ # sqids.encode([]) # => ""
114
+ #
115
+ # @rbs (Array[Integer] numbers) -> String
116
# Encodes a list of non-negative integers into one ID string.
#
# Every element is coerced with #to_i and must land in 0..MAX_INT; an empty list yields "".
#
# @param numbers [Array<Integer>] values to encode
# @return [String] the encoded ID
# @raise [ArgumentError] if any coerced value falls outside 0..MAX_INT
#
# @rbs (Array[Integer] numbers) -> String
def encode(numbers)
  return "" if numbers.empty?

  integers = numbers.map(&:to_i)
  unless integers.all? { |value| value.between?(0, MAX_INT) }
    raise ArgumentError, "Encoding supports numbers between 0 and #{MAX_INT}"
  end

  encode_numbers(integers)
end
131
+
132
+ # Decodes a Sqids string back into the original array of integers
133
+ #
134
+ # The decoding process mirrors the encoding:
135
+ # 1. Validates all characters exist in the alphabet
136
+ # 2. Extracts the prefix to determine the alphabet offset
137
+ # 3. Rotates and reverses the alphabet based on the offset
138
+ # 4. Splits the ID by separator characters (first char of shuffled alphabet)
139
+ # 5. Converts each chunk back to its original number
140
+ # 6. Re-shuffles the alphabet between chunks
141
+ #
142
+ # @param id [String] The Sqids string to decode
143
+ # @return [Array<Integer>] Array of integers that were encoded, or empty array if invalid
144
+ #
145
+ # @example
146
+ # sqids.decode("86Rf07") # => [1, 2, 3]
147
+ # sqids.decode("") # => []
148
+ # sqids.decode("xyz") # => [] (if 'xyz' contains invalid chars)
149
+ #
150
+ # @rbs (String id) -> Array[Integer]
151
# Decodes an ID string back into the list of integers it was built from.
#
# Returns an empty array for an empty ID or for an ID containing a character outside the
# alphabet. Decoding stops (returning what was decoded so far) at the first empty segment.
#
# @param id [String] the ID to decode
# @return [Array<Integer>] decoded numbers
#
# @rbs (String id) -> Array[Integer]
def decode(id)
  numbers = [] #: Array[Integer]
  return numbers if id.empty?

  codepoints = id.codepoints
  return numbers unless codepoints.all? { |codepoint| @alphabet.include?(codepoint) }

  # The first character is the prefix; its position in the alphabet is the encoding offset.
  offset = @alphabet.index(codepoints.first)
  return [] if offset.nil?

  working_alphabet = rotate_and_reverse_alphabet(@alphabet, offset)
  remaining = codepoints.drop(1)

  until remaining.empty?
    # The separator is always the first character of the current working alphabet.
    segment, rest = split_array(remaining, working_alphabet.first)
    return numbers if segment.empty?

    numbers.push(to_number(segment, working_alphabet))
    # `rest` is nil when no separator was found, i.e. this was the final segment.
    working_alphabet = shuffle(working_alphabet) unless rest.nil?
    remaining = rest || []
  end

  numbers
end
196
+
197
+ private
198
+
199
+ # Splits an array into two parts at the first occurrence of a separator
200
+ #
201
+ # This is used during decoding to split the encoded ID at separator characters,
202
+ # which mark the boundaries between encoded numbers.
203
+ #
204
+ # @param arr [Array<Integer>] The array to split (character codepoints)
205
+ # @param separator [Integer] The separator character codepoint to split on
206
+ # @return [Array<Array<Integer>>] An array containing the left part (before separator)
207
+ # and right part (after separator). If separator not found, returns [arr].
208
+ #
209
+ # @example
210
+ # split_array([1, 2, 3, 4, 5], 3) # => [[1, 2], [4, 5]]
211
+ # split_array([1, 2, 3], 9) # => [[1, 2, 3]]
212
+ #
213
+ # @rbs (Array[Integer] arr, Integer separator) -> Array[Array[Integer]]
214
# Cuts +arr+ at the first occurrence of +separator+.
#
# @param arr [Array<Integer>] codepoints to split
# @param separator [Integer] codepoint to split on
# @return [Array<Array<Integer>>] [before, after] when the separator is present, else [arr]
#
# @rbs (Array[Integer] arr, Integer separator) -> Array[Array[Integer]]
def split_array(arr, separator)
  cut = arr.index(separator)
  cut ? [arr.take(cut), arr.drop(cut + 1)] : [arr]
end
223
+
224
+ # Shuffles an array of character codepoints using a consistent, deterministic algorithm
225
+ #
226
+ # This is a key part of the Sqids algorithm. The shuffle is deterministic and reversible,
227
+ # meaning the same input always produces the same output. The algorithm performs a series
228
+ # of swaps based on the current index and character values.
229
+ #
230
+ # The shuffle ensures that:
231
+ # - Sequential numbers don't produce sequential IDs
232
+ # - The same alphabet configuration always produces the same shuffle
233
+ # - The transformation is reversible
234
+ #
235
+ # @param chars [Array<Integer>] Array of character codepoints to shuffle
236
+ # @return [Array<Integer>] The shuffled array (modifies in place and returns)
237
+ #
238
+ # @rbs (Array[Integer] chars) -> Array[Integer]
239
# Deterministically permutes +chars+ in place and returns it.
#
# Walks i upward from 0 while j walks downward from the last index (stopping before j hits 0),
# swapping position i with a position derived from i, j and the two current values.
#
# @param chars [Array<Integer>] codepoints to permute (mutated)
# @return [Array<Integer>] the same array object, permuted
#
# @rbs (Array[Integer] chars) -> Array[Integer]
def shuffle(chars)
  size = chars.length

  (0...(size - 1)).each do |i|
    j = size - 1 - i
    target = ((i * j) + chars[i] + chars[j]) % size
    chars[i], chars[target] = chars[target], chars[i]
  end

  chars
end
252
+
253
+ # Internal method to encode an array of numbers into a Sqids string
254
+ #
255
+ # This is the core encoding logic. The process:
256
+ # 1. Calculates an offset based on the numbers and alphabet (ensures uniqueness)
257
+ # 2. Applies an increment if this is a retry (for blocklist filtering)
258
+ # 3. Selects a prefix character from the alphabet at the offset position
259
+ # 4. Rotates and reverses the alphabet based on the offset
260
+ # 5. Encodes each number and separates them with the first shuffled alphabet character
261
+ # 6. Pads to minimum length if needed
262
+ # 7. Checks against blocklist and retries with incremented offset if needed
263
+ #
264
+ # @param numbers [Array<Integer>] Array of integers to encode
265
+ # @param increment [Integer] Retry counter for blocklist collision avoidance (default: 0)
266
+ # @return [String] The encoded Sqids string
267
+ # @raise [ArgumentError] If max retry attempts (alphabet length) is exceeded
268
+ #
269
+ # @rbs (Array[Integer] numbers, ?increment: Integer) -> String
270
# Core encoder: turns already-validated integers into an ID string.
#
# Derives an offset from the numbers, uses it to pick the prefix character and the working
# alphabet, encodes each number (separated by the working alphabet's first character, with a
# re-shuffle after each separator), pads up to @min_length, and retries with the next
# increment when the result is blocked.
#
# @param numbers [Array<Integer>] integers to encode
# @param increment [Integer] retry counter added to the offset
# @return [String] the encoded ID
# @raise [ArgumentError] once increment exceeds the alphabet length
#
# @rbs (Array[Integer] numbers, ?increment: Integer) -> String
def encode_numbers(numbers, increment: 0)
  size = @alphabet.length
  raise ArgumentError, "Reached max attempts to re-generate the ID" if increment > size

  offset = numbers.each_with_index.sum(numbers.length) { |number, index| @alphabet[number % size] + index }
  offset = ((offset % size) + increment) % size

  working_alphabet = rotate_and_reverse_alphabet(@alphabet, offset)
  id = [@alphabet[offset]]

  last_index = numbers.length - 1
  numbers.each_with_index do |number, index|
    to_id(id, number, working_alphabet)
    next if index == last_index

    # Separator between numbers, then a fresh shuffle for the next number.
    id.push(working_alphabet[0])
    working_alphabet = shuffle(working_alphabet)
  end

  if @min_length > id.length
    # A separator marks where the padding begins.
    id << working_alphabet[0]

    until id.length >= @min_length
      working_alphabet = shuffle(working_alphabet)
      id.concat(working_alphabet.first(@min_length - id.length))
    end
  end

  encoded = id.pack("U*")
  blocked_id?(encoded) ? encode_numbers(numbers, increment: increment + 1) : encoded
end
317
+
318
+ # Converts a single number into its encoded representation and appends to the ID
319
+ #
320
+ # This implements a base conversion algorithm where:
321
+ # - The base is (alphabet_length - 1) because the first character is reserved as separator
322
+ # - Characters are added at the start_index position (building the number representation)
323
+ # - The process continues until the number is fully converted
324
+ #
325
+ # The algorithm repeatedly:
326
+ # 1. Takes the remainder (mod alphabet_length - 1) to get the next character index
327
+ # 2. Adds 1 to skip the first character (reserved as separator)
328
+ # 3. Inserts the character into the ID
329
+ # 4. Divides the number by the base to continue with the quotient
330
+ #
331
+ # @param id [Array<Integer>] The ID array being built (modified in place)
332
+ # @param num [Integer] The number to encode
333
+ # @param alphabet [Array<Integer>] The alphabet to use for encoding
334
+ # @return [void] Modifies id in place
335
+ #
336
+ # @rbs (Array[Integer] id, Integer num, Array[Integer] alphabet) -> void
337
# Appends the encoding of +num+ to +id+.
#
# Digits are produced least-significant first in base (alphabet.length - 1) and each one is
# inserted at the position where +id+ originally ended, so they end up most-significant first.
# Index 0 of the alphabet is never used as a digit (hence the +1).
#
# @param id [Array<Integer>] codepoints built so far (mutated)
# @param num [Integer] number to encode
# @param alphabet [Array<Integer>] working alphabet
# @return [void]
#
# @rbs (Array[Integer] id, Integer num, Array[Integer] alphabet) -> void
def to_id(id, num, alphabet)
  base = alphabet.length - 1
  insert_at = id.length
  remaining = num

  loop do
    remaining, digit = remaining.divmod(base)
    id.insert(insert_at, alphabet[digit + 1])
    break if remaining <= 0
  end
end
350
+
351
+ # Converts an encoded ID chunk back into its original number
352
+ #
353
+ # This is the inverse of to_id, implementing base conversion from the custom alphabet
354
+ # back to a decimal integer. It processes each character in the ID chunk, treating it
355
+ # as a digit in a positional number system with base (alphabet_length - 1).
356
+ #
357
+ # The algorithm:
358
+ # 1. Finds each character's index in the alphabet
359
+ # 2. Subtracts 1 (because we added 1 during encoding to skip separator)
360
+ # 3. Multiplies accumulator by base and adds the digit value
361
+ #
362
+ # @param id [Array<Integer>] The encoded ID chunk (character codepoints)
363
+ # @param alphabet [Array<Integer>] The alphabet used during encoding
364
+ # @return [Integer] The decoded number
365
+ # @raise [RuntimeError] If a character is not found in the alphabet
366
+ #
367
+ # @rbs (Array[Integer] id, Array[Integer] alphabet) -> Integer
368
# Converts one encoded segment back to its integer.
#
# Each codepoint's alphabet index minus one is a digit in base (alphabet.length - 1);
# digits are accumulated most-significant first.
#
# @param id [Array<Integer>] codepoints of a single segment
# @param alphabet [Array<Integer>] working alphabet the segment was encoded with
# @return [Integer] the decoded number (0 for an empty segment)
# @raise [RuntimeError] if a codepoint is not in the alphabet
#
# @rbs (Array[Integer] id, Array[Integer] alphabet) -> Integer
def to_number(id, alphabet)
  base = alphabet.length - 1
  total = 0

  id.each do |codepoint|
    position = alphabet.index(codepoint)
    raise "Character #{codepoint} not found in alphabet" if position.nil?

    total = (total * base) + position - 1
  end

  total
end
377
+
378
+ # Checks if a generated ID contains any blocklisted words
379
+ #
380
+ # The filtering rules vary by word and ID length:
381
+ # - For very short IDs/words (≤3 chars): requires exact match
382
+ # - For words containing digits: checks if ID starts or ends with the word
383
+ # - For other words: checks if word appears anywhere in the ID
384
+ #
385
+ # This helps prevent offensive or inappropriate words from appearing in generated IDs
386
+ # while minimizing false positives.
387
+ #
388
+ # @param id [String] The generated ID to check
389
+ # @return [Boolean] true if the ID contains a blocklisted word, false otherwise
390
+ #
391
+ # @rbs (String id) -> bool
392
# Tells whether +id+ (compared case-insensitively) hits any word in @blocklist.
#
# Words longer than the ID never match. When the ID or the word is 3 characters or shorter
# only an exact match counts; words containing a digit must sit at the start or end of the ID;
# every other word matches anywhere inside the ID.
#
# @param id [String] generated ID
# @return [Boolean]
#
# @rbs (String id) -> bool
def blocked_id?(id)
  lowered = id.downcase

  @blocklist.any? do |word|
    next false if word.length > lowered.length

    if lowered.length <= 3 || word.length <= 3
      lowered == word
    elsif word.match?(/\d/)
      lowered.start_with?(word) || lowered.end_with?(word)
    else
      lowered.include?(word)
    end
  end
end
407
+
408
+ # Checks if a string contains any multibyte (non-ASCII) characters
409
+ #
410
+ # Sqids requires single-byte characters only because:
411
+ # - The algorithm uses character codepoints (ord) for shuffling and encoding
412
+ # - Multibyte characters would complicate the mathematical operations
413
+ # - Single-byte ensures consistent behavior across different Ruby versions/platforms
414
+ #
415
+ # @param input_str [String] The string to check
416
+ # @return [Boolean] true if any character requires multiple bytes, false otherwise
417
+ #
418
+ # @rbs (String input_str) -> bool
419
# Reports whether the alphabet contains an entry wider than one byte.
#
# Accepts both alphabet forms the constructor documents: a String (checked character by
# character) or an Array of strings (checked entry by entry). An Array used to raise
# NoMethodError here because Array has no #each_char.
#
# @param input_str [String, Array<String>] the alphabet to inspect
# @return [Boolean] true if any character/entry occupies more than one byte
#
# @rbs (String | Array[String] input_str) -> bool
def contains_multibyte_chars?(input_str)
  entries = input_str.respond_to?(:each_char) ? input_str.each_char : input_str
  entries.any? { |char| char.bytesize > 1 }
end
426
+
427
+ # Rotates and reverses the alphabet based on an offset
428
+ #
429
+ # This transformation is a crucial part of the Sqids algorithm:
430
+ # - Rotation: moves elements from the start to the end by 'offset' positions
431
+ # - Reversal: reverses the entire array order
432
+ #
433
+ # These operations ensure that:
434
+ # - Different input numbers produce different alphabet arrangements
435
+ # - The transformation is deterministic and reproducible during decoding
436
+ # - Sequential numbers don't produce predictable patterns
437
+ #
438
+ # Both encoder and decoder use this to synchronize their alphabet state.
439
+ #
440
+ # @param alphabet [Array<Integer>] The alphabet to transform (character codepoints)
441
+ # @param offset [Integer] Number of positions to rotate
442
+ # @return [Array<Integer>] A new rotated and reversed alphabet
443
+ #
444
+ # @example
445
+ # rotate_and_reverse_alphabet([1,2,3,4,5], 2)
446
+ # # => [2, 1, 5, 4, 3] (rotated by 2: [3,4,5,1,2], then reversed)
447
+ #
448
+ # @rbs (Array[Integer] alphabet, Integer offset) -> Array[Integer]
449
# Returns a new array: +alphabet+ rotated left by +offset+, then reversed.
# The input array is left untouched.
#
# @param alphabet [Array<Integer>] alphabet codepoints
# @param offset [Integer] rotation amount
# @return [Array<Integer>] the transformed copy
#
# @rbs (Array[Integer] alphabet, Integer offset) -> Array[Integer]
def rotate_and_reverse_alphabet(alphabet, offset)
  alphabet.rotate(offset).reverse
end
454
+ end
@@ -0,0 +1,59 @@
1
+ # frozen_string_literal: true
2
+
3
+ # rbs_inline: enabled
4
+
5
module EncodedId
  module Encoders
    # Encoder implementation using the Sqids algorithm for encoding/decoding IDs.
    #
    # Wraps a SqidsWithBlocklistMode instance and keeps the settings it was built from.
    class Sqids
      # @rbs @sqids: untyped
      # @rbs @min_hash_length: Integer
      # @rbs @alphabet: Alphabet
      # @rbs @blocklist: Blocklist
      # @rbs @blocklist_mode: Symbol
      # @rbs @blocklist_max_length: Integer

      # Stores the settings and builds the underlying Sqids instance from them.
      #
      # @raise [InvalidInputError] if the underlying instance rejects the settings
      #   (it signals that with TypeError or ArgumentError)
      #
      # @rbs (?Integer min_hash_length, ?Alphabet alphabet, ?Blocklist blocklist, ?Symbol blocklist_mode, ?Integer blocklist_max_length) -> void
      def initialize(min_hash_length = 0, alphabet = Alphabet.alphanum, blocklist = Blocklist.empty, blocklist_mode = :length_threshold, blocklist_max_length = 32)
        @min_hash_length = min_hash_length
        @alphabet = alphabet
        @blocklist = blocklist
        @blocklist_mode = blocklist_mode
        @blocklist_max_length = blocklist_max_length

        @sqids = ::SqidsWithBlocklistMode.new(
          {
            min_length: min_hash_length,
            alphabet: alphabet.characters,
            blocklist: blocklist,
            blocklist_mode: blocklist_mode,
            blocklist_max_length: blocklist_max_length
          }
        )
      rescue TypeError, ArgumentError => error
        raise InvalidInputError, "unable to create sqids instance: #{error.message}"
      end

      attr_reader :min_hash_length #: Integer
      attr_reader :alphabet #: Alphabet
      attr_reader :blocklist #: Blocklist

      # Encodes the given numbers, returning "" for an empty list or when any number is negative.
      #
      # Each element is converted exactly once and the converted integers are used for both the
      # negativity check and the encoding. Previously the conversion result was discarded, so an
      # integer-like string passed validation and then failed with NoMethodError on #negative?.
      # Strings are parsed as base 10 so that "010" means 10 rather than octal 8.
      #
      # @raise [ArgumentError, TypeError] if an element cannot be converted to an Integer
      #
      # @rbs (Array[Integer] numbers) -> String
      def encode(numbers)
        integers = numbers.map { _1.is_a?(String) ? Integer(_1, 10) : Integer(_1) } # raises if conversion fails
        return "" if integers.empty? || integers.any?(&:negative?)

        @sqids.encode(integers)
      end

      # Decodes a hash back into its numbers; nil or empty input yields [].
      #
      # @raise [InvalidInputError] if the underlying decoder raises any StandardError
      #
      # @rbs (String hash) -> Array[Integer]
      def decode(hash)
        return [] if hash.nil? || hash.empty?

        @sqids.decode(hash)
      rescue
        raise InvalidInputError, "unable to unhash"
      end
    end
  end
end
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
+ # rbs_inline: enabled
4
+
5
module EncodedId
  module Encoders
    # Configuration object for the Sqids encoder.
    # Unlike a salted encoder, Sqids derives its alphabet shuffle deterministically, so no salt
    # is involved here.
    class SqidsConfiguration < BaseConfiguration
      # Identifies which encoder this configuration belongs to.
      # @rbs () -> Symbol
      def encoder_type
        :sqids
      end

      # Builds a Sqids encoder from this configuration's settings.
      # @rbs () -> Sqids
      def create_encoder
        encoder_args = [min_length, alphabet, blocklist, blocklist_mode, blocklist_max_length]
        Sqids.new(*encoder_args)
      end
    end
  end
end
@@ -0,0 +1,54 @@
1
+ # frozen_string_literal: true
2
+
3
+ # rbs_inline: enabled
4
+
5
+ # Extension of MySqids (vendored Sqids) that adds blocklist mode support.
6
+ # This subclass overrides blocklist checking to support different modes
7
+ # without modifying the vendored library.
8
+ # In the future, the base class can be changed from MySqids to ::Sqids::Sqids
9
+ # once we use the official gem.
10
class SqidsWithBlocklistMode < MySqids
  # @rbs @blocklist_mode: Symbol
  # @rbs @blocklist_max_length: Integer

  # Option keys consumed by this subclass and withheld from MySqids#initialize.
  OWN_OPTION_KEYS = %i[blocklist_mode blocklist_max_length].freeze
  private_constant :OWN_OPTION_KEYS

  # Records the blocklist mode settings, then hands the remaining options to the parent.
  # @rbs (?Hash[Symbol, untyped] options) -> void
  def initialize(options = {})
    @blocklist_mode = options[:blocklist_mode] || :length_threshold
    @blocklist_max_length = options[:blocklist_max_length] || 32

    # Work on a copy so the caller's hash is not modified.
    parent_options = options.dup
    OWN_OPTION_KEYS.each { |key| parent_options.delete(key) }

    super(parent_options)
  end

  private

  # Runs the parent's blocklist check only when the current mode calls for it.
  # @rbs (String id) -> bool
  def blocked_id?(id)
    check_blocklist?(id) ? super : false
  end

  # Decides whether the blocklist applies to +id+.
  # An empty blocklist never applies. In :length_threshold mode only IDs no longer than
  # @blocklist_max_length are checked; every other mode value always checks.
  # @rbs (String id) -> bool
  def check_blocklist?(id)
    return false if @blocklist.empty?
    return true unless @blocklist_mode == :length_threshold

    id.length <= @blocklist_max_length
  end
end