encoded_id 1.0.0.rc5 → 1.0.0.rc7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +99 -3
- data/README.md +86 -329
- data/context/encoded_id.md +437 -0
- data/lib/encoded_id/alphabet.rb +34 -3
- data/lib/encoded_id/blocklist.rb +100 -0
- data/lib/encoded_id/encoders/base_configuration.rb +154 -0
- data/lib/encoded_id/encoders/hashid.rb +527 -0
- data/lib/encoded_id/encoders/hashid_configuration.rb +40 -0
- data/lib/encoded_id/encoders/hashid_consistent_shuffle.rb +110 -0
- data/lib/encoded_id/encoders/hashid_ordinal_alphabet_separator_guards.rb +244 -0
- data/lib/encoded_id/encoders/hashid_salt.rb +51 -0
- data/lib/encoded_id/encoders/my_sqids.rb +454 -0
- data/lib/encoded_id/encoders/sqids.rb +59 -0
- data/lib/encoded_id/encoders/sqids_configuration.rb +22 -0
- data/lib/encoded_id/encoders/sqids_with_blocklist_mode.rb +54 -0
- data/lib/encoded_id/hex_representation.rb +29 -14
- data/lib/encoded_id/reversible_id.rb +115 -82
- data/lib/encoded_id/version.rb +3 -1
- data/lib/encoded_id.rb +34 -4
- metadata +34 -26
- data/.devcontainer/Dockerfile +0 -9
- data/.devcontainer/compose.yml +0 -8
- data/.devcontainer/devcontainer.json +0 -8
- data/.standard.yml +0 -2
- data/Gemfile +0 -36
- data/Rakefile +0 -20
- data/Steepfile +0 -5
- data/ext/encoded_id/extconf.rb +0 -3
- data/ext/encoded_id/extension.c +0 -123
- data/ext/encoded_id/hashids.c +0 -939
- data/ext/encoded_id/hashids.h +0 -139
- data/lib/encoded_id/hash_id.rb +0 -227
- data/lib/encoded_id/hash_id_consistent_shuffle.rb +0 -27
- data/lib/encoded_id/hash_id_salt.rb +0 -15
- data/lib/encoded_id/ordinal_alphabet_separator_guards.rb +0 -90
- data/rbs_collection.yaml +0 -24
- data/sig/encoded_id.rbs +0 -189
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# rbs_inline: enabled
|
|
4
|
+
|
|
5
|
+
module EncodedId
|
|
6
|
+
module Encoders
|
|
7
|
+
# Base configuration class for encoder-specific settings
|
|
8
|
+
# This provides common parameters shared across all encoders
|
|
9
|
+
class BaseConfiguration
  # @rbs @min_length: Integer
  # @rbs @alphabet: Alphabet
  # @rbs @split_at: Integer?
  # @rbs @split_with: String?
  # @rbs @hex_digit_encoding_group_size: Integer
  # @rbs @max_length: Integer?
  # @rbs @max_inputs_per_id: Integer
  # @rbs @blocklist: Blocklist
  # @rbs @blocklist_mode: Symbol
  # @rbs @blocklist_max_length: Integer

  # Length and formatting options.
  attr_reader :min_length, :max_length, :split_at, :split_with
  # Encoding options.
  attr_reader :alphabet, :hex_digit_encoding_group_size, :max_inputs_per_id
  # Blocklist options.
  attr_reader :blocklist, :blocklist_mode, :blocklist_max_length

  # Validates each option in turn and stores it on the instance. The first
  # invalid option encountered raises; hex_digit_encoding_group_size is stored
  # as given, without validation.
  #
  # @rbs (?min_length: Integer, ?alphabet: Alphabet, ?split_at: Integer?, ?split_with: String?, ?hex_digit_encoding_group_size: Integer, ?max_length: Integer?, ?max_inputs_per_id: Integer, ?blocklist: Blocklist | Array[String] | Set[String] | nil, ?blocklist_mode: Symbol, ?blocklist_max_length: Integer) -> void
  def initialize(
    min_length: 8,
    alphabet: Alphabet.modified_crockford,
    split_at: 4,
    split_with: "-",
    hex_digit_encoding_group_size: 4,
    max_length: 128,
    max_inputs_per_id: 32,
    blocklist: Blocklist.empty,
    blocklist_mode: :length_threshold,
    blocklist_max_length: 32
  )
    @min_length = validate_min_length(min_length)
    @alphabet = validate_alphabet(alphabet)
    @split_at = validate_split_at(split_at)
    # split_with is checked against the already validated alphabet.
    @split_with = validate_split_with(split_with, @alphabet)
    @hex_digit_encoding_group_size = hex_digit_encoding_group_size
    @max_length = validate_max_length(max_length)
    @max_inputs_per_id = validate_max_inputs_per_id(max_inputs_per_id)

    # A non-empty blocklist is narrowed via Blocklist#filter_for_alphabet;
    # an empty one is stored untouched.
    normalized_blocklist = validate_blocklist(blocklist)
    @blocklist = normalized_blocklist.empty? ? normalized_blocklist : normalized_blocklist.filter_for_alphabet(@alphabet)

    @blocklist_mode = validate_blocklist_mode(blocklist_mode)
    @blocklist_max_length = validate_blocklist_max_length(blocklist_max_length)
    validate_blocklist_collision_risk
  end

  # Identifies the encoder; always raises here and must be overridden.
  #
  # @rbs () -> Symbol
  def encoder_type
    raise NotImplementedError, "Subclasses must implement encoder_type"
  end

  # Builds the encoder; always raises here and must be overridden.
  #
  # @rbs () -> untyped
  def create_encoder
    raise NotImplementedError, "Subclasses must implement create_encoder"
  end

  private

  # Returns the alphabet unchanged when it is an Alphabet, raises otherwise.
  #
  # @rbs (Alphabet alphabet) -> Alphabet
  def validate_alphabet(alphabet)
    raise InvalidAlphabetError, "alphabet must be an instance of Alphabet" unless alphabet.is_a?(Alphabet)

    alphabet
  end

  # Requires a positive Integer.
  #
  # @rbs (Integer min_length) -> Integer
  def validate_min_length(min_length)
    raise InvalidConfigurationError, "min_length must be an integer greater than 0" unless valid_integer_option?(min_length)

    min_length
  end

  # Accepts nil or a positive Integer.
  #
  # @rbs (Integer? max_length) -> Integer?
  def validate_max_length(max_length)
    return max_length if max_length.nil?
    raise InvalidConfigurationError, "max_length must be an integer greater than 0 or nil" unless valid_integer_option?(max_length)

    max_length
  end

  # Requires a positive Integer.
  #
  # @rbs (Integer max_inputs_per_id) -> Integer
  def validate_max_inputs_per_id(max_inputs_per_id)
    unless valid_integer_option?(max_inputs_per_id)
      raise InvalidConfigurationError, "max_inputs_per_id must be an integer greater than 0"
    end

    max_inputs_per_id
  end

  # Accepts nil or a positive Integer.
  #
  # @rbs (Integer? split_at) -> Integer?
  def validate_split_at(split_at)
    return split_at if split_at.nil?
    raise InvalidConfigurationError, "split_at must be an integer greater than 0 or nil" unless valid_integer_option?(split_at)

    split_at
  end

  # Accepts nil, or a String that alphabet.characters does not include.
  #
  # @rbs (String? split_with, Alphabet alphabet) -> String?
  def validate_split_with(split_with, alphabet)
    return split_with if split_with.nil?

    usable = split_with.is_a?(String) && !alphabet.characters.include?(split_with)
    raise InvalidConfigurationError, "split_with must be a string not part of the alphabet, or nil" unless usable

    split_with
  end

  # True only for Integers strictly greater than zero.
  #
  # @rbs (Integer? value) -> bool
  def valid_integer_option?(value)
    value.is_a?(Integer) && value > 0
  end

  # Normalises the blocklist option into a Blocklist: a Blocklist passes
  # through, nil becomes an empty Blocklist, an Array or Set is wrapped.
  #
  # @rbs (Blocklist | Array[String] | Set[String] | nil blocklist) -> Blocklist
  def validate_blocklist(blocklist)
    if blocklist.is_a?(Blocklist)
      blocklist
    elsif blocklist.nil?
      Blocklist.empty
    elsif blocklist.is_a?(Array) || blocklist.is_a?(Set)
      Blocklist.new(blocklist)
    else
      raise InvalidConfigurationError, "blocklist must be a Blocklist, Set, or Array of strings"
    end
  end

  # Requires one of the three supported mode symbols.
  #
  # @rbs (Symbol blocklist_mode) -> Symbol
  def validate_blocklist_mode(blocklist_mode)
    valid_modes = %i[always length_threshold raise_if_likely]
    unless valid_modes.include?(blocklist_mode)
      raise InvalidConfigurationError, "blocklist_mode must be one of #{valid_modes.inspect}, got #{blocklist_mode.inspect}"
    end

    blocklist_mode
  end

  # Requires a positive Integer.
  #
  # @rbs (Integer blocklist_max_length) -> Integer
  def validate_blocklist_max_length(blocklist_max_length)
    unless valid_integer_option?(blocklist_max_length)
      raise InvalidConfigurationError, "blocklist_max_length must be an integer greater than 0"
    end

    blocklist_max_length
  end

  # Validates configuration for :raise_if_likely mode. Does nothing when the
  # blocklist is empty or another mode is selected.
  #
  # @rbs () -> void
  def validate_blocklist_collision_risk
    return if @blocklist.empty?
    return unless @blocklist_mode == :raise_if_likely

    # The min_length check takes priority; the max_inputs_per_id check
    # (rough heuristic: more than 100 inputs) is only reported when it passes.
    problem =
      if @min_length > @blocklist_max_length
        "blocklist_mode is :raise_if_likely and min_length (#{@min_length}) exceeds blocklist_max_length (#{@blocklist_max_length}). " \
          "Long IDs have high collision probability with blocklists. " \
          "Use blocklist_mode: :length_threshold or remove the blocklist."
      elsif @max_inputs_per_id > 100
        "blocklist_mode is :raise_if_likely and max_inputs_per_id (#{@max_inputs_per_id}) is very high. " \
          "Encoding many inputs typically results in long IDs with high blocklist collision probability. " \
          "Use blocklist_mode: :length_threshold or remove the blocklist."
      end

    raise InvalidConfigurationError, problem if problem
  end
end
|
|
153
|
+
end
|
|
154
|
+
end
|
|
@@ -0,0 +1,527 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# This implementation based on https://github.com/peterhellberg/hashids.rb
|
|
4
|
+
# --------------------------------------------------------------------------
|
|
5
|
+
# Original Hashids implementation is MIT licensed:
|
|
6
|
+
#
|
|
7
|
+
# Copyright (c) 2013-2017 Peter Hellberg
|
|
8
|
+
#
|
|
9
|
+
# MIT License
|
|
10
|
+
#
|
|
11
|
+
# Permission is hereby granted, free of charge, to any person obtaining
|
|
12
|
+
# a copy of this software and associated documentation files (the
|
|
13
|
+
# "Software"), to deal in the Software without restriction, including
|
|
14
|
+
# without limitation the rights to use, copy, modify, merge, publish,
|
|
15
|
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
|
16
|
+
# permit persons to whom the Software is furnished to do so, subject to
|
|
17
|
+
# the following conditions:
|
|
18
|
+
#
|
|
19
|
+
# The above copyright notice and this permission notice shall be
|
|
20
|
+
# included in all copies or substantial portions of the Software.
|
|
21
|
+
#
|
|
22
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
23
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
24
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
25
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
|
26
|
+
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
|
27
|
+
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
|
28
|
+
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
29
|
+
# --------------------------------------------------------------------------
|
|
30
|
+
#
|
|
31
|
+
# This version also MIT licensed (Stephen Ierodiaconou 2023-2025):
|
|
32
|
+
# see LICENSE.txt file
|
|
33
|
+
# rbs_inline: enabled
|
|
34
|
+
|
|
35
|
+
# == HashID Algorithm Overview
|
|
36
|
+
#
|
|
37
|
+
# Hashids is a small library that generates short, unique, non-sequential IDs from numbers.
|
|
38
|
+
# The algorithm has several key properties:
|
|
39
|
+
#
|
|
40
|
+
# 1. **Deterministic**: Same input numbers always produce the same hash
|
|
41
|
+
# 2. **Reversible**: You can decode the hash back to the original numbers
|
|
42
|
+
# 3. **Non-sequential**: Sequential numbers don't produce sequential hashes
|
|
43
|
+
# 4. **Customizable**: Uses a salt, minimum length, alphabet, and optional blocklist
|
|
44
|
+
#
|
|
45
|
+
# === Core Algorithm Concepts:
|
|
46
|
+
#
|
|
47
|
+
# The algorithm works by:
|
|
48
|
+
# - Converting each integer to a custom base-N representation using a shuffled alphabet
|
|
49
|
+
# - The alphabet permutation is deterministic based on a "lottery" character and salt
|
|
50
|
+
# - A lottery character is chosen based on a hash of the input numbers
|
|
51
|
+
# - Each number is encoded with a different alphabet permutation (for obfuscation)
|
|
52
|
+
# - Separators divide encoded numbers, and guards are added for minimum length
|
|
53
|
+
# - The decode process reverses this by extracting the lottery, splitting on separators,
|
|
54
|
+
# and converting each segment back from the custom base-N representation
|
|
55
|
+
#
|
|
56
|
+
# === Character Sets:
|
|
57
|
+
#
|
|
58
|
+
# - **Alphabet**: Main characters used to encode numbers (after setup, doesn't include separators/guards)
|
|
59
|
+
# - **Separators**: Characters that separate encoded number segments within a hash
|
|
60
|
+
# - **Guards**: Special characters added at boundaries to meet minimum length requirements
|
|
61
|
+
# - All three sets are disjoint (no overlap) after initialization
|
|
62
|
+
#
|
|
63
|
+
# === Why This Design?
|
|
64
|
+
#
|
|
65
|
+
# The shuffling and lottery system ensures that:
|
|
66
|
+
# - Similar numbers produce very different hashes (no sequential patterns)
|
|
67
|
+
# - Each position in a multi-number sequence uses a different encoding
|
|
68
|
+
# - The hash obfuscates the inputs if the salt is unknown
|
|
69
|
+
# - The same numbers always produce the same hash (deterministic)
|
|
70
|
+
|
|
71
|
+
module EncodedId
|
|
72
|
+
module Encoders
|
|
73
|
+
# Implementation of HashId, optimised and adapted from the original `hashids.rb` gem
|
|
74
|
+
class Hashid
|
|
75
|
+
include HashidConsistentShuffle
|
|
76
|
+
|
|
77
|
+
# @rbs @separators_and_guards: HashidOrdinalAlphabetSeparatorGuards
|
|
78
|
+
# @rbs @alphabet_ordinals: Array[Integer]
|
|
79
|
+
# @rbs @separator_ordinals: Array[Integer]
|
|
80
|
+
# @rbs @guard_ordinals: Array[Integer]
|
|
81
|
+
# @rbs @salt_ordinals: Array[Integer]
|
|
82
|
+
# @rbs @escaped_separator_selector: String
|
|
83
|
+
# @rbs @escaped_guards_selector: String
|
|
84
|
+
# @rbs @blocklist_mode: Symbol
|
|
85
|
+
# @rbs @blocklist_max_length: Integer
|
|
86
|
+
|
|
87
|
+
# Initialize a new HashId encoder with custom parameters.
|
|
88
|
+
#
|
|
89
|
+
# The initialization process sets up the character sets (alphabet, separators, guards)
|
|
90
|
+
# that will be used for encoding and decoding. These character sets are:
|
|
91
|
+
# 1. Shuffled based on the salt for uniqueness
|
|
92
|
+
# 2. Balanced in ratios (alphabet:separators ≈ 3.5:1, alphabet:guards ≈ 12:1)
|
|
93
|
+
# 3. Made disjoint (no character appears in multiple sets)
|
|
94
|
+
#
|
|
95
|
+
# @param salt [String] Secret salt used to shuffle the alphabet (empty string is valid)
|
|
96
|
+
# @param min_hash_length [Integer] Minimum length of generated hashes (0 for no minimum)
|
|
97
|
+
# @param alphabet [Alphabet] Character set to use for encoding
|
|
98
|
+
# @param blocklist [Blocklist?] Optional list of words that shouldn't appear in hashes
|
|
99
|
+
# @param blocklist_mode [Symbol] Mode for blocklist checking (:always, :length_threshold, :raise_if_likely)
|
|
100
|
+
# @param blocklist_max_length [Integer] Maximum ID length for blocklist checking (when mode is :length_threshold)
|
|
101
|
+
#
|
|
102
|
+
# @rbs (String salt, ?Integer min_hash_length, ?Alphabet alphabet, ?Blocklist? blocklist, ?Symbol blocklist_mode, ?Integer blocklist_max_length) -> void
|
|
103
|
+
def initialize(salt, min_hash_length = 0, alphabet = Alphabet.alphanum, blocklist = nil, blocklist_mode = :length_threshold, blocklist_max_length = 32)
  # Reject anything that is not a non-negative Integer before storing any state.
  min_length_ok = min_hash_length.is_a?(Integer) && min_hash_length >= 0
  raise ArgumentError, "The min length must be a Integer and greater than or equal to 0" unless min_length_ok

  @salt = salt
  @min_hash_length = min_hash_length
  @alphabet = alphabet
  @blocklist = blocklist
  @blocklist_mode = blocklist_mode
  @blocklist_max_length = blocklist_max_length

  # The helper object derives the alphabet, separator, guard and salt ordinals
  # from the alphabet and salt; cache each of them on this instance.
  character_sets = HashidOrdinalAlphabetSeparatorGuards.new(alphabet, salt)
  @separators_and_guards = character_sets
  @alphabet_ordinals = character_sets.alphabet
  @separator_ordinals = character_sets.seps
  @guard_ordinals = character_sets.guards
  @salt_ordinals = character_sets.salt

  # Selector strings used with String#tr while decoding. They come pre-escaped
  # from the helper, so characters that are special inside a tr selector
  # (such as '-', '\\' and '^') are treated literally.
  @escaped_separator_selector = character_sets.seps_tr_selector
  @escaped_guards_selector = character_sets.guards_tr_selector
end
|
|
125
|
+
|
|
126
|
+
attr_reader :alphabet_ordinals #: Array[Integer]
|
|
127
|
+
attr_reader :separator_ordinals #: Array[Integer]
|
|
128
|
+
attr_reader :guard_ordinals #: Array[Integer]
|
|
129
|
+
attr_reader :salt_ordinals #: Array[Integer]
|
|
130
|
+
attr_reader :salt #: String
|
|
131
|
+
attr_reader :alphabet #: Alphabet
|
|
132
|
+
attr_reader :blocklist #: Blocklist?
|
|
133
|
+
attr_reader :min_hash_length #: Integer
|
|
134
|
+
|
|
135
|
+
# Encode an array of non-negative integers into a hash string.
|
|
136
|
+
#
|
|
137
|
+
# The encoding process:
|
|
138
|
+
# 1. Validates all numbers are integers and non-negative
|
|
139
|
+
# 2. Calculates a "lottery" character based on the input numbers
|
|
140
|
+
# 3. For each number, shuffles the alphabet and encodes the number in that custom base
|
|
141
|
+
# 4. Inserts separator characters between encoded numbers
|
|
142
|
+
# 5. Adds guards and padding if needed to meet minimum length
|
|
143
|
+
# 6. Validates the result doesn't contain blocklisted words
|
|
144
|
+
#
|
|
145
|
+
# @param numbers [Array<Integer>] Array of non-negative integers to encode
|
|
146
|
+
# @return [String] The encoded hash string (empty if input is empty or contains negatives)
|
|
147
|
+
# @raise [BlocklistError] If the generated hash contains a blocklisted word
|
|
148
|
+
#
|
|
149
|
+
# @rbs (Array[Integer] numbers) -> String
|
|
150
|
+
def encode(numbers)
  # Integer() raises for anything that cannot be coerced; the results are discarded.
  numbers.each { |candidate| Integer(candidate) }

  # Empty input and any negative value both yield the empty string.
  return "" if numbers.empty?
  return "" if numbers.any? { |candidate| candidate < 0 }

  encoded = internal_encode(numbers)
  return encoded unless check_blocklist?(encoded)

  # The blocklist lookup returns the offending word, or a falsy value when clean.
  blocked_word = contains_blocklisted_word?(encoded)
  return encoded unless blocked_word

  raise EncodedId::BlocklistError, "Generated ID '#{encoded}' contains blocklisted word: '#{blocked_word}'"
end
|
|
165
|
+
|
|
166
|
+
# Decode a hash string back into an array of integers.
|
|
167
|
+
#
|
|
168
|
+
# The decoding process:
|
|
169
|
+
# 1. Removes guards by replacing them with spaces and splitting
|
|
170
|
+
# 2. Extracts the lottery character (first character after guard removal)
|
|
171
|
+
# 3. Splits on separators to get individual encoded number segments
|
|
172
|
+
# 4. For each segment, shuffles the alphabet the same way as encoding and decodes
|
|
173
|
+
# 5. Verifies by re-encoding the result and comparing to the original hash
|
|
174
|
+
#
|
|
175
|
+
# This verification step is critical for valid decoding: it ensures that random strings
|
|
176
|
+
# won't decode to valid numbers. Only properly encoded hashes will pass.
|
|
177
|
+
#
|
|
178
|
+
# @param hash [String] The hash string to decode
|
|
179
|
+
# @return [Array<Integer>] Array of decoded integers (empty if hash is invalid)
|
|
180
|
+
#
|
|
181
|
+
# @rbs (String hash) -> Array[Integer]
|
|
182
|
+
def decode(hash)
  # nil and "" carry nothing to decode; everything else goes to the real decoder.
  if hash.nil? || hash.empty?
    []
  else
    internal_decode(hash)
  end
end
|
|
187
|
+
|
|
188
|
+
private
|
|
189
|
+
|
|
190
|
+
# Internal encoding implementation - converts numbers to a hash string.
|
|
191
|
+
#
|
|
192
|
+
# Algorithm steps:
|
|
193
|
+
#
|
|
194
|
+
# Step 1: Calculate the "lottery" character
|
|
195
|
+
# - Create a hash_int from the input numbers (weighted sum: num % (index + 100))
|
|
196
|
+
# - Use hash_int to pick a lottery character from the alphabet
|
|
197
|
+
# - The lottery becomes the first character and seeds all alphabet shuffles
|
|
198
|
+
#
|
|
199
|
+
# Step 2: Encode each number
|
|
200
|
+
# - For each number:
|
|
201
|
+
# a. Shuffle alphabet using (lottery + salt) as the shuffle key
|
|
202
|
+
# b. Convert number to custom base-N using shuffled alphabet (via hash_one_number)
|
|
203
|
+
# c. Insert a separator character between numbers (chosen deterministically)
|
|
204
|
+
# - Each number gets a different alphabet permutation due to the shuffle
|
|
205
|
+
#
|
|
206
|
+
# Step 3: Add guards if below minimum length
|
|
207
|
+
# - Guards are special boundary characters that don't encode data
|
|
208
|
+
# - First guard is prepended based on (hash_int + first_char)
|
|
209
|
+
# - Second guard is appended based on (hash_int + third_char)
|
|
210
|
+
#
|
|
211
|
+
# Step 4: Pad with alphabet if still below minimum length
|
|
212
|
+
# - Shuffle the alphabet using itself as the key
|
|
213
|
+
# - Wrap the hash with the shuffled alphabet (second half + hash + first half)
|
|
214
|
+
# - If we overshoot the target length, keep the middle slice (trimming the excess from both ends)
|
|
215
|
+
#
|
|
216
|
+
# The result is a string where:
|
|
217
|
+
# - Structure: [padding?] [guard?] lottery encoded_num1 sep encoded_num2 sep ... [guard?] [padding?]
|
|
218
|
+
# - Each component is deterministic based on the input numbers and salt
|
|
219
|
+
# - Similar inputs produce very different outputs due to the lottery system
|
|
220
|
+
#
|
|
221
|
+
# @param numbers [Array<Integer>] Non-negative integers to encode
|
|
222
|
+
# @return [String] The encoded hash string
|
|
223
|
+
#
|
|
224
|
+
# @rbs (Array[Integer] numbers) -> String
|
|
225
|
+
def internal_encode(numbers)
  working_alphabet = @alphabet_ordinals.dup
  separators = @separator_ordinals
  guards = @guard_ordinals

  base = working_alphabet.length
  count = numbers.length

  # Step 1: fold the inputs into hash_int; each position uses its own modulus (index + 100).
  hash_int = 0
  count.times { |position| hash_int += numbers[position] % (position + 100) }

  # The lottery ordinal opens the hash and seeds every alphabet shuffle below.
  lottery = working_alphabet[hash_int % base]

  # @type var hashid_code: Array[Integer]
  hashid_code = [lottery]

  # Shuffle key shared by all numbers: lottery followed by the salt ordinals.
  seasoning = [lottery, *@salt_ordinals]

  # Scratch array that holds the alphabet as it was before each shuffle.
  pre_shuffle = working_alphabet.dup
  final_position = count - 1

  # Step 2: encode every number with a freshly shuffled alphabet.
  count.times do |position|
    number = numbers[position]

    pre_shuffle.replace(working_alphabet)
    consistent_shuffle!(working_alphabet, seasoning, pre_shuffle, base)

    closing_ordinal = hash_one_number(hashid_code, number, working_alphabet, base)

    # No separator after the final number.
    next unless position < final_position

    separator_seed = number % (closing_ordinal + position)
    hashid_code << separators[separator_seed % separators.length]
  end

  # Step 3: add up to two guards while the code is shorter than the minimum.
  if hashid_code.length < @min_hash_length
    guard_count = guards.length

    # Leading guard: picked from hash_int plus the current first ordinal.
    leading_ordinal = hashid_code[0] #: Integer
    hashid_code.unshift(guards[(hash_int + leading_ordinal) % guard_count])

    if hashid_code.length < @min_hash_length
      # Trailing guard: picked from hash_int plus the ordinal at index 2,
      # or from hash_int alone when there is no such element.
      third_ordinal = hashid_code[2]
      guard_seed = third_ordinal ? hash_int + third_ordinal : hash_int
      hashid_code << guards[guard_seed % guard_count]
    end
  end

  # Step 4: wrap the code in shuffled alphabet halves until it is long enough.
  half = base.div(2)

  until hashid_code.length >= @min_hash_length
    # The alphabet is reshuffled with a copy of itself as the key.
    consistent_shuffle!(working_alphabet, working_alphabet.dup, nil, base)

    back_half = working_alphabet[half..] #: Array[Integer]
    front_half = working_alphabet[0, half] #: Array[Integer]
    hashid_code = back_half + hashid_code + front_half

    # On overshoot keep a window of exactly @min_hash_length ordinals,
    # starting excess / 2 elements in.
    excess = hashid_code.length - @min_hash_length
    hashid_code = hashid_code[excess / 2, @min_hash_length] if excess > 0 #: Array[Integer]
  end

  # Turn the ordinals into a UTF-8 string.
  hashid_code.pack("U*")
end
|
|
327
|
+
|
|
328
|
+
# Internal decoding implementation - converts a hash string back to numbers.
|
|
329
|
+
#
|
|
330
|
+
# Algorithm steps:
|
|
331
|
+
#
|
|
332
|
+
# Step 1: Remove guards
|
|
333
|
+
# - Replace all guard characters with spaces and split
|
|
334
|
+
# - Guards can appear at positions [0] or [0] and [-1]
|
|
335
|
+
# - If the split yields 2 or 3 segments, segment [1] contains the actual hash
|
|
336
|
+
# - Otherwise, element [0] contains the hash
|
|
337
|
+
#
|
|
338
|
+
# Step 2: Extract lottery and split on separators
|
|
339
|
+
# - First character is the lottery (same as during encoding)
|
|
340
|
+
# - Replace separator characters with spaces and split
|
|
341
|
+
# - Each segment is an encoded number
|
|
342
|
+
#
|
|
343
|
+
# Step 3: Decode each number
|
|
344
|
+
# - For each segment:
|
|
345
|
+
# a. Shuffle alphabet using (lottery + salt) - same as encoding
|
|
346
|
+
# b. Convert from custom base-N back to integer (via unhash)
|
|
347
|
+
# - The alphabet shuffles must match the encoding shuffles exactly
|
|
348
|
+
#
|
|
349
|
+
# Step 4: Verify the result
|
|
350
|
+
# - Re-encode the decoded numbers and compare to original hash
|
|
351
|
+
# - If they don't match, return empty array
|
|
352
|
+
# - This prevents random strings from decoding to valid numbers
|
|
353
|
+
#
|
|
354
|
+
# @param hash [String] The hash string to decode
|
|
355
|
+
# @return [Array<Integer>] Decoded integers (empty if hash is invalid)
|
|
356
|
+
#
|
|
357
|
+
# @rbs (String hash) -> Array[Integer]
|
|
358
|
+
def internal_decode(hash)
  # @type var decoded: Array[Integer]
  decoded = []
  working_alphabet = @alphabet_ordinals.dup

  # Step 1: turn every guard character into a space and split on whitespace.
  segments = hash.tr(@escaped_guards_selector, " ").split(" ")

  # With 2 or 3 segments the payload sits at index 1, otherwise at index 0.
  payload_index = (segments.length == 2 || segments.length == 3) ? 1 : 0
  payload = segments[payload_index]
  return decoded unless payload

  # Step 2: the first character is the lottery; the rest holds the encoded numbers.
  lottery = payload[0] #: String
  remainder = payload[1..] || "" #: String
  sub_hashes = remainder.tr(@escaped_separator_selector, " ").split(" ")

  # Same shuffle key as during encoding: lottery ordinal followed by the salt ordinals.
  seasoning = [lottery.ord, *@salt_ordinals]

  # Step 3: reshuffle the alphabet before each segment, then convert the segment back.
  sub_hashes.each do |sub_hash|
    consistent_shuffle!(working_alphabet, seasoning, working_alphabet.dup, working_alphabet.length)
    decoded << unhash(sub_hash, working_alphabet)
  end

  # Step 4: accept the result only if re-encoding reproduces the input exactly.
  # NOTE(review): this goes through the public #encode, so its blocklist check
  # also runs here - confirm that raising BlocklistError from decode is intended.
  (encode(decoded) == hash) ? decoded : []
end
|
|
412
|
+
|
|
413
|
+
# Convert a single integer to its representation in a custom base-N system.
|
|
414
|
+
#
|
|
415
|
+
# This is similar to converting a decimal number to binary, hex, etc., but:
|
|
416
|
+
# - Uses a custom alphabet instead of 0-9 or 0-9A-F
|
|
417
|
+
# - The alphabet can be any length (base-N where N = alphabet.length)
|
|
418
|
+
# - Digits are generated least-significant first; each is inserted ahead of the previous one, so the most significant digit ends up first
|
|
419
|
+
#
|
|
420
|
+
# Example: Converting 123 to base-10 with alphabet ['a','b','c','d','e','f','g','h','i','j']
|
|
421
|
+
# - 123 % 10 = 3 → 'd' (index 3)
|
|
422
|
+
# - 12 % 10 = 2 → 'c' (index 2)
|
|
423
|
+
# - 1 % 10 = 1 → 'b' (index 1)
|
|
424
|
+
# - Result: digits are generated as 'd', 'c', 'b', each inserted before the previous one, so hash_code gains "bcd"
|
|
425
|
+
#
|
|
426
|
+
# @param hash_code [Array<Integer>] The array to append characters to (modified in place)
|
|
427
|
+
# @param num [Integer] The number to convert
|
|
428
|
+
# @param alphabet [Array<Integer>] The alphabet ordinals to use for encoding
|
|
429
|
+
# @param alphabet_length [Integer] Length of the alphabet (cached for performance)
|
|
430
|
+
# @return [Integer] The ordinal of the last character added
|
|
431
|
+
#
|
|
432
|
+
# @rbs (Array[Integer] hash_code, Integer num, Array[Integer] alphabet, Integer alphabet_length) -> Integer
|
|
433
|
+
def hash_one_number(hash_code, num, alphabet, alphabet_length)
  last_ordinal = 0 #: Integer
  remaining = num
  offset = 0
  finished = false

  # Peel off base-N digits, least significant first. The insertion index moves
  # one step further from the end on every pass (-1, -2, -3, ...), so each new
  # digit lands in front of the digits already written for this number and the
  # finished segment reads most significant digit first.
  # At least one digit is always written, so 0 encodes as alphabet[0].
  until finished
    last_ordinal = alphabet[remaining % alphabet_length] || 0
    offset -= 1
    hash_code.insert(offset, last_ordinal)
    remaining /= alphabet_length
    finished = !remaining.positive?
  end

  # The most recently inserted ordinal, i.e. the most significant digit.
  last_ordinal
end
|
|
449
|
+
|
|
450
|
+
# Convert a custom base-N encoded string back to an integer.
|
|
451
|
+
#
|
|
452
|
+
# This is the inverse of hash_one_number. It treats the input string as a number
|
|
453
|
+
# in a custom base where each character's position in the alphabet represents its digit value.
|
|
454
|
+
#
|
|
455
|
+
# Example: Decoding "bcd" with alphabet ['a','b','c','d','e','f','g','h','i','j'] (base-10)
|
|
456
|
+
# - 'b' at position 1: 1 × 10² = 100
|
|
457
|
+
# - 'c' at position 2: 2 × 10¹ = 20
|
|
458
|
+
# - 'd' at position 3: 3 × 10⁰ = 3
|
|
459
|
+
# - Result: 100 + 20 + 3 = 123
|
|
460
|
+
#
|
|
461
|
+
# @param input [String] The encoded string to decode
|
|
462
|
+
# @param alphabet [Array<Integer>] The alphabet ordinals used for encoding
|
|
463
|
+
# @return [Integer] The decoded number
|
|
464
|
+
# @raise [InvalidInputError] If input contains characters not in the alphabet
|
|
465
|
+
#
|
|
466
|
+
# @rbs (String input, Array[Integer] alphabet) -> Integer
|
|
467
|
+
def unhash(input, alphabet)
  num = 0 #: Integer
  base = alphabet.length

  # Horner's method: walk the characters from most to least significant,
  # shifting the running total up by one base-N place before adding the next
  # digit. This yields the same value as summing digit * base**exponent for
  # each position, without recomputing a power on every iteration.
  input.each_char do |char|
    # A character's digit value is its index within the alphabet ordinals.
    digit = alphabet.index(char.ord)
    # Raises on the first character (scanning left to right) not in the alphabet.
    raise InvalidInputError, "unable to unhash" unless digit

    num = num * base + digit
  end

  # An empty input decodes to 0.
  num
end
|
|
488
|
+
|
|
489
|
+
# Decide whether the given encoded string needs to be checked against the blocklist.
|
|
490
|
+
#
|
|
491
|
+
# Determines if blocklist checking should be performed based on mode and ID length
|
|
492
|
+
#
|
|
493
|
+
# @param encoded_string [String] The encoded ID to check
|
|
494
|
+
# @return [Boolean] True if blocklist should be checked
|
|
495
|
+
#
|
|
496
|
+
# @rbs (String encoded_string) -> bool
|
|
497
|
+
def check_blocklist?(encoded_string)
  list = blocklist
  # Nothing to check against when no blocklist is configured or it is empty.
  return false unless list && !list.empty?

  # Only :length_threshold makes the decision depend on the encoded ID itself.
  # Every other mode (:always, :raise_if_likely, or anything else) always checks.
  return true unless @blocklist_mode == :length_threshold

  encoded_string.length <= @blocklist_max_length
end
|
|
512
|
+
|
|
513
|
+
# Check whether the encoded string contains any blocklisted word.
#
# @param encoded_string [String] The encoded hash to check
|
|
514
|
+
# @return [String, false] The blocklisted word if found, false otherwise
|
|
515
|
+
#
|
|
516
|
+
# @rbs (String encoded_string) -> (String | false)
|
|
517
|
+
def contains_blocklisted_word?(encoded_string)
  list = blocklist
  # Without a blocklist, or with an empty one, nothing can match.
  return false unless list && !list.empty?

  # blocks? yields the matching word when there is one; normalise any falsy
  # result to a literal false so callers get String | false.
  list.blocks?(encoded_string) || false
end
|
|
525
|
+
end
|
|
526
|
+
end
|
|
527
|
+
end
|