multibases 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.rubocop.yml +7 -0
- data/.travis.yml +7 -0
- data/Gemfile +8 -0
- data/README.md +326 -0
- data/Rakefile +12 -0
- data/bin/console +15 -0
- data/bin/setup +8 -0
- data/lib/multibases.rb +42 -0
- data/lib/multibases/bare.rb +142 -0
- data/lib/multibases/base16.rb +106 -0
- data/lib/multibases/base2.rb +98 -0
- data/lib/multibases/base32.rb +110 -0
- data/lib/multibases/base64.rb +116 -0
- data/lib/multibases/base_x.rb +129 -0
- data/lib/multibases/byte_array.rb +73 -0
- data/lib/multibases/ord_table.rb +109 -0
- data/lib/multibases/registry.rb +53 -0
- data/lib/multibases/version.rb +5 -0
- data/multibases.gemspec +51 -0
- metadata +112 -0
@@ -0,0 +1,116 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Multibases
|
4
|
+
class Base64
  # Human-readable summary showing the alphabet and, when the table has
  # one, the padding character.
  def inspect
    '[Multibases::Base64 ' \
      "alphabet=\"#{@table.alphabet}\"" \
      "#{@table.padder.nil? ? '' : ' pad="' + @table.padder.chr + '"'}" \
      ']'
  end

  ##
  # RFC 4648 implementation: encode +plain+ with Ruby's built-in packer.
  #
  # @param plain [String, Array] plain string or byte array
  # @return [EncodedByteArray] encoded byte array
  #
  def self.encode(plain)
    plain = plain.map(&:chr).join if plain.is_a?(Array)

    # Same directive Base64.strict_encode64 uses ('m0': no line feeds)
    EncodedByteArray.new(Array(String(plain)).pack('m0').bytes)
  end

  ##
  # RFC 4648 implementation: decode +packed+ with Ruby's built-in unpacker.
  #
  # @param packed [String, Array] encoded string or byte array
  # @return [DecodedByteArray] decoded byte array
  #
  def self.decode(packed)
    packed = packed.map(&:chr).join if packed.is_a?(Array)

    # Don't use m0, as that requires padding _always_
    DecodedByteArray.new(packed.unpack1('m').bytes)
  end

  # Ordinal table for a 64 character alphabet with an optional 65th
  # character that acts as the padding character.
  class Table < OrdTable
    ##
    # Build a table from +alphabet+.
    #
    # @param alphabet [String, Array] alphabet as a string, or as an array
    #   of elements responding to +ord+
    # @return [Table]
    #
    def self.from(alphabet, **opts)
      alphabet = alphabet.bytes if alphabet.respond_to?(:bytes)

      # Non-destructive map, so a caller-supplied Array is left untouched
      new(alphabet.map(&:ord), **opts)
    end

    ##
    # @param ords [Array<Integer>] 64 ordinals, or 64 ordinals followed by
    #   the padding ordinal
    # @raise [ArgumentError] when the number of unique ordinals is not 64
    #   or 65
    #
    def initialize(ords, **opts)
      ords = ords.uniq

      unless (64..65).cover?(ords.length)
        raise ArgumentError,
              'Expected alphabet to contain 64 characters or 64 + 1 ' \
              "padding character. Actual: #{ords.length} characters"
      end

      # A 65th ordinal is split off and used as the padding character
      padder = nil
      *ords, padder = ords if ords.length == 65

      super(ords, padder: padder, **opts)
    end
  end

  ##
  # @param alphabet [String, Array] 64 characters, optionally followed by a
  #   padding character
  # @param strict [Boolean] forwarded to the table
  #
  def initialize(alphabet, strict: false)
    @table = Table.from(alphabet, strict: strict)
  end

  ##
  # Encode +plain+ using this instance's alphabet.
  #
  # @param plain [String, Array] plain string or byte array
  # @return [EncodedByteArray] encoded byte array
  #
  def encode(plain)
    return EncodedByteArray::EMPTY if plain.empty?

    encoded = Multibases::Base64.encode(plain)

    # The built-in encoder always pads; strip it when this table has no
    # padding character.
    encoded.chomp!(Default.table_padder) unless @table.padder
    return encoded if default?

    # Map the default alphabet onto this table's alphabet
    encoded.transcode(
      Default.table_ords(force_strict: @table.strict?),
      table_ords
    )
  end

  ##
  # Decode +encoded+ using this instance's alphabet.
  #
  # @param encoded [String, Array, ByteArray] encoded string or byte array
  # @return [DecodedByteArray] decoded byte array
  # @raise [ArgumentError] when +encoded+ contains bytes that are not part
  #   of the table
  #
  def decode(encoded)
    return DecodedByteArray::EMPTY if encoded.empty?

    # String#bytes is independent of the string's encoding, so the input
    # does not need to be re-tagged (which would mutate the caller's string
    # and fail on frozen strings).
    encoded = encoded.bytes unless encoded.is_a?(Array)

    unless decodable?(encoded)
      raise ArgumentError, "'#{encoded}' contains unknown characters"
    end

    unless default?
      # Map this table's alphabet back onto the default alphabet
      encoded = ByteArray.new(encoded).transcode(
        table_ords,
        Default.table_ords(force_strict: @table.strict?)
      )
    end

    Multibases::Base64.decode(encoded)
  end

  # Whether this instance is equal to the Default instance.
  def default?
    eql?(Default)
  end

  # Two instances are equal when their tables are equal.
  def eql?(other)
    other.is_a?(Base64) && other.instance_variable_get(:@table) == @table
  end

  alias == eql?

  # Hash derived from the table, so that instances that are eql? also hash
  # alike.
  def hash
    @table.hash
  end

  # Whether every byte in +encoded+ is part of the table.
  def decodable?(encoded)
    (encoded.uniq - table_ords).length.zero?
  end

  # Ordinals (including the padding ordinal, if any) of the table.
  def table_ords(force_strict: nil)
    @table.tr_ords(force_strict: force_strict)
  end

  # Padding ordinal of the table, or nil.
  def table_padder
    @table.padder
  end

  # rubocop:disable Metrics/LineLength
  Default = Base64.new('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=')
  UrlSafe = Base64.new('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_=')
  # rubocop:enable Metrics/LineLength
end
|
116
|
+
end
|
@@ -0,0 +1,129 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative './byte_array'
|
4
|
+
require_relative './ord_table'
|
5
|
+
|
6
|
+
module Multibases
|
7
|
+
class BaseX
  # Human-readable summary showing the base, the alphabet and whether the
  # table is strict.
  def inspect
    "[Multibases::Base#{@table.base} " \
      "alphabet=\"#{@table.alphabet}\"" \
      "#{@table.strict? ? ' strict' : ''}" \
      ']'
  end

  # Indexed ordinal table for an arbitrary alphabet.
  class Table < IndexedOrdTable
    ##
    # Build a table from +alphabet+.
    #
    # @param alphabet [String, Array] alphabet as a string, or as an array
    #   of elements responding to +ord+
    # @raise [ArgumentError] when the alphabet has 255 or more entries
    #
    def self.from(alphabet, **opts)
      raise ArgumentError, 'Alphabet too long' if alphabet.length >= 255

      alphabet = alphabet.bytes if alphabet.respond_to?(:bytes)

      # Non-destructive map, so a caller-supplied Array is left untouched
      new(alphabet.map(&:ord), **opts)
    end
  end

  ##
  # @param alphabet [String, Array] the alphabet of the target base
  # @param strict [Boolean] forwarded to the table
  #
  def initialize(alphabet, strict: false)
    @table = Table.from(alphabet, strict: strict)
  end

  ##
  # Encode +plain+ to an encoded string
  #
  # @param plain [String, Array] plain string or byte array
  # @return [EncodedByteArray] encoded byte array
  #
  def encode(plain)
    return EncodedByteArray::EMPTY if plain.empty?

    plain = plain.bytes unless plain.is_a?(Array)

    # Measured before the leading zeroes are dropped; only used for the
    # power-of-two padding below.
    expected_length = @table.encoded_length(plain)

    # Find leading zeroes
    zeroes_count = plain.find_index { |b| b.ord != 0 } || plain.length
    plain = plain.drop(zeroes_count)

    # Encode number into destination base as byte array. When the input
    # consisted of zero bytes only there is no number left to encode; the
    # zeroes are fully represented by the prefix added below.
    output = []
    unless plain.empty?
      plain_big_number = plain.inject(0) { |a, b| (a << 8) + b.ord }

      # Integer#digits yields the least significant digit first
      output = plain_big_number
               .digits(@table.base)
               .reverse
               .map { |digit| @table.ord_at(digit) }
    end

    # Prepend the leading zeroes
    @table.encoded_zeroes_length(zeroes_count).times do
      output.unshift(@table.zero)
    end

    # Padding at the front (to match expected length). Only applied when
    # the table reports pad_to_power?.
    if @table.pad_to_power?
      (expected_length - output.length).times do
        output.unshift(@table.zero)
      end
    end

    EncodedByteArray.new(output)
  end

  ##
  # Decode +encoded+ to a byte array
  #
  # @param encoded [String, Array, ByteArray] encoded string or byte array
  # @return [DecodedByteArray] decoded byte array
  # @raise [ArgumentError] when +encoded+ contains bytes that are not part
  #   of the table
  #
  def decode(encoded)
    return DecodedByteArray::EMPTY if encoded.empty?

    # String#bytes is independent of the string's encoding, so the input
    # does not need to be re-tagged (which would mutate the caller's string
    # and fail on frozen strings).
    encoded = encoded.bytes unless encoded.is_a?(Array)

    unless decodable?(encoded)
      raise ArgumentError, "'#{encoded}' contains unknown characters"
    end

    # Find leading zeroes
    zeroes_count =
      encoded.find_index { |b| b.ord != @table.zero } || encoded.length
    encoded = encoded.drop(zeroes_count)

    # Decode number from encoding base to an Integer, most significant
    # digit first.
    encoded_big_number = encoded.inject(0) do |number, char|
      number * @table.base + @table.index(char)
    end

    # Split the number into bytes, most significant byte first. Because the
    # encoding is "lost" the result might not be correct just yet. This is
    # up to the caller to fix. The algorithm **can not know** what the
    # encoding was.
    #
    # Integer arithmetic is used instead of a logarithm so that 0 yields no
    # bytes and exact powers of 256 keep their most significant byte.
    output = []
    while encoded_big_number.positive?
      encoded_big_number, character_byte = encoded_big_number.divmod(256)
      output.unshift(character_byte)
    end

    # Prepend the leading zeroes
    @table.decoded_zeroes_length(zeroes_count).times do
      output.unshift(0x00)
    end

    DecodedByteArray.new(output)
  end

  # Whether every byte in +encoded+ is part of the table.
  def decodable?(encoded)
    (encoded.uniq - @table.tr_ords).length.zero?
  end
end
|
129
|
+
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Multibases
|
4
|
+
class ByteArray < DelegateClass(Array)
  # Hash of the wrapped array.
  def hash
    wrapped = __getobj__
    wrapped.hash
  end

  # Equality based on the string representation of both sides.
  def eql?(other)
    mine = to_s
    other.to_s.eql?(mine)
  end

  # A copy of the wrapped array; changing it does not affect this object.
  def to_arr
    __getobj__.dup
  end
  alias_method :to_a, :to_arr

  # True for the classes of this object as well as for the classes of the
  # wrapped array.
  def is_a?(klazz)
    return true if super

    __getobj__.is_a?(klazz)
  end
  alias_method :kind_of?, :is_a?

  ##
  # Replace every byte by the entry of +to+ found at the position that the
  # byte has in +from+. Bytes that are not in +from+ become nil.
  #
  # @param from [Array] source ordinals
  # @param to [Array] target ordinals
  # @return [ByteArray] new instance of the receiver's class
  #
  def transcode(from, to)
    position_of = from.each_with_index.to_h
    target_at = to.each_with_index.map { |ord, i| [i, ord] }.to_h

    translated = map { |byte| target_at[position_of[byte]] }
    self.class.new(translated)
  end
end
|
31
|
+
|
32
|
+
class EncodedByteArray < ByteArray
  # Human-readable summary containing the encoded text.
  def inspect
    %([Multibases::EncodedByteArray "#{to_str}"])
  end

  # The bytes joined into a single string, encoded as ASCII-8BIT.
  def to_str
    text = map(&:chr).join
    text.encode(Encoding::ASCII_8BIT)
  end
  alias_method :to_s, :to_str

  ##
  # Remove the trailing run of +ord+ from the wrapped array, in place.
  # Nothing is removed when +ord+ is falsy or when every element equals
  # +ord+.
  #
  # @param ord [Integer, nil] ordinal to strip from the end
  # @return [EncodedByteArray] self
  #
  def chomp!(ord)
    return self unless ord

    bytes = __getobj__
    last_kept = bytes.rindex { |el| el != ord }
    bytes.slice!((last_kept + 1)..-1) unless last_kept.nil?

    self
  end
end
|
54
|
+
|
55
|
+
class DecodedByteArray < ByteArray
  # Human-readable summary containing the decoded text.
  def inspect
    %([Multibases::DecodedByteArray "#{to_str}"])
  end

  ##
  # The bytes joined into a single string, tagged with +encoding+.
  #
  # @param encoding [Encoding] encoding to tag the result with
  # @return [String]
  #
  def to_str(encoding = Encoding::UTF_8)
    text = map(&:chr).join
    text.force_encoding(encoding)
  end
  alias_method :to_s, :to_str

  # Same as #to_str; lets the array be used where a string would be
  # re-tagged with an encoding.
  def force_encoding(*args)
    to_str(*args)
  end
end
|
70
|
+
|
71
|
+
# Shared instances that wrap an empty array.
EncodedByteArray::EMPTY = EncodedByteArray.new([])
DecodedByteArray::EMPTY = DecodedByteArray.new([])
|
73
|
+
end
|
@@ -0,0 +1,109 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Multibases
|
4
|
+
class OrdTable
  attr_reader :base, :factor, :padder

  ##
  # @param ords [Array<Integer>] ordinals of the alphabet; duplicates are
  #   dropped
  # @param strict [Boolean] force strict (case-sensitive) behaviour
  # @param padder [Integer, nil] ordinal of the padding character
  #
  def initialize(ords, strict:, padder: nil)
    @ords = ords.uniq
    @base = @ords.length
    @padder = padder

    chars = @ords.map(&:chr)
    lower = chars.map(&:downcase).uniq
    upper = chars.map(&:upcase).uniq
    upper_only = upper - lower

    # Strict means that the algorithm may _not_ treat incorrectly cased
    # input the same as correctly cased input. Besides an explicit request,
    # the table is strict when upcasing yields no characters beyond the
    # downcased ones, or when downcasing makes two characters of the
    # alphabet collide.
    collides_when_downcased = chars.length != lower.length
    @strict = strict || upper_only.empty? || collides_when_downcased

    # Every ordinal of the alphabet in either casing
    @loose_ords = (chars + lower + upper).uniq.map(&:ord)
  end

  # Tables are equal when both the alphabet and the strictness match.
  def eql?(other)
    return false unless other.is_a?(OrdTable)

    other.alphabet == alphabet && other.strict? == strict?
  end

  alias_method :==, :eql?

  # Hash of the (de-duplicated) ordinals.
  def hash
    @ords.hash
  end

  # Whether the table is strict.
  def strict?
    @strict
  end

  ##
  # Ordinals accepted by the table, followed by the padding ordinal when
  # the table has one.
  #
  # @param force_strict [Boolean] return the exact ordinals even when the
  #   table is not strict
  # @return [Array<Integer>]
  #
  def tr_ords(force_strict: false)
    accepted = (strict? || force_strict) ? @ords : @loose_ords
    accepted + [@padder].compact
  end

  # The alphabet as a string, without the padding character.
  def alphabet
    @ords.map(&:chr).join
  end
end
|
56
|
+
|
57
|
+
class IndexedOrdTable < OrdTable
  ##
  # @param ords [Array<Integer>] ordinals of the alphabet
  # @param opts [Hash] options forwarded to OrdTable
  #
  def initialize(ords, **opts)
    super(ords, **opts)

    # Index the de-duplicated ordinals kept by the superclass, so that the
    # positions always run from 0 to base - 1, even when +ords+ contained
    # duplicates.
    @forward = @ords.each_with_index.to_h
    @backward = @forward.invert

    # Number of characters in this base per byte (base 256) of input
    @factor = Math.log(256) / Math.log(base)
  end

  # Ordinal of the character that represents the value 0.
  def zero
    @backward[0]
  end

  ##
  # Position of +byte+ in the alphabet. When the table is not strict, the
  # upcased and downcased variants of +byte+ are tried as well.
  #
  # @param byte [Integer] ordinal to look up
  # @return [Integer, false, nil] the position, or a falsy value when the
  #   byte is unknown
  #
  def index(byte)
    @forward[byte] || !strict? && (
      @forward[byte.chr.upcase.ord] ||
      @forward[byte.chr.downcase.ord]
    )
  end

  # Ordinal of the character at position +index+ of the alphabet.
  def ord_at(index)
    @backward[index]
  end

  # Number of characters for +plain_bytes+, rounded up.
  def encoded_length(plain_bytes)
    (plain_bytes.length.to_f * factor).ceil
  end

  # Number of bytes for +encoded_bytes+, rounded to the nearest integer.
  def decoded_length(encoded_bytes)
    (encoded_bytes.length / factor).round
  end

  # Number of zero characters that represent +count+ leading zero bytes.
  def encoded_zeroes_length(count)
    # For power of 2 bases, add "canonical-width"
    return (factor * count).floor if pad_to_power?

    # For other bases, add an equivalent count to the front
    count
  end

  # Number of zero bytes that +count+ leading zero characters represent.
  def decoded_zeroes_length(count)
    # For power of 2 bases, add "canonical-width"
    # NOTE(review): Float#round rounds halves away from zero, so with a
    # factor of 2.0 (base 16) three zero characters give two zero bytes.
    # Confirm this is intended.
    return (count / factor).round if pad_to_power?

    # For other bases, add an equivalent count to the front
    count
  end

  # Whether the base is a power of two.
  def pad_to_power?
    (Math.log2(base) % 1).zero?
  end
end
|
109
|
+
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Multibases
|
4
|
+
# rubocop:disable Style/MutableConstant
|
5
|
+
# Registered encodings: maps an encoding to its Registration. Filled by
# Multibases.implement, hence deliberately left mutable.
IMPLEMENTATIONS = {}
|
6
|
+
# rubocop:enable Style/MutableConstant
|
7
|
+
|
8
|
+
Registration = Struct.new(:code, :encoding, :engine) do
  # Registrations are eql? when they are registered for the same encoding.
  def eql?(other)
    return false unless other.is_a?(Registration)

    other.encoding == encoding
  end

  # Hash of the encoding, in line with #eql?.
  def hash
    encoding.hash
  end

  # A String is compared against both the encoding and the code; anything
  # else is compared with #eql?.
  def ==(other)
    case other
    when String then [encoding, code].include?(other)
    else eql?(other)
    end
  end
end
|
23
|
+
|
24
|
+
module_function
|
25
|
+
|
26
|
+
##
# Register +encoding+ under +code+.
#
# @param encoding [String] key under which the registration is stored
# @param code [String] code of the encoding
# @param implementation [Class, nil] engine class, instantiated with
#   +alphabet+; the engine is nil when omitted
# @param alphabet [String, nil] alphabet handed to the engine class
# @return [Registration] the stored registration
#
def implement(encoding, code, implementation = nil, alphabet = nil)
  engine = implementation&.new(alphabet)
  registration = Registration.new(code, encoding, engine)

  Multibases::IMPLEMENTATIONS[encoding] = registration
end
|
33
|
+
|
34
|
+
##
# Look up a registration, raising when it does not exist. A given
# +encoding+ takes precedence over +code+.
#
# @param code [String, nil] code to search for
# @param encoding [String, nil] encoding to fetch
# @return [Registration]
# @raise [KeyError] when nothing is registered for the encoding or code
#
def fetch_by!(code: nil, encoding: nil)
  return Multibases::IMPLEMENTATIONS.fetch(encoding) if encoding

  found = Multibases.find_by(code: code)
  raise KeyError, "No implementation has code #{code}" unless found

  found
end
|
41
|
+
|
42
|
+
##
# First registration that equals +code+ or +encoding+, or nil when there
# is none.
#
# @param code [String, nil] value to compare registrations with
# @param encoding [String, nil] value to compare registrations with
# @return [Registration, nil]
#
def find_by(code: nil, encoding: nil)
  Multibases::IMPLEMENTATIONS.each_value.find do |registration|
    [code, encoding].any? { |wanted| registration == wanted }
  end
end
|
47
|
+
|
48
|
+
##
# Combined reader and writer: stores +multibase_semver+ when it is given
# and returns the stored version either way.
#
# @param multibase_semver [Object, nil] version to store
# @return [Object, nil] the stored version
#
def multibase_version(multibase_semver = nil)
  @multibase_version = multibase_semver unless multibase_semver.nil?
  @multibase_version
end
|
53
|
+
end
|