multibases 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.rubocop.yml +7 -0
- data/.travis.yml +7 -0
- data/Gemfile +8 -0
- data/README.md +326 -0
- data/Rakefile +12 -0
- data/bin/console +15 -0
- data/bin/setup +8 -0
- data/lib/multibases.rb +42 -0
- data/lib/multibases/bare.rb +142 -0
- data/lib/multibases/base16.rb +106 -0
- data/lib/multibases/base2.rb +98 -0
- data/lib/multibases/base32.rb +110 -0
- data/lib/multibases/base64.rb +116 -0
- data/lib/multibases/base_x.rb +129 -0
- data/lib/multibases/byte_array.rb +73 -0
- data/lib/multibases/ord_table.rb +109 -0
- data/lib/multibases/registry.rb +53 -0
- data/lib/multibases/version.rb +5 -0
- data/multibases.gemspec +51 -0
- metadata +112 -0
@@ -0,0 +1,116 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Multibases
|
4
|
+
# Base64 codec (RFC 4648) supporting custom alphabets of 64 characters plus
# an optional 65th padding character.
#
# The actual bit packing is done with the standard alphabet through
# +Array#pack('m0')+ / +String#unpack1('m')+. For any other alphabet the
# bytes are transcoded between the standard table and the custom table.
class Base64
  # @return [String] description containing the alphabet and, when the table
  #   has one, the padding character
  def inspect
    pad = @table.padder.nil? ? '' : " pad=\"#{@table.padder.chr}\""

    "[Multibases::Base64 alphabet=\"#{@table.alphabet}\"#{pad}]"
  end

  # RFC 4648 implementation
  #
  # @param plain [String, Array] plain string or byte array
  # @return [EncodedByteArray] padded encoding using the standard alphabet
  def self.encode(plain)
    plain = plain.map(&:chr).join if plain.is_a?(Array)

    # Equivalent to Base64.strict_encode64(plain)
    EncodedByteArray.new([String(plain)].pack('m0').bytes)
  end

  # @param packed [String, Array] data encoded with the standard alphabet
  # @return [DecodedByteArray] decoded bytes
  def self.decode(packed)
    packed = packed.map(&:chr).join if packed.is_a?(Array)

    # Don't use m0, as that requires padding _always_
    DecodedByteArray.new(packed.unpack1('m').bytes)
  end

  # Ordinal table for a Base64 alphabet: 64 ordinals, optionally followed by
  # one padding ordinal.
  class Table < OrdTable
    # @param alphabet [String, Array] alphabet as a string or as a list of
    #   characters / ordinals
    # @return [Table]
    def self.from(alphabet, **opts)
      alphabet = alphabet.bytes if alphabet.respond_to?(:bytes)

      # Non-destructive map: a caller supplied array is left untouched
      new(alphabet.map(&:ord), **opts)
    end

    # @param ords [Array<Integer>] 64 unique ordinals, or 65 when the last
    #   one is the padding character
    # @raise [ArgumentError] when the number of unique ordinals is not 64 or 65
    def initialize(ords, **opts)
      ords = ords.uniq

      unless (64..65).cover?(ords.length)
        raise ArgumentError,
              'Expected alphabet to contain 64 characters or 64 + 1 ' \
              "padding character. Actual: #{ords.length} characters"
      end

      # The 65th ordinal, when present, is the padding character
      padder = nil
      *ords, padder = ords if ords.length == 65

      super(ords, padder: padder, **opts)
    end
  end

  # @param alphabet [String, Array] 64 characters, optionally followed by a
  #   padding character
  # @param strict [Boolean] forwarded to the table
  def initialize(alphabet, strict: false)
    @table = Table.from(alphabet, strict: strict)
  end

  ##
  # Encode +plain+ using this instance's alphabet
  #
  # @param plain [String, Array] plain string or byte array
  # @return [EncodedByteArray] encoded byte array
  #
  def encode(plain)
    return EncodedByteArray::EMPTY if plain.empty?

    encoded = Multibases::Base64.encode(plain)
    # A table without a padding character produces unpadded output
    encoded.chomp!(Default.table_padder) unless @table.padder
    return encoded if default?

    encoded.transcode(
      Default.table_ords(force_strict: @table.strict?),
      table_ords
    )
  end

  ##
  # Decode +encoded+ using this instance's alphabet
  #
  # A String argument is only read (via String#bytes), never modified, so
  # frozen strings are accepted.
  #
  # @param encoded [String, Array] encoded string or byte array
  # @return [DecodedByteArray] decoded byte array
  # @raise [ArgumentError] when +encoded+ contains bytes outside the table
  #
  def decode(encoded)
    return DecodedByteArray::EMPTY if encoded.empty?

    encoded = encoded.bytes unless encoded.is_a?(Array)

    unless decodable?(encoded)
      raise ArgumentError, "'#{encoded}' contains unknown characters"
    end

    unless default?
      encoded = ByteArray.new(encoded).transcode(
        table_ords,
        Default.table_ords(force_strict: @table.strict?)
      )
    end

    Multibases::Base64.decode(encoded)
  end

  # @return [Boolean] true when this instance equals the Default instance
  def default?
    eql?(Default)
  end

  # Two instances are equal when their tables are equal.
  def eql?(other)
    other.is_a?(Base64) && other.instance_variable_get(:@table) == @table
  end

  alias == eql?

  # Hash consistent with #eql?: instances with equal tables hash alike.
  def hash
    @table.hash
  end

  # @param encoded [Array<Integer>] encoded bytes
  # @return [Boolean] true when every byte is part of this table
  def decodable?(encoded)
    (encoded.uniq - table_ords).length.zero?
  end

  # @return [Array<Integer>] ordinals accepted by the table (see
  #   OrdTable#tr_ords)
  def table_ords(force_strict: nil)
    @table.tr_ords(force_strict: force_strict)
  end

  # @return [Integer, nil] ordinal of the padding character, if any
  def table_padder
    @table.padder
  end

  # rubocop:disable Metrics/LineLength
  Default = Base64.new('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=')
  UrlSafe = Base64.new('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_=')
  # rubocop:enable Metrics/LineLength
end
|
116
|
+
end
|
@@ -0,0 +1,129 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative './byte_array'
|
4
|
+
require_relative './ord_table'
|
5
|
+
|
6
|
+
module Multibases
|
7
|
+
# Generic base-N codec that treats the input bytes as one big number and
# rewrites that number in the base given by the alphabet length. Leading
# zero bytes are counted separately and re-added as leading "zero"
# characters (the first character of the alphabet).
class BaseX
  # @return [String] description containing base, alphabet and strictness
  def inspect
    "[Multibases::Base#{@table.base} " \
      "alphabet=\"#{@table.alphabet}\"" \
      "#{@table.strict? ? ' strict' : ''}" \
      ']'
  end

  # Indexed ordinal table built from an alphabet.
  class Table < IndexedOrdTable
    # @param alphabet [String, Array] alphabet as a string or as a list of
    #   characters / ordinals
    # @raise [ArgumentError] when the alphabet has 255 or more entries
    # @return [Table]
    def self.from(alphabet, **opts)
      raise ArgumentError, 'Alphabet too long' if alphabet.length >= 255

      alphabet = alphabet.bytes if alphabet.respond_to?(:bytes)

      # Non-destructive map: a caller supplied array is left untouched
      new(alphabet.map(&:ord), **opts)
    end
  end

  # @param alphabet [String, Array] characters of the target base
  # @param strict [Boolean] forwarded to the table
  def initialize(alphabet, strict: false)
    @table = Table.from(alphabet, strict: strict)
  end

  ##
  # Encode +plain+ to an encoded string
  #
  # @param plain [String, Array] plain string or byte array
  # @return [EncodedByteArray] encoded byte array
  #
  def encode(plain)
    return EncodedByteArray::EMPTY if plain.empty?

    plain = plain.bytes unless plain.is_a?(Array)
    # Width of the full input; only used for power-of-two bases below
    expected_length = @table.encoded_length(plain)

    # Find leading zeroes
    zeroes_count = plain.find_index { |b| b.ord != 0 } || plain.length
    plain = plain.drop(zeroes_count)

    # Encode number into destination base as byte array. When the input
    # consisted of zero bytes only there is no number left to encode; the
    # zero characters added below are the whole result.
    output = []
    unless plain.empty?
      plain_big_number = plain.inject(0) { |a, b| (a << 8) + b.ord }
      # Integer#digits yields least significant digit first
      output = plain_big_number.digits(@table.base).reverse.map do |digit|
        @table.ord_at(digit)
      end
    end

    # Prepend the leading zeroes
    output = Array.new(
      @table.encoded_zeroes_length(zeroes_count), @table.zero
    ) + output

    # Padding at the front, to match the expected length for bases that are
    # a power of two
    if @table.pad_to_power? && output.length < expected_length
      output = Array.new(expected_length - output.length, @table.zero) + output
    end

    EncodedByteArray.new(output)
  end

  ##
  # Decode +encoded+ to a byte array
  #
  # A String argument is only read (via String#bytes), never modified, so
  # frozen strings are accepted.
  #
  # @param encoded [String, Array, ByteArray] encoded string or byte array
  # @return [DecodedByteArray] decoded byte array
  # @raise [ArgumentError] when +encoded+ contains bytes outside the table
  #
  def decode(encoded)
    return DecodedByteArray::EMPTY if encoded.empty?

    encoded = encoded.bytes unless encoded.is_a?(Array)

    unless decodable?(encoded)
      raise ArgumentError, "'#{encoded}' contains unknown characters"
    end

    # Find leading zeroes
    zeroes_count =
      encoded.find_index { |b| b.ord != @table.zero } || encoded.length
    encoded = encoded.drop(zeroes_count)

    # Decode number from encoding base (most significant digit first)
    encoded_big_number = encoded.inject(0) do |number, char|
      number * @table.base + @table.index(char)
    end

    # Split the number into bytes, most significant byte first. Integer
    # arithmetic is used so that values such as 1 or 256 keep all of their
    # bytes; zero yields no bytes at all. Because the encoding is "lost"
    # the result might not be correct just yet. This is up to the caller to
    # fix. The algorithm **can not know** what the encoding was.
    output =
      encoded_big_number.zero? ? [] : encoded_big_number.digits(256).reverse

    # Prepend the leading zeroes
    output = Array.new(
      @table.decoded_zeroes_length(zeroes_count), 0x00
    ) + output

    DecodedByteArray.new(output)
  end

  # @param encoded [Array<Integer>] encoded bytes
  # @return [Boolean] true when every byte is part of this table
  def decodable?(encoded)
    (encoded.uniq - @table.tr_ords).length.zero?
  end
end
|
129
|
+
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Multibases
|
4
|
+
# Thin wrapper around an Array of bytes. Everything not defined here is
# delegated to the wrapped Array.
class ByteArray < DelegateClass(Array)
  # @return [Integer] hash of the wrapped array
  def hash
    __getobj__.hash
  end

  # Equality based on the string representation of both objects.
  def eql?(other)
    own = to_s
    other.to_s.eql?(own)
  end

  # @return [Array] a copy of the wrapped array
  def to_arr
    __getobj__.dup
  end

  # Answers true for this class hierarchy as well as for the classes of the
  # wrapped array.
  def is_a?(klazz)
    return true if super

    __getobj__.is_a?(klazz)
  end

  # Map every byte from its position in +from+ to the byte at the same
  # position in +to+. Bytes that have no position in +from+ (or no
  # counterpart in +to+) become nil.
  #
  # @param from [Array] source ordinals
  # @param to [Array] target ordinals
  # @return [ByteArray] new instance of the receiver's class
  def transcode(from, to)
    positions = from.each_with_index.to_h
    replacements = to.each_with_index.map { |ord, idx| [idx, ord] }.to_h
    translated = map { |byte| replacements[positions[byte]] }

    self.class.new(translated)
  end

  alias_method :to_a, :to_arr
  alias_method :kind_of?, :is_a?
end
|
31
|
+
|
32
|
+
# Byte array holding encoded output.
class EncodedByteArray < ByteArray
  def inspect
    %([Multibases::EncodedByteArray "#{to_str}"])
  end

  # @return [String] the bytes as a string, encoded as ASCII-8BIT
  def to_str
    characters = map(&:chr)
    characters.join.encode(Encoding::ASCII_8BIT)
  end

  # Remove every trailing occurrence of +ord+ in place. Nothing is removed
  # when +ord+ is nil/false or when no element differs from +ord+.
  #
  # @param ord [Integer, nil] byte to strip from the end
  # @return [EncodedByteArray] self
  def chomp!(ord)
    return self unless ord

    bytes = __getobj__
    last_kept = bytes.rindex { |el| el != ord }
    bytes.slice!((last_kept + 1)..-1) unless last_kept.nil?

    self
  end

  alias_method :to_s, :to_str
end
|
54
|
+
|
55
|
+
# Byte array holding decoded output.
class DecodedByteArray < ByteArray
  def inspect
    %([Multibases::DecodedByteArray "#{to_str}"])
  end

  # @param encoding [Encoding] encoding tag to apply to the resulting string
  # @return [String] the bytes joined into a string tagged with +encoding+
  def to_str(encoding = Encoding::UTF_8)
    joined = map(&:chr).join
    joined.force_encoding(encoding)
  end

  # Same as #to_str; accepts the same optional encoding argument.
  def force_encoding(*args)
    to_str(*args)
  end

  alias_method :to_s, :to_str
end
|
70
|
+
|
71
|
+
# Shared empty instances, returned by the codecs for empty input.
EncodedByteArray::EMPTY = EncodedByteArray.new([])
DecodedByteArray::EMPTY = DecodedByteArray.new([])
|
73
|
+
end
|
@@ -0,0 +1,109 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Multibases
|
4
|
+
# Table of the ordinals (byte values) that make up an alphabet, with an
# optional padding ordinal and a strict / loose casing mode.
class OrdTable
  attr_reader :base, :factor, :padder

  # @param ords [Array<Integer>] ordinals of the alphabet; duplicates are
  #   dropped
  # @param strict [Boolean] force strict mode
  # @param padder [Integer, nil] ordinal of the padding character
  def initialize(ords, strict:, padder: nil)
    @ords = ords.uniq
    @base = @ords.length
    @padder = padder

    letters = @ords.map(&:chr)
    lowered = letters.map(&:downcase).uniq
    raised = letters.map(&:upcase).uniq
    has_cased = !(raised - lowered).empty?

    # Strict means that the algorithm may _not_ treat incorrectly cased
    # input the same as correctly cased input. Besides the explicit flag,
    # the table is strict when upcasing and downcasing give the same set of
    # characters, or when two characters of the alphabet differ only in
    # case.
    @strict = strict || !has_cased || letters.length != lowered.length

    @loose_ords = (letters + lowered + raised).uniq.map(&:ord)
  end

  # Tables are equal when alphabet and strictness match.
  def eql?(other)
    return false unless other.is_a?(OrdTable)

    other.alphabet == alphabet && other.strict? == strict?
  end

  alias_method :==, :eql?

  def hash
    @ords.hash
  end

  def strict?
    @strict
  end

  # @param force_strict [Boolean] use the exact alphabet even when the table
  #   is not strict
  # @return [Array<Integer>] accepted ordinals, followed by the padding
  #   ordinal when there is one
  def tr_ords(force_strict: false)
    accepted = (strict? || force_strict) ? @ords : @loose_ords

    accepted + [@padder].compact
  end

  # @return [String] the alphabet as a string
  def alphabet
    @ords.map(&:chr).join
  end
end
|
56
|
+
|
57
|
+
# OrdTable that can additionally translate between an ordinal and its
# position (digit value) in the alphabet, and that knows the ratio between
# encoded and decoded lengths for its base.
class IndexedOrdTable < OrdTable
  # @param ords [Array<Integer>] ordinals of the alphabet
  # @param opts [Hash] forwarded to OrdTable#initialize
  def initialize(ords, **opts)
    super(ords, **opts)

    # Index the de-duplicated ordinals stored by OrdTable rather than the
    # raw argument, so positions always run from 0 to base - 1 even when
    # the argument contains a character more than once.
    @forward = @ords.each_with_index.to_h
    @backward = @forward.invert
    # Number of encoded characters per decoded byte
    @factor = Math.log(256) / Math.log(base)
  end

  # @return [Integer] ordinal that represents digit value 0
  def zero
    @backward[0]
  end

  # Position of +byte+ in the alphabet. When the table is not strict, the
  # upcased and downcased variants of the byte are tried as well.
  #
  # @param byte [Integer] ordinal to look up
  # @return [Integer, nil, false] position; false when the table is strict
  #   and the byte is unknown, nil when no variant matches in loose mode
  def index(byte)
    @forward[byte] || !strict? && (
      @forward[byte.chr.upcase.ord] ||
      @forward[byte.chr.downcase.ord]
    )
  end

  # @param index [Integer] digit value
  # @return [Integer, nil] ordinal at that position
  def ord_at(index)
    @backward[index]
  end

  # @return [Integer] byte count times factor, rounded up
  def encoded_length(plain_bytes)
    (plain_bytes.length.to_f * factor).ceil
  end

  # @return [Integer] character count divided by factor, rounded
  def decoded_length(encoded_bytes)
    (encoded_bytes.length / factor).round
  end

  # @param count [Integer] number of leading zero bytes in the plain input
  # @return [Integer] number of zero characters to prepend when encoding
  def encoded_zeroes_length(count)
    # For power of 2 bases, add "canonical-width"
    return (factor * count).floor if pad_to_power?

    # For other bases, add an equivalent count to front
    count
  end

  # @param count [Integer] number of leading zero characters in the encoded
  #   input
  # @return [Integer] number of zero bytes to prepend when decoding
  def decoded_zeroes_length(count)
    # For power of 2 bases, add "canonical-width"
    return (count / factor).round if pad_to_power?

    # For other bases, add an equivalent count to front
    count
  end

  # @return [Boolean] true when log2 of the base is a whole number
  def pad_to_power?
    (Math.log2(base) % 1).zero?
  end
end
|
109
|
+
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Multibases
|
4
|
+
# rubocop:disable Style/MutableConstant
|
5
|
+
# Registered implementations, keyed by encoding. Entries are added at
# runtime by Multibases.implement, hence the mutable constant.
IMPLEMENTATIONS = {}
|
6
|
+
# rubocop:enable Style/MutableConstant
|
7
|
+
|
8
|
+
Registration = Struct.new(:code, :encoding, :engine) do
|
9
|
+
def hash
|
10
|
+
encoding.hash
|
11
|
+
end
|
12
|
+
|
13
|
+
def ==(other)
|
14
|
+
return [encoding, code].include?(other) if other.is_a?(String)
|
15
|
+
|
16
|
+
eql?(other)
|
17
|
+
end
|
18
|
+
|
19
|
+
def eql?(other)
|
20
|
+
other.is_a?(Registration) && other.encoding == encoding
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
module_function
|
25
|
+
|
26
|
+
# Register +encoding+ under +code+. When an implementation class is given
# it is instantiated with +alphabet+ and stored as the engine.
#
# @return [Registration] the stored registration
def implement(encoding, code, implementation = nil, alphabet = nil)
  engine = implementation&.new(alphabet)
  registration = Registration.new(code, encoding, engine)

  Multibases::IMPLEMENTATIONS[encoding] = registration
end
|
33
|
+
|
34
|
+
# Look up a registration and fail loudly when there is none.
#
# @raise [KeyError] when nothing is registered for the encoding or the code
def fetch_by!(code: nil, encoding: nil)
  return Multibases::IMPLEMENTATIONS.fetch(encoding) if encoding

  found = Multibases.find_by(code: code)
  raise KeyError, "No implementation has code #{code}" unless found

  found
end
|
41
|
+
|
42
|
+
# Find the first registration that matches +code+ or +encoding+.
#
# @return [Registration, nil] nil when nothing matches
def find_by(code: nil, encoding: nil)
  Multibases::IMPLEMENTATIONS.each_value.find do |registration|
    registration == code || registration == encoding
  end
end
|
47
|
+
|
48
|
+
# Reader and writer in one: without an argument the stored version is
# returned, with an argument it is stored (and returned).
def multibase_version(multibase_semver = nil)
  @multibase_version = multibase_semver unless multibase_semver.nil?
  @multibase_version
end
|
53
|
+
end
|