multibases 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,116 @@
1
+ # frozen_string_literal: true
2
+
3
module Multibases
  ##
  # Base64 codec (RFC 4648, via Array#pack / String#unpack1 with the 'm'
  # directive) that can be configured with a custom 64 character alphabet and
  # an optional 65th padding character.
  #
  class Base64
    ##
    # @return [String] description listing the alphabet and, when the table
    #   has one, the padding character
    #
    def inspect
      pad = @table.padder.nil? ? '' : " pad=\"#{@table.padder.chr}\""
      "[Multibases::Base64 alphabet=\"#{@table.alphabet}\"#{pad}]"
    end

    ##
    # RFC 4648 implementation: encode +plain+ with the standard alphabet.
    #
    # @param plain [String, Array] plain string or array of bytes
    # @return [EncodedByteArray] padded base64 output as bytes
    #
    def self.encode(plain)
      plain = plain.map(&:chr).join if plain.is_a?(Array)

      # Equivalent of Base64.strict_encode64(plain)
      EncodedByteArray.new([String(plain)].pack('m0').bytes)
    end

    ##
    # Decode standard-alphabet base64.
    #
    # @param packed [String, Array] encoded string or array of bytes
    # @return [DecodedByteArray] decoded bytes
    #
    def self.decode(packed)
      packed = packed.map(&:chr).join if packed.is_a?(Array)

      # Don't use 'm0' here, as that requires padding to _always_ be present
      DecodedByteArray.new(packed.unpack1('m').bytes)
    end

    ##
    # Ordinal table for base64: 64 alphabet characters, optionally followed
    # by a single padding character.
    #
    class Table < OrdTable
      ##
      # Build a table from a string alphabet or an array of characters or
      # ordinals. The argument itself is never modified.
      #
      # @param alphabet [String, Array] the alphabet (optionally + padder)
      # @param opts [Hash] forwarded to {#initialize}
      # @return [Table]
      #
      def self.from(alphabet, **opts)
        alphabet = alphabet.bytes if alphabet.respond_to?(:bytes)

        # map (not map!) so an array passed in by the caller is left untouched
        new(alphabet.map(&:ord), **opts)
      end

      ##
      # @param ords [Array<Integer>] 64 ordinals, or 65 where the last one is
      #   the padding character (duplicates are removed before counting)
      # @param opts [Hash] forwarded to OrdTable#initialize
      # @raise [ArgumentError] if there are not 64 or 65 unique ordinals
      #
      def initialize(ords, **opts)
        ords = ords.uniq

        unless [64, 65].include?(ords.length)
          raise ArgumentError,
                'Expected alphabet to contain 64 characters or 64 + 1 ' \
                "padding character. Actual: #{ords.length} characters"
        end

        padder = nil
        *ords, padder = ords if ords.length == 65

        super(ords, padder: padder, **opts)
      end
    end

    ##
    # @param alphabet [String, Array] 64 characters, optionally followed by
    #   one padding character
    # @param strict [Boolean] passed on to the table
    #
    def initialize(alphabet, strict: false)
      @table = Table.from(alphabet, strict: strict)
    end

    ##
    # Encode +plain+ using this instance's alphabet.
    #
    # @param plain [String, Array] plain string or byte array
    # @return [EncodedByteArray] encoded byte array
    #
    def encode(plain)
      return EncodedByteArray::EMPTY if plain.empty?

      encoded = Multibases::Base64.encode(plain)
      # The standard encoder always pads; strip it when this table has no padder
      encoded.chomp!(Default.table_padder) unless @table.padder
      return encoded if default?

      encoded.transcode(
        Default.table_ords(force_strict: @table.strict?),
        table_ords
      )
    end

    ##
    # Decode +encoded+ using this instance's alphabet. The argument is not
    # modified, so frozen strings are accepted.
    #
    # @param encoded [String, Array] encoded string or byte array
    # @return [DecodedByteArray] decoded byte array
    # @raise [ArgumentError] if +encoded+ contains bytes outside the table
    #
    def decode(encoded)
      return DecodedByteArray::EMPTY if encoded.empty?

      # String#bytes does not depend on the string's encoding, so there is no
      # need to force_encoding (which mutates the caller's string and raises
      # on frozen input).
      encoded = encoded.bytes unless encoded.is_a?(Array)

      unless decodable?(encoded)
        raise ArgumentError, "'#{encoded}' contains unknown characters"
      end

      unless default?
        encoded = ByteArray.new(encoded).transcode(
          table_ords,
          Default.table_ords(force_strict: @table.strict?)
        )
      end

      Multibases::Base64.decode(encoded)
    end

    ##
    # @return [Boolean] true if this instance equals the Default instance
    #
    def default?
      eql?(Default)
    end

    ##
    # Two instances are equal when their tables are equal.
    #
    def eql?(other)
      other.is_a?(Base64) && other.instance_variable_get(:@table) == @table
    end

    alias == eql?

    ##
    # Hash consistent with {#eql?}: derived from the table.
    #
    def hash
      @table.hash
    end

    ##
    # @param encoded [Array<Integer>] encoded bytes
    # @return [Boolean] true if every byte is part of this table
    #
    def decodable?(encoded)
      (encoded.uniq - table_ords).empty?
    end

    ##
    # @return [Array<Integer>] the table's ordinals as returned by
    #   OrdTable#tr_ords for the given +force_strict+
    #
    def table_ords(force_strict: nil)
      @table.tr_ords(force_strict: force_strict)
    end

    ##
    # @return [Integer, nil] ordinal of the padding character, if any
    #
    def table_padder
      @table.padder
    end

    # rubocop:disable Metrics/LineLength
    Default = Base64.new('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=')
    UrlSafe = Base64.new('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_=')
    # rubocop:enable Metrics/LineLength
  end
end
@@ -0,0 +1,129 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative './byte_array'
4
+ require_relative './ord_table'
5
+
6
module Multibases
  ##
  # Generic "big number" base converter: the input bytes are read as one
  # large integer and rewritten in the base given by the alphabet length.
  #
  class BaseX
    ##
    # @return [String] description listing base, alphabet and strictness
    #
    def inspect
      strict = @table.strict? ? ' strict' : ''
      "[Multibases::Base#{@table.base} alphabet=\"#{@table.alphabet}\"#{strict}]"
    end

    ##
    # Indexed ordinal table for an arbitrary alphabet.
    #
    class Table < IndexedOrdTable
      ##
      # @param alphabet [String, Array] the alphabet; never modified
      # @param opts [Hash] forwarded to the table constructor
      # @raise [ArgumentError] if the alphabet has 255 or more entries
      #
      def self.from(alphabet, **opts)
        raise ArgumentError, 'Alphabet too long' if alphabet.length >= 255

        alphabet = alphabet.bytes if alphabet.respond_to?(:bytes)

        # map (not map!) so an array passed in by the caller is left untouched
        new(alphabet.map(&:ord), **opts)
      end
    end

    ##
    # @param alphabet [String, Array] characters of the target base
    # @param strict [Boolean] passed on to the table
    #
    def initialize(alphabet, strict: false)
      @table = Table.from(alphabet, strict: strict)
    end

    ##
    # Encode +plain+ to an encoded string
    #
    # @param plain [String, Array] plain string or byte array
    # @return [EncodedByteArray] encoded byte array
    #
    def encode(plain)
      return EncodedByteArray::EMPTY if plain.empty?

      plain = plain.bytes unless plain.is_a?(Array)
      # Width of the full input; only used for power-of-two bases below
      expected_length = @table.encoded_length(plain)

      # Leading zero bytes carry no numeric value; handle them separately
      zeroes_count = plain.find_index { |b| b.ord != 0 } || plain.length
      significant = plain.drop(zeroes_count)

      # Read the remaining bytes as one big number. Starting from 0 keeps
      # this well-defined when the input consisted of zero bytes only.
      number = significant.inject(0) { |sum, byte| (sum << 8) + byte.ord }

      # Rewrite the number in the destination base, least significant first
      digits = []
      while number > 0
        number, remainder = number.divmod(@table.base)
        digits << @table.ord_at(remainder)
      end

      # Prepend the leading zeroes
      zeroes = Array.new(@table.encoded_zeroes_length(zeroes_count), @table.zero)
      output = zeroes + digits.reverse

      # Padding at the front (to match expected length). Only applied to
      # power-of-two bases, as reported by the table.
      if @table.pad_to_power? && output.length < expected_length
        output = Array.new(expected_length - output.length, @table.zero) + output
      end

      EncodedByteArray.new(output)
    end

    ##
    # Decode +encoded+ to a byte array. The argument is not modified, so
    # frozen strings are accepted.
    #
    # @param encoded [String, Array, ByteArray] encoded string or byte array
    # @return [DecodedByteArray] decoded byte array
    # @raise [ArgumentError] if +encoded+ contains bytes outside the table
    #
    def decode(encoded)
      return DecodedByteArray::EMPTY if encoded.empty?

      # String#bytes does not depend on the string's encoding, so there is no
      # need to force_encoding (which mutates the caller's string and raises
      # on frozen input).
      encoded = encoded.bytes unless encoded.is_a?(Array)

      unless decodable?(encoded)
        raise ArgumentError, "'#{encoded}' contains unknown characters"
      end

      # Find leading zeroes
      zeroes_count =
        encoded.find_index { |b| b.ord != @table.zero } || encoded.length
      digits = encoded.drop(zeroes_count)

      # Decode number from encoding base (Horner's scheme)
      number = digits.inject(0) do |sum, char|
        sum * @table.base + @table.index(char)
      end

      # Build the output bytes. Because the encoding is "lost" the result
      # might not be correct just yet. This is up to the caller to fix. The
      # algorithm **can not know** what the encoding was.
      #
      # bit_length gives the exact byte count; a floating point log2 is off
      # by one for 1, 256, 65536, ... and undefined for 0.
      byte_count = (number.bit_length + 7) / 8
      output = Array.new(byte_count) do
        number, byte = number.divmod(256)
        byte
      end.reverse

      # Prepend the leading zeroes
      leading =
        if @table.pad_to_power?
          # Fixed-width bases: pad up to the width implied by the whole input
          [@table.decoded_length(encoded) - output.length, 0].max
        else
          @table.decoded_zeroes_length(zeroes_count)
        end

      DecodedByteArray.new(Array.new(leading, 0x00) + output)
    end

    ##
    # @param encoded [Array<Integer>] encoded bytes
    # @return [Boolean] true if every byte is part of this table
    #
    def decodable?(encoded)
      (encoded.uniq - @table.tr_ords).empty?
    end
  end
end
@@ -0,0 +1,73 @@
1
+ # frozen_string_literal: true
2
+
3
require 'delegate' # provides DelegateClass; this file may be loaded directly

module Multibases
  ##
  # Array of bytes (wrapped via DelegateClass) with helpers shared by the
  # encoded and decoded variants.
  #
  class ByteArray < DelegateClass(Array)
    ##
    # @return [Integer] hash of the wrapped array
    #
    def hash
      __getobj__.hash
    end

    ##
    # Compares the string representations of both objects.
    #
    def eql?(other)
      other.to_s.eql?(to_s)
    end

    ##
    # @return [Array] a copy of the wrapped array
    #
    def to_arr
      __getobj__.dup
    end

    ##
    # Also answers true for classes the wrapped array is an instance of, so
    # `byte_array.is_a?(Array)` holds.
    #
    def is_a?(klazz)
      super || __getobj__.is_a?(klazz)
    end

    ##
    # Map each byte from one alphabet to the byte at the same position in
    # another alphabet.
    #
    # @param from [Array<Integer>] source ordinals
    # @param to [Array<Integer>] destination ordinals
    # @return [ByteArray] new instance of the receiver's class; bytes that
    #   have no counterpart become nil
    #
    def transcode(from, to)
      position_of = from.each_with_index.to_h
      ord_at = to.each_with_index.map { |ord, index| [index, ord] }.to_h

      self.class.new(map { |byte| ord_at[position_of[byte]] })
    end

    alias to_a to_arr
    alias kind_of? is_a?
  end

  ##
  # Bytes that are the result of an encode operation.
  #
  class EncodedByteArray < ByteArray
    def inspect
      "[Multibases::EncodedByteArray \"#{to_str}\"]"
    end

    ##
    # @return [String] the bytes as an ASCII-8BIT string
    #
    def to_str
      map(&:chr).join.encode(Encoding::ASCII_8BIT)
    end

    ##
    # Remove trailing occurrences of +ord+ in place.
    #
    # @param ord [Integer, nil] byte to strip; nil is a no-op
    # @return [self]
    #
    def chomp!(ord)
      return self unless ord

      bytes = __getobj__
      # Position of the last byte that must be kept. When every byte equals
      # +ord+ (or the array is empty) nothing is removed.
      last_kept = bytes.rindex { |byte| byte != ord }
      bytes.slice!((last_kept + 1)..-1) unless last_kept.nil?

      self
    end

    alias to_s to_str
  end

  ##
  # Bytes that are the result of a decode operation.
  #
  class DecodedByteArray < ByteArray
    def inspect
      "[Multibases::DecodedByteArray \"#{to_str}\"]"
    end

    ##
    # @param encoding [Encoding] encoding to tag the resulting string with
    # @return [String] the bytes as a string, tagged (not transcoded)
    #
    def to_str(encoding = Encoding::UTF_8)
      map(&:chr).join.force_encoding(encoding)
    end

    ##
    # Same as {#to_str}; named after String#force_encoding.
    #
    def force_encoding(*args)
      to_str(*args)
    end

    alias to_s to_str
  end

  EncodedByteArray.const_set(:EMPTY, EncodedByteArray.new([]))
  DecodedByteArray.const_set(:EMPTY, DecodedByteArray.new([]))
end
@@ -0,0 +1,109 @@
1
+ # frozen_string_literal: true
2
+
3
module Multibases
  ##
  # Table of character ordinals that make up an alphabet, with an optional
  # padding ordinal.
  #
  class OrdTable
    attr_reader :base, :factor, :padder

    ##
    # @param ords [Array<Integer>] ordinals of the alphabet; duplicates are
    #   removed
    # @param strict [Boolean] force strict (case-sensitive) handling
    # @param padder [Integer, nil] ordinal of the padding character
    #
    def initialize(ords, strict:, padder: nil)
      ords = ords.uniq

      @ords = ords
      @base = ords.length
      @padder = padder

      chars = ords.map(&:chr)
      chars_downcased = chars.map(&:downcase).uniq
      chars_upcased = chars.map(&:upcase).uniq
      chars_cased = chars_upcased - chars_downcased

      # Strict means that the algorithm may _not_ treat incorrectly cased
      # input the same as correctly cased input. The table is strict when
      # requested, when no character changes under upcasing, or when two
      # characters collapse into one after downcasing (the alphabet contains
      # both casings and therefore has a canonical casing).
      @strict = strict ||
                chars_cased.empty? ||
                chars.length != chars_downcased.length

      @loose_ords = (chars + chars_downcased + chars_upcased).uniq.map(&:ord)
    end

    ##
    # Tables are equal when alphabet and strictness match.
    #
    def eql?(other)
      other.is_a?(OrdTable) &&
        other.alphabet == alphabet &&
        other.strict? == strict?
    end

    alias == eql?

    def hash
      @ords.hash
    end

    def strict?
      @strict
    end

    ##
    # @param force_strict [Boolean] return the exact alphabet even when the
    #   table itself is not strict
    # @return [Array<Integer>] accepted ordinals (alphabet, plus both casings
    #   when loose), followed by the padder if there is one
    #
    def tr_ords(force_strict: false)
      return @ords + [@padder].compact if strict? || force_strict

      @loose_ords + [@padder].compact
    end

    ##
    # @return [String] the alphabet as a string (without padder)
    #
    def alphabet
      @ords.map(&:chr).join
    end
  end

  ##
  # OrdTable that additionally maps ordinals to their index and back.
  #
  class IndexedOrdTable < OrdTable
    def initialize(ords, **opts)
      super(ords, **opts)

      # Index the de-duplicated ordinals stored by OrdTable, so that indices
      # always run from 0 to base - 1 even if +ords+ contained duplicates.
      @forward = @ords.each_with_index.to_h
      @backward = @forward.invert
      @factor = Math.log(256) / Math.log(base)
    end

    ##
    # @return [Integer] ordinal that represents the digit zero
    #
    def zero
      @backward[0]
    end

    ##
    # @param byte [Integer] ordinal to look up
    # @return [Integer, false, nil] index of +byte+; when the table is not
    #   strict the upcased and downcased variants are tried as well
    #
    def index(byte)
      @forward[byte] || !strict? && (
        @forward[byte.chr.upcase.ord] ||
        @forward[byte.chr.downcase.ord]
      )
    end

    ##
    # @param index [Integer] digit value
    # @return [Integer, nil] ordinal for that digit
    #
    def ord_at(index)
      @backward[index]
    end

    def encoded_length(plain_bytes)
      (plain_bytes.length.to_f * factor).ceil
    end

    def decoded_length(encoded_bytes)
      (encoded_bytes.length / factor).round
    end

    def encoded_zeroes_length(count)
      # For power of 2 bases, add "canonical-width"
      return (factor * count).floor if pad_to_power?

      # For other bases, add an equivalent count to front
      count
    end

    def decoded_zeroes_length(count)
      # For power of 2 bases, add "canonical-width"
      return (count / factor).round if pad_to_power?

      # For other bases, add an equivalent count to front
      count
    end

    ##
    # @return [Boolean] true if the base is a power of two
    #
    def pad_to_power?
      (Math.log2(base) % 1).zero?
    end
  end
end
@@ -0,0 +1,53 @@
1
+ # frozen_string_literal: true
2
+
3
module Multibases
  # Registry of implementations, keyed by encoding name.
  # rubocop:disable Style/MutableConstant
  IMPLEMENTATIONS = {}
  # rubocop:enable Style/MutableConstant

  ##
  # A registered encoding: its code, its encoding name and the engine
  # instance (or nil). Identity is determined by the encoding name.
  #
  Registration = Struct.new(:code, :encoding, :engine) do
    def hash
      encoding.hash
    end

    # A String matches when it equals the encoding name or the code; anything
    # else is compared with #eql?.
    def ==(other)
      if other.is_a?(String)
        encoding == other || code == other
      else
        eql?(other)
      end
    end

    def eql?(other)
      other.is_a?(Registration) && other.encoding == encoding
    end
  end

  module_function

  ##
  # Register an encoding, replacing an existing registration with that name.
  #
  # @param encoding [String] encoding name (registry key)
  # @param code [String] code of the encoding
  # @param implementation [Class, nil] engine class, instantiated with
  #   +alphabet+ when given
  # @param alphabet [String, nil] argument for the engine constructor
  # @return [Registration] the stored registration
  #
  def implement(encoding, code, implementation = nil, alphabet = nil)
    engine = implementation&.new(alphabet)
    Multibases::IMPLEMENTATIONS[encoding] =
      Registration.new(code, encoding, engine)
  end

  ##
  # Look up a registration, raising when there is none.
  #
  # @param code [String, nil] code to search for (used without +encoding+)
  # @param encoding [String, nil] encoding name; takes precedence
  # @return [Registration]
  # @raise [KeyError] if nothing is registered for the given key
  #
  def fetch_by!(code: nil, encoding: nil)
    if encoding
      Multibases::IMPLEMENTATIONS.fetch(encoding)
    else
      registration = Multibases.find_by(code: code)
      raise KeyError, "No implementation has code #{code}" unless registration

      registration
    end
  end

  ##
  # @param code [String, nil] code to match
  # @param encoding [String, nil] encoding name to match
  # @return [Registration, nil] first registration matching either argument
  #
  def find_by(code: nil, encoding: nil)
    Multibases::IMPLEMENTATIONS.each_value.find do |registration|
      registration == code || registration == encoding
    end
  end

  ##
  # Reader and writer in one: without an argument the stored version is
  # returned, with an argument it is stored and returned.
  #
  def multibase_version(multibase_semver = nil)
    @multibase_version = multibase_semver unless multibase_semver.nil?
    @multibase_version
  end
end