multibases 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,116 @@
1
+ # frozen_string_literal: true
2
+
3
module Multibases
  ##
  # Base64 codec with a configurable alphabet.
  #
  # Data is first encoded with Ruby's built-in base64 packer (RFC 4648
  # alphabet) and then translated character-by-character into the alphabet of
  # this instance. Decoding performs the same steps in reverse.
  class Base64
    # @return [String] debug representation listing the alphabet and, when
    #   present, the padding character
    def inspect
      padder = @table.padder
      padding = padder.nil? ? '' : " pad=\"#{padder.chr}\""

      "[Multibases::Base64 alphabet=\"#{@table.alphabet}\"#{padding}]"
    end

    ##
    # RFC 4648 implementation: encode +plain+ using the default alphabet.
    #
    # @param plain [String, Array] plain string or array of bytes
    # @return [EncodedByteArray] encoded bytes (always padded)
    #
    def self.encode(plain)
      plain = plain.map(&:chr).join if plain.is_a?(Array)

      # Same as Base64.strict_encode64(plain)
      EncodedByteArray.new([String(plain)].pack('m0').bytes)
    end

    ##
    # RFC 4648 implementation: decode +packed+ using the default alphabet.
    #
    # @param packed [String, Array] encoded string or array of bytes
    # @return [DecodedByteArray] decoded bytes
    #
    def self.decode(packed)
      packed = packed.map(&:chr).join if packed.is_a?(Array)

      # Don't use m0, as that requires padding _always_
      DecodedByteArray.new(packed.unpack1('m').bytes)
    end

    ##
    # Ordinal table for base64 alphabets: 64 characters, optionally followed
    # by a 65th character that is used for padding.
    class Table < OrdTable
      # Build a table from a String alphabet or an Array of characters or
      # ordinals.
      #
      # @param alphabet [String, Array] the alphabet
      # @param opts [Hash] options passed on to {#initialize}
      # @return [Table]
      def self.from(alphabet, **opts)
        # Non-destructive on purpose: a caller supplied array is left alone.
        ords = alphabet.respond_to?(:bytes) ? alphabet.bytes : alphabet.map(&:ord)

        new(ords, **opts)
      end

      # @param ords [Array<Integer>] 64 ordinals, or 65 when the last one is
      #   the padding character
      # @param opts [Hash] options passed on to OrdTable#initialize
      # @raise [ArgumentError] when the number of unique ordinals is not 64
      #   or 65
      def initialize(ords, **opts)
        ords = ords.uniq

        if ords.length < 64 || ords.length > 65
          raise ArgumentError,
                'Expected alphabet to contain 64 characters or 64 + 1 ' \
                "padding character. Actual: #{ords.length} characters"
        end

        padder = nil
        *ords, padder = ords if ords.length == 65

        super(ords, padder: padder, **opts)
      end
    end

    # @param alphabet [String, Array] 64 characters (+ 1 optional padding
    #   character)
    # @param strict [Boolean] passed on to the table
    def initialize(alphabet, strict: false)
      @table = Table.from(alphabet, strict: strict)
    end

    ##
    # Encode +plain+ to an encoded byte array using this alphabet
    #
    # @param plain [String, Array] plain string or byte array
    # @return [EncodedByteArray] encoded byte array
    #
    def encode(plain)
      return EncodedByteArray::EMPTY if plain.empty?

      encoded = Multibases::Base64.encode(plain)
      # The packer always pads; strip it when this alphabet has no padder.
      encoded.chomp!(Default.table_padder) unless @table.padder
      return encoded if default?

      encoded.transcode(
        Default.table_ords(force_strict: @table.strict?),
        table_ords
      )
    end

    ##
    # Decode +encoded+ to a byte array
    #
    # The input is never modified, so frozen strings are accepted.
    #
    # @param encoded [String, Array, ByteArray] encoded string or byte array
    # @return [DecodedByteArray] decoded byte array
    # @raise [ArgumentError] when +encoded+ holds a byte outside the alphabet
    #
    def decode(encoded)
      return DecodedByteArray::EMPTY if encoded.empty?

      bytes = encoded.is_a?(Array) ? encoded : encoded.bytes

      unless decodable?(bytes)
        raise ArgumentError, "'#{encoded}' contains unknown characters"
      end

      unless default?
        bytes = ByteArray.new(bytes).transcode(
          table_ords,
          Default.table_ords(force_strict: @table.strict?)
        )
      end

      Multibases::Base64.decode(bytes)
    end

    # @return [Boolean] true when this codec equals the Default codec, in
    #   which case no transcoding is needed
    def default?
      eql?(Default)
    end

    # Two codecs are equal when their tables are equal.
    def eql?(other)
      other.is_a?(Base64) && other.instance_variable_get(:@table) == @table
    end

    alias == eql?

    # Hash consistent with #eql?, so codecs behave as Hash keys.
    def hash
      @table.hash
    end

    # @param encoded [Array<Integer>] encoded bytes
    # @return [Boolean] true when every byte is part of this alphabet
    def decodable?(encoded)
      (encoded.uniq - table_ords).empty?
    end

    # @return [Array<Integer>] the ordinals used for transcoding, see
    #   OrdTable#tr_ords
    def table_ords(force_strict: nil)
      @table.tr_ords(force_strict: force_strict)
    end

    # @return [Integer, nil] ordinal of the padding character, if any
    def table_padder
      @table.padder
    end

    # rubocop:disable Metrics/LineLength
    Default = Base64.new('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=')
    UrlSafe = Base64.new('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_=')
    # rubocop:enable Metrics/LineLength
  end
end
@@ -0,0 +1,129 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative './byte_array'
4
+ require_relative './ord_table'
5
+
6
module Multibases
  ##
  # Generic positional codec: the input bytes are read as one big-endian
  # integer which is then written out in base N, where N is the number of
  # characters in the alphabet. Leading zero bytes are carried over
  # separately because they do not change the value of the integer.
  class BaseX
    # @return [String] debug representation listing base, alphabet and
    #   strictness
    def inspect
      "[Multibases::Base#{@table.base} " \
        "alphabet=\"#{@table.alphabet}\"" \
        "#{@table.strict? ? ' strict' : ''}" \
        ']'
    end

    # Indexed ordinal table for alphabets of fewer than 255 characters.
    class Table < IndexedOrdTable
      # @param alphabet [String, Array] the alphabet
      # @param opts [Hash] options passed on to IndexedOrdTable#initialize
      # @raise [ArgumentError] when the alphabet has 255 or more entries
      # @return [Table]
      def self.from(alphabet, **opts)
        raise ArgumentError, 'Alphabet too long' if alphabet.length >= 255

        # Non-destructive on purpose: a caller supplied array is left alone.
        ords = alphabet.respond_to?(:bytes) ? alphabet.bytes : alphabet.map(&:ord)

        new(ords, **opts)
      end
    end

    # @param alphabet [String, Array] the alphabet
    # @param strict [Boolean] passed on to the table
    def initialize(alphabet, strict: false)
      @table = Table.from(alphabet, strict: strict)
    end

    ##
    # Encode +plain+ to an encoded string
    #
    # @param plain [String, Array] plain string or byte array
    # @return [EncodedByteArray] encoded byte array
    #
    def encode(plain)
      return EncodedByteArray::EMPTY if plain.empty?

      plain = plain.bytes unless plain.is_a?(Array)
      # Only used for power-of-two bases, measured before zeroes are dropped.
      expected_length = @table.encoded_length(plain)

      # Find leading zeroes
      zeroes_count = plain.find_index { |b| b.ord != 0 } || plain.length

      # Read the remaining bytes as one big number. Seeding with 0 keeps this
      # working when nothing remains (input of only zero bytes).
      number = plain.drop(zeroes_count).inject(0) { |acc, b| (acc << 8) + b.ord }

      # Write the number in the destination base, least significant digit
      # first. A number of 0 (only possible for all-zero input) has no digits.
      digits = []
      until number.zero?
        number, remainder = number.divmod(@table.base)
        digits << @table.ord_at(remainder)
      end

      # Prepend the leading zeroes
      leading = Array.new(@table.encoded_zeroes_length(zeroes_count), @table.zero)
      output = leading + digits.reverse

      # Padding at the front (to match expected length), only for bases that
      # are a power of two.
      missing = expected_length - output.length
      if @table.pad_to_power? && missing.positive?
        output = Array.new(missing, @table.zero) + output
      end

      EncodedByteArray.new(output)
    end

    ##
    # Decode +encoded+ to a byte array
    #
    # The input is never modified, so frozen strings are accepted.
    #
    # @param encoded [String, Array, ByteArray] encoded string or byte array
    # @return [DecodedByteArray] decoded byte array
    # @raise [ArgumentError] when +encoded+ holds a byte outside the alphabet
    #
    def decode(encoded)
      return DecodedByteArray::EMPTY if encoded.empty?

      bytes = encoded.is_a?(Array) ? encoded : encoded.bytes

      unless decodable?(bytes)
        raise ArgumentError, "'#{encoded}' contains unknown characters"
      end

      # Find leading zeroes
      zeroes_count = bytes.find_index { |b| b.ord != @table.zero } || bytes.length

      # Decode number from encoding base to an Integer (Horner's method)
      number = bytes.drop(zeroes_count).inject(0) do |acc, char|
        acc * @table.base + @table.index(char)
      end

      # Split the number into bytes, least significant byte first. Looping
      # until the number is exhausted yields exactly the bytes needed, also
      # for 0 (no bytes) and for exact powers of 256.
      #
      # The text encoding is "lost": the result is raw bytes and it is up to
      # the caller to interpret them. The algorithm **can not know** what the
      # encoding was.
      output = []
      until number.zero?
        number, byte = number.divmod(256)
        output << byte
      end
      output.reverse!

      # Prepend the leading zeroes
      leading = Array.new(@table.decoded_zeroes_length(zeroes_count), 0x00)

      DecodedByteArray.new(leading + output)
    end

    # @param encoded [Array<Integer>] encoded bytes
    # @return [Boolean] true when every byte is part of this alphabet
    def decodable?(encoded)
      (encoded.uniq - @table.tr_ords).empty?
    end
  end
end
@@ -0,0 +1,73 @@
1
+ # frozen_string_literal: true
2
+
3
# DelegateClass comes from the standard library; requiring it here makes this
# file loadable on its own.
require 'delegate'

module Multibases
  ##
  # Thin wrapper around an Array of byte values. It delegates to the wrapped
  # array and also answers +is_a?(Array)+ with true.
  class ByteArray < DelegateClass(Array)
    # @return [Integer] hash of the wrapped array
    def hash
      __getobj__.hash
    end

    # Equality based on the string representation of both sides.
    def eql?(other)
      other.to_s.eql?(to_s)
    end

    # @return [Array] a copy of the wrapped array
    def to_arr
      __getobj__.dup
    end

    # Also true for classes the wrapped array is an instance of.
    def is_a?(klazz)
      super || __getobj__.is_a?(klazz)
    end

    ##
    # Translate every byte from one alphabet to another by position: a byte
    # found at position +i+ in +from+ becomes the entry at position +i+ in
    # +to+. Bytes that have no counterpart become +nil+.
    #
    # @param from [Array<Integer>] source ordinals
    # @param to [Array<Integer>] target ordinals
    # @return [ByteArray] new instance of the receiver's class
    #
    def transcode(from, to)
      positions = from.each_with_index.to_h
      replacements = to.each_with_index.map { |ord, position| [position, ord] }.to_h

      self.class.new(map { |byte| replacements[positions[byte]] })
    end

    alias to_a to_arr
    alias kind_of? is_a?
  end

  # Byte array holding encoded data.
  class EncodedByteArray < ByteArray
    def inspect
      "[Multibases::EncodedByteArray \"#{to_str}\"]"
    end

    # @return [String] the bytes as an ASCII-8BIT string
    def to_str
      map(&:chr).join.encode(Encoding::ASCII_8BIT)
    end

    ##
    # Remove every trailing occurrence of +ord+, in place. An array that
    # holds nothing but +ord+ ends up empty.
    #
    # @param ord [Integer, nil] the ordinal to strip; nil is a no-op
    # @return [EncodedByteArray] self
    #
    def chomp!(ord)
      return self unless ord

      bytes = __getobj__
      bytes.pop while !bytes.empty? && bytes.last == ord

      self
    end

    alias to_s to_str
  end

  # Byte array holding decoded data.
  class DecodedByteArray < ByteArray
    def inspect
      "[Multibases::DecodedByteArray \"#{to_str}\"]"
    end

    # @param encoding [Encoding] encoding to tag the resulting string with
    # @return [String] the bytes as a string in +encoding+ (bytes are not
    #   converted, only tagged)
    def to_str(encoding = Encoding::UTF_8)
      map(&:chr).join.force_encoding(encoding)
    end

    # Same as #to_str, named after String#force_encoding.
    def force_encoding(*args)
      to_str(*args)
    end

    alias to_s to_str
  end

  # Shared instances returned by the codecs for empty input.
  EncodedByteArray.const_set(:EMPTY, EncodedByteArray.new([]))
  DecodedByteArray.const_set(:EMPTY, DecodedByteArray.new([]))
end
@@ -0,0 +1,109 @@
1
+ # frozen_string_literal: true
2
+
3
module Multibases
  ##
  # Alphabet stored as a list of unique ordinals (byte values), together with
  # an optional padding ordinal and a strictness flag.
  class OrdTable
    # @param ords [Array<Integer>] ordinals of the alphabet; duplicates are
    #   dropped
    # @param strict [Boolean] force strict (case-sensitive) handling
    # @param padder [Integer, nil] ordinal of the padding character
    def initialize(ords, strict:, padder: nil)
      ords = ords.uniq

      @ords = ords
      @base = ords.length
      @padder = padder

      chars = ords.map(&:chr)
      chars_downcased = chars.map(&:downcase).uniq
      chars_upcased = chars.map(&:upcase).uniq
      chars_cased = chars_upcased - chars_downcased

      # Strict means that the algorithm may _not_ treat incorrectly cased
      # input the same as correctly cased input. In other words, the table is
      # strict if a character exists that is both upcased and downcased and
      # therefore has a canonical casing. A table without any cased character
      # is strict as well.
      @strict = strict ||
                chars_cased.empty? ||
                chars.length != chars_downcased.length

      @loose_ords = (chars + chars_downcased + chars_upcased).uniq.map(&:ord)
    end

    # Tables are equal when alphabet and strictness match.
    def eql?(other)
      other.is_a?(OrdTable) &&
        other.alphabet == alphabet &&
        other.strict? == strict?
    end

    alias == eql?

    def hash
      @ords.hash
    end

    # @return [Boolean] whether casing is significant for this table
    def strict?
      @strict
    end

    ##
    # Ordinals accepted by this table, followed by the padder when present.
    # Strict tables (or +force_strict+) list the alphabet only; other tables
    # additionally list the down- and upcased variants.
    #
    # @param force_strict [Boolean, nil]
    # @return [Array<Integer>]
    #
    def tr_ords(force_strict: false)
      return @ords + [@padder].compact if strict? || force_strict

      @loose_ords + [@padder].compact
    end

    # @return [String] the alphabet as a string
    def alphabet
      @ords.map(&:chr).join
    end

    attr_reader :base, :factor, :padder
  end

  ##
  # OrdTable with lookups in both directions (ordinal to position and
  # position to ordinal) and length calculations for positional encodings.
  class IndexedOrdTable < OrdTable
    # @param ords [Array<Integer>] ordinals of the alphabet
    # @param opts [Hash] options passed on to OrdTable#initialize
    def initialize(ords, **opts)
      super(ords, **opts)

      # Index the de-duplicated alphabet, so positions always run from 0 up
      # to base - 1, also when the given ords contain a duplicate.
      @forward = @ords.each_with_index.to_h
      @backward = @forward.invert
      # Number of encoded characters per plain byte.
      @factor = Math.log(256) / Math.log(base)
    end

    # @return [Integer] ordinal of the character at position 0
    def zero
      @backward[0]
    end

    ##
    # Position of +byte+ in the alphabet. Tables that are not strict also try
    # the upcased and downcased variant of the byte.
    #
    # @param byte [Integer] ordinal to look up
    # @return [Integer, nil] the position, or nil when unknown
    #
    def index(byte)
      position = @forward[byte]
      return position if position || strict?

      @forward[byte.chr.upcase.ord] || @forward[byte.chr.downcase.ord]
    end

    # @param index [Integer] position in the alphabet
    # @return [Integer, nil] ordinal at that position
    def ord_at(index)
      @backward[index]
    end

    # @param plain_bytes [#length] plain input
    # @return [Integer] number of characters needed to encode the input
    def encoded_length(plain_bytes)
      (plain_bytes.length.to_f * factor).ceil
    end

    # @param encoded_bytes [#length] encoded input
    # @return [Integer] number of bytes the encoded input represents
    def decoded_length(encoded_bytes)
      (encoded_bytes.length / factor).round
    end

    # @param count [Integer] number of leading zero bytes in the plain input
    # @return [Integer] number of zero characters to emit for them
    def encoded_zeroes_length(count)
      # For power of 2 bases, add "canonical-width"
      return (factor * count).floor if pad_to_power?

      # For other bases, add an equivalent count to front
      count
    end

    # @param count [Integer] number of leading zero characters in the
    #   encoded input
    # @return [Integer] number of zero bytes to emit for them
    def decoded_zeroes_length(count)
      # For power of 2 bases, add "canonical-width"
      return (count / factor).round if pad_to_power?

      # For other bases, add an equivalent count to front
      count
    end

    # @return [Boolean] true when the base is a power of two
    def pad_to_power?
      # Integer check instead of a floating point logarithm.
      base.positive? && (base & (base - 1)).zero?
    end
  end
end
@@ -0,0 +1,53 @@
1
+ # frozen_string_literal: true
2
+
3
module Multibases
  # Registry of all known encodings, keyed by encoding name.
  # rubocop:disable Style/MutableConstant
  IMPLEMENTATIONS = {}
  # rubocop:enable Style/MutableConstant

  ##
  # One registered encoding: its multibase +code+, its +encoding+ name and the
  # +engine+ that does the work (nil when none was given).
  #
  # Registrations are identified by their encoding name. Comparing with a
  # String matches either the encoding name or the code.
  Registration = Struct.new(:code, :encoding, :engine) do
    def hash
      encoding.hash
    end

    def ==(other)
      if other.is_a?(String)
        [encoding, code].include?(other)
      else
        eql?(other)
      end
    end

    def eql?(other)
      other.is_a?(Registration) && other.encoding == encoding
    end
  end

  module_function

  ##
  # Register an encoding, replacing any earlier registration under the same
  # encoding name.
  #
  # @param encoding [String] encoding name, used as registry key
  # @param code [String] code of the encoding
  # @param implementation [Class, nil] engine class; instantiated with
  #   +alphabet+ when given
  # @param alphabet [String, nil] alphabet handed to the engine
  # @return [Registration] the stored registration
  #
  def implement(encoding, code, implementation = nil, alphabet = nil)
    engine = implementation&.new(alphabet)

    Multibases::IMPLEMENTATIONS[encoding] = Registration.new(code, encoding, engine)
  end

  ##
  # Look up a registration, raising when it does not exist. When +encoding+
  # is given it takes precedence over +code+.
  #
  # @raise [KeyError] when nothing matches
  # @return [Registration]
  #
  def fetch_by!(code: nil, encoding: nil)
    return Multibases::IMPLEMENTATIONS.fetch(encoding) if encoding

    registration = Multibases.find_by(code: code)
    raise KeyError, "No implementation has code #{code}" unless registration

    registration
  end

  ##
  # Look up a registration that matches +code+ or +encoding+.
  #
  # @return [Registration, nil] the first match, or nil
  #
  def find_by(code: nil, encoding: nil)
    Multibases::IMPLEMENTATIONS.each_value.find do |registration|
      registration == code || registration == encoding
    end
  end

  ##
  # Read the stored multibase version, or store one when an argument is
  # given.
  #
  # @param multibase_semver [String, nil] version to store
  # @return [String, nil] the stored version
  #
  def multibase_version(multibase_semver = nil)
    @multibase_version = multibase_semver unless multibase_semver.nil?
    @multibase_version
  end
end