oak 0.0.3 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +51 -0
- data/.rubocop.yml +74 -0
- data/.travis.yml +17 -0
- data/CHANGELOG.md +24 -0
- data/DESIDERATA.md +318 -0
- data/Gemfile +3 -15
- data/LICENSE +22 -0
- data/Makefile +113 -0
- data/README.md +163 -23
- data/Rakefile +6 -47
- data/bin/oak +242 -3
- data/bin/oak.rb +245 -0
- data/lib/oak.rb +1049 -86
- data/lib/oak/version.rb +3 -0
- data/oak.gemspec +29 -65
- metadata +121 -71
- data/.document +0 -5
- data/Gemfile.lock +0 -26
- data/LICENSE.txt +0 -20
- data/VERSION +0 -1
- data/test/files/config/application.rb +0 -3
- data/test/files/config/database.yml +0 -25
- data/test/files/config/initializers/secret_token.rb +0 -7
- data/test/files/dot_gitignore +0 -0
- data/test/helper.rb +0 -29
- data/test/test_oak.rb +0 -44
data/bin/oak.rb
ADDED
@@ -0,0 +1,245 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# oak.rb: cli driver for encoding strings in the OAK format.
|
4
|
+
#
|
5
|
+
# author: jhw@prosperworks.com
|
6
|
+
# incept: 2016-03-05
|
7
|
+
#
|
8
|
+
|
9
|
+
require_relative '../lib/oak.rb'
|
10
|
+
require 'optimist'
|
11
|
+
|
12
|
+
OLD_ARGV = ARGV.dup # ARGV is consumed by Optimist but we use later.
|
13
|
+
OPTS = Optimist.options do
|
14
|
+
banner "#{$0} cli driver for OAK"
|
15
|
+
banner <<-OPTIMIST_EXAMPLES
|
16
|
+
Examples:
|
17
|
+
$ echo hello | bin/oak.rb
|
18
|
+
oak_3CNB_1944283675_15_RjFTVTVfaGVsbG8_ok
|
19
|
+
$ (echo hello ; echo world) | bin/oak.rb
|
20
|
+
oak_3CNB_1944283675_15_RjFTVTVfaGVsbG8_ok
|
21
|
+
oak_3CNB_2139413982_15_RjFTVTVfd29ybGQ_ok
|
22
|
+
$ (echo hello ; echo world) | bin/oak.rb --compression zlib --force
|
23
|
+
oak_3CZB_1944283675_26_eJxzMwwONY3PSM3JyQcAFF4DyA_ok
|
24
|
+
oak_3CZB_2139413982_26_eJxzMwwONY0vzy_KSQEAFNgD3A_ok
|
25
|
+
$ (echo hello ; echo world) | bin/oak.rb --format none
|
26
|
+
oak_3CNN_1944283675_11_F1SU5_hello_ok
|
27
|
+
oak_3CNN_2139413982_11_F1SU5_world_ok
|
28
|
+
$ (echo hello ; echo world) | bin/oak.rb | bin/oak.rb --mode decode-lines
|
29
|
+
hello
|
30
|
+
world
|
31
|
+
OPTIMIST_EXAMPLES
|
32
|
+
banner "Options:"
|
33
|
+
version "#{$0} #{OAK::VERSION}"
|
34
|
+
opt :redundancy, 'redundancy', :default => 'crc32'
|
35
|
+
opt :format, 'format', :default => 'base64'
|
36
|
+
opt :compression, 'compression', :default => 'none'
|
37
|
+
opt :force, 'compress even if bigger', :default => false
|
38
|
+
opt :mode, 'mode', :default => 'encode-lines'
|
39
|
+
opt :key_chain, 'key chain env name', :type => :string
|
40
|
+
opt :key, 'encrypt key name', :type => :string
|
41
|
+
opt :key_check, 'check available keys', :default => false
|
42
|
+
opt :key_generate, 'generate new key', :default => false
|
43
|
+
opt :force_oak_4, 'force OAK_4 even unencrypted', :default => false
|
44
|
+
opt :eigen, 'calc eigenratio', :type => :int
|
45
|
+
opt :self_test, 'self-test only', :default => false
|
46
|
+
opt :help, 'show this help'
|
47
|
+
end
|
48
|
+
# Reject a negative iteration count for --eigen up front.
if OPTS[:eigen] && OPTS[:eigen] < 0
  Optimist::die :eigen, "must be non-negative"
end

# Translate the parsed command line into the options Hash which is
# handed to OAK.encode and OAK.decode by the driver code below.
oak_opts = {
  :redundancy  => OPTS[:redundancy],
  :compression => OPTS[:compression],
  :force       => OPTS[:force],
  :format      => OPTS[:format],
  :key_chain   => OAK.parse_env_chain(ENV,OPTS[:key_chain]),
  :key         => OPTS[:key],
  :force_oak_4 => OPTS[:force_oak_4],
}

# Each of --redundancy, --compression, and --format must name an entry
# in the corresponding OAK code table.
known_redundancies = OAK::REDUNDANCY_2_CODE.keys
unless known_redundancies.include?(oak_opts[:redundancy])
  Optimist::die :redundancy, "bogus #{OPTS[:redundancy]}"
end
known_compressions = OAK::COMPRESSION_2_CODE.keys
unless known_compressions.include?(oak_opts[:compression])
  Optimist::die :compression, "bogus #{OPTS[:compression]}"
end
known_formats = OAK::FORMAT_2_CODE.keys
unless known_formats.include?(oak_opts[:format])
  Optimist::die :format, "bogus #{OPTS[:format]} not in #{known_formats}"
end
|
69
|
+
|
70
|
+
=begin
|
71
|
+
|
72
|
+
doctest: simple transcoding
|
73
|
+
>> OAK::decode(OAK::encode([1,"2",3.000001]))
|
74
|
+
=> [1,"2",3.000001]
|
75
|
+
>> OAK::decode(OAK::encode({foo: "bar"}))
|
76
|
+
=> {foo: "bar"}
|
77
|
+
>> OAK::decode(OAK::encode({foo: :bar}))
|
78
|
+
=> {foo: :bar}
|
79
|
+
>> OAK::decode(OAK::encode("Hello, World!"))
|
80
|
+
=> "Hello, World!"
|
81
|
+
>> OAK::decode(OAK::encode("Hello, World!", format: :none, redundancy: :none))
|
82
|
+
=> "Hello, World!"
|
83
|
+
|
84
|
+
doctest: stability of encoding
|
85
|
+
>> OAK::decode("oak_3NNB_0_30_RjNIMV8xXzJZQTNfZm9vU1UzX2Jhcg_ok")
|
86
|
+
=> {:foo=>"bar"}
|
87
|
+
>> OAK::encode(1, format: :base64, redundancy: :none)
|
88
|
+
=> "oak_3NNB_0_6_RjFJMQ_ok"
|
89
|
+
>> OAK::encode(1, format: :base64, redundancy: :crc32)
|
90
|
+
=> "oak_3CNB_3405226796_6_RjFJMQ_ok"
|
91
|
+
>> OAK::encode(1, format: :none, redundancy: :crc32)
|
92
|
+
=> "oak_3CNN_3405226796_4_F1I1_ok"
|
93
|
+
>> hello_utf8 = "Hello, World!".force_encoding('UTF-8')
|
94
|
+
=> "Hello, World!"
|
95
|
+
>> OAK::encode(hello_utf8, format: :base64, redundancy: :none)
|
96
|
+
=> "oak_3NNB_0_27_RjFTVTEzX0hlbGxvLCBXb3JsZCE_ok"
|
97
|
+
>> OAK::encode(hello_utf8, format: :none, redundancy: :crc32)
|
98
|
+
=> "oak_3CNN_2351984628_20_F1SU13_Hello, World!_ok"
|
99
|
+
|
100
|
+
Note above I used force_encoding('UTF-8') after discovering that with
|
101
|
+
Ruby 2.1.6 on Mac I get Encoding.default_encoding is UTF-8, but with
|
102
|
+
Ruby 2.1.6 on Linux I get Encoding.default_encoding is US-ASCII!
|
103
|
+
|
104
|
+
=end
|
105
|
+
|
106
|
+
if __FILE__ == $0
  #
  # --self-test: run the doctests embedded in this file and exit with
  # their status.
  #
  if OPTS[:self_test]
    require 'rubydoctest'
    exit RubyDocTest::Runner.new(File.read(__FILE__), __FILE__).run ? 0 : 1
  end
  #
  # --key-check: report which keys were found for --key-chain.  This
  # does not exit; processing continues with the selected mode.
  #
  if OPTS[:key_check]
    if !OPTS[:key_chain]
      puts "no --key-chain specified"
    else
      keys = oak_opts[:key_chain].keys.keys
      if keys.empty?
        puts "#{OPTS[:key_chain]}: no keys found"
      else
        puts "#{OPTS[:key_chain]}: found keys: #{keys.join(' ')}"
      end
    end
  end
  #
  # --key-generate: print a fresh random key, OAK-encoded, and exit.
  #
  if OPTS[:key_generate]
    STDOUT.puts OAK.encode(OAK.random_key)
    exit 0
  end
  if !$stdin.tty?
    #
    # --eigen N: re-encode the input N times, printing the size ratio
    # of each generation to the previous one.
    #
    if OPTS[:eigen]
      prev = STDIN.read
      puts "input: %d" % prev.size
      OPTS[:eigen].times do |i|
        oak   = OAK.encode(prev,oak_opts)
        psize = prev.size
        wsize = oak.size
        ratio = 1.0 * wsize / psize
        puts " iter %3d: %4d => %4d ratio %.2f" % [i,psize,wsize,ratio]
        prev  = oak
      end
      exit 0
    end
    #
    # unhappiness counts round-trip failures seen in the 'crazy' and
    # 'tests' modes, and becomes the exit status.
    #
    unhappiness = 0
    #
    # NaN never compares equal to itself, so a NaN which decodes back
    # to NaN is a successful round trip even though dec != obj.  Any
    # other inequality is a genuine failure.
    #
    both_nan = lambda do |a,b|
      a.is_a?(Float) && b.is_a?(Float) && a.nan? && b.nan?
    end
    case OPTS[:mode]
    when 'cat'
      # Lines are processed as they arrive rather than buffered.
      ARGF.each_line do |line|
        puts line.strip
      end
    when 'encode-lines'
      ARGF.each_line do |line|
        puts OAK.encode(line.strip,oak_opts)
      end
    when 'decode-lines'
      ARGF.each_line do |line|
        puts OAK.decode(line.strip,oak_opts)
      end
    when 'encode-file'
      puts OAK.encode(STDIN.read,oak_opts)
    when 'decode-file'
      STDOUT.write OAK.decode(STDIN.read.strip,oak_opts)
    when 'recode-file'
      #
      # Strip the input exactly as 'decode-file' does, so that the
      # newline-terminated output of 'encode-file' can be fed back in.
      #
      puts OAK.encode(OAK.decode(STDIN.read.strip,oak_opts),oak_opts)
    when 'crazy'
      #
      # --mode crazy prints out a sample of OAK strings for various
      # challenging cases.
      #
      cycle_a    = ['cycle_a','TBD']
      cycle_b    = ['cycle_b',cycle_a]
      cycle_a[1] = cycle_b
      dag_c      = ['dag_c']
      dag_b      = ['dag_b',dag_c]
      dag_a      = ['dag_a',dag_b,dag_c]
      [
        'hello',
        ['hello'] + ['hello',:hello] * 2,
        {1=>'a','b'=>2,[]=>3,''=>4,{}=>5,nil=>6},
        ['x','x','x','x','x','x','x','x','x','x','x','x','x'],
        ['x'] * 13,
        cycle_a,
        dag_a,
        [1,-123,0.12,-0.123,Float::NAN,-Float::INFINITY,3.14159265358979],
      ].each do |obj|
        oak = OAK.encode(
          obj,
          redundancy:  :crc32,
          format:      :none,
          compression: :none,
        )
        puts ""
        puts "obj: #{obj}"
        puts " oak: #{oak}"
        begin
          dec = OAK.decode(oak,oak_opts)
          if dec != obj && !both_nan.call(dec,obj)
            unhappiness += 1
            puts " BAD: #{dec}"
          end
        rescue OAK::CantTouchThisStringError => ex
          puts " BAD: #{ex.message}: #{ex.backtrace_locations[0]}"
          unhappiness += 1
        end
      end
    when 'tests'
      #
      # --mode tests prints, for each sample object, its encoding under
      # a spread of option combinations, and checks each round trip.
      #
      [
        [1,2,3],
        {:foo=>'foo','foo'=>['x']*10},
        -1,
        Float::NAN,
        nil,
      ].each do |obj|
        puts " #{obj} => ["
        key_chain = OAK::KeyChain.new(
          { 'l0ng3r' => OAK::Key.new('xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx') }
        )
        [
          {redundancy: :none, format: :none,  compression: :none             },
          {redundancy: :none, format: :base64,compression: :lz4,  force: true},
          {redundancy: :crc32,format: :base64,compression: :zlib, force: true},
          {redundancy: :crc32,format: :base64,compression: :bzip2,force: true},
          {redundancy: :sha1, format: :base64,compression: :lzma, force: true},
          {key_chain: key_chain,force_oak_4: true,format: :none,             },
          {key_chain: key_chain,force_oak_4: true,                           },
          {key_chain: key_chain,key: 'l0ng3r',                               },
        ].each do |opts|
          oak = OAK.encode(obj,opts)
          puts " '#{oak}',"
          dec = OAK.decode(oak,opts)
          if dec != obj && !both_nan.call(dec,obj)
            unhappiness += 1
          end
        end
        puts " ],"
      end
    else
      Optimist::die :mode, "bogus mode #{OPTS[:mode]}"
    end
    if unhappiness > 0
      puts "unhappiness: #{unhappiness}"
    end
    exit unhappiness
  end
end
|
data/lib/oak.rb
CHANGED
@@ -1,122 +1,1085 @@
|
|
1
|
-
|
1
|
+
# coding: utf-8
|
2
|
+
#
|
3
|
+
# OAK: An encoding format with enough polymorphism to support run-time
|
4
|
+
# performance experimentation and some light encryption-at-rest.
|
5
|
+
#
|
6
|
+
# author: jhw@prosperworks.com
|
7
|
+
# incept: 2016-03-02
|
2
8
|
|
3
|
-
|
4
|
-
|
5
|
-
|
9
|
+
require_relative 'oak/version'
|
10
|
+
require 'strscan'
|
11
|
+
require 'digest'
|
12
|
+
require 'base64'
|
13
|
+
require 'lz4-ruby'
|
14
|
+
require 'zlib'
|
15
|
+
require 'bzip2/ffi'
|
16
|
+
require 'lzma'
|
17
|
+
require 'openssl'
|
6
18
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
19
|
+
module OAK
|
20
|
+
|
21
|
+
# CantTouchThisObjectError is thrown when encode() or serialize() is
|
22
|
+
# called on an object which cannot be encoded losslessly by OAK.
|
23
|
+
#
|
24
|
+
class CantTouchThisObjectError < ArgumentError ; end
|
25
|
+
|
26
|
+
# CantTouchThisStringError is thrown when decode(), deserialize(),
|
27
|
+
# or unwrap() called on a String which cannot be decoded.
|
28
|
+
#
|
29
|
+
class CantTouchThisStringError < ArgumentError ; end
|
30
|
+
|
31
|
+
# Internal syntactic conveniences.
|
32
|
+
#
|
33
|
+
BAD_OBJ = CantTouchThisObjectError
|
34
|
+
BAD_STR = CantTouchThisStringError
|
35
|
+
|
36
|
+
# OAK_4 supports one and only one encryption algorithm and mode of
|
37
|
+
# operation.
|
38
|
+
#
|
39
|
+
# - AES-256-GCM
|
40
|
+
# - 128 bits of security
|
41
|
+
# - 256-bit keys (32 bytes)
|
42
|
+
# - 96-bit IVs (12 bytes)
|
43
|
+
# - 128-bit auth_tags (16 bytes)
|
44
|
+
# - Random IV ("Initialization Vector") for each encryption op
|
45
|
+
# - All headers authenticated.
|
46
|
+
# - Headers encrypted when not required for decryption.
|
47
|
+
#
|
48
|
+
ENCRYPTION_ALGO_NAME = 'aes-256-gcm'.freeze
|
49
|
+
ENCRYPTION_ALGO_IV_BYTES = 12 # AES-256-GCM has 96-bit IVs
|
50
|
+
ENCRYPTION_ALGO_AUTH_TAG_BYTES = 16 # AES-256-GCM has 128-bit auth, we use all
|
51
|
+
|
52
|
+
# Get a new instance of OpenSSL::Cipher for our algorithm.
|
53
|
+
#
|
54
|
+
def self.encryption_algo
  # A brand-new cipher object is built on every call.
  algo_name = ENCRYPTION_ALGO_NAME
  OpenSSL::Cipher.new(algo_name)
end
|
57
|
+
|
58
|
+
# Generate a new random key appropriate for the OAK_4 encryption
|
59
|
+
# algorithm.
|
60
|
+
#
|
61
|
+
def self.random_key
  # Delegate key generation to a fresh cipher object.
  cipher = encryption_algo
  cipher.random_key
end
|
64
|
+
|
65
|
+
# Generate a new random initialization vector appropriate for the
|
66
|
+
# OAK_4 encryption algorithm.
|
67
|
+
#
|
68
|
+
def self.random_iv
  # Delegate IV generation to a fresh cipher object.
  cipher = encryption_algo
  cipher.random_iv
end
|
71
|
+
|
72
|
+
# Key holds one secret encryption key and keeps it out of #inspect
# output and out of the messages of the errors it raises.
#
class Key

  # @param key String encryption key suitable for AES-256,
  # specifically a binary string of 32 bytes (256 bits),
  # randomly-generated and kept very, very secret.
  #
  # @raises ArgumentError if key is not a String, or does not have
  # the same length in bytes as the keys from OAK.random_key.
  #
  def initialize(key)
    if !key.is_a?(String)
      raise ArgumentError, "bad non-String key: ELIDED"
    end
    #
    # Measure in bytes, not characters: String#size counts characters,
    # which disagrees with the byte length for a key which happens to
    # be tagged with a multibyte encoding such as UTF-8.
    #
    rk_size = OAK.random_key.bytesize
    if key.bytesize != rk_size
      raise ArgumentError, "bad key ELIDED, length not #{rk_size}"
    end
    @key = key.dup.freeze # private frozen copy, immune to caller mutation
  end

  attr_reader :key

  def inspect
    #
    # Avoid exposing the key in casual logs or console session.
    #
    to_s[0..-2] + " @key=ELIDED>"
  end

end
|
18
99
|
|
19
|
-
|
100
|
+
# KeyChain is an immutable, validated mapping from key name to Key.
#
class KeyChain

  # @param keys Hash from String key name to Key.
  #
  # @raises ArgumentError if keys is not a Hash, if any key name is
  # not an alphanumeric String which starts with a letter, or if any
  # value is not a Key.
  #
  def initialize(keys)
    if !keys.is_a?(Hash)
      raise ArgumentError, "bogus keys #{keys}"
    end
    keys.each do |k,v|
      if !k.is_a?(String)
        raise ArgumentError, "bogus key #{k} in keys #{keys}"
      end
      #
      # In oak_4, we restrict key names to sequences which look
      # like code identifiers: alphanumeric strings which start
      # with a letter.
      #
      # This keeps the encoding simple but compact.
      #
      # The pattern is anchored with \A and \z so that the whole name
      # must match.  With ^ and $ a multi-line name would pass as long
      # as any one of its lines looked like an identifier.
      #
      if /\A[a-zA-Z][0-9a-zA-Z]*\z/ !~ k
        raise ArgumentError, "bad key #{k} in keys #{keys}"
      end
      if !v.is_a?(Key)
        raise ArgumentError, "bogus val #{v} at #{k} in keys #{keys}"
      end
    end
    #
    # We are a happy KeyChain object now!  Keep a private frozen copy
    # of the mapping.
    #
    @keys = keys.dup.freeze
  end

  attr_reader :keys

end
|
133
|
+
|
134
|
+
# Parses a KeyChain object and keys from an ENV-like object.
|
135
|
+
#
|
136
|
+
# E.g. if the ENV contains:
|
137
|
+
#
|
138
|
+
# FOO_KEYS=a,b
|
139
|
+
# FOO_KEY_a=#{OAK.encode(<binary key>)}
|
140
|
+
# FOO_KEY_b=#{OAK.encode(<binary key>)}
|
141
|
+
#
|
142
|
+
# ...then the call OAK.parse_env_chain(ENV,'FOO') will return a new
|
143
|
+
# OAK::KeyChain with two OAK::Keys, 'a' and 'b'.
|
144
|
+
#
|
145
|
+
# This self-referential (but not recursive!) use of OAK to encode
|
146
|
+
# the key and iv is to avoid the problems with binary strings in ENV
|
147
|
+
# variables, 'heroku config:set' command line arguments, etc.
|
148
|
+
#
|
149
|
+
# @param env ENV or an ENV-like Hash from String to String.
|
150
|
+
#
|
151
|
+
# @param name String the root token
|
152
|
+
#
|
153
|
+
# @returns a new OAK::KeyChain
|
154
|
+
#
|
155
|
+
def self.parse_env_chain(env,name)
  #
  # "#{name}_KEYS" lists the key names, separated by commas and/or
  # spaces.  A missing variable means an empty list.
  #
  raw_names = env["#{name}_KEYS"] || ''
  names     = raw_names.gsub(/^[, ]*/,'').split(/[ ,]+/)
  #
  # Each key name has an OAK-encoded secret in "#{name}_KEY_#{key_name}".
  #
  table = {}
  names.each do |key_name|
    secret          = OAK.decode(env["#{name}_KEY_#{key_name}"] || '')
    table[key_name] = Key.new(secret)
  end
  KeyChain.new(table)
end
|
163
|
+
|
164
|
+
##########################################################################
|
165
|
+
#
|
166
|
+
# encode() and decode() are the top layer
|
167
|
+
#
|
168
|
+
# They coordinate the structure layer and the byte layer.
|
169
|
+
#
|
170
|
+
# These are the recommended entry points for most callers.
|
171
|
+
#
|
172
|
+
##########################################################################
|
173
|
+
|
174
|
+
# Encodes suitable objects into OAK strings.
|
175
|
+
#
|
176
|
+
# Is inverted by decode(). For all obj, if encode(obj) does not
|
177
|
+
# raise an exception, decode(encode(obj)) == obj.
|
178
|
+
#
|
179
|
+
# @param obj to encode
|
180
|
+
#
|
181
|
+
# @param redundancy 'none', 'crc32' (default), or 'sha1'
|
182
|
+
#
|
183
|
+
# @param compression 'none' (default), 'lz4', 'zlib', 'bzip2', 'lzma'
|
184
|
+
#
|
185
|
+
# @param force false (default), or true. When true, always
|
186
|
+
# compress. When false, fall back to the
|
187
|
+
# original if the compressed form is larger.
|
188
|
+
#
|
189
|
+
# @param key_chain OAK::KeyChain from which to draw the encryption
|
190
|
+
# key, or nil for none.
|
191
|
+
#
|
192
|
+
# @param key String name of a key in key_chain to be used
|
193
|
+
# for encryption, or nil if none.
|
194
|
+
#
|
195
|
+
# @param format 'none', 'base64' (default)
|
196
|
+
#
|
197
|
+
# @param force_oak_4 Bool, for debugging, force oak_4 encoding even
|
198
|
+
# if no encryption key is specified.
|
199
|
+
#
|
200
|
+
# @param debug_iv String, force encryption with a known IV, TEST ONLY!
|
201
|
+
#
|
202
|
+
# WARNING: Use of debug_iv jeopardizes the security of all messages
|
203
|
+
# *ever* encrypted with that key! Never use debug_iv in production!
|
204
|
+
#
|
205
|
+
# @raises ArgumentError if obj is not handled.
|
206
|
+
#
|
207
|
+
def self.encode(obj,opts={})
  # Structure layer first, then the byte layer.
  _wrap(_serialize(obj),opts)
end
|
27
211
|
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
212
|
+
# Decodes suitable OAK strings into objects.
|
213
|
+
#
|
214
|
+
# Inverts encode().
|
215
|
+
#
|
216
|
+
# @param str String to decode
|
217
|
+
#
|
218
|
+
# @param key_chain OAK::KeyChain in which to look for keys to
|
219
|
+
# decrypt encrypted OAK strings, or nil for none.
|
220
|
+
#
|
221
|
+
# @returns obj the decoded object
|
222
|
+
#
|
223
|
+
# @raises ArgumentError if str is not a recognized string.
|
224
|
+
#
|
225
|
+
def self.decode(str,opts={})
  # Only Strings can be OAK strings.
  raise ArgumentError, "str not a String" unless str.is_a?(String)
  # Byte layer first, then the structure layer.
  _deserialize(_unwrap(str,opts))
end
|
232
|
+
|
233
|
+
##########################################################################
|
234
|
+
#
|
235
|
+
# serialize() and deserialize() are the structure layer
|
236
|
+
#
|
237
|
+
# They are responsible for interconverting between objects and naive
|
238
|
+
# strings.
|
239
|
+
#
|
240
|
+
# This layer is analogous to TAR for files or JSON: it converts
|
241
|
+
# structure into string and vice-versa.
|
242
|
+
#
|
243
|
+
##########################################################################
|
244
|
+
|
245
|
+
# Serializes suitable objects into naive strings.
|
246
|
+
#
|
247
|
+
# Is inverted by deserialize(). For all obj, if serialize(obj) does
|
248
|
+
# not raise an exception, deserialize(serialize(obj)) == obj.
|
249
|
+
#
|
250
|
+
# @raises CantTouchThisObjectError if obj contains any types or
|
251
|
+
# structure which cannot be encoded reversibly by OAK.
|
252
|
+
#
|
253
|
+
def self._serialize(obj)
  #
  # Walk the object graph, refusing any node which is not an instance
  # of one of ALL_TYPES.  The walk yields the seen list, from which
  # each node's slot number is looked up by object_id below.
  #
  seen,_reseen = _safety_dance(obj) do |child|
    unless ALL_TYPES.any? { |type| child.is_a?(type) }
      raise CantTouchThisObjectError, "#{child.class} not supported: #{child}"
    end
  end
  #
  # The only string encodings we can represent, and their codes.
  #
  enc_codes = {
    Encoding::ASCII_8BIT => 'A',
    Encoding::US_ASCII   => 'A',
    Encoding::UTF_8      => 'U',
  }
  strt = {} # string table, str => id for strings already encoded
  ser  = 'F'
  ser << seen.size.to_s
  seen.each do |_object_id,(_slot,child)|
    #
    # Exactly one entry of ALL_TYPES must claim the child.  child.class
    # itself may not be listed (e.g. an Integer subclass), hence the
    # search rather than a direct lookup.
    #
    matches = ALL_TYPES.select { |type| child.is_a?(type) }
    raise CantTouchThisObjectError if 1 != matches.size
    typecode = TYPE_2_CODE[matches[0]]
    case child
    when nil, true, false
      #
      # The type code by itself is sufficient to decode nil, true, and
      # false.  We need no other space for them.
      #
      ser << typecode
    when Symbol, String
      #
      # Strings and Symbols encode as an encoding code, their size in
      # bytes, and then their bytes.
      #
      # A text which has already been written out in full is instead
      # encoded as a back reference into the string table, which is
      # indicated by downcasing the typecode.
      #
      str      = child.to_s
      enc      = str.encoding
      enc_code = enc_codes.fetch(enc) do
        raise CantTouchThisObjectError, "unknown string encoding #{enc}"
      end
      if strt.has_key?(str)
        ser << typecode.downcase << enc_code << strt[str].to_s
      else
        ser << typecode << enc_code << str.bytesize.to_s
        ser << '_' << str if str.bytesize > 0
        strt[str] = strt.size
      end
    when Numeric
      #
      # Numeric primitives encode as their Ruby to_s.
      #
      ser << typecode << child.to_s
    when Array
      #
      # An array is encoded as a size N followed by N indexes into
      # the seen list.
      #
      ser << typecode << child.size.to_s
      child.each do |item|
        ser << '_' << seen[item.object_id][0].to_s
      end
    when Hash
      #
      # A hash is encoded as a size N followed by 2*N indexes into
      # the seen list, organized pairwise key+value.
      #
      ser << typecode << child.size.to_s
      child.each do |k,v|
        ser << '_' << seen[k.object_id][0].to_s
        ser << '_' << seen[v.object_id][0].to_s
      end
    else
      raise CantTouchThisObjectError, "not handled: #{child.class} #{child}"
    end
  end
  ser
end
|
41
363
|
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
364
|
+
# Deserializes suitable naive strings into objects.
|
365
|
+
#
|
366
|
+
# Inverts serialize().
|
367
|
+
#
|
368
|
+
# @raises CantTouchThisStringError if str is not recognized
|
369
|
+
#
|
370
|
+
def self._deserialize(str)
  scanner     = StringScanner.new(str)
  serial_code = scanner.scan(/F/)
  if 'F' != serial_code
    raise CantTouchThisStringError, "bogus serial_code #{serial_code}"
  end
  num_objs = scanner.scan(/[0-9]+/)
  if !num_objs
    raise CantTouchThisStringError, "missing object list size"
  end
  num_objs = num_objs.to_i
  #
  # Small scanning helpers.  A missing token always raises BAD_STR
  # rather than being silently read as 0 via nil.to_i.
  #
  scan_count = lambda do |what|
    digits = scanner.scan(/[0-9]+/)
    raise BAD_STR, "missing #{what}" if !digits
    digits.to_i
  end
  scan_sep = lambda do
    raise BAD_STR, "missing _" if !scanner.scan(/_/)
  end
  strt = {} # string table, id => str for strings already decoded
  seen = []
  #
  # We parse the stream, constructing all the objects we see in to
  # a seen list.
  #
  # In this first pass, Arrays and Hashes are created whose
  # elements, keys, and values are temporarily integers.  These all
  # refer to slots in the seen list, and many of them will be
  # forward references to objects which we have yet to decode.
  # Later we will rectify the object graph by replacing these
  # integers with their referents from the seen list.
  #
  num_objs.times do |idx_obj|
    code = scanner.scan(/[a-zA-Z]/)
    case code
    when 'n'
      seen[idx_obj] = nil
    when 'f'
      seen[idx_obj] = false
    when 't'
      seen[idx_obj] = true
    when 'S', 'Y', 's', 'y'
      enc_code = scanner.scan(/[AU]/)
      enc      =
        case enc_code
        when 'A' then Encoding::ASCII_8BIT
        when 'U' then Encoding::UTF_8
        else
          raise CantTouchThisStringError, "unknown enc_code #{enc_code}"
        end
      num = scanner.scan(/[0-9]+/)
      if !num
        raise CantTouchThisStringError, "missing num"
      end
      num = num.to_i
      raw = nil
      case code
      when 'S', 'Y'
        #
        # Upcase: num is a byte count, and the body follows in full.
        #
        if num > 0
          scan_sep.call
          if scanner.rest_size < num
            raise BAD_STR, "truncated string body at #{scanner.pos}"
          end
          raw          = scanner.peek(num)
          scanner.pos += num # skip body
        else
          raw = ''
        end
        strt[strt.size] = raw
      when 's', 'y'
        #
        # Downcase: num is a reference to a string already decoded.
        #
        raw = strt[num]
        if !raw
          raise BAD_STR, "bad string table reference #{num}"
        end
      end
      text = raw.dup.force_encoding(enc)
      seen[idx_obj] = ('Y' == code || 'y' == code) ? text.intern : text
    when 'I'
      match = scanner.scan(/-?[0-9]+/)
      raise BAD_STR, "missing integer at #{scanner.pos}" if !match
      seen[idx_obj] = match.to_i
    when 'F'
      pattern = /-?(Infinity|NaN|[0-9]+(\.[0-9]*)?(e([+-][0-9]*)?)?)/
      match   = scanner.scan(pattern)
      raise BAD_STR, "missing float at #{scanner.pos}" if !match
      case match
      when 'Infinity'  then seen[idx_obj] =  Float::INFINITY
      when '-Infinity' then seen[idx_obj] = -Float::INFINITY
      when 'NaN'       then seen[idx_obj] =  Float::NAN
      else                  seen[idx_obj] =  match.to_f
      end
    when 'A'
      num_items = scan_count.call('array size')
      arr       = []
      num_items.times do |idx|
        scan_sep.call
        arr[idx] = scan_count.call('array element') # temp obj
      end
      seen[idx_obj] = arr
    when 'H'
      num_items = scan_count.call('hash size')
      hash      = {}
      num_items.times do
        scan_sep.call
        k = scan_count.call('hash key') # temp obj
        scan_sep.call
        v = scan_count.call('hash value') # temp obj
        hash[k] = v
      end
      seen[idx_obj] = hash
    else
      raise BAD_STR, "not handled: #{code} #{scanner.pos} #{scanner.rest}"
    end
  end
  #
  # If we parsed correctly, there will be nothing unconsumed in the
  # scanner.
  #
  if !scanner.eos?
    raise BAD_STR, "not at end-of-string: #{scanner.pos} #{scanner.rest}"
  end
  #
  # We rectify the references for each intermediate Array and Hash
  # as promised earlier.
  #
  # Note that this code must be inherently mutation-oriented since
  # it might have to construct cyclic graphs.
  #
  rectified = seen.map do |elem|
    case elem
    when Array then []
    when Hash  then {}
    else            elem
    end
  end
  #
  # A reference to a slot which was never decoded means the input was
  # malformed, and must not be quietly rectified to nil.
  #
  resolve = lambda do |ref|
    if ref >= rectified.size
      raise BAD_STR, "reference #{ref} out of range"
    end
    rectified[ref]
  end
  rectified.each_with_index do |elem,idx|
    if elem.is_a?(Array)
      seen[idx].each_with_index do |a,i|
        elem[i] = resolve.call(a)
      end
    elsif elem.is_a?(Hash)
      seen[idx].each do |k,v|
        elem[resolve.call(k)] = resolve.call(v)
      end
    end
  end
  #
  # The object we are decoding is the first object encoded in str.
  #
  # Thus, we return the first element of the rectified list.
  #
  rectified.first
end
|
50
515
|
|
51
|
-
|
516
|
+
##########################################################################
|
517
|
+
#
|
518
|
+
# wrap() and unwrap() are the byte layer
|
519
|
+
#
|
520
|
+
# They are responsible for interconverting between naive strings and
|
521
|
+
# strings which are ready to go out on the wire into external
|
522
|
+
# storage.
|
523
|
+
#
|
524
|
+
# This layer is analogous to GZIP: it converts strings into a
|
525
|
+
# different representation which is smaller, more resistant to
|
526
|
+
# corruption, and/or more recognizable.
|
527
|
+
#
|
528
|
+
##########################################################################
|
52
529
|
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
530
|
+
# Wraps any string into a OAK string.
|
531
|
+
#
|
532
|
+
# Is inverted by unwrap(). For all str, unwrap(wrap(str)) == str.
|
533
|
+
#
|
534
|
+
# @param str naive string to be wrapped as an OAK string
|
535
|
+
#
|
536
|
+
# @param redundancy 'none', 'crc32' (default), or 'sha1'
|
537
|
+
#
|
538
|
+
# @param compression 'none' (default), 'lz4', 'zlib', 'bzip2', or 'lzma'
|
539
|
+
#
|
540
|
+
# @param force false (default), or true. When true, always
|
541
|
+
# compress. When false, fall back to the
|
542
|
+
# original if the compressed form is larger.
|
543
|
+
#
|
544
|
+
# @param key_chain OAK::KeyChain from which to draw the encryption
|
545
|
+
# key, or nil for none.
|
546
|
+
#
|
547
|
+
# @param key String name of a key in key_chain to be used
|
548
|
+
# for encryption, or nil if none.
|
549
|
+
#
|
550
|
+
# @param force_oak_4 Bool, for debugging, force oak_4 encoding even
|
551
|
+
# if no encryption key is specified.
|
552
|
+
#
|
553
|
+
# @param format 'none', 'base64' (default)
|
554
|
+
#
|
555
|
+
# @returns an OAK string
|
556
|
+
#
|
557
|
+
def self._wrap(str,opts={})
  #
  # Options may be given as Symbols or Strings.  They are normalized
  # to Strings here.
  #
  redundancy  = (opts[:redundancy]  || :crc32).to_s
  compression = (opts[:compression] || :none).to_s
  force       = (opts[:force]       || false)
  format      = (opts[:format]      || :base64).to_s
  key_chain   = opts[:key_chain]
  key         = opts[:key]
  debug_iv    = opts[:debug_iv]
  if key_chain && !key_chain.is_a?(KeyChain)
    raise ArgumentError, "bad key_chain #{key_chain}"
  end
  if debug_iv && !debug_iv.is_a?(String)
    raise ArgumentError, "bad debug_iv #{debug_iv}"
  end
  #
  # The IV length is a byte count, so measure debug_iv in bytes.
  # String#size counts characters, which disagrees with the byte
  # length for a string tagged with a multibyte encoding.
  #
  if debug_iv && ENCRYPTION_ALGO_IV_BYTES != debug_iv.bytesize
    raise ArgumentError, "bad debug_iv #{debug_iv}"
  end
  if key && !key_chain
    raise ArgumentError, "key #{key} without key_chain"
  end
  if key && !key_chain.keys[key]
    keys = key_chain.keys
    raise ArgumentError, "key not found in #{keys}: #{key}"
  end
  encryption_key = key ? key_chain.keys[key] : nil
  str            = str.b # dupe to Encoding::ASCII_8BIT
  #
  # OAK_4 is used when encrypting, or when explicitly forced.
  # Otherwise we emit OAK_3.
  #
  if encryption_key || opts[:force_oak_4]
    _wrap_oak_4(
      str,
      redundancy,
      compression,
      force,
      format,
      key,
      encryption_key,
      debug_iv
    )
  else
    _wrap_oak_3(
      str,
      redundancy,
      compression,
      force,
      format
    )
  end
end
|
62
604
|
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
605
|
+
def self._wrap_oak_3(
|
606
|
+
str,
|
607
|
+
redundancy,
|
608
|
+
compression,
|
609
|
+
force,
|
610
|
+
format
|
611
|
+
)
|
612
|
+
source_redundancy = _check(redundancy,str)
|
613
|
+
compressed, compression = _compress(compression,force,str)
|
614
|
+
formatted = _format(format,compressed)
|
615
|
+
output = 'oak_3' # format id+ver
|
616
|
+
output << REDUNDANCY_2_CODE[redundancy] # redundancy
|
617
|
+
output << COMPRESSION_2_CODE[compression] # compression
|
618
|
+
output << FORMAT_2_CODE[format] # format
|
619
|
+
output << '_'
|
620
|
+
output << source_redundancy # source check
|
621
|
+
output << '_'
|
622
|
+
output << '%d' % formatted.size # formatted size
|
623
|
+
output << '_'
|
624
|
+
output << formatted # payload
|
625
|
+
output << '_'
|
626
|
+
output << 'ok' # terminator
|
627
|
+
output.force_encoding(Encoding::ASCII_8BIT)
|
628
|
+
end
|
68
629
|
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
630
|
+
def self._wrap_oak_4(
|
631
|
+
str,
|
632
|
+
redundancy,
|
633
|
+
compression,
|
634
|
+
force,
|
635
|
+
format,
|
636
|
+
key,
|
637
|
+
encryption_key,
|
638
|
+
debug_iv
|
639
|
+
)
|
640
|
+
header = 'oak_4' # format id+ver
|
641
|
+
if key
|
642
|
+
header << key # key name
|
75
643
|
end
|
644
|
+
header << '_'
|
645
|
+
header << FORMAT_2_CODE[format] # format
|
646
|
+
compressed, compression = _compress(compression,force,str)
|
647
|
+
plaintext = ''
|
648
|
+
plaintext << REDUNDANCY_2_CODE[redundancy] # redundancy
|
649
|
+
plaintext << COMPRESSION_2_CODE[compression] # compression
|
650
|
+
plaintext << _check(redundancy,str) # source check
|
651
|
+
plaintext << '_'
|
652
|
+
plaintext << compressed
|
653
|
+
encrypted = _encrypt(
|
654
|
+
encryption_key,
|
655
|
+
plaintext,
|
656
|
+
header,
|
657
|
+
debug_iv
|
658
|
+
)
|
659
|
+
formatted = _format(format,encrypted)
|
660
|
+
output = header
|
661
|
+
output << '%d' % formatted.size # formatted size
|
662
|
+
output << '_'
|
663
|
+
output << formatted # payload
|
664
|
+
output << '_'
|
665
|
+
output << 'ok' # terminator
|
666
|
+
output.force_encoding(Encoding::ASCII_8BIT)
|
667
|
+
end
|
76
668
|
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
669
|
+
# Unwraps any OAK string into a string.
|
670
|
+
#
|
671
|
+
# Inverts wrap(). For all str, unwrap(wrap(str)) == str.
|
672
|
+
#
|
673
|
+
# @param str OAK string to be unwrapped
|
674
|
+
#
|
675
|
+
# @param key_chain OAK::KeyChain in which to look for keys to
|
676
|
+
# decrypt encrypted OAK strings, or nil for none.
|
677
|
+
#
|
678
|
+
# @returns a string
|
679
|
+
#
|
680
|
+
# @raises ArgumentError if str is not in OAK format.
|
681
|
+
#
|
682
|
+
def self._unwrap(str,opts={})
|
683
|
+
str = str.b # str.b for dup to ASCII_8BIT
|
684
|
+
sc = StringScanner.new(str)
|
685
|
+
ov = sc.scan(/oak_[34]/) or raise BAD_STR, "bad oak+ver"
|
686
|
+
if 'oak_4' == ov
|
687
|
+
_unwrap_oak_4(sc,opts) # encryption opts possible for decoding OAK_4 :(
|
688
|
+
else
|
689
|
+
_unwrap_oak_3(sc) # no opts for decoding OAK_3 :)
|
690
|
+
end
|
691
|
+
end
|
82
692
|
|
83
|
-
|
84
|
-
|
693
|
+
def self._unwrap_oak_3(sc)
|
694
|
+
r = sc.scan(/[NCS]/) or raise BAD_STR, "bad redundancy"
|
695
|
+
c = sc.scan(/[N4ZBM]/) or raise BAD_STR, "bad compression"
|
696
|
+
f = sc.scan(/[NB]/) or raise BAD_STR, "bad format"
|
697
|
+
_ = sc.scan(/_/) or raise BAD_STR, "missing _"
|
698
|
+
scheck = sc.scan(/[a-f0-9]+/) or raise BAD_STR, "bad scheck"
|
699
|
+
_ = sc.scan(/_/) or raise BAD_STR, "missing _"
|
700
|
+
fsize = sc.scan(/[0-9]+/) or raise BAD_STR, "bad fsize"
|
701
|
+
fsize = fsize.to_i
|
702
|
+
_ = sc.scan(/_/) or raise BAD_STR, "missing _"
|
703
|
+
formatted = sc.peek(fsize)
|
704
|
+
begin
|
705
|
+
sc.pos += fsize
|
706
|
+
rescue RangeError => ex
|
707
|
+
raise CantTouchThisStringError, "#{ex.class}: #{ex.message}"
|
85
708
|
end
|
709
|
+
_ = sc.scan(/_ok$/) or raise BAD_STR, "bad ok: #{formatted}"
|
710
|
+
redundancy = CODE_2_REDUNDANCY[r] || r
|
711
|
+
compression = CODE_2_COMPRESSION[c] || c
|
712
|
+
format = CODE_2_FORMAT[f] || f
|
713
|
+
fsize_re = formatted.size
|
714
|
+
if fsize.to_i != fsize_re
|
715
|
+
raise CantTouchThisStringError, "fsize #{fsize} vs #{fsize_re}"
|
716
|
+
end
|
717
|
+
compressed = _deformat(format,formatted)
|
718
|
+
original = _decompress(compression,compressed)
|
719
|
+
scheck_re = _check(redundancy,original)
|
720
|
+
if scheck != scheck_re
|
721
|
+
raise CantTouchThisStringError, "scheck #{scheck} vs #{scheck_re}"
|
722
|
+
end
|
723
|
+
original
|
724
|
+
end
|
86
725
|
|
87
|
-
|
88
|
-
|
89
|
-
|
726
|
+
def self._unwrap_oak_4(sc,opts={})
|
727
|
+
key = sc.scan(/[^_]+/) # nil OK, indicates no compression
|
728
|
+
encryption_key = nil
|
729
|
+
if key
|
730
|
+
key_chain = opts[:key_chain]
|
731
|
+
if !key_chain
|
732
|
+
raise CantTouchThisStringError, "key #{key} but no key_chain"
|
90
733
|
end
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
File.open('.gitignore', 'w') do |f|
|
96
|
-
f.write ignored
|
734
|
+
encryption_key = opts[:key_chain].keys[key]
|
735
|
+
if !encryption_key
|
736
|
+
keys = key_chain.keys
|
737
|
+
raise CantTouchThisStringError, "key not found in #{keys}: #{key}"
|
97
738
|
end
|
739
|
+
end
|
740
|
+
_ = sc.scan(/_/) or raise BAD_STR, "missing _"
|
741
|
+
f = sc.scan(/[NB]/) or raise BAD_STR, "bad format"
|
742
|
+
header = sc.string[0..(sc.pos-1)] # for authentication by _decrypt
|
743
|
+
format = CODE_2_FORMAT[f]
|
744
|
+
fsize = sc.scan(/[0-9]+/) or raise BAD_STR, "bad fsize"
|
745
|
+
fsize = fsize.to_i
|
746
|
+
_ = sc.scan(/_/) or raise BAD_STR, "missing _"
|
747
|
+
formatted = sc.peek(fsize)
|
748
|
+
begin
|
749
|
+
sc.pos += fsize
|
750
|
+
rescue RangeError => ex
|
751
|
+
raise CantTouchThisStringError, "#{ex.class}: #{ex.message}"
|
752
|
+
end
|
753
|
+
_ = sc.scan(/_ok$/) or raise BAD_STR, "bad ok"
|
754
|
+
encrypted = _deformat(format,formatted)
|
755
|
+
plaintext = _decrypt(encryption_key,encrypted,header)
|
756
|
+
sp = StringScanner.new(plaintext)
|
757
|
+
r = sp.scan(/[NCS]/) or raise BAD_STR, "bad redundancy"
|
758
|
+
c = sp.scan(/[N4ZBM]/) or raise BAD_STR, "bad compression"
|
759
|
+
scheck = sp.scan(/[a-f0-9]+/) or raise BAD_STR, "bad scheck"
|
760
|
+
_ = sp.scan(/_/) or raise BAD_STR, "missing _"
|
761
|
+
compressed = sp.rest
|
762
|
+
redundancy = CODE_2_REDUNDANCY[r] || r
|
763
|
+
compression = CODE_2_COMPRESSION[c] || c
|
764
|
+
original = _decompress(compression,compressed)
|
765
|
+
scheck_re = _check(redundancy,original)
|
766
|
+
if scheck != scheck_re
|
767
|
+
raise(
|
768
|
+
CantTouchThisStringError,
|
769
|
+
"scheck #{scheck} vs #{scheck_re} in #{sc.string}"
|
770
|
+
)
|
771
|
+
end
|
772
|
+
original
|
773
|
+
end
|
98
774
|
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
775
|
+
# How we encode object type.
|
776
|
+
#
|
777
|
+
TYPE_2_CODE ||= {
|
778
|
+
Hash => 'H',
|
779
|
+
Array => 'A',
|
780
|
+
String => 'S', # downcased to 's' for string table lookup
|
781
|
+
Symbol => 'Y', # downcased to 'y' for string table lookup
|
782
|
+
Integer => 'I',
|
783
|
+
Float => 'F',
|
784
|
+
NilClass => 'n',
|
785
|
+
TrueClass => 't',
|
786
|
+
FalseClass => 'f',
|
787
|
+
}.freeze
|
788
|
+
ALL_TYPES ||= TYPE_2_CODE.keys.freeze
|
103
789
|
|
104
|
-
|
105
|
-
|
790
|
+
# How we encode :format and :compression in the OAK strings.
|
791
|
+
#
|
792
|
+
FORMAT_2_CODE ||= {
|
793
|
+
'none' => 'N',
|
794
|
+
'base64' => 'B', # urlsafe form with padding and whitespace stripped
|
795
|
+
}.freeze
|
796
|
+
CODE_2_FORMAT ||= FORMAT_2_CODE.invert.freeze
|
106
797
|
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
798
|
+
# How we encode :compression in the OAK strings.
|
799
|
+
#
|
800
|
+
# Early on, I captures some metrics using the catenation of all our
|
801
|
+
# Ruby code as a test file.
|
802
|
+
#
|
803
|
+
# I measured:
|
804
|
+
#
|
805
|
+
# SOURCE 5707334
|
806
|
+
# none 5707370 compression 0.17s decompression 0.16s
|
807
|
+
# lzo 1804765 compression 0.18s decompression 0.16s
|
808
|
+
# lzf 1807971 compression 0.16s decompression 0.17s
|
809
|
+
# lz4 1813574 compression 0.17s decompression 0.14s
|
810
|
+
# zlib 1071216 compression 0.53s decompression 0.19s
|
811
|
+
# bzip2 868595 compression 0.62s decompression 0.33s
|
812
|
+
# lzma 760594 compression 6.22s decompression 0.20s
|
813
|
+
#
|
814
|
+
# From this, I conclude that only one of lzo,lzf,lz4 is interesting.
|
815
|
+
# They all yield approximately the same compression, and their
|
816
|
+
# compression times are indistinguishable from the rest of the
|
817
|
+
# streaming and encoding times imposed by OAK.
|
818
|
+
#
|
819
|
+
# I'm settling on supporting only lz4 because it seems to be better
|
820
|
+
# supported as a polymorphic lib - it's closer to a defacto standard
|
821
|
+
# for the LZ77 family.
|
822
|
+
#
|
823
|
+
# zlib, bzip2, and lzma each represent interesting distinct choices
|
824
|
+
# - I'm keeping support for all three.
|
825
|
+
#
|
826
|
+
COMPRESSION_2_CODE ||= {
|
827
|
+
'none' => 'N',
|
828
|
+
'lz4' => '4',
|
829
|
+
'zlib' => 'Z',
|
830
|
+
'bzip2' => 'B',
|
831
|
+
'lzma' => 'M',
|
832
|
+
}.freeze
|
833
|
+
CODE_2_COMPRESSION ||= COMPRESSION_2_CODE.invert.freeze
|
834
|
+
|
835
|
+
# How we encode :redundancy in the OAK strings.
|
836
|
+
#
|
837
|
+
REDUNDANCY_2_CODE ||= {
|
838
|
+
'none' => 'N',
|
839
|
+
'crc32' => 'C',
|
840
|
+
'sha1' => 'S',
|
841
|
+
}.freeze
|
842
|
+
CODE_2_REDUNDANCY ||= REDUNDANCY_2_CODE.invert.freeze
|
843
|
+
|
844
|
+
# Helper method, calculates redundancy check for str.
|
845
|
+
#
|
846
|
+
def self._check(redundancy,str)
|
847
|
+
case redundancy.to_s
|
848
|
+
when 'none' then return '0'
|
849
|
+
when 'crc32' then return '%d' % Zlib.crc32(str)
|
850
|
+
when 'sha1' then return Digest::SHA1.hexdigest(str)
|
851
|
+
else
|
852
|
+
raise ArgumentError, "unknown redundancy #{redundancy}"
|
853
|
+
end
|
854
|
+
end
|
855
|
+
|
856
|
+
# Helper method, calculates formatted version of str.
|
857
|
+
#
|
858
|
+
def self._format(format,str)
|
859
|
+
case format.to_s
|
860
|
+
when 'none'
|
861
|
+
return str
|
862
|
+
when 'base64'
|
863
|
+
#
|
864
|
+
# We actual using "Base 64 Encoding with URL and Filename Safe
|
865
|
+
# Alphabet" aka base64url with the option not to use padding,
|
866
|
+
# per https://tools.ietf.org/html/rfc4648#section-5.
|
867
|
+
#
|
868
|
+
# If we were using Ruby 2.3+, we could use the option "padding:
|
869
|
+
# false" instead of chopping out the /=*$/ with gsub.
|
870
|
+
#
|
871
|
+
return Base64.urlsafe_encode64(str).gsub(/=.*$/,'')
|
872
|
+
else
|
873
|
+
raise ArgumentError, "unknown format #{format}"
|
874
|
+
end
|
875
|
+
end
|
876
|
+
|
877
|
+
def self._deformat(format,str)
|
878
|
+
case format.to_s
|
879
|
+
when 'none'
|
880
|
+
return str
|
881
|
+
when 'base64'
|
882
|
+
#
|
883
|
+
# Regrettably, Base64.urlsafe_decode64(str) does not reverse
|
884
|
+
# Base64.urlsafe_encode64(str).gsub(/=.*$/,''), it raises an
|
885
|
+
# ArgumentError "invalid base64".
|
886
|
+
#
|
887
|
+
# Fortunately, simple Base64.decode64() is liberal in what it
|
888
|
+
# accepts, and handles the output of all of encode64,
|
889
|
+
# strict_encode64, and urlsafe_encode64 both with and without
|
890
|
+
# the /=*$/.
|
891
|
+
#
|
892
|
+
return Base64.decode64(str.tr('-_','+/'))
|
893
|
+
else
|
894
|
+
raise ArgumentError, "unknown format #{format}"
|
895
|
+
end
|
896
|
+
end
|
897
|
+
|
898
|
+
# Helper for wrap() and unwrap(), multiplexes encryption.
|
899
|
+
#
|
900
|
+
def self._encrypt(encryption_key,data,auth_data,debug_iv)
|
901
|
+
return data if !encryption_key
|
902
|
+
#
|
903
|
+
# WARNING: In at least some versions of OpenSSL::Cipher, setting
|
904
|
+
# iv before key would cause the iv to be ignored in aes-*-gcm
|
905
|
+
# ciphers!
|
906
|
+
#
|
907
|
+
# https://github.com/attr-encrypted/encryptor/pull/22
|
908
|
+
# https://github.com/attr-encrypted/encryptor/blob/master/README.md
|
909
|
+
#
|
910
|
+
# The issue was reported against version "1.0.1f 6 Jan 2014". I
|
911
|
+
# have yet to figure out whether our current version, 1.1.0, is
|
912
|
+
# affected, or when/how the fix will go live.
|
913
|
+
#
|
914
|
+
# OAK_4 only supports AES-256-GCB. Although the implementation
|
915
|
+
# bug has been fixed and OAK will almost certainly not be used
|
916
|
+
# with a buggy version of OpenSSL, nevertheless we take great
|
917
|
+
# care to set cipher.key *then* cipher.iv.
|
918
|
+
#
|
919
|
+
# Still, can't be to careful.
|
920
|
+
#
|
921
|
+
iv_size = ENCRYPTION_ALGO_IV_BYTES
|
922
|
+
auth_tag_size = ENCRYPTION_ALGO_AUTH_TAG_BYTES
|
923
|
+
if debug_iv && iv_size != debug_iv.size
|
924
|
+
raise "unexpected debug_iv.size #{debug_iv.size} not #{iv_size}"
|
925
|
+
end
|
926
|
+
cipher = encryption_algo.encrypt
|
927
|
+
cipher.key = encryption_key.key
|
928
|
+
iv = debug_iv || cipher.random_iv
|
929
|
+
cipher.iv = iv
|
930
|
+
cipher.auth_data = auth_data
|
931
|
+
ciphertext = cipher.update(data) + cipher.final
|
932
|
+
auth_tag = cipher.auth_tag
|
933
|
+
if iv_size != iv.size
|
934
|
+
raise "unexpected iv.size #{iv.size} not #{iv_size}"
|
935
|
+
end
|
936
|
+
if auth_tag_size != auth_tag.size
|
937
|
+
raise "unexpected auth_tag.size #{auth_tag.size} not #{auth_tag_size}"
|
938
|
+
end
|
939
|
+
#
|
940
|
+
# Since iv and auth_tag have fixed widths, they are trivial to
|
941
|
+
# parse without putting any effort or space into recording their
|
942
|
+
# sizes in the message body.
|
943
|
+
#
|
944
|
+
iv + auth_tag + ciphertext
|
945
|
+
end
|
946
|
+
|
947
|
+
# Helper for wrap() and unwrap(), multiplexes decryption.
|
948
|
+
#
|
949
|
+
def self._decrypt(encryption_key,data,auth_data)
|
950
|
+
return data if !encryption_key
|
951
|
+
iv_size = ENCRYPTION_ALGO_IV_BYTES
|
952
|
+
auth_tag_size = ENCRYPTION_ALGO_AUTH_TAG_BYTES
|
953
|
+
iv = data[0..(iv_size-1)]
|
954
|
+
auth_tag = data[iv_size..(auth_tag_size+iv_size-1)]
|
955
|
+
ciphertext = data[(auth_tag_size+iv_size)..-1]
|
956
|
+
cipher = encryption_algo.decrypt
|
957
|
+
cipher.key = encryption_key.key
|
958
|
+
begin
|
959
|
+
cipher.iv = iv
|
960
|
+
cipher.auth_tag = auth_tag
|
961
|
+
cipher.auth_data = auth_data
|
962
|
+
cipher.update(ciphertext) + cipher.final
|
963
|
+
rescue OpenSSL::Cipher::CipherError => ex
|
964
|
+
raise CantTouchThisStringError, "#{ex.class}: #{ex.message}"
|
965
|
+
end
|
966
|
+
end
|
967
|
+
|
968
|
+
# Helper for wrap() and unwrap(), multiplexes compression.
|
969
|
+
#
|
970
|
+
def self._compress(compression,force,str)
|
971
|
+
case compression.to_s
|
972
|
+
when 'none'
|
973
|
+
compressed = str
|
974
|
+
when 'lz4'
|
975
|
+
compressed = LZ4.compress(str)
|
976
|
+
when 'zlib'
|
977
|
+
compressed = Zlib.deflate(str)
|
978
|
+
when 'bzip2'
|
979
|
+
io = StringIO.new
|
980
|
+
io.set_encoding(Encoding::ASCII_8BIT)
|
981
|
+
Bzip2::FFI::Writer.write(io, str)
|
982
|
+
compressed = io.string
|
983
|
+
when 'lzma'
|
984
|
+
compressed = LZMA.compress(str)
|
985
|
+
else
|
986
|
+
raise ArgumentError, "unknown compression #{compression}"
|
987
|
+
end
|
988
|
+
if !force && compressed.size >= str.size
|
989
|
+
compressed = str
|
990
|
+
compression = 'none'
|
991
|
+
end
|
992
|
+
[compressed,compression.to_s]
|
993
|
+
end
|
994
|
+
|
995
|
+
# Helper for wrap() and unwrap(), multiplexes decompression.
|
996
|
+
#
|
997
|
+
def self._decompress(compression,str)
|
998
|
+
case compression.to_s
|
999
|
+
when 'none'
|
1000
|
+
return str
|
1001
|
+
when 'lz4'
|
1002
|
+
begin
|
1003
|
+
return LZ4.uncompress(str)
|
1004
|
+
rescue LZ4Internal::Error => ex
|
1005
|
+
raise CantTouchThisStringError, "#{ex.class}: #{ex.message}"
|
1006
|
+
end
|
1007
|
+
when 'zlib'
|
1008
|
+
begin
|
1009
|
+
return Zlib::Inflate.inflate(str)
|
1010
|
+
rescue Zlib::DataError => ex
|
1011
|
+
raise CantTouchThisStringError, "#{ex.class}: #{ex.message}"
|
1012
|
+
end
|
1013
|
+
when 'bzip2'
|
1014
|
+
io = StringIO.new(str)
|
1015
|
+
raw = nil
|
1016
|
+
begin
|
1017
|
+
raw = Bzip2::FFI::Reader.read(io)
|
1018
|
+
rescue Bzip2::FFI::Error::MagicDataError => ex
|
1019
|
+
raise CantTouchThisStringError, "#{ex.class}: #{ex.message}"
|
112
1020
|
end
|
113
|
-
|
1021
|
+
str = raw.b # dupe to Encoding::ASCII_8BIT
|
1022
|
+
return str
|
1023
|
+
when 'lzma'
|
1024
|
+
begin
|
1025
|
+
raw = LZMA.decompress(str)
|
1026
|
+
rescue RuntimeError => ex
|
1027
|
+
raise CantTouchThisStringError, "#{ex.class}: #{ex.message}"
|
1028
|
+
end
|
1029
|
+
str = raw.b # dupe to Encoding::ASCII_8BIT
|
1030
|
+
return str
|
1031
|
+
else
|
1032
|
+
raise ArgumentError, "unknown compression #{compression}"
|
114
1033
|
end
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
1034
|
+
end
|
1035
|
+
|
1036
|
+
# Walks obj recursively, touching each reachable child only once
|
1037
|
+
# without getting caught up cycles or touching DAGy bits twice.
|
1038
|
+
#
|
1039
|
+
# Only knows how to recurse into Arrays and Hashs.
|
1040
|
+
#
|
1041
|
+
# This traversal is depth-first pre-order with the children of
|
1042
|
+
# Arrays walked in positional anbd Hash pairs walked in positional
|
1043
|
+
# order k,v,k,v, etc.
|
1044
|
+
#
|
1045
|
+
# @param obj object to walk
|
1046
|
+
#
|
1047
|
+
# @param seen Hash which maps object_id => [idx,child] of every
|
1048
|
+
# object touched, where idx is 0,1,2,... corresponding to the order
|
1049
|
+
# in which we encountered child.
|
1050
|
+
#
|
1051
|
+
# @param reseen List of children which were walked more than once.
|
1052
|
+
#
|
1053
|
+
# @param block if present, every object touched is yielded to block
|
1054
|
+
#
|
1055
|
+
# @return seen,reseen
|
1056
|
+
#
|
1057
|
+
def self._safety_dance(obj,seen=nil,reseen=nil,&block)
|
1058
|
+
#
|
1059
|
+
# Note that OAK._serialize() depends on the depth-first pre-order
|
1060
|
+
# specification here - at least, it assumes that the first element
|
1061
|
+
# walked will be the first element added to seen.
|
1062
|
+
#
|
1063
|
+
seen ||= {}
|
1064
|
+
reseen ||= []
|
1065
|
+
oid = obj.object_id
|
1066
|
+
if seen.has_key?(oid)
|
1067
|
+
reseen << obj
|
1068
|
+
return seen,reseen
|
120
1069
|
end
|
1070
|
+
seen[oid] = [seen.size,obj]
|
1071
|
+
yield obj if block # pre-order: this node before children
|
1072
|
+
if obj.is_a?(Hash)
|
1073
|
+
obj.each do |k,v| # children in hash order and k,v,...
|
1074
|
+
_safety_dance(k,seen,reseen,&block)
|
1075
|
+
_safety_dance(v,seen,reseen,&block)
|
1076
|
+
end
|
1077
|
+
elsif obj.is_a?(Array)
|
1078
|
+
obj.each do |v| # children in list order
|
1079
|
+
_safety_dance(v,seen,reseen,&block)
|
1080
|
+
end
|
1081
|
+
end
|
1082
|
+
return seen,reseen
|
121
1083
|
end
|
1084
|
+
|
122
1085
|
end
|