oak 0.0.3 → 0.4.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +51 -0
- data/.rubocop.yml +74 -0
- data/.travis.yml +17 -0
- data/CHANGELOG.md +24 -0
- data/DESIDERATA.md +318 -0
- data/Gemfile +3 -15
- data/LICENSE +22 -0
- data/Makefile +113 -0
- data/README.md +163 -23
- data/Rakefile +6 -47
- data/bin/oak +242 -3
- data/bin/oak.rb +245 -0
- data/lib/oak.rb +1049 -86
- data/lib/oak/version.rb +3 -0
- data/oak.gemspec +29 -65
- metadata +121 -71
- data/.document +0 -5
- data/Gemfile.lock +0 -26
- data/LICENSE.txt +0 -20
- data/VERSION +0 -1
- data/test/files/config/application.rb +0 -3
- data/test/files/config/database.yml +0 -25
- data/test/files/config/initializers/secret_token.rb +0 -7
- data/test/files/dot_gitignore +0 -0
- data/test/helper.rb +0 -29
- data/test/test_oak.rb +0 -44
data/bin/oak.rb
ADDED
@@ -0,0 +1,245 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# oak.rb: cli driver for encoding strings in the OAK format.
|
4
|
+
#
|
5
|
+
# author: jhw@prosperworks.com
|
6
|
+
# incept: 2016-03-05
|
7
|
+
#
|
8
|
+
|
9
|
+
require_relative '../lib/oak.rb'
|
10
|
+
require 'optimist'
|
11
|
+
|
12
|
+
OLD_ARGV = ARGV.dup # ARGV is consumed by Optimist but we use later.
|
13
|
+
OPTS = Optimist.options do
|
14
|
+
banner "#{$0} cli driver for OAK"
|
15
|
+
banner <<-OPTIMIST_EXAMPLES
|
16
|
+
Examples:
|
17
|
+
$ echo hello | bin/oak.rb
|
18
|
+
oak_3CNB_1944283675_15_RjFTVTVfaGVsbG8_ok
|
19
|
+
$ (echo hello ; echo world) | bin/oak.rb
|
20
|
+
oak_3CNB_1944283675_15_RjFTVTVfaGVsbG8_ok
|
21
|
+
oak_3CNB_2139413982_15_RjFTVTVfd29ybGQ_ok
|
22
|
+
$ (echo hello ; echo world) | bin/oak.rb --compression zlib --force
|
23
|
+
oak_3CZB_1944283675_26_eJxzMwwONY3PSM3JyQcAFF4DyA_ok
|
24
|
+
oak_3CZB_2139413982_26_eJxzMwwONY0vzy_KSQEAFNgD3A_ok
|
25
|
+
$ (echo hello ; echo world) | bin/oak.rb --format none
|
26
|
+
oak_3CNN_1944283675_11_F1SU5_hello_ok
|
27
|
+
oak_3CNN_2139413982_11_F1SU5_world_ok
|
28
|
+
$ (echo hello ; echo world) | bin/oak.rb | bin/oak.rb --mode decode-lines
|
29
|
+
hello
|
30
|
+
world
|
31
|
+
OPTIMIST_EXAMPLES
|
32
|
+
banner "Options:"
|
33
|
+
version "#{$0} #{OAK::VERSION}"
|
34
|
+
opt :redundancy, 'redundancy', :default => 'crc32'
|
35
|
+
opt :format, 'format', :default => 'base64'
|
36
|
+
opt :compression, 'compression', :default => 'none'
|
37
|
+
opt :force, 'compress even if bigger', :default => false
|
38
|
+
opt :mode, 'mode', :default => 'encode-lines'
|
39
|
+
opt :key_chain, 'key chain env name', :type => :string
|
40
|
+
opt :key, 'encrypt key name', :type => :string
|
41
|
+
opt :key_check, 'check available keys', :default => false
|
42
|
+
opt :key_generate, 'generate new key', :default => false
|
43
|
+
opt :force_oak_4, 'force OAK_4 even unencrypted', :default => false
|
44
|
+
opt :eigen, 'calc eigenratio', :type => :int
|
45
|
+
opt :self_test, 'self-test only', :default => false
|
46
|
+
opt :help, 'show this help'
|
47
|
+
end
|
48
|
+
Optimist::die :eigen, "must be non-negative" if OPTS[:eigen] && OPTS[:eigen] < 0
|
49
|
+
|
50
|
+
oak_opts = {}
|
51
|
+
oak_opts[:redundancy] = OPTS[:redundancy]
|
52
|
+
oak_opts[:compression] = OPTS[:compression]
|
53
|
+
oak_opts[:force] = OPTS[:force]
|
54
|
+
oak_opts[:format] = OPTS[:format]
|
55
|
+
oak_opts[:key_chain] = OAK.parse_env_chain(ENV,OPTS[:key_chain])
|
56
|
+
oak_opts[:key] = OPTS[:key]
|
57
|
+
oak_opts[:force_oak_4] = OPTS[:force_oak_4]
|
58
|
+
|
59
|
+
if !OAK::REDUNDANCY_2_CODE.keys.include?(oak_opts[:redundancy])
|
60
|
+
Optimist::die :redundancy, "bogus #{OPTS[:redundancy]}"
|
61
|
+
end
|
62
|
+
if !OAK::COMPRESSION_2_CODE.keys.include?(oak_opts[:compression])
|
63
|
+
Optimist::die :compression, "bogus #{OPTS[:compression]}"
|
64
|
+
end
|
65
|
+
cool_formats = OAK::FORMAT_2_CODE.keys
|
66
|
+
if !cool_formats.include?(oak_opts[:format])
|
67
|
+
Optimist::die :format, "bogus #{OPTS[:format]} not in #{cool_formats}"
|
68
|
+
end
|
69
|
+
|
70
|
+
=begin
|
71
|
+
|
72
|
+
doctest: simple transcoding
|
73
|
+
>> OAK::decode(OAK::encode([1,"2",3.000001]))
|
74
|
+
=> [1,"2",3.000001]
|
75
|
+
>> OAK::decode(OAK::encode({foo: "bar"}))
|
76
|
+
=> {foo: "bar"}
|
77
|
+
>> OAK::decode(OAK::encode({foo: :bar}))
|
78
|
+
=> {foo: :bar}
|
79
|
+
>> OAK::decode(OAK::encode("Hello, World!"))
|
80
|
+
=> "Hello, World!"
|
81
|
+
>> OAK::decode(OAK::encode("Hello, World!", format: :none, redundancy: :none))
|
82
|
+
=> "Hello, World!"
|
83
|
+
|
84
|
+
doctest: stability of encoding
|
85
|
+
>> OAK::decode("oak_3NNB_0_30_RjNIMV8xXzJZQTNfZm9vU1UzX2Jhcg_ok")
|
86
|
+
=> {:foo=>"bar"}
|
87
|
+
>> OAK::encode(1, format: :base64, redundancy: :none)
|
88
|
+
=> "oak_3NNB_0_6_RjFJMQ_ok"
|
89
|
+
>> OAK::encode(1, format: :base64, redundancy: :crc32)
|
90
|
+
=> "oak_3CNB_3405226796_6_RjFJMQ_ok"
|
91
|
+
>> OAK::encode(1, format: :none, redundancy: :crc32)
|
92
|
+
=> "oak_3CNN_3405226796_4_F1I1_ok"
|
93
|
+
>> hello_utf8 = "Hello, World!".force_encoding('UTF-8')
|
94
|
+
=> "Hello, World!"
|
95
|
+
>> OAK::encode(hello_utf8, format: :base64, redundancy: :none)
|
96
|
+
=> "oak_3NNB_0_27_RjFTVTEzX0hlbGxvLCBXb3JsZCE_ok"
|
97
|
+
>> OAK::encode(hello_utf8, format: :none, redundancy: :crc32)
|
98
|
+
=> "oak_3CNN_2351984628_20_F1SU13_Hello, World!_ok"
|
99
|
+
|
100
|
+
Note above I used force_encoding('UTF-8') after discovering that with
|
101
|
+
Ruby 2.1.6 on Mac I get Encoding.default_encoding is UTF-8, but with
|
102
|
+
Ruby 2.1.6 on Linux I get Encoding.default_encoding is US-ASCII!
|
103
|
+
|
104
|
+
=end
|
105
|
+
|
106
|
+
if __FILE__ == $0
|
107
|
+
if OPTS[:self_test]
|
108
|
+
require 'rubydoctest'
|
109
|
+
exit RubyDocTest::Runner.new(File.read(__FILE__), __FILE__).run ? 0 : 1
|
110
|
+
end
|
111
|
+
if OPTS[:key_check]
|
112
|
+
if !OPTS[:key_chain]
|
113
|
+
puts "no --key-chain specified"
|
114
|
+
else
|
115
|
+
keys = oak_opts[:key_chain].keys.keys
|
116
|
+
if 0 == keys.size
|
117
|
+
puts "#{OPTS[:key_chain]}: no keys found"
|
118
|
+
else
|
119
|
+
puts "#{OPTS[:key_chain]}: found keys: #{keys.join(' ')}"
|
120
|
+
end
|
121
|
+
end
|
122
|
+
end
|
123
|
+
if OPTS[:key_generate]
|
124
|
+
STDOUT.puts OAK.encode(OAK.random_key)
|
125
|
+
exit 0
|
126
|
+
end
|
127
|
+
if !$stdin.tty?
|
128
|
+
if OPTS[:eigen]
|
129
|
+
prev = STDIN.read
|
130
|
+
puts "input: %d" % prev.size
|
131
|
+
OPTS[:eigen].times do |i|
|
132
|
+
oak = OAK.encode(prev,oak_opts)
|
133
|
+
psize = prev.size
|
134
|
+
wsize = oak.size
|
135
|
+
ratio = 1.0 * wsize / psize
|
136
|
+
puts " iter %3d: %4d => %4d ratio %.2f" % [i,psize,wsize,ratio]
|
137
|
+
prev = oak
|
138
|
+
end
|
139
|
+
exit 0
|
140
|
+
end
|
141
|
+
unhappiness = 0
|
142
|
+
case OPTS[:mode]
|
143
|
+
when 'cat'
|
144
|
+
ARGF.each_line.map(&:strip).each do |line|
|
145
|
+
puts line
|
146
|
+
end
|
147
|
+
when 'encode-lines'
|
148
|
+
ARGF.each_line.map(&:strip).each do |line|
|
149
|
+
puts OAK.encode(line,oak_opts)
|
150
|
+
end
|
151
|
+
when 'decode-lines'
|
152
|
+
ARGF.each_line.map(&:strip).each do |line|
|
153
|
+
puts OAK.decode(line,oak_opts)
|
154
|
+
end
|
155
|
+
when 'encode-file'
|
156
|
+
puts OAK.encode(STDIN.read,oak_opts)
|
157
|
+
when 'decode-file'
|
158
|
+
STDOUT.write OAK.decode(STDIN.read.strip,oak_opts)
|
159
|
+
when 'recode-file'
|
160
|
+
puts OAK.encode(OAK.decode(STDIN.read,oak_opts),oak_opts)
|
161
|
+
when 'crazy'
|
162
|
+
#
|
163
|
+
# --mode crazy prints out a sample of OAK strings for various
|
164
|
+
# challenging cases.
|
165
|
+
#
|
166
|
+
cycle_a = ['cycle_a','TBD']
|
167
|
+
cycle_b = ['cycle_b',cycle_a]
|
168
|
+
cycle_a[1] = cycle_b
|
169
|
+
dag_c = ['dag_c']
|
170
|
+
dag_b = ['dag_b',dag_c]
|
171
|
+
dag_a = ['dag_a',dag_b,dag_c]
|
172
|
+
[
|
173
|
+
'hello',
|
174
|
+
['hello'] + ['hello',:hello] * 2,
|
175
|
+
{1=>'a','b'=>2,[]=>3,''=>4,{}=>5,nil=>6},
|
176
|
+
['x','x','x','x','x','x','x','x','x','x','x','x','x'],
|
177
|
+
['x'] * 13,
|
178
|
+
cycle_a,
|
179
|
+
dag_a,
|
180
|
+
[1,-123,0.12,-0.123,Float::NAN,-Float::INFINITY,3.14159265358979],
|
181
|
+
].each do |obj|
|
182
|
+
oak = OAK.encode(
|
183
|
+
obj,
|
184
|
+
redundancy: :crc32,
|
185
|
+
format: :none,
|
186
|
+
compression: :none,
|
187
|
+
)
|
188
|
+
puts ""
|
189
|
+
puts "obj: #{obj}"
|
190
|
+
puts " oak: #{oak}"
|
191
|
+
begin
|
192
|
+
dec = OAK.decode(oak,oak_opts)
|
193
|
+
if dec != obj
|
194
|
+
if !dec.is_a?(Float) && !enc.is_a?(Float) && !dec.nan? && !enc.nan?
|
195
|
+
unhappiness += 1
|
196
|
+
puts " BAD: #{dec}"
|
197
|
+
end
|
198
|
+
end
|
199
|
+
rescue OAK::CantTouchThisStringError => ex
|
200
|
+
puts " BAD: #{ex.message}: #{ex.backtrace_locations[0]}"
|
201
|
+
unhappiness += 1
|
202
|
+
end
|
203
|
+
end
|
204
|
+
when 'tests'
|
205
|
+
[
|
206
|
+
[1,2,3],
|
207
|
+
{:foo=>'foo','foo'=>['x']*10},
|
208
|
+
-1,
|
209
|
+
Float::NAN,
|
210
|
+
nil,
|
211
|
+
].each do |obj|
|
212
|
+
puts " #{obj} => ["
|
213
|
+
key_chain = OAK::KeyChain.new(
|
214
|
+
{ 'l0ng3r' => OAK::Key.new('xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx') }
|
215
|
+
)
|
216
|
+
[
|
217
|
+
{redundancy: :none, format: :none, compression: :none },
|
218
|
+
{redundancy: :none, format: :base64,compression: :lz4, force: true},
|
219
|
+
{redundancy: :crc32,format: :base64,compression: :zlib, force: true},
|
220
|
+
{redundancy: :crc32,format: :base64,compression: :bzip2,force: true},
|
221
|
+
{redundancy: :sha1, format: :base64,compression: :lzma, force: true},
|
222
|
+
{key_chain: key_chain,force_oak_4: true,format: :none, },
|
223
|
+
{key_chain: key_chain,force_oak_4: true, },
|
224
|
+
{key_chain: key_chain,key: 'l0ng3r', },
|
225
|
+
].each do |opts|
|
226
|
+
oak = OAK.encode(obj,opts)
|
227
|
+
puts " '#{oak}',"
|
228
|
+
dec = OAK.decode(oak,opts)
|
229
|
+
if dec != obj
|
230
|
+
if !dec.is_a?(Float) && !enc.is_a?(Float) && !dec.nan? && !enc.nan?
|
231
|
+
unhappiness += 1
|
232
|
+
end
|
233
|
+
end
|
234
|
+
end
|
235
|
+
puts " ],"
|
236
|
+
end
|
237
|
+
else
|
238
|
+
Optimist::die :mode, "bogus mode #{OPTS[:mode]}"
|
239
|
+
end
|
240
|
+
if unhappiness > 0
|
241
|
+
puts "unhappiness: #{unhappiness}"
|
242
|
+
end
|
243
|
+
exit unhappiness
|
244
|
+
end
|
245
|
+
end
|
data/lib/oak.rb
CHANGED
@@ -1,122 +1,1085 @@
|
|
1
|
-
|
1
|
+
# coding: utf-8
|
2
|
+
#
|
3
|
+
# OAK: An encoding format with enough polymorphism to support run-time
|
4
|
+
# performance experimentation and some light encryption-at-rest.
|
5
|
+
#
|
6
|
+
# author: jhw@prosperworks.com
|
7
|
+
# incept: 2016-03-02
|
2
8
|
|
3
|
-
|
4
|
-
|
5
|
-
|
9
|
+
require_relative 'oak/version'
|
10
|
+
require 'strscan'
|
11
|
+
require 'digest'
|
12
|
+
require 'base64'
|
13
|
+
require 'lz4-ruby'
|
14
|
+
require 'zlib'
|
15
|
+
require 'bzip2/ffi'
|
16
|
+
require 'lzma'
|
17
|
+
require 'openssl'
|
6
18
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
19
|
+
module OAK
|
20
|
+
|
21
|
+
# CantTouchThisObjectError is thrown when encode() or serialize() is
|
22
|
+
# called on an object which cannot be encoded losslessly by OAK.
|
23
|
+
#
|
24
|
+
class CantTouchThisObjectError < ArgumentError ; end
|
25
|
+
|
26
|
+
# CantTouchThisStringError is thrown when decode(), deserialize(),
|
27
|
+
# or unwrap() called on a String which cannot be decoded.
|
28
|
+
#
|
29
|
+
class CantTouchThisStringError < ArgumentError ; end
|
30
|
+
|
31
|
+
# Internal syntactic conveniences.
|
32
|
+
#
|
33
|
+
BAD_OBJ = CantTouchThisObjectError
|
34
|
+
BAD_STR = CantTouchThisStringError
|
35
|
+
|
36
|
+
# OAK_4 supports one and only one encryption algorithm and mode of
|
37
|
+
# operation.
|
38
|
+
#
|
39
|
+
# - AES-256-GCM
|
40
|
+
# - 128 bits of security
|
41
|
+
# - 256-bit keys (32 bytes)
|
42
|
+
# - 96-bit IVs (12 bytes)
|
43
|
+
# - 128-bit auth_tags (16 bytes)
|
44
|
+
# - Random IV ("Initialization Vector") for each encryption op
|
45
|
+
# - All headers authenticated.
|
46
|
+
# - Headers encrypted when not required for decryption.
|
47
|
+
#
|
48
|
+
ENCRYPTION_ALGO_NAME = 'aes-256-gcm'.freeze
|
49
|
+
ENCRYPTION_ALGO_IV_BYTES = 12 # AES-256-GCM has 96-bit IVs
|
50
|
+
ENCRYPTION_ALGO_AUTH_TAG_BYTES = 16 # AES-256-GCM has 128-bit auth, we use all
|
51
|
+
|
52
|
+
# Get a new instance of OpenSSL::Cipher for our algorithm.
|
53
|
+
#
|
54
|
+
def self.encryption_algo
|
55
|
+
OpenSSL::Cipher.new(ENCRYPTION_ALGO_NAME)
|
56
|
+
end
|
57
|
+
|
58
|
+
# Generate a new random key appropriate for the OAK_4 encryption
|
59
|
+
# algorithm.
|
60
|
+
#
|
61
|
+
def self.random_key
|
62
|
+
encryption_algo.random_key
|
63
|
+
end
|
64
|
+
|
65
|
+
# Generate a new random initialization vector appropriate for the
|
66
|
+
# OAK_4 encryption algorithm.
|
67
|
+
#
|
68
|
+
def self.random_iv
|
69
|
+
encryption_algo.random_iv
|
70
|
+
end
|
71
|
+
|
72
|
+
class Key
|
73
|
+
|
74
|
+
# @param key String encryption key suitable for AES-256,
|
75
|
+
# specifically a binary string of 32 bytes (256 bits),
|
76
|
+
# randomly-generated and kept very, very secret.
|
77
|
+
#
|
78
|
+
def initialize(key)
|
79
|
+
if !key.is_a?(String)
|
80
|
+
raise ArgumentError, "bad non-String key: ELIDED"
|
81
|
+
end
|
82
|
+
rk_size = OAK.random_key.size
|
83
|
+
if key.size != rk_size
|
84
|
+
raise ArgumentError, "bad key ELIDED, length not #{rk_size}"
|
85
|
+
end
|
86
|
+
@key = key.dup.freeze # happy :)
|
87
|
+
end
|
88
|
+
|
89
|
+
attr_reader :key
|
90
|
+
|
91
|
+
def inspect
|
92
|
+
#
|
93
|
+
# Avoid exposing the key in casual logs or console session.
|
94
|
+
#
|
95
|
+
to_s[0..-2] + " @key=ELIDED>"
|
16
96
|
end
|
97
|
+
|
17
98
|
end
|
18
99
|
|
19
|
-
|
100
|
+
class KeyChain
|
20
101
|
|
21
|
-
def
|
22
|
-
|
23
|
-
|
24
|
-
|
102
|
+
def initialize(keys)
|
103
|
+
if !keys.is_a?(Hash)
|
104
|
+
raise ArgumentError, "bogus keys #{keys}"
|
105
|
+
end
|
106
|
+
keys.each do |k,v|
|
107
|
+
if !k.is_a?(String)
|
108
|
+
raise ArgumentError, "bogus key #{k} in keys #{keys}"
|
109
|
+
end
|
110
|
+
if /^[a-zA-Z][0-9a-zA-Z]*$/ !~ k
|
111
|
+
#
|
112
|
+
# In oak_4, we restrict key names to sequences which look
|
113
|
+
# like code identifiers: alphanumeric strings which start
|
114
|
+
# with a letter.
|
115
|
+
#
|
116
|
+
# This keeps the encoding simple but compact.
|
117
|
+
#
|
118
|
+
raise ArgumentError, "bad key #{k} in keys #{keys}"
|
119
|
+
end
|
120
|
+
if !v.is_a?(Key)
|
121
|
+
raise ArgumentError, "bogus val #{v} at #{k} in keys #{keys}"
|
25
122
|
end
|
26
123
|
end
|
124
|
+
#
|
125
|
+
# We are a happy KeyChain object now!
|
126
|
+
#
|
127
|
+
@keys = keys.dup.freeze
|
128
|
+
end
|
129
|
+
|
130
|
+
attr_reader :keys
|
131
|
+
|
132
|
+
end
|
133
|
+
|
134
|
+
# Parses a KeyChain object and keys from an ENV-like object.
|
135
|
+
#
|
136
|
+
# E.g. if the ENV contains:
|
137
|
+
#
|
138
|
+
# FOO_KEYS=a,b
|
139
|
+
# FOO_KEY_a=#{OAK.encode(<binary key>)}
|
140
|
+
# FOO_KEY_b=#{OAK.encode(<binary key>)}
|
141
|
+
#
|
142
|
+
# ...then the call OAK.parse_env_chain(ENV,'FOO') will return a new
|
143
|
+
# OAK::KeyChain with two OAK::Keys, 'a' and 'b'.
|
144
|
+
#
|
145
|
+
# This self-referential (but not recursive!) use of OAK to encode
|
146
|
+
# the key and iv is to avoid the problems with binary strings in ENV
|
147
|
+
# variables, 'heroku config:set' command line arguments, etc.
|
148
|
+
#
|
149
|
+
# @param env ENV or an ENV-like Hash from String to String.
|
150
|
+
#
|
151
|
+
# @param name String the root token
|
152
|
+
#
|
153
|
+
# @returns a new OAK::KeyChain
|
154
|
+
#
|
155
|
+
def self.parse_env_chain(env,name)
|
156
|
+
key_names = (env["#{name}_KEYS"] || '').gsub(/^[, ]*/,'').split(/[ ,]+/)
|
157
|
+
keys = key_names.map do |key_name|
|
158
|
+
key = OAK.decode(env["#{name}_KEY_#{key_name}"] || '')
|
159
|
+
[ key_name, Key.new(key) ]
|
160
|
+
end.to_h
|
161
|
+
KeyChain.new(keys)
|
162
|
+
end
|
163
|
+
|
164
|
+
##########################################################################
|
165
|
+
#
|
166
|
+
# encode() and decode() are the top layer
|
167
|
+
#
|
168
|
+
# They coordinate the structure layer and the byte layer.
|
169
|
+
#
|
170
|
+
# These are the recommended entry points for most callers.
|
171
|
+
#
|
172
|
+
##########################################################################
|
173
|
+
|
174
|
+
# Encodes suitable objects into OAK strings.
|
175
|
+
#
|
176
|
+
# Is inverted by decode(). For all obj, if encode(obj) does not
|
177
|
+
# raise an exception, decode(encode(obj)) == obj.
|
178
|
+
#
|
179
|
+
# @param obj to encode
|
180
|
+
#
|
181
|
+
# @param redundancy 'none', 'crc32' (default), or 'sha1'
|
182
|
+
#
|
183
|
+
# @param compression 'none' (default), 'lz4', 'zlib', 'bzip2', 'lzma'
|
184
|
+
#
|
185
|
+
# @param force false (default), or true. When true, always
|
186
|
+
# compress. When false, fall back to the
|
187
|
+
# original if the compressed form is larger.
|
188
|
+
#
|
189
|
+
# @param key_chain OAK::KeyChain from which to draw the encryption
|
190
|
+
# key, or nil for none.
|
191
|
+
#
|
192
|
+
# @param key String name of a key in key_chain to be used
|
193
|
+
# for encryption, or nil if none.
|
194
|
+
#
|
195
|
+
# @param format 'none', 'base64' (default)
|
196
|
+
#
|
197
|
+
# @param force_oak_4 Bool, for debugging, force oak_4 encoding even
|
198
|
+
# if no encryption key is specified.
|
199
|
+
#
|
200
|
+
# @param debug_iv String, force encryption with a known IV, TEST ONLY!
|
201
|
+
#
|
202
|
+
# WARNING: Use of debug_iv jeopardizes the security of all messages
|
203
|
+
# *ever* encrypted with that key! Never use debug_iv in production!
|
204
|
+
#
|
205
|
+
# @raises ArgumentError if obj is not handled.
|
206
|
+
#
|
207
|
+
def self.encode(obj,opts={})
|
208
|
+
ser = _serialize(obj)
|
209
|
+
_wrap(ser,opts)
|
210
|
+
end
|
27
211
|
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
212
|
+
# Decodes suitable OAK strings into objects.
|
213
|
+
#
|
214
|
+
# Inverts encode().
|
215
|
+
#
|
216
|
+
# @param str String to decode
|
217
|
+
#
|
218
|
+
# @param key_chain OAK::KeyChain in which to look for keys to
|
219
|
+
# decrypt encrypted OAK strings, or nil for none.
|
220
|
+
#
|
221
|
+
# @returns obj the decoded object
|
222
|
+
#
|
223
|
+
# @raises ArgumentError if str is not a recognized string.
|
224
|
+
#
|
225
|
+
def self.decode(str,opts={})
|
226
|
+
if !str.is_a?(String)
|
227
|
+
raise ArgumentError, "str not a String"
|
228
|
+
end
|
229
|
+
ser = _unwrap(str,opts)
|
230
|
+
_deserialize(ser)
|
231
|
+
end
|
232
|
+
|
233
|
+
##########################################################################
|
234
|
+
#
|
235
|
+
# serialize() and deserialize() are the structure layer
|
236
|
+
#
|
237
|
+
# They are responsible for interconverting between objects and naive
|
238
|
+
# strings.
|
239
|
+
#
|
240
|
+
# This layer is analogous to TAR for files or JSON: it converts
|
241
|
+
# structure into string and vice-versa.
|
242
|
+
#
|
243
|
+
##########################################################################
|
244
|
+
|
245
|
+
# Serializes suitable objects into naive strings.
|
246
|
+
#
|
247
|
+
# Is inverted by deserialize(). For all obj, if serialize(obj) does
|
248
|
+
# not raise an exception, deserialize(serialize(obj)) == obj.
|
249
|
+
#
|
250
|
+
# @raises CantTouchThisObjectError if obj contains any types or
|
251
|
+
# structure which cannot be encoded reversibly by OAK.
|
252
|
+
#
|
253
|
+
def self._serialize(obj)
|
254
|
+
seen,_reseen = _safety_dance(obj) do |child|
|
255
|
+
next if ALL_TYPES.select{ |type| child.is_a?(type) }.size > 0
|
256
|
+
raise CantTouchThisObjectError, "#{child.class} not supported: #{child}"
|
257
|
+
end
|
258
|
+
strt = Hash.new # string table, str => id for strings already encoded
|
259
|
+
ser = 'F'
|
260
|
+
ser << seen.size.to_s
|
261
|
+
seen.each_with_index do |(_object_id,(_idx2,child)),_idx|
|
262
|
+
#
|
263
|
+
# First, identify the unique apex type in TYPE_2_CODE.keys
|
264
|
+
# which matches the child.
|
265
|
+
#
|
266
|
+
# child.class may not be listed explicitly, such as for Fixnum
|
267
|
+
# and Bignum both being Integer, so we search and assert
|
268
|
+
# uniqueness and existence.
|
269
|
+
#
|
270
|
+
is_as = ALL_TYPES.select{ |type| child.is_a?(type) }
|
271
|
+
raise CantTouchThisObjectError if 1 != is_as.size
|
272
|
+
type = is_as[0]
|
273
|
+
typecode = TYPE_2_CODE[type]
|
274
|
+
if nil == child || true == child || false == child
|
275
|
+
#
|
276
|
+
# The type code by itself is sufficient to decode NilType,
|
277
|
+
# TrueType, and FalseType. We need use no other space for them.
|
278
|
+
#
|
279
|
+
ser << typecode
|
280
|
+
next
|
281
|
+
end
|
282
|
+
if child.is_a?(Symbol) || child.is_a?(String)
|
283
|
+
#
|
284
|
+
# Strings and Symbols encode as their size in bytes followed
|
285
|
+
# by their bytes.
|
286
|
+
#
|
287
|
+
# We maintain a running string table, strt, to recognize when
|
288
|
+
# we encounter a string representation which has been
|
289
|
+
# previously encoded.
|
290
|
+
#
|
291
|
+
# If we find such a duplicate, we encode the current string
|
292
|
+
# via a back reference to the first one we saw. This is
|
293
|
+
# indicated by downcasing the typecode.
|
294
|
+
#
|
295
|
+
str = child.to_s
|
296
|
+
enc = str.encoding
|
297
|
+
enc_code = nil
|
298
|
+
case enc
|
299
|
+
when Encoding::ASCII_8BIT, Encoding::US_ASCII, Encoding::ASCII
|
300
|
+
enc_code = 'A'
|
301
|
+
when Encoding::UTF_8
|
302
|
+
enc_code = 'U'
|
303
|
+
else
|
304
|
+
raise CantTouchThisObjectError, "unknown string encoding #{enc}"
|
34
305
|
end
|
35
|
-
|
36
|
-
|
37
|
-
|
306
|
+
if strt.has_key?(str)
|
307
|
+
ser << typecode.downcase # downcase indicates strt reference
|
308
|
+
ser << enc_code
|
309
|
+
ser << strt[str].to_s
|
310
|
+
else
|
311
|
+
ser << typecode # upcase indicates full representation
|
312
|
+
ser << enc_code
|
313
|
+
ser << str.bytesize.to_s
|
314
|
+
if str.bytesize > 0
|
315
|
+
ser << '_'
|
316
|
+
ser << str
|
317
|
+
end
|
318
|
+
strt[str] = strt.size
|
38
319
|
end
|
320
|
+
next
|
39
321
|
end
|
40
|
-
|
322
|
+
if child.is_a?(Numeric)
|
323
|
+
#
|
324
|
+
# Numerics primitives encode as their Ruby to_s which
|
325
|
+
# matches their JSON.dump().
|
326
|
+
#
|
327
|
+
ser << typecode
|
328
|
+
ser << child.to_s
|
329
|
+
next
|
330
|
+
end
|
331
|
+
if child.is_a?(Array)
|
332
|
+
#
|
333
|
+
# An array is encoded as a size N followed by N indexes into
|
334
|
+
# the seen list.
|
335
|
+
#
|
336
|
+
ser << typecode
|
337
|
+
ser << child.size.to_s
|
338
|
+
child.each do |a|
|
339
|
+
ser << '_'
|
340
|
+
ser << seen[a.object_id][0].to_s
|
341
|
+
end
|
342
|
+
next
|
343
|
+
end
|
344
|
+
if child.is_a?(Hash)
|
345
|
+
#
|
346
|
+
# A hash is encoded as a size N followed by 2*N indexes
|
347
|
+
# into the seen list, organized pairwise key+value.
|
348
|
+
#
|
349
|
+
ser << typecode
|
350
|
+
ser << child.size.to_s
|
351
|
+
child.each do |k,v|
|
352
|
+
ser << '_'
|
353
|
+
ser << seen[k.object_id][0].to_s
|
354
|
+
ser << '_'
|
355
|
+
ser << seen[v.object_id][0].to_s
|
356
|
+
end
|
357
|
+
next
|
358
|
+
end
|
359
|
+
raise CantTouchThisObjectError, "not handled: #{child.class} #{child}"
|
360
|
+
end
|
361
|
+
ser
|
362
|
+
end
|
41
363
|
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
364
|
+
# Deserializes suitable naive strings into objects.
|
365
|
+
#
|
366
|
+
# Inverts serialize().
|
367
|
+
#
|
368
|
+
# @raises CantTouchThisStringError if str is not recognized
|
369
|
+
#
|
370
|
+
def self._deserialize(str)
|
371
|
+
scanner = StringScanner.new(str)
|
372
|
+
serial_code = scanner.scan(/F/)
|
373
|
+
if 'F' != serial_code
|
374
|
+
raise CantTouchThisStringError, "bogus serial_code #{serial_code}"
|
375
|
+
end
|
376
|
+
num_objs = scanner.scan(/[0-9]+/)
|
377
|
+
if !num_objs
|
378
|
+
raise CantTouchThisStringError, "missing object list size"
|
379
|
+
end
|
380
|
+
num_objs = num_objs.to_i
|
381
|
+
strt = Hash.new # string table, id => str for strings already decoded
|
382
|
+
seen = []
|
383
|
+
#
|
384
|
+
# We parse the stream, constructing all the objects we see into
|
385
|
+
# a seen list.
|
386
|
+
#
|
387
|
+
# In this first pass, Arrays and Hashes are created whose
|
388
|
+
# elements, keys, and values are temporarily integers. These all
|
389
|
+
# refer to slots in the seen list, and many of them will be
|
390
|
+
# forward references to objects which we have yet to decode.
|
391
|
+
# Later we will rectify the object graph by replacing these
|
392
|
+
# integers with their referents from the seen list.
|
393
|
+
#
|
394
|
+
num_objs.times.each do |idx_obj|
|
395
|
+
code = scanner.scan(/[a-zA-Z]/)
|
396
|
+
case code
|
397
|
+
when 'n'
|
398
|
+
seen[idx_obj] = nil
|
399
|
+
when 'f'
|
400
|
+
seen[idx_obj] = false
|
401
|
+
when 't'
|
402
|
+
seen[idx_obj] = true
|
403
|
+
when 'S', 'Y', 's', 'y'
|
404
|
+
enc_code = scanner.scan(/[AU]/)
|
405
|
+
enc = nil
|
406
|
+
case enc_code
|
407
|
+
when 'A'
|
408
|
+
enc = Encoding::ASCII_8BIT
|
409
|
+
when 'U'
|
410
|
+
enc = Encoding::UTF_8
|
411
|
+
else
|
412
|
+
raise CantTouchThisStringError, "unknown enc_code #{enc_code}"
|
413
|
+
end
|
414
|
+
num = scanner.scan(/[0-9]+/)
|
415
|
+
if !num
|
416
|
+
raise CantTouchThisStringError, "missing num"
|
417
|
+
end
|
418
|
+
num = num.to_i
|
419
|
+
case code
|
420
|
+
when 'S', 'Y'
|
421
|
+
if num > 0
|
422
|
+
scanner.scan(/_/) or raise BAD_STR, "missing _"
|
423
|
+
seen[idx_obj] = scanner.peek(num)
|
424
|
+
scanner.pos += num # skip body
|
425
|
+
else
|
426
|
+
seen[idx_obj] = ''
|
47
427
|
end
|
428
|
+
strt[strt.size] = seen[idx_obj]
|
429
|
+
when 's', 'y'
|
430
|
+
seen[idx_obj] = strt[num]
|
431
|
+
end
|
432
|
+
seen[idx_obj] = seen[idx_obj].dup.force_encoding(enc)
|
433
|
+
case code
|
434
|
+
when 'Y', 'y'
|
435
|
+
seen[idx_obj] = seen[idx_obj].intern
|
436
|
+
end
|
437
|
+
when 'I'
|
438
|
+
pattern = /-?[0-9]+/
|
439
|
+
seen[idx_obj] = scanner.scan(pattern).to_i
|
440
|
+
when 'F'
|
441
|
+
pattern = /-?(Infinity|NaN|[0-9]+(\.[0-9]*)?(e([+-][0-9]*)?)?)/
|
442
|
+
match = scanner.scan(pattern)
|
443
|
+
case match
|
444
|
+
when 'Infinity' then seen[idx_obj] = Float::INFINITY
|
445
|
+
when '-Infinity' then seen[idx_obj] = -Float::INFINITY
|
446
|
+
when 'NaN' then seen[idx_obj] = Float::NAN
|
447
|
+
else seen[idx_obj] = match.to_f
|
448
|
+
end
|
449
|
+
when 'A'
|
450
|
+
num_items = scanner.scan(/[0-9]+/).to_i
|
451
|
+
arr = []
|
452
|
+
num_items.times.each do |idx|
|
453
|
+
scanner.scan(/_/) or raise BAD_STR, "missing _"
|
454
|
+
val = scanner.scan(/[0-9]+/).to_i # temp obj
|
455
|
+
arr[idx] = val
|
456
|
+
end
|
457
|
+
seen[idx_obj] = arr
|
458
|
+
when 'H'
|
459
|
+
num_items = scanner.scan(/[0-9]+/).to_i
|
460
|
+
hash = Hash.new
|
461
|
+
num_items.times.each do
|
462
|
+
scanner.scan(/_/) or raise BAD_STR, "missing _"
|
463
|
+
k = scanner.scan(/[0-9]+/).to_i # temp obj
|
464
|
+
scanner.scan(/_/) or raise BAD_STR, "missing _"
|
465
|
+
v = scanner.scan(/[0-9]+/).to_i # temp obj
|
466
|
+
hash[k] = v
|
48
467
|
end
|
468
|
+
seen[idx_obj] = hash
|
469
|
+
else
|
470
|
+
raise BAD_STR, "not handled: #{code} #{scanner.pos} #{scanner.rest}"
|
49
471
|
end
|
472
|
+
end
|
473
|
+
#
|
474
|
+
# If we parsed correctly, there will be no unconsumed input in the
|
475
|
+
# scanner.
|
476
|
+
#
|
477
|
+
if !scanner.eos?
|
478
|
+
raise BAD_STR, "not at end-of-string: #{scanner.pos} #{scanner.rest}"
|
479
|
+
end
|
480
|
+
#
|
481
|
+
# We rectify the references for each intermediate Array and Hash
|
482
|
+
# as promised earlier.
|
483
|
+
#
|
484
|
+
# Note that this code must be inherently mutation-oriented since
|
485
|
+
# it might have to construct cyclic graphs.
|
486
|
+
#
|
487
|
+
rectified = seen.map do |elem|
|
488
|
+
if elem.is_a?(Array)
|
489
|
+
next Array.new
|
490
|
+
elsif elem.is_a?(Hash)
|
491
|
+
next Hash.new
|
492
|
+
else
|
493
|
+
elem
|
494
|
+
end
|
495
|
+
end
|
496
|
+
rectified.each_with_index do |elem,idx|
|
497
|
+
if elem.is_a?(Array)
|
498
|
+
seen[idx].each_with_index do |a,i|
|
499
|
+
elem[i] = rectified[a]
|
500
|
+
end
|
501
|
+
elsif elem.is_a?(Hash)
|
502
|
+
seen[idx].each do |k,v|
|
503
|
+
elem[rectified[k]] = rectified[v]
|
504
|
+
end
|
505
|
+
end
|
506
|
+
end
|
507
|
+
#
|
508
|
+
# By the way _safety_dance performed its walk in _serialize(), the
|
509
|
+
# object we are decoding is the first object encoded in str.
|
510
|
+
#
|
511
|
+
# Thus, we return the first element of the rectified list.
|
512
|
+
#
|
513
|
+
rectified.first
|
514
|
+
end
|
50
515
|
|
51
|
-
|
516
|
+
##########################################################################
|
517
|
+
#
|
518
|
+
# wrap() and unwrap() are the byte layer
|
519
|
+
#
|
520
|
+
# They are responsible for interconverting between naive strings and
|
521
|
+
# strings which are ready to go out on the wire into external
|
522
|
+
# storage.
|
523
|
+
#
|
524
|
+
# This layer is analogous to GZIP: it converts strings into a
|
525
|
+
# different representation which is smaller, more resistant to
|
526
|
+
# corruption, and/or more recognizable.
|
527
|
+
#
|
528
|
+
##########################################################################
|
52
529
|
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
530
|
+
# Wraps any string into an OAK string.
|
531
|
+
#
|
532
|
+
# Is inverted by unwrap(). For all str, unwrap(wrap(str)) == str.
|
533
|
+
#
|
534
|
+
# @param str naive string to be wrapped as an OAK string
|
535
|
+
#
|
536
|
+
# @param redundancy 'none', 'crc32' (default), or 'sha1'
|
537
|
+
#
|
538
|
+
# @param compression 'none' (default), 'lz4', 'zlib', 'bzip2', or 'lzma'
|
539
|
+
#
|
540
|
+
# @param force false (default), or true. When true, always
|
541
|
+
# compress. When false, fall back to the
|
542
|
+
# original if the compressed form is larger.
|
543
|
+
#
|
544
|
+
# @param key_chain OAK::KeyChain from which to draw the encryption
|
545
|
+
# key, or nil for none.
|
546
|
+
#
|
547
|
+
# @param key String name of a key in key_chain to be used
|
548
|
+
# for encryption, or nil if none.
|
549
|
+
#
|
550
|
+
# @param force_oak_4 Bool, for debugging, force oak_4 encoding even
|
551
|
+
# if no encryption key is specified.
|
552
|
+
#
|
553
|
+
# @param format 'none', 'base64' (default)
|
554
|
+
#
|
555
|
+
# @returns an OAK string
|
556
|
+
#
|
557
|
+
def self._wrap(str,opts={})
|
558
|
+
redundancy = (opts[:redundancy] || :crc32).to_s
|
559
|
+
compression = (opts[:compression] || :none).to_s
|
560
|
+
force = (opts[:force] || false)
|
561
|
+
format = (opts[:format] || :base64).to_s
|
562
|
+
key_chain = opts[:key_chain]
|
563
|
+
key = opts[:key]
|
564
|
+
debug_iv = opts[:debug_iv]
|
565
|
+
if key_chain && !key_chain.is_a?(KeyChain)
|
566
|
+
raise ArgumentError, "bad key_chain #{key_chain}"
|
567
|
+
end
|
568
|
+
if debug_iv && !debug_iv.is_a?(String)
|
569
|
+
raise ArgumentError, "bad debug_iv #{debug_iv}"
|
570
|
+
end
|
571
|
+
if debug_iv && ENCRYPTION_ALGO_IV_BYTES != debug_iv.size
|
572
|
+
raise ArgumentError, "bad debug_iv #{debug_iv}"
|
573
|
+
end
|
574
|
+
if key && !key_chain
|
575
|
+
raise ArgumentError, "key #{key} without key_chain"
|
61
576
|
end
|
577
|
+
if key && !key_chain.keys[key]
|
578
|
+
keys = key_chain.keys
|
579
|
+
raise ArgumentError, "key not found in #{keys}: #{key}"
|
580
|
+
end
|
581
|
+
encryption_key = key ? key_chain.keys[key] : nil
|
582
|
+
str = str.b # dupe to Encoding::ASCII_8BIT
|
583
|
+
if encryption_key || opts[:force_oak_4]
|
584
|
+
_wrap_oak_4(
|
585
|
+
str,
|
586
|
+
redundancy,
|
587
|
+
compression,
|
588
|
+
force,
|
589
|
+
format,
|
590
|
+
key,
|
591
|
+
encryption_key,
|
592
|
+
debug_iv
|
593
|
+
)
|
594
|
+
else
|
595
|
+
_wrap_oak_3(
|
596
|
+
str,
|
597
|
+
redundancy,
|
598
|
+
compression,
|
599
|
+
force,
|
600
|
+
format
|
601
|
+
)
|
602
|
+
end
|
603
|
+
end
|
62
604
|
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
605
|
+
# Builds an unencrypted oak_3 string:
#
#   oak_3 <redundancy> <compression> <format> _ <check> _ <size> _ <payload> _ok
#
# where <check> is the redundancy check of the uncompressed str and
# <size> is the length of the formatted payload.
#
def self._wrap_oak_3(str, redundancy, compression, force, format)
  source_check        = _check(redundancy,str)
  packed, compression = _compress(compression,force,str) # may fall back to 'none'
  payload             = _format(format,packed)
  oak = 'oak_3'                                      # format id+ver
  oak << REDUNDANCY_2_CODE[redundancy]               # redundancy
  oak << COMPRESSION_2_CODE[compression]             # compression
  oak << FORMAT_2_CODE[format]                       # format
  oak << '_' << source_check                         # source check
  oak << '_' << payload.size.to_s                    # formatted size
  oak << '_' << payload                              # payload
  oak << '_' << 'ok'                                 # terminator
  oak.force_encoding(Encoding::ASCII_8BIT)
end
|
68
629
|
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
630
|
+
# Builds an oak_4 string:
#
#   oak_4 <key name, optional> _ <format> <size> _ <payload> _ok
#
# The payload is the formatted output of _encrypt over
#
#   <redundancy> <compression> <check> _ <compressed>
#
# The header up to and including the format code is handed to
# _encrypt as its auth_data argument.
#
def self._wrap_oak_4(
      str,
      redundancy,
      compression,
      force,
      format,
      key,
      encryption_key,
      debug_iv
    )
  header = 'oak_4'                                   # format id+ver
  header << key if key                               # key name
  header << '_' << FORMAT_2_CODE[format]             # format
  packed, compression = _compress(compression,force,str)
  plaintext = ''
  plaintext << REDUNDANCY_2_CODE[redundancy]         # redundancy
  plaintext << COMPRESSION_2_CODE[compression]       # compression
  plaintext << _check(redundancy,str)                # source check
  plaintext << '_' << packed
  encrypted = _encrypt(encryption_key, plaintext, header, debug_iv)
  payload   = _format(format,encrypted)
  oak = header             # the header string is extended in place
  oak << payload.size.to_s                           # formatted size
  oak << '_' << payload                              # payload
  oak << '_' << 'ok'                                 # terminator
  oak.force_encoding(Encoding::ASCII_8BIT)
end
|
76
668
|
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
669
|
+
# Unwraps any OAK string into a string.
#
# This is the inverse of wrap(): for all str, unwrap(wrap(str)) == str.
#
# @param str OAK string to be unwrapped
#
# @param key_chain OAK::KeyChain in which to look for keys to
# decrypt encrypted OAK strings, or nil for none.
#
# @returns a string
#
# @raises ArgumentError if str is not in OAK format.
#
def self._unwrap(str,opts={})
  scanner = StringScanner.new(str.b) # str.b for dup to ASCII_8BIT
  version = scanner.scan(/oak_[34]/)
  raise BAD_STR, "bad oak+ver" unless version
  # Only OAK_4 can be encrypted, so only it needs the opts.
  return _unwrap_oak_4(scanner,opts) if 'oak_4' == version
  _unwrap_oak_3(scanner)
end
|
82
692
|
|
83
|
-
|
84
|
-
|
693
|
+
# Parses the remainder of an oak_3 string and returns the original
# (unformatted, uncompressed) string.
#
# @param sc StringScanner positioned just after the "oak_3" prefix.
#
# @returns the unwrapped string
#
# @raises BAD_STR if the string does not follow the oak_3 layout.
#
# @raises CantTouchThisStringError if the declared payload size does
# not fit the string, or if the redundancy check does not match.
#
def self._unwrap_oak_3(sc)
  r      = sc.scan(/[NCS]/)     or raise BAD_STR, "bad redundancy"
  c      = sc.scan(/[N4ZBM]/)   or raise BAD_STR, "bad compression"
  f      = sc.scan(/[NB]/)      or raise BAD_STR, "bad format"
  sc.scan(/_/)                  or raise BAD_STR, "missing _"
  scheck = sc.scan(/[a-f0-9]+/) or raise BAD_STR, "bad scheck"
  sc.scan(/_/)                  or raise BAD_STR, "missing _"
  fsize  = sc.scan(/[0-9]+/)    or raise BAD_STR, "bad fsize"
  fsize  = fsize.to_i
  sc.scan(/_/)                  or raise BAD_STR, "missing _"
  begin
    #
    # Both peek and pos= can raise RangeError: peek when fsize is too
    # large to convert to a native integer, pos= when fsize runs past
    # the end of the string.  Either way the input is malformed.
    #
    formatted = sc.peek(fsize)
    sc.pos   += fsize
  rescue RangeError => ex
    raise CantTouchThisStringError, "#{ex.class}: #{ex.message}"
  end
  #
  # NOTE(review): $ matches before any newline, so text following
  # "_ok\n" is ignored here - confirm whether that leniency is wanted.
  #
  sc.scan(/_ok$/)               or raise BAD_STR, "bad ok: #{formatted}"
  redundancy  = CODE_2_REDUNDANCY[r]  || r
  compression = CODE_2_COMPRESSION[c] || c
  format      = CODE_2_FORMAT[f]      || f
  fsize_re    = formatted.size
  if fsize != fsize_re
    raise CantTouchThisStringError, "fsize #{fsize} vs #{fsize_re}"
  end
  compressed = _deformat(format,formatted)
  original   = _decompress(compression,compressed)
  scheck_re  = _check(redundancy,original)
  if scheck != scheck_re
    raise CantTouchThisStringError, "scheck #{scheck} vs #{scheck_re}"
  end
  original
end
|
86
725
|
|
87
|
-
|
88
|
-
|
89
|
-
|
726
|
+
# Parses the remainder of an oak_4 string and returns the original
# (decrypted, uncompressed) string.
#
# @param sc StringScanner positioned just after the "oak_4" prefix.
#
# @param opts Hash, of which only :key_chain is consulted.  It is
# required when the string names an encryption key.
#
# @returns the unwrapped string
#
# @raises BAD_STR if the string does not follow the oak_4 layout.
#
# @raises CantTouchThisStringError if the named key is unavailable,
# if the declared payload size does not fit the string, or if the
# redundancy check does not match.
#
def self._unwrap_oak_4(sc,opts={})
  key            = sc.scan(/[^_]+/) # nil OK, indicates no encryption
  encryption_key = nil
  if key
    key_chain = opts[:key_chain]
    if !key_chain
      raise CantTouchThisStringError, "key #{key} but no key_chain"
    end
    encryption_key = key_chain.keys[key]
    if !encryption_key
      keys = key_chain.keys
      raise CantTouchThisStringError, "key not found in #{keys}: #{key}"
    end
  end
  sc.scan(/_/)                  or raise BAD_STR, "missing _"
  f      = sc.scan(/[NB]/)      or raise BAD_STR, "bad format"
  header = sc.string[0..(sc.pos-1)] # for authentication by _decrypt
  format = CODE_2_FORMAT[f]
  fsize  = sc.scan(/[0-9]+/)    or raise BAD_STR, "bad fsize"
  fsize  = fsize.to_i
  sc.scan(/_/)                  or raise BAD_STR, "missing _"
  begin
    #
    # Both peek and pos= can raise RangeError: peek when fsize is too
    # large to convert to a native integer, pos= when fsize runs past
    # the end of the string.  Either way the input is malformed.
    #
    formatted = sc.peek(fsize)
    sc.pos   += fsize
  rescue RangeError => ex
    raise CantTouchThisStringError, "#{ex.class}: #{ex.message}"
  end
  #
  # NOTE(review): $ matches before any newline, so text following
  # "_ok\n" is ignored here - confirm whether that leniency is wanted.
  #
  sc.scan(/_ok$/)               or raise BAD_STR, "bad ok"
  encrypted = _deformat(format,formatted)
  plaintext = _decrypt(encryption_key,encrypted,header)
  #
  # The decrypted body carries its own small header: redundancy code,
  # compression code, source check, then "_" and the compressed data.
  #
  sp     = StringScanner.new(plaintext)
  r      = sp.scan(/[NCS]/)     or raise BAD_STR, "bad redundancy"
  c      = sp.scan(/[N4ZBM]/)   or raise BAD_STR, "bad compression"
  scheck = sp.scan(/[a-f0-9]+/) or raise BAD_STR, "bad scheck"
  sp.scan(/_/)                  or raise BAD_STR, "missing _"
  compressed  = sp.rest
  redundancy  = CODE_2_REDUNDANCY[r]  || r
  compression = CODE_2_COMPRESSION[c] || c
  original    = _decompress(compression,compressed)
  scheck_re   = _check(redundancy,original)
  if scheck != scheck_re
    raise(
      CantTouchThisStringError,
      "scheck #{scheck} vs #{scheck_re} in #{sc.string}"
    )
  end
  original
end
|
98
774
|
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
775
|
+
# Single-character codes used to encode each supported object type.
#
TYPE_2_CODE ||= {
  Hash       => 'H',
  Array      => 'A',
  String     => 'S', # downcased to 's' for string table lookup
  Symbol     => 'Y', # downcased to 'y' for string table lookup
  Integer    => 'I',
  Float      => 'F',
  NilClass   => 'n',
  TrueClass  => 't',
  FalseClass => 'f',
}.freeze

# Every class which has a type code, in table order.
#
ALL_TYPES ||= TYPE_2_CODE.keys.freeze
|
103
789
|
|
104
|
-
|
105
|
-
|
790
|
+
# How we encode :format in the OAK strings, and the reverse lookup
# from code back to format name.
#
FORMAT_2_CODE ||= {
  'none'   => 'N',
  'base64' => 'B', # urlsafe form with padding and whitespace stripped
}.freeze
CODE_2_FORMAT ||= FORMAT_2_CODE.invert.freeze
|
106
797
|
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
798
|
+
# How we encode :compression in the OAK strings, and the reverse
# lookup from code back to compression name.
#
# Early on, I captured some metrics using the concatenation of all
# our Ruby code as a test file.
#
# I measured:
#
#   SOURCE  5707334
#   none    5707370  compression 0.17s  decompression 0.16s
#   lzo     1804765  compression 0.18s  decompression 0.16s
#   lzf     1807971  compression 0.16s  decompression 0.17s
#   lz4     1813574  compression 0.17s  decompression 0.14s
#   zlib    1071216  compression 0.53s  decompression 0.19s
#   bzip2    868595  compression 0.62s  decompression 0.33s
#   lzma     760594  compression 6.22s  decompression 0.20s
#
# From this, I conclude that only one of lzo, lzf, and lz4 is
# interesting.  They all yield approximately the same compression,
# and their compression times are indistinguishable from the rest of
# the streaming and encoding times imposed by OAK.
#
# I'm settling on supporting only lz4 because it seems to be better
# supported as a polymorphic lib - it's closer to a de facto standard
# for the LZ77 family.
#
# zlib, bzip2, and lzma each represent interesting distinct choices -
# I'm keeping support for all three.
#
COMPRESSION_2_CODE ||= {
  'none'  => 'N',
  'lz4'   => '4',
  'zlib'  => 'Z',
  'bzip2' => 'B',
  'lzma'  => 'M',
}.freeze
CODE_2_COMPRESSION ||= COMPRESSION_2_CODE.invert.freeze
|
834
|
+
|
835
|
+
# How we encode :redundancy in the OAK strings, and the reverse
# lookup from code back to redundancy name.
#
REDUNDANCY_2_CODE ||= {
  'none'  => 'N',
  'crc32' => 'C',
  'sha1'  => 'S',
}.freeze
CODE_2_REDUNDANCY ||= REDUNDANCY_2_CODE.invert.freeze
|
843
|
+
|
844
|
+
# Helper method, computes the redundancy check string for str.
#
# @param redundancy 'none', 'crc32', or 'sha1' (String or Symbol)
#
# @param str String to be checked
#
# @returns '0' for none, the decimal CRC-32 for crc32, or the hex
# SHA-1 digest for sha1.
#
# @raises ArgumentError if redundancy is not recognized.
#
def self._check(redundancy,str)
  algo = redundancy.to_s
  return '0'                           if 'none'  == algo
  return format('%d', Zlib.crc32(str)) if 'crc32' == algo
  return Digest::SHA1.hexdigest(str)   if 'sha1'  == algo
  raise ArgumentError, "unknown redundancy #{redundancy}"
end
|
855
|
+
|
856
|
+
# Helper method, produces the formatted version of str.
#
# @param format 'none' or 'base64' (String or Symbol)
#
# @param str String to be formatted
#
# @returns str itself for none, or the unpadded base64url encoding
# of str for base64.
#
# @raises ArgumentError if format is not recognized.
#
def self._format(format,str)
  kind = format.to_s
  if 'none' == kind
    str
  elsif 'base64' == kind
    #
    # We are actually using "Base 64 Encoding with URL and Filename
    # Safe Alphabet" aka base64url with the option not to use
    # padding, per https://tools.ietf.org/html/rfc4648#section-5.
    #
    # If we were using Ruby 2.3+, we could use the option "padding:
    # false" instead of chopping the trailing padding out with gsub.
    #
    Base64.urlsafe_encode64(str).gsub(/=.*$/,'')
  else
    raise ArgumentError, "unknown format #{format}"
  end
end
|
876
|
+
|
877
|
+
# Helper method, inverts _format().
#
# @param format 'none' or 'base64' (String or Symbol)
#
# @param str formatted String
#
# @returns str itself for none, or the decoded bytes for base64.
#
# @raises ArgumentError if format is not recognized.
#
def self._deformat(format,str)
  kind = format.to_s
  if 'none' == kind
    str
  elsif 'base64' == kind
    #
    # Regrettably, Base64.urlsafe_decode64(str) does not reverse
    # Base64.urlsafe_encode64(str).gsub(/=.*$/,''); it raises an
    # ArgumentError "invalid base64".
    #
    # Fortunately, plain Base64.decode64() is liberal in what it
    # accepts, and handles the output of all of encode64,
    # strict_encode64, and urlsafe_encode64 both with and without
    # the trailing padding, once the urlsafe alphabet is mapped back.
    #
    Base64.decode64(str.tr('-_','+/'))
  else
    raise ArgumentError, "unknown format #{format}"
  end
end
|
897
|
+
|
898
|
+
# Helper for wrap() and unwrap(), multiplexes encryption.
#
# @param encryption_key key object (responds to #key), or nil for
# no encryption, in which case data is returned untouched.
#
# @param data String to be encrypted
#
# @param auth_data String which is authenticated but not encrypted
#
# @param debug_iv String to use as the IV instead of a random one,
# or nil.
#
# @returns iv + auth_tag + ciphertext
#
def self._encrypt(encryption_key,data,auth_data,debug_iv)
  return data unless encryption_key
  #
  # WARNING: In at least some versions of OpenSSL::Cipher, setting
  # iv before key would cause the iv to be ignored in aes-*-gcm
  # ciphers!
  #
  # https://github.com/attr-encrypted/encryptor/pull/22
  # https://github.com/attr-encrypted/encryptor/blob/master/README.md
  #
  # The issue was reported against version "1.0.1f 6 Jan 2014".  I
  # have yet to figure out whether our current version, 1.1.0, is
  # affected, or when/how the fix will go live.
  #
  # OAK_4 only supports AES-256-GCM.  Although the implementation
  # bug has been fixed and OAK will almost certainly not be used
  # with a buggy version of OpenSSL, nevertheless we take great
  # care to set cipher.key *then* cipher.iv.
  #
  # Still, can't be too careful.
  #
  iv_bytes  = ENCRYPTION_ALGO_IV_BYTES
  tag_bytes = ENCRYPTION_ALGO_AUTH_TAG_BYTES
  if debug_iv && iv_bytes != debug_iv.size
    raise "unexpected debug_iv.size #{debug_iv.size} not #{iv_bytes}"
  end
  cipher           = encryption_algo.encrypt
  cipher.key       = encryption_key.key             # key first ...
  iv               = debug_iv || cipher.random_iv
  cipher.iv        = iv                             # ... then iv
  cipher.auth_data = auth_data
  ciphertext       = cipher.update(data) + cipher.final
  auth_tag         = cipher.auth_tag
  raise "unexpected iv.size #{iv.size} not #{iv_bytes}" if iv_bytes != iv.size
  if tag_bytes != auth_tag.size
    raise "unexpected auth_tag.size #{auth_tag.size} not #{tag_bytes}"
  end
  #
  # Since iv and auth_tag have fixed widths, they are trivial to
  # parse without putting any effort or space into recording their
  # sizes in the message body.
  #
  iv + auth_tag + ciphertext
end
|
946
|
+
|
947
|
+
# Helper for wrap() and unwrap(), multiplexes decryption.
#
# Expects data in the layout produced by _encrypt():
# iv + auth_tag + ciphertext, with fixed-width iv and auth_tag.
#
# @param encryption_key key object (responds to #key), or nil for
# no encryption, in which case data is returned untouched.
#
# @param data String to be decrypted
#
# @param auth_data String which was authenticated at encryption time
#
# @returns the decrypted string
#
# @raises CantTouchThisStringError if data is too short to hold an
# iv and auth_tag, or if OpenSSL rejects the ciphertext.
#
def self._decrypt(encryption_key,data,auth_data)
  return data if !encryption_key
  iv_size       = ENCRYPTION_ALGO_IV_BYTES
  auth_tag_size = ENCRYPTION_ALGO_AUTH_TAG_BYTES
  prefix_size   = iv_size + auth_tag_size
  #
  # A truncated payload would otherwise yield short or nil slices
  # below, and surface as an unrelated error from OpenSSL::Cipher
  # instead of as a bad OAK string.
  #
  if data.bytesize < prefix_size
    raise(
      CantTouchThisStringError,
      "encrypted data too short: #{data.bytesize} bytes, need #{prefix_size}"
    )
  end
  iv         = data.byteslice(0, iv_size)
  auth_tag   = data.byteslice(iv_size, auth_tag_size)
  ciphertext = data.byteslice(prefix_size, data.bytesize - prefix_size)
  cipher     = encryption_algo.decrypt
  cipher.key = encryption_key.key # key first, then iv, as in _encrypt
  begin
    cipher.iv        = iv
    cipher.auth_tag  = auth_tag
    cipher.auth_data = auth_data
    cipher.update(ciphertext) + cipher.final
  rescue OpenSSL::Cipher::CipherError => ex
    raise CantTouchThisStringError, "#{ex.class}: #{ex.message}"
  end
end
|
967
|
+
|
968
|
+
# Helper for wrap() and unwrap(), multiplexes compression.
#
# @param compression 'none', 'lz4', 'zlib', 'bzip2', or 'lzma'
# (String or Symbol)
#
# @param force Bool, when true the compressed form is kept even if
# it is no smaller than str.
#
# @param str String to be compressed
#
# @returns [compressed,compression] where compression is the name
# of the compression actually applied, which is 'none' when the
# compressed form was discarded.
#
# @raises ArgumentError if compression is not recognized.
#
def self._compress(compression,force,str)
  algo   = compression.to_s
  packed =
    case algo
    when 'none'  then str
    when 'lz4'   then LZ4.compress(str)
    when 'zlib'  then Zlib.deflate(str)
    when 'bzip2'
      sink = StringIO.new
      sink.set_encoding(Encoding::ASCII_8BIT)
      Bzip2::FFI::Writer.write(sink, str)
      sink.string
    when 'lzma'  then LZMA.compress(str)
    else
      raise ArgumentError, "unknown compression #{compression}"
    end
  # Unless forced, compression which does not shrink str is dropped.
  return [str, 'none'] if !force && packed.size >= str.size
  [packed, algo]
end
|
994
|
+
|
995
|
+
# Helper for wrap() and unwrap(), multiplexes decompression.
#
# @param compression 'none', 'lz4', 'zlib', 'bzip2', or 'lzma'
# (String or Symbol)
#
# @param str compressed String
#
# @returns the decompressed string
#
# @raises CantTouchThisStringError if str cannot be decompressed.
#
# @raises ArgumentError if compression is not recognized.
#
def self._decompress(compression,str)
  case compression.to_s
  when 'none'
    return str
  when 'lz4'
    begin
      return LZ4.uncompress(str)
    rescue LZ4Internal::Error => ex
      raise CantTouchThisStringError, "#{ex.class}: #{ex.message}"
    end
  when 'zlib'
    begin
      return Zlib::Inflate.inflate(str)
    rescue Zlib::Error => ex
      #
      # Zlib::Error rather than only Zlib::DataError: truncated or
      # empty input surfaces as Zlib::BufError, which is just as
      # much a damaged string as a bad header or checksum.
      #
      raise CantTouchThisStringError, "#{ex.class}: #{ex.message}"
    end
  when 'bzip2'
    io  = StringIO.new(str)
    raw = nil
    begin
      raw = Bzip2::FFI::Reader.read(io)
    rescue Bzip2::FFI::Error::MagicDataError => ex
      raise CantTouchThisStringError, "#{ex.class}: #{ex.message}"
    end
    str = raw.b # dupe to Encoding::ASCII_8BIT
    return str
  when 'lzma'
    begin
      raw = LZMA.decompress(str)
    rescue RuntimeError => ex
      raise CantTouchThisStringError, "#{ex.class}: #{ex.message}"
    end
    str = raw.b # dupe to Encoding::ASCII_8BIT
    return str
  else
    raise ArgumentError, "unknown compression #{compression}"
  end
end
|
1035
|
+
|
1036
|
+
# Walks obj recursively, touching each reachable child only once,
# without getting caught up in cycles or touching DAGy bits twice.
#
# Only knows how to recurse into Arrays and Hashes.
#
# The traversal is depth-first pre-order, with the children of
# Arrays walked in positional order and Hash pairs walked in hash
# order as k,v,k,v, etc.
#
# @param obj object to walk
#
# @param seen Hash which maps object_id => [idx,child] of every
# object touched, where idx is 0,1,2,... corresponding to the order
# in which we encountered child.
#
# @param reseen List of children which were walked more than once.
#
# @param block if present, every object touched is yielded to block
#
# @return seen,reseen
#
def self._safety_dance(obj,seen=nil,reseen=nil,&block)
  #
  # Note that OAK._serialize() depends on the depth-first pre-order
  # specification here - at least, it assumes that the first element
  # walked will be the first element added to seen.
  #
  seen   ||= {}
  reseen ||= []
  id       = obj.object_id
  if seen.key?(id)
    # Already visited: record the repeat and do not descend again.
    reseen << obj
  else
    seen[id] = [seen.size,obj]
    yield obj if block                 # pre-order: this node before children
    if obj.is_a?(Hash)
      obj.each do |k,v|                # children in hash order and k,v,...
        _safety_dance(k,seen,reseen,&block)
        _safety_dance(v,seen,reseen,&block)
      end
    elsif obj.is_a?(Array)
      obj.each do |v|                  # children in list order
        _safety_dance(v,seen,reseen,&block)
      end
    end
  end
  [seen,reseen]
end
|
1084
|
+
|
122
1085
|
end
|