oak 0.0.3 → 0.4.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,245 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # oak.rb: cli driver for encoding strings in the OAK format.
4
+ #
5
+ # author: jhw@prosperworks.com
6
+ # incept: 2016-03-05
7
+ #
8
+
9
+ require_relative '../lib/oak.rb'
10
+ require 'optimist'
11
+
12
+ OLD_ARGV = ARGV.dup # ARGV is consumed by Optimist but we use later.
13
+ OPTS = Optimist.options do
14
+ banner "#{$0} cli driver for OAK"
15
+ banner <<-OPTIMIST_EXAMPLES
16
+ Examples:
17
+ $ echo hello | bin/oak.rb
18
+ oak_3CNB_1944283675_15_RjFTVTVfaGVsbG8_ok
19
+ $ (echo hello ; echo world) | bin/oak.rb
20
+ oak_3CNB_1944283675_15_RjFTVTVfaGVsbG8_ok
21
+ oak_3CNB_2139413982_15_RjFTVTVfd29ybGQ_ok
22
+ $ (echo hello ; echo world) | bin/oak.rb --compression zlib --force
23
+ oak_3CZB_1944283675_26_eJxzMwwONY3PSM3JyQcAFF4DyA_ok
24
+ oak_3CZB_2139413982_26_eJxzMwwONY0vzy_KSQEAFNgD3A_ok
25
+ $ (echo hello ; echo world) | bin/oak.rb --format none
26
+ oak_3CNN_1944283675_11_F1SU5_hello_ok
27
+ oak_3CNN_2139413982_11_F1SU5_world_ok
28
+ $ (echo hello ; echo world) | bin/oak.rb | bin/oak.rb --mode decode-lines
29
+ hello
30
+ world
31
+ OPTIMIST_EXAMPLES
32
+ banner "Options:"
33
+ version "#{$0} #{OAK::VERSION}"
34
+ opt :redundancy, 'redundancy', :default => 'crc32'
35
+ opt :format, 'format', :default => 'base64'
36
+ opt :compression, 'compression', :default => 'none'
37
+ opt :force, 'compress even if bigger', :default => false
38
+ opt :mode, 'mode', :default => 'encode-lines'
39
+ opt :key_chain, 'key chain env name', :type => :string
40
+ opt :key, 'encrypt key name', :type => :string
41
+ opt :key_check, 'check available keys', :default => false
42
+ opt :key_generate, 'generate new key', :default => false
43
+ opt :force_oak_4, 'force OAK_4 even unencrypted', :default => false
44
+ opt :eigen, 'calc eigenratio', :type => :int
45
+ opt :self_test, 'self-test only', :default => false
46
+ opt :help, 'show this help'
47
+ end
48
+ Optimist::die :eigen, "must be non-negative" if OPTS[:eigen] && OPTS[:eigen] < 0
49
+
50
#
# Translate the parsed command-line options into the option Hash which
# OAK.encode() and OAK.decode() accept.  The key chain is loaded from
# ENV under the name given by --key-chain.
#
oak_opts = {
  redundancy:  OPTS[:redundancy],
  compression: OPTS[:compression],
  force:       OPTS[:force],
  format:      OPTS[:format],
  key_chain:   OAK.parse_env_chain(ENV,OPTS[:key_chain]),
  key:         OPTS[:key],
  force_oak_4: OPTS[:force_oak_4],
}

#
# Reject any redundancy, compression, or format name for which OAK has
# no code, before any input is read.
#
unless OAK::REDUNDANCY_2_CODE.key?(oak_opts[:redundancy])
  Optimist::die :redundancy, "bogus #{OPTS[:redundancy]}"
end
unless OAK::COMPRESSION_2_CODE.key?(oak_opts[:compression])
  Optimist::die :compression, "bogus #{OPTS[:compression]}"
end
known_formats = OAK::FORMAT_2_CODE.keys
unless known_formats.include?(oak_opts[:format])
  Optimist::die :format, "bogus #{OPTS[:format]} not in #{known_formats}"
end
69
+
70
+ =begin
71
+
72
+ doctest: simple transcoding
73
+ >> OAK::decode(OAK::encode([1,"2",3.000001]))
74
+ => [1,"2",3.000001]
75
+ >> OAK::decode(OAK::encode({foo: "bar"}))
76
+ => {foo: "bar"}
77
+ >> OAK::decode(OAK::encode({foo: :bar}))
78
+ => {foo: :bar}
79
+ >> OAK::decode(OAK::encode("Hello, World!"))
80
+ => "Hello, World!"
81
+ >> OAK::decode(OAK::encode("Hello, World!", format: :none, redundancy: :none))
82
+ => "Hello, World!"
83
+
84
+ doctest: stability of encoding
85
+ >> OAK::decode("oak_3NNB_0_30_RjNIMV8xXzJZQTNfZm9vU1UzX2Jhcg_ok")
86
+ => {:foo=>"bar"}
87
+ >> OAK::encode(1, format: :base64, redundancy: :none)
88
+ => "oak_3NNB_0_6_RjFJMQ_ok"
89
+ >> OAK::encode(1, format: :base64, redundancy: :crc32)
90
+ => "oak_3CNB_3405226796_6_RjFJMQ_ok"
91
+ >> OAK::encode(1, format: :none, redundancy: :crc32)
92
+ => "oak_3CNN_3405226796_4_F1I1_ok"
93
+ >> hello_utf8 = "Hello, World!".force_encoding('UTF-8')
94
+ => "Hello, World!"
95
+ >> OAK::encode(hello_utf8, format: :base64, redundancy: :none)
96
+ => "oak_3NNB_0_27_RjFTVTEzX0hlbGxvLCBXb3JsZCE_ok"
97
+ >> OAK::encode(hello_utf8, format: :none, redundancy: :crc32)
98
+ => "oak_3CNN_2351984628_20_F1SU13_Hello, World!_ok"
99
+
100
+ Note above I used force_encoding('UTF-8') after discovering that with
101
+ Ruby 2.1.6 on Mac I get Encoding.default_encoding is UTF-8, but with
102
+ Ruby 2.1.6 on Linux I get Encoding.default_encoding is US-ASCII!
103
+
104
+ =end
105
+
106
if __FILE__ == $0
  #
  # --self-test runs the doctests embedded in this file, then exits.
  #
  if OPTS[:self_test]
    require 'rubydoctest'
    exit RubyDocTest::Runner.new(File.read(__FILE__), __FILE__).run ? 0 : 1
  end
  #
  # --key-check reports which keys were found in the key chain.  It
  # does not exit: any requested mode below still runs afterward.
  #
  if OPTS[:key_check]
    if !OPTS[:key_chain]
      puts "no --key-chain specified"
    else
      keys = oak_opts[:key_chain].keys.keys
      if keys.empty?
        puts "#{OPTS[:key_chain]}: no keys found"
      else
        puts "#{OPTS[:key_chain]}: found keys: #{keys.join(' ')}"
      end
    end
  end
  #
  # --key-generate prints one fresh random key, OAK-encoded, then exits.
  #
  if OPTS[:key_generate]
    STDOUT.puts OAK.encode(OAK.random_key)
    exit 0
  end
  #
  # Everything else consumes piped input; with a terminal on stdin
  # there is nothing to do.
  #
  if !$stdin.tty?
    if OPTS[:eigen]
      #
      # --eigen N re-encodes the input N times, feeding each output
      # back in, and prints the growth ratio of every iteration.
      #
      prev = STDIN.read
      puts "input: %d" % prev.size
      OPTS[:eigen].times do |i|
        oak = OAK.encode(prev,oak_opts)
        psize = prev.size
        wsize = oak.size
        ratio = 1.0 * wsize / psize
        puts " iter %3d: %4d => %4d ratio %.2f" % [i,psize,wsize,ratio]
        prev = oak
      end
      exit 0
    end
    #
    # Round-trip check shared by the 'crazy' and 'tests' modes.
    #
    # NaN never compares equal to itself, so a NaN which decodes to a
    # NaN is accepted explicitly.  Any other difference is a failure.
    #
    faithful = lambda do |expected,actual|
      expected == actual ||
        (expected.is_a?(Float) && actual.is_a?(Float) &&
         expected.nan? && actual.nan?)
    end
    unhappiness = 0
    case OPTS[:mode]
    when 'cat'
      ARGF.each_line do |line|
        puts line.strip
      end
    when 'encode-lines'
      ARGF.each_line do |line|
        puts OAK.encode(line.strip,oak_opts)
      end
    when 'decode-lines'
      ARGF.each_line do |line|
        puts OAK.decode(line.strip,oak_opts)
      end
    when 'encode-file'
      puts OAK.encode(STDIN.read,oak_opts)
    when 'decode-file'
      STDOUT.write OAK.decode(STDIN.read.strip,oak_opts)
    when 'recode-file'
      puts OAK.encode(OAK.decode(STDIN.read,oak_opts),oak_opts)
    when 'crazy'
      #
      # --mode crazy prints out a sample of OAK strings for various
      # challenging cases.
      #
      cycle_a = ['cycle_a','TBD']
      cycle_b = ['cycle_b',cycle_a]
      cycle_a[1] = cycle_b
      dag_c = ['dag_c']
      dag_b = ['dag_b',dag_c]
      dag_a = ['dag_a',dag_b,dag_c]
      [
        'hello',
        ['hello'] + ['hello',:hello] * 2,
        {1=>'a','b'=>2,[]=>3,''=>4,{}=>5,nil=>6},
        ['x','x','x','x','x','x','x','x','x','x','x','x','x'],
        ['x'] * 13,
        cycle_a,
        dag_a,
        [1,-123,0.12,-0.123,Float::NAN,-Float::INFINITY,3.14159265358979],
      ].each do |obj|
        oak = OAK.encode(
          obj,
          redundancy: :crc32,
          format: :none,
          compression: :none,
        )
        puts ""
        puts "obj: #{obj}"
        puts " oak: #{oak}"
        begin
          dec = OAK.decode(oak,oak_opts)
          if !faithful.call(obj,dec)
            unhappiness += 1
            puts " BAD: #{dec}"
          end
        rescue OAK::CantTouchThisStringError => ex
          puts " BAD: #{ex.message}: #{ex.backtrace_locations[0]}"
          unhappiness += 1
        end
      end
    when 'tests'
      #
      # --mode tests prints, for a handful of objects, their encodings
      # under a spread of option sets, and checks each round trip.
      #
      [
        [1,2,3],
        {:foo=>'foo','foo'=>['x']*10},
        -1,
        Float::NAN,
        nil,
      ].each do |obj|
        puts " #{obj} => ["
        key_chain = OAK::KeyChain.new(
          { 'l0ng3r' => OAK::Key.new('xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx') }
        )
        [
          {redundancy: :none, format: :none, compression: :none },
          {redundancy: :none, format: :base64,compression: :lz4, force: true},
          {redundancy: :crc32,format: :base64,compression: :zlib, force: true},
          {redundancy: :crc32,format: :base64,compression: :bzip2,force: true},
          {redundancy: :sha1, format: :base64,compression: :lzma, force: true},
          {key_chain: key_chain,force_oak_4: true,format: :none, },
          {key_chain: key_chain,force_oak_4: true, },
          {key_chain: key_chain,key: 'l0ng3r', },
        ].each do |opts|
          oak = OAK.encode(obj,opts)
          puts " '#{oak}',"
          dec = OAK.decode(oak,opts)
          unhappiness += 1 if !faithful.call(obj,dec)
        end
        puts " ],"
      end
    else
      Optimist::die :mode, "bogus mode #{OPTS[:mode]}"
    end
    #
    # The count of failed round trips doubles as the exit status.
    #
    if unhappiness > 0
      puts "unhappiness: #{unhappiness}"
    end
    exit unhappiness
  end
end
data/lib/oak.rb CHANGED
@@ -1,122 +1,1085 @@
1
- require 'thor'
1
+ # coding: utf-8
2
+ #
3
+ # OAK: An encoding format with enough polymorphism to support run-time
4
+ # performance experimentation and some light encryption-at-rest.
5
+ #
6
+ # author: jhw@prosperworks.com
7
+ # incept: 2016-03-02
2
8
 
3
- class Oak < Thor
4
- include Thor::Actions
5
- attr_reader :secret_token
9
+ require_relative 'oak/version'
10
+ require 'strscan'
11
+ require 'digest'
12
+ require 'base64'
13
+ require 'lz4-ruby'
14
+ require 'zlib'
15
+ require 'bzip2/ffi'
16
+ require 'lzma'
17
+ require 'openssl'
6
18
 
7
- desc "setup oak", "Set current rails app source open ready"
8
- def setup(working_directory = '.')
9
- self.destination_root = working_directory
10
- FileUtils.chdir destination_root do
11
- check_cfg
12
- dummy_config
13
- git_prepare
14
- create_config_on_deploy
15
- commit_deploy_branch
19
+ module OAK
20
+
21
+ # CantTouchThisObjectError is thrown when encode() or serialize() is
22
+ # called on an object which cannot be encoded losslessly by OAK.
23
+ #
24
+ class CantTouchThisObjectError < ArgumentError ; end
25
+
26
+ # CantTouchThisStringError is thrown when decode(), deserialize(),
27
+ # or unwrap() called on a String which cannot be decoded.
28
+ #
29
+ class CantTouchThisStringError < ArgumentError ; end
30
+
31
+ # Internal syntactic conveniences.
32
+ #
33
+ BAD_OBJ = CantTouchThisObjectError
34
+ BAD_STR = CantTouchThisStringError
35
+
36
+ # OAK_4 supports one and only one encryption algorithm and mode of
37
+ # operation.
38
+ #
39
+ # - AES-256-GCM
40
+ # - 128 bits of security
41
+ # - 256-bit keys (32 bytes)
42
+ # - 96-bit IVs (12 bytes)
43
+ # - 128-bit auth_tags (16 bytes)
44
+ # - Random IV ("Initialization Vector") for each encryption op
45
+ # - All headers authenticated.
46
+ # - Headers encrypted when not required for decryption.
47
+ #
48
+ ENCRYPTION_ALGO_NAME = 'aes-256-gcm'.freeze
49
+ ENCRYPTION_ALGO_IV_BYTES = 12 # AES-256-GCM has 96-bit IVs
50
+ ENCRYPTION_ALGO_AUTH_TAG_BYTES = 16 # AES-256-GCM has 128-bit auth, we use all
51
+
52
+ # Get a new instance of OpenSSL::Cipher for our algorithm.
53
+ #
54
+ def self.encryption_algo
55
+ OpenSSL::Cipher.new(ENCRYPTION_ALGO_NAME)
56
+ end
57
+
58
+ # Generate a new random key appropriate for the OAK_4 encryption
59
+ # algorithm.
60
+ #
61
+ def self.random_key
62
+ encryption_algo.random_key
63
+ end
64
+
65
+ # Generate a new random initialization vector appropriate for the
66
+ # OAK_4 encryption algorithm.
67
+ #
68
+ def self.random_iv
69
+ encryption_algo.random_iv
70
+ end
71
+
72
class Key

  # @param key String encryption key suitable for AES-256,
  #            specifically a binary string of 32 bytes (256 bits),
  #            randomly-generated and kept very, very secret.
  #
  # @raises ArgumentError if key is not a String, or is not exactly
  #                       as many bytes long as the cipher requires.
  #
  def initialize(key)
    if !key.is_a?(String)
      raise ArgumentError, "bad non-String key: ELIDED"
    end
    #
    # The cipher needs an exact number of *bytes*, so we measure with
    # bytesize: a String with multibyte characters can have the right
    # number of characters but the wrong number of bytes.
    #
    # We ask the cipher for its key length directly instead of
    # generating and discarding a random key just to measure it.
    #
    key_bytes = OAK.encryption_algo.key_len
    if key.bytesize != key_bytes
      raise ArgumentError, "bad key ELIDED, length not #{key_bytes}"
    end
    @key = key.dup.freeze # private frozen copy, immune to caller mutation
  end

  attr_reader :key

  def inspect
    #
    # Avoid exposing the key in casual logs or console session.
    #
    to_s[0..-2] + " @key=ELIDED>"
  end

end
18
99
 
19
- no_tasks do
100
class KeyChain

  # @param keys Hash from String key names to OAK::Key objects.
  #
  # @raises ArgumentError if keys is not a Hash, if any name is not
  #                       an identifier-like String, or if any value
  #                       is not a Key.
  #
  def initialize(keys)
    if !keys.is_a?(Hash)
      raise ArgumentError, "bogus keys #{keys}"
    end
    keys.each do |k,v|
      if !k.is_a?(String)
        raise ArgumentError, "bogus key #{k} in keys #{keys}"
      end
      if /\A[a-zA-Z][0-9a-zA-Z]*\z/ !~ k
        #
        # In oak_4, we restrict key names to sequences which look
        # like code identifiers: alphanumeric strings which start
        # with a letter.
        #
        # This keeps the encoding simple but compact.
        #
        # The pattern is anchored with \A and \z so the *whole* name
        # must conform.  With ^ and $ a name would only need one
        # conforming line, so a name with an embedded or trailing
        # newline would slip through.
        #
        raise ArgumentError, "bad key #{k} in keys #{keys}"
      end
      if !v.is_a?(Key)
        raise ArgumentError, "bogus val #{v} at #{k} in keys #{keys}"
      end
    end
    #
    # We are a happy KeyChain object now!  Keep a frozen copy so later
    # changes to the caller's Hash cannot affect us.
    #
    @keys = keys.dup.freeze
  end

  attr_reader :keys

end
133
+
134
+ # Parses a KeyChain object and keys from an ENV-like object.
135
+ #
136
+ # E.g. if the ENV contains:
137
+ #
138
+ # FOO_KEYS=a,b
139
+ # FOO_KEY_a=#{OAK.encode(<binary key>)}
140
+ # FOO_KEY_b=#{OAK.encode(<binary key>)}
141
+ #
142
+ # ...then the call OAK.parse_env_chain(ENV,'FOO') will return a new
143
+ # OAK::KeyChain with two OAK::Keys, 'a' and 'b'.
144
+ #
145
+ # This self-referential (but not recursive!) use of OAK to encode
146
+ # the key and iv is to avoid the problems with binary strings in ENV
147
+ # variables, 'heroku config:set' command line arguments, etc.
148
+ #
149
+ # @param env ENV or an ENV-like Hash from String to String.
150
+ #
151
+ # @param name String the root token
152
+ #
153
+ # @returns a new OAK::KeyChain
154
+ #
155
def self.parse_env_chain(env,name)
  #
  # <name>_KEYS lists the key names, separated by commas and/or
  # spaces.  A missing variable means an empty chain.
  #
  listing = env["#{name}_KEYS"] || ''
  key_names = listing.gsub(/^[, ]*/,'').split(/[ ,]+/)
  #
  # Each <name>_KEY_<key_name> holds one OAK-encoded key.
  #
  keys = {}
  key_names.each do |key_name|
    encoded = env["#{name}_KEY_#{key_name}"] || ''
    keys[key_name] = Key.new(OAK.decode(encoded))
  end
  KeyChain.new(keys)
end
163
+
164
+ ##########################################################################
165
+ #
166
+ # encode() and decode() are the top layer
167
+ #
168
+ # They coordinate the structure layer and the byte layer.
169
+ #
170
+ # These are the recommended entry points for most callers.
171
+ #
172
+ ##########################################################################
173
+
174
+ # Encodes suitable objects into OAK strings.
175
+ #
176
+ # Is inverted by decode(). For all obj, if encode(obj) does not
177
+ # raise an exception, decode(encode(obj)) == obj.
178
+ #
179
+ # @param obj to encode
180
+ #
181
+ # @param redundancy 'none', 'crc32' (default), or 'sha1'
182
+ #
183
+ # @param compression 'none' (default), 'lz4', 'zlib', 'bzip2', 'lzma'
184
+ #
185
+ # @param force false (default), or true. When true, always
186
+ # compress. When false, fall back to the
187
+ # original if the compressed form is larger.
188
+ #
189
+ # @param key_chain OAK::KeyChain from which to draw the encryption
190
+ # key, or nil for none.
191
+ #
192
+ # @param key String name of a key in key_chain to be used
193
+ # for encryption, or nil if none.
194
+ #
195
+ # @param format 'none', 'base64' (default)
196
+ #
197
+ # @param force_oak_4 Bool, for debugging, force oak_4 encoding even
198
+ # if no encryption key is specified.
199
+ #
200
+ # @param debug_iv String, force encryption with a known IV, TEST ONLY!
201
+ #
202
+ # WARNING: Use of debug_iv jeopardizes the security of all messages
203
+ # *ever* encrypted with that key! Never use debug_iv in production!
204
+ #
205
+ # @raises ArgumentError if obj is not handled.
206
+ #
207
def self.encode(obj,opts={})
  #
  # Structure layer first (object => naive string), then the byte
  # layer (naive string => OAK string).
  #
  _wrap(_serialize(obj),opts)
end
27
211
 
28
- # make 'config/database.yml' globally ignored
29
- global_ignore_file = File.expand_path('~/.gitignore')
30
- if File.exist? global_ignore_file
31
- ignored = File.binread global_ignore_file
32
- if !ignored.include?('config/database.yml')
33
- append_to_file(global_ignore_file, 'config/database.yml')
212
+ # Decodes suitable OAK strings into objects.
213
+ #
214
+ # Inverts encode().
215
+ #
216
+ # @param str String to decode
217
+ #
218
+ # @param key_chain OAK::KeyChain in which to look for keys to
219
+ # decrypt encrypted OAK strings, or nil for none.
220
+ #
221
+ # @returns obj the decoded object
222
+ #
223
+ # @raises ArgumentError if str is not a recognized string.
224
+ #
225
def self.decode(str,opts={})
  raise ArgumentError, "str not a String" unless str.is_a?(String)
  #
  # Byte layer first (OAK string => naive string), then the structure
  # layer (naive string => object).
  #
  _deserialize(_unwrap(str,opts))
end
232
+
233
+ ##########################################################################
234
+ #
235
+ # serialize() and deserialize() are the structure layer
236
+ #
237
+ # They are responsible for interconverting between objects and naive
238
+ # strings.
239
+ #
240
+ # This layer is analogous to TAR for files or JSON: it converts
241
+ # structure into string and vice-versa.
242
+ #
243
+ ##########################################################################
244
+
245
+ # Serializes suitable objects into naive strings.
246
+ #
247
+ # Is inverted by deserialize(). For all obj, if serialize(obj) does
248
+ # not raise an exception, deserialize(serialize(obj)) == obj.
249
+ #
250
+ # @raises CantTouchThisObjectError if obj contains any types or
251
+ # structure which cannot be encoded reversibly by OAK.
252
+ #
253
def self._serialize(obj)
  #
  # Walk the object graph once up front, rejecting anything which is
  # not an instance of a supported type.
  #
  seen,_reseen = _safety_dance(obj) do |child|
    next if ALL_TYPES.any?{ |type| child.is_a?(type) }
    raise CantTouchThisObjectError, "#{child.class} not supported: #{child}"
  end
  strt = Hash.new # string table, str => id for strings already encoded
  #
  # The output is a byte buffer: string bodies are framed by their
  # bytesize and may be in different encodings.  We therefore build it
  # as ASCII-8BIT and append every body as raw bytes.  Appending bodies
  # in their own encodings raises Encoding::CompatibilityError as soon
  # as one graph holds both non-ASCII UTF-8 and non-ASCII binary text.
  #
  ser = 'F'.b
  ser << seen.size.to_s
  seen.each_with_index do |(_object_id,(_idx2,child)),_idx|
    #
    # First, identify the unique apex type in TYPE_2_CODE.keys
    # which matches the child.
    #
    # child.class may not be listed explicitly, such as for Fixnum
    # and Bigint both being Integer, so we search and assert
    # uniqueness and existence.
    #
    is_as = ALL_TYPES.select{ |type| child.is_a?(type) }
    if 1 != is_as.size
      raise CantTouchThisObjectError, "no unique type for #{child.class}"
    end
    type = is_as[0]
    typecode = TYPE_2_CODE[type]
    if nil == child || true == child || false == child
      #
      # The type code by itself is sufficient to decode NilType,
      # TrueType, and FalseType.  We need use no other space for them.
      #
      ser << typecode
      next
    end
    if child.is_a?(Symbol) || child.is_a?(String)
      #
      # Strings and Symbols encode as an encoding code, their size
      # in bytes, and then their bytes.
      #
      # We maintain a running string table, strt, to recognize when
      # we encounter a string representation which has been
      # previously encoded.
      #
      # If we find such a duplicate, we encode the current string
      # via a back reference to the first one we saw.  This is
      # indicated by downcasing the typecode.
      #
      str = child.to_s
      enc = str.encoding
      enc_code = nil
      case enc
      when Encoding::ASCII_8BIT, Encoding::US_ASCII, Encoding::ASCII
        enc_code = 'A'
      when Encoding::UTF_8
        enc_code = 'U'
      else
        raise CantTouchThisObjectError, "unknown string encoding #{enc}"
      end
      if strt.has_key?(str)
        ser << typecode.downcase # downcase indicates strt reference
        ser << enc_code
        ser << strt[str].to_s
      else
        ser << typecode # upcase indicates full representation
        ser << enc_code
        ser << str.bytesize.to_s
        if str.bytesize > 0
          ser << '_'
          ser << str.b # raw bytes, see the note on ser above
        end
        strt[str] = strt.size
      end
      next
    end
    if child.is_a?(Numeric)
      #
      # Numerics primitives encode as their Ruby to_s which
      # matches their JSON.dump().
      #
      ser << typecode
      ser << child.to_s
      next
    end
    if child.is_a?(Array)
      #
      # An array is encoded as a size N followed by N indexes into
      # the seen list.
      #
      ser << typecode
      ser << child.size.to_s
      child.each do |a|
        ser << '_'
        ser << seen[a.object_id][0].to_s
      end
      next
    end
    if child.is_a?(Hash)
      #
      # A hash is encoded as a size N followed by 2*N indexes
      # into the seen list, organized pairwise key+value.
      #
      ser << typecode
      ser << child.size.to_s
      child.each do |k,v|
        ser << '_'
        ser << seen[k.object_id][0].to_s
        ser << '_'
        ser << seen[v.object_id][0].to_s
      end
      next
    end
    raise CantTouchThisObjectError, "not handled: #{child.class} #{child}"
  end
  ser
end
41
363
 
42
- # append config/config.yml to .gitignore if not already in
43
- File.open('.gitignore') do |f|
44
- f.each_line do |l|
45
- if l == 'config/config.yml'
46
- return
364
+ # Deserializes suitable naive strings into objects.
365
+ #
366
+ # Inverts serialize().
367
+ #
368
+ # @raises CantTouchThisStringError if str is not recognized
369
+ #
370
def self._deserialize(str)
  scanner = StringScanner.new(str)
  serial_code = scanner.scan(/F/)
  if 'F' != serial_code
    raise CantTouchThisStringError, "bogus serial_code #{serial_code}"
  end
  #
  # Every size, count, and index in the format is an unsigned decimal
  # integer.  A missing one means the string is damaged, so we refuse
  # it rather than let nil.to_i quietly read it as 0.
  #
  scan_uint = lambda do |what|
    digits = scanner.scan(/[0-9]+/) or raise BAD_STR, "missing #{what}"
    digits.to_i
  end
  num_objs = scan_uint.call('object list size')
  strt = Hash.new # string table, id => str for strings already decoded
  seen = []
  #
  # We parse the stream, constructing all the objects we see in to
  # a seen list.
  #
  # In this first pass, Arrays and Hashes are created whose
  # elements, keys, and values are temporarily integers.  These all
  # refer to slots in the seen list, and many of them will be
  # forward references to objects which we have yet to decode.
  # Later we will rectify the object graph by replacing these
  # integers with their referents from the seen list.
  #
  num_objs.times.each do |idx_obj|
    code = scanner.scan(/[a-zA-Z]/)
    case code
    when 'n'
      seen[idx_obj] = nil
    when 'f'
      seen[idx_obj] = false
    when 't'
      seen[idx_obj] = true
    when 'S', 'Y', 's', 'y'
      enc_code = scanner.scan(/[AU]/)
      enc = nil
      case enc_code
      when 'A'
        enc = Encoding::ASCII_8BIT
      when 'U'
        enc = Encoding::UTF_8
      else
        raise CantTouchThisStringError, "unknown enc_code #{enc_code}"
      end
      num = scan_uint.call('num')
      raw = nil
      case code
      when 'S', 'Y'
        #
        # Upcase: a full representation, num bytes of body.
        #
        if num > 0
          scanner.scan(/_/) or raise BAD_STR, "missing _"
          raw = scanner.peek(num)
          if raw.bytesize != num
            #
            # peek() quietly returns whatever is left when the input
            # is truncated.  Report that as a bad string rather than
            # letting the pos update below raise RangeError.
            #
            raise BAD_STR, "truncated string: #{raw.bytesize} of #{num} bytes"
          end
          scanner.pos += num # skip body
        else
          raw = ''
        end
        strt[strt.size] = raw
      when 's', 'y'
        #
        # Downcase: num is a back reference into the string table.
        #
        raw = strt[num]
        if !raw
          raise BAD_STR, "bogus string table reference #{num}"
        end
      end
      seen[idx_obj] = raw.dup.force_encoding(enc)
      case code
      when 'Y', 'y'
        seen[idx_obj] = seen[idx_obj].intern
      end
    when 'I'
      pattern = /-?[0-9]+/
      match = scanner.scan(pattern) or raise BAD_STR, "missing integer"
      seen[idx_obj] = match.to_i
    when 'F'
      pattern = /-?(Infinity|NaN|[0-9]+(\.[0-9]*)?(e([+-][0-9]*)?)?)/
      match = scanner.scan(pattern) or raise BAD_STR, "missing float"
      case match
      when 'Infinity' then seen[idx_obj] = Float::INFINITY
      when '-Infinity' then seen[idx_obj] = -Float::INFINITY
      when 'NaN' then seen[idx_obj] = Float::NAN
      else seen[idx_obj] = match.to_f
      end
    when 'A'
      num_items = scan_uint.call('array size')
      arr = []
      num_items.times.each do |idx|
        scanner.scan(/_/) or raise BAD_STR, "missing _"
        arr[idx] = scan_uint.call('array element index') # temp obj
      end
      seen[idx_obj] = arr
    when 'H'
      num_items = scan_uint.call('hash size')
      hash = Hash.new
      num_items.times.each do
        scanner.scan(/_/) or raise BAD_STR, "missing _"
        k = scan_uint.call('hash key index') # temp obj
        scanner.scan(/_/) or raise BAD_STR, "missing _"
        v = scan_uint.call('hash value index') # temp obj
        hash[k] = v
      end
      seen[idx_obj] = hash
    else
      raise BAD_STR, "not handled: #{code} #{scanner.pos} #{scanner.rest}"
    end
  end
  #
  # If we parsed correctly, there will be no unconsumed in the
  # scanner.
  #
  if !scanner.eos?
    raise BAD_STR, "not at end-of-string: #{scanner.pos} #{scanner.rest}"
  end
  #
  # We rectify the references for each intermediate Array and Hash
  # as promised earlier.
  #
  # Note that this code must be inherently mutation-oriented since
  # it might have to construct cyclic graphs.
  #
  rectified = seen.map do |elem|
    if elem.is_a?(Array)
      next Array.new
    elsif elem.is_a?(Hash)
      next Hash.new
    else
      elem
    end
  end
  #
  # An index beyond the object list can only come from a damaged
  # string.  We check the index itself rather than the looked-up
  # value, because nil and false are legitimate referents.
  #
  deref = lambda do |ref|
    if ref >= rectified.size
      raise BAD_STR, "bogus object reference #{ref}"
    end
    rectified[ref]
  end
  rectified.each_with_index do |elem,idx|
    if elem.is_a?(Array)
      seen[idx].each_with_index do |a,i|
        elem[i] = deref.call(a)
      end
    elsif elem.is_a?(Hash)
      seen[idx].each do |k,v|
        elem[deref.call(k)] = deref.call(v)
      end
    end
  end
  #
  # A Hash may have been given an Array or Hash key which was still
  # empty when inserted and only filled in later in the loop above.
  # Filling it changes the key's hash value, so the Hash would be
  # unable to find its own key.  Now that every container has its
  # final contents, re-index them all.
  #
  rectified.each do |elem|
    elem.rehash if elem.is_a?(Hash)
  end
  #
  # By the way _safety_dance performed its walk in _serialize(), the
  # object we are decoding is the first object encoded in str.
  #
  # Thus, we return the first element of the rectified list.
  #
  rectified.first
end
50
515
 
51
- append_to_file '.gitignore', 'config/config.yml'
516
+ ##########################################################################
517
+ #
518
+ # wrap() and unwrap() are the byte layer
519
+ #
520
+ # They are responsible for interconverting between naive strings and
521
+ # strings which are ready to go out on the wire into external
522
+ # storage.
523
+ #
524
+ # This layer is analogous to GZIP: it converts strings into a
525
+ # different representation which is smaller, more resistant to
526
+ # corruption, and/or more recognizable.
527
+ #
528
+ ##########################################################################
52
529
 
53
- # protect secret_token
54
- full_text = File.binread 'config/initializers/secret_token.rb'
55
- full_text.gsub! /(Application\.config\.secret_token\s=\s)'(.*)'/, '\1APP_CONFIG[\'secret_token\']'
56
- # save per app secret_token for later use
57
- @secret_token = "#{$2}"
58
- File.open('config/initializers/secret_token.rb', 'w') do |f|
59
- f.write full_text
60
- end
530
+ # Wraps any string into a OAK string.
531
+ #
532
+ # Is inverted by unwrap(). For all str, unwrap(wrap(str)) == str.
533
+ #
534
+ # @param str naive string to be wrapped as an OAK string
535
+ #
536
+ # @param redundancy 'none', 'crc32' (default), or 'sha1'
537
+ #
538
+ # @param compression 'none' (default), 'lz4', 'zlib', 'bzip2', or 'lzma'
539
+ #
540
+ # @param force false (default), or true. When true, always
541
+ # compress. When false, fall back to the
542
+ # original if the compressed form is larger.
543
+ #
544
+ # @param key_chain OAK::KeyChain from which to draw the encryption
545
+ # key, or nil for none.
546
+ #
547
+ # @param key String name of a key in key_chain to be used
548
+ # for encryption, or nil if none.
549
+ #
550
+ # @param force_oak_4 Bool, for debugging, force oak_4 encoding even
551
+ # if no encryption key is specified.
552
+ #
553
+ # @param format 'none', 'base64' (default)
554
+ #
555
+ # @returns an OAK string
556
+ #
557
def self._wrap(str,opts={})
  #
  # Resolve the byte-layer settings, applying the documented defaults.
  #
  settings = [
    (opts[:redundancy] || :crc32).to_s,
    (opts[:compression] || :none).to_s,
    (opts[:force] || false),
    (opts[:format] || :base64).to_s,
  ]
  key_chain = opts[:key_chain]
  key = opts[:key]
  debug_iv = opts[:debug_iv]
  #
  # Validate the encryption-related options before doing any work.
  #
  if key_chain && !key_chain.is_a?(KeyChain)
    raise ArgumentError, "bad key_chain #{key_chain}"
  end
  if debug_iv
    iv_ok = debug_iv.is_a?(String) && ENCRYPTION_ALGO_IV_BYTES == debug_iv.size
    raise ArgumentError, "bad debug_iv #{debug_iv}" unless iv_ok
  end
  encryption_key = nil
  if key
    raise ArgumentError, "key #{key} without key_chain" unless key_chain
    encryption_key = key_chain.keys[key]
    if !encryption_key
      raise ArgumentError, "key not found in #{key_chain.keys}: #{key}"
    end
  end
  payload = str.b # dupe to Encoding::ASCII_8BIT
  #
  # OAK_4 is used when encrypting or when explicitly forced,
  # otherwise OAK_3.
  #
  if encryption_key || opts[:force_oak_4]
    _wrap_oak_4(payload,*settings,key,encryption_key,debug_iv)
  else
    _wrap_oak_3(payload,*settings)
  end
end
62
604
 
63
- def dummy_config
64
- File.open('config/config.example.yml', 'w') do |f|
65
- f.write "secret_token: 'c1cae0f52a3ef8efa369a127c63bd6ede539a4089fd952b33199100a6769c8455ab4969f2eefaf1ebcbe0208bd57531204c77f41f715207f961e7e45f139f4e7'"
66
- end
67
- prepend_to_file 'config/application.rb', "require 'yaml'\nAPP_CONFIG = YAML.load(File.read(File.expand_path('../config.yml', __FILE__)))\n"
605
def self._wrap_oak_3(
      str,
      redundancy,
      compression,
      force,
      format
    )
  #
  # The check is computed over the original string.  _compress() also
  # returns the compression it actually used, which is what the
  # header must record.
  #
  source_redundancy = _check(redundancy,str)
  compressed, compression = _compress(compression,force,str)
  formatted = _format(format,compressed)
  pieces = [
    'oak_3',                         # format id+ver
    REDUNDANCY_2_CODE[redundancy],   # redundancy
    COMPRESSION_2_CODE[compression], # compression
    FORMAT_2_CODE[format],           # format
    '_',
    source_redundancy,               # source check
    '_',
    formatted.size.to_s,             # formatted size
    '_',
    formatted,                       # payload
    '_',
    'ok',                            # terminator
  ]
  output = ''.dup
  pieces.each { |piece| output << piece }
  output.force_encoding(Encoding::ASCII_8BIT)
end
68
629
 
69
- # simply copy database.yml to database.example.yml
70
- File.open('config/database.example.yml', 'w') do |f|
71
- File.open('config/database.yml', 'r') do |o|
72
- f.write o.read
73
- end
74
- end
630
+ def self._wrap_oak_4(
631
+ str,
632
+ redundancy,
633
+ compression,
634
+ force,
635
+ format,
636
+ key,
637
+ encryption_key,
638
+ debug_iv
639
+ )
640
+ header = 'oak_4' # format id+ver
641
+ if key
642
+ header << key # key name
75
643
  end
644
+ header << '_'
645
+ header << FORMAT_2_CODE[format] # format
646
+ compressed, compression = _compress(compression,force,str)
647
+ plaintext = ''
648
+ plaintext << REDUNDANCY_2_CODE[redundancy] # redundancy
649
+ plaintext << COMPRESSION_2_CODE[compression] # compression
650
+ plaintext << _check(redundancy,str) # source check
651
+ plaintext << '_'
652
+ plaintext << compressed
653
+ encrypted = _encrypt(
654
+ encryption_key,
655
+ plaintext,
656
+ header,
657
+ debug_iv
658
+ )
659
+ formatted = _format(format,encrypted)
660
+ output = header
661
+ output << '%d' % formatted.size # formatted size
662
+ output << '_'
663
+ output << formatted # payload
664
+ output << '_'
665
+ output << 'ok' # terminator
666
+ output.force_encoding(Encoding::ASCII_8BIT)
667
+ end
76
668
 
77
- def git_prepare
78
- if File.exists? '.git'
79
- puts 'It seems a git repository has already created, I\'ll leave it untouched.'
80
- return
81
- end
669
# Unwraps any OAK string into a string.
#
# Inverts wrap(). For all str, unwrap(wrap(str)) == str.
#
# Dispatches on the leading "oak_3" or "oak_4" marker. Only the
# OAK_4 decoder receives opts.
#
# @param str OAK string to be unwrapped
#
# @param opts options Hash handed to the OAK_4 decoder, which reads
# opts[:key_chain] to find keys for encrypted OAK strings.
#
# @returns a string
#
# @raises ArgumentError if str is not in OAK format.
#
def self._unwrap(str, opts = {})
  scanner = StringScanner.new(str.b) # str.b for dup to ASCII_8BIT
  case scanner.scan(/oak_[34]/)
  when 'oak_4'
    _unwrap_oak_4(scanner, opts) # encryption opts possible for OAK_4
  when 'oak_3'
    _unwrap_oak_3(scanner)       # no opts for decoding OAK_3
  else
    raise BAD_STR, "bad oak+ver"
  end
end
82
692
 
83
- `git init && git add . && git commit -m "init"`
84
- `git checkout -b deploy`
693
# Decodes the remainder of an OAK_3 string.
#
# Expects sc to be positioned just past the "oak_3" marker, where
# the layout is:
#
#   <redundancy><compression><format> _ <scheck> _ <fsize> _ <payload> _ok
#
# @param sc StringScanner positioned after "oak_3"
#
# @returns the original string, after deformatting, decompressing,
# and verifying the source check
#
# @raises BAD_STR if a field is missing or malformed, including any
# content after the "_ok" terminator other than one final newline
#
# @raises CantTouchThisStringError if the declared payload size does
# not fit the string, or the source check does not match
#
def self._unwrap_oak_3(sc)
  r = sc.scan(/[NCS]/) or raise BAD_STR, "bad redundancy"
  c = sc.scan(/[N4ZBM]/) or raise BAD_STR, "bad compression"
  f = sc.scan(/[NB]/) or raise BAD_STR, "bad format"
  raise BAD_STR, "missing _" unless sc.scan(/_/)
  scheck = sc.scan(/[a-f0-9]+/) or raise BAD_STR, "bad scheck"
  raise BAD_STR, "missing _" unless sc.scan(/_/)
  fsize = sc.scan(/[0-9]+/) or raise BAD_STR, "bad fsize"
  fsize = fsize.to_i
  raise BAD_STR, "missing _" unless sc.scan(/_/)
  begin
    #
    # Both calls raise RangeError on a bad size: peek when fsize does
    # not fit in a native integer, pos= when it runs past the end of
    # the string. Keeping both under the same rescue means neither
    # leaks out as a bare RangeError.
    #
    formatted = sc.peek(fsize)
    sc.pos += fsize
  rescue RangeError => ex
    raise CantTouchThisStringError, "#{ex.class}: #{ex.message}"
  end
  #
  # \Z rather than $: $ matches at the end of any line, which would
  # accept arbitrary extra lines after the terminator. \Z still
  # tolerates a single trailing newline.
  #
  sc.scan(/_ok\Z/) or raise BAD_STR, "bad ok: #{formatted}"
  redundancy  = CODE_2_REDUNDANCY[r]  || r
  compression = CODE_2_COMPRESSION[c] || c
  format      = CODE_2_FORMAT[f]      || f
  fsize_re    = formatted.size
  if fsize != fsize_re # defensive: pos= above already bounds fsize
    raise CantTouchThisStringError, "fsize #{fsize} vs #{fsize_re}"
  end
  compressed = _deformat(format, formatted)
  original   = _decompress(compression, compressed)
  scheck_re  = _check(redundancy, original)
  if scheck != scheck_re
    raise CantTouchThisStringError, "scheck #{scheck} vs #{scheck_re}"
  end
  original
end
86
725
 
87
- def create_config_on_deploy
88
- File.open('config/config.yml', 'w') do |f|
89
- f.write 'secret_token: \'' + secret_token + '\''
726
# Decodes the remainder of an OAK_4 string.
#
# Expects sc to be positioned just past the "oak_4" marker, where
# the layout is:
#
#   [key name] _ <format> <fsize> _ <payload> _ok
#
# The payload is deformatted and passed through _decrypt. The
# resulting plaintext is laid out as:
#
#   <redundancy><compression><scheck> _ <compressed body>
#
# @param sc StringScanner positioned after "oak_4"
#
# @param opts Hash; opts[:key_chain] must respond to keys and is
# consulted only when the string names a key
#
# @returns the original string, after decompressing and verifying
# the source check
#
# @raises BAD_STR if a field is missing or malformed, including any
# content after the "_ok" terminator other than one final newline
#
# @raises CantTouchThisStringError if a named key cannot be found,
# the declared payload size does not fit the string, or the source
# check does not match
#
def self._unwrap_oak_4(sc, opts = {})
  key = sc.scan(/[^_]+/) # nil OK, indicates no encryption
  encryption_key = nil
  if key
    key_chain = opts[:key_chain]
    if !key_chain
      raise CantTouchThisStringError, "key #{key} but no key_chain"
    end
    keys = key_chain.keys
    encryption_key = keys[key]
    if !encryption_key
      raise CantTouchThisStringError, "key not found in #{keys}: #{key}"
    end
  end
  raise BAD_STR, "missing _" unless sc.scan(/_/)
  f = sc.scan(/[NB]/) or raise BAD_STR, "bad format"
  header = sc.string[0..(sc.pos-1)] # for authentication by _decrypt
  format = CODE_2_FORMAT[f]
  fsize = sc.scan(/[0-9]+/) or raise BAD_STR, "bad fsize"
  fsize = fsize.to_i
  raise BAD_STR, "missing _" unless sc.scan(/_/)
  begin
    #
    # Both calls raise RangeError on a bad size: peek when fsize does
    # not fit in a native integer, pos= when it runs past the end of
    # the string. Keeping both under the same rescue means neither
    # leaks out as a bare RangeError.
    #
    formatted = sc.peek(fsize)
    sc.pos += fsize
  rescue RangeError => ex
    raise CantTouchThisStringError, "#{ex.class}: #{ex.message}"
  end
  #
  # \Z rather than $: $ matches at the end of any line, which would
  # accept arbitrary extra lines after the terminator. \Z still
  # tolerates a single trailing newline.
  #
  sc.scan(/_ok\Z/) or raise BAD_STR, "bad ok"
  encrypted = _deformat(format, formatted)
  plaintext = _decrypt(encryption_key, encrypted, header)
  sp = StringScanner.new(plaintext)
  r = sp.scan(/[NCS]/) or raise BAD_STR, "bad redundancy"
  c = sp.scan(/[N4ZBM]/) or raise BAD_STR, "bad compression"
  scheck = sp.scan(/[a-f0-9]+/) or raise BAD_STR, "bad scheck"
  raise BAD_STR, "missing _" unless sp.scan(/_/)
  compressed  = sp.rest
  redundancy  = CODE_2_REDUNDANCY[r]  || r
  compression = CODE_2_COMPRESSION[c] || c
  original    = _decompress(compression, compressed)
  scheck_re   = _check(redundancy, original)
  if scheck != scheck_re
    raise(
      CantTouchThisStringError,
      "scheck #{scheck} vs #{scheck_re} in #{sc.string}"
    )
  end
  original
end
98
774
 
99
- # add checkout hook for switching from 'deploy' to 'master'
100
- File.open('.git/hooks/post-checkout', 'w') do |f|
101
- f.write <<-EOS
102
- #!/bin/bash
775
# One-character code used to encode each supported object type.
#
# ALL_TYPES lists the supported classes in the order given here.
#
TYPE_2_CODE ||= {
  Hash       => 'H',
  Array      => 'A',
  String     => 'S', # downcased to 's' for string table lookup
  Symbol     => 'Y', # downcased to 'y' for string table lookup
  Integer    => 'I',
  Float      => 'F',
  NilClass   => 'n',
  TrueClass  => 't',
  FalseClass => 'f'
}.freeze
ALL_TYPES ||= TYPE_2_CODE.keys.freeze
103
789
 
104
- branch_name=$(git symbolic-ref -q HEAD)
105
- branch_name=${branch_name##refs/heads/}
790
# How we encode :format in the OAK strings, plus the inverse
# mapping from code back to format name.
#
FORMAT_2_CODE ||= {
  'none'   => 'N',
  'base64' => 'B' # urlsafe form with padding and whitespace stripped
}.freeze
CODE_2_FORMAT ||= FORMAT_2_CODE.invert.freeze
106
797
 
107
- if [ "$branch_name" = master -a -e "config/config.example.yml" ]; then
108
- cp config/config.example.yml config/config.yml
109
- echo "cp config/config.example.yml config/config.yml"
110
- fi
111
- EOS
798
# How we encode :compression in the OAK strings, plus the inverse
# mapping from code back to compression name.
#
# Early on, I captured some metrics using the catenation of all our
# Ruby code as a test file.
#
# I measured:
#
#   SOURCE  5707334
#   none    5707370  compression 0.17s  decompression 0.16s
#   lzo     1804765  compression 0.18s  decompression 0.16s
#   lzf     1807971  compression 0.16s  decompression 0.17s
#   lz4     1813574  compression 0.17s  decompression 0.14s
#   zlib    1071216  compression 0.53s  decompression 0.19s
#   bzip2    868595  compression 0.62s  decompression 0.33s
#   lzma     760594  compression 6.22s  decompression 0.20s
#
# From this, I conclude that only one of lzo, lzf, lz4 is
# interesting. They all yield approximately the same compression,
# and their compression times are indistinguishable from the rest of
# the streaming and encoding times imposed by OAK.
#
# I'm settling on supporting only lz4 because it seems to be better
# supported as a polymorphic lib - it's closer to a de facto
# standard for the LZ77 family.
#
# zlib, bzip2, and lzma each represent interesting distinct choices
# - I'm keeping support for all three.
#
COMPRESSION_2_CODE ||= {
  'none'  => 'N',
  'lz4'   => '4',
  'zlib'  => 'Z',
  'bzip2' => 'B',
  'lzma'  => 'M'
}.freeze
CODE_2_COMPRESSION ||= COMPRESSION_2_CODE.invert.freeze
834
+
835
# How we encode :redundancy in the OAK strings, plus the inverse
# mapping from code back to redundancy name.
#
REDUNDANCY_2_CODE ||= {
  'none'  => 'N',
  'crc32' => 'C',
  'sha1'  => 'S'
}.freeze
CODE_2_REDUNDANCY ||= REDUNDANCY_2_CODE.invert.freeze
843
+
844
# Helper method, calculates the redundancy check for str.
#
# @param redundancy 'none', 'crc32', or 'sha1' (anything whose to_s
# is one of those)
#
# @param str string to be checked
#
# @returns '0' for none, the CRC-32 as a decimal string for crc32,
# or the SHA-1 as a hex string for sha1
#
# @raises ArgumentError if redundancy is not recognized
#
def self._check(redundancy, str)
  kind = redundancy.to_s
  return '0'                          if 'none'  == kind
  return '%d' % Zlib.crc32(str)       if 'crc32' == kind
  return Digest::SHA1.hexdigest(str)  if 'sha1'  == kind
  raise ArgumentError, "unknown redundancy #{redundancy}"
end
855
+
856
# Helper method, calculates the formatted version of str.
#
# @param format 'none' or 'base64' (anything whose to_s is one of
# those)
#
# @param str string to be formatted
#
# @returns str itself for none, or its unpadded base64url encoding
#
# @raises ArgumentError if format is not recognized
#
def self._format(format, str)
  kind = format.to_s
  if 'none' == kind
    str
  elsif 'base64' == kind
    #
    # This is "Base 64 Encoding with URL and Filename Safe Alphabet",
    # aka base64url, with the option not to use padding, per
    # https://tools.ietf.org/html/rfc4648#section-5.
    #
    # On Ruby 2.3+ the option "padding: false" could replace the
    # gsub which chops off the trailing padding.
    #
    Base64.urlsafe_encode64(str).gsub(/=.*$/,'')
  else
    raise ArgumentError, "unknown format #{format}"
  end
end
876
+
877
# Helper method, inverts _format().
#
# @param format 'none' or 'base64' (anything whose to_s is one of
# those)
#
# @param str formatted string
#
# @returns str itself for none, or the decoded bytes for base64
#
# @raises ArgumentError if format is not recognized
#
def self._deformat(format, str)
  kind = format.to_s
  if 'none' == kind
    str
  elsif 'base64' == kind
    #
    # Base64.urlsafe_decode64(str) cannot be used here: it raises
    # ArgumentError "invalid base64" on input whose padding has been
    # stripped.
    #
    # Plain Base64.decode64() is liberal in what it accepts, padded
    # or not, so the urlsafe alphabet is mapped back to the standard
    # one and decoded with that.
    #
    standard = str.tr('-_','+/')
    Base64.decode64(standard)
  else
    raise ArgumentError, "unknown format #{format}"
  end
end
897
+
898
# Helper for wrap() and unwrap(), multiplexes encryption.
#
# @param encryption_key object responding to key, or nil/false for
# no encryption
#
# @param data plaintext to be encrypted
#
# @param auth_data additional data to be authenticated with the
# ciphertext
#
# @param debug_iv fixed iv to use instead of a random one, or nil
#
# @returns data unchanged when there is no encryption_key, otherwise
# the catenation iv + auth_tag + ciphertext
#
def self._encrypt(encryption_key, data, auth_data, debug_iv)
  return data unless encryption_key
  want_iv_bytes  = ENCRYPTION_ALGO_IV_BYTES
  want_tag_bytes = ENCRYPTION_ALGO_AUTH_TAG_BYTES
  if debug_iv && debug_iv.size != want_iv_bytes
    raise "unexpected debug_iv.size #{debug_iv.size} not #{want_iv_bytes}"
  end
  #
  # WARNING: In at least some versions of OpenSSL::Cipher, setting
  # iv before key would cause the iv to be ignored in aes-*-gcm
  # ciphers!
  #
  #   https://github.com/attr-encrypted/encryptor/pull/22
  #   https://github.com/attr-encrypted/encryptor/blob/master/README.md
  #
  # The issue was reported against version "1.0.1f 6 Jan 2014", and
  # it is not established here whether 1.1.0 is affected.
  #
  # OAK will almost certainly not be used with a buggy version of
  # OpenSSL, but one can't be too careful: the order below is
  # deliberately cipher.key *then* cipher.iv. Do not reorder.
  #
  cipher     = encryption_algo.encrypt
  cipher.key = encryption_key.key
  iv         = debug_iv || cipher.random_iv
  cipher.iv  = iv
  cipher.auth_data = auth_data
  ciphertext = cipher.update(data) + cipher.final
  auth_tag   = cipher.auth_tag
  unless iv.size == want_iv_bytes
    raise "unexpected iv.size #{iv.size} not #{want_iv_bytes}"
  end
  unless auth_tag.size == want_tag_bytes
    raise "unexpected auth_tag.size #{auth_tag.size} not #{want_tag_bytes}"
  end
  #
  # iv and auth_tag have fixed widths, so the decoder can split them
  # off by position. No sizes need to be recorded in the message.
  #
  iv + auth_tag + ciphertext
end
946
+
947
# Helper for wrap() and unwrap(), multiplexes decryption.
#
# Expects data in the layout produced by _encrypt():
# iv + auth_tag + ciphertext, where the iv and auth_tag widths are
# ENCRYPTION_ALGO_IV_BYTES and ENCRYPTION_ALGO_AUTH_TAG_BYTES.
#
# @param encryption_key object responding to key, or nil/false for
# no encryption
#
# @param data encrypted bytes
#
# @param auth_data additional data which was authenticated with the
# ciphertext
#
# @returns data unchanged when there is no encryption_key, otherwise
# the decrypted plaintext
#
# @raises CantTouchThisStringError if data is too short to hold an
# iv and auth_tag, or if the cipher rejects it
#
def self._decrypt(encryption_key, data, auth_data)
  return data unless encryption_key
  iv_size       = ENCRYPTION_ALGO_IV_BYTES
  auth_tag_size = ENCRYPTION_ALGO_AUTH_TAG_BYTES
  prefix_size   = iv_size + auth_tag_size
  #
  # A truncated payload would otherwise reach the cipher as a short
  # iv or a nil auth_tag/ciphertext, and surface as an ArgumentError
  # or TypeError instead of the error used for every other kind of
  # damaged input.
  #
  if data.bytesize < prefix_size
    raise(
      CantTouchThisStringError,
      "encrypted data too short: #{data.bytesize} < #{prefix_size}"
    )
  end
  iv         = data.byteslice(0, iv_size)
  auth_tag   = data.byteslice(iv_size, auth_tag_size)
  ciphertext = data.byteslice(prefix_size..-1)
  cipher     = encryption_algo.decrypt
  cipher.key = encryption_key.key # key before iv, as in _encrypt
  begin
    cipher.iv        = iv
    cipher.auth_tag  = auth_tag
    cipher.auth_data = auth_data
    cipher.update(ciphertext) + cipher.final
  rescue OpenSSL::Cipher::CipherError => ex
    raise CantTouchThisStringError, "#{ex.class}: #{ex.message}"
  end
end
967
+
968
# Helper for wrap() and unwrap(), multiplexes compression.
#
# @param compression 'none', 'lz4', 'zlib', 'bzip2', or 'lzma'
# (anything whose to_s is one of those)
#
# @param force when false, falls back to 'none' unless compressing
# actually made the data smaller
#
# @param str string to be compressed
#
# @returns [compressed,compression] where compression is the name of
# the compression actually applied, as a String
#
# @raises ArgumentError if compression is not recognized
#
def self._compress(compression, force, str)
  compressed =
    case compression.to_s
    when 'none'
      str
    when 'lz4'
      LZ4.compress(str)
    when 'zlib'
      Zlib.deflate(str)
    when 'bzip2'
      io = StringIO.new
      io.set_encoding(Encoding::ASCII_8BIT)
      Bzip2::FFI::Writer.write(io, str)
      io.string
    when 'lzma'
      LZMA.compress(str)
    else
      raise ArgumentError, "unknown compression #{compression}"
    end
  #
  # Compare byte counts, not character counts: String#size counts
  # characters, so a multibyte source string would look smaller than
  # it is and good compression would be thrown away.
  #
  if !force && compressed.bytesize >= str.bytesize
    return [str, 'none']
  end
  [compressed, compression.to_s]
end
994
+
995
# Helper for wrap() and unwrap(), multiplexes decompression.
#
# @param compression 'none', 'lz4', 'zlib', 'bzip2', or 'lzma'
# (anything whose to_s is one of those)
#
# @param str compressed string
#
# @returns the decompressed string
#
# @raises CantTouchThisStringError if the decompressor rejects str
#
# @raises ArgumentError if compression is not recognized
#
def self._decompress(compression, str)
  case compression.to_s
  when 'none'
    str
  when 'lz4'
    begin
      LZ4.uncompress(str)
    rescue LZ4Internal::Error => ex
      raise CantTouchThisStringError, "#{ex.class}: #{ex.message}"
    end
  when 'zlib'
    begin
      Zlib::Inflate.inflate(str)
    rescue Zlib::DataError, Zlib::BufError, Zlib::NeedDict => ex
      #
      # DataError covers corrupt streams, BufError covers truncated
      # or empty ones, and NeedDict covers streams which demand a
      # preset dictionary. All three mean the input is bad.
      #
      raise CantTouchThisStringError, "#{ex.class}: #{ex.message}"
    end
  when 'bzip2'
    begin
      raw = Bzip2::FFI::Reader.read(StringIO.new(str))
    rescue Bzip2::FFI::Error::MagicDataError => ex
      raise CantTouchThisStringError, "#{ex.class}: #{ex.message}"
    end
    raw.b # dupe to Encoding::ASCII_8BIT
  when 'lzma'
    begin
      raw = LZMA.decompress(str)
    rescue RuntimeError => ex
      raise CantTouchThisStringError, "#{ex.class}: #{ex.message}"
    end
    raw.b # dupe to Encoding::ASCII_8BIT
  else
    raise ArgumentError, "unknown compression #{compression}"
  end
end
1035
+
1036
# Walks obj recursively, touching each reachable child only once
# without getting caught up in cycles or touching DAGy bits twice.
#
# Only knows how to recurse into Arrays and Hashes.
#
# The traversal is depth-first pre-order. The children of an Array
# are walked in positional order, and the pairs of a Hash are walked
# in the Hash's own order as k,v,k,v, etc.
#
# @param obj object to walk
#
# @param seen Hash which maps object_id => [idx,child] for every
# object touched, where idx is 0,1,2,... corresponding to the order
# in which child was first encountered.
#
# @param reseen List of children which were reached again after
# their first visit, one entry per repeat encounter.
#
# @param block if present, every object touched is yielded to block
# on its first visit
#
# @return seen,reseen
#
def self._safety_dance(obj, seen = nil, reseen = nil, &block)
  seen   ||= {}
  reseen ||= []
  oid = obj.object_id
  if seen.key?(oid)
    reseen << obj # already visited: record it, do not descend again
  else
    #
    # Note that OAK._serialize() depends on the depth-first pre-order
    # specification here - at least, it assumes that the first
    # element walked will be the first element added to seen.
    #
    seen[oid] = [seen.size, obj]
    yield obj if block # pre-order: this node before children
    if obj.is_a?(Hash)
      obj.each do |k, v| # children in hash order and k,v,...
        _safety_dance(k, seen, reseen, &block)
        _safety_dance(v, seen, reseen, &block)
      end
    elsif obj.is_a?(Array)
      obj.each do |child| # children in list order
        _safety_dance(child, seen, reseen, &block)
      end
    end
  end
  return seen, reseen
end
1084
+
122
1085
  end