oak 0.0.3 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,245 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # oak.rb: cli driver for encoding strings in the OAK format.
4
+ #
5
+ # author: jhw@prosperworks.com
6
+ # incept: 2016-03-05
7
+ #
8
+
9
+ require_relative '../lib/oak.rb'
10
+ require 'optimist'
11
+
12
+ OLD_ARGV = ARGV.dup # ARGV is consumed by Optimist but we use later.
13
+ OPTS = Optimist.options do
14
+ banner "#{$0} cli driver for OAK"
15
+ banner <<-OPTIMIST_EXAMPLES
16
+ Examples:
17
+ $ echo hello | bin/oak.rb
18
+ oak_3CNB_1944283675_15_RjFTVTVfaGVsbG8_ok
19
+ $ (echo hello ; echo world) | bin/oak.rb
20
+ oak_3CNB_1944283675_15_RjFTVTVfaGVsbG8_ok
21
+ oak_3CNB_2139413982_15_RjFTVTVfd29ybGQ_ok
22
+ $ (echo hello ; echo world) | bin/oak.rb --compression zlib --force
23
+ oak_3CZB_1944283675_26_eJxzMwwONY3PSM3JyQcAFF4DyA_ok
24
+ oak_3CZB_2139413982_26_eJxzMwwONY0vzy_KSQEAFNgD3A_ok
25
+ $ (echo hello ; echo world) | bin/oak.rb --format none
26
+ oak_3CNN_1944283675_11_F1SU5_hello_ok
27
+ oak_3CNN_2139413982_11_F1SU5_world_ok
28
+ $ (echo hello ; echo world) | bin/oak.rb | bin/oak.rb --mode decode-lines
29
+ hello
30
+ world
31
+ OPTIMIST_EXAMPLES
32
+ banner "Options:"
33
+ version "#{$0} #{OAK::VERSION}"
34
+ opt :redundancy, 'redundancy', :default => 'crc32'
35
+ opt :format, 'format', :default => 'base64'
36
+ opt :compression, 'compression', :default => 'none'
37
+ opt :force, 'compress even if bigger', :default => false
38
+ opt :mode, 'mode', :default => 'encode-lines'
39
+ opt :key_chain, 'key chain env name', :type => :string
40
+ opt :key, 'encrypt key name', :type => :string
41
+ opt :key_check, 'check available keys', :default => false
42
+ opt :key_generate, 'generate new key', :default => false
43
+ opt :force_oak_4, 'force OAK_4 even unencrypted', :default => false
44
+ opt :eigen, 'calc eigenratio', :type => :int
45
+ opt :self_test, 'self-test only', :default => false
46
+ opt :help, 'show this help'
47
+ end
48
+ Optimist::die :eigen, "must be non-negative" if OPTS[:eigen] && OPTS[:eigen] < 0
49
+
50
+ oak_opts = {}
51
+ oak_opts[:redundancy] = OPTS[:redundancy]
52
+ oak_opts[:compression] = OPTS[:compression]
53
+ oak_opts[:force] = OPTS[:force]
54
+ oak_opts[:format] = OPTS[:format]
55
+ oak_opts[:key_chain] = OAK.parse_env_chain(ENV,OPTS[:key_chain])
56
+ oak_opts[:key] = OPTS[:key]
57
+ oak_opts[:force_oak_4] = OPTS[:force_oak_4]
58
+
59
+ if !OAK::REDUNDANCY_2_CODE.keys.include?(oak_opts[:redundancy])
60
+ Optimist::die :redundancy, "bogus #{OPTS[:redundancy]}"
61
+ end
62
+ if !OAK::COMPRESSION_2_CODE.keys.include?(oak_opts[:compression])
63
+ Optimist::die :compression, "bogus #{OPTS[:compression]}"
64
+ end
65
+ cool_formats = OAK::FORMAT_2_CODE.keys
66
+ if !cool_formats.include?(oak_opts[:format])
67
+ Optimist::die :format, "bogus #{OPTS[:format]} not in #{cool_formats}"
68
+ end
69
+
70
+ =begin
71
+
72
+ doctest: simple transcoding
73
+ >> OAK::decode(OAK::encode([1,"2",3.000001]))
74
+ => [1,"2",3.000001]
75
+ >> OAK::decode(OAK::encode({foo: "bar"}))
76
+ => {foo: "bar"}
77
+ >> OAK::decode(OAK::encode({foo: :bar}))
78
+ => {foo: :bar}
79
+ >> OAK::decode(OAK::encode("Hello, World!"))
80
+ => "Hello, World!"
81
+ >> OAK::decode(OAK::encode("Hello, World!", format: :none, redundancy: :none))
82
+ => "Hello, World!"
83
+
84
+ doctest: stability of encoding
85
+ >> OAK::decode("oak_3NNB_0_30_RjNIMV8xXzJZQTNfZm9vU1UzX2Jhcg_ok")
86
+ => {:foo=>"bar"}
87
+ >> OAK::encode(1, format: :base64, redundancy: :none)
88
+ => "oak_3NNB_0_6_RjFJMQ_ok"
89
+ >> OAK::encode(1, format: :base64, redundancy: :crc32)
90
+ => "oak_3CNB_3405226796_6_RjFJMQ_ok"
91
+ >> OAK::encode(1, format: :none, redundancy: :crc32)
92
+ => "oak_3CNN_3405226796_4_F1I1_ok"
93
+ >> hello_utf8 = "Hello, World!".force_encoding('UTF-8')
94
+ => "Hello, World!"
95
+ >> OAK::encode(hello_utf8, format: :base64, redundancy: :none)
96
+ => "oak_3NNB_0_27_RjFTVTEzX0hlbGxvLCBXb3JsZCE_ok"
97
+ >> OAK::encode(hello_utf8, format: :none, redundancy: :crc32)
98
+ => "oak_3CNN_2351984628_20_F1SU13_Hello, World!_ok"
99
+
100
+ Note above I used force_encoding('UTF-8') after discovering that with
101
+ Ruby 2.1.6 on Mac I get Encoding.default_external is UTF-8, but with
101
+ Ruby 2.1.6 on Linux I get Encoding.default_external is US-ASCII!
103
+
104
+ =end
105
+
106
+ if __FILE__ == $0
107
+ if OPTS[:self_test]
108
+ require 'rubydoctest'
109
+ exit RubyDocTest::Runner.new(File.read(__FILE__), __FILE__).run ? 0 : 1
110
+ end
111
+ if OPTS[:key_check]
112
+ if !OPTS[:key_chain]
113
+ puts "no --key-chain specified"
114
+ else
115
+ keys = oak_opts[:key_chain].keys.keys
116
+ if 0 == keys.size
117
+ puts "#{OPTS[:key_chain]}: no keys found"
118
+ else
119
+ puts "#{OPTS[:key_chain]}: found keys: #{keys.join(' ')}"
120
+ end
121
+ end
122
+ end
123
+ if OPTS[:key_generate]
124
+ STDOUT.puts OAK.encode(OAK.random_key)
125
+ exit 0
126
+ end
127
+ if !$stdin.tty?
128
+ if OPTS[:eigen]
129
+ prev = STDIN.read
130
+ puts "input: %d" % prev.size
131
+ OPTS[:eigen].times do |i|
132
+ oak = OAK.encode(prev,oak_opts)
133
+ psize = prev.size
134
+ wsize = oak.size
135
+ ratio = 1.0 * wsize / psize
136
+ puts " iter %3d: %4d => %4d ratio %.2f" % [i,psize,wsize,ratio]
137
+ prev = oak
138
+ end
139
+ exit 0
140
+ end
141
+ unhappiness = 0
142
+ case OPTS[:mode]
143
+ when 'cat'
144
+ ARGF.each_line.map(&:strip).each do |line|
145
+ puts line
146
+ end
147
+ when 'encode-lines'
148
+ ARGF.each_line.map(&:strip).each do |line|
149
+ puts OAK.encode(line,oak_opts)
150
+ end
151
+ when 'decode-lines'
152
+ ARGF.each_line.map(&:strip).each do |line|
153
+ puts OAK.decode(line,oak_opts)
154
+ end
155
+ when 'encode-file'
156
+ puts OAK.encode(STDIN.read,oak_opts)
157
+ when 'decode-file'
158
+ STDOUT.write OAK.decode(STDIN.read.strip,oak_opts)
159
+ when 'recode-file'
160
+ puts OAK.encode(OAK.decode(STDIN.read,oak_opts),oak_opts)
161
+ when 'crazy'
162
+ #
163
+ # --mode crazy prints out a sample of OAK strings for various
164
+ # challenging cases.
165
+ #
166
+ cycle_a = ['cycle_a','TBD']
167
+ cycle_b = ['cycle_b',cycle_a]
168
+ cycle_a[1] = cycle_b
169
+ dag_c = ['dag_c']
170
+ dag_b = ['dag_b',dag_c]
171
+ dag_a = ['dag_a',dag_b,dag_c]
172
+ [
173
+ 'hello',
174
+ ['hello'] + ['hello',:hello] * 2,
175
+ {1=>'a','b'=>2,[]=>3,''=>4,{}=>5,nil=>6},
176
+ ['x','x','x','x','x','x','x','x','x','x','x','x','x'],
177
+ ['x'] * 13,
178
+ cycle_a,
179
+ dag_a,
180
+ [1,-123,0.12,-0.123,Float::NAN,-Float::INFINITY,3.14159265358979],
181
+ ].each do |obj|
182
+ oak = OAK.encode(
183
+ obj,
184
+ redundancy: :crc32,
185
+ format: :none,
186
+ compression: :none,
187
+ )
188
+ puts ""
189
+ puts "obj: #{obj}"
190
+ puts " oak: #{oak}"
191
+ begin
192
+ dec = OAK.decode(oak,oak_opts)
193
+ if dec != obj
194
+ if !dec.is_a?(Float) && !enc.is_a?(Float) && !dec.nan? && !enc.nan?
195
+ unhappiness += 1
196
+ puts " BAD: #{dec}"
197
+ end
198
+ end
199
+ rescue OAK::CantTouchThisStringError => ex
200
+ puts " BAD: #{ex.message}: #{ex.backtrace_locations[0]}"
201
+ unhappiness += 1
202
+ end
203
+ end
204
+ when 'tests'
205
+ [
206
+ [1,2,3],
207
+ {:foo=>'foo','foo'=>['x']*10},
208
+ -1,
209
+ Float::NAN,
210
+ nil,
211
+ ].each do |obj|
212
+ puts " #{obj} => ["
213
+ key_chain = OAK::KeyChain.new(
214
+ { 'l0ng3r' => OAK::Key.new('xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx') }
215
+ )
216
+ [
217
+ {redundancy: :none, format: :none, compression: :none },
218
+ {redundancy: :none, format: :base64,compression: :lz4, force: true},
219
+ {redundancy: :crc32,format: :base64,compression: :zlib, force: true},
220
+ {redundancy: :crc32,format: :base64,compression: :bzip2,force: true},
221
+ {redundancy: :sha1, format: :base64,compression: :lzma, force: true},
222
+ {key_chain: key_chain,force_oak_4: true,format: :none, },
223
+ {key_chain: key_chain,force_oak_4: true, },
224
+ {key_chain: key_chain,key: 'l0ng3r', },
225
+ ].each do |opts|
226
+ oak = OAK.encode(obj,opts)
227
+ puts " '#{oak}',"
228
+ dec = OAK.decode(oak,opts)
229
+ if dec != obj
230
+ if !dec.is_a?(Float) && !enc.is_a?(Float) && !dec.nan? && !enc.nan?
231
+ unhappiness += 1
232
+ end
233
+ end
234
+ end
235
+ puts " ],"
236
+ end
237
+ else
238
+ Optimist::die :mode, "bogus mode #{OPTS[:mode]}"
239
+ end
240
+ if unhappiness > 0
241
+ puts "unhappiness: #{unhappiness}"
242
+ end
243
+ exit unhappiness
244
+ end
245
+ end
data/lib/oak.rb CHANGED
@@ -1,122 +1,1085 @@
1
- require 'thor'
1
+ # coding: utf-8
2
+ #
3
+ # OAK: An encoding format with enough polymorphism to support run-time
4
+ # performance experimentation and some light encryption-at-rest.
5
+ #
6
+ # author: jhw@prosperworks.com
7
+ # incept: 2016-03-02
2
8
 
3
- class Oak < Thor
4
- include Thor::Actions
5
- attr_reader :secret_token
9
+ require_relative 'oak/version'
10
+ require 'strscan'
11
+ require 'digest'
12
+ require 'base64'
13
+ require 'lz4-ruby'
14
+ require 'zlib'
15
+ require 'bzip2/ffi'
16
+ require 'lzma'
17
+ require 'openssl'
6
18
 
7
- desc "setup oak", "Set current rails app source open ready"
8
- def setup(working_directory = '.')
9
- self.destination_root = working_directory
10
- FileUtils.chdir destination_root do
11
- check_cfg
12
- dummy_config
13
- git_prepare
14
- create_config_on_deploy
15
- commit_deploy_branch
19
+ module OAK
20
+
21
+ # CantTouchThisObjectError is thrown when encode() or serialize() is
22
+ # called on an object which cannot be encoded losslessly by OAK.
23
+ #
24
+ class CantTouchThisObjectError < ArgumentError ; end
25
+
26
+ # CantTouchThisStringError is thrown when decode(), deserialize(),
27
+ # or unwrap() is called on a String which cannot be decoded.
28
+ #
29
+ class CantTouchThisStringError < ArgumentError ; end
30
+
31
+ # Internal syntactic conveniences.
32
+ #
33
+ BAD_OBJ = CantTouchThisObjectError
34
+ BAD_STR = CantTouchThisStringError
35
+
36
+ # OAK_4 supports one and only one encryption algorithm and mode of
37
+ # operation.
38
+ #
39
+ # - AES-256-GCM
40
+ # - 128 bits of security
41
+ # - 256-bit keys (32 bytes)
42
+ # - 96-bit IVs (12 bytes)
43
+ # - 128-bit auth_tags (16 bytes)
44
+ # - Random IV ("Initialization Vector") for each encryption op
45
+ # - All headers authenticated.
46
+ # - Headers encrypted when not required for decryption.
47
+ #
48
+ ENCRYPTION_ALGO_NAME = 'aes-256-gcm'.freeze
49
+ ENCRYPTION_ALGO_IV_BYTES = 12 # AES-256-GCM has 96-bit IVs
50
+ ENCRYPTION_ALGO_AUTH_TAG_BYTES = 16 # AES-256-GCM has 128-bit auth, we use all
51
+
52
+ # Get a new instance of OpenSSL::Cipher for our algorithm.
53
+ #
54
+ def self.encryption_algo
55
+ OpenSSL::Cipher.new(ENCRYPTION_ALGO_NAME)
56
+ end
57
+
58
+ # Generate a new random key appropriate for the OAK_4 encryption
59
+ # algorithm.
60
+ #
61
+ def self.random_key
62
+ encryption_algo.random_key
63
+ end
64
+
65
+ # Generate a new random initialization vector appropriate for the
66
+ # OAK_4 encryption algorithm.
67
+ #
68
+ def self.random_iv
69
+ encryption_algo.random_iv
70
+ end
71
+
72
+ class Key
73
+
74
+ # @param key String encryption key suitable for AES-256,
75
+ # specifically a binary string of 32 bytes (256 bits),
76
+ # randomly-generated and kept very, very secret.
77
+ #
78
+ def initialize(key)
79
+ if !key.is_a?(String)
80
+ raise ArgumentError, "bad non-String key: ELIDED"
81
+ end
82
+ rk_size = OAK.random_key.size
83
+ if key.size != rk_size
84
+ raise ArgumentError, "bad key ELIDED, length not #{rk_size}"
85
+ end
86
+ @key = key.dup.freeze # happy :)
87
+ end
88
+
89
+ attr_reader :key
90
+
91
+ def inspect
92
+ #
93
+ # Avoid exposing the key in casual logs or console session.
94
+ #
95
+ to_s[0..-2] + " @key=ELIDED>"
16
96
  end
97
+
17
98
  end
18
99
 
19
- no_tasks do
100
+ class KeyChain
20
101
 
21
- def check_cfg
22
- ['config/application.rb', '.gitignore'].each do |f|
23
- if !File.exists? f
24
- raise "#{f} not found, are we at the root directory of a rails app?"
102
+ def initialize(keys)
103
+ if !keys.is_a?(Hash)
104
+ raise ArgumentError, "bogus keys #{keys}"
105
+ end
106
+ keys.each do |k,v|
107
+ if !k.is_a?(String)
108
+ raise ArgumentError, "bogus key #{k} in keys #{keys}"
109
+ end
110
+ if /^[a-zA-Z][0-9a-zA-Z]*$/ !~ k
111
+ #
112
+ # In oak_4, we restrict key names to sequences which look
113
+ # like code identifiers: alphanumeric strings which start
114
+ # with a letter.
115
+ #
116
+ # This keeps the encoding simple but compact.
117
+ #
118
+ raise ArgumentError, "bad key #{k} in keys #{keys}"
119
+ end
120
+ if !v.is_a?(Key)
121
+ raise ArgumentError, "bogus val #{v} at #{k} in keys #{keys}"
25
122
  end
26
123
  end
124
+ #
125
+ # We are a happy KeyChain object now!
126
+ #
127
+ @keys = keys.dup.freeze
128
+ end
129
+
130
+ attr_reader :keys
131
+
132
+ end
133
+
134
+ # Parses a KeyChain object and keys from an ENV-like object.
135
+ #
136
+ # E.g. if the ENV contains:
137
+ #
138
+ # FOO_KEYS=a,b
139
+ # FOO_KEY_a=#{OAK.encode(<binary key>)}
140
+ # FOO_KEY_b=#{OAK.encode(<binary key>)}
141
+ #
142
+ # ...then the call OAK.parse_env_chain(ENV,'FOO') will return a new
143
+ # OAK::KeyChain with two OAK::Keys, 'a' and 'b'.
144
+ #
145
+ # This self-referential (but not recursive!) use of OAK to encode
146
+ # the key and iv is to avoid the problems with binary strings in ENV
147
+ # variables, 'heroku config:set' command line arguments, etc.
148
+ #
149
+ # @param env ENV or an ENV-like Hash from String to String.
150
+ #
151
+ # @param name String the root token
152
+ #
153
+ # @returns a new OAK::KeyChain
154
+ #
155
+ def self.parse_env_chain(env,name)
156
+ key_names = (env["#{name}_KEYS"] || '').gsub(/^[, ]*/,'').split(/[ ,]+/)
157
+ keys = key_names.map do |key_name|
158
+ key = OAK.decode(env["#{name}_KEY_#{key_name}"] || '')
159
+ [ key_name, Key.new(key) ]
160
+ end.to_h
161
+ KeyChain.new(keys)
162
+ end
163
+
164
+ ##########################################################################
165
+ #
166
+ # encode() and decode() are the top layer
167
+ #
168
+ # They coordinate the structure layer and the byte layer.
169
+ #
170
+ # These are the recommended entry points for most callers.
171
+ #
172
+ ##########################################################################
173
+
174
+ # Encodes suitable objects into OAK strings.
175
+ #
176
+ # Is inverted by decode(). For all obj, if encode(obj) does not
177
+ # raise an exception, decode(encode(obj)) == obj.
178
+ #
179
+ # @param obj to encode
180
+ #
181
+ # @param redundancy 'none', 'crc32' (default), or 'sha1'
182
+ #
183
+ # @param compression 'none' (default), 'lz4', 'zlib', 'bzip2', 'lzma'
184
+ #
185
+ # @param force false (default), or true. When true, always
186
+ # compress. When false, fall back to the
187
+ # original if the compressed form is larger.
188
+ #
189
+ # @param key_chain OAK::KeyChain from which to draw the encryption
190
+ # key, or nil for none.
191
+ #
192
+ # @param key String name of a key in key_chain to be used
193
+ # for encryption, or nil if none.
194
+ #
195
+ # @param format 'none', 'base64' (default)
196
+ #
197
+ # @param force_oak_4 Bool, for debugging, force oak_4 encoding even
198
+ # if no encryption key is specified.
199
+ #
200
+ # @param debug_iv String, force encryption with a known IV, TEST ONLY!
201
+ #
202
+ # WARNING: Use of debug_iv jeopardizes the security of all messages
203
+ # *ever* encrypted with that key! Never use debug_iv in production!
204
+ #
205
+ # @raises ArgumentError if obj is not handled.
206
+ #
207
+ def self.encode(obj,opts={})
208
+ ser = _serialize(obj)
209
+ _wrap(ser,opts)
210
+ end
27
211
 
28
- # make 'config/database.yml' globally ignored
29
- global_ignore_file = File.expand_path('~/.gitignore')
30
- if File.exist? global_ignore_file
31
- ignored = File.binread global_ignore_file
32
- if !ignored.include?('config/database.yml')
33
- append_to_file(global_ignore_file, 'config/database.yml')
212
+ # Decodes suitable OAK strings into objects.
213
+ #
214
+ # Inverts encode().
215
+ #
216
+ # @param str String to decode
217
+ #
218
+ # @param key_chain OAK::KeyChain in which to look for keys to
219
+ # decrypt encrypted OAK strings, or nil for none.
220
+ #
221
+ # @returns obj the decoded object
222
+ #
223
+ # @raises ArgumentError if str is not a recognized string.
224
+ #
225
+ def self.decode(str,opts={})
226
+ if !str.is_a?(String)
227
+ raise ArgumentError, "str not a String"
228
+ end
229
+ ser = _unwrap(str,opts)
230
+ _deserialize(ser)
231
+ end
232
+
233
+ ##########################################################################
234
+ #
235
+ # serialize() and deserialize() are the structure layer
236
+ #
237
+ # They are responsible for interconverting between objects and naive
238
+ # strings.
239
+ #
240
+ # This layer is analogous to TAR for files or JSON: it converts
241
+ # structure into string and vice-versa.
242
+ #
243
+ ##########################################################################
244
+
245
+ # Serializes suitable objects into naive strings.
246
+ #
247
+ # Is inverted by deserialize(). For all obj, if serialize(obj) does
248
+ # not raise an exception, deserialize(serialize(obj)) == obj.
249
+ #
250
+ # @raises CantTouchThisObjectError if obj contains any types or
251
+ # structure which cannot be encoded reversibly by OAK.
252
+ #
253
+ def self._serialize(obj)
254
+ seen,_reseen = _safety_dance(obj) do |child|
255
+ next if ALL_TYPES.select{ |type| child.is_a?(type) }.size > 0
256
+ raise CantTouchThisObjectError, "#{child.class} not supported: #{child}"
257
+ end
258
+ strt = Hash.new # string table, str => id for strings already encoded
259
+ ser = 'F'
260
+ ser << seen.size.to_s
261
+ seen.each_with_index do |(_object_id,(_idx2,child)),_idx|
262
+ #
263
+ # First, identify the unique apex type in TYPE_2_CODE.keys
264
+ # which matches the child.
265
+ #
266
+ # child.class may not be listed explicitly, such as for Fixnum
267
+ # and Bignum both being Integer, so we search and assert
268
+ # uniqueness and existence.
269
+ #
270
+ is_as = ALL_TYPES.select{ |type| child.is_a?(type) }
271
+ raise CantTouchThisObjectError if 1 != is_as.size
272
+ type = is_as[0]
273
+ typecode = TYPE_2_CODE[type]
274
+ if nil == child || true == child || false == child
275
+ #
276
+ # The type code by itself is sufficient to decode NilType,
277
+ # TrueType, and FalseType. We need use no other space for them.
278
+ #
279
+ ser << typecode
280
+ next
281
+ end
282
+ if child.is_a?(Symbol) || child.is_a?(String)
283
+ #
284
+ # Strings and Symbols encode as their size in chars followed
285
+ # by their bytes.
286
+ #
287
+ # We maintain a running string table, strt, to recognize when
288
+ # we encounter a string representation which has been
289
+ # previously encoded.
290
+ #
291
+ # If we find such a duplicate, we encode the current string
292
+ # via a back reference to the first one we saw. This is
293
+ # indicated by downcasing the typecode.
294
+ #
295
+ str = child.to_s
296
+ enc = str.encoding
297
+ enc_code = nil
298
+ case enc
299
+ when Encoding::ASCII_8BIT, Encoding::US_ASCII, Encoding::ASCII
300
+ enc_code = 'A'
301
+ when Encoding::UTF_8
302
+ enc_code = 'U'
303
+ else
304
+ raise CantTouchThisObjectError, "unknown string encoding #{enc}"
34
305
  end
35
- else
36
- File.open(global_ignore_file, 'w') do |f|
37
- f.write 'config/database.yml'
306
+ if strt.has_key?(str)
307
+ ser << typecode.downcase # downcase indicates strt reference
308
+ ser << enc_code
309
+ ser << strt[str].to_s
310
+ else
311
+ ser << typecode # upcase indicates full representation
312
+ ser << enc_code
313
+ ser << str.bytesize.to_s
314
+ if str.bytesize > 0
315
+ ser << '_'
316
+ ser << str
317
+ end
318
+ strt[str] = strt.size
38
319
  end
320
+ next
39
321
  end
40
- `git config --global core.excludesfile ~/.gitignore`
322
+ if child.is_a?(Numeric)
323
+ #
324
+ # Numeric primitives encode as their Ruby to_s which
325
+ # matches their JSON.dump().
326
+ #
327
+ ser << typecode
328
+ ser << child.to_s
329
+ next
330
+ end
331
+ if child.is_a?(Array)
332
+ #
333
+ # An array is encoded as a size N followed by N indexes into
334
+ # the seen list.
335
+ #
336
+ ser << typecode
337
+ ser << child.size.to_s
338
+ child.each do |a|
339
+ ser << '_'
340
+ ser << seen[a.object_id][0].to_s
341
+ end
342
+ next
343
+ end
344
+ if child.is_a?(Hash)
345
+ #
346
+ # A hash is encoded as a size N followed by 2*N indexes
347
+ # into the seen list, organized pairwise key+value.
348
+ #
349
+ ser << typecode
350
+ ser << child.size.to_s
351
+ child.each do |k,v|
352
+ ser << '_'
353
+ ser << seen[k.object_id][0].to_s
354
+ ser << '_'
355
+ ser << seen[v.object_id][0].to_s
356
+ end
357
+ next
358
+ end
359
+ raise CantTouchThisObjectError, "not handled: #{child.class} #{child}"
360
+ end
361
+ ser
362
+ end
41
363
 
42
- # append config/config.yml to .gitignore if not already in
43
- File.open('.gitignore') do |f|
44
- f.each_line do |l|
45
- if l == 'config/config.yml'
46
- return
364
+ # Deserializes suitable naive strings into objects.
365
+ #
366
+ # Inverts serialize().
367
+ #
368
+ # @raises CantTouchThisStringError if str is not recognized
369
+ #
370
+ def self._deserialize(str)
371
+ scanner = StringScanner.new(str)
372
+ serial_code = scanner.scan(/F/)
373
+ if 'F' != serial_code
374
+ raise CantTouchThisStringError, "bogus serial_code #{serial_code}"
375
+ end
376
+ num_objs = scanner.scan(/[0-9]+/)
377
+ if !num_objs
378
+ raise CantTouchThisStringError, "missing object list size"
379
+ end
380
+ num_objs = num_objs.to_i
381
+ strt = Hash.new # string table, id => str for strings already decoded
382
+ seen = []
383
+ #
384
+ # We parse the stream, constructing all the objects we see in to
385
+ # a seen list.
386
+ #
387
+ # In this first pass, Arrays and Hashes are created whose
388
+ # elements, keys, and values are temporarily integers. These all
389
+ # refer to slots in the seen list, and many of them will be
390
+ # forward references to objects which we have yet to decode.
391
+ # Later we will rectify the object graph by replacing these
392
+ # integers with their referents from the seen list.
393
+ #
394
+ num_objs.times.each do |idx_obj|
395
+ code = scanner.scan(/[a-zA-Z]/)
396
+ case code
397
+ when 'n'
398
+ seen[idx_obj] = nil
399
+ when 'f'
400
+ seen[idx_obj] = false
401
+ when 't'
402
+ seen[idx_obj] = true
403
+ when 'S', 'Y', 's', 'y'
404
+ enc_code = scanner.scan(/[AU]/)
405
+ enc = nil
406
+ case enc_code
407
+ when 'A'
408
+ enc = Encoding::ASCII_8BIT
409
+ when 'U'
410
+ enc = Encoding::UTF_8
411
+ else
412
+ raise CantTouchThisStringError, "unknown enc_code #{enc_code}"
413
+ end
414
+ num = scanner.scan(/[0-9]+/)
415
+ if !num
416
+ raise CantTouchThisStringError, "missing num"
417
+ end
418
+ num = num.to_i
419
+ case code
420
+ when 'S', 'Y'
421
+ if num > 0
422
+ scanner.scan(/_/) or raise BAD_STR, "missing _"
423
+ seen[idx_obj] = scanner.peek(num)
424
+ scanner.pos += num # skip body
425
+ else
426
+ seen[idx_obj] = ''
47
427
  end
428
+ strt[strt.size] = seen[idx_obj]
429
+ when 's', 'y'
430
+ seen[idx_obj] = strt[num]
431
+ end
432
+ seen[idx_obj] = seen[idx_obj].dup.force_encoding(enc)
433
+ case code
434
+ when 'Y', 'y'
435
+ seen[idx_obj] = seen[idx_obj].intern
436
+ end
437
+ when 'I'
438
+ pattern = /-?[0-9]+/
439
+ seen[idx_obj] = scanner.scan(pattern).to_i
440
+ when 'F'
441
+ pattern = /-?(Infinity|NaN|[0-9]+(\.[0-9]*)?(e([+-][0-9]*)?)?)/
442
+ match = scanner.scan(pattern)
443
+ case match
444
+ when 'Infinity' then seen[idx_obj] = Float::INFINITY
445
+ when '-Infinity' then seen[idx_obj] = -Float::INFINITY
446
+ when 'NaN' then seen[idx_obj] = Float::NAN
447
+ else seen[idx_obj] = match.to_f
448
+ end
449
+ when 'A'
450
+ num_items = scanner.scan(/[0-9]+/).to_i
451
+ arr = []
452
+ num_items.times.each do |idx|
453
+ scanner.scan(/_/) or raise BAD_STR, "missing _"
454
+ val = scanner.scan(/[0-9]+/).to_i # temp obj
455
+ arr[idx] = val
456
+ end
457
+ seen[idx_obj] = arr
458
+ when 'H'
459
+ num_items = scanner.scan(/[0-9]+/).to_i
460
+ hash = Hash.new
461
+ num_items.times.each do
462
+ scanner.scan(/_/) or raise BAD_STR, "missing _"
463
+ k = scanner.scan(/[0-9]+/).to_i # temp obj
464
+ scanner.scan(/_/) or raise BAD_STR, "missing _"
465
+ v = scanner.scan(/[0-9]+/).to_i # temp obj
466
+ hash[k] = v
48
467
  end
468
+ seen[idx_obj] = hash
469
+ else
470
+ raise BAD_STR, "not handled: #{code} #{scanner.pos} #{scanner.rest}"
49
471
  end
472
+ end
473
+ #
474
+ # If we parsed correctly, there will be no unconsumed input in the
475
+ # scanner.
476
+ #
477
+ if !scanner.eos?
478
+ raise BAD_STR, "not at end-of-string: #{scanner.pos} #{scanner.rest}"
479
+ end
480
+ #
481
+ # We rectify the references for each intermediate Array and Hash
482
+ # as promised earlier.
483
+ #
484
+ # Note that this code must be inherently mutation-oriented since
485
+ # it might have to construct cyclic graphs.
486
+ #
487
+ rectified = seen.map do |elem|
488
+ if elem.is_a?(Array)
489
+ next Array.new
490
+ elsif elem.is_a?(Hash)
491
+ next Hash.new
492
+ else
493
+ elem
494
+ end
495
+ end
496
+ rectified.each_with_index do |elem,idx|
497
+ if elem.is_a?(Array)
498
+ seen[idx].each_with_index do |a,i|
499
+ elem[i] = rectified[a]
500
+ end
501
+ elsif elem.is_a?(Hash)
502
+ seen[idx].each do |k,v|
503
+ elem[rectified[k]] = rectified[v]
504
+ end
505
+ end
506
+ end
507
+ #
508
+ # By the way _safety_dance performed its walk in _serialize(), the
509
+ # object we are decoding is the first object encoded in str.
510
+ #
511
+ # Thus, we return the first element of the rectified list.
512
+ #
513
+ rectified.first
514
+ end
50
515
 
51
- append_to_file '.gitignore', 'config/config.yml'
516
+ ##########################################################################
517
+ #
518
+ # wrap() and unwrap() are the byte layer
519
+ #
520
+ # They are responsible for interconverting between naive strings and
521
+ # strings which are ready to go out on the wire into external
522
+ # storage.
523
+ #
524
+ # This layer is analogous to GZIP: it converts strings into a
525
+ # different representation which is smaller, more resistant to
526
+ # corruption, and/or more recognizable.
527
+ #
528
+ ##########################################################################
52
529
 
53
- # protect secret_token
54
- full_text = File.binread 'config/initializers/secret_token.rb'
55
- full_text.gsub! /(Application\.config\.secret_token\s=\s)'(.*)'/, '\1APP_CONFIG[\'secret_token\']'
56
- # save per app secret_token for later use
57
- @secret_token = "#{$2}"
58
- File.open('config/initializers/secret_token.rb', 'w') do |f|
59
- f.write full_text
60
- end
530
+ # Wraps any string into an OAK string.
531
+ #
532
+ # Is inverted by unwrap(). For all str, unwrap(wrap(str)) == str.
533
+ #
534
+ # @param str naive string to be wrapped as an OAK string
535
+ #
536
+ # @param redundancy 'none', 'crc32' (default), or 'sha1'
537
+ #
538
+ # @param compression 'none' (default), 'lz4', 'zlib', 'bzip2', or 'lzma'
539
+ #
540
+ # @param force false (default), or true. When true, always
541
+ # compress. When false, fall back to the
542
+ # original if the compressed form is larger.
543
+ #
544
+ # @param key_chain OAK::KeyChain from which to draw the encryption
545
+ # key, or nil for none.
546
+ #
547
+ # @param key String name of a key in key_chain to be used
548
+ # for encryption, or nil if none.
549
+ #
550
+ # @param force_oak_4 Bool, for debugging, force oak_4 encoding even
551
+ # if no encryption key is specified.
552
+ #
553
+ # @param format 'none', 'base64' (default)
554
+ #
555
+ # @returns an OAK string
556
+ #
557
+ def self._wrap(str,opts={})
558
+ redundancy = (opts[:redundancy] || :crc32).to_s
559
+ compression = (opts[:compression] || :none).to_s
560
+ force = (opts[:force] || false)
561
+ format = (opts[:format] || :base64).to_s
562
+ key_chain = opts[:key_chain]
563
+ key = opts[:key]
564
+ debug_iv = opts[:debug_iv]
565
+ if key_chain && !key_chain.is_a?(KeyChain)
566
+ raise ArgumentError, "bad key_chain #{key_chain}"
567
+ end
568
+ if debug_iv && !debug_iv.is_a?(String)
569
+ raise ArgumentError, "bad debug_iv #{debug_iv}"
570
+ end
571
+ if debug_iv && ENCRYPTION_ALGO_IV_BYTES != debug_iv.size
572
+ raise ArgumentError, "bad debug_iv #{debug_iv}"
573
+ end
574
+ if key && !key_chain
575
+ raise ArgumentError, "key #{key} without key_chain"
61
576
  end
577
+ if key && !key_chain.keys[key]
578
+ keys = key_chain.keys
579
+ raise ArgumentError, "key not found in #{keys}: #{key}"
580
+ end
581
+ encryption_key = key ? key_chain.keys[key] : nil
582
+ str = str.b # dupe to Encoding::ASCII_8BIT
583
+ if encryption_key || opts[:force_oak_4]
584
+ _wrap_oak_4(
585
+ str,
586
+ redundancy,
587
+ compression,
588
+ force,
589
+ format,
590
+ key,
591
+ encryption_key,
592
+ debug_iv
593
+ )
594
+ else
595
+ _wrap_oak_3(
596
+ str,
597
+ redundancy,
598
+ compression,
599
+ force,
600
+ format
601
+ )
602
+ end
603
+ end
62
604
 
63
- def dummy_config
64
- File.open('config/config.example.yml', 'w') do |f|
65
- f.write "secret_token: 'c1cae0f52a3ef8efa369a127c63bd6ede539a4089fd952b33199100a6769c8455ab4969f2eefaf1ebcbe0208bd57531204c77f41f715207f961e7e45f139f4e7'"
66
- end
67
- prepend_to_file 'config/application.rb', "require 'yaml'\nAPP_CONFIG = YAML.load(File.read(File.expand_path('../config.yml', __FILE__)))\n"
605
+ def self._wrap_oak_3(
606
+ str,
607
+ redundancy,
608
+ compression,
609
+ force,
610
+ format
611
+ )
612
+ source_redundancy = _check(redundancy,str)
613
+ compressed, compression = _compress(compression,force,str)
614
+ formatted = _format(format,compressed)
615
+ output = 'oak_3' # format id+ver
616
+ output << REDUNDANCY_2_CODE[redundancy] # redundancy
617
+ output << COMPRESSION_2_CODE[compression] # compression
618
+ output << FORMAT_2_CODE[format] # format
619
+ output << '_'
620
+ output << source_redundancy # source check
621
+ output << '_'
622
+ output << '%d' % formatted.size # formatted size
623
+ output << '_'
624
+ output << formatted # payload
625
+ output << '_'
626
+ output << 'ok' # terminator
627
+ output.force_encoding(Encoding::ASCII_8BIT)
628
+ end
68
629
 
69
- # simply copy database.yml to database.example.yml
70
- File.open('config/database.example.yml', 'w') do |f|
71
- File.open('config/database.yml', 'r') do |o|
72
- f.write o.read
73
- end
74
- end
630
+ def self._wrap_oak_4(
631
+ str,
632
+ redundancy,
633
+ compression,
634
+ force,
635
+ format,
636
+ key,
637
+ encryption_key,
638
+ debug_iv
639
+ )
640
+ header = 'oak_4' # format id+ver
641
+ if key
642
+ header << key # key name
75
643
  end
644
+ header << '_'
645
+ header << FORMAT_2_CODE[format] # format
646
+ compressed, compression = _compress(compression,force,str)
647
+ plaintext = ''
648
+ plaintext << REDUNDANCY_2_CODE[redundancy] # redundancy
649
+ plaintext << COMPRESSION_2_CODE[compression] # compression
650
+ plaintext << _check(redundancy,str) # source check
651
+ plaintext << '_'
652
+ plaintext << compressed
653
+ encrypted = _encrypt(
654
+ encryption_key,
655
+ plaintext,
656
+ header,
657
+ debug_iv
658
+ )
659
+ formatted = _format(format,encrypted)
660
+ output = header
661
+ output << '%d' % formatted.size # formatted size
662
+ output << '_'
663
+ output << formatted # payload
664
+ output << '_'
665
+ output << 'ok' # terminator
666
+ output.force_encoding(Encoding::ASCII_8BIT)
667
+ end
76
668
 
77
- def git_prepare
78
- if File.exists? '.git'
79
- puts 'It seems a git repository has already created, I\'ll leave it untouched.'
80
- return
81
- end
669
+ # Unwraps any OAK string into a string.
670
+ #
671
+ # Inverts wrap(). For all str, unwrap(wrap(str)) == str.
672
+ #
673
+ # @param str OAK string to be unwrapped
674
+ #
675
+ # @param opts options Hash; opts[:key_chain] is the OAK::KeyChain in which to look for keys to
676
+ # decrypt encrypted OAK strings, or nil for none.
677
+ #
678
+ # @returns a string
679
+ #
680
+ # @raises ArgumentError if str is not in OAK format.
681
+ #
682
def self._unwrap(str,opts={})
  #
  # Scan a binary (ASCII_8BIT) copy so the caller's string is left
  # alone, then dispatch on the format id+version prefix.
  #
  scanner = StringScanner.new(str.b)
  version = scanner.scan(/oak_[34]/)
  raise BAD_STR, "bad oak+ver" unless version
  #
  # Only OAK_4 takes opts (they carry what is needed for decryption);
  # OAK_3 decoding needs none.
  #
  return _unwrap_oak_4(scanner,opts) if 'oak_4' == version
  _unwrap_oak_3(scanner)
end
82
692
 
83
- `git init && git add . && git commit -m "init"`
84
- `git checkout -b deploy`
693
# Unwraps the remainder of an OAK_3 string.
#
# Expects, from the scanner's current position:
#
#   redundancy-code compression-code format-code _ source-check _
#   formatted-size _ payload _ok
#
# @param sc StringScanner positioned just after the 'oak_3' prefix
#
# @return the original source string
#
# @raises BAD_STR or CantTouchThisStringError if the input is
# malformed, truncated, or fails its size or redundancy check.
#
def self._unwrap_oak_3(sc)
  r      = sc.scan(/[NCS]/)     or raise BAD_STR, "bad redundancy"
  c      = sc.scan(/[N4ZBM]/)   or raise BAD_STR, "bad compression"
  f      = sc.scan(/[NB]/)      or raise BAD_STR, "bad format"
  sc.scan(/_/)                  or raise BAD_STR, "missing _"
  scheck = sc.scan(/[a-f0-9]+/) or raise BAD_STR, "bad scheck"
  sc.scan(/_/)                  or raise BAD_STR, "missing _"
  fsize  = sc.scan(/[0-9]+/)    or raise BAD_STR, "bad fsize"
  fsize  = fsize.to_i
  sc.scan(/_/)                  or raise BAD_STR, "missing _"
  #
  # Both peek and pos= can raise RangeError: peek when fsize is too
  # large to be a native integer, pos= when it points past the end of
  # the string.  Either way the input is bad, so both are guarded.
  #
  begin
    formatted = sc.peek(fsize)
    sc.pos   += fsize
  rescue RangeError => ex
    raise CantTouchThisStringError, "#{ex.class}: #{ex.message}"
  end
  #
  # \Z rather than $: the terminator must end the string (a single
  # trailing newline is tolerated), not merely end a line with more
  # content following it.
  #
  sc.scan(/_ok\Z/) or raise BAD_STR, "bad ok: #{formatted}"
  # Unrecognized codes fall through as-is to the helpers below.
  redundancy  = CODE_2_REDUNDANCY[r]  || r
  compression = CODE_2_COMPRESSION[c] || c
  format      = CODE_2_FORMAT[f]      || f
  fsize_re    = formatted.size
  if fsize != fsize_re
    raise CantTouchThisStringError, "fsize #{fsize} vs #{fsize_re}"
  end
  compressed  = _deformat(format,formatted)
  original    = _decompress(compression,compressed)
  scheck_re   = _check(redundancy,original)
  if scheck != scheck_re
    raise CantTouchThisStringError, "scheck #{scheck} vs #{scheck_re}"
  end
  original
end
86
725
 
87
- def create_config_on_deploy
88
- File.open('config/config.yml', 'w') do |f|
89
- f.write 'secret_token: \'' + secret_token + '\''
726
# Unwraps the remainder of an OAK_4 string.
#
# Expects, from the scanner's current position:
#
#   [key] _ format-code formatted-size _ payload _ok
#
# The payload, once deformatted and passed through _decrypt, holds:
# redundancy-code compression-code source-check _ compressed-source.
#
# @param sc StringScanner positioned just after the 'oak_4' prefix
#
# @param opts Hash; opts[:key_chain] must respond to #keys with a
# name => key lookup whenever the string names a key
#
# @return the original source string
#
# @raises BAD_STR or CantTouchThisStringError if the input is
# malformed, truncated, names an unavailable key, or fails its
# redundancy check.
#
def self._unwrap_oak_4(sc,opts={})
  key            = sc.scan(/[^_]+/) # nil OK, indicates no encryption
  encryption_key = nil
  if key
    key_chain    = opts[:key_chain]
    if !key_chain
      raise CantTouchThisStringError, "key #{key} but no key_chain"
    end
    encryption_key = key_chain.keys[key]
    if !encryption_key
      keys       = key_chain.keys
      raise CantTouchThisStringError, "key not found in #{keys}: #{key}"
    end
  end
  sc.scan(/_/)               or raise BAD_STR, "missing _"
  f      = sc.scan(/[NB]/)   or raise BAD_STR, "bad format"
  header = sc.string[0..(sc.pos-1)] # for authentication by _decrypt
  format = CODE_2_FORMAT[f]
  fsize  = sc.scan(/[0-9]+/) or raise BAD_STR, "bad fsize"
  fsize  = fsize.to_i
  sc.scan(/_/)               or raise BAD_STR, "missing _"
  #
  # Both peek and pos= can raise RangeError: peek when fsize is too
  # large to be a native integer, pos= when it points past the end of
  # the string.  Either way the input is bad, so both are guarded.
  #
  begin
    formatted = sc.peek(fsize)
    sc.pos   += fsize
  rescue RangeError => ex
    raise CantTouchThisStringError, "#{ex.class}: #{ex.message}"
  end
  #
  # \Z rather than $: the terminator must end the string (a single
  # trailing newline is tolerated), not merely end a line with more
  # content following it.
  #
  sc.scan(/_ok\Z/) or raise BAD_STR, "bad ok"
  encrypted = _deformat(format,formatted)
  plaintext = _decrypt(encryption_key,encrypted,header)
  sp        = StringScanner.new(plaintext)
  r         = sp.scan(/[NCS]/)     or raise BAD_STR, "bad redundancy"
  c         = sp.scan(/[N4ZBM]/)   or raise BAD_STR, "bad compression"
  scheck    = sp.scan(/[a-f0-9]+/) or raise BAD_STR, "bad scheck"
  sp.scan(/_/)                     or raise BAD_STR, "missing _"
  compressed  = sp.rest
  # Unrecognized codes fall through as-is to the helpers below.
  redundancy  = CODE_2_REDUNDANCY[r]  || r
  compression = CODE_2_COMPRESSION[c] || c
  original    = _decompress(compression,compressed)
  scheck_re   = _check(redundancy,original)
  if scheck != scheck_re
    raise(
      CantTouchThisStringError,
      "scheck #{scheck} vs #{scheck_re} in #{sc.string}"
    )
  end
  original
end
98
774
 
99
- # add checkout hook for switching from 'deploy' to 'master'
100
- File.open('.git/hooks/post-checkout', 'w') do |f|
101
- f.write <<-EOS
102
- #!/bin/bash
775
# Single-character codes used to encode each supported object type.
#
# String and Symbol codes are downcased ('s', 'y') for string table
# lookup.
#
TYPE_2_CODE ||= Hash[[
  [ Hash,       'H' ],
  [ Array,      'A' ],
  [ String,     'S' ],
  [ Symbol,     'Y' ],
  [ Integer,    'I' ],
  [ Float,      'F' ],
  [ NilClass,   'n' ],
  [ TrueClass,  't' ],
  [ FalseClass, 'f' ],
]].freeze

# Every type with a code, in the order listed in TYPE_2_CODE.
#
ALL_TYPES   ||= TYPE_2_CODE.keys.freeze
103
789
 
104
- branch_name=$(git symbolic-ref -q HEAD)
105
- branch_name=${branch_name##refs/heads/}
790
# How we encode :format in the OAK strings.
#
# 'base64' is the urlsafe form with padding and whitespace stripped.
#
FORMAT_2_CODE ||= { 'none' => 'N', 'base64' => 'B' }.freeze

# Reverse lookup, code => format name.
#
CODE_2_FORMAT ||= Hash[
  FORMAT_2_CODE.map { |name,code| [ code, name ] }
].freeze
106
797
 
107
- if [ "$branch_name" = master -a -e "config/config.example.yml" ]; then
108
- cp config/config.example.yml config/config.yml
109
- echo "cp config/config.example.yml config/config.yml"
110
- fi
111
- EOS
798
+ # How we encode :compression in the OAK strings.
799
+ #
800
+ # Early on, I captured some metrics using the catenation of all our
801
+ # Ruby code as a test file.
802
+ #
803
+ # I measured:
804
+ #
805
+ # SOURCE 5707334
806
+ # none 5707370 compression 0.17s decompression 0.16s
807
+ # lzo 1804765 compression 0.18s decompression 0.16s
808
+ # lzf 1807971 compression 0.16s decompression 0.17s
809
+ # lz4 1813574 compression 0.17s decompression 0.14s
810
+ # zlib 1071216 compression 0.53s decompression 0.19s
811
+ # bzip2 868595 compression 0.62s decompression 0.33s
812
+ # lzma 760594 compression 6.22s decompression 0.20s
813
+ #
814
+ # From this, I conclude that only one of lzo,lzf,lz4 is interesting.
815
+ # They all yield approximately the same compression, and their
816
+ # compression times are indistinguishable from the rest of the
817
+ # streaming and encoding times imposed by OAK.
818
+ #
819
+ # I'm settling on supporting only lz4 because it seems to be better
820
+ # supported as a polymorphic lib - it's closer to a defacto standard
821
+ # for the LZ77 family.
822
+ #
823
+ # zlib, bzip2, and lzma each represent interesting distinct choices
824
+ # - I'm keeping support for all three.
825
+ #
826
COMPRESSION_2_CODE ||= Hash[[
  [ 'none',  'N' ],
  [ 'lz4',   '4' ],
  [ 'zlib',  'Z' ],
  [ 'bzip2', 'B' ],
  [ 'lzma',  'M' ],
]].freeze

# Reverse lookup, code => compression name.
#
CODE_2_COMPRESSION ||= Hash[
  COMPRESSION_2_CODE.map { |name,code| [ code, name ] }
].freeze
834
+
835
# Single-character codes recorded in OAK strings for each supported
# :redundancy option.
#
REDUNDANCY_2_CODE ||= Hash[[
  [ 'none',  'N' ],
  [ 'crc32', 'C' ],
  [ 'sha1',  'S' ],
]].freeze

# Reverse lookup, code => redundancy name.
#
CODE_2_REDUNDANCY ||= Hash[
  REDUNDANCY_2_CODE.map { |name,code| [ code, name ] }
].freeze
843
+
844
+ # Helper method, calculates redundancy check for str.
845
+ #
846
def self._check(redundancy,str)
  #
  # redundancy may be given as a String or a Symbol.  The result is
  # always a String: '0' for none, the decimal CRC-32 for crc32, the
  # lowercase hex digest for sha1.
  #
  kind = redundancy.to_s
  return '0'                         if 'none'  == kind
  return '%d' % Zlib.crc32(str)      if 'crc32' == kind
  return Digest::SHA1.hexdigest(str) if 'sha1'  == kind
  raise ArgumentError, "unknown redundancy #{redundancy}"
end
855
+
856
+ # Helper method, calculates formatted version of str.
857
+ #
858
def self._format(format,str)
  kind = format.to_s
  return str if 'none' == kind
  if 'base64' != kind
    raise ArgumentError, "unknown format #{format}"
  end
  #
  # This is "Base 64 Encoding with URL and Filename Safe Alphabet",
  # aka base64url, with the option not to use padding, per
  # https://tools.ietf.org/html/rfc4648#section-5.
  #
  # Ruby 2.3+ offers "padding: false" for this; here the padding is
  # instead chopped off the end with gsub.
  #
  padded = Base64.urlsafe_encode64(str)
  padded.gsub(/=.*$/,'')
end
876
+
877
# Helper method, inverts _format(): recovers the unformatted version
# of str.
#
def self._deformat(format,str)
  kind = format.to_s
  return str if 'none' == kind
  if 'base64' != kind
    raise ArgumentError, "unknown format #{format}"
  end
  #
  # Base64.urlsafe_decode64(str) cannot be used here: it does not
  # reverse Base64.urlsafe_encode64(str).gsub(/=.*$/,''), but raises
  # an ArgumentError "invalid base64" instead.
  #
  # Plain Base64.decode64() is liberal in what it accepts and copes
  # with missing padding, so the urlsafe alphabet is mapped back to
  # the standard one and decoded with that.
  #
  standard = str.tr('-_','+/')
  Base64.decode64(standard)
end
897
+
898
+ # Helper for wrap() and unwrap(), multiplexes encryption.
899
+ #
900
def self._encrypt(encryption_key,data,auth_data,debug_iv)
  #
  # No key means no encryption: data passes through untouched.
  #
  return data unless encryption_key
  expected_iv_size  = ENCRYPTION_ALGO_IV_BYTES
  expected_tag_size = ENCRYPTION_ALGO_AUTH_TAG_BYTES
  if debug_iv && expected_iv_size != debug_iv.size
    raise "unexpected debug_iv.size #{debug_iv.size} not #{expected_iv_size}"
  end
  #
  # WARNING: statement order matters here.  cipher.key is assigned
  # *before* cipher.iv, because at least some versions of
  # OpenSSL::Cipher were reported to ignore an iv which was set ahead
  # of the key in aes-*-gcm ciphers:
  #
  #   https://github.com/attr-encrypted/encryptor/pull/22
  #   https://github.com/attr-encrypted/encryptor/blob/master/README.md
  #
  # That report was against version "1.0.1f 6 Jan 2014".  OAK_4 only
  # supports AES-256-GCM, and OAK will almost certainly not be used
  # with an affected OpenSSL, but one can't be too careful.
  #
  cipher           = encryption_algo.encrypt
  cipher.key       = encryption_key.key
  iv               = debug_iv || cipher.random_iv
  cipher.iv        = iv
  cipher.auth_data = auth_data
  ciphertext       = cipher.update(data) + cipher.final
  auth_tag         = cipher.auth_tag
  unless expected_iv_size == iv.size
    raise "unexpected iv.size #{iv.size} not #{expected_iv_size}"
  end
  unless expected_tag_size == auth_tag.size
    raise "unexpected auth_tag.size #{auth_tag.size} not #{expected_tag_size}"
  end
  #
  # iv and auth_tag are fixed-width, so they can be concatenated ahead
  # of the ciphertext and split apart again on decode without spending
  # any space on recording their sizes.
  #
  iv + auth_tag + ciphertext
end
946
+
947
+ # Helper for wrap() and unwrap(), multiplexes decryption.
948
+ #
949
def self._decrypt(encryption_key,data,auth_data)
  #
  # Inverts _encrypt(): data is expected to be laid out as
  # iv + auth_tag + ciphertext, with iv and auth_tag of the fixed
  # widths ENCRYPTION_ALGO_IV_BYTES and ENCRYPTION_ALGO_AUTH_TAG_BYTES.
  #
  # @param encryption_key object responding to #key, or nil for no
  # encryption, in which case data is returned untouched
  #
  # @param data the bytes to decrypt
  #
  # @param auth_data the authenticated data which was given to
  # _encrypt()
  #
  # @returns the decrypted string
  #
  # @raises CantTouchThisStringError if data is too short to hold an
  # iv and an auth_tag, or if OpenSSL rejects it.
  #
  return data if !encryption_key
  iv_size       = ENCRYPTION_ALGO_IV_BYTES
  auth_tag_size = ENCRYPTION_ALGO_AUTH_TAG_BYTES
  overhead      = iv_size + auth_tag_size
  #
  # A short payload would otherwise yield nil or truncated slices
  # below and surface as a TypeError or ArgumentError from deep inside
  # OpenSSL, rather than as a bad-string error.
  #
  if data.bytesize < overhead
    raise(
      CantTouchThisStringError,
      "encrypted data too short: #{data.bytesize} vs #{overhead}"
    )
  end
  #
  # byteslice, because the widths are byte counts no matter how data
  # happens to be encoded.
  #
  iv            = data.byteslice(0,iv_size)
  auth_tag      = data.byteslice(iv_size,auth_tag_size)
  ciphertext    = data.byteslice(overhead,data.bytesize - overhead)
  cipher        = encryption_algo.decrypt
  cipher.key    = encryption_key.key   # key first, then iv, as in _encrypt
  begin
    cipher.iv        = iv
    cipher.auth_tag  = auth_tag
    cipher.auth_data = auth_data
    cipher.update(ciphertext) + cipher.final
  rescue OpenSSL::Cipher::CipherError => ex
    raise CantTouchThisStringError, "#{ex.class}: #{ex.message}"
  end
end
967
+
968
+ # Helper for wrap() and unwrap(), multiplexes compression.
969
+ #
970
def self._compress(compression,force,str)
  #
  # @param compression one of 'none', 'lz4', 'zlib', 'bzip2', 'lzma',
  # as a String or Symbol
  #
  # @param force if true, keep the compressed form even when it is no
  # smaller than str
  #
  # @param str the string to compress
  #
  # @returns [compressed,compression], where compression is the name
  # of the compression actually used, as a String
  #
  # @raises ArgumentError if compression is not recognized
  #
  case compression.to_s
  when 'none'
    compressed = str
  when 'lz4'
    compressed = LZ4.compress(str)
  when 'zlib'
    compressed = Zlib.deflate(str)
  when 'bzip2'
    io = StringIO.new
    io.set_encoding(Encoding::ASCII_8BIT)
    Bzip2::FFI::Writer.write(io, str)
    compressed = io.string
  when 'lzma'
    compressed = LZMA.compress(str)
  else
    raise ArgumentError, "unknown compression #{compression}"
  end
  #
  # Unless forced, fall back to the source when compression did not
  # actually shrink it.  The comparison is in bytes: compressors emit
  # binary, so comparing against a character count would undercount a
  # multibyte source and wrongly discard a smaller compressed form.
  #
  if !force && compressed.bytesize >= str.bytesize
    compressed  = str
    compression = 'none'
  end
  [compressed,compression.to_s]
end
994
+
995
+ # Helper for wrap() and unwrap(), multiplexes decompression.
996
+ #
997
def self._decompress(compression,str)
  #
  # Each branch translates only its own library's failure into
  # CantTouchThisStringError.  The rescue clauses are kept per-branch
  # so that each names only the error class of the library in use.
  #
  case compression.to_s
  when 'none'
    str
  when 'lz4'
    begin
      LZ4.uncompress(str)
    rescue LZ4Internal::Error => ex
      raise CantTouchThisStringError, "#{ex.class}: #{ex.message}"
    end
  when 'zlib'
    begin
      Zlib::Inflate.inflate(str)
    rescue Zlib::DataError => ex
      raise CantTouchThisStringError, "#{ex.class}: #{ex.message}"
    end
  when 'bzip2'
    inflated = nil
    begin
      inflated = Bzip2::FFI::Reader.read(StringIO.new(str))
    rescue Bzip2::FFI::Error::MagicDataError => ex
      raise CantTouchThisStringError, "#{ex.class}: #{ex.message}"
    end
    inflated.b # dupe to Encoding::ASCII_8BIT
  when 'lzma'
    begin
      inflated = LZMA.decompress(str)
    rescue RuntimeError => ex
      raise CantTouchThisStringError, "#{ex.class}: #{ex.message}"
    end
    inflated.b # dupe to Encoding::ASCII_8BIT
  else
    raise ArgumentError, "unknown compression #{compression}"
  end
end
1035
+
1036
+ # Walks obj recursively, touching each reachable child only once
1037
+ # without getting caught up in cycles or touching DAGy bits twice.
1038
+ #
1039
+ # Only knows how to recurse into Arrays and Hashs.
1040
+ #
1041
+ # This traversal is depth-first pre-order with the children of
1042
+ # Arrays walked in positional order and Hash pairs walked in positional
1043
+ # order k,v,k,v, etc.
1044
+ #
1045
+ # @param obj object to walk
1046
+ #
1047
+ # @param seen Hash which maps object_id => [idx,child] of every
1048
+ # object touched, where idx is 0,1,2,... corresponding to the order
1049
+ # in which we encountered child.
1050
+ #
1051
+ # @param reseen List of children which were walked more than once.
1052
+ #
1053
+ # @param block if present, every object touched is yielded to block
1054
+ #
1055
+ # @return seen,reseen
1056
+ #
1057
def self._safety_dance(obj,seen=nil,reseen=nil,&block)
  seen   ||= {}
  reseen ||= []
  id       = obj.object_id
  #
  # Anything met before is only noted in reseen, and never walked
  # again.  This is what keeps cycles from recursing forever.
  #
  if seen.has_key?(id)
    reseen << obj
    return seen,reseen
  end
  #
  # Note that OAK._serialize() relies on the depth-first pre-order
  # promised for this walk - at least, on the first element walked
  # being the first element added to seen.  Hence obj is recorded and
  # yielded before any of its children are visited.
  #
  seen[id] = [seen.size,obj]
  yield obj if block
  if obj.is_a?(Array)
    obj.each { |child| _safety_dance(child,seen,reseen,&block) }
  elsif obj.is_a?(Hash)
    obj.each do |pair_key,pair_value|   # hash order: k,v,k,v,...
      _safety_dance(pair_key,  seen,reseen,&block)
      _safety_dance(pair_value,seen,reseen,&block)
    end
  end
  return seen,reseen
end
1084
+
122
1085
  end