oak 0.0.3 → 0.4.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +51 -0
- data/.rubocop.yml +74 -0
- data/.travis.yml +17 -0
- data/CHANGELOG.md +24 -0
- data/DESIDERATA.md +318 -0
- data/Gemfile +3 -15
- data/LICENSE +22 -0
- data/Makefile +113 -0
- data/README.md +163 -23
- data/Rakefile +6 -47
- data/bin/oak +242 -3
- data/bin/oak.rb +245 -0
- data/lib/oak.rb +1049 -86
- data/lib/oak/version.rb +3 -0
- data/oak.gemspec +29 -65
- metadata +121 -71
- data/.document +0 -5
- data/Gemfile.lock +0 -26
- data/LICENSE.txt +0 -20
- data/VERSION +0 -1
- data/test/files/config/application.rb +0 -3
- data/test/files/config/database.yml +0 -25
- data/test/files/config/initializers/secret_token.rb +0 -7
- data/test/files/dot_gitignore +0 -0
- data/test/helper.rb +0 -29
- data/test/test_oak.rb +0 -44
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 1894c4827e6cb478e373686a2c5c198530a6eabf
|
4
|
+
data.tar.gz: 157bf6e8b64b962cefdabfee31eadd0955b58fed
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: fb82da115acd3abd4cc992bfa0896c0ad8ec0a13e56a4843ce332672ba7dc8c1e5b6608dcc0ac7215debf16644e3ac2bf709cb043c826d35aeaf8c07a6a92cb8
|
7
|
+
data.tar.gz: 30200fc38b86dd7e5953a9c70ae0d84825f6008042c1283a66554ad47480eae06658ef626eb6eaa7b3d93156d718952669cd4677add032b3b232408a8acdbf3f
|
data/.gitignore
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
*.gem
|
2
|
+
*.rbc
|
3
|
+
/.config
|
4
|
+
/coverage/
|
5
|
+
/InstalledFiles
|
6
|
+
/pkg/
|
7
|
+
/spec/reports/
|
8
|
+
/spec/examples.txt
|
9
|
+
/test/tmp/
|
10
|
+
/test/version_tmp/
|
11
|
+
/tmp/
|
12
|
+
|
13
|
+
# Used by dotenv library to load environment variables.
|
14
|
+
# .env
|
15
|
+
|
16
|
+
## Specific to RubyMotion:
|
17
|
+
.dat*
|
18
|
+
.repl_history
|
19
|
+
build/
|
20
|
+
*.bridgesupport
|
21
|
+
build-iPhoneOS/
|
22
|
+
build-iPhoneSimulator/
|
23
|
+
|
24
|
+
## Specific to RubyMotion (use of CocoaPods):
|
25
|
+
#
|
26
|
+
# We recommend against adding the Pods directory to your .gitignore. However
|
27
|
+
# you should judge for yourself, the pros and cons are mentioned at:
|
28
|
+
# https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control
|
29
|
+
#
|
30
|
+
# vendor/Pods/
|
31
|
+
|
32
|
+
## Documentation cache and generated files:
|
33
|
+
/.yardoc/
|
34
|
+
/_yardoc/
|
35
|
+
/doc/
|
36
|
+
/rdoc/
|
37
|
+
|
38
|
+
## Environment normalization:
|
39
|
+
/.bundle/
|
40
|
+
/vendor/bundle
|
41
|
+
/lib/bundler/man/
|
42
|
+
|
43
|
+
# For a library or gem, you might want to ignore these files since the code is
|
44
|
+
# intended to run in multiple environments; otherwise, check them in:
|
45
|
+
#
|
46
|
+
Gemfile.lock
|
47
|
+
.ruby-version
|
48
|
+
.ruby-gemset
|
49
|
+
|
50
|
+
# unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
|
51
|
+
.rvmrc
|
data/.rubocop.yml
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
AllCops:
|
2
|
+
Include:
|
3
|
+
- Rakefile
|
4
|
+
- Gemfile
|
5
|
+
- '*.gemspec'
|
6
|
+
|
7
|
+
# I like the Metrics family in principle, but OAK was built in a
|
8
|
+
# lower-level style much like C. The default thresholds for these are
|
9
|
+
# too tight for the style I chose for OAK.
|
10
|
+
#
|
11
|
+
# Moreover, IMO test code is not the place to get pedantic about class
|
12
|
+
# length, method complexity, etc. One should be encouraged to add
|
13
|
+
# more tests with minimal friction, not forced to make a hard choice
|
14
|
+
# between cutting tests or splitting up my test suites.
|
15
|
+
#
|
16
|
+
Metrics/ParameterLists:
|
17
|
+
Max: 10
|
18
|
+
Metrics/BlockLength:
|
19
|
+
Max: 150
|
20
|
+
Exclude:
|
21
|
+
- 'test/**/*.rb'
|
22
|
+
Metrics/ClassLength:
|
23
|
+
Max: 400
|
24
|
+
Exclude:
|
25
|
+
- 'test/**/*.rb'
|
26
|
+
Metrics/MethodLength:
|
27
|
+
Max: 150
|
28
|
+
Exclude:
|
29
|
+
- 'test/**/*.rb'
|
30
|
+
Metrics/ModuleLength:
|
31
|
+
Max: 1000
|
32
|
+
Exclude:
|
33
|
+
- 'test/**/*.rb'
|
34
|
+
Metrics/AbcSize:
|
35
|
+
Max: 150
|
36
|
+
Exclude:
|
37
|
+
- 'test/**/*.rb'
|
38
|
+
Metrics/BlockNesting:
|
39
|
+
Max: 5
|
40
|
+
Metrics/CyclomaticComplexity:
|
41
|
+
Max: 50
|
42
|
+
Metrics/PerceivedComplexity:
|
43
|
+
Max: 25
|
44
|
+
#
|
45
|
+
# Normally I am a pedantic adherent to 80-column lines.
|
46
|
+
#
|
47
|
+
# Over in test/oak.rb however, there are necessarily a lot of OAK
|
48
|
+
# strings which are much larger than 80 characters.
|
49
|
+
#
|
50
|
+
# I have decided that disablement in .rubocop.yml is less disruptive
|
51
|
+
# than repeated use of inline rubocop: comments.
|
52
|
+
#
|
53
|
+
Metrics/LineLength:
|
54
|
+
Exclude:
|
55
|
+
- 'test/**/*.rb'
|
56
|
+
Naming/UncommunicativeMethodParamName:
|
57
|
+
Enabled: false
|
58
|
+
|
59
|
+
# I put extra spaces in a lot of expressions for a lot of different
|
60
|
+
# reasons, including especially readability.
|
61
|
+
#
|
62
|
+
# I reject these cops.
|
63
|
+
#
|
64
|
+
Layout:
|
65
|
+
Enabled: false
|
66
|
+
|
67
|
+
# As a group, the Style cops are bewilderingly opinionated.
|
68
|
+
#
|
69
|
+
# In some cases IMO they are harmful e.g. Style/TernaryParentheses.
|
70
|
+
#
|
71
|
+
# I reject these cops.
|
72
|
+
#
|
73
|
+
Style:
|
74
|
+
Enabled: false
|
data/.travis.yml
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
sudo: true
|
2
|
+
language: ruby
|
3
|
+
before_install:
|
4
|
+
- gem install bundler -v 1.16.1
|
5
|
+
rvm:
|
6
|
+
- 2.1.6
|
7
|
+
- 2.2.9
|
8
|
+
- 2.4.3
|
9
|
+
- 2.5.0
|
10
|
+
script:
|
11
|
+
#
|
12
|
+
# Run several tests in parallel, and be happy if they are all happy.
|
13
|
+
#
|
14
|
+
# If any fail, rerun serially so we get clean output from the ones
|
15
|
+
# which failed.
|
16
|
+
#
|
17
|
+
- make test -j 3 || make test
|
data/CHANGELOG.md
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
## 0.4.1 (2018-10-01)
|
2
|
+
|
3
|
+
- `oak`, `oak.rb` published as executables from gem.
|
4
|
+
- Removed heavier dep on `contracts`, switched to manual checks and looser spec.
|
5
|
+
- Documentation reorg and cleanup.
|
6
|
+
- Open-sourced with MIT License, published as https://rubygems.org/gems/oak!
|
7
|
+
|
8
|
+
|
9
|
+
## 0.4.0 (2018-09-24)
|
10
|
+
|
11
|
+
- First export from ProsperWorks/ALI.
|
12
|
+
- First conversion to gem.
|
13
|
+
- Not open (yet).
|
14
|
+
- OAK3 emitted by default.
|
15
|
+
- OAK4 with AES-256-GCM encryption with random IV supported.
|
16
|
+
|
17
|
+
|
18
|
+
## 0.0.3 (2011-11-07) and earlier
|
19
|
+
|
20
|
+
- https://rubygems.org/gems/oak had an earlier incarnation as a
|
21
|
+
secret management utility, https://github.com/imonyse/oak.
|
22
|
+
- Special thanks and regards to https://github.com/imonyse, who
|
23
|
+
generously released the gem name `oak` so it could have a second
|
24
|
+
life.
|
data/DESIDERATA.md
ADDED
@@ -0,0 +1,318 @@
|
|
1
|
+
# oak design desiderata
|
2
|
+
|
3
|
+
Some design goals with which I started this project.
|
4
|
+
|
5
|
+
- P1 means "top priority"
|
6
|
+
- P2 means "very important"
|
7
|
+
- P3 means "nice to have"
|
8
|
+
- P4 means "not harmful if cheap"
|
9
|
+
|
10
|
+
- `+` means "accomplished"
|
11
|
+
- `-` means "not accomplished"
|
12
|
+
- `?` means "accomplished, but only for some combinations of arguments"
|
13
|
+
|
14
|
+
Desiderata for the structure layer:
|
15
|
+
|
16
|
+
- P1 + losslessly handle nil, true, false, Integer, and String
|
17
|
+
- P1 + losslessly handle List with arbitrary values and deep nesting
|
18
|
+
- P1 + losslessly handle Hash with string keys and deep nesting in values
|
19
|
+
- P1 + detect cycles and DAGS in input structures, fail or handle
|
20
|
+
- P1 + handle all Integer types without loss
|
21
|
+
- P1 - handle Floats with no more than a small quantified loss
|
22
|
+
- P2 + Hash key ordering is preserved in Ruby-Ruby transcoding
|
23
|
+
- P3 - convenient: vaguely human-readable representations available
|
24
|
+
- P3 + encode cycles and DAGs
|
25
|
+
- P3 + handle Hash with non-string keys and deep nesting in keys
|
26
|
+
- P3 + losslessly handle Symbol distinct from String
|
27
|
+
- P3 - handle Times and Dates
|
28
|
+
|
29
|
+
Desiderata for the byte layer:
|
30
|
+
|
31
|
+
- P1 + reversible: original string can be reconstructed from only OAK string
|
32
|
+
- P1 + unambiguous: no OAK string is the prefix of any other OAK string
|
33
|
+
- P1 + extensible: OAK strings contain ids for ver, format, compression, etc
|
34
|
+
- P1 + robust: error detection in OAK strings
|
35
|
+
- P2 + flexible: multiple compression modes available
|
36
|
+
- P3 + convenient: available representation without `{}`, comma, whitespace
|
37
|
+
- P3 + convenient: 7-bit clean representations available
|
38
|
+
- P3 + convenient: representations which are selectable with double-click
|
39
|
+
- P3 + convenient: vaguely human-readable representations available
|
40
|
+
- P3 - streamable: reversing can be accomplished with definite-size buffers
|
41
|
+
- P4 - embeddable: reversing can be accomplished with fixed-size buffers
|
42
|
+
- P4 - defensive: error correction available (no good libs found)
|
43
|
+
|
44
|
+
Techniques used in the byte layer to accomplish these goals.
|
45
|
+
|
46
|
+
- manifest type id for self-identification
|
47
|
+
- manifest version id in case format changes in future
|
48
|
+
- salient encoding algorithm choices stored in output stream
|
49
|
+
- error detection algorithm aka redundancy
|
50
|
+
- compression
|
51
|
+
- formatting
|
52
|
+
- microchoices made to confine metadata characters to [_0-9a-z]
|
53
|
+
- algorithm menu constructed to offer data characters in [-_0-9a-z]
|
54
|
+
|
55
|
+
|
56
|
+
## Serialization Choices
|
57
|
+
|
58
|
+
A survey of alternatives considered for the serialization layer.
|
59
|
+
|
60
|
+
### Considering Marshal
|
61
|
+
|
62
|
+
The Marshal format has some major drawbacks which I believe make it
|
63
|
+
a nonstarter.
|
64
|
+
|
65
|
+
- strictly Ruby-specific
|
66
|
+
- readability across major versions not guaranteed
|
67
|
+
- too powerful: can be used to execute arbitrary code
|
68
|
+
- binary and non-human-readable
|
69
|
+
- many option combos for oak make oak strings also non-human-readable
|
70
|
+
- still, it is nice to have a layer which is at least potentially clear
|
71
|
+
|
72
|
+
Marshal does offer some major advantages:
|
73
|
+
|
74
|
+
- transcodes all Ruby value types and user-defined value-like classes
|
75
|
+
- reported to be much faster than JSON or YAML for serializing
|
76
|
+
|
77
|
+
### Considering JSON
|
78
|
+
|
79
|
+
JSON is awesome most of the time, especially in highly constrained
|
80
|
+
environments such as API specifications and simple ad-hoc caching
|
81
|
+
situations.
|
82
|
+
|
83
|
+
JSON offers advantages:
|
84
|
+
|
85
|
+
- a portable object model
|
86
|
+
- easy to read
|
87
|
+
- widely deployed
|
88
|
+
- the go-to choice for interchange in recent years
|
89
|
+
|
90
|
+
But it has some shortcomings which lead me to reject it for the
|
91
|
+
structural level in OAK.
|
92
|
+
|
93
|
+
- floating point precision is implementation-dependent
|
94
|
+
- always decodes as a tree - fails to transcode DAGiness
|
95
|
+
- cannot represent cycles - encoder reject, stack overflow, or infinite loop
|
96
|
+
- no native date or time handling
|
97
|
+
- table keys may only be strings
|
98
|
+
- e.g. `{'123'=>'x'} == JSON.parse(JSON.dump({123=>'x'}))`
|
99
|
+
- type information symbol-vs-string lost, symbols transcode to strings
|
100
|
+
- e.g. `'foo' == JSON.parse(JSON.dump(:foo))`
|
101
|
+
- e.g. `{'foo'=>'x'} == JSON.parse(JSON.dump({:foo=>'x'}))`
|
102
|
+
- official grammar only allows {} or [] as top-level object
|
103
|
+
- e.g. `'123' == JSON.dump(123)` but `JSON.parse('123')` raises `ParserError`
|
104
|
+
- many parsers in the wild support only this strict official grammar
|
105
|
+
- JSON is suitable only for document encoding, not streams
|
106
|
+
- allows only one object per file
|
107
|
+
- multiple objects must be members of a list
|
108
|
+
- lists must be fully scanned and parsed before being processed
|
109
|
+
- no possibility of streamy processing
|
110
|
+
|
111
|
+
Biggest limitation of JSON IMO is that Hash keys can only be strings:
|
112
|
+
```
|
113
|
+
2.1.6 :008 > obj = {'str'=>'bar',[1,2,3]=>'baz'}
|
114
|
+
=> {"str"=>"bar", [1, 2, 3]=>"baz"}
|
115
|
+
2.1.6 :009 > JSON.dump(obj)
|
116
|
+
=> "{\"str\":\"bar\",\"[1, 2, 3]\":\"baz\"}"
|
117
|
+
2.1.6 :010 > JSON.parse(JSON.dump(obj))
|
118
|
+
=> {"str"=>"bar", "[1, 2, 3]"=>"baz"}
|
119
|
+
2.1.6 :011 > JSON.parse(JSON.dump(obj)) == obj
|
120
|
+
=> false
|
121
|
+
```
|
122
|
+
|
123
|
+
### Considering YAML
|
124
|
+
|
125
|
+
YAML is strong where JSON is strong, and also strong in many places
|
126
|
+
where JSON is weak. In fact, YAML includes JSON as a subformat: JSON
|
127
|
+
strings *are* YAML strings!
|
128
|
+
|
129
|
+
Some of the advantages of YAML over JSON are:
|
130
|
+
|
131
|
+
- handles any directed graph, including DAGy bits and cycles
|
132
|
+
- arguably more human-readable than JSON
|
133
|
+
- YAML spec subsumes JSON spec: JSON files are YAML files
|
134
|
+
- supports non-string keys
|
135
|
+
- e.g. `{123=>'x'} == YAML.load(YAML.dump({123=>'x'}))`
|
136
|
+
- supports symbols
|
137
|
+
- e.g. `:foo == YAML.load(YAML.dump(:foo))`
|
138
|
+
- e.g. `{:foo=>'x'} == YAML.load(YAML.dump({:foo=>'x'}))`
|
139
|
+
- allows integer or string as top-level object
|
140
|
+
|
141
|
+
YAML overcomes the biggest limitation of JSON by supporting non-string
|
142
|
+
hash keys:
|
143
|
+
```
|
144
|
+
2.1.6 :008 > obj = {'str'=>'bar',[1,2,3]=>'baz'}
|
145
|
+
=> {"str"=>"bar", [1, 2, 3]=>"baz"}
|
146
|
+
2.1.6 :012 > YAML.dump(obj)
|
147
|
+
=> "---\nstr: bar\n? - 1\n - 2\n - 3\n: baz\n"
|
148
|
+
2.1.6 :013 > YAML.load(YAML.dump(obj))
|
149
|
+
=> {"str"=>"bar", [1, 2, 3]=>"baz"}
|
150
|
+
2.1.6 :014 > YAML.load(YAML.dump(obj)) == obj
|
151
|
+
=> true
|
152
|
+
```
|
153
|
+
|
154
|
+
Note: YAML's support for Symbols is due to Psych, not strictly the
|
155
|
+
YAML format itself. I've taken both `YAML.dump(:foo)` and
|
156
|
+
`YAML.dump(':foo')` into Python and done `yaml.load()` on them. Both
|
157
|
+
result in `':foo'`. So this nicety is not portable.
|
158
|
+
|
159
|
+
But YAML still has some shortcomings:
|
160
|
+
|
161
|
+
- floating point precision is implementation-dependent
|
162
|
+
- no native date or time handling
|
163
|
+
- unclear whether available parsers support stream processing
|
164
|
+
- DAGs and cycles of Arrays and Hash are handled, but Strings are not.
|
165
|
+
|
166
|
+
### Considering FRIZZY
|
167
|
+
|
168
|
+
FRIZZY is a home-grown serialization format which I ended up committing
|
169
|
+
to for OAK.
|
170
|
+
|
171
|
+
The name FRIZZY means nothing, and survives only as the rogue `F`
|
172
|
+
character at the start of a serialized object:
|
173
|
+
|
174
|
+
```
|
175
|
+
2.1.6 :006 > OAK.encode('Hello, World!',redundancy: :none,format: :none)
|
176
|
+
=> "oak_3NNN_0_20_F1SU13_Hello, World!_ok"
|
177
|
+
```
|
178
|
+
|
179
|
+
Advantages:
|
180
|
+
|
181
|
+
- Recognizes when Strings are identical, not just equivalent.
|
182
|
+
- It is much more compact than YAML.
|
183
|
+
- Has built-in folding of String and Symbol representation.
|
184
|
+
|
185
|
+
Disadvantages:
|
186
|
+
|
187
|
+
- Home grown.
|
188
|
+
- Very much not human readable.
|
189
|
+
- Floating point precision is incompletely specified.
|
190
|
+
- Current implementation crudely uses Number.to_s and String.to_f
|
191
|
+
|
192
|
+
I decided to reinvent the wheel and go with FRIZZY. We have
|
193
|
+
discovered Summaries which are DAGs on strings. It might be
|
194
|
+
acceptable to lose that information but I did not want to *prove* it
|
195
|
+
was acceptable to lose that information.
|
196
|
+
|
197
|
+
It may have been an ego-driven sin to go custom here, but I did not
|
198
|
+
want to pessimize future use cases on fidelity or control.
|
199
|
+
|
200
|
+
|
201
|
+
## Compression Choices
|
202
|
+
|
203
|
+
A survey of alternatives considered for the compression layer.
|
204
|
+
|
205
|
+
### Considering LZO, LZF, and LZ4.
|
206
|
+
|
207
|
+
These compression formats are similar in performance and algorithm.
|
208
|
+
All are in the Lempel-Ziv family of dictionary-based
|
209
|
+
redundancy-eaters. They will all be cheap to compress, cheap to
|
210
|
+
uncompress, but will deliver only modest compression ratios.
|
211
|
+
|
212
|
+
This family of algorithms is unfamiliar to those accustomed to
|
213
|
+
archive formats, but they are used widely in low-latency applications
|
214
|
+
(such as server caches ;) ).
|
215
|
+
|
216
|
+
To keep things simple, I settled on supporting only LZ4 because its
|
217
|
+
gem, `lz4-ruby`, seems to have more mindshare and momentum. It is
|
218
|
+
weaker but faster than the other weak+fast options - which seems like
|
219
|
+
the way to be.
|
220
|
+
|
221
|
+
Based on previous experience, I expect this to be a clear win for use
|
222
|
+
in Redis caches vs being uncompressed.
|
223
|
+
|
224
|
+
### Considering ZLIB
|
225
|
+
|
226
|
+
Including ZLIB felt like a no-brainer. ZLIB is familiar,
|
227
|
+
widely-deployed, and standardized in RFC 1950 (DEFLATE itself is RFC 1951). It uses the L-Z
|
228
|
+
process with an additional Huffman encoding phase. It will deliver
|
229
|
+
intermediate cost for intermediate compression.
|
230
|
+
|
231
|
+
Based on previous experience, I expect this option will usually be
|
232
|
+
dominated by either LZ4 for low-latency applications or BZIP2 for
|
233
|
+
archival applications, but I'm including it for comparisons and
|
234
|
+
because it would feel strange not to.
|
235
|
+
|
236
|
+
### Considering BZIP2
|
237
|
+
|
238
|
+
BZIP2 is an aggressive compression which uses the Burrows–Wheeler,
|
239
|
+
move-to-front, and run-length-encoding transforms with Huffman coding. It will
|
240
|
+
be several times slower but several 10% stronger than ZLIB. I chose
|
241
|
+
the gem bzip2-ffi over the more flexible rbzip2 to make absolutely
|
242
|
+
certain that we use the native libbz2 implementation and do not
|
243
|
+
fall back silently to a Ruby version which is 100x slower if/when
|
244
|
+
Heroku does not offer FFI.
|
245
|
+
|
246
|
+
Based on previous experience, I expect this option will dominate where
|
247
|
+
data is generally cold or where storage is very expensive compared to
|
248
|
+
CPU.
|
249
|
+
|
250
|
+
### Considering LZMA
|
251
|
+
|
252
|
+
LZMA is the Lempel-Ziv-Markov chains algorithm. It will be an order
|
253
|
+
of magnitude more expensive to compress than BZIP2, but will
|
254
|
+
decompress slightly faster and will yield better compression ratios by
|
255
|
+
a few 5%.
|
256
|
+
|
257
|
+
This will be useful only for cases where read-write ratios are over 10
|
258
|
+
and storage:cpu cost ratios are high. When read-write ratios are
|
259
|
+
close to unity, LZO will dominate where storage:cpu is low and BZIP2
|
260
|
+
will dominate where storage:cpu is high.
|
261
|
+
|
262
|
+
Nonetheless, I have a soft spot in my heart for this algorithm so I am
|
263
|
+
including it - if only so we can rule it out by demonstration rather
|
264
|
+
than hypothesis.
|
265
|
+
|
266
|
+
|
267
|
+
## Encryption Choices
|
268
|
+
|
269
|
+
Encryption is the first extension of OAK since it went live in
|
270
|
+
ProsperWorks's Redis layer on 2016-06-02 and in the S3 Correspondence
|
271
|
+
bodies since 2016-07-06. There had been only Rubocop updates and nary
|
272
|
+
a bugfix since 2016-07-01.
|
273
|
+
|
274
|
+
### Encryption-in-OAK Design Decisions (see arch doc for discussion):
|
275
|
+
|
276
|
+
- Encryption is the only change in OAK4.
|
277
|
+
- OAK4 will only support AES-256-GCM with random IVs chosen for
|
278
|
+
each encryption event.
|
279
|
+
- OAK4 will use no salt other than the random IV.
|
280
|
+
- Encrypted OAK strings will be nondeterministic.
|
281
|
+
- This crushes the desiderata of making OAK.encode a pure function.
|
282
|
+
- This is unavoidable to avoid a blatant security hole.
|
283
|
+
- OAK4 dramatically changes how headers are managed from OAK3.
|
284
|
+
- Encrypts all headers which are not required for decryption.
|
285
|
+
- Authenticates all headers and the encrypted stream.
|
286
|
+
- Key rotation is supported.
|
287
|
+
- Via an ENV-specified key chain.
|
288
|
+
- Can hold multiple master keys.
|
289
|
+
|
290
|
+
### Encryption-in-OAK Backward Compatibility
|
291
|
+
|
292
|
+
Before encryption was added, the format identifier for OAK strings
|
293
|
+
was `'oak_3'`.
|
294
|
+
|
295
|
+
To indicate we are making a non-backward compatible change, I am
|
296
|
+
bumping that up to `'oak_4'` for encrypted strings.
|
297
|
+
|
298
|
+
The legacy OAK3 strings are still supported both on read and on write.
|
299
|
+
|
300
|
+
By default, OAK4 is used only when encryption is requested.
|
301
|
+
|
302
|
+
### Encryption-in-OAK Regarding Compression vs Encryption
|
303
|
+
|
304
|
+
Note that compression of encrypted strings is next to useless. By
|
305
|
+
design, encryption algorithms obscure exploitable redundancy in
|
306
|
+
plaintext and produce incompressible ciphertext.
|
307
|
+
|
308
|
+
On the other hand, in the wild there have been a handful of successful
|
309
|
+
chosen-plaintext attacks on compress-then-encrypt encodings. See:
|
310
|
+
|
311
|
+
- https://blog.appcanary.com/2016/encrypt-or-compress.html
|
312
|
+
- https://en.wikipedia.org/wiki/CRIME
|
313
|
+
|
314
|
+
OAK4 supports compression and does compression-then-encryption.
|
315
|
+
|
316
|
+
The extremely paranoid are encouraged to use compression: :none. Note
|
317
|
+
however that the source data may be compressed. Furthermore, for
|
318
|
+
larger objects FRIZZY itself is, in part, a compression algorithm.
|