aez 0.1.0

checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 4df307fcf2d926a7d97d8a67b446653314c41da2ba48daddabaa2db99a005f6c
4
+ data.tar.gz: c753537e76da64402c0ccdbdd92c85ce5fe0a81ca6fc767462f80360e2da38ce
5
+ SHA512:
6
+ metadata.gz: 15148d0a90cebfeb9db8169fbda4812374bf1f6e7a5f455225e4b84662e7d98f8f7f11e35ffa8534ba983a86109805af5738677a8ac21266ea7de9f723db8496
7
+ data.tar.gz: d854f7d44f95b9711bcbd111fe91237056bf1772441f984df938171b4106cf1681bb7e354754009cc791e2dc1ab58e60cd958490fd6ebb2380bb0be42a1e3932
@@ -0,0 +1,37 @@
1
+ # This workflow uses actions that are not certified by GitHub.
2
+ # They are provided by a third-party and are governed by
3
+ # separate terms of service, privacy policy, and support
4
+ # documentation.
5
+ # This workflow will download a prebuilt Ruby version, install dependencies, compile the native extension, and run tests with Rake
6
+ # For more information see: https://github.com/marketplace/actions/setup-ruby-jruby-and-truffleruby
7
+
8
+ name: Ruby
9
+
10
+ on:
11
+ push:
12
+ branches: [ master ]
13
+ pull_request:
14
+ branches: [ master ]
15
+
16
+ jobs:
17
+ test:
18
+
19
+ runs-on: ubuntu-latest
20
+ strategy:
21
+ matrix:
22
+ ruby-version: ['2.6', '2.7', '3.0']
23
+
24
+ steps:
25
+ - uses: actions/checkout@v2
26
+ - name: Set up Ruby
27
+ # To automatically get bug fixes and new Ruby versions for ruby/setup-ruby,
28
+ # change this to (see https://github.com/ruby/setup-ruby#versioning):
29
+ # uses: ruby/setup-ruby@v1
30
+ uses: ruby/setup-ruby@473e4d8fe5dd94ee328fdfca9f8c9c7afc9dae5e
31
+ with:
32
+ ruby-version: ${{ matrix.ruby-version }}
33
+ bundler-cache: true # runs 'bundle install' and caches installed gems automatically
34
+ - name: Compile extension
35
+ run: bundle exec rake compile
36
+ - name: Run tests
37
+ run: bundle exec rake spec
data/.gitignore ADDED
@@ -0,0 +1,13 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /_yardoc/
4
+ /coverage/
5
+ /doc/
6
+ /pkg/
7
+ /spec/reports/
8
+ /tmp/
9
+
10
+ # rspec failure tracking
11
+ .rspec_status
12
+ Gemfile.lock
13
+ *.so
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
data/.ruby-gemset ADDED
@@ -0,0 +1 @@
1
+ aez
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ ruby-3.0.0
@@ -0,0 +1,74 @@
1
+ # Contributor Covenant Code of Conduct
2
+
3
+ ## Our Pledge
4
+
5
+ In the interest of fostering an open and welcoming environment, we as
6
+ contributors and maintainers pledge to making participation in our project and
7
+ our community a harassment-free experience for everyone, regardless of age, body
8
+ size, disability, ethnicity, gender identity and expression, level of experience,
9
+ nationality, personal appearance, race, religion, or sexual identity and
10
+ orientation.
11
+
12
+ ## Our Standards
13
+
14
+ Examples of behavior that contributes to creating a positive environment
15
+ include:
16
+
17
+ * Using welcoming and inclusive language
18
+ * Being respectful of differing viewpoints and experiences
19
+ * Gracefully accepting constructive criticism
20
+ * Focusing on what is best for the community
21
+ * Showing empathy towards other community members
22
+
23
+ Examples of unacceptable behavior by participants include:
24
+
25
+ * The use of sexualized language or imagery and unwelcome sexual attention or
26
+ advances
27
+ * Trolling, insulting/derogatory comments, and personal or political attacks
28
+ * Public or private harassment
29
+ * Publishing others' private information, such as a physical or electronic
30
+ address, without explicit permission
31
+ * Other conduct which could reasonably be considered inappropriate in a
32
+ professional setting
33
+
34
+ ## Our Responsibilities
35
+
36
+ Project maintainers are responsible for clarifying the standards of acceptable
37
+ behavior and are expected to take appropriate and fair corrective action in
38
+ response to any instances of unacceptable behavior.
39
+
40
+ Project maintainers have the right and responsibility to remove, edit, or
41
+ reject comments, commits, code, wiki edits, issues, and other contributions
42
+ that are not aligned to this Code of Conduct, or to ban temporarily or
43
+ permanently any contributor for other behaviors that they deem inappropriate,
44
+ threatening, offensive, or harmful.
45
+
46
+ ## Scope
47
+
48
+ This Code of Conduct applies both within project spaces and in public spaces
49
+ when an individual is representing the project or its community. Examples of
50
+ representing a project or community include using an official project e-mail
51
+ address, posting via an official social media account, or acting as an appointed
52
+ representative at an online or offline event. Representation of a project may be
53
+ further defined and clarified by project maintainers.
54
+
55
+ ## Enforcement
56
+
57
+ Instances of abusive, harassing, or otherwise unacceptable behavior may be
58
+ reported by contacting the project team at azuchi@haw.co.jp. All
59
+ complaints will be reviewed and investigated and will result in a response that
60
+ is deemed necessary and appropriate to the circumstances. The project team is
61
+ obligated to maintain confidentiality with regard to the reporter of an incident.
62
+ Further details of specific enforcement policies may be posted separately.
63
+
64
+ Project maintainers who do not follow or enforce the Code of Conduct in good
65
+ faith may face temporary or permanent repercussions as determined by other
66
+ members of the project's leadership.
67
+
68
+ ## Attribution
69
+
70
+ This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
71
+ available at [http://contributor-covenant.org/version/1/4][version]
72
+
73
+ [homepage]: http://contributor-covenant.org
74
+ [version]: http://contributor-covenant.org/version/1/4/
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ source "https://rubygems.org"
2
+
3
+ git_source(:github) {|repo_name| "https://github.com/#{repo_name}" }
4
+
5
+ # Specify your gem's dependencies in aez.gemspec
6
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2021 Shigeyuki Azuchi.
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,48 @@
1
+ # AEZ for Ruby [![Build Status](https://github.com/azuchi/aez/actions/workflows/ruby.yml/badge.svg?branch=master)](https://github.com/azuchi/aez/actions/workflows/ruby.yml) [![Gem Version](https://badge.fury.io/rb/aez.svg)](https://badge.fury.io/rb/aez) [![MIT License](http://img.shields.io/badge/license-MIT-blue.svg?style=flat)](LICENSE)
2
+
3
+ [AEZ](http://web.cs.ucdavis.edu/~rogaway/aez/) binding for Ruby.
4
+ This library calls the AEZv5 implementation in C, which uses AES-NI hardware optimizations, via FFI.
5
+
6
+ ## Requirements
7
+
8
+ The following limitations carry over from Ted Krovetz's C implementation (a caller-side sketch of these checks follows the list):
9
+
10
+ - Intel or ARM CPU supporting AES instructions
11
+ - Faster if all pointers are 16-byte aligned (a performance note rather than a hard limit)
12
+ - Max 16 byte nonce, 16 byte authenticator
13
+ - Single AD (AEZ spec allows vector AD but this code doesn't)
14
+ - Max 2^32-1 byte buffers allowed (due to using unsigned int)
15
+
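Because the binding passes buffers straight through to the C code, a caller may want to enforce these limits before invoking the library. The following is a hypothetical, caller-side guard only; whether the gem performs such validation itself is not stated in this README.

```ruby
# Hypothetical caller-side guard reflecting the limits listed above; the gem's
# own validation behavior is not documented here.
def check_aez_args!(nonce, abyte, message)
  raise ArgumentError, 'nonce must be at most 16 bytes' if nonce.bytesize > 16
  raise ArgumentError, 'authenticator must be 0..16 bytes' unless (0..16).cover?(abyte)
  raise ArgumentError, 'message must be smaller than 2**32 bytes' if message.bytesize >= 2**32
  true
end
```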
16
+ ## Installation
17
+
18
+ Add this line to your application's Gemfile:
19
+
20
+ ```ruby
21
+ gem 'aez'
22
+ ```
23
+
24
+ And then execute:
25
+
26
+ $ bundle
27
+
28
+ Or install it yourself as:
29
+
30
+ $ gem install aez
31
+
32
+ ## Usage
33
+
34
+ ```ruby
35
+ require 'aez'
36
+
37
+ key = ['9adf7a023fbc4e663695f627a8d5b5c45f6752e375d19e11a669e6b949347d0cf5e0e2516ee285af365224976afa60be'].pack('H*')
38
+ nonce = ['799de3d90fbd6fed93b5f96cf9f4e852'].pack('H*')
39
+ ad = ['d6e278e0c6ede09d302d6fde09de77711a9a02fc8a049fb34a5e3f00c1cfc336d0'].pack('H*')
40
+ message = ['efea7ecfa45f51b52ce038cf6c0704392c2211bfca17a36284f63a902b37f0ab'].pack('H*')
41
+ abyte = 16
42
+
43
+ # Encryption
44
+ cipher_text = AEZ.encrypt(key, message, ad, nonce, abyte)
45
+
46
+ # Decryption
47
+ plain_text = AEZ.decrypt(key, cipher_text, ad, nonce, abyte)
48
+ ```
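For reference, a round-trip sketch built only from the calls shown above. The expected ciphertext expansion (plaintext length plus `abyte`) follows the bundled AEZv5 C code; the exact failure behavior of `AEZ.decrypt` on a bad authenticator is not shown in this README.

```ruby
# Reusing key, nonce, ad, message and abyte from the example above.
cipher_text = AEZ.encrypt(key, message, ad, nonce, abyte)

# AEZ appends an abyte-byte authenticator, so the ciphertext is expected to be
# message.bytesize + abyte bytes long (see crypto_aead_encrypt in the bundled C code).
plain_text = AEZ.decrypt(key, cipher_text, ad, nonce, abyte)
raise 'round trip failed' unless plain_text == message
```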
data/Rakefile ADDED
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'bundler/gem_tasks'
4
+ require 'rspec/core/rake_task'
5
+ require 'rake/extensiontask'
6
+
7
+ RSpec::Core::RakeTask.new(:spec)
8
+
9
+ task default: :spec
10
+
11
+ Rake::ExtensionTask.new 'aezv5' do |ext|
12
+ ext.lib_dir = 'lib/aez'
13
+ end
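This `Rake::ExtensionTask` compiles the `aezv5` extension into `lib/aez`, and the gemspec below declares `ext/aezv5/extconf.rb` as the build entry point. That extconf.rb is not part of this listing; a minimal hypothetical sketch, assuming the C source is built with AES-NI enabled on x86, could look like:

```ruby
# ext/aezv5/extconf.rb -- hypothetical sketch only; the file shipped with the
# gem is not shown in this listing and may differ.
require 'mkmf'

# The bundled AEZv5 C code needs __AES__ (and SSE4.1 intrinsics) on x86.
$CFLAGS << ' -O3 -maes -msse4.1'

create_makefile 'aez/aezv5'
```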
data/aez.gemspec ADDED
@@ -0,0 +1,33 @@
1
+
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'aez/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = 'aez'
8
+ spec.version = AEZ::VERSION
9
+ spec.authors = ['Shigeyuki Azuchi']
10
+ spec.email = ['azuchi@chaintope.com']
11
+
12
+ spec.summary = 'AEZ binding for ruby.'
13
+ spec.description = 'AEZ binding for ruby.'
14
+ spec.homepage = 'https://github.com/azuchi/aez'
15
+ spec.license = 'MIT'
16
+
17
+ # Specify which files should be added to the gem when it is released.
18
+ # The `git ls-files -z` command lists the files tracked by git that should be packaged into the gem.
19
+ spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
20
+ `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
21
+ end
22
+ spec.bindir = 'exe'
23
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
24
+ spec.require_paths = ['lib']
25
+ spec.extensions = ['ext/aezv5/extconf.rb']
26
+ spec.add_runtime_dependency 'ffi', '>= 1.15.1'
27
+
28
+ spec.add_development_dependency 'bundler'
29
+ spec.add_development_dependency 'rake', '>= 12.3.3'
30
+ spec.add_development_dependency 'rake-compiler', '>= 1.1.1'
31
+ spec.add_development_dependency 'rspec', '~> 3.0'
32
+
33
+ end
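The gemspec pairs the compiled extension with a runtime dependency on `ffi`. Below is a sketch of how the C entry points from the bundled AEZv5 source (`aez_setup`, `aez_encrypt`, `aez_decrypt`, shown later in this listing) could be attached; the module name and library path are assumptions for illustration, not the gem's actual code.

```ruby
require 'ffi'

# Hypothetical FFI binding sketch; library name and module layout are assumed.
module AEZBindingSketch
  extend FFI::Library
  ffi_lib 'aez/aezv5' # assumed shared-library name produced by the extension build

  # aez_ctx_t holds nine 16-byte blocks (I[2], J[3], L[3], delta3_cache) = 144 bytes.
  AEZ_CTX_SIZE = 144

  # void aez_setup(unsigned char *key, unsigned keylen, aez_ctx_t *ctx)
  attach_function :aez_setup, %i[pointer uint pointer], :void
  # void aez_encrypt(ctx, nonce, nbytes, ad, adbytes, abytes, src, bytes, dst)
  attach_function :aez_encrypt, %i[pointer pointer uint pointer uint uint pointer uint pointer], :void
  # int aez_decrypt(...) -- returns -1 when authentication fails
  attach_function :aez_decrypt, %i[pointer pointer uint pointer uint uint pointer uint pointer], :int
end
```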
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "aez"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start(__FILE__)
data/bin/setup ADDED
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+ set -vx
5
+
6
+ bundle install
7
+
8
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,943 @@
1
+ /*
2
+ // AEZ v5 AES-NI version. AEZ info: http://www.cs.ucdavis.edu/~rogaway/aez
3
+ //
4
+ // REQUIREMENTS: - Intel or ARM CPU supporting AES instructions
5
+ // - Faster if all pointers are 16-byte aligned.
6
+ // - Max 16 byte nonce, 16 byte authenticator
7
+ // - Single AD (AEZ spec allows vector AD but this code doesn't)
8
+ // - Max 2^32-1 byte buffers allowed (due to using unsigned int)
9
+ //
10
+ // Written by Ted Krovetz (ted@krovetz.net). Last modified 21 March 2017.
11
+ //
12
+ // This is free and unencumbered software released into the public domain.
13
+ //
14
+ // Anyone is free to copy, modify, publish, use, compile, sell, or
15
+ // distribute this software, either in source code form or as a compiled
16
+ // binary, for any purpose, commercial or non-commercial, and by any
17
+ // means.
18
+ //
19
+ // In jurisdictions that recognize copyright laws, the author or authors
20
+ // of this software dedicate any and all copyright interest in the
21
+ // software to the public domain. We make this dedication for the benefit
22
+ // of the public at large and to the detriment of our heirs and
23
+ // successors. We intend this dedication to be an overt act of
24
+ // relinquishment in perpetuity of all present and future rights to this
25
+ // software under copyright law.
26
+ //
27
+ // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
28
+ // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
29
+ // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
30
+ // IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
31
+ // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
32
+ // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
33
+ // OTHER DEALINGS IN THE SOFTWARE.
34
+ //
35
+ // For more information, please refer to <http://unlicense.org/>
36
+ */
37
+
38
+ #include <stdint.h>
39
+ #include <stddef.h>
40
+
41
+ /* ------------------------------------------------------------------------- */
42
+ #if __AES__ /* Defined by gcc/clang when compiling for AES-NI */
43
+ /* ------------------------------------------------------------------------- */
44
+
45
+ #include <smmintrin.h>
46
+ #include <wmmintrin.h>
47
+ #define block __m128i
48
+
49
+ /* ------------------------------------------------------------------------- */
50
+
51
+ #define zero _mm_setzero_si128()
52
+ #define vadd(x,y) _mm_add_epi8(x,y)
53
+ #define vand(x,y) _mm_and_si128(x,y)
54
+ #define vandnot(x,y) _mm_andnot_si128(x,y) /* (~x)&y */
55
+ #define vor(x,y) _mm_or_si128(x,y)
56
+ #define vxor(x,y) _mm_xor_si128(x,y)
57
+
58
+ static int is_zero(block x) { return _mm_testz_si128(x,x); } /* 0 or 1 */
59
+
60
+ static block sll4(block x) {
61
+ return vor(_mm_srli_epi64(x, 4), _mm_slli_epi64(_mm_srli_si128(x, 8), 60));
62
+ }
63
+
64
+ static block srl4(block x) {
65
+ return vor(_mm_slli_epi64(x, 4), _mm_srli_epi64(_mm_slli_si128(x, 8), 60));
66
+ }
67
+
68
+ static __m128i bswap16(__m128i b) {
69
+ const __m128i t = _mm_set_epi8(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15);
70
+ return _mm_shuffle_epi8(b,t);
71
+ }
72
+
73
+ static __m128i double_block(__m128i bl) {
74
+ const __m128i mask = _mm_set_epi32(135,1,1,1);
75
+ __m128i tmp = _mm_srai_epi32(bl, 31);
76
+ tmp = _mm_and_si128(tmp, mask);
77
+ tmp = _mm_shuffle_epi32(tmp, _MM_SHUFFLE(2,1,0,3));
78
+ bl = _mm_slli_epi32(bl, 1);
79
+ return _mm_xor_si128(bl,tmp);
80
+ }
81
+
82
+ static __m128i aes(__m128i *key, __m128i in, __m128i first_key) {
83
+ in = vxor(in, first_key);
84
+ in = _mm_aesenc_si128 (in,key[0]);
85
+ in = _mm_aesenc_si128 (in,key[2]);
86
+ in = _mm_aesenc_si128 (in,key[5]);
87
+ in = _mm_aesenc_si128 (in,key[0]);
88
+ in = _mm_aesenc_si128 (in,key[2]);
89
+ in = _mm_aesenc_si128 (in,key[5]);
90
+ in = _mm_aesenc_si128 (in,key[0]);
91
+ in = _mm_aesenc_si128 (in,key[2]);
92
+ in = _mm_aesenc_si128 (in,key[5]);
93
+ return _mm_aesenc_si128 (in,key[0]);
94
+ }
95
+
96
+ static __m128i aes4(__m128i in, __m128i a, __m128i b,
97
+ __m128i c, __m128i d, __m128i e) {
98
+ in = _mm_aesenc_si128(vxor(in,a),b);
99
+ in = _mm_aesenc_si128(in,c);
100
+ in = _mm_aesenc_si128(in,d);
101
+ return _mm_aesenc_si128 (in,e);
102
+ }
103
+
104
+ #define aes4pre(in,a,b,c,d) aes4(in,a,b,c,d,zero)
105
+
106
+ static __m128i loadu(const void *p) { return _mm_loadu_si128((__m128i*)p); }
107
+ static void storeu(const void *p, __m128i x) {_mm_storeu_si128((__m128i*)p,x);}
108
+
109
+ #define load loadu /* Intel with AES-NI has fast unaligned loads/stores */
110
+ #define store storeu
111
+
112
+ /* ------------------------------------------------------------------------- */
113
+ #elif __ARM_FEATURE_CRYPTO
114
+ /* ------------------------------------------------------------------------- */
115
+
116
+ #include <arm_neon.h>
117
+ #define block uint8x16_t
118
+
119
+ #define zero vmovq_n_u8(0)
120
+ #define vadd(x,y) vaddq_u8(x,y)
121
+ #define vand(x,y) vandq_u8(x,y)
122
+ #define vandnot(x,y) vbicq_u8(y,x) /* (~x)&y */
123
+ #define vor(x,y) vorrq_u8(x,y)
124
+ #define vxor(x,y) veorq_u8(x,y)
125
+
126
+ static int is_zero(block x) { /* 0 or 1 */
127
+ uint8x8_t t = vorr_u8(vget_high_u8(x), vget_low_u8(x));
128
+ return vget_lane_u64(vreinterpret_u64_u8(t),0) == 0;
129
+ }
130
+
131
+ static block srl4(block x) {
132
+ const block mask = {15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,0};
133
+ uint8x16_t tmp = vandq_u8(vshrq_n_u8(vextq_u8(x, x, 1),4),mask);
134
+ return veorq_u8(tmp,vshlq_n_u8(x,4));
135
+ }
136
+
137
+ static block sll4(block x) {
138
+ const block mask = {0,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15};
139
+ uint8x16_t tmp = vshlq_n_u8(vandq_u8(vextq_u8(x, x, 15),mask),4);
140
+ return veorq_u8(tmp,vshrq_n_u8(x,4));
141
+ }
142
+
143
+ static uint8x16_t bswap16(uint8x16_t b) { return b; } /* Not with uint8x16_t */
144
+
145
+ static block double_block(block b) {
146
+ const block mask = {135,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1};
147
+ block tmp = (block)vshrq_n_s8((int8x16_t)b,7);
148
+ tmp = vandq_u8(tmp, mask);
149
+ tmp = vextq_u8(tmp, tmp, 1); /* Rotate high byte to low end */
150
+ b = vshlq_n_u8(b,1);
151
+ return veorq_u8(tmp,b);
152
+ }
153
+
154
+ static uint8x16_t aes(uint8x16_t *key, uint8x16_t in, uint8x16_t first_key) {
155
+ in = vaesmcq_u8(vaeseq_u8(in, first_key));
156
+ in = vaesmcq_u8(vaeseq_u8(in, key[0]));
157
+ in = vaesmcq_u8(vaeseq_u8(in, key[2]));
158
+ in = vaesmcq_u8(vaeseq_u8(in, key[5]));
159
+ in = vaesmcq_u8(vaeseq_u8(in, key[0]));
160
+ in = vaesmcq_u8(vaeseq_u8(in, key[2]));
161
+ in = vaesmcq_u8(vaeseq_u8(in, key[5]));
162
+ in = vaesmcq_u8(vaeseq_u8(in, key[0]));
163
+ in = vaesmcq_u8(vaeseq_u8(in, key[2]));
164
+ in = vaesmcq_u8(vaeseq_u8(in, key[5]));
165
+ return vxor(in, key[0]);
166
+ }
167
+
168
+ static uint8x16_t aes4pre(uint8x16_t in, uint8x16_t a, uint8x16_t b,
169
+ uint8x16_t c, uint8x16_t d) {
170
+ in = vaesmcq_u8(vaeseq_u8(in, a));
171
+ in = vaesmcq_u8(vaeseq_u8(in, b));
172
+ in = vaesmcq_u8(vaeseq_u8(in, c));
173
+ return vaesmcq_u8(vaeseq_u8(in, d));
174
+ }
175
+
176
+ #define aes4(in,a,b,c,d,e) vxor(aes4pre(in,a,b,c,d),e)
177
+
178
+ static uint8x16_t load(const void *p) { return *(uint8x16_t *)p; }
179
+ static void store(void *p, uint8x16_t x) { *(uint8x16_t *)p = x; }
180
+
181
+ #define loadu load /* ARMv8 allows unaligned loads/stores */
182
+ #define storeu store /* ARMv8 allows unaligned stores */
183
+
184
+ /* ------------------------------------------------------------------------- */
185
+ #else
186
+ #error - This implementation requires __AES__ or __ARM_FEATURE_CRYPTO
187
+ #endif
188
+ /* ------------------------------------------------------------------------- */
189
+
190
+ #define vxor3(x,y,z) vxor(vxor(x,y),z)
191
+ #define vxor4(w,x,y,z) vxor(vxor(w,x),vxor(y,z))
192
+ #define load_partial(p,n) loadu(p)
193
+
194
+ /*
195
+ Might need a version like this if, for example, we want to load a 12-byte nonce
196
+ into a 16-byte block.
197
+
198
+ static block load_partial(const void *p, unsigned n) {
199
+ if ((intptr_t)p % 16 == 0) return load(p);
200
+ else {
201
+ block tmp; unsigned i;
202
+ for (i=0; i<n; i++) ((char*)&tmp)[i] = ((char*)p)[i];
203
+ return tmp;
204
+ }
205
+ }
206
+ */
207
+
208
+ static const unsigned char pad[] = {0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
209
+ 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
210
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
211
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
212
+ 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
213
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};
214
+
215
+ static block zero_pad(block x, unsigned zero_bytes) {
216
+ return vand(x, loadu(pad + zero_bytes));
217
+ }
218
+
219
+ static block one_zero_pad(block x, unsigned one_zero_bytes) {
220
+ block *p = (block*)(pad + one_zero_bytes);
221
+ return vor(vand(x, loadu(p)), loadu(p+1));
222
+ }
223
+
224
+ static block zero_set_byte(char val, unsigned idx) {
225
+ block tmp = zero; ((char *)&tmp)[idx] = val; return tmp;
226
+ }
227
+
228
+ /* ------------------------------------------------------------------------- */
229
+
230
+ typedef struct { /* All data memory-correct except 2I register-correct */
231
+ block I[2]; /* 1I, 2I */
232
+ block J[3]; /* 1J,2J,4J */
233
+ block L[3]; /* 1L,2L,4L */
234
+ block delta3_cache;
235
+ } aez_ctx_t;
236
+
237
+ /* ------------------------------------------------------------------------- */
238
+
239
+ static int blake2b(void *out, size_t outlen,
240
+ const void *key, size_t keylen,
241
+ const void *in, size_t inlen);
242
+
243
+ /* ------------------------------------------------------------------------- */
244
+
245
+ void aez_setup(unsigned char *key, unsigned keylen, aez_ctx_t *ctx) {
246
+ block tmp;
247
+ if (keylen==48) {
248
+ ctx->I[0] = loadu(key);
249
+ ctx->J[0] = loadu(key+16);
250
+ ctx->L[0] = loadu(key+32);
251
+ } else {
252
+ blake2b(ctx, 48, 0, 0, key, keylen); /* Puts IJL into ctx */
253
+ ctx->L[0] = ctx->J[0]; /* Rearrange. */
254
+ ctx->J[0] = ctx->I[1]; /* Rearrange. */
255
+ }
256
+ /* Fill remaining ctx locations with doublings */
257
+ ctx->I[1] = double_block(bswap16(ctx->I[0])); /* No post-bswap */
258
+ ctx->J[1] = bswap16(tmp = double_block(bswap16(ctx->J[0])));
259
+ ctx->J[2] = bswap16(double_block(tmp));
260
+ ctx->L[1] = bswap16(tmp = double_block(bswap16(ctx->L[0])));
261
+ ctx->L[2] = bswap16(double_block(tmp));
262
+ ctx->delta3_cache = zero;
263
+ }
264
+
265
+ /* ------------------------------------------------------------------------- */
266
+
267
+ /* !! Warning !! Only handles nbytes <= 16 and abytes <= 16 */
268
+ static block aez_hash(aez_ctx_t *ctx, char *n, unsigned nbytes, char *ad,
269
+ unsigned adbytes, unsigned abytes) {
270
+ block o1, o2, o3, o4, o5, o6, o7, o8, sum, offset, tmp;
271
+ block I=ctx->I[0], Ifordoubling = ctx->I[1], I2 = bswap16(Ifordoubling);
272
+ block L=ctx->L[0], L2=ctx->L[1],L4=ctx->L[2];
273
+ block J=ctx->J[0], J2 = ctx->J[1], J4 = ctx->J[2], J5 = vxor(J,J4);
274
+
275
+ /* Process abytes and nonce */
276
+ offset = vxor4(J, J2, I2, L);
277
+ tmp = zero_set_byte((char)(8*abytes),15);
278
+ sum = aes4pre(offset,tmp,J,I,L);
279
+
280
+ if (nbytes==16) sum = aes4(vxor(loadu(n), J4), vxor(I2, L),J,I,L,sum);
281
+ else sum = aes4(vxor(J4, I),
282
+ one_zero_pad(load_partial(n,nbytes),16-nbytes),J,I,L,sum);
283
+
284
+ if (ad) { /* Possible easy misuse: ad==null && adbytes==0 */
285
+ if (adbytes==0) {
286
+ ctx->delta3_cache = aes4pre(vxor(J5, I), loadu(pad+32),J,I,L);
287
+ } else {
288
+ block delta3 = zero;
289
+ offset = vxor(J5, I2);
290
+ while (adbytes >= 8*16) {
291
+ o1 = vxor(offset,L);
292
+ o2 = vxor(offset,L2);
293
+ o3 = vxor(o1,L2);
294
+ o4 = vxor(offset,L4);
295
+ o5 = vxor(o1,L4);
296
+ o6 = vxor(o2,L4);
297
+ o7 = vxor(o3,L4);
298
+ o8 = offset;
299
+ Ifordoubling = double_block(Ifordoubling);
300
+ offset = vxor(J5, bswap16(Ifordoubling));
301
+ delta3 = vxor(delta3, aes4pre(load(ad+ 0), o1, J, I, L));
302
+ delta3 = vxor(delta3, aes4pre(load(ad+ 16), o2, J, I, L));
303
+ delta3 = vxor(delta3, aes4pre(load(ad+ 32), o3, J, I, L));
304
+ delta3 = vxor(delta3, aes4pre(load(ad+ 48), o4, J, I, L));
305
+ delta3 = vxor(delta3, aes4pre(load(ad+ 64), o5, J, I, L));
306
+ delta3 = vxor(delta3, aes4pre(load(ad+ 80), o6, J, I, L));
307
+ delta3 = vxor(delta3, aes4pre(load(ad+ 96), o7, J, I, L));
308
+ delta3 = vxor(delta3, aes4pre(load(ad+112), o8, J, I, L));
309
+ adbytes-=8*16; ad+=8*16;
310
+ }
311
+ if (adbytes >= 4*16) {
312
+ o1 = vxor(offset,L);
313
+ o2 = vxor(offset,L2);
314
+ o3 = vxor(o1,L2);
315
+ o4 = offset = vxor(offset,L4);
316
+ delta3 = vxor(delta3, aes4pre(load(ad+ 0), o1, J, I, L));
317
+ delta3 = vxor(delta3, aes4pre(load(ad+ 16), o2, J, I, L));
318
+ delta3 = vxor(delta3, aes4pre(load(ad+ 32), o3, J, I, L));
319
+ delta3 = vxor(delta3, aes4pre(load(ad+ 48), o4, J, I, L));
320
+ adbytes-=4*16; ad+=4*16;
321
+ }
322
+ if (adbytes >= 2*16) {
323
+ o1 = vxor(offset,L);
324
+ o2 = offset = vxor(offset,L2);
325
+ delta3 = vxor(delta3, aes4pre(load(ad+ 0), o1, J, I, L));
326
+ delta3 = vxor(delta3, aes4pre(load(ad+ 16), o2, J, I, L));
327
+ adbytes-=2*16; ad+=2*16;
328
+ }
329
+ if (adbytes >= 1*16) {
330
+ o1 = vxor(offset,L);
331
+ delta3 = vxor(delta3, aes4pre(load(ad+ 0), o1, J, I, L));
332
+ adbytes-=1*16; ad+=1*16;
333
+ }
334
+ if (adbytes) {
335
+ tmp = vxor3(J5, I, one_zero_pad(load(ad),16-adbytes));
336
+ delta3 = aes4(vxor(J5, I), one_zero_pad(load(ad),16-adbytes),
337
+ J, I, L, delta3);
338
+ }
339
+ ctx->delta3_cache = delta3;
340
+ }
341
+ }
342
+ return vxor(sum,ctx->delta3_cache);
343
+ }
344
+
345
+ /* ------------------------------------------------------------------------- */
346
+
347
+ static block pass_one(aez_ctx_t *ctx, block *src, unsigned bytes, block *dst) {
348
+ block o1, o2, o3, o4, o5, o6, o7, o8, offset, tmp, sum=zero;
349
+ block I=ctx->I[0], Ifordoubling = ctx->I[1];
350
+ block L=ctx->L[0], L2=ctx->L[1],L4=ctx->L[2];
351
+ block J=ctx->J[0];
352
+ offset = vxor(J, bswap16(Ifordoubling));
353
+ while (bytes >= 16*16) {
354
+ o1 = vxor(offset,L);
355
+ o2 = vxor(offset,L2);
356
+ o3 = vxor(o1,L2);
357
+ o4 = vxor(offset,L4);
358
+ o5 = vxor(o1,L4);
359
+ o6 = vxor(o2,L4);
360
+ o7 = vxor(o3,L4);
361
+ o8 = offset;
362
+ Ifordoubling = double_block(Ifordoubling);
363
+ offset = vxor(J,bswap16(Ifordoubling));
364
+ store(dst+ 0, aes4(load(src + 1),o1, J, I, L, load(src+ 0)));
365
+ store(dst+ 2, aes4(load(src + 3),o2, J, I, L, load(src+ 2)));
366
+ store(dst+ 4, aes4(load(src + 5),o3, J, I, L, load(src+ 4)));
367
+ store(dst+ 6, aes4(load(src + 7),o4, J, I, L, load(src+ 6)));
368
+ store(dst+ 8, aes4(load(src + 9),o5, J, I, L, load(src+ 8)));
369
+ store(dst+10, aes4(load(src +11),o6, J, I, L, load(src+10)));
370
+ store(dst+12, aes4(load(src +13),o7, J, I, L, load(src+12)));
371
+ store(dst+14, aes4(load(src +15),o8, J, I, L, load(src+14)));
372
+ tmp=aes4(I,load(dst+ 0),J,I,L,load(src+ 1));store(dst+ 1,tmp);
373
+ sum=vxor(sum,tmp);
374
+ tmp=aes4(I,load(dst+ 2),J,I,L,load(src+ 3));
375
+ store(dst+ 3,tmp);sum=vxor(sum,tmp);
376
+ tmp=aes4(I,load(dst+ 4),J,I,L,load(src+ 5));
377
+ store(dst+ 5,tmp);sum=vxor(sum,tmp);
378
+ tmp=aes4(I,load(dst+ 6),J,I,L,load(src+ 7));
379
+ store(dst+ 7,tmp);sum=vxor(sum,tmp);
380
+ tmp=aes4(I,load(dst+ 8),J,I,L,load(src+ 9));
381
+ store(dst+ 9,tmp);sum=vxor(sum,tmp);
382
+ tmp=aes4(I,load(dst+10),J,I,L,load(src+11));
383
+ store(dst+11,tmp);sum=vxor(sum,tmp);
384
+ tmp=aes4(I,load(dst+12),J,I,L,load(src+13));
385
+ store(dst+13,tmp);sum=vxor(sum,tmp);
386
+ tmp=aes4(I,load(dst+14),J,I,L,load(src+15));
387
+ store(dst+15,tmp);sum=vxor(sum,tmp);
388
+ bytes -= 16*16; dst += 16; src += 16;
389
+ }
390
+ if (bytes >= 8*16) {
391
+ o1 = vxor(offset,L);
392
+ o2 = vxor(offset,L2);
393
+ o3 = vxor(o1,L2);
394
+ o4 = offset = vxor(offset,L4);
395
+ store(dst+ 0, aes4(load(src + 1),o1, J, I, L, load(src+ 0)));
396
+ store(dst+ 2, aes4(load(src + 3),o2, J, I, L, load(src+ 2)));
397
+ store(dst+ 4, aes4(load(src + 5),o3, J, I, L, load(src+ 4)));
398
+ store(dst+ 6, aes4(load(src + 7),o4, J, I, L, load(src+ 6)));
399
+ tmp=aes4(I,load(dst+ 0),J,I,L,load(src+ 1));
400
+ store(dst+ 1,tmp);sum=vxor(sum,tmp);
401
+ tmp=aes4(I,load(dst+ 2),J,I,L,load(src+ 3));
402
+ store(dst+ 3,tmp);sum=vxor(sum,tmp);
403
+ tmp=aes4(I,load(dst+ 4),J,I,L,load(src+ 5));
404
+ store(dst+ 5,tmp);sum=vxor(sum,tmp);
405
+ tmp=aes4(I,load(dst+ 6),J,I,L,load(src+ 7));
406
+ store(dst+ 7,tmp);sum=vxor(sum,tmp);
407
+ bytes -= 8*16; dst += 8; src += 8;
408
+ }
409
+ if (bytes >= 4*16) {
410
+ o1 = vxor(offset,L);
411
+ o2 = offset = vxor(offset,L2);
412
+ store(dst+ 0, aes4(load(src + 1),o1, J, I, L, load(src+ 0)));
413
+ store(dst+ 2, aes4(load(src + 3),o2, J, I, L, load(src+ 2)));
414
+ tmp=aes4(I,load(dst+ 0),J,I,L,load(src+ 1));
415
+ store(dst+ 1,tmp);sum=vxor(sum,tmp);
416
+ tmp=aes4(I,load(dst+ 2),J,I,L,load(src+ 3));
417
+ store(dst+ 3,tmp);sum=vxor(sum,tmp);
418
+ bytes -= 4*16; dst += 4; src += 4;
419
+ }
420
+ if (bytes) {
421
+ o1 = vxor(offset,L);
422
+ store(dst+ 0, aes4(load(src + 1),o1, J, I, L, load(src+ 0)));
423
+ tmp=aes4(I,load(dst+ 0),J,I,L,load(src+ 1));
424
+ store(dst+ 1,tmp);sum=vxor(sum,tmp);
425
+ }
426
+ return sum;
427
+ }
428
+
429
+ /* ------------------------------------------------------------------------- */
430
+
431
+ static block pass_two(aez_ctx_t *ctx, block s, unsigned bytes, block *dst) {
432
+ block o1, o2, o3, o4, o5, o6, o7, o8, sum=zero, offset, fs[8], tmp[8];
433
+ block I=ctx->I[0], Ifordoubling = ctx->I[1];
434
+ block L=ctx->L[0], L2=ctx->L[1],L4=ctx->L[2];
435
+ block J=ctx->J[0], J2=ctx->J[1], J3=vxor(J,J2);
436
+ offset = vxor(J2, bswap16(Ifordoubling));
437
+ while (bytes >= 16*16) {
438
+ o1 = vxor(offset,L);
439
+ o2 = vxor(offset,L2);
440
+ o3 = vxor(o1,L2);
441
+ o4 = vxor(offset,L4);
442
+ o5 = vxor(o1,L4);
443
+ o6 = vxor(o2,L4);
444
+ o7 = vxor(o3,L4);
445
+ o8 = offset;
446
+ Ifordoubling = double_block(Ifordoubling);
447
+ offset = vxor(J2, bswap16(Ifordoubling));
448
+ fs[0] = aes4pre(s,o1,J,I,L); fs[1] = aes4pre(s,o2,J,I,L);
449
+ fs[2] = aes4pre(s,o3,J,I,L); fs[3] = aes4pre(s,o4,J,I,L);
450
+ fs[4] = aes4pre(s,o5,J,I,L); fs[5] = aes4pre(s,o6,J,I,L);
451
+ fs[6] = aes4pre(s,o7,J,I,L); fs[7] = aes4pre(s,o8,J,I,L);
452
+ o1 = vxor(J3,o1); o2 = vxor(J3,o2);
453
+ o3 = vxor(J3,o3); o4 = vxor(J3,o4);
454
+ o5 = vxor(J3,o5); o6 = vxor(J3,o6);
455
+ o7 = vxor(J3,o7); o8 = vxor(J3,o8);
456
+ tmp[0] = vxor(load(dst+ 0),fs[0]); sum = vxor(sum,tmp[0]);
457
+ store(dst+ 0,vxor(load(dst+ 1),fs[0]));
458
+ tmp[1] = vxor(load(dst+ 2),fs[1]); sum = vxor(sum,tmp[1]);
459
+ store(dst+ 2,vxor(load(dst+ 3),fs[1]));
460
+ tmp[2] = vxor(load(dst+ 4),fs[2]); sum = vxor(sum,tmp[2]);
461
+ store(dst+ 4,vxor(load(dst+ 5),fs[2]));
462
+ tmp[3] = vxor(load(dst+ 6),fs[3]); sum = vxor(sum,tmp[3]);
463
+ store(dst+ 6,vxor(load(dst+ 7),fs[3]));
464
+ tmp[4] = vxor(load(dst+ 8),fs[4]); sum = vxor(sum,tmp[4]);
465
+ store(dst+ 8,vxor(load(dst+ 9),fs[4]));
466
+ tmp[5] = vxor(load(dst+10),fs[5]); sum = vxor(sum,tmp[5]);
467
+ store(dst+10,vxor(load(dst+11),fs[5]));
468
+ tmp[6] = vxor(load(dst+12),fs[6]); sum = vxor(sum,tmp[6]);
469
+ store(dst+12,vxor(load(dst+13),fs[6]));
470
+ tmp[7] = vxor(load(dst+14),fs[7]); sum = vxor(sum,tmp[7]);
471
+ store(dst+14,vxor(load(dst+15),fs[7]));
472
+ store(dst+ 1, aes4(I,load(dst+ 0), J, I, L, tmp[0]));
473
+ store(dst+ 3, aes4(I,load(dst+ 2), J, I, L, tmp[1]));
474
+ store(dst+ 5, aes4(I,load(dst+ 4), J, I, L, tmp[2]));
475
+ store(dst+ 7, aes4(I,load(dst+ 6), J, I, L, tmp[3]));
476
+ store(dst+ 9, aes4(I,load(dst+ 8), J, I, L, tmp[4]));
477
+ store(dst+11, aes4(I,load(dst+10), J, I, L, tmp[5]));
478
+ store(dst+13, aes4(I,load(dst+12), J, I, L, tmp[6]));
479
+ store(dst+15, aes4(I,load(dst+14), J, I, L, tmp[7]));
480
+ store(dst+ 0, aes4(load(dst+ 1),o1, J, I, L, load(dst+ 0)));
481
+ store(dst+ 2, aes4(load(dst+ 3),o2, J, I, L, load(dst+ 2)));
482
+ store(dst+ 4, aes4(load(dst+ 5),o3, J, I, L, load(dst+ 4)));
483
+ store(dst+ 6, aes4(load(dst+ 7),o4, J, I, L, load(dst+ 6)));
484
+ store(dst+ 8, aes4(load(dst+ 9),o5, J, I, L, load(dst+ 8)));
485
+ store(dst+10, aes4(load(dst+11),o6, J, I, L, load(dst+10)));
486
+ store(dst+12, aes4(load(dst+13),o7, J, I, L, load(dst+12)));
487
+ store(dst+14, aes4(load(dst+15),o8, J, I, L, load(dst+14)));
488
+ bytes -= 16*16; dst += 16;
489
+ }
490
+ if (bytes >= 8*16) {
491
+ o1 = vxor(offset,L);
492
+ o2 = vxor(offset,L2);
493
+ o3 = vxor(o1,L2);
494
+ o4 = offset = vxor(offset,L4);
495
+ fs[0] = aes4pre(s,o1,J,I,L); fs[1] = aes4pre(s,o2,J,I,L);
496
+ fs[2] = aes4pre(s,o3,J,I,L); fs[3] = aes4pre(s,o4,J,I,L);
497
+ o1 = vxor(J3,o1); o2 = vxor(J3,o2);
498
+ o3 = vxor(J3,o3); o4 = vxor(J3,o4);
499
+ tmp[0] = vxor(load(dst+ 0),fs[0]); sum = vxor(sum,tmp[0]);
500
+ store(dst+ 0,vxor(load(dst+ 1),fs[0]));
501
+ tmp[1] = vxor(load(dst+ 2),fs[1]); sum = vxor(sum,tmp[1]);
502
+ store(dst+ 2,vxor(load(dst+ 3),fs[1]));
503
+ tmp[2] = vxor(load(dst+ 4),fs[2]); sum = vxor(sum,tmp[2]);
504
+ store(dst+ 4,vxor(load(dst+ 5),fs[2]));
505
+ tmp[3] = vxor(load(dst+ 6),fs[3]); sum = vxor(sum,tmp[3]);
506
+ store(dst+ 6,vxor(load(dst+ 7),fs[3]));
507
+ store(dst+ 1, aes4(I,load(dst+ 0), J, I, L, tmp[0]));
508
+ store(dst+ 3, aes4(I,load(dst+ 2), J, I, L, tmp[1]));
509
+ store(dst+ 5, aes4(I,load(dst+ 4), J, I, L, tmp[2]));
510
+ store(dst+ 7, aes4(I,load(dst+ 6), J, I, L, tmp[3]));
511
+ store(dst+ 0, aes4(load(dst+ 1),o1, J, I, L, load(dst+ 0)));
512
+ store(dst+ 2, aes4(load(dst+ 3),o2, J, I, L, load(dst+ 2)));
513
+ store(dst+ 4, aes4(load(dst+ 5),o3, J, I, L, load(dst+ 4)));
514
+ store(dst+ 6, aes4(load(dst+ 7),o4, J, I, L, load(dst+ 6)));
515
+ bytes -= 8*16; dst += 8;
516
+ }
517
+ if (bytes >= 4*16) {
518
+ o1 = vxor(offset,L);
519
+ o2 = offset = vxor(offset,L2);
520
+ fs[0] = aes4pre(s,o1,J,I,L); fs[1] = aes4pre(s,o2,J,I,L);
521
+ o1 = vxor(J3,o1); o2 = vxor(J3,o2);
522
+ tmp[0] = vxor(load(dst+ 0),fs[0]); sum = vxor(sum,tmp[0]);
523
+ store(dst+ 0,vxor(load(dst+ 1),fs[0]));
524
+ tmp[1] = vxor(load(dst+ 2),fs[1]); sum = vxor(sum,tmp[1]);
525
+ store(dst+ 2,vxor(load(dst+ 3),fs[1]));
526
+ store(dst+ 1, aes4(I,load(dst+ 0), J, I, L, tmp[0]));
527
+ store(dst+ 3, aes4(I,load(dst+ 2), J, I, L, tmp[1]));
528
+ store(dst+ 0, aes4(load(dst+ 1),o1, J, I, L, load(dst+ 0)));
529
+ store(dst+ 2, aes4(load(dst+ 3),o2, J, I, L, load(dst+ 2)));
530
+ bytes -= 4*16; dst += 4;
531
+ }
532
+ if (bytes) {
533
+ o1 = vxor(offset,L);
534
+ fs[0] = aes4pre(s,o1,J,I,L);
535
+ o1 = vxor(J3,o1);
536
+ tmp[0] = vxor(load(dst+ 0),fs[0]); sum = vxor(sum,tmp[0]);
537
+ store(dst+ 0,vxor(load(dst+ 1),fs[0]));
538
+ store(dst+ 1, aes4(I,load(dst+ 0), J, I, L, tmp[0]));
539
+ store(dst+ 0, aes4(load(dst+ 1),o1, J, I, L, load(dst+ 0)));
540
+ }
541
+ return sum;
542
+ }
543
+
544
+ /* ------------------------------------------------------------------------- */
545
+
546
+ static int cipher_aez_core(aez_ctx_t *ctx, block t, int d, char *src,
547
+ unsigned bytes, unsigned abytes, char *dst) {
548
+ block s, x, y, frag0, frag1, final0, final1;
549
+ block I=ctx->I[0], L=ctx->L[0], J=ctx->J[0];
550
+ block L4=ctx->L[2], I2 = bswap16(ctx->I[1]);
551
+ unsigned i, frag_bytes, initial_bytes;
552
+
553
+ if (!d) bytes += abytes;
554
+ frag_bytes = bytes % 32;
555
+ initial_bytes = bytes - frag_bytes - 32;
556
+
557
+ /* Compute x and store intermediate results */
558
+ x = pass_one(ctx, (block*)src, initial_bytes, (block*)dst);
559
+ if (frag_bytes >= 16) {
560
+ frag0 = load(src + initial_bytes);
561
+ frag1 = one_zero_pad(load(src + initial_bytes + 16), 32-frag_bytes);
562
+ x = aes4(frag0, vxor(L4, I2), J, I, L, x);
563
+ x = vxor(x, aes4pre(frag1, vxor3(I2, L4, L), J, I, L));
564
+ } else if (frag_bytes) {
565
+ frag0 = one_zero_pad(load(src + initial_bytes), 16-frag_bytes);
566
+ x = aes4(frag0, vxor(L4, I2), J, I, L, x);
567
+ }
568
+
569
+ /* Calculate s and final block values (y xor'd to final1 later) */
570
+ final0 = vxor3(loadu(src + (bytes - 32)), x, t);
571
+ if (d || !abytes) final1 = loadu(src+(bytes-32)+16);
572
+ else final1 = zero_pad(loadu(src+(bytes-32)+16), abytes);
573
+ final0 = aes4(final1, vxor(I2, ctx->L[d]), J, I, L, final0);
574
+ final1 = vxor(final1, aes((block*)ctx, final0, ctx->L[d]));
575
+ s = vxor(final0, final1);
576
+ final0 = vxor(final0, aes((block*)ctx, final1, ctx->L[d^1]));
577
+ /* Decryption: final0 should hold abytes zero bytes. If not, failure */
578
+ if (d && !is_zero(vandnot(loadu(pad+abytes),final0))) return -1;
579
+ final1 = aes4(final0, vxor(I2, ctx->L[d^1]), J, I, L, final1);
580
+
581
+ /* Compute y and store final results */
582
+ y = pass_two(ctx, s, initial_bytes, (block*)dst);
583
+ if (frag_bytes >= 16) {
584
+ frag0 = vxor(frag0, aes((block*)ctx, s, L4));
585
+ frag1 = vxor(frag1, aes((block*)ctx, s, vxor(L4, L)));
586
+ frag1 = one_zero_pad(frag1, 32-frag_bytes);
587
+ y = aes4(frag0, vxor(I2, L4), J, I, L, y);
588
+ y = vxor(y, aes4pre(frag1, vxor3(I2, L4, L), J, I, L));
589
+ store(dst + initial_bytes, frag0);
590
+ store(dst + initial_bytes + 16, frag1);
591
+ } else if (frag_bytes) {
592
+ frag0 = vxor(frag0, aes((block*)ctx, s, L4));
593
+ frag0 = one_zero_pad(frag0, 16-frag_bytes);
594
+ y = aes4(frag0, vxor(I2, L4), J, I, L, y);
595
+ store(dst + initial_bytes, frag0);
596
+ }
597
+
598
+ storeu(dst + (bytes - 32), vxor3(final1, y, t));
599
+ if (!d || !abytes)
600
+ storeu(dst + (bytes - 32) + 16, final0);
601
+ else {
602
+ for (i=0; i<16-abytes; i++)
603
+ ((char*)dst + (bytes - 16))[i] = ((char*)&final0)[i];
604
+ }
605
+ return 0;
606
+ }
607
+
608
+ /* ------------------------------------------------------------------------- */
609
+
610
+ static int cipher_aez_tiny(aez_ctx_t *ctx, block t, int d, char *src,
611
+ unsigned bytes, unsigned abytes, char *dst) {
612
+ block l, r, tmp, one, rcon, buf[2], mask_10, mask_ff;
613
+ block I=ctx->I[0], L=ctx->L[0], J=ctx->J[0], t_orig = t;
614
+ block L2=ctx->L[1], L4=ctx->L[2], I2 = bswap16(ctx->I[1]);
615
+ unsigned rnds, i;
616
+
617
+ /* load src into buf, zero pad, update bytes for abytes */
618
+ if (bytes >= 16) {
619
+ buf[0] = load(src);
620
+ buf[1] = zero_pad(load_partial(src+16,bytes-16),32-bytes);
621
+ } else {
622
+ buf[0] = zero_pad(load_partial(src,bytes),16-bytes);
623
+ buf[1] = zero;
624
+ }
625
+ if (!d) bytes += abytes;
626
+
627
+ /* load l/r, create 10* padding masks, shift r 4 bits if odd length */
628
+ l = buf[0];
629
+ r = loadu((char*)buf+bytes/2);
630
+ mask_ff = loadu(pad+16-bytes/2);
631
+ mask_10 = loadu(pad+32-bytes/2);
632
+ if (bytes&1) { /* Odd length. Deal with nibbles. */
633
+ mask_10 = sll4(mask_10);
634
+ ((char*)&mask_ff)[bytes/2] = (char)0xf0;
635
+ r = bswap16(r);
636
+ r = srl4(r);
637
+ r = bswap16(r);
638
+ }
639
+ r = vor(vand(r, mask_ff), mask_10);
640
+
641
+ /* Add tweak offset into t, and determine the number of rounds */
642
+ if (bytes >= 16) {
643
+ t = vxor4(t, I2, L2, L4); /* (0,6) offset */
644
+ rnds = 8;
645
+ } else {
646
+ t = vxor(vxor4(t, I2, L2, L4), L); /* (0,7) offset */
647
+ if (bytes>=3) rnds = 10; else if (bytes==2) rnds = 16; else rnds = 24;
648
+ }
649
+
650
+ if (!d) {
651
+ one = zero_set_byte(1,15);
652
+ rcon = zero;
653
+ } else {
654
+ one = zero_set_byte(-1,15);
655
+ rcon = zero_set_byte((char)(rnds-1),15);
656
+ }
657
+
658
+ if ((d) && (bytes < 16)) {
659
+ block offset = vxor3(I2, L, L2);
660
+ tmp = vor(l, loadu(pad+32));
661
+ tmp = aes4pre(t_orig, vxor(tmp,offset), J, I, L);
662
+ tmp = vand(tmp, loadu(pad+32));
663
+ l = vxor(l, tmp);
664
+ }
665
+
666
+ /* Feistel */
667
+ for (i=0; i<rnds; i+=2) {
668
+ l = vor(vand(aes4(t,vxor(r,rcon), J, I, L, l), mask_ff), mask_10);
669
+ rcon = vadd(rcon,one);
670
+ r = vor(vand(aes4(t,vxor(l,rcon), J, I, L, r), mask_ff), mask_10);
671
+ rcon = vadd(rcon,one);
672
+ }
673
+ buf[0] = r;
674
+ if (bytes&1) {
675
+ l = bswap16(l);
676
+ l = sll4(l);
677
+ l = bswap16(l);
678
+ r = vand(loadu((char*)buf+bytes/2), zero_set_byte((char)0xf0,0));
679
+ l = vor(l, r);
680
+ }
681
+ storeu((char*)buf+bytes/2, l);
682
+ if (d) {
683
+ bytes -= abytes;
684
+ if (abytes==16) tmp = loadu((char*)buf+bytes);
685
+ else {
686
+ tmp = zero;
687
+ for (i=0; i<abytes; i++) ((char*)&tmp)[i] = ((char*)buf+bytes)[i];
688
+ }
689
+ if (!is_zero(tmp)) return -1;
690
+ } else if (bytes < 16) {
691
+ block offset = vxor3(I2, L, L2);
692
+ tmp = vor(zero_pad(buf[0], 16-bytes), loadu(pad+32));
693
+ tmp = aes4pre(t_orig,vxor(tmp,offset), J, I, L);
694
+ buf[0] = vxor(buf[0], vand(tmp, loadu(pad+32)));
695
+ }
696
+ for (i=0; i<bytes; i++) dst[i] = ((char*)buf)[i];
697
+ return 0;
698
+ }
699
+
700
+ /* ------------------------------------------------------------------------- */
701
+
702
+ void aez_encrypt(aez_ctx_t *ctx, char *n, unsigned nbytes,
703
+ char *ad, unsigned adbytes, unsigned abytes,
704
+ char *src, unsigned bytes, char *dst) {
705
+
706
+ block t = aez_hash(ctx, n, nbytes, ad, adbytes, abytes);
707
+ if (bytes==0) {
708
+ unsigned i;
709
+ t = aes((block*)ctx, t, vxor(ctx->L[0], ctx->L[1]));
710
+ for (i=0; i<abytes; i++) dst[i] = ((char*)&t)[i];
711
+ } else if (bytes+abytes < 32)
712
+ cipher_aez_tiny(ctx, t, 0, src, bytes, abytes, dst);
713
+ else
714
+ cipher_aez_core(ctx, t, 0, src, bytes, abytes, dst);
715
+ }
716
+
717
+ /* ------------------------------------------------------------------------- */
718
+
719
+ int aez_decrypt(aez_ctx_t *ctx, char *n, unsigned nbytes,
720
+ char *ad, unsigned adbytes, unsigned abytes,
721
+ char *src, unsigned bytes, char *dst) {
722
+
723
+ block t;
724
+ if (bytes < abytes) return -1;
725
+ t = aez_hash(ctx, n, nbytes, ad, adbytes, abytes);
726
+ if (bytes==abytes) {
727
+ block claimed = zero_pad(load_partial(src,abytes), 16-abytes);
728
+ t = zero_pad(aes((block*)ctx, t, vxor(ctx->L[0], ctx->L[1])), 16-abytes);
729
+ return is_zero(vandnot(t, claimed)) - 1; /* is_zero return 0 or 1 */
730
+ } else if (bytes < 32) {
731
+ return cipher_aez_tiny(ctx, t, 1, src, bytes, abytes, dst);
732
+ } else {
733
+ return cipher_aez_core(ctx, t, 1, src, bytes, abytes, dst);
734
+ }
735
+ }
736
+
737
+ /* ------------------------------------------------------------------------- */
738
+ /* Reference Blake2b code, here for convenience, and not for speed. */
739
+ /* Downloaded Sep 2015 from https://github.com/mjosaarinen/blake2_mjosref */
740
+
741
+ #include <stdint.h>
742
+
743
+ typedef struct {
744
+ uint8_t b[128];
745
+ uint64_t h[8];
746
+ uint64_t t[2];
747
+ size_t c;
748
+ size_t outlen;
749
+ } blake2b_ctx;
750
+
751
+ #ifndef ROTR64
752
+ #define ROTR64(x, y) (((x) >> (y)) ^ ((x) << (64 - (y))))
753
+ #endif
754
+
755
+ #define B2B_GET64(p) \
756
+ (((uint64_t) ((uint8_t *) (p))[0]) ^ \
757
+ (((uint64_t) ((uint8_t *) (p))[1]) << 8) ^ \
758
+ (((uint64_t) ((uint8_t *) (p))[2]) << 16) ^ \
759
+ (((uint64_t) ((uint8_t *) (p))[3]) << 24) ^ \
760
+ (((uint64_t) ((uint8_t *) (p))[4]) << 32) ^ \
761
+ (((uint64_t) ((uint8_t *) (p))[5]) << 40) ^ \
762
+ (((uint64_t) ((uint8_t *) (p))[6]) << 48) ^ \
763
+ (((uint64_t) ((uint8_t *) (p))[7]) << 56))
764
+
765
+ #define B2B_G(a, b, c, d, x, y) { \
766
+ v[a] = v[a] + v[b] + x; \
767
+ v[d] = ROTR64(v[d] ^ v[a], 32); \
768
+ v[c] = v[c] + v[d]; \
769
+ v[b] = ROTR64(v[b] ^ v[c], 24); \
770
+ v[a] = v[a] + v[b] + y; \
771
+ v[d] = ROTR64(v[d] ^ v[a], 16); \
772
+ v[c] = v[c] + v[d]; \
773
+ v[b] = ROTR64(v[b] ^ v[c], 63); }
774
+
775
+ static const uint64_t blake2b_iv[8] = {
776
+ 0x6A09E667F3BCC908, 0xBB67AE8584CAA73B,
777
+ 0x3C6EF372FE94F82B, 0xA54FF53A5F1D36F1,
778
+ 0x510E527FADE682D1, 0x9B05688C2B3E6C1F,
779
+ 0x1F83D9ABFB41BD6B, 0x5BE0CD19137E2179
780
+ };
781
+
782
+ static void blake2b_compress(blake2b_ctx *ctx, int last)
783
+ {
784
+ const uint8_t sigma[12][16] = {
785
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
786
+ { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
787
+ { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 },
788
+ { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 },
789
+ { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 },
790
+ { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 },
791
+ { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 },
792
+ { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 },
793
+ { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 },
794
+ { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 },
795
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
796
+ { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }
797
+ };
798
+ int i;
799
+ uint64_t v[16], m[16];
800
+
801
+ for (i = 0; i < 8; i++) {
802
+ v[i] = ctx->h[i];
803
+ v[i + 8] = blake2b_iv[i];
804
+ }
805
+
806
+ v[12] ^= ctx->t[0];
807
+ v[13] ^= ctx->t[1];
808
+ if (last)
809
+ v[14] = ~v[14];
810
+
811
+ for (i = 0; i < 16; i++)
812
+ m[i] = B2B_GET64(&ctx->b[8 * i]);
813
+
814
+ for (i = 0; i < 12; i++) {
815
+ B2B_G( 0, 4, 8, 12, m[sigma[i][ 0]], m[sigma[i][ 1]]);
816
+ B2B_G( 1, 5, 9, 13, m[sigma[i][ 2]], m[sigma[i][ 3]]);
817
+ B2B_G( 2, 6, 10, 14, m[sigma[i][ 4]], m[sigma[i][ 5]]);
818
+ B2B_G( 3, 7, 11, 15, m[sigma[i][ 6]], m[sigma[i][ 7]]);
819
+ B2B_G( 0, 5, 10, 15, m[sigma[i][ 8]], m[sigma[i][ 9]]);
820
+ B2B_G( 1, 6, 11, 12, m[sigma[i][10]], m[sigma[i][11]]);
821
+ B2B_G( 2, 7, 8, 13, m[sigma[i][12]], m[sigma[i][13]]);
822
+ B2B_G( 3, 4, 9, 14, m[sigma[i][14]], m[sigma[i][15]]);
823
+ }
824
+
825
+ for( i = 0; i < 8; ++i )
826
+ ctx->h[i] ^= v[i] ^ v[i + 8];
827
+ }
828
+
829
+ static void blake2b_update(blake2b_ctx *ctx,
830
+ const void *in, size_t inlen)
831
+ {
832
+ size_t i;
833
+
834
+ for (i = 0; i < inlen; i++) {
835
+ if (ctx->c == 128) {
836
+ ctx->t[0] += ctx->c;
837
+ if (ctx->t[0] < ctx->c)
838
+ ctx->t[1]++;
839
+ blake2b_compress(ctx, 0);
840
+ ctx->c = 0;
841
+ }
842
+ ctx->b[ctx->c++] = ((const uint8_t *) in)[i];
843
+ }
844
+ }
845
+
846
+ static void blake2b_final(blake2b_ctx *ctx, void *out)
847
+ {
848
+ size_t i;
849
+
850
+ ctx->t[0] += ctx->c;
851
+ if (ctx->t[0] < ctx->c)
852
+ ctx->t[1]++;
853
+
854
+ while (ctx->c < 128)
855
+ ctx->b[ctx->c++] = 0;
856
+ blake2b_compress(ctx, 1);
857
+
858
+ for (i = 0; i < ctx->outlen; i++) {
859
+ ((uint8_t *) out)[i] =
860
+ (ctx->h[i >> 3] >> (8 * (i & 7))) & 0xFF;
861
+ }
862
+ }
863
+
864
+ static int blake2b_init(blake2b_ctx *ctx, size_t outlen,
865
+ const void *key, size_t keylen)
866
+ {
867
+ size_t i;
868
+
869
+ if (outlen == 0 || outlen > 64 || keylen > 64)
870
+ return -1;
871
+
872
+ for (i = 0; i < 8; i++)
873
+ ctx->h[i] = blake2b_iv[i];
874
+ ctx->h[0] ^= 0x01010000 ^ (keylen << 8) ^ outlen;
875
+
876
+ ctx->t[0] = 0;
877
+ ctx->t[1] = 0;
878
+ ctx->c = 0;
879
+ ctx->outlen = outlen;
880
+
881
+ for (i = keylen; i < 128; i++)
882
+ ctx->b[i] = 0;
883
+ if (keylen > 0) {
884
+ blake2b_update(ctx, key, keylen);
885
+ ctx->c = 128;
886
+ }
887
+
888
+ return 0;
889
+ }
890
+
891
+ static int blake2b(void *out, size_t outlen,
892
+ const void *key, size_t keylen,
893
+ const void *in, size_t inlen)
894
+ {
895
+ blake2b_ctx ctx;
896
+
897
+ if (blake2b_init(&ctx, outlen, key, keylen))
898
+ return -1;
899
+ blake2b_update(&ctx, in, inlen);
900
+ blake2b_final(&ctx, out);
901
+
902
+ return 0;
903
+ }
904
+
905
+ /* ------------------------------------------------------------------------- */
906
+ /* aez mapping for CAESAR competition */
907
+
908
+ int crypto_aead_encrypt(
909
+ unsigned char *c,unsigned long long *clen,
910
+ const unsigned char *m,unsigned long long mlen,
911
+ const unsigned char *ad,unsigned long long adlen,
912
+ const unsigned char *nsec,
913
+ const unsigned char *npub,
914
+ const unsigned char *k
915
+ )
916
+ {
917
+ aez_ctx_t ctx;
918
+ (void)nsec;
919
+ if (clen) *clen = mlen+16;
920
+ aez_setup((unsigned char *)k, 48, &ctx);
921
+ aez_encrypt(&ctx, (char *)npub, 12,
922
+ (char *)ad, (unsigned)adlen, 16,
923
+ (char *)m, (unsigned)mlen, (char *)c);
924
+ return 0;
925
+ }
926
+
927
+ int crypto_aead_decrypt(
928
+ unsigned char *m,unsigned long long *mlen,
929
+ unsigned char *nsec,
930
+ const unsigned char *c,unsigned long long clen,
931
+ const unsigned char *ad,unsigned long long adlen,
932
+ const unsigned char *npub,
933
+ const unsigned char *k
934
+ )
935
+ {
936
+ aez_ctx_t ctx;
937
+ (void)nsec;
938
+ if (mlen) *mlen = clen-16;
939
+ aez_setup((unsigned char *)k, 48, &ctx);
940
+ return aez_decrypt(&ctx, (char *)npub, 12,
941
+ (char *)ad, (unsigned)adlen, 16,
942
+ (char *)c, (unsigned)clen, (char *)m);
943
+ }