aez 0.1.0
- checksums.yaml +7 -0
- data/.github/workflows/ruby.yml +37 -0
- data/.gitignore +13 -0
- data/.rspec +3 -0
- data/.ruby-gemset +1 -0
- data/.ruby-version +1 -0
- data/CODE_OF_CONDUCT.md +74 -0
- data/Gemfile +6 -0
- data/LICENSE.txt +21 -0
- data/README.md +48 -0
- data/Rakefile +13 -0
- data/aez.gemspec +33 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/ext/aezv5/encrypt.c +943 -0
- data/ext/aezv5/extconf.rb +8 -0
- data/lib/aez.rb +76 -0
- data/lib/aez/version.rb +3 -0
- metadata +131 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
---
SHA256:
  metadata.gz: 4df307fcf2d926a7d97d8a67b446653314c41da2ba48daddabaa2db99a005f6c
  data.tar.gz: c753537e76da64402c0ccdbdd92c85ce5fe0a81ca6fc767462f80360e2da38ce
SHA512:
  metadata.gz: 15148d0a90cebfeb9db8169fbda4812374bf1f6e7a5f455225e4b84662e7d98f8f7f11e35ffa8534ba983a86109805af5738677a8ac21266ea7de9f723db8496
  data.tar.gz: d854f7d44f95b9711bcbd111fe91237056bf1772441f984df938171b4106cf1681bb7e354754009cc791e2dc1ab58e60cd958490fd6ebb2380bb0be42a1e3932
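As an aside, these digests are the standard RubyGems package checksums: the released `.gem` file is a tar archive whose `metadata.gz` and `data.tar.gz` members are what the SHA256/SHA512 values above describe. A hypothetical spot-check (assuming you have already run `gem fetch aez -v 0.1.0` and unpacked it with `tar xf aez-0.1.0.gem` in the current directory) could look like:

```ruby
require 'digest'

# Expected SHA256 digests, copied from checksums.yaml above.
expected = {
  'metadata.gz' => '4df307fcf2d926a7d97d8a67b446653314c41da2ba48daddabaa2db99a005f6c',
  'data.tar.gz' => 'c753537e76da64402c0ccdbdd92c85ce5fe0a81ca6fc767462f80360e2da38ce'
}

expected.each do |file, digest|
  actual = Digest::SHA256.file(file).hexdigest
  puts format('%-12s %s', file, actual == digest ? 'OK' : 'MISMATCH')
end
```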
data/.github/workflows/ruby.yml
ADDED
@@ -0,0 +1,37 @@
# This workflow uses actions that are not certified by GitHub.
# They are provided by a third-party and are governed by
# separate terms of service, privacy policy, and support
# documentation.
# This workflow will download a prebuilt Ruby version, install dependencies and run tests with Rake
# For more information see: https://github.com/marketplace/actions/setup-ruby-jruby-and-truffleruby

name: Ruby

on:
  push:
    branches: [ master ]
  pull_request:
    branches: [ master ]

jobs:
  test:

    runs-on: ubuntu-latest
    strategy:
      matrix:
        ruby-version: ['2.6', '2.7', '3.0']

    steps:
    - uses: actions/checkout@v2
    - name: Set up Ruby
      # To automatically get bug fixes and new Ruby versions for ruby/setup-ruby,
      # change this to (see https://github.com/ruby/setup-ruby#versioning):
      # uses: ruby/setup-ruby@v1
      uses: ruby/setup-ruby@473e4d8fe5dd94ee328fdfca9f8c9c7afc9dae5e
      with:
        ruby-version: ${{ matrix.ruby-version }}
        bundler-cache: true # runs 'bundle install' and caches installed gems automatically
    - name: Compile extension
      run: bundle exec rake compile
    - name: Run tests
      run: bundle exec rake spec
data/.gitignore
ADDED
data/.rspec
ADDED
data/.ruby-gemset
ADDED
@@ -0,0 +1 @@
aez
data/.ruby-version
ADDED
@@ -0,0 +1 @@
ruby-3.0.0
data/CODE_OF_CONDUCT.md
ADDED
@@ -0,0 +1,74 @@
# Contributor Covenant Code of Conduct

## Our Pledge

In the interest of fostering an open and welcoming environment, we as
contributors and maintainers pledge to making participation in our project and
our community a harassment-free experience for everyone, regardless of age, body
size, disability, ethnicity, gender identity and expression, level of experience,
nationality, personal appearance, race, religion, or sexual identity and
orientation.

## Our Standards

Examples of behavior that contributes to creating a positive environment
include:

* Using welcoming and inclusive language
* Being respectful of differing viewpoints and experiences
* Gracefully accepting constructive criticism
* Focusing on what is best for the community
* Showing empathy towards other community members

Examples of unacceptable behavior by participants include:

* The use of sexualized language or imagery and unwelcome sexual attention or
  advances
* Trolling, insulting/derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or electronic
  address, without explicit permission
* Other conduct which could reasonably be considered inappropriate in a
  professional setting

## Our Responsibilities

Project maintainers are responsible for clarifying the standards of acceptable
behavior and are expected to take appropriate and fair corrective action in
response to any instances of unacceptable behavior.

Project maintainers have the right and responsibility to remove, edit, or
reject comments, commits, code, wiki edits, issues, and other contributions
that are not aligned to this Code of Conduct, or to ban temporarily or
permanently any contributor for other behaviors that they deem inappropriate,
threatening, offensive, or harmful.

## Scope

This Code of Conduct applies both within project spaces and in public spaces
when an individual is representing the project or its community. Examples of
representing a project or community include using an official project e-mail
address, posting via an official social media account, or acting as an appointed
representative at an online or offline event. Representation of a project may be
further defined and clarified by project maintainers.

## Enforcement

Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported by contacting the project team at azuchi@haw.co.jp. All
complaints will be reviewed and investigated and will result in a response that
is deemed necessary and appropriate to the circumstances. The project team is
obligated to maintain confidentiality with regard to the reporter of an incident.
Further details of specific enforcement policies may be posted separately.

Project maintainers who do not follow or enforce the Code of Conduct in good
faith may face temporary or permanent repercussions as determined by other
members of the project's leadership.

## Attribution

This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
available at [http://contributor-covenant.org/version/1/4][version]

[homepage]: http://contributor-covenant.org
[version]: http://contributor-covenant.org/version/1/4/
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
The MIT License (MIT)

Copyright (c) 2021 Shigeyuki Azuchi.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
data/README.md
ADDED
@@ -0,0 +1,48 @@
# AEZ for Ruby [![Build Status](https://github.com/azuchi/aez/actions/workflows/ruby.yml/badge.svg?branch=master)](https://github.com/azuchi/aez/actions/workflows/ruby.yml) [![Gem Version](https://badge.fury.io/rb/aez.svg)](https://badge.fury.io/rb/aez) [![MIT License](http://img.shields.io/badge/license-MIT-blue.svg?style=flat)](LICENSE)

[AEZ](http://web.cs.ucdavis.edu/~rogaway/aez/) binding for Ruby.
This library calls the AEZv5 C implementation, which uses AES-NI hardware optimizations, via FFI.

## Requirements

The following limitations are inherited from Ted Krovetz's C implementation:

- Intel or ARM CPU supporting AES instructions
- Faster if all pointers are 16-byte aligned
- Max 16 byte nonce, 16 byte authenticator
- Single AD (AEZ spec allows vector AD but this code doesn't)
- Max 2^32-1 byte buffers allowed (due to using unsigned int)

## Installation

Add this line to your application's Gemfile:

```ruby
gem 'aez'
```

And then execute:

    $ bundle

Or install it yourself as:

    $ gem install aez

## Usage

```ruby
require 'aez'

key = ['9adf7a023fbc4e663695f627a8d5b5c45f6752e375d19e11a669e6b949347d0cf5e0e2516ee285af365224976afa60be'].pack('H*')
nonce = ['799de3d90fbd6fed93b5f96cf9f4e852'].pack('H*')
ad = ['d6e278e0c6ede09d302d6fde09de77711a9a02fc8a049fb34a5e3f00c1cfc336d0'].pack('H*')
message = ['efea7ecfa45f51b52ce038cf6c0704392c2211bfca17a36284f63a902b37f0ab'].pack('H*')
abyte = 16

# Encryption
cipher_text = AEZ.encrypt(key, message, ad, nonce, abyte)

# Decryption
plain_text = AEZ.decrypt(key, cipher_text, ad, nonce, abyte)
```
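For a quick sanity check of the API shown above, a round trip can be written with nothing but the documented `AEZ.encrypt` / `AEZ.decrypt` calls. The inputs below are made up for illustration (a 48-byte key and 16-byte nonce, matching the sizes used in the README example):

```ruby
require 'aez'
require 'securerandom'

key = SecureRandom.random_bytes(48)   # 48-byte key, as in the example above
nonce = SecureRandom.random_bytes(16) # up to 16-byte nonce per the requirements
ad = 'associated data'
abyte = 16                            # authenticator (tag) length in bytes

message = 'attack at dawn'
cipher_text = AEZ.encrypt(key, message, ad, nonce, abyte)
plain_text = AEZ.decrypt(key, cipher_text, ad, nonce, abyte)

raise 'round trip failed' unless plain_text == message
# AEZ is length-expanding by abyte bytes, so cipher_text should be
# message.bytesize + abyte bytes long.
```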
data/Rakefile
ADDED
@@ -0,0 +1,13 @@
# frozen_string_literal: true

require 'bundler/gem_tasks'
require 'rspec/core/rake_task'
require 'rake/extensiontask'

RSpec::Core::RakeTask.new(:spec)

task default: :spec

Rake::ExtensionTask.new 'aezv5' do |ext|
  ext.lib_dir = 'lib/aez'
end
data/aez.gemspec
ADDED
@@ -0,0 +1,33 @@
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'aez/version'

Gem::Specification.new do |spec|
  spec.name = 'aez'
  spec.version = AEZ::VERSION
  spec.authors = ['Shigeyuki Azuchi']
  spec.email = ['azuchi@chaintope.com']

  spec.summary = 'AEZ binding for ruby.'
  spec.description = 'AEZ binding for ruby.'
  spec.homepage = 'https://github.com/azuchi/aez'
  spec.license = 'MIT'

  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
  spec.files = Dir.chdir(File.expand_path('..', __FILE__)) do
    `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
  end
  spec.bindir = 'exe'
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ['lib']
  spec.extensions = ['ext/aezv5/extconf.rb']
  spec.add_runtime_dependency 'ffi', '>= 1.15.1'

  spec.add_development_dependency 'bundler'
  spec.add_development_dependency 'rake', '>= 12.3.3'
  spec.add_development_dependency 'rake-compiler', '>= 1.1.1'
  spec.add_development_dependency 'rspec', '~> 3.0'

end
data/bin/console
ADDED
@@ -0,0 +1,14 @@
#!/usr/bin/env ruby

require "bundler/setup"
require "aez"

# You can add fixtures and/or initialization code here to make experimenting
# with your gem easier. You can also use a different console, if you like.

# (If you use this, don't forget to add pry to your Gemfile!)
# require "pry"
# Pry.start

require "irb"
IRB.start(__FILE__)
data/bin/setup
ADDED
data/ext/aezv5/encrypt.c
ADDED
@@ -0,0 +1,943 @@
/*
// AEZ v5 AES-NI version. AEZ info: http://www.cs.ucdavis.edu/~rogaway/aez
//
// REQUIREMENTS: - Intel or ARM CPU supporting AES instructions
//               - Faster if all pointers are 16-byte aligned.
//               - Max 16 byte nonce, 16 byte authenticator
//               - Single AD (AEZ spec allows vector AD but this code doesn't)
//               - Max 2^32-1 byte buffers allowed (due to using unsigned int)
//
// Written by Ted Krovetz (ted@krovetz.net). Last modified 21 March 2017.
//
// This is free and unencumbered software released into the public domain.
//
// Anyone is free to copy, modify, publish, use, compile, sell, or
// distribute this software, either in source code form or as a compiled
// binary, for any purpose, commercial or non-commercial, and by any
// means.
//
// In jurisdictions that recognize copyright laws, the author or authors
// of this software dedicate any and all copyright interest in the
// software to the public domain. We make this dedication for the benefit
// of the public at large and to the detriment of our heirs and
// successors. We intend this dedication to be an overt act of
// relinquishment in perpetuity of all present and future rights to this
// software under copyright law.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
// IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
//
// For more information, please refer to <http://unlicense.org/>
*/

#include <stdint.h>
#include <stddef.h>

/* ------------------------------------------------------------------------- */
#if __AES__ /* Defined by gcc/clang when compiling for AES-NI */
/* ------------------------------------------------------------------------- */

#include <smmintrin.h>
#include <wmmintrin.h>
#define block __m128i

/* ------------------------------------------------------------------------- */

#define zero _mm_setzero_si128()
#define vadd(x,y) _mm_add_epi8(x,y)
#define vand(x,y) _mm_and_si128(x,y)
#define vandnot(x,y) _mm_andnot_si128(x,y) /* (~x)&y */
#define vor(x,y) _mm_or_si128(x,y)
#define vxor(x,y) _mm_xor_si128(x,y)

static int is_zero(block x) { return _mm_testz_si128(x,x); } /* 0 or 1 */

static block sll4(block x) {
    return vor(_mm_srli_epi64(x, 4), _mm_slli_epi64(_mm_srli_si128(x, 8), 60));
}

static block srl4(block x) {
    return vor(_mm_slli_epi64(x, 4), _mm_srli_epi64(_mm_slli_si128(x, 8), 60));
}

static __m128i bswap16(__m128i b) {
    const __m128i t = _mm_set_epi8(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15);
    return _mm_shuffle_epi8(b,t);
}

static __m128i double_block(__m128i bl) {
    const __m128i mask = _mm_set_epi32(135,1,1,1);
    __m128i tmp = _mm_srai_epi32(bl, 31);
    tmp = _mm_and_si128(tmp, mask);
    tmp = _mm_shuffle_epi32(tmp, _MM_SHUFFLE(2,1,0,3));
    bl = _mm_slli_epi32(bl, 1);
    return _mm_xor_si128(bl,tmp);
}

static __m128i aes(__m128i *key, __m128i in, __m128i first_key) {
    in = vxor(in, first_key);
    in = _mm_aesenc_si128 (in,key[0]);
    in = _mm_aesenc_si128 (in,key[2]);
    in = _mm_aesenc_si128 (in,key[5]);
    in = _mm_aesenc_si128 (in,key[0]);
    in = _mm_aesenc_si128 (in,key[2]);
    in = _mm_aesenc_si128 (in,key[5]);
    in = _mm_aesenc_si128 (in,key[0]);
    in = _mm_aesenc_si128 (in,key[2]);
    in = _mm_aesenc_si128 (in,key[5]);
    return _mm_aesenc_si128 (in,key[0]);
}

static __m128i aes4(__m128i in, __m128i a, __m128i b,
                    __m128i c, __m128i d, __m128i e) {
    in = _mm_aesenc_si128(vxor(in,a),b);
    in = _mm_aesenc_si128(in,c);
    in = _mm_aesenc_si128(in,d);
    return _mm_aesenc_si128 (in,e);
}

#define aes4pre(in,a,b,c,d) aes4(in,a,b,c,d,zero)

static __m128i loadu(const void *p) { return _mm_loadu_si128((__m128i*)p); }
static void storeu(const void *p, __m128i x) {_mm_storeu_si128((__m128i*)p,x);}

#define load loadu /* Intel with AES-NI has fast unaligned loads/stores */
#define store storeu

/* ------------------------------------------------------------------------- */
#elif __ARM_FEATURE_CRYPTO
/* ------------------------------------------------------------------------- */

#include <arm_neon.h>
#define block uint8x16_t

#define zero vmovq_n_u8(0)
#define vadd(x,y) vaddq_u8(x,y)
#define vand(x,y) vandq_u8(x,y)
#define vandnot(x,y) vbicq_u8(y,x) /* (~x)&y */
#define vor(x,y) vorrq_u8(x,y)
#define vxor(x,y) veorq_u8(x,y)

static int is_zero(block x) { /* 0 or 1 */
    uint8x8_t t = vorr_u8(vget_high_u8(x), vget_low_u8(x));
    return vget_lane_u64(vreinterpret_u64_u8(t),0) == 0;
}

static block srl4(block x) {
    const block mask = {15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,0};
    uint8x16_t tmp = vandq_u8(vshrq_n_u8(vextq_u8(x, x, 1),4),mask);
    return veorq_u8(tmp,vshlq_n_u8(x,4));
}

static block sll4(block x) {
    const block mask = {0,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15};
    uint8x16_t tmp = vshlq_n_u8(vandq_u8(vextq_u8(x, x, 15),mask),4);
    return veorq_u8(tmp,vshrq_n_u8(x,4));
}

static uint8x16_t bswap16(uint8x16_t b) { return b; } /* Not with uint8x16_t */

static block double_block(block b) {
    const block mask = {135,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1};
    block tmp = (block)vshrq_n_s8((int8x16_t)b,7);
    tmp = vandq_u8(tmp, mask);
    tmp = vextq_u8(tmp, tmp, 1); /* Rotate high byte to low end */
    b = vshlq_n_u8(b,1);
    return veorq_u8(tmp,b);
}

static uint8x16_t aes(uint8x16_t *key, uint8x16_t in, uint8x16_t first_key) {
    in = vaesmcq_u8(vaeseq_u8(in, first_key));
    in = vaesmcq_u8(vaeseq_u8(in, key[0]));
    in = vaesmcq_u8(vaeseq_u8(in, key[2]));
    in = vaesmcq_u8(vaeseq_u8(in, key[5]));
    in = vaesmcq_u8(vaeseq_u8(in, key[0]));
    in = vaesmcq_u8(vaeseq_u8(in, key[2]));
    in = vaesmcq_u8(vaeseq_u8(in, key[5]));
    in = vaesmcq_u8(vaeseq_u8(in, key[0]));
    in = vaesmcq_u8(vaeseq_u8(in, key[2]));
    in = vaesmcq_u8(vaeseq_u8(in, key[5]));
    return vxor(in, key[0]);
}

static uint8x16_t aes4pre(uint8x16_t in, uint8x16_t a, uint8x16_t b,
                          uint8x16_t c, uint8x16_t d) {
    in = vaesmcq_u8(vaeseq_u8(in, a));
    in = vaesmcq_u8(vaeseq_u8(in, b));
    in = vaesmcq_u8(vaeseq_u8(in, c));
    return vaesmcq_u8(vaeseq_u8(in, d));
}

#define aes4(in,a,b,c,d,e) vxor(aes4pre(in,a,b,c,d),e)

static uint8x16_t load(const void *p) { return *(uint8x16_t *)p; }
static void store(void *p, uint8x16_t x) { *(uint8x16_t *)p = x; }

#define loadu load /* ARMv8 allows unaligned loads/stores */
#define storeu store /* ARMv8 allows unaligned stores */

/* ------------------------------------------------------------------------- */
#else
#error - This implementation requires __AES__ or __ARM_FEATURE_CRYPTO
#endif
/* ------------------------------------------------------------------------- */

#define vxor3(x,y,z) vxor(vxor(x,y),z)
#define vxor4(w,x,y,z) vxor(vxor(w,x),vxor(y,z))
#define load_partial(p,n) loadu(p)

/*
Might need a version like this if, for example, we want to load a 12-byte nonce
into a 16-byte block.

static block load_partial(const void *p, unsigned n) {
    if ((intptr_t)p % 16 == 0) return load(p);
    else {
        block tmp; unsigned i;
        for (i=0; i<n; i++) ((char*)&tmp)[i] = ((char*)p)[i];
        return tmp;
    }
}
*/

static const unsigned char pad[] = {0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
                                    0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,
                                    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
                                    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
                                    0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
                                    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};

static block zero_pad(block x, unsigned zero_bytes) {
    return vand(x, loadu(pad + zero_bytes));
}

static block one_zero_pad(block x, unsigned one_zero_bytes) {
    block *p = (block*)(pad + one_zero_bytes);
    return vor(vand(x, loadu(p)), loadu(p+1));
}

static block zero_set_byte(char val, unsigned idx) {
    block tmp = zero; ((char *)&tmp)[idx] = val; return tmp;
}

/* ------------------------------------------------------------------------- */

typedef struct {             /* All data memory-correct except 2I register-correct */
    block I[2];              /* 1I, 2I */
    block J[3];              /* 1J,2J,4J */
    block L[3];              /* 1L,2L,4L */
    block delta3_cache;
} aez_ctx_t;

/* ------------------------------------------------------------------------- */

static int blake2b(void *out, size_t outlen,
                   const void *key, size_t keylen,
                   const void *in, size_t inlen);

/* ------------------------------------------------------------------------- */

void aez_setup(unsigned char *key, unsigned keylen, aez_ctx_t *ctx) {
    block tmp;
    if (keylen==48) {
        ctx->I[0] = loadu(key);
        ctx->J[0] = loadu(key+16);
        ctx->L[0] = loadu(key+32);
    } else {
        blake2b(ctx, 48, 0, 0, key, keylen);   /* Puts IJL into ctx */
        ctx->L[0] = ctx->J[0];                 /* Rearrange. */
        ctx->J[0] = ctx->I[1];                 /* Rearrange. */
    }
    /* Fill remaining ctx locations with doublings */
    ctx->I[1] = double_block(bswap16(ctx->I[0]));   /* No post-bswap */
    ctx->J[1] = bswap16(tmp = double_block(bswap16(ctx->J[0])));
    ctx->J[2] = bswap16(double_block(tmp));
    ctx->L[1] = bswap16(tmp = double_block(bswap16(ctx->L[0])));
    ctx->L[2] = bswap16(double_block(tmp));
    ctx->delta3_cache = zero;
}

/* ------------------------------------------------------------------------- */

/* !! Warning !! Only handles nbytes <= 16 and abytes <= 16 */
static block aez_hash(aez_ctx_t *ctx, char *n, unsigned nbytes, char *ad,
                      unsigned adbytes, unsigned abytes) {
    block o1, o2, o3, o4, o5, o6, o7, o8, sum, offset, tmp;
    block I=ctx->I[0], Ifordoubling = ctx->I[1], I2 = bswap16(Ifordoubling);
    block L=ctx->L[0], L2=ctx->L[1],L4=ctx->L[2];
    block J=ctx->J[0], J2 = ctx->J[1], J4 = ctx->J[2], J5 = vxor(J,J4);

    /* Process abytes and nonce */
    offset = vxor4(J, J2, I2, L);
    tmp = zero_set_byte((char)(8*abytes),15);
    sum = aes4pre(offset,tmp,J,I,L);

    if (nbytes==16) sum = aes4(vxor(loadu(n), J4), vxor(I2, L),J,I,L,sum);
    else sum = aes4(vxor(J4, I),
                    one_zero_pad(load_partial(n,nbytes),16-nbytes),J,I,L,sum);

    if (ad) {   /* Possible easy misuse: ad==null && adbytes==0 */
        if (adbytes==0) {
            ctx->delta3_cache = aes4pre(vxor(J5, I), loadu(pad+32),J,I,L);
        } else {
            block delta3 = zero;
            offset = vxor(J5, I2);
            while (adbytes >= 8*16) {
                o1 = vxor(offset,L);
                o2 = vxor(offset,L2);
                o3 = vxor(o1,L2);
                o4 = vxor(offset,L4);
                o5 = vxor(o1,L4);
                o6 = vxor(o2,L4);
                o7 = vxor(o3,L4);
                o8 = offset;
                Ifordoubling = double_block(Ifordoubling);
                offset = vxor(J5, bswap16(Ifordoubling));
                delta3 = vxor(delta3, aes4pre(load(ad+ 0), o1, J, I, L));
                delta3 = vxor(delta3, aes4pre(load(ad+ 16), o2, J, I, L));
                delta3 = vxor(delta3, aes4pre(load(ad+ 32), o3, J, I, L));
                delta3 = vxor(delta3, aes4pre(load(ad+ 48), o4, J, I, L));
                delta3 = vxor(delta3, aes4pre(load(ad+ 64), o5, J, I, L));
                delta3 = vxor(delta3, aes4pre(load(ad+ 80), o6, J, I, L));
                delta3 = vxor(delta3, aes4pre(load(ad+ 96), o7, J, I, L));
                delta3 = vxor(delta3, aes4pre(load(ad+112), o8, J, I, L));
                adbytes-=8*16; ad+=8*16;
            }
            if (adbytes >= 4*16) {
                o1 = vxor(offset,L);
                o2 = vxor(offset,L2);
                o3 = vxor(o1,L2);
                o4 = offset = vxor(offset,L4);
                delta3 = vxor(delta3, aes4pre(load(ad+ 0), o1, J, I, L));
                delta3 = vxor(delta3, aes4pre(load(ad+ 16), o2, J, I, L));
                delta3 = vxor(delta3, aes4pre(load(ad+ 32), o3, J, I, L));
                delta3 = vxor(delta3, aes4pre(load(ad+ 48), o4, J, I, L));
                adbytes-=4*16; ad+=4*16;
            }
            if (adbytes >= 2*16) {
                o1 = vxor(offset,L);
                o2 = offset = vxor(offset,L2);
                delta3 = vxor(delta3, aes4pre(load(ad+ 0), o1, J, I, L));
                delta3 = vxor(delta3, aes4pre(load(ad+ 16), o2, J, I, L));
                adbytes-=2*16; ad+=2*16;
            }
            if (adbytes >= 1*16) {
                o1 = vxor(offset,L);
                delta3 = vxor(delta3, aes4pre(load(ad+ 0), o1, J, I, L));
                adbytes-=1*16; ad+=1*16;
            }
            if (adbytes) {
                tmp = vxor3(J5, I, one_zero_pad(load(ad),16-adbytes));
                delta3 = aes4(vxor(J5, I), one_zero_pad(load(ad),16-adbytes),
                              J, I, L, delta3);
            }
            ctx->delta3_cache = delta3;
        }
    }
    return vxor(sum,ctx->delta3_cache);
}

/* ------------------------------------------------------------------------- */

static block pass_one(aez_ctx_t *ctx, block *src, unsigned bytes, block *dst) {
    block o1, o2, o3, o4, o5, o6, o7, o8, offset, tmp, sum=zero;
    block I=ctx->I[0], Ifordoubling = ctx->I[1];
    block L=ctx->L[0], L2=ctx->L[1],L4=ctx->L[2];
    block J=ctx->J[0];
    offset = vxor(J, bswap16(Ifordoubling));
    while (bytes >= 16*16) {
        o1 = vxor(offset,L);
        o2 = vxor(offset,L2);
        o3 = vxor(o1,L2);
        o4 = vxor(offset,L4);
        o5 = vxor(o1,L4);
        o6 = vxor(o2,L4);
        o7 = vxor(o3,L4);
        o8 = offset;
        Ifordoubling = double_block(Ifordoubling);
        offset = vxor(J,bswap16(Ifordoubling));
        store(dst+ 0, aes4(load(src + 1),o1, J, I, L, load(src+ 0)));
        store(dst+ 2, aes4(load(src + 3),o2, J, I, L, load(src+ 2)));
        store(dst+ 4, aes4(load(src + 5),o3, J, I, L, load(src+ 4)));
        store(dst+ 6, aes4(load(src + 7),o4, J, I, L, load(src+ 6)));
        store(dst+ 8, aes4(load(src + 9),o5, J, I, L, load(src+ 8)));
        store(dst+10, aes4(load(src +11),o6, J, I, L, load(src+10)));
        store(dst+12, aes4(load(src +13),o7, J, I, L, load(src+12)));
        store(dst+14, aes4(load(src +15),o8, J, I, L, load(src+14)));
        tmp=aes4(I,load(dst+ 0),J,I,L,load(src+ 1));store(dst+ 1,tmp);
        sum=vxor(sum,tmp);
        tmp=aes4(I,load(dst+ 2),J,I,L,load(src+ 3));
        store(dst+ 3,tmp);sum=vxor(sum,tmp);
        tmp=aes4(I,load(dst+ 4),J,I,L,load(src+ 5));
        store(dst+ 5,tmp);sum=vxor(sum,tmp);
        tmp=aes4(I,load(dst+ 6),J,I,L,load(src+ 7));
        store(dst+ 7,tmp);sum=vxor(sum,tmp);
        tmp=aes4(I,load(dst+ 8),J,I,L,load(src+ 9));
        store(dst+ 9,tmp);sum=vxor(sum,tmp);
        tmp=aes4(I,load(dst+10),J,I,L,load(src+11));
        store(dst+11,tmp);sum=vxor(sum,tmp);
        tmp=aes4(I,load(dst+12),J,I,L,load(src+13));
        store(dst+13,tmp);sum=vxor(sum,tmp);
        tmp=aes4(I,load(dst+14),J,I,L,load(src+15));
        store(dst+15,tmp);sum=vxor(sum,tmp);
        bytes -= 16*16; dst += 16; src += 16;
    }
    if (bytes >= 8*16) {
        o1 = vxor(offset,L);
        o2 = vxor(offset,L2);
        o3 = vxor(o1,L2);
        o4 = offset = vxor(offset,L4);
        store(dst+ 0, aes4(load(src + 1),o1, J, I, L, load(src+ 0)));
        store(dst+ 2, aes4(load(src + 3),o2, J, I, L, load(src+ 2)));
        store(dst+ 4, aes4(load(src + 5),o3, J, I, L, load(src+ 4)));
        store(dst+ 6, aes4(load(src + 7),o4, J, I, L, load(src+ 6)));
        tmp=aes4(I,load(dst+ 0),J,I,L,load(src+ 1));
        store(dst+ 1,tmp);sum=vxor(sum,tmp);
        tmp=aes4(I,load(dst+ 2),J,I,L,load(src+ 3));
        store(dst+ 3,tmp);sum=vxor(sum,tmp);
        tmp=aes4(I,load(dst+ 4),J,I,L,load(src+ 5));
        store(dst+ 5,tmp);sum=vxor(sum,tmp);
        tmp=aes4(I,load(dst+ 6),J,I,L,load(src+ 7));
        store(dst+ 7,tmp);sum=vxor(sum,tmp);
        bytes -= 8*16; dst += 8; src += 8;
    }
    if (bytes >= 4*16) {
        o1 = vxor(offset,L);
        o2 = offset = vxor(offset,L2);
        store(dst+ 0, aes4(load(src + 1),o1, J, I, L, load(src+ 0)));
        store(dst+ 2, aes4(load(src + 3),o2, J, I, L, load(src+ 2)));
        tmp=aes4(I,load(dst+ 0),J,I,L,load(src+ 1));
        store(dst+ 1,tmp);sum=vxor(sum,tmp);
        tmp=aes4(I,load(dst+ 2),J,I,L,load(src+ 3));
        store(dst+ 3,tmp);sum=vxor(sum,tmp);
        bytes -= 4*16; dst += 4; src += 4;
    }
    if (bytes) {
        o1 = vxor(offset,L);
        store(dst+ 0, aes4(load(src + 1),o1, J, I, L, load(src+ 0)));
        tmp=aes4(I,load(dst+ 0),J,I,L,load(src+ 1));
        store(dst+ 1,tmp);sum=vxor(sum,tmp);
    }
    return sum;
}

/* ------------------------------------------------------------------------- */

static block pass_two(aez_ctx_t *ctx, block s, unsigned bytes, block *dst) {
    block o1, o2, o3, o4, o5, o6, o7, o8, sum=zero, offset, fs[8], tmp[8];
    block I=ctx->I[0], Ifordoubling = ctx->I[1];
    block L=ctx->L[0], L2=ctx->L[1],L4=ctx->L[2];
    block J=ctx->J[0], J2=ctx->J[1], J3=vxor(J,J2);
    offset = vxor(J2, bswap16(Ifordoubling));
    while (bytes >= 16*16) {
        o1 = vxor(offset,L);
        o2 = vxor(offset,L2);
        o3 = vxor(o1,L2);
        o4 = vxor(offset,L4);
        o5 = vxor(o1,L4);
        o6 = vxor(o2,L4);
        o7 = vxor(o3,L4);
        o8 = offset;
        Ifordoubling = double_block(Ifordoubling);
        offset = vxor(J2, bswap16(Ifordoubling));
        fs[0] = aes4pre(s,o1,J,I,L); fs[1] = aes4pre(s,o2,J,I,L);
        fs[2] = aes4pre(s,o3,J,I,L); fs[3] = aes4pre(s,o4,J,I,L);
        fs[4] = aes4pre(s,o5,J,I,L); fs[5] = aes4pre(s,o6,J,I,L);
        fs[6] = aes4pre(s,o7,J,I,L); fs[7] = aes4pre(s,o8,J,I,L);
        o1 = vxor(J3,o1); o2 = vxor(J3,o2);
        o3 = vxor(J3,o3); o4 = vxor(J3,o4);
        o5 = vxor(J3,o5); o6 = vxor(J3,o6);
        o7 = vxor(J3,o7); o8 = vxor(J3,o8);
        tmp[0] = vxor(load(dst+ 0),fs[0]); sum = vxor(sum,tmp[0]);
        store(dst+ 0,vxor(load(dst+ 1),fs[0]));
        tmp[1] = vxor(load(dst+ 2),fs[1]); sum = vxor(sum,tmp[1]);
        store(dst+ 2,vxor(load(dst+ 3),fs[1]));
        tmp[2] = vxor(load(dst+ 4),fs[2]); sum = vxor(sum,tmp[2]);
        store(dst+ 4,vxor(load(dst+ 5),fs[2]));
        tmp[3] = vxor(load(dst+ 6),fs[3]); sum = vxor(sum,tmp[3]);
        store(dst+ 6,vxor(load(dst+ 7),fs[3]));
        tmp[4] = vxor(load(dst+ 8),fs[4]); sum = vxor(sum,tmp[4]);
        store(dst+ 8,vxor(load(dst+ 9),fs[4]));
        tmp[5] = vxor(load(dst+10),fs[5]); sum = vxor(sum,tmp[5]);
        store(dst+10,vxor(load(dst+11),fs[5]));
        tmp[6] = vxor(load(dst+12),fs[6]); sum = vxor(sum,tmp[6]);
        store(dst+12,vxor(load(dst+13),fs[6]));
        tmp[7] = vxor(load(dst+14),fs[7]); sum = vxor(sum,tmp[7]);
        store(dst+14,vxor(load(dst+15),fs[7]));
        store(dst+ 1, aes4(I,load(dst+ 0), J, I, L, tmp[0]));
        store(dst+ 3, aes4(I,load(dst+ 2), J, I, L, tmp[1]));
        store(dst+ 5, aes4(I,load(dst+ 4), J, I, L, tmp[2]));
        store(dst+ 7, aes4(I,load(dst+ 6), J, I, L, tmp[3]));
        store(dst+ 9, aes4(I,load(dst+ 8), J, I, L, tmp[4]));
        store(dst+11, aes4(I,load(dst+10), J, I, L, tmp[5]));
        store(dst+13, aes4(I,load(dst+12), J, I, L, tmp[6]));
        store(dst+15, aes4(I,load(dst+14), J, I, L, tmp[7]));
        store(dst+ 0, aes4(load(dst+ 1),o1, J, I, L, load(dst+ 0)));
        store(dst+ 2, aes4(load(dst+ 3),o2, J, I, L, load(dst+ 2)));
        store(dst+ 4, aes4(load(dst+ 5),o3, J, I, L, load(dst+ 4)));
        store(dst+ 6, aes4(load(dst+ 7),o4, J, I, L, load(dst+ 6)));
        store(dst+ 8, aes4(load(dst+ 9),o5, J, I, L, load(dst+ 8)));
        store(dst+10, aes4(load(dst+11),o6, J, I, L, load(dst+10)));
        store(dst+12, aes4(load(dst+13),o7, J, I, L, load(dst+12)));
        store(dst+14, aes4(load(dst+15),o8, J, I, L, load(dst+14)));
        bytes -= 16*16; dst += 16;
    }
    if (bytes >= 8*16) {
        o1 = vxor(offset,L);
        o2 = vxor(offset,L2);
        o3 = vxor(o1,L2);
        o4 = offset = vxor(offset,L4);
        fs[0] = aes4pre(s,o1,J,I,L); fs[1] = aes4pre(s,o2,J,I,L);
        fs[2] = aes4pre(s,o3,J,I,L); fs[3] = aes4pre(s,o4,J,I,L);
        o1 = vxor(J3,o1); o2 = vxor(J3,o2);
        o3 = vxor(J3,o3); o4 = vxor(J3,o4);
        tmp[0] = vxor(load(dst+ 0),fs[0]); sum = vxor(sum,tmp[0]);
        store(dst+ 0,vxor(load(dst+ 1),fs[0]));
        tmp[1] = vxor(load(dst+ 2),fs[1]); sum = vxor(sum,tmp[1]);
        store(dst+ 2,vxor(load(dst+ 3),fs[1]));
        tmp[2] = vxor(load(dst+ 4),fs[2]); sum = vxor(sum,tmp[2]);
        store(dst+ 4,vxor(load(dst+ 5),fs[2]));
        tmp[3] = vxor(load(dst+ 6),fs[3]); sum = vxor(sum,tmp[3]);
        store(dst+ 6,vxor(load(dst+ 7),fs[3]));
        store(dst+ 1, aes4(I,load(dst+ 0), J, I, L, tmp[0]));
        store(dst+ 3, aes4(I,load(dst+ 2), J, I, L, tmp[1]));
        store(dst+ 5, aes4(I,load(dst+ 4), J, I, L, tmp[2]));
        store(dst+ 7, aes4(I,load(dst+ 6), J, I, L, tmp[3]));
        store(dst+ 0, aes4(load(dst+ 1),o1, J, I, L, load(dst+ 0)));
        store(dst+ 2, aes4(load(dst+ 3),o2, J, I, L, load(dst+ 2)));
        store(dst+ 4, aes4(load(dst+ 5),o3, J, I, L, load(dst+ 4)));
        store(dst+ 6, aes4(load(dst+ 7),o4, J, I, L, load(dst+ 6)));
        bytes -= 8*16; dst += 8;
    }
    if (bytes >= 4*16) {
        o1 = vxor(offset,L);
        o2 = offset = vxor(offset,L2);
        fs[0] = aes4pre(s,o1,J,I,L); fs[1] = aes4pre(s,o2,J,I,L);
        o1 = vxor(J3,o1); o2 = vxor(J3,o2);
        tmp[0] = vxor(load(dst+ 0),fs[0]); sum = vxor(sum,tmp[0]);
        store(dst+ 0,vxor(load(dst+ 1),fs[0]));
        tmp[1] = vxor(load(dst+ 2),fs[1]); sum = vxor(sum,tmp[1]);
        store(dst+ 2,vxor(load(dst+ 3),fs[1]));
        store(dst+ 1, aes4(I,load(dst+ 0), J, I, L, tmp[0]));
        store(dst+ 3, aes4(I,load(dst+ 2), J, I, L, tmp[1]));
        store(dst+ 0, aes4(load(dst+ 1),o1, J, I, L, load(dst+ 0)));
        store(dst+ 2, aes4(load(dst+ 3),o2, J, I, L, load(dst+ 2)));
        bytes -= 4*16; dst += 4;
    }
    if (bytes) {
        o1 = vxor(offset,L);
        fs[0] = aes4pre(s,o1,J,I,L);
        o1 = vxor(J3,o1);
        tmp[0] = vxor(load(dst+ 0),fs[0]); sum = vxor(sum,tmp[0]);
        store(dst+ 0,vxor(load(dst+ 1),fs[0]));
        store(dst+ 1, aes4(I,load(dst+ 0), J, I, L, tmp[0]));
        store(dst+ 0, aes4(load(dst+ 1),o1, J, I, L, load(dst+ 0)));
    }
    return sum;
}

/* ------------------------------------------------------------------------- */

static int cipher_aez_core(aez_ctx_t *ctx, block t, int d, char *src,
                           unsigned bytes, unsigned abytes, char *dst) {
    block s, x, y, frag0, frag1, final0, final1;
    block I=ctx->I[0], L=ctx->L[0], J=ctx->J[0];
    block L4=ctx->L[2], I2 = bswap16(ctx->I[1]);
    unsigned i, frag_bytes, initial_bytes;

    if (!d) bytes += abytes;
    frag_bytes = bytes % 32;
    initial_bytes = bytes - frag_bytes - 32;

    /* Compute x and store intermediate results */
    x = pass_one(ctx, (block*)src, initial_bytes, (block*)dst);
    if (frag_bytes >= 16) {
        frag0 = load(src + initial_bytes);
        frag1 = one_zero_pad(load(src + initial_bytes + 16), 32-frag_bytes);
        x = aes4(frag0, vxor(L4, I2), J, I, L, x);
        x = vxor(x, aes4pre(frag1, vxor3(I2, L4, L), J, I, L));
    } else if (frag_bytes) {
        frag0 = one_zero_pad(load(src + initial_bytes), 16-frag_bytes);
        x = aes4(frag0, vxor(L4, I2), J, I, L, x);
    }

    /* Calculate s and final block values (y xor'd to final1 later) */
    final0 = vxor3(loadu(src + (bytes - 32)), x, t);
    if (d || !abytes) final1 = loadu(src+(bytes-32)+16);
    else final1 = zero_pad(loadu(src+(bytes-32)+16), abytes);
    final0 = aes4(final1, vxor(I2, ctx->L[d]), J, I, L, final0);
    final1 = vxor(final1, aes((block*)ctx, final0, ctx->L[d]));
    s = vxor(final0, final1);
    final0 = vxor(final0, aes((block*)ctx, final1, ctx->L[d^1]));
    /* Decryption: final0 should hold abytes zero bytes. If not, failure */
    if (d && !is_zero(vandnot(loadu(pad+abytes),final0))) return -1;
    final1 = aes4(final0, vxor(I2, ctx->L[d^1]), J, I, L, final1);

    /* Compute y and store final results */
    y = pass_two(ctx, s, initial_bytes, (block*)dst);
    if (frag_bytes >= 16) {
        frag0 = vxor(frag0, aes((block*)ctx, s, L4));
        frag1 = vxor(frag1, aes((block*)ctx, s, vxor(L4, L)));
        frag1 = one_zero_pad(frag1, 32-frag_bytes);
        y = aes4(frag0, vxor(I2, L4), J, I, L, y);
        y = vxor(y, aes4pre(frag1, vxor3(I2, L4, L), J, I, L));
        store(dst + initial_bytes, frag0);
        store(dst + initial_bytes + 16, frag1);
    } else if (frag_bytes) {
        frag0 = vxor(frag0, aes((block*)ctx, s, L4));
        frag0 = one_zero_pad(frag0, 16-frag_bytes);
        y = aes4(frag0, vxor(I2, L4), J, I, L, y);
        store(dst + initial_bytes, frag0);
    }

    storeu(dst + (bytes - 32), vxor3(final1, y, t));
    if (!d || !abytes)
        storeu(dst + (bytes - 32) + 16, final0);
    else {
        for (i=0; i<16-abytes; i++)
            ((char*)dst + (bytes - 16))[i] = ((char*)&final0)[i];
    }
    return 0;
}

/* ------------------------------------------------------------------------- */

static int cipher_aez_tiny(aez_ctx_t *ctx, block t, int d, char *src,
                           unsigned bytes, unsigned abytes, char *dst) {
    block l, r, tmp, one, rcon, buf[2], mask_10, mask_ff;
    block I=ctx->I[0], L=ctx->L[0], J=ctx->J[0], t_orig = t;
    block L2=ctx->L[1], L4=ctx->L[2], I2 = bswap16(ctx->I[1]);
    unsigned rnds, i;

    /* load src into buf, zero pad, update bytes for abytes */
    if (bytes >= 16) {
        buf[0] = load(src);
        buf[1] = zero_pad(load_partial(src+16,bytes-16),32-bytes);
    } else {
        buf[0] = zero_pad(load_partial(src,bytes),16-bytes);
        buf[1] = zero;
    }
    if (!d) bytes += abytes;

    /* load l/r, create 10* padding masks, shift r 4 bits if odd length */
    l = buf[0];
    r = loadu((char*)buf+bytes/2);
    mask_ff = loadu(pad+16-bytes/2);
    mask_10 = loadu(pad+32-bytes/2);
    if (bytes&1) {   /* Odd length. Deal with nibbles. */
        mask_10 = sll4(mask_10);
        ((char*)&mask_ff)[bytes/2] = (char)0xf0;
        r = bswap16(r);
        r = srl4(r);
        r = bswap16(r);
    }
    r = vor(vand(r, mask_ff), mask_10);

    /* Add tweak offset into t, and determine the number of rounds */
    if (bytes >= 16) {
        t = vxor4(t, I2, L2, L4);            /* (0,6) offset */
        rnds = 8;
    } else {
        t = vxor(vxor4(t, I2, L2, L4), L);   /* (0,7) offset */
        if (bytes>=3) rnds = 10; else if (bytes==2) rnds = 16; else rnds = 24;
    }

    if (!d) {
        one = zero_set_byte(1,15);
        rcon = zero;
    } else {
        one = zero_set_byte(-1,15);
        rcon = zero_set_byte((char)(rnds-1),15);
    }

    if ((d) && (bytes < 16)) {
        block offset = vxor3(I2, L, L2);
        tmp = vor(l, loadu(pad+32));
        tmp = aes4pre(t_orig, vxor(tmp,offset), J, I, L);
        tmp = vand(tmp, loadu(pad+32));
        l = vxor(l, tmp);
    }

    /* Feistel */
    for (i=0; i<rnds; i+=2) {
        l = vor(vand(aes4(t,vxor(r,rcon), J, I, L, l), mask_ff), mask_10);
        rcon = vadd(rcon,one);
        r = vor(vand(aes4(t,vxor(l,rcon), J, I, L, r), mask_ff), mask_10);
        rcon = vadd(rcon,one);
    }
    buf[0] = r;
    if (bytes&1) {
        l = bswap16(l);
        l = sll4(l);
        l = bswap16(l);
        r = vand(loadu((char*)buf+bytes/2), zero_set_byte((char)0xf0,0));
        l = vor(l, r);
    }
    storeu((char*)buf+bytes/2, l);
    if (d) {
        bytes -= abytes;
        if (abytes==16) tmp = loadu((char*)buf+bytes);
        else {
            tmp = zero;
            for (i=0; i<abytes; i++) ((char*)&tmp)[i] = ((char*)buf+bytes)[i];
        }
        if (!is_zero(tmp)) return -1;
    } else if (bytes < 16) {
        block offset = vxor3(I2, L, L2);
        tmp = vor(zero_pad(buf[0], 16-bytes), loadu(pad+32));
        tmp = aes4pre(t_orig,vxor(tmp,offset), J, I, L);
        buf[0] = vxor(buf[0], vand(tmp, loadu(pad+32)));
    }
    for (i=0; i<bytes; i++) dst[i] = ((char*)buf)[i];
    return 0;
}

/* ------------------------------------------------------------------------- */

void aez_encrypt(aez_ctx_t *ctx, char *n, unsigned nbytes,
                 char *ad, unsigned adbytes, unsigned abytes,
                 char *src, unsigned bytes, char *dst) {

    block t = aez_hash(ctx, n, nbytes, ad, adbytes, abytes);
    if (bytes==0) {
        unsigned i;
        t = aes((block*)ctx, t, vxor(ctx->L[0], ctx->L[1]));
        for (i=0; i<abytes; i++) dst[i] = ((char*)&t)[i];
    } else if (bytes+abytes < 32)
        cipher_aez_tiny(ctx, t, 0, src, bytes, abytes, dst);
    else
        cipher_aez_core(ctx, t, 0, src, bytes, abytes, dst);
}

/* ------------------------------------------------------------------------- */

int aez_decrypt(aez_ctx_t *ctx, char *n, unsigned nbytes,
                char *ad, unsigned adbytes, unsigned abytes,
                char *src, unsigned bytes, char *dst) {

    block t;
    if (bytes < abytes) return -1;
    t = aez_hash(ctx, n, nbytes, ad, adbytes, abytes);
    if (bytes==abytes) {
        block claimed = zero_pad(load_partial(src,abytes), 16-abytes);
        t = zero_pad(aes((block*)ctx, t, vxor(ctx->L[0], ctx->L[1])), 16-abytes);
        return is_zero(vandnot(t, claimed)) - 1;   /* is_zero return 0 or 1 */
    } else if (bytes < 32) {
        return cipher_aez_tiny(ctx, t, 1, src, bytes, abytes, dst);
    } else {
        return cipher_aez_core(ctx, t, 1, src, bytes, abytes, dst);
    }
}

/* ------------------------------------------------------------------------- */
/* Reference Blake2b code, here for convenience, and not for speed.          */
/* Downloaded Sep 2015 from https://github.com/mjosaarinen/blake2_mjosref    */

#include <stdint.h>

typedef struct {
    uint8_t b[128];
    uint64_t h[8];
    uint64_t t[2];
    size_t c;
    size_t outlen;
} blake2b_ctx;

#ifndef ROTR64
#define ROTR64(x, y) (((x) >> (y)) ^ ((x) << (64 - (y))))
#endif

#define B2B_GET64(p)                            \
    (((uint64_t) ((uint8_t *) (p))[0]) ^        \
    (((uint64_t) ((uint8_t *) (p))[1]) << 8) ^  \
    (((uint64_t) ((uint8_t *) (p))[2]) << 16) ^ \
    (((uint64_t) ((uint8_t *) (p))[3]) << 24) ^ \
    (((uint64_t) ((uint8_t *) (p))[4]) << 32) ^ \
    (((uint64_t) ((uint8_t *) (p))[5]) << 40) ^ \
    (((uint64_t) ((uint8_t *) (p))[6]) << 48) ^ \
    (((uint64_t) ((uint8_t *) (p))[7]) << 56))

#define B2B_G(a, b, c, d, x, y) {       \
    v[a] = v[a] + v[b] + x;             \
    v[d] = ROTR64(v[d] ^ v[a], 32);     \
    v[c] = v[c] + v[d];                 \
    v[b] = ROTR64(v[b] ^ v[c], 24);     \
    v[a] = v[a] + v[b] + y;             \
    v[d] = ROTR64(v[d] ^ v[a], 16);     \
    v[c] = v[c] + v[d];                 \
    v[b] = ROTR64(v[b] ^ v[c], 63); }

static const uint64_t blake2b_iv[8] = {
    0x6A09E667F3BCC908, 0xBB67AE8584CAA73B,
    0x3C6EF372FE94F82B, 0xA54FF53A5F1D36F1,
    0x510E527FADE682D1, 0x9B05688C2B3E6C1F,
    0x1F83D9ABFB41BD6B, 0x5BE0CD19137E2179
};

static void blake2b_compress(blake2b_ctx *ctx, int last)
{
    const uint8_t sigma[12][16] = {
        { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
        { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 },
        { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 },
        { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 },
        { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 },
        { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 },
        { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 },
        { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 },
        { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 },
        { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 },
        { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
        { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }
    };
    int i;
    uint64_t v[16], m[16];

    for (i = 0; i < 8; i++) {
        v[i] = ctx->h[i];
        v[i + 8] = blake2b_iv[i];
    }

    v[12] ^= ctx->t[0];
    v[13] ^= ctx->t[1];
    if (last)
        v[14] = ~v[14];

    for (i = 0; i < 16; i++)
        m[i] = B2B_GET64(&ctx->b[8 * i]);

    for (i = 0; i < 12; i++) {
        B2B_G( 0, 4, 8, 12, m[sigma[i][ 0]], m[sigma[i][ 1]]);
        B2B_G( 1, 5, 9, 13, m[sigma[i][ 2]], m[sigma[i][ 3]]);
        B2B_G( 2, 6, 10, 14, m[sigma[i][ 4]], m[sigma[i][ 5]]);
        B2B_G( 3, 7, 11, 15, m[sigma[i][ 6]], m[sigma[i][ 7]]);
        B2B_G( 0, 5, 10, 15, m[sigma[i][ 8]], m[sigma[i][ 9]]);
        B2B_G( 1, 6, 11, 12, m[sigma[i][10]], m[sigma[i][11]]);
        B2B_G( 2, 7, 8, 13, m[sigma[i][12]], m[sigma[i][13]]);
        B2B_G( 3, 4, 9, 14, m[sigma[i][14]], m[sigma[i][15]]);
    }

    for( i = 0; i < 8; ++i )
        ctx->h[i] ^= v[i] ^ v[i + 8];
}

static void blake2b_update(blake2b_ctx *ctx,
                           const void *in, size_t inlen)
{
    size_t i;

    for (i = 0; i < inlen; i++) {
        if (ctx->c == 128) {
            ctx->t[0] += ctx->c;
            if (ctx->t[0] < ctx->c)
                ctx->t[1]++;
            blake2b_compress(ctx, 0);
            ctx->c = 0;
        }
        ctx->b[ctx->c++] = ((const uint8_t *) in)[i];
    }
}

static void blake2b_final(blake2b_ctx *ctx, void *out)
{
    size_t i;

    ctx->t[0] += ctx->c;
    if (ctx->t[0] < ctx->c)
        ctx->t[1]++;

    while (ctx->c < 128)
        ctx->b[ctx->c++] = 0;
    blake2b_compress(ctx, 1);

    for (i = 0; i < ctx->outlen; i++) {
        ((uint8_t *) out)[i] =
            (ctx->h[i >> 3] >> (8 * (i & 7))) & 0xFF;
    }
}

static int blake2b_init(blake2b_ctx *ctx, size_t outlen,
                        const void *key, size_t keylen)
{
    size_t i;

    if (outlen == 0 || outlen > 64 || keylen > 64)
        return -1;

    for (i = 0; i < 8; i++)
        ctx->h[i] = blake2b_iv[i];
    ctx->h[0] ^= 0x01010000 ^ (keylen << 8) ^ outlen;

    ctx->t[0] = 0;
    ctx->t[1] = 0;
    ctx->c = 0;
    ctx->outlen = outlen;

    for (i = keylen; i < 128; i++)
        ctx->b[i] = 0;
    if (keylen > 0) {
        blake2b_update(ctx, key, keylen);
        ctx->c = 128;
    }

    return 0;
}

static int blake2b(void *out, size_t outlen,
                   const void *key, size_t keylen,
                   const void *in, size_t inlen)
{
    blake2b_ctx ctx;

    if (blake2b_init(&ctx, outlen, key, keylen))
        return -1;
    blake2b_update(&ctx, in, inlen);
    blake2b_final(&ctx, out);

    return 0;
}

/* ------------------------------------------------------------------------- */
/* aez mapping for CAESAR competition */

int crypto_aead_encrypt(
    unsigned char *c,unsigned long long *clen,
    const unsigned char *m,unsigned long long mlen,
    const unsigned char *ad,unsigned long long adlen,
    const unsigned char *nsec,
    const unsigned char *npub,
    const unsigned char *k
)
{
    aez_ctx_t ctx;
    (void)nsec;
    if (clen) *clen = mlen+16;
    aez_setup((unsigned char *)k, 48, &ctx);
    aez_encrypt(&ctx, (char *)npub, 12,
                (char *)ad, (unsigned)adlen, 16,
                (char *)m, (unsigned)mlen, (char *)c);
    return 0;
}

int crypto_aead_decrypt(
    unsigned char *m,unsigned long long *mlen,
    unsigned char *nsec,
    const unsigned char *c,unsigned long long clen,
    const unsigned char *ad,unsigned long long adlen,
    const unsigned char *npub,
    const unsigned char *k
)
{
    aez_ctx_t ctx;
    (void)nsec;
    if (mlen) *mlen = clen-16;
    aez_setup((unsigned char *)k, 48, &ctx);
    return aez_decrypt(&ctx, (char *)npub, 12,
                       (char *)ad, (unsigned)adlen, 16,
                       (char *)c, (unsigned)clen, (char *)m);
}
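The README above says the Ruby layer reaches this C code through FFI. The gem's actual binding lives in data/lib/aez.rb, which is not reproduced on this page; purely as a speculative sketch (the module name and library path below are hypothetical, not taken from the gem), the exported entry points could be attached with the ffi gem like this:

```ruby
require 'ffi'

# Hypothetical FFI attachment for the entry points defined in encrypt.c above.
# The real binding is the gem's lib/aez.rb, which is not shown on this page.
module AEZV5
  extend FFI::Library
  # Assumed location of the shared object produced by `rake compile`.
  ffi_lib File.expand_path('../aez/aezv5.so', __dir__)

  # void aez_setup(unsigned char *key, unsigned keylen, aez_ctx_t *ctx)
  attach_function :aez_setup, [:pointer, :uint, :pointer], :void

  # void aez_encrypt(aez_ctx_t *ctx, char *n, unsigned nbytes, char *ad,
  #                  unsigned adbytes, unsigned abytes, char *src,
  #                  unsigned bytes, char *dst)
  attach_function :aez_encrypt,
                  [:pointer, :pointer, :uint, :pointer, :uint, :uint, :pointer, :uint, :pointer],
                  :void

  # int aez_decrypt(...) takes the same argument layout and returns 0 on
  # success or -1 on authentication failure, mirroring the C code above.
  attach_function :aez_decrypt,
                  [:pointer, :pointer, :uint, :pointer, :uint, :uint, :pointer, :uint, :pointer],
                  :int
end
```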