zweifische 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,15 @@
1
require "mkmf"

# Build configuration for the zweifische native extension.
#
# The C sources call these libc functions directly, so stop early with a clear
# message instead of failing later at compile or link time.
%w[calloc malloc free memcpy memset].each do |func|
  abort "missing #{func}()" unless have_func func
end

# twofish.c includes <assert.h> unconditionally, so the header is required.
abort "missing assert.h" unless have_header "assert.h"

# Object files linked into the extension.
$objs = ["twofish.o", "zweifische.o"]

# The Makefile is generated while these flags are in effect.
with_cflags(" -std=c99 -Ofast -pedantic -pedantic-errors -Wall -Werror -Wno-error=attributes ") do
  create_makefile "zweifische/zweifische"
end
@@ -0,0 +1,148 @@
1
+ #define u8 unsigned char /* byte type used for every lookup table defined below */
2
+ u8 RS[4][8] = { /* 4x8 byte matrix; RSMatrixMultiply() multiplies it by an 8-byte vector using gfMult() with modulus RS_MOD */
3
+ { 0x01, 0xA4, 0x55, 0x87, 0x5A, 0x58, 0xDB, 0x9E, },
4
+ { 0xA4, 0x56, 0x82, 0xF3, 0x1E, 0xC6, 0x68, 0xE5, },
5
+ { 0x02, 0xA1, 0xFC, 0xC1, 0x47, 0xAE, 0x3D, 0x19, },
6
+ { 0xA4, 0x55, 0x87, 0x5A, 0x58, 0xDB, 0x9E, 0x03, },
7
+ };
8
+
9
+ u8 Q0[] = { /* 256-entry byte table indexed by a byte value; one of the two "Q permutations" applied in h() and fullKey() */
10
+ 0xA9, 0x67, 0xB3, 0xE8, 0x04, 0xFD, 0xA3, 0x76,
11
+ 0x9A, 0x92, 0x80, 0x78, 0xE4, 0xDD, 0xD1, 0x38,
12
+ 0x0D, 0xC6, 0x35, 0x98, 0x18, 0xF7, 0xEC, 0x6C,
13
+ 0x43, 0x75, 0x37, 0x26, 0xFA, 0x13, 0x94, 0x48,
14
+ 0xF2, 0xD0, 0x8B, 0x30, 0x84, 0x54, 0xDF, 0x23,
15
+ 0x19, 0x5B, 0x3D, 0x59, 0xF3, 0xAE, 0xA2, 0x82,
16
+ 0x63, 0x01, 0x83, 0x2E, 0xD9, 0x51, 0x9B, 0x7C,
17
+ 0xA6, 0xEB, 0xA5, 0xBE, 0x16, 0x0C, 0xE3, 0x61,
18
+ 0xC0, 0x8C, 0x3A, 0xF5, 0x73, 0x2C, 0x25, 0x0B,
19
+ 0xBB, 0x4E, 0x89, 0x6B, 0x53, 0x6A, 0xB4, 0xF1,
20
+ 0xE1, 0xE6, 0xBD, 0x45, 0xE2, 0xF4, 0xB6, 0x66,
21
+ 0xCC, 0x95, 0x03, 0x56, 0xD4, 0x1C, 0x1E, 0xD7,
22
+ 0xFB, 0xC3, 0x8E, 0xB5, 0xE9, 0xCF, 0xBF, 0xBA,
23
+ 0xEA, 0x77, 0x39, 0xAF, 0x33, 0xC9, 0x62, 0x71,
24
+ 0x81, 0x79, 0x09, 0xAD, 0x24, 0xCD, 0xF9, 0xD8,
25
+ 0xE5, 0xC5, 0xB9, 0x4D, 0x44, 0x08, 0x86, 0xE7,
26
+ 0xA1, 0x1D, 0xAA, 0xED, 0x06, 0x70, 0xB2, 0xD2,
27
+ 0x41, 0x7B, 0xA0, 0x11, 0x31, 0xC2, 0x27, 0x90,
28
+ 0x20, 0xF6, 0x60, 0xFF, 0x96, 0x5C, 0xB1, 0xAB,
29
+ 0x9E, 0x9C, 0x52, 0x1B, 0x5F, 0x93, 0x0A, 0xEF,
30
+ 0x91, 0x85, 0x49, 0xEE, 0x2D, 0x4F, 0x8F, 0x3B,
31
+ 0x47, 0x87, 0x6D, 0x46, 0xD6, 0x3E, 0x69, 0x64,
32
+ 0x2A, 0xCE, 0xCB, 0x2F, 0xFC, 0x97, 0x05, 0x7A,
33
+ 0xAC, 0x7F, 0xD5, 0x1A, 0x4B, 0x0E, 0xA7, 0x5A,
34
+ 0x28, 0x14, 0x3F, 0x29, 0x88, 0x3C, 0x4C, 0x02,
35
+ 0xB8, 0xDA, 0xB0, 0x17, 0x55, 0x1F, 0x8A, 0x7D,
36
+ 0x57, 0xC7, 0x8D, 0x74, 0xB7, 0xC4, 0x9F, 0x72,
37
+ 0x7E, 0x15, 0x22, 0x12, 0x58, 0x07, 0x99, 0x34,
38
+ 0x6E, 0x50, 0xDE, 0x68, 0x65, 0xBC, 0xDB, 0xF8,
39
+ 0xC8, 0xA8, 0x2B, 0x40, 0xDC, 0xFE, 0x32, 0xA4,
40
+ 0xCA, 0x10, 0x21, 0xF0, 0xD3, 0x5D, 0x0F, 0x00,
41
+ 0x6F, 0x9D, 0x36, 0x42, 0x4A, 0x5E, 0xC1, 0xE0,
42
+ };
43
+
44
+ u8 Q1[] = { /* 256-entry byte table indexed by a byte value; the second "Q permutation" applied in h() and fullKey() */
45
+ 0x75, 0xF3, 0xC6, 0xF4, 0xDB, 0x7B, 0xFB, 0xC8,
46
+ 0x4A, 0xD3, 0xE6, 0x6B, 0x45, 0x7D, 0xE8, 0x4B,
47
+ 0xD6, 0x32, 0xD8, 0xFD, 0x37, 0x71, 0xF1, 0xE1,
48
+ 0x30, 0x0F, 0xF8, 0x1B, 0x87, 0xFA, 0x06, 0x3F,
49
+ 0x5E, 0xBA, 0xAE, 0x5B, 0x8A, 0x00, 0xBC, 0x9D,
50
+ 0x6D, 0xC1, 0xB1, 0x0E, 0x80, 0x5D, 0xD2, 0xD5,
51
+ 0xA0, 0x84, 0x07, 0x14, 0xB5, 0x90, 0x2C, 0xA3,
52
+ 0xB2, 0x73, 0x4C, 0x54, 0x92, 0x74, 0x36, 0x51,
53
+ 0x38, 0xB0, 0xBD, 0x5A, 0xFC, 0x60, 0x62, 0x96,
54
+ 0x6C, 0x42, 0xF7, 0x10, 0x7C, 0x28, 0x27, 0x8C,
55
+ 0x13, 0x95, 0x9C, 0xC7, 0x24, 0x46, 0x3B, 0x70,
56
+ 0xCA, 0xE3, 0x85, 0xCB, 0x11, 0xD0, 0x93, 0xB8,
57
+ 0xA6, 0x83, 0x20, 0xFF, 0x9F, 0x77, 0xC3, 0xCC,
58
+ 0x03, 0x6F, 0x08, 0xBF, 0x40, 0xE7, 0x2B, 0xE2,
59
+ 0x79, 0x0C, 0xAA, 0x82, 0x41, 0x3A, 0xEA, 0xB9,
60
+ 0xE4, 0x9A, 0xA4, 0x97, 0x7E, 0xDA, 0x7A, 0x17,
61
+ 0x66, 0x94, 0xA1, 0x1D, 0x3D, 0xF0, 0xDE, 0xB3,
62
+ 0x0B, 0x72, 0xA7, 0x1C, 0xEF, 0xD1, 0x53, 0x3E,
63
+ 0x8F, 0x33, 0x26, 0x5F, 0xEC, 0x76, 0x2A, 0x49,
64
+ 0x81, 0x88, 0xEE, 0x21, 0xC4, 0x1A, 0xEB, 0xD9,
65
+ 0xC5, 0x39, 0x99, 0xCD, 0xAD, 0x31, 0x8B, 0x01,
66
+ 0x18, 0x23, 0xDD, 0x1F, 0x4E, 0x2D, 0xF9, 0x48,
67
+ 0x4F, 0xF2, 0x65, 0x8E, 0x78, 0x5C, 0x58, 0x19,
68
+ 0x8D, 0xE5, 0x98, 0x57, 0x67, 0x7F, 0x05, 0x64,
69
+ 0xAF, 0x63, 0xB6, 0xFE, 0xF5, 0xB7, 0x3C, 0xA5,
70
+ 0xCE, 0xE9, 0x68, 0x44, 0xE0, 0x4D, 0x43, 0x69,
71
+ 0x29, 0x2E, 0xAC, 0x15, 0x59, 0xA8, 0x0A, 0x9E,
72
+ 0x6E, 0x47, 0xDF, 0x34, 0x35, 0x6A, 0xCF, 0xDC,
73
+ 0x22, 0xC9, 0xC0, 0x9B, 0x89, 0xD4, 0xED, 0xAB,
74
+ 0x12, 0xA2, 0x0D, 0x52, 0xBB, 0x02, 0x2F, 0xA9,
75
+ 0xD7, 0x61, 0x1E, 0xB4, 0x50, 0x04, 0xF6, 0xC2,
76
+ 0x16, 0x25, 0x86, 0x56, 0x55, 0x09, 0xBE, 0x91,
77
+ };
78
+
79
+ u8 mult5B[] = { /* 256-entry byte table used by the MDS matrix multiply in h() and fullKey(); NOTE(review): presumably mult5B[x] is x times 0x5B in the MDS field (mult5B[1] == 0x5B) - confirm against the Twofish specification */
80
+ 0x00, 0x5B, 0xB6, 0xED, 0x05, 0x5E, 0xB3, 0xE8,
81
+ 0x0A, 0x51, 0xBC, 0xE7, 0x0F, 0x54, 0xB9, 0xE2,
82
+ 0x14, 0x4F, 0xA2, 0xF9, 0x11, 0x4A, 0xA7, 0xFC,
83
+ 0x1E, 0x45, 0xA8, 0xF3, 0x1B, 0x40, 0xAD, 0xF6,
84
+ 0x28, 0x73, 0x9E, 0xC5, 0x2D, 0x76, 0x9B, 0xC0,
85
+ 0x22, 0x79, 0x94, 0xCF, 0x27, 0x7C, 0x91, 0xCA,
86
+ 0x3C, 0x67, 0x8A, 0xD1, 0x39, 0x62, 0x8F, 0xD4,
87
+ 0x36, 0x6D, 0x80, 0xDB, 0x33, 0x68, 0x85, 0xDE,
88
+ 0x50, 0x0B, 0xE6, 0xBD, 0x55, 0x0E, 0xE3, 0xB8,
89
+ 0x5A, 0x01, 0xEC, 0xB7, 0x5F, 0x04, 0xE9, 0xB2,
90
+ 0x44, 0x1F, 0xF2, 0xA9, 0x41, 0x1A, 0xF7, 0xAC,
91
+ 0x4E, 0x15, 0xF8, 0xA3, 0x4B, 0x10, 0xFD, 0xA6,
92
+ 0x78, 0x23, 0xCE, 0x95, 0x7D, 0x26, 0xCB, 0x90,
93
+ 0x72, 0x29, 0xC4, 0x9F, 0x77, 0x2C, 0xC1, 0x9A,
94
+ 0x6C, 0x37, 0xDA, 0x81, 0x69, 0x32, 0xDF, 0x84,
95
+ 0x66, 0x3D, 0xD0, 0x8B, 0x63, 0x38, 0xD5, 0x8E,
96
+ 0xA0, 0xFB, 0x16, 0x4D, 0xA5, 0xFE, 0x13, 0x48,
97
+ 0xAA, 0xF1, 0x1C, 0x47, 0xAF, 0xF4, 0x19, 0x42,
98
+ 0xB4, 0xEF, 0x02, 0x59, 0xB1, 0xEA, 0x07, 0x5C,
99
+ 0xBE, 0xE5, 0x08, 0x53, 0xBB, 0xE0, 0x0D, 0x56,
100
+ 0x88, 0xD3, 0x3E, 0x65, 0x8D, 0xD6, 0x3B, 0x60,
101
+ 0x82, 0xD9, 0x34, 0x6F, 0x87, 0xDC, 0x31, 0x6A,
102
+ 0x9C, 0xC7, 0x2A, 0x71, 0x99, 0xC2, 0x2F, 0x74,
103
+ 0x96, 0xCD, 0x20, 0x7B, 0x93, 0xC8, 0x25, 0x7E,
104
+ 0xF0, 0xAB, 0x46, 0x1D, 0xF5, 0xAE, 0x43, 0x18,
105
+ 0xFA, 0xA1, 0x4C, 0x17, 0xFF, 0xA4, 0x49, 0x12,
106
+ 0xE4, 0xBF, 0x52, 0x09, 0xE1, 0xBA, 0x57, 0x0C,
107
+ 0xEE, 0xB5, 0x58, 0x03, 0xEB, 0xB0, 0x5D, 0x06,
108
+ 0xD8, 0x83, 0x6E, 0x35, 0xDD, 0x86, 0x6B, 0x30,
109
+ 0xD2, 0x89, 0x64, 0x3F, 0xD7, 0x8C, 0x61, 0x3A,
110
+ 0xCC, 0x97, 0x7A, 0x21, 0xC9, 0x92, 0x7F, 0x24,
111
+ 0xC6, 0x9D, 0x70, 0x2B, 0xC3, 0x98, 0x75, 0x2E,
112
+ };
113
+
114
+ u8 multEF[] = { /* 256-entry byte table used by the MDS matrix multiply in h() and fullKey(); NOTE(review): presumably multEF[x] is x times 0xEF in the MDS field (multEF[1] == 0xEF) - confirm against the Twofish specification */
115
+ 0x00, 0xEF, 0xB7, 0x58, 0x07, 0xE8, 0xB0, 0x5F,
116
+ 0x0E, 0xE1, 0xB9, 0x56, 0x09, 0xE6, 0xBE, 0x51,
117
+ 0x1C, 0xF3, 0xAB, 0x44, 0x1B, 0xF4, 0xAC, 0x43,
118
+ 0x12, 0xFD, 0xA5, 0x4A, 0x15, 0xFA, 0xA2, 0x4D,
119
+ 0x38, 0xD7, 0x8F, 0x60, 0x3F, 0xD0, 0x88, 0x67,
120
+ 0x36, 0xD9, 0x81, 0x6E, 0x31, 0xDE, 0x86, 0x69,
121
+ 0x24, 0xCB, 0x93, 0x7C, 0x23, 0xCC, 0x94, 0x7B,
122
+ 0x2A, 0xC5, 0x9D, 0x72, 0x2D, 0xC2, 0x9A, 0x75,
123
+ 0x70, 0x9F, 0xC7, 0x28, 0x77, 0x98, 0xC0, 0x2F,
124
+ 0x7E, 0x91, 0xC9, 0x26, 0x79, 0x96, 0xCE, 0x21,
125
+ 0x6C, 0x83, 0xDB, 0x34, 0x6B, 0x84, 0xDC, 0x33,
126
+ 0x62, 0x8D, 0xD5, 0x3A, 0x65, 0x8A, 0xD2, 0x3D,
127
+ 0x48, 0xA7, 0xFF, 0x10, 0x4F, 0xA0, 0xF8, 0x17,
128
+ 0x46, 0xA9, 0xF1, 0x1E, 0x41, 0xAE, 0xF6, 0x19,
129
+ 0x54, 0xBB, 0xE3, 0x0C, 0x53, 0xBC, 0xE4, 0x0B,
130
+ 0x5A, 0xB5, 0xED, 0x02, 0x5D, 0xB2, 0xEA, 0x05,
131
+ 0xE0, 0x0F, 0x57, 0xB8, 0xE7, 0x08, 0x50, 0xBF,
132
+ 0xEE, 0x01, 0x59, 0xB6, 0xE9, 0x06, 0x5E, 0xB1,
133
+ 0xFC, 0x13, 0x4B, 0xA4, 0xFB, 0x14, 0x4C, 0xA3,
134
+ 0xF2, 0x1D, 0x45, 0xAA, 0xF5, 0x1A, 0x42, 0xAD,
135
+ 0xD8, 0x37, 0x6F, 0x80, 0xDF, 0x30, 0x68, 0x87,
136
+ 0xD6, 0x39, 0x61, 0x8E, 0xD1, 0x3E, 0x66, 0x89,
137
+ 0xC4, 0x2B, 0x73, 0x9C, 0xC3, 0x2C, 0x74, 0x9B,
138
+ 0xCA, 0x25, 0x7D, 0x92, 0xCD, 0x22, 0x7A, 0x95,
139
+ 0x90, 0x7F, 0x27, 0xC8, 0x97, 0x78, 0x20, 0xCF,
140
+ 0x9E, 0x71, 0x29, 0xC6, 0x99, 0x76, 0x2E, 0xC1,
141
+ 0x8C, 0x63, 0x3B, 0xD4, 0x8B, 0x64, 0x3C, 0xD3,
142
+ 0x82, 0x6D, 0x35, 0xDA, 0x85, 0x6A, 0x32, 0xDD,
143
+ 0xA8, 0x47, 0x1F, 0xF0, 0xAF, 0x40, 0x18, 0xF7,
144
+ 0xA6, 0x49, 0x11, 0xFE, 0xA1, 0x4E, 0x16, 0xF9,
145
+ 0xB4, 0x5B, 0x03, 0xEC, 0xB3, 0x5C, 0x04, 0xEB,
146
+ 0xBA, 0x55, 0x0D, 0xE2, 0xBD, 0x52, 0x0A, 0xE5,
147
+ };
148
+
@@ -0,0 +1,602 @@
1
+ /*
2
+ compiler is gcc(egcs-2.91.66)
3
+ flags are -O3 -fomit-frame-pointer -Wall
4
+ Processor is 233Mhz Pentium II (Deschutes)
5
+ OS is Linux 2.2.16
6
+
7
+ Max encryption speed I've seen (in multi-user mode even, although single
8
+ user mode probably won't save more than a couple of clocks):
9
+
10
+ encs/sec = 506115.904591
11
+ bytes/sec = 8097854.473457
12
+ KB/sec = 7908.061009
13
+ MB/sec = 7.722716
14
+ approx clocks/enc (for 233Mhz) = 461.027466
15
+
16
+ I easily beat the best C implementations (the best being MSC @ 600 clocks),
17
+ so the target is the assembly implementations...
18
+
19
+ according to twofish docs, fully tuned *assembly* (in clocks):
20
+ compiled is 285 (shouldn't be able to do this) (12.5 MB/sec)
21
+ full keyed is 315 (if I get to 460, maybe this is possible but
22
+ I *really* doubt it) (11.3 MB/sec)
23
+ partially keyed is 460 (I'm *really* close) (7.7 MB/sec)
24
+ minimal keying is 720 (I've beat this -their C did too) (4.9 MB/sec)
25
+
26
+ */
27
+
28
+ #include <stdlib.h>
29
+ #include <string.h>
30
+ #include <assert.h>
31
+
32
+ #include "twofish.h"
33
+ #include "tables.h"
34
+
35
/* modulus handed to gfMult() by RSMatrixMultiply() */
#define RS_MOD 0x14D
/* 0x01 repeated in each byte of a 32-bit word; keySched() multiplies it by a
   small index to build the inputs of h() */
#define RHO 0x01010101L

/*
  32-bit rotates.  gcc is smart enough to convert these to rotate
  instructions (check with gcc -O3 -S).

  The complementary shift count is masked as well, so a rotate count that is
  a multiple of 32 shifts by 0 instead of by 32 (shifting a 32-bit value by
  32 is undefined behavior).  Both arguments are evaluated more than once:
  do not pass expressions with side effects.
*/
#define ROL(x,n) (((x) << ((n) & 0x1F)) | ((x) >> ((32 - ((n) & 0x1F)) & 0x1F)))
#define ROR(x,n) (((x) >> ((n) & 0x1F)) | ((x) << ((32 - ((n) & 0x1F)) & 0x1F)))

/*
  BSWAP reverses the four bytes of a 32-bit word on big-endian hosts and is
  the identity elsewhere.

  A build may still force the swap with -DBIG_ENDIAN=1.  The C library
  headers usually define BIG_ENDIAN as 4321, which never equals 1, so the
  byte order reported by the compiler is checked as well.
*/
#if (defined(BIG_ENDIAN) && BIG_ENDIAN == 1) || \
    (defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && \
     __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
#define BSWAP(x) (((ROR(x,8) & 0xFF00FF00) | (ROL(x,8) & 0x00FF00FF)))
#else
#define BSWAP(x) (x)
#endif

/* byte N (0 = least significant) of x, extracted by shift and mask */
#define _b(x, N) (((x) >> ((N)*8)) & 0xFF)

/* Byte extraction by cast.  The original author measured the cast as
   noticeably faster than masking with 0xFF. */
#define b0(x) ((BYTE)(x))
#define b1(x) ((BYTE)((x) >> 8))
#define b2(x) ((BYTE)((x) >> 16))
#define b3(x) ((BYTE)((x) >> 24))

/* Pack four bytes into a word, first byte most significant.  Each byte is
   widened to u32 before shifting so that a byte >= 0x80 is never shifted
   into the sign bit of an int. */
#define BYTEARRAY_TO_U32(r) (((u32)(r)[0] << 24) ^ ((u32)(r)[1] << 16) ^ ((u32)(r)[2] << 8) ^ (u32)(r)[3])
#define BYTES_TO_U32(r0, r1, r2, r3) (((u32)(r0) << 24) ^ ((u32)(r1) << 16) ^ ((u32)(r2) << 8) ^ (u32)(r3))
67
+
68
+ /*
69
+ multiply two polynomials represented as u32's, actually called with BYTES,
70
+ but since I'm not really going to too much work to optimize key setup (since
71
+ raw encryption speed is what I'm after), big deal.
72
+ */
73
+ u32 polyMult(u32 a, u32 b)
74
+ {
75
+ u32 t=0;
76
+ while (a)
77
+ {
78
+ /*printf("A=%X B=%X T=%X\n", a, b, t);*/
79
+ if (a&1) t^=b;
80
+ b <<= 1;
81
+ a >>= 1;
82
+ }
83
+ return t;
84
+ }
85
+
86
+ /* take the polynomial t and return the t % modulus in GF(256) */
87
+ u32 gfMod(u32 t, u32 modulus)
88
+ {
89
+ int i;
90
+ u32 tt;
91
+
92
+ modulus <<= 7;
93
+ for (i = 0; i < 8; i++)
94
+ {
95
+ tt = t ^ modulus;
96
+ if (tt < t) t = tt;
97
+ modulus >>= 1;
98
+ }
99
+ return t;
100
+ }
101
+
102
+ /*multiply a and b and return the modulus */
103
+ #define gfMult(a, b, modulus) gfMod(polyMult(a, b), modulus)
104
+
105
+ /* return a u32 containing the result of multiplying the RS Code matrix
106
+ by the sd matrix
107
+ */
108
+ u32 RSMatrixMultiply(BYTE sd[8])
109
+ {
110
+ int j, k;
111
+ BYTE t;
112
+ BYTE result[4];
113
+
114
+ for (j = 0; j < 4; j++)
115
+ {
116
+ t = 0;
117
+ for (k = 0; k < 8; k++)
118
+ {
119
+ /*printf("t=%X %X\n", t, gfMult(RS[j][k], sd[k], RS_MOD));*/
120
+ t ^= gfMult(RS[j][k], sd[k], RS_MOD);
121
+ }
122
+ result[3-j] = t;
123
+ }
124
+ return BYTEARRAY_TO_U32(result);
125
+ }
126
+
127
+ /* the Zero-keyed h function (used by the key setup routine) */
128
+ u32 h(u32 X, u32 L[4], int k)
129
+ {
130
+ BYTE y0, y1, y2, y3;
131
+ BYTE z0, z1, z2, z3;
132
+ y0 = b0(X);
133
+ y1 = b1(X);
134
+ y2 = b2(X);
135
+ y3 = b3(X);
136
+
137
+ switch(k)
138
+ {
139
+ case 4:
140
+ y0 = Q1[y0] ^ b0(L[3]);
141
+ y1 = Q0[y1] ^ b1(L[3]);
142
+ y2 = Q0[y2] ^ b2(L[3]);
143
+ y3 = Q1[y3] ^ b3(L[3]);
144
+ case 3:
145
+ y0 = Q1[y0] ^ b0(L[2]);
146
+ y1 = Q1[y1] ^ b1(L[2]);
147
+ y2 = Q0[y2] ^ b2(L[2]);
148
+ y3 = Q0[y3] ^ b3(L[2]);
149
+ case 2:
150
+ y0 = Q1[ Q0 [ Q0[y0] ^ b0(L[1]) ] ^ b0(L[0]) ];
151
+ y1 = Q0[ Q0 [ Q1[y1] ^ b1(L[1]) ] ^ b1(L[0]) ];
152
+ y2 = Q1[ Q1 [ Q0[y2] ^ b2(L[1]) ] ^ b2(L[0]) ];
153
+ y3 = Q0[ Q1 [ Q1[y3] ^ b3(L[1]) ] ^ b3(L[0]) ];
154
+ }
155
+
156
+ /* inline the MDS matrix multiply */
157
+ z0 = multEF[y0] ^ y1 ^ multEF[y2] ^ mult5B[y3];
158
+ z1 = multEF[y0] ^ mult5B[y1] ^ y2 ^ multEF[y3];
159
+ z2 = mult5B[y0] ^ multEF[y1] ^ multEF[y2] ^ y3;
160
+ z3 = y0 ^ multEF[y1] ^ mult5B[y2] ^ mult5B[y3];
161
+
162
+ return BYTES_TO_U32(z0, z1, z2, z3);
163
+ }
164
+
165
+ /* given the Sbox keys, create the fully keyed QF */
166
+ void fullKey(u32 L[4], int k, u32 QF[4][256])
167
+ {
168
+ BYTE y0, y1, y2, y3;
169
+
170
+ int i;
171
+
172
+ /* for all input values to the Q permutations */
173
+ for (i=0; i<256; i++)
174
+ {
175
+ /* run the Q permutations */
176
+ y0 = i; y1=i; y2=i; y3=i;
177
+ switch(k)
178
+ {
179
+ case 4:
180
+ y0 = Q1[y0] ^ b0(L[3]);
181
+ y1 = Q0[y1] ^ b1(L[3]);
182
+ y2 = Q0[y2] ^ b2(L[3]);
183
+ y3 = Q1[y3] ^ b3(L[3]);
184
+ case 3:
185
+ y0 = Q1[y0] ^ b0(L[2]);
186
+ y1 = Q1[y1] ^ b1(L[2]);
187
+ y2 = Q0[y2] ^ b2(L[2]);
188
+ y3 = Q0[y3] ^ b3(L[2]);
189
+ case 2:
190
+ y0 = Q1[ Q0 [ Q0[y0] ^ b0(L[1]) ] ^ b0(L[0]) ];
191
+ y1 = Q0[ Q0 [ Q1[y1] ^ b1(L[1]) ] ^ b1(L[0]) ];
192
+ y2 = Q1[ Q1 [ Q0[y2] ^ b2(L[1]) ] ^ b2(L[0]) ];
193
+ y3 = Q0[ Q1 [ Q1[y3] ^ b3(L[1]) ] ^ b3(L[0]) ];
194
+ }
195
+
196
+ /* now do the partial MDS matrix multiplies */
197
+ QF[0][i] = ((multEF[y0] << 24)
198
+ | (multEF[y0] << 16)
199
+ | (mult5B[y0] << 8)
200
+ | y0);
201
+ QF[1][i] = ((y1 << 24)
202
+ | (mult5B[y1] << 16)
203
+ | (multEF[y1] << 8)
204
+ | multEF[y1]);
205
+ QF[2][i] = ((multEF[y2] << 24)
206
+ | (y2 << 16)
207
+ | (multEF[y2] << 8)
208
+ | mult5B[y2]);
209
+ QF[3][i] = ((mult5B[y3] << 24)
210
+ | (multEF[y3] << 16)
211
+ | (y3 << 8)
212
+ | mult5B[y3]);
213
+ }
214
+ }
215
+
216
+ /* fully keyed h (aka g) function */
217
+ #define fkh(X) (S[0][b0(X)]^S[1][b1(X)]^S[2][b2(X)]^S[3][b3(X)])
218
+
219
+ /* one encryption round */
220
+ #define ENC_ROUND(R0, R1, R2, R3, round) \
221
+ T0 = fkh(R0); \
222
+ T1 = fkh(ROL(R1, 8)); \
223
+ R2 = ROR(R2 ^ (T1 + T0 + K[2*round+8]), 1); \
224
+ R3 = ROL(R3, 1) ^ (2*T1 + T0 + K[2*round+9]);
225
+
226
+ void twofish_internal_encrypt(u32 K[40], u32 S[4][256], BYTE PT[16])
227
+ {
228
+ u32 R0, R1, R2, R3;
229
+ u32 T0, T1;
230
+
231
+ /* load/byteswap/whiten input */
232
+ R3 = K[3] ^ BSWAP(((u32*)PT)[3]);
233
+ R2 = K[2] ^ BSWAP(((u32*)PT)[2]);
234
+ R1 = K[1] ^ BSWAP(((u32*)PT)[1]);
235
+ R0 = K[0] ^ BSWAP(((u32*)PT)[0]);
236
+
237
+ ENC_ROUND(R0, R1, R2, R3, 0);
238
+ ENC_ROUND(R2, R3, R0, R1, 1);
239
+ ENC_ROUND(R0, R1, R2, R3, 2);
240
+ ENC_ROUND(R2, R3, R0, R1, 3);
241
+ ENC_ROUND(R0, R1, R2, R3, 4);
242
+ ENC_ROUND(R2, R3, R0, R1, 5);
243
+ ENC_ROUND(R0, R1, R2, R3, 6);
244
+ ENC_ROUND(R2, R3, R0, R1, 7);
245
+ ENC_ROUND(R0, R1, R2, R3, 8);
246
+ ENC_ROUND(R2, R3, R0, R1, 9);
247
+ ENC_ROUND(R0, R1, R2, R3, 10);
248
+ ENC_ROUND(R2, R3, R0, R1, 11);
249
+ ENC_ROUND(R0, R1, R2, R3, 12);
250
+ ENC_ROUND(R2, R3, R0, R1, 13);
251
+ ENC_ROUND(R0, R1, R2, R3, 14);
252
+ ENC_ROUND(R2, R3, R0, R1, 15);
253
+
254
+ /* load/byteswap/whiten output */
255
+ ((u32*)PT)[3] = BSWAP(R1 ^ K[7]);
256
+ ((u32*)PT)[2] = BSWAP(R0 ^ K[6]);
257
+ ((u32*)PT)[1] = BSWAP(R3 ^ K[5]);
258
+ ((u32*)PT)[0] = BSWAP(R2 ^ K[4]);
259
+ }
260
+
261
+ /* one decryption round */
262
+ #define DEC_ROUND(R0, R1, R2, R3, round) \
263
+ T0 = fkh(R0); \
264
+ T1 = fkh(ROL(R1, 8)); \
265
+ R2 = ROL(R2, 1) ^ (T0 + T1 + K[2*round+8]); \
266
+ R3 = ROR(R3 ^ (T0 + 2*T1 + K[2*round+9]), 1);
267
+
268
+ void twofish_internal_decrypt(u32 K[40], u32 S[4][256], BYTE PT[16])
269
+ {
270
+ u32 T0, T1;
271
+ u32 R0, R1, R2, R3;
272
+
273
+ /* load/byteswap/whiten input */
274
+ R3 = K[7] ^ BSWAP(((u32*)PT)[3]);
275
+ R2 = K[6] ^ BSWAP(((u32*)PT)[2]);
276
+ R1 = K[5] ^ BSWAP(((u32*)PT)[1]);
277
+ R0 = K[4] ^ BSWAP(((u32*)PT)[0]);
278
+
279
+ DEC_ROUND(R0, R1, R2, R3, 15);
280
+ DEC_ROUND(R2, R3, R0, R1, 14);
281
+ DEC_ROUND(R0, R1, R2, R3, 13);
282
+ DEC_ROUND(R2, R3, R0, R1, 12);
283
+ DEC_ROUND(R0, R1, R2, R3, 11);
284
+ DEC_ROUND(R2, R3, R0, R1, 10);
285
+ DEC_ROUND(R0, R1, R2, R3, 9);
286
+ DEC_ROUND(R2, R3, R0, R1, 8);
287
+ DEC_ROUND(R0, R1, R2, R3, 7);
288
+ DEC_ROUND(R2, R3, R0, R1, 6);
289
+ DEC_ROUND(R0, R1, R2, R3, 5);
290
+ DEC_ROUND(R2, R3, R0, R1, 4);
291
+ DEC_ROUND(R0, R1, R2, R3, 3);
292
+ DEC_ROUND(R2, R3, R0, R1, 2);
293
+ DEC_ROUND(R0, R1, R2, R3, 1);
294
+ DEC_ROUND(R2, R3, R0, R1, 0);
295
+
296
+ /* load/byteswap/whiten output */
297
+ ((u32*)PT)[3] = BSWAP(R1 ^ K[3]);
298
+ ((u32*)PT)[2] = BSWAP(R0 ^ K[2]);
299
+ ((u32*)PT)[1] = BSWAP(R3 ^ K[1]);
300
+ ((u32*)PT)[0] = BSWAP(R2 ^ K[0]);
301
+
302
+ }
303
+
304
+ /* the key schedule routine */
305
+ void keySched(BYTE M[], int N, u32 **S, u32 K[40], int *k)
306
+ {
307
+ u32 Mo[4], Me[4];
308
+ int i, j;
309
+ BYTE vector[8];
310
+ u32 A, B;
311
+
312
+ *k = (N + 63) / 64;
313
+ *S = (u32*)malloc(sizeof(u32) * (*k));
314
+
315
+ for (i = 0; i < *k; i++)
316
+ {
317
+ Me[i] = BSWAP(((u32*)M)[2*i]);
318
+ Mo[i] = BSWAP(((u32*)M)[2*i+1]);
319
+ }
320
+
321
+ for (i = 0; i < *k; i++)
322
+ {
323
+ for (j = 0; j < 4; j++) vector[j] = _b(Me[i], j);
324
+ for (j = 0; j < 4; j++) vector[j+4] = _b(Mo[i], j);
325
+ (*S)[(*k)-i-1] = RSMatrixMultiply(vector);
326
+ }
327
+ for (i = 0; i < 20; i++)
328
+ {
329
+ A = h(2*i*RHO, Me, *k);
330
+ B = ROL(h(2*i*RHO + RHO, Mo, *k), 8);
331
+ K[2*i] = A+B;
332
+ K[2*i+1] = ROL(A + 2*B, 9);
333
+ }
334
+ }
335
+
336
+ enum twofish_mode { /* chaining mode of a context; selects the branch taken by ENCRYPT_WITH and DECRYPT_WITH */
337
+ twofish_mode_ecb, /* every block is processed independently; value 0, so a calloc'ed context starts in this mode */
338
+ twofish_mode_cbc /* each block is chained with the previous ciphertext block, starting from the iv */
339
+ };
340
+
341
+ struct twofish { /* cipher context allocated by the twofish_*_init functions and released by twofish_free() */
342
+ int N; /* key length in bits, as passed to TWOFISH_INIT (128, 192 or 256) */
343
+ u32 K[40]; /* expanded key words filled in by keySched() */
344
+ u32 QF[4][256]; /* fully keyed tables filled in by fullKey() */
345
+ BYTE epv[16]; /* encryption work block; in CBC mode it starts as the iv and then holds the last ciphertext block */
346
+ BYTE dpv[16]; /* decryption work block; in CBC mode it starts as the iv and then holds the previous ciphertext block */
347
+ u32 nrest; /* number of valid bytes in rest */
348
+ BYTE rest[16]; /* trailing input bytes that did not fill a block, prepended to the next call's input */
349
+ enum twofish_mode mode; /* set by the *_ecb_init and *_cbc_init functions */
350
+ };
351
+
352
/* XOR the 16 bytes at source into the 16 bytes at target; both arguments are
   evaluated once per byte */
#define fix_xor(target, source) do { int _n = 0; while (_n < 16) { (target)[_n] ^= (source)[_n]; ++_n; } } while (0)
353
+
354
+ #define TWOFISH_INIT(key, len) { \
355
+ int k; \
356
+ u32 *S; \
357
+ struct twofish *twofish_ctx; \
358
+ \
359
+ twofish_ctx = calloc(1, sizeof(struct twofish)); \
360
+ assert(twofish_ctx); \
361
+ \
362
+ twofish_ctx->N = len; \
363
+ keySched(key, len, &S, twofish_ctx->K, &k); \
364
+ fullKey(S, k, twofish_ctx->QF); \
365
+ free(S); \
366
+ \
367
+ return twofish_ctx; \
368
+ }
369
+
370
+ #define TWOFISH_ECB_INIT(key, len) { \
371
+ struct twofish *twofish_ctx; \
372
+ \
373
+ twofish_ctx = twofish_##len##_init(key); \
374
+ twofish_ctx->mode = twofish_mode_ecb; \
375
+ return twofish_ctx; \
376
+ }
377
+
378
+
379
+ #define TWOFISH_CBC_INIT(key, iv, len) { \
380
+ struct twofish *twofish_ctx; \
381
+ \
382
+ twofish_ctx = twofish_##len##_init(key); \
383
+ twofish_ctx->mode = twofish_mode_cbc; \
384
+ memcpy(twofish_ctx->epv, iv, 16); \
385
+ memcpy(twofish_ctx->dpv, iv, 16); \
386
+ return twofish_ctx; \
387
+ }
388
+
389
+ /**
390
+ * public API
391
+ **/
392
+
393
+ /**
394
+ * By definition twofish can only accept key up to 256 bit
395
+ * we wont do any checking here and will assume user already
396
+ * know about it. Twofish is undefined for key larger than 256 bit
397
+ */
398
+ struct twofish *twofish_256_init(BYTE key[32]) TWOFISH_INIT(key, 256)
399
+ struct twofish *twofish_192_init(BYTE key[32]) TWOFISH_INIT(key, 192)
400
+ struct twofish *twofish_128_init(BYTE key[32]) TWOFISH_INIT(key, 128)
401
+
402
+ struct twofish *twofish_256_ecb_init(BYTE key[32], BYTE iv[16] /* unused */) TWOFISH_ECB_INIT(key, 256)
403
+ struct twofish *twofish_192_ecb_init(BYTE key[32], BYTE iv[16] /* unused */) TWOFISH_ECB_INIT(key, 192)
404
+ struct twofish *twofish_128_ecb_init(BYTE key[32], BYTE iv[16] /* unused */) TWOFISH_ECB_INIT(key, 128)
405
+
406
+ struct twofish *twofish_256_cbc_init(BYTE key[32], BYTE iv[16]) TWOFISH_CBC_INIT(key, iv, 256)
407
+ struct twofish *twofish_192_cbc_init(BYTE key[32], BYTE iv[16]) TWOFISH_CBC_INIT(key, iv, 192)
408
+ struct twofish *twofish_128_cbc_init(BYTE key[32], BYTE iv[16]) TWOFISH_CBC_INIT(key, iv, 128)
409
+
410
/*
  Release a context created by one of the twofish_*_init functions and reset
  the caller's pointer to NULL.

  pctx - address of the context pointer.  A NULL pctx, or a pointer that is
         already NULL, is accepted and nothing is freed.
*/
void twofish_free(struct twofish **pctx)
{
  if (pctx == NULL)
  {
    return;
  }

  /* free(NULL) is a no-op, so no separate guard is needed */
  free(*pctx);
  *pctx = NULL;
}
417
+
418
/*
  Join the bytes carried over from an earlier call with the current input.

  newbuff  - pointer variable that receives the buffer to process
  prev     - carried-over bytes
  nprev    - number of carried-over bytes
  current  - the current input
  ncurrent - length variable of the current input; updated in place

  With nothing carried over, newbuff simply aliases current and ncurrent is
  left alone.  Otherwise newbuff becomes a zero-filled allocation holding
  prev followed by current (sized up to the next multiple of 16, plus one
  byte) and ncurrent grows by nprev.  The caller must free newbuff when it
  differs from current.  Allocation failure is reported with assert().
*/
#define CONCAT_PREV_BUFFER(newbuff, prev, nprev, current, ncurrent) if (!(nprev)) { \
    newbuff = current; \
  } \
  else { \
    unsigned int snaplen = (16 - ((ncurrent + nprev) % 16)) % 16; \
    newbuff = calloc(ncurrent + nprev + snaplen + 1, sizeof(BYTE)); \
    assert(newbuff); \
    memcpy(newbuff, prev, nprev); \
    memcpy(&newbuff[nprev], current, ncurrent); \
    ncurrent = ncurrent + nprev; \
  }
431
+
432
/* ECB: copy each 16-byte block of text into ctx->epv, encrypt it there and
   copy the result to the same offset of target */
#define ENCRYPT_ECB(ctx, nblock, text, target) do { \
    for (int blk_ = 0; blk_ < nblock; ++blk_) { \
      memcpy(ctx->epv, &text[blk_ * 16], 16); \
      twofish_internal_encrypt(ctx->K, ctx->QF, ctx->epv); \
      memcpy(&target[blk_ * 16], ctx->epv, 16); \
    } \
  } while (0)

/* CBC: XOR each block of text into ctx->epv (which holds the iv or the
   previous ciphertext block), encrypt ctx->epv in place and copy it out */
#define ENCRYPT_CBC(ctx, nblock, text, target) do { \
    for (int blk_ = 0; blk_ < nblock; ++blk_) { \
      fix_xor(ctx->epv, &text[blk_ * 16]); \
      twofish_internal_encrypt(ctx->K, ctx->QF, ctx->epv); \
      memcpy(&target[blk_ * 16], ctx->epv, 16); \
    } \
  } while (0)

/* encrypt nblock blocks of text into crypt using the mode stored in ctx;
   any other mode value does nothing */
#define ENCRYPT_WITH(ctx, nblock, text, crypt) do { \
    if (ctx->mode == twofish_mode_ecb) { \
      ENCRYPT_ECB(ctx, nblock, text, crypt); \
    } else if (ctx->mode == twofish_mode_cbc) { \
      ENCRYPT_CBC(ctx, nblock, text, crypt); \
    } \
  } while (0)
452
+
453
+ int twofish_encrypt_update(
454
+ struct twofish *ctx,
455
+ BYTE plain_text[],
456
+ u32 text_len,
457
+ BYTE crypted_text[],
458
+ u32 crypted_size)
459
+ {
460
+ u32 nblock;
461
+ int lrest;
462
+ BYTE *text = 0;
463
+
464
+ CONCAT_PREV_BUFFER(text, ctx->rest, ctx->nrest, plain_text, text_len);
465
+
466
+ nblock = text_len / 16;
467
+ lrest = text_len % 16;
468
+
469
+ ENCRYPT_WITH(ctx, nblock, text, crypted_text);
470
+
471
+ if (lrest) {
472
+ ctx->nrest = lrest;
473
+ memset(ctx->rest, 0, 16);
474
+ memcpy(ctx->rest, &text[16 * nblock], lrest);
475
+ } else {
476
+ ctx->nrest = 0;
477
+ memset(ctx->rest, 0, 16);
478
+ }
479
+
480
+ if (text && text != plain_text) {
481
+ free(text);
482
+ }
483
+
484
+ return nblock * 16;
485
+ }
486
+
487
+ int twofish_encrypt_final(
488
+ struct twofish *ctx,
489
+ BYTE plain_text[],
490
+ u32 text_len,
491
+ BYTE crypted_text[],
492
+ u32 crypted_size)
493
+ {
494
+ u32 nblock;
495
+ int lrest;
496
+ BYTE *text = 0;
497
+
498
+ CONCAT_PREV_BUFFER(text, ctx->rest, ctx->nrest, plain_text, text_len);
499
+
500
+ nblock = text_len / 16;
501
+ lrest = text_len % 16;
502
+
503
+ if (lrest) {
504
+ if (text && text != plain_text) {
505
+ free(text);
506
+ }
507
+ return lrest - 16;
508
+ }
509
+
510
+ ENCRYPT_WITH(ctx, nblock, text, crypted_text);
511
+
512
+ if (text && text != plain_text) {
513
+ free(text);
514
+ }
515
+
516
+ return nblock * 16;
517
+ }
518
+
519
/* ECB: copy each 16-byte block of crypted into ctx->dpv, decrypt it there
   and copy the result to the same offset of plain */
#define DECRYPT_ECB(ctx, nblock, crypted, plain) for (int i = 0; i < nblock; i++) { \
    memcpy(ctx->dpv, &crypted[i * 16], 16); \
    twofish_internal_decrypt(ctx->K, ctx->QF, ctx->dpv); \
    memcpy(&plain[i * 16], ctx->dpv, 16); \
  }

/* CBC: decrypt each block and XOR it with ctx->dpv (the iv or the previous
   ciphertext block).  The ciphertext block is saved in a local copy first,
   so the chaining value stays correct when plain and crypted are the same
   buffer. */
#define DECRYPT_CBC(ctx, nblock, crypted, plain) for (int i = 0; i < nblock; i++) { \
    BYTE cblock_[16]; \
    memcpy(cblock_, &crypted[i * 16], 16); \
    memcpy(&plain[i * 16], cblock_, 16); \
    twofish_internal_decrypt(ctx->K, ctx->QF, &plain[i * 16]); \
    fix_xor(&plain[i * 16], ctx->dpv); \
    memcpy(ctx->dpv, cblock_, 16); \
  }

/* Decrypt nblock blocks of crypted into plain using the mode stored in ctx;
   any other mode value does nothing.  The block-count parameter used to be
   spelled "nbloc" while the body said "nblock", so the macro only worked
   when the caller's variable happened to be named nblock. */
#define DECRYPT_WITH(ctx, nblock, crypted, plain) switch (ctx->mode) { \
  case twofish_mode_ecb: \
    DECRYPT_ECB(ctx, nblock, crypted, plain); \
    break; \
  case twofish_mode_cbc: \
    DECRYPT_CBC(ctx, nblock, crypted, plain); \
    break; \
  default: \
    break; \
  }
540
+
541
+ int twofish_decrypt_update(
542
+ struct twofish *ctx,
543
+ BYTE crypted_text[],
544
+ u32 crypted_len,
545
+ BYTE plain_text[],
546
+ u32 text_size)
547
+ {
548
+ u32 nblock;
549
+ int lrest;
550
+ BYTE *text = 0;
551
+
552
+ CONCAT_PREV_BUFFER(text, ctx->rest, ctx->nrest, crypted_text, crypted_len);
553
+
554
+ nblock = crypted_len / 16;
555
+ lrest = crypted_len % 16;
556
+
557
+ DECRYPT_WITH(ctx, nblock, text, plain_text);
558
+
559
+ if (lrest) {
560
+ ctx->nrest = lrest;
561
+ memset(ctx->rest, 0, 16);
562
+ memcpy(ctx->rest, &text[nblock * 16], lrest);
563
+ }
564
+
565
+ if (text && text != crypted_text) {
566
+ free(text);
567
+ }
568
+
569
+ return nblock * 16;
570
+ }
571
+
572
+ int twofish_decrypt_final(
573
+ struct twofish *ctx,
574
+ BYTE crypted_text[],
575
+ u32 crypted_len,
576
+ BYTE plain_text[],
577
+ u32 text_size)
578
+ {
579
+ u32 nblock;
580
+ int lrest;
581
+ BYTE *text = 0;
582
+
583
+ CONCAT_PREV_BUFFER(text, ctx->rest, ctx->nrest, crypted_text, crypted_len);
584
+
585
+ nblock = crypted_len / 16;
586
+ lrest = crypted_len % 16;
587
+
588
+ if (lrest) {
589
+ if (text && text != crypted_text) {
590
+ free(text);
591
+ }
592
+ return lrest - 16;
593
+ }
594
+
595
+ DECRYPT_WITH(ctx, nblock, text, plain_text);
596
+
597
+ if (text && text != crypted_text) {
598
+ free(text);
599
+ }
600
+
601
+ return nblock * 16;
602
+ }