x25519 0.0.0 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +3 -0
  3. data/.rubocop.yml +3 -0
  4. data/CHANGES.md +3 -0
  5. data/Gemfile +3 -2
  6. data/README.md +205 -14
  7. data/Rakefile +9 -1
  8. data/ext/x25519/cputest.c +68 -0
  9. data/ext/x25519/extconf.rb +31 -0
  10. data/ext/x25519/ref10/api.h +2 -0
  11. data/ext/x25519/ref10/base.c +12 -0
  12. data/ext/x25519/ref10/fe.h +44 -0
  13. data/ext/x25519/ref10/fe_0.c +19 -0
  14. data/ext/x25519/ref10/fe_1.c +19 -0
  15. data/ext/x25519/ref10/fe_add.c +57 -0
  16. data/ext/x25519/ref10/fe_copy.c +29 -0
  17. data/ext/x25519/ref10/fe_cswap.c +73 -0
  18. data/ext/x25519/ref10/fe_frombytes.c +67 -0
  19. data/ext/x25519/ref10/fe_invert.c +14 -0
  20. data/ext/x25519/ref10/fe_mul.c +252 -0
  21. data/ext/x25519/ref10/fe_mul121666.c +69 -0
  22. data/ext/x25519/ref10/fe_sq.c +148 -0
  23. data/ext/x25519/ref10/fe_sub.c +57 -0
  24. data/ext/x25519/ref10/fe_tobytes.c +119 -0
  25. data/ext/x25519/ref10/montgomery.h +140 -0
  26. data/ext/x25519/ref10/pow225521.h +160 -0
  27. data/ext/x25519/ref10/scalarmult.c +46 -0
  28. data/ext/x25519/{fp25519_x64.c → rfc7748_precomputed/fp25519_x64.c} +14 -16
  29. data/ext/x25519/{fp25519_x64.h → rfc7748_precomputed/fp25519_x64.h} +6 -10
  30. data/ext/x25519/{bytes.h → rfc7748_precomputed/rfc7748_precomputed.h} +13 -5
  31. data/ext/x25519/{table_ladder_x25519.h → rfc7748_precomputed/table_ladder_x25519.h} +0 -0
  32. data/ext/x25519/{x25519_x64.c → rfc7748_precomputed/x25519_x64.c} +16 -29
  33. data/ext/x25519/x25519.c +325 -0
  34. data/ext/x25519/x25519.h +24 -0
  35. data/x25519.gemspec +3 -6
  36. metadata +32 -15
  37. data/ext/x25519/bytes.c +0 -42
  38. data/ext/x25519/random.c +0 -51
  39. data/ext/x25519/random.h +0 -24
  40. data/ext/x25519/rfc7748_precompted.h +0 -49
  41. data/ext/x25519/rfc7748_precomputed.c +0 -20
  42. data/lib/x25519.rb +0 -7
  43. data/lib/x25519/version.rb +0 -5
@@ -0,0 +1,160 @@
1
+
2
/*
 * pow225521.h -- exponentiation chain computing out = z^(2^255 - 21).
 *
 * This is a code fragment rather than a standalone header: it is written to
 * be #include'd inside a function body that already provides
 *
 *   z        input field element            (z1 in the qhasm source)
 *   out      result field element           (z_255_21 in the qhasm source)
 *   t0..t3   scratch field elements
 *   i        integer loop counter
 *
 * In the comments below zN stands for z^N and z_A_B for z^(2^A - 2^B).
 * The generator also emitted zero-iteration "for (i = 1;i < 1;++i)" loops
 * after each single squaring; they executed nothing and are left out here.
 *
 * NOTE(review): 2^255 - 21 equals (2^255 - 19) - 2, so this is presumably the
 * Fermat inversion used by fe_invert() -- confirm against the includer.
 */

/* z2 = z1^2                                   (t0) */
fe_sq(t0, z);

/* z8 = z2^(2^2)                               (t1) */
fe_sq(t1, t0);
for (i = 1; i < 2; ++i) {
  fe_sq(t1, t1);
}

/* z9 = z1 * z8                                (t1) */
fe_mul(t1, z, t1);

/* z11 = z2 * z9                               (t0, kept until the last step) */
fe_mul(t0, t0, t1);

/* z22 = z11^2                                 (t2) */
fe_sq(t2, t0);

/* z_5_0 = z9 * z22                            (t1) */
fe_mul(t1, t1, t2);

/* z_10_5 = z_5_0^(2^5)                        (t2) */
fe_sq(t2, t1);
for (i = 1; i < 5; ++i) {
  fe_sq(t2, t2);
}

/* z_10_0 = z_10_5 * z_5_0                     (t1) */
fe_mul(t1, t2, t1);

/* z_20_10 = z_10_0^(2^10)                     (t2) */
fe_sq(t2, t1);
for (i = 1; i < 10; ++i) {
  fe_sq(t2, t2);
}

/* z_20_0 = z_20_10 * z_10_0                   (t2) */
fe_mul(t2, t2, t1);

/* z_40_20 = z_20_0^(2^20)                     (t3) */
fe_sq(t3, t2);
for (i = 1; i < 20; ++i) {
  fe_sq(t3, t3);
}

/* z_40_0 = z_40_20 * z_20_0                   (t2) */
fe_mul(t2, t3, t2);

/* z_50_10 = z_40_0^(2^10)                     (t2) */
fe_sq(t2, t2);
for (i = 1; i < 10; ++i) {
  fe_sq(t2, t2);
}

/* z_50_0 = z_50_10 * z_10_0                   (t1) */
fe_mul(t1, t2, t1);

/* z_100_50 = z_50_0^(2^50)                    (t2) */
fe_sq(t2, t1);
for (i = 1; i < 50; ++i) {
  fe_sq(t2, t2);
}

/* z_100_0 = z_100_50 * z_50_0                 (t2) */
fe_mul(t2, t2, t1);

/* z_200_100 = z_100_0^(2^100)                 (t3) */
fe_sq(t3, t2);
for (i = 1; i < 100; ++i) {
  fe_sq(t3, t3);
}

/* z_200_0 = z_200_100 * z_100_0               (t2) */
fe_mul(t2, t3, t2);

/* z_250_50 = z_200_0^(2^50)                   (t2) */
fe_sq(t2, t2);
for (i = 1; i < 50; ++i) {
  fe_sq(t2, t2);
}

/* z_250_0 = z_250_50 * z_50_0                 (t1) */
fe_mul(t1, t2, t1);

/* z_255_5 = z_250_0^(2^5)                     (t1) */
fe_sq(t1, t1);
for (i = 1; i < 5; ++i) {
  fe_sq(t1, t1);
}

/* z_255_21 = z_255_5 * z11                    (out) */
fe_mul(out, t1, t0);
@@ -0,0 +1,46 @@
1
+ #include "fe.h"
2
+
3
+ int x25519_ref10_scalarmult(uint8_t *q, const uint8_t *n, const uint8_t *p)
4
+ {
5
+ uint8_t e[32];
6
+ unsigned int i;
7
+ fe x1;
8
+ fe x2;
9
+ fe z2;
10
+ fe x3;
11
+ fe z3;
12
+ fe tmp0;
13
+ fe tmp1;
14
+ int pos;
15
+ unsigned int swap;
16
+ unsigned int b;
17
+
18
+ for (i = 0;i < 32;++i) e[i] = n[i];
19
+ e[0] &= 248;
20
+ e[31] &= 127;
21
+ e[31] |= 64;
22
+
23
+ fe_frombytes(x1,p);
24
+ fe_1(x2);
25
+ fe_0(z2);
26
+ fe_copy(x3,x1);
27
+ fe_1(z3);
28
+
29
+ swap = 0;
30
+ for (pos = 254;pos >= 0;--pos) {
31
+ b = e[pos / 8] >> (pos & 7);
32
+ b &= 1;
33
+ swap ^= b;
34
+ fe_cswap(x2,x3,swap);
35
+ fe_cswap(z2,z3,swap);
36
+ swap = b;
37
+ #include "montgomery.h"
38
+ }
39
+ fe_cswap(x2,x3,swap);
40
+ fe_cswap(z2,z3,swap);
41
+
42
+ fe_invert(z2,z2);
43
+ fe_mul(x2,x2,z2);
44
+ fe_tobytes(q,x2);
45
+ return 0;
46
+ }
@@ -2,27 +2,30 @@
2
2
  * Copyright (c) 2017 Armando Faz <armfazh@ic.unicamp.br>.
3
3
  * Institute of Computing.
4
4
  * University of Campinas, Brazil.
5
- *
6
- * This program is free software: you can redistribute it and/or modify
7
- * it under the terms of the GNU Lesser General Public License as
5
+ *
6
+ * This program is free software: you can redistribute it and/or modify
7
+ * it under the terms of the GNU Lesser General Public License as
8
8
  * published by the Free Software Foundation, version 3.
9
9
  *
10
- * This program is distributed in the hope that it will be useful, but
11
- * WITHOUT ANY WARRANTY; without even the implied warranty of
12
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
10
+ * This program is distributed in the hope that it will be useful, but
11
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
13
  * Lesser General Public License for more details.
14
14
  *
15
15
  * You should have received a copy of the GNU Lesser General Public License
16
16
  * along with this program. If not, see <http://www.gnu.org/licenses/>.
17
17
  */
18
- #include "random.h"
19
- #include "bytes.h"
20
18
  #include "fp25519_x64.h"
21
19
 
22
- void random_EltFp25519_1w_x64(uint64_t *A)
20
/**
 * Compares two byte buffers without any data-dependent branch.
 *
 * The per-byte differences are OR-ed together.  Summing them in an 8-bit
 * accumulator instead would wrap modulo 256 and report unequal buffers as
 * equal (for example two bytes that each differ by 0x80).
 *
 * @param A          first buffer, at least num_bytes long
 * @param B          second buffer, at least num_bytes long
 * @param num_bytes  number of bytes to compare
 * @return 0 when the first num_bytes bytes are identical (also when
 *         num_bytes is 0), a non-zero value otherwise.
 */
int compare_bytes(uint8_t* A, uint8_t* B,unsigned int num_bytes)
{
	unsigned int i;
	uint8_t diff = 0;

	for (i = 0; i < num_bytes; i++) {
		/* A set bit can never be cleared again, so any mismatch sticks. */
		diff = (uint8_t)(diff | (A[i] ^ B[i]));
	}
	return diff;
}
27
30
 
28
31
  int compare_EltFp25519_1w_x64(uint64_t *A, uint64_t *B)
@@ -30,11 +33,6 @@ int compare_EltFp25519_1w_x64(uint64_t *A, uint64_t *B)
30
33
  return compare_bytes((uint8_t*)A,(uint8_t*)B,SIZE_ELEMENT_BYTES);
31
34
  }
32
35
 
33
- void print_EltFp25519_1w_x64(uint64_t *A)
34
- {
35
- print_bytes((uint8_t*)A,SIZE_ELEMENT_BYTES);
36
- }
37
-
38
36
  /**
39
37
  *
40
38
  * @param c Two 512-bit products: c[0:7]=a[0:3]*b[0:3] and c[8:15]=a[4:7]*b[4:7]
@@ -2,14 +2,14 @@
2
2
  * Copyright (c) 2017 Armando Faz <armfazh@ic.unicamp.br>.
3
3
  * Institute of Computing.
4
4
  * University of Campinas, Brazil.
5
- *
6
- * This program is free software: you can redistribute it and/or modify
7
- * it under the terms of the GNU Lesser General Public License as
5
+ *
6
+ * This program is free software: you can redistribute it and/or modify
7
+ * it under the terms of the GNU Lesser General Public License as
8
8
  * published by the Free Software Foundation, version 3.
9
9
  *
10
- * This program is distributed in the hope that it will be useful, but
11
- * WITHOUT ANY WARRANTY; without even the implied warranty of
12
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
10
+ * This program is distributed in the hope that it will be useful, but
11
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
13
  * Lesser General Public License for more details.
14
14
  *
15
15
  * You should have received a copy of the GNU Lesser General Public License
@@ -34,8 +34,6 @@ typedef ALIGN uint64_t EltFp25519_1w_Buffer_x64[2*NUM_WORDS_ELTFP25519_X64];
34
34
  typedef ALIGN uint64_t EltFp25519_2w_x64[2*NUM_WORDS_ELTFP25519_X64];
35
35
  typedef ALIGN uint64_t EltFp25519_2w_Buffer_x64[4*NUM_WORDS_ELTFP25519_X64];
36
36
 
37
- void print_bytes(uint8_t * A,int num_bytes);
38
-
39
37
  /* Integer Arithmetic */
40
38
  void mul2_256x256_integer_x64(uint64_t *const c, uint64_t *const a, uint64_t *const b);
41
39
  void sqr2_256x256_integer_x64(uint64_t *const c, uint64_t *const a);
@@ -47,10 +45,8 @@ void red_EltFp25519_1w_x64(uint64_t *const c, uint64_t *const a);
47
45
 
48
46
  /* Prime Field Util */
49
47
  void random_EltFp25519_1w_x64(uint64_t *A);
50
- void print_EltFp25519_1w_x64(uint64_t *A);
51
48
  int compare_EltFp25519_1w_x64(uint64_t *A, uint64_t *B);
52
49
  void random_EltFp25519_2w_x64(uint64_t *A);
53
- void print_EltFp25519_2w(uint64_t *A);
54
50
  int compare_EltFp25519_2w(uint64_t *A, uint64_t *B);
55
51
 
56
52
  /* Prime Field Arithmetic */
@@ -15,11 +15,19 @@
15
15
  * You should have received a copy of the GNU Lesser General Public License
16
16
  * along with this program. If not, see <http://www.gnu.org/licenses/>.
17
17
  */
18
- #ifndef BYTES_H
19
- #define BYTES_H
18
+ #ifndef RFC7748_PRECOMPUTED_H
19
+ #define RFC7748_PRECOMPUTED_H
20
20
 
21
21
  #include <stdint.h>
22
- void print_bytes(uint8_t * A, int num_bytes);
23
- int compare_bytes(uint8_t* A, uint8_t* B,unsigned int num_bytes);
24
22
 
25
- #endif /* BYTES_H */
23
+ #define ALIGN_BYTES 32
24
+ #ifdef __INTEL_COMPILER
25
+ #define ALIGN __declspec(align(ALIGN_BYTES))
26
+ #else
27
+ #define ALIGN __attribute__ ((aligned (ALIGN_BYTES)))
28
+ #endif
29
+
30
+ #define X25519_KEYSIZE_BYTES 32
31
+ typedef ALIGN uint8_t X25519_KEY[X25519_KEYSIZE_BYTES];
32
+
33
+ #endif /* RFC7748_PRECOMPUTED_H */
@@ -2,32 +2,22 @@
2
2
  * Copyright (c) 2017 Armando Faz <armfazh@ic.unicamp.br>.
3
3
  * Institute of Computing.
4
4
  * University of Campinas, Brazil.
5
- *
6
- * This program is free software: you can redistribute it and/or modify
7
- * it under the terms of the GNU Lesser General Public License as
5
+ *
6
+ * This program is free software: you can redistribute it and/or modify
7
+ * it under the terms of the GNU Lesser General Public License as
8
8
  * published by the Free Software Foundation, version 3.
9
9
  *
10
- * This program is distributed in the hope that it will be useful, but
11
- * WITHOUT ANY WARRANTY; without even the implied warranty of
12
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
10
+ * This program is distributed in the hope that it will be useful, but
11
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13
13
  * Lesser General Public License for more details.
14
14
  *
15
15
  * You should have received a copy of the GNU Lesser General Public License
16
16
  * along with this program. If not, see <http://www.gnu.org/licenses/>.
17
17
  */
18
- #include <fp25519_x64.h>
19
- #include <table_ladder_x25519.h>
20
- #include "rfc7748_precompted.h"
21
- #include "random.h"
22
-
23
- void print_X25519_key(argKey key)
24
- {
25
- print_bytes(key,X25519_KEYSIZE_BYTES);
26
- }
27
- void random_X25519_key(argKey key)
28
- {
29
- random_bytes(key,X25519_KEYSIZE_BYTES);
30
- }
18
+ #include "fp25519_x64.h"
19
+ #include "table_ladder_x25519.h"
20
+ #include "rfc7748_precomputed.h"
31
21
 
32
22
  /****** Implementation of Montgomery Ladder Algorithm ************/
33
23
  static inline void cswap_x64(uint64_t bit, uint64_t *const px, uint64_t *const py)
@@ -42,7 +32,7 @@ static inline void cswap_x64(uint64_t bit, uint64_t *const px, uint64_t *const p
42
32
  }
43
33
  }
44
34
 
45
- static void x25519_shared_secret_x64(argKey shared, argKey session_key, argKey private_key)
35
+ void x25519_rfc7748_precomputed_scalarmult(uint8_t *shared, uint8_t *private_key, uint8_t *session_key)
46
36
  {
47
37
  ALIGN uint64_t buffer[4*NUM_WORDS_ELTFP25519_X64];
48
38
  ALIGN uint64_t coordinates[4*NUM_WORDS_ELTFP25519_X64];
@@ -143,11 +133,15 @@ static void x25519_shared_secret_x64(argKey shared, argKey session_key, argKey p
143
133
  private_key[0] = (uint8_t)(save & 0xFF);
144
134
  }
145
135
 
146
- static void x25519_keygen_precmp_x64(argKey session_key, argKey private_key)
136
+ void x25519_rfc7748_precomputed_scalarmult_base(uint8_t *session_key, uint8_t *private_key)
147
137
  {
148
138
  ALIGN uint64_t buffer[4*NUM_WORDS_ELTFP25519_X64];
149
139
  ALIGN uint64_t coordinates[4*NUM_WORDS_ELTFP25519_X64];
150
140
  ALIGN uint64_t workspace[4*NUM_WORDS_ELTFP25519_X64];
141
+ const int ite[4] = {64,64,64,63};
142
+ const int q = 3;
143
+ uint64_t swap = 1;
144
+ uint64_t bit;
151
145
  uint64_t save;
152
146
 
153
147
  int i=0, j=0, k=0;
@@ -191,17 +185,13 @@ static void x25519_keygen_precmp_x64(argKey session_key, argKey private_key)
191
185
  Ur2[0] = 0x7e94e1fec82faabd;
192
186
 
193
187
  /* main-loop */
194
- const int ite[4] = {64,64,64,63};
195
- const int q = 3;
196
- uint64_t swap = 1;
197
-
198
188
  j = q;
199
189
  for(i=0;i<NUM_WORDS_ELTFP25519_X64;i++)
200
190
  {
201
191
  while(j < ite[i])
202
192
  {
203
193
  k = (64*i+j-q);
204
- uint64_t bit = (key[i]>>j)&0x1;
194
+ bit = (key[i]>>j)&0x1;
205
195
  swap = swap ^ bit;
206
196
  cswap_x64(swap, Ur1, Ur2);
207
197
  cswap_x64(swap, Zr1, Zr2);
@@ -239,6 +229,3 @@ static void x25519_keygen_precmp_x64(argKey session_key, argKey private_key)
239
229
  private_key[X25519_KEYSIZE_BYTES-1] = (uint8_t)((save>>16) & 0xFF);
240
230
  private_key[0] = (uint8_t)(save & 0xFF);
241
231
  }
242
-
243
- const KeyGen X25519_KeyGen_x64 = x25519_keygen_precmp_x64;
244
- const Shared X25519_Shared_x64 = x25519_shared_secret_x64;
@@ -0,0 +1,325 @@
1
+ /* Ruby C extension providing bindings to the X25519 Diffie-Hellman algorithm */
2
+
3
+ #define _POSIX_C_SOURCE 200809L
4
+ #include <stdlib.h>
5
+
6
+ #include "ruby.h"
7
+ #include "x25519.h"
8
+
9
+ /* The X25519::VERSION */
10
+ #define GEM_VERSION "0.1.0"
11
+
12
+ /* X25519 module method prototypes */
13
+ static VALUE X25519_backend(VALUE self);
14
+ static VALUE X25519_self_test(VALUE self);
15
+ static VALUE X25519_diffie_hellman(VALUE self, VALUE public_key, VALUE secret_key);
16
+
17
+ /* X25519::Scalar prototypes */
18
+ static VALUE cX25519_Scalar_allocate(VALUE klass);
19
+ static void cX25519_Scalar_mark(X25519_KEY *scalar);
20
+ static void cX25519_Scalar_free(X25519_KEY *scalar);
21
+ static VALUE cX25519_Scalar_generate(VALUE self);
22
+ static VALUE cX25519_Scalar_initialize(VALUE self, VALUE bytes);
23
+ static VALUE cX25519_Scalar_multiply_base(VALUE self);
24
+ static VALUE cX25519_Scalar_multiply(VALUE self, VALUE montgomery_u);
25
+ static VALUE cX25519_Scalar_to_bytes(VALUE self);
26
+
27
+ /* X25519::MontgomeryU prototypes */
28
+ static VALUE cX25519_MontgomeryU_allocate(VALUE klass);
29
+ static void cX25519_MontgomeryU_mark(X25519_KEY *coord);
30
+ static void cX25519_MontgomeryU_free(X25519_KEY *coord);
31
+ static VALUE cX25519_MontgomeryU_initialize(VALUE self, VALUE bytes);
32
+ static VALUE cX25519_MontgomeryU_to_bytes(VALUE self);
33
+
34
+ static VALUE mX25519 = Qnil;
35
+ static VALUE cX25519_Scalar = Qnil;
36
+ static VALUE cX25519_MontgomeryU = Qnil;
37
+
38
+ /* Are we on a 4th gen Intel Core CPU architecture that supports the
39
+ rfc7748_precomputed backend? */
40
+ static int use_rfc7748_precomputed = 0;
41
+
42
+ /* Initialize the Ruby module */
43
+ void Init_x25519()
44
+ {
45
+ /* Test for support for the rfc7748_precomputed backend */
46
+ use_rfc7748_precomputed = check_4th_gen_intel_core_features();
47
+
48
+ /* Used for key generation */
49
+ rb_require("securerandom");
50
+
51
+ mX25519 = rb_define_module("X25519");
52
+ rb_define_const(mX25519, "VERSION", rb_str_new2(GEM_VERSION));
53
+ rb_define_singleton_method(mX25519, "backend", X25519_backend, 0);
54
+ rb_define_singleton_method(mX25519, "self_test", X25519_self_test, 0);
55
+ rb_define_singleton_method(mX25519, "diffie_hellman", X25519_diffie_hellman, 2);
56
+
57
+ cX25519_Scalar = rb_define_class_under(mX25519, "Scalar", rb_cObject);
58
+ rb_define_alloc_func(cX25519_Scalar, cX25519_Scalar_allocate);
59
+ rb_define_singleton_method(cX25519_Scalar, "generate", cX25519_Scalar_generate, 0);
60
+ rb_define_method(cX25519_Scalar, "initialize", cX25519_Scalar_initialize, 1);
61
+ rb_define_method(cX25519_Scalar, "multiply_base", cX25519_Scalar_multiply_base, 0);
62
+ rb_define_method(cX25519_Scalar, "public_key", cX25519_Scalar_multiply_base, 0);
63
+ rb_define_method(cX25519_Scalar, "multiply", cX25519_Scalar_multiply, 1);
64
+ rb_define_method(cX25519_Scalar, "diffie_hellman", cX25519_Scalar_multiply, 1);
65
+ rb_define_method(cX25519_Scalar, "to_bytes", cX25519_Scalar_to_bytes, 0);
66
+ rb_define_method(cX25519_Scalar, "to_str", cX25519_Scalar_to_bytes, 0);
67
+
68
+ cX25519_MontgomeryU = rb_define_class_under(mX25519, "MontgomeryU", rb_cObject);
69
+ rb_define_alloc_func(cX25519_MontgomeryU, cX25519_MontgomeryU_allocate);
70
+ rb_define_method(cX25519_MontgomeryU, "initialize", cX25519_MontgomeryU_initialize, 1);
71
+ rb_define_method(cX25519_MontgomeryU, "to_bytes", cX25519_MontgomeryU_to_bytes, 0);
72
+ rb_define_method(cX25519_MontgomeryU, "to_str", cX25519_MontgomeryU_to_bytes, 0);
73
+
74
+ /* Run the self-test on load to ensure everything is working */
75
+ rb_funcall(mX25519, rb_intern("self_test"), 0);
76
+ }
77
+
78
+ /* Return a symbol identifying the backend in use */
79
+ static VALUE X25519_backend(VALUE self)
80
+ {
81
+ switch(use_rfc7748_precomputed) {
82
+ case 1:
83
+ return ID2SYM(rb_intern("rfc7748_precomputed"));
84
+ case 0:
85
+ return ID2SYM(rb_intern("ref10"));
86
+ default:
87
+ rb_raise(rb_eRuntimeError, "invalid X25519 backend! (%d)", use_rfc7748_precomputed);
88
+ }
89
+ }
90
+
91
+ /* Perform an end-to-end test of the Ruby binding to ensure it's working correctly */
92
+ static VALUE X25519_self_test(VALUE self)
93
+ {
94
+ VALUE sk, pk, shared;
95
+
96
+ /* Test vectors from RFC 7748 */
97
+ X25519_KEY ietf_cfrg_key0 = {
98
+ 0xa5,0x46,0xe3,0x6b,0xf0,0x52,0x7c,0x9d,
99
+ 0x3b,0x16,0x15,0x4b,0x82,0x46,0x5e,0xdd,
100
+ 0x62,0x14,0x4c,0x0a,0xc1,0xfc,0x5a,0x18,
101
+ 0x50,0x6a,0x22,0x44,0xba,0x44,0x9a,0xc4
102
+ };
103
+
104
+ X25519_KEY ietf_cfrg_input_coord0 = {
105
+ 0xe6,0xdb,0x68,0x67,0x58,0x30,0x30,0xdb,
106
+ 0x35,0x94,0xc1,0xa4,0x24,0xb1,0x5f,0x7c,
107
+ 0x72,0x66,0x24,0xec,0x26,0xb3,0x35,0x3b,
108
+ 0x10,0xa9,0x03,0xa6,0xd0,0xab,0x1c,0x4c
109
+ };
110
+
111
+ X25519_KEY ietf_cfrg_output_coord0 = {
112
+ 0xc3,0xda,0x55,0x37,0x9d,0xe9,0xc6,0x90,
113
+ 0x8e,0x94,0xea,0x4d,0xf2,0x8d,0x08,0x4f,
114
+ 0x32,0xec,0xcf,0x03,0x49,0x1c,0x71,0xf7,
115
+ 0x54,0xb4,0x07,0x55,0x77,0xa2,0x85,0x52
116
+ };
117
+
118
+ sk = rb_str_new((const char *)&ietf_cfrg_key0, X25519_KEYSIZE_BYTES);
119
+ pk = rb_str_new((const char *)&ietf_cfrg_input_coord0, X25519_KEYSIZE_BYTES);
120
+
121
+ shared = rb_funcall(mX25519, rb_intern("diffie_hellman"), 2, sk, pk);
122
+
123
+ if(RSTRING_LEN(shared) != X25519_KEYSIZE_BYTES ||
124
+ memcmp(RSTRING_PTR(shared), ietf_cfrg_output_coord0, X25519_KEYSIZE_BYTES) != 0)
125
+ {
126
+ rb_raise(rb_eRuntimeError, "X25519 self-test failed!");
127
+ }
128
+
129
+ return Qtrue;
130
+ }
131
+
132
+ /* Compute Diffie-Hellman for the given key and Montgomery-u coordinate
133
+ * (i.e. variable base scalar multiplication) */
134
+ static VALUE X25519_diffie_hellman(VALUE self, VALUE secret_key, VALUE public_key)
135
+ {
136
+ VALUE scalar, coord, shared;
137
+
138
+ scalar = rb_class_new_instance(1, &secret_key, cX25519_Scalar);
139
+ coord = rb_class_new_instance(1, &public_key, cX25519_MontgomeryU);
140
+ shared = rb_funcall(scalar, rb_intern("multiply"), 1, coord);
141
+
142
+ return rb_funcall(shared, rb_intern("to_bytes"), 0);
143
+ }
144
+
145
+ /********************************
146
+ * X25519::Scalar: private keys *
147
+ ********************************/
148
+
149
+ static VALUE cX25519_Scalar_allocate(VALUE klass)
150
+ {
151
+ X25519_KEY *scalar = NULL;
152
+
153
+ /* Ensure allocation with the correct (32-byte) memory alignent */
154
+ if(posix_memalign((void **)&scalar, ALIGN_BYTES, X25519_KEYSIZE_BYTES)) {
155
+ rb_fatal("x25519: can't allocate memory with posix_memalign()");
156
+ }
157
+
158
+ /* Avoid using unitialized memory */
159
+ memset(scalar, 0, X25519_KEYSIZE_BYTES);
160
+
161
+ return Data_Wrap_Struct(klass, cX25519_Scalar_mark, cX25519_Scalar_free, scalar);
162
+ }
163
+
164
+ static void cX25519_Scalar_mark(X25519_KEY *scalar)
165
+ {
166
+ }
167
+
168
+ static void cX25519_Scalar_free(X25519_KEY *scalar)
169
+ {
170
+ free(scalar);
171
+ }
172
+
173
+ /* Generate a random X25519 private scalar */
174
+ static VALUE cX25519_Scalar_generate(VALUE self)
175
+ {
176
+ VALUE rb_mSecureRandom, scalar_bytes;
177
+ rb_mSecureRandom = rb_const_get(rb_cObject, rb_intern("SecureRandom"));
178
+
179
+ scalar_bytes = rb_funcall(
180
+ rb_mSecureRandom,
181
+ rb_intern("random_bytes"),
182
+ 1,
183
+ INT2NUM(X25519_KEYSIZE_BYTES)
184
+ );
185
+
186
+ return rb_class_new_instance(1, &scalar_bytes, self);
187
+ }
188
+
189
+ /* Create an X25519::Scalar from a String containing bytes */
190
+ static VALUE cX25519_Scalar_initialize(VALUE self, VALUE bytes)
191
+ {
192
+ X25519_KEY *scalar = NULL;
193
+ Data_Get_Struct(self, X25519_KEY, scalar);
194
+
195
+ StringValue(bytes);
196
+ if(RSTRING_LEN(bytes) != X25519_KEYSIZE_BYTES) {
197
+ rb_raise(
198
+ rb_eArgError,
199
+ "expected %d-byte scalar, got %ld",
200
+ X25519_KEYSIZE_BYTES,
201
+ RSTRING_LEN(bytes)
202
+ );
203
+ }
204
+
205
+ memcpy(scalar, RSTRING_PTR(bytes), X25519_KEYSIZE_BYTES);
206
+
207
+ return self;
208
+ }
209
+
210
+ /* Obtain a public key for an X25519 private scalar
211
+ * (i.e. fixed base scalar multiplication ) */
212
+ static VALUE cX25519_Scalar_multiply_base(VALUE self)
213
+ {
214
+ X25519_KEY *scalar = NULL, public_key;
215
+ VALUE public_key_str;
216
+
217
+ Data_Get_Struct(self, X25519_KEY, scalar);
218
+
219
+ /* Avoid using unitialized memory */
220
+ memset(&public_key, 0, X25519_KEYSIZE_BYTES);
221
+
222
+ /* Compute public key from private scalar using fixed-base scalar multiplication */
223
+ if(use_rfc7748_precomputed) {
224
+ x25519_rfc7748_precomputed_scalarmult_base(public_key, *scalar);
225
+ } else {
226
+ x25519_ref10_scalarmult_base(public_key, *scalar);
227
+ }
228
+
229
+ public_key_str = rb_str_new((const char *)&public_key, X25519_KEYSIZE_BYTES);
230
+ return rb_class_new_instance(1, &public_key_str, cX25519_MontgomeryU);
231
+ }
232
+
233
+ /* Obtain a public key for an X25519 private scalar
234
+ * (i.e. fixed base scalar multiplication ) */
235
+ static VALUE cX25519_Scalar_multiply(VALUE self, VALUE montgomery_u)
236
+ {
237
+ X25519_KEY *scalar = NULL, *coord = NULL, product;
238
+ VALUE product_str;
239
+
240
+ if(rb_obj_class(montgomery_u) != cX25519_MontgomeryU) {
241
+ rb_raise(rb_eTypeError, "wrong argument type (expected X25519::MontgomeryU)");
242
+ }
243
+
244
+ Data_Get_Struct(self, X25519_KEY, scalar);
245
+ Data_Get_Struct(montgomery_u, X25519_KEY, coord);
246
+
247
+ /* Avoid using unitialized memory */
248
+ memset(&product, 0, X25519_KEYSIZE_BYTES);
249
+
250
+ /* Compute the Diffie-Hellman shared secret */
251
+ if(use_rfc7748_precomputed) {
252
+ x25519_rfc7748_precomputed_scalarmult(product, *scalar, *coord);
253
+ } else {
254
+ x25519_ref10_scalarmult(product, *scalar, *coord);
255
+ }
256
+
257
+ product_str = rb_str_new((const char *)&product, X25519_KEYSIZE_BYTES);
258
+ return rb_class_new_instance(1, &product_str, cX25519_MontgomeryU);
259
+ }
260
+
261
+ /* Return a String containing the raw bytes of this scalar */
262
+ static VALUE cX25519_Scalar_to_bytes(VALUE self)
263
+ {
264
+ X25519_KEY *scalar = NULL;
265
+ Data_Get_Struct(self, X25519_KEY, scalar);
266
+
267
+ return rb_str_new((const char *)scalar, X25519_KEYSIZE_BYTES);;
268
+ }
269
+
270
+ /************************************
271
+ * X25519::MontgomeryU: public keys *
272
+ ************************************/
273
+
274
+ static VALUE cX25519_MontgomeryU_allocate(VALUE klass)
275
+ {
276
+ X25519_KEY *coord = NULL;
277
+
278
+ /* Ensure allocation with the correct (32-byte) memory alignent */
279
+ if(posix_memalign((void **)&coord, ALIGN_BYTES, X25519_KEYSIZE_BYTES)) {
280
+ rb_fatal("x25519: can't allocate memory with posix_memalign()");
281
+ }
282
+
283
+ /* Avoid using unitialized memory */
284
+ memset(coord, 0, X25519_KEYSIZE_BYTES);
285
+
286
+ return Data_Wrap_Struct(klass, cX25519_MontgomeryU_mark, cX25519_MontgomeryU_free, coord);
287
+ }
288
+
289
+ static void cX25519_MontgomeryU_mark(X25519_KEY *coord)
290
+ {
291
+ }
292
+
293
+ static void cX25519_MontgomeryU_free(X25519_KEY *coord)
294
+ {
295
+ free(coord);
296
+ }
297
+
298
+ static VALUE cX25519_MontgomeryU_initialize(VALUE self, VALUE bytes)
299
+ {
300
+ X25519_KEY *coord = NULL;
301
+ Data_Get_Struct(self, X25519_KEY, coord);
302
+
303
+ StringValue(bytes);
304
+ if(RSTRING_LEN(bytes) != X25519_KEYSIZE_BYTES) {
305
+ rb_raise(
306
+ rb_eArgError,
307
+ "expected %d-byte scalar, got %ld",
308
+ X25519_KEYSIZE_BYTES,
309
+ RSTRING_LEN(bytes)
310
+ );
311
+ }
312
+
313
+ memcpy(coord, RSTRING_PTR(bytes), X25519_KEYSIZE_BYTES);
314
+
315
+ return self;
316
+ }
317
+
318
+ /* Return a String containing the raw bytes of this scalar */
319
+ static VALUE cX25519_MontgomeryU_to_bytes(VALUE self)
320
+ {
321
+ X25519_KEY *coord = NULL;
322
+ Data_Get_Struct(self, X25519_KEY, coord);
323
+
324
+ return rb_str_new((const char *)coord, X25519_KEYSIZE_BYTES);;
325
+ }