crmf 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +12 -0
- data/crmf.gemspec +102 -1
- data/ext/crlibm-1.0beta5/AUTHORS +2 -0
- data/ext/crlibm-1.0beta5/CMakeLists.txt +154 -0
- data/ext/crlibm-1.0beta5/COPYING +340 -0
- data/ext/crlibm-1.0beta5/COPYING.LIB +504 -0
- data/ext/crlibm-1.0beta5/ChangeLog +125 -0
- data/ext/crlibm-1.0beta5/Makefile.am +134 -0
- data/ext/crlibm-1.0beta5/NEWS +0 -0
- data/ext/crlibm-1.0beta5/README +31 -0
- data/ext/crlibm-1.0beta5/README.DEV +23 -0
- data/ext/crlibm-1.0beta5/README.md +5 -0
- data/ext/crlibm-1.0beta5/TODO +66 -0
- data/ext/crlibm-1.0beta5/VERSION +1 -0
- data/ext/crlibm-1.0beta5/acos-td.c +1195 -0
- data/ext/crlibm-1.0beta5/acos-td.h +629 -0
- data/ext/crlibm-1.0beta5/asin-td.c +1297 -0
- data/ext/crlibm-1.0beta5/asin-td.h +620 -0
- data/ext/crlibm-1.0beta5/asincos.c +4488 -0
- data/ext/crlibm-1.0beta5/asincos.h +575 -0
- data/ext/crlibm-1.0beta5/atan-itanium.c +846 -0
- data/ext/crlibm-1.0beta5/atan-pentium.c +280 -0
- data/ext/crlibm-1.0beta5/atan-pentium.h +343 -0
- data/ext/crlibm-1.0beta5/atan_accurate.c +341 -0
- data/ext/crlibm-1.0beta5/atan_accurate.h +198 -0
- data/ext/crlibm-1.0beta5/atan_fast.c +506 -0
- data/ext/crlibm-1.0beta5/atan_fast.h +680 -0
- data/ext/crlibm-1.0beta5/configure.ac +419 -0
- data/ext/crlibm-1.0beta5/crlibm.h +204 -0
- data/ext/crlibm-1.0beta5/crlibm.spec +42 -0
- data/ext/crlibm-1.0beta5/crlibm_private.c +397 -0
- data/ext/crlibm-1.0beta5/crlibm_private.h +1048 -0
- data/ext/crlibm-1.0beta5/csh_fast.c +721 -0
- data/ext/crlibm-1.0beta5/csh_fast.h +771 -0
- data/ext/crlibm-1.0beta5/double-extended.h +496 -0
- data/ext/crlibm-1.0beta5/exp-itanium.c +723 -0
- data/ext/crlibm-1.0beta5/exp-td-standalone.c +87 -0
- data/ext/crlibm-1.0beta5/exp-td.c +1363 -0
- data/ext/crlibm-1.0beta5/exp-td.h +685 -0
- data/ext/crlibm-1.0beta5/exp_build_coeffs/exp_fast_table.c +125 -0
- data/ext/crlibm-1.0beta5/expm1-standalone.c +119 -0
- data/ext/crlibm-1.0beta5/expm1.c +2515 -0
- data/ext/crlibm-1.0beta5/expm1.h +715 -0
- data/ext/crlibm-1.0beta5/interval.h +238 -0
- data/ext/crlibm-1.0beta5/log-de.c +480 -0
- data/ext/crlibm-1.0beta5/log-de.h +747 -0
- data/ext/crlibm-1.0beta5/log-de2.c +280 -0
- data/ext/crlibm-1.0beta5/log-de2.h +2352 -0
- data/ext/crlibm-1.0beta5/log-td.c +1158 -0
- data/ext/crlibm-1.0beta5/log-td.h +819 -0
- data/ext/crlibm-1.0beta5/log.c +2244 -0
- data/ext/crlibm-1.0beta5/log.h +1592 -0
- data/ext/crlibm-1.0beta5/log10-td.c +906 -0
- data/ext/crlibm-1.0beta5/log10-td.h +823 -0
- data/ext/crlibm-1.0beta5/log1p.c +1295 -0
- data/ext/crlibm-1.0beta5/log2-td.c +1521 -0
- data/ext/crlibm-1.0beta5/log2-td.h +821 -0
- data/ext/crlibm-1.0beta5/log2_accurate.c +330 -0
- data/ext/crlibm-1.0beta5/log2_accurate.h +261 -0
- data/ext/crlibm-1.0beta5/log_accurate.c +133 -0
- data/ext/crlibm-1.0beta5/log_accurate.h +261 -0
- data/ext/crlibm-1.0beta5/log_fast.c +360 -0
- data/ext/crlibm-1.0beta5/log_fast.h +440 -0
- data/ext/crlibm-1.0beta5/pow.c +1396 -0
- data/ext/crlibm-1.0beta5/pow.h +3101 -0
- data/ext/crlibm-1.0beta5/prepare +20 -0
- data/ext/crlibm-1.0beta5/rem_pio2_accurate.c +219 -0
- data/ext/crlibm-1.0beta5/rem_pio2_accurate.h +53 -0
- data/ext/crlibm-1.0beta5/scs_lib/AUTHORS +3 -0
- data/ext/crlibm-1.0beta5/scs_lib/COPYING +504 -0
- data/ext/crlibm-1.0beta5/scs_lib/ChangeLog +16 -0
- data/ext/crlibm-1.0beta5/scs_lib/Doxyfile.dev +939 -0
- data/ext/crlibm-1.0beta5/scs_lib/Doxyfile.user +939 -0
- data/ext/crlibm-1.0beta5/scs_lib/INSTALL +215 -0
- data/ext/crlibm-1.0beta5/scs_lib/Makefile.am +17 -0
- data/ext/crlibm-1.0beta5/scs_lib/NEWS +0 -0
- data/ext/crlibm-1.0beta5/scs_lib/README +9 -0
- data/ext/crlibm-1.0beta5/scs_lib/README.DEV +38 -0
- data/ext/crlibm-1.0beta5/scs_lib/TODO +4 -0
- data/ext/crlibm-1.0beta5/scs_lib/VERSION +1 -0
- data/ext/crlibm-1.0beta5/scs_lib/addition_scs.c +623 -0
- data/ext/crlibm-1.0beta5/scs_lib/division_scs.c +110 -0
- data/ext/crlibm-1.0beta5/scs_lib/double2scs.c +174 -0
- data/ext/crlibm-1.0beta5/scs_lib/main.dox +104 -0
- data/ext/crlibm-1.0beta5/scs_lib/multiplication_scs.c +339 -0
- data/ext/crlibm-1.0beta5/scs_lib/poly_fct.c +112 -0
- data/ext/crlibm-1.0beta5/scs_lib/print_scs.c +73 -0
- data/ext/crlibm-1.0beta5/scs_lib/rand_scs.c +63 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs.h +353 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs2double.c +411 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs2mpf.c +58 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs2mpfr.c +61 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs_private.c +23 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs_private.h +133 -0
- data/ext/crlibm-1.0beta5/scs_lib/wrapper_scs.h +486 -0
- data/ext/crlibm-1.0beta5/scs_lib/zero_scs.c +52 -0
- data/ext/crlibm-1.0beta5/trigo_accurate.c +501 -0
- data/ext/crlibm-1.0beta5/trigo_accurate.h +331 -0
- data/ext/crlibm-1.0beta5/trigo_fast.c +1243 -0
- data/ext/crlibm-1.0beta5/trigo_fast.h +639 -0
- data/ext/crlibm-1.0beta5/trigpi.c +1169 -0
- data/ext/crlibm-1.0beta5/trigpi.h +556 -0
- data/ext/crlibm-1.0beta5/triple-double.c +57 -0
- data/ext/crlibm-1.0beta5/triple-double.h +1380 -0
- data/ext/crmf/crmf.c +16 -16
- data/ext/crmf/extconf.rb +12 -8
- data/lib/crmf/version.rb +1 -1
- data/tests/perf.rb +100 -219
- metadata +104 -3
- data/ext/crlibm-1.0beta4.tar.gz +0 -0
@@ -0,0 +1,623 @@
|
|
1
|
+
/** Functions for SCS addition and subtraction
|
2
|
+
|
3
|
+
@file addition_scs.c
|
4
|
+
|
5
|
+
@author Defour David David.Defour@ens-lyon.fr
|
6
|
+
@author Florent de Dinechin Florent.de.Dinechin@ens-lyon.fr
|
7
|
+
|
8
|
+
This file is part of the SCS library.
|
9
|
+
|
10
|
+
Many functions come in two versions, selected by a @#if.
|
11
|
+
|
12
|
+
The reason is that we designed scslib library for internal use with
|
13
|
+
SCS_NB_WORDS==8, so we provide a version with manual optimizations for
|
14
|
+
this case.
|
15
|
+
|
16
|
+
These optimisations include loop unrolling, and sometimes replacing
|
17
|
+
temporary arrays of size 8 with 8 variables, which is more efficient
|
18
|
+
on all modern processors with many (renaming) registers.
|
19
|
+
|
20
|
+
Using gcc3.2 with the most aggressive optimization options for this
|
21
|
+
purpose (-funroll-loops -foptimize-register-move -frerun-loop-opt
|
22
|
+
-frerun-cse-after-loop) is still much slower. At some point in the
|
23
|
+
future, gcc should catch up with unrolling since our loops are so
|
24
|
+
simple, however the replacement of small arrays with variables is
|
25
|
+
not something we are aware of in the literature about compiler
|
26
|
+
optimization.
|
27
|
+
*/
|
28
|
+
|
29
|
+
/*
|
30
|
+
Copyright (C) 2002 David Defour and Florent de Dinechin
|
31
|
+
|
32
|
+
This library is free software; you can redistribute it and/or
|
33
|
+
modify it under the terms of the GNU Lesser General Public
|
34
|
+
License as published by the Free Software Foundation; either
|
35
|
+
version 2.1 of the License, or (at your option) any later version.
|
36
|
+
|
37
|
+
This library is distributed in the hope that it will be useful,
|
38
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
39
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
40
|
+
Lesser General Public License for more details.
|
41
|
+
|
42
|
+
You should have received a copy of the GNU Lesser General Public
|
43
|
+
License along with this library; if not, write to the Free Software
|
44
|
+
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
45
|
+
|
46
|
+
*/
|
47
|
+
|
48
|
+
#include "scs.h"
|
49
|
+
#include "scs_private.h"
|
50
|
+
|
51
|
+
/**
|
52
|
+
This function copies a result into another. There is an unrolled
|
53
|
+
version for the case SCS_NB_WORDS==8.
|
54
|
+
*/
|
55
|
+
void scs_set(scs_ptr result, scs_ptr x){
|
56
|
+
/* unsigned int i;*/
|
57
|
+
|
58
|
+
#if (SCS_NB_WORDS==8)
|
59
|
+
R_HW[0] = X_HW[0]; R_HW[1] = X_HW[1];
|
60
|
+
R_HW[2] = X_HW[2]; R_HW[3] = X_HW[3];
|
61
|
+
R_HW[4] = X_HW[4]; R_HW[5] = X_HW[5];
|
62
|
+
R_HW[6] = X_HW[6]; R_HW[7] = X_HW[7];
|
63
|
+
#else
|
64
|
+
for(i=0; i<SCS_NB_WORDS; i++)
|
65
|
+
R_HW[i] = X_HW[i];
|
66
|
+
#endif
|
67
|
+
R_EXP = X_EXP;
|
68
|
+
R_IND = X_IND;
|
69
|
+
R_SGN = X_SGN;
|
70
|
+
}
|
71
|
+
|
72
|
+
|
73
|
+
/** renormalize a SCS number.
|
74
|
+
|
75
|
+
This function removes the carry from each digit, and also shifts the
|
76
|
+
digits in case of a cancellation (so that if result != 0 then its
|
77
|
+
first digit is non-zero)
|
78
|
+
|
79
|
+
@warning THIS FUNCTION HAS NEVER BEEN PROPERLY TESTED and is
|
80
|
+
currently unused in the library: instead, specific renormalisation
|
81
|
+
steps are fused within the code of the operations which require it.
|
82
|
+
*/
|
83
|
+
|
84
|
+
void scs_renorm(scs_ptr result){
|
85
|
+
unsigned int c;
|
86
|
+
int i, j, k;
|
87
|
+
|
88
|
+
/*
|
89
|
+
* Carry propagate
|
90
|
+
*/
|
91
|
+
for(i=SCS_NB_WORDS-1; i>0; i--){
|
92
|
+
c = R_HW[i] & ~SCS_MASK_RADIX;
|
93
|
+
R_HW[i-1] += c >> SCS_NB_BITS;
|
94
|
+
R_HW[i] = R_HW[i] & SCS_MASK_RADIX;
|
95
|
+
}
|
96
|
+
|
97
|
+
if (R_HW[0] >= SCS_RADIX){
|
98
|
+
/* Carry out! Need to shift digits */
|
99
|
+
c = R_HW[0] & ~SCS_MASK_RADIX;
|
100
|
+
c = c >> SCS_NB_BITS;
|
101
|
+
for(i=SCS_NB_WORDS-1; i>1; i--)
|
102
|
+
R_HW[i] = R_HW[i-1];
|
103
|
+
|
104
|
+
R_HW[1] = R_HW[0] & SCS_MASK_RADIX;
|
105
|
+
R_HW[0] = c;
|
106
|
+
R_IND += 1;
|
107
|
+
|
108
|
+
}else{
|
109
|
+
/* Was there a cancellation ? */
|
110
|
+
if (R_HW[0] == 0){
|
111
|
+
|
112
|
+
k = 1;
|
113
|
+
while ((R_HW[k] == 0) && (k <= SCS_NB_WORDS))
|
114
|
+
k++;
|
115
|
+
|
116
|
+
R_IND -= k;
|
117
|
+
|
118
|
+
for(j=k, i=0; j<SCS_NB_WORDS; j++, i++)
|
119
|
+
R_HW[i] = R_HW[j];
|
120
|
+
|
121
|
+
for( ; i<SCS_NB_WORDS; i++)
|
122
|
+
R_HW[i] = 0;
|
123
|
+
|
124
|
+
}
|
125
|
+
}
|
126
|
+
}
|
127
|
+
|
128
|
+
|
129
|
+
|
130
|
+
/** Renormalization without cancellation check.
|
131
|
+
|
132
|
+
This renormalization step is especially designed for the addition of
|
133
|
+
several numbers with the same sign. In this case, you know that there
|
134
|
+
has been no cancellation, which allows simpler renormalisation.
|
135
|
+
*/
|
136
|
+
|
137
|
+
void scs_renorm_no_cancel_check(scs_ptr result){
|
138
|
+
unsigned int carry, c0;
|
139
|
+
/* int i;*/
|
140
|
+
|
141
|
+
/* Carry propagate */
|
142
|
+
#if (SCS_NB_WORDS==8)
|
143
|
+
carry = R_HW[7] >> SCS_NB_BITS;
|
144
|
+
R_HW[6] += carry; R_HW[7] = R_HW[7] & SCS_MASK_RADIX;
|
145
|
+
carry = R_HW[6] >> SCS_NB_BITS;
|
146
|
+
R_HW[5] += carry; R_HW[6] = R_HW[6] & SCS_MASK_RADIX;
|
147
|
+
carry = R_HW[5] >> SCS_NB_BITS;
|
148
|
+
R_HW[4] += carry; R_HW[5] = R_HW[5] & SCS_MASK_RADIX;
|
149
|
+
carry = R_HW[4] >> SCS_NB_BITS;
|
150
|
+
R_HW[3] += carry; R_HW[4] = R_HW[4] & SCS_MASK_RADIX;
|
151
|
+
carry = R_HW[3] >> SCS_NB_BITS;
|
152
|
+
R_HW[2] += carry; R_HW[3] = R_HW[3] & SCS_MASK_RADIX;
|
153
|
+
carry = R_HW[2] >> SCS_NB_BITS;
|
154
|
+
R_HW[1] += carry; R_HW[2] = R_HW[2] & SCS_MASK_RADIX;
|
155
|
+
carry = R_HW[1] >> SCS_NB_BITS;
|
156
|
+
R_HW[0] += carry; R_HW[1] = R_HW[1] & SCS_MASK_RADIX;
|
157
|
+
#else
|
158
|
+
for(i=(SCS_NB_WORDS-1);i>0;i--){
|
159
|
+
carry = R_HW[i] >> SCS_NB_BITS;
|
160
|
+
R_HW[i-1] += carry;
|
161
|
+
R_HW[i] = R_HW[i] & SCS_MASK_RADIX;
|
162
|
+
}
|
163
|
+
#endif
|
164
|
+
|
165
|
+
if (R_HW[0] >= SCS_RADIX){
|
166
|
+
/* Carry out ! Need to shift digits */
|
167
|
+
c0 = R_HW[0] >> SCS_NB_BITS;
|
168
|
+
|
169
|
+
#if (SCS_NB_WORDS==8)
|
170
|
+
R_HW[7] = R_HW[6]; R_HW[6] = R_HW[5];
|
171
|
+
R_HW[5] = R_HW[4]; R_HW[4] = R_HW[3];
|
172
|
+
R_HW[3] = R_HW[2]; R_HW[2] = R_HW[1];
|
173
|
+
#else
|
174
|
+
for(i=(SCS_NB_WORDS-1); i>1; i--)
|
175
|
+
R_HW[i] = R_HW[i-1];
|
176
|
+
#endif
|
177
|
+
R_HW[1] = R_HW[0] & SCS_MASK_RADIX;
|
178
|
+
R_HW[0] = c0;
|
179
|
+
R_IND += 1;
|
180
|
+
}
|
181
|
+
return;
|
182
|
+
}
|
183
|
+
|
184
|
+
|
185
|
+
|
186
|
+
|
187
|
+
/* addition without renormalisation.
|
188
|
+
|
189
|
+
|
190
|
+
Add two scs number x and y, the result is put into "result".
|
191
|
+
Assumes x.sign == y.sign x.index > y.index.
|
192
|
+
|
193
|
+
The result is not normalized.
|
194
|
+
*/
|
195
|
+
|
196
|
+
static void do_add_no_renorm(scs_ptr result, scs_ptr x, scs_ptr y){
|
197
|
+
unsigned int RES[SCS_NB_WORDS];
|
198
|
+
unsigned int i, j, Diff;
|
199
|
+
|
200
|
+
if (x->exception.i[HI]==0){scs_set(result, y); return; }
|
201
|
+
if (y->exception.i[HI]==0){scs_set(result, x); return; }
|
202
|
+
|
203
|
+
for (i=0; i<SCS_NB_WORDS; i++)
|
204
|
+
RES[i] = X_HW[i];
|
205
|
+
|
206
|
+
Diff = (unsigned int)(X_IND - Y_IND);
|
207
|
+
R_EXP = X_EXP + Y_EXP - 1;
|
208
|
+
R_IND = X_IND;
|
209
|
+
R_SGN = X_SGN;
|
210
|
+
|
211
|
+
for (i=Diff, j=0; i<SCS_NB_WORDS; i++, j++)
|
212
|
+
RES[i] += Y_HW[j];
|
213
|
+
|
214
|
+
for (i=0; i<SCS_NB_WORDS; i++)
|
215
|
+
R_HW[i] = RES[i];
|
216
|
+
|
217
|
+
return;
|
218
|
+
}
|
219
|
+
|
220
|
+
|
221
|
+
/*
|
222
|
+
* Addition without renormalization. Assumes that x.sign == y.sign.
|
223
|
+
*/
|
224
|
+
void scs_add_no_renorm(scs_ptr result, scs_ptr x, scs_ptr y)
|
225
|
+
{
|
226
|
+
if (X_IND >= Y_IND)
|
227
|
+
do_add_no_renorm(result,x,y);
|
228
|
+
else
|
229
|
+
do_add_no_renorm(result,y,x);
|
230
|
+
return;
|
231
|
+
}
|
232
|
+
|
233
|
+
|
234
|
+
|
235
|
+
|
236
|
+
|
237
|
+
|
238
|
+
|
239
|
+
|
240
|
+
|
241
|
+
|
242
|
+
|
243
|
+
|
244
|
+
|
245
|
+
|
246
|
+
|
247
|
+
|
248
|
+
/* The function that does the work in case of an addition
|
249
|
+
|
250
|
+
do_add is the function that does the addition of two SCS numbers,
|
251
|
+
assuming that x.sign == y.sign, X_IND > Y_IND, x and y both
|
252
|
+
non-zero.
|
253
|
+
*/
|
254
|
+
|
255
|
+
static void do_add(scs_ptr result, scs_ptr x, scs_ptr y)
|
256
|
+
{
|
257
|
+
#if (SCS_NB_WORDS==8) /* in this case we unroll all the loops */
|
258
|
+
int Diff;
|
259
|
+
unsigned int carry;
|
260
|
+
unsigned int r0,r1,r2,r3,r4,r5,r6,r7;
|
261
|
+
|
262
|
+
Diff = X_IND - Y_IND;
|
263
|
+
R_EXP = X_EXP + Y_EXP - 1;
|
264
|
+
R_IND = X_IND;
|
265
|
+
R_SGN = X_SGN;
|
266
|
+
#if 0
|
267
|
+
if(Diff<4)
|
268
|
+
if(Diff<2)
|
269
|
+
if(Diff==0)
|
270
|
+
{
|
271
|
+
// case 0:
|
272
|
+
r0 = X_HW[0] + Y_HW[0]; r1 = X_HW[1] + Y_HW[1];
|
273
|
+
r2 = X_HW[2] + Y_HW[2]; r3 = X_HW[3] + Y_HW[3];
|
274
|
+
r4 = X_HW[4] + Y_HW[4]; r5 = X_HW[5] + Y_HW[5];
|
275
|
+
r6 = X_HW[6] + Y_HW[6]; r7 = X_HW[7] + Y_HW[7];
|
276
|
+
}
|
277
|
+
else {
|
278
|
+
// case 1:
|
279
|
+
r0 = X_HW[0]; r1 = X_HW[1] + Y_HW[0];
|
280
|
+
r2 = X_HW[2] + Y_HW[1]; r3 = X_HW[3] + Y_HW[2];
|
281
|
+
r4 = X_HW[4] + Y_HW[3]; r5 = X_HW[5] + Y_HW[4];
|
282
|
+
r6 = X_HW[6] + Y_HW[5]; r7 = X_HW[7] + Y_HW[6];
|
283
|
+
}
|
284
|
+
else if(Diff==2)
|
285
|
+
{
|
286
|
+
//case 2:
|
287
|
+
r0 = X_HW[0]; r1 = X_HW[1];
|
288
|
+
r2 = X_HW[2] + Y_HW[0]; r3 = X_HW[3] + Y_HW[1];
|
289
|
+
r4 = X_HW[4] + Y_HW[2]; r5 = X_HW[5] + Y_HW[3];
|
290
|
+
r6 = X_HW[6] + Y_HW[4]; r7 = X_HW[7] + Y_HW[5];
|
291
|
+
}
|
292
|
+
else
|
293
|
+
{
|
294
|
+
// case 3:
|
295
|
+
r0 = X_HW[0]; r1 = X_HW[1];
|
296
|
+
r2 = X_HW[2]; r3 = X_HW[3] + Y_HW[0];
|
297
|
+
r4 = X_HW[4] + Y_HW[1]; r5 = X_HW[5] + Y_HW[2];
|
298
|
+
r6 = X_HW[6] + Y_HW[3]; r7 = X_HW[7] + Y_HW[4];
|
299
|
+
}
|
300
|
+
else if(Diff<6)
|
301
|
+
if(Diff==4)
|
302
|
+
{
|
303
|
+
// case 4:
|
304
|
+
r0 = X_HW[0]; r1 = X_HW[1];
|
305
|
+
r2 = X_HW[2]; r3 = X_HW[3];
|
306
|
+
r4 = X_HW[4] + Y_HW[0]; r5 = X_HW[5] + Y_HW[1];
|
307
|
+
r6 = X_HW[6] + Y_HW[2]; r7 = X_HW[7] + Y_HW[3];
|
308
|
+
}
|
309
|
+
else {
|
310
|
+
// case 5:
|
311
|
+
r0 = X_HW[0]; r1 = X_HW[1];
|
312
|
+
r2 = X_HW[2]; r3 = X_HW[3];
|
313
|
+
r4 = X_HW[4]; r5 = X_HW[5] + Y_HW[0];
|
314
|
+
r6 = X_HW[6] + Y_HW[1]; r7 = X_HW[7] + Y_HW[2];
|
315
|
+
}
|
316
|
+
else if(Diff<8)
|
317
|
+
if(Diff==6)
|
318
|
+
{
|
319
|
+
// case 6:
|
320
|
+
r0 = X_HW[0]; r1 = X_HW[1];
|
321
|
+
r2 = X_HW[2]; r3 = X_HW[3];
|
322
|
+
r4 = X_HW[4]; r5 = X_HW[5];
|
323
|
+
r6 = X_HW[6] + Y_HW[0]; r7 = X_HW[7] + Y_HW[1];
|
324
|
+
}
|
325
|
+
else {
|
326
|
+
// case 7:
|
327
|
+
r0 = X_HW[0]; r1 = X_HW[1];
|
328
|
+
r2 = X_HW[2]; r3 = X_HW[3];
|
329
|
+
r4 = X_HW[4]; r5 = X_HW[5];
|
330
|
+
r6 = X_HW[6]; r7 = X_HW[7] + Y_HW[0];
|
331
|
+
}
|
332
|
+
|
333
|
+
else
|
334
|
+
{
|
335
|
+
/* Diff >= 8*/
|
336
|
+
R_HW[0] = X_HW[0]; R_HW[1] = X_HW[1];
|
337
|
+
R_HW[2] = X_HW[2]; R_HW[3] = X_HW[3];
|
338
|
+
R_HW[4] = X_HW[4]; R_HW[5] = X_HW[5];
|
339
|
+
R_HW[6] = X_HW[6]; R_HW[7] = X_HW[7];
|
340
|
+
return;
|
341
|
+
}
|
342
|
+
#else
|
343
|
+
switch (Diff){
|
344
|
+
case 0:
|
345
|
+
r0 = X_HW[0] + Y_HW[0]; r1 = X_HW[1] + Y_HW[1];
|
346
|
+
r2 = X_HW[2] + Y_HW[2]; r3 = X_HW[3] + Y_HW[3];
|
347
|
+
r4 = X_HW[4] + Y_HW[4]; r5 = X_HW[5] + Y_HW[5];
|
348
|
+
r6 = X_HW[6] + Y_HW[6]; r7 = X_HW[7] + Y_HW[7]; break;
|
349
|
+
case 1:
|
350
|
+
r0 = X_HW[0]; r1 = X_HW[1] + Y_HW[0];
|
351
|
+
r2 = X_HW[2] + Y_HW[1]; r3 = X_HW[3] + Y_HW[2];
|
352
|
+
r4 = X_HW[4] + Y_HW[3]; r5 = X_HW[5] + Y_HW[4];
|
353
|
+
r6 = X_HW[6] + Y_HW[5]; r7 = X_HW[7] + Y_HW[6]; break;
|
354
|
+
case 2:
|
355
|
+
r0 = X_HW[0]; r1 = X_HW[1];
|
356
|
+
r2 = X_HW[2] + Y_HW[0]; r3 = X_HW[3] + Y_HW[1];
|
357
|
+
r4 = X_HW[4] + Y_HW[2]; r5 = X_HW[5] + Y_HW[3];
|
358
|
+
r6 = X_HW[6] + Y_HW[4]; r7 = X_HW[7] + Y_HW[5]; break;
|
359
|
+
case 3:
|
360
|
+
r0 = X_HW[0]; r1 = X_HW[1];
|
361
|
+
r2 = X_HW[2]; r3 = X_HW[3] + Y_HW[0];
|
362
|
+
r4 = X_HW[4] + Y_HW[1]; r5 = X_HW[5] + Y_HW[2];
|
363
|
+
r6 = X_HW[6] + Y_HW[3]; r7 = X_HW[7] + Y_HW[4]; break;
|
364
|
+
case 4:
|
365
|
+
r0 = X_HW[0]; r1 = X_HW[1];
|
366
|
+
r2 = X_HW[2]; r3 = X_HW[3];
|
367
|
+
r4 = X_HW[4] + Y_HW[0]; r5 = X_HW[5] + Y_HW[1];
|
368
|
+
r6 = X_HW[6] + Y_HW[2]; r7 = X_HW[7] + Y_HW[3]; break;
|
369
|
+
case 5:
|
370
|
+
r0 = X_HW[0]; r1 = X_HW[1];
|
371
|
+
r2 = X_HW[2]; r3 = X_HW[3];
|
372
|
+
r4 = X_HW[4]; r5 = X_HW[5] + Y_HW[0];
|
373
|
+
r6 = X_HW[6] + Y_HW[1]; r7 = X_HW[7] + Y_HW[2]; break;
|
374
|
+
case 6:
|
375
|
+
r0 = X_HW[0]; r1 = X_HW[1];
|
376
|
+
r2 = X_HW[2]; r3 = X_HW[3];
|
377
|
+
r4 = X_HW[4]; r5 = X_HW[5];
|
378
|
+
r6 = X_HW[6] + Y_HW[0]; r7 = X_HW[7] + Y_HW[1]; break;
|
379
|
+
case 7:
|
380
|
+
r0 = X_HW[0]; r1 = X_HW[1];
|
381
|
+
r2 = X_HW[2]; r3 = X_HW[3];
|
382
|
+
r4 = X_HW[4]; r5 = X_HW[5];
|
383
|
+
r6 = X_HW[6]; r7 = X_HW[7] + Y_HW[0]; break;
|
384
|
+
default:
|
385
|
+
/* Diff >= 8*/
|
386
|
+
R_HW[0] = X_HW[0]; R_HW[1] = X_HW[1];
|
387
|
+
R_HW[2] = X_HW[2]; R_HW[3] = X_HW[3];
|
388
|
+
R_HW[4] = X_HW[4]; R_HW[5] = X_HW[5];
|
389
|
+
R_HW[6] = X_HW[6]; R_HW[7] = X_HW[7]; return;
|
390
|
+
}
|
391
|
+
#endif
|
392
|
+
|
393
|
+
/* Carry propagation */
|
394
|
+
|
395
|
+
carry = r7 >> SCS_NB_BITS; r6 += carry; r7 = r7 & SCS_MASK_RADIX;
|
396
|
+
carry = r6 >> SCS_NB_BITS; r5 += carry; r6 = r6 & SCS_MASK_RADIX;
|
397
|
+
carry = r5 >> SCS_NB_BITS; r4 += carry; r5 = r5 & SCS_MASK_RADIX;
|
398
|
+
carry = r4 >> SCS_NB_BITS; r3 += carry; r4 = r4 & SCS_MASK_RADIX;
|
399
|
+
carry = r3 >> SCS_NB_BITS; r2 += carry; r3 = r3 & SCS_MASK_RADIX;
|
400
|
+
carry = r2 >> SCS_NB_BITS; r1 += carry; r2 = r2 & SCS_MASK_RADIX;
|
401
|
+
carry = r1 >> SCS_NB_BITS; r0 += carry; r1 = r1 & SCS_MASK_RADIX;
|
402
|
+
carry = r0 >> SCS_NB_BITS;
|
403
|
+
|
404
|
+
if (carry!=0){
|
405
|
+
R_HW[7] = r6; R_HW[6] = r5; R_HW[5] = r4; R_HW[4] = r3;
|
406
|
+
R_HW[3] = r2; R_HW[2] = r1; R_HW[1] = r0 & SCS_MASK_RADIX;
|
407
|
+
R_HW[0] = 1 ;
|
408
|
+
R_IND += 1;
|
409
|
+
}
|
410
|
+
else {
|
411
|
+
R_HW[0] = r0; R_HW[1] = r1; R_HW[2] = r2; R_HW[3] = r3;
|
412
|
+
R_HW[4] = r4; R_HW[5] = r5; R_HW[6] = r6; R_HW[7] = r7;
|
413
|
+
}
|
414
|
+
return;
|
415
|
+
|
416
|
+
#else /* #if SCS_NB_WORDS==8*/
|
417
|
+
|
418
|
+
/* This generic version is still written in such a way that
|
419
|
+
it is unrollable at compile time
|
420
|
+
*/
|
421
|
+
int i,j, s, carry, Diff;
|
422
|
+
int res[SCS_NB_WORDS];
|
423
|
+
|
424
|
+
Diff = X_IND - Y_IND;
|
425
|
+
R_EXP = X_EXP + Y_EXP - 1;
|
426
|
+
R_IND = X_IND;
|
427
|
+
R_SGN = X_SGN;
|
428
|
+
|
429
|
+
/* The easy case */
|
430
|
+
if(Diff >= SCS_NB_WORDS){
|
431
|
+
scs_set(result, x); return;
|
432
|
+
}
|
433
|
+
|
434
|
+
/* 0 <= Diff <= (SCS_NB_WORDS-1) */
|
435
|
+
|
436
|
+
carry=0;
|
437
|
+
for(i=(SCS_NB_WORDS-1), j=((SCS_NB_WORDS-1)-Diff); i>=0 ; i--,j--){
|
438
|
+
if (j>=0)
|
439
|
+
s = X_HW[i] + Y_HW[j] + carry;
|
440
|
+
else
|
441
|
+
s = X_HW[i] + carry;
|
442
|
+
carry = s >> SCS_NB_BITS;
|
443
|
+
res[i] = s & SCS_MASK_RADIX;
|
444
|
+
}
|
445
|
+
|
446
|
+
if (carry){
|
447
|
+
/* Carry out ! Need to shift digits */
|
448
|
+
for(i=(SCS_NB_WORDS-1); i>=1; i--)
|
449
|
+
R_HW[i] = res[i-1];
|
450
|
+
|
451
|
+
R_HW[0] = 1 ;
|
452
|
+
R_IND += 1;
|
453
|
+
}
|
454
|
+
else {
|
455
|
+
for(i=0; i<SCS_NB_WORDS; i++)
|
456
|
+
R_HW[i] = res[i];
|
457
|
+
}
|
458
|
+
|
459
|
+
return;
|
460
|
+
#endif /* #if SCS_NB_WORDS==8*/
|
461
|
+
|
462
|
+
} /* do_add*/
|
463
|
+
|
464
|
+
|
465
|
+
|
466
|
+
|
467
|
+
/*/////////////////////////////////////////////////////////////////
|
468
|
+
/////////////////////// SUBTRACTION //////////////////////////////
|
469
|
+
//////////////////////////////////////////////////////////////////
|
470
|
+
// This procedure assumes :
|
471
|
+
// - X_IND >= Y_IND
|
472
|
+
// - X_SIGN != Y_SIGN
|
473
|
+
// neither x or y is zero
|
474
|
+
// and result = x - y
|
475
|
+
*/
|
476
|
+
|
477
|
+
|
478
|
+
static void do_sub(scs_ptr result, scs_ptr x, scs_ptr y){
|
479
|
+
int s, carry;
|
480
|
+
int Diff, i, j, cp;
|
481
|
+
int res[SCS_NB_WORDS];
|
482
|
+
|
483
|
+
R_EXP = X_EXP + Y_EXP - 1;
|
484
|
+
Diff = X_IND - Y_IND;
|
485
|
+
R_IND = X_IND;
|
486
|
+
|
487
|
+
/* The easy case */
|
488
|
+
if(Diff >= SCS_NB_WORDS){
|
489
|
+
scs_set(result, x); return;
|
490
|
+
}
|
491
|
+
|
492
|
+
else {
|
493
|
+
/* 0 <= Diff <= (SCS_NB_WORDS-1) */
|
494
|
+
carry = 0;
|
495
|
+
if(Diff==0) {
|
496
|
+
|
497
|
+
i=0;
|
498
|
+
while((X_HW[i] == Y_HW[i]) && (i<SCS_NB_WORDS)) i++;
|
499
|
+
if (X_HW[i] > Y_HW[i]) cp=1;
|
500
|
+
else if (X_HW[i] < Y_HW[i]) cp=-1;
|
501
|
+
else cp=0;
|
502
|
+
|
503
|
+
if (cp == 0) {
|
504
|
+
/* Yet another easy case: result = 0 */
|
505
|
+
scs_zero(result);
|
506
|
+
return;
|
507
|
+
}
|
508
|
+
else { /* cp <> 0 */
|
509
|
+
if (cp > 0){
|
510
|
+
/* x > y */
|
511
|
+
|
512
|
+
R_SGN = X_SGN;
|
513
|
+
for(i=(SCS_NB_WORDS-1); i>=0 ;i--){
|
514
|
+
s = (int)(X_HW[i] - Y_HW[i] - carry);
|
515
|
+
carry = (int)((s&SCS_RADIX)>>SCS_NB_BITS);
|
516
|
+
res[i] = (int)((s&SCS_RADIX) + s);
|
517
|
+
}
|
518
|
+
}
|
519
|
+
else { /* cp < 0 */
|
520
|
+
/* x < y (change of sign) */
|
521
|
+
|
522
|
+
R_SGN = - X_SGN;
|
523
|
+
for(i=(SCS_NB_WORDS-1); i>=0 ;i--){
|
524
|
+
s = (int)(- X_HW[i] + Y_HW[i] - carry);
|
525
|
+
carry = (int)((s&SCS_RADIX)>>SCS_NB_BITS);
|
526
|
+
res[i] = (int)((s&SCS_RADIX) + s);
|
527
|
+
}
|
528
|
+
}
|
529
|
+
}
|
530
|
+
}
|
531
|
+
else {
|
532
|
+
/* 1<=Diff<(SCS_NB_WORDS-1) Digits of x and y overlap but the
|
533
|
+
* sign will be that of x */
|
534
|
+
|
535
|
+
R_SGN = X_SGN;
|
536
|
+
for(i=(SCS_NB_WORDS-1), j=((SCS_NB_WORDS-1)-Diff); i>=0 ;i--,j--){
|
537
|
+
if(j>=0)
|
538
|
+
s = (int)(X_HW[i] - Y_HW[j] - carry);
|
539
|
+
else
|
540
|
+
s = (int)(X_HW[i] - carry);
|
541
|
+
carry = (int)((s&SCS_RADIX)>>SCS_NB_BITS);
|
542
|
+
res[i] = (int)((s&SCS_RADIX) + s);
|
543
|
+
}
|
544
|
+
}
|
545
|
+
/* check for cancellations */
|
546
|
+
i=0;
|
547
|
+
while ((res[i]==0) && (i < SCS_NB_WORDS)) i++;
|
548
|
+
|
549
|
+
if(i>0) { /* cancellation, shift result*/
|
550
|
+
R_IND -= i;
|
551
|
+
for(j=0; i<SCS_NB_WORDS; i++,j++) R_HW[j] = (unsigned int)(res[i]);
|
552
|
+
for( ; j<SCS_NB_WORDS; j++) R_HW[j] = 0;
|
553
|
+
}
|
554
|
+
else {
|
555
|
+
for(i=0; i<SCS_NB_WORDS; i++)
|
556
|
+
R_HW[i] = (unsigned int)(res[i]);
|
557
|
+
}
|
558
|
+
}
|
559
|
+
return;
|
560
|
+
}
|
561
|
+
|
562
|
+
|
563
|
+
|
564
|
+
|
565
|
+
|
566
|
+
|
567
|
+
|
568
|
+
|
569
|
+
/** SCS addition (result is a normalised SCS number).
|
570
|
+
|
571
|
+
*/
|
572
|
+
void scs_add(scs_ptr result, scs_ptr x, scs_ptr y)
|
573
|
+
{
|
574
|
+
|
575
|
+
if (x->exception.i[HI]==0){scs_set(result, y); return; }
|
576
|
+
if (y->exception.i[HI]==0){scs_set(result, x); return; }
|
577
|
+
|
578
|
+
if (X_SGN == Y_SGN){
|
579
|
+
if(X_IND >= Y_IND)
|
580
|
+
do_add(result,x,y);
|
581
|
+
else
|
582
|
+
do_add(result,y,x);
|
583
|
+
}else {
|
584
|
+
if(X_IND>=Y_IND){
|
585
|
+
do_sub(result,x,y);
|
586
|
+
}else {
|
587
|
+
do_sub(result,y,x);
|
588
|
+
}
|
589
|
+
} return;
|
590
|
+
}
|
591
|
+
|
592
|
+
/** SCS subtraction (result is a normalised SCS number).
|
593
|
+
|
594
|
+
The arguments x, y and result may point to the same memory
|
595
|
+
location.
|
596
|
+
*/
|
597
|
+
void scs_sub(scs_ptr result, scs_ptr x, scs_ptr y)
|
598
|
+
{
|
599
|
+
if (x->exception.i[HI]==0)
|
600
|
+
{ scs_set(result, y); R_SGN = -R_SGN; return; }
|
601
|
+
if (y->exception.i[HI]==0)
|
602
|
+
{ scs_set(result, x); return; }
|
603
|
+
|
604
|
+
if (X_SGN == Y_SGN) {
|
605
|
+
/* Same sign, so it's a sub */
|
606
|
+
if(X_IND>=Y_IND)
|
607
|
+
do_sub(result,x,y);
|
608
|
+
else{
|
609
|
+
do_sub(result,y,x);
|
610
|
+
R_SGN = -R_SGN;
|
611
|
+
}
|
612
|
+
}else {
|
613
|
+
if(X_IND>=Y_IND)
|
614
|
+
do_add(result,x,y);
|
615
|
+
else{
|
616
|
+
do_add(result,y,x);
|
617
|
+
R_SGN = -R_SGN;
|
618
|
+
}
|
619
|
+
}
|
620
|
+
return;
|
621
|
+
}
|
622
|
+
|
623
|
+
|