crmf 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +12 -0
- data/crmf.gemspec +102 -1
- data/ext/crlibm-1.0beta5/AUTHORS +2 -0
- data/ext/crlibm-1.0beta5/CMakeLists.txt +154 -0
- data/ext/crlibm-1.0beta5/COPYING +340 -0
- data/ext/crlibm-1.0beta5/COPYING.LIB +504 -0
- data/ext/crlibm-1.0beta5/ChangeLog +125 -0
- data/ext/crlibm-1.0beta5/Makefile.am +134 -0
- data/ext/crlibm-1.0beta5/NEWS +0 -0
- data/ext/crlibm-1.0beta5/README +31 -0
- data/ext/crlibm-1.0beta5/README.DEV +23 -0
- data/ext/crlibm-1.0beta5/README.md +5 -0
- data/ext/crlibm-1.0beta5/TODO +66 -0
- data/ext/crlibm-1.0beta5/VERSION +1 -0
- data/ext/crlibm-1.0beta5/acos-td.c +1195 -0
- data/ext/crlibm-1.0beta5/acos-td.h +629 -0
- data/ext/crlibm-1.0beta5/asin-td.c +1297 -0
- data/ext/crlibm-1.0beta5/asin-td.h +620 -0
- data/ext/crlibm-1.0beta5/asincos.c +4488 -0
- data/ext/crlibm-1.0beta5/asincos.h +575 -0
- data/ext/crlibm-1.0beta5/atan-itanium.c +846 -0
- data/ext/crlibm-1.0beta5/atan-pentium.c +280 -0
- data/ext/crlibm-1.0beta5/atan-pentium.h +343 -0
- data/ext/crlibm-1.0beta5/atan_accurate.c +341 -0
- data/ext/crlibm-1.0beta5/atan_accurate.h +198 -0
- data/ext/crlibm-1.0beta5/atan_fast.c +506 -0
- data/ext/crlibm-1.0beta5/atan_fast.h +680 -0
- data/ext/crlibm-1.0beta5/configure.ac +419 -0
- data/ext/crlibm-1.0beta5/crlibm.h +204 -0
- data/ext/crlibm-1.0beta5/crlibm.spec +42 -0
- data/ext/crlibm-1.0beta5/crlibm_private.c +397 -0
- data/ext/crlibm-1.0beta5/crlibm_private.h +1048 -0
- data/ext/crlibm-1.0beta5/csh_fast.c +721 -0
- data/ext/crlibm-1.0beta5/csh_fast.h +771 -0
- data/ext/crlibm-1.0beta5/double-extended.h +496 -0
- data/ext/crlibm-1.0beta5/exp-itanium.c +723 -0
- data/ext/crlibm-1.0beta5/exp-td-standalone.c +87 -0
- data/ext/crlibm-1.0beta5/exp-td.c +1363 -0
- data/ext/crlibm-1.0beta5/exp-td.h +685 -0
- data/ext/crlibm-1.0beta5/exp_build_coeffs/exp_fast_table.c +125 -0
- data/ext/crlibm-1.0beta5/expm1-standalone.c +119 -0
- data/ext/crlibm-1.0beta5/expm1.c +2515 -0
- data/ext/crlibm-1.0beta5/expm1.h +715 -0
- data/ext/crlibm-1.0beta5/interval.h +238 -0
- data/ext/crlibm-1.0beta5/log-de.c +480 -0
- data/ext/crlibm-1.0beta5/log-de.h +747 -0
- data/ext/crlibm-1.0beta5/log-de2.c +280 -0
- data/ext/crlibm-1.0beta5/log-de2.h +2352 -0
- data/ext/crlibm-1.0beta5/log-td.c +1158 -0
- data/ext/crlibm-1.0beta5/log-td.h +819 -0
- data/ext/crlibm-1.0beta5/log.c +2244 -0
- data/ext/crlibm-1.0beta5/log.h +1592 -0
- data/ext/crlibm-1.0beta5/log10-td.c +906 -0
- data/ext/crlibm-1.0beta5/log10-td.h +823 -0
- data/ext/crlibm-1.0beta5/log1p.c +1295 -0
- data/ext/crlibm-1.0beta5/log2-td.c +1521 -0
- data/ext/crlibm-1.0beta5/log2-td.h +821 -0
- data/ext/crlibm-1.0beta5/log2_accurate.c +330 -0
- data/ext/crlibm-1.0beta5/log2_accurate.h +261 -0
- data/ext/crlibm-1.0beta5/log_accurate.c +133 -0
- data/ext/crlibm-1.0beta5/log_accurate.h +261 -0
- data/ext/crlibm-1.0beta5/log_fast.c +360 -0
- data/ext/crlibm-1.0beta5/log_fast.h +440 -0
- data/ext/crlibm-1.0beta5/pow.c +1396 -0
- data/ext/crlibm-1.0beta5/pow.h +3101 -0
- data/ext/crlibm-1.0beta5/prepare +20 -0
- data/ext/crlibm-1.0beta5/rem_pio2_accurate.c +219 -0
- data/ext/crlibm-1.0beta5/rem_pio2_accurate.h +53 -0
- data/ext/crlibm-1.0beta5/scs_lib/AUTHORS +3 -0
- data/ext/crlibm-1.0beta5/scs_lib/COPYING +504 -0
- data/ext/crlibm-1.0beta5/scs_lib/ChangeLog +16 -0
- data/ext/crlibm-1.0beta5/scs_lib/Doxyfile.dev +939 -0
- data/ext/crlibm-1.0beta5/scs_lib/Doxyfile.user +939 -0
- data/ext/crlibm-1.0beta5/scs_lib/INSTALL +215 -0
- data/ext/crlibm-1.0beta5/scs_lib/Makefile.am +17 -0
- data/ext/crlibm-1.0beta5/scs_lib/NEWS +0 -0
- data/ext/crlibm-1.0beta5/scs_lib/README +9 -0
- data/ext/crlibm-1.0beta5/scs_lib/README.DEV +38 -0
- data/ext/crlibm-1.0beta5/scs_lib/TODO +4 -0
- data/ext/crlibm-1.0beta5/scs_lib/VERSION +1 -0
- data/ext/crlibm-1.0beta5/scs_lib/addition_scs.c +623 -0
- data/ext/crlibm-1.0beta5/scs_lib/division_scs.c +110 -0
- data/ext/crlibm-1.0beta5/scs_lib/double2scs.c +174 -0
- data/ext/crlibm-1.0beta5/scs_lib/main.dox +104 -0
- data/ext/crlibm-1.0beta5/scs_lib/multiplication_scs.c +339 -0
- data/ext/crlibm-1.0beta5/scs_lib/poly_fct.c +112 -0
- data/ext/crlibm-1.0beta5/scs_lib/print_scs.c +73 -0
- data/ext/crlibm-1.0beta5/scs_lib/rand_scs.c +63 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs.h +353 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs2double.c +411 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs2mpf.c +58 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs2mpfr.c +61 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs_private.c +23 -0
- data/ext/crlibm-1.0beta5/scs_lib/scs_private.h +133 -0
- data/ext/crlibm-1.0beta5/scs_lib/wrapper_scs.h +486 -0
- data/ext/crlibm-1.0beta5/scs_lib/zero_scs.c +52 -0
- data/ext/crlibm-1.0beta5/trigo_accurate.c +501 -0
- data/ext/crlibm-1.0beta5/trigo_accurate.h +331 -0
- data/ext/crlibm-1.0beta5/trigo_fast.c +1243 -0
- data/ext/crlibm-1.0beta5/trigo_fast.h +639 -0
- data/ext/crlibm-1.0beta5/trigpi.c +1169 -0
- data/ext/crlibm-1.0beta5/trigpi.h +556 -0
- data/ext/crlibm-1.0beta5/triple-double.c +57 -0
- data/ext/crlibm-1.0beta5/triple-double.h +1380 -0
- data/ext/crmf/crmf.c +16 -16
- data/ext/crmf/extconf.rb +12 -8
- data/lib/crmf/version.rb +1 -1
- data/tests/perf.rb +100 -219
- metadata +104 -3
- data/ext/crlibm-1.0beta4.tar.gz +0 -0
@@ -0,0 +1,339 @@
|
|
1
|
+
/** Functions for SCS multiplication operations
|
2
|
+
@file multiplication_scs.c
|
3
|
+
|
4
|
+
@author Defour David David.Defour@ens-lyon.fr
|
5
|
+
@author Florent de Dinechin Florent.de.Dinechin@ens-lyon.fr
|
6
|
+
|
7
|
+
This file is part of the SCS library.
|
8
|
+
*/
|
9
|
+
|
10
|
+
/*
|
11
|
+
Copyright (C) 2002 David Defour and Florent de Dinechin
|
12
|
+
|
13
|
+
This library is free software; you can redistribute it and/or
|
14
|
+
modify it under the terms of the GNU Lesser General Public
|
15
|
+
License as published by the Free Software Foundation; either
|
16
|
+
version 2.1 of the License, or (at your option) any later version.
|
17
|
+
|
18
|
+
This library is distributed in the hope that it will be useful,
|
19
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
20
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
21
|
+
Lesser General Public License for more details.
|
22
|
+
|
23
|
+
You should have received a copy of the GNU Lesser General Public
|
24
|
+
License along with this library; if not, write to the Free Software
|
25
|
+
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
26
|
+
|
27
|
+
*/
|
28
|
+
|
29
|
+
#include "scs.h"
|
30
|
+
#include "scs_private.h"
|
31
|
+
|
32
|
+
#if 0 /* used to help debugging */
|
33
|
+
void pr(char* s,double d) {
|
34
|
+
db_number x;
|
35
|
+
x.d=d;
|
36
|
+
printf(s);printf(" ");
|
37
|
+
printf("%8x%8x . 2^%d (%8f %8x %8x) \n",
|
38
|
+
(x.i[HI]&0x000FFFFF)+0x00100000,
|
39
|
+
x.i[LO],
|
40
|
+
(x.i[HI]>>20)-1023,
|
41
|
+
x.d,
|
42
|
+
x.i[HI],
|
43
|
+
x.i[LO]);
|
44
|
+
}
|
45
|
+
#endif
|
46
|
+
|
47
|
+
|
48
|
+
|
49
|
+
/* Compute the carry of r1, remove it from r1, and add it to r0 */
|
50
|
+
/* tmp receives the bits of r1 above position SCS_NB_BITS; they are added to
   r0 and subtracted (shifted back) from r1.  The expansion stays a bare
   brace block because the call sites in this file do not put a ';' after
   the invocation. */
#define SCS_CARRY_PROPAGATE(r1,r0,tmp) \
      { (tmp) = (r1) >> SCS_NB_BITS; \
        (r0) += (tmp); \
        (r1) -= ((tmp) << SCS_NB_BITS); }
|
52
|
+
|
53
|
+
|
54
|
+
|
55
|
+
|
56
|
+
|
57
|
+
|
58
|
+
|
59
|
+
/************************************************************/
|
60
|
+
/* We have unrolled the loops for SCS_NB_WORDS==8
|
61
|
+
It leads to almost x2 speedup.
|
62
|
+
|
63
|
+
We just wish gcc would do it for us ! There are option switches,
|
64
|
+
but they don't lead to any performance improvement. When they do,
|
65
|
+
this part of the source code will be removed.
|
66
|
+
|
67
|
+
In the meantime, feel free to unroll for other values. */
|
68
|
+
|
69
|
+
|
70
|
+
|
71
|
+
|
72
|
+
|
73
|
+
/***************************/
|
74
|
+
#if (SCS_NB_WORDS==8)
|
75
|
+
/***************************/
|
76
|
+
/* Hand-unrolled product for SCS_NB_WORDS == 8: result = x * y.
   The exception fields are multiplied, the signs multiplied and the
   indexes added.  Only the nine leading columns of the digit product are
   formed; the ninth (p8) is used solely for the carry it sends into p7.
   All digits of x and y are copied to locals before any digit of result
   is written. */
void scs_mul(scs_ptr result, scs_ptr x, scs_ptr y){
  uint64_t a0,a1,a2,a3,a4,a5,a6,a7;        /* digits of x */
  int      b0,b1,b2,b3,b4,b5,b6,b7;        /* digits of y */
  uint64_t p0,p1,p2,p3,p4,p5,p6,p7,p8;     /* product columns */
  uint64_t top, carry;

  R_EXP = X_EXP * Y_EXP;
  R_SGN = X_SGN * Y_SGN;
  R_IND = X_IND + Y_IND;

  /* Load the operands */
  a0=X_HW[0]; a1=X_HW[1]; a2=X_HW[2]; a3=X_HW[3];
  a4=X_HW[4]; a5=X_HW[5]; a6=X_HW[6]; a7=X_HW[7];
  b0=Y_HW[0]; b1=Y_HW[1]; b2=Y_HW[2]; b3=Y_HW[3];
  b4=Y_HW[4]; b5=Y_HW[5]; b6=Y_HW[6]; b7=Y_HW[7];

  /* Column k is the sum of every a_i * b_j with i + j == k */
  p0 = a0*b0;
  p1 = a0*b1 + a1*b0;
  p2 = a0*b2 + a1*b1 + a2*b0;
  p3 = a0*b3 + a1*b2 + a2*b1 + a3*b0;
  p4 = a0*b4 + a1*b3 + a2*b2 + a3*b1 + a4*b0;
  p5 = a0*b5 + a1*b4 + a2*b3 + a3*b2 + a4*b1 + a5*b0;
  p6 = a0*b6 + a1*b5 + a2*b4 + a3*b3 + a4*b2 + a5*b1 + a6*b0;
  p7 = a0*b7 + a1*b6 + a2*b5 + a3*b4 + a4*b3 + a5*b2 + a6*b1 + a7*b0;
  p8 = a1*b7 + a2*b6 + a3*b5 + a4*b4 + a5*b3 + a6*b2 + a7*b1;

  /* Push the carries from the last column up to the first */
  top = 0;
  SCS_CARRY_PROPAGATE(p8,p7,carry)
  SCS_CARRY_PROPAGATE(p7,p6,carry)
  SCS_CARRY_PROPAGATE(p6,p5,carry)
  SCS_CARRY_PROPAGATE(p5,p4,carry)
  SCS_CARRY_PROPAGATE(p4,p3,carry)
  SCS_CARRY_PROPAGATE(p3,p2,carry)
  SCS_CARRY_PROPAGATE(p2,p1,carry)
  SCS_CARRY_PROPAGATE(p1,p0,carry)
  SCS_CARRY_PROPAGATE(p0,top,carry)

  if(top == 0){
    /* No overflow digit: the columns are the digits */
    R_HW[0] = p0; R_HW[1] = p1; R_HW[2] = p2; R_HW[3] = p3;
    R_HW[4] = p4; R_HW[5] = p5; R_HW[6] = p6; R_HW[7] = p7;
    return;
  }

  /* An extra leading digit appeared: every digit moves down one slot,
     the last column is dropped and the index grows by one */
  R_HW[0] = top; R_HW[1] = p0; R_HW[2] = p1; R_HW[3] = p2;
  R_HW[4] = p3;  R_HW[5] = p4; R_HW[6] = p5; R_HW[7] = p6;
  R_IND += 1;
}
|
126
|
+
|
127
|
+
|
128
|
+
/* Hand-unrolled square for SCS_NB_WORDS == 8: result = x * x.
   The sign of the result is set to 1, the index is doubled and the
   exception field is multiplied by itself.  Each cross product a_i * a_j
   (i != j) is formed once and doubled; the squares a_i * a_i are added to
   the even columns.  The ninth column (p8) only feeds its carry into p7. */
void scs_square(scs_ptr result, scs_ptr x){
  uint64_t a0,a1,a2,a3,a4,a5,a6,a7;        /* digits of x */
  uint64_t p0,p1,p2,p3,p4,p5,p6,p7,p8;     /* product columns */
  uint64_t top, carry;

  R_EXP = X_EXP * X_EXP;
  R_IND = X_IND + X_IND;
  R_SGN = 1;

  /* Load the operand before any digit of result is written */
  a0=X_HW[0]; a1=X_HW[1]; a2=X_HW[2]; a3=X_HW[3];
  a4=X_HW[4]; a5=X_HW[5]; a6=X_HW[6]; a7=X_HW[7];

  /* Odd columns hold doubled cross products only; even columns also
     receive one square */
  p0 = a0*a0;
  p1 = 2 * (a0*a1);
  p2 = 2 * (a0*a2) + a1*a1;
  p3 = 2 * (a0*a3 + a1*a2);
  p4 = 2 * (a0*a4 + a1*a3) + a2*a2;
  p5 = 2 * (a0*a5 + a1*a4 + a2*a3);
  p6 = 2 * (a0*a6 + a1*a5 + a2*a4) + a3*a3;
  p7 = 2 * (a0*a7 + a1*a6 + a2*a5 + a3*a4);
  p8 = 2 * (a1*a7 + a2*a6 + a3*a5) + a4*a4;

  /* Push the carries from the last column up to the first */
  top = 0;
  SCS_CARRY_PROPAGATE(p8,p7,carry)
  SCS_CARRY_PROPAGATE(p7,p6,carry)
  SCS_CARRY_PROPAGATE(p6,p5,carry)
  SCS_CARRY_PROPAGATE(p5,p4,carry)
  SCS_CARRY_PROPAGATE(p4,p3,carry)
  SCS_CARRY_PROPAGATE(p3,p2,carry)
  SCS_CARRY_PROPAGATE(p2,p1,carry)
  SCS_CARRY_PROPAGATE(p1,p0,carry)
  SCS_CARRY_PROPAGATE(p0,top,carry)

  if(top == 0){
    /* No overflow digit: the columns are the digits */
    R_HW[0] = p0; R_HW[1] = p1; R_HW[2] = p2; R_HW[3] = p3;
    R_HW[4] = p4; R_HW[5] = p5; R_HW[6] = p6; R_HW[7] = p7;
    return;
  }

  /* An extra leading digit appeared: shift every digit down one slot
     and bump the index */
  R_HW[0] = top; R_HW[1] = p0; R_HW[2] = p1; R_HW[3] = p2;
  R_HW[4] = p3;  R_HW[5] = p4; R_HW[6] = p5; R_HW[7] = p6;
  R_IND += 1;
}
|
178
|
+
|
179
|
+
|
180
|
+
|
181
|
+
/***************************/
|
182
|
+
#else
|
183
|
+
/***************************/
|
184
|
+
/* From here on, the normal, non-unrolled case (any SCS_NB_WORDS) */
|
185
|
+
|
186
|
+
|
187
|
+
/* Generic (looped) product: result = x * y for any SCS_NB_WORDS.
   The exception fields are multiplied, the signs multiplied and the
   indexes added.  Columns 0 .. SCS_NB_WORDS of the digit product are
   accumulated in 64-bit cells; the last column is only kept for the carry
   it sends upward.  Every digit of x and y is read before any digit of
   result is written. */
void scs_mul(scs_ptr result, scs_ptr x, scs_ptr y){
  uint64_t col[SCS_NB_WORDS+1];
  uint64_t top, carry;
  int i, k, shift;

  R_EXP = X_EXP * Y_EXP;
  R_SGN = X_SGN * Y_SGN;
  R_IND = X_IND + Y_IND;

  /* Column k gathers every product x_i * y_(k-i) */
  for(k=0; k<SCS_NB_WORDS; k++){
    col[k] = 0;
    for(i=0; i<=k; i++)
      col[k] += (uint64_t)X_HW[i] * Y_HW[k-i];
  }

  /* Extra column: x_0 has no partner digit here, so i starts at 1 */
  col[SCS_NB_WORDS] = 0;
  for(i=1; i<SCS_NB_WORDS; i++)
    col[SCS_NB_WORDS] += (uint64_t)X_HW[i] * Y_HW[SCS_NB_WORDS-i];

  /* Push the carries from the last column up to the first */
  k = SCS_NB_WORDS;
  while(k > 0){
    SCS_CARRY_PROPAGATE(col[k],col[k-1],carry)
    k--;
  }
  top = 0;
  SCS_CARRY_PROPAGATE(col[0],top,carry)

  /* When a carry leaves column 0 it becomes the new leading digit and
     every other digit moves down one slot */
  shift = (top != 0) ? 1 : 0;
  if(shift)
    R_HW[0] = top;
  for(k=shift; k<SCS_NB_WORDS; k++)
    R_HW[k] = col[k-shift];
  R_IND += shift;
}
|
235
|
+
|
236
|
+
|
237
|
+
|
238
|
+
|
239
|
+
|
240
|
+
|
241
|
+
|
242
|
+
/* Generic (looped) square: result = x * x for any SCS_NB_WORDS.
   The sign of the result is set to 1, the index is doubled and the
   exception field is multiplied by itself.  Cross products x_i * x_j
   (i < j) are accumulated once, doubled, and the squares x_i * x_i are
   then added to the even columns.  Column SCS_NB_WORDS is only kept for
   the carry it sends upward. */
void scs_square(scs_ptr result, scs_ptr x){
  uint64_t col[SCS_NB_WORDS+1];
  uint64_t top, carry, xi;
  int i, j, k, shift;

  R_EXP = X_EXP * X_EXP;
  R_SGN = 1;
  R_IND = X_IND + X_IND;

  for(k=0; k<=SCS_NB_WORDS; k++)
    col[k] = 0;

  /* Cross products x_i * x_j with i < j and i + j <= SCS_NB_WORDS */
  for(i=0; i<(SCS_NB_WORDS+1)/2; i++){
    xi = (uint64_t)X_HW[i];
    for(j=i+1; j<(SCS_NB_WORDS-i); j++)
      col[i+j] += xi * X_HW[j];
    /* x_0 has no partner in the extra column */
    if(i > 0)
      col[SCS_NB_WORDS] += xi * X_HW[SCS_NB_WORDS-i];
  }

  /* Double the cross products, then add x_(k/2)^2 to each even column */
  for(k=0; k<=SCS_NB_WORDS; k++){
    col[k] *= 2;
    if((k & 1) == 0)
      col[k] += (uint64_t)X_HW[k/2] * X_HW[k/2];
  }

  /* Push the carries from the last column up to the first */
  k = SCS_NB_WORDS;
  while(k > 0){
    SCS_CARRY_PROPAGATE(col[k],col[k-1],carry)
    k--;
  }
  top = 0;
  SCS_CARRY_PROPAGATE(col[0],top,carry)

  /* When a carry leaves column 0 it becomes the new leading digit and
     every other digit moves down one slot */
  shift = (top != 0) ? 1 : 0;
  if(shift)
    R_HW[0] = top;
  for(k=shift; k<SCS_NB_WORDS; k++)
    R_HW[k] = col[k-shift];
  R_IND += shift;
}
|
298
|
+
|
299
|
+
|
300
|
+
/*
|
301
|
+
* #endif corresponding to the test #if (SCS_NB_WORDS==8)
|
302
|
+
*/
|
303
|
+
#endif
|
304
|
+
|
305
|
+
|
306
|
+
/*
|
307
|
+
Multiply x by an integer val; result is returned in x.
|
308
|
+
*/
|
309
|
+
/* In-place product of x by the unsigned integer val_int.
   Digits are processed from the last one to the first, each keeping its
   low SCS_NB_BITS bits and handing the rest to the next digit up.  A carry
   left over after digit 0 becomes the new first digit: all digits move
   down one slot (the last one is lost) and the index is incremented.
   Multiplying by 0 also sets the exception field to 0. */
void scs_mul_ui(scs_ptr x, unsigned int val_int){
  uint64_t factor, acc, carry;
  int k;

  if (val_int == 0)
    X_EXP = 0;

  factor = val_int;
  carry  = 0;

  k = SCS_NB_WORDS;
  while(k-- > 0){
    acc     = factor * X_HW[k] + carry;
    carry   = acc >> SCS_NB_BITS;
    X_HW[k] = acc - (carry << SCS_NB_BITS);
  }

  if(carry == 0)
    return;

  /* shift all the digits ! */
  for(k=(SCS_NB_WORDS-1); k>0; k--)
    X_HW[k] = X_HW[k-1];

  X_HW[0] = (unsigned int)carry;
  X_IND  += 1;
}
|
339
|
+
|
@@ -0,0 +1,112 @@
|
|
1
|
+
/*
|
2
|
+
* Author : Defour David
|
3
|
+
* Contact : David.Defour@ens-lyon.fr
|
4
|
+
*
|
5
|
+
* This program is free software; you can redistribute it and/or modify
|
6
|
+
* it under the terms of the GNU Lesser General Public License as published by
|
7
|
+
* the Free Software Foundation; either version 2 of the License, or
|
8
|
+
* (at your option) any later version.
|
9
|
+
*
|
10
|
+
* This program is distributed in the hope that it will be useful,
|
11
|
+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
13
|
+
* GNU General Public License for more details.
|
14
|
+
*
|
15
|
+
* You should have received a copy of the GNU Lesser General Public License
|
16
|
+
* along with this program; if not, write to the Free Software
|
17
|
+
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
18
|
+
*/
|
19
|
+
#include "scs.h"
|
20
|
+
#include "scs_private.h"
|
21
|
+
|
22
|
+
|
23
|
+
/*
|
24
|
+
* result = z + (x * y)
|
25
|
+
*/
|
26
|
+
/* z->sign = X_SGN . Y_SGN */
|
27
|
+
/* Fused multiply-add on scs numbers.
   The SCS_NB_WORDS leading columns of the digit product x * y are formed
   in RES.  If z has the same sign as the product, the digits of z are
   added into RES before carry propagation; otherwise the product is
   stored in result first and z is combined afterwards with scs_add().

   NOTE(review): in the same-sign path the digit alignment (start index
   SCS_NB_WORDS-Diff) and the fact that R_IND is always taken from the
   product, even when z has the larger index, look questionable.  They are
   kept exactly as they were; confirm against a reference before relying on
   this routine. */
void scs_fma(scs_ptr result, scs_ptr x, scs_ptr y, scs_ptr z){
  uint64_t RES[2*SCS_NB_WORDS];
  uint64_t val, tmp;
  int i, j, ind, Diff, same_sign;

  ind = X_IND + Y_IND;

  /* Decide once, before result is written (result may alias an operand) */
  same_sign = (z->sign == (X_SGN * Y_SGN));

  /* Clear the whole scratch array; a bound of SCS_NB_WORDS+1 would run
     past its end when SCS_NB_WORDS is 1 */
  for(i=0; i<2*SCS_NB_WORDS; i++)
    RES[i]=0;

  for(i=0 ; i<SCS_NB_WORDS; i++){
    for(j=0; j<(SCS_NB_WORDS-i); j++){
      RES[i+j] += (uint64_t)X_HW[i] * Y_HW[j];
    }}

  /* if we can perform an add */
  if (same_sign){
    Diff = z->index - ind;
    if (Diff >= 0){
      /* i must be bounded too: with Diff == 0, j starts one above i and
	 the loop would otherwise touch RES[-1] and z->h_word[-1] */
      for(i=(SCS_NB_WORDS-1), j=(SCS_NB_WORDS-Diff); (i>=0)&&(j>=0); i--, j--)
	RES[i] = z->h_word[i] + RES[j];
      for( ; i>=0; i--)
	RES[i] = z->h_word[i];
    }else {
      for(i=(SCS_NB_WORDS+Diff), j=(SCS_NB_WORDS-1); i>=0; i--, j--)
	RES[j] = z->h_word[i] + RES[j];
    }
  }

  /* Carry propagate */
  RES[SCS_NB_WORDS-1] += (RES[SCS_NB_WORDS]>>SCS_NB_BITS);
  for(i=(SCS_NB_WORDS-1); i>0; i--)
    {tmp = RES[i]>>SCS_NB_BITS;  RES[i-1] += tmp;  RES[i] -= (tmp<<SCS_NB_BITS);}

  val = RES[0] >> SCS_NB_BITS;
  R_IND = X_IND + Y_IND;

  /* Store the result */
  if(val != 0){
    /* shift all the digits ! */
    R_HW[0] = (unsigned int)val;
    R_HW[1] = (unsigned int)(RES[0] - (val<<SCS_NB_BITS));
    for(i=2; i<SCS_NB_WORDS; i++)
      R_HW[i] = (unsigned int)RES[i-1];

    R_IND += 1;
  }
  else {
    for(i=0; i<SCS_NB_WORDS; i++)
      R_HW[i] = (unsigned int)RES[i];
  }

  if (same_sign){
    R_EXP = (z->exception.d + (X_EXP * Y_EXP)) - 1;
    R_SGN = X_SGN * Y_SGN;
  }else {
    /* we have to do a sub: result holds x*y, let scs_add combine it with z */
    R_EXP = (X_EXP * Y_EXP);
    R_SGN = X_SGN * Y_SGN;

    scs_add(result, result, z);
  }
}
|
@@ -0,0 +1,73 @@
|
|
1
|
+
/*
|
2
|
+
* Author : Defour David
|
3
|
+
* Contact : David.Defour@ens-lyon.fr
|
4
|
+
*
|
5
|
+
* This program is free software; you can redistribute it and/or modify
|
6
|
+
* it under the terms of the GNU Lesser General Public License as published by
|
7
|
+
* the Free Software Foundation; either version 2 of the License, or
|
8
|
+
* (at your option) any later version.
|
9
|
+
*
|
10
|
+
* This program is distributed in the hope that it will be useful,
|
11
|
+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
13
|
+
* GNU General Public License for more details.
|
14
|
+
*
|
15
|
+
* You should have received a copy of the GNU Lesser General Public License
|
16
|
+
* along with this program; if not, write to the Free Software
|
17
|
+
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
18
|
+
*/
|
19
|
+
#include <stdio.h>
|
20
|
+
#include "scs.h"
|
21
|
+
#include "scs_private.h"
|
22
|
+
|
23
|
+
|
24
|
+
/*
|
25
|
+
* used by the next function to write
|
26
|
+
* bit of the integer in the right order ....
|
27
|
+
*/
|
28
|
+
/*
 * Recursive helper of print_integer: writes on stdout the nb least
 * significant base-b digits of x, most significant digit first.
 * Each digit is written as a decimal number ("%u").
 */
static void print_order(unsigned int x, int nb, int b){
  if (nb<=0)
    return;
  /* the higher-order digits must come out before the current one */
  print_order(x/b, nb-1, b);
  printf("%u",x%b);
  return;
}

/*
 * print nb digits of the chain x in base "b"
 * b must be between 2 and 10: digits are written as decimal numbers, so a
 * larger base would produce an ambiguous digit string. Any other base is
 * rejected with a message on stderr and nothing is printed.
 */
static void print_integer(unsigned int x, int b, int nb){

  if ((b < 2)||(b > 10)){
    fprintf(stderr," ERROR: You musn't print number with a base larger than 10 or less than 2 \n");
    return;
  }
  print_order(x, nb, b);
  return;
}
|
48
|
+
|
49
|
+
|
50
|
+
/*
|
51
|
+
* Print on standard output the fields of an scs number: exception,
|
52
|
+
* index, sign, the double it converts to, and every digit
|
53
|
+
*
|
54
|
+
* Rem. : We haven't tested all special cases yet.
|
55
|
+
*/
|
56
|
+
|
57
|
+
/*
|
58
|
+
*/
|
59
|
+
/* Dump x on stdout: its exception field, index, sign, the double obtained
   through scs_get_d() (value and both 32-bit halves in hex), then each of
   its SCS_NB_WORDS digits in hex and in decimal. */
void scs_get_std( scs_ptr x){
  db_number as_double;
  int digit;

  scs_get_d(&as_double.d, x);

  printf("Exception : %e \n", X_EXP);
  printf("Index= %d \n Sign= %d \n Double value= %.30e \n Hex mantissa= %x %x\n",
	 X_IND, X_SGN, as_double.d, as_double.i[HI], as_double.i[LO]);

  digit = 0;
  while(digit < SCS_NB_WORDS){
    printf(" D %d : %8x %20u \n", digit, X_HW[digit], X_HW[digit]);
    digit++;
  }
}
|
71
|
+
|
72
|
+
|
73
|
+
|
@@ -0,0 +1,63 @@
|
|
1
|
+
/*
|
2
|
+
* Author : Defour David
|
3
|
+
* Contact : David.Defour@ens-lyon.fr
|
4
|
+
*
|
5
|
+
* This program is free software; you can redistribute it and/or modify
|
6
|
+
* it under the terms of the GNU Lesser General Public License as published by
|
7
|
+
* the Free Software Foundation; either version 2 of the License, or
|
8
|
+
* (at your option) any later version.
|
9
|
+
*
|
10
|
+
* This program is distributed in the hope that it will be useful,
|
11
|
+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
13
|
+
* GNU General Public License for more details.
|
14
|
+
*
|
15
|
+
* You should have received a copy of the GNU Lesser General Public License
|
16
|
+
* along with this program; if not, write to the Free Software
|
17
|
+
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
18
|
+
*/
|
19
|
+
#include <stdlib.h>
|
20
|
+
#include "scs.h"
|
21
|
+
#include "scs_private.h"
|
22
|
+
|
23
|
+
/*
|
24
|
+
* Return 'sizeof(int)' random bytes packed into an int
|
25
|
+
*/
|
26
|
+
|
27
|
+
|
28
|
+
/* Build an int out of the low 8 bits of successive rand() results.
   One byte seeds the accumulator and sizeof(int) more are shifted in, so
   rand() is called sizeof(int)+1 times and the first byte ends up shifted
   out.  The accumulator is unsigned: shifting a signed int past its sign
   bit is undefined behaviour, while unsigned arithmetic simply discards
   the bits that leave the word.
   Returns the accumulated bit pattern converted to int. */
int rand_val(void){
  unsigned int bits;
  size_t i;

  bits = (unsigned int)rand() & 0x000000ffu;
  for(i=0; i<sizeof(int); i++){
    bits = bits << 8;
    bits |= ((unsigned int)rand() & 0x000000ffu); /* we keep only 8 bits */
  }
  return (int)bits;
}
|
39
|
+
|
40
|
+
|
41
|
+
/*
|
42
|
+
* Put into 'result' a scs random number with the index field set
|
43
|
+
* with a value between -expo_max and +expo_max.
|
44
|
+
*
|
45
|
+
* Rem. :
|
46
|
+
* 1) If you want an scs number belonging to double precision floating
|
47
|
+
* point number you must call scs_rand with an expo_max less than 39.
|
48
|
+
* 2) expo_max must be less than RAND_MAX that is usually set a
|
49
|
+
* value greater than 32767
|
50
|
+
*/
|
51
|
+
/* Fill result with random content: exception field 1, an index drawn from
   rand() in [-expo_max, expo_max-1] (for expo_max > 0), a sign of -1 or 1,
   and SCS_NB_WORDS digits each masked with SCS_MASK_RADIX.
   result:   scs number to overwrite.
   expo_max: bound on the magnitude of the index; 0 yields index 0. */
void scs_rand(scs_ptr result, int expo_max){
  int i;

  R_EXP = 1;

  /* rand() % 0 is undefined: with an empty range the index can only be 0 */
  if (expo_max == 0)
    R_IND = 0;
  else
    R_IND = (rand() % (2*expo_max)) - expo_max;

  /* Same decision as (2*rand() - RAND_MAX) > 0, written so that it cannot
     overflow int when RAND_MAX equals INT_MAX */
  R_SGN = (rand() > (RAND_MAX / 2)) ? (-1) : (1);

  for(i=0; i<SCS_NB_WORDS; i++){
    /* We keep the first SCS_NB_BITS bits of a random value */
    R_HW[i] = rand_val() & SCS_MASK_RADIX;
  }
}
|