crmf 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +12 -0
  3. data/crmf.gemspec +102 -1
  4. data/ext/crlibm-1.0beta5/AUTHORS +2 -0
  5. data/ext/crlibm-1.0beta5/CMakeLists.txt +154 -0
  6. data/ext/crlibm-1.0beta5/COPYING +340 -0
  7. data/ext/crlibm-1.0beta5/COPYING.LIB +504 -0
  8. data/ext/crlibm-1.0beta5/ChangeLog +125 -0
  9. data/ext/crlibm-1.0beta5/Makefile.am +134 -0
  10. data/ext/crlibm-1.0beta5/NEWS +0 -0
  11. data/ext/crlibm-1.0beta5/README +31 -0
  12. data/ext/crlibm-1.0beta5/README.DEV +23 -0
  13. data/ext/crlibm-1.0beta5/README.md +5 -0
  14. data/ext/crlibm-1.0beta5/TODO +66 -0
  15. data/ext/crlibm-1.0beta5/VERSION +1 -0
  16. data/ext/crlibm-1.0beta5/acos-td.c +1195 -0
  17. data/ext/crlibm-1.0beta5/acos-td.h +629 -0
  18. data/ext/crlibm-1.0beta5/asin-td.c +1297 -0
  19. data/ext/crlibm-1.0beta5/asin-td.h +620 -0
  20. data/ext/crlibm-1.0beta5/asincos.c +4488 -0
  21. data/ext/crlibm-1.0beta5/asincos.h +575 -0
  22. data/ext/crlibm-1.0beta5/atan-itanium.c +846 -0
  23. data/ext/crlibm-1.0beta5/atan-pentium.c +280 -0
  24. data/ext/crlibm-1.0beta5/atan-pentium.h +343 -0
  25. data/ext/crlibm-1.0beta5/atan_accurate.c +341 -0
  26. data/ext/crlibm-1.0beta5/atan_accurate.h +198 -0
  27. data/ext/crlibm-1.0beta5/atan_fast.c +506 -0
  28. data/ext/crlibm-1.0beta5/atan_fast.h +680 -0
  29. data/ext/crlibm-1.0beta5/configure.ac +419 -0
  30. data/ext/crlibm-1.0beta5/crlibm.h +204 -0
  31. data/ext/crlibm-1.0beta5/crlibm.spec +42 -0
  32. data/ext/crlibm-1.0beta5/crlibm_private.c +397 -0
  33. data/ext/crlibm-1.0beta5/crlibm_private.h +1048 -0
  34. data/ext/crlibm-1.0beta5/csh_fast.c +721 -0
  35. data/ext/crlibm-1.0beta5/csh_fast.h +771 -0
  36. data/ext/crlibm-1.0beta5/double-extended.h +496 -0
  37. data/ext/crlibm-1.0beta5/exp-itanium.c +723 -0
  38. data/ext/crlibm-1.0beta5/exp-td-standalone.c +87 -0
  39. data/ext/crlibm-1.0beta5/exp-td.c +1363 -0
  40. data/ext/crlibm-1.0beta5/exp-td.h +685 -0
  41. data/ext/crlibm-1.0beta5/exp_build_coeffs/exp_fast_table.c +125 -0
  42. data/ext/crlibm-1.0beta5/expm1-standalone.c +119 -0
  43. data/ext/crlibm-1.0beta5/expm1.c +2515 -0
  44. data/ext/crlibm-1.0beta5/expm1.h +715 -0
  45. data/ext/crlibm-1.0beta5/interval.h +238 -0
  46. data/ext/crlibm-1.0beta5/log-de.c +480 -0
  47. data/ext/crlibm-1.0beta5/log-de.h +747 -0
  48. data/ext/crlibm-1.0beta5/log-de2.c +280 -0
  49. data/ext/crlibm-1.0beta5/log-de2.h +2352 -0
  50. data/ext/crlibm-1.0beta5/log-td.c +1158 -0
  51. data/ext/crlibm-1.0beta5/log-td.h +819 -0
  52. data/ext/crlibm-1.0beta5/log.c +2244 -0
  53. data/ext/crlibm-1.0beta5/log.h +1592 -0
  54. data/ext/crlibm-1.0beta5/log10-td.c +906 -0
  55. data/ext/crlibm-1.0beta5/log10-td.h +823 -0
  56. data/ext/crlibm-1.0beta5/log1p.c +1295 -0
  57. data/ext/crlibm-1.0beta5/log2-td.c +1521 -0
  58. data/ext/crlibm-1.0beta5/log2-td.h +821 -0
  59. data/ext/crlibm-1.0beta5/log2_accurate.c +330 -0
  60. data/ext/crlibm-1.0beta5/log2_accurate.h +261 -0
  61. data/ext/crlibm-1.0beta5/log_accurate.c +133 -0
  62. data/ext/crlibm-1.0beta5/log_accurate.h +261 -0
  63. data/ext/crlibm-1.0beta5/log_fast.c +360 -0
  64. data/ext/crlibm-1.0beta5/log_fast.h +440 -0
  65. data/ext/crlibm-1.0beta5/pow.c +1396 -0
  66. data/ext/crlibm-1.0beta5/pow.h +3101 -0
  67. data/ext/crlibm-1.0beta5/prepare +20 -0
  68. data/ext/crlibm-1.0beta5/rem_pio2_accurate.c +219 -0
  69. data/ext/crlibm-1.0beta5/rem_pio2_accurate.h +53 -0
  70. data/ext/crlibm-1.0beta5/scs_lib/AUTHORS +3 -0
  71. data/ext/crlibm-1.0beta5/scs_lib/COPYING +504 -0
  72. data/ext/crlibm-1.0beta5/scs_lib/ChangeLog +16 -0
  73. data/ext/crlibm-1.0beta5/scs_lib/Doxyfile.dev +939 -0
  74. data/ext/crlibm-1.0beta5/scs_lib/Doxyfile.user +939 -0
  75. data/ext/crlibm-1.0beta5/scs_lib/INSTALL +215 -0
  76. data/ext/crlibm-1.0beta5/scs_lib/Makefile.am +17 -0
  77. data/ext/crlibm-1.0beta5/scs_lib/NEWS +0 -0
  78. data/ext/crlibm-1.0beta5/scs_lib/README +9 -0
  79. data/ext/crlibm-1.0beta5/scs_lib/README.DEV +38 -0
  80. data/ext/crlibm-1.0beta5/scs_lib/TODO +4 -0
  81. data/ext/crlibm-1.0beta5/scs_lib/VERSION +1 -0
  82. data/ext/crlibm-1.0beta5/scs_lib/addition_scs.c +623 -0
  83. data/ext/crlibm-1.0beta5/scs_lib/division_scs.c +110 -0
  84. data/ext/crlibm-1.0beta5/scs_lib/double2scs.c +174 -0
  85. data/ext/crlibm-1.0beta5/scs_lib/main.dox +104 -0
  86. data/ext/crlibm-1.0beta5/scs_lib/multiplication_scs.c +339 -0
  87. data/ext/crlibm-1.0beta5/scs_lib/poly_fct.c +112 -0
  88. data/ext/crlibm-1.0beta5/scs_lib/print_scs.c +73 -0
  89. data/ext/crlibm-1.0beta5/scs_lib/rand_scs.c +63 -0
  90. data/ext/crlibm-1.0beta5/scs_lib/scs.h +353 -0
  91. data/ext/crlibm-1.0beta5/scs_lib/scs2double.c +411 -0
  92. data/ext/crlibm-1.0beta5/scs_lib/scs2mpf.c +58 -0
  93. data/ext/crlibm-1.0beta5/scs_lib/scs2mpfr.c +61 -0
  94. data/ext/crlibm-1.0beta5/scs_lib/scs_private.c +23 -0
  95. data/ext/crlibm-1.0beta5/scs_lib/scs_private.h +133 -0
  96. data/ext/crlibm-1.0beta5/scs_lib/wrapper_scs.h +486 -0
  97. data/ext/crlibm-1.0beta5/scs_lib/zero_scs.c +52 -0
  98. data/ext/crlibm-1.0beta5/trigo_accurate.c +501 -0
  99. data/ext/crlibm-1.0beta5/trigo_accurate.h +331 -0
  100. data/ext/crlibm-1.0beta5/trigo_fast.c +1243 -0
  101. data/ext/crlibm-1.0beta5/trigo_fast.h +639 -0
  102. data/ext/crlibm-1.0beta5/trigpi.c +1169 -0
  103. data/ext/crlibm-1.0beta5/trigpi.h +556 -0
  104. data/ext/crlibm-1.0beta5/triple-double.c +57 -0
  105. data/ext/crlibm-1.0beta5/triple-double.h +1380 -0
  106. data/ext/crmf/crmf.c +16 -16
  107. data/ext/crmf/extconf.rb +12 -8
  108. data/lib/crmf/version.rb +1 -1
  109. data/tests/perf.rb +100 -219
  110. metadata +104 -3
  111. data/ext/crlibm-1.0beta4.tar.gz +0 -0
@@ -0,0 +1,623 @@
1
+ /** Functions for SCS addition and subtraction
2
+
3
+ @file addition_scs.c
4
+
5
+ @author Defour David David.Defour@ens-lyon.fr
6
+ @author Florent de Dinechin Florent.de.Dinechin@ens-lyon.fr
7
+
8
+ This file is part of the SCS library.
9
+
10
+ Many functions come in two versions, selected by a @#if.
11
+
12
+ The reason is that we designed scslib library for internal use with
13
+ SCS_NB_WORDS==8, so we provide a version with manual optimizations for
14
+ this case.
15
+
16
+ These optimisations include loop unrolling, and sometimes replacing
17
+ temporary arrays of size 8 with 8 variables, which is more efficient
18
+ on all modern processors with many (renaming) registers.
19
+
20
+ Using gcc3.2 with the most aggressive optimization options for this
21
+ purpose (-funroll-loops -foptimize-register-move -frerun-loop-opt
22
+ -frerun-cse-after-loop) is still much slower. At some point in the
23
+ future, gcc should catch up with unrolling since our loops are so
24
+ simple, however the replacement of small arrays with variables is
25
+ not something we are aware of in the literature about compiler
26
+ optimization.
27
+ */
28
+
29
+ /*
30
+ Copyright (C) 2002 David Defour and Florent de Dinechin
31
+
32
+ This library is free software; you can redistribute it and/or
33
+ modify it under the terms of the GNU Lesser General Public
34
+ License as published by the Free Software Foundation; either
35
+ version 2.1 of the License, or (at your option) any later version.
36
+
37
+ This library is distributed in the hope that it will be useful,
38
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
39
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
40
+ Lesser General Public License for more details.
41
+
42
+ You should have received a copy of the GNU Lesser General Public
43
+ License along with this library; if not, write to the Free Software
44
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
45
+
46
+ */
47
+
48
+ #include "scs.h"
49
+ #include "scs_private.h"
50
+
51
+ /**
52
+ This function copies a result into another. There is an unrolled
53
+ version for the case SCS_NB_WORDS==8.
54
+ */
55
+ void scs_set(scs_ptr result, scs_ptr x){
56
+ /* unsigned int i;*/
57
+
58
+ #if (SCS_NB_WORDS==8)
59
+ R_HW[0] = X_HW[0]; R_HW[1] = X_HW[1];
60
+ R_HW[2] = X_HW[2]; R_HW[3] = X_HW[3];
61
+ R_HW[4] = X_HW[4]; R_HW[5] = X_HW[5];
62
+ R_HW[6] = X_HW[6]; R_HW[7] = X_HW[7];
63
+ #else
64
+ for(i=0; i<SCS_NB_WORDS; i++)
65
+ R_HW[i] = X_HW[i];
66
+ #endif
67
+ R_EXP = X_EXP;
68
+ R_IND = X_IND;
69
+ R_SGN = X_SGN;
70
+ }
71
+
72
+
73
+ /** renormalize a SCS number.
74
+
75
+ This function removes the carry from each digit, and also shifts the
76
+ digits in case of a cancellation (so that if result != 0 then its
77
+ first digit is non-zero)
78
+
79
+ @warning THIS FUNCTION HAS NEVER BEEN PROPERLY TESTED and is
80
+ currently unused in the library: instead, specific renormalisation
81
+ steps are fused within the code of the operations which require it.
82
+ */
83
+
84
+ void scs_renorm(scs_ptr result){
85
+ unsigned int c;
86
+ int i, j, k;
87
+
88
+ /*
89
+ * Carry propagate
90
+ */
91
+ for(i=SCS_NB_WORDS-1; i>0; i--){
92
+ c = R_HW[i] & ~SCS_MASK_RADIX;
93
+ R_HW[i-1] += c >> SCS_NB_BITS;
94
+ R_HW[i] = R_HW[i] & SCS_MASK_RADIX;
95
+ }
96
+
97
+ if (R_HW[0] >= SCS_RADIX){
98
+ /* Carry out! Need to shift digits */
99
+ c = R_HW[0] & ~SCS_MASK_RADIX;
100
+ c = c >> SCS_NB_BITS;
101
+ for(i=SCS_NB_WORDS-1; i>1; i--)
102
+ R_HW[i] = R_HW[i-1];
103
+
104
+ R_HW[1] = R_HW[0] & SCS_MASK_RADIX;
105
+ R_HW[0] = c;
106
+ R_IND += 1;
107
+
108
+ }else{
109
+ /* Was there a cancellation ? */
110
+ if (R_HW[0] == 0){
111
+
112
+ k = 1;
113
+ while ((R_HW[k] == 0) && (k <= SCS_NB_WORDS))
114
+ k++;
115
+
116
+ R_IND -= k;
117
+
118
+ for(j=k, i=0; j<SCS_NB_WORDS; j++, i++)
119
+ R_HW[i] = R_HW[j];
120
+
121
+ for( ; i<SCS_NB_WORDS; i++)
122
+ R_HW[i] = 0;
123
+
124
+ }
125
+ }
126
+ }
127
+
128
+
129
+
130
+ /** Renormalization without cancellation check.
131
+
132
+ This renormalization step is especially designed for the addition of
133
+ several numbers with the same sign. In this case, you know that there
134
+ has been no cancellation, which allows simpler renormalisation.
135
+ */
136
+
137
+ void scs_renorm_no_cancel_check(scs_ptr result){
138
+ unsigned int carry, c0;
139
+ /* int i;*/
140
+
141
+ /* Carry propagate */
142
+ #if (SCS_NB_WORDS==8)
143
+ carry = R_HW[7] >> SCS_NB_BITS;
144
+ R_HW[6] += carry; R_HW[7] = R_HW[7] & SCS_MASK_RADIX;
145
+ carry = R_HW[6] >> SCS_NB_BITS;
146
+ R_HW[5] += carry; R_HW[6] = R_HW[6] & SCS_MASK_RADIX;
147
+ carry = R_HW[5] >> SCS_NB_BITS;
148
+ R_HW[4] += carry; R_HW[5] = R_HW[5] & SCS_MASK_RADIX;
149
+ carry = R_HW[4] >> SCS_NB_BITS;
150
+ R_HW[3] += carry; R_HW[4] = R_HW[4] & SCS_MASK_RADIX;
151
+ carry = R_HW[3] >> SCS_NB_BITS;
152
+ R_HW[2] += carry; R_HW[3] = R_HW[3] & SCS_MASK_RADIX;
153
+ carry = R_HW[2] >> SCS_NB_BITS;
154
+ R_HW[1] += carry; R_HW[2] = R_HW[2] & SCS_MASK_RADIX;
155
+ carry = R_HW[1] >> SCS_NB_BITS;
156
+ R_HW[0] += carry; R_HW[1] = R_HW[1] & SCS_MASK_RADIX;
157
+ #else
158
+ for(i=(SCS_NB_WORDS-1);i>0;i--){
159
+ carry = R_HW[i] >> SCS_NB_BITS;
160
+ R_HW[i-1] += carry;
161
+ R_HW[i] = R_HW[i] & SCS_MASK_RADIX;
162
+ }
163
+ #endif
164
+
165
+ if (R_HW[0] >= SCS_RADIX){
166
+ /* Carry out ! Need to shift digits */
167
+ c0 = R_HW[0] >> SCS_NB_BITS;
168
+
169
+ #if (SCS_NB_WORDS==8)
170
+ R_HW[7] = R_HW[6]; R_HW[6] = R_HW[5];
171
+ R_HW[5] = R_HW[4]; R_HW[4] = R_HW[3];
172
+ R_HW[3] = R_HW[2]; R_HW[2] = R_HW[1];
173
+ #else
174
+ for(i=(SCS_NB_WORDS-1); i>1; i--)
175
+ R_HW[i] = R_HW[i-1];
176
+ #endif
177
+ R_HW[1] = R_HW[0] & SCS_MASK_RADIX;
178
+ R_HW[0] = c0;
179
+ R_IND += 1;
180
+ }
181
+ return;
182
+ }
183
+
184
+
185
+
186
+
187
+ /* addition without renormalisation.
188
+
189
+
190
+ Add two scs number x and y, the result is put into "result".
191
+ Assumes x.sign == y.sign x.index > y.index.
192
+
193
+ The result is not normalized.
194
+ */
195
+
196
+ static void do_add_no_renorm(scs_ptr result, scs_ptr x, scs_ptr y){
197
+ unsigned int RES[SCS_NB_WORDS];
198
+ unsigned int i, j, Diff;
199
+
200
+ if (x->exception.i[HI]==0){scs_set(result, y); return; }
201
+ if (y->exception.i[HI]==0){scs_set(result, x); return; }
202
+
203
+ for (i=0; i<SCS_NB_WORDS; i++)
204
+ RES[i] = X_HW[i];
205
+
206
+ Diff = (unsigned int)(X_IND - Y_IND);
207
+ R_EXP = X_EXP + Y_EXP - 1;
208
+ R_IND = X_IND;
209
+ R_SGN = X_SGN;
210
+
211
+ for (i=Diff, j=0; i<SCS_NB_WORDS; i++, j++)
212
+ RES[i] += Y_HW[j];
213
+
214
+ for (i=0; i<SCS_NB_WORDS; i++)
215
+ R_HW[i] = RES[i];
216
+
217
+ return;
218
+ }
219
+
220
+
221
+ /*
222
+ * Addition without renormalization. Assumes that x.sign == y.sign.
223
+ */
224
+ void scs_add_no_renorm(scs_ptr result, scs_ptr x, scs_ptr y)
225
+ {
226
+ if (X_IND >= Y_IND)
227
+ do_add_no_renorm(result,x,y);
228
+ else
229
+ do_add_no_renorm(result,y,x);
230
+ return;
231
+ }
232
+
233
+
234
+
235
+
236
+
237
+
238
+
239
+
240
+
241
+
242
+
243
+
244
+
245
+
246
+
247
+
248
+ /* The function that does the work in case of an addition
249
+
250
+ do_add is the function that does the addition of two SCS numbers,
251
+ assuming that x.sign == y.sign, X_IND > Y_IND, x and y both
252
+ non-zero.
253
+ */
254
+
255
+ static void do_add(scs_ptr result, scs_ptr x, scs_ptr y)
256
+ {
257
+ #if (SCS_NB_WORDS==8) /* in this case we unroll all the loops */
258
+ int Diff;
259
+ unsigned int carry;
260
+ unsigned int r0,r1,r2,r3,r4,r5,r6,r7;
261
+
262
+ Diff = X_IND - Y_IND;
263
+ R_EXP = X_EXP + Y_EXP - 1;
264
+ R_IND = X_IND;
265
+ R_SGN = X_SGN;
266
+ #if 0
267
+ if(Diff<4)
268
+ if(Diff<2)
269
+ if(Diff==0)
270
+ {
271
+ // case 0:
272
+ r0 = X_HW[0] + Y_HW[0]; r1 = X_HW[1] + Y_HW[1];
273
+ r2 = X_HW[2] + Y_HW[2]; r3 = X_HW[3] + Y_HW[3];
274
+ r4 = X_HW[4] + Y_HW[4]; r5 = X_HW[5] + Y_HW[5];
275
+ r6 = X_HW[6] + Y_HW[6]; r7 = X_HW[7] + Y_HW[7];
276
+ }
277
+ else {
278
+ // case 1:
279
+ r0 = X_HW[0]; r1 = X_HW[1] + Y_HW[0];
280
+ r2 = X_HW[2] + Y_HW[1]; r3 = X_HW[3] + Y_HW[2];
281
+ r4 = X_HW[4] + Y_HW[3]; r5 = X_HW[5] + Y_HW[4];
282
+ r6 = X_HW[6] + Y_HW[5]; r7 = X_HW[7] + Y_HW[6];
283
+ }
284
+ else if(Diff==2)
285
+ {
286
+ //case 2:
287
+ r0 = X_HW[0]; r1 = X_HW[1];
288
+ r2 = X_HW[2] + Y_HW[0]; r3 = X_HW[3] + Y_HW[1];
289
+ r4 = X_HW[4] + Y_HW[2]; r5 = X_HW[5] + Y_HW[3];
290
+ r6 = X_HW[6] + Y_HW[4]; r7 = X_HW[7] + Y_HW[5];
291
+ }
292
+ else
293
+ {
294
+ // case 3:
295
+ r0 = X_HW[0]; r1 = X_HW[1];
296
+ r2 = X_HW[2]; r3 = X_HW[3] + Y_HW[0];
297
+ r4 = X_HW[4] + Y_HW[1]; r5 = X_HW[5] + Y_HW[2];
298
+ r6 = X_HW[6] + Y_HW[3]; r7 = X_HW[7] + Y_HW[4];
299
+ }
300
+ else if(Diff<6)
301
+ if(Diff==4)
302
+ {
303
+ // case 4:
304
+ r0 = X_HW[0]; r1 = X_HW[1];
305
+ r2 = X_HW[2]; r3 = X_HW[3];
306
+ r4 = X_HW[4] + Y_HW[0]; r5 = X_HW[5] + Y_HW[1];
307
+ r6 = X_HW[6] + Y_HW[2]; r7 = X_HW[7] + Y_HW[3];
308
+ }
309
+ else {
310
+ // case 5:
311
+ r0 = X_HW[0]; r1 = X_HW[1];
312
+ r2 = X_HW[2]; r3 = X_HW[3];
313
+ r4 = X_HW[4]; r5 = X_HW[5] + Y_HW[0];
314
+ r6 = X_HW[6] + Y_HW[1]; r7 = X_HW[7] + Y_HW[2];
315
+ }
316
+ else if(Diff<8)
317
+ if(Diff==6)
318
+ {
319
+ // case 6:
320
+ r0 = X_HW[0]; r1 = X_HW[1];
321
+ r2 = X_HW[2]; r3 = X_HW[3];
322
+ r4 = X_HW[4]; r5 = X_HW[5];
323
+ r6 = X_HW[6] + Y_HW[0]; r7 = X_HW[7] + Y_HW[1];
324
+ }
325
+ else {
326
+ // case 7:
327
+ r0 = X_HW[0]; r1 = X_HW[1];
328
+ r2 = X_HW[2]; r3 = X_HW[3];
329
+ r4 = X_HW[4]; r5 = X_HW[5];
330
+ r6 = X_HW[6]; r7 = X_HW[7] + Y_HW[0];
331
+ }
332
+
333
+ else
334
+ {
335
+ /* Diff >= 8*/
336
+ R_HW[0] = X_HW[0]; R_HW[1] = X_HW[1];
337
+ R_HW[2] = X_HW[2]; R_HW[3] = X_HW[3];
338
+ R_HW[4] = X_HW[4]; R_HW[5] = X_HW[5];
339
+ R_HW[6] = X_HW[6]; R_HW[7] = X_HW[7];
340
+ return;
341
+ }
342
+ #else
343
+ switch (Diff){
344
+ case 0:
345
+ r0 = X_HW[0] + Y_HW[0]; r1 = X_HW[1] + Y_HW[1];
346
+ r2 = X_HW[2] + Y_HW[2]; r3 = X_HW[3] + Y_HW[3];
347
+ r4 = X_HW[4] + Y_HW[4]; r5 = X_HW[5] + Y_HW[5];
348
+ r6 = X_HW[6] + Y_HW[6]; r7 = X_HW[7] + Y_HW[7]; break;
349
+ case 1:
350
+ r0 = X_HW[0]; r1 = X_HW[1] + Y_HW[0];
351
+ r2 = X_HW[2] + Y_HW[1]; r3 = X_HW[3] + Y_HW[2];
352
+ r4 = X_HW[4] + Y_HW[3]; r5 = X_HW[5] + Y_HW[4];
353
+ r6 = X_HW[6] + Y_HW[5]; r7 = X_HW[7] + Y_HW[6]; break;
354
+ case 2:
355
+ r0 = X_HW[0]; r1 = X_HW[1];
356
+ r2 = X_HW[2] + Y_HW[0]; r3 = X_HW[3] + Y_HW[1];
357
+ r4 = X_HW[4] + Y_HW[2]; r5 = X_HW[5] + Y_HW[3];
358
+ r6 = X_HW[6] + Y_HW[4]; r7 = X_HW[7] + Y_HW[5]; break;
359
+ case 3:
360
+ r0 = X_HW[0]; r1 = X_HW[1];
361
+ r2 = X_HW[2]; r3 = X_HW[3] + Y_HW[0];
362
+ r4 = X_HW[4] + Y_HW[1]; r5 = X_HW[5] + Y_HW[2];
363
+ r6 = X_HW[6] + Y_HW[3]; r7 = X_HW[7] + Y_HW[4]; break;
364
+ case 4:
365
+ r0 = X_HW[0]; r1 = X_HW[1];
366
+ r2 = X_HW[2]; r3 = X_HW[3];
367
+ r4 = X_HW[4] + Y_HW[0]; r5 = X_HW[5] + Y_HW[1];
368
+ r6 = X_HW[6] + Y_HW[2]; r7 = X_HW[7] + Y_HW[3]; break;
369
+ case 5:
370
+ r0 = X_HW[0]; r1 = X_HW[1];
371
+ r2 = X_HW[2]; r3 = X_HW[3];
372
+ r4 = X_HW[4]; r5 = X_HW[5] + Y_HW[0];
373
+ r6 = X_HW[6] + Y_HW[1]; r7 = X_HW[7] + Y_HW[2]; break;
374
+ case 6:
375
+ r0 = X_HW[0]; r1 = X_HW[1];
376
+ r2 = X_HW[2]; r3 = X_HW[3];
377
+ r4 = X_HW[4]; r5 = X_HW[5];
378
+ r6 = X_HW[6] + Y_HW[0]; r7 = X_HW[7] + Y_HW[1]; break;
379
+ case 7:
380
+ r0 = X_HW[0]; r1 = X_HW[1];
381
+ r2 = X_HW[2]; r3 = X_HW[3];
382
+ r4 = X_HW[4]; r5 = X_HW[5];
383
+ r6 = X_HW[6]; r7 = X_HW[7] + Y_HW[0]; break;
384
+ default:
385
+ /* Diff >= 8*/
386
+ R_HW[0] = X_HW[0]; R_HW[1] = X_HW[1];
387
+ R_HW[2] = X_HW[2]; R_HW[3] = X_HW[3];
388
+ R_HW[4] = X_HW[4]; R_HW[5] = X_HW[5];
389
+ R_HW[6] = X_HW[6]; R_HW[7] = X_HW[7]; return;
390
+ }
391
+ #endif
392
+
393
+ /* Carry propagation */
394
+
395
+ carry = r7 >> SCS_NB_BITS; r6 += carry; r7 = r7 & SCS_MASK_RADIX;
396
+ carry = r6 >> SCS_NB_BITS; r5 += carry; r6 = r6 & SCS_MASK_RADIX;
397
+ carry = r5 >> SCS_NB_BITS; r4 += carry; r5 = r5 & SCS_MASK_RADIX;
398
+ carry = r4 >> SCS_NB_BITS; r3 += carry; r4 = r4 & SCS_MASK_RADIX;
399
+ carry = r3 >> SCS_NB_BITS; r2 += carry; r3 = r3 & SCS_MASK_RADIX;
400
+ carry = r2 >> SCS_NB_BITS; r1 += carry; r2 = r2 & SCS_MASK_RADIX;
401
+ carry = r1 >> SCS_NB_BITS; r0 += carry; r1 = r1 & SCS_MASK_RADIX;
402
+ carry = r0 >> SCS_NB_BITS;
403
+
404
+ if (carry!=0){
405
+ R_HW[7] = r6; R_HW[6] = r5; R_HW[5] = r4; R_HW[4] = r3;
406
+ R_HW[3] = r2; R_HW[2] = r1; R_HW[1] = r0 & SCS_MASK_RADIX;
407
+ R_HW[0] = 1 ;
408
+ R_IND += 1;
409
+ }
410
+ else {
411
+ R_HW[0] = r0; R_HW[1] = r1; R_HW[2] = r2; R_HW[3] = r3;
412
+ R_HW[4] = r4; R_HW[5] = r5; R_HW[6] = r6; R_HW[7] = r7;
413
+ }
414
+ return;
415
+
416
+ #else /* #if SCS_NB_WORDS==8*/
417
+
418
+ /* This generic version is still written in such a way that
419
+ it is unrollable at compile time
420
+ */
421
+ int i,j, s, carry, Diff;
422
+ int res[SCS_NB_WORDS];
423
+
424
+ Diff = X_IND - Y_IND;
425
+ R_EXP = X_EXP + Y_EXP - 1;
426
+ R_IND = X_IND;
427
+ R_SGN = X_SGN;
428
+
429
+ /* The easy case */
430
+ if(Diff >= SCS_NB_WORDS){
431
+ scs_set(result, x); return;
432
+ }
433
+
434
+ /* 0 <= Diff <= (SCS_NB_WORDS-1) */
435
+
436
+ carry=0;
437
+ for(i=(SCS_NB_WORDS-1), j=((SCS_NB_WORDS-1)-Diff); i>=0 ; i--,j--){
438
+ if (j>=0)
439
+ s = X_HW[i] + Y_HW[j] + carry;
440
+ else
441
+ s = X_HW[i] + carry;
442
+ carry = s >> SCS_NB_BITS;
443
+ res[i] = s & SCS_MASK_RADIX;
444
+ }
445
+
446
+ if (carry){
447
+ /* Carry out ! Need to shift digits */
448
+ for(i=(SCS_NB_WORDS-1); i>=1; i--)
449
+ R_HW[i] = res[i-1];
450
+
451
+ R_HW[0] = 1 ;
452
+ R_IND += 1;
453
+ }
454
+ else {
455
+ for(i=0; i<SCS_NB_WORDS; i++)
456
+ R_HW[i] = res[i];
457
+ }
458
+
459
+ return;
460
+ #endif /* #if SCS_NB_WORDS==8*/
461
+
462
+ } /* do_add*/
463
+
464
+
465
+
466
+
467
+ /*/////////////////////////////////////////////////////////////////
468
+ /////////////////////// SUBTRACTION //////////////////////////////
469
+ //////////////////////////////////////////////////////////////////
470
+ // This procedure assumes :
471
+ // - X_IND >= Y_IND
472
+ // - X_SIGN != Y_SIGN
473
+ // neither x or y is zero
474
+ // and result = x - y
475
+ */
476
+
477
+
478
+ static void do_sub(scs_ptr result, scs_ptr x, scs_ptr y){
479
+ int s, carry;
480
+ int Diff, i, j, cp;
481
+ int res[SCS_NB_WORDS];
482
+
483
+ R_EXP = X_EXP + Y_EXP - 1;
484
+ Diff = X_IND - Y_IND;
485
+ R_IND = X_IND;
486
+
487
+ /* The easy case */
488
+ if(Diff >= SCS_NB_WORDS){
489
+ scs_set(result, x); return;
490
+ }
491
+
492
+ else {
493
+ /* 0 <= Diff <= (SCS_NB_WORDS-1) */
494
+ carry = 0;
495
+ if(Diff==0) {
496
+
497
+ i=0;
498
+ while((X_HW[i] == Y_HW[i]) && (i<SCS_NB_WORDS)) i++;
499
+ if (X_HW[i] > Y_HW[i]) cp=1;
500
+ else if (X_HW[i] < Y_HW[i]) cp=-1;
501
+ else cp=0;
502
+
503
+ if (cp == 0) {
504
+ /* Yet another easy case: result = 0 */
505
+ scs_zero(result);
506
+ return;
507
+ }
508
+ else { /* cp <> 0 */
509
+ if (cp > 0){
510
+ /* x > y */
511
+
512
+ R_SGN = X_SGN;
513
+ for(i=(SCS_NB_WORDS-1); i>=0 ;i--){
514
+ s = (int)(X_HW[i] - Y_HW[i] - carry);
515
+ carry = (int)((s&SCS_RADIX)>>SCS_NB_BITS);
516
+ res[i] = (int)((s&SCS_RADIX) + s);
517
+ }
518
+ }
519
+ else { /* cp < 0 */
520
+ /* x < y (change of sign) */
521
+
522
+ R_SGN = - X_SGN;
523
+ for(i=(SCS_NB_WORDS-1); i>=0 ;i--){
524
+ s = (int)(- X_HW[i] + Y_HW[i] - carry);
525
+ carry = (int)((s&SCS_RADIX)>>SCS_NB_BITS);
526
+ res[i] = (int)((s&SCS_RADIX) + s);
527
+ }
528
+ }
529
+ }
530
+ }
531
+ else {
532
+ /* 1<=Diff<(SCS_NB_WORDS-1) Digits of x and y overlap but the
533
+ * sign will be that of x */
534
+
535
+ R_SGN = X_SGN;
536
+ for(i=(SCS_NB_WORDS-1), j=((SCS_NB_WORDS-1)-Diff); i>=0 ;i--,j--){
537
+ if(j>=0)
538
+ s = (int)(X_HW[i] - Y_HW[j] - carry);
539
+ else
540
+ s = (int)(X_HW[i] - carry);
541
+ carry = (int)((s&SCS_RADIX)>>SCS_NB_BITS);
542
+ res[i] = (int)((s&SCS_RADIX) + s);
543
+ }
544
+ }
545
+ /* check for cancellations */
546
+ i=0;
547
+ while ((res[i]==0) && (i < SCS_NB_WORDS)) i++;
548
+
549
+ if(i>0) { /* cancellation, shift result*/
550
+ R_IND -= i;
551
+ for(j=0; i<SCS_NB_WORDS; i++,j++) R_HW[j] = (unsigned int)(res[i]);
552
+ for( ; j<SCS_NB_WORDS; j++) R_HW[j] = 0;
553
+ }
554
+ else {
555
+ for(i=0; i<SCS_NB_WORDS; i++)
556
+ R_HW[i] = (unsigned int)(res[i]);
557
+ }
558
+ }
559
+ return;
560
+ }
561
+
562
+
563
+
564
+
565
+
566
+
567
+
568
+
569
+ /** SCS addition (result is a normalised SCS number).
570
+
571
+ */
572
+ void scs_add(scs_ptr result, scs_ptr x, scs_ptr y)
573
+ {
574
+
575
+ if (x->exception.i[HI]==0){scs_set(result, y); return; }
576
+ if (y->exception.i[HI]==0){scs_set(result, x); return; }
577
+
578
+ if (X_SGN == Y_SGN){
579
+ if(X_IND >= Y_IND)
580
+ do_add(result,x,y);
581
+ else
582
+ do_add(result,y,x);
583
+ }else {
584
+ if(X_IND>=Y_IND){
585
+ do_sub(result,x,y);
586
+ }else {
587
+ do_sub(result,y,x);
588
+ }
589
+ } return;
590
+ }
591
+
592
+ /** SCS subtraction (result is a normalised SCS number).
593
+
594
+ The arguments x, y and result may point to the same memory
595
+ location.
596
+ */
597
+ void scs_sub(scs_ptr result, scs_ptr x, scs_ptr y)
598
+ {
599
+ if (x->exception.i[HI]==0)
600
+ { scs_set(result, y); R_SGN = -R_SGN; return; }
601
+ if (y->exception.i[HI]==0)
602
+ { scs_set(result, x); return; }
603
+
604
+ if (X_SGN == Y_SGN) {
605
+ /* Same sign, so it's a sub */
606
+ if(X_IND>=Y_IND)
607
+ do_sub(result,x,y);
608
+ else{
609
+ do_sub(result,y,x);
610
+ R_SGN = -R_SGN;
611
+ }
612
+ }else {
613
+ if(X_IND>=Y_IND)
614
+ do_add(result,x,y);
615
+ else{
616
+ do_add(result,y,x);
617
+ R_SGN = -R_SGN;
618
+ }
619
+ }
620
+ return;
621
+ }
622
+
623
+