scs 0.2.2 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -0
- data/LICENSE.txt +18 -18
- data/README.md +19 -14
- data/lib/scs/ffi.rb +31 -20
- data/lib/scs/solver.rb +32 -9
- data/lib/scs/version.rb +1 -1
- data/vendor/scs/CITATION.cff +39 -0
- data/vendor/scs/CMakeLists.txt +320 -0
- data/vendor/scs/Makefile +32 -23
- data/vendor/scs/README.md +9 -218
- data/vendor/scs/include/aa.h +67 -23
- data/vendor/scs/include/cones.h +22 -19
- data/vendor/scs/include/glbopts.h +107 -79
- data/vendor/scs/include/linalg.h +3 -4
- data/vendor/scs/include/linsys.h +58 -44
- data/vendor/scs/include/normalize.h +6 -5
- data/vendor/scs/include/rw.h +8 -2
- data/vendor/scs/include/scs.h +257 -141
- data/vendor/scs/include/scs_types.h +34 -0
- data/vendor/scs/include/scs_work.h +83 -0
- data/vendor/scs/include/util.h +3 -15
- data/vendor/scs/linsys/cpu/direct/private.c +241 -232
- data/vendor/scs/linsys/cpu/direct/private.h +13 -7
- data/vendor/scs/linsys/cpu/indirect/private.c +194 -118
- data/vendor/scs/linsys/cpu/indirect/private.h +7 -4
- data/vendor/scs/linsys/csparse.c +87 -0
- data/vendor/scs/linsys/csparse.h +34 -0
- data/vendor/scs/linsys/external/amd/SuiteSparse_config.c +6 -6
- data/vendor/scs/linsys/external/amd/SuiteSparse_config.h +6 -1
- data/vendor/scs/linsys/external/amd/amd_internal.h +1 -1
- data/vendor/scs/linsys/external/amd/amd_order.c +5 -5
- data/vendor/scs/linsys/external/qdldl/changes +2 -0
- data/vendor/scs/linsys/external/qdldl/qdldl.c +29 -46
- data/vendor/scs/linsys/external/qdldl/qdldl.h +33 -41
- data/vendor/scs/linsys/external/qdldl/qdldl_types.h +11 -3
- data/vendor/scs/linsys/gpu/gpu.c +58 -21
- data/vendor/scs/linsys/gpu/gpu.h +70 -35
- data/vendor/scs/linsys/gpu/indirect/private.c +394 -157
- data/vendor/scs/linsys/gpu/indirect/private.h +27 -12
- data/vendor/scs/linsys/scs_matrix.c +478 -0
- data/vendor/scs/linsys/scs_matrix.h +70 -0
- data/vendor/scs/scs.mk +14 -10
- data/vendor/scs/src/aa.c +394 -110
- data/vendor/scs/src/cones.c +497 -359
- data/vendor/scs/src/ctrlc.c +15 -5
- data/vendor/scs/src/linalg.c +107 -26
- data/vendor/scs/src/normalize.c +30 -72
- data/vendor/scs/src/rw.c +202 -27
- data/vendor/scs/src/scs.c +769 -571
- data/vendor/scs/src/scs_version.c +11 -3
- data/vendor/scs/src/util.c +37 -106
- data/vendor/scs/test/minunit.h +22 -8
- data/vendor/scs/test/problem_utils.h +180 -25
- data/vendor/scs/test/problems/degenerate.h +130 -0
- data/vendor/scs/test/problems/hs21_tiny_qp.h +124 -0
- data/vendor/scs/test/problems/hs21_tiny_qp_rw.h +116 -0
- data/vendor/scs/test/problems/infeasible_tiny_qp.h +100 -0
- data/vendor/scs/test/problems/qafiro_tiny_qp.h +199 -0
- data/vendor/scs/test/problems/random_prob +0 -0
- data/vendor/scs/test/problems/random_prob.h +45 -0
- data/vendor/scs/test/problems/rob_gauss_cov_est.h +188 -31
- data/vendor/scs/test/problems/small_lp.h +14 -13
- data/vendor/scs/test/problems/small_qp.h +352 -0
- data/vendor/scs/test/problems/test_validation.h +43 -0
- data/vendor/scs/test/problems/unbounded_tiny_qp.h +82 -0
- data/vendor/scs/test/random_socp_prob.c +54 -53
- data/vendor/scs/test/rng.h +109 -0
- data/vendor/scs/test/run_from_file.c +20 -11
- data/vendor/scs/test/run_tests.c +35 -2
- metadata +29 -98
- data/vendor/scs/linsys/amatrix.c +0 -305
- data/vendor/scs/linsys/amatrix.h +0 -36
- data/vendor/scs/linsys/amatrix.o +0 -0
- data/vendor/scs/linsys/cpu/direct/private.o +0 -0
- data/vendor/scs/linsys/cpu/indirect/private.o +0 -0
- data/vendor/scs/linsys/external/amd/SuiteSparse_config.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_1.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_2.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_aat.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_control.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_defaults.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_dump.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_global.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_info.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_order.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_post_tree.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_postorder.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_preprocess.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_valid.o +0 -0
- data/vendor/scs/linsys/external/qdldl/qdldl.o +0 -0
- data/vendor/scs/src/aa.o +0 -0
- data/vendor/scs/src/cones.o +0 -0
- data/vendor/scs/src/ctrlc.o +0 -0
- data/vendor/scs/src/linalg.o +0 -0
- data/vendor/scs/src/normalize.o +0 -0
- data/vendor/scs/src/rw.o +0 -0
- data/vendor/scs/src/scs.o +0 -0
- data/vendor/scs/src/scs_version.o +0 -0
- data/vendor/scs/src/util.o +0 -0
- data/vendor/scs/test/data/small_random_socp +0 -0
- data/vendor/scs/test/problems/small_random_socp.h +0 -33
- data/vendor/scs/test/run_tests +0 -2
|
@@ -1,35 +1,9 @@
|
|
|
1
1
|
#include "qdldl.h"
|
|
2
|
-
#include "ctrlc.h"
|
|
3
2
|
|
|
4
3
|
#define QDLDL_UNKNOWN (-1)
|
|
5
4
|
#define QDLDL_USED (1)
|
|
6
5
|
#define QDLDL_UNUSED (0)
|
|
7
6
|
|
|
8
|
-
// //DEBUG
|
|
9
|
-
// #include <stdio.h>
|
|
10
|
-
// void qdprint_arrayi(const QDLDL_int* data, QDLDL_int n,char* varName){
|
|
11
|
-
|
|
12
|
-
// QDLDL_int i;
|
|
13
|
-
// printf("%s = [",varName);
|
|
14
|
-
// for(i=0; i< n; i++){
|
|
15
|
-
// printf("%lli,",data[i]);
|
|
16
|
-
// }
|
|
17
|
-
// printf("]\n");
|
|
18
|
-
|
|
19
|
-
// }
|
|
20
|
-
|
|
21
|
-
// void qdprint_arrayf(const QDLDL_float* data, QDLDL_int n, char* varName){
|
|
22
|
-
|
|
23
|
-
// QDLDL_int i;
|
|
24
|
-
// printf("%s = [",varName);
|
|
25
|
-
// for(i=0; i< n; i++){
|
|
26
|
-
// printf("%.3g,",data[i]);
|
|
27
|
-
// }
|
|
28
|
-
// printf("]\n");
|
|
29
|
-
|
|
30
|
-
// }
|
|
31
|
-
// // END DEBUG
|
|
32
|
-
|
|
33
7
|
/* Compute the elimination tree for a quasidefinite matrix
|
|
34
8
|
in compressed sparse column form.
|
|
35
9
|
*/
|
|
@@ -41,7 +15,7 @@ QDLDL_int QDLDL_etree(const QDLDL_int n,
|
|
|
41
15
|
QDLDL_int* Lnz,
|
|
42
16
|
QDLDL_int* etree){
|
|
43
17
|
|
|
44
|
-
QDLDL_int sumLnz
|
|
18
|
+
QDLDL_int sumLnz;
|
|
45
19
|
QDLDL_int i,j,p;
|
|
46
20
|
|
|
47
21
|
|
|
@@ -76,8 +50,19 @@ QDLDL_int QDLDL_etree(const QDLDL_int n,
|
|
|
76
50
|
}
|
|
77
51
|
|
|
78
52
|
//compute the total nonzeros in L. This much
|
|
79
|
-
//space is required to store Li and Lx
|
|
80
|
-
|
|
53
|
+
//space is required to store Li and Lx. Return
|
|
54
|
+
//error code -2 if the nonzero count will overflow
|
|
55
|
+
//its integer type.
|
|
56
|
+
sumLnz = 0;
|
|
57
|
+
for(i = 0; i < n; i++){
|
|
58
|
+
if(sumLnz > QDLDL_INT_MAX - Lnz[i]){
|
|
59
|
+
sumLnz = -2;
|
|
60
|
+
break;
|
|
61
|
+
}
|
|
62
|
+
else{
|
|
63
|
+
sumLnz += Lnz[i];
|
|
64
|
+
}
|
|
65
|
+
}
|
|
81
66
|
|
|
82
67
|
return sumLnz;
|
|
83
68
|
}
|
|
@@ -139,10 +124,6 @@ QDLDL_int QDLDL_factor(const QDLDL_int n,
|
|
|
139
124
|
//Start from 1 here. The upper LH corner is trivially 0
|
|
140
125
|
//in L b/c we are only computing the subdiagonal elements
|
|
141
126
|
for(k = 1; k < n; k++){
|
|
142
|
-
if(scs_is_interrupted()) {
|
|
143
|
-
scs_printf("interrupt detected in factorization\n");
|
|
144
|
-
return -1;
|
|
145
|
-
}
|
|
146
127
|
|
|
147
128
|
//NB : For each k, we compute a solution to
|
|
148
129
|
//y = L(0:(k-1),0:k-1))\b, where b is the kth
|
|
@@ -258,11 +239,12 @@ void QDLDL_Lsolve(const QDLDL_int n,
|
|
|
258
239
|
const QDLDL_float* Lx,
|
|
259
240
|
QDLDL_float* x){
|
|
260
241
|
|
|
261
|
-
QDLDL_int i,j;
|
|
242
|
+
QDLDL_int i,j;
|
|
262
243
|
for(i = 0; i < n; i++){
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
244
|
+
QDLDL_float val = x[i];
|
|
245
|
+
for(j = Lp[i]; j < Lp[i+1]; j++){
|
|
246
|
+
x[Li[j]] -= Lx[j]*val;
|
|
247
|
+
}
|
|
266
248
|
}
|
|
267
249
|
}
|
|
268
250
|
|
|
@@ -273,11 +255,13 @@ void QDLDL_Ltsolve(const QDLDL_int n,
|
|
|
273
255
|
const QDLDL_float* Lx,
|
|
274
256
|
QDLDL_float* x){
|
|
275
257
|
|
|
276
|
-
QDLDL_int i,j;
|
|
258
|
+
QDLDL_int i,j;
|
|
277
259
|
for(i = n-1; i>=0; i--){
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
260
|
+
QDLDL_float val = x[i];
|
|
261
|
+
for(j = Lp[i]; j < Lp[i+1]; j++){
|
|
262
|
+
val -= Lx[j]*x[Li[j]];
|
|
263
|
+
}
|
|
264
|
+
x[i] = val;
|
|
281
265
|
}
|
|
282
266
|
}
|
|
283
267
|
|
|
@@ -289,10 +273,9 @@ void QDLDL_solve(const QDLDL_int n,
|
|
|
289
273
|
const QDLDL_float* Dinv,
|
|
290
274
|
QDLDL_float* x){
|
|
291
275
|
|
|
292
|
-
QDLDL_int i;
|
|
293
|
-
|
|
294
|
-
QDLDL_Lsolve(n,Lp,Li,Lx,x);
|
|
295
|
-
for(i = 0; i < n; i++) x[i] *= Dinv[i];
|
|
296
|
-
QDLDL_Ltsolve(n,Lp,Li,Lx,x);
|
|
276
|
+
QDLDL_int i;
|
|
297
277
|
|
|
278
|
+
QDLDL_Lsolve(n,Lp,Li,Lx,x);
|
|
279
|
+
for(i = 0; i < n; i++) x[i] *= Dinv[i];
|
|
280
|
+
QDLDL_Ltsolve(n,Lp,Li,Lx,x);
|
|
298
281
|
}
|
|
@@ -32,19 +32,17 @@ extern "C" {
|
|
|
32
32
|
* this function will *not* return an error, as it may still be possible to factor
|
|
33
33
|
* such a matrix in LDL form. No promises are made in this case though...
|
|
34
34
|
*
|
|
35
|
-
* @param
|
|
35
|
+
* @param n number of columns in CSC matrix A (assumed square)
|
|
36
36
|
* @param Ap column pointers (size n+1) for columns of A
|
|
37
37
|
* @param Ai row indices of A. Has Ap[n] elements
|
|
38
38
|
* @param work work vector (size n) (no meaning on return)
|
|
39
39
|
* @param Lnz count of nonzeros in each column of L (size n) below diagonal
|
|
40
40
|
* @param etree elimination tree (size n)
|
|
41
|
-
* @return total sum of Lnz (i.e. total nonzeros in L below diagonal).
|
|
42
|
-
* -1 if the input
|
|
43
|
-
*
|
|
44
|
-
*
|
|
41
|
+
* @return total sum of Lnz (i.e. total nonzeros in L below diagonal).
|
|
42
|
+
* Returns -1 if the input is not triu or has an empty column.
|
|
43
|
+
* Returns -2 if the return value overflows QDLDL_int.
|
|
45
44
|
*
|
|
46
45
|
*/
|
|
47
|
-
|
|
48
46
|
QDLDL_int QDLDL_etree(const QDLDL_int n,
|
|
49
47
|
const QDLDL_int* Ap,
|
|
50
48
|
const QDLDL_int* Ai,
|
|
@@ -52,6 +50,7 @@ extern "C" {
|
|
|
52
50
|
QDLDL_int* Lnz,
|
|
53
51
|
QDLDL_int* etree);
|
|
54
52
|
|
|
53
|
+
|
|
55
54
|
/**
|
|
56
55
|
* Compute an LDL decomposition for a quasidefinite matrix
|
|
57
56
|
* in compressed sparse column form, where the input matrix is
|
|
@@ -61,21 +60,22 @@ extern "C" {
|
|
|
61
60
|
* Returns factors L, D and Dinv = 1./D.
|
|
62
61
|
*
|
|
63
62
|
* Does not use MALLOC. It is assumed that L will be a compressed
|
|
64
|
-
* sparse column matrix with data (
|
|
63
|
+
* sparse column matrix with data (n,Lp,Li,Lx) with sufficient space
|
|
65
64
|
* allocated, with a number of nonzeros equal to the count given
|
|
66
|
-
* as a return value by
|
|
67
|
-
*
|
|
68
|
-
* @param
|
|
69
|
-
* @param Ap column pointers (size n+1) for columns of A
|
|
70
|
-
* @param Ai row indices of A. Has Ap[n] elements
|
|
71
|
-
* @param
|
|
72
|
-
* @param Lp column pointers (size
|
|
73
|
-
* @param Li row indices of L. Has Lp[
|
|
65
|
+
* as a return value by QDLDL_etree
|
|
66
|
+
*
|
|
67
|
+
* @param n number of columns in L and A (both square)
|
|
68
|
+
* @param Ap column pointers (size n+1) for columns of A (not modified)
|
|
69
|
+
* @param Ai row indices of A. Has Ap[n] elements (not modified)
|
|
70
|
+
* @param Ax data of A. Has Ap[n] elements (not modified)
|
|
71
|
+
* @param Lp column pointers (size n+1) for columns of L
|
|
72
|
+
* @param Li row indices of L. Has Lp[n] elements
|
|
73
|
+
* @param Lx data of L. Has Lp[n] elements
|
|
74
74
|
* @param D vectorized factor D. Length is n
|
|
75
75
|
* @param Dinv reciprocal of D. Length is n
|
|
76
76
|
* @param Lnz count of nonzeros in each column of L below diagonal,
|
|
77
|
-
* as given by
|
|
78
|
-
* @param etree elimination tree as as given by
|
|
77
|
+
* as given by QDLDL_etree (not modified)
|
|
78
|
+
* @param etree elimination tree as given by QDLDL_etree (not modified)
|
|
79
79
|
* @param bwork working array of bools. Length is n
|
|
80
80
|
* @param iwork working array of integers. Length is 3*n
|
|
81
81
|
* @param fwork working array of floats. Length is n
|
|
@@ -85,8 +85,6 @@ extern "C" {
|
|
|
85
85
|
* or otherwise LDL factorisable)
|
|
86
86
|
*
|
|
87
87
|
*/
|
|
88
|
-
|
|
89
|
-
|
|
90
88
|
QDLDL_int QDLDL_factor(const QDLDL_int n,
|
|
91
89
|
const QDLDL_int* Ap,
|
|
92
90
|
const QDLDL_int* Ai,
|
|
@@ -107,16 +105,15 @@ QDLDL_int QDLDL_factor(const QDLDL_int n,
|
|
|
107
105
|
* Solves LDL'x = b
|
|
108
106
|
*
|
|
109
107
|
* It is assumed that L will be a compressed
|
|
110
|
-
* sparse column matrix with data (
|
|
108
|
+
* sparse column matrix with data (n,Lp,Li,Lx).
|
|
111
109
|
*
|
|
112
|
-
* @param
|
|
113
|
-
* @param
|
|
114
|
-
* @param
|
|
115
|
-
* @param
|
|
110
|
+
* @param n number of columns in L
|
|
111
|
+
* @param Lp column pointers (size n+1) for columns of L
|
|
112
|
+
* @param Li row indices of L. Has Lp[n] elements
|
|
113
|
+
* @param Lx data of L. Has Lp[n] elements
|
|
116
114
|
* @param Dinv reciprocal of D. Length is n
|
|
117
115
|
* @param x initialized to b. Equal to x on return
|
|
118
116
|
*
|
|
119
|
-
*
|
|
120
117
|
*/
|
|
121
118
|
void QDLDL_solve(const QDLDL_int n,
|
|
122
119
|
const QDLDL_int* Lp,
|
|
@@ -130,40 +127,35 @@ void QDLDL_solve(const QDLDL_int n,
|
|
|
130
127
|
* Solves (L+I)x = b
|
|
131
128
|
*
|
|
132
129
|
* It is assumed that L will be a compressed
|
|
133
|
-
* sparse column matrix with data (
|
|
130
|
+
* sparse column matrix with data (n,Lp,Li,Lx).
|
|
134
131
|
*
|
|
135
|
-
* @param
|
|
136
|
-
* @param
|
|
137
|
-
* @param
|
|
138
|
-
* @param
|
|
139
|
-
* @param Dinv reciprocal of D. Length is n
|
|
132
|
+
* @param n number of columns in L
|
|
133
|
+
* @param Lp column pointers (size n+1) for columns of L
|
|
134
|
+
* @param Li row indices of L. Has Lp[n] elements
|
|
135
|
+
* @param Lx data of L. Has Lp[n] elements
|
|
140
136
|
* @param x initialized to b. Equal to x on return
|
|
141
137
|
*
|
|
142
|
-
*
|
|
143
138
|
*/
|
|
144
|
-
|
|
145
139
|
void QDLDL_Lsolve(const QDLDL_int n,
|
|
146
140
|
const QDLDL_int* Lp,
|
|
147
141
|
const QDLDL_int* Li,
|
|
148
142
|
const QDLDL_float* Lx,
|
|
149
143
|
QDLDL_float* x);
|
|
150
144
|
|
|
145
|
+
|
|
151
146
|
/**
|
|
152
147
|
* Solves (L+I)'x = b
|
|
153
148
|
*
|
|
154
149
|
* It is assumed that L will be a compressed
|
|
155
|
-
* sparse column matrix with data (
|
|
150
|
+
* sparse column matrix with data (n,Lp,Li,Lx).
|
|
156
151
|
*
|
|
157
|
-
* @param
|
|
158
|
-
* @param
|
|
159
|
-
* @param
|
|
160
|
-
* @param
|
|
161
|
-
* @param Dinv reciprocal of D. Length is n
|
|
152
|
+
* @param n number of columns in L
|
|
153
|
+
* @param Lp column pointers (size n+1) for columns of L
|
|
154
|
+
* @param Li row indices of L. Has Lp[n] elements
|
|
155
|
+
* @param Lx data of L. Has Lp[n] elements
|
|
162
156
|
* @param x initialized to b. Equal to x on return
|
|
163
157
|
*
|
|
164
|
-
*
|
|
165
158
|
*/
|
|
166
|
-
|
|
167
159
|
void QDLDL_Ltsolve(const QDLDL_int n,
|
|
168
160
|
const QDLDL_int* Lp,
|
|
169
161
|
const QDLDL_int* Li,
|
|
@@ -1,18 +1,26 @@
|
|
|
1
1
|
#ifndef QDLDL_TYPES_H
|
|
2
2
|
# define QDLDL_TYPES_H
|
|
3
3
|
|
|
4
|
-
#include "glbopts.h"
|
|
5
|
-
|
|
6
4
|
# ifdef __cplusplus
|
|
7
5
|
extern "C" {
|
|
8
6
|
# endif /* ifdef __cplusplus */
|
|
9
7
|
|
|
10
|
-
|
|
8
|
+
#include "glbopts.h"
|
|
9
|
+
#include <limits.h> //for the QDLDL_INT_TYPE_MAX
|
|
10
|
+
|
|
11
|
+
/* QDLDL integer and float types */
|
|
11
12
|
|
|
12
13
|
#define QDLDL_int scs_int
|
|
13
14
|
#define QDLDL_float scs_float
|
|
14
15
|
#define QDLDL_bool scs_int
|
|
15
16
|
|
|
17
|
+
/* Maximum value of the signed type QDLDL_int */
|
|
18
|
+
#ifdef DLONG
|
|
19
|
+
#define QDLDL_INT_MAX LLONG_MAX
|
|
20
|
+
#else
|
|
21
|
+
#define QDLDL_INT_MAX INT_MAX
|
|
22
|
+
#endif
|
|
23
|
+
|
|
16
24
|
# ifdef __cplusplus
|
|
17
25
|
}
|
|
18
26
|
# endif /* ifdef __cplusplus */
|
data/vendor/scs/linsys/gpu/gpu.c
CHANGED
|
@@ -1,41 +1,78 @@
|
|
|
1
1
|
#include "gpu.h"
|
|
2
2
|
|
|
3
|
-
void SCS(
|
|
4
|
-
|
|
3
|
+
void SCS(accum_by_atrans_gpu)(const ScsGpuMatrix *Ag,
|
|
4
|
+
const cusparseDnVecDescr_t x,
|
|
5
|
+
cusparseDnVecDescr_t y,
|
|
6
|
+
cusparseHandle_t cusparse_handle,
|
|
7
|
+
size_t *buffer_size, void **buffer) {
|
|
5
8
|
/* y += A'*x
|
|
6
9
|
x and y MUST be on GPU already
|
|
7
10
|
*/
|
|
8
11
|
const scs_float onef = 1.0;
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
+
size_t new_buffer_size = 0;
|
|
13
|
+
|
|
14
|
+
CUSPARSE_GEN(SpMV_bufferSize)
|
|
15
|
+
(cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &onef, Ag->descr, x,
|
|
16
|
+
&onef, y, SCS_CUDA_FLOAT, SCS_CSRMV_ALG, &new_buffer_size);
|
|
17
|
+
|
|
18
|
+
if (new_buffer_size > *buffer_size) {
|
|
19
|
+
if (*buffer != SCS_NULL) {
|
|
20
|
+
cudaFree(*buffer);
|
|
21
|
+
}
|
|
22
|
+
cudaMalloc(buffer, *buffer_size);
|
|
23
|
+
*buffer_size = new_buffer_size;
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
CUSPARSE_GEN(SpMV)
|
|
27
|
+
(cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &onef, Ag->descr, x,
|
|
28
|
+
&onef, y, SCS_CUDA_FLOAT, SCS_CSRMV_ALG, buffer);
|
|
12
29
|
}
|
|
13
30
|
|
|
14
|
-
|
|
15
|
-
|
|
31
|
+
/* this is slow, use trans routine if possible */
|
|
32
|
+
void SCS(accum_by_a_gpu)(const ScsGpuMatrix *Ag, const cusparseDnVecDescr_t x,
|
|
33
|
+
cusparseDnVecDescr_t y,
|
|
34
|
+
cusparseHandle_t cusparse_handle, size_t *buffer_size,
|
|
35
|
+
void **buffer) {
|
|
16
36
|
/* y += A*x
|
|
17
37
|
x and y MUST be on GPU already
|
|
18
38
|
*/
|
|
19
39
|
const scs_float onef = 1.0;
|
|
40
|
+
size_t new_buffer_size = 0;
|
|
41
|
+
|
|
20
42
|
/* The A matrix idx pointers must be ORDERED */
|
|
21
|
-
|
|
22
|
-
(cusparse_handle, CUSPARSE_OPERATION_TRANSPOSE,
|
|
23
|
-
|
|
43
|
+
CUSPARSE_GEN(SpMV_bufferSize)
|
|
44
|
+
(cusparse_handle, CUSPARSE_OPERATION_TRANSPOSE, &onef, Ag->descr, x, &onef, y,
|
|
45
|
+
SCS_CUDA_FLOAT, SCS_CSRMV_ALG, &new_buffer_size);
|
|
46
|
+
|
|
47
|
+
if (new_buffer_size > *buffer_size) {
|
|
48
|
+
if (*buffer != SCS_NULL) {
|
|
49
|
+
cudaFree(*buffer);
|
|
50
|
+
}
|
|
51
|
+
cudaMalloc(buffer, *buffer_size);
|
|
52
|
+
*buffer_size = new_buffer_size;
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
CUSPARSE_GEN(SpMV)
|
|
56
|
+
(cusparse_handle, CUSPARSE_OPERATION_TRANSPOSE, &onef, Ag->descr, x, &onef, y,
|
|
57
|
+
SCS_CUDA_FLOAT, SCS_CSRMV_ALG, buffer);
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
/* This assumes that P has been made full (ie not triangular) and uses the
|
|
61
|
+
* fact that the GPU is faster for general sparse matrices than for symmetric
|
|
62
|
+
*/
|
|
63
|
+
/* y += P*x
|
|
64
|
+
x and y MUST be on GPU already
|
|
65
|
+
*/
|
|
66
|
+
void SCS(accum_by_p_gpu)(const ScsGpuMatrix *Pg, const cusparseDnVecDescr_t x,
|
|
67
|
+
cusparseDnVecDescr_t y,
|
|
68
|
+
cusparseHandle_t cusparse_handle, size_t *buffer_size,
|
|
69
|
+
void **buffer) {
|
|
70
|
+
SCS(accum_by_atrans_gpu)(Pg, x, y, cusparse_handle, buffer_size, buffer);
|
|
24
71
|
}
|
|
25
72
|
|
|
26
73
|
void SCS(free_gpu_matrix)(ScsGpuMatrix *A) {
|
|
27
74
|
cudaFree(A->x);
|
|
28
75
|
cudaFree(A->i);
|
|
29
76
|
cudaFree(A->p);
|
|
30
|
-
|
|
31
|
-
}
|
|
32
|
-
|
|
33
|
-
void SCS(normalize_a)(ScsMatrix *A, const ScsSettings *stgs, const ScsCone *k,
|
|
34
|
-
ScsScaling *scal) {
|
|
35
|
-
SCS(_normalize_a)(A, stgs, k, scal);
|
|
36
|
-
}
|
|
37
|
-
|
|
38
|
-
void SCS(un_normalize_a)(ScsMatrix *A, const ScsSettings *stgs,
|
|
39
|
-
const ScsScaling *scal) {
|
|
40
|
-
SCS(_un_normalize_a)(A, stgs, scal);
|
|
77
|
+
cusparseDestroySpMat(A->descr);
|
|
41
78
|
}
|
data/vendor/scs/linsys/gpu/gpu.h
CHANGED
|
@@ -1,57 +1,82 @@
|
|
|
1
|
-
#ifndef
|
|
2
|
-
#define
|
|
1
|
+
#ifndef SCS_GPU_H_GUARD
|
|
2
|
+
#define SCS_GPU_H_GUARD
|
|
3
3
|
|
|
4
4
|
#ifdef __cplusplus
|
|
5
5
|
extern "C" {
|
|
6
6
|
#endif
|
|
7
7
|
|
|
8
|
-
|
|
8
|
+
/* TODO: Do we need this?
|
|
9
|
+
|
|
9
10
|
#include <cuda.h>
|
|
11
|
+
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
#include <cublas_v2.h>
|
|
10
15
|
#include <cuda_runtime_api.h>
|
|
11
16
|
#include <cusparse.h>
|
|
12
17
|
|
|
13
|
-
#include "amatrix.h"
|
|
14
18
|
#include "glbopts.h"
|
|
15
19
|
#include "linalg.h"
|
|
16
20
|
#include "linsys.h"
|
|
17
21
|
#include "scs.h"
|
|
22
|
+
#include "scs_matrix.h"
|
|
18
23
|
#include "util.h"
|
|
19
24
|
|
|
20
|
-
#define CUDA_CHECK_ERR
|
|
21
|
-
do {
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
25
|
+
#define CUDA_CHECK_ERR \
|
|
26
|
+
do { \
|
|
27
|
+
cudaDeviceSynchronize(); \
|
|
28
|
+
cudaError_t err = cudaGetLastError(); \
|
|
29
|
+
if (err != cudaSuccess) { \
|
|
30
|
+
scs_printf("%s:%d:%s\n ERROR_CUDA (#): %s\n", __FILE__, __LINE__, \
|
|
31
|
+
__func__, cudaGetErrorString(err)); \
|
|
32
|
+
} \
|
|
27
33
|
} while (0)
|
|
28
34
|
|
|
29
|
-
#
|
|
35
|
+
#if VERBOSITY == 0
|
|
30
36
|
#ifndef SFLOAT
|
|
31
37
|
#define CUBLAS(x) cublasD##x
|
|
32
|
-
#define
|
|
38
|
+
#define CUBLASI(x) cublasId##x
|
|
33
39
|
#else
|
|
34
40
|
#define CUBLAS(x) cublasS##x
|
|
35
|
-
#define
|
|
41
|
+
#define CUBLASI(x) cublasIs##x
|
|
36
42
|
#endif
|
|
43
|
+
#define CUSPARSE_GEN(x) cusparse##x
|
|
37
44
|
#else
|
|
38
45
|
#ifndef SFLOAT
|
|
39
|
-
#define CUBLAS(x)
|
|
40
|
-
CUDA_CHECK_ERR;
|
|
46
|
+
#define CUBLAS(x) \
|
|
47
|
+
CUDA_CHECK_ERR; \
|
|
41
48
|
cublasD##x
|
|
42
|
-
#define
|
|
43
|
-
CUDA_CHECK_ERR;
|
|
44
|
-
|
|
49
|
+
#define CUBLASI(x) \
|
|
50
|
+
CUDA_CHECK_ERR; \
|
|
51
|
+
cublasId##x
|
|
45
52
|
#else
|
|
46
|
-
#define CUBLAS(x)
|
|
47
|
-
CUDA_CHECK_ERR;
|
|
53
|
+
#define CUBLAS(x) \
|
|
54
|
+
CUDA_CHECK_ERR; \
|
|
48
55
|
cublasS##x
|
|
49
|
-
#define
|
|
50
|
-
CUDA_CHECK_ERR;
|
|
51
|
-
|
|
56
|
+
#define CUBLASI(x) \
|
|
57
|
+
CUDA_CHECK_ERR; \
|
|
58
|
+
cublasIs##x
|
|
52
59
|
#endif
|
|
60
|
+
#define CUSPARSE_GEN(x) \
|
|
61
|
+
CUDA_CHECK_ERR; \
|
|
62
|
+
cusparse##x
|
|
53
63
|
#endif
|
|
54
64
|
|
|
65
|
+
#ifndef SFLOAT
|
|
66
|
+
#define SCS_CUDA_FLOAT CUDA_R_64F
|
|
67
|
+
#else
|
|
68
|
+
#define SCS_CUDA_FLOAT CUDA_R_32F
|
|
69
|
+
#endif
|
|
70
|
+
|
|
71
|
+
#ifndef DLONG
|
|
72
|
+
#define SCS_CUSPARSE_INDEX CUSPARSE_INDEX_32I
|
|
73
|
+
#else
|
|
74
|
+
#define SCS_CUSPARSE_INDEX CUSPARSE_INDEX_64I
|
|
75
|
+
#endif
|
|
76
|
+
|
|
77
|
+
#define SCS_CSRMV_ALG CUSPARSE_CSRMV_ALG1
|
|
78
|
+
#define SCS_CSR2CSC_ALG CUSPARSE_CSR2CSC_ALG1
|
|
79
|
+
|
|
55
80
|
/*
|
|
56
81
|
CUDA matrix routines only for CSR, not CSC matrices:
|
|
57
82
|
CSC CSR GPU Mult
|
|
@@ -59,23 +84,33 @@ extern "C" {
|
|
|
59
84
|
A'(n x m) A (m x n) Agt accum_by_a_gpu
|
|
60
85
|
*/
|
|
61
86
|
|
|
62
|
-
/* this struct defines the data matrix
|
|
63
|
-
typedef struct
|
|
87
|
+
/* this struct defines the data matrix on GPU */
|
|
88
|
+
typedef struct SCS_GPU_DATA_MATRIX {
|
|
64
89
|
/* A is supplied in column compressed format */
|
|
65
|
-
scs_float *x; /*
|
|
66
|
-
scs_int *i; /*
|
|
67
|
-
scs_int *p; /*
|
|
90
|
+
scs_float *x; /* values, size: NNZ */
|
|
91
|
+
scs_int *i; /* row index, size: NNZ */
|
|
92
|
+
scs_int *p; /* column pointer, size: n+1 */
|
|
68
93
|
scs_int m, n; /* m rows, n cols */
|
|
69
|
-
scs_int
|
|
94
|
+
scs_int nnz; /* num non-zeros in matrix */
|
|
70
95
|
/* CUDA */
|
|
71
|
-
|
|
96
|
+
cusparseSpMatDescr_t descr;
|
|
72
97
|
} ScsGpuMatrix;
|
|
73
98
|
|
|
74
|
-
void SCS(
|
|
75
|
-
|
|
99
|
+
void SCS(accum_by_atrans_gpu)(const ScsGpuMatrix *A,
|
|
100
|
+
const cusparseDnVecDescr_t x,
|
|
101
|
+
cusparseDnVecDescr_t y,
|
|
102
|
+
cusparseHandle_t cusparse_handle,
|
|
103
|
+
size_t *buffer_size, void **buffer);
|
|
104
|
+
|
|
105
|
+
void SCS(accum_by_a_gpu)(const ScsGpuMatrix *A, const cusparseDnVecDescr_t x,
|
|
106
|
+
cusparseDnVecDescr_t y,
|
|
107
|
+
cusparseHandle_t cusparse_handle, size_t *buffer_size,
|
|
108
|
+
void **buffer);
|
|
76
109
|
|
|
77
|
-
void SCS(
|
|
78
|
-
|
|
110
|
+
void SCS(accum_by_p_gpu)(const ScsGpuMatrix *P, const cusparseDnVecDescr_t x,
|
|
111
|
+
cusparseDnVecDescr_t y,
|
|
112
|
+
cusparseHandle_t cusparse_handle, size_t *buffer_size,
|
|
113
|
+
void **buffer);
|
|
79
114
|
|
|
80
115
|
void SCS(free_gpu_matrix)(ScsGpuMatrix *A);
|
|
81
116
|
|