scs 0.2.3 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +11 -6
- data/lib/scs/ffi.rb +30 -13
- data/lib/scs/solver.rb +32 -9
- data/lib/scs/version.rb +1 -1
- data/vendor/scs/CITATION.cff +39 -0
- data/vendor/scs/CMakeLists.txt +7 -8
- data/vendor/scs/Makefile +24 -15
- data/vendor/scs/README.md +5 -263
- data/vendor/scs/include/aa.h +67 -23
- data/vendor/scs/include/cones.h +17 -17
- data/vendor/scs/include/glbopts.h +98 -32
- data/vendor/scs/include/linalg.h +2 -4
- data/vendor/scs/include/linsys.h +58 -44
- data/vendor/scs/include/normalize.h +3 -3
- data/vendor/scs/include/rw.h +8 -2
- data/vendor/scs/include/scs.h +293 -133
- data/vendor/scs/include/util.h +3 -15
- data/vendor/scs/linsys/cpu/direct/private.c +220 -224
- data/vendor/scs/linsys/cpu/direct/private.h +13 -7
- data/vendor/scs/linsys/cpu/direct/private.o +0 -0
- data/vendor/scs/linsys/cpu/indirect/private.c +177 -110
- data/vendor/scs/linsys/cpu/indirect/private.h +8 -4
- data/vendor/scs/linsys/cpu/indirect/private.o +0 -0
- data/vendor/scs/linsys/csparse.c +87 -0
- data/vendor/scs/linsys/csparse.h +34 -0
- data/vendor/scs/linsys/csparse.o +0 -0
- data/vendor/scs/linsys/external/amd/SuiteSparse_config.c +1 -1
- data/vendor/scs/linsys/external/amd/SuiteSparse_config.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_1.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_2.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_aat.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_control.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_defaults.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_dump.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_global.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_info.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_internal.h +1 -1
- data/vendor/scs/linsys/external/amd/amd_order.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_post_tree.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_postorder.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_preprocess.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_valid.o +0 -0
- data/vendor/scs/linsys/external/qdldl/changes +2 -0
- data/vendor/scs/linsys/external/qdldl/qdldl.c +29 -46
- data/vendor/scs/linsys/external/qdldl/qdldl.h +33 -41
- data/vendor/scs/linsys/external/qdldl/qdldl.o +0 -0
- data/vendor/scs/linsys/external/qdldl/qdldl_types.h +11 -3
- data/vendor/scs/linsys/gpu/gpu.c +31 -33
- data/vendor/scs/linsys/gpu/gpu.h +48 -31
- data/vendor/scs/linsys/gpu/indirect/private.c +338 -232
- data/vendor/scs/linsys/gpu/indirect/private.h +23 -14
- data/vendor/scs/linsys/scs_matrix.c +498 -0
- data/vendor/scs/linsys/scs_matrix.h +70 -0
- data/vendor/scs/linsys/scs_matrix.o +0 -0
- data/vendor/scs/scs.mk +13 -9
- data/vendor/scs/src/aa.c +384 -109
- data/vendor/scs/src/aa.o +0 -0
- data/vendor/scs/src/cones.c +440 -353
- data/vendor/scs/src/cones.o +0 -0
- data/vendor/scs/src/ctrlc.c +15 -5
- data/vendor/scs/src/ctrlc.o +0 -0
- data/vendor/scs/src/linalg.c +84 -28
- data/vendor/scs/src/linalg.o +0 -0
- data/vendor/scs/src/normalize.c +22 -64
- data/vendor/scs/src/normalize.o +0 -0
- data/vendor/scs/src/rw.c +160 -21
- data/vendor/scs/src/rw.o +0 -0
- data/vendor/scs/src/scs.c +767 -563
- data/vendor/scs/src/scs.o +0 -0
- data/vendor/scs/src/scs_indir.o +0 -0
- data/vendor/scs/src/scs_version.c +9 -3
- data/vendor/scs/src/scs_version.o +0 -0
- data/vendor/scs/src/util.c +37 -106
- data/vendor/scs/src/util.o +0 -0
- data/vendor/scs/test/minunit.h +17 -8
- data/vendor/scs/test/problem_utils.h +176 -14
- data/vendor/scs/test/problems/degenerate.h +130 -0
- data/vendor/scs/test/problems/hs21_tiny_qp.h +124 -0
- data/vendor/scs/test/problems/hs21_tiny_qp_rw.h +116 -0
- data/vendor/scs/test/problems/infeasible_tiny_qp.h +100 -0
- data/vendor/scs/test/problems/qafiro_tiny_qp.h +199 -0
- data/vendor/scs/test/problems/random_prob +0 -0
- data/vendor/scs/test/problems/random_prob.h +45 -0
- data/vendor/scs/test/problems/rob_gauss_cov_est.h +188 -31
- data/vendor/scs/test/problems/small_lp.h +13 -14
- data/vendor/scs/test/problems/test_fails.h +43 -0
- data/vendor/scs/test/problems/unbounded_tiny_qp.h +82 -0
- data/vendor/scs/test/random_socp_prob.c +54 -53
- data/vendor/scs/test/rng.h +109 -0
- data/vendor/scs/test/run_from_file.c +19 -10
- data/vendor/scs/test/run_tests.c +27 -3
- metadata +20 -8
- data/vendor/scs/linsys/amatrix.c +0 -305
- data/vendor/scs/linsys/amatrix.h +0 -36
- data/vendor/scs/linsys/amatrix.o +0 -0
- data/vendor/scs/test/data/small_random_socp +0 -0
- data/vendor/scs/test/problems/small_random_socp.h +0 -33
- data/vendor/scs/test/run_tests +0 -2
@@ -1,35 +1,9 @@
|
|
1
1
|
#include "qdldl.h"
|
2
|
-
#include "ctrlc.h"
|
3
2
|
|
4
3
|
#define QDLDL_UNKNOWN (-1)
|
5
4
|
#define QDLDL_USED (1)
|
6
5
|
#define QDLDL_UNUSED (0)
|
7
6
|
|
8
|
-
// //DEBUG
|
9
|
-
// #include <stdio.h>
|
10
|
-
// void qdprint_arrayi(const QDLDL_int* data, QDLDL_int n,char* varName){
|
11
|
-
|
12
|
-
// QDLDL_int i;
|
13
|
-
// printf("%s = [",varName);
|
14
|
-
// for(i=0; i< n; i++){
|
15
|
-
// printf("%lli,",data[i]);
|
16
|
-
// }
|
17
|
-
// printf("]\n");
|
18
|
-
|
19
|
-
// }
|
20
|
-
|
21
|
-
// void qdprint_arrayf(const QDLDL_float* data, QDLDL_int n, char* varName){
|
22
|
-
|
23
|
-
// QDLDL_int i;
|
24
|
-
// printf("%s = [",varName);
|
25
|
-
// for(i=0; i< n; i++){
|
26
|
-
// printf("%.3g,",data[i]);
|
27
|
-
// }
|
28
|
-
// printf("]\n");
|
29
|
-
|
30
|
-
// }
|
31
|
-
// // END DEBUG
|
32
|
-
|
33
7
|
/* Compute the elimination tree for a quasidefinite matrix
|
34
8
|
in compressed sparse column form.
|
35
9
|
*/
|
@@ -41,7 +15,7 @@ QDLDL_int QDLDL_etree(const QDLDL_int n,
|
|
41
15
|
QDLDL_int* Lnz,
|
42
16
|
QDLDL_int* etree){
|
43
17
|
|
44
|
-
QDLDL_int sumLnz
|
18
|
+
QDLDL_int sumLnz;
|
45
19
|
QDLDL_int i,j,p;
|
46
20
|
|
47
21
|
|
@@ -76,8 +50,19 @@ QDLDL_int QDLDL_etree(const QDLDL_int n,
|
|
76
50
|
}
|
77
51
|
|
78
52
|
//compute the total nonzeros in L. This much
|
79
|
-
//space is required to store Li and Lx
|
80
|
-
|
53
|
+
//space is required to store Li and Lx. Return
|
54
|
+
//error code -2 if the nonzero count will overflow
|
55
|
+
//its integer type.
|
56
|
+
sumLnz = 0;
|
57
|
+
for(i = 0; i < n; i++){
|
58
|
+
if(sumLnz > QDLDL_INT_MAX - Lnz[i]){
|
59
|
+
sumLnz = -2;
|
60
|
+
break;
|
61
|
+
}
|
62
|
+
else{
|
63
|
+
sumLnz += Lnz[i];
|
64
|
+
}
|
65
|
+
}
|
81
66
|
|
82
67
|
return sumLnz;
|
83
68
|
}
|
@@ -139,10 +124,6 @@ QDLDL_int QDLDL_factor(const QDLDL_int n,
|
|
139
124
|
//Start from 1 here. The upper LH corner is trivially 0
|
140
125
|
//in L b/c we are only computing the subdiagonal elements
|
141
126
|
for(k = 1; k < n; k++){
|
142
|
-
if(scs_is_interrupted()) {
|
143
|
-
scs_printf("interrupt detected in factorization\n");
|
144
|
-
return -1;
|
145
|
-
}
|
146
127
|
|
147
128
|
//NB : For each k, we compute a solution to
|
148
129
|
//y = L(0:(k-1),0:k-1))\b, where b is the kth
|
@@ -258,11 +239,12 @@ void QDLDL_Lsolve(const QDLDL_int n,
|
|
258
239
|
const QDLDL_float* Lx,
|
259
240
|
QDLDL_float* x){
|
260
241
|
|
261
|
-
QDLDL_int i,j;
|
242
|
+
QDLDL_int i,j;
|
262
243
|
for(i = 0; i < n; i++){
|
263
|
-
|
264
|
-
|
265
|
-
|
244
|
+
QDLDL_float val = x[i];
|
245
|
+
for(j = Lp[i]; j < Lp[i+1]; j++){
|
246
|
+
x[Li[j]] -= Lx[j]*val;
|
247
|
+
}
|
266
248
|
}
|
267
249
|
}
|
268
250
|
|
@@ -273,11 +255,13 @@ void QDLDL_Ltsolve(const QDLDL_int n,
|
|
273
255
|
const QDLDL_float* Lx,
|
274
256
|
QDLDL_float* x){
|
275
257
|
|
276
|
-
QDLDL_int i,j;
|
258
|
+
QDLDL_int i,j;
|
277
259
|
for(i = n-1; i>=0; i--){
|
278
|
-
|
279
|
-
|
280
|
-
|
260
|
+
QDLDL_float val = x[i];
|
261
|
+
for(j = Lp[i]; j < Lp[i+1]; j++){
|
262
|
+
val -= Lx[j]*x[Li[j]];
|
263
|
+
}
|
264
|
+
x[i] = val;
|
281
265
|
}
|
282
266
|
}
|
283
267
|
|
@@ -289,10 +273,9 @@ void QDLDL_solve(const QDLDL_int n,
|
|
289
273
|
const QDLDL_float* Dinv,
|
290
274
|
QDLDL_float* x){
|
291
275
|
|
292
|
-
QDLDL_int i;
|
293
|
-
|
294
|
-
QDLDL_Lsolve(n,Lp,Li,Lx,x);
|
295
|
-
for(i = 0; i < n; i++) x[i] *= Dinv[i];
|
296
|
-
QDLDL_Ltsolve(n,Lp,Li,Lx,x);
|
276
|
+
QDLDL_int i;
|
297
277
|
|
278
|
+
QDLDL_Lsolve(n,Lp,Li,Lx,x);
|
279
|
+
for(i = 0; i < n; i++) x[i] *= Dinv[i];
|
280
|
+
QDLDL_Ltsolve(n,Lp,Li,Lx,x);
|
298
281
|
}
|
@@ -32,19 +32,17 @@ extern "C" {
|
|
32
32
|
* this function will *not* return an error, as it may still be possible to factor
|
33
33
|
* such a matrix in LDL form. No promises are made in this case though...
|
34
34
|
*
|
35
|
-
* @param
|
35
|
+
* @param n number of columns in CSC matrix A (assumed square)
|
36
36
|
* @param Ap column pointers (size n+1) for columns of A
|
37
37
|
* @param Ai row indices of A. Has Ap[n] elements
|
38
38
|
* @param work work vector (size n) (no meaning on return)
|
39
39
|
* @param Lnz count of nonzeros in each column of L (size n) below diagonal
|
40
40
|
* @param etree elimination tree (size n)
|
41
|
-
* @return total sum of Lnz (i.e. total nonzeros in L below diagonal).
|
42
|
-
* -1 if the input
|
43
|
-
*
|
44
|
-
*
|
41
|
+
* @return total sum of Lnz (i.e. total nonzeros in L below diagonal).
|
42
|
+
* Returns -1 if the input is not triu or has an empty column.
|
43
|
+
* Returns -2 if the return value overflows QDLDL_int.
|
45
44
|
*
|
46
45
|
*/
|
47
|
-
|
48
46
|
QDLDL_int QDLDL_etree(const QDLDL_int n,
|
49
47
|
const QDLDL_int* Ap,
|
50
48
|
const QDLDL_int* Ai,
|
@@ -52,6 +50,7 @@ extern "C" {
|
|
52
50
|
QDLDL_int* Lnz,
|
53
51
|
QDLDL_int* etree);
|
54
52
|
|
53
|
+
|
55
54
|
/**
|
56
55
|
* Compute an LDL decomposition for a quasidefinite matrix
|
57
56
|
* in compressed sparse column form, where the input matrix is
|
@@ -61,21 +60,22 @@ extern "C" {
|
|
61
60
|
* Returns factors L, D and Dinv = 1./D.
|
62
61
|
*
|
63
62
|
* Does not use MALLOC. It is assumed that L will be a compressed
|
64
|
-
* sparse column matrix with data (
|
63
|
+
* sparse column matrix with data (n,Lp,Li,Lx) with sufficient space
|
65
64
|
* allocated, with a number of nonzeros equal to the count given
|
66
|
-
* as a return value by
|
67
|
-
*
|
68
|
-
* @param
|
69
|
-
* @param Ap column pointers (size n+1) for columns of A
|
70
|
-
* @param Ai row indices of A. Has Ap[n] elements
|
71
|
-
* @param
|
72
|
-
* @param Lp column pointers (size
|
73
|
-
* @param Li row indices of L. Has Lp[
|
65
|
+
* as a return value by QDLDL_etree
|
66
|
+
*
|
67
|
+
* @param n number of columns in L and A (both square)
|
68
|
+
* @param Ap column pointers (size n+1) for columns of A (not modified)
|
69
|
+
* @param Ai row indices of A. Has Ap[n] elements (not modified)
|
70
|
+
* @param Ax data of A. Has Ap[n] elements (not modified)
|
71
|
+
* @param Lp column pointers (size n+1) for columns of L
|
72
|
+
* @param Li row indices of L. Has Lp[n] elements
|
73
|
+
* @param Lx data of L. Has Lp[n] elements
|
74
74
|
* @param D vectorized factor D. Length is n
|
75
75
|
* @param Dinv reciprocal of D. Length is n
|
76
76
|
* @param Lnz count of nonzeros in each column of L below diagonal,
|
77
|
-
* as given by
|
78
|
-
* @param etree elimination tree as as given by
|
77
|
+
* as given by QDLDL_etree (not modified)
|
78
|
+
* @param etree elimination tree as given by QDLDL_etree (not modified)
|
79
79
|
* @param bwork working array of bools. Length is n
|
80
80
|
* @param iwork working array of integers. Length is 3*n
|
81
81
|
* @param fwork working array of floats. Length is n
|
@@ -85,8 +85,6 @@ extern "C" {
|
|
85
85
|
* or otherwise LDL factorisable)
|
86
86
|
*
|
87
87
|
*/
|
88
|
-
|
89
|
-
|
90
88
|
QDLDL_int QDLDL_factor(const QDLDL_int n,
|
91
89
|
const QDLDL_int* Ap,
|
92
90
|
const QDLDL_int* Ai,
|
@@ -107,16 +105,15 @@ QDLDL_int QDLDL_factor(const QDLDL_int n,
|
|
107
105
|
* Solves LDL'x = b
|
108
106
|
*
|
109
107
|
* It is assumed that L will be a compressed
|
110
|
-
* sparse column matrix with data (
|
108
|
+
* sparse column matrix with data (n,Lp,Li,Lx).
|
111
109
|
*
|
112
|
-
* @param
|
113
|
-
* @param
|
114
|
-
* @param
|
115
|
-
* @param
|
110
|
+
* @param n number of columns in L
|
111
|
+
* @param Lp column pointers (size n+1) for columns of L
|
112
|
+
* @param Li row indices of L. Has Lp[n] elements
|
113
|
+
* @param Lx data of L. Has Lp[n] elements
|
116
114
|
* @param Dinv reciprocal of D. Length is n
|
117
115
|
* @param x initialized to b. Equal to x on return
|
118
116
|
*
|
119
|
-
*
|
120
117
|
*/
|
121
118
|
void QDLDL_solve(const QDLDL_int n,
|
122
119
|
const QDLDL_int* Lp,
|
@@ -130,40 +127,35 @@ void QDLDL_solve(const QDLDL_int n,
|
|
130
127
|
* Solves (L+I)x = b
|
131
128
|
*
|
132
129
|
* It is assumed that L will be a compressed
|
133
|
-
* sparse column matrix with data (
|
130
|
+
* sparse column matrix with data (n,Lp,Li,Lx).
|
134
131
|
*
|
135
|
-
* @param
|
136
|
-
* @param
|
137
|
-
* @param
|
138
|
-
* @param
|
139
|
-
* @param Dinv reciprocal of D. Length is n
|
132
|
+
* @param n number of columns in L
|
133
|
+
* @param Lp column pointers (size n+1) for columns of L
|
134
|
+
* @param Li row indices of L. Has Lp[n] elements
|
135
|
+
* @param Lx data of L. Has Lp[n] elements
|
140
136
|
* @param x initialized to b. Equal to x on return
|
141
137
|
*
|
142
|
-
*
|
143
138
|
*/
|
144
|
-
|
145
139
|
void QDLDL_Lsolve(const QDLDL_int n,
|
146
140
|
const QDLDL_int* Lp,
|
147
141
|
const QDLDL_int* Li,
|
148
142
|
const QDLDL_float* Lx,
|
149
143
|
QDLDL_float* x);
|
150
144
|
|
145
|
+
|
151
146
|
/**
|
152
147
|
* Solves (L+I)'x = b
|
153
148
|
*
|
154
149
|
* It is assumed that L will be a compressed
|
155
|
-
* sparse column matrix with data (
|
150
|
+
* sparse column matrix with data (n,Lp,Li,Lx).
|
156
151
|
*
|
157
|
-
* @param
|
158
|
-
* @param
|
159
|
-
* @param
|
160
|
-
* @param
|
161
|
-
* @param Dinv reciprocal of D. Length is n
|
152
|
+
* @param n number of columns in L
|
153
|
+
* @param Lp column pointers (size n+1) for columns of L
|
154
|
+
* @param Li row indices of L. Has Lp[n] elements
|
155
|
+
* @param Lx data of L. Has Lp[n] elements
|
162
156
|
* @param x initialized to b. Equal to x on return
|
163
157
|
*
|
164
|
-
*
|
165
158
|
*/
|
166
|
-
|
167
159
|
void QDLDL_Ltsolve(const QDLDL_int n,
|
168
160
|
const QDLDL_int* Lp,
|
169
161
|
const QDLDL_int* Li,
|
Binary file
|
@@ -1,18 +1,26 @@
|
|
1
1
|
#ifndef QDLDL_TYPES_H
|
2
2
|
# define QDLDL_TYPES_H
|
3
3
|
|
4
|
-
#include "glbopts.h"
|
5
|
-
|
6
4
|
# ifdef __cplusplus
|
7
5
|
extern "C" {
|
8
6
|
# endif /* ifdef __cplusplus */
|
9
7
|
|
10
|
-
|
8
|
+
#include "glbopts.h"
|
9
|
+
#include <limits.h> //for INT_MAX / LLONG_MAX, used to define QDLDL_INT_MAX
|
10
|
+
|
11
|
+
/* QDLDL integer and float types */
|
11
12
|
|
12
13
|
#define QDLDL_int scs_int
|
13
14
|
#define QDLDL_float scs_float
|
14
15
|
#define QDLDL_bool scs_int
|
15
16
|
|
17
|
+
/* Maximum value of the signed type QDLDL_int */
|
18
|
+
#ifdef DLONG
|
19
|
+
#define QDLDL_INT_MAX LLONG_MAX
|
20
|
+
#else
|
21
|
+
#define QDLDL_INT_MAX INT_MAX
|
22
|
+
#endif
|
23
|
+
|
16
24
|
# ifdef __cplusplus
|
17
25
|
}
|
18
26
|
# endif /* ifdef __cplusplus */
|
data/vendor/scs/linsys/gpu/gpu.c
CHANGED
@@ -1,8 +1,10 @@
|
|
1
1
|
#include "gpu.h"
|
2
2
|
|
3
|
-
void SCS(
|
4
|
-
|
5
|
-
|
3
|
+
void SCS(accum_by_atrans_gpu)(const ScsGpuMatrix *Ag,
|
4
|
+
const cusparseDnVecDescr_t x,
|
5
|
+
cusparseDnVecDescr_t y,
|
6
|
+
cusparseHandle_t cusparse_handle,
|
7
|
+
size_t *buffer_size, void **buffer) {
|
6
8
|
/* y += A'*x
|
7
9
|
x and y MUST be on GPU already
|
8
10
|
*/
|
@@ -10,10 +12,8 @@ void SCS(_accum_by_atrans_gpu)(const ScsGpuMatrix *Ag, const cusparseDnVecDescr_
|
|
10
12
|
size_t new_buffer_size = 0;
|
11
13
|
|
12
14
|
CUSPARSE_GEN(SpMV_bufferSize)
|
13
|
-
(cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
|
14
|
-
|
15
|
-
SCS_CUDA_FLOAT, SCS_CSRMV_ALG,
|
16
|
-
&new_buffer_size);
|
15
|
+
(cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &onef, Ag->descr, x,
|
16
|
+
&onef, y, SCS_CUDA_FLOAT, SCS_CSRMV_ALG, &new_buffer_size);
|
17
17
|
|
18
18
|
if (new_buffer_size > *buffer_size) {
|
19
19
|
if (*buffer != SCS_NULL) {
|
@@ -24,15 +24,15 @@ void SCS(_accum_by_atrans_gpu)(const ScsGpuMatrix *Ag, const cusparseDnVecDescr_
|
|
24
24
|
}
|
25
25
|
|
26
26
|
CUSPARSE_GEN(SpMV)
|
27
|
-
(cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
|
28
|
-
|
29
|
-
SCS_CUDA_FLOAT, SCS_CSRMV_ALG,
|
30
|
-
buffer);
|
27
|
+
(cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &onef, Ag->descr, x,
|
28
|
+
&onef, y, SCS_CUDA_FLOAT, SCS_CSRMV_ALG, buffer);
|
31
29
|
}
|
32
30
|
|
33
|
-
|
34
|
-
|
35
|
-
|
31
|
+
/* this is slow, use trans routine if possible */
|
32
|
+
void SCS(accum_by_a_gpu)(const ScsGpuMatrix *Ag, const cusparseDnVecDescr_t x,
|
33
|
+
cusparseDnVecDescr_t y,
|
34
|
+
cusparseHandle_t cusparse_handle, size_t *buffer_size,
|
35
|
+
void **buffer) {
|
36
36
|
/* y += A*x
|
37
37
|
x and y MUST be on GPU already
|
38
38
|
*/
|
@@ -40,12 +40,9 @@ void SCS(_accum_by_a_gpu)(const ScsGpuMatrix *Ag, const cusparseDnVecDescr_t x,
|
|
40
40
|
size_t new_buffer_size = 0;
|
41
41
|
|
42
42
|
/* The A matrix idx pointers must be ORDERED */
|
43
|
-
|
44
43
|
CUSPARSE_GEN(SpMV_bufferSize)
|
45
|
-
(cusparse_handle, CUSPARSE_OPERATION_TRANSPOSE,
|
46
|
-
|
47
|
-
SCS_CUDA_FLOAT, SCS_CSRMV_ALG,
|
48
|
-
&new_buffer_size);
|
44
|
+
(cusparse_handle, CUSPARSE_OPERATION_TRANSPOSE, &onef, Ag->descr, x, &onef, y,
|
45
|
+
SCS_CUDA_FLOAT, SCS_CSRMV_ALG, &new_buffer_size);
|
49
46
|
|
50
47
|
if (new_buffer_size > *buffer_size) {
|
51
48
|
if (*buffer != SCS_NULL) {
|
@@ -56,10 +53,21 @@ void SCS(_accum_by_a_gpu)(const ScsGpuMatrix *Ag, const cusparseDnVecDescr_t x,
|
|
56
53
|
}
|
57
54
|
|
58
55
|
CUSPARSE_GEN(SpMV)
|
59
|
-
(cusparse_handle, CUSPARSE_OPERATION_TRANSPOSE,
|
60
|
-
|
61
|
-
|
62
|
-
|
56
|
+
(cusparse_handle, CUSPARSE_OPERATION_TRANSPOSE, &onef, Ag->descr, x, &onef, y,
|
57
|
+
SCS_CUDA_FLOAT, SCS_CSRMV_ALG, buffer);
|
58
|
+
}
|
59
|
+
|
60
|
+
/* This assumes that P has been made full (ie not triangular) and uses the
|
61
|
+
* fact that the GPU is faster for general sparse matrices than for symmetric
|
62
|
+
*/
|
63
|
+
/* y += P*x
|
64
|
+
x and y MUST be on GPU already
|
65
|
+
*/
|
66
|
+
void SCS(accum_by_p_gpu)(const ScsGpuMatrix *Pg, const cusparseDnVecDescr_t x,
|
67
|
+
cusparseDnVecDescr_t y,
|
68
|
+
cusparseHandle_t cusparse_handle, size_t *buffer_size,
|
69
|
+
void **buffer) {
|
70
|
+
SCS(accum_by_atrans_gpu)(Pg, x, y, cusparse_handle, buffer_size, buffer);
|
63
71
|
}
|
64
72
|
|
65
73
|
void SCS(free_gpu_matrix)(ScsGpuMatrix *A) {
|
@@ -68,13 +76,3 @@ void SCS(free_gpu_matrix)(ScsGpuMatrix *A) {
|
|
68
76
|
cudaFree(A->p);
|
69
77
|
cusparseDestroySpMat(A->descr);
|
70
78
|
}
|
71
|
-
|
72
|
-
void SCS(normalize_a)(ScsMatrix *A, const ScsSettings *stgs, const ScsCone *k,
|
73
|
-
ScsScaling *scal) {
|
74
|
-
SCS(_normalize_a)(A, stgs, k, scal);
|
75
|
-
}
|
76
|
-
|
77
|
-
void SCS(un_normalize_a)(ScsMatrix *A, const ScsSettings *stgs,
|
78
|
-
const ScsScaling *scal) {
|
79
|
-
SCS(_un_normalize_a)(A, stgs, scal);
|
80
|
-
}
|
data/vendor/scs/linsys/gpu/gpu.h
CHANGED
@@ -10,49 +10,58 @@ extern "C" {
|
|
10
10
|
#include <cuda_runtime_api.h>
|
11
11
|
#include <cusparse.h>
|
12
12
|
|
13
|
-
#include "amatrix.h"
|
14
13
|
#include "glbopts.h"
|
15
14
|
#include "linalg.h"
|
16
15
|
#include "linsys.h"
|
17
16
|
#include "scs.h"
|
17
|
+
#include "scs_matrix.h"
|
18
18
|
#include "util.h"
|
19
19
|
|
20
|
-
#define CUDA_CHECK_ERR
|
21
|
-
do {
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
20
|
+
#define CUDA_CHECK_ERR \
|
21
|
+
do { \
|
22
|
+
cudaDeviceSynchronize(); \
|
23
|
+
cudaError_t err = cudaGetLastError(); \
|
24
|
+
if (err != cudaSuccess) { \
|
25
|
+
scs_printf("%s:%d:%s\n ERROR_CUDA (#): %s\n", __FILE__, __LINE__, \
|
26
|
+
__func__, cudaGetErrorString(err)); \
|
27
|
+
} \
|
27
28
|
} while (0)
|
28
29
|
|
29
|
-
#
|
30
|
+
#if VERBOSITY == 0
|
30
31
|
#ifndef SFLOAT
|
31
32
|
#define CUBLAS(x) cublasD##x
|
33
|
+
#define CUBLASI(x) cublasId##x
|
32
34
|
#define CUSPARSE(x) cusparseD##x
|
33
35
|
#else
|
34
36
|
#define CUBLAS(x) cublasS##x
|
37
|
+
#define CUBLASI(x) cublasIs##x
|
35
38
|
#define CUSPARSE(x) cusparseS##x
|
36
39
|
#endif
|
37
40
|
#define CUSPARSE_GEN(x) cusparse##x
|
38
41
|
#else
|
39
42
|
#ifndef SFLOAT
|
40
|
-
#define CUBLAS(x)
|
41
|
-
CUDA_CHECK_ERR;
|
43
|
+
#define CUBLAS(x) \
|
44
|
+
CUDA_CHECK_ERR; \
|
42
45
|
cublasD##x
|
43
|
-
#define
|
44
|
-
CUDA_CHECK_ERR;
|
46
|
+
#define CUBLASI(x) \
|
47
|
+
CUDA_CHECK_ERR; \
|
48
|
+
cublasId##x
|
49
|
+
#define CUSPARSE(x) \
|
50
|
+
CUDA_CHECK_ERR; \
|
45
51
|
cusparseD##x
|
46
52
|
#else
|
47
|
-
#define CUBLAS(x)
|
48
|
-
CUDA_CHECK_ERR;
|
53
|
+
#define CUBLAS(x) \
|
54
|
+
CUDA_CHECK_ERR; \
|
49
55
|
cublasS##x
|
50
|
-
#define
|
51
|
-
CUDA_CHECK_ERR;
|
56
|
+
#define CUBLASI(x) \
|
57
|
+
CUDA_CHECK_ERR; \
|
58
|
+
cublasIs##x
|
59
|
+
#define CUSPARSE(x) \
|
60
|
+
CUDA_CHECK_ERR; \
|
52
61
|
cusparseS##x
|
53
62
|
#endif
|
54
|
-
#define CUSPARSE_GEN(x)
|
55
|
-
CUDA_CHECK_ERR;
|
63
|
+
#define CUSPARSE_GEN(x) \
|
64
|
+
CUDA_CHECK_ERR; \
|
56
65
|
cusparse##x
|
57
66
|
#endif
|
58
67
|
|
@@ -78,25 +87,33 @@ extern "C" {
|
|
78
87
|
A'(n x m) A (m x n) Agt accum_by_a_gpu
|
79
88
|
*/
|
80
89
|
|
81
|
-
/* this struct defines the data matrix
|
82
|
-
typedef struct
|
90
|
+
/* this struct defines the data matrix on GPU */
|
91
|
+
typedef struct SCS_GPU_DATA_MATRIX {
|
83
92
|
/* A is supplied in column compressed format */
|
84
|
-
scs_float *x; /*
|
85
|
-
scs_int *i; /*
|
86
|
-
scs_int *p; /*
|
93
|
+
scs_float *x; /* values, size: NNZ */
|
94
|
+
scs_int *i; /* row index, size: NNZ */
|
95
|
+
scs_int *p; /* column pointer, size: n+1 */
|
87
96
|
scs_int m, n; /* m rows, n cols */
|
88
|
-
scs_int
|
97
|
+
scs_int nnz; /* num non-zeros in matrix */
|
89
98
|
/* CUDA */
|
90
99
|
cusparseSpMatDescr_t descr;
|
91
100
|
} ScsGpuMatrix;
|
92
101
|
|
93
|
-
void SCS(
|
94
|
-
|
95
|
-
|
102
|
+
void SCS(accum_by_atrans_gpu)(const ScsGpuMatrix *A,
|
103
|
+
const cusparseDnVecDescr_t x,
|
104
|
+
cusparseDnVecDescr_t y,
|
105
|
+
cusparseHandle_t cusparse_handle,
|
106
|
+
size_t *buffer_size, void **buffer);
|
96
107
|
|
97
|
-
void SCS(
|
98
|
-
|
99
|
-
|
108
|
+
void SCS(accum_by_a_gpu)(const ScsGpuMatrix *A, const cusparseDnVecDescr_t x,
|
109
|
+
cusparseDnVecDescr_t y,
|
110
|
+
cusparseHandle_t cusparse_handle, size_t *buffer_size,
|
111
|
+
void **buffer);
|
112
|
+
|
113
|
+
void SCS(accum_by_p_gpu)(const ScsGpuMatrix *P, const cusparseDnVecDescr_t x,
|
114
|
+
cusparseDnVecDescr_t y,
|
115
|
+
cusparseHandle_t cusparse_handle, size_t *buffer_size,
|
116
|
+
void **buffer);
|
100
117
|
|
101
118
|
void SCS(free_gpu_matrix)(ScsGpuMatrix *A);
|
102
119
|
|