scs 0.4.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/LICENSE.txt +1 -1
- data/README.md +1 -1
- data/lib/scs/version.rb +1 -1
- data/vendor/scs/CITATION.cff +2 -2
- data/vendor/scs/CMakeLists.txt +284 -168
- data/vendor/scs/Makefile +43 -18
- data/vendor/scs/README.md +1 -1
- data/vendor/scs/include/glbopts.h +32 -13
- data/vendor/scs/include/linsys.h +8 -8
- data/vendor/scs/include/scs.h +6 -2
- data/vendor/scs/include/scs_types.h +3 -1
- data/vendor/scs/include/scs_work.h +9 -8
- data/vendor/scs/include/util.h +1 -1
- data/vendor/scs/linsys/cpu/direct/private.c +32 -153
- data/vendor/scs/linsys/cpu/direct/private.h +6 -6
- data/vendor/scs/linsys/cpu/indirect/private.c +9 -22
- data/vendor/scs/linsys/cpu/indirect/private.h +4 -2
- data/vendor/scs/linsys/csparse.c +140 -12
- data/vendor/scs/linsys/csparse.h +10 -17
- data/vendor/scs/linsys/gpu/gpu.c +4 -4
- data/vendor/scs/linsys/gpu/gpu.h +1 -1
- data/vendor/scs/linsys/gpu/indirect/private.c +15 -26
- data/vendor/scs/linsys/mkl/direct/private.c +182 -0
- data/vendor/scs/linsys/mkl/direct/private.h +38 -0
- data/vendor/scs/linsys/scs_matrix.c +11 -5
- data/vendor/scs/scs.mk +39 -26
- data/vendor/scs/src/cones.c +15 -159
- data/vendor/scs/src/exp_cone.c +399 -0
- data/vendor/scs/src/normalize.c +4 -2
- data/vendor/scs/src/rw.c +93 -38
- data/vendor/scs/src/scs.c +83 -52
- data/vendor/scs/src/util.c +12 -3
- data/vendor/scs/test/minunit.h +2 -1
- data/vendor/scs/test/problem_utils.h +2 -1
- data/vendor/scs/test/problems/hs21_tiny_qp.h +1 -1
- data/vendor/scs/test/problems/hs21_tiny_qp_rw.h +8 -3
- data/vendor/scs/test/problems/max_ent +0 -0
- data/vendor/scs/test/problems/max_ent.h +8 -0
- data/vendor/scs/test/problems/random_prob.h +2 -43
- data/vendor/scs/test/problems/rob_gauss_cov_est.h +7 -2
- data/vendor/scs/test/problems/test_exp_cone.h +84 -0
- data/vendor/scs/test/problems/test_prob_from_data_file.h +57 -0
- data/vendor/scs/test/run_from_file.c +7 -1
- data/vendor/scs/test/run_tests.c +22 -9
- metadata +10 -3
@@ -9,15 +9,15 @@ extern "C" {
|
|
9
9
|
#include "external/amd/amd.h"
|
10
10
|
#include "external/qdldl/qdldl.h"
|
11
11
|
#include "glbopts.h"
|
12
|
-
#include "
|
12
|
+
#include "linsys.h"
|
13
13
|
#include "scs_matrix.h"
|
14
14
|
|
15
15
|
struct SCS_LIN_SYS_WORK {
|
16
|
-
scs_int m, n;
|
17
|
-
|
18
|
-
scs_float *Dinv;
|
19
|
-
scs_int *perm;
|
20
|
-
scs_float *bp;
|
16
|
+
scs_int m, n; /* linear system dimensions */
|
17
|
+
ScsMatrix *kkt, *L; /* KKT, and factorization matrix L resp. */
|
18
|
+
scs_float *Dinv; /* inverse diagonal matrix of factorization */
|
19
|
+
scs_int *perm; /* permutation of KKT matrix for factorization */
|
20
|
+
scs_float *bp; /* workspace memory for solves */
|
21
21
|
scs_int *diag_r_idxs;
|
22
22
|
scs_int factorizations;
|
23
23
|
/* ldl factorization workspace */
|
@@ -1,22 +1,9 @@
|
|
1
1
|
#include "private.h"
|
2
|
-
#include "linsys.h"
|
3
|
-
#include "util.h"
|
4
|
-
#include <limits.h>
|
5
2
|
|
6
|
-
const char *
|
7
|
-
return "sparse-indirect";
|
3
|
+
/*
 * Name of the linear system method implemented by this file.
 *
 * Returns a pointer to a string literal; the caller must not modify or free
 * it. Declared with `(void)` so the definition is a real prototype and calls
 * that pass arguments are diagnosed by the compiler.
 */
const char *scs_get_lin_sys_method(void) {
  return "sparse-indirect-scs";
}
|
9
6
|
|
10
|
-
/*
|
11
|
-
char *SCS(get_lin_sys_summary)(ScsLinSysWork *p, const ScsInfo *info) {
|
12
|
-
char *str = (char *)scs_malloc(sizeof(char) * 128);
|
13
|
-
sprintf(str, "lin-sys: avg cg its: %2.2f\n",
|
14
|
-
(scs_float)p->tot_cg_its / (info->iter + 1));
|
15
|
-
p->tot_cg_its = 0;
|
16
|
-
return str;
|
17
|
-
}
|
18
|
-
*/
|
19
|
-
|
20
7
|
/* Not possible to do this on the fly due to M_ii += a_i' (R_y)^-1 a_i */
|
21
8
|
/* set M = inv ( diag ( R_x + P + A' R_y^{-1} A ) ) */
|
22
9
|
static void set_preconditioner(ScsLinSysWork *p) {
|
@@ -97,7 +84,7 @@ static void transpose(const ScsMatrix *A, ScsLinSysWork *p) {
|
|
97
84
|
#endif
|
98
85
|
}
|
99
86
|
|
100
|
-
void
|
87
|
+
void scs_free_lin_sys_work(ScsLinSysWork *p) {
|
101
88
|
if (p) {
|
102
89
|
scs_free(p->p);
|
103
90
|
scs_free(p->r);
|
@@ -162,13 +149,13 @@ static void apply_pre_conditioner(scs_float *z, scs_float *r, scs_int n,
|
|
162
149
|
}
|
163
150
|
|
164
151
|
/*
 * Called when the diagonal scaling R changes.
 *
 * Stores the new `diag_r` pointer in the workspace (the pointer is kept, the
 * values are not copied) and then calls set_preconditioner(p).
 * NOTE(review): set_preconditioner is documented in this file as building
 * M = inv(diag(R_x + P + A' R_y^{-1} A)); presumably it reads p->diag_r, which
 * is why the assignment comes first. Confirm against its body.
 */
void scs_update_lin_sys_diag_r(ScsLinSysWork *p, const scs_float *diag_r) {
  p->diag_r = diag_r; /* this isn't needed but do it to be safe */
  set_preconditioner(p);
}
|
169
156
|
|
170
|
-
ScsLinSysWork *
|
171
|
-
|
157
|
+
ScsLinSysWork *scs_init_lin_sys_work(const ScsMatrix *A, const ScsMatrix *P,
|
158
|
+
const scs_float *diag_r) {
|
172
159
|
ScsLinSysWork *p = (ScsLinSysWork *)scs_calloc(1, sizeof(ScsLinSysWork));
|
173
160
|
p->A = A;
|
174
161
|
p->P = P;
|
@@ -198,7 +185,7 @@ ScsLinSysWork *SCS(init_lin_sys_work)(const ScsMatrix *A, const ScsMatrix *P,
|
|
198
185
|
p->tot_cg_its = 0;
|
199
186
|
if (!p->p || !p->r || !p->Gp || !p->tmp || !p->At || !p->At->i || !p->At->p ||
|
200
187
|
!p->At->x) {
|
201
|
-
|
188
|
+
scs_free_lin_sys_work(p);
|
202
189
|
return SCS_NULL;
|
203
190
|
}
|
204
191
|
return p;
|
@@ -288,8 +275,8 @@ static scs_int pcg(ScsLinSysWork *pr, const scs_float *s, scs_float *b,
|
|
288
275
|
* y = R_y^{-1} (Ax - ry)
|
289
276
|
*
|
290
277
|
*/
|
291
|
-
scs_int
|
292
|
-
|
278
|
+
scs_int scs_solve_lin_sys(ScsLinSysWork *p, scs_float *b, const scs_float *s,
|
279
|
+
scs_float tol) {
|
293
280
|
scs_int cg_its, max_iters;
|
294
281
|
|
295
282
|
if (tol <= 0.) {
|
@@ -5,11 +5,13 @@
|
|
5
5
|
extern "C" {
|
6
6
|
#endif
|
7
7
|
|
8
|
+
#include "csparse.h"
|
8
9
|
#include "glbopts.h"
|
9
10
|
#include "linalg.h"
|
10
|
-
#include "
|
11
|
+
#include "linsys.h"
|
11
12
|
#include "scs_matrix.h"
|
12
|
-
#include
|
13
|
+
#include "util.h" /* timer */
|
14
|
+
#include <string.h>
|
13
15
|
|
14
16
|
struct SCS_LIN_SYS_WORK {
|
15
17
|
scs_int n, m; /* linear system dimensions */
|
data/vendor/scs/linsys/csparse.c
CHANGED
@@ -2,39 +2,37 @@
|
|
2
2
|
|
3
3
|
#include "csparse.h"
|
4
4
|
|
5
|
-
|
6
|
-
|
7
|
-
|
5
|
+
ScsMatrix *SCS(cs_spalloc)(scs_int m, scs_int n, scs_int nzmax, scs_int values,
|
6
|
+
scs_int triplet) {
|
7
|
+
ScsMatrix *A = (ScsMatrix *)scs_calloc(1, sizeof(ScsMatrix));
|
8
8
|
if (!A) {
|
9
9
|
return SCS_NULL;
|
10
10
|
} /* out of memory */
|
11
11
|
A->m = m; /* define dimensions and nzmax */
|
12
12
|
A->n = n;
|
13
|
-
A->nzmax = nzmax = MAX(nzmax, 1);
|
14
|
-
A->nz = triplet ? 0 : -1; /* allocate triplet or comp.col */
|
15
13
|
A->p = (scs_int *)scs_calloc((triplet ? nzmax : n + 1), sizeof(scs_int));
|
16
14
|
A->i = (scs_int *)scs_calloc(nzmax, sizeof(scs_int));
|
17
15
|
A->x = values ? (scs_float *)scs_calloc(nzmax, sizeof(scs_float)) : SCS_NULL;
|
18
16
|
return (!A->p || !A->i || (values && !A->x)) ? SCS(cs_spfree)(A) : A;
|
19
17
|
}
|
20
18
|
|
21
|
-
|
19
|
+
/*
 * Common exit helper: release the two scratch buffers `w` and `x`, then hand
 * back `C` when `ok` is non-zero. When `ok` is zero, `C` is freed with
 * cs_spfree and that call's return value is returned instead.
 */
ScsMatrix *SCS(cs_done)(ScsMatrix *C, void *w, void *x, scs_int ok) {
  /* the workspace is dropped on success and on failure alike */
  scs_free(w);
  scs_free(x);
  if (!ok) {
    /* failure: the partially built result must not escape */
    return SCS(cs_spfree)(C);
  }
  return C;
}
|
26
24
|
|
27
25
|
/* C = compressed-column form of a triplet matrix T */
|
28
|
-
|
29
|
-
|
26
|
+
ScsMatrix *SCS(cs_compress)(const ScsMatrix *T, scs_int nz,
|
27
|
+
scs_int *idx_mapping) {
|
28
|
+
scs_int m, n, p, k, *Cp, *Ci, *w, *Ti, *Tj;
|
30
29
|
scs_float *Cx, *Tx;
|
31
|
-
|
30
|
+
ScsMatrix *C;
|
32
31
|
m = T->m;
|
33
32
|
n = T->n;
|
34
33
|
Ti = T->i;
|
35
34
|
Tj = T->p;
|
36
35
|
Tx = T->x;
|
37
|
-
nz = T->nz;
|
38
36
|
C = SCS(cs_spalloc)(m, n, nz, Tx != SCS_NULL, 0); /* allocate result */
|
39
37
|
w = (scs_int *)scs_calloc(n, sizeof(scs_int)); /* get workspace */
|
40
38
|
if (!C || !w) {
|
@@ -75,7 +73,7 @@ scs_float SCS(cumsum)(scs_int *p, scs_int *c, scs_int n) {
|
|
75
73
|
return nz2; /* return sum (c [0..n-1]) */
|
76
74
|
}
|
77
75
|
|
78
|
-
|
76
|
+
ScsMatrix *SCS(cs_spfree)(ScsMatrix *A) {
|
79
77
|
if (!A) {
|
80
78
|
return SCS_NULL;
|
81
79
|
} /* do nothing if A already SCS_NULL */
|
@@ -83,5 +81,135 @@ csc *SCS(cs_spfree)(csc *A) {
|
|
83
81
|
scs_free(A->i);
|
84
82
|
scs_free(A->x);
|
85
83
|
scs_free(A);
|
86
|
-
|
84
|
+
/* free the ScsMatrix struct and return SCS_NULL */
|
85
|
+
return (ScsMatrix *)SCS_NULL;
|
86
|
+
}
|
87
|
+
|
88
|
+
/* Build the KKT matrix */
|
89
|
+
ScsMatrix *SCS(form_kkt)(const ScsMatrix *A, const ScsMatrix *P,
|
90
|
+
scs_float *diag_p, const scs_float *diag_r,
|
91
|
+
scs_int *diag_r_idxs, scs_int upper) {
|
92
|
+
/*
|
93
|
+
* Forms column compressed KKT matrix assumes column compressed A,P matrices.
|
94
|
+
* Only upper OR lower triangular part is stuffed, depending on `upper` flag.
|
95
|
+
*
|
96
|
+
* Forms upper/lower triangular part of [(R_x + P) A'; A -R_y]
|
97
|
+
* Shapes: P : n x n, A: m x n.
|
98
|
+
*
|
99
|
+
* Output: `diag_p` will contain values of P diagonal upon completion,
|
100
|
+
* and `diag_r_idxs` will contain the indices corresponding to the entries
|
101
|
+
* in the returned matrix corresponding to the entries of R.
|
102
|
+
*
|
103
|
+
*/
|
104
|
+
scs_int h, i, j, count;
|
105
|
+
ScsMatrix *Kcsc, *K;
|
106
|
+
scs_int n = A->n;
|
107
|
+
scs_int m = A->m;
|
108
|
+
scs_int Anz = A->p[n];
|
109
|
+
scs_int Knzmax;
|
110
|
+
scs_int *idx_mapping;
|
111
|
+
if (P) {
|
112
|
+
/* Upper bound P + I triangular component NNZs as Pnz + n */
|
113
|
+
Knzmax = n + m + Anz + P->p[n];
|
114
|
+
} else {
|
115
|
+
Knzmax = n + m + Anz;
|
116
|
+
}
|
117
|
+
K = SCS(cs_spalloc)(m + n, m + n, Knzmax, 1, 1);
|
118
|
+
|
119
|
+
#if VERBOSITY > 0
|
120
|
+
scs_printf("forming kkt\n");
|
121
|
+
#endif
|
122
|
+
/* Here we generate a triplet matrix and then compress to CSC */
|
123
|
+
if (!K) {
|
124
|
+
return SCS_NULL;
|
125
|
+
}
|
126
|
+
|
127
|
+
count = 0; /* element counter */
|
128
|
+
if (P) {
|
129
|
+
/* R_x + P in top left */
|
130
|
+
for (j = 0; j < n; j++) { /* cols */
|
131
|
+
diag_p[j] = 0.;
|
132
|
+
/* empty column, add diagonal */
|
133
|
+
if (P->p[j] == P->p[j + 1]) {
|
134
|
+
K->i[count] = j;
|
135
|
+
K->p[count] = j;
|
136
|
+
K->x[count] = diag_r[j];
|
137
|
+
diag_r_idxs[j] = count; /* store the indices where diag_r occurs */
|
138
|
+
count++;
|
139
|
+
}
|
140
|
+
for (h = P->p[j]; h < P->p[j + 1]; h++) {
|
141
|
+
i = P->i[h]; /* row */
|
142
|
+
if (i > j) { /* only upper triangular needed */
|
143
|
+
break;
|
144
|
+
}
|
145
|
+
if (upper) {
|
146
|
+
K->i[count] = i;
|
147
|
+
K->p[count] = j;
|
148
|
+
} else { /* lower triangular */
|
149
|
+
/* P is passed in upper triangular, need to flip that here */
|
150
|
+
K->i[count] = j; /* col -> row */
|
151
|
+
K->p[count] = i; /* row -> col */
|
152
|
+
}
|
153
|
+
K->x[count] = P->x[h];
|
154
|
+
if (i == j) {
|
155
|
+
/* P has diagonal element */
|
156
|
+
diag_p[j] = P->x[h];
|
157
|
+
K->x[count] += diag_r[j];
|
158
|
+
diag_r_idxs[j] = count; /* store the indices where diag_r occurs */
|
159
|
+
}
|
160
|
+
count++;
|
161
|
+
/* reached the end without adding diagonal, do it now */
|
162
|
+
if ((i < j) && (h + 1 == P->p[j + 1] || P->i[h + 1] > j)) {
|
163
|
+
K->i[count] = j;
|
164
|
+
K->p[count] = j;
|
165
|
+
K->x[count] = diag_r[j];
|
166
|
+
diag_r_idxs[j] = count; /* store the indices where diag_r occurs */
|
167
|
+
count++;
|
168
|
+
}
|
169
|
+
}
|
170
|
+
}
|
171
|
+
} else {
|
172
|
+
/* R_x in top left */
|
173
|
+
for (j = 0; j < n; j++) {
|
174
|
+
diag_p[j] = 0.;
|
175
|
+
K->i[count] = j;
|
176
|
+
K->p[count] = j;
|
177
|
+
K->x[count] = diag_r[j];
|
178
|
+
diag_r_idxs[j] = count; /* store the indices where diag_r occurs */
|
179
|
+
count++;
|
180
|
+
}
|
181
|
+
}
|
182
|
+
|
183
|
+
/* A in bottom left or A^T top right */
|
184
|
+
for (j = 0; j < n; j++) { /* column */
|
185
|
+
for (h = A->p[j]; h < A->p[j + 1]; h++) {
|
186
|
+
if (upper) {
|
187
|
+
K->p[count] = A->i[h] + n; /* column */
|
188
|
+
K->i[count] = j; /*row */
|
189
|
+
} else { /* lower triangular */
|
190
|
+
K->p[count] = j; /* column */
|
191
|
+
K->i[count] = A->i[h] + n; /* row */
|
192
|
+
}
|
193
|
+
K->x[count] = A->x[h];
|
194
|
+
count++;
|
195
|
+
}
|
196
|
+
}
|
197
|
+
|
198
|
+
/* -R_y at bottom right */
|
199
|
+
for (j = 0; j < m; j++) {
|
200
|
+
K->i[count] = j + n;
|
201
|
+
K->p[count] = j + n;
|
202
|
+
K->x[count] = -diag_r[j + n];
|
203
|
+
diag_r_idxs[j + n] = count; /* store the indices where diag_r occurs */
|
204
|
+
count++;
|
205
|
+
}
|
206
|
+
|
207
|
+
idx_mapping = (scs_int *)scs_calloc(count, sizeof(scs_int));
|
208
|
+
Kcsc = SCS(cs_compress)(K, count, idx_mapping);
|
209
|
+
for (i = 0; i < m + n; i++) {
|
210
|
+
diag_r_idxs[i] = idx_mapping[diag_r_idxs[i]];
|
211
|
+
}
|
212
|
+
SCS(cs_spfree)(K);
|
213
|
+
scs_free(idx_mapping);
|
214
|
+
return Kcsc;
|
87
215
|
}
|
data/vendor/scs/linsys/csparse.h
CHANGED
@@ -10,24 +10,17 @@ extern "C" {
|
|
10
10
|
#include "glbopts.h"
|
11
11
|
#include "scs.h"
|
12
12
|
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
scs_int *i; /* row indices, size nzmax */
|
20
|
-
scs_float *x; /* numerical values, size nzmax */
|
21
|
-
scs_int nz; /* # of entries in triplet matrix, -1 for compressed-col */
|
22
|
-
} csc;
|
23
|
-
|
24
|
-
csc *SCS(cs_spalloc)(scs_int m, scs_int n, scs_int nzmax, scs_int values,
|
25
|
-
scs_int triplet);
|
26
|
-
csc *SCS(cs_done)(csc *C, void *w, void *x, scs_int ok);
|
27
|
-
csc *SCS(cs_compress)(const csc *T, scs_int *idx_mapping);
|
13
|
+
ScsMatrix *SCS(cs_spalloc)(scs_int m, scs_int n, scs_int nzmax, scs_int values,
|
14
|
+
scs_int triplet);
|
15
|
+
ScsMatrix *SCS(cs_done)(ScsMatrix *C, void *w, void *x, scs_int ok);
|
16
|
+
ScsMatrix *SCS(cs_compress)(const ScsMatrix *T, scs_int nz,
|
17
|
+
scs_int *idx_mapping);
|
18
|
+
ScsMatrix *SCS(cs_spfree)(ScsMatrix *A);
|
28
19
|
scs_float SCS(cumsum)(scs_int *p, scs_int *c, scs_int n);
|
29
|
-
|
30
|
-
|
20
|
+
/* Forms KKT matrix */
|
21
|
+
ScsMatrix *SCS(form_kkt)(const ScsMatrix *A, const ScsMatrix *P,
|
22
|
+
scs_float *diag_p, const scs_float *diag_r,
|
23
|
+
scs_int *diag_r_idxs, scs_int upper);
|
31
24
|
#ifdef __cplusplus
|
32
25
|
}
|
33
26
|
#endif
|
data/vendor/scs/linsys/gpu/gpu.c
CHANGED
@@ -19,13 +19,13 @@ void SCS(accum_by_atrans_gpu)(const ScsGpuMatrix *Ag,
|
|
19
19
|
if (*buffer != SCS_NULL) {
|
20
20
|
cudaFree(*buffer);
|
21
21
|
}
|
22
|
-
cudaMalloc(buffer,
|
22
|
+
cudaMalloc(buffer, new_buffer_size);
|
23
23
|
*buffer_size = new_buffer_size;
|
24
24
|
}
|
25
25
|
|
26
26
|
CUSPARSE_GEN(SpMV)
|
27
27
|
(cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &onef, Ag->descr, x,
|
28
|
-
&onef, y, SCS_CUDA_FLOAT, SCS_CSRMV_ALG, buffer);
|
28
|
+
&onef, y, SCS_CUDA_FLOAT, SCS_CSRMV_ALG, *buffer);
|
29
29
|
}
|
30
30
|
|
31
31
|
/* this is slow, use trans routine if possible */
|
@@ -48,13 +48,13 @@ void SCS(accum_by_a_gpu)(const ScsGpuMatrix *Ag, const cusparseDnVecDescr_t x,
|
|
48
48
|
if (*buffer != SCS_NULL) {
|
49
49
|
cudaFree(*buffer);
|
50
50
|
}
|
51
|
-
cudaMalloc(buffer,
|
51
|
+
cudaMalloc(buffer, new_buffer_size);
|
52
52
|
*buffer_size = new_buffer_size;
|
53
53
|
}
|
54
54
|
|
55
55
|
CUSPARSE_GEN(SpMV)
|
56
56
|
(cusparse_handle, CUSPARSE_OPERATION_TRANSPOSE, &onef, Ag->descr, x, &onef, y,
|
57
|
-
SCS_CUDA_FLOAT, SCS_CSRMV_ALG, buffer);
|
57
|
+
SCS_CUDA_FLOAT, SCS_CSRMV_ALG, *buffer);
|
58
58
|
}
|
59
59
|
|
60
60
|
/* This assumes that P has been made full (ie not triangular) and uses the
|
data/vendor/scs/linsys/gpu/gpu.h
CHANGED
@@ -21,20 +21,10 @@ static scs_float cg_gpu_norm(cublasHandle_t cublas_handle, scs_float *r,
|
|
21
21
|
return nrm;
|
22
22
|
}
|
23
23
|
|
24
|
-
const char *
|
24
|
+
/*
 * Name of the linear system method implemented by this file.
 *
 * Returns a pointer to a string literal; the caller must not modify or free
 * it. Declared with `(void)` so the definition is a real prototype and calls
 * that pass arguments are diagnosed by the compiler.
 */
const char *scs_get_lin_sys_method(void) {
  return "sparse-indirect GPU";
}
|
27
27
|
|
28
|
-
/*
|
29
|
-
char *SCS(get_lin_sys_summary)(ScsLinSysWork *p, const ScsInfo *info) {
|
30
|
-
char *str = (char *)scs_malloc(sizeof(char) * 128);
|
31
|
-
sprintf(str, "lin-sys: avg cg its: %2.2f\n",
|
32
|
-
(scs_float)p->tot_cg_its / (info->iter + 1));
|
33
|
-
p->tot_cg_its = 0;
|
34
|
-
return str;
|
35
|
-
}
|
36
|
-
*/
|
37
|
-
|
38
28
|
/* Not possible to do this on the fly due to M_ii += a_i' (R_y)^-1 a_i */
|
39
29
|
/* set M = inv ( diag ( R_x + P + A' R_y^{-1} A ) ) */
|
40
30
|
static void set_preconditioner(ScsLinSysWork *p, const scs_float *diag_r) {
|
@@ -76,7 +66,7 @@ static void set_preconditioner(ScsLinSysWork *p, const scs_float *diag_r) {
|
|
76
66
|
}
|
77
67
|
|
78
68
|
/* no need to update anything in this case */
|
79
|
-
void
|
69
|
+
void scs_update_lin_sys_diag_r(ScsLinSysWork *p, const scs_float *diag_r) {
|
80
70
|
scs_int i;
|
81
71
|
|
82
72
|
/* R_x to gpu */
|
@@ -93,7 +83,7 @@ void SCS(update_lin_sys_diag_r)(ScsLinSysWork *p, const scs_float *diag_r) {
|
|
93
83
|
set_preconditioner(p, diag_r);
|
94
84
|
}
|
95
85
|
|
96
|
-
void
|
86
|
+
void scs_free_lin_sys_work(ScsLinSysWork *p) {
|
97
87
|
if (p) {
|
98
88
|
scs_free(p->M);
|
99
89
|
scs_free(p->inv_r_y);
|
@@ -182,13 +172,13 @@ static void mat_vec(ScsLinSysWork *p, const scs_float *x, scs_float *y) {
|
|
182
172
|
}
|
183
173
|
|
184
174
|
/* P comes in upper triangular, expand to full
|
185
|
-
* First compute triplet version of full matrix, then compress to
|
175
|
+
* First compute triplet version of full matrix, then compress to CSC
|
186
176
|
* */
|
187
|
-
static
|
177
|
+
static ScsMatrix *fill_p_matrix(const ScsMatrix *P) {
|
188
178
|
scs_int i, j, k, kk;
|
189
179
|
scs_int Pnzmax = 2 * P->p[P->n]; /* upper bound */
|
190
|
-
|
191
|
-
|
180
|
+
ScsMatrix *P_tmp = SCS(cs_spalloc)(P->n, P->n, Pnzmax, 1, 1);
|
181
|
+
ScsMatrix *P_full;
|
192
182
|
kk = 0;
|
193
183
|
for (j = 0; j < P->n; j++) { /* cols */
|
194
184
|
for (k = P->p[j]; k < P->p[j + 1]; k++) {
|
@@ -209,16 +199,15 @@ static csc *fill_p_matrix(const ScsMatrix *P) {
|
|
209
199
|
kk++;
|
210
200
|
}
|
211
201
|
}
|
212
|
-
|
213
|
-
P_full = SCS(cs_compress)(P_tmp, SCS_NULL);
|
202
|
+
P_full = SCS(cs_compress)(P_tmp, kk, SCS_NULL);
|
214
203
|
SCS(cs_spfree)(P_tmp);
|
215
204
|
return P_full;
|
216
205
|
}
|
217
206
|
|
218
|
-
ScsLinSysWork *
|
219
|
-
|
207
|
+
ScsLinSysWork *scs_init_lin_sys_work(const ScsMatrix *A, const ScsMatrix *P,
|
208
|
+
const scs_float *diag_r) {
|
220
209
|
cudaError_t err;
|
221
|
-
|
210
|
+
ScsMatrix *P_full;
|
222
211
|
ScsLinSysWork *p = SCS_NULL;
|
223
212
|
ScsGpuMatrix *Ag = SCS_NULL;
|
224
213
|
ScsGpuMatrix *Pg = SCS_NULL;
|
@@ -324,7 +313,7 @@ ScsLinSysWork *SCS(init_lin_sys_work)(const ScsMatrix *A, const ScsMatrix *P,
|
|
324
313
|
cusparseCreateDnVec(&p->dn_vec_m, Ag->m, p->tmp_m, SCS_CUDA_FLOAT);
|
325
314
|
|
326
315
|
/* Form preconditioner and copy R_x, 1/R_y to gpu */
|
327
|
-
|
316
|
+
scs_update_lin_sys_diag_r(p, diag_r);
|
328
317
|
|
329
318
|
#if GPU_TRANSPOSE_MAT > 0
|
330
319
|
p->Agt = (ScsGpuMatrix *)scs_malloc(sizeof(ScsGpuMatrix));
|
@@ -367,7 +356,7 @@ ScsLinSysWork *SCS(init_lin_sys_work)(const ScsMatrix *A, const ScsMatrix *P,
|
|
367
356
|
if (err != cudaSuccess) {
|
368
357
|
printf("%s:%d:%s\nERROR_CUDA (*): %s\n", __FILE__, __LINE__, __func__,
|
369
358
|
cudaGetErrorString(err));
|
370
|
-
|
359
|
+
scs_free_lin_sys_work(p);
|
371
360
|
return SCS_NULL;
|
372
361
|
}
|
373
362
|
return p;
|
@@ -466,8 +455,8 @@ static scs_int pcg(ScsLinSysWork *pr, const scs_float *s, scs_float *bg,
|
|
466
455
|
* y = R_y^{-1} (Ax - ry)
|
467
456
|
*
|
468
457
|
*/
|
469
|
-
scs_int
|
470
|
-
|
458
|
+
scs_int scs_solve_lin_sys(ScsLinSysWork *p, scs_float *b, const scs_float *s,
|
459
|
+
scs_float tol) {
|
471
460
|
scs_int cg_its, max_iters;
|
472
461
|
scs_float neg_onef = -1.0;
|
473
462
|
|
@@ -0,0 +1,182 @@
|
|
1
|
+
#include "private.h"
|
2
|
+
|
3
|
+
/* Values passed to Pardiso through its `phase` argument in this file:
 * symbolic factorization, numerical factorization, solve, and cleanup. */
#define PARDISO_SYMBOLIC (11)
#define PARDISO_NUMERIC (22)
#define PARDISO_SOLVE (33)
#define PARDISO_CLEANUP (-1)

/* TODO: is it necessary to use pardiso_64 and MKL_Set_Interface_Layer ? */
/*
#define MKL_INTERFACE_LP64 0
#define MKL_INTERFACE_ILP64 1
*/
/* Pick the Pardiso entry point at compile time: pardiso_64 when DLONG is
 * defined, pardiso otherwise.
 * NOTE(review): DLONG presumably selects a 64-bit scs_int; confirm in
 * glbopts.h so the integer width matches the chosen entry point. */
#ifdef DLONG
#define _PARDISO pardiso_64
#else
#define _PARDISO pardiso
#endif

/* Prototypes for Pardiso functions */
/* Declared here by hand with scs_int / scs_float argument types; no MKL
 * header is included by the part of the file shown here. */
void _PARDISO(void **pt, const scs_int *maxfct, const scs_int *mnum,
              const scs_int *mtype, const scs_int *phase, const scs_int *n,
              const scs_float *a, const scs_int *ia, const scs_int *ja,
              scs_int *perm, const scs_int *nrhs, scs_int *iparm,
              const scs_int *msglvl, scs_float *b, scs_float *x,
              scs_int *error);
|
26
|
+
/* scs_int MKL_Set_Interface_Layer(scs_int); */
|
27
|
+
|
28
|
+
/*
 * Name of the linear system method implemented by this file.
 *
 * Returns a pointer to a string literal; the caller must not modify or free
 * it. Declared with `(void)` so the definition is a real prototype and calls
 * that pass arguments are diagnosed by the compiler.
 */
const char *scs_get_lin_sys_method(void) {
  return "sparse-direct-mkl-pardiso";
}
|
31
|
+
|
32
|
+
/*
 * Release a linear system workspace created by scs_init_lin_sys_work.
 *
 * Runs the Pardiso cleanup phase, then frees the KKT matrix, the solution
 * buffer, the R index map, the stored diagonal of P, and `p` itself.
 * A NULL `p` is a no-op. `p` must not be used after this call.
 */
void scs_free_lin_sys_work(ScsLinSysWork *p) {
  if (!p) {
    return;
  }
  /* scs_init_lin_sys_work calls this function when forming the KKT matrix
   * failed, i.e. with p->kkt == NULL. Pardiso has not been run at that point
   * and there are no index arrays to pass to it, so the cleanup phase only
   * runs when the KKT matrix exists; reading p->kkt->p unconditionally would
   * dereference NULL. */
  if (p->kkt) {
    p->phase = PARDISO_CLEANUP;
    _PARDISO(p->pt, &(p->maxfct), &(p->mnum), &(p->mtype), &(p->phase),
             &(p->n_plus_m), SCS_NULL, p->kkt->p, p->kkt->i, SCS_NULL,
             &(p->nrhs), p->iparm, &(p->msglvl), SCS_NULL, SCS_NULL,
             &(p->error));
    if (p->error != 0) {
      /* cleanup failure is reported but the host memory is still released */
      scs_printf("Error during MKL Pardiso cleanup: %d", (int)p->error);
    }
    SCS(cs_spfree)(p->kkt);
  }
  if (p->sol) {
    scs_free(p->sol);
  }
  if (p->diag_r_idxs) {
    scs_free(p->diag_r_idxs);
  }
  if (p->diag_p) {
    scs_free(p->diag_p);
  }
  scs_free(p);
}
|
53
|
+
|
54
|
+
ScsLinSysWork *scs_init_lin_sys_work(const ScsMatrix *A, const ScsMatrix *P,
|
55
|
+
const scs_float *diag_r) {
|
56
|
+
scs_int i;
|
57
|
+
ScsLinSysWork *p = scs_calloc(1, sizeof(ScsLinSysWork));
|
58
|
+
|
59
|
+
/* TODO: is this necessary with pardiso_64? */
|
60
|
+
/* Set MKL interface layer */
|
61
|
+
/*
|
62
|
+
#ifdef DLONG
|
63
|
+
MKL_Set_Interface_Layer(MKL_INTERFACE_ILP64);
|
64
|
+
#else
|
65
|
+
MKL_Set_Interface_Layer(MKL_INTERFACE_LP64);
|
66
|
+
#endif
|
67
|
+
*/
|
68
|
+
p->n = A->n;
|
69
|
+
p->m = A->m;
|
70
|
+
p->n_plus_m = p->n + p->m;
|
71
|
+
|
72
|
+
/* Even though we overwrite rhs with sol pardiso requires the memory */
|
73
|
+
p->sol = (scs_float *)scs_malloc(sizeof(scs_float) * p->n_plus_m);
|
74
|
+
p->diag_r_idxs = (scs_int *)scs_calloc(p->n_plus_m, sizeof(scs_int));
|
75
|
+
p->diag_p = (scs_float *)scs_calloc(p->n, sizeof(scs_float));
|
76
|
+
|
77
|
+
/* MKL pardiso requires upper triangular CSR matrices. The KKT matrix stuffed
|
78
|
+
* as CSC lower triangular is equivalent. Pass upper=0. */
|
79
|
+
p->kkt = SCS(form_kkt)(A, P, p->diag_p, diag_r, p->diag_r_idxs, 0);
|
80
|
+
if (!(p->kkt)) {
|
81
|
+
scs_printf("Error in forming KKT matrix");
|
82
|
+
scs_free_lin_sys_work(p);
|
83
|
+
return SCS_NULL;
|
84
|
+
}
|
85
|
+
|
86
|
+
for (i = 0; i < 64; i++) {
|
87
|
+
p->iparm[i] = 0; /* Setup Pardiso control parameters */
|
88
|
+
p->pt[i] = 0; /* Initialize the internal solver memory pointer */
|
89
|
+
}
|
90
|
+
|
91
|
+
/* Set Pardiso variables */
|
92
|
+
p->mtype = -2; /* Real symmetric indefinite matrix */
|
93
|
+
p->nrhs = 1; /* Number of right hand sides */
|
94
|
+
p->maxfct = 1; /* Maximum number of numerical factorizations */
|
95
|
+
p->mnum = 1; /* Which factorization to use */
|
96
|
+
p->error = 0; /* Initialize error flag */
|
97
|
+
p->msglvl = VERBOSITY; /* Printing information */
|
98
|
+
|
99
|
+
/* For all iparm vars see MKL documentation */
|
100
|
+
p->iparm[0] = 1; /* Parsido must inspect iparm */
|
101
|
+
p->iparm[1] = 3; /* Fill-in reordering from OpenMP */
|
102
|
+
p->iparm[5] = 1; /* Write solution into b */
|
103
|
+
p->iparm[7] = 0; /* Automatic iterative refinement calculation */
|
104
|
+
p->iparm[9] = 8; /* Perturb the pivot elements with 1E-8 */
|
105
|
+
p->iparm[34] = 1; /* Use C-style indexing for indices */
|
106
|
+
/* p->iparm[36] = -80; */ /* Form block sparse matrices */
|
107
|
+
|
108
|
+
#ifdef SFLOAT
|
109
|
+
p->iparm[27] = 1; /* 1 is single precision, 0 is double */
|
110
|
+
#endif
|
111
|
+
|
112
|
+
/* Permutation and symbolic factorization */
|
113
|
+
scs_int phase = PARDISO_SYMBOLIC;
|
114
|
+
_PARDISO(p->pt, &(p->maxfct), &(p->mnum), &(p->mtype), &phase, &(p->n_plus_m),
|
115
|
+
p->kkt->x, p->kkt->p, p->kkt->i, SCS_NULL, &(p->nrhs), p->iparm,
|
116
|
+
&(p->msglvl), SCS_NULL, SCS_NULL, &(p->error));
|
117
|
+
|
118
|
+
if (p->error != 0) {
|
119
|
+
scs_printf("Error during symbolic factorization: %d", (int)p->error);
|
120
|
+
scs_free_lin_sys_work(p);
|
121
|
+
return SCS_NULL;
|
122
|
+
}
|
123
|
+
|
124
|
+
/* Numerical factorization */
|
125
|
+
p->phase = PARDISO_NUMERIC;
|
126
|
+
_PARDISO(p->pt, &(p->maxfct), &(p->mnum), &(p->mtype), &(p->phase),
|
127
|
+
&(p->n_plus_m), p->kkt->x, p->kkt->p, p->kkt->i, SCS_NULL,
|
128
|
+
&(p->nrhs), p->iparm, &(p->msglvl), SCS_NULL, SCS_NULL, &(p->error));
|
129
|
+
|
130
|
+
if (p->error) {
|
131
|
+
scs_printf("Error during numerical factorization: %d", (int)p->error);
|
132
|
+
scs_free_lin_sys_work(p);
|
133
|
+
return SCS_NULL;
|
134
|
+
}
|
135
|
+
|
136
|
+
if (p->iparm[21] < p->n) {
|
137
|
+
scs_printf("KKT matrix has < n positive eigenvalues. P not PSD.");
|
138
|
+
return SCS_NULL;
|
139
|
+
}
|
140
|
+
|
141
|
+
return p;
|
142
|
+
}
|
143
|
+
|
144
|
+
/* Returns solution to linear system Ax = b with solution stored in b */
|
145
|
+
scs_int scs_solve_lin_sys(ScsLinSysWork *p, scs_float *b, const scs_float *ws,
|
146
|
+
scs_float tol) {
|
147
|
+
/* Back substitution and iterative refinement */
|
148
|
+
p->phase = PARDISO_SOLVE;
|
149
|
+
_PARDISO(p->pt, &(p->maxfct), &(p->mnum), &(p->mtype), &(p->phase),
|
150
|
+
&(p->n_plus_m), p->kkt->x, p->kkt->p, p->kkt->i, SCS_NULL,
|
151
|
+
&(p->nrhs), p->iparm, &(p->msglvl), b, p->sol, &(p->error));
|
152
|
+
if (p->error != 0) {
|
153
|
+
scs_printf("Error during linear system solution: %d", (int)p->error);
|
154
|
+
}
|
155
|
+
return p->error;
|
156
|
+
}
|
157
|
+
|
158
|
+
/* Update factorization when R changes */
|
159
|
+
void scs_update_lin_sys_diag_r(ScsLinSysWork *p, const scs_float *diag_r) {
|
160
|
+
scs_int i;
|
161
|
+
|
162
|
+
for (i = 0; i < p->n; ++i) {
|
163
|
+
/* top left is R_x + P, bottom right is -R_y */
|
164
|
+
p->kkt->x[p->diag_r_idxs[i]] = p->diag_p[i] + diag_r[i];
|
165
|
+
}
|
166
|
+
for (i = p->n; i < p->n + p->m; ++i) {
|
167
|
+
/* top left is R_x + P, bottom right is -R_y */
|
168
|
+
p->kkt->x[p->diag_r_idxs[i]] = -diag_r[i];
|
169
|
+
}
|
170
|
+
|
171
|
+
/* Perform numerical factorization */
|
172
|
+
p->phase = PARDISO_NUMERIC;
|
173
|
+
_PARDISO(p->pt, &(p->maxfct), &(p->mnum), &(p->mtype), &(p->phase),
|
174
|
+
&(p->n_plus_m), p->kkt->x, p->kkt->p, p->kkt->i, SCS_NULL,
|
175
|
+
&(p->nrhs), p->iparm, &(p->msglvl), SCS_NULL, SCS_NULL, &(p->error));
|
176
|
+
|
177
|
+
if (p->error != 0) {
|
178
|
+
scs_printf("Error in PARDISO factorization when updating: %d.\n",
|
179
|
+
(int)p->error);
|
180
|
+
scs_free_lin_sys_work(p);
|
181
|
+
}
|
182
|
+
}
|