scs 0.4.0 → 0.4.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (47) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +1 -1
  5. data/lib/scs/version.rb +1 -1
  6. data/vendor/scs/CITATION.cff +2 -2
  7. data/vendor/scs/CMakeLists.txt +284 -168
  8. data/vendor/scs/Makefile +43 -18
  9. data/vendor/scs/README.md +1 -1
  10. data/vendor/scs/include/glbopts.h +32 -13
  11. data/vendor/scs/include/linsys.h +8 -8
  12. data/vendor/scs/include/scs.h +6 -2
  13. data/vendor/scs/include/scs_types.h +3 -1
  14. data/vendor/scs/include/scs_work.h +9 -8
  15. data/vendor/scs/include/util.h +1 -1
  16. data/vendor/scs/linsys/cpu/direct/private.c +32 -153
  17. data/vendor/scs/linsys/cpu/direct/private.h +6 -6
  18. data/vendor/scs/linsys/cpu/indirect/private.c +9 -22
  19. data/vendor/scs/linsys/cpu/indirect/private.h +4 -2
  20. data/vendor/scs/linsys/csparse.c +140 -12
  21. data/vendor/scs/linsys/csparse.h +10 -17
  22. data/vendor/scs/linsys/gpu/gpu.c +4 -4
  23. data/vendor/scs/linsys/gpu/gpu.h +1 -1
  24. data/vendor/scs/linsys/gpu/indirect/private.c +15 -26
  25. data/vendor/scs/linsys/mkl/direct/private.c +182 -0
  26. data/vendor/scs/linsys/mkl/direct/private.h +38 -0
  27. data/vendor/scs/linsys/scs_matrix.c +11 -5
  28. data/vendor/scs/scs.mk +39 -26
  29. data/vendor/scs/src/cones.c +15 -159
  30. data/vendor/scs/src/exp_cone.c +399 -0
  31. data/vendor/scs/src/normalize.c +4 -2
  32. data/vendor/scs/src/rw.c +93 -38
  33. data/vendor/scs/src/scs.c +83 -52
  34. data/vendor/scs/src/util.c +12 -3
  35. data/vendor/scs/test/minunit.h +2 -1
  36. data/vendor/scs/test/problem_utils.h +2 -1
  37. data/vendor/scs/test/problems/hs21_tiny_qp.h +1 -1
  38. data/vendor/scs/test/problems/hs21_tiny_qp_rw.h +8 -3
  39. data/vendor/scs/test/problems/max_ent +0 -0
  40. data/vendor/scs/test/problems/max_ent.h +8 -0
  41. data/vendor/scs/test/problems/random_prob.h +2 -43
  42. data/vendor/scs/test/problems/rob_gauss_cov_est.h +7 -2
  43. data/vendor/scs/test/problems/test_exp_cone.h +84 -0
  44. data/vendor/scs/test/problems/test_prob_from_data_file.h +57 -0
  45. data/vendor/scs/test/run_from_file.c +7 -1
  46. data/vendor/scs/test/run_tests.c +22 -9
  47. metadata +10 -3
@@ -9,15 +9,15 @@ extern "C" {
9
9
  #include "external/amd/amd.h"
10
10
  #include "external/qdldl/qdldl.h"
11
11
  #include "glbopts.h"
12
- #include "scs.h"
12
+ #include "linsys.h"
13
13
  #include "scs_matrix.h"
14
14
 
15
15
  struct SCS_LIN_SYS_WORK {
16
- scs_int m, n; /* linear system dimensions */
17
- csc *kkt, *L; /* KKT, and factorization matrix L resp. */
18
- scs_float *Dinv; /* inverse diagonal matrix of factorization */
19
- scs_int *perm; /* permutation of KKT matrix for factorization */
20
- scs_float *bp; /* workspace memory for solves */
16
+ scs_int m, n; /* linear system dimensions */
17
+ ScsMatrix *kkt, *L; /* KKT, and factorization matrix L resp. */
18
+ scs_float *Dinv; /* inverse diagonal matrix of factorization */
19
+ scs_int *perm; /* permutation of KKT matrix for factorization */
20
+ scs_float *bp; /* workspace memory for solves */
21
21
  scs_int *diag_r_idxs;
22
22
  scs_int factorizations;
23
23
  /* ldl factorization workspace */
@@ -1,22 +1,9 @@
1
1
  #include "private.h"
2
- #include "linsys.h"
3
- #include "util.h"
4
- #include <limits.h>
5
2
 
6
- const char *SCS(get_lin_sys_method)() {
7
- return "sparse-indirect";
3
+ const char *scs_get_lin_sys_method() {
4
+ return "sparse-indirect-scs";
8
5
  }
9
6
 
10
- /*
11
- char *SCS(get_lin_sys_summary)(ScsLinSysWork *p, const ScsInfo *info) {
12
- char *str = (char *)scs_malloc(sizeof(char) * 128);
13
- sprintf(str, "lin-sys: avg cg its: %2.2f\n",
14
- (scs_float)p->tot_cg_its / (info->iter + 1));
15
- p->tot_cg_its = 0;
16
- return str;
17
- }
18
- */
19
-
20
7
  /* Not possible to do this on the fly due to M_ii += a_i' (R_y)^-1 a_i */
21
8
  /* set M = inv ( diag ( R_x + P + A' R_y^{-1} A ) ) */
22
9
  static void set_preconditioner(ScsLinSysWork *p) {
@@ -97,7 +84,7 @@ static void transpose(const ScsMatrix *A, ScsLinSysWork *p) {
97
84
  #endif
98
85
  }
99
86
 
100
- void SCS(free_lin_sys_work)(ScsLinSysWork *p) {
87
+ void scs_free_lin_sys_work(ScsLinSysWork *p) {
101
88
  if (p) {
102
89
  scs_free(p->p);
103
90
  scs_free(p->r);
@@ -162,13 +149,13 @@ static void apply_pre_conditioner(scs_float *z, scs_float *r, scs_int n,
162
149
  }
163
150
 
164
151
  /* diag_r changed: keep the new pointer and rebuild the preconditioner */
165
- void SCS(update_lin_sys_diag_r)(ScsLinSysWork *p, const scs_float *diag_r) {
152
+ void scs_update_lin_sys_diag_r(ScsLinSysWork *p, const scs_float *diag_r) {
166
153
  p->diag_r = diag_r; /* this isn't needed but do it to be safe */
167
154
  set_preconditioner(p);
168
155
  }
169
156
 
170
- ScsLinSysWork *SCS(init_lin_sys_work)(const ScsMatrix *A, const ScsMatrix *P,
171
- const scs_float *diag_r) {
157
+ ScsLinSysWork *scs_init_lin_sys_work(const ScsMatrix *A, const ScsMatrix *P,
158
+ const scs_float *diag_r) {
172
159
  ScsLinSysWork *p = (ScsLinSysWork *)scs_calloc(1, sizeof(ScsLinSysWork));
173
160
  p->A = A;
174
161
  p->P = P;
@@ -198,7 +185,7 @@ ScsLinSysWork *SCS(init_lin_sys_work)(const ScsMatrix *A, const ScsMatrix *P,
198
185
  p->tot_cg_its = 0;
199
186
  if (!p->p || !p->r || !p->Gp || !p->tmp || !p->At || !p->At->i || !p->At->p ||
200
187
  !p->At->x) {
201
- SCS(free_lin_sys_work)(p);
188
+ scs_free_lin_sys_work(p);
202
189
  return SCS_NULL;
203
190
  }
204
191
  return p;
@@ -288,8 +275,8 @@ static scs_int pcg(ScsLinSysWork *pr, const scs_float *s, scs_float *b,
288
275
  * y = R_y^{-1} (Ax - ry)
289
276
  *
290
277
  */
291
- scs_int SCS(solve_lin_sys)(ScsLinSysWork *p, scs_float *b, const scs_float *s,
292
- scs_float tol) {
278
+ scs_int scs_solve_lin_sys(ScsLinSysWork *p, scs_float *b, const scs_float *s,
279
+ scs_float tol) {
293
280
  scs_int cg_its, max_iters;
294
281
 
295
282
  if (tol <= 0.) {
@@ -5,11 +5,13 @@
5
5
  extern "C" {
6
6
  #endif
7
7
 
8
+ #include "csparse.h"
8
9
  #include "glbopts.h"
9
10
  #include "linalg.h"
10
- #include "scs.h"
11
+ #include "linsys.h"
11
12
  #include "scs_matrix.h"
12
- #include <math.h>
13
+ #include "util.h" /* timer */
14
+ #include <string.h>
13
15
 
14
16
  struct SCS_LIN_SYS_WORK {
15
17
  scs_int n, m; /* linear system dimensions */
@@ -2,39 +2,37 @@
2
2
 
3
3
  #include "csparse.h"
4
4
 
5
- csc *SCS(cs_spalloc)(scs_int m, scs_int n, scs_int nzmax, scs_int values,
6
- scs_int triplet) {
7
- csc *A = (csc *)scs_calloc(1, sizeof(csc)); /* allocate the csc struct */
5
+ ScsMatrix *SCS(cs_spalloc)(scs_int m, scs_int n, scs_int nzmax, scs_int values,
6
+ scs_int triplet) {
7
+ ScsMatrix *A = (ScsMatrix *)scs_calloc(1, sizeof(ScsMatrix));
8
8
  if (!A) {
9
9
  return SCS_NULL;
10
10
  } /* out of memory */
11
11
  A->m = m; /* define dimensions and nzmax */
12
12
  A->n = n;
13
- A->nzmax = nzmax = MAX(nzmax, 1);
14
- A->nz = triplet ? 0 : -1; /* allocate triplet or comp.col */
15
13
  A->p = (scs_int *)scs_calloc((triplet ? nzmax : n + 1), sizeof(scs_int));
16
14
  A->i = (scs_int *)scs_calloc(nzmax, sizeof(scs_int));
17
15
  A->x = values ? (scs_float *)scs_calloc(nzmax, sizeof(scs_float)) : SCS_NULL;
18
16
  return (!A->p || !A->i || (values && !A->x)) ? SCS(cs_spfree)(A) : A;
19
17
  }
20
18
 
21
- csc *SCS(cs_done)(csc *C, void *w, void *x, scs_int ok) {
19
+ ScsMatrix *SCS(cs_done)(ScsMatrix *C, void *w, void *x, scs_int ok) {
22
20
  scs_free(w); /* free workspace */
23
21
  scs_free(x);
24
22
  return ok ? C : SCS(cs_spfree)(C); /* return result if OK, else free it */
25
23
  }
26
24
 
27
25
  /* C = compressed-column form of a triplet matrix T */
28
- csc *SCS(cs_compress)(const csc *T, scs_int *idx_mapping) {
29
- scs_int m, n, nz, p, k, *Cp, *Ci, *w, *Ti, *Tj;
26
+ ScsMatrix *SCS(cs_compress)(const ScsMatrix *T, scs_int nz,
27
+ scs_int *idx_mapping) {
28
+ scs_int m, n, p, k, *Cp, *Ci, *w, *Ti, *Tj;
30
29
  scs_float *Cx, *Tx;
31
- csc *C;
30
+ ScsMatrix *C;
32
31
  m = T->m;
33
32
  n = T->n;
34
33
  Ti = T->i;
35
34
  Tj = T->p;
36
35
  Tx = T->x;
37
- nz = T->nz;
38
36
  C = SCS(cs_spalloc)(m, n, nz, Tx != SCS_NULL, 0); /* allocate result */
39
37
  w = (scs_int *)scs_calloc(n, sizeof(scs_int)); /* get workspace */
40
38
  if (!C || !w) {
@@ -75,7 +73,7 @@ scs_float SCS(cumsum)(scs_int *p, scs_int *c, scs_int n) {
75
73
  return nz2; /* return sum (c [0..n-1]) */
76
74
  }
77
75
 
78
- csc *SCS(cs_spfree)(csc *A) {
76
+ ScsMatrix *SCS(cs_spfree)(ScsMatrix *A) {
79
77
  if (!A) {
80
78
  return SCS_NULL;
81
79
  } /* do nothing if A already SCS_NULL */
@@ -83,5 +81,135 @@ csc *SCS(cs_spfree)(csc *A) {
83
81
  scs_free(A->i);
84
82
  scs_free(A->x);
85
83
  scs_free(A);
86
- return (csc *)SCS_NULL; /* free the csc struct and return SCS_NULL */
84
+ /* free the ScsMatrix struct and return SCS_NULL */
85
+ return (ScsMatrix *)SCS_NULL;
86
+ }
87
+
88
+ /* Build the KKT matrix */
89
+ ScsMatrix *SCS(form_kkt)(const ScsMatrix *A, const ScsMatrix *P,
90
+ scs_float *diag_p, const scs_float *diag_r,
91
+ scs_int *diag_r_idxs, scs_int upper) {
92
+ /*
93
+ * Forms column compressed KKT matrix assumes column compressed A,P matrices.
94
+ * Only upper OR lower triangular part is stuffed, depending on `upper` flag.
95
+ *
96
+ * Forms upper/lower triangular part of [(R_x + P) A'; A -R_y]
97
+ * Shapes: P : n x n, A: m x n.
98
+ *
99
+ * Output: `diag_p` will contain values of P diagonal upon completion,
100
+ * and `diag_r_idxs` will contain the indices corresponding to the entries
101
+ * in the returned matrix corresponding to the entries of R.
102
+ *
103
+ */
104
+ scs_int h, i, j, count;
105
+ ScsMatrix *Kcsc, *K;
106
+ scs_int n = A->n;
107
+ scs_int m = A->m;
108
+ scs_int Anz = A->p[n];
109
+ scs_int Knzmax;
110
+ scs_int *idx_mapping;
111
+ if (P) {
112
+ /* Upper bound P + I triangular component NNZs as Pnz + n */
113
+ Knzmax = n + m + Anz + P->p[n];
114
+ } else {
115
+ Knzmax = n + m + Anz;
116
+ }
117
+ K = SCS(cs_spalloc)(m + n, m + n, Knzmax, 1, 1);
118
+
119
+ #if VERBOSITY > 0
120
+ scs_printf("forming kkt\n");
121
+ #endif
122
+ /* Here we generate a triplet matrix and then compress to CSC */
123
+ if (!K) {
124
+ return SCS_NULL;
125
+ }
126
+
127
+ count = 0; /* element counter */
128
+ if (P) {
129
+ /* R_x + P in top left */
130
+ for (j = 0; j < n; j++) { /* cols */
131
+ diag_p[j] = 0.;
132
+ /* empty column, add diagonal */
133
+ if (P->p[j] == P->p[j + 1]) {
134
+ K->i[count] = j;
135
+ K->p[count] = j;
136
+ K->x[count] = diag_r[j];
137
+ diag_r_idxs[j] = count; /* store the indices where diag_r occurs */
138
+ count++;
139
+ }
140
+ for (h = P->p[j]; h < P->p[j + 1]; h++) {
141
+ i = P->i[h]; /* row */
142
+ if (i > j) { /* only upper triangular needed */
143
+ break;
144
+ }
145
+ if (upper) {
146
+ K->i[count] = i;
147
+ K->p[count] = j;
148
+ } else { /* lower triangular */
149
+ /* P is passed in upper triangular, need to flip that here */
150
+ K->i[count] = j; /* col -> row */
151
+ K->p[count] = i; /* row -> col */
152
+ }
153
+ K->x[count] = P->x[h];
154
+ if (i == j) {
155
+ /* P has diagonal element */
156
+ diag_p[j] = P->x[h];
157
+ K->x[count] += diag_r[j];
158
+ diag_r_idxs[j] = count; /* store the indices where diag_r occurs */
159
+ }
160
+ count++;
161
+ /* reached the end without adding diagonal, do it now */
162
+ if ((i < j) && (h + 1 == P->p[j + 1] || P->i[h + 1] > j)) {
163
+ K->i[count] = j;
164
+ K->p[count] = j;
165
+ K->x[count] = diag_r[j];
166
+ diag_r_idxs[j] = count; /* store the indices where diag_r occurs */
167
+ count++;
168
+ }
169
+ }
170
+ }
171
+ } else {
172
+ /* R_x in top left */
173
+ for (j = 0; j < n; j++) {
174
+ diag_p[j] = 0.;
175
+ K->i[count] = j;
176
+ K->p[count] = j;
177
+ K->x[count] = diag_r[j];
178
+ diag_r_idxs[j] = count; /* store the indices where diag_r occurs */
179
+ count++;
180
+ }
181
+ }
182
+
183
+ /* A in bottom left or A^T top right */
184
+ for (j = 0; j < n; j++) { /* column */
185
+ for (h = A->p[j]; h < A->p[j + 1]; h++) {
186
+ if (upper) {
187
+ K->p[count] = A->i[h] + n; /* column */
188
+ K->i[count] = j; /*row */
189
+ } else { /* lower triangular */
190
+ K->p[count] = j; /* column */
191
+ K->i[count] = A->i[h] + n; /* row */
192
+ }
193
+ K->x[count] = A->x[h];
194
+ count++;
195
+ }
196
+ }
197
+
198
+ /* -R_y at bottom right */
199
+ for (j = 0; j < m; j++) {
200
+ K->i[count] = j + n;
201
+ K->p[count] = j + n;
202
+ K->x[count] = -diag_r[j + n];
203
+ diag_r_idxs[j + n] = count; /* store the indices where diag_r occurs */
204
+ count++;
205
+ }
206
+
207
+ idx_mapping = (scs_int *)scs_calloc(count, sizeof(scs_int));
208
+ Kcsc = SCS(cs_compress)(K, count, idx_mapping);
209
+ for (i = 0; i < m + n; i++) {
210
+ diag_r_idxs[i] = idx_mapping[diag_r_idxs[i]];
211
+ }
212
+ SCS(cs_spfree)(K);
213
+ scs_free(idx_mapping);
214
+ return Kcsc;
87
215
  }
@@ -10,24 +10,17 @@ extern "C" {
10
10
  #include "glbopts.h"
11
11
  #include "scs.h"
12
12
 
13
- /* matrix in compressed-column or triplet form */
14
- typedef struct SPARSE_MATRIX {
15
- scs_int nzmax; /* maximum number of entries */
16
- scs_int m; /* number of rows */
17
- scs_int n; /* number of columns */
18
- scs_int *p; /* column pointers (size n+1) or col indices (size nzmax) */
19
- scs_int *i; /* row indices, size nzmax */
20
- scs_float *x; /* numerical values, size nzmax */
21
- scs_int nz; /* # of entries in triplet matrix, -1 for compressed-col */
22
- } csc;
23
-
24
- csc *SCS(cs_spalloc)(scs_int m, scs_int n, scs_int nzmax, scs_int values,
25
- scs_int triplet);
26
- csc *SCS(cs_done)(csc *C, void *w, void *x, scs_int ok);
27
- csc *SCS(cs_compress)(const csc *T, scs_int *idx_mapping);
13
+ ScsMatrix *SCS(cs_spalloc)(scs_int m, scs_int n, scs_int nzmax, scs_int values,
14
+ scs_int triplet);
15
+ ScsMatrix *SCS(cs_done)(ScsMatrix *C, void *w, void *x, scs_int ok);
16
+ ScsMatrix *SCS(cs_compress)(const ScsMatrix *T, scs_int nz,
17
+ scs_int *idx_mapping);
18
+ ScsMatrix *SCS(cs_spfree)(ScsMatrix *A);
28
19
  scs_float SCS(cumsum)(scs_int *p, scs_int *c, scs_int n);
29
- csc *SCS(cs_spfree)(csc *A);
30
-
20
+ /* Forms KKT matrix */
21
+ ScsMatrix *SCS(form_kkt)(const ScsMatrix *A, const ScsMatrix *P,
22
+ scs_float *diag_p, const scs_float *diag_r,
23
+ scs_int *diag_r_idxs, scs_int upper);
31
24
  #ifdef __cplusplus
32
25
  }
33
26
  #endif
@@ -19,13 +19,13 @@ void SCS(accum_by_atrans_gpu)(const ScsGpuMatrix *Ag,
19
19
  if (*buffer != SCS_NULL) {
20
20
  cudaFree(*buffer);
21
21
  }
22
- cudaMalloc(buffer, *buffer_size);
22
+ cudaMalloc(buffer, new_buffer_size);
23
23
  *buffer_size = new_buffer_size;
24
24
  }
25
25
 
26
26
  CUSPARSE_GEN(SpMV)
27
27
  (cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &onef, Ag->descr, x,
28
- &onef, y, SCS_CUDA_FLOAT, SCS_CSRMV_ALG, buffer);
28
+ &onef, y, SCS_CUDA_FLOAT, SCS_CSRMV_ALG, *buffer);
29
29
  }
30
30
 
31
31
  /* this is slow, use trans routine if possible */
@@ -48,13 +48,13 @@ void SCS(accum_by_a_gpu)(const ScsGpuMatrix *Ag, const cusparseDnVecDescr_t x,
48
48
  if (*buffer != SCS_NULL) {
49
49
  cudaFree(*buffer);
50
50
  }
51
- cudaMalloc(buffer, *buffer_size);
51
+ cudaMalloc(buffer, new_buffer_size);
52
52
  *buffer_size = new_buffer_size;
53
53
  }
54
54
 
55
55
  CUSPARSE_GEN(SpMV)
56
56
  (cusparse_handle, CUSPARSE_OPERATION_TRANSPOSE, &onef, Ag->descr, x, &onef, y,
57
- SCS_CUDA_FLOAT, SCS_CSRMV_ALG, buffer);
57
+ SCS_CUDA_FLOAT, SCS_CSRMV_ALG, *buffer);
58
58
  }
59
59
 
60
60
  /* This assumes that P has been made full (ie not triangular) and uses the
@@ -74,7 +74,7 @@ extern "C" {
74
74
  #define SCS_CUSPARSE_INDEX CUSPARSE_INDEX_64I
75
75
  #endif
76
76
 
77
- #define SCS_CSRMV_ALG CUSPARSE_CSRMV_ALG1
77
+ #define SCS_CSRMV_ALG CUSPARSE_SPMV_CSR_ALG1
78
78
  #define SCS_CSR2CSC_ALG CUSPARSE_CSR2CSC_ALG1
79
79
 
80
80
  /*
@@ -21,20 +21,10 @@ static scs_float cg_gpu_norm(cublasHandle_t cublas_handle, scs_float *r,
21
21
  return nrm;
22
22
  }
23
23
 
24
- const char *SCS(get_lin_sys_method)() {
24
+ const char *scs_get_lin_sys_method() {
25
25
  return "sparse-indirect GPU";
26
26
  }
27
27
 
28
- /*
29
- char *SCS(get_lin_sys_summary)(ScsLinSysWork *p, const ScsInfo *info) {
30
- char *str = (char *)scs_malloc(sizeof(char) * 128);
31
- sprintf(str, "lin-sys: avg cg its: %2.2f\n",
32
- (scs_float)p->tot_cg_its / (info->iter + 1));
33
- p->tot_cg_its = 0;
34
- return str;
35
- }
36
- */
37
-
38
28
  /* Not possible to do this on the fly due to M_ii += a_i' (R_y)^-1 a_i */
39
29
  /* set M = inv ( diag ( R_x + P + A' R_y^{-1} A ) ) */
40
30
  static void set_preconditioner(ScsLinSysWork *p, const scs_float *diag_r) {
@@ -76,7 +66,7 @@ static void set_preconditioner(ScsLinSysWork *p, const scs_float *diag_r) {
76
66
  }
77
67
 
78
68
  /* diag_r changed: copy R to the GPU and rebuild the preconditioner */
79
- void SCS(update_lin_sys_diag_r)(ScsLinSysWork *p, const scs_float *diag_r) {
69
+ void scs_update_lin_sys_diag_r(ScsLinSysWork *p, const scs_float *diag_r) {
80
70
  scs_int i;
81
71
 
82
72
  /* R_x to gpu */
@@ -93,7 +83,7 @@ void SCS(update_lin_sys_diag_r)(ScsLinSysWork *p, const scs_float *diag_r) {
93
83
  set_preconditioner(p, diag_r);
94
84
  }
95
85
 
96
- void SCS(free_lin_sys_work)(ScsLinSysWork *p) {
86
+ void scs_free_lin_sys_work(ScsLinSysWork *p) {
97
87
  if (p) {
98
88
  scs_free(p->M);
99
89
  scs_free(p->inv_r_y);
@@ -182,13 +172,13 @@ static void mat_vec(ScsLinSysWork *p, const scs_float *x, scs_float *y) {
182
172
  }
183
173
 
184
174
  /* P comes in upper triangular, expand to full
185
- * First compute triplet version of full matrix, then compress to csc
175
+ * First compute triplet version of full matrix, then compress to CSC
186
176
  * */
187
- static csc *fill_p_matrix(const ScsMatrix *P) {
177
+ static ScsMatrix *fill_p_matrix(const ScsMatrix *P) {
188
178
  scs_int i, j, k, kk;
189
179
  scs_int Pnzmax = 2 * P->p[P->n]; /* upper bound */
190
- csc *P_tmp = SCS(cs_spalloc)(P->n, P->n, Pnzmax, 1, 1);
191
- csc *P_full;
180
+ ScsMatrix *P_tmp = SCS(cs_spalloc)(P->n, P->n, Pnzmax, 1, 1);
181
+ ScsMatrix *P_full;
192
182
  kk = 0;
193
183
  for (j = 0; j < P->n; j++) { /* cols */
194
184
  for (k = P->p[j]; k < P->p[j + 1]; k++) {
@@ -209,16 +199,15 @@ static csc *fill_p_matrix(const ScsMatrix *P) {
209
199
  kk++;
210
200
  }
211
201
  }
212
- P_tmp->nz = kk; /* set number of nonzeros */
213
- P_full = SCS(cs_compress)(P_tmp, SCS_NULL);
202
+ P_full = SCS(cs_compress)(P_tmp, kk, SCS_NULL);
214
203
  SCS(cs_spfree)(P_tmp);
215
204
  return P_full;
216
205
  }
217
206
 
218
- ScsLinSysWork *SCS(init_lin_sys_work)(const ScsMatrix *A, const ScsMatrix *P,
219
- const scs_float *diag_r) {
207
+ ScsLinSysWork *scs_init_lin_sys_work(const ScsMatrix *A, const ScsMatrix *P,
208
+ const scs_float *diag_r) {
220
209
  cudaError_t err;
221
- csc *P_full;
210
+ ScsMatrix *P_full;
222
211
  ScsLinSysWork *p = SCS_NULL;
223
212
  ScsGpuMatrix *Ag = SCS_NULL;
224
213
  ScsGpuMatrix *Pg = SCS_NULL;
@@ -324,7 +313,7 @@ ScsLinSysWork *SCS(init_lin_sys_work)(const ScsMatrix *A, const ScsMatrix *P,
324
313
  cusparseCreateDnVec(&p->dn_vec_m, Ag->m, p->tmp_m, SCS_CUDA_FLOAT);
325
314
 
326
315
  /* Form preconditioner and copy R_x, 1/R_y to gpu */
327
- SCS(update_lin_sys_diag_r)(p, diag_r);
316
+ scs_update_lin_sys_diag_r(p, diag_r);
328
317
 
329
318
  #if GPU_TRANSPOSE_MAT > 0
330
319
  p->Agt = (ScsGpuMatrix *)scs_malloc(sizeof(ScsGpuMatrix));
@@ -367,7 +356,7 @@ ScsLinSysWork *SCS(init_lin_sys_work)(const ScsMatrix *A, const ScsMatrix *P,
367
356
  if (err != cudaSuccess) {
368
357
  printf("%s:%d:%s\nERROR_CUDA (*): %s\n", __FILE__, __LINE__, __func__,
369
358
  cudaGetErrorString(err));
370
- SCS(free_lin_sys_work)(p);
359
+ scs_free_lin_sys_work(p);
371
360
  return SCS_NULL;
372
361
  }
373
362
  return p;
@@ -466,8 +455,8 @@ static scs_int pcg(ScsLinSysWork *pr, const scs_float *s, scs_float *bg,
466
455
  * y = R_y^{-1} (Ax - ry)
467
456
  *
468
457
  */
469
- scs_int SCS(solve_lin_sys)(ScsLinSysWork *p, scs_float *b, const scs_float *s,
470
- scs_float tol) {
458
+ scs_int scs_solve_lin_sys(ScsLinSysWork *p, scs_float *b, const scs_float *s,
459
+ scs_float tol) {
471
460
  scs_int cg_its, max_iters;
472
461
  scs_float neg_onef = -1.0;
473
462
 
@@ -0,0 +1,182 @@
1
+ #include "private.h"
2
+
3
+ #define PARDISO_SYMBOLIC (11)
4
+ #define PARDISO_NUMERIC (22)
5
+ #define PARDISO_SOLVE (33)
6
+ #define PARDISO_CLEANUP (-1)
7
+
8
+ /* TODO: is it necessary to use pardiso_64 and MKL_Set_Interface_Layer ? */
9
+ /*
10
+ #define MKL_INTERFACE_LP64 0
11
+ #define MKL_INTERFACE_ILP64 1
12
+ */
13
+ #ifdef DLONG
14
+ #define _PARDISO pardiso_64
15
+ #else
16
+ #define _PARDISO pardiso
17
+ #endif
18
+
19
+ /* Prototypes for Pardiso functions */
20
+ void _PARDISO(void **pt, const scs_int *maxfct, const scs_int *mnum,
21
+ const scs_int *mtype, const scs_int *phase, const scs_int *n,
22
+ const scs_float *a, const scs_int *ia, const scs_int *ja,
23
+ scs_int *perm, const scs_int *nrhs, scs_int *iparm,
24
+ const scs_int *msglvl, scs_float *b, scs_float *x,
25
+ scs_int *error);
26
+ /* scs_int MKL_Set_Interface_Layer(scs_int); */
27
+
28
/* Returns a constant string naming this linear-system backend. */
const char *scs_get_lin_sys_method(void) {
  return "sparse-direct-mkl-pardiso";
}
31
+
32
+ void scs_free_lin_sys_work(ScsLinSysWork *p) {
33
+ if (p) {
34
+ p->phase = PARDISO_CLEANUP;
35
+ _PARDISO(p->pt, &(p->maxfct), &(p->mnum), &(p->mtype), &(p->phase),
36
+ &(p->n_plus_m), SCS_NULL, p->kkt->p, p->kkt->i, SCS_NULL,
37
+ &(p->nrhs), p->iparm, &(p->msglvl), SCS_NULL, SCS_NULL,
38
+ &(p->error));
39
+ if (p->error != 0) {
40
+ scs_printf("Error during MKL Pardiso cleanup: %d", (int)p->error);
41
+ }
42
+ if (p->kkt)
43
+ SCS(cs_spfree)(p->kkt);
44
+ if (p->sol)
45
+ scs_free(p->sol);
46
+ if (p->diag_r_idxs)
47
+ scs_free(p->diag_r_idxs);
48
+ if (p->diag_p)
49
+ scs_free(p->diag_p);
50
+ scs_free(p);
51
+ }
52
+ }
53
+
54
+ ScsLinSysWork *scs_init_lin_sys_work(const ScsMatrix *A, const ScsMatrix *P,
55
+ const scs_float *diag_r) {
56
+ scs_int i;
57
+ ScsLinSysWork *p = scs_calloc(1, sizeof(ScsLinSysWork));
58
+
59
+ /* TODO: is this necessary with pardiso_64? */
60
+ /* Set MKL interface layer */
61
+ /*
62
+ #ifdef DLONG
63
+ MKL_Set_Interface_Layer(MKL_INTERFACE_ILP64);
64
+ #else
65
+ MKL_Set_Interface_Layer(MKL_INTERFACE_LP64);
66
+ #endif
67
+ */
68
+ p->n = A->n;
69
+ p->m = A->m;
70
+ p->n_plus_m = p->n + p->m;
71
+
72
+ /* Even though we overwrite rhs with sol pardiso requires the memory */
73
+ p->sol = (scs_float *)scs_malloc(sizeof(scs_float) * p->n_plus_m);
74
+ p->diag_r_idxs = (scs_int *)scs_calloc(p->n_plus_m, sizeof(scs_int));
75
+ p->diag_p = (scs_float *)scs_calloc(p->n, sizeof(scs_float));
76
+
77
+ /* MKL pardiso requires upper triangular CSR matrices. The KKT matrix stuffed
78
+ * as CSC lower triangular is equivalent. Pass upper=0. */
79
+ p->kkt = SCS(form_kkt)(A, P, p->diag_p, diag_r, p->diag_r_idxs, 0);
80
+ if (!(p->kkt)) {
81
+ scs_printf("Error in forming KKT matrix");
82
+ scs_free_lin_sys_work(p);
83
+ return SCS_NULL;
84
+ }
85
+
86
+ for (i = 0; i < 64; i++) {
87
+ p->iparm[i] = 0; /* Setup Pardiso control parameters */
88
+ p->pt[i] = 0; /* Initialize the internal solver memory pointer */
89
+ }
90
+
91
+ /* Set Pardiso variables */
92
+ p->mtype = -2; /* Real symmetric indefinite matrix */
93
+ p->nrhs = 1; /* Number of right hand sides */
94
+ p->maxfct = 1; /* Maximum number of numerical factorizations */
95
+ p->mnum = 1; /* Which factorization to use */
96
+ p->error = 0; /* Initialize error flag */
97
+ p->msglvl = VERBOSITY; /* Printing information */
98
+
99
+ /* For all iparm vars see MKL documentation */
100
+ p->iparm[0] = 1; /* Parsido must inspect iparm */
101
+ p->iparm[1] = 3; /* Fill-in reordering from OpenMP */
102
+ p->iparm[5] = 1; /* Write solution into b */
103
+ p->iparm[7] = 0; /* Automatic iterative refinement calculation */
104
+ p->iparm[9] = 8; /* Perturb the pivot elements with 1E-8 */
105
+ p->iparm[34] = 1; /* Use C-style indexing for indices */
106
+ /* p->iparm[36] = -80; */ /* Form block sparse matrices */
107
+
108
+ #ifdef SFLOAT
109
+ p->iparm[27] = 1; /* 1 is single precision, 0 is double */
110
+ #endif
111
+
112
+ /* Permutation and symbolic factorization */
113
+ scs_int phase = PARDISO_SYMBOLIC;
114
+ _PARDISO(p->pt, &(p->maxfct), &(p->mnum), &(p->mtype), &phase, &(p->n_plus_m),
115
+ p->kkt->x, p->kkt->p, p->kkt->i, SCS_NULL, &(p->nrhs), p->iparm,
116
+ &(p->msglvl), SCS_NULL, SCS_NULL, &(p->error));
117
+
118
+ if (p->error != 0) {
119
+ scs_printf("Error during symbolic factorization: %d", (int)p->error);
120
+ scs_free_lin_sys_work(p);
121
+ return SCS_NULL;
122
+ }
123
+
124
+ /* Numerical factorization */
125
+ p->phase = PARDISO_NUMERIC;
126
+ _PARDISO(p->pt, &(p->maxfct), &(p->mnum), &(p->mtype), &(p->phase),
127
+ &(p->n_plus_m), p->kkt->x, p->kkt->p, p->kkt->i, SCS_NULL,
128
+ &(p->nrhs), p->iparm, &(p->msglvl), SCS_NULL, SCS_NULL, &(p->error));
129
+
130
+ if (p->error) {
131
+ scs_printf("Error during numerical factorization: %d", (int)p->error);
132
+ scs_free_lin_sys_work(p);
133
+ return SCS_NULL;
134
+ }
135
+
136
+ if (p->iparm[21] < p->n) {
137
+ scs_printf("KKT matrix has < n positive eigenvalues. P not PSD.");
138
+ return SCS_NULL;
139
+ }
140
+
141
+ return p;
142
+ }
143
+
144
+ /* Returns solution to linear system Ax = b with solution stored in b */
145
+ scs_int scs_solve_lin_sys(ScsLinSysWork *p, scs_float *b, const scs_float *ws,
146
+ scs_float tol) {
147
+ /* Back substitution and iterative refinement */
148
+ p->phase = PARDISO_SOLVE;
149
+ _PARDISO(p->pt, &(p->maxfct), &(p->mnum), &(p->mtype), &(p->phase),
150
+ &(p->n_plus_m), p->kkt->x, p->kkt->p, p->kkt->i, SCS_NULL,
151
+ &(p->nrhs), p->iparm, &(p->msglvl), b, p->sol, &(p->error));
152
+ if (p->error != 0) {
153
+ scs_printf("Error during linear system solution: %d", (int)p->error);
154
+ }
155
+ return p->error;
156
+ }
157
+
158
+ /* Update factorization when R changes */
159
+ void scs_update_lin_sys_diag_r(ScsLinSysWork *p, const scs_float *diag_r) {
160
+ scs_int i;
161
+
162
+ for (i = 0; i < p->n; ++i) {
163
+ /* top left is R_x + P, bottom right is -R_y */
164
+ p->kkt->x[p->diag_r_idxs[i]] = p->diag_p[i] + diag_r[i];
165
+ }
166
+ for (i = p->n; i < p->n + p->m; ++i) {
167
+ /* top left is R_x + P, bottom right is -R_y */
168
+ p->kkt->x[p->diag_r_idxs[i]] = -diag_r[i];
169
+ }
170
+
171
+ /* Perform numerical factorization */
172
+ p->phase = PARDISO_NUMERIC;
173
+ _PARDISO(p->pt, &(p->maxfct), &(p->mnum), &(p->mtype), &(p->phase),
174
+ &(p->n_plus_m), p->kkt->x, p->kkt->p, p->kkt->i, SCS_NULL,
175
+ &(p->nrhs), p->iparm, &(p->msglvl), SCS_NULL, SCS_NULL, &(p->error));
176
+
177
+ if (p->error != 0) {
178
+ scs_printf("Error in PARDISO factorization when updating: %d.\n",
179
+ (int)p->error);
180
+ scs_free_lin_sys_work(p);
181
+ }
182
+ }