scs 0.4.0 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55):
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +8 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +1 -1
  5. data/lib/scs/ffi.rb +2 -2
  6. data/lib/scs/version.rb +1 -1
  7. data/lib/scs.rb +3 -3
  8. data/vendor/scs/CITATION.cff +2 -2
  9. data/vendor/scs/CMakeLists.txt +305 -171
  10. data/vendor/scs/Makefile +44 -19
  11. data/vendor/scs/README.md +1 -1
  12. data/vendor/scs/include/glbopts.h +34 -14
  13. data/vendor/scs/include/linsys.h +8 -8
  14. data/vendor/scs/include/scs.h +6 -2
  15. data/vendor/scs/include/scs_blas.h +4 -0
  16. data/vendor/scs/include/scs_types.h +3 -1
  17. data/vendor/scs/include/scs_work.h +9 -8
  18. data/vendor/scs/include/util.h +1 -1
  19. data/vendor/scs/linsys/cpu/direct/private.c +32 -153
  20. data/vendor/scs/linsys/cpu/direct/private.h +6 -6
  21. data/vendor/scs/linsys/cpu/indirect/private.c +9 -22
  22. data/vendor/scs/linsys/cpu/indirect/private.h +4 -2
  23. data/vendor/scs/linsys/csparse.c +140 -12
  24. data/vendor/scs/linsys/csparse.h +10 -17
  25. data/vendor/scs/linsys/gpu/gpu.c +4 -4
  26. data/vendor/scs/linsys/gpu/gpu.h +1 -1
  27. data/vendor/scs/linsys/gpu/indirect/private.c +15 -26
  28. data/vendor/scs/linsys/mkl/direct/private.c +182 -0
  29. data/vendor/scs/linsys/mkl/direct/private.h +38 -0
  30. data/vendor/scs/linsys/scs_matrix.c +11 -5
  31. data/vendor/scs/scs.mk +40 -27
  32. data/vendor/scs/src/cones.c +17 -161
  33. data/vendor/scs/src/exp_cone.c +399 -0
  34. data/vendor/scs/src/linalg.c +17 -3
  35. data/vendor/scs/src/normalize.c +4 -2
  36. data/vendor/scs/src/rw.c +107 -38
  37. data/vendor/scs/src/scs.c +103 -69
  38. data/vendor/scs/src/util.c +12 -3
  39. data/vendor/scs/test/minunit.h +2 -1
  40. data/vendor/scs/test/problem_utils.h +2 -1
  41. data/vendor/scs/test/problems/hs21_tiny_qp.h +1 -1
  42. data/vendor/scs/test/problems/hs21_tiny_qp_rw.h +8 -3
  43. data/vendor/scs/test/problems/max_ent +0 -0
  44. data/vendor/scs/test/problems/max_ent.h +8 -0
  45. data/vendor/scs/test/problems/mpc_bug.h +19 -0
  46. data/vendor/scs/test/problems/mpc_bug1 +0 -0
  47. data/vendor/scs/test/problems/mpc_bug2 +0 -0
  48. data/vendor/scs/test/problems/mpc_bug3 +0 -0
  49. data/vendor/scs/test/problems/random_prob.h +2 -43
  50. data/vendor/scs/test/problems/rob_gauss_cov_est.h +7 -2
  51. data/vendor/scs/test/problems/test_exp_cone.h +84 -0
  52. data/vendor/scs/test/problems/test_prob_from_data_file.h +73 -0
  53. data/vendor/scs/test/run_from_file.c +7 -1
  54. data/vendor/scs/test/run_tests.c +25 -9
  55. metadata +14 -3
@@ -1,20 +1,10 @@
1
1
  #include "private.h"
2
- #include "linsys.h"
3
2
 
4
- const char *SCS(get_lin_sys_method)() {
5
- return "sparse-direct";
3
+ const char *scs_get_lin_sys_method(void) {
4
+ return "sparse-direct-amd-qdldl";
6
5
  }
7
6
 
8
- /*
9
- char *SCS(get_lin_sys_summary)(ScsLinSysWork *p, const ScsInfo *info) {
10
- char *str = (char *)scs_malloc(sizeof(char) * 128);
11
- scs_int n = p->L->n;
12
- sprintf(str, "lin-sys: nnz(L): %li\n", (long)(p->L->p[n] + n));
13
- return str;
14
- }
15
- */
16
-
17
- void SCS(free_lin_sys_work)(ScsLinSysWork *p) {
7
+ void scs_free_lin_sys_work(ScsLinSysWork *p) {
18
8
  if (p) {
19
9
  SCS(cs_spfree)(p->L);
20
10
  SCS(cs_spfree)(p->kkt);
@@ -33,155 +23,43 @@ void SCS(free_lin_sys_work)(ScsLinSysWork *p) {
33
23
  }
34
24
  }
35
25
 
36
- static csc *form_kkt(const ScsMatrix *A, const ScsMatrix *P, scs_float *diag_p,
37
- const scs_float *diag_r, scs_int *diag_r_idxs) {
38
- /* ONLY UPPER TRIANGULAR PART IS STUFFED
39
- * forms column compressed kkt matrix
40
- * assumes column compressed form A matrix
41
- *
42
- * forms upper triangular part of [(I + P) A'; A -I]
43
- * P : n x n, A: m x n.
44
- */
45
- scs_int h, i, j, count;
46
- csc *Kcsc, *K;
47
- scs_int n = A->n;
48
- scs_int m = A->m;
49
- scs_int Anz = A->p[n];
50
- scs_int Knzmax;
51
- scs_int *idx_mapping;
52
- if (P) {
53
- /* Upper bound P + I upper triangular component as Pnz + n */
54
- Knzmax = n + m + Anz + P->p[n];
55
- } else {
56
- Knzmax = n + m + Anz;
57
- }
58
- K = SCS(cs_spalloc)(m + n, m + n, Knzmax, 1, 1);
59
-
60
- #if VERBOSITY > 0
61
- scs_printf("forming kkt\n");
62
- #endif
63
- /* Here we generate a triplet matrix and then compress to CSC */
64
- if (!K) {
65
- return SCS_NULL;
66
- }
67
-
68
- count = 0; /* element counter */
69
- if (P) {
70
- /* R_x + P in top left */
71
- for (j = 0; j < n; j++) { /* cols */
72
- diag_p[j] = 0.;
73
- /* empty column, add diagonal */
74
- if (P->p[j] == P->p[j + 1]) {
75
- K->i[count] = j;
76
- K->p[count] = j;
77
- K->x[count] = diag_r[j];
78
- diag_r_idxs[j] = count; /* store the indices where diag_r occurs */
79
- count++;
80
- }
81
- for (h = P->p[j]; h < P->p[j + 1]; h++) {
82
- i = P->i[h]; /* row */
83
- if (i > j) { /* only upper triangular needed */
84
- break;
85
- }
86
- K->i[count] = i;
87
- K->p[count] = j;
88
- K->x[count] = P->x[h];
89
- if (i == j) {
90
- /* P has diagonal element */
91
- diag_p[j] = P->x[h];
92
- K->x[count] += diag_r[j];
93
- diag_r_idxs[j] = count; /* store the indices where diag_r occurs */
94
- }
95
- count++;
96
- /* reached the end without adding diagonal, do it now */
97
- if ((i < j) && (h + 1 == P->p[j + 1] || P->i[h + 1] > j)) {
98
- K->i[count] = j;
99
- K->p[count] = j;
100
- K->x[count] = diag_r[j];
101
- diag_r_idxs[j] = count; /* store the indices where diag_r occurs */
102
- count++;
103
- }
104
- }
105
- }
106
- } else {
107
- /* R_x in top left */
108
- for (j = 0; j < n; j++) {
109
- diag_p[j] = 0.;
110
- K->i[count] = j;
111
- K->p[count] = j;
112
- K->x[count] = diag_r[j];
113
- diag_r_idxs[j] = count; /* store the indices where diag_r occurs */
114
- count++;
115
- }
116
- }
117
-
118
- /* A^T at top right */
119
- for (j = 0; j < n; j++) {
120
- for (h = A->p[j]; h < A->p[j + 1]; h++) {
121
- K->p[count] = A->i[h] + n;
122
- K->i[count] = j;
123
- K->x[count] = A->x[h];
124
- count++;
125
- }
126
- }
127
-
128
- /* -R_y at bottom right */
129
- for (j = 0; j < m; j++) {
130
- K->i[count] = j + n;
131
- K->p[count] = j + n;
132
- K->x[count] = -diag_r[j + n];
133
- diag_r_idxs[j + n] = count; /* store the indices where diag_r occurs */
134
- count++;
135
- }
136
-
137
- K->nz = count;
138
- idx_mapping = (scs_int *)scs_calloc(K->nz, sizeof(scs_int));
139
- Kcsc = SCS(cs_compress)(K, idx_mapping);
140
- for (i = 0; i < m + n; i++) {
141
- diag_r_idxs[i] = idx_mapping[diag_r_idxs[i]];
142
- }
143
- SCS(cs_spfree)(K);
144
- scs_free(idx_mapping);
145
- return Kcsc;
146
- }
147
-
148
- static scs_int _ldl_init(csc *A, scs_int *P, scs_float **info) {
26
+ static scs_int _ldl_init(ScsMatrix *A, scs_int *P, scs_float **info) {
149
27
  *info = (scs_float *)scs_calloc(AMD_INFO, sizeof(scs_float));
150
28
  return amd_order(A->n, A->p, A->i, P, (scs_float *)SCS_NULL, *info);
151
29
  }
152
30
 
153
31
  /* call only once */
154
32
  static scs_int ldl_prepare(ScsLinSysWork *p) {
155
- csc *kkt = p->kkt, *L = p->L;
156
- scs_int n = kkt->n;
33
+ ScsMatrix *kkt = p->kkt, *L = p->L;
34
+ scs_int nzmax, n = kkt->n;
157
35
  p->etree = (scs_int *)scs_calloc(n, sizeof(scs_int));
158
36
  p->Lnz = (scs_int *)scs_calloc(n, sizeof(scs_int));
159
37
  p->iwork = (scs_int *)scs_calloc(3 * n, sizeof(scs_int));
160
38
  L->p = (scs_int *)scs_calloc((1 + n), sizeof(scs_int));
161
- L->nzmax = QDLDL_etree(n, kkt->p, kkt->i, p->iwork, p->Lnz, p->etree);
162
- if (L->nzmax < 0) {
39
+ nzmax = QDLDL_etree(n, kkt->p, kkt->i, p->iwork, p->Lnz, p->etree);
40
+ if (nzmax < 0) {
163
41
  scs_printf("Error in elimination tree calculation.\n");
164
- if (L->nzmax == -1) {
42
+ if (nzmax == -1) {
165
43
  scs_printf("Matrix is not perfectly upper triangular.\n");
166
- } else if (L->nzmax == -2) {
44
+ } else if (nzmax == -2) {
167
45
  scs_printf("Integer overflow in L nonzero count.\n");
168
46
  }
169
- return L->nzmax;
47
+ return nzmax;
170
48
  }
171
49
 
172
- L->x = (scs_float *)scs_calloc(L->nzmax, sizeof(scs_float));
173
- L->i = (scs_int *)scs_calloc(L->nzmax, sizeof(scs_int));
50
+ L->x = (scs_float *)scs_calloc(nzmax, sizeof(scs_float));
51
+ L->i = (scs_int *)scs_calloc(nzmax, sizeof(scs_int));
174
52
  p->Dinv = (scs_float *)scs_calloc(n, sizeof(scs_float));
175
53
  p->D = (scs_float *)scs_calloc(n, sizeof(scs_float));
176
54
  p->bwork = (scs_int *)scs_calloc(n, sizeof(scs_int));
177
55
  p->fwork = (scs_float *)scs_calloc(n, sizeof(scs_float));
178
- return L->nzmax;
56
+ return nzmax;
179
57
  }
180
58
 
181
59
  /* can call many times */
182
60
  static scs_int ldl_factor(ScsLinSysWork *p, scs_int num_vars) {
183
61
  scs_int factor_status;
184
- csc *kkt = p->kkt, *L = p->L;
62
+ ScsMatrix *kkt = p->kkt, *L = p->L;
185
63
  #if VERBOSITY > 0
186
64
  scs_printf("numeric factorization\n");
187
65
  #endif
@@ -217,7 +95,7 @@ static void _ldl_permt(scs_int n, scs_float *x, scs_float *b, scs_int *P) {
217
95
  x[P[j]] = b[j];
218
96
  }
219
97
 
220
- static void _ldl_solve(scs_float *b, csc *L, scs_float *Dinv, scs_int *P,
98
+ static void _ldl_solve(scs_float *b, ScsMatrix *L, scs_float *Dinv, scs_int *P,
221
99
  scs_float *bp) {
222
100
  /* solves PLDL'P' x = b for x */
223
101
  scs_int n = L->n;
@@ -240,11 +118,11 @@ static scs_int *cs_pinv(scs_int const *p, scs_int n) {
240
118
  return pinv; /* return result */
241
119
  }
242
120
 
243
- static csc *cs_symperm(const csc *A, const scs_int *pinv, scs_int *idx_mapping,
244
- scs_int values) {
121
+ static ScsMatrix *cs_symperm(const ScsMatrix *A, const scs_int *pinv,
122
+ scs_int *idx_mapping, scs_int values) {
245
123
  scs_int i, j, p, q, i2, j2, n, *Ap, *Ai, *Cp, *Ci, *w;
246
124
  scs_float *Cx, *Ax;
247
- csc *C;
125
+ ScsMatrix *C;
248
126
  n = A->n;
249
127
  Ap = A->p;
250
128
  Ai = A->i;
@@ -290,14 +168,16 @@ static csc *cs_symperm(const csc *A, const scs_int *pinv, scs_int *idx_mapping,
290
168
  1); /* success; free workspace, return C */
291
169
  }
292
170
 
293
- static csc *permute_kkt(const ScsMatrix *A, const ScsMatrix *P,
294
- ScsLinSysWork *p, const scs_float *diag_r) {
171
+ static ScsMatrix *permute_kkt(const ScsMatrix *A, const ScsMatrix *P,
172
+ ScsLinSysWork *p, const scs_float *diag_r) {
295
173
  scs_float *info;
296
- scs_int *Pinv, amd_status, *idx_mapping, i;
297
- csc *kkt_perm, *kkt = form_kkt(A, P, p->diag_p, diag_r, p->diag_r_idxs);
174
+ scs_int *Pinv, amd_status, *idx_mapping, i, kkt_nnz;
175
+ ScsMatrix *kkt_perm;
176
+ ScsMatrix *kkt = SCS(form_kkt)(A, P, p->diag_p, diag_r, p->diag_r_idxs, 1);
298
177
  if (!kkt) {
299
178
  return SCS_NULL;
300
179
  }
180
+ kkt_nnz = kkt->p[kkt->n];
301
181
  amd_status = _ldl_init(kkt, p->perm, &info);
302
182
  if (amd_status < 0) {
303
183
  scs_printf("AMD permutatation error.\n");
@@ -308,7 +188,7 @@ static csc *permute_kkt(const ScsMatrix *A, const ScsMatrix *P,
308
188
  amd_info(info);
309
189
  #endif
310
190
  Pinv = cs_pinv(p->perm, A->n + A->m);
311
- idx_mapping = (scs_int *)scs_calloc(kkt->nzmax, sizeof(scs_int));
191
+ idx_mapping = (scs_int *)scs_calloc(kkt_nnz, sizeof(scs_int));
312
192
  kkt_perm = cs_symperm(kkt, Pinv, idx_mapping, 1);
313
193
  for (i = 0; i < A->n + A->m; i++) {
314
194
  p->diag_r_idxs[i] = idx_mapping[p->diag_r_idxs[i]];
@@ -320,7 +200,7 @@ static csc *permute_kkt(const ScsMatrix *A, const ScsMatrix *P,
320
200
  return kkt_perm;
321
201
  }
322
202
 
323
- void SCS(update_lin_sys_diag_r)(ScsLinSysWork *p, const scs_float *diag_r) {
203
+ void scs_update_lin_sys_diag_r(ScsLinSysWork *p, const scs_float *diag_r) {
324
204
  scs_int i, ldl_status;
325
205
  for (i = 0; i < p->n; ++i) {
326
206
  /* top left is R_x + P, bottom right is -R_y */
@@ -339,21 +219,20 @@ void SCS(update_lin_sys_diag_r)(ScsLinSysWork *p, const scs_float *diag_r) {
339
219
  }
340
220
  }
341
221
 
342
- ScsLinSysWork *SCS(init_lin_sys_work)(const ScsMatrix *A, const ScsMatrix *P,
343
- const scs_float *diag_r) {
222
+ ScsLinSysWork *scs_init_lin_sys_work(const ScsMatrix *A, const ScsMatrix *P,
223
+ const scs_float *diag_r) {
344
224
  ScsLinSysWork *p = (ScsLinSysWork *)scs_calloc(1, sizeof(ScsLinSysWork));
345
225
  scs_int n_plus_m = A->n + A->m, ldl_status, ldl_prepare_status;
346
226
  p->m = A->m;
347
227
  p->n = A->n;
348
228
  p->diag_p = (scs_float *)scs_calloc(A->n, sizeof(scs_float));
349
229
  p->perm = (scs_int *)scs_calloc(sizeof(scs_int), n_plus_m);
350
- p->L = (csc *)scs_calloc(1, sizeof(csc));
230
+ p->L = (ScsMatrix *)scs_calloc(1, sizeof(ScsMatrix));
351
231
  p->bp = (scs_float *)scs_calloc(n_plus_m, sizeof(scs_float));
352
232
  p->diag_r_idxs = (scs_int *)scs_calloc(n_plus_m, sizeof(scs_int));
353
233
  p->factorizations = 0;
354
234
  p->L->m = n_plus_m;
355
235
  p->L->n = n_plus_m;
356
- p->L->nz = -1;
357
236
  p->kkt = permute_kkt(A, P, p, diag_r);
358
237
  ldl_prepare_status = ldl_prepare(p);
359
238
  ldl_status = ldl_factor(p, A->n);
@@ -366,8 +245,8 @@ ScsLinSysWork *SCS(init_lin_sys_work)(const ScsMatrix *A, const ScsMatrix *P,
366
245
  return p;
367
246
  }
368
247
 
369
- scs_int SCS(solve_lin_sys)(ScsLinSysWork *p, scs_float *b, const scs_float *s,
370
- scs_float tol) {
248
+ scs_int scs_solve_lin_sys(ScsLinSysWork *p, scs_float *b, const scs_float *s,
249
+ scs_float tol) {
371
250
  /* returns solution to linear system */
372
251
  /* Ax = b with solution stored in b */
373
252
  _ldl_solve(b, p->L, p->Dinv, p->perm, p->bp);
@@ -9,15 +9,15 @@ extern "C" {
9
9
  #include "external/amd/amd.h"
10
10
  #include "external/qdldl/qdldl.h"
11
11
  #include "glbopts.h"
12
- #include "scs.h"
12
+ #include "linsys.h"
13
13
  #include "scs_matrix.h"
14
14
 
15
15
  struct SCS_LIN_SYS_WORK {
16
- scs_int m, n; /* linear system dimensions */
17
- csc *kkt, *L; /* KKT, and factorization matrix L resp. */
18
- scs_float *Dinv; /* inverse diagonal matrix of factorization */
19
- scs_int *perm; /* permutation of KKT matrix for factorization */
20
- scs_float *bp; /* workspace memory for solves */
16
+ scs_int m, n; /* linear system dimensions */
17
+ ScsMatrix *kkt, *L; /* KKT, and factorization matrix L resp. */
18
+ scs_float *Dinv; /* inverse diagonal matrix of factorization */
19
+ scs_int *perm; /* permutation of KKT matrix for factorization */
20
+ scs_float *bp; /* workspace memory for solves */
21
21
  scs_int *diag_r_idxs;
22
22
  scs_int factorizations;
23
23
  /* ldl factorization workspace */
@@ -1,22 +1,9 @@
1
1
  #include "private.h"
2
- #include "linsys.h"
3
- #include "util.h"
4
- #include <limits.h>
5
2
 
6
- const char *SCS(get_lin_sys_method)() {
7
- return "sparse-indirect";
3
+ const char *scs_get_lin_sys_method(void) {
4
+ return "sparse-indirect-scs";
8
5
  }
9
6
 
10
- /*
11
- char *SCS(get_lin_sys_summary)(ScsLinSysWork *p, const ScsInfo *info) {
12
- char *str = (char *)scs_malloc(sizeof(char) * 128);
13
- sprintf(str, "lin-sys: avg cg its: %2.2f\n",
14
- (scs_float)p->tot_cg_its / (info->iter + 1));
15
- p->tot_cg_its = 0;
16
- return str;
17
- }
18
- */
19
-
20
7
  /* Not possible to do this on the fly due to M_ii += a_i' (R_y)^-1 a_i */
21
8
  /* set M = inv ( diag ( R_x + P + A' R_y^{-1} A ) ) */
22
9
  static void set_preconditioner(ScsLinSysWork *p) {
@@ -97,7 +84,7 @@ static void transpose(const ScsMatrix *A, ScsLinSysWork *p) {
97
84
  #endif
98
85
  }
99
86
 
100
- void SCS(free_lin_sys_work)(ScsLinSysWork *p) {
87
+ void scs_free_lin_sys_work(ScsLinSysWork *p) {
101
88
  if (p) {
102
89
  scs_free(p->p);
103
90
  scs_free(p->r);
@@ -162,13 +149,13 @@ static void apply_pre_conditioner(scs_float *z, scs_float *r, scs_int n,
162
149
  }
163
150
 
164
151
  /* no need to update anything in this case */
165
- void SCS(update_lin_sys_diag_r)(ScsLinSysWork *p, const scs_float *diag_r) {
152
+ void scs_update_lin_sys_diag_r(ScsLinSysWork *p, const scs_float *diag_r) {
166
153
  p->diag_r = diag_r; /* this isn't needed but do it to be safe */
167
154
  set_preconditioner(p);
168
155
  }
169
156
 
170
- ScsLinSysWork *SCS(init_lin_sys_work)(const ScsMatrix *A, const ScsMatrix *P,
171
- const scs_float *diag_r) {
157
+ ScsLinSysWork *scs_init_lin_sys_work(const ScsMatrix *A, const ScsMatrix *P,
158
+ const scs_float *diag_r) {
172
159
  ScsLinSysWork *p = (ScsLinSysWork *)scs_calloc(1, sizeof(ScsLinSysWork));
173
160
  p->A = A;
174
161
  p->P = P;
@@ -198,7 +185,7 @@ ScsLinSysWork *SCS(init_lin_sys_work)(const ScsMatrix *A, const ScsMatrix *P,
198
185
  p->tot_cg_its = 0;
199
186
  if (!p->p || !p->r || !p->Gp || !p->tmp || !p->At || !p->At->i || !p->At->p ||
200
187
  !p->At->x) {
201
- SCS(free_lin_sys_work)(p);
188
+ scs_free_lin_sys_work(p);
202
189
  return SCS_NULL;
203
190
  }
204
191
  return p;
@@ -288,8 +275,8 @@ static scs_int pcg(ScsLinSysWork *pr, const scs_float *s, scs_float *b,
288
275
  * y = R_y^{-1} (Ax - ry)
289
276
  *
290
277
  */
291
- scs_int SCS(solve_lin_sys)(ScsLinSysWork *p, scs_float *b, const scs_float *s,
292
- scs_float tol) {
278
+ scs_int scs_solve_lin_sys(ScsLinSysWork *p, scs_float *b, const scs_float *s,
279
+ scs_float tol) {
293
280
  scs_int cg_its, max_iters;
294
281
 
295
282
  if (tol <= 0.) {
@@ -5,11 +5,13 @@
5
5
  extern "C" {
6
6
  #endif
7
7
 
8
+ #include "csparse.h"
8
9
  #include "glbopts.h"
9
10
  #include "linalg.h"
10
- #include "scs.h"
11
+ #include "linsys.h"
11
12
  #include "scs_matrix.h"
12
- #include <math.h>
13
+ #include "util.h" /* timer */
14
+ #include <string.h>
13
15
 
14
16
  struct SCS_LIN_SYS_WORK {
15
17
  scs_int n, m; /* linear system dimensions */
@@ -2,39 +2,37 @@
2
2
 
3
3
  #include "csparse.h"
4
4
 
5
- csc *SCS(cs_spalloc)(scs_int m, scs_int n, scs_int nzmax, scs_int values,
6
- scs_int triplet) {
7
- csc *A = (csc *)scs_calloc(1, sizeof(csc)); /* allocate the csc struct */
5
+ ScsMatrix *SCS(cs_spalloc)(scs_int m, scs_int n, scs_int nzmax, scs_int values,
6
+ scs_int triplet) {
7
+ ScsMatrix *A = (ScsMatrix *)scs_calloc(1, sizeof(ScsMatrix));
8
8
  if (!A) {
9
9
  return SCS_NULL;
10
10
  } /* out of memory */
11
11
  A->m = m; /* define dimensions and nzmax */
12
12
  A->n = n;
13
- A->nzmax = nzmax = MAX(nzmax, 1);
14
- A->nz = triplet ? 0 : -1; /* allocate triplet or comp.col */
15
13
  A->p = (scs_int *)scs_calloc((triplet ? nzmax : n + 1), sizeof(scs_int));
16
14
  A->i = (scs_int *)scs_calloc(nzmax, sizeof(scs_int));
17
15
  A->x = values ? (scs_float *)scs_calloc(nzmax, sizeof(scs_float)) : SCS_NULL;
18
16
  return (!A->p || !A->i || (values && !A->x)) ? SCS(cs_spfree)(A) : A;
19
17
  }
20
18
 
21
- csc *SCS(cs_done)(csc *C, void *w, void *x, scs_int ok) {
19
+ ScsMatrix *SCS(cs_done)(ScsMatrix *C, void *w, void *x, scs_int ok) {
22
20
  scs_free(w); /* free workspace */
23
21
  scs_free(x);
24
22
  return ok ? C : SCS(cs_spfree)(C); /* return result if OK, else free it */
25
23
  }
26
24
 
27
25
  /* C = compressed-column form of a triplet matrix T */
28
- csc *SCS(cs_compress)(const csc *T, scs_int *idx_mapping) {
29
- scs_int m, n, nz, p, k, *Cp, *Ci, *w, *Ti, *Tj;
26
+ ScsMatrix *SCS(cs_compress)(const ScsMatrix *T, scs_int nz,
27
+ scs_int *idx_mapping) {
28
+ scs_int m, n, p, k, *Cp, *Ci, *w, *Ti, *Tj;
30
29
  scs_float *Cx, *Tx;
31
- csc *C;
30
+ ScsMatrix *C;
32
31
  m = T->m;
33
32
  n = T->n;
34
33
  Ti = T->i;
35
34
  Tj = T->p;
36
35
  Tx = T->x;
37
- nz = T->nz;
38
36
  C = SCS(cs_spalloc)(m, n, nz, Tx != SCS_NULL, 0); /* allocate result */
39
37
  w = (scs_int *)scs_calloc(n, sizeof(scs_int)); /* get workspace */
40
38
  if (!C || !w) {
@@ -75,7 +73,7 @@ scs_float SCS(cumsum)(scs_int *p, scs_int *c, scs_int n) {
75
73
  return nz2; /* return sum (c [0..n-1]) */
76
74
  }
77
75
 
78
- csc *SCS(cs_spfree)(csc *A) {
76
+ ScsMatrix *SCS(cs_spfree)(ScsMatrix *A) {
79
77
  if (!A) {
80
78
  return SCS_NULL;
81
79
  } /* do nothing if A already SCS_NULL */
@@ -83,5 +81,135 @@ csc *SCS(cs_spfree)(csc *A) {
83
81
  scs_free(A->i);
84
82
  scs_free(A->x);
85
83
  scs_free(A);
86
- return (csc *)SCS_NULL; /* free the csc struct and return SCS_NULL */
84
+ /* free the ScsMatrix struct and return SCS_NULL */
85
+ return (ScsMatrix *)SCS_NULL;
86
+ }
87
+
88
+ /* Build the KKT matrix */
89
+ ScsMatrix *SCS(form_kkt)(const ScsMatrix *A, const ScsMatrix *P,
90
+ scs_float *diag_p, const scs_float *diag_r,
91
+ scs_int *diag_r_idxs, scs_int upper) {
92
+ /*
93
+ * Forms column compressed KKT matrix assumes column compressed A,P matrices.
94
+ * Only upper OR lower triangular part is stuffed, depending on `upper` flag.
95
+ *
96
+ * Forms upper/lower triangular part of [(R_x + P) A'; A -R_y]
97
+ * Shapes: P : n x n, A: m x n.
98
+ *
99
+ * Output: `diag_p` will contain values of P diagonal upon completion,
100
+ * and `diag_r_idxs` will contain the indices corresponding to the entries
101
+ * in the returned matrix corresponding to the entries of R.
102
+ *
103
+ */
104
+ scs_int h, i, j, count;
105
+ ScsMatrix *Kcsc, *K;
106
+ scs_int n = A->n;
107
+ scs_int m = A->m;
108
+ scs_int Anz = A->p[n];
109
+ scs_int Knzmax;
110
+ scs_int *idx_mapping;
111
+ if (P) {
112
+ /* Upper bound P + I triangular component NNZs as Pnz + n */
113
+ Knzmax = n + m + Anz + P->p[n];
114
+ } else {
115
+ Knzmax = n + m + Anz;
116
+ }
117
+ K = SCS(cs_spalloc)(m + n, m + n, Knzmax, 1, 1);
118
+
119
+ #if VERBOSITY > 0
120
+ scs_printf("forming kkt\n");
121
+ #endif
122
+ /* Here we generate a triplet matrix and then compress to CSC */
123
+ if (!K) {
124
+ return SCS_NULL;
125
+ }
126
+
127
+ count = 0; /* element counter */
128
+ if (P) {
129
+ /* R_x + P in top left */
130
+ for (j = 0; j < n; j++) { /* cols */
131
+ diag_p[j] = 0.;
132
+ /* empty column, add diagonal */
133
+ if (P->p[j] == P->p[j + 1]) {
134
+ K->i[count] = j;
135
+ K->p[count] = j;
136
+ K->x[count] = diag_r[j];
137
+ diag_r_idxs[j] = count; /* store the indices where diag_r occurs */
138
+ count++;
139
+ }
140
+ for (h = P->p[j]; h < P->p[j + 1]; h++) {
141
+ i = P->i[h]; /* row */
142
+ if (i > j) { /* only upper triangular needed */
143
+ break;
144
+ }
145
+ if (upper) {
146
+ K->i[count] = i;
147
+ K->p[count] = j;
148
+ } else { /* lower triangular */
149
+ /* P is passed in upper triangular, need to flip that here */
150
+ K->i[count] = j; /* col -> row */
151
+ K->p[count] = i; /* row -> col */
152
+ }
153
+ K->x[count] = P->x[h];
154
+ if (i == j) {
155
+ /* P has diagonal element */
156
+ diag_p[j] = P->x[h];
157
+ K->x[count] += diag_r[j];
158
+ diag_r_idxs[j] = count; /* store the indices where diag_r occurs */
159
+ }
160
+ count++;
161
+ /* reached the end without adding diagonal, do it now */
162
+ if ((i < j) && (h + 1 == P->p[j + 1] || P->i[h + 1] > j)) {
163
+ K->i[count] = j;
164
+ K->p[count] = j;
165
+ K->x[count] = diag_r[j];
166
+ diag_r_idxs[j] = count; /* store the indices where diag_r occurs */
167
+ count++;
168
+ }
169
+ }
170
+ }
171
+ } else {
172
+ /* R_x in top left */
173
+ for (j = 0; j < n; j++) {
174
+ diag_p[j] = 0.;
175
+ K->i[count] = j;
176
+ K->p[count] = j;
177
+ K->x[count] = diag_r[j];
178
+ diag_r_idxs[j] = count; /* store the indices where diag_r occurs */
179
+ count++;
180
+ }
181
+ }
182
+
183
+ /* A in bottom left or A^T top right */
184
+ for (j = 0; j < n; j++) { /* column */
185
+ for (h = A->p[j]; h < A->p[j + 1]; h++) {
186
+ if (upper) {
187
+ K->p[count] = A->i[h] + n; /* column */
188
+ K->i[count] = j; /*row */
189
+ } else { /* lower triangular */
190
+ K->p[count] = j; /* column */
191
+ K->i[count] = A->i[h] + n; /* row */
192
+ }
193
+ K->x[count] = A->x[h];
194
+ count++;
195
+ }
196
+ }
197
+
198
+ /* -R_y at bottom right */
199
+ for (j = 0; j < m; j++) {
200
+ K->i[count] = j + n;
201
+ K->p[count] = j + n;
202
+ K->x[count] = -diag_r[j + n];
203
+ diag_r_idxs[j + n] = count; /* store the indices where diag_r occurs */
204
+ count++;
205
+ }
206
+
207
+ idx_mapping = (scs_int *)scs_calloc(count, sizeof(scs_int));
208
+ Kcsc = SCS(cs_compress)(K, count, idx_mapping);
209
+ for (i = 0; i < m + n; i++) {
210
+ diag_r_idxs[i] = idx_mapping[diag_r_idxs[i]];
211
+ }
212
+ SCS(cs_spfree)(K);
213
+ scs_free(idx_mapping);
214
+ return Kcsc;
87
215
  }
@@ -10,24 +10,17 @@ extern "C" {
10
10
  #include "glbopts.h"
11
11
  #include "scs.h"
12
12
 
13
- /* matrix in compressed-column or triplet form */
14
- typedef struct SPARSE_MATRIX {
15
- scs_int nzmax; /* maximum number of entries */
16
- scs_int m; /* number of rows */
17
- scs_int n; /* number of columns */
18
- scs_int *p; /* column pointers (size n+1) or col indices (size nzmax) */
19
- scs_int *i; /* row indices, size nzmax */
20
- scs_float *x; /* numerical values, size nzmax */
21
- scs_int nz; /* # of entries in triplet matrix, -1 for compressed-col */
22
- } csc;
23
-
24
- csc *SCS(cs_spalloc)(scs_int m, scs_int n, scs_int nzmax, scs_int values,
25
- scs_int triplet);
26
- csc *SCS(cs_done)(csc *C, void *w, void *x, scs_int ok);
27
- csc *SCS(cs_compress)(const csc *T, scs_int *idx_mapping);
13
+ ScsMatrix *SCS(cs_spalloc)(scs_int m, scs_int n, scs_int nzmax, scs_int values,
14
+ scs_int triplet);
15
+ ScsMatrix *SCS(cs_done)(ScsMatrix *C, void *w, void *x, scs_int ok);
16
+ ScsMatrix *SCS(cs_compress)(const ScsMatrix *T, scs_int nz,
17
+ scs_int *idx_mapping);
18
+ ScsMatrix *SCS(cs_spfree)(ScsMatrix *A);
28
19
  scs_float SCS(cumsum)(scs_int *p, scs_int *c, scs_int n);
29
- csc *SCS(cs_spfree)(csc *A);
30
-
20
+ /* Forms KKT matrix */
21
+ ScsMatrix *SCS(form_kkt)(const ScsMatrix *A, const ScsMatrix *P,
22
+ scs_float *diag_p, const scs_float *diag_r,
23
+ scs_int *diag_r_idxs, scs_int upper);
31
24
  #ifdef __cplusplus
32
25
  }
33
26
  #endif
@@ -19,13 +19,13 @@ void SCS(accum_by_atrans_gpu)(const ScsGpuMatrix *Ag,
19
19
  if (*buffer != SCS_NULL) {
20
20
  cudaFree(*buffer);
21
21
  }
22
- cudaMalloc(buffer, *buffer_size);
22
+ cudaMalloc(buffer, new_buffer_size);
23
23
  *buffer_size = new_buffer_size;
24
24
  }
25
25
 
26
26
  CUSPARSE_GEN(SpMV)
27
27
  (cusparse_handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &onef, Ag->descr, x,
28
- &onef, y, SCS_CUDA_FLOAT, SCS_CSRMV_ALG, buffer);
28
+ &onef, y, SCS_CUDA_FLOAT, SCS_CSRMV_ALG, *buffer);
29
29
  }
30
30
 
31
31
  /* this is slow, use trans routine if possible */
@@ -48,13 +48,13 @@ void SCS(accum_by_a_gpu)(const ScsGpuMatrix *Ag, const cusparseDnVecDescr_t x,
48
48
  if (*buffer != SCS_NULL) {
49
49
  cudaFree(*buffer);
50
50
  }
51
- cudaMalloc(buffer, *buffer_size);
51
+ cudaMalloc(buffer, new_buffer_size);
52
52
  *buffer_size = new_buffer_size;
53
53
  }
54
54
 
55
55
  CUSPARSE_GEN(SpMV)
56
56
  (cusparse_handle, CUSPARSE_OPERATION_TRANSPOSE, &onef, Ag->descr, x, &onef, y,
57
- SCS_CUDA_FLOAT, SCS_CSRMV_ALG, buffer);
57
+ SCS_CUDA_FLOAT, SCS_CSRMV_ALG, *buffer);
58
58
  }
59
59
 
60
60
  /* This assumes that P has been made full (ie not triangular) and uses the
@@ -74,7 +74,7 @@ extern "C" {
74
74
  #define SCS_CUSPARSE_INDEX CUSPARSE_INDEX_64I
75
75
  #endif
76
76
 
77
- #define SCS_CSRMV_ALG CUSPARSE_CSRMV_ALG1
77
+ #define SCS_CSRMV_ALG CUSPARSE_SPMV_CSR_ALG1
78
78
  #define SCS_CSR2CSC_ALG CUSPARSE_CSR2CSC_ALG1
79
79
 
80
80
  /*