scs 0.5.1 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +4 -0
  3. data/lib/scs/ffi.rb +2 -0
  4. data/lib/scs/version.rb +1 -1
  5. data/vendor/scs/CITATION.cff +2 -2
  6. data/vendor/scs/CMakeLists.txt +136 -6
  7. data/vendor/scs/Makefile +53 -3
  8. data/vendor/scs/README.md +1 -1
  9. data/vendor/scs/include/cones.h +47 -2
  10. data/vendor/scs/include/glbopts.h +1 -1
  11. data/vendor/scs/include/scs.h +29 -0
  12. data/vendor/scs/include/scs_blas.h +4 -0
  13. data/vendor/scs/include/scs_types.h +3 -1
  14. data/vendor/scs/include/util_spectral_cones.h +45 -0
  15. data/vendor/scs/linsys/cpu/direct/private.c +3 -3
  16. data/vendor/scs/linsys/cpu/direct/private.h +2 -1
  17. data/vendor/scs/linsys/csparse.c +1 -1
  18. data/vendor/scs/linsys/cudss/direct/private.c +279 -0
  19. data/vendor/scs/linsys/cudss/direct/private.h +63 -0
  20. data/vendor/scs/linsys/external/qdldl/qdldl_types.h +1 -1
  21. data/vendor/scs/linsys/gpu/indirect/private.c +14 -21
  22. data/vendor/scs/scs.mk +17 -2
  23. data/vendor/scs/src/aa.c +8 -12
  24. data/vendor/scs/src/cones.c +783 -12
  25. data/vendor/scs/src/rw.c +15 -1
  26. data/vendor/scs/src/scs.c +4 -0
  27. data/vendor/scs/src/spectral_cones/logdeterminant/log_cone_IPM.c +660 -0
  28. data/vendor/scs/src/spectral_cones/logdeterminant/log_cone_Newton.c +279 -0
  29. data/vendor/scs/src/spectral_cones/logdeterminant/log_cone_wrapper.c +205 -0
  30. data/vendor/scs/src/spectral_cones/logdeterminant/logdet_cone.c +143 -0
  31. data/vendor/scs/src/spectral_cones/nuclear/ell1_cone.c +221 -0
  32. data/vendor/scs/src/spectral_cones/nuclear/nuclear_cone.c +99 -0
  33. data/vendor/scs/src/spectral_cones/sum-largest/sum_largest_cone.c +196 -0
  34. data/vendor/scs/src/spectral_cones/sum-largest/sum_largest_eval_cone.c +140 -0
  35. data/vendor/scs/src/spectral_cones/util_spectral_cones.c +52 -0
  36. data/vendor/scs/test/problems/complex_PSD.h +83 -0
  37. data/vendor/scs/test/rng.h +4 -4
  38. data/vendor/scs/test/run_tests.c +25 -0
  39. data/vendor/scs/test/spectral_cones_problems/exp_design.h +141 -0
  40. data/vendor/scs/test/spectral_cones_problems/graph_partitioning.h +275 -0
  41. data/vendor/scs/test/spectral_cones_problems/robust_pca.h +253 -0
  42. data/vendor/scs/test/spectral_cones_problems/several_logdet_cones.h +222 -0
  43. data/vendor/scs/test/spectral_cones_problems/several_nuc_cone.h +285 -0
  44. data/vendor/scs/test/spectral_cones_problems/several_sum_largest.h +420 -0
  45. metadata +21 -2
data/vendor/scs/linsys/cudss/direct/private.c ADDED
@@ -0,0 +1,279 @@
+ #include "private.h"
+ #include "linsys.h"
+
+ /* In case of error abort freeing p */
+ #define CUDSS_CHECK_ABORT(call, p, fname) \
+ do { \
+ cudssStatus_t status = call; \
+ if (status != CUDSS_STATUS_SUCCESS) { \
+ scs_printf("CUDSS call " #fname " returned status = %d\n", status); \
+ scs_free_lin_sys_work(p); \
+ return SCS_NULL; \
+ } \
+ } while (0);
+
+ /* In case of error abort freeing p */
+ #define CUDA_CHECK_ABORT(call, p, fname) \
+ do { \
+ cudaError_t status = call; \
+ if (status != cudaSuccess) { \
+ printf("CUDA call " #fname " returned status = %d\n", status); \
+ scs_free_lin_sys_work(p); \
+ return SCS_NULL; \
+ } \
+ } while (0);
+
+ /* Return the linear system method name */
+ const char *scs_get_lin_sys_method() {
+ return "sparse-direct-cuDSS";
+ }
+
+ /* Free allocated resources for the linear system solver */
+ void scs_free_lin_sys_work(ScsLinSysWork *p) {
+ if (p) {
+ /* Free GPU resources */
+ if (p->d_kkt_val)
+ cudaFree(p->d_kkt_val);
+ if (p->d_kkt_row_ptr)
+ cudaFree(p->d_kkt_row_ptr);
+ if (p->d_kkt_col_ind)
+ cudaFree(p->d_kkt_col_ind);
+ if (p->d_b)
+ cudaFree(p->d_b);
+ if (p->d_sol)
+ cudaFree(p->d_sol);
+
+ /* Free cuDSS resources */
+ if (p->d_kkt_mat)
+ cudssMatrixDestroy(p->d_kkt_mat);
+ if (p->d_b_mat)
+ cudssMatrixDestroy(p->d_b_mat);
+ if (p->d_sol_mat)
+ cudssMatrixDestroy(p->d_sol_mat);
+
+ if (p->solver_config)
+ cudssConfigDestroy(p->solver_config);
+ if (p->solver_data && p->handle)
+ cudssDataDestroy(p->handle, p->solver_data);
+ if (p->handle)
+ cudssDestroy(p->handle);
+
+ /* Free CPU resources */
+ if (p->kkt)
+ SCS(cs_spfree)(p->kkt);
+ if (p->sol)
+ scs_free(p->sol);
+ if (p->diag_r_idxs)
+ scs_free(p->diag_r_idxs);
+ if (p->diag_p)
+ scs_free(p->diag_p);
+
+ scs_free(p);
+ }
+ }
+
+ /* Initialize the linear system solver workspace */
+ ScsLinSysWork *scs_init_lin_sys_work(const ScsMatrix *A, const ScsMatrix *P,
+ const scs_float *diag_r) {
+ ScsLinSysWork *p = scs_calloc(1, sizeof(ScsLinSysWork));
+ if (!p)
+ return SCS_NULL;
+
+ /* Store problem dimensions */
+ p->n = A->n;
+ p->m = A->m;
+ p->n_plus_m = p->n + p->m;
+
+ /* Allocate CPU memory */
+ p->sol = (scs_float *)scs_malloc(sizeof(scs_float) * p->n_plus_m);
+ if (!p->sol) {
+ scs_free_lin_sys_work(p);
+ return SCS_NULL;
+ }
+
+ p->diag_r_idxs = (scs_int *)scs_calloc(p->n_plus_m, sizeof(scs_int));
+ if (!p->diag_r_idxs) {
+ scs_free_lin_sys_work(p);
+ return SCS_NULL;
+ }
+
+ p->diag_p = (scs_float *)scs_calloc(p->n, sizeof(scs_float));
+ if (!p->diag_p) {
+ scs_free_lin_sys_work(p);
+ return SCS_NULL;
+ }
+
+ /* Form KKT matrix as upper-triangular, CSC */
+ /* Because of symmetry it is equivalent to lower-triangular, CSR */
+ p->kkt = SCS(form_kkt)(A, P, p->diag_p, diag_r, p->diag_r_idxs, 1);
+ if (!p->kkt) {
+ scs_printf("Error in forming KKT matrix");
+ scs_free_lin_sys_work(p);
+ return SCS_NULL;
+ }
+
+ cudssStatus_t status;
+ cudaError_t cuda_error;
+
+ /* Create cuDSS handle */
+ CUDSS_CHECK_ABORT(cudssCreate(&p->handle), p, "cudssCreate");
+ /* Creating cuDSS solver configuration and data objects */
+
+ CUDSS_CHECK_ABORT(cudssConfigCreate(&p->solver_config), p,
+ "cudssConfigCreate");
+ CUDSS_CHECK_ABORT(cudssDataCreate(p->handle, &p->solver_data), p,
+ "cudssDataCreate");
+
+ /* Allocate device memory for KKT matrix */
+ scs_int nnz = p->kkt->p[p->n_plus_m];
+
+ CUDA_CHECK_ABORT(cudaMalloc((void **)&p->d_kkt_val, nnz * sizeof(scs_float)),
+ p, "cudaMalloc: kkt_val");
+ CUDA_CHECK_ABORT(cudaMalloc((void **)&p->d_kkt_row_ptr,
+ (p->n_plus_m + 1) * sizeof(scs_int)),
+ p, "cudaMalloc: kkt_row_ptr");
+ CUDA_CHECK_ABORT(
+ cudaMalloc((void **)&p->d_kkt_col_ind, nnz * sizeof(scs_int)), p,
+ "cudaMalloc: kkt_col_ind");
+
+ /* Copy KKT matrix to device */
+ /* Note: we treat column pointers (p->kkt->p) as row pointers on the device */
+ CUDA_CHECK_ABORT(cudaMemcpy(p->d_kkt_val, p->kkt->x, nnz * sizeof(scs_float),
+ cudaMemcpyHostToDevice),
+ p, "cudaMemcpy: kkt_val");
+ CUDA_CHECK_ABORT(cudaMemcpy(p->d_kkt_row_ptr, p->kkt->p,
+ (p->kkt->n + 1) * sizeof(scs_int),
+ cudaMemcpyHostToDevice),
+ p, "cudaMemcpy: kkt_row_ptr");
+ CUDA_CHECK_ABORT(cudaMemcpy(p->d_kkt_col_ind, p->kkt->i,
+ nnz * sizeof(scs_int), cudaMemcpyHostToDevice),
+ p, "cudaMemcpy: kkt_col_ind");
+
+ /* Create kkt matrix descriptor */
+ /* We pass the kkt matrix as symmetric, lower triangular */
+ cudssMatrixType_t mtype = CUDSS_MTYPE_SYMMETRIC;
+ cudssMatrixViewType_t mview = CUDSS_MVIEW_LOWER;
+ cudssIndexBase_t base = CUDSS_BASE_ZERO;
+ CUDSS_CHECK_ABORT(cudssMatrixCreateCsr(
+ &p->d_kkt_mat, p->kkt->m, p->kkt->n, nnz,
+ p->d_kkt_row_ptr, NULL, p->d_kkt_col_ind, p->d_kkt_val,
+ SCS_CUDA_INDEX, SCS_CUDA_FLOAT, mtype, mview, base),
+ p, "cudssMatrixCreateCsr");
+
+ /* Allocate device memory for vectors */
+ CUDA_CHECK_ABORT(
+ cudaMalloc((void **)&p->d_b, p->n_plus_m * sizeof(scs_float)), p,
+ "cudaMalloc: b");
+ CUDA_CHECK_ABORT(
+ cudaMalloc((void **)&p->d_sol, p->n_plus_m * sizeof(scs_float)), p,
+ "cudaMalloc: sol");
+
+ /* Create RHS and solution matrix descriptors */
+ scs_int nrhs = 1;
+ CUDSS_CHECK_ABORT(cudssMatrixCreateDn(&p->d_b_mat, p->n_plus_m, nrhs,
+ p->n_plus_m, p->d_b, SCS_CUDA_FLOAT,
+ CUDSS_LAYOUT_COL_MAJOR),
+ p, "cudssMatrixCreateDn: b");
+ CUDSS_CHECK_ABORT(cudssMatrixCreateDn(&p->d_sol_mat, p->n_plus_m, nrhs,
+ p->n_plus_m, p->d_sol, SCS_CUDA_FLOAT,
+ CUDSS_LAYOUT_COL_MAJOR),
+ p, "cudssMatrixCreateDn: sol");
+
+ /* Symbolic factorization */
+ CUDSS_CHECK_ABORT(cudssExecute(p->handle, CUDSS_PHASE_ANALYSIS,
+ p->solver_config, p->solver_data, p->d_kkt_mat,
+ p->d_sol_mat, p->d_b_mat),
+ p, "cudssExecute: analysis");
+
+ /* Numerical Factorization */
+ CUDSS_CHECK_ABORT(cudssExecute(p->handle, CUDSS_PHASE_FACTORIZATION,
+ p->solver_config, p->solver_data, p->d_kkt_mat,
+ p->d_sol_mat, p->d_b_mat),
+ p, "cudssExecute: factorization");
+
+ return p;
+ }
+
+ /* Solve the linear system for a given RHS b */
+ scs_int scs_solve_lin_sys(ScsLinSysWork *p, scs_float *b, const scs_float *ws,
+ scs_float tol) {
+ /* Copy right-hand side to device */
+ cudaError_t custatus = cudaMemcpy(p->d_b, b, p->n_plus_m * sizeof(scs_float),
+ cudaMemcpyHostToDevice);
+ if (custatus != cudaSuccess) {
+ scs_printf("scs_solve_lin_sys: Error copying `b` side to device: %d\n",
+ (int)custatus);
+ return custatus;
+ }
+
+ // is this really needed?
+ cudssMatrixSetValues(p->d_b_mat, p->d_b);
+
+ /* Solve the system */
+ cudssStatus_t status =
+ cudssExecute(p->handle, CUDSS_PHASE_SOLVE, p->solver_config,
+ p->solver_data, p->d_kkt_mat, p->d_sol_mat, p->d_b_mat);
+
+ if (status != CUDSS_STATUS_SUCCESS) {
+ scs_printf("scs_solve_lin_sys: Error during solve: %d\n", (int)status);
+ return status;
+ }
+
+ /* Copy solution back to host */
+ custatus = cudaMemcpy(b, p->d_sol, p->n_plus_m * sizeof(scs_float),
+ cudaMemcpyDeviceToHost);
+ if (status != cudaSuccess) {
+ scs_printf("scs_solve_lin_sys: Error copying d_sol to host: %d\n",
+ (int)status);
+ return status;
+ }
+
+ return 0; /* Success */
+ }
+
+ /* Update the KKT matrix when R changes */
+ void scs_update_lin_sys_diag_r(ScsLinSysWork *p, const scs_float *diag_r) {
+ scs_int i;
+
+ /* Update KKT matrix on CPU */
+ for (i = 0; i < p->n; ++i) {
+ /* top left is R_x + P */
+ p->kkt->x[p->diag_r_idxs[i]] = p->diag_p[i] + diag_r[i];
+ }
+ for (i = p->n; i < p->n + p->m; ++i) {
+ /* bottom right is -R_y */
+ p->kkt->x[p->diag_r_idxs[i]] = -diag_r[i];
+ }
+
+ /* Copy updated values to device */
+ cudaError_t custatus = cudaMemcpy(p->d_kkt_val, p->kkt->x,
+ p->kkt->p[p->n_plus_m] * sizeof(scs_float),
+ cudaMemcpyHostToDevice);
+ if (custatus != cudaSuccess) {
+ scs_printf(
+ "scs_update_lin_sys_diag_r: Error copying kkt->x to device: %d\n",
+ (int)custatus);
+ return;
+ }
+
+ /* Update the matrix values in cuDSS */
+ cudssStatus_t status;
+ status = cudssMatrixSetCsrPointers(p->d_kkt_mat, p->d_kkt_row_ptr, NULL,
+ p->d_kkt_col_ind, p->d_kkt_val);
+ if (status != CUDSS_STATUS_SUCCESS) {
+ scs_printf(
+ "scs_update_lin_sys_diag_r: Error updating kkt matrix on device: %d\n",
+ (int)status);
+ return;
+ }
+
+ /* Perform Refactorization with the updated matrix */
+ status =
+ cudssExecute(p->handle, CUDSS_PHASE_REFACTORIZATION, p->solver_config,
+ p->solver_data, p->d_kkt_mat, p->d_sol_mat, p->d_b_mat);
+ if (status != CUDSS_STATUS_SUCCESS) {
+ scs_printf("scs_update_lin_sys_diag_r: Error during re-factorization: %d\n",
+ (int)status);
+ return;
+ }
+ }
data/vendor/scs/linsys/cudss/direct/private.h ADDED
@@ -0,0 +1,63 @@
+ #ifndef PRIV_H_GUARD
+ #define PRIV_H_GUARD
+
+ #ifdef __cplusplus
+ extern "C" {
+ #endif
+
+ #ifndef SFLOAT
+ #define SCS_CUDA_FLOAT CUDA_R_64F
+ #else
+ #define SCS_CUDA_FLOAT CUDA_R_32F
+ #endif
+
+ #ifndef DLONG
+ #define SCS_CUDA_INDEX CUDA_R_32I
+ #else
+ #define SCS_CUDA_INDEX CUDA_R_64I
+ #endif
+
+ #include "csparse.h"
+ #include "linsys.h"
+ #include <cuda_runtime.h>
+ #include <cudss.h>
+
+ struct SCS_LIN_SYS_WORK {
+ /* General problem dimensions */
+ scs_int n; /* number of QP variables */
+ scs_int m; /* number of QP constraints */
+ scs_int n_plus_m; /* dimension of the linear system */
+
+ /* CPU matrices and vectors */
+ ScsMatrix *kkt; /* KKT matrix in CSR format */
+ scs_float *sol; /* solution to the KKT system */
+
+ /* cuDSS handle and descriptors */
+ cudssHandle_t handle; /* cuDSS library handle */
+ cudssMatrix_t d_kkt_mat; /* cuDSS matrix descriptors */
+ cudssMatrix_t d_b_mat;
+ cudssMatrix_t d_sol_mat;
+
+ /* Device memory for KKT matrix */
+ scs_float *d_kkt_val; /* device copy of KKT values */
+ scs_int *d_kkt_row_ptr; /* device copy of KKT row pointers */
+ scs_int *d_kkt_col_ind; /* device copy of KKT column indices */
+
+ /* Device memory for vectors */
+ scs_float *d_b; /* device copy of right-hand side */
+ scs_float *d_sol; /* device copy of solution */
+
+ /* These are required for matrix updates */
+ scs_int *diag_r_idxs; /* indices where R appears in the KKT matrix */
+ scs_float *diag_p; /* Diagonal values of P */
+
+ /* cuDSS configuration */
+ cudssConfig_t solver_config; /* cuDSS solver handle */
+ cudssData_t solver_data; /* cuDSS data handle */
+ };
+
+ #ifdef __cplusplus
+ }
+ #endif
+
+ #endif
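Editor's note (illustrative, not part of the package): the two files above add a sparse direct linear-system backend for SCS based on NVIDIA cuDSS. scs_init_lin_sys_work forms and factors the KKT matrix [R_x + P, A'; A, -R_y], scs_solve_lin_sys overwrites the right-hand side b in place with the solution, scs_update_lin_sys_diag_r refactorizes after the diagonal scaling R changes, and scs_free_lin_sys_work releases host and device resources. The following minimal driver is a hypothetical sketch of how that interface is exercised; it assumes the SCS headers are on the include path and the program is linked against a build of this cuDSS backend, and the toy 1x1 problem data are purely illustrative.

/* Hypothetical driver sketching the linsys interface lifecycle (illustrative only). */
#include <stdio.h>

#include "glbopts.h" /* SCS_NULL */
#include "linsys.h"  /* linear-system interface implemented by private.c above */
#include "scs.h"     /* ScsMatrix, scs_float, scs_int */

int main(void) {
  /* Toy data: A is a 1x1 matrix [2.0] in compressed sparse column form. */
  scs_float Ax[] = {2.0};
  scs_int Ai[] = {0};
  scs_int Ap[] = {0, 1};
  ScsMatrix A = {.x = Ax, .i = Ai, .p = Ap, .m = 1, .n = 1};

  scs_float diag_r[] = {1.0, 1.0}; /* length n + m, values illustrative */
  scs_float b[] = {1.0, 1.0};      /* right-hand side, length n + m */

  /* Form and factor the KKT matrix (no quadratic term P for this toy problem). */
  ScsLinSysWork *w = scs_init_lin_sys_work(&A, SCS_NULL, diag_r);
  if (!w) {
    return 1;
  }

  /* Solve in place: b is overwritten with the solution; the ws/tol arguments
   * are unused by a direct method. */
  scs_solve_lin_sys(w, b, SCS_NULL, 0.0);
  printf("x = %g, y = %g\n", (double)b[0], (double)b[1]);

  /* After R changes, update the KKT diagonal and refactorize on the GPU. */
  diag_r[0] = 2.0;
  scs_update_lin_sys_diag_r(w, diag_r);

  scs_free_lin_sys_work(w);
  return 0;
}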
data/vendor/scs/linsys/external/qdldl/qdldl_types.h CHANGED
@@ -12,7 +12,7 @@ extern "C" {
 
  #define QDLDL_int scs_int
  #define QDLDL_float scs_float
- #define QDLDL_bool scs_int
+ #define QDLDL_bool unsigned char
 
  /* Maximum value of the signed type QDLDL_int */
  #ifdef DLONG
data/vendor/scs/linsys/gpu/indirect/private.c CHANGED
@@ -147,28 +147,24 @@ static void mat_vec(ScsLinSysWork *p, const scs_float *x, scs_float *y) {
 
  if (p->Pg) {
  /* y = R_x * x + P x */
- SCS(accum_by_p_gpu)
- (p->Pg, p->dn_vec_n, p->dn_vec_n_p, p->cusparse_handle, &p->buffer_size,
- &p->buffer);
+ SCS(accum_by_p_gpu)(p->Pg, p->dn_vec_n, p->dn_vec_n_p, p->cusparse_handle,
+ &p->buffer_size, &p->buffer);
  }
 
  /* z = Ax */
  #if GPU_TRANSPOSE_MAT > 0
- SCS(accum_by_atrans_gpu)
- (p->Agt, p->dn_vec_n, p->dn_vec_m, p->cusparse_handle, &p->buffer_size,
- &p->buffer);
+ SCS(accum_by_atrans_gpu)(p->Agt, p->dn_vec_n, p->dn_vec_m, p->cusparse_handle,
+ &p->buffer_size, &p->buffer);
  #else
- SCS(accum_by_a_gpu)
- (p->Ag, p->dn_vec_n, p->dn_vec_m, p->cusparse_handle, &p->buffer_size,
- &p->buffer);
+ SCS(accum_by_a_gpu)(p->Ag, p->dn_vec_n, p->dn_vec_m, p->cusparse_handle,
+ &p->buffer_size, &p->buffer);
  #endif
  /* z = R_y^{-1} A x */
  scale_by_diag(p->cublas_handle, p->inv_r_y_gpu, z, p->m);
 
  /* y += A'z => y = R_x * x + P x + A' R_y^{-1} Ax */
- SCS(accum_by_atrans_gpu)
- (p->Ag, p->dn_vec_m, p->dn_vec_n_p, p->cusparse_handle, &p->buffer_size,
- &p->buffer);
+ SCS(accum_by_atrans_gpu)(p->Ag, p->dn_vec_m, p->dn_vec_n_p,
+ p->cusparse_handle, &p->buffer_size, &p->buffer);
  }
 
  /* P comes in upper triangular, expand to full
@@ -488,9 +484,8 @@ scs_int scs_solve_lin_sys(ScsLinSysWork *p, scs_float *b, const scs_float *s,
  cusparseDnVecSetValues(p->dn_vec_m, (void *)tmp_m); /* R * ry */
  cusparseDnVecSetValues(p->dn_vec_n, (void *)bg); /* rx */
  /* bg[:n] = rx + A' R ry */
- SCS(accum_by_atrans_gpu)
- (Ag, p->dn_vec_m, p->dn_vec_n, p->cusparse_handle, &p->buffer_size,
- &p->buffer);
+ SCS(accum_by_atrans_gpu)(Ag, p->dn_vec_m, p->dn_vec_n, p->cusparse_handle,
+ &p->buffer_size, &p->buffer);
 
  /* set max_iters to 10 * n (though in theory n is enough for any tol) */
  max_iters = 10 * Ag->n;
@@ -506,13 +501,11 @@ scs_int scs_solve_lin_sys(ScsLinSysWork *p, scs_float *b, const scs_float *s,
 
  /* b[n:] = Ax - ry */
  #if GPU_TRANSPOSE_MAT > 0
- SCS(accum_by_atrans_gpu)
- (p->Agt, p->dn_vec_n, p->dn_vec_m, p->cusparse_handle, &p->buffer_size,
- &p->buffer);
+ SCS(accum_by_atrans_gpu)(p->Agt, p->dn_vec_n, p->dn_vec_m, p->cusparse_handle,
+ &p->buffer_size, &p->buffer);
  #else
- SCS(accum_by_a_gpu)
- (Ag, p->dn_vec_n, p->dn_vec_m, p->cusparse_handle, &p->buffer_size,
- &p->buffer);
+ SCS(accum_by_a_gpu)(Ag, p->dn_vec_n, p->dn_vec_m, p->cusparse_handle,
+ &p->buffer_size, &p->buffer);
  #endif
 
  /* bg[n:] = R_y^{-1} bg[n:] = R_y^{-1} (Ax - ry) = y */
data/vendor/scs/scs.mk CHANGED
@@ -8,7 +8,6 @@ endif
  # For cross-compiling with mingw use these.
  #CC = i686-w64-mingw32-gcc -m32
  #CC = x86_64-w64-mingw32-gcc-4.8
- CUCC = $(CC) #Don't need to use nvcc, since using cuda blas APIs
 
  # For GPU must add cuda libs to path, e.g.
  # export DYLD_LIBRARY_PATH=/usr/local/cuda/lib:$DYLD_LIBRARY_PATH
@@ -53,13 +52,19 @@ endif
  #TODO: check if this works for all platforms:
  ifeq ($(CUDA_PATH), )
  CUDA_PATH=/usr/local/cuda
+ CUCC = $(CUDA_PATH)/bin/nvcc
  endif
+
  CULDFLAGS = -L$(CUDA_PATH)/lib -L$(CUDA_PATH)/lib64 -lcudart -lcublas -lcusparse
  CUDAFLAGS = $(CFLAGS) -I$(CUDA_PATH)/include -Ilinsys/gpu -Wno-c++11-long-long # turn off annoying long-long warnings in cuda header files
 
+ CUDSS_FLAGS = -I$(CUDSS_PATH)/include -I$(CUDA_PATH)/include
+ CUDSS_LDFLAGS = $(CULDFLAGS) -L$(CUDSS_PATH)/lib -lcudss
+
  # Add on default CFLAGS
  OPT = -O3
- override CFLAGS += -g -Wall -Wwrite-strings -pedantic -funroll-loops -Wstrict-prototypes -I. -Iinclude -Ilinsys $(OPT)
+ INCLUDE = -I. -Iinclude -Ilinsys
+ override CFLAGS += -g -Wall -Wwrite-strings -pedantic -funroll-loops -Wstrict-prototypes $(INCLUDE) $(OPT) -Werror=incompatible-pointer-types
  ifneq ($(ISWINDOWS), 1)
  override CFLAGS += -fPIC
  endif
@@ -70,6 +75,7 @@ INDIRSRC = $(LINSYS)/cpu/indirect
  GPUDIR = $(LINSYS)/gpu/direct
  GPUINDIR = $(LINSYS)/gpu/indirect
  MKLSRC = $(LINSYS)/mkl/direct
+ CUDSSSRC = $(LINSYS)/cudss/direct
 
  EXTSRC = $(LINSYS)/external
 
@@ -174,6 +180,15 @@ ifneq ($(USE_LAPACK), 0)
  endif
  endif
 
+ ############ SPECTRAL CONES ############
+ USE_SPECTRAL_CONES = 0
+ ifneq ($(USE_SPECTRAL_CONES), 0)
+ ifeq ($(USE_LAPACK), 0)
+ $(error USE_SPECTRAL_CONES requires USE_LAPACK to be enabled)
+ endif
+ CUSTOM_FLAGS += -DUSE_SPECTRAL_CONES
+ endif
+
  MATLAB_MEX_FILE = 0
  ifneq ($(MATLAB_MEX_FILE), 0)
  CUSTOM_FLAGS += -DMATLAB_MEX_FILE=$(MATLAB_MEX_FILE) # matlab mex
data/vendor/scs/src/aa.c CHANGED
@@ -189,9 +189,8 @@ static void set_m(AaWork *a, aa_int len) {
  blas_int blen = (blas_int)len;
  aa_float onef = 1.0, zerof = 0.0, r;
  /* if len < mem this only uses len cols */
- BLAS(gemm)
- ("Trans", "No", &blen, &blen, &bdim, &onef, a->type1 ? a->S : a->Y, &bdim,
- a->Y, &bdim, &zerof, a->M, &blen);
+ BLAS(gemm)("Trans", "No", &blen, &blen, &bdim, &onef, a->type1 ? a->S : a->Y,
+ &bdim, a->Y, &bdim, &zerof, a->M, &blen);
  if (a->regularization > 0) {
  r = compute_regularization(a, len);
  for (i = 0; i < len; ++i) {
@@ -287,9 +286,8 @@ static void relax(aa_float *f, AaWork *a, aa_int len) {
  aa_float onef = 1.0, neg_onef = -1.0;
  aa_float one_m_relaxation = 1. - a->relaxation;
  /* x_work = x - S * work */
- BLAS(gemv)
- ("NoTrans", &bdim, &blen, &neg_onef, a->S, &bdim, a->work, &one, &onef,
- a->x_work, &one);
+ BLAS(gemv)("NoTrans", &bdim, &blen, &neg_onef, a->S, &bdim, a->work, &one,
+ &onef, a->x_work, &one);
  /* f = relaxation * f */
  BLAS(scal)(&bdim, &a->relaxation, f, &one);
  /* f += (1 - relaxation) * x_work */
@@ -306,9 +304,8 @@ static aa_float solve(aa_float *f, AaWork *a, aa_int len) {
  aa_float onef = 1.0, zerof = 0.0, neg_onef = -1.0, aa_norm;
 
  /* work = S'g or Y'g */
- BLAS(gemv)
- ("Trans", &bdim, &blen, &onef, a->type1 ? a->S : a->Y, &bdim, a->g, &one,
- &zerof, a->work, &one);
+ BLAS(gemv)("Trans", &bdim, &blen, &onef, a->type1 ? a->S : a->Y, &bdim, a->g,
+ &one, &zerof, a->work, &one);
 
  /* work = M \ work, where update_accel_params has set M = S'Y or M = Y'Y */
  BLAS(gesv)(&blen, &one, a->M, &blen, a->ipiv, a->work, &blen, &info);
@@ -335,9 +332,8 @@ static aa_float solve(aa_float *f, AaWork *a, aa_int len) {
  /* if solve was successful compute new point */
 
  /* first set f -= D * work */
- BLAS(gemv)
- ("NoTrans", &bdim, &blen, &neg_onef, a->D, &bdim, a->work, &one, &onef, f,
- &one);
+ BLAS(gemv)("NoTrans", &bdim, &blen, &neg_onef, a->D, &bdim, a->work, &one,
+ &onef, f, &one);
 
  /* if relaxation is not 1 then need to incorporate */
  if (a->relaxation != 1.0) {