scs 0.4.0 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +8 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +1 -1
  5. data/lib/scs/ffi.rb +2 -2
  6. data/lib/scs/version.rb +1 -1
  7. data/lib/scs.rb +3 -3
  8. data/vendor/scs/CITATION.cff +2 -2
  9. data/vendor/scs/CMakeLists.txt +305 -171
  10. data/vendor/scs/Makefile +44 -19
  11. data/vendor/scs/README.md +1 -1
  12. data/vendor/scs/include/glbopts.h +34 -14
  13. data/vendor/scs/include/linsys.h +8 -8
  14. data/vendor/scs/include/scs.h +6 -2
  15. data/vendor/scs/include/scs_blas.h +4 -0
  16. data/vendor/scs/include/scs_types.h +3 -1
  17. data/vendor/scs/include/scs_work.h +9 -8
  18. data/vendor/scs/include/util.h +1 -1
  19. data/vendor/scs/linsys/cpu/direct/private.c +32 -153
  20. data/vendor/scs/linsys/cpu/direct/private.h +6 -6
  21. data/vendor/scs/linsys/cpu/indirect/private.c +9 -22
  22. data/vendor/scs/linsys/cpu/indirect/private.h +4 -2
  23. data/vendor/scs/linsys/csparse.c +140 -12
  24. data/vendor/scs/linsys/csparse.h +10 -17
  25. data/vendor/scs/linsys/gpu/gpu.c +4 -4
  26. data/vendor/scs/linsys/gpu/gpu.h +1 -1
  27. data/vendor/scs/linsys/gpu/indirect/private.c +15 -26
  28. data/vendor/scs/linsys/mkl/direct/private.c +182 -0
  29. data/vendor/scs/linsys/mkl/direct/private.h +38 -0
  30. data/vendor/scs/linsys/scs_matrix.c +11 -5
  31. data/vendor/scs/scs.mk +40 -27
  32. data/vendor/scs/src/cones.c +17 -161
  33. data/vendor/scs/src/exp_cone.c +399 -0
  34. data/vendor/scs/src/linalg.c +17 -3
  35. data/vendor/scs/src/normalize.c +4 -2
  36. data/vendor/scs/src/rw.c +107 -38
  37. data/vendor/scs/src/scs.c +103 -69
  38. data/vendor/scs/src/util.c +12 -3
  39. data/vendor/scs/test/minunit.h +2 -1
  40. data/vendor/scs/test/problem_utils.h +2 -1
  41. data/vendor/scs/test/problems/hs21_tiny_qp.h +1 -1
  42. data/vendor/scs/test/problems/hs21_tiny_qp_rw.h +8 -3
  43. data/vendor/scs/test/problems/max_ent +0 -0
  44. data/vendor/scs/test/problems/max_ent.h +8 -0
  45. data/vendor/scs/test/problems/mpc_bug.h +19 -0
  46. data/vendor/scs/test/problems/mpc_bug1 +0 -0
  47. data/vendor/scs/test/problems/mpc_bug2 +0 -0
  48. data/vendor/scs/test/problems/mpc_bug3 +0 -0
  49. data/vendor/scs/test/problems/random_prob.h +2 -43
  50. data/vendor/scs/test/problems/rob_gauss_cov_est.h +7 -2
  51. data/vendor/scs/test/problems/test_exp_cone.h +84 -0
  52. data/vendor/scs/test/problems/test_prob_from_data_file.h +73 -0
  53. data/vendor/scs/test/run_from_file.c +7 -1
  54. data/vendor/scs/test/run_tests.c +25 -9
  55. metadata +14 -3
@@ -21,20 +21,10 @@ static scs_float cg_gpu_norm(cublasHandle_t cublas_handle, scs_float *r,
21
21
  return nrm;
22
22
  }
23
23
 
24
- const char *SCS(get_lin_sys_method)() {
24
+ const char *scs_get_lin_sys_method() {
25
25
  return "sparse-indirect GPU";
26
26
  }
27
27
 
28
- /*
29
- char *SCS(get_lin_sys_summary)(ScsLinSysWork *p, const ScsInfo *info) {
30
- char *str = (char *)scs_malloc(sizeof(char) * 128);
31
- sprintf(str, "lin-sys: avg cg its: %2.2f\n",
32
- (scs_float)p->tot_cg_its / (info->iter + 1));
33
- p->tot_cg_its = 0;
34
- return str;
35
- }
36
- */
37
-
38
28
  /* Not possible to do this on the fly due to M_ii += a_i' (R_y)^-1 a_i */
39
29
  /* set M = inv ( diag ( R_x + P + A' R_y^{-1} A ) ) */
40
30
  static void set_preconditioner(ScsLinSysWork *p, const scs_float *diag_r) {
@@ -76,7 +66,7 @@ static void set_preconditioner(ScsLinSysWork *p, const scs_float *diag_r) {
76
66
  }
77
67
 
78
68
  /* refresh R_x and 1/R_y on the GPU and rebuild the preconditioner */
79
- void SCS(update_lin_sys_diag_r)(ScsLinSysWork *p, const scs_float *diag_r) {
69
+ void scs_update_lin_sys_diag_r(ScsLinSysWork *p, const scs_float *diag_r) {
80
70
  scs_int i;
81
71
 
82
72
  /* R_x to gpu */
@@ -93,7 +83,7 @@ void SCS(update_lin_sys_diag_r)(ScsLinSysWork *p, const scs_float *diag_r) {
93
83
  set_preconditioner(p, diag_r);
94
84
  }
95
85
 
96
- void SCS(free_lin_sys_work)(ScsLinSysWork *p) {
86
+ void scs_free_lin_sys_work(ScsLinSysWork *p) {
97
87
  if (p) {
98
88
  scs_free(p->M);
99
89
  scs_free(p->inv_r_y);
@@ -182,13 +172,13 @@ static void mat_vec(ScsLinSysWork *p, const scs_float *x, scs_float *y) {
182
172
  }
183
173
 
184
174
  /* P comes in upper triangular, expand to full
185
- * First compute triplet version of full matrix, then compress to csc
175
+ * First compute triplet version of full matrix, then compress to CSC
186
176
  * */
187
- static csc *fill_p_matrix(const ScsMatrix *P) {
177
+ static ScsMatrix *fill_p_matrix(const ScsMatrix *P) {
188
178
  scs_int i, j, k, kk;
189
179
  scs_int Pnzmax = 2 * P->p[P->n]; /* upper bound */
190
- csc *P_tmp = SCS(cs_spalloc)(P->n, P->n, Pnzmax, 1, 1);
191
- csc *P_full;
180
+ ScsMatrix *P_tmp = SCS(cs_spalloc)(P->n, P->n, Pnzmax, 1, 1);
181
+ ScsMatrix *P_full;
192
182
  kk = 0;
193
183
  for (j = 0; j < P->n; j++) { /* cols */
194
184
  for (k = P->p[j]; k < P->p[j + 1]; k++) {
@@ -209,16 +199,15 @@ static csc *fill_p_matrix(const ScsMatrix *P) {
209
199
  kk++;
210
200
  }
211
201
  }
212
- P_tmp->nz = kk; /* set number of nonzeros */
213
- P_full = SCS(cs_compress)(P_tmp, SCS_NULL);
202
+ P_full = SCS(cs_compress)(P_tmp, kk, SCS_NULL);
214
203
  SCS(cs_spfree)(P_tmp);
215
204
  return P_full;
216
205
  }
217
206
 
218
- ScsLinSysWork *SCS(init_lin_sys_work)(const ScsMatrix *A, const ScsMatrix *P,
219
- const scs_float *diag_r) {
207
+ ScsLinSysWork *scs_init_lin_sys_work(const ScsMatrix *A, const ScsMatrix *P,
208
+ const scs_float *diag_r) {
220
209
  cudaError_t err;
221
- csc *P_full;
210
+ ScsMatrix *P_full;
222
211
  ScsLinSysWork *p = SCS_NULL;
223
212
  ScsGpuMatrix *Ag = SCS_NULL;
224
213
  ScsGpuMatrix *Pg = SCS_NULL;
@@ -324,7 +313,7 @@ ScsLinSysWork *SCS(init_lin_sys_work)(const ScsMatrix *A, const ScsMatrix *P,
324
313
  cusparseCreateDnVec(&p->dn_vec_m, Ag->m, p->tmp_m, SCS_CUDA_FLOAT);
325
314
 
326
315
  /* Form preconditioner and copy R_x, 1/R_y to gpu */
327
- SCS(update_lin_sys_diag_r)(p, diag_r);
316
+ scs_update_lin_sys_diag_r(p, diag_r);
328
317
 
329
318
  #if GPU_TRANSPOSE_MAT > 0
330
319
  p->Agt = (ScsGpuMatrix *)scs_malloc(sizeof(ScsGpuMatrix));
@@ -367,7 +356,7 @@ ScsLinSysWork *SCS(init_lin_sys_work)(const ScsMatrix *A, const ScsMatrix *P,
367
356
  if (err != cudaSuccess) {
368
357
  printf("%s:%d:%s\nERROR_CUDA (*): %s\n", __FILE__, __LINE__, __func__,
369
358
  cudaGetErrorString(err));
370
- SCS(free_lin_sys_work)(p);
359
+ scs_free_lin_sys_work(p);
371
360
  return SCS_NULL;
372
361
  }
373
362
  return p;
@@ -466,8 +455,8 @@ static scs_int pcg(ScsLinSysWork *pr, const scs_float *s, scs_float *bg,
466
455
  * y = R_y^{-1} (Ax - ry)
467
456
  *
468
457
  */
469
- scs_int SCS(solve_lin_sys)(ScsLinSysWork *p, scs_float *b, const scs_float *s,
470
- scs_float tol) {
458
+ scs_int scs_solve_lin_sys(ScsLinSysWork *p, scs_float *b, const scs_float *s,
459
+ scs_float tol) {
471
460
  scs_int cg_its, max_iters;
472
461
  scs_float neg_onef = -1.0;
473
462
 
@@ -0,0 +1,182 @@
1
+ #include "private.h"
2
+
3
+ #define PARDISO_SYMBOLIC (11)
4
+ #define PARDISO_NUMERIC (22)
5
+ #define PARDISO_SOLVE (33)
6
+ #define PARDISO_CLEANUP (-1)
7
+
8
+ /* TODO: is it necessary to use pardiso_64 and MKL_Set_Interface_Layer ? */
9
+ /*
10
+ #define MKL_INTERFACE_LP64 0
11
+ #define MKL_INTERFACE_ILP64 1
12
+ */
13
+ #ifdef DLONG
14
+ #define _PARDISO pardiso_64
15
+ #else
16
+ #define _PARDISO pardiso
17
+ #endif
18
+
19
+ /* Prototypes for Pardiso functions */
20
+ void _PARDISO(void **pt, const scs_int *maxfct, const scs_int *mnum,
21
+ const scs_int *mtype, const scs_int *phase, const scs_int *n,
22
+ const scs_float *a, const scs_int *ia, const scs_int *ja,
23
+ scs_int *perm, const scs_int *nrhs, scs_int *iparm,
24
+ const scs_int *msglvl, scs_float *b, scs_float *x,
25
+ scs_int *error);
26
+ /* scs_int MKL_Set_Interface_Layer(scs_int); */
27
+
28
+ const char *scs_get_lin_sys_method() {
29
+ return "sparse-direct-mkl-pardiso";
30
+ }
31
+
32
+ void scs_free_lin_sys_work(ScsLinSysWork *p) {
33
+ if (p) {
34
+ p->phase = PARDISO_CLEANUP;
35
+ _PARDISO(p->pt, &(p->maxfct), &(p->mnum), &(p->mtype), &(p->phase),
36
+ &(p->n_plus_m), SCS_NULL, p->kkt->p, p->kkt->i, SCS_NULL,
37
+ &(p->nrhs), p->iparm, &(p->msglvl), SCS_NULL, SCS_NULL,
38
+ &(p->error));
39
+ if (p->error != 0) {
40
+ scs_printf("Error during MKL Pardiso cleanup: %d", (int)p->error);
41
+ }
42
+ if (p->kkt)
43
+ SCS(cs_spfree)(p->kkt);
44
+ if (p->sol)
45
+ scs_free(p->sol);
46
+ if (p->diag_r_idxs)
47
+ scs_free(p->diag_r_idxs);
48
+ if (p->diag_p)
49
+ scs_free(p->diag_p);
50
+ scs_free(p);
51
+ }
52
+ }
53
+
54
+ ScsLinSysWork *scs_init_lin_sys_work(const ScsMatrix *A, const ScsMatrix *P,
55
+ const scs_float *diag_r) {
56
+ scs_int i;
57
+ ScsLinSysWork *p = scs_calloc(1, sizeof(ScsLinSysWork));
58
+
59
+ /* TODO: is this necessary with pardiso_64? */
60
+ /* Set MKL interface layer */
61
+ /*
62
+ #ifdef DLONG
63
+ MKL_Set_Interface_Layer(MKL_INTERFACE_ILP64);
64
+ #else
65
+ MKL_Set_Interface_Layer(MKL_INTERFACE_LP64);
66
+ #endif
67
+ */
68
+ p->n = A->n;
69
+ p->m = A->m;
70
+ p->n_plus_m = p->n + p->m;
71
+
72
+ /* Even though rhs is overwritten with the solution, Pardiso still requires a separate solution buffer */
73
+ p->sol = (scs_float *)scs_malloc(sizeof(scs_float) * p->n_plus_m);
74
+ p->diag_r_idxs = (scs_int *)scs_calloc(p->n_plus_m, sizeof(scs_int));
75
+ p->diag_p = (scs_float *)scs_calloc(p->n, sizeof(scs_float));
76
+
77
+ /* MKL pardiso requires upper triangular CSR matrices. The KKT matrix stuffed
78
+ * as CSC lower triangular is equivalent. Pass upper=0. */
79
+ p->kkt = SCS(form_kkt)(A, P, p->diag_p, diag_r, p->diag_r_idxs, 0);
80
+ if (!(p->kkt)) {
81
+ scs_printf("Error in forming KKT matrix");
82
+ scs_free_lin_sys_work(p);
83
+ return SCS_NULL;
84
+ }
85
+
86
+ for (i = 0; i < 64; i++) {
87
+ p->iparm[i] = 0; /* Setup Pardiso control parameters */
88
+ p->pt[i] = 0; /* Initialize the internal solver memory pointer */
89
+ }
90
+
91
+ /* Set Pardiso variables */
92
+ p->mtype = -2; /* Real symmetric indefinite matrix */
93
+ p->nrhs = 1; /* Number of right hand sides */
94
+ p->maxfct = 1; /* Maximum number of numerical factorizations */
95
+ p->mnum = 1; /* Which factorization to use */
96
+ p->error = 0; /* Initialize error flag */
97
+ p->msglvl = VERBOSITY; /* Printing information */
98
+
99
+ /* For all iparm vars see MKL documentation */
100
+ p->iparm[0] = 1; /* Pardiso must inspect iparm */
101
+ p->iparm[1] = 3; /* Fill-in reducing ordering: parallel (OpenMP) nested dissection */
102
+ p->iparm[5] = 1; /* Write solution into b */
103
+ p->iparm[7] = 0; /* Automatic iterative refinement calculation */
104
+ p->iparm[9] = 8; /* Perturb the pivot elements with 1E-8 */
105
+ p->iparm[34] = 1; /* Use C-style indexing for indices */
106
+ /* p->iparm[36] = -80; */ /* Form block sparse matrices */
107
+
108
+ #ifdef SFLOAT
109
+ p->iparm[27] = 1; /* 1 is single precision, 0 is double */
110
+ #endif
111
+
112
+ /* Permutation and symbolic factorization */
113
+ scs_int phase = PARDISO_SYMBOLIC;
114
+ _PARDISO(p->pt, &(p->maxfct), &(p->mnum), &(p->mtype), &phase, &(p->n_plus_m),
115
+ p->kkt->x, p->kkt->p, p->kkt->i, SCS_NULL, &(p->nrhs), p->iparm,
116
+ &(p->msglvl), SCS_NULL, SCS_NULL, &(p->error));
117
+
118
+ if (p->error != 0) {
119
+ scs_printf("Error during symbolic factorization: %d", (int)p->error);
120
+ scs_free_lin_sys_work(p);
121
+ return SCS_NULL;
122
+ }
123
+
124
+ /* Numerical factorization */
125
+ p->phase = PARDISO_NUMERIC;
126
+ _PARDISO(p->pt, &(p->maxfct), &(p->mnum), &(p->mtype), &(p->phase),
127
+ &(p->n_plus_m), p->kkt->x, p->kkt->p, p->kkt->i, SCS_NULL,
128
+ &(p->nrhs), p->iparm, &(p->msglvl), SCS_NULL, SCS_NULL, &(p->error));
129
+
130
+ if (p->error) {
131
+ scs_printf("Error during numerical factorization: %d", (int)p->error);
132
+ scs_free_lin_sys_work(p);
133
+ return SCS_NULL;
134
+ }
135
+
136
+ if (p->iparm[21] < p->n) {
137
+ scs_printf("KKT matrix has < n positive eigenvalues. P not PSD.");
138
+ return SCS_NULL;
139
+ }
140
+
141
+ return p;
142
+ }
143
+
144
+ /* Returns solution to linear system Ax = b with solution stored in b */
145
+ scs_int scs_solve_lin_sys(ScsLinSysWork *p, scs_float *b, const scs_float *ws,
146
+ scs_float tol) {
147
+ /* Back substitution and iterative refinement */
148
+ p->phase = PARDISO_SOLVE;
149
+ _PARDISO(p->pt, &(p->maxfct), &(p->mnum), &(p->mtype), &(p->phase),
150
+ &(p->n_plus_m), p->kkt->x, p->kkt->p, p->kkt->i, SCS_NULL,
151
+ &(p->nrhs), p->iparm, &(p->msglvl), b, p->sol, &(p->error));
152
+ if (p->error != 0) {
153
+ scs_printf("Error during linear system solution: %d", (int)p->error);
154
+ }
155
+ return p->error;
156
+ }
157
+
158
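+ /* Update factorization when R changes. NOTE(review): on failure this frees p but returns void, so the caller's pointer is left dangling -- confirm callers never reuse or re-free it */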
159
+ void scs_update_lin_sys_diag_r(ScsLinSysWork *p, const scs_float *diag_r) {
160
+ scs_int i;
161
+
162
+ for (i = 0; i < p->n; ++i) {
163
+ /* top left is R_x + P, bottom right is -R_y */
164
+ p->kkt->x[p->diag_r_idxs[i]] = p->diag_p[i] + diag_r[i];
165
+ }
166
+ for (i = p->n; i < p->n + p->m; ++i) {
167
+ /* top left is R_x + P, bottom right is -R_y */
168
+ p->kkt->x[p->diag_r_idxs[i]] = -diag_r[i];
169
+ }
170
+
171
+ /* Perform numerical factorization */
172
+ p->phase = PARDISO_NUMERIC;
173
+ _PARDISO(p->pt, &(p->maxfct), &(p->mnum), &(p->mtype), &(p->phase),
174
+ &(p->n_plus_m), p->kkt->x, p->kkt->p, p->kkt->i, SCS_NULL,
175
+ &(p->nrhs), p->iparm, &(p->msglvl), SCS_NULL, SCS_NULL, &(p->error));
176
+
177
+ if (p->error != 0) {
178
+ scs_printf("Error in PARDISO factorization when updating: %d.\n",
179
+ (int)p->error);
180
+ scs_free_lin_sys_work(p);
181
+ }
182
+ }
@@ -0,0 +1,38 @@
1
+ #ifndef PRIV_H_GUARD
2
+ #define PRIV_H_GUARD
3
+
4
+ #ifdef __cplusplus
5
+ extern "C" {
6
+ #endif
7
+
8
+ #include "csparse.h"
9
+ #include "linsys.h"
10
+
11
+ struct SCS_LIN_SYS_WORK {
12
+ ScsMatrix *kkt; /* Upper triangular KKT matrix (in CSR format) */
13
+ scs_float *sol; /* solution to the KKT system */
14
+ scs_int n; /* number of QP variables */
15
+ scs_int m; /* number of QP constraints */
16
+
17
+ /* Pardiso variables */
18
+ void *pt[64]; /* internal solver memory pointer pt */
19
+ scs_int iparm[64]; /* Pardiso control parameters */
20
+ scs_int n_plus_m; /* dimension of the linear system */
21
+ scs_int mtype; /* matrix type (-2 for real and symmetric indefinite) */
22
+ scs_int nrhs; /* number of right-hand sides (1) */
23
+ scs_int maxfct; /* maximum number of factors (1) */
24
+ scs_int mnum; /* indicates matrix for the solution phase (1) */
25
+ scs_int phase; /* control the execution phases of the solver */
26
+ scs_int error; /* the error indicator (0 for no error) */
27
+ scs_int msglvl; /* Message level information (0 for no output) */
28
+
29
+ /* These are required for matrix updates */
30
+ scs_int *diag_r_idxs; /* indices where R appears */
31
+ scs_float *diag_p; /* Diagonal values of P */
32
+ };
33
+
34
+ #ifdef __cplusplus
35
+ }
36
+ #endif
37
+
38
+ #endif
@@ -117,6 +117,7 @@ static void compute_ruiz_mats(ScsMatrix *P, ScsMatrix *A, scs_float *Dt,
117
117
  scs_float *Et, ScsConeWork *cone) {
118
118
  scs_int i, j, kk;
119
119
  scs_float wrk;
120
+ scs_float nm_a_col;
120
121
 
121
122
  /**************************** D ****************************/
122
123
 
@@ -138,7 +139,8 @@ static void compute_ruiz_mats(ScsMatrix *P, ScsMatrix *A, scs_float *Dt,
138
139
 
139
140
  /* invert temporary vec to form D */
140
141
  for (i = 0; i < A->m; ++i) {
141
- Dt[i] = SAFEDIV_POS(1.0, SQRTF(apply_limit(Dt[i])));
142
+ Dt[i] = SQRTF(apply_limit(Dt[i]));
143
+ Dt[i] = SAFEDIV_POS(1.0, Dt[i]);
142
144
  }
143
145
 
144
146
  /**************************** E ****************************/
@@ -169,8 +171,10 @@ static void compute_ruiz_mats(ScsMatrix *P, ScsMatrix *A, scs_float *Dt,
169
171
 
170
172
  /* calculate col norms, E */
171
173
  for (i = 0; i < A->n; ++i) {
172
- Et[i] = MAX(Et[i], SCS(norm_inf)(&(A->x[A->p[i]]), A->p[i + 1] - A->p[i]));
173
- Et[i] = SAFEDIV_POS(1.0, SQRTF(apply_limit(Et[i])));
174
+ nm_a_col = SCS(norm_inf)(&(A->x[A->p[i]]), A->p[i + 1] - A->p[i]);
175
+ Et[i] = MAX(Et[i], nm_a_col);
176
+ Et[i] = SQRTF(apply_limit(Et[i]));
177
+ Et[i] = SAFEDIV_POS(1.0, Et[i]);
174
178
  }
175
179
  }
176
180
 
@@ -201,7 +205,8 @@ static void compute_l2_mats(ScsMatrix *P, ScsMatrix *A, scs_float *Dt,
201
205
  SCS(enforce_cone_boundaries)(cone, Dt, &SCS(mean));
202
206
 
203
207
  for (i = 0; i < A->m; ++i) {
204
- Dt[i] = SAFEDIV_POS(1.0, SQRTF(apply_limit(Dt[i])));
208
+ Dt[i] = SQRTF(apply_limit(Dt[i]));
209
+ Dt[i] = SAFEDIV_POS(1.0, Dt[i]);
205
210
  }
206
211
 
207
212
  /**************************** E ****************************/
@@ -233,7 +238,8 @@ static void compute_l2_mats(ScsMatrix *P, ScsMatrix *A, scs_float *Dt,
233
238
  /* calculate col norms, E */
234
239
  for (i = 0; i < A->n; ++i) {
235
240
  Et[i] += SCS(norm_sq)(&(A->x[A->p[i]]), A->p[i + 1] - A->p[i]);
236
- Et[i] = SAFEDIV_POS(1.0, SQRTF(apply_limit(SQRTF(Et[i]))));
241
+ Et[i] = SQRTF(apply_limit(SQRTF(Et[i])));
242
+ Et[i] = SAFEDIV_POS(1.0, Et[i]);
237
243
  }
238
244
  }
239
245
 
data/vendor/scs/scs.mk CHANGED
@@ -69,6 +69,7 @@ DIRSRC = $(LINSYS)/cpu/direct
69
69
  INDIRSRC = $(LINSYS)/cpu/indirect
70
70
  GPUDIR = $(LINSYS)/gpu/direct
71
71
  GPUINDIR = $(LINSYS)/gpu/indirect
72
+ MKLSRC = $(LINSYS)/mkl/direct
72
73
 
73
74
  EXTSRC = $(LINSYS)/external
74
75
 
@@ -83,44 +84,56 @@ ifeq ($(PREFIX),)
83
84
  PREFIX = /usr/local
84
85
  endif
85
86
 
86
- OPT_FLAGS =
87
- ########### OPTIONAL FLAGS ##########
87
+ ########### CUSTOM FLAGS ##########
88
88
  # these can all be overridden from the command line
89
+ CUSTOM_FLAGS =
89
90
  # e.g. make DLONG=1 will override the setting below
90
91
  DLONG = 0
91
92
  ifneq ($(DLONG), 0)
92
- OPT_FLAGS += -DDLONG=$(DLONG) # use longs rather than ints
93
+ CUSTOM_FLAGS += -DDLONG=$(DLONG) # use longs rather than ints
93
94
  endif
94
95
  CTRLC = 1
95
96
  ifneq ($(CTRLC), 0)
96
- OPT_FLAGS += -DCTRLC=$(CTRLC) # graceful interrupts with ctrl-c
97
+ CUSTOM_FLAGS += -DCTRLC=$(CTRLC) # graceful interrupts with ctrl-c
97
98
  endif
98
99
  SFLOAT = 0
99
100
  ifneq ($(SFLOAT), 0)
100
- OPT_FLAGS += -DSFLOAT=$(SFLOAT) # use floats rather than doubles
101
- endif
102
- NOTIMER = 0
103
- ifneq ($(NOTIMER), 0)
104
- OPT_FLAGS += -DNOTIMER=$(NOTIMER) # no timing, times reported as nan
101
+ CUSTOM_FLAGS += -DSFLOAT=$(SFLOAT) # use floats rather than doubles
105
102
  endif
106
103
  GPU_TRANSPOSE_MAT = 1
107
104
  ifneq ($(GPU_TRANSPOSE_MAT), 0)
108
- OPT_FLAGS += -DGPU_TRANSPOSE_MAT=$(GPU_TRANSPOSE_MAT) # tranpose A mat in GPU memory
105
+ CUSTOM_FLAGS += -DGPU_TRANSPOSE_MAT=$(GPU_TRANSPOSE_MAT) # transpose A mat in GPU memory
106
+ endif
107
+ NO_TIMER = 0
108
+ ifneq ($(NO_TIMER), 0)
109
+ CUSTOM_FLAGS += -DNO_TIMER=$(NO_TIMER) # no timing, times reported as nan
110
+ endif
111
+ NO_VALIDATE = 0
112
+ ifneq ($(NO_VALIDATE), 0)
113
+ CUSTOM_FLAGS += -DNO_VALIDATE=$(NO_VALIDATE) # skip problem validation
114
+ endif
115
+ NO_PRINTING = 0
116
+ ifneq ($(NO_PRINTING), 0)
117
+ CUSTOM_FLAGS += -DNO_PRINTING=$(NO_PRINTING) # disable printing
109
118
  endif
110
- NOVALIDATE = 0
111
- ifneq ($(NOVALIDATE), 0)
112
- OPT_FLAGS += -DNOVALIDATE=$(NOVALIDATE) # perform problem validation or skip
119
+ NO_READ_WRITE = 0
120
+ ifneq ($(NO_READ_WRITE), 0)
121
+ CUSTOM_FLAGS += -DNO_READ_WRITE=$(NO_READ_WRITE) # disable read / write functionality
113
122
  endif
114
123
  ### VERBOSITY LEVELS: 0,1,2,...
115
124
  VERBOSITY = 0
116
125
  ifneq ($(VERBOSITY), 0)
117
- OPT_FLAGS += -DVERBOSITY=$(VERBOSITY) # verbosity level
126
+ CUSTOM_FLAGS += -DVERBOSITY=$(VERBOSITY) # verbosity level
118
127
  endif
119
128
  COVERAGE = 0
120
129
  ifneq ($(COVERAGE), 0)
121
- override CFLAGS += --coverage # generate test coverage data
130
+ CUSTOM_FLAGS += --coverage # generate test coverage data
122
131
  endif
123
132
 
133
+ # See: https://www.intel.com/content/www/us/en/developer/tools/oneapi/onemkl-link-line-advisor.html
134
+ # This is probably not correct for other systems. TODO: update this
135
+ # to work for all combinations of platform / compiler / threading options.
136
+ MKLFLAGS = -L$(MKLROOT) -L$(MKLROOT)/lib -Wl,--no-as-needed -lmkl_rt -lmkl_gnu_thread -lmkl_core -lgomp -lpthread -ldl
124
137
 
125
138
  ############ OPENMP: ############
126
139
  # set USE_OPENMP = 1 to allow openmp (multi-threaded matrix multiplies):
@@ -130,7 +143,7 @@ endif
130
143
  USE_OPENMP = 0
131
144
  ifneq ($(USE_OPENMP), 0)
132
145
  override CFLAGS += -fopenmp
133
- LDFLAGS += -lgomp
146
+ LDFLAGS += -fopenmp
134
147
  endif
135
148
 
136
149
  ############ SDPS: BLAS + LAPACK ############
@@ -138,44 +151,44 @@ endif
138
151
  # NB: point the libraries to the locations where
139
152
  # you have blas and lapack installed
140
153
 
154
+ BLASLDFLAGS =
141
155
  USE_LAPACK = 1
142
156
  ifneq ($(USE_LAPACK), 0)
143
157
  # edit these for your setup:
144
- BLASLDFLAGS = -llapack -lblas # -lgfortran
145
- LDFLAGS += $(BLASLDFLAGS)
146
- OPT_FLAGS += -DUSE_LAPACK
158
+ BLASLDFLAGS += -llapack -lblas # -lgfortran
159
+ CUSTOM_FLAGS += -DUSE_LAPACK
147
160
 
148
161
  BLAS64 = 0
149
162
  ifneq ($(BLAS64), 0)
150
- OPT_FLAGS += -DBLAS64=$(BLAS64) # if blas/lapack lib uses 64 bit ints
163
+ CUSTOM_FLAGS += -DBLAS64=$(BLAS64) # if blas/lapack lib uses 64 bit ints
151
164
  endif
152
165
 
153
166
  NOBLASSUFFIX = 0
154
167
  ifneq ($(NOBLASSUFFIX), 0)
155
- OPT_FLAGS += -DNOBLASSUFFIX=$(NOBLASSUFFIX) # hack to strip blas suffix
168
+ CUSTOM_FLAGS += -DNOBLASSUFFIX=$(NOBLASSUFFIX) # hack to strip blas suffix
156
169
  endif
157
170
 
158
171
  BLASSUFFIX = "_"
159
172
  ifneq ($(BLASSUFFIX), "_")
160
- OPT_FLAGS += -DBLASSUFFIX=$(BLASSUFFIX) # blas suffix (underscore usually)
173
+ CUSTOM_FLAGS += -DBLASSUFFIX=$(BLASSUFFIX) # blas suffix (underscore usually)
161
174
  endif
162
175
  endif
163
176
 
164
177
  MATLAB_MEX_FILE = 0
165
178
  ifneq ($(MATLAB_MEX_FILE), 0)
166
- OPT_FLAGS += -DMATLAB_MEX_FILE=$(MATLAB_MEX_FILE) # matlab mex
179
+ CUSTOM_FLAGS += -DMATLAB_MEX_FILE=$(MATLAB_MEX_FILE) # matlab mex
167
180
  endif
168
181
  PYTHON = 0
169
182
  ifneq ($(PYTHON), 0)
170
- OPT_FLAGS += -DPYTHON=$(PYTHON) # python extension
183
+ CUSTOM_FLAGS += -DPYTHON=$(PYTHON) # python extension
171
184
  endif
172
185
  USING_R = 0
173
186
  ifneq ($(USING_R), 0)
174
- OPT_FLAGS += -DUSING_R=$(USING_R) # R extension
187
+ CUSTOM_FLAGS += -DUSING_R=$(USING_R) # R extension
175
188
  endif
176
189
 
177
190
  # debug to see var values, e.g. 'make print-OBJECTS' shows OBJECTS value
178
191
  print-%: ; @echo $*=$($*)
179
192
 
180
- override CFLAGS += $(OPT_FLAGS)
181
- CUDAFLAGS += $(OPT_FLAGS)
193
+ override CFLAGS += $(CUSTOM_FLAGS)
194
+ CUDAFLAGS += $(CUSTOM_FLAGS)