RubyGems - scs - Versions diffs - 0.4.0 → 0.4.2 - Mend

scs 0.4.0 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (55) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +8 -0
data/LICENSE.txt +1 -1
data/README.md +1 -1
data/lib/scs/ffi.rb +2 -2
data/lib/scs/version.rb +1 -1
data/lib/scs.rb +3 -3
data/vendor/scs/CITATION.cff +2 -2
data/vendor/scs/CMakeLists.txt +305 -171
data/vendor/scs/Makefile +44 -19
data/vendor/scs/README.md +1 -1
data/vendor/scs/include/glbopts.h +34 -14
data/vendor/scs/include/linsys.h +8 -8
data/vendor/scs/include/scs.h +6 -2
data/vendor/scs/include/scs_blas.h +4 -0
data/vendor/scs/include/scs_types.h +3 -1
data/vendor/scs/include/scs_work.h +9 -8
data/vendor/scs/include/util.h +1 -1
data/vendor/scs/linsys/cpu/direct/private.c +32 -153
data/vendor/scs/linsys/cpu/direct/private.h +6 -6
data/vendor/scs/linsys/cpu/indirect/private.c +9 -22
data/vendor/scs/linsys/cpu/indirect/private.h +4 -2
data/vendor/scs/linsys/csparse.c +140 -12
data/vendor/scs/linsys/csparse.h +10 -17
data/vendor/scs/linsys/gpu/gpu.c +4 -4
data/vendor/scs/linsys/gpu/gpu.h +1 -1
data/vendor/scs/linsys/gpu/indirect/private.c +15 -26
data/vendor/scs/linsys/mkl/direct/private.c +182 -0
data/vendor/scs/linsys/mkl/direct/private.h +38 -0
data/vendor/scs/linsys/scs_matrix.c +11 -5
data/vendor/scs/scs.mk +40 -27
data/vendor/scs/src/cones.c +17 -161
data/vendor/scs/src/exp_cone.c +399 -0
data/vendor/scs/src/linalg.c +17 -3
data/vendor/scs/src/normalize.c +4 -2
data/vendor/scs/src/rw.c +107 -38
data/vendor/scs/src/scs.c +103 -69
data/vendor/scs/src/util.c +12 -3
data/vendor/scs/test/minunit.h +2 -1
data/vendor/scs/test/problem_utils.h +2 -1
data/vendor/scs/test/problems/hs21_tiny_qp.h +1 -1
data/vendor/scs/test/problems/hs21_tiny_qp_rw.h +8 -3
data/vendor/scs/test/problems/max_ent +0 -0
data/vendor/scs/test/problems/max_ent.h +8 -0
data/vendor/scs/test/problems/mpc_bug.h +19 -0
data/vendor/scs/test/problems/mpc_bug1 +0 -0
data/vendor/scs/test/problems/mpc_bug2 +0 -0
data/vendor/scs/test/problems/mpc_bug3 +0 -0
data/vendor/scs/test/problems/random_prob.h +2 -43
data/vendor/scs/test/problems/rob_gauss_cov_est.h +7 -2
data/vendor/scs/test/problems/test_exp_cone.h +84 -0
data/vendor/scs/test/problems/test_prob_from_data_file.h +73 -0
data/vendor/scs/test/run_from_file.c +7 -1
data/vendor/scs/test/run_tests.c +25 -9
metadata +14 -3

data/vendor/scs/linsys/gpu/indirect/private.c CHANGED Viewed

@@ -21,20 +21,10 @@ static scs_float cg_gpu_norm(cublasHandle_t cublas_handle, scs_float *r,
   return nrm;
 }
-const char *SCS(get_lin_sys_method)() {
+const char *scs_get_lin_sys_method() {
   return "sparse-indirect GPU";
 }
-/*
-char *SCS(get_lin_sys_summary)(ScsLinSysWork *p, const ScsInfo *info) {
-  char *str = (char *)scs_malloc(sizeof(char) * 128);
-  sprintf(str, "lin-sys: avg cg its: %2.2f\n",
-          (scs_float)p->tot_cg_its / (info->iter + 1));
-  p->tot_cg_its = 0;
-  return str;
-}
-*/
 /* Not possible to do this on the fly due to M_ii += a_i' (R_y)^-1 a_i */
 /* set M = inv ( diag ( R_x + P + A' R_y^{-1} A ) ) */
 static void set_preconditioner(ScsLinSysWork *p, const scs_float *diag_r) {
@@ -76,7 +66,7 @@ static void set_preconditioner(ScsLinSysWork *p, const scs_float *diag_r) {
 }
 /* no need to update anything in this case */
-void SCS(update_lin_sys_diag_r)(ScsLinSysWork *p, const scs_float *diag_r) {
+void scs_update_lin_sys_diag_r(ScsLinSysWork *p, const scs_float *diag_r) {
   scs_int i;
   /* R_x to gpu */
@@ -93,7 +83,7 @@ void SCS(update_lin_sys_diag_r)(ScsLinSysWork *p, const scs_float *diag_r) {
   set_preconditioner(p, diag_r);
 }
-void SCS(free_lin_sys_work)(ScsLinSysWork *p) {
+void scs_free_lin_sys_work(ScsLinSysWork *p) {
   if (p) {
     scs_free(p->M);
     scs_free(p->inv_r_y);
@@ -182,13 +172,13 @@ static void mat_vec(ScsLinSysWork *p, const scs_float *x, scs_float *y) {
 }
 /* P comes in upper triangular, expand to full
- * First compute triplet version of full matrix, then compress to csc
+ * First compute triplet version of full matrix, then compress to CSC
  * */
-static csc *fill_p_matrix(const ScsMatrix *P) {
+static ScsMatrix *fill_p_matrix(const ScsMatrix *P) {
   scs_int i, j, k, kk;
   scs_int Pnzmax = 2 * P->p[P->n]; /* upper bound */
-  csc *P_tmp = SCS(cs_spalloc)(P->n, P->n, Pnzmax, 1, 1);
-  csc *P_full;
+  ScsMatrix *P_tmp = SCS(cs_spalloc)(P->n, P->n, Pnzmax, 1, 1);
+  ScsMatrix *P_full;
   kk = 0;
   for (j = 0; j < P->n; j++) { /* cols */
     for (k = P->p[j]; k < P->p[j + 1]; k++) {
@@ -209,16 +199,15 @@ static csc *fill_p_matrix(const ScsMatrix *P) {
       kk++;
     }
   }
-  P_tmp->nz = kk; /* set number of nonzeros */
-  P_full = SCS(cs_compress)(P_tmp, SCS_NULL);
+  P_full = SCS(cs_compress)(P_tmp, kk, SCS_NULL);
   SCS(cs_spfree)(P_tmp);
   return P_full;
 }
-ScsLinSysWork *SCS(init_lin_sys_work)(const ScsMatrix *A, const ScsMatrix *P,
-                                      const scs_float *diag_r) {
+ScsLinSysWork *scs_init_lin_sys_work(const ScsMatrix *A, const ScsMatrix *P,
+                                     const scs_float *diag_r) {
   cudaError_t err;
-  csc *P_full;
+  ScsMatrix *P_full;
   ScsLinSysWork *p = SCS_NULL;
   ScsGpuMatrix *Ag = SCS_NULL;
   ScsGpuMatrix *Pg = SCS_NULL;
@@ -324,7 +313,7 @@ ScsLinSysWork *SCS(init_lin_sys_work)(const ScsMatrix *A, const ScsMatrix *P,
   cusparseCreateDnVec(&p->dn_vec_m, Ag->m, p->tmp_m, SCS_CUDA_FLOAT);
   /* Form preconditioner and copy R_x, 1/R_y to gpu */
-  SCS(update_lin_sys_diag_r)(p, diag_r);
+  scs_update_lin_sys_diag_r(p, diag_r);
 #if GPU_TRANSPOSE_MAT > 0
   p->Agt = (ScsGpuMatrix *)scs_malloc(sizeof(ScsGpuMatrix));
@@ -367,7 +356,7 @@ ScsLinSysWork *SCS(init_lin_sys_work)(const ScsMatrix *A, const ScsMatrix *P,
   if (err != cudaSuccess) {
     printf("%s:%d:%s\nERROR_CUDA (*): %s\n", __FILE__, __LINE__, __func__,
            cudaGetErrorString(err));
-    SCS(free_lin_sys_work)(p);
+    scs_free_lin_sys_work(p);
     return SCS_NULL;
   }
   return p;
@@ -466,8 +455,8 @@ static scs_int pcg(ScsLinSysWork *pr, const scs_float *s, scs_float *bg,
  * y = R_y^{-1} (Ax - ry)
  *
  */
-scs_int SCS(solve_lin_sys)(ScsLinSysWork *p, scs_float *b, const scs_float *s,
-                           scs_float tol) {
+scs_int scs_solve_lin_sys(ScsLinSysWork *p, scs_float *b, const scs_float *s,
+                          scs_float tol) {
   scs_int cg_its, max_iters;
   scs_float neg_onef = -1.0;

data/vendor/scs/linsys/mkl/direct/private.c ADDED Viewed

@@ -0,0 +1,182 @@
+#include "private.h"
+#define PARDISO_SYMBOLIC (11)
+#define PARDISO_NUMERIC (22)
+#define PARDISO_SOLVE (33)
+#define PARDISO_CLEANUP (-1)
+/* TODO: is it necessary to use pardiso_64 and MKL_Set_Interface_Layer ? */
+/*
+#define MKL_INTERFACE_LP64 0
+#define MKL_INTERFACE_ILP64 1
+*/
+#ifdef DLONG
+#define _PARDISO pardiso_64
+#else
+#define _PARDISO pardiso
+#endif
+/* Prototypes for Pardiso functions */
+void _PARDISO(void **pt, const scs_int *maxfct, const scs_int *mnum,
+              const scs_int *mtype, const scs_int *phase, const scs_int *n,
+              const scs_float *a, const scs_int *ia, const scs_int *ja,
+              scs_int *perm, const scs_int *nrhs, scs_int *iparm,
+              const scs_int *msglvl, scs_float *b, scs_float *x,
+              scs_int *error);
+/* scs_int MKL_Set_Interface_Layer(scs_int); */
+/* Human-readable name of this linear system backend. */
+const char *scs_get_lin_sys_method() {
+  static const char method_name[] = "sparse-direct-mkl-pardiso";
+  return method_name;
+}
+/* Frees the linear system workspace, releasing Pardiso's internal memory
+ * first. Accepts SCS_NULL and partially initialized workspaces (p->kkt not
+ * yet formed), so it can be used on every error path of the initializer. */
+void scs_free_lin_sys_work(ScsLinSysWork *p) {
+  if (!p) {
+    return;
+  }
+  if (p->kkt) {
+    /* The cleanup call reads p->kkt->p and p->kkt->i, so it must be skipped
+     * when the KKT matrix was never formed. */
+    p->phase = PARDISO_CLEANUP;
+    _PARDISO(p->pt, &(p->maxfct), &(p->mnum), &(p->mtype), &(p->phase),
+             &(p->n_plus_m), SCS_NULL, p->kkt->p, p->kkt->i, SCS_NULL,
+             &(p->nrhs), p->iparm, &(p->msglvl), SCS_NULL, SCS_NULL,
+             &(p->error));
+    if (p->error != 0) {
+      scs_printf("Error during MKL Pardiso cleanup: %d", (int)p->error);
+    }
+    SCS(cs_spfree)(p->kkt);
+  }
+  if (p->sol) {
+    scs_free(p->sol);
+  }
+  if (p->diag_r_idxs) {
+    scs_free(p->diag_r_idxs);
+  }
+  if (p->diag_p) {
+    scs_free(p->diag_p);
+  }
+  scs_free(p);
+}
+/* Allocates the MKL Pardiso workspace, forms the KKT matrix from A, P and
+ * diag_r, and performs the symbolic and numerical factorizations.
+ *
+ * Returns the initialized workspace, or SCS_NULL on allocation failure, KKT
+ * formation failure, any Pardiso error, or when the factorization reports
+ * fewer than n positive eigenvalues. Nothing is leaked on the failure paths.
+ */
+ScsLinSysWork *scs_init_lin_sys_work(const ScsMatrix *A, const ScsMatrix *P,
+                                     const scs_float *diag_r) {
+  scs_int i;
+  ScsLinSysWork *p = scs_calloc(1, sizeof(ScsLinSysWork));
+  if (!p) {
+    return SCS_NULL;
+  }
+  /* TODO: is this necessary with pardiso_64? */
+  /* Set MKL interface layer */
+  /*
+#ifdef DLONG
+  MKL_Set_Interface_Layer(MKL_INTERFACE_ILP64);
+#else
+  MKL_Set_Interface_Layer(MKL_INTERFACE_LP64);
+#endif
+  */
+  p->n = A->n;
+  p->m = A->m;
+  p->n_plus_m = p->n + p->m;
+  /* Even though we overwrite rhs with sol pardiso requires the memory */
+  p->sol = (scs_float *)scs_malloc(sizeof(scs_float) * p->n_plus_m);
+  p->diag_r_idxs = (scs_int *)scs_calloc(p->n_plus_m, sizeof(scs_int));
+  p->diag_p = (scs_float *)scs_calloc(p->n, sizeof(scs_float));
+  /* MKL pardiso requires upper triangular CSR matrices. The KKT matrix stuffed
+   * as CSC lower triangular is equivalent. Pass upper=0. */
+  p->kkt = SCS(form_kkt)(A, P, p->diag_p, diag_r, p->diag_r_idxs, 0);
+  if (!(p->kkt)) {
+    scs_printf("Error in forming KKT matrix");
+    scs_free_lin_sys_work(p);
+    return SCS_NULL;
+  }
+  for (i = 0; i < 64; i++) {
+    p->iparm[i] = 0; /* Setup Pardiso control parameters */
+    p->pt[i] = 0;    /* Initialize the internal solver memory pointer */
+  }
+  /* Set Pardiso variables */
+  p->mtype = -2;         /* Real symmetric indefinite matrix */
+  p->nrhs = 1;           /* Number of right hand sides */
+  p->maxfct = 1;         /* Maximum number of numerical factorizations */
+  p->mnum = 1;           /* Which factorization to use */
+  p->error = 0;          /* Initialize error flag */
+  p->msglvl = VERBOSITY; /* Printing information */
+  /* For all iparm vars see MKL documentation */
+  p->iparm[0] = 1;          /* Pardiso must inspect iparm */
+  p->iparm[1] = 3;          /* Fill-in reordering from OpenMP */
+  p->iparm[5] = 1;          /* Write solution into b */
+  p->iparm[7] = 0;          /* Automatic iterative refinement calculation */
+  p->iparm[9] = 8;          /* Perturb the pivot elements with 1E-8 */
+  p->iparm[34] = 1;         /* Use C-style indexing for indices */
+  /* p->iparm[36] = -80; */ /* Form block sparse matrices */
+#ifdef SFLOAT
+  p->iparm[27] = 1; /* 1 is single precision, 0 is double */
+#endif
+  /* Permutation and symbolic factorization */
+  p->phase = PARDISO_SYMBOLIC;
+  _PARDISO(p->pt, &(p->maxfct), &(p->mnum), &(p->mtype), &(p->phase),
+           &(p->n_plus_m), p->kkt->x, p->kkt->p, p->kkt->i, SCS_NULL,
+           &(p->nrhs), p->iparm, &(p->msglvl), SCS_NULL, SCS_NULL, &(p->error));
+  if (p->error != 0) {
+    scs_printf("Error during symbolic factorization: %d", (int)p->error);
+    scs_free_lin_sys_work(p);
+    return SCS_NULL;
+  }
+  /* Numerical factorization */
+  p->phase = PARDISO_NUMERIC;
+  _PARDISO(p->pt, &(p->maxfct), &(p->mnum), &(p->mtype), &(p->phase),
+           &(p->n_plus_m), p->kkt->x, p->kkt->p, p->kkt->i, SCS_NULL,
+           &(p->nrhs), p->iparm, &(p->msglvl), SCS_NULL, SCS_NULL, &(p->error));
+  if (p->error) {
+    scs_printf("Error during numerical factorization: %d", (int)p->error);
+    scs_free_lin_sys_work(p);
+    return SCS_NULL;
+  }
+  if (p->iparm[21] < p->n) {
+    scs_printf("KKT matrix has < n positive eigenvalues. P not PSD.");
+    /* Release the workspace and the factorization instead of leaking them */
+    scs_free_lin_sys_work(p);
+    return SCS_NULL;
+  }
+  return p;
+}
+/* Solves the factorized linear system for right-hand side b; the solution
+ * is written back into b. ws and tol are not used by this backend.
+ * Returns the Pardiso error code (0 on success). */
+scs_int scs_solve_lin_sys(ScsLinSysWork *p, scs_float *b, const scs_float *ws,
+                          scs_float tol) {
+  /* Back substitution and iterative refinement */
+  p->phase = PARDISO_SOLVE;
+  _PARDISO(p->pt, &(p->maxfct), &(p->mnum), &(p->mtype), &(p->phase),
+           &(p->n_plus_m), p->kkt->x, p->kkt->p, p->kkt->i, SCS_NULL,
+           &(p->nrhs), p->iparm, &(p->msglvl), b, p->sol, &(p->error));
+  if (p->error == 0) {
+    return p->error;
+  }
+  scs_printf("Error during linear system solution: %d", (int)p->error);
+  return p->error;
+}
+/* Update factorization when R changes.
+ * Writes the new diagonal entries into the stored KKT matrix and redoes the
+ * numerical factorization. On failure the Pardiso error code is left in
+ * p->error and the workspace stays allocated. */
+void scs_update_lin_sys_diag_r(ScsLinSysWork *p, const scs_float *diag_r) {
+  scs_int i;
+  for (i = 0; i < p->n; ++i) {
+    /* top left block is R_x + P */
+    p->kkt->x[p->diag_r_idxs[i]] = p->diag_p[i] + diag_r[i];
+  }
+  for (i = p->n; i < p->n + p->m; ++i) {
+    /* bottom right block is -R_y */
+    p->kkt->x[p->diag_r_idxs[i]] = -diag_r[i];
+  }
+  /* Perform numerical factorization */
+  p->phase = PARDISO_NUMERIC;
+  _PARDISO(p->pt, &(p->maxfct), &(p->mnum), &(p->mtype), &(p->phase),
+           &(p->n_plus_m), p->kkt->x, p->kkt->p, p->kkt->i, SCS_NULL,
+           &(p->nrhs), p->iparm, &(p->msglvl), SCS_NULL, SCS_NULL, &(p->error));
+  if (p->error != 0) {
+    scs_printf("Error in PARDISO factorization when updating: %d.\n",
+               (int)p->error);
+    /* Do not free p here: this function returns void and receives p by
+     * value, so the caller cannot learn that the workspace is gone and would
+     * be left holding a dangling pointer. */
+  }
+}

data/vendor/scs/linsys/mkl/direct/private.h ADDED Viewed

@@ -0,0 +1,38 @@
+#ifndef PRIV_H_GUARD
+#define PRIV_H_GUARD
+#ifdef __cplusplus
+extern "C" {
+#endif
+#include "csparse.h"
+#include "linsys.h"
+struct SCS_LIN_SYS_WORK { /* MKL Pardiso direct-solver workspace */
+  ScsMatrix *kkt; /* Upper triangular KKT matrix (in CSR format) */
+  scs_float *sol; /* solution buffer passed to Pardiso as x */
+  scs_int n;      /* number of QP variables (columns of A) */
+  scs_int m;      /* number of QP constraints (rows of A) */
+  /* Pardiso variables */
+  void *pt[64];      /* internal solver memory pointer pt */
+  scs_int iparm[64]; /* Pardiso control parameters */
+  scs_int n_plus_m;  /* dimension of the linear system (n + m) */
+  scs_int mtype;     /* matrix type (-2 for real and symmetric indefinite) */
+  scs_int nrhs;      /* number of right-hand sides (1) */
+  scs_int maxfct;    /* maximum number of factors (1) */
+  scs_int mnum;      /* indicates matrix for the solution phase (1) */
+  scs_int phase;     /* control the execution phases of the solver */
+  scs_int error;     /* the error indicator (0 for no error) */
+  scs_int msglvl;    /* Message level information (0 for no output) */
+  /* These are required for matrix updates */
+  scs_int *diag_r_idxs; /* indices into kkt->x where R appears (n + m entries) */
+  scs_float *diag_p;    /* Diagonal values of P (n entries) */
+};
+#ifdef __cplusplus
+}
+#endif
+#endif

data/vendor/scs/linsys/scs_matrix.c CHANGED Viewed

@@ -117,6 +117,7 @@ static void compute_ruiz_mats(ScsMatrix *P, ScsMatrix *A, scs_float *Dt,
                               scs_float *Et, ScsConeWork *cone) {
   scs_int i, j, kk;
   scs_float wrk;
+  scs_float nm_a_col;
   /****************************  D  ****************************/
@@ -138,7 +139,8 @@ static void compute_ruiz_mats(ScsMatrix *P, ScsMatrix *A, scs_float *Dt,
   /* invert temporary vec to form D */
   for (i = 0; i < A->m; ++i) {
-    Dt[i] = SAFEDIV_POS(1.0, SQRTF(apply_limit(Dt[i])));
+    Dt[i] = SQRTF(apply_limit(Dt[i]));
+    Dt[i] = SAFEDIV_POS(1.0, Dt[i]);
   }
   /****************************  E  ****************************/
@@ -169,8 +171,10 @@ static void compute_ruiz_mats(ScsMatrix *P, ScsMatrix *A, scs_float *Dt,
   /* calculate col norms, E */
   for (i = 0; i < A->n; ++i) {
-    Et[i] = MAX(Et[i], SCS(norm_inf)(&(A->x[A->p[i]]), A->p[i + 1] - A->p[i]));
-    Et[i] = SAFEDIV_POS(1.0, SQRTF(apply_limit(Et[i])));
+    nm_a_col = SCS(norm_inf)(&(A->x[A->p[i]]), A->p[i + 1] - A->p[i]);
+    Et[i] = MAX(Et[i], nm_a_col);
+    Et[i] = SQRTF(apply_limit(Et[i]));
+    Et[i] = SAFEDIV_POS(1.0, Et[i]);
   }
 }
@@ -201,7 +205,8 @@ static void compute_l2_mats(ScsMatrix *P, ScsMatrix *A, scs_float *Dt,
   SCS(enforce_cone_boundaries)(cone, Dt, &SCS(mean));
   for (i = 0; i < A->m; ++i) {
-    Dt[i] = SAFEDIV_POS(1.0, SQRTF(apply_limit(Dt[i])));
+    Dt[i] = SQRTF(apply_limit(Dt[i]));
+    Dt[i] = SAFEDIV_POS(1.0, Dt[i]);
   }
   /****************************  E  ****************************/
@@ -233,7 +238,8 @@ static void compute_l2_mats(ScsMatrix *P, ScsMatrix *A, scs_float *Dt,
   /* calculate col norms, E */
   for (i = 0; i < A->n; ++i) {
     Et[i] += SCS(norm_sq)(&(A->x[A->p[i]]), A->p[i + 1] - A->p[i]);
-    Et[i] = SAFEDIV_POS(1.0, SQRTF(apply_limit(SQRTF(Et[i]))));
+    Et[i] = SQRTF(apply_limit(SQRTF(Et[i])));
+    Et[i] = SAFEDIV_POS(1.0, Et[i]);
   }
 }

data/vendor/scs/scs.mk CHANGED Viewed

@@ -69,6 +69,7 @@ DIRSRC = $(LINSYS)/cpu/direct
 INDIRSRC = $(LINSYS)/cpu/indirect
 GPUDIR = $(LINSYS)/gpu/direct
 GPUINDIR = $(LINSYS)/gpu/indirect
+MKLSRC = $(LINSYS)/mkl/direct
 EXTSRC = $(LINSYS)/external
@@ -83,44 +84,56 @@ ifeq ($(PREFIX),)
   PREFIX = /usr/local
 endif
-OPT_FLAGS =
-########### OPTIONAL FLAGS ##########
+########### CUSTOM FLAGS ##########
 # these can all be overridden from the command line
+CUSTOM_FLAGS =
 # e.g. make DLONG=1 will override the setting below
 DLONG = 0
 ifneq ($(DLONG), 0)
-OPT_FLAGS += -DDLONG=$(DLONG) # use longs rather than ints
+CUSTOM_FLAGS += -DDLONG=$(DLONG) # use longs rather than ints
 endif
 CTRLC = 1
 ifneq ($(CTRLC), 0)
-OPT_FLAGS += -DCTRLC=$(CTRLC) # graceful interrupts with ctrl-c
+CUSTOM_FLAGS += -DCTRLC=$(CTRLC) # graceful interrupts with ctrl-c
 endif
 SFLOAT = 0
 ifneq ($(SFLOAT), 0)
-OPT_FLAGS += -DSFLOAT=$(SFLOAT) # use floats rather than doubles
-endif
-NOTIMER = 0
-ifneq ($(NOTIMER), 0)
-OPT_FLAGS += -DNOTIMER=$(NOTIMER) # no timing, times reported as nan
+CUSTOM_FLAGS += -DSFLOAT=$(SFLOAT) # use floats rather than doubles
 endif
 GPU_TRANSPOSE_MAT = 1
 ifneq ($(GPU_TRANSPOSE_MAT), 0)
-OPT_FLAGS += -DGPU_TRANSPOSE_MAT=$(GPU_TRANSPOSE_MAT) # tranpose A mat in GPU memory
+CUSTOM_FLAGS += -DGPU_TRANSPOSE_MAT=$(GPU_TRANSPOSE_MAT) # transpose A mat in GPU memory
+endif
+NO_TIMER = 0
+ifneq ($(NO_TIMER), 0)
+CUSTOM_FLAGS += -DNO_TIMER=$(NO_TIMER) # no timing, times reported as nan
+endif
+NO_VALIDATE = 0
+ifneq ($(NO_VALIDATE), 0)
+CUSTOM_FLAGS += -DNO_VALIDATE=$(NO_VALIDATE) # perform problem validation or skip
+endif
+NO_PRINTING = 0
+ifneq ($(NO_PRINTING), 0)
+CUSTOM_FLAGS += -DNO_PRINTING=$(NO_PRINTING) # disable printing
 endif
-NOVALIDATE = 0
-ifneq ($(NOVALIDATE), 0)
-OPT_FLAGS += -DNOVALIDATE=$(NOVALIDATE) # perform problem validation or skip
+NO_READ_WRITE = 0
+ifneq ($(NO_READ_WRITE), 0)
+CUSTOM_FLAGS += -DNO_READ_WRITE=$(NO_READ_WRITE) # disable read / write
 endif
 ### VERBOSITY LEVELS: 0,1,2,...
 VERBOSITY = 0
 ifneq ($(VERBOSITY), 0)
-OPT_FLAGS += -DVERBOSITY=$(VERBOSITY) # verbosity level
+CUSTOM_FLAGS += -DVERBOSITY=$(VERBOSITY) # verbosity level
 endif
 COVERAGE = 0
 ifneq ($(COVERAGE), 0)
-override CFLAGS += --coverage # generate test coverage data
+CUSTOM_FLAGS += --coverage # generate test coverage data
 endif
+# See: https://www.intel.com/content/www/us/en/developer/tools/oneapi/onemkl-link-line-advisor.html
+# This is probably not correct for other systems. TODO: update this
+# to work for all combinations of platform / compiler / threading options.
+MKLFLAGS = -L$(MKLROOT) -L$(MKLROOT)/lib -Wl,--no-as-needed -lmkl_rt -lmkl_gnu_thread -lmkl_core -lgomp -lpthread -ldl
 ############ OPENMP: ############
 # set USE_OPENMP = 1 to allow openmp (multi-threaded matrix multiplies):
@@ -130,7 +143,7 @@ endif
 USE_OPENMP = 0
 ifneq ($(USE_OPENMP), 0)
   override CFLAGS += -fopenmp
-  LDFLAGS += -lgomp
+  LDFLAGS += -fopenmp
 endif
 ############ SDPS: BLAS + LAPACK ############
@@ -138,44 +151,44 @@ endif
 # NB: point the libraries to the locations where
 # you have blas and lapack installed
+BLASLDFLAGS =
 USE_LAPACK = 1
 ifneq ($(USE_LAPACK), 0)
   # edit these for your setup:
-  BLASLDFLAGS = -llapack -lblas # -lgfortran
-  LDFLAGS += $(BLASLDFLAGS)
-  OPT_FLAGS += -DUSE_LAPACK
+  BLASLDFLAGS += -llapack -lblas # -lgfortran
+  CUSTOM_FLAGS += -DUSE_LAPACK
   BLAS64 = 0
   ifneq ($(BLAS64), 0)
-  OPT_FLAGS += -DBLAS64=$(BLAS64) # if blas/lapack lib uses 64 bit ints
+  CUSTOM_FLAGS += -DBLAS64=$(BLAS64) # if blas/lapack lib uses 64 bit ints
   endif
   NOBLASSUFFIX = 0
   ifneq ($(NOBLASSUFFIX), 0)
-  OPT_FLAGS += -DNOBLASSUFFIX=$(NOBLASSUFFIX) # hack to strip blas suffix
+  CUSTOM_FLAGS += -DNOBLASSUFFIX=$(NOBLASSUFFIX) # hack to strip blas suffix
   endif
   BLASSUFFIX = "_"
   ifneq ($(BLASSUFFIX), "_")
-  OPT_FLAGS += -DBLASSUFFIX=$(BLASSUFFIX) # blas suffix (underscore usually)
+  CUSTOM_FLAGS += -DBLASSUFFIX=$(BLASSUFFIX) # blas suffix (underscore usually)
   endif
 endif
 MATLAB_MEX_FILE = 0
 ifneq ($(MATLAB_MEX_FILE), 0)
-OPT_FLAGS += -DMATLAB_MEX_FILE=$(MATLAB_MEX_FILE) # matlab mex
+CUSTOM_FLAGS += -DMATLAB_MEX_FILE=$(MATLAB_MEX_FILE) # matlab mex
 endif
 PYTHON = 0
 ifneq ($(PYTHON), 0)
-OPT_FLAGS += -DPYTHON=$(PYTHON) # python extension
+CUSTOM_FLAGS += -DPYTHON=$(PYTHON) # python extension
 endif
 USING_R = 0
 ifneq ($(USING_R), 0)
-OPT_FLAGS += -DUSING_R=$(USING_R) # R extension
+CUSTOM_FLAGS += -DUSING_R=$(USING_R) # R extension
 endif
 # debug to see var values, e.g. 'make print-OBJECTS' shows OBJECTS value
 print-%: ; @echo $*=$($*)
-override CFLAGS += $(OPT_FLAGS)
-CUDAFLAGS += $(OPT_FLAGS)
+override CFLAGS += $(CUSTOM_FLAGS)
+CUDAFLAGS += $(CUSTOM_FLAGS)