scs 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +12 -0
  3. data/LICENSE.txt +22 -0
  4. data/README.md +98 -0
  5. data/ext/scs/extconf.rb +29 -0
  6. data/lib/scs.rb +17 -0
  7. data/lib/scs/ffi.rb +117 -0
  8. data/lib/scs/solver.rb +173 -0
  9. data/lib/scs/version.rb +3 -0
  10. data/vendor/scs/LICENSE.txt +21 -0
  11. data/vendor/scs/Makefile +164 -0
  12. data/vendor/scs/README.md +222 -0
  13. data/vendor/scs/include/aa.h +56 -0
  14. data/vendor/scs/include/cones.h +46 -0
  15. data/vendor/scs/include/ctrlc.h +33 -0
  16. data/vendor/scs/include/glbopts.h +177 -0
  17. data/vendor/scs/include/linalg.h +26 -0
  18. data/vendor/scs/include/linsys.h +64 -0
  19. data/vendor/scs/include/normalize.h +18 -0
  20. data/vendor/scs/include/rw.h +17 -0
  21. data/vendor/scs/include/scs.h +161 -0
  22. data/vendor/scs/include/scs_blas.h +51 -0
  23. data/vendor/scs/include/util.h +65 -0
  24. data/vendor/scs/linsys/amatrix.c +305 -0
  25. data/vendor/scs/linsys/amatrix.h +36 -0
  26. data/vendor/scs/linsys/amatrix.o +0 -0
  27. data/vendor/scs/linsys/cpu/direct/private.c +366 -0
  28. data/vendor/scs/linsys/cpu/direct/private.h +26 -0
  29. data/vendor/scs/linsys/cpu/direct/private.o +0 -0
  30. data/vendor/scs/linsys/cpu/indirect/private.c +256 -0
  31. data/vendor/scs/linsys/cpu/indirect/private.h +31 -0
  32. data/vendor/scs/linsys/cpu/indirect/private.o +0 -0
  33. data/vendor/scs/linsys/external/amd/LICENSE.txt +934 -0
  34. data/vendor/scs/linsys/external/amd/SuiteSparse_config.c +469 -0
  35. data/vendor/scs/linsys/external/amd/SuiteSparse_config.h +254 -0
  36. data/vendor/scs/linsys/external/amd/SuiteSparse_config.o +0 -0
  37. data/vendor/scs/linsys/external/amd/amd.h +400 -0
  38. data/vendor/scs/linsys/external/amd/amd_1.c +180 -0
  39. data/vendor/scs/linsys/external/amd/amd_1.o +0 -0
  40. data/vendor/scs/linsys/external/amd/amd_2.c +1842 -0
  41. data/vendor/scs/linsys/external/amd/amd_2.o +0 -0
  42. data/vendor/scs/linsys/external/amd/amd_aat.c +184 -0
  43. data/vendor/scs/linsys/external/amd/amd_aat.o +0 -0
  44. data/vendor/scs/linsys/external/amd/amd_control.c +64 -0
  45. data/vendor/scs/linsys/external/amd/amd_control.o +0 -0
  46. data/vendor/scs/linsys/external/amd/amd_defaults.c +37 -0
  47. data/vendor/scs/linsys/external/amd/amd_defaults.o +0 -0
  48. data/vendor/scs/linsys/external/amd/amd_dump.c +179 -0
  49. data/vendor/scs/linsys/external/amd/amd_dump.o +0 -0
  50. data/vendor/scs/linsys/external/amd/amd_global.c +16 -0
  51. data/vendor/scs/linsys/external/amd/amd_global.o +0 -0
  52. data/vendor/scs/linsys/external/amd/amd_info.c +119 -0
  53. data/vendor/scs/linsys/external/amd/amd_info.o +0 -0
  54. data/vendor/scs/linsys/external/amd/amd_internal.h +304 -0
  55. data/vendor/scs/linsys/external/amd/amd_order.c +199 -0
  56. data/vendor/scs/linsys/external/amd/amd_order.o +0 -0
  57. data/vendor/scs/linsys/external/amd/amd_post_tree.c +120 -0
  58. data/vendor/scs/linsys/external/amd/amd_post_tree.o +0 -0
  59. data/vendor/scs/linsys/external/amd/amd_postorder.c +206 -0
  60. data/vendor/scs/linsys/external/amd/amd_postorder.o +0 -0
  61. data/vendor/scs/linsys/external/amd/amd_preprocess.c +118 -0
  62. data/vendor/scs/linsys/external/amd/amd_preprocess.o +0 -0
  63. data/vendor/scs/linsys/external/amd/amd_valid.c +92 -0
  64. data/vendor/scs/linsys/external/amd/amd_valid.o +0 -0
  65. data/vendor/scs/linsys/external/amd/changes +11 -0
  66. data/vendor/scs/linsys/external/qdldl/LICENSE +201 -0
  67. data/vendor/scs/linsys/external/qdldl/README.md +120 -0
  68. data/vendor/scs/linsys/external/qdldl/changes +4 -0
  69. data/vendor/scs/linsys/external/qdldl/qdldl.c +298 -0
  70. data/vendor/scs/linsys/external/qdldl/qdldl.h +177 -0
  71. data/vendor/scs/linsys/external/qdldl/qdldl.o +0 -0
  72. data/vendor/scs/linsys/external/qdldl/qdldl_types.h +21 -0
  73. data/vendor/scs/linsys/gpu/gpu.c +41 -0
  74. data/vendor/scs/linsys/gpu/gpu.h +85 -0
  75. data/vendor/scs/linsys/gpu/indirect/private.c +304 -0
  76. data/vendor/scs/linsys/gpu/indirect/private.h +36 -0
  77. data/vendor/scs/scs.mk +181 -0
  78. data/vendor/scs/src/aa.c +224 -0
  79. data/vendor/scs/src/aa.o +0 -0
  80. data/vendor/scs/src/cones.c +802 -0
  81. data/vendor/scs/src/cones.o +0 -0
  82. data/vendor/scs/src/ctrlc.c +77 -0
  83. data/vendor/scs/src/ctrlc.o +0 -0
  84. data/vendor/scs/src/linalg.c +84 -0
  85. data/vendor/scs/src/linalg.o +0 -0
  86. data/vendor/scs/src/normalize.c +93 -0
  87. data/vendor/scs/src/normalize.o +0 -0
  88. data/vendor/scs/src/rw.c +167 -0
  89. data/vendor/scs/src/rw.o +0 -0
  90. data/vendor/scs/src/scs.c +978 -0
  91. data/vendor/scs/src/scs.o +0 -0
  92. data/vendor/scs/src/scs_version.c +5 -0
  93. data/vendor/scs/src/scs_version.o +0 -0
  94. data/vendor/scs/src/util.c +196 -0
  95. data/vendor/scs/src/util.o +0 -0
  96. data/vendor/scs/test/data/small_random_socp +0 -0
  97. data/vendor/scs/test/minunit.h +13 -0
  98. data/vendor/scs/test/problem_utils.h +93 -0
  99. data/vendor/scs/test/problems/rob_gauss_cov_est.h +85 -0
  100. data/vendor/scs/test/problems/small_lp.h +50 -0
  101. data/vendor/scs/test/problems/small_random_socp.h +33 -0
  102. data/vendor/scs/test/random_socp_prob.c +171 -0
  103. data/vendor/scs/test/run_from_file.c +69 -0
  104. data/vendor/scs/test/run_tests +2 -0
  105. data/vendor/scs/test/run_tests.c +32 -0
  106. metadata +203 -0
@@ -0,0 +1,36 @@
1
+ #ifndef PRIV_H_GUARD
2
+ #define PRIV_H_GUARD
3
+
4
+ #ifdef __cplusplus
5
+ extern "C" {
6
+ #endif
7
+
8
+ #include "gpu.h"
9
+ #include "glbopts.h"
10
+ #include "linalg.h"
11
+ #include "scs.h"
12
+
13
+
14
+ struct SCS_LIN_SYS_WORK {
15
+ /* reporting */
16
+ scs_int tot_cg_its;
17
+ scs_float total_solve_time;
18
+ /* ALL BELOW HOSTED ON THE GPU */
19
+ scs_float *p; /* cg iterate, n */
20
+ scs_float *r; /* cg residual, n */
21
+ scs_float *Gp; /* G * p, n */
22
+ scs_float *bg; /* b, n */
23
+ scs_float *tmp_m; /* m, used in mat_vec */
24
+ scs_float *z; /* preconditioned */
25
+ scs_float *M; /* preconditioner */
26
+ ScsGpuMatrix *Ag; /* A matrix on GPU */
27
+ ScsGpuMatrix *Agt; /* A trans matrix on GPU */
28
+ /* CUDA */
29
+ cublasHandle_t cublas_handle;
30
+ cusparseHandle_t cusparse_handle;
31
+ };
32
+
33
+ #ifdef __cplusplus
34
+ }
35
+ #endif
36
+ #endif
@@ -0,0 +1,181 @@
1
+ ifeq ($(OS),Windows_NT)
2
+ UNAME = CYGWINorMINGWorMSYS
3
+ else
4
+ UNAME = $(shell uname -s)
5
+ endif
6
+
7
+ #CC = gcc
8
+ # For cross-compiling with mingw use these.
9
+ #CC = i686-w64-mingw32-gcc -m32
10
+ #CC = x86_64-w64-mingw32-gcc-4.8
11
+ CUCC = $(CC) #Don't need to use nvcc, since using cuda blas APIs
12
+
13
+ # For GPU must add cuda libs to path, e.g.
14
+ # export DYLD_LIBRARY_PATH=/usr/local/cuda/lib:$DYLD_LIBRARY_PATH
15
+
16
+ ifneq (, $(findstring CYGWIN, $(UNAME)))
17
+ ISWINDOWS := 1
18
+ else
19
+ ifneq (, $(findstring MINGW, $(UNAME)))
20
+ ISWINDOWS := 1
21
+ else
22
+ ifneq (, $(findstring MSYS, $(UNAME)))
23
+ ISWINDOWS := 1
24
+ else
25
+ ifneq (, $(findstring mingw, $(CC)))
26
+ ISWINDOWS := 1
27
+ else
28
+ ISWINDOWS := 0
29
+ endif
30
+ endif
31
+ endif
32
+ endif
33
+
34
+ ifeq ($(UNAME), Darwin)
35
+ # we're on apple, no need to link rt library
36
+ LDFLAGS += -lm
37
+ SHARED = dylib
38
+ SONAME = -install_name
39
+ else
40
+ ifeq ($(ISWINDOWS), 1)
41
+ # we're on windows (cygwin or msys)
42
+ LDFLAGS += -lm
43
+ SHARED = dll
44
+ SONAME = -soname
45
+ else
46
+ # we're on a linux system, use accurate timer provided by clock_gettime()
47
+ LDFLAGS += -lm -lrt
48
+ SHARED = so
49
+ SONAME = -soname
50
+ endif
51
+ endif
52
+
53
+ #TODO: check if this works for all platforms:
54
+ ifeq ($(CUDA_PATH), )
55
+ CUDA_PATH=/usr/local/cuda
56
+ endif
57
+ CULDFLAGS = -L$(CUDA_PATH)/lib -L$(CUDA_PATH)/lib64 -lcudart -lcublas -lcusparse
58
+ CUDAFLAGS = $(CFLAGS) -I$(CUDA_PATH)/include -Ilinsys/gpu -Wno-c++11-long-long # turn off annoying long-long warnings in cuda header files
59
+
60
+ # Add on default CFLAGS
61
+ OPT = -O3
62
+ override CFLAGS += -g -Wall -Wwrite-strings -pedantic -funroll-loops -Wstrict-prototypes -I. -Iinclude -Ilinsys $(OPT)
63
+ ifneq ($(ISWINDOWS), 1)
64
+ override CFLAGS += -fPIC
65
+ endif
66
+
67
+ LINSYS = linsys
68
+ DIRSRC = $(LINSYS)/cpu/direct
69
+ INDIRSRC = $(LINSYS)/cpu/indirect
70
+ GPUDIR = $(LINSYS)/gpu/direct
71
+ GPUINDIR = $(LINSYS)/gpu/indirect
72
+
73
+ EXTSRC = $(LINSYS)/external
74
+
75
+ OUT = out
76
+ AR = ar
77
+ ARFLAGS = rv
78
+ ARCHIVE = $(AR) $(ARFLAGS)
79
+ RANLIB = ranlib
80
+ INSTALL = install
81
+
82
+ ifeq ($(PREFIX),)
83
+ PREFIX = /usr/local
84
+ endif
85
+
86
+ OPT_FLAGS =
87
+ ########### OPTIONAL FLAGS ##########
88
+ # these can all be overridden from the command line
89
+ # e.g. make DLONG=1 will override the setting below
90
+ DLONG = 0
91
+ ifneq ($(DLONG), 0)
92
+ OPT_FLAGS += -DDLONG=$(DLONG) # use longs rather than ints
93
+ endif
94
+ CTRLC = 1
95
+ ifneq ($(CTRLC), 0)
96
+ OPT_FLAGS += -DCTRLC=$(CTRLC) # graceful interrupts with ctrl-c
97
+ endif
98
+ SFLOAT = 0
99
+ ifneq ($(SFLOAT), 0)
100
+ OPT_FLAGS += -DSFLOAT=$(SFLOAT) # use floats rather than doubles
101
+ endif
102
+ NOVALIDATE = 0
103
+ ifneq ($(NOVALIDATE), 0)
104
+ OPT_FLAGS += -DNOVALIDATE=$(NOVALIDATE)$ # remove data validation step
105
+ endif
106
+ NOTIMER = 0
107
+ ifneq ($(NOTIMER), 0)
108
+ OPT_FLAGS += -DNOTIMER=$(NOTIMER) # no timing, times reported as nan
109
+ endif
110
+ COPYAMATRIX = 1
111
+ ifneq ($(COPYAMATRIX), 0)
112
+ OPT_FLAGS += -DCOPYAMATRIX=$(COPYAMATRIX) # if normalize, copy A
113
+ endif
114
+ GPU_TRANSPOSE_MAT = 1
115
+ ifneq ($(GPU_TRANSPOSE_MAT), 0)
116
+ OPT_FLAGS += -DGPU_TRANSPOSE_MAT=$(GPU_TRANSPOSE_MAT) # transpose A mat in GPU memory
117
+ endif
118
+
119
+ ### VERBOSITY LEVELS: 0,1,2
120
+ EXTRA_VERBOSE = 0
121
+ ifneq ($(EXTRA_VERBOSE), 0)
122
+ OPT_FLAGS += -DEXTRA_VERBOSE=$(EXTRA_VERBOSE) # extra verbosity level
123
+ endif
124
+
125
+ ############ OPENMP: ############
126
+ # set USE_OPENMP = 1 to allow openmp (multi-threaded matrix multiplies):
127
+ # set the number of threads to, for example, 4 by entering the command:
128
+ # export OMP_NUM_THREADS=4
129
+
130
+ USE_OPENMP = 0
131
+ ifneq ($(USE_OPENMP), 0)
132
+ override CFLAGS += -fopenmp
133
+ LDFLAGS += -lgomp
134
+ endif
135
+
136
+ ############ SDPS: BLAS + LAPACK ############
137
+ # set USE_LAPACK = 1 below to enable solving SDPs
138
+ # NB: point the libraries to the locations where
139
+ # you have blas and lapack installed
140
+
141
+ USE_LAPACK = 1
142
+ ifneq ($(USE_LAPACK), 0)
143
+ # edit these for your setup:
144
+ BLASLDFLAGS = -lblas -llapack #-lgfortran
145
+ LDFLAGS += $(BLASLDFLAGS)
146
+ OPT_FLAGS += -DUSE_LAPACK
147
+
148
+ BLAS64 = 0
149
+ ifneq ($(BLAS64), 0)
150
+ OPT_FLAGS += -DBLAS64=$(BLAS64) # if blas/lapack lib uses 64 bit ints
151
+ endif
152
+
153
+ NOBLASSUFFIX = 0
154
+ ifneq ($(NOBLASSUFFIX), 0)
155
+ OPT_FLAGS += -DNOBLASSUFFIX=$(NOBLASSUFFIX) # hack to strip blas suffix
156
+ endif
157
+
158
+ BLASSUFFIX = "_"
159
+ ifneq ($(BLASSUFFIX), "_")
160
+ OPT_FLAGS += -DBLASSUFFIX=$(BLASSUFFIX) # blas suffix (underscore usually)
161
+ endif
162
+ endif
163
+
164
+ MATLAB_MEX_FILE = 0
165
+ ifneq ($(MATLAB_MEX_FILE), 0)
166
+ OPT_FLAGS += -DMATLAB_MEX_FILE=$(MATLAB_MEX_FILE) # matlab mex
167
+ endif
168
+ PYTHON = 0
169
+ ifneq ($(PYTHON), 0)
170
+ OPT_FLAGS += -DPYTHON=$(PYTHON) # python extension
171
+ endif
172
+ USING_R = 0
173
+ ifneq ($(USING_R), 0)
174
+ OPT_FLAGS += -DUSING_R=$(USING_R) # R extension
175
+ endif
176
+
177
+ # debug to see var values, e.g. 'make print-OBJECTS' shows OBJECTS value
178
+ print-%: ; @echo $*=$($*)
179
+
180
+ override CFLAGS += $(OPT_FLAGS)
181
+ CUDAFLAGS += $(OPT_FLAGS)
@@ -0,0 +1,224 @@
1
+ #include "aa.h"
2
+ #include "scs_blas.h"
3
+
4
+ /* This file uses Anderson acceleration to improve the convergence of
5
+ * a fixed point mapping.
6
+ * At each iteration we need to solve a (small) linear system, we
7
+ * do this using LAPACK ?gesv.
8
+ */
9
+
10
+ #ifndef USE_LAPACK
11
+
12
+ typedef void * ACCEL_WORK;
13
+
14
+ AaWork *aa_init(aa_int dim, aa_int aa_mem, aa_int type1) { return SCS_NULL; }
15
+ aa_int aa_apply(aa_float *f, const aa_float *x, AaWork *a) { return 0; }
16
+ void aa_finish(AaWork *a) {}
17
+
18
+ #else
19
+
20
+ /* contains the necessary parameters to perform aa at each step */
21
+ struct ACCEL_WORK {
22
+ aa_int type1; /* bool, if true type 1 aa otherwise type 2 */
23
+ aa_int k; /* aa memory */
24
+ aa_int l; /* variable dimension */
25
+ aa_int iter; /* current iteration */
26
+
27
+ aa_float *x; /* x input to map*/
28
+ aa_float *f; /* f(x) output of map */
29
+ aa_float *g; /* x - f(x) */
30
+
31
+ /* from previous iteration */
32
+ aa_float *g_prev; /* x - f(x) */
33
+
34
+ aa_float *y; /* g - g_prev */
35
+ aa_float *s; /* x - x_prev */
36
+ aa_float *d; /* f - f_prev */
37
+
38
+ aa_float *Y; /* matrix of stacked y values */
39
+ aa_float *S; /* matrix of stacked s values */
40
+ aa_float *D; /* matrix of stacked d values = (S-Y) */
41
+ aa_float *M; /* S'Y or Y'Y depending on type of aa */
42
+
43
+ /* workspace variables */
44
+ aa_float *work;
45
+ blas_int *ipiv;
46
+ };
47
+
48
+ /* BLAS functions used */
49
+ aa_float BLAS(nrm2)(blas_int *n, aa_float *x, blas_int *incx);
50
+ void BLAS(axpy)(blas_int *n, aa_float *a, const aa_float *x, blas_int *incx,
51
+ aa_float *y, blas_int *incy);
52
+ void BLAS(gemv)(const char *trans, const blas_int *m, const blas_int *n,
53
+ const aa_float *alpha, const aa_float *a, const blas_int *lda,
54
+ const aa_float *x, const blas_int *incx, const aa_float *beta,
55
+ aa_float *y, const blas_int *incy);
56
+ void BLAS(gesv)(blas_int *n, blas_int *nrhs, aa_float *a, blas_int *lda,
57
+ blas_int *ipiv, aa_float *b, blas_int *ldb, blas_int *info);
58
+ void BLAS(gemm)(const char *transa, const char *transb, blas_int *m,
59
+ blas_int *n, blas_int *k, aa_float *alpha, aa_float *a,
60
+ blas_int *lda, aa_float *b, blas_int *ldb, aa_float *beta,
61
+ aa_float *c, blas_int *ldc);
62
+
63
+ /* sets a->M to S'Y or Y'Y depending on type of aa used */
64
+ static void set_m(AaWork *a) {
65
+ blas_int bl = (blas_int)(a->l), bk = (blas_int)a->k;
66
+ aa_float onef = 1.0, zerof = 0.0;
67
+ BLAS(gemm)
68
+ ("Trans", "No", &bk, &bk, &bl, &onef, a->type1 ? a->S : a->Y, &bl, a->Y, &bl,
69
+ &zerof, a->M, &bk);
70
+ }
71
+
72
+ /* updates the workspace parameters for aa for this iteration */
73
+ static void update_accel_params(const aa_float *x, const aa_float *f,
74
+ AaWork *a) {
75
+ /* at the start a->x = x_prev and a->f = f_prev */
76
+ aa_int idx = a->iter % a->k;
77
+ aa_int l = a->l;
78
+
79
+ blas_int one = 1;
80
+ blas_int bl = (blas_int)l;
81
+ aa_float neg_onef = -1.0;
82
+
83
+ /* g = x */
84
+ memcpy(a->g, x, sizeof(aa_float) * l);
85
+ /* s = x */
86
+ memcpy(a->s, x, sizeof(aa_float) * l);
87
+ /* d = f */
88
+ memcpy(a->d, f, sizeof(aa_float) * l);
89
+ /* g -= f */
90
+ BLAS(axpy)(&bl, &neg_onef, f, &one, a->g, &one);
91
+ /* s -= x_prev */
92
+ BLAS(axpy)(&bl, &neg_onef, a->x, &one, a->s, &one);
93
+ /* d -= f_prev */
94
+ BLAS(axpy)(&bl, &neg_onef, a->f, &one, a->d, &one);
95
+
96
+ /* g, s, d correct here */
97
+
98
+ /* y = g */
99
+ memcpy(a->y, a->g, sizeof(aa_float) * l);
100
+ /* y -= g_prev */
101
+ BLAS(axpy)(&bl, &neg_onef, a->g_prev, &one, a->y, &one);
102
+
103
+ /* y correct here */
104
+
105
+ /* copy y into idx col of Y */
106
+ memcpy(&(a->Y[idx * l]), a->y, sizeof(aa_float) * l);
107
+ /* copy s into idx col of S */
108
+ memcpy(&(a->S[idx * l]), a->s, sizeof(aa_float) * l);
109
+ /* copy d into idx col of D */
110
+ memcpy(&(a->D[idx * l]), a->d, sizeof(aa_float) * l);
111
+
112
+ /* Y, S,D correct here */
113
+
114
+ memcpy(a->f, f, sizeof(aa_float) * l);
115
+ memcpy(a->x, x, sizeof(aa_float) * l);
116
+
117
+ /* x, f correct here */
118
+
119
+ /* set M = S'*Y */
120
+ set_m(a);
121
+
122
+ /* M correct here */
123
+
124
+ memcpy(a->g_prev, a->g, sizeof(aa_float) * l);
125
+
126
+ /* g_prev set for next iter here */
127
+ }
128
+
129
+ /* solves the system of equations to perform the aa update
130
+ * at the end f contains the next iterate to be returned
131
+ */
132
+ static aa_int solve(aa_float *f, AaWork *a, aa_int len) {
133
+ blas_int info = -1, bl = (blas_int)(a->l), one = 1, blen = (blas_int)len,
134
+ bk = (blas_int)a->k;
135
+ aa_float neg_onef = -1.0, onef = 1.0, zerof = 0.0, nrm;
136
+ /* work = S'g or Y'g */
137
+ BLAS(gemv)
138
+ ("Trans", &bl, &blen, &onef, a->type1 ? a->S : a->Y, &bl, a->g, &one, &zerof,
139
+ a->work, &one);
140
+ /* work = M \ work, where M = S'Y or M = Y'Y */
141
+ BLAS(gesv)(&blen, &one, a->M, &bk, a->ipiv, a->work, &blen, &info);
142
+ nrm = BLAS(nrm2)(&bk, a->work, &one);
143
+ if (info < 0 || nrm >= MAX_AA_NRM) {
144
+ #if EXTRA_VERBOSE > 0
145
+ scs_printf("Error in AA type %i, iter: %i, info: %i, norm %1.2e\n",
146
+ a->type1 ? 1 : 2, (int)a->iter, (int)info, nrm);
147
+ #endif
148
+ return -1;
149
+ }
150
+ /* if solve was successful then set f -= D * work */
151
+ BLAS(gemv)
152
+ ("NoTrans", &bl, &blen, &neg_onef, a->D, &bl, a->work, &one, &onef, f, &one);
153
+ return (aa_int)info;
154
+ }
155
+
156
+ /*
157
+ * API functions below this line, see aa.h for descriptions.
158
+ */
159
+ AaWork *aa_init(aa_int l, aa_int aa_mem, aa_int type1) {
160
+ AaWork *a = (AaWork *)calloc(1, sizeof(AaWork));
161
+ if (!a) {
162
+ scs_printf("Failed to allocate memory for AA.\n");
163
+ return (void *)0;
164
+ }
165
+ a->type1 = type1;
166
+ a->iter = 0;
167
+ a->l = l;
168
+ a->k = aa_mem;
169
+ if (a->k <= 0) {
170
+ return a;
171
+ }
172
+
173
+ a->x = (aa_float *)calloc(a->l, sizeof(aa_float));
174
+ a->f = (aa_float *)calloc(a->l, sizeof(aa_float));
175
+ a->g = (aa_float *)calloc(a->l, sizeof(aa_float));
176
+
177
+ a->g_prev = (aa_float *)calloc(a->l, sizeof(aa_float));
178
+
179
+ a->y = (aa_float *)calloc(a->l, sizeof(aa_float));
180
+ a->s = (aa_float *)calloc(a->l, sizeof(aa_float));
181
+ a->d = (aa_float *)calloc(a->l, sizeof(aa_float));
182
+
183
+ a->Y = (aa_float *)calloc(a->l * a->k, sizeof(aa_float));
184
+ a->S = (aa_float *)calloc(a->l * a->k, sizeof(aa_float));
185
+ a->D = (aa_float *)calloc(a->l * a->k, sizeof(aa_float));
186
+
187
+ a->M = (aa_float *)calloc(a->k * a->k, sizeof(aa_float));
188
+ a->work = (aa_float *)calloc(a->k, sizeof(aa_float));
189
+ a->ipiv = (blas_int *)calloc(a->k, sizeof(blas_int));
190
+ return a;
191
+ }
192
+
193
+ aa_int aa_apply(aa_float *f, const aa_float *x, AaWork *a) {
194
+ if (a->k <= 0) {
195
+ return 0;
196
+ }
197
+ update_accel_params(x, f, a);
198
+ if (a->iter++ == 0) {
199
+ return 0;
200
+ }
201
+ /* solve linear system, new point overwrites f if successful */
202
+ return solve(f, a, MIN(a->iter - 1, a->k));
203
+ }
204
+
205
+ void aa_finish(AaWork *a) {
206
+ if (a) {
207
+ free(a->x);
208
+ free(a->f);
209
+ free(a->g);
210
+ free(a->g_prev);
211
+ free(a->y);
212
+ free(a->s);
213
+ free(a->d);
214
+ free(a->Y);
215
+ free(a->S);
216
+ free(a->D);
217
+ free(a->M);
218
+ free(a->work);
219
+ free(a->ipiv);
220
+ free(a);
221
+ }
222
+ }
223
+
224
+ #endif
Binary file
@@ -0,0 +1,802 @@
1
+ #include "cones.h"
2
+
3
+ #include "linalg.h"
4
+ #include "scs.h"
5
+ #include "scs_blas.h" /* contains BLAS(X) macros and type info */
6
+ #include "util.h"
7
+
8
+ #define CONE_RATE (2)
9
+ #define CONE_TOL (1e-8)
10
+ #define CONE_THRESH (1e-6)
11
+ #define EXP_CONE_MAX_ITERS (100)
12
+ #define POW_CONE_MAX_ITERS (20)
13
+
14
+ #ifdef USE_LAPACK
15
+ void BLAS(syevr)(const char *jobz, const char *range, const char *uplo,
16
+ blas_int *n, scs_float *a, blas_int *lda, scs_float *vl,
17
+ scs_float *vu, blas_int *il, blas_int *iu, scs_float *abstol,
18
+ blas_int *m, scs_float *w, scs_float *z, blas_int *ldz,
19
+ blas_int *isuppz, scs_float *work, blas_int *lwork,
20
+ blas_int *iwork, blas_int *liwork, blas_int *info);
21
+ void BLAS(syr)(const char *uplo, const blas_int *n, const scs_float *alpha,
22
+ const scs_float *x, const blas_int *incx, scs_float *a,
23
+ const blas_int *lda);
24
+ void BLAS(scal)(const blas_int *n, const scs_float *sa, scs_float *sx,
25
+ const blas_int *incx);
26
+ scs_float BLAS(nrm2)(const blas_int *n, scs_float *x, const blas_int *incx);
27
+ #endif
28
+
29
+ static scs_int get_sd_cone_size(scs_int s) { return (s * (s + 1)) / 2; }
30
+
31
+ /*
32
+ * boundaries will contain array of indices of rows of A corresponding to
33
+ * cone boundaries, boundaries[0] is starting index for cones of size strictly
34
+ * larger than 1
35
+ * returns length of boundaries array, boundaries malloc-ed here so should be
36
+ * freed
37
+ */
38
+ scs_int SCS(get_cone_boundaries)(const ScsCone *k, scs_int **boundaries) {
39
+ scs_int i, count = 0;
40
+ scs_int len = 1 + k->qsize + k->ssize + k->ed + k->ep + k->psize;
41
+ scs_int *b = (scs_int *)scs_calloc(len, sizeof(scs_int));
42
+ b[count] = k->f + k->l;
43
+ count += 1;
44
+ if (k->qsize > 0) {
45
+ memcpy(&b[count], k->q, k->qsize * sizeof(scs_int));
46
+ }
47
+ count += k->qsize;
48
+ for (i = 0; i < k->ssize; ++i) {
49
+ b[count + i] = get_sd_cone_size(k->s[i]);
50
+ }
51
+ count += k->ssize;
52
+ for (i = 0; i < k->ep + k->ed; ++i) {
53
+ b[count + i] = 3;
54
+ }
55
+ count += k->ep + k->ed;
56
+ for (i = 0; i < k->psize; ++i) {
57
+ b[count + i] = 3;
58
+ }
59
+ count += k->psize;
60
+ *boundaries = b;
61
+ return len;
62
+ }
63
+
64
+ static scs_int get_full_cone_dims(const ScsCone *k) {
65
+ scs_int i, c = 0;
66
+ if (k->f) {
67
+ c += k->f;
68
+ }
69
+ if (k->l) {
70
+ c += k->l;
71
+ }
72
+ if (k->qsize && k->q) {
73
+ for (i = 0; i < k->qsize; ++i) {
74
+ c += k->q[i];
75
+ }
76
+ }
77
+ if (k->ssize && k->s) {
78
+ for (i = 0; i < k->ssize; ++i) {
79
+ c += get_sd_cone_size(k->s[i]);
80
+ }
81
+ }
82
+ if (k->ed) {
83
+ c += 3 * k->ed;
84
+ }
85
+ if (k->ep) {
86
+ c += 3 * k->ep;
87
+ }
88
+ if (k->p) {
89
+ c += 3 * k->psize;
90
+ }
91
+ return c;
92
+ }
93
+
94
+ scs_int SCS(validate_cones)(const ScsData *d, const ScsCone *k) {
95
+ scs_int i;
96
+ if (get_full_cone_dims(k) != d->m) {
97
+ scs_printf("cone dimensions %li not equal to num rows in A = m = %li\n",
98
+ (long)get_full_cone_dims(k), (long)d->m);
99
+ return -1;
100
+ }
101
+ if (k->f && k->f < 0) {
102
+ scs_printf("free cone error\n");
103
+ return -1;
104
+ }
105
+ if (k->l && k->l < 0) {
106
+ scs_printf("lp cone error\n");
107
+ return -1;
108
+ }
109
+ if (k->qsize && k->q) {
110
+ if (k->qsize < 0) {
111
+ scs_printf("soc cone error\n");
112
+ return -1;
113
+ }
114
+ for (i = 0; i < k->qsize; ++i) {
115
+ if (k->q[i] < 0) {
116
+ scs_printf("soc cone error\n");
117
+ return -1;
118
+ }
119
+ }
120
+ }
121
+ if (k->ssize && k->s) {
122
+ if (k->ssize < 0) {
123
+ scs_printf("sd cone error\n");
124
+ return -1;
125
+ }
126
+ for (i = 0; i < k->ssize; ++i) {
127
+ if (k->s[i] < 0) {
128
+ scs_printf("sd cone error\n");
129
+ return -1;
130
+ }
131
+ }
132
+ }
133
+ if (k->ed && k->ed < 0) {
134
+ scs_printf("ep cone error\n");
135
+ return -1;
136
+ }
137
+ if (k->ep && k->ep < 0) {
138
+ scs_printf("ed cone error\n");
139
+ return -1;
140
+ }
141
+ if (k->psize && k->p) {
142
+ if (k->psize < 0) {
143
+ scs_printf("power cone error\n");
144
+ return -1;
145
+ }
146
+ for (i = 0; i < k->psize; ++i) {
147
+ if (k->p[i] < -1 || k->p[i] > 1) {
148
+ scs_printf("power cone error, values must be in [-1,1]\n");
149
+ return -1;
150
+ }
151
+ }
152
+ }
153
+ return 0;
154
+ }
155
+
156
+ char *SCS(get_cone_summary)(const ScsInfo *info, ScsConeWork *c) {
157
+ char *str = (char *)scs_malloc(sizeof(char) * 64);
158
+ sprintf(str, "\tCones: avg projection time: %1.2es\n",
159
+ c->total_cone_time / (info->iter + 1) / 1e3);
160
+ c->total_cone_time = 0.0;
161
+ return str;
162
+ }
163
+
164
+ void SCS(finish_cone)(ScsConeWork *c) {
165
+ #ifdef USE_LAPACK
166
+ if (c->Xs) {
167
+ scs_free(c->Xs);
168
+ }
169
+ if (c->Z) {
170
+ scs_free(c->Z);
171
+ }
172
+ if (c->e) {
173
+ scs_free(c->e);
174
+ }
175
+ if (c->work) {
176
+ scs_free(c->work);
177
+ }
178
+ if (c->iwork) {
179
+ scs_free(c->iwork);
180
+ }
181
+ #endif
182
+ if (c) {
183
+ scs_free(c);
184
+ }
185
+ }
186
+
187
+ char *SCS(get_cone_header)(const ScsCone *k) {
188
+ char *tmp = (char *)scs_malloc(sizeof(char) * 512);
189
+ scs_int i, soc_vars, soc_blks, sd_vars, sd_blks;
190
+ sprintf(tmp, "Cones:");
191
+ if (k->f) {
192
+ sprintf(tmp + strlen(tmp), "\tprimal zero / dual free vars: %li\n",
193
+ (long)k->f);
194
+ }
195
+ if (k->l) {
196
+ sprintf(tmp + strlen(tmp), "\tlinear vars: %li\n", (long)k->l);
197
+ }
198
+ soc_vars = 0;
199
+ soc_blks = 0;
200
+ if (k->qsize && k->q) {
201
+ soc_blks = k->qsize;
202
+ for (i = 0; i < k->qsize; i++) {
203
+ soc_vars += k->q[i];
204
+ }
205
+ sprintf(tmp + strlen(tmp), "\tsoc vars: %li, soc blks: %li\n",
206
+ (long)soc_vars, (long)soc_blks);
207
+ }
208
+ sd_vars = 0;
209
+ sd_blks = 0;
210
+ if (k->ssize && k->s) {
211
+ sd_blks = k->ssize;
212
+ for (i = 0; i < k->ssize; i++) {
213
+ sd_vars += get_sd_cone_size(k->s[i]);
214
+ }
215
+ sprintf(tmp + strlen(tmp), "\tsd vars: %li, sd blks: %li\n", (long)sd_vars,
216
+ (long)sd_blks);
217
+ }
218
+ if (k->ep || k->ed) {
219
+ sprintf(tmp + strlen(tmp), "\texp vars: %li, dual exp vars: %li\n",
220
+ (long)(3 * k->ep), (long)(3 * k->ed));
221
+ }
222
+ if (k->psize && k->p) {
223
+ sprintf(tmp + strlen(tmp), "\tprimal + dual power vars: %li\n",
224
+ (long)(3 * k->psize));
225
+ }
226
+ return tmp;
227
+ }
228
+
229
+ static scs_int is_simple_semi_definite_cone(scs_int *s, scs_int ssize) {
230
+ scs_int i;
231
+ for (i = 0; i < ssize; i++) {
232
+ if (s[i] > 2) {
233
+ return 0; /* false */
234
+ }
235
+ }
236
+ return 1; /* true */
237
+ }
238
+
239
+ static scs_float exp_newton_one_d(scs_float rho, scs_float y_hat,
240
+ scs_float z_hat) {
241
+ scs_float t = MAX(-z_hat, 1e-6);
242
+ scs_float f, fp;
243
+ scs_int i;
244
+ for (i = 0; i < EXP_CONE_MAX_ITERS; ++i) {
245
+ f = t * (t + z_hat) / rho / rho - y_hat / rho + log(t / rho) + 1;
246
+ fp = (2 * t + z_hat) / rho / rho + 1 / t;
247
+
248
+ t = t - f / fp;
249
+
250
+ if (t <= -z_hat) {
251
+ return 0;
252
+ } else if (t <= 0) {
253
+ return z_hat;
254
+ } else if (ABS(f) < CONE_TOL) {
255
+ break;
256
+ }
257
+ }
258
+ return t + z_hat;
259
+ }
260
+
261
+ static void exp_solve_for_x_with_rho(scs_float *v, scs_float *x,
262
+ scs_float rho) {
263
+ x[2] = exp_newton_one_d(rho, v[1], v[2]);
264
+ x[1] = (x[2] - v[2]) * x[2] / rho;
265
+ x[0] = v[0] - rho;
266
+ }
267
+
268
+ static scs_float exp_calc_grad(scs_float *v, scs_float *x, scs_float rho) {
269
+ exp_solve_for_x_with_rho(v, x, rho);
270
+ if (x[1] <= 1e-12) {
271
+ return x[0];
272
+ }
273
+ return x[0] + x[1] * log(x[1] / x[2]);
274
+ }
275
+
276
+ static void exp_get_rho_ub(scs_float *v, scs_float *x, scs_float *ub,
277
+ scs_float *lb) {
278
+ *lb = 0;
279
+ *ub = 0.125;
280
+ while (exp_calc_grad(v, x, *ub) > 0) {
281
+ *lb = *ub;
282
+ (*ub) *= 2;
283
+ }
284
+ }
285
+
286
+ /* project onto the exponential cone, v has dimension *exactly* 3 */
287
+ static scs_int proj_exp_cone(scs_float *v) {
288
+ scs_int i;
289
+ scs_float ub, lb, rho, g, x[3];
290
+ scs_float r = v[0], s = v[1], t = v[2];
291
+ scs_float tol = CONE_TOL; /* iter < 0 ? CONE_TOL : MAX(CONE_TOL, 1 /
292
+ POWF((iter + 1), CONE_RATE)); */
293
+
294
+ /* v in cl(Kexp) */
295
+ if ((s * exp(r / s) - t <= CONE_THRESH && s > 0) ||
296
+ (r <= 0 && s == 0 && t >= 0)) {
297
+ return 0;
298
+ }
299
+
300
+ /* -v in Kexp^* */
301
+ if ((-r < 0 && r * exp(s / r) + exp(1) * t <= CONE_THRESH) ||
302
+ (-r == 0 && -s >= 0 && -t >= 0)) {
303
+ memset(v, 0, 3 * sizeof(scs_float));
304
+ return 0;
305
+ }
306
+
307
+ /* special case with analytical solution */
308
+ if (r < 0 && s < 0) {
309
+ v[1] = 0.0;
310
+ v[2] = MAX(v[2], 0);
311
+ return 0;
312
+ }
313
+
314
+ /* iterative procedure to find projection, bisects on dual variable: */
315
+ exp_get_rho_ub(v, x, &ub, &lb); /* get starting upper and lower bounds */
316
+ for (i = 0; i < EXP_CONE_MAX_ITERS; ++i) {
317
+ rho = (ub + lb) / 2; /* halfway between upper and lower bounds */
318
+ g = exp_calc_grad(v, x, rho); /* calculates gradient wrt dual var */
319
+ if (g > 0) {
320
+ lb = rho;
321
+ } else {
322
+ ub = rho;
323
+ }
324
+ if (ub - lb < tol) {
325
+ break;
326
+ }
327
+ }
328
+ /*
329
+ #if EXTRA_VERBOSE > 0
330
+ scs_printf("exponential cone proj iters %i\n", i);
331
+ #endif
332
+ */
333
+ v[0] = x[0];
334
+ v[1] = x[1];
335
+ v[2] = x[2];
336
+ return 0;
337
+ }
338
+
339
+ static scs_int set_up_sd_cone_work_space(ScsConeWork *c, const ScsCone *k) {
340
+ #ifdef USE_LAPACK
341
+ scs_int i;
342
+ blas_int n_max = 0;
343
+ scs_float eig_tol = 1e-8;
344
+ blas_int neg_one = -1;
345
+ blas_int m = 0;
346
+ blas_int info = 0;
347
+ scs_float wkopt = 0.0;
348
+ #if EXTRA_VERBOSE > 0
349
+ #define _STR_EXPAND(tok) #tok
350
+ #define _STR(tok) _STR_EXPAND(tok)
351
+ scs_printf("BLAS(func) = '%s'\n", _STR(BLAS(func)));
352
+ #endif
353
+ /* eigenvector decomp workspace */
354
+ for (i = 0; i < k->ssize; ++i) {
355
+ if (k->s[i] > n_max) {
356
+ n_max = (blas_int)k->s[i];
357
+ }
358
+ }
359
+ c->Xs = (scs_float *)scs_calloc(n_max * n_max, sizeof(scs_float));
360
+ c->Z = (scs_float *)scs_calloc(n_max * n_max, sizeof(scs_float));
361
+ c->e = (scs_float *)scs_calloc(n_max, sizeof(scs_float));
362
+ c->liwork = 0;
363
+
364
+ BLAS(syevr)
365
+ ("Vectors", "All", "Lower", &n_max, c->Xs, &n_max, SCS_NULL, SCS_NULL,
366
+ SCS_NULL, SCS_NULL, &eig_tol, &m, c->e, c->Z, &n_max, SCS_NULL, &wkopt,
367
+ &neg_one, &(c->liwork), &neg_one, &info);
368
+
369
+ if (info != 0) {
370
+ scs_printf("FATAL: syevr failure, info = %li\n", (long)info);
371
+ return -1;
372
+ }
373
+ c->lwork = (blas_int)(wkopt + 0.01); /* 0.01 for int casting safety */
374
+ c->work = (scs_float *)scs_calloc(c->lwork, sizeof(scs_float));
375
+ c->iwork = (blas_int *)scs_calloc(c->liwork, sizeof(blas_int));
376
+
377
+ if (!c->Xs || !c->Z || !c->e || !c->work || !c->iwork) {
378
+ return -1;
379
+ }
380
+ return 0;
381
+ #else
382
+ scs_printf(
383
+ "FATAL: Cannot solve SDPs with > 2x2 matrices without linked "
384
+ "blas+lapack libraries\n");
385
+ scs_printf(
386
+ "Install blas+lapack and re-compile SCS with blas+lapack library "
387
+ "locations\n");
388
+ return -1;
389
+ #endif
390
+ }
391
+
392
+ ScsConeWork *SCS(init_cone)(const ScsCone *k) {
393
+ ScsConeWork *c = (ScsConeWork *)scs_calloc(1, sizeof(ScsConeWork));
394
+ #if EXTRA_VERBOSE > 0
395
+ scs_printf("init_cone\n");
396
+ #endif
397
+ c->total_cone_time = 0.0;
398
+ if (k->ssize && k->s) {
399
+ if (!is_simple_semi_definite_cone(k->s, k->ssize) &&
400
+ set_up_sd_cone_work_space(c, k) < 0) {
401
+ SCS(finish_cone)(c);
402
+ return SCS_NULL;
403
+ }
404
+ }
405
+ #if EXTRA_VERBOSE > 0
406
+ scs_printf("init_cone complete\n");
407
+ #ifdef MATLAB_MEX_FILE
408
+ mexEvalString("drawnow;");
409
+ #endif
410
+ #endif
411
+ return c;
412
+ }
413
+
414
+ static scs_int project_2x2_sdc(scs_float *X) {
415
+ scs_float a, b, d, l1, l2, x1, x2, rad;
416
+ scs_float sqrt2 = SQRTF(2.0);
417
+ a = X[0];
418
+ b = X[1] / sqrt2;
419
+ d = X[2];
420
+
421
+ if (ABS(b) < 1e-6) { /* diagonal matrix */
422
+ X[0] = MAX(a, 0);
423
+ X[1] = 0;
424
+ X[2] = MAX(d, 0);
425
+ return 0;
426
+ }
427
+
428
+ rad = SQRTF((a - d) * (a - d) + 4 * b * b);
429
+ /* l1 >= l2 always, since rad >= 0 */
430
+ l1 = 0.5 * (a + d + rad);
431
+ l2 = 0.5 * (a + d - rad);
432
+
433
+ #if EXTRA_VERBOSE > 0
434
+ scs_printf(
435
+ "2x2 SD: a = %4f, b = %4f, (X[1] = %4f, X[2] = %4f), d = %4f, "
436
+ "rad = %4f, l1 = %4f, l2 = %4f\n",
437
+ a, b, X[1], X[2], d, rad, l1, l2);
438
+ #endif
439
+
440
+ if (l2 >= 0) { /* both eigs positive already */
441
+ return 0;
442
+ }
443
+ if (l1 <= 0) { /* both eigs negative, set to 0 */
444
+ X[0] = 0;
445
+ X[1] = 0;
446
+ X[2] = 0;
447
+ return 0;
448
+ }
449
+
450
+ /* l1 pos, l2 neg */
451
+ x1 = 1 / SQRTF(1 + (l1 - a) * (l1 - a) / b / b);
452
+ x2 = x1 * (l1 - a) / b;
453
+
454
+ X[0] = l1 * x1 * x1;
455
+ X[1] = (l1 * x1 * x2) * sqrt2;
456
+ X[2] = l1 * x2 * x2;
457
+ return 0;
458
+ }
459
+
460
+ /* size of X is get_sd_cone_size(n) */
461
+ static scs_int proj_semi_definite_cone(scs_float *X, const scs_int n,
462
+ ScsConeWork *c) {
463
+ /* project onto the positive semi-definite cone */
464
+ #ifdef USE_LAPACK
465
+ scs_int i;
466
+ blas_int one = 1;
467
+ blas_int m = 0;
468
+ blas_int nb = (blas_int)n;
469
+ blas_int nb_plus_one = (blas_int)(n + 1);
470
+ blas_int cone_sz = (blas_int)(get_sd_cone_size(n));
471
+
472
+ scs_float sqrt2 = SQRTF(2.0);
473
+ scs_float sqrt2Inv = 1.0 / sqrt2;
474
+ scs_float *Xs = c->Xs;
475
+ scs_float *Z = c->Z;
476
+ scs_float *e = c->e;
477
+ scs_float *work = c->work;
478
+ blas_int *iwork = c->iwork;
479
+ blas_int lwork = c->lwork;
480
+ blas_int liwork = c->liwork;
481
+
482
+ scs_float eig_tol = CONE_TOL; /* iter < 0 ? CONE_TOL : MAX(CONE_TOL, 1 /
483
+ POWF(iter + 1, CONE_RATE)); */
484
+ scs_float zero = 0.0;
485
+ blas_int info = 0;
486
+ scs_float vupper = 0.0;
487
+ #endif
488
+ if (n == 0) {
489
+ return 0;
490
+ }
491
+ if (n == 1) {
492
+ if (X[0] < 0.0) {
493
+ X[0] = 0.0;
494
+ }
495
+ return 0;
496
+ }
497
+ if (n == 2) {
498
+ return project_2x2_sdc(X);
499
+ }
500
+ #ifdef USE_LAPACK
501
+
502
+ memset(Xs, 0, n * n * sizeof(scs_float));
503
+ /* expand lower triangular matrix to full matrix */
504
+ for (i = 0; i < n; ++i) {
505
+ memcpy(&(Xs[i * (n + 1)]), &(X[i * n - ((i - 1) * i) / 2]),
506
+ (n - i) * sizeof(scs_float));
507
+ }
508
+ /*
509
+ rescale so projection works, and matrix norm preserved
510
+ see http://www.seas.ucla.edu/~vandenbe/publications/mlbook.pdf pg 3
511
+ */
512
+ /* scale diags by sqrt(2) */
513
+ BLAS(scal)(&nb, &sqrt2, Xs, &nb_plus_one); /* not n_squared */
514
+
515
+ /* max-eig upper bounded by frobenius norm */
516
+ vupper = 1.1 * sqrt2 *
517
+ BLAS(nrm2)(&cone_sz, X,
518
+ &one); /* mult by factor to make sure is upper bound */
519
+ vupper = MAX(vupper, 0.01);
520
+ #if EXTRA_VERBOSE > 0
521
+ SCS(print_array)(Xs, n * n, "Xs");
522
+ SCS(print_array)(X, get_sd_cone_size(n), "X");
523
+ #endif
524
+ /* Solve eigenproblem, reuse workspaces */
525
+ BLAS(syevr)
526
+ ("Vectors", "VInterval", "Lower", &nb, Xs, &nb, &zero, &vupper, SCS_NULL,
527
+ SCS_NULL, &eig_tol, &m, e, Z, &nb, SCS_NULL, work, &lwork, iwork, &liwork,
528
+ &info);
529
+ #if EXTRA_VERBOSE > 0
530
+ if (info != 0) {
531
+ scs_printf("WARN: LAPACK syevr error, info = %i\n", info);
532
+ }
533
+ scs_printf("syevr input parameter dump:\n");
534
+ scs_printf("nb = %li\n", (long)nb);
535
+ scs_printf("lwork = %li\n", (long)lwork);
536
+ scs_printf("liwork = %li\n", (long)liwork);
537
+ scs_printf("vupper = %f\n", vupper);
538
+ scs_printf("eig_tol = %e\n", eig_tol);
539
+ SCS(print_array)(e, m, "e");
540
+ SCS(print_array)(Z, m * n, "Z");
541
+ #endif
542
+ if (info < 0) {
543
+ return -1;
544
+ }
545
+
546
+ memset(Xs, 0, n * n * sizeof(scs_float));
547
+ for (i = 0; i < m; ++i) {
548
+ scs_float a = e[i];
549
+ BLAS(syr)("Lower", &nb, &a, &(Z[i * n]), &one, Xs, &nb);
550
+ }
551
+ /* scale diags by 1/sqrt(2) */
552
+ BLAS(scal)(&nb, &sqrt2Inv, Xs, &nb_plus_one); /* not n_squared */
553
+ /* extract just lower triangular matrix */
554
+ for (i = 0; i < n; ++i) {
555
+ memcpy(&(X[i * n - ((i - 1) * i) / 2]), &(Xs[i * (n + 1)]),
556
+ (n - i) * sizeof(scs_float));
557
+ }
558
+
559
+ #if EXTRA_VERBOSE > 0
560
+ SCS(print_array)(Xs, n * n, "Xs");
561
+ SCS(print_array)(X, get_sd_cone_size(n), "X");
562
+ #endif
563
+
564
+ #else
565
+ scs_printf(
566
+ "FAILURE: solving SDP with > 2x2 matrices, but no blas/lapack "
567
+ "libraries were linked!\n");
568
+ scs_printf("SCS will return nonsense!\n");
569
+ SCS(scale_array)(X, NAN, n);
570
+ return -1;
571
+ #endif
572
+ return 0;
573
+ }
574
+
575
+ static scs_float pow_calc_x(scs_float r, scs_float xh, scs_float rh,
576
+ scs_float a) {
577
+ scs_float x = 0.5 * (xh + SQRTF(xh * xh + 4 * a * (rh - r) * r));
578
+ return MAX(x, 1e-12);
579
+ }
580
+
581
+ static scs_float pow_calcdxdr(scs_float x, scs_float xh, scs_float rh,
582
+ scs_float r, scs_float a) {
583
+ return a * (rh - 2 * r) / (2 * x - xh);
584
+ }
585
+
586
+ static scs_float pow_calc_f(scs_float x, scs_float y, scs_float r,
587
+ scs_float a) {
588
+ return POWF(x, a) * POWF(y, (1 - a)) - r;
589
+ }
590
+
591
+ static scs_float pow_calc_fp(scs_float x, scs_float y, scs_float dxdr,
592
+ scs_float dydr, scs_float a) {
593
+ return POWF(x, a) * POWF(y, (1 - a)) * (a * dxdr / x + (1 - a) * dydr / y) -
594
+ 1;
595
+ }
596
+
597
+ static void proj_power_cone(scs_float *v, scs_float a) {
598
+ scs_float xh = v[0], yh = v[1], rh = ABS(v[2]);
599
+ scs_float x = 0.0, y = 0.0, r;
600
+ scs_int i;
601
+ /* v in K_a */
602
+ if (xh >= 0 && yh >= 0 &&
603
+ CONE_THRESH + POWF(xh, a) * POWF(yh, (1 - a)) >= rh) {
604
+ return;
605
+ }
606
+
607
+ /* -v in K_a^* */
608
+ if (xh <= 0 && yh <= 0 &&
609
+ CONE_THRESH + POWF(-xh, a) * POWF(-yh, 1 - a) >=
610
+ rh * POWF(a, a) * POWF(1 - a, 1 - a)) {
611
+ v[0] = v[1] = v[2] = 0;
612
+ return;
613
+ }
614
+
615
+ r = rh / 2;
616
+ for (i = 0; i < POW_CONE_MAX_ITERS; ++i) {
617
+ scs_float f, fp, dxdr, dydr;
618
+ x = pow_calc_x(r, xh, rh, a);
619
+ y = pow_calc_x(r, yh, rh, 1 - a);
620
+
621
+ f = pow_calc_f(x, y, r, a);
622
+ if (ABS(f) < CONE_TOL) {
623
+ break;
624
+ }
625
+
626
+ dxdr = pow_calcdxdr(x, xh, rh, r, a);
627
+ dydr = pow_calcdxdr(y, yh, rh, r, (1 - a));
628
+ fp = pow_calc_fp(x, y, dxdr, dydr, a);
629
+
630
+ r = MAX(r - f / fp, 0);
631
+ r = MIN(r, rh);
632
+ }
633
+ v[0] = x;
634
+ v[1] = y;
635
+ v[2] = (v[2] < 0) ? -(r) : (r);
636
+ }
637
+
638
+ /* outward facing cone projection routine, iter is outer algorithm iteration, if
639
+ iter < 0 then iter is ignored
640
+ warm_start contains guess of projection (can be set to SCS_NULL) */
641
+ scs_int SCS(proj_dual_cone)(scs_float *x, const ScsCone *k, ScsConeWork *c,
642
+ const scs_float *warm_start, scs_int iter) {
643
+ scs_int i;
644
+ scs_int count = (k->f ? k->f : 0);
645
+ SCS(timer) cone_timer;
646
+ #if EXTRA_VERBOSE > 0
647
+ SCS(timer) proj_timer;
648
+ SCS(tic)(&proj_timer);
649
+ #endif
650
+ SCS(tic)(&cone_timer);
651
+
652
+ if (k->l) {
653
+ /* project onto positive orthant */
654
+ for (i = count; i < count + k->l; ++i) {
655
+ if (x[i] < 0.0) {
656
+ x[i] = 0.0;
657
+ }
658
+ /* x[i] = (x[i] < 0.0) ? 0.0 : x[i]; */
659
+ }
660
+ count += k->l;
661
+ #if EXTRA_VERBOSE > 0
662
+ scs_printf("pos orthant proj time: %1.2es\n", SCS(tocq)(&proj_timer) / 1e3);
663
+ SCS(tic)(&proj_timer);
664
+ #endif
665
+ }
666
+
667
+ if (k->qsize && k->q) {
668
+ /* project onto SOC */
669
+ for (i = 0; i < k->qsize; ++i) {
670
+ if (k->q[i] == 0) {
671
+ continue;
672
+ }
673
+ if (k->q[i] == 1) {
674
+ if (x[count] < 0.0) {
675
+ x[count] = 0.0;
676
+ }
677
+ } else {
678
+ scs_float v1 = x[count];
679
+ scs_float s = SCS(norm)(&(x[count + 1]), k->q[i] - 1);
680
+ scs_float alpha = (s + v1) / 2.0;
681
+
682
+ if (s <= v1) { /* do nothing */
683
+ } else if (s <= -v1) {
684
+ memset(&(x[count]), 0, k->q[i] * sizeof(scs_float));
685
+ } else {
686
+ x[count] = alpha;
687
+ SCS(scale_array)(&(x[count + 1]), alpha / s, k->q[i] - 1);
688
+ }
689
+ }
690
+ count += k->q[i];
691
+ }
692
+ #if EXTRA_VERBOSE > 0
693
+ scs_printf("SOC proj time: %1.2es\n", SCS(tocq)(&proj_timer) / 1e3);
694
+ SCS(tic)(&proj_timer);
695
+ #endif
696
+ }
697
+
698
+ if (k->ssize && k->s) {
699
+ /* project onto PSD cone */
700
+ for (i = 0; i < k->ssize; ++i) {
701
+ #if EXTRA_VERBOSE > 0
702
+ scs_printf("SD proj size %li\n", (long)k->s[i]);
703
+ #endif
704
+ if (k->s[i] == 0) {
705
+ continue;
706
+ }
707
+ if (proj_semi_definite_cone(&(x[count]), k->s[i], c) < 0) {
708
+ return -1;
709
+ }
710
+ count += get_sd_cone_size(k->s[i]);
711
+ }
712
+ #if EXTRA_VERBOSE > 0
713
+ scs_printf("SD proj time: %1.2es\n", SCS(tocq)(&proj_timer) / 1e3);
714
+ SCS(tic)(&proj_timer);
715
+ #endif
716
+ }
717
+
718
+ if (k->ep) {
719
+ scs_float r, s, t;
720
+ scs_int idx;
721
+ /*
722
+ * exponential cone is not self dual, if s \in K
723
+ * then y \in K^* and so if K is the primal cone
724
+ * here we project onto K^*, via Moreau
725
+ * \Pi_C^*(y) = y + \Pi_C(-y)
726
+ */
727
+ SCS(scale_array)(&(x[count]), -1, 3 * k->ep); /* x = -x; */
728
+ #ifdef _OPENMP
729
+ #pragma omp parallel for private(r, s, t, idx)
730
+ #endif
731
+ for (i = 0; i < k->ep; ++i) {
732
+ idx = count + 3 * i;
733
+ r = x[idx];
734
+ s = x[idx + 1];
735
+ t = x[idx + 2];
736
+
737
+ proj_exp_cone(&(x[idx]));
738
+
739
+ x[idx] -= r;
740
+ x[idx + 1] -= s;
741
+ x[idx + 2] -= t;
742
+ }
743
+ count += 3 * k->ep;
744
+ #if EXTRA_VERBOSE > 0
745
+ scs_printf("EP proj time: %1.2es\n", SCS(tocq)(&proj_timer) / 1e3);
746
+ SCS(tic)(&proj_timer);
747
+ #endif
748
+ }
749
+
750
+ if (k->ed) {
751
+ /* exponential cone: */
752
+ #ifdef _OPENMP
753
+ #pragma omp parallel for
754
+ #endif
755
+ for (i = 0; i < k->ed; ++i) {
756
+ proj_exp_cone(&(x[count + 3 * i]));
757
+ }
758
+ count += 3 * k->ed;
759
+ #if EXTRA_VERBOSE > 0
760
+ scs_printf("ED proj time: %1.2es\n", SCS(tocq)(&proj_timer) / 1e3);
761
+ SCS(tic)(&proj_timer);
762
+ #endif
763
+ }
764
+
765
+ if (k->psize && k->p) {
766
+ scs_float v[3];
767
+ scs_int idx;
768
+ /* don't use openmp for power cone
769
+ ifdef _OPENMP
770
+ pragma omp parallel for private(v, idx)
771
+ endif
772
+ */
773
+ for (i = 0; i < k->psize; ++i) {
774
+ idx = count + 3 * i;
775
+ if (k->p[i] <= 0) {
776
+ /* dual power cone */
777
+ proj_power_cone(&(x[idx]), -k->p[i]);
778
+ } else {
779
+ /* primal power cone, using Moreau */
780
+ v[0] = -x[idx];
781
+ v[1] = -x[idx + 1];
782
+ v[2] = -x[idx + 2];
783
+
784
+ proj_power_cone(v, k->p[i]);
785
+
786
+ x[idx] += v[0];
787
+ x[idx + 1] += v[1];
788
+ x[idx + 2] += v[2];
789
+ }
790
+ }
791
+ count += 3 * k->psize;
792
+ #if EXTRA_VERBOSE > 0
793
+ scs_printf("Power cone proj time: %1.2es\n", SCS(tocq)(&proj_timer) / 1e3);
794
+ SCS(tic)(&proj_timer);
795
+ #endif
796
+ }
797
+ /* project onto OTHER cones */
798
+ if (c) {
799
+ c->total_cone_time += SCS(tocq)(&cone_timer);
800
+ }
801
+ return 0;
802
+ }