scs 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +12 -0
- data/LICENSE.txt +22 -0
- data/README.md +98 -0
- data/ext/scs/extconf.rb +29 -0
- data/lib/scs.rb +17 -0
- data/lib/scs/ffi.rb +117 -0
- data/lib/scs/solver.rb +173 -0
- data/lib/scs/version.rb +3 -0
- data/vendor/scs/LICENSE.txt +21 -0
- data/vendor/scs/Makefile +164 -0
- data/vendor/scs/README.md +222 -0
- data/vendor/scs/include/aa.h +56 -0
- data/vendor/scs/include/cones.h +46 -0
- data/vendor/scs/include/ctrlc.h +33 -0
- data/vendor/scs/include/glbopts.h +177 -0
- data/vendor/scs/include/linalg.h +26 -0
- data/vendor/scs/include/linsys.h +64 -0
- data/vendor/scs/include/normalize.h +18 -0
- data/vendor/scs/include/rw.h +17 -0
- data/vendor/scs/include/scs.h +161 -0
- data/vendor/scs/include/scs_blas.h +51 -0
- data/vendor/scs/include/util.h +65 -0
- data/vendor/scs/linsys/amatrix.c +305 -0
- data/vendor/scs/linsys/amatrix.h +36 -0
- data/vendor/scs/linsys/amatrix.o +0 -0
- data/vendor/scs/linsys/cpu/direct/private.c +366 -0
- data/vendor/scs/linsys/cpu/direct/private.h +26 -0
- data/vendor/scs/linsys/cpu/direct/private.o +0 -0
- data/vendor/scs/linsys/cpu/indirect/private.c +256 -0
- data/vendor/scs/linsys/cpu/indirect/private.h +31 -0
- data/vendor/scs/linsys/cpu/indirect/private.o +0 -0
- data/vendor/scs/linsys/external/amd/LICENSE.txt +934 -0
- data/vendor/scs/linsys/external/amd/SuiteSparse_config.c +469 -0
- data/vendor/scs/linsys/external/amd/SuiteSparse_config.h +254 -0
- data/vendor/scs/linsys/external/amd/SuiteSparse_config.o +0 -0
- data/vendor/scs/linsys/external/amd/amd.h +400 -0
- data/vendor/scs/linsys/external/amd/amd_1.c +180 -0
- data/vendor/scs/linsys/external/amd/amd_1.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_2.c +1842 -0
- data/vendor/scs/linsys/external/amd/amd_2.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_aat.c +184 -0
- data/vendor/scs/linsys/external/amd/amd_aat.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_control.c +64 -0
- data/vendor/scs/linsys/external/amd/amd_control.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_defaults.c +37 -0
- data/vendor/scs/linsys/external/amd/amd_defaults.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_dump.c +179 -0
- data/vendor/scs/linsys/external/amd/amd_dump.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_global.c +16 -0
- data/vendor/scs/linsys/external/amd/amd_global.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_info.c +119 -0
- data/vendor/scs/linsys/external/amd/amd_info.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_internal.h +304 -0
- data/vendor/scs/linsys/external/amd/amd_order.c +199 -0
- data/vendor/scs/linsys/external/amd/amd_order.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_post_tree.c +120 -0
- data/vendor/scs/linsys/external/amd/amd_post_tree.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_postorder.c +206 -0
- data/vendor/scs/linsys/external/amd/amd_postorder.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_preprocess.c +118 -0
- data/vendor/scs/linsys/external/amd/amd_preprocess.o +0 -0
- data/vendor/scs/linsys/external/amd/amd_valid.c +92 -0
- data/vendor/scs/linsys/external/amd/amd_valid.o +0 -0
- data/vendor/scs/linsys/external/amd/changes +11 -0
- data/vendor/scs/linsys/external/qdldl/LICENSE +201 -0
- data/vendor/scs/linsys/external/qdldl/README.md +120 -0
- data/vendor/scs/linsys/external/qdldl/changes +4 -0
- data/vendor/scs/linsys/external/qdldl/qdldl.c +298 -0
- data/vendor/scs/linsys/external/qdldl/qdldl.h +177 -0
- data/vendor/scs/linsys/external/qdldl/qdldl.o +0 -0
- data/vendor/scs/linsys/external/qdldl/qdldl_types.h +21 -0
- data/vendor/scs/linsys/gpu/gpu.c +41 -0
- data/vendor/scs/linsys/gpu/gpu.h +85 -0
- data/vendor/scs/linsys/gpu/indirect/private.c +304 -0
- data/vendor/scs/linsys/gpu/indirect/private.h +36 -0
- data/vendor/scs/scs.mk +181 -0
- data/vendor/scs/src/aa.c +224 -0
- data/vendor/scs/src/aa.o +0 -0
- data/vendor/scs/src/cones.c +802 -0
- data/vendor/scs/src/cones.o +0 -0
- data/vendor/scs/src/ctrlc.c +77 -0
- data/vendor/scs/src/ctrlc.o +0 -0
- data/vendor/scs/src/linalg.c +84 -0
- data/vendor/scs/src/linalg.o +0 -0
- data/vendor/scs/src/normalize.c +93 -0
- data/vendor/scs/src/normalize.o +0 -0
- data/vendor/scs/src/rw.c +167 -0
- data/vendor/scs/src/rw.o +0 -0
- data/vendor/scs/src/scs.c +978 -0
- data/vendor/scs/src/scs.o +0 -0
- data/vendor/scs/src/scs_version.c +5 -0
- data/vendor/scs/src/scs_version.o +0 -0
- data/vendor/scs/src/util.c +196 -0
- data/vendor/scs/src/util.o +0 -0
- data/vendor/scs/test/data/small_random_socp +0 -0
- data/vendor/scs/test/minunit.h +13 -0
- data/vendor/scs/test/problem_utils.h +93 -0
- data/vendor/scs/test/problems/rob_gauss_cov_est.h +85 -0
- data/vendor/scs/test/problems/small_lp.h +50 -0
- data/vendor/scs/test/problems/small_random_socp.h +33 -0
- data/vendor/scs/test/random_socp_prob.c +171 -0
- data/vendor/scs/test/run_from_file.c +69 -0
- data/vendor/scs/test/run_tests +2 -0
- data/vendor/scs/test/run_tests.c +32 -0
- metadata +203 -0
@@ -0,0 +1,36 @@
|
|
1
|
+
#ifndef PRIV_H_GUARD
|
2
|
+
#define PRIV_H_GUARD
|
3
|
+
|
4
|
+
#ifdef __cplusplus
|
5
|
+
extern "C" {
|
6
|
+
#endif
|
7
|
+
|
8
|
+
#include "gpu.h"
|
9
|
+
#include "glbopts.h"
|
10
|
+
#include "linalg.h"
|
11
|
+
#include "scs.h"
|
12
|
+
|
13
|
+
|
14
|
+
struct SCS_LIN_SYS_WORK {
|
15
|
+
/* reporting */
|
16
|
+
scs_int tot_cg_its;
|
17
|
+
scs_float total_solve_time;
|
18
|
+
/* ALL BELOW HOSTED ON THE GPU */
|
19
|
+
scs_float *p; /* cg iterate, n */
|
20
|
+
scs_float *r; /* cg residual, n */
|
21
|
+
scs_float *Gp; /* G * p, n */
|
22
|
+
scs_float *bg; /* b, n */
|
23
|
+
scs_float *tmp_m; /* m, used in mat_vec */
|
24
|
+
scs_float *z; /* preconditioned */
|
25
|
+
scs_float *M; /* preconditioner */
|
26
|
+
ScsGpuMatrix *Ag; /* A matrix on GPU */
|
27
|
+
ScsGpuMatrix *Agt; /* A trans matrix on GPU */
|
28
|
+
/* CUDA */
|
29
|
+
cublasHandle_t cublas_handle;
|
30
|
+
cusparseHandle_t cusparse_handle;
|
31
|
+
};
|
32
|
+
|
33
|
+
#ifdef __cplusplus
|
34
|
+
}
|
35
|
+
#endif
|
36
|
+
#endif
|
data/vendor/scs/scs.mk
ADDED
@@ -0,0 +1,181 @@
|
|
1
|
+
ifeq ($(OS),Windows_NT)
|
2
|
+
UNAME = CYGWINorMINGWorMSYS
|
3
|
+
else
|
4
|
+
UNAME = $(shell uname -s)
|
5
|
+
endif
|
6
|
+
|
7
|
+
#CC = gcc
|
8
|
+
# For cross-compiling with mingw use these.
|
9
|
+
#CC = i686-w64-mingw32-gcc -m32
|
10
|
+
#CC = x86_64-w64-mingw32-gcc-4.8
|
11
|
+
CUCC = $(CC) #Don't need to use nvcc, since using cuda blas APIs
|
12
|
+
|
13
|
+
# For GPU must add cuda libs to path, e.g.
|
14
|
+
# export DYLD_LIBRARY_PATH=/usr/local/cuda/lib:$DYLD_LIBRARY_PATH
|
15
|
+
|
16
|
+
ifneq (, $(findstring CYGWIN, $(UNAME)))
|
17
|
+
ISWINDOWS := 1
|
18
|
+
else
|
19
|
+
ifneq (, $(findstring MINGW, $(UNAME)))
|
20
|
+
ISWINDOWS := 1
|
21
|
+
else
|
22
|
+
ifneq (, $(findstring MSYS, $(UNAME)))
|
23
|
+
ISWINDOWS := 1
|
24
|
+
else
|
25
|
+
ifneq (, $(findstring mingw, $(CC)))
|
26
|
+
ISWINDOWS := 1
|
27
|
+
else
|
28
|
+
ISWINDOWS := 0
|
29
|
+
endif
|
30
|
+
endif
|
31
|
+
endif
|
32
|
+
endif
|
33
|
+
|
34
|
+
ifeq ($(UNAME), Darwin)
|
35
|
+
# we're on apple, no need to link rt library
|
36
|
+
LDFLAGS += -lm
|
37
|
+
SHARED = dylib
|
38
|
+
SONAME = -install_name
|
39
|
+
else
|
40
|
+
ifeq ($(ISWINDOWS), 1)
|
41
|
+
# we're on windows (cygwin or msys)
|
42
|
+
LDFLAGS += -lm
|
43
|
+
SHARED = dll
|
44
|
+
SONAME = -soname
|
45
|
+
else
|
46
|
+
# we're on a linux system, use accurate timer provided by clock_gettime()
|
47
|
+
LDFLAGS += -lm -lrt
|
48
|
+
SHARED = so
|
49
|
+
SONAME = -soname
|
50
|
+
endif
|
51
|
+
endif
|
52
|
+
|
53
|
+
#TODO: check if this works for all platforms:
|
54
|
+
ifeq ($(CUDA_PATH), )
|
55
|
+
CUDA_PATH=/usr/local/cuda
|
56
|
+
endif
|
57
|
+
CULDFLAGS = -L$(CUDA_PATH)/lib -L$(CUDA_PATH)/lib64 -lcudart -lcublas -lcusparse
|
58
|
+
CUDAFLAGS = $(CFLAGS) -I$(CUDA_PATH)/include -Ilinsys/gpu -Wno-c++11-long-long # turn off annoying long-long warnings in cuda header files
|
59
|
+
|
60
|
+
# Add on default CFLAGS
|
61
|
+
OPT = -O3
|
62
|
+
override CFLAGS += -g -Wall -Wwrite-strings -pedantic -funroll-loops -Wstrict-prototypes -I. -Iinclude -Ilinsys $(OPT)
|
63
|
+
ifneq ($(ISWINDOWS), 1)
|
64
|
+
override CFLAGS += -fPIC
|
65
|
+
endif
|
66
|
+
|
67
|
+
LINSYS = linsys
|
68
|
+
DIRSRC = $(LINSYS)/cpu/direct
|
69
|
+
INDIRSRC = $(LINSYS)/cpu/indirect
|
70
|
+
GPUDIR = $(LINSYS)/gpu/direct
|
71
|
+
GPUINDIR = $(LINSYS)/gpu/indirect
|
72
|
+
|
73
|
+
EXTSRC = $(LINSYS)/external
|
74
|
+
|
75
|
+
OUT = out
|
76
|
+
AR = ar
|
77
|
+
ARFLAGS = rv
|
78
|
+
ARCHIVE = $(AR) $(ARFLAGS)
|
79
|
+
RANLIB = ranlib
|
80
|
+
INSTALL = install
|
81
|
+
|
82
|
+
ifeq ($(PREFIX),)
|
83
|
+
PREFIX = /usr/local
|
84
|
+
endif
|
85
|
+
|
86
|
+
OPT_FLAGS =
|
87
|
+
########### OPTIONAL FLAGS ##########
|
88
|
+
# these can all be overridden from the command line
|
89
|
+
# e.g. make DLONG=1 will override the setting below
|
90
|
+
DLONG = 0
|
91
|
+
ifneq ($(DLONG), 0)
|
92
|
+
OPT_FLAGS += -DDLONG=$(DLONG) # use longs rather than ints
|
93
|
+
endif
|
94
|
+
CTRLC = 1
|
95
|
+
ifneq ($(CTRLC), 0)
|
96
|
+
OPT_FLAGS += -DCTRLC=$(CTRLC) # graceful interrupts with ctrl-c
|
97
|
+
endif
|
98
|
+
SFLOAT = 0
|
99
|
+
ifneq ($(SFLOAT), 0)
|
100
|
+
OPT_FLAGS += -DSFLOAT=$(SFLOAT) # use floats rather than doubles
|
101
|
+
endif
|
102
|
+
NOVALIDATE = 0
|
103
|
+
ifneq ($(NOVALIDATE), 0)
|
104
|
+
OPT_FLAGS += -DNOVALIDATE=$(NOVALIDATE) # remove data validation step
|
105
|
+
endif
|
106
|
+
NOTIMER = 0
|
107
|
+
ifneq ($(NOTIMER), 0)
|
108
|
+
OPT_FLAGS += -DNOTIMER=$(NOTIMER) # no timing, times reported as nan
|
109
|
+
endif
|
110
|
+
COPYAMATRIX = 1
|
111
|
+
ifneq ($(COPYAMATRIX), 0)
|
112
|
+
OPT_FLAGS += -DCOPYAMATRIX=$(COPYAMATRIX) # if normalize, copy A
|
113
|
+
endif
|
114
|
+
GPU_TRANSPOSE_MAT = 1
|
115
|
+
ifneq ($(GPU_TRANSPOSE_MAT), 0)
|
116
|
+
OPT_FLAGS += -DGPU_TRANSPOSE_MAT=$(GPU_TRANSPOSE_MAT) # transpose A mat in GPU memory
|
117
|
+
endif
|
118
|
+
|
119
|
+
### VERBOSITY LEVELS: 0,1,2
|
120
|
+
EXTRA_VERBOSE = 0
|
121
|
+
ifneq ($(EXTRA_VERBOSE), 0)
|
122
|
+
OPT_FLAGS += -DEXTRA_VERBOSE=$(EXTRA_VERBOSE) # extra verbosity level
|
123
|
+
endif
|
124
|
+
|
125
|
+
############ OPENMP: ############
|
126
|
+
# set USE_OPENMP = 1 to allow openmp (multi-threaded matrix multiplies):
|
127
|
+
# set the number of threads to, for example, 4 by entering the command:
|
128
|
+
# export OMP_NUM_THREADS=4
|
129
|
+
|
130
|
+
USE_OPENMP = 0
|
131
|
+
ifneq ($(USE_OPENMP), 0)
|
132
|
+
override CFLAGS += -fopenmp
|
133
|
+
LDFLAGS += -lgomp
|
134
|
+
endif
|
135
|
+
|
136
|
+
############ SDPS: BLAS + LAPACK ############
|
137
|
+
# set USE_LAPACK = 1 below to enable solving SDPs
|
138
|
+
# NB: point the libraries to the locations where
|
139
|
+
# you have blas and lapack installed
|
140
|
+
|
141
|
+
USE_LAPACK = 1
|
142
|
+
ifneq ($(USE_LAPACK), 0)
|
143
|
+
# edit these for your setup:
|
144
|
+
BLASLDFLAGS = -lblas -llapack #-lgfortran
|
145
|
+
LDFLAGS += $(BLASLDFLAGS)
|
146
|
+
OPT_FLAGS += -DUSE_LAPACK
|
147
|
+
|
148
|
+
BLAS64 = 0
|
149
|
+
ifneq ($(BLAS64), 0)
|
150
|
+
OPT_FLAGS += -DBLAS64=$(BLAS64) # if blas/lapack lib uses 64 bit ints
|
151
|
+
endif
|
152
|
+
|
153
|
+
NOBLASSUFFIX = 0
|
154
|
+
ifneq ($(NOBLASSUFFIX), 0)
|
155
|
+
OPT_FLAGS += -DNOBLASSUFFIX=$(NOBLASSUFFIX) # hack to strip blas suffix
|
156
|
+
endif
|
157
|
+
|
158
|
+
BLASSUFFIX = "_"
|
159
|
+
ifneq ($(BLASSUFFIX), "_")
|
160
|
+
OPT_FLAGS += -DBLASSUFFIX=$(BLASSUFFIX) # blas suffix (underscore usually)
|
161
|
+
endif
|
162
|
+
endif
|
163
|
+
|
164
|
+
MATLAB_MEX_FILE = 0
|
165
|
+
ifneq ($(MATLAB_MEX_FILE), 0)
|
166
|
+
OPT_FLAGS += -DMATLAB_MEX_FILE=$(MATLAB_MEX_FILE) # matlab mex
|
167
|
+
endif
|
168
|
+
PYTHON = 0
|
169
|
+
ifneq ($(PYTHON), 0)
|
170
|
+
OPT_FLAGS += -DPYTHON=$(PYTHON) # python extension
|
171
|
+
endif
|
172
|
+
USING_R = 0
|
173
|
+
ifneq ($(USING_R), 0)
|
174
|
+
OPT_FLAGS += -DUSING_R=$(USING_R) # R extension
|
175
|
+
endif
|
176
|
+
|
177
|
+
# debug to see var values, e.g. 'make print-OBJECTS' shows OBJECTS value
|
178
|
+
print-%: ; @echo $*=$($*)
|
179
|
+
|
180
|
+
override CFLAGS += $(OPT_FLAGS)
|
181
|
+
CUDAFLAGS += $(OPT_FLAGS)
|
data/vendor/scs/src/aa.c
ADDED
@@ -0,0 +1,224 @@
|
|
1
|
+
#include "aa.h"
|
2
|
+
#include "scs_blas.h"
|
3
|
+
|
4
|
+
/* This file uses Anderson acceleration to improve the convergence of
|
5
|
+
* a fixed point mapping.
|
6
|
+
* At each iteration we need to solve a (small) linear system, we
|
7
|
+
* do this using LAPACK ?gesv.
|
8
|
+
*/
|
9
|
+
|
10
|
+
#ifndef USE_LAPACK
|
11
|
+
|
12
|
+
typedef void * ACCEL_WORK;
|
13
|
+
|
14
|
+
/* Build without LAPACK: no acceleration workspace is created. */
AaWork *aa_init(aa_int dim, aa_int aa_mem, aa_int type1) {
  return SCS_NULL;
}
|
15
|
+
/* Build without LAPACK: leaves f untouched and reports 0. */
aa_int aa_apply(aa_float *f, const aa_float *x, AaWork *a) {
  return 0;
}
|
16
|
+
/* Build without LAPACK: nothing was allocated, so nothing to release. */
void aa_finish(AaWork *a) {
}
|
17
|
+
|
18
|
+
#else
|
19
|
+
|
20
|
+
/* contains the necessary parameters to perform aa at each step */
|
21
|
+
struct ACCEL_WORK {
|
22
|
+
aa_int type1; /* bool, if true type 1 aa otherwise type 2 */
|
23
|
+
aa_int k; /* aa memory */
|
24
|
+
aa_int l; /* variable dimension */
|
25
|
+
aa_int iter; /* current iteration */
|
26
|
+
|
27
|
+
aa_float *x; /* x input to map*/
|
28
|
+
aa_float *f; /* f(x) output of map */
|
29
|
+
aa_float *g; /* x - f(x) */
|
30
|
+
|
31
|
+
/* from previous iteration */
|
32
|
+
aa_float *g_prev; /* x - f(x) */
|
33
|
+
|
34
|
+
aa_float *y; /* g - g_prev */
|
35
|
+
aa_float *s; /* x - x_prev */
|
36
|
+
aa_float *d; /* f - f_prev */
|
37
|
+
|
38
|
+
aa_float *Y; /* matrix of stacked y values */
|
39
|
+
aa_float *S; /* matrix of stacked s values */
|
40
|
+
aa_float *D; /* matrix of stacked d values = (S-Y) */
|
41
|
+
aa_float *M; /* S'Y or Y'Y depending on type of aa */
|
42
|
+
|
43
|
+
/* workspace variables */
|
44
|
+
aa_float *work;
|
45
|
+
blas_int *ipiv;
|
46
|
+
};
|
47
|
+
|
48
|
+
/* BLAS functions used */
|
49
|
+
aa_float BLAS(nrm2)(blas_int *n, aa_float *x, blas_int *incx);
|
50
|
+
void BLAS(axpy)(blas_int *n, aa_float *a, const aa_float *x, blas_int *incx,
|
51
|
+
aa_float *y, blas_int *incy);
|
52
|
+
void BLAS(gemv)(const char *trans, const blas_int *m, const blas_int *n,
|
53
|
+
const aa_float *alpha, const aa_float *a, const blas_int *lda,
|
54
|
+
const aa_float *x, const blas_int *incx, const aa_float *beta,
|
55
|
+
aa_float *y, const blas_int *incy);
|
56
|
+
void BLAS(gesv)(blas_int *n, blas_int *nrhs, aa_float *a, blas_int *lda,
|
57
|
+
blas_int *ipiv, aa_float *b, blas_int *ldb, blas_int *info);
|
58
|
+
void BLAS(gemm)(const char *transa, const char *transb, blas_int *m,
|
59
|
+
blas_int *n, blas_int *k, aa_float *alpha, aa_float *a,
|
60
|
+
blas_int *lda, aa_float *b, blas_int *ldb, aa_float *beta,
|
61
|
+
aa_float *c, blas_int *ldc);
|
62
|
+
|
63
|
+
/* sets a->M to S'Y or Y'Y depending on type of aa used */
|
64
|
+
static void set_m(AaWork *a) {
|
65
|
+
blas_int bl = (blas_int)(a->l), bk = (blas_int)a->k;
|
66
|
+
aa_float onef = 1.0, zerof = 0.0;
|
67
|
+
BLAS(gemm)
|
68
|
+
("Trans", "No", &bk, &bk, &bl, &onef, a->type1 ? a->S : a->Y, &bl, a->Y, &bl,
|
69
|
+
&zerof, a->M, &bk);
|
70
|
+
}
|
71
|
+
|
72
|
+
/* updates the workspace parameters for aa for this iteration */
|
73
|
+
static void update_accel_params(const aa_float *x, const aa_float *f,
|
74
|
+
AaWork *a) {
|
75
|
+
/* at the start a->x = x_prev and a->f = f_prev */
|
76
|
+
aa_int idx = a->iter % a->k;
|
77
|
+
aa_int l = a->l;
|
78
|
+
|
79
|
+
blas_int one = 1;
|
80
|
+
blas_int bl = (blas_int)l;
|
81
|
+
aa_float neg_onef = -1.0;
|
82
|
+
|
83
|
+
/* g = x */
|
84
|
+
memcpy(a->g, x, sizeof(aa_float) * l);
|
85
|
+
/* s = x */
|
86
|
+
memcpy(a->s, x, sizeof(aa_float) * l);
|
87
|
+
/* d = f */
|
88
|
+
memcpy(a->d, f, sizeof(aa_float) * l);
|
89
|
+
/* g -= f */
|
90
|
+
BLAS(axpy)(&bl, &neg_onef, f, &one, a->g, &one);
|
91
|
+
/* s -= x_prev */
|
92
|
+
BLAS(axpy)(&bl, &neg_onef, a->x, &one, a->s, &one);
|
93
|
+
/* d -= f_prev */
|
94
|
+
BLAS(axpy)(&bl, &neg_onef, a->f, &one, a->d, &one);
|
95
|
+
|
96
|
+
/* g, s, d correct here */
|
97
|
+
|
98
|
+
/* y = g */
|
99
|
+
memcpy(a->y, a->g, sizeof(aa_float) * l);
|
100
|
+
/* y -= g_prev */
|
101
|
+
BLAS(axpy)(&bl, &neg_onef, a->g_prev, &one, a->y, &one);
|
102
|
+
|
103
|
+
/* y correct here */
|
104
|
+
|
105
|
+
/* copy y into idx col of Y */
|
106
|
+
memcpy(&(a->Y[idx * l]), a->y, sizeof(aa_float) * l);
|
107
|
+
/* copy s into idx col of S */
|
108
|
+
memcpy(&(a->S[idx * l]), a->s, sizeof(aa_float) * l);
|
109
|
+
/* copy d into idx col of D */
|
110
|
+
memcpy(&(a->D[idx * l]), a->d, sizeof(aa_float) * l);
|
111
|
+
|
112
|
+
/* Y, S,D correct here */
|
113
|
+
|
114
|
+
memcpy(a->f, f, sizeof(aa_float) * l);
|
115
|
+
memcpy(a->x, x, sizeof(aa_float) * l);
|
116
|
+
|
117
|
+
/* x, f correct here */
|
118
|
+
|
119
|
+
/* set M = S'*Y */
|
120
|
+
set_m(a);
|
121
|
+
|
122
|
+
/* M correct here */
|
123
|
+
|
124
|
+
memcpy(a->g_prev, a->g, sizeof(aa_float) * l);
|
125
|
+
|
126
|
+
/* g_prev set for next iter here */
|
127
|
+
}
|
128
|
+
|
129
|
+
/* solves the system of equations to perform the aa update
|
130
|
+
* at the end f contains the next iterate to be returned
|
131
|
+
*/
|
132
|
+
static aa_int solve(aa_float *f, AaWork *a, aa_int len) {
|
133
|
+
blas_int info = -1, bl = (blas_int)(a->l), one = 1, blen = (blas_int)len,
|
134
|
+
bk = (blas_int)a->k;
|
135
|
+
aa_float neg_onef = -1.0, onef = 1.0, zerof = 0.0, nrm;
|
136
|
+
/* work = S'g or Y'g */
|
137
|
+
BLAS(gemv)
|
138
|
+
("Trans", &bl, &blen, &onef, a->type1 ? a->S : a->Y, &bl, a->g, &one, &zerof,
|
139
|
+
a->work, &one);
|
140
|
+
/* work = M \ work, where M = S'Y or M = Y'Y */
|
141
|
+
BLAS(gesv)(&blen, &one, a->M, &bk, a->ipiv, a->work, &blen, &info);
|
142
|
+
nrm = BLAS(nrm2)(&bk, a->work, &one);
|
143
|
+
if (info < 0 || nrm >= MAX_AA_NRM) {
|
144
|
+
#if EXTRA_VERBOSE > 0
|
145
|
+
scs_printf("Error in AA type %i, iter: %i, info: %i, norm %1.2e\n",
|
146
|
+
a->type1 ? 1 : 2, (int)a->iter, (int)info, nrm);
|
147
|
+
#endif
|
148
|
+
return -1;
|
149
|
+
}
|
150
|
+
/* if solve was successful then set f -= D * work */
|
151
|
+
BLAS(gemv)
|
152
|
+
("NoTrans", &bl, &blen, &neg_onef, a->D, &bl, a->work, &one, &onef, f, &one);
|
153
|
+
return (aa_int)info;
|
154
|
+
}
|
155
|
+
|
156
|
+
/*
|
157
|
+
* API functions below this line, see aa.h for descriptions.
|
158
|
+
*/
|
159
|
+
/* Allocates and zero-initialises an aa workspace.
 *
 * l      : dimension of the variable.
 * aa_mem : aa memory; when <= 0 only the bare struct is allocated (the
 *          other entry points treat k <= 0 as "acceleration off").
 * type1  : non-zero for type 1 aa, zero for type 2.
 *
 * Returns the workspace, or a null pointer when any allocation fails, in
 * which case everything already allocated is released. Release a returned
 * workspace with aa_finish.
 */
AaWork *aa_init(aa_int l, aa_int aa_mem, aa_int type1) {
  AaWork *a = (AaWork *)calloc(1, sizeof(AaWork));
  if (!a) {
    scs_printf("Failed to allocate memory for AA.\n");
    return (void *)0;
  }
  a->type1 = type1;
  a->iter = 0;
  a->l = l;
  a->k = aa_mem;
  if (a->k <= 0) {
    return a;
  }

  a->x = (aa_float *)calloc(a->l, sizeof(aa_float));
  a->f = (aa_float *)calloc(a->l, sizeof(aa_float));
  a->g = (aa_float *)calloc(a->l, sizeof(aa_float));

  a->g_prev = (aa_float *)calloc(a->l, sizeof(aa_float));

  a->y = (aa_float *)calloc(a->l, sizeof(aa_float));
  a->s = (aa_float *)calloc(a->l, sizeof(aa_float));
  a->d = (aa_float *)calloc(a->l, sizeof(aa_float));

  a->Y = (aa_float *)calloc(a->l * a->k, sizeof(aa_float));
  a->S = (aa_float *)calloc(a->l * a->k, sizeof(aa_float));
  a->D = (aa_float *)calloc(a->l * a->k, sizeof(aa_float));

  a->M = (aa_float *)calloc(a->k * a->k, sizeof(aa_float));
  a->work = (aa_float *)calloc(a->k, sizeof(aa_float));
  a->ipiv = (blas_int *)calloc(a->k, sizeof(blas_int));

  /* calloc may legitimately return NULL for a zero-sized request, so the
   * l-sized buffers are only required when l > 0 (k > 0 holds here). */
  if (!a->M || !a->work || !a->ipiv ||
      (a->l > 0 && (!a->x || !a->f || !a->g || !a->g_prev || !a->y || !a->s ||
                    !a->d || !a->Y || !a->S || !a->D))) {
    scs_printf("Failed to allocate memory for AA.\n");
    aa_finish(a); /* frees whatever was allocated, then the struct itself */
    return (void *)0;
  }
  return a;
}
|
192
|
+
|
193
|
+
/* Applies one step of aa.
 *
 * f : in/out; f(x) on entry, overwritten with the accelerated point when the
 *     internal solve succeeds.
 * x : the input that produced f.
 * a : workspace from aa_init; a null workspace or one with k <= 0 disables
 *     acceleration and the call is a no-op.
 *
 * Returns 0 when nothing was done (acceleration off, or the very first call,
 * which only records state); otherwise returns the result of solve().
 */
aa_int aa_apply(aa_float *f, const aa_float *x, AaWork *a) {
  /* aa_init can return a null pointer and aa_finish already accepts one;
   * accept it here too instead of dereferencing it. */
  if (!a || a->k <= 0) {
    return 0;
  }
  update_accel_params(x, f, a);
  if (a->iter++ == 0) {
    return 0;
  }
  /* solve linear system, new point overwrites f if successful */
  return solve(f, a, MIN(a->iter - 1, a->k));
}
|
204
|
+
|
205
|
+
/* Releases every buffer owned by the workspace and then the workspace
 * itself. A null workspace is accepted and ignored. */
void aa_finish(AaWork *a) {
  if (!a) {
    return;
  }
  /* current point and residuals */
  free(a->x);
  free(a->f);
  free(a->g);
  free(a->g_prev);
  /* per-iteration differences */
  free(a->y);
  free(a->s);
  free(a->d);
  /* stacked histories and the small system matrix */
  free(a->Y);
  free(a->S);
  free(a->D);
  free(a->M);
  /* scratch */
  free(a->work);
  free(a->ipiv);
  free(a);
}
|
223
|
+
|
224
|
+
#endif
|
data/vendor/scs/src/aa.o
ADDED
Binary file
|
@@ -0,0 +1,802 @@
|
|
1
|
+
#include "cones.h"
|
2
|
+
|
3
|
+
#include "linalg.h"
|
4
|
+
#include "scs.h"
|
5
|
+
#include "scs_blas.h" /* contains BLAS(X) macros and type info */
|
6
|
+
#include "util.h"
|
7
|
+
|
8
|
+
#define CONE_RATE (2)
|
9
|
+
#define CONE_TOL (1e-8)
|
10
|
+
#define CONE_THRESH (1e-6)
|
11
|
+
#define EXP_CONE_MAX_ITERS (100)
|
12
|
+
#define POW_CONE_MAX_ITERS (20)
|
13
|
+
|
14
|
+
#ifdef USE_LAPACK
|
15
|
+
void BLAS(syevr)(const char *jobz, const char *range, const char *uplo,
|
16
|
+
blas_int *n, scs_float *a, blas_int *lda, scs_float *vl,
|
17
|
+
scs_float *vu, blas_int *il, blas_int *iu, scs_float *abstol,
|
18
|
+
blas_int *m, scs_float *w, scs_float *z, blas_int *ldz,
|
19
|
+
blas_int *isuppz, scs_float *work, blas_int *lwork,
|
20
|
+
blas_int *iwork, blas_int *liwork, blas_int *info);
|
21
|
+
void BLAS(syr)(const char *uplo, const blas_int *n, const scs_float *alpha,
|
22
|
+
const scs_float *x, const blas_int *incx, scs_float *a,
|
23
|
+
const blas_int *lda);
|
24
|
+
void BLAS(scal)(const blas_int *n, const scs_float *sa, scs_float *sx,
|
25
|
+
const blas_int *incx);
|
26
|
+
scs_float BLAS(nrm2)(const blas_int *n, scs_float *x, const blas_int *incx);
|
27
|
+
#endif
|
28
|
+
|
29
|
+
/* Returns s * (s + 1) / 2, the row count contributed by one semidefinite
 * cone entry s. */
static scs_int get_sd_cone_size(scs_int s) {
  const scs_int doubled = s * (s + 1);
  return doubled / 2;
}
|
30
|
+
|
31
|
+
/*
|
32
|
+
* boundaries will contain array of indices of rows of A corresponding to
|
33
|
+
* cone boundaries, boundaries[0] is starting index for cones of size strictly
|
34
|
+
* larger than 1
|
35
|
+
* returns length of boundaries array, boundaries malloc-ed here so should be
|
36
|
+
* freed
|
37
|
+
*/
|
38
|
+
scs_int SCS(get_cone_boundaries)(const ScsCone *k, scs_int **boundaries) {
|
39
|
+
scs_int i, count = 0;
|
40
|
+
scs_int len = 1 + k->qsize + k->ssize + k->ed + k->ep + k->psize;
|
41
|
+
scs_int *b = (scs_int *)scs_calloc(len, sizeof(scs_int));
|
42
|
+
b[count] = k->f + k->l;
|
43
|
+
count += 1;
|
44
|
+
if (k->qsize > 0) {
|
45
|
+
memcpy(&b[count], k->q, k->qsize * sizeof(scs_int));
|
46
|
+
}
|
47
|
+
count += k->qsize;
|
48
|
+
for (i = 0; i < k->ssize; ++i) {
|
49
|
+
b[count + i] = get_sd_cone_size(k->s[i]);
|
50
|
+
}
|
51
|
+
count += k->ssize;
|
52
|
+
for (i = 0; i < k->ep + k->ed; ++i) {
|
53
|
+
b[count + i] = 3;
|
54
|
+
}
|
55
|
+
count += k->ep + k->ed;
|
56
|
+
for (i = 0; i < k->psize; ++i) {
|
57
|
+
b[count + i] = 3;
|
58
|
+
}
|
59
|
+
count += k->psize;
|
60
|
+
*boundaries = b;
|
61
|
+
return len;
|
62
|
+
}
|
63
|
+
|
64
|
+
static scs_int get_full_cone_dims(const ScsCone *k) {
|
65
|
+
scs_int i, c = 0;
|
66
|
+
if (k->f) {
|
67
|
+
c += k->f;
|
68
|
+
}
|
69
|
+
if (k->l) {
|
70
|
+
c += k->l;
|
71
|
+
}
|
72
|
+
if (k->qsize && k->q) {
|
73
|
+
for (i = 0; i < k->qsize; ++i) {
|
74
|
+
c += k->q[i];
|
75
|
+
}
|
76
|
+
}
|
77
|
+
if (k->ssize && k->s) {
|
78
|
+
for (i = 0; i < k->ssize; ++i) {
|
79
|
+
c += get_sd_cone_size(k->s[i]);
|
80
|
+
}
|
81
|
+
}
|
82
|
+
if (k->ed) {
|
83
|
+
c += 3 * k->ed;
|
84
|
+
}
|
85
|
+
if (k->ep) {
|
86
|
+
c += 3 * k->ep;
|
87
|
+
}
|
88
|
+
if (k->p) {
|
89
|
+
c += 3 * k->psize;
|
90
|
+
}
|
91
|
+
return c;
|
92
|
+
}
|
93
|
+
|
94
|
+
scs_int SCS(validate_cones)(const ScsData *d, const ScsCone *k) {
|
95
|
+
scs_int i;
|
96
|
+
if (get_full_cone_dims(k) != d->m) {
|
97
|
+
scs_printf("cone dimensions %li not equal to num rows in A = m = %li\n",
|
98
|
+
(long)get_full_cone_dims(k), (long)d->m);
|
99
|
+
return -1;
|
100
|
+
}
|
101
|
+
if (k->f && k->f < 0) {
|
102
|
+
scs_printf("free cone error\n");
|
103
|
+
return -1;
|
104
|
+
}
|
105
|
+
if (k->l && k->l < 0) {
|
106
|
+
scs_printf("lp cone error\n");
|
107
|
+
return -1;
|
108
|
+
}
|
109
|
+
if (k->qsize && k->q) {
|
110
|
+
if (k->qsize < 0) {
|
111
|
+
scs_printf("soc cone error\n");
|
112
|
+
return -1;
|
113
|
+
}
|
114
|
+
for (i = 0; i < k->qsize; ++i) {
|
115
|
+
if (k->q[i] < 0) {
|
116
|
+
scs_printf("soc cone error\n");
|
117
|
+
return -1;
|
118
|
+
}
|
119
|
+
}
|
120
|
+
}
|
121
|
+
if (k->ssize && k->s) {
|
122
|
+
if (k->ssize < 0) {
|
123
|
+
scs_printf("sd cone error\n");
|
124
|
+
return -1;
|
125
|
+
}
|
126
|
+
for (i = 0; i < k->ssize; ++i) {
|
127
|
+
if (k->s[i] < 0) {
|
128
|
+
scs_printf("sd cone error\n");
|
129
|
+
return -1;
|
130
|
+
}
|
131
|
+
}
|
132
|
+
}
|
133
|
+
if (k->ed && k->ed < 0) {
|
134
|
+
scs_printf("ep cone error\n");
|
135
|
+
return -1;
|
136
|
+
}
|
137
|
+
if (k->ep && k->ep < 0) {
|
138
|
+
scs_printf("ed cone error\n");
|
139
|
+
return -1;
|
140
|
+
}
|
141
|
+
if (k->psize && k->p) {
|
142
|
+
if (k->psize < 0) {
|
143
|
+
scs_printf("power cone error\n");
|
144
|
+
return -1;
|
145
|
+
}
|
146
|
+
for (i = 0; i < k->psize; ++i) {
|
147
|
+
if (k->p[i] < -1 || k->p[i] > 1) {
|
148
|
+
scs_printf("power cone error, values must be in [-1,1]\n");
|
149
|
+
return -1;
|
150
|
+
}
|
151
|
+
}
|
152
|
+
}
|
153
|
+
return 0;
|
154
|
+
}
|
155
|
+
|
156
|
+
/* Formats the average cone projection time into a freshly allocated
 * 64-byte string and resets the accumulated time in c to zero. */
char *SCS(get_cone_summary)(const ScsInfo *info, ScsConeWork *c) {
  char *summary = (char *)scs_malloc(sizeof(char) * 64);

  /* total time spread over (iter + 1) projections, scaled by 1e3 */
  sprintf(summary, "\tCones: avg projection time: %1.2es\n",
          c->total_cone_time / (info->iter + 1) / 1e3);

  c->total_cone_time = 0.0;
  return summary;
}
|
163
|
+
|
164
|
+
/* Frees a cone workspace. A null workspace is accepted and ignored.
 *
 * With USE_LAPACK the eigen-decomposition buffers (Xs, Z, e, work, iwork)
 * are released first, then the struct itself.
 */
void SCS(finish_cone)(ScsConeWork *c) {
  /* must be tested before any member access below */
  if (!c) {
    return;
  }
#ifdef USE_LAPACK
  if (c->Xs) {
    scs_free(c->Xs);
  }
  if (c->Z) {
    scs_free(c->Z);
  }
  if (c->e) {
    scs_free(c->e);
  }
  if (c->work) {
    scs_free(c->work);
  }
  if (c->iwork) {
    scs_free(c->iwork);
  }
#endif
  scs_free(c);
}
|
186
|
+
|
187
|
+
char *SCS(get_cone_header)(const ScsCone *k) {
|
188
|
+
char *tmp = (char *)scs_malloc(sizeof(char) * 512);
|
189
|
+
scs_int i, soc_vars, soc_blks, sd_vars, sd_blks;
|
190
|
+
sprintf(tmp, "Cones:");
|
191
|
+
if (k->f) {
|
192
|
+
sprintf(tmp + strlen(tmp), "\tprimal zero / dual free vars: %li\n",
|
193
|
+
(long)k->f);
|
194
|
+
}
|
195
|
+
if (k->l) {
|
196
|
+
sprintf(tmp + strlen(tmp), "\tlinear vars: %li\n", (long)k->l);
|
197
|
+
}
|
198
|
+
soc_vars = 0;
|
199
|
+
soc_blks = 0;
|
200
|
+
if (k->qsize && k->q) {
|
201
|
+
soc_blks = k->qsize;
|
202
|
+
for (i = 0; i < k->qsize; i++) {
|
203
|
+
soc_vars += k->q[i];
|
204
|
+
}
|
205
|
+
sprintf(tmp + strlen(tmp), "\tsoc vars: %li, soc blks: %li\n",
|
206
|
+
(long)soc_vars, (long)soc_blks);
|
207
|
+
}
|
208
|
+
sd_vars = 0;
|
209
|
+
sd_blks = 0;
|
210
|
+
if (k->ssize && k->s) {
|
211
|
+
sd_blks = k->ssize;
|
212
|
+
for (i = 0; i < k->ssize; i++) {
|
213
|
+
sd_vars += get_sd_cone_size(k->s[i]);
|
214
|
+
}
|
215
|
+
sprintf(tmp + strlen(tmp), "\tsd vars: %li, sd blks: %li\n", (long)sd_vars,
|
216
|
+
(long)sd_blks);
|
217
|
+
}
|
218
|
+
if (k->ep || k->ed) {
|
219
|
+
sprintf(tmp + strlen(tmp), "\texp vars: %li, dual exp vars: %li\n",
|
220
|
+
(long)(3 * k->ep), (long)(3 * k->ed));
|
221
|
+
}
|
222
|
+
if (k->psize && k->p) {
|
223
|
+
sprintf(tmp + strlen(tmp), "\tprimal + dual power vars: %li\n",
|
224
|
+
(long)(3 * k->psize));
|
225
|
+
}
|
226
|
+
return tmp;
|
227
|
+
}
|
228
|
+
|
229
|
+
/* Returns 1 when every one of the ssize entries of s is at most 2, and 0 as
 * soon as a larger entry is found. */
static scs_int is_simple_semi_definite_cone(scs_int *s, scs_int ssize) {
  scs_int idx = 0;
  while (idx < ssize) {
    if (s[idx] > 2) {
      return 0; /* a block larger than 2 is present */
    }
    idx++;
  }
  return 1; /* no entry exceeded 2 */
}
|
238
|
+
|
239
|
+
/* One-dimensional Newton iteration used by the exponential cone projection.
 *
 * Starts from t = MAX(-z_hat, 1e-6) and runs at most EXP_CONE_MAX_ITERS
 * Newton steps. Returns 0 as soon as an updated t is <= -z_hat, z_hat as
 * soon as it is <= 0, and t + z_hat otherwise (after |f| drops below
 * CONE_TOL or the iteration budget is exhausted).
 */
static scs_float exp_newton_one_d(scs_float rho, scs_float y_hat,
                                  scs_float z_hat) {
  scs_float t = MAX(-z_hat, 1e-6);
  scs_float val, slope;
  scs_int it = 0;

  while (it < EXP_CONE_MAX_ITERS) {
    /* function value and derivative at the current t */
    val = t * (t + z_hat) / rho / rho - y_hat / rho + log(t / rho) + 1;
    slope = (2 * t + z_hat) / rho / rho + 1 / t;

    /* Newton step */
    t = t - val / slope;

    if (t <= -z_hat) {
      return 0;
    }
    if (t <= 0) {
      return z_hat;
    }
    if (ABS(val) < CONE_TOL) {
      break;
    }
    ++it;
  }
  return t + z_hat;
}
|
260
|
+
|
261
|
+
/* Given the point v and a dual value rho, writes the three components of
 * the corresponding candidate x. */
static void exp_solve_for_x_with_rho(scs_float *v, scs_float *x,
                                     scs_float rho) {
  /* the first component depends on v[0] and rho only */
  x[0] = v[0] - rho;
  /* the third component comes from the 1-d Newton solve ... */
  x[2] = exp_newton_one_d(rho, v[1], v[2]);
  /* ... and the second one is derived from it */
  x[1] = (x[2] - v[2]) * x[2] / rho;
}
|
267
|
+
|
268
|
+
/* Recomputes x for the given rho (see exp_solve_for_x_with_rho) and returns
 * x[0] + x[1] * log(x[1] / x[2]); the logarithmic term is dropped when x[1]
 * is at most 1e-12. */
static scs_float exp_calc_grad(scs_float *v, scs_float *x, scs_float rho) {
  exp_solve_for_x_with_rho(v, x, rho);
  if (x[1] > 1e-12) {
    return x[0] + x[1] * log(x[1] / x[2]);
  }
  return x[0];
}
|
275
|
+
|
276
|
+
/* Brackets rho for the bisection: starting from ub = 0.125, ub is doubled
 * (and lb moved up to the old ub) for as long as exp_calc_grad at ub is
 * positive. x is used as scratch by exp_calc_grad. */
static void exp_get_rho_ub(scs_float *v, scs_float *x, scs_float *ub,
                           scs_float *lb) {
  *lb = 0;
  for (*ub = 0.125; exp_calc_grad(v, x, *ub) > 0; (*ub) *= 2) {
    *lb = *ub;
  }
}
|
285
|
+
|
286
|
+
/* project onto the exponential cone, v has dimension *exactly* 3 */
|
287
|
+
static scs_int proj_exp_cone(scs_float *v) {
|
288
|
+
scs_int i;
|
289
|
+
scs_float ub, lb, rho, g, x[3];
|
290
|
+
scs_float r = v[0], s = v[1], t = v[2];
|
291
|
+
scs_float tol = CONE_TOL; /* iter < 0 ? CONE_TOL : MAX(CONE_TOL, 1 /
|
292
|
+
POWF((iter + 1), CONE_RATE)); */
|
293
|
+
|
294
|
+
/* v in cl(Kexp) */
|
295
|
+
if ((s * exp(r / s) - t <= CONE_THRESH && s > 0) ||
|
296
|
+
(r <= 0 && s == 0 && t >= 0)) {
|
297
|
+
return 0;
|
298
|
+
}
|
299
|
+
|
300
|
+
/* -v in Kexp^* */
|
301
|
+
if ((-r < 0 && r * exp(s / r) + exp(1) * t <= CONE_THRESH) ||
|
302
|
+
(-r == 0 && -s >= 0 && -t >= 0)) {
|
303
|
+
memset(v, 0, 3 * sizeof(scs_float));
|
304
|
+
return 0;
|
305
|
+
}
|
306
|
+
|
307
|
+
/* special case with analytical solution */
|
308
|
+
if (r < 0 && s < 0) {
|
309
|
+
v[1] = 0.0;
|
310
|
+
v[2] = MAX(v[2], 0);
|
311
|
+
return 0;
|
312
|
+
}
|
313
|
+
|
314
|
+
/* iterative procedure to find projection, bisects on dual variable: */
|
315
|
+
exp_get_rho_ub(v, x, &ub, &lb); /* get starting upper and lower bounds */
|
316
|
+
for (i = 0; i < EXP_CONE_MAX_ITERS; ++i) {
|
317
|
+
rho = (ub + lb) / 2; /* halfway between upper and lower bounds */
|
318
|
+
g = exp_calc_grad(v, x, rho); /* calculates gradient wrt dual var */
|
319
|
+
if (g > 0) {
|
320
|
+
lb = rho;
|
321
|
+
} else {
|
322
|
+
ub = rho;
|
323
|
+
}
|
324
|
+
if (ub - lb < tol) {
|
325
|
+
break;
|
326
|
+
}
|
327
|
+
}
|
328
|
+
/*
|
329
|
+
#if EXTRA_VERBOSE > 0
|
330
|
+
scs_printf("exponential cone proj iters %i\n", i);
|
331
|
+
#endif
|
332
|
+
*/
|
333
|
+
v[0] = x[0];
|
334
|
+
v[1] = x[1];
|
335
|
+
v[2] = x[2];
|
336
|
+
return 0;
|
337
|
+
}
|
338
|
+
|
339
|
+
/* Allocates the buffers in c used for semidefinite cones.
 *
 * With USE_LAPACK: sizes Xs, Z and e from the largest entry of k->s, asks
 * ?syevr for its workspace sizes (lwork = liwork = -1 query) and allocates
 * work / iwork accordingly. Returns 0 on success, -1 when ?syevr reports an
 * error or any allocation failed.
 *
 * Without USE_LAPACK: prints an explanatory message and returns -1.
 */
static scs_int set_up_sd_cone_work_space(ScsConeWork *c, const ScsCone *k) {
#ifdef USE_LAPACK
  scs_int j;
  blas_int largest = 0;
  blas_int query = -1;
  blas_int found = 0;
  blas_int status = 0;
  scs_float tol = 1e-8;
  scs_float opt_lwork = 0.0;
#if EXTRA_VERBOSE > 0
#define _STR_EXPAND(tok) #tok
#define _STR(tok) _STR_EXPAND(tok)
  scs_printf("BLAS(func) = '%s'\n", _STR(BLAS(func)));
#endif
  /* the biggest block determines every buffer size */
  for (j = 0; j < k->ssize; ++j) {
    if (k->s[j] > largest) {
      largest = (blas_int)k->s[j];
    }
  }
  c->Xs = (scs_float *)scs_calloc(largest * largest, sizeof(scs_float));
  c->Z = (scs_float *)scs_calloc(largest * largest, sizeof(scs_float));
  c->e = (scs_float *)scs_calloc(largest, sizeof(scs_float));
  c->liwork = 0;

  /* workspace query: opt_lwork and c->liwork receive the required sizes */
  BLAS(syevr)
  ("Vectors", "All", "Lower", &largest, c->Xs, &largest, SCS_NULL, SCS_NULL,
   SCS_NULL, SCS_NULL, &tol, &found, c->e, c->Z, &largest, SCS_NULL,
   &opt_lwork, &query, &(c->liwork), &query, &status);

  if (status != 0) {
    scs_printf("FATAL: syevr failure, info = %li\n", (long)status);
    return -1;
  }
  c->lwork = (blas_int)(opt_lwork + 0.01); /* 0.01 guards the int cast */
  c->work = (scs_float *)scs_calloc(c->lwork, sizeof(scs_float));
  c->iwork = (blas_int *)scs_calloc(c->liwork, sizeof(blas_int));

  /* every buffer must have been obtained */
  return (c->Xs && c->Z && c->e && c->work && c->iwork) ? 0 : -1;
#else
  scs_printf(
      "FATAL: Cannot solve SDPs with > 2x2 matrices without linked "
      "blas+lapack libraries\n");
  scs_printf(
      "Install blas+lapack and re-compile SCS with blas+lapack library "
      "locations\n");
  return -1;
#endif
}
|
391
|
+
|
392
|
+
ScsConeWork *SCS(init_cone)(const ScsCone *k) {
|
393
|
+
ScsConeWork *c = (ScsConeWork *)scs_calloc(1, sizeof(ScsConeWork));
|
394
|
+
#if EXTRA_VERBOSE > 0
|
395
|
+
scs_printf("init_cone\n");
|
396
|
+
#endif
|
397
|
+
c->total_cone_time = 0.0;
|
398
|
+
if (k->ssize && k->s) {
|
399
|
+
if (!is_simple_semi_definite_cone(k->s, k->ssize) &&
|
400
|
+
set_up_sd_cone_work_space(c, k) < 0) {
|
401
|
+
SCS(finish_cone)(c);
|
402
|
+
return SCS_NULL;
|
403
|
+
}
|
404
|
+
}
|
405
|
+
#if EXTRA_VERBOSE > 0
|
406
|
+
scs_printf("init_cone complete\n");
|
407
|
+
#ifdef MATLAB_MEX_FILE
|
408
|
+
mexEvalString("drawnow;");
|
409
|
+
#endif
|
410
|
+
#endif
|
411
|
+
return c;
|
412
|
+
}
|
413
|
+
|
414
|
+
/*
 * Projects a 2x2 symmetric matrix onto the positive semidefinite cone, in
 * place, using the closed-form eigen-decomposition.
 *
 * X holds three entries: X[0] and X[2] are the diagonal, X[1] is the
 * off-diagonal entry multiplied by sqrt(2) (it is divided by sqrt(2) on the
 * way in and multiplied back on the way out).
 *
 * Always returns 0.
 */
static scs_int project_2x2_sdc(scs_float *X) {
  scs_float sqrt2 = SQRTF(2.0);
  scs_float diag0 = X[0];
  scs_float off = X[1] / sqrt2;
  scs_float diag1 = X[2];
  scs_float disc, eig_hi, eig_lo, u0, u1;

  /* (near-)diagonal matrix: clamp the diagonal, drop the off-diagonal */
  if (ABS(off) < 1e-6) {
    X[0] = MAX(diag0, 0);
    X[1] = 0;
    X[2] = MAX(diag1, 0);
    return 0;
  }

  disc = SQRTF((diag0 - diag1) * (diag0 - diag1) + 4 * off * off);
  /* eig_hi >= eig_lo always, since disc >= 0 */
  eig_hi = 0.5 * (diag0 + diag1 + disc);
  eig_lo = 0.5 * (diag0 + diag1 - disc);

#if EXTRA_VERBOSE > 0
  scs_printf(
      "2x2 SD: a = %4f, b = %4f, (X[1] = %4f, X[2] = %4f), d = %4f, "
      "rad = %4f, l1 = %4f, l2 = %4f\n",
      diag0, off, X[1], X[2], diag1, disc, eig_hi, eig_lo);
#endif

  if (eig_lo >= 0) {
    /* smaller eigenvalue is non-negative: X is left untouched */
    return 0;
  }
  if (eig_hi <= 0) {
    /* larger eigenvalue is non-positive: the projection is zero */
    X[0] = X[1] = X[2] = 0;
    return 0;
  }

  /*
   * Mixed signs: keep only the eig_hi component. (u0, u1) is the unit
   * eigenvector associated with eig_hi.
   */
  u0 = 1 / SQRTF(1 + (eig_hi - diag0) * (eig_hi - diag0) / off / off);
  u1 = u0 * (eig_hi - diag0) / off;

  X[0] = eig_hi * u0 * u0;
  X[1] = (eig_hi * u0 * u1) * sqrt2;
  X[2] = eig_hi * u1 * u1;
  return 0;
}
|
459
|
+
|
460
|
+
/* size of X is get_sd_cone_size(n) */
|
461
|
+
static scs_int proj_semi_definite_cone(scs_float *X, const scs_int n,
|
462
|
+
ScsConeWork *c) {
|
463
|
+
/* project onto the positive semi-definite cone */
|
464
|
+
#ifdef USE_LAPACK
|
465
|
+
scs_int i;
|
466
|
+
blas_int one = 1;
|
467
|
+
blas_int m = 0;
|
468
|
+
blas_int nb = (blas_int)n;
|
469
|
+
blas_int nb_plus_one = (blas_int)(n + 1);
|
470
|
+
blas_int cone_sz = (blas_int)(get_sd_cone_size(n));
|
471
|
+
|
472
|
+
scs_float sqrt2 = SQRTF(2.0);
|
473
|
+
scs_float sqrt2Inv = 1.0 / sqrt2;
|
474
|
+
scs_float *Xs = c->Xs;
|
475
|
+
scs_float *Z = c->Z;
|
476
|
+
scs_float *e = c->e;
|
477
|
+
scs_float *work = c->work;
|
478
|
+
blas_int *iwork = c->iwork;
|
479
|
+
blas_int lwork = c->lwork;
|
480
|
+
blas_int liwork = c->liwork;
|
481
|
+
|
482
|
+
scs_float eig_tol = CONE_TOL; /* iter < 0 ? CONE_TOL : MAX(CONE_TOL, 1 /
|
483
|
+
POWF(iter + 1, CONE_RATE)); */
|
484
|
+
scs_float zero = 0.0;
|
485
|
+
blas_int info = 0;
|
486
|
+
scs_float vupper = 0.0;
|
487
|
+
#endif
|
488
|
+
if (n == 0) {
|
489
|
+
return 0;
|
490
|
+
}
|
491
|
+
if (n == 1) {
|
492
|
+
if (X[0] < 0.0) {
|
493
|
+
X[0] = 0.0;
|
494
|
+
}
|
495
|
+
return 0;
|
496
|
+
}
|
497
|
+
if (n == 2) {
|
498
|
+
return project_2x2_sdc(X);
|
499
|
+
}
|
500
|
+
#ifdef USE_LAPACK
|
501
|
+
|
502
|
+
memset(Xs, 0, n * n * sizeof(scs_float));
|
503
|
+
/* expand lower triangular matrix to full matrix */
|
504
|
+
for (i = 0; i < n; ++i) {
|
505
|
+
memcpy(&(Xs[i * (n + 1)]), &(X[i * n - ((i - 1) * i) / 2]),
|
506
|
+
(n - i) * sizeof(scs_float));
|
507
|
+
}
|
508
|
+
/*
|
509
|
+
rescale so projection works, and matrix norm preserved
|
510
|
+
see http://www.seas.ucla.edu/~vandenbe/publications/mlbook.pdf pg 3
|
511
|
+
*/
|
512
|
+
/* scale diags by sqrt(2) */
|
513
|
+
BLAS(scal)(&nb, &sqrt2, Xs, &nb_plus_one); /* not n_squared */
|
514
|
+
|
515
|
+
/* max-eig upper bounded by frobenius norm */
|
516
|
+
vupper = 1.1 * sqrt2 *
|
517
|
+
BLAS(nrm2)(&cone_sz, X,
|
518
|
+
&one); /* mult by factor to make sure is upper bound */
|
519
|
+
vupper = MAX(vupper, 0.01);
|
520
|
+
#if EXTRA_VERBOSE > 0
|
521
|
+
SCS(print_array)(Xs, n * n, "Xs");
|
522
|
+
SCS(print_array)(X, get_sd_cone_size(n), "X");
|
523
|
+
#endif
|
524
|
+
/* Solve eigenproblem, reuse workspaces */
|
525
|
+
BLAS(syevr)
|
526
|
+
("Vectors", "VInterval", "Lower", &nb, Xs, &nb, &zero, &vupper, SCS_NULL,
|
527
|
+
SCS_NULL, &eig_tol, &m, e, Z, &nb, SCS_NULL, work, &lwork, iwork, &liwork,
|
528
|
+
&info);
|
529
|
+
#if EXTRA_VERBOSE > 0
|
530
|
+
if (info != 0) {
|
531
|
+
scs_printf("WARN: LAPACK syevr error, info = %i\n", info);
|
532
|
+
}
|
533
|
+
scs_printf("syevr input parameter dump:\n");
|
534
|
+
scs_printf("nb = %li\n", (long)nb);
|
535
|
+
scs_printf("lwork = %li\n", (long)lwork);
|
536
|
+
scs_printf("liwork = %li\n", (long)liwork);
|
537
|
+
scs_printf("vupper = %f\n", vupper);
|
538
|
+
scs_printf("eig_tol = %e\n", eig_tol);
|
539
|
+
SCS(print_array)(e, m, "e");
|
540
|
+
SCS(print_array)(Z, m * n, "Z");
|
541
|
+
#endif
|
542
|
+
if (info < 0) {
|
543
|
+
return -1;
|
544
|
+
}
|
545
|
+
|
546
|
+
memset(Xs, 0, n * n * sizeof(scs_float));
|
547
|
+
for (i = 0; i < m; ++i) {
|
548
|
+
scs_float a = e[i];
|
549
|
+
BLAS(syr)("Lower", &nb, &a, &(Z[i * n]), &one, Xs, &nb);
|
550
|
+
}
|
551
|
+
/* scale diags by 1/sqrt(2) */
|
552
|
+
BLAS(scal)(&nb, &sqrt2Inv, Xs, &nb_plus_one); /* not n_squared */
|
553
|
+
/* extract just lower triangular matrix */
|
554
|
+
for (i = 0; i < n; ++i) {
|
555
|
+
memcpy(&(X[i * n - ((i - 1) * i) / 2]), &(Xs[i * (n + 1)]),
|
556
|
+
(n - i) * sizeof(scs_float));
|
557
|
+
}
|
558
|
+
|
559
|
+
#if EXTRA_VERBOSE > 0
|
560
|
+
SCS(print_array)(Xs, n * n, "Xs");
|
561
|
+
SCS(print_array)(X, get_sd_cone_size(n), "X");
|
562
|
+
#endif
|
563
|
+
|
564
|
+
#else
|
565
|
+
scs_printf(
|
566
|
+
"FAILURE: solving SDP with > 2x2 matrices, but no blas/lapack "
|
567
|
+
"libraries were linked!\n");
|
568
|
+
scs_printf("SCS will return nonsense!\n");
|
569
|
+
SCS(scale_array)(X, NAN, n);
|
570
|
+
return -1;
|
571
|
+
#endif
|
572
|
+
return 0;
|
573
|
+
}
|
574
|
+
|
575
|
+
/*
 * Returns the larger root of  x^2 - xh * x - a * (rh - r) * r = 0,
 * floored at 1e-12.
 */
static scs_float pow_calc_x(scs_float r, scs_float xh, scs_float rh,
                            scs_float a) {
  scs_float disc = xh * xh + 4 * a * (rh - r) * r;
  scs_float root = 0.5 * (xh + SQRTF(disc));
  return MAX(root, 1e-12);
}
|
580
|
+
|
581
|
+
/*
 * Evaluates a * (rh - 2r) / (2x - xh), i.e. dx/dr obtained by implicitly
 * differentiating  x^2 - xh * x - a * (rh - r) * r = 0  with respect to r.
 */
static scs_float pow_calcdxdr(scs_float x, scs_float xh, scs_float rh,
                              scs_float r, scs_float a) {
  scs_float numer = a * (rh - 2 * r);
  scs_float denom = 2 * x - xh;
  return numer / denom;
}
|
585
|
+
|
586
|
+
/* Residual of the power-cone boundary equation: x^a * y^(1 - a) - r. */
static scs_float pow_calc_f(scs_float x, scs_float y, scs_float r,
                            scs_float a) {
  scs_float weighted_mean = POWF(x, a) * POWF(y, (1 - a));
  return weighted_mean - r;
}
|
590
|
+
|
591
|
+
/*
 * Derivative with respect to r of  x^a * y^(1 - a) - r,  given the
 * derivatives dxdr and dydr of x and y.
 */
static scs_float pow_calc_fp(scs_float x, scs_float y, scs_float dxdr,
                             scs_float dydr, scs_float a) {
  scs_float weighted_mean = POWF(x, a) * POWF(y, (1 - a));
  scs_float log_slope = a * dxdr / x + (1 - a) * dydr / y;
  return weighted_mean * log_slope - 1;
}
|
596
|
+
|
597
|
+
/*
 * Projects the triple v, in place, onto the power cone with exponent a
 * (the set tested below: x >= 0, y >= 0, x^a * y^(1 - a) >= |r|).
 *
 * Two cases are resolved directly: v already satisfies the cone test (left
 * unchanged) and -v satisfies the dual-cone test (v is set to zero). All
 * other inputs go through a Newton iteration on r, clamped to [0, |v[2]|]
 * and capped at POW_CONE_MAX_ITERS steps.
 */
static void proj_power_cone(scs_float *v, scs_float a) {
  scs_float xh = v[0];
  scs_float yh = v[1];
  scs_float rh = ABS(v[2]);
  scs_float px = 0.0;
  scs_float py = 0.0;
  scs_float pr;
  scs_int it;

  /* v in K_a */
  if (xh >= 0 && yh >= 0 &&
      CONE_THRESH + POWF(xh, a) * POWF(yh, (1 - a)) >= rh) {
    return;
  }

  /* -v in K_a^* */
  if (xh <= 0 && yh <= 0 &&
      CONE_THRESH + POWF(-xh, a) * POWF(-yh, 1 - a) >=
          rh * POWF(a, a) * POWF(1 - a, 1 - a)) {
    v[0] = 0;
    v[1] = 0;
    v[2] = 0;
    return;
  }

  /* Newton iteration on r, started from the midpoint of [0, rh] */
  pr = rh / 2;
  for (it = 0; it < POW_CONE_MAX_ITERS; ++it) {
    scs_float resid, slope, dx, dy, next;

    px = pow_calc_x(pr, xh, rh, a);
    py = pow_calc_x(pr, yh, rh, 1 - a);

    resid = pow_calc_f(px, py, pr, a);
    if (ABS(resid) < CONE_TOL) {
      break;
    }

    dx = pow_calcdxdr(px, xh, rh, pr, a);
    dy = pow_calcdxdr(py, yh, rh, pr, (1 - a));
    slope = pow_calc_fp(px, py, dx, dy, a);

    /* Newton step, then clamp to [0, rh] */
    next = pr - resid / slope;
    pr = MAX(next, 0);
    pr = MIN(pr, rh);
  }

  /* write back, restoring the sign of the original third component */
  v[0] = px;
  v[1] = py;
  if (v[2] < 0) {
    v[2] = -pr;
  } else {
    v[2] = pr;
  }
}
|
637
|
+
|
638
|
+
/* outward facing cone projection routine, iter is outer algorithm iteration, if
|
639
|
+
iter < 0 then iter is ignored
|
640
|
+
warm_start contains guess of projection (can be set to SCS_NULL) */
|
641
|
+
scs_int SCS(proj_dual_cone)(scs_float *x, const ScsCone *k, ScsConeWork *c,
|
642
|
+
const scs_float *warm_start, scs_int iter) {
|
643
|
+
scs_int i;
|
644
|
+
scs_int count = (k->f ? k->f : 0);
|
645
|
+
SCS(timer) cone_timer;
|
646
|
+
#if EXTRA_VERBOSE > 0
|
647
|
+
SCS(timer) proj_timer;
|
648
|
+
SCS(tic)(&proj_timer);
|
649
|
+
#endif
|
650
|
+
SCS(tic)(&cone_timer);
|
651
|
+
|
652
|
+
if (k->l) {
|
653
|
+
/* project onto positive orthant */
|
654
|
+
for (i = count; i < count + k->l; ++i) {
|
655
|
+
if (x[i] < 0.0) {
|
656
|
+
x[i] = 0.0;
|
657
|
+
}
|
658
|
+
/* x[i] = (x[i] < 0.0) ? 0.0 : x[i]; */
|
659
|
+
}
|
660
|
+
count += k->l;
|
661
|
+
#if EXTRA_VERBOSE > 0
|
662
|
+
scs_printf("pos orthant proj time: %1.2es\n", SCS(tocq)(&proj_timer) / 1e3);
|
663
|
+
SCS(tic)(&proj_timer);
|
664
|
+
#endif
|
665
|
+
}
|
666
|
+
|
667
|
+
if (k->qsize && k->q) {
|
668
|
+
/* project onto SOC */
|
669
|
+
for (i = 0; i < k->qsize; ++i) {
|
670
|
+
if (k->q[i] == 0) {
|
671
|
+
continue;
|
672
|
+
}
|
673
|
+
if (k->q[i] == 1) {
|
674
|
+
if (x[count] < 0.0) {
|
675
|
+
x[count] = 0.0;
|
676
|
+
}
|
677
|
+
} else {
|
678
|
+
scs_float v1 = x[count];
|
679
|
+
scs_float s = SCS(norm)(&(x[count + 1]), k->q[i] - 1);
|
680
|
+
scs_float alpha = (s + v1) / 2.0;
|
681
|
+
|
682
|
+
if (s <= v1) { /* do nothing */
|
683
|
+
} else if (s <= -v1) {
|
684
|
+
memset(&(x[count]), 0, k->q[i] * sizeof(scs_float));
|
685
|
+
} else {
|
686
|
+
x[count] = alpha;
|
687
|
+
SCS(scale_array)(&(x[count + 1]), alpha / s, k->q[i] - 1);
|
688
|
+
}
|
689
|
+
}
|
690
|
+
count += k->q[i];
|
691
|
+
}
|
692
|
+
#if EXTRA_VERBOSE > 0
|
693
|
+
scs_printf("SOC proj time: %1.2es\n", SCS(tocq)(&proj_timer) / 1e3);
|
694
|
+
SCS(tic)(&proj_timer);
|
695
|
+
#endif
|
696
|
+
}
|
697
|
+
|
698
|
+
if (k->ssize && k->s) {
|
699
|
+
/* project onto PSD cone */
|
700
|
+
for (i = 0; i < k->ssize; ++i) {
|
701
|
+
#if EXTRA_VERBOSE > 0
|
702
|
+
scs_printf("SD proj size %li\n", (long)k->s[i]);
|
703
|
+
#endif
|
704
|
+
if (k->s[i] == 0) {
|
705
|
+
continue;
|
706
|
+
}
|
707
|
+
if (proj_semi_definite_cone(&(x[count]), k->s[i], c) < 0) {
|
708
|
+
return -1;
|
709
|
+
}
|
710
|
+
count += get_sd_cone_size(k->s[i]);
|
711
|
+
}
|
712
|
+
#if EXTRA_VERBOSE > 0
|
713
|
+
scs_printf("SD proj time: %1.2es\n", SCS(tocq)(&proj_timer) / 1e3);
|
714
|
+
SCS(tic)(&proj_timer);
|
715
|
+
#endif
|
716
|
+
}
|
717
|
+
|
718
|
+
if (k->ep) {
|
719
|
+
scs_float r, s, t;
|
720
|
+
scs_int idx;
|
721
|
+
/*
|
722
|
+
* exponential cone is not self dual, if s \in K
|
723
|
+
* then y \in K^* and so if K is the primal cone
|
724
|
+
* here we project onto K^*, via Moreau
|
725
|
+
* \Pi_C^*(y) = y + \Pi_C(-y)
|
726
|
+
*/
|
727
|
+
SCS(scale_array)(&(x[count]), -1, 3 * k->ep); /* x = -x; */
|
728
|
+
#ifdef _OPENMP
|
729
|
+
#pragma omp parallel for private(r, s, t, idx)
|
730
|
+
#endif
|
731
|
+
for (i = 0; i < k->ep; ++i) {
|
732
|
+
idx = count + 3 * i;
|
733
|
+
r = x[idx];
|
734
|
+
s = x[idx + 1];
|
735
|
+
t = x[idx + 2];
|
736
|
+
|
737
|
+
proj_exp_cone(&(x[idx]));
|
738
|
+
|
739
|
+
x[idx] -= r;
|
740
|
+
x[idx + 1] -= s;
|
741
|
+
x[idx + 2] -= t;
|
742
|
+
}
|
743
|
+
count += 3 * k->ep;
|
744
|
+
#if EXTRA_VERBOSE > 0
|
745
|
+
scs_printf("EP proj time: %1.2es\n", SCS(tocq)(&proj_timer) / 1e3);
|
746
|
+
SCS(tic)(&proj_timer);
|
747
|
+
#endif
|
748
|
+
}
|
749
|
+
|
750
|
+
if (k->ed) {
|
751
|
+
/* exponential cone: */
|
752
|
+
#ifdef _OPENMP
|
753
|
+
#pragma omp parallel for
|
754
|
+
#endif
|
755
|
+
for (i = 0; i < k->ed; ++i) {
|
756
|
+
proj_exp_cone(&(x[count + 3 * i]));
|
757
|
+
}
|
758
|
+
count += 3 * k->ed;
|
759
|
+
#if EXTRA_VERBOSE > 0
|
760
|
+
scs_printf("ED proj time: %1.2es\n", SCS(tocq)(&proj_timer) / 1e3);
|
761
|
+
SCS(tic)(&proj_timer);
|
762
|
+
#endif
|
763
|
+
}
|
764
|
+
|
765
|
+
if (k->psize && k->p) {
|
766
|
+
scs_float v[3];
|
767
|
+
scs_int idx;
|
768
|
+
/* don't use openmp for power cone
|
769
|
+
ifdef _OPENMP
|
770
|
+
pragma omp parallel for private(v, idx)
|
771
|
+
endif
|
772
|
+
*/
|
773
|
+
for (i = 0; i < k->psize; ++i) {
|
774
|
+
idx = count + 3 * i;
|
775
|
+
if (k->p[i] <= 0) {
|
776
|
+
/* dual power cone */
|
777
|
+
proj_power_cone(&(x[idx]), -k->p[i]);
|
778
|
+
} else {
|
779
|
+
/* primal power cone, using Moreau */
|
780
|
+
v[0] = -x[idx];
|
781
|
+
v[1] = -x[idx + 1];
|
782
|
+
v[2] = -x[idx + 2];
|
783
|
+
|
784
|
+
proj_power_cone(v, k->p[i]);
|
785
|
+
|
786
|
+
x[idx] += v[0];
|
787
|
+
x[idx + 1] += v[1];
|
788
|
+
x[idx + 2] += v[2];
|
789
|
+
}
|
790
|
+
}
|
791
|
+
count += 3 * k->psize;
|
792
|
+
#if EXTRA_VERBOSE > 0
|
793
|
+
scs_printf("Power cone proj time: %1.2es\n", SCS(tocq)(&proj_timer) / 1e3);
|
794
|
+
SCS(tic)(&proj_timer);
|
795
|
+
#endif
|
796
|
+
}
|
797
|
+
/* project onto OTHER cones */
|
798
|
+
if (c) {
|
799
|
+
c->total_cone_time += SCS(tocq)(&cone_timer);
|
800
|
+
}
|
801
|
+
return 0;
|
802
|
+
}
|