RubyGems - lbfgsb - Versions diffs - 0.1.0 - Mend

lbfgsb 0.1.0

Files changed (23) hide show

@@ -0,0 +1,287 @@
+/**
+ * L-BFGS-B is released under the “New BSD License” (aka “Modified BSD License”
+ * or “3-clause license”)
+ * Please read attached file License.txt
+ */
+#include "blas.h"
+double dnrm2_(long *n, double *x, long *incx)
+{
+  long i__1, i__2;
+  double ret_val, d__1, d__2, d__3;
+  static long i__;
+  static double scale;
+  --x;
+  ret_val = 0.;
+  scale = 0.;
+  i__1 = *n;
+  i__2 = *incx;
+  for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+    d__2 = scale, d__3 = (d__1 = x[i__], fabs(d__1));
+    scale = d__2 >= d__3 ? d__2 : d__3;
+  }
+  if (scale == 0.) {
+    return ret_val;
+  }
+  i__2 = *n;
+  i__1 = *incx;
+  for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) {
+    d__1 = x[i__] / scale;
+    ret_val += d__1 * d__1;
+  }
+  ret_val = scale * sqrt(ret_val);
+  return ret_val;
+}
/**
 * Constant times a vector plus a vector: dy := dy + da * dx.
 * (jack dongarra, linpack, 3/11/78.)
 *
 *   n           number of elements to update; nothing is done when n <= 0.
 *   da          scalar multiplier; nothing is done when *da == 0.
 *   dx, incx    source vector and its increment.
 *   dy, incy    vector updated in place and its increment.
 *
 * For a negative increment the corresponding vector is traversed from the
 * element at offset (n-1)*|inc| back towards offset 0.
 *
 * Always returns 0.
 */
int daxpy_(long *n, double *da, double *dx, long *incx, double *dy, long *incy)
{
  const long count = *n;
  long k, ix, iy;

  if (count <= 0 || *da == 0.) {
    return 0;
  }

  /* Both increments equal to 1: walk the two arrays in lockstep. */
  if (*incx == 1 && *incy == 1) {
    for (k = 0; k < count; ++k) {
      dy[k] += *da * dx[k];
    }
    return 0;
  }

  /* Unequal increments, or equal increments not equal to 1. */
  ix = *incx < 0 ? (1 - count) * *incx : 0;
  iy = *incy < 0 ? (1 - count) * *incy : 0;
  for (k = 0; k < count; ++k) {
    dy[iy] += *da * dx[ix];
    ix += *incx;
    iy += *incy;
  }
  return 0;
}
/**
 * Copies a vector, x, to a vector, y.
 * (jack dongarra, linpack, 3/11/78.)
 *
 *   n           number of elements to copy; nothing is done when n <= 0.
 *   dx, incx    source vector and its increment.
 *   dy, incy    destination vector and its increment.
 *
 * Elements are copied one at a time in ascending element order.  For a
 * negative increment the corresponding vector is traversed from the element
 * at offset (n-1)*|inc| back towards offset 0.
 *
 * Always returns 0.
 */
int dcopy_(long *n, double *dx, long *incx, double *dy, long *incy)
{
  const long count = *n;
  long k, ix, iy;

  if (count <= 0) {
    return 0;
  }

  /* Both increments equal to 1. */
  if (*incx == 1 && *incy == 1) {
    for (k = 0; k < count; ++k) {
      dy[k] = dx[k];
    }
    return 0;
  }

  /* Unequal increments, or equal increments not equal to 1. */
  ix = *incx < 0 ? (1 - count) * *incx : 0;
  iy = *incy < 0 ? (1 - count) * *incy : 0;
  for (k = 0; k < count; ++k) {
    dy[iy] = dx[ix];
    ix += *incx;
    iy += *incy;
  }
  return 0;
}
/**
 * Forms the dot product of two vectors.
 * (jack dongarra, linpack, 3/11/78.)
 *
 *   n           number of element pairs; the result is 0 when n <= 0.
 *   dx, incx    first vector and its increment.
 *   dy, incy    second vector and its increment.
 *
 * Products are added to the accumulator one after another in ascending
 * element order.  For a negative increment the corresponding vector is
 * traversed from the element at offset (n-1)*|inc| back towards offset 0.
 *
 * Returns the accumulated sum of dx(i) * dy(i).
 */
double ddot_(long *n, double *dx, long *incx, double *dy, long *incy)
{
  const long count = *n;
  double acc = 0.;
  long k, ix, iy;

  if (count <= 0) {
    return 0.;
  }

  /* Both increments equal to 1. */
  if (*incx == 1 && *incy == 1) {
    for (k = 0; k < count; ++k) {
      acc += dx[k] * dy[k];
    }
    return acc;
  }

  /* Unequal increments, or equal increments not equal to 1. */
  ix = *incx < 0 ? (1 - count) * *incx : 0;
  iy = *incy < 0 ? (1 - count) * *incy : 0;
  for (k = 0; k < count; ++k) {
    acc += dx[ix] * dy[iy];
    ix += *incx;
    iy += *incy;
  }
  return acc;
}
/**
 * Scales a vector by a constant: dx := da * dx.
 * (jack dongarra, linpack, 3/11/78; modified 3/93 to return if incx .le. 0.)
 *
 *   n     number of elements to scale; nothing is done when n <= 0.
 *   da    scalar multiplier.
 *   dx    vector scaled in place.
 *   incx  increment between scaled elements; nothing is done when
 *         incx <= 0.
 *
 * Always returns 0.
 */
int dscal_(long *n, double *da, double *dx, long *incx)
{
  const long count = *n;
  const long stride = *incx;
  long k, ix;

  if (count <= 0 || stride <= 0) {
    return 0;
  }

  /* One loop covers every positive stride, unit stride included; the
     elements touched are at offsets 0, stride, ..., (count-1)*stride. */
  for (k = 0, ix = 0; k < count; ++k, ix += stride) {
    dx[ix] = *da * dx[ix];
  }
  return 0;
}

data/ext/lbfgsb/src/blas.h ADDED

@@ -0,0 +1,12 @@
+#ifndef LBFGSB_RB_BLAS_H_
+#define LBFGSB_RB_BLAS_H_
+#include <math.h>
+extern double dnrm2_(long *n, double *x, long *incx);
+extern int daxpy_(long *n, double *da, double *dx, long *incx, double *dy, long *incy);
+extern int dcopy_(long *n, double *dx, long *incx, double *dy, long *incy);
+extern double ddot_(long *n, double *dx, long *incx, double *dy, long *incy);
+extern int dscal_(long *n, double *da, double *dx, long *incx);
+#endif /* LBFGSB_RB_BLAS_H_ */

data/ext/lbfgsb/src/lbfgsb.c ADDED

@@ -0,0 +1,4096 @@
+/**
+ * L-BFGS-B is released under the “New BSD License” (aka “Modified BSD License”
+ * or “3-clause license”)
+ * Please read attached file License.txt
+ *
+ * ===========   L-BFGS-B (version 3.0.  April 25, 2011)  ==================
+ *
+ *     This is a modified version of L-BFGS-B. Minor changes in the updated
+ *     code appear preceded by a line comment as follows
+ *
+ *     jlm-jn
+ *
+ *     Major changes are described in the accompanying paper:
+ *
+ *         Jorge Nocedal and Jose Luis Morales, Remark on "Algorithm 778:
+ *         L-BFGS-B: Fortran Subroutines for Large-Scale Bound Constrained
+ *         Optimization"  (2011). To appear in  ACM Transactions on
+ *         Mathematical Software,
+ *
+ *     The paper describes an improvement and a correction to Algorithm 778.
+ *     It is shown that the performance of the algorithm can be improved
+ *     significantly by making a relatively simple modification to the subspace
+ *     minimization phase. The correction concerns an error caused by the use
+ *     of routine dpmeps to estimate machine precision.
+ *
+ *     The total work space **wa** required by the new version is
+ *
+ *                  2*m*n + 11m*m + 5*n + 8*m
+ *
+ *     the old version required
+ *
+ *                  2*m*n + 12m*m + 4*n + 12*m
+ *
+ *
+ *            J. Nocedal  Department of Electrical Engineering and
+ *                        Computer Science.
+ *                        Northwestern University. Evanston, IL. USA
+ *
+ *
+ *           J.L Morales  Departamento de Matematicas,
+ *                        Instituto Tecnologico Autonomo de Mexico
+ *                        Mexico D.F. Mexico.
+ *
+ *                        March  2011
+ */
+#include "blas.h"
+#include "linpack.h"
+#include "lbfgsb.h"
+static double c_b9 = 0.;
+static long c__1 = 1;
+static long c__11 = 11;
+static double c_b280 = .001;
+static double c_b281 = .9;
+static double c_b282 = .1;
+/**
+ * Subroutine setulb
+ *
+ *     This subroutine partitions the working arrays wa and iwa, and
+ *       then uses the limited memory BFGS method to solve the bound
+ *       constrained optimization problem by calling mainlb.
+ *       (The direct method will be used in the subspace minimization.)
+ *
+ *     n is a long variable.
+ *       On entry n is the dimension of the problem.
+ *       On exit n is unchanged.
+ *
+ *     m is a long variable.
+ *       On entry m is the maximum number of variable metric corrections
+ *         used to define the limited memory matrix.
+ *       On exit m is unchanged.
+ *
+ *     x is a double precision array of dimension n.
+ *       On entry x is an approximation to the solution.
+ *       On exit x is the current approximation.
+ *
+ *     l is a double precision array of dimension n.
+ *       On entry l is the lower bound on x.
+ *       On exit l is unchanged.
+ *
+ *     u is a double precision array of dimension n.
+ *       On entry u is the upper bound on x.
+ *       On exit u is unchanged.
+ *
+ *     nbd is a long array of dimension n.
+ *       On entry nbd represents the type of bounds imposed on the
+ *         variables, and must be specified as follows:
+ *         nbd(i)=0 if x(i) is unbounded,
+ *                1 if x(i) has only a lower bound,
+ *                2 if x(i) has both lower and upper bounds, and
+ *                3 if x(i) has only an upper bound.
+ *       On exit nbd is unchanged.
+ *
+ *     f is a double precision variable.
+ *       On first entry f is unspecified.
+ *       On final exit f is the value of the function at x.
+ *
+ *     g is a double precision array of dimension n.
+ *       On first entry g is unspecified.
+ *       On final exit g is the value of the gradient at x.
+ *
+ *     factr is a double precision variable.
+ *       On entry factr >= 0 is specified by the user.  The iteration
+ *         will stop when
+ *
+ *         (f^k - f^{k+1})/max{|f^k|,|f^{k+1}|,1} <= factr*epsmch
+ *
+ *         where epsmch is the machine precision, which is automatically
+ *         generated by the code. Typical values for factr: 1.d+12 for
+ *         low accuracy; 1.d+7 for moderate accuracy; 1.d+1 for extremely
+ *         high accuracy.
+ *
+ *       On exit factr is unchanged.
+ *
+ *     pgtol is a double precision variable.
+ *       On entry pgtol >= 0 is specified by the user.  The iteration
+ *         will stop when
+ *
+ *                 max{|proj g_i | i = 1, ..., n} <= pgtol
+ *
+ *         where proj g_i is the ith component of the projected gradient.
+ *       On exit pgtol is unchanged.
+ *
+ *     wa is a double precision working array of length
+ *       (2mmax + 5)nmax + 11mmax^2 + 8mmax.
+ *
+ *     iwa is a long working array of length 3nmax.
+ *
+ *     task is a working string of characters of length 60 indicating
+ *       the current job when entering and quitting this subroutine.
+ *
+ *     iprint is a long variable that must be set by the user.
+ *       It controls the frequency and type of output generated:
+ *        iprint<0    no output is generated;
+ *        iprint=0    print only one line at the last iteration;
+ *        0<iprint<99 print also f and |proj g| every iprint iterations;
+ *        iprint=99   print details of every iteration except n-vectors;
+ *        iprint=100  print also the changes of active set and final x;
+ *        iprint>100  print details of every iteration including x and g;
+ *       When iprint > 0, the file iterate.dat will be created to
+ *                        summarize the iteration.
+ *
+ *     csave is a working string of characters of length 60.
+ *
+ *     lsave is a logical working array of dimension 4.
+ *       On exit with 'task' = NEW_X, the following information is
+ *                                                             available:
+ *         If lsave(1) = .true.  then  the initial X has been replaced by
+ *                                     its projection in the feasible set;
+ *         If lsave(2) = .true.  then  the problem is constrained;
+ *         If lsave(3) = .true.  then  each variable has upper and lower
+ *                                     bounds;
+ *
+ *     isave is a long working array of dimension 44.
+ *       On exit with 'task' = NEW_X, the following information is
+ *                                                             available:
+ *         isave(22) = the total number of intervals explored in the
+ *                         search of Cauchy points;
+ *         isave(26) = the total number of skipped BFGS updates before
+ *                         the current iteration;
+ *         isave(30) = the number of current iteration;
+ *         isave(31) = the total number of BFGS updates prior the current
+ *                         iteration;
+ *         isave(33) = the number of intervals explored in the search of
+ *                         Cauchy point in the current iteration;
+ *         isave(34) = the total number of function and gradient
+ *                         evaluations;
+ *         isave(36) = the number of function value or gradient
+ *                                  evaluations in the current iteration;
+ *         if isave(37) = 0  then the subspace argmin is within the box;
+ *         if isave(37) = 1  then the subspace argmin is beyond the box;
+ *         isave(38) = the number of free variables in the current
+ *                         iteration;
+ *         isave(39) = the number of active constraints in the current
+ *                         iteration;
+ *         n + 1 - isave(40) = the number of variables leaving the set of
+ *                           active constraints in the current iteration;
+ *         isave(41) = the number of variables entering the set of active
+ *                         constraints in the current iteration.
+ *
+ *     dsave is a double precision working array of dimension 29.
+ *       On exit with 'task' = NEW_X, the following information is
+ *                                                             available:
+ *         dsave(1) = current 'theta' in the BFGS matrix;
+ *         dsave(2) = f(x) in the previous iteration;
+ *         dsave(3) = factr*epsmch;
+ *         dsave(4) = 2-norm of the line search direction vector;
+ *         dsave(5) = the machine precision epsmch generated by the code;
+ *         dsave(7) = the accumulated time spent on searching for
+ *                                                         Cauchy points;
+ *         dsave(8) = the accumulated time spent on
+ *                                                 subspace minimization;
+ *         dsave(9) = the accumulated time spent on line search;
+ *         dsave(11) = the slope of the line search function at
+ *                                  the current point of line search;
+ *         dsave(12) = the maximum relative step length imposed in
+ *                                                           line search;
+ *         dsave(13) = the infinity norm of the projected gradient;
+ *         dsave(14) = the relative step length in the line search;
+ *         dsave(15) = the slope of the line search function at
+ *                                 the starting point of the line search;
+ *         dsave(16) = the square of the 2-norm of the line search
+ *                                                      direction vector.
+ *
+ *     Subprograms called:
+ *
+ *       L-BFGS-B Library ... mainlb.
+ *
+ *
+ *     References:
+ *
+ *       [1] R. H. Byrd, P. Lu, J. Nocedal and C. Zhu, ``A limited
+ *       memory algorithm for bound constrained optimization'',
+ *       SIAM J. Scientific Computing 16 (1995), no. 5, pp. 1190--1208.
+ *
+ *       [2] C. Zhu, R.H. Byrd, P. Lu, J. Nocedal, ``L-BFGS-B: a
+ *       limited memory FORTRAN code for solving bound constrained
+ *       optimization problems'', Tech. Report, NAM-11, EECS Department,
+ *       Northwestern University, 1994.
+ *
+ *       (Postscript files of these papers are available via anonymous
+ *        ftp to eecs.nwu.edu in the directory pub/lbfgs/lbfgs_bcm.)
+ *
+ *                       *  *  *
+ *
+ *     NEOS, November 1994. (Latest revision June 1996.)
+ *     Optimization Technology Center.
+ *     Argonne National Laboratory and Northwestern University.
+ *     Written by
+ *                        Ciyou Zhu
+ *     in collaboration with R.H. Byrd, P. Lu-Chen and J. Nocedal.
+ */
int setulb_(long *n, long *m, double *x,
  double *l, double *u, long *nbd, double *f, double *g,
  double *factr, double *pgtol, double *wa, long *iwa,
  char *task, long *iprint, char *csave, long *lsave,
  long *isave, double *dsave)
{
  /* jlm-jn */
  /*
   * On the START call, lay out the sub-arrays of wa.  The first three slots
   * of isave hold the block sizes m*n, m**2 and 4*m**2; the next thirteen
   * hold the 1-based position in wa at which each sub-array begins.  Every
   * later call reads the positions back from isave.
   */
  if (strncmp(task, "START", 5) == 0) {
    const long mn = *m * *n;
    const long mm = *m * *m;
    const long mm4 = 4 * mm;
    long next = 1;

    isave[0] = mn;
    isave[1] = mm;
    isave[2] = mm4;
    isave[3] = next;  next += mn;   /* ws      m*n    */
    isave[4] = next;  next += mn;   /* wy      m*n    */
    isave[5] = next;  next += mm;   /* wsy     m**2   */
    isave[6] = next;  next += mm;   /* wss     m**2   */
    isave[7] = next;  next += mm;   /* wt      m**2   */
    isave[8] = next;  next += mm4;  /* wn      4*m**2 */
    isave[9] = next;  next += mm4;  /* wsnd    4*m**2 */
    isave[10] = next; next += *n;   /* wz      n      */
    isave[11] = next; next += *n;   /* wr      n      */
    isave[12] = next; next += *n;   /* wd      n      */
    isave[13] = next; next += *n;   /* wt      n      */
    isave[14] = next; next += *n;   /* wxp     n      */
    isave[15] = next;               /* wa      8*m    */
  }

  /*
   * Hand each sub-array to mainlb.  A stored position p is 1-based, so the
   * sub-array starts at wa + p - 1.  iwa is split into three consecutive
   * runs of n entries, and mainlb receives isave starting at its 22nd entry.
   */
  mainlb_(n, m, x, l, u, nbd, f, g, factr, pgtol,
          wa + isave[3] - 1,   /* ws  */
          wa + isave[4] - 1,   /* wy  */
          wa + isave[5] - 1,   /* sy  */
          wa + isave[6] - 1,   /* ss  */
          wa + isave[7] - 1,   /* wt  */
          wa + isave[8] - 1,   /* wn  */
          wa + isave[9] - 1,   /* snd */
          wa + isave[10] - 1,  /* z   */
          wa + isave[11] - 1,  /* r   */
          wa + isave[12] - 1,  /* d   */
          wa + isave[13] - 1,  /* t   */
          wa + isave[14] - 1,  /* xp  */
          wa + isave[15] - 1,  /* wa  */
          iwa, iwa + *n, iwa + 2 * *n,
          task, iprint, csave, lsave, isave + 21, dsave);
  return 0;
}
+/**
+ * Subroutine mainlb
+ *
+ *     This subroutine solves bound constrained optimization problems by
+ *       using the compact formula of the limited memory BFGS updates.
+ *
+ *     n is a long variable.
+ *       On entry n is the number of variables.
+ *       On exit n is unchanged.
+ *
+ *     m is a long variable.
+ *       On entry m is the maximum number of variable metric
+ *          corrections allowed in the limited memory matrix.
+ *       On exit m is unchanged.
+ *
+ *     x is a double precision array of dimension n.
+ *       On entry x is an approximation to the solution.
+ *       On exit x is the current approximation.
+ *
+ *     l is a double precision array of dimension n.
+ *       On entry l is the lower bound of x.
+ *       On exit l is unchanged.
+ *
+ *     u is a double precision array of dimension n.
+ *       On entry u is the upper bound of x.
+ *       On exit u is unchanged.
+ *
+ *     nbd is a long array of dimension n.
+ *       On entry nbd represents the type of bounds imposed on the
+ *         variables, and must be specified as follows:
+ *         nbd(i)=0 if x(i) is unbounded,
+ *                1 if x(i) has only a lower bound,
+ *                2 if x(i) has both lower and upper bounds,
+ *                3 if x(i) has only an upper bound.
+ *       On exit nbd is unchanged.
+ *
+ *     f is a double precision variable.
+ *       On first entry f is unspecified.
+ *       On final exit f is the value of the function at x.
+ *
+ *     g is a double precision array of dimension n.
+ *       On first entry g is unspecified.
+ *       On final exit g is the value of the gradient at x.
+ *
+ *     factr is a double precision variable.
+ *       On entry factr >= 0 is specified by the user.  The iteration
+ *         will stop when
+ *
+ *         (f^k - f^{k+1})/max{|f^k|,|f^{k+1}|,1} <= factr*epsmch
+ *
+ *         where epsmch is the machine precision, which is automatically
+ *         generated by the code.
+ *       On exit factr is unchanged.
+ *
+ *     pgtol is a double precision variable.
+ *       On entry pgtol >= 0 is specified by the user.  The iteration
+ *         will stop when
+ *
+ *                 max{|proj g_i | i = 1, ..., n} <= pgtol
+ *
+ *         where proj g_i is the ith component of the projected gradient.
+ *       On exit pgtol is unchanged.
+ *
+ *     ws, wy, sy, and wt are double precision working arrays used to
+ *       store the following information defining the limited memory
+ *          BFGS matrix:
+ *          ws, of dimension n x m, stores S, the matrix of s-vectors;
+ *          wy, of dimension n x m, stores Y, the matrix of y-vectors;
+ *          sy, of dimension m x m, stores S'Y;
+ *          ss, of dimension m x m, stores S'S;
+ *          yy, of dimension m x m, stores Y'Y;
+ *          wt, of dimension m x m, stores the Cholesky factorization
+ *                                  of (theta*S'S+LD^(-1)L'); see eq.
+ *                                  (2.26) in [3].
+ *
+ *     wn is a double precision working array of dimension 2m x 2m
+ *       used to store the LEL^T factorization of the indefinite matrix
+ *                 K = [-D -Y'ZZ'Y/theta     L_a'-R_z'  ]
+ *                     [L_a -R_z           theta*S'AA'S ]
+ *
+ *       where     E = [-I  0]
+ *                     [ 0  I]
+ *
+ *     snd is a double precision working array of dimension 2m x 2m
+ *       used to store the lower triangular part of
+ *                 N = [Y' ZZ'Y   L_a'+R_z']
+ *                     [L_a +R_z  S'AA'S   ]
+ *
+ *     z(n),r(n),d(n),t(n), xp(n),wa(8*m) are double precision working arrays.
+ *       z  is used at different times to store the Cauchy point and
+ *          the Newton point.
+ *       xp is used to safeguard the projected Newton direction
+ *
+ *     sg(m),sgo(m),yg(m),ygo(m) are double precision working arrays.
+ *
+ *     index is a long working array of dimension n.
+ *       In subroutine freev, index is used to store the free and fixed
+ *          variables at the Generalized Cauchy Point (GCP).
+ *
+ *     iwhere is a long working array of dimension n used to record
+ *       the status of the vector x for GCP computation.
+ *       iwhere(i)=0 or -3 if x(i) is free and has bounds,
+ *                 1       if x(i) is fixed at l(i), and l(i) .ne. u(i)
+ *                 2       if x(i) is fixed at u(i), and u(i) .ne. l(i)
+ *                 3       if x(i) is always fixed, i.e.,  u(i)=x(i)=l(i)
+ *                -1       if x(i) is always free, i.e., no bounds on it.
+ *
+ *     indx2 is a long working array of dimension n.
+ *       Within subroutine cauchy, indx2 corresponds to the array iorder.
+ *       In subroutine freev, a list of variables entering and leaving
+ *       the free set is stored in indx2, and it is passed on to
+ *       subroutine formk with this information.
+ *
+ *     task is a working string of characters of length 60 indicating
+ *       the current job when entering and leaving this subroutine.
+ *
+ *     iprint is a long variable that must be set by the user.
+ *       It controls the frequency and type of output generated:
+ *        iprint<0    no output is generated;
+ *        iprint=0    print only one line at the last iteration;
+ *        0<iprint<99 print also f and |proj g| every iprint iterations;
+ *        iprint=99   print details of every iteration except n-vectors;
+ *        iprint=100  print also the changes of active set and final x;
+ *        iprint>100  print details of every iteration including x and g;
+ *       When iprint > 0, the file iterate.dat will be created to
+ *                        summarize the iteration.
+ *
+ *     csave is a working string of characters of length 60.
+ *
+ *     lsave is a logical working array of dimension 4.
+ *
+ *     isave is a long working array of dimension 23.
+ *
+ *     dsave is a double precision working array of dimension 29.
+ *
+ *
+ *     Subprograms called
+ *
+ *       L-BFGS-B Library ... cauchy, subsm, lnsrlb, formk,
+ *
+ *        errclb, prn1lb, prn2lb, prn3lb, active, projgr,
+ *
+ *        freev, cmprlb, matupd, formt.
+ *
+ *       Minpack2 Library ... timer
+ *
+ *       Linpack Library ... dcopy, ddot.
+ *
+ *
+ *     References:
+ *
+ *       [1] R. H. Byrd, P. Lu, J. Nocedal and C. Zhu, ``A limited
+ *       memory algorithm for bound constrained optimization'',
+ *       SIAM J. Scientific Computing 16 (1995), no. 5, pp. 1190--1208.
+ *
+ *       [2] C. Zhu, R.H. Byrd, P. Lu, J. Nocedal, ``L-BFGS-B: FORTRAN
+ *       Subroutines for Large Scale Bound Constrained Optimization''
+ *       Tech. Report, NAM-11, EECS Department, Northwestern University,
+ *       1994.
+ *
+ *       [3] R. Byrd, J. Nocedal and R. Schnabel "Representations of
+ *       Quasi-Newton Matrices and their use in Limited Memory Methods'',
+ *       Mathematical Programming 63 (1994), no. 4, pp. 129-156.
+ *
+ *       (Postscript files of these papers are available via anonymous
+ *        ftp to eecs.nwu.edu in the directory pub/lbfgs/lbfgs_bcm.)
+ *
+ *                       *  *  *
+ *
+ *     NEOS, November 1994. (Latest revision June 1996.)
+ *     Optimization Technology Center.
+ *     Argonne National Laboratory and Northwestern University.
+ *     Written by
+ *                        Ciyou Zhu
+ *     in collaboration with R.H. Byrd, P. Lu-Chen and J. Nocedal.
+ */
+int mainlb_(long *n, long *m, double *x,
+  double *l, double *u, long *nbd, double *f, double *g,
+  double *factr, double *pgtol, double *ws, double *wy,
+  double *sy, double *ss, double *wt, double *wn,
+  double *snd, double *z__, double *r__, double *d__,
+  double *t, double *xp, double *wa, long *index,
+  long *iwhere, long *indx2, char *task, long *iprint,
+  char *csave, long *lsave, long *isave, double *dsave)
+{
+  long ws_dim1, ws_offset, wy_dim1, wy_offset, sy_dim1, sy_offset,
+    ss_dim1, ss_offset, wt_dim1, wt_offset, wn_dim1, wn_offset,
+    snd_dim1, snd_offset, i__1;
+  double d__1, d__2;
+  FILE *itfptr;
+  static long i__, k;
+  static double gd, dr, rr, dtd;
+  static long col;
+  static double tol;
+  static long wrk;
+  static double stp, cpu1, cpu2;
+  static long head;
+  static double fold;
+  static long nact;
+  static double ddum;
+  static long info, nseg;
+  static double time;
+  static long nfgv, ifun, iter;
+  static char word[4];
+  static double time1, time2;
+  static long iback;
+  static double gdold;
+  static long nfree;
+  static long boxed;
+  static long itail;
+  static double theta;
+  static double dnorm;
+  static long nskip, iword;
+  static double xstep, stpmx;
+  static long ileave;
+  static double cachyt;
+  static long itfile;
+  static double epsmch;
+  static long updatd;
+  static double sbtime;
+  static long prjctd;
+  static long iupdat;
+  static double sbgnrm;
+  static long cnstnd;
+  static long nenter;
+  static double lnscht;
+  static long nintol;
+  --indx2;
+  --iwhere;
+  --index;
+  --xp;
+  --t;
+  --d__;
+  --r__;
+  --z__;
+  --g;
+  --nbd;
+  --u;
+  --l;
+  --x;
+  --wa;
+  snd_dim1 = 2 * *m;
+  snd_offset = 1 + snd_dim1;
+  snd -= snd_offset;
+  wn_dim1 = 2 * *m;
+  wn_offset = 1 + wn_dim1;
+  wn -= wn_offset;
+  wt_dim1 = *m;
+  wt_offset = 1 + wt_dim1;
+  wt -= wt_offset;
+  ss_dim1 = *m;
+  ss_offset = 1 + ss_dim1;
+  ss -= ss_offset;
+  sy_dim1 = *m;
+  sy_offset = 1 + sy_dim1;
+  sy -= sy_offset;
+  wy_dim1 = *n;
+  wy_offset = 1 + wy_dim1;
+  wy -= wy_offset;
+  ws_dim1 = *n;
+  ws_offset = 1 + ws_dim1;
+  ws -= ws_offset;
+  --lsave;
+  --isave;
+  --dsave;
+  /* jlm-jn */
+  if (strncmp(task, "START", 5) == 0) {
+    epsmch = DBL_EPSILON;
+    timer_(&time1);
+    /* Initialize counters and scalars when task='START'. */
+    /*    for the limited memory BFGS matrices: */
+    col = 0;
+    head = 1;
+    theta = 1.;
+    iupdat = 0;
+    updatd = FALSE_;
+    iback = 0;
+    itail = 0;
+    iword = 0;
+    nact = 0;
+    ileave = 0;
+    nenter = 0;
+    fold = 0.;
+    dnorm = 0.;
+    cpu1 = 0.;
+    gd = 0.;
+    stpmx = 0.;
+    sbgnrm = 0.;
+    stp = 0.;
+    gdold = 0.;
+    dtd = 0.;
+    /* for operation counts: */
+    iter = 0;
+    nfgv = 0;
+    nseg = 0;
+    nintol = 0;
+    nskip = 0;
+    nfree = *n;
+    ifun = 0;
+    /* for stopping tolerance: */
+    tol = *factr * epsmch;
+    /* for measuring running time: */
+    cachyt = 0.;
+    sbtime = 0.;
+    lnscht = 0.;
+    /* 'word' records the status of subspace solutions. */
+    strcpy(word, "---");
+    /* 'info' records the termination information. */
+    info = 0;
+    itfile = 8;
+    /* Check the input arguments for errors. */
+    errclb_(n, m, factr, &l[1], &u[1], &nbd[1], task, &info, &k);
+    if (strncmp(task, "ERROR", 5) == 0) {
+      prn3lb_(n, &x[1], f, task, iprint, &info, &itfile, &iter, &nfgv, &nintol,
+          &nskip, &nact, &sbgnrm, &c_b9, &nseg, word, &iback,
+          &stp, &xstep, &k, &cachyt, &sbtime, &lnscht);
+      return 0;
+    }
+    prn1lb_(n, m, &l[1], &u[1], &x[1], iprint, &itfile, &epsmch);
+    /* Initialize iwhere & project x onto the feasible set. */
+    active_(n, &l[1], &u[1], &nbd[1], &x[1], &iwhere[1], iprint, &prjctd, &cnstnd, &boxed);
+    /* The end of the initialization. */
+  } else {
+    /* restore local variables. */
+    prjctd = lsave[1];
+    cnstnd = lsave[2];
+    boxed = lsave[3];
+    updatd = lsave[4];
+    nintol = isave[1];
+    itfile = isave[3];
+    iback = isave[4];
+    nskip = isave[5];
+    head = isave[6];
+    col = isave[7];
+    itail = isave[8];
+    iter = isave[9];
+    iupdat = isave[10];
+    nseg = isave[12];
+    nfgv = isave[13];
+    info = isave[14];
+    ifun = isave[15];
+    iword = isave[16];
+    nfree = isave[17];
+    nact = isave[18];
+    ileave = isave[19];
+    nenter = isave[20];
+    theta = dsave[1];
+    fold = dsave[2];
+    tol = dsave[3];
+    dnorm = dsave[4];
+    epsmch = dsave[5];
+    cpu1 = dsave[6];
+    cachyt = dsave[7];
+    sbtime = dsave[8];
+    lnscht = dsave[9];
+    time1 = dsave[10];
+    gd = dsave[11];
+    stpmx = dsave[12];
+    sbgnrm = dsave[13];
+    stp = dsave[14];
+    gdold = dsave[15];
+    dtd = dsave[16];
+    /* After returning from the driver go to the point where execution */
+    /* is to resume. */
+    if (strncmp(task, "FG_LN", 5) == 0) {
+      goto L666;
+    }
+    if (strncmp(task, "NEW_X", 5) == 0) {
+      goto L777;
+    }
+    if (strncmp(task, "FG_ST", 5) == 0) {
+      goto L111;
+    }
+    if (strncmp(task, "STOP", 4) == 0) {
+      if (strncmp(task + 6, "CPU", 3) == 0) {
+        /* restore the previous iterate. */
+        dcopy_(n, &t[1], &c__1, &x[1], &c__1);
+        dcopy_(n, &r__[1], &c__1, &g[1], &c__1);
+        *f = fold;
+      }
+      goto L999;
+    }
+  }
+  /* Compute f0 and g0. */
+  strcpy(task, "FG_START");
+  /* return to the driver to calculate f and g; reenter at 111. */
+  goto L1000;
+L111:
+  nfgv = 1;
+  /* Compute the infinity norm of the (-) projected gradient. */
+  projgr_(n, &l[1], &u[1], &nbd[1], &x[1], &g[1], &sbgnrm);
+  if (*iprint >= 1) {
+    fprintf(stdout, "\nAt iterate%5ld    f= %12.5E    |proj g|= %12.5E\n", iter, *f, sbgnrm);
+    itfptr = fopen("iterate.dat", "a");
+    fprintf(itfptr, " %4ld %4ld     -     -   -     -     -        -    %10.3E %10.3E\n", iter, nfgv, sbgnrm, *f);
+    fclose(itfptr);
+  }
+  if (sbgnrm <= *pgtol) {
+    /* terminate the algorithm. */
+    strcpy(task, "CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL");
+    goto L999;
+  }
+  /* ----------------- the beginning of the loop -------------------------- */
+L222:
+  if (*iprint >= 99) {
+    i__1 = iter + 1;
+    fprintf(stdout, "\n\nITERATION %5ld\n", i__1);
+  }
+  iword = -1;
+  if (! cnstnd && col > 0) {
+    /* skip the search for GCP. */
+    dcopy_(n, &x[1], &c__1, &z__[1], &c__1);
+    wrk = updatd;
+    nseg = 0;
+    goto L333;
+  }
+  /**
+   * Compute the Generalized Cauchy Point (GCP).
+   */
+  timer_(&cpu1);
+  cauchy_(n, &x[1], &l[1], &u[1], &nbd[1], &g[1], &indx2[1], &iwhere[1], &t[1],
+      &d__[1], &z__[1], m, &wy[wy_offset], &ws[ws_offset], &sy[sy_offset],
+      &wt[wt_offset], &theta, &col, &head, &wa[1], &wa[(*m << 1) + 1],
+      &wa[(*m << 2) + 1], &wa[*m * 6 + 1], &nseg, iprint, &sbgnrm, &info, &epsmch);
+  if (info != 0) {
+    /* singular triangular system detected; refresh the lbfgs memory. */
+    if (*iprint >= 1) {
+      fprintf(stdout, "\n");
+      fprintf(stdout, " Singular triangular system detected;\n");
+      fprintf(stdout, "   refresh the lbfgs memory and restart the iteration.\n");
+    }
+    info = 0;
+    col = 0;
+    head = 1;
+    theta = 1.;
+    iupdat = 0;
+    updatd = FALSE_;
+    timer_(&cpu2);
+    cachyt = cachyt + cpu2 - cpu1;
+    goto L222;
+  }
+  timer_(&cpu2);
+  cachyt = cachyt + cpu2 - cpu1;
+  nintol += nseg;
+  /* Count the entering and leaving variables for iter > 0; */
+  /* find the index set of free and active variables at the GCP. */
+  freev_(n, &nfree, &index[1], &nenter, &ileave, &indx2[1], &iwhere[1], &wrk, &updatd, &cnstnd, iprint, &iter);
+  nact = *n - nfree;
+L333:
+  /* If there are no free variables or B=theta*I, then */
+  /*                                    skip the subspace minimization. */
+  if (nfree == 0 || col == 0) {
+    goto L555;
+  }
+  /**
+   * Subspace minimization.
+   */
+  timer_(&cpu1);
+  /* Form  the LEL^T factorization of the indefinite */
+  /*   matrix    K = [-D -Y'ZZ'Y/theta     L_a'-R_z'  ] */
+  /*                 [L_a -R_z           theta*S'AA'S ] */
+  /*   where     E = [-I  0] */
+  /*                 [ 0  I] */
+  if (wrk) {
+    formk_(n, &nfree, &index[1], &nenter, &ileave, &indx2[1], &iupdat, &updatd,
+        &wn[wn_offset], &snd[snd_offset], m, &ws[ws_offset], &wy[wy_offset],
+        &sy[sy_offset], &theta, &col, &head, &info);
+  }
+  if (info != 0) {
+    /* nonpositive definiteness in Cholesky factorization; */
+    /* refresh the lbfgs memory and restart the iteration. */
+    if (*iprint >= 1) {
+      fprintf(stdout, "\n");
+      fprintf(stdout, " Nonpositive definiteness in Cholesky factorization in formk;\n");
+      fprintf(stdout, "   refresh the lbfgs memory and restart the iteration.\n");
+    }
+    info = 0;
+    col = 0;
+    head = 1;
+    theta = 1.;
+    iupdat = 0;
+    updatd = FALSE_;
+    timer_(&cpu2);
+    sbtime = sbtime + cpu2 - cpu1;
+    goto L222;
+  }
+  /* compute r=-Z'B(xcp-xk)-Z'g (using wa(2m+1)=W'(xcp-x) */
+  /*                                            from 'cauchy'). */
+  cmprlb_(n, m, &x[1], &g[1], &ws[ws_offset], &wy[wy_offset], &sy[sy_offset],
+      &wt[wt_offset], &z__[1], &r__[1], &wa[1], &index[1], &theta, &col,
+      &head, &nfree, &cnstnd, &info);
+  if (info != 0) {
+    goto L444;
+  }
+  /* jlm-jn call the direct method. */
+  subsm_(n, m, &nfree, &index[1], &l[1], &u[1], &nbd[1], &z__[1], &r__[1], &xp[1],
+      &ws[ws_offset], &wy[wy_offset], &theta, &x[1], &g[1], &col,
+      &head, &iword, &wa[1], &wn[wn_offset], iprint, &info);
+L444:
+  if (info != 0) {
+    /* singular triangular system detected; */
+    /* refresh the lbfgs memory and restart the iteration. */
+    if (*iprint >= 1) {
+      fprintf(stdout, "\n");
+      fprintf(stdout, " Singular triangular system detected;\n");
+      fprintf(stdout, "   refresh the lbfgs memory and restart the iteration.\n");
+    }
+    info = 0;
+    col = 0;
+    head = 1;
+    theta = 1.;
+    iupdat = 0;
+    updatd = FALSE_;
+    timer_(&cpu2);
+    sbtime = sbtime + cpu2 - cpu1;
+    goto L222;
+  }
+  timer_(&cpu2);
+  sbtime = sbtime + cpu2 - cpu1;
+L555:
+  /**
+   * Line search and optimality tests.
+   */
+  /* Generate the search direction d:=z-x. */
+  i__1 = *n;
+  for (i__ = 1; i__ <= i__1; ++i__) {
+    d__[i__] = z__[i__] - x[i__];
+  }
+  timer_(&cpu1);
+L666:
+  lnsrlb_(n, &l[1], &u[1], &nbd[1], &x[1], f, &fold, &gd, &gdold, &g[1],
+      &d__[1], &r__[1], &t[1], &z__[1], &stp, &dnorm, &dtd, &xstep,
+      &stpmx, &iter, &ifun, &iback, &nfgv, &info, task, &boxed, &cnstnd,
+      csave, &isave[22], &dsave[17]);
+  if (info != 0 || iback >= 20) {
+    /* restore the previous iterate. */
+    dcopy_(n, &t[1], &c__1, &x[1], &c__1);
+    dcopy_(n, &r__[1], &c__1, &g[1], &c__1);
+    *f = fold;
+    if (col == 0) {
+      /* abnormal termination. */
+      if (info == 0) {
+        info = -9;
+        /* restore the actual number of f and g evaluations etc. */
+        --nfgv;
+        --ifun;
+        --iback;
+      }
+      strcpy(task, "ABNORMAL_TERMINATION_IN_LNSRCH");
+      ++iter;
+      goto L999;
+    } else {
+      /* refresh the lbfgs memory and restart the iteration. */
+      if (*iprint >= 1) {
+        fprintf(stdout, "\n");
+        fprintf(stdout, " Bad direction in the line search;\n");
+        fprintf(stdout, "   refresh the lbfgs memory and restart the iteration.\n");
+      }
+      if (info == 0) {
+        --nfgv;
+      }
+      info = 0;
+      col = 0;
+      head = 1;
+      theta = 1.;
+      iupdat = 0;
+      updatd = FALSE_;
+      strcpy(task, "RESTART_FROM_LNSRCH");
+      timer_(&cpu2);
+      lnscht = lnscht + cpu2 - cpu1;
+      goto L222;
+    }
+  } else if (strncmp(task, "FG_LN", 5) == 0) {
+    /* return to the driver for calculating f and g; reenter at 666. */
+    goto L1000;
+  } else {
+    /* calculate and print out the quantities related to the new X. */
+    timer_(&cpu2);
+    lnscht = lnscht + cpu2 - cpu1;
+    ++iter;
+    /* Compute the infinity norm of the projected (-)gradient. */
+    projgr_(n, &l[1], &u[1], &nbd[1], &x[1], &g[1], &sbgnrm);
+    /* Print iteration information. */
+    prn2lb_(n, &x[1], f, &g[1], iprint, &itfile, &iter, &nfgv, &nact,
+        &sbgnrm, &nseg, word, &iword, &iback, &stp, &xstep);
+    goto L1000;
+  }
+L777:
+  /* Test for termination. */
+  if (sbgnrm <= *pgtol) {
+    /* terminate the algorithm. */
+    strcpy(task, "CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL");
+    goto L999;
+  }
+  d__1 = fabs(fold);
+  d__2 = fabs(*f);
+  d__1 = d__1 >= d__2 ? d__1 : d__2;
+  ddum = d__1 >= 1. ? d__1 : 1.;
+  if (fold - *f <= tol * ddum) {
+    /* terminate the algorithm. */
+    strcpy(task, "CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH");
+    if (iback >= 10) {
+      info = -5;
+    }
+    /*i.e., to issue a warning if iback>10 in the line search. */
+    goto L999;
+  }
+  /* Compute d=newx-oldx, r=newg-oldg, rr=y'y and dr=y's. */
+  i__1 = *n;
+  for (i__ = 1; i__ <= i__1; ++i__) {
+    r__[i__] = g[i__] - r__[i__];
+  }
+  rr = ddot_(n, &r__[1], &c__1, &r__[1], &c__1);
+  if (stp == 1.) {
+    dr = gd - gdold;
+    ddum = -gdold;
+  } else {
+    dr = (gd - gdold) * stp;
+    dscal_(n, &stp, &d__[1], &c__1);
+    ddum = -gdold * stp;
+  }
+  if (dr <= epsmch * ddum) {
+    /* skip the L-BFGS update. */
+    ++nskip;
+    updatd = FALSE_;
+    if (*iprint >= 1) {
+      fprintf(stdout, "  ys=%10.3E  -gs=%10.3E BFGS update SKIPPED\n", dr, ddum);
+    }
+    goto L888;
+  }
+  /**
+   * Update the L-BFGS matrix.
+   */
+  updatd = TRUE_;
+  ++iupdat;
+  /* Update matrices WS and WY and form the middle matrix in B. */
+  matupd_(n, m, &ws[ws_offset], &wy[wy_offset], &sy[sy_offset],
+      &ss[ss_offset], &d__[1], &r__[1], &itail, &iupdat, &col, &head,
+      &theta, &rr, &dr, &stp, &dtd);
+  /* Form the upper half of the pds T = theta*SS + L*D^(-1)*L'; */
+  /*    Store T in the upper triangular of the array wt; */
+  /*    Cholesky factorize T to J*J' with */
+  /*       J' stored in the upper triangular of wt. */
+  formt_(m, &wt[wt_offset], &sy[sy_offset], &ss[ss_offset], &col, &theta, &info);
+  if (info != 0) {
+    /* nonpositive definiteness in Cholesky factorization; */
+    /* refresh the lbfgs memory and restart the iteration. */
+    if (*iprint >= 1) {
+      fprintf(stdout, "\n");
+      fprintf(stdout, " Nonpositive definiteness in Cholesky factorization in formt;\n");
+      fprintf(stdout, "   refresh the lbfgs memory and restart the iteration.\n");
+    }
+    info = 0;
+    col = 0;
+    head = 1;
+    theta = 1.;
+    iupdat = 0;
+    updatd = FALSE_;
+    goto L222;
+  }
+  /* Now the inverse of the middle matrix in B is */
+  /*   [  D^(1/2)      O ] [ -D^(1/2)  D^(-1/2)*L' ] */
+  /*   [ -L*D^(-1/2)   J ] [  0        J'          ] */
+L888:
+  /* -------------------- the end of the loop ----------------------------- */
+  goto L222;
+L999:
+  timer_(&time2);
+  time = time2 - time1;
+  prn3lb_(n, &x[1], f, task, iprint, &info, &itfile, &iter, &nfgv, &nintol,
+    &nskip, &nact, &sbgnrm, &time, &nseg, word, &iback, &stp, &xstep,
+    &k, &cachyt, &sbtime, &lnscht);
+L1000:
+  /* Save local variables. */
+  lsave[1] = prjctd;
+  lsave[2] = cnstnd;
+  lsave[3] = boxed;
+  lsave[4] = updatd;
+  isave[1] = nintol;
+  isave[3] = itfile;
+  isave[4] = iback;
+  isave[5] = nskip;
+  isave[6] = head;
+  isave[7] = col;
+  isave[8] = itail;
+  isave[9] = iter;
+  isave[10] = iupdat;
+  isave[12] = nseg;
+  isave[13] = nfgv;
+  isave[14] = info;
+  isave[15] = ifun;
+  isave[16] = iword;
+  isave[17] = nfree;
+  isave[18] = nact;
+  isave[19] = ileave;
+  isave[20] = nenter;
+  dsave[1] = theta;
+  dsave[2] = fold;
+  dsave[3] = tol;
+  dsave[4] = dnorm;
+  dsave[5] = epsmch;
+  dsave[6] = cpu1;
+  dsave[7] = cachyt;
+  dsave[8] = sbtime;
+  dsave[9] = lnscht;
+  dsave[10] = time1;
+  dsave[11] = gd;
+  dsave[12] = stpmx;
+  dsave[13] = sbgnrm;
+  dsave[14] = stp;
+  dsave[15] = gdold;
+  dsave[16] = dtd;
+  return 0;
+}
+/**
+ * Subroutine active
+ *
+ *     This subroutine initializes iwhere and projects the initial x to
+ *       the feasible set if necessary.
+ *
+ *     n is a long variable.
+ *       On entry n is the number of variables, i.e. the number of
+ *         entries visited in l, u, nbd, x and iwhere.
+ *       On exit n is unchanged.
+ *
+ *     l and u are double precision arrays of dimension n.
+ *       On entry l(i) is compared against x(i) when nbd(i) is 1 or 2,
+ *         and u(i) when nbd(i) is 2 or larger.
+ *       On exit l and u are unchanged.
+ *
+ *     nbd is a long array of dimension n.
+ *       On entry nbd(i) selects which bounds apply to x(i):
+ *         0 none, 1 lower only, 2 lower and upper, 3 upper only.
+ *       On exit nbd is unchanged.
+ *
+ *     x is a double precision array of dimension n.
+ *       On entry x is the initial point.
+ *       On exit an x(i) below its applicable lower bound has been set
+ *         to l(i); otherwise an x(i) above its applicable upper bound
+ *         has been set to u(i).
+ *
+ *     iwhere is a long array of dimension n.
+ *       On entry iwhere is unspecified.
+ *       On exit iwhere(i)=-1  if x(i) has no bounds (nbd(i)=0)
+ *                         3   if nbd(i)=2 and u(i)-l(i)<=0
+ *                         0   otherwise.
+ *       In cauchy, iwhere is given finer gradations.
+ *
+ *     iprint is a long variable.
+ *       iprint>=0  report an infeasible starting point and/or an
+ *                  unconstrained problem on stdout;
+ *       iprint>0   additionally report how many variables sit on a bound.
+ *
+ *     prjctd, cnstnd and boxed are long flags written on exit:
+ *       prjctd is TRUE_ if at least one x(i) was moved onto a bound;
+ *       cnstnd is TRUE_ if at least one nbd(i) is nonzero;
+ *       boxed  is TRUE_ only if every nbd(i) equals 2.
+ *
+ *     The function always returns 0.
+ *
+ *                           *  *  *
+ *
+ *     NEOS, November 1994. (Latest revision June 1996.)
+ *     Optimization Technology Center.
+ *     Argonne National Laboratory and Northwestern University.
+ *     Written by
+ *                        Ciyou Zhu
+ *     in collaboration with R.H. Byrd, P. Lu-Chen and J. Nocedal.
+ */
+int active_(long *n, double *l, double *u,
+  long *nbd, double *x, long *iwhere, long *iprint,
+  long *prjctd, long *cnstnd, long *boxed)
+{
+  /* The arrays are indexed from 0 here, so no pointer is ever moved in */
+  /* front of the storage handed in by the caller.  All scratch values  */
+  /* are automatic: the routine keeps no state between calls.           */
+  const long dim = *n;
+  long i;
+  /* nbdd counts the variables that end up exactly on one of their bounds. */
+  long nbdd = 0;
+  /* Initialize prjctd, cnstnd and boxed. */
+  *prjctd = FALSE_;
+  *cnstnd = FALSE_;
+  *boxed = TRUE_;
+  /* Project the initial x to the feasible set if necessary, */
+  /*   i.e. set x(i)=mid(x(i), u(i), l(i)) for bounded variables. */
+  for (i = 0; i < dim; ++i) {
+    if (nbd[i] <= 0) {
+      /* no bound applies to this variable */
+      continue;
+    }
+    if (nbd[i] <= 2 && x[i] <= l[i]) {
+      /* at or below the lower bound */
+      if (x[i] < l[i]) {
+        *prjctd = TRUE_;
+        x[i] = l[i];
+      }
+      ++nbdd;
+    } else if (nbd[i] >= 2 && x[i] >= u[i]) {
+      /* at or above the upper bound */
+      if (x[i] > u[i]) {
+        *prjctd = TRUE_;
+        x[i] = u[i];
+      }
+      ++nbdd;
+    }
+  }
+  /* Initialize iwhere and assign values to cnstnd and boxed. */
+  for (i = 0; i < dim; ++i) {
+    if (nbd[i] != 2) {
+      *boxed = FALSE_;
+    }
+    if (nbd[i] == 0) {
+      /* this variable is always free */
+      iwhere[i] = -1;
+    } else {
+      *cnstnd = TRUE_;
+      if (nbd[i] == 2 && u[i] - l[i] <= 0.) {
+        /* this variable is always fixed */
+        iwhere[i] = 3;
+      } else {
+        iwhere[i] = 0;
+      }
+    }
+  }
+  if (*iprint >= 0) {
+    if (*prjctd) {
+      fprintf(stdout, " The initial X is infeasible.  Restart with its projection.\n");
+    }
+    if (! (*cnstnd)) {
+      fprintf(stdout, " This problem is unconstrained.\n");
+    }
+  }
+  if (*iprint > 0) {
+    fprintf(stdout, "\n");
+    fprintf(stdout, "At X0 %9ld variables are exactly at the bounds\n", nbdd);
+  }
+  return 0;
+}
+/**
+ * Subroutine bmv
+ *
+ *     This subroutine computes the product of the 2m x 2m middle matrix
+ *       in the compact L-BFGS formula of B and a 2m vector v;
+ *       it returns the product in p.
+ *
+ *     m is a long variable.
+ *       On entry m is the maximum number of variable metric corrections
+ *         used to define the limited memory matrix; it is also used as
+ *         the leading dimension of sy and wt.
+ *       On exit m is unchanged.
+ *
+ *     sy is a double precision array of dimension m x m.
+ *       On entry sy specifies the matrix S'Y.
+ *       On exit sy is unchanged.
+ *
+ *     wt is a double precision array of dimension m x m.
+ *       On entry wt specifies the upper triangular matrix J' which is
+ *         the Cholesky factor of (thetaS'S+LD^(-1)L').
+ *       On exit wt is unchanged.
+ *
+ *     col is a long variable.
+ *       On entry col specifies the number of s-vectors (or y-vectors)
+ *         stored in the compact L-BFGS formula.
+ *       On exit col is unchanged.
+ *
+ *     v is a double precision array of dimension 2col.
+ *       On entry v specifies vector v.
+ *       On exit v is unchanged.
+ *
+ *     p is a double precision array of dimension 2col.
+ *       On entry p is unspecified.
+ *       On exit p is the product Mv.
+ *
+ *     info is a long variable.
+ *       On entry info is unspecified.
+ *       On exit info = 0       for normal return,
+ *                    = nonzero for abnormal return when the system
+ *                                to be solved by dtrsl is singular.
+ *       When col is 0 the routine returns at once and info is not written.
+ *
+ *     The function always returns 0; failures are reported through info.
+ *
+ *     Subprograms called:
+ *
+ *       Linpack ... dtrsl.
+ *
+ *                           *  *  *
+ *
+ *     NEOS, November 1994. (Latest revision June 1996.)
+ *     Optimization Technology Center.
+ *     Argonne National Laboratory and Northwestern University.
+ *     Written by
+ *                        Ciyou Zhu
+ *     in collaboration with R.H. Byrd, P. Lu-Chen and J. Nocedal.
+ */
+int bmv_(long *m, double *sy, double *wt, long *col,
+  double *v, double *p, long *info)
+{
+  /* sy and wt are addressed column-major from index 0, element (r,c) */
+  /* being a[r + c * ld]; no pointer is moved in front of the caller's */
+  /* storage.  Scratch values are automatic, so nothing persists       */
+  /* between calls.                                                    */
+  const long ld = *m;
+  /* p and v are split into halves p1/v1 = [0, nc) and p2/v2 = [nc, 2nc). */
+  const long nc = *col;
+  long row, k;
+  double sum;
+  if (nc == 0) {
+    return 0;
+  }
+  /* PART I: solve [  D^(1/2)      O ] [ p1 ] = [ v1 ] */
+  /*               [ -L*D^(-1/2)   J ] [ p2 ]   [ v2 ]. */
+  /*   form the right-hand side v2+LD^(-1)v1 in p2; L is the strictly */
+  /*   lower part of sy and D its diagonal, so the first entry of p2  */
+  /*   receives no contribution from L. */
+  p[nc] = v[nc];
+  for (row = 1; row < nc; ++row) {
+    sum = 0.;
+    for (k = 0; k < row; ++k) {
+      sum += sy[row + k * ld] * v[k] / sy[k + k * ld];
+    }
+    p[nc + row] = v[nc + row] + sum;
+  }
+  /*   solve Jp2=v2+LD^(-1)v1 in place on p2. */
+  dtrsl_(wt, m, col, &p[nc], &c__11, info);
+  if (*info != 0) {
+    return 0;
+  }
+  /*   solve D^(1/2)p1=v1. */
+  for (row = 0; row < nc; ++row) {
+    p[row] = v[row] / sqrt(sy[row + row * ld]);
+  }
+  /* PART II: solve [ -D^(1/2)   D^(-1/2)*L'  ] [ p1 ] = [ p1 ] */
+  /*                [  0         J'           ] [ p2 ]   [ p2 ]. */
+  /*   solve J^Tp2=p2 in place on p2. */
+  dtrsl_(wt, m, col, &p[nc], &c__1, info);
+  if (*info != 0) {
+    return 0;
+  }
+  /* compute p1=-D^(-1/2)(p1-D^(-1/2)L'p2) */
+  /*           =-D^(-1/2)p1+D^(-1)L'p2. */
+  for (row = 0; row < nc; ++row) {
+    p[row] = -p[row] / sqrt(sy[row + row * ld]);
+  }
+  for (row = 0; row < nc; ++row) {
+    sum = 0.;
+    for (k = row + 1; k < nc; ++k) {
+      sum += sy[k + row * ld] * p[nc + k] / sy[row + row * ld];
+    }
+    p[row] += sum;
+  }
+  return 0;
+}
+/**
+ * Subroutine cauchy
+ *
+ *     For given x, l, u, g (with sbgnrm > 0), and a limited memory
+ *       BFGS matrix B defined in terms of matrices WY, WS, WT, and
+ *       scalars head, col, and theta, this subroutine computes the
+ *       generalized Cauchy point (GCP), defined as the first local
+ *       minimizer of the quadratic
+ *
+ *                  Q(x + s) = g's + 1/2 s'Bs
+ *
+ *       along the projected gradient direction P(x-tg,l,u).
+ *       The routine returns the GCP in xcp.
+ *
+ *     n is a long variable.
+ *       On entry n is the dimension of the problem.
+ *       On exit n is unchanged.
+ *
+ *     x is a double precision array of dimension n.
+ *       On entry x is the starting point for the GCP computation.
+ *       On exit x is unchanged.
+ *
+ *     l is a double precision array of dimension n.
+ *       On entry l is the lower bound of x.
+ *       On exit l is unchanged.
+ *
+ *     u is a double precision array of dimension n.
+ *       On entry u is the upper bound of x.
+ *       On exit u is unchanged.
+ *
+ *     nbd is a long array of dimension n.
+ *       On entry nbd represents the type of bounds imposed on the
+ *         variables, and must be specified as follows:
+ *         nbd(i)=0 if x(i) is unbounded,
+ *                1 if x(i) has only a lower bound,
+ *                2 if x(i) has both lower and upper bounds, and
+ *                3 if x(i) has only an upper bound.
+ *       On exit nbd is unchanged.
+ *
+ *     g is a double precision array of dimension n.
+ *       On entry g is the gradient of f(x).  g must be a nonzero vector.
+ *       On exit g is unchanged.
+ *
+ *     iorder is a long working array of dimension n.
+ *       iorder will be used to store the breakpoints in the piecewise
+ *       linear path and free variables encountered. On exit,
+ *         iorder(1),...,iorder(nleft) are indices of breakpoints
+ *                                which have not been encountered;
+ *         iorder(nleft+1),...,iorder(nbreak) are indices of
+ *                                     encountered breakpoints; and
+ *         iorder(nfree),...,iorder(n) are indices of variables which
+ *                 have no bound constraints along the search direction.
+ *
+ *     iwhere is a long array of dimension n.
+ *       On entry iwhere indicates only the permanently fixed (iwhere=3)
+ *       or free (iwhere= -1) components of x.
+ *       On exit iwhere records the status of the current x variables.
+ *       iwhere(i)=-3  if x(i) is free and has bounds, but is not moved
+ *                 0   if x(i) is free and has bounds, and is moved
+ *                 1   if x(i) is fixed at l(i), and l(i) .ne. u(i)
+ *                 2   if x(i) is fixed at u(i), and u(i) .ne. l(i)
+ *                 3   if x(i) is always fixed, i.e.,  u(i)=x(i)=l(i)
+ *                 -1  if x(i) is always free, i.e., it has no bounds.
+ *
+ *     t is a double precision working array of dimension n.
+ *       t will be used to store the break points.
+ *
+ *     d is a double precision array of dimension n used to store
+ *       the Cauchy direction P(x-tg)-x.
+ *
+ *     xcp is a double precision array of dimension n used to return the
+ *       GCP on exit.
+ *
+ *     m is a long variable.
+ *       On entry m is the maximum number of variable metric corrections
+ *         used to define the limited memory matrix.
+ *       On exit m is unchanged.
+ *
+ *     ws, wy, sy, and wt are double precision arrays.
+ *       On entry they store information that defines the
+ *                             limited memory BFGS matrix:
+ *         ws(n,m) stores S, a set of s-vectors;
+ *         wy(n,m) stores Y, a set of y-vectors;
+ *         sy(m,m) stores S'Y;
+ *         wt(m,m) stores the
+ *                 Cholesky factorization of (theta*S'S+LD^(-1)L').
+ *       On exit these arrays are unchanged.
+ *
+ *     theta is a double precision variable.
+ *       On entry theta is the scaling factor specifying B_0 = theta I.
+ *       On exit theta is unchanged.
+ *
+ *     col is a long variable.
+ *       On entry col is the actual number of variable metric
+ *         corrections stored so far.
+ *       On exit col is unchanged.
+ *
+ *     head is a long variable.
+ *       On entry head is the location of the first s-vector (or y-vector)
+ *         in S (or Y).
+ *       On exit head is unchanged.
+ *
+ *     p is a double precision working array of dimension 2m.
+ *       p will be used to store the vector p = W^(T)d.
+ *
+ *     c is a double precision working array of dimension 2m.
+ *       c will be used to store the vector c = W^(T)(xcp-x).
+ *
+ *     wbp is a double precision working array of dimension 2m.
+ *       wbp will be used to store the row of W corresponding
+ *         to a breakpoint.
+ *
+ *     v is a double precision working array of dimension 2m.
+ *
+ *     nseg is a long variable.
+ *       On exit nseg records the number of quadratic segments explored
+ *         in searching for the GCP.
+ *
+ *     sg and yg are double precision arrays of dimension m.
+ *       On entry sg  and yg store S'g and Y'g correspondingly.
+ *       On exit they are unchanged.
+ *
+ *     iprint is a long variable that must be set by the user.
+ *       It controls the frequency and type of output generated:
+ *        iprint<0    no output is generated;
+ *        iprint=0    print only one line at the last iteration;
+ *        0<iprint<99 print also f and |proj g| every iprint iterations;
+ *        iprint=99   print details of every iteration except n-vectors;
+ *        iprint=100  print also the changes of active set and final x;
+ *        iprint>100  print details of every iteration including x and g;
+ *       When iprint > 0, the file iterate.dat will be created to
+ *                        summarize the iteration.
+ *
+ *     sbgnrm is a double precision variable.
+ *       On entry sbgnrm is the norm of the projected gradient at x.
+ *       On exit sbgnrm is unchanged.
+ *
+ *     info is a long variable.
+ *       On entry info is 0.
+ *       On exit info = 0       for normal return,
+ *                    = nonzero for abnormal return when the system
+ *                              used in routine bmv is singular.
+ *
+ *     Subprograms called:
+ *
+ *       L-BFGS-B Library ... hpsolb, bmv.
+ *
+ *       Linpack ... dscal, dcopy, daxpy.
+ *
+ *
+ *     References:
+ *
+ *       [1] R. H. Byrd, P. Lu, J. Nocedal and C. Zhu, ``A limited
+ *       memory algorithm for bound constrained optimization'',
+ *       SIAM J. Scientific Computing 16 (1995), no. 5, pp. 1190--1208.
+ *
+ *       [2] C. Zhu, R.H. Byrd, P. Lu, J. Nocedal, ``L-BFGS-B: FORTRAN
+ *       Subroutines for Large Scale Bound Constrained Optimization''
+ *       Tech. Report, NAM-11, EECS Department, Northwestern University,
+ *       1994.
+ *
+ *       (Postscript files of these papers are available via anonymous
+ *        ftp to eecs.nwu.edu in the directory pub/lbfgs/lbfgs_bcm.)
+ *
+ *                           *  *  *
+ *
+ *     NEOS, November 1994. (Latest revision June 1996.)
+ *     Optimization Technology Center.
+ *     Argonne National Laboratory and Northwestern University.
+ *     Written by
+ *                        Ciyou Zhu
+ *     in collaboration with R.H. Byrd, P. Lu-Chen and J. Nocedal.
+ */
+int cauchy_(long *n, double *x, double *l,
+  double *u, long *nbd, double *g, long *iorder, long *iwhere,
+  double *t, double *d__, double *xcp, long *m,
+  double *wy, double *ws, double *sy, double *wt,
+  double *theta, long *col, long *head, double *p,
+  double *c__, double *wbp, double *v, long *nseg,
+  long *iprint, double *sbgnrm, long *info, double *epsmch)
+{
+  long wy_dim1, wy_offset, ws_dim1, ws_offset, sy_dim1, sy_offset, wt_dim1, wt_offset, i__1, i__2;
+  double d__1;
+  static long i__, j;
+  static double f1, f2, dt, tj, tl, tu, tj0;
+  static long ibp;
+  static double dtm;
+  static double wmc, wmp, wmw;
+  static long col2;
+  static double dibp;
+  static long iter;
+  static double zibp, tsum, dibp2;
+  static long bnded;
+  static double neggi;
+  static long nfree;
+  static double bkmin;
+  static long nleft;
+  static double f2_org__;
+  static long nbreak, ibkmin;
+  static long pointr;
+  static long xlower, xupper;
+  --xcp;
+  --d__;
+  --t;
+  --iwhere;
+  --iorder;
+  --g;
+  --nbd;
+  --u;
+  --l;
+  --x;
+  --v;
+  --wbp;
+  --c__;
+  --p;
+  wt_dim1 = *m;
+  wt_offset = 1 + wt_dim1;
+  wt -= wt_offset;
+  sy_dim1 = *m;
+  sy_offset = 1 + sy_dim1;
+  sy -= sy_offset;
+  ws_dim1 = *n;
+  ws_offset = 1 + ws_dim1;
+  ws -= ws_offset;
+  wy_dim1 = *n;
+  wy_offset = 1 + wy_dim1;
+  wy -= wy_offset;
+  /* Check the status of the variables, reset iwhere(i) if necessary; */
+  /*   compute the Cauchy direction d and the breakpoints t; initialize */
+  /*   the derivative f1 and the vector p = W'd (for theta = 1). */
+  if (*sbgnrm <= 0.) {
+    if (*iprint >= 0) {
+      fprintf(stdout, " Subgnorm = 0.  GCP = X.\n");
+    }
+    dcopy_(n, &x[1], &c__1, &xcp[1], &c__1);
+    return 0;
+  }
+  bnded = TRUE_;
+  nfree = *n + 1;
+  nbreak = 0;
+  ibkmin = 0;
+  bkmin = 0.;
+  col2 = *col << 1;
+  f1 = 0.;
+  if (*iprint >= 99) {
+    fprintf(stdout, "\n---------------- CAUCHY entered-------------------\n\n");
+  }
+  /* We set p to zero and build it up as we determine d. */
+  i__1 = col2;
+  for (i__ = 1; i__ <= i__1; ++i__) {
+    p[i__] = 0.;
+  }
+  /* In the following loop we determine for each variable its bound */
+  /*    status and its breakpoint, and update p accordingly. */
+  /*    Smallest breakpoint is identified. */
+  i__1 = *n;
+  for (i__ = 1; i__ <= i__1; ++i__) {
+    neggi = -g[i__];
+    if (iwhere[i__] != 3 && iwhere[i__] != -1) {
+      /* if x(i) is not a constant and has bounds, */
+      /* compute the difference between x(i) and its bounds. */
+      if (nbd[i__] <= 2) {
+        tl = x[i__] - l[i__];
+      }
+      if (nbd[i__] >= 2) {
+        tu = u[i__] - x[i__];
+      }
+      /* If a variable is close enough to a bound */
+      /*   we treat it as at bound. */
+      xlower = nbd[i__] <= 2 && tl <= 0.;
+      xupper = nbd[i__] >= 2 && tu <= 0.;
+      /* reset iwhere(i). */
+      iwhere[i__] = 0;
+      if (xlower) {
+        if (neggi <= 0.) {
+          iwhere[i__] = 1;
+        }
+      } else if (xupper) {
+        if (neggi >= 0.) {
+          iwhere[i__] = 2;
+        }
+      } else {
+        if (fabs(neggi) <= 0.) {
+          iwhere[i__] = -3;
+        }
+      }
+    }
+    pointr = *head;
+    if (iwhere[i__] != 0 && iwhere[i__] != -1) {
+      d__[i__] = 0.;
+    } else {
+      d__[i__] = neggi;
+      f1 -= neggi * neggi;
+      /* calculate p := p - W'e_i* (g_i). */
+      i__2 = *col;
+      for (j = 1; j <= i__2; ++j) {
+        p[j] += wy[i__ + pointr * wy_dim1] * neggi;
+        p[*col + j] += ws[i__ + pointr * ws_dim1] * neggi;
+        pointr = pointr % *m + 1;
+      }
+      if (nbd[i__] <= 2 && nbd[i__] != 0 && neggi < 0.) {
+        /* x(i) + d(i) is bounded; compute t(i). */
+        ++nbreak;
+        iorder[nbreak] = i__;
+        t[nbreak] = tl / (-neggi);
+        if (nbreak == 1 || t[nbreak] < bkmin) {
+          bkmin = t[nbreak];
+          ibkmin = nbreak;
+        }
+      } else if (nbd[i__] >= 2 && neggi > 0.) {
+        /* x(i) + d(i) is bounded; compute t(i). */
+        ++nbreak;
+        iorder[nbreak] = i__;
+        t[nbreak] = tu / neggi;
+        if (nbreak == 1 || t[nbreak] < bkmin) {
+          bkmin = t[nbreak];
+          ibkmin = nbreak;
+        }
+      } else {
+        /* x(i) + d(i) is not bounded. */
+        --nfree;
+        iorder[nfree] = i__;
+        if (fabs(neggi) > 0.) {
+          bnded = FALSE_;
+        }
+      }
+    }
+  }
+  /* The indices of the nonzero components of d are now stored */
+  /*   in iorder(1),...,iorder(nbreak) and iorder(nfree),...,iorder(n). */
+  /*   The smallest of the nbreak breakpoints is in t(ibkmin)=bkmin. */
+  if (*theta != 1.) {
+    /* complete the initialization of p for theta not= one. */
+    dscal_(col, theta, &p[*col + 1], &c__1);
+  }
+  /* Initialize GCP xcp = x. */
+  dcopy_(n, &x[1], &c__1, &xcp[1], &c__1);
+  if (nbreak == 0 && nfree == *n + 1) {
+    /* is a zero vector, return with the initial xcp as GCP. */
+    if (*iprint > 100) {
+      fprintf(stdout, "Cauchy X =  \n");
+      fprintf(stdout, "    ");
+      i__1 = *n;
+      for (i__ = 1; i__ <= i__1; ++i__) {
+        fprintf(stdout, " %11.4E", xcp[i__]);
+        if (i__ % 6 == 0) {
+          fprintf(stdout, "\n");
+          fprintf(stdout, "    ");
+        }
+      }
+      fprintf(stdout, "\n");
+    }
+    return 0;
+  }
+  /* Initialize c = W'(xcp - x) = 0. */
+  i__1 = col2;
+  for (j = 1; j <= i__1; ++j) {
+    c__[j] = 0.;
+  }
+  /* Initialize derivative f2. */
+  f2 = -(*theta) * f1;
+  f2_org__ = f2;
+  if (*col > 0) {
+    bmv_(m, &sy[sy_offset], &wt[wt_offset], col, &p[1], &v[1], info);
+    if (*info != 0) {
+      return 0;
+    }
+    f2 -= ddot_(&col2, &v[1], &c__1, &p[1], &c__1);
+  }
+  dtm = -f1 / f2;
+  tsum = 0.;
+  *nseg = 1;
+  if (*iprint >= 99) {
+    fprintf(stdout, " There are %3ld  breakpoints \n", nbreak);
+  }
+  /* If there are no breakpoints, locate the GCP and return. */
+  if (nbreak == 0) {
+    goto L888;
+  }
+  nleft = nbreak;
+  iter = 1;
+  tj = 0.;
+  /* ------------------- the beginning of the loop ------------------------- */
+L777:
+  /* Find the next smallest breakpoint; */
+  /*   compute dt = t(nleft) - t(nleft + 1). */
+  tj0 = tj;
+  if (iter == 1) {
+    /* Since we already have the smallest breakpoint we need not do */
+    /* heapsort yet. Often only one breakpoint is used and the */
+    /* cost of heapsort is avoided. */
+    tj = bkmin;
+    ibp = iorder[ibkmin];
+  } else {
+    if (iter == 2) {
+      /* Replace the already used smallest breakpoint with the */
+      /* breakpoint numbered nbreak > nlast, before heapsort call. */
+      if (ibkmin != nbreak) {
+        t[ibkmin] = t[nbreak];
+        iorder[ibkmin] = iorder[nbreak];
+      }
+      /* Update heap structure of breakpoints */
+      /*    (if iter=2, initialize heap). */
+    }
+    i__1 = iter - 2;
+    hpsolb_(&nleft, &t[1], &iorder[1], &i__1);
+    tj = t[nleft];
+    ibp = iorder[nleft];
+  }
+  dt = tj - tj0;
+  if (dt != 0. && *iprint >= 100) {
+    fprintf(stdout, "\n");
+    fprintf(stdout, "Piece    %3ld --f1, f2 at start point  %11.4E %11.4E\n", *nseg, f1, f2);
+    fprintf(stdout, "Distance to the next break point =  %11.4E\n", dt);
+    fprintf(stdout, "Distance to the stationary point =  %11.4E\n", dtm);
+  }
+  /* If a minimizer is within this interval, locate the GCP and return. */
+  if (dtm < dt) {
+    goto L888;
+  }
+  /* Otherwise fix one variable and */
+  /*   reset the corresponding component of d to zero. */
+  tsum += dt;
+  --nleft;
+  ++iter;
+  dibp = d__[ibp];
+  d__[ibp] = 0.;
+  if (dibp > 0.) {
+    zibp = u[ibp] - x[ibp];
+    xcp[ibp] = u[ibp];
+    iwhere[ibp] = 2;
+  } else {
+    zibp = l[ibp] - x[ibp];
+    xcp[ibp] = l[ibp];
+    iwhere[ibp] = 1;
+  }
+  if (*iprint >= 100) {
+    fprintf(stdout, " Variable   %ld  is fixed.\n", ibp);
+  }
+  if (nleft == 0 && nbreak == *n) {
+    /* all n variables are fixed, */
+    /*    return with xcp as GCP. */
+    dtm = dt;
+    goto L999;
+  }
+  /* Update the derivative information. */
+  ++(*nseg);
+  /* Computing 2nd power */
+  d__1 = dibp;
+  dibp2 = d__1 * d__1;
+  /* Update f1 and f2. */
+  /*    temporarily set f1 and f2 for col=0. */
+  f1 = f1 + dt * f2 + dibp2 - *theta * dibp * zibp;
+  f2 -= *theta * dibp2;
+  if (*col > 0) {
+    /* update c = c + dt*p. */
+    daxpy_(&col2, &dt, &p[1], &c__1, &c__[1], &c__1);
+    /* choose wbp, */
+    /* the row of W corresponding to the breakpoint encountered. */
+    pointr = *head;
+    i__1 = *col;
+    for (j = 1; j <= i__1; ++j) {
+      wbp[j] = wy[ibp + pointr * wy_dim1];
+      wbp[*col + j] = *theta * ws[ibp + pointr * ws_dim1];
+      pointr = pointr % *m + 1;
+    }
+    /* compute (wbp)Mc, (wbp)Mp, and (wbp)M(wbp)'. */
+    bmv_(m, &sy[sy_offset], &wt[wt_offset], col, &wbp[1], &v[1], info);
+    if (*info != 0) {
+      return 0;
+    }
+    wmc = ddot_(&col2, &c__[1], &c__1, &v[1], &c__1);
+    wmp = ddot_(&col2, &p[1], &c__1, &v[1], &c__1);
+    wmw = ddot_(&col2, &wbp[1], &c__1, &v[1], &c__1);
+    /* update p = p - dibp*wbp. */
+    d__1 = -dibp;
+    daxpy_(&col2, &d__1, &wbp[1], &c__1, &p[1], &c__1);
+    /* complete updating f1 and f2 while col > 0. */
+    f1 += dibp * wmc;
+    f2 = f2 + dibp * 2. * wmp - dibp2 * wmw;
+  }
+  d__1 = *epsmch * f2_org__;
+  f2 = d__1 > f2 ? d__1 : f2;
+  if (nleft > 0) {
+    dtm = -f1 / f2;
+    goto L777;
+    /* to repeat the loop for unsearched intervals. */
+  } else if (bnded) {
+    f1 = 0.;
+    f2 = 0.;
+    dtm = 0.;
+  } else {
+    dtm = -f1 / f2;
+  }
+  /* ------------------- the end of the loop ------------------------------- */
+L888:
+  if (*iprint >= 99) {
+    fprintf(stdout, "\n");
+    fprintf(stdout, " GCP found in this segment\n");
+    fprintf(stdout, "Piece    %3ld --f1, f2 at start point  %11.4E %11.4E\n", *nseg, f1, f2);
+    fprintf(stdout, "Distance to the stationary point =  %11.4E\n", dtm);
+  }
+  if (dtm <= 0.) {
+    dtm = 0.;
+  }
+  tsum += dtm;
+  /* Move free variables (i.e., the ones w/o breakpoints) and */
+  /*   the variables whose breakpoints haven't been reached. */
+  daxpy_(n, &tsum, &d__[1], &c__1, &xcp[1], &c__1);
+L999:
+  /* Update c = c + dtm*p = W'(x^c - x) */
+  /*   which will be used in computing r = Z'(B(x^c - x) + g). */
+  if (*col > 0) {
+    daxpy_(&col2, &dtm, &p[1], &c__1, &c__[1], &c__1);
+  }
+  if (*iprint > 100) {
+    fprintf(stdout, "Cauchy X =  \n");
+    fprintf(stdout, "    ");
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+      fprintf(stdout, " %11.4E", xcp[i__]);
+      if (i__ % 6 == 0) {
+        fprintf(stdout, "\n");
+        fprintf(stdout, "    ");
+      }
+    }
+    fprintf(stdout, "\n");
+  }
+  if (*iprint >= 99) {
+    fprintf(stdout, "\n---------------- exit CAUCHY----------------------\n\n");
+  }
+  return 0;
+}
+/**
+ * Subroutine cmprlb
+ *
+ *       Computes r = -Z'B(xcp-xk) - Z'g, reusing
+ *         wa(2m+1 ...) = W'(xcp-x) as left behind by subroutine cauchy.
+ *
+ *       If cnstnd is false and col > 0, r(i) = -g(i) for i = 1..n and
+ *         nothing else is touched.
+ *       Otherwise only r(1..nfree) is written, one entry per variable
+ *         listed in index(1..nfree); bmv is called with wa(2m+1 ...) as
+ *         input and wa(1 ...) as output.
+ *
+ *       info is set to -8 when bmv reports a nonzero info.
+ *       The function itself always returns 0.
+ *
+ *     Subprograms called:
+ *
+ *       L-BFGS-B Library ... bmv.
+ *
+ *                           *  *  *
+ *
+ *     NEOS, November 1994. (Latest revision June 1996.)
+ *     Optimization Technology Center.
+ *     Argonne National Laboratory and Northwestern University.
+ *     Written by
+ *                        Ciyou Zhu
+ *     in collaboration with R.H. Byrd, P. Lu-Chen and J. Nocedal.
+ */
+int cmprlb_(long *n, long *m, double *x,
+  double *g, double *ws, double *wy, double *sy,
+  double *wt, double *z__, double *r__, double *wa,
+  long *index, double *theta, long *col, long *head,
+  long *nfree, long *cnstnd, long *info)
+{
+  const long ld = *n; /* leading dimension of ws and wy */
+  long pos, corr, var, slot, ncorr, nfr;
+  double coef_y, coef_s;
+  const double *ycol;
+  const double *scol;
+  /* No bounds but corrections stored: r is just the negative gradient. */
+  if (!(*cnstnd) && *col > 0) {
+    for (pos = 0; pos < ld; ++pos) {
+      r__[pos] = -g[pos];
+    }
+    return 0;
+  }
+  /* index holds 1-based variable numbers; convert when subscripting. */
+  nfr = *nfree;
+  for (pos = 0; pos < nfr; ++pos) {
+    var = index[pos] - 1;
+    r__[pos] = -(*theta) * (z__[var] - x[var]) - g[var];
+  }
+  bmv_(m, sy, wt, col, wa + 2 * *m, wa, info);
+  if (*info != 0) {
+    *info = -8;
+    return 0;
+  }
+  /* Walk the col stored corrections, starting at column head and */
+  /*   wrapping around after column m. */
+  slot = *head;
+  ncorr = *col;
+  for (corr = 0; corr < ncorr; ++corr) {
+    coef_y = wa[corr];
+    coef_s = *theta * wa[*col + corr];
+    ycol = wy + (slot - 1) * ld;
+    scol = ws + (slot - 1) * ld;
+    for (pos = 0; pos < nfr; ++pos) {
+      var = index[pos] - 1;
+      r__[pos] = r__[pos] + ycol[var] * coef_y + scol[var] * coef_s;
+    }
+    slot = slot % *m + 1;
+  }
+  return 0;
+}
+/**
+ * Subroutine errclb
+ *
+ *    Validates the input data.
+ *
+ *    Every failed check overwrites task with a message starting with
+ *    "ERROR"; no check returns early, so the message of the last
+ *    failing check is the one left in task.
+ *
+ *    Checks, in order:
+ *      n .le. 0, m .le. 0, factr .lt. 0   (task only; info, k untouched)
+ *      nbd(i) outside 0..3                (info = -6, k = i)
+ *      nbd(i) .eq. 2 with l(i) > u(i)     (info = -7, k = i)
+ *
+ *    When every check passes, task, info and k are left as they were.
+ *    The function always returns 0.
+ *
+ *                          *  *  *
+ *
+ *    NEOS, November 1994. (Latest revision June 1996.)
+ *    Optimization Technology Center.
+ *    Argonne National Laboratory and Northwestern University.
+ *    Written by
+ *                       Ciyou Zhu
+ *    in collaboration with R.H. Byrd, P. Lu-Chen and J. Nocedal.
+ */
+int errclb_(long *n, long *m, double *factr,
+  double *l, double *u, long *nbd, char *task, long *info, long *k)
+{
+  long pos, kind, nvar;
+  /* Scalar arguments first. */
+  if (*n <= 0) {
+    strcpy(task, "ERROR: N .LE. 0");
+  }
+  if (*m <= 0) {
+    strcpy(task, "ERROR: M .LE. 0");
+  }
+  if (*factr < 0.) {
+    strcpy(task, "ERROR: FACTR .LT. 0");
+  }
+  /* Then the per-variable bound data. pos counts from 0, while the */
+  /*   variable number reported through k counts from 1. */
+  nvar = *n;
+  for (pos = 0; pos < nvar; ++pos) {
+    kind = nbd[pos];
+    if (kind < 0 || kind > 3) {
+      strcpy(task, "ERROR: INVALID NBD");
+      *info = -6;
+      *k = pos + 1;
+    }
+    if (kind == 2 && l[pos] > u[pos]) {
+      strcpy(task, "ERROR: NO FEASIBLE SOLUTION");
+      *info = -7;
+      *k = pos + 1;
+    }
+  }
+  return 0;
+}
+/**
+ * Subroutine formk
+ *
+ *     This subroutine forms  the LEL^T factorization of the indefinite
+ *
+ *       matrix    K = [-D -Y'ZZ'Y/theta     L_a'-R_z'  ]
+ *                     [L_a -R_z           theta*S'AA'S ]
+ *                                                    where E = [-I  0]
+ *                                                              [ 0  I]
+ *     The matrix K can be shown to be equal to the matrix M^[-1]N
+ *       occurring in section 5.1 of [1], as well as to the matrix
+ *       Mbar^[-1] Nbar in section 5.3.
+ *
+ *     The work is done in three stages: (1) update the stored inner
+ *       products in wn1, (2) assemble the upper triangle of wn from wn1,
+ *       theta and the diagonal of sy, (3) factorize wn block by block
+ *       with dpofa and dtrsl.
+ *
+ *     n is a long variable.
+ *       On entry n is the dimension of the problem.
+ *       On exit n is unchanged.
+ *
+ *     nsub is a long variable
+ *       On entry nsub is the number of subspace variables in free set.
+ *       On exit nsub is not changed.
+ *
+ *     ind is a long array of dimension nsub.
+ *       On entry ind specifies the indices of subspace variables.
+ *       On exit ind is unchanged.
+ *       NOTE(review): when updatd is true the code also reads
+ *         ind(nsub+1),...,ind(n), so the caller appears to pass an
+ *         array with n entries -- confirm against the caller.
+ *
+ *     nenter is a long variable.
+ *       On entry nenter is the number of variables entering the
+ *         free set.
+ *       On exit nenter is unchanged.
+ *
+ *     ileave is a long variable.
+ *       On entry indx2(ileave),...,indx2(n) are the variables leaving
+ *         the free set.
+ *       On exit ileave is unchanged.
+ *
+ *     indx2 is a long array of dimension n.
+ *       On entry indx2(1),...,indx2(nenter) are the variables entering
+ *         the free set, while indx2(ileave),...,indx2(n) are the
+ *         variables leaving the free set.
+ *       On exit indx2 is unchanged.
+ *
+ *     iupdat is a long variable.
+ *       On entry iupdat is the total number of BFGS updates made so far.
+ *       On exit iupdat is unchanged.
+ *
+ *     updatd is a logical variable (passed as a long; nonzero is true).
+ *       On entry 'updatd' is true if the L-BFGS matrix has been updated.
+ *       On exit 'updatd' is unchanged.
+ *
+ *     wn is a double precision array of dimension 2m x 2m.
+ *       On entry wn is unspecified.
+ *       On exit the upper triangle of wn stores the LEL^T factorization
+ *         of the 2*col x 2*col indefinite matrix
+ *                     [-D -Y'ZZ'Y/theta     L_a'-R_z'  ]
+ *                     [L_a -R_z           theta*S'AA'S ]
+ *
+ *     wn1 is a double precision array of dimension 2m x 2m.
+ *       On entry wn1 stores the lower triangular part of
+ *                     [Y' ZZ'Y   L_a'+R_z']
+ *                     [L_a+R_z   S'AA'S   ]
+ *         in the previous iteration.
+ *       On exit wn1 stores the corresponding updated matrices.
+ *       The purpose of wn1 is just to store these inner products
+ *       so they can be easily updated and inserted into wn.
+ *
+ *     m is a long variable.
+ *       On entry m is the maximum number of variable metric corrections
+ *         used to define the limited memory matrix.
+ *       On exit m is unchanged.
+ *
+ *     ws, wy, sy, and wtyy are double precision arrays;
+ *     theta is a double precision variable;
+ *     col is a long variable;
+ *     head is a long variable.
+ *       On entry they store the information defining the
+ *                                          limited memory BFGS matrix:
+ *         ws(n,m) stores S, a set of s-vectors;
+ *         wy(n,m) stores Y, a set of y-vectors;
+ *         sy(m,m) stores S'Y;
+ *         wtyy(m,m) stores the Cholesky factorization
+ *                                   of (theta*S'S+LD^(-1)L')
+ *         theta is the scaling factor specifying B_0 = theta I;
+ *         col is the number of variable metric corrections stored;
+ *         head is the location of the 1st s- (or y-) vector in S (or Y).
+ *       On exit they are unchanged.
+ *       NOTE(review): wtyy is described here but is not a parameter of
+ *         this function; only ws, wy and sy are passed in.
+ *
+ *     info is a long variable.
+ *       On entry info is unspecified.
+ *       On exit info =  0 for normal return;
+ *                    = -1 when the 1st Cholesky factorization failed;
+ *                    = -2 when the 2nd Cholesky factorization failed.
+ *
+ *     The function itself always returns 0; success or failure is
+ *       reported only through info.
+ *
+ *     Subprograms called:
+ *
+ *       Linpack ... dcopy, dpofa, dtrsl.
+ *       (ddot is called as well, when forming the (2,2) block.)
+ *
+ *
+ *     References:
+ *       [1] R. H. Byrd, P. Lu, J. Nocedal and C. Zhu, ``A limited
+ *       memory algorithm for bound constrained optimization'',
+ *       SIAM J. Scientific Computing 16 (1995), no. 5, pp. 1190--1208.
+ *
+ *       [2] C. Zhu, R.H. Byrd, P. Lu, J. Nocedal, ``L-BFGS-B: a
+ *       limited memory FORTRAN code for solving bound constrained
+ *       optimization problems'', Tech. Report, NAM-11, EECS Department,
+ *       Northwestern University, 1994.
+ *
+ *       (Postscript files of these papers are available via anonymous
+ *        ftp to eecs.nwu.edu in the directory pub/lbfgs/lbfgs_bcm.)
+ *
+ *                           *  *  *
+ *
+ *     NEOS, November 1994. (Latest revision June 1996.)
+ *     Optimization Technology Center.
+ *     Argonne National Laboratory and Northwestern University.
+ *     Written by
+ *                        Ciyou Zhu
+ *     in collaboration with R.H. Byrd, P. Lu-Chen and J. Nocedal.
+ */
+int formk_(long *n, long *nsub, long *ind, long *nenter,
+  long *ileave, long *indx2, long *iupdat, long *updatd,
+  double *wn, double *wn1, long *m, double *ws,
+  double *wy, double *sy, double *theta, long *col,
+  long *head, long *info)
+{
+  long wn_dim1, wn_offset, wn1_dim1, wn1_offset, ws_dim1, ws_offset,
+    wy_dim1, wy_offset, sy_dim1, sy_offset, i__1, i__2, i__3;
+  static long i__, k, k1, m2, is, js, iy, jy, is1, js1, col2, dend, pend;
+  static long upcl;
+  static double temp1, temp2, temp3, temp4;
+  static long ipntr, jpntr, dbegin, pbegin;
+  --indx2; /* shift the base pointers so that every array below can be
+            * subscripted from 1; a(i,j) is written a[i + j * a_dim1] */
+  --ind;
+  sy_dim1 = *m;
+  sy_offset = 1 + sy_dim1;
+  sy -= sy_offset;
+  wy_dim1 = *n;
+  wy_offset = 1 + wy_dim1;
+  wy -= wy_offset;
+  ws_dim1 = *n;
+  ws_offset = 1 + ws_dim1;
+  ws -= ws_offset;
+  wn1_dim1 = 2 * *m; /* wn1 and wn both use 2m as their leading dimension */
+  wn1_offset = 1 + wn1_dim1;
+  wn1 -= wn1_offset;
+  wn_dim1 = 2 * *m;
+  wn_offset = 1 + wn_dim1;
+  wn -= wn_offset;
+  /* Form the lower triangular part of */
+  /*           WN1 = [Y' ZZ'Y   L_a'+R_z'] */
+  /*                 [L_a+R_z   S'AA'S   ] */
+  /*    where L_a is the strictly lower triangular part of S'AA'Y */
+  /*          R_z is the upper triangular part of S'ZZ'Y. */
+  /* Rows/columns 1..m of wn1 belong to Y, rows/columns m+1..2m to S. */
+  if (*updatd) {
+    if (*iupdat > *m) {
+      /* shift old part of WN1: in each of the three blocks the entries */
+      /* move one row up and one column left (source is (r+1,c+1),      */
+      /* destination is (r,c)); dcopy_ is assumed to follow the usual   */
+      /* BLAS argument order (count, src, incsrc, dst, incdst).         */
+      i__1 = *m - 1;
+      for (jy = 1; jy <= i__1; ++jy) {
+        js = *m + jy;
+        i__2 = *m - jy;
+        dcopy_(&i__2, &wn1[jy + 1 + (jy + 1) * wn1_dim1], &c__1, &wn1[jy + jy * wn1_dim1], &c__1);
+        i__2 = *m - jy;
+        dcopy_(&i__2, &wn1[js + 1 + (js + 1) * wn1_dim1], &c__1, &wn1[js + js * wn1_dim1], &c__1);
+        i__2 = *m - 1;
+        dcopy_(&i__2, &wn1[*m + 2 + (jy + 1) * wn1_dim1], &c__1, &wn1[*m + 1 + jy * wn1_dim1], &c__1);
+      }
+    }
+    /* put new rows in blocks (1,1), (2,1) and (2,2). */
+    pbegin = 1; /* ind(pbegin..pend) = ind(1..nsub): used for the Z sums */
+    pend = *nsub;
+    dbegin = *nsub + 1; /* ind(dbegin..dend) = ind(nsub+1..n): used for the A sums */
+    dend = *n;
+    iy = *col;
+    is = *m + *col;
+    ipntr = *head + *col - 1; /* column of the col-th stored correction ... */
+    if (ipntr > *m) {
+      ipntr -= *m; /* ... wrapped back into 1..m */
+    }
+    jpntr = *head;
+    i__1 = *col;
+    for (jy = 1; jy <= i__1; ++jy) {
+      js = *m + jy;
+      temp1 = 0.;
+      temp2 = 0.;
+      temp3 = 0.;
+      /* compute element jy of row 'col' of Y'ZZ'Y */
+      i__2 = pend;
+      for (k = pbegin; k <= i__2; ++k) {
+        k1 = ind[k];
+        temp1 += wy[k1 + ipntr * wy_dim1] * wy[k1 + jpntr * wy_dim1];
+      }
+      /* compute elements jy of row 'col' of L_a and S'AA'S */
+      i__2 = dend;
+      for (k = dbegin; k <= i__2; ++k) {
+        k1 = ind[k];
+        temp2 += ws[k1 + ipntr * ws_dim1] * ws[k1 + jpntr * ws_dim1];
+        temp3 += ws[k1 + ipntr * ws_dim1] * wy[k1 + jpntr * wy_dim1];
+      }
+      wn1[iy + jy * wn1_dim1] = temp1;
+      wn1[is + js * wn1_dim1] = temp2;
+      wn1[is + jy * wn1_dim1] = temp3;
+      jpntr = jpntr % *m + 1; /* next stored column, wrapping m -> 1 */
+    }
+    /* put new column in block (2,1). */
+    jy = *col;
+    jpntr = *head + *col - 1;
+    if (jpntr > *m) {
+      jpntr -= *m;
+    }
+    ipntr = *head;
+    i__1 = *col;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+      is = *m + i__;
+      temp3 = 0.;
+      /* compute element i of column 'col' of R_z */
+      i__2 = pend;
+      for (k = pbegin; k <= i__2; ++k) {
+        k1 = ind[k];
+        temp3 += ws[k1 + ipntr * ws_dim1] * wy[k1 + jpntr * wy_dim1];
+      }
+      ipntr = ipntr % *m + 1;
+      wn1[is + jy * wn1_dim1] = temp3; /* for i = col this overwrites the
+                                        * L_a value stored by the loop above */
+    }
+    upcl = *col - 1; /* row and column 'col' were rebuilt from scratch just
+                      * above, so only the first col-1 still need correcting */
+  } else {
+    upcl = *col;
+  }
+  /* modify the old parts in blocks (1,1) and (2,2) due to changes */
+  /* in the set of free variables. */
+  /* temp1/temp2 sum over the entering variables indx2(1..nenter),  */
+  /* temp3/temp4 over the leaving variables indx2(ileave..n).       */
+  ipntr = *head;
+  i__1 = upcl;
+  for (iy = 1; iy <= i__1; ++iy) {
+    is = *m + iy;
+    jpntr = *head;
+    i__2 = iy;
+    for (jy = 1; jy <= i__2; ++jy) {
+      js = *m + jy;
+      temp1 = 0.;
+      temp2 = 0.;
+      temp3 = 0.;
+      temp4 = 0.;
+      i__3 = *nenter;
+      for (k = 1; k <= i__3; ++k) {
+        k1 = indx2[k];
+        temp1 += wy[k1 + ipntr * wy_dim1] * wy[k1 + jpntr * wy_dim1];
+        temp2 += ws[k1 + ipntr * ws_dim1] * ws[k1 + jpntr * ws_dim1];
+      }
+      i__3 = *n;
+      for (k = *ileave; k <= i__3; ++k) {
+        k1 = indx2[k];
+        temp3 += wy[k1 + ipntr * wy_dim1] * wy[k1 + jpntr * wy_dim1];
+        temp4 += ws[k1 + ipntr * ws_dim1] * ws[k1 + jpntr * ws_dim1];
+      }
+      wn1[iy + jy * wn1_dim1] = wn1[iy + jy * wn1_dim1] + temp1 - temp3; /* Y block: add entering, drop leaving */
+      wn1[is + js * wn1_dim1] = wn1[is + js * wn1_dim1] - temp2 + temp4; /* S block: opposite signs */
+      jpntr = jpntr % *m + 1;
+    }
+    ipntr = ipntr % *m + 1;
+  }
+  /* modify the old parts in block (2,1). */
+  ipntr = *head;
+  i__1 = *m + upcl;
+  for (is = *m + 1; is <= i__1; ++is) {
+    jpntr = *head;
+    i__2 = upcl;
+    for (jy = 1; jy <= i__2; ++jy) {
+      temp1 = 0.;
+      temp3 = 0.;
+      i__3 = *nenter;
+      for (k = 1; k <= i__3; ++k) {
+        k1 = indx2[k];
+        temp1 += ws[k1 + ipntr * ws_dim1] * wy[k1 + jpntr * wy_dim1];
+      }
+      i__3 = *n;
+      for (k = *ileave; k <= i__3; ++k) {
+        k1 = indx2[k];
+        temp3 += ws[k1 + ipntr * ws_dim1] * wy[k1 + jpntr * wy_dim1];
+      }
+      if (is <= jy + *m) { /* on or above the diagonal of block (2,1): the R_z part */
+        wn1[is + jy * wn1_dim1] = wn1[is + jy * wn1_dim1] + temp1 - temp3;
+      } else { /* strictly below the diagonal: the L_a part, opposite signs */
+        wn1[is + jy * wn1_dim1] = wn1[is + jy * wn1_dim1] - temp1 + temp3;
+      }
+      jpntr = jpntr % *m + 1;
+    }
+    ipntr = ipntr % *m + 1;
+  }
+  /* Form the upper triangle of WN = [D+Y' ZZ'Y/theta   -L_a'+R_z' ] */
+  /*                                 [-L_a +R_z        S'AA'S*theta] */
+  /* wn1 keeps its S block at rows/columns m+1.., whereas wn packs it */
+  /* directly after the Y block at rows/columns col+1..               */
+  m2 = *m << 1; /* 2m, handed to dpofa_/dtrsl_ as their second argument
+                 * (presumably the leading dimension, LINPACK style) */
+  i__1 = *col;
+  for (iy = 1; iy <= i__1; ++iy) {
+    is = *col + iy;
+    is1 = *m + iy;
+    i__2 = iy;
+    for (jy = 1; jy <= i__2; ++jy) {
+      js = *col + jy;
+      js1 = *m + jy;
+      wn[jy + iy * wn_dim1] = wn1[iy + jy * wn1_dim1] / *theta;
+      wn[js + is * wn_dim1] = wn1[is1 + js1 * wn1_dim1] * *theta;
+    }
+    i__2 = iy - 1;
+    for (jy = 1; jy <= i__2; ++jy) {
+      wn[jy + is * wn_dim1] = -wn1[is1 + jy * wn1_dim1];
+    }
+    i__2 = *col;
+    for (jy = iy; jy <= i__2; ++jy) {
+      wn[jy + is * wn_dim1] = wn1[is1 + jy * wn1_dim1];
+    }
+    wn[iy + iy * wn_dim1] += sy[iy + iy * sy_dim1]; /* add the diagonal of sy (the D term) */
+  }
+  /* Form the upper triangle of WN= [  LL'            L^-1(-L_a'+R_z')] */
+  /*                                [(-L_a +R_z)L'^-1   S'AA'S*theta  ] */
+  /*    first Cholesky factor (1,1) block of wn to get LL' */
+  /*                      with L' stored in the upper triangle of wn. */
+  dpofa_(&wn[wn_offset], &m2, col, info);
+  if (*info != 0) {
+    *info = -1;
+    return 0;
+  }
+  /* then form L^-1(-L_a'+R_z') in the (1,2) block. */
+  col2 = *col << 1;
+  i__1 = col2;
+  for (js = *col + 1; js <= i__1; ++js) {
+    dtrsl_(&wn[wn_offset], &m2, col, &wn[js * wn_dim1 + 1], &c__11, info); /* NOTE(review): info from dtrsl_ is
+                                                                            * never tested in this loop */
+  }
+  /* Form S'AA'S*theta + (L^-1(-L_a'+R_z'))'L^-1(-L_a'+R_z') in the */
+  /*    upper triangle of (2,2) block of wn. */
+  i__1 = col2;
+  for (is = *col + 1; is <= i__1; ++is) {
+    i__2 = col2;
+    for (js = is; js <= i__2; ++js) {
+      wn[is + js * wn_dim1] += ddot_(col, &wn[is * wn_dim1 + 1], &c__1, &wn[js * wn_dim1 + 1], &c__1);
+    }
+  }
+  /* Cholesky factorization of (2,2) block of wn. */
+  dpofa_(&wn[*col + 1 + (*col + 1) * wn_dim1], &m2, col, info); /* block starts at wn(col+1,col+1) */
+  if (*info != 0) {
+    *info = -2;
+    return 0;
+  }
+  return 0;
+}
+/**
+ * Subroutine formt
+ *
+ *       Builds the upper triangle of the symmetric positive definite
+ *         T = theta*SS + L*D^(-1)*L' in the array wt, then hands wt to
+ *         dpofa for a Cholesky factorization T = J*J', with J' kept in
+ *         the upper triangle of wt.
+ *
+ *       wt, sy and ss are addressed as column-major arrays with
+ *         leading dimension m; only their leading col x col parts
+ *         are used here.
+ *
+ *       info is set to -3 when dpofa reports a nonzero info.
+ *       The function itself always returns 0.
+ *
+ *     Subprograms called:
+ *
+ *       Linpack ... dpofa.
+ *
+ *                           *  *  *
+ *
+ *     NEOS, November 1994. (Latest revision June 1996.)
+ *     Optimization Technology Center.
+ *     Argonne National Laboratory and Northwestern University.
+ *     Written by
+ *                        Ciyou Zhu
+ *     in collaboration with R.H. Byrd, P. Lu-Chen and J. Nocedal.
+ */
+int formt_(long *m, double *wt, double *sy,
+  double *ss, long *col, double *theta, long *info)
+{
+  const long ld = *m; /* shared leading dimension of wt, sy, ss */
+  long row, cidx, p, ncol;
+  double acc;
+  /* All indices below count from 0. */
+  /* First row of T: the L*D^(-1)*L' term does not contribute. */
+  ncol = *col;
+  for (cidx = 0; cidx < ncol; ++cidx) {
+    wt[cidx * ld] = *theta * ss[cidx * ld];
+  }
+  /* Remaining rows of the upper triangle (cidx >= row), so the inner */
+  /*   sum always runs over the first 'row' columns of sy. */
+  for (row = 1; row < ncol; ++row) {
+    for (cidx = row; cidx < ncol; ++cidx) {
+      acc = 0.;
+      for (p = 0; p < row; ++p) {
+        acc += sy[row + p * ld] * sy[cidx + p * ld] / sy[p + p * ld];
+      }
+      wt[row + cidx * ld] = acc + *theta * ss[row + cidx * ld];
+    }
+  }
+  /* Cholesky factorize T to J*J' with */
+  /*    J' stored in the upper triangle of wt. */
+  dpofa_(wt, m, col, info);
+  if (*info != 0) {
+    *info = -3;
+  }
+  return 0;
+}
+/**
+ * Subroutine freev
+ *
+ *     Counts the variables entering and leaving the free set when
+ *       iter > 0 and bounds are present, then rebuilds the index set
+ *       of free and active variables at the GCP from iwhere.
+ *
+ *     cnstnd is a logical variable indicating whether bounds are present
+ *
+ *     index is a long array of dimension n
+ *       for i=1,...,nfree, index(i) are the indices of free variables
+ *       for i=nfree+1,...,n, index(i) are the indices of bound variables
+ *       On entry after the first iteration, index gives
+ *         the free variables at the previous iteration.
+ *       On exit it gives the free variables based on the determination
+ *         in cauchy using the array iwhere (iwhere(i) <= 0 means free).
+ *         Free variables are stored in increasing order from the front,
+ *         the others are filled in from position n backwards.
+ *
+ *     indx2 is a long array of dimension n
+ *       On entry indx2 is unspecified.
+ *       On exit with iter>0, indx2 indicates which variables
+ *          have changed status since the previous iteration.
+ *       For i= 1,...,nenter, indx2(i) have changed from bound to free.
+ *       For i= ileave,...,n, indx2(i) have changed from free to bound.
+ *
+ *     nenter and ileave are always reset (to 0 and n+1) before counting.
+ *
+ *     wrk is set to 1 when at least one variable entered or left, or
+ *       when updatd is nonzero; otherwise to 0.
+ *
+ *     iprint >= 99 prints summary lines to stdout; iprint >= 100 also
+ *       prints one line per variable that changes status.
+ *
+ *     The function always returns 0.
+ *
+ *                           *  *  *
+ *
+ *     NEOS, November 1994. (Latest revision June 1996.)
+ *     Optimization Technology Center.
+ *     Argonne National Laboratory and Northwestern University.
+ *     Written by
+ *                        Ciyou Zhu
+ *     in collaboration with R.H. Byrd, P. Lu-Chen and J. Nocedal.
+ */
+int freev_(long *n, long *nfree, long *index,
+  long *nenter, long *ileave, long *indx2, long *iwhere,
+  long *wrk, long *updatd, long *cnstnd, long *iprint,
+  long *iter)
+{
+  long pos, var, tail, limit;
+  /* Arrays are subscripted from 0 here; the variable numbers stored */
+  /*   in index and indx2 still count from 1. */
+  *nenter = 0;
+  *ileave = *n + 1;
+  if (*iter > 0 && *cnstnd) {
+    /* Previously free variables that are no longer free: leaving. */
+    limit = *nfree;
+    for (pos = 0; pos < limit; ++pos) {
+      var = index[pos];
+      if (iwhere[var - 1] > 0) {
+        --(*ileave);
+        indx2[*ileave - 1] = var;
+        if (*iprint >= 100) {
+          fprintf(stdout, " Variable %2ld leaves the set of free variables\n", var);
+        }
+      }
+    }
+    /* Previously bound variables that are now free: entering. */
+    limit = *n;
+    for (pos = *nfree; pos < limit; ++pos) {
+      var = index[pos];
+      if (iwhere[var - 1] <= 0) {
+        indx2[*nenter] = var;
+        ++(*nenter);
+        if (*iprint >= 100) {
+          fprintf(stdout, " Variable %2ld enters the set of free variables\n", var);
+        }
+      }
+    }
+    if (*iprint >= 99) {
+      limit = *n + 1 - *ileave;
+      fprintf(stdout, " %2ld variables leave; %2ld variables enter\n", limit, *nenter);
+    }
+  }
+  *wrk = *ileave < *n + 1 || *nenter > 0 || *updatd;
+  /* Find the index set of free and active variables at the GCP. */
+  *nfree = 0;
+  limit = *n;
+  tail = limit;
+  for (var = 1; var <= limit; ++var) {
+    if (iwhere[var - 1] <= 0) {
+      index[*nfree] = var;
+      ++(*nfree);
+    } else {
+      --tail;
+      index[tail] = var;
+    }
+  }
+  if (*iprint >= 99) {
+    limit = *iter + 1;
+    fprintf(stdout, " %2ld variables are free at GCP %3ld\n", *nfree, limit);
+  }
+  return 0;
+}
+/**
+ * Subroutine hpsolb
+ *
+ *     Extracts the least element of t and leaves the remaining
+ *       elements of t arranged as a heap.
+ *
+ *     n is a long variable.
+ *       On entry n is the dimension of the arrays t and iorder.
+ *       On exit n is unchanged.
+ *
+ *     t is a double precision array of dimension n.
+ *       On entry t stores the elements to be sorted,
+ *       On exit t(n) stores the least element of t, and t(1) to t(n-1)
+ *         store the remaining elements in the form of a heap
+ *         (each t(i) is not larger than t(2i) and t(2i+1)).
+ *
+ *     iorder is a long array of dimension n.
+ *       On entry iorder(i) is the index of t(i).
+ *       On exit iorder(i) is still the index of t(i), but iorder may be
+ *         permuted in accordance with t.
+ *
+ *     iheap is a long variable specifying the task.
+ *       On entry iheap should be set as follows:
+ *         iheap .eq. 0 if t(1) to t(n) is not in the form of a heap,
+ *         iheap .ne. 0 if otherwise.
+ *       On exit iheap is unchanged.
+ *
+ *     Nothing is moved when n <= 1. The function always returns 0.
+ *
+ *
+ *     References:
+ *       Algorithm 232 of CACM (J. W. J. Williams): HEAPSORT.
+ *
+ *                           *  *  *
+ *
+ *     NEOS, November 1994. (Latest revision June 1996.)
+ *     Optimization Technology Center.
+ *     Argonne National Laboratory and Northwestern University.
+ *     Written by
+ *                        Ciyou Zhu
+ *     in collaboration with R.H. Byrd, P. Lu-Chen and J. Nocedal.
+ */
+int hpsolb_(long *n, double *t, long *iorder, long *iheap)
+{
+  long hole, parent, child, next, count;
+  long tag, least_tag;
+  double val, least;
+  /* hole, parent and child are 1-based heap positions; the arrays */
+  /*   themselves are subscripted from 0, hence the "- 1" below. */
+  if (*iheap == 0) {
+    /* Build the heap by inserting t(2), ..., t(n) one at a time. */
+    count = *n;
+    for (next = 2; next <= count; ++next) {
+      val = t[next - 1];
+      tag = iorder[next - 1];
+      /* Sift up: pull larger parents down into the hole. */
+      hole = next;
+      while (hole > 1) {
+        parent = hole / 2;
+        if (!(val < t[parent - 1])) {
+          break;
+        }
+        t[hole - 1] = t[parent - 1];
+        iorder[hole - 1] = iorder[parent - 1];
+        hole = parent;
+      }
+      t[hole - 1] = val;
+      iorder[hole - 1] = tag;
+    }
+  }
+  /* Remove the root (the least member) and re-insert the last member */
+  /*   so that positions 1 to n-1 form a heap again. */
+  if (*n > 1) {
+    hole = 1;
+    least = t[0];
+    least_tag = iorder[0];
+    val = t[*n - 1];
+    tag = iorder[*n - 1];
+    /* Sift down: promote the smaller child while it is below val. */
+    for (;;) {
+      child = hole + hole;
+      if (child > *n - 1) {
+        break;
+      }
+      if (t[child] < t[child - 1]) {
+        ++child;
+      }
+      if (!(t[child - 1] < val)) {
+        break;
+      }
+      t[hole - 1] = t[child - 1];
+      iorder[hole - 1] = iorder[child - 1];
+      hole = child;
+    }
+    t[hole - 1] = val;
+    iorder[hole - 1] = tag;
+    /* Put the least member in t(n). */
+    t[*n - 1] = least;
+    iorder[*n - 1] = least_tag;
+  }
+  return 0;
+}
+/**
+ * Subroutine lnsrlb
+ *
+ *     This subroutine calls subroutine dcsrch from the Minpack2 library
+ *       to perform the line search.  Subroutine dcsrch is safeguarded so
+ *       that all trial points lie within the feasible region.
+ *
+ *     On entry with a task that does not begin with 'FG_LN' a new search
+ *       is set up: dtd and dnorm are computed from d, the maximum step
+ *       stpmx and the initial step stp are chosen, x and g are saved in
+ *       t and r, f is saved in fold, ifun and iback are reset, and csave
+ *       is set to 'START'.  A task beginning with 'FG_LN' skips that
+ *       set-up and goes straight on to the dcsrch call.
+ *
+ *     On exit task is 'FG_LNSRCH' (x holds the next trial point) unless
+ *       csave begins with 'CONV' or 'WARN', in which case task is
+ *       'NEW_X'.
+ *
+ *     info is set to -4, and a message is printed to stdout, when the
+ *       directional derivative gd is >= 0 at the first evaluation
+ *       (ifun = 0).  The function always returns 0.
+ *
+ *     Subprograms called:
+ *
+ *       Minpack2 Library ... dcsrch.
+ *
+ *       Linpack ... dtrsl, ddot.
+ *
+ *                           *  *  *
+ *
+ *     NEOS, November 1994. (Latest revision June 1996.)
+ *     Optimization Technology Center.
+ *     Argonne National Laboratory and Northwestern University.
+ *     Written by
+ *                        Ciyou Zhu
+ *     in collaboration with R.H. Byrd, P. Lu-Chen and J. Nocedal.
+ */
+int lnsrlb_(long *n, double *l, double *u,
+  long *nbd, double *x, double *f, double *fold,
+  double *gd, double *gdold, double *g, double *d__,
+  double *r__, double *t, double *z__, double *stp,
+  double *dnorm, double *dtd, double *xstep, double *stpmx,
+  long *iter, long *ifun, long *iback, long *nfgv,
+  long *info, char *task, long *boxed, long *cnstnd,
+  char *csave, long *isave, double *dsave)
+{
+  long pos, nvar;
+  double slope, room, first_step;
+  /* All arrays are subscripted from 0 in this routine. */
+  if (strncmp(task, "FG_LN", 5) != 0) {
+    *dtd = ddot_(n, d__, &c__1, d__, &c__1);
+    *dnorm = sqrt(*dtd);
+    /* Determine the maximum step length. */
+    *stpmx = 1e10;
+    if (*cnstnd) {
+      if (*iter == 0) {
+        *stpmx = 1.;
+      } else {
+        nvar = *n;
+        for (pos = 0; pos < nvar; ++pos) {
+          slope = d__[pos];
+          if (nbd[pos] == 0) {
+            continue;
+          }
+          if (slope < 0. && nbd[pos] <= 2) {
+            /* moving down towards the lower bound */
+            room = l[pos] - x[pos];
+            if (room >= 0.) {
+              *stpmx = 0.;
+            } else if (slope * *stpmx < room) {
+              *stpmx = room / slope;
+            }
+          } else if (slope > 0. && nbd[pos] >= 2) {
+            /* moving up towards the upper bound */
+            room = u[pos] - x[pos];
+            if (room <= 0.) {
+              *stpmx = 0.;
+            } else if (slope * *stpmx > room) {
+              *stpmx = room / slope;
+            }
+          }
+        }
+      }
+    }
+    /* Initial trial step. */
+    if (*iter == 0 && !(*boxed)) {
+      first_step = 1. / *dnorm;
+      *stp = first_step <= *stpmx ? first_step : *stpmx;
+    } else {
+      *stp = 1.;
+    }
+    /* Remember the starting point, gradient and function value. */
+    dcopy_(n, x, &c__1, t, &c__1);
+    dcopy_(n, g, &c__1, r__, &c__1);
+    *fold = *f;
+    *ifun = 0;
+    *iback = 0;
+    strcpy(csave, "START");
+  }
+  *gd = ddot_(n, g, &c__1, d__, &c__1);
+  if (*ifun == 0) {
+    *gdold = *gd;
+    if (*gd >= 0.) {
+      /* the directional derivative >=0. */
+      /* Line search is impossible. */
+      fprintf(stdout, "  ascent direction in projection gd =  %.8E\n", *gd);
+      *info = -4;
+      return 0;
+    }
+  }
+  dcsrch_(f, gd, stp, &c_b280, &c_b281, &c_b282, &c_b9, stpmx, csave, isave, dsave);
+  *xstep = *stp * *dnorm;
+  if (strncmp(csave, "CONV", 4) == 0 || strncmp(csave, "WARN", 4) == 0) {
+    strcpy(task, "NEW_X");
+    return 0;
+  }
+  /* Another function/gradient evaluation is needed at the trial point. */
+  strcpy(task, "FG_LNSRCH");
+  ++(*ifun);
+  ++(*nfgv);
+  *iback = *ifun - 1;
+  if (*stp == 1.) {
+    dcopy_(n, z__, &c__1, x, &c__1);
+  } else {
+    nvar = *n;
+    for (pos = 0; pos < nvar; ++pos) {
+      x[pos] = *stp * d__[pos] + t[pos];
+    }
+  }
+  return 0;
+}
+/**
+ * Subroutine matupd
+ *
+ *       Stores the newest correction pair in WS and WY, and refreshes
+ *         the matrices SS and SY that form the middle matrix in B.
+ *
+ *       While iupdat <= m, col is set to iupdat and itail to the next
+ *         unused column; afterwards itail and head both advance by one
+ *         column, wrapping from m back to 1.
+ *       d is copied into column itail of WS and r into column itail
+ *         of WY; theta is set to rr/dr.
+ *       When iupdat > m the old entries of SS (upper triangle) and SY
+ *         (lower triangle) are first shifted by one row and one column
+ *         to make room; then the last column of SS and the last row
+ *         of SY are recomputed with ddot.
+ *
+ *       The function always returns 0.
+ *
+ *     Subprograms called:
+ *
+ *       Linpack ... dcopy, ddot.
+ *
+ *                           *  *  *
+ *
+ *     NEOS, November 1994. (Latest revision June 1996.)
+ *     Optimization Technology Center.
+ *     Argonne National Laboratory and Northwestern University.
+ *     Written by
+ *                        Ciyou Zhu
+ *     in collaboration with R.H. Byrd, P. Lu-Chen and J. Nocedal.
+ */
+int matupd_(long *n, long *m, double *ws,
+  double *wy, double *sy, double *ss, double *d__,
+  double *r__, long *itail, long *iupdat, long *col,
+  long *head, double *theta, double *rr, double *dr,
+  double *stp, double *dtd)
+{
+  const long ldw = *n; /* leading dimension of ws and wy */
+  const long ldm = *m; /* leading dimension of sy and ss */
+  long j, slot, last, count;
+  /* Set pointers for matrices WS and WY. */
+  if (*iupdat > *m) {
+    *itail = *itail % *m + 1;
+    *head = *head % *m + 1;
+  } else {
+    *col = *iupdat;
+    *itail = (*head + *iupdat - 2) % *m + 1;
+  }
+  /* Update matrices WS and WY: column itail (counted from 1). */
+  dcopy_(n, d__, &c__1, ws + (*itail - 1) * ldw, &c__1);
+  dcopy_(n, r__, &c__1, wy + (*itail - 1) * ldw, &c__1);
+  /* Set theta=yy/ys. */
+  *theta = *rr / *dr;
+  /* 0-based position of row/column 'col' in SS and SY. */
+  last = *col - 1;
+  if (*iupdat > *m) {
+    /* move old information; j counts from 1 here because its */
+    /*   address is also passed to dcopy as the element count. */
+    for (j = 1; j <= last; ++j) {
+      dcopy_(&j, ss + 1 + j * ldm, &c__1, ss + (j - 1) * ldm, &c__1);
+      count = *col - j;
+      dcopy_(&count, sy + j + j * ldm, &c__1, sy + (j - 1) + (j - 1) * ldm, &c__1);
+    }
+  }
+  /* add new information: the last row of SY */
+  /* and the last column of SS: */
+  slot = *head;
+  for (j = 0; j < last; ++j) {
+    sy[last + j * ldm] = ddot_(n, d__, &c__1, wy + (slot - 1) * ldw, &c__1);
+    ss[j + last * ldm] = ddot_(n, ws + (slot - 1) * ldw, &c__1, d__, &c__1);
+    slot = slot % *m + 1;
+  }
+  ss[last + last * ldm] = (*stp == 1.) ? *dtd : *stp * *stp * *dtd;
+  sy[last + last * ldm] = *dr;
+  return 0;
+}
+/* Helper for prn1lb_: writes label, then the n entries of v (0-based) to
+ * stdout, starting a fresh indented row after every sixth value, and ends
+ * the listing with a newline. */
+static void prn1lb_print_vector(const char *label, const double *v, long n)
+{
+  long i;
+  fprintf(stdout, "%s", label);
+  for (i = 1; i <= n; ++i) {
+    fprintf(stdout, " %11.4E", v[i - 1]);
+    if (i % 6 == 0) {
+      fprintf(stdout, "\n");
+      fprintf(stdout, "     ");
+    }
+  }
+  fprintf(stdout, "\n");
+}
+/**
+ * Subroutine prn1lb
+ *
+ *     This subroutine prints the input data, initial point, upper and
+ *       lower bounds of each variable, machine precision, as well as
+ *       the headings of the output.
+ *
+ *     Output levels, as implemented below:
+ *       iprint <  0   nothing is printed;
+ *       iprint >= 0   banner, machine precision, n and m go to stdout;
+ *       iprint >= 1   the file "iterate.dat" in the current directory is
+ *                     (re)created with the legend and column headings;
+ *       iprint > 100  l, x and u (n entries each) are listed on stdout.
+ *
+ *     If iterate.dat cannot be opened, the reason is reported with
+ *     perror() and the file part is skipped; stdout output is unaffected.
+ *
+ *     itfile is accepted for interface compatibility but is not used;
+ *     the file name is fixed.
+ *
+ *     The function always returns 0.
+ *
+ *                           *  *  *
+ *
+ *     NEOS, November 1994. (Latest revision June 1996.)
+ *     Optimization Technology Center.
+ *     Argonne National Laboratory and Northwestern University.
+ *     Written by
+ *                        Ciyou Zhu
+ *     in collaboration with R.H. Byrd, P. Lu-Chen and J. Nocedal.
+ */
+int prn1lb_(long *n, long *m, double *l,
+  double *u, double *x, long *iprint, long *itfile,
+  double *epsmch)
+{
+  FILE *itfptr;
+  if (*iprint < 0) {
+    return 0;
+  }
+  fprintf(stdout, "RUNNING THE L-BFGS-B CODE\n\n");
+  fprintf(stdout, "           * * *\n\n");
+  fprintf(stdout, "Machine precision = %.3E\n", *epsmch);
+  fprintf(stdout, " N = %3ld    M = %2ld\n", *n, *m);
+  if (*iprint < 1) {
+    return 0;
+  }
+  itfptr = fopen("iterate.dat", "w");
+  if (itfptr == NULL) {
+    /* Do not hand a NULL stream to fprintf; report and carry on. */
+    perror("iterate.dat");
+  } else {
+    fprintf(itfptr, "RUNNING THE L-BFGS-B CODE\n");
+    fprintf(itfptr, "\n");
+    fprintf(itfptr, "it    = iteration number\n");
+    fprintf(itfptr, "nf    = number of function evaluations\n");
+    fprintf(itfptr, "nseg  = number of segments explored during the Cauchy search\n");
+    fprintf(itfptr, "nact  = number of active bounds at the generalized Cauchy point\n");
+    fprintf(itfptr, "sub   = manner in which the subspace minimization terminated:\n");
+    fprintf(itfptr, "        con = converged, bnd = a bound was reached\n");
+    fprintf(itfptr, "itls  = number of iterations performed in the line search\n");
+    fprintf(itfptr, "stepl = step length used\n");
+    fprintf(itfptr, "tstep = norm of the displacement (total step)\n");
+    fprintf(itfptr, "projg = norm of the projected gradient\n");
+    fprintf(itfptr, "f     = function value\n");
+    fprintf(itfptr, "\n");
+    fprintf(itfptr, "           * * *\n\n");
+    fprintf(itfptr, "Machine precision = %.3E\n", *epsmch);
+    fprintf(itfptr, " N = %3ld    M = %2ld\n", *n, *m);
+    fprintf(itfptr, "\n");
+    fprintf(itfptr, "   it   nf  nseg  nact  sub  itls  stepl    tstep     projg        f\n");
+    fclose(itfptr);
+  }
+  if (*iprint > 100) {
+    fprintf(stdout, "\n");
+    prn1lb_print_vector(" L = ", l, *n);
+    fprintf(stdout, "\n");
+    prn1lb_print_vector(" X0 =", x, *n);
+    fprintf(stdout, "\n");
+    prn1lb_print_vector(" U = ", u, *n);
+  }
+  return 0;
+}
+/**
+ * Subroutine prn2lb
+ *
+ *     This subroutine prints out new information after a successful
+ *       line search.
+ *
+ *     word is always overwritten with a three-character tag derived from
+ *     iword (0 -> "con", 1 -> "bnd", 5 -> "TNT", anything else -> "---"),
+ *     so it must have room for at least four chars.
+ *
+ *     Output levels, as implemented below:
+ *       iprint >= 99      line-search summary and iterate line on stdout;
+ *       iprint > 100      additionally x and g (n entries each);
+ *       0 < iprint < 99   iterate line on stdout when iter is a multiple
+ *                         of iprint;
+ *       iprint >= 1       one summary row is appended to "iterate.dat".
+ *
+ *     If iterate.dat cannot be opened, the reason is reported with
+ *     perror() and the row is skipped.
+ *
+ *     itfile is accepted for interface compatibility but is not used.
+ *     The function always returns 0.
+ *
+ *                           *  *  *
+ *
+ *     NEOS, November 1994. (Latest revision June 1996.)
+ *     Optimization Technology Center.
+ *     Argonne National Laboratory and Northwestern University.
+ *     Written by
+ *                        Ciyou Zhu
+ *     in collaboration with R.H. Byrd, P. Lu-Chen and J. Nocedal.
+ */
+int prn2lb_(long *n, double *x, double *f,
+  double *g, long *iprint, long *itfile, long *iter,
+  long *nfgv, long *nact, double *sbgnrm, long *nseg, char*word,
+  long *iword, long *iback, double *stp, double *xstep)
+{
+  const long nvar = *n;
+  long i;
+  FILE *itfptr;
+  /* 'word' records the status of subspace solutions. */
+  switch (*iword) {
+  case 0:
+    /* the subspace minimization converged. */
+    strcpy(word, "con");
+    break;
+  case 1:
+    /* the subspace minimization stopped at a bound. */
+    strcpy(word, "bnd");
+    break;
+  case 5:
+    /* the truncated Newton step has been used. */
+    strcpy(word, "TNT");
+    break;
+  default:
+    strcpy(word, "---");
+    break;
+  }
+  if (*iprint >= 99) {
+    fprintf(stdout, "LINE SEARCH %ld times; norm of step = %E\n", *iback, *xstep);
+    fprintf(stdout, "\nAt iterate%5ld    f= %12.5E    |proj g|= %12.5E\n", *iter, *f, *sbgnrm);
+    if (*iprint > 100) {
+      fprintf(stdout, "X =");
+      for (i = 0; i < nvar; ++i) {
+        fprintf(stdout, "%11.4E ", x[i]);
+      }
+      fprintf(stdout, "\n");
+      fprintf(stdout, "G =");
+      for (i = 0; i < nvar; ++i) {
+        fprintf(stdout, "%11.4E ", g[i]);
+      }
+      fprintf(stdout, "\n");
+    }
+  } else if (*iprint > 0) {
+    /* iprint is in 1..98 here, so the modulus is well defined. */
+    if (*iter % *iprint == 0) {
+      fprintf(stdout, "\nAt iterate%5ld    f= %12.5E    |proj g|= %12.5E\n", *iter, *f, *sbgnrm);
+    }
+  }
+  if (*iprint >= 1) {
+    itfptr = fopen("iterate.dat", "a");
+    if (itfptr == NULL) {
+      /* Do not hand a NULL stream to fprintf; report and carry on. */
+      perror("iterate.dat");
+    } else {
+      fprintf(itfptr, " %4ld %4ld %5ld %5ld  %3s %4ld  %7.1E  %7.1E %10.3E %10.3E\n",
+          *iter, *nfgv, *nseg, *nact, word, *iback, *stp, *xstep, *sbgnrm, *f);
+      fclose(itfptr);
+    }
+  }
+  return 0;
+}
+/* Helper for prn3lb_: writes to out the diagnostic text for the info codes
+ * that stdout and iterate.dat report identically (-1 to -5, -8 and -9).
+ * Any other value of info, including 0, prints nothing. */
+static void prn3lb_print_info(FILE *out, long info)
+{
+  switch (info) {
+  case -1:
+    fprintf(out, "\n");
+    fprintf(out, " Matrix in 1st Cholesky factorization in formk is not Pos. Def.\n");
+    break;
+  case -2:
+    fprintf(out, "\n");
+    fprintf(out, " Matrix in 2st Cholesky factorization in formk is not Pos. Def.\n");
+    break;
+  case -3:
+    fprintf(out, "\n");
+    fprintf(out, " Matrix in the Cholesky factorization in formt is not Pos. Def.\n");
+    break;
+  case -4:
+    fprintf(out, "\n");
+    fprintf(out, " Derivative >= 0, backtracking line search impossible.\n");
+    fprintf(out, "   Previous x, f and g restored.\n");
+    fprintf(out, " Possible causes: 1 error in function or gradient evaluation;\n");
+    fprintf(out, "                  2 rounding errors dominate computation.\n");
+    break;
+  case -5:
+    fprintf(out, "\n");
+    fprintf(out, " Warning:  more than 10 function and gradient\n");
+    fprintf(out, "   evaluations in the last line search.  Termination\n");
+    fprintf(out, "   may possibly be caused by a bad search direction.\n");
+    break;
+  case -8:
+    fprintf(out, "\n");
+    fprintf(out, " The triangular system is singular.\n");
+    break;
+  case -9:
+    fprintf(out, "\n");
+    fprintf(out, " Line search cannot locate an adequate point after 20 function\n");
+    fprintf(out, "  and gradient evaluations.  Previous x, f and g restored.\n");
+    fprintf(out, " Possible causes: 1 error in function or gradient evaluation;\n");
+    fprintf(out, "                  2 rounding error dominate computation.\n");
+    break;
+  default:
+    break;
+  }
+}
+/**
+ * Subroutine prn3lb
+ *
+ *     This subroutine prints out information when either a built-in
+ *       convergence test is satisfied or when an error message is
+ *       generated.
+ *
+ *     Output levels, as implemented below:
+ *       iprint <  0   nothing is printed;
+ *       iprint >= 0   unless task starts with "ERROR", the final summary
+ *                     table goes to stdout; then task, the diagnostic
+ *                     text selected by info and the total time follow;
+ *       iprint >= 1   F and the three partial timings are also printed,
+ *                     and task, diagnostic and total time are appended
+ *                     to "iterate.dat";
+ *       iprint >= 100 the final x (n entries) is listed on stdout.
+ *
+ *     The messages for info = -6 and info = -7 (which mention index k)
+ *     are written to stdout only, not to iterate.dat.
+ *
+ *     If iterate.dat cannot be opened, the reason is reported with
+ *     perror() and the file part is skipped.
+ *
+ *     itfile is accepted for interface compatibility but is not used.
+ *     The function always returns 0.
+ *
+ *                           *  *  *
+ *
+ *     NEOS, November 1994. (Latest revision June 1996.)
+ *     Optimization Technology Center.
+ *     Argonne National Laboratory and Northwestern University.
+ *     Written by
+ *                        Ciyou Zhu
+ *     in collaboration with R.H. Byrd, P. Lu-Chen and J. Nocedal.
+ */
+int prn3lb_(long *n, double *x, double *f, char *task,
+  long *iprint, long *info, long *itfile, long *iter,
+  long *nfgv, long *nintol, long *nskip, long *nact,
+  double *sbgnrm, double *time, long *nseg, char *word,
+  long *iback, double *stp, double *xstep, long *k,
+  double *cachyt, double *sbtime, double *lnscht)
+{
+  const long nvar = *n;
+  long i;
+  FILE *itfptr;
+  if (*iprint < 0) {
+    return 0;
+  }
+  /* The summary table is skipped when the run ended with an error. */
+  if (strncmp(task, "ERROR", 5) != 0) {
+    fprintf(stdout, "\n");
+    fprintf(stdout, "           * * *\n");
+    fprintf(stdout, "\n");
+    fprintf(stdout, "Tit   = total number of iterations\n");
+    fprintf(stdout, "Tnf   = total number of function evaluations\n");
+    fprintf(stdout, "Tnint = total number of segments explored during Cauchy searches\n");
+    fprintf(stdout, "Skip  = number of BFGS updates skipped\n");
+    fprintf(stdout, "Nact  = number of active bounds at final generalized Cauchy point\n");
+    fprintf(stdout, "Projg = norm of the final projected gradient\n");
+    fprintf(stdout, "F     = final function value\n");
+    fprintf(stdout, "\n");
+    fprintf(stdout, "           * * *\n");
+    fprintf(stdout, "\n");
+    fprintf(stdout, "   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F\n");
+    fprintf(stdout, "%5ld %6ld %6ld %6ld %5ld %5ld  %10.3E  %10.3E\n", *n, *iter, *nfgv, *nintol, *nskip, *nact, *sbgnrm, *f);
+    if (*iprint >= 100) {
+      fprintf(stdout, "\n");
+      fprintf(stdout, " X =");
+      for (i = 1; i <= nvar; ++i) {
+        fprintf(stdout, " %11.4E", x[i - 1]);
+        if (i % 6 == 0) {
+          fprintf(stdout, "\n");
+          fprintf(stdout, "    ");
+        }
+      }
+      fprintf(stdout, "\n");
+    }
+    if (*iprint >= 1) {
+      fprintf(stdout, "  F =  %3.8E\n", *f);
+    }
+  }
+  fprintf(stdout, "\n");
+  fprintf(stdout, "%s\n", task);
+  if (*info == -6) {
+    fprintf(stdout, "  Input nbd(%2ld) is invalid.\n", *k);
+  } else if (*info == -7) {
+    fprintf(stdout, "  l(%2ld) > u(%2ld).  No feasible solution.\n", *k, *k);
+  } else {
+    prn3lb_print_info(stdout, *info);
+  }
+  if (*iprint >= 1) {
+    fprintf(stdout, "\n");
+    fprintf(stdout, " Cauchy                time %1.3E seconds.\n", *cachyt);
+    fprintf(stdout, " Subspace minimization time %1.3E seconds.\n", *sbtime);
+    fprintf(stdout, " Line search           time %1.3E seconds.\n", *lnscht);
+  }
+  fprintf(stdout, "\n");
+  fprintf(stdout, " Total User time %1.3E seconds.\n", *time);
+  fprintf(stdout, "\n");
+  if (*iprint < 1) {
+    return 0;
+  }
+  itfptr = fopen("iterate.dat", "a");
+  if (itfptr == NULL) {
+    /* Do not hand a NULL stream to fprintf; report and carry on. */
+    perror("iterate.dat");
+    return 0;
+  }
+  if (*info == -4 || *info == -9) {
+    /* The line search failed: log the last attempt without projg and f. */
+    fprintf(itfptr, " %4ld %4ld %5ld %5ld  %3s %4ld  %7.1E  %7.1E      -          -\n",
+        *iter, *nfgv, *nseg, *nact, word, *iback, *stp, *xstep);
+  }
+  fprintf(itfptr, "\n");
+  fprintf(itfptr, "%s\n", task);
+  prn3lb_print_info(itfptr, *info);
+  fprintf(itfptr, "\n");
+  fprintf(itfptr, " Total User time %1.3E seconds.\n", *time);
+  fprintf(itfptr, "\n");
+  fclose(itfptr);
+  return 0;
+}
+/**
+ * Subroutine projgr
+ *
+ *     This subroutine computes the infinity norm of the projected
+ *       gradient.
+ *
+ *     n is a long variable: the number of entries read from each array.
+ *     l, u, x and g are double precision arrays of dimension n and nbd
+ *       is a long array of dimension n; all are only read.
+ *     sbgnrm is a double precision variable.
+ *       On exit it holds the largest |pg(i)|, where pg(i) starts as g(i)
+ *       and is clipped as follows:
+ *         g(i) <  0 and nbd(i) >= 2:  pg(i) = max(x(i)-u(i), g(i));
+ *         g(i) >= 0 and nbd(i) is 1 or 2:  pg(i) = min(x(i)-l(i), g(i));
+ *         otherwise (in particular nbd(i) = 0):  pg(i) = g(i).
+ *       It is 0 when n <= 0.
+ *
+ *     The function always returns 0.
+ *
+ *                           *  *  *
+ *
+ *     NEOS, November 1994. (Latest revision June 1996.)
+ *     Optimization Technology Center.
+ *     Argonne National Laboratory and Northwestern University.
+ *     Written by
+ *                        Ciyou Zhu
+ *     in collaboration with R.H. Byrd, P. Lu-Chen and J. Nocedal.
+ */
+int projgr_(long *n, double *l, double *u,
+  long *nbd, double *x, double *g, double *sbgnrm)
+{
+  /* Plain automatic variables and 0-based subscripts: no state is kept */
+  /* between calls and no pointer is moved in front of its array. */
+  const long nvar = *n;
+  double norm = 0.;
+  long i;
+  for (i = 0; i < nvar; ++i) {
+    double gi = g[i];
+    double gap;
+    double mag;
+    if (nbd[i] != 0) {
+      if (gi < 0.) {
+        if (nbd[i] >= 2) {
+          /* an upper bound exists: x can rise by at most u - x */
+          gap = x[i] - u[i];
+          gi = gap >= gi ? gap : gi;
+        }
+      } else if (nbd[i] <= 2) {
+        /* a lower bound exists: x can fall by at most x - l */
+        gap = x[i] - l[i];
+        gi = gap <= gi ? gap : gi;
+      }
+    }
+    mag = fabs(gi);
+    norm = norm >= mag ? norm : mag;
+  }
+  *sbgnrm = norm;
+  return 0;
+}
+/* **********************************************************************
+ *
+ * This routine contains the major changes in the updated version.
+ * The changes are described in the accompanying paper
+ *
+ *  Jose Luis Morales, Jorge Nocedal
+ *  "Remark On Algorithm 788: L-BFGS-B: Fortran Subroutines for Large-Scale
+ *   Bound Constrained Optimization". December 27, 2010.
+ *
+ *         J.L. Morales  Departamento de Matematicas,
+ *                       Instituto Tecnologico Autonomo de Mexico
+ *                       Mexico D.F.
+ *
+ *         J. Nocedal    Department of Electrical Engineering and
+ *                       Computer Science.
+ *                       Northwestern University. Evanston, IL. USA
+ *
+ *                       January 17, 2011
+ *
+ * ********************************************************************** */
+/**
+ * Subroutine subsm
+ *
+ *     Given xcp, l, u, r, an index set that specifies
+ *       the active set at xcp, and an l-BFGS matrix B
+ *       (in terms of WY, WS, SY, WT, head, col, and theta),
+ *       this subroutine computes an approximate solution
+ *       of the subspace problem
+ *
+ *       (P)   min Q(x) = r'(x-xcp) + 1/2 (x-xcp)' B (x-xcp)
+ *
+ *             subject to l<=x<=u
+ *                       x_i=xcp_i for all i in A(xcp)
+ *
+ *       along the subspace unconstrained Newton direction
+ *
+ *          d = -(Z'BZ)^(-1) r.
+ *
+ *       The formula for the Newton direction, given the L-BFGS matrix
+ *       and the Sherman-Morrison formula, is
+ *
+ *          d = (1/theta)r + (1/theta**2) Z'WK^(-1)W'Z r.
+ *
+ *       where
+ *                 K = [-D -Y'ZZ'Y/theta     L_a'-R_z'  ]
+ *                     [L_a -R_z           theta*S'AA'S ]
+ *
+ *     Note that this procedure for computing d differs
+ *     from that described in [1]. One can show that the matrix K is
+ *     equal to the matrix M^[-1]N in that paper.
+ *
+ *     n is a long variable.
+ *       On entry n is the dimension of the problem.
+ *       On exit n is unchanged.
+ *
+ *     m is a long variable.
+ *       On entry m is the maximum number of variable metric corrections
+ *         used to define the limited memory matrix.
+ *       On exit m is unchanged.
+ *
+ *     nsub is a long variable.
+ *       On entry nsub is the number of free variables.
+ *       On exit nsub is unchanged.
+ *
+ *     ind is a long array of dimension nsub.
+ *       On entry ind specifies the coordinate indices of free variables.
+ *       On exit ind is unchanged.
+ *
+ *     l is a double precision array of dimension n.
+ *       On entry l is the lower bound of x.
+ *       On exit l is unchanged.
+ *
+ *     u is a double precision array of dimension n.
+ *       On entry u is the upper bound of x.
+ *       On exit u is unchanged.
+ *
+ *     nbd is a long array of dimension n.
+ *       On entry nbd represents the type of bounds imposed on the
+ *         variables, and must be specified as follows:
+ *         nbd(i)=0 if x(i) is unbounded,
+ *                1 if x(i) has only a lower bound,
+ *                2 if x(i) has both lower and upper bounds, and
+ *                3 if x(i) has only an upper bound.
+ *       On exit nbd is unchanged.
+ *
+ *     x is a double precision array of dimension n.
+ *       On entry x specifies the Cauchy point xcp.
+ *       On exit x(i) is the minimizer of Q over the subspace of
+ *                                                        free variables.
+ *
+ *     d is a double precision array of dimension n.
+ *       On entry d is the reduced gradient of Q at xcp.
+ *       On exit d is the Newton direction of Q.
+ *
+ *    xp is a double precision array of dimension n.
+ *       used to safeguard the projected Newton direction
+ *
+ *    xx is a double precision array of dimension n
+ *       On entry it holds the current iterate
+ *       On output it is unchanged
+ *    gg is a double precision array of dimension n
+ *       On entry it holds the gradient at the current iterate
+ *       On output it is unchanged
+ *
+ *     ws and wy are double precision arrays;
+ *     theta is a double precision variable;
+ *     col is a long variable;
+ *     head is a long variable.
+ *       On entry they store the information defining the
+ *                                          limited memory BFGS matrix:
+ *         ws(n,m) stores S, a set of s-vectors;
+ *         wy(n,m) stores Y, a set of y-vectors;
+ *         theta is the scaling factor specifying B_0 = theta I;
+ *         col is the number of variable metric corrections stored;
+ *         head is the location of the 1st s- (or y-) vector in S (or Y).
+ *       On exit they are unchanged.
+ *
+ *     iword is a long variable.
+ *       On entry iword is unspecified.
+ *       On exit iword specifies the status of the subspace solution.
+ *         iword = 0 if the solution is in the box,
+ *                 1 if some bound is encountered.
+ *
+ *     wv is a double precision working array of dimension 2m.
+ *
+ *     wn is a double precision array of dimension 2m x 2m.
+ *       On entry the upper triangle of wn stores the LEL^T factorization
+ *         of the indefinite matrix
+ *
+ *              K = [-D -Y'ZZ'Y/theta     L_a'-R_z'  ]
+ *                  [L_a -R_z           theta*S'AA'S ]
+ *                                                    where E = [-I  0]
+ *                                                              [ 0  I]
+ *       On exit wn is unchanged.
+ *
+ *     iprint is a long variable that must be set by the user.
+ *       It controls the frequency and type of output generated:
+ *        iprint<0    no output is generated;
+ *        iprint=0    print only one line at the last iteration;
+ *        0<iprint<99 print also f and |proj g| every iprint iterations;
+ *        iprint=99   print details of every iteration except n-vectors;
+ *        iprint=100  print also the changes of active set and final x;
+ *        iprint>100  print details of every iteration including x and g;
+ *       When iprint > 0, the file iterate.dat will be created to
+ *                        summarize the iteration.
+ *
+ *     info is a long variable.
+ *       On entry info is unspecified.
+ *       On exit info = 0       for normal return,
+ *                    = nonzero for abnormal return
+ *                                  when the matrix K is ill-conditioned.
+ *
+ *     Subprograms called:
+ *
+ *       Linpack dtrsl.
+ *
+ *
+ *     References:
+ *
+ *       [1] R. H. Byrd, P. Lu, J. Nocedal and C. Zhu, ``A limited
+ *       memory algorithm for bound constrained optimization'',
+ *       SIAM J. Scientific Computing 16 (1995), no. 5, pp. 1190--1208.
+ *
+ *                           *  *  *
+ *
+ *     NEOS, November 1994. (Latest revision June 1996.)
+ *     Optimization Technology Center.
+ *     Argonne National Laboratory and Northwestern University.
+ *     Written by
+ *                        Ciyou Zhu
+ *     in collaboration with R.H. Byrd, P. Lu-Chen and J. Nocedal
+ */
+int subsm_(long *n, long *m, long *nsub, long *ind,
+  double *l, double *u, long *nbd, double *x,
+  double *d__, double *xp, double *ws, double *wy,
+  double *theta, double *xx, double *gg, long *col,
+  long *head, long *iword, double *wv, double *wn,
+  long *iprint, long *info)
+{
+  /* Every scratch variable is automatic: each one is assigned before it */
+  /* is read, so nothing has to survive between calls. */
+  long ws_dim1, ws_offset, wy_dim1, wy_offset, wn_dim1, wn_offset, i__1, i__2;
+  double d__1, d__2;
+  long i__, j, k, m2;
+  double dk;
+  long js, jy;
+  double xk;
+  long ibd, col2;
+  double dd_p__, temp1, temp2, alpha;
+  long pointr;
+  /* Shift every pointer so that the Fortran-style 1-based subscripts */
+  /* used below address the caller's arrays (ws, wy: leading dimension n; */
+  /* wn: leading dimension 2m). */
+  --gg;
+  --xx;
+  --xp;
+  --d__;
+  --x;
+  --nbd;
+  --u;
+  --l;
+  wn_dim1 = 2 * *m;
+  wn_offset = 1 + wn_dim1;
+  wn -= wn_offset;
+  --wv;
+  wy_dim1 = *n;
+  wy_offset = 1 + wy_dim1;
+  wy -= wy_offset;
+  ws_dim1 = *n;
+  ws_offset = 1 + ws_dim1;
+  ws -= ws_offset;
+  --ind;
+  /* Nothing to do without free variables; iword and info stay untouched. */
+  if (*nsub <= 0) {
+    return 0;
+  }
+  if (*iprint >= 99) {
+    fprintf(stdout, "\n----------------SUBSM entered-----------------\n\n");
+  }
+  /* Compute wv = W'Zd. */
+  pointr = *head;
+  i__1 = *col;
+  for (i__ = 1; i__ <= i__1; ++i__) {
+    temp1 = 0.;
+    temp2 = 0.;
+    i__2 = *nsub;
+    for (j = 1; j <= i__2; ++j) {
+      k = ind[j];
+      temp1 += wy[k + pointr * wy_dim1] * d__[j];
+      temp2 += ws[k + pointr * ws_dim1] * d__[j];
+    }
+    wv[i__] = temp1;
+    wv[*col + i__] = *theta * temp2;
+    pointr = pointr % *m + 1;
+  }
+  /* Compute wv:=K^(-1)wv. */
+  /* Two dtrsl_ solves with the stored factor; the first col entries of */
+  /* wv are negated in between. A nonzero info aborts the routine. */
+  m2 = *m << 1;
+  col2 = *col << 1;
+  dtrsl_(&wn[wn_offset], &m2, &col2, &wv[1], &c__11, info);
+  if (*info != 0) {
+    return 0;
+  }
+  i__1 = *col;
+  for (i__ = 1; i__ <= i__1; ++i__) {
+    wv[i__] = -wv[i__];
+  }
+  dtrsl_(&wn[wn_offset], &m2, &col2, &wv[1], &c__1, info);
+  if (*info != 0) {
+    return 0;
+  }
+  /* Compute d = (1/theta)d + (1/theta**2)Z'W wv. */
+  pointr = *head;
+  i__1 = *col;
+  for (jy = 1; jy <= i__1; ++jy) {
+    js = *col + jy;
+    i__2 = *nsub;
+    for (i__ = 1; i__ <= i__2; ++i__) {
+      k = ind[i__];
+      d__[i__] = d__[i__] + wy[k + pointr * wy_dim1] * wv[jy] / *theta
+        + ws[k + pointr * ws_dim1] * wv[js];
+    }
+    pointr = pointr % *m + 1;
+  }
+  d__1 = 1. / *theta;
+  dscal_(nsub, &d__1, &d__[1], &c__1);
+  /* ----------------------------------------------------------------- */
+  /* Let us try the projection, d is the Newton direction */
+  *iword = 0;
+  /* Keep the incoming x in xp so it can be restored further down. */
+  dcopy_(n, &x[1], &c__1, &xp[1], &c__1);
+  i__1 = *nsub;
+  for (i__ = 1; i__ <= i__1; ++i__) {
+    k = ind[i__];
+    dk = d__[i__];
+    xk = x[k];
+    if (nbd[k] == 0) {
+      /* free variables */
+      x[k] = xk + dk;
+    } else if (nbd[k] == 1) {
+      /* lower bounds only */
+      d__1 = l[k], d__2 = xk + dk;
+      x[k] = d__1 >= d__2 ? d__1 : d__2;
+      if (x[k] == l[k]) {
+        *iword = 1;
+      }
+    } else if (nbd[k] == 2) {
+      /* upper and lower bounds */
+      d__1 = l[k], d__2 = xk + dk;
+      xk = d__1 >= d__2 ? d__1 : d__2;
+      d__1 = u[k];
+      x[k] = d__1 <= xk ? d__1 : xk;
+      if (x[k] == l[k] || x[k] == u[k]) {
+        *iword = 1;
+      }
+    } else if (nbd[k] == 3) {
+      /* upper bounds only */
+      d__1 = u[k], d__2 = xk + dk;
+      x[k] = d__1 <= d__2 ? d__1 : d__2;
+      if (x[k] == u[k]) {
+        *iword = 1;
+      }
+    }
+  }
+  if (*iword == 0) {
+    goto L911;
+  }
+  /* check sign of the directional derivative */
+  dd_p__ = 0.;
+  i__1 = *n;
+  for (i__ = 1; i__ <= i__1; ++i__) {
+    dd_p__ += (x[i__] - xx[i__]) * gg[i__];
+  }
+  if (dd_p__ > 0.) {
+    /* Discard the projected point and restore the saved x. */
+    dcopy_(n, &xp[1], &c__1, &x[1], &c__1);
+    /* iprint < 0 means "no output is generated", so stay silent then. */
+    if (*iprint >= 0) {
+      fprintf(stderr, "  Positive dir derivative in projection\n");
+      fprintf(stderr, "  Using the backtracking step\n");
+    }
+  } else {
+    goto L911;
+  }
+  /* ----------------------------------------------------------------- */
+  /* Backtracking: find the largest alpha in [0,1] for which x + alpha*d */
+  /* stays inside the bounds; ibd remembers the variable that limits it. */
+  alpha = 1.;
+  temp1 = alpha;
+  ibd = 0;
+  i__1 = *nsub;
+  for (i__ = 1; i__ <= i__1; ++i__) {
+    k = ind[i__];
+    dk = d__[i__];
+    if (nbd[k] != 0) {
+      if (dk < 0. && nbd[k] <= 2) {
+        temp2 = l[k] - x[k];
+        if (temp2 >= 0.) {
+          temp1 = 0.;
+        } else if (dk * alpha < temp2) {
+          temp1 = temp2 / dk;
+        }
+      } else if (dk > 0. && nbd[k] >= 2) {
+        temp2 = u[k] - x[k];
+        if (temp2 <= 0.) {
+          temp1 = 0.;
+        } else if (dk * alpha > temp2) {
+          temp1 = temp2 / dk;
+        }
+      }
+      if (temp1 < alpha) {
+        alpha = temp1;
+        ibd = i__;
+      }
+    }
+  }
+  if (alpha < 1.) {
+    /* Put the limiting variable exactly on its bound and zero its step */
+    /* so the update below leaves it there. */
+    dk = d__[ibd];
+    k = ind[ibd];
+    if (dk > 0.) {
+      x[k] = u[k];
+      d__[ibd] = 0.;
+    } else if (dk < 0.) {
+      x[k] = l[k];
+      d__[ibd] = 0.;
+    }
+  }
+  i__1 = *nsub;
+  for (i__ = 1; i__ <= i__1; ++i__) {
+    k = ind[i__];
+    x[k] += alpha * d__[i__];
+  }
+  /* ccccc */
+L911:
+  if (*iprint >= 99) {
+    fprintf(stdout, "\n----------------exit SUBSM --------------------\n\n");
+  }
+  return 0;
+}
+/**
+ * Subroutine dcsrch
+ *
+ *     This subroutine finds a step that satisfies a sufficient
+ *     decrease condition and a curvature condition.
+ *
+ *     Each call of the subroutine updates an interval with
+ *     endpoints stx and sty. The interval is initially chosen
+ *     so that it contains a minimizer of the modified function
+ *
+ *           psi(stp) = f(stp) - f(0) - ftol*stp*f'(0).
+ *
+ *     If psi(stp) <= 0 and f'(stp) >= 0 for some step, then the
+ *     interval is chosen so that it contains a minimizer of f.
+ *
+ *     The algorithm is designed to find a step that satisfies
+ *     the sufficient decrease condition
+ *
+ *           f(stp) <= f(0) + ftol*stp*f'(0),
+ *
+ *     and the curvature condition
+ *
+ *           abs(f'(stp)) <= gtol*abs(f'(0)).
+ *
+ *     If ftol is less than gtol and if, for example, the function
+ *     is bounded below, then there is always a step which satisfies
+ *     both conditions.
+ *
+ *     If no step can be found that satisfies both conditions, then
+ *     the algorithm stops with a warning. In this case stp only
+ *     satisfies the sufficient decrease condition.
+ *
+ *     A typical invocation of dcsrch has the following outline:
+ *
+ *     task = 'START'
+ *  10 continue
+ *        call dcsrch( ... )
+ *        if (task .eq. 'FG') then
+ *           Evaluate the function and the gradient at stp
+ *           goto 10
+ *           end if
+ *
+ *     NOTE: The user must not alter work arrays between calls.
+ *
+ *     The subroutine statement is
+ *
+ *        subroutine dcsrch(f,g,stp,ftol,gtol,xtol,stpmin,stpmax,
+ *                          task,isave,dsave)
+ *     where
+ *
+ *       f is a double precision variable.
+ *         On initial entry f is the value of the function at 0.
+ *            On subsequent entries f is the value of the
+ *            function at stp.
+ *         On exit f is the value of the function at stp.
+ *
+ *       g is a double precision variable.
+ *         On initial entry g is the derivative of the function at 0.
+ *            On subsequent entries g is the derivative of the
+ *            function at stp.
+ *         On exit g is the derivative of the function at stp.
+ *
+ *       stp is a double precision variable.
+ *         On entry stp is the current estimate of a satisfactory
+ *            step. On initial entry, a positive initial estimate
+ *            must be provided.
+ *         On exit stp is the current estimate of a satisfactory step
+ *            if task = 'FG'. If task = 'CONV' then stp satisfies
+ *            the sufficient decrease and curvature condition.
+ *
+ *       ftol is a double precision variable.
+ *         On entry ftol specifies a nonnegative tolerance for the
+ *            sufficient decrease condition.
+ *         On exit ftol is unchanged.
+ *
+ *       gtol is a double precision variable.
+ *         On entry gtol specifies a nonnegative tolerance for the
+ *            curvature condition.
+ *         On exit gtol is unchanged.
+ *
+ *       xtol is a double precision variable.
+ *         On entry xtol specifies a nonnegative relative tolerance
+ *            for an acceptable step. The subroutine exits with a
+ *            warning if the relative difference between sty and stx
+ *            is less than xtol.
+ *         On exit xtol is unchanged.
+ *
+ *       stpmin is a double precision variable.
+ *         On entry stpmin is a nonnegative lower bound for the step.
+ *         On exit stpmin is unchanged.
+ *
+ *       stpmax is a double precision variable.
+ *         On entry stpmax is a nonnegative upper bound for the step.
+ *         On exit stpmax is unchanged.
+ *
+ *       task is a character variable of length at least 60.
+ *         On initial entry task must be set to 'START'.
+ *         On exit task indicates the required action:
+ *
+ *            If task(1:2) = 'FG' then evaluate the function and
+ *            derivative at stp and call dcsrch again.
+ *
+ *            If task(1:4) = 'CONV' then the search is successful.
+ *
+ *            If task(1:4) = 'WARN' then the subroutine is not able
+ *            to satisfy the convergence conditions. The exit value of
+ *            stp contains the best point found during the search.
+ *
+ *            If task(1:5) = 'ERROR' then there is an error in the
+ *            input arguments.
+ *
+ *         On exit with convergence, a warning or an error, the
+ *            variable task contains additional information.
+ *
+ *       isave is a long work array of dimension 2.
+ *
+ *       dsave is a double precision work array of dimension 13.
+ *
+ *     Subprograms called
+ *
+ *       MINPACK-2 ... dcstep
+ *
+ *     MINPACK-1 Project. June 1983.
+ *     Argonne National Laboratory.
+ *     Jorge J. More' and David J. Thuente.
+ *
+ *     MINPACK-2 Project. October 1993.
+ *     Argonne National Laboratory and University of Minnesota.
+ *     Brett M. Averick, Richard G. Carter, and Jorge J. More'.
+ */
+int dcsrch_(double *f, double *g, double *stp,
+  double *ftol, double *gtol, double *xtol,
+  double *stpmin, double *stpmax,
+  char *task, long *isave, double *dsave)
+{ /* One reverse-communication call of the line search.
+   *
+   * If task begins with "START" the inputs are validated and the search
+   * state is initialized; otherwise the state is restored from isave and
+   * dsave, the point (stp, f, g) is tested, and dcstep_ proposes the next
+   * trial step. The outcome is reported through task ("FG", "CONVERGENCE",
+   * "WARNING: ..." or "ERROR: ..."); the function itself always returns 0.
+   *
+   * The longest message copied into task is 42 bytes including the
+   * terminating NUL, so the task buffer must be at least that large. */
+  double d__1;
+  static double fm, gm, fx, fy, gx, gy, fxm, fym, gxm, gym, stx, sty; /* static storage, but every value read below is first assigned here or reloaded from dsave */
+  static long stage;
+  static double finit, ginit, width, ftest, gtest, stmin, stmax, width1;
+  static long brackt; /* used as a boolean; handed to dcstep_ by address */
+  --dsave; /* shift the base pointers so that the 1-based indices used below */
+  --isave; /* (dsave[1..13], isave[1..2]) address the caller's arrays */
+  if (strncmp(task, "START", 5) == 0) {
+    /* Check the input arguments for errors. The checks are independent, so
+     * when several fail, task ends up holding the message of the last one. */
+    if (*stp < *stpmin) {
+      strcpy(task, "ERROR: STP .LT. STPMIN");
+    }
+    if (*stp > *stpmax) {
+      strcpy(task, "ERROR: STP .GT. STPMAX");
+    }
+    if (*g >= 0.) {
+      strcpy(task, "ERROR: INITIAL G .GE. ZERO");
+    }
+    if (*ftol < 0.) {
+      strcpy(task, "ERROR: FTOL .LT. ZERO");
+    }
+    if (*gtol < 0.) {
+      strcpy(task, "ERROR: GTOL .LT. ZERO");
+    }
+    if (*xtol < 0.) {
+      strcpy(task, "ERROR: XTOL .LT. ZERO");
+    }
+    if (*stpmin < 0.) {
+      strcpy(task, "ERROR: STPMIN .LT. ZERO");
+    }
+    if (*stpmax < *stpmin) {
+      strcpy(task, "ERROR: STPMAX .LT. STPMIN");
+    }
+    /* Exit if there are errors on input. */
+    if (strncmp(task, "ERROR", 5) == 0) {
+      return 0; /* nothing is written to isave/dsave on this path */
+    }
+    /* Initialize local variables. */
+    brackt = FALSE_;
+    stage = 1;
+    finit = *f;
+    ginit = *g;
+    gtest = *ftol * ginit; /* slope used in the sufficient decrease bound ftest */
+    width = *stpmax - *stpmin;
+    width1 = width / .5; /* dividing by .5 makes width1 twice width */
+    /* The variables stx, fx, gx contain the values of the step, */
+    /* function, and derivative at the best step. */
+    /* The variables sty, fy, gy contain the value of the step, */
+    /* function, and derivative at sty. */
+    /* The variables stp, f, g contain the values of the step, */
+    /* function, and derivative at stp. */
+    stx = 0.;
+    fx = finit;
+    gx = ginit;
+    sty = 0.;
+    fy = finit;
+    gy = ginit;
+    stmin = 0.;
+    stmax = *stp + *stp * 4.; /* i.e. five times the initial step */
+    strcpy(task, "FG");
+    goto L1000; /* store the fresh state; stp itself is left as supplied */
+  } else {
+    /* Restore local variables. */
+    if (isave[1] == 1) { /* brackt is stored as 1 (true) or 0 (false) */
+      brackt = TRUE_;
+    } else {
+      brackt = FALSE_;
+    }
+    stage = isave[2];
+    ginit = dsave[1];
+    gtest = dsave[2];
+    gx = dsave[3];
+    gy = dsave[4];
+    finit = dsave[5];
+    fx = dsave[6];
+    fy = dsave[7];
+    stx = dsave[8];
+    sty = dsave[9];
+    stmin = dsave[10];
+    stmax = dsave[11];
+    width = dsave[12];
+    width1 = dsave[13];
+  }
+  /* If psi(stp) <= 0 and f'(stp) >= 0 for some step, then the */
+  /* algorithm enters the second stage. In the code below the condition
+   * psi(stp) <= 0 is the comparison f <= ftest. */
+  ftest = finit + *stp * gtest; /* sufficient decrease bound at stp */
+  if (stage == 1 && *f <= ftest && *g >= 0.) {
+    stage = 2;
+  }
+  /* Test for warnings. More than one test may match; the last matching
+   * message stays in task, and a successful convergence test below
+   * overwrites any warning. */
+  if (brackt && (*stp <= stmin || *stp >= stmax)) {
+    strcpy(task, "WARNING: ROUNDING ERRORS PREVENT PROGRESS");
+  }
+  if (brackt && stmax - stmin <= *xtol * stmax) {
+    strcpy(task, "WARNING: XTOL TEST SATISFIED");
+  }
+  if (*stp == *stpmax && *f <= ftest && *g <= gtest) {
+    strcpy(task, "WARNING: STP = STPMAX");
+  }
+  if (*stp == *stpmin && (*f > ftest || *g >= gtest)) {
+    strcpy(task, "WARNING: STP = STPMIN");
+  }
+  /* Test for convergence. START rejects g >= 0, so for a search begun
+   * through START ginit is negative and -ginit is its magnitude. */
+  if (*f <= ftest && fabs(*g) <= *gtol * (-ginit)) {
+    strcpy(task, "CONVERGENCE");
+  }
+  /* Test for termination. */
+  if (strncmp(task, "WARN", 4) == 0 || strncmp(task, "CONV", 4) == 0) {
+    goto L1000; /* stp is not modified on this path */
+  }
+  /* A modified function is used to predict the step during the */
+  /* first stage if a lower function value has been obtained but */
+  /* the decrease is not sufficient. */
+  if (stage == 1 && *f <= fx && *f > ftest) {
+    /* Define the modified function and derivative values. */
+    fm = *f - *stp * gtest;
+    fxm = fx - stx * gtest;
+    fym = fy - sty * gtest;
+    gm = *g - gtest;
+    gxm = gx - gtest;
+    gym = gy - gtest;
+    /* Call dcstep to update stx, sty, and to compute the new step. */
+    dcstep_(&stx, &fxm, &gxm, &sty, &fym, &gym, stp, &fm, &gm, &brackt, &stmin, &stmax);
+    /* Reset the function and derivative values for f. */
+    fx = fxm + stx * gtest;
+    fy = fym + sty * gtest;
+    gx = gxm + gtest;
+    gy = gym + gtest;
+  } else {
+    /* Call dcstep to update stx, sty, and to compute the new step. */
+    dcstep_(&stx, &fx, &gx, &sty, &fy, &gy, stp, f, g, &brackt, &stmin, &stmax);
+  }
+  /* Decide if a bisection step is needed. */
+  if (brackt) {
+    if ((d__1 = sty - stx, fabs(d__1)) >= width1 * .66) { /* interval is still at least 0.66 of the earlier length kept in width1 */
+      *stp = stx + (sty - stx) * .5; /* midpoint of stx and sty */
+    }
+    width1 = width; /* shift the length history by one entry */
+    width = (d__1 = sty - stx, fabs(d__1));
+  }
+  /* Set the minimum and maximum steps allowed for stp. */
+  if (brackt) {
+    stmin = stx <= sty ? stx : sty; /* smaller of stx and sty */
+    stmax = stx >= sty ? stx : sty; /* larger of stx and sty */
+  } else {
+    stmin = *stp + (*stp - stx) * 1.1; /* not bracketed: the allowed range lies */
+    stmax = *stp + (*stp - stx) * 4.; /* 1.1 to 4 times (stp - stx) beyond stp */
+  }
+  /* Force the step to be within the bounds stpmax and stpmin. */
+  *stp = *stp >= *stpmin ? *stp : *stpmin;
+  *stp = *stp <= *stpmax ? *stp : *stpmax;
+  /* If further progress is not possible, let stp be the best */
+  /* point obtained during the search. */
+  if ((brackt && (*stp <= stmin || *stp >= stmax)) || (brackt && (stmax - stmin <= *xtol * stmax))) {
+    *stp = stx;
+  }
+  /* Obtain another function and derivative. */
+  strcpy(task, "FG");
+L1000:
+  /* Save local variables. The layout written here is the one read back by
+   * the restore branch at the top of the function. */
+  if (brackt) {
+    isave[1] = 1;
+  } else {
+    isave[1] = 0;
+  }
+  isave[2] = stage;
+  dsave[1] = ginit;
+  dsave[2] = gtest;
+  dsave[3] = gx;
+  dsave[4] = gy;
+  dsave[5] = finit;
+  dsave[6] = fx;
+  dsave[7] = fy;
+  dsave[8] = stx;
+  dsave[9] = sty;
+  dsave[10] = stmin;
+  dsave[11] = stmax;
+  dsave[12] = width;
+  dsave[13] = width1;
+  return 0;
+}
+/**
+ * Subroutine dcstep
+ *
+ *     This subroutine computes a safeguarded step for a search
+ *     procedure and updates an interval that contains a step that
+ *     satisfies a sufficient decrease and a curvature condition.
+ *
+ *     The parameter stx contains the step with the least function
+ *     value. If brackt is set to .true. then a minimizer has
+ *     been bracketed in an interval with endpoints stx and sty.
+ *     The parameter stp contains the current step.
+ *     The subroutine assumes that if brackt is set to .true. then
+ *
+ *           min(stx,sty) < stp < max(stx,sty),
+ *
+ *     and that the derivative at stx is negative in the direction
+ *     of the step.
+ *
+ *     The subroutine statement is
+ *
+ *       subroutine dcstep(stx,fx,dx,sty,fy,dy,stp,fp,dp,brackt,
+ *                         stpmin,stpmax)
+ *
+ *     where
+ *
+ *       stx is a double precision variable.
+ *         On entry stx is the best step obtained so far and is an
+ *            endpoint of the interval that contains the minimizer.
+ *         On exit stx is the updated best step.
+ *
+ *       fx is a double precision variable.
+ *         On entry fx is the function at stx.
+ *         On exit fx is the function at stx.
+ *
+ *       dx is a double precision variable.
+ *         On entry dx is the derivative of the function at
+ *            stx. The derivative must be negative in the direction of
+ *            the step, that is, dx and stp - stx must have opposite
+ *            signs.
+ *         On exit dx is the derivative of the function at stx.
+ *
+ *       sty is a double precision variable.
+ *         On entry sty is the second endpoint of the interval that
+ *            contains the minimizer.
+ *         On exit sty is the updated endpoint of the interval that
+ *            contains the minimizer.
+ *
+ *       fy is a double precision variable.
+ *         On entry fy is the function at sty.
+ *         On exit fy is the function at sty.
+ *
+ *       dy is a double precision variable.
+ *         On entry dy is the derivative of the function at sty.
+ *         On exit dy is the derivative of the function at the exit sty.
+ *
+ *       stp is a double precision variable.
+ *         On entry stp is the current step. If brackt is set to .true.
+ *            then on input stp must be between stx and sty.
+ *         On exit stp is a new trial step.
+ *
+ *       fp is a double precision variable.
+ *         On entry fp is the function at stp.
+ *         On exit fp is unchanged.
+ *
+ *       dp is a double precision variable.
+ *         On entry dp is the derivative of the function at stp.
+ *         On exit dp is unchanged.
+ *
+ *       brackt is a logical variable (a long used as a boolean in
+ *         this C translation).
+ *         On entry brackt specifies if a minimizer has been bracketed.
+ *            Initially brackt must be set to .false.
+ *         On exit brackt specifies if a minimizer has been bracketed.
+ *            When a minimizer is bracketed brackt is set to .true.
+ *
+ *       stpmin is a double precision variable.
+ *         On entry stpmin is a lower bound for the step.
+ *         On exit stpmin is unchanged.
+ *
+ *       stpmax is a double precision variable.
+ *         On entry stpmax is an upper bound for the step.
+ *         On exit stpmax is unchanged.
+ *
+ *     Notes on this C translation
+ *
+ *       Every argument is passed by pointer and the return value is
+ *       always 0.
+ *
+ *       The routine evaluates dx / fabs(dx) to obtain the sign of dx,
+ *       so dx must be nonzero on entry.
+ *
+ *       The scratch variables are declared static.
+ *       NOTE(review): concurrent calls would share that storage -
+ *       confirm that callers never run line searches in parallel.
+ *
+ *     MINPACK-1 Project. June 1983.
+ *     Argonne National Laboratory.
+ *     Jorge J. More' and David J. Thuente.
+ *
+ *     MINPACK-2 Project. October 1993.
+ *     Argonne National Laboratory and University of Minnesota.
+ *     Brett M. Averick and Jorge J. More'.
+ */
+int dcstep_(double *stx, double *fx, double *dx,
+  double *sty, double *fy, double *dy, double *stp,
+  double *fp, double *dp, long *brackt, double *stpmin,
+  double *stpmax)
+{
+  double d__1, d__2, d__3; /* temporaries for the fabs/min/max expressions */
+  static double p, q, r__, s, sgnd, stpc, stpf, stpq, gamma, theta; /* each is assigned before it is read on every path below */
+  sgnd = *dp * (*dx / fabs(*dx)); /* dp times the sign of dx: negative when dp and dx have opposite signs */
+  /* First case: A higher function value. The minimum is bracketed. */
+  /* If the cubic step is closer to stx than the quadratic step, the */
+  /* cubic step is taken, otherwise the average of the cubic and */
+  /* quadratic steps is taken. */
+  if (*fp > *fx) {
+    theta = (*fx - *fp) * 3. / (*stp - *stx) + *dx + *dp;
+    d__1 = fabs(theta);
+    d__2 = fabs(*dx);
+    d__1 = d__1 >= d__2 ? d__1 : d__2;
+    d__2 = fabs(*dp);
+    s = d__1 >= d__2 ? d__1 : d__2; /* s = max(|theta|, |dx|, |dp|), used to scale the terms under the square root */
+    d__1 = theta / s;
+    gamma = s * sqrt(d__1 * d__1 - *dx / s * (*dp / s));
+    if (*stp < *stx) {
+      gamma = -gamma;
+    }
+    p = gamma - *dx + theta;
+    q = gamma - *dx + gamma + *dp;
+    r__ = p / q;
+    stpc = *stx + r__ * (*stp - *stx); /* cubic step */
+    stpq = *stx + *dx / ((*fx - *fp) / (*stp - *stx) + *dx) / 2. * (*stp - *stx); /* quadratic step */
+    if ((d__1 = stpc - *stx, fabs(d__1)) < (d__2 = stpq - *stx, fabs(d__2))) {
+      stpf = stpc;
+    } else {
+      stpf = stpc + (stpq - stpc) / 2.; /* midpoint of the cubic and quadratic steps */
+    }
+    *brackt = TRUE_;
+  /* Second case: A lower function value and derivatives of opposite */
+  /* sign. The minimum is bracketed. If the cubic step is farther from */
+  /* stp than the secant step, the cubic step is taken, otherwise the */
+  /* secant step is taken. */
+  } else if (sgnd < 0.) {
+    theta = (*fx - *fp) * 3. / (*stp - *stx) + *dx + *dp;
+    d__1 = fabs(theta);
+    d__2 = fabs(*dx);
+    d__1 = d__1 >= d__2 ? d__1 : d__2;
+    d__2 = fabs(*dp);
+    s = d__1 >= d__2 ? d__1 : d__2; /* s = max(|theta|, |dx|, |dp|) */
+    d__1 = theta / s;
+    gamma = s * sqrt(d__1 * d__1 - *dx / s * (*dp / s));
+    if (*stp > *stx) {
+      gamma = -gamma;
+    }
+    p = gamma - *dp + theta;
+    q = gamma - *dp + gamma + *dx;
+    r__ = p / q;
+    stpc = *stp + r__ * (*stx - *stp); /* cubic step */
+    stpq = *stp + *dp / (*dp - *dx) * (*stx - *stp); /* secant step */
+    if ((d__1 = stpc - *stp, fabs(d__1)) > (d__2 = stpq - *stp, fabs(d__2))) {
+      stpf = stpc;
+    } else {
+      stpf = stpq;
+    }
+    *brackt = TRUE_;
+  /* Third case: A lower function value, derivatives of the same sign, */
+  /* and the magnitude of the derivative decreases. */
+  } else if (fabs(*dp) < fabs(*dx)) {
+    /* The cubic step is computed only if the cubic tends to infinity */
+    /* in the direction of the step or if the minimum of the cubic */
+    /* is beyond stp. Otherwise the cubic step is defined to be the */
+    /* secant step. */
+    theta = (*fx - *fp) * 3. / (*stp - *stx) + *dx + *dp;
+    d__1 = fabs(theta);
+    d__2 = fabs(*dx);
+    d__1 = d__1 >= d__2 ? d__1 : d__2;
+    d__2 = fabs(*dp);
+    s = d__1 >= d__2 ? d__1 : d__2; /* s = max(|theta|, |dx|, |dp|) */
+    /* The case gamma = 0 only arises if the cubic does not tend */
+    /* to infinity in the direction of the step. */
+    d__3 = theta / s;
+    d__1 = 0.;
+    d__2 = d__3 * d__3 - *dx / s * (*dp / s);
+    gamma = s * sqrt(d__1 >= d__2 ? d__1 : d__2); /* the radicand is clamped at zero before the square root */
+    if (*stp > *stx) {
+      gamma = -gamma;
+    }
+    p = gamma - *dp + theta;
+    q = gamma + (*dx - *dp) + gamma;
+    r__ = p / q;
+    if (r__ < 0. && gamma != 0.) {
+      stpc = *stp + r__ * (*stx - *stp);
+    } else if (*stp > *stx) {
+      stpc = *stpmax;
+    } else {
+      stpc = *stpmin;
+    }
+    stpq = *stp + *dp / (*dp - *dx) * (*stx - *stp); /* secant step */
+    if (*brackt) {
+      /* A minimizer has been bracketed. If the cubic step is */
+      /* closer to stp than the secant step, the cubic step is */
+      /* taken, otherwise the secant step is taken. */
+      if ((d__1 = stpc - *stp, fabs(d__1)) < (d__2 = stpq - *stp, fabs(d__2))) {
+        stpf = stpc;
+      } else {
+        stpf = stpq;
+      }
+      if (*stp > *stx) { /* limit the step to 0.66 of the way from stp towards sty */
+        d__1 = *stp + (*sty - *stp) * .66;
+        stpf = d__1 <= stpf ? d__1 : stpf;
+      } else {
+        d__1 = *stp + (*sty - *stp) * .66;
+        stpf = d__1 >= stpf ? d__1 : stpf;
+      }
+    } else {
+      /* A minimizer has not been bracketed. If the cubic step is */
+      /* farther from stp than the secant step, the cubic step is */
+      /* taken, otherwise the secant step is taken. */
+      if ((d__1 = stpc - *stp, fabs(d__1)) > (d__2 = stpq - *stp, fabs(d__2))) {
+        stpf = stpc;
+      } else {
+        stpf = stpq;
+      }
+      stpf = *stpmax <= stpf ? *stpmax : stpf; /* clamp from above at stpmax */
+      stpf = *stpmin >= stpf ? *stpmin : stpf; /* clamp from below at stpmin */
+    }
+  /* Fourth case: A lower function value, derivatives of the same sign, */
+  /* and the magnitude of the derivative does not decrease. If the */
+  /* minimum is not bracketed, the step is either stpmin or stpmax, */
+  /* otherwise the cubic step is taken. */
+  } else {
+    if (*brackt) {
+      theta = (*fp - *fy) * 3. / (*sty - *stp) + *dy + *dp; /* this cubic uses stp and sty rather than stx */
+      d__1 = fabs(theta);
+      d__2 = fabs(*dy);
+      d__1 = d__1 >= d__2 ? d__1 : d__2;
+      d__2 = fabs(*dp);
+      s = d__1 >= d__2 ? d__1: d__2; /* s = max(|theta|, |dy|, |dp|) */
+      d__1 = theta / s;
+      gamma = s * sqrt(d__1 * d__1 - *dy / s * (*dp / s));
+      if (*stp > *sty) {
+        gamma = -gamma;
+      }
+      p = gamma - *dp + theta;
+      q = gamma - *dp + gamma + *dy;
+      r__ = p / q;
+      stpc = *stp + r__ * (*sty - *stp);
+      stpf = stpc;
+    } else if (*stp > *stx) {
+      stpf = *stpmax;
+    } else {
+      stpf = *stpmin;
+    }
+  }
+  /* Update the interval which contains a minimizer. */
+  if (*fp > *fx) { /* higher value at stp: stp becomes the endpoint sty */
+    *sty = *stp;
+    *fy = *fp;
+    *dy = *dp;
+  } else {
+    if (sgnd < 0.) { /* derivative changed sign: the old best point becomes sty */
+      *sty = *stx;
+      *fy = *fx;
+      *dy = *dx;
+    }
+    *stx = *stp; /* stp becomes the new best point */
+    *fx = *fp;
+    *dx = *dp;
+  }
+  /* Compute the new step. */
+  *stp = stpf;
+  return 0;
+}
+int timer_(double *ttime) /* Stores the current clock() reading, converted to seconds, in *ttime. */
+{
+  *ttime = (double)clock() / CLOCKS_PER_SEC; /* clock() counts in ticks; CLOCKS_PER_SEC ticks make one second */
+  return 0; /* the return value is always 0 */
+}