RubyGems - lbfgsb - Versions diffs - 0.1.0 - Mend

lbfgsb 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

data/ext/lbfgsb/src/blas.c ADDED

@@ -0,0 +1,287 @@
+/**
+ * L-BFGS-B is released under the “New BSD License” (aka “Modified BSD License”
+ * or “3-clause license”)
+ * Please read attached file License.txt
+ */
+#include "blas.h"
+double dnrm2_(long *n, double *x, long *incx)
+{
+  long i__1, i__2;
+  double ret_val, d__1, d__2, d__3;
+  static long i__;
+  static double scale;
+  --x;
+  ret_val = 0.;
+  scale = 0.;
+  i__1 = *n;
+  i__2 = *incx;
+  for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+    d__2 = scale, d__3 = (d__1 = x[i__], fabs(d__1));
+    scale = d__2 >= d__3 ? d__2 : d__3;
+  }
+  if (scale == 0.) {
+    return ret_val;
+  }
+  i__2 = *n;
+  i__1 = *incx;
+  for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) {
+    d__1 = x[i__] / scale;
+    ret_val += d__1 * d__1;
+  }
+  ret_val = scale * sqrt(ret_val);
+  return ret_val;
+}
+int daxpy_(long *n, double *da, double *dx, long *incx, double *dy, long *incy)
+{
+  long i__1;
+  static long i__, m, ix, iy, mp1;
+  --dy;
+  --dx;
+  /* constant times a vector plus a vector. */
+  /* uses unrolled loops for increments equal to one. */
+  /* jack dongarra, linpack, 3/11/78. */
+  if (*n <= 0) {
+    return 0;
+  }
+  if (*da == 0.) {
+    return 0;
+  }
+  if (*incx == 1 && *incy == 1) {
+    goto L20;
+  }
+  /* code for unequal increments or equal increments */
+  /*   not equal to 1 */
+  ix = 1;
+  iy = 1;
+  if (*incx < 0) {
+    ix = (-(*n) + 1) * *incx + 1;
+  }
+  if (*incy < 0) {
+    iy = (-(*n) + 1) * *incy + 1;
+  }
+  i__1 = *n;
+  for (i__ = 1; i__ <= i__1; ++i__) {
+    dy[iy] += *da * dx[ix];
+    ix += *incx;
+    iy += *incy;
+  }
+  return 0;
+  /* code for both increments equal to 1 */
+  /* clean-up loop */
+L20:
+  m = *n % 4;
+  if (m == 0) {
+    goto L40;
+  }
+  i__1 = m;
+  for (i__ = 1; i__ <= i__1; ++i__) {
+    dy[i__] += *da * dx[i__];
+  }
+  if (*n < 4) {
+    return 0;
+  }
+L40:
+  mp1 = m + 1;
+  i__1 = *n;
+  for (i__ = mp1; i__ <= i__1; i__ += 4) {
+    dy[i__] += *da * dx[i__];
+    dy[i__ + 1] += *da * dx[i__ + 1];
+    dy[i__ + 2] += *da * dx[i__ + 2];
+    dy[i__ + 3] += *da * dx[i__ + 3];
+  }
+  return 0;
+}
+int dcopy_(long *n, double *dx, long *incx, double *dy, long *incy)
+{
+  long i__1;
+  static long i__, m, ix, iy, mp1;
+  --dy;
+  --dx;
+  /* copies a vector, x, to a vector, y. */
+  /* uses unrolled loops for increments equal to one. */
+  /* jack dongarra, linpack, 3/11/78. */
+  if (*n <= 0) {
+    return 0;
+  }
+  if (*incx == 1 && *incy == 1) {
+    goto L20;
+  }
+  /* code for unequal increments or equal increments */
+  /*   not equal to 1 */
+  ix = 1;
+  iy = 1;
+  if (*incx < 0) {
+    ix = (-(*n) + 1) * *incx + 1;
+  }
+  if (*incy < 0) {
+    iy = (-(*n) + 1) * *incy + 1;
+  }
+  i__1 = *n;
+  for (i__ = 1; i__ <= i__1; ++i__) {
+    dy[iy] = dx[ix];
+    ix += *incx;
+    iy += *incy;
+  }
+  return 0;
+  /* code for both increments equal to 1 */
+  /* clean-up loop */
+L20:
+  m = *n % 7;
+  if (m == 0) {
+    goto L40;
+  }
+  i__1 = m;
+  for (i__ = 1; i__ <= i__1; ++i__) {
+    dy[i__] = dx[i__];
+  }
+  if (*n < 7) {
+    return 0;
+  }
+L40:
+  mp1 = m + 1;
+  i__1 = *n;
+  for (i__ = mp1; i__ <= i__1; i__ += 7) {
+    dy[i__] = dx[i__];
+    dy[i__ + 1] = dx[i__ + 1];
+    dy[i__ + 2] = dx[i__ + 2];
+    dy[i__ + 3] = dx[i__ + 3];
+    dy[i__ + 4] = dx[i__ + 4];
+    dy[i__ + 5] = dx[i__ + 5];
+    dy[i__ + 6] = dx[i__ + 6];
+  }
+  return 0;
+}
+double ddot_(long *n, double *dx, long *incx, double *dy, long *incy)
+{
+  long i__1;
+  double ret_val;
+  static long i__, m, ix, iy, mp1;
+  static double dtemp;
+  --dy;
+  --dx;
+  /* forms the dot product of two vectors. */
+  /* uses unrolled loops for increments equal to one. */
+  /* jack dongarra, linpack, 3/11/78. */
+  ret_val = 0.;
+  dtemp = 0.;
+  if (*n <= 0) {
+    return ret_val;
+  }
+  if (*incx == 1 && *incy == 1) {
+    goto L20;
+  }
+  /* code for unequal increments or equal increments */
+  /*   not equal to 1 */
+  ix = 1;
+  iy = 1;
+  if (*incx < 0) {
+    ix = (-(*n) + 1) * *incx + 1;
+  }
+  if (*incy < 0) {
+    iy = (-(*n) + 1) * *incy + 1;
+  }
+  i__1 = *n;
+  for (i__ = 1; i__ <= i__1; ++i__) {
+    dtemp += dx[ix] * dy[iy];
+    ix += *incx;
+    iy += *incy;
+  }
+  ret_val = dtemp;
+  return ret_val;
+  /* code for both increments equal to 1 */
+  /* clean-up loop */
+L20:
+  m = *n % 5;
+  if (m == 0) {
+    goto L40;
+  }
+  i__1 = m;
+  for (i__ = 1; i__ <= i__1; ++i__) {
+    dtemp += dx[i__] * dy[i__];
+  }
+  if (*n < 5) {
+    goto L60;
+  }
+L40:
+  mp1 = m + 1;
+  i__1 = *n;
+  for (i__ = mp1; i__ <= i__1; i__ += 5) {
+    dtemp = dtemp + dx[i__] * dy[i__] + dx[i__ + 1] * dy[i__ + 1]
+      + dx[i__ + 2] * dy[i__ + 2] + dx[i__ + 3] * dy[i__ + 3] + dx[i__ + 4] * dy[i__ + 4];
+  }
+L60:
+  ret_val = dtemp;
+  return ret_val;
+}
+int dscal_(long *n, double *da, double *dx, long *incx)
+{
+  long i__1, i__2;
+  static long i__, m, mp1, nincx;
+  --dx;
+  /* scales a vector by a constant. */
+  /* uses unrolled loops for increment equal to one. */
+  /* jack dongarra, linpack, 3/11/78. */
+  /* modified 3/93 to return if incx .le. 0. */
+  if (*n <= 0 || *incx <= 0) {
+    return 0;
+  }
+  if (*incx == 1) {
+    goto L20;
+  }
+  /* code for increment not equal to 1 */
+  nincx = *n * *incx;
+  i__1 = nincx;
+  i__2 = *incx;
+  for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
+    dx[i__] = *da * dx[i__];
+  }
+  return 0;
+  /* code for increment equal to 1 */
+  /* clean-up loop */
+L20:
+  m = *n % 5;
+  if (m == 0) {
+    goto L40;
+  }
+  i__2 = m;
+  for (i__ = 1; i__ <= i__2; ++i__) {
+    dx[i__] = *da * dx[i__];
+  }
+  if (*n < 5) {
+    return 0;
+  }
+L40:
+  mp1 = m + 1;
+  i__2 = *n;
+  for (i__ = mp1; i__ <= i__2; i__ += 5) {
+    dx[i__] = *da * dx[i__];
+    dx[i__ + 1] = *da * dx[i__ + 1];
+    dx[i__ + 2] = *da * dx[i__ + 2];
+    dx[i__ + 3] = *da * dx[i__ + 3];
+    dx[i__ + 4] = *da * dx[i__ + 4];
+  }
+  return 0;
+}

data/ext/lbfgsb/src/blas.h ADDED

@@ -0,0 +1,12 @@
+#ifndef LBFGSB_RB_BLAS_H_
+#define LBFGSB_RB_BLAS_H_
+#include <math.h>
+extern double dnrm2_(long *n, double *x, long *incx);
+extern int daxpy_(long *n, double *da, double *dx, long *incx, double *dy, long *incy);
+extern int dcopy_(long *n, double *dx, long *incx, double *dy, long *incy);
+extern double ddot_(long *n, double *dx, long *incx, double *dy, long *incy);
+extern int dscal_(long *n, double *da, double *dx, long *incx);
+#endif /* LBFGSB_RB_BLAS_H_ */

data/ext/lbfgsb/src/lbfgsb.c ADDED

@@ -0,0 +1,4096 @@
+/**
+ * L-BFGS-B is released under the “New BSD License” (aka “Modified BSD License”
+ * or “3-clause license”)
+ * Please read attached file License.txt
+ *
+ * ===========   L-BFGS-B (version 3.0.  April 25, 2011  ===================
+ *
+ *     This is a modified version of L-BFGS-B. Minor changes in the updated
+ *     code appear preceded by a line comment as follows
+ *
+ *     jlm-jn
+ *
+ *     Major changes are described in the accompanying paper:
+ *
+ *         Jorge Nocedal and Jose Luis Morales, Remark on "Algorithm 778:
+ *         L-BFGS-B: Fortran Subroutines for Large-Scale Bound Constrained
+ *         Optimization"  (2011). To appear in  ACM Transactions on
+ *         Mathematical Software,
+ *
+ *     The paper describes an improvement and a correction to Algorithm 778.
+ *     It is shown that the performance of the algorithm can be improved
+ *     significantly by making a relatively simple modication to the subspace
+ *     minimization phase. The correction concerns an error caused by the use
+ *     of routine dpmeps to estimate machine precision.
+ *
+ *     The total work space **wa** required by the new version is
+ *
+ *                  2*m*n + 11m*m + 5*n + 8*m
+ *
+ *     the old version required
+ *
+ *                  2*m*n + 12m*m + 4*n + 12*m
+ *
+ *
+ *            J. Nocedal  Department of Electrical Engineering and
+ *                        Computer Science.
+ *                        Northwestern University. Evanston, IL. USA
+ *
+ *
+ *           J.L Morales  Departamento de Matematicas,
+ *                        Instituto Tecnologico Autonomo de Mexico
+ *                        Mexico D.F. Mexico.
+ *
+ *                        March  2011
+ */
+#include "blas.h"
+#include "linpack.h"
+#include "lbfgsb.h"
+static double c_b9 = 0.;
+static long c__1 = 1;
+static long c__11 = 11;
+static double c_b280 = .001;
+static double c_b281 = .9;
+static double c_b282 = .1;
+/**
+ * Subroutine setulb
+ *
+ *     This subroutine partitions the working arrays wa and iwa, and
+ *       then uses the limited memory BFGS method to solve the bound
+ *       constrained optimization problem by calling mainlb.
+ *       (The direct method will be used in the subspace minimization.)
+ *
+ *     n is an long variable.
+ *       On entry n is the dimension of the problem.
+ *       On exit n is unchanged.
+ *
+ *     m is an long variable.
+ *       On entry m is the maximum number of variable metric corrections
+ *         used to define the limited memory matrix.
+ *       On exit m is unchanged.
+ *
+ *     x is a double precision array of dimension n.
+ *       On entry x is an approximation to the solution.
+ *       On exit x is the current approximation.
+ *
+ *     l is a double precision array of dimension n.
+ *       On entry l is the lower bound on x.
+ *       On exit l is unchanged.
+ *
+ *     u is a double precision array of dimension n.
+ *       On entry u is the upper bound on x.
+ *       On exit u is unchanged.
+ *
+ *     nbd is an long array of dimension n.
+ *       On entry nbd represents the type of bounds imposed on the
+ *         variables, and must be specified as follows:
+ *         nbd(i)=0 if x(i) is unbounded,
+ *                1 if x(i) has only a lower bound,
+ *                2 if x(i) has both lower and upper bounds, and
+ *                3 if x(i) has only an upper bound.
+ *       On exit nbd is unchanged.
+ *
+ *     f is a double precision variable.
+ *       On first entry f is unspecified.
+ *       On final exit f is the value of the function at x.
+ *
+ *     g is a double precision array of dimension n.
+ *       On first entry g is unspecified.
+ *       On final exit g is the value of the gradient at x.
+ *
+ *     factr is a double precision variable.
+ *       On entry factr >= 0 is specified by the user.  The iteration
+ *         will stop when
+ *
+ *         (f^k - f^{k+1})/max{|f^k|,|f^{k+1}|,1} <= factr*epsmch
+ *
+ *         where epsmch is the machine precision, which is automatically
+ *         generated by the code. Typical values for factr: 1.d+12 for
+ *         low accuracy; 1.d+7 for moderate accuracy; 1.d+1 for extremely
+ *         high accuracy.
+ *
+ *       On exit factr is unchanged.
+ *
+ *     pgtol is a double precision variable.
+ *       On entry pgtol >= 0 is specified by the user.  The iteration
+ *         will stop when
+ *
+ *                 max{|proj g_i | i = 1, ..., n} <= pgtol
+ *
+ *         where pg_i is the ith component of the projected gradient.
+ *       On exit pgtol is unchanged.
+ *
+ *     wa is a double precision working array of length
+ *       (2mmax + 5)nmax + 12mmax^2 + 12mmax.
+ *
+ *     iwa is an long working array of length 3nmax.
+ *
+ *     task is a working string of characters of length 60 indicating
+ *       the current job when entering and quitting this subroutine.
+ *
+ *     iprint is an long variable that must be set by the user.
+ *       It controls the frequency and type of output generated:
+ *        iprint<0    no output is generated;
+ *        iprint=0    print only one line at the last iteration;
+ *        0<iprint<99 print also f and |proj g| every iprint iterations;
+ *        iprint=99   print details of every iteration except n-vectors;
+ *        iprint=100  print also the changes of active set and final x;
+ *        iprint>100  print details of every iteration including x and g;
+ *       When iprint > 0, the file iterate.dat will be created to
+ *                        summarize the iteration.
+ *
+ *     csave is a working string of characters of length 60.
+ *
+ *     lsave is a logical working array of dimension 4.
+ *       On exit with 'task' = NEW_X, the following information is
+ *                                                             available:
+ *         If lsave(1) = .true.  then  the initial X has been replaced by
+ *                                     its projection in the feasible set;
+ *         If lsave(2) = .true.  then  the problem is constrained;
+ *         If lsave(3) = .true.  then  each variable has upper and lower
+ *                                     bounds;
+ *
+ *     isave is an long working array of dimension 44.
+ *       On exit with 'task' = NEW_X, the following information is
+ *                                                             available:
+ *         isave(22) = the total number of intervals explored in the
+ *                         search of Cauchy points;
+ *         isave(26) = the total number of skipped BFGS updates before
+ *                         the current iteration;
+ *         isave(30) = the number of current iteration;
+ *         isave(31) = the total number of BFGS updates prior the current
+ *                         iteration;
+ *         isave(33) = the number of intervals explored in the search of
+ *                         Cauchy point in the current iteration;
+ *         isave(34) = the total number of function and gradient
+ *                         evaluations;
+ *         isave(36) = the number of function value or gradient
+ *                                  evaluations in the current iteration;
+ *         if isave(37) = 0  then the subspace argmin is within the box;
+ *         if isave(37) = 1  then the subspace argmin is beyond the box;
+ *         isave(38) = the number of free variables in the current
+ *                         iteration;
+ *         isave(39) = the number of active constraints in the current
+ *                         iteration;
+ *         n + 1 - isave(40) = the number of variables leaving the set of
+ *                           active constraints in the current iteration;
+ *         isave(41) = the number of variables entering the set of active
+ *                         constraints in the current iteration.
+ *
+ *     dsave is a double precision working array of dimension 29.
+ *       On exit with 'task' = NEW_X, the following information is
+ *                                                             available:
+ *         dsave(1) = current 'theta' in the BFGS matrix;
+ *         dsave(2) = f(x) in the previous iteration;
+ *         dsave(3) = factr*epsmch;
+ *         dsave(4) = 2-norm of the line search direction vector;
+ *         dsave(5) = the machine precision epsmch generated by the code;
+ *         dsave(7) = the accumulated time spent on searching for
+ *                                                         Cauchy points;
+ *         dsave(8) = the accumulated time spent on
+ *                                                 subspace minimization;
+ *         dsave(9) = the accumulated time spent on line search;
+ *         dsave(11) = the slope of the line search function at
+ *                                  the current point of line search;
+ *         dsave(12) = the maximum relative step length imposed in
+ *                                                           line search;
+ *         dsave(13) = the infinity norm of the projected gradient;
+ *         dsave(14) = the relative step length in the line search;
+ *         dsave(15) = the slope of the line search function at
+ *                                 the starting point of the line search;
+ *         dsave(16) = the square of the 2-norm of the line search
+ *                                                      direction vector.
+ *
+ *     Subprograms called:
+ *
+ *       L-BFGS-B Library ... mainlb.
+ *
+ *
+ *     References:
+ *
+ *       [1] R. H. Byrd, P. Lu, J. Nocedal and C. Zhu, ``A limited
+ *       memory algorithm for bound constrained optimization'',
+ *       SIAM J. Scientific Computing 16 (1995), no. 5, pp. 1190--1208.
+ *
+ *       [2] C. Zhu, R.H. Byrd, P. Lu, J. Nocedal, ``L-BFGS-B: a
+ *       limited memory FORTRAN code for solving bound constrained
+ *       optimization problems'', Tech. Report, NAM-11, EECS Department,
+ *       Northwestern University, 1994.
+ *
+ *       (Postscript files of these papers are available via anonymous
+ *        ftp to eecs.nwu.edu in the directory pub/lbfgs/lbfgs_bcm.)
+ *
+ *                       *  *  *
+ *
+ *     NEOS, November 1994. (Latest revision June 1996.)
+ *     Optimization Technology Center.
+ *     Argonne National Laboratory and Northwestern University.
+ *     Written by
+ *                        Ciyou Zhu
+ *     in collaboration with R.H. Byrd, P. Lu-Chen and J. Nocedal.
+ */
+int setulb_(long *n, long *m, double *x,
+  double *l, double *u, long *nbd, double *f, double *g,
+  double *factr, double *pgtol, double *wa, long *iwa,
+  char *task, long *iprint, char *csave, long *lsave,
+  long *isave, double *dsave)
+{
+  long i__1;
+  static long ld, lr, lt, lz, lwa, lwn, lss, lxp, lws, lwt, lsy, lwy, lsnd;
+  /* jlm-jn */
+  --iwa;
+  --g;
+  --nbd;
+  --u;
+  --l;
+  --x;
+  --wa;
+  --lsave;
+  --isave;
+  --dsave;
+  if (strncmp(task, "START", 5) == 0) {
+    isave[1] = *m * *n;
+    i__1 = *m;
+    isave[2] = i__1 * i__1;
+    i__1 = *m;
+    isave[3] = i__1 * i__1 << 2;
+    isave[4] = 1;                     /* ws      m*n    */
+    isave[5] = isave[4] + isave[1];   /* wy      m*n    */
+    isave[6] = isave[5] + isave[1];   /* wsy     m**2   */
+    isave[7] = isave[6] + isave[2];   /* wss     m**2   */
+    isave[8] = isave[7] + isave[2];   /* wt      m**2   */
+    isave[9] = isave[8] + isave[2];   /* wn      4*m**2 */
+    isave[10] = isave[9] + isave[3];  /* wsnd    4*m**2 */
+    isave[11] = isave[10] + isave[3]; /* wz      n      */
+    isave[12] = isave[11] + *n;       /* wr      n      */
+    isave[13] = isave[12] + *n;       /* wd      n      */
+    isave[14] = isave[13] + *n;       /* wt      n      */
+    isave[15] = isave[14] + *n;       /* wxp     n      */
+    isave[16] = isave[15] + *n;       /* wa      8*m    */
+  }
+  lws = isave[4];
+  lwy = isave[5];
+  lsy = isave[6];
+  lss = isave[7];
+  lwt = isave[8];
+  lwn = isave[9];
+  lsnd = isave[10];
+  lz = isave[11];
+  lr = isave[12];
+  ld = isave[13];
+  lt = isave[14];
+  lxp = isave[15];
+  lwa = isave[16];
+  mainlb_(n, m, &x[1], &l[1], &u[1], &nbd[1], f, &g[1], factr, pgtol, &wa[lws],
+          &wa[lwy], &wa[lsy], &wa[lss], &wa[lwt], &wa[lwn], &wa[lsnd],
+          &wa[lz], &wa[lr], &wa[ld], &wa[lt], &wa[lxp], &wa[lwa], &iwa[1],
+          &iwa[*n + 1], &iwa[(*n << 1) + 1], task, iprint, csave, &lsave[1],
+          &isave[22], &dsave[1]);
+  return 0;
+}
+/**
+ * Subroutine mainlb
+ *
+ *     This subroutine solves bound constrained optimization problems by
+ *       using the compact formula of the limited memory BFGS updates.
+ *
+ *     n is an long variable.
+ *       On entry n is the number of variables.
+ *       On exit n is unchanged.
+ *
+ *     m is an long variable.
+ *       On entry m is the maximum number of variable metric
+ *          corrections allowed in the limited memory matrix.
+ *       On exit m is unchanged.
+ *
+ *     x is a double precision array of dimension n.
+ *       On entry x is an approximation to the solution.
+ *       On exit x is the current approximation.
+ *
+ *     l is a double precision array of dimension n.
+ *       On entry l is the lower bound of x.
+ *       On exit l is unchanged.
+ *
+ *     u is a double precision array of dimension n.
+ *       On entry u is the upper bound of x.
+ *       On exit u is unchanged.
+ *
+ *     nbd is an long array of dimension n.
+ *       On entry nbd represents the type of bounds imposed on the
+ *         variables, and must be specified as follows:
+ *         nbd(i)=0 if x(i) is unbounded,
+ *                1 if x(i) has only a lower bound,
+ *                2 if x(i) has both lower and upper bounds,
+ *                3 if x(i) has only an upper bound.
+ *       On exit nbd is unchanged.
+ *
+ *     f is a double precision variable.
+ *       On first entry f is unspecified.
+ *       On final exit f is the value of the function at x.
+ *
+ *     g is a double precision array of dimension n.
+ *       On first entry g is unspecified.
+ *       On final exit g is the value of the gradient at x.
+ *
+ *     factr is a double precision variable.
+ *       On entry factr >= 0 is specified by the user.  The iteration
+ *         will stop when
+ *
+ *         (f^k - f^{k+1})/max{|f^k|,|f^{k+1}|,1} <= factr*epsmch
+ *
+ *         where epsmch is the machine precision, which is automatically
+ *         generated by the code.
+ *       On exit factr is unchanged.
+ *
+ *     pgtol is a double precision variable.
+ *       On entry pgtol >= 0 is specified by the user.  The iteration
+ *         will stop when
+ *
+ *                 max{|proj g_i | i = 1, ..., n} <= pgtol
+ *
+ *         where pg_i is the ith component of the projected gradient.
+ *       On exit pgtol is unchanged.
+ *
+ *     ws, wy, sy, and wt are double precision working arrays used to
+ *       store the following information defining the limited memory
+ *          BFGS matrix:
+ *          ws, of dimension n x m, stores S, the matrix of s-vectors;
+ *          wy, of dimension n x m, stores Y, the matrix of y-vectors;
+ *          sy, of dimension m x m, stores S'Y;
+ *          ss, of dimension m x m, stores S'S;
+ *          yy, of dimension m x m, stores Y'Y;
+ *          wt, of dimension m x m, stores the Cholesky factorization
+ *                                  of (theta*S'S+LD^(-1)L'); see eq.
+ *                                  (2.26) in [3].
+ *
+ *     wn is a double precision working array of dimension 2m x 2m
+ *       used to store the LEL^T factorization of the indefinite matrix
+ *                 K = [-D -Y'ZZ'Y/theta     L_a'-R_z'  ]
+ *                     [L_a -R_z           theta*S'AA'S ]
+ *
+ *       where     E = [-I  0]
+ *                     [ 0  I]
+ *
+ *     snd is a double precision working array of dimension 2m x 2m
+ *       used to store the lower triangular part of
+ *                 N = [Y' ZZ'Y   L_a'+R_z']
+ *                     [L_a +R_z  S'AA'S   ]
+ *
+ *     z(n),r(n),d(n),t(n), xp(n),wa(8*m) are double precision working arrays.
+ *       z  is used at different times to store the Cauchy point and
+ *          the Newton point.
+ *       xp is used to safeguard the projected Newton direction
+ *
+ *     sg(m),sgo(m),yg(m),ygo(m) are double precision working arrays.
+ *
+ *     index is an long working array of dimension n.
+ *       In subroutine freev, index is used to store the free and fixed
+ *          variables at the Generalized Cauchy Point (GCP).
+ *
+ *     iwhere is an long working array of dimension n used to record
+ *       the status of the vector x for GCP computation.
+ *       iwhere(i)=0 or -3 if x(i) is free and has bounds,
+ *                 1       if x(i) is fixed at l(i), and l(i) .ne. u(i)
+ *                 2       if x(i) is fixed at u(i), and u(i) .ne. l(i)
+ *                 3       if x(i) is always fixed, i.e.,  u(i)=x(i)=l(i)
+ *                -1       if x(i) is always free, i.e., no bounds on it.
+ *
+ *     indx2 is an long working array of dimension n.
+ *       Within subroutine cauchy, indx2 corresponds to the array iorder.
+ *       In subroutine freev, a list of variables entering and leaving
+ *       the free set is stored in indx2, and it is passed on to
+ *       subroutine formk with this information.
+ *
+ *     task is a working string of characters of length 60 indicating
+ *       the current job when entering and leaving this subroutine.
+ *
+ *     iprint is an long variable that must be set by the user.
+ *       It controls the frequency and type of output generated:
+ *        iprint<0    no output is generated;
+ *        iprint=0    print only one line at the last iteration;
+ *        0<iprint<99 print also f and |proj g| every iprint iterations;
+ *        iprint=99   print details of every iteration except n-vectors;
+ *        iprint=100  print also the changes of active set and final x;
+ *        iprint>100  print details of every iteration including x and g;
+ *       When iprint > 0, the file iterate.dat will be created to
+ *                        summarize the iteration.
+ *
+ *     csave is a working string of characters of length 60.
+ *
+ *     lsave is a logical working array of dimension 4.
+ *
+ *     isave is an long working array of dimension 23.
+ *
+ *     dsave is a double precision working array of dimension 29.
+ *
+ *
+ *     Subprograms called
+ *
+ *       L-BFGS-B Library ... cauchy, subsm, lnsrlb, formk,
+ *
+ *        errclb, prn1lb, prn2lb, prn3lb, active, projgr,
+ *
+ *        freev, cmprlb, matupd, formt.
+ *
+ *       Minpack2 Library ... timer
+ *
+ *       Linpack Library ... dcopy, ddot.
+ *
+ *
+ *     References:
+ *
+ *       [1] R. H. Byrd, P. Lu, J. Nocedal and C. Zhu, ``A limited
+ *       memory algorithm for bound constrained optimization'',
+ *       SIAM J. Scientific Computing 16 (1995), no. 5, pp. 1190--1208.
+ *
+ *       [2] C. Zhu, R.H. Byrd, P. Lu, J. Nocedal, ``L-BFGS-B: FORTRAN
+ *       Subroutines for Large Scale Bound Constrained Optimization''
+ *       Tech. Report, NAM-11, EECS Department, Northwestern University,
+ *       1994.
+ *
+ *       [3] R. Byrd, J. Nocedal and R. Schnabel "Representations of
+ *       Quasi-Newton Matrices and their use in Limited Memory Methods'',
+ *       Mathematical Programming 63 (1994), no. 4, pp. 129-156.
+ *
+ *       (Postscript files of these papers are available via anonymous
+ *        ftp to eecs.nwu.edu in the directory pub/lbfgs/lbfgs_bcm.)
+ *
+ *                       *  *  *
+ *
+ *     NEOS, November 1994. (Latest revision June 1996.)
+ *     Optimization Technology Center.
+ *     Argonne National Laboratory and Northwestern University.
+ *     Written by
+ *                        Ciyou Zhu
+ *     in collaboration with R.H. Byrd, P. Lu-Chen and J. Nocedal.
+ */
+int mainlb_(long *n, long *m, double *x,
+  double *l, double *u, long *nbd, double *f, double *g,
+  double *factr, double *pgtol, double *ws, double *wy,
+  double *sy, double *ss, double *wt, double *wn,
+  double *snd, double *z__, double *r__, double *d__,
+  double *t, double *xp, double *wa, long *index,
+  long *iwhere, long *indx2, char *task, long *iprint,
+  char *csave, long *lsave, long *isave, double *dsave)
+{
+  long ws_dim1, ws_offset, wy_dim1, wy_offset, sy_dim1, sy_offset,
+    ss_dim1, ss_offset, wt_dim1, wt_offset, wn_dim1, wn_offset,
+    snd_dim1, snd_offset, i__1;
+  double d__1, d__2;
+  FILE *itfptr;
+  static long i__, k;
+  static double gd, dr, rr, dtd;
+  static long col;
+  static double tol;
+  static long wrk;
+  static double stp, cpu1, cpu2;
+  static long head;
+  static double fold;
+  static long nact;
+  static double ddum;
+  static long info, nseg;
+  static double time;
+  static long nfgv, ifun, iter;
+  static char word[4];
+  static double time1, time2;
+  static long iback;
+  static double gdold;
+  static long nfree;
+  static long boxed;
+  static long itail;
+  static double theta;
+  static double dnorm;
+  static long nskip, iword;
+  static double xstep, stpmx;
+  static long ileave;
+  static double cachyt;
+  static long itfile;
+  static double epsmch;
+  static long updatd;
+  static double sbtime;
+  static long prjctd;
+  static long iupdat;
+  static double sbgnrm;
+  static long cnstnd;
+  static long nenter;
+  static double lnscht;
+  static long nintol;
+  --indx2;
+  --iwhere;
+  --index;
+  --xp;
+  --t;
+  --d__;
+  --r__;
+  --z__;
+  --g;
+  --nbd;
+  --u;
+  --l;
+  --x;
+  --wa;
+  snd_dim1 = 2 * *m;
+  snd_offset = 1 + snd_dim1;
+  snd -= snd_offset;
+  wn_dim1 = 2 * *m;
+  wn_offset = 1 + wn_dim1;
+  wn -= wn_offset;
+  wt_dim1 = *m;
+  wt_offset = 1 + wt_dim1;
+  wt -= wt_offset;
+  ss_dim1 = *m;
+  ss_offset = 1 + ss_dim1;
+  ss -= ss_offset;
+  sy_dim1 = *m;
+  sy_offset = 1 + sy_dim1;
+  sy -= sy_offset;
+  wy_dim1 = *n;
+  wy_offset = 1 + wy_dim1;
+  wy -= wy_offset;
+  ws_dim1 = *n;
+  ws_offset = 1 + ws_dim1;
+  ws -= ws_offset;
+  --lsave;
+  --isave;
+  --dsave;
+  /* jlm-jn */
+  if (strncmp(task, "START", 5) == 0) {
+    epsmch = DBL_EPSILON;
+    timer_(&time1);
+    /* Initialize counters and scalars when task='START'. */
+    /*    for the limited memory BFGS matrices: */
+    col = 0;
+    head = 1;
+    theta = 1.;
+    iupdat = 0;
+    updatd = FALSE_;
+    iback = 0;
+    itail = 0;
+    iword = 0;
+    nact = 0;
+    ileave = 0;
+    nenter = 0;
+    fold = 0.;
+    dnorm = 0.;
+    cpu1 = 0.;
+    gd = 0.;
+    stpmx = 0.;
+    sbgnrm = 0.;
+    stp = 0.;
+    gdold = 0.;
+    dtd = 0.;
+    /* for operation counts: */
+    iter = 0;
+    nfgv = 0;
+    nseg = 0;
+    nintol = 0;
+    nskip = 0;
+    nfree = *n;
+    ifun = 0;
+    /* for stopping tolerance: */
+    tol = *factr * epsmch;
+    /* for measuring running time: */
+    cachyt = 0.;
+    sbtime = 0.;
+    lnscht = 0.;
+    /* 'word' records the status of subspace solutions. */
+    strcpy(word, "---");
+    /* 'info' records the termination information. */
+    info = 0;
+    itfile = 8;
+    /* Check the input arguments for errors. */
+    errclb_(n, m, factr, &l[1], &u[1], &nbd[1], task, &info, &k);
+    if (strncmp(task, "ERROR", 5) == 0) {
+      prn3lb_(n, &x[1], f, task, iprint, &info, &itfile, &iter, &nfgv, &nintol,
+          &nskip, &nact, &sbgnrm, &c_b9, &nseg, word, &iback,
+          &stp, &xstep, &k, &cachyt, &sbtime, &lnscht);
+      return 0;
+    }
+    prn1lb_(n, m, &l[1], &u[1], &x[1], iprint, &itfile, &epsmch);
+    /* Initialize iwhere & project x onto the feasible set. */
+    active_(n, &l[1], &u[1], &nbd[1], &x[1], &iwhere[1], iprint, &prjctd, &cnstnd, &boxed);
+    /* The end of the initialization. */
+  } else {
+    /* restore local variables. */
+    prjctd = lsave[1];
+    cnstnd = lsave[2];
+    boxed = lsave[3];
+    updatd = lsave[4];
+    nintol = isave[1];
+    itfile = isave[3];
+    iback = isave[4];
+    nskip = isave[5];
+    head = isave[6];
+    col = isave[7];
+    itail = isave[8];
+    iter = isave[9];
+    iupdat = isave[10];
+    nseg = isave[12];
+    nfgv = isave[13];
+    info = isave[14];
+    ifun = isave[15];
+    iword = isave[16];
+    nfree = isave[17];
+    nact = isave[18];
+    ileave = isave[19];
+    nenter = isave[20];
+    theta = dsave[1];
+    fold = dsave[2];
+    tol = dsave[3];
+    dnorm = dsave[4];
+    epsmch = dsave[5];
+    cpu1 = dsave[6];
+    cachyt = dsave[7];
+    sbtime = dsave[8];
+    lnscht = dsave[9];
+    time1 = dsave[10];
+    gd = dsave[11];
+    stpmx = dsave[12];
+    sbgnrm = dsave[13];
+    stp = dsave[14];
+    gdold = dsave[15];
+    dtd = dsave[16];
+    /* After returning from the driver go to the point where execution */
+    /* is to resume. */
+    if (strncmp(task, "FG_LN", 5) == 0) {
+      goto L666;
+    }
+    if (strncmp(task, "NEW_X", 5) == 0) {
+      goto L777;
+    }
+    if (strncmp(task, "FG_ST", 5) == 0) {
+      goto L111;
+    }
+    if (strncmp(task, "STOP", 4) == 0) {
+      if (strncmp(task + 6, "CPU", 3) == 0) {
+        /* restore the previous iterate. */
+        dcopy_(n, &t[1], &c__1, &x[1], &c__1);
+        dcopy_(n, &r__[1], &c__1, &g[1], &c__1);
+        *f = fold;
+      }
+      goto L999;
+    }
+  }
+  /* Compute f0 and g0. */
+  strcpy(task, "FG_START");
+  /* return to the driver to calculate f and g; reenter at 111. */
+  goto L1000;
+L111:
+  nfgv = 1;
+  /* Compute the infinity norm of the (-) projected gradient. */
+  projgr_(n, &l[1], &u[1], &nbd[1], &x[1], &g[1], &sbgnrm);
+  if (*iprint >= 1) {
+    fprintf(stdout, "\nAt iterate%5ld    f= %12.5E    |proj g|= %12.5E\n", iter, *f, sbgnrm);
+    itfptr = fopen("iterate.dat", "a");
+    fprintf(itfptr, " %4ld %4ld     -     -   -     -     -        -    %10.3E %10.3E\n", iter, nfgv, sbgnrm, *f);
+    fclose(itfptr);
+  }
+  if (sbgnrm <= *pgtol) {
+    /* terminate the algorithm. */
+    strcpy(task, "CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL");
+    goto L999;
+  }
+  /* ----------------- the beginning of the loop -------------------------- */
+L222:
+  if (*iprint >= 99) {
+    i__1 = iter + 1;
+    fprintf(stdout, "\n\nITERATION %5ld\n", i__1);
+  }
+  iword = -1;
+  if (! cnstnd && col > 0) {
+    /* skip the search for GCP. */
+    dcopy_(n, &x[1], &c__1, &z__[1], &c__1);
+    wrk = updatd;
+    nseg = 0;
+    goto L333;
+  }
+  /**
+   * Compute the Generalized Cauchy Point (GCP).
+   */
+  timer_(&cpu1);
+  cauchy_(n, &x[1], &l[1], &u[1], &nbd[1], &g[1], &indx2[1], &iwhere[1], &t[1],
+      &d__[1], &z__[1], m, &wy[wy_offset], &ws[ws_offset], &sy[sy_offset],
+      &wt[wt_offset], &theta, &col, &head, &wa[1], &wa[(*m << 1) + 1],
+      &wa[(*m << 2) + 1], &wa[*m * 6 + 1], &nseg, iprint, &sbgnrm, &info, &epsmch);
+  if (info != 0) {
+    /* singular triangular system detected; refresh the lbfgs memory. */
+    if (*iprint >= 1) {
+      fprintf(stdout, "\n");
+      fprintf(stdout, " Singular triangular system detected;\n");
+      fprintf(stdout, "   refresh the lbfgs memory and restart the iteration.\n");
+    }
+    info = 0;
+    col = 0;
+    head = 1;
+    theta = 1.;
+    iupdat = 0;
+    updatd = FALSE_;
+    timer_(&cpu2);
+    cachyt = cachyt + cpu2 - cpu1;
+    goto L222;
+  }
+  timer_(&cpu2);
+  cachyt = cachyt + cpu2 - cpu1;
+  nintol += nseg;
+  /* Count the entering and leaving variables for iter > 0; */
+  /* find the index set of free and active variables at the GCP. */
+  freev_(n, &nfree, &index[1], &nenter, &ileave, &indx2[1], &iwhere[1], &wrk, &updatd, &cnstnd, iprint, &iter);
+  nact = *n - nfree;
+L333:
+  /* If there are no free variables or B=theta*I, then */
+  /*                                    skip the subspace minimization. */
+  if (nfree == 0 || col == 0) {
+    goto L555;
+  }
+  /**
+   * Subspace minimization.
+   */
+  timer_(&cpu1);
+  /* Form  the LEL^T factorization of the indefinite */
+  /*   matrix    K = [-D -Y'ZZ'Y/theta     L_a'-R_z'  ] */
+  /*                 [L_a -R_z           theta*S'AA'S ] */
+  /*   where     E = [-I  0] */
+  /*                 [ 0  I] */
+  if (wrk) {
+    formk_(n, &nfree, &index[1], &nenter, &ileave, &indx2[1], &iupdat, &updatd,
+        &wn[wn_offset], &snd[snd_offset], m, &ws[ws_offset], &wy[wy_offset],
+        &sy[sy_offset], &theta, &col, &head, &info);
+  }
+  if (info != 0) {
+    /* nonpositive definiteness in Cholesky factorization; */
+    /* refresh the lbfgs memory and restart the iteration. */
+    if (*iprint >= 1) {
+      fprintf(stdout, "\n");
+      fprintf(stdout, " Nonpositive definiteness in Cholesky factorization in formk;\n");
+      fprintf(stdout, "   refresh the lbfgs memory and restart the iteration.\n");
+    }
+    info = 0;
+    col = 0;
+    head = 1;
+    theta = 1.;
+    iupdat = 0;
+    updatd = FALSE_;
+    timer_(&cpu2);
+    sbtime = sbtime + cpu2 - cpu1;
+    goto L222;
+  }
+  /* compute r=-Z'B(xcp-xk)-Z'g (using wa(2m+1)=W'(xcp-x) */
+  /*                                            from 'cauchy'). */
+  cmprlb_(n, m, &x[1], &g[1], &ws[ws_offset], &wy[wy_offset], &sy[sy_offset],
+      &wt[wt_offset], &z__[1], &r__[1], &wa[1], &index[1], &theta, &col,
+      &head, &nfree, &cnstnd, &info);
+  if (info != 0) {
+    goto L444;
+  }
+  /* jlm-jn call the direct method. */
+  subsm_(n, m, &nfree, &index[1], &l[1], &u[1], &nbd[1], &z__[1], &r__[1], &xp[1],
+      &ws[ws_offset], &wy[wy_offset], &theta, &x[1], &g[1], &col,
+      &head, &iword, &wa[1], &wn[wn_offset], iprint, &info);
+L444:
+  if (info != 0) {
+    /* singular triangular system detected; */
+    /* refresh the lbfgs memory and restart the iteration. */
+    if (*iprint >= 1) {
+      fprintf(stdout, "\n");
+      fprintf(stdout, " Singular triangular system detected;\n");
+      fprintf(stdout, "   refresh the lbfgs memory and restart the iteration.\n");
+    }
+    info = 0;
+    col = 0;
+    head = 1;
+    theta = 1.;
+    iupdat = 0;
+    updatd = FALSE_;
+    timer_(&cpu2);
+    sbtime = sbtime + cpu2 - cpu1;
+    goto L222;
+  }
+  timer_(&cpu2);
+  sbtime = sbtime + cpu2 - cpu1;
+L555:
+  /**
+   * Line search and optimality tests.
+   */
+  /* Generate the search direction d:=z-x. */
+  i__1 = *n;
+  for (i__ = 1; i__ <= i__1; ++i__) {
+    d__[i__] = z__[i__] - x[i__];
+  }
+  timer_(&cpu1);
+L666:
+  lnsrlb_(n, &l[1], &u[1], &nbd[1], &x[1], f, &fold, &gd, &gdold, &g[1],
+      &d__[1], &r__[1], &t[1], &z__[1], &stp, &dnorm, &dtd, &xstep,
+      &stpmx, &iter, &ifun, &iback, &nfgv, &info, task, &boxed, &cnstnd,
+      csave, &isave[22], &dsave[17]);
+  if (info != 0 || iback >= 20) {
+    /* restore the previous iterate. */
+    dcopy_(n, &t[1], &c__1, &x[1], &c__1);
+    dcopy_(n, &r__[1], &c__1, &g[1], &c__1);
+    *f = fold;
+    if (col == 0) {
+      /* abnormal termination. */
+      if (info == 0) {
+        info = -9;
+        /* restore the actual number of f and g evaluations etc. */
+        --nfgv;
+        --ifun;
+        --iback;
+      }
+      strcpy(task, "ABNORMAL_TERMINATION_IN_LNSRCH");
+      ++iter;
+      goto L999;
+    } else {
+      /* refresh the lbfgs memory and restart the iteration. */
+      if (*iprint >= 1) {
+        fprintf(stdout, "\n");
+        fprintf(stdout, " Bad direction in the line search;\n");
+        fprintf(stdout, "   refresh the lbfgs memory and restart the iteration.\n");
+      }
+      if (info == 0) {
+        --nfgv;
+      }
+      info = 0;
+      col = 0;
+      head = 1;
+      theta = 1.;
+      iupdat = 0;
+      updatd = FALSE_;
+      strcpy(task, "RESTART_FROM_LNSRCH");
+      timer_(&cpu2);
+      lnscht = lnscht + cpu2 - cpu1;
+      goto L222;
+    }
+  } else if (strncmp(task, "FG_LN", 5) == 0) {
+    /* return to the driver for calculating f and g; reenter at 666. */
+    goto L1000;
+  } else {
+    /* calculate and print out the quantities related to the new X. */
+    timer_(&cpu2);
+    lnscht = lnscht + cpu2 - cpu1;
+    ++iter;
+    /* Compute the infinity norm of the projected (-)gradient. */
+    projgr_(n, &l[1], &u[1], &nbd[1], &x[1], &g[1], &sbgnrm);
+    /* Print iteration information. */
+    prn2lb_(n, &x[1], f, &g[1], iprint, &itfile, &iter, &nfgv, &nact,
+        &sbgnrm, &nseg, word, &iword, &iback, &stp, &xstep);
+    goto L1000;
+  }
+L777:
+  /* Test for termination. */
+  if (sbgnrm <= *pgtol) {
+    /* terminate the algorithm. */
+    strcpy(task, "CONVERGENCE: NORM_OF_PROJECTED_GRADIENT_<=_PGTOL");
+    goto L999;
+  }
+  d__1 = fabs(fold);
+  d__2 = fabs(*f);
+  d__1 = d__1 >= d__2 ? d__1 : d__2;
+  ddum = d__1 >= 1. ? d__1 : 1.;
+  if (fold - *f <= tol * ddum) {
+    /* terminate the algorithm. */
+    strcpy(task, "CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH");
+    if (iback >= 10) {
+      info = -5;
+    }
+    /*i.e., to issue a warning if iback>10 in the line search. */
+    goto L999;
+  }
+  /* Compute d=newx-oldx, r=newg-oldg, rr=y'y and dr=y's. */
+  i__1 = *n;
+  for (i__ = 1; i__ <= i__1; ++i__) {
+    r__[i__] = g[i__] - r__[i__];
+  }
+  rr = ddot_(n, &r__[1], &c__1, &r__[1], &c__1);
+  if (stp == 1.) {
+    dr = gd - gdold;
+    ddum = -gdold;
+  } else {
+    dr = (gd - gdold) * stp;
+    dscal_(n, &stp, &d__[1], &c__1);
+    ddum = -gdold * stp;
+  }
+  if (dr <= epsmch * ddum) {
+    /* skip the L-BFGS update. */
+    ++nskip;
+    updatd = FALSE_;
+    if (*iprint >= 1) {
+      fprintf(stdout, "  ys=%10.3E  -gs=%10.3E BFGS update SKIPPED\n", dr, ddum);
+    }
+    goto L888;
+  }
+  /**
+   * Update the L-BFGS matrix.
+   */
+  updatd = TRUE_;
+  ++iupdat;
+  /* Update matrices WS and WY and form the middle matrix in B. */
+  matupd_(n, m, &ws[ws_offset], &wy[wy_offset], &sy[sy_offset],
+      &ss[ss_offset], &d__[1], &r__[1], &itail, &iupdat, &col, &head,
+      &theta, &rr, &dr, &stp, &dtd);
+  /* Form the upper half of the pds T = theta*SS + L*D^(-1)*L'; */
+  /*    Store T in the upper triangular of the array wt; */
+  /*    Cholesky factorize T to J*J' with */
+  /*       J' stored in the upper triangular of wt. */
+  formt_(m, &wt[wt_offset], &sy[sy_offset], &ss[ss_offset], &col, &theta, &info);
+  if (info != 0) {
+    /* nonpositive definiteness in Cholesky factorization; */
+    /* refresh the lbfgs memory and restart the iteration. */
+    if (*iprint >= 1) {
+      fprintf(stdout, "\n");
+      fprintf(stdout, " Nonpositive definiteness in Cholesky factorization in formt;\n");
+      fprintf(stdout, "   refresh the lbfgs memory and restart the iteration.\n");
+    }
+    info = 0;
+    col = 0;
+    head = 1;
+    theta = 1.;
+    iupdat = 0;
+    updatd = FALSE_;
+    goto L222;
+  }
+  /* Now the inverse of the middle matrix in B is */
+  /*   [  D^(1/2)      O ] [ -D^(1/2)  D^(-1/2)*L' ] */
+  /*   [ -L*D^(-1/2)   J ] [  0        J'          ] */
+L888:
+  /* -------------------- the end of the loop ----------------------------- */
+  goto L222;
+L999:
+  timer_(&time2);
+  time = time2 - time1;
+  prn3lb_(n, &x[1], f, task, iprint, &info, &itfile, &iter, &nfgv, &nintol,
+    &nskip, &nact, &sbgnrm, &time, &nseg, word, &iback, &stp, &xstep,
+    &k, &cachyt, &sbtime, &lnscht);
+L1000:
+  /* Save local variables. */
+  lsave[1] = prjctd;
+  lsave[2] = cnstnd;
+  lsave[3] = boxed;
+  lsave[4] = updatd;
+  isave[1] = nintol;
+  isave[3] = itfile;
+  isave[4] = iback;
+  isave[5] = nskip;
+  isave[6] = head;
+  isave[7] = col;
+  isave[8] = itail;
+  isave[9] = iter;
+  isave[10] = iupdat;
+  isave[12] = nseg;
+  isave[13] = nfgv;
+  isave[14] = info;
+  isave[15] = ifun;
+  isave[16] = iword;
+  isave[17] = nfree;
+  isave[18] = nact;
+  isave[19] = ileave;
+  isave[20] = nenter;
+  dsave[1] = theta;
+  dsave[2] = fold;
+  dsave[3] = tol;
+  dsave[4] = dnorm;
+  dsave[5] = epsmch;
+  dsave[6] = cpu1;
+  dsave[7] = cachyt;
+  dsave[8] = sbtime;
+  dsave[9] = lnscht;
+  dsave[10] = time1;
+  dsave[11] = gd;
+  dsave[12] = stpmx;
+  dsave[13] = sbgnrm;
+  dsave[14] = stp;
+  dsave[15] = gdold;
+  dsave[16] = dtd;
+  return 0;
+}
+/**
+ * Subroutine active
+ *
+ *     This subroutine initializes iwhere and projects the initial x to
+ *       the feasible set if necessary.
+ *
+ *     iwhere is an long array of dimension n.
+ *       On entry iwhere is unspecified.
+ *       On exit iwhere(i)=-1  if x(i) has no bounds
+ *                         3   if l(i)=u(i)
+ *                         0   otherwise.
+ *       In cauchy, iwhere is given finer gradations.
+ *
+ *                           *  *  *
+ *
+ *     NEOS, November 1994. (Latest revision June 1996.)
+ *     Optimization Technology Center.
+ *     Argonne National Laboratory and Northwestern University.
+ *     Written by
+ *                        Ciyou Zhu
+ *     in collaboration with R.H. Byrd, P. Lu-Chen and J. Nocedal.
+ */
+int active_(long *n, double *l, double *u,
+  long *nbd, double *x, long *iwhere, long *iprint,
+  long *prjctd, long *cnstnd, long *boxed)
+{
+  long i__1;
+  static long i__, nbdd;
+  --iwhere;
+  --x;
+  --nbd;
+  --u;
+  --l;
+  /* Initialize nbdd, prjctd, cnstnd and boxed. */
+  nbdd = 0;
+  *prjctd = FALSE_;
+  *cnstnd = FALSE_;
+  *boxed = TRUE_;
+  /* Project the initial x to the easible set if necessary. */
+  i__1 = *n;
+  for (i__ = 1; i__ <= i__1; ++i__) {
+    if (nbd[i__] > 0) {
+      if (nbd[i__] <= 2 && x[i__] <= l[i__]) {
+        if (x[i__] < l[i__]) {
+          *prjctd = TRUE_;
+          x[i__] = l[i__];
+        }
+        ++nbdd;
+      } else if (nbd[i__] >= 2 && x[i__] >= u[i__]) {
+        if (x[i__] > u[i__]) {
+          *prjctd = TRUE_;
+          x[i__] = u[i__];
+        }
+        ++nbdd;
+      }
+    }
+  }
+  /* Initialize iwhere and assign values to cnstnd and boxed. */
+  i__1 = *n;
+  for (i__ = 1; i__ <= i__1; ++i__) {
+    if (nbd[i__] != 2) {
+      *boxed = FALSE_;
+    }
+    if (nbd[i__] == 0) {
+      /* this variable is always free */
+      iwhere[i__] = -1;
+    /* otherwise set x(i)=mid(x(i), u(i), l(i)). */
+    } else {
+      *cnstnd = TRUE_;
+      if (nbd[i__] == 2 && u[i__] - l[i__] <= 0.) {
+        /* this variable is always fixed */
+        iwhere[i__] = 3;
+      } else {
+        iwhere[i__] = 0;
+      }
+    }
+  }
+  if (*iprint >= 0) {
+    if (*prjctd) {
+      fprintf(stdout, " The initial X is infeasible.  Restart with its projection.\n");
+    }
+    if (! (*cnstnd)) {
+      fprintf(stdout, " This problem is unconstrained.\n");
+    }
+  }
+  if (*iprint > 0) {
+    fprintf(stdout, "\n");
+    fprintf(stdout, "At X0 %9ld variables are exactly at the bounds\n", nbdd);
+  }
+  return 0;
+}
+/**
+ * Subroutine bmv
+ *
+ *     This subroutine computes the product of the 2m x 2m middle matrix
+ *       in the compact L-BFGS formula of B and a 2m vector v;
+ *       it returns the product in p.
+ *
+ *     m is an long variable.
+ *       On entry m is the maximum number of variable metric corrections
+ *         used to define the limited memory matrix.
+ *       On exit m is unchanged.
+ *
+ *     sy is a double precision array of dimension m x m.
+ *       On entry sy specifies the matrix S'Y.
+ *       On exit sy is unchanged.
+ *
+ *     wt is a double precision array of dimension m x m.
+ *       On entry wt specifies the upper triangular matrix J' which is
+ *         the Cholesky factor of (thetaS'S+LD^(-1)L').
+ *       On exit wt is unchanged.
+ *
+ *     col is an long variable.
+ *       On entry col specifies the number of s-vectors (or y-vectors)
+ *         stored in the compact L-BFGS formula.
+ *       On exit col is unchanged.
+ *
+ *     v is a double precision array of dimension 2col.
+ *       On entry v specifies vector v.
+ *       On exit v is unchanged.
+ *
+ *     p is a double precision array of dimension 2col.
+ *       On entry p is unspecified.
+ *       On exit p is the product Mv.
+ *
+ *     info is an long variable.
+ *       On entry info is unspecified.
+ *       On exit info = 0       for normal return,
+ *                    = nonzero for abnormal return when the system
+ *                                to be solved by dtrsl is singular.
+ *
+ *     Subprograms called:
+ *
+ *       Linpack ... dtrsl.
+ *
+ *                           *  *  *
+ *
+ *     NEOS, November 1994. (Latest revision June 1996.)
+ *     Optimization Technology Center.
+ *     Argonne National Laboratory and Northwestern University.
+ *     Written by
+ *                        Ciyou Zhu
+ *     in collaboration with R.H. Byrd, P. Lu-Chen and J. Nocedal.
+ */
+int bmv_(long *m, double *sy, double *wt, long
+  *col, double *v, double *p, long *info)
+{
+  long sy_dim1, sy_offset, wt_dim1, wt_offset, i__1, i__2;
+  static long i__, k, i2;
+  static double sum;
+  wt_dim1 = *m;
+  wt_offset = 1 + wt_dim1;
+  wt -= wt_offset;
+  sy_dim1 = *m;
+  sy_offset = 1 + sy_dim1;
+  sy -= sy_offset;
+  --p;
+  --v;
+  if (*col == 0) {
+    return 0;
+  }
+  /* PART I: solve [  D^(1/2)      O ] [ p1 ] = [ v1 ] */
+  /*               [ -L*D^(-1/2)   J ] [ p2 ]   [ v2 ]. */
+  /*   solve Jp2=v2+LD^(-1)v1. */
+  p[*col + 1] = v[*col + 1];
+  i__1 = *col;
+  for (i__ = 2; i__ <= i__1; ++i__) {
+    i2 = *col + i__;
+    sum = 0.;
+    i__2 = i__ - 1;
+    for (k = 1; k <= i__2; ++k) {
+      sum += sy[i__ + k * sy_dim1] * v[k] / sy[k + k * sy_dim1];
+    }
+    p[i2] = v[i2] + sum;
+  }
+  /* Solve the triangular system */
+  dtrsl_(&wt[wt_offset], m, col, &p[*col + 1], &c__11, info);
+  if (*info != 0) {
+    return 0;
+  }
+  /* solve D^(1/2)p1=v1. */
+  i__1 = *col;
+  for (i__ = 1; i__ <= i__1; ++i__) {
+    p[i__] = v[i__] / sqrt(sy[i__ + i__ * sy_dim1]);
+  }
+  /* PART II: solve [ -D^(1/2)   D^(-1/2)*L'  ] [ p1 ] = [ p1 ] */
+  /*                [  0         J'           ] [ p2 ]   [ p2 ]. */
+  /*   solve J^Tp2=p2. */
+  dtrsl_(&wt[wt_offset], m, col, &p[*col + 1], &c__1, info);
+  if (*info != 0) {
+    return 0;
+  }
+  /* compute p1=-D^(-1/2)(p1-D^(-1/2)L'p2) */
+  /*           =-D^(-1/2)p1+D^(-1)L'p2. */
+  i__1 = *col;
+  for (i__ = 1; i__ <= i__1; ++i__) {
+    p[i__] = -p[i__] / sqrt(sy[i__ + i__ * sy_dim1]);
+  }
+  i__1 = *col;
+  for (i__ = 1; i__ <= i__1; ++i__) {
+    sum = 0.;
+    i__2 = *col;
+    for (k = i__ + 1; k <= i__2; ++k) {
+      sum += sy[k + i__ * sy_dim1] * p[*col + k] / sy[i__ + i__ * sy_dim1];
+    }
+    p[i__] += sum;
+  }
+  return 0;
+}
+/**
+ * Subroutine cauchy
+ *
+ *     For given x, l, u, g (with sbgnrm > 0), and a limited memory
+ *       BFGS matrix B defined in terms of matrices WY, WS, WT, and
+ *       scalars head, col, and theta, this subroutine computes the
+ *       generalized Cauchy point (GCP), defined as the first local
+ *       minimizer of the quadratic
+ *
+ *                  Q(x + s) = g's + 1/2 s'Bs
+ *
+ *       along the projected gradient direction P(x-tg,l,u).
+ *       The routine returns the GCP in xcp.
+ *
+ *     n is an long variable.
+ *       On entry n is the dimension of the problem.
+ *       On exit n is unchanged.
+ *
+ *     x is a double precision array of dimension n.
+ *       On entry x is the starting point for the GCP computation.
+ *       On exit x is unchanged.
+ *
+ *     l is a double precision array of dimension n.
+ *       On entry l is the lower bound of x.
+ *       On exit l is unchanged.
+ *
+ *     u is a double precision array of dimension n.
+ *       On entry u is the upper bound of x.
+ *       On exit u is unchanged.
+ *
+ *     nbd is an long array of dimension n.
+ *       On entry nbd represents the type of bounds imposed on the
+ *         variables, and must be specified as follows:
+ *         nbd(i)=0 if x(i) is unbounded,
+ *                1 if x(i) has only a lower bound,
+ *                2 if x(i) has both lower and upper bounds, and
+ *                3 if x(i) has only an upper bound.
+ *       On exit nbd is unchanged.
+ *
+ *     g is a double precision array of dimension n.
+ *       On entry g is the gradient of f(x).  g must be a nonzero vector.
+ *       On exit g is unchanged.
+ *
+ *     iorder is an long working array of dimension n.
+ *       iorder will be used to store the breakpoints in the piecewise
+ *       linear path and free variables encountered. On exit,
+ *         iorder(1),...,iorder(nleft) are indices of breakpoints
+ *                                which have not been encountered;
+ *         iorder(nleft+1),...,iorder(nbreak) are indices of
+ *                                     encountered breakpoints; and
+ *         iorder(nfree),...,iorder(n) are indices of variables which
+ *                 have no bound constraits along the search direction.
+ *
+ *     iwhere is an long array of dimension n.
+ *       On entry iwhere indicates only the permanently fixed (iwhere=3)
+ *       or free (iwhere= -1) components of x.
+ *       On exit iwhere records the status of the current x variables.
+ *       iwhere(i)=-3  if x(i) is free and has bounds, but is not moved
+ *                 0   if x(i) is free and has bounds, and is moved
+ *                 1   if x(i) is fixed at l(i), and l(i) .ne. u(i)
+ *                 2   if x(i) is fixed at u(i), and u(i) .ne. l(i)
+ *                 3   if x(i) is always fixed, i.e.,  u(i)=x(i)=l(i)
+ *                 -1  if x(i) is always free, i.e., it has no bounds.
+ *
+ *     t is a double precision working array of dimension n.
+ *       t will be used to store the break points.
+ *
+ *     d is a double precision array of dimension n used to store
+ *       the Cauchy direction P(x-tg)-x.
+ *
+ *     xcp is a double precision array of dimension n used to return the
+ *       GCP on exit.
+ *
+ *     m is an long variable.
+ *       On entry m is the maximum number of variable metric corrections
+ *         used to define the limited memory matrix.
+ *       On exit m is unchanged.
+ *
+ *     ws, wy, sy, and wt are double precision arrays.
+ *       On entry they store information that defines the
+ *                             limited memory BFGS matrix:
+ *         ws(n,m) stores S, a set of s-vectors;
+ *         wy(n,m) stores Y, a set of y-vectors;
+ *         sy(m,m) stores S'Y;
+ *         wt(m,m) stores the
+ *                 Cholesky factorization of (theta*S'S+LD^(-1)L').
+ *       On exit these arrays are unchanged.
+ *
+ *     theta is a double precision variable.
+ *       On entry theta is the scaling factor specifying B_0 = theta I.
+ *       On exit theta is unchanged.
+ *
+ *     col is an long variable.
+ *       On entry col is the actual number of variable metric
+ *         corrections stored so far.
+ *       On exit col is unchanged.
+ *
+ *     head is an long variable.
+ *       On entry head is the location of the first s-vector (or y-vector)
+ *         in S (or Y).
+ *       On exit col is unchanged.
+ *
+ *     p is a double precision working array of dimension 2m.
+ *       p will be used to store the vector p = W^(T)d.
+ *
+ *     c is a double precision working array of dimension 2m.
+ *       c will be used to store the vector c = W^(T)(xcp-x).
+ *
+ *     wbp is a double precision working array of dimension 2m.
+ *       wbp will be used to store the row of W corresponding
+ *         to a breakpoint.
+ *
+ *     v is a double precision working array of dimension 2m.
+ *
+ *     nseg is an long variable.
+ *       On exit nseg records the number of quadratic segments explored
+ *         in searching for the GCP.
+ *
+ *     sg and yg are double precision arrays of dimension m.
+ *       On entry sg  and yg store S'g and Y'g correspondingly.
+ *       On exit they are unchanged.
+ *
+ *     iprint is an long variable that must be set by the user.
+ *       It controls the frequency and type of output generated:
+ *        iprint<0    no output is generated;
+ *        iprint=0    print only one line at the last iteration;
+ *        0<iprint<99 print also f and |proj g| every iprint iterations;
+ *        iprint=99   print details of every iteration except n-vectors;
+ *        iprint=100  print also the changes of active set and final x;
+ *        iprint>100  print details of every iteration including x and g;
+ *       When iprint > 0, the file iterate.dat will be created to
+ *                        summarize the iteration.
+ *
+ *     sbgnrm is a double precision variable.
+ *       On entry sbgnrm is the norm of the projected gradient at x.
+ *       On exit sbgnrm is unchanged.
+ *
+ *     info is an long variable.
+ *       On entry info is 0.
+ *       On exit info = 0       for normal return,
+ *                    = nonzero for abnormal return when the the system
+ *                              used in routine bmv is singular.
+ *
+ *     Subprograms called:
+ *
+ *       L-BFGS-B Library ... hpsolb, bmv.
+ *
+ *       Linpack ... dscal dcopy, daxpy.
+ *
+ *
+ *     References:
+ *
+ *       [1] R. H. Byrd, P. Lu, J. Nocedal and C. Zhu, ``A limited
+ *       memory algorithm for bound constrained optimization'',
+ *       SIAM J. Scientific Computing 16 (1995), no. 5, pp. 1190--1208.
+ *
+ *       [2] C. Zhu, R.H. Byrd, P. Lu, J. Nocedal, ``L-BFGS-B: FORTRAN
+ *       Subroutines for Large Scale Bound Constrained Optimization''
+ *       Tech. Report, NAM-11, EECS Department, Northwestern University,
+ *       1994.
+ *
+ *       (Postscript files of these papers are available via anonymous
+ *        ftp to eecs.nwu.edu in the directory pub/lbfgs/lbfgs_bcm.)
+ *
+ *                           *  *  *
+ *
+ *     NEOS, November 1994. (Latest revision June 1996.)
+ *     Optimization Technology Center.
+ *     Argonne National Laboratory and Northwestern University.
+ *     Written by
+ *                        Ciyou Zhu
+ *     in collaboration with R.H. Byrd, P. Lu-Chen and J. Nocedal.
+ */
+int cauchy_(long *n, double *x, double *l,
+  double *u, long *nbd, double *g, long *iorder, long *iwhere,
+  double *t, double *d__, double *xcp, long *m,
+  double *wy, double *ws, double *sy, double *wt,
+  double *theta, long *col, long *head, double *p,
+  double *c__, double *wbp, double *v, long *nseg,
+  long *iprint, double *sbgnrm, long *info, double *epsmch)
+{
+  long wy_dim1, wy_offset, ws_dim1, ws_offset, sy_dim1, sy_offset, wt_dim1, wt_offset, i__1, i__2;
+  double d__1;
+  static long i__, j;
+  static double f1, f2, dt, tj, tl, tu, tj0;
+  static long ibp;
+  static double dtm;
+  static double wmc, wmp, wmw;
+  static long col2;
+  static double dibp;
+  static long iter;
+  static double zibp, tsum, dibp2;
+  static long bnded;
+  static double neggi;
+  static long nfree;
+  static double bkmin;
+  static long nleft;
+  static double f2_org__;
+  static long nbreak, ibkmin;
+  static long pointr;
+  static long xlower, xupper;
+  --xcp;
+  --d__;
+  --t;
+  --iwhere;
+  --iorder;
+  --g;
+  --nbd;
+  --u;
+  --l;
+  --x;
+  --v;
+  --wbp;
+  --c__;
+  --p;
+  wt_dim1 = *m;
+  wt_offset = 1 + wt_dim1;
+  wt -= wt_offset;
+  sy_dim1 = *m;
+  sy_offset = 1 + sy_dim1;
+  sy -= sy_offset;
+  ws_dim1 = *n;
+  ws_offset = 1 + ws_dim1;
+  ws -= ws_offset;
+  wy_dim1 = *n;
+  wy_offset = 1 + wy_dim1;
+  wy -= wy_offset;
+  /* Check the status of the variables, reset iwhere(i) if necessary; */
+  /*   compute the Cauchy direction d and the breakpoints t; initialize */
+  /*   the derivative f1 and the vector p = W'd (for theta = 1). */
+  if (*sbgnrm <= 0.) {
+    if (*iprint >= 0) {
+      fprintf(stdout, " Subgnorm = 0.  GCP = X.\n");
+    }
+    dcopy_(n, &x[1], &c__1, &xcp[1], &c__1);
+    return 0;
+  }
+  bnded = TRUE_;
+  nfree = *n + 1;
+  nbreak = 0;
+  ibkmin = 0;
+  bkmin = 0.;
+  col2 = *col << 1;
+  f1 = 0.;
+  if (*iprint >= 99) {
+    fprintf(stdout, "\n---------------- CAUCHY entered-------------------\n\n");
+  }
+  /* We set p to zero and build it up as we determine d. */
+  i__1 = col2;
+  for (i__ = 1; i__ <= i__1; ++i__) {
+    p[i__] = 0.;
+  }
+  /* In the following loop we determine for each variable its bound */
+  /*    status and its breakpoint, and update p accordingly. */
+  /*    Smallest breakpoint is identified. */
+  i__1 = *n;
+  for (i__ = 1; i__ <= i__1; ++i__) {
+    neggi = -g[i__];
+    if (iwhere[i__] != 3 && iwhere[i__] != -1) {
+      /* if x(i) is not a constant and has bounds, */
+      /* compute the difference between x(i) and its bounds. */
+      if (nbd[i__] <= 2) {
+        tl = x[i__] - l[i__];
+      }
+      if (nbd[i__] >= 2) {
+        tu = u[i__] - x[i__];
+      }
+      /* If a variable is close enough to a bound */
+      /*   we treat it as at bound. */
+      xlower = nbd[i__] <= 2 && tl <= 0.;
+      xupper = nbd[i__] >= 2 && tu <= 0.;
+      /* reset iwhere(i). */
+      iwhere[i__] = 0;
+      if (xlower) {
+        if (neggi <= 0.) {
+          iwhere[i__] = 1;
+        }
+      } else if (xupper) {
+        if (neggi >= 0.) {
+          iwhere[i__] = 2;
+        }
+      } else {
+        if (fabs(neggi) <= 0.) {
+          iwhere[i__] = -3;
+        }
+      }
+    }
+    pointr = *head;
+    if (iwhere[i__] != 0 && iwhere[i__] != -1) {
+      d__[i__] = 0.;
+    } else {
+      d__[i__] = neggi;
+      f1 -= neggi * neggi;
+      /* calculate p := p - W'e_i* (g_i). */
+      i__2 = *col;
+      for (j = 1; j <= i__2; ++j) {
+        p[j] += wy[i__ + pointr * wy_dim1] * neggi;
+        p[*col + j] += ws[i__ + pointr * ws_dim1] * neggi;
+        pointr = pointr % *m + 1;
+      }
+      if (nbd[i__] <= 2 && nbd[i__] != 0 && neggi < 0.) {
+        /* x(i) + d(i) is bounded; compute t(i). */
+        ++nbreak;
+        iorder[nbreak] = i__;
+        t[nbreak] = tl / (-neggi);
+        if (nbreak == 1 || t[nbreak] < bkmin) {
+          bkmin = t[nbreak];
+          ibkmin = nbreak;
+        }
+      } else if (nbd[i__] >= 2 && neggi > 0.) {
+        /* x(i) + d(i) is bounded; compute t(i). */
+        ++nbreak;
+        iorder[nbreak] = i__;
+        t[nbreak] = tu / neggi;
+        if (nbreak == 1 || t[nbreak] < bkmin) {
+          bkmin = t[nbreak];
+          ibkmin = nbreak;
+        }
+      } else {
+        /* x(i) + d(i) is not bounded. */
+        --nfree;
+        iorder[nfree] = i__;
+        if (fabs(neggi) > 0.) {
+          bnded = FALSE_;
+        }
+      }
+    }
+  }
+  /* The indices of the nonzero components of d are now stored */
+  /*   in iorder(1),...,iorder(nbreak) and iorder(nfree),...,iorder(n). */
+  /*   The smallest of the nbreak breakpoints is in t(ibkmin)=bkmin. */
+  if (*theta != 1.) {
+    /* complete the initialization of p for theta not= one. */
+    dscal_(col, theta, &p[*col + 1], &c__1);
+  }
+  /* Initialize GCP xcp = x. */
+  dcopy_(n, &x[1], &c__1, &xcp[1], &c__1);
+  if (nbreak == 0 && nfree == *n + 1) {
+    /* is a zero vector, return with the initial xcp as GCP. */
+    if (*iprint > 100) {
+      fprintf(stdout, "Cauchy X =  \n");
+      fprintf(stdout, "    ");
+      i__1 = *n;
+      for (i__ = 1; i__ <= i__1; ++i__) {
+        fprintf(stdout, " %11.4E", xcp[i__]);
+        if (i__ % 6 == 0) {
+          fprintf(stdout, "\n");
+          fprintf(stdout, "    ");
+        }
+      }
+      fprintf(stdout, "\n");
+    }
+    return 0;
+  }
+  /* Initialize c = W'(xcp - x) = 0. */
+  i__1 = col2;
+  for (j = 1; j <= i__1; ++j) {
+    c__[j] = 0.;
+  }
+  /* Initialize derivative f2. */
+  f2 = -(*theta) * f1;
+  f2_org__ = f2;
+  if (*col > 0) {
+    bmv_(m, &sy[sy_offset], &wt[wt_offset], col, &p[1], &v[1], info);
+    if (*info != 0) {
+      return 0;
+    }
+    f2 -= ddot_(&col2, &v[1], &c__1, &p[1], &c__1);
+  }
+  dtm = -f1 / f2;
+  tsum = 0.;
+  *nseg = 1;
+  if (*iprint >= 99) {
+    fprintf(stdout, " There are %3ld  breakpoints \n", nbreak);
+  }
+  /* If there are no breakpoints, locate the GCP and return. */
+  if (nbreak == 0) {
+    goto L888;
+  }
+  nleft = nbreak;
+  iter = 1;
+  tj = 0.;
+  /* ------------------- the beginning of the loop ------------------------- */
+L777:
+  /* Find the next smallest breakpoint; */
+  /*   compute dt = t(nleft) - t(nleft + 1). */
+  tj0 = tj;
+  if (iter == 1) {
+    /* Since we already have the smallest breakpoint we need not do */
+    /* heapsort yet. Often only one breakpoint is used and the */
+    /* cost of heapsort is avoided. */
+    tj = bkmin;
+    ibp = iorder[ibkmin];
+  } else {
+    if (iter == 2) {
+      /* Replace the already used smallest breakpoint with the */
+      /* breakpoint numbered nbreak > nlast, before heapsort call. */
+      if (ibkmin != nbreak) {
+        t[ibkmin] = t[nbreak];
+        iorder[ibkmin] = iorder[nbreak];
+      }
+      /* Update heap structure of breakpoints */
+      /*    (if iter=2, initialize heap). */
+    }
+    i__1 = iter - 2;
+    hpsolb_(&nleft, &t[1], &iorder[1], &i__1);
+    tj = t[nleft];
+    ibp = iorder[nleft];
+  }
+  dt = tj - tj0;
+  if (dt != 0. && *iprint >= 100) {
+    fprintf(stdout, "\n");
+    fprintf(stdout, "Piece    %3ld --f1, f2 at start point  %11.4E %11.4E\n", *nseg, f1, f2);
+    fprintf(stdout, "Distance to the next break point =  %11.4E\n", dt);
+    fprintf(stdout, "Distance to the stationary point =  %11.4E\n", dtm);
+  }
+  /* If a minimizer is within this interval, locate the GCP and return. */
+  if (dtm < dt) {
+    goto L888;
+  }
+  /* Otherwise fix one variable and */
+  /*   reset the corresponding component of d to zero. */
+  tsum += dt;
+  --nleft;
+  ++iter;
+  dibp = d__[ibp];
+  d__[ibp] = 0.;
+  if (dibp > 0.) {
+    zibp = u[ibp] - x[ibp];
+    xcp[ibp] = u[ibp];
+    iwhere[ibp] = 2;
+  } else {
+    zibp = l[ibp] - x[ibp];
+    xcp[ibp] = l[ibp];
+    iwhere[ibp] = 1;
+  }
+  if (*iprint >= 100) {
+    fprintf(stdout, " Variable   %ld  is fixed.\n", ibp);
+  }
+  if (nleft == 0 && nbreak == *n) {
+    /* all n variables are fixed, */
+    /*    return with xcp as GCP. */
+    dtm = dt;
+    goto L999;
+  }
+  /* Update the derivative information. */
+  ++(*nseg);
+  /* Computing 2nd power */
+  d__1 = dibp;
+  dibp2 = d__1 * d__1;
+  /* Update f1 and f2. */
+  /*    temporarily set f1 and f2 for col=0. */
+  f1 = f1 + dt * f2 + dibp2 - *theta * dibp * zibp;
+  f2 -= *theta * dibp2;
+  if (*col > 0) {
+    /* update c = c + dt*p. */
+    daxpy_(&col2, &dt, &p[1], &c__1, &c__[1], &c__1);
+    /* choose wbp, */
+    /* the row of W corresponding to the breakpoint encountered. */
+    pointr = *head;
+    i__1 = *col;
+    for (j = 1; j <= i__1; ++j) {
+      wbp[j] = wy[ibp + pointr * wy_dim1];
+      wbp[*col + j] = *theta * ws[ibp + pointr * ws_dim1];
+      pointr = pointr % *m + 1;
+    }
+    /* compute (wbp)Mc, (wbp)Mp, and (wbp)M(wbp)'. */
+    bmv_(m, &sy[sy_offset], &wt[wt_offset], col, &wbp[1], &v[1], info);
+    if (*info != 0) {
+      return 0;
+    }
+    wmc = ddot_(&col2, &c__[1], &c__1, &v[1], &c__1);
+    wmp = ddot_(&col2, &p[1], &c__1, &v[1], &c__1);
+    wmw = ddot_(&col2, &wbp[1], &c__1, &v[1], &c__1);
+    /* update p = p - dibp*wbp. */
+    d__1 = -dibp;
+    daxpy_(&col2, &d__1, &wbp[1], &c__1, &p[1], &c__1);
+    /* complete updating f1 and f2 while col > 0. */
+    f1 += dibp * wmc;
+    f2 = f2 + dibp * 2. * wmp - dibp2 * wmw;
+  }
+  d__1 = *epsmch * f2_org__;
+  f2 = d__1 > f2 ? d__1 : f2;
+  if (nleft > 0) {
+    dtm = -f1 / f2;
+    goto L777;
+    /* to repeat the loop for unsearched intervals. */
+  } else if (bnded) {
+    f1 = 0.;
+    f2 = 0.;
+    dtm = 0.;
+  } else {
+    dtm = -f1 / f2;
+  }
+  /* ------------------- the end of the loop ------------------------------- */
+L888:
+  if (*iprint >= 99) {
+    fprintf(stdout, "\n");
+    fprintf(stdout, " GCP found in this segment\n");
+    fprintf(stdout, "Piece    %3ld --f1, f2 at start point  %11.4E %11.4E\n", *nseg, f1, f2);
+    fprintf(stdout, "Distance to the stationary point =  %11.4E\n", dtm);
+  }
+  if (dtm <= 0.) {
+    dtm = 0.;
+  }
+  tsum += dtm;
+  /* Move free variables (i.e., the ones w/o breakpoints) and */
+  /*   the variables whose breakpoints haven't been reached. */
+  daxpy_(n, &tsum, &d__[1], &c__1, &xcp[1], &c__1);
+L999:
+  /* Update c = c + dtm*p = W'(x^c - x) */
+  /*   which will be used in computing r = Z'(B(x^c - x) + g). */
+  if (*col > 0) {
+    daxpy_(&col2, &dtm, &p[1], &c__1, &c__[1], &c__1);
+  }
+  if (*iprint > 100) {
+    fprintf(stdout, "Cauchy X =  \n");
+    fprintf(stdout, "    ");
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+      fprintf(stdout, " %11.4E", xcp[i__]);
+      if (i__ % 6 == 0) {
+        fprintf(stdout, "\n");
+        fprintf(stdout, "    ");
+      }
+    }
+    fprintf(stdout, "\n");
+  }
+  if (*iprint >= 99) {
+    fprintf(stdout, "\n---------------- exit CAUCHY----------------------\n\n");
+  }
+  return 0;
+}
+/**
+ * Subroutine cmprlb
+ *
+ *       This subroutine computes r=-Z'B(xcp-xk)-Z'g by using
+ *         wa(2m+1)=W'(xcp-x) from subroutine cauchy.
+ *
+ *     Subprograms called:
+ *
+ *       L-BFGS-B Library ... bmv.
+ *
+ *                           *  *  *
+ *
+ *     NEOS, November 1994. (Latest revision June 1996.)
+ *     Optimization Technology Center.
+ *     Argonne National Laboratory and Northwestern University.
+ *     Written by
+ *                        Ciyou Zhu
+ *     in collaboration with R.H. Byrd, P. Lu-Chen and J. Nocedal.
+ */
+int cmprlb_(long *n, long *m, double *x,
+  double *g, double *ws, double *wy, double *sy,
+  double *wt, double *z__, double *r__, double *wa,
+  long *index, double *theta, long *col, long *head,
+  long *nfree, long *cnstnd, long *info)
+{
+  long ws_dim1, ws_offset, wy_dim1, wy_offset, sy_dim1, sy_offset, wt_dim1, wt_offset, i__1, i__2;
+  static long i__, j, k;
+  static double a1, a2;
+  static long pointr;
+  --index;
+  --r__;
+  --z__;
+  --g;
+  --x;
+  --wa;
+  wt_dim1 = *m;
+  wt_offset = 1 + wt_dim1;
+  wt -= wt_offset;
+  sy_dim1 = *m;
+  sy_offset = 1 + sy_dim1;
+  sy -= sy_offset;
+  wy_dim1 = *n;
+  wy_offset = 1 + wy_dim1;
+  wy -= wy_offset;
+  ws_dim1 = *n;
+  ws_offset = 1 + ws_dim1;
+  ws -= ws_offset;
+  if (! (*cnstnd) && *col > 0) {
+    i__1 = *n;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+      r__[i__] = -g[i__];
+    }
+  } else {
+    i__1 = *nfree;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+      k = index[i__];
+      r__[i__] = -(*theta) * (z__[k] - x[k]) - g[k];
+    }
+    bmv_(m, &sy[sy_offset], &wt[wt_offset], col, &wa[(*m << 1) + 1], &wa[1], info);
+    if (*info != 0) {
+      *info = -8;
+      return 0;
+    }
+    pointr = *head;
+    i__1 = *col;
+    for (j = 1; j <= i__1; ++j) {
+      a1 = wa[j];
+      a2 = *theta * wa[*col + j];
+      i__2 = *nfree;
+      for (i__ = 1; i__ <= i__2; ++i__) {
+        k = index[i__];
+        r__[i__] = r__[i__] + wy[k + pointr * wy_dim1] * a1 + ws[k + pointr * ws_dim1] * a2;
+      }
+      pointr = pointr % *m + 1;
+    }
+  }
+  return 0;
+}
+/**
+ * Subroutine errclb
+ *
+ *    This subroutine checks the validity of the input data.
+ *
+ *                          *  *  *
+ *
+ *    NEOS, November 1994. (Latest revision June 1996.)
+ *    Optimization Technology Center.
+ *    Argonne National Laboratory and Northwestern University.
+ *    Written by
+ *                       Ciyou Zhu
+ *    in collaboration with R.H. Byrd, P. Lu-Chen and J. Nocedal.
+ */
+int errclb_(long *n, long *m, double *factr,
+  double *l, double *u, long *nbd, char *task, long *info, long *k)
+{
+  long i__1;
+  static long i__;
+  --nbd;
+  --u;
+  --l;
+  /* Check the input arguments for errors. */
+  if (*n <= 0) {
+    strcpy(task, "ERROR: N .LE. 0");
+  }
+  if (*m <= 0) {
+    strcpy(task, "ERROR: M .LE. 0");
+  }
+  if (*factr < 0.) {
+    strcpy(task, "ERROR: FACTR .LT. 0");
+  }
+  /* Check the validity of the arrays nbd(i), u(i), and l(i). */
+  i__1 = *n;
+  for (i__ = 1; i__ <= i__1; ++i__) {
+    if (nbd[i__] < 0 || nbd[i__] > 3) {
+      /* return */
+      strcpy(task, "ERROR: INVALID NBD");
+      *info = -6;
+      *k = i__;
+    }
+    if (nbd[i__] == 2) {
+      if (l[i__] > u[i__]) {
+        /* return */
+        strcpy(task, "ERROR: NO FEASIBLE SOLUTION");
+        *info = -7;
+        *k = i__;
+      }
+    }
+  }
+  return 0;
+}
+/**
+ * Subroutine formk
+ *
+ *     This subroutine forms  the LEL^T factorization of the indefinite
+ *
+ *       matrix    K = [-D -Y'ZZ'Y/theta     L_a'-R_z'  ]
+ *                     [L_a -R_z           theta*S'AA'S ]
+ *                                                    where E = [-I  0]
+ *                                                              [ 0  I]
+ *     The matrix K can be shown to be equal to the matrix M^[-1]N
+ *       occurring in section 5.1 of [1], as well as to the matrix
+ *       Mbar^[-1] Nbar in section 5.3.
+ *
+ *     n is an long variable.
+ *       On entry n is the dimension of the problem.
+ *       On exit n is unchanged.
+ *
+ *     nsub is an long variable
+ *       On entry nsub is the number of subspace variables in free set.
+ *       On exit nsub is not changed.
+ *
+ *     ind is an long array of dimension nsub.
+ *       On entry ind specifies the indices of subspace variables.
+ *       On exit ind is unchanged.
+ *
+ *     nenter is an long variable.
+ *       On entry nenter is the number of variables entering the
+ *         free set.
+ *       On exit nenter is unchanged.
+ *
+ *     ileave is an long variable.
+ *       On entry indx2(ileave),...,indx2(n) are the variables leaving
+ *         the free set.
+ *       On exit ileave is unchanged.
+ *
+ *     indx2 is an long array of dimension n.
+ *       On entry indx2(1),...,indx2(nenter) are the variables entering
+ *         the free set, while indx2(ileave),...,indx2(n) are the
+ *         variables leaving the free set.
+ *       On exit indx2 is unchanged.
+ *
+ *     iupdat is an long variable.
+ *       On entry iupdat is the total number of BFGS updates made so far.
+ *       On exit iupdat is unchanged.
+ *
+ *     updatd is a logical variable.
+ *       On entry 'updatd' is true if the L-BFGS matrix is updatd.
+ *       On exit 'updatd' is unchanged.
+ *
+ *     wn is a double precision array of dimension 2m x 2m.
+ *       On entry wn is unspecified.
+ *       On exit the upper triangle of wn stores the LEL^T factorization
+ *         of the 2*col x 2*col indefinite matrix
+ *                     [-D -Y'ZZ'Y/theta     L_a'-R_z'  ]
+ *                     [L_a -R_z           theta*S'AA'S ]
+ *
+ *     wn1 is a double precision array of dimension 2m x 2m.
+ *       On entry wn1 stores the lower triangular part of
+ *                     [Y' ZZ'Y   L_a'+R_z']
+ *                     [L_a+R_z   S'AA'S   ]
+ *         in the previous iteration.
+ *       On exit wn1 stores the corresponding updated matrices.
+ *       The purpose of wn1 is just to store these inner products
+ *       so they can be easily updated and inserted into wn.
+ *
+ *     m is an long variable.
+ *       On entry m is the maximum number of variable metric corrections
+ *         used to define the limited memory matrix.
+ *       On exit m is unchanged.
+ *
+ *     ws, wy, sy, and wtyy are double precision arrays;
+ *     theta is a double precision variable;
+ *     col is an long variable;
+ *     head is an long variable.
+ *       On entry they store the information defining the
+ *                                          limited memory BFGS matrix:
+ *         ws(n,m) stores S, a set of s-vectors;
+ *         wy(n,m) stores Y, a set of y-vectors;
+ *         sy(m,m) stores S'Y;
+ *         wtyy(m,m) stores the Cholesky factorization
+ *                                   of (theta*S'S+LD^(-1)L')
+ *         theta is the scaling factor specifying B_0 = theta I;
+ *         col is the number of variable metric corrections stored;
+ *         head is the location of the 1st s- (or y-) vector in S (or Y).
+ *       On exit they are unchanged.
+ *
+ *     info is an long variable.
+ *       On entry info is unspecified.
+ *       On exit info =  0 for normal return;
+ *                    = -1 when the 1st Cholesky factorization failed;
+ *                    = -2 when the 2st Cholesky factorization failed.
+ *
+ *     Subprograms called:
+ *
+ *       Linpack ... dcopy, dpofa, dtrsl.
+ *
+ *
+ *     References:
+ *       [1] R. H. Byrd, P. Lu, J. Nocedal and C. Zhu, ``A limited
+ *       memory algorithm for bound constrained optimization'',
+ *       SIAM J. Scientific Computing 16 (1995), no. 5, pp. 1190--1208.
+ *
+ *       [2] C. Zhu, R.H. Byrd, P. Lu, J. Nocedal, ``L-BFGS-B: a
+ *       limited memory FORTRAN code for solving bound constrained
+ *       optimization problems'', Tech. Report, NAM-11, EECS Department,
+ *       Northwestern University, 1994.
+ *
+ *       (Postscript files of these papers are available via anonymous
+ *        ftp to eecs.nwu.edu in the directory pub/lbfgs/lbfgs_bcm.)
+ *
+ *                           *  *  *
+ *
+ *     NEOS, November 1994. (Latest revision June 1996.)
+ *     Optimization Technology Center.
+ *     Argonne National Laboratory and Northwestern University.
+ *     Written by
+ *                        Ciyou Zhu
+ *     in collaboration with R.H. Byrd, P. Lu-Chen and J. Nocedal.
+ */
+int formk_(long *n, long *nsub, long *ind, long *nenter,
+  long *ileave, long *indx2, long *iupdat, long *updatd,
+  double *wn, double *wn1, long *m, double *ws,
+  double *wy, double *sy, double *theta, long *col,
+  long *head, long *info)
+{
+  long wn_dim1, wn_offset, wn1_dim1, wn1_offset, ws_dim1, ws_offset,
+    wy_dim1, wy_offset, sy_dim1, sy_offset, i__1, i__2, i__3;
+  static long i__, k, k1, m2, is, js, iy, jy, is1, js1, col2, dend, pend;
+  static long upcl;
+  static double temp1, temp2, temp3, temp4;
+  static long ipntr, jpntr, dbegin, pbegin;
+  --indx2;
+  --ind;
+  sy_dim1 = *m;
+  sy_offset = 1 + sy_dim1;
+  sy -= sy_offset;
+  wy_dim1 = *n;
+  wy_offset = 1 + wy_dim1;
+  wy -= wy_offset;
+  ws_dim1 = *n;
+  ws_offset = 1 + ws_dim1;
+  ws -= ws_offset;
+  wn1_dim1 = 2 * *m;
+  wn1_offset = 1 + wn1_dim1;
+  wn1 -= wn1_offset;
+  wn_dim1 = 2 * *m;
+  wn_offset = 1 + wn_dim1;
+  wn -= wn_offset;
+  /* Form the lower triangular part of */
+  /*           WN1 = [Y' ZZ'Y   L_a'+R_z'] */
+  /*                 [L_a+R_z   S'AA'S   ] */
+  /*    where L_a is the strictly lower triangular part of S'AA'Y */
+  /*          R_z is the upper triangular part of S'ZZ'Y. */
+  if (*updatd) {
+    if (*iupdat > *m) {
+      /* shift old part of WN1. */
+      i__1 = *m - 1;
+      for (jy = 1; jy <= i__1; ++jy) {
+        js = *m + jy;
+        i__2 = *m - jy;
+        dcopy_(&i__2, &wn1[jy + 1 + (jy + 1) * wn1_dim1], &c__1, &wn1[jy + jy * wn1_dim1], &c__1);
+        i__2 = *m - jy;
+        dcopy_(&i__2, &wn1[js + 1 + (js + 1) * wn1_dim1], &c__1, &wn1[js + js * wn1_dim1], &c__1);
+        i__2 = *m - 1;
+        dcopy_(&i__2, &wn1[*m + 2 + (jy + 1) * wn1_dim1], &c__1, &wn1[*m + 1 + jy * wn1_dim1], &c__1);
+      }
+    }
+    /* put new rows in blocks (1,1), (2,1) and (2,2). */
+    pbegin = 1;
+    pend = *nsub;
+    dbegin = *nsub + 1;
+    dend = *n;
+    iy = *col;
+    is = *m + *col;
+    ipntr = *head + *col - 1;
+    if (ipntr > *m) {
+      ipntr -= *m;
+    }
+    jpntr = *head;
+    i__1 = *col;
+    for (jy = 1; jy <= i__1; ++jy) {
+      js = *m + jy;
+      temp1 = 0.;
+      temp2 = 0.;
+      temp3 = 0.;
+      /* compute element jy of row 'col' of Y'ZZ'Y */
+      i__2 = pend;
+      for (k = pbegin; k <= i__2; ++k) {
+        k1 = ind[k];
+        temp1 += wy[k1 + ipntr * wy_dim1] * wy[k1 + jpntr * wy_dim1];
+      }
+      /* compute elements jy of row 'col' of L_a and S'AA'S */
+      i__2 = dend;
+      for (k = dbegin; k <= i__2; ++k) {
+        k1 = ind[k];
+        temp2 += ws[k1 + ipntr * ws_dim1] * ws[k1 + jpntr * ws_dim1];
+        temp3 += ws[k1 + ipntr * ws_dim1] * wy[k1 + jpntr * wy_dim1];
+      }
+      wn1[iy + jy * wn1_dim1] = temp1;
+      wn1[is + js * wn1_dim1] = temp2;
+      wn1[is + jy * wn1_dim1] = temp3;
+      jpntr = jpntr % *m + 1;
+    }
+    /* put new column in block (2,1). */
+    jy = *col;
+    jpntr = *head + *col - 1;
+    if (jpntr > *m) {
+      jpntr -= *m;
+    }
+    ipntr = *head;
+    i__1 = *col;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+      is = *m + i__;
+      temp3 = 0.;
+      /* compute element i of column 'col' of R_z */
+      i__2 = pend;
+      for (k = pbegin; k <= i__2; ++k) {
+        k1 = ind[k];
+        temp3 += ws[k1 + ipntr * ws_dim1] * wy[k1 + jpntr * wy_dim1];
+      }
+      ipntr = ipntr % *m + 1;
+      wn1[is + jy * wn1_dim1] = temp3;
+    }
+    upcl = *col - 1;
+  } else {
+    upcl = *col;
+  }
+  /* modify the old parts in blocks (1,1) and (2,2) due to changes */
+  /* in the set of free variables. */
+  ipntr = *head;
+  i__1 = upcl;
+  for (iy = 1; iy <= i__1; ++iy) {
+    is = *m + iy;
+    jpntr = *head;
+    i__2 = iy;
+    for (jy = 1; jy <= i__2; ++jy) {
+      js = *m + jy;
+      temp1 = 0.;
+      temp2 = 0.;
+      temp3 = 0.;
+      temp4 = 0.;
+      i__3 = *nenter;
+      for (k = 1; k <= i__3; ++k) {
+        k1 = indx2[k];
+        temp1 += wy[k1 + ipntr * wy_dim1] * wy[k1 + jpntr * wy_dim1];
+        temp2 += ws[k1 + ipntr * ws_dim1] * ws[k1 + jpntr * ws_dim1];
+      }
+      i__3 = *n;
+      for (k = *ileave; k <= i__3; ++k) {
+        k1 = indx2[k];
+        temp3 += wy[k1 + ipntr * wy_dim1] * wy[k1 + jpntr * wy_dim1];
+        temp4 += ws[k1 + ipntr * ws_dim1] * ws[k1 + jpntr * ws_dim1];
+      }
+      wn1[iy + jy * wn1_dim1] = wn1[iy + jy * wn1_dim1] + temp1 - temp3;
+      wn1[is + js * wn1_dim1] = wn1[is + js * wn1_dim1] - temp2 + temp4;
+      jpntr = jpntr % *m + 1;
+    }
+    ipntr = ipntr % *m + 1;
+  }
+  /* modify the old parts in block (2,1). */
+  ipntr = *head;
+  i__1 = *m + upcl;
+  for (is = *m + 1; is <= i__1; ++is) {
+    jpntr = *head;
+    i__2 = upcl;
+    for (jy = 1; jy <= i__2; ++jy) {
+      temp1 = 0.;
+      temp3 = 0.;
+      i__3 = *nenter;
+      for (k = 1; k <= i__3; ++k) {
+        k1 = indx2[k];
+        temp1 += ws[k1 + ipntr * ws_dim1] * wy[k1 + jpntr * wy_dim1];
+      }
+      i__3 = *n;
+      for (k = *ileave; k <= i__3; ++k) {
+        k1 = indx2[k];
+        temp3 += ws[k1 + ipntr * ws_dim1] * wy[k1 + jpntr * wy_dim1];
+      }
+      if (is <= jy + *m) {
+        wn1[is + jy * wn1_dim1] = wn1[is + jy * wn1_dim1] + temp1 - temp3;
+      } else {
+        wn1[is + jy * wn1_dim1] = wn1[is + jy * wn1_dim1] - temp1 + temp3;
+      }
+      jpntr = jpntr % *m + 1;
+    }
+    ipntr = ipntr % *m + 1;
+  }
+  /* Form the upper triangle of WN = [D+Y' ZZ'Y/theta   -L_a'+R_z' ] */
+  /*                                 [-L_a +R_z        S'AA'S*theta] */
+  m2 = *m << 1;
+  i__1 = *col;
+  for (iy = 1; iy <= i__1; ++iy) {
+    is = *col + iy;
+    is1 = *m + iy;
+    i__2 = iy;
+    for (jy = 1; jy <= i__2; ++jy) {
+      js = *col + jy;
+      js1 = *m + jy;
+      wn[jy + iy * wn_dim1] = wn1[iy + jy * wn1_dim1] / *theta;
+      wn[js + is * wn_dim1] = wn1[is1 + js1 * wn1_dim1] * *theta;
+    }
+    i__2 = iy - 1;
+    for (jy = 1; jy <= i__2; ++jy) {
+      wn[jy + is * wn_dim1] = -wn1[is1 + jy * wn1_dim1];
+    }
+    i__2 = *col;
+    for (jy = iy; jy <= i__2; ++jy) {
+      wn[jy + is * wn_dim1] = wn1[is1 + jy * wn1_dim1];
+    }
+    wn[iy + iy * wn_dim1] += sy[iy + iy * sy_dim1];
+  }
+  /* Form the upper triangle of WN= [  LL'            L^-1(-L_a'+R_z')] */
+  /*                                [(-L_a +R_z)L'^-1   S'AA'S*theta  ] */
+  /*    first Cholesky factor (1,1) block of wn to get LL' */
+  /*                      with L' stored in the upper triangle of wn. */
+  dpofa_(&wn[wn_offset], &m2, col, info);
+  if (*info != 0) {
+    *info = -1;
+    return 0;
+  }
+  /* then form L^-1(-L_a'+R_z') in the (1,2) block. */
+  col2 = *col << 1;
+  i__1 = col2;
+  for (js = *col + 1; js <= i__1; ++js) {
+    dtrsl_(&wn[wn_offset], &m2, col, &wn[js * wn_dim1 + 1], &c__11, info);
+  }
+  /* Form S'AA'S*theta + (L^-1(-L_a'+R_z'))'L^-1(-L_a'+R_z') in the */
+  /*    upper triangle of (2,2) block of wn. */
+  i__1 = col2;
+  for (is = *col + 1; is <= i__1; ++is) {
+    i__2 = col2;
+    for (js = is; js <= i__2; ++js) {
+      wn[is + js * wn_dim1] += ddot_(col, &wn[is * wn_dim1 + 1], &c__1, &wn[js * wn_dim1 + 1], &c__1);
+    }
+  }
+  /* Cholesky factorization of (2,2) block of wn. */
+  dpofa_(&wn[*col + 1 + (*col + 1) * wn_dim1], &m2, col, info);
+  if (*info != 0) {
+    *info = -2;
+    return 0;
+  }
+  return 0;
+}
+/**
+ * Subroutine formt
+ *
+ *       This subroutine forms the upper half of the pos. def. and symm.
+ *         T = theta*SS + L*D^(-1)*L', stores T in the upper triangle
+ *         of the array wt, and performs the Cholesky factorization of T
+ *         to produce J*J', with J' stored in the upper triangle of wt.
+ *
+ *     Subprograms called:
+ *
+ *       Linpack ... dpofa.
+ *
+ *                           *  *  *
+ *
+ *     NEOS, November 1994. (Latest revision June 1996.)
+ *     Optimization Technology Center.
+ *     Argonne National Laboratory and Northwestern University.
+ *     Written by
+ *                        Ciyou Zhu
+ *     in collaboration with R.H. Byrd, P. Lu-Chen and J. Nocedal.
+ */
+int formt_(long *m, double *wt, double *sy,
+  double *ss, long *col, double *theta, long *info)
+{
+  long wt_dim1, wt_offset, sy_dim1, sy_offset, ss_dim1, ss_offset, i__1, i__2, i__3;
+  static long i__, j, k, k1;
+  static double ddum;
+  ss_dim1 = *m;
+  ss_offset = 1 + ss_dim1;
+  ss -= ss_offset;
+  sy_dim1 = *m;
+  sy_offset = 1 + sy_dim1;
+  sy -= sy_offset;
+  wt_dim1 = *m;
+  wt_offset = 1 + wt_dim1;
+  wt -= wt_offset;
+  /* Form the upper half of  T = theta*SS + L*D^(-1)*L', */
+  /*    store T in the upper triangle of the array wt. */
+  i__1 = *col;
+  for (j = 1; j <= i__1; ++j) {
+    wt[j * wt_dim1 + 1] = *theta * ss[j * ss_dim1 + 1];
+  }
+  i__1 = *col;
+  for (i__ = 2; i__ <= i__1; ++i__) {
+    i__2 = *col;
+    for (j = i__; j <= i__2; ++j) {
+      k1 = (i__ <= j ? i__ : j) - 1;
+      ddum = 0.;
+      i__3 = k1;
+      for (k = 1; k <= i__3; ++k) {
+        ddum += sy[i__ + k * sy_dim1] * sy[j + k * sy_dim1] / sy[k + k * sy_dim1];
+      }
+      wt[i__ + j * wt_dim1] = ddum + *theta * ss[i__ + j * ss_dim1];
+    }
+  }
+  /* Cholesky factorize T to J*J' with */
+  /*    J' stored in the upper triangle of wt. */
+  dpofa_(&wt[wt_offset], m, col, info);
+  if (*info != 0) {
+    *info = -3;
+  }
+  return 0;
+}
+/**
+ * Subroutine freev
+ *
+ *     This subroutine counts the entering and leaving variables when
+ *       iter > 0, and finds the index set of free and active variables
+ *       at the GCP.
+ *
+ *     cnstnd is a logical variable indicating whether bounds are present
+ *
+ *     index is an long array of dimension n
+ *       for i=1,...,nfree, index(i) are the indices of free variables
+ *       for i=nfree+1,...,n, index(i) are the indices of bound variables
+ *       On entry after the first iteration, index gives
+ *         the free variables at the previous iteration.
+ *       On exit it gives the free variables based on the determination
+ *         in cauchy using the array iwhere.
+ *
+ *     indx2 is an long array of dimension n
+ *       On entry indx2 is unspecified.
+ *       On exit with iter>0, indx2 indicates which variables
+ *          have changed status since the previous iteration.
+ *       For i= 1,...,nenter, indx2(i) have changed from bound to free.
+ *       For i= ileave+1,...,n, indx2(i) have changed from free to bound.
+ *
+ *                           *  *  *
+ *
+ *     NEOS, November 1994. (Latest revision June 1996.)
+ *     Optimization Technology Center.
+ *     Argonne National Laboratory and Northwestern University.
+ *     Written by
+ *                        Ciyou Zhu
+ *     in collaboration with R.H. Byrd, P. Lu-Chen and J. Nocedal.
+ */
+int freev_(long *n, long *nfree, long *index,
+  long *nenter, long *ileave, long *indx2, long *iwhere,
+  long *wrk, long *updatd, long *cnstnd, long *iprint,
+  long *iter)
+{
+  long i__1;
+  static long i__, k, iact;
+  --iwhere;
+  --indx2;
+  --index;
+  *nenter = 0;
+  *ileave = *n + 1;
+  if (*iter > 0 && *cnstnd) {
+    /* count the entering and leaving variables. */
+    i__1 = *nfree;
+    for (i__ = 1; i__ <= i__1; ++i__) {
+      k = index[i__];
+      /* write(6,*) ' k  = index(i) ', k */
+      /* write(6,*) ' index = ', i */
+      if (iwhere[k] > 0) {
+        --(*ileave);
+        indx2[*ileave] = k;
+        if (*iprint >= 100) {
+          fprintf(stdout, " Variable %2ld leaves the set of free variables\n", k);
+        }
+      }
+    }
+    i__1 = *n;
+    for (i__ = *nfree + 1; i__ <= i__1; ++i__) {
+      k = index[i__];
+      if (iwhere[k] <= 0) {
+        ++(*nenter);
+        indx2[*nenter] = k;
+        if (*iprint >= 100) {
+          fprintf(stdout, " Variable %2ld enters the set of free variables\n", k);
+        }
+      }
+    }
+    if (*iprint >= 99) {
+      i__1 = *n + 1 - *ileave;
+      fprintf(stdout,  " %2ld variables leave; %2ld variables enter\n", i__1, *nenter);
+    }
+  }
+  *wrk = *ileave < *n + 1 || *nenter > 0 || *updatd;
+  /* Find the index set of free and active variables at the GCP. */
+  *nfree = 0;
+  iact = *n + 1;
+  i__1 = *n;
+  for (i__ = 1; i__ <= i__1; ++i__) {
+    if (iwhere[i__] <= 0) {
+      ++(*nfree);
+      index[*nfree] = i__;
+    } else {
+      --iact;
+      index[iact] = i__;
+    }
+  }
+  if (*iprint >= 99) {
+    i__1 = *iter + 1;
+    fprintf(stdout, " %2ld variables are free at GCP %3ld\n", *nfree, i__1);
+  }
+  return 0;
+}
+/**
+ * Subroutine hpsolb
+ *
+ *     This subroutine sorts out the least element of t, and puts the
+ *       remaining elements of t in a heap.
+ *
+ *     n is an long variable.
+ *       On entry n is the dimension of the arrays t and iorder.
+ *       On exit n is unchanged.
+ *
+ *     t is a double precision array of dimension n.
+ *       On entry t stores the elements to be sorted,
+ *       On exit t(n) stores the least elements of t, and t(1) to t(n-1)
+ *         stores the remaining elements in the form of a heap.
+ *
+ *     iorder is an long array of dimension n.
+ *       On entry iorder(i) is the index of t(i).
+ *       On exit iorder(i) is still the index of t(i), but iorder may be
+ *         permuted in accordance with t.
+ *
+ *     iheap is an long variable specifying the task.
+ *       On entry iheap should be set as follows:
+ *         iheap .eq. 0 if t(1) to t(n) is not in the form of a heap,
+ *         iheap .ne. 0 if otherwise.
+ *       On exit iheap is unchanged.
+ *
+ *
+ *     References:
+ *       Algorithm 232 of CACM (J. W. J. Williams): HEAPSORT.
+ *
+ *                           *  *  *
+ *
+ *     NEOS, November 1994. (Latest revision June 1996.)
+ *     Optimization Technology Center.
+ *     Argonne National Laboratory and Northwestern University.
+ *     Written by
+ *                        Ciyou Zhu
+ *     in collaboration with R.H. Byrd, P. Lu-Chen and J. Nocedal.
+ */
+int hpsolb_(long *n, double *t, long *iorder, long *iheap)
+{
+  long i__1;
+  static long i__, j, k;
+  static double out, ddum;
+  static long indxin, indxou;
+  --iorder;
+  --t;
+  if (*iheap == 0) {
+    /* Rearrange the elements t(1) to t(n) to form a heap. */
+    i__1 = *n;
+    for (k = 2; k <= i__1; ++k) {
+      ddum = t[k];
+      indxin = iorder[k];
+      /* Add ddum to the heap. */
+      i__ = k;
+L10:
+      if (i__ > 1) {
+        j = i__ / 2;
+        if (ddum < t[j]) {
+          t[i__] = t[j];
+          iorder[i__] = iorder[j];
+          i__ = j;
+          goto L10;
+        }
+      }
+      t[i__] = ddum;
+      iorder[i__] = indxin;
+    }
+  }
+  /* Assign to 'out' the value of t(1), the least member of the heap, */
+  /* and rearrange the remaining members to form a heap as */
+  /* elements 1 to n-1 of t. */
+  if (*n > 1) {
+    i__ = 1;
+    out = t[1];
+    indxou = iorder[1];
+    ddum = t[*n];
+    indxin = iorder[*n];
+    /* Restore the heap */
+L30:
+    j = i__ + i__;
+    if (j <= *n - 1) {
+      if (t[j + 1] < t[j]) {
+        ++j;
+      }
+      if (t[j] < ddum) {
+        t[i__] = t[j];
+        iorder[i__] = iorder[j];
+        i__ = j;
+        goto L30;
+      }
+    }
+    t[i__] = ddum;
+    iorder[i__] = indxin;
+    /* Put the least member in t(n). */
+    t[*n] = out;
+    iorder[*n] = indxou;
+  }
+  return 0;
+}
+/**
+ * Subroutine lnsrlb
+ *
+ *     This subroutine calls subroutine dcsrch from the Minpack2 library
+ *       to perform the line search.  Subroutine dscrch is safeguarded so
+ *       that all trial points lie within the feasible region.
+ *
+ *     Subprograms called:
+ *
+ *       Minpack2 Library ... dcsrch.
+ *
+ *       Linpack ... dtrsl, ddot.
+ *
+ *                           *  *  *
+ *
+ *     NEOS, November 1994. (Latest revision June 1996.)
+ *     Optimization Technology Center.
+ *     Argonne National Laboratory and Northwestern University.
+ *     Written by
+ *                        Ciyou Zhu
+ *     in collaboration with R.H. Byrd, P. Lu-Chen and J. Nocedal.
+ */
+int lnsrlb_(long *n, double *l, double *u,
+  long *nbd, double *x, double *f, double *fold,
+  double *gd, double *gdold, double *g, double *d__,
+  double *r__, double *t, double *z__, double *stp,
+  double *dnorm, double *dtd, double *xstep, double *stpmx,
+  long *iter, long *ifun, long *iback, long *nfgv,
+  long *info, char *task, long *boxed, long *cnstnd,
+  char *csave, long *isave, double *dsave)
+{
+  long i__1;
+  double d__1;
+  static long i__;
+  static double a1, a2;
+  --z__;
+  --t;
+  --r__;
+  --d__;
+  --g;
+  --x;
+  --nbd;
+  --u;
+  --l;
+  --isave;
+  --dsave;
+  if (strncmp(task, "FG_LN", 5) == 0) {
+    goto L556;
+  }
+  *dtd = ddot_(n, &d__[1], &c__1, &d__[1], &c__1);
+  *dnorm = sqrt(*dtd);
+  /* Determine the maximum step length. */
+  *stpmx = 1e10;
+  if (*cnstnd) {
+    if (*iter == 0) {
+      *stpmx = 1.;
+    } else {
+      i__1 = *n;
+      for (i__ = 1; i__ <= i__1; ++i__) {
+        a1 = d__[i__];
+        if (nbd[i__] != 0) {
+          if (a1 < 0. && nbd[i__] <= 2) {
+            a2 = l[i__] - x[i__];
+            if (a2 >= 0.) {
+              *stpmx = 0.;
+            } else if (a1 * *stpmx < a2) {
+              *stpmx = a2 / a1;
+            }
+          } else if (a1 > 0. && nbd[i__] >= 2) {
+            a2 = u[i__] - x[i__];
+            if (a2 <= 0.) {
+              *stpmx = 0.;
+            } else if (a1 * *stpmx > a2) {
+              *stpmx = a2 / a1;
+            }
+          }
+        }
+      }
+    }
+  }
+  if (*iter == 0 && ! (*boxed)) {
+    d__1 = 1. / *dnorm;
+    *stp = d__1 <= *stpmx ? d__1 : *stpmx;
+  } else {
+    *stp = 1.;
+  }
+  dcopy_(n, &x[1], &c__1, &t[1], &c__1);
+  dcopy_(n, &g[1], &c__1, &r__[1], &c__1);
+  *fold = *f;
+  *ifun = 0;
+  *iback = 0;
+  strcpy(csave, "START");
+L556:
+  *gd = ddot_(n, &g[1], &c__1, &d__[1], &c__1);
+  if (*ifun == 0) {
+    *gdold = *gd;
+    if (*gd >= 0.) {
+      /* the directional derivative >=0. */
+      /* Line search is impossible. */
+      fprintf(stdout, "  ascent direction in projection gd =  %.8E\n", *gd);
+      *info = -4;
+      return 0;
+    }
+  }
+  dcsrch_(f, gd, stp, &c_b280, &c_b281, &c_b282, &c_b9, stpmx, csave, &isave[1], &dsave[1]);
+  *xstep = *stp * *dnorm;
+  if (strncmp(csave, "CONV", 4) != 0 && strncmp(csave, "WARN", 4) != 0) {
+    strcpy(task, "FG_LNSRCH");
+    ++(*ifun);
+    ++(*nfgv);
+    *iback = *ifun - 1;
+    if (*stp == 1.) {
+      dcopy_(n, &z__[1], &c__1, &x[1], &c__1);
+    } else {
+      i__1 = *n;
+      for (i__ = 1; i__ <= i__1; ++i__) {
+        x[i__] = *stp * d__[i__] + t[i__];
+      }
+    }
+  } else {
+    strcpy(task, "NEW_X");
+  }
+  return 0;
+}
+/**
+ * Subroutine matupd
+ *
+ *       This subroutine updates matrices WS and WY, and forms the
+ *         middle matrix in B.
+ *
+ *     Subprograms called:
+ *
+ *       Linpack ... dcopy, ddot.
+ *
+ *                           *  *  *
+ *
+ *     NEOS, November 1994. (Latest revision June 1996.)
+ *     Optimization Technology Center.
+ *     Argonne National Laboratory and Northwestern University.
+ *     Written by
+ *                        Ciyou Zhu
+ *     in collaboration with R.H. Byrd, P. Lu-Chen and J. Nocedal.
+ */
+int matupd_(long *n, long *m, double *ws,
+  double *wy, double *sy, double *ss, double *d__,
+  double *r__, long *itail, long *iupdat, long *col,
+  long *head, double *theta, double *rr, double *dr,
+  double *stp, double *dtd)
+{
+  long ws_dim1, ws_offset, wy_dim1, wy_offset, sy_dim1, sy_offset, ss_dim1, ss_offset, i__1, i__2;
+  static long j;
+  static long pointr;
+  --r__;
+  --d__;
+  ss_dim1 = *m;
+  ss_offset = 1 + ss_dim1;
+  ss -= ss_offset;
+  sy_dim1 = *m;
+  sy_offset = 1 + sy_dim1;
+  sy -= sy_offset;
+  wy_dim1 = *n;
+  wy_offset = 1 + wy_dim1;
+  wy -= wy_offset;
+  ws_dim1 = *n;
+  ws_offset = 1 + ws_dim1;
+  ws -= ws_offset;
+  /* Set pointers for matrices WS and WY. */
+  if (*iupdat <= *m) {
+    *col = *iupdat;
+    *itail = (*head + *iupdat - 2) % *m + 1;
+  } else {
+    *itail = *itail % *m + 1;
+    *head = *head % *m + 1;
+  }
+  /* Update matrices WS and WY. */
+  dcopy_(n, &d__[1], &c__1, &ws[*itail * ws_dim1 + 1], &c__1);
+  dcopy_(n, &r__[1], &c__1, &wy[*itail * wy_dim1 + 1], &c__1);
+  /* Set theta=yy/ys. */
+  *theta = *rr / *dr;
+  /* Form the middle matrix in B. */
+  /* update the upper triangle of SS, */
+  /* and the lower triangle of SY: */
+  if (*iupdat > *m) {
+    /* move old information */
+    i__1 = *col - 1;
+    for (j = 1; j <= i__1; ++j) {
+      dcopy_(&j, &ss[(j + 1) * ss_dim1 + 2], &c__1, &ss[j * ss_dim1 + 1], &c__1);
+      i__2 = *col - j;
+      dcopy_(&i__2, &sy[j + 1 + (j + 1) * sy_dim1], &c__1, &sy[j + j * sy_dim1], &c__1);
+    }
+  }
+  /* add new information: the last row of SY */
+  /* and the last column of SS: */
+  pointr = *head;
+  i__1 = *col - 1;
+  for (j = 1; j <= i__1; ++j) {
+    sy[*col + j * sy_dim1] = ddot_(n, &d__[1], &c__1, &wy[pointr * wy_dim1 + 1], &c__1);
+    ss[j + *col * ss_dim1] = ddot_(n, &ws[pointr * ws_dim1 + 1], &c__1, &d__[1], &c__1);
+    pointr = pointr % *m + 1;
+  }
+  if (*stp == 1.) {
+    ss[*col + *col * ss_dim1] = *dtd;
+  } else {
+    ss[*col + *col * ss_dim1] = *stp * *stp * *dtd;
+  }
+  sy[*col + *col * sy_dim1] = *dr;
+  return 0;
+}
+/**
+ * Subroutine prn1lb
+ *
+ *     This subroutine prints the input data, initial point, upper and
+ *       lower bounds of each variable, machine precision, as well as
+ *       the headings of the output.
+ *
+ *                           *  *  *
+ *
+ *     NEOS, November 1994. (Latest revision June 1996.)
+ *     Optimization Technology Center.
+ *     Argonne National Laboratory and Northwestern University.
+ *     Written by
+ *                        Ciyou Zhu
+ *     in collaboration with R.H. Byrd, P. Lu-Chen and J. Nocedal.
+ */
+int prn1lb_(long *n, long *m, double *l,
+  double *u, double *x, long *iprint, long *itfile,
+  double *epsmch)
+{
+  long i__1;
+  FILE *itfptr;
+  static long i__;
+  --x;
+  --u;
+  --l;
+  if (*iprint >= 0) {
+    fprintf(stdout, "RUNNING THE L-BFGS-B CODE\n\n");
+    fprintf(stdout, "           * * *\n\n");
+    fprintf(stdout, "Machine precision = %.3E\n", *epsmch);
+    fprintf(stdout, " N = %3ld    M = %2ld\n", *n, *m);
+    if (*iprint >= 1) {
+      itfptr = fopen("iterate.dat", "w");
+      fprintf(itfptr, "RUNNING THE L-BFGS-B CODE\n");
+      fprintf(itfptr, "\n");
+      fprintf(itfptr, "it    = iteration number\n");
+      fprintf(itfptr, "nf    = number of function evaluations\n");
+      fprintf(itfptr, "nseg  = number of segments explored during the Cauchy search\n");
+      fprintf(itfptr, "nact  = number of active bounds at the generalized Cauchy point\n");
+      fprintf(itfptr, "sub   = manner in which the subspace minimization terminated:\n");
+      fprintf(itfptr, "        con = converged, bnd = a bound was reached\n");
+      fprintf(itfptr, "itls  = number of iterations performed in the line search\n");
+      fprintf(itfptr, "stepl = step length used\n");
+      fprintf(itfptr, "tstep = norm of the displacement (total step)\n");
+      fprintf(itfptr, "projg = norm of the projected gradient\n");
+      fprintf(itfptr, "f     = function value\n");
+      fprintf(itfptr, "\n");
+      fprintf(itfptr, "           * * *\n\n");
+      fprintf(itfptr, "Machine precision = %.3E\n", *epsmch);
+      fprintf(itfptr, " N = %3ld    M = %2ld\n", *n, *m);
+      fprintf(itfptr, "\n");
+      fprintf(itfptr, "   it   nf  nseg  nact  sub  itls  stepl    tstep     projg        f\n");
+      fclose(itfptr);
+      if (*iprint > 100) {
+        fprintf(stdout, "\n");
+        fprintf(stdout, " L = ");
+        i__1 = *n;
+        for (i__ = 1; i__ <= i__1; ++i__) {
+          fprintf(stdout, " %11.4E", l[i__]);
+          if (i__ % 6 == 0) {
+            fprintf(stdout, "\n");
+            fprintf(stdout, "     ");
+          }
+        }
+        fprintf(stdout, "\n");
+        fprintf(stdout, "\n");
+        fprintf(stdout, " X0 =");
+        i__1 = *n;
+        for (i__ = 1; i__ <= i__1; ++i__) {
+          fprintf(stdout, " %11.4E", x[i__]);
+          if (i__ % 6 == 0) {
+            fprintf(stdout, "\n");
+            fprintf(stdout, "     ");
+          }
+        }
+        fprintf(stdout, "\n");
+        fprintf(stdout, "\n");
+        fprintf(stdout, " U = ");
+        i__1 = *n;
+        for (i__ = 1; i__ <= i__1; ++i__) {
+          fprintf(stdout, " %11.4E", u[i__]);
+          if (i__ % 6 == 0) {
+            fprintf(stdout, "\n");
+            fprintf(stdout, "     ");
+          }
+        }
+        fprintf(stdout, "\n");
+      }
+    }
+  }
+  return 0;
+}
+/**
+ * Subroutine prn2lb
+ *
+ *     This subroutine prints out new information after a successful
+ *       line search.
+ *
+ *                           *  *  *
+ *
+ *     NEOS, November 1994. (Latest revision June 1996.)
+ *     Optimization Technology Center.
+ *     Argonne National Laboratory and Northwestern University.
+ *     Written by
+ *                        Ciyou Zhu
+ *     in collaboration with R.H. Byrd, P. Lu-Chen and J. Nocedal.
+ */
+int prn2lb_(long *n, double *x, double *f,
+  double *g, long *iprint, long *itfile, long *iter,
+  long *nfgv, long *nact, double *sbgnrm, long *nseg, char*word,
+  long *iword, long *iback, double *stp, double *xstep)
+{
+  long i__1;
+  static long i__, imod;
+  FILE *itfptr;
+  --g;
+  --x;
+  /* 'word' records the status of subspace solutions. */
+  if (*iword == 0) {
+    /* the subspace minimization converged. */
+    strcpy(word, "con");
+  } else if (*iword == 1) {
+    /* the subspace minimization stopped at a bound. */
+    strcpy(word, "bnd");
+  } else if (*iword == 5) {
+    /* the truncated Newton step has been used. */
+    strcpy(word, "TNT");
+  } else {
+    strcpy(word, "---");
+  }
+  if (*iprint >= 99) {
+    fprintf(stdout, "LINE SEARCH %ld times; norm of step = %E\n", *iback, *xstep);
+    fprintf(stdout, "\nAt iterate%5ld    f= %12.5E    |proj g|= %12.5E\n", *iter, *f, *sbgnrm);
+    if (*iprint > 100) {
+      fprintf(stdout, "X =");
+      i__1 = *n;
+      for (i__ = 1; i__ <= i__1; ++i__) {
+        fprintf(stdout, "%11.4E ", x[i__]);
+      }
+      fprintf(stdout, "\n");
+      fprintf(stdout, "G =");
+      i__1 = *n;
+      for (i__ = 1; i__ <= i__1; ++i__) {
+        fprintf(stdout, "%11.4E ", g[i__]);
+      }
+      fprintf(stdout, "\n");
+    }
+  } else if (*iprint > 0) {
+    imod = *iter % *iprint;
+    if (imod == 0) {
+      fprintf(stdout, "\nAt iterate%5ld    f= %12.5E    |proj g|= %12.5E\n", *iter, *f, *sbgnrm);
+    }
+  }
+  if (*iprint >= 1) {
+    itfptr = fopen("iterate.dat", "a");
+    fprintf(itfptr, " %4ld %4ld %5ld %5ld  %3s %4ld  %7.1E  %7.1E %10.3E %10.3E\n",
+        *iter, *nfgv, *nseg, *nact, word, *iback, *stp, *xstep, *sbgnrm, *f);
+    fclose(itfptr);
+  }
+  return 0;
+}
+/**
+ * Subroutine prn3lb
+ *
+ *     This subroutine prints out information when either a built-in
+ *       convergence test is satisfied or when an error message is
+ *       generated.
+ *
+ *                           *  *  *
+ *
+ *     NEOS, November 1994. (Latest revision June 1996.)
+ *     Optimization Technology Center.
+ *     Argonne National Laboratory and Northwestern University.
+ *     Written by
+ *                        Ciyou Zhu
+ *     in collaboration with R.H. Byrd, P. Lu-Chen and J. Nocedal.
+ */
+int prn3lb_(long *n, double *x, double *f, char *task,
+  long *iprint, long *info, long *itfile, long *iter,
+  long *nfgv, long *nintol, long *nskip, long *nact,
+  double *sbgnrm, double *time, long *nseg, char *word,
+  long *iback, double *stp, double *xstep, long *k,
+  double *cachyt, double *sbtime, double *lnscht)
+{
+  long i__1;
+  FILE *itfptr;
+  static long i__;
+  --x;
+  if (strncmp(task, "ERROR", 5) == 0) {
+    goto L999;
+  }
+  if (*iprint >= 0) {
+    fprintf(stdout, "\n");
+    fprintf(stdout, "           * * *\n");
+    fprintf(stdout, "\n");
+    fprintf(stdout, "Tit   = total number of iterations\n");
+    fprintf(stdout, "Tnf   = total number of function evaluations\n");
+    fprintf(stdout, "Tnint = total number of segments explored during Cauchy searches\n");
+    fprintf(stdout, "Skip  = number of BFGS updates skipped\n");
+    fprintf(stdout, "Nact  = number of active bounds at final generalized Cauchy point\n");
+    fprintf(stdout, "Projg = norm of the final projected gradient\n");
+    fprintf(stdout, "F     = final function value\n");
+    fprintf(stdout, "\n");
+    fprintf(stdout, "           * * *\n");
+    fprintf(stdout, "\n");
+    fprintf(stdout, "   N    Tit     Tnf  Tnint  Skip  Nact     Projg        F\n");
+    fprintf(stdout, "%5ld %6ld %6ld %6ld %5ld %5ld  %10.3E  %10.3E\n", *n, *iter, *nfgv, *nintol, *nskip, *nact, *sbgnrm, *f);
+    if (*iprint >= 100) {
+      fprintf(stdout, "\n");
+      fprintf(stdout, " X =");
+      i__1 = *n;
+      for (i__ = 1; i__ <= i__1; ++i__) {
+        fprintf(stdout, " %11.4E", x[i__]);
+        if (i__ % 6 == 0) {
+          fprintf(stdout, "\n");
+          fprintf(stdout, "    ");
+        }
+      }
+      fprintf(stdout, "\n");
+    }
+    if (*iprint >= 1) {
+      fprintf(stdout, "  F =  %3.8E\n", *f);
+    }
+  }
+L999:
+  if (*iprint >= 0) {
+    fprintf(stdout, "\n");
+    fprintf(stdout, "%s\n", task);
+    if (*info != 0) {
+      if (*info == -1) {
+        fprintf(stdout, "\n");
+        fprintf(stdout, " Matrix in 1st Cholesky factorization in formk is not Pos. Def.\n");
+      }
+      if (*info == -2) {
+        fprintf(stdout, "\n");
+        fprintf(stdout, " Matrix in 2st Cholesky factorization in formk is not Pos. Def.\n");
+      }
+      if (*info == -3) {
+        fprintf(stdout, "\n");
+        fprintf(stdout, " Matrix in the Cholesky factorization in formt is not Pos. Def.\n");
+      }
+      if (*info == -4) {
+        fprintf(stdout, "\n");
+        fprintf(stdout, " Derivative >= 0, backtracking line search impossible.\n");
+        fprintf(stdout, "   Previous x, f and g restored.\n");
+        fprintf(stdout, " Possible causes: 1 error in function or gradient evaluation;\n");
+        fprintf(stdout, "                  2 rounding errors dominate computation.\n");
+      }
+      if (*info == -5) {
+        fprintf(stdout, "\n");
+        fprintf(stdout, " Warning:  more than 10 function and gradient\n");
+        fprintf(stdout, "   evaluations in the last line search.  Termination\n");
+        fprintf(stdout, "   may possibly be caused by a bad search direction.\n");
+      }
+      if (*info == -6) {
+        fprintf(stdout, "  Input nbd(%2ld) is invalid.\n", *k);
+      }
+      if (*info == -7) {
+        fprintf(stdout, "  l(%2ld) > u(%2ld).  No feasible solution.\n", *k, *k);
+      }
+      if (*info == -8) {
+        fprintf(stdout, "\n");
+        fprintf(stdout, " The triangular system is singular.\n");
+      }
+      if (*info == -9) {
+        fprintf(stdout, "\n");
+        fprintf(stdout, " Line search cannot locate an adequate point after 20 function\n");
+        fprintf(stdout, "  and gradient evaluations.  Previous x, f and g restored.\n");
+        fprintf(stdout, " Possible causes: 1 error in function or gradient evaluation;\n");
+        fprintf(stdout, "                  2 rounding error dominate computation.\n");
+      }
+    }
+    if (*iprint >= 1) {
+      fprintf(stdout, "\n");
+      fprintf(stdout, " Cauchy                time %1.3E seconds.\n", *cachyt);
+      fprintf(stdout, " Subspace minimization time %1.3E seconds.\n", *sbtime);
+      fprintf(stdout, " Line search           time %1.3E seconds.\n", *lnscht);
+    }
+    fprintf(stdout, "\n");
+    fprintf(stdout, " Total User time %1.3E seconds.\n", *time);
+    fprintf(stdout, "\n");
+    if (*iprint >= 1) {
+      itfptr = fopen("iterate.dat", "a");
+      if (*info == -4 || *info == -9) {
+        fprintf(itfptr, " %4ld %4ld %5ld %5ld  %3s %4ld  %7.1E  %7.1E      -          -\n",
+            *iter, *nfgv, *nseg, *nact, word, *iback, *stp, *xstep);
+      }
+      fprintf(itfptr, "\n");
+      fprintf(itfptr, "%s\n", task);
+      if (*info != 0) {
+        if (*info == -1) {
+          fprintf(itfptr, "\n");
+          fprintf(itfptr, " Matrix in 1st Cholesky factorization in formk is not Pos. Def.\n");
+        }
+        if (*info == -2) {
+          fprintf(itfptr, "\n");
+          fprintf(itfptr, " Matrix in 2st Cholesky factorization in formk is not Pos. Def.\n");
+        }
+        if (*info == -3) {
+          fprintf(itfptr, "\n");
+          fprintf(itfptr, " Matrix in the Cholesky factorization in formt is not Pos. Def.\n");
+        }
+        if (*info == -4) {
+          fprintf(itfptr, "\n");
+          fprintf(itfptr, " Derivative >= 0, backtracking line search impossible.\n");
+          fprintf(itfptr, "   Previous x, f and g restored.\n");
+          fprintf(itfptr, " Possible causes: 1 error in function or gradient evaluation;\n");
+          fprintf(itfptr, "                  2 rounding errors dominate computation.\n");
+        }
+        if (*info == -5) {
+          fprintf(itfptr, "\n");
+          fprintf(itfptr, " Warning:  more than 10 function and gradient\n");
+          fprintf(itfptr, "   evaluations in the last line search.  Termination\n");
+          fprintf(itfptr, "   may possibly be caused by a bad search direction.\n");
+        }
+        if (*info == -8) {
+          fprintf(itfptr, "\n");
+          fprintf(itfptr, " The triangular system is singular.\n");
+        }
+        if (*info == -9) {
+          fprintf(itfptr, "\n");
+          fprintf(itfptr, " Line search cannot locate an adequate point after 20 function\n");
+          fprintf(itfptr, "  and gradient evaluations.  Previous x, f and g restored.\n");
+          fprintf(itfptr, " Possible causes: 1 error in function or gradient evaluation;\n");
+          fprintf(itfptr, "                  2 rounding error dominate computation.\n");
+        }
+      }
+      fprintf(itfptr, "\n");
+      fprintf(itfptr, " Total User time %1.3E seconds.\n", *time);
+      fprintf(itfptr, "\n");
+      fclose(itfptr);
+    }
+  }
+  return 0;
+}
+/**
+ * Subroutine projgr
+ *
+ *     This subroutine computes the infinity norm of the projected
+ *       gradient.
+ *
+ *                           *  *  *
+ *
+ *     NEOS, November 1994. (Latest revision June 1996.)
+ *     Optimization Technology Center.
+ *     Argonne National Laboratory and Northwestern University.
+ *     Written by
+ *                        Ciyou Zhu
+ *     in collaboration with R.H. Byrd, P. Lu-Chen and J. Nocedal.
+ */
+int projgr_(long *n, double *l, double *u,
+  long *nbd, double *x, double *g, double *sbgnrm)
+{
+  long i__1;
+  double d__1, d__2;
+  static long i__;
+  static double gi;
+  --g;
+  --x;
+  --nbd;
+  --u;
+  --l;
+  *sbgnrm = 0.;
+  i__1 = *n;
+  for (i__ = 1; i__ <= i__1; ++i__) {
+    gi = g[i__];
+    if (nbd[i__] != 0) {
+      if (gi < 0.) {
+        if (nbd[i__] >= 2) {
+          d__1 = x[i__] - u[i__];
+          gi = d__1 >= gi ? d__1 : gi;
+        }
+      } else {
+        if (nbd[i__] <= 2) {
+          d__1 = x[i__] - l[i__];
+          gi = d__1 <= gi ? d__1 : gi;
+        }
+      }
+    }
+    d__1 = *sbgnrm, d__2 = fabs(gi);
+    *sbgnrm = d__1 >= d__2 ? d__1 : d__2;
+  }
+  return 0;
+}
+/* **********************************************************************
+ *
+ * This routine contains the major changes in the updated version.
+ * The changes are described in the accompanying paper
+ *
+ *  Jose Luis Morales, Jorge Nocedal
+ *  "Remark On Algorithm 788: L-BFGS-B: Fortran Subroutines for Large-Scale
+ *   Bound Constrained Optimization". Decemmber 27, 2010.
+ *
+ *         J.L. Morales  Departamento de Matematicas,
+ *                       Instituto Tecnologico Autonomo de Mexico
+ *                       Mexico D.F.
+ *
+ *         J, Nocedal    Department of Electrical Engineering and
+ *                       Computer Science.
+ *                       Northwestern University. Evanston, IL. USA
+ *
+ *                       January 17, 2011
+ *
+ * ********************************************************************** */
+/**
+ * Subroutine subsm
+ *
+ *     Given xcp, l, u, r, an index set that specifies
+ *       the active set at xcp, and an l-BFGS matrix B
+ *       (in terms of WY, WS, SY, WT, head, col, and theta),
+ *       this subroutine computes an approximate solution
+ *       of the subspace problem
+ *
+ *       (P)   min Q(x) = r'(x-xcp) + 1/2 (x-xcp)' B (x-xcp)
+ *
+ *             subject to l<=x<=u
+ *                       x_i=xcp_i for all i in A(xcp)
+ *
+ *       along the subspace unconstrained Newton direction
+ *
+ *          d = -(Z'BZ)^(-1) r.
+ *
+ *       The formula for the Newton direction, given the L-BFGS matrix
+ *       and the Sherman-Morrison formula, is
+ *
+ *          d = (1/theta)r + (1/theta*2) Z'WK^(-1)W'Z r.
+ *
+ *       where
+ *                 K = [-D -Y'ZZ'Y/theta     L_a'-R_z'  ]
+ *                     [L_a -R_z           theta*S'AA'S ]
+ *
+ *     Note that this procedure for computing d differs
+ *     from that described in [1]. One can show that the matrix K is
+ *     equal to the matrix M^[-1]N in that paper.
+ *
+ *     n is an long variable.
+ *       On entry n is the dimension of the problem.
+ *       On exit n is unchanged.
+ *
+ *     m is an long variable.
+ *       On entry m is the maximum number of variable metric corrections
+ *         used to define the limited memory matrix.
+ *       On exit m is unchanged.
+ *
+ *     nsub is an long variable.
+ *       On entry nsub is the number of free variables.
+ *       On exit nsub is unchanged.
+ *
+ *     ind is an long array of dimension nsub.
+ *       On entry ind specifies the coordinate indices of free variables.
+ *       On exit ind is unchanged.
+ *
+ *     l is a double precision array of dimension n.
+ *       On entry l is the lower bound of x.
+ *       On exit l is unchanged.
+ *
+ *     u is a double precision array of dimension n.
+ *       On entry u is the upper bound of x.
+ *       On exit u is unchanged.
+ *
+ *     nbd is a long array of dimension n.
+ *       On entry nbd represents the type of bounds imposed on the
+ *         variables, and must be specified as follows:
+ *         nbd(i)=0 if x(i) is unbounded,
+ *                1 if x(i) has only a lower bound,
+ *                2 if x(i) has both lower and upper bounds, and
+ *                3 if x(i) has only an upper bound.
+ *       On exit nbd is unchanged.
+ *
+ *     x is a double precision array of dimension n.
+ *       On entry x specifies the Cauchy point xcp.
+ *       On exit x(i) is the minimizer of Q over the subspace of
+ *                                                        free variables.
+ *
+ *     d is a double precision array of dimension n.
+ *       On entry d is the reduced gradient of Q at xcp.
+ *       On exit d is the Newton direction of Q.
+ *
+ *    xp is a double precision array of dimension n.
+ *       used to safeguard the projected Newton direction
+ *
+ *    xx is a double precision array of dimension n
+ *       On entry it holds the current iterate
+ *       On output it is unchanged
+ *    gg is a double precision array of dimension n
+ *       On entry it holds the gradient at the current iterate
+ *       On output it is unchanged
+ *
+ *     ws and wy are double precision arrays;
+ *     theta is a double precision variable;
+ *     col is an long variable;
+ *     head is an long variable.
+ *       On entry they store the information defining the
+ *                                          limited memory BFGS matrix:
+ *         ws(n,m) stores S, a set of s-vectors;
+ *         wy(n,m) stores Y, a set of y-vectors;
+ *         theta is the scaling factor specifying B_0 = theta I;
+ *         col is the number of variable metric corrections stored;
+ *         head is the location of the 1st s- (or y-) vector in S (or Y).
+ *       On exit they are unchanged.
+ *
+ *     iword is an long variable.
+ *       On entry iword is unspecified.
+ *       On exit iword specifies the status of the subspace solution.
+ *         iword = 0 if the solution is in the box,
+ *                 1 if some bound is encountered.
+ *
+ *     wv is a double precision working array of dimension 2m.
+ *
+ *     wn is a double precision array of dimension 2m x 2m.
+ *       On entry the upper triangle of wn stores the LEL^T factorization
+ *         of the indefinite matrix
+ *
+ *              K = [-D -Y'ZZ'Y/theta     L_a'-R_z'  ]
+ *                  [L_a -R_z           theta*S'AA'S ]
+ *                                                    where E = [-I  0]
+ *                                                              [ 0  I]
+ *       On exit wn is unchanged.
+ *
+ *     iprint is an long variable that must be set by the user.
+ *       It controls the frequency and type of output generated:
+ *        iprint<0    no output is generated;
+ *        iprint=0    print only one line at the last iteration;
+ *        0<iprint<99 print also f and |proj g| every iprint iterations;
+ *        iprint=99   print details of every iteration except n-vectors;
+ *        iprint=100  print also the changes of active set and final x;
+ *        iprint>100  print details of every iteration including x and g;
+ *       When iprint > 0, the file iterate.dat will be created to
+ *                        summarize the iteration.
+ *
+ *     info is an long variable.
+ *       On entry info is unspecified.
+ *       On exit info = 0       for normal return,
+ *                    = nonzero for abnormal return
+ *                                  when the matrix K is ill-conditioned.
+ *
+ *     Subprograms called:
+ *
+ *       Linpack dtrsl.
+ *
+ *
+ *     References:
+ *
+ *       [1] R. H. Byrd, P. Lu, J. Nocedal and C. Zhu, ``A limited
+ *       memory algorithm for bound constrained optimization'',
+ *       SIAM J. Scientific Computing 16 (1995), no. 5, pp. 1190--1208.
+ *
+ *                           *  *  *
+ *
+ *     NEOS, November 1994. (Latest revision June 1996.)
+ *     Optimization Technology Center.
+ *     Argonne National Laboratory and Northwestern University.
+ *     Written by
+ *                        Ciyou Zhu
+ *     in collaboration with R.H. Byrd, P. Lu-Chen and J. Nocedal
+ */
+int subsm_(long *n, long *m, long *nsub, long *ind,
+  double *l, double *u, long *nbd, double *x,
+  double *d__, double *xp, double *ws, double *wy,
+  double *theta, double *xx, double *gg, long *col,
+  long *head, long *iword, double *wv, double *wn,
+  long *iprint, long *info)
+{
+  long ws_dim1, ws_offset, wy_dim1, wy_offset, wn_dim1, wn_offset, i__1, i__2;
+  double d__1, d__2;
+  static long i__, j, k, m2;
+  static double dk;
+  static long js, jy;
+  static double xk;
+  static long ibd, col2;
+  static double dd_p__, temp1, temp2, alpha;
+  static long pointr;
+  --gg;
+  --xx;
+  --xp;
+  --d__;
+  --x;
+  --nbd;
+  --u;
+  --l;
+  wn_dim1 = 2 * *m;
+  wn_offset = 1 + wn_dim1;
+  wn -= wn_offset;
+  --wv;
+  wy_dim1 = *n;
+  wy_offset = 1 + wy_dim1;
+  wy -= wy_offset;
+  ws_dim1 = *n;
+  ws_offset = 1 + ws_dim1;
+  ws -= ws_offset;
+  --ind;
+  if (*nsub <= 0) {
+    return 0;
+  }
+  if (*iprint >= 99) {
+    fprintf(stdout, "\n----------------SUBSM entered-----------------\n\n");
+  }
+  /* Compute wv = W'Zd. */
+  pointr = *head;
+  i__1 = *col;
+  for (i__ = 1; i__ <= i__1; ++i__) {
+    temp1 = 0.;
+    temp2 = 0.;
+    i__2 = *nsub;
+    for (j = 1; j <= i__2; ++j) {
+      k = ind[j];
+      temp1 += wy[k + pointr * wy_dim1] * d__[j];
+      temp2 += ws[k + pointr * ws_dim1] * d__[j];
+    }
+    wv[i__] = temp1;
+    wv[*col + i__] = *theta * temp2;
+    pointr = pointr % *m + 1;
+  }
+  /* Compute wv:=K^(-1)wv. */
+  m2 = *m << 1;
+  col2 = *col << 1;
+  dtrsl_(&wn[wn_offset], &m2, &col2, &wv[1], &c__11, info);
+  if (*info != 0) {
+    return 0;
+  }
+  i__1 = *col;
+  for (i__ = 1; i__ <= i__1; ++i__) {
+    wv[i__] = -wv[i__];
+  }
+  dtrsl_(&wn[wn_offset], &m2, &col2, &wv[1], &c__1, info);
+  if (*info != 0) {
+    return 0;
+  }
+  /* Compute d = (1/theta)d + (1/theta**2)Z'W wv. */
+  pointr = *head;
+  i__1 = *col;
+  for (jy = 1; jy <= i__1; ++jy) {
+    js = *col + jy;
+    i__2 = *nsub;
+    for (i__ = 1; i__ <= i__2; ++i__) {
+      k = ind[i__];
+      d__[i__] = d__[i__] + wy[k + pointr * wy_dim1] * wv[jy] / *theta
+        + ws[k + pointr * ws_dim1] * wv[js];
+    }
+    pointr = pointr % *m + 1;
+  }
+  d__1 = 1. / *theta;
+  dscal_(nsub, &d__1, &d__[1], &c__1);
+  /* ----------------------------------------------------------------- */
+  /* Let us try the projection, d is the Newton direction */
+  *iword = 0;
+  dcopy_(n, &x[1], &c__1, &xp[1], &c__1);
+  i__1 = *nsub;
+  for (i__ = 1; i__ <= i__1; ++i__) {
+    k = ind[i__];
+    dk = d__[i__];
+    xk = x[k];
+    if (nbd[k] != 0) {
+      if (nbd[k] == 1) {
+        /* lower bounds only */
+        d__1 = l[k], d__2 = xk + dk;
+        x[k] = d__1 >= d__2 ? d__1 : d__2;
+        if (x[k] == l[k]) {
+          *iword = 1;
+        }
+      } else {
+        if (nbd[k] == 2) {
+          /* upper and lower bounds */
+          d__1 = l[k], d__2 = xk + dk;
+          xk = d__1 >= d__2 ? d__1 : d__2;
+          d__1 = u[k];
+          x[k] = d__1 <= xk ? d__1 : xk;
+          if (x[k] == l[k] || x[k] == u[k]) {
+            *iword = 1;
+          }
+        } else {
+          if (nbd[k] == 3) {
+            /* upper bounds only */
+            d__1 = u[k], d__2 = xk + dk;
+            x[k] = d__1 <= d__2 ? d__1 : d__2;
+            if (x[k] == u[k]) {
+              *iword = 1;
+            }
+          }
+        }
+      }
+    } else {
+      /* free variables */
+      x[k] = xk + dk;
+    }
+  }
+  if (*iword == 0) {
+    goto L911;
+  }
+  /* check sign of the directional derivative */
+  dd_p__ = 0.;
+  i__1 = *n;
+  for (i__ = 1; i__ <= i__1; ++i__) {
+    dd_p__ += (x[i__] - xx[i__]) * gg[i__];
+  }
+  if (dd_p__ > 0.) {
+    dcopy_(n, &xp[1], &c__1, &x[1], &c__1);
+    fprintf(stderr, "  Positive dir derivative in projection\n");
+    fprintf(stderr, "  Using the backtracking step\n");
+  } else {
+    goto L911;
+  }
+  /* ----------------------------------------------------------------- */
+  alpha = 1.;
+  temp1 = alpha;
+  ibd = 0;
+  i__1 = *nsub;
+  for (i__ = 1; i__ <= i__1; ++i__) {
+    k = ind[i__];
+    dk = d__[i__];
+    if (nbd[k] != 0) {
+      if (dk < 0. && nbd[k] <= 2) {
+        temp2 = l[k] - x[k];
+        if (temp2 >= 0.) {
+          temp1 = 0.;
+        } else if (dk * alpha < temp2) {
+          temp1 = temp2 / dk;
+        }
+      } else if (dk > 0. && nbd[k] >= 2) {
+        temp2 = u[k] - x[k];
+        if (temp2 <= 0.) {
+          temp1 = 0.;
+        } else if (dk * alpha > temp2) {
+          temp1 = temp2 / dk;
+        }
+      }
+      if (temp1 < alpha) {
+        alpha = temp1;
+        ibd = i__;
+      }
+    }
+  }
+  if (alpha < 1.) {
+    dk = d__[ibd];
+    k = ind[ibd];
+    if (dk > 0.) {
+      x[k] = u[k];
+      d__[ibd] = 0.;
+    } else if (dk < 0.) {
+      x[k] = l[k];
+      d__[ibd] = 0.;
+    }
+  }
+  i__1 = *nsub;
+  for (i__ = 1; i__ <= i__1; ++i__) {
+    k = ind[i__];
+    x[k] += alpha * d__[i__];
+  }
+  /* ccccc */
+L911:
+  if (*iprint >= 99) {
+    fprintf(stdout, "\n----------------exit SUBSM --------------------\n\n");
+  }
+  return 0;
+}
+/**
+ * Subroutine dcsrch
+ *
+ *     This subroutine finds a step that satisfies a sufficient
+ *     decrease condition and a curvature condition.
+ *
+ *     Each call of the subroutine updates an interval with
+ *     endpoints stx and sty. The interval is initially chosen
+ *     so that it contains a minimizer of the modified function
+ *
+ *           psi(stp) = f(stp) - f(0) - ftol*stp*f'(0).
+ *
+ *     If psi(stp) <= 0 and f'(stp) >= 0 for some step, then the
+ *     interval is chosen so that it contains a minimizer of f.
+ *
+ *     The algorithm is designed to find a step that satisfies
+ *     the sufficient decrease condition
+ *
+ *           f(stp) <= f(0) + ftol*stp*f'(0),
+ *
+ *     and the curvature condition
+ *
+ *           abs(f'(stp)) <= gtol*abs(f'(0)).
+ *
+ *     If ftol is less than gtol and if, for example, the function
+ *     is bounded below, then there is always a step which satisfies
+ *     both conditions.
+ *
+ *     If no step can be found that satisfies both conditions, then
+ *     the algorithm stops with a warning. In this case stp only
+ *     satisfies the sufficient decrease condition.
+ *
+ *     A typical invocation of dcsrch has the following outline:
+ *
+ *     task = 'START'
+ *  10 continue
+ *        call dcsrch( ... )
+ *        if (task .eq. 'FG') then
+ *           Evaluate the function and the gradient at stp
+ *           goto 10
+ *           end if
+ *
+ *     NOTE: The user must no alter work arrays between calls.
+ *
+ *     The subroutine statement is
+ *
+ *        subroutine dcsrch(f,g,stp,ftol,gtol,xtol,stpmin,stpmax,
+ *                          task,isave,dsave)
+ *     where
+ *
+ *       f is a double precision variable.
+ *         On initial entry f is the value of the function at 0.
+ *            On subsequent entries f is the value of the
+ *            function at stp.
+ *         On exit f is the value of the function at stp.
+ *
+ *       g is a double precision variable.
+ *         On initial entry g is the derivative of the function at 0.
+ *            On subsequent entries g is the derivative of the
+ *            function at stp.
+ *         On exit g is the derivative of the function at stp.
+ *
+ *       stp is a double precision variable.
+ *         On entry stp is the current estimate of a satisfactory
+ *            step. On initial entry, a positive initial estimate
+ *            must be provided.
+ *         On exit stp is the current estimate of a satisfactory step
+ *            if task = 'FG'. If task = 'CONV' then stp satisfies
+ *            the sufficient decrease and curvature condition.
+ *
+ *       ftol is a double precision variable.
+ *         On entry ftol specifies a nonnegative tolerance for the
+ *            sufficient decrease condition.
+ *         On exit ftol is unchanged.
+ *
+ *       gtol is a double precision variable.
+ *         On entry gtol specifies a nonnegative tolerance for the
+ *            curvature condition.
+ *         On exit gtol is unchanged.
+ *
+ *       xtol is a double precision variable.
+ *         On entry xtol specifies a nonnegative relative tolerance
+ *            for an acceptable step. The subroutine exits with a
+ *            warning if the relative difference between sty and stx
+ *            is less than xtol.
+ *         On exit xtol is unchanged.
+ *
+ *       stpmin is a double precision variable.
+ *         On entry stpmin is a nonnegative lower bound for the step.
+ *         On exit stpmin is unchanged.
+ *
+ *       stpmax is a double precision variable.
+ *         On entry stpmax is a nonnegative upper bound for the step.
+ *         On exit stpmax is unchanged.
+ *
+ *       task is a character variable of length at least 60.
+ *         On initial entry task must be set to 'START'.
+ *         On exit task indicates the required action:
+ *
+ *            If task(1:2) = 'FG' then evaluate the function and
+ *            derivative at stp and call dcsrch again.
+ *
+ *            If task(1:4) = 'CONV' then the search is successful.
+ *
+ *            If task(1:4) = 'WARN' then the subroutine is not able
+ *            to satisfy the convergence conditions. The exit value of
+ *            stp contains the best point found during the search.
+ *
+ *            If task(1:5) = 'ERROR' then there is an error in the
+ *            input arguments.
+ *
+ *         On exit with convergence, a warning or an error, the
+ *            variable task contains additional information.
+ *
+ *       isave is an long work array of dimension 2.
+ *
+ *       dsave is a double precision work array of dimension 13.
+ *
+ *     Subprograms called
+ *
+ *       MINPACK-2 ... dcstep
+ *
+ *     MINPACK-1 Project. June 1983.
+ *     Argonne National Laboratory.
+ *     Jorge J. More' and David J. Thuente.
+ *
+ *     MINPACK-2 Project. October 1993.
+ *     Argonne National Laboratory and University of Minnesota.
+ *     Brett M. Averick, Richard G. Carter, and Jorge J. More'.
+ */
+int dcsrch_(double *f, double *g, double *stp,
+  double *ftol, double *gtol, double *xtol,
+  double *stpmin, double *stpmax,
+  char *task, long *isave, double *dsave)
+{
+  double d__1;
+  static double fm, gm, fx, fy, gx, gy, fxm, fym, gxm, gym, stx, sty;
+  static long stage;
+  static double finit, ginit, width, ftest, gtest, stmin, stmax, width1;
+  static long brackt;
+  --dsave;
+  --isave;
+  if (strncmp(task, "START", 5) == 0) {
+    /* Check the input arguments for errors. */
+    if (*stp < *stpmin) {
+      strcpy(task, "ERROR: STP .LT. STPMIN");
+    }
+    if (*stp > *stpmax) {
+      strcpy(task, "ERROR: STP .GT. STPMAX");
+    }
+    if (*g >= 0.) {
+      strcpy(task, "ERROR: INITIAL G .GE. ZERO");
+    }
+    if (*ftol < 0.) {
+      strcpy(task, "ERROR: FTOL .LT. ZERO");
+    }
+    if (*gtol < 0.) {
+      strcpy(task, "ERROR: GTOL .LT. ZERO");
+    }
+    if (*xtol < 0.) {
+      strcpy(task, "ERROR: XTOL .LT. ZERO");
+    }
+    if (*stpmin < 0.) {
+      strcpy(task, "ERROR: STPMIN .LT. ZERO");
+    }
+    if (*stpmax < *stpmin) {
+      strcpy(task, "ERROR: STPMAX .LT. STPMIN");
+    }
+    /* Exit if there are errors on input. */
+    if (strncmp(task, "ERROR", 5) == 0) {
+      return 0;
+    }
+    /* Initialize local variables. */
+    brackt = FALSE_;
+    stage = 1;
+    finit = *f;
+    ginit = *g;
+    gtest = *ftol * ginit;
+    width = *stpmax - *stpmin;
+    width1 = width / .5;
+    /* The variables stx, fx, gx contain the values of the step, */
+    /* function, and derivative at the best step. */
+    /* The variables sty, fy, gy contain the value of the step, */
+    /* function, and derivative at sty. */
+    /* The variables stp, f, g contain the values of the step, */
+    /* function, and derivative at stp. */
+    stx = 0.;
+    fx = finit;
+    gx = ginit;
+    sty = 0.;
+    fy = finit;
+    gy = ginit;
+    stmin = 0.;
+    stmax = *stp + *stp * 4.;
+    strcpy(task, "FG");
+    goto L1000;
+  } else {
+    /* Restore local variables. */
+    if (isave[1] == 1) {
+      brackt = TRUE_;
+    } else {
+      brackt = FALSE_;
+    }
+    stage = isave[2];
+    ginit = dsave[1];
+    gtest = dsave[2];
+    gx = dsave[3];
+    gy = dsave[4];
+    finit = dsave[5];
+    fx = dsave[6];
+    fy = dsave[7];
+    stx = dsave[8];
+    sty = dsave[9];
+    stmin = dsave[10];
+    stmax = dsave[11];
+    width = dsave[12];
+    width1 = dsave[13];
+  }
+  /* If psi(stp) <= 0 and f'(stp) >= 0 for some step, then the */
+  /* algorithm enters the second stage. */
+  ftest = finit + *stp * gtest;
+  if (stage == 1 && *f <= ftest && *g >= 0.) {
+    stage = 2;
+  }
+  /* Test for warnings. */
+  if (brackt && (*stp <= stmin || *stp >= stmax)) {
+    strcpy(task, "WARNING: ROUNDING ERRORS PREVENT PROGRESS");
+  }
+  if (brackt && stmax - stmin <= *xtol * stmax) {
+    strcpy(task, "WARNING: XTOL TEST SATISFIED");
+  }
+  if (*stp == *stpmax && *f <= ftest && *g <= gtest) {
+    strcpy(task, "WARNING: STP = STPMAX");
+  }
+  if (*stp == *stpmin && (*f > ftest || *g >= gtest)) {
+    strcpy(task, "WARNING: STP = STPMIN");
+  }
+  /* Test for convergence. */
+  if (*f <= ftest && fabs(*g) <= *gtol * (-ginit)) {
+    strcpy(task, "CONVERGENCE");
+  }
+  /* Test for termination. */
+  if (strncmp(task, "WARN", 4) == 0 || strncmp(task, "CONV", 4) == 0) {
+    goto L1000;
+  }
+  /* A modified function is used to predict the step during the */
+  /* first stage if a lower function value has been obtained but */
+  /* the decrease is not sufficient. */
+  if (stage == 1 && *f <= fx && *f > ftest) {
+    /* Define the modified function and derivative values. */
+    fm = *f - *stp * gtest;
+    fxm = fx - stx * gtest;
+    fym = fy - sty * gtest;
+    gm = *g - gtest;
+    gxm = gx - gtest;
+    gym = gy - gtest;
+    /* Call dcstep to update stx, sty, and to compute the new step. */
+    dcstep_(&stx, &fxm, &gxm, &sty, &fym, &gym, stp, &fm, &gm, &brackt, &stmin, &stmax);
+    /* Reset the function and derivative values for f. */
+    fx = fxm + stx * gtest;
+    fy = fym + sty * gtest;
+    gx = gxm + gtest;
+    gy = gym + gtest;
+  } else {
+    /* Call dcstep to update stx, sty, and to compute the new step. */
+    dcstep_(&stx, &fx, &gx, &sty, &fy, &gy, stp, f, g, &brackt, &stmin, &stmax);
+  }
+  /* Decide if a bisection step is needed. */
+  if (brackt) {
+    if ((d__1 = sty - stx, fabs(d__1)) >= width1 * .66) {
+      *stp = stx + (sty - stx) * .5;
+    }
+    width1 = width;
+    width = (d__1 = sty - stx, fabs(d__1));
+  }
+  /* Set the minimum and maximum steps allowed for stp. */
+  if (brackt) {
+    stmin = stx <= sty ? stx : sty;
+    stmax = stx >= sty ? stx : sty;
+  } else {
+    stmin = *stp + (*stp - stx) * 1.1;
+    stmax = *stp + (*stp - stx) * 4.;
+  }
+  /* Force the step to be within the bounds stpmax and stpmin. */
+  *stp = *stp >= *stpmin ? *stp : *stpmin;
+  *stp = *stp <= *stpmax ? *stp : *stpmax;
+  /* If further progress is not possible, let stp be the best */
+  /* point obtained during the search. */
+  if ((brackt && (*stp <= stmin || *stp >= stmax)) || (brackt && (stmax - stmin <= *xtol * stmax))) {
+    *stp = stx;
+  }
+  /* Obtain another function and derivative. */
+  strcpy(task, "FG");
+L1000:
+  /* Save local variables. */
+  if (brackt) {
+    isave[1] = 1;
+  } else {
+    isave[1] = 0;
+  }
+  isave[2] = stage;
+  dsave[1] = ginit;
+  dsave[2] = gtest;
+  dsave[3] = gx;
+  dsave[4] = gy;
+  dsave[5] = finit;
+  dsave[6] = fx;
+  dsave[7] = fy;
+  dsave[8] = stx;
+  dsave[9] = sty;
+  dsave[10] = stmin;
+  dsave[11] = stmax;
+  dsave[12] = width;
+  dsave[13] = width1;
+  return 0;
+}
+/**
+ * Subroutine dcstep
+ *
+ *     This subroutine computes a safeguarded step for a search
+ *     procedure and updates an interval that contains a step that
+ *     satisfies a sufficient decrease and a curvature condition.
+ *
+ *     The parameter stx contains the step with the least function
+ *     value. If brackt is set to .true. then a minimizer has
+ *     been bracketed in an interval with endpoints stx and sty.
+ *     The parameter stp contains the current step.
+ *     The subroutine assumes that if brackt is set to .true. then
+ *
+ *           min(stx,sty) < stp < max(stx,sty),
+ *
+ *     and that the derivative at stx is negative in the direction
+ *     of the step.
+ *
+ *     The subroutine statement is
+ *
+ *       subroutine dcstep(stx,fx,dx,sty,fy,dy,stp,fp,dp,brackt,
+ *                         stpmin,stpmax)
+ *
+ *     where
+ *
+ *       stx is a double precision variable.
+ *         On entry stx is the best step obtained so far and is an
+ *            endpoint of the interval that contains the minimizer.
+ *         On exit stx is the updated best step.
+ *
+ *       fx is a double precision variable.
+ *         On entry fx is the function at stx.
+ *         On exit fx is the function at stx.
+ *
+ *       dx is a double precision variable.
+ *         On entry dx is the derivative of the function at
+ *            stx. The derivative must be negative in the direction of
+ *            the step, that is, dx and stp - stx must have opposite
+ *            signs.
+ *         On exit dx is the derivative of the function at stx.
+ *
+ *       sty is a double precision variable.
+ *         On entry sty is the second endpoint of the interval that
+ *            contains the minimizer.
+ *         On exit sty is the updated endpoint of the interval that
+ *            contains the minimizer.
+ *
+ *       fy is a double precision variable.
+ *         On entry fy is the function at sty.
+ *         On exit fy is the function at sty.
+ *
+ *       dy is a double precision variable.
+ *         On entry dy is the derivative of the function at sty.
+ *         On exit dy is the derivative of the function at the exit sty.
+ *
+ *       stp is a double precision variable.
+ *         On entry stp is the current step. If brackt is set to .true.
+ *            then on input stp must be between stx and sty.
+ *         On exit stp is a new trial step.
+ *
+ *       fp is a double precision variable.
+ *         On entry fp is the function at stp
+ *         On exit fp is unchanged.
+ *
+ *       dp is a double precision variable.
+ *         On entry dp is the the derivative of the function at stp.
+ *         On exit dp is unchanged.
+ *
+ *       brackt is an logical variable.
+ *         On entry brackt specifies if a minimizer has been bracketed.
+ *            Initially brackt must be set to .false.
+ *         On exit brackt specifies if a minimizer has been bracketed.
+ *            When a minimizer is bracketed brackt is set to .true.
+ *
+ *       stpmin is a double precision variable.
+ *         On entry stpmin is a lower bound for the step.
+ *         On exit stpmin is unchanged.
+ *
+ *       stpmax is a double precision variable.
+ *         On entry stpmax is an upper bound for the step.
+ *         On exit stpmax is unchanged.
+ *
+ *     MINPACK-1 Project. June 1983
+ *     Argonne National Laboratory.
+ *     Jorge J. More' and David J. Thuente.
+ *
+ *     MINPACK-2 Project. October 1993.
+ *     Argonne National Laboratory and University of Minnesota.
+ *     Brett M. Averick and Jorge J. More'.
+ */
+int dcstep_(double *stx, double *fx, double *dx,
+  double *sty, double *fy, double *dy, double *stp,
+  double *fp, double *dp, long *brackt, double *stpmin,
+  double *stpmax)
+{
+  double d__1, d__2, d__3;
+  static double p, q, r__, s, sgnd, stpc, stpf, stpq, gamma, theta;
+  sgnd = *dp * (*dx / fabs(*dx));
+  /* First case: A higher function value. The minimum is bracketed. */
+  /* If the cubic step is closer to stx than the quadratic step, the */
+  /* cubic step is taken, otherwise the average of the cubic and */
+  /* quadratic steps is taken. */
+  if (*fp > *fx) {
+    theta = (*fx - *fp) * 3. / (*stp - *stx) + *dx + *dp;
+    d__1 = fabs(theta);
+    d__2 = fabs(*dx);
+    d__1 = d__1 >= d__2 ? d__1 : d__2;
+    d__2 = fabs(*dp);
+    s = d__1 >= d__2 ? d__1 : d__2;
+    d__1 = theta / s;
+    gamma = s * sqrt(d__1 * d__1 - *dx / s * (*dp / s));
+    if (*stp < *stx) {
+      gamma = -gamma;
+    }
+    p = gamma - *dx + theta;
+    q = gamma - *dx + gamma + *dp;
+    r__ = p / q;
+    stpc = *stx + r__ * (*stp - *stx);
+    stpq = *stx + *dx / ((*fx - *fp) / (*stp - *stx) + *dx) / 2. * (*stp - *stx);
+    if ((d__1 = stpc - *stx, fabs(d__1)) < (d__2 = stpq - *stx, fabs(d__2))) {
+      stpf = stpc;
+    } else {
+      stpf = stpc + (stpq - stpc) / 2.;
+    }
+    *brackt = TRUE_;
+  /* Second case: A lower function value and derivatives of opposite */
+  /* sign. The minimum is bracketed. If the cubic step is farther from */
+  /* stp than the secant step, the cubic step is taken, otherwise the */
+  /* secant step is taken. */
+  } else if (sgnd < 0.) {
+    theta = (*fx - *fp) * 3. / (*stp - *stx) + *dx + *dp;
+    d__1 = fabs(theta);
+    d__2 = fabs(*dx);
+    d__1 = d__1 >= d__2 ? d__1 : d__2;
+    d__2 = fabs(*dp);
+    s = d__1 >= d__2 ? d__1 : d__2;
+    d__1 = theta / s;
+    gamma = s * sqrt(d__1 * d__1 - *dx / s * (*dp / s));
+    if (*stp > *stx) {
+      gamma = -gamma;
+    }
+    p = gamma - *dp + theta;
+    q = gamma - *dp + gamma + *dx;
+    r__ = p / q;
+    stpc = *stp + r__ * (*stx - *stp);
+    stpq = *stp + *dp / (*dp - *dx) * (*stx - *stp);
+    if ((d__1 = stpc - *stp, fabs(d__1)) > (d__2 = stpq - *stp, fabs(d__2))) {
+      stpf = stpc;
+    } else {
+      stpf = stpq;
+    }
+    *brackt = TRUE_;
+  /* Third case: A lower function value, derivatives of the same sign, */
+  /* and the magnitude of the derivative decreases. */
+  } else if (fabs(*dp) < fabs(*dx)) {
+    /* The cubic step is computed only if the cubic tends to infinity */
+    /* in the direction of the step or if the minimum of the cubic */
+    /* is beyond stp. Otherwise the cubic step is defined to be the */
+    /* secant step. */
+    theta = (*fx - *fp) * 3. / (*stp - *stx) + *dx + *dp;
+    d__1 = fabs(theta);
+    d__2 = fabs(*dx);
+    d__1 = d__1 >= d__2 ? d__1 : d__2;
+    d__2 = fabs(*dp);
+    s = d__1 >= d__2 ? d__1 : d__2;
+    /* The case gamma = 0 only arises if the cubic does not tend */
+    /* to infinity in the direction of the step. */
+    d__3 = theta / s;
+    d__1 = 0.;
+    d__2 = d__3 * d__3 - *dx / s * (*dp / s);
+    gamma = s * sqrt(d__1 >= d__2 ? d__1 : d__2);
+    if (*stp > *stx) {
+      gamma = -gamma;
+    }
+    p = gamma - *dp + theta;
+    q = gamma + (*dx - *dp) + gamma;
+    r__ = p / q;
+    if (r__ < 0. && gamma != 0.) {
+      stpc = *stp + r__ * (*stx - *stp);
+    } else if (*stp > *stx) {
+      stpc = *stpmax;
+    } else {
+      stpc = *stpmin;
+    }
+    stpq = *stp + *dp / (*dp - *dx) * (*stx - *stp);
+    if (*brackt) {
+      /* A minimizer has been bracketed. If the cubic step is */
+      /* closer to stp than the secant step, the cubic step is */
+      /* taken, otherwise the secant step is taken. */
+      if ((d__1 = stpc - *stp, fabs(d__1)) < (d__2 = stpq - *stp, fabs(d__2))) {
+        stpf = stpc;
+      } else {
+        stpf = stpq;
+      }
+      if (*stp > *stx) {
+        d__1 = *stp + (*sty - *stp) * .66;
+        stpf = d__1 <= stpf ? d__1 : stpf;
+      } else {
+        d__1 = *stp + (*sty - *stp) * .66;
+        stpf = d__1 >= stpf ? d__1 : stpf;
+      }
+    } else {
+      /* A minimizer has not been bracketed. If the cubic step is */
+      /* farther from stp than the secant step, the cubic step is */
+      /* taken, otherwise the secant step is taken. */
+      if ((d__1 = stpc - *stp, fabs(d__1)) > (d__2 = stpq - *stp, fabs(d__2))) {
+        stpf = stpc;
+      } else {
+        stpf = stpq;
+      }
+      stpf = *stpmax <= stpf ? *stpmax : stpf;
+      stpf = *stpmin >= stpf ? *stpmin : stpf;
+    }
+  /* Fourth case: A lower function value, derivatives of the same sign, */
+  /* and the magnitude of the derivative does not decrease. If the */
+  /* minimum is not bracketed, the step is either stpmin or stpmax, */
+  /* otherwise the cubic step is taken. */
+  } else {
+    if (*brackt) {
+      theta = (*fp - *fy) * 3. / (*sty - *stp) + *dy + *dp;
+      d__1 = fabs(theta);
+      d__2 = fabs(*dy);
+      d__1 = d__1 >= d__2 ? d__1 : d__2;
+      d__2 = fabs(*dp);
+      s = d__1 >= d__2 ? d__1: d__2;
+      d__1 = theta / s;
+      gamma = s * sqrt(d__1 * d__1 - *dy / s * (*dp / s));
+      if (*stp > *sty) {
+        gamma = -gamma;
+      }
+      p = gamma - *dp + theta;
+      q = gamma - *dp + gamma + *dy;
+      r__ = p / q;
+      stpc = *stp + r__ * (*sty - *stp);
+      stpf = stpc;
+    } else if (*stp > *stx) {
+      stpf = *stpmax;
+    } else {
+      stpf = *stpmin;
+    }
+  }
+  /* Update the interval which contains a minimizer. */
+  if (*fp > *fx) {
+    *sty = *stp;
+    *fy = *fp;
+    *dy = *dp;
+  } else {
+    if (sgnd < 0.) {
+      *sty = *stx;
+      *fy = *fx;
+      *dy = *dx;
+    }
+    *stx = *stp;
+    *fx = *fp;
+    *dx = *dp;
+  }
+  /* Compute the new step. */
+  *stp = stpf;
+  return 0;
+}
+int timer_(double *ttime)
+{
+  *ttime = (double)clock() / CLOCKS_PER_SEC;
+  return 0;
+}