RubyGems - wapiti - Versions diffs - 0.0.5 → 0.1.0 - Mend

wapiti 0.0.5 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (53) hide show

checksums.yaml +7 -0
data/.simplecov +3 -0
data/Gemfile +25 -2
data/HISTORY.md +5 -1
data/LICENSE +14 -13
data/README.md +9 -16
data/Rakefile +38 -8
data/ext/wapiti/bcd.c +126 -124
data/ext/wapiti/decoder.c +203 -124
data/ext/wapiti/decoder.h +6 -4
data/ext/wapiti/extconf.rb +2 -2
data/ext/wapiti/gradient.c +491 -320
data/ext/wapiti/gradient.h +52 -34
data/ext/wapiti/lbfgs.c +74 -33
data/ext/wapiti/model.c +47 -37
data/ext/wapiti/model.h +22 -20
data/ext/wapiti/native.c +850 -839
data/ext/wapiti/native.h +1 -1
data/ext/wapiti/options.c +52 -20
data/ext/wapiti/options.h +37 -30
data/ext/wapiti/pattern.c +35 -33
data/ext/wapiti/pattern.h +12 -11
data/ext/wapiti/progress.c +14 -13
data/ext/wapiti/progress.h +3 -2
data/ext/wapiti/quark.c +14 -16
data/ext/wapiti/quark.h +6 -5
data/ext/wapiti/reader.c +83 -69
data/ext/wapiti/reader.h +11 -9
data/ext/wapiti/rprop.c +84 -43
data/ext/wapiti/sequence.h +18 -16
data/ext/wapiti/sgdl1.c +45 -43
data/ext/wapiti/thread.c +19 -17
data/ext/wapiti/thread.h +5 -4
data/ext/wapiti/tools.c +7 -7
data/ext/wapiti/tools.h +3 -4
data/ext/wapiti/trainers.h +1 -1
data/ext/wapiti/vmath.c +40 -38
data/ext/wapiti/vmath.h +12 -11
data/ext/wapiti/wapiti.c +159 -37
data/ext/wapiti/wapiti.h +18 -4
data/lib/wapiti.rb +15 -15
data/lib/wapiti/errors.rb +15 -15
data/lib/wapiti/model.rb +92 -84
data/lib/wapiti/options.rb +123 -124
data/lib/wapiti/utility.rb +14 -14
data/lib/wapiti/version.rb +2 -2
data/spec/spec_helper.rb +29 -9
data/spec/wapiti/model_spec.rb +230 -194
data/spec/wapiti/native_spec.rb +7 -8
data/spec/wapiti/options_spec.rb +184 -174
data/wapiti.gemspec +22 -8
metadata +38 -42
data/.gitignore +0 -5

data/ext/wapiti/reader.h CHANGED

@@ -1,7 +1,7 @@
 /*
  *      Wapiti - A linear-chain CRF tool
  *
- * Copyright (c) 2009-2011  CNRS
+ * Copyright (c) 2009-2013  CNRS
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -45,16 +45,16 @@
  */
 typedef struct rdr_s rdr_t;
 struct rdr_s {
-	bool    maxent;     //      Is this a maxent reader
-	int     npats;      //  P   Total number of patterns
-	int     nuni, nbi;  //      Number of unigram and bigram patterns
-	int     ntoks;      //      Expected number of tokens in input
-	pat_t **pats;       // [P]  List of precompiled patterns
-	qrk_t  *lbl;        //      Labels database
-	qrk_t  *obs;        //      Observation database
+	bool       autouni;    //      Automatically add 'u' prefix
+	uint32_t   npats;      //  P   Total number of patterns
+	uint32_t   nuni, nbi;  //      Number of unigram and bigram patterns
+	uint32_t   ntoks;      //      Expected number of tokens in input
+	pat_t    **pats;       // [P]  List of precompiled patterns
+	qrk_t     *lbl;        //      Labels database
+	qrk_t     *obs;        //      Observation database
 };
-rdr_t *rdr_new(bool maxent);
+rdr_t *rdr_new(bool autouni);
 void rdr_free(rdr_t *rdr);
 void rdr_freeraw(raw_t *raw);
 void rdr_freeseq(seq_t *seq);
@@ -69,5 +69,7 @@ dat_t *rdr_readdat(rdr_t *rdr, FILE *file, bool lbl);
 void rdr_load(rdr_t *rdr, FILE *file);
 void rdr_save(const rdr_t *rdr, FILE *file);
+char *rdr_readline(FILE *file);
 #endif

data/ext/wapiti/rprop.c CHANGED

@@ -1,7 +1,7 @@
 /*
  *      Wapiti - A linear-chain CRF tool
  *
- * Copyright (c) 2009-2011  CNRS
+ * Copyright (c) 2009-2013  CNRS
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -24,9 +24,13 @@
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
+#include <inttypes.h>
+#include <float.h>
 #include <math.h>
 #include <stdbool.h>
 #include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -39,7 +43,9 @@
 #include "thread.h"
 #include "vmath.h"
-#define sign(v) ((v) < 0.0 ? -1.0 : ((v) > 0.0 ? 1.0 : 0.0))
+#define EPSILON (DBL_EPSILON * 64.0)
+#define sign(v) ((v) < -EPSILON ? -1.0 : ((v) > EPSILON ? 1.0 : 0.0))
 #define sqr(v)  ((v) * (v))
 /******************************************************************************
@@ -58,7 +64,7 @@
  ******************************************************************************/
 typedef struct rprop_s rprop_t;
 struct rprop_s {
-	mdl_t *mdl;
+	mdl_t  *mdl;
 	double *xp;
 	double *stp;
 	double *g;
@@ -71,33 +77,33 @@ struct rprop_s {
  *   parameter given, the job scheduling system is not used here as we can
  *   easily split processing in equal parts.
  */
-static void trn_rpropsub(job_t *job, int id, int cnt, rprop_t *st) {
+static void trn_rpropsub(job_t *job, uint32_t id, uint32_t cnt, rprop_t *st) {
 	unused(job);
 	mdl_t *mdl = st->mdl;
-	const size_t F = mdl->nftr;
-	const double stpmin = mdl->opt->rprop.stpmin;
-	const double stpmax = mdl->opt->rprop.stpmax;
-	const double stpinc = mdl->opt->rprop.stpinc;
-	const double stpdec = mdl->opt->rprop.stpdec;
-	const bool   wbt    = strcmp(mdl->opt->algo, "rprop-");
-	const double rho1   = mdl->opt->rho1;
-	const int    l1     = (rho1 != 0.0) ? mdl->opt->rprop.cutoff + 1: 0;
+	const uint64_t F = mdl->nftr;
+	const double   stpmin = mdl->opt->rprop.stpmin;
+	const double   stpmax = mdl->opt->rprop.stpmax;
+	const double   stpinc = mdl->opt->rprop.stpinc;
+	const double   stpdec = mdl->opt->rprop.stpdec;
+	const bool     wbt    = strcmp(mdl->opt->algo, "rprop-");
+	const double   rho1   = mdl->opt->rho1;
+	const int      l1     = (rho1 != 0.0) ? mdl->opt->rprop.cutoff + 1: 0;
 	double *x = mdl->theta;
 	double *xp  = st->xp,   *stp = st->stp;
 	double *g   = st->g,    *gp  = st->gp;
-	const size_t from = F * id / cnt;
-	const size_t to   = F * (id + 1) / cnt;
-	for (size_t f = from; f < to; f++) {
+	const uint64_t from = F * id / cnt;
+	const uint64_t to   = F * (id + 1) / cnt;
+	for (uint64_t f = from; f < to; f++) {
 		double pg = g[f];
 		// If there is a l1 component in the regularization component,
 		// we either project the gradient in the current orthant or
 		// check for cutdown depending on the projection scheme wanted.
 		if (l1 == 1) {
-			if (x[f] < 0.0)        pg -= rho1;
-			else if (x[f] > 0.0)   pg += rho1;
-			else if (g[f] < -rho1) pg += rho1;
-			else if (g[f] > rho1)  pg -= rho1;
-			else                   pg  = 0.0;
+			     if (x[f] < -EPSILON) pg -= rho1;
+			else if (x[f] >  EPSILON) pg += rho1;
+			else if (g[f] < -rho1)    pg += rho1;
+			else if (g[f] >  rho1)    pg -= rho1;
+			else                      pg  = 0.0;
 		} else if (l1 && sqr(g[f] + rho1 * sign(x[f])) < sqr(rho1)) {
 			if (x[f] == 0.0 || (   gp[f] * g[f] < 0.0
 			                    && xp[f] * x[f] < 0.0)) {
@@ -108,51 +114,77 @@ static void trn_rpropsub(job_t *job, int id, int cnt, rprop_t *st) {
 				continue;
 			}
 		}
+		const double sgp = sign(gp[f]);
+		const double spg = sign(pg);
 		// Next we adjust the step depending on the new and
 		// previous gradient values.
-		if (gp[f] * pg > 0.0)
+		if (sgp * spg > 0.0)
 			stp[f] = min(stp[f] * stpinc, stpmax);
-		else if (gp[f] * pg < 0.0)
+		else if (sgp * spg < 0.0)
 			stp[f] = max(stp[f] * stpdec, stpmin);
 		// Finally update the weight. If there is an l1 penalty
 		// and the pseudo gradient projection is used, we have to
 		// project back the update in the chosen orthant.
-		if (!wbt || gp[f] * pg > 0.0) {
+		if (!wbt || sgp * spg > 0.0) {
 			double dlt = stp[f] * -sign(g[f]);
-			if (l1 == 1 && dlt * pg >= 0.0)
+			if (l1 == 1 && dlt * spg >= 0.0)
 				dlt = 0.0;
 			if (wbt)
 				xp[f] = x[f];
 			x[f] += dlt;
-		} else if (gp[f] * pg < 0.0) {
+		} else if (sgp * spg < -0.0) {
 			x[f] = xp[f];
 			g[f] = 0.0;
 		} else {
 			xp[f] = x[f];
 			if (l1 != 1)
-				x[f] += stp[f] * -sign(pg);
+				x[f] += stp[f] * -spg;
 		}
 		gp[f] = g[f];
 	}
 }
 void trn_rprop(mdl_t *mdl) {
-	const size_t F   = mdl->nftr;
-	const int    K   = mdl->opt->maxiter;
-	const size_t W   = mdl->opt->nthread;
-	const bool   wbt = strcmp(mdl->opt->algo, "rprop-");
-	const int    cut = mdl->opt->rprop.cutoff;
+	const uint64_t F   = mdl->nftr;
+	const uint32_t K   = mdl->opt->maxiter;
+	const uint32_t W   = mdl->opt->nthread;
+	const bool     wbt = strcmp(mdl->opt->algo, "rprop-");
+	const int      cut = mdl->opt->rprop.cutoff;
 	// Allocate state memory and initialize it
 	double *xp  = NULL,       *stp = xvm_new(F);
 	double *g   = xvm_new(F), *gp  = xvm_new(F);
 	if (wbt && !cut)
 		xp = xvm_new(F);
-	for (unsigned f = 0; f < F; f++) {
+	for (uint64_t f = 0; f < F; f++) {
 		if (wbt && !cut)
 			xp[f]  = 0.0;
 		gp[f]  = 0.0;
 		stp[f] = 0.1;
 	}
+	// Restore a saved state if given by the user
+	if (mdl->opt->rstate != NULL) {
+		const char *err = "invalid state file";
+		FILE *file = fopen(mdl->opt->rstate, "r");
+		if (file == NULL)
+			fatal("failed to open input state file");
+		int type;
+		uint64_t nftr;
+		if (fscanf(file, "#state#%d#%"SCNu64"\n", &type, &nftr) != 2)
+			fatal(err);
+		if (type != 3)
+			fatal("state is not for rprop model");
+		for (uint64_t i = 0; i < nftr; i++) {
+			uint64_t f;
+			double vxp, vstp, vgp;
+			if (fscanf(file, "%"PRIu64" %la %la %la\n", &f, &vxp,
+					&vstp, &vgp) != 4)
+				fatal(err);
+			if (wbt && !cut) xp[f] = vxp;
+			gp[f] = vgp;
+			stp[f] = vstp;
+		}
+		fclose(file);
+	}
 	// Prepare the rprop state used to send information to the rprop worker
 	// about updating weight using the gradient.
 	rprop_t *st = wapiti_xmalloc(sizeof(rprop_t));
@@ -160,32 +192,41 @@ void trn_rprop(mdl_t *mdl) {
 	st->xp  = xp;  st->stp = stp;
 	st->g   = g;   st->gp  = gp;
 	rprop_t *rprop[W];
-	for (size_t w = 0; w < W; w++)
+	for (uint32_t w = 0; w < W; w++)
 		rprop[w] = st;
 	// Prepare the gradient state for the distributed gradient computation.
-	grd_t *grds[W];
-	grds[0] = grd_new(mdl, g);
-	for (size_t w = 1; w < W; w++)
-		grds[w] = grd_new(mdl, xvm_new(F));
+	grd_t *grd = grd_new(mdl, g);
 	// And iterate the gradient computation / weight update process until
 	// convergence or stop request
-	for (int k = 0; !uit_stop && k < K; k++) {
-		double fx = grd_gradient(mdl, g, grds);
+	for (uint32_t k = 0; !uit_stop && k < K; k++) {
+		double fx = grd_gradient(grd);
 		if (uit_stop)
 			break;
 		mth_spawn((func_t *)trn_rpropsub, W, (void **)rprop, 0, 0);
 		if (uit_progress(mdl, k + 1, fx) == false)
 			break;
 	}
+	// Save state if user requested it
+	if (mdl->opt->sstate != NULL) {
+		FILE *file = fopen(mdl->opt->sstate, "w");
+		if (file == NULL)
+			fatal("failed to open output state file");
+		fprintf(file, "#state#3#%"PRIu64"\n", F);
+		for (uint64_t f = 0; f < F; f++) {
+			double vxp = xp != NULL ? xp[f] : 0.0;
+			double vstp = stp[f], vgp = gp[f];
+			fprintf(file, "%"PRIu64" ", f);
+			fprintf(file, "%la %la %la\n", vxp, vstp, vgp);
+		}
+		fclose(file);
+	}
 	// Free all allocated memory
 	if (wbt && !cut)
 		xvm_free(xp);
 	xvm_free(g);
 	xvm_free(gp);
-	for (size_t w = 1; w < W; w++)
-		xvm_free(grds[w]->g);
-	for (size_t w = 0; w < W; w++)
-		grd_free(grds[w]);
+	xvm_free(stp);
+	grd_free(grd);
 	free(st);
 }

data/ext/wapiti/sequence.h CHANGED

@@ -1,7 +1,7 @@
 /*
  *      Wapiti - A linear-chain CRF tool
  *
- * Copyright (c) 2009-2011  CNRS
+ * Copyright (c) 2009-2013  CNRS
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -29,6 +29,8 @@
 #define sequence_h
 #include <stddef.h>
+#include <stdint.h>
+#include <stdbool.h>
 #include "wapiti.h"
@@ -72,8 +74,8 @@
  */
 typedef struct raw_s raw_t;
 struct raw_s {
-	int   len;      //   T     Sequence length
-	char *lines[];  //  [T]    Raw lines directly from file
+	uint32_t  len;      //   T     Sequence length
+	char     *lines[];  //  [T]    Raw lines directly from file
 };
 /* tok_t:
@@ -91,10 +93,10 @@ struct raw_s {
  */
 typedef struct tok_s tok_t;
 struct tok_s {
-	int    len;     //   T     Sequence length
-	char **lbl;     //  [T]    List of labels strings
-	int   *cnts;    //  [T]    Length of tokens lists
-	char **toks[];  //  [T][]  Tokens lists
+	uint32_t   len;     //   T     Sequence length
+	char     **lbl;     //  [T]    List of labels strings
+	uint32_t  *cnts;    //  [T]    Length of tokens lists
+	char     **toks[];  //  [T][]  Tokens lists
 };
 /* seq_t:
@@ -121,12 +123,12 @@ struct tok_s {
 typedef struct pos_s pos_t;
 typedef struct seq_s seq_t;
 struct seq_s {
-	int     len;
-	size_t *raw;
+	uint32_t  len;
+	uint64_t *raw;
 	struct pos_s {
-		size_t  lbl;
-		size_t  ucnt,  bcnt;
-		size_t *uobs, *bobs;
+		uint32_t  lbl;
+		uint32_t  ucnt,  bcnt;
+		uint64_t *uobs, *bobs;
 	} pos[];
 };
@@ -139,10 +141,10 @@ struct seq_s {
  */
 typedef struct dat_s dat_t;
 struct dat_s {
-	bool     lbl;   //         True iff sequences are labelled
-	int      mlen;  //         Length of the longest sequence in the set
-	size_t   nseq;  //   S     Number of sequences in the set
-	seq_t  **seq;   //  [S]    List of sequences
+	bool       lbl;   //         True iff sequences are labelled
+	uint32_t   mlen;  //         Length of the longest sequence in the set
+	uint32_t   nseq;  //   S     Number of sequences in the set
+	seq_t    **seq;   //  [S]    List of sequences
 };
 #endif

data/ext/wapiti/sgdl1.c CHANGED

@@ -1,7 +1,7 @@
 /*
  *      Wapiti - A linear-chain CRF tool
  *
- * Copyright (c) 2009-2011  CNRS
+ * Copyright (c) 2009-2013  CNRS
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -27,6 +27,7 @@
 #include <math.h>
 #include <stdbool.h>
 #include <stddef.h>
+#include <stdint.h>
 #include <stdlib.h>
 #include <string.h>
@@ -51,8 +52,8 @@
  *       the AFNLP, pages 477-485, August 2009
  ******************************************************************************/
 typedef struct sgd_idx_s {
-	size_t *uobs;
-	size_t *bobs;
+	uint64_t *uobs;
+	uint64_t *bobs;
 } sgd_idx_t;
 /* applypenalty:
@@ -75,11 +76,11 @@ typedef struct sgd_idx_s {
  *   Add the <new> value in the array <obs> of size <cnt>. If the value is
  *   already present, we do nothing, else we add it.
  */
-static void sgd_add(size_t *obs, size_t *cnt, size_t new) {
+static void sgd_add(uint64_t *obs, uint32_t *cnt, uint64_t new) {
 	// First check if value is already in the array, we do a linear probing
 	// as it is simpler and since these arrays will be very short in
 	// practice, it's efficient enough.
-	for (size_t p = 0; p < *cnt; p++)
+	for (uint32_t p = 0; p < *cnt; p++)
 		if (obs[p] == new)
 			return;
 	// Insert the new value at the end since we have not found it.
@@ -91,13 +92,13 @@ static void sgd_add(size_t *obs, size_t *cnt, size_t new) {
  *   Train the model with the SGD-l1 algorithm described by Tsuruoka et al.
  */
 void trn_sgdl1(mdl_t *mdl) {
-	const size_t  Y = mdl->nlbl;
-	const size_t  F = mdl->nftr;
-	const int     U = mdl->reader->nuni;
-	const int     B = mdl->reader->nbi;
-	const int     S = mdl->train->nseq;
-	const int     K = mdl->opt->maxiter;
-	      double *w = mdl->theta;
+	const uint64_t  Y = mdl->nlbl;
+	const uint64_t  F = mdl->nftr;
+	const uint32_t  U = mdl->reader->nuni;
+	const uint32_t  B = mdl->reader->nbi;
+	const uint32_t  S = mdl->train->nseq;
+	const uint32_t  K = mdl->opt->maxiter;
+	      double   *w = mdl->theta;
 	// First we have to build an index which holds, for each sequence, the
 	// list of active observations.
 	// The index is a simple table indexed by sequences number. Each entry
@@ -105,24 +106,25 @@ void trn_sgdl1(mdl_t *mdl) {
 	// unigrams obss and one for bigrams obss.
 	info("    - Build the index\n");
 	sgd_idx_t *idx  = wapiti_xmalloc(sizeof(sgd_idx_t) * S);
-	for (int s = 0; s < S; s++) {
+	for (uint32_t s = 0; s < S; s++) {
 		const seq_t *seq = mdl->train->seq[s];
-		const int T = seq->len;
-		size_t uobs[U * T + 1], ucnt = 0;
-		size_t bobs[B * T + 1], bcnt = 0;
-		for (int t = 0; t < seq->len; t++) {
+		const uint32_t T = seq->len;
+		uint64_t uobs[U * T + 1];
+		uint64_t bobs[B * T + 1];
+		uint32_t ucnt = 0, bcnt = 0;
+		for (uint32_t t = 0; t < seq->len; t++) {
 			const pos_t *pos = &seq->pos[t];
-			for (size_t p = 0; p < pos->ucnt; p++)
+			for (uint32_t p = 0; p < pos->ucnt; p++)
 				sgd_add(uobs, &ucnt, pos->uobs[p]);
-			for (size_t p = 0; p < pos->bcnt; p++)
+			for (uint32_t p = 0; p < pos->bcnt; p++)
 				sgd_add(bobs, &bcnt, pos->bobs[p]);
 		}
 		uobs[ucnt++] = none;
 		bobs[bcnt++] = none;
-		idx[s].uobs = wapiti_xmalloc(sizeof(size_t) * ucnt);
-		idx[s].bobs = wapiti_xmalloc(sizeof(size_t) * bcnt);
-		memcpy(idx[s].uobs, uobs, ucnt * sizeof(size_t));
-		memcpy(idx[s].bobs, bobs, bcnt * sizeof(size_t));
+		idx[s].uobs = wapiti_xmalloc(sizeof(uint64_t) * ucnt);
+		idx[s].bobs = wapiti_xmalloc(sizeof(uint64_t) * bcnt);
+		memcpy(idx[s].uobs, uobs, ucnt * sizeof(uint64_t));
+		memcpy(idx[s].bobs, bobs, bcnt * sizeof(uint64_t));
 	}
 	info("      Done\n");
 	// We will process sequences in random order in each iteration, so we
@@ -137,34 +139,34 @@ void trn_sgdl1(mdl_t *mdl) {
 	// time.
 	// We also need an additional vector named <q> which holds the penalty
 	// already applied to each feature.
-	int *perm = wapiti_xmalloc(sizeof(int) * S);
-	for (int s = 0; s < S; s++)
+	uint32_t *perm = wapiti_xmalloc(sizeof(uint32_t) * S);
+	for (uint32_t s = 0; s < S; s++)
 		perm[s] = s;
 	double *g = wapiti_xmalloc(sizeof(double) * F);
 	double *q = wapiti_xmalloc(sizeof(double) * F);
-	for (size_t f = 0; f < F; f++)
+	for (uint64_t f = 0; f < F; f++)
 		g[f] = q[f] = 0.0;
 	// We can now start training the model, we perform the requested number
 	// of iterations, each of these going through all the sequences. For
 	// computing the decay, we will need to keep track of the number of
 	// already processed sequences, this is tracked by the <i> variable.
 	double u = 0.0;
-	grd_t *grd = grd_new(mdl, g);
-	for (int k = 0, i = 0; k < K && !uit_stop; k++) {
+	grd_st_t *grd_st = grd_stnew(mdl, g);
+	for (uint32_t k = 0, i = 0; k < K && !uit_stop; k++) {
 		// First we shuffle the sequences by making a lot of random swaps
 		// of entries in the permutation index.
-		for (int s = 0; s < S; s++) {
-			const int a = rand() % S;
-			const int b = rand() % S;
-			const int t = perm[a];
+		for (uint32_t s = 0; s < S; s++) {
+			const uint32_t a = rand() % S;
+			const uint32_t b = rand() % S;
+			const uint32_t t = perm[a];
 			perm[a] = perm[b];
 			perm[b] = t;
 		}
 		// And so, we can process sequence in a random order
-		for (int sp = 0; sp < S && !uit_stop; sp++, i++) {
-			const int s = perm[sp];
+		for (uint32_t sp = 0; sp < S && !uit_stop; sp++, i++) {
+			const uint32_t s = perm[sp];
 			const seq_t *seq = mdl->train->seq[s];
-			grd_dospl(grd, seq);
+			grd_dospl(grd_st, seq);
 			// Before applying the gradient, we have to compute the
 			// learning rate to apply to this sequence. For this we
 			// use an exponential decay [1, pp 481(5)]
@@ -180,17 +182,17 @@ void trn_sgdl1(mdl_t *mdl) {
 			// observations actives in the current sequence. We must
 			// not forget to clear the gradient for the next
 			// sequence.
-			for (size_t n = 0; idx[s].uobs[n] != none; n++) {
-				size_t f = mdl->uoff[idx[s].uobs[n]];
-				for (size_t y = 0; y < Y; y++, f++) {
+			for (uint32_t n = 0; idx[s].uobs[n] != none; n++) {
+				uint64_t f = mdl->uoff[idx[s].uobs[n]];
+				for (uint32_t y = 0; y < Y; y++, f++) {
 					w[f] -= nk * g[f];
 					applypenalty(f);
 					g[f] = 0.0;
 				}
 			}
-			for (size_t n = 0; idx[s].bobs[n] != none; n++) {
-				size_t f = mdl->boff[idx[s].bobs[n]];
-				for (size_t d = 0; d < Y * Y; d++, f++) {
+			for (uint32_t n = 0; idx[s].bobs[n] != none; n++) {
+				uint64_t f = mdl->boff[idx[s].bobs[n]];
+				for (uint32_t d = 0; d < Y * Y; d++, f++) {
 					w[f] -= nk * g[f];
 					applypenalty(f);
 					g[f] = 0.0;
@@ -203,9 +205,9 @@ void trn_sgdl1(mdl_t *mdl) {
 		if (!uit_progress(mdl, k + 1, -1.0))
 			break;
 	}
-	grd_free(grd);
+	grd_stfree(grd_st);
 	// Cleanup allocated memory before returning
-	for (int s = 0; s < S; s++) {
+	for (uint32_t s = 0; s < S; s++) {
 		free(idx[s].uobs);
 		free(idx[s].bobs);
 	}