wapiti 0.0.5 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. checksums.yaml +7 -0
  2. data/.simplecov +3 -0
  3. data/Gemfile +25 -2
  4. data/HISTORY.md +5 -1
  5. data/LICENSE +14 -13
  6. data/README.md +9 -16
  7. data/Rakefile +38 -8
  8. data/ext/wapiti/bcd.c +126 -124
  9. data/ext/wapiti/decoder.c +203 -124
  10. data/ext/wapiti/decoder.h +6 -4
  11. data/ext/wapiti/extconf.rb +2 -2
  12. data/ext/wapiti/gradient.c +491 -320
  13. data/ext/wapiti/gradient.h +52 -34
  14. data/ext/wapiti/lbfgs.c +74 -33
  15. data/ext/wapiti/model.c +47 -37
  16. data/ext/wapiti/model.h +22 -20
  17. data/ext/wapiti/native.c +850 -839
  18. data/ext/wapiti/native.h +1 -1
  19. data/ext/wapiti/options.c +52 -20
  20. data/ext/wapiti/options.h +37 -30
  21. data/ext/wapiti/pattern.c +35 -33
  22. data/ext/wapiti/pattern.h +12 -11
  23. data/ext/wapiti/progress.c +14 -13
  24. data/ext/wapiti/progress.h +3 -2
  25. data/ext/wapiti/quark.c +14 -16
  26. data/ext/wapiti/quark.h +6 -5
  27. data/ext/wapiti/reader.c +83 -69
  28. data/ext/wapiti/reader.h +11 -9
  29. data/ext/wapiti/rprop.c +84 -43
  30. data/ext/wapiti/sequence.h +18 -16
  31. data/ext/wapiti/sgdl1.c +45 -43
  32. data/ext/wapiti/thread.c +19 -17
  33. data/ext/wapiti/thread.h +5 -4
  34. data/ext/wapiti/tools.c +7 -7
  35. data/ext/wapiti/tools.h +3 -4
  36. data/ext/wapiti/trainers.h +1 -1
  37. data/ext/wapiti/vmath.c +40 -38
  38. data/ext/wapiti/vmath.h +12 -11
  39. data/ext/wapiti/wapiti.c +159 -37
  40. data/ext/wapiti/wapiti.h +18 -4
  41. data/lib/wapiti.rb +15 -15
  42. data/lib/wapiti/errors.rb +15 -15
  43. data/lib/wapiti/model.rb +92 -84
  44. data/lib/wapiti/options.rb +123 -124
  45. data/lib/wapiti/utility.rb +14 -14
  46. data/lib/wapiti/version.rb +2 -2
  47. data/spec/spec_helper.rb +29 -9
  48. data/spec/wapiti/model_spec.rb +230 -194
  49. data/spec/wapiti/native_spec.rb +7 -8
  50. data/spec/wapiti/options_spec.rb +184 -174
  51. data/wapiti.gemspec +22 -8
  52. metadata +38 -42
  53. data/.gitignore +0 -5
@@ -1,7 +1,7 @@
1
1
  /*
2
2
  * Wapiti - A linear-chain CRF tool
3
3
  *
4
- * Copyright (c) 2009-2011 CNRS
4
+ * Copyright (c) 2009-2013 CNRS
5
5
  * All rights reserved.
6
6
  *
7
7
  * Redistribution and use in source and binary forms, with or without
@@ -45,16 +45,16 @@
45
45
  */
46
46
  typedef struct rdr_s rdr_t;
47
47
  struct rdr_s {
48
- bool maxent; // Is this a maxent reader
49
- int npats; // P Total number of patterns
50
- int nuni, nbi; // Number of unigram and bigram patterns
51
- int ntoks; // Expected number of tokens in input
52
- pat_t **pats; // [P] List of precompiled patterns
53
- qrk_t *lbl; // Labels database
54
- qrk_t *obs; // Observation database
48
+ bool autouni; // Automatically add 'u' prefix
49
+ uint32_t npats; // P Total number of patterns
50
+ uint32_t nuni, nbi; // Number of unigram and bigram patterns
51
+ uint32_t ntoks; // Expected number of tokens in input
52
+ pat_t **pats; // [P] List of precompiled patterns
53
+ qrk_t *lbl; // Labels database
54
+ qrk_t *obs; // Observation database
55
55
  };
56
56
 
57
- rdr_t *rdr_new(bool maxent);
57
+ rdr_t *rdr_new(bool autouni);
58
58
  void rdr_free(rdr_t *rdr);
59
59
  void rdr_freeraw(raw_t *raw);
60
60
  void rdr_freeseq(seq_t *seq);
@@ -69,5 +69,7 @@ dat_t *rdr_readdat(rdr_t *rdr, FILE *file, bool lbl);
69
69
  void rdr_load(rdr_t *rdr, FILE *file);
70
70
  void rdr_save(const rdr_t *rdr, FILE *file);
71
71
 
72
+ char *rdr_readline(FILE *file);
73
+
72
74
  #endif
73
75
 
@@ -1,7 +1,7 @@
1
1
  /*
2
2
  * Wapiti - A linear-chain CRF tool
3
3
  *
4
- * Copyright (c) 2009-2011 CNRS
4
+ * Copyright (c) 2009-2013 CNRS
5
5
  * All rights reserved.
6
6
  *
7
7
  * Redistribution and use in source and binary forms, with or without
@@ -24,9 +24,13 @@
24
24
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25
25
  * POSSIBILITY OF SUCH DAMAGE.
26
26
  */
27
+ #include <inttypes.h>
28
+ #include <float.h>
27
29
  #include <math.h>
28
30
  #include <stdbool.h>
29
31
  #include <stddef.h>
32
+ #include <stdint.h>
33
+ #include <stdio.h>
30
34
  #include <stdlib.h>
31
35
  #include <string.h>
32
36
 
@@ -39,7 +43,9 @@
39
43
  #include "thread.h"
40
44
  #include "vmath.h"
41
45
 
42
- #define sign(v) ((v) < 0.0 ? -1.0 : ((v) > 0.0 ? 1.0 : 0.0))
46
+ #define EPSILON (DBL_EPSILON * 64.0)
47
+
48
+ #define sign(v) ((v) < -EPSILON ? -1.0 : ((v) > EPSILON ? 1.0 : 0.0))
43
49
  #define sqr(v) ((v) * (v))
44
50
 
45
51
  /******************************************************************************
@@ -58,7 +64,7 @@
58
64
  ******************************************************************************/
59
65
  typedef struct rprop_s rprop_t;
60
66
  struct rprop_s {
61
- mdl_t *mdl;
67
+ mdl_t *mdl;
62
68
  double *xp;
63
69
  double *stp;
64
70
  double *g;
@@ -71,33 +77,33 @@ struct rprop_s {
71
77
  * parameter given, the job scheduling system is not used here as we can
72
78
  * easily split processing in equals parts.
73
79
  */
74
- static void trn_rpropsub(job_t *job, int id, int cnt, rprop_t *st) {
80
+ static void trn_rpropsub(job_t *job, uint32_t id, uint32_t cnt, rprop_t *st) {
75
81
  unused(job);
76
82
  mdl_t *mdl = st->mdl;
77
- const size_t F = mdl->nftr;
78
- const double stpmin = mdl->opt->rprop.stpmin;
79
- const double stpmax = mdl->opt->rprop.stpmax;
80
- const double stpinc = mdl->opt->rprop.stpinc;
81
- const double stpdec = mdl->opt->rprop.stpdec;
82
- const bool wbt = strcmp(mdl->opt->algo, "rprop-");
83
- const double rho1 = mdl->opt->rho1;
84
- const int l1 = (rho1 != 0.0) ? mdl->opt->rprop.cutoff + 1: 0;
83
+ const uint64_t F = mdl->nftr;
84
+ const double stpmin = mdl->opt->rprop.stpmin;
85
+ const double stpmax = mdl->opt->rprop.stpmax;
86
+ const double stpinc = mdl->opt->rprop.stpinc;
87
+ const double stpdec = mdl->opt->rprop.stpdec;
88
+ const bool wbt = strcmp(mdl->opt->algo, "rprop-");
89
+ const double rho1 = mdl->opt->rho1;
90
+ const int l1 = (rho1 != 0.0) ? mdl->opt->rprop.cutoff + 1: 0;
85
91
  double *x = mdl->theta;
86
92
  double *xp = st->xp, *stp = st->stp;
87
93
  double *g = st->g, *gp = st->gp;
88
- const size_t from = F * id / cnt;
89
- const size_t to = F * (id + 1) / cnt;
90
- for (size_t f = from; f < to; f++) {
94
+ const uint64_t from = F * id / cnt;
95
+ const uint64_t to = F * (id + 1) / cnt;
96
+ for (uint64_t f = from; f < to; f++) {
91
97
  double pg = g[f];
92
98
  // If there is a l1 component in the regularization component,
93
99
  // we either project the gradient in the current orthant or
94
100
  // check for cutdown depending on the projection scheme wanted.
95
101
  if (l1 == 1) {
96
- if (x[f] < 0.0) pg -= rho1;
97
- else if (x[f] > 0.0) pg += rho1;
98
- else if (g[f] < -rho1) pg += rho1;
99
- else if (g[f] > rho1) pg -= rho1;
100
- else pg = 0.0;
102
+ if (x[f] < -EPSILON) pg -= rho1;
103
+ else if (x[f] > EPSILON) pg += rho1;
104
+ else if (g[f] < -rho1) pg += rho1;
105
+ else if (g[f] > rho1) pg -= rho1;
106
+ else pg = 0.0;
101
107
  } else if (l1 && sqr(g[f] + rho1 * sign(x[f])) < sqr(rho1)) {
102
108
  if (x[f] == 0.0 || ( gp[f] * g[f] < 0.0
103
109
  && xp[f] * x[f] < 0.0)) {
@@ -108,51 +114,77 @@ static void trn_rpropsub(job_t *job, int id, int cnt, rprop_t *st) {
108
114
  continue;
109
115
  }
110
116
  }
117
+ const double sgp = sign(gp[f]);
118
+ const double spg = sign(pg);
111
119
  // Next we adjust the step depending of the new and
112
120
  // previous gradient values.
113
- if (gp[f] * pg > 0.0)
121
+ if (sgp * spg > 0.0)
114
122
  stp[f] = min(stp[f] * stpinc, stpmax);
115
- else if (gp[f] * pg < 0.0)
123
+ else if (sgp * spg < 0.0)
116
124
  stp[f] = max(stp[f] * stpdec, stpmin);
117
125
  // Finally update the weight. if there is l1 penalty
118
126
  // and the pseudo gradient projection is used, we have to
119
127
  // project back the update in the choosen orthant.
120
- if (!wbt || gp[f] * pg > 0.0) {
128
+ if (!wbt || sgp * spg > 0.0) {
121
129
  double dlt = stp[f] * -sign(g[f]);
122
- if (l1 == 1 && dlt * pg >= 0.0)
130
+ if (l1 == 1 && dlt * spg >= 0.0)
123
131
  dlt = 0.0;
124
132
  if (wbt)
125
133
  xp[f] = x[f];
126
134
  x[f] += dlt;
127
- } else if (gp[f] * pg < 0.0) {
135
+ } else if (sgp * spg < -0.0) {
128
136
  x[f] = xp[f];
129
137
  g[f] = 0.0;
130
138
  } else {
131
139
  xp[f] = x[f];
132
140
  if (l1 != 1)
133
- x[f] += stp[f] * -sign(pg);
141
+ x[f] += stp[f] * -spg;
134
142
  }
135
143
  gp[f] = g[f];
136
144
  }
137
145
  }
138
146
 
139
147
  void trn_rprop(mdl_t *mdl) {
140
- const size_t F = mdl->nftr;
141
- const int K = mdl->opt->maxiter;
142
- const size_t W = mdl->opt->nthread;
143
- const bool wbt = strcmp(mdl->opt->algo, "rprop-");
144
- const int cut = mdl->opt->rprop.cutoff;
148
+ const uint64_t F = mdl->nftr;
149
+ const uint32_t K = mdl->opt->maxiter;
150
+ const uint32_t W = mdl->opt->nthread;
151
+ const bool wbt = strcmp(mdl->opt->algo, "rprop-");
152
+ const int cut = mdl->opt->rprop.cutoff;
145
153
  // Allocate state memory and initialize it
146
154
  double *xp = NULL, *stp = xvm_new(F);
147
155
  double *g = xvm_new(F), *gp = xvm_new(F);
148
156
  if (wbt && !cut)
149
157
  xp = xvm_new(F);
150
- for (unsigned f = 0; f < F; f++) {
158
+ for (uint64_t f = 0; f < F; f++) {
151
159
  if (wbt && !cut)
152
160
  xp[f] = 0.0;
153
161
  gp[f] = 0.0;
154
162
  stp[f] = 0.1;
155
163
  }
164
+ // Restore a saved state if given by the user
165
+ if (mdl->opt->rstate != NULL) {
166
+ const char *err = "invalid state file";
167
+ FILE *file = fopen(mdl->opt->rstate, "r");
168
+ if (file == NULL)
169
+ fatal("failed to open input state file");
170
+ int type;
171
+ uint64_t nftr;
172
+ if (fscanf(file, "#state#%d#%"SCNu64"\n", &type, &nftr) != 2)
173
+ fatal(err);
174
+ if (type != 3)
175
+ fatal("state is not for rprop model");
176
+ for (uint64_t i = 0; i < nftr; i++) {
177
+ uint64_t f;
178
+ double vxp, vstp, vgp;
179
+ if (fscanf(file, "%"PRIu64" %la %la %la\n", &f, &vxp,
180
+ &vstp, &vgp) != 4)
181
+ fatal(err);
182
+ if (wbt && !cut) xp[f] = vxp;
183
+ gp[f] = vgp;
184
+ stp[f] = vstp;
185
+ }
186
+ fclose(file);
187
+ }
156
188
  // Prepare the rprop state used to send information to the rprop worker
157
189
  // about updating weight using the gradient.
158
190
  rprop_t *st = wapiti_xmalloc(sizeof(rprop_t));
@@ -160,32 +192,41 @@ void trn_rprop(mdl_t *mdl) {
160
192
  st->xp = xp; st->stp = stp;
161
193
  st->g = g; st->gp = gp;
162
194
  rprop_t *rprop[W];
163
- for (size_t w = 0; w < W; w++)
195
+ for (uint32_t w = 0; w < W; w++)
164
196
  rprop[w] = st;
165
197
  // Prepare the gradient state for the distributed gradient computation.
166
- grd_t *grds[W];
167
- grds[0] = grd_new(mdl, g);
168
- for (size_t w = 1; w < W; w++)
169
- grds[w] = grd_new(mdl, xvm_new(F));
198
+ grd_t *grd = grd_new(mdl, g);
170
199
  // And iterate the gradient computation / weight update process until
171
200
  // convergence or stop request
172
- for (int k = 0; !uit_stop && k < K; k++) {
173
- double fx = grd_gradient(mdl, g, grds);
201
+ for (uint32_t k = 0; !uit_stop && k < K; k++) {
202
+ double fx = grd_gradient(grd);
174
203
  if (uit_stop)
175
204
  break;
176
205
  mth_spawn((func_t *)trn_rpropsub, W, (void **)rprop, 0, 0);
177
206
  if (uit_progress(mdl, k + 1, fx) == false)
178
207
  break;
179
208
  }
209
+ // Save state if user requested it
210
+ if (mdl->opt->sstate != NULL) {
211
+ FILE *file = fopen(mdl->opt->sstate, "w");
212
+ if (file == NULL)
213
+ fatal("failed to open output state file");
214
+ fprintf(file, "#state#3#%"PRIu64"\n", F);
215
+ for (uint64_t f = 0; f < F; f++) {
216
+ double vxp = xp != NULL ? xp[f] : 0.0;
217
+ double vstp = stp[f], vgp = gp[f];
218
+ fprintf(file, "%"PRIu64" ", f);
219
+ fprintf(file, "%la %la %la\n", vxp, vstp, vgp);
220
+ }
221
+ fclose(file);
222
+ }
180
223
  // Free all allocated memory
181
224
  if (wbt && !cut)
182
225
  xvm_free(xp);
183
226
  xvm_free(g);
184
227
  xvm_free(gp);
185
- for (size_t w = 1; w < W; w++)
186
- xvm_free(grds[w]->g);
187
- for (size_t w = 0; w < W; w++)
188
- grd_free(grds[w]);
228
+ xvm_free(stp);
229
+ grd_free(grd);
189
230
  free(st);
190
231
  }
191
232
 
@@ -1,7 +1,7 @@
1
1
  /*
2
2
  * Wapiti - A linear-chain CRF tool
3
3
  *
4
- * Copyright (c) 2009-2011 CNRS
4
+ * Copyright (c) 2009-2013 CNRS
5
5
  * All rights reserved.
6
6
  *
7
7
  * Redistribution and use in source and binary forms, with or without
@@ -29,6 +29,8 @@
29
29
  #define sequence_h
30
30
 
31
31
  #include <stddef.h>
32
+ #include <stdint.h>
33
+ #include <stdbool.h>
32
34
 
33
35
  #include "wapiti.h"
34
36
 
@@ -72,8 +74,8 @@
72
74
  */
73
75
  typedef struct raw_s raw_t;
74
76
  struct raw_s {
75
- int len; // T Sequence length
76
- char *lines[]; // [T] Raw lines directly from file
77
+ uint32_t len; // T Sequence length
78
+ char *lines[]; // [T] Raw lines directly from file
77
79
  };
78
80
 
79
81
  /* tok_t:
@@ -91,10 +93,10 @@ struct raw_s {
91
93
  */
92
94
  typedef struct tok_s tok_t;
93
95
  struct tok_s {
94
- int len; // T Sequence length
95
- char **lbl; // [T] List of labels strings
96
- int *cnts; // [T] Length of tokens lists
97
- char **toks[]; // [T][] Tokens lists
96
+ uint32_t len; // T Sequence length
97
+ char **lbl; // [T] List of labels strings
98
+ uint32_t *cnts; // [T] Length of tokens lists
99
+ char **toks[]; // [T][] Tokens lists
98
100
  };
99
101
 
100
102
  /* seq_t:
@@ -121,12 +123,12 @@ struct tok_s {
121
123
  typedef struct pos_s pos_t;
122
124
  typedef struct seq_s seq_t;
123
125
  struct seq_s {
124
- int len;
125
- size_t *raw;
126
+ uint32_t len;
127
+ uint64_t *raw;
126
128
  struct pos_s {
127
- size_t lbl;
128
- size_t ucnt, bcnt;
129
- size_t *uobs, *bobs;
129
+ uint32_t lbl;
130
+ uint32_t ucnt, bcnt;
131
+ uint64_t *uobs, *bobs;
130
132
  } pos[];
131
133
  };
132
134
 
@@ -139,10 +141,10 @@ struct seq_s {
139
141
  */
140
142
  typedef struct dat_s dat_t;
141
143
  struct dat_s {
142
- bool lbl; // True iff sequences are labelled
143
- int mlen; // Length of the longest sequence in the set
144
- size_t nseq; // S Number of sequences in the set
145
- seq_t **seq; // [S] List of sequences
144
+ bool lbl; // True iff sequences are labelled
145
+ uint32_t mlen; // Length of the longest sequence in the set
146
+ uint32_t nseq; // S Number of sequences in the set
147
+ seq_t **seq; // [S] List of sequences
146
148
  };
147
149
 
148
150
  #endif
@@ -1,7 +1,7 @@
1
1
  /*
2
2
  * Wapiti - A linear-chain CRF tool
3
3
  *
4
- * Copyright (c) 2009-2011 CNRS
4
+ * Copyright (c) 2009-2013 CNRS
5
5
  * All rights reserved.
6
6
  *
7
7
  * Redistribution and use in source and binary forms, with or without
@@ -27,6 +27,7 @@
27
27
  #include <math.h>
28
28
  #include <stdbool.h>
29
29
  #include <stddef.h>
30
+ #include <stdint.h>
30
31
  #include <stdlib.h>
31
32
  #include <string.h>
32
33
 
@@ -51,8 +52,8 @@
51
52
  * the AFNLP, pages 477-485, August 2009
52
53
  ******************************************************************************/
53
54
  typedef struct sgd_idx_s {
54
- size_t *uobs;
55
- size_t *bobs;
55
+ uint64_t *uobs;
56
+ uint64_t *bobs;
56
57
  } sgd_idx_t;
57
58
 
58
59
  /* applypenalty:
@@ -75,11 +76,11 @@ typedef struct sgd_idx_s {
75
76
  * Add the <new> value in the array <obs> of size <cnt>. If the value is
76
77
  * already present, we do nothing, else we add it.
77
78
  */
78
- static void sgd_add(size_t *obs, size_t *cnt, size_t new) {
79
+ static void sgd_add(uint64_t *obs, uint32_t *cnt, uint64_t new) {
79
80
  // First check if value is already in the array, we do a linear probing
80
81
  // as it is simpler and since these array will be very short in
81
82
  // practice, it's efficient enough.
82
- for (size_t p = 0; p < *cnt; p++)
83
+ for (uint32_t p = 0; p < *cnt; p++)
83
84
  if (obs[p] == new)
84
85
  return;
85
86
  // Insert the new value at the end since we have not found it.
@@ -91,13 +92,13 @@ static void sgd_add(size_t *obs, size_t *cnt, size_t new) {
91
92
  * Train the model with the SGD-l1 algorithm described by tsurukoa et al.
92
93
  */
93
94
  void trn_sgdl1(mdl_t *mdl) {
94
- const size_t Y = mdl->nlbl;
95
- const size_t F = mdl->nftr;
96
- const int U = mdl->reader->nuni;
97
- const int B = mdl->reader->nbi;
98
- const int S = mdl->train->nseq;
99
- const int K = mdl->opt->maxiter;
100
- double *w = mdl->theta;
95
+ const uint64_t Y = mdl->nlbl;
96
+ const uint64_t F = mdl->nftr;
97
+ const uint32_t U = mdl->reader->nuni;
98
+ const uint32_t B = mdl->reader->nbi;
99
+ const uint32_t S = mdl->train->nseq;
100
+ const uint32_t K = mdl->opt->maxiter;
101
+ double *w = mdl->theta;
101
102
  // First we have to build and index who hold, for each sequences, the
102
103
  // list of actives observations.
103
104
  // The index is a simple table indexed by sequences number. Each entry
@@ -105,24 +106,25 @@ void trn_sgdl1(mdl_t *mdl) {
105
106
  // unigrams obss and one for bigrams obss.
106
107
  info(" - Build the index\n");
107
108
  sgd_idx_t *idx = wapiti_xmalloc(sizeof(sgd_idx_t) * S);
108
- for (int s = 0; s < S; s++) {
109
+ for (uint32_t s = 0; s < S; s++) {
109
110
  const seq_t *seq = mdl->train->seq[s];
110
- const int T = seq->len;
111
- size_t uobs[U * T + 1], ucnt = 0;
112
- size_t bobs[B * T + 1], bcnt = 0;
113
- for (int t = 0; t < seq->len; t++) {
111
+ const uint32_t T = seq->len;
112
+ uint64_t uobs[U * T + 1];
113
+ uint64_t bobs[B * T + 1];
114
+ uint32_t ucnt = 0, bcnt = 0;
115
+ for (uint32_t t = 0; t < seq->len; t++) {
114
116
  const pos_t *pos = &seq->pos[t];
115
- for (size_t p = 0; p < pos->ucnt; p++)
117
+ for (uint32_t p = 0; p < pos->ucnt; p++)
116
118
  sgd_add(uobs, &ucnt, pos->uobs[p]);
117
- for (size_t p = 0; p < pos->bcnt; p++)
119
+ for (uint32_t p = 0; p < pos->bcnt; p++)
118
120
  sgd_add(bobs, &bcnt, pos->bobs[p]);
119
121
  }
120
122
  uobs[ucnt++] = none;
121
123
  bobs[bcnt++] = none;
122
- idx[s].uobs = wapiti_xmalloc(sizeof(size_t) * ucnt);
123
- idx[s].bobs = wapiti_xmalloc(sizeof(size_t) * bcnt);
124
- memcpy(idx[s].uobs, uobs, ucnt * sizeof(size_t));
125
- memcpy(idx[s].bobs, bobs, bcnt * sizeof(size_t));
124
+ idx[s].uobs = wapiti_xmalloc(sizeof(uint64_t) * ucnt);
125
+ idx[s].bobs = wapiti_xmalloc(sizeof(uint64_t) * bcnt);
126
+ memcpy(idx[s].uobs, uobs, ucnt * sizeof(uint64_t));
127
+ memcpy(idx[s].bobs, bobs, bcnt * sizeof(uint64_t));
126
128
  }
127
129
  info(" Done\n");
128
130
  // We will process sequences in random order in each iteration, so we
@@ -137,34 +139,34 @@ void trn_sgdl1(mdl_t *mdl) {
137
139
  // time.
138
140
  // We also need an aditional vector named <q> who hold the penalty
139
141
  // already applied to each features.
140
- int *perm = wapiti_xmalloc(sizeof(int) * S);
141
- for (int s = 0; s < S; s++)
142
+ uint32_t *perm = wapiti_xmalloc(sizeof(uint32_t) * S);
143
+ for (uint32_t s = 0; s < S; s++)
142
144
  perm[s] = s;
143
145
  double *g = wapiti_xmalloc(sizeof(double) * F);
144
146
  double *q = wapiti_xmalloc(sizeof(double) * F);
145
- for (size_t f = 0; f < F; f++)
147
+ for (uint64_t f = 0; f < F; f++)
146
148
  g[f] = q[f] = 0.0;
147
149
  // We can now start training the model, we perform the requested number
148
150
  // of iteration, each of these going through all the sequences. For
149
151
  // computing the decay, we will need to keep track of the number of
150
152
  // already processed sequences, this is tracked by the <i> variable.
151
153
  double u = 0.0;
152
- grd_t *grd = grd_new(mdl, g);
153
- for (int k = 0, i = 0; k < K && !uit_stop; k++) {
154
+ grd_st_t *grd_st = grd_stnew(mdl, g);
155
+ for (uint32_t k = 0, i = 0; k < K && !uit_stop; k++) {
154
156
  // First we shuffle the sequence by making a lot of random swap
155
157
  // of entry in the permutation index.
156
- for (int s = 0; s < S; s++) {
157
- const int a = rand() % S;
158
- const int b = rand() % S;
159
- const int t = perm[a];
158
+ for (uint32_t s = 0; s < S; s++) {
159
+ const uint32_t a = rand() % S;
160
+ const uint32_t b = rand() % S;
161
+ const uint32_t t = perm[a];
160
162
  perm[a] = perm[b];
161
163
  perm[b] = t;
162
164
  }
163
165
  // And so, we can process sequence in a random order
164
- for (int sp = 0; sp < S && !uit_stop; sp++, i++) {
165
- const int s = perm[sp];
166
+ for (uint32_t sp = 0; sp < S && !uit_stop; sp++, i++) {
167
+ const uint32_t s = perm[sp];
166
168
  const seq_t *seq = mdl->train->seq[s];
167
- grd_dospl(grd, seq);
169
+ grd_dospl(grd_st, seq);
168
170
  // Before applying the gradient, we have to compute the
169
171
  // learning rate to apply to this sequence. For this we
170
172
  // use an exponential decay [1, pp 481(5)]
@@ -180,17 +182,17 @@ void trn_sgdl1(mdl_t *mdl) {
180
182
  // observations actives in the current sequence. We must
181
183
  // not forget to clear the gradient for the next
182
184
  // sequence.
183
- for (size_t n = 0; idx[s].uobs[n] != none; n++) {
184
- size_t f = mdl->uoff[idx[s].uobs[n]];
185
- for (size_t y = 0; y < Y; y++, f++) {
185
+ for (uint32_t n = 0; idx[s].uobs[n] != none; n++) {
186
+ uint64_t f = mdl->uoff[idx[s].uobs[n]];
187
+ for (uint32_t y = 0; y < Y; y++, f++) {
186
188
  w[f] -= nk * g[f];
187
189
  applypenalty(f);
188
190
  g[f] = 0.0;
189
191
  }
190
192
  }
191
- for (size_t n = 0; idx[s].bobs[n] != none; n++) {
192
- size_t f = mdl->boff[idx[s].bobs[n]];
193
- for (size_t d = 0; d < Y * Y; d++, f++) {
193
+ for (uint32_t n = 0; idx[s].bobs[n] != none; n++) {
194
+ uint64_t f = mdl->boff[idx[s].bobs[n]];
195
+ for (uint32_t d = 0; d < Y * Y; d++, f++) {
194
196
  w[f] -= nk * g[f];
195
197
  applypenalty(f);
196
198
  g[f] = 0.0;
@@ -203,9 +205,9 @@ void trn_sgdl1(mdl_t *mdl) {
203
205
  if (!uit_progress(mdl, k + 1, -1.0))
204
206
  break;
205
207
  }
206
- grd_free(grd);
208
+ grd_stfree(grd_st);
207
209
  // Cleanup allocated memory before returning
208
- for (int s = 0; s < S; s++) {
210
+ for (uint32_t s = 0; s < S; s++) {
209
211
  free(idx[s].uobs);
210
212
  free(idx[s].bobs);
211
213
  }