wapiti 0.0.5 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. checksums.yaml +7 -0
  2. data/.simplecov +3 -0
  3. data/Gemfile +25 -2
  4. data/HISTORY.md +5 -1
  5. data/LICENSE +14 -13
  6. data/README.md +9 -16
  7. data/Rakefile +38 -8
  8. data/ext/wapiti/bcd.c +126 -124
  9. data/ext/wapiti/decoder.c +203 -124
  10. data/ext/wapiti/decoder.h +6 -4
  11. data/ext/wapiti/extconf.rb +2 -2
  12. data/ext/wapiti/gradient.c +491 -320
  13. data/ext/wapiti/gradient.h +52 -34
  14. data/ext/wapiti/lbfgs.c +74 -33
  15. data/ext/wapiti/model.c +47 -37
  16. data/ext/wapiti/model.h +22 -20
  17. data/ext/wapiti/native.c +850 -839
  18. data/ext/wapiti/native.h +1 -1
  19. data/ext/wapiti/options.c +52 -20
  20. data/ext/wapiti/options.h +37 -30
  21. data/ext/wapiti/pattern.c +35 -33
  22. data/ext/wapiti/pattern.h +12 -11
  23. data/ext/wapiti/progress.c +14 -13
  24. data/ext/wapiti/progress.h +3 -2
  25. data/ext/wapiti/quark.c +14 -16
  26. data/ext/wapiti/quark.h +6 -5
  27. data/ext/wapiti/reader.c +83 -69
  28. data/ext/wapiti/reader.h +11 -9
  29. data/ext/wapiti/rprop.c +84 -43
  30. data/ext/wapiti/sequence.h +18 -16
  31. data/ext/wapiti/sgdl1.c +45 -43
  32. data/ext/wapiti/thread.c +19 -17
  33. data/ext/wapiti/thread.h +5 -4
  34. data/ext/wapiti/tools.c +7 -7
  35. data/ext/wapiti/tools.h +3 -4
  36. data/ext/wapiti/trainers.h +1 -1
  37. data/ext/wapiti/vmath.c +40 -38
  38. data/ext/wapiti/vmath.h +12 -11
  39. data/ext/wapiti/wapiti.c +159 -37
  40. data/ext/wapiti/wapiti.h +18 -4
  41. data/lib/wapiti.rb +15 -15
  42. data/lib/wapiti/errors.rb +15 -15
  43. data/lib/wapiti/model.rb +92 -84
  44. data/lib/wapiti/options.rb +123 -124
  45. data/lib/wapiti/utility.rb +14 -14
  46. data/lib/wapiti/version.rb +2 -2
  47. data/spec/spec_helper.rb +29 -9
  48. data/spec/wapiti/model_spec.rb +230 -194
  49. data/spec/wapiti/native_spec.rb +7 -8
  50. data/spec/wapiti/options_spec.rb +184 -174
  51. data/wapiti.gemspec +22 -8
  52. metadata +38 -42
  53. data/.gitignore +0 -5
@@ -1,7 +1,7 @@
1
1
  /*
2
2
  * Wapiti - A linear-chain CRF tool
3
3
  *
4
- * Copyright (c) 2009-2011 CNRS
4
+ * Copyright (c) 2009-2013 CNRS
5
5
  * All rights reserved.
6
6
  *
7
7
  * Redistribution and use in source and binary forms, with or without
@@ -45,16 +45,16 @@
45
45
  */
46
46
  typedef struct rdr_s rdr_t;
47
47
  struct rdr_s {
48
- bool maxent; // Is this a maxent reader
49
- int npats; // P Total number of patterns
50
- int nuni, nbi; // Number of unigram and bigram patterns
51
- int ntoks; // Expected number of tokens in input
52
- pat_t **pats; // [P] List of precompiled patterns
53
- qrk_t *lbl; // Labels database
54
- qrk_t *obs; // Observation database
48
+ bool autouni; // Automatically add 'u' prefix
49
+ uint32_t npats; // P Total number of patterns
50
+ uint32_t nuni, nbi; // Number of unigram and bigram patterns
51
+ uint32_t ntoks; // Expected number of tokens in input
52
+ pat_t **pats; // [P] List of precompiled patterns
53
+ qrk_t *lbl; // Labels database
54
+ qrk_t *obs; // Observation database
55
55
  };
56
56
 
57
- rdr_t *rdr_new(bool maxent);
57
+ rdr_t *rdr_new(bool autouni);
58
58
  void rdr_free(rdr_t *rdr);
59
59
  void rdr_freeraw(raw_t *raw);
60
60
  void rdr_freeseq(seq_t *seq);
@@ -69,5 +69,7 @@ dat_t *rdr_readdat(rdr_t *rdr, FILE *file, bool lbl);
69
69
  void rdr_load(rdr_t *rdr, FILE *file);
70
70
  void rdr_save(const rdr_t *rdr, FILE *file);
71
71
 
72
+ char *rdr_readline(FILE *file);
73
+
72
74
  #endif
73
75
 
@@ -1,7 +1,7 @@
1
1
  /*
2
2
  * Wapiti - A linear-chain CRF tool
3
3
  *
4
- * Copyright (c) 2009-2011 CNRS
4
+ * Copyright (c) 2009-2013 CNRS
5
5
  * All rights reserved.
6
6
  *
7
7
  * Redistribution and use in source and binary forms, with or without
@@ -24,9 +24,13 @@
24
24
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25
25
  * POSSIBILITY OF SUCH DAMAGE.
26
26
  */
27
+ #include <inttypes.h>
28
+ #include <float.h>
27
29
  #include <math.h>
28
30
  #include <stdbool.h>
29
31
  #include <stddef.h>
32
+ #include <stdint.h>
33
+ #include <stdio.h>
30
34
  #include <stdlib.h>
31
35
  #include <string.h>
32
36
 
@@ -39,7 +43,9 @@
39
43
  #include "thread.h"
40
44
  #include "vmath.h"
41
45
 
42
- #define sign(v) ((v) < 0.0 ? -1.0 : ((v) > 0.0 ? 1.0 : 0.0))
46
+ #define EPSILON (DBL_EPSILON * 64.0)
47
+
48
+ #define sign(v) ((v) < -EPSILON ? -1.0 : ((v) > EPSILON ? 1.0 : 0.0))
43
49
  #define sqr(v) ((v) * (v))
44
50
 
45
51
  /******************************************************************************
@@ -58,7 +64,7 @@
58
64
  ******************************************************************************/
59
65
  typedef struct rprop_s rprop_t;
60
66
  struct rprop_s {
61
- mdl_t *mdl;
67
+ mdl_t *mdl;
62
68
  double *xp;
63
69
  double *stp;
64
70
  double *g;
@@ -71,33 +77,33 @@ struct rprop_s {
71
77
  * parameter given, the job scheduling system is not used here as we can
72
78
  * easily split processing in equal parts.
73
79
  */
74
- static void trn_rpropsub(job_t *job, int id, int cnt, rprop_t *st) {
80
+ static void trn_rpropsub(job_t *job, uint32_t id, uint32_t cnt, rprop_t *st) {
75
81
  unused(job);
76
82
  mdl_t *mdl = st->mdl;
77
- const size_t F = mdl->nftr;
78
- const double stpmin = mdl->opt->rprop.stpmin;
79
- const double stpmax = mdl->opt->rprop.stpmax;
80
- const double stpinc = mdl->opt->rprop.stpinc;
81
- const double stpdec = mdl->opt->rprop.stpdec;
82
- const bool wbt = strcmp(mdl->opt->algo, "rprop-");
83
- const double rho1 = mdl->opt->rho1;
84
- const int l1 = (rho1 != 0.0) ? mdl->opt->rprop.cutoff + 1: 0;
83
+ const uint64_t F = mdl->nftr;
84
+ const double stpmin = mdl->opt->rprop.stpmin;
85
+ const double stpmax = mdl->opt->rprop.stpmax;
86
+ const double stpinc = mdl->opt->rprop.stpinc;
87
+ const double stpdec = mdl->opt->rprop.stpdec;
88
+ const bool wbt = strcmp(mdl->opt->algo, "rprop-");
89
+ const double rho1 = mdl->opt->rho1;
90
+ const int l1 = (rho1 != 0.0) ? mdl->opt->rprop.cutoff + 1: 0;
85
91
  double *x = mdl->theta;
86
92
  double *xp = st->xp, *stp = st->stp;
87
93
  double *g = st->g, *gp = st->gp;
88
- const size_t from = F * id / cnt;
89
- const size_t to = F * (id + 1) / cnt;
90
- for (size_t f = from; f < to; f++) {
94
+ const uint64_t from = F * id / cnt;
95
+ const uint64_t to = F * (id + 1) / cnt;
96
+ for (uint64_t f = from; f < to; f++) {
91
97
  double pg = g[f];
92
98
  // If there is a l1 component in the regularization component,
93
99
  // we either project the gradient in the current orthant or
94
100
  // check for cutdown depending on the projection scheme wanted.
95
101
  if (l1 == 1) {
96
- if (x[f] < 0.0) pg -= rho1;
97
- else if (x[f] > 0.0) pg += rho1;
98
- else if (g[f] < -rho1) pg += rho1;
99
- else if (g[f] > rho1) pg -= rho1;
100
- else pg = 0.0;
102
+ if (x[f] < -EPSILON) pg -= rho1;
103
+ else if (x[f] > EPSILON) pg += rho1;
104
+ else if (g[f] < -rho1) pg += rho1;
105
+ else if (g[f] > rho1) pg -= rho1;
106
+ else pg = 0.0;
101
107
  } else if (l1 && sqr(g[f] + rho1 * sign(x[f])) < sqr(rho1)) {
102
108
  if (x[f] == 0.0 || ( gp[f] * g[f] < 0.0
103
109
  && xp[f] * x[f] < 0.0)) {
@@ -108,51 +114,77 @@ static void trn_rpropsub(job_t *job, int id, int cnt, rprop_t *st) {
108
114
  continue;
109
115
  }
110
116
  }
117
+ const double sgp = sign(gp[f]);
118
+ const double spg = sign(pg);
111
119
  // Next we adjust the step depending on the new and
112
120
  // previous gradient values.
113
- if (gp[f] * pg > 0.0)
121
+ if (sgp * spg > 0.0)
114
122
  stp[f] = min(stp[f] * stpinc, stpmax);
115
- else if (gp[f] * pg < 0.0)
123
+ else if (sgp * spg < 0.0)
116
124
  stp[f] = max(stp[f] * stpdec, stpmin);
117
125
  // Finally update the weight. If there is an l1 penalty
118
126
  // and the pseudo gradient projection is used, we have to
119
127
  // project back the update in the chosen orthant.
120
- if (!wbt || gp[f] * pg > 0.0) {
128
+ if (!wbt || sgp * spg > 0.0) {
121
129
  double dlt = stp[f] * -sign(g[f]);
122
- if (l1 == 1 && dlt * pg >= 0.0)
130
+ if (l1 == 1 && dlt * spg >= 0.0)
123
131
  dlt = 0.0;
124
132
  if (wbt)
125
133
  xp[f] = x[f];
126
134
  x[f] += dlt;
127
- } else if (gp[f] * pg < 0.0) {
135
+ } else if (sgp * spg < -0.0) {
128
136
  x[f] = xp[f];
129
137
  g[f] = 0.0;
130
138
  } else {
131
139
  xp[f] = x[f];
132
140
  if (l1 != 1)
133
- x[f] += stp[f] * -sign(pg);
141
+ x[f] += stp[f] * -spg;
134
142
  }
135
143
  gp[f] = g[f];
136
144
  }
137
145
  }
138
146
 
139
147
  void trn_rprop(mdl_t *mdl) {
140
- const size_t F = mdl->nftr;
141
- const int K = mdl->opt->maxiter;
142
- const size_t W = mdl->opt->nthread;
143
- const bool wbt = strcmp(mdl->opt->algo, "rprop-");
144
- const int cut = mdl->opt->rprop.cutoff;
148
+ const uint64_t F = mdl->nftr;
149
+ const uint32_t K = mdl->opt->maxiter;
150
+ const uint32_t W = mdl->opt->nthread;
151
+ const bool wbt = strcmp(mdl->opt->algo, "rprop-");
152
+ const int cut = mdl->opt->rprop.cutoff;
145
153
  // Allocate state memory and initialize it
146
154
  double *xp = NULL, *stp = xvm_new(F);
147
155
  double *g = xvm_new(F), *gp = xvm_new(F);
148
156
  if (wbt && !cut)
149
157
  xp = xvm_new(F);
150
- for (unsigned f = 0; f < F; f++) {
158
+ for (uint64_t f = 0; f < F; f++) {
151
159
  if (wbt && !cut)
152
160
  xp[f] = 0.0;
153
161
  gp[f] = 0.0;
154
162
  stp[f] = 0.1;
155
163
  }
164
+ // Restore a saved state if given by the user
165
+ if (mdl->opt->rstate != NULL) {
166
+ const char *err = "invalid state file";
167
+ FILE *file = fopen(mdl->opt->rstate, "r");
168
+ if (file == NULL)
169
+ fatal("failed to open input state file");
170
+ int type;
171
+ uint64_t nftr;
172
+ if (fscanf(file, "#state#%d#%"SCNu64"\n", &type, &nftr) != 2)
173
+ fatal(err);
174
+ if (type != 3)
175
+ fatal("state is not for rprop model");
176
+ for (uint64_t i = 0; i < nftr; i++) {
177
+ uint64_t f;
178
+ double vxp, vstp, vgp;
179
+ if (fscanf(file, "%"PRIu64" %la %la %la\n", &f, &vxp,
180
+ &vstp, &vgp) != 4)
181
+ fatal(err);
182
+ if (wbt && !cut) xp[f] = vxp;
183
+ gp[f] = vgp;
184
+ stp[f] = vstp;
185
+ }
186
+ fclose(file);
187
+ }
156
188
  // Prepare the rprop state used to send information to the rprop worker
157
189
  // about updating weight using the gradient.
158
190
  rprop_t *st = wapiti_xmalloc(sizeof(rprop_t));
@@ -160,32 +192,41 @@ void trn_rprop(mdl_t *mdl) {
160
192
  st->xp = xp; st->stp = stp;
161
193
  st->g = g; st->gp = gp;
162
194
  rprop_t *rprop[W];
163
- for (size_t w = 0; w < W; w++)
195
+ for (uint32_t w = 0; w < W; w++)
164
196
  rprop[w] = st;
165
197
  // Prepare the gradient state for the distributed gradient computation.
166
- grd_t *grds[W];
167
- grds[0] = grd_new(mdl, g);
168
- for (size_t w = 1; w < W; w++)
169
- grds[w] = grd_new(mdl, xvm_new(F));
198
+ grd_t *grd = grd_new(mdl, g);
170
199
  // And iterate the gradient computation / weight update process until
171
200
  // convergence or stop request
172
- for (int k = 0; !uit_stop && k < K; k++) {
173
- double fx = grd_gradient(mdl, g, grds);
201
+ for (uint32_t k = 0; !uit_stop && k < K; k++) {
202
+ double fx = grd_gradient(grd);
174
203
  if (uit_stop)
175
204
  break;
176
205
  mth_spawn((func_t *)trn_rpropsub, W, (void **)rprop, 0, 0);
177
206
  if (uit_progress(mdl, k + 1, fx) == false)
178
207
  break;
179
208
  }
209
+ // Save state if user requested it
210
+ if (mdl->opt->sstate != NULL) {
211
+ FILE *file = fopen(mdl->opt->sstate, "w");
212
+ if (file == NULL)
213
+ fatal("failed to open output state file");
214
+ fprintf(file, "#state#3#%"PRIu64"\n", F);
215
+ for (uint64_t f = 0; f < F; f++) {
216
+ double vxp = xp != NULL ? xp[f] : 0.0;
217
+ double vstp = stp[f], vgp = gp[f];
218
+ fprintf(file, "%"PRIu64" ", f);
219
+ fprintf(file, "%la %la %la\n", vxp, vstp, vgp);
220
+ }
221
+ fclose(file);
222
+ }
180
223
  // Free all allocated memory
181
224
  if (wbt && !cut)
182
225
  xvm_free(xp);
183
226
  xvm_free(g);
184
227
  xvm_free(gp);
185
- for (size_t w = 1; w < W; w++)
186
- xvm_free(grds[w]->g);
187
- for (size_t w = 0; w < W; w++)
188
- grd_free(grds[w]);
228
+ xvm_free(stp);
229
+ grd_free(grd);
189
230
  free(st);
190
231
  }
191
232
 
@@ -1,7 +1,7 @@
1
1
  /*
2
2
  * Wapiti - A linear-chain CRF tool
3
3
  *
4
- * Copyright (c) 2009-2011 CNRS
4
+ * Copyright (c) 2009-2013 CNRS
5
5
  * All rights reserved.
6
6
  *
7
7
  * Redistribution and use in source and binary forms, with or without
@@ -29,6 +29,8 @@
29
29
  #define sequence_h
30
30
 
31
31
  #include <stddef.h>
32
+ #include <stdint.h>
33
+ #include <stdbool.h>
32
34
 
33
35
  #include "wapiti.h"
34
36
 
@@ -72,8 +74,8 @@
72
74
  */
73
75
  typedef struct raw_s raw_t;
74
76
  struct raw_s {
75
- int len; // T Sequence length
76
- char *lines[]; // [T] Raw lines directly from file
77
+ uint32_t len; // T Sequence length
78
+ char *lines[]; // [T] Raw lines directly from file
77
79
  };
78
80
 
79
81
  /* tok_t:
@@ -91,10 +93,10 @@ struct raw_s {
91
93
  */
92
94
  typedef struct tok_s tok_t;
93
95
  struct tok_s {
94
- int len; // T Sequence length
95
- char **lbl; // [T] List of labels strings
96
- int *cnts; // [T] Length of tokens lists
97
- char **toks[]; // [T][] Tokens lists
96
+ uint32_t len; // T Sequence length
97
+ char **lbl; // [T] List of labels strings
98
+ uint32_t *cnts; // [T] Length of tokens lists
99
+ char **toks[]; // [T][] Tokens lists
98
100
  };
99
101
 
100
102
  /* seq_t:
@@ -121,12 +123,12 @@ struct tok_s {
121
123
  typedef struct pos_s pos_t;
122
124
  typedef struct seq_s seq_t;
123
125
  struct seq_s {
124
- int len;
125
- size_t *raw;
126
+ uint32_t len;
127
+ uint64_t *raw;
126
128
  struct pos_s {
127
- size_t lbl;
128
- size_t ucnt, bcnt;
129
- size_t *uobs, *bobs;
129
+ uint32_t lbl;
130
+ uint32_t ucnt, bcnt;
131
+ uint64_t *uobs, *bobs;
130
132
  } pos[];
131
133
  };
132
134
 
@@ -139,10 +141,10 @@ struct seq_s {
139
141
  */
140
142
  typedef struct dat_s dat_t;
141
143
  struct dat_s {
142
- bool lbl; // True iff sequences are labelled
143
- int mlen; // Length of the longest sequence in the set
144
- size_t nseq; // S Number of sequences in the set
145
- seq_t **seq; // [S] List of sequences
144
+ bool lbl; // True iff sequences are labelled
145
+ uint32_t mlen; // Length of the longest sequence in the set
146
+ uint32_t nseq; // S Number of sequences in the set
147
+ seq_t **seq; // [S] List of sequences
146
148
  };
147
149
 
148
150
  #endif
@@ -1,7 +1,7 @@
1
1
  /*
2
2
  * Wapiti - A linear-chain CRF tool
3
3
  *
4
- * Copyright (c) 2009-2011 CNRS
4
+ * Copyright (c) 2009-2013 CNRS
5
5
  * All rights reserved.
6
6
  *
7
7
  * Redistribution and use in source and binary forms, with or without
@@ -27,6 +27,7 @@
27
27
  #include <math.h>
28
28
  #include <stdbool.h>
29
29
  #include <stddef.h>
30
+ #include <stdint.h>
30
31
  #include <stdlib.h>
31
32
  #include <string.h>
32
33
 
@@ -51,8 +52,8 @@
51
52
  * the AFNLP, pages 477-485, August 2009
52
53
  ******************************************************************************/
53
54
  typedef struct sgd_idx_s {
54
- size_t *uobs;
55
- size_t *bobs;
55
+ uint64_t *uobs;
56
+ uint64_t *bobs;
56
57
  } sgd_idx_t;
57
58
 
58
59
  /* applypenalty:
@@ -75,11 +76,11 @@ typedef struct sgd_idx_s {
75
76
  * Add the <new> value in the array <obs> of size <cnt>. If the value is
76
77
  * already present, we do nothing, else we add it.
77
78
  */
78
- static void sgd_add(size_t *obs, size_t *cnt, size_t new) {
79
+ static void sgd_add(uint64_t *obs, uint32_t *cnt, uint64_t new) {
79
80
  // First check if value is already in the array, we do a linear probing
80
81
  // as it is simpler and since these arrays will be very short in
81
82
  // practice, it's efficient enough.
82
- for (size_t p = 0; p < *cnt; p++)
83
+ for (uint32_t p = 0; p < *cnt; p++)
83
84
  if (obs[p] == new)
84
85
  return;
85
86
  // Insert the new value at the end since we have not found it.
@@ -91,13 +92,13 @@ static void sgd_add(size_t *obs, size_t *cnt, size_t new) {
91
92
  * Train the model with the SGD-l1 algorithm described by Tsuruoka et al.
92
93
  */
93
94
  void trn_sgdl1(mdl_t *mdl) {
94
- const size_t Y = mdl->nlbl;
95
- const size_t F = mdl->nftr;
96
- const int U = mdl->reader->nuni;
97
- const int B = mdl->reader->nbi;
98
- const int S = mdl->train->nseq;
99
- const int K = mdl->opt->maxiter;
100
- double *w = mdl->theta;
95
+ const uint64_t Y = mdl->nlbl;
96
+ const uint64_t F = mdl->nftr;
97
+ const uint32_t U = mdl->reader->nuni;
98
+ const uint32_t B = mdl->reader->nbi;
99
+ const uint32_t S = mdl->train->nseq;
100
+ const uint32_t K = mdl->opt->maxiter;
101
+ double *w = mdl->theta;
101
102
  // First we have to build an index which holds, for each sequence, the
102
103
  // list of active observations.
103
104
  // The index is a simple table indexed by sequences number. Each entry
@@ -105,24 +106,25 @@ void trn_sgdl1(mdl_t *mdl) {
105
106
  // unigrams obss and one for bigrams obss.
106
107
  info(" - Build the index\n");
107
108
  sgd_idx_t *idx = wapiti_xmalloc(sizeof(sgd_idx_t) * S);
108
- for (int s = 0; s < S; s++) {
109
+ for (uint32_t s = 0; s < S; s++) {
109
110
  const seq_t *seq = mdl->train->seq[s];
110
- const int T = seq->len;
111
- size_t uobs[U * T + 1], ucnt = 0;
112
- size_t bobs[B * T + 1], bcnt = 0;
113
- for (int t = 0; t < seq->len; t++) {
111
+ const uint32_t T = seq->len;
112
+ uint64_t uobs[U * T + 1];
113
+ uint64_t bobs[B * T + 1];
114
+ uint32_t ucnt = 0, bcnt = 0;
115
+ for (uint32_t t = 0; t < seq->len; t++) {
114
116
  const pos_t *pos = &seq->pos[t];
115
- for (size_t p = 0; p < pos->ucnt; p++)
117
+ for (uint32_t p = 0; p < pos->ucnt; p++)
116
118
  sgd_add(uobs, &ucnt, pos->uobs[p]);
117
- for (size_t p = 0; p < pos->bcnt; p++)
119
+ for (uint32_t p = 0; p < pos->bcnt; p++)
118
120
  sgd_add(bobs, &bcnt, pos->bobs[p]);
119
121
  }
120
122
  uobs[ucnt++] = none;
121
123
  bobs[bcnt++] = none;
122
- idx[s].uobs = wapiti_xmalloc(sizeof(size_t) * ucnt);
123
- idx[s].bobs = wapiti_xmalloc(sizeof(size_t) * bcnt);
124
- memcpy(idx[s].uobs, uobs, ucnt * sizeof(size_t));
125
- memcpy(idx[s].bobs, bobs, bcnt * sizeof(size_t));
124
+ idx[s].uobs = wapiti_xmalloc(sizeof(uint64_t) * ucnt);
125
+ idx[s].bobs = wapiti_xmalloc(sizeof(uint64_t) * bcnt);
126
+ memcpy(idx[s].uobs, uobs, ucnt * sizeof(uint64_t));
127
+ memcpy(idx[s].bobs, bobs, bcnt * sizeof(uint64_t));
126
128
  }
127
129
  info(" Done\n");
128
130
  // We will process sequences in random order in each iteration, so we
@@ -137,34 +139,34 @@ void trn_sgdl1(mdl_t *mdl) {
137
139
  // time.
138
140
  // We also need an additional vector named <q> which holds the penalty
139
141
  // already applied to each feature.
140
- int *perm = wapiti_xmalloc(sizeof(int) * S);
141
- for (int s = 0; s < S; s++)
142
+ uint32_t *perm = wapiti_xmalloc(sizeof(uint32_t) * S);
143
+ for (uint32_t s = 0; s < S; s++)
142
144
  perm[s] = s;
143
145
  double *g = wapiti_xmalloc(sizeof(double) * F);
144
146
  double *q = wapiti_xmalloc(sizeof(double) * F);
145
- for (size_t f = 0; f < F; f++)
147
+ for (uint64_t f = 0; f < F; f++)
146
148
  g[f] = q[f] = 0.0;
147
149
  // We can now start training the model, we perform the requested number
148
150
  // of iteration, each of these going through all the sequences. For
149
151
  // computing the decay, we will need to keep track of the number of
150
152
  // already processed sequences, this is tracked by the <i> variable.
151
153
  double u = 0.0;
152
- grd_t *grd = grd_new(mdl, g);
153
- for (int k = 0, i = 0; k < K && !uit_stop; k++) {
154
+ grd_st_t *grd_st = grd_stnew(mdl, g);
155
+ for (uint32_t k = 0, i = 0; k < K && !uit_stop; k++) {
154
156
  // First we shuffle the sequences by making a lot of random swaps
155
157
  // of entries in the permutation index.
156
- for (int s = 0; s < S; s++) {
157
- const int a = rand() % S;
158
- const int b = rand() % S;
159
- const int t = perm[a];
158
+ for (uint32_t s = 0; s < S; s++) {
159
+ const uint32_t a = rand() % S;
160
+ const uint32_t b = rand() % S;
161
+ const uint32_t t = perm[a];
160
162
  perm[a] = perm[b];
161
163
  perm[b] = t;
162
164
  }
163
165
  // And so, we can process sequence in a random order
164
- for (int sp = 0; sp < S && !uit_stop; sp++, i++) {
165
- const int s = perm[sp];
166
+ for (uint32_t sp = 0; sp < S && !uit_stop; sp++, i++) {
167
+ const uint32_t s = perm[sp];
166
168
  const seq_t *seq = mdl->train->seq[s];
167
- grd_dospl(grd, seq);
169
+ grd_dospl(grd_st, seq);
168
170
  // Before applying the gradient, we have to compute the
169
171
  // learning rate to apply to this sequence. For this we
170
172
  // use an exponential decay [1, pp 481(5)]
@@ -180,17 +182,17 @@ void trn_sgdl1(mdl_t *mdl) {
180
182
  // observations actives in the current sequence. We must
181
183
  // not forget to clear the gradient for the next
182
184
  // sequence.
183
- for (size_t n = 0; idx[s].uobs[n] != none; n++) {
184
- size_t f = mdl->uoff[idx[s].uobs[n]];
185
- for (size_t y = 0; y < Y; y++, f++) {
185
+ for (uint32_t n = 0; idx[s].uobs[n] != none; n++) {
186
+ uint64_t f = mdl->uoff[idx[s].uobs[n]];
187
+ for (uint32_t y = 0; y < Y; y++, f++) {
186
188
  w[f] -= nk * g[f];
187
189
  applypenalty(f);
188
190
  g[f] = 0.0;
189
191
  }
190
192
  }
191
- for (size_t n = 0; idx[s].bobs[n] != none; n++) {
192
- size_t f = mdl->boff[idx[s].bobs[n]];
193
- for (size_t d = 0; d < Y * Y; d++, f++) {
193
+ for (uint32_t n = 0; idx[s].bobs[n] != none; n++) {
194
+ uint64_t f = mdl->boff[idx[s].bobs[n]];
195
+ for (uint32_t d = 0; d < Y * Y; d++, f++) {
194
196
  w[f] -= nk * g[f];
195
197
  applypenalty(f);
196
198
  g[f] = 0.0;
@@ -203,9 +205,9 @@ void trn_sgdl1(mdl_t *mdl) {
203
205
  if (!uit_progress(mdl, k + 1, -1.0))
204
206
  break;
205
207
  }
206
- grd_free(grd);
208
+ grd_stfree(grd_st);
207
209
  // Cleanup allocated memory before returning
208
- for (int s = 0; s < S; s++) {
210
+ for (uint32_t s = 0; s < S; s++) {
209
211
  free(idx[s].uobs);
210
212
  free(idx[s].bobs);
211
213
  }