wapiti 0.0.5 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.simplecov +3 -0
- data/Gemfile +25 -2
- data/HISTORY.md +5 -1
- data/LICENSE +14 -13
- data/README.md +9 -16
- data/Rakefile +38 -8
- data/ext/wapiti/bcd.c +126 -124
- data/ext/wapiti/decoder.c +203 -124
- data/ext/wapiti/decoder.h +6 -4
- data/ext/wapiti/extconf.rb +2 -2
- data/ext/wapiti/gradient.c +491 -320
- data/ext/wapiti/gradient.h +52 -34
- data/ext/wapiti/lbfgs.c +74 -33
- data/ext/wapiti/model.c +47 -37
- data/ext/wapiti/model.h +22 -20
- data/ext/wapiti/native.c +850 -839
- data/ext/wapiti/native.h +1 -1
- data/ext/wapiti/options.c +52 -20
- data/ext/wapiti/options.h +37 -30
- data/ext/wapiti/pattern.c +35 -33
- data/ext/wapiti/pattern.h +12 -11
- data/ext/wapiti/progress.c +14 -13
- data/ext/wapiti/progress.h +3 -2
- data/ext/wapiti/quark.c +14 -16
- data/ext/wapiti/quark.h +6 -5
- data/ext/wapiti/reader.c +83 -69
- data/ext/wapiti/reader.h +11 -9
- data/ext/wapiti/rprop.c +84 -43
- data/ext/wapiti/sequence.h +18 -16
- data/ext/wapiti/sgdl1.c +45 -43
- data/ext/wapiti/thread.c +19 -17
- data/ext/wapiti/thread.h +5 -4
- data/ext/wapiti/tools.c +7 -7
- data/ext/wapiti/tools.h +3 -4
- data/ext/wapiti/trainers.h +1 -1
- data/ext/wapiti/vmath.c +40 -38
- data/ext/wapiti/vmath.h +12 -11
- data/ext/wapiti/wapiti.c +159 -37
- data/ext/wapiti/wapiti.h +18 -4
- data/lib/wapiti.rb +15 -15
- data/lib/wapiti/errors.rb +15 -15
- data/lib/wapiti/model.rb +92 -84
- data/lib/wapiti/options.rb +123 -124
- data/lib/wapiti/utility.rb +14 -14
- data/lib/wapiti/version.rb +2 -2
- data/spec/spec_helper.rb +29 -9
- data/spec/wapiti/model_spec.rb +230 -194
- data/spec/wapiti/native_spec.rb +7 -8
- data/spec/wapiti/options_spec.rb +184 -174
- data/wapiti.gemspec +22 -8
- metadata +38 -42
- data/.gitignore +0 -5
data/ext/wapiti/reader.h
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
/*
|
2
2
|
* Wapiti - A linear-chain CRF tool
|
3
3
|
*
|
4
|
-
* Copyright (c) 2009-
|
4
|
+
* Copyright (c) 2009-2013 CNRS
|
5
5
|
* All rights reserved.
|
6
6
|
*
|
7
7
|
* Redistribution and use in source and binary forms, with or without
|
@@ -45,16 +45,16 @@
|
|
45
45
|
*/
|
46
46
|
typedef struct rdr_s rdr_t;
|
47
47
|
struct rdr_s {
|
48
|
-
bool
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
pat_t
|
53
|
-
qrk_t
|
54
|
-
qrk_t
|
48
|
+
bool autouni; // Automatically add 'u' prefix
|
49
|
+
uint32_t npats; // P Total number of patterns
|
50
|
+
uint32_t nuni, nbi; // Number of unigram and bigram patterns
|
51
|
+
uint32_t ntoks; // Expected number of tokens in input
|
52
|
+
pat_t **pats; // [P] List of precompiled patterns
|
53
|
+
qrk_t *lbl; // Labels database
|
54
|
+
qrk_t *obs; // Observation database
|
55
55
|
};
|
56
56
|
|
57
|
-
rdr_t *rdr_new(bool
|
57
|
+
rdr_t *rdr_new(bool autouni);
|
58
58
|
void rdr_free(rdr_t *rdr);
|
59
59
|
void rdr_freeraw(raw_t *raw);
|
60
60
|
void rdr_freeseq(seq_t *seq);
|
@@ -69,5 +69,7 @@ dat_t *rdr_readdat(rdr_t *rdr, FILE *file, bool lbl);
|
|
69
69
|
void rdr_load(rdr_t *rdr, FILE *file);
|
70
70
|
void rdr_save(const rdr_t *rdr, FILE *file);
|
71
71
|
|
72
|
+
char *rdr_readline(FILE *file);
|
73
|
+
|
72
74
|
#endif
|
73
75
|
|
data/ext/wapiti/rprop.c
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
/*
|
2
2
|
* Wapiti - A linear-chain CRF tool
|
3
3
|
*
|
4
|
-
* Copyright (c) 2009-
|
4
|
+
* Copyright (c) 2009-2013 CNRS
|
5
5
|
* All rights reserved.
|
6
6
|
*
|
7
7
|
* Redistribution and use in source and binary forms, with or without
|
@@ -24,9 +24,13 @@
|
|
24
24
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
25
25
|
* POSSIBILITY OF SUCH DAMAGE.
|
26
26
|
*/
|
27
|
+
#include <inttypes.h>
|
28
|
+
#include <float.h>
|
27
29
|
#include <math.h>
|
28
30
|
#include <stdbool.h>
|
29
31
|
#include <stddef.h>
|
32
|
+
#include <stdint.h>
|
33
|
+
#include <stdio.h>
|
30
34
|
#include <stdlib.h>
|
31
35
|
#include <string.h>
|
32
36
|
|
@@ -39,7 +43,9 @@
|
|
39
43
|
#include "thread.h"
|
40
44
|
#include "vmath.h"
|
41
45
|
|
42
|
-
#define
|
46
|
+
#define EPSILON (DBL_EPSILON * 64.0)
|
47
|
+
|
48
|
+
#define sign(v) ((v) < -EPSILON ? -1.0 : ((v) > EPSILON ? 1.0 : 0.0))
|
43
49
|
#define sqr(v) ((v) * (v))
|
44
50
|
|
45
51
|
/******************************************************************************
|
@@ -58,7 +64,7 @@
|
|
58
64
|
******************************************************************************/
|
59
65
|
typedef struct rprop_s rprop_t;
|
60
66
|
struct rprop_s {
|
61
|
-
mdl_t
|
67
|
+
mdl_t *mdl;
|
62
68
|
double *xp;
|
63
69
|
double *stp;
|
64
70
|
double *g;
|
@@ -71,33 +77,33 @@ struct rprop_s {
|
|
71
77
|
* parameter given, the job scheduling system is not used here as we can
|
72
78
|
* easily split processing in equal parts.
|
73
79
|
*/
|
74
|
-
static void trn_rpropsub(job_t *job,
|
80
|
+
static void trn_rpropsub(job_t *job, uint32_t id, uint32_t cnt, rprop_t *st) {
|
75
81
|
unused(job);
|
76
82
|
mdl_t *mdl = st->mdl;
|
77
|
-
const
|
78
|
-
const double
|
79
|
-
const double
|
80
|
-
const double
|
81
|
-
const double
|
82
|
-
const bool
|
83
|
-
const double
|
84
|
-
const int
|
83
|
+
const uint64_t F = mdl->nftr;
|
84
|
+
const double stpmin = mdl->opt->rprop.stpmin;
|
85
|
+
const double stpmax = mdl->opt->rprop.stpmax;
|
86
|
+
const double stpinc = mdl->opt->rprop.stpinc;
|
87
|
+
const double stpdec = mdl->opt->rprop.stpdec;
|
88
|
+
const bool wbt = strcmp(mdl->opt->algo, "rprop-");
|
89
|
+
const double rho1 = mdl->opt->rho1;
|
90
|
+
const int l1 = (rho1 != 0.0) ? mdl->opt->rprop.cutoff + 1: 0;
|
85
91
|
double *x = mdl->theta;
|
86
92
|
double *xp = st->xp, *stp = st->stp;
|
87
93
|
double *g = st->g, *gp = st->gp;
|
88
|
-
const
|
89
|
-
const
|
90
|
-
for (
|
94
|
+
const uint64_t from = F * id / cnt;
|
95
|
+
const uint64_t to = F * (id + 1) / cnt;
|
96
|
+
for (uint64_t f = from; f < to; f++) {
|
91
97
|
double pg = g[f];
|
92
98
|
// If there is a l1 component in the regularization component,
|
93
99
|
// we either project the gradient in the current orthant or
|
94
100
|
// check for cutdown depending on the projection scheme wanted.
|
95
101
|
if (l1 == 1) {
|
96
|
-
|
97
|
-
else if (x[f] >
|
98
|
-
else if (g[f] < -rho1)
|
99
|
-
else if (g[f] >
|
100
|
-
else
|
102
|
+
if (x[f] < -EPSILON) pg -= rho1;
|
103
|
+
else if (x[f] > EPSILON) pg += rho1;
|
104
|
+
else if (g[f] < -rho1) pg += rho1;
|
105
|
+
else if (g[f] > rho1) pg -= rho1;
|
106
|
+
else pg = 0.0;
|
101
107
|
} else if (l1 && sqr(g[f] + rho1 * sign(x[f])) < sqr(rho1)) {
|
102
108
|
if (x[f] == 0.0 || ( gp[f] * g[f] < 0.0
|
103
109
|
&& xp[f] * x[f] < 0.0)) {
|
@@ -108,51 +114,77 @@ static void trn_rpropsub(job_t *job, int id, int cnt, rprop_t *st) {
|
|
108
114
|
continue;
|
109
115
|
}
|
110
116
|
}
|
117
|
+
const double sgp = sign(gp[f]);
|
118
|
+
const double spg = sign(pg);
|
111
119
|
// Next we adjust the step depending on the new and
|
112
120
|
// previous gradient values.
|
113
|
-
if (
|
121
|
+
if (sgp * spg > 0.0)
|
114
122
|
stp[f] = min(stp[f] * stpinc, stpmax);
|
115
|
-
else if (
|
123
|
+
else if (sgp * spg < 0.0)
|
116
124
|
stp[f] = max(stp[f] * stpdec, stpmin);
|
117
125
|
// Finally update the weight. If there is an l1 penalty
|
118
126
|
// and the pseudo gradient projection is used, we have to
|
119
127
|
// project back the update in the chosen orthant.
|
120
|
-
if (!wbt ||
|
128
|
+
if (!wbt || sgp * spg > 0.0) {
|
121
129
|
double dlt = stp[f] * -sign(g[f]);
|
122
|
-
if (l1 == 1 && dlt *
|
130
|
+
if (l1 == 1 && dlt * spg >= 0.0)
|
123
131
|
dlt = 0.0;
|
124
132
|
if (wbt)
|
125
133
|
xp[f] = x[f];
|
126
134
|
x[f] += dlt;
|
127
|
-
} else if (
|
135
|
+
} else if (sgp * spg < -0.0) {
|
128
136
|
x[f] = xp[f];
|
129
137
|
g[f] = 0.0;
|
130
138
|
} else {
|
131
139
|
xp[f] = x[f];
|
132
140
|
if (l1 != 1)
|
133
|
-
x[f] += stp[f] * -
|
141
|
+
x[f] += stp[f] * -spg;
|
134
142
|
}
|
135
143
|
gp[f] = g[f];
|
136
144
|
}
|
137
145
|
}
|
138
146
|
|
139
147
|
void trn_rprop(mdl_t *mdl) {
|
140
|
-
const
|
141
|
-
const
|
142
|
-
const
|
143
|
-
const bool
|
144
|
-
const int
|
148
|
+
const uint64_t F = mdl->nftr;
|
149
|
+
const uint32_t K = mdl->opt->maxiter;
|
150
|
+
const uint32_t W = mdl->opt->nthread;
|
151
|
+
const bool wbt = strcmp(mdl->opt->algo, "rprop-");
|
152
|
+
const int cut = mdl->opt->rprop.cutoff;
|
145
153
|
// Allocate state memory and initialize it
|
146
154
|
double *xp = NULL, *stp = xvm_new(F);
|
147
155
|
double *g = xvm_new(F), *gp = xvm_new(F);
|
148
156
|
if (wbt && !cut)
|
149
157
|
xp = xvm_new(F);
|
150
|
-
for (
|
158
|
+
for (uint64_t f = 0; f < F; f++) {
|
151
159
|
if (wbt && !cut)
|
152
160
|
xp[f] = 0.0;
|
153
161
|
gp[f] = 0.0;
|
154
162
|
stp[f] = 0.1;
|
155
163
|
}
|
164
|
+
// Restore a saved state if given by the user
|
165
|
+
if (mdl->opt->rstate != NULL) {
|
166
|
+
const char *err = "invalid state file";
|
167
|
+
FILE *file = fopen(mdl->opt->rstate, "r");
|
168
|
+
if (file == NULL)
|
169
|
+
fatal("failed to open input state file");
|
170
|
+
int type;
|
171
|
+
uint64_t nftr;
|
172
|
+
if (fscanf(file, "#state#%d#%"SCNu64"\n", &type, &nftr) != 2)
|
173
|
+
fatal(err);
|
174
|
+
if (type != 3)
|
175
|
+
fatal("state is not for rprop model");
|
176
|
+
for (uint64_t i = 0; i < nftr; i++) {
|
177
|
+
uint64_t f;
|
178
|
+
double vxp, vstp, vgp;
|
179
|
+
if (fscanf(file, "%"PRIu64" %la %la %la\n", &f, &vxp,
|
180
|
+
&vstp, &vgp) != 4)
|
181
|
+
fatal(err);
|
182
|
+
if (wbt && !cut) xp[f] = vxp;
|
183
|
+
gp[f] = vgp;
|
184
|
+
stp[f] = vstp;
|
185
|
+
}
|
186
|
+
fclose(file);
|
187
|
+
}
|
156
188
|
// Prepare the rprop state used to send information to the rprop worker
|
157
189
|
// about updating weight using the gradient.
|
158
190
|
rprop_t *st = wapiti_xmalloc(sizeof(rprop_t));
|
@@ -160,32 +192,41 @@ void trn_rprop(mdl_t *mdl) {
|
|
160
192
|
st->xp = xp; st->stp = stp;
|
161
193
|
st->g = g; st->gp = gp;
|
162
194
|
rprop_t *rprop[W];
|
163
|
-
for (
|
195
|
+
for (uint32_t w = 0; w < W; w++)
|
164
196
|
rprop[w] = st;
|
165
197
|
// Prepare the gradient state for the distributed gradient computation.
|
166
|
-
grd_t *
|
167
|
-
grds[0] = grd_new(mdl, g);
|
168
|
-
for (size_t w = 1; w < W; w++)
|
169
|
-
grds[w] = grd_new(mdl, xvm_new(F));
|
198
|
+
grd_t *grd = grd_new(mdl, g);
|
170
199
|
// And iterate the gradient computation / weight update process until
|
171
200
|
// convergence or stop request
|
172
|
-
for (
|
173
|
-
double fx = grd_gradient(
|
201
|
+
for (uint32_t k = 0; !uit_stop && k < K; k++) {
|
202
|
+
double fx = grd_gradient(grd);
|
174
203
|
if (uit_stop)
|
175
204
|
break;
|
176
205
|
mth_spawn((func_t *)trn_rpropsub, W, (void **)rprop, 0, 0);
|
177
206
|
if (uit_progress(mdl, k + 1, fx) == false)
|
178
207
|
break;
|
179
208
|
}
|
209
|
+
// Save state if user requested it
|
210
|
+
if (mdl->opt->sstate != NULL) {
|
211
|
+
FILE *file = fopen(mdl->opt->sstate, "w");
|
212
|
+
if (file == NULL)
|
213
|
+
fatal("failed to open output state file");
|
214
|
+
fprintf(file, "#state#3#%"PRIu64"\n", F);
|
215
|
+
for (uint64_t f = 0; f < F; f++) {
|
216
|
+
double vxp = xp != NULL ? xp[f] : 0.0;
|
217
|
+
double vstp = stp[f], vgp = gp[f];
|
218
|
+
fprintf(file, "%"PRIu64" ", f);
|
219
|
+
fprintf(file, "%la %la %la\n", vxp, vstp, vgp);
|
220
|
+
}
|
221
|
+
fclose(file);
|
222
|
+
}
|
180
223
|
// Free all allocated memory
|
181
224
|
if (wbt && !cut)
|
182
225
|
xvm_free(xp);
|
183
226
|
xvm_free(g);
|
184
227
|
xvm_free(gp);
|
185
|
-
|
186
|
-
|
187
|
-
for (size_t w = 0; w < W; w++)
|
188
|
-
grd_free(grds[w]);
|
228
|
+
xvm_free(stp);
|
229
|
+
grd_free(grd);
|
189
230
|
free(st);
|
190
231
|
}
|
191
232
|
|
data/ext/wapiti/sequence.h
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
/*
|
2
2
|
* Wapiti - A linear-chain CRF tool
|
3
3
|
*
|
4
|
-
* Copyright (c) 2009-
|
4
|
+
* Copyright (c) 2009-2013 CNRS
|
5
5
|
* All rights reserved.
|
6
6
|
*
|
7
7
|
* Redistribution and use in source and binary forms, with or without
|
@@ -29,6 +29,8 @@
|
|
29
29
|
#define sequence_h
|
30
30
|
|
31
31
|
#include <stddef.h>
|
32
|
+
#include <stdint.h>
|
33
|
+
#include <stdbool.h>
|
32
34
|
|
33
35
|
#include "wapiti.h"
|
34
36
|
|
@@ -72,8 +74,8 @@
|
|
72
74
|
*/
|
73
75
|
typedef struct raw_s raw_t;
|
74
76
|
struct raw_s {
|
75
|
-
|
76
|
-
char
|
77
|
+
uint32_t len; // T Sequence length
|
78
|
+
char *lines[]; // [T] Raw lines directly from file
|
77
79
|
};
|
78
80
|
|
79
81
|
/* tok_t:
|
@@ -91,10 +93,10 @@ struct raw_s {
|
|
91
93
|
*/
|
92
94
|
typedef struct tok_s tok_t;
|
93
95
|
struct tok_s {
|
94
|
-
|
95
|
-
char
|
96
|
-
|
97
|
-
char
|
96
|
+
uint32_t len; // T Sequence length
|
97
|
+
char **lbl; // [T] List of labels strings
|
98
|
+
uint32_t *cnts; // [T] Length of tokens lists
|
99
|
+
char **toks[]; // [T][] Tokens lists
|
98
100
|
};
|
99
101
|
|
100
102
|
/* seq_t:
|
@@ -121,12 +123,12 @@ struct tok_s {
|
|
121
123
|
typedef struct pos_s pos_t;
|
122
124
|
typedef struct seq_s seq_t;
|
123
125
|
struct seq_s {
|
124
|
-
|
125
|
-
|
126
|
+
uint32_t len;
|
127
|
+
uint64_t *raw;
|
126
128
|
struct pos_s {
|
127
|
-
|
128
|
-
|
129
|
-
|
129
|
+
uint32_t lbl;
|
130
|
+
uint32_t ucnt, bcnt;
|
131
|
+
uint64_t *uobs, *bobs;
|
130
132
|
} pos[];
|
131
133
|
};
|
132
134
|
|
@@ -139,10 +141,10 @@ struct seq_s {
|
|
139
141
|
*/
|
140
142
|
typedef struct dat_s dat_t;
|
141
143
|
struct dat_s {
|
142
|
-
bool
|
143
|
-
|
144
|
-
|
145
|
-
seq_t
|
144
|
+
bool lbl; // True iff sequences are labelled
|
145
|
+
uint32_t mlen; // Length of the longest sequence in the set
|
146
|
+
uint32_t nseq; // S Number of sequences in the set
|
147
|
+
seq_t **seq; // [S] List of sequences
|
146
148
|
};
|
147
149
|
|
148
150
|
#endif
|
data/ext/wapiti/sgdl1.c
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
/*
|
2
2
|
* Wapiti - A linear-chain CRF tool
|
3
3
|
*
|
4
|
-
* Copyright (c) 2009-
|
4
|
+
* Copyright (c) 2009-2013 CNRS
|
5
5
|
* All rights reserved.
|
6
6
|
*
|
7
7
|
* Redistribution and use in source and binary forms, with or without
|
@@ -27,6 +27,7 @@
|
|
27
27
|
#include <math.h>
|
28
28
|
#include <stdbool.h>
|
29
29
|
#include <stddef.h>
|
30
|
+
#include <stdint.h>
|
30
31
|
#include <stdlib.h>
|
31
32
|
#include <string.h>
|
32
33
|
|
@@ -51,8 +52,8 @@
|
|
51
52
|
* the AFNLP, pages 477-485, August 2009
|
52
53
|
******************************************************************************/
|
53
54
|
typedef struct sgd_idx_s {
|
54
|
-
|
55
|
-
|
55
|
+
uint64_t *uobs;
|
56
|
+
uint64_t *bobs;
|
56
57
|
} sgd_idx_t;
|
57
58
|
|
58
59
|
/* applypenalty:
|
@@ -75,11 +76,11 @@ typedef struct sgd_idx_s {
|
|
75
76
|
* Add the <new> value in the array <obs> of size <cnt>. If the value is
|
76
77
|
* already present, we do nothing, else we add it.
|
77
78
|
*/
|
78
|
-
static void sgd_add(
|
79
|
+
static void sgd_add(uint64_t *obs, uint32_t *cnt, uint64_t new) {
|
79
80
|
// First check if value is already in the array, we do a linear probing
|
80
81
|
// as it is simpler and since these arrays will be very short in
|
81
82
|
// practice, it's efficient enough.
|
82
|
-
for (
|
83
|
+
for (uint32_t p = 0; p < *cnt; p++)
|
83
84
|
if (obs[p] == new)
|
84
85
|
return;
|
85
86
|
// Insert the new value at the end since we have not found it.
|
@@ -91,13 +92,13 @@ static void sgd_add(size_t *obs, size_t *cnt, size_t new) {
|
|
91
92
|
* Train the model with the SGD-l1 algorithm described by Tsuruoka et al.
|
92
93
|
*/
|
93
94
|
void trn_sgdl1(mdl_t *mdl) {
|
94
|
-
const
|
95
|
-
const
|
96
|
-
const
|
97
|
-
const
|
98
|
-
const
|
99
|
-
const
|
100
|
-
double
|
95
|
+
const uint64_t Y = mdl->nlbl;
|
96
|
+
const uint64_t F = mdl->nftr;
|
97
|
+
const uint32_t U = mdl->reader->nuni;
|
98
|
+
const uint32_t B = mdl->reader->nbi;
|
99
|
+
const uint32_t S = mdl->train->nseq;
|
100
|
+
const uint32_t K = mdl->opt->maxiter;
|
101
|
+
double *w = mdl->theta;
|
101
102
|
// First we have to build an index which holds, for each sequence, the
|
102
103
|
// list of active observations.
|
103
104
|
// The index is a simple table indexed by sequences number. Each entry
|
@@ -105,24 +106,25 @@ void trn_sgdl1(mdl_t *mdl) {
|
|
105
106
|
// unigrams obss and one for bigrams obss.
|
106
107
|
info(" - Build the index\n");
|
107
108
|
sgd_idx_t *idx = wapiti_xmalloc(sizeof(sgd_idx_t) * S);
|
108
|
-
for (
|
109
|
+
for (uint32_t s = 0; s < S; s++) {
|
109
110
|
const seq_t *seq = mdl->train->seq[s];
|
110
|
-
const
|
111
|
-
|
112
|
-
|
113
|
-
|
111
|
+
const uint32_t T = seq->len;
|
112
|
+
uint64_t uobs[U * T + 1];
|
113
|
+
uint64_t bobs[B * T + 1];
|
114
|
+
uint32_t ucnt = 0, bcnt = 0;
|
115
|
+
for (uint32_t t = 0; t < seq->len; t++) {
|
114
116
|
const pos_t *pos = &seq->pos[t];
|
115
|
-
for (
|
117
|
+
for (uint32_t p = 0; p < pos->ucnt; p++)
|
116
118
|
sgd_add(uobs, &ucnt, pos->uobs[p]);
|
117
|
-
for (
|
119
|
+
for (uint32_t p = 0; p < pos->bcnt; p++)
|
118
120
|
sgd_add(bobs, &bcnt, pos->bobs[p]);
|
119
121
|
}
|
120
122
|
uobs[ucnt++] = none;
|
121
123
|
bobs[bcnt++] = none;
|
122
|
-
idx[s].uobs = wapiti_xmalloc(sizeof(
|
123
|
-
idx[s].bobs = wapiti_xmalloc(sizeof(
|
124
|
-
memcpy(idx[s].uobs, uobs, ucnt * sizeof(
|
125
|
-
memcpy(idx[s].bobs, bobs, bcnt * sizeof(
|
124
|
+
idx[s].uobs = wapiti_xmalloc(sizeof(uint64_t) * ucnt);
|
125
|
+
idx[s].bobs = wapiti_xmalloc(sizeof(uint64_t) * bcnt);
|
126
|
+
memcpy(idx[s].uobs, uobs, ucnt * sizeof(uint64_t));
|
127
|
+
memcpy(idx[s].bobs, bobs, bcnt * sizeof(uint64_t));
|
126
128
|
}
|
127
129
|
info(" Done\n");
|
128
130
|
// We will process sequences in random order in each iteration, so we
|
@@ -137,34 +139,34 @@ void trn_sgdl1(mdl_t *mdl) {
|
|
137
139
|
// time.
|
138
140
|
// We also need an additional vector named <q> which holds the penalty
|
139
141
|
// already applied to each feature.
|
140
|
-
|
141
|
-
for (
|
142
|
+
uint32_t *perm = wapiti_xmalloc(sizeof(uint32_t) * S);
|
143
|
+
for (uint32_t s = 0; s < S; s++)
|
142
144
|
perm[s] = s;
|
143
145
|
double *g = wapiti_xmalloc(sizeof(double) * F);
|
144
146
|
double *q = wapiti_xmalloc(sizeof(double) * F);
|
145
|
-
for (
|
147
|
+
for (uint64_t f = 0; f < F; f++)
|
146
148
|
g[f] = q[f] = 0.0;
|
147
149
|
// We can now start training the model, we perform the requested number
|
148
150
|
// of iteration, each of these going through all the sequences. For
|
149
151
|
// computing the decay, we will need to keep track of the number of
|
150
152
|
// already processed sequences, this is tracked by the <i> variable.
|
151
153
|
double u = 0.0;
|
152
|
-
|
153
|
-
for (
|
154
|
+
grd_st_t *grd_st = grd_stnew(mdl, g);
|
155
|
+
for (uint32_t k = 0, i = 0; k < K && !uit_stop; k++) {
|
154
156
|
// First we shuffle the sequences by making a lot of random swaps
|
155
157
|
// of entries in the permutation index.
|
156
|
-
for (
|
157
|
-
const
|
158
|
-
const
|
159
|
-
const
|
158
|
+
for (uint32_t s = 0; s < S; s++) {
|
159
|
+
const uint32_t a = rand() % S;
|
160
|
+
const uint32_t b = rand() % S;
|
161
|
+
const uint32_t t = perm[a];
|
160
162
|
perm[a] = perm[b];
|
161
163
|
perm[b] = t;
|
162
164
|
}
|
163
165
|
// And so, we can process sequences in a random order
|
164
|
-
for (
|
165
|
-
const
|
166
|
+
for (uint32_t sp = 0; sp < S && !uit_stop; sp++, i++) {
|
167
|
+
const uint32_t s = perm[sp];
|
166
168
|
const seq_t *seq = mdl->train->seq[s];
|
167
|
-
grd_dospl(
|
169
|
+
grd_dospl(grd_st, seq);
|
168
170
|
// Before applying the gradient, we have to compute the
|
169
171
|
// learning rate to apply to this sequence. For this we
|
170
172
|
// use an exponential decay [1, pp 481(5)]
|
@@ -180,17 +182,17 @@ void trn_sgdl1(mdl_t *mdl) {
|
|
180
182
|
// observations active in the current sequence. We must
|
181
183
|
// not forget to clear the gradient for the next
|
182
184
|
// sequence.
|
183
|
-
for (
|
184
|
-
|
185
|
-
for (
|
185
|
+
for (uint32_t n = 0; idx[s].uobs[n] != none; n++) {
|
186
|
+
uint64_t f = mdl->uoff[idx[s].uobs[n]];
|
187
|
+
for (uint32_t y = 0; y < Y; y++, f++) {
|
186
188
|
w[f] -= nk * g[f];
|
187
189
|
applypenalty(f);
|
188
190
|
g[f] = 0.0;
|
189
191
|
}
|
190
192
|
}
|
191
|
-
for (
|
192
|
-
|
193
|
-
for (
|
193
|
+
for (uint32_t n = 0; idx[s].bobs[n] != none; n++) {
|
194
|
+
uint64_t f = mdl->boff[idx[s].bobs[n]];
|
195
|
+
for (uint32_t d = 0; d < Y * Y; d++, f++) {
|
194
196
|
w[f] -= nk * g[f];
|
195
197
|
applypenalty(f);
|
196
198
|
g[f] = 0.0;
|
@@ -203,9 +205,9 @@ void trn_sgdl1(mdl_t *mdl) {
|
|
203
205
|
if (!uit_progress(mdl, k + 1, -1.0))
|
204
206
|
break;
|
205
207
|
}
|
206
|
-
|
208
|
+
grd_stfree(grd_st);
|
207
209
|
// Cleanup allocated memory before returning
|
208
|
-
for (
|
210
|
+
for (uint32_t s = 0; s < S; s++) {
|
209
211
|
free(idx[s].uobs);
|
210
212
|
free(idx[s].bobs);
|
211
213
|
}
|