wapiti 0.0.5 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.simplecov +3 -0
- data/Gemfile +25 -2
- data/HISTORY.md +5 -1
- data/LICENSE +14 -13
- data/README.md +9 -16
- data/Rakefile +38 -8
- data/ext/wapiti/bcd.c +126 -124
- data/ext/wapiti/decoder.c +203 -124
- data/ext/wapiti/decoder.h +6 -4
- data/ext/wapiti/extconf.rb +2 -2
- data/ext/wapiti/gradient.c +491 -320
- data/ext/wapiti/gradient.h +52 -34
- data/ext/wapiti/lbfgs.c +74 -33
- data/ext/wapiti/model.c +47 -37
- data/ext/wapiti/model.h +22 -20
- data/ext/wapiti/native.c +850 -839
- data/ext/wapiti/native.h +1 -1
- data/ext/wapiti/options.c +52 -20
- data/ext/wapiti/options.h +37 -30
- data/ext/wapiti/pattern.c +35 -33
- data/ext/wapiti/pattern.h +12 -11
- data/ext/wapiti/progress.c +14 -13
- data/ext/wapiti/progress.h +3 -2
- data/ext/wapiti/quark.c +14 -16
- data/ext/wapiti/quark.h +6 -5
- data/ext/wapiti/reader.c +83 -69
- data/ext/wapiti/reader.h +11 -9
- data/ext/wapiti/rprop.c +84 -43
- data/ext/wapiti/sequence.h +18 -16
- data/ext/wapiti/sgdl1.c +45 -43
- data/ext/wapiti/thread.c +19 -17
- data/ext/wapiti/thread.h +5 -4
- data/ext/wapiti/tools.c +7 -7
- data/ext/wapiti/tools.h +3 -4
- data/ext/wapiti/trainers.h +1 -1
- data/ext/wapiti/vmath.c +40 -38
- data/ext/wapiti/vmath.h +12 -11
- data/ext/wapiti/wapiti.c +159 -37
- data/ext/wapiti/wapiti.h +18 -4
- data/lib/wapiti.rb +15 -15
- data/lib/wapiti/errors.rb +15 -15
- data/lib/wapiti/model.rb +92 -84
- data/lib/wapiti/options.rb +123 -124
- data/lib/wapiti/utility.rb +14 -14
- data/lib/wapiti/version.rb +2 -2
- data/spec/spec_helper.rb +29 -9
- data/spec/wapiti/model_spec.rb +230 -194
- data/spec/wapiti/native_spec.rb +7 -8
- data/spec/wapiti/options_spec.rb +184 -174
- data/wapiti.gemspec +22 -8
- metadata +38 -42
- data/.gitignore +0 -5
data/ext/wapiti/reader.h
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
/*
|
2
2
|
* Wapiti - A linear-chain CRF tool
|
3
3
|
*
|
4
|
-
* Copyright (c) 2009-
|
4
|
+
* Copyright (c) 2009-2013 CNRS
|
5
5
|
* All rights reserved.
|
6
6
|
*
|
7
7
|
* Redistribution and use in source and binary forms, with or without
|
@@ -45,16 +45,16 @@
|
|
45
45
|
*/
|
46
46
|
typedef struct rdr_s rdr_t;
|
47
47
|
struct rdr_s {
|
48
|
-
bool
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
pat_t
|
53
|
-
qrk_t
|
54
|
-
qrk_t
|
48
|
+
bool autouni; // Automatically add 'u' prefix
|
49
|
+
uint32_t npats; // P Total number of patterns
|
50
|
+
uint32_t nuni, nbi; // Number of unigram and bigram patterns
|
51
|
+
uint32_t ntoks; // Expected number of tokens in input
|
52
|
+
pat_t **pats; // [P] List of precompiled patterns
|
53
|
+
qrk_t *lbl; // Labels database
|
54
|
+
qrk_t *obs; // Observation database
|
55
55
|
};
|
56
56
|
|
57
|
-
rdr_t *rdr_new(bool
|
57
|
+
rdr_t *rdr_new(bool autouni);
|
58
58
|
void rdr_free(rdr_t *rdr);
|
59
59
|
void rdr_freeraw(raw_t *raw);
|
60
60
|
void rdr_freeseq(seq_t *seq);
|
@@ -69,5 +69,7 @@ dat_t *rdr_readdat(rdr_t *rdr, FILE *file, bool lbl);
|
|
69
69
|
void rdr_load(rdr_t *rdr, FILE *file);
|
70
70
|
void rdr_save(const rdr_t *rdr, FILE *file);
|
71
71
|
|
72
|
+
char *rdr_readline(FILE *file);
|
73
|
+
|
72
74
|
#endif
|
73
75
|
|
data/ext/wapiti/rprop.c
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
/*
|
2
2
|
* Wapiti - A linear-chain CRF tool
|
3
3
|
*
|
4
|
-
* Copyright (c) 2009-
|
4
|
+
* Copyright (c) 2009-2013 CNRS
|
5
5
|
* All rights reserved.
|
6
6
|
*
|
7
7
|
* Redistribution and use in source and binary forms, with or without
|
@@ -24,9 +24,13 @@
|
|
24
24
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
25
25
|
* POSSIBILITY OF SUCH DAMAGE.
|
26
26
|
*/
|
27
|
+
#include <inttypes.h>
|
28
|
+
#include <float.h>
|
27
29
|
#include <math.h>
|
28
30
|
#include <stdbool.h>
|
29
31
|
#include <stddef.h>
|
32
|
+
#include <stdint.h>
|
33
|
+
#include <stdio.h>
|
30
34
|
#include <stdlib.h>
|
31
35
|
#include <string.h>
|
32
36
|
|
@@ -39,7 +43,9 @@
|
|
39
43
|
#include "thread.h"
|
40
44
|
#include "vmath.h"
|
41
45
|
|
42
|
-
#define
|
46
|
+
#define EPSILON (DBL_EPSILON * 64.0)
|
47
|
+
|
48
|
+
#define sign(v) ((v) < -EPSILON ? -1.0 : ((v) > EPSILON ? 1.0 : 0.0))
|
43
49
|
#define sqr(v) ((v) * (v))
|
44
50
|
|
45
51
|
/******************************************************************************
|
@@ -58,7 +64,7 @@
|
|
58
64
|
******************************************************************************/
|
59
65
|
typedef struct rprop_s rprop_t;
|
60
66
|
struct rprop_s {
|
61
|
-
mdl_t
|
67
|
+
mdl_t *mdl;
|
62
68
|
double *xp;
|
63
69
|
double *stp;
|
64
70
|
double *g;
|
@@ -71,33 +77,33 @@ struct rprop_s {
|
|
71
77
|
* parameter given, the job scheduling system is not used here as we can
|
72
78
|
* easily split processing in equals parts.
|
73
79
|
*/
|
74
|
-
static void trn_rpropsub(job_t *job,
|
80
|
+
static void trn_rpropsub(job_t *job, uint32_t id, uint32_t cnt, rprop_t *st) {
|
75
81
|
unused(job);
|
76
82
|
mdl_t *mdl = st->mdl;
|
77
|
-
const
|
78
|
-
const double
|
79
|
-
const double
|
80
|
-
const double
|
81
|
-
const double
|
82
|
-
const bool
|
83
|
-
const double
|
84
|
-
const int
|
83
|
+
const uint64_t F = mdl->nftr;
|
84
|
+
const double stpmin = mdl->opt->rprop.stpmin;
|
85
|
+
const double stpmax = mdl->opt->rprop.stpmax;
|
86
|
+
const double stpinc = mdl->opt->rprop.stpinc;
|
87
|
+
const double stpdec = mdl->opt->rprop.stpdec;
|
88
|
+
const bool wbt = strcmp(mdl->opt->algo, "rprop-");
|
89
|
+
const double rho1 = mdl->opt->rho1;
|
90
|
+
const int l1 = (rho1 != 0.0) ? mdl->opt->rprop.cutoff + 1: 0;
|
85
91
|
double *x = mdl->theta;
|
86
92
|
double *xp = st->xp, *stp = st->stp;
|
87
93
|
double *g = st->g, *gp = st->gp;
|
88
|
-
const
|
89
|
-
const
|
90
|
-
for (
|
94
|
+
const uint64_t from = F * id / cnt;
|
95
|
+
const uint64_t to = F * (id + 1) / cnt;
|
96
|
+
for (uint64_t f = from; f < to; f++) {
|
91
97
|
double pg = g[f];
|
92
98
|
// If there is a l1 component in the regularization component,
|
93
99
|
// we either project the gradient in the current orthant or
|
94
100
|
// check for cutdown depending on the projection scheme wanted.
|
95
101
|
if (l1 == 1) {
|
96
|
-
|
97
|
-
else if (x[f] >
|
98
|
-
else if (g[f] < -rho1)
|
99
|
-
else if (g[f] >
|
100
|
-
else
|
102
|
+
if (x[f] < -EPSILON) pg -= rho1;
|
103
|
+
else if (x[f] > EPSILON) pg += rho1;
|
104
|
+
else if (g[f] < -rho1) pg += rho1;
|
105
|
+
else if (g[f] > rho1) pg -= rho1;
|
106
|
+
else pg = 0.0;
|
101
107
|
} else if (l1 && sqr(g[f] + rho1 * sign(x[f])) < sqr(rho1)) {
|
102
108
|
if (x[f] == 0.0 || ( gp[f] * g[f] < 0.0
|
103
109
|
&& xp[f] * x[f] < 0.0)) {
|
@@ -108,51 +114,77 @@ static void trn_rpropsub(job_t *job, int id, int cnt, rprop_t *st) {
|
|
108
114
|
continue;
|
109
115
|
}
|
110
116
|
}
|
117
|
+
const double sgp = sign(gp[f]);
|
118
|
+
const double spg = sign(pg);
|
111
119
|
// Next we adjust the step depending of the new and
|
112
120
|
// previous gradient values.
|
113
|
-
if (
|
121
|
+
if (sgp * spg > 0.0)
|
114
122
|
stp[f] = min(stp[f] * stpinc, stpmax);
|
115
|
-
else if (
|
123
|
+
else if (sgp * spg < 0.0)
|
116
124
|
stp[f] = max(stp[f] * stpdec, stpmin);
|
117
125
|
// Finally update the weight. if there is l1 penalty
|
118
126
|
// and the pseudo gradient projection is used, we have to
|
119
127
|
// project back the update in the choosen orthant.
|
120
|
-
if (!wbt ||
|
128
|
+
if (!wbt || sgp * spg > 0.0) {
|
121
129
|
double dlt = stp[f] * -sign(g[f]);
|
122
|
-
if (l1 == 1 && dlt *
|
130
|
+
if (l1 == 1 && dlt * spg >= 0.0)
|
123
131
|
dlt = 0.0;
|
124
132
|
if (wbt)
|
125
133
|
xp[f] = x[f];
|
126
134
|
x[f] += dlt;
|
127
|
-
} else if (
|
135
|
+
} else if (sgp * spg < -0.0) {
|
128
136
|
x[f] = xp[f];
|
129
137
|
g[f] = 0.0;
|
130
138
|
} else {
|
131
139
|
xp[f] = x[f];
|
132
140
|
if (l1 != 1)
|
133
|
-
x[f] += stp[f] * -
|
141
|
+
x[f] += stp[f] * -spg;
|
134
142
|
}
|
135
143
|
gp[f] = g[f];
|
136
144
|
}
|
137
145
|
}
|
138
146
|
|
139
147
|
void trn_rprop(mdl_t *mdl) {
|
140
|
-
const
|
141
|
-
const
|
142
|
-
const
|
143
|
-
const bool
|
144
|
-
const int
|
148
|
+
const uint64_t F = mdl->nftr;
|
149
|
+
const uint32_t K = mdl->opt->maxiter;
|
150
|
+
const uint32_t W = mdl->opt->nthread;
|
151
|
+
const bool wbt = strcmp(mdl->opt->algo, "rprop-");
|
152
|
+
const int cut = mdl->opt->rprop.cutoff;
|
145
153
|
// Allocate state memory and initialize it
|
146
154
|
double *xp = NULL, *stp = xvm_new(F);
|
147
155
|
double *g = xvm_new(F), *gp = xvm_new(F);
|
148
156
|
if (wbt && !cut)
|
149
157
|
xp = xvm_new(F);
|
150
|
-
for (
|
158
|
+
for (uint64_t f = 0; f < F; f++) {
|
151
159
|
if (wbt && !cut)
|
152
160
|
xp[f] = 0.0;
|
153
161
|
gp[f] = 0.0;
|
154
162
|
stp[f] = 0.1;
|
155
163
|
}
|
164
|
+
// Restore a saved state if given by the user
|
165
|
+
if (mdl->opt->rstate != NULL) {
|
166
|
+
const char *err = "invalid state file";
|
167
|
+
FILE *file = fopen(mdl->opt->rstate, "r");
|
168
|
+
if (file == NULL)
|
169
|
+
fatal("failed to open input state file");
|
170
|
+
int type;
|
171
|
+
uint64_t nftr;
|
172
|
+
if (fscanf(file, "#state#%d#%"SCNu64"\n", &type, &nftr) != 2)
|
173
|
+
fatal(err);
|
174
|
+
if (type != 3)
|
175
|
+
fatal("state is not for rprop model");
|
176
|
+
for (uint64_t i = 0; i < nftr; i++) {
|
177
|
+
uint64_t f;
|
178
|
+
double vxp, vstp, vgp;
|
179
|
+
if (fscanf(file, "%"PRIu64" %la %la %la\n", &f, &vxp,
|
180
|
+
&vstp, &vgp) != 4)
|
181
|
+
fatal(err);
|
182
|
+
if (wbt && !cut) xp[f] = vxp;
|
183
|
+
gp[f] = vgp;
|
184
|
+
stp[f] = vstp;
|
185
|
+
}
|
186
|
+
fclose(file);
|
187
|
+
}
|
156
188
|
// Prepare the rprop state used to send information to the rprop worker
|
157
189
|
// about updating weight using the gradient.
|
158
190
|
rprop_t *st = wapiti_xmalloc(sizeof(rprop_t));
|
@@ -160,32 +192,41 @@ void trn_rprop(mdl_t *mdl) {
|
|
160
192
|
st->xp = xp; st->stp = stp;
|
161
193
|
st->g = g; st->gp = gp;
|
162
194
|
rprop_t *rprop[W];
|
163
|
-
for (
|
195
|
+
for (uint32_t w = 0; w < W; w++)
|
164
196
|
rprop[w] = st;
|
165
197
|
// Prepare the gradient state for the distributed gradient computation.
|
166
|
-
grd_t *
|
167
|
-
grds[0] = grd_new(mdl, g);
|
168
|
-
for (size_t w = 1; w < W; w++)
|
169
|
-
grds[w] = grd_new(mdl, xvm_new(F));
|
198
|
+
grd_t *grd = grd_new(mdl, g);
|
170
199
|
// And iterate the gradient computation / weight update process until
|
171
200
|
// convergence or stop request
|
172
|
-
for (
|
173
|
-
double fx = grd_gradient(
|
201
|
+
for (uint32_t k = 0; !uit_stop && k < K; k++) {
|
202
|
+
double fx = grd_gradient(grd);
|
174
203
|
if (uit_stop)
|
175
204
|
break;
|
176
205
|
mth_spawn((func_t *)trn_rpropsub, W, (void **)rprop, 0, 0);
|
177
206
|
if (uit_progress(mdl, k + 1, fx) == false)
|
178
207
|
break;
|
179
208
|
}
|
209
|
+
// Save state if user requested it
|
210
|
+
if (mdl->opt->sstate != NULL) {
|
211
|
+
FILE *file = fopen(mdl->opt->sstate, "w");
|
212
|
+
if (file == NULL)
|
213
|
+
fatal("failed to open output state file");
|
214
|
+
fprintf(file, "#state#3#%"PRIu64"\n", F);
|
215
|
+
for (uint64_t f = 0; f < F; f++) {
|
216
|
+
double vxp = xp != NULL ? xp[f] : 0.0;
|
217
|
+
double vstp = stp[f], vgp = gp[f];
|
218
|
+
fprintf(file, "%"PRIu64" ", f);
|
219
|
+
fprintf(file, "%la %la %la\n", vxp, vstp, vgp);
|
220
|
+
}
|
221
|
+
fclose(file);
|
222
|
+
}
|
180
223
|
// Free all allocated memory
|
181
224
|
if (wbt && !cut)
|
182
225
|
xvm_free(xp);
|
183
226
|
xvm_free(g);
|
184
227
|
xvm_free(gp);
|
185
|
-
|
186
|
-
|
187
|
-
for (size_t w = 0; w < W; w++)
|
188
|
-
grd_free(grds[w]);
|
228
|
+
xvm_free(stp);
|
229
|
+
grd_free(grd);
|
189
230
|
free(st);
|
190
231
|
}
|
191
232
|
|
data/ext/wapiti/sequence.h
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
/*
|
2
2
|
* Wapiti - A linear-chain CRF tool
|
3
3
|
*
|
4
|
-
* Copyright (c) 2009-
|
4
|
+
* Copyright (c) 2009-2013 CNRS
|
5
5
|
* All rights reserved.
|
6
6
|
*
|
7
7
|
* Redistribution and use in source and binary forms, with or without
|
@@ -29,6 +29,8 @@
|
|
29
29
|
#define sequence_h
|
30
30
|
|
31
31
|
#include <stddef.h>
|
32
|
+
#include <stdint.h>
|
33
|
+
#include <stdbool.h>
|
32
34
|
|
33
35
|
#include "wapiti.h"
|
34
36
|
|
@@ -72,8 +74,8 @@
|
|
72
74
|
*/
|
73
75
|
typedef struct raw_s raw_t;
|
74
76
|
struct raw_s {
|
75
|
-
|
76
|
-
char
|
77
|
+
uint32_t len; // T Sequence length
|
78
|
+
char *lines[]; // [T] Raw lines directly from file
|
77
79
|
};
|
78
80
|
|
79
81
|
/* tok_t:
|
@@ -91,10 +93,10 @@ struct raw_s {
|
|
91
93
|
*/
|
92
94
|
typedef struct tok_s tok_t;
|
93
95
|
struct tok_s {
|
94
|
-
|
95
|
-
char
|
96
|
-
|
97
|
-
char
|
96
|
+
uint32_t len; // T Sequence length
|
97
|
+
char **lbl; // [T] List of labels strings
|
98
|
+
uint32_t *cnts; // [T] Length of tokens lists
|
99
|
+
char **toks[]; // [T][] Tokens lists
|
98
100
|
};
|
99
101
|
|
100
102
|
/* seq_t:
|
@@ -121,12 +123,12 @@ struct tok_s {
|
|
121
123
|
typedef struct pos_s pos_t;
|
122
124
|
typedef struct seq_s seq_t;
|
123
125
|
struct seq_s {
|
124
|
-
|
125
|
-
|
126
|
+
uint32_t len;
|
127
|
+
uint64_t *raw;
|
126
128
|
struct pos_s {
|
127
|
-
|
128
|
-
|
129
|
-
|
129
|
+
uint32_t lbl;
|
130
|
+
uint32_t ucnt, bcnt;
|
131
|
+
uint64_t *uobs, *bobs;
|
130
132
|
} pos[];
|
131
133
|
};
|
132
134
|
|
@@ -139,10 +141,10 @@ struct seq_s {
|
|
139
141
|
*/
|
140
142
|
typedef struct dat_s dat_t;
|
141
143
|
struct dat_s {
|
142
|
-
bool
|
143
|
-
|
144
|
-
|
145
|
-
seq_t
|
144
|
+
bool lbl; // True iff sequences are labelled
|
145
|
+
uint32_t mlen; // Length of the longest sequence in the set
|
146
|
+
uint32_t nseq; // S Number of sequences in the set
|
147
|
+
seq_t **seq; // [S] List of sequences
|
146
148
|
};
|
147
149
|
|
148
150
|
#endif
|
data/ext/wapiti/sgdl1.c
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
/*
|
2
2
|
* Wapiti - A linear-chain CRF tool
|
3
3
|
*
|
4
|
-
* Copyright (c) 2009-
|
4
|
+
* Copyright (c) 2009-2013 CNRS
|
5
5
|
* All rights reserved.
|
6
6
|
*
|
7
7
|
* Redistribution and use in source and binary forms, with or without
|
@@ -27,6 +27,7 @@
|
|
27
27
|
#include <math.h>
|
28
28
|
#include <stdbool.h>
|
29
29
|
#include <stddef.h>
|
30
|
+
#include <stdint.h>
|
30
31
|
#include <stdlib.h>
|
31
32
|
#include <string.h>
|
32
33
|
|
@@ -51,8 +52,8 @@
|
|
51
52
|
* the AFNLP, pages 477-485, August 2009
|
52
53
|
******************************************************************************/
|
53
54
|
typedef struct sgd_idx_s {
|
54
|
-
|
55
|
-
|
55
|
+
uint64_t *uobs;
|
56
|
+
uint64_t *bobs;
|
56
57
|
} sgd_idx_t;
|
57
58
|
|
58
59
|
/* applypenalty:
|
@@ -75,11 +76,11 @@ typedef struct sgd_idx_s {
|
|
75
76
|
* Add the <new> value in the array <obs> of size <cnt>. If the value is
|
76
77
|
* already present, we do nothing, else we add it.
|
77
78
|
*/
|
78
|
-
static void sgd_add(
|
79
|
+
static void sgd_add(uint64_t *obs, uint32_t *cnt, uint64_t new) {
|
79
80
|
// First check if value is already in the array, we do a linear probing
|
80
81
|
// as it is simpler and since these array will be very short in
|
81
82
|
// practice, it's efficient enough.
|
82
|
-
for (
|
83
|
+
for (uint32_t p = 0; p < *cnt; p++)
|
83
84
|
if (obs[p] == new)
|
84
85
|
return;
|
85
86
|
// Insert the new value at the end since we have not found it.
|
@@ -91,13 +92,13 @@ static void sgd_add(size_t *obs, size_t *cnt, size_t new) {
|
|
91
92
|
* Train the model with the SGD-l1 algorithm described by tsurukoa et al.
|
92
93
|
*/
|
93
94
|
void trn_sgdl1(mdl_t *mdl) {
|
94
|
-
const
|
95
|
-
const
|
96
|
-
const
|
97
|
-
const
|
98
|
-
const
|
99
|
-
const
|
100
|
-
double
|
95
|
+
const uint64_t Y = mdl->nlbl;
|
96
|
+
const uint64_t F = mdl->nftr;
|
97
|
+
const uint32_t U = mdl->reader->nuni;
|
98
|
+
const uint32_t B = mdl->reader->nbi;
|
99
|
+
const uint32_t S = mdl->train->nseq;
|
100
|
+
const uint32_t K = mdl->opt->maxiter;
|
101
|
+
double *w = mdl->theta;
|
101
102
|
// First we have to build and index who hold, for each sequences, the
|
102
103
|
// list of actives observations.
|
103
104
|
// The index is a simple table indexed by sequences number. Each entry
|
@@ -105,24 +106,25 @@ void trn_sgdl1(mdl_t *mdl) {
|
|
105
106
|
// unigrams obss and one for bigrams obss.
|
106
107
|
info(" - Build the index\n");
|
107
108
|
sgd_idx_t *idx = wapiti_xmalloc(sizeof(sgd_idx_t) * S);
|
108
|
-
for (
|
109
|
+
for (uint32_t s = 0; s < S; s++) {
|
109
110
|
const seq_t *seq = mdl->train->seq[s];
|
110
|
-
const
|
111
|
-
|
112
|
-
|
113
|
-
|
111
|
+
const uint32_t T = seq->len;
|
112
|
+
uint64_t uobs[U * T + 1];
|
113
|
+
uint64_t bobs[B * T + 1];
|
114
|
+
uint32_t ucnt = 0, bcnt = 0;
|
115
|
+
for (uint32_t t = 0; t < seq->len; t++) {
|
114
116
|
const pos_t *pos = &seq->pos[t];
|
115
|
-
for (
|
117
|
+
for (uint32_t p = 0; p < pos->ucnt; p++)
|
116
118
|
sgd_add(uobs, &ucnt, pos->uobs[p]);
|
117
|
-
for (
|
119
|
+
for (uint32_t p = 0; p < pos->bcnt; p++)
|
118
120
|
sgd_add(bobs, &bcnt, pos->bobs[p]);
|
119
121
|
}
|
120
122
|
uobs[ucnt++] = none;
|
121
123
|
bobs[bcnt++] = none;
|
122
|
-
idx[s].uobs = wapiti_xmalloc(sizeof(
|
123
|
-
idx[s].bobs = wapiti_xmalloc(sizeof(
|
124
|
-
memcpy(idx[s].uobs, uobs, ucnt * sizeof(
|
125
|
-
memcpy(idx[s].bobs, bobs, bcnt * sizeof(
|
124
|
+
idx[s].uobs = wapiti_xmalloc(sizeof(uint64_t) * ucnt);
|
125
|
+
idx[s].bobs = wapiti_xmalloc(sizeof(uint64_t) * bcnt);
|
126
|
+
memcpy(idx[s].uobs, uobs, ucnt * sizeof(uint64_t));
|
127
|
+
memcpy(idx[s].bobs, bobs, bcnt * sizeof(uint64_t));
|
126
128
|
}
|
127
129
|
info(" Done\n");
|
128
130
|
// We will process sequences in random order in each iteration, so we
|
@@ -137,34 +139,34 @@ void trn_sgdl1(mdl_t *mdl) {
|
|
137
139
|
// time.
|
138
140
|
// We also need an aditional vector named <q> who hold the penalty
|
139
141
|
// already applied to each features.
|
140
|
-
|
141
|
-
for (
|
142
|
+
uint32_t *perm = wapiti_xmalloc(sizeof(uint32_t) * S);
|
143
|
+
for (uint32_t s = 0; s < S; s++)
|
142
144
|
perm[s] = s;
|
143
145
|
double *g = wapiti_xmalloc(sizeof(double) * F);
|
144
146
|
double *q = wapiti_xmalloc(sizeof(double) * F);
|
145
|
-
for (
|
147
|
+
for (uint64_t f = 0; f < F; f++)
|
146
148
|
g[f] = q[f] = 0.0;
|
147
149
|
// We can now start training the model, we perform the requested number
|
148
150
|
// of iteration, each of these going through all the sequences. For
|
149
151
|
// computing the decay, we will need to keep track of the number of
|
150
152
|
// already processed sequences, this is tracked by the <i> variable.
|
151
153
|
double u = 0.0;
|
152
|
-
|
153
|
-
for (
|
154
|
+
grd_st_t *grd_st = grd_stnew(mdl, g);
|
155
|
+
for (uint32_t k = 0, i = 0; k < K && !uit_stop; k++) {
|
154
156
|
// First we shuffle the sequence by making a lot of random swap
|
155
157
|
// of entry in the permutation index.
|
156
|
-
for (
|
157
|
-
const
|
158
|
-
const
|
159
|
-
const
|
158
|
+
for (uint32_t s = 0; s < S; s++) {
|
159
|
+
const uint32_t a = rand() % S;
|
160
|
+
const uint32_t b = rand() % S;
|
161
|
+
const uint32_t t = perm[a];
|
160
162
|
perm[a] = perm[b];
|
161
163
|
perm[b] = t;
|
162
164
|
}
|
163
165
|
// And so, we can process sequence in a random order
|
164
|
-
for (
|
165
|
-
const
|
166
|
+
for (uint32_t sp = 0; sp < S && !uit_stop; sp++, i++) {
|
167
|
+
const uint32_t s = perm[sp];
|
166
168
|
const seq_t *seq = mdl->train->seq[s];
|
167
|
-
grd_dospl(
|
169
|
+
grd_dospl(grd_st, seq);
|
168
170
|
// Before applying the gradient, we have to compute the
|
169
171
|
// learning rate to apply to this sequence. For this we
|
170
172
|
// use an exponential decay [1, pp 481(5)]
|
@@ -180,17 +182,17 @@ void trn_sgdl1(mdl_t *mdl) {
|
|
180
182
|
// observations actives in the current sequence. We must
|
181
183
|
// not forget to clear the gradient for the next
|
182
184
|
// sequence.
|
183
|
-
for (
|
184
|
-
|
185
|
-
for (
|
185
|
+
for (uint32_t n = 0; idx[s].uobs[n] != none; n++) {
|
186
|
+
uint64_t f = mdl->uoff[idx[s].uobs[n]];
|
187
|
+
for (uint32_t y = 0; y < Y; y++, f++) {
|
186
188
|
w[f] -= nk * g[f];
|
187
189
|
applypenalty(f);
|
188
190
|
g[f] = 0.0;
|
189
191
|
}
|
190
192
|
}
|
191
|
-
for (
|
192
|
-
|
193
|
-
for (
|
193
|
+
for (uint32_t n = 0; idx[s].bobs[n] != none; n++) {
|
194
|
+
uint64_t f = mdl->boff[idx[s].bobs[n]];
|
195
|
+
for (uint32_t d = 0; d < Y * Y; d++, f++) {
|
194
196
|
w[f] -= nk * g[f];
|
195
197
|
applypenalty(f);
|
196
198
|
g[f] = 0.0;
|
@@ -203,9 +205,9 @@ void trn_sgdl1(mdl_t *mdl) {
|
|
203
205
|
if (!uit_progress(mdl, k + 1, -1.0))
|
204
206
|
break;
|
205
207
|
}
|
206
|
-
|
208
|
+
grd_stfree(grd_st);
|
207
209
|
// Cleanup allocated memory before returning
|
208
|
-
for (
|
210
|
+
for (uint32_t s = 0; s < S; s++) {
|
209
211
|
free(idx[s].uobs);
|
210
212
|
free(idx[s].bobs);
|
211
213
|
}
|