wapiti 0.0.5 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.simplecov +3 -0
- data/Gemfile +25 -2
- data/HISTORY.md +5 -1
- data/LICENSE +14 -13
- data/README.md +9 -16
- data/Rakefile +38 -8
- data/ext/wapiti/bcd.c +126 -124
- data/ext/wapiti/decoder.c +203 -124
- data/ext/wapiti/decoder.h +6 -4
- data/ext/wapiti/extconf.rb +2 -2
- data/ext/wapiti/gradient.c +491 -320
- data/ext/wapiti/gradient.h +52 -34
- data/ext/wapiti/lbfgs.c +74 -33
- data/ext/wapiti/model.c +47 -37
- data/ext/wapiti/model.h +22 -20
- data/ext/wapiti/native.c +850 -839
- data/ext/wapiti/native.h +1 -1
- data/ext/wapiti/options.c +52 -20
- data/ext/wapiti/options.h +37 -30
- data/ext/wapiti/pattern.c +35 -33
- data/ext/wapiti/pattern.h +12 -11
- data/ext/wapiti/progress.c +14 -13
- data/ext/wapiti/progress.h +3 -2
- data/ext/wapiti/quark.c +14 -16
- data/ext/wapiti/quark.h +6 -5
- data/ext/wapiti/reader.c +83 -69
- data/ext/wapiti/reader.h +11 -9
- data/ext/wapiti/rprop.c +84 -43
- data/ext/wapiti/sequence.h +18 -16
- data/ext/wapiti/sgdl1.c +45 -43
- data/ext/wapiti/thread.c +19 -17
- data/ext/wapiti/thread.h +5 -4
- data/ext/wapiti/tools.c +7 -7
- data/ext/wapiti/tools.h +3 -4
- data/ext/wapiti/trainers.h +1 -1
- data/ext/wapiti/vmath.c +40 -38
- data/ext/wapiti/vmath.h +12 -11
- data/ext/wapiti/wapiti.c +159 -37
- data/ext/wapiti/wapiti.h +18 -4
- data/lib/wapiti.rb +15 -15
- data/lib/wapiti/errors.rb +15 -15
- data/lib/wapiti/model.rb +92 -84
- data/lib/wapiti/options.rb +123 -124
- data/lib/wapiti/utility.rb +14 -14
- data/lib/wapiti/version.rb +2 -2
- data/spec/spec_helper.rb +29 -9
- data/spec/wapiti/model_spec.rb +230 -194
- data/spec/wapiti/native_spec.rb +7 -8
- data/spec/wapiti/options_spec.rb +184 -174
- data/wapiti.gemspec +22 -8
- metadata +38 -42
- data/.gitignore +0 -5
data/ext/wapiti/native.h
CHANGED
data/ext/wapiti/options.c
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
/*
|
2
2
|
* Wapiti - A linear-chain CRF tool
|
3
3
|
*
|
4
|
-
* Copyright (c) 2009-
|
4
|
+
* Copyright (c) 2009-2013 CNRS
|
5
5
|
* All rights reserved.
|
6
6
|
*
|
7
7
|
* Redistribution and use in source and binary forms, with or without
|
@@ -25,9 +25,11 @@
|
|
25
25
|
* POSSIBILITY OF SUCH DAMAGE.
|
26
26
|
*/
|
27
27
|
|
28
|
+
#include <inttypes.h>
|
28
29
|
#include <limits.h>
|
29
30
|
#include <stdbool.h>
|
30
31
|
#include <stddef.h>
|
32
|
+
#include <stdint.h>
|
31
33
|
#include <stdlib.h>
|
32
34
|
#include <stdio.h>
|
33
35
|
#include <string.h>
|
@@ -54,13 +56,16 @@ static void opt_help(const char *pname) {
|
|
54
56
|
"\t-h | --help display this help message\n"
|
55
57
|
"\t | --version display version information\n"
|
56
58
|
"\n"
|
57
|
-
"
|
59
|
+
"Train mode:\n"
|
58
60
|
" %1$s train [options] [input data] [model file]\n"
|
59
61
|
"\t | --me force maxent mode\n"
|
62
|
+
"\t-T | --type STRING type of model to train\n"
|
60
63
|
"\t-a | --algo STRING training algorithm to use\n"
|
61
64
|
"\t-p | --pattern FILE patterns for extracting features\n"
|
62
65
|
"\t-m | --model FILE model file to preload\n"
|
63
66
|
"\t-d | --devel FILE development dataset\n"
|
67
|
+
"\t | --rstate FILE optimizer state to restore\n"
|
68
|
+
"\t | --sstate FILE optimizer state to save\n"
|
64
69
|
"\t-c | --compact compact model after training\n"
|
65
70
|
"\t-t | --nthread INT number of worker threads\n"
|
66
71
|
"\t-j | --jobsize INT job size for worker threads\n"
|
@@ -83,7 +88,7 @@ static void opt_help(const char *pname) {
|
|
83
88
|
"\t | --stpdec FLOAT (rprop) step decrement factor\n"
|
84
89
|
"\t | --cutoff (rprop) alternate projection\n"
|
85
90
|
"\n"
|
86
|
-
"
|
91
|
+
"Label mode:\n"
|
87
92
|
" %1$s label [options] [input data] [output data]\n"
|
88
93
|
"\t | --me force maxent mode\n"
|
89
94
|
"\t-m | --model FILE model file to load\n"
|
@@ -92,9 +97,18 @@ static void opt_help(const char *pname) {
|
|
92
97
|
"\t-s | --score add scores to output\n"
|
93
98
|
"\t-p | --post label using posteriors\n"
|
94
99
|
"\t-n | --nbest INT output n-best list\n"
|
100
|
+
"\t | --force use forced decoding\n"
|
95
101
|
"\n"
|
96
|
-
"
|
97
|
-
" %1$s dump [input model] [output text]\n"
|
102
|
+
"Dump mode\n"
|
103
|
+
" %1$s dump [options] [input model] [output text]\n"
|
104
|
+
"\t-p | --prec INT set weights precision\n"
|
105
|
+
"\t | --all also output 0 weights\n"
|
106
|
+
"\n"
|
107
|
+
"Update mode\n"
|
108
|
+
" %1$s update [options] [patch file] [output model]\n"
|
109
|
+
"\t-m | --model FILE model file to load\n"
|
110
|
+
"\t-c | --compact compact model after training\n"
|
111
|
+
;
|
98
112
|
fprintf(stderr, msg, pname);
|
99
113
|
}
|
100
114
|
|
@@ -104,8 +118,10 @@ static void opt_help(const char *pname) {
|
|
104
118
|
const opt_t opt_defaults = {
|
105
119
|
.mode = -1,
|
106
120
|
.input = NULL, .output = NULL,
|
121
|
+
.type = "crf",
|
107
122
|
.maxent = false,
|
108
123
|
.algo = "l-bfgs", .pattern = NULL, .model = NULL, .devel = NULL,
|
124
|
+
.rstate = NULL, .sstate = NULL,
|
109
125
|
.compact = false, .sparse = false,
|
110
126
|
.nthread = 1, .jobsize = 64, .maxiter = 0,
|
111
127
|
.rho1 = 0.5, .rho2 = 0.0001,
|
@@ -116,7 +132,8 @@ const opt_t opt_defaults = {
|
|
116
132
|
.rprop = {.stpmin = 1e-8, .stpmax = 50.0, .stpinc = 1.2, .stpdec = 0.5,
|
117
133
|
.cutoff = false},
|
118
134
|
.label = false, .check = false, .outsc = false,
|
119
|
-
.lblpost = false, .nbest =
|
135
|
+
.lblpost = false, .nbest = 1, .force = false,
|
136
|
+
.prec = 5, .all = false,
|
120
137
|
};
|
121
138
|
|
122
139
|
/* opt_switch:
|
@@ -125,29 +142,32 @@ const opt_t opt_defaults = {
|
|
125
142
|
*/
|
126
143
|
struct {
|
127
144
|
int mode;
|
128
|
-
char *dshort;
|
129
|
-
char *dlong;
|
145
|
+
const char *dshort;
|
146
|
+
const char *dlong;
|
130
147
|
char kind;
|
131
148
|
size_t offset;
|
132
149
|
} opt_switch[] = {
|
150
|
+
{0, "-T", "--type", 'S', offsetof(opt_t, type )},
|
133
151
|
{0, "##", "--me", 'B', offsetof(opt_t, maxent )},
|
134
152
|
{0, "-a", "--algo", 'S', offsetof(opt_t, algo )},
|
135
153
|
{0, "-p", "--pattern", 'S', offsetof(opt_t, pattern )},
|
136
154
|
{0, "-m", "--model", 'S', offsetof(opt_t, model )},
|
137
155
|
{0, "-d", "--devel", 'S', offsetof(opt_t, devel )},
|
156
|
+
{0, "##", "--rstate", 'S', offsetof(opt_t, rstate )},
|
157
|
+
{0, "##", "--sstate", 'S', offsetof(opt_t, sstate )},
|
138
158
|
{0, "-c", "--compact", 'B', offsetof(opt_t, compact )},
|
139
159
|
{0, "-s", "--sparse", 'B', offsetof(opt_t, sparse )},
|
140
|
-
{0, "-t", "--nthread", '
|
141
|
-
{0, "-j", "--
|
142
|
-
{0, "-i", "--maxiter", '
|
160
|
+
{0, "-t", "--nthread", 'U', offsetof(opt_t, nthread )},
|
161
|
+
{0, "-j", "--jobsize", 'U', offsetof(opt_t, jobsize )},
|
162
|
+
{0, "-i", "--maxiter", 'U', offsetof(opt_t, maxiter )},
|
143
163
|
{0, "-1", "--rho1", 'F', offsetof(opt_t, rho1 )},
|
144
164
|
{0, "-2", "--rho2", 'F', offsetof(opt_t, rho2 )},
|
145
|
-
{0, "-o", "--
|
146
|
-
{0, "-w", "--stopwin", '
|
165
|
+
{0, "-o", "--objwin", 'U', offsetof(opt_t, objwin )},
|
166
|
+
{0, "-w", "--stopwin", 'U', offsetof(opt_t, stopwin )},
|
147
167
|
{0, "-e", "--stopeps", 'F', offsetof(opt_t, stopeps )},
|
148
168
|
{0, "##", "--clip", 'B', offsetof(opt_t, lbfgs.clip )},
|
149
|
-
{0, "##", "--histsz", '
|
150
|
-
{0, "##", "--maxls", '
|
169
|
+
{0, "##", "--histsz", 'U', offsetof(opt_t, lbfgs.histsz)},
|
170
|
+
{0, "##", "--maxls", 'U', offsetof(opt_t, lbfgs.maxls )},
|
151
171
|
{0, "##", "--eta0", 'F', offsetof(opt_t, sgdl1.eta0 )},
|
152
172
|
{0," ##", "--alpha", 'F', offsetof(opt_t, sgdl1.alpha )},
|
153
173
|
{0, "##", "--kappa", 'F', offsetof(opt_t, bcd.kappa )},
|
@@ -162,7 +182,12 @@ struct {
|
|
162
182
|
{1, "-c", "--check", 'B', offsetof(opt_t, check )},
|
163
183
|
{1, "-s", "--score", 'B', offsetof(opt_t, outsc )},
|
164
184
|
{1, "-p", "--post", 'B', offsetof(opt_t, lblpost )},
|
165
|
-
{1, "-n", "--nbest", '
|
185
|
+
{1, "-n", "--nbest", 'U', offsetof(opt_t, nbest )},
|
186
|
+
{1, "##", "--force", 'B', offsetof(opt_t, force )},
|
187
|
+
{2, "-p", "--prec", 'U', offsetof(opt_t, prec )},
|
188
|
+
{2, "##", "--all", 'B', offsetof(opt_t, all )},
|
189
|
+
{3, "-m", "--model", 'S', offsetof(opt_t, model )},
|
190
|
+
{3, "-c", "--compact", 'B', offsetof(opt_t, compact )},
|
166
191
|
{-1, NULL, NULL, '\0', 0}
|
167
192
|
};
|
168
193
|
|
@@ -195,6 +220,8 @@ void opt_parse(int argc, char *argv[argc], opt_t *opt) {
|
|
195
220
|
opt->mode = 1;
|
196
221
|
} else if (!strcmp(argv[0], "d") || !strcmp(argv[0], "dump")) {
|
197
222
|
opt->mode = 2;
|
223
|
+
} else if (!strcmp(argv[0], "u") || !strcmp(argv[0], "update")) {
|
224
|
+
opt->mode = 3;
|
198
225
|
} else {
|
199
226
|
fatal("unknown mode <%s>", argv[0]);
|
200
227
|
}
|
@@ -204,7 +231,7 @@ void opt_parse(int argc, char *argv[argc], opt_t *opt) {
|
|
204
231
|
opt->output = NULL;
|
205
232
|
while (argc > 0) {
|
206
233
|
const char *arg = argv[0];
|
207
|
-
|
234
|
+
uint32_t idx;
|
208
235
|
// Check if this argument is a filename or an option
|
209
236
|
if (arg[0] != '-') {
|
210
237
|
if (opt->input == NULL)
|
@@ -237,8 +264,9 @@ void opt_parse(int argc, char *argv[argc], opt_t *opt) {
|
|
237
264
|
*((char **)ptr) = argv[1];
|
238
265
|
argc -= 2, argv += 2;
|
239
266
|
break;
|
240
|
-
case '
|
241
|
-
if (sscanf(argv[1], "%
|
267
|
+
case 'U':
|
268
|
+
if (sscanf(argv[1], "%"SCNu32,
|
269
|
+
(uint32_t *)ptr) != 1)
|
242
270
|
fatal(err_badval, arg);
|
243
271
|
argc -= 2, argv += 2;
|
244
272
|
break;
|
@@ -272,7 +300,11 @@ void opt_parse(int argc, char *argv[argc], opt_t *opt) {
|
|
272
300
|
argchecksub("--alpha", opt->sgdl1.alpha > 0.0);
|
273
301
|
argchecksub("--nbest", opt->nbest > 0 );
|
274
302
|
#undef argchecksub
|
275
|
-
if (opt->maxent && !strcmp(opt->algo, "bcd"))
|
303
|
+
if ((opt->maxent || !strcmp(opt->type, "maxent")) && !strcmp(opt->algo, "bcd"))
|
276
304
|
fatal("BCD not supported for training maxent models");
|
305
|
+
if (!strcmp(opt->type, "memm") && !strcmp(opt->algo, "bcd"))
|
306
|
+
fatal("BCD not supported for training MEMM models");
|
307
|
+
if (opt->check && opt->force)
|
308
|
+
fatal("--check and --force cannot be used together");
|
277
309
|
}
|
278
310
|
|
data/ext/wapiti/options.h
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
/*
|
2
2
|
* Wapiti - A linear-chain CRF tool
|
3
3
|
*
|
4
|
-
* Copyright (c) 2009-
|
4
|
+
* Copyright (c) 2009-2013 CNRS
|
5
5
|
* All rights reserved.
|
6
6
|
*
|
7
7
|
* Redistribution and use in source and binary forms, with or without
|
@@ -27,6 +27,7 @@
|
|
27
27
|
#ifndef options_h
|
28
28
|
#define options_h
|
29
29
|
|
30
|
+
#include <stdint.h>
|
30
31
|
#include <stdbool.h>
|
31
32
|
|
32
33
|
#include "wapiti.h"
|
@@ -37,50 +38,56 @@
|
|
37
38
|
*/
|
38
39
|
typedef struct opt_s opt_t;
|
39
40
|
struct opt_s {
|
40
|
-
int
|
41
|
-
char
|
42
|
-
bool
|
41
|
+
int mode;
|
42
|
+
char *input, *output;
|
43
|
+
bool maxent;
|
43
44
|
// Options for training
|
44
|
-
char
|
45
|
-
char
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
45
|
+
const char *type;
|
46
|
+
const char *algo, *pattern;
|
47
|
+
char *model, *devel;
|
48
|
+
char *rstate, *sstate;
|
49
|
+
bool compact, sparse;
|
50
|
+
uint32_t nthread;
|
51
|
+
uint32_t jobsize;
|
52
|
+
uint32_t maxiter;
|
53
|
+
double rho1, rho2;
|
51
54
|
// Window size criterion
|
52
|
-
|
53
|
-
|
54
|
-
double
|
55
|
+
uint32_t objwin;
|
56
|
+
uint32_t stopwin;
|
57
|
+
double stopeps;
|
55
58
|
// Options specific to L-BFGS
|
56
59
|
struct {
|
57
|
-
bool
|
58
|
-
|
59
|
-
|
60
|
+
bool clip;
|
61
|
+
uint32_t histsz;
|
62
|
+
uint32_t maxls;
|
60
63
|
} lbfgs;
|
61
64
|
// Options specific to SGD-L1
|
62
65
|
struct {
|
63
|
-
double
|
64
|
-
double
|
66
|
+
double eta0;
|
67
|
+
double alpha;
|
65
68
|
} sgdl1;
|
66
69
|
// Options specific to BCD
|
67
70
|
struct {
|
68
|
-
double
|
71
|
+
double kappa;
|
69
72
|
} bcd;
|
70
73
|
// Options specific to RPROP
|
71
74
|
struct {
|
72
|
-
double
|
73
|
-
double
|
74
|
-
double
|
75
|
-
double
|
76
|
-
bool
|
75
|
+
double stpmin;
|
76
|
+
double stpmax;
|
77
|
+
double stpinc;
|
78
|
+
double stpdec;
|
79
|
+
bool cutoff;
|
77
80
|
} rprop;
|
78
81
|
// Options for labelling
|
79
|
-
bool
|
80
|
-
bool
|
81
|
-
bool
|
82
|
-
bool
|
83
|
-
|
82
|
+
bool label;
|
83
|
+
bool check;
|
84
|
+
bool outsc;
|
85
|
+
bool lblpost;
|
86
|
+
uint32_t nbest;
|
87
|
+
bool force;
|
88
|
+
// Options for model dump
|
89
|
+
int prec;
|
90
|
+
bool all;
|
84
91
|
};
|
85
92
|
|
86
93
|
extern const opt_t opt_defaults;
|
data/ext/wapiti/pattern.c
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
/*
|
2
2
|
* Wapiti - A linear-chain CRF tool
|
3
3
|
*
|
4
|
-
* Copyright (c) 2009-
|
4
|
+
* Copyright (c) 2009-2013 CNRS
|
5
5
|
* All rights reserved.
|
6
6
|
*
|
7
7
|
* Redistribution and use in source and binary forms, with or without
|
@@ -26,8 +26,10 @@
|
|
26
26
|
*/
|
27
27
|
|
28
28
|
#include <ctype.h>
|
29
|
+
#include <inttypes.h>
|
29
30
|
#include <stdbool.h>
|
30
31
|
#include <stddef.h>
|
32
|
+
#include <stdint.h>
|
31
33
|
#include <stdio.h>
|
32
34
|
#include <stdlib.h>
|
33
35
|
#include <string.h>
|
@@ -101,7 +103,7 @@ static bool rex_matchit(const char *ch, const char *str) {
|
|
101
103
|
* its length is returned in len. The matching is done through tail-recursion
|
102
104
|
* for good performance.
|
103
105
|
*/
|
104
|
-
static bool rex_matchme(const char *re, const char *str,
|
106
|
+
static bool rex_matchme(const char *re, const char *str, uint32_t *len) {
|
105
107
|
// Special check for end of regexp
|
106
108
|
if (re[0] == '\0')
|
107
109
|
return true;
|
@@ -120,7 +122,7 @@ static bool rex_matchme(const char *re, const char *str, int *len) {
|
|
120
122
|
if (nxt[0] == '*') {
|
121
123
|
nxt++;
|
122
124
|
do {
|
123
|
-
const
|
125
|
+
const uint32_t save = *len;
|
124
126
|
if (rex_matchme(nxt, str, len))
|
125
127
|
return true;
|
126
128
|
*len = save + 1;
|
@@ -150,7 +152,7 @@ static bool rex_matchme(const char *re, const char *str, int *len) {
|
|
150
152
|
* position of the start of the match is returned and its length is returned in
|
151
153
|
* len, else -1 is returned.
|
152
154
|
*/
|
153
|
-
static
|
155
|
+
static int32_t rex_match(const char *re, const char *str, uint32_t *len) {
|
154
156
|
// Special case for anchor at start
|
155
157
|
if (*re == '^') {
|
156
158
|
*len = 0;
|
@@ -159,7 +161,7 @@ static int rex_match(const char *re, const char *str, int *len) {
|
|
159
161
|
return -1;
|
160
162
|
}
|
161
163
|
// And general case for any position
|
162
|
-
|
164
|
+
int32_t pos = 0;
|
163
165
|
do {
|
164
166
|
*len = 0;
|
165
167
|
if (rex_matchme(re, str + pos, len))
|
@@ -215,8 +217,8 @@ pat_t *pat_comp(char *p) {
|
|
215
217
|
// on an over-estimation of the number of required items. As compiled
|
216
218
|
// patterns take a negligible amount of memory, this waste is not
|
217
219
|
// important.
|
218
|
-
|
219
|
-
for (
|
220
|
+
uint32_t mitems = 0;
|
221
|
+
for (uint32_t pos = 0; p[pos] != '\0'; pos++)
|
220
222
|
if (p[pos] == '%')
|
221
223
|
mitems++;
|
222
224
|
mitems = mitems * 2 + 1;
|
@@ -225,9 +227,9 @@ pat_t *pat_comp(char *p) {
|
|
225
227
|
// Next, we go through the pattern compiling the items as they are
|
226
228
|
// found. Commands are parsed and put in a corresponding item, and
|
227
229
|
// segments of chars not in a command are put in an 's' item.
|
228
|
-
|
229
|
-
|
230
|
-
|
230
|
+
uint32_t nitems = 0;
|
231
|
+
uint32_t ntoks = 0;
|
232
|
+
uint32_t pos = 0;
|
231
233
|
while (p[pos] != '\0') {
|
232
234
|
pat_item_t *item = &(pat->items[nitems++]);
|
233
235
|
item->value = NULL;
|
@@ -243,14 +245,14 @@ pat_t *pat_comp(char *p) {
|
|
243
245
|
// Next we parse the offset and column and store them in
|
244
246
|
// the item.
|
245
247
|
const char *at = p + pos;
|
246
|
-
|
248
|
+
uint32_t col;
|
249
|
+
int32_t off;
|
250
|
+
int nch;
|
247
251
|
item->absolute = false;
|
248
|
-
if (sscanf(at, "[@%
|
252
|
+
if (sscanf(at, "[@%"SCNi32",%"SCNu32"%n", &off, &col, &nch) == 2)
|
249
253
|
item->absolute = true;
|
250
|
-
else if (sscanf(at, "[%
|
254
|
+
else if (sscanf(at, "[%"SCNi32",%"SCNu32"%n", &off, &col, &nch) != 2)
|
251
255
|
fatal("invalid pattern: %s", p);
|
252
|
-
if (col < 0)
|
253
|
-
fatal("invalid column number: %d", col);
|
254
256
|
item->offset = off;
|
255
257
|
item->column = col;
|
256
258
|
ntoks = max(ntoks, col);
|
@@ -261,7 +263,7 @@ pat_t *pat_comp(char *p) {
|
|
261
263
|
if (type == 't' || type == 'm') {
|
262
264
|
if (p[pos] != ',' && p[pos + 1] != '"')
|
263
265
|
fatal("missing arg in pattern: %s", p);
|
264
|
-
const
|
266
|
+
const int32_t start = (pos += 2);
|
265
267
|
while (p[pos] != '\0') {
|
266
268
|
if (p[pos] == '"')
|
267
269
|
break;
|
@@ -271,7 +273,7 @@ pat_t *pat_comp(char *p) {
|
|
271
273
|
}
|
272
274
|
if (p[pos] != '"')
|
273
275
|
fatal("unended argument: %s", p);
|
274
|
-
const
|
276
|
+
const int32_t len = pos - start;
|
275
277
|
item->value = wapiti_xmalloc(sizeof(char) * (len + 1));
|
276
278
|
memcpy(item->value, p + start, len);
|
277
279
|
item->value[len] = '\0';
|
@@ -285,10 +287,10 @@ pat_t *pat_comp(char *p) {
|
|
285
287
|
// No command here, so build an 's' item with the chars
|
286
288
|
// until end of pattern or next command and put it in
|
287
289
|
// the list.
|
288
|
-
const
|
290
|
+
const int32_t start = pos;
|
289
291
|
while (p[pos] != '\0' && p[pos] != '%')
|
290
292
|
pos++;
|
291
|
-
const
|
293
|
+
const int32_t len = pos - start;
|
292
294
|
item->type = 's';
|
293
295
|
item->caps = false;
|
294
296
|
item->value = wapiti_xmalloc(sizeof(char) * (len + 1));
|
@@ -307,18 +309,18 @@ pat_t *pat_comp(char *p) {
|
|
307
309
|
* newly allocated memory block and the caller is responsible to free it when
|
308
310
|
* not needed anymore.
|
309
311
|
*/
|
310
|
-
char *pat_exec(const pat_t *pat, const tok_t *tok,
|
311
|
-
static char *bval[] = {"_x-1", "_x-2", "_x-3", "_x-4", "_x-#"};
|
312
|
-
static char *eval[] = {"_x+1", "_x+2", "_x+3", "_x+4", "_x+#"};
|
313
|
-
const
|
312
|
+
char *pat_exec(const pat_t *pat, const tok_t *tok, uint32_t at) {
|
313
|
+
static const char *bval[] = {"_x-1", "_x-2", "_x-3", "_x-4", "_x-#"};
|
314
|
+
static const char *eval[] = {"_x+1", "_x+2", "_x+3", "_x+4", "_x+#"};
|
315
|
+
const uint32_t T = tok->len;
|
314
316
|
// Prepare the buffer that will hold the result
|
315
|
-
|
317
|
+
uint32_t size = 16, pos = 0;
|
316
318
|
char *buffer = wapiti_xmalloc(sizeof(char) * size);
|
317
319
|
// And loop over the compiled items
|
318
|
-
for (
|
320
|
+
for (uint32_t it = 0; it < pat->nitems; it++) {
|
319
321
|
const pat_item_t *item = &(pat->items[it]);
|
320
|
-
char *value = NULL;
|
321
|
-
|
322
|
+
const char *value = NULL;
|
323
|
+
uint32_t len = 0;
|
322
324
|
// First, if needed, we retrieve the token at the referenced
|
323
325
|
// position in the sequence. We store it in value and let the
|
324
326
|
// command handler do what it needs with it.
|
@@ -332,11 +334,11 @@ char *pat_exec(const pat_t *pat, const tok_t *tok, int at) {
|
|
332
334
|
} else {
|
333
335
|
pos += at;
|
334
336
|
}
|
335
|
-
|
337
|
+
uint32_t col = item->column;
|
336
338
|
if (pos < 0)
|
337
339
|
value = bval[min(-pos - 1, 4)];
|
338
|
-
else if (pos >= T)
|
339
|
-
value = eval[min( pos - T, 4)];
|
340
|
+
else if (pos >= (int32_t)T)
|
341
|
+
value = eval[min( pos - (int32_t)T, 4)];
|
340
342
|
else if (col >= tok->cnts[pos])
|
341
343
|
fatal("missing tokens, cannot apply pattern");
|
342
344
|
else
|
@@ -356,7 +358,7 @@ char *pat_exec(const pat_t *pat, const tok_t *tok, int at) {
|
|
356
358
|
value = "true";
|
357
359
|
len = strlen(value);
|
358
360
|
} else if (item->type == 'm') {
|
359
|
-
|
361
|
+
int32_t pos = rex_match(item->value, value, &len);
|
360
362
|
if (pos == -1)
|
361
363
|
len = 0;
|
362
364
|
value += pos;
|
@@ -370,7 +372,7 @@ char *pat_exec(const pat_t *pat, const tok_t *tok, int at) {
|
|
370
372
|
}
|
371
373
|
memcpy(buffer + pos, value, len);
|
372
374
|
if (item->caps)
|
373
|
-
for (
|
375
|
+
for (uint32_t i = pos; i < pos + len; i++)
|
374
376
|
buffer[i] = tolower(buffer[i]);
|
375
377
|
pos += len;
|
376
378
|
}
|
@@ -386,7 +388,7 @@ char *pat_exec(const pat_t *pat, const tok_t *tok, int at) {
|
|
386
388
|
* not use this pointer again.
|
387
389
|
*/
|
388
390
|
void pat_free(pat_t *pat) {
|
389
|
-
for (
|
391
|
+
for (uint32_t it = 0; it < pat->nitems; it++)
|
390
392
|
free(pat->items[it].value);
|
391
393
|
free(pat->src);
|
392
394
|
free(pat);
|