wapiti 0.0.5 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.simplecov +3 -0
- data/Gemfile +25 -2
- data/HISTORY.md +5 -1
- data/LICENSE +14 -13
- data/README.md +9 -16
- data/Rakefile +38 -8
- data/ext/wapiti/bcd.c +126 -124
- data/ext/wapiti/decoder.c +203 -124
- data/ext/wapiti/decoder.h +6 -4
- data/ext/wapiti/extconf.rb +2 -2
- data/ext/wapiti/gradient.c +491 -320
- data/ext/wapiti/gradient.h +52 -34
- data/ext/wapiti/lbfgs.c +74 -33
- data/ext/wapiti/model.c +47 -37
- data/ext/wapiti/model.h +22 -20
- data/ext/wapiti/native.c +850 -839
- data/ext/wapiti/native.h +1 -1
- data/ext/wapiti/options.c +52 -20
- data/ext/wapiti/options.h +37 -30
- data/ext/wapiti/pattern.c +35 -33
- data/ext/wapiti/pattern.h +12 -11
- data/ext/wapiti/progress.c +14 -13
- data/ext/wapiti/progress.h +3 -2
- data/ext/wapiti/quark.c +14 -16
- data/ext/wapiti/quark.h +6 -5
- data/ext/wapiti/reader.c +83 -69
- data/ext/wapiti/reader.h +11 -9
- data/ext/wapiti/rprop.c +84 -43
- data/ext/wapiti/sequence.h +18 -16
- data/ext/wapiti/sgdl1.c +45 -43
- data/ext/wapiti/thread.c +19 -17
- data/ext/wapiti/thread.h +5 -4
- data/ext/wapiti/tools.c +7 -7
- data/ext/wapiti/tools.h +3 -4
- data/ext/wapiti/trainers.h +1 -1
- data/ext/wapiti/vmath.c +40 -38
- data/ext/wapiti/vmath.h +12 -11
- data/ext/wapiti/wapiti.c +159 -37
- data/ext/wapiti/wapiti.h +18 -4
- data/lib/wapiti.rb +15 -15
- data/lib/wapiti/errors.rb +15 -15
- data/lib/wapiti/model.rb +92 -84
- data/lib/wapiti/options.rb +123 -124
- data/lib/wapiti/utility.rb +14 -14
- data/lib/wapiti/version.rb +2 -2
- data/spec/spec_helper.rb +29 -9
- data/spec/wapiti/model_spec.rb +230 -194
- data/spec/wapiti/native_spec.rb +7 -8
- data/spec/wapiti/options_spec.rb +184 -174
- data/wapiti.gemspec +22 -8
- metadata +38 -42
- data/.gitignore +0 -5
data/ext/wapiti/native.h
CHANGED
data/ext/wapiti/options.c
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
/*
|
2
2
|
* Wapiti - A linear-chain CRF tool
|
3
3
|
*
|
4
|
-
* Copyright (c) 2009-
|
4
|
+
* Copyright (c) 2009-2013 CNRS
|
5
5
|
* All rights reserved.
|
6
6
|
*
|
7
7
|
* Redistribution and use in source and binary forms, with or without
|
@@ -25,9 +25,11 @@
|
|
25
25
|
* POSSIBILITY OF SUCH DAMAGE.
|
26
26
|
*/
|
27
27
|
|
28
|
+
#include <inttypes.h>
|
28
29
|
#include <limits.h>
|
29
30
|
#include <stdbool.h>
|
30
31
|
#include <stddef.h>
|
32
|
+
#include <stdint.h>
|
31
33
|
#include <stdlib.h>
|
32
34
|
#include <stdio.h>
|
33
35
|
#include <string.h>
|
@@ -54,13 +56,16 @@ static void opt_help(const char *pname) {
|
|
54
56
|
"\t-h | --help display this help message\n"
|
55
57
|
"\t | --version display version information\n"
|
56
58
|
"\n"
|
57
|
-
"
|
59
|
+
"Train mode:\n"
|
58
60
|
" %1$s train [options] [input data] [model file]\n"
|
59
61
|
"\t | --me force maxent mode\n"
|
62
|
+
"\t-T | --type STRING type of model to train\n"
|
60
63
|
"\t-a | --algo STRING training algorithm to use\n"
|
61
64
|
"\t-p | --pattern FILE patterns for extracting features\n"
|
62
65
|
"\t-m | --model FILE model file to preload\n"
|
63
66
|
"\t-d | --devel FILE development dataset\n"
|
67
|
+
"\t | --rstate FILE optimizer state to restore\n"
|
68
|
+
"\t | --sstate FILE optimizer state to save\n"
|
64
69
|
"\t-c | --compact compact model after training\n"
|
65
70
|
"\t-t | --nthread INT number of worker threads\n"
|
66
71
|
"\t-j | --jobsize INT job size for worker threads\n"
|
@@ -83,7 +88,7 @@ static void opt_help(const char *pname) {
|
|
83
88
|
"\t | --stpdec FLOAT (rprop) step decrement factor\n"
|
84
89
|
"\t | --cutoff (rprop) alternate projection\n"
|
85
90
|
"\n"
|
86
|
-
"
|
91
|
+
"Label mode:\n"
|
87
92
|
" %1$s label [options] [input data] [output data]\n"
|
88
93
|
"\t | --me force maxent mode\n"
|
89
94
|
"\t-m | --model FILE model file to load\n"
|
@@ -92,9 +97,18 @@ static void opt_help(const char *pname) {
|
|
92
97
|
"\t-s | --score add scores to output\n"
|
93
98
|
"\t-p | --post label using posteriors\n"
|
94
99
|
"\t-n | --nbest INT output n-best list\n"
|
100
|
+
"\t | --force use forced decoding\n"
|
95
101
|
"\n"
|
96
|
-
"
|
97
|
-
" %1$s dump [input model] [output text]\n"
|
102
|
+
"Dump mode\n"
|
103
|
+
" %1$s dump [options] [input model] [output text]\n"
|
104
|
+
"\t-p | --prec INT set weights precision\n"
|
105
|
+
"\t | --all also output 0 weights\n"
|
106
|
+
"\n"
|
107
|
+
"Update mode\n"
|
108
|
+
" %1$s update [options] [patch file] [output model]\n"
|
109
|
+
"\t-m | --model FILE model file to load\n"
|
110
|
+
"\t-c | --compact compact model after training\n"
|
111
|
+
;
|
98
112
|
fprintf(stderr, msg, pname);
|
99
113
|
}
|
100
114
|
|
@@ -104,8 +118,10 @@ static void opt_help(const char *pname) {
|
|
104
118
|
const opt_t opt_defaults = {
|
105
119
|
.mode = -1,
|
106
120
|
.input = NULL, .output = NULL,
|
121
|
+
.type = "crf",
|
107
122
|
.maxent = false,
|
108
123
|
.algo = "l-bfgs", .pattern = NULL, .model = NULL, .devel = NULL,
|
124
|
+
.rstate = NULL, .sstate = NULL,
|
109
125
|
.compact = false, .sparse = false,
|
110
126
|
.nthread = 1, .jobsize = 64, .maxiter = 0,
|
111
127
|
.rho1 = 0.5, .rho2 = 0.0001,
|
@@ -116,7 +132,8 @@ const opt_t opt_defaults = {
|
|
116
132
|
.rprop = {.stpmin = 1e-8, .stpmax = 50.0, .stpinc = 1.2, .stpdec = 0.5,
|
117
133
|
.cutoff = false},
|
118
134
|
.label = false, .check = false, .outsc = false,
|
119
|
-
.lblpost = false, .nbest =
|
135
|
+
.lblpost = false, .nbest = 1, .force = false,
|
136
|
+
.prec = 5, .all = false,
|
120
137
|
};
|
121
138
|
|
122
139
|
/* opt_switch:
|
@@ -125,29 +142,32 @@ const opt_t opt_defaults = {
|
|
125
142
|
*/
|
126
143
|
struct {
|
127
144
|
int mode;
|
128
|
-
char *dshort;
|
129
|
-
char *dlong;
|
145
|
+
const char *dshort;
|
146
|
+
const char *dlong;
|
130
147
|
char kind;
|
131
148
|
size_t offset;
|
132
149
|
} opt_switch[] = {
|
150
|
+
{0, "-T", "--type", 'S', offsetof(opt_t, type )},
|
133
151
|
{0, "##", "--me", 'B', offsetof(opt_t, maxent )},
|
134
152
|
{0, "-a", "--algo", 'S', offsetof(opt_t, algo )},
|
135
153
|
{0, "-p", "--pattern", 'S', offsetof(opt_t, pattern )},
|
136
154
|
{0, "-m", "--model", 'S', offsetof(opt_t, model )},
|
137
155
|
{0, "-d", "--devel", 'S', offsetof(opt_t, devel )},
|
156
|
+
{0, "##", "--rstate", 'S', offsetof(opt_t, rstate )},
|
157
|
+
{0, "##", "--sstate", 'S', offsetof(opt_t, sstate )},
|
138
158
|
{0, "-c", "--compact", 'B', offsetof(opt_t, compact )},
|
139
159
|
{0, "-s", "--sparse", 'B', offsetof(opt_t, sparse )},
|
140
|
-
{0, "-t", "--nthread", '
|
141
|
-
{0, "-j", "--
|
142
|
-
{0, "-i", "--maxiter", '
|
160
|
+
{0, "-t", "--nthread", 'U', offsetof(opt_t, nthread )},
|
161
|
+
{0, "-j", "--jobsize", 'U', offsetof(opt_t, jobsize )},
|
162
|
+
{0, "-i", "--maxiter", 'U', offsetof(opt_t, maxiter )},
|
143
163
|
{0, "-1", "--rho1", 'F', offsetof(opt_t, rho1 )},
|
144
164
|
{0, "-2", "--rho2", 'F', offsetof(opt_t, rho2 )},
|
145
|
-
{0, "-o", "--
|
146
|
-
{0, "-w", "--stopwin", '
|
165
|
+
{0, "-o", "--objwin", 'U', offsetof(opt_t, objwin )},
|
166
|
+
{0, "-w", "--stopwin", 'U', offsetof(opt_t, stopwin )},
|
147
167
|
{0, "-e", "--stopeps", 'F', offsetof(opt_t, stopeps )},
|
148
168
|
{0, "##", "--clip", 'B', offsetof(opt_t, lbfgs.clip )},
|
149
|
-
{0, "##", "--histsz", '
|
150
|
-
{0, "##", "--maxls", '
|
169
|
+
{0, "##", "--histsz", 'U', offsetof(opt_t, lbfgs.histsz)},
|
170
|
+
{0, "##", "--maxls", 'U', offsetof(opt_t, lbfgs.maxls )},
|
151
171
|
{0, "##", "--eta0", 'F', offsetof(opt_t, sgdl1.eta0 )},
|
152
172
|
{0," ##", "--alpha", 'F', offsetof(opt_t, sgdl1.alpha )},
|
153
173
|
{0, "##", "--kappa", 'F', offsetof(opt_t, bcd.kappa )},
|
@@ -162,7 +182,12 @@ struct {
|
|
162
182
|
{1, "-c", "--check", 'B', offsetof(opt_t, check )},
|
163
183
|
{1, "-s", "--score", 'B', offsetof(opt_t, outsc )},
|
164
184
|
{1, "-p", "--post", 'B', offsetof(opt_t, lblpost )},
|
165
|
-
{1, "-n", "--nbest", '
|
185
|
+
{1, "-n", "--nbest", 'U', offsetof(opt_t, nbest )},
|
186
|
+
{1, "##", "--force", 'B', offsetof(opt_t, force )},
|
187
|
+
{2, "-p", "--prec", 'U', offsetof(opt_t, prec )},
|
188
|
+
{2, "##", "--all", 'B', offsetof(opt_t, all )},
|
189
|
+
{3, "-m", "--model", 'S', offsetof(opt_t, model )},
|
190
|
+
{3, "-c", "--compact", 'B', offsetof(opt_t, compact )},
|
166
191
|
{-1, NULL, NULL, '\0', 0}
|
167
192
|
};
|
168
193
|
|
@@ -195,6 +220,8 @@ void opt_parse(int argc, char *argv[argc], opt_t *opt) {
|
|
195
220
|
opt->mode = 1;
|
196
221
|
} else if (!strcmp(argv[0], "d") || !strcmp(argv[0], "dump")) {
|
197
222
|
opt->mode = 2;
|
223
|
+
} else if (!strcmp(argv[0], "u") || !strcmp(argv[0], "update")) {
|
224
|
+
opt->mode = 3;
|
198
225
|
} else {
|
199
226
|
fatal("unknown mode <%s>", argv[0]);
|
200
227
|
}
|
@@ -204,7 +231,7 @@ void opt_parse(int argc, char *argv[argc], opt_t *opt) {
|
|
204
231
|
opt->output = NULL;
|
205
232
|
while (argc > 0) {
|
206
233
|
const char *arg = argv[0];
|
207
|
-
|
234
|
+
uint32_t idx;
|
208
235
|
// Check if this argument is a filename or an option
|
209
236
|
if (arg[0] != '-') {
|
210
237
|
if (opt->input == NULL)
|
@@ -237,8 +264,9 @@ void opt_parse(int argc, char *argv[argc], opt_t *opt) {
|
|
237
264
|
*((char **)ptr) = argv[1];
|
238
265
|
argc -= 2, argv += 2;
|
239
266
|
break;
|
240
|
-
case '
|
241
|
-
if (sscanf(argv[1], "%
|
267
|
+
case 'U':
|
268
|
+
if (sscanf(argv[1], "%"SCNu32,
|
269
|
+
(uint32_t *)ptr) != 1)
|
242
270
|
fatal(err_badval, arg);
|
243
271
|
argc -= 2, argv += 2;
|
244
272
|
break;
|
@@ -272,7 +300,11 @@ void opt_parse(int argc, char *argv[argc], opt_t *opt) {
|
|
272
300
|
argchecksub("--alpha", opt->sgdl1.alpha > 0.0);
|
273
301
|
argchecksub("--nbest", opt->nbest > 0 );
|
274
302
|
#undef argchecksub
|
275
|
-
if (opt->maxent && !strcmp(opt->algo, "bcd"))
|
303
|
+
if ((opt->maxent || !strcmp(opt->type, "maxent")) && !strcmp(opt->algo, "bcd"))
|
276
304
|
fatal("BCD not supported for training maxent models");
|
305
|
+
if (!strcmp(opt->type, "memm") && !strcmp(opt->algo, "bcd"))
|
306
|
+
fatal("BCD not supported for training MEMM models");
|
307
|
+
if (opt->check && opt->force)
|
308
|
+
fatal("--check and --force cannot be used together");
|
277
309
|
}
|
278
310
|
|
data/ext/wapiti/options.h
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
/*
|
2
2
|
* Wapiti - A linear-chain CRF tool
|
3
3
|
*
|
4
|
-
* Copyright (c) 2009-
|
4
|
+
* Copyright (c) 2009-2013 CNRS
|
5
5
|
* All rights reserved.
|
6
6
|
*
|
7
7
|
* Redistribution and use in source and binary forms, with or without
|
@@ -27,6 +27,7 @@
|
|
27
27
|
#ifndef options_h
|
28
28
|
#define options_h
|
29
29
|
|
30
|
+
#include <stdint.h>
|
30
31
|
#include <stdbool.h>
|
31
32
|
|
32
33
|
#include "wapiti.h"
|
@@ -37,50 +38,56 @@
|
|
37
38
|
*/
|
38
39
|
typedef struct opt_s opt_t;
|
39
40
|
struct opt_s {
|
40
|
-
int
|
41
|
-
char
|
42
|
-
bool
|
41
|
+
int mode;
|
42
|
+
char *input, *output;
|
43
|
+
bool maxent;
|
43
44
|
// Options for training
|
44
|
-
char
|
45
|
-
char
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
45
|
+
const char *type;
|
46
|
+
const char *algo, *pattern;
|
47
|
+
char *model, *devel;
|
48
|
+
char *rstate, *sstate;
|
49
|
+
bool compact, sparse;
|
50
|
+
uint32_t nthread;
|
51
|
+
uint32_t jobsize;
|
52
|
+
uint32_t maxiter;
|
53
|
+
double rho1, rho2;
|
51
54
|
// Window size criterion
|
52
|
-
|
53
|
-
|
54
|
-
double
|
55
|
+
uint32_t objwin;
|
56
|
+
uint32_t stopwin;
|
57
|
+
double stopeps;
|
55
58
|
// Options specific to L-BFGS
|
56
59
|
struct {
|
57
|
-
bool
|
58
|
-
|
59
|
-
|
60
|
+
bool clip;
|
61
|
+
uint32_t histsz;
|
62
|
+
uint32_t maxls;
|
60
63
|
} lbfgs;
|
61
64
|
// Options specific to SGD-L1
|
62
65
|
struct {
|
63
|
-
double
|
64
|
-
double
|
66
|
+
double eta0;
|
67
|
+
double alpha;
|
65
68
|
} sgdl1;
|
66
69
|
// Options specific to BCD
|
67
70
|
struct {
|
68
|
-
double
|
71
|
+
double kappa;
|
69
72
|
} bcd;
|
70
73
|
// Options specific to RPROP
|
71
74
|
struct {
|
72
|
-
double
|
73
|
-
double
|
74
|
-
double
|
75
|
-
double
|
76
|
-
bool
|
75
|
+
double stpmin;
|
76
|
+
double stpmax;
|
77
|
+
double stpinc;
|
78
|
+
double stpdec;
|
79
|
+
bool cutoff;
|
77
80
|
} rprop;
|
78
81
|
// Options for labelling
|
79
|
-
bool
|
80
|
-
bool
|
81
|
-
bool
|
82
|
-
bool
|
83
|
-
|
82
|
+
bool label;
|
83
|
+
bool check;
|
84
|
+
bool outsc;
|
85
|
+
bool lblpost;
|
86
|
+
uint32_t nbest;
|
87
|
+
bool force;
|
88
|
+
// Options for model dump
|
89
|
+
int prec;
|
90
|
+
bool all;
|
84
91
|
};
|
85
92
|
|
86
93
|
extern const opt_t opt_defaults;
|
data/ext/wapiti/pattern.c
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
/*
|
2
2
|
* Wapiti - A linear-chain CRF tool
|
3
3
|
*
|
4
|
-
* Copyright (c) 2009-
|
4
|
+
* Copyright (c) 2009-2013 CNRS
|
5
5
|
* All rights reserved.
|
6
6
|
*
|
7
7
|
* Redistribution and use in source and binary forms, with or without
|
@@ -26,8 +26,10 @@
|
|
26
26
|
*/
|
27
27
|
|
28
28
|
#include <ctype.h>
|
29
|
+
#include <inttypes.h>
|
29
30
|
#include <stdbool.h>
|
30
31
|
#include <stddef.h>
|
32
|
+
#include <stdint.h>
|
31
33
|
#include <stdio.h>
|
32
34
|
#include <stdlib.h>
|
33
35
|
#include <string.h>
|
@@ -101,7 +103,7 @@ static bool rex_matchit(const char *ch, const char *str) {
|
|
101
103
|
* its length is returned in len. The matching is done through tail-recursion
|
102
104
|
* for good performance.
|
103
105
|
*/
|
104
|
-
static bool rex_matchme(const char *re, const char *str,
|
106
|
+
static bool rex_matchme(const char *re, const char *str, uint32_t *len) {
|
105
107
|
// Special check for end of regexp
|
106
108
|
if (re[0] == '\0')
|
107
109
|
return true;
|
@@ -120,7 +122,7 @@ static bool rex_matchme(const char *re, const char *str, int *len) {
|
|
120
122
|
if (nxt[0] == '*') {
|
121
123
|
nxt++;
|
122
124
|
do {
|
123
|
-
const
|
125
|
+
const uint32_t save = *len;
|
124
126
|
if (rex_matchme(nxt, str, len))
|
125
127
|
return true;
|
126
128
|
*len = save + 1;
|
@@ -150,7 +152,7 @@ static bool rex_matchme(const char *re, const char *str, int *len) {
|
|
150
152
|
* position of the start of the match is returned and its length is returned in
|
151
153
|
* len, else -1 is returned.
|
152
154
|
*/
|
153
|
-
static
|
155
|
+
static int32_t rex_match(const char *re, const char *str, uint32_t *len) {
|
154
156
|
// Special case for anchor at start
|
155
157
|
if (*re == '^') {
|
156
158
|
*len = 0;
|
@@ -159,7 +161,7 @@ static int rex_match(const char *re, const char *str, int *len) {
|
|
159
161
|
return -1;
|
160
162
|
}
|
161
163
|
// And general case for any position
|
162
|
-
|
164
|
+
int32_t pos = 0;
|
163
165
|
do {
|
164
166
|
*len = 0;
|
165
167
|
if (rex_matchme(re, str + pos, len))
|
@@ -215,8 +217,8 @@ pat_t *pat_comp(char *p) {
|
|
215
217
|
// on an over-estimation of the number of required items. As compiled
|
216
218
|
// patterns take a negligible amount of memory, this waste is not
|
217
219
|
// important.
|
218
|
-
|
219
|
-
for (
|
220
|
+
uint32_t mitems = 0;
|
221
|
+
for (uint32_t pos = 0; p[pos] != '\0'; pos++)
|
220
222
|
if (p[pos] == '%')
|
221
223
|
mitems++;
|
222
224
|
mitems = mitems * 2 + 1;
|
@@ -225,9 +227,9 @@ pat_t *pat_comp(char *p) {
|
|
225
227
|
// Next, we go through the pattern compiling the items as they are
|
226
228
|
// found. Commands are parsed and put in a corresponding item, and
|
227
229
|
// segments of chars not in a command are put in an 's' item.
|
228
|
-
|
229
|
-
|
230
|
-
|
230
|
+
uint32_t nitems = 0;
|
231
|
+
uint32_t ntoks = 0;
|
232
|
+
uint32_t pos = 0;
|
231
233
|
while (p[pos] != '\0') {
|
232
234
|
pat_item_t *item = &(pat->items[nitems++]);
|
233
235
|
item->value = NULL;
|
@@ -243,14 +245,14 @@ pat_t *pat_comp(char *p) {
|
|
243
245
|
// Next we parse the offset and column and store them in
|
244
246
|
// the item.
|
245
247
|
const char *at = p + pos;
|
246
|
-
|
248
|
+
uint32_t col;
|
249
|
+
int32_t off;
|
250
|
+
int nch;
|
247
251
|
item->absolute = false;
|
248
|
-
if (sscanf(at, "[@%
|
252
|
+
if (sscanf(at, "[@%"SCNi32",%"SCNu32"%n", &off, &col, &nch) == 2)
|
249
253
|
item->absolute = true;
|
250
|
-
else if (sscanf(at, "[%
|
254
|
+
else if (sscanf(at, "[%"SCNi32",%"SCNu32"%n", &off, &col, &nch) != 2)
|
251
255
|
fatal("invalid pattern: %s", p);
|
252
|
-
if (col < 0)
|
253
|
-
fatal("invalid column number: %d", col);
|
254
256
|
item->offset = off;
|
255
257
|
item->column = col;
|
256
258
|
ntoks = max(ntoks, col);
|
@@ -261,7 +263,7 @@ pat_t *pat_comp(char *p) {
|
|
261
263
|
if (type == 't' || type == 'm') {
|
262
264
|
if (p[pos] != ',' && p[pos + 1] != '"')
|
263
265
|
fatal("missing arg in pattern: %s", p);
|
264
|
-
const
|
266
|
+
const int32_t start = (pos += 2);
|
265
267
|
while (p[pos] != '\0') {
|
266
268
|
if (p[pos] == '"')
|
267
269
|
break;
|
@@ -271,7 +273,7 @@ pat_t *pat_comp(char *p) {
|
|
271
273
|
}
|
272
274
|
if (p[pos] != '"')
|
273
275
|
fatal("unended argument: %s", p);
|
274
|
-
const
|
276
|
+
const int32_t len = pos - start;
|
275
277
|
item->value = wapiti_xmalloc(sizeof(char) * (len + 1));
|
276
278
|
memcpy(item->value, p + start, len);
|
277
279
|
item->value[len] = '\0';
|
@@ -285,10 +287,10 @@ pat_t *pat_comp(char *p) {
|
|
285
287
|
// No command here, so build an 's' item with the chars
|
286
288
|
// until end of pattern or next command and put it in
|
287
289
|
// the list.
|
288
|
-
const
|
290
|
+
const int32_t start = pos;
|
289
291
|
while (p[pos] != '\0' && p[pos] != '%')
|
290
292
|
pos++;
|
291
|
-
const
|
293
|
+
const int32_t len = pos - start;
|
292
294
|
item->type = 's';
|
293
295
|
item->caps = false;
|
294
296
|
item->value = wapiti_xmalloc(sizeof(char) * (len + 1));
|
@@ -307,18 +309,18 @@ pat_t *pat_comp(char *p) {
|
|
307
309
|
* newly allocated memory block and the caller is responsible to free it when
|
308
310
|
* not needed anymore.
|
309
311
|
*/
|
310
|
-
char *pat_exec(const pat_t *pat, const tok_t *tok,
|
311
|
-
static char *bval[] = {"_x-1", "_x-2", "_x-3", "_x-4", "_x-#"};
|
312
|
-
static char *eval[] = {"_x+1", "_x+2", "_x+3", "_x+4", "_x+#"};
|
313
|
-
const
|
312
|
+
char *pat_exec(const pat_t *pat, const tok_t *tok, uint32_t at) {
|
313
|
+
static const char *bval[] = {"_x-1", "_x-2", "_x-3", "_x-4", "_x-#"};
|
314
|
+
static const char *eval[] = {"_x+1", "_x+2", "_x+3", "_x+4", "_x+#"};
|
315
|
+
const uint32_t T = tok->len;
|
314
316
|
// Prepare the buffer that will hold the result
|
315
|
-
|
317
|
+
uint32_t size = 16, pos = 0;
|
316
318
|
char *buffer = wapiti_xmalloc(sizeof(char) * size);
|
317
319
|
// And loop over the compiled items
|
318
|
-
for (
|
320
|
+
for (uint32_t it = 0; it < pat->nitems; it++) {
|
319
321
|
const pat_item_t *item = &(pat->items[it]);
|
320
|
-
char *value = NULL;
|
321
|
-
|
322
|
+
const char *value = NULL;
|
323
|
+
uint32_t len = 0;
|
322
324
|
// First, if needed, we retrieve the token at the referenced
|
323
325
|
// position in the sequence. We store it in value and let the
|
324
326
|
// command handler do what it needs with it.
|
@@ -332,11 +334,11 @@ char *pat_exec(const pat_t *pat, const tok_t *tok, int at) {
|
|
332
334
|
} else {
|
333
335
|
pos += at;
|
334
336
|
}
|
335
|
-
|
337
|
+
uint32_t col = item->column;
|
336
338
|
if (pos < 0)
|
337
339
|
value = bval[min(-pos - 1, 4)];
|
338
|
-
else if (pos >= T)
|
339
|
-
value = eval[min( pos - T, 4)];
|
340
|
+
else if (pos >= (int32_t)T)
|
341
|
+
value = eval[min( pos - (int32_t)T, 4)];
|
340
342
|
else if (col >= tok->cnts[pos])
|
341
343
|
fatal("missing tokens, cannot apply pattern");
|
342
344
|
else
|
@@ -356,7 +358,7 @@ char *pat_exec(const pat_t *pat, const tok_t *tok, int at) {
|
|
356
358
|
value = "true";
|
357
359
|
len = strlen(value);
|
358
360
|
} else if (item->type == 'm') {
|
359
|
-
|
361
|
+
int32_t pos = rex_match(item->value, value, &len);
|
360
362
|
if (pos == -1)
|
361
363
|
len = 0;
|
362
364
|
value += pos;
|
@@ -370,7 +372,7 @@ char *pat_exec(const pat_t *pat, const tok_t *tok, int at) {
|
|
370
372
|
}
|
371
373
|
memcpy(buffer + pos, value, len);
|
372
374
|
if (item->caps)
|
373
|
-
for (
|
375
|
+
for (uint32_t i = pos; i < pos + len; i++)
|
374
376
|
buffer[i] = tolower(buffer[i]);
|
375
377
|
pos += len;
|
376
378
|
}
|
@@ -386,7 +388,7 @@ char *pat_exec(const pat_t *pat, const tok_t *tok, int at) {
|
|
386
388
|
* not use this pointer again.
|
387
389
|
*/
|
388
390
|
void pat_free(pat_t *pat) {
|
389
|
-
for (
|
391
|
+
for (uint32_t it = 0; it < pat->nitems; it++)
|
390
392
|
free(pat->items[it].value);
|
391
393
|
free(pat->src);
|
392
394
|
free(pat);
|