wapiti 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.autotest +13 -0
- data/.gitignore +5 -0
- data/.rspec +3 -0
- data/Gemfile +6 -0
- data/LICENSE +30 -0
- data/README.md +153 -0
- data/Rakefile +33 -0
- data/ext/wapiti/bcd.c +392 -0
- data/ext/wapiti/decoder.c +535 -0
- data/ext/wapiti/decoder.h +46 -0
- data/ext/wapiti/extconf.rb +8 -0
- data/ext/wapiti/gradient.c +818 -0
- data/ext/wapiti/gradient.h +81 -0
- data/ext/wapiti/lbfgs.c +294 -0
- data/ext/wapiti/model.c +296 -0
- data/ext/wapiti/model.h +100 -0
- data/ext/wapiti/native.c +1238 -0
- data/ext/wapiti/native.h +15 -0
- data/ext/wapiti/options.c +278 -0
- data/ext/wapiti/options.h +91 -0
- data/ext/wapiti/pattern.c +395 -0
- data/ext/wapiti/pattern.h +56 -0
- data/ext/wapiti/progress.c +167 -0
- data/ext/wapiti/progress.h +43 -0
- data/ext/wapiti/quark.c +272 -0
- data/ext/wapiti/quark.h +46 -0
- data/ext/wapiti/reader.c +553 -0
- data/ext/wapiti/reader.h +73 -0
- data/ext/wapiti/rprop.c +191 -0
- data/ext/wapiti/sequence.h +148 -0
- data/ext/wapiti/sgdl1.c +218 -0
- data/ext/wapiti/thread.c +171 -0
- data/ext/wapiti/thread.h +42 -0
- data/ext/wapiti/tools.c +202 -0
- data/ext/wapiti/tools.h +54 -0
- data/ext/wapiti/trainers.h +39 -0
- data/ext/wapiti/vmath.c +372 -0
- data/ext/wapiti/vmath.h +51 -0
- data/ext/wapiti/wapiti.c +288 -0
- data/ext/wapiti/wapiti.h +45 -0
- data/lib/wapiti.rb +30 -0
- data/lib/wapiti/errors.rb +17 -0
- data/lib/wapiti/model.rb +49 -0
- data/lib/wapiti/options.rb +113 -0
- data/lib/wapiti/utility.rb +15 -0
- data/lib/wapiti/version.rb +3 -0
- data/spec/fixtures/ch.mod +18550 -0
- data/spec/fixtures/chpattern.txt +52 -0
- data/spec/fixtures/chtest.txt +1973 -0
- data/spec/fixtures/chtrain.txt +19995 -0
- data/spec/fixtures/nppattern.txt +52 -0
- data/spec/fixtures/nptest.txt +1973 -0
- data/spec/fixtures/nptrain.txt +19995 -0
- data/spec/fixtures/pattern.txt +14 -0
- data/spec/fixtures/test.txt +60000 -0
- data/spec/fixtures/train.txt +1200 -0
- data/spec/spec_helper.rb +21 -0
- data/spec/wapiti/model_spec.rb +173 -0
- data/spec/wapiti/native_spec.rb +12 -0
- data/spec/wapiti/options_spec.rb +175 -0
- data/spec/wapiti/utility_spec.rb +22 -0
- data/wapiti.gemspec +35 -0
- metadata +178 -0
data/ext/wapiti/thread.c
ADDED
@@ -0,0 +1,171 @@
|
|
1
|
+
/*
|
2
|
+
* Wapiti - A linear-chain CRF tool
|
3
|
+
*
|
4
|
+
* Copyright (c) 2009-2011 CNRS
|
5
|
+
* All rights reserved.
|
6
|
+
*
|
7
|
+
* Redistribution and use in source and binary forms, with or without
|
8
|
+
* modification, are permitted provided that the following conditions are met:
|
9
|
+
* * Redistributions of source code must retain the above copyright
|
10
|
+
* notice, this list of conditions and the following disclaimer.
|
11
|
+
* * Redistributions in binary form must reproduce the above copyright
|
12
|
+
* notice, this list of conditions and the following disclaimer in the
|
13
|
+
* documentation and/or other materials provided with the distribution.
|
14
|
+
*
|
15
|
+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
16
|
+
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
17
|
+
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
18
|
+
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
19
|
+
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
20
|
+
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
21
|
+
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
22
|
+
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
23
|
+
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
24
|
+
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
25
|
+
* POSSIBILITY OF SUCH DAMAGE.
|
26
|
+
*/
|
27
|
+
|
28
|
+
#include "model.h"
|
29
|
+
#include "tools.h"
|
30
|
+
#include "thread.h"
|
31
|
+
#include "wapiti.h"
|
32
|
+
|
33
|
+
/******************************************************************************
|
34
|
+
* Multi-threading code
|
35
|
+
*
|
36
|
+
* This module handles the thread management code using POSIX pthreads, on
|
37
|
+
* non-POSIX systems you will have to rewrite this using your systems threads.
|
38
|
+
* all code that depends on threads is located here so this process must not be
|
39
|
+
* too difficult.
|
40
|
+
* If you don't want to use multithreading on non-POSIX system, just enable
|
41
|
+
* the definition of MTH_ANSI in wapiti.h. This will disable multithreading.
|
42
|
+
*
|
43
|
+
* The jobs system is a simple scheduling system, you have to provide the
|
44
|
+
* number of jobs to be done and the size of each batch, a call to getjob will
|
45
|
+
* return the index of the first available and the size of the batch, and mark
|
46
|
+
* these jobs as done. This is useful if your jobs are numbered but you can't
|
47
|
+
* do a trivial split as each of them may require different amount of time to
|
48
|
+
* be completed like gradient computation which depend on the length of the
|
49
|
+
* sequences.
|
50
|
+
* If you provide a count of 0, the job system is disabled.
|
51
|
+
******************************************************************************/
|
52
|
+
#ifdef MTH_ANSI
|
53
|
+
/* job_s:
 *   Job descriptor of the single-threaded (MTH_ANSI) build. It only records
 *   how many jobs are still waiting to be handed out.
 */
struct job_s {
	size_t size;  // Jobs not handed out yet; reset to 0 by mth_getjob
};
|
56
|
+
|
57
|
+
/* mth_getjob:
 *   Single-threaded variant: hand out all the remaining jobs as one batch.
 *   On success <cnt> receives the number of jobs, <pos> is set to 0, the
 *   jobs are marked as handed out and true is returned. Return false when
 *   the job system is disabled (<job> is NULL, which is what mth_spawn
 *   passes to the user function for a count of 0) or when the jobs were
 *   already handed out; <cnt> and <pos> are left untouched in that case.
 */
bool mth_getjob(job_t *job, size_t *cnt, size_t *pos) {
	if (job == NULL || job->size == 0)
		return false;
	*cnt = job->size;
	*pos = 0;
	job->size = 0;
	return true;
}
|
65
|
+
|
66
|
+
/* mth_spawn:
 *   Single-threaded variant: call 'f' exactly once in the calling thread, as
 *   worker 0 out of 1, with the first entry of the 'ud' array. <batch> is
 *   ignored. With a <size> of 0 the job system is disabled and 'f' gets a
 *   NULL job, else it gets a scheduler holding <size> jobs.
 */
void mth_spawn(func_t *f, int W, void *ud[W], size_t size, size_t batch) {
	unused(batch);
	job_t sched = {size};
	job_t *pjob = (size != 0) ? &sched : NULL;
	f(pjob, 0, 1, ud[0]);
}
|
75
|
+
|
76
|
+
#else
|
77
|
+
|
78
|
+
#include <pthread.h>
|
79
|
+
|
80
|
+
/* job_s:
 *   State of the job scheduler shared by the worker threads. Batches are
 *   taken through mth_getjob, which advances <send> by the batch size.
 */
struct job_s {
	size_t size;           // Total number of jobs
	size_t send;           // Number of jobs already handed out
	size_t batch;          // Maximum number of jobs in one batch
	pthread_mutex_t lock;  // Locked by mth_getjob while it updates <send>
};
|
86
|
+
|
87
|
+
typedef struct mth_s mth_t;
|
88
|
+
struct mth_s {
|
89
|
+
job_t *job;
|
90
|
+
int id;
|
91
|
+
int cnt;
|
92
|
+
func_t *f;
|
93
|
+
void *ud;
|
94
|
+
};
|
95
|
+
|
96
|
+
/* mth_getjob:
|
97
|
+
* Get a new bunch of sequence to process. This function will return a new
|
98
|
+
* batch of sequence to process starting at position <pos> and with size
|
99
|
+
* <cnt> and return true. If no more batch are available, return false.
|
100
|
+
* This function use a lock to ensure thread safety as it will be called by
|
101
|
+
* the multiple workers threads.
|
102
|
+
*/
|
103
|
+
bool mth_getjob(job_t *job, size_t *cnt, size_t *pos) {
|
104
|
+
if (job == NULL)
|
105
|
+
return false;
|
106
|
+
if (job->send == job->size)
|
107
|
+
return false;
|
108
|
+
pthread_mutex_lock(&job->lock);
|
109
|
+
*cnt = min(job->batch, job->size - job->send);
|
110
|
+
*pos = job->send;
|
111
|
+
job->send += *cnt;
|
112
|
+
pthread_mutex_unlock(&job->lock);
|
113
|
+
return true;
|
114
|
+
}
|
115
|
+
|
116
|
+
static void *mth_stub(void *ud) {
|
117
|
+
mth_t *mth = (mth_t *)ud;
|
118
|
+
mth->f(mth->job, mth->id, mth->cnt, mth->ud);
|
119
|
+
return NULL;
|
120
|
+
}
|
121
|
+
|
122
|
+
/* mth_spawn:
|
123
|
+
* This function spawn W threads for calling the 'f' function. The function
|
124
|
+
* will get a unique identifier between 0 and W-1 and a user data from the
|
125
|
+
* 'ud' array.
|
126
|
+
*/
|
127
|
+
void mth_spawn(func_t *f, int W, void *ud[W], size_t size, size_t batch) {
|
128
|
+
// First prepare the jobs scheduler
|
129
|
+
job_t job, *pjob = NULL;
|
130
|
+
if (size != 0) {
|
131
|
+
pjob = &job;
|
132
|
+
job.size = size;
|
133
|
+
job.send = 0;
|
134
|
+
job.batch = batch;
|
135
|
+
if (pthread_mutex_init(&job.lock, NULL) != 0)
|
136
|
+
fatal("failed to create mutex");
|
137
|
+
}
|
138
|
+
// We handle differently the case where user requested a single thread
|
139
|
+
// for efficiency.
|
140
|
+
if (W == 1) {
|
141
|
+
f(&job, 0, 1, ud[0]);
|
142
|
+
return;
|
143
|
+
}
|
144
|
+
// We prepare the parameters structures that will be send to the threads
|
145
|
+
// with informations for calling the user function.
|
146
|
+
mth_t p[W];
|
147
|
+
for (int w = 0; w < W; w++) {
|
148
|
+
p[w].job = pjob;
|
149
|
+
p[w].id = w;
|
150
|
+
p[w].cnt = W;
|
151
|
+
p[w].f = f;
|
152
|
+
p[w].ud = ud[w];
|
153
|
+
}
|
154
|
+
// We are now ready to spawn the threads and wait for them to finish
|
155
|
+
// their jobs. So we just create all the thread and try to join them
|
156
|
+
// waiting for there return.
|
157
|
+
pthread_attr_t attr;
|
158
|
+
pthread_attr_init(&attr);
|
159
|
+
pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM);
|
160
|
+
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
|
161
|
+
pthread_t th[W];
|
162
|
+
for (int w = 0; w < W; w++)
|
163
|
+
if (pthread_create(&th[w], &attr, &mth_stub, &p[w]) != 0)
|
164
|
+
fatal("failed to create thread");
|
165
|
+
for (int w = 0; w < W; w++)
|
166
|
+
if (pthread_join(th[w], NULL) != 0)
|
167
|
+
fatal("failed to join thread");
|
168
|
+
pthread_attr_destroy(&attr);
|
169
|
+
}
|
170
|
+
#endif
|
171
|
+
|
data/ext/wapiti/thread.h
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
/*
|
2
|
+
* Wapiti - A linear-chain CRF tool
|
3
|
+
*
|
4
|
+
* Copyright (c) 2009-2011 CNRS
|
5
|
+
* All rights reserved.
|
6
|
+
*
|
7
|
+
* Redistribution and use in source and binary forms, with or without
|
8
|
+
* modification, are permitted provided that the following conditions are met:
|
9
|
+
* * Redistributions of source code must retain the above copyright
|
10
|
+
* notice, this list of conditions and the following disclaimer.
|
11
|
+
* * Redistributions in binary form must reproduce the above copyright
|
12
|
+
* notice, this list of conditions and the following disclaimer in the
|
13
|
+
* documentation and/or other materials provided with the distribution.
|
14
|
+
*
|
15
|
+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
16
|
+
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
17
|
+
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
18
|
+
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
19
|
+
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
20
|
+
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
21
|
+
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
22
|
+
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
23
|
+
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
24
|
+
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
25
|
+
* POSSIBILITY OF SUCH DAMAGE.
|
26
|
+
*/
|
27
|
+
|
28
|
+
#ifndef thread_h
|
29
|
+
#define thread_h
|
30
|
+
|
31
|
+
#include <pthread.h>
|
32
|
+
|
33
|
+
#include "model.h"
|
34
|
+
|
35
|
+
/* Job scheduler handle; the structure itself is defined in thread.c. */
typedef struct job_s job_t;

/* Signature of a worker function: it receives the job scheduler, the
 * identifier of the worker, the number of workers and its user data.
 */
typedef void func_t(job_t *job, int id, int cnt, void *ud);

/* Fetch the next batch of jobs: on success store its size in <cnt> and its
 * first index in <pos> and return true; return false when none is available.
 */
bool mth_getjob(job_t *job, size_t *cnt, size_t *pos);

/* Call 'f' for the workers described by W and the 'ud' array and return once
 * they are done; <size> jobs are made available in batches of <batch>, and a
 * <size> of 0 disables the job system.
 */
void mth_spawn(func_t *f, int W, void *ud[W], size_t size, size_t batch);
|
41
|
+
|
42
|
+
#endif
|
data/ext/wapiti/tools.c
ADDED
@@ -0,0 +1,202 @@
|
|
1
|
+
/*
|
2
|
+
* Wapiti - A linear-chain CRF tool
|
3
|
+
*
|
4
|
+
* Copyright (c) 2009-2011 CNRS
|
5
|
+
* All rights reserved.
|
6
|
+
*
|
7
|
+
* Redistribution and use in source and binary forms, with or without
|
8
|
+
* modification, are permitted provided that the following conditions are met:
|
9
|
+
* * Redistributions of source code must retain the above copyright
|
10
|
+
* notice, this list of conditions and the following disclaimer.
|
11
|
+
* * Redistributions in binary form must reproduce the above copyright
|
12
|
+
* notice, this list of conditions and the following disclaimer in the
|
13
|
+
* documentation and/or other materials provided with the distribution.
|
14
|
+
*
|
15
|
+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
16
|
+
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
17
|
+
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
18
|
+
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
19
|
+
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
20
|
+
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
21
|
+
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
22
|
+
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
23
|
+
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
24
|
+
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
25
|
+
* POSSIBILITY OF SUCH DAMAGE.
|
26
|
+
*/
|
27
|
+
|
28
|
+
#include <errno.h>
|
29
|
+
#include <stdarg.h>
|
30
|
+
#include <stddef.h>
|
31
|
+
#include <stdlib.h>
|
32
|
+
#include <stdio.h>
|
33
|
+
#include <string.h>
|
34
|
+
|
35
|
+
#include "tools.h"
|
36
|
+
|
37
|
+
/*
|
38
|
+
* Wapiti Ruby Logging
|
39
|
+
*
|
40
|
+
* Wapiti-Ruby delegates all wapiti logging messages to a Ruby logger in the
|
41
|
+
* main Wapiti module.
|
42
|
+
*
|
43
|
+
*/
|
44
|
+
|
45
|
+
#include "native.h"
|
46
|
+
|
47
|
+
|
48
|
+
/*******************************************************************************
|
49
|
+
* Error handling and memory management
|
50
|
+
*
|
51
|
+
* Wapiti uses a very simple system for error handling: violently fail. Errors
|
52
|
+
* can occur in two cases, when the user feeds Wapiti with bad data or when there
|
53
|
+
* is a problem on the system side. In both cases, there is nothing we can do,
|
54
|
+
* so the best thing is to exit with a meaningful error message.
|
55
|
+
*
|
56
|
+
* Memory allocation is one of the possible points of failure and it's painful
|
57
|
+
* to always remember to check the return value of malloc so we provide wrappers
|
58
|
+
* around it and realloc that check and fail in case of error.
|
59
|
+
******************************************************************************/
|
60
|
+
|
61
|
+
/* fatal:
|
62
|
+
* This is the main error function, it will print the given message with same
|
63
|
+
* formating than the printf family and exit program with an error. We let the
|
64
|
+
* OS care about freeing ressources.
|
65
|
+
*/
|
66
|
+
void fatal(const char *msg, ...) {
|
67
|
+
va_list args;
|
68
|
+
va_start(args, msg);
|
69
|
+
|
70
|
+
// VALUE message = rb_vsprintf(msg, args);
|
71
|
+
VALUE message = rb_str_new2(msg);
|
72
|
+
|
73
|
+
va_end(args);
|
74
|
+
|
75
|
+
(void)rb_funcall(cLogger, rb_intern("fatal"), 1, message);
|
76
|
+
rb_raise(cNativeError, StringValuePtr(message));
|
77
|
+
}
|
78
|
+
|
79
|
+
/* pfatal:
|
80
|
+
* This one is very similar to the fatal function but print an additional
|
81
|
+
* system error message depending on the errno. This can be used when a
|
82
|
+
* function who set the errno fail to print more detailed informations. You
|
83
|
+
* must be carefull to not call other functino that might reset it before
|
84
|
+
* calling pfatal.
|
85
|
+
*/
|
86
|
+
void pfatal(const char *msg, ...) {
|
87
|
+
const char *err = strerror(errno);
|
88
|
+
va_list args;
|
89
|
+
va_start(args, msg);
|
90
|
+
|
91
|
+
// VALUE message = rb_vsprintf(msg, args);
|
92
|
+
// rb_str_catf(message, ": <%s>", err);
|
93
|
+
VALUE message = rb_str_new2(msg);
|
94
|
+
|
95
|
+
va_end(args);
|
96
|
+
|
97
|
+
(void)rb_funcall(cLogger, rb_intern("fatal"), 1, message);
|
98
|
+
rb_raise(cNativeError, StringValuePtr(message));
|
99
|
+
}
|
100
|
+
|
101
|
+
/* warning:
|
102
|
+
* This one is less violent as it just print a warning on stderr, but doesn't
|
103
|
+
* exit the program. It is intended to inform the user that something strange
|
104
|
+
* have happen and the result might be not what it have expected.
|
105
|
+
*/
|
106
|
+
void warning(const char *msg, ...) {
|
107
|
+
va_list args;
|
108
|
+
va_start(args, msg);
|
109
|
+
|
110
|
+
// (void)rb_funcall(cLogger, rb_intern("warn"), 1, rb_vsprintf(msg, args));
|
111
|
+
(void)rb_funcall(cLogger, rb_intern("warn"), 1, rb_str_new2(msg));
|
112
|
+
|
113
|
+
va_end(args);
|
114
|
+
}
|
115
|
+
|
116
|
+
/* info:
|
117
|
+
* Function used for all progress reports. This is where an eventual verbose
|
118
|
+
* level can be implemented later or redirection to a logfile. For now, it is
|
119
|
+
* just a wrapper for printf to stderr. Note that unlike the previous one,
|
120
|
+
* this function doesn't automatically append a new line character.
|
121
|
+
*/
|
122
|
+
void info(const char *msg, ...) {
|
123
|
+
va_list args;
|
124
|
+
va_start(args, msg);
|
125
|
+
|
126
|
+
// (void)rb_funcall(cLogger, rb_intern("info"), 1, rb_vsprintf(msg, args));
|
127
|
+
(void)rb_funcall(cLogger, rb_intern("info"), 1, rb_str_new2(msg));
|
128
|
+
|
129
|
+
va_end(args);
|
130
|
+
}
|
131
|
+
|
132
|
+
/* wapiti_xmalloc:
 *   Thin wrapper around malloc: a successful allocation is returned as is,
 *   and a NULL result from malloc is reported through fatal with the
 *   message "out of memory".
 */
void *wapiti_xmalloc(size_t size) {
	void *mem = malloc(size);
	if (mem != NULL)
		return mem;
	fatal("out of memory");
	return mem;
}
|
142
|
+
|
143
|
+
/* wapiti_xrealloc:
 *   Counterpart of wapiti_xmalloc for realloc: resize the block at <ptr> to
 *   <size> bytes and return the pointer given by realloc. A NULL result is
 *   reported through fatal with the message "out of memory".
 */
void *wapiti_xrealloc(void *ptr, size_t size) {
	void *resized = realloc(ptr, size);
	if (!resized)
		fatal("out of memory");
	return resized;
}
|
153
|
+
|
154
|
+
/* xstrdup:
 *   As the previous ones, this is a safe version of strdup: it returns a
 *   newly allocated copy of <str>, terminator included, and the allocation
 *   goes through wapiti_xmalloc which reports failures itself. The length is
 *   kept as a size_t so that it cannot be truncated.
 */
char *xstrdup(const char *str) {
	const size_t len = strlen(str) + 1;
	char *res = wapiti_xmalloc(len);
	memcpy(res, str, len);
	return res;
}
|
164
|
+
|
165
|
+
/******************************************************************************
|
166
|
+
* Netstring for persistent storage
|
167
|
+
*
|
168
|
+
* This follows the format proposed by D.J. Bernstein for safe and portable
|
169
|
+
* storage of strings in persistent files and networks. This is used for storing
|
170
|
+
* strings in saved models.
|
171
|
+
* We just add an additional end-of-line character to make the output files
|
172
|
+
* more readable.
|
173
|
+
*
|
174
|
+
******************************************************************************/
|
175
|
+
|
176
|
+
/* ns_readstr:
 *   Read a string from the given file in netstring format, that is a decimal
 *   length, a ':', the bytes of the string and a ','. The character which
 *   follows the ',' (the end-of-line written by ns_writestr) is skipped too.
 *   The string is returned as a newly allocated, 0-terminated block of
 *   memory. Read failures are reported through pfatal, a negative length or
 *   a missing ',' through fatal; the buffer is released first.
 */
char *ns_readstr(FILE *file) {
	int len;
	if (fscanf(file, "%d:", &len) != 1)
		pfatal("cannot read from file");
	if (len < 0)
		fatal("invalid format");
	const size_t size = (size_t)len;
	char *buf = wapiti_xmalloc(size + 1);
	// Read <size> items of one byte: fread returns the number of complete
	// items, so asking for a single item of <size> bytes would report a
	// failure on the empty string.
	if (fread(buf, 1, size, file) != size) {
		const int saved = errno;  // keep errno intact for pfatal
		free(buf);
		errno = saved;
		pfatal("cannot read from file");
	}
	if (fgetc(file) != ',') {
		free(buf);
		fatal("invalid format");
	}
	buf[size] = '\0';
	fgetc(file);
	return buf;
}
|
193
|
+
|
194
|
+
/* ns_writestr:
 *   Write a string to the given file in netstring format: the decimal
 *   length, a ':', the string itself and a ',', followed by an end-of-line
 *   character. A failed write is reported through pfatal.
 */
void ns_writestr(FILE *file, const char *str) {
	const int len = strlen(str);
	const int rc = fprintf(file, "%d:%s,\n", len, str);
	if (rc < 0)
		pfatal("cannot write to file");
}
|
202
|
+
|
data/ext/wapiti/tools.h
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
/*
|
2
|
+
* Wapiti - A linear-chain CRF tool
|
3
|
+
*
|
4
|
+
* Copyright (c) 2009-2011 CNRS
|
5
|
+
* All rights reserved.
|
6
|
+
*
|
7
|
+
* Redistribution and use in source and binary forms, with or without
|
8
|
+
* modification, are permitted provided that the following conditions are met:
|
9
|
+
* * Redistributions of source code must retain the above copyright
|
10
|
+
* notice, this list of conditions and the following disclaimer.
|
11
|
+
* * Redistributions in binary form must reproduce the above copyright
|
12
|
+
* notice, this list of conditions and the following disclaimer in the
|
13
|
+
* documentation and/or other materials provided with the distribution.
|
14
|
+
*
|
15
|
+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
16
|
+
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
17
|
+
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
18
|
+
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
19
|
+
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
20
|
+
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
21
|
+
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
22
|
+
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
23
|
+
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
24
|
+
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
25
|
+
* POSSIBILITY OF SUCH DAMAGE.
|
26
|
+
*/
|
27
|
+
#ifndef tools_h
|
28
|
+
#define tools_h
|
29
|
+
|
30
|
+
#include <stdarg.h>
|
31
|
+
#include <stddef.h>
|
32
|
+
#include <stdio.h>
|
33
|
+
|
34
|
+
#include <ruby.h>
|
35
|
+
|
36
|
+
/* Evaluate <v> and discard the result; used to mark a parameter as
 * deliberately unused.
 */
#define unused(v) ((void)(v))

/* The largest value a size_t can hold.
 * NOTE(review): presumably used as a "no value" marker -- confirm with callers.
 */
#define none ((size_t)-1)

/* Smaller and larger of two values. These are macros: the selected argument
 * is evaluated twice, so do not pass expressions with side effects.
 */
#define min(a, b) ((a) < (b) ? (a) : (b))
#define max(a, b) ((a) < (b) ? (b) : (a))
|
41
|
+
|
42
|
+
/* Report an error to the Ruby logger and raise it as a native error. */
void fatal(const char *msg, ...);

/* Same as fatal, for failures of functions which set errno. */
void pfatal(const char *msg, ...);

/* Send a message to the 'warn' method of the Ruby logger. */
void warning(const char *msg, ...);

/* Send a message to the 'info' method of the Ruby logger. */
void info(const char *msg, ...);

/* Wrappers around malloc and realloc which report a NULL result through
 * fatal.
 */
void *wapiti_xmalloc(size_t size);
void *wapiti_xrealloc(void *ptr, size_t size);

/* Return a newly allocated copy of <str>. */
char *xstrdup(const char *str);

/* Read, respectively write, a string in netstring format. ns_readstr
 * returns a newly allocated, 0-terminated string.
 */
char *ns_readstr(FILE *file);
void ns_writestr(FILE *file, const char *str);
|
53
|
+
|
54
|
+
#endif
|