wapiti 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.autotest +13 -0
- data/.gitignore +5 -0
- data/.rspec +3 -0
- data/Gemfile +6 -0
- data/LICENSE +30 -0
- data/README.md +153 -0
- data/Rakefile +33 -0
- data/ext/wapiti/bcd.c +392 -0
- data/ext/wapiti/decoder.c +535 -0
- data/ext/wapiti/decoder.h +46 -0
- data/ext/wapiti/extconf.rb +8 -0
- data/ext/wapiti/gradient.c +818 -0
- data/ext/wapiti/gradient.h +81 -0
- data/ext/wapiti/lbfgs.c +294 -0
- data/ext/wapiti/model.c +296 -0
- data/ext/wapiti/model.h +100 -0
- data/ext/wapiti/native.c +1238 -0
- data/ext/wapiti/native.h +15 -0
- data/ext/wapiti/options.c +278 -0
- data/ext/wapiti/options.h +91 -0
- data/ext/wapiti/pattern.c +395 -0
- data/ext/wapiti/pattern.h +56 -0
- data/ext/wapiti/progress.c +167 -0
- data/ext/wapiti/progress.h +43 -0
- data/ext/wapiti/quark.c +272 -0
- data/ext/wapiti/quark.h +46 -0
- data/ext/wapiti/reader.c +553 -0
- data/ext/wapiti/reader.h +73 -0
- data/ext/wapiti/rprop.c +191 -0
- data/ext/wapiti/sequence.h +148 -0
- data/ext/wapiti/sgdl1.c +218 -0
- data/ext/wapiti/thread.c +171 -0
- data/ext/wapiti/thread.h +42 -0
- data/ext/wapiti/tools.c +202 -0
- data/ext/wapiti/tools.h +54 -0
- data/ext/wapiti/trainers.h +39 -0
- data/ext/wapiti/vmath.c +372 -0
- data/ext/wapiti/vmath.h +51 -0
- data/ext/wapiti/wapiti.c +288 -0
- data/ext/wapiti/wapiti.h +45 -0
- data/lib/wapiti.rb +30 -0
- data/lib/wapiti/errors.rb +17 -0
- data/lib/wapiti/model.rb +49 -0
- data/lib/wapiti/options.rb +113 -0
- data/lib/wapiti/utility.rb +15 -0
- data/lib/wapiti/version.rb +3 -0
- data/spec/fixtures/ch.mod +18550 -0
- data/spec/fixtures/chpattern.txt +52 -0
- data/spec/fixtures/chtest.txt +1973 -0
- data/spec/fixtures/chtrain.txt +19995 -0
- data/spec/fixtures/nppattern.txt +52 -0
- data/spec/fixtures/nptest.txt +1973 -0
- data/spec/fixtures/nptrain.txt +19995 -0
- data/spec/fixtures/pattern.txt +14 -0
- data/spec/fixtures/test.txt +60000 -0
- data/spec/fixtures/train.txt +1200 -0
- data/spec/spec_helper.rb +21 -0
- data/spec/wapiti/model_spec.rb +173 -0
- data/spec/wapiti/native_spec.rb +12 -0
- data/spec/wapiti/options_spec.rb +175 -0
- data/spec/wapiti/utility_spec.rb +22 -0
- data/wapiti.gemspec +35 -0
- metadata +178 -0
data/ext/wapiti/thread.c
ADDED
@@ -0,0 +1,171 @@
|
|
1
|
+
/*
|
2
|
+
* Wapiti - A linear-chain CRF tool
|
3
|
+
*
|
4
|
+
* Copyright (c) 2009-2011 CNRS
|
5
|
+
* All rights reserved.
|
6
|
+
*
|
7
|
+
* Redistribution and use in source and binary forms, with or without
|
8
|
+
* modification, are permitted provided that the following conditions are met:
|
9
|
+
* * Redistributions of source code must retain the above copyright
|
10
|
+
* notice, this list of conditions and the following disclaimer.
|
11
|
+
* * Redistributions in binary form must reproduce the above copyright
|
12
|
+
* notice, this list of conditions and the following disclaimer in the
|
13
|
+
* documentation and/or other materials provided with the distribution.
|
14
|
+
*
|
15
|
+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
16
|
+
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
17
|
+
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
18
|
+
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
19
|
+
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
20
|
+
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
21
|
+
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
22
|
+
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
23
|
+
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
24
|
+
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
25
|
+
* POSSIBILITY OF SUCH DAMAGE.
|
26
|
+
*/
|
27
|
+
|
28
|
+
#include "model.h"
|
29
|
+
#include "tools.h"
|
30
|
+
#include "thread.h"
|
31
|
+
#include "wapiti.h"
|
32
|
+
|
33
|
+
/******************************************************************************
|
34
|
+
* Multi-threading code
|
35
|
+
*
|
36
|
+
* This module handles the thread management code using POSIX pthreads; on
|
37
|
+
* non-POSIX systems you will have to rewrite this using your systems threads.
|
38
|
+
* All code that depends on threads is located here, so this process should not be
|
39
|
+
* too difficult.
|
40
|
+
* If you don't want to use multithreading on non-POSIX system, just enable
|
41
|
+
* the definition of MTH_ANSI in wapiti.h. This will disable multithreading.
|
42
|
+
*
|
43
|
+
* The jobs system is a simple scheduling system, you have to provide the
|
44
|
+
* number of jobs to be done and the size of each batch, a call to getjob will
|
45
|
+
* return the index of the first available and the size of the batch, and mark
|
46
|
+
* these jobs as done. This is useful if your jobs are numbered but you can't
|
47
|
+
* do a trivial split as each of them may require different amount of time to
|
48
|
+
* be completed, like gradient computation which depends on the length of the
|
49
|
+
* sequences.
|
50
|
+
* If you provide a count of 0, the job system is disabled.
|
51
|
+
******************************************************************************/
|
52
|
+
#ifdef MTH_ANSI
|
53
|
+
/* job_s (MTH_ANSI): job descriptor used when multithreading is disabled. */
struct job_s {
|
54
|
+
size_t size; /* jobs not yet handed out; mth_getjob resets it to 0 */
|
55
|
+
};
|
56
|
+
|
57
|
+
/* mth_getjob:
 *   Single-threaded variant of the job scheduler. The first call hands out
 *   all the jobs at once: <pos> is set to 0, <cnt> to the total number of
 *   jobs, and true is returned. Any later call returns false and leaves
 *   <cnt> and <pos> untouched.
 *   A NULL <job> means the job system is disabled (this is what mth_spawn
 *   passes when the job count is 0), in which case false is returned.
 */
bool mth_getjob(job_t *job, size_t *cnt, size_t *pos) {
	// Guard against the disabled scheduler before touching the descriptor.
	if (job == NULL || job->size == 0)
		return false;
	*cnt = job->size;
	*pos = 0;
	// Mark everything as handed out so the next call reports exhaustion.
	job->size = 0;
	return true;
}
|
65
|
+
|
66
|
+
/* mth_spawn:
 *   Single-threaded stand-in for the thread spawner: 'f' is called exactly
 *   once, in the calling thread, with identifier 0, a worker count of 1 and
 *   the first entry of the 'ud' array. When <size> is not zero the function
 *   receives a job descriptor holding <size> jobs, otherwise it receives
 *   NULL. The <batch> argument is ignored.
 */
void mth_spawn(func_t *f, int W, void *ud[W], size_t size, size_t batch) {
	unused(batch);
	if (size != 0) {
		job_t job = {size};
		f(&job, 0, 1, ud[0]);
		return;
	}
	// No jobs requested: the job system is disabled.
	f(NULL, 0, 1, ud[0]);
}
|
75
|
+
|
76
|
+
#else
|
77
|
+
|
78
|
+
#include <pthread.h>
|
79
|
+
|
80
|
+
/* job_s: state of the jobs scheduler shared by all the worker threads. */
struct job_s {
|
81
|
+
size_t size; /* total number of jobs to hand out */
|
82
|
+
size_t send; /* jobs already handed out, i.e. position of the next batch */
|
83
|
+
size_t batch; /* maximum number of jobs returned by one mth_getjob call */
|
84
|
+
pthread_mutex_t lock; /* held by mth_getjob while it updates <send> */
|
85
|
+
};
|
86
|
+
|
87
|
+
/* mth_t: per-thread parameters that mth_stub unpacks to call the user function. */
typedef struct mth_s mth_t;
|
88
|
+
struct mth_s {
|
89
|
+
job_t *job; /* jobs scheduler passed to <f>, NULL when it is disabled */
|
90
|
+
int id; /* identifier of this worker */
|
91
|
+
int cnt; /* total number of workers */
|
92
|
+
func_t *f; /* user function to call */
|
93
|
+
void *ud; /* user data passed to <f> for this worker */
|
94
|
+
};
|
95
|
+
|
96
|
+
/* mth_getjob:
|
97
|
+
* Get a new bunch of sequence to process. This function will return a new
|
98
|
+
* batch of sequence to process starting at position <pos> and with size
|
99
|
+
* <cnt> and return true. If no more batch are available, return false.
|
100
|
+
* This function use a lock to ensure thread safety as it will be called by
|
101
|
+
* the multiple workers threads.
|
102
|
+
*/
|
103
|
+
bool mth_getjob(job_t *job, size_t *cnt, size_t *pos) {
|
104
|
+
if (job == NULL)
|
105
|
+
return false;
|
106
|
+
if (job->send == job->size)
|
107
|
+
return false;
|
108
|
+
pthread_mutex_lock(&job->lock);
|
109
|
+
*cnt = min(job->batch, job->size - job->send);
|
110
|
+
*pos = job->send;
|
111
|
+
job->send += *cnt;
|
112
|
+
pthread_mutex_unlock(&job->lock);
|
113
|
+
return true;
|
114
|
+
}
|
115
|
+
|
116
|
+
static void *mth_stub(void *ud) {
|
117
|
+
mth_t *mth = (mth_t *)ud;
|
118
|
+
mth->f(mth->job, mth->id, mth->cnt, mth->ud);
|
119
|
+
return NULL;
|
120
|
+
}
|
121
|
+
|
122
|
+
/* mth_spawn:
|
123
|
+
* This function spawn W threads for calling the 'f' function. The function
|
124
|
+
* will get a unique identifier between 0 and W-1 and a user data from the
|
125
|
+
* 'ud' array.
|
126
|
+
*/
|
127
|
+
void mth_spawn(func_t *f, int W, void *ud[W], size_t size, size_t batch) {
|
128
|
+
// First prepare the jobs scheduler
|
129
|
+
job_t job, *pjob = NULL;
|
130
|
+
if (size != 0) {
|
131
|
+
pjob = &job;
|
132
|
+
job.size = size;
|
133
|
+
job.send = 0;
|
134
|
+
job.batch = batch;
|
135
|
+
if (pthread_mutex_init(&job.lock, NULL) != 0)
|
136
|
+
fatal("failed to create mutex");
|
137
|
+
}
|
138
|
+
// We handle differently the case where user requested a single thread
|
139
|
+
// for efficiency.
|
140
|
+
if (W == 1) {
|
141
|
+
f(&job, 0, 1, ud[0]);
|
142
|
+
return;
|
143
|
+
}
|
144
|
+
// We prepare the parameters structures that will be send to the threads
|
145
|
+
// with informations for calling the user function.
|
146
|
+
mth_t p[W];
|
147
|
+
for (int w = 0; w < W; w++) {
|
148
|
+
p[w].job = pjob;
|
149
|
+
p[w].id = w;
|
150
|
+
p[w].cnt = W;
|
151
|
+
p[w].f = f;
|
152
|
+
p[w].ud = ud[w];
|
153
|
+
}
|
154
|
+
// We are now ready to spawn the threads and wait for them to finish
|
155
|
+
// their jobs. So we just create all the thread and try to join them
|
156
|
+
// waiting for there return.
|
157
|
+
pthread_attr_t attr;
|
158
|
+
pthread_attr_init(&attr);
|
159
|
+
pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM);
|
160
|
+
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
|
161
|
+
pthread_t th[W];
|
162
|
+
for (int w = 0; w < W; w++)
|
163
|
+
if (pthread_create(&th[w], &attr, &mth_stub, &p[w]) != 0)
|
164
|
+
fatal("failed to create thread");
|
165
|
+
for (int w = 0; w < W; w++)
|
166
|
+
if (pthread_join(th[w], NULL) != 0)
|
167
|
+
fatal("failed to join thread");
|
168
|
+
pthread_attr_destroy(&attr);
|
169
|
+
}
|
170
|
+
#endif
|
171
|
+
|
data/ext/wapiti/thread.h
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
/*
|
2
|
+
* Wapiti - A linear-chain CRF tool
|
3
|
+
*
|
4
|
+
* Copyright (c) 2009-2011 CNRS
|
5
|
+
* All rights reserved.
|
6
|
+
*
|
7
|
+
* Redistribution and use in source and binary forms, with or without
|
8
|
+
* modification, are permitted provided that the following conditions are met:
|
9
|
+
* * Redistributions of source code must retain the above copyright
|
10
|
+
* notice, this list of conditions and the following disclaimer.
|
11
|
+
* * Redistributions in binary form must reproduce the above copyright
|
12
|
+
* notice, this list of conditions and the following disclaimer in the
|
13
|
+
* documentation and/or other materials provided with the distribution.
|
14
|
+
*
|
15
|
+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
16
|
+
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
17
|
+
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
18
|
+
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
19
|
+
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
20
|
+
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
21
|
+
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
22
|
+
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
23
|
+
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
24
|
+
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
25
|
+
* POSSIBILITY OF SUCH DAMAGE.
|
26
|
+
*/
|
27
|
+
|
28
|
+
#ifndef thread_h
|
29
|
+
#define thread_h
|
30
|
+
|
31
|
+
#include <pthread.h>
|
32
|
+
|
33
|
+
#include "model.h"
|
34
|
+
|
35
|
+
/* Opaque jobs scheduler; the structure itself is defined in thread.c. */
typedef struct job_s job_t;
|
36
|
+
|
37
|
+
/* Worker function: receives the scheduler, its identifier <id>, the number
 * of workers <cnt> and its own user data <ud>. */
typedef void (func_t)(job_t *job, int id, int cnt, void *ud);
|
38
|
+
|
39
|
+
/* Fetch the next batch of jobs: on success store its size in <cnt> and its
 * starting position in <pos> and return true; return false when none is left. */
bool mth_getjob(job_t *job, size_t *cnt, size_t *pos);
|
40
|
+
/* Call <f> for W workers with user data taken from <ud>, sharing a scheduler
 * of <size> jobs split in batches of <batch>. */
void mth_spawn(func_t *f, int W, void *ud[W], size_t size, size_t batch);
|
41
|
+
|
42
|
+
#endif
|
data/ext/wapiti/tools.c
ADDED
@@ -0,0 +1,202 @@
|
|
1
|
+
/*
|
2
|
+
* Wapiti - A linear-chain CRF tool
|
3
|
+
*
|
4
|
+
* Copyright (c) 2009-2011 CNRS
|
5
|
+
* All rights reserved.
|
6
|
+
*
|
7
|
+
* Redistribution and use in source and binary forms, with or without
|
8
|
+
* modification, are permitted provided that the following conditions are met:
|
9
|
+
* * Redistributions of source code must retain the above copyright
|
10
|
+
* notice, this list of conditions and the following disclaimer.
|
11
|
+
* * Redistributions in binary form must reproduce the above copyright
|
12
|
+
* notice, this list of conditions and the following disclaimer in the
|
13
|
+
* documentation and/or other materials provided with the distribution.
|
14
|
+
*
|
15
|
+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
16
|
+
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
17
|
+
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
18
|
+
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
19
|
+
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
20
|
+
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
21
|
+
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
22
|
+
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
23
|
+
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
24
|
+
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
25
|
+
* POSSIBILITY OF SUCH DAMAGE.
|
26
|
+
*/
|
27
|
+
|
28
|
+
#include <errno.h>
|
29
|
+
#include <stdarg.h>
|
30
|
+
#include <stddef.h>
|
31
|
+
#include <stdlib.h>
|
32
|
+
#include <stdio.h>
|
33
|
+
#include <string.h>
|
34
|
+
|
35
|
+
#include "tools.h"
|
36
|
+
|
37
|
+
/*
|
38
|
+
* Wapiti Ruby Logging
|
39
|
+
*
|
40
|
+
* Wapiti-Ruby delegates all wapiti logging messages to a Ruby logger in the
|
41
|
+
* main Wapiti module.
|
42
|
+
*
|
43
|
+
*/
|
44
|
+
|
45
|
+
#include "native.h"
|
46
|
+
|
47
|
+
|
48
|
+
/*******************************************************************************
|
49
|
+
* Error handling and memory management
|
50
|
+
*
|
51
|
+
* Wapiti uses a very simple system for error handling: violently fail. Errors
|
52
|
+
* can occur in two cases: when the user feeds Wapiti with bad data or when there
|
53
|
+
* is a problem on the system side. In both cases, there is nothing we can do,
|
54
|
+
* so the best thing is to exit with a meaningful error message.
|
55
|
+
*
|
56
|
+
* Memory allocation is one of the possible points of failure and it's painful
|
57
|
+
* to always remember to check the return value of malloc so we provide wrappers
|
58
|
+
* around it and realloc which check and fail in case of error.
|
59
|
+
******************************************************************************/
|
60
|
+
|
61
|
+
/* fatal:
|
62
|
+
* This is the main error function, it will print the given message with same
|
63
|
+
* formating than the printf family and exit program with an error. We let the
|
64
|
+
* OS care about freeing ressources.
|
65
|
+
*/
|
66
|
+
void fatal(const char *msg, ...) {
|
67
|
+
va_list args;
|
68
|
+
va_start(args, msg);
|
69
|
+
|
70
|
+
// VALUE message = rb_vsprintf(msg, args);
|
71
|
+
VALUE message = rb_str_new2(msg);
|
72
|
+
|
73
|
+
va_end(args);
|
74
|
+
|
75
|
+
(void)rb_funcall(cLogger, rb_intern("fatal"), 1, message);
|
76
|
+
rb_raise(cNativeError, StringValuePtr(message));
|
77
|
+
}
|
78
|
+
|
79
|
+
/* pfatal:
|
80
|
+
* This one is very similar to the fatal function but print an additional
|
81
|
+
* system error message depending on the errno. This can be used when a
|
82
|
+
* function who set the errno fail to print more detailed informations. You
|
83
|
+
* must be carefull to not call other functino that might reset it before
|
84
|
+
* calling pfatal.
|
85
|
+
*/
|
86
|
+
void pfatal(const char *msg, ...) {
|
87
|
+
const char *err = strerror(errno);
|
88
|
+
va_list args;
|
89
|
+
va_start(args, msg);
|
90
|
+
|
91
|
+
// VALUE message = rb_vsprintf(msg, args);
|
92
|
+
// rb_str_catf(message, ": <%s>", err);
|
93
|
+
VALUE message = rb_str_new2(msg);
|
94
|
+
|
95
|
+
va_end(args);
|
96
|
+
|
97
|
+
(void)rb_funcall(cLogger, rb_intern("fatal"), 1, message);
|
98
|
+
rb_raise(cNativeError, StringValuePtr(message));
|
99
|
+
}
|
100
|
+
|
101
|
+
/* warning:
|
102
|
+
* This one is less violent as it just print a warning on stderr, but doesn't
|
103
|
+
* exit the program. It is intended to inform the user that something strange
|
104
|
+
* have happen and the result might be not what it have expected.
|
105
|
+
*/
|
106
|
+
void warning(const char *msg, ...) {
|
107
|
+
va_list args;
|
108
|
+
va_start(args, msg);
|
109
|
+
|
110
|
+
// (void)rb_funcall(cLogger, rb_intern("warn"), 1, rb_vsprintf(msg, args));
|
111
|
+
(void)rb_funcall(cLogger, rb_intern("warn"), 1, rb_str_new2(msg));
|
112
|
+
|
113
|
+
va_end(args);
|
114
|
+
}
|
115
|
+
|
116
|
+
/* info:
|
117
|
+
* Function used for all progress reports. This is where an eventual verbose
|
118
|
+
* level can be implemented later or redirection to a logfile. For now, it is
|
119
|
+
* just a wrapper for printf to stderr. Note that unlike the previous one,
|
120
|
+
* this function doesn't automatically append a new line character.
|
121
|
+
*/
|
122
|
+
void info(const char *msg, ...) {
|
123
|
+
va_list args;
|
124
|
+
va_start(args, msg);
|
125
|
+
|
126
|
+
// (void)rb_funcall(cLogger, rb_intern("info"), 1, rb_vsprintf(msg, args));
|
127
|
+
(void)rb_funcall(cLogger, rb_intern("info"), 1, rb_str_new2(msg));
|
128
|
+
|
129
|
+
va_end(args);
|
130
|
+
}
|
131
|
+
|
132
|
+
/* wapiti_xmalloc:
 *   A simple wrapper around malloc which violently fails through fatal() if
 *   memory cannot be allocated, so it will never return NULL. A request for
 *   zero bytes yields a valid one-byte block. The caller owns the returned
 *   block and must release it with free.
 */
void *wapiti_xmalloc(size_t size) {
	// malloc(0) is allowed to return NULL without being out of memory, so
	// ask for one byte instead to keep the "never NULL" guarantee honest.
	void *ptr = malloc(size != 0 ? size : 1);
	if (ptr == NULL)
		fatal("out of memory");
	return ptr;
}
|
142
|
+
|
143
|
+
/* wapiti_xrealloc:
 *   As wapiti_xmalloc, this is a simple wrapper around realloc which fails
 *   through fatal() on memory error and so never returns NULL. A request for
 *   zero bytes shrinks the block to one byte instead of releasing it.
 */
void *wapiti_xrealloc(void *ptr, size_t size) {
	// realloc(ptr, 0) may free the block and return NULL, which would be
	// reported as a spurious out of memory; keep a minimal block instead.
	void *res = realloc(ptr, size != 0 ? size : 1);
	if (res == NULL)
		fatal("out of memory");
	return res;
}
|
153
|
+
|
154
|
+
/* xstrdup:
 *   As the previous ones, this is a safe version of strdup which fails on
 *   allocation error. It returns a newly allocated copy of the 0-terminated
 *   string <str>, which the caller must release with free.
 */
char *xstrdup(const char *str) {
	// Keep the length in a size_t: an int would truncate on huge strings.
	const size_t len = strlen(str) + 1;
	char *res = wapiti_xmalloc(len);
	memcpy(res, str, len);
	return res;
}
|
164
|
+
|
165
|
+
/******************************************************************************
|
166
|
+
* Netstring for persistent storage
|
167
|
+
*
|
168
|
+
* This follows the format proposed by D.J. Bernstein for safe and portable
|
169
|
+
* storage of strings in persistent files and networks. This is used for storing
|
170
|
+
* strings in saved models.
|
171
|
+
* We just add an additional end-of-line character to make the output files
|
172
|
+
* more readable.
|
173
|
+
*
|
174
|
+
******************************************************************************/
|
175
|
+
|
176
|
+
/* ns_readstr:
 *   Read a string from the given file in netstring format, i.e. a decimal
 *   length, a ':', the bytes of the string and a ','. The character that
 *   follows the ',' (the end-of-line added by ns_writestr) is consumed too.
 *   The string is returned as a newly allocated, 0-terminated block of
 *   memory that the caller must release with free.
 *   Read errors are reported through pfatal() and malformed input, including
 *   a negative length, through fatal().
 */
char *ns_readstr(FILE *file) {
	int len;
	if (fscanf(file, "%d", &len) != 1)
		pfatal("cannot read from file");
	// A literal in a scanf format is not counted in the return value, so the
	// separator is checked by hand, together with the sign of the length.
	if (len < 0 || fgetc(file) != ':')
		fatal("invalid format");
	const size_t size = (size_t)len;
	char *buf = wapiti_xmalloc(size + 1);
	// Count bytes rather than one item of <size> bytes, else an empty
	// string (size 0) would always be reported as a read failure.
	if (fread(buf, 1, size, file) != size) {
		// Release the buffer without losing the errno pfatal will report.
		const int saved = errno;
		free(buf);
		errno = saved;
		pfatal("cannot read from file");
	}
	if (fgetc(file) != ',') {
		free(buf);
		fatal("invalid format");
	}
	buf[size] = '\0';
	// Skip the end-of-line character written after the netstring.
	fgetc(file);
	return buf;
}
|
193
|
+
|
194
|
+
/* ns_writestr:
 *   Write the 0-terminated string <str> to the given file in the netstring
 *   format: its length in decimal, a ':', the string itself and a ',',
 *   followed by an end-of-line character. A write failure is reported
 *   through pfatal().
 */
void ns_writestr(FILE *file, const char *str) {
	const int written = fprintf(file, "%d:%s,\n", (int)strlen(str), str);
	if (written < 0)
		pfatal("cannot write to file");
}
|
202
|
+
|
data/ext/wapiti/tools.h
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
/*
|
2
|
+
* Wapiti - A linear-chain CRF tool
|
3
|
+
*
|
4
|
+
* Copyright (c) 2009-2011 CNRS
|
5
|
+
* All rights reserved.
|
6
|
+
*
|
7
|
+
* Redistribution and use in source and binary forms, with or without
|
8
|
+
* modification, are permitted provided that the following conditions are met:
|
9
|
+
* * Redistributions of source code must retain the above copyright
|
10
|
+
* notice, this list of conditions and the following disclaimer.
|
11
|
+
* * Redistributions in binary form must reproduce the above copyright
|
12
|
+
* notice, this list of conditions and the following disclaimer in the
|
13
|
+
* documentation and/or other materials provided with the distribution.
|
14
|
+
*
|
15
|
+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
16
|
+
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
17
|
+
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
18
|
+
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
19
|
+
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
20
|
+
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
21
|
+
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
22
|
+
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
23
|
+
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
24
|
+
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
25
|
+
* POSSIBILITY OF SUCH DAMAGE.
|
26
|
+
*/
|
27
|
+
#ifndef tools_h
|
28
|
+
#define tools_h
|
29
|
+
|
30
|
+
#include <stdarg.h>
|
31
|
+
#include <stddef.h>
|
32
|
+
#include <stdio.h>
|
33
|
+
|
34
|
+
#include <ruby.h>
|
35
|
+
|
36
|
+
/* Evaluate <v> and discard the result, to silence unused-parameter warnings. */
#define unused(v) ((void)(v))
|
37
|
+
/* Largest size_t value. NOTE(review): presumably a "no value" marker - confirm against callers. */
#define none ((size_t)-1)
|
38
|
+
|
39
|
+
/* Smallest and largest of two values. Each argument may be evaluated twice,
 * so do not pass expressions with side effects. */
#define min(a, b) ((a) < (b) ? (a) : (b))
|
40
|
+
#define max(a, b) ((a) < (b) ? (b) : (a))
|
41
|
+
|
42
|
+
/* Logging and error reporting, all delegated to the Ruby logger. fatal and
 * pfatal also raise a Ruby exception after logging the message. */
void fatal(const char *msg, ...);
|
43
|
+
void pfatal(const char *msg, ...);
|
44
|
+
void warning(const char *msg, ...);
|
45
|
+
void info(const char *msg, ...);
|
46
|
+
|
47
|
+
/* Allocation wrappers which call fatal() when the allocation fails. */
void *wapiti_xmalloc(size_t size);
|
48
|
+
void *wapiti_xrealloc(void *ptr, size_t size);
|
49
|
+
char *xstrdup(const char *str);
|
50
|
+
|
51
|
+
/* Netstring input and output, used to store strings in files. */
char *ns_readstr(FILE *file);
|
52
|
+
void ns_writestr(FILE *file, const char *str);
|
53
|
+
|
54
|
+
#endif
|