wapiti 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.autotest +13 -0
- data/.gitignore +5 -0
- data/.rspec +3 -0
- data/Gemfile +6 -0
- data/LICENSE +30 -0
- data/README.md +153 -0
- data/Rakefile +33 -0
- data/ext/wapiti/bcd.c +392 -0
- data/ext/wapiti/decoder.c +535 -0
- data/ext/wapiti/decoder.h +46 -0
- data/ext/wapiti/extconf.rb +8 -0
- data/ext/wapiti/gradient.c +818 -0
- data/ext/wapiti/gradient.h +81 -0
- data/ext/wapiti/lbfgs.c +294 -0
- data/ext/wapiti/model.c +296 -0
- data/ext/wapiti/model.h +100 -0
- data/ext/wapiti/native.c +1238 -0
- data/ext/wapiti/native.h +15 -0
- data/ext/wapiti/options.c +278 -0
- data/ext/wapiti/options.h +91 -0
- data/ext/wapiti/pattern.c +395 -0
- data/ext/wapiti/pattern.h +56 -0
- data/ext/wapiti/progress.c +167 -0
- data/ext/wapiti/progress.h +43 -0
- data/ext/wapiti/quark.c +272 -0
- data/ext/wapiti/quark.h +46 -0
- data/ext/wapiti/reader.c +553 -0
- data/ext/wapiti/reader.h +73 -0
- data/ext/wapiti/rprop.c +191 -0
- data/ext/wapiti/sequence.h +148 -0
- data/ext/wapiti/sgdl1.c +218 -0
- data/ext/wapiti/thread.c +171 -0
- data/ext/wapiti/thread.h +42 -0
- data/ext/wapiti/tools.c +202 -0
- data/ext/wapiti/tools.h +54 -0
- data/ext/wapiti/trainers.h +39 -0
- data/ext/wapiti/vmath.c +372 -0
- data/ext/wapiti/vmath.h +51 -0
- data/ext/wapiti/wapiti.c +288 -0
- data/ext/wapiti/wapiti.h +45 -0
- data/lib/wapiti.rb +30 -0
- data/lib/wapiti/errors.rb +17 -0
- data/lib/wapiti/model.rb +49 -0
- data/lib/wapiti/options.rb +113 -0
- data/lib/wapiti/utility.rb +15 -0
- data/lib/wapiti/version.rb +3 -0
- data/spec/fixtures/ch.mod +18550 -0
- data/spec/fixtures/chpattern.txt +52 -0
- data/spec/fixtures/chtest.txt +1973 -0
- data/spec/fixtures/chtrain.txt +19995 -0
- data/spec/fixtures/nppattern.txt +52 -0
- data/spec/fixtures/nptest.txt +1973 -0
- data/spec/fixtures/nptrain.txt +19995 -0
- data/spec/fixtures/pattern.txt +14 -0
- data/spec/fixtures/test.txt +60000 -0
- data/spec/fixtures/train.txt +1200 -0
- data/spec/spec_helper.rb +21 -0
- data/spec/wapiti/model_spec.rb +173 -0
- data/spec/wapiti/native_spec.rb +12 -0
- data/spec/wapiti/options_spec.rb +175 -0
- data/spec/wapiti/utility_spec.rb +22 -0
- data/wapiti.gemspec +35 -0
- metadata +178 -0
@@ -0,0 +1,39 @@
+/*
+ * Wapiti - A linear-chain CRF tool
+ *
+ * Copyright (c) 2009-2011 CNRS
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef trainers_h
+#define trainers_h
+
+#include "model.h"
+
+void trn_lbfgs(mdl_t *mdl);
+void trn_sgdl1(mdl_t *mdl);
+void trn_bcd(mdl_t *mdl);
+void trn_rprop(mdl_t *mdl);
+
+#endif
+
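trainers.h above is the entire training API surface: each optimizer (L-BFGS, SGD-L1, BCD, RPROP, implemented in lbfgs.c, sgdl1.c, bcd.c and rprop.c listed earlier) is exposed through one uniform `void trn_*(mdl_t *mdl)` entry point. Below is a minimal sketch of how such a uniform signature lends itself to name-based dispatch; the `mdl_t` stub, the stub trainer bodies, the table, and the algorithm name strings are illustrative assumptions, not code from this gem (the real `mdl_t` lives in model.h and the real selection logic in wapiti.c):

```c
#include <stdio.h>
#include <string.h>

typedef struct mdl_s { int dummy; } mdl_t;  /* stand-in; the real mdl_t is defined in model.h */

/* Stub bodies so the sketch is self-contained; the real implementations
 * live in lbfgs.c, sgdl1.c, bcd.c and rprop.c. */
static void trn_lbfgs(mdl_t *mdl) { (void)mdl; puts("training with l-bfgs"); }
static void trn_sgdl1(mdl_t *mdl) { (void)mdl; puts("training with sgd-l1"); }
static void trn_bcd  (mdl_t *mdl) { (void)mdl; puts("training with bcd");    }
static void trn_rprop(mdl_t *mdl) { (void)mdl; puts("training with rprop");  }

/* Hypothetical name -> trainer table: the shared void (*)(mdl_t *)
 * signature from trainers.h makes this kind of dispatch trivial. */
static const struct {
	const char  *name;
	void       (*train)(mdl_t *);
} trainers[] = {
	{ "l-bfgs", trn_lbfgs },
	{ "sgd-l1", trn_sgdl1 },
	{ "bcd",    trn_bcd   },
	{ "rprop",  trn_rprop },
};

int main(void) {
	mdl_t mdl = { 0 };
	const char *algo = "rprop";  /* would come from user options */
	for (size_t i = 0; i < sizeof trainers / sizeof trainers[0]; i++)
		if (strcmp(trainers[i].name, algo) == 0)
			trainers[i].train(&mdl);
	return 0;
}
```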
data/ext/wapiti/vmath.c
ADDED
@@ -0,0 +1,372 @@
+/*
+ * Wapiti - A linear-chain CRF tool
+ *
+ * Copyright (c) 2009-2011 CNRS
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <assert.h>
+#include <math.h>
+#include <stddef.h>
+#include <stdlib.h>
+
+#include "wapiti.h"
+#include "tools.h"
+#include "vmath.h"
+
+#if defined(__SSE2__) && !defined(XVM_ANSI)
+#include <emmintrin.h>
+#endif
+
+/* xvm_mode:
+ *   Return a string describing the SSE level used in the optimized code paths.
+ */
+const char *xvm_mode(void) {
+#if defined(__SSE2__) && !defined(XVM_ANSI)
+	return "sse2";
+#else
+	return "no-sse";
+#endif
+}
+
+/* xvm_new:
+ *   Allocate a new vector suitable to be used in the SSE code paths. This
+ *   ensures that the vector size includes the needed padding. You must only use
+ *   vectors allocated by this function if you use the optimized code paths.
+ */
+double *xvm_new(size_t N) {
+#if defined(__SSE2__) && !defined(XVM_ANSI)
+	if (N % 4 != 0)
+		N += 4 - N % 4;
+	void *ptr = _mm_malloc(sizeof(double) * N, 16);
+	if (ptr == NULL)
+		fatal("out of memory");
+	return ptr;
+#else
+	return wapiti_xmalloc(sizeof(double) * N);
+#endif
+}
+
+/* xvm_free:
+ *   Free a vector allocated by xvm_new.
+ */
+void xvm_free(double x[]) {
+#if defined(__SSE2__) && !defined(XVM_ANSI)
+	_mm_free(x);
+#else
+	free(x);
+#endif
+}
+
+/* xvm_neg:
+ *   Return the component-wise negation of the given vector:
+ *       r = -x
+ */
+void xvm_neg(double r[], const double x[], size_t N) {
+#if defined(__SSE2__) && !defined(XVM_ANSI)
+	assert(r != NULL && ((size_t)r % 16) == 0);
+	assert(x != NULL && ((size_t)x % 16) == 0);
+	const __m128d vz = _mm_setzero_pd();
+	for (size_t n = 0; n < N; n += 4) {
+		const __m128d x0 = _mm_load_pd(x + n    );
+		const __m128d x1 = _mm_load_pd(x + n + 2);
+		const __m128d r0 = _mm_sub_pd(vz, x0);
+		const __m128d r1 = _mm_sub_pd(vz, x1);
+		_mm_store_pd(r + n,     r0);
+		_mm_store_pd(r + n + 2, r1);
+	}
+#else
+	for (size_t n = 0; n < N; n++)
+		r[n] = -x[n];
+#endif
+}
+
+/* xvm_sub:
+ *   Return the difference of the two given vectors:
+ *       r = x .- y
+ */
+void xvm_sub(double r[], const double x[], const double y[], size_t N) {
+#if defined(__SSE2__) && !defined(XVM_ANSI)
+	assert(r != NULL && ((size_t)r % 16) == 0);
+	assert(x != NULL && ((size_t)x % 16) == 0);
+	assert(y != NULL && ((size_t)y % 16) == 0);
+	for (size_t n = 0; n < N; n += 4) {
+		const __m128d x0 = _mm_load_pd(x + n    );
+		const __m128d x1 = _mm_load_pd(x + n + 2);
+		const __m128d y0 = _mm_load_pd(y + n    );
+		const __m128d y1 = _mm_load_pd(y + n + 2);
+		const __m128d r0 = _mm_sub_pd(x0, y0);
+		const __m128d r1 = _mm_sub_pd(x1, y1);
+		_mm_store_pd(r + n,     r0);
+		_mm_store_pd(r + n + 2, r1);
+	}
+#else
+	for (size_t n = 0; n < N; n++)
+		r[n] = x[n] - y[n];
+#endif
+}
+
+/* xvm_scale:
+ *   Return the given vector scaled by a constant:
+ *       r = a * x
+ */
+void xvm_scale(double r[], const double x[], double a, size_t N) {
+	for (size_t n = 0; n < N; n++)
+		r[n] = x[n] * a;
+}
+
+/* xvm_unit:
+ *   Store a normalized copy of the given vector in r and return the
+ *   normalization factor.
+ */
+double xvm_unit(double r[], const double x[], size_t N) {
+	double sum = 0.0;
+	for (size_t n = 0; n < N; n++)
+		sum += x[n];
+	const double scale = 1.0 / sum;
+	xvm_scale(r, x, scale, N);
+	return scale;
+}
+
+/* xvm_norm:
+ *   Return the Euclidean norm of the given vector.
+ */
+double xvm_norm(const double x[], size_t N) {
+	double r = 0.0;
+#if defined(__SSE2__) && !defined(XVM_ANSI)
+	assert(x != NULL && ((size_t)x % 16) == 0);
+	size_t n, d = N % 4;
+	__m128d s0 = _mm_setzero_pd();
+	__m128d s1 = _mm_setzero_pd();
+	for (n = 0; n < N - d; n += 4) {
+		const __m128d x0 = _mm_load_pd(x + n    );
+		const __m128d x1 = _mm_load_pd(x + n + 2);
+		const __m128d r0 = _mm_mul_pd(x0, x0);
+		const __m128d r1 = _mm_mul_pd(x1, x1);
+		s0 = _mm_add_pd(s0, r0);
+		s1 = _mm_add_pd(s1, r1);
+	}
+	s0 = _mm_add_pd(s0, s1);
+	s1 = _mm_shuffle_pd(s0, s0, _MM_SHUFFLE2(1, 1));
+	s0 = _mm_add_pd(s0, s1);
+	_mm_store_sd(&r, s0);
+	for ( ; n < N; n++)
+		r += x[n] * x[n];
+#else
+	for (size_t n = 0; n < N; n++)
+		r += x[n] * x[n];
+#endif
+	return sqrt(r);
+}
+
+/* xvm_dot:
+ *   Return the dot product of the two given vectors.
+ */
+double xvm_dot(const double x[], const double y[], size_t N) {
+	double r = 0.0;
+#if defined(__SSE2__) && !defined(XVM_ANSI)
+	assert(x != NULL && ((size_t)x % 16) == 0);
+	assert(y != NULL && ((size_t)y % 16) == 0);
+	size_t n, d = N % 4;
+	__m128d s0 = _mm_setzero_pd();
+	__m128d s1 = _mm_setzero_pd();
+	for (n = 0; n < N - d; n += 4) {
+		const __m128d x0 = _mm_load_pd(x + n    );
+		const __m128d x1 = _mm_load_pd(x + n + 2);
+		const __m128d y0 = _mm_load_pd(y + n    );
+		const __m128d y1 = _mm_load_pd(y + n + 2);
+		const __m128d r0 = _mm_mul_pd(x0, y0);
+		const __m128d r1 = _mm_mul_pd(x1, y1);
+		s0 = _mm_add_pd(s0, r0);
+		s1 = _mm_add_pd(s1, r1);
+	}
+	s0 = _mm_add_pd(s0, s1);
+	s1 = _mm_shuffle_pd(s0, s0, _MM_SHUFFLE2(1, 1));
+	s0 = _mm_add_pd(s0, s1);
+	_mm_store_sd(&r, s0);
+	for ( ; n < N; n++)
+		r += x[n] * y[n];
+#else
+	for (size_t n = 0; n < N; n++)
+		r += x[n] * y[n];
+#endif
+	return r;
+}
+
+/* xvm_axpy:
+ *   Return the sum of x scaled by a and y:
+ *       r = a * x + y
+ */
+void xvm_axpy(double r[], double a, const double x[], const double y[], size_t N) {
+#if defined(__SSE2__) && !defined(XVM_ANSI)
+	assert(r != NULL && ((size_t)r % 16) == 0);
+	assert(x != NULL && ((size_t)x % 16) == 0);
+	assert(y != NULL && ((size_t)y % 16) == 0);
+	const __m128d va = _mm_set1_pd(a);
+	for (size_t n = 0; n < N; n += 4) {
+		const __m128d x0 = _mm_load_pd(x + n    );
+		const __m128d x1 = _mm_load_pd(x + n + 2);
+		const __m128d y0 = _mm_load_pd(y + n    );
+		const __m128d y1 = _mm_load_pd(y + n + 2);
+		const __m128d t0 = _mm_mul_pd(x0, va);
+		const __m128d t1 = _mm_mul_pd(x1, va);
+		const __m128d r0 = _mm_add_pd(t0, y0);
+		const __m128d r1 = _mm_add_pd(t1, y1);
+		_mm_store_pd(r + n,     r0);
+		_mm_store_pd(r + n + 2, r1);
+	}
+#else
+	for (size_t n = 0; n < N; n++)
+		r[n] = a * x[n] + y[n];
+#endif
+}
+
+/* xvm_expma:
+ *   Compute the component-wise exponential minus <a>:
+ *       r[i] <-- e^x[i] - a
+ *
+ *   The following comments apply to the SSE2 version of this code:
+ *
+ *   Computation is done four doubles at a time by doing computation in parallel
+ *   on two vectors of two doubles using SSE2 intrinsics. If the size is not a
+ *   multiple of 4, the remaining elements are computed using the stdlib exp().
+ *
+ *   The computation is done by first doing a range reduction of the argument
+ *   of the form e^x = 2^k * e^f, choosing k and f so that f is in [-0.5, 0.5].
+ *   Then 2^k can be computed exactly using bit operations to build the double
+ *   result and e^f can be efficiently computed with enough precision using a
+ *   polynomial approximation.
+ *
+ *   The polynomial approximation is done with an 11th-order polynomial computed
+ *   by the Remez algorithm with the Sollya suite, instead of the more classical
+ *   Padé polynomial form, because it is better suited to parallel execution. In
+ *   order to achieve the same precision, a Padé form seems to require three
+ *   fewer multiplications but needs a very costly division, so it would be less
+ *   efficient.
+ *
+ *   The maximum error is less than 1 lsb and special cases are correctly
+ *   handled:
+ *       +inf or +oor  -->  return +inf
+ *       -inf or -oor  -->  return  0.0
+ *       qNaN or sNaN  -->  return qNaN
+ *
+ *   This code is copyright 2004-2011 Thomas Lavergne and licensed under the
+ *   BSD licence like the rest of Wapiti.
+ */
+void xvm_expma(double r[], const double x[], double a, size_t N) {
+#if defined(__SSE2__) && !defined(XVM_ANSI)
+#define xvm_vconst(v) (_mm_castsi128_pd(_mm_set1_epi64x((v))))
+	assert(r != NULL && ((size_t)r % 16) == 0);
+	assert(x != NULL && ((size_t)x % 16) == 0);
+	const __m128i vl  = _mm_set1_epi64x(0x3ff0000000000000ULL);
+	const __m128d ehi = xvm_vconst(0x4086232bdd7abcd2ULL);
+	const __m128d elo = xvm_vconst(0xc086232bdd7abcd2ULL);
+	const __m128d l2e = xvm_vconst(0x3ff71547652b82feULL);
+	const __m128d hal = xvm_vconst(0x3fe0000000000000ULL);
+	const __m128d nan = xvm_vconst(0xfff8000000000000ULL);
+	const __m128d inf = xvm_vconst(0x7ff0000000000000ULL);
+	const __m128d c1  = xvm_vconst(0x3fe62e4000000000ULL);
+	const __m128d c2  = xvm_vconst(0x3eb7f7d1cf79abcaULL);
+	const __m128d p0  = xvm_vconst(0x3feffffffffffffeULL);
+	const __m128d p1  = xvm_vconst(0x3ff000000000000bULL);
+	const __m128d p2  = xvm_vconst(0x3fe0000000000256ULL);
+	const __m128d p3  = xvm_vconst(0x3fc5555555553a2aULL);
+	const __m128d p4  = xvm_vconst(0x3fa55555554e57d3ULL);
+	const __m128d p5  = xvm_vconst(0x3f81111111362f4fULL);
+	const __m128d p6  = xvm_vconst(0x3f56c16c25f3bae1ULL);
+	const __m128d p7  = xvm_vconst(0x3f2a019fc9310c33ULL);
+	const __m128d p8  = xvm_vconst(0x3efa01825f3cb28bULL);
+	const __m128d p9  = xvm_vconst(0x3ec71e2bd880fdd8ULL);
+	const __m128d p10 = xvm_vconst(0x3e9299068168ac8fULL);
+	const __m128d p11 = xvm_vconst(0x3e5ac52350b60b19ULL);
+	const __m128d va  = _mm_set1_pd(a);
+	for (size_t n = 0; n < N; n += 4) {
+		__m128d mn1, mn2, mi1, mi2;
+		__m128d t1, t2, d1, d2;
+		__m128d v1, v2, w1, w2;
+		__m128i k1, k2;
+		__m128d f1, f2;
+		// Load the next four values
+		__m128d x1 = _mm_load_pd(x + n    );
+		__m128d x2 = _mm_load_pd(x + n + 2);
+		// Check for out of range values, infinites and NaN
+		mn1 = _mm_cmpneq_pd(x1, x1);	mn2 = _mm_cmpneq_pd(x2, x2);
+		mi1 = _mm_cmpgt_pd(x1, ehi);	mi2 = _mm_cmpgt_pd(x2, ehi);
+		x1  = _mm_max_pd(x1, elo);	x2  = _mm_max_pd(x2, elo);
+		// Range reduction: we search k and f such that e^x = 2^k * e^f
+		// with f in [-0.5, 0.5]
+		t1  = _mm_mul_pd(x1, l2e);	t2  = _mm_mul_pd(x2, l2e);
+		t1  = _mm_add_pd(t1, hal);	t2  = _mm_add_pd(t2, hal);
+		k1  = _mm_cvttpd_epi32(t1);	k2  = _mm_cvttpd_epi32(t2);
+		d1  = _mm_cvtepi32_pd(k1);	d2  = _mm_cvtepi32_pd(k2);
+		t1  = _mm_mul_pd(d1, c1);	t2  = _mm_mul_pd(d2, c1);
+		f1  = _mm_sub_pd(x1, t1);	f2  = _mm_sub_pd(x2, t2);
+		t1  = _mm_mul_pd(d1, c2);	t2  = _mm_mul_pd(d2, c2);
+		f1  = _mm_sub_pd(f1, t1);	f2  = _mm_sub_pd(f2, t2);
+		// Evaluation of e^f using an 11th-order polynomial in Horner form
+		v1 = _mm_mul_pd(f1, p11);	v2 = _mm_mul_pd(f2, p11);
+		v1 = _mm_add_pd(v1, p10);	v2 = _mm_add_pd(v2, p10);
+		v1 = _mm_mul_pd(v1, f1);	v2 = _mm_mul_pd(v2, f2);
+		v1 = _mm_add_pd(v1, p9);	v2 = _mm_add_pd(v2, p9);
+		v1 = _mm_mul_pd(v1, f1);	v2 = _mm_mul_pd(v2, f2);
+		v1 = _mm_add_pd(v1, p8);	v2 = _mm_add_pd(v2, p8);
+		v1 = _mm_mul_pd(v1, f1);	v2 = _mm_mul_pd(v2, f2);
+		v1 = _mm_add_pd(v1, p7);	v2 = _mm_add_pd(v2, p7);
+		v1 = _mm_mul_pd(v1, f1);	v2 = _mm_mul_pd(v2, f2);
+		v1 = _mm_add_pd(v1, p6);	v2 = _mm_add_pd(v2, p6);
+		v1 = _mm_mul_pd(v1, f1);	v2 = _mm_mul_pd(v2, f2);
+		v1 = _mm_add_pd(v1, p5);	v2 = _mm_add_pd(v2, p5);
+		v1 = _mm_mul_pd(v1, f1);	v2 = _mm_mul_pd(v2, f2);
+		v1 = _mm_add_pd(v1, p4);	v2 = _mm_add_pd(v2, p4);
+		v1 = _mm_mul_pd(v1, f1);	v2 = _mm_mul_pd(v2, f2);
+		v1 = _mm_add_pd(v1, p3);	v2 = _mm_add_pd(v2, p3);
+		v1 = _mm_mul_pd(v1, f1);	v2 = _mm_mul_pd(v2, f2);
+		v1 = _mm_add_pd(v1, p2);	v2 = _mm_add_pd(v2, p2);
+		v1 = _mm_mul_pd(v1, f1);	v2 = _mm_mul_pd(v2, f2);
+		v1 = _mm_add_pd(v1, p1);	v2 = _mm_add_pd(v2, p1);
+		v1 = _mm_mul_pd(v1, f1);	v2 = _mm_mul_pd(v2, f2);
+		v1 = _mm_add_pd(v1, p0);	v2 = _mm_add_pd(v2, p0);
+		// Evaluation of 2^k using bitops to achieve exact computation
+		k1 = _mm_slli_epi32(k1, 20);	k2 = _mm_slli_epi32(k2, 20);
+		k1 = _mm_shuffle_epi32(k1, 0x72);
+		k2 = _mm_shuffle_epi32(k2, 0x72);
+		k1 = _mm_add_epi32(k1, vl);	k2 = _mm_add_epi32(k2, vl);
+		w1 = _mm_castsi128_pd(k1);	w2 = _mm_castsi128_pd(k2);
+		// Return to full range to subtract <a>
+		v1 = _mm_mul_pd(v1, w1);	v2 = _mm_mul_pd(v2, w2);
+		v1 = _mm_sub_pd(v1, va);	v2 = _mm_sub_pd(v2, va);
+		// Finally apply infinity and NaN where needed
+		v1 = _mm_or_pd(_mm_and_pd(mi1, inf), _mm_andnot_pd(mi1, v1));
+		v2 = _mm_or_pd(_mm_and_pd(mi2, inf), _mm_andnot_pd(mi2, v2));
+		v1 = _mm_or_pd(_mm_and_pd(mn1, nan), _mm_andnot_pd(mn1, v1));
+		v2 = _mm_or_pd(_mm_and_pd(mn2, nan), _mm_andnot_pd(mn2, v2));
+		// Store the results
+		_mm_store_pd(r + n,     v1);
+		_mm_store_pd(r + n + 2, v2);
+	}
+#else
+	for (size_t n = 0; n < N; n++)
+		r[n] = exp(x[n]) - a;
+#endif
+}
+
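The range-reduction scheme described in the xvm_expma comment above (e^x = 2^k * e^f with f in [-0.5, 0.5]) is easier to follow in scalar form. The sketch below is not from the package; it is an illustrative scalar rendition in which `ldexp()` stands in for the exact 2^k bit manipulation and the libm `exp(f)` for the 11th-order polynomial, with `l2e` and `ln2` mirroring the l2e/c1/c2 constants of the SSE2 code:

```c
#include <math.h>
#include <stdio.h>

/* Scalar rendition of xvm_expma's range reduction: pick k = round(x * log2(e))
 * and f = x - k * ln(2), so that e^x = 2^k * e^f with f in roughly
 * [-0.35, 0.35]. The SSE2 code splits ln(2) into the two constants c1 and c2
 * to keep the subtraction exact, and builds 2^k with bit operations. */
static double expma_scalar(double x, double a) {
	const double l2e = 1.4426950408889634;   /* log2(e), cf. the l2e constant */
	const double ln2 = 0.6931471805599453;
	const double k   = floor(x * l2e + 0.5); /* round to nearest, like the +hal trick */
	const double f   = x - k * ln2;          /* reduced argument */
	return ldexp(exp(f), (int)k) - a;        /* 2^k * e^f - a */
}

int main(void) {
	for (double x = -2.0; x <= 2.0; x += 1.0)
		printf("x=%+.1f  exp(x)-1=%.15g  reduced=%.15g\n",
		       x, exp(x) - 1.0, expma_scalar(x, 1.0));
	return 0;
}
```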
data/ext/wapiti/vmath.h
ADDED
@@ -0,0 +1,51 @@
+/*
+ * Wapiti - A linear-chain CRF tool
+ *
+ * Copyright (c) 2009-2011 CNRS
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef vmath_h
+#define vmath_h
+
+#include <stddef.h>
+
+const char *xvm_mode(void);
+
+double *xvm_new(size_t N);
+void xvm_free(double x[]);
+
+void xvm_neg(double r[], const double x[], size_t N);
+void xvm_sub(double r[], const double x[], const double y[], size_t N);
+void xvm_scale(double r[], const double x[], double a, size_t N);
+double xvm_unit(double r[], const double x[], size_t N);
+
+double xvm_norm(const double x[], size_t N);
+double xvm_dot(const double x[], const double y[], size_t N);
+
+void xvm_axpy(double r[], double a, const double x[], const double y[], size_t N);
+
+void xvm_expma(double r[], const double x[], double a, size_t N);
+
+#endif
+
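Taken together, vmath.h exposes a small BLAS-like API over SSE-aligned vectors. A minimal usage sketch, assuming it is compiled and linked against vmath.c and tools.c from this package (the latter for `fatal()`); only functions declared above are used:

```c
#include <stdio.h>
#include "vmath.h"

int main(void) {
	const size_t N = 6;        /* xvm_new pads the allocation to a multiple of 4 */
	double *x = xvm_new(N);
	double *y = xvm_new(N);
	double *r = xvm_new(N);
	for (size_t i = 0; i < N; i++) {
		x[i] = (double)i;
		y[i] = 1.0;
	}
	xvm_axpy(r, 2.0, x, y, N);                      /* r = 2 * x + y */
	printf("mode: %s\n", xvm_mode());               /* "sse2" or "no-sse" */
	printf("dot(x, y) = %g\n", xvm_dot(x, y, N));   /* 0+1+...+5 = 15 */
	printf("norm(r)   = %g\n", xvm_norm(r, N));
	xvm_free(x);
	xvm_free(y);
	xvm_free(r);
	return 0;
}
```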