wapiti 0.0.1
- data/.autotest +13 -0
- data/.gitignore +5 -0
- data/.rspec +3 -0
- data/Gemfile +6 -0
- data/LICENSE +30 -0
- data/README.md +153 -0
- data/Rakefile +33 -0
- data/ext/wapiti/bcd.c +392 -0
- data/ext/wapiti/decoder.c +535 -0
- data/ext/wapiti/decoder.h +46 -0
- data/ext/wapiti/extconf.rb +8 -0
- data/ext/wapiti/gradient.c +818 -0
- data/ext/wapiti/gradient.h +81 -0
- data/ext/wapiti/lbfgs.c +294 -0
- data/ext/wapiti/model.c +296 -0
- data/ext/wapiti/model.h +100 -0
- data/ext/wapiti/native.c +1238 -0
- data/ext/wapiti/native.h +15 -0
- data/ext/wapiti/options.c +278 -0
- data/ext/wapiti/options.h +91 -0
- data/ext/wapiti/pattern.c +395 -0
- data/ext/wapiti/pattern.h +56 -0
- data/ext/wapiti/progress.c +167 -0
- data/ext/wapiti/progress.h +43 -0
- data/ext/wapiti/quark.c +272 -0
- data/ext/wapiti/quark.h +46 -0
- data/ext/wapiti/reader.c +553 -0
- data/ext/wapiti/reader.h +73 -0
- data/ext/wapiti/rprop.c +191 -0
- data/ext/wapiti/sequence.h +148 -0
- data/ext/wapiti/sgdl1.c +218 -0
- data/ext/wapiti/thread.c +171 -0
- data/ext/wapiti/thread.h +42 -0
- data/ext/wapiti/tools.c +202 -0
- data/ext/wapiti/tools.h +54 -0
- data/ext/wapiti/trainers.h +39 -0
- data/ext/wapiti/vmath.c +372 -0
- data/ext/wapiti/vmath.h +51 -0
- data/ext/wapiti/wapiti.c +288 -0
- data/ext/wapiti/wapiti.h +45 -0
- data/lib/wapiti.rb +30 -0
- data/lib/wapiti/errors.rb +17 -0
- data/lib/wapiti/model.rb +49 -0
- data/lib/wapiti/options.rb +113 -0
- data/lib/wapiti/utility.rb +15 -0
- data/lib/wapiti/version.rb +3 -0
- data/spec/fixtures/ch.mod +18550 -0
- data/spec/fixtures/chpattern.txt +52 -0
- data/spec/fixtures/chtest.txt +1973 -0
- data/spec/fixtures/chtrain.txt +19995 -0
- data/spec/fixtures/nppattern.txt +52 -0
- data/spec/fixtures/nptest.txt +1973 -0
- data/spec/fixtures/nptrain.txt +19995 -0
- data/spec/fixtures/pattern.txt +14 -0
- data/spec/fixtures/test.txt +60000 -0
- data/spec/fixtures/train.txt +1200 -0
- data/spec/spec_helper.rb +21 -0
- data/spec/wapiti/model_spec.rb +173 -0
- data/spec/wapiti/native_spec.rb +12 -0
- data/spec/wapiti/options_spec.rb +175 -0
- data/spec/wapiti/utility_spec.rb +22 -0
- data/wapiti.gemspec +35 -0
- metadata +178 -0
@@ -0,0 +1,39 @@
+/*
+ * Wapiti - A linear-chain CRF tool
+ *
+ * Copyright (c) 2009-2011 CNRS
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef trainers_h
+#define trainers_h
+
+#include "model.h"
+
+void trn_lbfgs(mdl_t *mdl);
+void trn_sgdl1(mdl_t *mdl);
+void trn_bcd(mdl_t *mdl);
+void trn_rprop(mdl_t *mdl);
+
+#endif
+
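trainers.h only declares the four training entry points (L-BFGS, SGD-L1, block coordinate descent, RPROP), each taking a fully configured mdl_t. The sketch below is a hypothetical illustration of how such entry points can be selected by name, in the style of wapiti's "--algo" option; the table and the trn_get helper are written for this note and are not copied from the gem's wapiti.c. It assumes compilation inside data/ext/wapiti so that model.h and trainers.h resolve.

/* Illustrative name-to-trainer dispatch over the prototypes declared above.
 * The table contents are an assumption for this example, not the gem's own. */
#include <string.h>
#include "model.h"
#include "trainers.h"

typedef void (*trn_t)(mdl_t *mdl);

static const struct { const char *name; trn_t train; } trn_lst[] = {
    { "l-bfgs", trn_lbfgs },
    { "sgd-l1", trn_sgdl1 },
    { "bcd",    trn_bcd   },
    { "rprop",  trn_rprop },
};

/* Return the trainer registered under the given name, or NULL if unknown. */
static trn_t trn_get(const char *name) {
    for (size_t i = 0; i < sizeof(trn_lst) / sizeof(trn_lst[0]); i++)
        if (strcmp(trn_lst[i].name, name) == 0)
            return trn_lst[i].train;
    return NULL;
}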
data/ext/wapiti/vmath.c
ADDED
@@ -0,0 +1,372 @@
+/*
+ * Wapiti - A linear-chain CRF tool
+ *
+ * Copyright (c) 2009-2011 CNRS
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <assert.h>
+#include <math.h>
+#include <stddef.h>
+#include <stdlib.h>
+
+#include "wapiti.h"
+#include "tools.h"
+#include "vmath.h"
+
+#if defined(__SSE2__) && !defined(XVM_ANSI)
+#include <emmintrin.h>
+#endif
+
+/* xvm_mode:
+ * Return a string describing the SSE level used in the optimized code paths.
+ */
+const char *xvm_mode(void) {
+#if defined(__SSE2__) && !defined(XVM_ANSI)
+    return "sse2";
+#else
+    return "no-sse";
+#endif
+}
+
+/* xvm_new:
+ * Allocate a new vector suitable to be used in the SSE code paths. This
+ * ensure that the vector size contains the need padding. You must only use
+ * vector allocated by this function if you use the optimized code paths.
+ */
+double *xvm_new(size_t N) {
+#if defined(__SSE2__) && !defined(XVM_ANSI)
+    if (N % 4 != 0)
+        N += 4 - N % 4;
+    void *ptr = _mm_malloc(sizeof(double) * N, 16);
+    if (ptr == NULL)
+        fatal("out of memory");
+    return ptr;
+#else
+    return wapiti_xmalloc(sizeof(double) * N);
+#endif
+}
+
+/* xvm_free:
+ * Free a vector allocated by xvm_new.
+ */
+void xvm_free(double x[]) {
+#if defined(__SSE2__) && !defined(XVM_ANSI)
+    _mm_free(x);
+#else
+    free(x);
+#endif
+}
+
+/* xvm_neg:
+ * Return the component-wise negation of the given vector:
+ *     r = -x
+ */
+void xvm_neg(double r[], const double x[], size_t N) {
+#if defined(__SSE2__) && !defined(XVM_ANSI)
+    assert(r != NULL && ((size_t)r % 16) == 0);
+    assert(x != NULL && ((size_t)x % 16) == 0);
+    const __m128d vz = _mm_setzero_pd();
+    for (size_t n = 0; n < N; n += 4) {
+        const __m128d x0 = _mm_load_pd(x + n    );
+        const __m128d x1 = _mm_load_pd(x + n + 2);
+        const __m128d r0 = _mm_sub_pd(vz, x0);
+        const __m128d r1 = _mm_sub_pd(vz, x1);
+        _mm_store_pd(r + n,     r0);
+        _mm_store_pd(r + n + 2, r1);
+    }
+#else
+    for (size_t n = 0; n < N; n++)
+        r[n] = -x[n];
+#endif
+}
+
+/* xvm_sub:
+ * Return the difference of the two given vector:
+ *     r = x .- y
+ */
+void xvm_sub(double r[], const double x[], const double y[], size_t N) {
+#if defined(__SSE2__) && !defined(XVM_ANSI)
+    assert(r != NULL && ((size_t)r % 16) == 0);
+    assert(x != NULL && ((size_t)x % 16) == 0);
+    assert(y != NULL && ((size_t)y % 16) == 0);
+    for (size_t n = 0; n < N; n += 4) {
+        const __m128d x0 = _mm_load_pd(x + n    );
+        const __m128d x1 = _mm_load_pd(x + n + 2);
+        const __m128d y0 = _mm_load_pd(y + n    );
+        const __m128d y1 = _mm_load_pd(y + n + 2);
+        const __m128d r0 = _mm_sub_pd(x0, y0);
+        const __m128d r1 = _mm_sub_pd(x1, y1);
+        _mm_store_pd(r + n,     r0);
+        _mm_store_pd(r + n + 2, r1);
+    }
+#else
+    for (size_t n = 0; n < N; n++)
+        r[n] = x[n] - y[n];
+#endif
+}
+
+/* xvm_scale:
+ * Return the given vector scaled by a constant:
+ *     r = a * x
+ */
+void xvm_scale(double r[], const double x[], double a, size_t N) {
+    for (size_t n = 0; n < N; n++)
+        r[n] = x[n] * a;
+}
+
+/* xvm_norm:
+ * Store a normalized copy of the given vector in r and return the
+ * normalization factor.
+ */
+double xvm_unit(double r[], const double x[], size_t N) {
+    double sum = 0.0;
+    for (size_t n = 0; n < N; n++)
+        sum += x[n];
+    const double scale = 1.0 / sum;
+    xvm_scale(r, x, scale, N);
+    return scale;
+}
+
+/* xvm_norm:
+ * Return the euclidian norm of the given vector.
+ */
+double xvm_norm(const double x[], size_t N) {
+    double r = 0.0;
+#if defined(__SSE2__) && !defined(XVM_ANSI)
+    assert(x != NULL && ((size_t)x % 16) == 0);
+    size_t n, d = N % 4;
+    __m128d s0 = _mm_setzero_pd();
+    __m128d s1 = _mm_setzero_pd();
+    for (n = 0; n < N - d; n += 4) {
+        const __m128d x0 = _mm_load_pd(x + n    );
+        const __m128d x1 = _mm_load_pd(x + n + 2);
+        const __m128d r0 = _mm_mul_pd(x0, x0);
+        const __m128d r1 = _mm_mul_pd(x1, x1);
+        s0 = _mm_add_pd(s0, r0);
+        s1 = _mm_add_pd(s1, r1);
+    }
+    s0 = _mm_add_pd(s0, s1);
+    s1 = _mm_shuffle_pd(s0, s0, _MM_SHUFFLE2(1, 1));
+    s0 = _mm_add_pd(s0, s1);
+    _mm_store_sd(&r, s0);
+    for ( ; n < N; n++)
+        r += x[n] * x[n];
+#else
+    for (size_t n = 0; n < N; n++)
+        r += x[n] * x[n];
+#endif
+    return sqrt(r);
+}
+
+/* xvm_dot:
+ * Return the dot product of the two given vectors.
+ */
+double xvm_dot(const double x[], const double y[], size_t N) {
+    double r = 0.0;
+#if defined(__SSE2__) && !defined(XVM_ANSI)
+    assert(x != NULL && ((size_t)x % 16) == 0);
+    assert(y != NULL && ((size_t)y % 16) == 0);
+    size_t n, d = N % 4;
+    __m128d s0 = _mm_setzero_pd();
+    __m128d s1 = _mm_setzero_pd();
+    for (n = 0; n < N - d; n += 4) {
+        const __m128d x0 = _mm_load_pd(x + n    );
+        const __m128d x1 = _mm_load_pd(x + n + 2);
+        const __m128d y0 = _mm_load_pd(y + n    );
+        const __m128d y1 = _mm_load_pd(y + n + 2);
+        const __m128d r0 = _mm_mul_pd(x0, y0);
+        const __m128d r1 = _mm_mul_pd(x1, y1);
+        s0 = _mm_add_pd(s0, r0);
+        s1 = _mm_add_pd(s1, r1);
+    }
+    s0 = _mm_add_pd(s0, s1);
+    s1 = _mm_shuffle_pd(s0, s0, _MM_SHUFFLE2(1, 1));
+    s0 = _mm_add_pd(s0, s1);
+    _mm_store_sd(&r, s0);
+    for ( ; n < N; n++)
+        r += x[n] * y[n];
+#else
+    for (size_t n = 0; n < N; n++)
+        r += x[n] * y[n];
+#endif
+    return r;
+}
+
+/* xvm_axpy:
+ * Return the sum of x scaled by a and y:
+ *     r = a * x + y
+ */
+void xvm_axpy(double r[], double a, const double x[], const double y[], size_t N) {
+#if defined(__SSE2__) && !defined(XVM_ANSI)
+    assert(r != NULL && ((size_t)r % 16) == 0);
+    assert(x != NULL && ((size_t)x % 16) == 0);
+    assert(y != NULL && ((size_t)y % 16) == 0);
+    const __m128d va = _mm_set1_pd(a);
+    for (size_t n = 0; n < N; n += 4) {
+        const __m128d x0 = _mm_load_pd(x + n    );
+        const __m128d x1 = _mm_load_pd(x + n + 2);
+        const __m128d y0 = _mm_load_pd(y + n    );
+        const __m128d y1 = _mm_load_pd(y + n + 2);
+        const __m128d t0 = _mm_mul_pd(x0, va);
+        const __m128d t1 = _mm_mul_pd(x1, va);
+        const __m128d r0 = _mm_add_pd(t0, y0);
+        const __m128d r1 = _mm_add_pd(t1, y1);
+        _mm_store_pd(r + n,     r0);
+        _mm_store_pd(r + n + 2, r1);
+    }
+#else
+    for (size_t n = 0; n < N; n++)
+        r[n] = a * x[n] + y[n];
+#endif
+}
+
+/* vms_expma:
+ * Compute the component-wise exponential minus <a>:
+ *     r[i] <-- e^x[i] - a
+ *
+ * The following comments apply to the SSE2 version of this code:
+ *
+ * Computation is done four doubles as a time by doing computation in paralell
+ * on two vectors of two doubles using SSE2 intrisics. If size is not a
+ * multiple of 4, the remaining elements are computed using the stdlib exp().
+ *
+ * The computation is done by first doing a range reduction of the argument of
+ * the type e^x = 2^k * e^f choosing k and f so that f is in [-0.5, 0.5].
+ * Then 2^k can be computed exactly using bit operations to build the double
+ * result and e^f can be efficiently computed with enough precision using a
+ * polynomial approximation.
+ *
+ * The polynomial approximation is done with 11th order polynomial computed by
+ * Remez algorithm with the Solya suite, instead of the more classical Pade
+ * polynomial form cause it is better suited to parallel execution. In order
+ * to achieve the same precision, a Pade form seems to require three less
+ * multiplications but need a very costly division, so it will be less
+ * efficient.
+ *
+ * The maximum error is less than 1lsb and special cases are correctly
+ * handled:
+ *     +inf or +oor --> return +inf
+ *     -inf or -oor --> return 0.0
+ *     qNaN or sNaN --> return qNaN
+ *
+ * This code is copyright 2004-2011 Thomas Lavergne and licenced under the
+ * BSD licence like the remaining of Wapiti.
+ */
+void xvm_expma(double r[], const double x[], double a, size_t N) {
+#if defined(__SSE2__) && !defined(XVM_ANSI)
+#define xvm_vconst(v) (_mm_castsi128_pd(_mm_set1_epi64x((v))))
+    assert(r != NULL && ((size_t)r % 16) == 0);
+    assert(x != NULL && ((size_t)x % 16) == 0);
+    const __m128i vl  = _mm_set1_epi64x(0x3ff0000000000000ULL);
+    const __m128d ehi = xvm_vconst(0x4086232bdd7abcd2ULL);
+    const __m128d elo = xvm_vconst(0xc086232bdd7abcd2ULL);
+    const __m128d l2e = xvm_vconst(0x3ff71547652b82feULL);
+    const __m128d hal = xvm_vconst(0x3fe0000000000000ULL);
+    const __m128d nan = xvm_vconst(0xfff8000000000000ULL);
+    const __m128d inf = xvm_vconst(0x7ff0000000000000ULL);
+    const __m128d c1  = xvm_vconst(0x3fe62e4000000000ULL);
+    const __m128d c2  = xvm_vconst(0x3eb7f7d1cf79abcaULL);
+    const __m128d p0  = xvm_vconst(0x3feffffffffffffeULL);
+    const __m128d p1  = xvm_vconst(0x3ff000000000000bULL);
+    const __m128d p2  = xvm_vconst(0x3fe0000000000256ULL);
+    const __m128d p3  = xvm_vconst(0x3fc5555555553a2aULL);
+    const __m128d p4  = xvm_vconst(0x3fa55555554e57d3ULL);
+    const __m128d p5  = xvm_vconst(0x3f81111111362f4fULL);
+    const __m128d p6  = xvm_vconst(0x3f56c16c25f3bae1ULL);
+    const __m128d p7  = xvm_vconst(0x3f2a019fc9310c33ULL);
+    const __m128d p8  = xvm_vconst(0x3efa01825f3cb28bULL);
+    const __m128d p9  = xvm_vconst(0x3ec71e2bd880fdd8ULL);
+    const __m128d p10 = xvm_vconst(0x3e9299068168ac8fULL);
+    const __m128d p11 = xvm_vconst(0x3e5ac52350b60b19ULL);
+    const __m128d va  = _mm_set1_pd(a);
+    for (size_t n = 0; n < N; n += 4) {
+        __m128d mn1, mn2, mi1, mi2;
+        __m128d t1,  t2,  d1,  d2;
+        __m128d v1,  v2,  w1,  w2;
+        __m128i k1,  k2;
+        __m128d f1,  f2;
+        // Load the next four values
+        __m128d x1 = _mm_load_pd(x + n    );
+        __m128d x2 = _mm_load_pd(x + n + 2);
+        // Check for out of ranges, infinites and NaN
+        mn1 = _mm_cmpneq_pd(x1, x1);    mn2 = _mm_cmpneq_pd(x2, x2);
+        mi1 = _mm_cmpgt_pd(x1, ehi);    mi2 = _mm_cmpgt_pd(x2, ehi);
+        x1  = _mm_max_pd(x1, elo);      x2  = _mm_max_pd(x2, elo);
+        // Range reduction: we search k and f such that e^x = 2^k * e^f
+        // with f in [-0.5, 0.5]
+        t1 = _mm_mul_pd(x1, l2e);       t2 = _mm_mul_pd(x2, l2e);
+        t1 = _mm_add_pd(t1, hal);       t2 = _mm_add_pd(t2, hal);
+        k1 = _mm_cvttpd_epi32(t1);      k2 = _mm_cvttpd_epi32(t2);
+        d1 = _mm_cvtepi32_pd(k1);       d2 = _mm_cvtepi32_pd(k2);
+        t1 = _mm_mul_pd(d1, c1);        t2 = _mm_mul_pd(d2, c1);
+        f1 = _mm_sub_pd(x1, t1);        f2 = _mm_sub_pd(x2, t2);
+        t1 = _mm_mul_pd(d1, c2);        t2 = _mm_mul_pd(d2, c2);
+        f1 = _mm_sub_pd(f1, t1);        f2 = _mm_sub_pd(f2, t2);
+        // Evaluation of e^f using a 11th order polynom in Horner form
+        v1 = _mm_mul_pd(f1, p11);       v2 = _mm_mul_pd(f2, p11);
+        v1 = _mm_add_pd(v1, p10);       v2 = _mm_add_pd(v2, p10);
+        v1 = _mm_mul_pd(v1, f1);        v2 = _mm_mul_pd(v2, f2);
+        v1 = _mm_add_pd(v1, p9);        v2 = _mm_add_pd(v2, p9);
+        v1 = _mm_mul_pd(v1, f1);        v2 = _mm_mul_pd(v2, f2);
+        v1 = _mm_add_pd(v1, p8);        v2 = _mm_add_pd(v2, p8);
+        v1 = _mm_mul_pd(v1, f1);        v2 = _mm_mul_pd(v2, f2);
+        v1 = _mm_add_pd(v1, p7);        v2 = _mm_add_pd(v2, p7);
+        v1 = _mm_mul_pd(v1, f1);        v2 = _mm_mul_pd(v2, f2);
+        v1 = _mm_add_pd(v1, p6);        v2 = _mm_add_pd(v2, p6);
+        v1 = _mm_mul_pd(v1, f1);        v2 = _mm_mul_pd(v2, f2);
+        v1 = _mm_add_pd(v1, p5);        v2 = _mm_add_pd(v2, p5);
+        v1 = _mm_mul_pd(v1, f1);        v2 = _mm_mul_pd(v2, f2);
+        v1 = _mm_add_pd(v1, p4);        v2 = _mm_add_pd(v2, p4);
+        v1 = _mm_mul_pd(v1, f1);        v2 = _mm_mul_pd(v2, f2);
+        v1 = _mm_add_pd(v1, p3);        v2 = _mm_add_pd(v2, p3);
+        v1 = _mm_mul_pd(v1, f1);        v2 = _mm_mul_pd(v2, f2);
+        v1 = _mm_add_pd(v1, p2);        v2 = _mm_add_pd(v2, p2);
+        v1 = _mm_mul_pd(v1, f1);        v2 = _mm_mul_pd(v2, f2);
+        v1 = _mm_add_pd(v1, p1);        v2 = _mm_add_pd(v2, p1);
+        v1 = _mm_mul_pd(v1, f1);        v2 = _mm_mul_pd(v2, f2);
+        v1 = _mm_add_pd(v1, p0);        v2 = _mm_add_pd(v2, p0);
+        // Evaluation of 2^k using bitops to achieve exact computation
+        k1 = _mm_slli_epi32(k1, 20);    k2 = _mm_slli_epi32(k2, 20);
+        k1 = _mm_shuffle_epi32(k1, 0x72);
+        k2 = _mm_shuffle_epi32(k2, 0x72);
+        k1 = _mm_add_epi32(k1, vl);     k2 = _mm_add_epi32(k2, vl);
+        w1 = _mm_castsi128_pd(k1);      w2 = _mm_castsi128_pd(k2);
+        // Return to full range to substract <a>
+        v1 = _mm_mul_pd(v1, w1);        v2 = _mm_mul_pd(v2, w2);
+        v1 = _mm_sub_pd(v1, va);        v2 = _mm_sub_pd(v2, va);
+        // Finally apply infinite and NaN where needed
+        v1 = _mm_or_pd(_mm_and_pd(mi1, inf), _mm_andnot_pd(mi1, v1));
+        v2 = _mm_or_pd(_mm_and_pd(mi2, inf), _mm_andnot_pd(mi2, v2));
+        v1 = _mm_or_pd(_mm_and_pd(mn1, nan), _mm_andnot_pd(mn1, v1));
+        v2 = _mm_or_pd(_mm_and_pd(mn2, nan), _mm_andnot_pd(mn2, v2));
+        // Store the results
+        _mm_store_pd(r + n,     v1);
+        _mm_store_pd(r + n + 2, v2);
+    }
+#else
+    for (size_t n = 0; n < N; n++)
+        r[n] = exp(x[n]) - a;
+#endif
+}
+
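The long comment above xvm_expma describes the SSE2 strategy: split e^x into 2^k * e^f by range reduction, build 2^k exactly from the floating-point exponent bits, and approximate e^f with an 11th-order polynomial. The stand-alone scalar sketch below is not part of the gem; it reproduces only the range-reduction step, letting the C library's exp() stand in for the polynomial and ldexp() supply the exact 2^k, so its output can be compared directly against exp(x) - a.

/* Scalar illustration of the range reduction used by xvm_expma above:
 * e^x = 2^k * e^f with k = round(x / ln 2) and f = x - k * ln 2, so the
 * reduced argument f stays close to zero (|f| <= ln(2)/2 ~ 0.35). */
#include <math.h>
#include <stdio.h>

static double expma_scalar(double x, double a) {
    static const double ln2 = 0.69314718055994530942;
    const long   k = lround(x / ln2);       /* integer power of two          */
    const double f = x - (double)k * ln2;   /* reduced argument near zero    */
    return ldexp(exp(f), (int)k) - a;       /* apply 2^k exactly via ldexp   */
}

int main(void) {
    for (double x = -4.0; x <= 4.0; x += 1.0)
        printf("x = % .1f   expma = % .15e   exp(x)-1 = % .15e\n",
               x, expma_scalar(x, 1.0), exp(x) - 1.0);
    return 0;
}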
data/ext/wapiti/vmath.h
ADDED
@@ -0,0 +1,51 @@
+/*
+ * Wapiti - A linear-chain CRF tool
+ *
+ * Copyright (c) 2009-2011 CNRS
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef vmath_h
+#define vmath_h
+
+#include <stddef.h>
+
+const char *xvm_mode(void);
+
+double *xvm_new(size_t N);
+void xvm_free(double x[]);
+
+void xvm_neg(double r[], const double x[], size_t N);
+void xvm_sub(double r[], const double x[], const double y[], size_t N);
+void xvm_scale(double r[], const double x[], double a, size_t N);
+double xvm_unit(double r[], const double x[], size_t N);
+
+double xvm_norm(const double x[], size_t N);
+double xvm_dot(const double x[], const double y[], size_t N);
+
+void xvm_axpy(double r[], double a, const double x[], const double y[], size_t N);
+
+void xvm_expma(double r[], const double x[], double a, size_t N);
+
+#endif
+
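Together, vmath.h and vmath.c form a small aligned-vector library used by the gem's gradient and trainer code. Note that xvm_new() rounds the requested size up to a multiple of four doubles and 16-byte aligns it in the SSE2 build (a request for 6 doubles reserves 8), which is why vectors passed to the other xvm_* routines should come from xvm_new(). The sketch below is a minimal usage example, not shipped with the gem; it assumes it is compiled and linked together with vmath.c (plus tools.c, which supplies fatal() and wapiti_xmalloc()) from data/ext/wapiti.

/* Hypothetical stand-alone usage of the xvm_* API declared in vmath.h.
 * Assumes compilation and linking against vmath.c and tools.c from the gem. */
#include <stdio.h>
#include "vmath.h"

int main(void) {
    const size_t N = 8;                       /* already a multiple of 4      */
    double *x = xvm_new(N), *y = xvm_new(N), *r = xvm_new(N);
    for (size_t i = 0; i < N; i++) {
        x[i] = (double)i;                     /* x = (0, 1, ..., 7)           */
        y[i] = 1.0;
    }
    printf("mode: %s\n", xvm_mode());         /* "sse2" or "no-sse"           */
    printf("dot : %g\n", xvm_dot(x, y, N));   /* 0 + 1 + ... + 7 = 28         */
    printf("norm: %g\n", xvm_norm(x, N));     /* sqrt(140) ~ 11.832           */
    xvm_axpy(r, 2.0, x, y, N);                /* r = 2*x + y                  */
    printf("axpy: %g\n", r[7]);               /* 2*7 + 1 = 15                 */
    xvm_expma(r, x, 1.0, N);                  /* r[i] = e^x[i] - 1            */
    printf("expm: %g\n", r[0]);               /* e^0 - 1 = 0                  */
    xvm_free(r); xvm_free(y); xvm_free(x);
    return 0;
}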