wapiti 0.0.1

Files changed (63)
  1. data/.autotest +13 -0
  2. data/.gitignore +5 -0
  3. data/.rspec +3 -0
  4. data/Gemfile +6 -0
  5. data/LICENSE +30 -0
  6. data/README.md +153 -0
  7. data/Rakefile +33 -0
  8. data/ext/wapiti/bcd.c +392 -0
  9. data/ext/wapiti/decoder.c +535 -0
  10. data/ext/wapiti/decoder.h +46 -0
  11. data/ext/wapiti/extconf.rb +8 -0
  12. data/ext/wapiti/gradient.c +818 -0
  13. data/ext/wapiti/gradient.h +81 -0
  14. data/ext/wapiti/lbfgs.c +294 -0
  15. data/ext/wapiti/model.c +296 -0
  16. data/ext/wapiti/model.h +100 -0
  17. data/ext/wapiti/native.c +1238 -0
  18. data/ext/wapiti/native.h +15 -0
  19. data/ext/wapiti/options.c +278 -0
  20. data/ext/wapiti/options.h +91 -0
  21. data/ext/wapiti/pattern.c +395 -0
  22. data/ext/wapiti/pattern.h +56 -0
  23. data/ext/wapiti/progress.c +167 -0
  24. data/ext/wapiti/progress.h +43 -0
  25. data/ext/wapiti/quark.c +272 -0
  26. data/ext/wapiti/quark.h +46 -0
  27. data/ext/wapiti/reader.c +553 -0
  28. data/ext/wapiti/reader.h +73 -0
  29. data/ext/wapiti/rprop.c +191 -0
  30. data/ext/wapiti/sequence.h +148 -0
  31. data/ext/wapiti/sgdl1.c +218 -0
  32. data/ext/wapiti/thread.c +171 -0
  33. data/ext/wapiti/thread.h +42 -0
  34. data/ext/wapiti/tools.c +202 -0
  35. data/ext/wapiti/tools.h +54 -0
  36. data/ext/wapiti/trainers.h +39 -0
  37. data/ext/wapiti/vmath.c +372 -0
  38. data/ext/wapiti/vmath.h +51 -0
  39. data/ext/wapiti/wapiti.c +288 -0
  40. data/ext/wapiti/wapiti.h +45 -0
  41. data/lib/wapiti.rb +30 -0
  42. data/lib/wapiti/errors.rb +17 -0
  43. data/lib/wapiti/model.rb +49 -0
  44. data/lib/wapiti/options.rb +113 -0
  45. data/lib/wapiti/utility.rb +15 -0
  46. data/lib/wapiti/version.rb +3 -0
  47. data/spec/fixtures/ch.mod +18550 -0
  48. data/spec/fixtures/chpattern.txt +52 -0
  49. data/spec/fixtures/chtest.txt +1973 -0
  50. data/spec/fixtures/chtrain.txt +19995 -0
  51. data/spec/fixtures/nppattern.txt +52 -0
  52. data/spec/fixtures/nptest.txt +1973 -0
  53. data/spec/fixtures/nptrain.txt +19995 -0
  54. data/spec/fixtures/pattern.txt +14 -0
  55. data/spec/fixtures/test.txt +60000 -0
  56. data/spec/fixtures/train.txt +1200 -0
  57. data/spec/spec_helper.rb +21 -0
  58. data/spec/wapiti/model_spec.rb +173 -0
  59. data/spec/wapiti/native_spec.rb +12 -0
  60. data/spec/wapiti/options_spec.rb +175 -0
  61. data/spec/wapiti/utility_spec.rb +22 -0
  62. data/wapiti.gemspec +35 -0
  63. metadata +178 -0
data/ext/wapiti/trainers.h
@@ -0,0 +1,39 @@
+ /*
+ * Wapiti - A linear-chain CRF tool
+ *
+ * Copyright (c) 2009-2011 CNRS
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+ #ifndef trainers_h
+ #define trainers_h
+
+ #include "model.h"
+
+ void trn_lbfgs(mdl_t *mdl);
+ void trn_sgdl1(mdl_t *mdl);
+ void trn_bcd(mdl_t *mdl);
+ void trn_rprop(mdl_t *mdl);
+
+ #endif
+
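The four trn_* entry points declared above share the signature void (*)(mdl_t *), which makes it natural to select a trainer by name at runtime. The sketch below shows one way a caller could do that; it is illustrative only, not code from this gem: the table contents, the algorithm names, and the trn_get helper are all assumptions, with mdl_t coming from model.h.

#include <stddef.h>
#include <string.h>

#include "trainers.h"

typedef void (*trn_f)(mdl_t *mdl);

/* Hypothetical name-to-trainer table; the entries are assumptions made
 * for illustration, not the gem's actual option names. */
static const struct {
    const char *name;
    trn_f       train;
} trn_lst[] = {
    {"l-bfgs", trn_lbfgs},
    {"sgd-l1", trn_sgdl1},
    {"bcd",    trn_bcd},
    {"rprop",  trn_rprop},
};

/* Look up a trainer by name; returns NULL for unknown algorithms. */
static trn_f trn_get(const char *name) {
    for (size_t i = 0; i < sizeof(trn_lst) / sizeof(trn_lst[0]); i++)
        if (strcmp(name, trn_lst[i].name) == 0)
            return trn_lst[i].train;
    return NULL;
}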
data/ext/wapiti/vmath.c
@@ -0,0 +1,372 @@
+ /*
+ * Wapiti - A linear-chain CRF tool
+ *
+ * Copyright (c) 2009-2011 CNRS
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+ #include <assert.h>
+ #include <math.h>
+ #include <stddef.h>
+ #include <stdlib.h>
+
+ #include "wapiti.h"
+ #include "tools.h"
+ #include "vmath.h"
+
+ #if defined(__SSE2__) && !defined(XVM_ANSI)
+ #include <emmintrin.h>
+ #endif
+
+ /* xvm_mode:
+ * Return a string describing the SSE level used in the optimized code paths.
+ */
+ const char *xvm_mode(void) {
+ #if defined(__SSE2__) && !defined(XVM_ANSI)
+     return "sse2";
+ #else
+     return "no-sse";
+ #endif
+ }
+
+ /* xvm_new:
+ * Allocate a new vector suitable for use in the SSE code paths. This
+ * ensures that the vector size includes the needed padding. You must only
+ * use vectors allocated by this function in the optimized code paths.
+ */
+ double *xvm_new(size_t N) {
+ #if defined(__SSE2__) && !defined(XVM_ANSI)
+     if (N % 4 != 0)
+         N += 4 - N % 4;
+     void *ptr = _mm_malloc(sizeof(double) * N, 16);
+     if (ptr == NULL)
+         fatal("out of memory");
+     return ptr;
+ #else
+     return wapiti_xmalloc(sizeof(double) * N);
+ #endif
+ }
+
+ /* xvm_free:
+ * Free a vector allocated by xvm_new.
+ */
+ void xvm_free(double x[]) {
+ #if defined(__SSE2__) && !defined(XVM_ANSI)
+     _mm_free(x);
+ #else
+     free(x);
+ #endif
+ }
+
+ /* xvm_neg:
+ * Return the component-wise negation of the given vector:
+ *     r = -x
+ */
+ void xvm_neg(double r[], const double x[], size_t N) {
+ #if defined(__SSE2__) && !defined(XVM_ANSI)
+     assert(r != NULL && ((size_t)r % 16) == 0);
+     assert(x != NULL && ((size_t)x % 16) == 0);
+     const __m128d vz = _mm_setzero_pd();
+     for (size_t n = 0; n < N; n += 4) {
+         const __m128d x0 = _mm_load_pd(x + n);
+         const __m128d x1 = _mm_load_pd(x + n + 2);
+         const __m128d r0 = _mm_sub_pd(vz, x0);
+         const __m128d r1 = _mm_sub_pd(vz, x1);
+         _mm_store_pd(r + n, r0);
+         _mm_store_pd(r + n + 2, r1);
+     }
+ #else
+     for (size_t n = 0; n < N; n++)
+         r[n] = -x[n];
+ #endif
+ }
+
+ /* xvm_sub:
+ * Return the difference of the two given vectors:
+ *     r = x .- y
+ */
+ void xvm_sub(double r[], const double x[], const double y[], size_t N) {
+ #if defined(__SSE2__) && !defined(XVM_ANSI)
+     assert(r != NULL && ((size_t)r % 16) == 0);
+     assert(x != NULL && ((size_t)x % 16) == 0);
+     assert(y != NULL && ((size_t)y % 16) == 0);
+     for (size_t n = 0; n < N; n += 4) {
+         const __m128d x0 = _mm_load_pd(x + n);
+         const __m128d x1 = _mm_load_pd(x + n + 2);
+         const __m128d y0 = _mm_load_pd(y + n);
+         const __m128d y1 = _mm_load_pd(y + n + 2);
+         const __m128d r0 = _mm_sub_pd(x0, y0);
+         const __m128d r1 = _mm_sub_pd(x1, y1);
+         _mm_store_pd(r + n, r0);
+         _mm_store_pd(r + n + 2, r1);
+     }
+ #else
+     for (size_t n = 0; n < N; n++)
+         r[n] = x[n] - y[n];
+ #endif
+ }
+
+ /* xvm_scale:
+ * Return the given vector scaled by a constant:
+ *     r = a * x
+ */
+ void xvm_scale(double r[], const double x[], double a, size_t N) {
+     for (size_t n = 0; n < N; n++)
+         r[n] = x[n] * a;
+ }
+
+ /* xvm_unit:
+ * Store in r a copy of the given vector scaled so that its components sum
+ * to one, and return the scaling factor.
+ */
+ double xvm_unit(double r[], const double x[], size_t N) {
+     double sum = 0.0;
+     for (size_t n = 0; n < N; n++)
+         sum += x[n];
+     const double scale = 1.0 / sum;
+     xvm_scale(r, x, scale, N);
+     return scale;
+ }
+
+ /* xvm_norm:
+ * Return the Euclidean norm of the given vector.
+ */
+ double xvm_norm(const double x[], size_t N) {
+     double r = 0.0;
+ #if defined(__SSE2__) && !defined(XVM_ANSI)
+     assert(x != NULL && ((size_t)x % 16) == 0);
+     size_t n, d = N % 4;
+     __m128d s0 = _mm_setzero_pd();
+     __m128d s1 = _mm_setzero_pd();
+     for (n = 0; n < N - d; n += 4) {
+         const __m128d x0 = _mm_load_pd(x + n);
+         const __m128d x1 = _mm_load_pd(x + n + 2);
+         const __m128d r0 = _mm_mul_pd(x0, x0);
+         const __m128d r1 = _mm_mul_pd(x1, x1);
+         s0 = _mm_add_pd(s0, r0);
+         s1 = _mm_add_pd(s1, r1);
+     }
+     s0 = _mm_add_pd(s0, s1);
+     s1 = _mm_shuffle_pd(s0, s0, _MM_SHUFFLE2(1, 1));
+     s0 = _mm_add_pd(s0, s1);
+     _mm_store_sd(&r, s0);
+     for ( ; n < N; n++)
+         r += x[n] * x[n];
+ #else
+     for (size_t n = 0; n < N; n++)
+         r += x[n] * x[n];
+ #endif
+     return sqrt(r);
+ }
+
+ /* xvm_dot:
+ * Return the dot product of the two given vectors.
+ */
+ double xvm_dot(const double x[], const double y[], size_t N) {
+     double r = 0.0;
+ #if defined(__SSE2__) && !defined(XVM_ANSI)
+     assert(x != NULL && ((size_t)x % 16) == 0);
+     assert(y != NULL && ((size_t)y % 16) == 0);
+     size_t n, d = N % 4;
+     __m128d s0 = _mm_setzero_pd();
+     __m128d s1 = _mm_setzero_pd();
+     for (n = 0; n < N - d; n += 4) {
+         const __m128d x0 = _mm_load_pd(x + n);
+         const __m128d x1 = _mm_load_pd(x + n + 2);
+         const __m128d y0 = _mm_load_pd(y + n);
+         const __m128d y1 = _mm_load_pd(y + n + 2);
+         const __m128d r0 = _mm_mul_pd(x0, y0);
+         const __m128d r1 = _mm_mul_pd(x1, y1);
+         s0 = _mm_add_pd(s0, r0);
+         s1 = _mm_add_pd(s1, r1);
+     }
+     s0 = _mm_add_pd(s0, s1);
+     s1 = _mm_shuffle_pd(s0, s0, _MM_SHUFFLE2(1, 1));
+     s0 = _mm_add_pd(s0, s1);
+     _mm_store_sd(&r, s0);
+     for ( ; n < N; n++)
+         r += x[n] * y[n];
+ #else
+     for (size_t n = 0; n < N; n++)
+         r += x[n] * y[n];
+ #endif
+     return r;
+ }
+
+ /* xvm_axpy:
+ * Return the sum of x scaled by a and y:
+ *     r = a * x + y
+ */
+ void xvm_axpy(double r[], double a, const double x[], const double y[], size_t N) {
+ #if defined(__SSE2__) && !defined(XVM_ANSI)
+     assert(r != NULL && ((size_t)r % 16) == 0);
+     assert(x != NULL && ((size_t)x % 16) == 0);
+     assert(y != NULL && ((size_t)y % 16) == 0);
+     const __m128d va = _mm_set1_pd(a);
+     for (size_t n = 0; n < N; n += 4) {
+         const __m128d x0 = _mm_load_pd(x + n);
+         const __m128d x1 = _mm_load_pd(x + n + 2);
+         const __m128d y0 = _mm_load_pd(y + n);
+         const __m128d y1 = _mm_load_pd(y + n + 2);
+         const __m128d t0 = _mm_mul_pd(x0, va);
+         const __m128d t1 = _mm_mul_pd(x1, va);
+         const __m128d r0 = _mm_add_pd(t0, y0);
+         const __m128d r1 = _mm_add_pd(t1, y1);
+         _mm_store_pd(r + n, r0);
+         _mm_store_pd(r + n + 2, r1);
+     }
+ #else
+     for (size_t n = 0; n < N; n++)
+         r[n] = a * x[n] + y[n];
+ #endif
+ }
+
+ /* xvm_expma:
+ * Compute the component-wise exponential minus <a>:
+ *     r[i] <-- e^x[i] - a
+ *
+ * The following comments apply to the SSE2 version of this code:
+ *
+ * Computation is done four doubles at a time by working in parallel on two
+ * vectors of two doubles using SSE2 intrinsics. If the size is not a
+ * multiple of 4, the remaining elements are computed using the stdlib exp().
+ *
+ * The computation is done by first performing a range reduction of the
+ * argument of the form e^x = 2^k * e^f, choosing k and f so that f is in
+ * [-0.5, 0.5]. Then 2^k can be computed exactly using bit operations to
+ * build the double result, and e^f can be computed efficiently with enough
+ * precision using a polynomial approximation.
+ *
+ * The polynomial approximation is an 11th-order polynomial computed by the
+ * Remez algorithm with the Sollya suite, instead of the more classical Padé
+ * form, because it is better suited to parallel execution. To achieve the
+ * same precision, a Padé form seems to require three fewer multiplications
+ * but needs a very costly division, so it would be less efficient.
+ *
+ * The maximum error is less than 1 lsb and special cases are correctly
+ * handled:
+ *     +inf or +oor --> return +inf
+ *     -inf or -oor --> return 0.0
+ *     qNaN or sNaN --> return qNaN
+ *
+ * This code is copyright 2004-2011 Thomas Lavergne and licensed under the
+ * BSD license like the rest of Wapiti.
+ */
+ void xvm_expma(double r[], const double x[], double a, size_t N) {
+ #if defined(__SSE2__) && !defined(XVM_ANSI)
+ #define xvm_vconst(v) (_mm_castsi128_pd(_mm_set1_epi64x((v))))
+     assert(r != NULL && ((size_t)r % 16) == 0);
+     assert(x != NULL && ((size_t)x % 16) == 0);
+     const __m128i vl  = _mm_set1_epi64x(0x3ff0000000000000ULL);
+     const __m128d ehi = xvm_vconst(0x4086232bdd7abcd2ULL);
+     const __m128d elo = xvm_vconst(0xc086232bdd7abcd2ULL);
+     const __m128d l2e = xvm_vconst(0x3ff71547652b82feULL);
+     const __m128d hal = xvm_vconst(0x3fe0000000000000ULL);
+     const __m128d nan = xvm_vconst(0xfff8000000000000ULL);
+     const __m128d inf = xvm_vconst(0x7ff0000000000000ULL);
+     const __m128d c1  = xvm_vconst(0x3fe62e4000000000ULL);
+     const __m128d c2  = xvm_vconst(0x3eb7f7d1cf79abcaULL);
+     const __m128d p0  = xvm_vconst(0x3feffffffffffffeULL);
+     const __m128d p1  = xvm_vconst(0x3ff000000000000bULL);
+     const __m128d p2  = xvm_vconst(0x3fe0000000000256ULL);
+     const __m128d p3  = xvm_vconst(0x3fc5555555553a2aULL);
+     const __m128d p4  = xvm_vconst(0x3fa55555554e57d3ULL);
+     const __m128d p5  = xvm_vconst(0x3f81111111362f4fULL);
+     const __m128d p6  = xvm_vconst(0x3f56c16c25f3bae1ULL);
+     const __m128d p7  = xvm_vconst(0x3f2a019fc9310c33ULL);
+     const __m128d p8  = xvm_vconst(0x3efa01825f3cb28bULL);
+     const __m128d p9  = xvm_vconst(0x3ec71e2bd880fdd8ULL);
+     const __m128d p10 = xvm_vconst(0x3e9299068168ac8fULL);
+     const __m128d p11 = xvm_vconst(0x3e5ac52350b60b19ULL);
+     const __m128d va  = _mm_set1_pd(a);
+     for (size_t n = 0; n < N; n += 4) {
+         __m128d mn1, mn2, mi1, mi2;
+         __m128d t1, t2, d1, d2;
+         __m128d v1, v2, w1, w2;
+         __m128i k1, k2;
+         __m128d f1, f2;
+         // Load the next four values
+         __m128d x1 = _mm_load_pd(x + n);
+         __m128d x2 = _mm_load_pd(x + n + 2);
+         // Check for out-of-range values, infinities and NaN
+         mn1 = _mm_cmpneq_pd(x1, x1); mn2 = _mm_cmpneq_pd(x2, x2);
+         mi1 = _mm_cmpgt_pd(x1, ehi); mi2 = _mm_cmpgt_pd(x2, ehi);
+         x1  = _mm_max_pd(x1, elo);   x2  = _mm_max_pd(x2, elo);
+         // Range reduction: we search k and f such that e^x = 2^k * e^f
+         // with f in [-0.5, 0.5]
+         t1 = _mm_mul_pd(x1, l2e);   t2 = _mm_mul_pd(x2, l2e);
+         t1 = _mm_add_pd(t1, hal);   t2 = _mm_add_pd(t2, hal);
+         k1 = _mm_cvttpd_epi32(t1);  k2 = _mm_cvttpd_epi32(t2);
+         d1 = _mm_cvtepi32_pd(k1);   d2 = _mm_cvtepi32_pd(k2);
+         t1 = _mm_mul_pd(d1, c1);    t2 = _mm_mul_pd(d2, c1);
+         f1 = _mm_sub_pd(x1, t1);    f2 = _mm_sub_pd(x2, t2);
+         t1 = _mm_mul_pd(d1, c2);    t2 = _mm_mul_pd(d2, c2);
+         f1 = _mm_sub_pd(f1, t1);    f2 = _mm_sub_pd(f2, t2);
+         // Evaluation of e^f using an 11th-order polynomial in Horner form
+         v1 = _mm_mul_pd(f1, p11);   v2 = _mm_mul_pd(f2, p11);
+         v1 = _mm_add_pd(v1, p10);   v2 = _mm_add_pd(v2, p10);
+         v1 = _mm_mul_pd(v1, f1);    v2 = _mm_mul_pd(v2, f2);
+         v1 = _mm_add_pd(v1, p9);    v2 = _mm_add_pd(v2, p9);
+         v1 = _mm_mul_pd(v1, f1);    v2 = _mm_mul_pd(v2, f2);
+         v1 = _mm_add_pd(v1, p8);    v2 = _mm_add_pd(v2, p8);
+         v1 = _mm_mul_pd(v1, f1);    v2 = _mm_mul_pd(v2, f2);
+         v1 = _mm_add_pd(v1, p7);    v2 = _mm_add_pd(v2, p7);
+         v1 = _mm_mul_pd(v1, f1);    v2 = _mm_mul_pd(v2, f2);
+         v1 = _mm_add_pd(v1, p6);    v2 = _mm_add_pd(v2, p6);
+         v1 = _mm_mul_pd(v1, f1);    v2 = _mm_mul_pd(v2, f2);
+         v1 = _mm_add_pd(v1, p5);    v2 = _mm_add_pd(v2, p5);
+         v1 = _mm_mul_pd(v1, f1);    v2 = _mm_mul_pd(v2, f2);
+         v1 = _mm_add_pd(v1, p4);    v2 = _mm_add_pd(v2, p4);
+         v1 = _mm_mul_pd(v1, f1);    v2 = _mm_mul_pd(v2, f2);
+         v1 = _mm_add_pd(v1, p3);    v2 = _mm_add_pd(v2, p3);
+         v1 = _mm_mul_pd(v1, f1);    v2 = _mm_mul_pd(v2, f2);
+         v1 = _mm_add_pd(v1, p2);    v2 = _mm_add_pd(v2, p2);
+         v1 = _mm_mul_pd(v1, f1);    v2 = _mm_mul_pd(v2, f2);
+         v1 = _mm_add_pd(v1, p1);    v2 = _mm_add_pd(v2, p1);
+         v1 = _mm_mul_pd(v1, f1);    v2 = _mm_mul_pd(v2, f2);
+         v1 = _mm_add_pd(v1, p0);    v2 = _mm_add_pd(v2, p0);
+         // Evaluation of 2^k using bitops to achieve exact computation
+         k1 = _mm_slli_epi32(k1, 20); k2 = _mm_slli_epi32(k2, 20);
+         k1 = _mm_shuffle_epi32(k1, 0x72);
+         k2 = _mm_shuffle_epi32(k2, 0x72);
+         k1 = _mm_add_epi32(k1, vl); k2 = _mm_add_epi32(k2, vl);
+         w1 = _mm_castsi128_pd(k1);  w2 = _mm_castsi128_pd(k2);
+         // Return to full range to subtract <a>
+         v1 = _mm_mul_pd(v1, w1);    v2 = _mm_mul_pd(v2, w2);
+         v1 = _mm_sub_pd(v1, va);    v2 = _mm_sub_pd(v2, va);
+         // Finally apply infinity and NaN where needed
+         v1 = _mm_or_pd(_mm_and_pd(mi1, inf), _mm_andnot_pd(mi1, v1));
+         v2 = _mm_or_pd(_mm_and_pd(mi2, inf), _mm_andnot_pd(mi2, v2));
+         v1 = _mm_or_pd(_mm_and_pd(mn1, nan), _mm_andnot_pd(mn1, v1));
+         v2 = _mm_or_pd(_mm_and_pd(mn2, nan), _mm_andnot_pd(mn2, v2));
+         // Store the results
+         _mm_store_pd(r + n, v1);
+         _mm_store_pd(r + n + 2, v2);
+     }
+ #else
+     for (size_t n = 0; n < N; n++)
+         r[n] = exp(x[n]) - a;
+ #endif
+ }
+
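The range reduction described in the xvm_expma comment above is easier to follow in scalar form. The sketch below is illustrative and not part of the gem: it performs the same e^x = 2^k * e^f split and builds 2^k exactly by writing the biased exponent into the bits of an IEEE-754 double, but it uses the standard exp() in place of the 11th-order polynomial and omits the overflow, underflow and NaN handling the SSE version performs.

#include <math.h>
#include <stdint.h>
#include <string.h>

/* Scalar sketch of xvm_expma's range reduction: r = e^x - a. */
static double expma_scalar(double x, double a) {
    const double l2e = 1.4426950408889634;  /* log2(e) = 1/ln(2) */
    const double ln2 = 0.6931471805599453;  /* ln(2) */
    /* Pick k = round(x / ln 2), so f = x - k*ln2 lies in [-ln2/2, ln2/2]. */
    int k = (int)floor(x * l2e + 0.5);
    double f = x - (double)k * ln2;
    /* The SSE code evaluates e^f with an 11th-order polynomial in Horner
     * form; the library exp() stands in for it here. */
    double ef = exp(f);
    /* Build 2^k exactly by placing the biased exponent k + 1023 into the
     * exponent bits of a double (valid only for normal results). */
    uint64_t bits = (uint64_t)(k + 1023) << 52;
    double p2k;
    memcpy(&p2k, &bits, sizeof p2k);
    return ef * p2k - a;
}

Note that the SSE version splits ln 2 into a high part (c1) and a low correction (c2) so that x - k*ln2 is computed with extra precision; the single ln2 constant above drops that refinement for clarity.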
data/ext/wapiti/vmath.h
@@ -0,0 +1,51 @@
+ /*
+ * Wapiti - A linear-chain CRF tool
+ *
+ * Copyright (c) 2009-2011 CNRS
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+ #ifndef vmath_h
+ #define vmath_h
+
+ #include <stddef.h>
+
+ const char *xvm_mode(void);
+
+ double *xvm_new(size_t N);
+ void xvm_free(double x[]);
+
+ void xvm_neg(double r[], const double x[], size_t N);
+ void xvm_sub(double r[], const double x[], const double y[], size_t N);
+ void xvm_scale(double r[], const double x[], double a, size_t N);
+ double xvm_unit(double r[], const double x[], size_t N);
+
+ double xvm_norm(const double x[], size_t N);
+ double xvm_dot(const double x[], const double y[], size_t N);
+
+ void xvm_axpy(double r[], double a, const double x[], const double y[], size_t N);
+
+ void xvm_expma(double r[], const double x[], double a, size_t N);
+
+ #endif
+
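The header above is the entire surface of the vector-math layer. A minimal usage sketch follows; it is illustrative only and assumes the gem's C sources are compiled and linked alongside it. The key point is that buffers passed to these functions should come from xvm_new rather than malloc, since xvm_new provides the alignment and padding the SSE paths rely on.

#include <stdio.h>

#include "vmath.h"

int main(void) {
    /* N chosen as a multiple of 4 so no padding elements are read by the
     * SSE paths; xvm_new would otherwise round the allocation up itself. */
    const size_t N = 8;
    double *x = xvm_new(N);
    double *r = xvm_new(N);
    for (size_t n = 0; n < N; n++)
        x[n] = (double)n / N;
    xvm_expma(r, x, 1.0, N);  /* r[n] = e^x[n] - 1 */
    printf("mode=%s dot=%g norm=%g\n",
           xvm_mode(), xvm_dot(x, x, N), xvm_norm(x, N));
    xvm_free(x);
    xvm_free(r);
    return 0;
}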