wapiti 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. data/.autotest +13 -0
  2. data/.gitignore +5 -0
  3. data/.rspec +3 -0
  4. data/Gemfile +6 -0
  5. data/LICENSE +30 -0
  6. data/README.md +153 -0
  7. data/Rakefile +33 -0
  8. data/ext/wapiti/bcd.c +392 -0
  9. data/ext/wapiti/decoder.c +535 -0
  10. data/ext/wapiti/decoder.h +46 -0
  11. data/ext/wapiti/extconf.rb +8 -0
  12. data/ext/wapiti/gradient.c +818 -0
  13. data/ext/wapiti/gradient.h +81 -0
  14. data/ext/wapiti/lbfgs.c +294 -0
  15. data/ext/wapiti/model.c +296 -0
  16. data/ext/wapiti/model.h +100 -0
  17. data/ext/wapiti/native.c +1238 -0
  18. data/ext/wapiti/native.h +15 -0
  19. data/ext/wapiti/options.c +278 -0
  20. data/ext/wapiti/options.h +91 -0
  21. data/ext/wapiti/pattern.c +395 -0
  22. data/ext/wapiti/pattern.h +56 -0
  23. data/ext/wapiti/progress.c +167 -0
  24. data/ext/wapiti/progress.h +43 -0
  25. data/ext/wapiti/quark.c +272 -0
  26. data/ext/wapiti/quark.h +46 -0
  27. data/ext/wapiti/reader.c +553 -0
  28. data/ext/wapiti/reader.h +73 -0
  29. data/ext/wapiti/rprop.c +191 -0
  30. data/ext/wapiti/sequence.h +148 -0
  31. data/ext/wapiti/sgdl1.c +218 -0
  32. data/ext/wapiti/thread.c +171 -0
  33. data/ext/wapiti/thread.h +42 -0
  34. data/ext/wapiti/tools.c +202 -0
  35. data/ext/wapiti/tools.h +54 -0
  36. data/ext/wapiti/trainers.h +39 -0
  37. data/ext/wapiti/vmath.c +372 -0
  38. data/ext/wapiti/vmath.h +51 -0
  39. data/ext/wapiti/wapiti.c +288 -0
  40. data/ext/wapiti/wapiti.h +45 -0
  41. data/lib/wapiti.rb +30 -0
  42. data/lib/wapiti/errors.rb +17 -0
  43. data/lib/wapiti/model.rb +49 -0
  44. data/lib/wapiti/options.rb +113 -0
  45. data/lib/wapiti/utility.rb +15 -0
  46. data/lib/wapiti/version.rb +3 -0
  47. data/spec/fixtures/ch.mod +18550 -0
  48. data/spec/fixtures/chpattern.txt +52 -0
  49. data/spec/fixtures/chtest.txt +1973 -0
  50. data/spec/fixtures/chtrain.txt +19995 -0
  51. data/spec/fixtures/nppattern.txt +52 -0
  52. data/spec/fixtures/nptest.txt +1973 -0
  53. data/spec/fixtures/nptrain.txt +19995 -0
  54. data/spec/fixtures/pattern.txt +14 -0
  55. data/spec/fixtures/test.txt +60000 -0
  56. data/spec/fixtures/train.txt +1200 -0
  57. data/spec/spec_helper.rb +21 -0
  58. data/spec/wapiti/model_spec.rb +173 -0
  59. data/spec/wapiti/native_spec.rb +12 -0
  60. data/spec/wapiti/options_spec.rb +175 -0
  61. data/spec/wapiti/utility_spec.rb +22 -0
  62. data/wapiti.gemspec +35 -0
  63. metadata +178 -0
data/ext/wapiti/trainers.h

@@ -0,0 +1,39 @@
+ /*
+ * Wapiti - A linear-chain CRF tool
+ *
+ * Copyright (c) 2009-2011 CNRS
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+ #ifndef trainers_h
+ #define trainers_h
+
+ #include "model.h"
+
+ void trn_lbfgs(mdl_t *mdl);
+ void trn_sgdl1(mdl_t *mdl);
+ void trn_bcd(mdl_t *mdl);
+ void trn_rprop(mdl_t *mdl);
+
+ #endif
+
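The header above only declares the four training entry points; selecting between them is left to the caller. As a minimal sketch of one way such a selection could be wired up (the dispatch table and its key strings are illustrative assumptions; only the trn_* prototypes come from trainers.h):

#include <string.h>

#include "model.h"
#include "trainers.h"

/* Hypothetical dispatch table mapping an algorithm name to a trainer.
 * Only the trn_* prototypes are real; the table and the key strings
 * are illustrative. */
typedef void (*trn_func)(mdl_t *mdl);

static const struct {
	const char *name;
	trn_func    train;
} trn_list[] = {
	{"l-bfgs", trn_lbfgs},
	{"sgd-l1", trn_sgdl1},
	{"bcd",    trn_bcd},
	{"rprop",  trn_rprop},
};

/* Return the trainer registered under the given name, or NULL. */
static trn_func trn_get(const char *name) {
	for (size_t i = 0; i < sizeof(trn_list) / sizeof(trn_list[0]); i++)
		if (strcmp(name, trn_list[i].name) == 0)
			return trn_list[i].train;
	return NULL;
}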
data/ext/wapiti/vmath.c

@@ -0,0 +1,372 @@
+ /*
+ * Wapiti - A linear-chain CRF tool
+ *
+ * Copyright (c) 2009-2011 CNRS
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+ #include <assert.h>
+ #include <math.h>
+ #include <stddef.h>
+ #include <stdlib.h>
+
+ #include "wapiti.h"
+ #include "tools.h"
+ #include "vmath.h"
+
+ #if defined(__SSE2__) && !defined(XVM_ANSI)
+ #include <emmintrin.h>
+ #endif
+
+ /* xvm_mode:
+ * Return a string describing the SSE level used in the optimized code paths.
+ */
+ const char *xvm_mode(void) {
+ #if defined(__SSE2__) && !defined(XVM_ANSI)
+ return "sse2";
+ #else
+ return "no-sse";
+ #endif
+ }
+
+ /* xvm_new:
+ * Allocate a new vector suitable for use in the SSE code paths. This
+ * ensures that the vector size includes the needed padding. You must only
+ * use vectors allocated by this function in the optimized code paths.
+ */
+ double *xvm_new(size_t N) {
+ #if defined(__SSE2__) && !defined(XVM_ANSI)
+ if (N % 4 != 0)
+ N += 4 - N % 4;
+ void *ptr = _mm_malloc(sizeof(double) * N, 16);
+ if (ptr == NULL)
+ fatal("out of memory");
+ return ptr;
+ #else
+ return wapiti_xmalloc(sizeof(double) * N);
+ #endif
+ }
+
+ /* xvm_free:
+ * Free a vector allocated by xvm_new.
+ */
+ void xvm_free(double x[]) {
+ #if defined(__SSE2__) && !defined(XVM_ANSI)
+ _mm_free(x);
+ #else
+ free(x);
+ #endif
+ }
+
+ /* xvm_neg:
+ * Return the component-wise negation of the given vector:
+ * r = -x
+ */
+ void xvm_neg(double r[], const double x[], size_t N) {
+ #if defined(__SSE2__) && !defined(XVM_ANSI)
+ assert(r != NULL && ((size_t)r % 16) == 0);
+ assert(x != NULL && ((size_t)x % 16) == 0);
+ const __m128d vz = _mm_setzero_pd();
+ for (size_t n = 0; n < N; n += 4) {
+ const __m128d x0 = _mm_load_pd(x + n );
+ const __m128d x1 = _mm_load_pd(x + n + 2);
+ const __m128d r0 = _mm_sub_pd(vz, x0);
+ const __m128d r1 = _mm_sub_pd(vz, x1);
+ _mm_store_pd(r + n, r0);
+ _mm_store_pd(r + n + 2, r1);
+ }
+ #else
+ for (size_t n = 0; n < N; n++)
+ r[n] = -x[n];
+ #endif
+ }
+
+ /* xvm_sub:
+ * Return the difference of the two given vectors:
+ * r = x .- y
+ */
+ void xvm_sub(double r[], const double x[], const double y[], size_t N) {
+ #if defined(__SSE2__) && !defined(XVM_ANSI)
+ assert(r != NULL && ((size_t)r % 16) == 0);
+ assert(x != NULL && ((size_t)x % 16) == 0);
+ assert(y != NULL && ((size_t)y % 16) == 0);
+ for (size_t n = 0; n < N; n += 4) {
+ const __m128d x0 = _mm_load_pd(x + n );
+ const __m128d x1 = _mm_load_pd(x + n + 2);
+ const __m128d y0 = _mm_load_pd(y + n );
+ const __m128d y1 = _mm_load_pd(y + n + 2);
+ const __m128d r0 = _mm_sub_pd(x0, y0);
+ const __m128d r1 = _mm_sub_pd(x1, y1);
+ _mm_store_pd(r + n, r0);
+ _mm_store_pd(r + n + 2, r1);
+ }
+ #else
+ for (size_t n = 0; n < N; n++)
+ r[n] = x[n] - y[n];
+ #endif
+ }
+
+ /* xvm_scale:
+ * Return the given vector scaled by a constant:
+ * r = a * x
+ */
+ void xvm_scale(double r[], const double x[], double a, size_t N) {
+ for (size_t n = 0; n < N; n++)
+ r[n] = x[n] * a;
+ }
+
+ /* xvm_unit:
+ * Store a normalized copy of the given vector in r and return the
+ * normalization factor.
+ */
+ double xvm_unit(double r[], const double x[], size_t N) {
+ double sum = 0.0;
+ for (size_t n = 0; n < N; n++)
+ sum += x[n];
+ const double scale = 1.0 / sum;
+ xvm_scale(r, x, scale, N);
+ return scale;
+ }
+
+ /* xvm_norm:
+ * Return the Euclidean norm of the given vector.
+ */
+ double xvm_norm(const double x[], size_t N) {
+ double r = 0.0;
+ #if defined(__SSE2__) && !defined(XVM_ANSI)
+ assert(x != NULL && ((size_t)x % 16) == 0);
+ size_t n, d = N % 4;
+ __m128d s0 = _mm_setzero_pd();
+ __m128d s1 = _mm_setzero_pd();
+ for (n = 0; n < N - d; n += 4) {
+ const __m128d x0 = _mm_load_pd(x + n );
+ const __m128d x1 = _mm_load_pd(x + n + 2);
+ const __m128d r0 = _mm_mul_pd(x0, x0);
+ const __m128d r1 = _mm_mul_pd(x1, x1);
+ s0 = _mm_add_pd(s0, r0);
+ s1 = _mm_add_pd(s1, r1);
+ }
+ s0 = _mm_add_pd(s0, s1);
+ s1 = _mm_shuffle_pd(s0, s0, _MM_SHUFFLE2(1, 1));
+ s0 = _mm_add_pd(s0, s1);
+ _mm_store_sd(&r, s0);
+ for ( ; n < N; n++)
+ r += x[n] * x[n];
+ #else
+ for (size_t n = 0; n < N; n++)
+ r += x[n] * x[n];
+ #endif
+ return sqrt(r);
+ }
+
+ /* xvm_dot:
+ * Return the dot product of the two given vectors.
+ */
+ double xvm_dot(const double x[], const double y[], size_t N) {
+ double r = 0.0;
+ #if defined(__SSE2__) && !defined(XVM_ANSI)
+ assert(x != NULL && ((size_t)x % 16) == 0);
+ assert(y != NULL && ((size_t)y % 16) == 0);
+ size_t n, d = N % 4;
+ __m128d s0 = _mm_setzero_pd();
+ __m128d s1 = _mm_setzero_pd();
+ for (n = 0; n < N - d; n += 4) {
+ const __m128d x0 = _mm_load_pd(x + n );
+ const __m128d x1 = _mm_load_pd(x + n + 2);
+ const __m128d y0 = _mm_load_pd(y + n );
+ const __m128d y1 = _mm_load_pd(y + n + 2);
+ const __m128d r0 = _mm_mul_pd(x0, y0);
+ const __m128d r1 = _mm_mul_pd(x1, y1);
+ s0 = _mm_add_pd(s0, r0);
+ s1 = _mm_add_pd(s1, r1);
+ }
+ s0 = _mm_add_pd(s0, s1);
+ s1 = _mm_shuffle_pd(s0, s0, _MM_SHUFFLE2(1, 1));
+ s0 = _mm_add_pd(s0, s1);
+ _mm_store_sd(&r, s0);
+ for ( ; n < N; n++)
+ r += x[n] * y[n];
+ #else
+ for (size_t n = 0; n < N; n++)
+ r += x[n] * y[n];
+ #endif
+ return r;
+ }
+
+ /* xvm_axpy:
+ * Return the sum of x scaled by a and y:
+ * r = a * x + y
+ */
+ void xvm_axpy(double r[], double a, const double x[], const double y[], size_t N) {
+ #if defined(__SSE2__) && !defined(XVM_ANSI)
+ assert(r != NULL && ((size_t)r % 16) == 0);
+ assert(x != NULL && ((size_t)x % 16) == 0);
+ assert(y != NULL && ((size_t)y % 16) == 0);
+ const __m128d va = _mm_set1_pd(a);
+ for (size_t n = 0; n < N; n += 4) {
+ const __m128d x0 = _mm_load_pd(x + n );
+ const __m128d x1 = _mm_load_pd(x + n + 2);
+ const __m128d y0 = _mm_load_pd(y + n );
+ const __m128d y1 = _mm_load_pd(y + n + 2);
+ const __m128d t0 = _mm_mul_pd(x0, va);
+ const __m128d t1 = _mm_mul_pd(x1, va);
+ const __m128d r0 = _mm_add_pd(t0, y0);
+ const __m128d r1 = _mm_add_pd(t1, y1);
+ _mm_store_pd(r + n, r0);
+ _mm_store_pd(r + n + 2, r1);
+ }
+ #else
+ for (size_t n = 0; n < N; n++)
+ r[n] = a * x[n] + y[n];
+ #endif
+ }
+
+ /* xvm_expma:
+ * Compute the component-wise exponential minus <a>:
+ * r[i] <-- e^x[i] - a
+ *
+ * The following comments apply to the SSE2 version of this code:
+ *
+ * Computation is done four doubles at a time by working in parallel on
+ * two vectors of two doubles using SSE2 intrinsics. If the size is not a
+ * multiple of 4, the remaining elements are computed using the stdlib exp().
+ *
+ * The computation is done by first performing a range reduction of the
+ * argument of the form e^x = 2^k * e^f, choosing k and f so that f is in
+ * [-0.5, 0.5]. Then 2^k can be computed exactly using bit operations to
+ * build the double result and e^f can be efficiently computed with enough
+ * precision using a polynomial approximation.
+ *
+ * The polynomial approximation uses an 11th-order polynomial computed by
+ * the Remez algorithm with the Sollya suite, instead of the more classical
+ * Padé form, because it is better suited to parallel execution. To achieve
+ * the same precision, a Padé form seems to require three fewer
+ * multiplications but needs a very costly division, so it would be less
+ * efficient.
+ *
+ * The maximum error is less than 1 lsb and special cases are correctly
+ * handled:
+ * +inf or +oor --> return +inf
+ * -inf or -oor --> return 0.0
+ * qNaN or sNaN --> return qNaN
+ *
+ * This code is copyright 2004-2011 Thomas Lavergne and licensed under the
+ * BSD licence like the rest of Wapiti.
+ */
+ void xvm_expma(double r[], const double x[], double a, size_t N) {
+ #if defined(__SSE2__) && !defined(XVM_ANSI)
+ #define xvm_vconst(v) (_mm_castsi128_pd(_mm_set1_epi64x((v))))
+ assert(r != NULL && ((size_t)r % 16) == 0);
+ assert(x != NULL && ((size_t)x % 16) == 0);
+ const __m128i vl = _mm_set1_epi64x(0x3ff0000000000000ULL);
+ const __m128d ehi = xvm_vconst(0x4086232bdd7abcd2ULL);
+ const __m128d elo = xvm_vconst(0xc086232bdd7abcd2ULL);
+ const __m128d l2e = xvm_vconst(0x3ff71547652b82feULL);
+ const __m128d hal = xvm_vconst(0x3fe0000000000000ULL);
+ const __m128d nan = xvm_vconst(0xfff8000000000000ULL);
+ const __m128d inf = xvm_vconst(0x7ff0000000000000ULL);
+ const __m128d c1 = xvm_vconst(0x3fe62e4000000000ULL);
+ const __m128d c2 = xvm_vconst(0x3eb7f7d1cf79abcaULL);
+ const __m128d p0 = xvm_vconst(0x3feffffffffffffeULL);
+ const __m128d p1 = xvm_vconst(0x3ff000000000000bULL);
+ const __m128d p2 = xvm_vconst(0x3fe0000000000256ULL);
+ const __m128d p3 = xvm_vconst(0x3fc5555555553a2aULL);
+ const __m128d p4 = xvm_vconst(0x3fa55555554e57d3ULL);
+ const __m128d p5 = xvm_vconst(0x3f81111111362f4fULL);
+ const __m128d p6 = xvm_vconst(0x3f56c16c25f3bae1ULL);
+ const __m128d p7 = xvm_vconst(0x3f2a019fc9310c33ULL);
+ const __m128d p8 = xvm_vconst(0x3efa01825f3cb28bULL);
+ const __m128d p9 = xvm_vconst(0x3ec71e2bd880fdd8ULL);
+ const __m128d p10 = xvm_vconst(0x3e9299068168ac8fULL);
+ const __m128d p11 = xvm_vconst(0x3e5ac52350b60b19ULL);
+ const __m128d va = _mm_set1_pd(a);
+ for (size_t n = 0; n < N; n += 4) {
+ __m128d mn1, mn2, mi1, mi2;
+ __m128d t1, t2, d1, d2;
+ __m128d v1, v2, w1, w2;
+ __m128i k1, k2;
+ __m128d f1, f2;
+ // Load the next four values
+ __m128d x1 = _mm_load_pd(x + n );
+ __m128d x2 = _mm_load_pd(x + n + 2);
+ // Check for out-of-range values, infinities and NaN
+ mn1 = _mm_cmpneq_pd(x1, x1); mn2 = _mm_cmpneq_pd(x2, x2);
+ mi1 = _mm_cmpgt_pd(x1, ehi); mi2 = _mm_cmpgt_pd(x2, ehi);
+ x1 = _mm_max_pd(x1, elo); x2 = _mm_max_pd(x2, elo);
+ // Range reduction: we search k and f such that e^x = 2^k * e^f
+ // with f in [-0.5, 0.5]
+ t1 = _mm_mul_pd(x1, l2e); t2 = _mm_mul_pd(x2, l2e);
+ t1 = _mm_add_pd(t1, hal); t2 = _mm_add_pd(t2, hal);
+ k1 = _mm_cvttpd_epi32(t1); k2 = _mm_cvttpd_epi32(t2);
+ d1 = _mm_cvtepi32_pd(k1); d2 = _mm_cvtepi32_pd(k2);
+ t1 = _mm_mul_pd(d1, c1); t2 = _mm_mul_pd(d2, c1);
+ f1 = _mm_sub_pd(x1, t1); f2 = _mm_sub_pd(x2, t2);
+ t1 = _mm_mul_pd(d1, c2); t2 = _mm_mul_pd(d2, c2);
+ f1 = _mm_sub_pd(f1, t1); f2 = _mm_sub_pd(f2, t2);
+ // Evaluation of e^f using an 11th-order polynomial in Horner form
+ v1 = _mm_mul_pd(f1, p11); v2 = _mm_mul_pd(f2, p11);
+ v1 = _mm_add_pd(v1, p10); v2 = _mm_add_pd(v2, p10);
+ v1 = _mm_mul_pd(v1, f1); v2 = _mm_mul_pd(v2, f2);
+ v1 = _mm_add_pd(v1, p9); v2 = _mm_add_pd(v2, p9);
+ v1 = _mm_mul_pd(v1, f1); v2 = _mm_mul_pd(v2, f2);
+ v1 = _mm_add_pd(v1, p8); v2 = _mm_add_pd(v2, p8);
+ v1 = _mm_mul_pd(v1, f1); v2 = _mm_mul_pd(v2, f2);
+ v1 = _mm_add_pd(v1, p7); v2 = _mm_add_pd(v2, p7);
+ v1 = _mm_mul_pd(v1, f1); v2 = _mm_mul_pd(v2, f2);
+ v1 = _mm_add_pd(v1, p6); v2 = _mm_add_pd(v2, p6);
+ v1 = _mm_mul_pd(v1, f1); v2 = _mm_mul_pd(v2, f2);
+ v1 = _mm_add_pd(v1, p5); v2 = _mm_add_pd(v2, p5);
+ v1 = _mm_mul_pd(v1, f1); v2 = _mm_mul_pd(v2, f2);
+ v1 = _mm_add_pd(v1, p4); v2 = _mm_add_pd(v2, p4);
+ v1 = _mm_mul_pd(v1, f1); v2 = _mm_mul_pd(v2, f2);
+ v1 = _mm_add_pd(v1, p3); v2 = _mm_add_pd(v2, p3);
+ v1 = _mm_mul_pd(v1, f1); v2 = _mm_mul_pd(v2, f2);
+ v1 = _mm_add_pd(v1, p2); v2 = _mm_add_pd(v2, p2);
+ v1 = _mm_mul_pd(v1, f1); v2 = _mm_mul_pd(v2, f2);
+ v1 = _mm_add_pd(v1, p1); v2 = _mm_add_pd(v2, p1);
+ v1 = _mm_mul_pd(v1, f1); v2 = _mm_mul_pd(v2, f2);
+ v1 = _mm_add_pd(v1, p0); v2 = _mm_add_pd(v2, p0);
+ // Evaluation of 2^k using bitops to achieve exact computation
+ k1 = _mm_slli_epi32(k1, 20); k2 = _mm_slli_epi32(k2, 20);
+ k1 = _mm_shuffle_epi32(k1, 0x72);
+ k2 = _mm_shuffle_epi32(k2, 0x72);
+ k1 = _mm_add_epi32(k1, vl); k2 = _mm_add_epi32(k2, vl);
+ w1 = _mm_castsi128_pd(k1); w2 = _mm_castsi128_pd(k2);
+ // Return to full range to subtract <a>
+ v1 = _mm_mul_pd(v1, w1); v2 = _mm_mul_pd(v2, w2);
+ v1 = _mm_sub_pd(v1, va); v2 = _mm_sub_pd(v2, va);
+ // Finally apply infinity and NaN where needed
+ v1 = _mm_or_pd(_mm_and_pd(mi1, inf), _mm_andnot_pd(mi1, v1));
+ v2 = _mm_or_pd(_mm_and_pd(mi2, inf), _mm_andnot_pd(mi2, v2));
+ v1 = _mm_or_pd(_mm_and_pd(mn1, nan), _mm_andnot_pd(mn1, v1));
+ v2 = _mm_or_pd(_mm_and_pd(mn2, nan), _mm_andnot_pd(mn2, v2));
+ // Store the results
+ _mm_store_pd(r + n, v1);
+ _mm_store_pd(r + n + 2, v2);
+ }
+ #else
+ for (size_t n = 0; n < N; n++)
+ r[n] = exp(x[n]) - a;
+ #endif
+ }
+
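The comment on xvm_expma compresses a fair amount of numerics. A scalar sketch of the same range-reduction idea may help (illustrative only: the vectorized code above evaluates e^f with its Remez polynomial, while this sketch falls back on the stdlib exp(), and it skips the out-of-range and NaN handling):

#include <math.h>
#include <stdint.h>

/* Scalar illustration of the reduction e^x = 2^k * e^f used by xvm_expma.
 * k is chosen so that f = x - k * ln(2) lands in roughly [-0.35, 0.35],
 * then 2^k is built exactly by writing k + 1023 into the exponent field
 * of an IEEE-754 double. No polynomial and no special cases here. */
static double expma_scalar(double x, double a) {
	const double log2e = 1.4426950408889634074;  /* 1 / ln(2) */
	const double ln2   = 0.69314718055994530942;
	const int    k     = (int)floor(x * log2e + 0.5);
	const double f     = x - (double)k * ln2;
	union { uint64_t i; double d; } two_k;
	two_k.i = (uint64_t)(k + 1023) << 52;        /* exact 2^k */
	return two_k.d * exp(f) - a;                 /* e^x - a */
}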
data/ext/wapiti/vmath.h

@@ -0,0 +1,51 @@
+ /*
+ * Wapiti - A linear-chain CRF tool
+ *
+ * Copyright (c) 2009-2011 CNRS
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+ #ifndef vmath_h
+ #define vmath_h
+
+ #include <stddef.h>
+
+ const char *xvm_mode(void);
+
+ double *xvm_new(size_t N);
+ void xvm_free(double x[]);
+
+ void xvm_neg(double r[], const double x[], size_t N);
+ void xvm_sub(double r[], const double x[], const double y[], size_t N);
+ void xvm_scale(double r[], const double x[], double a, size_t N);
+ double xvm_unit(double r[], const double x[], size_t N);
+
+ double xvm_norm(const double x[], size_t N);
+ double xvm_dot(const double x[], const double y[], size_t N);
+
+ void xvm_axpy(double r[], double a, const double x[], const double y[], size_t N);
+
+ void xvm_expma(double r[], const double x[], double a, size_t N);
+
+ #endif
+
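Taken together, the header gives a small self-contained vector API: aligned allocation, a few BLAS-style kernels, and the vectorized exponential. A minimal usage sketch (the example size and printed labels are assumptions; the calls match the prototypes above):

#include <stdio.h>

#include "vmath.h"

int main(void) {
	const size_t N = 6;          /* xvm_new pads the allocation as needed */
	double *x = xvm_new(N);
	double *y = xvm_new(N);
	for (size_t i = 0; i < N; i++) {
		x[i] = (double)i;        /* x = (0, 1, 2, 3, 4, 5) */
		y[i] = 1.0;
	}
	printf("mode: %s\n", xvm_mode());
	printf("dot:  %g\n", xvm_dot(x, y, N));   /* 0 + 1 + ... + 5 = 15 */
	printf("norm: %g\n", xvm_norm(x, N));     /* sqrt(55) */
	xvm_free(x);
	xvm_free(y);
	return 0;
}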