wapiti 0.0.5 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.simplecov +3 -0
- data/Gemfile +25 -2
- data/HISTORY.md +5 -1
- data/LICENSE +14 -13
- data/README.md +9 -16
- data/Rakefile +38 -8
- data/ext/wapiti/bcd.c +126 -124
- data/ext/wapiti/decoder.c +203 -124
- data/ext/wapiti/decoder.h +6 -4
- data/ext/wapiti/extconf.rb +2 -2
- data/ext/wapiti/gradient.c +491 -320
- data/ext/wapiti/gradient.h +52 -34
- data/ext/wapiti/lbfgs.c +74 -33
- data/ext/wapiti/model.c +47 -37
- data/ext/wapiti/model.h +22 -20
- data/ext/wapiti/native.c +850 -839
- data/ext/wapiti/native.h +1 -1
- data/ext/wapiti/options.c +52 -20
- data/ext/wapiti/options.h +37 -30
- data/ext/wapiti/pattern.c +35 -33
- data/ext/wapiti/pattern.h +12 -11
- data/ext/wapiti/progress.c +14 -13
- data/ext/wapiti/progress.h +3 -2
- data/ext/wapiti/quark.c +14 -16
- data/ext/wapiti/quark.h +6 -5
- data/ext/wapiti/reader.c +83 -69
- data/ext/wapiti/reader.h +11 -9
- data/ext/wapiti/rprop.c +84 -43
- data/ext/wapiti/sequence.h +18 -16
- data/ext/wapiti/sgdl1.c +45 -43
- data/ext/wapiti/thread.c +19 -17
- data/ext/wapiti/thread.h +5 -4
- data/ext/wapiti/tools.c +7 -7
- data/ext/wapiti/tools.h +3 -4
- data/ext/wapiti/trainers.h +1 -1
- data/ext/wapiti/vmath.c +40 -38
- data/ext/wapiti/vmath.h +12 -11
- data/ext/wapiti/wapiti.c +159 -37
- data/ext/wapiti/wapiti.h +18 -4
- data/lib/wapiti.rb +15 -15
- data/lib/wapiti/errors.rb +15 -15
- data/lib/wapiti/model.rb +92 -84
- data/lib/wapiti/options.rb +123 -124
- data/lib/wapiti/utility.rb +14 -14
- data/lib/wapiti/version.rb +2 -2
- data/spec/spec_helper.rb +29 -9
- data/spec/wapiti/model_spec.rb +230 -194
- data/spec/wapiti/native_spec.rb +7 -8
- data/spec/wapiti/options_spec.rb +184 -174
- data/wapiti.gemspec +22 -8
- metadata +38 -42
- data/.gitignore +0 -5
data/ext/wapiti/thread.c
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
/*
|
2
2
|
* Wapiti - A linear-chain CRF tool
|
3
3
|
*
|
4
|
-
* Copyright (c) 2009-
|
4
|
+
* Copyright (c) 2009-2013 CNRS
|
5
5
|
* All rights reserved.
|
6
6
|
*
|
7
7
|
* Redistribution and use in source and binary forms, with or without
|
@@ -25,6 +25,8 @@
|
|
25
25
|
* POSSIBILITY OF SUCH DAMAGE.
|
26
26
|
*/
|
27
27
|
|
28
|
+
#include <stdint.h>
|
29
|
+
|
28
30
|
#include "model.h"
|
29
31
|
#include "tools.h"
|
30
32
|
#include "thread.h"
|
@@ -51,10 +53,10 @@
|
|
51
53
|
******************************************************************************/
|
52
54
|
#ifdef MTH_ANSI
|
53
55
|
struct job_s {
|
54
|
-
|
56
|
+
uint32_t size;
|
55
57
|
};
|
56
58
|
|
57
|
-
bool mth_getjob(job_t *job,
|
59
|
+
bool mth_getjob(job_t *job, uint32_t *cnt, uint32_t *pos) {
|
58
60
|
if (job->size == 0)
|
59
61
|
return false;
|
60
62
|
*cnt = job->size;
|
@@ -63,7 +65,7 @@ bool mth_getjob(job_t *job, size_t *cnt, size_t *pos) {
|
|
63
65
|
return true;
|
64
66
|
}
|
65
67
|
|
66
|
-
void mth_spawn(func_t *f,
|
68
|
+
void mth_spawn(func_t *f, uint32_t W, void *ud[W], uint32_t size, uint32_t batch) {
|
67
69
|
unused(batch);
|
68
70
|
if (size == 0) {
|
69
71
|
f(NULL, 0, 1, ud[0]);
|
@@ -78,19 +80,19 @@ void mth_spawn(func_t *f, int W, void *ud[W], size_t size, size_t batch) {
|
|
78
80
|
#include <pthread.h>
|
79
81
|
|
80
82
|
struct job_s {
|
81
|
-
|
82
|
-
|
83
|
-
|
83
|
+
uint32_t size;
|
84
|
+
uint32_t send;
|
85
|
+
uint32_t batch;
|
84
86
|
pthread_mutex_t lock;
|
85
87
|
};
|
86
88
|
|
87
89
|
typedef struct mth_s mth_t;
|
88
90
|
struct mth_s {
|
89
|
-
job_t
|
90
|
-
|
91
|
-
|
92
|
-
func_t
|
93
|
-
void
|
91
|
+
job_t *job;
|
92
|
+
uint32_t id;
|
93
|
+
uint32_t cnt;
|
94
|
+
func_t *f;
|
95
|
+
void *ud;
|
94
96
|
};
|
95
97
|
|
96
98
|
/* mth_getjob:
|
@@ -100,7 +102,7 @@ struct mth_s {
|
|
100
102
|
* This function uses a lock to ensure thread safety as it will be called by
|
101
103
|
* the multiple worker threads.
|
102
104
|
*/
|
103
|
-
bool mth_getjob(job_t *job,
|
105
|
+
bool mth_getjob(job_t *job, uint32_t *cnt, uint32_t *pos) {
|
104
106
|
if (job == NULL)
|
105
107
|
return false;
|
106
108
|
if (job->send == job->size)
|
@@ -124,7 +126,7 @@ static void *mth_stub(void *ud) {
|
|
124
126
|
* will get a unique identifier between 0 and W-1 and a user data from the
|
125
127
|
* 'ud' array.
|
126
128
|
*/
|
127
|
-
void mth_spawn(func_t *f,
|
129
|
+
void mth_spawn(func_t *f, uint32_t W, void *ud[W], uint32_t size, uint32_t batch) {
|
128
130
|
// First prepare the jobs scheduler
|
129
131
|
job_t job, *pjob = NULL;
|
130
132
|
if (size != 0) {
|
@@ -144,7 +146,7 @@ void mth_spawn(func_t *f, int W, void *ud[W], size_t size, size_t batch) {
|
|
144
146
|
// We prepare the parameter structures that will be sent to the threads
|
145
147
|
// with information for calling the user function.
|
146
148
|
mth_t p[W];
|
147
|
-
for (
|
149
|
+
for (uint32_t w = 0; w < W; w++) {
|
148
150
|
p[w].job = pjob;
|
149
151
|
p[w].id = w;
|
150
152
|
p[w].cnt = W;
|
@@ -159,10 +161,10 @@ void mth_spawn(func_t *f, int W, void *ud[W], size_t size, size_t batch) {
|
|
159
161
|
pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM);
|
160
162
|
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
|
161
163
|
pthread_t th[W];
|
162
|
-
for (
|
164
|
+
for (uint32_t w = 0; w < W; w++)
|
163
165
|
if (pthread_create(&th[w], &attr, &mth_stub, &p[w]) != 0)
|
164
166
|
fatal("failed to create thread");
|
165
|
-
for (
|
167
|
+
for (uint32_t w = 0; w < W; w++)
|
166
168
|
if (pthread_join(th[w], NULL) != 0)
|
167
169
|
fatal("failed to join thread");
|
168
170
|
pthread_attr_destroy(&attr);
|
data/ext/wapiti/thread.h
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
/*
|
2
2
|
* Wapiti - A linear-chain CRF tool
|
3
3
|
*
|
4
|
-
* Copyright (c) 2009-
|
4
|
+
* Copyright (c) 2009-2013 CNRS
|
5
5
|
* All rights reserved.
|
6
6
|
*
|
7
7
|
* Redistribution and use in source and binary forms, with or without
|
@@ -28,15 +28,16 @@
|
|
28
28
|
#ifndef thread_h
|
29
29
|
#define thread_h
|
30
30
|
|
31
|
+
#include <stdint.h>
|
31
32
|
#include <pthread.h>
|
32
33
|
|
33
34
|
#include "model.h"
|
34
35
|
|
35
36
|
typedef struct job_s job_t;
|
36
37
|
|
37
|
-
typedef void (func_t)(job_t *job,
|
38
|
+
typedef void (func_t)(job_t *job, uint32_t id, uint32_t cnt, void *ud);
|
38
39
|
|
39
|
-
bool mth_getjob(job_t *job,
|
40
|
-
void mth_spawn(func_t *f,
|
40
|
+
bool mth_getjob(job_t *job, uint32_t *cnt, uint32_t *pos);
|
41
|
+
void mth_spawn(func_t *f, uint32_t W, void *ud[W], uint32_t size, uint32_t batch);
|
41
42
|
|
42
43
|
#endif
|
data/ext/wapiti/tools.c
CHANGED
@@ -69,7 +69,7 @@ void fatal(const char *msg, ...) {
|
|
69
69
|
|
70
70
|
rb_raise(cNativeError, msg, args);
|
71
71
|
|
72
|
-
va_end(args);
|
72
|
+
va_end(args);
|
73
73
|
}
|
74
74
|
|
75
75
|
/* pfatal:
|
@@ -80,7 +80,7 @@ void fatal(const char *msg, ...) {
|
|
80
80
|
* calling pfatal.
|
81
81
|
*/
|
82
82
|
void pfatal(const char *msg, ...) {
|
83
|
-
const char *err = strerror(errno);
|
83
|
+
// const char *err = strerror(errno);
|
84
84
|
va_list args;
|
85
85
|
va_start(args, msg);
|
86
86
|
|
@@ -123,7 +123,7 @@ void info(const char *msg, ...) {
|
|
123
123
|
va_end(args);
|
124
124
|
}
|
125
125
|
|
126
|
-
/*
|
126
|
+
/* wapiti_xmalloc:
|
127
127
|
* A simple wrapper around malloc that fails violently if memory cannot be
|
128
128
|
* allocated, so it will never return NULL.
|
129
129
|
*/
|
@@ -134,8 +134,8 @@ void *wapiti_xmalloc(size_t size) {
|
|
134
134
|
return ptr;
|
135
135
|
}
|
136
136
|
|
137
|
-
/*
|
138
|
-
* As
|
137
|
+
/* wapiti_xrealloc:
|
138
|
+
* Like wapiti_xmalloc, this is a simple wrapper around realloc that fails on memory
|
139
139
|
* error and so never returns NULL.
|
140
140
|
*/
|
141
141
|
void *wapiti_xrealloc(void *ptr, size_t size) {
|
@@ -151,7 +151,7 @@ void *wapiti_xrealloc(void *ptr, size_t size) {
|
|
151
151
|
*/
|
152
152
|
char *xstrdup(const char *str) {
|
153
153
|
const int len = strlen(str) + 1;
|
154
|
-
char *res =
|
154
|
+
char *res = wapiti_xmalloc(sizeof(char) * len);
|
155
155
|
memcpy(res, str, len);
|
156
156
|
return res;
|
157
157
|
}
|
@@ -175,7 +175,7 @@ char *ns_readstr(FILE *file) {
|
|
175
175
|
int len;
|
176
176
|
if (fscanf(file, "%d:", &len) != 1)
|
177
177
|
pfatal("cannot read from file");
|
178
|
-
char *buf =
|
178
|
+
char *buf = wapiti_xmalloc(len + 1);
|
179
179
|
if (fread(buf, len, 1, file) != 1)
|
180
180
|
pfatal("cannot read from file");
|
181
181
|
if (fgetc(file) != ',')
|
data/ext/wapiti/tools.h
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
/*
|
2
2
|
* Wapiti - A linear-chain CRF tool
|
3
3
|
*
|
4
|
-
* Copyright (c) 2009-
|
4
|
+
* Copyright (c) 2009-2013 CNRS
|
5
5
|
* All rights reserved.
|
6
6
|
*
|
7
7
|
* Redistribution and use in source and binary forms, with or without
|
@@ -29,12 +29,11 @@
|
|
29
29
|
|
30
30
|
#include <stdarg.h>
|
31
31
|
#include <stddef.h>
|
32
|
+
#include <stdint.h>
|
32
33
|
#include <stdio.h>
|
33
34
|
|
34
|
-
#include <ruby.h>
|
35
|
-
|
36
35
|
#define unused(v) ((void)(v))
|
37
|
-
#define none ((
|
36
|
+
#define none ((uint64_t)-1)
|
38
37
|
|
39
38
|
#define min(a, b) ((a) < (b) ? (a) : (b))
|
40
39
|
#define max(a, b) ((a) < (b) ? (b) : (a))
|
data/ext/wapiti/trainers.h
CHANGED
data/ext/wapiti/vmath.c
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
/*
|
2
2
|
* Wapiti - A linear-chain CRF tool
|
3
3
|
*
|
4
|
-
* Copyright (c) 2009-
|
4
|
+
* Copyright (c) 2009-2013 CNRS
|
5
5
|
* All rights reserved.
|
6
6
|
*
|
7
7
|
* Redistribution and use in source and binary forms, with or without
|
@@ -28,6 +28,7 @@
|
|
28
28
|
#include <assert.h>
|
29
29
|
#include <math.h>
|
30
30
|
#include <stddef.h>
|
31
|
+
#include <stdint.h>
|
31
32
|
#include <stdlib.h>
|
32
33
|
|
33
34
|
#include "wapiti.h"
|
@@ -54,7 +55,7 @@ const char *xvm_mode(void) {
|
|
54
55
|
* ensure that the vector size contains the needed padding. You must only use
|
55
56
|
* vector allocated by this function if you use the optimized code paths.
|
56
57
|
*/
|
57
|
-
double *xvm_new(
|
58
|
+
double *xvm_new(uint64_t N) {
|
58
59
|
#if defined(__SSE2__) && !defined(XVM_ANSI)
|
59
60
|
if (N % 4 != 0)
|
60
61
|
N += 4 - N % 4;
|
@@ -82,12 +83,12 @@ void xvm_free(double x[]) {
|
|
82
83
|
* Return the component-wise negation of the given vector:
|
83
84
|
* r = -x
|
84
85
|
*/
|
85
|
-
void xvm_neg(double r[], const double x[],
|
86
|
+
void xvm_neg(double r[], const double x[], uint64_t N) {
|
86
87
|
#if defined(__SSE2__) && !defined(XVM_ANSI)
|
87
|
-
assert(r != NULL && ((
|
88
|
-
assert(x != NULL && ((
|
88
|
+
assert(r != NULL && ((uintptr_t)r % 16) == 0);
|
89
|
+
assert(x != NULL && ((uintptr_t)x % 16) == 0);
|
89
90
|
const __m128d vz = _mm_setzero_pd();
|
90
|
-
for (
|
91
|
+
for (uint64_t n = 0; n < N; n += 4) {
|
91
92
|
const __m128d x0 = _mm_load_pd(x + n );
|
92
93
|
const __m128d x1 = _mm_load_pd(x + n + 2);
|
93
94
|
const __m128d r0 = _mm_sub_pd(vz, x0);
|
@@ -96,7 +97,7 @@ void xvm_neg(double r[], const double x[], size_t N) {
|
|
96
97
|
_mm_store_pd(r + n + 2, r1);
|
97
98
|
}
|
98
99
|
#else
|
99
|
-
for (
|
100
|
+
for (uint64_t n = 0; n < N; n++)
|
100
101
|
r[n] = -x[n];
|
101
102
|
#endif
|
102
103
|
}
|
@@ -105,12 +106,12 @@ void xvm_neg(double r[], const double x[], size_t N) {
|
|
105
106
|
* Return the difference of the two given vectors:
|
106
107
|
* r = x .- y
|
107
108
|
*/
|
108
|
-
void xvm_sub(double r[], const double x[], const double y[],
|
109
|
+
void xvm_sub(double r[], const double x[], const double y[], uint64_t N) {
|
109
110
|
#if defined(__SSE2__) && !defined(XVM_ANSI)
|
110
|
-
assert(r != NULL && ((
|
111
|
-
assert(x != NULL && ((
|
112
|
-
assert(y != NULL && ((
|
113
|
-
for (
|
111
|
+
assert(r != NULL && ((uintptr_t)r % 16) == 0);
|
112
|
+
assert(x != NULL && ((uintptr_t)x % 16) == 0);
|
113
|
+
assert(y != NULL && ((uintptr_t)y % 16) == 0);
|
114
|
+
for (uint64_t n = 0; n < N; n += 4) {
|
114
115
|
const __m128d x0 = _mm_load_pd(x + n );
|
115
116
|
const __m128d x1 = _mm_load_pd(x + n + 2);
|
116
117
|
const __m128d y0 = _mm_load_pd(y + n );
|
@@ -121,7 +122,7 @@ void xvm_sub(double r[], const double x[], const double y[], size_t N) {
|
|
121
122
|
_mm_store_pd(r + n + 2, r1);
|
122
123
|
}
|
123
124
|
#else
|
124
|
-
for (
|
125
|
+
for (uint64_t n = 0; n < N; n++)
|
125
126
|
r[n] = x[n] - y[n];
|
126
127
|
#endif
|
127
128
|
}
|
@@ -130,8 +131,8 @@ void xvm_sub(double r[], const double x[], const double y[], size_t N) {
|
|
130
131
|
* Return the given vector scaled by a constant:
|
131
132
|
* r = a * x
|
132
133
|
*/
|
133
|
-
void xvm_scale(double r[], const double x[], double a,
|
134
|
-
for (
|
134
|
+
void xvm_scale(double r[], const double x[], double a, uint64_t N) {
|
135
|
+
for (uint64_t n = 0; n < N; n++)
|
135
136
|
r[n] = x[n] * a;
|
136
137
|
}
|
137
138
|
|
@@ -139,9 +140,9 @@ void xvm_scale(double r[], const double x[], double a, size_t N) {
|
|
139
140
|
* Store a normalized copy of the given vector in r and return the
|
140
141
|
* normalization factor.
|
141
142
|
*/
|
142
|
-
double xvm_unit(double r[], const double x[],
|
143
|
+
double xvm_unit(double r[], const double x[], uint64_t N) {
|
143
144
|
double sum = 0.0;
|
144
|
-
for (
|
145
|
+
for (uint64_t n = 0; n < N; n++)
|
145
146
|
sum += x[n];
|
146
147
|
const double scale = 1.0 / sum;
|
147
148
|
xvm_scale(r, x, scale, N);
|
@@ -151,11 +152,11 @@ double xvm_unit(double r[], const double x[], size_t N) {
|
|
151
152
|
/* xvm_norm:
|
152
153
|
* Return the Euclidean norm of the given vector.
|
153
154
|
*/
|
154
|
-
double xvm_norm(const double x[],
|
155
|
+
double xvm_norm(const double x[], uint64_t N) {
|
155
156
|
double r = 0.0;
|
156
157
|
#if defined(__SSE2__) && !defined(XVM_ANSI)
|
157
|
-
assert(x != NULL && ((
|
158
|
-
|
158
|
+
assert(x != NULL && ((uintptr_t)x % 16) == 0);
|
159
|
+
uint64_t n, d = N % 4;
|
159
160
|
__m128d s0 = _mm_setzero_pd();
|
160
161
|
__m128d s1 = _mm_setzero_pd();
|
161
162
|
for (n = 0; n < N - d; n += 4) {
|
@@ -173,7 +174,7 @@ double xvm_norm(const double x[], size_t N) {
|
|
173
174
|
for ( ; n < N; n++)
|
174
175
|
r += x[n] * x[n];
|
175
176
|
#else
|
176
|
-
for (
|
177
|
+
for (uint64_t n = 0; n < N; n++)
|
177
178
|
r += x[n] * x[n];
|
178
179
|
#endif
|
179
180
|
return sqrt(r);
|
@@ -182,12 +183,12 @@ double xvm_norm(const double x[], size_t N) {
|
|
182
183
|
/* xvm_dot:
|
183
184
|
* Return the dot product of the two given vectors.
|
184
185
|
*/
|
185
|
-
double xvm_dot(const double x[], const double y[],
|
186
|
+
double xvm_dot(const double x[], const double y[], uint64_t N) {
|
186
187
|
double r = 0.0;
|
187
188
|
#if defined(__SSE2__) && !defined(XVM_ANSI)
|
188
|
-
assert(x != NULL && ((
|
189
|
-
assert(y != NULL && ((
|
190
|
-
|
189
|
+
assert(x != NULL && ((uintptr_t)x % 16) == 0);
|
190
|
+
assert(y != NULL && ((uintptr_t)y % 16) == 0);
|
191
|
+
uint64_t n, d = N % 4;
|
191
192
|
__m128d s0 = _mm_setzero_pd();
|
192
193
|
__m128d s1 = _mm_setzero_pd();
|
193
194
|
for (n = 0; n < N - d; n += 4) {
|
@@ -207,7 +208,7 @@ double xvm_dot(const double x[], const double y[], size_t N) {
|
|
207
208
|
for ( ; n < N; n++)
|
208
209
|
r += x[n] * y[n];
|
209
210
|
#else
|
210
|
-
for (
|
211
|
+
for (uint64_t n = 0; n < N; n++)
|
211
212
|
r += x[n] * y[n];
|
212
213
|
#endif
|
213
214
|
return r;
|
@@ -217,13 +218,14 @@ double xvm_dot(const double x[], const double y[], size_t N) {
|
|
217
218
|
* Return the sum of x scaled by a and y:
|
218
219
|
* r = a * x + y
|
219
220
|
*/
|
220
|
-
void xvm_axpy(double r[], double a, const double x[], const double y[],
|
221
|
+
void xvm_axpy(double r[], double a, const double x[], const double y[],
|
222
|
+
uint64_t N) {
|
221
223
|
#if defined(__SSE2__) && !defined(XVM_ANSI)
|
222
|
-
assert(r != NULL && ((
|
223
|
-
assert(x != NULL && ((
|
224
|
-
assert(y != NULL && ((
|
224
|
+
assert(r != NULL && ((uintptr_t)r % 16) == 0);
|
225
|
+
assert(x != NULL && ((uintptr_t)x % 16) == 0);
|
226
|
+
assert(y != NULL && ((uintptr_t)y % 16) == 0);
|
225
227
|
const __m128d va = _mm_set1_pd(a);
|
226
|
-
for (
|
228
|
+
for (uint64_t n = 0; n < N; n += 4) {
|
227
229
|
const __m128d x0 = _mm_load_pd(x + n );
|
228
230
|
const __m128d x1 = _mm_load_pd(x + n + 2);
|
229
231
|
const __m128d y0 = _mm_load_pd(y + n );
|
@@ -236,7 +238,7 @@ void xvm_axpy(double r[], double a, const double x[], const double y[], size_t N
|
|
236
238
|
_mm_store_pd(r + n + 2, r1);
|
237
239
|
}
|
238
240
|
#else
|
239
|
-
for (
|
241
|
+
for (uint64_t n = 0; n < N; n++)
|
240
242
|
r[n] = a * x[n] + y[n];
|
241
243
|
#endif
|
242
244
|
}
|
@@ -270,14 +272,14 @@ void xvm_axpy(double r[], double a, const double x[], const double y[], size_t N
|
|
270
272
|
* -inf or -oor --> return 0.0
|
271
273
|
* qNaN or sNaN --> return qNaN
|
272
274
|
*
|
273
|
-
* This code is copyright 2004-
|
275
|
+
* This code is copyright 2004-2013 Thomas Lavergne and licenced under the
|
274
276
|
* BSD licence like the rest of Wapiti.
|
275
277
|
*/
|
276
|
-
void xvm_expma(double r[], const double x[], double a,
|
278
|
+
void xvm_expma(double r[], const double x[], double a, uint64_t N) {
|
277
279
|
#if defined(__SSE2__) && !defined(XVM_ANSI)
|
278
280
|
#define xvm_vconst(v) (_mm_castsi128_pd(_mm_set1_epi64x((v))))
|
279
|
-
assert(r != NULL && ((
|
280
|
-
assert(x != NULL && ((
|
281
|
+
assert(r != NULL && ((uintptr_t)r % 16) == 0);
|
282
|
+
assert(x != NULL && ((uintptr_t)x % 16) == 0);
|
281
283
|
const __m128i vl = _mm_set1_epi64x(0x3ff0000000000000ULL);
|
282
284
|
const __m128d ehi = xvm_vconst(0x4086232bdd7abcd2ULL);
|
283
285
|
const __m128d elo = xvm_vconst(0xc086232bdd7abcd2ULL);
|
@@ -300,7 +302,7 @@ void xvm_expma(double r[], const double x[], double a, size_t N) {
|
|
300
302
|
const __m128d p10 = xvm_vconst(0x3e9299068168ac8fULL);
|
301
303
|
const __m128d p11 = xvm_vconst(0x3e5ac52350b60b19ULL);
|
302
304
|
const __m128d va = _mm_set1_pd(a);
|
303
|
-
for (
|
305
|
+
for (uint64_t n = 0; n < N; n += 4) {
|
304
306
|
__m128d mn1, mn2, mi1, mi2;
|
305
307
|
__m128d t1, t2, d1, d2;
|
306
308
|
__m128d v1, v2, w1, w2;
|
@@ -365,7 +367,7 @@ void xvm_expma(double r[], const double x[], double a, size_t N) {
|
|
365
367
|
_mm_store_pd(r + n + 2, v2);
|
366
368
|
}
|
367
369
|
#else
|
368
|
-
for (
|
370
|
+
for (uint64_t n = 0; n < N; n++)
|
369
371
|
r[n] = exp(x[n]) - a;
|
370
372
|
#endif
|
371
373
|
}
|
data/ext/wapiti/vmath.h
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
/*
|
2
2
|
* Wapiti - A linear-chain CRF tool
|
3
3
|
*
|
4
|
-
* Copyright (c) 2009-
|
4
|
+
* Copyright (c) 2009-2013 CNRS
|
5
5
|
* All rights reserved.
|
6
6
|
*
|
7
7
|
* Redistribution and use in source and binary forms, with or without
|
@@ -28,24 +28,25 @@
|
|
28
28
|
#ifndef vmath_h
|
29
29
|
#define vmath_h
|
30
30
|
|
31
|
-
#include <
|
31
|
+
#include <stdint.h>
|
32
32
|
|
33
33
|
const char *xvm_mode(void);
|
34
34
|
|
35
|
-
double *xvm_new(
|
35
|
+
double *xvm_new(uint64_t N);
|
36
36
|
void xvm_free(double x[]);
|
37
37
|
|
38
|
-
void xvm_neg(double r[], const double x[],
|
39
|
-
void xvm_sub(double r[], const double x[], const double y[],
|
40
|
-
void xvm_scale(double r[], const double x[], double a,
|
41
|
-
double xvm_unit(double r[], const double x[],
|
38
|
+
void xvm_neg(double r[], const double x[], uint64_t N);
|
39
|
+
void xvm_sub(double r[], const double x[], const double y[], uint64_t N);
|
40
|
+
void xvm_scale(double r[], const double x[], double a, uint64_t N);
|
41
|
+
double xvm_unit(double r[], const double x[], uint64_t N);
|
42
42
|
|
43
|
-
double xvm_norm(const double x[],
|
44
|
-
double xvm_dot(const double x[], const double y[],
|
43
|
+
double xvm_norm(const double x[], uint64_t N);
|
44
|
+
double xvm_dot(const double x[], const double y[], uint64_t N);
|
45
45
|
|
46
|
-
void xvm_axpy(double r[], double a, const double x[], const double y[],
|
46
|
+
void xvm_axpy(double r[], double a, const double x[], const double y[],
|
47
|
+
uint64_t N);
|
47
48
|
|
48
|
-
void xvm_expma(double r[], const double x[], double a,
|
49
|
+
void xvm_expma(double r[], const double x[], double a, uint64_t N);
|
49
50
|
|
50
51
|
#endif
|
51
52
|
|