wapiti 0.0.5 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.simplecov +3 -0
- data/Gemfile +25 -2
- data/HISTORY.md +5 -1
- data/LICENSE +14 -13
- data/README.md +9 -16
- data/Rakefile +38 -8
- data/ext/wapiti/bcd.c +126 -124
- data/ext/wapiti/decoder.c +203 -124
- data/ext/wapiti/decoder.h +6 -4
- data/ext/wapiti/extconf.rb +2 -2
- data/ext/wapiti/gradient.c +491 -320
- data/ext/wapiti/gradient.h +52 -34
- data/ext/wapiti/lbfgs.c +74 -33
- data/ext/wapiti/model.c +47 -37
- data/ext/wapiti/model.h +22 -20
- data/ext/wapiti/native.c +850 -839
- data/ext/wapiti/native.h +1 -1
- data/ext/wapiti/options.c +52 -20
- data/ext/wapiti/options.h +37 -30
- data/ext/wapiti/pattern.c +35 -33
- data/ext/wapiti/pattern.h +12 -11
- data/ext/wapiti/progress.c +14 -13
- data/ext/wapiti/progress.h +3 -2
- data/ext/wapiti/quark.c +14 -16
- data/ext/wapiti/quark.h +6 -5
- data/ext/wapiti/reader.c +83 -69
- data/ext/wapiti/reader.h +11 -9
- data/ext/wapiti/rprop.c +84 -43
- data/ext/wapiti/sequence.h +18 -16
- data/ext/wapiti/sgdl1.c +45 -43
- data/ext/wapiti/thread.c +19 -17
- data/ext/wapiti/thread.h +5 -4
- data/ext/wapiti/tools.c +7 -7
- data/ext/wapiti/tools.h +3 -4
- data/ext/wapiti/trainers.h +1 -1
- data/ext/wapiti/vmath.c +40 -38
- data/ext/wapiti/vmath.h +12 -11
- data/ext/wapiti/wapiti.c +159 -37
- data/ext/wapiti/wapiti.h +18 -4
- data/lib/wapiti.rb +15 -15
- data/lib/wapiti/errors.rb +15 -15
- data/lib/wapiti/model.rb +92 -84
- data/lib/wapiti/options.rb +123 -124
- data/lib/wapiti/utility.rb +14 -14
- data/lib/wapiti/version.rb +2 -2
- data/spec/spec_helper.rb +29 -9
- data/spec/wapiti/model_spec.rb +230 -194
- data/spec/wapiti/native_spec.rb +7 -8
- data/spec/wapiti/options_spec.rb +184 -174
- data/wapiti.gemspec +22 -8
- metadata +38 -42
- data/.gitignore +0 -5
data/ext/wapiti/thread.c
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
/*
|
2
2
|
* Wapiti - A linear-chain CRF tool
|
3
3
|
*
|
4
|
-
* Copyright (c) 2009-
|
4
|
+
* Copyright (c) 2009-2013 CNRS
|
5
5
|
* All rights reserved.
|
6
6
|
*
|
7
7
|
* Redistribution and use in source and binary forms, with or without
|
@@ -25,6 +25,8 @@
|
|
25
25
|
* POSSIBILITY OF SUCH DAMAGE.
|
26
26
|
*/
|
27
27
|
|
28
|
+
#include <stdint.h>
|
29
|
+
|
28
30
|
#include "model.h"
|
29
31
|
#include "tools.h"
|
30
32
|
#include "thread.h"
|
@@ -51,10 +53,10 @@
|
|
51
53
|
******************************************************************************/
|
52
54
|
#ifdef MTH_ANSI
|
53
55
|
struct job_s {
|
54
|
-
|
56
|
+
uint32_t size;
|
55
57
|
};
|
56
58
|
|
57
|
-
bool mth_getjob(job_t *job,
|
59
|
+
bool mth_getjob(job_t *job, uint32_t *cnt, uint32_t *pos) {
|
58
60
|
if (job->size == 0)
|
59
61
|
return false;
|
60
62
|
*cnt = job->size;
|
@@ -63,7 +65,7 @@ bool mth_getjob(job_t *job, size_t *cnt, size_t *pos) {
|
|
63
65
|
return true;
|
64
66
|
}
|
65
67
|
|
66
|
-
void mth_spawn(func_t *f,
|
68
|
+
void mth_spawn(func_t *f, uint32_t W, void *ud[W], uint32_t size, uint32_t batch) {
|
67
69
|
unused(batch);
|
68
70
|
if (size == 0) {
|
69
71
|
f(NULL, 0, 1, ud[0]);
|
@@ -78,19 +80,19 @@ void mth_spawn(func_t *f, int W, void *ud[W], size_t size, size_t batch) {
|
|
78
80
|
#include <pthread.h>
|
79
81
|
|
80
82
|
struct job_s {
|
81
|
-
|
82
|
-
|
83
|
-
|
83
|
+
uint32_t size;
|
84
|
+
uint32_t send;
|
85
|
+
uint32_t batch;
|
84
86
|
pthread_mutex_t lock;
|
85
87
|
};
|
86
88
|
|
87
89
|
typedef struct mth_s mth_t;
|
88
90
|
struct mth_s {
|
89
|
-
job_t
|
90
|
-
|
91
|
-
|
92
|
-
func_t
|
93
|
-
void
|
91
|
+
job_t *job;
|
92
|
+
uint32_t id;
|
93
|
+
uint32_t cnt;
|
94
|
+
func_t *f;
|
95
|
+
void *ud;
|
94
96
|
};
|
95
97
|
|
96
98
|
/* mth_getjob:
|
@@ -100,7 +102,7 @@ struct mth_s {
|
|
100
102
|
* This function use a lock to ensure thread safety as it will be called by
|
101
103
|
* the multiple workers threads.
|
102
104
|
*/
|
103
|
-
bool mth_getjob(job_t *job,
|
105
|
+
bool mth_getjob(job_t *job, uint32_t *cnt, uint32_t *pos) {
|
104
106
|
if (job == NULL)
|
105
107
|
return false;
|
106
108
|
if (job->send == job->size)
|
@@ -124,7 +126,7 @@ static void *mth_stub(void *ud) {
|
|
124
126
|
* will get a unique identifier between 0 and W-1 and a user data from the
|
125
127
|
* 'ud' array.
|
126
128
|
*/
|
127
|
-
void mth_spawn(func_t *f,
|
129
|
+
void mth_spawn(func_t *f, uint32_t W, void *ud[W], uint32_t size, uint32_t batch) {
|
128
130
|
// First prepare the jobs scheduler
|
129
131
|
job_t job, *pjob = NULL;
|
130
132
|
if (size != 0) {
|
@@ -144,7 +146,7 @@ void mth_spawn(func_t *f, int W, void *ud[W], size_t size, size_t batch) {
|
|
144
146
|
// We prepare the parameters structures that will be send to the threads
|
145
147
|
// with informations for calling the user function.
|
146
148
|
mth_t p[W];
|
147
|
-
for (
|
149
|
+
for (uint32_t w = 0; w < W; w++) {
|
148
150
|
p[w].job = pjob;
|
149
151
|
p[w].id = w;
|
150
152
|
p[w].cnt = W;
|
@@ -159,10 +161,10 @@ void mth_spawn(func_t *f, int W, void *ud[W], size_t size, size_t batch) {
|
|
159
161
|
pthread_attr_setscope(&attr, PTHREAD_SCOPE_SYSTEM);
|
160
162
|
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
|
161
163
|
pthread_t th[W];
|
162
|
-
for (
|
164
|
+
for (uint32_t w = 0; w < W; w++)
|
163
165
|
if (pthread_create(&th[w], &attr, &mth_stub, &p[w]) != 0)
|
164
166
|
fatal("failed to create thread");
|
165
|
-
for (
|
167
|
+
for (uint32_t w = 0; w < W; w++)
|
166
168
|
if (pthread_join(th[w], NULL) != 0)
|
167
169
|
fatal("failed to join thread");
|
168
170
|
pthread_attr_destroy(&attr);
|
data/ext/wapiti/thread.h
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
/*
|
2
2
|
* Wapiti - A linear-chain CRF tool
|
3
3
|
*
|
4
|
-
* Copyright (c) 2009-
|
4
|
+
* Copyright (c) 2009-2013 CNRS
|
5
5
|
* All rights reserved.
|
6
6
|
*
|
7
7
|
* Redistribution and use in source and binary forms, with or without
|
@@ -28,15 +28,16 @@
|
|
28
28
|
#ifndef thread_h
|
29
29
|
#define thread_h
|
30
30
|
|
31
|
+
#include <stdint.h>
|
31
32
|
#include <pthread.h>
|
32
33
|
|
33
34
|
#include "model.h"
|
34
35
|
|
35
36
|
typedef struct job_s job_t;
|
36
37
|
|
37
|
-
typedef void (func_t)(job_t *job,
|
38
|
+
typedef void (func_t)(job_t *job, uint32_t id, uint32_t cnt, void *ud);
|
38
39
|
|
39
|
-
bool mth_getjob(job_t *job,
|
40
|
-
void mth_spawn(func_t *f,
|
40
|
+
bool mth_getjob(job_t *job, uint32_t *cnt, uint32_t *pos);
|
41
|
+
void mth_spawn(func_t *f, uint32_t W, void *ud[W], uint32_t size, uint32_t batch);
|
41
42
|
|
42
43
|
#endif
|
data/ext/wapiti/tools.c
CHANGED
@@ -69,7 +69,7 @@ void fatal(const char *msg, ...) {
|
|
69
69
|
|
70
70
|
rb_raise(cNativeError, msg, args);
|
71
71
|
|
72
|
-
va_end(args);
|
72
|
+
va_end(args);
|
73
73
|
}
|
74
74
|
|
75
75
|
/* pfatal:
|
@@ -80,7 +80,7 @@ void fatal(const char *msg, ...) {
|
|
80
80
|
* calling pfatal.
|
81
81
|
*/
|
82
82
|
void pfatal(const char *msg, ...) {
|
83
|
-
const char *err = strerror(errno);
|
83
|
+
// const char *err = strerror(errno);
|
84
84
|
va_list args;
|
85
85
|
va_start(args, msg);
|
86
86
|
|
@@ -123,7 +123,7 @@ void info(const char *msg, ...) {
|
|
123
123
|
va_end(args);
|
124
124
|
}
|
125
125
|
|
126
|
-
/*
|
126
|
+
/* wapiti_xmalloc:
|
127
127
|
* A simple wrapper around malloc who violently fail if memory cannot be
|
128
128
|
* allocated, so it will never return NULL.
|
129
129
|
*/
|
@@ -134,8 +134,8 @@ void *wapiti_xmalloc(size_t size) {
|
|
134
134
|
return ptr;
|
135
135
|
}
|
136
136
|
|
137
|
-
/*
|
138
|
-
* As
|
137
|
+
/* wapiti_xrealloc:
|
138
|
+
* As wapiti_xmalloc, this is a simple wrapper around realloc who fail on memory
|
139
139
|
* error and so never return NULL.
|
140
140
|
*/
|
141
141
|
void *wapiti_xrealloc(void *ptr, size_t size) {
|
@@ -151,7 +151,7 @@ void *wapiti_xrealloc(void *ptr, size_t size) {
|
|
151
151
|
*/
|
152
152
|
char *xstrdup(const char *str) {
|
153
153
|
const int len = strlen(str) + 1;
|
154
|
-
char *res =
|
154
|
+
char *res = wapiti_xmalloc(sizeof(char) * len);
|
155
155
|
memcpy(res, str, len);
|
156
156
|
return res;
|
157
157
|
}
|
@@ -175,7 +175,7 @@ char *ns_readstr(FILE *file) {
|
|
175
175
|
int len;
|
176
176
|
if (fscanf(file, "%d:", &len) != 1)
|
177
177
|
pfatal("cannot read from file");
|
178
|
-
char *buf =
|
178
|
+
char *buf = wapiti_xmalloc(len + 1);
|
179
179
|
if (fread(buf, len, 1, file) != 1)
|
180
180
|
pfatal("cannot read from file");
|
181
181
|
if (fgetc(file) != ',')
|
data/ext/wapiti/tools.h
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
/*
|
2
2
|
* Wapiti - A linear-chain CRF tool
|
3
3
|
*
|
4
|
-
* Copyright (c) 2009-
|
4
|
+
* Copyright (c) 2009-2013 CNRS
|
5
5
|
* All rights reserved.
|
6
6
|
*
|
7
7
|
* Redistribution and use in source and binary forms, with or without
|
@@ -29,12 +29,11 @@
|
|
29
29
|
|
30
30
|
#include <stdarg.h>
|
31
31
|
#include <stddef.h>
|
32
|
+
#include <stdint.h>
|
32
33
|
#include <stdio.h>
|
33
34
|
|
34
|
-
#include <ruby.h>
|
35
|
-
|
36
35
|
#define unused(v) ((void)(v))
|
37
|
-
#define none ((
|
36
|
+
#define none ((uint64_t)-1)
|
38
37
|
|
39
38
|
#define min(a, b) ((a) < (b) ? (a) : (b))
|
40
39
|
#define max(a, b) ((a) < (b) ? (b) : (a))
|
data/ext/wapiti/trainers.h
CHANGED
data/ext/wapiti/vmath.c
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
/*
|
2
2
|
* Wapiti - A linear-chain CRF tool
|
3
3
|
*
|
4
|
-
* Copyright (c) 2009-
|
4
|
+
* Copyright (c) 2009-2013 CNRS
|
5
5
|
* All rights reserved.
|
6
6
|
*
|
7
7
|
* Redistribution and use in source and binary forms, with or without
|
@@ -28,6 +28,7 @@
|
|
28
28
|
#include <assert.h>
|
29
29
|
#include <math.h>
|
30
30
|
#include <stddef.h>
|
31
|
+
#include <stdint.h>
|
31
32
|
#include <stdlib.h>
|
32
33
|
|
33
34
|
#include "wapiti.h"
|
@@ -54,7 +55,7 @@ const char *xvm_mode(void) {
|
|
54
55
|
* ensure that the vector size contains the need padding. You must only use
|
55
56
|
* vector allocated by this function if you use the optimized code paths.
|
56
57
|
*/
|
57
|
-
double *xvm_new(
|
58
|
+
double *xvm_new(uint64_t N) {
|
58
59
|
#if defined(__SSE2__) && !defined(XVM_ANSI)
|
59
60
|
if (N % 4 != 0)
|
60
61
|
N += 4 - N % 4;
|
@@ -82,12 +83,12 @@ void xvm_free(double x[]) {
|
|
82
83
|
* Return the component-wise negation of the given vector:
|
83
84
|
* r = -x
|
84
85
|
*/
|
85
|
-
void xvm_neg(double r[], const double x[],
|
86
|
+
void xvm_neg(double r[], const double x[], uint64_t N) {
|
86
87
|
#if defined(__SSE2__) && !defined(XVM_ANSI)
|
87
|
-
assert(r != NULL && ((
|
88
|
-
assert(x != NULL && ((
|
88
|
+
assert(r != NULL && ((uintptr_t)r % 16) == 0);
|
89
|
+
assert(x != NULL && ((uintptr_t)x % 16) == 0);
|
89
90
|
const __m128d vz = _mm_setzero_pd();
|
90
|
-
for (
|
91
|
+
for (uint64_t n = 0; n < N; n += 4) {
|
91
92
|
const __m128d x0 = _mm_load_pd(x + n );
|
92
93
|
const __m128d x1 = _mm_load_pd(x + n + 2);
|
93
94
|
const __m128d r0 = _mm_sub_pd(vz, x0);
|
@@ -96,7 +97,7 @@ void xvm_neg(double r[], const double x[], size_t N) {
|
|
96
97
|
_mm_store_pd(r + n + 2, r1);
|
97
98
|
}
|
98
99
|
#else
|
99
|
-
for (
|
100
|
+
for (uint64_t n = 0; n < N; n++)
|
100
101
|
r[n] = -x[n];
|
101
102
|
#endif
|
102
103
|
}
|
@@ -105,12 +106,12 @@ void xvm_neg(double r[], const double x[], size_t N) {
|
|
105
106
|
* Return the difference of the two given vector:
|
106
107
|
* r = x .- y
|
107
108
|
*/
|
108
|
-
void xvm_sub(double r[], const double x[], const double y[],
|
109
|
+
void xvm_sub(double r[], const double x[], const double y[], uint64_t N) {
|
109
110
|
#if defined(__SSE2__) && !defined(XVM_ANSI)
|
110
|
-
assert(r != NULL && ((
|
111
|
-
assert(x != NULL && ((
|
112
|
-
assert(y != NULL && ((
|
113
|
-
for (
|
111
|
+
assert(r != NULL && ((uintptr_t)r % 16) == 0);
|
112
|
+
assert(x != NULL && ((uintptr_t)x % 16) == 0);
|
113
|
+
assert(y != NULL && ((uintptr_t)y % 16) == 0);
|
114
|
+
for (uint64_t n = 0; n < N; n += 4) {
|
114
115
|
const __m128d x0 = _mm_load_pd(x + n );
|
115
116
|
const __m128d x1 = _mm_load_pd(x + n + 2);
|
116
117
|
const __m128d y0 = _mm_load_pd(y + n );
|
@@ -121,7 +122,7 @@ void xvm_sub(double r[], const double x[], const double y[], size_t N) {
|
|
121
122
|
_mm_store_pd(r + n + 2, r1);
|
122
123
|
}
|
123
124
|
#else
|
124
|
-
for (
|
125
|
+
for (uint64_t n = 0; n < N; n++)
|
125
126
|
r[n] = x[n] - y[n];
|
126
127
|
#endif
|
127
128
|
}
|
@@ -130,8 +131,8 @@ void xvm_sub(double r[], const double x[], const double y[], size_t N) {
|
|
130
131
|
* Return the given vector scaled by a constant:
|
131
132
|
* r = a * x
|
132
133
|
*/
|
133
|
-
void xvm_scale(double r[], const double x[], double a,
|
134
|
-
for (
|
134
|
+
void xvm_scale(double r[], const double x[], double a, uint64_t N) {
|
135
|
+
for (uint64_t n = 0; n < N; n++)
|
135
136
|
r[n] = x[n] * a;
|
136
137
|
}
|
137
138
|
|
@@ -139,9 +140,9 @@ void xvm_scale(double r[], const double x[], double a, size_t N) {
|
|
139
140
|
* Store a normalized copy of the given vector in r and return the
|
140
141
|
* normalization factor.
|
141
142
|
*/
|
142
|
-
double xvm_unit(double r[], const double x[],
|
143
|
+
double xvm_unit(double r[], const double x[], uint64_t N) {
|
143
144
|
double sum = 0.0;
|
144
|
-
for (
|
145
|
+
for (uint64_t n = 0; n < N; n++)
|
145
146
|
sum += x[n];
|
146
147
|
const double scale = 1.0 / sum;
|
147
148
|
xvm_scale(r, x, scale, N);
|
@@ -151,11 +152,11 @@ double xvm_unit(double r[], const double x[], size_t N) {
|
|
151
152
|
/* xvm_norm:
|
152
153
|
* Return the euclidian norm of the given vector.
|
153
154
|
*/
|
154
|
-
double xvm_norm(const double x[],
|
155
|
+
double xvm_norm(const double x[], uint64_t N) {
|
155
156
|
double r = 0.0;
|
156
157
|
#if defined(__SSE2__) && !defined(XVM_ANSI)
|
157
|
-
assert(x != NULL && ((
|
158
|
-
|
158
|
+
assert(x != NULL && ((uintptr_t)x % 16) == 0);
|
159
|
+
uint64_t n, d = N % 4;
|
159
160
|
__m128d s0 = _mm_setzero_pd();
|
160
161
|
__m128d s1 = _mm_setzero_pd();
|
161
162
|
for (n = 0; n < N - d; n += 4) {
|
@@ -173,7 +174,7 @@ double xvm_norm(const double x[], size_t N) {
|
|
173
174
|
for ( ; n < N; n++)
|
174
175
|
r += x[n] * x[n];
|
175
176
|
#else
|
176
|
-
for (
|
177
|
+
for (uint64_t n = 0; n < N; n++)
|
177
178
|
r += x[n] * x[n];
|
178
179
|
#endif
|
179
180
|
return sqrt(r);
|
@@ -182,12 +183,12 @@ double xvm_norm(const double x[], size_t N) {
|
|
182
183
|
/* xvm_dot:
|
183
184
|
* Return the dot product of the two given vectors.
|
184
185
|
*/
|
185
|
-
double xvm_dot(const double x[], const double y[],
|
186
|
+
double xvm_dot(const double x[], const double y[], uint64_t N) {
|
186
187
|
double r = 0.0;
|
187
188
|
#if defined(__SSE2__) && !defined(XVM_ANSI)
|
188
|
-
assert(x != NULL && ((
|
189
|
-
assert(y != NULL && ((
|
190
|
-
|
189
|
+
assert(x != NULL && ((uintptr_t)x % 16) == 0);
|
190
|
+
assert(y != NULL && ((uintptr_t)y % 16) == 0);
|
191
|
+
uint64_t n, d = N % 4;
|
191
192
|
__m128d s0 = _mm_setzero_pd();
|
192
193
|
__m128d s1 = _mm_setzero_pd();
|
193
194
|
for (n = 0; n < N - d; n += 4) {
|
@@ -207,7 +208,7 @@ double xvm_dot(const double x[], const double y[], size_t N) {
|
|
207
208
|
for ( ; n < N; n++)
|
208
209
|
r += x[n] * y[n];
|
209
210
|
#else
|
210
|
-
for (
|
211
|
+
for (uint64_t n = 0; n < N; n++)
|
211
212
|
r += x[n] * y[n];
|
212
213
|
#endif
|
213
214
|
return r;
|
@@ -217,13 +218,14 @@ double xvm_dot(const double x[], const double y[], size_t N) {
|
|
217
218
|
* Return the sum of x scaled by a and y:
|
218
219
|
* r = a * x + y
|
219
220
|
*/
|
220
|
-
void xvm_axpy(double r[], double a, const double x[], const double y[],
|
221
|
+
void xvm_axpy(double r[], double a, const double x[], const double y[],
|
222
|
+
uint64_t N) {
|
221
223
|
#if defined(__SSE2__) && !defined(XVM_ANSI)
|
222
|
-
assert(r != NULL && ((
|
223
|
-
assert(x != NULL && ((
|
224
|
-
assert(y != NULL && ((
|
224
|
+
assert(r != NULL && ((uintptr_t)r % 16) == 0);
|
225
|
+
assert(x != NULL && ((uintptr_t)x % 16) == 0);
|
226
|
+
assert(y != NULL && ((uintptr_t)y % 16) == 0);
|
225
227
|
const __m128d va = _mm_set1_pd(a);
|
226
|
-
for (
|
228
|
+
for (uint64_t n = 0; n < N; n += 4) {
|
227
229
|
const __m128d x0 = _mm_load_pd(x + n );
|
228
230
|
const __m128d x1 = _mm_load_pd(x + n + 2);
|
229
231
|
const __m128d y0 = _mm_load_pd(y + n );
|
@@ -236,7 +238,7 @@ void xvm_axpy(double r[], double a, const double x[], const double y[], size_t N
|
|
236
238
|
_mm_store_pd(r + n + 2, r1);
|
237
239
|
}
|
238
240
|
#else
|
239
|
-
for (
|
241
|
+
for (uint64_t n = 0; n < N; n++)
|
240
242
|
r[n] = a * x[n] + y[n];
|
241
243
|
#endif
|
242
244
|
}
|
@@ -270,14 +272,14 @@ void xvm_axpy(double r[], double a, const double x[], const double y[], size_t N
|
|
270
272
|
* -inf or -oor --> return 0.0
|
271
273
|
* qNaN or sNaN --> return qNaN
|
272
274
|
*
|
273
|
-
* This code is copyright 2004-
|
275
|
+
* This code is copyright 2004-2013 Thomas Lavergne and licenced under the
|
274
276
|
* BSD licence like the remaining of Wapiti.
|
275
277
|
*/
|
276
|
-
void xvm_expma(double r[], const double x[], double a,
|
278
|
+
void xvm_expma(double r[], const double x[], double a, uint64_t N) {
|
277
279
|
#if defined(__SSE2__) && !defined(XVM_ANSI)
|
278
280
|
#define xvm_vconst(v) (_mm_castsi128_pd(_mm_set1_epi64x((v))))
|
279
|
-
assert(r != NULL && ((
|
280
|
-
assert(x != NULL && ((
|
281
|
+
assert(r != NULL && ((uintptr_t)r % 16) == 0);
|
282
|
+
assert(x != NULL && ((uintptr_t)x % 16) == 0);
|
281
283
|
const __m128i vl = _mm_set1_epi64x(0x3ff0000000000000ULL);
|
282
284
|
const __m128d ehi = xvm_vconst(0x4086232bdd7abcd2ULL);
|
283
285
|
const __m128d elo = xvm_vconst(0xc086232bdd7abcd2ULL);
|
@@ -300,7 +302,7 @@ void xvm_expma(double r[], const double x[], double a, size_t N) {
|
|
300
302
|
const __m128d p10 = xvm_vconst(0x3e9299068168ac8fULL);
|
301
303
|
const __m128d p11 = xvm_vconst(0x3e5ac52350b60b19ULL);
|
302
304
|
const __m128d va = _mm_set1_pd(a);
|
303
|
-
for (
|
305
|
+
for (uint64_t n = 0; n < N; n += 4) {
|
304
306
|
__m128d mn1, mn2, mi1, mi2;
|
305
307
|
__m128d t1, t2, d1, d2;
|
306
308
|
__m128d v1, v2, w1, w2;
|
@@ -365,7 +367,7 @@ void xvm_expma(double r[], const double x[], double a, size_t N) {
|
|
365
367
|
_mm_store_pd(r + n + 2, v2);
|
366
368
|
}
|
367
369
|
#else
|
368
|
-
for (
|
370
|
+
for (uint64_t n = 0; n < N; n++)
|
369
371
|
r[n] = exp(x[n]) - a;
|
370
372
|
#endif
|
371
373
|
}
|
data/ext/wapiti/vmath.h
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
/*
|
2
2
|
* Wapiti - A linear-chain CRF tool
|
3
3
|
*
|
4
|
-
* Copyright (c) 2009-
|
4
|
+
* Copyright (c) 2009-2013 CNRS
|
5
5
|
* All rights reserved.
|
6
6
|
*
|
7
7
|
* Redistribution and use in source and binary forms, with or without
|
@@ -28,24 +28,25 @@
|
|
28
28
|
#ifndef vmath_h
|
29
29
|
#define vmath_h
|
30
30
|
|
31
|
-
#include <
|
31
|
+
#include <stdint.h>
|
32
32
|
|
33
33
|
const char *xvm_mode(void);
|
34
34
|
|
35
|
-
double *xvm_new(
|
35
|
+
double *xvm_new(uint64_t N);
|
36
36
|
void xvm_free(double x[]);
|
37
37
|
|
38
|
-
void xvm_neg(double r[], const double x[],
|
39
|
-
void xvm_sub(double r[], const double x[], const double y[],
|
40
|
-
void xvm_scale(double r[], const double x[], double a,
|
41
|
-
double xvm_unit(double r[], const double x[],
|
38
|
+
void xvm_neg(double r[], const double x[], uint64_t N);
|
39
|
+
void xvm_sub(double r[], const double x[], const double y[], uint64_t N);
|
40
|
+
void xvm_scale(double r[], const double x[], double a, uint64_t N);
|
41
|
+
double xvm_unit(double r[], const double x[], uint64_t N);
|
42
42
|
|
43
|
-
double xvm_norm(const double x[],
|
44
|
-
double xvm_dot(const double x[], const double y[],
|
43
|
+
double xvm_norm(const double x[], uint64_t N);
|
44
|
+
double xvm_dot(const double x[], const double y[], uint64_t N);
|
45
45
|
|
46
|
-
void xvm_axpy(double r[], double a, const double x[], const double y[],
|
46
|
+
void xvm_axpy(double r[], double a, const double x[], const double y[],
|
47
|
+
uint64_t N);
|
47
48
|
|
48
|
-
void xvm_expma(double r[], const double x[], double a,
|
49
|
+
void xvm_expma(double r[], const double x[], double a, uint64_t N);
|
49
50
|
|
50
51
|
#endif
|
51
52
|
|