opener-opinion-detector-basic 2.0.0 → 2.0.1
- checksums.yaml +4 -4
- data/README.md +2 -0
- data/ext/hack/Rakefile +0 -2
- data/lib/opener/opinion_detector_basic/version.rb +1 -1
- data/opener-opinion-detector-basic.gemspec +0 -1
- data/task/compile.rake +1 -1
- data/task/requirements.rake +0 -1
- metadata +2 -142
- data/core/vendor/src/crfsuite/AUTHORS +0 -1
- data/core/vendor/src/crfsuite/COPYING +0 -27
- data/core/vendor/src/crfsuite/ChangeLog +0 -103
- data/core/vendor/src/crfsuite/INSTALL +0 -236
- data/core/vendor/src/crfsuite/Makefile.am +0 -19
- data/core/vendor/src/crfsuite/Makefile.in +0 -783
- data/core/vendor/src/crfsuite/README +0 -183
- data/core/vendor/src/crfsuite/aclocal.m4 +0 -9018
- data/core/vendor/src/crfsuite/autogen.sh +0 -38
- data/core/vendor/src/crfsuite/compile +0 -143
- data/core/vendor/src/crfsuite/config.guess +0 -1502
- data/core/vendor/src/crfsuite/config.h.in +0 -198
- data/core/vendor/src/crfsuite/config.sub +0 -1714
- data/core/vendor/src/crfsuite/configure +0 -14273
- data/core/vendor/src/crfsuite/configure.in +0 -149
- data/core/vendor/src/crfsuite/crfsuite.sln +0 -42
- data/core/vendor/src/crfsuite/depcomp +0 -630
- data/core/vendor/src/crfsuite/example/chunking.py +0 -49
- data/core/vendor/src/crfsuite/example/crfutils.py +0 -179
- data/core/vendor/src/crfsuite/example/ner.py +0 -270
- data/core/vendor/src/crfsuite/example/pos.py +0 -78
- data/core/vendor/src/crfsuite/example/template.py +0 -88
- data/core/vendor/src/crfsuite/frontend/Makefile.am +0 -29
- data/core/vendor/src/crfsuite/frontend/Makefile.in +0 -640
- data/core/vendor/src/crfsuite/frontend/dump.c +0 -116
- data/core/vendor/src/crfsuite/frontend/frontend.vcxproj +0 -129
- data/core/vendor/src/crfsuite/frontend/iwa.c +0 -273
- data/core/vendor/src/crfsuite/frontend/iwa.h +0 -65
- data/core/vendor/src/crfsuite/frontend/learn.c +0 -439
- data/core/vendor/src/crfsuite/frontend/main.c +0 -137
- data/core/vendor/src/crfsuite/frontend/option.c +0 -93
- data/core/vendor/src/crfsuite/frontend/option.h +0 -86
- data/core/vendor/src/crfsuite/frontend/readdata.h +0 -38
- data/core/vendor/src/crfsuite/frontend/reader.c +0 -136
- data/core/vendor/src/crfsuite/frontend/tag.c +0 -427
- data/core/vendor/src/crfsuite/genbinary.sh.in +0 -15
- data/core/vendor/src/crfsuite/include/Makefile.am +0 -11
- data/core/vendor/src/crfsuite/include/Makefile.in +0 -461
- data/core/vendor/src/crfsuite/include/crfsuite.h +0 -1063
- data/core/vendor/src/crfsuite/include/crfsuite.hpp +0 -555
- data/core/vendor/src/crfsuite/include/crfsuite_api.hpp +0 -400
- data/core/vendor/src/crfsuite/include/os.h +0 -61
- data/core/vendor/src/crfsuite/install-sh +0 -520
- data/core/vendor/src/crfsuite/lib/cqdb/COPYING +0 -28
- data/core/vendor/src/crfsuite/lib/cqdb/Makefile.am +0 -21
- data/core/vendor/src/crfsuite/lib/cqdb/Makefile.in +0 -549
- data/core/vendor/src/crfsuite/lib/cqdb/cqdb.vcxproj +0 -86
- data/core/vendor/src/crfsuite/lib/cqdb/include/cqdb.h +0 -524
- data/core/vendor/src/crfsuite/lib/cqdb/src/cqdb.c +0 -587
- data/core/vendor/src/crfsuite/lib/cqdb/src/lookup3.c +0 -976
- data/core/vendor/src/crfsuite/lib/crf/Makefile.am +0 -46
- data/core/vendor/src/crfsuite/lib/crf/Makefile.in +0 -721
- data/core/vendor/src/crfsuite/lib/crf/crf.vcxproj +0 -216
- data/core/vendor/src/crfsuite/lib/crf/src/crf1d.h +0 -353
- data/core/vendor/src/crfsuite/lib/crf/src/crf1d_context.c +0 -705
- data/core/vendor/src/crfsuite/lib/crf/src/crf1d_encode.c +0 -943
- data/core/vendor/src/crfsuite/lib/crf/src/crf1d_feature.c +0 -352
- data/core/vendor/src/crfsuite/lib/crf/src/crf1d_model.c +0 -994
- data/core/vendor/src/crfsuite/lib/crf/src/crf1d_tag.c +0 -550
- data/core/vendor/src/crfsuite/lib/crf/src/crfsuite.c +0 -492
- data/core/vendor/src/crfsuite/lib/crf/src/crfsuite_internal.h +0 -236
- data/core/vendor/src/crfsuite/lib/crf/src/crfsuite_train.c +0 -272
- data/core/vendor/src/crfsuite/lib/crf/src/dataset.c +0 -106
- data/core/vendor/src/crfsuite/lib/crf/src/dictionary.c +0 -118
- data/core/vendor/src/crfsuite/lib/crf/src/holdout.c +0 -80
- data/core/vendor/src/crfsuite/lib/crf/src/logging.c +0 -91
- data/core/vendor/src/crfsuite/lib/crf/src/logging.h +0 -48
- data/core/vendor/src/crfsuite/lib/crf/src/params.c +0 -335
- data/core/vendor/src/crfsuite/lib/crf/src/params.h +0 -80
- data/core/vendor/src/crfsuite/lib/crf/src/quark.c +0 -172
- data/core/vendor/src/crfsuite/lib/crf/src/quark.h +0 -46
- data/core/vendor/src/crfsuite/lib/crf/src/rumavl.c +0 -1107
- data/core/vendor/src/crfsuite/lib/crf/src/rumavl.h +0 -160
- data/core/vendor/src/crfsuite/lib/crf/src/train_arow.c +0 -408
- data/core/vendor/src/crfsuite/lib/crf/src/train_averaged_perceptron.c +0 -242
- data/core/vendor/src/crfsuite/lib/crf/src/train_l2sgd.c +0 -507
- data/core/vendor/src/crfsuite/lib/crf/src/train_lbfgs.c +0 -338
- data/core/vendor/src/crfsuite/lib/crf/src/train_passive_aggressive.c +0 -435
- data/core/vendor/src/crfsuite/lib/crf/src/vecmath.h +0 -341
- data/core/vendor/src/crfsuite/ltmain.sh +0 -8413
- data/core/vendor/src/crfsuite/missing +0 -376
- data/core/vendor/src/crfsuite/swig/Makefile.am +0 -13
- data/core/vendor/src/crfsuite/swig/Makefile.in +0 -365
- data/core/vendor/src/crfsuite/swig/crfsuite.cpp +0 -2
- data/core/vendor/src/crfsuite/swig/export.i +0 -32
- data/core/vendor/src/crfsuite/swig/python/README +0 -92
- data/core/vendor/src/crfsuite/swig/python/crfsuite.py +0 -329
- data/core/vendor/src/crfsuite/swig/python/export_wrap.cpp +0 -14355
- data/core/vendor/src/crfsuite/swig/python/export_wrap.h +0 -63
- data/core/vendor/src/crfsuite/swig/python/prepare.sh +0 -9
- data/core/vendor/src/crfsuite/swig/python/sample_tag.py +0 -52
- data/core/vendor/src/crfsuite/swig/python/sample_train.py +0 -68
- data/core/vendor/src/crfsuite/swig/python/setup.py +0 -44
- data/core/vendor/src/crfsuite/win32/stdint.h +0 -679
- data/core/vendor/src/liblbfgs/AUTHORS +0 -1
- data/core/vendor/src/liblbfgs/COPYING +0 -22
- data/core/vendor/src/liblbfgs/ChangeLog +0 -120
- data/core/vendor/src/liblbfgs/INSTALL +0 -231
- data/core/vendor/src/liblbfgs/Makefile.am +0 -10
- data/core/vendor/src/liblbfgs/Makefile.in +0 -638
- data/core/vendor/src/liblbfgs/NEWS +0 -0
- data/core/vendor/src/liblbfgs/README +0 -71
- data/core/vendor/src/liblbfgs/aclocal.m4 +0 -6985
- data/core/vendor/src/liblbfgs/autogen.sh +0 -38
- data/core/vendor/src/liblbfgs/config.guess +0 -1411
- data/core/vendor/src/liblbfgs/config.h.in +0 -64
- data/core/vendor/src/liblbfgs/config.sub +0 -1500
- data/core/vendor/src/liblbfgs/configure +0 -21146
- data/core/vendor/src/liblbfgs/configure.in +0 -107
- data/core/vendor/src/liblbfgs/depcomp +0 -522
- data/core/vendor/src/liblbfgs/include/lbfgs.h +0 -745
- data/core/vendor/src/liblbfgs/install-sh +0 -322
- data/core/vendor/src/liblbfgs/lbfgs.sln +0 -26
- data/core/vendor/src/liblbfgs/lib/Makefile.am +0 -24
- data/core/vendor/src/liblbfgs/lib/Makefile.in +0 -499
- data/core/vendor/src/liblbfgs/lib/arithmetic_ansi.h +0 -133
- data/core/vendor/src/liblbfgs/lib/arithmetic_sse_double.h +0 -294
- data/core/vendor/src/liblbfgs/lib/arithmetic_sse_float.h +0 -298
- data/core/vendor/src/liblbfgs/lib/lbfgs.c +0 -1371
- data/core/vendor/src/liblbfgs/lib/lib.vcxproj +0 -95
- data/core/vendor/src/liblbfgs/ltmain.sh +0 -6426
- data/core/vendor/src/liblbfgs/missing +0 -353
- data/core/vendor/src/liblbfgs/sample/Makefile.am +0 -15
- data/core/vendor/src/liblbfgs/sample/Makefile.in +0 -433
- data/core/vendor/src/liblbfgs/sample/sample.c +0 -81
- data/core/vendor/src/liblbfgs/sample/sample.cpp +0 -126
- data/core/vendor/src/liblbfgs/sample/sample.vcxproj +0 -105
- data/core/vendor/src/svm_light/LICENSE.txt +0 -59
- data/core/vendor/src/svm_light/Makefile +0 -105
- data/core/vendor/src/svm_light/kernel.h +0 -40
- data/core/vendor/src/svm_light/svm_classify.c +0 -197
- data/core/vendor/src/svm_light/svm_common.c +0 -985
- data/core/vendor/src/svm_light/svm_common.h +0 -301
- data/core/vendor/src/svm_light/svm_hideo.c +0 -1062
- data/core/vendor/src/svm_light/svm_learn.c +0 -4147
- data/core/vendor/src/svm_light/svm_learn.h +0 -169
- data/core/vendor/src/svm_light/svm_learn_main.c +0 -397
- data/core/vendor/src/svm_light/svm_loqo.c +0 -211
- data/task/c.rake +0 -36
- data/task/submodules.rake +0 -5
--- a/data/core/vendor/src/liblbfgs/lib/arithmetic_sse_double.h
+++ /dev/null
@@ -1,294 +0,0 @@
-/*
- * SSE2 implementation of vector oprations (64bit double).
- *
- * Copyright (c) 2007-2010 Naoaki Okazaki
- * All rights reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-/* $Id$ */
-
-#include <stdlib.h>
-#ifndef __APPLE__
-#include <malloc.h>
-#endif
-#include <memory.h>
-
-#if 1400 <= _MSC_VER
-#include <intrin.h>
-#endif/*1400 <= _MSC_VER*/
-
-#if HAVE_EMMINTRIN_H
-#include <emmintrin.h>
-#endif/*HAVE_EMMINTRIN_H*/
-
-inline static void* vecalloc(size_t size)
-{
-#if defined(_MSC_VER)
-    void *memblock = _aligned_malloc(size, 16);
-#elif defined(__APPLE__) /* OS X always aligns on 16-byte boundaries */
-    void *memblock = malloc(size);
-#else
-    void *memblock = NULL, *p = NULL;
-    if (posix_memalign(&p, 16, size) == 0) {
-        memblock = p;
-    }
-#endif
-    if (memblock != NULL) {
-        memset(memblock, 0, size);
-    }
-    return memblock;
-}
-
-inline static void vecfree(void *memblock)
-{
-#ifdef _MSC_VER
-    _aligned_free(memblock);
-#else
-    free(memblock);
-#endif
-}
-
-#define fsigndiff(x, y) \
-    ((_mm_movemask_pd(_mm_set_pd(*(x), *(y))) + 1) & 0x002)
-
-#define vecset(x, c, n) \
-{ \
-    int i; \
-    __m128d XMM0 = _mm_set1_pd(c); \
-    for (i = 0;i < (n);i += 8) { \
-        _mm_store_pd((x)+i  , XMM0); \
-        _mm_store_pd((x)+i+2, XMM0); \
-        _mm_store_pd((x)+i+4, XMM0); \
-        _mm_store_pd((x)+i+6, XMM0); \
-    } \
-}
-
-#define veccpy(y, x, n) \
-{ \
-    int i; \
-    for (i = 0;i < (n);i += 8) { \
-        __m128d XMM0 = _mm_load_pd((x)+i  ); \
-        __m128d XMM1 = _mm_load_pd((x)+i+2); \
-        __m128d XMM2 = _mm_load_pd((x)+i+4); \
-        __m128d XMM3 = _mm_load_pd((x)+i+6); \
-        _mm_store_pd((y)+i  , XMM0); \
-        _mm_store_pd((y)+i+2, XMM1); \
-        _mm_store_pd((y)+i+4, XMM2); \
-        _mm_store_pd((y)+i+6, XMM3); \
-    } \
-}
-
-#define vecncpy(y, x, n) \
-{ \
-    int i; \
-    for (i = 0;i < (n);i += 8) { \
-        __m128d XMM0 = _mm_setzero_pd(); \
-        __m128d XMM1 = _mm_setzero_pd(); \
-        __m128d XMM2 = _mm_setzero_pd(); \
-        __m128d XMM3 = _mm_setzero_pd(); \
-        __m128d XMM4 = _mm_load_pd((x)+i  ); \
-        __m128d XMM5 = _mm_load_pd((x)+i+2); \
-        __m128d XMM6 = _mm_load_pd((x)+i+4); \
-        __m128d XMM7 = _mm_load_pd((x)+i+6); \
-        XMM0 = _mm_sub_pd(XMM0, XMM4); \
-        XMM1 = _mm_sub_pd(XMM1, XMM5); \
-        XMM2 = _mm_sub_pd(XMM2, XMM6); \
-        XMM3 = _mm_sub_pd(XMM3, XMM7); \
-        _mm_store_pd((y)+i  , XMM0); \
-        _mm_store_pd((y)+i+2, XMM1); \
-        _mm_store_pd((y)+i+4, XMM2); \
-        _mm_store_pd((y)+i+6, XMM3); \
-    } \
-}
-
-#define vecadd(y, x, c, n) \
-{ \
-    int i; \
-    __m128d XMM7 = _mm_set1_pd(c); \
-    for (i = 0;i < (n);i += 4) { \
-        __m128d XMM0 = _mm_load_pd((x)+i  ); \
-        __m128d XMM1 = _mm_load_pd((x)+i+2); \
-        __m128d XMM2 = _mm_load_pd((y)+i  ); \
-        __m128d XMM3 = _mm_load_pd((y)+i+2); \
-        XMM0 = _mm_mul_pd(XMM0, XMM7); \
-        XMM1 = _mm_mul_pd(XMM1, XMM7); \
-        XMM2 = _mm_add_pd(XMM2, XMM0); \
-        XMM3 = _mm_add_pd(XMM3, XMM1); \
-        _mm_store_pd((y)+i  , XMM2); \
-        _mm_store_pd((y)+i+2, XMM3); \
-    } \
-}
-
-#define vecdiff(z, x, y, n) \
-{ \
-    int i; \
-    for (i = 0;i < (n);i += 8) { \
-        __m128d XMM0 = _mm_load_pd((x)+i  ); \
-        __m128d XMM1 = _mm_load_pd((x)+i+2); \
-        __m128d XMM2 = _mm_load_pd((x)+i+4); \
-        __m128d XMM3 = _mm_load_pd((x)+i+6); \
-        __m128d XMM4 = _mm_load_pd((y)+i  ); \
-        __m128d XMM5 = _mm_load_pd((y)+i+2); \
-        __m128d XMM6 = _mm_load_pd((y)+i+4); \
-        __m128d XMM7 = _mm_load_pd((y)+i+6); \
-        XMM0 = _mm_sub_pd(XMM0, XMM4); \
-        XMM1 = _mm_sub_pd(XMM1, XMM5); \
-        XMM2 = _mm_sub_pd(XMM2, XMM6); \
-        XMM3 = _mm_sub_pd(XMM3, XMM7); \
-        _mm_store_pd((z)+i  , XMM0); \
-        _mm_store_pd((z)+i+2, XMM1); \
-        _mm_store_pd((z)+i+4, XMM2); \
-        _mm_store_pd((z)+i+6, XMM3); \
-    } \
-}
-
-#define vecscale(y, c, n) \
-{ \
-    int i; \
-    __m128d XMM7 = _mm_set1_pd(c); \
-    for (i = 0;i < (n);i += 4) { \
-        __m128d XMM0 = _mm_load_pd((y)+i  ); \
-        __m128d XMM1 = _mm_load_pd((y)+i+2); \
-        XMM0 = _mm_mul_pd(XMM0, XMM7); \
-        XMM1 = _mm_mul_pd(XMM1, XMM7); \
-        _mm_store_pd((y)+i  , XMM0); \
-        _mm_store_pd((y)+i+2, XMM1); \
-    } \
-}
-
-#define vecmul(y, x, n) \
-{ \
-    int i; \
-    for (i = 0;i < (n);i += 8) { \
-        __m128d XMM0 = _mm_load_pd((x)+i  ); \
-        __m128d XMM1 = _mm_load_pd((x)+i+2); \
-        __m128d XMM2 = _mm_load_pd((x)+i+4); \
-        __m128d XMM3 = _mm_load_pd((x)+i+6); \
-        __m128d XMM4 = _mm_load_pd((y)+i  ); \
-        __m128d XMM5 = _mm_load_pd((y)+i+2); \
-        __m128d XMM6 = _mm_load_pd((y)+i+4); \
-        __m128d XMM7 = _mm_load_pd((y)+i+6); \
-        XMM4 = _mm_mul_pd(XMM4, XMM0); \
-        XMM5 = _mm_mul_pd(XMM5, XMM1); \
-        XMM6 = _mm_mul_pd(XMM6, XMM2); \
-        XMM7 = _mm_mul_pd(XMM7, XMM3); \
-        _mm_store_pd((y)+i  , XMM4); \
-        _mm_store_pd((y)+i+2, XMM5); \
-        _mm_store_pd((y)+i+4, XMM6); \
-        _mm_store_pd((y)+i+6, XMM7); \
-    } \
-}
-
-
-
-#if 3 <= __SSE__ || defined(__SSE3__)
-/*
-    Horizontal add with haddps SSE3 instruction. The work register (rw)
-    is unused.
- */
-#define __horizontal_sum(r, rw) \
-    r = _mm_hadd_ps(r, r); \
-    r = _mm_hadd_ps(r, r);
-
-#else
-/*
-    Horizontal add with SSE instruction. The work register (rw) is used.
- */
-#define __horizontal_sum(r, rw) \
-    rw = r; \
-    r = _mm_shuffle_ps(r, rw, _MM_SHUFFLE(1, 0, 3, 2)); \
-    r = _mm_add_ps(r, rw); \
-    rw = r; \
-    r = _mm_shuffle_ps(r, rw, _MM_SHUFFLE(2, 3, 0, 1)); \
-    r = _mm_add_ps(r, rw);
-
-#endif
-
-#define vecdot(s, x, y, n) \
-{ \
-    int i; \
-    __m128d XMM0 = _mm_setzero_pd(); \
-    __m128d XMM1 = _mm_setzero_pd(); \
-    __m128d XMM2, XMM3, XMM4, XMM5; \
-    for (i = 0;i < (n);i += 4) { \
-        XMM2 = _mm_load_pd((x)+i  ); \
-        XMM3 = _mm_load_pd((x)+i+2); \
-        XMM4 = _mm_load_pd((y)+i  ); \
-        XMM5 = _mm_load_pd((y)+i+2); \
-        XMM2 = _mm_mul_pd(XMM2, XMM4); \
-        XMM3 = _mm_mul_pd(XMM3, XMM5); \
-        XMM0 = _mm_add_pd(XMM0, XMM2); \
-        XMM1 = _mm_add_pd(XMM1, XMM3); \
-    } \
-    XMM0 = _mm_add_pd(XMM0, XMM1); \
-    XMM1 = _mm_shuffle_pd(XMM0, XMM0, _MM_SHUFFLE2(1, 1)); \
-    XMM0 = _mm_add_pd(XMM0, XMM1); \
-    _mm_store_sd((s), XMM0); \
-}
-
-#define vec2norm(s, x, n) \
-{ \
-    int i; \
-    __m128d XMM0 = _mm_setzero_pd(); \
-    __m128d XMM1 = _mm_setzero_pd(); \
-    __m128d XMM2, XMM3, XMM4, XMM5; \
-    for (i = 0;i < (n);i += 4) { \
-        XMM2 = _mm_load_pd((x)+i  ); \
-        XMM3 = _mm_load_pd((x)+i+2); \
-        XMM4 = XMM2; \
-        XMM5 = XMM3; \
-        XMM2 = _mm_mul_pd(XMM2, XMM4); \
-        XMM3 = _mm_mul_pd(XMM3, XMM5); \
-        XMM0 = _mm_add_pd(XMM0, XMM2); \
-        XMM1 = _mm_add_pd(XMM1, XMM3); \
-    } \
-    XMM0 = _mm_add_pd(XMM0, XMM1); \
-    XMM1 = _mm_shuffle_pd(XMM0, XMM0, _MM_SHUFFLE2(1, 1)); \
-    XMM0 = _mm_add_pd(XMM0, XMM1); \
-    XMM0 = _mm_sqrt_pd(XMM0); \
-    _mm_store_sd((s), XMM0); \
-}
-
-
-#define vec2norminv(s, x, n) \
-{ \
-    int i; \
-    __m128d XMM0 = _mm_setzero_pd(); \
-    __m128d XMM1 = _mm_setzero_pd(); \
-    __m128d XMM2, XMM3, XMM4, XMM5; \
-    for (i = 0;i < (n);i += 4) { \
-        XMM2 = _mm_load_pd((x)+i  ); \
-        XMM3 = _mm_load_pd((x)+i+2); \
-        XMM4 = XMM2; \
-        XMM5 = XMM3; \
-        XMM2 = _mm_mul_pd(XMM2, XMM4); \
-        XMM3 = _mm_mul_pd(XMM3, XMM5); \
-        XMM0 = _mm_add_pd(XMM0, XMM2); \
-        XMM1 = _mm_add_pd(XMM1, XMM3); \
-    } \
-    XMM2 = _mm_set1_pd(1.0); \
-    XMM0 = _mm_add_pd(XMM0, XMM1); \
-    XMM1 = _mm_shuffle_pd(XMM0, XMM0, _MM_SHUFFLE2(1, 1)); \
-    XMM0 = _mm_add_pd(XMM0, XMM1); \
-    XMM0 = _mm_sqrt_pd(XMM0); \
-    XMM2 = _mm_div_pd(XMM2, XMM0); \
-    _mm_store_sd((s), XMM2); \
-}
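Note: the removed vecdot macro above accumulates two partial sums across the loop and then folds the two lanes of an __m128d with a shuffle/add pair. A minimal standalone sketch of the same reduction, not part of the gem (hypothetical helper name; assumes 16-byte-aligned inputs and n a multiple of 2):

#include <emmintrin.h>   /* SSE2 intrinsics */
#include <stdio.h>

/* Hypothetical helper mirroring the removed vecdot macro:
   dot product of two 16-byte-aligned double arrays. */
static double dot_sse2(const double *x, const double *y, int n)
{
    __m128d acc = _mm_setzero_pd();
    for (int i = 0; i < n; i += 2) {
        acc = _mm_add_pd(acc, _mm_mul_pd(_mm_load_pd(x + i),
                                         _mm_load_pd(y + i)));
    }
    /* Fold the high lane onto the low lane, as in the macro's
       _mm_shuffle_pd/_mm_add_pd tail. */
    acc = _mm_add_pd(acc, _mm_shuffle_pd(acc, acc, _MM_SHUFFLE2(1, 1)));
    double s;
    _mm_store_sd(&s, acc);
    return s;
}

int main(void)
{
    /* C11 _Alignas keeps the aligned loads legal. */
    _Alignas(16) double x[4] = {1.0, 2.0, 3.0, 4.0};
    _Alignas(16) double y[4] = {5.0, 6.0, 7.0, 8.0};
    printf("%g\n", dot_sse2(x, y, 4));   /* 1*5+2*6+3*7+4*8 = 70 */
    return 0;
}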
--- a/data/core/vendor/src/liblbfgs/lib/arithmetic_sse_float.h
+++ /dev/null
@@ -1,298 +0,0 @@
-/*
- * SSE/SSE3 implementation of vector oprations (32bit float).
- *
- * Copyright (c) 2007-2010 Naoaki Okazaki
- * All rights reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-/* $Id$ */
-
-#include <stdlib.h>
-#ifndef __APPLE__
-#include <malloc.h>
-#endif
-#include <memory.h>
-
-#if 1400 <= _MSC_VER
-#include <intrin.h>
-#endif/*_MSC_VER*/
-
-#if HAVE_XMMINTRIN_H
-#include <xmmintrin.h>
-#endif/*HAVE_XMMINTRIN_H*/
-
-#if LBFGS_FLOAT == 32 && LBFGS_IEEE_FLOAT
-#define fsigndiff(x, y) (((*(uint32_t*)(x)) ^ (*(uint32_t*)(y))) & 0x80000000U)
-#else
-#define fsigndiff(x, y) (*(x) * (*(y) / fabs(*(y))) < 0.)
-#endif/*LBFGS_IEEE_FLOAT*/
-
-inline static void* vecalloc(size_t size)
-{
-#if defined(_MSC_VER)
-    void *memblock = _aligned_malloc(size, 16);
-#elif defined(__APPLE__) /* OS X always aligns on 16-byte boundaries */
-    void *memblock = malloc(size);
-#else
-    void *memblock = NULL, *p = NULL;
-    if (posix_memalign(&p, 16, size) == 0) {
-        memblock = p;
-    }
-#endif
-    if (memblock != NULL) {
-        memset(memblock, 0, size);
-    }
-    return memblock;
-}
-
-inline static void vecfree(void *memblock)
-{
-    _aligned_free(memblock);
-}
-
-#define vecset(x, c, n) \
-{ \
-    int i; \
-    __m128 XMM0 = _mm_set_ps1(c); \
-    for (i = 0;i < (n);i += 16) { \
-        _mm_store_ps((x)+i   , XMM0); \
-        _mm_store_ps((x)+i+ 4, XMM0); \
-        _mm_store_ps((x)+i+ 8, XMM0); \
-        _mm_store_ps((x)+i+12, XMM0); \
-    } \
-}
-
-#define veccpy(y, x, n) \
-{ \
-    int i; \
-    for (i = 0;i < (n);i += 16) { \
-        __m128 XMM0 = _mm_load_ps((x)+i   ); \
-        __m128 XMM1 = _mm_load_ps((x)+i+ 4); \
-        __m128 XMM2 = _mm_load_ps((x)+i+ 8); \
-        __m128 XMM3 = _mm_load_ps((x)+i+12); \
-        _mm_store_ps((y)+i   , XMM0); \
-        _mm_store_ps((y)+i+ 4, XMM1); \
-        _mm_store_ps((y)+i+ 8, XMM2); \
-        _mm_store_ps((y)+i+12, XMM3); \
-    } \
-}
-
-#define vecncpy(y, x, n) \
-{ \
-    int i; \
-    const uint32_t mask = 0x80000000; \
-    __m128 XMM4 = _mm_load_ps1((float*)&mask); \
-    for (i = 0;i < (n);i += 16) { \
-        __m128 XMM0 = _mm_load_ps((x)+i   ); \
-        __m128 XMM1 = _mm_load_ps((x)+i+ 4); \
-        __m128 XMM2 = _mm_load_ps((x)+i+ 8); \
-        __m128 XMM3 = _mm_load_ps((x)+i+12); \
-        XMM0 = _mm_xor_ps(XMM0, XMM4); \
-        XMM1 = _mm_xor_ps(XMM1, XMM4); \
-        XMM2 = _mm_xor_ps(XMM2, XMM4); \
-        XMM3 = _mm_xor_ps(XMM3, XMM4); \
-        _mm_store_ps((y)+i   , XMM0); \
-        _mm_store_ps((y)+i+ 4, XMM1); \
-        _mm_store_ps((y)+i+ 8, XMM2); \
-        _mm_store_ps((y)+i+12, XMM3); \
-    } \
-}
-
-#define vecadd(y, x, c, n) \
-{ \
-    int i; \
-    __m128 XMM7 = _mm_set_ps1(c); \
-    for (i = 0;i < (n);i += 8) { \
-        __m128 XMM0 = _mm_load_ps((x)+i  ); \
-        __m128 XMM1 = _mm_load_ps((x)+i+4); \
-        __m128 XMM2 = _mm_load_ps((y)+i  ); \
-        __m128 XMM3 = _mm_load_ps((y)+i+4); \
-        XMM0 = _mm_mul_ps(XMM0, XMM7); \
-        XMM1 = _mm_mul_ps(XMM1, XMM7); \
-        XMM2 = _mm_add_ps(XMM2, XMM0); \
-        XMM3 = _mm_add_ps(XMM3, XMM1); \
-        _mm_store_ps((y)+i  , XMM2); \
-        _mm_store_ps((y)+i+4, XMM3); \
-    } \
-}
-
-#define vecdiff(z, x, y, n) \
-{ \
-    int i; \
-    for (i = 0;i < (n);i += 16) { \
-        __m128 XMM0 = _mm_load_ps((x)+i   ); \
-        __m128 XMM1 = _mm_load_ps((x)+i+ 4); \
-        __m128 XMM2 = _mm_load_ps((x)+i+ 8); \
-        __m128 XMM3 = _mm_load_ps((x)+i+12); \
-        __m128 XMM4 = _mm_load_ps((y)+i   ); \
-        __m128 XMM5 = _mm_load_ps((y)+i+ 4); \
-        __m128 XMM6 = _mm_load_ps((y)+i+ 8); \
-        __m128 XMM7 = _mm_load_ps((y)+i+12); \
-        XMM0 = _mm_sub_ps(XMM0, XMM4); \
-        XMM1 = _mm_sub_ps(XMM1, XMM5); \
-        XMM2 = _mm_sub_ps(XMM2, XMM6); \
-        XMM3 = _mm_sub_ps(XMM3, XMM7); \
-        _mm_store_ps((z)+i   , XMM0); \
-        _mm_store_ps((z)+i+ 4, XMM1); \
-        _mm_store_ps((z)+i+ 8, XMM2); \
-        _mm_store_ps((z)+i+12, XMM3); \
-    } \
-}
-
-#define vecscale(y, c, n) \
-{ \
-    int i; \
-    __m128 XMM7 = _mm_set_ps1(c); \
-    for (i = 0;i < (n);i += 8) { \
-        __m128 XMM0 = _mm_load_ps((y)+i  ); \
-        __m128 XMM1 = _mm_load_ps((y)+i+4); \
-        XMM0 = _mm_mul_ps(XMM0, XMM7); \
-        XMM1 = _mm_mul_ps(XMM1, XMM7); \
-        _mm_store_ps((y)+i  , XMM0); \
-        _mm_store_ps((y)+i+4, XMM1); \
-    } \
-}
-
-#define vecmul(y, x, n) \
-{ \
-    int i; \
-    for (i = 0;i < (n);i += 16) { \
-        __m128 XMM0 = _mm_load_ps((x)+i   ); \
-        __m128 XMM1 = _mm_load_ps((x)+i+ 4); \
-        __m128 XMM2 = _mm_load_ps((x)+i+ 8); \
-        __m128 XMM3 = _mm_load_ps((x)+i+12); \
-        __m128 XMM4 = _mm_load_ps((y)+i   ); \
-        __m128 XMM5 = _mm_load_ps((y)+i+ 4); \
-        __m128 XMM6 = _mm_load_ps((y)+i+ 8); \
-        __m128 XMM7 = _mm_load_ps((y)+i+12); \
-        XMM4 = _mm_mul_ps(XMM4, XMM0); \
-        XMM5 = _mm_mul_ps(XMM5, XMM1); \
-        XMM6 = _mm_mul_ps(XMM6, XMM2); \
-        XMM7 = _mm_mul_ps(XMM7, XMM3); \
-        _mm_store_ps((y)+i   , XMM4); \
-        _mm_store_ps((y)+i+ 4, XMM5); \
-        _mm_store_ps((y)+i+ 8, XMM6); \
-        _mm_store_ps((y)+i+12, XMM7); \
-    } \
-}
-
-
-
-#if 3 <= __SSE__ || defined(__SSE3__)
-/*
-    Horizontal add with haddps SSE3 instruction. The work register (rw)
-    is unused.
- */
-#define __horizontal_sum(r, rw) \
-    r = _mm_hadd_ps(r, r); \
-    r = _mm_hadd_ps(r, r);
-
-#else
-/*
-    Horizontal add with SSE instruction. The work register (rw) is used.
- */
-#define __horizontal_sum(r, rw) \
-    rw = r; \
-    r = _mm_shuffle_ps(r, rw, _MM_SHUFFLE(1, 0, 3, 2)); \
-    r = _mm_add_ps(r, rw); \
-    rw = r; \
-    r = _mm_shuffle_ps(r, rw, _MM_SHUFFLE(2, 3, 0, 1)); \
-    r = _mm_add_ps(r, rw);
-
-#endif
-
-#define vecdot(s, x, y, n) \
-{ \
-    int i; \
-    __m128 XMM0 = _mm_setzero_ps(); \
-    __m128 XMM1 = _mm_setzero_ps(); \
-    __m128 XMM2, XMM3, XMM4, XMM5; \
-    for (i = 0;i < (n);i += 8) { \
-        XMM2 = _mm_load_ps((x)+i  ); \
-        XMM3 = _mm_load_ps((x)+i+4); \
-        XMM4 = _mm_load_ps((y)+i  ); \
-        XMM5 = _mm_load_ps((y)+i+4); \
-        XMM2 = _mm_mul_ps(XMM2, XMM4); \
-        XMM3 = _mm_mul_ps(XMM3, XMM5); \
-        XMM0 = _mm_add_ps(XMM0, XMM2); \
-        XMM1 = _mm_add_ps(XMM1, XMM3); \
-    } \
-    XMM0 = _mm_add_ps(XMM0, XMM1); \
-    __horizontal_sum(XMM0, XMM1); \
-    _mm_store_ss((s), XMM0); \
-}
-
-#define vec2norm(s, x, n) \
-{ \
-    int i; \
-    __m128 XMM0 = _mm_setzero_ps(); \
-    __m128 XMM1 = _mm_setzero_ps(); \
-    __m128 XMM2, XMM3; \
-    for (i = 0;i < (n);i += 8) { \
-        XMM2 = _mm_load_ps((x)+i  ); \
-        XMM3 = _mm_load_ps((x)+i+4); \
-        XMM2 = _mm_mul_ps(XMM2, XMM2); \
-        XMM3 = _mm_mul_ps(XMM3, XMM3); \
-        XMM0 = _mm_add_ps(XMM0, XMM2); \
-        XMM1 = _mm_add_ps(XMM1, XMM3); \
-    } \
-    XMM0 = _mm_add_ps(XMM0, XMM1); \
-    __horizontal_sum(XMM0, XMM1); \
-    XMM2 = XMM0; \
-    XMM1 = _mm_rsqrt_ss(XMM0); \
-    XMM3 = XMM1; \
-    XMM1 = _mm_mul_ss(XMM1, XMM1); \
-    XMM1 = _mm_mul_ss(XMM1, XMM3); \
-    XMM1 = _mm_mul_ss(XMM1, XMM0); \
-    XMM1 = _mm_mul_ss(XMM1, _mm_set_ss(-0.5f)); \
-    XMM3 = _mm_mul_ss(XMM3, _mm_set_ss(1.5f)); \
-    XMM3 = _mm_add_ss(XMM3, XMM1); \
-    XMM3 = _mm_mul_ss(XMM3, XMM2); \
-    _mm_store_ss((s), XMM3); \
-}
-
-#define vec2norminv(s, x, n) \
-{ \
-    int i; \
-    __m128 XMM0 = _mm_setzero_ps(); \
-    __m128 XMM1 = _mm_setzero_ps(); \
-    __m128 XMM2, XMM3; \
-    for (i = 0;i < (n);i += 16) { \
-        XMM2 = _mm_load_ps((x)+i  ); \
-        XMM3 = _mm_load_ps((x)+i+4); \
-        XMM2 = _mm_mul_ps(XMM2, XMM2); \
-        XMM3 = _mm_mul_ps(XMM3, XMM3); \
-        XMM0 = _mm_add_ps(XMM0, XMM2); \
-        XMM1 = _mm_add_ps(XMM1, XMM3); \
-    } \
-    XMM0 = _mm_add_ps(XMM0, XMM1); \
-    __horizontal_sum(XMM0, XMM1); \
-    XMM2 = XMM0; \
-    XMM1 = _mm_rsqrt_ss(XMM0); \
-    XMM3 = XMM1; \
-    XMM1 = _mm_mul_ss(XMM1, XMM1); \
-    XMM1 = _mm_mul_ss(XMM1, XMM3); \
-    XMM1 = _mm_mul_ss(XMM1, XMM0); \
-    XMM1 = _mm_mul_ss(XMM1, _mm_set_ss(-0.5f)); \
-    XMM3 = _mm_mul_ss(XMM3, _mm_set_ss(1.5f)); \
-    XMM3 = _mm_add_ss(XMM3, XMM1); \
-    _mm_store_ss((s), XMM3); \
-}
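Note: the tail of the removed float vec2norm macro computes sqrt(s) without a sqrtps: _mm_rsqrt_ss yields a roughly 12-bit estimate of 1/sqrt(s), one Newton-Raphson step y' = y*(1.5 - 0.5*s*y*y) refines it, and multiplying by s gives s/sqrt(s) = sqrt(s). A minimal scalar sketch of that technique, not part of the gem (hypothetical helper name):

#include <math.h>
#include <stdio.h>
#include <xmmintrin.h>   /* SSE intrinsics */

/* Hypothetical helper mirroring the removed vec2norm tail:
   approximate sqrt via rsqrt estimate plus one refinement step. */
static float sqrt_via_rsqrt(float s)
{
    float y;
    _mm_store_ss(&y, _mm_rsqrt_ss(_mm_set_ss(s)));
    y = y * (1.5f - 0.5f * s * y * y);   /* one Newton-Raphson step */
    return s * y;                        /* s * 1/sqrt(s) = sqrt(s) */
}

int main(void)
{
    printf("%.7f  (libm: %.7f)\n", sqrt_via_rsqrt(2.0f), sqrtf(2.0f));
    return 0;
}

The vec2norminv macro ends the same way but skips the final multiply by s, returning the refined reciprocal square root directly.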