opener-opinion-detector-basic 2.0.0 → 2.0.1

Files changed (148)
  1. checksums.yaml +4 -4
  2. data/README.md +2 -0
  3. data/ext/hack/Rakefile +0 -2
  4. data/lib/opener/opinion_detector_basic/version.rb +1 -1
  5. data/opener-opinion-detector-basic.gemspec +0 -1
  6. data/task/compile.rake +1 -1
  7. data/task/requirements.rake +0 -1
  8. metadata +2 -142
  9. data/core/vendor/src/crfsuite/AUTHORS +0 -1
  10. data/core/vendor/src/crfsuite/COPYING +0 -27
  11. data/core/vendor/src/crfsuite/ChangeLog +0 -103
  12. data/core/vendor/src/crfsuite/INSTALL +0 -236
  13. data/core/vendor/src/crfsuite/Makefile.am +0 -19
  14. data/core/vendor/src/crfsuite/Makefile.in +0 -783
  15. data/core/vendor/src/crfsuite/README +0 -183
  16. data/core/vendor/src/crfsuite/aclocal.m4 +0 -9018
  17. data/core/vendor/src/crfsuite/autogen.sh +0 -38
  18. data/core/vendor/src/crfsuite/compile +0 -143
  19. data/core/vendor/src/crfsuite/config.guess +0 -1502
  20. data/core/vendor/src/crfsuite/config.h.in +0 -198
  21. data/core/vendor/src/crfsuite/config.sub +0 -1714
  22. data/core/vendor/src/crfsuite/configure +0 -14273
  23. data/core/vendor/src/crfsuite/configure.in +0 -149
  24. data/core/vendor/src/crfsuite/crfsuite.sln +0 -42
  25. data/core/vendor/src/crfsuite/depcomp +0 -630
  26. data/core/vendor/src/crfsuite/example/chunking.py +0 -49
  27. data/core/vendor/src/crfsuite/example/crfutils.py +0 -179
  28. data/core/vendor/src/crfsuite/example/ner.py +0 -270
  29. data/core/vendor/src/crfsuite/example/pos.py +0 -78
  30. data/core/vendor/src/crfsuite/example/template.py +0 -88
  31. data/core/vendor/src/crfsuite/frontend/Makefile.am +0 -29
  32. data/core/vendor/src/crfsuite/frontend/Makefile.in +0 -640
  33. data/core/vendor/src/crfsuite/frontend/dump.c +0 -116
  34. data/core/vendor/src/crfsuite/frontend/frontend.vcxproj +0 -129
  35. data/core/vendor/src/crfsuite/frontend/iwa.c +0 -273
  36. data/core/vendor/src/crfsuite/frontend/iwa.h +0 -65
  37. data/core/vendor/src/crfsuite/frontend/learn.c +0 -439
  38. data/core/vendor/src/crfsuite/frontend/main.c +0 -137
  39. data/core/vendor/src/crfsuite/frontend/option.c +0 -93
  40. data/core/vendor/src/crfsuite/frontend/option.h +0 -86
  41. data/core/vendor/src/crfsuite/frontend/readdata.h +0 -38
  42. data/core/vendor/src/crfsuite/frontend/reader.c +0 -136
  43. data/core/vendor/src/crfsuite/frontend/tag.c +0 -427
  44. data/core/vendor/src/crfsuite/genbinary.sh.in +0 -15
  45. data/core/vendor/src/crfsuite/include/Makefile.am +0 -11
  46. data/core/vendor/src/crfsuite/include/Makefile.in +0 -461
  47. data/core/vendor/src/crfsuite/include/crfsuite.h +0 -1063
  48. data/core/vendor/src/crfsuite/include/crfsuite.hpp +0 -555
  49. data/core/vendor/src/crfsuite/include/crfsuite_api.hpp +0 -400
  50. data/core/vendor/src/crfsuite/include/os.h +0 -61
  51. data/core/vendor/src/crfsuite/install-sh +0 -520
  52. data/core/vendor/src/crfsuite/lib/cqdb/COPYING +0 -28
  53. data/core/vendor/src/crfsuite/lib/cqdb/Makefile.am +0 -21
  54. data/core/vendor/src/crfsuite/lib/cqdb/Makefile.in +0 -549
  55. data/core/vendor/src/crfsuite/lib/cqdb/cqdb.vcxproj +0 -86
  56. data/core/vendor/src/crfsuite/lib/cqdb/include/cqdb.h +0 -524
  57. data/core/vendor/src/crfsuite/lib/cqdb/src/cqdb.c +0 -587
  58. data/core/vendor/src/crfsuite/lib/cqdb/src/lookup3.c +0 -976
  59. data/core/vendor/src/crfsuite/lib/crf/Makefile.am +0 -46
  60. data/core/vendor/src/crfsuite/lib/crf/Makefile.in +0 -721
  61. data/core/vendor/src/crfsuite/lib/crf/crf.vcxproj +0 -216
  62. data/core/vendor/src/crfsuite/lib/crf/src/crf1d.h +0 -353
  63. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_context.c +0 -705
  64. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_encode.c +0 -943
  65. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_feature.c +0 -352
  66. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_model.c +0 -994
  67. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_tag.c +0 -550
  68. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite.c +0 -492
  69. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite_internal.h +0 -236
  70. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite_train.c +0 -272
  71. data/core/vendor/src/crfsuite/lib/crf/src/dataset.c +0 -106
  72. data/core/vendor/src/crfsuite/lib/crf/src/dictionary.c +0 -118
  73. data/core/vendor/src/crfsuite/lib/crf/src/holdout.c +0 -80
  74. data/core/vendor/src/crfsuite/lib/crf/src/logging.c +0 -91
  75. data/core/vendor/src/crfsuite/lib/crf/src/logging.h +0 -48
  76. data/core/vendor/src/crfsuite/lib/crf/src/params.c +0 -335
  77. data/core/vendor/src/crfsuite/lib/crf/src/params.h +0 -80
  78. data/core/vendor/src/crfsuite/lib/crf/src/quark.c +0 -172
  79. data/core/vendor/src/crfsuite/lib/crf/src/quark.h +0 -46
  80. data/core/vendor/src/crfsuite/lib/crf/src/rumavl.c +0 -1107
  81. data/core/vendor/src/crfsuite/lib/crf/src/rumavl.h +0 -160
  82. data/core/vendor/src/crfsuite/lib/crf/src/train_arow.c +0 -408
  83. data/core/vendor/src/crfsuite/lib/crf/src/train_averaged_perceptron.c +0 -242
  84. data/core/vendor/src/crfsuite/lib/crf/src/train_l2sgd.c +0 -507
  85. data/core/vendor/src/crfsuite/lib/crf/src/train_lbfgs.c +0 -338
  86. data/core/vendor/src/crfsuite/lib/crf/src/train_passive_aggressive.c +0 -435
  87. data/core/vendor/src/crfsuite/lib/crf/src/vecmath.h +0 -341
  88. data/core/vendor/src/crfsuite/ltmain.sh +0 -8413
  89. data/core/vendor/src/crfsuite/missing +0 -376
  90. data/core/vendor/src/crfsuite/swig/Makefile.am +0 -13
  91. data/core/vendor/src/crfsuite/swig/Makefile.in +0 -365
  92. data/core/vendor/src/crfsuite/swig/crfsuite.cpp +0 -2
  93. data/core/vendor/src/crfsuite/swig/export.i +0 -32
  94. data/core/vendor/src/crfsuite/swig/python/README +0 -92
  95. data/core/vendor/src/crfsuite/swig/python/crfsuite.py +0 -329
  96. data/core/vendor/src/crfsuite/swig/python/export_wrap.cpp +0 -14355
  97. data/core/vendor/src/crfsuite/swig/python/export_wrap.h +0 -63
  98. data/core/vendor/src/crfsuite/swig/python/prepare.sh +0 -9
  99. data/core/vendor/src/crfsuite/swig/python/sample_tag.py +0 -52
  100. data/core/vendor/src/crfsuite/swig/python/sample_train.py +0 -68
  101. data/core/vendor/src/crfsuite/swig/python/setup.py +0 -44
  102. data/core/vendor/src/crfsuite/win32/stdint.h +0 -679
  103. data/core/vendor/src/liblbfgs/AUTHORS +0 -1
  104. data/core/vendor/src/liblbfgs/COPYING +0 -22
  105. data/core/vendor/src/liblbfgs/ChangeLog +0 -120
  106. data/core/vendor/src/liblbfgs/INSTALL +0 -231
  107. data/core/vendor/src/liblbfgs/Makefile.am +0 -10
  108. data/core/vendor/src/liblbfgs/Makefile.in +0 -638
  109. data/core/vendor/src/liblbfgs/NEWS +0 -0
  110. data/core/vendor/src/liblbfgs/README +0 -71
  111. data/core/vendor/src/liblbfgs/aclocal.m4 +0 -6985
  112. data/core/vendor/src/liblbfgs/autogen.sh +0 -38
  113. data/core/vendor/src/liblbfgs/config.guess +0 -1411
  114. data/core/vendor/src/liblbfgs/config.h.in +0 -64
  115. data/core/vendor/src/liblbfgs/config.sub +0 -1500
  116. data/core/vendor/src/liblbfgs/configure +0 -21146
  117. data/core/vendor/src/liblbfgs/configure.in +0 -107
  118. data/core/vendor/src/liblbfgs/depcomp +0 -522
  119. data/core/vendor/src/liblbfgs/include/lbfgs.h +0 -745
  120. data/core/vendor/src/liblbfgs/install-sh +0 -322
  121. data/core/vendor/src/liblbfgs/lbfgs.sln +0 -26
  122. data/core/vendor/src/liblbfgs/lib/Makefile.am +0 -24
  123. data/core/vendor/src/liblbfgs/lib/Makefile.in +0 -499
  124. data/core/vendor/src/liblbfgs/lib/arithmetic_ansi.h +0 -133
  125. data/core/vendor/src/liblbfgs/lib/arithmetic_sse_double.h +0 -294
  126. data/core/vendor/src/liblbfgs/lib/arithmetic_sse_float.h +0 -298
  127. data/core/vendor/src/liblbfgs/lib/lbfgs.c +0 -1371
  128. data/core/vendor/src/liblbfgs/lib/lib.vcxproj +0 -95
  129. data/core/vendor/src/liblbfgs/ltmain.sh +0 -6426
  130. data/core/vendor/src/liblbfgs/missing +0 -353
  131. data/core/vendor/src/liblbfgs/sample/Makefile.am +0 -15
  132. data/core/vendor/src/liblbfgs/sample/Makefile.in +0 -433
  133. data/core/vendor/src/liblbfgs/sample/sample.c +0 -81
  134. data/core/vendor/src/liblbfgs/sample/sample.cpp +0 -126
  135. data/core/vendor/src/liblbfgs/sample/sample.vcxproj +0 -105
  136. data/core/vendor/src/svm_light/LICENSE.txt +0 -59
  137. data/core/vendor/src/svm_light/Makefile +0 -105
  138. data/core/vendor/src/svm_light/kernel.h +0 -40
  139. data/core/vendor/src/svm_light/svm_classify.c +0 -197
  140. data/core/vendor/src/svm_light/svm_common.c +0 -985
  141. data/core/vendor/src/svm_light/svm_common.h +0 -301
  142. data/core/vendor/src/svm_light/svm_hideo.c +0 -1062
  143. data/core/vendor/src/svm_light/svm_learn.c +0 -4147
  144. data/core/vendor/src/svm_light/svm_learn.h +0 -169
  145. data/core/vendor/src/svm_light/svm_learn_main.c +0 -397
  146. data/core/vendor/src/svm_light/svm_loqo.c +0 -211
  147. data/task/c.rake +0 -36
  148. data/task/submodules.rake +0 -5
data/core/vendor/src/liblbfgs/lib/arithmetic_sse_double.h
@@ -1,294 +0,0 @@
- /*
- * SSE2 implementation of vector oprations (64bit double).
- *
- * Copyright (c) 2007-2010 Naoaki Okazaki
- * All rights reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
- /* $Id$ */
-
- #include <stdlib.h>
- #ifndef __APPLE__
- #include <malloc.h>
- #endif
- #include <memory.h>
-
- #if 1400 <= _MSC_VER
- #include <intrin.h>
- #endif/*1400 <= _MSC_VER*/
-
- #if HAVE_EMMINTRIN_H
- #include <emmintrin.h>
- #endif/*HAVE_EMMINTRIN_H*/
-
- inline static void* vecalloc(size_t size)
- {
- #if defined(_MSC_VER)
- void *memblock = _aligned_malloc(size, 16);
- #elif defined(__APPLE__) /* OS X always aligns on 16-byte boundaries */
- void *memblock = malloc(size);
- #else
- void *memblock = NULL, *p = NULL;
- if (posix_memalign(&p, 16, size) == 0) {
- memblock = p;
- }
- #endif
- if (memblock != NULL) {
- memset(memblock, 0, size);
- }
- return memblock;
- }
-
- inline static void vecfree(void *memblock)
- {
- #ifdef _MSC_VER
- _aligned_free(memblock);
- #else
- free(memblock);
- #endif
- }
-
- #define fsigndiff(x, y) \
- ((_mm_movemask_pd(_mm_set_pd(*(x), *(y))) + 1) & 0x002)
-
- #define vecset(x, c, n) \
- { \
- int i; \
- __m128d XMM0 = _mm_set1_pd(c); \
- for (i = 0;i < (n);i += 8) { \
- _mm_store_pd((x)+i , XMM0); \
- _mm_store_pd((x)+i+2, XMM0); \
- _mm_store_pd((x)+i+4, XMM0); \
- _mm_store_pd((x)+i+6, XMM0); \
- } \
- }
-
- #define veccpy(y, x, n) \
- { \
- int i; \
- for (i = 0;i < (n);i += 8) { \
- __m128d XMM0 = _mm_load_pd((x)+i ); \
- __m128d XMM1 = _mm_load_pd((x)+i+2); \
- __m128d XMM2 = _mm_load_pd((x)+i+4); \
- __m128d XMM3 = _mm_load_pd((x)+i+6); \
- _mm_store_pd((y)+i , XMM0); \
- _mm_store_pd((y)+i+2, XMM1); \
- _mm_store_pd((y)+i+4, XMM2); \
- _mm_store_pd((y)+i+6, XMM3); \
- } \
- }
-
- #define vecncpy(y, x, n) \
- { \
- int i; \
- for (i = 0;i < (n);i += 8) { \
- __m128d XMM0 = _mm_setzero_pd(); \
- __m128d XMM1 = _mm_setzero_pd(); \
- __m128d XMM2 = _mm_setzero_pd(); \
- __m128d XMM3 = _mm_setzero_pd(); \
- __m128d XMM4 = _mm_load_pd((x)+i ); \
- __m128d XMM5 = _mm_load_pd((x)+i+2); \
- __m128d XMM6 = _mm_load_pd((x)+i+4); \
- __m128d XMM7 = _mm_load_pd((x)+i+6); \
- XMM0 = _mm_sub_pd(XMM0, XMM4); \
- XMM1 = _mm_sub_pd(XMM1, XMM5); \
- XMM2 = _mm_sub_pd(XMM2, XMM6); \
- XMM3 = _mm_sub_pd(XMM3, XMM7); \
- _mm_store_pd((y)+i , XMM0); \
- _mm_store_pd((y)+i+2, XMM1); \
- _mm_store_pd((y)+i+4, XMM2); \
- _mm_store_pd((y)+i+6, XMM3); \
- } \
- }
-
- #define vecadd(y, x, c, n) \
- { \
- int i; \
- __m128d XMM7 = _mm_set1_pd(c); \
- for (i = 0;i < (n);i += 4) { \
- __m128d XMM0 = _mm_load_pd((x)+i ); \
- __m128d XMM1 = _mm_load_pd((x)+i+2); \
- __m128d XMM2 = _mm_load_pd((y)+i ); \
- __m128d XMM3 = _mm_load_pd((y)+i+2); \
- XMM0 = _mm_mul_pd(XMM0, XMM7); \
- XMM1 = _mm_mul_pd(XMM1, XMM7); \
- XMM2 = _mm_add_pd(XMM2, XMM0); \
- XMM3 = _mm_add_pd(XMM3, XMM1); \
- _mm_store_pd((y)+i , XMM2); \
- _mm_store_pd((y)+i+2, XMM3); \
- } \
- }
-
- #define vecdiff(z, x, y, n) \
- { \
- int i; \
- for (i = 0;i < (n);i += 8) { \
- __m128d XMM0 = _mm_load_pd((x)+i ); \
- __m128d XMM1 = _mm_load_pd((x)+i+2); \
- __m128d XMM2 = _mm_load_pd((x)+i+4); \
- __m128d XMM3 = _mm_load_pd((x)+i+6); \
- __m128d XMM4 = _mm_load_pd((y)+i ); \
- __m128d XMM5 = _mm_load_pd((y)+i+2); \
- __m128d XMM6 = _mm_load_pd((y)+i+4); \
- __m128d XMM7 = _mm_load_pd((y)+i+6); \
- XMM0 = _mm_sub_pd(XMM0, XMM4); \
- XMM1 = _mm_sub_pd(XMM1, XMM5); \
- XMM2 = _mm_sub_pd(XMM2, XMM6); \
- XMM3 = _mm_sub_pd(XMM3, XMM7); \
- _mm_store_pd((z)+i , XMM0); \
- _mm_store_pd((z)+i+2, XMM1); \
- _mm_store_pd((z)+i+4, XMM2); \
- _mm_store_pd((z)+i+6, XMM3); \
- } \
- }
-
- #define vecscale(y, c, n) \
- { \
- int i; \
- __m128d XMM7 = _mm_set1_pd(c); \
- for (i = 0;i < (n);i += 4) { \
- __m128d XMM0 = _mm_load_pd((y)+i ); \
- __m128d XMM1 = _mm_load_pd((y)+i+2); \
- XMM0 = _mm_mul_pd(XMM0, XMM7); \
- XMM1 = _mm_mul_pd(XMM1, XMM7); \
- _mm_store_pd((y)+i , XMM0); \
- _mm_store_pd((y)+i+2, XMM1); \
- } \
- }
-
- #define vecmul(y, x, n) \
- { \
- int i; \
- for (i = 0;i < (n);i += 8) { \
- __m128d XMM0 = _mm_load_pd((x)+i ); \
- __m128d XMM1 = _mm_load_pd((x)+i+2); \
- __m128d XMM2 = _mm_load_pd((x)+i+4); \
- __m128d XMM3 = _mm_load_pd((x)+i+6); \
- __m128d XMM4 = _mm_load_pd((y)+i ); \
- __m128d XMM5 = _mm_load_pd((y)+i+2); \
- __m128d XMM6 = _mm_load_pd((y)+i+4); \
- __m128d XMM7 = _mm_load_pd((y)+i+6); \
- XMM4 = _mm_mul_pd(XMM4, XMM0); \
- XMM5 = _mm_mul_pd(XMM5, XMM1); \
- XMM6 = _mm_mul_pd(XMM6, XMM2); \
- XMM7 = _mm_mul_pd(XMM7, XMM3); \
- _mm_store_pd((y)+i , XMM4); \
- _mm_store_pd((y)+i+2, XMM5); \
- _mm_store_pd((y)+i+4, XMM6); \
- _mm_store_pd((y)+i+6, XMM7); \
- } \
- }
-
-
-
- #if 3 <= __SSE__ || defined(__SSE3__)
- /*
- Horizontal add with haddps SSE3 instruction. The work register (rw)
- is unused.
- */
- #define __horizontal_sum(r, rw) \
- r = _mm_hadd_ps(r, r); \
- r = _mm_hadd_ps(r, r);
-
- #else
- /*
- Horizontal add with SSE instruction. The work register (rw) is used.
- */
- #define __horizontal_sum(r, rw) \
- rw = r; \
- r = _mm_shuffle_ps(r, rw, _MM_SHUFFLE(1, 0, 3, 2)); \
- r = _mm_add_ps(r, rw); \
- rw = r; \
- r = _mm_shuffle_ps(r, rw, _MM_SHUFFLE(2, 3, 0, 1)); \
- r = _mm_add_ps(r, rw);
-
- #endif
-
- #define vecdot(s, x, y, n) \
- { \
- int i; \
- __m128d XMM0 = _mm_setzero_pd(); \
- __m128d XMM1 = _mm_setzero_pd(); \
- __m128d XMM2, XMM3, XMM4, XMM5; \
- for (i = 0;i < (n);i += 4) { \
- XMM2 = _mm_load_pd((x)+i ); \
- XMM3 = _mm_load_pd((x)+i+2); \
- XMM4 = _mm_load_pd((y)+i ); \
- XMM5 = _mm_load_pd((y)+i+2); \
- XMM2 = _mm_mul_pd(XMM2, XMM4); \
- XMM3 = _mm_mul_pd(XMM3, XMM5); \
- XMM0 = _mm_add_pd(XMM0, XMM2); \
- XMM1 = _mm_add_pd(XMM1, XMM3); \
- } \
- XMM0 = _mm_add_pd(XMM0, XMM1); \
- XMM1 = _mm_shuffle_pd(XMM0, XMM0, _MM_SHUFFLE2(1, 1)); \
- XMM0 = _mm_add_pd(XMM0, XMM1); \
- _mm_store_sd((s), XMM0); \
- }
-
- #define vec2norm(s, x, n) \
- { \
- int i; \
- __m128d XMM0 = _mm_setzero_pd(); \
- __m128d XMM1 = _mm_setzero_pd(); \
- __m128d XMM2, XMM3, XMM4, XMM5; \
- for (i = 0;i < (n);i += 4) { \
- XMM2 = _mm_load_pd((x)+i ); \
- XMM3 = _mm_load_pd((x)+i+2); \
- XMM4 = XMM2; \
- XMM5 = XMM3; \
- XMM2 = _mm_mul_pd(XMM2, XMM4); \
- XMM3 = _mm_mul_pd(XMM3, XMM5); \
- XMM0 = _mm_add_pd(XMM0, XMM2); \
- XMM1 = _mm_add_pd(XMM1, XMM3); \
- } \
- XMM0 = _mm_add_pd(XMM0, XMM1); \
- XMM1 = _mm_shuffle_pd(XMM0, XMM0, _MM_SHUFFLE2(1, 1)); \
- XMM0 = _mm_add_pd(XMM0, XMM1); \
- XMM0 = _mm_sqrt_pd(XMM0); \
- _mm_store_sd((s), XMM0); \
- }
-
-
- #define vec2norminv(s, x, n) \
- { \
- int i; \
- __m128d XMM0 = _mm_setzero_pd(); \
- __m128d XMM1 = _mm_setzero_pd(); \
- __m128d XMM2, XMM3, XMM4, XMM5; \
- for (i = 0;i < (n);i += 4) { \
- XMM2 = _mm_load_pd((x)+i ); \
- XMM3 = _mm_load_pd((x)+i+2); \
- XMM4 = XMM2; \
- XMM5 = XMM3; \
- XMM2 = _mm_mul_pd(XMM2, XMM4); \
- XMM3 = _mm_mul_pd(XMM3, XMM5); \
- XMM0 = _mm_add_pd(XMM0, XMM2); \
- XMM1 = _mm_add_pd(XMM1, XMM3); \
- } \
- XMM2 = _mm_set1_pd(1.0); \
- XMM0 = _mm_add_pd(XMM0, XMM1); \
- XMM1 = _mm_shuffle_pd(XMM0, XMM0, _MM_SHUFFLE2(1, 1)); \
- XMM0 = _mm_add_pd(XMM0, XMM1); \
- XMM0 = _mm_sqrt_pd(XMM0); \
- XMM2 = _mm_div_pd(XMM2, XMM0); \
- _mm_store_sd((s), XMM2); \
- }
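The header removed above implements unrolled SSE2 loops over 16-byte-aligned buffers (allocated by vecalloc). As a reference point, the sketch below renders the same aligned dot-product pattern that the vecdot macro expands to, written directly against <emmintrin.h>. It is illustrative only: the names (dot_sse2, n) are hypothetical, n is assumed to be a multiple of 4, and both arrays are assumed 16-byte aligned, exactly as the macro and vecalloc require.

/* Illustrative sketch of the vecdot pattern above (not part of the gem). */
#include <stdio.h>
#include <emmintrin.h>   /* SSE2; GCC/Clang also expose _mm_malloc/_mm_free here */

static double dot_sse2(const double *x, const double *y, int n)
{
    __m128d acc0 = _mm_setzero_pd();
    __m128d acc1 = _mm_setzero_pd();
    int i;
    for (i = 0; i < n; i += 4) {                 /* 4 doubles per iteration */
        __m128d x0 = _mm_load_pd(x + i);         /* aligned 128-bit loads */
        __m128d x1 = _mm_load_pd(x + i + 2);
        __m128d y0 = _mm_load_pd(y + i);
        __m128d y1 = _mm_load_pd(y + i + 2);
        acc0 = _mm_add_pd(acc0, _mm_mul_pd(x0, y0));
        acc1 = _mm_add_pd(acc1, _mm_mul_pd(x1, y1));
    }
    /* horizontal reduction, as in the macro: fold the two lanes into one scalar */
    acc0 = _mm_add_pd(acc0, acc1);
    acc0 = _mm_add_pd(acc0, _mm_shuffle_pd(acc0, acc0, _MM_SHUFFLE2(1, 1)));
    double s;
    _mm_store_sd(&s, acc0);
    return s;
}

int main(void)
{
    const int n = 8;
    double *x = _mm_malloc(n * sizeof(double), 16);   /* 16-byte aligned, like vecalloc */
    double *y = _mm_malloc(n * sizeof(double), 16);
    for (int i = 0; i < n; ++i) { x[i] = i + 1.0; y[i] = 2.0; }
    printf("%f\n", dot_sse2(x, y, n));                /* prints 72.000000 */
    _mm_free(x);
    _mm_free(y);
    return 0;
}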
data/core/vendor/src/liblbfgs/lib/arithmetic_sse_float.h
@@ -1,298 +0,0 @@
- /*
- * SSE/SSE3 implementation of vector oprations (32bit float).
- *
- * Copyright (c) 2007-2010 Naoaki Okazaki
- * All rights reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
- /* $Id$ */
-
- #include <stdlib.h>
- #ifndef __APPLE__
- #include <malloc.h>
- #endif
- #include <memory.h>
-
- #if 1400 <= _MSC_VER
- #include <intrin.h>
- #endif/*_MSC_VER*/
-
- #if HAVE_XMMINTRIN_H
- #include <xmmintrin.h>
- #endif/*HAVE_XMMINTRIN_H*/
-
- #if LBFGS_FLOAT == 32 && LBFGS_IEEE_FLOAT
- #define fsigndiff(x, y) (((*(uint32_t*)(x)) ^ (*(uint32_t*)(y))) & 0x80000000U)
- #else
- #define fsigndiff(x, y) (*(x) * (*(y) / fabs(*(y))) < 0.)
- #endif/*LBFGS_IEEE_FLOAT*/
-
- inline static void* vecalloc(size_t size)
- {
- #if defined(_MSC_VER)
- void *memblock = _aligned_malloc(size, 16);
- #elif defined(__APPLE__) /* OS X always aligns on 16-byte boundaries */
- void *memblock = malloc(size);
- #else
- void *memblock = NULL, *p = NULL;
- if (posix_memalign(&p, 16, size) == 0) {
- memblock = p;
- }
- #endif
- if (memblock != NULL) {
- memset(memblock, 0, size);
- }
- return memblock;
- }
-
- inline static void vecfree(void *memblock)
- {
- _aligned_free(memblock);
- }
-
- #define vecset(x, c, n) \
- { \
- int i; \
- __m128 XMM0 = _mm_set_ps1(c); \
- for (i = 0;i < (n);i += 16) { \
- _mm_store_ps((x)+i , XMM0); \
- _mm_store_ps((x)+i+ 4, XMM0); \
- _mm_store_ps((x)+i+ 8, XMM0); \
- _mm_store_ps((x)+i+12, XMM0); \
- } \
- }
-
- #define veccpy(y, x, n) \
- { \
- int i; \
- for (i = 0;i < (n);i += 16) { \
- __m128 XMM0 = _mm_load_ps((x)+i ); \
- __m128 XMM1 = _mm_load_ps((x)+i+ 4); \
- __m128 XMM2 = _mm_load_ps((x)+i+ 8); \
- __m128 XMM3 = _mm_load_ps((x)+i+12); \
- _mm_store_ps((y)+i , XMM0); \
- _mm_store_ps((y)+i+ 4, XMM1); \
- _mm_store_ps((y)+i+ 8, XMM2); \
- _mm_store_ps((y)+i+12, XMM3); \
- } \
- }
-
- #define vecncpy(y, x, n) \
- { \
- int i; \
- const uint32_t mask = 0x80000000; \
- __m128 XMM4 = _mm_load_ps1((float*)&mask); \
- for (i = 0;i < (n);i += 16) { \
- __m128 XMM0 = _mm_load_ps((x)+i ); \
- __m128 XMM1 = _mm_load_ps((x)+i+ 4); \
- __m128 XMM2 = _mm_load_ps((x)+i+ 8); \
- __m128 XMM3 = _mm_load_ps((x)+i+12); \
- XMM0 = _mm_xor_ps(XMM0, XMM4); \
- XMM1 = _mm_xor_ps(XMM1, XMM4); \
- XMM2 = _mm_xor_ps(XMM2, XMM4); \
- XMM3 = _mm_xor_ps(XMM3, XMM4); \
- _mm_store_ps((y)+i , XMM0); \
- _mm_store_ps((y)+i+ 4, XMM1); \
- _mm_store_ps((y)+i+ 8, XMM2); \
- _mm_store_ps((y)+i+12, XMM3); \
- } \
- }
-
- #define vecadd(y, x, c, n) \
- { \
- int i; \
- __m128 XMM7 = _mm_set_ps1(c); \
- for (i = 0;i < (n);i += 8) { \
- __m128 XMM0 = _mm_load_ps((x)+i ); \
- __m128 XMM1 = _mm_load_ps((x)+i+4); \
- __m128 XMM2 = _mm_load_ps((y)+i ); \
- __m128 XMM3 = _mm_load_ps((y)+i+4); \
- XMM0 = _mm_mul_ps(XMM0, XMM7); \
- XMM1 = _mm_mul_ps(XMM1, XMM7); \
- XMM2 = _mm_add_ps(XMM2, XMM0); \
- XMM3 = _mm_add_ps(XMM3, XMM1); \
- _mm_store_ps((y)+i , XMM2); \
- _mm_store_ps((y)+i+4, XMM3); \
- } \
- }
-
- #define vecdiff(z, x, y, n) \
- { \
- int i; \
- for (i = 0;i < (n);i += 16) { \
- __m128 XMM0 = _mm_load_ps((x)+i ); \
- __m128 XMM1 = _mm_load_ps((x)+i+ 4); \
- __m128 XMM2 = _mm_load_ps((x)+i+ 8); \
- __m128 XMM3 = _mm_load_ps((x)+i+12); \
- __m128 XMM4 = _mm_load_ps((y)+i ); \
- __m128 XMM5 = _mm_load_ps((y)+i+ 4); \
- __m128 XMM6 = _mm_load_ps((y)+i+ 8); \
- __m128 XMM7 = _mm_load_ps((y)+i+12); \
- XMM0 = _mm_sub_ps(XMM0, XMM4); \
- XMM1 = _mm_sub_ps(XMM1, XMM5); \
- XMM2 = _mm_sub_ps(XMM2, XMM6); \
- XMM3 = _mm_sub_ps(XMM3, XMM7); \
- _mm_store_ps((z)+i , XMM0); \
- _mm_store_ps((z)+i+ 4, XMM1); \
- _mm_store_ps((z)+i+ 8, XMM2); \
- _mm_store_ps((z)+i+12, XMM3); \
- } \
- }
-
- #define vecscale(y, c, n) \
- { \
- int i; \
- __m128 XMM7 = _mm_set_ps1(c); \
- for (i = 0;i < (n);i += 8) { \
- __m128 XMM0 = _mm_load_ps((y)+i ); \
- __m128 XMM1 = _mm_load_ps((y)+i+4); \
- XMM0 = _mm_mul_ps(XMM0, XMM7); \
- XMM1 = _mm_mul_ps(XMM1, XMM7); \
- _mm_store_ps((y)+i , XMM0); \
- _mm_store_ps((y)+i+4, XMM1); \
- } \
- }
-
- #define vecmul(y, x, n) \
- { \
- int i; \
- for (i = 0;i < (n);i += 16) { \
- __m128 XMM0 = _mm_load_ps((x)+i ); \
- __m128 XMM1 = _mm_load_ps((x)+i+ 4); \
- __m128 XMM2 = _mm_load_ps((x)+i+ 8); \
- __m128 XMM3 = _mm_load_ps((x)+i+12); \
- __m128 XMM4 = _mm_load_ps((y)+i ); \
- __m128 XMM5 = _mm_load_ps((y)+i+ 4); \
- __m128 XMM6 = _mm_load_ps((y)+i+ 8); \
- __m128 XMM7 = _mm_load_ps((y)+i+12); \
- XMM4 = _mm_mul_ps(XMM4, XMM0); \
- XMM5 = _mm_mul_ps(XMM5, XMM1); \
- XMM6 = _mm_mul_ps(XMM6, XMM2); \
- XMM7 = _mm_mul_ps(XMM7, XMM3); \
- _mm_store_ps((y)+i , XMM4); \
- _mm_store_ps((y)+i+ 4, XMM5); \
- _mm_store_ps((y)+i+ 8, XMM6); \
- _mm_store_ps((y)+i+12, XMM7); \
- } \
- }
-
-
-
- #if 3 <= __SSE__ || defined(__SSE3__)
- /*
- Horizontal add with haddps SSE3 instruction. The work register (rw)
- is unused.
- */
- #define __horizontal_sum(r, rw) \
- r = _mm_hadd_ps(r, r); \
- r = _mm_hadd_ps(r, r);
-
- #else
- /*
- Horizontal add with SSE instruction. The work register (rw) is used.
- */
- #define __horizontal_sum(r, rw) \
- rw = r; \
- r = _mm_shuffle_ps(r, rw, _MM_SHUFFLE(1, 0, 3, 2)); \
- r = _mm_add_ps(r, rw); \
- rw = r; \
- r = _mm_shuffle_ps(r, rw, _MM_SHUFFLE(2, 3, 0, 1)); \
- r = _mm_add_ps(r, rw);
-
- #endif
-
- #define vecdot(s, x, y, n) \
- { \
- int i; \
- __m128 XMM0 = _mm_setzero_ps(); \
- __m128 XMM1 = _mm_setzero_ps(); \
- __m128 XMM2, XMM3, XMM4, XMM5; \
- for (i = 0;i < (n);i += 8) { \
- XMM2 = _mm_load_ps((x)+i ); \
- XMM3 = _mm_load_ps((x)+i+4); \
- XMM4 = _mm_load_ps((y)+i ); \
- XMM5 = _mm_load_ps((y)+i+4); \
- XMM2 = _mm_mul_ps(XMM2, XMM4); \
- XMM3 = _mm_mul_ps(XMM3, XMM5); \
- XMM0 = _mm_add_ps(XMM0, XMM2); \
- XMM1 = _mm_add_ps(XMM1, XMM3); \
- } \
- XMM0 = _mm_add_ps(XMM0, XMM1); \
- __horizontal_sum(XMM0, XMM1); \
- _mm_store_ss((s), XMM0); \
- }
-
- #define vec2norm(s, x, n) \
- { \
- int i; \
- __m128 XMM0 = _mm_setzero_ps(); \
- __m128 XMM1 = _mm_setzero_ps(); \
- __m128 XMM2, XMM3; \
- for (i = 0;i < (n);i += 8) { \
- XMM2 = _mm_load_ps((x)+i ); \
- XMM3 = _mm_load_ps((x)+i+4); \
- XMM2 = _mm_mul_ps(XMM2, XMM2); \
- XMM3 = _mm_mul_ps(XMM3, XMM3); \
- XMM0 = _mm_add_ps(XMM0, XMM2); \
- XMM1 = _mm_add_ps(XMM1, XMM3); \
- } \
- XMM0 = _mm_add_ps(XMM0, XMM1); \
- __horizontal_sum(XMM0, XMM1); \
- XMM2 = XMM0; \
- XMM1 = _mm_rsqrt_ss(XMM0); \
- XMM3 = XMM1; \
- XMM1 = _mm_mul_ss(XMM1, XMM1); \
- XMM1 = _mm_mul_ss(XMM1, XMM3); \
- XMM1 = _mm_mul_ss(XMM1, XMM0); \
- XMM1 = _mm_mul_ss(XMM1, _mm_set_ss(-0.5f)); \
- XMM3 = _mm_mul_ss(XMM3, _mm_set_ss(1.5f)); \
- XMM3 = _mm_add_ss(XMM3, XMM1); \
- XMM3 = _mm_mul_ss(XMM3, XMM2); \
- _mm_store_ss((s), XMM3); \
- }
-
- #define vec2norminv(s, x, n) \
- { \
- int i; \
- __m128 XMM0 = _mm_setzero_ps(); \
- __m128 XMM1 = _mm_setzero_ps(); \
- __m128 XMM2, XMM3; \
- for (i = 0;i < (n);i += 16) { \
- XMM2 = _mm_load_ps((x)+i ); \
- XMM3 = _mm_load_ps((x)+i+4); \
- XMM2 = _mm_mul_ps(XMM2, XMM2); \
- XMM3 = _mm_mul_ps(XMM3, XMM3); \
- XMM0 = _mm_add_ps(XMM0, XMM2); \
- XMM1 = _mm_add_ps(XMM1, XMM3); \
- } \
- XMM0 = _mm_add_ps(XMM0, XMM1); \
- __horizontal_sum(XMM0, XMM1); \
- XMM2 = XMM0; \
- XMM1 = _mm_rsqrt_ss(XMM0); \
- XMM3 = XMM1; \
- XMM1 = _mm_mul_ss(XMM1, XMM1); \
- XMM1 = _mm_mul_ss(XMM1, XMM3); \
- XMM1 = _mm_mul_ss(XMM1, XMM0); \
- XMM1 = _mm_mul_ss(XMM1, _mm_set_ss(-0.5f)); \
- XMM3 = _mm_mul_ss(XMM3, _mm_set_ss(1.5f)); \
- XMM3 = _mm_add_ss(XMM3, XMM1); \
- _mm_store_ss((s), XMM3); \
- }
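The float variant removed above avoids a full-precision square root in vec2norm: it takes the roughly 12-bit estimate from _mm_rsqrt_ss, refines it with one Newton-Raphson step, and multiplies back by the sum of squares so that 1/sqrt(s) becomes sqrt(s). The scalar sketch below spells out that tail arithmetic; it is illustrative only, the function name is hypothetical, and s stands for the already-reduced sum of squares.

/* Illustrative sketch of the rsqrt + Newton-Raphson tail of vec2norm above. */
#include <stdio.h>
#include <xmmintrin.h>   /* SSE */

static float norm_from_sum_of_squares(float s)
{
    float y0 = _mm_cvtss_f32(_mm_rsqrt_ss(_mm_set_ss(s)));  /* rough 1/sqrt(s) */
    float y1 = y0 * (1.5f - 0.5f * s * y0 * y0);             /* one NR refinement step */
    return s * y1;                                            /* ~= sqrt(s) */
}

int main(void)
{
    printf("%f\n", norm_from_sum_of_squares(25.0f));   /* ~= 5.0 */
    return 0;
}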