opener-opinion-detector-base 2.0.0

Files changed (261)
  1. checksums.yaml +7 -0
  2. data/README.md +101 -0
  3. data/bin/opinion-detector-base +19 -0
  4. data/core/annotation.cfg.erb +9 -0
  5. data/core/packages/KafNafParser-1.4.tar.gz +0 -0
  6. data/core/packages/VUA_pylib-1.5.tar.gz +0 -0
  7. data/core/python-scripts/LICENSE +339 -0
  8. data/core/python-scripts/README.md +226 -0
  9. data/core/python-scripts/classify_kaf_naf_file.py +499 -0
  10. data/core/python-scripts/cross_validation.py +634 -0
  11. data/core/python-scripts/generate_folds.py +134 -0
  12. data/core/python-scripts/models.cfg +10 -0
  13. data/core/python-scripts/my_templates/README +33 -0
  14. data/core/python-scripts/my_templates/templates_exp.only0.txt +6 -0
  15. data/core/python-scripts/my_templates/templates_exp.pol0.txt +10 -0
  16. data/core/python-scripts/my_templates/templates_exp.red.txt +7 -0
  17. data/core/python-scripts/my_templates/templates_exp.txt +10 -0
  18. data/core/python-scripts/my_templates/templates_holder.only0.txt +11 -0
  19. data/core/python-scripts/my_templates/templates_holder.red.txt +9 -0
  20. data/core/python-scripts/my_templates/templates_holder.txt +10 -0
  21. data/core/python-scripts/my_templates/templates_target.only0.txt +11 -0
  22. data/core/python-scripts/my_templates/templates_target.red.txt +9 -0
  23. data/core/python-scripts/my_templates/templates_target.txt +10 -0
  24. data/core/python-scripts/run_all_experiments.sh +49 -0
  25. data/core/python-scripts/run_basic.py +20 -0
  26. data/core/python-scripts/run_experiment.sh +42 -0
  27. data/core/python-scripts/scripts/__init__.py +1 -0
  28. data/core/python-scripts/scripts/config_manager.py +314 -0
  29. data/core/python-scripts/scripts/crfutils.py +215 -0
  30. data/core/python-scripts/scripts/extract_feats_relations.py +295 -0
  31. data/core/python-scripts/scripts/extract_features.py +376 -0
  32. data/core/python-scripts/scripts/feats_to_crf.exp.py +105 -0
  33. data/core/python-scripts/scripts/lexicons.py +44 -0
  34. data/core/python-scripts/scripts/link_entities_distance.py +77 -0
  35. data/core/python-scripts/scripts/relation_classifier.py +250 -0
  36. data/core/python-scripts/train.py +566 -0
  37. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/PKG-INFO +10 -0
  38. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/SOURCES.txt +22 -0
  39. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/dependency_links.txt +1 -0
  40. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/installed-files.txt +47 -0
  41. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/top_level.txt +1 -0
  42. data/core/site-packages/pre_build/KafNafParser/KafNafParserMod.py +390 -0
  43. data/core/site-packages/pre_build/KafNafParser/KafNafParserMod.pyc +0 -0
  44. data/core/site-packages/pre_build/KafNafParser/__init__.py +14 -0
  45. data/core/site-packages/pre_build/KafNafParser/__init__.pyc +0 -0
  46. data/core/site-packages/pre_build/KafNafParser/constituency_data.py +125 -0
  47. data/core/site-packages/pre_build/KafNafParser/constituency_data.pyc +0 -0
  48. data/core/site-packages/pre_build/KafNafParser/coreference_data.py +52 -0
  49. data/core/site-packages/pre_build/KafNafParser/coreference_data.pyc +0 -0
  50. data/core/site-packages/pre_build/KafNafParser/dependency_data.py +78 -0
  51. data/core/site-packages/pre_build/KafNafParser/dependency_data.pyc +0 -0
  52. data/core/site-packages/pre_build/KafNafParser/entity_data.py +59 -0
  53. data/core/site-packages/pre_build/KafNafParser/entity_data.pyc +0 -0
  54. data/core/site-packages/pre_build/KafNafParser/external_references_data.py +41 -0
  55. data/core/site-packages/pre_build/KafNafParser/external_references_data.pyc +0 -0
  56. data/core/site-packages/pre_build/KafNafParser/feature_extractor/__init__.py +2 -0
  57. data/core/site-packages/pre_build/KafNafParser/feature_extractor/__init__.pyc +0 -0
  58. data/core/site-packages/pre_build/KafNafParser/feature_extractor/constituency.py +205 -0
  59. data/core/site-packages/pre_build/KafNafParser/feature_extractor/constituency.pyc +0 -0
  60. data/core/site-packages/pre_build/KafNafParser/feature_extractor/dependency.py +309 -0
  61. data/core/site-packages/pre_build/KafNafParser/feature_extractor/dependency.pyc +0 -0
  62. data/core/site-packages/pre_build/KafNafParser/features_data.py +131 -0
  63. data/core/site-packages/pre_build/KafNafParser/features_data.pyc +0 -0
  64. data/core/site-packages/pre_build/KafNafParser/header_data.py +127 -0
  65. data/core/site-packages/pre_build/KafNafParser/header_data.pyc +0 -0
  66. data/core/site-packages/pre_build/KafNafParser/opinion_data.py +211 -0
  67. data/core/site-packages/pre_build/KafNafParser/opinion_data.pyc +0 -0
  68. data/core/site-packages/pre_build/KafNafParser/references_data.py +23 -0
  69. data/core/site-packages/pre_build/KafNafParser/references_data.pyc +0 -0
  70. data/core/site-packages/pre_build/KafNafParser/span_data.py +63 -0
  71. data/core/site-packages/pre_build/KafNafParser/span_data.pyc +0 -0
  72. data/core/site-packages/pre_build/KafNafParser/term_data.py +111 -0
  73. data/core/site-packages/pre_build/KafNafParser/term_data.pyc +0 -0
  74. data/core/site-packages/pre_build/KafNafParser/term_sentiment_data.py +42 -0
  75. data/core/site-packages/pre_build/KafNafParser/term_sentiment_data.pyc +0 -0
  76. data/core/site-packages/pre_build/KafNafParser/text_data.py +99 -0
  77. data/core/site-packages/pre_build/KafNafParser/text_data.pyc +0 -0
  78. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/PKG-INFO +10 -0
  79. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/SOURCES.txt +14 -0
  80. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/dependency_links.txt +1 -0
  81. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/installed-files.txt +23 -0
  82. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/top_level.txt +1 -0
  83. data/core/site-packages/pre_build/VUA_pylib/__init__.py +1 -0
  84. data/core/site-packages/pre_build/VUA_pylib/__init__.pyc +0 -0
  85. data/core/site-packages/pre_build/VUA_pylib/common/__init__.py +1 -0
  86. data/core/site-packages/pre_build/VUA_pylib/common/__init__.pyc +0 -0
  87. data/core/site-packages/pre_build/VUA_pylib/common/common.py +28 -0
  88. data/core/site-packages/pre_build/VUA_pylib/common/common.pyc +0 -0
  89. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/__init__.py +1 -0
  90. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/__init__.pyc +0 -0
  91. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/google_web_nl.py +156 -0
  92. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/google_web_nl.pyc +0 -0
  93. data/core/site-packages/pre_build/VUA_pylib/io_utils/__init__.py +1 -0
  94. data/core/site-packages/pre_build/VUA_pylib/io_utils/__init__.pyc +0 -0
  95. data/core/site-packages/pre_build/VUA_pylib/io_utils/feature_file.py +121 -0
  96. data/core/site-packages/pre_build/VUA_pylib/io_utils/feature_file.pyc +0 -0
  97. data/core/site-packages/pre_build/VUA_pylib/lexicon/__init__.py +1 -0
  98. data/core/site-packages/pre_build/VUA_pylib/lexicon/__init__.pyc +0 -0
  99. data/core/site-packages/pre_build/VUA_pylib/lexicon/lexicon.py +72 -0
  100. data/core/site-packages/pre_build/VUA_pylib/lexicon/lexicon.pyc +0 -0
  101. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/PKG-INFO +10 -0
  102. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/SOURCES.txt +7 -0
  103. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/dependency_links.txt +1 -0
  104. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/installed-files.txt +11 -0
  105. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/top_level.txt +1 -0
  106. data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.py +165 -0
  107. data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.pyc +0 -0
  108. data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.py +439 -0
  109. data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.pyc +0 -0
  110. data/core/site-packages/pre_build/VUKafParserPy/__init__.py +7 -0
  111. data/core/site-packages/pre_build/VUKafParserPy/__init__.pyc +0 -0
  112. data/core/vendor/src/crfsuite/AUTHORS +1 -0
  113. data/core/vendor/src/crfsuite/COPYING +27 -0
  114. data/core/vendor/src/crfsuite/ChangeLog +103 -0
  115. data/core/vendor/src/crfsuite/INSTALL +236 -0
  116. data/core/vendor/src/crfsuite/Makefile.am +19 -0
  117. data/core/vendor/src/crfsuite/Makefile.in +783 -0
  118. data/core/vendor/src/crfsuite/README +183 -0
  119. data/core/vendor/src/crfsuite/aclocal.m4 +9018 -0
  120. data/core/vendor/src/crfsuite/autogen.sh +38 -0
  121. data/core/vendor/src/crfsuite/compile +143 -0
  122. data/core/vendor/src/crfsuite/config.guess +1502 -0
  123. data/core/vendor/src/crfsuite/config.h.in +198 -0
  124. data/core/vendor/src/crfsuite/config.sub +1714 -0
  125. data/core/vendor/src/crfsuite/configure +14273 -0
  126. data/core/vendor/src/crfsuite/configure.in +149 -0
  127. data/core/vendor/src/crfsuite/crfsuite.sln +42 -0
  128. data/core/vendor/src/crfsuite/depcomp +630 -0
  129. data/core/vendor/src/crfsuite/example/chunking.py +49 -0
  130. data/core/vendor/src/crfsuite/example/crfutils.py +179 -0
  131. data/core/vendor/src/crfsuite/example/ner.py +270 -0
  132. data/core/vendor/src/crfsuite/example/pos.py +78 -0
  133. data/core/vendor/src/crfsuite/example/template.py +88 -0
  134. data/core/vendor/src/crfsuite/frontend/Makefile.am +29 -0
  135. data/core/vendor/src/crfsuite/frontend/Makefile.in +640 -0
  136. data/core/vendor/src/crfsuite/frontend/dump.c +116 -0
  137. data/core/vendor/src/crfsuite/frontend/frontend.vcxproj +129 -0
  138. data/core/vendor/src/crfsuite/frontend/iwa.c +273 -0
  139. data/core/vendor/src/crfsuite/frontend/iwa.h +65 -0
  140. data/core/vendor/src/crfsuite/frontend/learn.c +439 -0
  141. data/core/vendor/src/crfsuite/frontend/main.c +137 -0
  142. data/core/vendor/src/crfsuite/frontend/option.c +93 -0
  143. data/core/vendor/src/crfsuite/frontend/option.h +86 -0
  144. data/core/vendor/src/crfsuite/frontend/readdata.h +38 -0
  145. data/core/vendor/src/crfsuite/frontend/reader.c +136 -0
  146. data/core/vendor/src/crfsuite/frontend/tag.c +427 -0
  147. data/core/vendor/src/crfsuite/genbinary.sh.in +15 -0
  148. data/core/vendor/src/crfsuite/include/Makefile.am +11 -0
  149. data/core/vendor/src/crfsuite/include/Makefile.in +461 -0
  150. data/core/vendor/src/crfsuite/include/crfsuite.h +1063 -0
  151. data/core/vendor/src/crfsuite/include/crfsuite.hpp +555 -0
  152. data/core/vendor/src/crfsuite/include/crfsuite_api.hpp +400 -0
  153. data/core/vendor/src/crfsuite/include/os.h +61 -0
  154. data/core/vendor/src/crfsuite/install-sh +520 -0
  155. data/core/vendor/src/crfsuite/lib/cqdb/COPYING +28 -0
  156. data/core/vendor/src/crfsuite/lib/cqdb/Makefile.am +21 -0
  157. data/core/vendor/src/crfsuite/lib/cqdb/Makefile.in +549 -0
  158. data/core/vendor/src/crfsuite/lib/cqdb/cqdb.vcxproj +86 -0
  159. data/core/vendor/src/crfsuite/lib/cqdb/include/cqdb.h +524 -0
  160. data/core/vendor/src/crfsuite/lib/cqdb/src/cqdb.c +587 -0
  161. data/core/vendor/src/crfsuite/lib/cqdb/src/lookup3.c +976 -0
  162. data/core/vendor/src/crfsuite/lib/crf/Makefile.am +46 -0
  163. data/core/vendor/src/crfsuite/lib/crf/Makefile.in +721 -0
  164. data/core/vendor/src/crfsuite/lib/crf/crf.vcxproj +216 -0
  165. data/core/vendor/src/crfsuite/lib/crf/src/crf1d.h +353 -0
  166. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_context.c +705 -0
  167. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_encode.c +943 -0
  168. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_feature.c +352 -0
  169. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_model.c +994 -0
  170. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_tag.c +550 -0
  171. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite.c +492 -0
  172. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite_internal.h +236 -0
  173. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite_train.c +272 -0
  174. data/core/vendor/src/crfsuite/lib/crf/src/dataset.c +106 -0
  175. data/core/vendor/src/crfsuite/lib/crf/src/dictionary.c +118 -0
  176. data/core/vendor/src/crfsuite/lib/crf/src/holdout.c +80 -0
  177. data/core/vendor/src/crfsuite/lib/crf/src/logging.c +91 -0
  178. data/core/vendor/src/crfsuite/lib/crf/src/logging.h +48 -0
  179. data/core/vendor/src/crfsuite/lib/crf/src/params.c +335 -0
  180. data/core/vendor/src/crfsuite/lib/crf/src/params.h +80 -0
  181. data/core/vendor/src/crfsuite/lib/crf/src/quark.c +172 -0
  182. data/core/vendor/src/crfsuite/lib/crf/src/quark.h +46 -0
  183. data/core/vendor/src/crfsuite/lib/crf/src/rumavl.c +1107 -0
  184. data/core/vendor/src/crfsuite/lib/crf/src/rumavl.h +160 -0
  185. data/core/vendor/src/crfsuite/lib/crf/src/train_arow.c +408 -0
  186. data/core/vendor/src/crfsuite/lib/crf/src/train_averaged_perceptron.c +242 -0
  187. data/core/vendor/src/crfsuite/lib/crf/src/train_l2sgd.c +507 -0
  188. data/core/vendor/src/crfsuite/lib/crf/src/train_lbfgs.c +338 -0
  189. data/core/vendor/src/crfsuite/lib/crf/src/train_passive_aggressive.c +435 -0
  190. data/core/vendor/src/crfsuite/lib/crf/src/vecmath.h +341 -0
  191. data/core/vendor/src/crfsuite/ltmain.sh +8413 -0
  192. data/core/vendor/src/crfsuite/missing +376 -0
  193. data/core/vendor/src/crfsuite/swig/Makefile.am +13 -0
  194. data/core/vendor/src/crfsuite/swig/Makefile.in +365 -0
  195. data/core/vendor/src/crfsuite/swig/crfsuite.cpp +2 -0
  196. data/core/vendor/src/crfsuite/swig/export.i +32 -0
  197. data/core/vendor/src/crfsuite/swig/python/README +92 -0
  198. data/core/vendor/src/crfsuite/swig/python/crfsuite.py +329 -0
  199. data/core/vendor/src/crfsuite/swig/python/export_wrap.cpp +14355 -0
  200. data/core/vendor/src/crfsuite/swig/python/export_wrap.h +63 -0
  201. data/core/vendor/src/crfsuite/swig/python/prepare.sh +9 -0
  202. data/core/vendor/src/crfsuite/swig/python/sample_tag.py +52 -0
  203. data/core/vendor/src/crfsuite/swig/python/sample_train.py +68 -0
  204. data/core/vendor/src/crfsuite/swig/python/setup.py +44 -0
  205. data/core/vendor/src/crfsuite/win32/stdint.h +679 -0
  206. data/core/vendor/src/liblbfgs/AUTHORS +1 -0
  207. data/core/vendor/src/liblbfgs/COPYING +22 -0
  208. data/core/vendor/src/liblbfgs/ChangeLog +120 -0
  209. data/core/vendor/src/liblbfgs/INSTALL +231 -0
  210. data/core/vendor/src/liblbfgs/Makefile.am +10 -0
  211. data/core/vendor/src/liblbfgs/Makefile.in +638 -0
  212. data/core/vendor/src/liblbfgs/NEWS +0 -0
  213. data/core/vendor/src/liblbfgs/README +71 -0
  214. data/core/vendor/src/liblbfgs/aclocal.m4 +6985 -0
  215. data/core/vendor/src/liblbfgs/autogen.sh +38 -0
  216. data/core/vendor/src/liblbfgs/config.guess +1411 -0
  217. data/core/vendor/src/liblbfgs/config.h.in +64 -0
  218. data/core/vendor/src/liblbfgs/config.sub +1500 -0
  219. data/core/vendor/src/liblbfgs/configure +21146 -0
  220. data/core/vendor/src/liblbfgs/configure.in +107 -0
  221. data/core/vendor/src/liblbfgs/depcomp +522 -0
  222. data/core/vendor/src/liblbfgs/include/lbfgs.h +745 -0
  223. data/core/vendor/src/liblbfgs/install-sh +322 -0
  224. data/core/vendor/src/liblbfgs/lbfgs.sln +26 -0
  225. data/core/vendor/src/liblbfgs/lib/Makefile.am +24 -0
  226. data/core/vendor/src/liblbfgs/lib/Makefile.in +499 -0
  227. data/core/vendor/src/liblbfgs/lib/arithmetic_ansi.h +133 -0
  228. data/core/vendor/src/liblbfgs/lib/arithmetic_sse_double.h +294 -0
  229. data/core/vendor/src/liblbfgs/lib/arithmetic_sse_float.h +298 -0
  230. data/core/vendor/src/liblbfgs/lib/lbfgs.c +1371 -0
  231. data/core/vendor/src/liblbfgs/lib/lib.vcxproj +95 -0
  232. data/core/vendor/src/liblbfgs/ltmain.sh +6426 -0
  233. data/core/vendor/src/liblbfgs/missing +353 -0
  234. data/core/vendor/src/liblbfgs/sample/Makefile.am +15 -0
  235. data/core/vendor/src/liblbfgs/sample/Makefile.in +433 -0
  236. data/core/vendor/src/liblbfgs/sample/sample.c +81 -0
  237. data/core/vendor/src/liblbfgs/sample/sample.cpp +126 -0
  238. data/core/vendor/src/liblbfgs/sample/sample.vcxproj +105 -0
  239. data/core/vendor/src/svm_light/LICENSE.txt +59 -0
  240. data/core/vendor/src/svm_light/Makefile +105 -0
  241. data/core/vendor/src/svm_light/kernel.h +40 -0
  242. data/core/vendor/src/svm_light/svm_classify.c +197 -0
  243. data/core/vendor/src/svm_light/svm_common.c +985 -0
  244. data/core/vendor/src/svm_light/svm_common.h +301 -0
  245. data/core/vendor/src/svm_light/svm_hideo.c +1062 -0
  246. data/core/vendor/src/svm_light/svm_learn.c +4147 -0
  247. data/core/vendor/src/svm_light/svm_learn.h +169 -0
  248. data/core/vendor/src/svm_light/svm_learn_main.c +397 -0
  249. data/core/vendor/src/svm_light/svm_loqo.c +211 -0
  250. data/ext/hack/Rakefile +17 -0
  251. data/ext/hack/support.rb +88 -0
  252. data/lib/opener/opinion_detectors/base.rb +112 -0
  253. data/lib/opener/opinion_detectors/base/version.rb +7 -0
  254. data/lib/opener/opinion_detectors/configuration_creator.rb +86 -0
  255. data/lib/opener/opinion_detectors/de.rb +7 -0
  256. data/lib/opener/opinion_detectors/en.rb +7 -0
  257. data/lib/opener/opinion_detectors/it.rb +7 -0
  258. data/lib/opener/opinion_detectors/nl.rb +6 -0
  259. data/opener-opinion-detector-base.gemspec +35 -0
  260. data/pre_build_requirements.txt +3 -0
  261. metadata +374 -0
data/core/vendor/src/liblbfgs/lib/arithmetic_ansi.h
@@ -0,0 +1,133 @@
+/*
+ * ANSI C implementation of vector operations.
+ *
+ * Copyright (c) 2007-2010 Naoaki Okazaki
+ * All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/* $Id$ */
+
+#include <stdlib.h>
+#include <memory.h>
+
+#if LBFGS_FLOAT == 32 && LBFGS_IEEE_FLOAT
+#define fsigndiff(x, y) (((*(uint32_t*)(x)) ^ (*(uint32_t*)(y))) & 0x80000000U)
+#else
+#define fsigndiff(x, y) (*(x) * (*(y) / fabs(*(y))) < 0.)
+#endif/*LBFGS_IEEE_FLOAT*/
+
+inline static void* vecalloc(size_t size)
+{
+    void *memblock = malloc(size);
+    if (memblock) {
+        memset(memblock, 0, size);
+    }
+    return memblock;
+}
+
+inline static void vecfree(void *memblock)
+{
+    free(memblock);
+}
+
+inline static void vecset(lbfgsfloatval_t *x, const lbfgsfloatval_t c, const int n)
+{
+    int i;
+
+    for (i = 0;i < n;++i) {
+        x[i] = c;
+    }
+}
+
+inline static void veccpy(lbfgsfloatval_t *y, const lbfgsfloatval_t *x, const int n)
+{
+    int i;
+
+    for (i = 0;i < n;++i) {
+        y[i] = x[i];
+    }
+}
+
+inline static void vecncpy(lbfgsfloatval_t *y, const lbfgsfloatval_t *x, const int n)
+{
+    int i;
+
+    for (i = 0;i < n;++i) {
+        y[i] = -x[i];
+    }
+}
+
+inline static void vecadd(lbfgsfloatval_t *y, const lbfgsfloatval_t *x, const lbfgsfloatval_t c, const int n)
+{
+    int i;
+
+    for (i = 0;i < n;++i) {
+        y[i] += c * x[i];
+    }
+}
+
+inline static void vecdiff(lbfgsfloatval_t *z, const lbfgsfloatval_t *x, const lbfgsfloatval_t *y, const int n)
+{
+    int i;
+
+    for (i = 0;i < n;++i) {
+        z[i] = x[i] - y[i];
+    }
+}
+
+inline static void vecscale(lbfgsfloatval_t *y, const lbfgsfloatval_t c, const int n)
+{
+    int i;
+
+    for (i = 0;i < n;++i) {
+        y[i] *= c;
+    }
+}
+
+inline static void vecmul(lbfgsfloatval_t *y, const lbfgsfloatval_t *x, const int n)
+{
+    int i;
+
+    for (i = 0;i < n;++i) {
+        y[i] *= x[i];
+    }
+}
+
+inline static void vecdot(lbfgsfloatval_t* s, const lbfgsfloatval_t *x, const lbfgsfloatval_t *y, const int n)
+{
+    int i;
+    *s = 0.;
+    for (i = 0;i < n;++i) {
+        *s += x[i] * y[i];
+    }
+}
+
+inline static void vec2norm(lbfgsfloatval_t* s, const lbfgsfloatval_t *x, const int n)
+{
+    vecdot(s, x, x, n);
+    *s = (lbfgsfloatval_t)sqrt(*s);
+}
+
+inline static void vec2norminv(lbfgsfloatval_t* s, const lbfgsfloatval_t *x, const int n)
+{
+    vec2norm(s, x, n);
+    *s = (lbfgsfloatval_t)(1.0 / *s);
+}
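
The hunk above is the portable fallback for the vec* helper interface that the vendored liblbfgs optimizer is built on (vecalloc/vecfree, vecset, veccpy, vecadd, vecdot, vec2norm, and so on). As a rough, illustrative sketch of how these helpers compose, the following standalone program exercises a few of them; it assumes lbfgsfloatval_t is double (the liblbfgs default when LBFGS_FLOAT is not defined as 32) and that arithmetic_ansi.h is reachable on the include path. It is an illustration only, not part of the packaged sources.

/* Illustrative sketch: exercising the ANSI vector helpers shown above.
   Assumption: lbfgsfloatval_t is double, as liblbfgs uses by default;
   in the real build this typedef comes from include/lbfgs.h. */
#include <stdio.h>
#include <math.h>                      /* sqrt/fabs used by the helpers */

typedef double lbfgsfloatval_t;        /* normally supplied by lbfgs.h */
#include "arithmetic_ansi.h"

int main(void)
{
    const int n = 8;
    lbfgsfloatval_t *x = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t));
    lbfgsfloatval_t *y = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t));
    lbfgsfloatval_t dot, norm;

    vecset(x, 1.0, n);      /* x = (1, ..., 1)                          */
    vecset(y, 2.0, n);      /* y = (2, ..., 2)                          */
    vecadd(y, x, 0.5, n);   /* y += 0.5 * x -> every component is 2.5   */
    vecdot(&dot, x, y, n);  /* dot = sum_i x[i]*y[i] = 8 * 2.5 = 20     */
    vec2norm(&norm, x, n);  /* norm = sqrt(8)                           */

    printf("dot = %g, norm = %g\n", dot, norm);

    vecfree(x);
    vecfree(y);
    return 0;
}
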
data/core/vendor/src/liblbfgs/lib/arithmetic_sse_double.h
@@ -0,0 +1,294 @@
+/*
+ * SSE2 implementation of vector oprations (64bit double).
+ *
+ * Copyright (c) 2007-2010 Naoaki Okazaki
+ * All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/* $Id$ */
+
+#include <stdlib.h>
+#ifndef __APPLE__
+#include <malloc.h>
+#endif
+#include <memory.h>
+
+#if 1400 <= _MSC_VER
+#include <intrin.h>
+#endif/*1400 <= _MSC_VER*/
+
+#if HAVE_EMMINTRIN_H
+#include <emmintrin.h>
+#endif/*HAVE_EMMINTRIN_H*/
+
+inline static void* vecalloc(size_t size)
+{
+#if defined(_MSC_VER)
+    void *memblock = _aligned_malloc(size, 16);
+#elif defined(__APPLE__) /* OS X always aligns on 16-byte boundaries */
+    void *memblock = malloc(size);
+#else
+    void *memblock = NULL, *p = NULL;
+    if (posix_memalign(&p, 16, size) == 0) {
+        memblock = p;
+    }
+#endif
+    if (memblock != NULL) {
+        memset(memblock, 0, size);
+    }
+    return memblock;
+}
+
+inline static void vecfree(void *memblock)
+{
+#ifdef _MSC_VER
+    _aligned_free(memblock);
+#else
+    free(memblock);
+#endif
+}
+
+#define fsigndiff(x, y) \
+    ((_mm_movemask_pd(_mm_set_pd(*(x), *(y))) + 1) & 0x002)
+
+#define vecset(x, c, n) \
+{ \
+    int i; \
+    __m128d XMM0 = _mm_set1_pd(c); \
+    for (i = 0;i < (n);i += 8) { \
+        _mm_store_pd((x)+i  , XMM0); \
+        _mm_store_pd((x)+i+2, XMM0); \
+        _mm_store_pd((x)+i+4, XMM0); \
+        _mm_store_pd((x)+i+6, XMM0); \
+    } \
+}
+
+#define veccpy(y, x, n) \
+{ \
+    int i; \
+    for (i = 0;i < (n);i += 8) { \
+        __m128d XMM0 = _mm_load_pd((x)+i  ); \
+        __m128d XMM1 = _mm_load_pd((x)+i+2); \
+        __m128d XMM2 = _mm_load_pd((x)+i+4); \
+        __m128d XMM3 = _mm_load_pd((x)+i+6); \
+        _mm_store_pd((y)+i  , XMM0); \
+        _mm_store_pd((y)+i+2, XMM1); \
+        _mm_store_pd((y)+i+4, XMM2); \
+        _mm_store_pd((y)+i+6, XMM3); \
+    } \
+}
+
+#define vecncpy(y, x, n) \
+{ \
+    int i; \
+    for (i = 0;i < (n);i += 8) { \
+        __m128d XMM0 = _mm_setzero_pd(); \
+        __m128d XMM1 = _mm_setzero_pd(); \
+        __m128d XMM2 = _mm_setzero_pd(); \
+        __m128d XMM3 = _mm_setzero_pd(); \
+        __m128d XMM4 = _mm_load_pd((x)+i  ); \
+        __m128d XMM5 = _mm_load_pd((x)+i+2); \
+        __m128d XMM6 = _mm_load_pd((x)+i+4); \
+        __m128d XMM7 = _mm_load_pd((x)+i+6); \
+        XMM0 = _mm_sub_pd(XMM0, XMM4); \
+        XMM1 = _mm_sub_pd(XMM1, XMM5); \
+        XMM2 = _mm_sub_pd(XMM2, XMM6); \
+        XMM3 = _mm_sub_pd(XMM3, XMM7); \
+        _mm_store_pd((y)+i  , XMM0); \
+        _mm_store_pd((y)+i+2, XMM1); \
+        _mm_store_pd((y)+i+4, XMM2); \
+        _mm_store_pd((y)+i+6, XMM3); \
+    } \
+}
+
+#define vecadd(y, x, c, n) \
+{ \
+    int i; \
+    __m128d XMM7 = _mm_set1_pd(c); \
+    for (i = 0;i < (n);i += 4) { \
+        __m128d XMM0 = _mm_load_pd((x)+i  ); \
+        __m128d XMM1 = _mm_load_pd((x)+i+2); \
+        __m128d XMM2 = _mm_load_pd((y)+i  ); \
+        __m128d XMM3 = _mm_load_pd((y)+i+2); \
+        XMM0 = _mm_mul_pd(XMM0, XMM7); \
+        XMM1 = _mm_mul_pd(XMM1, XMM7); \
+        XMM2 = _mm_add_pd(XMM2, XMM0); \
+        XMM3 = _mm_add_pd(XMM3, XMM1); \
+        _mm_store_pd((y)+i  , XMM2); \
+        _mm_store_pd((y)+i+2, XMM3); \
+    } \
+}
+
+#define vecdiff(z, x, y, n) \
+{ \
+    int i; \
+    for (i = 0;i < (n);i += 8) { \
+        __m128d XMM0 = _mm_load_pd((x)+i  ); \
+        __m128d XMM1 = _mm_load_pd((x)+i+2); \
+        __m128d XMM2 = _mm_load_pd((x)+i+4); \
+        __m128d XMM3 = _mm_load_pd((x)+i+6); \
+        __m128d XMM4 = _mm_load_pd((y)+i  ); \
+        __m128d XMM5 = _mm_load_pd((y)+i+2); \
+        __m128d XMM6 = _mm_load_pd((y)+i+4); \
+        __m128d XMM7 = _mm_load_pd((y)+i+6); \
+        XMM0 = _mm_sub_pd(XMM0, XMM4); \
+        XMM1 = _mm_sub_pd(XMM1, XMM5); \
+        XMM2 = _mm_sub_pd(XMM2, XMM6); \
+        XMM3 = _mm_sub_pd(XMM3, XMM7); \
+        _mm_store_pd((z)+i  , XMM0); \
+        _mm_store_pd((z)+i+2, XMM1); \
+        _mm_store_pd((z)+i+4, XMM2); \
+        _mm_store_pd((z)+i+6, XMM3); \
+    } \
+}
+
+#define vecscale(y, c, n) \
+{ \
+    int i; \
+    __m128d XMM7 = _mm_set1_pd(c); \
+    for (i = 0;i < (n);i += 4) { \
+        __m128d XMM0 = _mm_load_pd((y)+i  ); \
+        __m128d XMM1 = _mm_load_pd((y)+i+2); \
+        XMM0 = _mm_mul_pd(XMM0, XMM7); \
+        XMM1 = _mm_mul_pd(XMM1, XMM7); \
+        _mm_store_pd((y)+i  , XMM0); \
+        _mm_store_pd((y)+i+2, XMM1); \
+    } \
+}
+
+#define vecmul(y, x, n) \
+{ \
+    int i; \
+    for (i = 0;i < (n);i += 8) { \
+        __m128d XMM0 = _mm_load_pd((x)+i  ); \
+        __m128d XMM1 = _mm_load_pd((x)+i+2); \
+        __m128d XMM2 = _mm_load_pd((x)+i+4); \
+        __m128d XMM3 = _mm_load_pd((x)+i+6); \
+        __m128d XMM4 = _mm_load_pd((y)+i  ); \
+        __m128d XMM5 = _mm_load_pd((y)+i+2); \
+        __m128d XMM6 = _mm_load_pd((y)+i+4); \
+        __m128d XMM7 = _mm_load_pd((y)+i+6); \
+        XMM4 = _mm_mul_pd(XMM4, XMM0); \
+        XMM5 = _mm_mul_pd(XMM5, XMM1); \
+        XMM6 = _mm_mul_pd(XMM6, XMM2); \
+        XMM7 = _mm_mul_pd(XMM7, XMM3); \
+        _mm_store_pd((y)+i  , XMM4); \
+        _mm_store_pd((y)+i+2, XMM5); \
+        _mm_store_pd((y)+i+4, XMM6); \
+        _mm_store_pd((y)+i+6, XMM7); \
+    } \
+}
+
+
+
+#if 3 <= __SSE__ || defined(__SSE3__)
+/*
+    Horizontal add with haddps SSE3 instruction. The work register (rw)
+    is unused.
+ */
+#define __horizontal_sum(r, rw) \
+    r = _mm_hadd_ps(r, r); \
+    r = _mm_hadd_ps(r, r);
+
+#else
+/*
+    Horizontal add with SSE instruction. The work register (rw) is used.
+ */
+#define __horizontal_sum(r, rw) \
+    rw = r; \
+    r = _mm_shuffle_ps(r, rw, _MM_SHUFFLE(1, 0, 3, 2)); \
+    r = _mm_add_ps(r, rw); \
+    rw = r; \
+    r = _mm_shuffle_ps(r, rw, _MM_SHUFFLE(2, 3, 0, 1)); \
+    r = _mm_add_ps(r, rw);
+
+#endif
+
+#define vecdot(s, x, y, n) \
+{ \
+    int i; \
+    __m128d XMM0 = _mm_setzero_pd(); \
+    __m128d XMM1 = _mm_setzero_pd(); \
+    __m128d XMM2, XMM3, XMM4, XMM5; \
+    for (i = 0;i < (n);i += 4) { \
+        XMM2 = _mm_load_pd((x)+i  ); \
+        XMM3 = _mm_load_pd((x)+i+2); \
+        XMM4 = _mm_load_pd((y)+i  ); \
+        XMM5 = _mm_load_pd((y)+i+2); \
+        XMM2 = _mm_mul_pd(XMM2, XMM4); \
+        XMM3 = _mm_mul_pd(XMM3, XMM5); \
+        XMM0 = _mm_add_pd(XMM0, XMM2); \
+        XMM1 = _mm_add_pd(XMM1, XMM3); \
+    } \
+    XMM0 = _mm_add_pd(XMM0, XMM1); \
+    XMM1 = _mm_shuffle_pd(XMM0, XMM0, _MM_SHUFFLE2(1, 1)); \
+    XMM0 = _mm_add_pd(XMM0, XMM1); \
+    _mm_store_sd((s), XMM0); \
+}
+
+#define vec2norm(s, x, n) \
+{ \
+    int i; \
+    __m128d XMM0 = _mm_setzero_pd(); \
+    __m128d XMM1 = _mm_setzero_pd(); \
+    __m128d XMM2, XMM3, XMM4, XMM5; \
+    for (i = 0;i < (n);i += 4) { \
+        XMM2 = _mm_load_pd((x)+i  ); \
+        XMM3 = _mm_load_pd((x)+i+2); \
+        XMM4 = XMM2; \
+        XMM5 = XMM3; \
+        XMM2 = _mm_mul_pd(XMM2, XMM4); \
+        XMM3 = _mm_mul_pd(XMM3, XMM5); \
+        XMM0 = _mm_add_pd(XMM0, XMM2); \
+        XMM1 = _mm_add_pd(XMM1, XMM3); \
+    } \
+    XMM0 = _mm_add_pd(XMM0, XMM1); \
+    XMM1 = _mm_shuffle_pd(XMM0, XMM0, _MM_SHUFFLE2(1, 1)); \
+    XMM0 = _mm_add_pd(XMM0, XMM1); \
+    XMM0 = _mm_sqrt_pd(XMM0); \
+    _mm_store_sd((s), XMM0); \
+}
+
+
+#define vec2norminv(s, x, n) \
+{ \
+    int i; \
+    __m128d XMM0 = _mm_setzero_pd(); \
+    __m128d XMM1 = _mm_setzero_pd(); \
+    __m128d XMM2, XMM3, XMM4, XMM5; \
+    for (i = 0;i < (n);i += 4) { \
+        XMM2 = _mm_load_pd((x)+i  ); \
+        XMM3 = _mm_load_pd((x)+i+2); \
+        XMM4 = XMM2; \
+        XMM5 = XMM3; \
+        XMM2 = _mm_mul_pd(XMM2, XMM4); \
+        XMM3 = _mm_mul_pd(XMM3, XMM5); \
+        XMM0 = _mm_add_pd(XMM0, XMM2); \
+        XMM1 = _mm_add_pd(XMM1, XMM3); \
+    } \
+    XMM2 = _mm_set1_pd(1.0); \
+    XMM0 = _mm_add_pd(XMM0, XMM1); \
+    XMM1 = _mm_shuffle_pd(XMM0, XMM0, _MM_SHUFFLE2(1, 1)); \
+    XMM0 = _mm_add_pd(XMM0, XMM1); \
+    XMM0 = _mm_sqrt_pd(XMM0); \
+    XMM2 = _mm_div_pd(XMM2, XMM0); \
+    _mm_store_sd((s), XMM2); \
+}
data/core/vendor/src/liblbfgs/lib/arithmetic_sse_float.h
@@ -0,0 +1,298 @@
+/*
+ * SSE/SSE3 implementation of vector oprations (32bit float).
+ *
+ * Copyright (c) 2007-2010 Naoaki Okazaki
+ * All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/* $Id$ */
+
+#include <stdlib.h>
+#ifndef __APPLE__
+#include <malloc.h>
+#endif
+#include <memory.h>
+
+#if 1400 <= _MSC_VER
+#include <intrin.h>
+#endif/*_MSC_VER*/
+
+#if HAVE_XMMINTRIN_H
+#include <xmmintrin.h>
+#endif/*HAVE_XMMINTRIN_H*/
+
+#if LBFGS_FLOAT == 32 && LBFGS_IEEE_FLOAT
+#define fsigndiff(x, y) (((*(uint32_t*)(x)) ^ (*(uint32_t*)(y))) & 0x80000000U)
+#else
+#define fsigndiff(x, y) (*(x) * (*(y) / fabs(*(y))) < 0.)
+#endif/*LBFGS_IEEE_FLOAT*/
+
+inline static void* vecalloc(size_t size)
+{
+#if defined(_MSC_VER)
+    void *memblock = _aligned_malloc(size, 16);
+#elif defined(__APPLE__) /* OS X always aligns on 16-byte boundaries */
+    void *memblock = malloc(size);
+#else
+    void *memblock = NULL, *p = NULL;
+    if (posix_memalign(&p, 16, size) == 0) {
+        memblock = p;
+    }
+#endif
+    if (memblock != NULL) {
+        memset(memblock, 0, size);
+    }
+    return memblock;
+}
+
+inline static void vecfree(void *memblock)
+{
+    _aligned_free(memblock);
+}
+
+#define vecset(x, c, n) \
+{ \
+    int i; \
+    __m128 XMM0 = _mm_set_ps1(c); \
+    for (i = 0;i < (n);i += 16) { \
+        _mm_store_ps((x)+i   , XMM0); \
+        _mm_store_ps((x)+i+ 4, XMM0); \
+        _mm_store_ps((x)+i+ 8, XMM0); \
+        _mm_store_ps((x)+i+12, XMM0); \
+    } \
+}
+
+#define veccpy(y, x, n) \
+{ \
+    int i; \
+    for (i = 0;i < (n);i += 16) { \
+        __m128 XMM0 = _mm_load_ps((x)+i   ); \
+        __m128 XMM1 = _mm_load_ps((x)+i+ 4); \
+        __m128 XMM2 = _mm_load_ps((x)+i+ 8); \
+        __m128 XMM3 = _mm_load_ps((x)+i+12); \
+        _mm_store_ps((y)+i   , XMM0); \
+        _mm_store_ps((y)+i+ 4, XMM1); \
+        _mm_store_ps((y)+i+ 8, XMM2); \
+        _mm_store_ps((y)+i+12, XMM3); \
+    } \
+}
+
+#define vecncpy(y, x, n) \
+{ \
+    int i; \
+    const uint32_t mask = 0x80000000; \
+    __m128 XMM4 = _mm_load_ps1((float*)&mask); \
+    for (i = 0;i < (n);i += 16) { \
+        __m128 XMM0 = _mm_load_ps((x)+i   ); \
+        __m128 XMM1 = _mm_load_ps((x)+i+ 4); \
+        __m128 XMM2 = _mm_load_ps((x)+i+ 8); \
+        __m128 XMM3 = _mm_load_ps((x)+i+12); \
+        XMM0 = _mm_xor_ps(XMM0, XMM4); \
+        XMM1 = _mm_xor_ps(XMM1, XMM4); \
+        XMM2 = _mm_xor_ps(XMM2, XMM4); \
+        XMM3 = _mm_xor_ps(XMM3, XMM4); \
+        _mm_store_ps((y)+i   , XMM0); \
+        _mm_store_ps((y)+i+ 4, XMM1); \
+        _mm_store_ps((y)+i+ 8, XMM2); \
+        _mm_store_ps((y)+i+12, XMM3); \
+    } \
+}
+
+#define vecadd(y, x, c, n) \
+{ \
+    int i; \
+    __m128 XMM7 = _mm_set_ps1(c); \
+    for (i = 0;i < (n);i += 8) { \
+        __m128 XMM0 = _mm_load_ps((x)+i  ); \
+        __m128 XMM1 = _mm_load_ps((x)+i+4); \
+        __m128 XMM2 = _mm_load_ps((y)+i  ); \
+        __m128 XMM3 = _mm_load_ps((y)+i+4); \
+        XMM0 = _mm_mul_ps(XMM0, XMM7); \
+        XMM1 = _mm_mul_ps(XMM1, XMM7); \
+        XMM2 = _mm_add_ps(XMM2, XMM0); \
+        XMM3 = _mm_add_ps(XMM3, XMM1); \
+        _mm_store_ps((y)+i  , XMM2); \
+        _mm_store_ps((y)+i+4, XMM3); \
+    } \
+}
+
+#define vecdiff(z, x, y, n) \
+{ \
+    int i; \
+    for (i = 0;i < (n);i += 16) { \
+        __m128 XMM0 = _mm_load_ps((x)+i   ); \
+        __m128 XMM1 = _mm_load_ps((x)+i+ 4); \
+        __m128 XMM2 = _mm_load_ps((x)+i+ 8); \
+        __m128 XMM3 = _mm_load_ps((x)+i+12); \
+        __m128 XMM4 = _mm_load_ps((y)+i   ); \
+        __m128 XMM5 = _mm_load_ps((y)+i+ 4); \
+        __m128 XMM6 = _mm_load_ps((y)+i+ 8); \
+        __m128 XMM7 = _mm_load_ps((y)+i+12); \
+        XMM0 = _mm_sub_ps(XMM0, XMM4); \
+        XMM1 = _mm_sub_ps(XMM1, XMM5); \
+        XMM2 = _mm_sub_ps(XMM2, XMM6); \
+        XMM3 = _mm_sub_ps(XMM3, XMM7); \
+        _mm_store_ps((z)+i   , XMM0); \
+        _mm_store_ps((z)+i+ 4, XMM1); \
+        _mm_store_ps((z)+i+ 8, XMM2); \
+        _mm_store_ps((z)+i+12, XMM3); \
+    } \
+}
+
+#define vecscale(y, c, n) \
+{ \
+    int i; \
+    __m128 XMM7 = _mm_set_ps1(c); \
+    for (i = 0;i < (n);i += 8) { \
+        __m128 XMM0 = _mm_load_ps((y)+i  ); \
+        __m128 XMM1 = _mm_load_ps((y)+i+4); \
+        XMM0 = _mm_mul_ps(XMM0, XMM7); \
+        XMM1 = _mm_mul_ps(XMM1, XMM7); \
+        _mm_store_ps((y)+i  , XMM0); \
+        _mm_store_ps((y)+i+4, XMM1); \
+    } \
+}
+
+#define vecmul(y, x, n) \
+{ \
+    int i; \
+    for (i = 0;i < (n);i += 16) { \
+        __m128 XMM0 = _mm_load_ps((x)+i   ); \
+        __m128 XMM1 = _mm_load_ps((x)+i+ 4); \
+        __m128 XMM2 = _mm_load_ps((x)+i+ 8); \
+        __m128 XMM3 = _mm_load_ps((x)+i+12); \
+        __m128 XMM4 = _mm_load_ps((y)+i   ); \
+        __m128 XMM5 = _mm_load_ps((y)+i+ 4); \
+        __m128 XMM6 = _mm_load_ps((y)+i+ 8); \
+        __m128 XMM7 = _mm_load_ps((y)+i+12); \
+        XMM4 = _mm_mul_ps(XMM4, XMM0); \
+        XMM5 = _mm_mul_ps(XMM5, XMM1); \
+        XMM6 = _mm_mul_ps(XMM6, XMM2); \
+        XMM7 = _mm_mul_ps(XMM7, XMM3); \
+        _mm_store_ps((y)+i   , XMM4); \
+        _mm_store_ps((y)+i+ 4, XMM5); \
+        _mm_store_ps((y)+i+ 8, XMM6); \
+        _mm_store_ps((y)+i+12, XMM7); \
+    } \
+}
+
+
+
+#if 3 <= __SSE__ || defined(__SSE3__)
+/*
+    Horizontal add with haddps SSE3 instruction. The work register (rw)
+    is unused.
+ */
+#define __horizontal_sum(r, rw) \
+    r = _mm_hadd_ps(r, r); \
+    r = _mm_hadd_ps(r, r);
+
+#else
+/*
+    Horizontal add with SSE instruction. The work register (rw) is used.
+ */
+#define __horizontal_sum(r, rw) \
+    rw = r; \
+    r = _mm_shuffle_ps(r, rw, _MM_SHUFFLE(1, 0, 3, 2)); \
+    r = _mm_add_ps(r, rw); \
+    rw = r; \
+    r = _mm_shuffle_ps(r, rw, _MM_SHUFFLE(2, 3, 0, 1)); \
+    r = _mm_add_ps(r, rw);
+
+#endif
+
+#define vecdot(s, x, y, n) \
+{ \
+    int i; \
+    __m128 XMM0 = _mm_setzero_ps(); \
+    __m128 XMM1 = _mm_setzero_ps(); \
+    __m128 XMM2, XMM3, XMM4, XMM5; \
+    for (i = 0;i < (n);i += 8) { \
+        XMM2 = _mm_load_ps((x)+i  ); \
+        XMM3 = _mm_load_ps((x)+i+4); \
+        XMM4 = _mm_load_ps((y)+i  ); \
+        XMM5 = _mm_load_ps((y)+i+4); \
+        XMM2 = _mm_mul_ps(XMM2, XMM4); \
+        XMM3 = _mm_mul_ps(XMM3, XMM5); \
+        XMM0 = _mm_add_ps(XMM0, XMM2); \
+        XMM1 = _mm_add_ps(XMM1, XMM3); \
+    } \
+    XMM0 = _mm_add_ps(XMM0, XMM1); \
+    __horizontal_sum(XMM0, XMM1); \
+    _mm_store_ss((s), XMM0); \
+}
+
+#define vec2norm(s, x, n) \
+{ \
+    int i; \
+    __m128 XMM0 = _mm_setzero_ps(); \
+    __m128 XMM1 = _mm_setzero_ps(); \
+    __m128 XMM2, XMM3; \
+    for (i = 0;i < (n);i += 8) { \
+        XMM2 = _mm_load_ps((x)+i  ); \
+        XMM3 = _mm_load_ps((x)+i+4); \
+        XMM2 = _mm_mul_ps(XMM2, XMM2); \
+        XMM3 = _mm_mul_ps(XMM3, XMM3); \
+        XMM0 = _mm_add_ps(XMM0, XMM2); \
+        XMM1 = _mm_add_ps(XMM1, XMM3); \
+    } \
+    XMM0 = _mm_add_ps(XMM0, XMM1); \
+    __horizontal_sum(XMM0, XMM1); \
+    XMM2 = XMM0; \
+    XMM1 = _mm_rsqrt_ss(XMM0); \
+    XMM3 = XMM1; \
+    XMM1 = _mm_mul_ss(XMM1, XMM1); \
+    XMM1 = _mm_mul_ss(XMM1, XMM3); \
+    XMM1 = _mm_mul_ss(XMM1, XMM0); \
+    XMM1 = _mm_mul_ss(XMM1, _mm_set_ss(-0.5f)); \
+    XMM3 = _mm_mul_ss(XMM3, _mm_set_ss(1.5f)); \
+    XMM3 = _mm_add_ss(XMM3, XMM1); \
+    XMM3 = _mm_mul_ss(XMM3, XMM2); \
+    _mm_store_ss((s), XMM3); \
+}
+
+#define vec2norminv(s, x, n) \
+{ \
+    int i; \
+    __m128 XMM0 = _mm_setzero_ps(); \
+    __m128 XMM1 = _mm_setzero_ps(); \
+    __m128 XMM2, XMM3; \
+    for (i = 0;i < (n);i += 16) { \
+        XMM2 = _mm_load_ps((x)+i  ); \
+        XMM3 = _mm_load_ps((x)+i+4); \
+        XMM2 = _mm_mul_ps(XMM2, XMM2); \
+        XMM3 = _mm_mul_ps(XMM3, XMM3); \
+        XMM0 = _mm_add_ps(XMM0, XMM2); \
+        XMM1 = _mm_add_ps(XMM1, XMM3); \
+    } \
+    XMM0 = _mm_add_ps(XMM0, XMM1); \
+    __horizontal_sum(XMM0, XMM1); \
+    XMM2 = XMM0; \
+    XMM1 = _mm_rsqrt_ss(XMM0); \
+    XMM3 = XMM1; \
+    XMM1 = _mm_mul_ss(XMM1, XMM1); \
+    XMM1 = _mm_mul_ss(XMM1, XMM3); \
+    XMM1 = _mm_mul_ss(XMM1, XMM0); \
+    XMM1 = _mm_mul_ss(XMM1, _mm_set_ss(-0.5f)); \
+    XMM3 = _mm_mul_ss(XMM3, _mm_set_ss(1.5f)); \
+    XMM3 = _mm_add_ss(XMM3, XMM1); \
+    _mm_store_ss((s), XMM3); \
+}
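
The three arithmetic_*.h files added above (ANSI C, SSE2 double, SSE/SSE3 float) are interchangeable backends for the same vecalloc/vecset/vecdot/vec2norm interface used by the vendored lbfgs.c; the SSE variants additionally expect 16-byte-aligned buffers (hence _aligned_malloc/posix_memalign in their vecalloc) and loop counts padded to the unroll width of the macros. As a hedged sketch of how a build typically selects among them, the selection usually looks like the following; the USE_SSE and LBFGS_FLOAT macro spellings reflect the usual liblbfgs convention and should be checked against the vendored lbfgs.c rather than read as a quote from this diff.

/* Sketch of compile-time backend selection in the liblbfgs style
 * (assumed macro names, not quoted from the vendored lbfgs.c). */
#if     defined(USE_SSE) && defined(__SSE2__) && LBFGS_FLOAT == 64
/* SSE2 kernels operating on 64-bit doubles. */
#include "arithmetic_sse_double.h"
#elif   defined(USE_SSE) && defined(__SSE__) && LBFGS_FLOAT == 32
/* SSE/SSE3 kernels operating on 32-bit floats. */
#include "arithmetic_sse_float.h"
#else
/* Portable ANSI C fallback with no CPU-specific optimization. */
#include "arithmetic_ansi.h"
#endif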