opener-opinion-detector-base 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (261) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +101 -0
  3. data/bin/opinion-detector-base +19 -0
  4. data/core/annotation.cfg.erb +9 -0
  5. data/core/packages/KafNafParser-1.4.tar.gz +0 -0
  6. data/core/packages/VUA_pylib-1.5.tar.gz +0 -0
  7. data/core/python-scripts/LICENSE +339 -0
  8. data/core/python-scripts/README.md +226 -0
  9. data/core/python-scripts/classify_kaf_naf_file.py +499 -0
  10. data/core/python-scripts/cross_validation.py +634 -0
  11. data/core/python-scripts/generate_folds.py +134 -0
  12. data/core/python-scripts/models.cfg +10 -0
  13. data/core/python-scripts/my_templates/README +33 -0
  14. data/core/python-scripts/my_templates/templates_exp.only0.txt +6 -0
  15. data/core/python-scripts/my_templates/templates_exp.pol0.txt +10 -0
  16. data/core/python-scripts/my_templates/templates_exp.red.txt +7 -0
  17. data/core/python-scripts/my_templates/templates_exp.txt +10 -0
  18. data/core/python-scripts/my_templates/templates_holder.only0.txt +11 -0
  19. data/core/python-scripts/my_templates/templates_holder.red.txt +9 -0
  20. data/core/python-scripts/my_templates/templates_holder.txt +10 -0
  21. data/core/python-scripts/my_templates/templates_target.only0.txt +11 -0
  22. data/core/python-scripts/my_templates/templates_target.red.txt +9 -0
  23. data/core/python-scripts/my_templates/templates_target.txt +10 -0
  24. data/core/python-scripts/run_all_experiments.sh +49 -0
  25. data/core/python-scripts/run_basic.py +20 -0
  26. data/core/python-scripts/run_experiment.sh +42 -0
  27. data/core/python-scripts/scripts/__init__.py +1 -0
  28. data/core/python-scripts/scripts/config_manager.py +314 -0
  29. data/core/python-scripts/scripts/crfutils.py +215 -0
  30. data/core/python-scripts/scripts/extract_feats_relations.py +295 -0
  31. data/core/python-scripts/scripts/extract_features.py +376 -0
  32. data/core/python-scripts/scripts/feats_to_crf.exp.py +105 -0
  33. data/core/python-scripts/scripts/lexicons.py +44 -0
  34. data/core/python-scripts/scripts/link_entities_distance.py +77 -0
  35. data/core/python-scripts/scripts/relation_classifier.py +250 -0
  36. data/core/python-scripts/train.py +566 -0
  37. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/PKG-INFO +10 -0
  38. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/SOURCES.txt +22 -0
  39. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/dependency_links.txt +1 -0
  40. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/installed-files.txt +47 -0
  41. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/top_level.txt +1 -0
  42. data/core/site-packages/pre_build/KafNafParser/KafNafParserMod.py +390 -0
  43. data/core/site-packages/pre_build/KafNafParser/KafNafParserMod.pyc +0 -0
  44. data/core/site-packages/pre_build/KafNafParser/__init__.py +14 -0
  45. data/core/site-packages/pre_build/KafNafParser/__init__.pyc +0 -0
  46. data/core/site-packages/pre_build/KafNafParser/constituency_data.py +125 -0
  47. data/core/site-packages/pre_build/KafNafParser/constituency_data.pyc +0 -0
  48. data/core/site-packages/pre_build/KafNafParser/coreference_data.py +52 -0
  49. data/core/site-packages/pre_build/KafNafParser/coreference_data.pyc +0 -0
  50. data/core/site-packages/pre_build/KafNafParser/dependency_data.py +78 -0
  51. data/core/site-packages/pre_build/KafNafParser/dependency_data.pyc +0 -0
  52. data/core/site-packages/pre_build/KafNafParser/entity_data.py +59 -0
  53. data/core/site-packages/pre_build/KafNafParser/entity_data.pyc +0 -0
  54. data/core/site-packages/pre_build/KafNafParser/external_references_data.py +41 -0
  55. data/core/site-packages/pre_build/KafNafParser/external_references_data.pyc +0 -0
  56. data/core/site-packages/pre_build/KafNafParser/feature_extractor/__init__.py +2 -0
  57. data/core/site-packages/pre_build/KafNafParser/feature_extractor/__init__.pyc +0 -0
  58. data/core/site-packages/pre_build/KafNafParser/feature_extractor/constituency.py +205 -0
  59. data/core/site-packages/pre_build/KafNafParser/feature_extractor/constituency.pyc +0 -0
  60. data/core/site-packages/pre_build/KafNafParser/feature_extractor/dependency.py +309 -0
  61. data/core/site-packages/pre_build/KafNafParser/feature_extractor/dependency.pyc +0 -0
  62. data/core/site-packages/pre_build/KafNafParser/features_data.py +131 -0
  63. data/core/site-packages/pre_build/KafNafParser/features_data.pyc +0 -0
  64. data/core/site-packages/pre_build/KafNafParser/header_data.py +127 -0
  65. data/core/site-packages/pre_build/KafNafParser/header_data.pyc +0 -0
  66. data/core/site-packages/pre_build/KafNafParser/opinion_data.py +211 -0
  67. data/core/site-packages/pre_build/KafNafParser/opinion_data.pyc +0 -0
  68. data/core/site-packages/pre_build/KafNafParser/references_data.py +23 -0
  69. data/core/site-packages/pre_build/KafNafParser/references_data.pyc +0 -0
  70. data/core/site-packages/pre_build/KafNafParser/span_data.py +63 -0
  71. data/core/site-packages/pre_build/KafNafParser/span_data.pyc +0 -0
  72. data/core/site-packages/pre_build/KafNafParser/term_data.py +111 -0
  73. data/core/site-packages/pre_build/KafNafParser/term_data.pyc +0 -0
  74. data/core/site-packages/pre_build/KafNafParser/term_sentiment_data.py +42 -0
  75. data/core/site-packages/pre_build/KafNafParser/term_sentiment_data.pyc +0 -0
  76. data/core/site-packages/pre_build/KafNafParser/text_data.py +99 -0
  77. data/core/site-packages/pre_build/KafNafParser/text_data.pyc +0 -0
  78. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/PKG-INFO +10 -0
  79. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/SOURCES.txt +14 -0
  80. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/dependency_links.txt +1 -0
  81. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/installed-files.txt +23 -0
  82. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/top_level.txt +1 -0
  83. data/core/site-packages/pre_build/VUA_pylib/__init__.py +1 -0
  84. data/core/site-packages/pre_build/VUA_pylib/__init__.pyc +0 -0
  85. data/core/site-packages/pre_build/VUA_pylib/common/__init__.py +1 -0
  86. data/core/site-packages/pre_build/VUA_pylib/common/__init__.pyc +0 -0
  87. data/core/site-packages/pre_build/VUA_pylib/common/common.py +28 -0
  88. data/core/site-packages/pre_build/VUA_pylib/common/common.pyc +0 -0
  89. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/__init__.py +1 -0
  90. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/__init__.pyc +0 -0
  91. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/google_web_nl.py +156 -0
  92. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/google_web_nl.pyc +0 -0
  93. data/core/site-packages/pre_build/VUA_pylib/io_utils/__init__.py +1 -0
  94. data/core/site-packages/pre_build/VUA_pylib/io_utils/__init__.pyc +0 -0
  95. data/core/site-packages/pre_build/VUA_pylib/io_utils/feature_file.py +121 -0
  96. data/core/site-packages/pre_build/VUA_pylib/io_utils/feature_file.pyc +0 -0
  97. data/core/site-packages/pre_build/VUA_pylib/lexicon/__init__.py +1 -0
  98. data/core/site-packages/pre_build/VUA_pylib/lexicon/__init__.pyc +0 -0
  99. data/core/site-packages/pre_build/VUA_pylib/lexicon/lexicon.py +72 -0
  100. data/core/site-packages/pre_build/VUA_pylib/lexicon/lexicon.pyc +0 -0
  101. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/PKG-INFO +10 -0
  102. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/SOURCES.txt +7 -0
  103. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/dependency_links.txt +1 -0
  104. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/installed-files.txt +11 -0
  105. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/top_level.txt +1 -0
  106. data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.py +165 -0
  107. data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.pyc +0 -0
  108. data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.py +439 -0
  109. data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.pyc +0 -0
  110. data/core/site-packages/pre_build/VUKafParserPy/__init__.py +7 -0
  111. data/core/site-packages/pre_build/VUKafParserPy/__init__.pyc +0 -0
  112. data/core/vendor/src/crfsuite/AUTHORS +1 -0
  113. data/core/vendor/src/crfsuite/COPYING +27 -0
  114. data/core/vendor/src/crfsuite/ChangeLog +103 -0
  115. data/core/vendor/src/crfsuite/INSTALL +236 -0
  116. data/core/vendor/src/crfsuite/Makefile.am +19 -0
  117. data/core/vendor/src/crfsuite/Makefile.in +783 -0
  118. data/core/vendor/src/crfsuite/README +183 -0
  119. data/core/vendor/src/crfsuite/aclocal.m4 +9018 -0
  120. data/core/vendor/src/crfsuite/autogen.sh +38 -0
  121. data/core/vendor/src/crfsuite/compile +143 -0
  122. data/core/vendor/src/crfsuite/config.guess +1502 -0
  123. data/core/vendor/src/crfsuite/config.h.in +198 -0
  124. data/core/vendor/src/crfsuite/config.sub +1714 -0
  125. data/core/vendor/src/crfsuite/configure +14273 -0
  126. data/core/vendor/src/crfsuite/configure.in +149 -0
  127. data/core/vendor/src/crfsuite/crfsuite.sln +42 -0
  128. data/core/vendor/src/crfsuite/depcomp +630 -0
  129. data/core/vendor/src/crfsuite/example/chunking.py +49 -0
  130. data/core/vendor/src/crfsuite/example/crfutils.py +179 -0
  131. data/core/vendor/src/crfsuite/example/ner.py +270 -0
  132. data/core/vendor/src/crfsuite/example/pos.py +78 -0
  133. data/core/vendor/src/crfsuite/example/template.py +88 -0
  134. data/core/vendor/src/crfsuite/frontend/Makefile.am +29 -0
  135. data/core/vendor/src/crfsuite/frontend/Makefile.in +640 -0
  136. data/core/vendor/src/crfsuite/frontend/dump.c +116 -0
  137. data/core/vendor/src/crfsuite/frontend/frontend.vcxproj +129 -0
  138. data/core/vendor/src/crfsuite/frontend/iwa.c +273 -0
  139. data/core/vendor/src/crfsuite/frontend/iwa.h +65 -0
  140. data/core/vendor/src/crfsuite/frontend/learn.c +439 -0
  141. data/core/vendor/src/crfsuite/frontend/main.c +137 -0
  142. data/core/vendor/src/crfsuite/frontend/option.c +93 -0
  143. data/core/vendor/src/crfsuite/frontend/option.h +86 -0
  144. data/core/vendor/src/crfsuite/frontend/readdata.h +38 -0
  145. data/core/vendor/src/crfsuite/frontend/reader.c +136 -0
  146. data/core/vendor/src/crfsuite/frontend/tag.c +427 -0
  147. data/core/vendor/src/crfsuite/genbinary.sh.in +15 -0
  148. data/core/vendor/src/crfsuite/include/Makefile.am +11 -0
  149. data/core/vendor/src/crfsuite/include/Makefile.in +461 -0
  150. data/core/vendor/src/crfsuite/include/crfsuite.h +1063 -0
  151. data/core/vendor/src/crfsuite/include/crfsuite.hpp +555 -0
  152. data/core/vendor/src/crfsuite/include/crfsuite_api.hpp +400 -0
  153. data/core/vendor/src/crfsuite/include/os.h +61 -0
  154. data/core/vendor/src/crfsuite/install-sh +520 -0
  155. data/core/vendor/src/crfsuite/lib/cqdb/COPYING +28 -0
  156. data/core/vendor/src/crfsuite/lib/cqdb/Makefile.am +21 -0
  157. data/core/vendor/src/crfsuite/lib/cqdb/Makefile.in +549 -0
  158. data/core/vendor/src/crfsuite/lib/cqdb/cqdb.vcxproj +86 -0
  159. data/core/vendor/src/crfsuite/lib/cqdb/include/cqdb.h +524 -0
  160. data/core/vendor/src/crfsuite/lib/cqdb/src/cqdb.c +587 -0
  161. data/core/vendor/src/crfsuite/lib/cqdb/src/lookup3.c +976 -0
  162. data/core/vendor/src/crfsuite/lib/crf/Makefile.am +46 -0
  163. data/core/vendor/src/crfsuite/lib/crf/Makefile.in +721 -0
  164. data/core/vendor/src/crfsuite/lib/crf/crf.vcxproj +216 -0
  165. data/core/vendor/src/crfsuite/lib/crf/src/crf1d.h +353 -0
  166. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_context.c +705 -0
  167. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_encode.c +943 -0
  168. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_feature.c +352 -0
  169. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_model.c +994 -0
  170. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_tag.c +550 -0
  171. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite.c +492 -0
  172. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite_internal.h +236 -0
  173. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite_train.c +272 -0
  174. data/core/vendor/src/crfsuite/lib/crf/src/dataset.c +106 -0
  175. data/core/vendor/src/crfsuite/lib/crf/src/dictionary.c +118 -0
  176. data/core/vendor/src/crfsuite/lib/crf/src/holdout.c +80 -0
  177. data/core/vendor/src/crfsuite/lib/crf/src/logging.c +91 -0
  178. data/core/vendor/src/crfsuite/lib/crf/src/logging.h +48 -0
  179. data/core/vendor/src/crfsuite/lib/crf/src/params.c +335 -0
  180. data/core/vendor/src/crfsuite/lib/crf/src/params.h +80 -0
  181. data/core/vendor/src/crfsuite/lib/crf/src/quark.c +172 -0
  182. data/core/vendor/src/crfsuite/lib/crf/src/quark.h +46 -0
  183. data/core/vendor/src/crfsuite/lib/crf/src/rumavl.c +1107 -0
  184. data/core/vendor/src/crfsuite/lib/crf/src/rumavl.h +160 -0
  185. data/core/vendor/src/crfsuite/lib/crf/src/train_arow.c +408 -0
  186. data/core/vendor/src/crfsuite/lib/crf/src/train_averaged_perceptron.c +242 -0
  187. data/core/vendor/src/crfsuite/lib/crf/src/train_l2sgd.c +507 -0
  188. data/core/vendor/src/crfsuite/lib/crf/src/train_lbfgs.c +338 -0
  189. data/core/vendor/src/crfsuite/lib/crf/src/train_passive_aggressive.c +435 -0
  190. data/core/vendor/src/crfsuite/lib/crf/src/vecmath.h +341 -0
  191. data/core/vendor/src/crfsuite/ltmain.sh +8413 -0
  192. data/core/vendor/src/crfsuite/missing +376 -0
  193. data/core/vendor/src/crfsuite/swig/Makefile.am +13 -0
  194. data/core/vendor/src/crfsuite/swig/Makefile.in +365 -0
  195. data/core/vendor/src/crfsuite/swig/crfsuite.cpp +2 -0
  196. data/core/vendor/src/crfsuite/swig/export.i +32 -0
  197. data/core/vendor/src/crfsuite/swig/python/README +92 -0
  198. data/core/vendor/src/crfsuite/swig/python/crfsuite.py +329 -0
  199. data/core/vendor/src/crfsuite/swig/python/export_wrap.cpp +14355 -0
  200. data/core/vendor/src/crfsuite/swig/python/export_wrap.h +63 -0
  201. data/core/vendor/src/crfsuite/swig/python/prepare.sh +9 -0
  202. data/core/vendor/src/crfsuite/swig/python/sample_tag.py +52 -0
  203. data/core/vendor/src/crfsuite/swig/python/sample_train.py +68 -0
  204. data/core/vendor/src/crfsuite/swig/python/setup.py +44 -0
  205. data/core/vendor/src/crfsuite/win32/stdint.h +679 -0
  206. data/core/vendor/src/liblbfgs/AUTHORS +1 -0
  207. data/core/vendor/src/liblbfgs/COPYING +22 -0
  208. data/core/vendor/src/liblbfgs/ChangeLog +120 -0
  209. data/core/vendor/src/liblbfgs/INSTALL +231 -0
  210. data/core/vendor/src/liblbfgs/Makefile.am +10 -0
  211. data/core/vendor/src/liblbfgs/Makefile.in +638 -0
  212. data/core/vendor/src/liblbfgs/NEWS +0 -0
  213. data/core/vendor/src/liblbfgs/README +71 -0
  214. data/core/vendor/src/liblbfgs/aclocal.m4 +6985 -0
  215. data/core/vendor/src/liblbfgs/autogen.sh +38 -0
  216. data/core/vendor/src/liblbfgs/config.guess +1411 -0
  217. data/core/vendor/src/liblbfgs/config.h.in +64 -0
  218. data/core/vendor/src/liblbfgs/config.sub +1500 -0
  219. data/core/vendor/src/liblbfgs/configure +21146 -0
  220. data/core/vendor/src/liblbfgs/configure.in +107 -0
  221. data/core/vendor/src/liblbfgs/depcomp +522 -0
  222. data/core/vendor/src/liblbfgs/include/lbfgs.h +745 -0
  223. data/core/vendor/src/liblbfgs/install-sh +322 -0
  224. data/core/vendor/src/liblbfgs/lbfgs.sln +26 -0
  225. data/core/vendor/src/liblbfgs/lib/Makefile.am +24 -0
  226. data/core/vendor/src/liblbfgs/lib/Makefile.in +499 -0
  227. data/core/vendor/src/liblbfgs/lib/arithmetic_ansi.h +133 -0
  228. data/core/vendor/src/liblbfgs/lib/arithmetic_sse_double.h +294 -0
  229. data/core/vendor/src/liblbfgs/lib/arithmetic_sse_float.h +298 -0
  230. data/core/vendor/src/liblbfgs/lib/lbfgs.c +1371 -0
  231. data/core/vendor/src/liblbfgs/lib/lib.vcxproj +95 -0
  232. data/core/vendor/src/liblbfgs/ltmain.sh +6426 -0
  233. data/core/vendor/src/liblbfgs/missing +353 -0
  234. data/core/vendor/src/liblbfgs/sample/Makefile.am +15 -0
  235. data/core/vendor/src/liblbfgs/sample/Makefile.in +433 -0
  236. data/core/vendor/src/liblbfgs/sample/sample.c +81 -0
  237. data/core/vendor/src/liblbfgs/sample/sample.cpp +126 -0
  238. data/core/vendor/src/liblbfgs/sample/sample.vcxproj +105 -0
  239. data/core/vendor/src/svm_light/LICENSE.txt +59 -0
  240. data/core/vendor/src/svm_light/Makefile +105 -0
  241. data/core/vendor/src/svm_light/kernel.h +40 -0
  242. data/core/vendor/src/svm_light/svm_classify.c +197 -0
  243. data/core/vendor/src/svm_light/svm_common.c +985 -0
  244. data/core/vendor/src/svm_light/svm_common.h +301 -0
  245. data/core/vendor/src/svm_light/svm_hideo.c +1062 -0
  246. data/core/vendor/src/svm_light/svm_learn.c +4147 -0
  247. data/core/vendor/src/svm_light/svm_learn.h +169 -0
  248. data/core/vendor/src/svm_light/svm_learn_main.c +397 -0
  249. data/core/vendor/src/svm_light/svm_loqo.c +211 -0
  250. data/ext/hack/Rakefile +17 -0
  251. data/ext/hack/support.rb +88 -0
  252. data/lib/opener/opinion_detectors/base.rb +112 -0
  253. data/lib/opener/opinion_detectors/base/version.rb +7 -0
  254. data/lib/opener/opinion_detectors/configuration_creator.rb +86 -0
  255. data/lib/opener/opinion_detectors/de.rb +7 -0
  256. data/lib/opener/opinion_detectors/en.rb +7 -0
  257. data/lib/opener/opinion_detectors/it.rb +7 -0
  258. data/lib/opener/opinion_detectors/nl.rb +6 -0
  259. data/opener-opinion-detector-base.gemspec +35 -0
  260. data/pre_build_requirements.txt +3 -0
  261. metadata +374 -0
@@ -0,0 +1,976 @@
1
+ /*
2
+ -------------------------------------------------------------------------------
3
+ lookup3.c, by Bob Jenkins, May 2006, Public Domain.
4
+
5
+ These are functions for producing 32-bit hashes for hash table lookup.
6
+ hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
7
+ are externally useful functions. Routines to test the hash are included
8
+ if SELF_TEST is defined. You can use this free for any purpose. It's in
9
+ the public domain. It has no warranty.
10
+
11
+ You probably want to use hashlittle(). hashlittle() and hashbig()
12
+ hash byte arrays. hashlittle() is faster than hashbig() on
13
+ little-endian machines. Intel and AMD are little-endian machines.
14
+ On second thought, you probably want hashlittle2(), which is identical to
15
+ hashlittle() except it returns two 32-bit hashes for the price of one.
16
+ You could implement hashbig2() if you wanted but I haven't bothered here.
17
+
18
+ If you want to find a hash of, say, exactly 7 integers, do
19
+ a = i1; b = i2; c = i3;
20
+ mix(a,b,c);
21
+ a += i4; b += i5; c += i6;
22
+ mix(a,b,c);
23
+ a += i7;
24
+ final(a,b,c);
25
+ then use c as the hash value. If you have a variable length array of
26
+ 4-byte integers to hash, use hashword(). If you have a byte array (like
27
+ a character string), use hashlittle(). If you have several byte arrays, or
28
+ a mix of things, see the comments above hashlittle().
29
+
30
+ Why is this so big? I read 12 bytes at a time into 3 4-byte integers,
31
+ then mix those integers. This is fast (you can do a lot more thorough
32
+ mixing with 12*3 instructions on 3 integers than you can with 3 instructions
33
+ on 1 byte), but shoehorning those bytes into integers efficiently is messy.
34
+ -------------------------------------------------------------------------------
35
+ */
36
+ //#define SELF_TEST 1
37
+
38
+ #include <stdio.h> /* defines printf for tests */
39
+ #include <time.h> /* defines time_t for timings in the test */
40
+ #include <stdint.h> /* defines uint32_t etc */
41
+ //#include <sys/param.h> /* attempt to define endianness */
42
+ #ifdef linux
43
+ # include <endian.h> /* attempt to define endianness */
44
+ #endif
45
+
46
+ /*
47
+ * My best guess at if you are big-endian or little-endian. This may
48
+ * need adjustment.
49
+ */
50
+ #if (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \
51
+ __BYTE_ORDER == __LITTLE_ENDIAN) || \
52
+ (defined(i386) || defined(__i386__) || defined(__i486__) || \
53
+ defined(__i586__) || defined(__i686__) || defined(vax) || defined(MIPSEL))
54
+ # define HASH_LITTLE_ENDIAN 1
55
+ # define HASH_BIG_ENDIAN 0
56
+ #elif (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \
57
+ __BYTE_ORDER == __BIG_ENDIAN) || \
58
+ (defined(sparc) || defined(POWERPC) || defined(mc68000) || defined(sel))
59
+ # define HASH_LITTLE_ENDIAN 0
60
+ # define HASH_BIG_ENDIAN 1
61
+ #else
62
+ # define HASH_LITTLE_ENDIAN 0
63
+ # define HASH_BIG_ENDIAN 0
64
+ #endif
65
+
66
+ #define hashsize(n) ((uint32_t)1<<(n))
67
+ #define hashmask(n) (hashsize(n)-1)
68
+ #define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
69
+
70
+ /*
71
+ -------------------------------------------------------------------------------
72
+ mix -- mix 3 32-bit values reversibly.
73
+
74
+ This is reversible, so any information in (a,b,c) before mix() is
75
+ still in (a,b,c) after mix().
76
+
77
+ If four pairs of (a,b,c) inputs are run through mix(), or through
78
+ mix() in reverse, there are at least 32 bits of the output that
79
+ are sometimes the same for one pair and different for another pair.
80
+ This was tested for:
81
+ * pairs that differed by one bit, by two bits, in any combination
82
+ of top bits of (a,b,c), or in any combination of bottom bits of
83
+ (a,b,c).
84
+ * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
85
+ the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
86
+ is commonly produced by subtraction) look like a single 1-bit
87
+ difference.
88
+ * the base values were pseudorandom, all zero but one bit set, or
89
+ all zero plus a counter that starts at zero.
90
+
91
+ Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
92
+ satisfy this are
93
+ 4 6 8 16 19 4
94
+ 9 15 3 18 27 15
95
+ 14 9 3 7 17 3
96
+ Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
97
+ for "differ" defined as + with a one-bit base and a two-bit delta. I
98
+ used http://burtleburtle.net/bob/hash/avalanche.html to choose
99
+ the operations, constants, and arrangements of the variables.
100
+
101
+ This does not achieve avalanche. There are input bits of (a,b,c)
102
+ that fail to affect some output bits of (a,b,c), especially of a. The
103
+ most thoroughly mixed value is c, but it doesn't really even achieve
104
+ avalanche in c.
105
+
106
+ This allows some parallelism. Read-after-writes are good at doubling
107
+ the number of bits affected, so the goal of mixing pulls in the opposite
108
+ direction as the goal of parallelism. I did what I could. Rotates
109
+ seem to cost as much as shifts on every machine I could lay my hands
110
+ on, and rotates are much kinder to the top and bottom bits, so I used
111
+ rotates.
112
+ -------------------------------------------------------------------------------
113
+ */
114
/*
 * mix(a,b,c) -- reversibly mix three 32-bit values in place.
 *
 * a, b and c must be modifiable 32-bit unsigned lvalues.  Each argument is
 * read and written several times, so do not pass expressions with side
 * effects.  The rotations are performed by the rot() macro.
 *
 * The body is wrapped in do { ... } while (0) so that "mix(a,b,c);" expands
 * to exactly one statement and stays valid inside an unbraced if/else.
 */
#define mix(a,b,c) \
do { \
  a -= c;  a ^= rot(c, 4);  c += b; \
  b -= a;  b ^= rot(a, 6);  a += c; \
  c -= b;  c ^= rot(b, 8);  b += a; \
  a -= c;  a ^= rot(c,16);  c += b; \
  b -= a;  b ^= rot(a,19);  a += c; \
  c -= b;  c ^= rot(b, 4);  b += a; \
} while (0)
123
+
124
+ /*
125
+ -------------------------------------------------------------------------------
126
+ final -- final mixing of 3 32-bit values (a,b,c) into c
127
+
128
+ Pairs of (a,b,c) values differing in only a few bits will usually
129
+ produce values of c that look totally different. This was tested for
130
+ * pairs that differed by one bit, by two bits, in any combination
131
+ of top bits of (a,b,c), or in any combination of bottom bits of
132
+ (a,b,c).
133
+ * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
134
+ the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
135
+ is commonly produced by subtraction) look like a single 1-bit
136
+ difference.
137
+ * the base values were pseudorandom, all zero but one bit set, or
138
+ all zero plus a counter that starts at zero.
139
+
140
+ These constants passed:
141
+ 14 11 25 16 4 14 24
142
+ 12 14 25 16 4 14 24
143
+ and these came close:
144
+ 4 8 15 26 3 22 24
145
+ 10 8 15 26 3 22 24
146
+ 11 8 15 26 3 22 24
147
+ -------------------------------------------------------------------------------
148
+ */
149
/*
 * final(a,b,c) -- final mixing of three 32-bit values; the result is left
 * in c (a and b are modified as well).
 *
 * a, b and c must be modifiable 32-bit unsigned lvalues.  Each argument is
 * read and written several times, so do not pass expressions with side
 * effects.  The rotations are performed by the rot() macro.
 *
 * The body is wrapped in do { ... } while (0) so that "final(a,b,c);"
 * expands to exactly one statement and stays valid inside an unbraced
 * if/else.
 */
#define final(a,b,c) \
do { \
  c ^= b; c -= rot(b,14); \
  a ^= c; a -= rot(c,11); \
  b ^= a; b -= rot(a,25); \
  c ^= b; c -= rot(b,16); \
  a ^= c; a -= rot(c,4);  \
  b ^= a; b -= rot(a,14); \
  c ^= b; c -= rot(b,24); \
} while (0)
159
+
160
+ /*
161
+ --------------------------------------------------------------------
162
+ This works on all machines. To be useful, it requires
163
+ -- that the key be an array of uint32_t's, and
164
+ -- that the length be the number of uint32_t's in the key
165
+
166
+ The function hashword() is identical to hashlittle() on little-endian
167
+ machines, and identical to hashbig() on big-endian machines,
168
+ except that the length has to be measured in uint32_ts rather than in
169
+ bytes. hashlittle() is more complicated than hashword() only because
170
+ hashlittle() has to dance around fitting the key bytes into registers.
171
+ --------------------------------------------------------------------
172
+ */
173
/*
 * hashword() -- hash an array of uint32_t words into a single 32-bit value.
 *
 *   k       : the key, an array of uint32_t values
 *   length  : number of uint32_t elements in k
 *   initval : the previous hash, or an arbitrary seed
 *
 * Returns the final value of the internal word c.
 */
uint32_t hashword(const uint32_t *k, size_t length, uint32_t initval)
{
  /* All three state words start from the same seed-derived value. */
  const uint32_t start = 0xdeadbeef + (((uint32_t)length)<<2) + initval;
  uint32_t a = start;
  uint32_t b = start;
  uint32_t c = start;

  /* Consume full three-word groups while more than three words remain. */
  for (; length > 3; length -= 3, k += 3)
  {
    a += k[0];
    b += k[1];
    c += k[2];
    mix(a,b,c);
  }

  /* Here length is 0..3.  With nothing left, c is returned unmixed. */
  if (length == 0)
  {
    return c;
  }

  /* Fold in the remaining one to three words, highest index first. */
  if (length == 3)
  {
    c += k[2];
  }
  if (length >= 2)
  {
    b += k[1];
  }
  a += k[0];
  final(a,b,c);

  return c;
}
207
+
208
+
209
+ /*
210
+ --------------------------------------------------------------------
211
+ hashword2() -- same as hashword(), but take two seeds and return two
212
+ 32-bit values. pc and pb must both be nonnull, and *pc and *pb must
213
+ both be initialized with seeds. If you pass in (*pb)==0, the output
214
+ (*pc) will be the same as the return value from hashword().
215
+ --------------------------------------------------------------------
216
+ */
217
/*
 * hashword2() -- like hashword(), but seeded with two values and producing
 * two 32-bit results.
 *
 *   k      : the key, an array of uint32_t values
 *   length : number of uint32_t elements in k
 *   pc     : IN: seed        OUT: primary hash value
 *   pb     : IN: more seed   OUT: secondary hash value
 *
 * Both pc and pb are dereferenced unconditionally, so neither may be null.
 */
void hashword2(const uint32_t *k, size_t length, uint32_t *pc, uint32_t *pb)
{
  uint32_t a, b, c;
  size_t remaining = length;
  const uint32_t *word = k;

  /* Seed the state from the length and *pc; *pb perturbs only c. */
  a = 0xdeadbeef + ((uint32_t)(length<<2)) + *pc;
  b = a;
  c = a;
  c += *pb;

  /* Consume full three-word groups while more than three words remain. */
  while (remaining > 3)
  {
    a += word[0];
    b += word[1];
    c += word[2];
    mix(a,b,c);
    word += 3;
    remaining -= 3;
  }

  /* One to three trailing words get added and finalized; zero words skip
     the final mixing entirely. */
  if (remaining != 0)
  {
    if (remaining == 3)
    {
      c += word[2];
    }
    if (remaining >= 2)
    {
      b += word[1];
    }
    a += word[0];
    final(a,b,c);
  }

  /* Hand both results back through the seed pointers. */
  *pc = c;
  *pb = b;
}
253
+
254
+
255
+ /*
256
+ -------------------------------------------------------------------------------
257
+ hashlittle() -- hash a variable-length key into a 32-bit value
258
+ k : the key (the unaligned variable-length array of bytes)
259
+ length : the length of the key, counting by bytes
260
+ initval : can be any 4-byte value
261
+ Returns a 32-bit value. Every bit of the key affects every bit of
262
+ the return value. Two keys differing by one or two bits will have
263
+ totally different hash values.
264
+
265
+ The best hash table sizes are powers of 2. There is no need to do
266
+ mod a prime (mod is sooo slow!). If you need less than 32 bits,
267
+ use a bitmask. For example, if you need only 10 bits, do
268
+ h = (h & hashmask(10));
269
+ In which case, the hash table should have hashsize(10) elements.
270
+
271
+ If you are hashing n strings (uint8_t **)k, do it like this:
272
+ for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h);
273
+
274
+ By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this
275
+ code any way you wish, private, educational, or commercial. It's free.
276
+
277
+ Use for hash table lookup, or anything where one collision in 2^^32 is
278
+ acceptable. Do NOT use for cryptographic purposes.
279
+ -------------------------------------------------------------------------------
280
+ */
281
+
282
+ uint32_t hashlittle( const void *key, size_t length, uint32_t initval)
283
+ {
284
+ uint32_t a,b,c; /* internal state */
285
+ union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */
286
+
287
+ /* Set up the internal state */
288
+ a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
289
+
290
+ u.ptr = key;
291
+ if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
292
+ const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
293
+
294
+ /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
295
+ while (length > 12)
296
+ {
297
+ a += k[0];
298
+ b += k[1];
299
+ c += k[2];
300
+ mix(a,b,c);
301
+ length -= 12;
302
+ k += 3;
303
+ }
304
+
305
+ /*----------------------------- handle the last (probably partial) block */
306
+ /*
307
+ * "k[2]&0xffffff" actually reads beyond the end of the string, but
308
+ * then masks off the part it's not allowed to read. Because the
309
+ * string is aligned, the masked-off tail is in the same word as the
310
+ * rest of the string. Every machine with memory protection I've seen
311
+ * does it on word boundaries, so is OK with this. But VALGRIND will
312
+ * still catch it and complain. The masking trick does make the hash
313
+ * noticeably faster for short strings (like English words).
314
+ */
315
+ #ifndef VALGRIND
316
+
317
+ switch(length)
318
+ {
319
+ case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
320
+ case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
321
+ case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
322
+ case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
323
+ case 8 : b+=k[1]; a+=k[0]; break;
324
+ case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
325
+ case 6 : b+=k[1]&0xffff; a+=k[0]; break;
326
+ case 5 : b+=k[1]&0xff; a+=k[0]; break;
327
+ case 4 : a+=k[0]; break;
328
+ case 3 : a+=k[0]&0xffffff; break;
329
+ case 2 : a+=k[0]&0xffff; break;
330
+ case 1 : a+=k[0]&0xff; break;
331
+ case 0 : return c; /* zero length strings require no mixing */
332
+ }
333
+
334
+ #else /* make valgrind happy */
335
+
336
+ k8 = (const uint8_t *)k;
337
+ switch(length)
338
+ {
339
+ case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
340
+ case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
341
+ case 10: c+=((uint32_t)k8[9])<<8; /* fall through */
342
+ case 9 : c+=k8[8]; /* fall through */
343
+ case 8 : b+=k[1]; a+=k[0]; break;
344
+ case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
345
+ case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */
346
+ case 5 : b+=k8[4]; /* fall through */
347
+ case 4 : a+=k[0]; break;
348
+ case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
349
+ case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */
350
+ case 1 : a+=k8[0]; break;
351
+ case 0 : return c;
352
+ }
353
+
354
+ #endif /* !valgrind */
355
+
356
+ } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
357
+ const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */
358
+ const uint8_t *k8;
359
+
360
+ /*--------------- all but last block: aligned reads and different mixing */
361
+ while (length > 12)
362
+ {
363
+ a += k[0] + (((uint32_t)k[1])<<16);
364
+ b += k[2] + (((uint32_t)k[3])<<16);
365
+ c += k[4] + (((uint32_t)k[5])<<16);
366
+ mix(a,b,c);
367
+ length -= 12;
368
+ k += 6;
369
+ }
370
+
371
+ /*----------------------------- handle the last (probably partial) block */
372
+ k8 = (const uint8_t *)k;
373
+ switch(length)
374
+ {
375
+ case 12: c+=k[4]+(((uint32_t)k[5])<<16);
376
+ b+=k[2]+(((uint32_t)k[3])<<16);
377
+ a+=k[0]+(((uint32_t)k[1])<<16);
378
+ break;
379
+ case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
380
+ case 10: c+=k[4];
381
+ b+=k[2]+(((uint32_t)k[3])<<16);
382
+ a+=k[0]+(((uint32_t)k[1])<<16);
383
+ break;
384
+ case 9 : c+=k8[8]; /* fall through */
385
+ case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
386
+ a+=k[0]+(((uint32_t)k[1])<<16);
387
+ break;
388
+ case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
389
+ case 6 : b+=k[2];
390
+ a+=k[0]+(((uint32_t)k[1])<<16);
391
+ break;
392
+ case 5 : b+=k8[4]; /* fall through */
393
+ case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
394
+ break;
395
+ case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
396
+ case 2 : a+=k[0];
397
+ break;
398
+ case 1 : a+=k8[0];
399
+ break;
400
+ case 0 : return c; /* zero length requires no mixing */
401
+ }
402
+
403
+ } else { /* need to read the key one byte at a time */
404
+ const uint8_t *k = (const uint8_t *)key;
405
+
406
+ /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
407
+ while (length > 12)
408
+ {
409
+ a += k[0];
410
+ a += ((uint32_t)k[1])<<8;
411
+ a += ((uint32_t)k[2])<<16;
412
+ a += ((uint32_t)k[3])<<24;
413
+ b += k[4];
414
+ b += ((uint32_t)k[5])<<8;
415
+ b += ((uint32_t)k[6])<<16;
416
+ b += ((uint32_t)k[7])<<24;
417
+ c += k[8];
418
+ c += ((uint32_t)k[9])<<8;
419
+ c += ((uint32_t)k[10])<<16;
420
+ c += ((uint32_t)k[11])<<24;
421
+ mix(a,b,c);
422
+ length -= 12;
423
+ k += 12;
424
+ }
425
+
426
+ /*-------------------------------- last block: affect all 32 bits of (c) */
427
+ switch(length) /* all the case statements fall through */
428
+ {
429
+ case 12: c+=((uint32_t)k[11])<<24;
430
+ case 11: c+=((uint32_t)k[10])<<16;
431
+ case 10: c+=((uint32_t)k[9])<<8;
432
+ case 9 : c+=k[8];
433
+ case 8 : b+=((uint32_t)k[7])<<24;
434
+ case 7 : b+=((uint32_t)k[6])<<16;
435
+ case 6 : b+=((uint32_t)k[5])<<8;
436
+ case 5 : b+=k[4];
437
+ case 4 : a+=((uint32_t)k[3])<<24;
438
+ case 3 : a+=((uint32_t)k[2])<<16;
439
+ case 2 : a+=((uint32_t)k[1])<<8;
440
+ case 1 : a+=k[0];
441
+ break;
442
+ case 0 : return c;
443
+ }
444
+ }
445
+
446
+ final(a,b,c);
447
+ return c;
448
+ }
449
+
450
+
451
+ /*
452
+ * hashlittle2: return 2 32-bit hash values
453
+ *
454
+ * This is identical to hashlittle(), except it returns two 32-bit hash
455
+ * values instead of just one. This is good enough for hash table
456
+ * lookup with 2^^64 buckets, or if you want a second hash if you're not
457
+ * happy with the first, or if you want a probably-unique 64-bit ID for
458
+ * the key. *pc is better mixed than *pb, so use *pc first. If you want
459
+ * a 64-bit value do something like "*pc + (((uint64_t)*pb)<<32)".
460
+ */
461
+ void hashlittle2(
462
+ const void *key, /* the key to hash */
463
+ size_t length, /* length of the key */
464
+ uint32_t *pc, /* IN: primary initval, OUT: primary hash */
465
+ uint32_t *pb) /* IN: secondary initval, OUT: secondary hash */
466
+ {
467
+ uint32_t a,b,c; /* internal state */
468
+ union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */
469
+
470
+ /* Set up the internal state */
471
+ a = b = c = 0xdeadbeef + ((uint32_t)length) + *pc;
472
+ c += *pb;
473
+
474
+ u.ptr = key;
475
+ if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
476
+ const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
477
+
478
+ /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
479
+ while (length > 12)
480
+ {
481
+ a += k[0];
482
+ b += k[1];
483
+ c += k[2];
484
+ mix(a,b,c);
485
+ length -= 12;
486
+ k += 3;
487
+ }
488
+
489
+ /*----------------------------- handle the last (probably partial) block */
490
+ /*
491
+ * "k[2]&0xffffff" actually reads beyond the end of the string, but
492
+ * then masks off the part it's not allowed to read. Because the
493
+ * string is aligned, the masked-off tail is in the same word as the
494
+ * rest of the string. Every machine with memory protection I've seen
495
+ * does it on word boundaries, so is OK with this. But VALGRIND will
496
+ * still catch it and complain. The masking trick does make the hash
497
+ * noticably faster for short strings (like English words).
498
+ */
499
+ #ifndef VALGRIND
500
+
501
+ switch(length)
502
+ {
503
+ case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
504
+ case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
505
+ case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
506
+ case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
507
+ case 8 : b+=k[1]; a+=k[0]; break;
508
+ case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
509
+ case 6 : b+=k[1]&0xffff; a+=k[0]; break;
510
+ case 5 : b+=k[1]&0xff; a+=k[0]; break;
511
+ case 4 : a+=k[0]; break;
512
+ case 3 : a+=k[0]&0xffffff; break;
513
+ case 2 : a+=k[0]&0xffff; break;
514
+ case 1 : a+=k[0]&0xff; break;
515
+ case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
516
+ }
517
+
518
+ #else /* make valgrind happy */
519
+
520
+ k8 = (const uint8_t *)k;
521
+ switch(length)
522
+ {
523
+ case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
524
+ case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
525
+ case 10: c+=((uint32_t)k8[9])<<8; /* fall through */
526
+ case 9 : c+=k8[8]; /* fall through */
527
+ case 8 : b+=k[1]; a+=k[0]; break;
528
+ case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
529
+ case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */
530
+ case 5 : b+=k8[4]; /* fall through */
531
+ case 4 : a+=k[0]; break;
532
+ case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
533
+ case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */
534
+ case 1 : a+=k8[0]; break;
535
+ case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
536
+ }
537
+
538
+ #endif /* !valgrind */
539
+
540
+ } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
541
+ const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */
542
+ const uint8_t *k8;
543
+
544
+ /*--------------- all but last block: aligned reads and different mixing */
545
+ while (length > 12)
546
+ {
547
+ a += k[0] + (((uint32_t)k[1])<<16);
548
+ b += k[2] + (((uint32_t)k[3])<<16);
549
+ c += k[4] + (((uint32_t)k[5])<<16);
550
+ mix(a,b,c);
551
+ length -= 12;
552
+ k += 6;
553
+ }
554
+
555
+ /*----------------------------- handle the last (probably partial) block */
556
+ k8 = (const uint8_t *)k;
557
+ switch(length)
558
+ {
559
+ case 12: c+=k[4]+(((uint32_t)k[5])<<16);
560
+ b+=k[2]+(((uint32_t)k[3])<<16);
561
+ a+=k[0]+(((uint32_t)k[1])<<16);
562
+ break;
563
+ case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
564
+ case 10: c+=k[4];
565
+ b+=k[2]+(((uint32_t)k[3])<<16);
566
+ a+=k[0]+(((uint32_t)k[1])<<16);
567
+ break;
568
+ case 9 : c+=k8[8]; /* fall through */
569
+ case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
570
+ a+=k[0]+(((uint32_t)k[1])<<16);
571
+ break;
572
+ case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
573
+ case 6 : b+=k[2];
574
+ a+=k[0]+(((uint32_t)k[1])<<16);
575
+ break;
576
+ case 5 : b+=k8[4]; /* fall through */
577
+ case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
578
+ break;
579
+ case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
580
+ case 2 : a+=k[0];
581
+ break;
582
+ case 1 : a+=k8[0];
583
+ break;
584
+ case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
585
+ }
586
+
587
+ } else { /* need to read the key one byte at a time */
588
+ const uint8_t *k = (const uint8_t *)key;
589
+
590
+ /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
591
+ while (length > 12)
592
+ {
593
+ a += k[0];
594
+ a += ((uint32_t)k[1])<<8;
595
+ a += ((uint32_t)k[2])<<16;
596
+ a += ((uint32_t)k[3])<<24;
597
+ b += k[4];
598
+ b += ((uint32_t)k[5])<<8;
599
+ b += ((uint32_t)k[6])<<16;
600
+ b += ((uint32_t)k[7])<<24;
601
+ c += k[8];
602
+ c += ((uint32_t)k[9])<<8;
603
+ c += ((uint32_t)k[10])<<16;
604
+ c += ((uint32_t)k[11])<<24;
605
+ mix(a,b,c);
606
+ length -= 12;
607
+ k += 12;
608
+ }
609
+
610
+ /*-------------------------------- last block: affect all 32 bits of (c) */
611
+ switch(length) /* all the case statements fall through */
612
+ {
613
+ case 12: c+=((uint32_t)k[11])<<24;
614
+ case 11: c+=((uint32_t)k[10])<<16;
615
+ case 10: c+=((uint32_t)k[9])<<8;
616
+ case 9 : c+=k[8];
617
+ case 8 : b+=((uint32_t)k[7])<<24;
618
+ case 7 : b+=((uint32_t)k[6])<<16;
619
+ case 6 : b+=((uint32_t)k[5])<<8;
620
+ case 5 : b+=k[4];
621
+ case 4 : a+=((uint32_t)k[3])<<24;
622
+ case 3 : a+=((uint32_t)k[2])<<16;
623
+ case 2 : a+=((uint32_t)k[1])<<8;
624
+ case 1 : a+=k[0];
625
+ break;
626
+ case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
627
+ }
628
+ }
629
+
630
+ final(a,b,c);
631
+ *pc=c; *pb=b;
632
+ }
633
+
634
+
635
+
636
+ /*
637
+ * hashbig():
638
+ * This is the same as hashword() on big-endian machines. It is different
639
+ * from hashlittle() on all machines. hashbig() takes advantage of
640
+ * big-endian byte ordering.
641
+ */
642
+ uint32_t hashbig( const void *key, size_t length, uint32_t initval)
643
+ {
644
+ uint32_t a,b,c;
645
+ union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */
646
+
647
+ /* Set up the internal state */
648
+ a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
649
+
650
+ u.ptr = key;
651
+ if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) {
652
+ const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
653
+
654
+ /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
655
+ while (length > 12)
656
+ {
657
+ a += k[0];
658
+ b += k[1];
659
+ c += k[2];
660
+ mix(a,b,c);
661
+ length -= 12;
662
+ k += 3;
663
+ }
664
+
665
+ /*----------------------------- handle the last (probably partial) block */
666
+ /*
667
+ * "k[2]<<8" actually reads beyond the end of the string, but
668
+ * then shifts out the part it's not allowed to read. Because the
669
+ * string is aligned, the illegal read is in the same word as the
670
+ * rest of the string. Every machine with memory protection I've seen
671
+ * does it on word boundaries, so is OK with this. But VALGRIND will
672
+ * still catch it and complain. The masking trick does make the hash
673
+ * noticably faster for short strings (like English words).
674
+ */
675
+ #ifndef VALGRIND
676
+
677
+ switch(length)
678
+ {
679
+ case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
680
+ case 11: c+=k[2]&0xffffff00; b+=k[1]; a+=k[0]; break;
681
+ case 10: c+=k[2]&0xffff0000; b+=k[1]; a+=k[0]; break;
682
+ case 9 : c+=k[2]&0xff000000; b+=k[1]; a+=k[0]; break;
683
+ case 8 : b+=k[1]; a+=k[0]; break;
684
+ case 7 : b+=k[1]&0xffffff00; a+=k[0]; break;
685
+ case 6 : b+=k[1]&0xffff0000; a+=k[0]; break;
686
+ case 5 : b+=k[1]&0xff000000; a+=k[0]; break;
687
+ case 4 : a+=k[0]; break;
688
+ case 3 : a+=k[0]&0xffffff00; break;
689
+ case 2 : a+=k[0]&0xffff0000; break;
690
+ case 1 : a+=k[0]&0xff000000; break;
691
+ case 0 : return c; /* zero length strings require no mixing */
692
+ }
693
+
694
+ #else /* make valgrind happy */
695
+
696
+ k8 = (const uint8_t *)k;
697
+ switch(length) /* all the case statements fall through */
698
+ {
699
+ case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
700
+ case 11: c+=((uint32_t)k8[10])<<8; /* fall through */
701
+ case 10: c+=((uint32_t)k8[9])<<16; /* fall through */
702
+ case 9 : c+=((uint32_t)k8[8])<<24; /* fall through */
703
+ case 8 : b+=k[1]; a+=k[0]; break;
704
+ case 7 : b+=((uint32_t)k8[6])<<8; /* fall through */
705
+ case 6 : b+=((uint32_t)k8[5])<<16; /* fall through */
706
+ case 5 : b+=((uint32_t)k8[4])<<24; /* fall through */
707
+ case 4 : a+=k[0]; break;
708
+ case 3 : a+=((uint32_t)k8[2])<<8; /* fall through */
709
+ case 2 : a+=((uint32_t)k8[1])<<16; /* fall through */
710
+ case 1 : a+=((uint32_t)k8[0])<<24; break;
711
+ case 0 : return c;
712
+ }
713
+
714
+ #endif /* !VALGRIND */
715
+
716
+ } else { /* need to read the key one byte at a time */
717
+ const uint8_t *k = (const uint8_t *)key;
718
+
719
+ /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
720
+ while (length > 12)
721
+ {
722
+ a += ((uint32_t)k[0])<<24;
723
+ a += ((uint32_t)k[1])<<16;
724
+ a += ((uint32_t)k[2])<<8;
725
+ a += ((uint32_t)k[3]);
726
+ b += ((uint32_t)k[4])<<24;
727
+ b += ((uint32_t)k[5])<<16;
728
+ b += ((uint32_t)k[6])<<8;
729
+ b += ((uint32_t)k[7]);
730
+ c += ((uint32_t)k[8])<<24;
731
+ c += ((uint32_t)k[9])<<16;
732
+ c += ((uint32_t)k[10])<<8;
733
+ c += ((uint32_t)k[11]);
734
+ mix(a,b,c);
735
+ length -= 12;
736
+ k += 12;
737
+ }
738
+
739
+ /*-------------------------------- last block: affect all 32 bits of (c) */
740
+ switch(length) /* all the case statements fall through */
741
+ {
742
+ case 12: c+=k[11];
743
+ case 11: c+=((uint32_t)k[10])<<8;
744
+ case 10: c+=((uint32_t)k[9])<<16;
745
+ case 9 : c+=((uint32_t)k[8])<<24;
746
+ case 8 : b+=k[7];
747
+ case 7 : b+=((uint32_t)k[6])<<8;
748
+ case 6 : b+=((uint32_t)k[5])<<16;
749
+ case 5 : b+=((uint32_t)k[4])<<24;
750
+ case 4 : a+=k[3];
751
+ case 3 : a+=((uint32_t)k[2])<<8;
752
+ case 2 : a+=((uint32_t)k[1])<<16;
753
+ case 1 : a+=((uint32_t)k[0])<<24;
754
+ break;
755
+ case 0 : return c;
756
+ }
757
+ }
758
+
759
+ final(a,b,c);
760
+ return c;
761
+ }
762
+
763
+
764
+ #ifdef SELF_TEST
765
+
766
/*
 * driver1: used for timings.
 * Fills a 256-byte buffer with 'x', runs the hashlittle() loop over its
 * first byte, and prints the elapsed whole seconds together with the final
 * hash, but only when the elapsed time is greater than zero.
 */
void driver1(void)
{
  uint8_t buf[256];
  uint32_t i;
  uint32_t h=0;
  time_t a,z;

  time(&a);
  for (i=0; i<256; ++i) buf[i] = 'x';
  for (i=0; i<1; ++i)
  {
    h = hashlittle(&buf[0],1,h);
  }
  time(&z);
  /* time_t is not int: convert explicitly so the argument matches %ld */
  if (z-a > 0) printf("time %ld %.8x\n", (long)(z-a), h);
}
783
+
784
/*
 * driver2: check that every input bit changes every output bit half the time.
 *
 * For every key length below MAXLEN, every byte and bit position in the key,
 * and initvals 1..7, pairs of keys are hashed with hashlittle() until every
 * output bit has been seen changed, unchanged, set and clear.  The largest
 * number of trials needed is reported per key length; a failure to converge
 * within MAXPAIR trials is reported as "Some bit didn't change".
 */
#define HASHSTATE 1
#define HASHLEN 1
#define MAXPAIR 60
#define MAXLEN 70
void driver2(void)
{
  uint8_t qa[MAXLEN+1], qb[MAXLEN+2], *a = &qa[0], *b = &qb[1];
  uint32_t c[HASHSTATE], d[HASHSTATE], i=0, j=0, k, l, m=0, z;
  uint32_t e[HASHSTATE],f[HASHSTATE],g[HASHSTATE],h[HASHSTATE];
  uint32_t x[HASHSTATE],y[HASHSTATE];
  uint32_t hlen;

  printf("No more than %d trials should ever be needed \n",MAXPAIR/2);
  for (hlen=0; hlen < MAXLEN; ++hlen)
  {
    z=0;
    for (i=0; i<hlen; ++i)  /*----------------------- for each input byte, */
    {
      for (j=0; j<8; ++j)   /*------------------------ for each input bit, */
      {
        for (m=1; m<8; ++m) /*------------- for several possible initvals, */
        {
          for (l=0; l<HASHSTATE; ++l)
            e[l]=f[l]=g[l]=h[l]=x[l]=y[l]=~((uint32_t)0);

          /*---- check that every output bit is affected by that input bit */
          for (k=0; k<MAXPAIR; k+=2)
          {
            uint32_t finished=1;
            /* keys have one bit different */
            for (l=0; l<hlen+1; ++l) {a[l] = b[l] = (uint8_t)0;}
            /* have a and b be two keys differing in only one bit */
            a[i] ^= (k<<j);
            a[i] ^= (k>>(8-j));
            c[0] = hashlittle(a, hlen, m);
            b[i] ^= ((k+1)<<j);
            b[i] ^= ((k+1)>>(8-j));
            d[0] = hashlittle(b, hlen, m);
            /* check every bit is 1, 0, set, and not set at least once */
            for (l=0; l<HASHSTATE; ++l)
            {
              e[l] &= (c[l]^d[l]);
              f[l] &= ~(c[l]^d[l]);
              g[l] &= c[l];
              h[l] &= ~c[l];
              x[l] &= d[l];
              y[l] &= ~d[l];
              if (e[l]|f[l]|g[l]|h[l]|x[l]|y[l]) finished=0;
            }
            if (finished) break;
          }
          if (k>z) z=k;
          if (k==MAXPAIR)
          {
            printf("Some bit didn't change: ");
            printf("%.8x %.8x %.8x %.8x %.8x %.8x ",
                   e[0],f[0],g[0],h[0],x[0],y[0]);
            /* the counters are unsigned: print them with %u, not %d */
            printf("i %u j %u m %u len %u\n",
                   (unsigned)i, (unsigned)j, (unsigned)m, (unsigned)hlen);
          }
          if (z==MAXPAIR) goto done;
        }
      }
    }
   done:
    if (z < MAXPAIR)
    {
      printf("Mix success %2u bytes %2u initvals ",(unsigned)i,(unsigned)m);
      printf("required %u trials\n", (unsigned)(z/2));
    }
  }
  printf("\n");
}
857
+
858
+ /* Check for reading beyond the end of the buffer and alignment problems */
859
+ void driver3()
860
+ {
861
+ uint8_t buf[MAXLEN+20], *b;
862
+ uint32_t len;
863
+ uint8_t q[] = "This is the time for all good men to come to the aid of their country...";
864
+ uint32_t h;
865
+ uint8_t qq[] = "xThis is the time for all good men to come to the aid of their country...";
866
+ uint32_t i;
867
+ uint8_t qqq[] = "xxThis is the time for all good men to come to the aid of their country...";
868
+ uint32_t j;
869
+ uint8_t qqqq[] = "xxxThis is the time for all good men to come to the aid of their country...";
870
+ uint32_t ref,x,y;
871
+ uint8_t *p;
872
+
873
+ printf("Endianness. These lines should all be the same (for values filled in):\n");
874
+ printf("%.8x %.8x %.8x\n",
875
+ hashword((const uint32_t *)q, (sizeof(q)-1)/4, 13),
876
+ hashword((const uint32_t *)q, (sizeof(q)-5)/4, 13),
877
+ hashword((const uint32_t *)q, (sizeof(q)-9)/4, 13));
878
+ p = q;
879
+ printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
880
+ hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
881
+ hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
882
+ hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
883
+ hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
884
+ hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
885
+ hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
886
+ p = &qq[1];
887
+ printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
888
+ hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
889
+ hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
890
+ hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
891
+ hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
892
+ hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
893
+ hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
894
+ p = &qqq[2];
895
+ printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
896
+ hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
897
+ hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
898
+ hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
899
+ hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
900
+ hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
901
+ hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
902
+ p = &qqqq[3];
903
+ printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
904
+ hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
905
+ hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
906
+ hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
907
+ hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
908
+ hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
909
+ hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
910
+ printf("\n");
911
+
912
+ /* check that hashlittle2 and hashlittle produce the same results */
913
+ i=47; j=0;
914
+ hashlittle2(q, sizeof(q), &i, &j);
915
+ if (hashlittle(q, sizeof(q), 47) != i)
916
+ printf("hashlittle2 and hashlittle mismatch\n");
917
+
918
+ /* check that hashword2 and hashword produce the same results */
919
+ len = 0xdeadbeef;
920
+ i=47, j=0;
921
+ hashword2(&len, 1, &i, &j);
922
+ if (hashword(&len, 1, 47) != i)
923
+ printf("hashword2 and hashword mismatch %x %x\n",
924
+ i, hashword(&len, 1, 47));
925
+
926
+ /* check hashlittle doesn't read before or after the ends of the string */
927
+ for (h=0, b=buf+1; h<8; ++h, ++b)
928
+ {
929
+ for (i=0; i<MAXLEN; ++i)
930
+ {
931
+ len = i;
932
+ for (j=0; j<i; ++j) *(b+j)=0;
933
+
934
+ /* these should all be equal */
935
+ ref = hashlittle(b, len, (uint32_t)1);
936
+ *(b+i)=(uint8_t)~0;
937
+ *(b-1)=(uint8_t)~0;
938
+ x = hashlittle(b, len, (uint32_t)1);
939
+ y = hashlittle(b, len, (uint32_t)1);
940
+ if ((ref != x) || (ref != y))
941
+ {
942
+ printf("alignment error: %.8x %.8x %.8x %d %d\n",ref,x,y,
943
+ h, i);
944
+ }
945
+ }
946
+ }
947
+ }
948
+
949
/*
 * driver4: check for problems with nulls.
 * Hashes a zero-length key eight times with hashlittle(), feeding each
 * result back in as the next initval, and prints every hash so that they can
 * be checked to be all different.
 */
void driver4(void)
{
  uint8_t buf[1];
  uint32_t h,i;

  buf[0] = (uint8_t)~0;
  printf("These should all be different\n");
  for (i=0, h=0; i<8; ++i)
  {
    h = hashlittle(buf, 0, h);
    /* i is a 32-bit unsigned counter, so %ld would be a type mismatch */
    printf("%2u 0-byte strings, hash is %.8x\n", (unsigned)i, h);
  }
}
965
+
966
+
967
/*
 * Self-test entry point: runs the four drivers in order.  Their findings are
 * reported on stdout; the exit status is 0 once they have all run.
 */
int main(void)
{
  driver1();   /* test that the key is hashed: used for timings */
  driver2();   /* test that whole key is hashed thoroughly */
  driver3();   /* test that nothing but the key is hashed */
  driver4();   /* test hashing multiple buffers (all buffers are null) */
  return 0;    /* a non-zero status would signal failure to the caller */
}
975
+
976
+ #endif /* SELF_TEST */