opener-opinion-detector-base 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (261) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +101 -0
  3. data/bin/opinion-detector-base +19 -0
  4. data/core/annotation.cfg.erb +9 -0
  5. data/core/packages/KafNafParser-1.4.tar.gz +0 -0
  6. data/core/packages/VUA_pylib-1.5.tar.gz +0 -0
  7. data/core/python-scripts/LICENSE +339 -0
  8. data/core/python-scripts/README.md +226 -0
  9. data/core/python-scripts/classify_kaf_naf_file.py +499 -0
  10. data/core/python-scripts/cross_validation.py +634 -0
  11. data/core/python-scripts/generate_folds.py +134 -0
  12. data/core/python-scripts/models.cfg +10 -0
  13. data/core/python-scripts/my_templates/README +33 -0
  14. data/core/python-scripts/my_templates/templates_exp.only0.txt +6 -0
  15. data/core/python-scripts/my_templates/templates_exp.pol0.txt +10 -0
  16. data/core/python-scripts/my_templates/templates_exp.red.txt +7 -0
  17. data/core/python-scripts/my_templates/templates_exp.txt +10 -0
  18. data/core/python-scripts/my_templates/templates_holder.only0.txt +11 -0
  19. data/core/python-scripts/my_templates/templates_holder.red.txt +9 -0
  20. data/core/python-scripts/my_templates/templates_holder.txt +10 -0
  21. data/core/python-scripts/my_templates/templates_target.only0.txt +11 -0
  22. data/core/python-scripts/my_templates/templates_target.red.txt +9 -0
  23. data/core/python-scripts/my_templates/templates_target.txt +10 -0
  24. data/core/python-scripts/run_all_experiments.sh +49 -0
  25. data/core/python-scripts/run_basic.py +20 -0
  26. data/core/python-scripts/run_experiment.sh +42 -0
  27. data/core/python-scripts/scripts/__init__.py +1 -0
  28. data/core/python-scripts/scripts/config_manager.py +314 -0
  29. data/core/python-scripts/scripts/crfutils.py +215 -0
  30. data/core/python-scripts/scripts/extract_feats_relations.py +295 -0
  31. data/core/python-scripts/scripts/extract_features.py +376 -0
  32. data/core/python-scripts/scripts/feats_to_crf.exp.py +105 -0
  33. data/core/python-scripts/scripts/lexicons.py +44 -0
  34. data/core/python-scripts/scripts/link_entities_distance.py +77 -0
  35. data/core/python-scripts/scripts/relation_classifier.py +250 -0
  36. data/core/python-scripts/train.py +566 -0
  37. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/PKG-INFO +10 -0
  38. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/SOURCES.txt +22 -0
  39. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/dependency_links.txt +1 -0
  40. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/installed-files.txt +47 -0
  41. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/top_level.txt +1 -0
  42. data/core/site-packages/pre_build/KafNafParser/KafNafParserMod.py +390 -0
  43. data/core/site-packages/pre_build/KafNafParser/KafNafParserMod.pyc +0 -0
  44. data/core/site-packages/pre_build/KafNafParser/__init__.py +14 -0
  45. data/core/site-packages/pre_build/KafNafParser/__init__.pyc +0 -0
  46. data/core/site-packages/pre_build/KafNafParser/constituency_data.py +125 -0
  47. data/core/site-packages/pre_build/KafNafParser/constituency_data.pyc +0 -0
  48. data/core/site-packages/pre_build/KafNafParser/coreference_data.py +52 -0
  49. data/core/site-packages/pre_build/KafNafParser/coreference_data.pyc +0 -0
  50. data/core/site-packages/pre_build/KafNafParser/dependency_data.py +78 -0
  51. data/core/site-packages/pre_build/KafNafParser/dependency_data.pyc +0 -0
  52. data/core/site-packages/pre_build/KafNafParser/entity_data.py +59 -0
  53. data/core/site-packages/pre_build/KafNafParser/entity_data.pyc +0 -0
  54. data/core/site-packages/pre_build/KafNafParser/external_references_data.py +41 -0
  55. data/core/site-packages/pre_build/KafNafParser/external_references_data.pyc +0 -0
  56. data/core/site-packages/pre_build/KafNafParser/feature_extractor/__init__.py +2 -0
  57. data/core/site-packages/pre_build/KafNafParser/feature_extractor/__init__.pyc +0 -0
  58. data/core/site-packages/pre_build/KafNafParser/feature_extractor/constituency.py +205 -0
  59. data/core/site-packages/pre_build/KafNafParser/feature_extractor/constituency.pyc +0 -0
  60. data/core/site-packages/pre_build/KafNafParser/feature_extractor/dependency.py +309 -0
  61. data/core/site-packages/pre_build/KafNafParser/feature_extractor/dependency.pyc +0 -0
  62. data/core/site-packages/pre_build/KafNafParser/features_data.py +131 -0
  63. data/core/site-packages/pre_build/KafNafParser/features_data.pyc +0 -0
  64. data/core/site-packages/pre_build/KafNafParser/header_data.py +127 -0
  65. data/core/site-packages/pre_build/KafNafParser/header_data.pyc +0 -0
  66. data/core/site-packages/pre_build/KafNafParser/opinion_data.py +211 -0
  67. data/core/site-packages/pre_build/KafNafParser/opinion_data.pyc +0 -0
  68. data/core/site-packages/pre_build/KafNafParser/references_data.py +23 -0
  69. data/core/site-packages/pre_build/KafNafParser/references_data.pyc +0 -0
  70. data/core/site-packages/pre_build/KafNafParser/span_data.py +63 -0
  71. data/core/site-packages/pre_build/KafNafParser/span_data.pyc +0 -0
  72. data/core/site-packages/pre_build/KafNafParser/term_data.py +111 -0
  73. data/core/site-packages/pre_build/KafNafParser/term_data.pyc +0 -0
  74. data/core/site-packages/pre_build/KafNafParser/term_sentiment_data.py +42 -0
  75. data/core/site-packages/pre_build/KafNafParser/term_sentiment_data.pyc +0 -0
  76. data/core/site-packages/pre_build/KafNafParser/text_data.py +99 -0
  77. data/core/site-packages/pre_build/KafNafParser/text_data.pyc +0 -0
  78. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/PKG-INFO +10 -0
  79. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/SOURCES.txt +14 -0
  80. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/dependency_links.txt +1 -0
  81. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/installed-files.txt +23 -0
  82. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/top_level.txt +1 -0
  83. data/core/site-packages/pre_build/VUA_pylib/__init__.py +1 -0
  84. data/core/site-packages/pre_build/VUA_pylib/__init__.pyc +0 -0
  85. data/core/site-packages/pre_build/VUA_pylib/common/__init__.py +1 -0
  86. data/core/site-packages/pre_build/VUA_pylib/common/__init__.pyc +0 -0
  87. data/core/site-packages/pre_build/VUA_pylib/common/common.py +28 -0
  88. data/core/site-packages/pre_build/VUA_pylib/common/common.pyc +0 -0
  89. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/__init__.py +1 -0
  90. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/__init__.pyc +0 -0
  91. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/google_web_nl.py +156 -0
  92. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/google_web_nl.pyc +0 -0
  93. data/core/site-packages/pre_build/VUA_pylib/io_utils/__init__.py +1 -0
  94. data/core/site-packages/pre_build/VUA_pylib/io_utils/__init__.pyc +0 -0
  95. data/core/site-packages/pre_build/VUA_pylib/io_utils/feature_file.py +121 -0
  96. data/core/site-packages/pre_build/VUA_pylib/io_utils/feature_file.pyc +0 -0
  97. data/core/site-packages/pre_build/VUA_pylib/lexicon/__init__.py +1 -0
  98. data/core/site-packages/pre_build/VUA_pylib/lexicon/__init__.pyc +0 -0
  99. data/core/site-packages/pre_build/VUA_pylib/lexicon/lexicon.py +72 -0
  100. data/core/site-packages/pre_build/VUA_pylib/lexicon/lexicon.pyc +0 -0
  101. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/PKG-INFO +10 -0
  102. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/SOURCES.txt +7 -0
  103. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/dependency_links.txt +1 -0
  104. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/installed-files.txt +11 -0
  105. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/top_level.txt +1 -0
  106. data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.py +165 -0
  107. data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.pyc +0 -0
  108. data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.py +439 -0
  109. data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.pyc +0 -0
  110. data/core/site-packages/pre_build/VUKafParserPy/__init__.py +7 -0
  111. data/core/site-packages/pre_build/VUKafParserPy/__init__.pyc +0 -0
  112. data/core/vendor/src/crfsuite/AUTHORS +1 -0
  113. data/core/vendor/src/crfsuite/COPYING +27 -0
  114. data/core/vendor/src/crfsuite/ChangeLog +103 -0
  115. data/core/vendor/src/crfsuite/INSTALL +236 -0
  116. data/core/vendor/src/crfsuite/Makefile.am +19 -0
  117. data/core/vendor/src/crfsuite/Makefile.in +783 -0
  118. data/core/vendor/src/crfsuite/README +183 -0
  119. data/core/vendor/src/crfsuite/aclocal.m4 +9018 -0
  120. data/core/vendor/src/crfsuite/autogen.sh +38 -0
  121. data/core/vendor/src/crfsuite/compile +143 -0
  122. data/core/vendor/src/crfsuite/config.guess +1502 -0
  123. data/core/vendor/src/crfsuite/config.h.in +198 -0
  124. data/core/vendor/src/crfsuite/config.sub +1714 -0
  125. data/core/vendor/src/crfsuite/configure +14273 -0
  126. data/core/vendor/src/crfsuite/configure.in +149 -0
  127. data/core/vendor/src/crfsuite/crfsuite.sln +42 -0
  128. data/core/vendor/src/crfsuite/depcomp +630 -0
  129. data/core/vendor/src/crfsuite/example/chunking.py +49 -0
  130. data/core/vendor/src/crfsuite/example/crfutils.py +179 -0
  131. data/core/vendor/src/crfsuite/example/ner.py +270 -0
  132. data/core/vendor/src/crfsuite/example/pos.py +78 -0
  133. data/core/vendor/src/crfsuite/example/template.py +88 -0
  134. data/core/vendor/src/crfsuite/frontend/Makefile.am +29 -0
  135. data/core/vendor/src/crfsuite/frontend/Makefile.in +640 -0
  136. data/core/vendor/src/crfsuite/frontend/dump.c +116 -0
  137. data/core/vendor/src/crfsuite/frontend/frontend.vcxproj +129 -0
  138. data/core/vendor/src/crfsuite/frontend/iwa.c +273 -0
  139. data/core/vendor/src/crfsuite/frontend/iwa.h +65 -0
  140. data/core/vendor/src/crfsuite/frontend/learn.c +439 -0
  141. data/core/vendor/src/crfsuite/frontend/main.c +137 -0
  142. data/core/vendor/src/crfsuite/frontend/option.c +93 -0
  143. data/core/vendor/src/crfsuite/frontend/option.h +86 -0
  144. data/core/vendor/src/crfsuite/frontend/readdata.h +38 -0
  145. data/core/vendor/src/crfsuite/frontend/reader.c +136 -0
  146. data/core/vendor/src/crfsuite/frontend/tag.c +427 -0
  147. data/core/vendor/src/crfsuite/genbinary.sh.in +15 -0
  148. data/core/vendor/src/crfsuite/include/Makefile.am +11 -0
  149. data/core/vendor/src/crfsuite/include/Makefile.in +461 -0
  150. data/core/vendor/src/crfsuite/include/crfsuite.h +1063 -0
  151. data/core/vendor/src/crfsuite/include/crfsuite.hpp +555 -0
  152. data/core/vendor/src/crfsuite/include/crfsuite_api.hpp +400 -0
  153. data/core/vendor/src/crfsuite/include/os.h +61 -0
  154. data/core/vendor/src/crfsuite/install-sh +520 -0
  155. data/core/vendor/src/crfsuite/lib/cqdb/COPYING +28 -0
  156. data/core/vendor/src/crfsuite/lib/cqdb/Makefile.am +21 -0
  157. data/core/vendor/src/crfsuite/lib/cqdb/Makefile.in +549 -0
  158. data/core/vendor/src/crfsuite/lib/cqdb/cqdb.vcxproj +86 -0
  159. data/core/vendor/src/crfsuite/lib/cqdb/include/cqdb.h +524 -0
  160. data/core/vendor/src/crfsuite/lib/cqdb/src/cqdb.c +587 -0
  161. data/core/vendor/src/crfsuite/lib/cqdb/src/lookup3.c +976 -0
  162. data/core/vendor/src/crfsuite/lib/crf/Makefile.am +46 -0
  163. data/core/vendor/src/crfsuite/lib/crf/Makefile.in +721 -0
  164. data/core/vendor/src/crfsuite/lib/crf/crf.vcxproj +216 -0
  165. data/core/vendor/src/crfsuite/lib/crf/src/crf1d.h +353 -0
  166. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_context.c +705 -0
  167. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_encode.c +943 -0
  168. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_feature.c +352 -0
  169. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_model.c +994 -0
  170. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_tag.c +550 -0
  171. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite.c +492 -0
  172. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite_internal.h +236 -0
  173. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite_train.c +272 -0
  174. data/core/vendor/src/crfsuite/lib/crf/src/dataset.c +106 -0
  175. data/core/vendor/src/crfsuite/lib/crf/src/dictionary.c +118 -0
  176. data/core/vendor/src/crfsuite/lib/crf/src/holdout.c +80 -0
  177. data/core/vendor/src/crfsuite/lib/crf/src/logging.c +91 -0
  178. data/core/vendor/src/crfsuite/lib/crf/src/logging.h +48 -0
  179. data/core/vendor/src/crfsuite/lib/crf/src/params.c +335 -0
  180. data/core/vendor/src/crfsuite/lib/crf/src/params.h +80 -0
  181. data/core/vendor/src/crfsuite/lib/crf/src/quark.c +172 -0
  182. data/core/vendor/src/crfsuite/lib/crf/src/quark.h +46 -0
  183. data/core/vendor/src/crfsuite/lib/crf/src/rumavl.c +1107 -0
  184. data/core/vendor/src/crfsuite/lib/crf/src/rumavl.h +160 -0
  185. data/core/vendor/src/crfsuite/lib/crf/src/train_arow.c +408 -0
  186. data/core/vendor/src/crfsuite/lib/crf/src/train_averaged_perceptron.c +242 -0
  187. data/core/vendor/src/crfsuite/lib/crf/src/train_l2sgd.c +507 -0
  188. data/core/vendor/src/crfsuite/lib/crf/src/train_lbfgs.c +338 -0
  189. data/core/vendor/src/crfsuite/lib/crf/src/train_passive_aggressive.c +435 -0
  190. data/core/vendor/src/crfsuite/lib/crf/src/vecmath.h +341 -0
  191. data/core/vendor/src/crfsuite/ltmain.sh +8413 -0
  192. data/core/vendor/src/crfsuite/missing +376 -0
  193. data/core/vendor/src/crfsuite/swig/Makefile.am +13 -0
  194. data/core/vendor/src/crfsuite/swig/Makefile.in +365 -0
  195. data/core/vendor/src/crfsuite/swig/crfsuite.cpp +2 -0
  196. data/core/vendor/src/crfsuite/swig/export.i +32 -0
  197. data/core/vendor/src/crfsuite/swig/python/README +92 -0
  198. data/core/vendor/src/crfsuite/swig/python/crfsuite.py +329 -0
  199. data/core/vendor/src/crfsuite/swig/python/export_wrap.cpp +14355 -0
  200. data/core/vendor/src/crfsuite/swig/python/export_wrap.h +63 -0
  201. data/core/vendor/src/crfsuite/swig/python/prepare.sh +9 -0
  202. data/core/vendor/src/crfsuite/swig/python/sample_tag.py +52 -0
  203. data/core/vendor/src/crfsuite/swig/python/sample_train.py +68 -0
  204. data/core/vendor/src/crfsuite/swig/python/setup.py +44 -0
  205. data/core/vendor/src/crfsuite/win32/stdint.h +679 -0
  206. data/core/vendor/src/liblbfgs/AUTHORS +1 -0
  207. data/core/vendor/src/liblbfgs/COPYING +22 -0
  208. data/core/vendor/src/liblbfgs/ChangeLog +120 -0
  209. data/core/vendor/src/liblbfgs/INSTALL +231 -0
  210. data/core/vendor/src/liblbfgs/Makefile.am +10 -0
  211. data/core/vendor/src/liblbfgs/Makefile.in +638 -0
  212. data/core/vendor/src/liblbfgs/NEWS +0 -0
  213. data/core/vendor/src/liblbfgs/README +71 -0
  214. data/core/vendor/src/liblbfgs/aclocal.m4 +6985 -0
  215. data/core/vendor/src/liblbfgs/autogen.sh +38 -0
  216. data/core/vendor/src/liblbfgs/config.guess +1411 -0
  217. data/core/vendor/src/liblbfgs/config.h.in +64 -0
  218. data/core/vendor/src/liblbfgs/config.sub +1500 -0
  219. data/core/vendor/src/liblbfgs/configure +21146 -0
  220. data/core/vendor/src/liblbfgs/configure.in +107 -0
  221. data/core/vendor/src/liblbfgs/depcomp +522 -0
  222. data/core/vendor/src/liblbfgs/include/lbfgs.h +745 -0
  223. data/core/vendor/src/liblbfgs/install-sh +322 -0
  224. data/core/vendor/src/liblbfgs/lbfgs.sln +26 -0
  225. data/core/vendor/src/liblbfgs/lib/Makefile.am +24 -0
  226. data/core/vendor/src/liblbfgs/lib/Makefile.in +499 -0
  227. data/core/vendor/src/liblbfgs/lib/arithmetic_ansi.h +133 -0
  228. data/core/vendor/src/liblbfgs/lib/arithmetic_sse_double.h +294 -0
  229. data/core/vendor/src/liblbfgs/lib/arithmetic_sse_float.h +298 -0
  230. data/core/vendor/src/liblbfgs/lib/lbfgs.c +1371 -0
  231. data/core/vendor/src/liblbfgs/lib/lib.vcxproj +95 -0
  232. data/core/vendor/src/liblbfgs/ltmain.sh +6426 -0
  233. data/core/vendor/src/liblbfgs/missing +353 -0
  234. data/core/vendor/src/liblbfgs/sample/Makefile.am +15 -0
  235. data/core/vendor/src/liblbfgs/sample/Makefile.in +433 -0
  236. data/core/vendor/src/liblbfgs/sample/sample.c +81 -0
  237. data/core/vendor/src/liblbfgs/sample/sample.cpp +126 -0
  238. data/core/vendor/src/liblbfgs/sample/sample.vcxproj +105 -0
  239. data/core/vendor/src/svm_light/LICENSE.txt +59 -0
  240. data/core/vendor/src/svm_light/Makefile +105 -0
  241. data/core/vendor/src/svm_light/kernel.h +40 -0
  242. data/core/vendor/src/svm_light/svm_classify.c +197 -0
  243. data/core/vendor/src/svm_light/svm_common.c +985 -0
  244. data/core/vendor/src/svm_light/svm_common.h +301 -0
  245. data/core/vendor/src/svm_light/svm_hideo.c +1062 -0
  246. data/core/vendor/src/svm_light/svm_learn.c +4147 -0
  247. data/core/vendor/src/svm_light/svm_learn.h +169 -0
  248. data/core/vendor/src/svm_light/svm_learn_main.c +397 -0
  249. data/core/vendor/src/svm_light/svm_loqo.c +211 -0
  250. data/ext/hack/Rakefile +17 -0
  251. data/ext/hack/support.rb +88 -0
  252. data/lib/opener/opinion_detectors/base.rb +112 -0
  253. data/lib/opener/opinion_detectors/base/version.rb +7 -0
  254. data/lib/opener/opinion_detectors/configuration_creator.rb +86 -0
  255. data/lib/opener/opinion_detectors/de.rb +7 -0
  256. data/lib/opener/opinion_detectors/en.rb +7 -0
  257. data/lib/opener/opinion_detectors/it.rb +7 -0
  258. data/lib/opener/opinion_detectors/nl.rb +6 -0
  259. data/opener-opinion-detector-base.gemspec +35 -0
  260. data/pre_build_requirements.txt +3 -0
  261. metadata +374 -0
@@ -0,0 +1,46 @@
1
+ /*
2
+ * Quark object.
3
+ *
4
+ * Copyright (c) 2007-2010, Naoaki Okazaki
5
+ * All rights reserved.
6
+ *
7
+ * Redistribution and use in source and binary forms, with or without
8
+ * modification, are permitted provided that the following conditions are met:
9
+ * * Redistributions of source code must retain the above copyright
10
+ * notice, this list of conditions and the following disclaimer.
11
+ * * Redistributions in binary form must reproduce the above copyright
12
+ * notice, this list of conditions and the following disclaimer in the
13
+ * documentation and/or other materials provided with the distribution.
14
+ * * Neither the names of the authors nor the names of its contributors
15
+ * may be used to endorse or promote products derived from this
16
+ * software without specific prior written permission.
17
+ *
18
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
22
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
+ */
30
+
31
+ /* $Id$ */
32
+
33
#ifndef __QUARK_H__
#define __QUARK_H__

/*
 * Opaque quark handle. Only the struct tag is declared here, so callers can
 * hold and pass pointers but cannot look inside the object.
 * NOTE(review): judging by the function names below this maps strings to
 * integer ids and back; the implementation is not part of this header, so
 * confirm the exact semantics against quark.c.
 */
struct tag_quark;
typedef struct tag_quark quark_t;

/*
 * Creates a quark object. Declared with an explicit (void) parameter list so
 * that the declaration is a real prototype and stray arguments are rejected
 * at compile time.
 */
quark_t* quark_new(void);

/* Presumably releases an object obtained from quark_new() - confirm. */
void quark_delete(quark_t* qrk);

/*
 * NOTE(review): quark_get and quark_to_id share a signature (string in,
 * int out). How they differ (e.g. behaviour for an unknown string) cannot be
 * told from this header - check the implementation before relying on either.
 */
int quark_get(quark_t* qrk, const char *str);
int quark_to_id(quark_t* qrk, const char *str);

/* Presumably the reverse lookup: the string registered under qid - confirm. */
const char *quark_to_string(quark_t* qrk, int qid);

/* Presumably the number of entries held by qrk - confirm. */
int quark_num(quark_t* qrk);

#endif/*__QUARK_H__*/
@@ -0,0 +1,1107 @@
1
+ /*----------------------------------------------------------------------------
2
+ * RumAVL - Threaded AVL Tree Implementation
3
+ *
4
+ * Copyright (c) 2005-2007 Jesse Long <jpl@unknown.za.net>
5
+ * All rights reserved.
6
+ *
7
+ * Permission is hereby granted, free of charge, to any person obtaining a
8
+ * copy of this software and associated documentation files (the "Software"),
9
+ * to deal in the Software without restriction, including without limitation
10
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11
+ * and/or sell copies of the Software, and to permit persons to whom the
12
+ * Software is furnished to do so, subject to the following conditions:
13
+ *
14
+ * 1. The above copyright notice and this permission notice shall be
15
+ * included in all copies or substantial portions of the Software.
16
+ * 2. The origin of the Software must not be misrepresented; you must not
17
+ * claim that you wrote the original Software.
18
+ * 3. Altered source versions of the Software must be plainly marked as
19
+ * such, and must not be misrepresented as being the original Software.
20
+ *
21
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
24
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
26
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
27
+ * DEALINGS IN THE SOFTWARE.
28
+ *--------------------------------------------------------------------------*/
29
+
30
+ /*----------------------------------------------------------------------------
31
+ * Although not required by the license, I would appreciate it if you would
32
+ * send me a mail notifying me of bugfixes and enhancements you make to this
33
+ * code. My email address is <jpl@unknown.za.net>
34
+ *--------------------------------------------------------------------------*/
35
+
36
+ /*----------------------------------------------------------------------------
37
+ * DEVELOPMENT NOTES
38
+ *
39
+ * Links
40
+ * Each node has two links, link[0] is the left child, and link[1] is the
41
+ * right child. When a link points to a node that is actually below it in
42
+ * the BST, the respective thread flag is marked 0. When the link is a
43
+ * thread, the respective thread flag is marked 1, or 2 if the thread is
44
+ * to the opposite edge of the BST.
45
+ *
46
+ * Direction
47
+ * In RumAVL we use the numbers -1 (RUMAVL_DESC) and +1 (RUMAVL_ASC) to
48
+ * indicate direction, where -1 (RUMAVL_DESC) means left or descending in
49
+ * value, and +1 (RUMAVL_ASC) means right or ascending in value.
50
+ *
51
+ * Threads
52
+ * In RumAVL, the threads (non-bst links of leaves) are implemented in a
53
+ * sort of circular list. It is important to note that you cannot go
54
+ * through the entire list by following the same link, as you would when
55
+ * going through a linked list. Draw an example threaded AVL tree on paper
56
+ * and see why.
57
+ *
58
+ *--------------------------------------------------------------------------*/
59
+
60
+ #include <stdlib.h>
61
+ #include <string.h>
62
+
63
+ #include "rumavl.h"
64
+
65
+ /* For memory allocation debugging
66
+ #ifdef USE_MEMBUG
67
+ #define MEMBUG_DEFINES
68
+ #include <membug.h>
69
+ #endif */
70
+
71
+
72
+
73
+
74
+ /*****************************************************************************
75
+ *
76
+ * MACROS - to make readability better
77
+ *
78
+ ****************************************************************************/
79
+
80
+ /* Link numbers */
81
+ #define LEFT (0) /* index into link[] / thread[] for the left child */
82
+ #define RIGHT (1) /* index into link[] / thread[] for the right child */
83
+
84
+ /* Direction to link number; expects RUMAVL_DESC (-1) or RUMAVL_ASC (+1) */
85
+ #define LINK_NO(i) (((i) + 1) / 2) /* -1 => 0 (LEFT); 1 => 1 (RIGHT) */
86
+ /* Get the opposite link number; expects LEFT or RIGHT */
87
+ #define OTHER_LINK(i) ((i) ^ 1) /* 1 => 0; 0 => 1 */
88
+
89
+ /* Link number to direction; expects LEFT or RIGHT */
90
+ #define DIR_NO(i) (((i) * 2) - 1) /* 0 => -1; 1 => 1 */
91
+ /* Opposite direction; expects RUMAVL_DESC or RUMAVL_ASC */
92
+ #define OTHER_DIR(i) ((i) * -1) /* -1 => 1; 1 => -1 */
93
+
94
+ /* Memory allocation helpers: all three forward to mem_mgr(); only the
   pointer/size combination differs. NOTE(review): mem_mgr() is defined
   elsewhere in this file - presumably NULL pointer means "allocate" and
   size 0 means "release"; confirm there. */
95
+ #define mem_alloc(tree, bytes) mem_mgr((tree), NULL, (bytes))
96
+ #define mem_free(tree, ptr) mem_mgr((tree), (ptr), 0)
97
+ #define mem_relloc(tree, ptr, bytes) mem_mgr((tree), (ptr), (bytes)) /* name is spelled "relloc" (sic) */
98
+
99
+
100
+
101
+
102
+ /*****************************************************************************
103
+ *
104
+ * DATA TYPES
105
+ *
106
+ ****************************************************************************/
107
+
108
+ /*
109
+ * RUMAVL - the handle on the tree
110
+ *
111
+ * All settings for a tree are in the RUMAVL object, including memory
112
+ * management, delete and overwrite callback functions, and the record
113
+ * comparison function pointer.
114
+ */
115
+ struct rumavl {
116
+ RUMAVL_NODE *root; /* root node in tree; NULL while the tree is empty */
117
+ size_t reclen; /* length of records, in bytes (used as the memcpy size on overwrite) */
118
+ int (*cmp)(const void *, /* function to compare records; rumavl_new() installs my_cmp when given NULL */
119
+ const void *,
120
+ size_t,
121
+ void *);
122
+ int (*owcb)(RUMAVL *, RUMAVL_NODE *, void *, const void *, void *); /* overwrite callback, may be NULL; a non-zero result makes rumavl_set() return it without copying */
123
+ int (*delcb)(RUMAVL *, RUMAVL_NODE *, void *, void *); /* delete callback, may be NULL; rumavl_destroy() ignores its result */
124
+ void *(*alloc)(void *, size_t, void *); /* user allocator, NULL selects malloc/free for the handle; called with size 0 to release */
125
+ void *udata; /* user data for callbacks */
126
+ };
127
+
128
+ /*
129
+ * RUMAVL_NODE - the node structure
130
+ *
131
+ * RUMAVL_NODE's contain all information about a specific node, including
132
+ * links to the right and left children of the node, and flags (thread)
133
+ * indicating whether or not the links are threads or not, and the balance
134
+ * factor of the node.
135
+ *
136
+ * The record associated with each node is reached through the rec pointer;
137
+ * always access it with the NODE_REC() macro rather than touching rec directly.
+ * NOTE(review): an earlier wording said the record is stored directly after
+ * the node. The struct below holds a pointer instead; how the record memory
+ * is obtained is decided in node_new(), which is not shown here - confirm.
138
+ */
139
+ struct rumavl_node {
140
+ RUMAVL_NODE *link[2]; /* links to child nodes, indexed by LEFT / RIGHT */
141
+ char thread[2]; /* flags for links: 0 = real child, 1 = thread, 2 = thread to the opposite edge of the tree */
142
+ signed char balance; /* balance factor for node */
143
+ void *rec; /* this node's record; read it via NODE_REC() */
144
+ #define NODE_REC(node) ((node)->rec)
145
+ };
146
+
147
+ /*
148
+ * RUMAVL_STACK - a stack of nodes forming a path to a node
149
+ *
150
+ * RUMAVL_STACK's are used while deleting and inserting nodes, where effects
151
+ * could be felt by all parents of the node. RUMAVL_STACK's are implemented
152
+ * as a singly linked list. This is a change from the method used by most AVL
153
+ * trees, where a static array of node pointers is allocated. Linked lists
154
+ * allow for an unlimited height in the AVL tree.
155
+ *
156
+ * node is a pointer to the parent node's pointer to the node in question.
157
+ * dir is the direction of the descent from this node.
158
+ */
159
+ typedef struct rumavl_stack RUMAVL_STACK;
160
+ struct rumavl_stack {
161
+ RUMAVL_STACK *next; /* next entry in the singly linked stack */
162
+ RUMAVL_NODE **node; /* address of the link that points at the node on the path */
163
+ int dir; /* direction taken when descending from that node */
164
+ };
165
+
166
+ /* various other RumAVL specific structs defined in rumavl.h */
167
+
168
+
169
+
170
+
171
+ /*****************************************************************************
172
+ *
173
+ * FORWARD DECLARATIONS
174
+ *
175
+ ****************************************************************************/
176
+
177
+ static RUMAVL_NODE *seq_next (RUMAVL_NODE *node, int dir); /* neighbour of node in direction dir; rumavl_destroy() stops its walk when this yields NULL */
178
+ static RUMAVL_NODE *node_new(RUMAVL *tree, const void *record); /* NULL result is reported by callers as RUMAVL_ERR_NOMEM */
179
+ static void node_destroy (RUMAVL *tree, RUMAVL_NODE *node);
180
+ static int stack_push (RUMAVL *tree, RUMAVL_STACK **stack, RUMAVL_NODE **node, /* non-zero result is treated as out of memory */
181
+ int dir);
182
+ static void stack_destroy(RUMAVL *tree, RUMAVL_STACK *stack); /* called with a NULL stack in rumavl_set(), so NULL must be accepted */
183
+ static void stack_update(RUMAVL *tree, RUMAVL_STACK *stack, signed char diff); /* rumavl_set() passes +1 after adding a leaf */
184
+
185
+ static signed char balance (RUMAVL_NODE **node, int dir);
186
+ static signed char rotate (RUMAVL_NODE **node, int dir);
187
+
188
+ static void *mem_mgr (RUMAVL *tree, void *ptr, size_t size); /* backend of the mem_alloc / mem_free / mem_relloc macros */
189
+
190
+ static int rec_cmp (RUMAVL *tree, const void *reca, const void *recb); /* 0 means equal; a non-zero result is fed to LINK_NO(), so presumably it is RUMAVL_DESC or RUMAVL_ASC - confirm */
191
+ static int my_cmp (const void *a, const void *b, size_t n, void *udata); /* default comparator installed by rumavl_new() */
192
+
193
+ static int insert_cb (RUMAVL *t, RUMAVL_NODE *n, void *r1, const void *r2, /* same parameter list as the owcb member of struct rumavl */
194
+ void *udata);
195
+
196
+
197
+
198
+ /*****************************************************************************
199
+ *
200
+ * PUBLIC FUNCTIONS
201
+ *
202
+ ****************************************************************************/
203
+
204
+ /*----------------------------------------------------------------------------
205
+ * rumavl_new - allocates a new RUMAVL object, and initialises it. This is the
206
+ * only time the user gets to set the record length and record comparison
207
+ * function, to avoid data loss.
208
+ *--------------------------------------------------------------------------*/
209
+ RUMAVL *rumavl_new (size_t reclen,
210
+ int (*cmp)(const void *, const void *, size_t, void *),
211
+ void *(*alloc)(void *, size_t, void *),
212
+ void *udata)
213
+ {
214
+ RUMAVL *tree;
215
+
216
+ if (reclen < 1)
217
+ return NULL;
218
+
219
+ if (alloc == NULL)
220
+ tree = malloc(sizeof(RUMAVL));
221
+ else
222
+ tree = alloc(NULL, sizeof(RUMAVL), udata);
223
+
224
+ if (tree == NULL)
225
+ return NULL;
226
+
227
+ tree->root = NULL;
228
+
229
+ tree->owcb = NULL;
230
+ tree->delcb = NULL;
231
+
232
+ tree->alloc = alloc;
233
+
234
+ tree->reclen = reclen;
235
+ tree->udata = udata;
236
+
237
+ if (cmp == NULL)
238
+ tree->cmp = my_cmp;
239
+ else
240
+ tree->cmp = cmp;
241
+
242
+ return tree;
243
+ }
244
+
245
+ /*----------------------------------------------------------------------------
246
+ * rumavl_destroy - cleanly frees all memory used by the RUMAVL, as well as
247
+ * all nodes. All nodes are passed to the delete callback function in case the
248
+ * user has a special way of destroying nodes. The return value of the delete
249
+ * callback function is ignored, because once we start destroying we cant
250
+ * simply undestroy half the nodes.
251
+ *--------------------------------------------------------------------------*/
252
+ void rumavl_destroy (RUMAVL *tree)
253
+ {
254
+ RUMAVL_NODE *node, *tmp;
255
+
256
+ if (tree->root != NULL){
257
+ /* walk through tree deleting all */
258
+ node = tree->root;
259
+ while (node->thread[LEFT] == 0) /* move to bottom left most node */
260
+ node = node->link[LEFT];
261
+ while (node != NULL){
262
+ tmp = seq_next(node, RUMAVL_ASC);
263
+ if (tree->delcb != NULL){
264
+ tree->delcb(tree, node, NODE_REC(node), tree->udata);
265
+ }
266
+ node_destroy(tree, node);
267
+ node = tmp;
268
+ }
269
+ }
270
+
271
+ if (tree->alloc == NULL)
272
+ free(tree);
273
+ else
274
+ tree->alloc(tree, 0, tree->udata);
275
+ }
276
+
277
+ /*---------------------------------------------------------------------------
278
+ * rumavl_udata - get a pointer to the tree's user pointer
279
+ *-------------------------------------------------------------------------*/
280
+ void **rumavl_udata (RUMAVL *tree)
281
+ {
282
+ return &tree->udata;
283
+ }
284
+
285
+ int (**rumavl_owcb(RUMAVL *tree))(RUMAVL *, RUMAVL_NODE *, void *,
286
+ const void *, void *)
287
+ {
288
+ return &tree->owcb;
289
+ }
290
+
291
+ int (**rumavl_delcb(RUMAVL *tree))(RUMAVL *, RUMAVL_NODE *, void *, void *)
292
+ {
293
+ return &tree->delcb;
294
+ }
295
+
296
+ /*----------------------------------------------------------------------------
297
+ * rumavl_set - set a node, overwriting if necessary, or creating if the node
298
+ * does not exist
+ *
+ * Returns 0 on success, RUMAVL_ERR_NOMEM when a node or a stack entry cannot
+ * be allocated, or the overwrite callback's non-zero result when that
+ * callback refuses the overwrite (the stored record is then left untouched).
299
+ *--------------------------------------------------------------------------*/
300
+ int rumavl_set (RUMAVL *tree, const void *record)
301
+ {
302
+ RUMAVL_NODE **node, *tmp; /* node: address of the link being followed; tmp: the newly created node */
303
+ RUMAVL_STACK *stack; /* path of ancestors visited during the descent */
304
+ int ln; /* first a comparison result / direction, then a link index (see LINK_NO) */
305
+
306
+ if (tree->root == NULL){
307
+ /* This is the first node in the tree */
308
+ if ((tree->root = node_new(tree, record)) == NULL)
309
+ return RUMAVL_ERR_NOMEM;
310
+ tree->root->link[LEFT] = tree->root; /* a lone root threads back to itself on both sides */
311
+ tree->root->link[RIGHT] = tree->root;
312
+ tree->root->thread[LEFT] = 2; /* 2 marks a thread to the opposite edge of the tree */
313
+ tree->root->thread[RIGHT] = 2;
314
+ return 0;
315
+ }
316
+
317
+ /* Since the tree is not empty, we must descend towards the node's ideal
318
+ * position, and we may even find an existing node with the same record.
319
+ * We keep a list of parents for the eventual node position, because these
320
+ * parents may become unbalanced by a new insertion. */
321
+
322
+ stack = NULL;
323
+ node = &tree->root;
324
+ for (;;){
325
+ if ((ln = rec_cmp(tree, record, NODE_REC(*node))) == 0){
326
+ /* OK, we found the exact node we wish to set, and we now
327
+ * overwrite it. No change happens to the tree structure */
328
+ stack_destroy(tree, stack); /* no rebalancing will happen, so the recorded path is dropped */
329
+
330
+ if (tree->owcb != NULL &&
331
+ (ln = tree->owcb(tree, *node, NODE_REC(*node),
332
+ record, tree->udata)) != 0){
333
+ return ln; /* callback refused the overwrite: pass its code to the caller */
334
+ }
335
+
336
+ memcpy(NODE_REC(*node), record, tree->reclen);
337
+ return 0;
338
+ }
339
+
340
+ /* *node is not the node we seek */
341
+
342
+ if (stack_push(tree, &stack, node, ln)){ /* remember this ancestor and the direction taken; non-zero means failure */
343
+ stack_destroy(tree, stack);
344
+ return RUMAVL_ERR_NOMEM;
345
+ }
346
+
347
+ ln = LINK_NO(ln); /* from here on ln is a link index (LEFT / RIGHT), not a direction */
348
+ if ((*node)->thread[ln] > 0){
349
+ /* This is as close to the correct node as we can get. We will
350
+ * now break and add the new node as a leaf */
351
+ break;
352
+ }
353
+
354
+ node = &(*node)->link[ln];
355
+ }
356
+
357
+ /* we have reached a leaf, add new node here */
358
+ if ((tmp = node_new(tree, record)) == NULL){
359
+ stack_destroy(tree, stack);
360
+ return RUMAVL_ERR_NOMEM;
361
+ }
362
+ /* new child inherits parent thread */
363
+ tmp->link[ln] = (*node)->link[ln];
364
+ tmp->thread[ln] = (*node)->thread[ln];
365
+ if (tmp->thread[ln] == 2) /* inherited an edge-to-edge thread: the new node is now the extreme on this side */
366
+ tmp->link[ln]->link[OTHER_LINK(ln)] = tmp; /* so the opposite edge's thread must point at it */
367
+
368
+ tmp->link[OTHER_LINK(ln)] = *node; /* the other side threads back to the parent */
369
+ tmp->thread[OTHER_LINK(ln)] = 1;
370
+ (*node)->link[ln] = tmp; /* parent's former thread becomes a real child link */
371
+ (*node)->thread[ln] = 0;
372
+
373
+ /* all parentage is now one level heavier - balance where necessary */
374
+ stack_update(tree, stack, +1); /* NOTE(review): no stack_destroy() on this path - presumably stack_update() releases the stack; confirm */
375
+
376
+ return 0;
377
+ }
378
+
379
+
380
+ /*----------------------------------------------------------------------------
381
+ * rumavl_insert - like rumavl_set, but only works if the node does not
382
+ * exist. Temporarily replaces overwrite callback with a function that
383
+ * always prevents overwrite, and calls rumavl_set()
384
+ *--------------------------------------------------------------------------*/
385
+ int rumavl_insert (RUMAVL *tree, const void *record)
386
+ {
387
+ int retv;
388
+ int (*tmp)(RUMAVL *, RUMAVL_NODE *, void *, const void *, void *);
389
+
390
+ tmp = tree->owcb;
391
+ tree->owcb = insert_cb;
392
+ retv = rumavl_set(tree, record);
393
+ tree->owcb = tmp;
394
+ return retv;
395
+ }
396
+
397
+ /*----------------------------------------------------------------------------
398
+ * rumavl_delete - deletes a node. Beware! this function is the worst part of
399
+ * the library. Think (and draw pictures) when you edit this function.
400
+ *
+ * Returns 0 once the node has been removed. Returns RUMAVL_ERR_NOENT when the
+ * tree is empty or no node compares equal to `record', RUMAVL_ERR_NOMEM when
+ * a stack entry cannot be allocated, and otherwise whatever non-zero value
+ * the delete callback (tree->delcb) returned, in which case nothing is
+ * removed.
+ *--------------------------------------------------------------------------*/
401
+ int rumavl_delete (RUMAVL *tree, const void *record)
402
+ {
403
+ RUMAVL_NODE **node, *tmpnode;
404
+ RUMAVL_STACK *stack;
405
+ int dir, ln;
406
+
407
+ if (tree->root == NULL) /* tree is empty */
408
+ return RUMAVL_ERR_NOENT;
409
+
410
+ stack = NULL;
411
+ node = &tree->root;
412
+
413
+ /* Find desired node, recording every node passed on the way down */
414
+ while ((dir = rec_cmp(tree, record, NODE_REC(*node))) != 0){
415
+ if (stack_push(tree, &stack, node, dir) != 0)
416
+ goto nomemout;
417
+
418
+ if ((*node)->thread[LINK_NO(dir)] > 0){
419
+ /* desired node does not exist */
420
+ stack_destroy(tree, stack);
421
+ return RUMAVL_ERR_NOENT;
422
+ }
423
+ node = &(*node)->link[LINK_NO(dir)];
424
+ }
425
+
426
+ /* OK, we got the node to be deleted, now get confirmation from user */
427
+ if (tree->delcb != NULL &&
428
+ (ln = tree->delcb(tree, *node, NODE_REC(*node), tree->udata))
429
+ != 0){
430
+ stack_destroy(tree, stack);
431
+ return ln;
432
+ }
433
+
434
+ if ((*node)->thread[LEFT] > 0){
435
+ if ((*node)->thread[RIGHT] > 0){
436
+ /* ooh look, we're a leaf */
437
+ tmpnode = *node;
438
+ if (stack != NULL){
439
+ /* This node has a parent, which will need to take over a
440
+ * thread from the node being deleted. First we work out
441
+ * which (left/right) child we are of parent, then give
442
+ * parent the respective thread. If the thread destination
443
+ * points back to us (edge of tree thread), update it to
444
+ * point to our parent. */
445
+ ln = LINK_NO(stack->dir);
446
+ (*stack->node)->link[ln] = tmpnode->link[ln];
447
+ (*stack->node)->thread[ln] = tmpnode->thread[ln];
448
+ if ((*stack->node)->thread[ln] == 2)
449
+ (*stack->node)->link[ln]->link[OTHER_LINK(ln)] =
450
+ *stack->node;
451
+ }else{
452
+ /*
453
+ * the only time stack will == NULL is when we are
454
+ * deleting the root of the tree. We already know that
455
+ * this is a leaf, so we will be leaving the tree empty.
456
+ */
457
+ tree->root = NULL;
458
+ }
459
+ node_destroy(tree, tmpnode);
460
+ }else{
461
+ /* *node has only one child, and can be pruned by replacing
462
+ * *node with its only child. This block of code and the next
463
+ * should be identical, except that all directions and link
464
+ * numbers are opposite.
465
+ *
466
+ * Let node being deleted = DELNODE for this comment.
467
+ * DELNODE only has one child (the right child). The left
468
+ * most descendant of DELNODE will have a thread (left thread)
469
+ * pointing to DELNODE. This thread must be updated to point
470
+ * to the node currently pointed to by DELNODE's left thread.
471
+ *
472
+ * DELNODE's left thread may point to the opposite edge of the
473
+ * BST. In this case, the destination of the thread will have
474
+ * a thread back to DELNODE. This will need to be updated to
475
+ * point back to the leftmost descendant of DELNODE.
476
+ */
477
+ tmpnode = *node; /* node being deleted */
478
+ *node = (*node)->link[RIGHT]; /* right child */
479
+ /* find left most descendant */
480
+ while ((*node)->thread[LEFT] == 0)
481
+ node = &(*node)->link[LEFT];
482
+ /* inherit thread from node being deleted */
483
+ (*node)->link[LEFT] = tmpnode->link[LEFT];
484
+ (*node)->thread[LEFT] = tmpnode->thread[LEFT];
485
+ /* update reverse thread if necessary */
486
+ if ((*node)->thread[LEFT] == 2)
487
+ (*node)->link[LEFT]->link[RIGHT] = *node;
488
+ node_destroy(tree, tmpnode);
489
+ }
490
+ }else if ((*node)->thread[RIGHT] > 0){
491
+ /* mirror image of the single (right) child case above: the node
+ * being deleted only has a left child */
492
+ tmpnode = *node;
493
+ *node = (*node)->link[LEFT];
494
+ while ((*node)->thread[RIGHT] == 0)
495
+ node = &(*node)->link[RIGHT];
496
+ (*node)->link[RIGHT] = tmpnode->link[RIGHT];
497
+ (*node)->thread[RIGHT] = tmpnode->thread[RIGHT];
498
+ if ((*node)->thread[RIGHT] == 2)
499
+ (*node)->link[RIGHT]->link[LEFT] = *node;
500
+ node_destroy(tree, tmpnode);
501
+ }else{
502
+ /* Delete a node with children on both sides. We do this by replacing
503
+ * the node to be deleted (delnode) with its inner most child
504
+ * on the heavier side (repnode). This in place replacement is quicker
505
+ * than the previously used method of rotating delnode until it is a
506
+ * (semi) leaf.
507
+ *
508
+ * At this point node points to delnode's parent's link to delnode. */
509
+ RUMAVL_NODE *repnode, *parent;
510
+ int outdir, outln;
511
+
512
+ /* find heaviest subtree */
513
+ if ((*node)->balance > 0){
514
+ outdir = +1; /* outer direction */
515
+ dir = -1; /* inner direction */
516
+ outln = 1; /* outer link number */
517
+ ln = 0; /* inner link number */
518
+ }else{
519
+ outdir = -1; /* same as above, but opposite subtree */
520
+ dir = +1;
521
+ outln = 0;
522
+ ln = 1;
523
+ }
524
+
525
+ /* Add node to be deleted to the list of nodes to be rebalanced.
526
+ * Remember that the replacement node will actually be acted upon,
527
+ * and that the replacement node should feel the effect of its own
528
+ * move.
+ *
+ * NOTE(review): the delete callback has already accepted the
+ * deletion by now. If this push, or one of the pushes below, fails,
+ * the function returns RUMAVL_ERR_NOMEM with the node still in the
+ * tree, because no link has been modified yet at any of those
+ * points. */
529
+ if (stack_push(tree, &stack, node, outdir) != 0)
530
+ goto nomemout;
531
+
532
+ parent = *node;
533
+ repnode = parent->link[outln];
534
+
535
+ if (repnode->thread[ln] != 0){
536
+ /* repnode inherits delnode's lighter tree, and balance, and gets
537
+ * balance readjusted below */
538
+ repnode->link[ln] = (*node)->link[ln];
539
+ repnode->thread[ln] = (*node)->thread[ln];
540
+ repnode->balance = (*node)->balance;
541
+ }else{
542
+ /* Now we add delnodes direct child to the list of "to update".
543
+ * We pass a pointer to delnode's link to its direct child to
544
+ * stack_push(), but that pointer is invalid, because when
545
+ * stack_update() tries to access the link, delnode would have
546
+ * been destroyed. So, we remember the stack position at which
547
+ * we passed the faulty pointer to stack_push, and update its
548
+ * node pointer when we find repnode to point to repnodes
549
+ * link on the same side */
550
+ RUMAVL_STACK *tmpstack;
551
+
552
+ if (stack_push(tree, &stack, &parent->link[outln], dir) != 0)
553
+ goto nomemout;
554
+
555
+ tmpstack = stack;
556
+
557
+ parent = repnode;
558
+ repnode = repnode->link[ln];
559
+
560
+ /* move towards the innermost child of delnode */
561
+ while (repnode->thread[ln] == 0){
562
+ if (stack_push(tree, &stack, &parent->link[ln], dir) != 0)
563
+ goto nomemout;
564
+ parent = repnode;
565
+ repnode = repnode->link[ln];
566
+ }
567
+
568
+ if (repnode->thread[outln] == 0){
569
+ /* repnode's parent inherits repnodes only child */
570
+ parent->link[ln] = repnode->link[outln];
571
+ }else{
572
+ /* parent already has a link to repnode, but it must now be
573
+ * marked as a thread */
574
+ parent->thread[ln] = 1;
575
+ }
576
+
577
+ repnode->link[0] = (*node)->link[0];
578
+ repnode->thread[0] = (*node)->thread[0];
579
+ repnode->link[1] = (*node)->link[1];
580
+ repnode->thread[1] = (*node)->thread[1];
581
+ repnode->balance = (*node)->balance;
582
+
583
+ /* see comment above: repoint the remembered stack entry at the
+ * link that now lives inside repnode */
584
+ tmpstack->node = &repnode->link[outln];
585
+ }
586
+ node_destroy(tree, *node);
587
+ *node = repnode;
588
+
589
+ /* innermost child in lighter tree has an invalid thread to delnode,
590
+ * update it to point to repnode */
591
+ repnode = seq_next(repnode, dir);
592
+ repnode->link[outln] = *node;
593
+ }
594
+
595
+ /* update parents' balances; stack_update() also frees the stack */
596
+ stack_update(tree, stack, -1);
597
+ return 0;
598
+
599
+ nomemout:
600
+ stack_destroy(tree, stack);
601
+ return RUMAVL_ERR_NOMEM;
602
+ }
603
+
604
+ /*----------------------------------------------------------------------------
605
+ * rumavl_find
606
+ *
607
+ * Returns a pointer to the record that matches "record".
608
+ *--------------------------------------------------------------------------*/
609
+ void *rumavl_find (RUMAVL *tree, const void *find)
610
+ {
611
+ void *record;
612
+ rumavl_node_find(tree, find, &record);
613
+ return record;
614
+ }
615
+
616
+ void *(**rumavl_alloc(RUMAVL *tree))(void *ptr, size_t size, void *udata)
617
+ {
618
+ return &tree->alloc;
619
+ }
620
+
621
+ /*----------------------------------------------------------------------------
622
+ * rumavl_record_size - returns size of all records in a tree
623
+ *--------------------------------------------------------------------------*/
624
+ size_t rumavl_record_size (RUMAVL *tree)
625
+ {
626
+ return tree->reclen;
627
+ }
628
+
629
+ /*----------------------------------------------------------------------------
630
+ * rumavl_node_find
631
+ *
632
+ * Returns a pointer to the node that matches "record".
633
+ *--------------------------------------------------------------------------*/
634
+ RUMAVL_NODE *rumavl_node_find (RUMAVL *tree, const void *find, void **record)
635
+ {
636
+ RUMAVL_NODE *node;
637
+ int ln;
638
+
639
+ if (find == NULL || tree->root == NULL)
640
+ goto fail;
641
+
642
+ node = tree->root;
643
+ for (;;){
644
+ if ((ln = rec_cmp(tree, find, NODE_REC(node))) == 0){
645
+ if (record != NULL)
646
+ *record = NODE_REC(node);
647
+ return node;
648
+ }
649
+
650
+ ln = LINK_NO(ln);
651
+ if (node->thread[ln] > 0)
652
+ break;
653
+
654
+ node = node->link[ln];
655
+ }
656
+ /* we didn't find the desired node */
657
+
658
+ fail:
659
+ if (record != NULL)
660
+ *record = NULL;
661
+
662
+ return NULL;
663
+ }
664
+
665
+ /*----------------------------------------------------------------------------
666
+ * rumavl_node_next - find next node
667
+ *--------------------------------------------------------------------------*/
668
+ RUMAVL_NODE *rumavl_node_next (RUMAVL *tree, RUMAVL_NODE *node, int dir,
669
+ void **record)
670
+ {
671
+ /* make sure `dir' is either RUMAVL_ASC or RUMAVL_DESC */
672
+ if (dir == 0)
673
+ goto fail;
674
+ else if (dir > 0)
675
+ dir = RUMAVL_ASC;
676
+ else
677
+ dir = RUMAVL_DESC;
678
+
679
+ /* if node is uninitialised, start with first possible node in `dir'
680
+ * direction */
681
+ if (node == NULL){
682
+ /* unless the tree is empty of course */
683
+ if (tree->root == NULL)
684
+ goto fail;
685
+
686
+ dir = OTHER_LINK(LINK_NO(dir));
687
+ node = tree->root;
688
+ while (node->thread[dir] == 0){
689
+ node = node->link[dir];
690
+ }
691
+ goto found;
692
+ }
693
+
694
+ if ((node = seq_next(node, dir)) == NULL)
695
+ goto fail;
696
+
697
+ /* fall through */
698
+
699
+ found:
700
+ if (record != NULL)
701
+ *record = NODE_REC(node);
702
+ return node;
703
+
704
+ fail:
705
+ if (record != NULL)
706
+ *record = NULL;
707
+ return NULL;
708
+ }
709
+
710
+ /*----------------------------------------------------------------------------
711
+ * rumavl_node_record - returns a pointer to the record stored in a node
712
+ *--------------------------------------------------------------------------*/
713
+ void *rumavl_node_record (RUMAVL_NODE *node)
714
+ {
715
+ return NODE_REC(node);
716
+ }
717
+
718
+ /*----------------------------------------------------------------------------
719
+ * rumavl_foreach - loop through entire tree, using temporary iterator
720
+ *--------------------------------------------------------------------------*/
721
+ extern int rumavl_foreach (RUMAVL *tree, int dir,
722
+ int (*cbfn)(RUMAVL *, void *, void *), void *udata)
723
+ {
724
+ RUMAVL_NODE *node;
725
+ int retv;
726
+ void *record;
727
+
728
+ if (cbfn == NULL)
729
+ return RUMAVL_ERR_INVAL;
730
+
731
+ retv = RUMAVL_ERR_NOENT;
732
+ node = NULL;
733
+ while ((node = rumavl_node_next(tree, node, dir, &record)) != NULL){
734
+ if ((retv = cbfn(tree, record, udata)) != 0)
735
+ break;
736
+ }
737
+
738
+ return retv;
739
+ }
740
+
741
+ /*----------------------------------------------------------------------------
742
+ * rumavl_strerror - return string description of RumAVL error code
743
+ *--------------------------------------------------------------------------*/
744
+ const char *rumavl_strerror (int errno)
745
+ {
746
+ switch (errno){
747
+ case 0:
748
+ return "Operation successful";
749
+ case RUMAVL_ERR_INVAL:
750
+ return "Invalid argument to function";
751
+ case RUMAVL_ERR_NOMEM:
752
+ return "Insufficient memory to complete operation";
753
+ case RUMAVL_ERR_NOENT:
754
+ return "Entry does not exist";
755
+ case RUMAVL_ERR_EORNG:
756
+ return "No more entries in range";
757
+ case RUMAVL_ERR_EXIST:
758
+ return "Entry already exists";
759
+ }
760
+ return "UNKNOWN ERROR";
761
+ }
762
+
763
+
764
+
765
+
766
+ /*****************************************************************************
767
+ *
768
+ * PRIVATE FUNCTIONS
769
+ *
770
+ ****************************************************************************/
771
+
772
+ /*----------------------------------------------------------------------------
773
+ * insert_cb - used by rumavl_insert() to disallow any overwriting by
774
+ * rumavl_set()
775
+ *--------------------------------------------------------------------------*/
776
+ static int insert_cb (RUMAVL *t, RUMAVL_NODE *n, void *r1, const void *r2,
777
+ void *udata)
778
+ {
779
+ (void) t; (void) r1; (void) r2; (void) udata; (void) n;
780
+ return RUMAVL_ERR_EXIST;
781
+ }
782
+
783
+ /*----------------------------------------------------------------------------
784
+ * seq_next - return a pointer to the next node in sequence
785
+ *--------------------------------------------------------------------------*/
786
+ static RUMAVL_NODE *seq_next (RUMAVL_NODE *node, int dir)
787
+ {
788
+ int ln;
789
+
790
+ ln = LINK_NO(dir);
791
+ if (node->thread[ln] == 2){
792
+ return NULL;
793
+ }else if (node->thread[ln] == 1){
794
+ return node->link[ln];
795
+ }
796
+ node = node->link[ln];
797
+ ln = OTHER_LINK(ln);
798
+ while (node->thread[ln] == 0){
799
+ node = node->link[ln];
800
+ }
801
+ return node;
802
+ }
803
+
804
+ /*----------------------------------------------------------------------------
805
+ * node_new - create a new node. MUST update link[] and thread[] after calling
806
+ * this function
807
+ *--------------------------------------------------------------------------*/
808
+ static RUMAVL_NODE *node_new(RUMAVL *tree, const void *record)
809
+ {
810
+ RUMAVL_NODE *node;
811
+
812
+ if ((node = mem_alloc(tree, sizeof(RUMAVL_NODE))) == NULL)
813
+ return NULL;
814
+
815
+ if ((node->rec = mem_alloc(tree, tree->reclen)) == NULL){
816
+ mem_free(tree, node);
817
+ return NULL;
818
+ }
819
+
820
+ memcpy(node->rec, record, tree->reclen);
821
+ node->balance = 0;
822
+ node->link[0] = NULL;
823
+ node->link[1] = NULL;
824
+ node->thread[0] = 0;
825
+ node->thread[1] = 0;
826
+ return node;
827
+ }
828
+
829
+ /*----------------------------------------------------------------------------
830
+ * node_destroy - cleanly destroy node
831
+ *--------------------------------------------------------------------------*/
832
+ static void node_destroy (RUMAVL *tree, RUMAVL_NODE *node)
833
+ {
834
+ mem_free(tree, node);
835
+ }
836
+
837
+ /*----------------------------------------------------------------------------
838
+ * stack_push - push a node entry onto stack, for rumavl_set() and
839
+ * rumavl_delete(). If this is the first entry, *stack should == NULL
840
+ *--------------------------------------------------------------------------*/
841
+ static int stack_push(RUMAVL *tree, RUMAVL_STACK **stack, RUMAVL_NODE **node,
842
+ int dir)
843
+ {
844
+ RUMAVL_STACK *tmp;
845
+
846
+ if ((tmp = mem_alloc(tree, sizeof(RUMAVL_STACK))) == NULL)
847
+ return -1;
848
+
849
+ tmp->next = *stack;
850
+ *stack = tmp;
851
+ tmp->node = node;
852
+ tmp->dir = dir;
853
+
854
+ return 0;
855
+ }
856
+
857
+ /*----------------------------------------------------------------------------
858
+ * stack_destroy - free up a stack
859
+ *--------------------------------------------------------------------------*/
860
+ static void stack_destroy(RUMAVL *tree, RUMAVL_STACK *stack)
861
+ {
862
+ RUMAVL_STACK *tmp;
863
+ while (stack != NULL){
864
+ tmp = stack;
865
+ stack = stack->next;
866
+ mem_free(tree, tmp);
867
+ }
868
+ }
869
+
870
+ /*----------------------------------------------------------------------------
871
+ * stack_update - goes up stack readjusting balance as needed. This function
872
+ * serves as a testiment to the philosophy of commenting while you code, 'cos
873
+ * hell if I can remember how I got to this. I think is has something to do
874
+ * with the varying effects on tree height, depending on exactly which sub
875
+ * tree, or sub-sub tree was modified. TODO study and comment
876
+ *--------------------------------------------------------------------------*/
877
+ static void stack_update(RUMAVL *tree, RUMAVL_STACK *stack, signed char diff)
878
+ {
879
+ RUMAVL_STACK *tmpstack;
880
+
881
+ /* if diff becomes 0, we quit, because no further change to ancestors
882
+ * can be made */
883
+ while (stack != NULL && diff != 0){
884
+ signed char ob, nb;
885
+ ob = (*stack->node)->balance;
886
+ (*stack->node)->balance += diff * (signed char)stack->dir;
887
+ nb = (*stack->node)->balance;
888
+ if (diff < 0){
889
+ if (stack->dir == -1 && ob < 0){
890
+ if (nb > 0)
891
+ nb = 0;
892
+ diff = (nb - ob) * -1;
893
+ }else if (stack->dir == 1 && ob > 0){
894
+ if (nb < 0)
895
+ nb = 0;
896
+ diff = nb - ob;
897
+ }else{
898
+ diff = 0;
899
+ }
900
+ }else{
901
+ if (stack->dir == -1 && nb < 0){
902
+ if (ob > 0)
903
+ ob = 0;
904
+ diff = (nb - ob) * -1;
905
+ }else if (stack->dir == 1 && nb > 0){
906
+ if (ob < 0)
907
+ ob = 0;
908
+ diff = nb - ob;
909
+ }else{
910
+ diff = 0;
911
+ }
912
+ }
913
+ while ((*stack->node)->balance > 1){
914
+ diff += balance(stack->node, -1);
915
+ }
916
+ while ((*stack->node)->balance < -1){
917
+ diff += balance(stack->node, 1);
918
+ }
919
+ tmpstack = stack;
920
+ stack = stack->next;
921
+ mem_free(tree, tmpstack);
922
+ }
923
+
924
+ /* we may exit early if diff becomes 0. We still need to free all stack
925
+ * entries */
926
+ while (stack != NULL){
927
+ tmpstack = stack;
928
+ stack = stack->next;
929
+ mem_free(tree, tmpstack);
930
+ }
931
+ }
932
+
933
/*----------------------------------------------------------------------------
 * my_cmp - default record comparison function: a plain memcmp() over the
 * first n bytes of both records. The user data pointer is not used.
 *--------------------------------------------------------------------------*/
static int my_cmp (const void *a, const void *b, size_t n, void *udata)
{
    int order = memcmp(a, b, n);

    (void) udata;
    return order;
}
941
+
942
+ /*----------------------------------------------------------------------------
943
+ * rec_cmp - a wrapper around the record comparison function, that only
944
+ * returns 0, RUMAVL_ASC or RUMAVL_DESC.
945
+ *--------------------------------------------------------------------------*/
946
+ static int rec_cmp (RUMAVL *tree, const void *reca, const void *recb)
947
+ {
948
+ int retv;
949
+ retv = tree->cmp(reca, recb, tree->reclen, tree->udata);
950
+ if (retv < 0)
951
+ return RUMAVL_DESC;
952
+ if (retv > 0)
953
+ return RUMAVL_ASC;
954
+ return 0;
955
+ }
956
+
957
+ /*----------------------------------------------------------------------------
958
+ * Balance - rotate or double rotate as needed. Sometimes simply rotating a
959
+ * tree is inefficient, as it leaves the tree as inbalanced as it was before
960
+ * the rotate. To rectify this, we first rotate the heavier child so that the
961
+ * heavier grandchild is on the outside, then rotate as per normal.
962
+ *
963
+ * TODO Check all callers, and make sure that they call this function sanely,
964
+ * and then remove unnecessary checks.
965
+ *--------------------------------------------------------------------------*/
966
+ static signed char balance (RUMAVL_NODE **node, int dir)
967
+ {
968
+ int ln;
969
+ signed char retv;
970
+
971
+ if (node == NULL || *node == NULL || (dir * dir) != 1)
972
+ return 0;
973
+
974
+ ln = OTHER_LINK(LINK_NO(dir)); /* link number of new root */
975
+
976
+ /* new root must exist */
977
+ if ((*node)->thread[ln] > 0)
978
+ return 0;
979
+
980
+ retv = 0;
981
+ if ((*node)->link[ln]->balance == (char) dir &&
982
+ (*node)->link[ln]->thread[OTHER_LINK(ln)] == 0){
983
+ /* double rotate if inner grandchild is heaviest */
984
+ retv = rotate (&((*node)->link[ln]), OTHER_DIR(dir));
985
+ }
986
+
987
+ return retv + rotate (node, dir);
988
+ }
989
+
990
+ /*----------------------------------------------------------------------------
991
+ * rotate
992
+ *
993
+ * rotates a tree rooted at *node. dir determines the direction of the rotate,
994
+ * dir < 0 -> left rotate; dir >= 0 -> right rotate
995
+ *
996
+ * TODO How sure are we that all callers pass decent `dir' values?
997
+ * TODO Restudy the tree height modification and balance factor algorithms,
998
+ * and document them.
999
+ *--------------------------------------------------------------------------*/
1000
+ static signed char rotate (RUMAVL_NODE **node, int dir)
1001
+ {
1002
+ RUMAVL_NODE *tmp;
1003
+ signed char a, b, ad, bd, retv;
1004
+ int ln;
1005
+
1006
+ /* force |dir| to be either -1 or +1 */
1007
+ if (node == NULL || *node == NULL || (dir * dir) != 1)
1008
+ return 0;
1009
+
1010
+ ln = LINK_NO(dir);
1011
+ ln = OTHER_LINK(ln); /* link number of new root */
1012
+
1013
+ /* new root must exist */
1014
+ if ((*node)->thread[ln] > 0)
1015
+ return 0;
1016
+
1017
+ /* calculate effect on tree height */
1018
+ if ((dir == 1 && (*node)->balance < 0 && (*node)->link[0]->balance >= 0)||
1019
+ (dir == -1 && (*node)->balance > 0 && (*node)->link[1]->balance <= 0)){
1020
+ retv = 0;
1021
+ }else{
1022
+ if (dir == 1){
1023
+ if ((*node)->balance < -1)
1024
+ retv = -1;
1025
+ else if ((*node)->balance == -1)
1026
+ retv = 0;
1027
+ else
1028
+ retv = +1;
1029
+ }else{
1030
+ if ((*node)->balance > 1)
1031
+ retv = -1;
1032
+ else if ((*node)->balance == 1)
1033
+ retv = 0;
1034
+ else
1035
+ retv = +1;
1036
+ }
1037
+ }
1038
+
1039
+
1040
+ /* rotate tree */
1041
+ tmp = *node;
1042
+ *node = tmp->link[ln];
1043
+ if ((*node)->thread[OTHER_LINK(ln)] > 0){
1044
+ tmp->thread[ln] = 1;
1045
+ }else{
1046
+ tmp->link[ln] = (*node)->link[OTHER_LINK(ln)];
1047
+ tmp->thread[ln] = 0;
1048
+ }
1049
+ (*node)->link[OTHER_LINK(ln)] = tmp;
1050
+ (*node)->thread[OTHER_LINK(ln)] = 0;
1051
+
1052
+
1053
+
1054
+ /* rebalance factors after rotate matrix */
1055
+ a = tmp->balance;
1056
+ b = (*node)->balance;
1057
+
1058
+ if (a > 0)
1059
+ ad = 1;
1060
+ else if (a < 0)
1061
+ ad = -1;
1062
+ else
1063
+ ad = 0;
1064
+
1065
+ if (b > 0)
1066
+ bd = 1;
1067
+ else if (b < 0)
1068
+ bd = -1;
1069
+ else
1070
+ bd = 0;
1071
+
1072
+ if (ad == OTHER_DIR(dir)){
1073
+ if (bd == OTHER_DIR(dir)){
1074
+ tmp->balance += (b * -1) + dir;
1075
+ if (tmp->balance * dir > 0)
1076
+ (*node)->balance = (tmp->balance - (b * -1)) + dir;
1077
+ else
1078
+ (*node)->balance += dir;
1079
+ }else{
1080
+ tmp->balance += dir;
1081
+ (*node)->balance += dir;
1082
+ }
1083
+ }else{
1084
+ if (bd == OTHER_DIR(dir)){
1085
+ tmp->balance += (b * -1) + dir;
1086
+ (*node)->balance += dir + tmp->balance;
1087
+ }else{
1088
+ tmp->balance += dir;
1089
+ (*node)->balance += dir + tmp->balance;
1090
+ }
1091
+ }
1092
+
1093
+ return retv;
1094
+ }
1095
+
1096
+ /*----------------------------------------------------------------------------
1097
+ * mem_alloc
1098
+ *
1099
+ * default memory allocation function (malloc wrapper)
1100
+ *--------------------------------------------------------------------------*/
1101
+ static void *mem_mgr (RUMAVL *tree, void *ptr, size_t size)
1102
+ {
1103
+ if (tree->alloc != NULL)
1104
+ return tree->alloc(ptr, size, tree->udata);
1105
+
1106
+ return realloc(ptr, size);
1107
+ }