opener-opinion-detector-base 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (261) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +101 -0
  3. data/bin/opinion-detector-base +19 -0
  4. data/core/annotation.cfg.erb +9 -0
  5. data/core/packages/KafNafParser-1.4.tar.gz +0 -0
  6. data/core/packages/VUA_pylib-1.5.tar.gz +0 -0
  7. data/core/python-scripts/LICENSE +339 -0
  8. data/core/python-scripts/README.md +226 -0
  9. data/core/python-scripts/classify_kaf_naf_file.py +499 -0
  10. data/core/python-scripts/cross_validation.py +634 -0
  11. data/core/python-scripts/generate_folds.py +134 -0
  12. data/core/python-scripts/models.cfg +10 -0
  13. data/core/python-scripts/my_templates/README +33 -0
  14. data/core/python-scripts/my_templates/templates_exp.only0.txt +6 -0
  15. data/core/python-scripts/my_templates/templates_exp.pol0.txt +10 -0
  16. data/core/python-scripts/my_templates/templates_exp.red.txt +7 -0
  17. data/core/python-scripts/my_templates/templates_exp.txt +10 -0
  18. data/core/python-scripts/my_templates/templates_holder.only0.txt +11 -0
  19. data/core/python-scripts/my_templates/templates_holder.red.txt +9 -0
  20. data/core/python-scripts/my_templates/templates_holder.txt +10 -0
  21. data/core/python-scripts/my_templates/templates_target.only0.txt +11 -0
  22. data/core/python-scripts/my_templates/templates_target.red.txt +9 -0
  23. data/core/python-scripts/my_templates/templates_target.txt +10 -0
  24. data/core/python-scripts/run_all_experiments.sh +49 -0
  25. data/core/python-scripts/run_basic.py +20 -0
  26. data/core/python-scripts/run_experiment.sh +42 -0
  27. data/core/python-scripts/scripts/__init__.py +1 -0
  28. data/core/python-scripts/scripts/config_manager.py +314 -0
  29. data/core/python-scripts/scripts/crfutils.py +215 -0
  30. data/core/python-scripts/scripts/extract_feats_relations.py +295 -0
  31. data/core/python-scripts/scripts/extract_features.py +376 -0
  32. data/core/python-scripts/scripts/feats_to_crf.exp.py +105 -0
  33. data/core/python-scripts/scripts/lexicons.py +44 -0
  34. data/core/python-scripts/scripts/link_entities_distance.py +77 -0
  35. data/core/python-scripts/scripts/relation_classifier.py +250 -0
  36. data/core/python-scripts/train.py +566 -0
  37. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/PKG-INFO +10 -0
  38. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/SOURCES.txt +22 -0
  39. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/dependency_links.txt +1 -0
  40. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/installed-files.txt +47 -0
  41. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/top_level.txt +1 -0
  42. data/core/site-packages/pre_build/KafNafParser/KafNafParserMod.py +390 -0
  43. data/core/site-packages/pre_build/KafNafParser/KafNafParserMod.pyc +0 -0
  44. data/core/site-packages/pre_build/KafNafParser/__init__.py +14 -0
  45. data/core/site-packages/pre_build/KafNafParser/__init__.pyc +0 -0
  46. data/core/site-packages/pre_build/KafNafParser/constituency_data.py +125 -0
  47. data/core/site-packages/pre_build/KafNafParser/constituency_data.pyc +0 -0
  48. data/core/site-packages/pre_build/KafNafParser/coreference_data.py +52 -0
  49. data/core/site-packages/pre_build/KafNafParser/coreference_data.pyc +0 -0
  50. data/core/site-packages/pre_build/KafNafParser/dependency_data.py +78 -0
  51. data/core/site-packages/pre_build/KafNafParser/dependency_data.pyc +0 -0
  52. data/core/site-packages/pre_build/KafNafParser/entity_data.py +59 -0
  53. data/core/site-packages/pre_build/KafNafParser/entity_data.pyc +0 -0
  54. data/core/site-packages/pre_build/KafNafParser/external_references_data.py +41 -0
  55. data/core/site-packages/pre_build/KafNafParser/external_references_data.pyc +0 -0
  56. data/core/site-packages/pre_build/KafNafParser/feature_extractor/__init__.py +2 -0
  57. data/core/site-packages/pre_build/KafNafParser/feature_extractor/__init__.pyc +0 -0
  58. data/core/site-packages/pre_build/KafNafParser/feature_extractor/constituency.py +205 -0
  59. data/core/site-packages/pre_build/KafNafParser/feature_extractor/constituency.pyc +0 -0
  60. data/core/site-packages/pre_build/KafNafParser/feature_extractor/dependency.py +309 -0
  61. data/core/site-packages/pre_build/KafNafParser/feature_extractor/dependency.pyc +0 -0
  62. data/core/site-packages/pre_build/KafNafParser/features_data.py +131 -0
  63. data/core/site-packages/pre_build/KafNafParser/features_data.pyc +0 -0
  64. data/core/site-packages/pre_build/KafNafParser/header_data.py +127 -0
  65. data/core/site-packages/pre_build/KafNafParser/header_data.pyc +0 -0
  66. data/core/site-packages/pre_build/KafNafParser/opinion_data.py +211 -0
  67. data/core/site-packages/pre_build/KafNafParser/opinion_data.pyc +0 -0
  68. data/core/site-packages/pre_build/KafNafParser/references_data.py +23 -0
  69. data/core/site-packages/pre_build/KafNafParser/references_data.pyc +0 -0
  70. data/core/site-packages/pre_build/KafNafParser/span_data.py +63 -0
  71. data/core/site-packages/pre_build/KafNafParser/span_data.pyc +0 -0
  72. data/core/site-packages/pre_build/KafNafParser/term_data.py +111 -0
  73. data/core/site-packages/pre_build/KafNafParser/term_data.pyc +0 -0
  74. data/core/site-packages/pre_build/KafNafParser/term_sentiment_data.py +42 -0
  75. data/core/site-packages/pre_build/KafNafParser/term_sentiment_data.pyc +0 -0
  76. data/core/site-packages/pre_build/KafNafParser/text_data.py +99 -0
  77. data/core/site-packages/pre_build/KafNafParser/text_data.pyc +0 -0
  78. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/PKG-INFO +10 -0
  79. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/SOURCES.txt +14 -0
  80. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/dependency_links.txt +1 -0
  81. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/installed-files.txt +23 -0
  82. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/top_level.txt +1 -0
  83. data/core/site-packages/pre_build/VUA_pylib/__init__.py +1 -0
  84. data/core/site-packages/pre_build/VUA_pylib/__init__.pyc +0 -0
  85. data/core/site-packages/pre_build/VUA_pylib/common/__init__.py +1 -0
  86. data/core/site-packages/pre_build/VUA_pylib/common/__init__.pyc +0 -0
  87. data/core/site-packages/pre_build/VUA_pylib/common/common.py +28 -0
  88. data/core/site-packages/pre_build/VUA_pylib/common/common.pyc +0 -0
  89. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/__init__.py +1 -0
  90. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/__init__.pyc +0 -0
  91. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/google_web_nl.py +156 -0
  92. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/google_web_nl.pyc +0 -0
  93. data/core/site-packages/pre_build/VUA_pylib/io_utils/__init__.py +1 -0
  94. data/core/site-packages/pre_build/VUA_pylib/io_utils/__init__.pyc +0 -0
  95. data/core/site-packages/pre_build/VUA_pylib/io_utils/feature_file.py +121 -0
  96. data/core/site-packages/pre_build/VUA_pylib/io_utils/feature_file.pyc +0 -0
  97. data/core/site-packages/pre_build/VUA_pylib/lexicon/__init__.py +1 -0
  98. data/core/site-packages/pre_build/VUA_pylib/lexicon/__init__.pyc +0 -0
  99. data/core/site-packages/pre_build/VUA_pylib/lexicon/lexicon.py +72 -0
  100. data/core/site-packages/pre_build/VUA_pylib/lexicon/lexicon.pyc +0 -0
  101. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/PKG-INFO +10 -0
  102. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/SOURCES.txt +7 -0
  103. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/dependency_links.txt +1 -0
  104. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/installed-files.txt +11 -0
  105. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/top_level.txt +1 -0
  106. data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.py +165 -0
  107. data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.pyc +0 -0
  108. data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.py +439 -0
  109. data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.pyc +0 -0
  110. data/core/site-packages/pre_build/VUKafParserPy/__init__.py +7 -0
  111. data/core/site-packages/pre_build/VUKafParserPy/__init__.pyc +0 -0
  112. data/core/vendor/src/crfsuite/AUTHORS +1 -0
  113. data/core/vendor/src/crfsuite/COPYING +27 -0
  114. data/core/vendor/src/crfsuite/ChangeLog +103 -0
  115. data/core/vendor/src/crfsuite/INSTALL +236 -0
  116. data/core/vendor/src/crfsuite/Makefile.am +19 -0
  117. data/core/vendor/src/crfsuite/Makefile.in +783 -0
  118. data/core/vendor/src/crfsuite/README +183 -0
  119. data/core/vendor/src/crfsuite/aclocal.m4 +9018 -0
  120. data/core/vendor/src/crfsuite/autogen.sh +38 -0
  121. data/core/vendor/src/crfsuite/compile +143 -0
  122. data/core/vendor/src/crfsuite/config.guess +1502 -0
  123. data/core/vendor/src/crfsuite/config.h.in +198 -0
  124. data/core/vendor/src/crfsuite/config.sub +1714 -0
  125. data/core/vendor/src/crfsuite/configure +14273 -0
  126. data/core/vendor/src/crfsuite/configure.in +149 -0
  127. data/core/vendor/src/crfsuite/crfsuite.sln +42 -0
  128. data/core/vendor/src/crfsuite/depcomp +630 -0
  129. data/core/vendor/src/crfsuite/example/chunking.py +49 -0
  130. data/core/vendor/src/crfsuite/example/crfutils.py +179 -0
  131. data/core/vendor/src/crfsuite/example/ner.py +270 -0
  132. data/core/vendor/src/crfsuite/example/pos.py +78 -0
  133. data/core/vendor/src/crfsuite/example/template.py +88 -0
  134. data/core/vendor/src/crfsuite/frontend/Makefile.am +29 -0
  135. data/core/vendor/src/crfsuite/frontend/Makefile.in +640 -0
  136. data/core/vendor/src/crfsuite/frontend/dump.c +116 -0
  137. data/core/vendor/src/crfsuite/frontend/frontend.vcxproj +129 -0
  138. data/core/vendor/src/crfsuite/frontend/iwa.c +273 -0
  139. data/core/vendor/src/crfsuite/frontend/iwa.h +65 -0
  140. data/core/vendor/src/crfsuite/frontend/learn.c +439 -0
  141. data/core/vendor/src/crfsuite/frontend/main.c +137 -0
  142. data/core/vendor/src/crfsuite/frontend/option.c +93 -0
  143. data/core/vendor/src/crfsuite/frontend/option.h +86 -0
  144. data/core/vendor/src/crfsuite/frontend/readdata.h +38 -0
  145. data/core/vendor/src/crfsuite/frontend/reader.c +136 -0
  146. data/core/vendor/src/crfsuite/frontend/tag.c +427 -0
  147. data/core/vendor/src/crfsuite/genbinary.sh.in +15 -0
  148. data/core/vendor/src/crfsuite/include/Makefile.am +11 -0
  149. data/core/vendor/src/crfsuite/include/Makefile.in +461 -0
  150. data/core/vendor/src/crfsuite/include/crfsuite.h +1063 -0
  151. data/core/vendor/src/crfsuite/include/crfsuite.hpp +555 -0
  152. data/core/vendor/src/crfsuite/include/crfsuite_api.hpp +400 -0
  153. data/core/vendor/src/crfsuite/include/os.h +61 -0
  154. data/core/vendor/src/crfsuite/install-sh +520 -0
  155. data/core/vendor/src/crfsuite/lib/cqdb/COPYING +28 -0
  156. data/core/vendor/src/crfsuite/lib/cqdb/Makefile.am +21 -0
  157. data/core/vendor/src/crfsuite/lib/cqdb/Makefile.in +549 -0
  158. data/core/vendor/src/crfsuite/lib/cqdb/cqdb.vcxproj +86 -0
  159. data/core/vendor/src/crfsuite/lib/cqdb/include/cqdb.h +524 -0
  160. data/core/vendor/src/crfsuite/lib/cqdb/src/cqdb.c +587 -0
  161. data/core/vendor/src/crfsuite/lib/cqdb/src/lookup3.c +976 -0
  162. data/core/vendor/src/crfsuite/lib/crf/Makefile.am +46 -0
  163. data/core/vendor/src/crfsuite/lib/crf/Makefile.in +721 -0
  164. data/core/vendor/src/crfsuite/lib/crf/crf.vcxproj +216 -0
  165. data/core/vendor/src/crfsuite/lib/crf/src/crf1d.h +353 -0
  166. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_context.c +705 -0
  167. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_encode.c +943 -0
  168. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_feature.c +352 -0
  169. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_model.c +994 -0
  170. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_tag.c +550 -0
  171. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite.c +492 -0
  172. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite_internal.h +236 -0
  173. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite_train.c +272 -0
  174. data/core/vendor/src/crfsuite/lib/crf/src/dataset.c +106 -0
  175. data/core/vendor/src/crfsuite/lib/crf/src/dictionary.c +118 -0
  176. data/core/vendor/src/crfsuite/lib/crf/src/holdout.c +80 -0
  177. data/core/vendor/src/crfsuite/lib/crf/src/logging.c +91 -0
  178. data/core/vendor/src/crfsuite/lib/crf/src/logging.h +48 -0
  179. data/core/vendor/src/crfsuite/lib/crf/src/params.c +335 -0
  180. data/core/vendor/src/crfsuite/lib/crf/src/params.h +80 -0
  181. data/core/vendor/src/crfsuite/lib/crf/src/quark.c +172 -0
  182. data/core/vendor/src/crfsuite/lib/crf/src/quark.h +46 -0
  183. data/core/vendor/src/crfsuite/lib/crf/src/rumavl.c +1107 -0
  184. data/core/vendor/src/crfsuite/lib/crf/src/rumavl.h +160 -0
  185. data/core/vendor/src/crfsuite/lib/crf/src/train_arow.c +408 -0
  186. data/core/vendor/src/crfsuite/lib/crf/src/train_averaged_perceptron.c +242 -0
  187. data/core/vendor/src/crfsuite/lib/crf/src/train_l2sgd.c +507 -0
  188. data/core/vendor/src/crfsuite/lib/crf/src/train_lbfgs.c +338 -0
  189. data/core/vendor/src/crfsuite/lib/crf/src/train_passive_aggressive.c +435 -0
  190. data/core/vendor/src/crfsuite/lib/crf/src/vecmath.h +341 -0
  191. data/core/vendor/src/crfsuite/ltmain.sh +8413 -0
  192. data/core/vendor/src/crfsuite/missing +376 -0
  193. data/core/vendor/src/crfsuite/swig/Makefile.am +13 -0
  194. data/core/vendor/src/crfsuite/swig/Makefile.in +365 -0
  195. data/core/vendor/src/crfsuite/swig/crfsuite.cpp +2 -0
  196. data/core/vendor/src/crfsuite/swig/export.i +32 -0
  197. data/core/vendor/src/crfsuite/swig/python/README +92 -0
  198. data/core/vendor/src/crfsuite/swig/python/crfsuite.py +329 -0
  199. data/core/vendor/src/crfsuite/swig/python/export_wrap.cpp +14355 -0
  200. data/core/vendor/src/crfsuite/swig/python/export_wrap.h +63 -0
  201. data/core/vendor/src/crfsuite/swig/python/prepare.sh +9 -0
  202. data/core/vendor/src/crfsuite/swig/python/sample_tag.py +52 -0
  203. data/core/vendor/src/crfsuite/swig/python/sample_train.py +68 -0
  204. data/core/vendor/src/crfsuite/swig/python/setup.py +44 -0
  205. data/core/vendor/src/crfsuite/win32/stdint.h +679 -0
  206. data/core/vendor/src/liblbfgs/AUTHORS +1 -0
  207. data/core/vendor/src/liblbfgs/COPYING +22 -0
  208. data/core/vendor/src/liblbfgs/ChangeLog +120 -0
  209. data/core/vendor/src/liblbfgs/INSTALL +231 -0
  210. data/core/vendor/src/liblbfgs/Makefile.am +10 -0
  211. data/core/vendor/src/liblbfgs/Makefile.in +638 -0
  212. data/core/vendor/src/liblbfgs/NEWS +0 -0
  213. data/core/vendor/src/liblbfgs/README +71 -0
  214. data/core/vendor/src/liblbfgs/aclocal.m4 +6985 -0
  215. data/core/vendor/src/liblbfgs/autogen.sh +38 -0
  216. data/core/vendor/src/liblbfgs/config.guess +1411 -0
  217. data/core/vendor/src/liblbfgs/config.h.in +64 -0
  218. data/core/vendor/src/liblbfgs/config.sub +1500 -0
  219. data/core/vendor/src/liblbfgs/configure +21146 -0
  220. data/core/vendor/src/liblbfgs/configure.in +107 -0
  221. data/core/vendor/src/liblbfgs/depcomp +522 -0
  222. data/core/vendor/src/liblbfgs/include/lbfgs.h +745 -0
  223. data/core/vendor/src/liblbfgs/install-sh +322 -0
  224. data/core/vendor/src/liblbfgs/lbfgs.sln +26 -0
  225. data/core/vendor/src/liblbfgs/lib/Makefile.am +24 -0
  226. data/core/vendor/src/liblbfgs/lib/Makefile.in +499 -0
  227. data/core/vendor/src/liblbfgs/lib/arithmetic_ansi.h +133 -0
  228. data/core/vendor/src/liblbfgs/lib/arithmetic_sse_double.h +294 -0
  229. data/core/vendor/src/liblbfgs/lib/arithmetic_sse_float.h +298 -0
  230. data/core/vendor/src/liblbfgs/lib/lbfgs.c +1371 -0
  231. data/core/vendor/src/liblbfgs/lib/lib.vcxproj +95 -0
  232. data/core/vendor/src/liblbfgs/ltmain.sh +6426 -0
  233. data/core/vendor/src/liblbfgs/missing +353 -0
  234. data/core/vendor/src/liblbfgs/sample/Makefile.am +15 -0
  235. data/core/vendor/src/liblbfgs/sample/Makefile.in +433 -0
  236. data/core/vendor/src/liblbfgs/sample/sample.c +81 -0
  237. data/core/vendor/src/liblbfgs/sample/sample.cpp +126 -0
  238. data/core/vendor/src/liblbfgs/sample/sample.vcxproj +105 -0
  239. data/core/vendor/src/svm_light/LICENSE.txt +59 -0
  240. data/core/vendor/src/svm_light/Makefile +105 -0
  241. data/core/vendor/src/svm_light/kernel.h +40 -0
  242. data/core/vendor/src/svm_light/svm_classify.c +197 -0
  243. data/core/vendor/src/svm_light/svm_common.c +985 -0
  244. data/core/vendor/src/svm_light/svm_common.h +301 -0
  245. data/core/vendor/src/svm_light/svm_hideo.c +1062 -0
  246. data/core/vendor/src/svm_light/svm_learn.c +4147 -0
  247. data/core/vendor/src/svm_light/svm_learn.h +169 -0
  248. data/core/vendor/src/svm_light/svm_learn_main.c +397 -0
  249. data/core/vendor/src/svm_light/svm_loqo.c +211 -0
  250. data/ext/hack/Rakefile +17 -0
  251. data/ext/hack/support.rb +88 -0
  252. data/lib/opener/opinion_detectors/base.rb +112 -0
  253. data/lib/opener/opinion_detectors/base/version.rb +7 -0
  254. data/lib/opener/opinion_detectors/configuration_creator.rb +86 -0
  255. data/lib/opener/opinion_detectors/de.rb +7 -0
  256. data/lib/opener/opinion_detectors/en.rb +7 -0
  257. data/lib/opener/opinion_detectors/it.rb +7 -0
  258. data/lib/opener/opinion_detectors/nl.rb +6 -0
  259. data/opener-opinion-detector-base.gemspec +35 -0
  260. data/pre_build_requirements.txt +3 -0
  261. metadata +374 -0
@@ -0,0 +1,550 @@
1
+ /*
2
+ * CRF1d tagger (implementation of crfsuite_model_t and crfsuite_tagger_t).
3
+ *
4
+ * Copyright (c) 2007-2010, Naoaki Okazaki
5
+ * All rights reserved.
6
+ *
7
+ * Redistribution and use in source and binary forms, with or without
8
+ * modification, are permitted provided that the following conditions are met:
9
+ * * Redistributions of source code must retain the above copyright
10
+ * notice, this list of conditions and the following disclaimer.
11
+ * * Redistributions in binary form must reproduce the above copyright
12
+ * notice, this list of conditions and the following disclaimer in the
13
+ * documentation and/or other materials provided with the distribution.
14
+ * * Neither the names of the authors nor the names of its contributors
15
+ * may be used to endorse or promote products derived from this
16
+ * software without specific prior written permission.
17
+ *
18
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
22
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
+ */
30
+
31
+ /* $Id$ */
32
+
33
+ #ifdef HAVE_CONFIG_H
34
+ #include <config.h>
35
+ #endif/*HAVE_CONFIG_H*/
36
+
37
+ #include <os.h>
38
+
39
+ #include <math.h>
40
+ #include <stdio.h>
41
+ #include <stdlib.h>
42
+ #include <string.h>
43
+
44
+ #include <crfsuite.h>
45
+
46
+ #include "crf1d.h"
47
+
48
/* Values stored in crf1dt_t::level for the instance currently loaded. */
enum {
    LEVEL_NONE = 0,         /* Assigned by crf1dt_new(). */
    LEVEL_SET = 1,          /* Assigned by tagger_set(). */
    LEVEL_ALPHABETA = 2,    /* Requested by the lognorm/marginal entry points. */
};
53
+
54
+ typedef struct {
55
+ crf1dm_t *model; /**< CRF model. */
56
+ crf1d_context_t *ctx; /**< CRF context. */
57
+ int num_labels; /**< Number of distinct output labels (L). */
58
+ int num_attributes; /**< Number of distinct attributes (A). */
59
+ int level;
60
+ } crf1dt_t;
61
+
62
+ static void crf1dt_state_score(crf1dt_t *crf1dt, const crfsuite_instance_t *inst)
63
+ {
64
+ int a, i, l, t, r, fid;
65
+ crf1dm_feature_t f;
66
+ feature_refs_t attr;
67
+ floatval_t value, *state = NULL;
68
+ crf1dm_t* model = crf1dt->model;
69
+ crf1d_context_t* ctx = crf1dt->ctx;
70
+ const crfsuite_item_t* item = NULL;
71
+ const int T = inst->num_items;
72
+ const int L = crf1dt->num_labels;
73
+
74
+ /* Loop over the items in the sequence. */
75
+ for (t = 0;t < T;++t) {
76
+ item = &inst->items[t];
77
+ state = STATE_SCORE(ctx, t);
78
+
79
+ /* Loop over the contents (attributes) attached to the item. */
80
+ for (i = 0;i < item->num_contents;++i) {
81
+ /* Access the list of state features associated with the attribute. */
82
+ a = item->contents[i].aid;
83
+ crf1dm_get_attrref(model, a, &attr);
84
+ /* A scale usually represents the atrribute frequency in the item. */
85
+ value = item->contents[i].value;
86
+
87
+ /* Loop over the state features associated with the attribute. */
88
+ for (r = 0;r < attr.num_features;++r) {
89
+ /* The state feature #(attr->fids[r]), which is represented by
90
+ the attribute #a, outputs the label #(f->dst). */
91
+ fid = crf1dm_get_featureid(&attr, r);
92
+ crf1dm_get_feature(model, fid, &f);
93
+ l = f.dst;
94
+ state[l] += f.weight * value;
95
+ }
96
+ }
97
+ }
98
+ }
99
+
100
+ static void crf1dt_transition_score(crf1dt_t* crf1dt)
101
+ {
102
+ int i, r, fid;
103
+ crf1dm_feature_t f;
104
+ feature_refs_t edge;
105
+ floatval_t *trans = NULL;
106
+ crf1dm_t* model = crf1dt->model;
107
+ crf1d_context_t* ctx = crf1dt->ctx;
108
+ const int L = crf1dt->num_labels;
109
+
110
+ /* Compute transition scores between two labels. */
111
+ for (i = 0;i < L;++i) {
112
+ trans = TRANS_SCORE(ctx, i);
113
+ crf1dm_get_labelref(model, i, &edge);
114
+ for (r = 0;r < edge.num_features;++r) {
115
+ /* Transition feature from #i to #(f->dst). */
116
+ fid = crf1dm_get_featureid(&edge, r);
117
+ crf1dm_get_feature(model, fid, &f);
118
+ trans[f.dst] = f.weight;
119
+ }
120
+ }
121
+ }
122
+
123
+ static void crf1dt_set_level(crf1dt_t *crf1dt, int level)
124
+ {
125
+ int prev = crf1dt->level;
126
+ crf1d_context_t* ctx = crf1dt->ctx;
127
+
128
+ if (level <= LEVEL_ALPHABETA && prev < LEVEL_ALPHABETA) {
129
+ crf1dc_exp_state(ctx);
130
+ crf1dc_alpha_score(ctx);
131
+ crf1dc_beta_score(ctx);
132
+ }
133
+
134
+ crf1dt->level = level;
135
+ }
136
+
137
+ static void crf1dt_delete(crf1dt_t* crf1dt)
138
+ {
139
+ /* Note: we don't own the model object (crf1t->model). */
140
+ if (crf1dt->ctx != NULL) {
141
+ crf1dc_delete(crf1dt->ctx);
142
+ crf1dt->ctx = NULL;
143
+ }
144
+ free(crf1dt);
145
+ }
146
+
147
+ static crf1dt_t *crf1dt_new(crf1dm_t* crf1dm)
148
+ {
149
+ crf1dt_t* crf1dt = NULL;
150
+
151
+ crf1dt = (crf1dt_t*)calloc(1, sizeof(crf1dt_t));
152
+ if (crf1dt != NULL) {
153
+ crf1dt->num_labels = crf1dm_get_num_labels(crf1dm);
154
+ crf1dt->num_attributes = crf1dm_get_num_attrs(crf1dm);
155
+ crf1dt->model = crf1dm;
156
+ crf1dt->ctx = crf1dc_new(CTXF_VITERBI | CTXF_MARGINALS, crf1dt->num_labels, 0);
157
+ if (crf1dt->ctx != NULL) {
158
+ crf1dc_reset(crf1dt->ctx, RF_TRANS);
159
+ crf1dt_transition_score(crf1dt);
160
+ crf1dc_exp_transition(crf1dt->ctx);
161
+ } else {
162
+ crf1dt_delete(crf1dt);
163
+ crf1dt = NULL;
164
+ }
165
+ crf1dt->level = LEVEL_NONE;
166
+ }
167
+
168
+ return crf1dt;
169
+ }
170
+
171
+
172
+
173
+ /*
174
+ * Implementation of crfsuite_tagger_t object.
175
+ * This object is instantiated only by a crfsuite_model_t object.
176
+ */
177
+
178
+ static int tagger_addref(crfsuite_tagger_t* tagger)
179
+ {
180
+ /* This object is owned only by a crfsuite_model_t object. */
181
+ return tagger->nref;
182
+ }
183
+
184
+ static int tagger_release(crfsuite_tagger_t* tagger)
185
+ {
186
+ /* This object is owned only by a crfsuite_model_t object. */
187
+ return tagger->nref;
188
+ }
189
+
190
+ static int tagger_set(crfsuite_tagger_t* tagger, crfsuite_instance_t *inst)
191
+ {
192
+ crf1dt_t* crf1dt = (crf1dt_t*)tagger->internal;
193
+ crf1d_context_t* ctx = crf1dt->ctx;
194
+ crf1dc_set_num_items(ctx, inst->num_items);
195
+ crf1dc_reset(crf1dt->ctx, RF_STATE);
196
+ crf1dt_state_score(crf1dt, inst);
197
+ crf1dt->level = LEVEL_SET;
198
+ return 0;
199
+ }
200
+
201
+ static int tagger_length(crfsuite_tagger_t* tagger)
202
+ {
203
+ crf1dt_t* crf1dt = (crf1dt_t*)tagger->internal;
204
+ crf1d_context_t* ctx = crf1dt->ctx;
205
+ return ctx->num_items;
206
+ }
207
+
208
+ static int tagger_viterbi(crfsuite_tagger_t* tagger, int *labels, floatval_t *ptr_score)
209
+ {
210
+ floatval_t score;
211
+ crf1dt_t* crf1dt = (crf1dt_t*)tagger->internal;
212
+ crf1d_context_t* ctx = crf1dt->ctx;
213
+
214
+ score = crf1dc_viterbi(ctx, labels);
215
+ if (ptr_score != NULL) {
216
+ *ptr_score = score;
217
+ }
218
+
219
+ return 0;
220
+ }
221
+
222
+ static int tagger_score(crfsuite_tagger_t* tagger, int *path, floatval_t *ptr_score)
223
+ {
224
+ floatval_t score;
225
+ crf1dt_t* crf1dt = (crf1dt_t*)tagger->internal;
226
+ crf1d_context_t* ctx = crf1dt->ctx;
227
+ score = crf1dc_score(ctx, path);
228
+ if (ptr_score != NULL) {
229
+ *ptr_score = score;
230
+ }
231
+ return 0;
232
+ }
233
+
234
+ static int tagger_lognorm(crfsuite_tagger_t* tagger, floatval_t *ptr_norm)
235
+ {
236
+ crf1dt_t* crf1dt = (crf1dt_t*)tagger->internal;
237
+ crf1dt_set_level(crf1dt, LEVEL_ALPHABETA);
238
+ *ptr_norm = crf1dc_lognorm(crf1dt->ctx);
239
+ return 0;
240
+ }
241
+
242
+ static int tagger_marginal_point(crfsuite_tagger_t *tagger, int l, int t, floatval_t *ptr_prob)
243
+ {
244
+ crf1dt_t* crf1dt = (crf1dt_t*)tagger->internal;
245
+ crf1dt_set_level(crf1dt, LEVEL_ALPHABETA);
246
+ *ptr_prob = crf1dc_marginal_point(crf1dt->ctx, l, t);
247
+ return 0;
248
+ }
249
+
250
+ static int tagger_marginal_path(crfsuite_tagger_t *tagger, const int *path, int begin, int end, floatval_t *ptr_prob)
251
+ {
252
+ crf1dt_t* crf1dt = (crf1dt_t*)tagger->internal;
253
+ crf1dt_set_level(crf1dt, LEVEL_ALPHABETA);
254
+ *ptr_prob = crf1dc_marginal_path(crf1dt->ctx, path, begin, end);
255
+ return 0;
256
+ }
257
+
258
+
259
+
260
+ /*
261
+ * Implementation of crfsuite_dictionary_t object for attributes.
262
+ * This object is instantiated only by a crfsuite_model_t object.
263
+ */
264
+
265
+ static int model_attrs_addref(crfsuite_dictionary_t* dic)
266
+ {
267
+ /* This object is owned only by a crfsuite_model_t object. */
268
+ return dic->nref;
269
+ }
270
+
271
+ static int model_attrs_release(crfsuite_dictionary_t* dic)
272
+ {
273
+ /* This object is owned and freed only by a crfsuite_model_t object. */
274
+ return dic->nref;
275
+ }
276
+
277
+ static int model_attrs_get(crfsuite_dictionary_t* dic, const char *str)
278
+ {
279
+ /* This object is ready only. */
280
+ return CRFSUITEERR_NOTSUPPORTED;
281
+ }
282
+
283
+ static int model_attrs_to_id(crfsuite_dictionary_t* dic, const char *str)
284
+ {
285
+ crf1dm_t *crf1dm = (crf1dm_t*)dic->internal;
286
+ return crf1dm_to_aid(crf1dm, str);
287
+ }
288
+
289
+ static int model_attrs_to_string(crfsuite_dictionary_t* dic, int id, char const **pstr)
290
+ {
291
+ crf1dm_t *crf1dm = (crf1dm_t*)dic->internal;
292
+ *pstr = crf1dm_to_attr(crf1dm, id);
293
+ return 0;
294
+ }
295
+
296
+ static int model_attrs_num(crfsuite_dictionary_t* dic)
297
+ {
298
+ crf1dm_t *crf1dm = (crf1dm_t*)dic->internal;
299
+ return crf1dm_get_num_attrs(crf1dm);
300
+ }
301
+
302
+ static void model_attrs_free(crfsuite_dictionary_t* dic, const char *str)
303
+ {
304
+ /* all strings are freed on the release of the dictionary object. */
305
+ }
306
+
307
+
308
+
309
+
310
+ /*
311
+ * Implementation of crfsuite_dictionary_t object for labels.
312
+ * This object is instantiated only by a crfsuite_model_t object.
313
+ */
314
+
315
+ static int model_labels_addref(crfsuite_dictionary_t* dic)
316
+ {
317
+ /* This object is owned only by a crfsuite_model_t object. */
318
+ return dic->nref;
319
+ }
320
+
321
+ static int model_labels_release(crfsuite_dictionary_t* dic)
322
+ {
323
+ /* This object is owned and freed only by a crfsuite_model_t object. */
324
+ return dic->nref;
325
+ }
326
+
327
+ static int model_labels_get(crfsuite_dictionary_t* dic, const char *str)
328
+ {
329
+ /* This object is ready only. */
330
+ return CRFSUITEERR_NOTSUPPORTED;
331
+ }
332
+
333
+ static int model_labels_to_id(crfsuite_dictionary_t* dic, const char *str)
334
+ {
335
+ crf1dm_t *crf1dm = (crf1dm_t*)dic->internal;
336
+ return crf1dm_to_lid(crf1dm, str);
337
+ }
338
+
339
+ static int model_labels_to_string(crfsuite_dictionary_t* dic, int id, char const **pstr)
340
+ {
341
+ crf1dm_t *crf1dm = (crf1dm_t*)dic->internal;
342
+ *pstr = crf1dm_to_label(crf1dm, id);
343
+ return 0;
344
+ }
345
+
346
+ static int model_labels_num(crfsuite_dictionary_t* dic)
347
+ {
348
+ crf1dm_t *crf1dm = (crf1dm_t*)dic->internal;
349
+ return crf1dm_get_num_labels(crf1dm);
350
+ }
351
+
352
+ static void model_labels_free(crfsuite_dictionary_t* dic, const char *str)
353
+ {
354
+ /* all strings are freed on the release of the dictionary object. */
355
+ }
356
+
357
+
358
+
359
+ /*
360
+ * Implementation of crfsuite_model_t object.
361
+ * This object is instantiated by crf1m_model_create() function.
362
+ */
363
+
364
+ typedef struct {
365
+ crf1dm_t* crf1dm;
366
+
367
+ crfsuite_dictionary_t* attrs;
368
+ crfsuite_dictionary_t* labels;
369
+ crfsuite_tagger_t* tagger;
370
+ } model_internal_t;
371
+
372
+ static int model_addref(crfsuite_model_t* model)
373
+ {
374
+ return crfsuite_interlocked_increment(&model->nref);
375
+ }
376
+
377
+ static int model_release(crfsuite_model_t* model)
378
+ {
379
+ int count = crfsuite_interlocked_decrement(&model->nref);
380
+ if (count == 0) {
381
+ /* This instance is being destroyed. */
382
+ model_internal_t* internal = (model_internal_t*)model->internal;
383
+ crf1dt_delete((crf1dt_t*)internal->tagger->internal);
384
+ free(internal->tagger);
385
+ free(internal->labels);
386
+ free(internal->attrs);
387
+ crf1dm_close(internal->crf1dm);
388
+ free(internal);
389
+ free(model);
390
+ }
391
+ return count;
392
+ }
393
+
394
+ static int model_get_tagger(crfsuite_model_t* model, crfsuite_tagger_t** ptr_tagger)
395
+ {
396
+ model_internal_t* internal = (model_internal_t*)model->internal;
397
+ /* We don't increment the reference counter. */
398
+ *ptr_tagger = internal->tagger;
399
+ return 0;
400
+ }
401
+
402
+ static int model_get_labels(crfsuite_model_t* model, crfsuite_dictionary_t** ptr_labels)
403
+ {
404
+ model_internal_t* internal = (model_internal_t*)model->internal;
405
+ /* We don't increment the reference counter. */
406
+ *ptr_labels = internal->labels;
407
+ return 0;
408
+ }
409
+
410
+ static int model_get_attrs(crfsuite_model_t* model, crfsuite_dictionary_t** ptr_attrs)
411
+ {
412
+ model_internal_t* internal = (model_internal_t*)model->internal;
413
+ /* We don't increment the reference counter. */
414
+ *ptr_attrs = internal->attrs;
415
+ return 0;
416
+ }
417
+
418
+ static int model_dump(crfsuite_model_t* model, FILE *fpo)
419
+ {
420
+ model_internal_t* internal = (model_internal_t*)model->internal;
421
+ crf1dm_dump(internal->crf1dm, fpo);
422
+ return 0;
423
+ }
424
+
425
+ static int crf1m_model_create(const char *filename, crfsuite_model_t** ptr_model)
426
+ {
427
+ int ret = 0;
428
+ crf1dm_t *crf1dm = NULL;
429
+ crf1dt_t *crf1dt = NULL;
430
+ crfsuite_model_t *model = NULL;
431
+ model_internal_t *internal = NULL;
432
+ crfsuite_tagger_t *tagger = NULL;
433
+ crfsuite_dictionary_t *attrs = NULL, *labels = NULL;
434
+
435
+ *ptr_model = NULL;
436
+
437
+ /* Open the model file. */
438
+ crf1dm = crf1dm_new(filename);
439
+ if (crf1dm == NULL) {
440
+ ret = CRFSUITEERR_INCOMPATIBLE;
441
+ goto error_exit;
442
+ }
443
+
444
+ /* Construct a tagger based on the model. */
445
+ crf1dt = crf1dt_new(crf1dm);
446
+ if (crf1dt == NULL) {
447
+ ret = CRFSUITEERR_OUTOFMEMORY;
448
+ goto error_exit;
449
+ }
450
+
451
+ /* Create an instance of internal data attached to the model. */
452
+ internal = (model_internal_t*)calloc(1, sizeof(model_internal_t));
453
+ if (internal == NULL) {
454
+ ret = CRFSUITEERR_OUTOFMEMORY;
455
+ goto error_exit;
456
+ }
457
+
458
+ /* Create an instance of dictionary object for attributes. */
459
+ attrs = (crfsuite_dictionary_t*)calloc(1, sizeof(crfsuite_dictionary_t));
460
+ if (attrs == NULL) {
461
+ ret = CRFSUITEERR_OUTOFMEMORY;
462
+ goto error_exit;
463
+ }
464
+ attrs->internal = crf1dm;
465
+ attrs->nref = 1;
466
+ attrs->addref = model_attrs_addref;
467
+ attrs->release = model_attrs_release;
468
+ attrs->get = model_attrs_get;
469
+ attrs->to_id = model_attrs_to_id;
470
+ attrs->to_string = model_attrs_to_string;
471
+ attrs->num = model_attrs_num;
472
+ attrs->free = model_attrs_free;
473
+
474
+ /* Create an instance of dictionary object for labels. */
475
+ labels = (crfsuite_dictionary_t*)calloc(1, sizeof(crfsuite_dictionary_t));
476
+ if (labels == NULL) {
477
+ ret = CRFSUITEERR_OUTOFMEMORY;
478
+ goto error_exit;
479
+ }
480
+ labels->internal = crf1dm;
481
+ labels->nref = 1;
482
+ labels->addref = model_labels_addref;
483
+ labels->release = model_labels_release;
484
+ labels->get = model_labels_get;
485
+ labels->to_id = model_labels_to_id;
486
+ labels->to_string = model_labels_to_string;
487
+ labels->num = model_labels_num;
488
+ labels->free = model_labels_free;
489
+
490
+ /* Create an instance of tagger object. */
491
+ tagger = (crfsuite_tagger_t*)calloc(1, sizeof(crfsuite_tagger_t));
492
+ if (tagger == NULL) {
493
+ ret = CRFSUITEERR_OUTOFMEMORY;
494
+ goto error_exit;
495
+ }
496
+ tagger->internal = crf1dt;
497
+ tagger->nref = 1;
498
+ tagger->addref = tagger_addref;
499
+ tagger->release = tagger_release;
500
+ tagger->set = tagger_set;
501
+ tagger->length = tagger_length;
502
+ tagger->viterbi = tagger_viterbi;
503
+ tagger->score = tagger_score;
504
+ tagger->lognorm = tagger_lognorm;
505
+ tagger->marginal_point = tagger_marginal_point;
506
+ tagger->marginal_path = tagger_marginal_path;
507
+
508
+ /* Set the internal data for the model object. */
509
+ internal->crf1dm = crf1dm;
510
+ internal->attrs = attrs;
511
+ internal->labels = labels;
512
+ internal->tagger = tagger;
513
+
514
+ /* Create an instance of model object. */
515
+ model = (crfsuite_model_t*)calloc(1, sizeof(crfsuite_model_t));
516
+ if (model == NULL) {
517
+ ret = CRFSUITEERR_OUTOFMEMORY;
518
+ goto error_exit;
519
+ }
520
+ model->internal = internal;
521
+ model->nref = 1;
522
+ model->addref = model_addref;
523
+ model->release = model_release;
524
+ model->get_attrs = model_get_attrs;
525
+ model->get_labels = model_get_labels;
526
+ model->get_tagger = model_get_tagger;
527
+ model->dump = model_dump;
528
+
529
+ *ptr_model = model;
530
+ return 0;
531
+
532
+ error_exit:
533
+ free(tagger);
534
+ free(labels);
535
+ free(attrs);
536
+ if (crf1dt != NULL) {
537
+ crf1dt_delete(crf1dt);
538
+ }
539
+ if (crf1dm != NULL) {
540
+ crf1dm_close(crf1dm);
541
+ }
542
+ free(internal);
543
+ free(model);
544
+ return ret;
545
+ }
546
+
547
+ int crf1m_create_instance_from_file(const char *filename, void **ptr)
548
+ {
549
+ return crf1m_model_create(filename, (crfsuite_model_t**)ptr);
550
+ }