opener-opinion-detector-base 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (261) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +101 -0
  3. data/bin/opinion-detector-base +19 -0
  4. data/core/annotation.cfg.erb +9 -0
  5. data/core/packages/KafNafParser-1.4.tar.gz +0 -0
  6. data/core/packages/VUA_pylib-1.5.tar.gz +0 -0
  7. data/core/python-scripts/LICENSE +339 -0
  8. data/core/python-scripts/README.md +226 -0
  9. data/core/python-scripts/classify_kaf_naf_file.py +499 -0
  10. data/core/python-scripts/cross_validation.py +634 -0
  11. data/core/python-scripts/generate_folds.py +134 -0
  12. data/core/python-scripts/models.cfg +10 -0
  13. data/core/python-scripts/my_templates/README +33 -0
  14. data/core/python-scripts/my_templates/templates_exp.only0.txt +6 -0
  15. data/core/python-scripts/my_templates/templates_exp.pol0.txt +10 -0
  16. data/core/python-scripts/my_templates/templates_exp.red.txt +7 -0
  17. data/core/python-scripts/my_templates/templates_exp.txt +10 -0
  18. data/core/python-scripts/my_templates/templates_holder.only0.txt +11 -0
  19. data/core/python-scripts/my_templates/templates_holder.red.txt +9 -0
  20. data/core/python-scripts/my_templates/templates_holder.txt +10 -0
  21. data/core/python-scripts/my_templates/templates_target.only0.txt +11 -0
  22. data/core/python-scripts/my_templates/templates_target.red.txt +9 -0
  23. data/core/python-scripts/my_templates/templates_target.txt +10 -0
  24. data/core/python-scripts/run_all_experiments.sh +49 -0
  25. data/core/python-scripts/run_basic.py +20 -0
  26. data/core/python-scripts/run_experiment.sh +42 -0
  27. data/core/python-scripts/scripts/__init__.py +1 -0
  28. data/core/python-scripts/scripts/config_manager.py +314 -0
  29. data/core/python-scripts/scripts/crfutils.py +215 -0
  30. data/core/python-scripts/scripts/extract_feats_relations.py +295 -0
  31. data/core/python-scripts/scripts/extract_features.py +376 -0
  32. data/core/python-scripts/scripts/feats_to_crf.exp.py +105 -0
  33. data/core/python-scripts/scripts/lexicons.py +44 -0
  34. data/core/python-scripts/scripts/link_entities_distance.py +77 -0
  35. data/core/python-scripts/scripts/relation_classifier.py +250 -0
  36. data/core/python-scripts/train.py +566 -0
  37. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/PKG-INFO +10 -0
  38. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/SOURCES.txt +22 -0
  39. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/dependency_links.txt +1 -0
  40. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/installed-files.txt +47 -0
  41. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/top_level.txt +1 -0
  42. data/core/site-packages/pre_build/KafNafParser/KafNafParserMod.py +390 -0
  43. data/core/site-packages/pre_build/KafNafParser/KafNafParserMod.pyc +0 -0
  44. data/core/site-packages/pre_build/KafNafParser/__init__.py +14 -0
  45. data/core/site-packages/pre_build/KafNafParser/__init__.pyc +0 -0
  46. data/core/site-packages/pre_build/KafNafParser/constituency_data.py +125 -0
  47. data/core/site-packages/pre_build/KafNafParser/constituency_data.pyc +0 -0
  48. data/core/site-packages/pre_build/KafNafParser/coreference_data.py +52 -0
  49. data/core/site-packages/pre_build/KafNafParser/coreference_data.pyc +0 -0
  50. data/core/site-packages/pre_build/KafNafParser/dependency_data.py +78 -0
  51. data/core/site-packages/pre_build/KafNafParser/dependency_data.pyc +0 -0
  52. data/core/site-packages/pre_build/KafNafParser/entity_data.py +59 -0
  53. data/core/site-packages/pre_build/KafNafParser/entity_data.pyc +0 -0
  54. data/core/site-packages/pre_build/KafNafParser/external_references_data.py +41 -0
  55. data/core/site-packages/pre_build/KafNafParser/external_references_data.pyc +0 -0
  56. data/core/site-packages/pre_build/KafNafParser/feature_extractor/__init__.py +2 -0
  57. data/core/site-packages/pre_build/KafNafParser/feature_extractor/__init__.pyc +0 -0
  58. data/core/site-packages/pre_build/KafNafParser/feature_extractor/constituency.py +205 -0
  59. data/core/site-packages/pre_build/KafNafParser/feature_extractor/constituency.pyc +0 -0
  60. data/core/site-packages/pre_build/KafNafParser/feature_extractor/dependency.py +309 -0
  61. data/core/site-packages/pre_build/KafNafParser/feature_extractor/dependency.pyc +0 -0
  62. data/core/site-packages/pre_build/KafNafParser/features_data.py +131 -0
  63. data/core/site-packages/pre_build/KafNafParser/features_data.pyc +0 -0
  64. data/core/site-packages/pre_build/KafNafParser/header_data.py +127 -0
  65. data/core/site-packages/pre_build/KafNafParser/header_data.pyc +0 -0
  66. data/core/site-packages/pre_build/KafNafParser/opinion_data.py +211 -0
  67. data/core/site-packages/pre_build/KafNafParser/opinion_data.pyc +0 -0
  68. data/core/site-packages/pre_build/KafNafParser/references_data.py +23 -0
  69. data/core/site-packages/pre_build/KafNafParser/references_data.pyc +0 -0
  70. data/core/site-packages/pre_build/KafNafParser/span_data.py +63 -0
  71. data/core/site-packages/pre_build/KafNafParser/span_data.pyc +0 -0
  72. data/core/site-packages/pre_build/KafNafParser/term_data.py +111 -0
  73. data/core/site-packages/pre_build/KafNafParser/term_data.pyc +0 -0
  74. data/core/site-packages/pre_build/KafNafParser/term_sentiment_data.py +42 -0
  75. data/core/site-packages/pre_build/KafNafParser/term_sentiment_data.pyc +0 -0
  76. data/core/site-packages/pre_build/KafNafParser/text_data.py +99 -0
  77. data/core/site-packages/pre_build/KafNafParser/text_data.pyc +0 -0
  78. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/PKG-INFO +10 -0
  79. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/SOURCES.txt +14 -0
  80. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/dependency_links.txt +1 -0
  81. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/installed-files.txt +23 -0
  82. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/top_level.txt +1 -0
  83. data/core/site-packages/pre_build/VUA_pylib/__init__.py +1 -0
  84. data/core/site-packages/pre_build/VUA_pylib/__init__.pyc +0 -0
  85. data/core/site-packages/pre_build/VUA_pylib/common/__init__.py +1 -0
  86. data/core/site-packages/pre_build/VUA_pylib/common/__init__.pyc +0 -0
  87. data/core/site-packages/pre_build/VUA_pylib/common/common.py +28 -0
  88. data/core/site-packages/pre_build/VUA_pylib/common/common.pyc +0 -0
  89. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/__init__.py +1 -0
  90. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/__init__.pyc +0 -0
  91. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/google_web_nl.py +156 -0
  92. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/google_web_nl.pyc +0 -0
  93. data/core/site-packages/pre_build/VUA_pylib/io_utils/__init__.py +1 -0
  94. data/core/site-packages/pre_build/VUA_pylib/io_utils/__init__.pyc +0 -0
  95. data/core/site-packages/pre_build/VUA_pylib/io_utils/feature_file.py +121 -0
  96. data/core/site-packages/pre_build/VUA_pylib/io_utils/feature_file.pyc +0 -0
  97. data/core/site-packages/pre_build/VUA_pylib/lexicon/__init__.py +1 -0
  98. data/core/site-packages/pre_build/VUA_pylib/lexicon/__init__.pyc +0 -0
  99. data/core/site-packages/pre_build/VUA_pylib/lexicon/lexicon.py +72 -0
  100. data/core/site-packages/pre_build/VUA_pylib/lexicon/lexicon.pyc +0 -0
  101. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/PKG-INFO +10 -0
  102. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/SOURCES.txt +7 -0
  103. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/dependency_links.txt +1 -0
  104. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/installed-files.txt +11 -0
  105. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/top_level.txt +1 -0
  106. data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.py +165 -0
  107. data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.pyc +0 -0
  108. data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.py +439 -0
  109. data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.pyc +0 -0
  110. data/core/site-packages/pre_build/VUKafParserPy/__init__.py +7 -0
  111. data/core/site-packages/pre_build/VUKafParserPy/__init__.pyc +0 -0
  112. data/core/vendor/src/crfsuite/AUTHORS +1 -0
  113. data/core/vendor/src/crfsuite/COPYING +27 -0
  114. data/core/vendor/src/crfsuite/ChangeLog +103 -0
  115. data/core/vendor/src/crfsuite/INSTALL +236 -0
  116. data/core/vendor/src/crfsuite/Makefile.am +19 -0
  117. data/core/vendor/src/crfsuite/Makefile.in +783 -0
  118. data/core/vendor/src/crfsuite/README +183 -0
  119. data/core/vendor/src/crfsuite/aclocal.m4 +9018 -0
  120. data/core/vendor/src/crfsuite/autogen.sh +38 -0
  121. data/core/vendor/src/crfsuite/compile +143 -0
  122. data/core/vendor/src/crfsuite/config.guess +1502 -0
  123. data/core/vendor/src/crfsuite/config.h.in +198 -0
  124. data/core/vendor/src/crfsuite/config.sub +1714 -0
  125. data/core/vendor/src/crfsuite/configure +14273 -0
  126. data/core/vendor/src/crfsuite/configure.in +149 -0
  127. data/core/vendor/src/crfsuite/crfsuite.sln +42 -0
  128. data/core/vendor/src/crfsuite/depcomp +630 -0
  129. data/core/vendor/src/crfsuite/example/chunking.py +49 -0
  130. data/core/vendor/src/crfsuite/example/crfutils.py +179 -0
  131. data/core/vendor/src/crfsuite/example/ner.py +270 -0
  132. data/core/vendor/src/crfsuite/example/pos.py +78 -0
  133. data/core/vendor/src/crfsuite/example/template.py +88 -0
  134. data/core/vendor/src/crfsuite/frontend/Makefile.am +29 -0
  135. data/core/vendor/src/crfsuite/frontend/Makefile.in +640 -0
  136. data/core/vendor/src/crfsuite/frontend/dump.c +116 -0
  137. data/core/vendor/src/crfsuite/frontend/frontend.vcxproj +129 -0
  138. data/core/vendor/src/crfsuite/frontend/iwa.c +273 -0
  139. data/core/vendor/src/crfsuite/frontend/iwa.h +65 -0
  140. data/core/vendor/src/crfsuite/frontend/learn.c +439 -0
  141. data/core/vendor/src/crfsuite/frontend/main.c +137 -0
  142. data/core/vendor/src/crfsuite/frontend/option.c +93 -0
  143. data/core/vendor/src/crfsuite/frontend/option.h +86 -0
  144. data/core/vendor/src/crfsuite/frontend/readdata.h +38 -0
  145. data/core/vendor/src/crfsuite/frontend/reader.c +136 -0
  146. data/core/vendor/src/crfsuite/frontend/tag.c +427 -0
  147. data/core/vendor/src/crfsuite/genbinary.sh.in +15 -0
  148. data/core/vendor/src/crfsuite/include/Makefile.am +11 -0
  149. data/core/vendor/src/crfsuite/include/Makefile.in +461 -0
  150. data/core/vendor/src/crfsuite/include/crfsuite.h +1063 -0
  151. data/core/vendor/src/crfsuite/include/crfsuite.hpp +555 -0
  152. data/core/vendor/src/crfsuite/include/crfsuite_api.hpp +400 -0
  153. data/core/vendor/src/crfsuite/include/os.h +61 -0
  154. data/core/vendor/src/crfsuite/install-sh +520 -0
  155. data/core/vendor/src/crfsuite/lib/cqdb/COPYING +28 -0
  156. data/core/vendor/src/crfsuite/lib/cqdb/Makefile.am +21 -0
  157. data/core/vendor/src/crfsuite/lib/cqdb/Makefile.in +549 -0
  158. data/core/vendor/src/crfsuite/lib/cqdb/cqdb.vcxproj +86 -0
  159. data/core/vendor/src/crfsuite/lib/cqdb/include/cqdb.h +524 -0
  160. data/core/vendor/src/crfsuite/lib/cqdb/src/cqdb.c +587 -0
  161. data/core/vendor/src/crfsuite/lib/cqdb/src/lookup3.c +976 -0
  162. data/core/vendor/src/crfsuite/lib/crf/Makefile.am +46 -0
  163. data/core/vendor/src/crfsuite/lib/crf/Makefile.in +721 -0
  164. data/core/vendor/src/crfsuite/lib/crf/crf.vcxproj +216 -0
  165. data/core/vendor/src/crfsuite/lib/crf/src/crf1d.h +353 -0
  166. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_context.c +705 -0
  167. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_encode.c +943 -0
  168. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_feature.c +352 -0
  169. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_model.c +994 -0
  170. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_tag.c +550 -0
  171. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite.c +492 -0
  172. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite_internal.h +236 -0
  173. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite_train.c +272 -0
  174. data/core/vendor/src/crfsuite/lib/crf/src/dataset.c +106 -0
  175. data/core/vendor/src/crfsuite/lib/crf/src/dictionary.c +118 -0
  176. data/core/vendor/src/crfsuite/lib/crf/src/holdout.c +80 -0
  177. data/core/vendor/src/crfsuite/lib/crf/src/logging.c +91 -0
  178. data/core/vendor/src/crfsuite/lib/crf/src/logging.h +48 -0
  179. data/core/vendor/src/crfsuite/lib/crf/src/params.c +335 -0
  180. data/core/vendor/src/crfsuite/lib/crf/src/params.h +80 -0
  181. data/core/vendor/src/crfsuite/lib/crf/src/quark.c +172 -0
  182. data/core/vendor/src/crfsuite/lib/crf/src/quark.h +46 -0
  183. data/core/vendor/src/crfsuite/lib/crf/src/rumavl.c +1107 -0
  184. data/core/vendor/src/crfsuite/lib/crf/src/rumavl.h +160 -0
  185. data/core/vendor/src/crfsuite/lib/crf/src/train_arow.c +408 -0
  186. data/core/vendor/src/crfsuite/lib/crf/src/train_averaged_perceptron.c +242 -0
  187. data/core/vendor/src/crfsuite/lib/crf/src/train_l2sgd.c +507 -0
  188. data/core/vendor/src/crfsuite/lib/crf/src/train_lbfgs.c +338 -0
  189. data/core/vendor/src/crfsuite/lib/crf/src/train_passive_aggressive.c +435 -0
  190. data/core/vendor/src/crfsuite/lib/crf/src/vecmath.h +341 -0
  191. data/core/vendor/src/crfsuite/ltmain.sh +8413 -0
  192. data/core/vendor/src/crfsuite/missing +376 -0
  193. data/core/vendor/src/crfsuite/swig/Makefile.am +13 -0
  194. data/core/vendor/src/crfsuite/swig/Makefile.in +365 -0
  195. data/core/vendor/src/crfsuite/swig/crfsuite.cpp +2 -0
  196. data/core/vendor/src/crfsuite/swig/export.i +32 -0
  197. data/core/vendor/src/crfsuite/swig/python/README +92 -0
  198. data/core/vendor/src/crfsuite/swig/python/crfsuite.py +329 -0
  199. data/core/vendor/src/crfsuite/swig/python/export_wrap.cpp +14355 -0
  200. data/core/vendor/src/crfsuite/swig/python/export_wrap.h +63 -0
  201. data/core/vendor/src/crfsuite/swig/python/prepare.sh +9 -0
  202. data/core/vendor/src/crfsuite/swig/python/sample_tag.py +52 -0
  203. data/core/vendor/src/crfsuite/swig/python/sample_train.py +68 -0
  204. data/core/vendor/src/crfsuite/swig/python/setup.py +44 -0
  205. data/core/vendor/src/crfsuite/win32/stdint.h +679 -0
  206. data/core/vendor/src/liblbfgs/AUTHORS +1 -0
  207. data/core/vendor/src/liblbfgs/COPYING +22 -0
  208. data/core/vendor/src/liblbfgs/ChangeLog +120 -0
  209. data/core/vendor/src/liblbfgs/INSTALL +231 -0
  210. data/core/vendor/src/liblbfgs/Makefile.am +10 -0
  211. data/core/vendor/src/liblbfgs/Makefile.in +638 -0
  212. data/core/vendor/src/liblbfgs/NEWS +0 -0
  213. data/core/vendor/src/liblbfgs/README +71 -0
  214. data/core/vendor/src/liblbfgs/aclocal.m4 +6985 -0
  215. data/core/vendor/src/liblbfgs/autogen.sh +38 -0
  216. data/core/vendor/src/liblbfgs/config.guess +1411 -0
  217. data/core/vendor/src/liblbfgs/config.h.in +64 -0
  218. data/core/vendor/src/liblbfgs/config.sub +1500 -0
  219. data/core/vendor/src/liblbfgs/configure +21146 -0
  220. data/core/vendor/src/liblbfgs/configure.in +107 -0
  221. data/core/vendor/src/liblbfgs/depcomp +522 -0
  222. data/core/vendor/src/liblbfgs/include/lbfgs.h +745 -0
  223. data/core/vendor/src/liblbfgs/install-sh +322 -0
  224. data/core/vendor/src/liblbfgs/lbfgs.sln +26 -0
  225. data/core/vendor/src/liblbfgs/lib/Makefile.am +24 -0
  226. data/core/vendor/src/liblbfgs/lib/Makefile.in +499 -0
  227. data/core/vendor/src/liblbfgs/lib/arithmetic_ansi.h +133 -0
  228. data/core/vendor/src/liblbfgs/lib/arithmetic_sse_double.h +294 -0
  229. data/core/vendor/src/liblbfgs/lib/arithmetic_sse_float.h +298 -0
  230. data/core/vendor/src/liblbfgs/lib/lbfgs.c +1371 -0
  231. data/core/vendor/src/liblbfgs/lib/lib.vcxproj +95 -0
  232. data/core/vendor/src/liblbfgs/ltmain.sh +6426 -0
  233. data/core/vendor/src/liblbfgs/missing +353 -0
  234. data/core/vendor/src/liblbfgs/sample/Makefile.am +15 -0
  235. data/core/vendor/src/liblbfgs/sample/Makefile.in +433 -0
  236. data/core/vendor/src/liblbfgs/sample/sample.c +81 -0
  237. data/core/vendor/src/liblbfgs/sample/sample.cpp +126 -0
  238. data/core/vendor/src/liblbfgs/sample/sample.vcxproj +105 -0
  239. data/core/vendor/src/svm_light/LICENSE.txt +59 -0
  240. data/core/vendor/src/svm_light/Makefile +105 -0
  241. data/core/vendor/src/svm_light/kernel.h +40 -0
  242. data/core/vendor/src/svm_light/svm_classify.c +197 -0
  243. data/core/vendor/src/svm_light/svm_common.c +985 -0
  244. data/core/vendor/src/svm_light/svm_common.h +301 -0
  245. data/core/vendor/src/svm_light/svm_hideo.c +1062 -0
  246. data/core/vendor/src/svm_light/svm_learn.c +4147 -0
  247. data/core/vendor/src/svm_light/svm_learn.h +169 -0
  248. data/core/vendor/src/svm_light/svm_learn_main.c +397 -0
  249. data/core/vendor/src/svm_light/svm_loqo.c +211 -0
  250. data/ext/hack/Rakefile +17 -0
  251. data/ext/hack/support.rb +88 -0
  252. data/lib/opener/opinion_detectors/base.rb +112 -0
  253. data/lib/opener/opinion_detectors/base/version.rb +7 -0
  254. data/lib/opener/opinion_detectors/configuration_creator.rb +86 -0
  255. data/lib/opener/opinion_detectors/de.rb +7 -0
  256. data/lib/opener/opinion_detectors/en.rb +7 -0
  257. data/lib/opener/opinion_detectors/it.rb +7 -0
  258. data/lib/opener/opinion_detectors/nl.rb +6 -0
  259. data/opener-opinion-detector-base.gemspec +35 -0
  260. data/pre_build_requirements.txt +3 -0
  261. metadata +374 -0
@@ -0,0 +1,555 @@
1
+ /*
2
+ * CRFsuite C++/SWIG API wrapper.
3
+ *
4
+ * Copyright (c) 2007-2010, Naoaki Okazaki
5
+ * All rights reserved.
6
+ *
7
+ * Redistribution and use in source and binary forms, with or without
8
+ * modification, are permitted provided that the following conditions are met:
9
+ * * Redistributions of source code must retain the above copyright
10
+ * notice, this list of conditions and the following disclaimer.
11
+ * * Redistributions in binary form must reproduce the above copyright
12
+ * notice, this list of conditions and the following disclaimer in the
13
+ * documentation and/or other materials provided with the distribution.
14
+ * * Neither the names of the authors nor the names of its contributors
15
+ * may be used to endorse or promote products derived from this
16
+ * software without specific prior written permission.
17
+ *
18
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
22
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
+ */
30
+
31
+ #ifndef __CRFSUITE_HPP__
32
+ #define __CRFSUITE_HPP__
33
+
34
+ #include <cmath>
35
+ #include <vector>
36
+ #include <string>
37
+ #include <stdexcept>
38
+ #include <iostream>
39
+ #include <sstream>
40
+
41
+ #include <crfsuite.h>
42
+ #include "crfsuite_api.hpp"
43
+
44
+ namespace CRFSuite
45
+ {
46
+
47
+ Trainer::Trainer()
48
+ {
49
+ data = new crfsuite_data_t;
50
+ if (data != NULL) {
51
+ crfsuite_data_init(data);
52
+ }
53
+ tr = NULL;
54
+ }
55
+
56
+ Trainer::~Trainer()
57
+ {
58
+ if (data != NULL) {
59
+ clear();
60
+ delete data;
61
+ data = NULL;
62
+ }
63
+ if (tr != NULL) {
64
+ tr->release(tr);
65
+ tr = NULL;
66
+ }
67
+ }
68
+
69
+ void Trainer::init()
70
+ {
71
+ // Create an instance of attribute dictionary.
72
+ if (data->attrs == NULL) {
73
+ int ret = crfsuite_create_instance("dictionary", (void**)&data->attrs);
74
+ if (!ret) {
75
+ throw std::runtime_error("Failed to create a dictionary instance for attributes.");
76
+ }
77
+ }
78
+
79
+ // Create an instance of label dictionary.
80
+ if (data->labels == NULL) {
81
+ int ret = crfsuite_create_instance("dictionary", (void**)&data->labels);
82
+ if (!ret) {
83
+ throw std::runtime_error("Failed to create a dictionary instance for labels.");
84
+ }
85
+ }
86
+ }
87
+
88
+ void Trainer::clear()
89
+ {
90
+ if (data != NULL) {
91
+ if (data->labels != NULL) {
92
+ data->labels->release(data->labels);
93
+ data->labels = NULL;
94
+ }
95
+
96
+ if (data->attrs != NULL) {
97
+ data->attrs->release(data->attrs);
98
+ data->attrs = NULL;
99
+ }
100
+
101
+ crfsuite_data_finish(data);
102
+ crfsuite_data_init(data);
103
+ }
104
+ }
105
+
106
+ void Trainer::append(const ItemSequence& xseq, const StringList& yseq, int group)
107
+ {
108
+ // Create dictionary objects if necessary.
109
+ if (data->attrs == NULL || data->labels == NULL) {
110
+ init();
111
+ }
112
+
113
+ // Make sure |y| == |x|.
114
+ if (xseq.size() != yseq.size()) {
115
+ std::stringstream ss;
116
+ ss << "The numbers of items and labels differ: |x| = " << xseq.size() << ", |y| = " << yseq.size();
117
+ throw std::invalid_argument(ss.str());
118
+ }
119
+
120
+ // Convert instance_type to crfsuite_instance_t.
121
+ crfsuite_instance_t _inst;
122
+ crfsuite_instance_init_n(&_inst, xseq.size());
123
+ for (size_t t = 0;t < xseq.size();++t) {
124
+ const Item& item = xseq[t];
125
+ crfsuite_item_t* _item = &_inst.items[t];
126
+
127
+ // Set the attributes in the item.
128
+ crfsuite_item_init_n(_item, item.size());
129
+ for (size_t i = 0;i < item.size();++i) {
130
+ _item->contents[i].aid = data->attrs->get(data->attrs, item[i].attr.c_str());
131
+ _item->contents[i].value = (floatval_t)item[i].value;
132
+ }
133
+
134
+ // Set the label of the item.
135
+ _inst.labels[t] = data->labels->get(data->labels, yseq[t].c_str());
136
+ }
137
+ _inst.group = group;
138
+
139
+ // Append the instance to the training set.
140
+ crfsuite_data_append(data, &_inst);
141
+
142
+ // Finish the instance.
143
+ crfsuite_instance_finish(&_inst);
144
+ }
145
+
146
+ bool Trainer::select(const std::string& algorithm, const std::string& type)
147
+ {
148
+ int ret;
149
+
150
+ // Release the trainer if it is already initialized.
151
+ if (tr != NULL) {
152
+ tr->release(tr);
153
+ tr = NULL;
154
+ }
155
+
156
+ // Build the trainer string ID.
157
+ std::string tid = "train/";
158
+ tid += type;
159
+ tid += '/';
160
+ tid += algorithm;
161
+
162
+ // Create an instance of a trainer.
163
+ ret = crfsuite_create_instance(tid.c_str(), (void**)&tr);
164
+ if (!ret) {
165
+ return false;
166
+ }
167
+
168
+ // Set the callback function for receiving messages.
169
+ tr->set_message_callback(tr, this, __logging_callback);
170
+
171
+ return true;
172
+ }
173
+
174
+ int Trainer::train(const std::string& model, int holdout)
175
+ {
176
+ int ret;
177
+
178
+ // Run the training algorithm.
179
+ ret = tr->train(tr, data, model.c_str(), holdout);
180
+
181
+ return ret;
182
+ }
183
+
184
+ StringList Trainer::params()
185
+ {
186
+ StringList pars;
187
+ crfsuite_params_t* params = tr->params(tr);
188
+ int n = params->num(params);
189
+ for (int i = 0;i < n;++i) {
190
+ char *name = NULL;
191
+ params->name(params, i, &name);
192
+ pars.push_back(name);
193
+ params->free(params, name);
194
+ }
195
+ return pars;
196
+ }
197
+
198
+ void Trainer::set(const std::string& name, const std::string& value)
199
+ {
200
+ crfsuite_params_t* params = tr->params(tr);
201
+ if (params->set(params, name.c_str(), value.c_str()) != 0) {
202
+ std::stringstream ss;
203
+ ss << "Parameter not found: " << name << " = " << value;
204
+ params->release(params);
205
+ throw std::invalid_argument(ss.str());
206
+ }
207
+ params->release(params);
208
+ }
209
+
210
+ std::string Trainer::get(const std::string& name)
211
+ {
212
+ std::string value;
213
+ char *_value = NULL;
214
+ crfsuite_params_t* params = tr->params(tr);
215
+ if (params->get(params, name.c_str(), &_value) != 0) {
216
+ std::stringstream ss;
217
+ ss << "Parameter not found: " << name << " = " << value;
218
+ params->release(params);
219
+ throw std::invalid_argument(ss.str());
220
+ }
221
+ value = _value;
222
+ params->free(params, _value);
223
+ params->release(params);
224
+ return value;
225
+ }
226
+
227
+ std::string Trainer::help(const std::string& name)
228
+ {
229
+ std::string str;
230
+ crfsuite_params_t* params = tr->params(tr);
231
+ char *_str = NULL;
232
+ params->help(params, name.c_str(), NULL, &_str);
233
+ str = _str;
234
+ params->free(params, _str);
235
+ params->release(params);
236
+ return str;
237
+ }
238
+
239
+ void Trainer::message(const std::string& msg)
240
+ {
241
+ }
242
+
243
+ int Trainer::__logging_callback(void *instance, const char *format, va_list args)
244
+ {
245
+ char buffer[65536];
246
+ vsnprintf(buffer, sizeof(buffer)-1, format, args);
247
+ reinterpret_cast<Trainer*>(instance)->message(buffer);
248
+ return 0;
249
+ }
250
+
251
+
252
+
253
+ Tagger::Tagger()
254
+ {
255
+ model = NULL;
256
+ tagger = NULL;
257
+ }
258
+
259
+ Tagger::~Tagger()
260
+ {
261
+ this->close();
262
+ }
263
+
264
+ bool Tagger::open(const std::string& name)
265
+ {
266
+ int ret;
267
+
268
+ // Close the model if it is already opened.
269
+ this->close();
270
+
271
+ // Open the model file.
272
+ if ((ret = crfsuite_create_instance_from_file(name.c_str(), (void**)&model))) {
273
+ return false;
274
+ }
275
+
276
+ // Obtain the tagger interface.
277
+ if ((ret = model->get_tagger(model, &tagger))) {
278
+ throw std::runtime_error("Failed to obtain the tagger interface");
279
+ }
280
+
281
+ return true;
282
+ }
283
+
284
+ void Tagger::close()
285
+ {
286
+ if (tagger != NULL) {
287
+ tagger->release(tagger);
288
+ tagger = NULL;
289
+ }
290
+ if (model != NULL) {
291
+ model->release(model);
292
+ model = NULL;
293
+ }
294
+ }
295
+
296
+ StringList Tagger::labels()
297
+ {
298
+ int ret;
299
+ StringList lseq;
300
+ crfsuite_dictionary_t *labels = NULL;
301
+
302
+ if (model == NULL) {
303
+ throw std::invalid_argument("The tagger is not opened");
304
+ }
305
+
306
+ // Obtain the dictionary interface representing the labels in the model.
307
+ if ((ret = model->get_labels(model, &labels))) {
308
+ throw std::runtime_error("Failed to obtain the dictionary interface for labels");
309
+ }
310
+
311
+ // Collect all label strings to lseq.
312
+ for (int i = 0;i < labels->num(labels);++i) {
313
+ const char *label = NULL;
314
+ if (labels->to_string(labels, i, &label) != 0) {
315
+ labels->release(labels);
316
+ throw std::runtime_error("Failed to convert a label identifier to string.");
317
+ }
318
+ lseq.push_back(label);
319
+ labels->free(labels, label);
320
+ }
321
+
322
+ labels->release(labels);
323
+ return lseq;
324
+ }
325
+
326
+ StringList Tagger::tag(const ItemSequence& xseq)
327
+ {
328
+ set(xseq);
329
+ return viterbi();
330
+ }
331
+
332
+ void Tagger::set(const ItemSequence& xseq)
333
+ {
334
+ int ret;
335
+ StringList yseq;
336
+ crfsuite_instance_t _inst;
337
+ crfsuite_dictionary_t *attrs = NULL;
338
+
339
+ if (model == NULL || tagger == NULL) {
340
+ throw std::invalid_argument("The tagger is not opened");
341
+ }
342
+
343
+ // Obtain the dictionary interface representing the attributes in the model.
344
+ if ((ret = model->get_attrs(model, &attrs))) {
345
+ throw std::runtime_error("Failed to obtain the dictionary interface for attributes");
346
+ }
347
+
348
+ // Build an instance.
349
+ crfsuite_instance_init_n(&_inst, xseq.size());
350
+ for (size_t t = 0;t < xseq.size();++t) {
351
+ const Item& item = xseq[t];
352
+ crfsuite_item_t* _item = &_inst.items[t];
353
+
354
+ // Set the attributes in the item.
355
+ crfsuite_item_init(_item);
356
+ for (size_t i = 0;i < item.size();++i) {
357
+ int aid = attrs->to_id(attrs, item[i].attr.c_str());
358
+ if (0 <= aid) {
359
+ crfsuite_attribute_t cont;
360
+ crfsuite_attribute_set(&cont, aid, item[i].value);
361
+ crfsuite_item_append_attribute(_item, &cont);
362
+ }
363
+ }
364
+ }
365
+
366
+ // Set the instance to the tagger.
367
+ if ((ret = tagger->set(tagger, &_inst))) {
368
+ crfsuite_instance_finish(&_inst);
369
+ attrs->release(attrs);
370
+ throw std::runtime_error("Failed to set the instance to the tagger.");
371
+ }
372
+
373
+ crfsuite_instance_finish(&_inst);
374
+ attrs->release(attrs);
375
+ }
376
+
377
+ StringList Tagger::viterbi()
378
+ {
379
+ int ret;
380
+ StringList yseq;
381
+ crfsuite_dictionary_t *labels = NULL;
382
+
383
+ if (model == NULL || tagger == NULL) {
384
+ throw std::invalid_argument("The tagger is not opened");
385
+ }
386
+
387
+ // Make sure that the current instance is not empty.
388
+ const size_t T = (size_t)tagger->length(tagger);
389
+ if (T <= 0) {
390
+ return yseq;
391
+ }
392
+
393
+ // Obtain the dictionary interface representing the labels in the model.
394
+ if ((ret = model->get_labels(model, &labels))) {
395
+ throw std::runtime_error("Failed to obtain the dictionary interface for labels");
396
+ }
397
+
398
+ // Run the Viterbi algorithm.
399
+ floatval_t score;
400
+ int *path = new int[T];
401
+ if ((ret = tagger->viterbi(tagger, path, &score))) {
402
+ delete[] path;
403
+ labels->release(labels);
404
+ throw std::runtime_error("Failed to find the Viterbi path.");
405
+ }
406
+
407
+ // Convert the Viterbi path to a label sequence.
408
+ yseq.resize(T);
409
+ for (size_t t = 0;t < T;++t) {
410
+ const char *label = NULL;
411
+ if (labels->to_string(labels, path[t], &label) != 0) {
412
+ delete[] path;
413
+ labels->release(labels);
414
+ throw std::runtime_error("Failed to convert a label identifier to string.");
415
+ }
416
+ yseq[t] = label;
417
+ labels->free(labels, label);
418
+ }
419
+
420
+ labels->release(labels);
421
+ return yseq;
422
+ }
423
+
424
+ double Tagger::probability(const StringList& yseq)
425
+ {
426
+ int ret;
427
+ size_t T;
428
+ int *path = NULL;
429
+ std::stringstream msg;
430
+ floatval_t score, lognorm;
431
+ crfsuite_dictionary_t *labels = NULL;
432
+
433
+ if (model == NULL || tagger == NULL) {
434
+ msg << "The tagger is not opened";
435
+ throw std::invalid_argument(msg.str());
436
+ }
437
+
438
+ // Make sure that the current instance is not empty.
439
+ T = (size_t)tagger->length(tagger);
440
+ if (T <= 0) {
441
+ return 0.;
442
+ }
443
+
444
+ // Make sure that |y| == |x|.
445
+ if (yseq.size() != T) {
446
+ msg << "The numbers of items and labels differ: |x| = " << T << ", |y| = " << yseq.size();
447
+ throw std::invalid_argument(msg.str());
448
+ }
449
+
450
+ // Obtain the dictionary interface representing the labels in the model.
451
+ if ((ret = model->get_labels(model, &labels))) {
452
+ msg << "Failed to obtain the dictionary interface for labels";
453
+ goto error_exit;
454
+ }
455
+
456
+ // Convert string labels into label IDs.
457
+ path = new int[T];
458
+ for (size_t t = 0;t < T;++t) {
459
+ int l = labels->to_id(labels, yseq[t].c_str());
460
+ if (l < 0) {
461
+ msg << "Failed to convert into label identifier: " << yseq[t];
462
+ goto error_exit;
463
+ }
464
+ path[t] = l;
465
+ }
466
+
467
+ // Compute the score of the path.
468
+ if ((ret = tagger->score(tagger, path, &score))) {
469
+ msg << "Failed to score the label sequence";
470
+ goto error_exit;
471
+ }
472
+
473
+ // Compute the partition factor.
474
+ if ((ret = tagger->lognorm(tagger, &lognorm))) {
475
+ msg << "Failed to compute the partition factor";
476
+ goto error_exit;
477
+ }
478
+
479
+ labels->release(labels);
480
+ delete[] path;
481
+ return std::exp((double)(score - lognorm));
482
+
483
+ error_exit:
484
+ if (labels != NULL) {
485
+ labels->release(labels);
486
+ labels = NULL;
487
+ }
488
+ delete[] path;
489
+ throw std::runtime_error(msg.str());
490
+ }
491
+
492
+ double Tagger::marginal(const std::string& y, const int t)
493
+ {
494
+ int l, ret, T;
495
+ floatval_t prob;
496
+ std::stringstream msg;
497
+ crfsuite_dictionary_t *labels = NULL;
498
+
499
+ if (model == NULL || tagger == NULL) {
500
+ msg << "The tagger is not opened";
501
+ throw std::invalid_argument(msg.str());
502
+ }
503
+
504
+ // Make sure that the current instance is not empty.
505
+ T = tagger->length(tagger);
506
+ if (T <= 0) {
507
+ return 0.;
508
+ }
509
+
510
+ // Make sure that 0 <= t < |x|.
511
+ if (t < 0 || T <= t) {
512
+ msg << "The position, " << t << "is out of range of " << T;
513
+ throw std::invalid_argument(msg.str());
514
+ }
515
+
516
+ // Obtain the dictionary interface representing the labels in the model.
517
+ if ((ret = model->get_labels(model, &labels))) {
518
+ msg << "Failed to obtain the dictionary interface for labels";
519
+ goto error_exit;
520
+ }
521
+
522
+ // Convert string labels into label IDs.
523
+ l = labels->to_id(labels, y.c_str());
524
+ if (l < 0) {
525
+ msg << "Failed to convert into label identifier: " << y;
526
+ goto error_exit;
527
+ }
528
+
529
+ // Compute the score of the path.
530
+ if ((ret = tagger->marginal_point(tagger, l, t, &prob))) {
531
+ msg << "Failed to compute the marginal probability of '" << y << "' at " << t;
532
+ goto error_exit;
533
+ }
534
+
535
+ labels->release(labels);
536
+ return prob;
537
+
538
+ error_exit:
539
+ if (labels != NULL) {
540
+ labels->release(labels);
541
+ labels = NULL;
542
+ }
543
+ throw std::runtime_error(msg.str());
544
+ }
545
+
546
+
547
+ std::string version()
548
+ {
549
+ return CRFSUITE_VERSION;
550
+ }
551
+
552
+ };
553
+
554
+ #endif/*__CRFSUITE_HPP__*/
555
+