opener-opinion-detector-base 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (261) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +101 -0
  3. data/bin/opinion-detector-base +19 -0
  4. data/core/annotation.cfg.erb +9 -0
  5. data/core/packages/KafNafParser-1.4.tar.gz +0 -0
  6. data/core/packages/VUA_pylib-1.5.tar.gz +0 -0
  7. data/core/python-scripts/LICENSE +339 -0
  8. data/core/python-scripts/README.md +226 -0
  9. data/core/python-scripts/classify_kaf_naf_file.py +499 -0
  10. data/core/python-scripts/cross_validation.py +634 -0
  11. data/core/python-scripts/generate_folds.py +134 -0
  12. data/core/python-scripts/models.cfg +10 -0
  13. data/core/python-scripts/my_templates/README +33 -0
  14. data/core/python-scripts/my_templates/templates_exp.only0.txt +6 -0
  15. data/core/python-scripts/my_templates/templates_exp.pol0.txt +10 -0
  16. data/core/python-scripts/my_templates/templates_exp.red.txt +7 -0
  17. data/core/python-scripts/my_templates/templates_exp.txt +10 -0
  18. data/core/python-scripts/my_templates/templates_holder.only0.txt +11 -0
  19. data/core/python-scripts/my_templates/templates_holder.red.txt +9 -0
  20. data/core/python-scripts/my_templates/templates_holder.txt +10 -0
  21. data/core/python-scripts/my_templates/templates_target.only0.txt +11 -0
  22. data/core/python-scripts/my_templates/templates_target.red.txt +9 -0
  23. data/core/python-scripts/my_templates/templates_target.txt +10 -0
  24. data/core/python-scripts/run_all_experiments.sh +49 -0
  25. data/core/python-scripts/run_basic.py +20 -0
  26. data/core/python-scripts/run_experiment.sh +42 -0
  27. data/core/python-scripts/scripts/__init__.py +1 -0
  28. data/core/python-scripts/scripts/config_manager.py +314 -0
  29. data/core/python-scripts/scripts/crfutils.py +215 -0
  30. data/core/python-scripts/scripts/extract_feats_relations.py +295 -0
  31. data/core/python-scripts/scripts/extract_features.py +376 -0
  32. data/core/python-scripts/scripts/feats_to_crf.exp.py +105 -0
  33. data/core/python-scripts/scripts/lexicons.py +44 -0
  34. data/core/python-scripts/scripts/link_entities_distance.py +77 -0
  35. data/core/python-scripts/scripts/relation_classifier.py +250 -0
  36. data/core/python-scripts/train.py +566 -0
  37. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/PKG-INFO +10 -0
  38. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/SOURCES.txt +22 -0
  39. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/dependency_links.txt +1 -0
  40. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/installed-files.txt +47 -0
  41. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/top_level.txt +1 -0
  42. data/core/site-packages/pre_build/KafNafParser/KafNafParserMod.py +390 -0
  43. data/core/site-packages/pre_build/KafNafParser/KafNafParserMod.pyc +0 -0
  44. data/core/site-packages/pre_build/KafNafParser/__init__.py +14 -0
  45. data/core/site-packages/pre_build/KafNafParser/__init__.pyc +0 -0
  46. data/core/site-packages/pre_build/KafNafParser/constituency_data.py +125 -0
  47. data/core/site-packages/pre_build/KafNafParser/constituency_data.pyc +0 -0
  48. data/core/site-packages/pre_build/KafNafParser/coreference_data.py +52 -0
  49. data/core/site-packages/pre_build/KafNafParser/coreference_data.pyc +0 -0
  50. data/core/site-packages/pre_build/KafNafParser/dependency_data.py +78 -0
  51. data/core/site-packages/pre_build/KafNafParser/dependency_data.pyc +0 -0
  52. data/core/site-packages/pre_build/KafNafParser/entity_data.py +59 -0
  53. data/core/site-packages/pre_build/KafNafParser/entity_data.pyc +0 -0
  54. data/core/site-packages/pre_build/KafNafParser/external_references_data.py +41 -0
  55. data/core/site-packages/pre_build/KafNafParser/external_references_data.pyc +0 -0
  56. data/core/site-packages/pre_build/KafNafParser/feature_extractor/__init__.py +2 -0
  57. data/core/site-packages/pre_build/KafNafParser/feature_extractor/__init__.pyc +0 -0
  58. data/core/site-packages/pre_build/KafNafParser/feature_extractor/constituency.py +205 -0
  59. data/core/site-packages/pre_build/KafNafParser/feature_extractor/constituency.pyc +0 -0
  60. data/core/site-packages/pre_build/KafNafParser/feature_extractor/dependency.py +309 -0
  61. data/core/site-packages/pre_build/KafNafParser/feature_extractor/dependency.pyc +0 -0
  62. data/core/site-packages/pre_build/KafNafParser/features_data.py +131 -0
  63. data/core/site-packages/pre_build/KafNafParser/features_data.pyc +0 -0
  64. data/core/site-packages/pre_build/KafNafParser/header_data.py +127 -0
  65. data/core/site-packages/pre_build/KafNafParser/header_data.pyc +0 -0
  66. data/core/site-packages/pre_build/KafNafParser/opinion_data.py +211 -0
  67. data/core/site-packages/pre_build/KafNafParser/opinion_data.pyc +0 -0
  68. data/core/site-packages/pre_build/KafNafParser/references_data.py +23 -0
  69. data/core/site-packages/pre_build/KafNafParser/references_data.pyc +0 -0
  70. data/core/site-packages/pre_build/KafNafParser/span_data.py +63 -0
  71. data/core/site-packages/pre_build/KafNafParser/span_data.pyc +0 -0
  72. data/core/site-packages/pre_build/KafNafParser/term_data.py +111 -0
  73. data/core/site-packages/pre_build/KafNafParser/term_data.pyc +0 -0
  74. data/core/site-packages/pre_build/KafNafParser/term_sentiment_data.py +42 -0
  75. data/core/site-packages/pre_build/KafNafParser/term_sentiment_data.pyc +0 -0
  76. data/core/site-packages/pre_build/KafNafParser/text_data.py +99 -0
  77. data/core/site-packages/pre_build/KafNafParser/text_data.pyc +0 -0
  78. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/PKG-INFO +10 -0
  79. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/SOURCES.txt +14 -0
  80. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/dependency_links.txt +1 -0
  81. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/installed-files.txt +23 -0
  82. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/top_level.txt +1 -0
  83. data/core/site-packages/pre_build/VUA_pylib/__init__.py +1 -0
  84. data/core/site-packages/pre_build/VUA_pylib/__init__.pyc +0 -0
  85. data/core/site-packages/pre_build/VUA_pylib/common/__init__.py +1 -0
  86. data/core/site-packages/pre_build/VUA_pylib/common/__init__.pyc +0 -0
  87. data/core/site-packages/pre_build/VUA_pylib/common/common.py +28 -0
  88. data/core/site-packages/pre_build/VUA_pylib/common/common.pyc +0 -0
  89. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/__init__.py +1 -0
  90. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/__init__.pyc +0 -0
  91. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/google_web_nl.py +156 -0
  92. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/google_web_nl.pyc +0 -0
  93. data/core/site-packages/pre_build/VUA_pylib/io_utils/__init__.py +1 -0
  94. data/core/site-packages/pre_build/VUA_pylib/io_utils/__init__.pyc +0 -0
  95. data/core/site-packages/pre_build/VUA_pylib/io_utils/feature_file.py +121 -0
  96. data/core/site-packages/pre_build/VUA_pylib/io_utils/feature_file.pyc +0 -0
  97. data/core/site-packages/pre_build/VUA_pylib/lexicon/__init__.py +1 -0
  98. data/core/site-packages/pre_build/VUA_pylib/lexicon/__init__.pyc +0 -0
  99. data/core/site-packages/pre_build/VUA_pylib/lexicon/lexicon.py +72 -0
  100. data/core/site-packages/pre_build/VUA_pylib/lexicon/lexicon.pyc +0 -0
  101. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/PKG-INFO +10 -0
  102. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/SOURCES.txt +7 -0
  103. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/dependency_links.txt +1 -0
  104. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/installed-files.txt +11 -0
  105. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/top_level.txt +1 -0
  106. data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.py +165 -0
  107. data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.pyc +0 -0
  108. data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.py +439 -0
  109. data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.pyc +0 -0
  110. data/core/site-packages/pre_build/VUKafParserPy/__init__.py +7 -0
  111. data/core/site-packages/pre_build/VUKafParserPy/__init__.pyc +0 -0
  112. data/core/vendor/src/crfsuite/AUTHORS +1 -0
  113. data/core/vendor/src/crfsuite/COPYING +27 -0
  114. data/core/vendor/src/crfsuite/ChangeLog +103 -0
  115. data/core/vendor/src/crfsuite/INSTALL +236 -0
  116. data/core/vendor/src/crfsuite/Makefile.am +19 -0
  117. data/core/vendor/src/crfsuite/Makefile.in +783 -0
  118. data/core/vendor/src/crfsuite/README +183 -0
  119. data/core/vendor/src/crfsuite/aclocal.m4 +9018 -0
  120. data/core/vendor/src/crfsuite/autogen.sh +38 -0
  121. data/core/vendor/src/crfsuite/compile +143 -0
  122. data/core/vendor/src/crfsuite/config.guess +1502 -0
  123. data/core/vendor/src/crfsuite/config.h.in +198 -0
  124. data/core/vendor/src/crfsuite/config.sub +1714 -0
  125. data/core/vendor/src/crfsuite/configure +14273 -0
  126. data/core/vendor/src/crfsuite/configure.in +149 -0
  127. data/core/vendor/src/crfsuite/crfsuite.sln +42 -0
  128. data/core/vendor/src/crfsuite/depcomp +630 -0
  129. data/core/vendor/src/crfsuite/example/chunking.py +49 -0
  130. data/core/vendor/src/crfsuite/example/crfutils.py +179 -0
  131. data/core/vendor/src/crfsuite/example/ner.py +270 -0
  132. data/core/vendor/src/crfsuite/example/pos.py +78 -0
  133. data/core/vendor/src/crfsuite/example/template.py +88 -0
  134. data/core/vendor/src/crfsuite/frontend/Makefile.am +29 -0
  135. data/core/vendor/src/crfsuite/frontend/Makefile.in +640 -0
  136. data/core/vendor/src/crfsuite/frontend/dump.c +116 -0
  137. data/core/vendor/src/crfsuite/frontend/frontend.vcxproj +129 -0
  138. data/core/vendor/src/crfsuite/frontend/iwa.c +273 -0
  139. data/core/vendor/src/crfsuite/frontend/iwa.h +65 -0
  140. data/core/vendor/src/crfsuite/frontend/learn.c +439 -0
  141. data/core/vendor/src/crfsuite/frontend/main.c +137 -0
  142. data/core/vendor/src/crfsuite/frontend/option.c +93 -0
  143. data/core/vendor/src/crfsuite/frontend/option.h +86 -0
  144. data/core/vendor/src/crfsuite/frontend/readdata.h +38 -0
  145. data/core/vendor/src/crfsuite/frontend/reader.c +136 -0
  146. data/core/vendor/src/crfsuite/frontend/tag.c +427 -0
  147. data/core/vendor/src/crfsuite/genbinary.sh.in +15 -0
  148. data/core/vendor/src/crfsuite/include/Makefile.am +11 -0
  149. data/core/vendor/src/crfsuite/include/Makefile.in +461 -0
  150. data/core/vendor/src/crfsuite/include/crfsuite.h +1063 -0
  151. data/core/vendor/src/crfsuite/include/crfsuite.hpp +555 -0
  152. data/core/vendor/src/crfsuite/include/crfsuite_api.hpp +400 -0
  153. data/core/vendor/src/crfsuite/include/os.h +61 -0
  154. data/core/vendor/src/crfsuite/install-sh +520 -0
  155. data/core/vendor/src/crfsuite/lib/cqdb/COPYING +28 -0
  156. data/core/vendor/src/crfsuite/lib/cqdb/Makefile.am +21 -0
  157. data/core/vendor/src/crfsuite/lib/cqdb/Makefile.in +549 -0
  158. data/core/vendor/src/crfsuite/lib/cqdb/cqdb.vcxproj +86 -0
  159. data/core/vendor/src/crfsuite/lib/cqdb/include/cqdb.h +524 -0
  160. data/core/vendor/src/crfsuite/lib/cqdb/src/cqdb.c +587 -0
  161. data/core/vendor/src/crfsuite/lib/cqdb/src/lookup3.c +976 -0
  162. data/core/vendor/src/crfsuite/lib/crf/Makefile.am +46 -0
  163. data/core/vendor/src/crfsuite/lib/crf/Makefile.in +721 -0
  164. data/core/vendor/src/crfsuite/lib/crf/crf.vcxproj +216 -0
  165. data/core/vendor/src/crfsuite/lib/crf/src/crf1d.h +353 -0
  166. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_context.c +705 -0
  167. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_encode.c +943 -0
  168. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_feature.c +352 -0
  169. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_model.c +994 -0
  170. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_tag.c +550 -0
  171. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite.c +492 -0
  172. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite_internal.h +236 -0
  173. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite_train.c +272 -0
  174. data/core/vendor/src/crfsuite/lib/crf/src/dataset.c +106 -0
  175. data/core/vendor/src/crfsuite/lib/crf/src/dictionary.c +118 -0
  176. data/core/vendor/src/crfsuite/lib/crf/src/holdout.c +80 -0
  177. data/core/vendor/src/crfsuite/lib/crf/src/logging.c +91 -0
  178. data/core/vendor/src/crfsuite/lib/crf/src/logging.h +48 -0
  179. data/core/vendor/src/crfsuite/lib/crf/src/params.c +335 -0
  180. data/core/vendor/src/crfsuite/lib/crf/src/params.h +80 -0
  181. data/core/vendor/src/crfsuite/lib/crf/src/quark.c +172 -0
  182. data/core/vendor/src/crfsuite/lib/crf/src/quark.h +46 -0
  183. data/core/vendor/src/crfsuite/lib/crf/src/rumavl.c +1107 -0
  184. data/core/vendor/src/crfsuite/lib/crf/src/rumavl.h +160 -0
  185. data/core/vendor/src/crfsuite/lib/crf/src/train_arow.c +408 -0
  186. data/core/vendor/src/crfsuite/lib/crf/src/train_averaged_perceptron.c +242 -0
  187. data/core/vendor/src/crfsuite/lib/crf/src/train_l2sgd.c +507 -0
  188. data/core/vendor/src/crfsuite/lib/crf/src/train_lbfgs.c +338 -0
  189. data/core/vendor/src/crfsuite/lib/crf/src/train_passive_aggressive.c +435 -0
  190. data/core/vendor/src/crfsuite/lib/crf/src/vecmath.h +341 -0
  191. data/core/vendor/src/crfsuite/ltmain.sh +8413 -0
  192. data/core/vendor/src/crfsuite/missing +376 -0
  193. data/core/vendor/src/crfsuite/swig/Makefile.am +13 -0
  194. data/core/vendor/src/crfsuite/swig/Makefile.in +365 -0
  195. data/core/vendor/src/crfsuite/swig/crfsuite.cpp +2 -0
  196. data/core/vendor/src/crfsuite/swig/export.i +32 -0
  197. data/core/vendor/src/crfsuite/swig/python/README +92 -0
  198. data/core/vendor/src/crfsuite/swig/python/crfsuite.py +329 -0
  199. data/core/vendor/src/crfsuite/swig/python/export_wrap.cpp +14355 -0
  200. data/core/vendor/src/crfsuite/swig/python/export_wrap.h +63 -0
  201. data/core/vendor/src/crfsuite/swig/python/prepare.sh +9 -0
  202. data/core/vendor/src/crfsuite/swig/python/sample_tag.py +52 -0
  203. data/core/vendor/src/crfsuite/swig/python/sample_train.py +68 -0
  204. data/core/vendor/src/crfsuite/swig/python/setup.py +44 -0
  205. data/core/vendor/src/crfsuite/win32/stdint.h +679 -0
  206. data/core/vendor/src/liblbfgs/AUTHORS +1 -0
  207. data/core/vendor/src/liblbfgs/COPYING +22 -0
  208. data/core/vendor/src/liblbfgs/ChangeLog +120 -0
  209. data/core/vendor/src/liblbfgs/INSTALL +231 -0
  210. data/core/vendor/src/liblbfgs/Makefile.am +10 -0
  211. data/core/vendor/src/liblbfgs/Makefile.in +638 -0
  212. data/core/vendor/src/liblbfgs/NEWS +0 -0
  213. data/core/vendor/src/liblbfgs/README +71 -0
  214. data/core/vendor/src/liblbfgs/aclocal.m4 +6985 -0
  215. data/core/vendor/src/liblbfgs/autogen.sh +38 -0
  216. data/core/vendor/src/liblbfgs/config.guess +1411 -0
  217. data/core/vendor/src/liblbfgs/config.h.in +64 -0
  218. data/core/vendor/src/liblbfgs/config.sub +1500 -0
  219. data/core/vendor/src/liblbfgs/configure +21146 -0
  220. data/core/vendor/src/liblbfgs/configure.in +107 -0
  221. data/core/vendor/src/liblbfgs/depcomp +522 -0
  222. data/core/vendor/src/liblbfgs/include/lbfgs.h +745 -0
  223. data/core/vendor/src/liblbfgs/install-sh +322 -0
  224. data/core/vendor/src/liblbfgs/lbfgs.sln +26 -0
  225. data/core/vendor/src/liblbfgs/lib/Makefile.am +24 -0
  226. data/core/vendor/src/liblbfgs/lib/Makefile.in +499 -0
  227. data/core/vendor/src/liblbfgs/lib/arithmetic_ansi.h +133 -0
  228. data/core/vendor/src/liblbfgs/lib/arithmetic_sse_double.h +294 -0
  229. data/core/vendor/src/liblbfgs/lib/arithmetic_sse_float.h +298 -0
  230. data/core/vendor/src/liblbfgs/lib/lbfgs.c +1371 -0
  231. data/core/vendor/src/liblbfgs/lib/lib.vcxproj +95 -0
  232. data/core/vendor/src/liblbfgs/ltmain.sh +6426 -0
  233. data/core/vendor/src/liblbfgs/missing +353 -0
  234. data/core/vendor/src/liblbfgs/sample/Makefile.am +15 -0
  235. data/core/vendor/src/liblbfgs/sample/Makefile.in +433 -0
  236. data/core/vendor/src/liblbfgs/sample/sample.c +81 -0
  237. data/core/vendor/src/liblbfgs/sample/sample.cpp +126 -0
  238. data/core/vendor/src/liblbfgs/sample/sample.vcxproj +105 -0
  239. data/core/vendor/src/svm_light/LICENSE.txt +59 -0
  240. data/core/vendor/src/svm_light/Makefile +105 -0
  241. data/core/vendor/src/svm_light/kernel.h +40 -0
  242. data/core/vendor/src/svm_light/svm_classify.c +197 -0
  243. data/core/vendor/src/svm_light/svm_common.c +985 -0
  244. data/core/vendor/src/svm_light/svm_common.h +301 -0
  245. data/core/vendor/src/svm_light/svm_hideo.c +1062 -0
  246. data/core/vendor/src/svm_light/svm_learn.c +4147 -0
  247. data/core/vendor/src/svm_light/svm_learn.h +169 -0
  248. data/core/vendor/src/svm_light/svm_learn_main.c +397 -0
  249. data/core/vendor/src/svm_light/svm_loqo.c +211 -0
  250. data/ext/hack/Rakefile +17 -0
  251. data/ext/hack/support.rb +88 -0
  252. data/lib/opener/opinion_detectors/base.rb +112 -0
  253. data/lib/opener/opinion_detectors/base/version.rb +7 -0
  254. data/lib/opener/opinion_detectors/configuration_creator.rb +86 -0
  255. data/lib/opener/opinion_detectors/de.rb +7 -0
  256. data/lib/opener/opinion_detectors/en.rb +7 -0
  257. data/lib/opener/opinion_detectors/it.rb +7 -0
  258. data/lib/opener/opinion_detectors/nl.rb +6 -0
  259. data/opener-opinion-detector-base.gemspec +35 -0
  260. data/pre_build_requirements.txt +3 -0
  261. metadata +374 -0
@@ -0,0 +1,427 @@
1
+ /*
2
+ * Tag command for CRFsuite frontend.
3
+ *
4
+ * Copyright (c) 2007-2010, Naoaki Okazaki
5
+ * All rights reserved.
6
+ *
7
+ * Redistribution and use in source and binary forms, with or without
8
+ * modification, are permitted provided that the following conditions are met:
9
+ * * Redistributions of source code must retain the above copyright
10
+ * notice, this list of conditions and the following disclaimer.
11
+ * * Redistributions in binary form must reproduce the above copyright
12
+ * notice, this list of conditions and the following disclaimer in the
13
+ * documentation and/or other materials provided with the distribution.
14
+ * * Neither the names of the authors nor the names of its contributors
15
+ * may be used to endorse or promote products derived from this
16
+ * software without specific prior written permission.
17
+ *
18
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
22
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
+ */
30
+
31
+ /* $Id$ */
32
+
33
+ #include <os.h>
34
+
35
+ #include <stdio.h>
36
+ #include <stdlib.h>
37
+ #include <string.h>
38
+ #include <time.h>
39
+ #include <math.h>
40
+
41
+ #include <crfsuite.h>
42
+ #include "option.h"
43
+ #include "iwa.h"
44
+
45
/*
 * Calls the object's own release() method and then clears the pointer.
 * Does nothing when the pointer is already NULL.
 *
 * The body is wrapped in do { ... } while (0) so that the macro expands to a
 * single statement and stays correct inside an unbraced if/else.
 * Note that obj is evaluated more than once; pass a plain lvalue.
 */
#define SAFE_RELEASE(obj) \
    do { \
        if ((obj) != NULL) { \
            (obj)->release(obj); \
            (obj) = NULL; \
        } \
    } while (0)
46
+
47
+ void show_copyright(FILE *fp);
48
+
49
/* Options and I/O streams for the "tag" command. */
typedef struct {
    char *input;        /* Input data file name; main_tag() stores "-" for STDIN. */
    char *model;        /* Model file name (-m, --model). */
    int evaluate;       /* Non-zero to report performance (-t, --test). */
    int probability;    /* Non-zero to print sequence probabilities (-p, --probability). */
    int marginal;       /* Non-zero to print marginal probabilities (-i, --marginal). */
    int quiet;          /* Non-zero to suppress tagging results (-q, --quiet). */
    int reference;      /* Non-zero to print reference labels (-r, --reference). */
    int help;           /* Non-zero to show the usage and exit (-h, --help). */

    int num_params;     /* Number of entries in params. */
    char **params;      /* Heap-allocated copies of the --param arguments. */

    FILE *fpi;          /* Input stream; tagger_option_init() sets stdin. */
    FILE *fpo;          /* Output stream; tagger_option_init() sets stdout. */
    FILE *fpe;          /* Error stream; tagger_option_init() sets stderr. */
} tagger_option_t;
66
+
67
/*
 * Duplicates a NUL-terminated string into storage obtained from malloc().
 *
 *  @param  src     The string to copy; may be NULL.
 *  @return         A copy that the caller must free(), or NULL when src is
 *                  NULL or the allocation fails.
 */
static char* mystrdup(const char *src)
{
    size_t size;
    char *dst;

    /* strlen(NULL) is undefined, so reject a missing source up front. */
    if (src == NULL) {
        return NULL;
    }

    size = strlen(src) + 1;     /* Include the terminating NUL. */
    dst = (char*)malloc(size);
    if (dst != NULL) {
        memcpy(dst, src, size);
    }
    return dst;
}
75
+
76
+ static void tagger_option_init(tagger_option_t* opt)
77
+ {
78
+ memset(opt, 0, sizeof(*opt));
79
+ opt->fpi = stdin;
80
+ opt->fpo = stdout;
81
+ opt->fpe = stderr;
82
+ opt->model = mystrdup("");
83
+ }
84
+
85
+ static void tagger_option_finish(tagger_option_t* opt)
86
+ {
87
+ int i;
88
+
89
+ free(opt->input);
90
+ free(opt->model);
91
+ for (i = 0;i < opt->num_params;++i) {
92
+ free(opt->params[i]);
93
+ }
94
+ free(opt->params);
95
+ }
96
+
97
+ BEGIN_OPTION_MAP(parse_tagger_options, tagger_option_t)
98
+
99
+ ON_OPTION_WITH_ARG(SHORTOPT('m') || LONGOPT("model"))
100
+ free(opt->model);
101
+ opt->model = mystrdup(arg);
102
+
103
+ ON_OPTION(SHORTOPT('t') || LONGOPT("test"))
104
+ opt->evaluate = 1;
105
+
106
+ ON_OPTION(SHORTOPT('r') || LONGOPT("reference"))
107
+ opt->reference = 1;
108
+
109
+ ON_OPTION(SHORTOPT('p') || LONGOPT("probability"))
110
+ opt->probability = 1;
111
+
112
+ ON_OPTION(SHORTOPT('i') || LONGOPT("marginal"))
113
+ opt->marginal = 1;
114
+
115
+ ON_OPTION(SHORTOPT('q') || LONGOPT("quiet"))
116
+ opt->quiet = 1;
117
+
118
+ ON_OPTION(SHORTOPT('h') || LONGOPT("help"))
119
+ opt->help = 1;
120
+
121
+ ON_OPTION_WITH_ARG(SHORTOPT('p') || LONGOPT("param"))
122
+ opt->params = (char **)realloc(opt->params, sizeof(char*) * (opt->num_params + 1));
123
+ opt->params[opt->num_params] = mystrdup(arg);
124
+ ++opt->num_params;
125
+
126
+ END_OPTION_MAP()
127
+
128
/*
 * Prints the usage message of the tag command.
 *
 *  @param  fp      The stream that receives the message.
 *  @param  argv0   The program name shown in the USAGE line.
 *  @param  command The command name shown after the program name.
 */
static void show_usage(FILE *fp, const char *argv0, const char *command)
{
    /* Fixed text that follows the USAGE line, one entry per output line. */
    static const char *const body[] = {
        "Assign suitable labels to the instances in the data set given by a file (DATA).\n",
        "If the argument DATA is omitted or '-', this utility reads a data from STDIN.\n",
        "Evaluate the performance of the model on labeled instances (with -t option).\n",
        "\n",
        "OPTIONS:\n",
        " -m, --model=MODEL Read a model from a file (MODEL)\n",
        " -t, --test Report the performance of the model on the data\n",
        " -r, --reference Output the reference labels in the input data\n",
        " -p, --probability Output the probability of the label sequences\n",
        " -i, --marginal Output the marginal probabilities of items\n",
        " -q, --quiet Suppress tagging results (useful for test mode)\n",
        " -h, --help Show the usage of this command and exit\n",
    };
    size_t k;

    fprintf(fp, "USAGE: %s %s [OPTIONS] [DATA]\n", argv0, command);
    for (k = 0; k < sizeof(body) / sizeof(body[0]); ++k) {
        fputs(body[k], fp);
    }
}
144
+
145
+
146
+
147
+ static void
148
+ output_result(
149
+ FILE *fpo,
150
+ crfsuite_tagger_t *tagger,
151
+ const crfsuite_instance_t *inst,
152
+ int *output,
153
+ crfsuite_dictionary_t *labels,
154
+ floatval_t score,
155
+ const tagger_option_t* opt
156
+ )
157
+ {
158
+ int i;
159
+
160
+ if (opt->probability) {
161
+ floatval_t lognorm;
162
+ tagger->lognorm(tagger, &lognorm);
163
+ fprintf(fpo, "@probability\t%f\n", exp(score - lognorm));
164
+ }
165
+
166
+ for (i = 0;i < inst->num_items;++i) {
167
+ const char *label = NULL;
168
+
169
+ if (opt->reference) {
170
+ labels->to_string(labels, inst->labels[i], &label);
171
+ fprintf(fpo, "%s\t", label);
172
+ labels->free(labels, label);
173
+ }
174
+
175
+ labels->to_string(labels, output[i], &label);
176
+ fprintf(fpo, "%s", label);
177
+ labels->free(labels, label);
178
+
179
+ if (opt->marginal) {
180
+ floatval_t prob;
181
+ tagger->marginal_point(tagger, output[i], i, &prob);
182
+ fprintf(fpo, ":%f", prob);
183
+ }
184
+
185
+ fprintf(fpo, "\n");
186
+ }
187
+ fprintf(fpo, "\n");
188
+ }
189
+
190
+ static void
191
+ output_instance(
192
+ FILE *fpo,
193
+ const crfsuite_instance_t *inst,
194
+ crfsuite_dictionary_t *labels,
195
+ crfsuite_dictionary_t *attrs
196
+ )
197
+ {
198
+ int i, j;
199
+
200
+ for (i = 0;i < inst->num_items;++i) {
201
+ const char *label = NULL;
202
+ labels->to_string(labels, inst->labels[i], &label);
203
+ fprintf(fpo, "%s", label);
204
+ labels->free(labels, label);
205
+
206
+ for (j = 0;j < inst->items[i].num_contents;++j) {
207
+ const char *attr = NULL;
208
+ attrs->to_string(attrs, inst->items[i].contents[j].aid, &attr);
209
+ fprintf(fpo, "\t%s:%f", attr, inst->items[i].contents[j].value);
210
+ attrs->free(attrs, attr);
211
+ }
212
+
213
+ fprintf(fpo, "\n");
214
+ }
215
+ fprintf(fpo, "\n");
216
+ }
217
+
218
/*
 * Message callback that formats a message onto a stdio stream and flushes it.
 *
 *  @param  instance    The destination stream, passed as a FILE pointer.
 *  @param  format      The printf-style format string.
 *  @param  args        The arguments for the format string.
 *  @return             Always zero.
 */
static int message_callback(void *instance, const char *format, va_list args)
{
    FILE *stream = (FILE*)instance;

    vfprintf(stream, format, args);
    fflush(stream);
    return 0;
}
225
+
226
+ static int tag(tagger_option_t* opt, crfsuite_model_t* model)
227
+ {
228
+ int N = 0, L = 0, ret = 0, lid = -1;
229
+ clock_t clk0, clk1;
230
+ crfsuite_instance_t inst;
231
+ crfsuite_item_t item;
232
+ crfsuite_attribute_t cont;
233
+ crfsuite_evaluation_t eval;
234
+ char *comment = NULL;
235
+ iwa_t* iwa = NULL;
236
+ const iwa_token_t* token = NULL;
237
+ crfsuite_tagger_t *tagger = NULL;
238
+ crfsuite_dictionary_t *attrs = NULL, *labels = NULL;
239
+ FILE *fp = NULL, *fpi = opt->fpi, *fpo = opt->fpo, *fpe = opt->fpe;
240
+
241
+ /* Obtain the dictionary interface representing the labels in the model. */
242
+ if (ret = model->get_labels(model, &labels)) {
243
+ goto force_exit;
244
+ }
245
+
246
+ /* Obtain the dictionary interface representing the attributes in the model. */
247
+ if (ret = model->get_attrs(model, &attrs)) {
248
+ goto force_exit;
249
+ }
250
+
251
+ /* Obtain the tagger interface. */
252
+ if (ret = model->get_tagger(model, &tagger)) {
253
+ goto force_exit;
254
+ }
255
+
256
+ /* Initialize the objects for instance and evaluation. */
257
+ L = labels->num(labels);
258
+ crfsuite_instance_init(&inst);
259
+ crfsuite_evaluation_init(&eval, L);
260
+
261
+ /* Open the stream for the input data. */
262
+ fp = (strcmp(opt->input, "-") == 0) ? fpi : fopen(opt->input, "r");
263
+ if (fp == NULL) {
264
+ fprintf(fpe, "ERROR: failed to open the stream for the input data,\n");
265
+ fprintf(fpe, " %s\n", opt->input);
266
+ ret = 1;
267
+ goto force_exit;
268
+ }
269
+
270
+ /* Open a IWA reader. */
271
+ iwa = iwa_reader(fp);
272
+ if (iwa == NULL) {
273
+ fprintf(fpe, "ERROR: Failed to initialize the parser for the input data.\n");
274
+ ret = 1;
275
+ goto force_exit;
276
+ }
277
+
278
+ /* Read the input data and assign labels. */
279
+ clk0 = clock();
280
+ while (token = iwa_read(iwa), token != NULL) {
281
+ switch (token->type) {
282
+ case IWA_BOI:
283
+ /* Initialize an item. */
284
+ lid = -1;
285
+ crfsuite_item_init(&item);
286
+ free(comment);
287
+ comment = NULL;
288
+ break;
289
+ case IWA_EOI:
290
+ /* Append the item to the instance. */
291
+ crfsuite_instance_append(&inst, &item, lid);
292
+ crfsuite_item_finish(&item);
293
+ break;
294
+ case IWA_ITEM:
295
+ if (lid == -1) {
296
+ /* The first field in a line presents a label. */
297
+ lid = labels->to_id(labels, token->attr);
298
+ if (lid < 0) lid = L; /* #L stands for a unknown label. */
299
+ } else {
300
+ /* Fields after the first field present attributes. */
301
+ int aid = attrs->to_id(attrs, token->attr);
302
+ /* Ignore attributes 'unknown' to the model. */
303
+ if (0 <= aid) {
304
+ /* Associate the attribute with the current item. */
305
+ if (token->value && *token->value) {
306
+ crfsuite_attribute_set(&cont, aid, atof(token->value));
307
+ } else {
308
+ crfsuite_attribute_set(&cont, aid, 1.0);
309
+ }
310
+ crfsuite_item_append_attribute(&item, &cont);
311
+ }
312
+ }
313
+ break;
314
+ case IWA_NONE:
315
+ case IWA_EOF:
316
+ if (!crfsuite_instance_empty(&inst)) {
317
+ /* Initialize the object to receive the tagging result. */
318
+ floatval_t score = 0;
319
+ int *output = calloc(sizeof(int), inst.num_items);
320
+
321
+ /* Set the instance to the tagger. */
322
+ if ((ret = tagger->set(tagger, &inst))) {
323
+ goto force_exit;
324
+ }
325
+
326
+ /* Obtain the viterbi label sequence. */
327
+ if ((ret = tagger->viterbi(tagger, output, &score))) {
328
+ goto force_exit;
329
+ }
330
+
331
+ ++N;
332
+
333
+ /* Accumulate the tagging performance. */
334
+ if (opt->evaluate) {
335
+ crfsuite_evaluation_accmulate(&eval, inst.labels, output, inst.num_items);
336
+ }
337
+
338
+ if (!opt->quiet) {
339
+ output_result(fpo, tagger, &inst, output, labels, score, opt);
340
+ }
341
+
342
+ free(output);
343
+ crfsuite_instance_finish(&inst);
344
+ }
345
+ break;
346
+ }
347
+ }
348
+ clk1 = clock();
349
+
350
+ /* Compute the performance if specified. */
351
+ if (opt->evaluate) {
352
+ double sec = (clk1 - clk0) / (double)CLOCKS_PER_SEC;
353
+ crfsuite_evaluation_finalize(&eval);
354
+ crfsuite_evaluation_output(&eval, labels, message_callback, stdout);
355
+ fprintf(fpo, "Elapsed time: %f [sec] (%.1f [instance/sec])\n", sec, N / sec);
356
+ }
357
+
358
+ force_exit:
359
+ /* Close the IWA parser. */
360
+ iwa_delete(iwa);
361
+ iwa = NULL;
362
+
363
+ /* Close the input stream if necessary. */
364
+ if (fp != NULL && fp != fpi) {
365
+ fclose(fp);
366
+ fp = NULL;
367
+ }
368
+
369
+ free(comment);
370
+ crfsuite_instance_finish(&inst);
371
+ crfsuite_evaluation_finish(&eval);
372
+
373
+ SAFE_RELEASE(tagger);
374
+ SAFE_RELEASE(attrs);
375
+ SAFE_RELEASE(labels);
376
+
377
+ return ret;
378
+ }
379
+
380
+ int main_tag(int argc, char *argv[], const char *argv0)
381
+ {
382
+ int ret = 0, arg_used = 0;
383
+ tagger_option_t opt;
384
+ const char *command = argv[0];
385
+ FILE *fp = NULL, *fpi = stdin, *fpo = stdout, *fpe = stderr;
386
+ crfsuite_model_t *model = NULL;
387
+
388
+ /* Parse the command-line option. */
389
+ tagger_option_init(&opt);
390
+ arg_used = option_parse(++argv, --argc, parse_tagger_options, &opt);
391
+ if (arg_used < 0) {
392
+ ret = 1;
393
+ goto force_exit;
394
+ }
395
+
396
+ /* Show the help message for this command if specified. */
397
+ if (opt.help) {
398
+ show_copyright(fpo);
399
+ show_usage(fpo, argv0, command);
400
+ goto force_exit;
401
+ }
402
+
403
+ /* Set an input file. */
404
+ if (arg_used < argc) {
405
+ opt.input = mystrdup(argv[arg_used]);
406
+ } else {
407
+ opt.input = mystrdup("-"); /* STDIN. */
408
+ }
409
+
410
+ /* Read the model. */
411
+ if (opt.model != NULL) {
412
+ /* Create a model instance corresponding to the model file. */
413
+ if (ret = crfsuite_create_instance_from_file(opt.model, (void**)&model)) {
414
+ goto force_exit;
415
+ }
416
+
417
+ /* Tag the input data. */
418
+ if (ret = tag(&opt, model)) {
419
+ goto force_exit;
420
+ }
421
+ }
422
+
423
+ force_exit:
424
+ SAFE_RELEASE(model);
425
+ tagger_option_finish(&opt);
426
+ return ret;
427
+ }
@@ -0,0 +1,15 @@
1
#!/bin/bash
#
# Builds a statically linked binary distribution of the package and packs
# the installed tree into a tarball in the current directory.
#
# The @PACKAGE@ and @VERSION@ placeholders are presumably filled in when
# this template is processed by configure -- confirm against configure.in.

# Stop at the first failing step so that a broken build is never packaged.
set -e

LIBLBFGS="$HOME/local"
PKG=@PACKAGE@-@VERSION@
BINDIR="$HOME/build/$PKG"
TARGET="$(pwd)/$PKG-$(/usr/bin/arch).tar.gz"

# Start from an empty installation directory.
rm -rf "$BINDIR"
./configure --prefix="$BINDIR" --with-liblbfgs="$LIBLBFGS"
make clean
make LDFLAGS=-all-static
make install

# Pack from the parent directory so the archive holds a single $PKG folder.
cd "$BINDIR/.."
tar cvzf "$TARGET" "$PKG"
+
@@ -0,0 +1,11 @@
1
+ # $Id:$
2
+
3
# os.h is added to the distribution here; it is not in the header list below.
EXTRA_DIST = \
	os.h

# Headers listed in crfsuiteinclude_HEADERS go to crfsuiteincludedir,
# which is set to the standard include directory.
crfsuiteincludedir = $(includedir)
crfsuiteinclude_HEADERS = \
	crfsuite.h \
	crfsuite_api.hpp \
	crfsuite.hpp
11
+