opener-opinion-detector-base 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (261) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +101 -0
  3. data/bin/opinion-detector-base +19 -0
  4. data/core/annotation.cfg.erb +9 -0
  5. data/core/packages/KafNafParser-1.4.tar.gz +0 -0
  6. data/core/packages/VUA_pylib-1.5.tar.gz +0 -0
  7. data/core/python-scripts/LICENSE +339 -0
  8. data/core/python-scripts/README.md +226 -0
  9. data/core/python-scripts/classify_kaf_naf_file.py +499 -0
  10. data/core/python-scripts/cross_validation.py +634 -0
  11. data/core/python-scripts/generate_folds.py +134 -0
  12. data/core/python-scripts/models.cfg +10 -0
  13. data/core/python-scripts/my_templates/README +33 -0
  14. data/core/python-scripts/my_templates/templates_exp.only0.txt +6 -0
  15. data/core/python-scripts/my_templates/templates_exp.pol0.txt +10 -0
  16. data/core/python-scripts/my_templates/templates_exp.red.txt +7 -0
  17. data/core/python-scripts/my_templates/templates_exp.txt +10 -0
  18. data/core/python-scripts/my_templates/templates_holder.only0.txt +11 -0
  19. data/core/python-scripts/my_templates/templates_holder.red.txt +9 -0
  20. data/core/python-scripts/my_templates/templates_holder.txt +10 -0
  21. data/core/python-scripts/my_templates/templates_target.only0.txt +11 -0
  22. data/core/python-scripts/my_templates/templates_target.red.txt +9 -0
  23. data/core/python-scripts/my_templates/templates_target.txt +10 -0
  24. data/core/python-scripts/run_all_experiments.sh +49 -0
  25. data/core/python-scripts/run_basic.py +20 -0
  26. data/core/python-scripts/run_experiment.sh +42 -0
  27. data/core/python-scripts/scripts/__init__.py +1 -0
  28. data/core/python-scripts/scripts/config_manager.py +314 -0
  29. data/core/python-scripts/scripts/crfutils.py +215 -0
  30. data/core/python-scripts/scripts/extract_feats_relations.py +295 -0
  31. data/core/python-scripts/scripts/extract_features.py +376 -0
  32. data/core/python-scripts/scripts/feats_to_crf.exp.py +105 -0
  33. data/core/python-scripts/scripts/lexicons.py +44 -0
  34. data/core/python-scripts/scripts/link_entities_distance.py +77 -0
  35. data/core/python-scripts/scripts/relation_classifier.py +250 -0
  36. data/core/python-scripts/train.py +566 -0
  37. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/PKG-INFO +10 -0
  38. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/SOURCES.txt +22 -0
  39. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/dependency_links.txt +1 -0
  40. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/installed-files.txt +47 -0
  41. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/top_level.txt +1 -0
  42. data/core/site-packages/pre_build/KafNafParser/KafNafParserMod.py +390 -0
  43. data/core/site-packages/pre_build/KafNafParser/KafNafParserMod.pyc +0 -0
  44. data/core/site-packages/pre_build/KafNafParser/__init__.py +14 -0
  45. data/core/site-packages/pre_build/KafNafParser/__init__.pyc +0 -0
  46. data/core/site-packages/pre_build/KafNafParser/constituency_data.py +125 -0
  47. data/core/site-packages/pre_build/KafNafParser/constituency_data.pyc +0 -0
  48. data/core/site-packages/pre_build/KafNafParser/coreference_data.py +52 -0
  49. data/core/site-packages/pre_build/KafNafParser/coreference_data.pyc +0 -0
  50. data/core/site-packages/pre_build/KafNafParser/dependency_data.py +78 -0
  51. data/core/site-packages/pre_build/KafNafParser/dependency_data.pyc +0 -0
  52. data/core/site-packages/pre_build/KafNafParser/entity_data.py +59 -0
  53. data/core/site-packages/pre_build/KafNafParser/entity_data.pyc +0 -0
  54. data/core/site-packages/pre_build/KafNafParser/external_references_data.py +41 -0
  55. data/core/site-packages/pre_build/KafNafParser/external_references_data.pyc +0 -0
  56. data/core/site-packages/pre_build/KafNafParser/feature_extractor/__init__.py +2 -0
  57. data/core/site-packages/pre_build/KafNafParser/feature_extractor/__init__.pyc +0 -0
  58. data/core/site-packages/pre_build/KafNafParser/feature_extractor/constituency.py +205 -0
  59. data/core/site-packages/pre_build/KafNafParser/feature_extractor/constituency.pyc +0 -0
  60. data/core/site-packages/pre_build/KafNafParser/feature_extractor/dependency.py +309 -0
  61. data/core/site-packages/pre_build/KafNafParser/feature_extractor/dependency.pyc +0 -0
  62. data/core/site-packages/pre_build/KafNafParser/features_data.py +131 -0
  63. data/core/site-packages/pre_build/KafNafParser/features_data.pyc +0 -0
  64. data/core/site-packages/pre_build/KafNafParser/header_data.py +127 -0
  65. data/core/site-packages/pre_build/KafNafParser/header_data.pyc +0 -0
  66. data/core/site-packages/pre_build/KafNafParser/opinion_data.py +211 -0
  67. data/core/site-packages/pre_build/KafNafParser/opinion_data.pyc +0 -0
  68. data/core/site-packages/pre_build/KafNafParser/references_data.py +23 -0
  69. data/core/site-packages/pre_build/KafNafParser/references_data.pyc +0 -0
  70. data/core/site-packages/pre_build/KafNafParser/span_data.py +63 -0
  71. data/core/site-packages/pre_build/KafNafParser/span_data.pyc +0 -0
  72. data/core/site-packages/pre_build/KafNafParser/term_data.py +111 -0
  73. data/core/site-packages/pre_build/KafNafParser/term_data.pyc +0 -0
  74. data/core/site-packages/pre_build/KafNafParser/term_sentiment_data.py +42 -0
  75. data/core/site-packages/pre_build/KafNafParser/term_sentiment_data.pyc +0 -0
  76. data/core/site-packages/pre_build/KafNafParser/text_data.py +99 -0
  77. data/core/site-packages/pre_build/KafNafParser/text_data.pyc +0 -0
  78. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/PKG-INFO +10 -0
  79. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/SOURCES.txt +14 -0
  80. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/dependency_links.txt +1 -0
  81. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/installed-files.txt +23 -0
  82. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/top_level.txt +1 -0
  83. data/core/site-packages/pre_build/VUA_pylib/__init__.py +1 -0
  84. data/core/site-packages/pre_build/VUA_pylib/__init__.pyc +0 -0
  85. data/core/site-packages/pre_build/VUA_pylib/common/__init__.py +1 -0
  86. data/core/site-packages/pre_build/VUA_pylib/common/__init__.pyc +0 -0
  87. data/core/site-packages/pre_build/VUA_pylib/common/common.py +28 -0
  88. data/core/site-packages/pre_build/VUA_pylib/common/common.pyc +0 -0
  89. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/__init__.py +1 -0
  90. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/__init__.pyc +0 -0
  91. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/google_web_nl.py +156 -0
  92. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/google_web_nl.pyc +0 -0
  93. data/core/site-packages/pre_build/VUA_pylib/io_utils/__init__.py +1 -0
  94. data/core/site-packages/pre_build/VUA_pylib/io_utils/__init__.pyc +0 -0
  95. data/core/site-packages/pre_build/VUA_pylib/io_utils/feature_file.py +121 -0
  96. data/core/site-packages/pre_build/VUA_pylib/io_utils/feature_file.pyc +0 -0
  97. data/core/site-packages/pre_build/VUA_pylib/lexicon/__init__.py +1 -0
  98. data/core/site-packages/pre_build/VUA_pylib/lexicon/__init__.pyc +0 -0
  99. data/core/site-packages/pre_build/VUA_pylib/lexicon/lexicon.py +72 -0
  100. data/core/site-packages/pre_build/VUA_pylib/lexicon/lexicon.pyc +0 -0
  101. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/PKG-INFO +10 -0
  102. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/SOURCES.txt +7 -0
  103. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/dependency_links.txt +1 -0
  104. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/installed-files.txt +11 -0
  105. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/top_level.txt +1 -0
  106. data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.py +165 -0
  107. data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.pyc +0 -0
  108. data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.py +439 -0
  109. data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.pyc +0 -0
  110. data/core/site-packages/pre_build/VUKafParserPy/__init__.py +7 -0
  111. data/core/site-packages/pre_build/VUKafParserPy/__init__.pyc +0 -0
  112. data/core/vendor/src/crfsuite/AUTHORS +1 -0
  113. data/core/vendor/src/crfsuite/COPYING +27 -0
  114. data/core/vendor/src/crfsuite/ChangeLog +103 -0
  115. data/core/vendor/src/crfsuite/INSTALL +236 -0
  116. data/core/vendor/src/crfsuite/Makefile.am +19 -0
  117. data/core/vendor/src/crfsuite/Makefile.in +783 -0
  118. data/core/vendor/src/crfsuite/README +183 -0
  119. data/core/vendor/src/crfsuite/aclocal.m4 +9018 -0
  120. data/core/vendor/src/crfsuite/autogen.sh +38 -0
  121. data/core/vendor/src/crfsuite/compile +143 -0
  122. data/core/vendor/src/crfsuite/config.guess +1502 -0
  123. data/core/vendor/src/crfsuite/config.h.in +198 -0
  124. data/core/vendor/src/crfsuite/config.sub +1714 -0
  125. data/core/vendor/src/crfsuite/configure +14273 -0
  126. data/core/vendor/src/crfsuite/configure.in +149 -0
  127. data/core/vendor/src/crfsuite/crfsuite.sln +42 -0
  128. data/core/vendor/src/crfsuite/depcomp +630 -0
  129. data/core/vendor/src/crfsuite/example/chunking.py +49 -0
  130. data/core/vendor/src/crfsuite/example/crfutils.py +179 -0
  131. data/core/vendor/src/crfsuite/example/ner.py +270 -0
  132. data/core/vendor/src/crfsuite/example/pos.py +78 -0
  133. data/core/vendor/src/crfsuite/example/template.py +88 -0
  134. data/core/vendor/src/crfsuite/frontend/Makefile.am +29 -0
  135. data/core/vendor/src/crfsuite/frontend/Makefile.in +640 -0
  136. data/core/vendor/src/crfsuite/frontend/dump.c +116 -0
  137. data/core/vendor/src/crfsuite/frontend/frontend.vcxproj +129 -0
  138. data/core/vendor/src/crfsuite/frontend/iwa.c +273 -0
  139. data/core/vendor/src/crfsuite/frontend/iwa.h +65 -0
  140. data/core/vendor/src/crfsuite/frontend/learn.c +439 -0
  141. data/core/vendor/src/crfsuite/frontend/main.c +137 -0
  142. data/core/vendor/src/crfsuite/frontend/option.c +93 -0
  143. data/core/vendor/src/crfsuite/frontend/option.h +86 -0
  144. data/core/vendor/src/crfsuite/frontend/readdata.h +38 -0
  145. data/core/vendor/src/crfsuite/frontend/reader.c +136 -0
  146. data/core/vendor/src/crfsuite/frontend/tag.c +427 -0
  147. data/core/vendor/src/crfsuite/genbinary.sh.in +15 -0
  148. data/core/vendor/src/crfsuite/include/Makefile.am +11 -0
  149. data/core/vendor/src/crfsuite/include/Makefile.in +461 -0
  150. data/core/vendor/src/crfsuite/include/crfsuite.h +1063 -0
  151. data/core/vendor/src/crfsuite/include/crfsuite.hpp +555 -0
  152. data/core/vendor/src/crfsuite/include/crfsuite_api.hpp +400 -0
  153. data/core/vendor/src/crfsuite/include/os.h +61 -0
  154. data/core/vendor/src/crfsuite/install-sh +520 -0
  155. data/core/vendor/src/crfsuite/lib/cqdb/COPYING +28 -0
  156. data/core/vendor/src/crfsuite/lib/cqdb/Makefile.am +21 -0
  157. data/core/vendor/src/crfsuite/lib/cqdb/Makefile.in +549 -0
  158. data/core/vendor/src/crfsuite/lib/cqdb/cqdb.vcxproj +86 -0
  159. data/core/vendor/src/crfsuite/lib/cqdb/include/cqdb.h +524 -0
  160. data/core/vendor/src/crfsuite/lib/cqdb/src/cqdb.c +587 -0
  161. data/core/vendor/src/crfsuite/lib/cqdb/src/lookup3.c +976 -0
  162. data/core/vendor/src/crfsuite/lib/crf/Makefile.am +46 -0
  163. data/core/vendor/src/crfsuite/lib/crf/Makefile.in +721 -0
  164. data/core/vendor/src/crfsuite/lib/crf/crf.vcxproj +216 -0
  165. data/core/vendor/src/crfsuite/lib/crf/src/crf1d.h +353 -0
  166. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_context.c +705 -0
  167. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_encode.c +943 -0
  168. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_feature.c +352 -0
  169. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_model.c +994 -0
  170. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_tag.c +550 -0
  171. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite.c +492 -0
  172. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite_internal.h +236 -0
  173. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite_train.c +272 -0
  174. data/core/vendor/src/crfsuite/lib/crf/src/dataset.c +106 -0
  175. data/core/vendor/src/crfsuite/lib/crf/src/dictionary.c +118 -0
  176. data/core/vendor/src/crfsuite/lib/crf/src/holdout.c +80 -0
  177. data/core/vendor/src/crfsuite/lib/crf/src/logging.c +91 -0
  178. data/core/vendor/src/crfsuite/lib/crf/src/logging.h +48 -0
  179. data/core/vendor/src/crfsuite/lib/crf/src/params.c +335 -0
  180. data/core/vendor/src/crfsuite/lib/crf/src/params.h +80 -0
  181. data/core/vendor/src/crfsuite/lib/crf/src/quark.c +172 -0
  182. data/core/vendor/src/crfsuite/lib/crf/src/quark.h +46 -0
  183. data/core/vendor/src/crfsuite/lib/crf/src/rumavl.c +1107 -0
  184. data/core/vendor/src/crfsuite/lib/crf/src/rumavl.h +160 -0
  185. data/core/vendor/src/crfsuite/lib/crf/src/train_arow.c +408 -0
  186. data/core/vendor/src/crfsuite/lib/crf/src/train_averaged_perceptron.c +242 -0
  187. data/core/vendor/src/crfsuite/lib/crf/src/train_l2sgd.c +507 -0
  188. data/core/vendor/src/crfsuite/lib/crf/src/train_lbfgs.c +338 -0
  189. data/core/vendor/src/crfsuite/lib/crf/src/train_passive_aggressive.c +435 -0
  190. data/core/vendor/src/crfsuite/lib/crf/src/vecmath.h +341 -0
  191. data/core/vendor/src/crfsuite/ltmain.sh +8413 -0
  192. data/core/vendor/src/crfsuite/missing +376 -0
  193. data/core/vendor/src/crfsuite/swig/Makefile.am +13 -0
  194. data/core/vendor/src/crfsuite/swig/Makefile.in +365 -0
  195. data/core/vendor/src/crfsuite/swig/crfsuite.cpp +2 -0
  196. data/core/vendor/src/crfsuite/swig/export.i +32 -0
  197. data/core/vendor/src/crfsuite/swig/python/README +92 -0
  198. data/core/vendor/src/crfsuite/swig/python/crfsuite.py +329 -0
  199. data/core/vendor/src/crfsuite/swig/python/export_wrap.cpp +14355 -0
  200. data/core/vendor/src/crfsuite/swig/python/export_wrap.h +63 -0
  201. data/core/vendor/src/crfsuite/swig/python/prepare.sh +9 -0
  202. data/core/vendor/src/crfsuite/swig/python/sample_tag.py +52 -0
  203. data/core/vendor/src/crfsuite/swig/python/sample_train.py +68 -0
  204. data/core/vendor/src/crfsuite/swig/python/setup.py +44 -0
  205. data/core/vendor/src/crfsuite/win32/stdint.h +679 -0
  206. data/core/vendor/src/liblbfgs/AUTHORS +1 -0
  207. data/core/vendor/src/liblbfgs/COPYING +22 -0
  208. data/core/vendor/src/liblbfgs/ChangeLog +120 -0
  209. data/core/vendor/src/liblbfgs/INSTALL +231 -0
  210. data/core/vendor/src/liblbfgs/Makefile.am +10 -0
  211. data/core/vendor/src/liblbfgs/Makefile.in +638 -0
  212. data/core/vendor/src/liblbfgs/NEWS +0 -0
  213. data/core/vendor/src/liblbfgs/README +71 -0
  214. data/core/vendor/src/liblbfgs/aclocal.m4 +6985 -0
  215. data/core/vendor/src/liblbfgs/autogen.sh +38 -0
  216. data/core/vendor/src/liblbfgs/config.guess +1411 -0
  217. data/core/vendor/src/liblbfgs/config.h.in +64 -0
  218. data/core/vendor/src/liblbfgs/config.sub +1500 -0
  219. data/core/vendor/src/liblbfgs/configure +21146 -0
  220. data/core/vendor/src/liblbfgs/configure.in +107 -0
  221. data/core/vendor/src/liblbfgs/depcomp +522 -0
  222. data/core/vendor/src/liblbfgs/include/lbfgs.h +745 -0
  223. data/core/vendor/src/liblbfgs/install-sh +322 -0
  224. data/core/vendor/src/liblbfgs/lbfgs.sln +26 -0
  225. data/core/vendor/src/liblbfgs/lib/Makefile.am +24 -0
  226. data/core/vendor/src/liblbfgs/lib/Makefile.in +499 -0
  227. data/core/vendor/src/liblbfgs/lib/arithmetic_ansi.h +133 -0
  228. data/core/vendor/src/liblbfgs/lib/arithmetic_sse_double.h +294 -0
  229. data/core/vendor/src/liblbfgs/lib/arithmetic_sse_float.h +298 -0
  230. data/core/vendor/src/liblbfgs/lib/lbfgs.c +1371 -0
  231. data/core/vendor/src/liblbfgs/lib/lib.vcxproj +95 -0
  232. data/core/vendor/src/liblbfgs/ltmain.sh +6426 -0
  233. data/core/vendor/src/liblbfgs/missing +353 -0
  234. data/core/vendor/src/liblbfgs/sample/Makefile.am +15 -0
  235. data/core/vendor/src/liblbfgs/sample/Makefile.in +433 -0
  236. data/core/vendor/src/liblbfgs/sample/sample.c +81 -0
  237. data/core/vendor/src/liblbfgs/sample/sample.cpp +126 -0
  238. data/core/vendor/src/liblbfgs/sample/sample.vcxproj +105 -0
  239. data/core/vendor/src/svm_light/LICENSE.txt +59 -0
  240. data/core/vendor/src/svm_light/Makefile +105 -0
  241. data/core/vendor/src/svm_light/kernel.h +40 -0
  242. data/core/vendor/src/svm_light/svm_classify.c +197 -0
  243. data/core/vendor/src/svm_light/svm_common.c +985 -0
  244. data/core/vendor/src/svm_light/svm_common.h +301 -0
  245. data/core/vendor/src/svm_light/svm_hideo.c +1062 -0
  246. data/core/vendor/src/svm_light/svm_learn.c +4147 -0
  247. data/core/vendor/src/svm_light/svm_learn.h +169 -0
  248. data/core/vendor/src/svm_light/svm_learn_main.c +397 -0
  249. data/core/vendor/src/svm_light/svm_loqo.c +211 -0
  250. data/ext/hack/Rakefile +17 -0
  251. data/ext/hack/support.rb +88 -0
  252. data/lib/opener/opinion_detectors/base.rb +112 -0
  253. data/lib/opener/opinion_detectors/base/version.rb +7 -0
  254. data/lib/opener/opinion_detectors/configuration_creator.rb +86 -0
  255. data/lib/opener/opinion_detectors/de.rb +7 -0
  256. data/lib/opener/opinion_detectors/en.rb +7 -0
  257. data/lib/opener/opinion_detectors/it.rb +7 -0
  258. data/lib/opener/opinion_detectors/nl.rb +6 -0
  259. data/opener-opinion-detector-base.gemspec +35 -0
  260. data/pre_build_requirements.txt +3 -0
  261. metadata +374 -0
@@ -0,0 +1,236 @@
1
+ /*
2
+ * CRFsuite internal interface.
3
+ *
4
+ * Copyright (c) 2007-2010, Naoaki Okazaki
5
+ * All rights reserved.
6
+ *
7
+ * Redistribution and use in source and binary forms, with or without
8
+ * modification, are permitted provided that the following conditions are met:
9
+ * * Redistributions of source code must retain the above copyright
10
+ * notice, this list of conditions and the following disclaimer.
11
+ * * Redistributions in binary form must reproduce the above copyright
12
+ * notice, this list of conditions and the following disclaimer in the
13
+ * documentation and/or other materials provided with the distribution.
14
+ * * Neither the names of the authors nor the names of its contributors
15
+ * may be used to endorse or promote products derived from this
16
+ * software without specific prior written permission.
17
+ *
18
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
22
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
+ */
30
+
31
+ /* $Id$ */
32
+
33
+ #ifndef __CRFSUITE_INTERNAL_H__
34
+ #define __CRFSUITE_INTERNAL_H__
35
+
36
+ #include <crfsuite.h>
37
+ #include "logging.h"
38
+
39
+ /** Feature types (stored in tag_crfsuite_train_internal::feature_type). */
+ enum {
40
+ FTYPE_NONE = 0, /**< Unselected. */
41
+ FTYPE_CRF1D, /**< 1st-order dyad features. */
42
+ FTYPE_CRF1T, /**< 1st-order triad features. */
43
+ };
44
+
45
+ /** Training algorithms (stored in tag_crfsuite_train_internal::algorithm). */
+ enum {
46
+ TRAIN_NONE = 0, /**< Unselected. */
47
+ TRAIN_LBFGS, /**< L-BFGS batch training. */
48
+ TRAIN_L2SGD, /**< Pegasos online training. */
49
+ TRAIN_AVERAGED_PERCEPTRON, /**< Averaged perceptron. */
50
+ TRAIN_PASSIVE_AGGRESSIVE, /**< Passive aggressive. */
51
+ TRAIN_AROW, /**< AROW. */
52
+ };
53
+
54
+ struct tag_crfsuite_train_internal;
55
+ typedef struct tag_crfsuite_train_internal crfsuite_train_internal_t;
56
+
57
+ struct tag_encoder;
58
+ typedef struct tag_encoder encoder_t;
59
+
60
+ /**
+ * A data set handed to the encoder and the training algorithms.
+ * Set up by dataset_init_trainset() / dataset_init_testset().
+ */
+ typedef struct {
61
+ crfsuite_data_t *data; /**< Underlying data passed to dataset_init_*(). */
62
+ int *perm; /**< NOTE(review): presumably an index permutation used by dataset_shuffle()/dataset_get() -- confirm in dataset.c. */
63
+ int num_instances; /**< NOTE(review): presumably the number of entries in perm -- confirm in dataset.c. */
64
+ } dataset_t;
65
+
66
+ void dataset_init_trainset(dataset_t *ds, crfsuite_data_t *data, int holdout);
67
+ void dataset_init_testset(dataset_t *ds, crfsuite_data_t *data, int holdout);
68
+ void dataset_finish(dataset_t *ds);
69
+ void dataset_shuffle(dataset_t *ds);
70
+ crfsuite_instance_t *dataset_get(dataset_t *ds, int i);
71
+
72
+ typedef void (*crfsuite_encoder_features_on_path_callback)(void *instance, int fid, floatval_t value);
73
+
74
+ /**
75
+ * Internal data structure for a trainer instance (crfsuite_trainer_t::internal).
76
+ */
77
+ struct tag_crfsuite_train_internal {
78
+ encoder_t *gm; /**< Interface to the graphical model. */
79
+ crfsuite_params_t *params; /**< Parameter interface. */
80
+ logging_t* lg; /**< Logging interface. */
81
+ int feature_type; /**< Feature type. */
82
+ int algorithm; /**< Training algorithm. */
83
+ };
84
+
85
+ /**
86
+ * Interface for a graphical model.
87
+ */
88
+ struct tag_encoder
89
+ {
90
+ void *internal;
91
+
92
+ const floatval_t *w;
93
+ floatval_t scale;
94
+
95
+ dataset_t *ds;
96
+ const crfsuite_instance_t *inst;
97
+ int level;
98
+
99
+ int num_features;
100
+ int cap_items;
101
+
102
+ /**
103
+ * Exchanges options.
104
+ * @param self The encoder instance.
105
+ * @param params The parameter interface.
106
+ * @param mode The direction of parameter exchange.
107
+ * @return A status code.
108
+ */
109
+ int (*exchange_options)(encoder_t *self, crfsuite_params_t* params, int mode);
110
+
111
+ /**
112
+ * Initializes the encoder with a training data set.
113
+ * @param self The encoder instance.
114
+ * @param ds The data set for training.
115
+ * @param lg The logging interface.
116
+ * @return A status code.
117
+ */
118
+ int (*initialize)(encoder_t *self, dataset_t *ds, logging_t *lg);
119
+
120
+ /**
121
+ * Compute the objective value and gradients for the whole data set.
122
+ * @param self The encoder instance.
123
+ * @param ds The data set.
124
+ * @param w The feature weights.
125
+ * @param f The pointer to a floatval_t variable to which the
126
+ * objective value is stored by this function.
127
+ * @param g The pointer to the array that receives gradients.
128
+ * @return A status code.
129
+ */
130
+ int (*objective_and_gradients_batch)(encoder_t *self, dataset_t *ds, const floatval_t *w, floatval_t *f, floatval_t *g);
131
+
132
+ int (*features_on_path)(encoder_t *self, const crfsuite_instance_t *inst, const int *path, crfsuite_encoder_features_on_path_callback func, void *instance);
133
+
134
+ /**
135
+ * Sets the feature weights (and their scale factor).
136
+ * @param self The encoder instance.
137
+ * @param w The array of feature weights.
138
+ * @param scale The scale factor that should be applied to the
139
+ * feature weights.
140
+ * @return A status code.
141
+ */
142
+ int (*set_weights)(encoder_t *self, const floatval_t *w, floatval_t scale);
143
+
144
+ /* Instance-wise operations. */
145
+ int (*set_instance)(encoder_t *self, const crfsuite_instance_t *inst);
146
+
147
+ /* Level 0. */
148
+
149
+ /* Level 1 (feature weights). */
150
+ int (*score)(encoder_t *self, const int *path, floatval_t *ptr_score);
151
+ int (*viterbi)(encoder_t *self, int *path, floatval_t *ptr_score);
152
+
153
+ /* Level 2 (forward-backward). */
154
+ int (*partition_factor)(encoder_t *self, floatval_t *ptr_pf);
155
+
156
+ /* Level 3 (marginals). */
157
+ int (*objective_and_gradients)(encoder_t *self, floatval_t *f, floatval_t *g, floatval_t gain);
158
+
159
+ int (*save_model)(encoder_t *self, const char *filename, const floatval_t *w, logging_t *lg);
160
+
161
+ };
162
+
163
+ /**
164
+ * \defgroup crf1d_encode.c
165
+ */
166
+ /** @{ */
167
+
168
+ encoder_t *crf1d_create_encoder();
169
+
170
+ /** @} */
171
+
172
+
173
+ void holdout_evaluation(
174
+ encoder_t *gm,
175
+ dataset_t *testset,
176
+ const floatval_t *w,
177
+ logging_t *lg
178
+ );
179
+
180
+ int crfsuite_train_lbfgs(
181
+ encoder_t *gm,
182
+ dataset_t *trainset,
183
+ dataset_t *testset,
184
+ crfsuite_params_t *params,
185
+ logging_t *lg,
186
+ floatval_t **ptr_w
187
+ );
188
+
189
+ void crfsuite_train_lbfgs_init(crfsuite_params_t* params);
190
+
191
+ void crfsuite_train_averaged_perceptron_init(crfsuite_params_t* params);
192
+
193
+ int crfsuite_train_averaged_perceptron(
194
+ encoder_t *gm,
195
+ dataset_t *trainset,
196
+ dataset_t *testset,
197
+ crfsuite_params_t *params,
198
+ logging_t *lg,
199
+ floatval_t **ptr_w
200
+ );
201
+
202
+ void crfsuite_train_l2sgd_init(crfsuite_params_t* params);
203
+
204
+ int crfsuite_train_l2sgd(
205
+ encoder_t *gm,
206
+ dataset_t *trainset,
207
+ dataset_t *testset,
208
+ crfsuite_params_t *params,
209
+ logging_t *lg,
210
+ floatval_t **ptr_w
211
+ );
212
+
213
+ void crfsuite_train_passive_aggressive_init(crfsuite_params_t* params);
214
+
215
+ int crfsuite_train_passive_aggressive(
216
+ encoder_t *gm,
217
+ dataset_t *trainset,
218
+ dataset_t *testset,
219
+ crfsuite_params_t *params,
220
+ logging_t *lg,
221
+ floatval_t **ptr_w
222
+ );
223
+
224
+ void crfsuite_train_arow_init(crfsuite_params_t* params);
225
+
226
+ int crfsuite_train_arow(
227
+ encoder_t *gm,
228
+ dataset_t *trainset,
229
+ dataset_t *testset,
230
+ crfsuite_params_t *params,
231
+ logging_t *lg,
232
+ floatval_t **ptr_w
233
+ );
234
+
235
+
236
+ #endif/*__CRFSUITE_INTERNAL_H__*/
@@ -0,0 +1,272 @@
1
+ /*
2
+ * Implementation of the training interface (crfsuite_trainer_t).
3
+ *
4
+ * Copyright (c) 2007-2010, Naoaki Okazaki
5
+ * All rights reserved.
6
+ *
7
+ * Redistribution and use in source and binary forms, with or without
8
+ * modification, are permitted provided that the following conditions are met:
9
+ * * Redistributions of source code must retain the above copyright
10
+ * notice, this list of conditions and the following disclaimer.
11
+ * * Redistributions in binary form must reproduce the above copyright
12
+ * notice, this list of conditions and the following disclaimer in the
13
+ * documentation and/or other materials provided with the distribution.
14
+ * * Neither the names of the authors nor the names of its contributors
15
+ * may be used to endorse or promote products derived from this
16
+ * software without specific prior written permission.
17
+ *
18
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
22
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
+ */
30
+
31
+ /* $Id$ */
32
+
33
+ #ifdef HAVE_CONFIG_H
34
+ #include <config.h>
35
+ #endif/*HAVE_CONFIG_H*/
36
+
37
+ #include <os.h>
38
+
39
+ #include <stdlib.h>
40
+ #include <string.h>
41
+
42
+ #include <crfsuite.h>
43
+ #include "crfsuite_internal.h"
44
+ #include "params.h"
45
+ #include "logging.h"
46
+ #include "crf1d.h"
47
+
48
+ static crfsuite_train_internal_t* crfsuite_train_new(int ftype, int algorithm)
49
+ {
50
+ crfsuite_train_internal_t *tr = (crfsuite_train_internal_t*)calloc(1, sizeof(crfsuite_train_internal_t));
51
+ if (tr != NULL) {
52
+ tr->lg = (logging_t*)calloc(1, sizeof(logging_t));
53
+ tr->params = params_create_instance();
54
+ tr->feature_type = ftype;
55
+ tr->algorithm = algorithm;
56
+
57
+ tr->gm = crf1d_create_encoder();
58
+ tr->gm->exchange_options(tr->gm, tr->params, 0);
59
+
60
+ /* Initialize parameters for the training algorithm. */
61
+ switch (algorithm) {
62
+ case TRAIN_LBFGS:
63
+ crfsuite_train_lbfgs_init(tr->params);
64
+ break;
65
+ case TRAIN_L2SGD:
66
+ crfsuite_train_l2sgd_init(tr->params);
67
+ break;
68
+ case TRAIN_AVERAGED_PERCEPTRON:
69
+ crfsuite_train_averaged_perceptron_init(tr->params);
70
+ break;
71
+ case TRAIN_PASSIVE_AGGRESSIVE:
72
+ crfsuite_train_passive_aggressive_init(tr->params);
73
+ break;
74
+ case TRAIN_AROW:
75
+ crfsuite_train_arow_init(tr->params);
76
+ break;
77
+ }
78
+ }
79
+
80
+ return tr;
81
+ }
82
+
83
+ static void crfsuite_train_delete(crfsuite_trainer_t* self)
84
+ {
85
+ crfsuite_train_internal_t *tr = (crfsuite_train_internal_t*)self->internal;
86
+ if (tr != NULL) {
87
+ if (tr->params != NULL) {
88
+ tr->params->release(tr->params);
89
+ }
90
+ free(tr->lg);
91
+ free(tr);
92
+ }
93
+ }
94
+
95
+ static int crfsuite_train_addref(crfsuite_trainer_t* tr)
96
+ {
97
+ return crfsuite_interlocked_increment(&tr->nref);
98
+ }
99
+
100
+ static int crfsuite_train_release(crfsuite_trainer_t* self)
101
+ {
102
+ int count = crfsuite_interlocked_decrement(&self->nref);
103
+ if (count == 0) {
104
+ crfsuite_train_delete(self);
105
+ }
106
+ return count;
107
+ }
108
+
109
+ static void crfsuite_train_set_message_callback(crfsuite_trainer_t* self, void *instance, crfsuite_logging_callback cbm)
110
+ {
111
+ crfsuite_train_internal_t *tr = (crfsuite_train_internal_t*)self->internal;
112
+ tr->lg->func = cbm;
113
+ tr->lg->instance = instance;
114
+ }
115
+
116
+ static crfsuite_params_t* crfsuite_train_params(crfsuite_trainer_t* self)
117
+ {
118
+ crfsuite_train_internal_t *tr = (crfsuite_train_internal_t*)self->internal;
119
+ crfsuite_params_t* params = tr->params;
120
+ params->addref(params);
121
+ return params;
122
+ }
123
+
124
+ static int crfsuite_train_train(
125
+ crfsuite_trainer_t* self,
126
+ const crfsuite_data_t *data,
127
+ const char *filename,
128
+ int holdout
129
+ )
130
+ {
131
+ char *algorithm = NULL;
132
+ crfsuite_train_internal_t *tr = (crfsuite_train_internal_t*)self->internal;
133
+ logging_t *lg = tr->lg;
134
+ encoder_t *gm = tr->gm;
135
+ floatval_t *w = NULL;
136
+ dataset_t trainset;
137
+ dataset_t testset;
138
+
139
+ /* Prepare the data set(s) for training (and holdout evaluation). */
140
+ dataset_init_trainset(&trainset, (crfsuite_data_t*)data, holdout);
141
+ if (0 <= holdout) {
142
+ dataset_init_testset(&testset, (crfsuite_data_t*)data, holdout);
143
+ logging(lg, "Holdout group: %d\n", holdout+1);
144
+ logging(lg, "\n");
145
+ }
146
+
147
+ /* Set the training set to the CRF, and generate features. */
148
+ gm->exchange_options(gm, tr->params, -1);
149
+ gm->initialize(gm, &trainset, lg);
150
+
151
+ /* Call the training algorithm. */
152
+ switch (tr->algorithm) {
153
+ case TRAIN_LBFGS:
154
+ crfsuite_train_lbfgs(
155
+ gm,
156
+ &trainset,
157
+ (holdout != -1 ? &testset : NULL),
158
+ tr->params,
159
+ lg,
160
+ &w
161
+ );
162
+ break;
163
+ case TRAIN_L2SGD:
164
+ crfsuite_train_l2sgd(
165
+ gm,
166
+ &trainset,
167
+ (holdout != -1 ? &testset : NULL),
168
+ tr->params,
169
+ lg,
170
+ &w
171
+ );
172
+ break;
173
+ case TRAIN_AVERAGED_PERCEPTRON:
174
+ crfsuite_train_averaged_perceptron(
175
+ gm,
176
+ &trainset,
177
+ (holdout != -1 ? &testset : NULL),
178
+ tr->params,
179
+ lg,
180
+ &w
181
+ );
182
+ break;
183
+ case TRAIN_PASSIVE_AGGRESSIVE:
184
+ crfsuite_train_passive_aggressive(
185
+ gm,
186
+ &trainset,
187
+ (holdout != -1 ? &testset : NULL),
188
+ tr->params,
189
+ lg,
190
+ &w
191
+ );
192
+ break;
193
+ case TRAIN_AROW:
194
+ crfsuite_train_arow(
195
+ gm,
196
+ &trainset,
197
+ (holdout != -1 ? &testset : NULL),
198
+ tr->params,
199
+ lg,
200
+ &w
201
+ );
202
+ break;
203
+ }
204
+
205
+ /* Store the model file. */
206
+ if (filename != NULL && *filename != '\0') {
207
+ gm->save_model(gm, filename, w, lg);
208
+ }
209
+
210
+ free(w);
211
+
212
+ return 0;
213
+ }
214
+
215
+ int crf1de_create_instance(const char *interface, void **ptr)
216
+ {
217
+ int ftype = FTYPE_NONE;
218
+ int algorithm = TRAIN_NONE;
219
+
220
+ /* Check if the interface name begins with "train/". */
221
+ if (strncmp(interface, "train/", 6) != 0) {
222
+ return 1;
223
+ }
224
+ interface += 6;
225
+
226
+ /* Obtain the feature type. */
227
+ if (strncmp(interface, "crf1d/", 6) == 0) {
228
+ ftype = FTYPE_CRF1D;
229
+ interface += 6;
230
+ } else {
231
+ return 1;
232
+ }
233
+
234
+ /* Obtain the training algorithm. */
235
+ if (strcmp(interface, "lbfgs") == 0) {
236
+ algorithm = TRAIN_LBFGS;
237
+ } else if (strcmp(interface, "l2sgd") == 0) {
238
+ algorithm = TRAIN_L2SGD;
239
+ } else if (strcmp(interface, "averaged-perceptron") == 0) {
240
+ algorithm = TRAIN_AVERAGED_PERCEPTRON;
241
+ } else if (strcmp(interface, "passive-aggressive") == 0) {
242
+ algorithm = TRAIN_PASSIVE_AGGRESSIVE;
243
+ } else if (strcmp(interface, "arow") == 0) {
244
+ algorithm = TRAIN_AROW;
245
+ } else {
246
+ return 1;
247
+ }
248
+
249
+ /* Create an instance. */
250
+ if (ftype != FTYPE_NONE && algorithm != TRAIN_NONE) {
251
+ crfsuite_trainer_t* trainer = (crfsuite_trainer_t*)calloc(1, sizeof(crfsuite_trainer_t));
252
+ if (trainer != NULL) {
253
+ trainer->internal = crfsuite_train_new(ftype, algorithm);
254
+ if (trainer->internal != NULL) {
255
+ trainer->nref = 1;
256
+ trainer->addref = crfsuite_train_addref;
257
+ trainer->release = crfsuite_train_release;
258
+ trainer->params = crfsuite_train_params;
259
+ trainer->set_message_callback = crfsuite_train_set_message_callback;
260
+ trainer->train = crfsuite_train_train;
261
+
262
+ *ptr = trainer;
263
+ return 0;
264
+ } else {
265
+ free(trainer);
266
+ trainer = NULL;
267
+ }
268
+ }
269
+ }
270
+
271
+ return 1;
272
+ }