opener-opinion-detector-base 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (261) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +101 -0
  3. data/bin/opinion-detector-base +19 -0
  4. data/core/annotation.cfg.erb +9 -0
  5. data/core/packages/KafNafParser-1.4.tar.gz +0 -0
  6. data/core/packages/VUA_pylib-1.5.tar.gz +0 -0
  7. data/core/python-scripts/LICENSE +339 -0
  8. data/core/python-scripts/README.md +226 -0
  9. data/core/python-scripts/classify_kaf_naf_file.py +499 -0
  10. data/core/python-scripts/cross_validation.py +634 -0
  11. data/core/python-scripts/generate_folds.py +134 -0
  12. data/core/python-scripts/models.cfg +10 -0
  13. data/core/python-scripts/my_templates/README +33 -0
  14. data/core/python-scripts/my_templates/templates_exp.only0.txt +6 -0
  15. data/core/python-scripts/my_templates/templates_exp.pol0.txt +10 -0
  16. data/core/python-scripts/my_templates/templates_exp.red.txt +7 -0
  17. data/core/python-scripts/my_templates/templates_exp.txt +10 -0
  18. data/core/python-scripts/my_templates/templates_holder.only0.txt +11 -0
  19. data/core/python-scripts/my_templates/templates_holder.red.txt +9 -0
  20. data/core/python-scripts/my_templates/templates_holder.txt +10 -0
  21. data/core/python-scripts/my_templates/templates_target.only0.txt +11 -0
  22. data/core/python-scripts/my_templates/templates_target.red.txt +9 -0
  23. data/core/python-scripts/my_templates/templates_target.txt +10 -0
  24. data/core/python-scripts/run_all_experiments.sh +49 -0
  25. data/core/python-scripts/run_basic.py +20 -0
  26. data/core/python-scripts/run_experiment.sh +42 -0
  27. data/core/python-scripts/scripts/__init__.py +1 -0
  28. data/core/python-scripts/scripts/config_manager.py +314 -0
  29. data/core/python-scripts/scripts/crfutils.py +215 -0
  30. data/core/python-scripts/scripts/extract_feats_relations.py +295 -0
  31. data/core/python-scripts/scripts/extract_features.py +376 -0
  32. data/core/python-scripts/scripts/feats_to_crf.exp.py +105 -0
  33. data/core/python-scripts/scripts/lexicons.py +44 -0
  34. data/core/python-scripts/scripts/link_entities_distance.py +77 -0
  35. data/core/python-scripts/scripts/relation_classifier.py +250 -0
  36. data/core/python-scripts/train.py +566 -0
  37. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/PKG-INFO +10 -0
  38. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/SOURCES.txt +22 -0
  39. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/dependency_links.txt +1 -0
  40. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/installed-files.txt +47 -0
  41. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/top_level.txt +1 -0
  42. data/core/site-packages/pre_build/KafNafParser/KafNafParserMod.py +390 -0
  43. data/core/site-packages/pre_build/KafNafParser/KafNafParserMod.pyc +0 -0
  44. data/core/site-packages/pre_build/KafNafParser/__init__.py +14 -0
  45. data/core/site-packages/pre_build/KafNafParser/__init__.pyc +0 -0
  46. data/core/site-packages/pre_build/KafNafParser/constituency_data.py +125 -0
  47. data/core/site-packages/pre_build/KafNafParser/constituency_data.pyc +0 -0
  48. data/core/site-packages/pre_build/KafNafParser/coreference_data.py +52 -0
  49. data/core/site-packages/pre_build/KafNafParser/coreference_data.pyc +0 -0
  50. data/core/site-packages/pre_build/KafNafParser/dependency_data.py +78 -0
  51. data/core/site-packages/pre_build/KafNafParser/dependency_data.pyc +0 -0
  52. data/core/site-packages/pre_build/KafNafParser/entity_data.py +59 -0
  53. data/core/site-packages/pre_build/KafNafParser/entity_data.pyc +0 -0
  54. data/core/site-packages/pre_build/KafNafParser/external_references_data.py +41 -0
  55. data/core/site-packages/pre_build/KafNafParser/external_references_data.pyc +0 -0
  56. data/core/site-packages/pre_build/KafNafParser/feature_extractor/__init__.py +2 -0
  57. data/core/site-packages/pre_build/KafNafParser/feature_extractor/__init__.pyc +0 -0
  58. data/core/site-packages/pre_build/KafNafParser/feature_extractor/constituency.py +205 -0
  59. data/core/site-packages/pre_build/KafNafParser/feature_extractor/constituency.pyc +0 -0
  60. data/core/site-packages/pre_build/KafNafParser/feature_extractor/dependency.py +309 -0
  61. data/core/site-packages/pre_build/KafNafParser/feature_extractor/dependency.pyc +0 -0
  62. data/core/site-packages/pre_build/KafNafParser/features_data.py +131 -0
  63. data/core/site-packages/pre_build/KafNafParser/features_data.pyc +0 -0
  64. data/core/site-packages/pre_build/KafNafParser/header_data.py +127 -0
  65. data/core/site-packages/pre_build/KafNafParser/header_data.pyc +0 -0
  66. data/core/site-packages/pre_build/KafNafParser/opinion_data.py +211 -0
  67. data/core/site-packages/pre_build/KafNafParser/opinion_data.pyc +0 -0
  68. data/core/site-packages/pre_build/KafNafParser/references_data.py +23 -0
  69. data/core/site-packages/pre_build/KafNafParser/references_data.pyc +0 -0
  70. data/core/site-packages/pre_build/KafNafParser/span_data.py +63 -0
  71. data/core/site-packages/pre_build/KafNafParser/span_data.pyc +0 -0
  72. data/core/site-packages/pre_build/KafNafParser/term_data.py +111 -0
  73. data/core/site-packages/pre_build/KafNafParser/term_data.pyc +0 -0
  74. data/core/site-packages/pre_build/KafNafParser/term_sentiment_data.py +42 -0
  75. data/core/site-packages/pre_build/KafNafParser/term_sentiment_data.pyc +0 -0
  76. data/core/site-packages/pre_build/KafNafParser/text_data.py +99 -0
  77. data/core/site-packages/pre_build/KafNafParser/text_data.pyc +0 -0
  78. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/PKG-INFO +10 -0
  79. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/SOURCES.txt +14 -0
  80. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/dependency_links.txt +1 -0
  81. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/installed-files.txt +23 -0
  82. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/top_level.txt +1 -0
  83. data/core/site-packages/pre_build/VUA_pylib/__init__.py +1 -0
  84. data/core/site-packages/pre_build/VUA_pylib/__init__.pyc +0 -0
  85. data/core/site-packages/pre_build/VUA_pylib/common/__init__.py +1 -0
  86. data/core/site-packages/pre_build/VUA_pylib/common/__init__.pyc +0 -0
  87. data/core/site-packages/pre_build/VUA_pylib/common/common.py +28 -0
  88. data/core/site-packages/pre_build/VUA_pylib/common/common.pyc +0 -0
  89. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/__init__.py +1 -0
  90. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/__init__.pyc +0 -0
  91. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/google_web_nl.py +156 -0
  92. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/google_web_nl.pyc +0 -0
  93. data/core/site-packages/pre_build/VUA_pylib/io_utils/__init__.py +1 -0
  94. data/core/site-packages/pre_build/VUA_pylib/io_utils/__init__.pyc +0 -0
  95. data/core/site-packages/pre_build/VUA_pylib/io_utils/feature_file.py +121 -0
  96. data/core/site-packages/pre_build/VUA_pylib/io_utils/feature_file.pyc +0 -0
  97. data/core/site-packages/pre_build/VUA_pylib/lexicon/__init__.py +1 -0
  98. data/core/site-packages/pre_build/VUA_pylib/lexicon/__init__.pyc +0 -0
  99. data/core/site-packages/pre_build/VUA_pylib/lexicon/lexicon.py +72 -0
  100. data/core/site-packages/pre_build/VUA_pylib/lexicon/lexicon.pyc +0 -0
  101. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/PKG-INFO +10 -0
  102. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/SOURCES.txt +7 -0
  103. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/dependency_links.txt +1 -0
  104. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/installed-files.txt +11 -0
  105. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/top_level.txt +1 -0
  106. data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.py +165 -0
  107. data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.pyc +0 -0
  108. data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.py +439 -0
  109. data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.pyc +0 -0
  110. data/core/site-packages/pre_build/VUKafParserPy/__init__.py +7 -0
  111. data/core/site-packages/pre_build/VUKafParserPy/__init__.pyc +0 -0
  112. data/core/vendor/src/crfsuite/AUTHORS +1 -0
  113. data/core/vendor/src/crfsuite/COPYING +27 -0
  114. data/core/vendor/src/crfsuite/ChangeLog +103 -0
  115. data/core/vendor/src/crfsuite/INSTALL +236 -0
  116. data/core/vendor/src/crfsuite/Makefile.am +19 -0
  117. data/core/vendor/src/crfsuite/Makefile.in +783 -0
  118. data/core/vendor/src/crfsuite/README +183 -0
  119. data/core/vendor/src/crfsuite/aclocal.m4 +9018 -0
  120. data/core/vendor/src/crfsuite/autogen.sh +38 -0
  121. data/core/vendor/src/crfsuite/compile +143 -0
  122. data/core/vendor/src/crfsuite/config.guess +1502 -0
  123. data/core/vendor/src/crfsuite/config.h.in +198 -0
  124. data/core/vendor/src/crfsuite/config.sub +1714 -0
  125. data/core/vendor/src/crfsuite/configure +14273 -0
  126. data/core/vendor/src/crfsuite/configure.in +149 -0
  127. data/core/vendor/src/crfsuite/crfsuite.sln +42 -0
  128. data/core/vendor/src/crfsuite/depcomp +630 -0
  129. data/core/vendor/src/crfsuite/example/chunking.py +49 -0
  130. data/core/vendor/src/crfsuite/example/crfutils.py +179 -0
  131. data/core/vendor/src/crfsuite/example/ner.py +270 -0
  132. data/core/vendor/src/crfsuite/example/pos.py +78 -0
  133. data/core/vendor/src/crfsuite/example/template.py +88 -0
  134. data/core/vendor/src/crfsuite/frontend/Makefile.am +29 -0
  135. data/core/vendor/src/crfsuite/frontend/Makefile.in +640 -0
  136. data/core/vendor/src/crfsuite/frontend/dump.c +116 -0
  137. data/core/vendor/src/crfsuite/frontend/frontend.vcxproj +129 -0
  138. data/core/vendor/src/crfsuite/frontend/iwa.c +273 -0
  139. data/core/vendor/src/crfsuite/frontend/iwa.h +65 -0
  140. data/core/vendor/src/crfsuite/frontend/learn.c +439 -0
  141. data/core/vendor/src/crfsuite/frontend/main.c +137 -0
  142. data/core/vendor/src/crfsuite/frontend/option.c +93 -0
  143. data/core/vendor/src/crfsuite/frontend/option.h +86 -0
  144. data/core/vendor/src/crfsuite/frontend/readdata.h +38 -0
  145. data/core/vendor/src/crfsuite/frontend/reader.c +136 -0
  146. data/core/vendor/src/crfsuite/frontend/tag.c +427 -0
  147. data/core/vendor/src/crfsuite/genbinary.sh.in +15 -0
  148. data/core/vendor/src/crfsuite/include/Makefile.am +11 -0
  149. data/core/vendor/src/crfsuite/include/Makefile.in +461 -0
  150. data/core/vendor/src/crfsuite/include/crfsuite.h +1063 -0
  151. data/core/vendor/src/crfsuite/include/crfsuite.hpp +555 -0
  152. data/core/vendor/src/crfsuite/include/crfsuite_api.hpp +400 -0
  153. data/core/vendor/src/crfsuite/include/os.h +61 -0
  154. data/core/vendor/src/crfsuite/install-sh +520 -0
  155. data/core/vendor/src/crfsuite/lib/cqdb/COPYING +28 -0
  156. data/core/vendor/src/crfsuite/lib/cqdb/Makefile.am +21 -0
  157. data/core/vendor/src/crfsuite/lib/cqdb/Makefile.in +549 -0
  158. data/core/vendor/src/crfsuite/lib/cqdb/cqdb.vcxproj +86 -0
  159. data/core/vendor/src/crfsuite/lib/cqdb/include/cqdb.h +524 -0
  160. data/core/vendor/src/crfsuite/lib/cqdb/src/cqdb.c +587 -0
  161. data/core/vendor/src/crfsuite/lib/cqdb/src/lookup3.c +976 -0
  162. data/core/vendor/src/crfsuite/lib/crf/Makefile.am +46 -0
  163. data/core/vendor/src/crfsuite/lib/crf/Makefile.in +721 -0
  164. data/core/vendor/src/crfsuite/lib/crf/crf.vcxproj +216 -0
  165. data/core/vendor/src/crfsuite/lib/crf/src/crf1d.h +353 -0
  166. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_context.c +705 -0
  167. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_encode.c +943 -0
  168. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_feature.c +352 -0
  169. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_model.c +994 -0
  170. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_tag.c +550 -0
  171. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite.c +492 -0
  172. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite_internal.h +236 -0
  173. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite_train.c +272 -0
  174. data/core/vendor/src/crfsuite/lib/crf/src/dataset.c +106 -0
  175. data/core/vendor/src/crfsuite/lib/crf/src/dictionary.c +118 -0
  176. data/core/vendor/src/crfsuite/lib/crf/src/holdout.c +80 -0
  177. data/core/vendor/src/crfsuite/lib/crf/src/logging.c +91 -0
  178. data/core/vendor/src/crfsuite/lib/crf/src/logging.h +48 -0
  179. data/core/vendor/src/crfsuite/lib/crf/src/params.c +335 -0
  180. data/core/vendor/src/crfsuite/lib/crf/src/params.h +80 -0
  181. data/core/vendor/src/crfsuite/lib/crf/src/quark.c +172 -0
  182. data/core/vendor/src/crfsuite/lib/crf/src/quark.h +46 -0
  183. data/core/vendor/src/crfsuite/lib/crf/src/rumavl.c +1107 -0
  184. data/core/vendor/src/crfsuite/lib/crf/src/rumavl.h +160 -0
  185. data/core/vendor/src/crfsuite/lib/crf/src/train_arow.c +408 -0
  186. data/core/vendor/src/crfsuite/lib/crf/src/train_averaged_perceptron.c +242 -0
  187. data/core/vendor/src/crfsuite/lib/crf/src/train_l2sgd.c +507 -0
  188. data/core/vendor/src/crfsuite/lib/crf/src/train_lbfgs.c +338 -0
  189. data/core/vendor/src/crfsuite/lib/crf/src/train_passive_aggressive.c +435 -0
  190. data/core/vendor/src/crfsuite/lib/crf/src/vecmath.h +341 -0
  191. data/core/vendor/src/crfsuite/ltmain.sh +8413 -0
  192. data/core/vendor/src/crfsuite/missing +376 -0
  193. data/core/vendor/src/crfsuite/swig/Makefile.am +13 -0
  194. data/core/vendor/src/crfsuite/swig/Makefile.in +365 -0
  195. data/core/vendor/src/crfsuite/swig/crfsuite.cpp +2 -0
  196. data/core/vendor/src/crfsuite/swig/export.i +32 -0
  197. data/core/vendor/src/crfsuite/swig/python/README +92 -0
  198. data/core/vendor/src/crfsuite/swig/python/crfsuite.py +329 -0
  199. data/core/vendor/src/crfsuite/swig/python/export_wrap.cpp +14355 -0
  200. data/core/vendor/src/crfsuite/swig/python/export_wrap.h +63 -0
  201. data/core/vendor/src/crfsuite/swig/python/prepare.sh +9 -0
  202. data/core/vendor/src/crfsuite/swig/python/sample_tag.py +52 -0
  203. data/core/vendor/src/crfsuite/swig/python/sample_train.py +68 -0
  204. data/core/vendor/src/crfsuite/swig/python/setup.py +44 -0
  205. data/core/vendor/src/crfsuite/win32/stdint.h +679 -0
  206. data/core/vendor/src/liblbfgs/AUTHORS +1 -0
  207. data/core/vendor/src/liblbfgs/COPYING +22 -0
  208. data/core/vendor/src/liblbfgs/ChangeLog +120 -0
  209. data/core/vendor/src/liblbfgs/INSTALL +231 -0
  210. data/core/vendor/src/liblbfgs/Makefile.am +10 -0
  211. data/core/vendor/src/liblbfgs/Makefile.in +638 -0
  212. data/core/vendor/src/liblbfgs/NEWS +0 -0
  213. data/core/vendor/src/liblbfgs/README +71 -0
  214. data/core/vendor/src/liblbfgs/aclocal.m4 +6985 -0
  215. data/core/vendor/src/liblbfgs/autogen.sh +38 -0
  216. data/core/vendor/src/liblbfgs/config.guess +1411 -0
  217. data/core/vendor/src/liblbfgs/config.h.in +64 -0
  218. data/core/vendor/src/liblbfgs/config.sub +1500 -0
  219. data/core/vendor/src/liblbfgs/configure +21146 -0
  220. data/core/vendor/src/liblbfgs/configure.in +107 -0
  221. data/core/vendor/src/liblbfgs/depcomp +522 -0
  222. data/core/vendor/src/liblbfgs/include/lbfgs.h +745 -0
  223. data/core/vendor/src/liblbfgs/install-sh +322 -0
  224. data/core/vendor/src/liblbfgs/lbfgs.sln +26 -0
  225. data/core/vendor/src/liblbfgs/lib/Makefile.am +24 -0
  226. data/core/vendor/src/liblbfgs/lib/Makefile.in +499 -0
  227. data/core/vendor/src/liblbfgs/lib/arithmetic_ansi.h +133 -0
  228. data/core/vendor/src/liblbfgs/lib/arithmetic_sse_double.h +294 -0
  229. data/core/vendor/src/liblbfgs/lib/arithmetic_sse_float.h +298 -0
  230. data/core/vendor/src/liblbfgs/lib/lbfgs.c +1371 -0
  231. data/core/vendor/src/liblbfgs/lib/lib.vcxproj +95 -0
  232. data/core/vendor/src/liblbfgs/ltmain.sh +6426 -0
  233. data/core/vendor/src/liblbfgs/missing +353 -0
  234. data/core/vendor/src/liblbfgs/sample/Makefile.am +15 -0
  235. data/core/vendor/src/liblbfgs/sample/Makefile.in +433 -0
  236. data/core/vendor/src/liblbfgs/sample/sample.c +81 -0
  237. data/core/vendor/src/liblbfgs/sample/sample.cpp +126 -0
  238. data/core/vendor/src/liblbfgs/sample/sample.vcxproj +105 -0
  239. data/core/vendor/src/svm_light/LICENSE.txt +59 -0
  240. data/core/vendor/src/svm_light/Makefile +105 -0
  241. data/core/vendor/src/svm_light/kernel.h +40 -0
  242. data/core/vendor/src/svm_light/svm_classify.c +197 -0
  243. data/core/vendor/src/svm_light/svm_common.c +985 -0
  244. data/core/vendor/src/svm_light/svm_common.h +301 -0
  245. data/core/vendor/src/svm_light/svm_hideo.c +1062 -0
  246. data/core/vendor/src/svm_light/svm_learn.c +4147 -0
  247. data/core/vendor/src/svm_light/svm_learn.h +169 -0
  248. data/core/vendor/src/svm_light/svm_learn_main.c +397 -0
  249. data/core/vendor/src/svm_light/svm_loqo.c +211 -0
  250. data/ext/hack/Rakefile +17 -0
  251. data/ext/hack/support.rb +88 -0
  252. data/lib/opener/opinion_detectors/base.rb +112 -0
  253. data/lib/opener/opinion_detectors/base/version.rb +7 -0
  254. data/lib/opener/opinion_detectors/configuration_creator.rb +86 -0
  255. data/lib/opener/opinion_detectors/de.rb +7 -0
  256. data/lib/opener/opinion_detectors/en.rb +7 -0
  257. data/lib/opener/opinion_detectors/it.rb +7 -0
  258. data/lib/opener/opinion_detectors/nl.rb +6 -0
  259. data/opener-opinion-detector-base.gemspec +35 -0
  260. data/pre_build_requirements.txt +3 -0
  261. metadata +374 -0
@@ -0,0 +1,943 @@
1
+ /*
2
+ * CRF1d encoder (routines for training).
3
+ *
4
+ * Copyright (c) 2007-2010, Naoaki Okazaki
5
+ * All rights reserved.
6
+ *
7
+ * Redistribution and use in source and binary forms, with or without
8
+ * modification, are permitted provided that the following conditions are met:
9
+ * * Redistributions of source code must retain the above copyright
10
+ * notice, this list of conditions and the following disclaimer.
11
+ * * Redistributions in binary form must reproduce the above copyright
12
+ * notice, this list of conditions and the following disclaimer in the
13
+ * documentation and/or other materials provided with the distribution.
14
+ * * Neither the names of the authors nor the names of its contributors
15
+ * may be used to endorse or promote products derived from this
16
+ * software without specific prior written permission.
17
+ *
18
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
22
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
+ */
30
+
31
+ /* $Id$ */
32
+
33
+ #ifdef HAVE_CONFIG_H
34
+ #include <config.h>
35
+ #endif/*HAVE_CONFIG_H*/
36
+
37
+ #include <os.h>
38
+
39
+ #include <stdio.h>
40
+ #include <stdlib.h>
41
+ #include <memory.h>
42
+ #include <time.h>
43
+
44
+ #include <crfsuite.h>
45
+ #include "crfsuite_internal.h"
46
+ #include "crf1d.h"
47
+ #include "params.h"
48
+ #include "logging.h"
49
+
50
+ /**
51
+ * Parameters for feature generation.
52
+ */
53
+ typedef struct {
54
+ floatval_t feature_minfreq; /** The threshold for occurrences of features. */
55
+ int feature_possible_states; /** Dense state features. */
56
+ int feature_possible_transitions; /** Dense transition features. */
57
+ } crf1de_option_t;
58
+
59
+ /**
60
+ * CRF1d internal data.
61
+ */
62
+ typedef struct {
63
+ int num_labels; /**< Number of distinct output labels (L). */
64
+ int num_attributes; /**< Number of distinct attributes (A). */
65
+
66
+ int cap_items; /**< Maximum length of sequences in the data set. */
67
+
68
+ int num_features; /**< Number of distinct features (K). */
69
+ crf1df_feature_t *features; /**< Array of feature descriptors [K]. */
70
+ feature_refs_t* attributes; /**< References to attribute features [A]. */
71
+ feature_refs_t* forward_trans; /**< References to transition features [L]. */
72
+
73
+ crf1d_context_t *ctx; /**< CRF1d context. */
74
+ crf1de_option_t opt; /**< CRF1d options. */
75
+ } crf1de_t;
76
+
77
/* Address of feature descriptor #k of the encoder. */
#define FEATURE(crf1de, k)      (&(crf1de)->features[(k)])
/* Address of the feature-reference list of attribute #a. */
#define ATTRIBUTE(crf1de, a)    (&(crf1de)->attributes[(a)])
/* Address of the feature-reference list of the transitions from label #i. */
#define TRANSITION(crf1de, i)   (&(crf1de)->forward_trans[(i)])
83
+
84
+
85
+
86
+ static void crf1de_init(crf1de_t *crf1de)
87
+ {
88
+ crf1de->num_labels = 0;
89
+ crf1de->num_attributes = 0;
90
+ crf1de->cap_items = 0;
91
+ crf1de->num_features = 0;
92
+ crf1de->features = NULL;
93
+ crf1de->attributes = NULL;
94
+ crf1de->forward_trans = NULL;
95
+ crf1de->ctx = NULL;
96
+ /* Initialize except for opt. */
97
+ }
98
+
99
+ static void crf1de_finish(crf1de_t *crf1de)
100
+ {
101
+ if (crf1de->ctx != NULL) {
102
+ crf1dc_delete(crf1de->ctx);
103
+ crf1de->ctx = NULL;
104
+ }
105
+ if (crf1de->features != NULL) {
106
+ free(crf1de->features);
107
+ crf1de->features = NULL;
108
+ }
109
+ if (crf1de->attributes != NULL) {
110
+ free(crf1de->attributes);
111
+ crf1de->attributes = NULL;
112
+ }
113
+ if (crf1de->forward_trans != NULL) {
114
+ free(crf1de->forward_trans);
115
+ crf1de->forward_trans = NULL;
116
+ }
117
+ }
118
+
119
+ static void crf1de_state_score(
120
+ crf1de_t *crf1de,
121
+ const crfsuite_instance_t* inst,
122
+ const floatval_t* w
123
+ )
124
+ {
125
+ int i, t, r;
126
+ crf1d_context_t* ctx = crf1de->ctx;
127
+ const int T = inst->num_items;
128
+ const int L = crf1de->num_labels;
129
+
130
+ /* Loop over the items in the sequence. */
131
+ for (t = 0;t < T;++t) {
132
+ const crfsuite_item_t *item = &inst->items[t];
133
+ floatval_t *state = STATE_SCORE(ctx, t);
134
+
135
+ /* Loop over the contents (attributes) attached to the item. */
136
+ for (i = 0;i < item->num_contents;++i) {
137
+ /* Access the list of state features associated with the attribute. */
138
+ int a = item->contents[i].aid;
139
+ const feature_refs_t *attr = ATTRIBUTE(crf1de, a);
140
+ floatval_t value = item->contents[i].value;
141
+
142
+ /* Loop over the state features associated with the attribute. */
143
+ for (r = 0;r < attr->num_features;++r) {
144
+ /* State feature associates the attribute #a with the label #(f->dst). */
145
+ int fid = attr->fids[r];
146
+ const crf1df_feature_t *f = FEATURE(crf1de, fid);
147
+ state[f->dst] += w[fid] * value;
148
+ }
149
+ }
150
+ }
151
+ }
152
+
153
+ static void
154
+ crf1de_state_score_scaled(
155
+ crf1de_t* crf1de,
156
+ const crfsuite_instance_t* inst,
157
+ const floatval_t* w,
158
+ const floatval_t scale
159
+ )
160
+ {
161
+ int i, t, r;
162
+ crf1d_context_t* ctx = crf1de->ctx;
163
+ const int T = inst->num_items;
164
+ const int L = crf1de->num_labels;
165
+
166
+ /* Forward to the non-scaling version for fast computation when scale == 1. */
167
+ if (scale == 1.) {
168
+ crf1de_state_score(crf1de, inst, w);
169
+ return;
170
+ }
171
+
172
+ /* Loop over the items in the sequence. */
173
+ for (t = 0;t < T;++t) {
174
+ const crfsuite_item_t *item = &inst->items[t];
175
+ floatval_t *state = STATE_SCORE(ctx, t);
176
+
177
+ /* Loop over the contents (attributes) attached to the item. */
178
+ for (i = 0;i < item->num_contents;++i) {
179
+ /* Access the list of state features associated with the attribute. */
180
+ int a = item->contents[i].aid;
181
+ const feature_refs_t *attr = ATTRIBUTE(crf1de, a);
182
+ floatval_t value = item->contents[i].value * scale;
183
+
184
+ /* Loop over the state features associated with the attribute. */
185
+ for (r = 0;r < attr->num_features;++r) {
186
+ /* State feature associates the attribute #a with the label #(f->dst). */
187
+ int fid = attr->fids[r];
188
+ const crf1df_feature_t *f = FEATURE(crf1de, fid);
189
+ state[f->dst] += w[fid] * value;
190
+ }
191
+ }
192
+ }
193
+ }
194
+
195
+ static void
196
+ crf1de_transition_score(
197
+ crf1de_t* crf1de,
198
+ const floatval_t* w
199
+ )
200
+ {
201
+ int i, r;
202
+ crf1d_context_t* ctx = crf1de->ctx;
203
+ const int L = crf1de->num_labels;
204
+
205
+ /* Compute transition scores between two labels. */
206
+ for (i = 0;i < L;++i) {
207
+ floatval_t *trans = TRANS_SCORE(ctx, i);
208
+ const feature_refs_t *edge = TRANSITION(crf1de, i);
209
+ for (r = 0;r < edge->num_features;++r) {
210
+ /* Transition feature from #i to #(f->dst). */
211
+ int fid = edge->fids[r];
212
+ const crf1df_feature_t *f = FEATURE(crf1de, fid);
213
+ trans[f->dst] = w[fid];
214
+ }
215
+ }
216
+ }
217
+
218
+ static void
219
+ crf1de_transition_score_scaled(
220
+ crf1de_t* crf1de,
221
+ const floatval_t* w,
222
+ const floatval_t scale
223
+ )
224
+ {
225
+ int i, r;
226
+ crf1d_context_t* ctx = crf1de->ctx;
227
+ const int L = crf1de->num_labels;
228
+
229
+ /* Forward to the non-scaling version for fast computation when scale == 1. */
230
+ if (scale == 1.) {
231
+ crf1de_transition_score(crf1de, w);
232
+ return;
233
+ }
234
+
235
+ /* Compute transition scores between two labels. */
236
+ for (i = 0;i < L;++i) {
237
+ floatval_t *trans = TRANS_SCORE(ctx, i);
238
+ const feature_refs_t *edge = TRANSITION(crf1de, i);
239
+ for (r = 0;r < edge->num_features;++r) {
240
+ /* Transition feature from #i to #(f->dst). */
241
+ int fid = edge->fids[r];
242
+ const crf1df_feature_t *f = FEATURE(crf1de, fid);
243
+ trans[f->dst] = w[fid] * scale;
244
+ }
245
+ }
246
+ }
247
+
248
+ static void
249
+ crf1de_features_on_path(
250
+ crf1de_t *crf1de,
251
+ const crfsuite_instance_t *inst,
252
+ const int *labels,
253
+ crfsuite_encoder_features_on_path_callback func,
254
+ void *instance
255
+ )
256
+ {
257
+ int c, i = -1, t, r;
258
+ crf1d_context_t* ctx = crf1de->ctx;
259
+ const int T = inst->num_items;
260
+ const int L = crf1de->num_labels;
261
+
262
+ /* Loop over the items in the sequence. */
263
+ for (t = 0;t < T;++t) {
264
+ const crfsuite_item_t *item = &inst->items[t];
265
+ const int j = labels[t];
266
+
267
+ /* Loop over the contents (attributes) attached to the item. */
268
+ for (c = 0;c < item->num_contents;++c) {
269
+ /* Access the list of state features associated with the attribute. */
270
+ int a = item->contents[c].aid;
271
+ const feature_refs_t *attr = ATTRIBUTE(crf1de, a);
272
+ floatval_t value = item->contents[c].value;
273
+
274
+ /* Loop over the state features associated with the attribute. */
275
+ for (r = 0;r < attr->num_features;++r) {
276
+ /* State feature associates the attribute #a with the label #(f->dst). */
277
+ int fid = attr->fids[r];
278
+ const crf1df_feature_t *f = FEATURE(crf1de, fid);
279
+ if (f->dst == j) {
280
+ func(instance, fid, value);
281
+ }
282
+ }
283
+ }
284
+
285
+ if (i != -1) {
286
+ const feature_refs_t *edge = TRANSITION(crf1de, i);
287
+ for (r = 0;r < edge->num_features;++r) {
288
+ /* Transition feature from #i to #(f->dst). */
289
+ int fid = edge->fids[r];
290
+ const crf1df_feature_t *f = FEATURE(crf1de, fid);
291
+ if (f->dst == j) {
292
+ func(instance, fid, 1.);
293
+ }
294
+ }
295
+ }
296
+
297
+ i = j;
298
+ }
299
+ }
300
+
301
+ static void
302
+ crf1de_observation_expectation(
303
+ crf1de_t* crf1de,
304
+ const crfsuite_instance_t* inst,
305
+ const int *labels,
306
+ floatval_t *w,
307
+ const floatval_t scale
308
+ )
309
+ {
310
+ int c, i = -1, t, r;
311
+ crf1d_context_t* ctx = crf1de->ctx;
312
+ const int T = inst->num_items;
313
+ const int L = crf1de->num_labels;
314
+
315
+ /* Loop over the items in the sequence. */
316
+ for (t = 0;t < T;++t) {
317
+ const crfsuite_item_t *item = &inst->items[t];
318
+ const int j = labels[t];
319
+
320
+ /* Loop over the contents (attributes) attached to the item. */
321
+ for (c = 0;c < item->num_contents;++c) {
322
+ /* Access the list of state features associated with the attribute. */
323
+ int a = item->contents[c].aid;
324
+ const feature_refs_t *attr = ATTRIBUTE(crf1de, a);
325
+ floatval_t value = item->contents[c].value;
326
+
327
+ /* Loop over the state features associated with the attribute. */
328
+ for (r = 0;r < attr->num_features;++r) {
329
+ /* State feature associates the attribute #a with the label #(f->dst). */
330
+ int fid = attr->fids[r];
331
+ const crf1df_feature_t *f = FEATURE(crf1de, fid);
332
+ if (f->dst == j) {
333
+ w[fid] += value * scale;
334
+ }
335
+ }
336
+ }
337
+
338
+ if (i != -1) {
339
+ const feature_refs_t *edge = TRANSITION(crf1de, i);
340
+ for (r = 0;r < edge->num_features;++r) {
341
+ /* Transition feature from #i to #(f->dst). */
342
+ int fid = edge->fids[r];
343
+ const crf1df_feature_t *f = FEATURE(crf1de, fid);
344
+ if (f->dst == j) {
345
+ w[fid] += scale;
346
+ }
347
+ }
348
+ }
349
+
350
+ i = j;
351
+ }
352
+ }
353
+
354
+ static void
355
+ crf1de_model_expectation(
356
+ crf1de_t *crf1de,
357
+ const crfsuite_instance_t *inst,
358
+ floatval_t *w,
359
+ const floatval_t scale
360
+ )
361
+ {
362
+ int a, c, i, t, r;
363
+ crf1d_context_t* ctx = crf1de->ctx;
364
+ const feature_refs_t *attr = NULL, *trans = NULL;
365
+ const crfsuite_item_t* item = NULL;
366
+ const int T = inst->num_items;
367
+ const int L = crf1de->num_labels;
368
+
369
+ for (t = 0;t < T;++t) {
370
+ floatval_t *prob = STATE_MEXP(ctx, t);
371
+
372
+ /* Compute expectations for state features at position #t. */
373
+ item = &inst->items[t];
374
+ for (c = 0;c < item->num_contents;++c) {
375
+ /* Access the attribute. */
376
+ floatval_t value = item->contents[c].value;
377
+ a = item->contents[c].aid;
378
+ attr = ATTRIBUTE(crf1de, a);
379
+
380
+ /* Loop over state features for the attribute. */
381
+ for (r = 0;r < attr->num_features;++r) {
382
+ int fid = attr->fids[r];
383
+ crf1df_feature_t *f = FEATURE(crf1de, fid);
384
+ w[fid] += prob[f->dst] * value * scale;
385
+ }
386
+ }
387
+ }
388
+
389
+ /* Loop over the labels (t, i) */
390
+ for (i = 0;i < L;++i) {
391
+ const floatval_t *prob = TRANS_MEXP(ctx, i);
392
+ const feature_refs_t *edge = TRANSITION(crf1de, i);
393
+ for (r = 0;r < edge->num_features;++r) {
394
+ /* Transition feature from #i to #(f->dst). */
395
+ int fid = edge->fids[r];
396
+ crf1df_feature_t *f = FEATURE(crf1de, fid);
397
+ w[fid] += prob[f->dst] * scale;
398
+ }
399
+ }
400
+ }
401
+
402
+ static int
403
+ crf1de_set_data(
404
+ crf1de_t *crf1de,
405
+ dataset_t *ds,
406
+ int num_labels,
407
+ int num_attributes,
408
+ logging_t *lg
409
+ )
410
+ {
411
+ int i, ret = 0;
412
+ clock_t begin = 0;
413
+ int T = 0;
414
+ const int L = num_labels;
415
+ const int A = num_attributes;
416
+ const int N = ds->num_instances;
417
+ crf1de_option_t *opt = &crf1de->opt;
418
+
419
+ /* Initialize the member variables. */
420
+ crf1de_init(crf1de);
421
+ crf1de->num_attributes = A;
422
+ crf1de->num_labels = L;
423
+
424
+ /* Find the maximum length of items in the data set. */
425
+ for (i = 0;i < N;++i) {
426
+ const crfsuite_instance_t *inst = dataset_get(ds, i);
427
+ if (T < inst->num_items) {
428
+ T = inst->num_items;
429
+ }
430
+ }
431
+
432
+ /* Construct a CRF context. */
433
+ crf1de->ctx = crf1dc_new(CTXF_MARGINALS | CTXF_VITERBI, L, T);
434
+ if (crf1de->ctx == NULL) {
435
+ ret = CRFSUITEERR_OUTOFMEMORY;
436
+ goto error_exit;
437
+ }
438
+
439
+ /* Feature generation. */
440
+ logging(lg, "Feature generation\n");
441
+ logging(lg, "type: CRF1d\n");
442
+ logging(lg, "feature.minfreq: %f\n", opt->feature_minfreq);
443
+ logging(lg, "feature.possible_states: %d\n", opt->feature_possible_states);
444
+ logging(lg, "feature.possible_transitions: %d\n", opt->feature_possible_transitions);
445
+ begin = clock();
446
+ crf1de->features = crf1df_generate(
447
+ &crf1de->num_features,
448
+ ds,
449
+ L,
450
+ A,
451
+ opt->feature_possible_states ? 1 : 0,
452
+ opt->feature_possible_transitions ? 1 : 0,
453
+ opt->feature_minfreq,
454
+ lg->func,
455
+ lg->instance
456
+ );
457
+ if (crf1de->features == NULL) {
458
+ ret = CRFSUITEERR_OUTOFMEMORY;
459
+ goto error_exit;
460
+ }
461
+ logging(lg, "Number of features: %d\n", crf1de->num_features);
462
+ logging(lg, "Seconds required: %.3f\n", (clock() - begin) / (double)CLOCKS_PER_SEC);
463
+ logging(lg, "\n");
464
+
465
+ /* Initialize the feature references. */
466
+ crf1df_init_references(
467
+ &crf1de->attributes,
468
+ &crf1de->forward_trans,
469
+ crf1de->features,
470
+ crf1de->num_features,
471
+ A,
472
+ L);
473
+ if (crf1de->attributes == NULL || crf1de->forward_trans == NULL) {
474
+ ret = CRFSUITEERR_OUTOFMEMORY;
475
+ goto error_exit;
476
+ }
477
+
478
+ return ret;
479
+
480
+ error_exit:
481
+ crf1de_finish(crf1de);
482
+ return ret;
483
+ }
484
+
485
+ static int
486
+ crf1de_save_model(
487
+ crf1de_t *crf1de,
488
+ const char *filename,
489
+ const floatval_t *w,
490
+ crfsuite_dictionary_t *attrs,
491
+ crfsuite_dictionary_t *labels,
492
+ logging_t *lg
493
+ )
494
+ {
495
+ int a, k, l, ret;
496
+ clock_t begin;
497
+ int *fmap = NULL, *amap = NULL;
498
+ crf1dmw_t* writer = NULL;
499
+ const feature_refs_t *edge = NULL, *attr = NULL;
500
+ const floatval_t threshold = 0.01;
501
+ const int L = crf1de->num_labels;
502
+ const int A = crf1de->num_attributes;
503
+ const int K = crf1de->num_features;
504
+ int J = 0, B = 0;
505
+
506
+ /* Start storing the model. */
507
+ logging(lg, "Storing the model\n");
508
+ begin = clock();
509
+
510
+ /* Allocate and initialize the feature mapping. */
511
+ fmap = (int*)calloc(K, sizeof(int));
512
+ if (fmap == NULL) {
513
+ goto error_exit;
514
+ }
515
+ #ifdef CRF_TRAIN_SAVE_NO_PRUNING
516
+ for (k = 0;k < K;++k) fmap[k] = k;
517
+ J = K;
518
+ #else
519
+ for (k = 0;k < K;++k) fmap[k] = -1;
520
+ #endif/*CRF_TRAIN_SAVE_NO_PRUNING*/
521
+
522
+ /* Allocate and initialize the attribute mapping. */
523
+ amap = (int*)calloc(A, sizeof(int));
524
+ if (amap == NULL) {
525
+ goto error_exit;
526
+ }
527
+ #ifdef CRF_TRAIN_SAVE_NO_PRUNING
528
+ for (a = 0;a < A;++a) amap[a] = a;
529
+ B = A;
530
+ #else
531
+ for (a = 0;a < A;++a) amap[a] = -1;
532
+ #endif/*CRF_TRAIN_SAVE_NO_PRUNING*/
533
+
534
+ /*
535
+ * Open a model writer.
536
+ */
537
+ writer = crf1mmw(filename);
538
+ if (writer == NULL) {
539
+ goto error_exit;
540
+ }
541
+
542
+ /* Open a feature chunk in the model file. */
543
+ if (ret = crf1dmw_open_features(writer)) {
544
+ goto error_exit;
545
+ }
546
+
547
+ /*
548
+ * Write the feature values.
549
+ * (with determining active features and attributes).
550
+ */
551
+ for (k = 0;k < K;++k) {
552
+ crf1df_feature_t* f = &crf1de->features[k];
553
+ if (w[k] != 0) {
554
+ int src;
555
+ crf1dm_feature_t feat;
556
+
557
+ #ifndef CRF_TRAIN_SAVE_NO_PRUNING
558
+ /* The feature (#k) will have a new feature id (#J). */
559
+ fmap[k] = J++; /* Feature #k -> #fmap[k]. */
560
+
561
+ /* Map the source of the field. */
562
+ if (f->type == FT_STATE) {
563
+ /* The attribute #(f->src) will have a new attribute id (#B). */
564
+ if (amap[f->src] < 0) amap[f->src] = B++; /* Attribute #a -> #amap[a]. */
565
+ src = amap[f->src];
566
+ } else {
567
+ src = f->src;
568
+ }
569
+ #endif/*CRF_TRAIN_SAVE_NO_PRUNING*/
570
+
571
+ feat.type = f->type;
572
+ feat.src = src;
573
+ feat.dst = f->dst;
574
+ feat.weight = w[k];
575
+
576
+ /* Write the feature. */
577
+ if (ret = crf1dmw_put_feature(writer, fmap[k], &feat)) {
578
+ goto error_exit;
579
+ }
580
+ }
581
+ }
582
+
583
+ /* Close the feature chunk. */
584
+ if (ret = crf1dmw_close_features(writer)) {
585
+ goto error_exit;
586
+ }
587
+
588
+ logging(lg, "Number of active features: %d (%d)\n", J, K);
589
+ logging(lg, "Number of active attributes: %d (%d)\n", B, A);
590
+ logging(lg, "Number of active labels: %d (%d)\n", L, L);
591
+
592
+ /* Write labels. */
593
+ logging(lg, "Writing labels\n", L);
594
+ if (ret = crf1dmw_open_labels(writer, L)) {
595
+ goto error_exit;
596
+ }
597
+ for (l = 0;l < L;++l) {
598
+ const char *str = NULL;
599
+ labels->to_string(labels, l, &str);
600
+ if (str != NULL) {
601
+ if (ret = crf1dmw_put_label(writer, l, str)) {
602
+ goto error_exit;
603
+ }
604
+ labels->free(labels, str);
605
+ }
606
+ }
607
+ if (ret = crf1dmw_close_labels(writer)) {
608
+ goto error_exit;
609
+ }
610
+
611
+ /* Write attributes. */
612
+ logging(lg, "Writing attributes\n");
613
+ if (ret = crf1dmw_open_attrs(writer, B)) {
614
+ goto error_exit;
615
+ }
616
+ for (a = 0;a < A;++a) {
617
+ if (0 <= amap[a]) {
618
+ const char *str = NULL;
619
+ attrs->to_string(attrs, a, &str);
620
+ if (str != NULL) {
621
+ if (ret = crf1dmw_put_attr(writer, amap[a], str)) {
622
+ goto error_exit;
623
+ }
624
+ attrs->free(attrs, str);
625
+ }
626
+ }
627
+ }
628
+ if (ret = crf1dmw_close_attrs(writer)) {
629
+ goto error_exit;
630
+ }
631
+
632
+ /* Write label feature references. */
633
+ logging(lg, "Writing feature references for transitions\n");
634
+ if (ret = crf1dmw_open_labelrefs(writer, L+2)) {
635
+ goto error_exit;
636
+ }
637
+ for (l = 0;l < L;++l) {
638
+ edge = TRANSITION(crf1de, l);
639
+ if (ret = crf1dmw_put_labelref(writer, l, edge, fmap)) {
640
+ goto error_exit;
641
+ }
642
+ }
643
+ if (ret = crf1dmw_close_labelrefs(writer)) {
644
+ goto error_exit;
645
+ }
646
+
647
+ /* Write attribute feature references. */
648
+ logging(lg, "Writing feature references for attributes\n");
649
+ if (ret = crf1dmw_open_attrrefs(writer, B)) {
650
+ goto error_exit;
651
+ }
652
+ for (a = 0;a < A;++a) {
653
+ if (0 <= amap[a]) {
654
+ attr = ATTRIBUTE(crf1de, a);
655
+ if (ret = crf1dmw_put_attrref(writer, amap[a], attr, fmap)) {
656
+ goto error_exit;
657
+ }
658
+ }
659
+ }
660
+ if (ret = crf1dmw_close_attrrefs(writer)) {
661
+ goto error_exit;
662
+ }
663
+
664
+ /* Close the writer. */
665
+ crf1dmw_close(writer);
666
+ logging(lg, "Seconds required: %.3f\n", (clock() - begin) / (double)CLOCKS_PER_SEC);
667
+ logging(lg, "\n");
668
+
669
+ free(amap);
670
+ free(fmap);
671
+ return 0;
672
+
673
+ error_exit:
674
+ if (writer != NULL) {
675
+ crf1dmw_close(writer);
676
+ }
677
+ if (amap != NULL) {
678
+ free(amap);
679
+ }
680
+ if (fmap != NULL) {
681
+ free(fmap);
682
+ }
683
+ return ret;
684
+ }
685
+
686
+ static int crf1de_exchange_options(crfsuite_params_t* params, crf1de_option_t* opt, int mode)
687
+ {
688
+ BEGIN_PARAM_MAP(params, mode)
689
+ DDX_PARAM_FLOAT(
690
+ "feature.minfreq", opt->feature_minfreq, 0.0,
691
+ "The minimum frequency of features."
692
+ )
693
+ DDX_PARAM_INT(
694
+ "feature.possible_states", opt->feature_possible_states, 0,
695
+ "Force to generate possible state features."
696
+ )
697
+ DDX_PARAM_INT(
698
+ "feature.possible_transitions", opt->feature_possible_transitions, 0,
699
+ "Force to generate possible transition features."
700
+ )
701
+ END_PARAM_MAP()
702
+
703
+ return 0;
704
+ }
705
+
706
+
707
+
708
+ /*
709
+ * Implementation of encoder_t object.
710
+ */
711
+
712
/*
 * Precomputation levels of the encoder, in increasing order: reaching a
 * level implies that every lower level has been reached as well.
 */
enum {
    LEVEL_NONE      = 0,    /**< No precomputation. */
    LEVEL_WEIGHT    = 1,    /**< Feature weights are set. */
    LEVEL_INSTANCE  = 2,    /**< Instance is set. */
    LEVEL_ALPHABETA = 3,    /**< Performed the forward-backward algorithm. */
    LEVEL_MARGINAL  = 4     /**< Computed marginal probabilities. */
};
724
+
725
+ static void set_level(encoder_t *self, int level)
726
+ {
727
+ int prev = self->level;
728
+ crf1de_t *crf1de = (crf1de_t*)self->internal;
729
+
730
+ /*
731
+ Each training algorithm has a different requirement for processing a
732
+ training instance. For example, the perceptron algorithm need compute
733
+ Viterbi paths whereas gradient-based algorithms (e.g., SGD) need
734
+ marginal probabilities computed by the forward-backward algorithm.
735
+ */
736
+
737
+ /* LEVEL_WEIGHT: set transition scores. */
738
+ if (LEVEL_WEIGHT <= level && prev < LEVEL_WEIGHT) {
739
+ crf1dc_reset(crf1de->ctx, RF_TRANS);
740
+ crf1de_transition_score_scaled(crf1de, self->w, self->scale);
741
+ }
742
+
743
+ /* LEVEL_INSTANCE: set state scores. */
744
+ if (LEVEL_INSTANCE <= level && prev < LEVEL_INSTANCE) {
745
+ crf1dc_set_num_items(crf1de->ctx, self->inst->num_items);
746
+ crf1dc_reset(crf1de->ctx, RF_STATE);
747
+ crf1de_state_score_scaled(crf1de, self->inst, self->w, self->scale);
748
+ }
749
+
750
+ /* LEVEL_ALPHABETA: perform the forward-backward algorithm. */
751
+ if (LEVEL_ALPHABETA <= level && prev < LEVEL_ALPHABETA) {
752
+ crf1dc_exp_transition(crf1de->ctx);
753
+ crf1dc_exp_state(crf1de->ctx);
754
+ crf1dc_alpha_score(crf1de->ctx);
755
+ crf1dc_beta_score(crf1de->ctx);
756
+ }
757
+
758
+ /* LEVEL_MARGINAL: compute the marginal probability. */
759
+ if (LEVEL_MARGINAL <= level && prev < LEVEL_MARGINAL) {
760
+ crf1dc_marginals(crf1de->ctx);
761
+ }
762
+
763
+ self->level = level;
764
+ }
765
+
766
+ static int encoder_exchange_options(encoder_t *self, crfsuite_params_t* params, int mode)
767
+ {
768
+ crf1de_t *crf1de = (crf1de_t*)self->internal;
769
+ return crf1de_exchange_options(params, &crf1de->opt, mode);
770
+ }
771
+
772
+ static int encoder_initialize(encoder_t *self, dataset_t *ds, logging_t *lg)
773
+ {
774
+ int ret;
775
+ crf1de_t *crf1de = (crf1de_t*)self->internal;
776
+
777
+ ret = crf1de_set_data(
778
+ crf1de,
779
+ ds,
780
+ ds->data->labels->num(ds->data->labels),
781
+ ds->data->attrs->num(ds->data->attrs),
782
+ lg);
783
+ self->ds = ds;
784
+ self->num_features = crf1de->num_features;
785
+ self->cap_items = crf1de->ctx->cap_items;
786
+ return ret;
787
+ }
788
+
789
+ /* LEVEL_NONE -> LEVEL_NONE. */
790
+ static int encoder_objective_and_gradients_batch(encoder_t *self, dataset_t *ds, const floatval_t *w, floatval_t *f, floatval_t *g)
791
+ {
792
+ int i;
793
+ floatval_t logp = 0, logl = 0;
794
+ crf1de_t *crf1de = (crf1de_t*)self->internal;
795
+ const int N = ds->num_instances;
796
+ const int K = crf1de->num_features;
797
+
798
+ /*
799
+ Initialize the gradients with observation expectations.
800
+ */
801
+ for (i = 0;i < K;++i) {
802
+ crf1df_feature_t* f = &crf1de->features[i];
803
+ g[i] = -f->freq;
804
+ }
805
+
806
+ /*
807
+ Set the scores (weights) of transition features here because
808
+ these are independent of input label sequences.
809
+ */
810
+ crf1dc_reset(crf1de->ctx, RF_TRANS);
811
+ crf1de_transition_score(crf1de, w);
812
+ crf1dc_exp_transition(crf1de->ctx);
813
+
814
+ /*
815
+ Compute model expectations.
816
+ */
817
+ for (i = 0;i < N;++i) {
818
+ const crfsuite_instance_t *seq = dataset_get(ds, i);
819
+
820
+ /* Set label sequences and state scores. */
821
+ crf1dc_set_num_items(crf1de->ctx, seq->num_items);
822
+ crf1dc_reset(crf1de->ctx, RF_STATE);
823
+ crf1de_state_score(crf1de, seq, w);
824
+ crf1dc_exp_state(crf1de->ctx);
825
+
826
+ /* Compute forward/backward scores. */
827
+ crf1dc_alpha_score(crf1de->ctx);
828
+ crf1dc_beta_score(crf1de->ctx);
829
+ crf1dc_marginals(crf1de->ctx);
830
+
831
+ /* Compute the probability of the input sequence on the model. */
832
+ logp = crf1dc_score(crf1de->ctx, seq->labels) - crf1dc_lognorm(crf1de->ctx);
833
+ /* Update the log-likelihood. */
834
+ logl += logp;
835
+
836
+ /* Update the model expectations of features. */
837
+ crf1de_model_expectation(crf1de, seq, g, 1.);
838
+ }
839
+
840
+ *f = -logl;
841
+ return 0;
842
+ }
843
+
844
+ /* LEVEL_NONE -> LEVEL_NONE. */
845
+ static int encoder_features_on_path(encoder_t *self, const crfsuite_instance_t *inst, const int *path, crfsuite_encoder_features_on_path_callback func, void *instance)
846
+ {
847
+ crf1de_t *crf1de = (crf1de_t*)self->internal;
848
+ crf1de_features_on_path(crf1de, inst, path, func, instance);
849
+ return 0;
850
+ }
851
+
852
+ /* LEVEL_NONE -> LEVEL_NONE. */
853
+ static int encoder_save_model(encoder_t *self, const char *filename, const floatval_t *w, logging_t *lg)
854
+ {
855
+ crf1de_t *crf1de = (crf1de_t*)self->internal;
856
+ return crf1de_save_model(crf1de, filename, w, self->ds->data->attrs, self->ds->data->labels, lg);
857
+ }
858
+
859
+ /* LEVEL_NONE -> LEVEL_WEIGHT. */
860
+ static int encoder_set_weights(encoder_t *self, const floatval_t *w, floatval_t scale)
861
+ {
862
+ self->w = w;
863
+ self->scale = scale;
864
+ self->level = LEVEL_WEIGHT-1;
865
+ set_level(self, LEVEL_WEIGHT);
866
+ return 0;
867
+ }
868
+
869
+ /* LEVEL_WEIGHT -> LEVEL_INSTANCE. */
870
+ static int encoder_set_instance(encoder_t *self, const crfsuite_instance_t *inst)
871
+ {
872
+ self->inst = inst;
873
+ self->level = LEVEL_INSTANCE-1;
874
+ set_level(self, LEVEL_INSTANCE);
875
+ return 0;
876
+ }
877
+
878
+ /* LEVEL_INSTANCE -> LEVEL_INSTANCE. */
879
+ static int encoder_score(encoder_t *self, const int *path, floatval_t *ptr_score)
880
+ {
881
+ crf1de_t *crf1de = (crf1de_t*)self->internal;
882
+ *ptr_score = crf1dc_score(crf1de->ctx, path);
883
+ return 0;
884
+ }
885
+
886
+ /* LEVEL_INSTANCE -> LEVEL_INSTANCE. */
887
+ static int encoder_viterbi(encoder_t *self, int *path, floatval_t *ptr_score)
888
+ {
889
+ int i;
890
+ floatval_t score;
891
+ crf1de_t *crf1de = (crf1de_t*)self->internal;
892
+ score = crf1dc_viterbi(crf1de->ctx, path);
893
+ if (ptr_score != NULL) {
894
+ *ptr_score = score;
895
+ }
896
+ return 0;
897
+ }
898
+
899
+ /* LEVEL_INSTANCE -> LEVEL_ALPHABETA. */
900
+ static int encoder_partition_factor(encoder_t *self, floatval_t *ptr_pf)
901
+ {
902
+ crf1de_t *crf1de = (crf1de_t*)self->internal;
903
+ set_level(self, LEVEL_ALPHABETA);
904
+ *ptr_pf = crf1dc_lognorm(crf1de->ctx);
905
+ return 0;
906
+ }
907
+
908
+ /* LEVEL_INSTANCE -> LEVEL_MARGINAL. */
909
+ static int encoder_objective_and_gradients(encoder_t *self, floatval_t *f, floatval_t *g, floatval_t gain)
910
+ {
911
+ crf1de_t *crf1de = (crf1de_t*)self->internal;
912
+ set_level(self, LEVEL_MARGINAL);
913
+ crf1de_observation_expectation(crf1de, self->inst, self->inst->labels, g, gain);
914
+ crf1de_model_expectation(crf1de, self->inst, g, -gain);
915
+ *f = -crf1dc_score(crf1de->ctx, self->inst->labels) + crf1dc_lognorm(crf1de->ctx);
916
+ return 0;
917
+ }
918
+
919
+ encoder_t *crf1d_create_encoder()
920
+ {
921
+ encoder_t *self = (encoder_t*)calloc(1, sizeof(encoder_t));
922
+ if (self != NULL) {
923
+ crf1de_t *enc = (crf1de_t*)calloc(1, sizeof(crf1de_t));
924
+ if (enc != NULL) {
925
+ crf1de_init(enc);
926
+
927
+ self->exchange_options = encoder_exchange_options;
928
+ self->initialize = encoder_initialize;
929
+ self->objective_and_gradients_batch = encoder_objective_and_gradients_batch;
930
+ self->save_model = encoder_save_model;
931
+ self->features_on_path = encoder_features_on_path;
932
+ self->set_weights = encoder_set_weights;
933
+ self->set_instance = encoder_set_instance;
934
+ self->score = encoder_score;
935
+ self->viterbi = encoder_viterbi;
936
+ self->partition_factor = encoder_partition_factor;
937
+ self->objective_and_gradients = encoder_objective_and_gradients;
938
+ self->internal = enc;
939
+ }
940
+ }
941
+
942
+ return self;
943
+ }