opener-opinion-detector-base 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (261) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +101 -0
  3. data/bin/opinion-detector-base +19 -0
  4. data/core/annotation.cfg.erb +9 -0
  5. data/core/packages/KafNafParser-1.4.tar.gz +0 -0
  6. data/core/packages/VUA_pylib-1.5.tar.gz +0 -0
  7. data/core/python-scripts/LICENSE +339 -0
  8. data/core/python-scripts/README.md +226 -0
  9. data/core/python-scripts/classify_kaf_naf_file.py +499 -0
  10. data/core/python-scripts/cross_validation.py +634 -0
  11. data/core/python-scripts/generate_folds.py +134 -0
  12. data/core/python-scripts/models.cfg +10 -0
  13. data/core/python-scripts/my_templates/README +33 -0
  14. data/core/python-scripts/my_templates/templates_exp.only0.txt +6 -0
  15. data/core/python-scripts/my_templates/templates_exp.pol0.txt +10 -0
  16. data/core/python-scripts/my_templates/templates_exp.red.txt +7 -0
  17. data/core/python-scripts/my_templates/templates_exp.txt +10 -0
  18. data/core/python-scripts/my_templates/templates_holder.only0.txt +11 -0
  19. data/core/python-scripts/my_templates/templates_holder.red.txt +9 -0
  20. data/core/python-scripts/my_templates/templates_holder.txt +10 -0
  21. data/core/python-scripts/my_templates/templates_target.only0.txt +11 -0
  22. data/core/python-scripts/my_templates/templates_target.red.txt +9 -0
  23. data/core/python-scripts/my_templates/templates_target.txt +10 -0
  24. data/core/python-scripts/run_all_experiments.sh +49 -0
  25. data/core/python-scripts/run_basic.py +20 -0
  26. data/core/python-scripts/run_experiment.sh +42 -0
  27. data/core/python-scripts/scripts/__init__.py +1 -0
  28. data/core/python-scripts/scripts/config_manager.py +314 -0
  29. data/core/python-scripts/scripts/crfutils.py +215 -0
  30. data/core/python-scripts/scripts/extract_feats_relations.py +295 -0
  31. data/core/python-scripts/scripts/extract_features.py +376 -0
  32. data/core/python-scripts/scripts/feats_to_crf.exp.py +105 -0
  33. data/core/python-scripts/scripts/lexicons.py +44 -0
  34. data/core/python-scripts/scripts/link_entities_distance.py +77 -0
  35. data/core/python-scripts/scripts/relation_classifier.py +250 -0
  36. data/core/python-scripts/train.py +566 -0
  37. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/PKG-INFO +10 -0
  38. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/SOURCES.txt +22 -0
  39. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/dependency_links.txt +1 -0
  40. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/installed-files.txt +47 -0
  41. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/top_level.txt +1 -0
  42. data/core/site-packages/pre_build/KafNafParser/KafNafParserMod.py +390 -0
  43. data/core/site-packages/pre_build/KafNafParser/KafNafParserMod.pyc +0 -0
  44. data/core/site-packages/pre_build/KafNafParser/__init__.py +14 -0
  45. data/core/site-packages/pre_build/KafNafParser/__init__.pyc +0 -0
  46. data/core/site-packages/pre_build/KafNafParser/constituency_data.py +125 -0
  47. data/core/site-packages/pre_build/KafNafParser/constituency_data.pyc +0 -0
  48. data/core/site-packages/pre_build/KafNafParser/coreference_data.py +52 -0
  49. data/core/site-packages/pre_build/KafNafParser/coreference_data.pyc +0 -0
  50. data/core/site-packages/pre_build/KafNafParser/dependency_data.py +78 -0
  51. data/core/site-packages/pre_build/KafNafParser/dependency_data.pyc +0 -0
  52. data/core/site-packages/pre_build/KafNafParser/entity_data.py +59 -0
  53. data/core/site-packages/pre_build/KafNafParser/entity_data.pyc +0 -0
  54. data/core/site-packages/pre_build/KafNafParser/external_references_data.py +41 -0
  55. data/core/site-packages/pre_build/KafNafParser/external_references_data.pyc +0 -0
  56. data/core/site-packages/pre_build/KafNafParser/feature_extractor/__init__.py +2 -0
  57. data/core/site-packages/pre_build/KafNafParser/feature_extractor/__init__.pyc +0 -0
  58. data/core/site-packages/pre_build/KafNafParser/feature_extractor/constituency.py +205 -0
  59. data/core/site-packages/pre_build/KafNafParser/feature_extractor/constituency.pyc +0 -0
  60. data/core/site-packages/pre_build/KafNafParser/feature_extractor/dependency.py +309 -0
  61. data/core/site-packages/pre_build/KafNafParser/feature_extractor/dependency.pyc +0 -0
  62. data/core/site-packages/pre_build/KafNafParser/features_data.py +131 -0
  63. data/core/site-packages/pre_build/KafNafParser/features_data.pyc +0 -0
  64. data/core/site-packages/pre_build/KafNafParser/header_data.py +127 -0
  65. data/core/site-packages/pre_build/KafNafParser/header_data.pyc +0 -0
  66. data/core/site-packages/pre_build/KafNafParser/opinion_data.py +211 -0
  67. data/core/site-packages/pre_build/KafNafParser/opinion_data.pyc +0 -0
  68. data/core/site-packages/pre_build/KafNafParser/references_data.py +23 -0
  69. data/core/site-packages/pre_build/KafNafParser/references_data.pyc +0 -0
  70. data/core/site-packages/pre_build/KafNafParser/span_data.py +63 -0
  71. data/core/site-packages/pre_build/KafNafParser/span_data.pyc +0 -0
  72. data/core/site-packages/pre_build/KafNafParser/term_data.py +111 -0
  73. data/core/site-packages/pre_build/KafNafParser/term_data.pyc +0 -0
  74. data/core/site-packages/pre_build/KafNafParser/term_sentiment_data.py +42 -0
  75. data/core/site-packages/pre_build/KafNafParser/term_sentiment_data.pyc +0 -0
  76. data/core/site-packages/pre_build/KafNafParser/text_data.py +99 -0
  77. data/core/site-packages/pre_build/KafNafParser/text_data.pyc +0 -0
  78. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/PKG-INFO +10 -0
  79. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/SOURCES.txt +14 -0
  80. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/dependency_links.txt +1 -0
  81. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/installed-files.txt +23 -0
  82. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/top_level.txt +1 -0
  83. data/core/site-packages/pre_build/VUA_pylib/__init__.py +1 -0
  84. data/core/site-packages/pre_build/VUA_pylib/__init__.pyc +0 -0
  85. data/core/site-packages/pre_build/VUA_pylib/common/__init__.py +1 -0
  86. data/core/site-packages/pre_build/VUA_pylib/common/__init__.pyc +0 -0
  87. data/core/site-packages/pre_build/VUA_pylib/common/common.py +28 -0
  88. data/core/site-packages/pre_build/VUA_pylib/common/common.pyc +0 -0
  89. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/__init__.py +1 -0
  90. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/__init__.pyc +0 -0
  91. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/google_web_nl.py +156 -0
  92. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/google_web_nl.pyc +0 -0
  93. data/core/site-packages/pre_build/VUA_pylib/io_utils/__init__.py +1 -0
  94. data/core/site-packages/pre_build/VUA_pylib/io_utils/__init__.pyc +0 -0
  95. data/core/site-packages/pre_build/VUA_pylib/io_utils/feature_file.py +121 -0
  96. data/core/site-packages/pre_build/VUA_pylib/io_utils/feature_file.pyc +0 -0
  97. data/core/site-packages/pre_build/VUA_pylib/lexicon/__init__.py +1 -0
  98. data/core/site-packages/pre_build/VUA_pylib/lexicon/__init__.pyc +0 -0
  99. data/core/site-packages/pre_build/VUA_pylib/lexicon/lexicon.py +72 -0
  100. data/core/site-packages/pre_build/VUA_pylib/lexicon/lexicon.pyc +0 -0
  101. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/PKG-INFO +10 -0
  102. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/SOURCES.txt +7 -0
  103. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/dependency_links.txt +1 -0
  104. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/installed-files.txt +11 -0
  105. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/top_level.txt +1 -0
  106. data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.py +165 -0
  107. data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.pyc +0 -0
  108. data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.py +439 -0
  109. data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.pyc +0 -0
  110. data/core/site-packages/pre_build/VUKafParserPy/__init__.py +7 -0
  111. data/core/site-packages/pre_build/VUKafParserPy/__init__.pyc +0 -0
  112. data/core/vendor/src/crfsuite/AUTHORS +1 -0
  113. data/core/vendor/src/crfsuite/COPYING +27 -0
  114. data/core/vendor/src/crfsuite/ChangeLog +103 -0
  115. data/core/vendor/src/crfsuite/INSTALL +236 -0
  116. data/core/vendor/src/crfsuite/Makefile.am +19 -0
  117. data/core/vendor/src/crfsuite/Makefile.in +783 -0
  118. data/core/vendor/src/crfsuite/README +183 -0
  119. data/core/vendor/src/crfsuite/aclocal.m4 +9018 -0
  120. data/core/vendor/src/crfsuite/autogen.sh +38 -0
  121. data/core/vendor/src/crfsuite/compile +143 -0
  122. data/core/vendor/src/crfsuite/config.guess +1502 -0
  123. data/core/vendor/src/crfsuite/config.h.in +198 -0
  124. data/core/vendor/src/crfsuite/config.sub +1714 -0
  125. data/core/vendor/src/crfsuite/configure +14273 -0
  126. data/core/vendor/src/crfsuite/configure.in +149 -0
  127. data/core/vendor/src/crfsuite/crfsuite.sln +42 -0
  128. data/core/vendor/src/crfsuite/depcomp +630 -0
  129. data/core/vendor/src/crfsuite/example/chunking.py +49 -0
  130. data/core/vendor/src/crfsuite/example/crfutils.py +179 -0
  131. data/core/vendor/src/crfsuite/example/ner.py +270 -0
  132. data/core/vendor/src/crfsuite/example/pos.py +78 -0
  133. data/core/vendor/src/crfsuite/example/template.py +88 -0
  134. data/core/vendor/src/crfsuite/frontend/Makefile.am +29 -0
  135. data/core/vendor/src/crfsuite/frontend/Makefile.in +640 -0
  136. data/core/vendor/src/crfsuite/frontend/dump.c +116 -0
  137. data/core/vendor/src/crfsuite/frontend/frontend.vcxproj +129 -0
  138. data/core/vendor/src/crfsuite/frontend/iwa.c +273 -0
  139. data/core/vendor/src/crfsuite/frontend/iwa.h +65 -0
  140. data/core/vendor/src/crfsuite/frontend/learn.c +439 -0
  141. data/core/vendor/src/crfsuite/frontend/main.c +137 -0
  142. data/core/vendor/src/crfsuite/frontend/option.c +93 -0
  143. data/core/vendor/src/crfsuite/frontend/option.h +86 -0
  144. data/core/vendor/src/crfsuite/frontend/readdata.h +38 -0
  145. data/core/vendor/src/crfsuite/frontend/reader.c +136 -0
  146. data/core/vendor/src/crfsuite/frontend/tag.c +427 -0
  147. data/core/vendor/src/crfsuite/genbinary.sh.in +15 -0
  148. data/core/vendor/src/crfsuite/include/Makefile.am +11 -0
  149. data/core/vendor/src/crfsuite/include/Makefile.in +461 -0
  150. data/core/vendor/src/crfsuite/include/crfsuite.h +1063 -0
  151. data/core/vendor/src/crfsuite/include/crfsuite.hpp +555 -0
  152. data/core/vendor/src/crfsuite/include/crfsuite_api.hpp +400 -0
  153. data/core/vendor/src/crfsuite/include/os.h +61 -0
  154. data/core/vendor/src/crfsuite/install-sh +520 -0
  155. data/core/vendor/src/crfsuite/lib/cqdb/COPYING +28 -0
  156. data/core/vendor/src/crfsuite/lib/cqdb/Makefile.am +21 -0
  157. data/core/vendor/src/crfsuite/lib/cqdb/Makefile.in +549 -0
  158. data/core/vendor/src/crfsuite/lib/cqdb/cqdb.vcxproj +86 -0
  159. data/core/vendor/src/crfsuite/lib/cqdb/include/cqdb.h +524 -0
  160. data/core/vendor/src/crfsuite/lib/cqdb/src/cqdb.c +587 -0
  161. data/core/vendor/src/crfsuite/lib/cqdb/src/lookup3.c +976 -0
  162. data/core/vendor/src/crfsuite/lib/crf/Makefile.am +46 -0
  163. data/core/vendor/src/crfsuite/lib/crf/Makefile.in +721 -0
  164. data/core/vendor/src/crfsuite/lib/crf/crf.vcxproj +216 -0
  165. data/core/vendor/src/crfsuite/lib/crf/src/crf1d.h +353 -0
  166. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_context.c +705 -0
  167. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_encode.c +943 -0
  168. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_feature.c +352 -0
  169. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_model.c +994 -0
  170. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_tag.c +550 -0
  171. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite.c +492 -0
  172. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite_internal.h +236 -0
  173. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite_train.c +272 -0
  174. data/core/vendor/src/crfsuite/lib/crf/src/dataset.c +106 -0
  175. data/core/vendor/src/crfsuite/lib/crf/src/dictionary.c +118 -0
  176. data/core/vendor/src/crfsuite/lib/crf/src/holdout.c +80 -0
  177. data/core/vendor/src/crfsuite/lib/crf/src/logging.c +91 -0
  178. data/core/vendor/src/crfsuite/lib/crf/src/logging.h +48 -0
  179. data/core/vendor/src/crfsuite/lib/crf/src/params.c +335 -0
  180. data/core/vendor/src/crfsuite/lib/crf/src/params.h +80 -0
  181. data/core/vendor/src/crfsuite/lib/crf/src/quark.c +172 -0
  182. data/core/vendor/src/crfsuite/lib/crf/src/quark.h +46 -0
  183. data/core/vendor/src/crfsuite/lib/crf/src/rumavl.c +1107 -0
  184. data/core/vendor/src/crfsuite/lib/crf/src/rumavl.h +160 -0
  185. data/core/vendor/src/crfsuite/lib/crf/src/train_arow.c +408 -0
  186. data/core/vendor/src/crfsuite/lib/crf/src/train_averaged_perceptron.c +242 -0
  187. data/core/vendor/src/crfsuite/lib/crf/src/train_l2sgd.c +507 -0
  188. data/core/vendor/src/crfsuite/lib/crf/src/train_lbfgs.c +338 -0
  189. data/core/vendor/src/crfsuite/lib/crf/src/train_passive_aggressive.c +435 -0
  190. data/core/vendor/src/crfsuite/lib/crf/src/vecmath.h +341 -0
  191. data/core/vendor/src/crfsuite/ltmain.sh +8413 -0
  192. data/core/vendor/src/crfsuite/missing +376 -0
  193. data/core/vendor/src/crfsuite/swig/Makefile.am +13 -0
  194. data/core/vendor/src/crfsuite/swig/Makefile.in +365 -0
  195. data/core/vendor/src/crfsuite/swig/crfsuite.cpp +2 -0
  196. data/core/vendor/src/crfsuite/swig/export.i +32 -0
  197. data/core/vendor/src/crfsuite/swig/python/README +92 -0
  198. data/core/vendor/src/crfsuite/swig/python/crfsuite.py +329 -0
  199. data/core/vendor/src/crfsuite/swig/python/export_wrap.cpp +14355 -0
  200. data/core/vendor/src/crfsuite/swig/python/export_wrap.h +63 -0
  201. data/core/vendor/src/crfsuite/swig/python/prepare.sh +9 -0
  202. data/core/vendor/src/crfsuite/swig/python/sample_tag.py +52 -0
  203. data/core/vendor/src/crfsuite/swig/python/sample_train.py +68 -0
  204. data/core/vendor/src/crfsuite/swig/python/setup.py +44 -0
  205. data/core/vendor/src/crfsuite/win32/stdint.h +679 -0
  206. data/core/vendor/src/liblbfgs/AUTHORS +1 -0
  207. data/core/vendor/src/liblbfgs/COPYING +22 -0
  208. data/core/vendor/src/liblbfgs/ChangeLog +120 -0
  209. data/core/vendor/src/liblbfgs/INSTALL +231 -0
  210. data/core/vendor/src/liblbfgs/Makefile.am +10 -0
  211. data/core/vendor/src/liblbfgs/Makefile.in +638 -0
  212. data/core/vendor/src/liblbfgs/NEWS +0 -0
  213. data/core/vendor/src/liblbfgs/README +71 -0
  214. data/core/vendor/src/liblbfgs/aclocal.m4 +6985 -0
  215. data/core/vendor/src/liblbfgs/autogen.sh +38 -0
  216. data/core/vendor/src/liblbfgs/config.guess +1411 -0
  217. data/core/vendor/src/liblbfgs/config.h.in +64 -0
  218. data/core/vendor/src/liblbfgs/config.sub +1500 -0
  219. data/core/vendor/src/liblbfgs/configure +21146 -0
  220. data/core/vendor/src/liblbfgs/configure.in +107 -0
  221. data/core/vendor/src/liblbfgs/depcomp +522 -0
  222. data/core/vendor/src/liblbfgs/include/lbfgs.h +745 -0
  223. data/core/vendor/src/liblbfgs/install-sh +322 -0
  224. data/core/vendor/src/liblbfgs/lbfgs.sln +26 -0
  225. data/core/vendor/src/liblbfgs/lib/Makefile.am +24 -0
  226. data/core/vendor/src/liblbfgs/lib/Makefile.in +499 -0
  227. data/core/vendor/src/liblbfgs/lib/arithmetic_ansi.h +133 -0
  228. data/core/vendor/src/liblbfgs/lib/arithmetic_sse_double.h +294 -0
  229. data/core/vendor/src/liblbfgs/lib/arithmetic_sse_float.h +298 -0
  230. data/core/vendor/src/liblbfgs/lib/lbfgs.c +1371 -0
  231. data/core/vendor/src/liblbfgs/lib/lib.vcxproj +95 -0
  232. data/core/vendor/src/liblbfgs/ltmain.sh +6426 -0
  233. data/core/vendor/src/liblbfgs/missing +353 -0
  234. data/core/vendor/src/liblbfgs/sample/Makefile.am +15 -0
  235. data/core/vendor/src/liblbfgs/sample/Makefile.in +433 -0
  236. data/core/vendor/src/liblbfgs/sample/sample.c +81 -0
  237. data/core/vendor/src/liblbfgs/sample/sample.cpp +126 -0
  238. data/core/vendor/src/liblbfgs/sample/sample.vcxproj +105 -0
  239. data/core/vendor/src/svm_light/LICENSE.txt +59 -0
  240. data/core/vendor/src/svm_light/Makefile +105 -0
  241. data/core/vendor/src/svm_light/kernel.h +40 -0
  242. data/core/vendor/src/svm_light/svm_classify.c +197 -0
  243. data/core/vendor/src/svm_light/svm_common.c +985 -0
  244. data/core/vendor/src/svm_light/svm_common.h +301 -0
  245. data/core/vendor/src/svm_light/svm_hideo.c +1062 -0
  246. data/core/vendor/src/svm_light/svm_learn.c +4147 -0
  247. data/core/vendor/src/svm_light/svm_learn.h +169 -0
  248. data/core/vendor/src/svm_light/svm_learn_main.c +397 -0
  249. data/core/vendor/src/svm_light/svm_loqo.c +211 -0
  250. data/ext/hack/Rakefile +17 -0
  251. data/ext/hack/support.rb +88 -0
  252. data/lib/opener/opinion_detectors/base.rb +112 -0
  253. data/lib/opener/opinion_detectors/base/version.rb +7 -0
  254. data/lib/opener/opinion_detectors/configuration_creator.rb +86 -0
  255. data/lib/opener/opinion_detectors/de.rb +7 -0
  256. data/lib/opener/opinion_detectors/en.rb +7 -0
  257. data/lib/opener/opinion_detectors/it.rb +7 -0
  258. data/lib/opener/opinion_detectors/nl.rb +6 -0
  259. data/opener-opinion-detector-base.gemspec +35 -0
  260. data/pre_build_requirements.txt +3 -0
  261. metadata +374 -0
@@ -0,0 +1,338 @@
1
+ /*
2
+ * Batch training with L-BFGS.
3
+ *
4
+ * Copyright (c) 2007-2010, Naoaki Okazaki
5
+ * All rights reserved.
6
+ *
7
+ * Redistribution and use in source and binary forms, with or without
8
+ * modification, are permitted provided that the following conditions are met:
9
+ * * Redistributions of source code must retain the above copyright
10
+ * notice, this list of conditions and the following disclaimer.
11
+ * * Redistributions in binary form must reproduce the above copyright
12
+ * notice, this list of conditions and the following disclaimer in the
13
+ * documentation and/or other materials provided with the distribution.
14
+ * * Neither the names of the authors nor the names of its contributors
15
+ * may be used to endorse or promote products derived from this
16
+ * software without specific prior written permission.
17
+ *
18
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
22
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
+ */
30
+
31
+ /* $Id$ */
32
+
33
+ #ifdef HAVE_CONFIG_H
34
+ #include <config.h>
35
+ #endif/*HAVE_CONFIG_H*/
36
+
37
+ #include <os.h>
38
+
39
+ #include <stdio.h>
40
+ #include <stdlib.h>
41
+ #include <string.h>
42
+ #include <limits.h>
43
+ #include <time.h>
44
+
45
+ #include <crfsuite.h>
46
+ #include "crfsuite_internal.h"
47
+
48
+ #include "logging.h"
49
+ #include "params.h"
50
+ #include "vecmath.h"
51
+ #include <lbfgs.h>
52
+
53
+ /**
54
+ * Training parameters (configurable with crfsuite_params_t interface).
55
+ */
56
+ typedef struct {
57
+ floatval_t c1;
58
+ floatval_t c2;
59
+ int memory;
60
+ floatval_t epsilon;
61
+ int stop;
62
+ floatval_t delta;
63
+ int max_iterations;
64
+ char* linesearch;
65
+ int linesearch_max_iterations;
66
+ } training_option_t;
67
+
68
+ /**
69
+ * Internal data structure for the callback function of lbfgs().
70
+ */
71
+ typedef struct {
72
+ encoder_t *gm;
73
+ dataset_t *trainset;
74
+ dataset_t *testset;
75
+ logging_t *lg;
76
+ floatval_t c2;
77
+ floatval_t* best_w;
78
+ clock_t begin;
79
+ } lbfgs_internal_t;
80
+
81
+ static lbfgsfloatval_t lbfgs_evaluate(
82
+ void *instance,
83
+ const lbfgsfloatval_t *x,
84
+ lbfgsfloatval_t *g,
85
+ const int n,
86
+ const lbfgsfloatval_t step
87
+ )
88
+ {
89
+ int i;
90
+ floatval_t f, norm = 0.;
91
+ lbfgs_internal_t *lbfgsi = (lbfgs_internal_t*)instance;
92
+ encoder_t *gm = lbfgsi->gm;
93
+ dataset_t *trainset = lbfgsi->trainset;
94
+
95
+ /* Compute the objective value and gradients. */
96
+ gm->objective_and_gradients_batch(gm, trainset, x, &f, g);
97
+
98
+ /* L2 regularization. */
99
+ if (0 < lbfgsi->c2) {
100
+ const floatval_t c22 = lbfgsi->c2 * 2.;
101
+ for (i = 0;i < n;++i) {
102
+ g[i] += (c22 * x[i]);
103
+ norm += x[i] * x[i];
104
+ }
105
+ f += (lbfgsi->c2 * norm);
106
+ }
107
+
108
+ return f;
109
+ }
110
+
111
+ static int lbfgs_progress(
112
+ void *instance,
113
+ const lbfgsfloatval_t *x,
114
+ const lbfgsfloatval_t *g,
115
+ const lbfgsfloatval_t fx,
116
+ const lbfgsfloatval_t xnorm,
117
+ const lbfgsfloatval_t gnorm,
118
+ const lbfgsfloatval_t step,
119
+ int n,
120
+ int k,
121
+ int ls)
122
+ {
123
+ int i, num_active_features = 0;
124
+ clock_t duration, clk = clock();
125
+ lbfgs_internal_t *lbfgsi = (lbfgs_internal_t*)instance;
126
+ dataset_t *testset = lbfgsi->testset;
127
+ encoder_t *gm = lbfgsi->gm;
128
+ logging_t *lg = lbfgsi->lg;
129
+
130
+ /* Compute the duration required for this iteration. */
131
+ duration = clk - lbfgsi->begin;
132
+ lbfgsi->begin = clk;
133
+
134
+ /* Store the feature weight in case L-BFGS terminates with an error. */
135
+ for (i = 0;i < n;++i) {
136
+ lbfgsi->best_w[i] = x[i];
137
+ if (x[i] != 0.) ++num_active_features;
138
+ }
139
+
140
+ /* Report the progress. */
141
+ logging(lg, "***** Iteration #%d *****\n", k);
142
+ logging(lg, "Loss: %f\n", fx);
143
+ logging(lg, "Feature norm: %f\n", xnorm);
144
+ logging(lg, "Error norm: %f\n", gnorm);
145
+ logging(lg, "Active features: %d\n", num_active_features);
146
+ logging(lg, "Line search trials: %d\n", ls);
147
+ logging(lg, "Line search step: %f\n", step);
148
+ logging(lg, "Seconds required for this iteration: %.3f\n", duration / (double)CLOCKS_PER_SEC);
149
+
150
+ /* Send the tagger with the current parameters. */
151
+ if (testset != NULL) {
152
+ holdout_evaluation(gm, testset, x, lg);
153
+ }
154
+
155
+ logging(lg, "\n");
156
+
157
+ /* Continue. */
158
+ return 0;
159
+ }
160
+
161
+ static int exchange_options(crfsuite_params_t* params, training_option_t* opt, int mode)
162
+ {
163
+ BEGIN_PARAM_MAP(params, mode)
164
+ DDX_PARAM_FLOAT(
165
+ "c1", opt->c1, 0,
166
+ "Coefficient for L1 regularization."
167
+ )
168
+ DDX_PARAM_FLOAT(
169
+ "c2", opt->c2, 1.0,
170
+ "Coefficient for L2 regularization."
171
+ )
172
+ DDX_PARAM_INT(
173
+ "max_iterations", opt->max_iterations, INT_MAX,
174
+ "The maximum number of iterations for L-BFGS optimization."
175
+ )
176
+ DDX_PARAM_INT(
177
+ "num_memories", opt->memory, 6,
178
+ "The number of limited memories for approximating the inverse hessian matrix."
179
+ )
180
+ DDX_PARAM_FLOAT(
181
+ "epsilon", opt->epsilon, 1e-5,
182
+ "Epsilon for testing the convergence of the objective."
183
+ )
184
+ DDX_PARAM_INT(
185
+ "period", opt->stop, 10,
186
+ "The duration of iterations to test the stopping criterion."
187
+ )
188
+ DDX_PARAM_FLOAT(
189
+ "delta", opt->delta, 1e-5,
190
+ "The threshold for the stopping criterion; an L-BFGS iteration stops when the\n"
191
+ "improvement of the log likelihood over the last ${period} iterations is no\n"
192
+ "greater than this threshold."
193
+ )
194
+ DDX_PARAM_STRING(
195
+ "linesearch", opt->linesearch, "MoreThuente",
196
+ "The line search algorithm used in L-BFGS updates:\n"
197
+ "{ 'MoreThuente': More and Thuente's method,\n"
198
+ " 'Backtracking': Backtracking method with regular Wolfe condition,\n"
199
+ " 'StrongBacktracking': Backtracking method with strong Wolfe condition\n"
200
+ "}\n"
201
+ )
202
+ DDX_PARAM_INT(
203
+ "max_linesearch", opt->linesearch_max_iterations, 20,
204
+ "The maximum number of trials for the line search algorithm."
205
+ )
206
+ END_PARAM_MAP()
207
+
208
+ return 0;
209
+ }
210
+
211
+
212
+ void crfsuite_train_lbfgs_init(crfsuite_params_t* params)
213
+ {
214
+ exchange_options(params, NULL, 0);
215
+ }
216
+
217
+ int crfsuite_train_lbfgs(
218
+ encoder_t *gm,
219
+ dataset_t *trainset,
220
+ dataset_t *testset,
221
+ crfsuite_params_t *params,
222
+ logging_t *lg,
223
+ floatval_t **ptr_w
224
+ )
225
+ {
226
+ int ret = 0, lbret;
227
+ floatval_t *w = NULL;
228
+ clock_t begin = clock();
229
+ const int N = trainset->num_instances;
230
+ const int L = trainset->data->labels->num(trainset->data->labels);
231
+ const int A = trainset->data->attrs->num(trainset->data->attrs);
232
+ const int K = gm->num_features;
233
+ lbfgs_internal_t lbfgsi;
234
+ lbfgs_parameter_t lbfgsparam;
235
+ training_option_t opt;
236
+
237
+ /* Initialize the variables. */
238
+ memset(&lbfgsi, 0, sizeof(lbfgsi));
239
+ memset(&opt, 0, sizeof(opt));
240
+ lbfgs_parameter_init(&lbfgsparam);
241
+
242
+ /* Allocate an array that stores the current weights. */
243
+ w = (floatval_t*)calloc(sizeof(floatval_t), K);
244
+ if (w == NULL) {
245
+ ret = CRFSUITEERR_OUTOFMEMORY;
246
+ goto error_exit;
247
+ }
248
+
249
+ /* Allocate an array that stores the best weights. */
250
+ lbfgsi.best_w = (floatval_t*)calloc(sizeof(floatval_t), K);
251
+ if (lbfgsi.best_w == NULL) {
252
+ ret = CRFSUITEERR_OUTOFMEMORY;
253
+ goto error_exit;
254
+ }
255
+
256
+ /* Read the L-BFGS parameters. */
257
+ exchange_options(params, &opt, -1);
258
+ logging(lg, "L-BFGS optimization\n");
259
+ logging(lg, "c1: %f\n", opt.c1);
260
+ logging(lg, "c2: %f\n", opt.c2);
261
+ logging(lg, "num_memories: %d\n", opt.memory);
262
+ logging(lg, "max_iterations: %d\n", opt.max_iterations);
263
+ logging(lg, "epsilon: %f\n", opt.epsilon);
264
+ logging(lg, "stop: %d\n", opt.stop);
265
+ logging(lg, "delta: %f\n", opt.delta);
266
+ logging(lg, "linesearch: %s\n", opt.linesearch);
267
+ logging(lg, "linesearch.max_iterations: %d\n", opt.linesearch_max_iterations);
268
+ logging(lg, "\n");
269
+
270
+ /* Set parameters for L-BFGS. */
271
+ lbfgsparam.m = opt.memory;
272
+ lbfgsparam.epsilon = opt.epsilon;
273
+ lbfgsparam.past = opt.stop;
274
+ lbfgsparam.delta = opt.delta;
275
+ lbfgsparam.max_iterations = opt.max_iterations;
276
+ if (strcmp(opt.linesearch, "Backtracking") == 0) {
277
+ lbfgsparam.linesearch = LBFGS_LINESEARCH_BACKTRACKING;
278
+ } else if (strcmp(opt.linesearch, "StrongBacktracking") == 0) {
279
+ lbfgsparam.linesearch = LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE;
280
+ } else {
281
+ lbfgsparam.linesearch = LBFGS_LINESEARCH_MORETHUENTE;
282
+ }
283
+ lbfgsparam.max_linesearch = opt.linesearch_max_iterations;
284
+
285
+ /* Set regularization parameters. */
286
+ if (0 < opt.c1) {
287
+ lbfgsparam.orthantwise_c = opt.c1;
288
+ lbfgsparam.linesearch = LBFGS_LINESEARCH_BACKTRACKING;
289
+ } else {
290
+ lbfgsparam.orthantwise_c = 0;
291
+ }
292
+
293
+ /* Set other callback data. */
294
+ lbfgsi.gm = gm;
295
+ lbfgsi.trainset = trainset;
296
+ lbfgsi.testset = testset;
297
+ lbfgsi.c2 = opt.c2;
298
+ lbfgsi.lg = lg;
299
+
300
+ /* Call the L-BFGS solver. */
301
+ lbfgsi.begin = clock();
302
+ lbret = lbfgs(
303
+ K,
304
+ w,
305
+ NULL,
306
+ lbfgs_evaluate,
307
+ lbfgs_progress,
308
+ &lbfgsi,
309
+ &lbfgsparam
310
+ );
311
+ if (lbret == LBFGS_CONVERGENCE) {
312
+ logging(lg, "L-BFGS resulted in convergence\n");
313
+ } else if (lbret == LBFGS_STOP) {
314
+ logging(lg, "L-BFGS terminated with the stopping criteria\n");
315
+ } else if (lbret == LBFGSERR_MAXIMUMITERATION) {
316
+ logging(lg, "L-BFGS terminated with the maximum number of iterations\n");
317
+ } else {
318
+ logging(lg, "L-BFGS terminated with error code (%d)\n", lbret);
319
+ }
320
+
321
+ /* Restore the feature weights of the last call of lbfgs_progress(). */
322
+ veccopy(w, lbfgsi.best_w, K);
323
+
324
+ /* Report the run-time for the training. */
325
+ logging(lg, "Total seconds required for training: %.3f\n", (clock() - begin) / (double)CLOCKS_PER_SEC);
326
+ logging(lg, "\n");
327
+
328
+ /* Exit with success. */
329
+ free(lbfgsi.best_w);
330
+ *ptr_w = w;
331
+ return 0;
332
+
333
+ error_exit:
334
+ free(lbfgsi.best_w);
335
+ free(w);
336
+ *ptr_w = NULL;
337
+ return ret;
338
+ }
@@ -0,0 +1,435 @@
1
+ /*
2
+ * Online training with Passive Aggressive.
3
+ *
4
+ * Copyright (c) 2007-2010, Naoaki Okazaki
5
+ * All rights reserved.
6
+ *
7
+ * Redistribution and use in source and binary forms, with or without
8
+ * modification, are permitted provided that the following conditions are met:
9
+ * * Redistributions of source code must retain the above copyright
10
+ * notice, this list of conditions and the following disclaimer.
11
+ * * Redistributions in binary form must reproduce the above copyright
12
+ * notice, this list of conditions and the following disclaimer in the
13
+ * documentation and/or other materials provided with the distribution.
14
+ * * Neither the names of the authors nor the names of its contributors
15
+ * may be used to endorse or promote products derived from this
16
+ * software without specific prior written permission.
17
+ *
18
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
22
+ * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
25
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
26
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
+ */
30
+
31
+ /* $Id$ */
32
+
33
+ #ifdef HAVE_CONFIG_H
34
+ #include <config.h>
35
+ #endif/*HAVE_CONFIG_H*/
36
+
37
+ #include <os.h>
38
+
39
+ #include <stdio.h>
40
+ #include <stdlib.h>
41
+ #include <time.h>
42
+
43
+ #include <crfsuite.h>
44
+ #include "crfsuite_internal.h"
45
+ #include "logging.h"
46
+ #include "params.h"
47
+ #include "vecmath.h"
48
+
49
+ #define MIN(a, b) ((a) < (b) ? (a) : (b))
50
+
51
+ /**
52
+ * Training parameters (configurable with crfsuite_params_t interface).
53
+ */
54
+ typedef struct {
55
+ int type; /* Update strategy: 1 selects tau1 (PA-I), 2 selects tau2 (PA-II), any other value selects tau0 (plain PA). */
56
+ floatval_t c; /* Aggressiveness parameter handed to the tau function. */
57
+ int error_sensitive; /* Non-zero: the cost adds sqrt(#wrong labels); zero: the cost adds a constant 1. */
58
+ int averaging; /* Non-zero: the trainer outputs averaged weights instead of the raw ones. */
59
+ int max_iterations; /* Upper bound on the number of epochs. */
60
+ floatval_t epsilon; /* Training stops once the mean loss per instance drops below this value. */
61
+ } training_option_t;
62
+
63
+ /**
64
+ * Internal data structure for computing the sparse vector F(x, y) - F(x, y').
65
+ */
66
+ typedef struct {
67
+ /* An array of feature indices relevant to the instance. */
68
+ int *actives;
69
+ int num_actives; /* Number of entries currently stored in actives. */
70
+ int cap_actives; /* Allocated capacity of actives, counted in elements. */
71
+ char *used; /* Scratch flags [K]; delta_finalize() uses them to drop duplicated indices and clears them again. */
72
+
73
+ /* Coefficient for collecting feature weights. */
74
+ floatval_t c;
75
+ /* The difference vector [K]. */
76
+ floatval_t *delta;
77
+ /* The number of features. */
78
+ int K;
79
+ } delta_t;
80
+
81
+ static int delta_init(delta_t *dc, const int K)
82
+ {
83
+ memset(dc, 0, sizeof(*dc));
84
+ dc->used = (char*)calloc(K, sizeof(char));
85
+ dc->delta = (floatval_t*)calloc(K, sizeof(floatval_t));
86
+ dc->K = K;
87
+ if (dc->delta == NULL || dc->used == NULL) {
88
+ return 1;
89
+ }
90
+ return 0;
91
+ }
92
+
93
+ static void delta_finish(delta_t *dc)
94
+ {
95
+ free(dc->actives);
96
+ free(dc->used);
97
+ free(dc->delta);
98
+ memset(dc, 0, sizeof(*dc));
99
+ }
100
+
101
+ static void delta_reset(delta_t *dc)
102
+ {
103
+ int i;
104
+ for (i = 0;i < dc->num_actives;++i) {
105
+ int k = dc->actives[i];
106
+ dc->delta[k] = 0;
107
+ }
108
+ dc->num_actives = 0;
109
+ }
110
+
111
+ static void delta_collect(void *instance, int fid, floatval_t value)
112
+ {
113
+ delta_t *dc = (delta_t*)instance;
114
+
115
+ /* Expand the active feature list if necessary. */
116
+ if (dc->cap_actives <= dc->num_actives) {
117
+ ++dc->cap_actives;
118
+ dc->cap_actives *= 2;
119
+ dc->actives = (int*)realloc(dc->actives, sizeof(int) * dc->cap_actives);
120
+ }
121
+
122
+ dc->actives[dc->num_actives++] = fid;
123
+ dc->delta[fid] += dc->c * value;
124
+ }
125
+
126
+ static void delta_finalize(delta_t *dc)
127
+ {
128
+ int i, j = 0, k;
129
+
130
+ /* Collapse the duplicated indices. */
131
+ for (i = 0;i < dc->num_actives;++i) {
132
+ k = dc->actives[i];
133
+ if (!dc->used[k]) {
134
+ dc->actives[j++] = k;
135
+ dc->used[k] = 1;
136
+ }
137
+ }
138
+ dc->num_actives = j; /* This is the distinct number of indices. */
139
+
140
+ /* Reset the used flag. */
141
+ for (i = 0;i < dc->num_actives;++i) {
142
+ k = dc->actives[i];
143
+ dc->used[k] = 0;
144
+ }
145
+ }
146
+
147
+ static floatval_t delta_norm2(delta_t *dc)
148
+ {
149
+ int i;
150
+ floatval_t norm2 = 0.;
151
+
152
+ for (i = 0;i < dc->num_actives;++i) {
153
+ int k = dc->actives[i];
154
+ norm2 += dc->delta[k] * dc->delta[k];
155
+ }
156
+ return norm2;
157
+ }
158
+
159
+ static void delta_add(delta_t *dc, floatval_t *w, floatval_t *ws, const floatval_t tau, const floatval_t u)
160
+ {
161
+ int i;
162
+ const floatval_t tauu = tau * u;
163
+
164
+ for (i = 0;i < dc->num_actives;++i) {
165
+ int k = dc->actives[i];
166
+ w[k] += tau * dc->delta[k];
167
+ ws[k] += tauu * dc->delta[k];
168
+ }
169
+ }
170
+
171
/**
 * Count the positions at which two integer sequences differ.
 *
 * Neither sequence is modified, hence the const-qualified parameters
 * (callers passing plain int* are unaffected).
 *
 * @param x     First sequence, at least n elements.
 * @param y     Second sequence, at least n elements.
 * @param n     Number of positions to compare; nothing is read if n <= 0.
 * @return      The number of indices i in [0, n) with x[i] != y[i].
 */
static int diff(const int *x, const int *y, int n)
{
    int i, d = 0;

    for (i = 0; i < n; ++i) {
        if (x[i] != y[i]) {
            ++d;
        }
    }
    return d;
}
181
+
182
+ static floatval_t cost_insensitive(floatval_t err, floatval_t d)
183
+ {
184
+ return err + 1.;
185
+ }
186
+
187
+ static floatval_t cost_sensitive(floatval_t err, floatval_t d)
188
+ {
189
+ return err + sqrt(d);
190
+ }
191
+
192
+ static floatval_t tau0(floatval_t cost, floatval_t norm, floatval_t c)
193
+ {
194
+ return cost / norm;
195
+ }
196
+
197
+ static floatval_t tau1(floatval_t cost, floatval_t norm, floatval_t c)
198
+ {
199
+ return MIN(c, cost / norm);
200
+ }
201
+
202
+ static floatval_t tau2(floatval_t cost, floatval_t norm, floatval_t c)
203
+ {
204
+ return cost / (norm + 0.5 / c);
205
+ }
206
+
207
/*
 * Parameter map of the Passive Aggressive trainer.
 *
 * Lists the trainer's parameters together with the training_option_t field
 * each one maps to and a help string. The BEGIN_PARAM_MAP / DDX_PARAM_* /
 * END_PARAM_MAP macros are not defined in this file (presumably params.h --
 * confirm there what each mode does and that the third DDX argument is the
 * default value).
 *
 * In this file the function is called with:
 *   mode  0 and opt == NULL by crfsuite_train_passive_aggressive_init();
 *   mode -1 and a real opt by the trainer, to obtain the parameter values.
 *
 * The statement after the map returns 0.
 */
+ static int exchange_options(crfsuite_params_t* params, training_option_t* opt, int mode)
208
+ {
209
+ BEGIN_PARAM_MAP(params, mode)
210
+ DDX_PARAM_INT(
211
+ "type", opt->type, 1,
212
+ "The strategy for updating feature weights: {\n"
213
+ " 0: PA without slack variables,\n"
214
+ " 1: PA type I,\n"
215
+ " 2: PA type II\n"
216
+ "}.\n"
217
+ )
218
+ DDX_PARAM_FLOAT(
219
+ "c", opt->c, 1.,
220
+ "The aggressiveness parameter."
221
+ )
222
+ DDX_PARAM_INT(
223
+ "error_sensitive", opt->error_sensitive, 1,
224
+ "Consider the number of incorrect labels to the cost function."
225
+ )
226
+ DDX_PARAM_INT(
227
+ "averaging", opt->averaging, 1,
228
+ "Compute the average of feature weights (similarly to Averaged Perceptron)."
229
+ )
230
+ DDX_PARAM_INT(
231
+ "max_iterations", opt->max_iterations, 100,
232
+ "The maximum number of iterations."
233
+ )
234
+ DDX_PARAM_FLOAT(
235
+ "epsilon", opt->epsilon, 0.,
236
+ "The stopping criterion (the mean loss)."
237
+ )
238
+ END_PARAM_MAP()
239
+
240
+ return 0;
241
+ }
242
+
243
+ void crfsuite_train_passive_aggressive_init(crfsuite_params_t* params)
244
+ {
245
+ exchange_options(params, NULL, 0);
246
+ }
247
+
248
/**
 * Train feature weights with the online Passive Aggressive algorithm.
 *
 * Every epoch passes the training set to dataset_shuffle(), tags each
 * instance with the current weights and, when the Viterbi labels differ from
 * the reference labels, moves the weights along F(x, y) - F(x, y') by a step
 * tau computed by tau0/tau1/tau2 (selected by the "type" parameter).
 *
 * gm        Encoder used to set weights/instances, decode, score and
 *           enumerate the features on a label path.
 * trainset  Training instances.
 * testset   Holdout data; evaluated after every epoch when not NULL.
 * params    Parameter store read through exchange_options().
 * lg        Logger receiving the progress messages.
 * ptr_w     Receives the resulting array of K weights (averaged when the
 *           "averaging" parameter is non-zero, otherwise a copy of the raw
 *           weights). The array is allocated here and is not freed on
 *           success, so the caller takes ownership. Set to NULL on failure.
 *
 * Returns 0 on success, or CRFSUITEERR_OUTOFMEMORY if an allocation fails.
 */
+ int crfsuite_train_passive_aggressive(
249
+ encoder_t *gm,
250
+ dataset_t *trainset,
251
+ dataset_t *testset,
252
+ crfsuite_params_t *params,
253
+ logging_t *lg,
254
+ floatval_t **ptr_w
255
+ )
256
+ {
257
+ int n, i, u, ret = 0;
258
+ int *viterbi = NULL;
259
+ floatval_t *w = NULL, *ws = NULL, *wa = NULL; /* w: current weights, ws: u-weighted sum of updates, wa: weights reported to the caller. */
260
+ const int N = trainset->num_instances;
261
+ const int K = gm->num_features;
262
+ const int T = gm->cap_items; /* Size of the viterbi label buffer. */
263
+ training_option_t opt;
264
+ delta_t dc;
265
+ clock_t begin = clock();
266
+ floatval_t (*cost_function)(floatval_t err, floatval_t d) = NULL;
267
+ floatval_t (*tau_function)(floatval_t cost, floatval_t norm, floatval_t c) = NULL;
268
+
269
+ /* Initialize the variable. */
270
+ if (delta_init(&dc, K) != 0) {
271
+ ret = CRFSUITEERR_OUTOFMEMORY;
272
+ goto error_exit;
273
+ }
274
+
275
+ /* Obtain parameter values. */
276
+ exchange_options(params, &opt, -1);
277
+
278
+ /* Allocate arrays. */
279
+ w = (floatval_t*)calloc(sizeof(floatval_t), K);
280
+ ws = (floatval_t*)calloc(sizeof(floatval_t), K);
281
+ wa = (floatval_t*)calloc(sizeof(floatval_t), K);
282
+ viterbi = (int*)calloc(sizeof(int), T);
283
+ if (w == NULL || ws == NULL || wa == NULL || viterbi == NULL) {
284
+ ret = CRFSUITEERR_OUTOFMEMORY;
285
+ goto error_exit;
286
+ }
287
+
288
+ /* Set the cost function for instances. */
289
+ if (opt.error_sensitive) {
290
+ cost_function = cost_sensitive;
291
+ } else {
292
+ cost_function = cost_insensitive;
293
+ }
294
+
295
+ /* Set the routine for computing tau (i.e., PA, PA-I, PA-II). */
296
+ if (opt.type == 1) {
297
+ tau_function = tau1;
298
+ } else if (opt.type == 2) {
299
+ tau_function = tau2;
300
+ } else {
301
+ tau_function = tau0;
302
+ }
303
+
304
+ /* Show the parameters. */
305
+ logging(lg, "Passive Aggressive\n");
306
+ logging(lg, "type: %d\n", opt.type);
307
+ logging(lg, "c: %f\n", opt.c);
308
+ logging(lg, "error_sensitive: %d\n", opt.error_sensitive);
309
+ logging(lg, "averaging: %d\n", opt.averaging);
310
+ logging(lg, "max_iterations: %d\n", opt.max_iterations);
311
+ logging(lg, "epsilon: %f\n", opt.epsilon);
312
+ logging(lg, "\n");
313
+
/* u counts the instances seen so far (incremented once per instance below);
   it scales the ws accumulator in delta_add() and the averaging step. */
314
+ u = 1;
315
+
316
+ /* Loop for epoch. */
317
+ for (i = 0;i < opt.max_iterations;++i) {
318
+ floatval_t norm = 0., sum_loss = 0.;
319
+ clock_t iteration_begin = clock();
320
+
321
+ /* Shuffle the instances. */
322
+ dataset_shuffle(trainset);
323
+
324
+ /* Loop for each instance. */
325
+ for (n = 0;n < N;++n) {
326
+ int d = 0;
327
+ floatval_t sv;
328
+ const crfsuite_instance_t *inst = dataset_get(trainset, n);
329
+
330
+ /* Set the feature weights to the encoder. */
331
+ gm->set_weights(gm, w, 1.);
332
+ gm->set_instance(gm, inst);
333
+
334
+ /* Tag the sequence with the current model. */
335
+ gm->viterbi(gm, viterbi, &sv);
336
+
337
+ /* Compute the number of different labels. */
338
+ d = diff(inst->labels, viterbi, inst->num_items);
339
+ if (0 < d) {
340
+ floatval_t sc, norm2;
341
+ floatval_t tau, cost;
342
+
343
+ /*
344
+ Compute the cost of this instance.
345
+ */
/* sv is the score obtained from viterbi(); sc is the score of the reference labels. */
346
+ gm->score(gm, inst->labels, &sc);
347
+ cost = cost_function(sv - sc, (double)d);
348
+
349
+ /* Initialize delta[k] = 0. */
350
+ delta_reset(&dc);
351
+
352
+ /*
353
+ For every feature k on the correct path:
354
+ delta[k] += 1;
355
+ */
356
+ dc.c = 1;
357
+ gm->features_on_path(gm, inst, inst->labels, delta_collect, &dc);
358
+
359
+ /*
360
+ For every feature k on the Viterbi path:
361
+ delta[k] -= 1;
362
+ */
363
+ dc.c = -1;
364
+ gm->features_on_path(gm, inst, viterbi, delta_collect, &dc);
365
+
/* Collapse duplicated indices so each feature is visited once by delta_norm2() and delta_add(). */
366
+ delta_finalize(&dc);
367
+
368
+ /*
369
+ Compute tau (depending on PA, PA-I, and PA-II).
370
+ */
371
+ norm2 = delta_norm2(&dc);
372
+ tau = tau_function(cost, norm2, opt.c);
373
+
374
+ /*
375
+ Update the feature weights:
376
+ w[k] += tau * delta[k]
377
+ ws[k] += tau * u * delta[k]
378
+ */
379
+ delta_add(&dc, w, ws, tau, u);
380
+
381
+ sum_loss += cost;
382
+ }
383
+ ++u;
384
+ }
385
+
386
+ if (opt.averaging) {
387
+ /* Perform averaging to wa. */
/* Presumably computes wa = w - ws / u; vecasub() is defined outside this file -- confirm. */
388
+ veccopy(wa, w, K);
389
+ vecasub(wa, 1./u, ws, K);
390
+ } else {
391
+ /* Simply copy the weights to wa. */
392
+ veccopy(wa, w, K);
393
+ }
394
+
395
+ /* Output the progress. */
396
+ logging(lg, "***** Iteration #%d *****\n", i+1);
397
+ logging(lg, "Loss: %f\n", sum_loss);
398
+ logging(lg, "Feature norm: %f\n", sqrt(vecdot(w, w, K)));
399
+ logging(lg, "Seconds required for this iteration: %.3f\n", (clock() - iteration_begin) / (double)CLOCKS_PER_SEC);
400
+
401
+ /* Holdout evaluation if necessary. */
402
+ if (testset != NULL) {
403
+ holdout_evaluation(gm, testset, wa, lg);
404
+ }
405
+
406
+ logging(lg, "\n");
407
+
408
+ /* Convergence test. */
409
+ if (sum_loss / N < opt.epsilon) {
410
+ logging(lg, "Terminated with the stopping criterion\n");
411
+ logging(lg, "\n");
412
+ break;
413
+ }
414
+ }
415
+
416
+ logging(lg, "Total seconds required for training: %.3f\n", (clock() - begin) / (double)CLOCKS_PER_SEC);
417
+ logging(lg, "\n");
418
+
/* Success: wa is handed to the caller through ptr_w, so only the working buffers are freed. */
419
+ free(viterbi);
420
+ free(ws);
421
+ free(w);
422
+ *ptr_w = wa;
423
+ delta_finish(&dc);
424
+ return ret;
425
+
/* Failure: every buffer, including wa, is released and the caller receives NULL. */
426
+ error_exit:
427
+ free(viterbi);
428
+ free(wa);
429
+ free(ws);
430
+ free(w);
431
+ *ptr_w = NULL;
432
+ delta_finish(&dc);
433
+
434
+ return ret;
435
+ }