opener-opinion-detector-base 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (261) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +101 -0
  3. data/bin/opinion-detector-base +19 -0
  4. data/core/annotation.cfg.erb +9 -0
  5. data/core/packages/KafNafParser-1.4.tar.gz +0 -0
  6. data/core/packages/VUA_pylib-1.5.tar.gz +0 -0
  7. data/core/python-scripts/LICENSE +339 -0
  8. data/core/python-scripts/README.md +226 -0
  9. data/core/python-scripts/classify_kaf_naf_file.py +499 -0
  10. data/core/python-scripts/cross_validation.py +634 -0
  11. data/core/python-scripts/generate_folds.py +134 -0
  12. data/core/python-scripts/models.cfg +10 -0
  13. data/core/python-scripts/my_templates/README +33 -0
  14. data/core/python-scripts/my_templates/templates_exp.only0.txt +6 -0
  15. data/core/python-scripts/my_templates/templates_exp.pol0.txt +10 -0
  16. data/core/python-scripts/my_templates/templates_exp.red.txt +7 -0
  17. data/core/python-scripts/my_templates/templates_exp.txt +10 -0
  18. data/core/python-scripts/my_templates/templates_holder.only0.txt +11 -0
  19. data/core/python-scripts/my_templates/templates_holder.red.txt +9 -0
  20. data/core/python-scripts/my_templates/templates_holder.txt +10 -0
  21. data/core/python-scripts/my_templates/templates_target.only0.txt +11 -0
  22. data/core/python-scripts/my_templates/templates_target.red.txt +9 -0
  23. data/core/python-scripts/my_templates/templates_target.txt +10 -0
  24. data/core/python-scripts/run_all_experiments.sh +49 -0
  25. data/core/python-scripts/run_basic.py +20 -0
  26. data/core/python-scripts/run_experiment.sh +42 -0
  27. data/core/python-scripts/scripts/__init__.py +1 -0
  28. data/core/python-scripts/scripts/config_manager.py +314 -0
  29. data/core/python-scripts/scripts/crfutils.py +215 -0
  30. data/core/python-scripts/scripts/extract_feats_relations.py +295 -0
  31. data/core/python-scripts/scripts/extract_features.py +376 -0
  32. data/core/python-scripts/scripts/feats_to_crf.exp.py +105 -0
  33. data/core/python-scripts/scripts/lexicons.py +44 -0
  34. data/core/python-scripts/scripts/link_entities_distance.py +77 -0
  35. data/core/python-scripts/scripts/relation_classifier.py +250 -0
  36. data/core/python-scripts/train.py +566 -0
  37. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/PKG-INFO +10 -0
  38. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/SOURCES.txt +22 -0
  39. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/dependency_links.txt +1 -0
  40. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/installed-files.txt +47 -0
  41. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/top_level.txt +1 -0
  42. data/core/site-packages/pre_build/KafNafParser/KafNafParserMod.py +390 -0
  43. data/core/site-packages/pre_build/KafNafParser/KafNafParserMod.pyc +0 -0
  44. data/core/site-packages/pre_build/KafNafParser/__init__.py +14 -0
  45. data/core/site-packages/pre_build/KafNafParser/__init__.pyc +0 -0
  46. data/core/site-packages/pre_build/KafNafParser/constituency_data.py +125 -0
  47. data/core/site-packages/pre_build/KafNafParser/constituency_data.pyc +0 -0
  48. data/core/site-packages/pre_build/KafNafParser/coreference_data.py +52 -0
  49. data/core/site-packages/pre_build/KafNafParser/coreference_data.pyc +0 -0
  50. data/core/site-packages/pre_build/KafNafParser/dependency_data.py +78 -0
  51. data/core/site-packages/pre_build/KafNafParser/dependency_data.pyc +0 -0
  52. data/core/site-packages/pre_build/KafNafParser/entity_data.py +59 -0
  53. data/core/site-packages/pre_build/KafNafParser/entity_data.pyc +0 -0
  54. data/core/site-packages/pre_build/KafNafParser/external_references_data.py +41 -0
  55. data/core/site-packages/pre_build/KafNafParser/external_references_data.pyc +0 -0
  56. data/core/site-packages/pre_build/KafNafParser/feature_extractor/__init__.py +2 -0
  57. data/core/site-packages/pre_build/KafNafParser/feature_extractor/__init__.pyc +0 -0
  58. data/core/site-packages/pre_build/KafNafParser/feature_extractor/constituency.py +205 -0
  59. data/core/site-packages/pre_build/KafNafParser/feature_extractor/constituency.pyc +0 -0
  60. data/core/site-packages/pre_build/KafNafParser/feature_extractor/dependency.py +309 -0
  61. data/core/site-packages/pre_build/KafNafParser/feature_extractor/dependency.pyc +0 -0
  62. data/core/site-packages/pre_build/KafNafParser/features_data.py +131 -0
  63. data/core/site-packages/pre_build/KafNafParser/features_data.pyc +0 -0
  64. data/core/site-packages/pre_build/KafNafParser/header_data.py +127 -0
  65. data/core/site-packages/pre_build/KafNafParser/header_data.pyc +0 -0
  66. data/core/site-packages/pre_build/KafNafParser/opinion_data.py +211 -0
  67. data/core/site-packages/pre_build/KafNafParser/opinion_data.pyc +0 -0
  68. data/core/site-packages/pre_build/KafNafParser/references_data.py +23 -0
  69. data/core/site-packages/pre_build/KafNafParser/references_data.pyc +0 -0
  70. data/core/site-packages/pre_build/KafNafParser/span_data.py +63 -0
  71. data/core/site-packages/pre_build/KafNafParser/span_data.pyc +0 -0
  72. data/core/site-packages/pre_build/KafNafParser/term_data.py +111 -0
  73. data/core/site-packages/pre_build/KafNafParser/term_data.pyc +0 -0
  74. data/core/site-packages/pre_build/KafNafParser/term_sentiment_data.py +42 -0
  75. data/core/site-packages/pre_build/KafNafParser/term_sentiment_data.pyc +0 -0
  76. data/core/site-packages/pre_build/KafNafParser/text_data.py +99 -0
  77. data/core/site-packages/pre_build/KafNafParser/text_data.pyc +0 -0
  78. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/PKG-INFO +10 -0
  79. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/SOURCES.txt +14 -0
  80. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/dependency_links.txt +1 -0
  81. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/installed-files.txt +23 -0
  82. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/top_level.txt +1 -0
  83. data/core/site-packages/pre_build/VUA_pylib/__init__.py +1 -0
  84. data/core/site-packages/pre_build/VUA_pylib/__init__.pyc +0 -0
  85. data/core/site-packages/pre_build/VUA_pylib/common/__init__.py +1 -0
  86. data/core/site-packages/pre_build/VUA_pylib/common/__init__.pyc +0 -0
  87. data/core/site-packages/pre_build/VUA_pylib/common/common.py +28 -0
  88. data/core/site-packages/pre_build/VUA_pylib/common/common.pyc +0 -0
  89. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/__init__.py +1 -0
  90. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/__init__.pyc +0 -0
  91. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/google_web_nl.py +156 -0
  92. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/google_web_nl.pyc +0 -0
  93. data/core/site-packages/pre_build/VUA_pylib/io_utils/__init__.py +1 -0
  94. data/core/site-packages/pre_build/VUA_pylib/io_utils/__init__.pyc +0 -0
  95. data/core/site-packages/pre_build/VUA_pylib/io_utils/feature_file.py +121 -0
  96. data/core/site-packages/pre_build/VUA_pylib/io_utils/feature_file.pyc +0 -0
  97. data/core/site-packages/pre_build/VUA_pylib/lexicon/__init__.py +1 -0
  98. data/core/site-packages/pre_build/VUA_pylib/lexicon/__init__.pyc +0 -0
  99. data/core/site-packages/pre_build/VUA_pylib/lexicon/lexicon.py +72 -0
  100. data/core/site-packages/pre_build/VUA_pylib/lexicon/lexicon.pyc +0 -0
  101. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/PKG-INFO +10 -0
  102. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/SOURCES.txt +7 -0
  103. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/dependency_links.txt +1 -0
  104. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/installed-files.txt +11 -0
  105. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/top_level.txt +1 -0
  106. data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.py +165 -0
  107. data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.pyc +0 -0
  108. data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.py +439 -0
  109. data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.pyc +0 -0
  110. data/core/site-packages/pre_build/VUKafParserPy/__init__.py +7 -0
  111. data/core/site-packages/pre_build/VUKafParserPy/__init__.pyc +0 -0
  112. data/core/vendor/src/crfsuite/AUTHORS +1 -0
  113. data/core/vendor/src/crfsuite/COPYING +27 -0
  114. data/core/vendor/src/crfsuite/ChangeLog +103 -0
  115. data/core/vendor/src/crfsuite/INSTALL +236 -0
  116. data/core/vendor/src/crfsuite/Makefile.am +19 -0
  117. data/core/vendor/src/crfsuite/Makefile.in +783 -0
  118. data/core/vendor/src/crfsuite/README +183 -0
  119. data/core/vendor/src/crfsuite/aclocal.m4 +9018 -0
  120. data/core/vendor/src/crfsuite/autogen.sh +38 -0
  121. data/core/vendor/src/crfsuite/compile +143 -0
  122. data/core/vendor/src/crfsuite/config.guess +1502 -0
  123. data/core/vendor/src/crfsuite/config.h.in +198 -0
  124. data/core/vendor/src/crfsuite/config.sub +1714 -0
  125. data/core/vendor/src/crfsuite/configure +14273 -0
  126. data/core/vendor/src/crfsuite/configure.in +149 -0
  127. data/core/vendor/src/crfsuite/crfsuite.sln +42 -0
  128. data/core/vendor/src/crfsuite/depcomp +630 -0
  129. data/core/vendor/src/crfsuite/example/chunking.py +49 -0
  130. data/core/vendor/src/crfsuite/example/crfutils.py +179 -0
  131. data/core/vendor/src/crfsuite/example/ner.py +270 -0
  132. data/core/vendor/src/crfsuite/example/pos.py +78 -0
  133. data/core/vendor/src/crfsuite/example/template.py +88 -0
  134. data/core/vendor/src/crfsuite/frontend/Makefile.am +29 -0
  135. data/core/vendor/src/crfsuite/frontend/Makefile.in +640 -0
  136. data/core/vendor/src/crfsuite/frontend/dump.c +116 -0
  137. data/core/vendor/src/crfsuite/frontend/frontend.vcxproj +129 -0
  138. data/core/vendor/src/crfsuite/frontend/iwa.c +273 -0
  139. data/core/vendor/src/crfsuite/frontend/iwa.h +65 -0
  140. data/core/vendor/src/crfsuite/frontend/learn.c +439 -0
  141. data/core/vendor/src/crfsuite/frontend/main.c +137 -0
  142. data/core/vendor/src/crfsuite/frontend/option.c +93 -0
  143. data/core/vendor/src/crfsuite/frontend/option.h +86 -0
  144. data/core/vendor/src/crfsuite/frontend/readdata.h +38 -0
  145. data/core/vendor/src/crfsuite/frontend/reader.c +136 -0
  146. data/core/vendor/src/crfsuite/frontend/tag.c +427 -0
  147. data/core/vendor/src/crfsuite/genbinary.sh.in +15 -0
  148. data/core/vendor/src/crfsuite/include/Makefile.am +11 -0
  149. data/core/vendor/src/crfsuite/include/Makefile.in +461 -0
  150. data/core/vendor/src/crfsuite/include/crfsuite.h +1063 -0
  151. data/core/vendor/src/crfsuite/include/crfsuite.hpp +555 -0
  152. data/core/vendor/src/crfsuite/include/crfsuite_api.hpp +400 -0
  153. data/core/vendor/src/crfsuite/include/os.h +61 -0
  154. data/core/vendor/src/crfsuite/install-sh +520 -0
  155. data/core/vendor/src/crfsuite/lib/cqdb/COPYING +28 -0
  156. data/core/vendor/src/crfsuite/lib/cqdb/Makefile.am +21 -0
  157. data/core/vendor/src/crfsuite/lib/cqdb/Makefile.in +549 -0
  158. data/core/vendor/src/crfsuite/lib/cqdb/cqdb.vcxproj +86 -0
  159. data/core/vendor/src/crfsuite/lib/cqdb/include/cqdb.h +524 -0
  160. data/core/vendor/src/crfsuite/lib/cqdb/src/cqdb.c +587 -0
  161. data/core/vendor/src/crfsuite/lib/cqdb/src/lookup3.c +976 -0
  162. data/core/vendor/src/crfsuite/lib/crf/Makefile.am +46 -0
  163. data/core/vendor/src/crfsuite/lib/crf/Makefile.in +721 -0
  164. data/core/vendor/src/crfsuite/lib/crf/crf.vcxproj +216 -0
  165. data/core/vendor/src/crfsuite/lib/crf/src/crf1d.h +353 -0
  166. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_context.c +705 -0
  167. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_encode.c +943 -0
  168. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_feature.c +352 -0
  169. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_model.c +994 -0
  170. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_tag.c +550 -0
  171. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite.c +492 -0
  172. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite_internal.h +236 -0
  173. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite_train.c +272 -0
  174. data/core/vendor/src/crfsuite/lib/crf/src/dataset.c +106 -0
  175. data/core/vendor/src/crfsuite/lib/crf/src/dictionary.c +118 -0
  176. data/core/vendor/src/crfsuite/lib/crf/src/holdout.c +80 -0
  177. data/core/vendor/src/crfsuite/lib/crf/src/logging.c +91 -0
  178. data/core/vendor/src/crfsuite/lib/crf/src/logging.h +48 -0
  179. data/core/vendor/src/crfsuite/lib/crf/src/params.c +335 -0
  180. data/core/vendor/src/crfsuite/lib/crf/src/params.h +80 -0
  181. data/core/vendor/src/crfsuite/lib/crf/src/quark.c +172 -0
  182. data/core/vendor/src/crfsuite/lib/crf/src/quark.h +46 -0
  183. data/core/vendor/src/crfsuite/lib/crf/src/rumavl.c +1107 -0
  184. data/core/vendor/src/crfsuite/lib/crf/src/rumavl.h +160 -0
  185. data/core/vendor/src/crfsuite/lib/crf/src/train_arow.c +408 -0
  186. data/core/vendor/src/crfsuite/lib/crf/src/train_averaged_perceptron.c +242 -0
  187. data/core/vendor/src/crfsuite/lib/crf/src/train_l2sgd.c +507 -0
  188. data/core/vendor/src/crfsuite/lib/crf/src/train_lbfgs.c +338 -0
  189. data/core/vendor/src/crfsuite/lib/crf/src/train_passive_aggressive.c +435 -0
  190. data/core/vendor/src/crfsuite/lib/crf/src/vecmath.h +341 -0
  191. data/core/vendor/src/crfsuite/ltmain.sh +8413 -0
  192. data/core/vendor/src/crfsuite/missing +376 -0
  193. data/core/vendor/src/crfsuite/swig/Makefile.am +13 -0
  194. data/core/vendor/src/crfsuite/swig/Makefile.in +365 -0
  195. data/core/vendor/src/crfsuite/swig/crfsuite.cpp +2 -0
  196. data/core/vendor/src/crfsuite/swig/export.i +32 -0
  197. data/core/vendor/src/crfsuite/swig/python/README +92 -0
  198. data/core/vendor/src/crfsuite/swig/python/crfsuite.py +329 -0
  199. data/core/vendor/src/crfsuite/swig/python/export_wrap.cpp +14355 -0
  200. data/core/vendor/src/crfsuite/swig/python/export_wrap.h +63 -0
  201. data/core/vendor/src/crfsuite/swig/python/prepare.sh +9 -0
  202. data/core/vendor/src/crfsuite/swig/python/sample_tag.py +52 -0
  203. data/core/vendor/src/crfsuite/swig/python/sample_train.py +68 -0
  204. data/core/vendor/src/crfsuite/swig/python/setup.py +44 -0
  205. data/core/vendor/src/crfsuite/win32/stdint.h +679 -0
  206. data/core/vendor/src/liblbfgs/AUTHORS +1 -0
  207. data/core/vendor/src/liblbfgs/COPYING +22 -0
  208. data/core/vendor/src/liblbfgs/ChangeLog +120 -0
  209. data/core/vendor/src/liblbfgs/INSTALL +231 -0
  210. data/core/vendor/src/liblbfgs/Makefile.am +10 -0
  211. data/core/vendor/src/liblbfgs/Makefile.in +638 -0
  212. data/core/vendor/src/liblbfgs/NEWS +0 -0
  213. data/core/vendor/src/liblbfgs/README +71 -0
  214. data/core/vendor/src/liblbfgs/aclocal.m4 +6985 -0
  215. data/core/vendor/src/liblbfgs/autogen.sh +38 -0
  216. data/core/vendor/src/liblbfgs/config.guess +1411 -0
  217. data/core/vendor/src/liblbfgs/config.h.in +64 -0
  218. data/core/vendor/src/liblbfgs/config.sub +1500 -0
  219. data/core/vendor/src/liblbfgs/configure +21146 -0
  220. data/core/vendor/src/liblbfgs/configure.in +107 -0
  221. data/core/vendor/src/liblbfgs/depcomp +522 -0
  222. data/core/vendor/src/liblbfgs/include/lbfgs.h +745 -0
  223. data/core/vendor/src/liblbfgs/install-sh +322 -0
  224. data/core/vendor/src/liblbfgs/lbfgs.sln +26 -0
  225. data/core/vendor/src/liblbfgs/lib/Makefile.am +24 -0
  226. data/core/vendor/src/liblbfgs/lib/Makefile.in +499 -0
  227. data/core/vendor/src/liblbfgs/lib/arithmetic_ansi.h +133 -0
  228. data/core/vendor/src/liblbfgs/lib/arithmetic_sse_double.h +294 -0
  229. data/core/vendor/src/liblbfgs/lib/arithmetic_sse_float.h +298 -0
  230. data/core/vendor/src/liblbfgs/lib/lbfgs.c +1371 -0
  231. data/core/vendor/src/liblbfgs/lib/lib.vcxproj +95 -0
  232. data/core/vendor/src/liblbfgs/ltmain.sh +6426 -0
  233. data/core/vendor/src/liblbfgs/missing +353 -0
  234. data/core/vendor/src/liblbfgs/sample/Makefile.am +15 -0
  235. data/core/vendor/src/liblbfgs/sample/Makefile.in +433 -0
  236. data/core/vendor/src/liblbfgs/sample/sample.c +81 -0
  237. data/core/vendor/src/liblbfgs/sample/sample.cpp +126 -0
  238. data/core/vendor/src/liblbfgs/sample/sample.vcxproj +105 -0
  239. data/core/vendor/src/svm_light/LICENSE.txt +59 -0
  240. data/core/vendor/src/svm_light/Makefile +105 -0
  241. data/core/vendor/src/svm_light/kernel.h +40 -0
  242. data/core/vendor/src/svm_light/svm_classify.c +197 -0
  243. data/core/vendor/src/svm_light/svm_common.c +985 -0
  244. data/core/vendor/src/svm_light/svm_common.h +301 -0
  245. data/core/vendor/src/svm_light/svm_hideo.c +1062 -0
  246. data/core/vendor/src/svm_light/svm_learn.c +4147 -0
  247. data/core/vendor/src/svm_light/svm_learn.h +169 -0
  248. data/core/vendor/src/svm_light/svm_learn_main.c +397 -0
  249. data/core/vendor/src/svm_light/svm_loqo.c +211 -0
  250. data/ext/hack/Rakefile +17 -0
  251. data/ext/hack/support.rb +88 -0
  252. data/lib/opener/opinion_detectors/base.rb +112 -0
  253. data/lib/opener/opinion_detectors/base/version.rb +7 -0
  254. data/lib/opener/opinion_detectors/configuration_creator.rb +86 -0
  255. data/lib/opener/opinion_detectors/de.rb +7 -0
  256. data/lib/opener/opinion_detectors/en.rb +7 -0
  257. data/lib/opener/opinion_detectors/it.rb +7 -0
  258. data/lib/opener/opinion_detectors/nl.rb +6 -0
  259. data/opener-opinion-detector-base.gemspec +35 -0
  260. data/pre_build_requirements.txt +3 -0
  261. metadata +374 -0
@@ -0,0 +1,134 @@
1
+ #!/usr/bin/env python
2
+
3
+ import sys
4
+ import getopt
5
+ import logging
6
+ import os
7
+ import random
8
+ from shutil import rmtree
9
+
10
+
11
def usage(cmd):
    """Write the command-line help of this script to standard error.

    cmd -- program name shown in the 'Usage:' line (normally sys.argv[0]).
    """
    # The text is emitted with sys.stderr.write rather than the Python-2-only
    # `print >>sys.stderr` statement, so the function works unchanged on both
    # Python 2 and Python 3.
    lines = [
        'Usage: '+cmd+' options',
        'Options:',
        '\t-f --file: input file with a list of documents (required)',
        '\t-n --num: num of folds to create (required)',
        '\t-o --out: name of the main folder to store the subfolds (required)',
        '\t-s --subfolder: prefix for the subfolders (optional, default "fold")',
        '',
        'Examples',
        '\tgenerate_folds.py -f vu.doclist.attitude.ula.xbank --num 10 -o out_folder',
        '\tgenerate_folds.py -f vu.doclist.attitude.ula.xbank --num 10 -o out_folder --subfolder my_custom_fold',
    ]
    sys.stderr.write('\n'.join(lines)+'\n')
22
+
23
+
24
def generate_folds(input_file,num_folds,out_folder,name_subfolder='fold'):
    """Split a list of documents into train/test folds for cross validation.

    The documents (one per line of `input_file`) are shuffled and divided in
    `num_folds` consecutive slices. For every fold `n` a folder
    `<out_folder>/<name_subfolder>_<n>` is created containing two files:
    `test` (the n-th slice) and `train` (all the remaining documents).

    input_file     -- path to a text file with one document per line
    num_folds      -- number of folds to create (must be >= 1)
    out_folder     -- main output folder. WARNING: if it already exists it is
                      removed completely before the folds are written.
    name_subfolder -- prefix of the per-fold subfolders (default 'fold')

    Raises ValueError if `num_folds` is smaller than 1.
    """
    # Validate before touching the disk, so a bad value cannot wipe an
    # existing output folder and then fail half way.
    if num_folds < 1:
        raise ValueError('num_folds must be >= 1, got '+str(num_folds))

    # Load the input file
    logging.debug('Loading elements from '+input_file)
    with open(input_file,'r') as fic:
        stripped_lines = [line.strip() for line in fic]
    # Blank lines are not documents; keeping them would create empty entries
    # in the folds.
    elements = [ele for ele in stripped_lines if ele]
    logging.debug('Loaded '+str(len(elements))+' elements')

    ## Creating folders and subfolders:
    if os.path.exists(out_folder):
        sys.stderr.write('Output folder '+out_folder+' already exists\n')
        rmtree(out_folder)
        sys.stderr.write('It has been removed...\n')

    logging.debug('Creating '+out_folder+' and subfolders')
    folds = []
    os.mkdir(out_folder)
    for n in range(num_folds):
        my_name = os.path.join(out_folder,name_subfolder+'_'+str(n))
        os.mkdir(my_name)
        logging.debug('Created '+my_name)
        folds.append(my_name)

    ## Creating folds
    random.shuffle(elements)
    # The first `remainder` folds get one extra element, so every document
    # ends up in exactly one test set even when the number of documents is
    # not a multiple of num_folds.
    size_of_fold, remainder = divmod(len(elements),num_folds)
    my_begin = 0
    for n, this_fold in enumerate(folds):
        my_end = my_begin + size_of_fold
        if n < remainder:
            my_end += 1
        my_test = elements[my_begin:my_end]
        my_train = elements[:my_begin]+elements[my_end:]
        # Only possible when the input list contains duplicated documents.
        if set(my_test) & set(my_train):
            sys.stderr.write('Error overlapping\n')
            sys.stderr.write(str(my_train)+'\n')
            sys.stderr.write(str(my_test)+'\n')
        my_begin = my_end

        # Save the folds
        for part_name, part_elements in (('train',my_train),('test',my_test)):
            part_path = os.path.join(this_fold,part_name)
            logging.debug('Writing info to '+part_path)
            with open(part_path,'w') as fic_out:
                fic_out.writelines(ele+'\n' for ele in part_elements)

    logging.debug('Finished OK')
90
+
91
if __name__ == '__main__':
    # Command-line entry point: parse the options, check that the three
    # required ones are present and generate the folds.
    logging.basicConfig(stream=sys.stderr,format='%(asctime)s - %(levelname)s - %(message)s',level=logging.DEBUG)

    input_file = None
    num_folds = None
    out_folder = None
    name_subfolder = 'fold'

    try:
        opts, args = getopt.getopt(sys.argv[1:],"f:n:o:s:",["file=","num=","out=","subfolder="])
    except getopt.GetoptError as e:
        # An unknown or incomplete option leaves nothing usable to work with,
        # so report it and stop instead of failing later with a misleading
        # "missing" message.
        sys.stderr.write('ERROR!!!! '+str(e)+'\n\n')
        usage(sys.argv[0])
        sys.exit(-1)

    for opt, arg in opts:
        if opt in ('-f','--file'):
            input_file = arg
        elif opt in ('-n','--num'):
            try:
                num_folds = int(arg)
            except ValueError:
                sys.stderr.write('ERROR!!!! Num of folds must be an integer, got '+arg+'\n\n')
                usage(sys.argv[0])
                sys.exit(-1)
        elif opt in ('-o','--out'):
            out_folder = arg
        elif opt in ('-s','--subfolder'):
            name_subfolder = arg

    # Required options, checked in the same order as they are documented.
    required = (
        (input_file,'ERROR!!!! Input file missing'),
        (num_folds,'ERROR!!!! Num of folds missing'),
        (out_folder,'ERROR!!!! Out folder missing'),
    )
    for value, message in required:
        if value is None:
            sys.stderr.write(message+'\n\n')
            usage(sys.argv[0])
            sys.exit(-1)

    ###### END
    # name_subfolder must be forwarded, otherwise -s/--subfolder has no effect.
    generate_folds(input_file,num_folds,out_folder,name_subfolder)
133
+
134
+
@@ -0,0 +1,10 @@
1
+ #LANG|domain|pathtomodel|description
2
+ en|hotel|final_models/en/hotel_cfg1|Trained with config1 in the last version of hotel annotations
3
+ en|news|final_models/en/news_cfg1|Trained with config1 using only the sentences annotated with news
4
+ nl|hotel|final_models/nl/hotel_cfg1|Trained with config1 in the last version of hotel annotations
5
+ nl|news|final_models/nl/news_cfg1|Trained with config1 using only the sentences annotated with news
6
+ de|hotel|final_models/de/hotel_cfg1|Trained with config1 in the last version of hotel annotations
7
+ de|news|final_models/de/news_cfg1|Trained with config1 using only the sentences annotated with news
8
+ fr|hotel|final_models/fr/hotel_cfg1|Trained with config1 in the last version of hotel annotations
9
+ it|hotel|final_models/it/hotel_cfg1|Trained with config1 in the last version of hotel annotations
10
+ es|hotel|final_models/es/hotel_cfg1|Trained with config1 in the last version of hotel annotations
@@ -0,0 +1,33 @@
1
+ Format
2
+
3
+ --> lines starting with # are skipped
4
+
5
+ 1 token -2 -1 0
6
+
7
+ -> The first 1 is the length of the template, in this case unigram
8
+ -> Then 'n' labels that will be used (must match with the labels generated
9
+ by the feature extractor)
10
+ --> Then the positions; for bigrams and trigrams each position must be
11
+ --> written as n/m (bigrams) or n/m/p (trigrams)
12
+
13
+ The example would generate these templates:
14
+ ('token',-2)
15
+ ('token',-1)
16
+ ('token',0)
17
+
18
+
19
+ Example with bigrams
20
+ 2 token token -2/-1 -1/0 0/1 1/2
21
+
22
+ would generate:
23
+ (('token',-2),('token',-1))
24
+ (('token',-1),('token',0))
25
+ (('token',0),('token',1))
+ (('token',1),('token',2))
26
+
27
+
28
+ Example with trigrams (the example makes no sense)
29
+ 3 token lemma pos -2/0/4 9/8/3
30
+ (('token',-2),('lemma',0),('pos',4))
31
+ (('token',9),('lemma',8),('pos',3))
32
+
33
+
@@ -0,0 +1,6 @@
1
+ 1 token 0 -1 1
2
+ 1 lemma 0 -1 1
3
+ 1 pos 0 -1 1
4
+ 1 pol/mod 0 -1 1
5
+ 1 phrase_type 0 -1 1
6
+ 1 poldomain 0 -1 1
@@ -0,0 +1,10 @@
1
+ # sentence_id token_id token lemma pos term_id pol/mod mpqa_subjectivity mpqa_polarity
2
+ # entity property phrase_type y
3
+
4
+ 1 token -2 -1 0 1 2
5
+ 1 lemma -2 -1 0 1 2
6
+ 1 pos -2 -1 0 1 2
7
+ 1 pol/mod 0
8
+ 1 mpqa_subjectivity 0
9
+ 1 mpqa_polarity 0
10
+ 1 phrase_type -1 0 1
@@ -0,0 +1,7 @@
1
+ # sentence_id token_id token lemma pos term_id pol/mod mpqa_subjectivity mpqa_polarity
2
+ # entity property phrase_type y
3
+
4
+ 1 token -2 -1 0 1 2
5
+ 1 lemma -2 -1 0 1 2
6
+ 1 pos -2 -1 0 1 2
7
+ 1 pol/mod -2 -1 0 1 2
@@ -0,0 +1,10 @@
1
+ # sentence_id token_id token lemma pos term_id pol/mod mpqa_subjectivity mpqa_polarity
2
+ # entity property phrase_type y
3
+
4
+ 1 token -2 -1 0 1 2
5
+ 1 lemma -2 -1 0 1 2
6
+ 1 pos -2 -1 0 1 2
7
+ 1 pol/mod -2 -1 0 1 2
8
+ 1 mpqa_subjectivity -2 -1 0 1 2
9
+ 1 mpqa_polarity -2 -1 0 1 2
10
+ 1 phrase_type -1 0 1
@@ -0,0 +1,11 @@
1
+ # sentence_id token_id token lemma pos term_id pol/mod mpqa_subjectivity
2
+ # mpqa_polarity
3
+ # entity property phrase_type y
4
+
5
+ 1 token 0
6
+ 1 lemma 0
7
+ 1 pos 0
8
+ 1 entity 0
9
+ 1 property 0
10
+ 1 phrase_type 0
11
+ 1 aspect_training 0
@@ -0,0 +1,9 @@
1
+ # sentence_id token_id token lemma pos term_id pol/mod mpqa_subjectivity
2
+ # mpqa_polarity
3
+ # entity property phrase_type y
4
+
5
+ 1 token -2 -1 0 1 2
6
+ 1 lemma -2 -1 0 1 2
7
+ 1 pos -2 -1 0 1 2
8
+ 1 entity -2 -1 0 1 2
9
+ 1 property -2 -1 0 1 2
@@ -0,0 +1,10 @@
1
+ # sentence_id token_id token lemma pos term_id pol/mod mpqa_subjectivity
2
+ # mpqa_polarity
3
+ # entity property phrase_type y
4
+
5
+ 1 token -2 -1 0 1 2
6
+ 1 lemma -2 -1 0 1 2
7
+ 1 pos -2 -1 0 1 2
8
+ 1 entity -2 -1 0 1 2
9
+ 1 property -2 -1 0 1 2
10
+ 1 phrase_type -1 0 1
@@ -0,0 +1,11 @@
1
+ # sentence_id token_id token lemma pos term_id pol/mod mpqa_subjectivity
2
+ # mpqa_polarity
3
+ # entity property phrase_type y
4
+
5
+ 1 token 0 -1 1
6
+ 1 lemma 0 -1 1
7
+ 1 pos 0 -1 1
8
+ 1 entity 0 -1 1
9
+ 1 property 0 -1 1
10
+ 1 phrase_type 0 -1 1
11
+ 1 aspect_training 0 -1 1
@@ -0,0 +1,9 @@
1
+ # sentence_id token_id token lemma pos term_id pol/mod mpqa_subjectivity
2
+ # mpqa_polarity
3
+ # entity property phrase_type y
4
+
5
+ 1 token -2 -1 0 1 2
6
+ 1 lemma -2 -1 0 1 2
7
+ 1 pos -2 -1 0 1 2
8
+ 1 entity -2 -1 0 1 2
9
+ 1 property -2 -1 0 1 2
@@ -0,0 +1,10 @@
1
+ # sentence_id token_id token lemma pos term_id pol/mod mpqa_subjectivity
2
+ # mpqa_polarity
3
+ # entity property phrase_type y
4
+
5
+ 1 token -2 -1 0 1 2
6
+ 1 lemma -2 -1 0 1 2
7
+ 1 pos -2 -1 0 1 2
8
+ 1 entity -2 -1 0 1 2
9
+ 1 property -2 -1 0 1 2
10
+ 1 phrase_type -1 0 1
@@ -0,0 +1,49 @@
1
#!/bin/bash

# This script runs a bunch of experiments given a list of KAF files
# Input:
#   $1 --> folder where all the models are stored (created if it does not exist)
#   $2 --> list of kaf files for training
# Output:
#   standard output --> latex table with the results
#   standard error --> progress information
# The script looks for all the subfolders called exp1 exp2 exp3 within the
# $exps_folder folder and calls run_experiment.sh for each of them.
# Experiments whose output folder already exists are skipped.

if [ "$#" -lt 2 ]; then
    echo "Usage: $0 <models_folder> <list_of_kaf_files>" >&2
    exit 1
fi

general_folder=$1
list_files=$2
exps_folder=experiments

# All expansions are quoted so that paths containing spaces keep working.
if [ ! -d "$general_folder" ]; then
    mkdir -p "$general_folder" || exit 1
fi

echo "Output folder: $general_folder"
echo "List of files: $list_files"
echo "\begin{table}"
echo "\begin{tabular}{c|c|c|c|c|c|c||c|c|c|c}"
echo "\hline"
echo "Type & \multicolumn{2}{|c|}{Expression} & \multicolumn{2}{|c|}{Target} & \multicolumn{2}{|c||}{Holder} & \multicolumn{2}{|c|}{Exp-Tar} & \multicolumn{2}{|c|}{Exp-Hol} \\\\"
echo "\hline"
echo "& P & R & P & R & P & R & P & R & P & R \\\\"

for exp in "$exps_folder"/exp*
do
    # When nothing matches, the pattern is left as a literal string: skip it
    # instead of running an experiment on a path that does not exist.
    [ -e "$exp" ] || continue

    #id=$1 folder=$2 list_files=$3 experiment_folder=$4
    echo "$(date +%T) starting experiment $exp" >&2
    id=$(basename "$exp")
    outfolder=$general_folder/$id
    # If the output folder does not exist already
    if [ ! -d "$outfolder" ]; then
        run_experiment.sh "$id" "$outfolder" "$list_files" "$exp"
        echo "$(date +%T) Done" >&2
        echo "Done experiment $exp Files: $list_files Out: $general_folder" | mail -s "Experiment done" ruben.izquierdobevia@vu.nl
    else
        echo "The experiment $exp on $outfolder already exists, skipped" >&2
    fi
done

echo "\end{tabular}"
echo "\end{table}"
49
+
@@ -0,0 +1,20 @@
1
+ #!/usr/bin/env python
2
+
3
+ import sys
4
+ from subprocess import Popen,PIPE
5
+
6
def run_basic(input_file,output_file,cmd=None):
    """Run the basic opinion detector over one file.

    The content of `input_file` is fed to the standard input of the command
    and its standard output is written to `output_file`.

    input_file  -- path of the file sent to the command
    output_file -- path where the output of the command is stored
    cmd         -- shell command to run; defaults to the original hard-coded
                   location of opinion_detector_basic_multi.py
    """
    if cmd is None:
        cmd = '/home/izquierdo/opener_repos/opinion-detector-basic/core/opinion_detector_basic_multi.py'

    with open(input_file,'r') as fin:
        with open(output_file,'w') as fout:
            # stderr is deliberately NOT redirected to a pipe: a pipe that
            # nobody reads fills up and blocks the child, so wait() would
            # never return. The child inherits our stderr instead.
            exit_code = Popen(cmd,stdin=fin,stdout=fout,shell=True).wait()

    if exit_code != 0:
        sys.stderr.write('Warning: '+cmd+' exited with code '+str(exit_code)+'\n')
    print('Done')
15
+
16
if __name__ == '__main__':
    # Sample run over a fixed KAF file located in the current directory.
    # The variables are not called `input`/`output` to avoid shadowing the
    # `input` builtin.
    input_kaf = 'english00001_0123ff23e0d0dc0177f9b71a1928b674.kaf'
    output_kaf = 'english00001_0123ff23e0d0dc0177f9b71a1928b674.basic.kaf'
    run_basic(input_kaf,output_kaf)
20
+
@@ -0,0 +1,42 @@
1
#!/bin/bash

# This script runs a given experiment
# Input:
#   $1 the id used for the table latex for the experiment
#   $2 the output folder to store all the models and folds (for the validation)
#   $3 the list of files for the training
#   $4 the folder with the experiment (must contain a file called config.cfg
#      with the configuration)
#
# Output
#   Standard out --> 2 rows of the latex table (for the basic and deluxe)
#   Standard err --> progress of the program
# The log of training/evaluation is written next to the output folder
# (in its parent folder) and is called $id.log

if [ "$#" -lt 4 ]; then
    echo "Usage: $0 <id> <output_folder> <list_of_files> <experiment_folder>" >&2
    exit 1
fi

id=$1
folder=$2
list_files=$3
experiment_folder=$4

numfolds=5
base_out_folder=$(dirname "$folder")
err_file=$base_out_folder/$id.log
out_per_folds=$base_out_folder/$id.out_per_fold.tex

tmpconfig=$(mktemp) || exit 1
# Remove the temporary configuration however the script ends (normal exit,
# error or interruption), so no stale files are left behind.
trap 'rm -f "$tmpconfig"' EXIT

# The configuration given to cross_validation.py is the generated header
# below followed by the config.cfg of the experiment.
{
    echo "[general]"
    echo "output_folder = $folder"
    echo "filename_training_list = $list_files"
    echo
    echo "[feature_templates]"
    echo "expression = $experiment_folder/templates_exp.txt "
    echo "holder = $experiment_folder/templates_hol.txt"
    echo "target = $experiment_folder/templates_tar.txt"
    echo
    cat "$experiment_folder/config.cfg"
} > "$tmpconfig"

echo "Running experiment $id Logs: $err_file Out per fold: $out_per_folds" >&2
#Output to standard output
cross_validation.py -n "$numfolds" -f "$tmpconfig" -id "$id" -out_folds "$out_per_folds" 2> "$err_file"
42
+
@@ -0,0 +1 @@
1
+ pass
@@ -0,0 +1,314 @@
1
+ import os
2
+ import ConfigParser
3
+ import shutil
4
+
5
+ internal_config_filename= 'config.cfg'
6
+
7
+
8
def load_templates_from_file(filename):
    """Parse a feature-template file into a list of templates.

    Each non-empty line that does not start with '#' has the form::

        <N> <label_1> ... <label_N> <v_1>/.../<v_N> [<v_1>/.../<v_N> ...]

    i.e. a label count, that many labels, and then one or more
    '/'-separated groups of integers. Every group yields one template: a
    list of ``(label, int_value)`` pairs, pairing the i-th label with the
    i-th integer of the group.

    Returns the templates in file order. Raises ValueError for non-integer
    fields and IndexError when a group has fewer values than labels.
    """
    templates = []
    # 'with' guarantees the handle is closed even if a line fails to parse.
    with open(filename, 'r') as fic:
        for line in fic:
            line = line.strip()
            # Skip empty lines and comment lines starting with '#'.
            if line == '' or line[0] == '#':
                continue
            # split() without argument tolerates tabs and repeated spaces,
            # which split(' ') would turn into empty tokens.
            tokens = line.split()
            my_len = int(tokens[0])
            labels = tokens[1:my_len + 1]
            for value in tokens[my_len + 1:]:
                single_values = value.split('/')
                # Index explicitly so a too-short group still raises
                # IndexError instead of being silently truncated.
                templates.append([(label, int(single_values[n]))
                                  for n, label in enumerate(labels)])
    return templates
26
+
27
class Cconfig_manager:
    """Access to the experiment configuration and to the derived file layout.

    Wraps a ConfigParser object and exposes:
      * option getters (with defaults for the optional ones), and
      * the names of all files/folders, which are built below the
        output folder.

    ``this_folder`` is the folder used to resolve relative paths found in the
    configuration; it must be set with set_current_folder() before calling
    set_config() or copy_feature_templates() with relative paths.
    """

    def __init__(self):
        self.config = ConfigParser.ConfigParser()
        self.out_folder = None
        self.this_folder = None
        # Templates are loaded lazily, on first request.
        self.templates_expr = None
        self.templates_holder = None
        self.templates_target = None

    ## PRIVATE HELPERS
    def _resolve_path(self, path):
        """Return path unchanged if absolute, else joined to this_folder."""
        if os.path.isabs(path):
            return path
        return os.path.join(self.this_folder, path)

    def _get_optional(self, section, option, getter, default):
        """Read section/option with the given config getter, or return default
        when the section or the option is not present."""
        if self.config.has_section(section) and self.config.has_option(section, option):
            return getter(section, option)
        return default

    ## GENERAL
    def set_current_folder(self,t):
        """Set the folder against which relative config paths are resolved."""
        self.this_folder = t

    def get_flag_filename(self):
        """Path of the file called 'flag' inside the output folder."""
        return os.path.join(self.get_output_folder(), 'flag')

    def set_config(self,file_cfg):
        """Load file_cfg and set the output folder from [general] output_folder.

        A relative output_folder is resolved against this_folder. Note that
        ConfigParser.read() silently skips files it cannot open, in which case
        the lookup of the option below raises the ConfigParser error.
        """
        self.config.read(file_cfg)
        self.out_folder = self._resolve_path(self.config.get('general', 'output_folder'))

    def set_out_folder(self,o):
        """Override the output folder."""
        self.out_folder = o

    def get_output_folder(self):
        """Return the output folder (None until set)."""
        return self.out_folder

    def get_file_training_list(self):
        """Value of [general] filename_training_list."""
        return self.config.get('general', 'filename_training_list')

    ## OPTIONAL FLAGS IN [relation_features] / [lexicons]
    def get_use_dependencies(self):
        """Boolean [relation_features] use_dependencies (default True)."""
        return self._get_optional('relation_features', 'use_dependencies',
                                  self.config.getboolean, True)

    def get_use_training_lexicons(self):
        """Boolean [relation_features] use_training_lexicons (default True)."""
        return self._get_optional('relation_features', 'use_training_lexicons',
                                  self.config.getboolean, True)

    def get_use_tokens_lemmas(self):
        """Boolean [relation_features] use_tokens_lemmas (default True)."""
        return self._get_optional('relation_features', 'use_tokens_lemmas',
                                  self.config.getboolean, True)

    def get_propagation_lexicon_name(self):
        """Value of [lexicons] propagation_lexicon, or None if not set."""
        return self._get_optional('lexicons', 'propagation_lexicon',
                                  self.config.get, None)

    def get_use_this_expression_lexicon(self):
        """Value of [relation_features] use_this_expression_lexicon, or None."""
        return self._get_optional('relation_features', 'use_this_expression_lexicon',
                                  self.config.get, None)

    def get_use_this_target_lexicon(self):
        """Value of [relation_features] use_this_target_lexicon, or None."""
        return self._get_optional('relation_features', 'use_this_target_lexicon',
                                  self.config.get, None)

    ## TRAINING DATASETS
    def get_training_datasets_folder(self):
        return os.path.join(self.get_output_folder(), 'training_datasets')

    def get_training_dataset_exp(self):
        return os.path.join(self.get_training_datasets_folder(), 'training_set_exp.crf')

    def get_training_dataset_target(self):
        return os.path.join(self.get_training_datasets_folder(), 'training_set_target.crf')

    def get_training_dataset_holder(self):
        return os.path.join(self.get_training_datasets_folder(), 'training_set_holder.crf')

    ## FEATURE TEMPLATES
    def get_feature_template_folder_name(self):
        return os.path.join(self.get_output_folder(), 'feature_templates')

    def get_feature_template_exp_name(self):
        return os.path.join(self.get_feature_template_folder_name(), 'feat_template_expr.txt')

    def get_feature_template_tar_name(self):
        return os.path.join(self.get_feature_template_folder_name(), 'feat_template_target.txt')

    def get_feature_template_hol_name(self):
        return os.path.join(self.get_feature_template_folder_name(), 'feat_template_holder.txt')

    def copy_feature_templates(self):
        """Copy the template files named in [feature_templates] (expression,
        target, holder, in that order) to their names inside the output
        folder. Relative source paths are resolved against this_folder.
        The destination folder is not created here.
        """
        copies = (
            ('expression', self.get_feature_template_exp_name),
            ('target', self.get_feature_template_tar_name),
            ('holder', self.get_feature_template_hol_name),
        )
        for option, get_destination in copies:
            source = self._resolve_path(self.config.get('feature_templates', option))
            shutil.copyfile(source, get_destination())

    def get_templates_expr(self):
        """Expression templates, loaded from the copied file on first use."""
        if self.templates_expr is None:
            self.templates_expr = load_templates_from_file(self.get_feature_template_exp_name())
        return self.templates_expr

    def get_templates_holder(self):
        """Holder templates, loaded from the copied file on first use."""
        if self.templates_holder is None:
            self.templates_holder = load_templates_from_file(self.get_feature_template_hol_name())
        return self.templates_holder

    def get_templates_target(self):
        """Target templates, loaded from the copied file on first use."""
        if self.templates_target is None:
            self.templates_target = load_templates_from_file(self.get_feature_template_tar_name())
        return self.templates_target

    ## LEXICONS
    def get_lexicons_folder(self):
        return os.path.join(self.get_output_folder(), 'lexicons')

    def get_expression_lexicon_filename(self):
        return os.path.join(self.get_lexicons_folder(), 'polarity_lexicon.csv')

    def get_target_lexicon_filename(self):
        return os.path.join(self.get_lexicons_folder(), 'target_lexicon.csv')

    ## FEATURE FILES
    def get_feature_folder_name(self):
        return os.path.join(self.get_output_folder(), 'tab_feature_files')

    def get_crf_expression_folder(self):
        return os.path.join(self.get_output_folder(), 'crf_feat_files_exp')

    def get_crf_target_folder(self):
        return os.path.join(self.get_output_folder(), 'crf_feat_files_target')

    def get_crf_holder_folder(self):
        return os.path.join(self.get_output_folder(), 'crf_feat_files_holder')

    def get_feature_desc_filename(self):
        # File with the description of the features.
        return os.path.join(self.get_output_folder(), 'feature_desc.txt')

    ## EXTERNAL TOOLS (mandatory options: a missing one raises)
    def get_crfsuite_binary(self):
        return self.config.get('crfsuite', 'path_to_binary')

    def get_crfsuite_params(self):
        return self.config.get('crfsuite', 'parameters')

    def get_svm_learn_binary(self):
        return self.config.get('svmlight', 'path_to_binary_learn')

    def get_svm_classify_binary(self):
        return self.config.get('svmlight', 'path_to_binary_classify')

    def get_svm_params(self):
        return self.config.get('svmlight', 'parameters')

    def get_svm_threshold_exp_tar(self):
        """Float [relation_features] exp_tar_threshold, or -1 if not set."""
        return self._get_optional('relation_features', 'exp_tar_threshold',
                                  self.config.getfloat, -1)

    def get_svm_threshold_exp_hol(self):
        """Float [relation_features] exp_hol_threshold, or -1 if not set."""
        return self._get_optional('relation_features', 'exp_hol_threshold',
                                  self.config.getfloat, -1)

    ## VALID OPINIONS
    def get_mapping_valid_opinions(self):
        """Map every label listed in [valid_opinions] to its option name.

        Each option value is a ';'-separated list of labels (empty items
        are ignored), e.g.::

            [valid_opinions]
            positive = sentiment-pos
            negative = sentiment-neg

        gives {'sentiment-pos': 'positive', 'sentiment-neg': 'negative'}.
        """
        mapping = {}
        for mapped_opinion, values_in_corpus in self.config.items('valid_opinions'):
            for v in values_in_corpus.split(';'):
                if v != '':
                    mapping[v] = mapped_opinion
        return mapping

    def get_possible_expression_values(self):
        """Option names of the [valid_opinions] section."""
        return [key for key, _ in self.config.items('valid_opinions')]

    ## MODELS
    def get_model_foldername(self):
        return os.path.join(self.get_output_folder(), 'models')

    def get_filename_model_expression(self):
        return os.path.join(self.get_model_foldername(), 'model_opi_exp.crf')

    def get_filename_model_target(self):
        return os.path.join(self.get_model_foldername(), 'model_opi_target.crf')

    def get_filename_model_holder(self):
        return os.path.join(self.get_model_foldername(), 'model_opi_holder.crf')

    ## RELATION CLASSIFIER
    def get_folder_relation_classifier(self):
        # Goes through get_output_folder() like every other path getter.
        return os.path.join(self.get_output_folder(), 'relation_classifier')

    def get_relation_exp_tar_training_filename(self):
        return os.path.join(self.get_folder_relation_classifier(), 'training_exp_tar.feat')

    def get_relation_exp_hol_training_filename(self):
        return os.path.join(self.get_folder_relation_classifier(), 'training_exp_hol.feat')

    def get_rel_exp_tar_training_idx_filename(self):
        return os.path.join(self.get_folder_relation_classifier(), 'training_exp_tar.idx')

    def get_rel_exp_hol_training_idx_filename(self):
        return os.path.join(self.get_folder_relation_classifier(), 'training_exp_hol.idx')

    def get_index_features_exp_tar_filename(self):
        return os.path.join(self.get_folder_relation_classifier(), 'feat_index.exp_tar.bin')

    def get_index_features_exp_hol_filename(self):
        return os.path.join(self.get_folder_relation_classifier(), 'feat_index.exp_hol.bin')

    def get_filename_model_exp_tar(self):
        return os.path.join(self.get_folder_relation_classifier(), 'model_relation_exp_tar.svmlight')

    def get_filename_model_exp_hol(self):
        return os.path.join(self.get_folder_relation_classifier(), 'model_relation_exp_hol.svmlight')
313
+
314
+