opener-opinion-detector-base 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (261) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +101 -0
  3. data/bin/opinion-detector-base +19 -0
  4. data/core/annotation.cfg.erb +9 -0
  5. data/core/packages/KafNafParser-1.4.tar.gz +0 -0
  6. data/core/packages/VUA_pylib-1.5.tar.gz +0 -0
  7. data/core/python-scripts/LICENSE +339 -0
  8. data/core/python-scripts/README.md +226 -0
  9. data/core/python-scripts/classify_kaf_naf_file.py +499 -0
  10. data/core/python-scripts/cross_validation.py +634 -0
  11. data/core/python-scripts/generate_folds.py +134 -0
  12. data/core/python-scripts/models.cfg +10 -0
  13. data/core/python-scripts/my_templates/README +33 -0
  14. data/core/python-scripts/my_templates/templates_exp.only0.txt +6 -0
  15. data/core/python-scripts/my_templates/templates_exp.pol0.txt +10 -0
  16. data/core/python-scripts/my_templates/templates_exp.red.txt +7 -0
  17. data/core/python-scripts/my_templates/templates_exp.txt +10 -0
  18. data/core/python-scripts/my_templates/templates_holder.only0.txt +11 -0
  19. data/core/python-scripts/my_templates/templates_holder.red.txt +9 -0
  20. data/core/python-scripts/my_templates/templates_holder.txt +10 -0
  21. data/core/python-scripts/my_templates/templates_target.only0.txt +11 -0
  22. data/core/python-scripts/my_templates/templates_target.red.txt +9 -0
  23. data/core/python-scripts/my_templates/templates_target.txt +10 -0
  24. data/core/python-scripts/run_all_experiments.sh +49 -0
  25. data/core/python-scripts/run_basic.py +20 -0
  26. data/core/python-scripts/run_experiment.sh +42 -0
  27. data/core/python-scripts/scripts/__init__.py +1 -0
  28. data/core/python-scripts/scripts/config_manager.py +314 -0
  29. data/core/python-scripts/scripts/crfutils.py +215 -0
  30. data/core/python-scripts/scripts/extract_feats_relations.py +295 -0
  31. data/core/python-scripts/scripts/extract_features.py +376 -0
  32. data/core/python-scripts/scripts/feats_to_crf.exp.py +105 -0
  33. data/core/python-scripts/scripts/lexicons.py +44 -0
  34. data/core/python-scripts/scripts/link_entities_distance.py +77 -0
  35. data/core/python-scripts/scripts/relation_classifier.py +250 -0
  36. data/core/python-scripts/train.py +566 -0
  37. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/PKG-INFO +10 -0
  38. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/SOURCES.txt +22 -0
  39. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/dependency_links.txt +1 -0
  40. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/installed-files.txt +47 -0
  41. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/top_level.txt +1 -0
  42. data/core/site-packages/pre_build/KafNafParser/KafNafParserMod.py +390 -0
  43. data/core/site-packages/pre_build/KafNafParser/KafNafParserMod.pyc +0 -0
  44. data/core/site-packages/pre_build/KafNafParser/__init__.py +14 -0
  45. data/core/site-packages/pre_build/KafNafParser/__init__.pyc +0 -0
  46. data/core/site-packages/pre_build/KafNafParser/constituency_data.py +125 -0
  47. data/core/site-packages/pre_build/KafNafParser/constituency_data.pyc +0 -0
  48. data/core/site-packages/pre_build/KafNafParser/coreference_data.py +52 -0
  49. data/core/site-packages/pre_build/KafNafParser/coreference_data.pyc +0 -0
  50. data/core/site-packages/pre_build/KafNafParser/dependency_data.py +78 -0
  51. data/core/site-packages/pre_build/KafNafParser/dependency_data.pyc +0 -0
  52. data/core/site-packages/pre_build/KafNafParser/entity_data.py +59 -0
  53. data/core/site-packages/pre_build/KafNafParser/entity_data.pyc +0 -0
  54. data/core/site-packages/pre_build/KafNafParser/external_references_data.py +41 -0
  55. data/core/site-packages/pre_build/KafNafParser/external_references_data.pyc +0 -0
  56. data/core/site-packages/pre_build/KafNafParser/feature_extractor/__init__.py +2 -0
  57. data/core/site-packages/pre_build/KafNafParser/feature_extractor/__init__.pyc +0 -0
  58. data/core/site-packages/pre_build/KafNafParser/feature_extractor/constituency.py +205 -0
  59. data/core/site-packages/pre_build/KafNafParser/feature_extractor/constituency.pyc +0 -0
  60. data/core/site-packages/pre_build/KafNafParser/feature_extractor/dependency.py +309 -0
  61. data/core/site-packages/pre_build/KafNafParser/feature_extractor/dependency.pyc +0 -0
  62. data/core/site-packages/pre_build/KafNafParser/features_data.py +131 -0
  63. data/core/site-packages/pre_build/KafNafParser/features_data.pyc +0 -0
  64. data/core/site-packages/pre_build/KafNafParser/header_data.py +127 -0
  65. data/core/site-packages/pre_build/KafNafParser/header_data.pyc +0 -0
  66. data/core/site-packages/pre_build/KafNafParser/opinion_data.py +211 -0
  67. data/core/site-packages/pre_build/KafNafParser/opinion_data.pyc +0 -0
  68. data/core/site-packages/pre_build/KafNafParser/references_data.py +23 -0
  69. data/core/site-packages/pre_build/KafNafParser/references_data.pyc +0 -0
  70. data/core/site-packages/pre_build/KafNafParser/span_data.py +63 -0
  71. data/core/site-packages/pre_build/KafNafParser/span_data.pyc +0 -0
  72. data/core/site-packages/pre_build/KafNafParser/term_data.py +111 -0
  73. data/core/site-packages/pre_build/KafNafParser/term_data.pyc +0 -0
  74. data/core/site-packages/pre_build/KafNafParser/term_sentiment_data.py +42 -0
  75. data/core/site-packages/pre_build/KafNafParser/term_sentiment_data.pyc +0 -0
  76. data/core/site-packages/pre_build/KafNafParser/text_data.py +99 -0
  77. data/core/site-packages/pre_build/KafNafParser/text_data.pyc +0 -0
  78. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/PKG-INFO +10 -0
  79. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/SOURCES.txt +14 -0
  80. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/dependency_links.txt +1 -0
  81. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/installed-files.txt +23 -0
  82. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/top_level.txt +1 -0
  83. data/core/site-packages/pre_build/VUA_pylib/__init__.py +1 -0
  84. data/core/site-packages/pre_build/VUA_pylib/__init__.pyc +0 -0
  85. data/core/site-packages/pre_build/VUA_pylib/common/__init__.py +1 -0
  86. data/core/site-packages/pre_build/VUA_pylib/common/__init__.pyc +0 -0
  87. data/core/site-packages/pre_build/VUA_pylib/common/common.py +28 -0
  88. data/core/site-packages/pre_build/VUA_pylib/common/common.pyc +0 -0
  89. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/__init__.py +1 -0
  90. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/__init__.pyc +0 -0
  91. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/google_web_nl.py +156 -0
  92. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/google_web_nl.pyc +0 -0
  93. data/core/site-packages/pre_build/VUA_pylib/io_utils/__init__.py +1 -0
  94. data/core/site-packages/pre_build/VUA_pylib/io_utils/__init__.pyc +0 -0
  95. data/core/site-packages/pre_build/VUA_pylib/io_utils/feature_file.py +121 -0
  96. data/core/site-packages/pre_build/VUA_pylib/io_utils/feature_file.pyc +0 -0
  97. data/core/site-packages/pre_build/VUA_pylib/lexicon/__init__.py +1 -0
  98. data/core/site-packages/pre_build/VUA_pylib/lexicon/__init__.pyc +0 -0
  99. data/core/site-packages/pre_build/VUA_pylib/lexicon/lexicon.py +72 -0
  100. data/core/site-packages/pre_build/VUA_pylib/lexicon/lexicon.pyc +0 -0
  101. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/PKG-INFO +10 -0
  102. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/SOURCES.txt +7 -0
  103. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/dependency_links.txt +1 -0
  104. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/installed-files.txt +11 -0
  105. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/top_level.txt +1 -0
  106. data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.py +165 -0
  107. data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.pyc +0 -0
  108. data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.py +439 -0
  109. data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.pyc +0 -0
  110. data/core/site-packages/pre_build/VUKafParserPy/__init__.py +7 -0
  111. data/core/site-packages/pre_build/VUKafParserPy/__init__.pyc +0 -0
  112. data/core/vendor/src/crfsuite/AUTHORS +1 -0
  113. data/core/vendor/src/crfsuite/COPYING +27 -0
  114. data/core/vendor/src/crfsuite/ChangeLog +103 -0
  115. data/core/vendor/src/crfsuite/INSTALL +236 -0
  116. data/core/vendor/src/crfsuite/Makefile.am +19 -0
  117. data/core/vendor/src/crfsuite/Makefile.in +783 -0
  118. data/core/vendor/src/crfsuite/README +183 -0
  119. data/core/vendor/src/crfsuite/aclocal.m4 +9018 -0
  120. data/core/vendor/src/crfsuite/autogen.sh +38 -0
  121. data/core/vendor/src/crfsuite/compile +143 -0
  122. data/core/vendor/src/crfsuite/config.guess +1502 -0
  123. data/core/vendor/src/crfsuite/config.h.in +198 -0
  124. data/core/vendor/src/crfsuite/config.sub +1714 -0
  125. data/core/vendor/src/crfsuite/configure +14273 -0
  126. data/core/vendor/src/crfsuite/configure.in +149 -0
  127. data/core/vendor/src/crfsuite/crfsuite.sln +42 -0
  128. data/core/vendor/src/crfsuite/depcomp +630 -0
  129. data/core/vendor/src/crfsuite/example/chunking.py +49 -0
  130. data/core/vendor/src/crfsuite/example/crfutils.py +179 -0
  131. data/core/vendor/src/crfsuite/example/ner.py +270 -0
  132. data/core/vendor/src/crfsuite/example/pos.py +78 -0
  133. data/core/vendor/src/crfsuite/example/template.py +88 -0
  134. data/core/vendor/src/crfsuite/frontend/Makefile.am +29 -0
  135. data/core/vendor/src/crfsuite/frontend/Makefile.in +640 -0
  136. data/core/vendor/src/crfsuite/frontend/dump.c +116 -0
  137. data/core/vendor/src/crfsuite/frontend/frontend.vcxproj +129 -0
  138. data/core/vendor/src/crfsuite/frontend/iwa.c +273 -0
  139. data/core/vendor/src/crfsuite/frontend/iwa.h +65 -0
  140. data/core/vendor/src/crfsuite/frontend/learn.c +439 -0
  141. data/core/vendor/src/crfsuite/frontend/main.c +137 -0
  142. data/core/vendor/src/crfsuite/frontend/option.c +93 -0
  143. data/core/vendor/src/crfsuite/frontend/option.h +86 -0
  144. data/core/vendor/src/crfsuite/frontend/readdata.h +38 -0
  145. data/core/vendor/src/crfsuite/frontend/reader.c +136 -0
  146. data/core/vendor/src/crfsuite/frontend/tag.c +427 -0
  147. data/core/vendor/src/crfsuite/genbinary.sh.in +15 -0
  148. data/core/vendor/src/crfsuite/include/Makefile.am +11 -0
  149. data/core/vendor/src/crfsuite/include/Makefile.in +461 -0
  150. data/core/vendor/src/crfsuite/include/crfsuite.h +1063 -0
  151. data/core/vendor/src/crfsuite/include/crfsuite.hpp +555 -0
  152. data/core/vendor/src/crfsuite/include/crfsuite_api.hpp +400 -0
  153. data/core/vendor/src/crfsuite/include/os.h +61 -0
  154. data/core/vendor/src/crfsuite/install-sh +520 -0
  155. data/core/vendor/src/crfsuite/lib/cqdb/COPYING +28 -0
  156. data/core/vendor/src/crfsuite/lib/cqdb/Makefile.am +21 -0
  157. data/core/vendor/src/crfsuite/lib/cqdb/Makefile.in +549 -0
  158. data/core/vendor/src/crfsuite/lib/cqdb/cqdb.vcxproj +86 -0
  159. data/core/vendor/src/crfsuite/lib/cqdb/include/cqdb.h +524 -0
  160. data/core/vendor/src/crfsuite/lib/cqdb/src/cqdb.c +587 -0
  161. data/core/vendor/src/crfsuite/lib/cqdb/src/lookup3.c +976 -0
  162. data/core/vendor/src/crfsuite/lib/crf/Makefile.am +46 -0
  163. data/core/vendor/src/crfsuite/lib/crf/Makefile.in +721 -0
  164. data/core/vendor/src/crfsuite/lib/crf/crf.vcxproj +216 -0
  165. data/core/vendor/src/crfsuite/lib/crf/src/crf1d.h +353 -0
  166. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_context.c +705 -0
  167. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_encode.c +943 -0
  168. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_feature.c +352 -0
  169. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_model.c +994 -0
  170. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_tag.c +550 -0
  171. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite.c +492 -0
  172. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite_internal.h +236 -0
  173. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite_train.c +272 -0
  174. data/core/vendor/src/crfsuite/lib/crf/src/dataset.c +106 -0
  175. data/core/vendor/src/crfsuite/lib/crf/src/dictionary.c +118 -0
  176. data/core/vendor/src/crfsuite/lib/crf/src/holdout.c +80 -0
  177. data/core/vendor/src/crfsuite/lib/crf/src/logging.c +91 -0
  178. data/core/vendor/src/crfsuite/lib/crf/src/logging.h +48 -0
  179. data/core/vendor/src/crfsuite/lib/crf/src/params.c +335 -0
  180. data/core/vendor/src/crfsuite/lib/crf/src/params.h +80 -0
  181. data/core/vendor/src/crfsuite/lib/crf/src/quark.c +172 -0
  182. data/core/vendor/src/crfsuite/lib/crf/src/quark.h +46 -0
  183. data/core/vendor/src/crfsuite/lib/crf/src/rumavl.c +1107 -0
  184. data/core/vendor/src/crfsuite/lib/crf/src/rumavl.h +160 -0
  185. data/core/vendor/src/crfsuite/lib/crf/src/train_arow.c +408 -0
  186. data/core/vendor/src/crfsuite/lib/crf/src/train_averaged_perceptron.c +242 -0
  187. data/core/vendor/src/crfsuite/lib/crf/src/train_l2sgd.c +507 -0
  188. data/core/vendor/src/crfsuite/lib/crf/src/train_lbfgs.c +338 -0
  189. data/core/vendor/src/crfsuite/lib/crf/src/train_passive_aggressive.c +435 -0
  190. data/core/vendor/src/crfsuite/lib/crf/src/vecmath.h +341 -0
  191. data/core/vendor/src/crfsuite/ltmain.sh +8413 -0
  192. data/core/vendor/src/crfsuite/missing +376 -0
  193. data/core/vendor/src/crfsuite/swig/Makefile.am +13 -0
  194. data/core/vendor/src/crfsuite/swig/Makefile.in +365 -0
  195. data/core/vendor/src/crfsuite/swig/crfsuite.cpp +2 -0
  196. data/core/vendor/src/crfsuite/swig/export.i +32 -0
  197. data/core/vendor/src/crfsuite/swig/python/README +92 -0
  198. data/core/vendor/src/crfsuite/swig/python/crfsuite.py +329 -0
  199. data/core/vendor/src/crfsuite/swig/python/export_wrap.cpp +14355 -0
  200. data/core/vendor/src/crfsuite/swig/python/export_wrap.h +63 -0
  201. data/core/vendor/src/crfsuite/swig/python/prepare.sh +9 -0
  202. data/core/vendor/src/crfsuite/swig/python/sample_tag.py +52 -0
  203. data/core/vendor/src/crfsuite/swig/python/sample_train.py +68 -0
  204. data/core/vendor/src/crfsuite/swig/python/setup.py +44 -0
  205. data/core/vendor/src/crfsuite/win32/stdint.h +679 -0
  206. data/core/vendor/src/liblbfgs/AUTHORS +1 -0
  207. data/core/vendor/src/liblbfgs/COPYING +22 -0
  208. data/core/vendor/src/liblbfgs/ChangeLog +120 -0
  209. data/core/vendor/src/liblbfgs/INSTALL +231 -0
  210. data/core/vendor/src/liblbfgs/Makefile.am +10 -0
  211. data/core/vendor/src/liblbfgs/Makefile.in +638 -0
  212. data/core/vendor/src/liblbfgs/NEWS +0 -0
  213. data/core/vendor/src/liblbfgs/README +71 -0
  214. data/core/vendor/src/liblbfgs/aclocal.m4 +6985 -0
  215. data/core/vendor/src/liblbfgs/autogen.sh +38 -0
  216. data/core/vendor/src/liblbfgs/config.guess +1411 -0
  217. data/core/vendor/src/liblbfgs/config.h.in +64 -0
  218. data/core/vendor/src/liblbfgs/config.sub +1500 -0
  219. data/core/vendor/src/liblbfgs/configure +21146 -0
  220. data/core/vendor/src/liblbfgs/configure.in +107 -0
  221. data/core/vendor/src/liblbfgs/depcomp +522 -0
  222. data/core/vendor/src/liblbfgs/include/lbfgs.h +745 -0
  223. data/core/vendor/src/liblbfgs/install-sh +322 -0
  224. data/core/vendor/src/liblbfgs/lbfgs.sln +26 -0
  225. data/core/vendor/src/liblbfgs/lib/Makefile.am +24 -0
  226. data/core/vendor/src/liblbfgs/lib/Makefile.in +499 -0
  227. data/core/vendor/src/liblbfgs/lib/arithmetic_ansi.h +133 -0
  228. data/core/vendor/src/liblbfgs/lib/arithmetic_sse_double.h +294 -0
  229. data/core/vendor/src/liblbfgs/lib/arithmetic_sse_float.h +298 -0
  230. data/core/vendor/src/liblbfgs/lib/lbfgs.c +1371 -0
  231. data/core/vendor/src/liblbfgs/lib/lib.vcxproj +95 -0
  232. data/core/vendor/src/liblbfgs/ltmain.sh +6426 -0
  233. data/core/vendor/src/liblbfgs/missing +353 -0
  234. data/core/vendor/src/liblbfgs/sample/Makefile.am +15 -0
  235. data/core/vendor/src/liblbfgs/sample/Makefile.in +433 -0
  236. data/core/vendor/src/liblbfgs/sample/sample.c +81 -0
  237. data/core/vendor/src/liblbfgs/sample/sample.cpp +126 -0
  238. data/core/vendor/src/liblbfgs/sample/sample.vcxproj +105 -0
  239. data/core/vendor/src/svm_light/LICENSE.txt +59 -0
  240. data/core/vendor/src/svm_light/Makefile +105 -0
  241. data/core/vendor/src/svm_light/kernel.h +40 -0
  242. data/core/vendor/src/svm_light/svm_classify.c +197 -0
  243. data/core/vendor/src/svm_light/svm_common.c +985 -0
  244. data/core/vendor/src/svm_light/svm_common.h +301 -0
  245. data/core/vendor/src/svm_light/svm_hideo.c +1062 -0
  246. data/core/vendor/src/svm_light/svm_learn.c +4147 -0
  247. data/core/vendor/src/svm_light/svm_learn.h +169 -0
  248. data/core/vendor/src/svm_light/svm_learn_main.c +397 -0
  249. data/core/vendor/src/svm_light/svm_loqo.c +211 -0
  250. data/ext/hack/Rakefile +17 -0
  251. data/ext/hack/support.rb +88 -0
  252. data/lib/opener/opinion_detectors/base.rb +112 -0
  253. data/lib/opener/opinion_detectors/base/version.rb +7 -0
  254. data/lib/opener/opinion_detectors/configuration_creator.rb +86 -0
  255. data/lib/opener/opinion_detectors/de.rb +7 -0
  256. data/lib/opener/opinion_detectors/en.rb +7 -0
  257. data/lib/opener/opinion_detectors/it.rb +7 -0
  258. data/lib/opener/opinion_detectors/nl.rb +6 -0
  259. data/opener-opinion-detector-base.gemspec +35 -0
  260. data/pre_build_requirements.txt +3 -0
  261. metadata +374 -0
@@ -0,0 +1 @@
1
+ from lexicon import *
@@ -0,0 +1,72 @@
1
+ #!/usr/bin/env python
2
+
3
+ import os
4
+ import re
5
+ from VUA_pylib.common import normalize_pos
6
+
7
# Absolute directory of this module (symlinks resolved); the lexicon data
# file path is built relative to it.
__this_folder__ = os.path.dirname(os.path.realpath(__file__))
8
+
9
class MPQA_subjectivity_lexicon:
    """In-memory lookup over the MPQA subjectivity clue file.

    The file ``data/subjclueslen1-HLTEMNLP05.tff`` (relative to this module)
    is read once at construction time into four dictionaries:

    * ``stemmed`` / ``no_stemmed``: keyed by ``(word, pos)``
    * ``stemmed_anypos`` / ``no_stemmed_anypos``: keyed by ``word`` only

    Every value is a ``(type, prior_polarity)`` tuple. The part of speech is
    passed through ``normalize_pos`` both when loading and when querying.
    """

    def __init__(self):
        self.__filename = os.path.join(__this_folder__, 'data', 'subjclueslen1-HLTEMNLP05.tff')
        self.stemmed = {}
        self.stemmed_anypos = {}
        self.no_stemmed = {}
        self.no_stemmed_anypos = {}

        self.__load()

    def __load(self):
        """Fill the four dictionaries from the lexicon file.

        Format of lines:
        type=weaksubj len=1 word1=abandoned pos1=adj stemmed1=n priorpolarity=negative

        Lines that are blank or lack one of the five fields read here are
        skipped instead of aborting the whole load with an IndexError.
        """
        # Compiled once; the order matches the unpacking below.
        patterns = [re.compile(name + '=([^ ]+)')
                    for name in ('type', 'word1', 'pos1', 'stemmed1', 'priorpolarity')]

        # The context manager closes the file even if parsing raises.
        with open(self.__filename) as fic:
            for line in fic:
                line = line.strip()
                matches = [pattern.search(line) for pattern in patterns]
                if any(match is None for match in matches):
                    continue
                this_type, word, pos, stemmed, prior_polarity = [
                    match.group(1) for match in matches]
                pos = normalize_pos(pos)
                entry = (this_type, prior_polarity)

                # The any-pos tables were always filled in the original code
                # (its guard was ``True or pos == '*'``); that behaviour is kept.
                if stemmed == 'y':
                    self.stemmed[(word, pos)] = entry
                    self.stemmed_anypos[word] = entry
                elif stemmed == 'n':
                    self.no_stemmed[(word, pos)] = entry
                    self.no_stemmed_anypos[word] = entry

    def get_type_and_polarity(self, word, pos=None):
        """Return ``(type, prior_polarity)`` for *word*, or None if unknown.

        Lookup order: non-stemmed with the given pos, stemmed with the given
        pos, non-stemmed with any pos, stemmed with any pos. The two
        pos-specific lookups are skipped when *pos* is None.
        """
        res = None
        if pos is not None:
            pos = normalize_pos(pos)

            # Try no stemmed with the given pos
            res = self.no_stemmed.get((word, pos))

            # Try stemmed with the given pos
            if res is None:
                res = self.stemmed.get((word, pos))

        # Try no stemmed with any pos
        if res is None:
            res = self.no_stemmed_anypos.get(word)

        # Try stemmed with any pos
        if res is None:
            res = self.stemmed_anypos.get(word)

        return res
66
+
67
+
68
if __name__ == '__main__':
    # Manual smoke test: load the lexicon and print one lookup result.
    # print(...) with a single argument behaves the same on Python 2 and 3.
    lexicon = MPQA_subjectivity_lexicon()
    print(lexicon.get_type_and_polarity('abidance', 'adj'))
71
+
72
+
@@ -0,0 +1,10 @@
1
+ Metadata-Version: 1.0
2
+ Name: VUKafParserPy
3
+ Version: 1.0
4
+ Summary: Library in python to parse kaf files
5
+ Home-page: UNKNOWN
6
+ Author: Ruben Izquierdo
7
+ Author-email: r.izquierdobevia@vu.nl
8
+ License: UNKNOWN
9
+ Description: UNKNOWN
10
+ Platform: UNKNOWN
@@ -0,0 +1,7 @@
1
+ VUKafParserPy/KafDataObjectsMod.py
2
+ VUKafParserPy/KafParserMod.py
3
+ VUKafParserPy/__init__.py
4
+ VUKafParserPy.egg-info/PKG-INFO
5
+ VUKafParserPy.egg-info/SOURCES.txt
6
+ VUKafParserPy.egg-info/dependency_links.txt
7
+ VUKafParserPy.egg-info/top_level.txt
@@ -0,0 +1,11 @@
1
+ ../VUKafParserPy/KafParserMod.py
2
+ ../VUKafParserPy/__init__.py
3
+ ../VUKafParserPy/KafDataObjectsMod.py
4
+ ../VUKafParserPy/KafParserMod.pyc
5
+ ../VUKafParserPy/__init__.pyc
6
+ ../VUKafParserPy/KafDataObjectsMod.pyc
7
+ ./
8
+ top_level.txt
9
+ SOURCES.txt
10
+ PKG-INFO
11
+ dependency_links.txt
@@ -0,0 +1,165 @@
1
class KafTermSentiment:
    """Sentiment values attached to a term: resource, polarity, strength,
    subjectivity and an optional sentiment modifier."""

    def __init__(self):
        self.resource = None
        self.polarity = None
        self.strength = None
        self.subjectivity = None
        # Set here as well, so getSentimentModifier() does not raise
        # AttributeError when simpleInit() has not been called yet.
        self.sentiment_modifier = None

    def simpleInit(self, r, p, st, su, sm=None):
        """Set all fields at once.

        r: resource, p: polarity, st: strength, su: subjectivity,
        sm: sentiment modifier (optional, defaults to None).
        """
        self.resource = r
        self.polarity = p
        self.strength = st
        self.subjectivity = su
        self.sentiment_modifier = sm

    def getPolarity(self):
        """Return the stored polarity (None if never set)."""
        return self.polarity

    def getSentimentModifier(self):
        """Return the stored sentiment modifier (None if never set)."""
        return self.sentiment_modifier
20
+
21
+
22
class KafToken:
    """Plain record holding a token's id, value, sentence and paragraph."""

    def __init__(self, wid, value, sent=None, para=None):
        # Identifier and surface value are required; sentence and
        # paragraph are optional and default to None.
        self.token_id, self.value = wid, value
        self.sent, self.para = sent, para
28
+
29
+
30
class KafOpinionExpression:
    """Opinion expression: a polarity, a strength and a list of target ids."""

    def __init__(self, polarity, strength, targets):
        self.polarity, self.strength, self.targets = polarity, strength, targets

    def __str__(self):
        # One-line summary; target ids are joined with '-'.
        parts = [
            'Op_exp==> pol:', self.polarity,
            ' Str:', self.strength,
            ' ids:', '-'.join(self.targets),
        ]
        return ''.join(parts)
38
+
39
class KafOpinion:
    """An opinion: id, holder ids, target ids and its opinion expression."""

    def __init__(self, id, holders, targets, opi_exp):
        self.id, self.holders = id, holders
        self.targets, self.opi_exp = targets, opi_exp

    def __str__(self):
        # Multi-line dump: id, holders, targets, then str() of the expression.
        pieces = [
            'Opinion id' + self.id + '\n',
            ' Holders: ' + '-'.join(self.holders) + '\n',
            ' Targets: ' + '-'.join(self.targets) + '\n',
            str(self.opi_exp),
        ]
        return ''.join(pieces)
52
+
53
+
54
+
55
class KafSingleProperty:
    """A property annotation: id, type and the ids of its span targets."""

    def __init__(self, id, type, targets):
        self.id = id
        self.type = type
        self.targets = targets

    def get_id(self):
        """Return the property id."""
        return self.id

    def get_type(self):
        """Return the property type."""
        return self.type

    def get_span(self):
        """Return the list of target ids."""
        return self.targets

    def __str__(self):
        # %-formatting instead of '+' so that a missing (None) id, type or
        # target id is printed as 'None' rather than raising TypeError.
        ids = ' '.join('%s' % target for target in self.targets)
        return 'Id: %s Type: %s ids:%s' % (self.id, self.type, ids)
73
+
74
+
75
class KafSingleEntity:
    """An entity annotation: id, type and the ids of its span targets."""

    def __init__(self, id, type, targets):
        self.id = id
        self.type = type
        self.targets = targets

    def get_id(self):
        """Return the entity id."""
        return self.id

    def get_type(self):
        """Return the entity type."""
        return self.type

    def get_span(self):
        """Return the list of target ids."""
        return self.targets

    def __str__(self):
        # %-formatting instead of '+' so that a missing (None) id, type or
        # target id is printed as 'None' rather than raising TypeError.
        ids = ' '.join('%s' % target for target in self.targets)
        return 'Id: %s Type: %s ids:%s' % (self.id, self.type, ids)
92
+
93
class KafTerm:
    """A term: id, lemma, pos, morphofeat, optional sentiment object and
    the list of span ids it covers."""

    def __init__(self):
        self.tid = None
        self.lemma = None
        self.pos = None
        self.morphofeat = None
        self.sentiment = None
        self.list_span_id = []

    def get_morphofeat(self):
        """Return the morphofeat value."""
        return self.morphofeat

    def set_list_span_id(self, L):
        """Replace the list of span ids with *L*."""
        self.list_span_id = L

    def get_list_span(self):
        """Return the list of span ids."""
        return self.list_span_id

    def get_polarity(self):
        """Return the sentiment's polarity, or None without a sentiment."""
        if self.sentiment is not None:
            return self.sentiment.getPolarity()
        return None

    def get_sentiment_modifier(self):
        """Return the sentiment's modifier, or None without a sentiment."""
        if self.sentiment is not None:
            return self.sentiment.getSentimentModifier()
        return None

    def setSentiment(self, my_sent):
        """Attach a sentiment object (must offer getPolarity() and
        getSentimentModifier() for the accessors above to work)."""
        self.sentiment = my_sent

    def getSentiment(self):
        """Return the attached sentiment object (or None)."""
        return self.sentiment

    def getLemma(self):
        """Return the lemma."""
        return self.lemma

    def setLemma(self, lemma):
        """Set the lemma."""
        self.lemma = lemma

    def getPos(self):
        """Return the full pos value."""
        return self.pos

    def setPos(self, pos):
        """Set the pos value."""
        self.pos = pos

    def getId(self):
        """Return the term id."""
        return self.tid

    def setId(self, id):
        """Set the term id."""
        self.tid = id

    def getShortPos(self):
        """Return the lower-cased first character of pos, remapped.

        'g' is mapped to 'a' and 'a' is mapped to 'r'; every other letter is
        returned unchanged. Returns None when pos is None or empty (an empty
        pos used to raise IndexError).
        """
        if not self.pos:
            return None
        auxpos = self.pos.lower()[0]
        if auxpos == 'g':
            auxpos = 'a'
        elif auxpos == 'a':
            auxpos = 'r'
        return auxpos

    def __str__(self):
        """Return id, lemma and pos on three lines, or 'None' if any of the
        three is missing."""
        if self.tid and self.lemma and self.pos:
            lemma = self.lemma
            # Only a Python 2 ``unicode`` lemma needs encoding; a native
            # ``str`` is used as is so the concatenation works on Python 2
            # and Python 3 alike.
            if not isinstance(lemma, str):
                lemma = lemma.encode('utf-8')
            return self.tid + '\n\t' + lemma + '\n\t' + self.pos
        return 'None'
161
+
162
+
163
+
164
+
165
+
@@ -0,0 +1,439 @@
1
+ ########################################################################
2
+ # 14 Jan 2013: added function add_attrs_to_layer
3
+ ########################################################################
4
+
5
+ ###################
6
+ # List of changes #
7
+ ###################
8
+ # 14 Jan 2013: added function add_attrs_to_layer
9
+ # 27 Feb 2013: added code to comply with the DTD
10
+ # 18 Jun 2013: getSingleProperties adapted to the structure KAF/features/properties/property/references/span/target
11
+ # 18 Jun 2013: function add_property created for adding the properties to the KAF
12
+
13
+
14
+ from lxml import etree
15
+ from KafDataObjectsMod import *
16
+ import time
17
+
18
+ class KafParser:
19
def __init__(self,filename=None):
    """Parse *filename* when given, otherwise start from an empty document.

    With a filename the XML is parsed (blank text removed, CDATA kept) and
    the token path table is built. Without one, a bare ``KAF`` root with
    version ``v1.opener`` and ``xml:lang`` ``en`` is created.
    """
    self.tree = None
    self.__pathForToken = {}
    self.__term_ids_for_token_id = None

    if not filename:
        kaf_root = etree.Element('KAF')
        kaf_root.set('version', 'v1.opener')
        kaf_root.set('{http://www.w3.org/XML/1998/namespace}lang', 'en')
        self.tree = etree.ElementTree(element=kaf_root)
        return

    xml_parser = etree.XMLParser(remove_blank_text=True, strip_cdata=False)
    self.tree = etree.parse(filename, xml_parser)
    # Record the path of every token element for getToken().
    self.__textTokenization()
34
+
35
def __textTokenization(self):
    """Store, for every ``text/wf`` element, its tree path under its ``wid``."""
    document = self.tree
    for wf_node in document.findall('text/wf'):
        token_id = wf_node.get('wid')
        self.__pathForToken[token_id] = document.getpath(wf_node)
39
+
40
+
41
+ def getToken(self,tid):
42
+ if tid in self.__pathForToken:
43
+ path = self.__pathForToken[tid]
44
+ return self.tree.xpath(self.__pathForToken[tid])[0]
45
+ return None
46
+
47
+
48
+ def getLanguage(self):
49
+ lang = self.tree.getroot().get('{http://www.w3.org/XML/1998/namespace}lang','nl')
50
+ return lang
51
+
52
+ ## Return a list of (sentence_id, TOKENS) where tokens is a list of (token_id,token)
53
+ ## [(s_id1, T1), (sent_id2, T2)....]
54
+ ## T1 --> [(tokenid, token), (tokenid2,token2)....]
55
+ def get_tokens_in_sentences(self):
56
+ sents = []
57
+ current = []
58
+ previous_sent = None
59
+ for element in self.tree.findall('text/wf'):
60
+ w_id = element.get('wid')
61
+ s_id = element.get('sent')
62
+ word = element.text
63
+
64
+ if previous_sent is not None and s_id != previous_sent:
65
+ sents.append((previous_sent,current))
66
+ current = []
67
+ current.append((w_id,word))
68
+ previous_sent = s_id
69
+ ####
70
+ sents.append((s_id,current))
71
+ return sents
72
+
73
+ def get_term_ids_for_token_id(self,tok_id):
74
+ if self.__term_ids_for_token_id is None:
75
+ self.__term_ids_for_token_id = {}
76
+ for element in self.tree.findall('terms/term'):
77
+ term_id = element.get('tid')
78
+ for target in element.findall('span/target'):
79
+ token_id = target.get('id')
80
+ if token_id not in self.__term_ids_for_token_id:
81
+ self.__term_ids_for_token_id[token_id] = [term_id]
82
+ else:
83
+ self.__term_ids_for_token_id[token_id].append(term_id)
84
+ return self.__term_ids_for_token_id.get(tok_id,[])
85
+
86
+
87
+
88
+ def getTokens(self):
89
+ for element in self.tree.findall('text/wf'):
90
+ w_id = element.get('wid')
91
+ s_id = element.get('sent','0')
92
+ word = element.text
93
+ yield (word, s_id, w_id)
94
+
95
+
96
+
97
def getTerms(self):
    """Yield a KafTerm for every ``terms/term`` element.

    Each term carries its id, lemma, pos and morphofeat. A ``sentiment``
    child, when present, is converted into a KafTermSentiment; a ``span``
    child, when present, supplies the list of target ids. Nothing is yielded
    when there is no tree.
    """
    if not self.tree:
        return

    for term_node in self.tree.findall('terms/term'):
        term = KafTerm()
        term.setId(term_node.get('tid'))
        term.setLemma(term_node.get('lemma'))
        term.setPos(term_node.get('pos'))
        term.morphofeat = term_node.get('morphofeat')

        # Sentiment information (optional)
        sentiment_node = term_node.find('sentiment')
        if sentiment_node is not None:
            term_sentiment = KafTermSentiment()
            term_sentiment.simpleInit(
                sentiment_node.get('resource', ''),
                sentiment_node.get('polarity', None),
                sentiment_node.get('strength', ''),
                sentiment_node.get('subjectivity', ''),
                sentiment_node.get('sentiment_modifier'))
            term.setSentiment(term_sentiment)

        # Span targets (optional)
        span_node = term_node.find('span')
        if span_node is not None:
            term.set_list_span_id(
                [target_node.get('id') for target_node in span_node.findall('target')])

        yield term
129
+
130
+
131
+ def getSentimentTriples(self):
132
+ data = []
133
+ if self.tree:
134
+ for term_element in self.tree.findall('terms/term'):
135
+ lemma = term_element.get('lemma')
136
+ polarity = None
137
+ sentiment_modifier = None
138
+
139
+ sentiment_element = term_element.find('sentiment')
140
+ if sentiment_element is not None:
141
+ polarity = sentiment_element.get('polarity',None)
142
+ sentiment_modifier = sentiment_element.get('sentiment_modifier')
143
+ data.append( (lemma,polarity,sentiment_modifier))
144
+ return data
145
+
146
+
147
+
148
+ def addPolarityToTerm(self,termid,my_sentiment_attribs,polarity_pos=None):
149
+ if self.tree:
150
+ for element in self.tree.find('terms'):
151
+ if element.get('tid','')==termid:
152
+
153
+ #In case there is no pos info, we use the polarityPos
154
+ if not element.get('pos') and polarity_pos is not None:
155
+ element.set('pos',polarity_pos)
156
+ sentEle = etree.Element('sentiment',attrib=my_sentiment_attribs)
157
+ element.append(sentEle)
158
+
159
def saveToFile(self,filename,myencoding='UTF-8'):
    """Write the tree to *filename*, pretty-printed and with an XML
    declaration, using *myencoding*. Does nothing when there is no tree."""
    if not self.tree:
        return
    self.tree.write(
        filename,
        encoding=myencoding,
        pretty_print=True,
        xml_declaration=True)
162
+
163
+
164
def addLinguisticProcessor(self,name,version, layer, time_stamp=True):
    """Record an ``lp`` entry (name, version, timestamp) for *layer* in the
    ``kafHeader``.

    name, version: written as attributes of the new ``lp`` element.
    layer: value of the ``layer`` attribute of the ``linguisticProcessors``
        element that receives the entry.
    time_stamp: when true the current local time is used as timestamp,
        otherwise the literal ``'*'``.

    A missing ``kafHeader`` is created as first child of the root, and a
    missing ``linguisticProcessors`` element for the layer is created too.
    """
    aux = self.tree.findall('kafHeader')
    if len(aux)!=0:
        kaf_header = aux[0]
    else:
        kaf_header = etree.Element('kafHeader')
        self.tree.getroot().insert(0,kaf_header)

    # Guarantee at least one linguisticProcessors element, so the loop
    # below always binds ``element``.
    aux2= kaf_header.findall('linguisticProcessors')
    if len(aux2) == 0:
        new_lp = etree.Element('linguisticProcessors')
        new_lp.set('layer',layer)
        kaf_header.append(new_lp)

    ## Check if there is already element for the layer
    my_lp_ele = None

    for element in kaf_header.findall('linguisticProcessors'):
        if element.get('layer','')==layer:
            my_lp_ele = element
            break

    if time_stamp:
        my_time = time.strftime('%Y-%m-%dT%H:%M:%S%Z')
    else:
        my_time = '*'

    my_lp = etree.Element('lp')
    my_lp.set('timestamp',my_time)
    my_lp.set('version',version)
    my_lp.set('name',name)

    if my_lp_ele is not None: #Already an element for linguisticProcessor with the layer
        my_lp_ele.append(my_lp)
    else:
        # Create a new element for the LP layer
        my_lp_ele = etree.Element('linguisticProcessors')
        my_lp_ele.set('layer',layer)
        my_lp_ele.append(my_lp)
        #my_lp_ele.tail=my_lp_ele.text='\n'
        ## Should be inserted after the last linguisticProcessor element (stored in variable element)
        # ``element`` is the loop variable left over from the search above:
        # with no match it is the last linguisticProcessors element.
        idx = kaf_header.index(element)
        kaf_header.insert(idx+1,my_lp_ele)
207
+
208
+
209
def addLayer(self, type, element, first_char_id=None):
    """Append ``element`` to the layer named ``type`` and give it an id.

    The layer element is looked up with ``self.tree.find(type)`` and is
    created as the last child of the root when it does not exist yet.  The
    identifier is ``<prefix><n>`` where ``n`` is the number of children the
    layer had before the insertion plus one; it is stored on ``element`` in
    the attribute ``<prefix>id``.

    Args:
        type: tag/path of the layer element (e.g. ``'opinions'``).
        element: the element to add to the layer.
        first_char_id: identifier prefix; defaults to the first character
            of ``type``.

    Returns:
        The identifier assigned to ``element``.
    """
    if first_char_id is None:
        first_char_id = type[0]

    layer_element = self.tree.find(type)
    if layer_element is None:
        layer_element = etree.Element(type)
        self.tree.getroot().append(layer_element)

    # len(element) is the number of direct children; it replaces the
    # deprecated getchildren() call (removed from xml.etree in Python 3.9).
    # A freshly created layer has no children, so its first id ends in 1.
    new_id = first_char_id + str(len(layer_element) + 1)

    element.set(first_char_id + 'id', new_id)
    layer_element.append(element)
    return new_id
232
+
233
def addElementToLayer(self, layer, element, first_char_id=None):
    """Alias of ``addLayer``: add ``element`` to ``layer`` and return its id.

    All three arguments are forwarded positionally to ``self.addLayer`` and
    its result is returned unchanged.
    """
    assigned_id = self.addLayer(layer, element, first_char_id)
    return assigned_id
235
+
236
def add_attrs_to_layer(self, layer, attrs):
    """Set every key/value pair of ``attrs`` as an attribute of a layer.

    The layer element is located with ``self.tree.find(layer)``; when it
    does not exist nothing happens.

    Args:
        layer: tag/path of the layer element.
        attrs: mapping of attribute name to attribute value.
    """
    target_layer = self.tree.find(layer)
    if target_layer is None:
        return

    for attr_name, attr_value in attrs.items():
        target_layer.set(attr_name, attr_value)
241
+
242
+
243
def addAttributeToElement(self, path, str_id, id, attribute, value, sub_path=None):
    """Set ``attribute=value`` on the first element matching an identifier.

    Elements found with ``self.tree.findall(path)`` are scanned in order;
    the first one whose ``str_id`` attribute equals ``id`` is selected.
    When ``sub_path`` is given and matches at least one descendant of the
    selected element, the attribute is set on the first such descendant
    instead of on the selected element itself.  Nothing is modified when
    ``id`` is ``None`` or when no element matches.

    Args:
        path: path of the candidate elements.
        str_id: name of the identifier attribute (e.g. ``'tid'``).
        id: identifier value to look for.
        attribute: name of the attribute to set.
        value: value of the attribute to set.
        sub_path: optional path, relative to the matched element, of the
            element that should actually receive the attribute.
    """
    candidates = self.tree.findall(path)
    if id is None:
        return

    for node in candidates:
        if node.get(str_id, None) != id:
            continue

        receiver = node
        if sub_path is not None:
            nested = node.findall(sub_path)
            if nested:
                receiver = nested[0]
        receiver.set(attribute, value)
        # Only the first matching element is updated.
        return
251
+
252
+
253
+ ## This works with the original definition of the property layer
254
+ ## KAF -> properties -> property* -> span* -> target*
255
def getSingleProperties_old(self):
    """Yield one ``KafSingleProperty`` per span of the old property layer.

    Works on ``properties/property`` elements.  Spans are read from the
    ``references`` child of a property when that child exists, otherwise
    directly from the property element.  Every yielded object is built
    from the property's ``pid`` and ``type`` attributes and the list of
    ``id`` attributes of the span's ``target`` children.
    """
    for prop in self.tree.findall('properties/property'):
        prop_id = prop.get('pid')
        prop_type = prop.get('type')

        references = prop.find('references')
        span_parent = prop if references is None else references

        for span in span_parent.findall('span'):
            ids = [target.get('id') for target in span.findall('target')]
            yield KafSingleProperty(prop_id, prop_type, ids)
266
+
267
+ ## 18-June-2013
268
def getSingleProperties(self):
    """Yield one ``KafSingleProperty`` per span of the property layer.

    Works on ``features/properties/property`` elements.  The identifier is
    the ``pid`` attribute, falling back to ``fpid`` when ``pid`` is absent;
    the type is taken from the ``lemma`` attribute.  One object is yielded
    for each ``references/span`` element, carrying the ``id`` attributes of
    the span's ``target`` children.
    """
    for prop in self.tree.findall('features/properties/property'):
        prop_id = prop.get('pid')
        if prop_id is None:
            prop_id = prop.get('fpid')
        prop_type = prop.get('lemma')

        for span in prop.findall('references/span'):
            ids = [target.get('id') for target in span.findall('target')]
            yield KafSingleProperty(prop_id, prop_type, ids)
278
+
279
+ # This function adds a new property of the type given with the list of ids given
280
+ # my_type -> 'sleeping comfort' list_ids = ['id1','id2']
281
+ # It creates the features/properties layers in case
282
+ # Agglomerates all the properties for the same TYPE under the same property element
283
+ # It calculates automatically the number for the identifier depending on the number
284
+ # of properties existing
285
def add_property(self, my_type, list_ids, comment=None):
    """Add a span of target ids to the property whose lemma is ``my_type``.

    The ``features`` and ``features/properties`` layers are created when
    missing.  All spans for the same type are grouped under a single
    ``property`` element (matched on its ``lemma`` attribute); when no such
    element exists one is created with ``pid`` set to ``p<n>``, ``n`` being
    the number of existing ``property`` elements plus one.

    Args:
        my_type: property type, stored in the ``lemma`` attribute
            (e.g. ``'sleeping comfort'``).
        list_ids: identifiers to store as ``target`` elements of the new
            span (e.g. ``['id1', 'id2']``).
        comment: optional text added as an XML comment just before the
            new span.
    """
    # Locate or create the features layer.
    features = self.tree.find('features')
    if features is None:
        features = etree.Element('features')
        self.tree.getroot().append(features)

    # Locate or create the properties layer inside it.
    properties = features.find('properties')
    if properties is None:
        properties = etree.Element('properties')
        features.append(properties)

    # Reuse the property element for this type when there is one.
    existing = properties.findall('property')
    prop_node = next(
        (candidate for candidate in existing if candidate.get('lemma') == my_type),
        None,
    )
    if prop_node is None:
        prop_node = etree.Element('property')
        prop_node.set('pid', 'p' + str(len(existing) + 1))
        prop_node.set('lemma', my_type)
        properties.append(prop_node)

    references = prop_node.find('references')
    if references is None:
        references = etree.Element('references')
        prop_node.append(references)

    # Build the new span, preceded by the optional comment.
    if comment is not None:
        references.append(etree.Comment(comment))
    new_span = etree.Element('span')
    references.append(new_span)
    for target_id in list_ids:
        new_span.append(etree.Element('target', attrib={'id': target_id}))
326
+
327
+
328
+
329
+
330
def getSingleEntities(self):
    """Yield one ``KafSingleEntity`` per span of every ``entities/entity``.

    Spans are read from ``references/span`` when the entity has a
    ``references`` child and from ``span`` otherwise.  Each yielded object
    is built from the entity's ``eid`` and ``type`` attributes and the
    ``id`` attributes of the span's ``target`` children.
    """
    for entity in self.tree.findall('entities/entity'):
        entity_id = entity.get('eid')
        entity_type = entity.get('type')

        has_references = entity.find('references') is not None
        span_path = 'references/span' if has_references else 'span'

        for span in entity.findall(span_path):
            ids = [target.get('id') for target in span.findall('target')]
            yield KafSingleEntity(entity_id, entity_type, ids)
345
+
346
+
347
def getOpinions(self):
    """Yield a ``KafOpinion`` for every ``opinions/opinion`` element.

    For each opinion the target ids of the first ``opinion_holder``,
    ``opinion_target`` and ``opinion_expression`` children are collected
    from their ``span/target`` elements.  Polarity and strength come from
    the attributes of the ``opinion_expression`` element.  Any part that is
    missing contributes an empty id list, and polarity/strength default to
    the empty string.
    """
    for opinion in self.tree.findall('opinions/opinion'):
        opinion_id = opinion.get('oid')

        # Per-opinion defaults.  The original spelled ``strength`` as
        # ``strenght`` here, so an opinion without an opinion_expression
        # either raised UnboundLocalError or reused the previous opinion's
        # strength.
        holder_ids = []
        target_ids = []
        expression_ids = []
        polarity = strength = ''

        # Holder
        holder = opinion.find('opinion_holder')
        if holder is not None:
            holder_ids = [t.get('id') for t in holder.findall('span/target')]

        # Target
        target = opinion.find('opinion_target')
        if target is not None:
            target_ids = [t.get('id') for t in target.findall('span/target')]

        # Opinion expression
        expression = opinion.find('opinion_expression')
        if expression is not None:
            polarity = expression.get('polarity', '')
            strength = expression.get('strength', '')
            expression_ids = [t.get('id') for t in expression.findall('span/target')]

        yield KafOpinion(
            opinion_id,
            holder_ids,
            target_ids,
            KafOpinionExpression(polarity, strength, expression_ids),
        )
377
+
378
+
379
+
380
def remove_opinion_layer(self):
    """Remove the ``opinions`` layer from the root element, if present."""
    layer = self.tree.find('opinions')
    if layer is None:
        return
    self.tree.getroot().remove(layer)
384
+
385
+ ## This function adds an opinion to the opinion layer, creating the layer if it does not exist
386
+ ## The id is calculated automatically according to the number of elements, ensuring there is no repetition
387
def add_opinion(self, hol_ids, tar_ids, polarity, strength, exp_ids):
    """Append a new ``opinion`` element to the ``opinions`` layer.

    The layer is created as the last child of the root when missing.  The
    opinion gets the first identifier of the form ``o<n>`` (n = 1, 2, ...)
    not already used as ``oid`` by a child of the layer.  It contains, in
    this order, an ``opinion_holder``, an ``opinion_target`` and an
    ``opinion_expression`` element, each holding one ``span`` with a
    ``target`` child per identifier.

    Args:
        hol_ids: identifiers for the holder span.
        tar_ids: identifiers for the target span.
        polarity: value of the ``polarity`` attribute of the expression.
        strength: value of the ``strength`` attribute; converted with
            ``str``.
        exp_ids: identifiers for the expression span.
    """
    # Locate or create the opinion layer.
    layer = self.tree.find('opinions')
    if layer is None:
        layer = etree.Element('opinions')
        self.tree.getroot().append(layer)

    # First 'o<n>' not used by any element already in the layer.
    used_ids = [child.get('oid') for child in layer]
    counter = 1
    while 'o' + str(counter) in used_ids:
        counter += 1

    opinion = etree.Element('opinion')
    layer.append(opinion)
    opinion.set('oid', 'o' + str(counter))

    # (tag, attributes, target ids) for holder, target and expression.
    sections = (
        ('opinion_holder', {}, hol_ids),
        ('opinion_target', {}, tar_ids),
        ('opinion_expression',
         {'polarity': polarity, 'strength': str(strength)},
         exp_ids),
    )
    for tag, attributes, ids in sections:
        section = etree.Element(tag, attrib=attributes)
        opinion.append(section)
        span = etree.Element('span')
        section.append(span)
        for target_id in ids:
            span.append(etree.Element('target', attrib={'id': target_id}))
435
+
436
+
437
+
438
+
439
+