opener-opinion-detector-base 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (261) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +101 -0
  3. data/bin/opinion-detector-base +19 -0
  4. data/core/annotation.cfg.erb +9 -0
  5. data/core/packages/KafNafParser-1.4.tar.gz +0 -0
  6. data/core/packages/VUA_pylib-1.5.tar.gz +0 -0
  7. data/core/python-scripts/LICENSE +339 -0
  8. data/core/python-scripts/README.md +226 -0
  9. data/core/python-scripts/classify_kaf_naf_file.py +499 -0
  10. data/core/python-scripts/cross_validation.py +634 -0
  11. data/core/python-scripts/generate_folds.py +134 -0
  12. data/core/python-scripts/models.cfg +10 -0
  13. data/core/python-scripts/my_templates/README +33 -0
  14. data/core/python-scripts/my_templates/templates_exp.only0.txt +6 -0
  15. data/core/python-scripts/my_templates/templates_exp.pol0.txt +10 -0
  16. data/core/python-scripts/my_templates/templates_exp.red.txt +7 -0
  17. data/core/python-scripts/my_templates/templates_exp.txt +10 -0
  18. data/core/python-scripts/my_templates/templates_holder.only0.txt +11 -0
  19. data/core/python-scripts/my_templates/templates_holder.red.txt +9 -0
  20. data/core/python-scripts/my_templates/templates_holder.txt +10 -0
  21. data/core/python-scripts/my_templates/templates_target.only0.txt +11 -0
  22. data/core/python-scripts/my_templates/templates_target.red.txt +9 -0
  23. data/core/python-scripts/my_templates/templates_target.txt +10 -0
  24. data/core/python-scripts/run_all_experiments.sh +49 -0
  25. data/core/python-scripts/run_basic.py +20 -0
  26. data/core/python-scripts/run_experiment.sh +42 -0
  27. data/core/python-scripts/scripts/__init__.py +1 -0
  28. data/core/python-scripts/scripts/config_manager.py +314 -0
  29. data/core/python-scripts/scripts/crfutils.py +215 -0
  30. data/core/python-scripts/scripts/extract_feats_relations.py +295 -0
  31. data/core/python-scripts/scripts/extract_features.py +376 -0
  32. data/core/python-scripts/scripts/feats_to_crf.exp.py +105 -0
  33. data/core/python-scripts/scripts/lexicons.py +44 -0
  34. data/core/python-scripts/scripts/link_entities_distance.py +77 -0
  35. data/core/python-scripts/scripts/relation_classifier.py +250 -0
  36. data/core/python-scripts/train.py +566 -0
  37. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/PKG-INFO +10 -0
  38. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/SOURCES.txt +22 -0
  39. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/dependency_links.txt +1 -0
  40. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/installed-files.txt +47 -0
  41. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/top_level.txt +1 -0
  42. data/core/site-packages/pre_build/KafNafParser/KafNafParserMod.py +390 -0
  43. data/core/site-packages/pre_build/KafNafParser/KafNafParserMod.pyc +0 -0
  44. data/core/site-packages/pre_build/KafNafParser/__init__.py +14 -0
  45. data/core/site-packages/pre_build/KafNafParser/__init__.pyc +0 -0
  46. data/core/site-packages/pre_build/KafNafParser/constituency_data.py +125 -0
  47. data/core/site-packages/pre_build/KafNafParser/constituency_data.pyc +0 -0
  48. data/core/site-packages/pre_build/KafNafParser/coreference_data.py +52 -0
  49. data/core/site-packages/pre_build/KafNafParser/coreference_data.pyc +0 -0
  50. data/core/site-packages/pre_build/KafNafParser/dependency_data.py +78 -0
  51. data/core/site-packages/pre_build/KafNafParser/dependency_data.pyc +0 -0
  52. data/core/site-packages/pre_build/KafNafParser/entity_data.py +59 -0
  53. data/core/site-packages/pre_build/KafNafParser/entity_data.pyc +0 -0
  54. data/core/site-packages/pre_build/KafNafParser/external_references_data.py +41 -0
  55. data/core/site-packages/pre_build/KafNafParser/external_references_data.pyc +0 -0
  56. data/core/site-packages/pre_build/KafNafParser/feature_extractor/__init__.py +2 -0
  57. data/core/site-packages/pre_build/KafNafParser/feature_extractor/__init__.pyc +0 -0
  58. data/core/site-packages/pre_build/KafNafParser/feature_extractor/constituency.py +205 -0
  59. data/core/site-packages/pre_build/KafNafParser/feature_extractor/constituency.pyc +0 -0
  60. data/core/site-packages/pre_build/KafNafParser/feature_extractor/dependency.py +309 -0
  61. data/core/site-packages/pre_build/KafNafParser/feature_extractor/dependency.pyc +0 -0
  62. data/core/site-packages/pre_build/KafNafParser/features_data.py +131 -0
  63. data/core/site-packages/pre_build/KafNafParser/features_data.pyc +0 -0
  64. data/core/site-packages/pre_build/KafNafParser/header_data.py +127 -0
  65. data/core/site-packages/pre_build/KafNafParser/header_data.pyc +0 -0
  66. data/core/site-packages/pre_build/KafNafParser/opinion_data.py +211 -0
  67. data/core/site-packages/pre_build/KafNafParser/opinion_data.pyc +0 -0
  68. data/core/site-packages/pre_build/KafNafParser/references_data.py +23 -0
  69. data/core/site-packages/pre_build/KafNafParser/references_data.pyc +0 -0
  70. data/core/site-packages/pre_build/KafNafParser/span_data.py +63 -0
  71. data/core/site-packages/pre_build/KafNafParser/span_data.pyc +0 -0
  72. data/core/site-packages/pre_build/KafNafParser/term_data.py +111 -0
  73. data/core/site-packages/pre_build/KafNafParser/term_data.pyc +0 -0
  74. data/core/site-packages/pre_build/KafNafParser/term_sentiment_data.py +42 -0
  75. data/core/site-packages/pre_build/KafNafParser/term_sentiment_data.pyc +0 -0
  76. data/core/site-packages/pre_build/KafNafParser/text_data.py +99 -0
  77. data/core/site-packages/pre_build/KafNafParser/text_data.pyc +0 -0
  78. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/PKG-INFO +10 -0
  79. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/SOURCES.txt +14 -0
  80. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/dependency_links.txt +1 -0
  81. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/installed-files.txt +23 -0
  82. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/top_level.txt +1 -0
  83. data/core/site-packages/pre_build/VUA_pylib/__init__.py +1 -0
  84. data/core/site-packages/pre_build/VUA_pylib/__init__.pyc +0 -0
  85. data/core/site-packages/pre_build/VUA_pylib/common/__init__.py +1 -0
  86. data/core/site-packages/pre_build/VUA_pylib/common/__init__.pyc +0 -0
  87. data/core/site-packages/pre_build/VUA_pylib/common/common.py +28 -0
  88. data/core/site-packages/pre_build/VUA_pylib/common/common.pyc +0 -0
  89. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/__init__.py +1 -0
  90. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/__init__.pyc +0 -0
  91. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/google_web_nl.py +156 -0
  92. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/google_web_nl.pyc +0 -0
  93. data/core/site-packages/pre_build/VUA_pylib/io_utils/__init__.py +1 -0
  94. data/core/site-packages/pre_build/VUA_pylib/io_utils/__init__.pyc +0 -0
  95. data/core/site-packages/pre_build/VUA_pylib/io_utils/feature_file.py +121 -0
  96. data/core/site-packages/pre_build/VUA_pylib/io_utils/feature_file.pyc +0 -0
  97. data/core/site-packages/pre_build/VUA_pylib/lexicon/__init__.py +1 -0
  98. data/core/site-packages/pre_build/VUA_pylib/lexicon/__init__.pyc +0 -0
  99. data/core/site-packages/pre_build/VUA_pylib/lexicon/lexicon.py +72 -0
  100. data/core/site-packages/pre_build/VUA_pylib/lexicon/lexicon.pyc +0 -0
  101. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/PKG-INFO +10 -0
  102. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/SOURCES.txt +7 -0
  103. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/dependency_links.txt +1 -0
  104. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/installed-files.txt +11 -0
  105. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/top_level.txt +1 -0
  106. data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.py +165 -0
  107. data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.pyc +0 -0
  108. data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.py +439 -0
  109. data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.pyc +0 -0
  110. data/core/site-packages/pre_build/VUKafParserPy/__init__.py +7 -0
  111. data/core/site-packages/pre_build/VUKafParserPy/__init__.pyc +0 -0
  112. data/core/vendor/src/crfsuite/AUTHORS +1 -0
  113. data/core/vendor/src/crfsuite/COPYING +27 -0
  114. data/core/vendor/src/crfsuite/ChangeLog +103 -0
  115. data/core/vendor/src/crfsuite/INSTALL +236 -0
  116. data/core/vendor/src/crfsuite/Makefile.am +19 -0
  117. data/core/vendor/src/crfsuite/Makefile.in +783 -0
  118. data/core/vendor/src/crfsuite/README +183 -0
  119. data/core/vendor/src/crfsuite/aclocal.m4 +9018 -0
  120. data/core/vendor/src/crfsuite/autogen.sh +38 -0
  121. data/core/vendor/src/crfsuite/compile +143 -0
  122. data/core/vendor/src/crfsuite/config.guess +1502 -0
  123. data/core/vendor/src/crfsuite/config.h.in +198 -0
  124. data/core/vendor/src/crfsuite/config.sub +1714 -0
  125. data/core/vendor/src/crfsuite/configure +14273 -0
  126. data/core/vendor/src/crfsuite/configure.in +149 -0
  127. data/core/vendor/src/crfsuite/crfsuite.sln +42 -0
  128. data/core/vendor/src/crfsuite/depcomp +630 -0
  129. data/core/vendor/src/crfsuite/example/chunking.py +49 -0
  130. data/core/vendor/src/crfsuite/example/crfutils.py +179 -0
  131. data/core/vendor/src/crfsuite/example/ner.py +270 -0
  132. data/core/vendor/src/crfsuite/example/pos.py +78 -0
  133. data/core/vendor/src/crfsuite/example/template.py +88 -0
  134. data/core/vendor/src/crfsuite/frontend/Makefile.am +29 -0
  135. data/core/vendor/src/crfsuite/frontend/Makefile.in +640 -0
  136. data/core/vendor/src/crfsuite/frontend/dump.c +116 -0
  137. data/core/vendor/src/crfsuite/frontend/frontend.vcxproj +129 -0
  138. data/core/vendor/src/crfsuite/frontend/iwa.c +273 -0
  139. data/core/vendor/src/crfsuite/frontend/iwa.h +65 -0
  140. data/core/vendor/src/crfsuite/frontend/learn.c +439 -0
  141. data/core/vendor/src/crfsuite/frontend/main.c +137 -0
  142. data/core/vendor/src/crfsuite/frontend/option.c +93 -0
  143. data/core/vendor/src/crfsuite/frontend/option.h +86 -0
  144. data/core/vendor/src/crfsuite/frontend/readdata.h +38 -0
  145. data/core/vendor/src/crfsuite/frontend/reader.c +136 -0
  146. data/core/vendor/src/crfsuite/frontend/tag.c +427 -0
  147. data/core/vendor/src/crfsuite/genbinary.sh.in +15 -0
  148. data/core/vendor/src/crfsuite/include/Makefile.am +11 -0
  149. data/core/vendor/src/crfsuite/include/Makefile.in +461 -0
  150. data/core/vendor/src/crfsuite/include/crfsuite.h +1063 -0
  151. data/core/vendor/src/crfsuite/include/crfsuite.hpp +555 -0
  152. data/core/vendor/src/crfsuite/include/crfsuite_api.hpp +400 -0
  153. data/core/vendor/src/crfsuite/include/os.h +61 -0
  154. data/core/vendor/src/crfsuite/install-sh +520 -0
  155. data/core/vendor/src/crfsuite/lib/cqdb/COPYING +28 -0
  156. data/core/vendor/src/crfsuite/lib/cqdb/Makefile.am +21 -0
  157. data/core/vendor/src/crfsuite/lib/cqdb/Makefile.in +549 -0
  158. data/core/vendor/src/crfsuite/lib/cqdb/cqdb.vcxproj +86 -0
  159. data/core/vendor/src/crfsuite/lib/cqdb/include/cqdb.h +524 -0
  160. data/core/vendor/src/crfsuite/lib/cqdb/src/cqdb.c +587 -0
  161. data/core/vendor/src/crfsuite/lib/cqdb/src/lookup3.c +976 -0
  162. data/core/vendor/src/crfsuite/lib/crf/Makefile.am +46 -0
  163. data/core/vendor/src/crfsuite/lib/crf/Makefile.in +721 -0
  164. data/core/vendor/src/crfsuite/lib/crf/crf.vcxproj +216 -0
  165. data/core/vendor/src/crfsuite/lib/crf/src/crf1d.h +353 -0
  166. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_context.c +705 -0
  167. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_encode.c +943 -0
  168. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_feature.c +352 -0
  169. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_model.c +994 -0
  170. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_tag.c +550 -0
  171. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite.c +492 -0
  172. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite_internal.h +236 -0
  173. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite_train.c +272 -0
  174. data/core/vendor/src/crfsuite/lib/crf/src/dataset.c +106 -0
  175. data/core/vendor/src/crfsuite/lib/crf/src/dictionary.c +118 -0
  176. data/core/vendor/src/crfsuite/lib/crf/src/holdout.c +80 -0
  177. data/core/vendor/src/crfsuite/lib/crf/src/logging.c +91 -0
  178. data/core/vendor/src/crfsuite/lib/crf/src/logging.h +48 -0
  179. data/core/vendor/src/crfsuite/lib/crf/src/params.c +335 -0
  180. data/core/vendor/src/crfsuite/lib/crf/src/params.h +80 -0
  181. data/core/vendor/src/crfsuite/lib/crf/src/quark.c +172 -0
  182. data/core/vendor/src/crfsuite/lib/crf/src/quark.h +46 -0
  183. data/core/vendor/src/crfsuite/lib/crf/src/rumavl.c +1107 -0
  184. data/core/vendor/src/crfsuite/lib/crf/src/rumavl.h +160 -0
  185. data/core/vendor/src/crfsuite/lib/crf/src/train_arow.c +408 -0
  186. data/core/vendor/src/crfsuite/lib/crf/src/train_averaged_perceptron.c +242 -0
  187. data/core/vendor/src/crfsuite/lib/crf/src/train_l2sgd.c +507 -0
  188. data/core/vendor/src/crfsuite/lib/crf/src/train_lbfgs.c +338 -0
  189. data/core/vendor/src/crfsuite/lib/crf/src/train_passive_aggressive.c +435 -0
  190. data/core/vendor/src/crfsuite/lib/crf/src/vecmath.h +341 -0
  191. data/core/vendor/src/crfsuite/ltmain.sh +8413 -0
  192. data/core/vendor/src/crfsuite/missing +376 -0
  193. data/core/vendor/src/crfsuite/swig/Makefile.am +13 -0
  194. data/core/vendor/src/crfsuite/swig/Makefile.in +365 -0
  195. data/core/vendor/src/crfsuite/swig/crfsuite.cpp +2 -0
  196. data/core/vendor/src/crfsuite/swig/export.i +32 -0
  197. data/core/vendor/src/crfsuite/swig/python/README +92 -0
  198. data/core/vendor/src/crfsuite/swig/python/crfsuite.py +329 -0
  199. data/core/vendor/src/crfsuite/swig/python/export_wrap.cpp +14355 -0
  200. data/core/vendor/src/crfsuite/swig/python/export_wrap.h +63 -0
  201. data/core/vendor/src/crfsuite/swig/python/prepare.sh +9 -0
  202. data/core/vendor/src/crfsuite/swig/python/sample_tag.py +52 -0
  203. data/core/vendor/src/crfsuite/swig/python/sample_train.py +68 -0
  204. data/core/vendor/src/crfsuite/swig/python/setup.py +44 -0
  205. data/core/vendor/src/crfsuite/win32/stdint.h +679 -0
  206. data/core/vendor/src/liblbfgs/AUTHORS +1 -0
  207. data/core/vendor/src/liblbfgs/COPYING +22 -0
  208. data/core/vendor/src/liblbfgs/ChangeLog +120 -0
  209. data/core/vendor/src/liblbfgs/INSTALL +231 -0
  210. data/core/vendor/src/liblbfgs/Makefile.am +10 -0
  211. data/core/vendor/src/liblbfgs/Makefile.in +638 -0
  212. data/core/vendor/src/liblbfgs/NEWS +0 -0
  213. data/core/vendor/src/liblbfgs/README +71 -0
  214. data/core/vendor/src/liblbfgs/aclocal.m4 +6985 -0
  215. data/core/vendor/src/liblbfgs/autogen.sh +38 -0
  216. data/core/vendor/src/liblbfgs/config.guess +1411 -0
  217. data/core/vendor/src/liblbfgs/config.h.in +64 -0
  218. data/core/vendor/src/liblbfgs/config.sub +1500 -0
  219. data/core/vendor/src/liblbfgs/configure +21146 -0
  220. data/core/vendor/src/liblbfgs/configure.in +107 -0
  221. data/core/vendor/src/liblbfgs/depcomp +522 -0
  222. data/core/vendor/src/liblbfgs/include/lbfgs.h +745 -0
  223. data/core/vendor/src/liblbfgs/install-sh +322 -0
  224. data/core/vendor/src/liblbfgs/lbfgs.sln +26 -0
  225. data/core/vendor/src/liblbfgs/lib/Makefile.am +24 -0
  226. data/core/vendor/src/liblbfgs/lib/Makefile.in +499 -0
  227. data/core/vendor/src/liblbfgs/lib/arithmetic_ansi.h +133 -0
  228. data/core/vendor/src/liblbfgs/lib/arithmetic_sse_double.h +294 -0
  229. data/core/vendor/src/liblbfgs/lib/arithmetic_sse_float.h +298 -0
  230. data/core/vendor/src/liblbfgs/lib/lbfgs.c +1371 -0
  231. data/core/vendor/src/liblbfgs/lib/lib.vcxproj +95 -0
  232. data/core/vendor/src/liblbfgs/ltmain.sh +6426 -0
  233. data/core/vendor/src/liblbfgs/missing +353 -0
  234. data/core/vendor/src/liblbfgs/sample/Makefile.am +15 -0
  235. data/core/vendor/src/liblbfgs/sample/Makefile.in +433 -0
  236. data/core/vendor/src/liblbfgs/sample/sample.c +81 -0
  237. data/core/vendor/src/liblbfgs/sample/sample.cpp +126 -0
  238. data/core/vendor/src/liblbfgs/sample/sample.vcxproj +105 -0
  239. data/core/vendor/src/svm_light/LICENSE.txt +59 -0
  240. data/core/vendor/src/svm_light/Makefile +105 -0
  241. data/core/vendor/src/svm_light/kernel.h +40 -0
  242. data/core/vendor/src/svm_light/svm_classify.c +197 -0
  243. data/core/vendor/src/svm_light/svm_common.c +985 -0
  244. data/core/vendor/src/svm_light/svm_common.h +301 -0
  245. data/core/vendor/src/svm_light/svm_hideo.c +1062 -0
  246. data/core/vendor/src/svm_light/svm_learn.c +4147 -0
  247. data/core/vendor/src/svm_light/svm_learn.h +169 -0
  248. data/core/vendor/src/svm_light/svm_learn_main.c +397 -0
  249. data/core/vendor/src/svm_light/svm_loqo.c +211 -0
  250. data/ext/hack/Rakefile +17 -0
  251. data/ext/hack/support.rb +88 -0
  252. data/lib/opener/opinion_detectors/base.rb +112 -0
  253. data/lib/opener/opinion_detectors/base/version.rb +7 -0
  254. data/lib/opener/opinion_detectors/configuration_creator.rb +86 -0
  255. data/lib/opener/opinion_detectors/de.rb +7 -0
  256. data/lib/opener/opinion_detectors/en.rb +7 -0
  257. data/lib/opener/opinion_detectors/it.rb +7 -0
  258. data/lib/opener/opinion_detectors/nl.rb +6 -0
  259. data/opener-opinion-detector-base.gemspec +35 -0
  260. data/pre_build_requirements.txt +3 -0
  261. metadata +374 -0
@@ -0,0 +1,10 @@
1
+ Metadata-Version: 1.0
2
+ Name: KafNafParser
3
+ Version: 1.4
4
+ Summary: Parser between KAF and NAF
5
+ Home-page: https://github.com/cltl/KafNafParserPy
6
+ Author: Ruben Izquierdo
7
+ Author-email: r.izquierdobevia@vu.nl
8
+ License: UNKNOWN
9
+ Description: UNKNOWN
10
+ Platform: UNKNOWN
@@ -0,0 +1,22 @@
1
+ KafNafParser/KafNafParserMod.py
2
+ KafNafParser/__init__.py
3
+ KafNafParser/constituency_data.py
4
+ KafNafParser/coreference_data.py
5
+ KafNafParser/dependency_data.py
6
+ KafNafParser/entity_data.py
7
+ KafNafParser/external_references_data.py
8
+ KafNafParser/features_data.py
9
+ KafNafParser/header_data.py
10
+ KafNafParser/opinion_data.py
11
+ KafNafParser/references_data.py
12
+ KafNafParser/span_data.py
13
+ KafNafParser/term_data.py
14
+ KafNafParser/term_sentiment_data.py
15
+ KafNafParser/text_data.py
16
+ KafNafParser.egg-info/PKG-INFO
17
+ KafNafParser.egg-info/SOURCES.txt
18
+ KafNafParser.egg-info/dependency_links.txt
19
+ KafNafParser.egg-info/top_level.txt
20
+ KafNafParser/feature_extractor/__init__.py
21
+ KafNafParser/feature_extractor/constituency.py
22
+ KafNafParser/feature_extractor/dependency.py
@@ -0,0 +1,47 @@
1
+ ../KafNafParser/__init__.py
2
+ ../KafNafParser/header_data.py
3
+ ../KafNafParser/text_data.py
4
+ ../KafNafParser/term_data.py
5
+ ../KafNafParser/entity_data.py
6
+ ../KafNafParser/features_data.py
7
+ ../KafNafParser/opinion_data.py
8
+ ../KafNafParser/constituency_data.py
9
+ ../KafNafParser/dependency_data.py
10
+ ../KafNafParser/coreference_data.py
11
+ ../KafNafParser/references_data.py
12
+ ../KafNafParser/external_references_data.py
13
+ ../KafNafParser/span_data.py
14
+ ../KafNafParser/KafNafParserMod.py
15
+ ../KafNafParser/term_sentiment_data.py
16
+ ../KafNafParser/feature_extractor/dependency.py
17
+ ../KafNafParser/feature_extractor/constituency.py
18
+ ../KafNafParser/feature_extractor/__init__.py
19
+ ../KafNafParser/__init__.pyc
20
+ ../KafNafParser/header_data.pyc
21
+ ../KafNafParser/text_data.pyc
22
+ ../KafNafParser/term_data.pyc
23
+ ../KafNafParser/entity_data.pyc
24
+ ../KafNafParser/features_data.pyc
25
+ ../KafNafParser/opinion_data.pyc
26
+ ../KafNafParser/constituency_data.pyc
27
+ ../KafNafParser/dependency_data.pyc
28
+ ../KafNafParser/coreference_data.pyc
29
+ ../KafNafParser/references_data.pyc
30
+ ../KafNafParser/external_references_data.pyc
31
+ ../KafNafParser/span_data.pyc
32
+ ../KafNafParser/KafNafParserMod.pyc
33
+ ../KafNafParser/term_sentiment_data.pyc
34
+ ../KafNafParser/feature_extractor/dependency.pyc
35
+ ../KafNafParser/feature_extractor/constituency.pyc
36
+ ../KafNafParser/feature_extractor/__init__.pyc
37
+ ../../../kaf_example.xml
38
+ ../../../naf.dtd
39
+ ../../../naf_example.xml
40
+ ../../../test.py
41
+ ../../../README.md
42
+ ../../../LICENSE
43
+ ./
44
+ SOURCES.txt
45
+ dependency_links.txt
46
+ top_level.txt
47
+ PKG-INFO
@@ -0,0 +1,390 @@
1
+ ## LIST OF CHANGES
2
+ # Ruben 8-nov-2013
3
+ # + included layers for entities, properties, opinions
4
+ # + renamed all classes to Cnameoftheclass
5
+ # Ruben 15-nov-2013
6
+ # + included constituency layer
7
+ #
8
+ # Ruben 19-nov-2013
9
+ # + included dependency layer
10
+ # Ruben 17-dec-2013
11
+ # + modified all to read/write NAF and KAF
12
+ #
13
+ # Ruben 21-Feb-2014
14
+ # + Included coreference layer
15
+
16
+ __last_modified = '17dec2013'
17
+
18
+ from lxml import etree
19
+ from header_data import *
20
+ from text_data import *
21
+ from term_data import *
22
+ from entity_data import *
23
+ from features_data import *
24
+ from opinion_data import *
25
+ from constituency_data import *
26
+ from dependency_data import *
27
+ from feature_extractor import Cdependency_extractor, Cconstituency_extractor
28
+ from coreference_data import *
29
+ from references_data import Creferences
30
+
31
+ import sys
32
+
33
+
34
+
35
class KafNafParser:
    """In-memory wrapper around a KAF or NAF XML document.

    The document is parsed with lxml; every known layer element found
    directly under the root is wrapped in its layer class.  Layers that
    are absent from the file are kept as ``None``.
    """

    def __init__(self,filename):
        """Parse ``filename`` and wrap the layers present under its root.

        ``filename`` is handed unchanged to ``etree.parse``.
        """
        self.tree = None
        self.filename = filename
        self.tree = etree.parse(filename,etree.XMLParser(remove_blank_text=True))
        self.root = self.tree.getroot()
        self.type = self.root.tag # KAF NAF

        self.header = None
        self.text_layer = None
        self.term_layer = None
        self.entity_layer = None
        self.features_layer = None
        self.opinion_layer = None
        self.constituency_layer = None
        self.dependency_layer = None
        self.coreference_layer = None

        ## Specific feature extractors for complicated layers (built lazily)
        self.my_dependency_extractor = None
        self.my_constituency_extractor = None

        ## Lazily built lookup caches, see get_dict_tokens_for_termid
        ## and map_tokens_to_terms
        self.dict_tokens_for_tid = None
        self.terms_for_token = None

        self.lang = self.root.get('{http://www.w3.org/XML/1998/namespace}lang')
        self.version = self.root.get('version')

        # Start from None so that a root tag other than NAF/KAF simply
        # yields a document without header instead of an unbound local.
        node_header = None
        if self.type == 'NAF':
            node_header = self.root.find('nafHeader')
        elif self.type == 'KAF':
            node_header = self.root.find('kafHeader')

        if node_header is not None:
            self.header = CHeader(node_header,self.type)

        # Text layer adapted to naf/kaf
        node_text = self.root.find('text')
        if node_text is not None:
            self.text_layer = Ctext(node=node_text,type=self.type)

        node_term = self.root.find('terms')
        if node_term is not None:
            self.term_layer = Cterms(node=node_term,type=self.type)

        node_entity = self.root.find('entities')
        if node_entity is not None:
            self.entity_layer = Centities(node_entity,type=self.type)

        node_features = self.root.find('features')
        if node_features is not None:
            self.features_layer = Cfeatures(node_features,type=self.type)

        node_opinions = self.root.find('opinions')
        if node_opinions is not None:
            self.opinion_layer = Copinions(node_opinions,type=self.type)

        # Definition KAF/NAF is the same
        node_constituency = self.root.find('constituency')
        if node_constituency is not None:
            self.constituency_layer = Cconstituency(node_constituency)

        # Definition KAF/NAF is the same
        node_dependency = self.root.find('deps')
        if node_dependency is not None:
            self.dependency_layer = Cdependencies(node_dependency)

        node_coreferences = self.root.find('coreferences')
        if node_coreferences is not None:
            self.coreference_layer = Ccoreferences(node_coreferences,type=self.type)

    def _present_layers(self):
        """Return the header and layer objects that exist, in conversion order."""
        candidates = (
            self.header,
            self.text_layer,
            self.term_layer,
            self.entity_layer,
            # There is no feature layer defined in NAF; it is assumed to
            # follow the same rules as in KAF when present.
            self.features_layer,
            self.opinion_layer,
            # The constituency layer is exactly the same in KAF/NAF.
            self.constituency_layer,
            # The dependency layer is not defined in KAF; both formats are
            # assumed to be similar.
            self.dependency_layer,
            self.coreference_layer,
        )
        return [layer for layer in candidates if layer is not None]

    def get_type(self):
        """Return the document type, i.e. the root tag (``'KAF'``/``'NAF'``)."""
        return self.type

    def get_filename(self):
        """Return the ``filename`` value given to the constructor."""
        return self.filename

    def to_kaf(self):
        """Convert the document to KAF in memory.

        The root is retagged when the current type is NAF, then ``to_kaf``
        is called on the header and on every layer that is present.
        """
        # Convert the root
        if self.type == 'NAF':
            self.root.tag = 'KAF'
            self.type = 'KAF'

        for layer in self._present_layers():
            layer.to_kaf()

    def to_naf(self):
        """Convert the document to NAF in memory.

        The root is retagged when the current type is KAF, then ``to_naf``
        is called on the header and on every layer that is present.
        """
        # Convert the root
        if self.type == 'KAF':
            self.root.tag = self.type = 'NAF'

        for layer in self._present_layers():
            layer.to_naf()

    def print_constituency(self):
        """Print the constituency layer object (``None`` when absent)."""
        # Parenthesised single argument: same output with the Python 2
        # print statement and the Python 3 print function.
        print(self.constituency_layer)

    def get_trees(self):
        """Yield each tree of the constituency layer, if there is one."""
        if self.constituency_layer is not None:
            for tree in self.constituency_layer.get_trees():
                yield tree

    def get_dependencies(self):
        """Yield each dependency of the dependency layer, if there is one."""
        if self.dependency_layer is not None:
            for dep in self.dependency_layer.get_dependencies():
                yield dep

    def get_language(self):
        """Return the ``xml:lang`` attribute of the root (``None`` if unset)."""
        return self.lang

    def get_tokens(self):
        """Yield each token of the text layer, if there is one."""
        # Guarded like the other accessors so a document without text layer
        # yields nothing instead of failing on iteration over None.
        if self.text_layer is not None:
            for token in self.text_layer:
                yield token

    def get_terms(self):
        """Yield each term of the term layer, if there is one."""
        if self.term_layer is not None:
            for term in self.term_layer:
                yield term

    def get_token(self,token_id):
        """Return the text layer's ``get_wf(token_id)``, or ``None`` without text layer."""
        if self.text_layer is None:
            return None
        return self.text_layer.get_wf(token_id)

    def get_term(self,term_id):
        """Return the term layer's ``get_term(term_id)``, or ``None`` without term layer."""
        if self.term_layer is None:
            return None
        return self.term_layer.get_term(term_id)

    def get_properties(self):
        """Yield each property of the features layer, if there is one."""
        if self.features_layer is not None:
            for prop in self.features_layer.get_properties():
                yield prop

    def get_entities(self):
        """Yield each entity of the entity layer, if there is one."""
        if self.entity_layer is not None:
            for entity in self.entity_layer:
                yield entity

    def get_opinions(self):
        """Yield each opinion of the opinion layer, if there is one."""
        if self.opinion_layer is not None:
            for opinion in self.opinion_layer.get_opinions():
                yield opinion

    def dump(self,filename=sys.stdout):
        """Write the XML tree, pretty-printed as UTF-8 with XML declaration.

        ``filename`` is passed straight to the tree's ``write`` method and
        defaults to ``sys.stdout``.
        """
        self.tree.write(filename,encoding='UTF-8',pretty_print=True,xml_declaration=True)

    def remove_dependency_layer(self):
        """Drop the dependency layer, its extractor and its header entry."""
        if self.dependency_layer is not None:
            this_node = self.dependency_layer.get_node()
            self.root.remove(this_node)
            self.dependency_layer = self.my_dependency_extractor = None

        if self.header is not None:
            self.header.remove_lp('deps')

    def remove_this_opinion(self,opinion_id):
        """Ask the opinion layer, if present, to remove ``opinion_id``."""
        if self.opinion_layer is not None:
            self.opinion_layer.remove_this_opinion(opinion_id)

    def remove_opinion_layer(self):
        """Drop the opinion layer and its header entry."""
        if self.opinion_layer is not None:
            this_node = self.opinion_layer.get_node()
            self.root.remove(this_node)
            self.opinion_layer = None

        if self.header is not None:
            self.header.remove_lp('opinions')

    def remove_properties(self):
        """Remove the properties of the features layer and its header entry."""
        if self.features_layer is not None:
            self.features_layer.remove_properties()

        if self.header is not None:
            self.header.remove_lp('features')

    def remove_term_layer(self):
        """Drop the term layer and its header entry."""
        if self.term_layer is not None:
            this_node = self.term_layer.get_node()
            self.root.remove(this_node)
            self.term_layer = None

        # The header entry is removed once; the original repeated this call.
        if self.header is not None:
            self.header.remove_lp('terms')

    def get_constituency_extractor(self):
        """Return the cached constituency extractor, creating it on first use.

        Returns ``None`` when the document has no constituency layer.
        """
        if self.constituency_layer is None: # there are no constituents
            return None
        if self.my_constituency_extractor is None:
            self.my_constituency_extractor = Cconstituency_extractor(self)
        return self.my_constituency_extractor

    def get_dependency_extractor(self):
        """Return the cached dependency extractor, creating it on first use.

        Returns ``None`` when the document has no dependency layer.
        """
        if self.dependency_layer is None: # there are no dependencies
            return None
        if self.my_dependency_extractor is None:
            self.my_dependency_extractor = Cdependency_extractor(self)
        return self.my_dependency_extractor

    ## ADDING METHODS
    def add_wf(self,wf_obj):
        """Add ``wf_obj`` to the text layer, creating the layer if missing."""
        if self.text_layer is None:
            self.text_layer = Ctext(type=self.type)
            self.root.append(self.text_layer.get_node())
        self.text_layer.add_wf(wf_obj)

    def add_opinion(self,opinion_obj):
        """Add ``opinion_obj`` to the opinion layer, creating the layer if missing."""
        if self.opinion_layer is None:
            self.opinion_layer = Copinions()
            self.root.append(self.opinion_layer.get_node())
        self.opinion_layer.add_opinion(opinion_obj)

    def add_linguistic_processor(self, layer ,my_lp):
        """Forward ``layer`` and ``my_lp`` to the header.

        NOTE(review): there is no guard here, so this fails with an
        AttributeError when the document has no header.
        """
        self.header.add_linguistic_processor(layer,my_lp)

    def add_dependency(self,my_dep):
        """Add ``my_dep`` to the dependency layer, creating the layer if missing."""
        if self.dependency_layer is None:
            self.dependency_layer = Cdependencies()
            self.root.append(self.dependency_layer.get_node())
        self.dependency_layer.add_dependency(my_dep)

    ## Adds a property to the feature layer
    def add_property(self,label,term_span,pid=None):
        """Add a property to the features layer, creating the layer if missing.

        The features layer receives the arguments as ``(pid, label, term_span)``.
        """
        if self.features_layer is None:
            self.features_layer = Cfeatures(type=self.type)
            self.root.append(self.features_layer.get_node())
        self.features_layer.add_property(pid, label,term_span)

    ## EXTRA FUNCTIONS
    ## Gets the token identifiers in the span of a term id
    def get_dict_tokens_for_termid(self, term_id):
        """Return the span ids of ``term_id``, or ``[]`` for an unknown id.

        The term-id -> span-ids mapping is built on the first call and
        reused afterwards.
        """
        if self.dict_tokens_for_tid is None:
            self.dict_tokens_for_tid = {}
            for term in self.get_terms():
                self.dict_tokens_for_tid[term.get_id()] = term.get_span().get_span_ids()

        return self.dict_tokens_for_tid.get(term_id,[])

    ## Maps a list of token ids to term ids
    def map_tokens_to_terms(self,list_tokens):
        """Return the sorted, de-duplicated term ids whose span covers any given token.

        The token-id -> term-ids index is built on the first call and
        reused afterwards; unknown token ids contribute nothing.
        """
        if self.terms_for_token is None:
            self.terms_for_token = {}
            for term in self.get_terms():
                termid = term.get_id()
                for tokid in term.get_span().get_span_ids():
                    self.terms_for_token.setdefault(tokid,[]).append(termid)

        found = set()
        for my_id in list_tokens:
            found.update(self.terms_for_token.get(my_id,[]))
        return sorted(found)

    def remove_tokens_of_sentence(self,sentence_id):
        """Ask the text layer to remove the tokens of ``sentence_id``."""
        self.text_layer.remove_tokens_of_sentence(sentence_id)