opener-opinion-detector-base 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (261) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +101 -0
  3. data/bin/opinion-detector-base +19 -0
  4. data/core/annotation.cfg.erb +9 -0
  5. data/core/packages/KafNafParser-1.4.tar.gz +0 -0
  6. data/core/packages/VUA_pylib-1.5.tar.gz +0 -0
  7. data/core/python-scripts/LICENSE +339 -0
  8. data/core/python-scripts/README.md +226 -0
  9. data/core/python-scripts/classify_kaf_naf_file.py +499 -0
  10. data/core/python-scripts/cross_validation.py +634 -0
  11. data/core/python-scripts/generate_folds.py +134 -0
  12. data/core/python-scripts/models.cfg +10 -0
  13. data/core/python-scripts/my_templates/README +33 -0
  14. data/core/python-scripts/my_templates/templates_exp.only0.txt +6 -0
  15. data/core/python-scripts/my_templates/templates_exp.pol0.txt +10 -0
  16. data/core/python-scripts/my_templates/templates_exp.red.txt +7 -0
  17. data/core/python-scripts/my_templates/templates_exp.txt +10 -0
  18. data/core/python-scripts/my_templates/templates_holder.only0.txt +11 -0
  19. data/core/python-scripts/my_templates/templates_holder.red.txt +9 -0
  20. data/core/python-scripts/my_templates/templates_holder.txt +10 -0
  21. data/core/python-scripts/my_templates/templates_target.only0.txt +11 -0
  22. data/core/python-scripts/my_templates/templates_target.red.txt +9 -0
  23. data/core/python-scripts/my_templates/templates_target.txt +10 -0
  24. data/core/python-scripts/run_all_experiments.sh +49 -0
  25. data/core/python-scripts/run_basic.py +20 -0
  26. data/core/python-scripts/run_experiment.sh +42 -0
  27. data/core/python-scripts/scripts/__init__.py +1 -0
  28. data/core/python-scripts/scripts/config_manager.py +314 -0
  29. data/core/python-scripts/scripts/crfutils.py +215 -0
  30. data/core/python-scripts/scripts/extract_feats_relations.py +295 -0
  31. data/core/python-scripts/scripts/extract_features.py +376 -0
  32. data/core/python-scripts/scripts/feats_to_crf.exp.py +105 -0
  33. data/core/python-scripts/scripts/lexicons.py +44 -0
  34. data/core/python-scripts/scripts/link_entities_distance.py +77 -0
  35. data/core/python-scripts/scripts/relation_classifier.py +250 -0
  36. data/core/python-scripts/train.py +566 -0
  37. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/PKG-INFO +10 -0
  38. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/SOURCES.txt +22 -0
  39. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/dependency_links.txt +1 -0
  40. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/installed-files.txt +47 -0
  41. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/top_level.txt +1 -0
  42. data/core/site-packages/pre_build/KafNafParser/KafNafParserMod.py +390 -0
  43. data/core/site-packages/pre_build/KafNafParser/KafNafParserMod.pyc +0 -0
  44. data/core/site-packages/pre_build/KafNafParser/__init__.py +14 -0
  45. data/core/site-packages/pre_build/KafNafParser/__init__.pyc +0 -0
  46. data/core/site-packages/pre_build/KafNafParser/constituency_data.py +125 -0
  47. data/core/site-packages/pre_build/KafNafParser/constituency_data.pyc +0 -0
  48. data/core/site-packages/pre_build/KafNafParser/coreference_data.py +52 -0
  49. data/core/site-packages/pre_build/KafNafParser/coreference_data.pyc +0 -0
  50. data/core/site-packages/pre_build/KafNafParser/dependency_data.py +78 -0
  51. data/core/site-packages/pre_build/KafNafParser/dependency_data.pyc +0 -0
  52. data/core/site-packages/pre_build/KafNafParser/entity_data.py +59 -0
  53. data/core/site-packages/pre_build/KafNafParser/entity_data.pyc +0 -0
  54. data/core/site-packages/pre_build/KafNafParser/external_references_data.py +41 -0
  55. data/core/site-packages/pre_build/KafNafParser/external_references_data.pyc +0 -0
  56. data/core/site-packages/pre_build/KafNafParser/feature_extractor/__init__.py +2 -0
  57. data/core/site-packages/pre_build/KafNafParser/feature_extractor/__init__.pyc +0 -0
  58. data/core/site-packages/pre_build/KafNafParser/feature_extractor/constituency.py +205 -0
  59. data/core/site-packages/pre_build/KafNafParser/feature_extractor/constituency.pyc +0 -0
  60. data/core/site-packages/pre_build/KafNafParser/feature_extractor/dependency.py +309 -0
  61. data/core/site-packages/pre_build/KafNafParser/feature_extractor/dependency.pyc +0 -0
  62. data/core/site-packages/pre_build/KafNafParser/features_data.py +131 -0
  63. data/core/site-packages/pre_build/KafNafParser/features_data.pyc +0 -0
  64. data/core/site-packages/pre_build/KafNafParser/header_data.py +127 -0
  65. data/core/site-packages/pre_build/KafNafParser/header_data.pyc +0 -0
  66. data/core/site-packages/pre_build/KafNafParser/opinion_data.py +211 -0
  67. data/core/site-packages/pre_build/KafNafParser/opinion_data.pyc +0 -0
  68. data/core/site-packages/pre_build/KafNafParser/references_data.py +23 -0
  69. data/core/site-packages/pre_build/KafNafParser/references_data.pyc +0 -0
  70. data/core/site-packages/pre_build/KafNafParser/span_data.py +63 -0
  71. data/core/site-packages/pre_build/KafNafParser/span_data.pyc +0 -0
  72. data/core/site-packages/pre_build/KafNafParser/term_data.py +111 -0
  73. data/core/site-packages/pre_build/KafNafParser/term_data.pyc +0 -0
  74. data/core/site-packages/pre_build/KafNafParser/term_sentiment_data.py +42 -0
  75. data/core/site-packages/pre_build/KafNafParser/term_sentiment_data.pyc +0 -0
  76. data/core/site-packages/pre_build/KafNafParser/text_data.py +99 -0
  77. data/core/site-packages/pre_build/KafNafParser/text_data.pyc +0 -0
  78. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/PKG-INFO +10 -0
  79. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/SOURCES.txt +14 -0
  80. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/dependency_links.txt +1 -0
  81. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/installed-files.txt +23 -0
  82. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/top_level.txt +1 -0
  83. data/core/site-packages/pre_build/VUA_pylib/__init__.py +1 -0
  84. data/core/site-packages/pre_build/VUA_pylib/__init__.pyc +0 -0
  85. data/core/site-packages/pre_build/VUA_pylib/common/__init__.py +1 -0
  86. data/core/site-packages/pre_build/VUA_pylib/common/__init__.pyc +0 -0
  87. data/core/site-packages/pre_build/VUA_pylib/common/common.py +28 -0
  88. data/core/site-packages/pre_build/VUA_pylib/common/common.pyc +0 -0
  89. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/__init__.py +1 -0
  90. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/__init__.pyc +0 -0
  91. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/google_web_nl.py +156 -0
  92. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/google_web_nl.pyc +0 -0
  93. data/core/site-packages/pre_build/VUA_pylib/io_utils/__init__.py +1 -0
  94. data/core/site-packages/pre_build/VUA_pylib/io_utils/__init__.pyc +0 -0
  95. data/core/site-packages/pre_build/VUA_pylib/io_utils/feature_file.py +121 -0
  96. data/core/site-packages/pre_build/VUA_pylib/io_utils/feature_file.pyc +0 -0
  97. data/core/site-packages/pre_build/VUA_pylib/lexicon/__init__.py +1 -0
  98. data/core/site-packages/pre_build/VUA_pylib/lexicon/__init__.pyc +0 -0
  99. data/core/site-packages/pre_build/VUA_pylib/lexicon/lexicon.py +72 -0
  100. data/core/site-packages/pre_build/VUA_pylib/lexicon/lexicon.pyc +0 -0
  101. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/PKG-INFO +10 -0
  102. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/SOURCES.txt +7 -0
  103. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/dependency_links.txt +1 -0
  104. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/installed-files.txt +11 -0
  105. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/top_level.txt +1 -0
  106. data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.py +165 -0
  107. data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.pyc +0 -0
  108. data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.py +439 -0
  109. data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.pyc +0 -0
  110. data/core/site-packages/pre_build/VUKafParserPy/__init__.py +7 -0
  111. data/core/site-packages/pre_build/VUKafParserPy/__init__.pyc +0 -0
  112. data/core/vendor/src/crfsuite/AUTHORS +1 -0
  113. data/core/vendor/src/crfsuite/COPYING +27 -0
  114. data/core/vendor/src/crfsuite/ChangeLog +103 -0
  115. data/core/vendor/src/crfsuite/INSTALL +236 -0
  116. data/core/vendor/src/crfsuite/Makefile.am +19 -0
  117. data/core/vendor/src/crfsuite/Makefile.in +783 -0
  118. data/core/vendor/src/crfsuite/README +183 -0
  119. data/core/vendor/src/crfsuite/aclocal.m4 +9018 -0
  120. data/core/vendor/src/crfsuite/autogen.sh +38 -0
  121. data/core/vendor/src/crfsuite/compile +143 -0
  122. data/core/vendor/src/crfsuite/config.guess +1502 -0
  123. data/core/vendor/src/crfsuite/config.h.in +198 -0
  124. data/core/vendor/src/crfsuite/config.sub +1714 -0
  125. data/core/vendor/src/crfsuite/configure +14273 -0
  126. data/core/vendor/src/crfsuite/configure.in +149 -0
  127. data/core/vendor/src/crfsuite/crfsuite.sln +42 -0
  128. data/core/vendor/src/crfsuite/depcomp +630 -0
  129. data/core/vendor/src/crfsuite/example/chunking.py +49 -0
  130. data/core/vendor/src/crfsuite/example/crfutils.py +179 -0
  131. data/core/vendor/src/crfsuite/example/ner.py +270 -0
  132. data/core/vendor/src/crfsuite/example/pos.py +78 -0
  133. data/core/vendor/src/crfsuite/example/template.py +88 -0
  134. data/core/vendor/src/crfsuite/frontend/Makefile.am +29 -0
  135. data/core/vendor/src/crfsuite/frontend/Makefile.in +640 -0
  136. data/core/vendor/src/crfsuite/frontend/dump.c +116 -0
  137. data/core/vendor/src/crfsuite/frontend/frontend.vcxproj +129 -0
  138. data/core/vendor/src/crfsuite/frontend/iwa.c +273 -0
  139. data/core/vendor/src/crfsuite/frontend/iwa.h +65 -0
  140. data/core/vendor/src/crfsuite/frontend/learn.c +439 -0
  141. data/core/vendor/src/crfsuite/frontend/main.c +137 -0
  142. data/core/vendor/src/crfsuite/frontend/option.c +93 -0
  143. data/core/vendor/src/crfsuite/frontend/option.h +86 -0
  144. data/core/vendor/src/crfsuite/frontend/readdata.h +38 -0
  145. data/core/vendor/src/crfsuite/frontend/reader.c +136 -0
  146. data/core/vendor/src/crfsuite/frontend/tag.c +427 -0
  147. data/core/vendor/src/crfsuite/genbinary.sh.in +15 -0
  148. data/core/vendor/src/crfsuite/include/Makefile.am +11 -0
  149. data/core/vendor/src/crfsuite/include/Makefile.in +461 -0
  150. data/core/vendor/src/crfsuite/include/crfsuite.h +1063 -0
  151. data/core/vendor/src/crfsuite/include/crfsuite.hpp +555 -0
  152. data/core/vendor/src/crfsuite/include/crfsuite_api.hpp +400 -0
  153. data/core/vendor/src/crfsuite/include/os.h +61 -0
  154. data/core/vendor/src/crfsuite/install-sh +520 -0
  155. data/core/vendor/src/crfsuite/lib/cqdb/COPYING +28 -0
  156. data/core/vendor/src/crfsuite/lib/cqdb/Makefile.am +21 -0
  157. data/core/vendor/src/crfsuite/lib/cqdb/Makefile.in +549 -0
  158. data/core/vendor/src/crfsuite/lib/cqdb/cqdb.vcxproj +86 -0
  159. data/core/vendor/src/crfsuite/lib/cqdb/include/cqdb.h +524 -0
  160. data/core/vendor/src/crfsuite/lib/cqdb/src/cqdb.c +587 -0
  161. data/core/vendor/src/crfsuite/lib/cqdb/src/lookup3.c +976 -0
  162. data/core/vendor/src/crfsuite/lib/crf/Makefile.am +46 -0
  163. data/core/vendor/src/crfsuite/lib/crf/Makefile.in +721 -0
  164. data/core/vendor/src/crfsuite/lib/crf/crf.vcxproj +216 -0
  165. data/core/vendor/src/crfsuite/lib/crf/src/crf1d.h +353 -0
  166. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_context.c +705 -0
  167. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_encode.c +943 -0
  168. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_feature.c +352 -0
  169. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_model.c +994 -0
  170. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_tag.c +550 -0
  171. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite.c +492 -0
  172. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite_internal.h +236 -0
  173. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite_train.c +272 -0
  174. data/core/vendor/src/crfsuite/lib/crf/src/dataset.c +106 -0
  175. data/core/vendor/src/crfsuite/lib/crf/src/dictionary.c +118 -0
  176. data/core/vendor/src/crfsuite/lib/crf/src/holdout.c +80 -0
  177. data/core/vendor/src/crfsuite/lib/crf/src/logging.c +91 -0
  178. data/core/vendor/src/crfsuite/lib/crf/src/logging.h +48 -0
  179. data/core/vendor/src/crfsuite/lib/crf/src/params.c +335 -0
  180. data/core/vendor/src/crfsuite/lib/crf/src/params.h +80 -0
  181. data/core/vendor/src/crfsuite/lib/crf/src/quark.c +172 -0
  182. data/core/vendor/src/crfsuite/lib/crf/src/quark.h +46 -0
  183. data/core/vendor/src/crfsuite/lib/crf/src/rumavl.c +1107 -0
  184. data/core/vendor/src/crfsuite/lib/crf/src/rumavl.h +160 -0
  185. data/core/vendor/src/crfsuite/lib/crf/src/train_arow.c +408 -0
  186. data/core/vendor/src/crfsuite/lib/crf/src/train_averaged_perceptron.c +242 -0
  187. data/core/vendor/src/crfsuite/lib/crf/src/train_l2sgd.c +507 -0
  188. data/core/vendor/src/crfsuite/lib/crf/src/train_lbfgs.c +338 -0
  189. data/core/vendor/src/crfsuite/lib/crf/src/train_passive_aggressive.c +435 -0
  190. data/core/vendor/src/crfsuite/lib/crf/src/vecmath.h +341 -0
  191. data/core/vendor/src/crfsuite/ltmain.sh +8413 -0
  192. data/core/vendor/src/crfsuite/missing +376 -0
  193. data/core/vendor/src/crfsuite/swig/Makefile.am +13 -0
  194. data/core/vendor/src/crfsuite/swig/Makefile.in +365 -0
  195. data/core/vendor/src/crfsuite/swig/crfsuite.cpp +2 -0
  196. data/core/vendor/src/crfsuite/swig/export.i +32 -0
  197. data/core/vendor/src/crfsuite/swig/python/README +92 -0
  198. data/core/vendor/src/crfsuite/swig/python/crfsuite.py +329 -0
  199. data/core/vendor/src/crfsuite/swig/python/export_wrap.cpp +14355 -0
  200. data/core/vendor/src/crfsuite/swig/python/export_wrap.h +63 -0
  201. data/core/vendor/src/crfsuite/swig/python/prepare.sh +9 -0
  202. data/core/vendor/src/crfsuite/swig/python/sample_tag.py +52 -0
  203. data/core/vendor/src/crfsuite/swig/python/sample_train.py +68 -0
  204. data/core/vendor/src/crfsuite/swig/python/setup.py +44 -0
  205. data/core/vendor/src/crfsuite/win32/stdint.h +679 -0
  206. data/core/vendor/src/liblbfgs/AUTHORS +1 -0
  207. data/core/vendor/src/liblbfgs/COPYING +22 -0
  208. data/core/vendor/src/liblbfgs/ChangeLog +120 -0
  209. data/core/vendor/src/liblbfgs/INSTALL +231 -0
  210. data/core/vendor/src/liblbfgs/Makefile.am +10 -0
  211. data/core/vendor/src/liblbfgs/Makefile.in +638 -0
  212. data/core/vendor/src/liblbfgs/NEWS +0 -0
  213. data/core/vendor/src/liblbfgs/README +71 -0
  214. data/core/vendor/src/liblbfgs/aclocal.m4 +6985 -0
  215. data/core/vendor/src/liblbfgs/autogen.sh +38 -0
  216. data/core/vendor/src/liblbfgs/config.guess +1411 -0
  217. data/core/vendor/src/liblbfgs/config.h.in +64 -0
  218. data/core/vendor/src/liblbfgs/config.sub +1500 -0
  219. data/core/vendor/src/liblbfgs/configure +21146 -0
  220. data/core/vendor/src/liblbfgs/configure.in +107 -0
  221. data/core/vendor/src/liblbfgs/depcomp +522 -0
  222. data/core/vendor/src/liblbfgs/include/lbfgs.h +745 -0
  223. data/core/vendor/src/liblbfgs/install-sh +322 -0
  224. data/core/vendor/src/liblbfgs/lbfgs.sln +26 -0
  225. data/core/vendor/src/liblbfgs/lib/Makefile.am +24 -0
  226. data/core/vendor/src/liblbfgs/lib/Makefile.in +499 -0
  227. data/core/vendor/src/liblbfgs/lib/arithmetic_ansi.h +133 -0
  228. data/core/vendor/src/liblbfgs/lib/arithmetic_sse_double.h +294 -0
  229. data/core/vendor/src/liblbfgs/lib/arithmetic_sse_float.h +298 -0
  230. data/core/vendor/src/liblbfgs/lib/lbfgs.c +1371 -0
  231. data/core/vendor/src/liblbfgs/lib/lib.vcxproj +95 -0
  232. data/core/vendor/src/liblbfgs/ltmain.sh +6426 -0
  233. data/core/vendor/src/liblbfgs/missing +353 -0
  234. data/core/vendor/src/liblbfgs/sample/Makefile.am +15 -0
  235. data/core/vendor/src/liblbfgs/sample/Makefile.in +433 -0
  236. data/core/vendor/src/liblbfgs/sample/sample.c +81 -0
  237. data/core/vendor/src/liblbfgs/sample/sample.cpp +126 -0
  238. data/core/vendor/src/liblbfgs/sample/sample.vcxproj +105 -0
  239. data/core/vendor/src/svm_light/LICENSE.txt +59 -0
  240. data/core/vendor/src/svm_light/Makefile +105 -0
  241. data/core/vendor/src/svm_light/kernel.h +40 -0
  242. data/core/vendor/src/svm_light/svm_classify.c +197 -0
  243. data/core/vendor/src/svm_light/svm_common.c +985 -0
  244. data/core/vendor/src/svm_light/svm_common.h +301 -0
  245. data/core/vendor/src/svm_light/svm_hideo.c +1062 -0
  246. data/core/vendor/src/svm_light/svm_learn.c +4147 -0
  247. data/core/vendor/src/svm_light/svm_learn.h +169 -0
  248. data/core/vendor/src/svm_light/svm_learn_main.c +397 -0
  249. data/core/vendor/src/svm_light/svm_loqo.c +211 -0
  250. data/ext/hack/Rakefile +17 -0
  251. data/ext/hack/support.rb +88 -0
  252. data/lib/opener/opinion_detectors/base.rb +112 -0
  253. data/lib/opener/opinion_detectors/base/version.rb +7 -0
  254. data/lib/opener/opinion_detectors/configuration_creator.rb +86 -0
  255. data/lib/opener/opinion_detectors/de.rb +7 -0
  256. data/lib/opener/opinion_detectors/en.rb +7 -0
  257. data/lib/opener/opinion_detectors/it.rb +7 -0
  258. data/lib/opener/opinion_detectors/nl.rb +6 -0
  259. data/opener-opinion-detector-base.gemspec +35 -0
  260. data/pre_build_requirements.txt +3 -0
  261. metadata +374 -0
@@ -0,0 +1,14 @@
1
+ from KafNafParserMod import *
2
+ from header_data import *
3
+ from external_references_data import *
4
+ from span_data import *
5
+ from term_data import *
6
+ from term_sentiment_data import *
7
+ from text_data import *
8
+ from entity_data import *
9
+ from features_data import *
10
+ from opinion_data import *
11
+ from dependency_data import *
12
+ from constituency_data import *
13
+ from references_data import *
14
+ from coreference_data import *
@@ -0,0 +1,125 @@
1
+ from lxml import etree
2
+ from lxml.objectify import dump
3
+ from span_data import Cspan
4
+
5
+
6
class Cnonterminal:
    """Thin wrapper around an ``<nt>`` (non-terminal) element of a constituency tree."""

    def __init__(self, node=None):
        """Wrap ``node``; when ``node`` is None an empty ``<nt>`` element is created."""
        self.node = etree.Element('nt') if node is None else node

    def __str__(self):
        """Return the result of ``dump`` (from lxml.objectify) for the wrapped element."""
        return dump(self.node)

    def get_label(self):
        """Return the ``label`` attribute of the wrapped element."""
        return self.node.get('label')

    def get_id(self):
        """Return the ``id`` attribute of the wrapped element."""
        return self.node.get('id')
21
+
22
+
23
+
24
class Cterminal:
    """Thin wrapper around a ``<t>`` (terminal) element of a constituency tree."""

    def __init__(self, node=None):
        """Wrap ``node``; when ``node`` is None an empty ``<t>`` element is created."""
        self.node = etree.Element('t') if node is None else node

    def __str__(self):
        """Return the result of ``dump`` (from lxml.objectify) for the wrapped element."""
        return dump(self.node)

    def get_span(self):
        """Return a ``Cspan`` built from the first ``<span>`` child of the element.

        The lookup result is handed to ``Cspan`` unchanged, also when no
        ``<span>`` child exists.
        """
        return Cspan(self.node.find('span'))

    def get_id(self):
        """Return the ``id`` attribute of the wrapped element."""
        return self.node.get('id')
40
+
41
class Cedge:
    """Thin wrapper around an ``<edge>`` element of a constituency tree."""

    def __init__(self, node=None):
        """Wrap ``node``; when ``node`` is None an empty ``<edge>`` element is created."""
        self.node = etree.Element('edge') if node is None else node

    def get_to(self):
        """Return the ``to`` attribute of the edge."""
        return self.node.get('to')

    def get_from(self):
        """Return the ``from`` attribute of the edge."""
        return self.node.get('from')

    def __str__(self):
        """Return the result of ``dump`` (from lxml.objectify) for the wrapped element."""
        return dump(self.node)
56
+
57
+
58
+
59
class Ctree:
    """Wrapper around a ``<tree>`` element holding non-terminals, terminals and edges."""

    def __init__(self, node=None):
        """Wrap ``node``; when ``node`` is None an empty ``<tree>`` element is created."""
        self.node = etree.Element('tree') if node is None else node

    def __str__(self):
        """Return the result of ``dump`` (from lxml.objectify) for the wrapped element."""
        return dump(self.node)

    # --- non-terminals -------------------------------------------------
    def __get_nt_nodes(self):
        """Yield the raw ``<nt>`` child elements."""
        for element in self.node.findall('nt'):
            yield element

    def get_non_terminals(self):
        """Return a generator of ``Cnonterminal`` wrappers, one per ``<nt>`` child."""
        return (Cnonterminal(element) for element in self.__get_nt_nodes())

    # --- terminals -----------------------------------------------------
    def __get_t_nodes(self):
        """Yield the raw ``<t>`` child elements."""
        for element in self.node.findall('t'):
            yield element

    def get_terminals(self):
        """Return a generator of ``Cterminal`` wrappers, one per ``<t>`` child."""
        return (Cterminal(element) for element in self.__get_t_nodes())

    # --- edges ---------------------------------------------------------
    def __get_edge_nodes(self):
        """Yield the raw ``<edge>`` child elements."""
        for element in self.node.findall('edge'):
            yield element

    def get_edges(self):
        """Return a generator of ``Cedge`` wrappers, one per ``<edge>`` child."""
        return (Cedge(element) for element in self.__get_edge_nodes())
99
+
100
+
101
+
102
class Cconstituency:
    """Wrapper around the ``<constituency>`` layer element, a container of trees."""

    def __init__(self, node=None):
        """Wrap ``node``; when ``node`` is None an empty ``<constituency>`` element is created."""
        # NOTE(review): 'NAF/NAF' looks like it may be a typo for 'NAF/KAF';
        # the value is kept verbatim -- confirm before changing it.
        self.type = 'NAF/NAF'
        self.node = etree.Element('constituency') if node is None else node

    def __str__(self):
        """Return the result of ``dump`` (from lxml.objectify) for the wrapped element."""
        return dump(self.node)

    def to_kaf(self):
        """Do nothing; this layer performs no conversion step."""

    def to_naf(self):
        """Do nothing; this layer performs no conversion step."""

    def __get_tree_nodes(self):
        """Yield the raw ``<tree>`` child elements."""
        for element in self.node.findall('tree'):
            yield element

    def get_trees(self):
        """Return a generator of ``Ctree`` wrappers, one per ``<tree>`` child."""
        return (Ctree(element) for element in self.__get_tree_nodes())
@@ -0,0 +1,52 @@
1
+ from lxml import etree
2
+ from span_data import Cspan
3
+
4
class Ccoreference:
    """Wrapper around a single ``<coref>`` element."""

    def __init__(self, node=None, type='NAF'):
        """Wrap ``node`` (or a new empty ``<coref>`` element) and remember the format ``type``."""
        self.node = etree.Element('coref') if node is None else node
        self.type = type

    def get_id(self):
        """Return the identifier: attribute ``id`` for 'NAF', ``coid`` for 'KAF'.

        Any other ``type`` value yields None.
        """
        if self.type == 'NAF':
            return self.node.get('id')
        if self.type == 'KAF':
            return self.node.get('coid')
        return None

    def get_spans(self):
        """Yield one ``Cspan`` per ``<span>`` child of the element."""
        for span_element in self.node.findall('span'):
            yield Cspan(span_element)
21
+
22
+
23
+
24
class Ccoreferences:
    """Wrapper around the ``<coreferences>`` layer element.

    ``type`` records which format ('NAF' or 'KAF') the identifier attributes
    of the ``<coref>`` children currently follow: ``id`` for NAF, ``coid``
    for KAF.
    """

    def __init__(self, node=None, type='NAF'):
        """Wrap ``node`` (or a new empty ``<coreferences>`` element) and remember ``type``."""
        self.type = type
        if node is None:
            self.node = etree.Element('coreferences')
        else:
            self.node = node

    def __get_corefs_nodes(self):
        """Yield the raw ``<coref>`` child elements."""
        for coref_node in self.node.findall('coref'):
            yield coref_node

    def get_corefs(self):
        """Yield a ``Ccoreference`` for every ``<coref>`` child, using the current ``type``."""
        for coref_node in self.__get_corefs_nodes():
            yield Ccoreference(coref_node, self.type)

    def to_kaf(self):
        """Rename attribute ``id`` to ``coid`` on every ``<coref>`` and switch ``type`` to 'KAF'.

        Does nothing unless the current ``type`` is 'NAF'.
        """
        if self.type == 'NAF':
            for node_coref in self.__get_corefs_nodes():
                identifier = node_coref.get('id')
                # Elements without the attribute are left alone instead of
                # passing None to set().
                if identifier is not None:
                    node_coref.set('coid', identifier)
                    del node_coref.attrib['id']
            # Keep the recorded format in sync with the attributes so that
            # get_corefs() reads the right one and a repeated call is a no-op.
            self.type = 'KAF'

    def to_naf(self):
        """Rename attribute ``coid`` to ``id`` on every ``<coref>`` and switch ``type`` to 'NAF'.

        Does nothing unless the current ``type`` is 'KAF'.
        """
        if self.type == 'KAF':
            for node_coref in self.__get_corefs_nodes():
                identifier = node_coref.get('coid')
                if identifier is not None:
                    node_coref.set('id', identifier)
                    del node_coref.attrib['coid']
            self.type = 'NAF'
51
+
52
+
@@ -0,0 +1,78 @@
1
+ from lxml import etree
2
+ #from lxml.objectify import dump
3
+
4
+
5
class Cdependency:
    """Wrapper around a single ``<dep>`` (dependency relation) element."""

    def __init__(self, node=None):
        """Wrap ``node``; when ``node`` is None an empty ``<dep>`` element is created."""
        if node is None:
            self.node = etree.Element('dep')
        else:
            self.node = node
        # Comment node created by set_comment(); None until one is set, so
        # get_node_comment() no longer fails with AttributeError.
        self.node_comment = None

    def get_node_comment(self):
        """Return the comment node added through ``set_comment``, or None.

        Only comments created via ``set_comment`` on this wrapper are tracked.
        """
        return self.node_comment

    def get_node(self):
        """Return the wrapped element."""
        return self.node

    def get_from(self):
        """Return the ``from`` attribute."""
        return self.node.get('from')

    def get_to(self):
        """Return the ``to`` attribute."""
        return self.node.get('to')

    def get_function(self):
        """Return the ``rfunc`` attribute."""
        return self.node.get('rfunc')

    def set_from(self, f):
        """Set the ``from`` attribute to ``f``."""
        self.node.set('from', f)

    def set_to(self, t):
        """Set the ``to`` attribute to ``t``."""
        self.node.set('to', t)

    def set_function(self, f):
        """Set the ``rfunc`` attribute to ``f``."""
        self.node.set('rfunc', f)

    def set_comment(self, c):
        """Insert ``c`` as a comment node at the first position of the element."""
        # '--' is not allowed inside an XML comment, so break it up.
        c = c.replace('--', '- -')
        self.node_comment = etree.Comment(c)
        self.node.insert(0, self.node_comment)

    def __str__(self):
        """Return the result of ``lxml.objectify.dump`` for the wrapped element."""
        # Imported here because the module-level import of ``dump`` is
        # commented out, which made this method raise NameError.
        from lxml.objectify import dump
        return dump(self.node)
44
+
45
+
46
+
47
class Cdependencies:
    """Wrapper around the ``<deps>`` layer element, a container of ``<dep>`` elements."""

    def __init__(self, node=None):
        """Wrap ``node``; when ``node`` is None an empty ``<deps>`` element is created."""
        if node is None:
            self.node = etree.Element('deps')
        else:
            self.node = node

    def get_node(self):
        """Return the wrapped element."""
        return self.node

    def to_kaf(self):
        """Do nothing; this layer performs no conversion step."""
        pass

    def to_naf(self):
        """Do nothing; this layer performs no conversion step."""
        pass

    def __str__(self):
        """Return the result of ``lxml.objectify.dump`` for the wrapped element."""
        # Imported here because the module-level import of ``dump`` is
        # commented out, which made this method raise NameError.
        from lxml.objectify import dump
        return dump(self.node)

    def __get_node_deps(self):
        """Yield the raw ``<dep>`` child elements."""
        for node_dep in self.node.findall('dep'):
            yield node_dep

    def get_dependencies(self):
        """Yield a ``Cdependency`` wrapper for every ``<dep>`` child."""
        for node in self.__get_node_deps():
            yield Cdependency(node)

    def add_dependency(self, my_dep):
        """Append the element returned by ``my_dep.get_node()`` to this layer."""
        self.node.append(my_dep.get_node())
78
+
@@ -0,0 +1,59 @@
1
+ ## Modified for KAF NAF adaptation
2
+ from lxml import etree
3
+ from lxml.objectify import dump
4
+ from references_data import *
5
+
6
+
7
class Centity:
    """Wrapper around a single ``<entity>`` element."""

    def __init__(self, node=None, type='NAF'):
        """Wrap ``node`` (or a new empty ``<entity>`` element) and remember the format ``type``."""
        self.node = etree.Element('entity') if node is None else node
        self.type = type

    def get_type(self):
        """Return the ``type`` attribute of the element (not the format stored in ``self.type``)."""
        return self.node.get('type')

    def get_id(self):
        """Return the identifier: attribute ``id`` for 'NAF', ``eid`` for 'KAF'.

        Any other ``self.type`` value yields None.
        """
        if self.type == 'NAF':
            return self.node.get('id')
        if self.type == 'KAF':
            return self.node.get('eid')
        return None

    def get_references(self):
        """Yield one ``Creferences`` per ``<references>`` child of the element."""
        for references_element in self.node.findall('references'):
            yield Creferences(references_element)
27
+
28
class Centities:
    """Wrapper around the ``<entities>`` layer element.

    ``type`` records which format ('NAF' or 'KAF') the identifier attributes
    of the ``<entity>`` children currently follow: ``id`` for NAF, ``eid``
    for KAF.
    """

    def __init__(self, node=None, type='NAF'):
        """Wrap ``node`` (or a new empty ``<entities>`` element) and remember ``type``."""
        self.type = type
        if node is None:
            self.node = etree.Element('entities')
        else:
            self.node = node

    def to_kaf(self):
        """Rename attribute ``id`` to ``eid`` on every ``<entity>`` and switch ``type`` to 'KAF'.

        Does nothing unless the current ``type`` is 'NAF'.
        """
        if self.type == 'NAF':
            for node in self.__get_entity_nodes():
                identifier = node.get('id')
                # Elements without the attribute are left alone instead of
                # passing None to set().
                if identifier is not None:
                    node.set('eid', identifier)
                    del node.attrib['id']
            # Keep the recorded format in sync with the attributes so that
            # iteration reads the right one and a repeated call is a no-op.
            self.type = 'KAF'

    def to_naf(self):
        """Rename attribute ``eid`` to ``id`` on every ``<entity>`` and switch ``type`` to 'NAF'.

        Does nothing unless the current ``type`` is 'KAF'.
        """
        if self.type == 'KAF':
            for node in self.__get_entity_nodes():
                identifier = node.get('eid')
                if identifier is not None:
                    node.set('id', identifier)
                    del node.attrib['eid']
            self.type = 'NAF'

    def __get_entity_nodes(self):
        """Yield the raw ``<entity>`` child elements."""
        for ent_node in self.node.findall('entity'):
            yield ent_node

    def __iter__(self):
        """Yield a ``Centity`` for every ``<entity>`` child, using the current ``type``."""
        for ent_node in self.__get_entity_nodes():
            yield Centity(ent_node, self.type)

    def __str__(self):
        """Return the result of ``dump`` (from lxml.objectify) for the wrapped element."""
        return dump(self.node)
@@ -0,0 +1,41 @@
1
+ # included modification for KAF/NAF
2
+ from term_sentiment_data import Cterm_sentiment
3
+ from lxml import etree
4
+
5
class CexternalReference:
    """Wrapper around a single ``<externalRef>`` element."""

    def __init__(self, node=None):
        """Wrap ``node``; when ``node`` is None an empty ``<externalRef>`` element is created."""
        self.type = 'NAF/KAF'
        self.node = etree.Element('externalRef') if node is None else node

    def get_node(self):
        """Return the wrapped element."""
        return self.node

    def set_reference(self, reference):
        """Set the ``reference`` attribute."""
        self.node.set('reference', reference)

    def set_confidence(self, confidence):
        """Set the ``confidence`` attribute."""
        self.node.set('confidence', confidence)

    def set_resource(self, resource):
        """Set the ``resource`` attribute."""
        self.node.set('resource', resource)
25
+
26
+
27
class CexternalReferences:
    """Wrapper around an ``<externalReferences>`` container element."""

    def __init__(self, node=None):
        """Wrap ``node``; when ``node`` is None an empty ``<externalReferences>`` element is created."""
        self.node = etree.Element('externalReferences') if node is None else node

    def get_node(self):
        """Return the wrapped element."""
        return self.node

    def add_external_reference(self, ext_ref):
        """Append the element returned by ``ext_ref.get_node()`` to the container."""
        child = ext_ref.get_node()
        self.node.append(child)
39
+
40
+
41
+
@@ -0,0 +1,2 @@
1
+ from dependency import *
2
+ from constituency import *
@@ -0,0 +1,205 @@
1
+ #!/usr/bin/env python
2
+
3
+ from operator import itemgetter
4
+
5
+ '''
6
+ Extract information from the constituency layer of a NAF file
7
+ '''
8
+
9
class Cconstituency_extractor:
    """Index the constituency trees of a KAF/NAF object for path and chunk queries.

    On construction every tree returned by ``knaf_obj.get_trees()`` is read
    and these lookup tables are built:

    * ``terminals``: terminal id --> list of term ids in its span
    * ``terminal_for_term``: term id --> terminal id
    * ``label_for_nonter``: non-terminal id --> label
    * ``reachable_from``: node id --> list of node ids reachable over one edge
    * ``paths_for_terminal``: terminal id --> list of paths (lists of node
      ids) obtained by following the edges from the terminal; the terminal
      itself is left out when it has at least one outgoing edge
    * ``terms_subsumed_by_nonter``: node id --> set of term ids whose
      terminal has a path through that node
    """

    def __init__(self, knaf_obj):
        """Read the constituency information of ``knaf_obj`` and build the lookup tables."""
        self.naf = knaf_obj
        self.terminals = {}
        self.terminal_for_term = {}
        self.label_for_nonter = {}
        self.reachable_from = {}

        self.extract_info_from_naf(knaf_obj)

        # All possible paths, following the edges, for every terminal id.
        self.paths_for_terminal = {}
        for terminal_id in self.terminals.keys():
            self.paths_for_terminal[terminal_id] = self.__expand_node(terminal_id, False)

        # For every node on a path, the term ids of the terminals below it.
        self.terms_subsumed_by_nonter = {}
        for terminal_id, span_terms in self.terminals.items():
            for path in self.paths_for_terminal[terminal_id]:
                for nonter in path:
                    self.terms_subsumed_by_nonter.setdefault(nonter, set()).update(span_terms)

    def __paths_for_term(self, termid):
        """Return the list of paths for the terminal of ``termid``, or None if unknown."""
        terminal_id = self.terminal_for_term.get(termid)
        return self.paths_for_terminal.get(terminal_id)

    def get_deepest_phrases(self):
        """Group terminals by the second node of their paths; currently returns None.

        NOTE(review): the collected data is never returned or stored, so this
        method has no observable result; it looks unfinished. The return
        value (None) is kept as it was.
        """
        all_nonter = set()
        for terminal in self.terminals:
            for path in self.paths_for_terminal[terminal]:
                # Paths with a single node have no second node to collect.
                if len(path) > 1:
                    all_nonter.add(path[1])

        ter_for_nonter = {}
        for nonter in all_nonter:
            for terminal in self.terminals:
                for path in self.paths_for_terminal[terminal]:
                    if nonter in path:
                        ter_for_nonter.setdefault(nonter, []).append(terminal)

        visited = set()
        for list_term in ter_for_nonter.values():
            visited.update(list_term)

    def get_deepest_phrase_for_termid(self, termid):
        """Return ``(label, term_ids)`` for the second node on the first path of ``termid``.

        ``label`` is the label of that node and ``term_ids`` the sorted term
        ids it subsumes. ``(None, [])`` is returned when ``termid`` is unknown
        or its first path has fewer than two nodes.
        """
        paths = self.__paths_for_term(termid)
        if not paths:
            return None, []
        first_path = paths[0]
        # A one-node path has no phrase above the first node; previously this
        # raised IndexError.
        if len(first_path) < 2:
            return None, []
        first_phrase_id = first_path[1]
        label = self.label_for_nonter.get(first_phrase_id)
        subsumed = self.terms_subsumed_by_nonter.get(first_phrase_id, [])
        return label, sorted(subsumed)

    def get_least_common_subsumer(self, from_tid, to_tid):
        """Return the node id shared by the first paths of both terms that is closest to them.

        "Closest" means the smallest sum of the node's positions in the two
        paths. None is returned when the paths share no node or when either
        term id is unknown.
        """
        paths_from = self.__paths_for_term(from_tid)
        paths_to = self.__paths_for_term(to_tid)
        if not paths_from or not paths_to:
            return None

        path_from = paths_from[0]
        path_to = paths_to[0]
        common_nodes = set(path_from) & set(path_to)
        if not common_nodes:
            return None
        return min(common_nodes,
                   key=lambda node: path_from.index(node) + path_to.index(node))

    def get_path_from_to(self, from_tid, to_tid):
        """Return the labels along the path from ``from_tid`` up to the common subsumer and down to ``to_tid``.

        None is returned when there is no common subsumer (which includes
        unknown term ids). A node on the path without a recorded label raises
        KeyError.
        """
        shortest_subsumer = self.get_least_common_subsumer(from_tid, to_tid)
        if shortest_subsumer is None:
            return None

        path_from = self.__paths_for_term(from_tid)[0]
        path_to = self.__paths_for_term(to_tid)[0]

        # Upwards: from the start of path_from up to and including the subsumer.
        complete_path = []
        for node in path_from:
            complete_path.append(node)
            if node == shortest_subsumer:
                break

        # Downwards: path_to walked backwards, starting just after the subsumer.
        begin = False
        for node in reversed(path_to):
            if begin:
                complete_path.append(node)
            if node == shortest_subsumer:
                begin = True

        return [self.label_for_nonter[nonter] for nonter in complete_path]

    def get_path_for_termid(self, termid):
        """Return the labels of the nodes on the first path of ``termid``.

        An empty list is returned for an unknown ``termid``. A node on the
        path without a recorded label raises KeyError.
        """
        paths = self.__paths_for_term(termid)
        if not paths:
            return []
        return [self.label_for_nonter[nonter] for nonter in paths[0]]

    def extract_info_from_naf(self, knaf_obj):
        """Fill the terminal, label and edge tables from the trees of ``knaf_obj``."""
        # Reset here; the paths are (re)computed by __init__ after this call.
        self.paths_for_terminal = {}
        for tree in knaf_obj.get_trees():
            for terminal in tree.get_terminals():
                ter_id = terminal.get_id()
                span_ids = terminal.get_span().get_span_ids()
                self.terminals[ter_id] = span_ids
                for this_id in span_ids:
                    self.terminal_for_term[this_id] = ter_id

            for non_terminal in tree.get_non_terminals():
                self.label_for_nonter[non_terminal.get_id()] = non_terminal.get_label()

            for edge in tree.get_edges():
                self.reachable_from.setdefault(edge.get_from(), []).append(edge.get_to())

    def __expand_node(self, node, include_this_node=True):
        """Return every path (list of node ids) obtained by following edges from ``node``.

        Recursive. ``include_this_node`` controls whether ``node`` is put at
        the front of each path; a node without outgoing edges always yields
        the single path ``[node]``.
        NOTE(review): there is no cycle detection, so this presumably relies
        on the edges forming a tree -- confirm.
        """
        possible_nodes = self.reachable_from.get(node, [])
        if not possible_nodes:
            return [[node]]

        paths = []
        for possible_node in possible_nodes:
            for path in self.__expand_node(possible_node):
                if include_this_node:
                    path.insert(0, node)
                paths.append(path)
        return paths

    def get_chunks(self, chunk_type):
        """Yield the sorted term ids subsumed by each node labelled ``chunk_type``."""
        for nonter, this_type in self.label_for_nonter.items():
            if this_type == chunk_type:
                subsumed = self.terms_subsumed_by_nonter.get(nonter)
                if subsumed is not None:
                    yield sorted(subsumed)

    def get_all_chunks_for_term(self, termid):
        """Yield ``(label, sorted term ids)`` for every node on every path of ``termid``.

        Nothing is yielded for an unknown ``termid``. A node on a path without
        a recorded label raises KeyError.
        """
        for path in self.__paths_for_term(termid) or []:
            for node in path:
                this_type = self.label_for_nonter[node]
                subsumed = self.terms_subsumed_by_nonter.get(node)
                if subsumed is not None:
                    yield this_type, sorted(subsumed)
205
+