opener-opinion-detector-base 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (261) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +101 -0
  3. data/bin/opinion-detector-base +19 -0
  4. data/core/annotation.cfg.erb +9 -0
  5. data/core/packages/KafNafParser-1.4.tar.gz +0 -0
  6. data/core/packages/VUA_pylib-1.5.tar.gz +0 -0
  7. data/core/python-scripts/LICENSE +339 -0
  8. data/core/python-scripts/README.md +226 -0
  9. data/core/python-scripts/classify_kaf_naf_file.py +499 -0
  10. data/core/python-scripts/cross_validation.py +634 -0
  11. data/core/python-scripts/generate_folds.py +134 -0
  12. data/core/python-scripts/models.cfg +10 -0
  13. data/core/python-scripts/my_templates/README +33 -0
  14. data/core/python-scripts/my_templates/templates_exp.only0.txt +6 -0
  15. data/core/python-scripts/my_templates/templates_exp.pol0.txt +10 -0
  16. data/core/python-scripts/my_templates/templates_exp.red.txt +7 -0
  17. data/core/python-scripts/my_templates/templates_exp.txt +10 -0
  18. data/core/python-scripts/my_templates/templates_holder.only0.txt +11 -0
  19. data/core/python-scripts/my_templates/templates_holder.red.txt +9 -0
  20. data/core/python-scripts/my_templates/templates_holder.txt +10 -0
  21. data/core/python-scripts/my_templates/templates_target.only0.txt +11 -0
  22. data/core/python-scripts/my_templates/templates_target.red.txt +9 -0
  23. data/core/python-scripts/my_templates/templates_target.txt +10 -0
  24. data/core/python-scripts/run_all_experiments.sh +49 -0
  25. data/core/python-scripts/run_basic.py +20 -0
  26. data/core/python-scripts/run_experiment.sh +42 -0
  27. data/core/python-scripts/scripts/__init__.py +1 -0
  28. data/core/python-scripts/scripts/config_manager.py +314 -0
  29. data/core/python-scripts/scripts/crfutils.py +215 -0
  30. data/core/python-scripts/scripts/extract_feats_relations.py +295 -0
  31. data/core/python-scripts/scripts/extract_features.py +376 -0
  32. data/core/python-scripts/scripts/feats_to_crf.exp.py +105 -0
  33. data/core/python-scripts/scripts/lexicons.py +44 -0
  34. data/core/python-scripts/scripts/link_entities_distance.py +77 -0
  35. data/core/python-scripts/scripts/relation_classifier.py +250 -0
  36. data/core/python-scripts/train.py +566 -0
  37. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/PKG-INFO +10 -0
  38. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/SOURCES.txt +22 -0
  39. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/dependency_links.txt +1 -0
  40. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/installed-files.txt +47 -0
  41. data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/top_level.txt +1 -0
  42. data/core/site-packages/pre_build/KafNafParser/KafNafParserMod.py +390 -0
  43. data/core/site-packages/pre_build/KafNafParser/KafNafParserMod.pyc +0 -0
  44. data/core/site-packages/pre_build/KafNafParser/__init__.py +14 -0
  45. data/core/site-packages/pre_build/KafNafParser/__init__.pyc +0 -0
  46. data/core/site-packages/pre_build/KafNafParser/constituency_data.py +125 -0
  47. data/core/site-packages/pre_build/KafNafParser/constituency_data.pyc +0 -0
  48. data/core/site-packages/pre_build/KafNafParser/coreference_data.py +52 -0
  49. data/core/site-packages/pre_build/KafNafParser/coreference_data.pyc +0 -0
  50. data/core/site-packages/pre_build/KafNafParser/dependency_data.py +78 -0
  51. data/core/site-packages/pre_build/KafNafParser/dependency_data.pyc +0 -0
  52. data/core/site-packages/pre_build/KafNafParser/entity_data.py +59 -0
  53. data/core/site-packages/pre_build/KafNafParser/entity_data.pyc +0 -0
  54. data/core/site-packages/pre_build/KafNafParser/external_references_data.py +41 -0
  55. data/core/site-packages/pre_build/KafNafParser/external_references_data.pyc +0 -0
  56. data/core/site-packages/pre_build/KafNafParser/feature_extractor/__init__.py +2 -0
  57. data/core/site-packages/pre_build/KafNafParser/feature_extractor/__init__.pyc +0 -0
  58. data/core/site-packages/pre_build/KafNafParser/feature_extractor/constituency.py +205 -0
  59. data/core/site-packages/pre_build/KafNafParser/feature_extractor/constituency.pyc +0 -0
  60. data/core/site-packages/pre_build/KafNafParser/feature_extractor/dependency.py +309 -0
  61. data/core/site-packages/pre_build/KafNafParser/feature_extractor/dependency.pyc +0 -0
  62. data/core/site-packages/pre_build/KafNafParser/features_data.py +131 -0
  63. data/core/site-packages/pre_build/KafNafParser/features_data.pyc +0 -0
  64. data/core/site-packages/pre_build/KafNafParser/header_data.py +127 -0
  65. data/core/site-packages/pre_build/KafNafParser/header_data.pyc +0 -0
  66. data/core/site-packages/pre_build/KafNafParser/opinion_data.py +211 -0
  67. data/core/site-packages/pre_build/KafNafParser/opinion_data.pyc +0 -0
  68. data/core/site-packages/pre_build/KafNafParser/references_data.py +23 -0
  69. data/core/site-packages/pre_build/KafNafParser/references_data.pyc +0 -0
  70. data/core/site-packages/pre_build/KafNafParser/span_data.py +63 -0
  71. data/core/site-packages/pre_build/KafNafParser/span_data.pyc +0 -0
  72. data/core/site-packages/pre_build/KafNafParser/term_data.py +111 -0
  73. data/core/site-packages/pre_build/KafNafParser/term_data.pyc +0 -0
  74. data/core/site-packages/pre_build/KafNafParser/term_sentiment_data.py +42 -0
  75. data/core/site-packages/pre_build/KafNafParser/term_sentiment_data.pyc +0 -0
  76. data/core/site-packages/pre_build/KafNafParser/text_data.py +99 -0
  77. data/core/site-packages/pre_build/KafNafParser/text_data.pyc +0 -0
  78. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/PKG-INFO +10 -0
  79. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/SOURCES.txt +14 -0
  80. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/dependency_links.txt +1 -0
  81. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/installed-files.txt +23 -0
  82. data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/top_level.txt +1 -0
  83. data/core/site-packages/pre_build/VUA_pylib/__init__.py +1 -0
  84. data/core/site-packages/pre_build/VUA_pylib/__init__.pyc +0 -0
  85. data/core/site-packages/pre_build/VUA_pylib/common/__init__.py +1 -0
  86. data/core/site-packages/pre_build/VUA_pylib/common/__init__.pyc +0 -0
  87. data/core/site-packages/pre_build/VUA_pylib/common/common.py +28 -0
  88. data/core/site-packages/pre_build/VUA_pylib/common/common.pyc +0 -0
  89. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/__init__.py +1 -0
  90. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/__init__.pyc +0 -0
  91. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/google_web_nl.py +156 -0
  92. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/google_web_nl.pyc +0 -0
  93. data/core/site-packages/pre_build/VUA_pylib/io_utils/__init__.py +1 -0
  94. data/core/site-packages/pre_build/VUA_pylib/io_utils/__init__.pyc +0 -0
  95. data/core/site-packages/pre_build/VUA_pylib/io_utils/feature_file.py +121 -0
  96. data/core/site-packages/pre_build/VUA_pylib/io_utils/feature_file.pyc +0 -0
  97. data/core/site-packages/pre_build/VUA_pylib/lexicon/__init__.py +1 -0
  98. data/core/site-packages/pre_build/VUA_pylib/lexicon/__init__.pyc +0 -0
  99. data/core/site-packages/pre_build/VUA_pylib/lexicon/lexicon.py +72 -0
  100. data/core/site-packages/pre_build/VUA_pylib/lexicon/lexicon.pyc +0 -0
  101. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/PKG-INFO +10 -0
  102. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/SOURCES.txt +7 -0
  103. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/dependency_links.txt +1 -0
  104. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/installed-files.txt +11 -0
  105. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/top_level.txt +1 -0
  106. data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.py +165 -0
  107. data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.pyc +0 -0
  108. data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.py +439 -0
  109. data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.pyc +0 -0
  110. data/core/site-packages/pre_build/VUKafParserPy/__init__.py +7 -0
  111. data/core/site-packages/pre_build/VUKafParserPy/__init__.pyc +0 -0
  112. data/core/vendor/src/crfsuite/AUTHORS +1 -0
  113. data/core/vendor/src/crfsuite/COPYING +27 -0
  114. data/core/vendor/src/crfsuite/ChangeLog +103 -0
  115. data/core/vendor/src/crfsuite/INSTALL +236 -0
  116. data/core/vendor/src/crfsuite/Makefile.am +19 -0
  117. data/core/vendor/src/crfsuite/Makefile.in +783 -0
  118. data/core/vendor/src/crfsuite/README +183 -0
  119. data/core/vendor/src/crfsuite/aclocal.m4 +9018 -0
  120. data/core/vendor/src/crfsuite/autogen.sh +38 -0
  121. data/core/vendor/src/crfsuite/compile +143 -0
  122. data/core/vendor/src/crfsuite/config.guess +1502 -0
  123. data/core/vendor/src/crfsuite/config.h.in +198 -0
  124. data/core/vendor/src/crfsuite/config.sub +1714 -0
  125. data/core/vendor/src/crfsuite/configure +14273 -0
  126. data/core/vendor/src/crfsuite/configure.in +149 -0
  127. data/core/vendor/src/crfsuite/crfsuite.sln +42 -0
  128. data/core/vendor/src/crfsuite/depcomp +630 -0
  129. data/core/vendor/src/crfsuite/example/chunking.py +49 -0
  130. data/core/vendor/src/crfsuite/example/crfutils.py +179 -0
  131. data/core/vendor/src/crfsuite/example/ner.py +270 -0
  132. data/core/vendor/src/crfsuite/example/pos.py +78 -0
  133. data/core/vendor/src/crfsuite/example/template.py +88 -0
  134. data/core/vendor/src/crfsuite/frontend/Makefile.am +29 -0
  135. data/core/vendor/src/crfsuite/frontend/Makefile.in +640 -0
  136. data/core/vendor/src/crfsuite/frontend/dump.c +116 -0
  137. data/core/vendor/src/crfsuite/frontend/frontend.vcxproj +129 -0
  138. data/core/vendor/src/crfsuite/frontend/iwa.c +273 -0
  139. data/core/vendor/src/crfsuite/frontend/iwa.h +65 -0
  140. data/core/vendor/src/crfsuite/frontend/learn.c +439 -0
  141. data/core/vendor/src/crfsuite/frontend/main.c +137 -0
  142. data/core/vendor/src/crfsuite/frontend/option.c +93 -0
  143. data/core/vendor/src/crfsuite/frontend/option.h +86 -0
  144. data/core/vendor/src/crfsuite/frontend/readdata.h +38 -0
  145. data/core/vendor/src/crfsuite/frontend/reader.c +136 -0
  146. data/core/vendor/src/crfsuite/frontend/tag.c +427 -0
  147. data/core/vendor/src/crfsuite/genbinary.sh.in +15 -0
  148. data/core/vendor/src/crfsuite/include/Makefile.am +11 -0
  149. data/core/vendor/src/crfsuite/include/Makefile.in +461 -0
  150. data/core/vendor/src/crfsuite/include/crfsuite.h +1063 -0
  151. data/core/vendor/src/crfsuite/include/crfsuite.hpp +555 -0
  152. data/core/vendor/src/crfsuite/include/crfsuite_api.hpp +400 -0
  153. data/core/vendor/src/crfsuite/include/os.h +61 -0
  154. data/core/vendor/src/crfsuite/install-sh +520 -0
  155. data/core/vendor/src/crfsuite/lib/cqdb/COPYING +28 -0
  156. data/core/vendor/src/crfsuite/lib/cqdb/Makefile.am +21 -0
  157. data/core/vendor/src/crfsuite/lib/cqdb/Makefile.in +549 -0
  158. data/core/vendor/src/crfsuite/lib/cqdb/cqdb.vcxproj +86 -0
  159. data/core/vendor/src/crfsuite/lib/cqdb/include/cqdb.h +524 -0
  160. data/core/vendor/src/crfsuite/lib/cqdb/src/cqdb.c +587 -0
  161. data/core/vendor/src/crfsuite/lib/cqdb/src/lookup3.c +976 -0
  162. data/core/vendor/src/crfsuite/lib/crf/Makefile.am +46 -0
  163. data/core/vendor/src/crfsuite/lib/crf/Makefile.in +721 -0
  164. data/core/vendor/src/crfsuite/lib/crf/crf.vcxproj +216 -0
  165. data/core/vendor/src/crfsuite/lib/crf/src/crf1d.h +353 -0
  166. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_context.c +705 -0
  167. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_encode.c +943 -0
  168. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_feature.c +352 -0
  169. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_model.c +994 -0
  170. data/core/vendor/src/crfsuite/lib/crf/src/crf1d_tag.c +550 -0
  171. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite.c +492 -0
  172. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite_internal.h +236 -0
  173. data/core/vendor/src/crfsuite/lib/crf/src/crfsuite_train.c +272 -0
  174. data/core/vendor/src/crfsuite/lib/crf/src/dataset.c +106 -0
  175. data/core/vendor/src/crfsuite/lib/crf/src/dictionary.c +118 -0
  176. data/core/vendor/src/crfsuite/lib/crf/src/holdout.c +80 -0
  177. data/core/vendor/src/crfsuite/lib/crf/src/logging.c +91 -0
  178. data/core/vendor/src/crfsuite/lib/crf/src/logging.h +48 -0
  179. data/core/vendor/src/crfsuite/lib/crf/src/params.c +335 -0
  180. data/core/vendor/src/crfsuite/lib/crf/src/params.h +80 -0
  181. data/core/vendor/src/crfsuite/lib/crf/src/quark.c +172 -0
  182. data/core/vendor/src/crfsuite/lib/crf/src/quark.h +46 -0
  183. data/core/vendor/src/crfsuite/lib/crf/src/rumavl.c +1107 -0
  184. data/core/vendor/src/crfsuite/lib/crf/src/rumavl.h +160 -0
  185. data/core/vendor/src/crfsuite/lib/crf/src/train_arow.c +408 -0
  186. data/core/vendor/src/crfsuite/lib/crf/src/train_averaged_perceptron.c +242 -0
  187. data/core/vendor/src/crfsuite/lib/crf/src/train_l2sgd.c +507 -0
  188. data/core/vendor/src/crfsuite/lib/crf/src/train_lbfgs.c +338 -0
  189. data/core/vendor/src/crfsuite/lib/crf/src/train_passive_aggressive.c +435 -0
  190. data/core/vendor/src/crfsuite/lib/crf/src/vecmath.h +341 -0
  191. data/core/vendor/src/crfsuite/ltmain.sh +8413 -0
  192. data/core/vendor/src/crfsuite/missing +376 -0
  193. data/core/vendor/src/crfsuite/swig/Makefile.am +13 -0
  194. data/core/vendor/src/crfsuite/swig/Makefile.in +365 -0
  195. data/core/vendor/src/crfsuite/swig/crfsuite.cpp +2 -0
  196. data/core/vendor/src/crfsuite/swig/export.i +32 -0
  197. data/core/vendor/src/crfsuite/swig/python/README +92 -0
  198. data/core/vendor/src/crfsuite/swig/python/crfsuite.py +329 -0
  199. data/core/vendor/src/crfsuite/swig/python/export_wrap.cpp +14355 -0
  200. data/core/vendor/src/crfsuite/swig/python/export_wrap.h +63 -0
  201. data/core/vendor/src/crfsuite/swig/python/prepare.sh +9 -0
  202. data/core/vendor/src/crfsuite/swig/python/sample_tag.py +52 -0
  203. data/core/vendor/src/crfsuite/swig/python/sample_train.py +68 -0
  204. data/core/vendor/src/crfsuite/swig/python/setup.py +44 -0
  205. data/core/vendor/src/crfsuite/win32/stdint.h +679 -0
  206. data/core/vendor/src/liblbfgs/AUTHORS +1 -0
  207. data/core/vendor/src/liblbfgs/COPYING +22 -0
  208. data/core/vendor/src/liblbfgs/ChangeLog +120 -0
  209. data/core/vendor/src/liblbfgs/INSTALL +231 -0
  210. data/core/vendor/src/liblbfgs/Makefile.am +10 -0
  211. data/core/vendor/src/liblbfgs/Makefile.in +638 -0
  212. data/core/vendor/src/liblbfgs/NEWS +0 -0
  213. data/core/vendor/src/liblbfgs/README +71 -0
  214. data/core/vendor/src/liblbfgs/aclocal.m4 +6985 -0
  215. data/core/vendor/src/liblbfgs/autogen.sh +38 -0
  216. data/core/vendor/src/liblbfgs/config.guess +1411 -0
  217. data/core/vendor/src/liblbfgs/config.h.in +64 -0
  218. data/core/vendor/src/liblbfgs/config.sub +1500 -0
  219. data/core/vendor/src/liblbfgs/configure +21146 -0
  220. data/core/vendor/src/liblbfgs/configure.in +107 -0
  221. data/core/vendor/src/liblbfgs/depcomp +522 -0
  222. data/core/vendor/src/liblbfgs/include/lbfgs.h +745 -0
  223. data/core/vendor/src/liblbfgs/install-sh +322 -0
  224. data/core/vendor/src/liblbfgs/lbfgs.sln +26 -0
  225. data/core/vendor/src/liblbfgs/lib/Makefile.am +24 -0
  226. data/core/vendor/src/liblbfgs/lib/Makefile.in +499 -0
  227. data/core/vendor/src/liblbfgs/lib/arithmetic_ansi.h +133 -0
  228. data/core/vendor/src/liblbfgs/lib/arithmetic_sse_double.h +294 -0
  229. data/core/vendor/src/liblbfgs/lib/arithmetic_sse_float.h +298 -0
  230. data/core/vendor/src/liblbfgs/lib/lbfgs.c +1371 -0
  231. data/core/vendor/src/liblbfgs/lib/lib.vcxproj +95 -0
  232. data/core/vendor/src/liblbfgs/ltmain.sh +6426 -0
  233. data/core/vendor/src/liblbfgs/missing +353 -0
  234. data/core/vendor/src/liblbfgs/sample/Makefile.am +15 -0
  235. data/core/vendor/src/liblbfgs/sample/Makefile.in +433 -0
  236. data/core/vendor/src/liblbfgs/sample/sample.c +81 -0
  237. data/core/vendor/src/liblbfgs/sample/sample.cpp +126 -0
  238. data/core/vendor/src/liblbfgs/sample/sample.vcxproj +105 -0
  239. data/core/vendor/src/svm_light/LICENSE.txt +59 -0
  240. data/core/vendor/src/svm_light/Makefile +105 -0
  241. data/core/vendor/src/svm_light/kernel.h +40 -0
  242. data/core/vendor/src/svm_light/svm_classify.c +197 -0
  243. data/core/vendor/src/svm_light/svm_common.c +985 -0
  244. data/core/vendor/src/svm_light/svm_common.h +301 -0
  245. data/core/vendor/src/svm_light/svm_hideo.c +1062 -0
  246. data/core/vendor/src/svm_light/svm_learn.c +4147 -0
  247. data/core/vendor/src/svm_light/svm_learn.h +169 -0
  248. data/core/vendor/src/svm_light/svm_learn_main.c +397 -0
  249. data/core/vendor/src/svm_light/svm_loqo.c +211 -0
  250. data/ext/hack/Rakefile +17 -0
  251. data/ext/hack/support.rb +88 -0
  252. data/lib/opener/opinion_detectors/base.rb +112 -0
  253. data/lib/opener/opinion_detectors/base/version.rb +7 -0
  254. data/lib/opener/opinion_detectors/configuration_creator.rb +86 -0
  255. data/lib/opener/opinion_detectors/de.rb +7 -0
  256. data/lib/opener/opinion_detectors/en.rb +7 -0
  257. data/lib/opener/opinion_detectors/it.rb +7 -0
  258. data/lib/opener/opinion_detectors/nl.rb +6 -0
  259. data/opener-opinion-detector-base.gemspec +35 -0
  260. data/pre_build_requirements.txt +3 -0
  261. metadata +374 -0
@@ -0,0 +1,309 @@
1
+ from operator import itemgetter
2
+ from VUA_pylib.common import get_max_distr_dict
3
+ import sys
4
+
5
class Cdependency_extractor:
    """Index the dependency layer of a KAF/NAF object and answer path queries.

    On construction every dependency returned by
    ``knaf_obj.get_dependencies()`` is stored twice: under its ``from`` term
    (``relations_for_term``) and under its ``to`` term
    (``reverse_relations_for_term``).  For every term whose first span token
    can be resolved, the relation chains reachable in both directions are
    pre-computed and kept in ``paths_for_termid``.  The query methods only
    read those tables.

    A stored path is a list of ``(function, term_id)`` steps.  The query
    methods return only the list of function labels, or ``None`` when no
    path is known.
    """

    def __init__(self, knaf_obj):
        """Build the relation tables and the per-term paths.

        ``knaf_obj`` must provide ``get_dependencies()``, ``get_terms()`` and
        ``get_token(token_id)``.
        """
        self.naf = knaf_obj
        self.relations_for_term = {}
        self.reverse_relations_for_term = {}
        self.prefix_for_reverse = ''

        # Note that a document may contain two-node cycles such as
        #   <dep from="t19" to="t15" rfunc="rhd/body"/>
        #   <dep from="t15" to="t19" rfunc="hd/su"/>
        # Both edges are indexed; the walkers stop on already expanded nodes.
        for dep in knaf_obj.get_dependencies():
            term_from = dep.get_from()
            term_to = dep.get_to()
            rfunc = dep.get_function()

            self.relations_for_term.setdefault(term_from, []).append(
                (rfunc, term_to))
            self.reverse_relations_for_term.setdefault(term_to, []).append(
                (self.prefix_for_reverse + rfunc, term_from))

        self.paths_for_termid = {}
        self.sentence_for_termid = {}
        self.top_relation_for_term = {}  # termid --> (relation, topnode)
        self.root_for_sentence = {}      # sentenceid --> termid

        for term_obj in knaf_obj.get_terms():
            termid = term_obj.get_id()

            # The sentence of a term is the sentence of its first token.
            span_ids = term_obj.get_span().get_span_ids()
            token_obj = knaf_obj.get_token(span_ids[0])
            if token_obj is None:
                # Terms without a resolvable token are not indexed at all.
                continue

            sentence = token_obj.get_sent()
            self.sentence_for_termid[termid] = sentence

            # The from->to chains are kept as `inversed` and the to->from
            # chains as `paths`; the original author notes this naming
            # follows a change in the direction of the dependencies.
            inversed = self.__propagate_node(termid, already_propagated=[])
            paths = self.__reverse_propagate_node(termid,
                                                  already_propagated=[])

            # The top relation of a term is the last step of its first
            # non-empty from->to chain, e.g. ('NMOD', 't2').
            for ip in inversed:
                if len(ip) != 0:
                    self.top_relation_for_term[termid] = ip[-1]
                    root = ip[-1][1]
                    counts = self.root_for_sentence.setdefault(sentence, {})
                    # Counting starts at 0 on the first sighting, exactly as
                    # before, so each value is "sightings minus one".
                    if root in counts:
                        counts[root] += 1
                    else:
                        counts[root] = 0
                    break

            self.paths_for_termid[termid] = paths + inversed

        # Collapse each per-sentence distribution to a single root id.
        # get_max_distr_dict is imported from VUA_pylib.common; presumably it
        # returns the key with the highest count together with that count
        # (the count is discarded here) -- confirm against VUA_pylib.
        for sent_id, distr in list(self.root_for_sentence.items()):
            most_freq, c = get_max_distr_dict(distr)
            self.root_for_sentence[sent_id] = most_freq

    def __walk(self, table, node, visited):
        """Return all relation chains starting at ``node`` inside ``table``.

        ``table`` maps a term id to a list of ``(function, term_id)`` pairs.
        ``visited`` is shared by the whole recursion: a node is expanded at
        most once per walk, which also stops cycles.  A node without
        relations, or one already expanded, contributes one empty chain.
        """
        relations = table.get(node)
        if relations is None or node in visited:
            return [[]]

        visited.append(node)
        paths = []
        for func, target_node in relations:
            for sub_path in self.__walk(table, target_node, visited):
                paths.append([(func, target_node)] + sub_path)
        return paths

    def __propagate_node(self, node, already_propagated=None):
        """Return the chains reachable from ``node`` over from->to relations.

        ``already_propagated`` lists the nodes expanded so far.  It defaults
        to a fresh list on every call; it used to be a shared mutable
        default, which made later calls see nodes from earlier ones.
        """
        if already_propagated is None:
            already_propagated = []
        return self.__walk(self.relations_for_term, node, already_propagated)

    def __reverse_propagate_node(self, node, already_propagated=None):
        """Return the chains reachable from ``node`` over to->from relations.

        See ``__propagate_node`` for the meaning of ``already_propagated``.
        """
        if already_propagated is None:
            already_propagated = []
        return self.__walk(self.reverse_relations_for_term, node,
                           already_propagated)

    def get_shortest_path(self, term1, term2):
        """Return the function labels of the shortest path between two terms.

        Returns ``[]`` when both ids are equal.  Returns ``None`` when no
        stored path connects them, or when either id was never indexed
        (previously the latter raised ``KeyError``).
        """
        if term1 == term2:
            return []

        paths1 = self.paths_for_termid.get(term1)
        paths2 = self.paths_for_termid.get(term2)
        if paths1 is None or paths2 is None:
            return None

        ids_per_path1 = [[my_id for _, my_id in p1] for p1 in paths1]
        ids_per_path2 = [[my_id for _, my_id in p2] for p2 in paths2]

        # Each hit is (common_id, distance, idx1, idx2, numpath1, numpath2).
        hits = []

        # term2 lies directly on one of the paths of term1.
        for num1, ids1 in enumerate(ids_per_path1):
            if term2 in ids1:
                idx1 = ids1.index(term2)
                hits.append((term2, idx1, idx1, 0, num1, None))

        # term1 lies directly on one of the paths of term2.
        for num2, ids2 in enumerate(ids_per_path2):
            if term1 in ids2:
                idx2 = ids2.index(term1)
                hits.append((term1, idx2, 0, idx2, None, num2))

        # Any node shared by one path of each term links them.
        for num1, ids1 in enumerate(ids_per_path1):
            for num2, ids2 in enumerate(ids_per_path2):
                for common_id in set(ids1) & set(ids2):
                    idx1 = ids1.index(common_id)
                    idx2 = ids2.index(common_id)
                    hits.append((common_id, idx1 + idx2, idx1, idx2,
                                 num1, num2))

        if not hits:
            return None

        # min() keeps the first of several equally short hits, which is what
        # the former stable sort followed by [0] selected.
        _, _, idx1, idx2, numpath1, numpath2 = min(hits, key=itemgetter(1))

        if numpath2 is None:
            # term2 is on a path of term1.
            steps = paths1[numpath1][:idx1 + 1]
        elif numpath1 is None:
            # term1 is on a path of term2.
            steps = paths2[numpath2][:idx2 + 1]
        else:
            # Walk from term1 to the common node, then back towards term2.
            steps = (paths1[numpath1][:idx1 + 1] +
                     paths2[numpath2][:idx2 + 1][::-1])
        return [func for func, _ in steps]

    def get_shortest_path_spans(self, span1, span2):
        """Return the shortest path between any term of two spans.

        ``span1`` and ``span2`` are iterables of term ids.  Returns ``None``
        when no pair of terms is connected.
        """
        shortest_path = None
        for term1 in span1:
            for term2 in span2:
                this_path = self.get_shortest_path(term1, term2)
                if shortest_path is None or (
                        this_path is not None and
                        len(this_path) < len(shortest_path)):
                    shortest_path = this_path
        return shortest_path

    def get_path_to_root(self, termid):
        """Return the dependency path from ``termid`` to its sentence root.

        The root is the one recorded for the sentence of the term.  When the
        term has no recorded sentence, the node of its top relation is used
        instead.  Returns ``None`` when no root can be determined.
        """
        sentence = self.sentence_for_termid.get(termid)

        if sentence is None:
            top_node = self.top_relation_for_term.get(termid)
            if top_node is None:
                return None
            root = top_node[1]
        elif sentence in self.root_for_sentence:
            root = self.root_for_sentence[sentence]
        else:
            # There is no root for this sentence.
            return None

        return self.get_shortest_path(termid, root)

    def get_shortest_path_to_root_span(self, span):
        """Return the shortest path to the sentence root for a span of ids.

        Example: ``extractor.get_shortest_path_to_root_span(['t444', 't445'])``.
        Terms without a path to the root (the original author mentions
        punctuation) yield ``None`` and never replace a path already found.
        """
        shortest_path = None
        for termid in span:
            this_path = self.get_path_to_root(termid)
            if shortest_path is None or (
                    this_path is not None and
                    len(this_path) < len(shortest_path)):
                shortest_path = this_path
        return shortest_path
303
+
304
+
305
+
306
+
307
+
308
+
309
+
@@ -0,0 +1,131 @@
1
+ from lxml import etree
2
+ from lxml.objectify import dump
3
+ from references_data import *
4
+
5
+
6
+
7
class Cproperty:
    """Wrapper around a single ``property`` element.

    The identifier lives in the ``pid`` attribute for type ``'KAF'`` and in
    the ``id`` attribute for type ``'NAF'``.
    """

    def __init__(self, node=None, type='NAF'):
        """Wrap ``node``, or create an empty ``property`` element."""
        self.type = type
        self.node = etree.Element('property') if node is None else node

    def get_node(self):
        """Return the wrapped element."""
        return self.node

    def get_id(self):
        """Return the identifier, or ``None`` for an unknown type."""
        if self.type == 'KAF':
            return self.node.get('pid')
        if self.type == 'NAF':
            return self.node.get('id')

    def set_id(self, pid):
        """Store ``pid`` in the attribute matching the wrapper type."""
        if self.type == 'KAF':
            return self.node.set('pid', pid)
        if self.type == 'NAF':
            return self.node.set('id', pid)

    def get_type(self):
        """Return the value of the ``lemma`` attribute."""
        return self.node.get('lemma')

    def set_type(self, t):
        """Store ``t`` in the ``lemma`` attribute."""
        return self.node.set('lemma', t)

    def get_references(self):
        """Yield a ``Creferences`` wrapper per direct ``references`` child."""
        for child in self.node.findall('references'):
            yield Creferences(child)

    def set_reference(self, ref):
        """Append the element wrapped by ``ref`` to this property."""
        self.node.append(ref.get_node())
42
+
43
+
44
+
45
class Cproperties:
    """Wrapper around a ``properties`` element and its ``property`` children."""

    def __init__(self, node=None, type='NAF'):
        """Wrap ``node``, or create an empty ``properties`` element."""
        self.type = type
        self.node = etree.Element('properties') if node is None else node

    def get_node(self):
        """Return the wrapped element."""
        return self.node

    def __iter__(self):
        """Yield a ``Cproperty`` wrapper for every direct ``property`` child."""
        for child in self.node.findall('property'):
            yield Cproperty(child, self.type)

    def add_property(self, pid, label, term_span):
        """Append a new property with the given label and term span.

        When ``pid`` is ``None`` the first identifier of the form ``p0``,
        ``p1``, ... that is not used by an existing property is assigned.
        """
        fresh = Cproperty(type=self.type)
        self.node.append(fresh.get_node())

        if pid is None:
            # The new element is already attached, but it has no id yet.
            taken = [prop.get_id() for prop in self]
            counter = 0
            candidate = 'p' + str(counter)
            while candidate in taken:
                counter += 1
                candidate = 'p' + str(counter)
            pid = candidate
        fresh.set_id(pid)

        fresh.set_type(label)

        reference = Creferences()
        reference.add_span(term_span)
        fresh.set_reference(reference)
81
+
82
+
83
+
84
class Cfeatures:
    """Wrapper around a ``features`` element.

    Property identifiers are stored in the ``id`` attribute for type
    ``'NAF'`` and in ``pid`` for type ``'KAF'``.  ``to_kaf`` and ``to_naf``
    switch between the two.
    """

    def __init__(self, node=None, type='NAF'):
        """Wrap ``node``, or create an empty ``features`` element."""
        self.type = type
        if node is None:
            self.node = etree.Element('features')
        else:
            self.node = node

    def get_node(self):
        """Return the wrapped element."""
        return self.node

    def __move_id_attribute(self, old_name, new_name):
        """Rename ``old_name`` to ``new_name`` on every nested property."""
        for node in self.node.findall('properties/property'):
            value = node.get(old_name)
            if value is None:
                # Nothing to move; leave the element untouched.
                continue
            node.set(new_name, value)
            del node.attrib[old_name]

    def to_kaf(self):
        """Convert the property identifiers to KAF (``id`` -> ``pid``).

        The wrapper type is updated as well, so later reads and writes use
        the renamed attribute and a repeated call is a no-op.
        """
        if self.type == 'NAF':
            self.__move_id_attribute('id', 'pid')
            self.type = 'KAF'

    def to_naf(self):
        """Convert the property identifiers to NAF (``pid`` -> ``id``).

        The wrapper type is updated as well, so later reads and writes use
        the renamed attribute and a repeated call is a no-op.
        """
        if self.type == 'KAF':
            self.__move_id_attribute('pid', 'id')
            self.type = 'NAF'

    def add_property(self, pid, label, term_span):
        """Add a property, creating the ``properties`` child when missing."""
        node_prop = self.node.find('properties')
        if node_prop is None:
            properties = Cproperties(type=self.type)
            self.node.append(properties.get_node())
        else:
            properties = Cproperties(node=node_prop, type=self.type)

        properties.add_property(pid, label, term_span)

    def get_properties(self):
        """Yield every property of the ``properties`` child, if it exists."""
        node_prop = self.node.find('properties')
        if node_prop is not None:
            for prop in Cproperties(node_prop, self.type):
                yield prop

    def remove_properties(self):
        """Remove the ``properties`` child, if it exists."""
        node_prop = self.node.find('properties')
        if node_prop is not None:
            self.node.remove(node_prop)
131
+
@@ -0,0 +1,127 @@
1
+ # Modified to KAF / NAF
2
+
3
+ from lxml import etree
4
+ import time
5
+
6
class CfileDesc:
    """Wrapper around a ``fileDesc`` header element.

    No accessors are defined; the element is reached through ``self.node``.
    The attribute names once sketched here were title, author, creationtime,
    filename, filetype and pages.
    """

    def __init__(self, node=None):
        """Wrap ``node``, or create an empty ``fileDesc`` element."""
        self.type = 'KAF/NAF'
        self.node = etree.Element('fileDesc') if node is None else node
15
+
16
+
17
class Cpublic:
    """Wrapper around a ``public`` header element.

    No accessors are defined; the element is reached through ``self.node``.
    The attribute names once sketched here were publicId and uri.
    """

    def __init__(self, node=None):
        """Wrap ``node``, or create an empty ``public`` element."""
        self.type = 'KAF/NAF'
        self.node = etree.Element('public') if node is None else node
27
+
28
+
29
class Clp:
    """Wrapper around an ``lp`` (linguistic processor) header element."""

    def __init__(self, node=None, name="", version="", timestamp=None):
        """Wrap ``node``, or create an ``lp`` element from the arguments.

        ``name``, ``version`` and ``timestamp`` are only used when a new
        element is created; they are ignored when ``node`` is given.
        """
        self.type = 'KAF/NAF'
        if node is not None:
            self.node = node
            return

        self.node = etree.Element('lp')
        self.set_name(name)
        self.set_version(version)
        self.set_timestamp(timestamp)

    def set_name(self, name):
        """Store ``name`` in the ``name`` attribute."""
        self.node.set('name', name)

    def set_version(self, version):
        """Store ``version`` in the ``version`` attribute."""
        self.node.set('version', version)

    def set_timestamp(self, timestamp=None):
        """Store ``timestamp``, defaulting to the current local time.

        The default is formatted with ``'%Y-%m-%dT%H:%M:%S%Z'``.
        """
        stamp = timestamp
        if stamp is None:
            stamp = time.strftime('%Y-%m-%dT%H:%M:%S%Z')
        self.node.set('timestamp', stamp)

    def get_node(self):
        """Return the wrapped element."""
        return self.node
55
+
56
+
57
class ClinguisticProcessors:
    """Wrapper around a ``linguisticProcessors`` header element."""

    def __init__(self, node=None):
        """Wrap ``node``, or create an empty ``linguisticProcessors`` element."""
        self.type = 'KAF/NAF'
        self.node = (etree.Element('linguisticProcessors')
                     if node is None else node)

    def get_layer(self):
        """Return the value of the ``layer`` attribute."""
        return self.node.get('layer')

    def set_layer(self, layer):
        """Store ``layer`` in the ``layer`` attribute."""
        self.node.set('layer', layer)

    def add_linguistic_processor(self, my_lp):
        """Append the element wrapped by ``my_lp`` as a child."""
        child = my_lp.get_node()
        self.node.append(child)

    def get_node(self):
        """Return the wrapped element."""
        return self.node
76
+
77
+
78
class CHeader:
    """Wrapper around the document header (``nafHeader`` or ``kafHeader``)."""

    def __init__(self, node=None, type='NAF'):
        """Wrap ``node``, or create an empty header for ``type``.

        When ``node`` is ``None`` an element is only created for the types
        ``'NAF'`` and ``'KAF'``.
        """
        self.type = type
        if node is not None:
            self.node = node
        elif self.type == 'NAF':
            self.node = etree.Element('nafHeader')
        elif self.type == 'KAF':
            self.node = etree.Element('kafHeader')

    def to_kaf(self):
        """Turn a NAF header into a KAF header (tag and type)."""
        if self.type != 'NAF':
            return
        self.node.tag = 'kafHeader'
        self.type = 'KAF'

    def to_naf(self):
        """Turn a KAF header into a NAF header (tag and type)."""
        if self.type != 'KAF':
            return
        self.node.tag = 'nafHeader'
        self.type = 'NAF'

    def add_linguistic_processors(self, linpro):
        """Append the element wrapped by ``linpro`` to the header."""
        self.node.append(linpro.get_node())

    def remove_lp(self, layer):
        """Remove the first ``linguisticProcessors`` child for ``layer``."""
        for candidate in self.node.findall('linguisticProcessors'):
            if candidate.get('layer') != layer:
                continue
            self.node.remove(candidate)
            return

    def add_linguistic_processor(self, layer, my_lp):
        """Register ``my_lp`` under the ``linguisticProcessors`` of ``layer``.

        The first existing ``linguisticProcessors`` child for that layer is
        reused.  When there is none, a new one is created and appended to
        the header first.
        """
        target = None
        for element in self.node.findall('linguisticProcessors'):
            wrapper = ClinguisticProcessors(element)
            if wrapper.get_layer() == layer:
                target = wrapper
                break

        if target is None:
            # No container for that layer yet.
            target = ClinguisticProcessors()
            target.set_layer(layer)
            self.add_linguistic_processors(target)

        target.add_linguistic_processor(my_lp)
124
+
125
+
126
+
127
+