opener-opinion-detector-base 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.md +101 -0
- data/bin/opinion-detector-base +19 -0
- data/core/annotation.cfg.erb +9 -0
- data/core/packages/KafNafParser-1.4.tar.gz +0 -0
- data/core/packages/VUA_pylib-1.5.tar.gz +0 -0
- data/core/python-scripts/LICENSE +339 -0
- data/core/python-scripts/README.md +226 -0
- data/core/python-scripts/classify_kaf_naf_file.py +499 -0
- data/core/python-scripts/cross_validation.py +634 -0
- data/core/python-scripts/generate_folds.py +134 -0
- data/core/python-scripts/models.cfg +10 -0
- data/core/python-scripts/my_templates/README +33 -0
- data/core/python-scripts/my_templates/templates_exp.only0.txt +6 -0
- data/core/python-scripts/my_templates/templates_exp.pol0.txt +10 -0
- data/core/python-scripts/my_templates/templates_exp.red.txt +7 -0
- data/core/python-scripts/my_templates/templates_exp.txt +10 -0
- data/core/python-scripts/my_templates/templates_holder.only0.txt +11 -0
- data/core/python-scripts/my_templates/templates_holder.red.txt +9 -0
- data/core/python-scripts/my_templates/templates_holder.txt +10 -0
- data/core/python-scripts/my_templates/templates_target.only0.txt +11 -0
- data/core/python-scripts/my_templates/templates_target.red.txt +9 -0
- data/core/python-scripts/my_templates/templates_target.txt +10 -0
- data/core/python-scripts/run_all_experiments.sh +49 -0
- data/core/python-scripts/run_basic.py +20 -0
- data/core/python-scripts/run_experiment.sh +42 -0
- data/core/python-scripts/scripts/__init__.py +1 -0
- data/core/python-scripts/scripts/config_manager.py +314 -0
- data/core/python-scripts/scripts/crfutils.py +215 -0
- data/core/python-scripts/scripts/extract_feats_relations.py +295 -0
- data/core/python-scripts/scripts/extract_features.py +376 -0
- data/core/python-scripts/scripts/feats_to_crf.exp.py +105 -0
- data/core/python-scripts/scripts/lexicons.py +44 -0
- data/core/python-scripts/scripts/link_entities_distance.py +77 -0
- data/core/python-scripts/scripts/relation_classifier.py +250 -0
- data/core/python-scripts/train.py +566 -0
- data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/PKG-INFO +10 -0
- data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/SOURCES.txt +22 -0
- data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/dependency_links.txt +1 -0
- data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/installed-files.txt +47 -0
- data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/top_level.txt +1 -0
- data/core/site-packages/pre_build/KafNafParser/KafNafParserMod.py +390 -0
- data/core/site-packages/pre_build/KafNafParser/KafNafParserMod.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/__init__.py +14 -0
- data/core/site-packages/pre_build/KafNafParser/__init__.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/constituency_data.py +125 -0
- data/core/site-packages/pre_build/KafNafParser/constituency_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/coreference_data.py +52 -0
- data/core/site-packages/pre_build/KafNafParser/coreference_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/dependency_data.py +78 -0
- data/core/site-packages/pre_build/KafNafParser/dependency_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/entity_data.py +59 -0
- data/core/site-packages/pre_build/KafNafParser/entity_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/external_references_data.py +41 -0
- data/core/site-packages/pre_build/KafNafParser/external_references_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/feature_extractor/__init__.py +2 -0
- data/core/site-packages/pre_build/KafNafParser/feature_extractor/__init__.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/feature_extractor/constituency.py +205 -0
- data/core/site-packages/pre_build/KafNafParser/feature_extractor/constituency.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/feature_extractor/dependency.py +309 -0
- data/core/site-packages/pre_build/KafNafParser/feature_extractor/dependency.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/features_data.py +131 -0
- data/core/site-packages/pre_build/KafNafParser/features_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/header_data.py +127 -0
- data/core/site-packages/pre_build/KafNafParser/header_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/opinion_data.py +211 -0
- data/core/site-packages/pre_build/KafNafParser/opinion_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/references_data.py +23 -0
- data/core/site-packages/pre_build/KafNafParser/references_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/span_data.py +63 -0
- data/core/site-packages/pre_build/KafNafParser/span_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/term_data.py +111 -0
- data/core/site-packages/pre_build/KafNafParser/term_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/term_sentiment_data.py +42 -0
- data/core/site-packages/pre_build/KafNafParser/term_sentiment_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/text_data.py +99 -0
- data/core/site-packages/pre_build/KafNafParser/text_data.pyc +0 -0
- data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/PKG-INFO +10 -0
- data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/SOURCES.txt +14 -0
- data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/dependency_links.txt +1 -0
- data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/installed-files.txt +23 -0
- data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/top_level.txt +1 -0
- data/core/site-packages/pre_build/VUA_pylib/__init__.py +1 -0
- data/core/site-packages/pre_build/VUA_pylib/__init__.pyc +0 -0
- data/core/site-packages/pre_build/VUA_pylib/common/__init__.py +1 -0
- data/core/site-packages/pre_build/VUA_pylib/common/__init__.pyc +0 -0
- data/core/site-packages/pre_build/VUA_pylib/common/common.py +28 -0
- data/core/site-packages/pre_build/VUA_pylib/common/common.pyc +0 -0
- data/core/site-packages/pre_build/VUA_pylib/corpus_reader/__init__.py +1 -0
- data/core/site-packages/pre_build/VUA_pylib/corpus_reader/__init__.pyc +0 -0
- data/core/site-packages/pre_build/VUA_pylib/corpus_reader/google_web_nl.py +156 -0
- data/core/site-packages/pre_build/VUA_pylib/corpus_reader/google_web_nl.pyc +0 -0
- data/core/site-packages/pre_build/VUA_pylib/io_utils/__init__.py +1 -0
- data/core/site-packages/pre_build/VUA_pylib/io_utils/__init__.pyc +0 -0
- data/core/site-packages/pre_build/VUA_pylib/io_utils/feature_file.py +121 -0
- data/core/site-packages/pre_build/VUA_pylib/io_utils/feature_file.pyc +0 -0
- data/core/site-packages/pre_build/VUA_pylib/lexicon/__init__.py +1 -0
- data/core/site-packages/pre_build/VUA_pylib/lexicon/__init__.pyc +0 -0
- data/core/site-packages/pre_build/VUA_pylib/lexicon/lexicon.py +72 -0
- data/core/site-packages/pre_build/VUA_pylib/lexicon/lexicon.pyc +0 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/PKG-INFO +10 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/SOURCES.txt +7 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/dependency_links.txt +1 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/installed-files.txt +11 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/top_level.txt +1 -0
- data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.py +165 -0
- data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.pyc +0 -0
- data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.py +439 -0
- data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.pyc +0 -0
- data/core/site-packages/pre_build/VUKafParserPy/__init__.py +7 -0
- data/core/site-packages/pre_build/VUKafParserPy/__init__.pyc +0 -0
- data/core/vendor/src/crfsuite/AUTHORS +1 -0
- data/core/vendor/src/crfsuite/COPYING +27 -0
- data/core/vendor/src/crfsuite/ChangeLog +103 -0
- data/core/vendor/src/crfsuite/INSTALL +236 -0
- data/core/vendor/src/crfsuite/Makefile.am +19 -0
- data/core/vendor/src/crfsuite/Makefile.in +783 -0
- data/core/vendor/src/crfsuite/README +183 -0
- data/core/vendor/src/crfsuite/aclocal.m4 +9018 -0
- data/core/vendor/src/crfsuite/autogen.sh +38 -0
- data/core/vendor/src/crfsuite/compile +143 -0
- data/core/vendor/src/crfsuite/config.guess +1502 -0
- data/core/vendor/src/crfsuite/config.h.in +198 -0
- data/core/vendor/src/crfsuite/config.sub +1714 -0
- data/core/vendor/src/crfsuite/configure +14273 -0
- data/core/vendor/src/crfsuite/configure.in +149 -0
- data/core/vendor/src/crfsuite/crfsuite.sln +42 -0
- data/core/vendor/src/crfsuite/depcomp +630 -0
- data/core/vendor/src/crfsuite/example/chunking.py +49 -0
- data/core/vendor/src/crfsuite/example/crfutils.py +179 -0
- data/core/vendor/src/crfsuite/example/ner.py +270 -0
- data/core/vendor/src/crfsuite/example/pos.py +78 -0
- data/core/vendor/src/crfsuite/example/template.py +88 -0
- data/core/vendor/src/crfsuite/frontend/Makefile.am +29 -0
- data/core/vendor/src/crfsuite/frontend/Makefile.in +640 -0
- data/core/vendor/src/crfsuite/frontend/dump.c +116 -0
- data/core/vendor/src/crfsuite/frontend/frontend.vcxproj +129 -0
- data/core/vendor/src/crfsuite/frontend/iwa.c +273 -0
- data/core/vendor/src/crfsuite/frontend/iwa.h +65 -0
- data/core/vendor/src/crfsuite/frontend/learn.c +439 -0
- data/core/vendor/src/crfsuite/frontend/main.c +137 -0
- data/core/vendor/src/crfsuite/frontend/option.c +93 -0
- data/core/vendor/src/crfsuite/frontend/option.h +86 -0
- data/core/vendor/src/crfsuite/frontend/readdata.h +38 -0
- data/core/vendor/src/crfsuite/frontend/reader.c +136 -0
- data/core/vendor/src/crfsuite/frontend/tag.c +427 -0
- data/core/vendor/src/crfsuite/genbinary.sh.in +15 -0
- data/core/vendor/src/crfsuite/include/Makefile.am +11 -0
- data/core/vendor/src/crfsuite/include/Makefile.in +461 -0
- data/core/vendor/src/crfsuite/include/crfsuite.h +1063 -0
- data/core/vendor/src/crfsuite/include/crfsuite.hpp +555 -0
- data/core/vendor/src/crfsuite/include/crfsuite_api.hpp +400 -0
- data/core/vendor/src/crfsuite/include/os.h +61 -0
- data/core/vendor/src/crfsuite/install-sh +520 -0
- data/core/vendor/src/crfsuite/lib/cqdb/COPYING +28 -0
- data/core/vendor/src/crfsuite/lib/cqdb/Makefile.am +21 -0
- data/core/vendor/src/crfsuite/lib/cqdb/Makefile.in +549 -0
- data/core/vendor/src/crfsuite/lib/cqdb/cqdb.vcxproj +86 -0
- data/core/vendor/src/crfsuite/lib/cqdb/include/cqdb.h +524 -0
- data/core/vendor/src/crfsuite/lib/cqdb/src/cqdb.c +587 -0
- data/core/vendor/src/crfsuite/lib/cqdb/src/lookup3.c +976 -0
- data/core/vendor/src/crfsuite/lib/crf/Makefile.am +46 -0
- data/core/vendor/src/crfsuite/lib/crf/Makefile.in +721 -0
- data/core/vendor/src/crfsuite/lib/crf/crf.vcxproj +216 -0
- data/core/vendor/src/crfsuite/lib/crf/src/crf1d.h +353 -0
- data/core/vendor/src/crfsuite/lib/crf/src/crf1d_context.c +705 -0
- data/core/vendor/src/crfsuite/lib/crf/src/crf1d_encode.c +943 -0
- data/core/vendor/src/crfsuite/lib/crf/src/crf1d_feature.c +352 -0
- data/core/vendor/src/crfsuite/lib/crf/src/crf1d_model.c +994 -0
- data/core/vendor/src/crfsuite/lib/crf/src/crf1d_tag.c +550 -0
- data/core/vendor/src/crfsuite/lib/crf/src/crfsuite.c +492 -0
- data/core/vendor/src/crfsuite/lib/crf/src/crfsuite_internal.h +236 -0
- data/core/vendor/src/crfsuite/lib/crf/src/crfsuite_train.c +272 -0
- data/core/vendor/src/crfsuite/lib/crf/src/dataset.c +106 -0
- data/core/vendor/src/crfsuite/lib/crf/src/dictionary.c +118 -0
- data/core/vendor/src/crfsuite/lib/crf/src/holdout.c +80 -0
- data/core/vendor/src/crfsuite/lib/crf/src/logging.c +91 -0
- data/core/vendor/src/crfsuite/lib/crf/src/logging.h +48 -0
- data/core/vendor/src/crfsuite/lib/crf/src/params.c +335 -0
- data/core/vendor/src/crfsuite/lib/crf/src/params.h +80 -0
- data/core/vendor/src/crfsuite/lib/crf/src/quark.c +172 -0
- data/core/vendor/src/crfsuite/lib/crf/src/quark.h +46 -0
- data/core/vendor/src/crfsuite/lib/crf/src/rumavl.c +1107 -0
- data/core/vendor/src/crfsuite/lib/crf/src/rumavl.h +160 -0
- data/core/vendor/src/crfsuite/lib/crf/src/train_arow.c +408 -0
- data/core/vendor/src/crfsuite/lib/crf/src/train_averaged_perceptron.c +242 -0
- data/core/vendor/src/crfsuite/lib/crf/src/train_l2sgd.c +507 -0
- data/core/vendor/src/crfsuite/lib/crf/src/train_lbfgs.c +338 -0
- data/core/vendor/src/crfsuite/lib/crf/src/train_passive_aggressive.c +435 -0
- data/core/vendor/src/crfsuite/lib/crf/src/vecmath.h +341 -0
- data/core/vendor/src/crfsuite/ltmain.sh +8413 -0
- data/core/vendor/src/crfsuite/missing +376 -0
- data/core/vendor/src/crfsuite/swig/Makefile.am +13 -0
- data/core/vendor/src/crfsuite/swig/Makefile.in +365 -0
- data/core/vendor/src/crfsuite/swig/crfsuite.cpp +2 -0
- data/core/vendor/src/crfsuite/swig/export.i +32 -0
- data/core/vendor/src/crfsuite/swig/python/README +92 -0
- data/core/vendor/src/crfsuite/swig/python/crfsuite.py +329 -0
- data/core/vendor/src/crfsuite/swig/python/export_wrap.cpp +14355 -0
- data/core/vendor/src/crfsuite/swig/python/export_wrap.h +63 -0
- data/core/vendor/src/crfsuite/swig/python/prepare.sh +9 -0
- data/core/vendor/src/crfsuite/swig/python/sample_tag.py +52 -0
- data/core/vendor/src/crfsuite/swig/python/sample_train.py +68 -0
- data/core/vendor/src/crfsuite/swig/python/setup.py +44 -0
- data/core/vendor/src/crfsuite/win32/stdint.h +679 -0
- data/core/vendor/src/liblbfgs/AUTHORS +1 -0
- data/core/vendor/src/liblbfgs/COPYING +22 -0
- data/core/vendor/src/liblbfgs/ChangeLog +120 -0
- data/core/vendor/src/liblbfgs/INSTALL +231 -0
- data/core/vendor/src/liblbfgs/Makefile.am +10 -0
- data/core/vendor/src/liblbfgs/Makefile.in +638 -0
- data/core/vendor/src/liblbfgs/NEWS +0 -0
- data/core/vendor/src/liblbfgs/README +71 -0
- data/core/vendor/src/liblbfgs/aclocal.m4 +6985 -0
- data/core/vendor/src/liblbfgs/autogen.sh +38 -0
- data/core/vendor/src/liblbfgs/config.guess +1411 -0
- data/core/vendor/src/liblbfgs/config.h.in +64 -0
- data/core/vendor/src/liblbfgs/config.sub +1500 -0
- data/core/vendor/src/liblbfgs/configure +21146 -0
- data/core/vendor/src/liblbfgs/configure.in +107 -0
- data/core/vendor/src/liblbfgs/depcomp +522 -0
- data/core/vendor/src/liblbfgs/include/lbfgs.h +745 -0
- data/core/vendor/src/liblbfgs/install-sh +322 -0
- data/core/vendor/src/liblbfgs/lbfgs.sln +26 -0
- data/core/vendor/src/liblbfgs/lib/Makefile.am +24 -0
- data/core/vendor/src/liblbfgs/lib/Makefile.in +499 -0
- data/core/vendor/src/liblbfgs/lib/arithmetic_ansi.h +133 -0
- data/core/vendor/src/liblbfgs/lib/arithmetic_sse_double.h +294 -0
- data/core/vendor/src/liblbfgs/lib/arithmetic_sse_float.h +298 -0
- data/core/vendor/src/liblbfgs/lib/lbfgs.c +1371 -0
- data/core/vendor/src/liblbfgs/lib/lib.vcxproj +95 -0
- data/core/vendor/src/liblbfgs/ltmain.sh +6426 -0
- data/core/vendor/src/liblbfgs/missing +353 -0
- data/core/vendor/src/liblbfgs/sample/Makefile.am +15 -0
- data/core/vendor/src/liblbfgs/sample/Makefile.in +433 -0
- data/core/vendor/src/liblbfgs/sample/sample.c +81 -0
- data/core/vendor/src/liblbfgs/sample/sample.cpp +126 -0
- data/core/vendor/src/liblbfgs/sample/sample.vcxproj +105 -0
- data/core/vendor/src/svm_light/LICENSE.txt +59 -0
- data/core/vendor/src/svm_light/Makefile +105 -0
- data/core/vendor/src/svm_light/kernel.h +40 -0
- data/core/vendor/src/svm_light/svm_classify.c +197 -0
- data/core/vendor/src/svm_light/svm_common.c +985 -0
- data/core/vendor/src/svm_light/svm_common.h +301 -0
- data/core/vendor/src/svm_light/svm_hideo.c +1062 -0
- data/core/vendor/src/svm_light/svm_learn.c +4147 -0
- data/core/vendor/src/svm_light/svm_learn.h +169 -0
- data/core/vendor/src/svm_light/svm_learn_main.c +397 -0
- data/core/vendor/src/svm_light/svm_loqo.c +211 -0
- data/ext/hack/Rakefile +17 -0
- data/ext/hack/support.rb +88 -0
- data/lib/opener/opinion_detectors/base.rb +112 -0
- data/lib/opener/opinion_detectors/base/version.rb +7 -0
- data/lib/opener/opinion_detectors/configuration_creator.rb +86 -0
- data/lib/opener/opinion_detectors/de.rb +7 -0
- data/lib/opener/opinion_detectors/en.rb +7 -0
- data/lib/opener/opinion_detectors/it.rb +7 -0
- data/lib/opener/opinion_detectors/nl.rb +6 -0
- data/opener-opinion-detector-base.gemspec +35 -0
- data/pre_build_requirements.txt +3 -0
- metadata +374 -0
Binary file
|
@@ -0,0 +1,14 @@
|
|
1
|
+
from KafNafParserMod import *
|
2
|
+
from header_data import *
|
3
|
+
from external_references_data import *
|
4
|
+
from span_data import *
|
5
|
+
from term_data import *
|
6
|
+
from term_sentiment_data import *
|
7
|
+
from text_data import *
|
8
|
+
from entity_data import *
|
9
|
+
from features_data import *
|
10
|
+
from opinion_data import *
|
11
|
+
from dependency_data import *
|
12
|
+
from constituency_data import *
|
13
|
+
from references_data import *
|
14
|
+
from coreference_data import *
|
Binary file
|
@@ -0,0 +1,125 @@
|
|
1
|
+
from lxml import etree
|
2
|
+
from lxml.objectify import dump
|
3
|
+
from span_data import Cspan
|
4
|
+
|
5
|
+
|
6
|
+
class Cnonterminal:
    """Thin wrapper around an ``<nt>`` (non-terminal) element of a constituency tree."""

    def __init__(self,node=None):
        # Wrap the element handed in, or start from a fresh, empty <nt> element
        self.node = etree.Element('nt') if node is None else node

    def __str__(self):
        return dump(self.node)

    def get_label(self):
        """Return the value of the ``label`` attribute of the wrapped element."""
        return self.node.get('label')

    def get_id(self):
        """Return the value of the ``id`` attribute of the wrapped element."""
        return self.node.get('id')
|
21
|
+
|
22
|
+
|
23
|
+
|
24
|
+
class Cterminal:
    """Thin wrapper around a ``<t>`` (terminal) element of a constituency tree."""

    def __init__(self,node=None):
        # Wrap the element handed in, or start from a fresh, empty <t> element
        self.node = etree.Element('t') if node is None else node

    def __str__(self):
        return dump(self.node)

    def get_id(self):
        """Return the value of the ``id`` attribute of the wrapped element."""
        return self.node.get('id')

    def get_span(self):
        """Return the first ``<span>`` child wrapped in a Cspan object."""
        return Cspan(self.node.find('span'))
|
40
|
+
|
41
|
+
class Cedge:
    """Thin wrapper around an ``<edge>`` element linking two nodes of a constituency tree."""

    def __init__(self,node=None):
        # Wrap the element handed in, or start from a fresh, empty <edge> element
        self.node = etree.Element('edge') if node is None else node

    def get_to(self):
        """Return the value of the ``to`` attribute of the edge."""
        return self.node.get('to')

    def get_from(self):
        """Return the value of the ``from`` attribute of the edge."""
        return self.node.get('from')

    def __str__(self):
        return dump(self.node)
|
56
|
+
|
57
|
+
|
58
|
+
|
59
|
+
class Ctree:
    """Wrapper around a ``<tree>`` element holding ``<nt>``, ``<t>`` and ``<edge>`` children."""

    def __init__(self,node=None):
        # Wrap the element handed in, or start from a fresh, empty <tree> element
        self.node = etree.Element('tree') if node is None else node

    def __str__(self):
        return dump(self.node)

    ## For getting non terminals
    def __get_nt_nodes(self):
        for found in self.node.findall('nt'):
            yield found

    def get_non_terminals(self):
        """Iterate over the ``<nt>`` children, each wrapped as a Cnonterminal."""
        return (Cnonterminal(found) for found in self.__get_nt_nodes())
    ##################################

    ## For getting terminals
    def __get_t_nodes(self):
        for found in self.node.findall('t'):
            yield found

    def get_terminals(self):
        """Iterate over the ``<t>`` children, each wrapped as a Cterminal."""
        return (Cterminal(found) for found in self.__get_t_nodes())
    ##################################

    ## For getting edges
    def __get_edge_nodes(self):
        for found in self.node.findall('edge'):
            yield found

    def get_edges(self):
        """Iterate over the ``<edge>`` children, each wrapped as a Cedge."""
        return (Cedge(found) for found in self.__get_edge_nodes())
|
98
|
+
##################################
|
99
|
+
|
100
|
+
|
101
|
+
|
102
|
+
class Cconstituency:
    """Wrapper around the ``<constituency>`` layer, a container of ``<tree>`` elements."""

    def __init__(self,node=None):
        # NOTE(review): sibling classes use 'NAF/KAF' for format-independent layers;
        # 'NAF/NAF' looks like a typo but is kept as-is, callers may compare against it
        self.type = 'NAF/NAF'
        self.node = etree.Element('constituency') if node is None else node

    def __str__(self):
        return dump(self.node)

    def to_kaf(self):
        """No conversion is performed for this layer."""
        pass

    def to_naf(self):
        """No conversion is performed for this layer."""
        pass

    def __get_tree_nodes(self):
        for found in self.node.findall('tree'):
            yield found

    def get_trees(self):
        """Iterate over the ``<tree>`` children, each wrapped as a Ctree."""
        return (Ctree(found) for found in self.__get_tree_nodes())
|
Binary file
|
@@ -0,0 +1,52 @@
|
|
1
|
+
from lxml import etree
|
2
|
+
from span_data import Cspan
|
3
|
+
|
4
|
+
class Ccoreference:
    """Wrapper around a single ``<coref>`` element of a KAF or NAF document."""

    def __init__(self,node=None,type='NAF'):
        self.type = type
        # Wrap the element handed in, or start from a fresh, empty <coref> element
        self.node = etree.Element('coref') if node is None else node

    def get_id(self):
        """Return the identifier: attribute ``id`` for NAF, ``coid`` for KAF.

        Returns None for any other value of ``self.type``.
        """
        if self.type == 'NAF':
            return self.node.get('id')
        if self.type == 'KAF':
            return self.node.get('coid')
        return None

    def get_spans(self):
        """Iterate over the ``<span>`` children, each wrapped as a Cspan."""
        return (Cspan(span_node) for span_node in self.node.findall('span'))
|
21
|
+
|
22
|
+
|
23
|
+
|
24
|
+
class Ccoreferences:
    """Wrapper around the ``<coreferences>`` layer, a container of ``<coref>`` elements.

    ``type`` ('NAF' or 'KAF') says which identifier attribute the ``<coref>``
    children currently carry: ``id`` for NAF, ``coid`` for KAF.
    """

    def __init__(self,node=None, type='NAF'):
        self.type = type
        if node is None:
            self.node = etree.Element('coreferences')
        else:
            self.node = node

    def __get_corefs_nodes(self):
        for coref_node in self.node.findall('coref'):
            yield coref_node

    def get_corefs(self):
        """Iterate over the ``<coref>`` children, each wrapped as a Ccoreference."""
        for coref_node in self.__get_corefs_nodes():
            yield Ccoreference(coref_node,self.type)

    def __rename_id_attribute(self,old_name,new_name):
        ## Moves the identifier from attribute old_name to new_name on every <coref>.
        ## Nodes without old_name are left untouched, so a missing identifier or a
        ## repeated conversion no longer fails halfway through the layer.
        for node_coref in self.__get_corefs_nodes():
            value = node_coref.get(old_name)
            if value is None:
                continue
            node_coref.set(new_name,value)
            del node_coref.attrib[old_name]

    def to_kaf(self):
        """Rename ``id`` to ``coid`` on every ``<coref>`` when the layer is NAF.

        NOTE(review): ``self.type`` is not switched to 'KAF' afterwards (same as
        before this change); presumably the owning parser tracks the format.
        """
        if self.type == 'NAF':
            self.__rename_id_attribute('id','coid')

    def to_naf(self):
        """Rename ``coid`` to ``id`` on every ``<coref>`` when the layer is KAF.

        NOTE(review): ``self.type`` is not switched to 'NAF' afterwards (same as
        before this change); presumably the owning parser tracks the format.
        """
        if self.type == 'KAF':
            self.__rename_id_attribute('coid','id')
|
51
|
+
|
52
|
+
|
Binary file
|
@@ -0,0 +1,78 @@
|
|
1
|
+
from lxml import etree
|
2
|
+
#from lxml.objectify import dump
|
3
|
+
|
4
|
+
|
5
|
+
class Cdependency:
    """Wrapper around a single ``<dep>`` element (one dependency relation).

    The relation is stored in the attributes ``from``, ``to`` and ``rfunc``;
    an optional XML comment can be attached as first child of the element.
    """

    def __init__(self,node=None):
        if node is None:
            self.node = etree.Element('dep')
        else:
            self.node = node

    def get_node_comment(self):
        """Return the comment attached to this dependency, or None.

        An explicitly assigned ``node_comment`` attribute wins; otherwise the
        first comment child of the element (as inserted by set_comment) is
        returned. Previously this raised AttributeError because nothing in the
        class ever assigned ``node_comment``.
        """
        explicit = getattr(self,'node_comment',None)
        if explicit is not None:
            return explicit
        for child in self.node:
            # Comment nodes carry the Comment factory itself as their tag
            if child.tag is etree.Comment:
                return child
        return None

    def get_node(self):
        """Return the wrapped element."""
        return self.node

    def get_from(self):
        """Return the value of the ``from`` attribute."""
        return self.node.get('from')

    def get_to(self):
        """Return the value of the ``to`` attribute."""
        return self.node.get('to')

    def get_function(self):
        """Return the value of the ``rfunc`` attribute."""
        return self.node.get('rfunc')

    def set_from(self, f):
        self.node.set('from',f)

    def set_to(self,t):
        self.node.set('to',t)

    def set_function(self,f):
        self.node.set('rfunc',f)

    def set_comment(self,c):
        """Insert ``c`` as an XML comment in front of the element's children."""
        # '--' is not allowed inside an XML comment. A single replace() pass
        # leaves '--' behind for runs of three or more dashes, so repeat until clean.
        while '--' in c:
            c = c.replace('--','- -')
        self.node.insert(0,etree.Comment(c) )

    def __str__(self):
        # Imported locally: the module-level import of dump is commented out,
        # which made this method fail with NameError
        from lxml.objectify import dump
        return dump(self.node)
|
44
|
+
|
45
|
+
|
46
|
+
|
47
|
+
class Cdependencies:
    """Wrapper around the ``<deps>`` layer, a container of ``<dep>`` elements."""

    def __init__(self,node=None):
        if node is None:
            self.node = etree.Element('deps')
        else:
            self.node = node

    def get_node(self):
        """Return the wrapped element."""
        return self.node

    def to_kaf(self):
        """No conversion is performed for this layer."""
        pass

    def to_naf(self):
        """No conversion is performed for this layer."""
        pass

    def __str__(self):
        # Imported locally: the module-level import of dump is commented out,
        # which made this method fail with NameError
        from lxml.objectify import dump
        return dump(self.node)

    def __get_node_deps(self):
        for node_dep in self.node.findall('dep'):
            yield node_dep

    def get_dependencies(self):
        """Iterate over the ``<dep>`` children, each wrapped as a Cdependency."""
        for node in self.__get_node_deps():
            yield Cdependency(node)

    def add_dependency(self,my_dep):
        """Append the element of ``my_dep`` (anything offering get_node()) to the layer."""
        self.node.append(my_dep.get_node())
|
78
|
+
|
Binary file
|
@@ -0,0 +1,59 @@
|
|
1
|
+
## Modified for KAF NAF adaptation
|
2
|
+
from lxml import etree
|
3
|
+
from lxml.objectify import dump
|
4
|
+
from references_data import *
|
5
|
+
|
6
|
+
|
7
|
+
class Centity:
    """Wrapper around a single ``<entity>`` element of a KAF or NAF document."""

    def __init__(self,node=None,type='NAF'):
        self.type = type
        # Wrap the element handed in, or start from a fresh, empty <entity> element
        self.node = etree.Element('entity') if node is None else node

    def get_type(self):
        """Return the value of the ``type`` attribute of the entity element."""
        return self.node.get('type')

    def get_id(self):
        """Return the identifier: attribute ``id`` for NAF, ``eid`` for KAF.

        Returns None for any other value of ``self.type``.
        """
        if self.type == 'NAF':
            return self.node.get('id')
        if self.type == 'KAF':
            return self.node.get('eid')
        return None

    def get_references(self):
        """Iterate over the ``<references>`` children, each wrapped as a Creferences."""
        return (Creferences(found) for found in self.node.findall('references'))
|
27
|
+
|
28
|
+
class Centities:
    """Wrapper around the ``<entities>`` layer, a container of ``<entity>`` elements.

    ``type`` ('NAF' or 'KAF') says which identifier attribute the ``<entity>``
    children currently carry: ``id`` for NAF, ``eid`` for KAF.
    """

    def __init__(self,node=None,type='NAF'):
        self.type = type
        if node is None:
            self.node = etree.Element('entities')
        else:
            self.node = node

    def __rename_id_attribute(self,old_name,new_name):
        ## Moves the identifier from attribute old_name to new_name on every <entity>.
        ## Nodes without old_name are left untouched, so a missing identifier or a
        ## repeated conversion no longer fails halfway through the layer.
        for node in self.__get_entity_nodes():
            value = node.get(old_name)
            if value is None:
                continue
            node.set(new_name,value)
            del node.attrib[old_name]

    def to_kaf(self):
        """Rename ``id`` to ``eid`` on every ``<entity>`` when the layer is NAF.

        NOTE(review): ``self.type`` is not switched to 'KAF' afterwards (same as
        before this change); presumably the owning parser tracks the format.
        """
        if self.type == 'NAF':
            self.__rename_id_attribute('id','eid')

    def to_naf(self):
        """Rename ``eid`` to ``id`` on every ``<entity>`` when the layer is KAF.

        NOTE(review): ``self.type`` is not switched to 'NAF' afterwards (same as
        before this change); presumably the owning parser tracks the format.
        """
        if self.type == 'KAF':
            self.__rename_id_attribute('eid','id')

    def __get_entity_nodes(self):
        for ent_node in self.node.findall('entity'):
            yield ent_node

    def __iter__(self):
        ## Iterating the layer yields every <entity> wrapped as a Centity
        for ent_node in self.__get_entity_nodes():
            yield Centity(ent_node,self.type)

    def __str__(self):
        return dump(self.node)
|
Binary file
|
@@ -0,0 +1,41 @@
|
|
1
|
+
# included modification for KAF/NAF
|
2
|
+
from term_sentiment_data import Cterm_sentiment
|
3
|
+
from lxml import etree
|
4
|
+
|
5
|
+
class CexternalReference:
    """Wrapper around a single ``<externalRef>`` element."""

    def __init__(self,node=None):
        self.type= 'NAF/KAF'
        # Wrap the element handed in, or start from a fresh, empty <externalRef> element
        self.node = etree.Element('externalRef') if node is None else node

    def get_node(self):
        """Return the wrapped element."""
        return self.node

    def set_reference(self,reference):
        """Store ``reference`` in the ``reference`` attribute."""
        self.node.set('reference',reference)

    def set_confidence(self,confidence):
        """Store ``confidence`` in the ``confidence`` attribute."""
        self.node.set('confidence',confidence)

    def set_resource(self,resource):
        """Store ``resource`` in the ``resource`` attribute."""
        self.node.set('resource',resource)
|
25
|
+
|
26
|
+
|
27
|
+
class CexternalReferences:
    """Wrapper around an ``<externalReferences>`` container element."""

    def __init__(self,node=None):
        # Wrap the element handed in, or start from a fresh, empty container element
        self.node = etree.Element('externalReferences') if node is None else node

    def get_node(self):
        """Return the wrapped element."""
        return self.node

    def add_external_reference(self,ext_ref):
        """Append the element of ``ext_ref`` (anything offering get_node()) to the container."""
        new_child = ext_ref.get_node()
        self.node.append(new_child)
|
39
|
+
|
40
|
+
|
41
|
+
|
@@ -0,0 +1,205 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
|
3
|
+
from operator import itemgetter
|
4
|
+
|
5
|
+
'''
|
6
|
+
Extract information from the constituent layer of a NAF file
|
7
|
+
'''
|
8
|
+
|
9
|
+
class Cconstituency_extractor:
    """Index the constituency trees of a KAF/NAF object for path and chunk queries.

    The constructor reads every tree returned by ``knaf_obj.get_trees()`` and builds:

    * ``terminals``: terminal id --> list of term ids in its span
    * ``terminal_for_term``: term id --> terminal id
    * ``label_for_nonter``: non-terminal id --> label
    * ``reachable_from``: node id --> list of node ids its outgoing edges point to
    * ``paths_for_terminal``: terminal id --> list of paths, each a list of node
      ids obtained by following the edges from the terminal until a node without
      outgoing edges is reached; the terminal itself is not part of the path
      unless it has no outgoing edge at all
    * ``terms_subsumed_by_nonter``: node id --> set of term ids of all the
      terminals having that node on one of their paths
    """

    def __init__(self,knaf_obj):
        self.naf = knaf_obj
        ## Extracted directly from the constituency layer
        self.terminals = {}           # terminal id --> list term ids
        self.terminal_for_term = {}   # term id --> terminal id
        self.label_for_nonter = {}    # nonter --> label
        self.reachable_from = {}      # node_from --> [nodeto1, nodeto2...]

        self.extract_info_from_naf(knaf_obj)

        # Extracting all possible paths from leaf to root for each terminal id
        self.paths_for_terminal = {}
        for terminal_id in self.terminals:
            self.paths_for_terminal[terminal_id] = self.__expand_node(terminal_id,False)

        ## Create, for each non terminal, which are the terms subsumed
        self.terms_subsumed_by_nonter = {}   ## ['nonter12'] = set('t1','t2','t3','t4')
        for terminal_id, span_terms in self.terminals.items():
            for path in self.paths_for_terminal[terminal_id]:
                for nonter in path:
                    self.terms_subsumed_by_nonter.setdefault(nonter,set()).update(span_terms)

    def get_deepest_phrases(self):
        """Group the terminals under the second node of their paths; returns None.

        NOTE(review): the grouping is computed and then discarded, exactly as
        before; the method looks unfinished. Only the IndexError raised for
        single-node paths has been removed.
        """
        all_nonter = set()
        for terminal in self.terminals:
            for path in self.paths_for_terminal[terminal]:
                # A single-node path has no phrase above its first node
                if len(path) > 1:
                    all_nonter.add(path[1])

        ter_for_nonter = {}
        for nonter in all_nonter:
            for terminal in self.terminals:
                for path in self.paths_for_terminal[terminal]:
                    if nonter in path:
                        ter_for_nonter.setdefault(nonter,[]).append(terminal)
        return None

    ### Returns the label of the deepest phrase for the term id (termid as in the term layer)
    def get_deepest_phrase_for_termid(self,termid):
        """Return ``(label, term_ids)`` for the second node on the first path of ``termid``.

        ``term_ids`` is the sorted list of term ids subsumed by that node.
        ``(None, [])`` is returned when the term id is unknown or when its first
        path has a single node (previously an IndexError).
        """
        terminal_id = self.terminal_for_term.get(termid)
        if terminal_id is None:
            return None,[]
        first_path = self.paths_for_terminal[terminal_id][0]
        if len(first_path) < 2:
            return None,[]
        first_phrase_id = first_path[1]
        label = self.label_for_nonter.get(first_phrase_id)
        subsumed = self.terms_subsumed_by_nonter.get(first_phrase_id,[])
        return label,sorted(subsumed)

    def get_least_common_subsumer(self,from_tid,to_tid):
        """Return the id of the closest node shared by the first paths of both terms.

        Returns None when the two paths share no node, or when one of the term
        ids is unknown (previously a KeyError).
        """
        termid_from = self.terminal_for_term.get(from_tid)
        termid_to = self.terminal_for_term.get(to_tid)
        if termid_from is None or termid_to is None:
            return None

        path_from = self.paths_for_terminal[termid_from][0]
        path_to = self.paths_for_terminal[termid_to][0]
        common_nodes = set(path_from) & set(path_to)
        if not common_nodes:
            return None
        # The closest shared node is the one with the smallest combined
        # distance (position in each path) from the two terminals
        return min(common_nodes,
                   key=lambda shared: path_from.index(shared) + path_to.index(shared))

    def get_path_from_to(self,from_tid, to_tid):
        """Return the labels along the path from ``from_tid`` up to the common subsumer and down to ``to_tid``.

        Returns None when there is no common subsumer or when one of the term
        ids is unknown (previously a KeyError). Raises KeyError when a node on
        the path has no entry in ``label_for_nonter``.
        """
        shortest_subsumer = self.get_least_common_subsumer(from_tid, to_tid)
        if shortest_subsumer is None:
            return None

        path_from = self.paths_for_terminal[self.terminal_for_term[from_tid]][0]
        path_to = self.paths_for_terminal[self.terminal_for_term[to_tid]][0]

        # Going up: from the first node of from_tid until the subsumer (included)
        complete_path = []
        for node in path_from:
            complete_path.append(node)
            if node == shortest_subsumer: break

        # Going down: walk the target path backwards and keep what comes after the subsumer
        begin=False
        for node in path_to[-1::-1]:
            if begin:
                complete_path.append(node)
            if node==shortest_subsumer:
                begin=True
        return [self.label_for_nonter[nonter] for nonter in complete_path]

    def get_path_for_termid(self,termid):
        """Return the labels of the nodes on the first path of ``termid``.

        Raises KeyError when the term id is unknown or when a node on the path
        has no entry in ``label_for_nonter``.
        """
        terminal_id = self.terminal_for_term.get(termid)
        paths = self.paths_for_terminal[terminal_id]
        return [self.label_for_nonter[nonter] for nonter in paths[0]]

    def extract_info_from_naf(self,knaf_obj):
        """Fill the terminal, label and edge indexes from ``knaf_obj.get_trees()``."""
        ## Generated internally
        # For each terminal node, a list of paths through all the edges
        self.paths_for_terminal = {}
        for tree in knaf_obj.get_trees():
            for terminal in tree.get_terminals():
                ter_id = terminal.get_id()
                span_ids = terminal.get_span().get_span_ids()
                self.terminals[ter_id] = span_ids
                for this_id in span_ids:
                    self.terminal_for_term[this_id] = ter_id

            for non_terminal in tree.get_non_terminals():
                self.label_for_nonter[non_terminal.get_id()] = non_terminal.get_label()

            for edge in tree.get_edges():
                self.reachable_from.setdefault(edge.get_from(),[]).append(edge.get_to())

    ## Recursive function
    ## Propagates the node through all the relations extracted from the edges information
    ## It returns a list of lists, one for each path
    ## include_this_node is used for avoiding the first node
    ## NOTE(review): there is no cycle protection, a cyclic edge set would recurse forever
    def __expand_node(self,node,include_this_node=True):
        possible_nodes = self.reachable_from.get(node,[])
        if not possible_nodes:
            # No outgoing edge: the node is a path on its own, whatever include_this_node says
            return [[node]]
        paths = []
        for possible_node in possible_nodes:
            for path in self.__expand_node(possible_node):
                if include_this_node:
                    path.insert(0,node)
                paths.append(path)
        return paths

    def get_chunks(self,chunk_type):
        """Yield the sorted term ids under every node whose label equals ``chunk_type``.

        NOTE(review): the sort is a plain string sort ('t10' comes before 't2').
        """
        for nonter,this_type in self.label_for_nonter.items():
            if this_type == chunk_type:
                subsumed = self.terms_subsumed_by_nonter.get(nonter)
                if subsumed is not None:
                    yield sorted(subsumed)

    def get_all_chunks_for_term(self,termid):
        """Yield ``(label, sorted term ids)`` for every node on every path of ``termid``.

        Raises KeyError (on first iteration) when the term id is unknown or
        when a node on a path has no entry in ``label_for_nonter``.
        """
        terminal_id = self.terminal_for_term.get(termid)
        paths = self.paths_for_terminal[terminal_id]
        for path in paths:
            for node in path:
                this_type = self.label_for_nonter[node]
                subsumed = self.terms_subsumed_by_nonter.get(node)
                if subsumed is not None:
                    yield this_type,sorted(subsumed)
|
205
|
+
|