opener-opinion-detector-base 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.md +101 -0
- data/bin/opinion-detector-base +19 -0
- data/core/annotation.cfg.erb +9 -0
- data/core/packages/KafNafParser-1.4.tar.gz +0 -0
- data/core/packages/VUA_pylib-1.5.tar.gz +0 -0
- data/core/python-scripts/LICENSE +339 -0
- data/core/python-scripts/README.md +226 -0
- data/core/python-scripts/classify_kaf_naf_file.py +499 -0
- data/core/python-scripts/cross_validation.py +634 -0
- data/core/python-scripts/generate_folds.py +134 -0
- data/core/python-scripts/models.cfg +10 -0
- data/core/python-scripts/my_templates/README +33 -0
- data/core/python-scripts/my_templates/templates_exp.only0.txt +6 -0
- data/core/python-scripts/my_templates/templates_exp.pol0.txt +10 -0
- data/core/python-scripts/my_templates/templates_exp.red.txt +7 -0
- data/core/python-scripts/my_templates/templates_exp.txt +10 -0
- data/core/python-scripts/my_templates/templates_holder.only0.txt +11 -0
- data/core/python-scripts/my_templates/templates_holder.red.txt +9 -0
- data/core/python-scripts/my_templates/templates_holder.txt +10 -0
- data/core/python-scripts/my_templates/templates_target.only0.txt +11 -0
- data/core/python-scripts/my_templates/templates_target.red.txt +9 -0
- data/core/python-scripts/my_templates/templates_target.txt +10 -0
- data/core/python-scripts/run_all_experiments.sh +49 -0
- data/core/python-scripts/run_basic.py +20 -0
- data/core/python-scripts/run_experiment.sh +42 -0
- data/core/python-scripts/scripts/__init__.py +1 -0
- data/core/python-scripts/scripts/config_manager.py +314 -0
- data/core/python-scripts/scripts/crfutils.py +215 -0
- data/core/python-scripts/scripts/extract_feats_relations.py +295 -0
- data/core/python-scripts/scripts/extract_features.py +376 -0
- data/core/python-scripts/scripts/feats_to_crf.exp.py +105 -0
- data/core/python-scripts/scripts/lexicons.py +44 -0
- data/core/python-scripts/scripts/link_entities_distance.py +77 -0
- data/core/python-scripts/scripts/relation_classifier.py +250 -0
- data/core/python-scripts/train.py +566 -0
- data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/PKG-INFO +10 -0
- data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/SOURCES.txt +22 -0
- data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/dependency_links.txt +1 -0
- data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/installed-files.txt +47 -0
- data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/top_level.txt +1 -0
- data/core/site-packages/pre_build/KafNafParser/KafNafParserMod.py +390 -0
- data/core/site-packages/pre_build/KafNafParser/KafNafParserMod.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/__init__.py +14 -0
- data/core/site-packages/pre_build/KafNafParser/__init__.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/constituency_data.py +125 -0
- data/core/site-packages/pre_build/KafNafParser/constituency_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/coreference_data.py +52 -0
- data/core/site-packages/pre_build/KafNafParser/coreference_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/dependency_data.py +78 -0
- data/core/site-packages/pre_build/KafNafParser/dependency_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/entity_data.py +59 -0
- data/core/site-packages/pre_build/KafNafParser/entity_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/external_references_data.py +41 -0
- data/core/site-packages/pre_build/KafNafParser/external_references_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/feature_extractor/__init__.py +2 -0
- data/core/site-packages/pre_build/KafNafParser/feature_extractor/__init__.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/feature_extractor/constituency.py +205 -0
- data/core/site-packages/pre_build/KafNafParser/feature_extractor/constituency.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/feature_extractor/dependency.py +309 -0
- data/core/site-packages/pre_build/KafNafParser/feature_extractor/dependency.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/features_data.py +131 -0
- data/core/site-packages/pre_build/KafNafParser/features_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/header_data.py +127 -0
- data/core/site-packages/pre_build/KafNafParser/header_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/opinion_data.py +211 -0
- data/core/site-packages/pre_build/KafNafParser/opinion_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/references_data.py +23 -0
- data/core/site-packages/pre_build/KafNafParser/references_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/span_data.py +63 -0
- data/core/site-packages/pre_build/KafNafParser/span_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/term_data.py +111 -0
- data/core/site-packages/pre_build/KafNafParser/term_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/term_sentiment_data.py +42 -0
- data/core/site-packages/pre_build/KafNafParser/term_sentiment_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/text_data.py +99 -0
- data/core/site-packages/pre_build/KafNafParser/text_data.pyc +0 -0
- data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/PKG-INFO +10 -0
- data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/SOURCES.txt +14 -0
- data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/dependency_links.txt +1 -0
- data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/installed-files.txt +23 -0
- data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/top_level.txt +1 -0
- data/core/site-packages/pre_build/VUA_pylib/__init__.py +1 -0
- data/core/site-packages/pre_build/VUA_pylib/__init__.pyc +0 -0
- data/core/site-packages/pre_build/VUA_pylib/common/__init__.py +1 -0
- data/core/site-packages/pre_build/VUA_pylib/common/__init__.pyc +0 -0
- data/core/site-packages/pre_build/VUA_pylib/common/common.py +28 -0
- data/core/site-packages/pre_build/VUA_pylib/common/common.pyc +0 -0
- data/core/site-packages/pre_build/VUA_pylib/corpus_reader/__init__.py +1 -0
- data/core/site-packages/pre_build/VUA_pylib/corpus_reader/__init__.pyc +0 -0
- data/core/site-packages/pre_build/VUA_pylib/corpus_reader/google_web_nl.py +156 -0
- data/core/site-packages/pre_build/VUA_pylib/corpus_reader/google_web_nl.pyc +0 -0
- data/core/site-packages/pre_build/VUA_pylib/io_utils/__init__.py +1 -0
- data/core/site-packages/pre_build/VUA_pylib/io_utils/__init__.pyc +0 -0
- data/core/site-packages/pre_build/VUA_pylib/io_utils/feature_file.py +121 -0
- data/core/site-packages/pre_build/VUA_pylib/io_utils/feature_file.pyc +0 -0
- data/core/site-packages/pre_build/VUA_pylib/lexicon/__init__.py +1 -0
- data/core/site-packages/pre_build/VUA_pylib/lexicon/__init__.pyc +0 -0
- data/core/site-packages/pre_build/VUA_pylib/lexicon/lexicon.py +72 -0
- data/core/site-packages/pre_build/VUA_pylib/lexicon/lexicon.pyc +0 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/PKG-INFO +10 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/SOURCES.txt +7 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/dependency_links.txt +1 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/installed-files.txt +11 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/top_level.txt +1 -0
- data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.py +165 -0
- data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.pyc +0 -0
- data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.py +439 -0
- data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.pyc +0 -0
- data/core/site-packages/pre_build/VUKafParserPy/__init__.py +7 -0
- data/core/site-packages/pre_build/VUKafParserPy/__init__.pyc +0 -0
- data/core/vendor/src/crfsuite/AUTHORS +1 -0
- data/core/vendor/src/crfsuite/COPYING +27 -0
- data/core/vendor/src/crfsuite/ChangeLog +103 -0
- data/core/vendor/src/crfsuite/INSTALL +236 -0
- data/core/vendor/src/crfsuite/Makefile.am +19 -0
- data/core/vendor/src/crfsuite/Makefile.in +783 -0
- data/core/vendor/src/crfsuite/README +183 -0
- data/core/vendor/src/crfsuite/aclocal.m4 +9018 -0
- data/core/vendor/src/crfsuite/autogen.sh +38 -0
- data/core/vendor/src/crfsuite/compile +143 -0
- data/core/vendor/src/crfsuite/config.guess +1502 -0
- data/core/vendor/src/crfsuite/config.h.in +198 -0
- data/core/vendor/src/crfsuite/config.sub +1714 -0
- data/core/vendor/src/crfsuite/configure +14273 -0
- data/core/vendor/src/crfsuite/configure.in +149 -0
- data/core/vendor/src/crfsuite/crfsuite.sln +42 -0
- data/core/vendor/src/crfsuite/depcomp +630 -0
- data/core/vendor/src/crfsuite/example/chunking.py +49 -0
- data/core/vendor/src/crfsuite/example/crfutils.py +179 -0
- data/core/vendor/src/crfsuite/example/ner.py +270 -0
- data/core/vendor/src/crfsuite/example/pos.py +78 -0
- data/core/vendor/src/crfsuite/example/template.py +88 -0
- data/core/vendor/src/crfsuite/frontend/Makefile.am +29 -0
- data/core/vendor/src/crfsuite/frontend/Makefile.in +640 -0
- data/core/vendor/src/crfsuite/frontend/dump.c +116 -0
- data/core/vendor/src/crfsuite/frontend/frontend.vcxproj +129 -0
- data/core/vendor/src/crfsuite/frontend/iwa.c +273 -0
- data/core/vendor/src/crfsuite/frontend/iwa.h +65 -0
- data/core/vendor/src/crfsuite/frontend/learn.c +439 -0
- data/core/vendor/src/crfsuite/frontend/main.c +137 -0
- data/core/vendor/src/crfsuite/frontend/option.c +93 -0
- data/core/vendor/src/crfsuite/frontend/option.h +86 -0
- data/core/vendor/src/crfsuite/frontend/readdata.h +38 -0
- data/core/vendor/src/crfsuite/frontend/reader.c +136 -0
- data/core/vendor/src/crfsuite/frontend/tag.c +427 -0
- data/core/vendor/src/crfsuite/genbinary.sh.in +15 -0
- data/core/vendor/src/crfsuite/include/Makefile.am +11 -0
- data/core/vendor/src/crfsuite/include/Makefile.in +461 -0
- data/core/vendor/src/crfsuite/include/crfsuite.h +1063 -0
- data/core/vendor/src/crfsuite/include/crfsuite.hpp +555 -0
- data/core/vendor/src/crfsuite/include/crfsuite_api.hpp +400 -0
- data/core/vendor/src/crfsuite/include/os.h +61 -0
- data/core/vendor/src/crfsuite/install-sh +520 -0
- data/core/vendor/src/crfsuite/lib/cqdb/COPYING +28 -0
- data/core/vendor/src/crfsuite/lib/cqdb/Makefile.am +21 -0
- data/core/vendor/src/crfsuite/lib/cqdb/Makefile.in +549 -0
- data/core/vendor/src/crfsuite/lib/cqdb/cqdb.vcxproj +86 -0
- data/core/vendor/src/crfsuite/lib/cqdb/include/cqdb.h +524 -0
- data/core/vendor/src/crfsuite/lib/cqdb/src/cqdb.c +587 -0
- data/core/vendor/src/crfsuite/lib/cqdb/src/lookup3.c +976 -0
- data/core/vendor/src/crfsuite/lib/crf/Makefile.am +46 -0
- data/core/vendor/src/crfsuite/lib/crf/Makefile.in +721 -0
- data/core/vendor/src/crfsuite/lib/crf/crf.vcxproj +216 -0
- data/core/vendor/src/crfsuite/lib/crf/src/crf1d.h +353 -0
- data/core/vendor/src/crfsuite/lib/crf/src/crf1d_context.c +705 -0
- data/core/vendor/src/crfsuite/lib/crf/src/crf1d_encode.c +943 -0
- data/core/vendor/src/crfsuite/lib/crf/src/crf1d_feature.c +352 -0
- data/core/vendor/src/crfsuite/lib/crf/src/crf1d_model.c +994 -0
- data/core/vendor/src/crfsuite/lib/crf/src/crf1d_tag.c +550 -0
- data/core/vendor/src/crfsuite/lib/crf/src/crfsuite.c +492 -0
- data/core/vendor/src/crfsuite/lib/crf/src/crfsuite_internal.h +236 -0
- data/core/vendor/src/crfsuite/lib/crf/src/crfsuite_train.c +272 -0
- data/core/vendor/src/crfsuite/lib/crf/src/dataset.c +106 -0
- data/core/vendor/src/crfsuite/lib/crf/src/dictionary.c +118 -0
- data/core/vendor/src/crfsuite/lib/crf/src/holdout.c +80 -0
- data/core/vendor/src/crfsuite/lib/crf/src/logging.c +91 -0
- data/core/vendor/src/crfsuite/lib/crf/src/logging.h +48 -0
- data/core/vendor/src/crfsuite/lib/crf/src/params.c +335 -0
- data/core/vendor/src/crfsuite/lib/crf/src/params.h +80 -0
- data/core/vendor/src/crfsuite/lib/crf/src/quark.c +172 -0
- data/core/vendor/src/crfsuite/lib/crf/src/quark.h +46 -0
- data/core/vendor/src/crfsuite/lib/crf/src/rumavl.c +1107 -0
- data/core/vendor/src/crfsuite/lib/crf/src/rumavl.h +160 -0
- data/core/vendor/src/crfsuite/lib/crf/src/train_arow.c +408 -0
- data/core/vendor/src/crfsuite/lib/crf/src/train_averaged_perceptron.c +242 -0
- data/core/vendor/src/crfsuite/lib/crf/src/train_l2sgd.c +507 -0
- data/core/vendor/src/crfsuite/lib/crf/src/train_lbfgs.c +338 -0
- data/core/vendor/src/crfsuite/lib/crf/src/train_passive_aggressive.c +435 -0
- data/core/vendor/src/crfsuite/lib/crf/src/vecmath.h +341 -0
- data/core/vendor/src/crfsuite/ltmain.sh +8413 -0
- data/core/vendor/src/crfsuite/missing +376 -0
- data/core/vendor/src/crfsuite/swig/Makefile.am +13 -0
- data/core/vendor/src/crfsuite/swig/Makefile.in +365 -0
- data/core/vendor/src/crfsuite/swig/crfsuite.cpp +2 -0
- data/core/vendor/src/crfsuite/swig/export.i +32 -0
- data/core/vendor/src/crfsuite/swig/python/README +92 -0
- data/core/vendor/src/crfsuite/swig/python/crfsuite.py +329 -0
- data/core/vendor/src/crfsuite/swig/python/export_wrap.cpp +14355 -0
- data/core/vendor/src/crfsuite/swig/python/export_wrap.h +63 -0
- data/core/vendor/src/crfsuite/swig/python/prepare.sh +9 -0
- data/core/vendor/src/crfsuite/swig/python/sample_tag.py +52 -0
- data/core/vendor/src/crfsuite/swig/python/sample_train.py +68 -0
- data/core/vendor/src/crfsuite/swig/python/setup.py +44 -0
- data/core/vendor/src/crfsuite/win32/stdint.h +679 -0
- data/core/vendor/src/liblbfgs/AUTHORS +1 -0
- data/core/vendor/src/liblbfgs/COPYING +22 -0
- data/core/vendor/src/liblbfgs/ChangeLog +120 -0
- data/core/vendor/src/liblbfgs/INSTALL +231 -0
- data/core/vendor/src/liblbfgs/Makefile.am +10 -0
- data/core/vendor/src/liblbfgs/Makefile.in +638 -0
- data/core/vendor/src/liblbfgs/NEWS +0 -0
- data/core/vendor/src/liblbfgs/README +71 -0
- data/core/vendor/src/liblbfgs/aclocal.m4 +6985 -0
- data/core/vendor/src/liblbfgs/autogen.sh +38 -0
- data/core/vendor/src/liblbfgs/config.guess +1411 -0
- data/core/vendor/src/liblbfgs/config.h.in +64 -0
- data/core/vendor/src/liblbfgs/config.sub +1500 -0
- data/core/vendor/src/liblbfgs/configure +21146 -0
- data/core/vendor/src/liblbfgs/configure.in +107 -0
- data/core/vendor/src/liblbfgs/depcomp +522 -0
- data/core/vendor/src/liblbfgs/include/lbfgs.h +745 -0
- data/core/vendor/src/liblbfgs/install-sh +322 -0
- data/core/vendor/src/liblbfgs/lbfgs.sln +26 -0
- data/core/vendor/src/liblbfgs/lib/Makefile.am +24 -0
- data/core/vendor/src/liblbfgs/lib/Makefile.in +499 -0
- data/core/vendor/src/liblbfgs/lib/arithmetic_ansi.h +133 -0
- data/core/vendor/src/liblbfgs/lib/arithmetic_sse_double.h +294 -0
- data/core/vendor/src/liblbfgs/lib/arithmetic_sse_float.h +298 -0
- data/core/vendor/src/liblbfgs/lib/lbfgs.c +1371 -0
- data/core/vendor/src/liblbfgs/lib/lib.vcxproj +95 -0
- data/core/vendor/src/liblbfgs/ltmain.sh +6426 -0
- data/core/vendor/src/liblbfgs/missing +353 -0
- data/core/vendor/src/liblbfgs/sample/Makefile.am +15 -0
- data/core/vendor/src/liblbfgs/sample/Makefile.in +433 -0
- data/core/vendor/src/liblbfgs/sample/sample.c +81 -0
- data/core/vendor/src/liblbfgs/sample/sample.cpp +126 -0
- data/core/vendor/src/liblbfgs/sample/sample.vcxproj +105 -0
- data/core/vendor/src/svm_light/LICENSE.txt +59 -0
- data/core/vendor/src/svm_light/Makefile +105 -0
- data/core/vendor/src/svm_light/kernel.h +40 -0
- data/core/vendor/src/svm_light/svm_classify.c +197 -0
- data/core/vendor/src/svm_light/svm_common.c +985 -0
- data/core/vendor/src/svm_light/svm_common.h +301 -0
- data/core/vendor/src/svm_light/svm_hideo.c +1062 -0
- data/core/vendor/src/svm_light/svm_learn.c +4147 -0
- data/core/vendor/src/svm_light/svm_learn.h +169 -0
- data/core/vendor/src/svm_light/svm_learn_main.c +397 -0
- data/core/vendor/src/svm_light/svm_loqo.c +211 -0
- data/ext/hack/Rakefile +17 -0
- data/ext/hack/support.rb +88 -0
- data/lib/opener/opinion_detectors/base.rb +112 -0
- data/lib/opener/opinion_detectors/base/version.rb +7 -0
- data/lib/opener/opinion_detectors/configuration_creator.rb +86 -0
- data/lib/opener/opinion_detectors/de.rb +7 -0
- data/lib/opener/opinion_detectors/en.rb +7 -0
- data/lib/opener/opinion_detectors/it.rb +7 -0
- data/lib/opener/opinion_detectors/nl.rb +6 -0
- data/opener-opinion-detector-base.gemspec +35 -0
- data/pre_build_requirements.txt +3 -0
- metadata +374 -0
@@ -0,0 +1,10 @@
|
|
1
|
+
Metadata-Version: 1.0
|
2
|
+
Name: KafNafParser
|
3
|
+
Version: 1.4
|
4
|
+
Summary: Parser between KAF and NAF
|
5
|
+
Home-page: https://github.com/cltl/KafNafParserPy
|
6
|
+
Author: Ruben Izquierdo
|
7
|
+
Author-email: r.izquierdobevia@vu.nl
|
8
|
+
License: UNKNOWN
|
9
|
+
Description: UNKNOWN
|
10
|
+
Platform: UNKNOWN
|
@@ -0,0 +1,22 @@
|
|
1
|
+
KafNafParser/KafNafParserMod.py
|
2
|
+
KafNafParser/__init__.py
|
3
|
+
KafNafParser/constituency_data.py
|
4
|
+
KafNafParser/coreference_data.py
|
5
|
+
KafNafParser/dependency_data.py
|
6
|
+
KafNafParser/entity_data.py
|
7
|
+
KafNafParser/external_references_data.py
|
8
|
+
KafNafParser/features_data.py
|
9
|
+
KafNafParser/header_data.py
|
10
|
+
KafNafParser/opinion_data.py
|
11
|
+
KafNafParser/references_data.py
|
12
|
+
KafNafParser/span_data.py
|
13
|
+
KafNafParser/term_data.py
|
14
|
+
KafNafParser/term_sentiment_data.py
|
15
|
+
KafNafParser/text_data.py
|
16
|
+
KafNafParser.egg-info/PKG-INFO
|
17
|
+
KafNafParser.egg-info/SOURCES.txt
|
18
|
+
KafNafParser.egg-info/dependency_links.txt
|
19
|
+
KafNafParser.egg-info/top_level.txt
|
20
|
+
KafNafParser/feature_extractor/__init__.py
|
21
|
+
KafNafParser/feature_extractor/constituency.py
|
22
|
+
KafNafParser/feature_extractor/dependency.py
|
@@ -0,0 +1 @@
|
|
1
|
+
|
@@ -0,0 +1,47 @@
|
|
1
|
+
../KafNafParser/__init__.py
|
2
|
+
../KafNafParser/header_data.py
|
3
|
+
../KafNafParser/text_data.py
|
4
|
+
../KafNafParser/term_data.py
|
5
|
+
../KafNafParser/entity_data.py
|
6
|
+
../KafNafParser/features_data.py
|
7
|
+
../KafNafParser/opinion_data.py
|
8
|
+
../KafNafParser/constituency_data.py
|
9
|
+
../KafNafParser/dependency_data.py
|
10
|
+
../KafNafParser/coreference_data.py
|
11
|
+
../KafNafParser/references_data.py
|
12
|
+
../KafNafParser/external_references_data.py
|
13
|
+
../KafNafParser/span_data.py
|
14
|
+
../KafNafParser/KafNafParserMod.py
|
15
|
+
../KafNafParser/term_sentiment_data.py
|
16
|
+
../KafNafParser/feature_extractor/dependency.py
|
17
|
+
../KafNafParser/feature_extractor/constituency.py
|
18
|
+
../KafNafParser/feature_extractor/__init__.py
|
19
|
+
../KafNafParser/__init__.pyc
|
20
|
+
../KafNafParser/header_data.pyc
|
21
|
+
../KafNafParser/text_data.pyc
|
22
|
+
../KafNafParser/term_data.pyc
|
23
|
+
../KafNafParser/entity_data.pyc
|
24
|
+
../KafNafParser/features_data.pyc
|
25
|
+
../KafNafParser/opinion_data.pyc
|
26
|
+
../KafNafParser/constituency_data.pyc
|
27
|
+
../KafNafParser/dependency_data.pyc
|
28
|
+
../KafNafParser/coreference_data.pyc
|
29
|
+
../KafNafParser/references_data.pyc
|
30
|
+
../KafNafParser/external_references_data.pyc
|
31
|
+
../KafNafParser/span_data.pyc
|
32
|
+
../KafNafParser/KafNafParserMod.pyc
|
33
|
+
../KafNafParser/term_sentiment_data.pyc
|
34
|
+
../KafNafParser/feature_extractor/dependency.pyc
|
35
|
+
../KafNafParser/feature_extractor/constituency.pyc
|
36
|
+
../KafNafParser/feature_extractor/__init__.pyc
|
37
|
+
../../../kaf_example.xml
|
38
|
+
../../../naf.dtd
|
39
|
+
../../../naf_example.xml
|
40
|
+
../../../test.py
|
41
|
+
../../../README.md
|
42
|
+
../../../LICENSE
|
43
|
+
./
|
44
|
+
SOURCES.txt
|
45
|
+
dependency_links.txt
|
46
|
+
top_level.txt
|
47
|
+
PKG-INFO
|
@@ -0,0 +1 @@
|
|
1
|
+
KafNafParser
|
@@ -0,0 +1,390 @@
|
|
1
|
+
## LIST OF CHANGES
|
2
|
+
# Ruben 8-nov-2013
|
3
|
+
# + included layers for entities, properties, opinions
|
4
|
+
# + renamed all classes to Cnameoftheclass
|
5
|
+
# Ruben 15-nov-2013
|
6
|
+
# + included constituency layer
|
7
|
+
#
|
8
|
+
# Ruben 19-nov-2013
|
9
|
+
# + included dependency layer
|
10
|
+
# Ruben 17-dec-2013
|
11
|
+
# + modified all to read/write NAF and KAF
|
12
|
+
#
|
13
|
+
# Ruben 21-Feb-2014
|
14
|
+
# + Included coreference layer
|
15
|
+
|
16
|
+
__last_modified = '17dec2013'
|
17
|
+
|
18
|
+
from lxml import etree
|
19
|
+
from header_data import *
|
20
|
+
from text_data import *
|
21
|
+
from term_data import *
|
22
|
+
from entity_data import *
|
23
|
+
from features_data import *
|
24
|
+
from opinion_data import *
|
25
|
+
from constituency_data import *
|
26
|
+
from dependency_data import *
|
27
|
+
from feature_extractor import Cdependency_extractor, Cconstituency_extractor
|
28
|
+
from coreference_data import *
|
29
|
+
from references_data import Creferences
|
30
|
+
|
31
|
+
import sys
|
32
|
+
|
33
|
+
|
34
|
+
|
35
|
+
class KafNafParser:
    """In-memory wrapper around a KAF or NAF XML document.

    The file is parsed with lxml and every layer found under the root
    element (header, text, terms, entities, features, opinions,
    constituency, deps, coreferences) is wrapped in its own layer object.
    Layers that are absent from the file are kept as ``None``.
    """

    def __init__(self,filename):
        """Parse *filename* and build the layer objects for the layers present.

        filename -- whatever ``etree.parse`` accepts as a source.
        """
        self.tree = None
        self.filename = filename
        self.tree = etree.parse(filename,etree.XMLParser(remove_blank_text=True))
        self.root = self.tree.getroot()
        self.type = self.root.tag  # KAF NAF

        self.header = None
        self.text_layer = None
        self.term_layer = None
        self.entity_layer = None
        self.features_layer = None
        self.opinion_layer = None
        self.constituency_layer = None
        self.dependency_layer = None
        self.coreference_layer = None

        ## Specific feature extractors for complicated layers (built lazily)
        self.my_dependency_extractor = None
        self.my_constituency_extractor = None

        ## Lazily built caches, see get_dict_tokens_for_termid / map_tokens_to_terms
        self.dict_tokens_for_tid = None
        self.terms_for_token = None

        self.lang = self.root.get('{http://www.w3.org/XML/1998/namespace}lang')
        self.version = self.root.get('version')

        # Bind the name up front: a root tag other than NAF/KAF used to leave
        # node_header undefined and crash on the check below.
        node_header = None
        if self.type == 'NAF':
            node_header = self.root.find('nafHeader')
        elif self.type == 'KAF':
            node_header = self.root.find('kafHeader')

        if node_header is not None:
            self.header = CHeader(node_header,self.type)

        # Text layer adapted to naf/kaf
        node_text = self.root.find('text')
        if node_text is not None:
            self.text_layer = Ctext(node=node_text,type=self.type)

        node_term = self.root.find('terms')
        if node_term is not None:
            self.term_layer = Cterms(node=node_term,type=self.type)

        node_entity = self.root.find('entities')
        if node_entity is not None:
            self.entity_layer = Centities(node_entity,type=self.type)

        node_features = self.root.find('features')
        if node_features is not None:
            self.features_layer = Cfeatures(node_features,type=self.type)

        node_opinions = self.root.find('opinions')
        if node_opinions is not None:
            self.opinion_layer = Copinions(node_opinions,type=self.type)

        # Definition KAF/NAF is the same
        node_constituency = self.root.find('constituency')
        if node_constituency is not None:
            self.constituency_layer = Cconstituency(node_constituency)

        # Definition KAF/NAF is the same
        node_dependency = self.root.find('deps')
        if node_dependency is not None:
            self.dependency_layer = Cdependencies(node_dependency)

        node_coreferences = self.root.find('coreferences')
        if node_coreferences is not None:
            self.coreference_layer = Ccoreferences(node_coreferences,type=self.type)

    def get_type(self):
        """Return the document type (the root tag, e.g. 'KAF' or 'NAF')."""
        return self.type

    def get_filename(self):
        """Return the filename this parser was created with."""
        return self.filename

    def to_kaf(self):
        """Convert the root tag and every loaded layer to KAF, in place."""
        # Convert the root
        if self.type == 'NAF':
            self.root.tag = 'KAF'
            self.type = 'KAF'

        ## Convert the header
        if self.header is not None:
            self.header.to_kaf()

        ## Convert the token layer
        if self.text_layer is not None:
            self.text_layer.to_kaf()

        ## Convert the term layer
        if self.term_layer is not None:
            self.term_layer.to_kaf()

        ## Convert the entity layer
        if self.entity_layer is not None:
            self.entity_layer.to_kaf()

        ## Convert the features layer
        ## There is no feature layer defined in NAF, but we assume that,
        ## if it is defined, it will follow the same rules
        if self.features_layer is not None:
            self.features_layer.to_kaf()

        ## Convert the opinion layer
        if self.opinion_layer is not None:
            self.opinion_layer.to_kaf()

        ## Convert the constituency layer
        ## This layer is exactly the same in KAF/NAF
        if self.constituency_layer is not None:
            self.constituency_layer.to_kaf()  # Does nothing...

        ## Convert the dependency layer
        ## It is not defined in KAF so we assume both will be similar
        if self.dependency_layer is not None:
            self.dependency_layer.to_kaf()

        if self.coreference_layer is not None:
            self.coreference_layer.to_kaf()

    def to_naf(self):
        """Convert the root tag and every loaded layer to NAF, in place."""
        # Convert the root
        if self.type == 'KAF':
            self.root.tag = self.type = 'NAF'

        ## Convert the header
        if self.header is not None:
            self.header.to_naf()

        ## Convert the token layer
        if self.text_layer is not None:
            self.text_layer.to_naf()

        ## Convert the term layer
        if self.term_layer is not None:
            self.term_layer.to_naf()

        ## Convert the entity layer
        if self.entity_layer is not None:
            self.entity_layer.to_naf()

        ## Convert the features layer
        ## There is no feature layer defined in NAF, but we assume that,
        ## if it is defined, it will follow the same rules
        if self.features_layer is not None:
            self.features_layer.to_naf()

        ## Convert the opinion layer
        if self.opinion_layer is not None:
            self.opinion_layer.to_naf()

        ## Convert the constituency layer
        ## This layer is exactly the same in KAF/NAF
        if self.constituency_layer is not None:
            self.constituency_layer.to_naf()  # Does nothing...

        ## Convert the dependency layer
        ## It is not defined in KAF so we assume both will be similar
        if self.dependency_layer is not None:
            self.dependency_layer.to_naf()  # Does nothing...

        if self.coreference_layer is not None:
            self.coreference_layer.to_naf()

    def print_constituency(self):
        """Print the constituency layer object (or None) to standard output."""
        # Parenthesised single-argument form prints the same on Python 2
        # and is also valid Python 3 syntax.
        print(self.constituency_layer)

    def get_trees(self):
        """Yield every tree of the constituency layer; nothing if it is absent."""
        if self.constituency_layer is not None:
            for tree in self.constituency_layer.get_trees():
                yield tree

    def get_dependencies(self):
        """Yield every dependency of the dependency layer; nothing if it is absent."""
        if self.dependency_layer is not None:
            for dep in self.dependency_layer.get_dependencies():
                yield dep

    def get_language(self):
        """Return the value of the root's xml:lang attribute (None if unset)."""
        return self.lang

    def get_tokens(self):
        """Yield every token of the text layer; nothing if it is absent."""
        # Guard added for consistency with the other getters: iterating a
        # missing (None) text layer used to raise TypeError.
        if self.text_layer is not None:
            for token in self.text_layer:
                yield token

    def get_terms(self):
        """Yield every term of the term layer; nothing if it is absent."""
        if self.term_layer is not None:
            for term in self.term_layer:
                yield term

    def get_token(self,token_id):
        """Return the text layer's entry for *token_id*, or None without a text layer."""
        if self.text_layer is not None:
            return self.text_layer.get_wf(token_id)
        else:
            return None

    def get_term(self,term_id):
        """Return the term layer's entry for *term_id*, or None without a term layer."""
        if self.term_layer is not None:
            return self.term_layer.get_term(term_id)
        else:
            return None

    def get_properties(self):
        """Yield every property of the features layer; nothing if it is absent."""
        if self.features_layer is not None:
            for prop in self.features_layer.get_properties():
                yield prop

    def get_entities(self):
        """Yield every entity of the entity layer; nothing if it is absent."""
        if self.entity_layer is not None:
            for entity in self.entity_layer:
                yield entity

    def get_opinions(self):
        """Yield every opinion of the opinion layer; nothing if it is absent."""
        if self.opinion_layer is not None:
            for opinion in self.opinion_layer.get_opinions():
                yield opinion

    def dump(self,filename=sys.stdout):
        """Write the XML tree, pretty-printed as UTF-8 with an XML declaration.

        filename -- target handed to the tree's ``write``; defaults to the
                    ``sys.stdout`` object that existed when the class was defined.
        """
        self.tree.write(filename,encoding='UTF-8',pretty_print=True,xml_declaration=True)

    def remove_dependency_layer(self):
        """Drop the dependency layer, its extractor and the 'deps' header entries."""
        if self.dependency_layer is not None:
            this_node = self.dependency_layer.get_node()
            self.root.remove(this_node)
            self.dependency_layer = self.my_dependency_extractor = None

        if self.header is not None:
            self.header.remove_lp('deps')

    def remove_this_opinion(self,opinion_id):
        """Ask the opinion layer (if any) to remove the opinion *opinion_id*."""
        if self.opinion_layer is not None:
            self.opinion_layer.remove_this_opinion(opinion_id)

    def remove_opinion_layer(self):
        """Drop the opinion layer and the 'opinions' header entries."""
        if self.opinion_layer is not None:
            this_node = self.opinion_layer.get_node()
            self.root.remove(this_node)
            self.opinion_layer = None

        if self.header is not None:
            self.header.remove_lp('opinions')

    def remove_properties(self):
        """Remove the properties of the features layer and the 'features' header entries."""
        if self.features_layer is not None:
            self.features_layer.remove_properties()

        if self.header is not None:
            self.header.remove_lp('features')

    def remove_term_layer(self):
        """Drop the term layer and the 'terms' header entries."""
        if self.term_layer is not None:
            this_node = self.term_layer.get_node()
            self.root.remove(this_node)
            self.term_layer = None
            # The memoised term/token maps were built from the layer that
            # was just removed; reset them so they are rebuilt on next use.
            self.dict_tokens_for_tid = None
            self.terms_for_token = None

        # The original repeated this stanza twice; once is enough.
        if self.header is not None:
            self.header.remove_lp('terms')

    def get_constituency_extractor(self):
        """Return the lazily created constituency extractor, or None without that layer."""
        if self.constituency_layer is not None:  ## Otherwise there are no constituents
            if self.my_constituency_extractor is None:
                self.my_constituency_extractor = Cconstituency_extractor(self)
            return self.my_constituency_extractor
        else:
            return None

    def get_dependency_extractor(self):
        """Return the lazily created dependency extractor, or None without that layer."""
        if self.dependency_layer is not None:  # otherwise there are no dependencies
            if self.my_dependency_extractor is None:
                self.my_dependency_extractor = Cdependency_extractor(self)
            return self.my_dependency_extractor
        else:
            return None

    ## ADDING METHODS
    def add_wf(self,wf_obj):
        """Add *wf_obj* to the text layer, creating and attaching the layer if needed."""
        if self.text_layer is None:
            self.text_layer = Ctext(type=self.type)
            self.root.append(self.text_layer.get_node())
        self.text_layer.add_wf(wf_obj)

    def add_opinion(self,opinion_obj):
        """Add *opinion_obj* to the opinion layer, creating and attaching the layer if needed."""
        if self.opinion_layer is None:
            # NOTE(review): created without type=self.type, unlike Ctext and
            # Cfeatures below -- confirm the default type is the intended one.
            self.opinion_layer = Copinions()
            self.root.append(self.opinion_layer.get_node())
        self.opinion_layer.add_opinion(opinion_obj)

    def add_linguistic_processor(self, layer ,my_lp):
        """Register the linguistic processor *my_lp* for *layer* in the header."""
        # NOTE(review): self.header is None when the file had no header
        # element, in which case this raises AttributeError -- confirm whether
        # a header should be created here.
        self.header.add_linguistic_processor(layer,my_lp)

    def add_dependency(self,my_dep):
        """Add *my_dep* to the dependency layer, creating and attaching the layer if needed."""
        if self.dependency_layer is None:
            self.dependency_layer = Cdependencies()
            self.root.append(self.dependency_layer.get_node())
        self.dependency_layer.add_dependency(my_dep)

    ## Adds a property to the feature layer
    def add_property(self,label,term_span,pid=None):
        """Add a property to the features layer, creating and attaching the layer if needed.

        label, term_span and pid are forwarded to the layer as
        ``add_property(pid, label, term_span)``.
        """
        if self.features_layer is None:
            self.features_layer = Cfeatures(type=self.type)
            self.root.append(self.features_layer.get_node())
        self.features_layer.add_property(pid, label,term_span)

    ## EXTRA FUNCTIONS
    ## Gets the token identifiers in the span of a term id
    def get_dict_tokens_for_termid(self, term_id):
        """Return the span ids of the term *term_id*, or [] for an unknown id.

        The term-id -> span-ids map is built from all terms on first use and
        then cached on the instance.
        """
        if self.dict_tokens_for_tid is None:
            self.dict_tokens_for_tid = {}
            for term in self.get_terms():
                self.dict_tokens_for_tid[term.get_id()] = term.get_span().get_span_ids()

        return self.dict_tokens_for_tid.get(term_id,[])

    ## Maps a list of token ids to term ids
    def map_tokens_to_terms(self,list_tokens):
        """Return the sorted, de-duplicated term ids whose span contains any of *list_tokens*.

        The token-id -> term-ids map is built from all terms on first use and
        then cached on the instance. Unknown token ids contribute nothing.
        """
        if self.terms_for_token is None:
            self.terms_for_token = {}
            for term in self.get_terms():
                termid = term.get_id()
                for tokid in term.get_span().get_span_ids():
                    self.terms_for_token.setdefault(tokid,[]).append(termid)

        ret = set()
        for my_id in list_tokens:
            ret.update(self.terms_for_token.get(my_id,[]))
        return sorted(ret)

    def remove_tokens_of_sentence(self,sentence_id):
        """Ask the text layer (if any) to remove the tokens of sentence *sentence_id*."""
        # Guard added: without a text layer this used to raise AttributeError.
        if self.text_layer is not None:
            self.text_layer.remove_tokens_of_sentence(sentence_id)
|