opener-opinion-detector-base 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/README.md +101 -0
- data/bin/opinion-detector-base +19 -0
- data/core/annotation.cfg.erb +9 -0
- data/core/packages/KafNafParser-1.4.tar.gz +0 -0
- data/core/packages/VUA_pylib-1.5.tar.gz +0 -0
- data/core/python-scripts/LICENSE +339 -0
- data/core/python-scripts/README.md +226 -0
- data/core/python-scripts/classify_kaf_naf_file.py +499 -0
- data/core/python-scripts/cross_validation.py +634 -0
- data/core/python-scripts/generate_folds.py +134 -0
- data/core/python-scripts/models.cfg +10 -0
- data/core/python-scripts/my_templates/README +33 -0
- data/core/python-scripts/my_templates/templates_exp.only0.txt +6 -0
- data/core/python-scripts/my_templates/templates_exp.pol0.txt +10 -0
- data/core/python-scripts/my_templates/templates_exp.red.txt +7 -0
- data/core/python-scripts/my_templates/templates_exp.txt +10 -0
- data/core/python-scripts/my_templates/templates_holder.only0.txt +11 -0
- data/core/python-scripts/my_templates/templates_holder.red.txt +9 -0
- data/core/python-scripts/my_templates/templates_holder.txt +10 -0
- data/core/python-scripts/my_templates/templates_target.only0.txt +11 -0
- data/core/python-scripts/my_templates/templates_target.red.txt +9 -0
- data/core/python-scripts/my_templates/templates_target.txt +10 -0
- data/core/python-scripts/run_all_experiments.sh +49 -0
- data/core/python-scripts/run_basic.py +20 -0
- data/core/python-scripts/run_experiment.sh +42 -0
- data/core/python-scripts/scripts/__init__.py +1 -0
- data/core/python-scripts/scripts/config_manager.py +314 -0
- data/core/python-scripts/scripts/crfutils.py +215 -0
- data/core/python-scripts/scripts/extract_feats_relations.py +295 -0
- data/core/python-scripts/scripts/extract_features.py +376 -0
- data/core/python-scripts/scripts/feats_to_crf.exp.py +105 -0
- data/core/python-scripts/scripts/lexicons.py +44 -0
- data/core/python-scripts/scripts/link_entities_distance.py +77 -0
- data/core/python-scripts/scripts/relation_classifier.py +250 -0
- data/core/python-scripts/train.py +566 -0
- data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/PKG-INFO +10 -0
- data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/SOURCES.txt +22 -0
- data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/dependency_links.txt +1 -0
- data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/installed-files.txt +47 -0
- data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/top_level.txt +1 -0
- data/core/site-packages/pre_build/KafNafParser/KafNafParserMod.py +390 -0
- data/core/site-packages/pre_build/KafNafParser/KafNafParserMod.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/__init__.py +14 -0
- data/core/site-packages/pre_build/KafNafParser/__init__.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/constituency_data.py +125 -0
- data/core/site-packages/pre_build/KafNafParser/constituency_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/coreference_data.py +52 -0
- data/core/site-packages/pre_build/KafNafParser/coreference_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/dependency_data.py +78 -0
- data/core/site-packages/pre_build/KafNafParser/dependency_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/entity_data.py +59 -0
- data/core/site-packages/pre_build/KafNafParser/entity_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/external_references_data.py +41 -0
- data/core/site-packages/pre_build/KafNafParser/external_references_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/feature_extractor/__init__.py +2 -0
- data/core/site-packages/pre_build/KafNafParser/feature_extractor/__init__.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/feature_extractor/constituency.py +205 -0
- data/core/site-packages/pre_build/KafNafParser/feature_extractor/constituency.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/feature_extractor/dependency.py +309 -0
- data/core/site-packages/pre_build/KafNafParser/feature_extractor/dependency.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/features_data.py +131 -0
- data/core/site-packages/pre_build/KafNafParser/features_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/header_data.py +127 -0
- data/core/site-packages/pre_build/KafNafParser/header_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/opinion_data.py +211 -0
- data/core/site-packages/pre_build/KafNafParser/opinion_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/references_data.py +23 -0
- data/core/site-packages/pre_build/KafNafParser/references_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/span_data.py +63 -0
- data/core/site-packages/pre_build/KafNafParser/span_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/term_data.py +111 -0
- data/core/site-packages/pre_build/KafNafParser/term_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/term_sentiment_data.py +42 -0
- data/core/site-packages/pre_build/KafNafParser/term_sentiment_data.pyc +0 -0
- data/core/site-packages/pre_build/KafNafParser/text_data.py +99 -0
- data/core/site-packages/pre_build/KafNafParser/text_data.pyc +0 -0
- data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/PKG-INFO +10 -0
- data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/SOURCES.txt +14 -0
- data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/dependency_links.txt +1 -0
- data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/installed-files.txt +23 -0
- data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/top_level.txt +1 -0
- data/core/site-packages/pre_build/VUA_pylib/__init__.py +1 -0
- data/core/site-packages/pre_build/VUA_pylib/__init__.pyc +0 -0
- data/core/site-packages/pre_build/VUA_pylib/common/__init__.py +1 -0
- data/core/site-packages/pre_build/VUA_pylib/common/__init__.pyc +0 -0
- data/core/site-packages/pre_build/VUA_pylib/common/common.py +28 -0
- data/core/site-packages/pre_build/VUA_pylib/common/common.pyc +0 -0
- data/core/site-packages/pre_build/VUA_pylib/corpus_reader/__init__.py +1 -0
- data/core/site-packages/pre_build/VUA_pylib/corpus_reader/__init__.pyc +0 -0
- data/core/site-packages/pre_build/VUA_pylib/corpus_reader/google_web_nl.py +156 -0
- data/core/site-packages/pre_build/VUA_pylib/corpus_reader/google_web_nl.pyc +0 -0
- data/core/site-packages/pre_build/VUA_pylib/io_utils/__init__.py +1 -0
- data/core/site-packages/pre_build/VUA_pylib/io_utils/__init__.pyc +0 -0
- data/core/site-packages/pre_build/VUA_pylib/io_utils/feature_file.py +121 -0
- data/core/site-packages/pre_build/VUA_pylib/io_utils/feature_file.pyc +0 -0
- data/core/site-packages/pre_build/VUA_pylib/lexicon/__init__.py +1 -0
- data/core/site-packages/pre_build/VUA_pylib/lexicon/__init__.pyc +0 -0
- data/core/site-packages/pre_build/VUA_pylib/lexicon/lexicon.py +72 -0
- data/core/site-packages/pre_build/VUA_pylib/lexicon/lexicon.pyc +0 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/PKG-INFO +10 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/SOURCES.txt +7 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/dependency_links.txt +1 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/installed-files.txt +11 -0
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/top_level.txt +1 -0
- data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.py +165 -0
- data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.pyc +0 -0
- data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.py +439 -0
- data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.pyc +0 -0
- data/core/site-packages/pre_build/VUKafParserPy/__init__.py +7 -0
- data/core/site-packages/pre_build/VUKafParserPy/__init__.pyc +0 -0
- data/core/vendor/src/crfsuite/AUTHORS +1 -0
- data/core/vendor/src/crfsuite/COPYING +27 -0
- data/core/vendor/src/crfsuite/ChangeLog +103 -0
- data/core/vendor/src/crfsuite/INSTALL +236 -0
- data/core/vendor/src/crfsuite/Makefile.am +19 -0
- data/core/vendor/src/crfsuite/Makefile.in +783 -0
- data/core/vendor/src/crfsuite/README +183 -0
- data/core/vendor/src/crfsuite/aclocal.m4 +9018 -0
- data/core/vendor/src/crfsuite/autogen.sh +38 -0
- data/core/vendor/src/crfsuite/compile +143 -0
- data/core/vendor/src/crfsuite/config.guess +1502 -0
- data/core/vendor/src/crfsuite/config.h.in +198 -0
- data/core/vendor/src/crfsuite/config.sub +1714 -0
- data/core/vendor/src/crfsuite/configure +14273 -0
- data/core/vendor/src/crfsuite/configure.in +149 -0
- data/core/vendor/src/crfsuite/crfsuite.sln +42 -0
- data/core/vendor/src/crfsuite/depcomp +630 -0
- data/core/vendor/src/crfsuite/example/chunking.py +49 -0
- data/core/vendor/src/crfsuite/example/crfutils.py +179 -0
- data/core/vendor/src/crfsuite/example/ner.py +270 -0
- data/core/vendor/src/crfsuite/example/pos.py +78 -0
- data/core/vendor/src/crfsuite/example/template.py +88 -0
- data/core/vendor/src/crfsuite/frontend/Makefile.am +29 -0
- data/core/vendor/src/crfsuite/frontend/Makefile.in +640 -0
- data/core/vendor/src/crfsuite/frontend/dump.c +116 -0
- data/core/vendor/src/crfsuite/frontend/frontend.vcxproj +129 -0
- data/core/vendor/src/crfsuite/frontend/iwa.c +273 -0
- data/core/vendor/src/crfsuite/frontend/iwa.h +65 -0
- data/core/vendor/src/crfsuite/frontend/learn.c +439 -0
- data/core/vendor/src/crfsuite/frontend/main.c +137 -0
- data/core/vendor/src/crfsuite/frontend/option.c +93 -0
- data/core/vendor/src/crfsuite/frontend/option.h +86 -0
- data/core/vendor/src/crfsuite/frontend/readdata.h +38 -0
- data/core/vendor/src/crfsuite/frontend/reader.c +136 -0
- data/core/vendor/src/crfsuite/frontend/tag.c +427 -0
- data/core/vendor/src/crfsuite/genbinary.sh.in +15 -0
- data/core/vendor/src/crfsuite/include/Makefile.am +11 -0
- data/core/vendor/src/crfsuite/include/Makefile.in +461 -0
- data/core/vendor/src/crfsuite/include/crfsuite.h +1063 -0
- data/core/vendor/src/crfsuite/include/crfsuite.hpp +555 -0
- data/core/vendor/src/crfsuite/include/crfsuite_api.hpp +400 -0
- data/core/vendor/src/crfsuite/include/os.h +61 -0
- data/core/vendor/src/crfsuite/install-sh +520 -0
- data/core/vendor/src/crfsuite/lib/cqdb/COPYING +28 -0
- data/core/vendor/src/crfsuite/lib/cqdb/Makefile.am +21 -0
- data/core/vendor/src/crfsuite/lib/cqdb/Makefile.in +549 -0
- data/core/vendor/src/crfsuite/lib/cqdb/cqdb.vcxproj +86 -0
- data/core/vendor/src/crfsuite/lib/cqdb/include/cqdb.h +524 -0
- data/core/vendor/src/crfsuite/lib/cqdb/src/cqdb.c +587 -0
- data/core/vendor/src/crfsuite/lib/cqdb/src/lookup3.c +976 -0
- data/core/vendor/src/crfsuite/lib/crf/Makefile.am +46 -0
- data/core/vendor/src/crfsuite/lib/crf/Makefile.in +721 -0
- data/core/vendor/src/crfsuite/lib/crf/crf.vcxproj +216 -0
- data/core/vendor/src/crfsuite/lib/crf/src/crf1d.h +353 -0
- data/core/vendor/src/crfsuite/lib/crf/src/crf1d_context.c +705 -0
- data/core/vendor/src/crfsuite/lib/crf/src/crf1d_encode.c +943 -0
- data/core/vendor/src/crfsuite/lib/crf/src/crf1d_feature.c +352 -0
- data/core/vendor/src/crfsuite/lib/crf/src/crf1d_model.c +994 -0
- data/core/vendor/src/crfsuite/lib/crf/src/crf1d_tag.c +550 -0
- data/core/vendor/src/crfsuite/lib/crf/src/crfsuite.c +492 -0
- data/core/vendor/src/crfsuite/lib/crf/src/crfsuite_internal.h +236 -0
- data/core/vendor/src/crfsuite/lib/crf/src/crfsuite_train.c +272 -0
- data/core/vendor/src/crfsuite/lib/crf/src/dataset.c +106 -0
- data/core/vendor/src/crfsuite/lib/crf/src/dictionary.c +118 -0
- data/core/vendor/src/crfsuite/lib/crf/src/holdout.c +80 -0
- data/core/vendor/src/crfsuite/lib/crf/src/logging.c +91 -0
- data/core/vendor/src/crfsuite/lib/crf/src/logging.h +48 -0
- data/core/vendor/src/crfsuite/lib/crf/src/params.c +335 -0
- data/core/vendor/src/crfsuite/lib/crf/src/params.h +80 -0
- data/core/vendor/src/crfsuite/lib/crf/src/quark.c +172 -0
- data/core/vendor/src/crfsuite/lib/crf/src/quark.h +46 -0
- data/core/vendor/src/crfsuite/lib/crf/src/rumavl.c +1107 -0
- data/core/vendor/src/crfsuite/lib/crf/src/rumavl.h +160 -0
- data/core/vendor/src/crfsuite/lib/crf/src/train_arow.c +408 -0
- data/core/vendor/src/crfsuite/lib/crf/src/train_averaged_perceptron.c +242 -0
- data/core/vendor/src/crfsuite/lib/crf/src/train_l2sgd.c +507 -0
- data/core/vendor/src/crfsuite/lib/crf/src/train_lbfgs.c +338 -0
- data/core/vendor/src/crfsuite/lib/crf/src/train_passive_aggressive.c +435 -0
- data/core/vendor/src/crfsuite/lib/crf/src/vecmath.h +341 -0
- data/core/vendor/src/crfsuite/ltmain.sh +8413 -0
- data/core/vendor/src/crfsuite/missing +376 -0
- data/core/vendor/src/crfsuite/swig/Makefile.am +13 -0
- data/core/vendor/src/crfsuite/swig/Makefile.in +365 -0
- data/core/vendor/src/crfsuite/swig/crfsuite.cpp +2 -0
- data/core/vendor/src/crfsuite/swig/export.i +32 -0
- data/core/vendor/src/crfsuite/swig/python/README +92 -0
- data/core/vendor/src/crfsuite/swig/python/crfsuite.py +329 -0
- data/core/vendor/src/crfsuite/swig/python/export_wrap.cpp +14355 -0
- data/core/vendor/src/crfsuite/swig/python/export_wrap.h +63 -0
- data/core/vendor/src/crfsuite/swig/python/prepare.sh +9 -0
- data/core/vendor/src/crfsuite/swig/python/sample_tag.py +52 -0
- data/core/vendor/src/crfsuite/swig/python/sample_train.py +68 -0
- data/core/vendor/src/crfsuite/swig/python/setup.py +44 -0
- data/core/vendor/src/crfsuite/win32/stdint.h +679 -0
- data/core/vendor/src/liblbfgs/AUTHORS +1 -0
- data/core/vendor/src/liblbfgs/COPYING +22 -0
- data/core/vendor/src/liblbfgs/ChangeLog +120 -0
- data/core/vendor/src/liblbfgs/INSTALL +231 -0
- data/core/vendor/src/liblbfgs/Makefile.am +10 -0
- data/core/vendor/src/liblbfgs/Makefile.in +638 -0
- data/core/vendor/src/liblbfgs/NEWS +0 -0
- data/core/vendor/src/liblbfgs/README +71 -0
- data/core/vendor/src/liblbfgs/aclocal.m4 +6985 -0
- data/core/vendor/src/liblbfgs/autogen.sh +38 -0
- data/core/vendor/src/liblbfgs/config.guess +1411 -0
- data/core/vendor/src/liblbfgs/config.h.in +64 -0
- data/core/vendor/src/liblbfgs/config.sub +1500 -0
- data/core/vendor/src/liblbfgs/configure +21146 -0
- data/core/vendor/src/liblbfgs/configure.in +107 -0
- data/core/vendor/src/liblbfgs/depcomp +522 -0
- data/core/vendor/src/liblbfgs/include/lbfgs.h +745 -0
- data/core/vendor/src/liblbfgs/install-sh +322 -0
- data/core/vendor/src/liblbfgs/lbfgs.sln +26 -0
- data/core/vendor/src/liblbfgs/lib/Makefile.am +24 -0
- data/core/vendor/src/liblbfgs/lib/Makefile.in +499 -0
- data/core/vendor/src/liblbfgs/lib/arithmetic_ansi.h +133 -0
- data/core/vendor/src/liblbfgs/lib/arithmetic_sse_double.h +294 -0
- data/core/vendor/src/liblbfgs/lib/arithmetic_sse_float.h +298 -0
- data/core/vendor/src/liblbfgs/lib/lbfgs.c +1371 -0
- data/core/vendor/src/liblbfgs/lib/lib.vcxproj +95 -0
- data/core/vendor/src/liblbfgs/ltmain.sh +6426 -0
- data/core/vendor/src/liblbfgs/missing +353 -0
- data/core/vendor/src/liblbfgs/sample/Makefile.am +15 -0
- data/core/vendor/src/liblbfgs/sample/Makefile.in +433 -0
- data/core/vendor/src/liblbfgs/sample/sample.c +81 -0
- data/core/vendor/src/liblbfgs/sample/sample.cpp +126 -0
- data/core/vendor/src/liblbfgs/sample/sample.vcxproj +105 -0
- data/core/vendor/src/svm_light/LICENSE.txt +59 -0
- data/core/vendor/src/svm_light/Makefile +105 -0
- data/core/vendor/src/svm_light/kernel.h +40 -0
- data/core/vendor/src/svm_light/svm_classify.c +197 -0
- data/core/vendor/src/svm_light/svm_common.c +985 -0
- data/core/vendor/src/svm_light/svm_common.h +301 -0
- data/core/vendor/src/svm_light/svm_hideo.c +1062 -0
- data/core/vendor/src/svm_light/svm_learn.c +4147 -0
- data/core/vendor/src/svm_light/svm_learn.h +169 -0
- data/core/vendor/src/svm_light/svm_learn_main.c +397 -0
- data/core/vendor/src/svm_light/svm_loqo.c +211 -0
- data/ext/hack/Rakefile +17 -0
- data/ext/hack/support.rb +88 -0
- data/lib/opener/opinion_detectors/base.rb +112 -0
- data/lib/opener/opinion_detectors/base/version.rb +7 -0
- data/lib/opener/opinion_detectors/configuration_creator.rb +86 -0
- data/lib/opener/opinion_detectors/de.rb +7 -0
- data/lib/opener/opinion_detectors/en.rb +7 -0
- data/lib/opener/opinion_detectors/it.rb +7 -0
- data/lib/opener/opinion_detectors/nl.rb +6 -0
- data/opener-opinion-detector-base.gemspec +35 -0
- data/pre_build_requirements.txt +3 -0
- metadata +374 -0
@@ -0,0 +1,134 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
|
3
|
+
import sys
|
4
|
+
import getopt
|
5
|
+
import logging
|
6
|
+
import os
|
7
|
+
import random
|
8
|
+
from shutil import rmtree
|
9
|
+
|
10
|
+
|
11
|
+
def usage(cmd):
    """Print the command-line help of this script to standard error.

    cmd is the program name shown in the 'Usage:' line (the caller passes
    sys.argv[0]).
    """
    help_lines = [
        'Usage: '+cmd+' options',
        'Options:',
        '\t-f --file: input file with a list of documents (required)',
        '\t-n --num: num of folds to create (required)',
        '\t-o --out: name of the main folder to store the subfolds (required)',
        '\t-s --subfolder: prefix for the subfolders (optional, default "fold")',
        '',
        'Examples',
        '\tgenerate_folds.py -f vu.doclist.attitude.ula.xbank --num 10 -o out_folder',
        '\tgenerate_folds.py -f vu.doclist.attitude.ula.xbank --num 10 -o out_folder --subfolder my_custom_fold',
    ]
    # sys.stderr.write replaces the Python-2-only "print >>sys.stderr" form:
    # it emits the same bytes on Python 2 and also works on Python 3.
    sys.stderr.write('\n'.join(help_lines) + '\n')
|
22
|
+
|
23
|
+
|
24
|
+
def _write_fold_file(path, items):
    """Write every element of items to path, one per line."""
    with open(path, 'w') as fic:
        logging.debug('Writing info to '+fic.name)
        for ele in items:
            fic.write(ele+'\n')


def generate_folds(input_file,num_folds,out_folder,name_subfolder='fold',seed=None):
    """Split the documents listed in input_file into cross-validation folds.

    For every fold n a folder <out_folder>/<name_subfolder>_<n> is created
    holding two files, 'train' and 'test', with one document per line.

    Parameters:
        input_file: path to a text file with one document name per line
            (blank lines are ignored).
        num_folds: number of folds to create; must be >= 1.
        out_folder: main output folder. If it already exists it is REMOVED
            and recreated.
        name_subfolder: prefix of the per-fold subfolders.
        seed: optional seed for a reproducible shuffle; with None the global
            random generator is used, as before.

    Raises:
        ValueError: if num_folds is smaller than 1 (checked before anything
            on disk is touched).
    """
    # Validate first so a bad value never deletes an existing output folder.
    if num_folds < 1:
        raise ValueError('num_folds must be >= 1, got %r' % (num_folds,))

    # Load the input file. strip() also drops a trailing '\r', so plain 'r'
    # mode is enough ('rU' is rejected by recent Python 3 versions).
    logging.debug('Loading elements from '+input_file)
    with open(input_file,'r') as fic:
        stripped = [line.strip() for line in fic]
    # Blank lines are not documents; keeping them would create empty entries
    # that show up in several folds.
    elements = [ele for ele in stripped if ele]
    logging.debug('Loaded '+str(len(elements))+' elements')

    ## Creating folders and subfolders:
    if os.path.exists(out_folder):
        sys.stderr.write('Output folder '+out_folder+' already exists\n')
        rmtree(out_folder)
        sys.stderr.write('It has been removed...\n')

    logging.debug('Creating '+out_folder+' and subfolders')
    os.mkdir(out_folder)
    folds = []
    for n in range(num_folds):
        my_name = os.path.join(out_folder,name_subfolder+'_'+str(n))
        os.mkdir(my_name)
        logging.debug('Created '+my_name)
        folds.append(my_name)

    ## Creating folds
    if seed is None:
        random.shuffle(elements)
    else:
        random.Random(seed).shuffle(elements)

    # divmod keeps the sizes integral on Python 3 as well; the first
    # 'remainder' folds get one extra element so that every document lands
    # in exactly one test set.
    base_size, remainder = divmod(len(elements), num_folds)
    my_begin = 0
    for n, this_fold in enumerate(folds):
        my_end = my_begin + base_size + (1 if n < remainder else 0)
        my_test = elements[my_begin:my_end]
        my_train = elements[:my_begin]+elements[my_end:]
        # Can only happen when the input list contains duplicated names.
        if set(my_test) & set(my_train):
            sys.stderr.write('Error overlapping\n')
            sys.stderr.write(str(my_train)+'\n')
            sys.stderr.write(str(my_test)+'\n')
        my_begin = my_end

        #Save the folds
        _write_fold_file(os.path.join(this_fold,'train'), my_train)
        _write_fold_file(os.path.join(this_fold,'test'), my_test)
    logging.debug('Finished OK')
|
90
|
+
|
91
|
+
if __name__ == '__main__':
    # Command-line entry point: parse the options, check that the required
    # ones are present and create the folds.
    logging.basicConfig(stream=sys.stderr,format='%(asctime)s - %(levelname)s - %(message)s',level=logging.DEBUG)

    input_file = None
    num_folds = None
    out_folder = None
    name_subfolder = 'fold'

    try:
        opts, args = getopt.getopt(sys.argv[1:],"f:n:o:s:",["file=","num=","out=","subfolder="])
    except getopt.GetoptError as e:
        # When getopt fails no option is returned at all, so continuing would
        # only produce a misleading "Input file missing" message.
        sys.stderr.write('ERROR!!!! '+str(e)+'\n')
        sys.stdout.write('\n')
        usage(sys.argv[0])
        sys.exit(-1)

    for opt, arg in opts:
        if opt in ['-f','--file']:
            input_file = arg
        elif opt in ['-n','--num']:
            try:
                num_folds = int(arg)
            except ValueError:
                sys.stderr.write('ERROR!!!! Num of folds must be an integer, got '+arg+'\n')
                sys.stdout.write('\n')
                usage(sys.argv[0])
                sys.exit(-1)
        elif opt in ['-o','--out']:
            out_folder = arg
        elif opt in ['-s','--subfolder']:
            name_subfolder = arg

    # The three required options share the same failure handling.
    required = [
        (input_file, 'ERROR!!!! Input file missing'),
        (num_folds, 'ERROR!!!! Num of folds missing'),
        (out_folder, 'ERROR!!!! Out folder missing'),
    ]
    for value, message in required:
        if value is None:
            sys.stderr.write(message+'\n')
            sys.stdout.write('\n')
            usage(sys.argv[0])
            sys.exit(-1)

    ###### END
    # name_subfolder is forwarded so that -s/--subfolder actually takes effect.
    generate_folds(input_file,num_folds,out_folder,name_subfolder)
|
133
|
+
|
134
|
+
|
@@ -0,0 +1,10 @@
|
|
1
|
+
#LANG|domain|pathtomodel|description
|
2
|
+
en|hotel|final_models/en/hotel_cfg1|Trained with config1 in the last version of hotel annotations
|
3
|
+
en|news|final_models/en/news_cfg1|Trained with config1 using only the sentences annotated with news
|
4
|
+
nl|hotel|final_models/nl/hotel_cfg1|Trained with config1 in the last version of hotel annotations
|
5
|
+
nl|news|final_models/nl/news_cfg1|Trained with config1 using only the sentences annotated with news
|
6
|
+
de|hotel|final_models/de/hotel_cfg1|Trained with config1 in the last version of hotel annotations
|
7
|
+
de|news|final_models/de/news_cfg1|Trained with config1 using only the sentences annotated with news
|
8
|
+
fr|hotel|final_models/fr/hotel_cfg1|Trained with config1 in the last version of hotel annotations
|
9
|
+
it|hotel|final_models/it/hotel_cfg1|Trained with config1 in the last version of hotel annotations
|
10
|
+
es|hotel|final_models/es/hotel_cfg1|Trained with config1 in the last version of hotel annotations
|
@@ -0,0 +1,33 @@
|
|
1
|
+
Format
|
2
|
+
|
3
|
+
--> lines starting with # are skipped
|
4
|
+
|
5
|
+
1 token -2 -1 0
|
6
|
+
|
7
|
+
-> The first 1 is the length of the template, in this case unigram
|
8
|
+
-> Then 'n' labels that will be used (must match with the labels generated
|
9
|
+
by the feature extractor)
|
10
|
+
--> Then the positions, in case of 2grams 3grams each position must be
|
11
|
+
--> n/m/p
|
12
|
+
|
13
|
+
The example would generate these templates:
|
14
|
+
('token',-2)
|
15
|
+
('token',-1)
|
16
|
+
('token',0)
|
17
|
+
|
18
|
+
|
19
|
+
Example with bigrams
|
20
|
+
2 token token -2/-1 -1/0 0/1 1/2
|
21
|
+
|
22
|
+
would generate:
|
23
|
+
(('token',-2),('token',-1))
|
24
|
+
(('token',-1),('token',0))
|
25
|
+
(('token',0),('token',1))
(('token',1),('token',2))
|
26
|
+
|
27
|
+
|
28
|
+
Example with trigrams (the positions are arbitrary, the example only illustrates the format)
|
29
|
+
3 token lemma pos -2/0/4 9/8/3
|
30
|
+
(('token',-2),('lemma',0),('pos',4))
|
31
|
+
(('token',9),('lemma',8),('pos',3))
|
32
|
+
|
33
|
+
|
@@ -0,0 +1,10 @@
|
|
1
|
+
# sentence_id token_id token lemma pos term_id pol/mod mpqa_subjectivity mpqa_polarity
|
2
|
+
# entity property phrase_type y
|
3
|
+
|
4
|
+
1 token -2 -1 0 1 2
|
5
|
+
1 lemma -2 -1 0 1 2
|
6
|
+
1 pos -2 -1 0 1 2
|
7
|
+
1 pol/mod 0
|
8
|
+
1 mpqa_subjectivity 0
|
9
|
+
1 mpqa_polarity 0
|
10
|
+
1 phrase_type -1 0 1
|
@@ -0,0 +1,10 @@
|
|
1
|
+
# sentence_id token_id token lemma pos term_id pol/mod mpqa_subjectivity mpqa_polarity
|
2
|
+
# entity property phrase_type y
|
3
|
+
|
4
|
+
1 token -2 -1 0 1 2
|
5
|
+
1 lemma -2 -1 0 1 2
|
6
|
+
1 pos -2 -1 0 1 2
|
7
|
+
1 pol/mod -2 -1 0 1 2
|
8
|
+
1 mpqa_subjectivity -2 -1 0 1 2
|
9
|
+
1 mpqa_polarity -2 -1 0 1 2
|
10
|
+
1 phrase_type -1 0 1
|
@@ -0,0 +1,10 @@
|
|
1
|
+
# sentence_id token_id token lemma pos term_id pol/mod mpqa_subjectivity
|
2
|
+
# mpqa_polarity
|
3
|
+
# entity property phrase_type y
|
4
|
+
|
5
|
+
1 token -2 -1 0 1 2
|
6
|
+
1 lemma -2 -1 0 1 2
|
7
|
+
1 pos -2 -1 0 1 2
|
8
|
+
1 entity -2 -1 0 1 2
|
9
|
+
1 property -2 -1 0 1 2
|
10
|
+
1 phrase_type -1 0 1
|
@@ -0,0 +1,11 @@
|
|
1
|
+
# sentence_id token_id token lemma pos term_id pol/mod mpqa_subjectivity
|
2
|
+
# mpqa_polarity
|
3
|
+
# entity property phrase_type y
|
4
|
+
|
5
|
+
1 token 0 -1 1
|
6
|
+
1 lemma 0 -1 1
|
7
|
+
1 pos 0 -1 1
|
8
|
+
1 entity 0 -1 1
|
9
|
+
1 property 0 -1 1
|
10
|
+
1 phrase_type 0 -1 1
|
11
|
+
1 aspect_training 0 -1 1
|
@@ -0,0 +1,10 @@
|
|
1
|
+
# sentence_id token_id token lemma pos term_id pol/mod mpqa_subjectivity
|
2
|
+
# mpqa_polarity
|
3
|
+
# entity property phrase_type y
|
4
|
+
|
5
|
+
1 token -2 -1 0 1 2
|
6
|
+
1 lemma -2 -1 0 1 2
|
7
|
+
1 pos -2 -1 0 1 2
|
8
|
+
1 entity -2 -1 0 1 2
|
9
|
+
1 property -2 -1 0 1 2
|
10
|
+
1 phrase_type -1 0 1
|
@@ -0,0 +1,49 @@
|
|
1
|
+
#!/bin/bash

# This script runs a bunch of experiments given a list of KAF files
# Input:
#   $1 --> folder where all the models are stored
#   $2 --> list of kaf files for training
# Optional environment:
#   NOTIFY_EMAIL --> address that receives the "Experiment done" mail
#                    (defaults to the address that used to be hard-coded)
# Output:
#   standard output --> latex table with the results
#   standard error --> progress information
# The script looks for all the subfolders called exp1 exp2 exp3 ... within the $exps_folder folder

if [ $# -lt 2 ]; then
  echo "Usage: $0 <models_folder> <list_of_kaf_files>" >&2
  exit 1
fi

general_folder=$1
list_files=$2
exps_folder=experiments
notify_email=${NOTIFY_EMAIL:-ruben.izquierdobevia@vu.nl}

# All expansions are quoted so that paths containing spaces keep working.
# -p also creates missing parent folders.
if [ ! -d "$general_folder" ]; then
  mkdir -p "$general_folder"
fi

echo "Output folder: $general_folder"
echo "List of files: $list_files"
echo "\begin{table}"
echo "\begin{tabular}{c|c|c|c|c|c|c||c|c|c|c}"
echo "\hline"
echo "Type & \multicolumn{2}{|c|}{Expression} & \multicolumn{2}{|c|}{Target} & \multicolumn{2}{|c||}{Holder} & \multicolumn{2}{|c|}{Exp-Tar} & \multicolumn{2}{|c|}{Exp-Hol} \\\\"
echo "\hline"
echo "& P & R & P & R & P & R & P & R & P & R \\\\"

for exp in "$exps_folder"/exp*
do
  # When nothing matches, the glob is left as the literal pattern: skip it
  [ -e "$exp" ] || continue

  #id=$1 folder=$2 list_files=$3 experiment_folder=$4
  echo "$(date +%T) starting experiment $exp" >&2
  id=$(basename "$exp")
  outfolder=$general_folder/$id
  # Only run the experiment if its output folder does not exist yet
  if [ ! -d "$outfolder" ]; then
    run_experiment.sh "$id" "$outfolder" "$list_files" "$exp"
    echo "$(date +%T) Done" >&2
    echo "Done experiment $exp Files: $list_files Out: $general_folder" | mail -s "Experiment done" "$notify_email"
  else
    echo "The experiment $exp on $outfolder already exists, skipped" >&2
  fi
done

echo "\end{tabular}"
echo "\end{table}"
|
49
|
+
|
@@ -0,0 +1,20 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
|
3
|
+
import sys
|
4
|
+
from subprocess import Popen,PIPE
|
5
|
+
|
6
|
+
def run_basic(input_file,output_file,cmd=None):
    """Run the basic opinion detector over one KAF file.

    The content of input_file is fed to the command on its standard input
    and its standard output is written to output_file. 'Done' is printed on
    standard output when the command has finished.

    Parameters:
        input_file: path of the file to process.
        output_file: path of the file to create (overwritten if it exists).
        cmd: shell command to run; defaults to the original hard-coded
            location of opinion_detector_basic_multi.py.
    """
    if cmd is None:
        cmd = '/home/izquierdo/opener_repos/opinion-detector-basic/core/opinion_detector_basic_multi.py'

    # "with" closes both files even if the command cannot be started.
    with open(input_file,'r') as fin:
        with open(output_file,'w') as fout:
            basic_opinion_miner = Popen(cmd,stdin=fin, stdout=fout,stderr=PIPE,shell=True)
            # communicate() drains the stderr pipe while waiting; a plain
            # wait() can block forever once the child fills the pipe buffer.
            basic_opinion_miner.communicate()

    if basic_opinion_miner.returncode != 0:
        sys.stderr.write('Command failed with exit code %d: %s\n' % (basic_opinion_miner.returncode, cmd))
    sys.stdout.write('Done\n')
|
15
|
+
|
16
|
+
if __name__ == '__main__':
    # Small manual run over one sample document. The locals were renamed
    # from "input"/"output" so the builtin input() is no longer shadowed.
    input_kaf = 'english00001_0123ff23e0d0dc0177f9b71a1928b674.kaf'
    output_kaf = 'english00001_0123ff23e0d0dc0177f9b71a1928b674.basic.kaf'
    run_basic(input_kaf,output_kaf)
|
20
|
+
|
@@ -0,0 +1,42 @@
|
|
1
|
+
#!/bin/bash

# This script runs a given experiment
# Input:
#  $1 the id used for the table latex for the experiment
#  $2 the output folder to store all the models and folds (for the validation)
#  $3 the list of files for the training
#  $4 the folder with the experiment (must contain a file called config.cfg with the configuration)
#
# Output
#  Standard out --> 2 rows of the latex table (for the basic and deluxe)
#  Standard err --> progress of the program
#  The log of training/evaluation is written to the parent folder of $2, called $id.log
#  The exit status is the one returned by cross_validation.py
id=$1
folder=$2
list_files=$3
experiment_folder=$4

numfolds=5
base_out_folder=$(dirname "$folder")
err_file=$base_out_folder/$id.log
out_per_folds=$base_out_folder/$id.out_per_fold.tex

tmpconfig=$(mktemp)
# Remove the temporary configuration even if the script is interrupted
trap 'rm -f "$tmpconfig"' EXIT

# Build the configuration: generated sections first, then the experiment's own config.cfg
{
  echo "[general]"
  echo "output_folder = $folder"
  echo "filename_training_list = $list_files"
  echo
  echo "[feature_templates]"
  echo "expression = $experiment_folder/templates_exp.txt "
  echo "holder = $experiment_folder/templates_hol.txt"
  echo "target = $experiment_folder/templates_tar.txt"
  echo
  cat "$experiment_folder/config.cfg"
} > "$tmpconfig"

echo "Running experiment $id Logs: $err_file Out per fold: $out_per_folds" >&2
#Output to standard output
cross_validation.py -n "$numfolds" -f "$tmpconfig" -id "$id" -out_folds "$out_per_folds" 2> "$err_file"
|
42
|
+
|
@@ -0,0 +1 @@
|
|
1
|
+
# This module intentionally contains no code; the single statement below
# does nothing (presumably the file only marks the folder as a package).
pass
|
@@ -0,0 +1,314 @@
|
|
1
|
+
import os
|
2
|
+
import ConfigParser
|
3
|
+
import shutil
|
4
|
+
|
5
|
+
# File name of the internal configuration file.
# NOTE(review): its users are outside the visible part of this module;
# presumably it is the config copied into the output folder -- confirm.
internal_config_filename= 'config.cfg'
|
6
|
+
|
7
|
+
|
8
|
+
def load_templates_from_file(filename):
    """Read a feature-template file and return the templates it describes.

    Empty lines and lines starting with '#' are skipped. Every other line has
    the form

        <n> <label_1> ... <label_n> <positions> [<positions> ...]

    where each <positions> item holds n integers joined by '/'. One template
    is produced per <positions> item: a list of (label, position) tuples.
    For instance '2 token token -2/-1 -1/0' yields
    [('token',-2),('token',-1)] and [('token',-1),('token',0)].

    Raises:
        ValueError: if a field is not an integer or a <positions> item has
            fewer values than there are labels.
    """
    templates = []
    with open(filename,'r') as fic:
        for line in fic:
            line = line.strip()
            if not line or line.startswith('#'):
                continue
            # split() without argument accepts tabs and repeated blanks;
            # split(' ') produced empty fields that broke int().
            tokens = line.split()
            my_len = int(tokens[0])
            labels = tokens[1:my_len+1]
            for value in tokens[my_len+1:]:
                single_values = value.split('/')
                if len(single_values) < len(labels):
                    raise ValueError('Template "%s": expected %d positions in "%s"'
                                     % (line, len(labels), value))
                # zip stops at the last label, so surplus values are ignored
                templates.append([(label,int(position))
                                  for label,position in zip(labels,single_values)])
    return templates
|
26
|
+
|
27
|
+
class Cconfig_manager:
|
28
|
+
def __init__(self):
    """Create a manager with an empty parser and no folders or templates set."""
    # Folders, filled in later by set_current_folder / set_config / set_out_folder
    self.this_folder = None
    self.out_folder = None
    # Template holders, unset until something assigns them
    self.templates_target = None
    self.templates_holder = None
    self.templates_expr = None
    # Parser populated by set_config
    self.config = ConfigParser.ConfigParser()
|
35
|
+
|
36
|
+
def set_current_folder(self,t):
    """Store t as this_folder, the base that set_config joins a relative output folder to."""
    self.this_folder = t
|
38
|
+
|
39
|
+
def get_flag_filename(self):
    """Return the path of the file called 'flag' inside the output folder."""
    return os.path.join(self.get_output_folder(), 'flag')
|
42
|
+
|
43
|
+
def set_config(self,file_cfg):
    """Load file_cfg into the parser and derive the output folder from it.

    The 'output_folder' option of the [general] section is used as is when
    it is an absolute path; otherwise it is joined to this_folder (see
    set_current_folder, which therefore has to be called first in that case).
    """
    self.config.read(file_cfg)
    output_folder_cfg = self.config.get('general','output_folder')
    # The unused local "out_folder = ''" of the original was dropped.
    if os.path.isabs(output_folder_cfg):
        self.out_folder = output_folder_cfg
    else:
        self.out_folder = os.path.join(self.this_folder,output_folder_cfg)
|
51
|
+
|
52
|
+
def get_use_dependencies(self):
    """Return the boolean 'use_dependencies' option of [relation_features].

    True is returned when the section or the option is not configured.
    """
    section, option = 'relation_features', 'use_dependencies'
    cfg = self.config
    if cfg.has_section(section) and cfg.has_option(section, option):
        return cfg.getboolean(section, option)
    return True
|
58
|
+
|
59
|
+
|
60
|
+
def get_use_training_lexicons(self):
    """Return the boolean 'use_training_lexicons' option of [relation_features].

    True is returned when the section or the option is not configured.
    """
    section, option = 'relation_features', 'use_training_lexicons'
    cfg = self.config
    if cfg.has_section(section) and cfg.has_option(section, option):
        return cfg.getboolean(section, option)
    return True
|
66
|
+
|
67
|
+
def get_use_tokens_lemmas(self):
|
68
|
+
use_them = True
|
69
|
+
if self.config.has_section('relation_features'):
|
70
|
+
if self.config.has_option('relation_features', 'use_tokens_lemmas'):
|
71
|
+
use_them = self.config.getboolean('relation_features', 'use_tokens_lemmas')
|
72
|
+
return use_them
|
73
|
+
|
74
|
+
def get_propagation_lexicon_name(self):
|
75
|
+
lexicon_name = None
|
76
|
+
if self.config.has_section('lexicons'):
|
77
|
+
if self.config.has_option('lexicons','propagation_lexicon'):
|
78
|
+
lexicon_name = self.config.get('lexicons','propagation_lexicon')
|
79
|
+
return lexicon_name
|
80
|
+
|
81
|
+
|
82
|
+
def set_out_folder(self,o):
|
83
|
+
self.out_folder = o
|
84
|
+
|
85
|
+
def get_training_datasets_folder(self):
|
86
|
+
my_name='training_datasets'
|
87
|
+
outfolder=self.get_output_folder()
|
88
|
+
return os.path.join(outfolder,my_name)
|
89
|
+
|
90
|
+
def get_training_dataset_exp(self):
|
91
|
+
my_name = 'training_set_exp.crf'
|
92
|
+
return os.path.join(self.get_training_datasets_folder(),my_name)
|
93
|
+
|
94
|
+
def get_training_dataset_target(self):
|
95
|
+
my_name = 'training_set_target.crf'
|
96
|
+
return os.path.join(self.get_training_datasets_folder(),my_name)
|
97
|
+
|
98
|
+
|
99
|
+
def get_training_dataset_holder(self):
|
100
|
+
my_name = 'training_set_holder.crf'
|
101
|
+
return os.path.join(self.get_training_datasets_folder(),my_name)
|
102
|
+
|
103
|
+
|
104
|
+
## FEATURE TEMPLATES
|
105
|
+
def get_feature_template_folder_name(self):
|
106
|
+
my_name = 'feature_templates'
|
107
|
+
return os.path.join(self.get_output_folder(),my_name)
|
108
|
+
|
109
|
+
def get_feature_template_exp_name(self):
|
110
|
+
my_name = 'feat_template_expr.txt'
|
111
|
+
return os.path.join(self.get_feature_template_folder_name(),my_name)
|
112
|
+
|
113
|
+
def get_feature_template_tar_name(self):
|
114
|
+
my_name = 'feat_template_target.txt'
|
115
|
+
return os.path.join(self.get_feature_template_folder_name(),my_name)
|
116
|
+
|
117
|
+
def get_feature_template_hol_name(self):
|
118
|
+
my_name = 'feat_template_holder.txt'
|
119
|
+
return os.path.join(self.get_feature_template_folder_name(),my_name)
|
120
|
+
|
121
|
+
def copy_feature_templates(self):
|
122
|
+
#Exp
|
123
|
+
temp_exp_orig = self.config.get('feature_templates','expression')
|
124
|
+
temp_exp_target = self.get_feature_template_exp_name()
|
125
|
+
if not os.path.isabs(temp_exp_orig):
|
126
|
+
temp_exp_orig = os.path.join(self.this_folder,temp_exp_orig)
|
127
|
+
shutil.copyfile(temp_exp_orig, temp_exp_target)
|
128
|
+
|
129
|
+
temp_tar_orig = self.config.get('feature_templates','target')
|
130
|
+
temp_tar_target = self.get_feature_template_tar_name()
|
131
|
+
if not os.path.isabs(temp_tar_orig):
|
132
|
+
temp_tar_orig = os.path.join(self.this_folder,temp_tar_orig)
|
133
|
+
shutil.copyfile(temp_tar_orig, temp_tar_target)
|
134
|
+
|
135
|
+
temp_hol_orig = self.config.get('feature_templates','holder')
|
136
|
+
temp_hol_target = self.get_feature_template_hol_name()
|
137
|
+
if not os.path.isabs(temp_hol_orig):
|
138
|
+
temp_hol_orig = os.path.join(self.this_folder,temp_hol_orig)
|
139
|
+
shutil.copyfile(temp_hol_orig, temp_hol_target)
|
140
|
+
|
141
|
+
def get_templates_expr(self):
|
142
|
+
if self.templates_expr is None:
|
143
|
+
filename_template = self.get_feature_template_exp_name()
|
144
|
+
self.templates_expr = load_templates_from_file(filename_template)
|
145
|
+
return self.templates_expr
|
146
|
+
|
147
|
+
def get_templates_holder(self):
|
148
|
+
if self.templates_holder is None:
|
149
|
+
filename_template = self.get_feature_template_hol_name()
|
150
|
+
self.templates_holder = load_templates_from_file(filename_template)
|
151
|
+
return self.templates_holder
|
152
|
+
|
153
|
+
def get_templates_target(self):
|
154
|
+
if self.templates_target is None:
|
155
|
+
filename_template = self.get_feature_template_tar_name()
|
156
|
+
self.templates_target = load_templates_from_file(filename_template)
|
157
|
+
return self.templates_target
|
158
|
+
|
159
|
+
def get_lexicons_folder(self):
|
160
|
+
my_name = 'lexicons'
|
161
|
+
return os.path.join(self.get_output_folder(),my_name)
|
162
|
+
|
163
|
+
###############
|
164
|
+
def get_expression_lexicon_filename(self):
|
165
|
+
my_name = 'polarity_lexicon.csv'
|
166
|
+
return os.path.join(self.get_lexicons_folder(),my_name)
|
167
|
+
|
168
|
+
def get_use_this_expression_lexicon(self):
|
169
|
+
use_it = None
|
170
|
+
if self.config.has_section('relation_features'):
|
171
|
+
if self.config.has_option('relation_features', 'use_this_expression_lexicon'):
|
172
|
+
use_it = self.config.get('relation_features', 'use_this_expression_lexicon')
|
173
|
+
return use_it
|
174
|
+
|
175
|
+
def get_use_this_target_lexicon(self):
|
176
|
+
use_it = None
|
177
|
+
if self.config.has_section('relation_features'):
|
178
|
+
if self.config.has_option('relation_features', 'use_this_target_lexicon'):
|
179
|
+
use_it = self.config.get('relation_features', 'use_this_target_lexicon')
|
180
|
+
return use_it
|
181
|
+
|
182
|
+
def get_target_lexicon_filename(self):
|
183
|
+
my_name = 'target_lexicon.csv'
|
184
|
+
return os.path.join(self.get_lexicons_folder(),my_name)
|
185
|
+
|
186
|
+
def get_feature_folder_name(self):
|
187
|
+
subfolder_feats = 'tab_feature_files'
|
188
|
+
out_folder = self.get_output_folder()
|
189
|
+
return os.path.join(out_folder,subfolder_feats)
|
190
|
+
|
191
|
+
def get_crf_expression_folder(self):
|
192
|
+
my_name='crf_feat_files_exp'
|
193
|
+
out_folder = self.get_output_folder()
|
194
|
+
return os.path.join(out_folder,my_name)
|
195
|
+
|
196
|
+
def get_crf_target_folder(self):
|
197
|
+
my_name='crf_feat_files_target'
|
198
|
+
out_folder = self.get_output_folder()
|
199
|
+
return os.path.join(out_folder,my_name)
|
200
|
+
|
201
|
+
def get_crf_holder_folder(self):
|
202
|
+
my_name='crf_feat_files_holder'
|
203
|
+
out_folder = self.get_output_folder()
|
204
|
+
return os.path.join(out_folder,my_name)
|
205
|
+
|
206
|
+
def get_output_folder(self):
|
207
|
+
return self.out_folder
|
208
|
+
|
209
|
+
def get_feature_desc_filename(self):
|
210
|
+
file_feat_desc = 'feature_desc.txt' #description of features
|
211
|
+
out_folder = self.get_output_folder()
|
212
|
+
return os.path.join(out_folder,file_feat_desc)
|
213
|
+
|
214
|
+
def get_file_training_list(self):
|
215
|
+
return self.config.get('general','filename_training_list')
|
216
|
+
|
217
|
+
def get_crfsuite_binary(self):
|
218
|
+
return self.config.get('crfsuite','path_to_binary')
|
219
|
+
|
220
|
+
def get_crfsuite_params(self):
|
221
|
+
return self.config.get('crfsuite','parameters')
|
222
|
+
|
223
|
+
def get_svm_learn_binary(self):
|
224
|
+
return self.config.get('svmlight','path_to_binary_learn')
|
225
|
+
|
226
|
+
def get_svm_classify_binary(self):
|
227
|
+
return self.config.get('svmlight','path_to_binary_classify')
|
228
|
+
|
229
|
+
def get_svm_params(self):
|
230
|
+
return self.config.get('svmlight','parameters')
|
231
|
+
|
232
|
+
|
233
|
+
def get_svm_threshold_exp_tar(self):
|
234
|
+
thr = -1
|
235
|
+
if self.config.has_option('relation_features', 'exp_tar_threshold'):
|
236
|
+
thr = self.config.getfloat('relation_features', 'exp_tar_threshold')
|
237
|
+
return thr
|
238
|
+
|
239
|
+
def get_svm_threshold_exp_hol(self):
|
240
|
+
thr = -1
|
241
|
+
if self.config.has_option('relation_features', 'exp_hol_threshold'):
|
242
|
+
thr = self.config.getfloat('relation_features', 'exp_hol_threshold')
|
243
|
+
return thr
|
244
|
+
|
245
|
+
|
246
|
+
# [valid_opinions]
|
247
|
+
# positive = sentiment-neg
|
248
|
+
# negative = sentiment-pos
|
249
|
+
def get_mapping_valid_opinions(self):
|
250
|
+
mapping = {}
|
251
|
+
for mapped_opinion, values_in_corpus in self.config.items('valid_opinions'):
|
252
|
+
values = [ v for v in values_in_corpus.split(';') if v != '']
|
253
|
+
for v in values:
|
254
|
+
mapping[v] = mapped_opinion
|
255
|
+
return mapping
|
256
|
+
|
257
|
+
def get_possible_expression_values(self):
|
258
|
+
labels = [key for key,_ in self.config.items('valid_opinions')]
|
259
|
+
return labels
|
260
|
+
|
261
|
+
def get_model_foldername(self):
|
262
|
+
my_name = 'models'
|
263
|
+
out_folder = self.get_output_folder()
|
264
|
+
return os.path.join(out_folder,my_name)
|
265
|
+
|
266
|
+
def get_filename_model_expression(self):
|
267
|
+
my_name = 'model_opi_exp.crf'
|
268
|
+
return os.path.join(self.get_model_foldername(),my_name)
|
269
|
+
|
270
|
+
def get_filename_model_target(self):
|
271
|
+
my_name = 'model_opi_target.crf'
|
272
|
+
return os.path.join(self.get_model_foldername(),my_name)
|
273
|
+
|
274
|
+
def get_filename_model_holder(self):
|
275
|
+
my_name = 'model_opi_holder.crf'
|
276
|
+
return os.path.join(self.get_model_foldername(),my_name)
|
277
|
+
|
278
|
+
def get_folder_relation_classifier(self):
|
279
|
+
my_name = 'relation_classifier'
|
280
|
+
return os.path.join(self.out_folder,my_name)
|
281
|
+
|
282
|
+
def get_relation_exp_tar_training_filename(self):
|
283
|
+
my_name = 'training_exp_tar.feat'
|
284
|
+
return os.path.join(self.get_folder_relation_classifier(),my_name)
|
285
|
+
|
286
|
+
def get_relation_exp_hol_training_filename(self):
|
287
|
+
my_name = 'training_exp_hol.feat'
|
288
|
+
return os.path.join(self.get_folder_relation_classifier(),my_name)
|
289
|
+
|
290
|
+
def get_rel_exp_tar_training_idx_filename(self):
|
291
|
+
my_name = 'training_exp_tar.idx'
|
292
|
+
return os.path.join(self.get_folder_relation_classifier(),my_name)
|
293
|
+
|
294
|
+
def get_rel_exp_hol_training_idx_filename(self):
|
295
|
+
my_name = 'training_exp_hol.idx'
|
296
|
+
return os.path.join(self.get_folder_relation_classifier(),my_name)
|
297
|
+
|
298
|
+
def get_index_features_exp_tar_filename(self):
|
299
|
+
my_name = 'feat_index.exp_tar.bin'
|
300
|
+
return os.path.join(self.get_folder_relation_classifier(),my_name)
|
301
|
+
|
302
|
+
def get_index_features_exp_hol_filename(self):
|
303
|
+
my_name = 'feat_index.exp_hol.bin'
|
304
|
+
return os.path.join(self.get_folder_relation_classifier(),my_name)
|
305
|
+
|
306
|
+
def get_filename_model_exp_tar(self):
|
307
|
+
my_name = 'model_relation_exp_tar.svmlight'
|
308
|
+
return os.path.join(self.get_folder_relation_classifier(),my_name)
|
309
|
+
|
310
|
+
def get_filename_model_exp_hol(self):
|
311
|
+
my_name = 'model_relation_exp_hol.svmlight'
|
312
|
+
return os.path.join(self.get_folder_relation_classifier(),my_name)
|
313
|
+
|
314
|
+
|