opener-kaf-naf-parser 1.0.1 → 1.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +67 -8
- data/bin/kaf-naf-parser-daemon +10 -0
- data/core/kaf-naf-parser.py +5 -5
- data/exec/kaf-naf-parser.rb +9 -0
- data/ext/hack/Rakefile +13 -0
- data/lib/opener/kaf_naf_parser/version.rb +1 -1
- data/opener-kaf-naf-parser.gemspec +5 -1
- data/pre_install_requirements.txt +3 -0
- metadata +37 -51
- data/core/packages/KafNafParser-1.2.tar.gz +0 -0
- data/core/packages/VUA_pylib-1.3.tar.gz +0 -0
- data/core/site-packages/pre_build/KafNafParser/KafNafParserMod.py +0 -338
- data/core/site-packages/pre_build/KafNafParser/__init__.py +0 -14
- data/core/site-packages/pre_build/KafNafParser/constituency_data.py +0 -125
- data/core/site-packages/pre_build/KafNafParser/coreference_data.py +0 -52
- data/core/site-packages/pre_build/KafNafParser/dependency_data.py +0 -80
- data/core/site-packages/pre_build/KafNafParser/entity_data.py +0 -59
- data/core/site-packages/pre_build/KafNafParser/external_references_data.py +0 -41
- data/core/site-packages/pre_build/KafNafParser/feature_extractor/__init__.py +0 -2
- data/core/site-packages/pre_build/KafNafParser/feature_extractor/constituency.py +0 -205
- data/core/site-packages/pre_build/KafNafParser/feature_extractor/dependency.py +0 -300
- data/core/site-packages/pre_build/KafNafParser/features_data.py +0 -71
- data/core/site-packages/pre_build/KafNafParser/header_data.py +0 -127
- data/core/site-packages/pre_build/KafNafParser/opinion_data.py +0 -200
- data/core/site-packages/pre_build/KafNafParser/references_data.py +0 -15
- data/core/site-packages/pre_build/KafNafParser/span_data.py +0 -63
- data/core/site-packages/pre_build/KafNafParser/term_data.py +0 -111
- data/core/site-packages/pre_build/KafNafParser/term_sentiment_data.py +0 -42
- data/core/site-packages/pre_build/KafNafParser/text_data.py +0 -90
- data/core/site-packages/pre_build/KafNafParser-1.2-py2.7.egg-info/PKG-INFO +0 -10
- data/core/site-packages/pre_build/KafNafParser-1.2-py2.7.egg-info/SOURCES.txt +0 -22
- data/core/site-packages/pre_build/KafNafParser-1.2-py2.7.egg-info/dependency_links.txt +0 -1
- data/core/site-packages/pre_build/KafNafParser-1.2-py2.7.egg-info/installed-files.txt +0 -47
- data/core/site-packages/pre_build/KafNafParser-1.2-py2.7.egg-info/top_level.txt +0 -1
- data/core/site-packages/pre_build/VUA_pylib/__init__.py +0 -1
- data/core/site-packages/pre_build/VUA_pylib/common/__init__.py +0 -1
- data/core/site-packages/pre_build/VUA_pylib/common/common.py +0 -28
- data/core/site-packages/pre_build/VUA_pylib/corpus_reader/__init__.py +0 -1
- data/core/site-packages/pre_build/VUA_pylib/corpus_reader/google_web_nl.py +0 -156
- data/core/site-packages/pre_build/VUA_pylib/io_utils/__init__.py +0 -1
- data/core/site-packages/pre_build/VUA_pylib/io_utils/feature_file.py +0 -121
- data/core/site-packages/pre_build/VUA_pylib/lexicon/__init__.py +0 -1
- data/core/site-packages/pre_build/VUA_pylib/lexicon/lexicon.py +0 -72
- data/core/site-packages/pre_build/VUA_pylib-1.3-py2.7.egg-info/PKG-INFO +0 -10
- data/core/site-packages/pre_build/VUA_pylib-1.3-py2.7.egg-info/SOURCES.txt +0 -14
- data/core/site-packages/pre_build/VUA_pylib-1.3-py2.7.egg-info/dependency_links.txt +0 -1
- data/core/site-packages/pre_build/VUA_pylib-1.3-py2.7.egg-info/installed-files.txt +0 -23
- data/core/site-packages/pre_build/VUA_pylib-1.3-py2.7.egg-info/top_level.txt +0 -1
- data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.py +0 -165
- data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.py +0 -439
- data/core/site-packages/pre_build/VUKafParserPy/__init__.py +0 -7
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/PKG-INFO +0 -10
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/SOURCES.txt +0 -7
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/dependency_links.txt +0 -1
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/installed-files.txt +0 -11
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/top_level.txt +0 -1
- data/pre_build_requirements.txt +0 -3
@@ -1,121 +0,0 @@
|
|
1
|
-
from operator import itemgetter
|
2
|
-
import sys
|
3
|
-
import cPickle
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
class Cexample:
    """One labelled example: a class label plus a list of (name, value) features.

    An example is parsed from a tab-separated line whose first field is the
    label and whose remaining fields have the form ``name=value``.
    """

    def __init__(self,str_line=None):
        """Create an empty example, or parse ``str_line`` when it is given."""
        self.label = ''
        self.features = []
        if str_line is not None:
            self.load_from_line(str_line)

    def load_from_line(self,str_line):
        """Replace the label and the features with those parsed from ``str_line``.

        The line is stripped and split on tabs. Fields after the first are
        split on their FIRST ``=``; fields that contain no ``=`` are ignored.
        """
        fields = str_line.strip().split('\t')
        self.label = fields[0]
        # Start from a clean list so that loading a second line into the same
        # object does not mix its features with those of the previous line.
        self.features = []
        for feat in fields[1:]:
            name, equal_sign, value = feat.partition('=')
            if equal_sign:
                self.features.append((name,value))

    def __str__(self):
        return 'Label: '+self.label+'\n'+'Feats: '+str(self.features)

    def get_label(self):
        """Return the class label."""
        return self.label

    def get_features(self):
        """Yield the features one by one as (name, value) pairs."""
        for name,value in self.features:
            yield name,value

    def get_all_features(self):
        """Return the internal list of (name, value) pairs (not a copy)."""
        return self.features
|
38
|
-
|
39
|
-
|
40
|
-
class Cfeature_index:
    """Index that maps composed feature strings to integer ids.

    Ids are handed out starting at 1, in the order features are first seen.
    The index is used to write examples as lines of the form
    ``label id:count id:count ... #feature feature ...``.
    """

    def __init__(self):
        # composed feature string -> integer id
        self.idx = {}

    def get_number_feat(self,feat):
        """Return the id of ``feat`` or None when it is not in the index."""
        return self.idx.get(feat,None)

    def add_feat(self,feat):
        """Return the id of ``feat``, registering it first when it is new.

        A feature that is already indexed keeps its id; re-numbering it
        would make the next new feature collide with the reassigned id.
        """
        num_feat = self.idx.get(feat)
        if num_feat is None:
            num_feat = len(self.idx)+1
            self.idx[feat] = num_feat
        return num_feat

    def compose_feat(self,name,value):
        """Build the string under which a (name, value) pair is indexed."""
        return name+'###'+value

    def __encode_features(self,feats,modify_index=True):
        """Turn (name, value) pairs into sorted (id, count) pairs.

        Returns a tuple ``(pairs, clean_feats)`` where ``pairs`` is the list
        of (id, count) sorted by id and ``clean_feats`` is every composed
        feature followed by one space, in input order. Features missing from
        the index are added when ``modify_index`` is true and are left out
        of ``pairs`` otherwise (they still appear in ``clean_feats``).
        """
        counts = {}
        composed = []
        for name, value in feats:
            my_feat = self.compose_feat(name, value)
            composed.append(my_feat)
            num_feat = self.get_number_feat(my_feat)
            if num_feat is None and modify_index:
                num_feat = self.add_feat(my_feat)
            if num_feat is not None:
                counts[num_feat] = counts.get(num_feat, 0) + 1
        clean_feats = ''.join([my_feat+' ' for my_feat in composed])
        return sorted(counts.items(),key=itemgetter(0)),clean_feats

    @staticmethod
    def _write_example(out_fic,label,feats_for_example,comment):
        """Write one example line: label, ' id:count' pairs, then ``comment``."""
        out_fic.write(label)
        for feat,freq_feat in feats_for_example:
            out_fic.write(' %d:%d' % (feat,freq_feat))
        out_fic.write(comment)

    def encode_feature_file_to_svm(self,feat_file_obj,out_fic=sys.stdout):
        """Write every example of ``feat_file_obj`` to ``out_fic``.

        ``feat_file_obj`` is iterated and each item must offer ``get_label()``
        and ``get_all_features()``. New features are added to the index.
        """
        for example in feat_file_obj:
            class_label = example.get_label()
            feats_for_example, clean_feats = self.__encode_features(example.get_all_features())
            self._write_example(out_fic, class_label, feats_for_example, ' #'+clean_feats+'\n')

    def encode_example_for_classification(self, feats,out_fic,my_class='0'):
        """Write a single example without modifying the index.

        Features that are not in the index get no ``id:count`` entry. The
        line is written with ``my_class`` as its label.
        """
        feats_for_example, clean_feats = self.__encode_features(feats,modify_index=False)
        comment = ' #'+clean_feats+'\n'
        # Encode only when the text is not already a native ``str`` (Python 2
        # unicode). An unconditional .encode() breaks on byte strings with
        # non-ASCII content and on Python 3 text.
        if not isinstance(comment, str):
            comment = comment.encode('utf-8')
        self._write_example(out_fic, my_class, feats_for_example, comment)

    def save_to_file(self,filename):
        """Pickle the index dictionary to ``filename`` (protocol 0)."""
        with open(filename,'wb') as fic:
            cPickle.dump(self.idx, fic, protocol=0)

    def load_from_file(self,filename):
        """Replace the index with the dictionary pickled in ``filename``.

        NOTE(review): unpickling executes arbitrary code; only load index
        files that come from a trusted source.
        """
        with open(filename,'rb') as fic:
            self.idx = cPickle.load(fic)
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
class Cfeature_file:
    """Iterable over the examples stored in a feature file.

    Every iteration opens ``filename`` again and yields one ``Cexample``
    per line whose first character is not ``#``. With no filename the
    iteration is empty.
    """

    def __init__(self,filename=None):
        self.filename = filename

    def __iter__(self):
        if self.filename is not None:
            # The context manager closes the file even when the consumer
            # stops iterating early or parsing a line raises.
            with open(self.filename,'r') as fic:
                for line in fic:
                    if line[0] != '#':
                        yield Cexample(line)
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
@@ -1 +0,0 @@
|
|
1
|
-
from lexicon import *
|
@@ -1,72 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python
|
2
|
-
|
3
|
-
import os
|
4
|
-
import re
|
5
|
-
from VUA_pylib.common import normalize_pos
|
6
|
-
|
7
|
-
__this_folder__ = os.path.dirname(os.path.realpath(__file__))
|
8
|
-
|
9
|
-
class MPQA_subjectivity_lexicon:
    """Lookup of (type, prior polarity) for words of a subjectivity lexicon.

    The lexicon is read from ``data/subjclueslen1-HLTEMNLP05.tff`` next to
    this module. Entries are stored in four dictionaries: stemmed and
    non-stemmed entries, each keyed by ``(word, pos)`` and by ``word`` alone.
    """

    # One compiled pattern per attribute read from a lexicon line, in the
    # order: type, word, pos, stemmed flag, prior polarity.
    _FIELD_PATTERNS = tuple(
        re.compile(field+'=([^ ]+)')
        for field in ('type', 'word1', 'pos1', 'stemmed1', 'priorpolarity')
    )

    def __init__(self):
        self.__filename=os.path.join(__this_folder__,'data','subjclueslen1-HLTEMNLP05.tff')
        self.stemmed = {}            # (word, pos) -> (type, polarity), stemmed1=y
        self.stemmed_anypos = {}     # word -> (type, polarity), stemmed1=y
        self.no_stemmed = {}         # (word, pos) -> (type, polarity), stemmed1=n
        self.no_stemmed_anypos = {}  # word -> (type, polarity), stemmed1=n

        self.__load()

    def __load(self):
        """Fill the four dictionaries from the lexicon file.

        Format of lines:
        type=weaksubj len=1 word1=abandoned pos1=adj stemmed1=n priorpolarity=negative

        Blank lines are skipped. A non-blank line that lacks one of the
        attributes raises IndexError.
        """
        with open(self.__filename) as fic:
            for line in fic:
                line=line.strip()+' '
                if line == ' ':
                    continue
                this_type, word, pos, stemmed, prior_polarity = [
                    pattern.findall(line)[0] for pattern in self._FIELD_PATTERNS
                ]
                pos = normalize_pos(pos)
                entry = (this_type,prior_polarity)
                # The word-only tables are filled for EVERY entry, not just
                # for entries whose pos is '*'. The original guarded this with
                # "if True or pos == '*'", which is always true.
                if stemmed == 'y':
                    self.stemmed[(word,pos)] = entry
                    self.stemmed_anypos[word] = entry
                elif stemmed == 'n':
                    self.no_stemmed[(word,pos)] = entry
                    self.no_stemmed_anypos[word] = entry

    def get_type_and_polarity(self,word,pos=None):
        """Return the (type, prior polarity) tuple for ``word`` or None.

        ``pos`` is passed through ``normalize_pos`` when it is given. The
        lookups are tried in this order and the first hit wins: non-stemmed
        with pos, stemmed with pos, non-stemmed any pos, stemmed any pos.
        """
        if pos is not None:
            pos = normalize_pos(pos)

        lookups = (
            (self.no_stemmed, (word,pos)),
            (self.stemmed, (word,pos)),
            (self.no_stemmed_anypos, word),
            (self.stemmed_anypos, word),
        )
        for table, key in lookups:
            res = table.get(key)
            if res is not None:
                return res
        return None
|
66
|
-
|
67
|
-
|
68
|
-
if __name__ == '__main__':
    # Manual smoke test: load the lexicon and print one lookup.
    # print(...) with a single argument gives the same output on Python 2
    # and is also valid Python 3 syntax.
    o = MPQA_subjectivity_lexicon()
    print(o.get_type_and_polarity('abidance','adj'))
|
71
|
-
|
72
|
-
|
@@ -1,10 +0,0 @@
|
|
1
|
-
Metadata-Version: 1.0
|
2
|
-
Name: VUA-pylib
|
3
|
-
Version: 1.3
|
4
|
-
Summary: Various KAF / NAF python helpers
|
5
|
-
Home-page: https://github.com/cltl/VUA_pylib
|
6
|
-
Author: Ruben Izquierdo
|
7
|
-
Author-email: r.izquierdobevia@vu.nl
|
8
|
-
License: UNKNOWN
|
9
|
-
Description: UNKNOWN
|
10
|
-
Platform: UNKNOWN
|
@@ -1,14 +0,0 @@
|
|
1
|
-
README
|
2
|
-
VUA_pylib/__init__.py
|
3
|
-
VUA_pylib.egg-info/PKG-INFO
|
4
|
-
VUA_pylib.egg-info/SOURCES.txt
|
5
|
-
VUA_pylib.egg-info/dependency_links.txt
|
6
|
-
VUA_pylib.egg-info/top_level.txt
|
7
|
-
VUA_pylib/common/__init__.py
|
8
|
-
VUA_pylib/common/common.py
|
9
|
-
VUA_pylib/corpus_reader/__init__.py
|
10
|
-
VUA_pylib/corpus_reader/google_web_nl.py
|
11
|
-
VUA_pylib/io_utils/__init__.py
|
12
|
-
VUA_pylib/io_utils/feature_file.py
|
13
|
-
VUA_pylib/lexicon/__init__.py
|
14
|
-
VUA_pylib/lexicon/lexicon.py
|
@@ -1 +0,0 @@
|
|
1
|
-
|
@@ -1,23 +0,0 @@
|
|
1
|
-
../VUA_pylib/__init__.py
|
2
|
-
../VUA_pylib/lexicon/lexicon.py
|
3
|
-
../VUA_pylib/lexicon/__init__.py
|
4
|
-
../VUA_pylib/common/common.py
|
5
|
-
../VUA_pylib/common/__init__.py
|
6
|
-
../VUA_pylib/io_utils/feature_file.py
|
7
|
-
../VUA_pylib/io_utils/__init__.py
|
8
|
-
../VUA_pylib/corpus_reader/google_web_nl.py
|
9
|
-
../VUA_pylib/corpus_reader/__init__.py
|
10
|
-
../VUA_pylib/__init__.pyc
|
11
|
-
../VUA_pylib/lexicon/lexicon.pyc
|
12
|
-
../VUA_pylib/lexicon/__init__.pyc
|
13
|
-
../VUA_pylib/common/common.pyc
|
14
|
-
../VUA_pylib/common/__init__.pyc
|
15
|
-
../VUA_pylib/io_utils/feature_file.pyc
|
16
|
-
../VUA_pylib/io_utils/__init__.pyc
|
17
|
-
../VUA_pylib/corpus_reader/google_web_nl.pyc
|
18
|
-
../VUA_pylib/corpus_reader/__init__.pyc
|
19
|
-
./
|
20
|
-
SOURCES.txt
|
21
|
-
dependency_links.txt
|
22
|
-
top_level.txt
|
23
|
-
PKG-INFO
|
@@ -1 +0,0 @@
|
|
1
|
-
VUA_pylib
|
@@ -1,165 +0,0 @@
|
|
1
|
-
class KafTermSentiment:
    """Sentiment information attached to a term.

    All attributes start as None and are filled in with ``simpleInit``.
    """

    def __init__(self):
        self.resource=None
        self.polarity=None
        self.strength=None
        self.subjectivity=None
        # Initialised here as well, so getSentimentModifier() works on an
        # object for which simpleInit() was never called.
        self.sentiment_modifier=None

    def simpleInit(self,r,p,st,su,sm=None):
        """Set resource, polarity, strength, subjectivity and sentiment modifier."""
        self.resource=r
        self.polarity=p
        self.strength=st
        self.subjectivity=su
        self.sentiment_modifier = sm

    def getPolarity(self):
        """Return the polarity (None when unset)."""
        return self.polarity

    def getSentimentModifier(self):
        """Return the sentiment modifier (None when unset)."""
        return self.sentiment_modifier
|
20
|
-
|
21
|
-
|
22
|
-
class KafToken:
    """Plain record for a token: id, text value, and optional sentence / paragraph."""

    def __init__(self,wid, value, sent=None, para=None):
        self.token_id, self.value = wid, value
        self.sent, self.para = sent, para
|
28
|
-
|
29
|
-
|
30
|
-
class KafOpinionExpression:
    """Opinion expression: polarity, strength and the ids it spans (``targets``)."""

    def __init__(self,polarity,strength,targets):
        self.polarity, self.strength, self.targets = polarity, strength, targets

    def __str__(self):
        # Built piece by piece, in the same left-to-right order as one long
        # concatenation would evaluate.
        text = 'Op_exp==> pol:'+self.polarity
        text = text+' Str:'
        text = text+self.strength
        text = text+' ids:'
        text = text+'-'.join(self.targets)
        return text
|
38
|
-
|
39
|
-
class KafOpinion:
    """Opinion: id, holder ids, target ids and the opinion expression object."""

    def __init__(self,id,holders, targets, opi_exp):
        self.id, self.holders = id, holders
        self.targets, self.opi_exp = targets, opi_exp

    def __str__(self):
        # Header, holders and targets each end with a newline; the opinion
        # expression is appended last without one.
        header = 'Opinion id'+self.id+'\n'
        holders_line = ' Holders: '+'-'.join(self.holders)+'\n'
        targets_line = ' Targets: '+'-'.join(self.targets)+'\n'
        return header+holders_line+targets_line+str(self.opi_exp)
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
class KafSingleProperty:
    """Property record: id, type and the ids it spans (``targets``)."""

    def __init__(self,id,type,targets):
        self.id, self.type, self.targets = id, type, targets

    def get_id(self):
        """Return the property id."""
        return self.id

    def get_type(self):
        """Return the property type."""
        return self.type

    def get_span(self):
        """Return the target ids given at construction."""
        return self.targets

    def __str__(self):
        head = 'Id: '+self.id+' Type: '+self.type
        span = ' '.join(self.targets)
        return head+' ids:'+span
|
73
|
-
|
74
|
-
|
75
|
-
class KafSingleEntity:
    """Entity record: id, type and the ids it spans (``targets``)."""

    def __init__(self,id,type,targets):
        self.id, self.type, self.targets = id, type, targets

    def get_id(self):
        """Return the entity id."""
        return self.id

    def get_type(self):
        """Return the entity type."""
        return self.type

    def get_span(self):
        """Return the target ids given at construction."""
        return self.targets

    def __str__(self):
        head = 'Id: '+self.id+' Type: '+self.type
        span = ' '.join(self.targets)
        return head+' ids:'+span
|
92
|
-
|
93
|
-
class KafTerm:
    """A term with id, lemma, part of speech, morphofeat, sentiment and span ids.

    Every attribute starts empty and is filled in through the setters.
    ``sentiment`` is expected to offer ``getPolarity()`` and
    ``getSentimentModifier()``.
    """

    def __init__(self):
        self.tid = None
        self.lemma = None
        self.pos = None
        self.morphofeat = None
        self.sentiment = None
        self.list_span_id = []

    def get_morphofeat(self):
        return self.morphofeat

    def set_list_span_id(self, L):
        self.list_span_id = L

    def get_list_span(self):
        return self.list_span_id

    def get_polarity(self):
        """Return the polarity of the attached sentiment, or None without one."""
        if self.sentiment is not None:
            return self.sentiment.getPolarity()
        return None

    def get_sentiment_modifier(self):
        """Return the modifier of the attached sentiment, or None without one."""
        if self.sentiment is not None:
            return self.sentiment.getSentimentModifier()
        return None

    def setSentiment(self,my_sent):
        self.sentiment = my_sent

    def getSentiment(self):
        return self.sentiment

    def getLemma(self):
        return self.lemma

    def setLemma(self,lemma):
        self.lemma = lemma

    def getPos(self):
        return self.pos

    def setPos(self,pos):
        self.pos = pos

    def getId(self):
        return self.tid

    def setId(self,id):
        self.tid = id

    def getShortPos(self):
        """Return the lower-cased first character of pos, remapped g->a and a->r.

        Returns None when pos is None or empty (an empty pos has no first
        character to take).
        """
        if not self.pos:
            return None
        auxpos=self.pos.lower()[0]
        if auxpos == 'g':
            auxpos='a'
        elif auxpos == 'a':
            auxpos='r'
        return auxpos

    def __str__(self):
        if self.tid and self.lemma and self.pos:
            text = self.tid+'\n\t'+self.lemma+'\n\t'+self.pos
            # __str__ must return a native str. Only a Python 2 unicode result
            # needs encoding; encoding the lemma alone breaks when it is
            # joined with unicode parts or, on Python 3, with str parts.
            if not isinstance(text, str):
                text = text.encode('utf-8')
            return text
        else:
            return 'None'
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|