opener-kaf-naf-parser 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +67 -8
- data/bin/kaf-naf-parser-daemon +10 -0
- data/core/kaf-naf-parser.py +5 -5
- data/exec/kaf-naf-parser.rb +9 -0
- data/ext/hack/Rakefile +13 -0
- data/lib/opener/kaf_naf_parser/version.rb +1 -1
- data/opener-kaf-naf-parser.gemspec +5 -1
- data/pre_install_requirements.txt +3 -0
- metadata +37 -51
- data/core/packages/KafNafParser-1.2.tar.gz +0 -0
- data/core/packages/VUA_pylib-1.3.tar.gz +0 -0
- data/core/site-packages/pre_build/KafNafParser/KafNafParserMod.py +0 -338
- data/core/site-packages/pre_build/KafNafParser/__init__.py +0 -14
- data/core/site-packages/pre_build/KafNafParser/constituency_data.py +0 -125
- data/core/site-packages/pre_build/KafNafParser/coreference_data.py +0 -52
- data/core/site-packages/pre_build/KafNafParser/dependency_data.py +0 -80
- data/core/site-packages/pre_build/KafNafParser/entity_data.py +0 -59
- data/core/site-packages/pre_build/KafNafParser/external_references_data.py +0 -41
- data/core/site-packages/pre_build/KafNafParser/feature_extractor/__init__.py +0 -2
- data/core/site-packages/pre_build/KafNafParser/feature_extractor/constituency.py +0 -205
- data/core/site-packages/pre_build/KafNafParser/feature_extractor/dependency.py +0 -300
- data/core/site-packages/pre_build/KafNafParser/features_data.py +0 -71
- data/core/site-packages/pre_build/KafNafParser/header_data.py +0 -127
- data/core/site-packages/pre_build/KafNafParser/opinion_data.py +0 -200
- data/core/site-packages/pre_build/KafNafParser/references_data.py +0 -15
- data/core/site-packages/pre_build/KafNafParser/span_data.py +0 -63
- data/core/site-packages/pre_build/KafNafParser/term_data.py +0 -111
- data/core/site-packages/pre_build/KafNafParser/term_sentiment_data.py +0 -42
- data/core/site-packages/pre_build/KafNafParser/text_data.py +0 -90
- data/core/site-packages/pre_build/KafNafParser-1.2-py2.7.egg-info/PKG-INFO +0 -10
- data/core/site-packages/pre_build/KafNafParser-1.2-py2.7.egg-info/SOURCES.txt +0 -22
- data/core/site-packages/pre_build/KafNafParser-1.2-py2.7.egg-info/dependency_links.txt +0 -1
- data/core/site-packages/pre_build/KafNafParser-1.2-py2.7.egg-info/installed-files.txt +0 -47
- data/core/site-packages/pre_build/KafNafParser-1.2-py2.7.egg-info/top_level.txt +0 -1
- data/core/site-packages/pre_build/VUA_pylib/__init__.py +0 -1
- data/core/site-packages/pre_build/VUA_pylib/common/__init__.py +0 -1
- data/core/site-packages/pre_build/VUA_pylib/common/common.py +0 -28
- data/core/site-packages/pre_build/VUA_pylib/corpus_reader/__init__.py +0 -1
- data/core/site-packages/pre_build/VUA_pylib/corpus_reader/google_web_nl.py +0 -156
- data/core/site-packages/pre_build/VUA_pylib/io_utils/__init__.py +0 -1
- data/core/site-packages/pre_build/VUA_pylib/io_utils/feature_file.py +0 -121
- data/core/site-packages/pre_build/VUA_pylib/lexicon/__init__.py +0 -1
- data/core/site-packages/pre_build/VUA_pylib/lexicon/lexicon.py +0 -72
- data/core/site-packages/pre_build/VUA_pylib-1.3-py2.7.egg-info/PKG-INFO +0 -10
- data/core/site-packages/pre_build/VUA_pylib-1.3-py2.7.egg-info/SOURCES.txt +0 -14
- data/core/site-packages/pre_build/VUA_pylib-1.3-py2.7.egg-info/dependency_links.txt +0 -1
- data/core/site-packages/pre_build/VUA_pylib-1.3-py2.7.egg-info/installed-files.txt +0 -23
- data/core/site-packages/pre_build/VUA_pylib-1.3-py2.7.egg-info/top_level.txt +0 -1
- data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.py +0 -165
- data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.py +0 -439
- data/core/site-packages/pre_build/VUKafParserPy/__init__.py +0 -7
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/PKG-INFO +0 -10
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/SOURCES.txt +0 -7
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/dependency_links.txt +0 -1
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/installed-files.txt +0 -11
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/top_level.txt +0 -1
- data/pre_build_requirements.txt +0 -3
@@ -1,121 +0,0 @@
|
|
1
|
-
from operator import itemgetter
|
2
|
-
import sys
|
3
|
-
import cPickle
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
class Cexample:
    """One labelled example: a class label plus a list of (name, value) features."""

    def __init__(self, str_line=None):
        """Create an empty example and optionally fill it from ``str_line``.

        str_line -- optional tab-separated line, parsed by ``load_from_line``.
        """
        self.label = ''
        self.features = []
        if str_line is not None:
            self.load_from_line(str_line)

    def load_from_line(self, str_line):
        """Replace the label and the features with those parsed from ``str_line``.

        The line is stripped and split on tabs. The first field becomes the
        label; every other field is split at its first '=' into (name, value).
        Fields that contain no '=' are ignored.
        """
        fields = str_line.strip().split('\t')
        self.label = fields[0]
        # Build a fresh list: the label is overwritten on every load, so the
        # features must be too, otherwise a reload keeps the stale ones.
        parsed = []
        for feat in fields[1:]:
            name, separator, value = feat.partition('=')
            if separator:
                parsed.append((name, value))
        self.features = parsed

    def __str__(self):
        """Return a two-line description with the label and the feature list."""
        return 'Label: ' + self.label + '\n' + 'Feats: ' + str(self.features)

    def get_label(self):
        """Return the class label."""
        return self.label

    def get_features(self):
        """Yield the (name, value) pairs one at a time."""
        for name, value in self.features:
            yield name, value

    def get_all_features(self):
        """Return the list of (name, value) pairs itself (not a copy)."""
        return self.features
|
38
|
-
|
39
|
-
|
40
|
-
class Cfeature_index:
    """Index from composed feature strings to consecutive integer ids (from 1).

    The ``encode_*`` methods use the index to write examples as sparse lines of
    the form ``label id:count id:count ... #feature feature ...``.
    """

    def __init__(self):
        """Start with an empty index."""
        # Maps composed feature string -> integer id.
        self.idx = {}

    def get_number_feat(self, feat):
        """Return the id of ``feat`` or None when it is not indexed."""
        return self.idx.get(feat, None)

    def add_feat(self, feat):
        """Register ``feat`` and return its id.

        A feature that is already indexed keeps its id. Re-numbering it with
        ``len(self.idx) + 1`` would not grow the dict, so the next new feature
        would receive the same id and the two would collide.
        """
        known = self.idx.get(feat)
        if known is not None:
            return known
        num_feat = len(self.idx) + 1
        self.idx[feat] = num_feat
        return num_feat

    def compose_feat(self, name, value):
        """Return the index key for a (name, value) pair: ``name###value``."""
        return name + '###' + value

    def __encode_features(self, feats, modify_index=True):
        """Count the indexed features of one example.

        feats        -- iterable of (name, value) pairs.
        modify_index -- when true, unseen features are added to the index;
                        when false, unseen features are left out of the counts.

        Returns ``(pairs, clean_feats)``: ``pairs`` is a list of (id, count)
        sorted by id, ``clean_feats`` is every composed feature (indexed or
        not) followed by a single space.
        """
        counts = {}
        composed = []
        for name, value in feats:
            my_feat = self.compose_feat(name, value)
            composed.append(my_feat)
            num_feat = self.get_number_feat(my_feat)
            if num_feat is None and modify_index:
                num_feat = self.add_feat(my_feat)
            if num_feat is not None:
                counts[num_feat] = counts.get(num_feat, 0) + 1
        clean_feats = ''.join(feat + ' ' for feat in composed)
        return sorted(counts.items(), key=itemgetter(0)), clean_feats

    def _write_pairs(self, out_fic, pairs):
        """Write every (id, count) pair as `` id:count``."""
        for feat, freq_feat in pairs:
            out_fic.write(' %d:%d' % (feat, freq_feat))

    def _pickle_module(self):
        """Return cPickle on Python 2 and pickle on Python 3."""
        try:
            import cPickle as pickle
        except ImportError:
            import pickle
        return pickle

    def encode_feature_file_to_svm(self, feat_file_obj, out_fic=sys.stdout):
        """Write one line per example of ``feat_file_obj``, growing the index.

        feat_file_obj -- iterable of objects offering ``get_label()`` and
                         ``get_all_features()``.
        out_fic       -- object with a ``write`` method (defaults to stdout).
        """
        for example in feat_file_obj:
            out_fic.write(example.get_label())
            pairs, clean_feats = self.__encode_features(example.get_all_features())
            self._write_pairs(out_fic, pairs)
            out_fic.write(' #' + clean_feats + '\n')

    def encode_example_for_classification(self, feats, out_fic, my_class='0'):
        """Write one example using the existing index only (it is not modified).

        feats    -- iterable of (name, value) pairs.
        out_fic  -- object with a ``write`` method.
        my_class -- label written at the start of the line.
        """
        pairs, clean_feats = self.__encode_features(feats, modify_index=False)
        out_fic.write(my_class)
        self._write_pairs(out_fic, pairs)
        # Only a Python 2 ``unicode`` object needs encoding. Calling .encode on
        # a byte string fails for non-ASCII data, and on Python 3 it would
        # produce bytes that cannot be concatenated with the str pieces.
        if not isinstance(clean_feats, str):
            clean_feats = clean_feats.encode('utf-8')
        out_fic.write(' #' + clean_feats + '\n')

    def save_to_file(self, filename):
        """Pickle the index dictionary to ``filename`` (protocol 0)."""
        pickle = self._pickle_module()
        # ``with`` closes the file even when pickling raises.
        with open(filename, 'wb') as fic:
            pickle.dump(self.idx, fic, protocol=0)

    def load_from_file(self, filename):
        """Replace the index with the dictionary pickled in ``filename``.

        NOTE(review): unpickling executes code embedded in the file; only load
        index files that come from a trusted source.
        """
        pickle = self._pickle_module()
        with open(filename, 'rb') as fic:
            self.idx = pickle.load(fic)
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
class Cfeature_file:
    """Iterable over the examples stored in a feature file."""

    def __init__(self, filename=None):
        """Remember the path; nothing is opened until iteration starts."""
        self.filename = filename

    def __iter__(self):
        """Yield a ``Cexample`` for every line that does not start with '#'.

        Yields nothing when no filename was given.
        """
        if self.filename is None:
            return
        # ``with`` closes the file even when the consumer stops iterating
        # early or building an example raises.
        with open(self.filename, 'r') as fic:
            for line in fic:
                if line[0] != '#':
                    yield Cexample(line)
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
@@ -1 +0,0 @@
|
|
1
|
-
from lexicon import *
|
@@ -1,72 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python
|
2
|
-
|
3
|
-
import os
|
4
|
-
import re
|
5
|
-
from VUA_pylib.common import normalize_pos
|
6
|
-
|
7
|
-
__this_folder__ = os.path.dirname(os.path.realpath(__file__))
|
8
|
-
|
9
|
-
class MPQA_subjectivity_lexicon:
    """Lookup tables built from the ``subjclueslen1-HLTEMNLP05.tff`` clue file.

    The file is read from the ``data`` folder next to this module. Every table
    maps to a ``(type, prior_polarity)`` tuple:

    stemmed / no_stemmed               -- keyed by (word, normalized pos)
    stemmed_anypos / no_stemmed_anypos -- keyed by word only
    """

    # Patterns for the fields read from each line, in the order they are
    # unpacked in __load. Compiled once instead of once per line.
    _FIELD_PATTERNS = tuple(
        re.compile(key + '=([^ ]+)')
        for key in ('type', 'word1', 'pos1', 'stemmed1', 'priorpolarity')
    )

    def __init__(self):
        """Create the empty tables and fill them from the lexicon file."""
        self.__filename = os.path.join(__this_folder__, 'data', 'subjclueslen1-HLTEMNLP05.tff')
        self.stemmed = {}
        self.stemmed_anypos = {}
        self.no_stemmed = {}
        self.no_stemmed_anypos = {}

        self.__load()

    def __load(self):
        """Read the lexicon file into the four tables.

        Format of lines:
        type=weaksubj len=1 word1=abandoned pos1=adj stemmed1=n priorpolarity=negative

        Raises IndexError when a line lacks one of the expected fields.
        """
        # ``with`` closes the file even when a malformed line raises.
        with open(self.__filename) as fic:
            for line in fic:
                line = line.strip() + ' '
                this_type, word, pos, stemmed, prior_polarity = [
                    pattern.findall(line)[0] for pattern in self._FIELD_PATTERNS
                ]
                pos = normalize_pos(pos)
                entry = (this_type, prior_polarity)
                # The word-only tables are filled for every entry, whatever
                # its pos; a later line for the same word overwrites an
                # earlier one.
                if stemmed == 'y':
                    self.stemmed[(word, pos)] = entry
                    self.stemmed_anypos[word] = entry
                elif stemmed == 'n':
                    self.no_stemmed[(word, pos)] = entry
                    self.no_stemmed_anypos[word] = entry

    def get_type_and_polarity(self, word, pos=None):
        """Return ``(type, prior_polarity)`` for ``word`` or None if unknown.

        word -- word to look up.
        pos  -- optional part of speech; it is passed through ``normalize_pos``
                before the lookup.

        Lookup order: non-stemmed with pos, stemmed with pos, non-stemmed with
        any pos, stemmed with any pos.
        """
        if pos is not None:
            pos = normalize_pos(pos)

        lookups = (
            (self.no_stemmed, (word, pos)),
            (self.stemmed, (word, pos)),
            (self.no_stemmed_anypos, word),
            (self.stemmed_anypos, word),
        )
        for table, key in lookups:
            res = table.get(key)
            if res is not None:
                return res
        return None
|
66
|
-
|
67
|
-
|
68
|
-
if __name__ == '__main__':
    # Manual smoke test: load the lexicon and print the entry for one word.
    # The single-argument call form of print runs on both Python 2 and 3.
    o = MPQA_subjectivity_lexicon()
    print(o.get_type_and_polarity('abidance', 'adj'))
|
71
|
-
|
72
|
-
|
@@ -1,10 +0,0 @@
|
|
1
|
-
Metadata-Version: 1.0
|
2
|
-
Name: VUA-pylib
|
3
|
-
Version: 1.3
|
4
|
-
Summary: Various KAF / NAF python helpers
|
5
|
-
Home-page: https://github.com/cltl/VUA_pylib
|
6
|
-
Author: Ruben Izquierdo
|
7
|
-
Author-email: r.izquierdobevia@vu.nl
|
8
|
-
License: UNKNOWN
|
9
|
-
Description: UNKNOWN
|
10
|
-
Platform: UNKNOWN
|
@@ -1,14 +0,0 @@
|
|
1
|
-
README
|
2
|
-
VUA_pylib/__init__.py
|
3
|
-
VUA_pylib.egg-info/PKG-INFO
|
4
|
-
VUA_pylib.egg-info/SOURCES.txt
|
5
|
-
VUA_pylib.egg-info/dependency_links.txt
|
6
|
-
VUA_pylib.egg-info/top_level.txt
|
7
|
-
VUA_pylib/common/__init__.py
|
8
|
-
VUA_pylib/common/common.py
|
9
|
-
VUA_pylib/corpus_reader/__init__.py
|
10
|
-
VUA_pylib/corpus_reader/google_web_nl.py
|
11
|
-
VUA_pylib/io_utils/__init__.py
|
12
|
-
VUA_pylib/io_utils/feature_file.py
|
13
|
-
VUA_pylib/lexicon/__init__.py
|
14
|
-
VUA_pylib/lexicon/lexicon.py
|
@@ -1 +0,0 @@
|
|
1
|
-
|
@@ -1,23 +0,0 @@
|
|
1
|
-
../VUA_pylib/__init__.py
|
2
|
-
../VUA_pylib/lexicon/lexicon.py
|
3
|
-
../VUA_pylib/lexicon/__init__.py
|
4
|
-
../VUA_pylib/common/common.py
|
5
|
-
../VUA_pylib/common/__init__.py
|
6
|
-
../VUA_pylib/io_utils/feature_file.py
|
7
|
-
../VUA_pylib/io_utils/__init__.py
|
8
|
-
../VUA_pylib/corpus_reader/google_web_nl.py
|
9
|
-
../VUA_pylib/corpus_reader/__init__.py
|
10
|
-
../VUA_pylib/__init__.pyc
|
11
|
-
../VUA_pylib/lexicon/lexicon.pyc
|
12
|
-
../VUA_pylib/lexicon/__init__.pyc
|
13
|
-
../VUA_pylib/common/common.pyc
|
14
|
-
../VUA_pylib/common/__init__.pyc
|
15
|
-
../VUA_pylib/io_utils/feature_file.pyc
|
16
|
-
../VUA_pylib/io_utils/__init__.pyc
|
17
|
-
../VUA_pylib/corpus_reader/google_web_nl.pyc
|
18
|
-
../VUA_pylib/corpus_reader/__init__.pyc
|
19
|
-
./
|
20
|
-
SOURCES.txt
|
21
|
-
dependency_links.txt
|
22
|
-
top_level.txt
|
23
|
-
PKG-INFO
|
@@ -1 +0,0 @@
|
|
1
|
-
VUA_pylib
|
@@ -1,165 +0,0 @@
|
|
1
|
-
class KafTermSentiment:
    """Sentiment information attached to a term."""

    def __init__(self):
        """Create a sentiment with every field unset (None)."""
        self.resource = None
        self.polarity = None
        self.strength = None
        self.subjectivity = None
        # Set here as well as in simpleInit, so getSentimentModifier() works
        # on an object that was never passed through simpleInit.
        self.sentiment_modifier = None

    def simpleInit(self, r, p, st, su, sm=None):
        """Set all fields at once.

        r  -- resource
        p  -- polarity
        st -- strength
        su -- subjectivity
        sm -- sentiment modifier (optional)
        """
        self.resource = r
        self.polarity = p
        self.strength = st
        self.subjectivity = su
        self.sentiment_modifier = sm

    def getPolarity(self):
        """Return the polarity."""
        return self.polarity

    def getSentimentModifier(self):
        """Return the sentiment modifier, or None when it was never set."""
        return self.sentiment_modifier
|
20
|
-
|
21
|
-
|
22
|
-
class KafToken:
    """Plain record holding the data of one token."""

    def __init__(self, wid, value, sent=None, para=None):
        """Store the constructor arguments as attributes.

        wid   -- stored as ``token_id``
        value -- stored as ``value``
        sent  -- stored as ``sent`` (presumably the sentence id; confirm with caller)
        para  -- stored as ``para`` (presumably the paragraph id; confirm with caller)
        """
        self.token_id, self.value = wid, value
        self.sent, self.para = sent, para
|
28
|
-
|
29
|
-
|
30
|
-
class KafOpinionExpression:
    """Expression part of an opinion: polarity, strength and target ids."""

    def __init__(self, polarity, strength, targets):
        """Store the polarity, the strength and the list of target ids."""
        self.polarity = polarity
        self.strength = strength
        self.targets = targets

    def __str__(self):
        """Return a one-line summary.

        Formatting with %s instead of '+' keeps this usable when polarity or
        strength is None.
        """
        return 'Op_exp==> pol:%s Str:%s ids:%s' % (
            self.polarity, self.strength, '-'.join(self.targets))
|
38
|
-
|
39
|
-
class KafOpinion:
    """An opinion: id, holder ids, target ids and its opinion expression."""

    def __init__(self, id, holders, targets, opi_exp):
        """Store the id, the holder and target id lists and the expression."""
        self.id = id
        self.holders = holders
        self.targets = targets
        self.opi_exp = opi_exp

    def __str__(self):
        """Return a multi-line summary ending with ``str(self.opi_exp)``.

        Formatting with %s instead of '+' keeps this usable when the id is
        None.
        """
        return 'Opinion id%s\n Holders: %s\n Targets: %s\n%s' % (
            self.id,
            '-'.join(self.holders),
            '-'.join(self.targets),
            str(self.opi_exp),
        )
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
class KafSingleProperty:
    """A property: id, type and the ids it spans."""

    def __init__(self, id, type, targets):
        """Store the id, the type and the list of target ids."""
        self.id = id
        self.type = type
        self.targets = targets

    def get_id(self):
        """Return the id."""
        return self.id

    def get_type(self):
        """Return the type."""
        return self.type

    def get_span(self):
        """Return the list of target ids."""
        return self.targets

    def __str__(self):
        """Return a one-line summary.

        Formatting with %s instead of '+' keeps this usable when the id or
        the type is None.
        """
        return 'Id: %s Type: %s ids:%s' % (self.id, self.type, ' '.join(self.targets))
|
73
|
-
|
74
|
-
|
75
|
-
class KafSingleEntity:
    """An entity: id, type and the ids it spans."""

    def __init__(self, id, type, targets):
        """Store the id, the type and the list of target ids."""
        self.id = id
        self.type = type
        self.targets = targets

    def get_id(self):
        """Return the id."""
        return self.id

    def get_type(self):
        """Return the type."""
        return self.type

    def get_span(self):
        """Return the list of target ids."""
        return self.targets

    def __str__(self):
        """Return a one-line summary.

        Formatting with %s instead of '+' keeps this usable when the id or
        the type is None.
        """
        return 'Id: %s Type: %s ids:%s' % (self.id, self.type, ' '.join(self.targets))
|
92
|
-
|
93
|
-
class KafTerm:
    """A term: id, lemma, pos, morphofeat, optional sentiment and span ids."""

    def __init__(self):
        """Create a term with every field unset and an empty span list."""
        self.tid = None
        self.lemma = None
        self.pos = None
        self.morphofeat = None
        self.sentiment = None
        self.list_span_id = []

    def get_morphofeat(self):
        """Return the morphofeat value."""
        return self.morphofeat

    def set_list_span_id(self, L):
        """Replace the list of span ids with ``L``."""
        self.list_span_id = L

    def get_list_span(self):
        """Return the list of span ids."""
        return self.list_span_id

    def get_polarity(self):
        """Return the polarity of the attached sentiment, or None without one."""
        if self.sentiment is None:
            return None
        return self.sentiment.getPolarity()

    def get_sentiment_modifier(self):
        """Return the modifier of the attached sentiment, or None without one."""
        if self.sentiment is None:
            return None
        return self.sentiment.getSentimentModifier()

    def setSentiment(self, my_sent):
        """Attach a sentiment object."""
        self.sentiment = my_sent

    def getSentiment(self):
        """Return the attached sentiment object (None if unset)."""
        return self.sentiment

    def getLemma(self):
        """Return the lemma."""
        return self.lemma

    def setLemma(self, lemma):
        """Set the lemma."""
        self.lemma = lemma

    def getPos(self):
        """Return the pos."""
        return self.pos

    def setPos(self, pos):
        """Set the pos."""
        self.pos = pos

    def getId(self):
        """Return the term id."""
        return self.tid

    def setId(self, id):
        """Set the term id."""
        self.tid = id

    def getShortPos(self):
        """Return the lower-cased first letter of the pos, remapped.

        'g' is returned as 'a' and 'a' as 'r'; every other letter is returned
        unchanged. Returns None when the pos is unset or empty (an empty pos
        has no first letter to index).
        """
        if not self.pos:
            return None
        auxpos = self.pos.lower()[0]
        if auxpos == 'g':
            auxpos = 'a'
        elif auxpos == 'a':
            auxpos = 'r'
        return auxpos

    def __str__(self):
        """Return ``id``, lemma and pos on three lines, or 'None' if any is unset."""
        if self.tid and self.lemma and self.pos:
            lemma = self.lemma
            # Only a Python 2 ``unicode`` lemma needs encoding. On Python 3,
            # or with a Python 2 byte string, .encode would either fail or
            # yield bytes that cannot be concatenated with the other parts.
            if not isinstance(lemma, str):
                lemma = lemma.encode('utf-8')
            return self.tid + '\n\t' + lemma + '\n\t' + self.pos
        return 'None'
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|