opener-opinion-detector-base 2.0.1 → 2.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/core/python-scripts/README.md +78 -3
- data/core/python-scripts/classify_kaf_naf_file.py +94 -94
- data/core/python-scripts/models.cfg +1 -0
- data/core/python-scripts/scripts/config_manager.py +3 -0
- data/core/python-scripts/scripts/extract_features.py +0 -3
- data/core/python-scripts/scripts/relation_classifier.py +1 -1
- data/core/vendor/src/crfsuite/crfsuite.sln +42 -42
- data/core/vendor/src/liblbfgs/lbfgs.sln +26 -26
- data/ext/hack/Rakefile +5 -2
- data/lib/opener/opinion_detectors/base.rb +19 -15
- data/lib/opener/opinion_detectors/base/version.rb +1 -1
- data/lib/opener/opinion_detectors/configuration_creator.rb +6 -8
- data/lib/opener/opinion_detectors/de.rb +1 -1
- data/lib/opener/opinion_detectors/es.rb +7 -0
- data/lib/opener/opinion_detectors/fr.rb +7 -0
- data/opener-opinion-detector-base.gemspec +0 -1
- data/pre_install_requirements.txt +3 -0
- metadata +41 -85
- data/core/packages/KafNafParser-1.4.tar.gz +0 -0
- data/core/packages/VUA_pylib-1.5.tar.gz +0 -0
- data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/PKG-INFO +0 -10
- data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/SOURCES.txt +0 -22
- data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/dependency_links.txt +0 -1
- data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/installed-files.txt +0 -47
- data/core/site-packages/pre_build/KafNafParser-1.4-py2.7.egg-info/top_level.txt +0 -1
- data/core/site-packages/pre_build/KafNafParser/KafNafParserMod.py +0 -390
- data/core/site-packages/pre_build/KafNafParser/__init__.py +0 -14
- data/core/site-packages/pre_build/KafNafParser/constituency_data.py +0 -125
- data/core/site-packages/pre_build/KafNafParser/coreference_data.py +0 -52
- data/core/site-packages/pre_build/KafNafParser/dependency_data.py +0 -78
- data/core/site-packages/pre_build/KafNafParser/entity_data.py +0 -59
- data/core/site-packages/pre_build/KafNafParser/external_references_data.py +0 -41
- data/core/site-packages/pre_build/KafNafParser/feature_extractor/__init__.py +0 -2
- data/core/site-packages/pre_build/KafNafParser/feature_extractor/constituency.py +0 -205
- data/core/site-packages/pre_build/KafNafParser/feature_extractor/dependency.py +0 -309
- data/core/site-packages/pre_build/KafNafParser/features_data.py +0 -131
- data/core/site-packages/pre_build/KafNafParser/header_data.py +0 -127
- data/core/site-packages/pre_build/KafNafParser/opinion_data.py +0 -211
- data/core/site-packages/pre_build/KafNafParser/references_data.py +0 -23
- data/core/site-packages/pre_build/KafNafParser/span_data.py +0 -63
- data/core/site-packages/pre_build/KafNafParser/term_data.py +0 -111
- data/core/site-packages/pre_build/KafNafParser/term_sentiment_data.py +0 -42
- data/core/site-packages/pre_build/KafNafParser/text_data.py +0 -99
- data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/PKG-INFO +0 -10
- data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/SOURCES.txt +0 -14
- data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/dependency_links.txt +0 -1
- data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/installed-files.txt +0 -23
- data/core/site-packages/pre_build/VUA_pylib-1.5-py2.7.egg-info/top_level.txt +0 -1
- data/core/site-packages/pre_build/VUA_pylib/__init__.py +0 -1
- data/core/site-packages/pre_build/VUA_pylib/common/__init__.py +0 -1
- data/core/site-packages/pre_build/VUA_pylib/common/common.py +0 -28
- data/core/site-packages/pre_build/VUA_pylib/corpus_reader/__init__.py +0 -1
- data/core/site-packages/pre_build/VUA_pylib/corpus_reader/google_web_nl.py +0 -156
- data/core/site-packages/pre_build/VUA_pylib/io_utils/__init__.py +0 -1
- data/core/site-packages/pre_build/VUA_pylib/io_utils/feature_file.py +0 -121
- data/core/site-packages/pre_build/VUA_pylib/lexicon/__init__.py +0 -1
- data/core/site-packages/pre_build/VUA_pylib/lexicon/lexicon.py +0 -72
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/PKG-INFO +0 -10
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/SOURCES.txt +0 -7
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/dependency_links.txt +0 -1
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/installed-files.txt +0 -11
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/top_level.txt +0 -1
- data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.py +0 -165
- data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.py +0 -439
- data/core/site-packages/pre_build/VUKafParserPy/__init__.py +0 -7
- data/pre_build_requirements.txt +0 -3
@@ -1,14 +0,0 @@
|
|
1
|
-
from KafNafParserMod import *
|
2
|
-
from header_data import *
|
3
|
-
from external_references_data import *
|
4
|
-
from span_data import *
|
5
|
-
from term_data import *
|
6
|
-
from term_sentiment_data import *
|
7
|
-
from text_data import *
|
8
|
-
from entity_data import *
|
9
|
-
from features_data import *
|
10
|
-
from opinion_data import *
|
11
|
-
from dependency_data import *
|
12
|
-
from constituency_data import *
|
13
|
-
from references_data import *
|
14
|
-
from coreference_data import *
|
@@ -1,125 +0,0 @@
|
|
1
|
-
from lxml import etree
|
2
|
-
from lxml.objectify import dump
|
3
|
-
from span_data import Cspan
|
4
|
-
|
5
|
-
|
6
|
-
class Cnonterminal:
    """Thin wrapper around an ``nt`` XML element."""

    def __init__(self, node=None):
        # A fresh, empty 'nt' element is created only when no node is given.
        self.node = etree.Element('nt') if node is None else node

    def get_id(self):
        """Return the value of the wrapped element's ``id`` attribute."""
        identifier = self.node.get('id')
        return identifier

    def get_label(self):
        """Return the value of the wrapped element's ``label`` attribute."""
        label = self.node.get('label')
        return label

    def __str__(self):
        """Return whatever ``dump`` produces for the wrapped element."""
        return dump(self.node)
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
class Cterminal:
    """Thin wrapper around a ``t`` XML element."""

    def __init__(self, node=None):
        # A fresh, empty 't' element is created only when no node is given.
        self.node = etree.Element('t') if node is None else node

    def get_id(self):
        """Return the value of the wrapped element's ``id`` attribute."""
        identifier = self.node.get('id')
        return identifier

    def get_span(self):
        """Return a ``Cspan`` built from the result of ``find('span')``.

        The lookup result is passed to ``Cspan`` unchanged, including the
        case where no ``span`` child exists.
        """
        return Cspan(self.node.find('span'))

    def __str__(self):
        """Return whatever ``dump`` produces for the wrapped element."""
        return dump(self.node)
|
40
|
-
|
41
|
-
class Cedge:
    """Thin wrapper around an ``edge`` XML element."""

    def __init__(self, node=None):
        # A fresh, empty 'edge' element is created only when no node is given.
        self.node = etree.Element('edge') if node is None else node

    def __str__(self):
        """Return whatever ``dump`` produces for the wrapped element."""
        return dump(self.node)

    def get_from(self):
        """Return the value of the wrapped element's ``from`` attribute."""
        source_id = self.node.get('from')
        return source_id

    def get_to(self):
        """Return the value of the wrapped element's ``to`` attribute."""
        target_id = self.node.get('to')
        return target_id
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
class Ctree:
    """Wrapper around a ``tree`` XML element and its nt / t / edge children."""

    def __init__(self, node=None):
        # A fresh, empty 'tree' element is created only when no node is given.
        self.node = etree.Element('tree') if node is None else node

    def __str__(self):
        """Return whatever ``dump`` produces for the wrapped element."""
        return dump(self.node)

    # ---- non-terminals ----
    def __get_nt_nodes(self):
        """Yield the raw ``nt`` elements found with ``findall('nt')``."""
        for raw_nt in self.node.findall('nt'):
            yield raw_nt

    def get_non_terminals(self):
        """Yield a ``Cnonterminal`` for every ``nt`` element."""
        return (Cnonterminal(raw_nt) for raw_nt in self.__get_nt_nodes())

    # ---- terminals ----
    def __get_t_nodes(self):
        """Yield the raw ``t`` elements found with ``findall('t')``."""
        for raw_t in self.node.findall('t'):
            yield raw_t

    def get_terminals(self):
        """Yield a ``Cterminal`` for every ``t`` element."""
        return (Cterminal(raw_t) for raw_t in self.__get_t_nodes())

    # ---- edges ----
    def __get_edge_nodes(self):
        """Yield the raw ``edge`` elements found with ``findall('edge')``."""
        for raw_edge in self.node.findall('edge'):
            yield raw_edge

    def get_edges(self):
        """Yield a ``Cedge`` for every ``edge`` element."""
        return (Cedge(raw_edge) for raw_edge in self.__get_edge_nodes())
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
class Cconstituency:
    """Wrapper around a ``constituency`` XML element holding ``tree`` children."""

    def __init__(self, node=None):
        # NOTE(review): 'NAF/NAF' looks like it may have been meant as
        # 'NAF/KAF' -- confirm before changing, the value is kept as found.
        self.type = 'NAF/NAF'
        # A fresh, empty 'constituency' element is created only when no node is given.
        self.node = etree.Element('constituency') if node is None else node

    def to_kaf(self):
        """Do nothing; no conversion is performed for this layer."""
        return None

    def to_naf(self):
        """Do nothing; no conversion is performed for this layer."""
        return None

    def __get_tree_nodes(self):
        """Yield the raw ``tree`` elements found with ``findall('tree')``."""
        for raw_tree in self.node.findall('tree'):
            yield raw_tree

    def get_trees(self):
        """Yield a ``Ctree`` for every ``tree`` element."""
        return (Ctree(raw_tree) for raw_tree in self.__get_tree_nodes())

    def __str__(self):
        """Return whatever ``dump`` produces for the wrapped element."""
        return dump(self.node)
|
@@ -1,52 +0,0 @@
|
|
1
|
-
from lxml import etree
|
2
|
-
from span_data import Cspan
|
3
|
-
|
4
|
-
class Ccoreference:
    """Wrapper around a ``coref`` XML element.

    ``type`` selects which attribute holds the identifier: ``'NAF'`` reads
    ``id`` and ``'KAF'`` reads ``coid``.
    """

    def __init__(self, node=None, type='NAF'):
        self.type = type
        # A fresh, empty 'coref' element is created only when no node is given.
        self.node = etree.Element('coref') if node is None else node

    def get_id(self):
        """Return the identifier attribute for this object's type.

        Returns None when the type is neither ``'NAF'`` nor ``'KAF'``.
        """
        if self.type == 'NAF':
            return self.node.get('id')
        if self.type == 'KAF':
            return self.node.get('coid')
        return None

    def get_spans(self):
        """Yield a ``Cspan`` for every ``span`` child element."""
        for span_element in self.node.findall('span'):
            yield Cspan(span_element)
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
class Ccoreferences:
    """Wrapper around a ``coreferences`` XML element holding ``coref`` children.

    ``type`` records which identifier attribute the ``coref`` children carry:
    ``'NAF'`` means ``id`` and ``'KAF'`` means ``coid``.
    """

    def __init__(self, node=None, type='NAF'):
        self.type = type
        # A fresh, empty 'coreferences' element is created only when no node is given.
        if node is None:
            self.node = etree.Element('coreferences')
        else:
            self.node = node

    def __get_corefs_nodes(self):
        """Yield the raw ``coref`` elements found with ``findall('coref')``."""
        for coref_node in self.node.findall('coref'):
            yield coref_node

    def get_corefs(self):
        """Yield a ``Ccoreference`` (carrying this layer's type) per ``coref``."""
        for coref_node in self.__get_corefs_nodes():
            yield Ccoreference(coref_node, self.type)

    def __rename_id_attribute(self, old_name, new_name):
        """Move the ``old_name`` attribute of every ``coref`` to ``new_name``.

        Elements that do not carry ``old_name`` are left untouched instead of
        being given an empty identifier.
        """
        for coref_node in self.__get_corefs_nodes():
            value = coref_node.get(old_name)
            if value is not None:
                coref_node.set(new_name, value)
                del coref_node.attrib[old_name]

    def to_kaf(self):
        """Rename ``id`` to ``coid`` on every ``coref`` and mark the layer as KAF.

        Does nothing unless the layer type is ``'NAF'``.
        """
        if self.type == 'NAF':
            self.__rename_id_attribute('id', 'coid')
            # Record the new format so later reads use 'coid' and a repeated
            # call does not try to convert already-converted elements.
            self.type = 'KAF'

    def to_naf(self):
        """Rename ``coid`` to ``id`` on every ``coref`` and mark the layer as NAF.

        Does nothing unless the layer type is ``'KAF'``.
        """
        if self.type == 'KAF':
            self.__rename_id_attribute('coid', 'id')
            # Keep the recorded format in step with the converted attributes.
            self.type = 'NAF'
|
51
|
-
|
52
|
-
|
@@ -1,78 +0,0 @@
|
|
1
|
-
from lxml import etree
|
2
|
-
#from lxml.objectify import dump
|
3
|
-
|
4
|
-
|
5
|
-
class Cdependency:
    """Wrapper around a ``dep`` XML element (attributes ``from``, ``to``, ``rfunc``)."""

    def __init__(self, node=None):
        # A fresh, empty 'dep' element is created only when no node is given.
        if node is None:
            self.node = etree.Element('dep')
        else:
            self.node = node

    def get_node_comment(self):
        """Return the first comment child of the wrapped element, or None.

        ``set_comment`` inserts its comment at position 0, so this is the most
        recently attached comment when one was set through this class.
        NOTE(review): the previous implementation read an attribute that was
        never assigned and always raised AttributeError; returning the comment
        node is the presumed intent -- confirm against callers.
        """
        for child in self.node:
            # Comment nodes are recognised by their tag being the Comment factory.
            if child.tag is etree.Comment:
                return child
        return None

    def get_node(self):
        """Return the wrapped element."""
        return self.node

    def get_from(self):
        """Return the value of the ``from`` attribute."""
        return self.node.get('from')

    def get_to(self):
        """Return the value of the ``to`` attribute."""
        return self.node.get('to')

    def get_function(self):
        """Return the value of the ``rfunc`` attribute."""
        return self.node.get('rfunc')

    def set_from(self, f):
        """Set the ``from`` attribute to ``f``."""
        self.node.set('from', f)

    def set_to(self, t):
        """Set the ``to`` attribute to ``t``."""
        self.node.set('to', t)

    def set_function(self, f):
        """Set the ``rfunc`` attribute to ``f``."""
        self.node.set('rfunc', f)

    def set_comment(self, c):
        """Insert ``c`` as a comment node at the start of the wrapped element."""
        # A double hyphen is not allowed inside an XML comment, so break it up.
        c = c.replace('--', '- -')
        self.node.insert(0, etree.Comment(c))

    def __str__(self):
        """Return whatever ``lxml.objectify.dump`` produces for the wrapped element."""
        # Imported here because the module-level import of dump is commented
        # out, which made this method fail with NameError.
        from lxml.objectify import dump
        return dump(self.node)
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
class Cdependencies:
    """Wrapper around a ``deps`` XML element holding ``dep`` children."""

    def __init__(self, node=None):
        # A fresh, empty 'deps' element is created only when no node is given.
        if node is None:
            self.node = etree.Element('deps')
        else:
            self.node = node

    def get_node(self):
        """Return the wrapped element."""
        return self.node

    def to_kaf(self):
        """Do nothing; no conversion is performed for this layer."""
        pass

    def to_naf(self):
        """Do nothing; no conversion is performed for this layer."""
        pass

    def __str__(self):
        """Return whatever ``lxml.objectify.dump`` produces for the wrapped element."""
        # Imported here because the module-level import of dump is commented
        # out, which made this method fail with NameError.
        from lxml.objectify import dump
        return dump(self.node)

    def __get_node_deps(self):
        """Yield the raw ``dep`` elements found with ``findall('dep')``."""
        for node_dep in self.node.findall('dep'):
            yield node_dep

    def get_dependencies(self):
        """Yield a ``Cdependency`` for every ``dep`` element."""
        for node in self.__get_node_deps():
            yield Cdependency(node)

    def add_dependency(self, my_dep):
        """Append the element returned by ``my_dep.get_node()`` to this layer."""
        self.node.append(my_dep.get_node())
|
78
|
-
|
@@ -1,59 +0,0 @@
|
|
1
|
-
## Modified for KAF NAF adaptation
|
2
|
-
from lxml import etree
|
3
|
-
from lxml.objectify import dump
|
4
|
-
from references_data import *
|
5
|
-
|
6
|
-
|
7
|
-
class Centity:
    """Wrapper around an ``entity`` XML element.

    ``type`` selects which attribute holds the identifier: ``'NAF'`` reads
    ``id`` and ``'KAF'`` reads ``eid``.
    """

    def __init__(self, node=None, type='NAF'):
        self.type = type
        # A fresh, empty 'entity' element is created only when no node is given.
        self.node = etree.Element('entity') if node is None else node

    def get_id(self):
        """Return the identifier attribute for this object's type.

        Returns None when the type is neither ``'NAF'`` nor ``'KAF'``.
        """
        if self.type == 'NAF':
            return self.node.get('id')
        if self.type == 'KAF':
            return self.node.get('eid')
        return None

    def get_type(self):
        """Return the value of the wrapped element's ``type`` attribute."""
        entity_type = self.node.get('type')
        return entity_type

    def get_references(self):
        """Yield a ``Creferences`` for every ``references`` child element."""
        for references_element in self.node.findall('references'):
            yield Creferences(references_element)
|
27
|
-
|
28
|
-
class Centities:
    """Wrapper around an ``entities`` XML element holding ``entity`` children.

    ``type`` records which identifier attribute the ``entity`` children carry:
    ``'NAF'`` means ``id`` and ``'KAF'`` means ``eid``.
    """

    def __init__(self, node=None, type='NAF'):
        self.type = type
        # A fresh, empty 'entities' element is created only when no node is given.
        if node is None:
            self.node = etree.Element('entities')
        else:
            self.node = node

    def __rename_id_attribute(self, old_name, new_name):
        """Move the ``old_name`` attribute of every ``entity`` to ``new_name``.

        Elements that do not carry ``old_name`` are left untouched instead of
        being given an empty identifier.
        """
        for node in self.__get_entity_nodes():
            value = node.get(old_name)
            if value is not None:
                node.set(new_name, value)
                del node.attrib[old_name]

    def to_kaf(self):
        """Rename ``id`` to ``eid`` on every ``entity`` and mark the layer as KAF.

        Does nothing unless the layer type is ``'NAF'``.
        """
        if self.type == 'NAF':
            self.__rename_id_attribute('id', 'eid')
            # Record the new format so iteration hands out wrappers that read
            # 'eid', and a repeated call does not convert twice.
            self.type = 'KAF'

    def to_naf(self):
        """Rename ``eid`` to ``id`` on every ``entity`` and mark the layer as NAF.

        Does nothing unless the layer type is ``'KAF'``.
        """
        if self.type == 'KAF':
            self.__rename_id_attribute('eid', 'id')
            # Keep the recorded format in step with the converted attributes.
            self.type = 'NAF'

    def __get_entity_nodes(self):
        """Yield the raw ``entity`` elements found with ``findall('entity')``."""
        for ent_node in self.node.findall('entity'):
            yield ent_node

    def __iter__(self):
        """Yield a ``Centity`` (carrying this layer's type) per ``entity``."""
        for ent_node in self.__get_entity_nodes():
            yield Centity(ent_node, self.type)

    def __str__(self):
        """Return whatever ``dump`` produces for the wrapped element."""
        return dump(self.node)
|
@@ -1,41 +0,0 @@
|
|
1
|
-
# included modification for KAF/NAF
|
2
|
-
from term_sentiment_data import Cterm_sentiment
|
3
|
-
from lxml import etree
|
4
|
-
|
5
|
-
class CexternalReference:
    """Wrapper around an ``externalRef`` XML element."""

    def __init__(self, node=None):
        self.type = 'NAF/KAF'
        # A fresh, empty 'externalRef' element is created only when no node is given.
        self.node = etree.Element('externalRef') if node is None else node

    def get_node(self):
        """Return the wrapped element."""
        wrapped = self.node
        return wrapped

    def set_resource(self, resource):
        """Store ``resource`` in the element's ``resource`` attribute."""
        element = self.node
        element.set('resource', resource)

    def set_confidence(self, confidence):
        """Store ``confidence`` in the element's ``confidence`` attribute."""
        element = self.node
        element.set('confidence', confidence)

    def set_reference(self, reference):
        """Store ``reference`` in the element's ``reference`` attribute."""
        element = self.node
        element.set('reference', reference)
|
25
|
-
|
26
|
-
|
27
|
-
class CexternalReferences:
    """Wrapper around an ``externalReferences`` XML element."""

    def __init__(self, node=None):
        # A fresh, empty 'externalReferences' element is created only when no node is given.
        self.node = etree.Element('externalReferences') if node is None else node

    def add_external_reference(self, ext_ref):
        """Append the element returned by ``ext_ref.get_node()`` to this container."""
        child_element = ext_ref.get_node()
        self.node.append(child_element)

    def get_node(self):
        """Return the wrapped element."""
        wrapped = self.node
        return wrapped
|
39
|
-
|
40
|
-
|
41
|
-
|
@@ -1,205 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python
|
2
|
-
|
3
|
-
from operator import itemgetter
|
4
|
-
|
5
|
-
'''
|
6
|
-
Extract information from the constituent layer of a NAF file
|
7
|
-
'''
|
8
|
-
|
9
|
-
class Cconstituency_extractor:
    """Index the constituency trees of a parsed object for path and chunk queries.

    The constructor reads every tree returned by ``knaf_obj.get_trees()`` and
    builds, for each terminal, the paths obtained by following edges from
    their ``from`` node to their ``to`` node. Term ids that do not belong to
    any terminal are treated as "nothing found" by the query methods.
    """

    def __init__(self, knaf_obj):
        self.naf = knaf_obj
        # Filled directly from the trees by extract_info_from_naf().
        self.terminals = {}          # terminal id --> list of term ids
        self.terminal_for_term = {}  # term id --> terminal id
        self.label_for_nonter = {}   # non-terminal id --> label
        self.reachable_from = {}     # node_from --> [node_to1, node_to2, ...]

        self.extract_info_from_naf(knaf_obj)

        # Every possible path, starting above each terminal and following edges.
        self.paths_for_terminal = {}
        for terminal_id in self.terminals:
            self.paths_for_terminal[terminal_id] = self.__expand_node(terminal_id, False)

        # For each node that appears on a path, the term ids it subsumes,
        # e.g. ['nonter12'] = set(['t1', 't2', 't3', 't4']).
        self.terms_subsumed_by_nonter = {}
        for terminal_id, span_terms in self.terminals.items():
            for path in self.paths_for_terminal[terminal_id]:
                for nonter in path:
                    self.terms_subsumed_by_nonter.setdefault(nonter, set()).update(span_terms)

    def __first_path_for_term(self, termid):
        """Return the first stored path for ``termid``, or None if there is none."""
        terminal_id = self.terminal_for_term.get(termid)
        if terminal_id is None:
            return None
        paths = self.paths_for_terminal.get(terminal_id)
        if not paths:
            return None
        return paths[0]

    def get_deepest_phrases(self):
        """Group terminals by the second node of their paths; returns None.

        NOTE(review): the grouping is computed but never returned or stored,
        exactly as in the previous implementation -- the intended result is
        unknown, so the return value is left unchanged.
        """
        all_nonter = set()
        for terminal in self.terminals:
            for path in self.paths_for_terminal[terminal]:
                # Paths with a single node have no second element to use.
                if len(path) > 1:
                    all_nonter.add(path[1])

        ter_for_nonter = {}
        for nonter in all_nonter:
            for terminal in self.terminals:
                for path in self.paths_for_terminal[terminal]:
                    if nonter in path:
                        ter_for_nonter.setdefault(nonter, []).append(terminal)

    def get_deepest_phrase_for_termid(self, termid):
        """Return ``(label, sorted term ids)`` for the second node of the term's first path.

        ``termid`` is a term id as used in the term layer. Returns
        ``(None, [])`` when the term is unknown or its first path has fewer
        than two nodes.
        """
        label = None
        subsumed = []
        first_path = self.__first_path_for_term(termid)
        if first_path is not None and len(first_path) > 1:
            first_phrase_id = first_path[1]
            label = self.label_for_nonter.get(first_phrase_id)
            subsumed = self.terms_subsumed_by_nonter.get(first_phrase_id, [])
        return label, sorted(subsumed)

    def get_least_common_subsumer(self, from_tid, to_tid):
        """Return the shared node closest to both terms, or None.

        Only the first path of each term is considered. The node chosen is
        the common one with the smallest sum of positions in the two paths.
        None is returned when either term is unknown or no node is shared.
        """
        path_from = self.__first_path_for_term(from_tid)
        path_to = self.__first_path_for_term(to_tid)
        if path_from is None or path_to is None:
            return None

        common_nodes = set(path_from) & set(path_to)
        if not common_nodes:
            return None
        return min(common_nodes,
                   key=lambda node: path_from.index(node) + path_to.index(node))

    def get_path_from_to(self, from_tid, to_tid):
        """Return the labels on the path from one term up to the common node and down to the other.

        Returns None when ``get_least_common_subsumer`` finds no common node
        (which includes the case of an unknown term id).
        """
        shortest_subsumer = self.get_least_common_subsumer(from_tid, to_tid)
        # Checked before touching the paths: a missing subsumer may mean that
        # one of the terms has no path at all.
        if shortest_subsumer is None:
            return None

        path_from = self.__first_path_for_term(from_tid)
        path_to = self.__first_path_for_term(to_tid)

        # Climb from the first term up to and including the common node ...
        complete_path = path_from[:path_from.index(shortest_subsumer) + 1]
        # ... then walk back down the second term's path, common node excluded.
        below_subsumer = path_to[:path_to.index(shortest_subsumer)]
        complete_path.extend(reversed(below_subsumer))

        return [self.label_for_nonter[nonter] for nonter in complete_path]

    def get_path_for_termid(self, termid):
        """Return the labels along the first path of ``termid``.

        Returns an empty list when the term is unknown.
        """
        first_path = self.__first_path_for_term(termid)
        if first_path is None:
            return []
        return [self.label_for_nonter[nonter] for nonter in first_path]

    def extract_info_from_naf(self, knaf_obj):
        """Read terminals, non-terminal labels and edges from every tree of ``knaf_obj``.

        Fills ``terminals``, ``terminal_for_term``, ``label_for_nonter`` and
        ``reachable_from``, and resets ``paths_for_terminal`` to an empty dict.
        """
        # For each terminal node, a list of paths through all the edges.
        self.paths_for_terminal = {}
        for tree in knaf_obj.get_trees():
            for terminal in tree.get_terminals():
                ter_id = terminal.get_id()
                span_ids = terminal.get_span().get_span_ids()
                self.terminals[ter_id] = span_ids
                for this_id in span_ids:
                    self.terminal_for_term[this_id] = ter_id

            for non_terminal in tree.get_non_terminals():
                self.label_for_nonter[non_terminal.get_id()] = non_terminal.get_label()

            for edge in tree.get_edges():
                self.reachable_from.setdefault(edge.get_from(), []).append(edge.get_to())

    def __expand_node(self, node, include_this_node=True):
        """Return every path that starts at ``node`` and follows the stored edges.

        Recursive. The result is a list of lists, one per path.
        ``include_this_node`` controls whether ``node`` itself is put at the
        front of each path; a node without outgoing edges is always returned
        as the single path ``[node]``, whatever the flag says.
        """
        possible_nodes = self.reachable_from.get(node, [])
        if not possible_nodes:
            return [[node]]

        paths = []
        for possible_node in possible_nodes:
            for path in self.__expand_node(possible_node):
                if include_this_node:
                    path.insert(0, node)
                paths.append(path)
        return paths

    def get_chunks(self, chunk_type):
        """Yield the sorted term ids under every node labelled ``chunk_type``."""
        for nonter, this_type in self.label_for_nonter.items():
            if this_type == chunk_type:
                subsumed = self.terms_subsumed_by_nonter.get(nonter)
                if subsumed is not None:
                    yield sorted(subsumed)

    def get_all_chunks_for_term(self, termid):
        """Yield ``(label, sorted term ids)`` for every node on every path of ``termid``.

        Yields nothing when the term is unknown.
        """
        terminal_id = self.terminal_for_term.get(termid)
        for path in self.paths_for_terminal.get(terminal_id, []):
            for node in path:
                this_type = self.label_for_nonter[node]
                subsumed = self.terms_subsumed_by_nonter.get(node)
                if subsumed is not None:
                    yield this_type, sorted(subsumed)
|
205
|
-
|