opener-kaf-naf-parser 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +67 -8
- data/bin/kaf-naf-parser-daemon +10 -0
- data/core/kaf-naf-parser.py +5 -5
- data/exec/kaf-naf-parser.rb +9 -0
- data/ext/hack/Rakefile +13 -0
- data/lib/opener/kaf_naf_parser/version.rb +1 -1
- data/opener-kaf-naf-parser.gemspec +5 -1
- data/pre_install_requirements.txt +3 -0
- metadata +37 -51
- data/core/packages/KafNafParser-1.2.tar.gz +0 -0
- data/core/packages/VUA_pylib-1.3.tar.gz +0 -0
- data/core/site-packages/pre_build/KafNafParser/KafNafParserMod.py +0 -338
- data/core/site-packages/pre_build/KafNafParser/__init__.py +0 -14
- data/core/site-packages/pre_build/KafNafParser/constituency_data.py +0 -125
- data/core/site-packages/pre_build/KafNafParser/coreference_data.py +0 -52
- data/core/site-packages/pre_build/KafNafParser/dependency_data.py +0 -80
- data/core/site-packages/pre_build/KafNafParser/entity_data.py +0 -59
- data/core/site-packages/pre_build/KafNafParser/external_references_data.py +0 -41
- data/core/site-packages/pre_build/KafNafParser/feature_extractor/__init__.py +0 -2
- data/core/site-packages/pre_build/KafNafParser/feature_extractor/constituency.py +0 -205
- data/core/site-packages/pre_build/KafNafParser/feature_extractor/dependency.py +0 -300
- data/core/site-packages/pre_build/KafNafParser/features_data.py +0 -71
- data/core/site-packages/pre_build/KafNafParser/header_data.py +0 -127
- data/core/site-packages/pre_build/KafNafParser/opinion_data.py +0 -200
- data/core/site-packages/pre_build/KafNafParser/references_data.py +0 -15
- data/core/site-packages/pre_build/KafNafParser/span_data.py +0 -63
- data/core/site-packages/pre_build/KafNafParser/term_data.py +0 -111
- data/core/site-packages/pre_build/KafNafParser/term_sentiment_data.py +0 -42
- data/core/site-packages/pre_build/KafNafParser/text_data.py +0 -90
- data/core/site-packages/pre_build/KafNafParser-1.2-py2.7.egg-info/PKG-INFO +0 -10
- data/core/site-packages/pre_build/KafNafParser-1.2-py2.7.egg-info/SOURCES.txt +0 -22
- data/core/site-packages/pre_build/KafNafParser-1.2-py2.7.egg-info/dependency_links.txt +0 -1
- data/core/site-packages/pre_build/KafNafParser-1.2-py2.7.egg-info/installed-files.txt +0 -47
- data/core/site-packages/pre_build/KafNafParser-1.2-py2.7.egg-info/top_level.txt +0 -1
- data/core/site-packages/pre_build/VUA_pylib/__init__.py +0 -1
- data/core/site-packages/pre_build/VUA_pylib/common/__init__.py +0 -1
- data/core/site-packages/pre_build/VUA_pylib/common/common.py +0 -28
- data/core/site-packages/pre_build/VUA_pylib/corpus_reader/__init__.py +0 -1
- data/core/site-packages/pre_build/VUA_pylib/corpus_reader/google_web_nl.py +0 -156
- data/core/site-packages/pre_build/VUA_pylib/io_utils/__init__.py +0 -1
- data/core/site-packages/pre_build/VUA_pylib/io_utils/feature_file.py +0 -121
- data/core/site-packages/pre_build/VUA_pylib/lexicon/__init__.py +0 -1
- data/core/site-packages/pre_build/VUA_pylib/lexicon/lexicon.py +0 -72
- data/core/site-packages/pre_build/VUA_pylib-1.3-py2.7.egg-info/PKG-INFO +0 -10
- data/core/site-packages/pre_build/VUA_pylib-1.3-py2.7.egg-info/SOURCES.txt +0 -14
- data/core/site-packages/pre_build/VUA_pylib-1.3-py2.7.egg-info/dependency_links.txt +0 -1
- data/core/site-packages/pre_build/VUA_pylib-1.3-py2.7.egg-info/installed-files.txt +0 -23
- data/core/site-packages/pre_build/VUA_pylib-1.3-py2.7.egg-info/top_level.txt +0 -1
- data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.py +0 -165
- data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.py +0 -439
- data/core/site-packages/pre_build/VUKafParserPy/__init__.py +0 -7
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/PKG-INFO +0 -10
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/SOURCES.txt +0 -7
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/dependency_links.txt +0 -1
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/installed-files.txt +0 -11
- data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/top_level.txt +0 -1
- data/pre_build_requirements.txt +0 -3
@@ -1,125 +0,0 @@
|
|
1
|
-
from lxml import etree
|
2
|
-
from lxml.objectify import dump
|
3
|
-
from span_data import Cspan
|
4
|
-
|
5
|
-
|
6
|
-
class Cnonterminal:
    """Thin wrapper around an ``nt`` (non-terminal) element of a constituency tree."""

    def __init__(self, node=None):
        """Wrap ``node``; when no node is given, a new empty ``nt`` element is created."""
        self.node = etree.Element('nt') if node is None else node

    def get_id(self):
        """Return the ``id`` attribute of the wrapped element."""
        identifier = self.node.get('id')
        return identifier

    def get_label(self):
        """Return the ``label`` attribute of the wrapped element."""
        label = self.node.get('label')
        return label

    def __str__(self):
        """Return the ``lxml.objectify.dump`` rendering of the wrapped element."""
        return dump(self.node)
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
class Cterminal:
    """Thin wrapper around a ``t`` (terminal) element of a constituency tree."""

    def __init__(self, node=None):
        """Wrap ``node``; when no node is given, a new empty ``t`` element is created."""
        self.node = etree.Element('t') if node is None else node

    def get_id(self):
        """Return the ``id`` attribute of the wrapped element."""
        identifier = self.node.get('id')
        return identifier

    def get_span(self):
        """Return a ``Cspan`` built from the first ``span`` child of the terminal."""
        return Cspan(self.node.find('span'))

    def __str__(self):
        """Return the ``lxml.objectify.dump`` rendering of the wrapped element."""
        return dump(self.node)
|
40
|
-
|
41
|
-
class Cedge:
    """Thin wrapper around an ``edge`` element linking two nodes of a constituency tree."""

    def __init__(self, node=None):
        """Wrap ``node``; when no node is given, a new empty ``edge`` element is created."""
        self.node = etree.Element('edge') if node is None else node

    def __str__(self):
        """Return the ``lxml.objectify.dump`` rendering of the wrapped element."""
        return dump(self.node)

    def get_from(self):
        """Return the ``from`` attribute (identifier of the edge's source node)."""
        source_id = self.node.get('from')
        return source_id

    def get_to(self):
        """Return the ``to`` attribute (identifier of the edge's target node)."""
        target_id = self.node.get('to')
        return target_id
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
class Ctree:
    """Wrapper around a ``tree`` element holding ``nt``, ``t`` and ``edge`` children."""

    def __init__(self, node=None):
        """Wrap ``node``; when no node is given, a new empty ``tree`` element is created."""
        self.node = etree.Element('tree') if node is None else node

    def __str__(self):
        """Return the ``lxml.objectify.dump`` rendering of the wrapped element."""
        return dump(self.node)

    ## For getting non-terminals
    def __get_nt_nodes(self):
        """Yield the raw ``nt`` child elements."""
        for raw_nonterminal in self.node.findall('nt'):
            yield raw_nonterminal

    def get_non_terminals(self):
        """Yield one ``Cnonterminal`` wrapper per ``nt`` child."""
        for raw_nonterminal in self.__get_nt_nodes():
            yield Cnonterminal(raw_nonterminal)
    ##################################

    ## For getting terminals
    def __get_t_nodes(self):
        """Yield the raw ``t`` child elements."""
        for raw_terminal in self.node.findall('t'):
            yield raw_terminal

    def get_terminals(self):
        """Yield one ``Cterminal`` wrapper per ``t`` child."""
        for raw_terminal in self.__get_t_nodes():
            yield Cterminal(raw_terminal)
    ##################################

    ## For getting edges
    def __get_edge_nodes(self):
        """Yield the raw ``edge`` child elements."""
        for raw_edge in self.node.findall('edge'):
            yield raw_edge

    def get_edges(self):
        """Yield one ``Cedge`` wrapper per ``edge`` child."""
        for raw_edge in self.__get_edge_nodes():
            yield Cedge(raw_edge)
    ##################################
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
class Cconstituency:
    """Wrapper around the ``constituency`` layer element, a container of ``tree`` elements."""

    def __init__(self, node=None):
        """Wrap ``node``; when no node is given, a new empty ``constituency`` element is created."""
        self.type = 'NAF/NAF'
        self.node = etree.Element('constituency') if node is None else node

    def to_kaf(self):
        """No-op: this layer performs no conversion."""
        return None

    def to_naf(self):
        """No-op: this layer performs no conversion."""
        return None

    def __get_tree_nodes(self):
        """Yield the raw ``tree`` child elements."""
        for raw_tree in self.node.findall('tree'):
            yield raw_tree

    def get_trees(self):
        """Yield one ``Ctree`` wrapper per ``tree`` child."""
        for raw_tree in self.__get_tree_nodes():
            yield Ctree(raw_tree)

    def __str__(self):
        """Return the ``lxml.objectify.dump`` rendering of the wrapped element."""
        return dump(self.node)
|
@@ -1,52 +0,0 @@
|
|
1
|
-
from lxml import etree
|
2
|
-
from span_data import Cspan
|
3
|
-
|
4
|
-
class Ccoreference:
    """Wrapper around a single ``coref`` element, aware of the KAF/NAF id attribute name."""

    def __init__(self, node=None, type='NAF'):
        """Wrap ``node`` (or a new empty ``coref`` element) and remember the format ``type``."""
        self.type = type
        self.node = etree.Element('coref') if node is None else node

    def get_id(self):
        """Return the identifier: ``coid`` for KAF, ``id`` for NAF, None for any other type."""
        if self.type == 'KAF':
            return self.node.get('coid')
        if self.type == 'NAF':
            return self.node.get('id')
        return None

    def get_spans(self):
        """Yield one ``Cspan`` wrapper per ``span`` child."""
        for raw_span in self.node.findall('span'):
            yield Cspan(raw_span)
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
class Ccoreferences:
    """Wrapper around the ``coreferences`` layer element (a container of ``coref`` nodes).

    The identifier attribute of each ``coref`` is called ``id`` in NAF and
    ``coid`` in KAF; ``self.type`` records which naming the wrapped nodes use.
    """

    def __init__(self, node=None, type='NAF'):
        """Wrap ``node`` (or a new empty ``coreferences`` element) and remember ``type``."""
        self.type = type
        if node is None:
            self.node = etree.Element('coreferences')
        else:
            self.node = node

    def __get_corefs_nodes(self):
        """Yield the raw ``coref`` child elements."""
        for coref_node in self.node.findall('coref'):
            yield coref_node

    def get_corefs(self):
        """Yield one ``Ccoreference`` per ``coref`` child, tagged with the current type."""
        for coref_node in self.__get_corefs_nodes():
            yield Ccoreference(coref_node, self.type)

    def __rename_id_attribute(self, old_name, new_name):
        """Move attribute ``old_name`` to ``new_name`` on every ``coref`` child.

        Nodes lacking ``old_name`` are left untouched, so a partially converted
        or id-less node does not abort the conversion.
        """
        for coref_node in self.__get_corefs_nodes():
            value = coref_node.get(old_name)
            if value is not None:
                coref_node.set(new_name, value)
                del coref_node.attrib[old_name]

    def to_kaf(self):
        """Convert the layer to KAF naming (``id`` -> ``coid``); no-op unless type is NAF."""
        if self.type == 'NAF':
            self.__rename_id_attribute('id', 'coid')
            # Record the new format so get_corefs() reads the right attribute
            # and a repeated call does not try to convert again.
            self.type = 'KAF'

    def to_naf(self):
        """Convert the layer to NAF naming (``coid`` -> ``id``); no-op unless type is KAF."""
        if self.type == 'KAF':
            self.__rename_id_attribute('coid', 'id')
            # Record the new format so get_corefs() reads the right attribute
            # and a repeated call does not try to convert again.
            self.type = 'NAF'
|
51
|
-
|
52
|
-
|
@@ -1,80 +0,0 @@
|
|
1
|
-
from lxml import etree
|
2
|
-
from lxml.objectify import dump
|
3
|
-
|
4
|
-
|
5
|
-
class Cdependency:
    """Wrapper around a single ``dep`` element, with an optional XML comment.

    The comment is kept separately in ``node_comment`` (it is not attached to
    the ``dep`` element itself).
    """

    def __init__(self, node=None):
        """Wrap ``node``; when no node is given, a new empty ``dep`` element is created."""
        self.node_comment = None
        if node is None:
            self.node = etree.Element('dep')
        else:
            self.node = node

    def get_node_comment(self):
        """Return the comment node set through ``set_comment``, or None."""
        return self.node_comment

    def get_node(self):
        """Return the wrapped ``dep`` element."""
        return self.node

    def get_from(self):
        """Return the ``from`` attribute."""
        return self.node.get('from')

    def get_to(self):
        """Return the ``to`` attribute."""
        return self.node.get('to')

    def get_function(self):
        """Return the ``rfunc`` attribute."""
        return self.node.get('rfunc')

    def set_from(self, f):
        """Set the ``from`` attribute to ``f``."""
        self.node.set('from', f)

    def set_to(self, t):
        """Set the ``to`` attribute to ``t``."""
        self.node.set('to', t)

    def set_function(self, f):
        """Set the ``rfunc`` attribute to ``f``."""
        self.node.set('rfunc', f)

    @staticmethod
    def _sanitize_comment_text(text):
        """Return ``text`` made legal for an XML comment.

        XML comments may not contain ``--`` nor end with ``-``. A single
        ``replace`` pass leaves ``--`` behind for runs of three or more dashes
        (``'---'`` becomes ``'- --'``), so the replacement is repeated until
        no double dash remains.
        """
        while '--' in text:
            text = text.replace('--', '- -')
        if text.endswith('-'):
            # A trailing dash would fuse with the closing '-->' delimiter.
            text += ' '
        return text

    def set_comment(self, str_comment):
        """Store ``str_comment`` as an XML comment node, escaping illegal dash sequences."""
        self.node_comment = etree.Comment(self._sanitize_comment_text(str_comment))

    def __str__(self):
        """Return the ``lxml.objectify.dump`` rendering of the wrapped element."""
        return dump(self.node)
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
class Cdependencies:
    """Wrapper around the ``deps`` layer element, a container of ``dep`` elements."""

    def __init__(self, node=None):
        """Wrap ``node``; when no node is given, a new empty ``deps`` element is created."""
        self.node = etree.Element('deps') if node is None else node

    def get_node(self):
        """Return the wrapped ``deps`` element."""
        return self.node

    def to_kaf(self):
        """No-op: this layer performs no conversion."""
        return None

    def to_naf(self):
        """No-op: this layer performs no conversion."""
        return None

    def __str__(self):
        """Return the ``lxml.objectify.dump`` rendering of the wrapped element."""
        return dump(self.node)

    def __get_node_deps(self):
        """Yield the raw ``dep`` child elements."""
        for raw_dep in self.node.findall('dep'):
            yield raw_dep

    def get_dependencies(self):
        """Yield one ``Cdependency`` wrapper per ``dep`` child."""
        for raw_dep in self.__get_node_deps():
            yield Cdependency(raw_dep)

    def add_dependency(self, my_dep):
        """Append ``my_dep`` to the layer, preceded by its comment node when it has one."""
        comment = my_dep.get_node_comment()
        if comment is not None:
            # The comment goes first so it sits directly before its dependency.
            self.node.append(comment)
        self.node.append(my_dep.get_node())
|
80
|
-
|
@@ -1,59 +0,0 @@
|
|
1
|
-
## Modified for KAF NAF adaptation
|
2
|
-
from lxml import etree
|
3
|
-
from lxml.objectify import dump
|
4
|
-
from references_data import *
|
5
|
-
|
6
|
-
|
7
|
-
class Centity:
    """Wrapper around a single ``entity`` element, aware of the KAF/NAF id attribute name."""

    def __init__(self, node=None, type='NAF'):
        """Wrap ``node`` (or a new empty ``entity`` element) and remember the format ``type``."""
        self.type = type
        self.node = etree.Element('entity') if node is None else node

    def get_id(self):
        """Return the identifier: ``eid`` for KAF, ``id`` for NAF, None for any other type."""
        if self.type == 'KAF':
            return self.node.get('eid')
        if self.type == 'NAF':
            return self.node.get('id')
        return None

    def get_type(self):
        """Return the ``type`` attribute of the entity element."""
        entity_type = self.node.get('type')
        return entity_type

    def get_references(self):
        """Yield one ``Creferences`` wrapper per ``references`` child."""
        for raw_references in self.node.findall('references'):
            yield Creferences(raw_references)
|
27
|
-
|
28
|
-
class Centities:
    """Wrapper around the ``entities`` layer element (a container of ``entity`` nodes).

    The identifier attribute of each ``entity`` is called ``id`` in NAF and
    ``eid`` in KAF; ``self.type`` records which naming the wrapped nodes use.
    """

    def __init__(self, node=None, type='NAF'):
        """Wrap ``node`` (or a new empty ``entities`` element) and remember ``type``."""
        self.type = type
        if node is None:
            self.node = etree.Element('entities')
        else:
            self.node = node

    def __rename_id_attribute(self, old_name, new_name):
        """Move attribute ``old_name`` to ``new_name`` on every ``entity`` child.

        Nodes lacking ``old_name`` are left untouched, so a partially converted
        or id-less node does not abort the conversion.
        """
        for ent_node in self.__get_entity_nodes():
            value = ent_node.get(old_name)
            if value is not None:
                ent_node.set(new_name, value)
                del ent_node.attrib[old_name]

    def to_kaf(self):
        """Convert the layer to KAF naming (``id`` -> ``eid``); no-op unless type is NAF."""
        if self.type == 'NAF':
            self.__rename_id_attribute('id', 'eid')
            # Record the new format so iteration reads the right attribute
            # and a repeated call does not try to convert again.
            self.type = 'KAF'

    def to_naf(self):
        """Convert the layer to NAF naming (``eid`` -> ``id``); no-op unless type is KAF."""
        if self.type == 'KAF':
            self.__rename_id_attribute('eid', 'id')
            # Record the new format so iteration reads the right attribute
            # and a repeated call does not try to convert again.
            self.type = 'NAF'

    def __get_entity_nodes(self):
        """Yield the raw ``entity`` child elements."""
        for ent_node in self.node.findall('entity'):
            yield ent_node

    def __iter__(self):
        """Yield one ``Centity`` per ``entity`` child, tagged with the current type."""
        for ent_node in self.__get_entity_nodes():
            yield Centity(ent_node, self.type)

    def __str__(self):
        """Return the ``lxml.objectify.dump`` rendering of the wrapped element."""
        return dump(self.node)
|
@@ -1,41 +0,0 @@
|
|
1
|
-
# included modification for KAF/NAF
|
2
|
-
from term_sentiment_data import Cterm_sentiment
|
3
|
-
from lxml import etree
|
4
|
-
|
5
|
-
class CexternalReference:
    """Wrapper around a single ``externalRef`` element (same layout for NAF and KAF)."""

    def __init__(self, node=None):
        """Wrap ``node``; when no node is given, a new empty ``externalRef`` element is created."""
        self.type = 'NAF/KAF'
        self.node = etree.Element('externalRef') if node is None else node

    def get_node(self):
        """Return the wrapped ``externalRef`` element."""
        return self.node

    def set_resource(self, resource):
        """Set the ``resource`` attribute."""
        self.node.set('resource', resource)

    def set_confidence(self, confidence):
        """Set the ``confidence`` attribute."""
        self.node.set('confidence', confidence)

    def set_reference(self, reference):
        """Set the ``reference`` attribute."""
        self.node.set('reference', reference)
|
25
|
-
|
26
|
-
|
27
|
-
class CexternalReferences:
    """Wrapper around an ``externalReferences`` container element."""

    def __init__(self, node=None):
        """Wrap ``node``; when no node is given, a new empty ``externalReferences`` element is created."""
        self.node = etree.Element('externalReferences') if node is None else node

    def add_external_reference(self, ext_ref):
        """Append the element wrapped by ``ext_ref`` (obtained via its ``get_node``) as a child."""
        child = ext_ref.get_node()
        self.node.append(child)

    def get_node(self):
        """Return the wrapped ``externalReferences`` element."""
        return self.node
|
39
|
-
|
40
|
-
|
41
|
-
|
@@ -1,205 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python
|
2
|
-
|
3
|
-
from operator import itemgetter
|
4
|
-
|
5
|
-
'''
|
6
|
-
Extract information from the constituency layer of a NAF file
|
7
|
-
'''
|
8
|
-
|
9
|
-
class Cconstituency_extractor:
    """Index the constituency trees of a KAF/NAF object for path and chunk queries.

    Attributes built at construction time:
      terminals                 terminal id -> list of term ids in its span
      terminal_for_term         term id -> terminal id
      label_for_nonter          non-terminal id -> label
      reachable_from            node id -> list of node ids reached by its edges
      paths_for_terminal        terminal id -> list of paths; each path lists the
                                node ids reached by following edges from the
                                terminal (the terminal itself is excluded)
      terms_subsumed_by_nonter  node id -> set of term ids of every terminal whose
                                paths contain that node
    """

    def __init__(self, knaf_obj):
        """Read all trees from ``knaf_obj`` (via ``get_trees``) and precompute the indexes."""
        self.naf = knaf_obj
        self.terminals = {}           # terminal id --> list of term ids
        self.terminal_for_term = {}   # term id --> terminal id
        self.label_for_nonter = {}    # non-terminal id --> label
        self.reachable_from = {}      # node_from --> [node_to1, node_to2, ...]

        self.extract_info_from_naf(knaf_obj)

        # All possible edge-following paths for each terminal id.
        self.paths_for_terminal = {}
        for terminal_id in self.terminals:
            self.paths_for_terminal[terminal_id] = self.__expand_node(terminal_id, False)

        # For each node found on some path, the term ids it subsumes,
        # e.g. ['nonter12'] = set(['t1', 't2', 't3', 't4'])
        self.terms_subsumed_by_nonter = {}
        for terminal_id, span_terms in self.terminals.items():
            for path in self.paths_for_terminal[terminal_id]:
                for nonter in path:
                    self.terms_subsumed_by_nonter.setdefault(nonter, set()).update(span_terms)

    def get_deepest_phrases(self):
        """Group terminals under the second node of their paths; returns None.

        NOTE(review): the grouping is computed and then discarded -- the
        original method never returned anything. The None return value is kept
        so existing callers are unaffected; confirm the intended result before
        exposing ``ter_for_nonter``.
        """
        all_nonter = set()
        for terminal in self.terminals:
            for path in self.paths_for_terminal[terminal]:
                # Paths with fewer than two nodes have no phrase-level node.
                if len(path) > 1:
                    all_nonter.add(path[1])

        ter_for_nonter = {}
        for nonter in all_nonter:
            for terminal in self.terminals:
                for path in self.paths_for_terminal[terminal]:
                    if nonter in path:
                        ter_for_nonter.setdefault(nonter, []).append(terminal)

    ### Returns the label of the deepest phrase for the term id (termid as in the term layer)
    def get_deepest_phrase_for_termid(self, termid):
        """Return ``(label, sorted term ids)`` for the second node on the term's first path.

        Returns ``(None, [])`` when the term id is unknown or its first path
        has fewer than two nodes.
        """
        terminal_id = self.terminal_for_term.get(termid)
        if terminal_id is None:
            return None, []
        first_path = self.paths_for_terminal[terminal_id][0]
        if len(first_path) < 2:
            return None, []
        first_phrase_id = first_path[1]
        label = self.label_for_nonter.get(first_phrase_id)
        subsumed = self.terms_subsumed_by_nonter.get(first_phrase_id, [])
        return label, sorted(subsumed)

    def get_least_common_subsumer(self, from_tid, to_tid):
        """Return the id of the closest node shared by the first paths of both terms.

        "Closest" means the smallest sum of positions in the two paths.
        Returns None when either term id is unknown or the paths share no node.
        """
        termid_from = self.terminal_for_term.get(from_tid)
        termid_to = self.terminal_for_term.get(to_tid)
        if termid_from is None or termid_to is None:
            return None

        path_from = self.paths_for_terminal[termid_from][0]
        path_to = self.paths_for_terminal[termid_to][0]
        common_nodes = set(path_from) & set(path_to)
        if not common_nodes:
            return None
        return min(common_nodes,
                   key=lambda node: path_from.index(node) + path_to.index(node))

    def get_path_from_to(self, from_tid, to_tid):
        """Return the labels along the path from one term up to the common subsumer and down to the other.

        Returns None when there is no common subsumer (including unknown term ids).
        """
        shortest_subsumer = self.get_least_common_subsumer(from_tid, to_tid)
        if shortest_subsumer is None:
            return None

        path_from = self.paths_for_terminal[self.terminal_for_term[from_tid]][0]
        path_to = self.paths_for_terminal[self.terminal_for_term[to_tid]][0]

        # Upward part: from the source up to and including the subsumer.
        complete_path = []
        for node in path_from:
            complete_path.append(node)
            if node == shortest_subsumer:
                break

        # Downward part: walk the target path backwards, keeping only the
        # nodes that come after the subsumer in that reversed order.
        begin = False
        for node in reversed(path_to):
            if begin:
                complete_path.append(node)
            if node == shortest_subsumer:
                begin = True

        return [self.label_for_nonter[nonter] for nonter in complete_path]

    def get_path_for_termid(self, termid):
        """Return the labels of the nodes on the term's first path ([] for an unknown term id)."""
        terminal_id = self.terminal_for_term.get(termid)
        if terminal_id is None:
            return []
        first_path = self.paths_for_terminal[terminal_id][0]
        return [self.label_for_nonter[nonter] for nonter in first_path]

    def extract_info_from_naf(self, knaf_obj):
        """Fill the terminal, label and edge indexes from every tree of ``knaf_obj``."""
        # Reset here; __init__ fills it once the edge index is complete.
        self.paths_for_terminal = {}
        for tree in knaf_obj.get_trees():
            for terminal in tree.get_terminals():
                ter_id = terminal.get_id()
                span_ids = terminal.get_span().get_span_ids()
                self.terminals[ter_id] = span_ids
                for this_id in span_ids:
                    self.terminal_for_term[this_id] = ter_id

            for non_terminal in tree.get_non_terminals():
                self.label_for_nonter[non_terminal.get_id()] = non_terminal.get_label()

            for edge in tree.get_edges():
                self.reachable_from.setdefault(edge.get_from(), []).append(edge.get_to())

    ## Recursive function
    ## Propagates the node through all the relations extracted from the edges information
    ## It returns a list of lists, one for each path
    ## include_this_node is used for leaving the starting node out of its own paths
    def __expand_node(self, node, include_this_node=True):
        """Return every path obtained by following edges from ``node``.

        Each path starts at ``node`` (only when ``include_this_node`` is true)
        and ends at a node with no outgoing edge. A node without edges yields a
        single path: ``[node]``, or the empty path when it must be excluded.
        """
        possible_nodes = self.reachable_from.get(node, [])
        if not possible_nodes:
            # Honour include_this_node in the base case too, so an edge-less
            # terminal is not reported as part of its own path.
            return [[node]] if include_this_node else [[]]

        paths = []
        for possible_node in possible_nodes:
            for path in self.__expand_node(possible_node):
                if include_this_node:
                    path.insert(0, node)
                paths.append(path)
        return paths

    def get_chunks(self, chunk_type):
        """Yield the sorted term ids subsumed by each non-terminal labelled ``chunk_type``."""
        for nonter, this_type in self.label_for_nonter.items():
            if this_type == chunk_type:
                subsumed = self.terms_subsumed_by_nonter.get(nonter)
                if subsumed is not None:
                    yield sorted(subsumed)

    def get_all_chunks_for_term(self, termid):
        """Yield ``(label, sorted term ids)`` for every node on every path of the term.

        Yields nothing for an unknown term id.
        """
        terminal_id = self.terminal_for_term.get(termid)
        if terminal_id is None:
            return
        for path in self.paths_for_terminal[terminal_id]:
            for node in path:
                this_type = self.label_for_nonter[node]
                subsumed = self.terms_subsumed_by_nonter.get(node)
                if subsumed is not None:
                    yield this_type, sorted(subsumed)
|
205
|
-
|