opener-kaf-naf-parser 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (97) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +674 -0
  3. data/README.md +46 -0
  4. data/bin/kaf-naf-parser +8 -0
  5. data/bin/kaf-naf-parser-server +10 -0
  6. data/bin/kaf-to-naf +7 -0
  7. data/bin/naf-to-kaf +7 -0
  8. data/config.ru +4 -0
  9. data/core/kaf-naf-parser.py +42 -0
  10. data/core/packages/KafNafParser-1.2.tar.gz +0 -0
  11. data/core/packages/VUA_pylib-1.3.tar.gz +0 -0
  12. data/core/site-packages/pre_build/KafNafParser-1.2-py2.7.egg-info/PKG-INFO +10 -0
  13. data/core/site-packages/pre_build/KafNafParser-1.2-py2.7.egg-info/SOURCES.txt +22 -0
  14. data/core/site-packages/pre_build/KafNafParser-1.2-py2.7.egg-info/dependency_links.txt +1 -0
  15. data/core/site-packages/pre_build/KafNafParser-1.2-py2.7.egg-info/installed-files.txt +47 -0
  16. data/core/site-packages/pre_build/KafNafParser-1.2-py2.7.egg-info/top_level.txt +1 -0
  17. data/core/site-packages/pre_build/KafNafParser/KafNafParserMod.py +338 -0
  18. data/core/site-packages/pre_build/KafNafParser/KafNafParserMod.pyc +0 -0
  19. data/core/site-packages/pre_build/KafNafParser/__init__.py +14 -0
  20. data/core/site-packages/pre_build/KafNafParser/__init__.pyc +0 -0
  21. data/core/site-packages/pre_build/KafNafParser/constituency_data.py +125 -0
  22. data/core/site-packages/pre_build/KafNafParser/constituency_data.pyc +0 -0
  23. data/core/site-packages/pre_build/KafNafParser/coreference_data.py +52 -0
  24. data/core/site-packages/pre_build/KafNafParser/coreference_data.pyc +0 -0
  25. data/core/site-packages/pre_build/KafNafParser/dependency_data.py +80 -0
  26. data/core/site-packages/pre_build/KafNafParser/dependency_data.pyc +0 -0
  27. data/core/site-packages/pre_build/KafNafParser/entity_data.py +59 -0
  28. data/core/site-packages/pre_build/KafNafParser/entity_data.pyc +0 -0
  29. data/core/site-packages/pre_build/KafNafParser/external_references_data.py +41 -0
  30. data/core/site-packages/pre_build/KafNafParser/external_references_data.pyc +0 -0
  31. data/core/site-packages/pre_build/KafNafParser/feature_extractor/__init__.py +2 -0
  32. data/core/site-packages/pre_build/KafNafParser/feature_extractor/__init__.pyc +0 -0
  33. data/core/site-packages/pre_build/KafNafParser/feature_extractor/constituency.py +205 -0
  34. data/core/site-packages/pre_build/KafNafParser/feature_extractor/constituency.pyc +0 -0
  35. data/core/site-packages/pre_build/KafNafParser/feature_extractor/dependency.py +300 -0
  36. data/core/site-packages/pre_build/KafNafParser/feature_extractor/dependency.pyc +0 -0
  37. data/core/site-packages/pre_build/KafNafParser/features_data.py +71 -0
  38. data/core/site-packages/pre_build/KafNafParser/features_data.pyc +0 -0
  39. data/core/site-packages/pre_build/KafNafParser/header_data.py +127 -0
  40. data/core/site-packages/pre_build/KafNafParser/header_data.pyc +0 -0
  41. data/core/site-packages/pre_build/KafNafParser/opinion_data.py +200 -0
  42. data/core/site-packages/pre_build/KafNafParser/opinion_data.pyc +0 -0
  43. data/core/site-packages/pre_build/KafNafParser/references_data.py +15 -0
  44. data/core/site-packages/pre_build/KafNafParser/references_data.pyc +0 -0
  45. data/core/site-packages/pre_build/KafNafParser/span_data.py +63 -0
  46. data/core/site-packages/pre_build/KafNafParser/span_data.pyc +0 -0
  47. data/core/site-packages/pre_build/KafNafParser/term_data.py +111 -0
  48. data/core/site-packages/pre_build/KafNafParser/term_data.pyc +0 -0
  49. data/core/site-packages/pre_build/KafNafParser/term_sentiment_data.py +42 -0
  50. data/core/site-packages/pre_build/KafNafParser/term_sentiment_data.pyc +0 -0
  51. data/core/site-packages/pre_build/KafNafParser/text_data.py +90 -0
  52. data/core/site-packages/pre_build/KafNafParser/text_data.pyc +0 -0
  53. data/core/site-packages/pre_build/VUA_pylib-1.3-py2.7.egg-info/PKG-INFO +10 -0
  54. data/core/site-packages/pre_build/VUA_pylib-1.3-py2.7.egg-info/SOURCES.txt +14 -0
  55. data/core/site-packages/pre_build/VUA_pylib-1.3-py2.7.egg-info/dependency_links.txt +1 -0
  56. data/core/site-packages/pre_build/VUA_pylib-1.3-py2.7.egg-info/installed-files.txt +23 -0
  57. data/core/site-packages/pre_build/VUA_pylib-1.3-py2.7.egg-info/top_level.txt +1 -0
  58. data/core/site-packages/pre_build/VUA_pylib/__init__.py +1 -0
  59. data/core/site-packages/pre_build/VUA_pylib/__init__.pyc +0 -0
  60. data/core/site-packages/pre_build/VUA_pylib/common/__init__.py +1 -0
  61. data/core/site-packages/pre_build/VUA_pylib/common/__init__.pyc +0 -0
  62. data/core/site-packages/pre_build/VUA_pylib/common/common.py +28 -0
  63. data/core/site-packages/pre_build/VUA_pylib/common/common.pyc +0 -0
  64. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/__init__.py +1 -0
  65. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/__init__.pyc +0 -0
  66. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/google_web_nl.py +156 -0
  67. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/google_web_nl.pyc +0 -0
  68. data/core/site-packages/pre_build/VUA_pylib/io_utils/__init__.py +1 -0
  69. data/core/site-packages/pre_build/VUA_pylib/io_utils/__init__.pyc +0 -0
  70. data/core/site-packages/pre_build/VUA_pylib/io_utils/feature_file.py +121 -0
  71. data/core/site-packages/pre_build/VUA_pylib/io_utils/feature_file.pyc +0 -0
  72. data/core/site-packages/pre_build/VUA_pylib/lexicon/__init__.py +1 -0
  73. data/core/site-packages/pre_build/VUA_pylib/lexicon/__init__.pyc +0 -0
  74. data/core/site-packages/pre_build/VUA_pylib/lexicon/lexicon.py +72 -0
  75. data/core/site-packages/pre_build/VUA_pylib/lexicon/lexicon.pyc +0 -0
  76. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/PKG-INFO +10 -0
  77. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/SOURCES.txt +7 -0
  78. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/dependency_links.txt +1 -0
  79. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/installed-files.txt +11 -0
  80. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/top_level.txt +1 -0
  81. data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.py +165 -0
  82. data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.pyc +0 -0
  83. data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.py +439 -0
  84. data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.pyc +0 -0
  85. data/core/site-packages/pre_build/VUKafParserPy/__init__.py +7 -0
  86. data/core/site-packages/pre_build/VUKafParserPy/__init__.pyc +0 -0
  87. data/ext/hack/support.rb +38 -0
  88. data/lib/opener/kaf_naf_parser.rb +77 -0
  89. data/lib/opener/kaf_naf_parser/cli.rb +92 -0
  90. data/lib/opener/kaf_naf_parser/public/markdown.css +284 -0
  91. data/lib/opener/kaf_naf_parser/server.rb +16 -0
  92. data/lib/opener/kaf_naf_parser/version.rb +5 -0
  93. data/lib/opener/kaf_naf_parser/views/index.erb +103 -0
  94. data/lib/opener/kaf_naf_parser/views/result.erb +15 -0
  95. data/opener-kaf-naf-parser.gemspec +38 -0
  96. data/pre_build_requirements.txt +3 -0
  97. metadata +283 -0
@@ -0,0 +1,14 @@
1
+ from KafNafParserMod import *
2
+ from header_data import *
3
+ from external_references_data import *
4
+ from span_data import *
5
+ from term_data import *
6
+ from term_sentiment_data import *
7
+ from text_data import *
8
+ from entity_data import *
9
+ from features_data import *
10
+ from opinion_data import *
11
+ from dependency_data import *
12
+ from constituency_data import *
13
+ from references_data import *
14
+ from coreference_data import *
@@ -0,0 +1,125 @@
1
+ from lxml import etree
2
+ from lxml.objectify import dump
3
+ from span_data import Cspan
4
+
5
+
6
class Cnonterminal:
    """Thin wrapper around an ``<nt>`` (non-terminal) element of a tree."""

    def __init__(self, node=None):
        # Wrap the element handed in, or start from a fresh empty <nt>.
        self.node = etree.Element('nt') if node is None else node

    def get_id(self):
        """Return the ``id`` attribute of the wrapped element."""
        identifier = self.node.get('id')
        return identifier

    def get_label(self):
        """Return the ``label`` attribute of the wrapped element."""
        label = self.node.get('label')
        return label

    def __str__(self):
        # Delegates the textual rendering to lxml.objectify.dump.
        return dump(self.node)
21
+
22
+
23
+
24
class Cterminal:
    """Thin wrapper around a ``<t>`` (terminal) element of a tree."""

    def __init__(self, node=None):
        # Wrap the element handed in, or start from a fresh empty <t>.
        self.node = etree.Element('t') if node is None else node

    def get_id(self):
        """Return the ``id`` attribute of the wrapped element."""
        identifier = self.node.get('id')
        return identifier

    def get_span(self):
        """Return a ``Cspan`` built from the first ``<span>`` child.

        The result of ``find('span')`` is passed to ``Cspan`` unchecked, so
        whatever ``find`` gives back for a missing child is what ``Cspan``
        receives.
        """
        return Cspan(self.node.find('span'))

    def __str__(self):
        # Delegates the textual rendering to lxml.objectify.dump.
        return dump(self.node)
40
+
41
class Cedge:
    """Thin wrapper around an ``<edge>`` element linking two tree nodes."""

    def __init__(self, node=None):
        # Wrap the element handed in, or start from a fresh empty <edge>.
        self.node = etree.Element('edge') if node is None else node

    def __str__(self):
        # Delegates the textual rendering to lxml.objectify.dump.
        return dump(self.node)

    def get_from(self):
        """Return the ``from`` attribute (id of the edge's source node)."""
        source_id = self.node.get('from')
        return source_id

    def get_to(self):
        """Return the ``to`` attribute (id of the edge's target node)."""
        target_id = self.node.get('to')
        return target_id
56
+
57
+
58
+
59
class Ctree:
    """Wrapper around a ``<tree>`` element of the constituency layer.

    Gives access to the non-terminals (``<nt>``), terminals (``<t>``) and
    edges (``<edge>``) that are direct children of the wrapped element.
    """

    def __init__(self, node=None):
        # Wrap the element handed in, or start from a fresh empty <tree>.
        self.node = etree.Element('tree') if node is None else node

    def __str__(self):
        # Delegates the textual rendering to lxml.objectify.dump.
        return dump(self.node)

    ## For getting non-terminals
    def __get_nt_nodes(self):
        """Yield the raw ``<nt>`` child elements."""
        for raw_nonterminal in self.node.findall('nt'):
            yield raw_nonterminal

    def get_non_terminals(self):
        """Return a generator of ``Cnonterminal`` wrappers, one per ``<nt>``."""
        return (Cnonterminal(raw) for raw in self.__get_nt_nodes())
    ##################################

    ## For getting terminals
    def __get_t_nodes(self):
        """Yield the raw ``<t>`` child elements."""
        for raw_terminal in self.node.findall('t'):
            yield raw_terminal

    def get_terminals(self):
        """Return a generator of ``Cterminal`` wrappers, one per ``<t>``."""
        return (Cterminal(raw) for raw in self.__get_t_nodes())
    ##################################

    ## For getting edges
    def __get_edge_nodes(self):
        """Yield the raw ``<edge>`` child elements."""
        for raw_edge in self.node.findall('edge'):
            yield raw_edge

    def get_edges(self):
        """Return a generator of ``Cedge`` wrappers, one per ``<edge>``."""
        return (Cedge(raw) for raw in self.__get_edge_nodes())
    ##################################
99
+
100
+
101
+
102
class Cconstituency:
    """Wrapper around the ``<constituency>`` layer element."""

    def __init__(self, node=None):
        # NOTE(review): sibling layers use 'NAF/KAF'; 'NAF/NAF' may be a
        # typo, but the value is kept as-is because callers may read it.
        self.type = 'NAF/NAF'
        # Wrap the element handed in, or start from an empty <constituency>.
        self.node = etree.Element('constituency') if node is None else node

    def to_kaf(self):
        """Do nothing: this layer has no conversion step implemented."""
        return None

    def to_naf(self):
        """Do nothing: this layer has no conversion step implemented."""
        return None

    def __get_tree_nodes(self):
        """Yield the raw ``<tree>`` child elements."""
        for raw_tree in self.node.findall('tree'):
            yield raw_tree

    def get_trees(self):
        """Return a generator of ``Ctree`` wrappers, one per ``<tree>``."""
        return (Ctree(raw) for raw in self.__get_tree_nodes())

    def __str__(self):
        # Delegates the textual rendering to lxml.objectify.dump.
        return dump(self.node)
@@ -0,0 +1,52 @@
1
+ from lxml import etree
2
+ from span_data import Cspan
3
+
4
class Ccoreference:
    """Wrapper around a single ``<coref>`` element.

    ``type`` selects the attribute that holds the identifier: ``id`` for
    ``'NAF'`` and ``coid`` for ``'KAF'``.
    """

    def __init__(self, node=None, type='NAF'):
        self.type = type
        # Wrap the element handed in, or start from a fresh empty <coref>.
        self.node = etree.Element('coref') if node is None else node

    def get_id(self):
        """Return the identifier attribute matching ``self.type``.

        Returns ``None`` when ``self.type`` is neither ``'NAF'`` nor ``'KAF'``.
        """
        if self.type == 'NAF':
            return self.node.get('id')
        if self.type == 'KAF':
            return self.node.get('coid')
        return None

    def get_spans(self):
        """Return a generator of ``Cspan`` wrappers, one per ``<span>`` child."""
        return (Cspan(raw_span) for raw_span in self.node.findall('span'))
21
+
22
+
23
+
24
class Ccoreferences:
    """Wrapper around the ``<coreferences>`` layer element.

    ``type`` records the format the wrapped element is currently in
    (``'NAF'``: identifiers stored in ``id``; ``'KAF'``: stored in ``coid``).
    """

    def __init__(self, node=None, type='NAF'):
        self.type = type
        # Wrap the element handed in, or start from an empty <coreferences>.
        self.node = etree.Element('coreferences') if node is None else node

    def __get_corefs_nodes(self):
        """Yield the raw ``<coref>`` child elements."""
        for coref_node in self.node.findall('coref'):
            yield coref_node

    def get_corefs(self):
        """Yield a ``Ccoreference`` wrapper (same ``type``) per ``<coref>``."""
        for coref_node in self.__get_corefs_nodes():
            yield Ccoreference(coref_node, self.type)

    def __rename_id_attribute(self, old_name, new_name):
        """Move the identifier of every ``<coref>`` from one attribute to another."""
        for coref_node in self.__get_corefs_nodes():
            identifier = coref_node.get(old_name)
            if identifier is None:
                # Nothing to move; setting a None value or deleting a
                # missing attribute would raise.
                continue
            coref_node.set(new_name, identifier)
            del coref_node.attrib[old_name]

    def to_kaf(self):
        """Convert the layer in place from NAF (``id``) to KAF (``coid``).

        Does nothing when the layer is not in NAF format.
        """
        if self.type == 'NAF':
            self.__rename_id_attribute('id', 'coid')
            # Record the new format so that get_corefs() reads the right
            # attribute and a repeated call is a no-op.
            self.type = 'KAF'

    def to_naf(self):
        """Convert the layer in place from KAF (``coid``) to NAF (``id``).

        Does nothing when the layer is not in KAF format.
        """
        if self.type == 'KAF':
            self.__rename_id_attribute('coid', 'id')
            # Record the new format so that get_corefs() reads the right
            # attribute and a repeated call is a no-op.
            self.type = 'NAF'
51
+
52
+
@@ -0,0 +1,80 @@
1
+ from lxml import etree
2
+ from lxml.objectify import dump
3
+
4
+
5
class Cdependency:
    """Wrapper around a ``<dep>`` element plus an optional XML comment.

    The comment is kept separately in ``node_comment``; it is not attached
    to the ``<dep>`` element itself.
    """

    def __init__(self, node=None):
        self.node_comment = None
        # Wrap the element handed in, or start from a fresh empty <dep>.
        self.node = etree.Element('dep') if node is None else node

    def get_node_comment(self):
        """Return the comment node set by ``set_comment`` (``None`` if unset)."""
        return self.node_comment

    def get_node(self):
        """Return the wrapped ``<dep>`` element."""
        return self.node

    def get_from(self):
        """Return the ``from`` attribute."""
        return self.node.get('from')

    def get_to(self):
        """Return the ``to`` attribute."""
        return self.node.get('to')

    def get_function(self):
        """Return the ``rfunc`` attribute."""
        return self.node.get('rfunc')

    def set_from(self, f):
        """Set the ``from`` attribute to ``f``."""
        self.node.set('from', f)

    def set_to(self, t):
        """Set the ``to`` attribute to ``t``."""
        self.node.set('to', t)

    def set_function(self, f):
        """Set the ``rfunc`` attribute to ``f``."""
        self.node.set('rfunc', f)

    def set_comment(self, str_comment):
        """Store ``str_comment`` as an XML comment node.

        XML comments may not contain ``--`` nor end with ``-``, so hyphen
        runs are broken up with spaces and a trailing hyphen is padded.
        """
        safe_text = str_comment
        # A single replace() pass leaves '--' behind for runs of three or
        # more hyphens ('---' -> '- --'), so repeat until none remain.
        while '--' in safe_text:
            safe_text = safe_text.replace('--', '- -')
        if safe_text.endswith('-'):
            safe_text += ' '
        self.node_comment = etree.Comment(safe_text)

    def __str__(self):
        # Delegates the textual rendering to lxml.objectify.dump.
        return dump(self.node)
43
+
44
+
45
+
46
class Cdependencies:
    """Wrapper around the ``<deps>`` layer element."""

    def __init__(self, node=None):
        # Wrap the element handed in, or start from a fresh empty <deps>.
        self.node = etree.Element('deps') if node is None else node

    def get_node(self):
        """Return the wrapped ``<deps>`` element."""
        return self.node

    def to_kaf(self):
        """Do nothing: this layer has no conversion step implemented."""
        return None

    def to_naf(self):
        """Do nothing: this layer has no conversion step implemented."""
        return None

    def __str__(self):
        # Delegates the textual rendering to lxml.objectify.dump.
        return dump(self.node)

    def __get_node_deps(self):
        """Yield the raw ``<dep>`` child elements."""
        for raw_dep in self.node.findall('dep'):
            yield raw_dep

    def get_dependencies(self):
        """Return a generator of ``Cdependency`` wrappers, one per ``<dep>``."""
        return (Cdependency(raw_dep) for raw_dep in self.__get_node_deps())

    def add_dependency(self, my_dep):
        """Append ``my_dep`` to the layer, preceded by its comment if it has one."""
        comment = my_dep.get_node_comment()
        if comment is not None:
            # The comment goes first so it sits right before its <dep>.
            self.node.append(comment)
        self.node.append(my_dep.get_node())
80
+
@@ -0,0 +1,59 @@
1
+ ## Modified for KAF NAF adaptation
2
+ from lxml import etree
3
+ from lxml.objectify import dump
4
+ from references_data import *
5
+
6
+
7
class Centity:
    """Wrapper around a single ``<entity>`` element.

    ``type`` selects the attribute that holds the identifier: ``id`` for
    ``'NAF'`` and ``eid`` for ``'KAF'``.
    """

    def __init__(self, node=None, type='NAF'):
        self.type = type
        # Wrap the element handed in, or start from a fresh empty <entity>.
        self.node = etree.Element('entity') if node is None else node

    def get_id(self):
        """Return the identifier attribute matching ``self.type``.

        Returns ``None`` when ``self.type`` is neither ``'NAF'`` nor ``'KAF'``.
        """
        if self.type == 'NAF':
            return self.node.get('id')
        if self.type == 'KAF':
            return self.node.get('eid')
        return None

    def get_type(self):
        """Return the element's ``type`` attribute (not the file format)."""
        entity_type = self.node.get('type')
        return entity_type

    def get_references(self):
        """Return a generator of ``Creferences`` wrappers, one per ``<references>``."""
        return (Creferences(raw) for raw in self.node.findall('references'))
27
+
28
class Centities:
    """Wrapper around the ``<entities>`` layer element.

    ``type`` records the format the wrapped element is currently in
    (``'NAF'``: identifiers stored in ``id``; ``'KAF'``: stored in ``eid``).
    Iterating the object yields one ``Centity`` per ``<entity>`` child.
    """

    def __init__(self, node=None, type='NAF'):
        self.type = type
        # Wrap the element handed in, or start from a fresh empty <entities>.
        self.node = etree.Element('entities') if node is None else node

    def __rename_id_attribute(self, old_name, new_name):
        """Move the identifier of every ``<entity>`` from one attribute to another."""
        for entity_node in self.__get_entity_nodes():
            identifier = entity_node.get(old_name)
            if identifier is None:
                # Nothing to move; setting a None value or deleting a
                # missing attribute would raise.
                continue
            entity_node.set(new_name, identifier)
            del entity_node.attrib[old_name]

    def to_kaf(self):
        """Convert the layer in place from NAF (``id``) to KAF (``eid``).

        Does nothing when the layer is not in NAF format.
        """
        if self.type == 'NAF':
            self.__rename_id_attribute('id', 'eid')
            # Record the new format so that iteration reads the right
            # attribute and a repeated call is a no-op.
            self.type = 'KAF'

    def to_naf(self):
        """Convert the layer in place from KAF (``eid``) to NAF (``id``).

        Does nothing when the layer is not in KAF format.
        """
        if self.type == 'KAF':
            self.__rename_id_attribute('eid', 'id')
            # Record the new format so that iteration reads the right
            # attribute and a repeated call is a no-op.
            self.type = 'NAF'

    def __get_entity_nodes(self):
        """Yield the raw ``<entity>`` child elements."""
        for ent_node in self.node.findall('entity'):
            yield ent_node

    def __iter__(self):
        """Yield a ``Centity`` wrapper (same ``type``) per ``<entity>`` child."""
        for ent_node in self.__get_entity_nodes():
            yield Centity(ent_node, self.type)

    def __str__(self):
        # Delegates the textual rendering to lxml.objectify.dump.
        return dump(self.node)
@@ -0,0 +1,41 @@
1
+ # included modification for KAF/NAF
2
+ from term_sentiment_data import Cterm_sentiment
3
+ from lxml import etree
4
+
5
class CexternalReference:
    """Wrapper around a single ``<externalRef>`` element."""

    def __init__(self, node=None):
        self.type = 'NAF/KAF'
        # Wrap the element handed in, or start from an empty <externalRef>.
        self.node = etree.Element('externalRef') if node is None else node

    def get_node(self):
        """Return the wrapped ``<externalRef>`` element."""
        return self.node

    def set_resource(self, resource):
        """Set the ``resource`` attribute to ``resource``."""
        self.node.set('resource', resource)

    def set_confidence(self, confidence):
        """Set the ``confidence`` attribute to ``confidence``.

        The value is passed to the element unchanged (no conversion is done
        here).
        """
        self.node.set('confidence', confidence)

    def set_reference(self, reference):
        """Set the ``reference`` attribute to ``reference``."""
        self.node.set('reference', reference)
25
+
26
+
27
class CexternalReferences:
    """Wrapper around an ``<externalReferences>`` container element."""

    def __init__(self, node=None):
        # Wrap the element handed in, or start from an empty container.
        self.node = etree.Element('externalReferences') if node is None else node

    def add_external_reference(self, ext_ref):
        """Append the element wrapped by ``ext_ref`` to this container."""
        new_child = ext_ref.get_node()
        self.node.append(new_child)

    def get_node(self):
        """Return the wrapped ``<externalReferences>`` element."""
        return self.node
39
+
40
+
41
+
@@ -0,0 +1,2 @@
1
+ from dependency import *
2
+ from constituency import *
@@ -0,0 +1,205 @@
1
+ #!/usr/bin/env python
2
+
3
+ from operator import itemgetter
4
+
5
+ '''
6
+ Extract information from the constituent layer of a NAF file
7
+ '''
8
+
9
class Cconstituency_extractor:
    """Index the constituency trees of a KAF/NAF object for path and chunk queries.

    On construction the trees returned by ``knaf_obj.get_trees()`` are read
    once and the following lookup tables are built:

    * ``terminals``: terminal id --> list of term ids in its span
    * ``terminal_for_term``: term id --> terminal id
    * ``label_for_nonter``: non-terminal id --> label
    * ``reachable_from``: node id --> list of node ids one edge away
    * ``paths_for_terminal``: terminal id --> list of paths; each path is the
      list of node ids met when following the edges from the terminal (the
      terminal itself is left out unless it has no outgoing edge at all)
    * ``terms_subsumed_by_nonter``: node id --> set of term ids found under it
    """

    def __init__(self, knaf_obj):
        self.naf = knaf_obj
        self.terminals = {}           # terminal id --> list of term ids
        self.terminal_for_term = {}   # term id --> terminal id
        self.label_for_nonter = {}    # non-terminal id --> label
        self.reachable_from = {}      # node_from --> [node_to1, node_to2, ...]

        self.extract_info_from_naf(knaf_obj)

        # Every possible edge-following path for each terminal id.
        self.paths_for_terminal = {}
        for terminal_id in self.terminals:
            self.paths_for_terminal[terminal_id] = self.__expand_node(terminal_id, False)

        # For every node on some path, the term ids of the terminals below it.
        self.terms_subsumed_by_nonter = {}
        for terminal_id, span_terms in self.terminals.items():
            for path in self.paths_for_terminal[terminal_id]:
                for nonter in path:
                    self.terms_subsumed_by_nonter.setdefault(nonter, set()).update(span_terms)

    def __first_path_for_term(self, termid):
        """Return the first stored path for ``termid``, or ``None`` if unknown."""
        terminal_id = self.terminal_for_term.get(termid)
        paths = self.paths_for_terminal.get(terminal_id)
        if not paths:
            return None
        return paths[0]

    def get_deepest_phrases(self):
        """Group terminals by the second node of their paths.

        NOTE(review): the grouping is computed but never returned or stored,
        so this method currently always returns ``None``; it looks unfinished.
        """
        all_nonter = set()
        for terminal in self.terminals:
            for path in self.paths_for_terminal[terminal]:
                # Paths with a single node have no second element.
                if len(path) > 1:
                    all_nonter.add(path[1])

        ter_for_nonter = {}
        for nonter in all_nonter:
            for terminal in self.terminals:
                for path in self.paths_for_terminal[terminal]:
                    if nonter in path:
                        ter_for_nonter.setdefault(nonter, []).append(terminal)

        visited = set()
        for list_term in ter_for_nonter.values():
            visited.update(list_term)

    def get_deepest_phrase_for_termid(self, termid):
        """Return ``(label, term_ids)`` for the second node on the term's first path.

        ``termid`` is a term id as used in the term layer.  ``term_ids`` is
        the sorted list of term ids subsumed by that node.  ``(None, [])`` is
        returned when the term is unknown or its first path has fewer than
        two nodes.
        """
        label = None
        subsumed = []
        first_path = self.__first_path_for_term(termid)
        if first_path is not None and len(first_path) > 1:
            first_phrase_id = first_path[1]
            label = self.label_for_nonter.get(first_phrase_id)
            subsumed = self.terms_subsumed_by_nonter.get(first_phrase_id, [])
        return label, sorted(subsumed)

    def get_least_common_subsumer(self, from_tid, to_tid):
        """Return the id of the closest node shared by both terms' first paths.

        "Closest" means the smallest sum of the node's positions in the two
        paths.  Returns ``None`` when either term id is unknown or the paths
        share no node.
        """
        path_from = self.__first_path_for_term(from_tid)
        path_to = self.__first_path_for_term(to_tid)
        if path_from is None or path_to is None:
            return None

        common_nodes = set(path_from) & set(path_to)
        if not common_nodes:
            return None

        distances = [(node, path_from.index(node) + path_to.index(node))
                     for node in common_nodes]
        return min(distances, key=itemgetter(1))[0]

    def get_path_from_to(self, from_tid, to_tid):
        """Return the labels along the path from one term up and down to another.

        The path climbs the first path of ``from_tid`` up to (and including)
        the least common subsumer, then descends the first path of ``to_tid``.
        Returns ``None`` when there is no common subsumer or a term id is
        unknown.  Raises ``KeyError`` if a node on the path has no label.
        """
        shortest_subsumer = self.get_least_common_subsumer(from_tid, to_tid)
        if shortest_subsumer is None:
            return None

        path_from = self.__first_path_for_term(from_tid)
        path_to = self.__first_path_for_term(to_tid)

        # Upwards: everything up to and including the subsumer.
        upwards = path_from[:path_from.index(shortest_subsumer) + 1]
        # Downwards: the nodes below the subsumer, walked top to bottom.
        downwards = path_to[:path_to.index(shortest_subsumer)][::-1]

        return [self.label_for_nonter[nonter] for nonter in upwards + downwards]

    def get_path_for_termid(self, termid):
        """Return the labels of the nodes on the term's first path.

        Raises ``KeyError`` when ``termid`` is unknown or a node on the path
        has no label.
        """
        terminal_id = self.terminal_for_term.get(termid)
        first_path = self.paths_for_terminal[terminal_id][0]
        return [self.label_for_nonter[nonter] for nonter in first_path]

    def extract_info_from_naf(self, knaf_obj):
        """Fill the lookup tables from the trees of ``knaf_obj``.

        Also resets ``paths_for_terminal`` to an empty dict; the constructor
        fills it in afterwards.
        """
        self.paths_for_terminal = {}
        for tree in knaf_obj.get_trees():
            for terminal in tree.get_terminals():
                ter_id = terminal.get_id()
                span_ids = terminal.get_span().get_span_ids()
                self.terminals[ter_id] = span_ids
                for this_id in span_ids:
                    self.terminal_for_term[this_id] = ter_id

            for non_terminal in tree.get_non_terminals():
                self.label_for_nonter[non_terminal.get_id()] = non_terminal.get_label()

            for edge in tree.get_edges():
                self.reachable_from.setdefault(edge.get_from(), []).append(edge.get_to())

    ## Recursive function.
    ## Propagates the node through all the relations extracted from the edges.
    ## Returns a list of lists, one for each path.
    ## include_this_node is used to leave the starting node out of its paths.
    def __expand_node(self, node, include_this_node=True):
        possible_nodes = self.reachable_from.get(node, [])
        if not possible_nodes:
            # No outgoing edge: the path consists of this node alone
            # (regardless of include_this_node).
            return [[node]]

        paths = []
        for possible_node in possible_nodes:
            for path in self.__expand_node(possible_node):
                if include_this_node:
                    path.insert(0, node)
                paths.append(path)
        return paths

    def get_chunks(self, chunk_type):
        """Yield the sorted term ids under every node labelled ``chunk_type``.

        Nodes that subsume no terms are skipped.
        """
        for nonter, this_type in self.label_for_nonter.items():
            if this_type == chunk_type:
                subsumed = self.terms_subsumed_by_nonter.get(nonter)
                if subsumed is not None:
                    yield sorted(subsumed)

    def get_all_chunks_for_term(self, termid):
        """Yield ``(label, sorted term ids)`` for every node on the term's paths.

        Raises ``KeyError`` when ``termid`` is unknown or a node on a path has
        no label.
        """
        terminal_id = self.terminal_for_term.get(termid)
        for path in self.paths_for_terminal[terminal_id]:
            for node in path:
                this_type = self.label_for_nonter[node]
                subsumed = self.terms_subsumed_by_nonter.get(node)
                if subsumed is not None:
                    yield this_type, sorted(subsumed)
205
+