opener-kaf-naf-parser 1.0.1 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +67 -8
  3. data/bin/kaf-naf-parser-daemon +10 -0
  4. data/core/kaf-naf-parser.py +5 -5
  5. data/exec/kaf-naf-parser.rb +9 -0
  6. data/ext/hack/Rakefile +13 -0
  7. data/lib/opener/kaf_naf_parser/version.rb +1 -1
  8. data/opener-kaf-naf-parser.gemspec +5 -1
  9. data/pre_install_requirements.txt +3 -0
  10. metadata +37 -51
  11. data/core/packages/KafNafParser-1.2.tar.gz +0 -0
  12. data/core/packages/VUA_pylib-1.3.tar.gz +0 -0
  13. data/core/site-packages/pre_build/KafNafParser/KafNafParserMod.py +0 -338
  14. data/core/site-packages/pre_build/KafNafParser/__init__.py +0 -14
  15. data/core/site-packages/pre_build/KafNafParser/constituency_data.py +0 -125
  16. data/core/site-packages/pre_build/KafNafParser/coreference_data.py +0 -52
  17. data/core/site-packages/pre_build/KafNafParser/dependency_data.py +0 -80
  18. data/core/site-packages/pre_build/KafNafParser/entity_data.py +0 -59
  19. data/core/site-packages/pre_build/KafNafParser/external_references_data.py +0 -41
  20. data/core/site-packages/pre_build/KafNafParser/feature_extractor/__init__.py +0 -2
  21. data/core/site-packages/pre_build/KafNafParser/feature_extractor/constituency.py +0 -205
  22. data/core/site-packages/pre_build/KafNafParser/feature_extractor/dependency.py +0 -300
  23. data/core/site-packages/pre_build/KafNafParser/features_data.py +0 -71
  24. data/core/site-packages/pre_build/KafNafParser/header_data.py +0 -127
  25. data/core/site-packages/pre_build/KafNafParser/opinion_data.py +0 -200
  26. data/core/site-packages/pre_build/KafNafParser/references_data.py +0 -15
  27. data/core/site-packages/pre_build/KafNafParser/span_data.py +0 -63
  28. data/core/site-packages/pre_build/KafNafParser/term_data.py +0 -111
  29. data/core/site-packages/pre_build/KafNafParser/term_sentiment_data.py +0 -42
  30. data/core/site-packages/pre_build/KafNafParser/text_data.py +0 -90
  31. data/core/site-packages/pre_build/KafNafParser-1.2-py2.7.egg-info/PKG-INFO +0 -10
  32. data/core/site-packages/pre_build/KafNafParser-1.2-py2.7.egg-info/SOURCES.txt +0 -22
  33. data/core/site-packages/pre_build/KafNafParser-1.2-py2.7.egg-info/dependency_links.txt +0 -1
  34. data/core/site-packages/pre_build/KafNafParser-1.2-py2.7.egg-info/installed-files.txt +0 -47
  35. data/core/site-packages/pre_build/KafNafParser-1.2-py2.7.egg-info/top_level.txt +0 -1
  36. data/core/site-packages/pre_build/VUA_pylib/__init__.py +0 -1
  37. data/core/site-packages/pre_build/VUA_pylib/common/__init__.py +0 -1
  38. data/core/site-packages/pre_build/VUA_pylib/common/common.py +0 -28
  39. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/__init__.py +0 -1
  40. data/core/site-packages/pre_build/VUA_pylib/corpus_reader/google_web_nl.py +0 -156
  41. data/core/site-packages/pre_build/VUA_pylib/io_utils/__init__.py +0 -1
  42. data/core/site-packages/pre_build/VUA_pylib/io_utils/feature_file.py +0 -121
  43. data/core/site-packages/pre_build/VUA_pylib/lexicon/__init__.py +0 -1
  44. data/core/site-packages/pre_build/VUA_pylib/lexicon/lexicon.py +0 -72
  45. data/core/site-packages/pre_build/VUA_pylib-1.3-py2.7.egg-info/PKG-INFO +0 -10
  46. data/core/site-packages/pre_build/VUA_pylib-1.3-py2.7.egg-info/SOURCES.txt +0 -14
  47. data/core/site-packages/pre_build/VUA_pylib-1.3-py2.7.egg-info/dependency_links.txt +0 -1
  48. data/core/site-packages/pre_build/VUA_pylib-1.3-py2.7.egg-info/installed-files.txt +0 -23
  49. data/core/site-packages/pre_build/VUA_pylib-1.3-py2.7.egg-info/top_level.txt +0 -1
  50. data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.py +0 -165
  51. data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.py +0 -439
  52. data/core/site-packages/pre_build/VUKafParserPy/__init__.py +0 -7
  53. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/PKG-INFO +0 -10
  54. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/SOURCES.txt +0 -7
  55. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/dependency_links.txt +0 -1
  56. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/installed-files.txt +0 -11
  57. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/top_level.txt +0 -1
  58. data/pre_build_requirements.txt +0 -3
@@ -1,125 +0,0 @@
1
- from lxml import etree
2
- from lxml.objectify import dump
3
- from span_data import Cspan
4
-
5
-
6
- class Cnonterminal:
7
- def __init__(self,node=None):
8
- if node is None:
9
- self.node = etree.Element('nt')
10
- else:
11
- self.node = node
12
-
13
- def get_id(self):
14
- return self.node.get('id')
15
-
16
- def get_label(self):
17
- return self.node.get('label')
18
-
19
- def __str__(self):
20
- return dump(self.node)
21
-
22
-
23
-
24
- class Cterminal:
25
- def __init__(self,node=None):
26
- if node is None:
27
- self.node = etree.Element('t')
28
- else:
29
- self.node = node
30
-
31
- def get_id(self):
32
- return self.node.get('id')
33
-
34
- def get_span(self):
35
- span_node = self.node.find('span')
36
- return Cspan(span_node)
37
-
38
- def __str__(self):
39
- return dump(self.node)
40
-
41
- class Cedge:
42
- def __init__(self,node=None):
43
- if node is None:
44
- self.node = etree.Element('edge')
45
- else:
46
- self.node = node
47
-
48
- def __str__(self):
49
- return dump(self.node)
50
-
51
- def get_from(self):
52
- return self.node.get('from')
53
-
54
- def get_to(self):
55
- return self.node.get('to')
56
-
57
-
58
-
59
- class Ctree:
60
- def __init__(self,node=None):
61
- if node is None:
62
- self.node = etree.Element('tree')
63
- else:
64
- self.node = node
65
-
66
-
67
- def __str__(self):
68
- return dump(self.node)
69
-
70
- ## Fore getting non terminals
71
- def __get_nt_nodes(self):
72
- for nt_node in self.node.findall('nt'):
73
- yield nt_node
74
-
75
- def get_non_terminals(self):
76
- for nt_node in self.__get_nt_nodes():
77
- yield Cnonterminal(nt_node)
78
- ##################################
79
-
80
- ## Fore getting terminals
81
- def __get_t_nodes(self):
82
- for t_node in self.node.findall('t'):
83
- yield t_node
84
-
85
- def get_terminals(self):
86
- for t_node in self.__get_t_nodes():
87
- yield Cterminal(t_node)
88
- ##################################
89
-
90
- ## Fore getting edges
91
- def __get_edge_nodes(self):
92
- for t_node in self.node.findall('edge'):
93
- yield t_node
94
-
95
- def get_edges(self):
96
- for edge_node in self.__get_edge_nodes():
97
- yield Cedge(edge_node)
98
- ##################################
99
-
100
-
101
-
102
- class Cconstituency:
103
- def __init__(self,node=None):
104
- self.type = 'NAF/NAF'
105
- if node is None:
106
- self.node = etree.Element('constituency')
107
- else:
108
- self.node = node
109
-
110
- def to_kaf(self):
111
- pass
112
-
113
- def to_naf(self):
114
- pass
115
-
116
- def __get_tree_nodes(self):
117
- for tree_node in self.node.findall('tree'):
118
- yield tree_node
119
-
120
- def get_trees(self):
121
- for tree_node in self.__get_tree_nodes():
122
- yield Ctree(tree_node)
123
-
124
- def __str__(self):
125
- return dump(self.node)
@@ -1,52 +0,0 @@
1
- from lxml import etree
2
- from span_data import Cspan
3
-
4
- class Ccoreference:
5
- def __init__(self,node=None,type='NAF'):
6
- self.type = type
7
- if node is None:
8
- self.node = etree.Element('coref')
9
- else:
10
- self.node = node
11
-
12
- def get_id(self):
13
- if self.type == 'NAF':
14
- return self.node.get('id')
15
- elif self.type == 'KAF':
16
- return self.node.get('coid')
17
-
18
- def get_spans(self):
19
- for node_span in self.node.findall('span'):
20
- yield Cspan(node_span)
21
-
22
-
23
-
24
- class Ccoreferences:
25
- def __init__(self,node=None, type='NAF'):
26
- self.type = type
27
- if node is None:
28
- self.node = etree.Element('coreferences')
29
- else:
30
- self.node = node
31
-
32
- def __get_corefs_nodes(self):
33
- for coref_node in self.node.findall('coref'):
34
- yield coref_node
35
-
36
- def get_corefs(self):
37
- for coref_node in self.__get_corefs_nodes():
38
- yield Ccoreference(coref_node,self.type)
39
-
40
- def to_kaf(self):
41
- if self.type == 'NAF':
42
- for node_coref in self.__get_corefs_nodes():
43
- node_coref.set('coid',node_coref.get('id'))
44
- del node_coref.attrib['id']
45
-
46
- def to_naf(self):
47
- if self.type == 'KAF':
48
- for node_coref in self.__get_corefs_nodes():
49
- node_coref.set('id',node_coref.get('coid'))
50
- del node_coref.attrib['coid']
51
-
52
-
@@ -1,80 +0,0 @@
1
- from lxml import etree
2
- from lxml.objectify import dump
3
-
4
-
5
- class Cdependency:
6
- def __init__(self,node=None):
7
- self.node_comment = None
8
- if node is None:
9
- self.node = etree.Element('dep')
10
- else:
11
- self.node = node
12
-
13
- def get_node_comment(self):
14
- return self.node_comment
15
-
16
- def get_node(self):
17
- return self.node
18
-
19
- def get_from(self):
20
- return self.node.get('from')
21
-
22
- def get_to(self):
23
- return self.node.get('to')
24
-
25
- def get_function(self):
26
- return self.node.get('rfunc')
27
-
28
- def set_from(self, f):
29
- self.node.set('from',f)
30
-
31
- def set_to(self,t):
32
- self.node.set('to',t)
33
-
34
- def set_function(self,f):
35
- self.node.set('rfunc',f)
36
-
37
- def set_comment(self,str_comment):
38
- self.node_comment = etree.Comment(str_comment.replace('--','- -'))
39
-
40
-
41
- def __str__(self):
42
- return dump(self.node)
43
-
44
-
45
-
46
- class Cdependencies:
47
- def __init__(self,node=None):
48
- if node is None:
49
- self.node = etree.Element('deps')
50
- else:
51
- self.node = node
52
-
53
- def get_node(self):
54
- return self.node
55
-
56
- def to_kaf(self):
57
- pass
58
-
59
- def to_naf(self):
60
- pass
61
-
62
- def __str__(self):
63
- return dump(self.node)
64
-
65
-
66
- def __get_node_deps(self):
67
- for node_dep in self.node.findall('dep'):
68
- yield node_dep
69
-
70
- def get_dependencies(self):
71
- for node in self.__get_node_deps():
72
- yield Cdependency(node)
73
-
74
-
75
- def add_dependency(self,my_dep):
76
- node_comment = my_dep.get_node_comment()
77
- if node_comment is not None:
78
- self.node.append(node_comment)
79
- self.node.append(my_dep.get_node())
80
-
@@ -1,59 +0,0 @@
1
- ## Modified for KAF NAF adaptation
2
- from lxml import etree
3
- from lxml.objectify import dump
4
- from references_data import *
5
-
6
-
7
- class Centity:
8
- def __init__(self,node=None,type='NAF'):
9
- self.type = type
10
- if node is None:
11
- self.node = etree.Element('entity')
12
- else:
13
- self.node = node
14
-
15
- def get_id(self):
16
- if self.type == 'NAF':
17
- return self.node.get('id')
18
- elif self.type == 'KAF':
19
- return self.node.get('eid')
20
-
21
- def get_type(self):
22
- return self.node.get('type')
23
-
24
- def get_references(self):
25
- for ref_node in self.node.findall('references'):
26
- yield Creferences(ref_node)
27
-
28
- class Centities:
29
- def __init__(self,node=None,type='NAF'):
30
- self.type = type
31
- if node is None:
32
- self.node = etree.Element('entities')
33
- else:
34
- self.node = node
35
-
36
-
37
- def to_kaf(self):
38
- if self.type == 'NAF':
39
- for node in self.__get_entity_nodes():
40
- node.set('eid',node.get('id'))
41
- del node.attrib['id']
42
-
43
- def to_naf(self):
44
- if self.type == 'KAF':
45
- for node in self.__get_entity_nodes():
46
- node.set('id',node.get('eid'))
47
- del node.attrib['eid']
48
-
49
- def __get_entity_nodes(self):
50
- for ent_node in self.node.findall('entity'):
51
- yield ent_node
52
-
53
- def __iter__(self):
54
- for ent_node in self.__get_entity_nodes():
55
- yield Centity(ent_node,self.type)
56
-
57
-
58
- def __str__(self):
59
- return dump(self.node)
@@ -1,41 +0,0 @@
1
- # included modification for KAF/NAF
2
- from term_sentiment_data import Cterm_sentiment
3
- from lxml import etree
4
-
5
- class CexternalReference:
6
- def __init__(self,node=None):
7
- self.type= 'NAF/KAF'
8
- #self.resource = self.reference = self.reftype = self.status = self.source = self.confidence = ''
9
- if node is None:
10
- self.node = etree.Element('externalRef')
11
- else:
12
- self.node = node
13
-
14
- def get_node(self):
15
- return self.node
16
-
17
- def set_resource(self,resource):
18
- self.node.set('resource',resource)
19
-
20
- def set_confidence(self,confidence):
21
- self.node.set('confidence',confidence)
22
-
23
- def set_reference(self,reference):
24
- self.node.set('reference',reference)
25
-
26
-
27
- class CexternalReferences:
28
- def __init__(self,node=None):
29
- if node is None:
30
- self.node = etree.Element('externalReferences')
31
- else:
32
- self.node = node
33
-
34
- def add_external_reference(self,ext_ref):
35
- self.node.append(ext_ref.get_node())
36
-
37
- def get_node(self):
38
- return self.node
39
-
40
-
41
-
@@ -1,2 +0,0 @@
1
- from dependency import *
2
- from constituency import *
@@ -1,205 +0,0 @@
1
- #!/usr/bin/env python
2
-
3
- from operator import itemgetter
4
-
5
- '''
6
- Extract information from the contituent layer from a NAF file
7
- '''
8
-
9
- class Cconstituency_extractor:
10
- def __init__(self,knaf_obj):
11
- self.naf = knaf_obj
12
- #Extract terminals, non terminals and edges
13
- ## Extracted directly from
14
- self.terminals = {} #terminal id --> list term ids
15
- self.terminal_for_term = {} #term id --> terminal id
16
- self.label_for_nonter = {} # nonter --> label
17
- self.reachable_from = {} # node_from --> [nodeto1, nodeto2...]
18
-
19
- self.extract_info_from_naf(knaf_obj)
20
-
21
- #Extracting all posible paths from leave to root for each terminal id
22
- self.paths_for_terminal= {}
23
- for terminal_id in self.terminals.keys():
24
- paths = self.__expand_node(terminal_id,False)
25
- self.paths_for_terminal[terminal_id] = paths
26
- #######################################
27
-
28
- ## Create, for each non terminal, which are the terminals subsumed
29
- self.terms_subsumed_by_nonter = {} ## ['nonter12'] = set('t1,'t2','t3','t4')
30
- for terminal_id, span_terms in self.terminals.items():
31
- for path in self.paths_for_terminal[terminal_id]:
32
- for nonter in path:
33
- if nonter not in self.terms_subsumed_by_nonter:
34
- self.terms_subsumed_by_nonter[nonter] = set()
35
- for termid in span_terms:
36
- self.terms_subsumed_by_nonter[nonter].add(termid)
37
-
38
- ## To print the paths calculated
39
- # for terminal in self.terminals.keys():
40
- # print terminal
41
- # for path in self.paths_for_terminal[terminal]:
42
- # sep=' '
43
- # for node in path:
44
- # print sep,node,self.label_for_nonter.get(node,'?')
45
- # sep+=' '
46
- # print '#'*20
47
-
48
-
49
- def get_deepest_phrases(self):
50
- all_nonter = set()
51
- for terminal in self.terminals.keys():
52
- for path in self.paths_for_terminal[terminal]:
53
- first_non_ter_phrase = path[1]
54
- all_nonter.add(first_non_ter_phrase)
55
-
56
- ter_for_nonter = {}
57
- for nonter in all_nonter:
58
- for terminal in self.terminals.keys():
59
- for path in self.paths_for_terminal[terminal]:
60
- if nonter in path:
61
- if nonter in ter_for_nonter:
62
- ter_for_nonter[nonter].append(terminal)
63
- else:
64
- ter_for_nonter[nonter] = [terminal]
65
-
66
- visited = set()
67
- for nonter, list_term in ter_for_nonter.items():
68
- for ter in list_term:
69
-
70
- visited.add(ter)
71
-
72
-
73
- ### Returns the label of the deepest phrase for the term id (termid as in the term layer)
74
- def get_deepest_phrase_for_termid(self,termid):
75
- terminal_id = self.terminal_for_term.get(termid)
76
- label = None
77
- subsumed = []
78
- if terminal_id is not None:
79
- first_path = self.paths_for_terminal[terminal_id][0]
80
- first_phrase_id = first_path[1]
81
- label = self.label_for_nonter.get(first_phrase_id)
82
- subsumed = self.terms_subsumed_by_nonter.get(first_phrase_id,[])
83
- return label,sorted(list(subsumed))
84
-
85
-
86
- def get_least_common_subsumer(self,from_tid,to_tid):
87
- termid_from = self.terminal_for_term.get(from_tid)
88
- termid_to = self.terminal_for_term.get(to_tid)
89
-
90
- path_from = self.paths_for_terminal[termid_from][0]
91
- path_to = self.paths_for_terminal[termid_to][0]
92
- common_nodes = set(path_from) & set(path_to)
93
- if len(common_nodes) == 0:
94
- return None
95
- else:
96
- indexes = []
97
- for common_node in common_nodes:
98
- index1 = path_from.index(common_node)
99
- index2 = path_to.index(common_node)
100
- indexes.append((common_node,index1+index2))
101
- indexes.sort(key=itemgetter(1))
102
- shortest_common = indexes[0][0]
103
- return shortest_common
104
-
105
-
106
- def get_path_from_to(self,from_tid, to_tid):
107
- shortest_subsumer = self.get_least_common_subsumer(from_tid, to_tid)
108
-
109
- #print 'From:',self.naf.get_term(from_tid).get_lemma()
110
- #print 'To:',self.naf.get_term(to_tid).get_lemma()
111
- termid_from = self.terminal_for_term.get(from_tid)
112
- termid_to = self.terminal_for_term.get(to_tid)
113
-
114
- path_from = self.paths_for_terminal[termid_from][0]
115
- path_to = self.paths_for_terminal[termid_to][0]
116
-
117
- if shortest_subsumer is None:
118
- return None
119
-
120
- complete_path = []
121
- for node in path_from:
122
- complete_path.append(node)
123
- if node == shortest_subsumer: break
124
-
125
- begin=False
126
- for node in path_to[-1::-1]:
127
- if begin:
128
- complete_path.append(node)
129
-
130
- if node==shortest_subsumer:
131
- begin=True
132
- labels = [self.label_for_nonter[nonter] for nonter in complete_path]
133
- return labels
134
-
135
-
136
- def get_path_for_termid(self,termid):
137
- terminal_id = self.terminal_for_term.get(termid)
138
- paths = self.paths_for_terminal[terminal_id]
139
- labels = [self.label_for_nonter[nonter] for nonter in paths[0]]
140
- return labels
141
-
142
- def extract_info_from_naf(self,knaf_obj):
143
- ## Generated internally
144
- # For each terminal node, a list of paths through all the edges
145
- self.paths_for_terminal = {}
146
- for tree in knaf_obj.get_trees():
147
- for terminal in tree.get_terminals():
148
- ter_id = terminal.get_id()
149
- span_ids = terminal.get_span().get_span_ids()
150
- self.terminals[ter_id] = span_ids
151
- for this_id in span_ids:
152
- self.terminal_for_term[this_id] = ter_id
153
-
154
-
155
- for non_terminal in tree.get_non_terminals():
156
- nonter_id = non_terminal.get_id()
157
- label = non_terminal.get_label()
158
- self.label_for_nonter[nonter_id] = label
159
-
160
-
161
- for edge in tree.get_edges():
162
- node_from = edge.get_from()
163
- node_to = edge.get_to()
164
- if node_from not in self.reachable_from:
165
- self.reachable_from[node_from] = [node_to]
166
- else:
167
- self.reachable_from[node_from].append(node_to)
168
-
169
-
170
-
171
- ##Recursive function
172
- ## Propagates the node through all the relations extracte from the edges information
173
- ## It returns a list of lists, one for each path
174
- ## Include_this_node is used for avoiding the first node
175
- def __expand_node(self,node,include_this_node=True):
176
- paths = []
177
- possible_nodes = self.reachable_from.get(node,[])
178
- if len(possible_nodes) == 0:
179
- return [[node]]
180
- else:
181
- for possible_node in possible_nodes:
182
- new_paths = self.__expand_node(possible_node)
183
- for path in new_paths:
184
- if include_this_node:
185
- path.insert(0,node)
186
- paths.append(path)
187
- return paths
188
-
189
- def get_chunks(self,chunk_type):
190
- for nonter,this_type in self.label_for_nonter.items():
191
- if this_type == chunk_type:
192
- subsumed = self.terms_subsumed_by_nonter.get(nonter)
193
- if subsumed is not None:
194
- yield sorted(list(subsumed))
195
-
196
- def get_all_chunks_for_term(self,termid):
197
- terminal_id = self.terminal_for_term.get(termid)
198
- paths = self.paths_for_terminal[terminal_id]
199
- for path in paths:
200
- for node in path:
201
- this_type = self.label_for_nonter[node]
202
- subsumed = self.terms_subsumed_by_nonter.get(node)
203
- if subsumed is not None:
204
- yield this_type,sorted(list(subsumed))
205
-