opener-constituent-parser-de 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (41) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +41 -0
  3. data/bin/constituent-parser-de +8 -0
  4. data/core/convert_penn_to_kaf.py +127 -0
  5. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/PKG-INFO +10 -0
  6. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/SOURCES.txt +7 -0
  7. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/dependency_links.txt +1 -0
  8. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/installed-files.txt +11 -0
  9. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/top_level.txt +1 -0
  10. data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.py +161 -0
  11. data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.py +326 -0
  12. data/core/site-packages/pre_build/VUKafParserPy/__init__.py +7 -0
  13. data/core/site-packages/pre_build/VUSentimentLexicon-1.0-py2.7.egg-info/PKG-INFO +10 -0
  14. data/core/site-packages/pre_build/VUSentimentLexicon-1.0-py2.7.egg-info/SOURCES.txt +6 -0
  15. data/core/site-packages/pre_build/VUSentimentLexicon-1.0-py2.7.egg-info/dependency_links.txt +1 -0
  16. data/core/site-packages/pre_build/VUSentimentLexicon-1.0-py2.7.egg-info/installed-files.txt +19 -0
  17. data/core/site-packages/pre_build/VUSentimentLexicon-1.0-py2.7.egg-info/top_level.txt +1 -0
  18. data/core/site-packages/pre_build/VUSentimentLexicon/DE-lexicon/Sentiment-German-HotelDomain.xml +12847 -0
  19. data/core/site-packages/pre_build/VUSentimentLexicon/DE-lexicon/germanLex.txt +8883 -0
  20. data/core/site-packages/pre_build/VUSentimentLexicon/EN-lexicon/Sentiment-English-HotelDomain.xml +28197 -0
  21. data/core/site-packages/pre_build/VUSentimentLexicon/EN-lexicon/Sentiment-English-general.xml +73998 -0
  22. data/core/site-packages/pre_build/VUSentimentLexicon/ES-lexicon/es-sentiment_lexicon.lmf +106035 -0
  23. data/core/site-packages/pre_build/VUSentimentLexicon/FR-lexicon/fr-sentiment_lexicon-old.lmf +232008 -0
  24. data/core/site-packages/pre_build/VUSentimentLexicon/FR-lexicon/fr-sentiment_lexicon.lmf +141651 -0
  25. data/core/site-packages/pre_build/VUSentimentLexicon/IT-lexicon/it-sentiment_lexicon.lmf +200790 -0
  26. data/core/site-packages/pre_build/VUSentimentLexicon/LexiconMod.py +137 -0
  27. data/core/site-packages/pre_build/VUSentimentLexicon/NL-lexicon/Sentiment-Dutch-HotelDomain.xml +15007 -0
  28. data/core/site-packages/pre_build/VUSentimentLexicon/NL-lexicon/Sentiment-Dutch-general.xml +83143 -0
  29. data/core/site-packages/pre_build/VUSentimentLexicon/__init__.py +5 -0
  30. data/core/stanford_parser_de.py +142 -0
  31. data/core/tree.py +1438 -0
  32. data/core/vendor/stanford-parser/stanford-parser-2.0.5-models.jar +0 -0
  33. data/core/vendor/stanford-parser/stanford-parser.jar +0 -0
  34. data/ext/hack/Rakefile +13 -0
  35. data/ext/hack/support.rb +50 -0
  36. data/lib/opener/constituent_parsers/de.rb +100 -0
  37. data/lib/opener/constituent_parsers/de/version.rb +7 -0
  38. data/opener-constituent-parser-de.gemspec +34 -0
  39. data/pre_build_requirements.txt +1 -0
  40. data/pre_install_requirements.txt +1 -0
  41. metadata +139 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: d5a9ae0201da32865ead9a1fe9524712c148204a
4
+ data.tar.gz: 609f1d0465fab2a08278cebdc484949d0fe98e72
5
+ SHA512:
6
+ metadata.gz: b1b526f3a0b0787591003013e54065d172aa82e32fb4b333537fdf2e2b6724926fc6e58e45180a53df36e03340f0f249a160918621b62d22896f6260f3f501f6
7
+ data.tar.gz: 126d01c13291d08eadc89203f9a8e3471d46e35360b004c1153d7a17fe1808684be3f65a731e47e19ccc89ff825a3fbaae34ea643e608c0cdb09345de1016007
@@ -0,0 +1,41 @@
1
+ [![Build Status](https://drone.io/github.com/opener-project/constituent-parser-de/status.png)](https://drone.io/github.com/opener-project/constituent-parser-de/latest)
2
+
3
+ VU-parser-DE_kernel
4
+ ===================
5
+
6
+ Introduction
7
+ ------------
8
+
9
+ This is a parser for German text using the Stanford parser (http://nlp.stanford.edu/software/lex-parser.shtml). The input for this module has to be a valid
10
+ KAF file with at least the text layer. The output will be the constituent trees in pennTreebank format for each of the sentences in the input KAF.
11
+ The tokenization and sentence splitting is taken from the input KAF file, so if your input file has a wrong tokenization/splitting, the output could
12
+ contain errors. The number of output constituent trees will be exactly the same as the number of sentences in your input KAF file.
13
+
14
+ Requirements
15
+ -----------
16
+ * VUKafParserPy: parser in python for KAF files (https://github.com/opener-project/VU-kaf-parser)
17
+ * lxml: library for processing xml in python
18
+ * Stanford parser: http://nlp.stanford.edu/software/lex-parser.shtml
19
+
20
+ Installation
21
+ -----------
22
+ Clone the repository to your local machine and set the variable STANFORD_HOME in the file core/stanford_parser_de.py
23
+ to point to your local folder of the Stanford parser.
24
+
25
+ How to run the module with Python
26
+ ---------------------------------
27
+
28
+ You can run this module from the command line using Python. The main script is core/stanford_parser_de.py. This script reads the KAF from the standard input
29
+ and writes the output to the standard output, generating some log information in the standard error output. To process one file just run:
30
+ ````shell
31
+ cat input.kaf | core/stanford_parser_de.py > input.tree
32
+ ````
33
+
34
+ This will read the KAF file in "input.kaf" and will store the constituent trees in "input.tree".
35
+
36
+
37
+ Contact
38
+ ------
39
+ * Ruben Izquierdo
40
+ * Vrije University of Amsterdam
41
+ * ruben.izquierdobevia@vu.nl
@@ -0,0 +1,8 @@
1
#!/usr/bin/env ruby

# Command-line entry point for the German constituent-parser kernel.
require_relative '../lib/opener/constituent_parsers/de'

kernel = Opener::ConstituentParsers::DE.new(:args => ARGV)

# Read the KAF document from STDIN unless it is attached to a terminal.
input = nil
input = STDIN.read unless STDIN.tty?

kernel.run!(input)
@@ -0,0 +1,127 @@
1
+ from lxml import etree
2
+ from tree import Tree
3
+
4
+
5
+
6
+
7
+
8
# Shared module state: visit_node() accumulates the pieces of the KAF tree
# into these lists while convert_penn_to_kaf() drives the traversal and
# resets them for every sentence.
list_t = []                    # terminals:     (terminal_id, span_of_term_ids)
list_nt = []                   # non-terminals: (nonterminal_id, label)
list_edge = []                 # edges:         (edge_id, child_id, parent_id)
cnt_t = cnt_nt = cnt_edge = 0  # running id counters (offsets supplied by caller)

##This function generates a "tree" xml element as defined in KAF from a string containing
##the penntreebank format and a list of term ids to do the linking
'''
s = '(S (NP (DET The) (NN dog)) (VP (V ate) (NP (DET the) (NN cat))) (. .))'
ids = ['t0 t1','t2','t3','t4','t5','t6']
tree_node = create_constituency_layer(s, ids)
e = etree.ElementTree(element=tree_node)
e.write(sys.stdout,pretty_print=True)
'''
22
def convert_penn_to_kaf(tree_str,term_ids,logging,lemma_for_termid,off_t,off_nt,off_edge):
    """Convert one Penn Treebank bracketed parse into a KAF <tree> element.

    tree_str         -- parse in bracketed Penn Treebank notation
    term_ids         -- one entry per leaf token; each entry is a
                        space-separated string of KAF term ids for that token
    logging          -- logger used for debug output
    lemma_for_termid -- dict mapping term id -> lemma (debug/comments only)
    off_t, off_nt, off_edge -- starting values for the terminal /
                        non-terminal / edge id counters, so ids stay unique
                        across the sentences of one document

    Returns a tuple (tree_element, cnt_t, cnt_nt, cnt_edge) with the XML
    element and the updated counter values.
    """
    global list_t, list_nt,list_edge,cnt_t,cnt_nt,cnt_edge
    # Reset the shared accumulators for this sentence; the counters carry on
    # from the offsets supplied by the caller.
    list_t = []
    list_nt = []
    list_edge = []
    cnt_t = off_t
    cnt_nt = off_nt
    cnt_edge = off_edge

    this_tree = Tree(tree_str)
    logging.debug('\n'+str(this_tree))

    # Replace every leaf token with its KAF term id so that the traversal
    # below emits spans over term ids instead of surface forms.
    for num, token in enumerate(this_tree.leaves()):
        position = this_tree.leaf_treeposition(num)
        token_id = term_ids[num]
        this_tree[position] = token_id
        # BUGFIX: default to u'unknown' so a term id missing from the lemma
        # map no longer raises AttributeError (None has no .encode()); same
        # default as used for lemma_for_ter below.
        logging.debug('Matching '+token+' with term id='+token_id+' which according to KAF lemma='+str(lemma_for_termid.get(token_id,u'unknown').encode('utf-8')))

    ##Create the ROOT (disabled: the parser output already carries one root)
    create_extra_root = False
    nt_id = None
    if create_extra_root:
        nt_id = 'nter'+str(cnt_nt)
        cnt_nt +=1
        list_nt.append((nt_id,'ROOT'))

    visit_node(this_tree, nt_id)

    root = etree.Element('tree')
    nonter_heads = set()
    ## Non-terminals: a trailing '=H' on the label marks the head child.
    labels_for_nt = {}
    for nt_id, label in list_nt:
        ##Checking the head
        if len(label)>=2 and label[-1]=='H' and label[-2]=='=':
            nonter_heads.add(nt_id)
            label = label[:-2]
        ele = etree.Element('nt', attrib={'id':nt_id,'label':label})
        labels_for_nt[nt_id] = label
        root.append(ele)

    ## Terminals: each one spans the KAF term ids of its token.
    lemma_for_ter = {}
    for ter_id, span_ids in list_t:
        ele = etree.Element('t',attrib={'id':ter_id})
        span = etree.Element('span')
        ele.append(span)
        for termid in span_ids.split(' '):
            target = etree.Element('target',attrib={'id':termid})
            span.append(target)
            lemma_for_ter[ter_id] = lemma_for_termid.get(termid,'unknown')
        root.append(ele)

    ##Edges (child -> parent), each preceded by a human-readable comment.
    for edge_id, node_from, node_to in list_edge:
        ele = etree.Element('edge',attrib={'id':edge_id,'from':node_from,'to':node_to})

        ## For the comment: node_to is always a non-terminal registered by
        ## visit_node, so labels_for_nt lookup is expected to succeed.
        label_to = labels_for_nt.get(node_to)

        ##node_from could be a terminal or a non-terminal
        label_from = labels_for_nt.get(node_from)
        if label_from is None:
            label_from = lemma_for_ter.get(node_from,'kk')

        comment = ' '+(edge_id)+' '+(label_to)+' <- '+(label_from)+' '

        if node_from in nonter_heads:
            ele.set('head','yes')
        root.append(etree.Comment(comment))
        root.append(ele)

    return root,cnt_t,cnt_nt,cnt_edge
99
+
100
+
101
def visit_node(node,id_parent=None):
    """Recursively walk a parse tree node, recording terminals,
    non-terminals and child->parent edges in the module-level lists.

    Strings are terminals (their text is a span of KAF term ids); anything
    else is treated as a non-terminal tree node with a .node label.
    """
    global list_t, list_nt,list_edge,cnt_t,cnt_nt,cnt_edge

    if isinstance(node,str):
        # Terminal leaf: register it and link it to its parent.
        terminal_id = 'ter'+str(cnt_t)
        cnt_t += 1
        list_t.append((terminal_id,str(node)))

        edge_id = 'tre'+str(cnt_edge)
        cnt_edge += 1
        list_edge.append((edge_id,terminal_id,id_parent))
        return

    # Non-terminal: register it under its label.
    nonterminal_id = 'nter'+str(cnt_nt)
    cnt_nt += 1
    list_nt.append((nonterminal_id,node.node))

    # Link to the parent unless this is the root of the traversal.
    if id_parent is not None:
        edge_id = 'tre'+str(cnt_edge)
        cnt_edge += 1
        list_edge.append((edge_id,nonterminal_id,id_parent))

    # Recurse into the children with this node as their parent.
    for child in node:
        visit_node(child,nonterminal_id)
@@ -0,0 +1,10 @@
1
+ Metadata-Version: 1.0
2
+ Name: VUKafParserPy
3
+ Version: 1.0
4
+ Summary: Library in python to parse kaf files
5
+ Home-page: UNKNOWN
6
+ Author: Ruben Izquierdo
7
+ Author-email: r.izquierdobevia@vu.nl
8
+ License: UNKNOWN
9
+ Description: UNKNOWN
10
+ Platform: UNKNOWN
@@ -0,0 +1,7 @@
1
+ VUKafParserPy/KafDataObjectsMod.py
2
+ VUKafParserPy/KafParserMod.py
3
+ VUKafParserPy/__init__.py
4
+ VUKafParserPy.egg-info/PKG-INFO
5
+ VUKafParserPy.egg-info/SOURCES.txt
6
+ VUKafParserPy.egg-info/dependency_links.txt
7
+ VUKafParserPy.egg-info/top_level.txt
@@ -0,0 +1,11 @@
1
+ ../VUKafParserPy/KafParserMod.py
2
+ ../VUKafParserPy/__init__.py
3
+ ../VUKafParserPy/KafDataObjectsMod.py
4
+ ../VUKafParserPy/KafParserMod.pyc
5
+ ../VUKafParserPy/__init__.pyc
6
+ ../VUKafParserPy/KafDataObjectsMod.pyc
7
+ ./
8
+ top_level.txt
9
+ SOURCES.txt
10
+ PKG-INFO
11
+ dependency_links.txt
@@ -0,0 +1,161 @@
1
class KafTermSentiment:
    """Sentiment information attached to a single KAF term."""

    def __init__(self):
        # All fields start unset; simpleInit() fills them in later.
        # (sentiment_modifier is only created by simpleInit, as before.)
        for field in ('resource', 'polarity', 'strength', 'subjectivity'):
            setattr(self, field, None)

    def simpleInit(self,r,p,st,su,sm=None):
        """Populate resource, polarity, strength, subjectivity and the
        optional sentiment modifier in a single call."""
        self.resource = r
        self.polarity = p
        self.strength = st
        self.subjectivity = su
        self.sentiment_modifier = sm

    def getPolarity(self):
        """Return the polarity, or None when never initialised."""
        return self.polarity

    def getSentimentModifier(self):
        """Return the sentiment modifier passed to simpleInit()."""
        return self.sentiment_modifier
20
+
21
+
22
class KafToken:
    """A single word form (<wf>) from the KAF text layer."""

    def __init__(self,wid, value, sent=None, para=None):
        # wid/sent/para mirror the KAF attributes; value is the surface form.
        self.token_id, self.value = wid, value
        self.sent, self.para = sent, para
28
+
29
+
30
class KafOpinionExpression:
    """The opinion_expression part of a KAF opinion: polarity, strength
    and the span of term ids it covers."""

    def __init__(self,polarity,strength,targets):
        self.polarity = polarity
        self.strength = strength
        self.targets = targets

    def __str__(self):
        joined_ids = '-'.join(self.targets)
        return 'Op_exp==> pol:' + self.polarity + ' Str:' + self.strength + ' ids:' + joined_ids
38
+
39
class KafOpinion:
    """A KAF opinion: id plus holder span, target span and an expression."""

    def __init__(self,id,holders, targets, opi_exp):
        self.id = id
        self.holders = holders
        self.targets = targets
        self.opi_exp = opi_exp   # typically a KafOpinionExpression

    def __str__(self):
        lines = [
            'Opinion id' + self.id,
            ' Holders: ' + '-'.join(self.holders),
            ' Targets: ' + '-'.join(self.targets),
            str(self.opi_exp),
        ]
        return '\n'.join(lines)
52
+
53
+
54
+
55
class KafSingleProperty:
    """A property from the KAF features layer: id, type label and the
    span of term ids it applies to."""

    def __init__(self,id,type,targets):
        self.id = id
        self.type = type
        self.targets = targets

    def get_id(self):
        """Property identifier (pid/fpid)."""
        return self.id

    def get_type(self):
        """Property type label."""
        return self.type

    def get_span(self):
        """List of target term ids."""
        return self.targets

    def __str__(self):
        span_text = ' '.join(self.targets)
        return 'Id: ' + self.id + ' Type: ' + self.type + ' ids:' + span_text
73
+
74
+
75
class KafSingleEntity:
    """A named entity from the KAF entities layer: id, entity type and
    the span of term ids it covers."""

    def __init__(self,id,type,targets):
        self.id = id
        self.type = type
        self.targets = targets

    def get_id(self):
        """Entity identifier (eid)."""
        return self.id

    def get_type(self):
        """Entity type (e.g. PER, LOC, ORG)."""
        return self.type

    def get_span(self):
        """List of target term ids."""
        return self.targets

    def __str__(self):
        span_text = ' '.join(self.targets)
        return 'Id: ' + self.id + ' Type: ' + self.type + ' ids:' + span_text
92
+
93
class KafTerm:
    """A term (<term>) from the KAF terms layer, with optional sentiment."""

    def __init__(self):
        self.tid = None            # term id
        self.lemma = None
        self.pos = None
        self.sentiment = None      # KafTermSentiment or None
        self.list_span_id = []     # word/token ids spanned by this term

    def set_list_span_id(self, L):
        self.list_span_id = L

    def get_list_span(self):
        return self.list_span_id

    def get_polarity(self):
        """Polarity of the attached sentiment, or None when there is none."""
        if self.sentiment is None:
            return None
        return self.sentiment.getPolarity()

    def get_sentiment_modifier(self):
        """Sentiment modifier of the attached sentiment, or None."""
        if self.sentiment is None:
            return None
        return self.sentiment.getSentimentModifier()

    def setSentiment(self,my_sent):
        self.sentiment = my_sent

    def getSentiment(self):
        return self.sentiment

    def getLemma(self):
        return self.lemma

    def setLemma(self,lemma):
        self.lemma = lemma

    def getPos(self):
        return self.pos

    def setPos(self,pos):
        self.pos = pos

    def getId(self):
        return self.tid

    def setId(self,id):
        self.tid = id

    def getShortPos(self):
        """First letter of the POS tag, lower-cased, with the KAF remapping
        g->a (adjective) and a->r (adverb). None when no POS is set."""
        if self.pos is None:
            return None
        first = self.pos.lower()[0]
        return {'g': 'a', 'a': 'r'}.get(first, first)

    def __str__(self):
        if self.tid and self.lemma and self.pos:
            return self.tid+'\n\t'+self.lemma.encode('utf-8')+'\n\t'+self.pos
        else:
            return 'None'
157
+
158
+
159
+
160
+
161
+
@@ -0,0 +1,326 @@
1
+ ########################################################################
2
+ # 14 Jan 2013: added function add_attrs_to_layer
3
+ ########################################################################
4
+
5
+ ###################
6
+ # List of changes #
7
+ ###################
8
+ # 14 Jan 2013: added function add_attrs_to_layer
9
+ # 27 Feb 2013: added code for comply with DTD
10
+ # 18 Jun 2013: getSingleProperties adapted to the structure KAF/features/properties/property/references/span/target
11
+ # 18 Jun 2013: function add_property created for adding the properties to the KAF
12
+ from lxml import etree
13
+ from KafDataObjectsMod import *
14
+ import time
15
+
16
class KafParser:
    """Reader/writer for KAF documents built on top of lxml.

    Wraps an lxml ElementTree and offers helpers to iterate over tokens,
    terms, properties, entities and opinions, and to add layers, sentiment
    elements and linguisticProcessors header entries.
    """

    def __init__(self,filename=None):
        """Parse *filename*, or create an empty <KAF> skeleton when omitted."""
        self.tree=None
        self.__pathForToken={}   # wid -> XPath to its <wf> element

        if filename:
            self.tree = etree.parse(filename,etree.XMLParser(remove_blank_text=True))
            ## Index the text layer for fast token lookup
            self.__textTokenization()
        else:
            root = etree.Element('KAF')
            root.set('version','v1.opener')
            root.set('{http://www.w3.org/XML/1998/namespace}lang','en')
            self.tree = etree.ElementTree(element=root)

    def __textTokenization(self):
        # Cache the XPath of every word form so getToken() avoids a scan.
        for wf in self.tree.findall('text/wf'):
            wid = wf.get('wid')
            self.__pathForToken[wid] = self.tree.getpath(wf)

    def getToken(self,tid):
        """Return the <wf> element for token id *tid* (KeyError if unknown)."""
        path = self.__pathForToken[tid]
        return self.tree.xpath(self.__pathForToken[tid])[0]

    def getLanguage(self):
        """Return the document's xml:lang attribute, defaulting to 'nl'."""
        lang = self.tree.getroot().get('{http://www.w3.org/XML/1998/namespace}lang','nl')
        return lang

    def getTokens(self):
        """Yield (word, sentence_id, word_id) for every <wf> in the text layer."""
        for element in self.tree.findall('text/wf'):
            w_id = element.get('wid')
            s_id = element.get('sent','0')
            word = element.text
            yield (word, s_id, w_id)

    def getTerms(self):
        """Yield a KafTerm (with sentiment and span filled in) per <term>."""
        if self.tree:
            for element in self.tree.findall('terms/term'):
                kafTermObj = KafTerm()
                kafTermObj.setId(element.get('tid'))
                kafTermObj.setLemma(element.get('lemma'))
                kafTermObj.setPos(element.get('pos'))

                ## Parsing sentiment
                sentiment = element.find('sentiment')
                if sentiment is not None:
                    resource = sentiment.get('resource','')
                    polarity = sentiment.get('polarity',None)
                    strength = sentiment.get('strength','')
                    subjectivity = sentiment.get('subjectivity','')
                    sentiment_modifier = sentiment.get('sentiment_modifier')

                    my_sent = KafTermSentiment()
                    my_sent.simpleInit(resource,polarity,strength,subjectivity,sentiment_modifier)
                    kafTermObj.setSentiment(my_sent)

                ## Parsing the span
                span = element.find('span')
                if span is not None:
                    list_ids = [target.get('id') for target in span.findall('target')]
                    kafTermObj.set_list_span_id(list_ids)

                yield kafTermObj
        else:
            return

    def getSentimentTriples(self):
        """Return a list of (lemma, polarity, sentiment_modifier) per term."""
        data = []
        if self.tree:
            for term_element in self.tree.findall('terms/term'):
                lemma = term_element.get('lemma')
                polarity = None
                sentiment_modifier = None

                sentiment_element = term_element.find('sentiment')
                if sentiment_element is not None:
                    polarity = sentiment_element.get('polarity',None)
                    sentiment_modifier = sentiment_element.get('sentiment_modifier')
                data.append( (lemma,polarity,sentiment_modifier))
        return data

    def addPolarityToTerm(self,termid,my_sentiment_attribs,polarity_pos=None):
        """Append a <sentiment> element (attributes given as a dict) to the
        term with id *termid*; optionally set its pos when absent."""
        if self.tree:
            terms_layer = self.tree.find('terms')
            if terms_layer is None:
                # BUGFIX: without a terms layer find() returns None and the
                # old code crashed with TypeError while iterating it.
                return
            for element in terms_layer:
                if element.get('tid','')==termid:

                    #In case there is no pos info, we use the polarityPos
                    if not element.get('pos') and polarity_pos is not None:
                        element.set('pos',polarity_pos)
                    sentEle = etree.Element('sentiment',attrib=my_sentiment_attribs)
                    element.append(sentEle)

    def saveToFile(self,filename,myencoding='UTF-8'):
        """Serialise the document to *filename*, pretty-printed with an XML
        declaration."""
        if self.tree:
            self.tree.write(filename,encoding=myencoding,pretty_print=True,xml_declaration=True)

    def addLinguisticProcessor(self,name,version, layer, time_stamp=True):
        """Record an <lp name= version= timestamp=> entry for *layer* in the
        kafHeader, creating the header and the layer container when needed.

        When time_stamp is False the timestamp is the placeholder '*'.
        """
        aux = self.tree.findall('kafHeader')
        if len(aux)!=0:
            kaf_header = aux[0]
        else:
            kaf_header = etree.Element('kafHeader')
            self.tree.getroot().insert(0,kaf_header)

        ## Check if there is already a container for this layer
        my_lp_ele = None
        lp_containers = kaf_header.findall('linguisticProcessors')
        for element in lp_containers:
            if element.get('layer','')==layer:
                my_lp_ele = element
                break

        if time_stamp:
            my_time = time.strftime('%Y-%m-%dT%H:%M:%S%Z')
        else:
            my_time = '*'

        my_lp = etree.Element('lp')
        my_lp.set('timestamp',my_time)
        my_lp.set('version',version)
        my_lp.set('name',name)

        if my_lp_ele is not None: #Already a container for this layer
            my_lp_ele.append(my_lp)
        else:
            # BUGFIX: the container used to be created with the singular tag
            # 'linguisticProcessor', which the findall('linguisticProcessors')
            # lookup above never matched, so a duplicate container was
            # created on every call for the same layer.
            my_lp_ele = etree.Element('linguisticProcessors')
            my_lp_ele.set('layer',layer)
            my_lp_ele.append(my_lp)
            ## Insert after the last existing container, or append when the
            ## header has none yet (BUGFIX: the old code read the unbound
            ## loop variable 'element' here and raised NameError then).
            if lp_containers:
                idx = kaf_header.index(lp_containers[-1])
                kaf_header.insert(idx+1,my_lp_ele)
            else:
                kaf_header.append(my_lp_ele)

    def addLayer(self,type,element,first_char_id=None):
        """Append *element* to the layer named *type* (creating the layer when
        missing), assign it a sequential id like 'x1', 'x2', ... and return
        that id. first_char_id defaults to the first letter of *type*."""
        if first_char_id is None:
            first_char_id = type[0]

        ## Check if there is already a layer for the type
        layer_element = self.tree.find(type)

        if layer_element is None:
            layer_element = etree.Element(type)
            self.tree.getroot().append(layer_element)
            ## The id is going to be the first one
            new_id = first_char_id+'1'
        else:
            ## The id continues the count of existing children
            current_n = len(layer_element)
            new_id = first_char_id+''+str(current_n+1)

        ## At this point layer_element points to the correct element, existing or created
        element.set(first_char_id+'id',new_id)
        layer_element.append(element)
        return new_id

    def addElementToLayer(self,layer, element,first_char_id=None):
        """Alias for addLayer()."""
        return self.addLayer(layer,element,first_char_id)

    def add_attrs_to_layer(self,layer,attrs):
        """Set every attribute in the dict *attrs* on the layer element
        named *layer* (no-op when the layer does not exist)."""
        layer_element = self.tree.find(layer)
        if layer_element is not None:
            for att, val in attrs.items():
                layer_element.set(att,val)

    def addAttributeToElement(self,path,str_id, id, attribute, value,sub_path=None):
        """Set *attribute=value* on the first element under *path* whose
        *str_id* attribute equals *id*, optionally descending into the
        first match of *sub_path* first."""
        for element in self.tree.findall(path):
            if id is not None and element.get(str_id,None) == id:
                if sub_path is not None:
                    elements = element.findall(sub_path)
                    if len(elements)!=0: element = elements[0]
                element.set(attribute,value)
                return

    ## This works with the original definition of the property layer
    ## KAF -> properties -> property* -> span* -> target*
    def getSingleProperties_old(self):
        """Yield KafSingleProperty objects using the pre-June-2013 layout."""
        for element in self.tree.findall('properties/property'):
            my_id = element.get('pid')
            my_type = element.get('type')
            ref = element.find('references')
            if ref is not None:
                element = ref
            for span_element in element.findall('span'):
                target_ids = [target_element.get('id') for target_element in span_element.findall('target')]
                my_prop = KafSingleProperty(my_id,my_type,target_ids)
                yield my_prop

    ## 18-June-2013
    def getSingleProperties(self):
        """Yield KafSingleProperty objects from the
        KAF/features/properties/property/references/span layout."""
        for property in self.tree.findall('features/properties/property'):
            my_id = property.get('pid')
            if my_id is None:
                my_id = property.get('fpid')
            my_type = property.get('lemma')
            for span_element in property.findall('references/span'):
                target_ids = [target_element.get('id') for target_element in span_element.findall('target')]
                my_prop = KafSingleProperty(my_id,my_type,target_ids)
                yield my_prop

    # This function adds a new property of the type given with the list of ids given
    # my_type -> 'sleeping comfort' list_ids = ['id1','id2']
    # It creates the features/properties layers in case
    # Agglomerates all the properties for the same TYPE under the same property element
    # It calculates automatically the number for the identifier depending on the number
    # of properties existing
    def add_property(self,my_type,list_ids):
        """Add a span of term ids under the property labelled *my_type*,
        creating the features/properties/property chain when missing."""
        #Looking for feature layer or creating it
        feature_layer = self.tree.find('features')
        if feature_layer is None:
            feature_layer = etree.Element('features')
            self.tree.getroot().append(feature_layer)

        #Looking for properties layer
        properties_layer = feature_layer.find('properties')
        if properties_layer is None:
            properties_layer = etree.Element('properties')
            feature_layer.append(properties_layer)

        num_props = 0
        property_layer = None
        for property in properties_layer.findall('property'):
            num_props += 1
            prop_type = property.get('lemma')
            if prop_type == my_type:
                property_layer = property
                break

        if property_layer is None: # There is no property yet for that type, let's create one
            property_layer = etree.Element('property')
            property_layer.set('pid','p'+str(num_props+1))
            property_layer.set('lemma',my_type)
            properties_layer.append(property_layer)

        references = property_layer.find('references')
        if references is None:
            references = etree.Element('references')
            property_layer.append(references)
        ## Create the new span
        span = etree.Element('span')
        references.append(span)
        for my_id in list_ids:
            span.append(etree.Element('target',attrib={'id':my_id}))

    def getSingleEntities(self):
        """Yield KafSingleEntity objects; spans may be nested under
        <references> (newer KAF) or inline under the entity (older KAF)."""
        for element in self.tree.findall('entities/entity'):
            my_id = element.get('eid')
            my_type = element.get('type')
            my_path_to_span = None
            ref = element.find('references')
            if ref is not None:
                my_path_to_span = 'references/span'
            else:
                my_path_to_span = 'span'

            for span_element in element.findall(my_path_to_span):
                target_ids = [target_element.get('id') for target_element in span_element.findall('target')]
                my_prop = KafSingleEntity(my_id,my_type,target_ids)
                yield my_prop

    def getOpinions(self):
        """Yield a KafOpinion per <opinion>, with holder, target and
        expression spans (empty lists/strings when a part is absent)."""
        for element in self.tree.findall('opinions/opinion'):
            my_id = element.get('oid')

            tar_ids_hol = []
            tar_ids_tar = []
            # BUGFIX: was "polarity = strenght = ''" (typo), which left
            # 'strength' unbound and raised NameError for opinions without
            # an opinion_expression element.
            polarity = strength = ''
            tar_ids_exp = []

            #Holder
            opi_hol_eles = element.findall('opinion_holder')
            if len(opi_hol_eles)!=0:
                opi_hol_ele = opi_hol_eles[0]
                tar_ids_hol = [t_ele.get('id') for t_ele in opi_hol_ele.findall('span/target')]

            #Target
            opi_tar_eles = element.findall('opinion_target')
            if len(opi_tar_eles) != 0:
                opi_tar_ele = opi_tar_eles[0]
                tar_ids_tar = [t_ele.get('id') for t_ele in opi_tar_ele.findall('span/target')]

            ## Opinion expression
            opi_exp_eles = element.findall('opinion_expression')
            if len(opi_exp_eles) != 0:
                opi_exp_ele = opi_exp_eles[0]
                polarity = opi_exp_ele.get('polarity','')
                strength = opi_exp_ele.get('strength','')
                tar_ids_exp = [t_ele.get('id') for t_ele in opi_exp_ele.findall('span/target')]

            yield KafOpinion(my_id,tar_ids_hol, tar_ids_tar, KafOpinionExpression(polarity, strength,tar_ids_exp))
324
+
325
+
326
+