opener-constituent-parser-de 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. checksums.yaml +7 -0
  2. data/README.md +41 -0
  3. data/bin/constituent-parser-de +8 -0
  4. data/core/convert_penn_to_kaf.py +127 -0
  5. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/PKG-INFO +10 -0
  6. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/SOURCES.txt +7 -0
  7. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/dependency_links.txt +1 -0
  8. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/installed-files.txt +11 -0
  9. data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/top_level.txt +1 -0
  10. data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.py +161 -0
  11. data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.py +326 -0
  12. data/core/site-packages/pre_build/VUKafParserPy/__init__.py +7 -0
  13. data/core/site-packages/pre_build/VUSentimentLexicon-1.0-py2.7.egg-info/PKG-INFO +10 -0
  14. data/core/site-packages/pre_build/VUSentimentLexicon-1.0-py2.7.egg-info/SOURCES.txt +6 -0
  15. data/core/site-packages/pre_build/VUSentimentLexicon-1.0-py2.7.egg-info/dependency_links.txt +1 -0
  16. data/core/site-packages/pre_build/VUSentimentLexicon-1.0-py2.7.egg-info/installed-files.txt +19 -0
  17. data/core/site-packages/pre_build/VUSentimentLexicon-1.0-py2.7.egg-info/top_level.txt +1 -0
  18. data/core/site-packages/pre_build/VUSentimentLexicon/DE-lexicon/Sentiment-German-HotelDomain.xml +12847 -0
  19. data/core/site-packages/pre_build/VUSentimentLexicon/DE-lexicon/germanLex.txt +8883 -0
  20. data/core/site-packages/pre_build/VUSentimentLexicon/EN-lexicon/Sentiment-English-HotelDomain.xml +28197 -0
  21. data/core/site-packages/pre_build/VUSentimentLexicon/EN-lexicon/Sentiment-English-general.xml +73998 -0
  22. data/core/site-packages/pre_build/VUSentimentLexicon/ES-lexicon/es-sentiment_lexicon.lmf +106035 -0
  23. data/core/site-packages/pre_build/VUSentimentLexicon/FR-lexicon/fr-sentiment_lexicon-old.lmf +232008 -0
  24. data/core/site-packages/pre_build/VUSentimentLexicon/FR-lexicon/fr-sentiment_lexicon.lmf +141651 -0
  25. data/core/site-packages/pre_build/VUSentimentLexicon/IT-lexicon/it-sentiment_lexicon.lmf +200790 -0
  26. data/core/site-packages/pre_build/VUSentimentLexicon/LexiconMod.py +137 -0
  27. data/core/site-packages/pre_build/VUSentimentLexicon/NL-lexicon/Sentiment-Dutch-HotelDomain.xml +15007 -0
  28. data/core/site-packages/pre_build/VUSentimentLexicon/NL-lexicon/Sentiment-Dutch-general.xml +83143 -0
  29. data/core/site-packages/pre_build/VUSentimentLexicon/__init__.py +5 -0
  30. data/core/stanford_parser_de.py +142 -0
  31. data/core/tree.py +1438 -0
  32. data/core/vendor/stanford-parser/stanford-parser-2.0.5-models.jar +0 -0
  33. data/core/vendor/stanford-parser/stanford-parser.jar +0 -0
  34. data/ext/hack/Rakefile +13 -0
  35. data/ext/hack/support.rb +50 -0
  36. data/lib/opener/constituent_parsers/de.rb +100 -0
  37. data/lib/opener/constituent_parsers/de/version.rb +7 -0
  38. data/opener-constituent-parser-de.gemspec +34 -0
  39. data/pre_build_requirements.txt +1 -0
  40. data/pre_install_requirements.txt +1 -0
  41. metadata +139 -0
checksums.yaml
@@ -0,0 +1,7 @@
+ ---
+ SHA1:
+   metadata.gz: d5a9ae0201da32865ead9a1fe9524712c148204a
+   data.tar.gz: 609f1d0465fab2a08278cebdc484949d0fe98e72
+ SHA512:
+   metadata.gz: b1b526f3a0b0787591003013e54065d172aa82e32fb4b333537fdf2e2b6724926fc6e58e45180a53df36e03340f0f249a160918621b62d22896f6260f3f501f6
+   data.tar.gz: 126d01c13291d08eadc89203f9a8e3471d46e35360b004c1153d7a17fe1808684be3f65a731e47e19ccc89ff825a3fbaae34ea643e608c0cdb09345de1016007
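These are the standard RubyGems checksums for the two archives packed inside the `.gem` file. As a minimal sketch (not part of the package; the filename and expected digest are copied from the hunk above), an unpacked archive could be verified against the SHA512 entry like this:

````python
import hashlib

# Expected digest for data.tar.gz, copied from checksums.yaml above.
EXPECTED_SHA512 = ('126d01c13291d08eadc89203f9a8e3471d46e35360b004c1153d7a17fe180868'
                   '4be3f65a731e47e19ccc89ff825a3fbaae34ea643e608c0cdb09345de1016007')

def sha512_of(path):
    # Stream the file in chunks so large archives need not fit in memory.
    h = hashlib.sha512()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(8192), b''):
            h.update(chunk)
    return h.hexdigest()

# A .gem is a tar archive containing metadata.gz and data.tar.gz; unpack it
# first, e.g. `tar -xf opener-constituent-parser-de-1.0.0.gem`.
print(sha512_of('data.tar.gz') == EXPECTED_SHA512)
````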
data/README.md
@@ -0,0 +1,41 @@
+ [![Build Status](https://drone.io/github.com/opener-project/constituent-parser-de/status.png)](https://drone.io/github.com/opener-project/constituent-parser-de/latest)
+
+ VU-parser-DE_kernel
+ ===================
+
+ Introduction
+ ------------
+
+ This is a parser for German text that uses the Stanford parser (http://nlp.stanford.edu/software/lex-parser.shtml). The input for this module has to be a valid
+ KAF file with at least the text layer. The output will be the constituent trees in Penn Treebank format for each of the sentences in the input KAF.
+ The tokenization and sentence splitting are taken from the input KAF file, so if your input file has wrong tokenization/splitting, the output may
+ contain errors. The number of output constituent trees will be exactly the same as the number of sentences in your input KAF.
+
+ Requirements
+ ------------
+ * VUKafParserPy: Python parser for KAF files (https://github.com/opener-project/VU-kaf-parser)
+ * lxml: library for processing XML in Python
+ * Stanford parser: http://nlp.stanford.edu/software/lex-parser.shtml
+
+ Installation
+ ------------
+ Clone the repository to your local machine and set the variable STANFORD_HOME in the file core/stanford_parser_de.py
+ to point to your local folder of the Stanford parser.
+
+ How to run the module with Python
+ ---------------------------------
+
+ You can run this module from the command line using Python. The main script is core/stanford_parser_de.py. This script reads the KAF from standard input
+ and writes the output to standard output, generating some log information on standard error. To process one file, just run:
+ ````shell
+ cat input.kaf | core/stanford_parser_de.py > input.tree
+ ````
+
+ This will read the KAF file "input.kaf" and store the constituent trees in "input.tree".
+
+
+ Contact
+ -------
+ * Ruben Izquierdo
+ * Vrije Universiteit Amsterdam
+ * ruben.izquierdobevia@vu.nl
data/bin/constituent-parser-de
@@ -0,0 +1,8 @@
+ #!/usr/bin/env ruby
+
+ require_relative '../lib/opener/constituent_parsers/de'
+
+ kernel = Opener::ConstituentParsers::DE.new(:args => ARGV)
+ input = STDIN.tty? ? nil : STDIN.read
+
+ kernel.run!(input)
data/core/convert_penn_to_kaf.py
@@ -0,0 +1,127 @@
+ from lxml import etree
+ from tree import Tree
+
+
+
+
+
+ list_t = []
+ list_nt = []
+ list_edge = []
+ cnt_t = cnt_nt = cnt_edge = 0
+
+ ##This function generates a "tree" xml element as defined in KAF from a string containing
+ ##the Penn Treebank format and a list of term ids to do the linking
+ '''
+ s = '(S (NP (DET The) (NN dog)) (VP (V ate) (NP (DET the) (NN cat))) (. .))'
+ ids = ['t0 t1','t2','t3','t4','t5','t6']
+ tree_node, cnt_t, cnt_nt, cnt_edge = convert_penn_to_kaf(s, ids, logging, {}, 0, 0, 0)
+ e = etree.ElementTree(element=tree_node)
+ e.write(sys.stdout,pretty_print=True)
+ '''
+ def convert_penn_to_kaf(tree_str,term_ids,logging,lemma_for_termid,off_t,off_nt,off_edge):
+     global list_t, list_nt,list_edge,cnt_t,cnt_nt,cnt_edge
+     list_t = []
+     list_nt = []
+     list_edge = []
+     cnt_t = off_t
+     cnt_nt = off_nt
+     cnt_edge = off_edge
+
+     this_tree = Tree(tree_str)
+     logging.debug('\n'+str(this_tree))
+
+
+     for num, token in enumerate(this_tree.leaves()):
+         position = this_tree.leaf_treeposition(num)
+         token_id = term_ids[num]
+         this_tree[position] = token_id
+         logging.debug('Matching '+token+' with term id='+token_id+' which according to KAF lemma='+lemma_for_termid.get(token_id, u'').encode('utf-8'))
+
+
+     ##Create the ROOT
+     create_extra_root = False
+     nt_id = None
+     if create_extra_root:
+         nt_id = 'nter'+str(cnt_nt)
+         cnt_nt +=1
+         list_nt.append((nt_id,'ROOT'))
+
+     visit_node(this_tree, nt_id)
+
+     root = etree.Element('tree')
+     nonter_heads = set()
+     ## Non-terminals
+     labels_for_nt = {}
+     for nt_id, label in list_nt:
+         ##Checking the head: labels ending in '=H' mark head nodes
+         if len(label)>=2 and label[-1]=='H' and label[-2]=='=':
+             nonter_heads.add(nt_id)
+             label = label[:-2]
+         ele = etree.Element('nt', attrib={'id':nt_id,'label':label})
+         labels_for_nt[nt_id] = label
+         root.append(ele)
+
+     ## Terminals
+     lemma_for_ter = {}
+     for ter_id, span_ids in list_t:
+         ele = etree.Element('t',attrib={'id':ter_id})
+         span = etree.Element('span')
+         ele.append(span)
+         for termid in span_ids.split(' '):
+             target = etree.Element('target',attrib={'id':termid})
+             span.append(target)
+             lemma_for_ter[ter_id] = lemma_for_termid.get(termid,'unknown')
+         root.append(ele)
+
+     ##Edges
+     #for edge_id,node_to,node_from in list_edge:
+     for edge_id, node_from, node_to in list_edge:
+         ele = etree.Element('edge',attrib={'id':edge_id,'from':node_from,'to':node_to})
+
+         ## For the comment
+         ##Only non-terminals can be edge targets
+         label_to = labels_for_nt.get(node_to)
+
+         ##Could be a terminal or a non-terminal
+         label_from = labels_for_nt.get(node_from)
+         if label_from is None:
+             label_from = lemma_for_ter.get(node_from,'kk')
+
+         comment = ' '+(edge_id)+' '+(label_to)+' <- '+(label_from)+' '
+
+         if node_from in nonter_heads:
+             ele.set('head','yes')
+         root.append(etree.Comment(comment))
+         root.append(ele)
+
+     return root,cnt_t,cnt_nt,cnt_edge
+
+
+ def visit_node(node,id_parent=None):
+     global list_t, list_nt,list_edge,cnt_t,cnt_nt,cnt_edge
+     if isinstance(node,str): #is a terminal
+         ##Create the terminal
+         t_id = 'ter'+str(cnt_t)
+         cnt_t += 1
+         list_t.append((t_id,str(node)))
+
+         ##Create the edge with the parent
+         edge_id = 'tre'+str(cnt_edge)
+         cnt_edge +=1
+         list_edge.append((edge_id,t_id,id_parent))
+     else: #Is a non-terminal
+         ##Create the non-terminal
+         nt_id = 'nter'+str(cnt_nt)
+         cnt_nt += 1
+         list_nt.append((nt_id,node.node))
+
+         ##Create the linking with the parent
+         if id_parent is not None:
+             edge_id = 'tre'+str(cnt_edge)
+             cnt_edge += 1
+             list_edge.append((edge_id,nt_id,id_parent))
+
+         ##Recurse into the children
+         for child in node:
+             visit_node(child,nt_id)
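For orientation, a minimal sketch of driving `convert_penn_to_kaf` by hand (assuming Python 2 with `lxml`, run from `data/core/` so that `tree.py` is importable; the lemma map and the zero offsets are illustrative, mirroring the docstring example above):

````python
# -*- coding: utf-8 -*-
import sys
import logging
from lxml import etree
from convert_penn_to_kaf import convert_penn_to_kaf

logging.basicConfig(level=logging.DEBUG)

s = '(S (NP (DET The) (NN dog)) (VP (V ate) (NP (DET the) (NN cat))) (. .))'
ids = ['t0 t1', 't2', 't3', 't4', 't5', 't6']   # one span of term ids per leaf
lemmas = {'t0': u'the', 't1': u'dog', 't2': u'eat',   # assumed lemma map
          't3': u'the', 't4': u'cat', 't5': u'.', 't6': u'.'}

# The three trailing arguments are id offsets so that node ids keep
# incrementing across sentences; they start at 0 for the first sentence.
tree_node, cnt_t, cnt_nt, cnt_edge = convert_penn_to_kaf(
    s, ids, logging, lemmas, 0, 0, 0)
etree.ElementTree(tree_node).write(sys.stdout, pretty_print=True)
````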
data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/PKG-INFO
@@ -0,0 +1,10 @@
+ Metadata-Version: 1.0
+ Name: VUKafParserPy
+ Version: 1.0
+ Summary: Library in python to parse kaf files
+ Home-page: UNKNOWN
+ Author: Ruben Izquierdo
+ Author-email: r.izquierdobevia@vu.nl
+ License: UNKNOWN
+ Description: UNKNOWN
+ Platform: UNKNOWN
data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/SOURCES.txt
@@ -0,0 +1,7 @@
+ VUKafParserPy/KafDataObjectsMod.py
+ VUKafParserPy/KafParserMod.py
+ VUKafParserPy/__init__.py
+ VUKafParserPy.egg-info/PKG-INFO
+ VUKafParserPy.egg-info/SOURCES.txt
+ VUKafParserPy.egg-info/dependency_links.txt
+ VUKafParserPy.egg-info/top_level.txt
data/core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/installed-files.txt
@@ -0,0 +1,11 @@
+ ../VUKafParserPy/KafParserMod.py
+ ../VUKafParserPy/__init__.py
+ ../VUKafParserPy/KafDataObjectsMod.py
+ ../VUKafParserPy/KafParserMod.pyc
+ ../VUKafParserPy/__init__.pyc
+ ../VUKafParserPy/KafDataObjectsMod.pyc
+ ./
+ top_level.txt
+ SOURCES.txt
+ PKG-INFO
+ dependency_links.txt
data/core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.py
@@ -0,0 +1,161 @@
+ class KafTermSentiment:
+     def __init__(self):
+         self.resource=None
+         self.polarity=None
+         self.strength=None
+         self.subjectivity=None
+
+     def simpleInit(self,r,p,st,su,sm=None):
+         self.resource=r
+         self.polarity=p
+         self.strength=st
+         self.subjectivity=su
+         self.sentiment_modifier = sm
+
+     def getPolarity(self):
+         return self.polarity
+
+     def getSentimentModifier(self):
+         return self.sentiment_modifier
+
+
+ class KafToken:
+     def __init__(self,wid, value, sent=None, para=None):
+         self.token_id = wid
+         self.value = value
+         self.sent = sent
+         self.para = para
+
+
+ class KafOpinionExpression:
+     def __init__(self,polarity,strength,targets):
+         self.polarity = polarity
+         self.strength = strength
+         self.targets = targets
+
+     def __str__(self):
+         return 'Op_exp==> pol:'+self.polarity+' Str:'+self.strength+' ids:'+'-'.join(self.targets)
+
+ class KafOpinion:
+     def __init__(self,id,holders, targets, opi_exp):
+         self.id = id
+         self.holders = holders
+         self.targets = targets
+         self.opi_exp = opi_exp
+
+     def __str__(self):
+         c='Opinion id'+self.id+'\n'
+         c+=' Holders: '+'-'.join(self.holders)+'\n'
+         c+=' Targets: '+'-'.join(self.targets)+'\n'
+         c+=str(self.opi_exp)
+         return c
+
+
+
+ class KafSingleProperty:
+     def __init__(self,id,type,targets):
+         self.id = id
+         self.type = type
+         self.targets = targets
+
+
+     def get_id(self):
+         return self.id
+
+     def get_type(self):
+         return self.type
+
+     def get_span(self):
+         return self.targets
+
+     def __str__(self):
+         return 'Id: '+self.id+' Type: '+self.type+' ids:'+' '.join(self.targets)
+
+
+ class KafSingleEntity:
+     def __init__(self,id,type,targets):
+         self.id = id
+         self.type = type
+         self.targets = targets
+
+     def get_id(self):
+         return self.id
+
+     def get_type(self):
+         return self.type
+
+     def get_span(self):
+         return self.targets
+
+     def __str__(self):
+         return 'Id: '+self.id+' Type: '+self.type+' ids:'+' '.join(self.targets)
+
+ class KafTerm:
+     def __init__(self):
+         self.tid = None
+         self.lemma = None
+         self.pos = None
+         self.sentiment = None
+         self.list_span_id = []
+
+     def set_list_span_id(self, L):
+         self.list_span_id = L
+
+     def get_list_span(self):
+         return self.list_span_id
+
+     def get_polarity(self):
+         if self.sentiment is not None:
+             return self.sentiment.getPolarity()
+         else:
+             return None
+
+     def get_sentiment_modifier(self):
+         if self.sentiment is not None:
+             return self.sentiment.getSentimentModifier()
+         else:
+             return None
+
+
+     def setSentiment(self,my_sent):
+         self.sentiment = my_sent
+
+     def getSentiment(self):
+         return self.sentiment
+
+     def getLemma(self):
+         return self.lemma
+
+     def setLemma(self,lemma):
+         self.lemma = lemma
+
+     def getPos(self):
+         return self.pos
+
+     def setPos(self,pos):
+         self.pos = pos
+
+     def getId(self):
+         return self.tid
+
+     def setId(self,id):
+         self.tid = id
+
+     def getShortPos(self):
+         if self.pos is None:
+             return None
+         auxpos=self.pos.lower()[0]
+         if auxpos == 'g': auxpos='a'    # KAF adjective -> short tag 'a'
+         elif auxpos == 'a': auxpos='r'  # KAF adverb -> short tag 'r'
+         return auxpos
+
+     def __str__(self):
+         if self.tid and self.lemma and self.pos:
+             return self.tid+'\n\t'+self.lemma.encode('utf-8')+'\n\t'+self.pos
+         else:
+             return 'None'
+
+
+
+
+
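A small sketch of how these objects are meant to be combined (hypothetical values; this mirrors what `getTerms` in `KafParserMod.py` below builds from a parsed KAF file, under Python 2 as in the package's egg-info):

````python
from KafDataObjectsMod import KafTerm, KafTermSentiment

term = KafTerm()
term.setId('t7')
term.setLemma(u'gut')
term.setPos('G')                  # KAF pos tag; lowercased 'g' maps to short pos 'a'
term.set_list_span_id(['w7'])     # token ids covered by this term

sent = KafTermSentiment()
sent.simpleInit('VUSentimentLexicon', 'positive', 'strong', 'subjective')
term.setSentiment(sent)

print(term.getShortPos())         # 'a'
print(term.get_polarity())        # 'positive'
````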
data/core/site-packages/pre_build/VUKafParserPy/KafParserMod.py
@@ -0,0 +1,326 @@
+ ########################################################################
+ # 14 Jan 2013: added function add_attrs_to_layer
+ ########################################################################
+
+ ###################
+ # List of changes #
+ ###################
+ # 14 Jan 2013: added function add_attrs_to_layer
+ # 27 Feb 2013: added code to comply with the DTD
+ # 18 Jun 2013: getSingleProperties adapted to the structure KAF/features/properties/property/references/span/target
+ # 18 Jun 2013: function add_property created for adding the properties to the KAF
+ from lxml import etree
+ from KafDataObjectsMod import *
+ import time
+
+ class KafParser:
+     def __init__(self,filename=None):
+         self.tree=None
+         self.__pathForToken={}
+
+         if filename:
+             self.tree = etree.parse(filename,etree.XMLParser(remove_blank_text=True))
+             ## Do the text tokenization
+             self.__textTokenization()
+         else:
+             root = etree.Element('KAF')
+             root.set('version','v1.opener')
+             root.set('{http://www.w3.org/XML/1998/namespace}lang','en')
+             self.tree = etree.ElementTree(element=root)
+
+     def __textTokenization(self):
+         for wf in self.tree.findall('text/wf'):
+             wid = wf.get('wid')
+             self.__pathForToken[wid] = self.tree.getpath(wf)
+
+
+     def getToken(self,tid):
+         path = self.__pathForToken[tid]
+         return self.tree.xpath(path)[0]
+
+
+     def getLanguage(self):
+         lang = self.tree.getroot().get('{http://www.w3.org/XML/1998/namespace}lang','nl')
+         return lang
+
+     def getTokens(self):
+         for element in self.tree.findall('text/wf'):
+             w_id = element.get('wid')
+             s_id = element.get('sent','0')
+             word = element.text
+             yield (word, s_id, w_id)
+
+     def getTerms(self):
+         if self.tree:
+             for element in self.tree.findall('terms/term'):
+                 kafTermObj = KafTerm()
+                 kafTermObj.setId(element.get('tid'))
+                 kafTermObj.setLemma(element.get('lemma'))
+                 kafTermObj.setPos(element.get('pos'))
+
+                 ## Parsing sentiment
+                 sentiment = element.find('sentiment')
+                 if sentiment is not None:
+                     resource = sentiment.get('resource','')
+                     polarity = sentiment.get('polarity',None)
+                     strength = sentiment.get('strength','')
+                     subjectivity = sentiment.get('subjectivity','')
+                     sentiment_modifier = sentiment.get('sentiment_modifier')
+
+                     my_sent = KafTermSentiment()
+                     my_sent.simpleInit(resource,polarity,strength,subjectivity,sentiment_modifier)
+                     kafTermObj.setSentiment(my_sent)
+
+                 ## Parsing the span
+                 span = element.find('span')
+                 if span is not None:
+                     list_ids = [target.get('id') for target in span.findall('target')]
+                     kafTermObj.set_list_span_id(list_ids)
+
+
+                 yield kafTermObj
+         else:
+             return
+
+
+     def getSentimentTriples(self):
+         data = []
+         if self.tree:
+             for term_element in self.tree.findall('terms/term'):
+                 lemma = term_element.get('lemma')
+                 polarity = None
+                 sentiment_modifier = None
+
+                 sentiment_element = term_element.find('sentiment')
+                 if sentiment_element is not None:
+                     polarity = sentiment_element.get('polarity',None)
+                     sentiment_modifier = sentiment_element.get('sentiment_modifier')
+                 data.append( (lemma,polarity,sentiment_modifier) )
+         return data
+
+
+
+     def addPolarityToTerm(self,termid,my_sentiment_attribs,polarity_pos=None):
+         if self.tree:
+             terms = self.tree.find('terms')
+             if terms is None:
+                 return
+             for element in terms:
+                 if element.get('tid','')==termid:
+
+                     #In case there is no pos info, we use the polarity pos
+                     if not element.get('pos') and polarity_pos is not None:
+                         element.set('pos',polarity_pos)
+                     sentEle = etree.Element('sentiment',attrib=my_sentiment_attribs)
+                     element.append(sentEle)
+
+     def saveToFile(self,filename,myencoding='UTF-8'):
+         if self.tree:
+             self.tree.write(filename,encoding=myencoding,pretty_print=True,xml_declaration=True)
+
+
+     def addLinguisticProcessor(self,name,version, layer, time_stamp=True):
+         aux = self.tree.findall('kafHeader')
+         if len(aux)!=0:
+             kaf_header = aux[0]
+         else:
+             kaf_header = etree.Element('kafHeader')
+             self.tree.getroot().insert(0,kaf_header)
+
+         ## Check if there is already an element for the layer
+         my_lp_ele = None
+
+         for element in kaf_header.findall('linguisticProcessors'):
+             if element.get('layer','')==layer:
+                 my_lp_ele = element
+                 break
+
+         if time_stamp:
+             my_time = time.strftime('%Y-%m-%dT%H:%M:%S%Z')
+         else:
+             my_time = '*'
+
+         my_lp = etree.Element('lp')
+         my_lp.set('timestamp',my_time)
+         my_lp.set('version',version)
+         my_lp.set('name',name)
+
+         if my_lp_ele is not None: #Already an element for linguisticProcessors with the layer
+             my_lp_ele.append(my_lp)
+         else:
+             # Create a new linguisticProcessors element for the layer
+             my_lp_ele = etree.Element('linguisticProcessors')
+             my_lp_ele.set('layer',layer)
+             my_lp_ele.append(my_lp)
+             ## Insert after the last existing linguisticProcessors element,
+             ## or append to the header if there is none yet
+             existing = kaf_header.findall('linguisticProcessors')
+             if existing:
+                 kaf_header.insert(kaf_header.index(existing[-1])+1,my_lp_ele)
+             else:
+                 kaf_header.append(my_lp_ele)
+
+
+     def addLayer(self,type,element,first_char_id=None):
+         if first_char_id is None:
+             first_char_id = type[0]
+
+         ## Check if there is already a layer for the type
+         layer_element = self.tree.find(type)
+
+         if layer_element is None:
+             layer_element = etree.Element(type)
+             self.tree.getroot().append(layer_element)
+             ## The id is going to be the first one
+             new_id = first_char_id+'1'
+         else:
+             ## We need to know how many elements there are in the layer
+             current_n = len(layer_element.getchildren())
+             new_id = first_char_id+str(current_n+1)
+
+
+         ## At this point layer_element points to the correct element, existing or created
+
+         element.set(first_char_id+'id',new_id)
+         layer_element.append(element)
+         return new_id
+
+     def addElementToLayer(self,layer, element,first_char_id=None):
+         return self.addLayer(layer,element,first_char_id)
+
+     def add_attrs_to_layer(self,layer,attrs):
+         layer_element = self.tree.find(layer)
+         if layer_element is not None:
+             for att, val in attrs.items():
+                 layer_element.set(att,val)
+
+
+     def addAttributeToElement(self,path,str_id, id, attribute, value,sub_path=None):
+         for element in self.tree.findall(path):
+             if id is not None and element.get(str_id,None) == id:
+                 if sub_path is not None:
+                     elements = element.findall(sub_path)
+                     if len(elements)!=0: element = elements[0]
+                 element.set(attribute,value)
+                 return
+
+
+     ## This works with the original definition of the property layer
+     ## KAF -> properties -> property* -> span* -> target*
+     def getSingleProperties_old(self):
+         for element in self.tree.findall('properties/property'):
+             my_id = element.get('pid')
+             my_type = element.get('type')
+             ref = element.find('references')
+             if ref is not None:
+                 element = ref
+             for span_element in element.findall('span'):
+                 target_ids = [target_element.get('id') for target_element in span_element.findall('target')]
+                 my_prop = KafSingleProperty(my_id,my_type,target_ids)
+                 yield my_prop
+
+     ## 18-June-2013
+     def getSingleProperties(self):
+         for property in self.tree.findall('features/properties/property'):
+             my_id = property.get('pid')
+             if my_id is None:
+                 my_id = property.get('fpid')
+             my_type = property.get('lemma')
+             for span_element in property.findall('references/span'):
+                 target_ids = [target_element.get('id') for target_element in span_element.findall('target')]
+                 my_prop = KafSingleProperty(my_id,my_type,target_ids)
+                 yield my_prop
+
+     # This function adds a new property of the given type with the given list of ids
+     # my_type -> 'sleeping comfort'   list_ids -> ['id1','id2']
+     # It creates the features/properties layers if needed
+     # It agglomerates all the properties for the same type under the same property element
+     # It calculates the number for the identifier automatically, depending on the number
+     # of existing properties
+     def add_property(self,my_type,list_ids):
+
+         #Looking for the features layer or creating it
+         feature_layer = self.tree.find('features')
+         if feature_layer is None:
+             feature_layer = etree.Element('features')
+             self.tree.getroot().append(feature_layer)
+
+         #Looking for the properties layer
+         properties_layer = feature_layer.find('properties')
+         if properties_layer is None:
+             properties_layer = etree.Element('properties')
+             feature_layer.append(properties_layer)
+
+         num_props = 0
+         property_layer = None
+         for property in properties_layer.findall('property'):
+             num_props += 1
+             prop_type = property.get('lemma')
+             if prop_type == my_type:
+                 property_layer = property
+                 break
+
+         if property_layer is None: # There is no property yet for that type, so create one
+             property_layer = etree.Element('property')
+             property_layer.set('pid','p'+str(num_props+1))
+             property_layer.set('lemma',my_type)
+             properties_layer.append(property_layer)
+
+
+         references = property_layer.find('references')
+         if references is None:
+             references = etree.Element('references')
+             property_layer.append(references)
+         ## Create the new span
+         span = etree.Element('span')
+         references.append(span)
+         for my_id in list_ids:
+             span.append(etree.Element('target',attrib={'id':my_id}))
+
+
+
+
+     def getSingleEntities(self):
+         for element in self.tree.findall('entities/entity'):
+             my_id = element.get('eid')
+             my_type = element.get('type')
+             my_path_to_span = None
+             ref = element.find('references')
+             if ref is not None:
+                 my_path_to_span = 'references/span'
+             else:
+                 my_path_to_span = 'span'
+
+             for span_element in element.findall(my_path_to_span):
+                 target_ids = [target_element.get('id') for target_element in span_element.findall('target')]
+                 my_prop = KafSingleEntity(my_id,my_type,target_ids)
+                 yield my_prop
+
+
+     def getOpinions(self):
+         for element in self.tree.findall('opinions/opinion'):
+             my_id = element.get('oid')
+
+             tar_ids_hol = []
+             tar_ids_tar = []
+             polarity = strength = ''
+             tar_ids_exp = []
+
+             #Holder
+             opi_hol_eles = element.findall('opinion_holder')
+             if len(opi_hol_eles)!=0:
+                 opi_hol_ele = opi_hol_eles[0]
+                 tar_ids_hol = [t_ele.get('id') for t_ele in opi_hol_ele.findall('span/target')]
+
+             #Target
+             opi_tar_eles = element.findall('opinion_target')
+             if len(opi_tar_eles) != 0:
+                 opi_tar_ele = opi_tar_eles[0]
+                 tar_ids_tar = [t_ele.get('id') for t_ele in opi_tar_ele.findall('span/target')]
+
+             ## Opinion expression
+             opi_exp_eles = element.findall('opinion_expression')
+             if len(opi_exp_eles) != 0:
+                 opi_exp_ele = opi_exp_eles[0]
+                 polarity = opi_exp_ele.get('polarity','')
+                 strength = opi_exp_ele.get('strength','')
+                 tar_ids_exp = [t_ele.get('id') for t_ele in opi_exp_ele.findall('span/target')]
+
+             yield KafOpinion(my_id,tar_ids_hol, tar_ids_tar, KafOpinionExpression(polarity, strength,tar_ids_exp))
+
+
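Finally, a minimal sketch of the parser's round trip (assuming Python 2 with `lxml`, and a KAF file `input.kaf` with text and terms layers; the file names are illustrative):

````python
from KafParserMod import KafParser

kaf = KafParser('input.kaf')      # hypothetical input file
print(kaf.getLanguage())

for word, sent_id, word_id in kaf.getTokens():
    print(word_id + ' ' + sent_id + ' ' + word.encode('utf-8'))

for term in kaf.getTerms():
    print(term.getId() + ' ' + (term.getLemma() or u'').encode('utf-8'))

# Record this run in the kafHeader, then write the KAF back out.
kaf.addLinguisticProcessor('example-module', '1.0', 'constituents')
kaf.saveToFile('output.kaf')
````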