opener-coreference-base 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 91b868225cc8ddadc6790ae9ff424de0d5e13591
4
+ data.tar.gz: 7d2944067210631815353e6e9520521309996a23
5
+ SHA512:
6
+ metadata.gz: bb964bf0f8ec83884b1876785285c42827422aee9e65a917518d37ef96386b8b09c33e47d1fb106b2f1558b19a33d1f0cb8c8bc383a68b59e8d3e86e4ae8997a
7
+ data.tar.gz: e4585ebcf72ec6992853893daf37103b1960364ba0952e97ac3fb63b816a1c2a0c75ba0766f50a351cc8bb92e85d36b5b8df8b3e0c829cef54ec7e5e61fa3b87
@@ -0,0 +1,97 @@
1
+ [![Build Status](https://drone.io/github.com/opener-project/coreference-base/status.png)](https://drone.io/github.com/opener-project/coreference-base/latest)
2
+
3
+ # Coreference
4
+
5
+ This Gem provides coreference resolution for various languages such as English
6
+ and Spanish.
7
+
8
+ The CorefGraph-en module provides an implementation of the Multi-Sieve Pass
9
+ system for Coreference Resolution originally proposed by the
10
+ Stanford NLP Group (Raghunathan et al., 2010; Lee et al., 2011) and (Lee et
11
+ al., 2013). This system proposes a number of deterministic passes, ranging
12
+ from high precision to higher recall, each dealing with a different manner in
13
+ which coreference manifests itself in running text.
14
+
15
+ Although more sieves are available, in order to facilitate the integration of
16
+ the coreference system for the 6 languages of OpeNER we have included here 4
17
+ sieves: Exact String Matching, Precise Constructs, Strict Head Match and
18
+ Pronoun Match (the sieve nomenclature follows Lee et al (2013)). Furthermore,
19
+ as it has been reported, these sieves are responsible for most of the
20
+ performance in the Stanford system.
21
+
22
+ The implementation is a result of a collaboration between the IXA NLP
23
+ (http://ixa.si.ehu.es) and LinguaMedia Groups (http://linguamedia.deusto.es).
24
+
25
+ ## Requirements
26
+
27
+ * Ruby 1.9.2 or newer
28
+ * Python 2.7 or newer
29
+ * Pip 1.3.1 or newer
30
+
31
+ ## Installation
32
+
33
+ Installing as a regular Gem:
34
+
35
+ gem install opener-coreference-base
36
+
37
+ Using Bundler:
38
+
39
+ gem 'opener-coreference-base',
40
+ :git => 'git@github.com:opener-project/coreference-base.git',
41
+ :branch => 'master'
42
+
43
+ Using specific install:
44
+
45
+ gem install specific_install
46
+ gem specific_install opener-coreference-base \
47
+ -l https://github.com/opener-project/coreference-base.git
48
+
49
+ ## Usage
50
+
51
+ To run the program execute:
52
+
53
+ coreference-base -l (de|en|es|fr|it|nl) -i input.kaf
54
+
55
+ Corefgraph will output KAF via standard output with the <coreference> clusters
56
+ added to the KAF input received. Note that for the full functionality of
57
+ CorefGraph you will need to provide the <constituents> elements with the heads of (at
58
+ least) the Noun Phrases marked, as it can be seen in the treebank input
59
+ examples in the resource/examples directory. If you do not provide heads, only
60
+ Exact String Match will work properly, whereas Precise Constructs, Strict Head
61
+ Match and Pronoun Match will not.
62
+
63
+ For a full explanation of how the Multi Sieve Pass system works see
64
+ documentation in resources/doc.
65
+
66
+ # Adapting CorefGraph-en to your language
67
+
68
+ There are a number of changes needed to be made to make CorefGraph work for
69
+ other languages. Although we have tried to keep the language dependent features
70
+ to a minimum, you will still need to create some dictionaries for your own
71
+ language and make some very minor changes in the code. Here is the list of every
72
+ file in the Corefgraph module that needs to be changed. Every change except
73
+ one (see below) is to be done in the **$project/core/corefgraph/resources** directory:
74
+
75
+ * dictionaries/$lang\_determiners.py
76
+ * dictionaries/$lang\_pronouns.py
77
+ * dictionaries/$lang\_verbs.py
78
+ * dictionaries/$lang\_stopwords.py
79
+ * dictionaries/$lang\_temporals.py
80
+
81
+ * tagset/$TAGSETNAME\_pos.py
82
+ * tagset/$TAGSETNAME\_constituent.py
83
+
84
+ * files/animate/$lang.animate.txt
85
+ * files/animate/$lang.inanimate.txt
86
+
87
+ * files/demonym/$lang.txt
88
+
89
+ * files/gender/$lang.male.unigrams.txt
90
+ * files/gender/$lang.female.unigrams.txt
91
+ * files/gender/$lang.neutral.unigrams.txt
92
+ * files/gender/$lang.namegender.combine.txt
93
+ * files/gender/$lang.gender.data
94
+
95
+ * files/number/$lang.plural.unigrams.txt
96
+ * files/number/$lang.singular.unigrams.txt
97
+
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require_relative '../lib/opener/coreferences/base'
4
+
5
+ kernel = Opener::Coreferences::Base.new(:args => ARGV, :language => 'de')
6
+ input = STDIN.tty? ? nil : STDIN.read
7
+
8
+ kernel.run!(input)
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require_relative '../lib/opener/coreferences/base'
4
+
5
+ kernel = Opener::Coreferences::Base.new(:args => ARGV, :language => 'en')
6
+ input = STDIN.tty? ? nil : STDIN.read
7
+
8
+ kernel.run!(input)
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require_relative '../lib/opener/coreferences/base'
4
+
5
+ kernel = Opener::Coreferences::Base.new(:args => ARGV, :language => 'es')
6
+ input = STDIN.tty? ? nil : STDIN.read
7
+
8
+ kernel.run!(input)
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require_relative '../lib/opener/coreferences/base'
4
+
5
+ kernel = Opener::Coreferences::Base.new(:args => ARGV, :language => 'fr')
6
+ input = STDIN.tty? ? nil : STDIN.read
7
+
8
+ kernel.run!(input)
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require_relative '../lib/opener/coreferences/base'
4
+
5
+ kernel = Opener::Coreferences::Base.new(:args => ARGV, :language => 'it')
6
+ input = STDIN.tty? ? nil : STDIN.read
7
+
8
+ kernel.run!(input)
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require_relative '../lib/opener/coreferences/base'
4
+
5
+ kernel = Opener::Coreferences::Base.new(:args => ARGV, :language => 'nl')
6
+ input = STDIN.tty? ? nil : STDIN.read
7
+
8
+ kernel.run!(input)
@@ -0,0 +1,10 @@
1
+ Metadata-Version: 1.0
2
+ Name: VUKafParserPy
3
+ Version: 1.0
4
+ Summary: Library in python to parse kaf files
5
+ Home-page: UNKNOWN
6
+ Author: Ruben Izquierdo
7
+ Author-email: r.izquierdobevia@vu.nl
8
+ License: UNKNOWN
9
+ Description: UNKNOWN
10
+ Platform: UNKNOWN
@@ -0,0 +1,7 @@
1
+ VUKafParserPy/KafDataObjectsMod.py
2
+ VUKafParserPy/KafParserMod.py
3
+ VUKafParserPy/__init__.py
4
+ VUKafParserPy.egg-info/PKG-INFO
5
+ VUKafParserPy.egg-info/SOURCES.txt
6
+ VUKafParserPy.egg-info/dependency_links.txt
7
+ VUKafParserPy.egg-info/top_level.txt
@@ -0,0 +1,11 @@
1
+ ../VUKafParserPy/KafParserMod.py
2
+ ../VUKafParserPy/__init__.py
3
+ ../VUKafParserPy/KafDataObjectsMod.py
4
+ ../VUKafParserPy/KafParserMod.pyc
5
+ ../VUKafParserPy/__init__.pyc
6
+ ../VUKafParserPy/KafDataObjectsMod.pyc
7
+ ./
8
+ top_level.txt
9
+ SOURCES.txt
10
+ PKG-INFO
11
+ dependency_links.txt
@@ -0,0 +1,165 @@
1
class KafTermSentiment(object):
    """Sentiment information attached to a single KAF term.

    Holds the ``resource``, ``polarity``, ``strength``, ``subjectivity`` and
    ``sentiment_modifier`` values of a term's sentiment annotation.  Every
    field is ``None`` until :meth:`simpleInit` is called.
    """

    def __init__(self):
        self.resource = None
        self.polarity = None
        self.strength = None
        self.subjectivity = None
        # Initialised here as well so that getSentimentModifier() does not
        # raise AttributeError when simpleInit() has not been called yet.
        self.sentiment_modifier = None

    def simpleInit(self, r, p, st, su, sm=None):
        """Fill in all fields in one call.

        r  -- resource
        p  -- polarity
        st -- strength
        su -- subjectivity
        sm -- sentiment modifier (optional, defaults to None)
        """
        self.resource = r
        self.polarity = p
        self.strength = st
        self.subjectivity = su
        self.sentiment_modifier = sm

    def getPolarity(self):
        """Return the stored polarity (None when unset)."""
        return self.polarity

    def getSentimentModifier(self):
        """Return the stored sentiment modifier (None when unset)."""
        return self.sentiment_modifier
20
+
21
+
22
class KafToken(object):
    """Plain record for one token.

    Attributes:
        token_id -- the ``wid`` passed to the constructor
        value    -- the token value
        sent     -- sentence identifier (optional)
        para     -- paragraph identifier (optional)
    """

    def __init__(self, wid, value, sent=None, para=None):
        self.token_id = wid
        self.value = value
        self.sent = sent
        self.para = para

    def __repr__(self):
        # Debugging aid only; no caller depends on this representation.
        return 'KafToken(%r, %r, sent=%r, para=%r)' % (
            self.token_id, self.value, self.sent, self.para)
28
+
29
+
30
class KafOpinionExpression(object):
    """Expression part of an opinion: polarity, strength and span ids.

    ``targets`` is the list of identifiers that make up the expression span.
    """

    def __init__(self, polarity, strength, targets):
        self.polarity = polarity
        self.strength = strength
        self.targets = targets

    def __str__(self):
        # %-formatting instead of '+' so that a numeric strength or a missing
        # (None) value is printed rather than raising TypeError.
        ids = '-'.join('%s' % (target,) for target in self.targets)
        return 'Op_exp==> pol:%s Str:%s ids:%s' % (
            self.polarity, self.strength, ids)
38
+
39
class KafOpinion(object):
    """One opinion: identifier, holder span, target span and expression.

    ``holders`` and ``targets`` are lists of identifiers; ``opi_exp`` is the
    object describing the opinion expression (it is only ever passed to
    ``str()`` here).
    """

    def __init__(self, id, holders, targets, opi_exp):
        # Parameter names are part of the public signature and are kept as-is
        # even though ``id`` shadows the builtin.
        self.id = id
        self.holders = holders
        self.targets = targets
        self.opi_exp = opi_exp

    def __str__(self):
        # %-formatting so that a missing id (None) or a None entry in a span
        # is printed instead of raising TypeError.
        holders = '-'.join('%s' % (item,) for item in self.holders)
        targets = '-'.join('%s' % (item,) for item in self.targets)
        c = 'Opinion id%s\n' % (self.id,)
        c += ' Holders: ' + holders + '\n'
        c += ' Targets: ' + targets + '\n'
        c += str(self.opi_exp)
        return c
52
+
53
+
54
+
55
class KafSingleProperty(object):
    """One property occurrence: identifier, type and the span of target ids."""

    def __init__(self, id, type, targets):
        # Parameter names are part of the public signature and are kept as-is
        # even though they shadow builtins.
        self.id = id
        self.type = type
        self.targets = targets

    def get_id(self):
        """Return the property identifier."""
        return self.id

    def get_type(self):
        """Return the property type."""
        return self.type

    def get_span(self):
        """Return the list of target ids covered by this property."""
        return self.targets

    def __str__(self):
        # %-formatting so a missing id/type (None) is printed instead of
        # raising TypeError on string concatenation.
        ids = ' '.join('%s' % (target,) for target in self.targets)
        return 'Id: %s Type: %s ids:%s' % (self.id, self.type, ids)
73
+
74
+
75
class KafSingleEntity(object):
    """One entity occurrence: identifier, type and the span of target ids."""

    def __init__(self, id, type, targets):
        # Parameter names are part of the public signature and are kept as-is
        # even though they shadow builtins.
        self.id = id
        self.type = type
        self.targets = targets

    def get_id(self):
        """Return the entity identifier."""
        return self.id

    def get_type(self):
        """Return the entity type."""
        return self.type

    def get_span(self):
        """Return the list of target ids covered by this entity."""
        return self.targets

    def __str__(self):
        # %-formatting so a missing id/type (None) is printed instead of
        # raising TypeError on string concatenation.
        ids = ' '.join('%s' % (target,) for target in self.targets)
        return 'Id: %s Type: %s ids:%s' % (self.id, self.type, ids)
92
+
93
class KafTerm(object):
    """One KAF term: id, lemma, part of speech, morphofeat, sentiment, span.

    ``sentiment`` is expected to expose ``getPolarity()`` and
    ``getSentimentModifier()``; ``list_span_id`` holds the ids the term spans.
    """

    def __init__(self):
        self.tid = None
        self.lemma = None
        self.pos = None
        self.morphofeat = None
        self.sentiment = None
        self.list_span_id = []

    def get_morphofeat(self):
        """Return the morphofeat value (None when unset)."""
        return self.morphofeat

    def set_list_span_id(self, L):
        """Replace the list of span ids with ``L``."""
        self.list_span_id = L

    def get_list_span(self):
        """Return the list of span ids."""
        return self.list_span_id

    def get_polarity(self):
        """Return the polarity of the attached sentiment, or None without one."""
        if self.sentiment is None:
            return None
        return self.sentiment.getPolarity()

    def get_sentiment_modifier(self):
        """Return the sentiment modifier of the attached sentiment, or None."""
        if self.sentiment is None:
            return None
        return self.sentiment.getSentimentModifier()

    def setSentiment(self, my_sent):
        """Attach a sentiment object to the term."""
        self.sentiment = my_sent

    def getSentiment(self):
        """Return the attached sentiment object (None when unset)."""
        return self.sentiment

    def getLemma(self):
        """Return the lemma."""
        return self.lemma

    def setLemma(self, lemma):
        """Set the lemma."""
        self.lemma = lemma

    def getPos(self):
        """Return the part-of-speech tag."""
        return self.pos

    def setPos(self, pos):
        """Set the part-of-speech tag."""
        self.pos = pos

    def getId(self):
        """Return the term id."""
        return self.tid

    def setId(self, id):
        """Set the term id."""
        self.tid = id

    def getShortPos(self):
        """Return a one-letter, lower-case form of the pos tag.

        The first letter of the tag is used, with 'g' rewritten to 'a' and
        'a' rewritten to 'r'.  Returns None when no pos is set; an empty pos
        string is treated the same way instead of raising IndexError.
        """
        if not self.pos:
            return None
        auxpos = self.pos.lower()[0]
        # NOTE(review): presumably maps KAF 'G' (adjective) / 'A' (adverb) to
        # WordNet-style 'a' / 'r' -- confirm against the tagset in use.
        if auxpos == 'g':
            auxpos = 'a'
        elif auxpos == 'a':
            auxpos = 'r'
        return auxpos

    def __str__(self):
        if not (self.tid and self.lemma and self.pos):
            return 'None'
        lemma = self.lemma
        if not isinstance(lemma, str):
            # Python 2 ``unicode`` lemma: encode it so the result is a byte
            # string.  A native ``str`` lemma is used unchanged, which avoids
            # the implicit ASCII decode (Python 2) and the str/bytes
            # concatenation error (Python 3).
            lemma = lemma.encode('utf-8')
        return self.tid + '\n\t' + lemma + '\n\t' + self.pos
161
+
162
+
163
+
164
+
165
+
@@ -0,0 +1,439 @@
1
+ ########################################################################
2
+ # 14 Jan 2013: added function add_attrs_to_layer
3
+ ########################################################################
4
+
5
+ ###################
6
+ # List of changes #
7
+ ###################
8
+ # 14 Jan 2013: added function add_attrs_to_layer
9
+ # 27 Feb 2013: added code for comply with DTD
10
+ # 18 Jun 2013: getSingleProperties adapted to the structure KAF/features/properties/property/references/span/target
11
+ # 18 Jun 2013: function add_property created for adding the properties to the KAF
12
+
13
+
14
+ from lxml import etree
15
+ from KafDataObjectsMod import *
16
+ import time
17
+
18
class KafParser(object):
    """Read, query and extend a KAF document held as an element tree.

    The document is either parsed from ``filename`` or, when no filename is
    given, created empty: a bare ``KAF`` root with ``version="v1.opener"``
    and ``xml:lang="en"``.  Layers are looked up with ``find``/``findall``
    paths relative to the root element.
    """

    # Qualified name of the xml:lang attribute on the root element.
    _XML_LANG = '{http://www.w3.org/XML/1998/namespace}lang'

    def __init__(self, filename=None):
        self.tree = None
        # wid -> path of the corresponding <wf> element (filled when parsing).
        self.__pathForToken = {}
        # token id -> [term ids]; built lazily by get_term_ids_for_token_id().
        self.__term_ids_for_token_id = None

        if filename:
            parser = etree.XMLParser(remove_blank_text=True, strip_cdata=False)
            self.tree = etree.parse(filename, parser)
            ## Do the text tokenization
            self.__textTokenization()
        else:
            root = etree.Element('KAF')
            root.set('version', 'v1.opener')
            root.set(self._XML_LANG, 'en')
            self.tree = etree.ElementTree(element=root)

    def __textTokenization(self):
        """Index the path of every ``text/wf`` element by its ``wid``."""
        for wf in self.tree.findall('text/wf'):
            self.__pathForToken[wf.get('wid')] = self.tree.getpath(wf)

    def getToken(self, tid):
        """Return the ``wf`` element indexed under ``tid``, or None if unknown."""
        path = self.__pathForToken.get(tid)
        if path is None:
            return None
        return self.tree.xpath(path)[0]

    def getLanguage(self):
        """Return the root's ``xml:lang`` value, or 'nl' when it is absent."""
        return self.tree.getroot().get(self._XML_LANG, 'nl')

    ## Return a list of (sentence_id, TOKENS) where tokens is a list of (token_id,token)
    ## [(s_id1, T1), (sent_id2, T2)....]
    ## T1 --> [(tokenid, token), (tokenid2,token2)....]
    def get_tokens_in_sentences(self):
        """Group the ``text/wf`` tokens by consecutive ``sent`` value.

        Returns an empty list when the document has no ``wf`` elements
        (previously this raised because the sentence id was never bound).
        """
        sents = []
        current = []
        previous_sent = None
        for element in self.tree.findall('text/wf'):
            w_id = element.get('wid')
            s_id = element.get('sent')
            word = element.text

            # A change of sentence id closes the group collected so far.
            if previous_sent is not None and s_id != previous_sent:
                sents.append((previous_sent, current))
                current = []
            current.append((w_id, word))
            previous_sent = s_id

        # Flush the last open group, if any token was seen at all.
        if current:
            sents.append((previous_sent, current))
        return sents

    def get_term_ids_for_token_id(self, tok_id):
        """Return the ids of the terms whose span targets ``tok_id``.

        The token -> terms index is built on the first call and reused
        afterwards; unknown tokens yield an empty list.
        """
        if self.__term_ids_for_token_id is None:
            index = {}
            for element in self.tree.findall('terms/term'):
                term_id = element.get('tid')
                for target in element.findall('span/target'):
                    index.setdefault(target.get('id'), []).append(term_id)
            self.__term_ids_for_token_id = index
        return self.__term_ids_for_token_id.get(tok_id, [])

    def getTokens(self):
        """Yield ``(word, sentence_id, word_id)`` for every ``text/wf`` element.

        A missing ``sent`` attribute is reported as '0'.
        """
        for element in self.tree.findall('text/wf'):
            w_id = element.get('wid')
            s_id = element.get('sent', '0')
            word = element.text
            yield (word, s_id, w_id)

    def getTerms(self):
        """Yield a ``KafTerm`` for every ``terms/term`` element."""
        if self.tree is None:
            return
        for element in self.tree.findall('terms/term'):
            kafTermObj = KafTerm()
            kafTermObj.setId(element.get('tid'))
            kafTermObj.setLemma(element.get('lemma'))
            kafTermObj.setPos(element.get('pos'))
            kafTermObj.morphofeat = element.get('morphofeat')

            ## Parsing sentiment
            sentiment = element.find('sentiment')
            if sentiment is not None:
                my_sent = KafTermSentiment()
                my_sent.simpleInit(sentiment.get('resource', ''),
                                   sentiment.get('polarity', None),
                                   sentiment.get('strength', ''),
                                   sentiment.get('subjectivity', ''),
                                   sentiment.get('sentiment_modifier'))
                kafTermObj.setSentiment(my_sent)

            ## Parsing the span
            span = element.find('span')
            if span is not None:
                list_ids = [target.get('id') for target in span.findall('target')]
                kafTermObj.set_list_span_id(list_ids)

            yield kafTermObj

    def getSentimentTriples(self):
        """Return ``[(lemma, polarity, sentiment_modifier), ...]`` for all terms.

        Polarity and modifier are None for terms without a ``sentiment`` child.
        """
        data = []
        if self.tree is None:
            return data
        for term_element in self.tree.findall('terms/term'):
            lemma = term_element.get('lemma')
            polarity = None
            sentiment_modifier = None

            sentiment_element = term_element.find('sentiment')
            if sentiment_element is not None:
                polarity = sentiment_element.get('polarity', None)
                sentiment_modifier = sentiment_element.get('sentiment_modifier')
            data.append((lemma, polarity, sentiment_modifier))
        return data

    def addPolarityToTerm(self, termid, my_sentiment_attribs, polarity_pos=None):
        """Append a ``sentiment`` element to the term(s) whose tid is ``termid``.

        ``my_sentiment_attribs`` becomes the attribute dict of the new element.
        When the term has no ``pos`` and ``polarity_pos`` is given, the pos is
        set from it.  Does nothing when the document has no ``terms`` layer.
        """
        if self.tree is None:
            return
        terms_layer = self.tree.find('terms')
        if terms_layer is None:
            # No terms layer: nothing to annotate (iterating None would raise).
            return
        for element in terms_layer:
            if element.get('tid', '') == termid:
                #In case there is no pos info, we use the polarityPos
                if not element.get('pos') and polarity_pos is not None:
                    element.set('pos', polarity_pos)
                sentEle = etree.Element('sentiment', attrib=my_sentiment_attribs)
                element.append(sentEle)

    def saveToFile(self, filename, myencoding='UTF-8'):
        """Write the tree to ``filename``, pretty-printed with an XML declaration."""
        if self.tree is not None:
            self.tree.write(filename, encoding=myencoding, pretty_print=True,
                            xml_declaration=True)

    def addLinguisticProcessor(self, name, version, layer, time_stamp=True):
        """Record an ``lp`` entry (name, version, timestamp) for ``layer``.

        The entry is appended to the ``kafHeader/linguisticProcessors`` element
        whose ``layer`` attribute matches; the header and the container are
        created when missing.  With ``time_stamp`` false the timestamp is '*'.
        """
        kaf_header = self.tree.find('kafHeader')
        if kaf_header is None:
            kaf_header = etree.Element('kafHeader')
            self.tree.getroot().insert(0, kaf_header)

        ## Check if there is already element for the layer
        containers = kaf_header.findall('linguisticProcessors')
        my_lp_ele = None
        for container in containers:
            if container.get('layer', '') == layer:
                my_lp_ele = container
                break

        if time_stamp:
            my_time = time.strftime('%Y-%m-%dT%H:%M:%S%Z')
        else:
            my_time = '*'

        my_lp = etree.Element('lp')
        my_lp.set('timestamp', my_time)
        my_lp.set('version', version)
        my_lp.set('name', name)

        if my_lp_ele is None:
            # Create a new element for the LP layer
            my_lp_ele = etree.Element('linguisticProcessors')
            my_lp_ele.set('layer', layer)
            if containers:
                ## Inserted right after the last existing linguisticProcessors
                idx = list(kaf_header).index(containers[-1])
                kaf_header.insert(idx + 1, my_lp_ele)
            else:
                kaf_header.append(my_lp_ele)
        my_lp_ele.append(my_lp)

    def addLayer(self, type, element, first_char_id=None):
        """Append ``element`` to the layer named ``type`` and return its new id.

        The layer is created under the root when missing.  The id is
        ``first_char_id`` (default: first character of ``type``) followed by
        the number of children already in the layer plus one, and is stored
        on ``element`` in the attribute ``<first_char_id>id``.
        """
        if first_char_id is None:
            first_char_id = type[0]

        ## Check if there is already layer for the type
        layer_element = self.tree.find(type)
        if layer_element is None:
            layer_element = etree.Element(type)
            self.tree.getroot().append(layer_element)

        ## We need to know how many elements there are in the layer
        ## (len() replaces the deprecated getchildren(); 0 for a new layer)
        new_id = first_char_id + str(len(layer_element) + 1)

        element.set(first_char_id + 'id', new_id)
        layer_element.append(element)
        return new_id

    def addElementToLayer(self, layer, element, first_char_id=None):
        """Alias of :meth:`addLayer`."""
        return self.addLayer(layer, element, first_char_id)

    def add_attrs_to_layer(self, layer, attrs):
        """Set every key/value of ``attrs`` on the ``layer`` element, if present."""
        layer_element = self.tree.find(layer)
        if layer_element is not None:
            for att, val in attrs.items():
                layer_element.set(att, val)

    def addAttributeToElement(self, path, str_id, id, attribute, value, sub_path=None):
        """Set ``attribute=value`` on the first ``path`` element whose
        ``str_id`` attribute equals ``id``.

        When ``sub_path`` is given and matches below that element, the first
        match receives the attribute instead.  Only the first matching
        element is modified; nothing happens when ``id`` is None.
        """
        if id is None:
            return
        for element in self.tree.findall(path):
            if element.get(str_id, None) == id:
                if sub_path is not None:
                    elements = element.findall(sub_path)
                    if len(elements) != 0:
                        element = elements[0]
                element.set(attribute, value)
                return

    ## This works with the original definition of the property layer
    ## KAF -> properties -> property* -> span* -> target*
    def getSingleProperties_old(self):
        """Yield a ``KafSingleProperty`` per span of ``properties/property``.

        Spans are read from a ``references`` child when there is one,
        otherwise directly from the property element.
        """
        for element in self.tree.findall('properties/property'):
            my_id = element.get('pid')
            my_type = element.get('type')
            ref = element.find('references')
            span_parent = element if ref is None else ref
            for span_element in span_parent.findall('span'):
                target_ids = [target_element.get('id')
                              for target_element in span_element.findall('target')]
                yield KafSingleProperty(my_id, my_type, target_ids)

    ## 18-June-2013
    def getSingleProperties(self):
        """Yield a ``KafSingleProperty`` per span of
        ``features/properties/property``.

        The id is the ``pid`` attribute (falling back to ``fpid``) and the
        type is the ``lemma`` attribute.
        """
        for prop in self.tree.findall('features/properties/property'):
            my_id = prop.get('pid')
            if my_id is None:
                my_id = prop.get('fpid')
            my_type = prop.get('lemma')
            for span_element in prop.findall('references/span'):
                target_ids = [target_element.get('id')
                              for target_element in span_element.findall('target')]
                yield KafSingleProperty(my_id, my_type, target_ids)

    # This function adds a new property of the type given with the list of ids given
    # my_type -> 'sleeping comfort' list_ids = ['id1','id2']
    # It creates the features/properties layers in case
    # Agglomerates all the properties for the same TYPE under the same property element
    # It calculates automatically the number for the identifier depending on the number
    # of properties existing
    def add_property(self, my_type, list_ids, comment=None):
        """Add a span with ``list_ids`` to the property whose lemma is ``my_type``.

        Missing ``features``/``properties``/``property``/``references``
        elements are created.  ``comment``, when given, is inserted as an XML
        comment just before the new span.
        """
        #Looking for feature layer or creating it
        feature_layer = self.tree.find('features')
        if feature_layer is None:
            feature_layer = etree.Element('features')
            self.tree.getroot().append(feature_layer)

        #Looking for properties layer
        properties_layer = feature_layer.find('properties')
        if properties_layer is None:
            properties_layer = etree.Element('properties')
            feature_layer.append(properties_layer)

        num_props = 0
        property_layer = None
        for prop in properties_layer.findall('property'):
            num_props += 1
            if prop.get('lemma') == my_type:
                property_layer = prop
                break

        if property_layer is None:  # There is no property for that type yet, create one
            property_layer = etree.Element('property')
            property_layer.set('pid', 'p' + str(num_props + 1))
            property_layer.set('lemma', my_type)
            properties_layer.append(property_layer)

        references = property_layer.find('references')
        if references is None:
            references = etree.Element('references')
            property_layer.append(references)

        ## Create the new span
        if comment is not None:
            references.append(etree.Comment(comment))
        span = etree.Element('span')
        references.append(span)
        for my_id in list_ids:
            span.append(etree.Element('target', attrib={'id': my_id}))

    def getSingleEntities(self):
        """Yield a ``KafSingleEntity`` per span of ``entities/entity``.

        Spans are read from ``references/span`` when the entity has a
        ``references`` child, otherwise from ``span``.
        """
        for element in self.tree.findall('entities/entity'):
            my_id = element.get('eid')
            my_type = element.get('type')
            if element.find('references') is not None:
                my_path_to_span = 'references/span'
            else:
                my_path_to_span = 'span'

            for span_element in element.findall(my_path_to_span):
                target_ids = [target_element.get('id')
                              for target_element in span_element.findall('target')]
                yield KafSingleEntity(my_id, my_type, target_ids)

    def getOpinions(self):
        """Yield a ``KafOpinion`` for every ``opinions/opinion`` element.

        Holder, target and expression spans default to empty lists, and
        polarity/strength to '', when the corresponding child is missing.
        """
        for element in self.tree.findall('opinions/opinion'):
            my_id = element.get('oid')

            tar_ids_hol = []
            tar_ids_tar = []
            # 'strength' was misspelled here before, so an opinion without an
            # opinion_expression child raised NameError.
            polarity = strength = ''
            tar_ids_exp = []

            #Holder
            opi_hol_ele = element.find('opinion_holder')
            if opi_hol_ele is not None:
                tar_ids_hol = [t_ele.get('id') for t_ele in opi_hol_ele.findall('span/target')]

            #Target
            opi_tar_ele = element.find('opinion_target')
            if opi_tar_ele is not None:
                tar_ids_tar = [t_ele.get('id') for t_ele in opi_tar_ele.findall('span/target')]

            ## Opinion expression
            opi_exp_ele = element.find('opinion_expression')
            if opi_exp_ele is not None:
                polarity = opi_exp_ele.get('polarity', '')
                strength = opi_exp_ele.get('strength', '')
                tar_ids_exp = [t_ele.get('id') for t_ele in opi_exp_ele.findall('span/target')]

            yield KafOpinion(my_id, tar_ids_hol, tar_ids_tar,
                             KafOpinionExpression(polarity, strength, tar_ids_exp))

    def remove_opinion_layer(self):
        """Remove the ``opinions`` layer from the root, if there is one."""
        opinion_layer = self.tree.find('opinions')
        if opinion_layer is not None:
            self.tree.getroot().remove(opinion_layer)

    ## This function adds an opinion to the opinion layer, creating it if it does not exist
    ## The id is calculated automatically, ensuring there is no repetition
    def add_opinion(self, hol_ids, tar_ids, polarity, strength, exp_ids):
        """Append a new ``opinion`` element to the ``opinions`` layer.

        hol_ids / tar_ids / exp_ids -- target ids for the holder, target and
        expression spans.  ``strength`` is converted with ``str()``.  The new
        opinion gets the first ``o<N>`` id (N = 1, 2, ...) not yet in use.
        """
        #Looking for opinion layer or creating it
        opinion_layer = self.tree.find('opinions')
        if opinion_layer is None:
            opinion_layer = etree.Element('opinions')
            self.tree.getroot().append(opinion_layer)

        ## Generating unique id
        used_oids = set(opi.get('oid') for opi in opinion_layer)
        n = 1
        while 'o' + str(n) in used_oids:
            n += 1
        new_oid = 'o' + str(n)

        op_ele = etree.Element('opinion')
        opinion_layer.append(op_ele)
        op_ele.set('oid', new_oid)

        ## Holder
        op_hol = etree.Element('opinion_holder')
        op_ele.append(op_hol)
        span_op_hol = etree.Element('span')
        op_hol.append(span_op_hol)
        for target_id in hol_ids:
            span_op_hol.append(etree.Element('target', attrib={'id': target_id}))

        ## TARGET
        op_tar = etree.Element('opinion_target')
        op_ele.append(op_tar)
        span_op_tar = etree.Element('span')
        op_tar.append(span_op_tar)
        for target_id in tar_ids:
            span_op_tar.append(etree.Element('target', attrib={'id': target_id}))

        ## Expression
        op_exp = etree.Element('opinion_expression',
                               attrib={'polarity': polarity,
                                       'strength': str(strength)})
        op_ele.append(op_exp)
        span_exp = etree.Element('span')
        op_exp.append(span_exp)
        for target_id in exp_ids:
            span_exp.append(etree.Element('target', attrib={'id': target_id}))
435
+
436
+
437
+
438
+
439
+
@@ -0,0 +1,7 @@
1
+ ## version = 0.2
2
+ ## Added timestamp to function addLinguisticProcessor
3
+ ## 24-april-2013 --> getSingleEntities and getSingleProperties read both entities/props in format
4
+ ## entities -> entity -> span -> target and entities -> entity -> references -> span
5
+ ####
6
+
7
+ from KafParserMod import KafParser
@@ -0,0 +1,16 @@
1
+ require 'rake'
2
+ require_relative 'support'
3
+
4
+ desc 'Verifies the requirements'
5
+ task :requirements do
6
+ verify_requirements
7
+ require_executable("make")
8
+ end
9
+
10
+ task :compile => :requirements
11
+
12
+ task :default => [:compile] do
13
+ path = File.join(PYTHON_SITE_PACKAGES, 'pre_install')
14
+
15
+ pip_install(PRE_INSTALL_REQUIREMENTS, path)
16
+ end
@@ -0,0 +1,38 @@
1
+ require 'opener/build-tools'
2
+
3
+ include Opener::BuildTools::Requirements
4
+ include Opener::BuildTools::Python
5
+ include Opener::BuildTools::Files
6
+
7
+ # Directory where packages will be installed to.
8
+ PYTHON_SITE_PACKAGES = File.expand_path(
9
+ '../../../core/site-packages',
10
+ __FILE__
11
+ )
12
+
13
+ # Directory containing the temporary files.
14
+ TMP_DIRECTORY = File.expand_path('../../../tmp', __FILE__)
15
+
16
+ # Path to the pip requirements file used to install requirements before
17
+ # packaging the Gem.
18
+ PRE_BUILD_REQUIREMENTS = File.expand_path(
19
+ '../../../pre_build_requirements.txt',
20
+ __FILE__
21
+ )
22
+
23
+ # Path to the pip requirements file used to install requirements upon Gem
24
+ # installation.
25
+ PRE_INSTALL_REQUIREMENTS = File.expand_path(
26
+ '../../../pre_install_requirements.txt',
27
+ __FILE__
28
+ )
29
+
30
+ ##
31
+ # Verifies the requirements to install this Gem.
32
+ #
33
+ def verify_requirements
34
+ require_executable('python')
35
+ require_version('python', python_version, '2.6.0')
36
+ require_executable('pip')
37
+ require_version('pip', pip_version, '1.3.1')
38
+ end
@@ -0,0 +1,122 @@
1
+ require 'open3'
2
+ require 'nokogiri'
3
+
4
+ require_relative 'base/version'
5
+
6
module Opener
  module Coreferences
    ##
    # Coreference class for various languages such as English and Spanish.
    #
    # The class builds a `python -m corefgraph.process.file` command line,
    # runs it from the Gem's `core` directory and feeds it the KAF input on
    # STDIN.
    #
    # @!attribute [r] args
    #  @return [Array]
    # @!attribute [r] options
    #  @return [Hash]
    #
    class Base
      attr_reader :args, :options

      ##
      # Returns the default language to use.
      #
      # @return [String]
      #
      DEFAULT_LANGUAGE = 'en'.freeze

      ##
      # @param [Hash] options
      #
      # @option options [Array] :args The commandline arguments to pass to the
      #  underlying Python code.
      # @option options [String] :language The language to use when the KAF
      #  input does not specify one.
      #
      def initialize(options = {})
        # Work on copies so neither the caller's Hash nor its argument Array
        # (ARGV in the executables) is modified.
        options  = options.dup
        @args    = (options.delete(:args) || []).dup
        @options = options
      end

      ##
      # Returns a String containing the command used to execute the kernel.
      #
      # @return [String]
      #
      def command
        return "#{adjust_python_path} python -E -OO -m #{kernel} #{args.join(' ')}"
      end

      ##
      # Runs the command and returns the output of STDOUT, STDERR and the
      # process information.
      #
      # @param [String] input The input to process.
      # @return [Array]
      #
      def run(input)
        set_language_argument(language(input))

        Dir.chdir(core_dir) do
          capture(input)
        end
      end

      ##
      # Runs the command and takes care of error handling/aborting based on the
      # output.
      #
      # @see #run
      #
      def run!(input)
        stdout, stderr, process = run(input)

        if process.success?
          puts stdout

          STDERR.puts(stderr) unless stderr.empty?
        else
          abort stderr
        end
      end

      protected

      ##
      # @return [String]
      #
      def adjust_python_path
        site_packages = File.join(core_dir, 'site-packages')
        "env PYTHONPATH=#{site_packages}:$PYTHONPATH"
      end

      ##
      # Executes the command, writes the input to its STDIN and collects
      # STDOUT/STDERR in separate threads so that neither pipe can fill up
      # while the other one is being read.
      #
      # NOTE(review): the command is split on spaces and executed without a
      # shell, so "$PYTHONPATH" is passed literally, and `python -E` ignores
      # PYTHON* environment variables -- confirm that the PYTHONPATH prefix
      # from #adjust_python_path has the intended effect.
      #
      # @param [String] input
      # @return [Array] STDOUT, STDERR and the process status.
      #
      def capture(input)
        Open3.popen3(*command.split(" ")) do |stdin, stdout, stderr, thread|
          out_reader = Thread.new { stdout.read }
          err_reader = Thread.new { stderr.read }

          stdin.write(input)
          stdin.close

          [out_reader.value, err_reader.value, thread.value]
        end
      end

      ##
      # Makes sure the arguments contain exactly one "--language" entry added
      # by this class, so calling #run repeatedly does not keep appending.
      #
      # @param [String] code The language code to pass to the kernel.
      #
      def set_language_argument(code)
        @args.delete(@language_argument) if @language_argument

        @language_argument = "--language #{code}"
        @args << @language_argument
      end

      ##
      # @return [String]
      #
      def core_dir
        return File.expand_path('../../../../core', __FILE__)
      end

      ##
      # @return [String]
      #
      def kernel
        return 'corefgraph.process.file'
      end

      ##
      # Returns the language from the KAF. When the input is empty, has no KAF
      # root element or no (non-blank) xml:lang attribute, the :language
      # option and finally DEFAULT_LANGUAGE are used instead.
      #
      # @param [String] input
      # @return [String]
      #
      def language(input)
        root  = Nokogiri::XML(input.to_s).at('KAF')
        found = root && root.attr('xml:lang')
        found = nil if found && found.strip.empty?

        return found || options[:language] || DEFAULT_LANGUAGE
      end
    end # Base
  end # Coreferences
end # Opener
@@ -0,0 +1,7 @@
1
+ module Opener
2
+ module Coreferences
3
+ class Base
4
+ VERSION = '2.0.0'
5
+ end # Base
6
+ end # Coreferences
7
+ end # Opener
@@ -0,0 +1,31 @@
1
+ require File.expand_path('../lib/opener/coreferences/base/version', __FILE__)
2
+
3
+ Gem::Specification.new do |gem|
4
+ gem.name = 'opener-coreference-base'
5
+ gem.version = Opener::Coreferences::Base::VERSION
6
+ gem.authors = ['development@olery.com']
7
+ gem.summary = 'Coreference resolution for various languages.'
8
+ gem.description = gem.summary
9
+ gem.has_rdoc = 'yard'
10
+ gem.extensions = ['ext/hack/Rakefile']
11
+
12
+ gem.required_ruby_version = '>= 1.9.2'
13
+
14
+ gem.files = Dir.glob([
15
+ 'core/site-packages/pre_build/**/*',
16
+ 'ext/**/*',
17
+ 'lib/**/*',
18
+ '*.gemspec',
19
+ '*_requirements.txt',
20
+ 'README.md'
21
+ ]).select { |file| File.file?(file) }
22
+
23
+ gem.executables = Dir.glob('bin/*').map { |file| File.basename(file) }
24
+
25
+ gem.add_dependency 'rake'
26
+ gem.add_dependency 'opener-build-tools', ['>= 0.2.7']
27
+ gem.add_dependency 'nokogiri'
28
+
29
+ gem.add_development_dependency 'cucumber'
30
+ gem.add_development_dependency 'rspec'
31
+ end
@@ -0,0 +1 @@
1
+ git+ssh://git@github.com/opener-project/VU-kaf-parser.git#egg=VUKafParserPy
@@ -0,0 +1,2 @@
1
+ networkx
2
+ pyyaml
metadata ADDED
@@ -0,0 +1,145 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: opener-coreference-base
3
+ version: !ruby/object:Gem::Version
4
+ version: 2.0.0
5
+ platform: ruby
6
+ authors:
7
+ - development@olery.com
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-05-19 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rake
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: opener-build-tools
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: 0.2.7
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: 0.2.7
41
+ - !ruby/object:Gem::Dependency
42
+ name: nokogiri
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: cucumber
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rspec
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ description: Coreference resolution for various languages.
84
+ email:
85
+ executables:
86
+ - coreference-es
87
+ - coreference-nl
88
+ - coreference-fr
89
+ - coreference-de
90
+ - coreference-it
91
+ - coreference-en
92
+ extensions:
93
+ - ext/hack/Rakefile
94
+ extra_rdoc_files: []
95
+ files:
96
+ - README.md
97
+ - bin/coreference-de
98
+ - bin/coreference-en
99
+ - bin/coreference-es
100
+ - bin/coreference-fr
101
+ - bin/coreference-it
102
+ - bin/coreference-nl
103
+ - core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/PKG-INFO
104
+ - core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/SOURCES.txt
105
+ - core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/dependency_links.txt
106
+ - core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/installed-files.txt
107
+ - core/site-packages/pre_build/VUKafParserPy-1.0-py2.7.egg-info/top_level.txt
108
+ - core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.py
109
+ - core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.pyc
110
+ - core/site-packages/pre_build/VUKafParserPy/KafParserMod.py
111
+ - core/site-packages/pre_build/VUKafParserPy/KafParserMod.pyc
112
+ - core/site-packages/pre_build/VUKafParserPy/__init__.py
113
+ - core/site-packages/pre_build/VUKafParserPy/__init__.pyc
114
+ - ext/hack/Rakefile
115
+ - ext/hack/support.rb
116
+ - lib/opener/coreferences/base.rb
117
+ - lib/opener/coreferences/base/version.rb
118
+ - opener-coreference-base.gemspec
119
+ - pre_build_requirements.txt
120
+ - pre_install_requirements.txt
121
+ homepage:
122
+ licenses: []
123
+ metadata: {}
124
+ post_install_message:
125
+ rdoc_options: []
126
+ require_paths:
127
+ - lib
128
+ required_ruby_version: !ruby/object:Gem::Requirement
129
+ requirements:
130
+ - - ">="
131
+ - !ruby/object:Gem::Version
132
+ version: 1.9.2
133
+ required_rubygems_version: !ruby/object:Gem::Requirement
134
+ requirements:
135
+ - - ">="
136
+ - !ruby/object:Gem::Version
137
+ version: '0'
138
+ requirements: []
139
+ rubyforge_project:
140
+ rubygems_version: 2.2.2
141
+ signing_key:
142
+ specification_version: 4
143
+ summary: Coreference resolution for various languages.
144
+ test_files: []
145
+ has_rdoc: yard