opener-coreference-base 2.0.1 → 2.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/core/vendor/dictconfig/__init__.py +1 -0
- data/core/vendor/dictconfig/dictconfig.py +549 -0
- data/core/vendor/pykaf/LICENSE.txt +13 -0
- data/core/vendor/pykaf/__init__.py +427 -0
- data/lib/opener/coreferences/base/version.rb +1 -1
- data/opener-coreference-base.gemspec +1 -1
- metadata +5 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c35ea079c7c50da42f7bd17cddabf9cb79d1b93b
|
4
|
+
data.tar.gz: f836ce7b54e055bae722fda4890cd9a9a7cf8f39
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e835457582de3c321d41649f19c9c7682351931d03cd8e526ae5aa3b217833b626a386727702b371743ee267d93dbbf1fbea97167d45223e312067412c200169
|
7
|
+
data.tar.gz: 67d0658e73efc718196354b34d6ecee09cb52ef2d32f2bea821b008fae8f066a43a3ce5ea967b56421de8e543b2a4867f82a55b69fe0995d5bcc8bf794f53ff4
|
@@ -0,0 +1 @@
|
|
1
|
+
from dictconfig import *
|
@@ -0,0 +1,549 @@
|
|
1
|
+
# Copyright 2009-2010 by Vinay Sajip. All Rights Reserved.
|
2
|
+
#
|
3
|
+
# Permission to use, copy, modify, and distribute this software and its
|
4
|
+
# documentation for any purpose and without fee is hereby granted,
|
5
|
+
# provided that the above copyright notice appear in all copies and that
|
6
|
+
# both that copyright notice and this permission notice appear in
|
7
|
+
# supporting documentation, and that the name of Vinay Sajip
|
8
|
+
# not be used in advertising or publicity pertaining to distribution
|
9
|
+
# of the software without specific, written prior permission.
|
10
|
+
# VINAY SAJIP DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
|
11
|
+
# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL
|
12
|
+
# VINAY SAJIP BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
|
13
|
+
# ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
|
14
|
+
# IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
|
15
|
+
# OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
16
|
+
|
17
|
+
import logging.handlers
|
18
|
+
import re
|
19
|
+
import sys
|
20
|
+
import types
|
21
|
+
|
22
|
+
IDENTIFIER = re.compile('^[a-z_][a-z0-9_]*$', re.I)
|
23
|
+
|
24
|
+
def valid_ident(s):
|
25
|
+
m = IDENTIFIER.match(s)
|
26
|
+
if not m:
|
27
|
+
raise ValueError('Not a valid Python identifier: %r' % s)
|
28
|
+
return True
|
29
|
+
|
30
|
+
#
|
31
|
+
# This function is defined in logging only in recent versions of Python
|
32
|
+
#
|
33
|
+
try:
|
34
|
+
from logging import _checkLevel
|
35
|
+
except ImportError:
|
36
|
+
def _checkLevel(level):
|
37
|
+
if isinstance(level, int):
|
38
|
+
rv = level
|
39
|
+
elif str(level) == level:
|
40
|
+
if level not in logging._levelNames:
|
41
|
+
raise ValueError('Unknown level: %r' % level)
|
42
|
+
rv = logging._levelNames[level]
|
43
|
+
else:
|
44
|
+
raise TypeError('Level not an integer or a '
|
45
|
+
'valid string: %r' % level)
|
46
|
+
return rv
|
47
|
+
|
48
|
+
# The ConvertingXXX classes are wrappers around standard Python containers,
|
49
|
+
# and they serve to convert any suitable values in the container. The
|
50
|
+
# conversion converts base dicts, lists and tuples to their wrapped
|
51
|
+
# equivalents, whereas strings which match a conversion format are converted
|
52
|
+
# appropriately.
|
53
|
+
#
|
54
|
+
# Each wrapper should have a configurator attribute holding the actual
|
55
|
+
# configurator to use for conversion.
|
56
|
+
|
57
|
+
class ConvertingDict(dict):
|
58
|
+
"""A converting dictionary wrapper."""
|
59
|
+
|
60
|
+
def __getitem__(self, key):
|
61
|
+
value = dict.__getitem__(self, key)
|
62
|
+
result = self.configurator.convert(value)
|
63
|
+
#If the converted value is different, save for next time
|
64
|
+
if value is not result:
|
65
|
+
self[key] = result
|
66
|
+
if type(result) in (ConvertingDict, ConvertingList,
|
67
|
+
ConvertingTuple):
|
68
|
+
result.parent = self
|
69
|
+
result.key = key
|
70
|
+
return result
|
71
|
+
|
72
|
+
def get(self, key, default=None):
|
73
|
+
value = dict.get(self, key, default)
|
74
|
+
result = self.configurator.convert(value)
|
75
|
+
#If the converted value is different, save for next time
|
76
|
+
if value is not result:
|
77
|
+
self[key] = result
|
78
|
+
if type(result) in (ConvertingDict, ConvertingList,
|
79
|
+
ConvertingTuple):
|
80
|
+
result.parent = self
|
81
|
+
result.key = key
|
82
|
+
return result
|
83
|
+
|
84
|
+
def pop(self, key, default=None):
|
85
|
+
value = dict.pop(self, key, default)
|
86
|
+
result = self.configurator.convert(value)
|
87
|
+
if value is not result:
|
88
|
+
if type(result) in (ConvertingDict, ConvertingList,
|
89
|
+
ConvertingTuple):
|
90
|
+
result.parent = self
|
91
|
+
result.key = key
|
92
|
+
return result
|
93
|
+
|
94
|
+
class ConvertingList(list):
|
95
|
+
"""A converting list wrapper."""
|
96
|
+
def __getitem__(self, key):
|
97
|
+
value = list.__getitem__(self, key)
|
98
|
+
result = self.configurator.convert(value)
|
99
|
+
#If the converted value is different, save for next time
|
100
|
+
if value is not result:
|
101
|
+
self[key] = result
|
102
|
+
if type(result) in (ConvertingDict, ConvertingList,
|
103
|
+
ConvertingTuple):
|
104
|
+
result.parent = self
|
105
|
+
result.key = key
|
106
|
+
return result
|
107
|
+
|
108
|
+
def pop(self, idx=-1):
|
109
|
+
value = list.pop(self, idx)
|
110
|
+
result = self.configurator.convert(value)
|
111
|
+
if value is not result:
|
112
|
+
if type(result) in (ConvertingDict, ConvertingList,
|
113
|
+
ConvertingTuple):
|
114
|
+
result.parent = self
|
115
|
+
return result
|
116
|
+
|
117
|
+
class ConvertingTuple(tuple):
|
118
|
+
"""A converting tuple wrapper."""
|
119
|
+
def __getitem__(self, key):
|
120
|
+
value = tuple.__getitem__(self, key)
|
121
|
+
result = self.configurator.convert(value)
|
122
|
+
if value is not result:
|
123
|
+
if type(result) in (ConvertingDict, ConvertingList,
|
124
|
+
ConvertingTuple):
|
125
|
+
result.parent = self
|
126
|
+
result.key = key
|
127
|
+
return result
|
128
|
+
|
129
|
+
class BaseConfigurator(object):
|
130
|
+
"""
|
131
|
+
The configurator base class which defines some useful defaults.
|
132
|
+
"""
|
133
|
+
|
134
|
+
CONVERT_PATTERN = re.compile(r'^(?P<prefix>[a-z]+)://(?P<suffix>.*)$')
|
135
|
+
|
136
|
+
WORD_PATTERN = re.compile(r'^\s*(\w+)\s*')
|
137
|
+
DOT_PATTERN = re.compile(r'^\.\s*(\w+)\s*')
|
138
|
+
INDEX_PATTERN = re.compile(r'^\[\s*(\w+)\s*\]\s*')
|
139
|
+
DIGIT_PATTERN = re.compile(r'^\d+$')
|
140
|
+
|
141
|
+
value_converters = {
|
142
|
+
'ext' : 'ext_convert',
|
143
|
+
'cfg' : 'cfg_convert',
|
144
|
+
}
|
145
|
+
|
146
|
+
# We might want to use a different one, e.g. importlib
|
147
|
+
importer = __import__
|
148
|
+
|
149
|
+
def __init__(self, config):
|
150
|
+
self.config = ConvertingDict(config)
|
151
|
+
self.config.configurator = self
|
152
|
+
|
153
|
+
def resolve(self, s):
|
154
|
+
"""
|
155
|
+
Resolve strings to objects using standard import and attribute
|
156
|
+
syntax.
|
157
|
+
"""
|
158
|
+
name = s.split('.')
|
159
|
+
used = name.pop(0)
|
160
|
+
try:
|
161
|
+
found = self.importer(used)
|
162
|
+
for frag in name:
|
163
|
+
used += '.' + frag
|
164
|
+
try:
|
165
|
+
found = getattr(found, frag)
|
166
|
+
except AttributeError:
|
167
|
+
self.importer(used)
|
168
|
+
found = getattr(found, frag)
|
169
|
+
return found
|
170
|
+
except ImportError:
|
171
|
+
e, tb = sys.exc_info()[1:]
|
172
|
+
v = ValueError('Cannot resolve %r: %s' % (s, e))
|
173
|
+
v.__cause__, v.__traceback__ = e, tb
|
174
|
+
raise v
|
175
|
+
|
176
|
+
def ext_convert(self, value):
|
177
|
+
"""Default converter for the ext:// protocol."""
|
178
|
+
return self.resolve(value)
|
179
|
+
|
180
|
+
def cfg_convert(self, value):
|
181
|
+
"""Default converter for the cfg:// protocol."""
|
182
|
+
rest = value
|
183
|
+
m = self.WORD_PATTERN.match(rest)
|
184
|
+
if m is None:
|
185
|
+
raise ValueError("Unable to convert %r" % value)
|
186
|
+
else:
|
187
|
+
rest = rest[m.end():]
|
188
|
+
d = self.config[m.groups()[0]]
|
189
|
+
#print d, rest
|
190
|
+
while rest:
|
191
|
+
m = self.DOT_PATTERN.match(rest)
|
192
|
+
if m:
|
193
|
+
d = d[m.groups()[0]]
|
194
|
+
else:
|
195
|
+
m = self.INDEX_PATTERN.match(rest)
|
196
|
+
if m:
|
197
|
+
idx = m.groups()[0]
|
198
|
+
if not self.DIGIT_PATTERN.match(idx):
|
199
|
+
d = d[idx]
|
200
|
+
else:
|
201
|
+
try:
|
202
|
+
n = int(idx) # try as number first (most likely)
|
203
|
+
d = d[n]
|
204
|
+
except TypeError:
|
205
|
+
d = d[idx]
|
206
|
+
if m:
|
207
|
+
rest = rest[m.end():]
|
208
|
+
else:
|
209
|
+
raise ValueError('Unable to convert '
|
210
|
+
'%r at %r' % (value, rest))
|
211
|
+
#rest should be empty
|
212
|
+
return d
|
213
|
+
|
214
|
+
def convert(self, value):
|
215
|
+
"""
|
216
|
+
Convert values to an appropriate type. dicts, lists and tuples are
|
217
|
+
replaced by their converting alternatives. Strings are checked to
|
218
|
+
see if they have a conversion format and are converted if they do.
|
219
|
+
"""
|
220
|
+
if not isinstance(value, ConvertingDict) and isinstance(value, dict):
|
221
|
+
value = ConvertingDict(value)
|
222
|
+
value.configurator = self
|
223
|
+
elif not isinstance(value, ConvertingList) and isinstance(value, list):
|
224
|
+
value = ConvertingList(value)
|
225
|
+
value.configurator = self
|
226
|
+
elif not isinstance(value, ConvertingTuple) and\
|
227
|
+
isinstance(value, tuple):
|
228
|
+
value = ConvertingTuple(value)
|
229
|
+
value.configurator = self
|
230
|
+
elif isinstance(value, basestring): # str for py3k
|
231
|
+
m = self.CONVERT_PATTERN.match(value)
|
232
|
+
if m:
|
233
|
+
d = m.groupdict()
|
234
|
+
prefix = d['prefix']
|
235
|
+
converter = self.value_converters.get(prefix, None)
|
236
|
+
if converter:
|
237
|
+
suffix = d['suffix']
|
238
|
+
converter = getattr(self, converter)
|
239
|
+
value = converter(suffix)
|
240
|
+
return value
|
241
|
+
|
242
|
+
def configure_custom(self, config):
|
243
|
+
"""Configure an object with a user-supplied factory."""
|
244
|
+
c = config.pop('()')
|
245
|
+
if not hasattr(c, '__call__') and hasattr(types, 'ClassType') and type(c) != types.ClassType:
|
246
|
+
c = self.resolve(c)
|
247
|
+
props = config.pop('.', None)
|
248
|
+
# Check for valid identifiers
|
249
|
+
kwargs = dict([(k, config[k]) for k in config if valid_ident(k)])
|
250
|
+
result = c(**kwargs)
|
251
|
+
if props:
|
252
|
+
for name, value in props.items():
|
253
|
+
setattr(result, name, value)
|
254
|
+
return result
|
255
|
+
|
256
|
+
def as_tuple(self, value):
|
257
|
+
"""Utility function which converts lists to tuples."""
|
258
|
+
if isinstance(value, list):
|
259
|
+
value = tuple(value)
|
260
|
+
return value
|
261
|
+
|
262
|
+
class DictConfigurator(BaseConfigurator):
|
263
|
+
"""
|
264
|
+
Configure logging using a dictionary-like object to describe the
|
265
|
+
configuration.
|
266
|
+
"""
|
267
|
+
|
268
|
+
def configure(self):
|
269
|
+
"""Do the configuration."""
|
270
|
+
|
271
|
+
config = self.config
|
272
|
+
if 'version' not in config:
|
273
|
+
raise ValueError("dictionary doesn't specify a version")
|
274
|
+
if config['version'] != 1:
|
275
|
+
raise ValueError("Unsupported version: %s" % config['version'])
|
276
|
+
incremental = config.pop('incremental', False)
|
277
|
+
EMPTY_DICT = {}
|
278
|
+
logging._acquireLock()
|
279
|
+
try:
|
280
|
+
if incremental:
|
281
|
+
handlers = config.get('handlers', EMPTY_DICT)
|
282
|
+
# incremental handler config only if handler name
|
283
|
+
# ties in to logging._handlers (Python 2.7)
|
284
|
+
if sys.version_info[:2] == (2, 7):
|
285
|
+
for name in handlers:
|
286
|
+
if name not in logging._handlers:
|
287
|
+
raise ValueError('No handler found with '
|
288
|
+
'name %r' % name)
|
289
|
+
else:
|
290
|
+
try:
|
291
|
+
handler = logging._handlers[name]
|
292
|
+
handler_config = handlers[name]
|
293
|
+
level = handler_config.get('level', None)
|
294
|
+
if level:
|
295
|
+
handler.setLevel(_checkLevel(level))
|
296
|
+
except StandardError, e:
|
297
|
+
raise ValueError('Unable to configure handler '
|
298
|
+
'%r: %s' % (name, e))
|
299
|
+
loggers = config.get('loggers', EMPTY_DICT)
|
300
|
+
for name in loggers:
|
301
|
+
try:
|
302
|
+
self.configure_logger(name, loggers[name], True)
|
303
|
+
except StandardError, e:
|
304
|
+
raise ValueError('Unable to configure logger '
|
305
|
+
'%r: %s' % (name, e))
|
306
|
+
root = config.get('root', None)
|
307
|
+
if root:
|
308
|
+
try:
|
309
|
+
self.configure_root(root, True)
|
310
|
+
except StandardError, e:
|
311
|
+
raise ValueError('Unable to configure root '
|
312
|
+
'logger: %s' % e)
|
313
|
+
else:
|
314
|
+
disable_existing = config.pop('disable_existing_loggers', True)
|
315
|
+
|
316
|
+
logging._handlers.clear()
|
317
|
+
del logging._handlerList[:]
|
318
|
+
|
319
|
+
# Do formatters first - they don't refer to anything else
|
320
|
+
formatters = config.get('formatters', EMPTY_DICT)
|
321
|
+
for name in formatters:
|
322
|
+
try:
|
323
|
+
formatters[name] = self.configure_formatter(
|
324
|
+
formatters[name])
|
325
|
+
except StandardError, e:
|
326
|
+
raise ValueError('Unable to configure '
|
327
|
+
'formatter %r: %s' % (name, e))
|
328
|
+
# Next, do filters - they don't refer to anything else, either
|
329
|
+
filters = config.get('filters', EMPTY_DICT)
|
330
|
+
for name in filters:
|
331
|
+
try:
|
332
|
+
filters[name] = self.configure_filter(filters[name])
|
333
|
+
except StandardError, e:
|
334
|
+
raise ValueError('Unable to configure '
|
335
|
+
'filter %r: %s' % (name, e))
|
336
|
+
|
337
|
+
# Next, do handlers - they refer to formatters and filters
|
338
|
+
# As handlers can refer to other handlers, sort the keys
|
339
|
+
# to allow a deterministic order of configuration
|
340
|
+
handlers = config.get('handlers', EMPTY_DICT)
|
341
|
+
for name in sorted(handlers):
|
342
|
+
try:
|
343
|
+
handler = self.configure_handler(handlers[name])
|
344
|
+
handler.name = name
|
345
|
+
handlers[name] = handler
|
346
|
+
except StandardError, e:
|
347
|
+
raise ValueError('Unable to configure handler '
|
348
|
+
'%r: %s' % (name, e))
|
349
|
+
# Next, do loggers - they refer to handlers and filters
|
350
|
+
|
351
|
+
#we don't want to lose the existing loggers,
|
352
|
+
#since other threads may have pointers to them.
|
353
|
+
#existing is set to contain all existing loggers,
|
354
|
+
#and as we go through the new configuration we
|
355
|
+
#remove any which are configured. At the end,
|
356
|
+
#what's left in existing is the set of loggers
|
357
|
+
#which were in the previous configuration but
|
358
|
+
#which are not in the new configuration.
|
359
|
+
root = logging.root
|
360
|
+
existing = root.manager.loggerDict.keys()
|
361
|
+
#The list needs to be sorted so that we can
|
362
|
+
#avoid disabling child loggers of explicitly
|
363
|
+
#named loggers. With a sorted list it is easier
|
364
|
+
#to find the child loggers.
|
365
|
+
existing.sort()
|
366
|
+
#We'll keep the list of existing loggers
|
367
|
+
#which are children of named loggers here...
|
368
|
+
child_loggers = []
|
369
|
+
#now set up the new ones...
|
370
|
+
loggers = config.get('loggers', EMPTY_DICT)
|
371
|
+
for name in loggers:
|
372
|
+
if name in existing:
|
373
|
+
i = existing.index(name)
|
374
|
+
prefixed = name + "."
|
375
|
+
pflen = len(prefixed)
|
376
|
+
num_existing = len(existing)
|
377
|
+
i = i + 1 # look at the entry after name
|
378
|
+
while (i < num_existing) and\
|
379
|
+
(existing[i][:pflen] == prefixed):
|
380
|
+
child_loggers.append(existing[i])
|
381
|
+
i = i + 1
|
382
|
+
existing.remove(name)
|
383
|
+
try:
|
384
|
+
self.configure_logger(name, loggers[name])
|
385
|
+
except StandardError, e:
|
386
|
+
raise ValueError('Unable to configure logger '
|
387
|
+
'%r: %s' % (name, e))
|
388
|
+
|
389
|
+
#Disable any old loggers. There's no point deleting
|
390
|
+
#them as other threads may continue to hold references
|
391
|
+
#and by disabling them, you stop them doing any logging.
|
392
|
+
#However, don't disable children of named loggers, as that's
|
393
|
+
#probably not what was intended by the user.
|
394
|
+
for log in existing:
|
395
|
+
logger = root.manager.loggerDict[log]
|
396
|
+
if log in child_loggers:
|
397
|
+
logger.level = logging.NOTSET
|
398
|
+
logger.handlers = []
|
399
|
+
logger.propagate = True
|
400
|
+
elif disable_existing:
|
401
|
+
logger.disabled = True
|
402
|
+
|
403
|
+
# And finally, do the root logger
|
404
|
+
root = config.get('root', None)
|
405
|
+
if root:
|
406
|
+
try:
|
407
|
+
self.configure_root(root)
|
408
|
+
except StandardError, e:
|
409
|
+
raise ValueError('Unable to configure root '
|
410
|
+
'logger: %s' % e)
|
411
|
+
finally:
|
412
|
+
logging._releaseLock()
|
413
|
+
|
414
|
+
def configure_formatter(self, config):
|
415
|
+
"""Configure a formatter from a dictionary."""
|
416
|
+
if '()' in config:
|
417
|
+
factory = config['()'] # for use in exception handler
|
418
|
+
try:
|
419
|
+
result = self.configure_custom(config)
|
420
|
+
except TypeError, te:
|
421
|
+
if "'format'" not in str(te):
|
422
|
+
raise
|
423
|
+
#Name of parameter changed from fmt to format.
|
424
|
+
#Retry with old name.
|
425
|
+
#This is so that code can be used with older Python versions
|
426
|
+
#(e.g. by Django)
|
427
|
+
config['fmt'] = config.pop('format')
|
428
|
+
config['()'] = factory
|
429
|
+
result = self.configure_custom(config)
|
430
|
+
else:
|
431
|
+
fmt = config.get('format', None)
|
432
|
+
dfmt = config.get('datefmt', None)
|
433
|
+
result = logging.Formatter(fmt, dfmt)
|
434
|
+
return result
|
435
|
+
|
436
|
+
def configure_filter(self, config):
|
437
|
+
"""Configure a filter from a dictionary."""
|
438
|
+
if '()' in config:
|
439
|
+
result = self.configure_custom(config)
|
440
|
+
else:
|
441
|
+
name = config.get('name', '')
|
442
|
+
result = logging.Filter(name)
|
443
|
+
return result
|
444
|
+
|
445
|
+
def add_filters(self, filterer, filters):
|
446
|
+
"""Add filters to a filterer from a list of names."""
|
447
|
+
for f in filters:
|
448
|
+
try:
|
449
|
+
filterer.addFilter(self.config['filters'][f])
|
450
|
+
except StandardError, e:
|
451
|
+
raise ValueError('Unable to add filter %r: %s' % (f, e))
|
452
|
+
|
453
|
+
def configure_handler(self, config):
|
454
|
+
"""Configure a handler from a dictionary."""
|
455
|
+
formatter = config.pop('formatter', None)
|
456
|
+
if formatter:
|
457
|
+
try:
|
458
|
+
formatter = self.config['formatters'][formatter]
|
459
|
+
except StandardError, e:
|
460
|
+
raise ValueError('Unable to set formatter '
|
461
|
+
'%r: %s' % (formatter, e))
|
462
|
+
level = config.pop('level', None)
|
463
|
+
filters = config.pop('filters', None)
|
464
|
+
if '()' in config:
|
465
|
+
c = config.pop('()')
|
466
|
+
if not hasattr(c, '__call__') and hasattr(types, 'ClassType') and type(c) != types.ClassType:
|
467
|
+
c = self.resolve(c)
|
468
|
+
factory = c
|
469
|
+
else:
|
470
|
+
klass = self.resolve(config.pop('class'))
|
471
|
+
#Special case for handler which refers to another handler
|
472
|
+
if issubclass(klass, logging.handlers.MemoryHandler) and\
|
473
|
+
'target' in config:
|
474
|
+
try:
|
475
|
+
config['target'] = self.config['handlers'][config['target']]
|
476
|
+
except StandardError, e:
|
477
|
+
raise ValueError('Unable to set target handler '
|
478
|
+
'%r: %s' % (config['target'], e))
|
479
|
+
elif issubclass(klass, logging.handlers.SMTPHandler) and\
|
480
|
+
'mailhost' in config:
|
481
|
+
config['mailhost'] = self.as_tuple(config['mailhost'])
|
482
|
+
elif issubclass(klass, logging.handlers.SysLogHandler) and\
|
483
|
+
'address' in config:
|
484
|
+
config['address'] = self.as_tuple(config['address'])
|
485
|
+
factory = klass
|
486
|
+
kwargs = dict([(k, config[k]) for k in config if valid_ident(k)])
|
487
|
+
try:
|
488
|
+
result = factory(**kwargs)
|
489
|
+
except TypeError, te:
|
490
|
+
if "'stream'" not in str(te):
|
491
|
+
raise
|
492
|
+
#The argument name changed from strm to stream
|
493
|
+
#Retry with old name.
|
494
|
+
#This is so that code can be used with older Python versions
|
495
|
+
#(e.g. by Django)
|
496
|
+
kwargs['strm'] = kwargs.pop('stream')
|
497
|
+
result = factory(**kwargs)
|
498
|
+
if formatter:
|
499
|
+
result.setFormatter(formatter)
|
500
|
+
if level is not None:
|
501
|
+
result.setLevel(_checkLevel(level))
|
502
|
+
if filters:
|
503
|
+
self.add_filters(result, filters)
|
504
|
+
return result
|
505
|
+
|
506
|
+
def add_handlers(self, logger, handlers):
|
507
|
+
"""Add handlers to a logger from a list of names."""
|
508
|
+
for h in handlers:
|
509
|
+
try:
|
510
|
+
logger.addHandler(self.config['handlers'][h])
|
511
|
+
except StandardError, e:
|
512
|
+
raise ValueError('Unable to add handler %r: %s' % (h, e))
|
513
|
+
|
514
|
+
def common_logger_config(self, logger, config, incremental=False):
|
515
|
+
"""
|
516
|
+
Perform configuration which is common to root and non-root loggers.
|
517
|
+
"""
|
518
|
+
level = config.get('level', None)
|
519
|
+
if level is not None:
|
520
|
+
logger.setLevel(_checkLevel(level))
|
521
|
+
if not incremental:
|
522
|
+
#Remove any existing handlers
|
523
|
+
for h in logger.handlers[:]:
|
524
|
+
logger.removeHandler(h)
|
525
|
+
handlers = config.get('handlers', None)
|
526
|
+
if handlers:
|
527
|
+
self.add_handlers(logger, handlers)
|
528
|
+
filters = config.get('filters', None)
|
529
|
+
if filters:
|
530
|
+
self.add_filters(logger, filters)
|
531
|
+
|
532
|
+
def configure_logger(self, name, config, incremental=False):
|
533
|
+
"""Configure a non-root logger from a dictionary."""
|
534
|
+
logger = logging.getLogger(name)
|
535
|
+
self.common_logger_config(logger, config, incremental)
|
536
|
+
propagate = config.get('propagate', None)
|
537
|
+
if propagate is not None:
|
538
|
+
logger.propagate = propagate
|
539
|
+
|
540
|
+
def configure_root(self, config, incremental=False):
|
541
|
+
"""Configure a root logger from a dictionary."""
|
542
|
+
root = logging.getLogger()
|
543
|
+
self.common_logger_config(root, config, incremental)
|
544
|
+
|
545
|
+
dictConfigClass = DictConfigurator
|
546
|
+
|
547
|
+
def dictConfig(config):
|
548
|
+
"""Configure logging using a dictionary."""
|
549
|
+
dictConfigClass(config).configure()
|
@@ -0,0 +1,13 @@
|
|
1
|
+
Copyright 2013 Josu Bermudez Galbarriatu <josu.bermudez@deusto.es>.
|
2
|
+
|
3
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
you may not use this file except in compliance with the License.
|
5
|
+
You may obtain a copy of the License at
|
6
|
+
|
7
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
|
9
|
+
Unless required by applicable law or agreed to in writing, software
|
10
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
See the License for the specific language governing permissions and
|
13
|
+
limitations under the License.
|
@@ -0,0 +1,427 @@
|
|
1
|
+
# coding=utf-8
|
2
|
+
""" Module for manage KAF formatted files. """
|
3
|
+
|
4
|
+
from __future__ import unicode_literals
|
5
|
+
|
6
|
+
__author__ = 'Josu Bermudez <josu.bermudez@deusto.es>'
|
7
|
+
|
8
|
+
from lxml import etree
|
9
|
+
|
10
|
+
# CONSTANT TEXT VALUES USED TO CONSTRUCT KAF
|
11
|
+
KAF_TAG = "KAF"
|
12
|
+
LANGUAGE_ATTRIBUTE = "{http://www.w3.org/XML/1998/namespace}lang"
|
13
|
+
VERSION_ATTRIBUTE = "version"
|
14
|
+
NS = {}
|
15
|
+
|
16
|
+
KAF_HEADER_TAG = "kafHeader"
|
17
|
+
NAME_ATTRIBUTE = "name"
|
18
|
+
LINGUISTIC_PROCESSOR_HEAD = "linguisticProcessors"
|
19
|
+
LAYER_ATTRIBUTE = "layer"
|
20
|
+
LINGUISTIC_PROCESSOR_OCCURRENCE_TAG = "lp"
|
21
|
+
TIMESTAMP_ATTRIBUTE = "timestamp"
|
22
|
+
|
23
|
+
SPAN_TAG = "span"
|
24
|
+
TARGET_ID_ATTRIBUTE = "id"
|
25
|
+
TARGET_TAG = "target"
|
26
|
+
|
27
|
+
TEXT_LAYER_TAG = "text"
|
28
|
+
WORD_OCCURRENCE_TAG = "wf"
|
29
|
+
WORD_ID_ATTRIBUTE = "wid"
|
30
|
+
|
31
|
+
TERMS_LAYER_TAG = "terms"
|
32
|
+
TERM_OCCURRENCE_TAG = "term"
|
33
|
+
TERM_ID_ATTRIBUTE = "tid"
|
34
|
+
NER_ATTRIBUTE = "ner"
|
35
|
+
TYPE_ATTRIBUTE = "type"
|
36
|
+
LEMMA_ATTRIBUTE = "lemma"
|
37
|
+
POS_ATTRIBUTE = "pos"
|
38
|
+
MORPHOFEAT_ATTRIBUTE = "morphofeat"
|
39
|
+
|
40
|
+
NAMED_ENTITIES_LAYER_TAG = "entities"
|
41
|
+
NAMED_ENTITY_OCCURRENCE_TAG = "entity"
|
42
|
+
NAMED_ENTITY_ID_ATTRIBUTE = "eid"
|
43
|
+
NAMED_ENTITY_TYPE_ATTRIBUTE = "type"
|
44
|
+
NAMED_ENTITY_REFERENCES_GROUP_TAG = "references"
|
45
|
+
|
46
|
+
CONSTITUENCY_LAYER = "constituency"
|
47
|
+
CONSTITUENCY_TREE_TAG = "tree"
|
48
|
+
CONSTITUENCY_NON_TERMINALS = "nt"
|
49
|
+
CONSTITUENCY_TERMINALS = "t"
|
50
|
+
CONSTITUENCY_EDGES = "edge"
|
51
|
+
|
52
|
+
CHUNKS_LAYER_TAG = "chunks"
|
53
|
+
CHUNK_OCCURRENCE_TAG = "chunk"
|
54
|
+
CHUNK_CASE_ATTRIBUTE = "case"
|
55
|
+
CHUNK_PHRASE_ATTRIBUTE = "phrase"
|
56
|
+
CHUNK_HEAD_ATTRIBUTE = "head"
|
57
|
+
CHUNK_ID_ATTRIBUTE = "cid"
|
58
|
+
|
59
|
+
DEPENDENCY_LAYER_TAG = "deps"
|
60
|
+
DEPENDENCY_OCCURRENCE_TAG = "dep"
|
61
|
+
DEPENDENCY_FROM_ATTRIBUTE = "from"
|
62
|
+
DEPENDENCY_FUNCTION_ATTRIBUTE = "rfunc"
|
63
|
+
DEPENDENCY_TO_ATTRIBUTE = "to"
|
64
|
+
|
65
|
+
EXTERNAL_REFERENCE_OCCURRENCE_TAG = "externalRef"
|
66
|
+
EXTERNAL_REFERENCES_TAG = "externalReferences"
|
67
|
+
|
68
|
+
COREFERENCE_LAYER_TAG = "coreferences"
|
69
|
+
COREFERENCE_ID_ATTRIBUTE = "coid"
|
70
|
+
COREFERENCE_OCCURRENCE_TAG = "coref"
|
71
|
+
|
72
|
+
|
73
|
+
class KafDocument:
|
74
|
+
""" Manage a KAF document.
|
75
|
+
"""
|
76
|
+
valid_word_attributes = ("sent", "para", "offset", "length", "page")
|
77
|
+
valid_external_attributes = ("resource", "reference", "reftype", "status", "source", "confidence")
|
78
|
+
valid_externalRef_attributes = ("resource", "reference")
|
79
|
+
|
80
|
+
def __init__(self, file_name=None, input_stream=None, language=None, version="2.0", header=None):
|
81
|
+
""" Prepare the document basic structure.
|
82
|
+
"""
|
83
|
+
#parser = etree.XMLParser(remove_blank_text=True)
|
84
|
+
|
85
|
+
if file_name:
|
86
|
+
self.tree = etree.parse(file_name)#, parser=parser)
|
87
|
+
self.root = self.tree.getroot()
|
88
|
+
elif input_stream:
|
89
|
+
self.root = etree.fromstring(input_stream)#, parser=parser)
|
90
|
+
self.tree = etree.ElementTree(self.root)
|
91
|
+
else:
|
92
|
+
self.root = etree.Element(KAF_TAG, NS)
|
93
|
+
self.tree = etree.ElementTree(self.root)
|
94
|
+
if language:
|
95
|
+
self.root.attrib[LANGUAGE_ATTRIBUTE] = language
|
96
|
+
|
97
|
+
if version:
|
98
|
+
self.root.set(VERSION_ATTRIBUTE, version)
|
99
|
+
|
100
|
+
headers = self.tree.find(KAF_HEADER_TAG)
|
101
|
+
if headers is not None and len(headers):
|
102
|
+
self.kaf_header = headers
|
103
|
+
else:
|
104
|
+
self.kaf_header = None
|
105
|
+
|
106
|
+
if header:
|
107
|
+
self.set_header(header)
|
108
|
+
|
109
|
+
text_layer = self.tree.find(TEXT_LAYER_TAG)
|
110
|
+
if text_layer is not None and len(text_layer):
|
111
|
+
self.text = text_layer
|
112
|
+
else:
|
113
|
+
self.text = etree.SubElement(self.root, TEXT_LAYER_TAG)
|
114
|
+
|
115
|
+
terms_layer = self.tree.find(TERMS_LAYER_TAG)
|
116
|
+
if text_layer is not None and len(terms_layer):
|
117
|
+
self.terms = terms_layer
|
118
|
+
else:
|
119
|
+
self.terms = None
|
120
|
+
|
121
|
+
dependencies_layer = self.tree.find(DEPENDENCY_LAYER_TAG)
|
122
|
+
if dependencies_layer is not None and len(dependencies_layer):
|
123
|
+
self.dependencies = dependencies_layer
|
124
|
+
else:
|
125
|
+
self.dependencies = None
|
126
|
+
|
127
|
+
chunks_layer = self.tree.find(CHUNKS_LAYER_TAG)
|
128
|
+
if chunks_layer is not None and len(chunks_layer):
|
129
|
+
self.chunks = chunks_layer
|
130
|
+
else:
|
131
|
+
self.chunks = None
|
132
|
+
|
133
|
+
constituency_layer = self.tree.find(CONSTITUENCY_LAYER)
|
134
|
+
if constituency_layer is not None and len(constituency_layer):
|
135
|
+
self.constituency = constituency_layer
|
136
|
+
else:
|
137
|
+
self.constituency = None
|
138
|
+
|
139
|
+
named_entities_layer = self.tree.find(NAMED_ENTITIES_LAYER_TAG)
|
140
|
+
if named_entities_layer is not None and len(named_entities_layer):
|
141
|
+
self.entities = named_entities_layer
|
142
|
+
else:
|
143
|
+
self.entities = None
|
144
|
+
|
145
|
+
coreference_layer = self.tree.find(COREFERENCE_LAYER_TAG)
|
146
|
+
if coreference_layer is not None and len(coreference_layer):
|
147
|
+
self.coreferences = coreference_layer
|
148
|
+
else:
|
149
|
+
self.coreferences = None
|
150
|
+
|
151
|
+
def clear_header(self):
|
152
|
+
self.root.remove(self.kaf_header)
|
153
|
+
self.kaf_header = None
|
154
|
+
|
155
|
+
def set_header(self, kaf_header):
|
156
|
+
if self.kaf_header:
|
157
|
+
for element in kaf_header:
|
158
|
+
self.kaf_header.append(element)
|
159
|
+
self.kaf_header.attrib.update(kaf_header.attrib)
|
160
|
+
else:
|
161
|
+
self.kaf_header = kaf_header
|
162
|
+
self.root.append(self.kaf_header)
|
163
|
+
|
164
|
+
def add_linguistic_processors(self, layer, name, version, time_stamp):
|
165
|
+
if not self.kaf_header:
|
166
|
+
self.kaf_header = etree.SubElement(self.root, KAF_HEADER_TAG)
|
167
|
+
|
168
|
+
layer_find = self.kaf_header.find("./{0}..[@{1}='{2}']".format(LINGUISTIC_PROCESSOR_HEAD, LAYER_ATTRIBUTE, layer))
|
169
|
+
if layer_find:
|
170
|
+
layer = layer_find[0]
|
171
|
+
else:
|
172
|
+
layer = etree.SubElement(self.kaf_header, LINGUISTIC_PROCESSOR_HEAD, {LAYER_ATTRIBUTE: layer})
|
173
|
+
|
174
|
+
etree.SubElement(layer, LINGUISTIC_PROCESSOR_OCCURRENCE_TAG,
|
175
|
+
{NAME_ATTRIBUTE: name, VERSION_ATTRIBUTE: version, TIMESTAMP_ATTRIBUTE: time_stamp})
|
176
|
+
|
177
|
+
def add_word(self, word, wid, **kwargs):
|
178
|
+
"""Add a word to the KAF file.
|
179
|
+
A word have the next parameters/attributes;
|
180
|
+
+ wid: the unique id for the word form.
|
181
|
+
+ sent: sentence id of the token (optional)
|
182
|
+
+ para: paragraph id (optional)
|
183
|
+
+ offset: the offset of the word form (optional)
|
184
|
+
+ length: the length of the original word form (optional)
|
185
|
+
+ page: page id (optional)
|
186
|
+
"""
|
187
|
+
# Prepare the word attributes
|
188
|
+
word_attributes = dict((k, v) for (k, v) in kwargs.iteritems() if k in self.valid_word_attributes)
|
189
|
+
word_attributes[WORD_ID_ATTRIBUTE] = wid
|
190
|
+
# Create a text subnode for the word and set its attributes
|
191
|
+
element = etree.SubElement(self.text, WORD_OCCURRENCE_TAG, word_attributes)
|
192
|
+
try:
|
193
|
+
element.text = word
|
194
|
+
except:
|
195
|
+
element.text = "XXXXXX"
|
196
|
+
return element
|
197
|
+
|
198
|
+
def get_words(self):
|
199
|
+
""" Return all the words in the document"""
|
200
|
+
return self.text[:]
|
201
|
+
|
202
|
+
def get_words_by_id(self, wid):
|
203
|
+
""" Return all the words in the document"""
|
204
|
+
results = self.text.find("{0}[@{1}='{2}']".format(WORD_OCCURRENCE_TAG, WORD_ID_ATTRIBUTE, wid))
|
205
|
+
return results and results[0]
|
206
|
+
|
207
|
+
def add_term(self, tid, pos=None, lemma=None, morphofeat=None, term_type=None, words=(),
             ner=None, external_refs=()):
    """Add a term to the KAF file and return the created element.

    :param tid: unique identifier of the term.
    :param pos: part of speech (optional).
    :param lemma: lemma of the term (optional).
    :param morphofeat: PennTreebank part-of-speech tag (optional).
    :param term_type: type of the term, e.g. "open" (open category) or
        "close" (close category) (optional).
    :param words: ids of the word elements spanned by this term.
    :param ner: named-entity attribute value (optional).
    :param external_refs: iterable of dicts describing external references;
        each dict must provide every key in ``self.valid_externalRef_attributes``.
    :raises Exception: when an external reference lacks a required attribute.
    """
    # Create the terms layer lazily on first use.
    if self.terms is None:
        self.terms = etree.SubElement(self.root, TERMS_LAYER_TAG)

    # TODO Complete external references

    term_attributes = {TERM_ID_ATTRIBUTE: tid}
    if pos:
        term_attributes[POS_ATTRIBUTE] = pos
    if lemma:
        term_attributes[LEMMA_ATTRIBUTE] = lemma
    if term_type:
        term_attributes[TYPE_ATTRIBUTE] = term_type
    if morphofeat:
        term_attributes[MORPHOFEAT_ATTRIBUTE] = morphofeat
    if ner:
        term_attributes[NER_ATTRIBUTE] = ner
    term = etree.SubElement(self.terms, TERM_OCCURRENCE_TAG, term_attributes)

    # Attach the span of bounded words, if any.
    if words:
        span = etree.SubElement(term, SPAN_TAG)
        for word in words:
            etree.SubElement(span, TARGET_TAG, {TARGET_ID_ATTRIBUTE: word})

    if external_refs:
        # Was previously also named "span", shadowing the word span above.
        refs_container = etree.SubElement(term, EXTERNAL_REFERENCES_TAG)
        for external_ref in external_refs:
            # Keep only the recognised attributes, then require all of them.
            ref_attributes = dict((k, v) for (k, v) in external_ref.iteritems()
                                  if k in self.valid_externalRef_attributes)
            for attribute in self.valid_externalRef_attributes:
                if attribute not in ref_attributes:
                    raise Exception(
                        "External reference is missing the {0} attribute".format(attribute))
            etree.SubElement(refs_container, EXTERNAL_REFERENCE_OCCURRENCE_TAG, ref_attributes)
    return term
|
257
|
+
|
258
|
+
def get_terms(self):
    """Return every term element in the terms layer of the document."""
    term_path = "{0}/{1}".format(TERMS_LAYER_TAG, TERM_OCCURRENCE_TAG)
    return self.root.findall(term_path)
|
261
|
+
|
262
|
+
def get_terms_words(self, term):
    """Return the target word references inside the span of *term*."""
    span_path = "{0}/{1}".format(SPAN_TAG, TARGET_TAG)
    return term.findall(span_path)
|
264
|
+
|
265
|
+
def get_terms_references(self, term):
    """Return the external-reference elements attached to *term*."""
    refs_path = "{0}/{1}".format(EXTERNAL_REFERENCES_TAG, EXTERNAL_REFERENCE_OCCURRENCE_TAG)
    return term.findall(refs_path)
|
267
|
+
|
268
|
+
def add_dependency(self, origen, to, rfunc):
    """Add a new dependency relation to the text and return its element.

    :param origen: term id of the source element ("from").
    :param to: term id of the target element.
    :param rfunc: relational function. One of:
        - mod: word introducing the dependent in a head-modifier relation.
        - subj: subject in the grammatical relation Subject-Predicate.
        - csubj, xsubj, ncsubj: csubj and xsubj may be used for clausal
          subjects, controlled from within, or without, respectively;
          ncsubj is a non-clausal subject.
        - dobj: object in the relation between a predicate and its direct object.
        - iobj: relation between a predicate and a non-clausal complement
          introduced by a preposition; type indicates the preposition.
        - obj2: relation between a predicate and the second non-clausal
          complement in ditransitive constructions.
    """
    # Bug fix: "if not self.dependencies" treated an existing-but-empty
    # layer element as missing (a childless Element is falsy), creating a
    # duplicate layer on the second call. Test for None explicitly, as the
    # other add_* methods do.
    if self.dependencies is None:
        self.dependencies = etree.SubElement(self.root, DEPENDENCY_LAYER_TAG)

    dependency_attributes = {DEPENDENCY_FROM_ATTRIBUTE: origen,
                             DEPENDENCY_TO_ATTRIBUTE: to,
                             DEPENDENCY_FUNCTION_ATTRIBUTE: rfunc}
    return etree.SubElement(self.dependencies, DEPENDENCY_OCCURRENCE_TAG, dependency_attributes)
|
291
|
+
|
292
|
+
def get_dependencies(self):
    """Return every dependency element in the dependency layer."""
    dep_path = "{0}/{1}".format(DEPENDENCY_LAYER_TAG, DEPENDENCY_OCCURRENCE_TAG)
    return self.root.findall(dep_path)
|
295
|
+
|
296
|
+
def add_chunk(self, cid, head, phrase, case=None, terms=()):
    """Add a chunk to the KAF document and return its element.

    Chunks are noun or prepositional phrases, spanning terms.

    :param cid: unique identifier.
    :param head: the chunk head's term id.
    :param phrase: type of the phrase; one of:
        - NP: noun phrase
        - VP: verbal phrase
        - PP: prepositional phrase
        - S: sentence
        - O: other
    :param case: declension case (optional).
    :param terms: term ids spanned by the chunk.
    """
    # Bug fix: "if not self.chunks" misfires when the layer element exists
    # but is empty (a childless Element is falsy) and would append a
    # duplicate layer; compare against None instead.
    if self.chunks is None:
        self.chunks = etree.SubElement(self.root, CHUNKS_LAYER_TAG)
    # Prepare the attributes
    chunk_attributes = {CHUNK_ID_ATTRIBUTE: cid, CHUNK_HEAD_ATTRIBUTE: head,
                        CHUNK_PHRASE_ATTRIBUTE: phrase}
    if case:
        chunk_attributes[CHUNK_CASE_ATTRIBUTE] = case
    # Create, and attach, the chunk
    chunk = etree.SubElement(self.chunks, CHUNK_OCCURRENCE_TAG, chunk_attributes)
    # Add the span terms
    if terms:
        spans = etree.SubElement(chunk, SPAN_TAG)
        for term in terms:
            etree.SubElement(spans, TARGET_TAG, {TARGET_ID_ATTRIBUTE: term})
    return chunk
|
324
|
+
|
325
|
+
def get_chunks(self):
    """Return all the chunks of the text.

    Bug fix: this method previously searched the dependency layer
    (copy-paste from get_dependencies), so it never returned chunks.
    """
    return self.root.findall("{0}/{1}".format(CHUNKS_LAYER_TAG, CHUNK_OCCURRENCE_TAG))
|
328
|
+
|
329
|
+
def get_chunk_terms(self, chunk):
    """Return the term targets contained in the span of *chunk*."""
    span_path = "{0}/{1}".format(SPAN_TAG, TARGET_TAG)
    return chunk.findall(span_path)
|
332
|
+
|
333
|
+
def add_entity(self, eid, entity_type, references=()):
    """Add a named entity to the document and return its element.

    :param eid: the identification code of the entity.
    :param entity_type: the type of the entity (omitted when falsy).
    :param references: iterable of references; each reference is itself an
        iterable of term ids and becomes one span under the references group.
    """
    # Create the entities layer lazily on first use.
    if self.entities is None:
        self.entities = etree.SubElement(self.root, NAMED_ENTITIES_LAYER_TAG)

    entity_attributes = {NAMED_ENTITY_ID_ATTRIBUTE: eid}
    if entity_type:
        entity_attributes[NAMED_ENTITY_TYPE_ATTRIBUTE] = entity_type
    entity = etree.SubElement(self.entities, NAMED_ENTITY_OCCURRENCE_TAG, entity_attributes)
    # Consistency fix: was the hard-coded literal "references", while
    # get_entity_references looks the group up via
    # NAMED_ENTITY_REFERENCES_GROUP_TAG — use the same constant here.
    references_tag = etree.SubElement(entity, NAMED_ENTITY_REFERENCES_GROUP_TAG)
    if references:
        for reference in references:
            span = etree.SubElement(references_tag, SPAN_TAG)
            for token in reference:
                etree.SubElement(span, TARGET_TAG, {TARGET_ID_ATTRIBUTE: token})
    return entity
|
354
|
+
|
355
|
+
def get_constituency_trees(self):
    """Return every constituency tree element in the document."""
    tree_path = "{0}/{1}".format(CONSTITUENCY_LAYER, CONSTITUENCY_TREE_TAG)
    return self.root.findall(tree_path)
|
358
|
+
|
359
|
+
def get_contituent_tree_non_terminals(self, tree):
    """Return all the non-terminal constituents of *tree*.

    NOTE(review): the method name misspells "constituent"; kept as-is for
    backward compatibility with existing callers.
    """
    return tree.findall(CONSTITUENCY_NON_TERMINALS)
|
362
|
+
|
363
|
+
def get_contituent_tree_terminals(self, tree):
    """Return all the terminal constituents of *tree*."""
    return tree.findall(CONSTITUENCY_TERMINALS)
|
366
|
+
|
367
|
+
def get_contituent_tree_edges(self, tree):
    """Return all the edges of *tree*."""
    return tree.findall(CONSTITUENCY_EDGES)
|
370
|
+
|
371
|
+
def get_contituent_terminal_words(self, chunk):
    """Return all the word targets spanned by a terminal constituent."""
    span_path = "{0}/{1}".format(SPAN_TAG, TARGET_TAG)
    return chunk.findall(span_path)
|
374
|
+
|
375
|
+
def get_entities(self):
    """Return every named entity element in the document."""
    entity_path = "{0}/{1}".format(NAMED_ENTITIES_LAYER_TAG, NAMED_ENTITY_OCCURRENCE_TAG)
    return self.root.findall(entity_path)
|
378
|
+
|
379
|
+
def get_entity_references(self, named_entity):
    """Return the reference spans of *named_entity*."""
    refs_path = "{0}/{1}".format(NAMED_ENTITY_REFERENCES_GROUP_TAG, SPAN_TAG)
    return named_entity.findall(refs_path)
|
382
|
+
|
383
|
+
def get_entity_reference_span(self, reference):
    """Return the target elements of an entity *reference* span."""
    return reference.findall(TARGET_TAG)
|
386
|
+
|
387
|
+
def add_coreference(self, coid, references=()):
    """Add a coreference cluster to the document and return its element.

    :param coid: the identification code of the cluster.
    :param references: iterable of (token_ids, form) pairs; each pair becomes
        an XML comment with the surface form followed by a span of targets.
    """
    # Create the coreference layer lazily on first use.
    if self.coreferences is None:
        self.coreferences = etree.SubElement(self.root, COREFERENCE_LAYER_TAG)

    cluster = etree.SubElement(self.coreferences, COREFERENCE_OCCURRENCE_TAG,
                               {COREFERENCE_ID_ATTRIBUTE: coid})

    if references:
        for token_ids, form in references:
            # Space out dashes: XML comments must not contain "--".
            cluster.append(etree.Comment(form.decode("utf-8").replace("-", " - ")))
            span = etree.SubElement(cluster, SPAN_TAG)
            for token in token_ids:
                etree.SubElement(span, TARGET_TAG, {TARGET_ID_ATTRIBUTE: token})
    return cluster
|
406
|
+
|
407
|
+
def indent(self, elem, level=0):
    # In-place pretty-printer: fills the .text/.tail whitespace slots of the
    # element tree so the serialised XML comes out indented, recursing one
    # level deeper per nesting level.
    # NOTE(review): the indent unit renders here as a single space per level;
    # confirm against the upstream source (this recipe usually uses two).
    i = "\n" + level * " "
    if len(elem):
        # Container element: push its first child onto a new, deeper line
        # (only when the slot holds no meaningful text already).
        if not elem.text or not elem.text.strip():
            elem.text = i + " "
        if not elem.tail or not elem.tail.strip():
            elem.tail = i
        for child in elem:
            self.indent(child, level+1)
        # This seeks the last child processed in the for loop above — it is
        # deliberately outside the loop body, not a code indentation error:
        # the final child's tail must close back to the parent's level.
        if not child.tail or not child.tail.strip():
            child.tail = i
    else:
        # Leaf element: only align its tail with the current level.
        if level and (not elem.tail or not elem.tail.strip()):
            elem.tail = i
|
422
|
+
|
423
|
+
def write(self, output, encoding):
    """Write the document into a file.

    :param output: the output target for the document; may be a file-type
        object or a file name.
    :param encoding: character encoding used when serialising the tree.
    """
    # Pretty-print in place before serialising.
    self.indent(self.root)
    serialized = etree.tostring(self.root, encoding=encoding)
    output.write(serialized)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: opener-coreference-base
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.
|
4
|
+
version: 2.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- development@olery.com
|
@@ -276,6 +276,10 @@ files:
|
|
276
276
|
- core/site-packages/pre_build/VUKafParserPy/KafDataObjectsMod.py
|
277
277
|
- core/site-packages/pre_build/VUKafParserPy/KafParserMod.py
|
278
278
|
- core/site-packages/pre_build/VUKafParserPy/__init__.py
|
279
|
+
- core/vendor/dictconfig/__init__.py
|
280
|
+
- core/vendor/dictconfig/dictconfig.py
|
281
|
+
- core/vendor/pykaf/LICENSE.txt
|
282
|
+
- core/vendor/pykaf/__init__.py
|
279
283
|
- ext/hack/Rakefile
|
280
284
|
- ext/hack/support.rb
|
281
285
|
- lib/opener/coreferences/base.rb
|