hyperbase 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hyperbase/__init__.py +6 -0
- hyperbase/constants.py +4 -0
- hyperbase/hyperedge.py +1127 -0
- hyperbase/parsers/__init__.py +39 -0
- hyperbase/parsers/correctness.py +265 -0
- hyperbase/parsers/parser.py +41 -0
- hyperbase/parsers/utils.py +19 -0
- hyperbase/patterns/__init__.py +29 -0
- hyperbase/patterns/argroles.py +142 -0
- hyperbase/patterns/atoms.py +98 -0
- hyperbase/patterns/common.py +172 -0
- hyperbase/patterns/counter.py +153 -0
- hyperbase/patterns/entrypoints.py +87 -0
- hyperbase/patterns/matcher.py +245 -0
- hyperbase/patterns/merge.py +52 -0
- hyperbase/patterns/properties.py +59 -0
- hyperbase/patterns/utils.py +118 -0
- hyperbase/patterns/variables.py +161 -0
- hyperbase-0.8.0.dist-info/METADATA +64 -0
- hyperbase-0.8.0.dist-info/RECORD +23 -0
- hyperbase-0.8.0.dist-info/WHEEL +4 -0
- hyperbase-0.8.0.dist-info/licenses/AUTHORS +5 -0
- hyperbase-0.8.0.dist-info/licenses/LICENSE +21 -0
hyperbase/hyperedge.py
ADDED
|
@@ -0,0 +1,1127 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections import Counter
|
|
4
|
+
from collections.abc import Iterable
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
# Sort rank of every argument-role character; lower ranks come first when the
# arguments of an edge are reordered by role (see Hyperedge.normalized()).
argrole_order: dict[str, int] = {
    role: rank for rank, role in enumerate('mspacoitjxr?', start=-1)
}


# Role characters accepted for connectors of type P (predicate).
valid_p_argroles: set[str] = set('spacoitjxr?')
# Role characters accepted for connectors of type B (builder).
valid_b_argroles: set[str] = set('ma')
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def str2atom(s: str) -> str:
    """Converts a string into a valid atom.

    The string is lower-cased and every character that has a special
    meaning in edge notation (percent sign, slash, space, parentheses,
    dot, asterisk, ampersand, at sign, line feed and carriage return) is
    replaced by its percent-encoded form.
    """
    # A single translation pass is enough: none of the replacement texts
    # contains a character that is itself escaped.
    escapes = str.maketrans({
        '%': '%25',
        '/': '%2f',
        ' ': '%20',
        '(': '%28',
        ')': '%29',
        '.': '%2e',
        '*': '%2a',
        '&': '%26',
        '@': '%40',
        '\n': '%0a',
        '\r': '%0d',
    })
    return s.lower().translate(escapes)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _edge_str_has_outer_parens(edge_str: str) -> bool:
|
|
48
|
+
"""Check if string representation of edge is delimited by outer
|
|
49
|
+
parenthesis.
|
|
50
|
+
"""
|
|
51
|
+
if len(edge_str) < 2:
|
|
52
|
+
return False
|
|
53
|
+
return edge_str[0] == '('
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def split_edge_str(edge_str: str) -> tuple[str, ...] | None:
    """Shallow split into tokens of a string representation of an edge,
    without outer parenthesis.

    Tokens are separated by spaces at nesting level zero; a parenthesised
    group is returned as a single token, parentheses included. Returns
    None when the parentheses are unbalanced.
    """
    tokens: list[str] = []
    token_start = 0
    nesting = 0
    in_token = False

    for pos, char in enumerate(edge_str):
        if char == ' ':
            # a space only ends a token outside of any parenthesised group
            if in_token and nesting == 0:
                tokens.append(edge_str[token_start:pos])
                in_token = False
        elif char == '(':
            if nesting == 0:
                in_token = True
                token_start = pos
            nesting += 1
        elif char == ')':
            nesting -= 1
            if nesting == 0:
                tokens.append(edge_str[token_start:pos + 1])
                in_token = False
            elif nesting < 0:
                # TODO: throw exception?
                return None
        elif not in_token:
            in_token = True
            token_start = pos

    if in_token:
        if nesting > 0:
            # TODO: throw exception?
            return None
        tokens.append(edge_str[token_start:])

    return tuple(tokens)
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def _parsed_token(token: str) -> Hyperedge | None:
    """Turn one token produced by split_edge_str() into an edge.

    A token that starts with an opening parenthesis is parsed recursively
    with hedge(); any other token becomes an Atom.
    """
    if not _edge_str_has_outer_parens(token):
        return Atom((token,))
    return hedge(token)
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def hedge(source: str | Hyperedge | list[Any] | tuple[Any, ...]) -> Hyperedge | None:
    """Create a hyperedge.

    'source' can be:
    - a plain list or tuple, whose items are converted recursively;
    - a string in edge notation, which is parsed;
    - an existing Hyperedge, Atom or UniqueAtom, which is returned as is.

    Returns None if the string yields no tokens (including the case of
    unbalanced parentheses) or if 'source' is of any other type.
    """
    source_type = type(source)

    # Exact type checks are intentional: Hyperedge is itself a tuple subclass
    # and must not be rebuilt by the list/tuple branch.
    if source_type in {tuple, list}:
        return Hyperedge(tuple(hedge(item) for item in source))

    if source_type is str:
        text = source.strip().replace('\n', ' ')
        has_parens = _edge_str_has_outer_parens(text)
        inner_text = text[1:-1] if has_parens else text

        tokens = split_edge_str(inner_text)
        if not tokens:
            return None

        parsed = tuple(_parsed_token(token) for token in tokens)
        first = parsed[0]
        if len(parsed) > 1 or type(first) is Hyperedge:
            return Hyperedge(parsed)
        if isinstance(first, Atom):
            # a single atom: remember whether it was written as "(atom)"
            return Atom(first, has_parens)
        return None

    if source_type in {Hyperedge, Atom, UniqueAtom}:
        return source  # type: ignore

    return None
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def build_atom(text: str, *parts: str) -> Atom:
    """Build an atom from text and other parts.

    'text' is converted with str2atom() and becomes the first part of the
    atom; the remaining non-empty 'parts' are appended, separated by '/'.
    """
    pieces = [str2atom(text)]
    pieces.extend(part for part in parts if part)
    return Atom(('/'.join(pieces),))
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
class Hyperedge(tuple):  # type: ignore[type-arg]
    """Non-atomic hyperedge.

    A hyperedge is an immutable tuple of sub-edges. The first element is
    treated as the connector by the type-inference and argument-role
    methods.
    """
    def __new__(cls, edges: Iterable[Hyperedge | None]) -> Hyperedge:
        return super(Hyperedge, cls).__new__(cls, tuple(edges))

    @property
    def atom(self) -> bool:
        """True if edge is an atom."""
        return False

    @property
    def not_atom(self) -> bool:
        """True if edge is not an atom."""
        return True

    @property
    def t(self) -> str:
        """ Edge type.
        (this property is a shortcut for Hyperedge.type())
        """
        return self.type()

    @property
    def mt(self) -> str:
        """ Edge main type.
        (this property is a shortcut for Hyperedge.mtype())
        """
        return self.mtype()

    @property
    def ct(self) -> str | None:
        """ Edge connector type.
        (this property is a shortcut for Hyperedge.connector_type())
        """
        return self.connector_type()

    @property
    def cmt(self) -> str | None:
        """ Edge connector main type.
        (this property is a shortcut for Hyperedge.connector_mtype())
        """
        return self.connector_mtype()

    def is_atom(self) -> bool:
        """
        .. deprecated:: 0.6.0
            Please use the properties .atom and .not_atom instead.

        Checks if edge is an atom.
        """
        return False

    def to_str(self, roots_only: bool = False) -> str:
        """Converts edge to its string representation.

        Keyword argument:
        roots_only -- only the roots of the atoms will be used to create
        the string representation.
        """
        # falsy items (None or empty edges) are left out of the output
        inner = ' '.join(edge.to_str(roots_only=roots_only) for edge in self if edge)
        return f'({inner})'

    def label(self) -> str:
        """Generate human-readable label for edge.

        Edges with two elements keep their order. If the last part of the
        connector atom is '.', the connector is dropped. Otherwise the
        connector is placed after the first argument.
        """
        conn_atom = self.connector_atom()
        ordered: tuple[Any, ...]
        if len(self) == 2:
            ordered = self
        elif conn_atom is not None and conn_atom.parts()[-1] == '.':
            ordered = self[1:]
        else:
            ordered = (self[1], self[0]) + self[2:]
        return ' '.join(item.label() for item in ordered)

    def inner_atom(self) -> Atom:
        """The inner atom inside of a modifier structure.

        For example, consider:
        (red/M shoes/C)
        The inner atom is:
        shoes/C
        Or, the more complex case:
        ((and/J slow/M steady/M) go/P)
        Yields:
        go/P

        This method should not be used on structures that contain more than
        one inner atom, for example concepts constructed with builders or
        relations.

        The inner atom of an atom is itself.
        """
        return self[1].inner_atom()  # type: ignore[no-any-return]

    def connector_atom(self) -> Atom | None:
        """The inner atom of the connector.

        For example, consider:
        ((does/M (not/M like/P.so)) john/C chess/C)
        The connector atom is:
        like/P.so

        The connector atom of an atom is None.
        """
        return self[0].inner_atom()  # type: ignore[no-any-return]

    def atoms(self) -> set[Atom]:
        """Returns the set of atoms contained in the edge.

        For example, consider the edge:
        (the/md (of/br mayor/cc (the/md city/cs)))
        in this case, edge.atoms() returns:
        {the/md, of/br, mayor/cc, city/cs}
        """
        found: set[Atom] = set()
        for item in self:
            found.update(item.atoms())
        return found

    def all_atoms(self) -> list[Atom]:
        """Returns a list of all the atoms contained in the edge. Unlike
        atoms(), which does not return repeated atoms, all_atoms() does
        return repeated atoms if they are different objects.

        For example, consider the edge:
        (the/md (of/br mayor/cc (the/md city/cs)))
        in this case, edge.all_atoms() returns:
        [the/md, of/br, mayor/cc, the/md, city/cs]
        """
        collected: list[Atom] = []
        for item in self:
            collected.extend(item.all_atoms())
        return collected

    def size(self) -> int:
        """The size of an edge is its total number of atoms, at all depths."""
        return sum(edge.size() for edge in self)

    def depth(self) -> int:
        """Returns maximal depth of edge, an atom has depth 0."""
        return max((item.depth() for item in self), default=0) + 1

    def roots(self) -> Hyperedge:
        """Returns edge with root-only atoms."""
        return Hyperedge(item.roots() for item in self)

    def contains(self, needle: str, deep: bool = False) -> bool:
        """Checks if 'needle' is contained in edge.

        Items are compared to 'needle' with ==.
        NOTE(review): 'needle' is annotated as str, but the items of an edge
        are edges, so an edge is presumably the expected argument -- confirm.

        Keyword argument:
        deep -- search recursively (default False)"""
        for item in self:
            if item == needle:
                return True
            if deep and item.contains(needle, True):
                return True
        return False

    def subedges(self) -> set[Hyperedge]:
        """Returns all the subedges contained in the edge, including atoms
        and itself.
        """
        edges: set[Hyperedge] = {self}
        for item in self:
            edges.update(item.subedges())
        return edges

    def insert_first_argument(self, argument: Hyperedge) -> Hyperedge:
        """Returns an edge built by placing 'argument' as the first item
        after the connector of this edge. If this edge is an atom, then
        it becomes the connector of the returned edge.

        For example, considering the 'edge' (a) and the 'argument' (b), this
        function returns:
        (a b)

        Considering the 'edge' (a b c) and the 'argument' (d e), it
        returns:
        (a (d e) b c)
        """
        return Hyperedge((self[0], argument) + self[1:])

    def connect(self, arguments: tuple[Hyperedge, ...] | list[Hyperedge] | None) -> Hyperedge:
        """Returns an edge built by adding the items in 'arguments' to the
        end of this edge. 'arguments' must be a collection.

        If 'arguments' is None or empty, this same edge is returned.

        For example, connecting the edge (a b) with the 'arguments'
        (c d) produces:
        (a b c d)
        """
        if arguments is None or len(arguments) == 0:
            return self
        return Hyperedge(self + arguments)

    def sequence(self, entity: Hyperedge, before: bool, flat: bool = True) -> Hyperedge:
        """Returns an edge built by sequencing the 'entity', if it's an
        atom, or the elements of 'entity' if it is an edge, either before
        or after the elements of this edge.

        If flat is False, then both this edge and 'entity' are treated as
        self-contained edges when building the new edge.

        For example, connecting the edge (a b) and the 'entity' c
        produces, if before is True:
        (c a b)
        and if before is False:
        (a b c)
        Connecting the edge (a b) and the 'entity' (c d)
        produces, if before is True:
        (c d a b)
        and if before is False:
        (a b c d)
        This last example, if 'flat' is False, becomes respectively:
        ((c d) (a b))
        ((a b) (c d))
        """
        if not flat:
            return Hyperedge((entity, self)) if before else Hyperedge((self, entity))
        if not before:
            return self + entity
        # An atom has to be inserted as a single element. Concatenating it
        # as a tuple would splice its raw string into the new edge.
        if isinstance(entity, Hyperedge) and entity.atom:
            head: tuple[Any, ...] = (entity,)
        else:
            head = tuple(entity)
        return Hyperedge(head + tuple(self))

    def replace_atom(self, old: Atom, new: Hyperedge, unique: bool = False) -> Hyperedge:
        """Returns edge built by replacing every instance of 'old' in
        this edge with 'new'.

        Keyword argument:
        unique -- match only the exact same instance of the atom, i.e.
        UniqueAtom(self) == UniqueAtom(old) (default: False)
        """
        return Hyperedge(item.replace_atom(old, new, unique=unique) for item in self)

    def simplify(self, subtypes: bool = False, argroles: bool = False, namespaces: bool = True) -> Hyperedge | None:
        """Returns a version of the edge with simplified atoms, for example
        removing subtypes, subroles or namespaces.

        Keyword arguments:
        subtypes -- include subtypes (default: False).
        argroles -- include argroles (default: False).
        namespaces -- include namespaces (default: True).
        """
        return hedge([subedge.simplify(subtypes=subtypes, argroles=argroles, namespaces=namespaces)
                      for subedge in self])

    def type(self) -> str:
        """Returns the type of this edge as a string.
        Type inference is performed.

        The main type of the connector decides the result:
        P -> R, T -> S and B -> C, each followed by the rest of the
        connector type. For M the type of the first argument is returned,
        and for J the main type of the first argument.

        Raises RuntimeError if the connector has any other main type, or if
        an M or J connector has no argument.
        """
        ptype = self[0].type()
        main = ptype[0]

        if main in ('M', 'J'):
            if len(self) < 2:
                raise RuntimeError(f'Edge is malformed, type cannot be determined: {self!s}')
            if main == 'M':
                return self[1].type()  # type: ignore[no-any-return]
            return self[1].mtype()  # type: ignore[no-any-return]

        outer_type = {'P': 'R', 'T': 'S', 'B': 'C'}.get(main)
        if outer_type is None:
            raise RuntimeError(f'Edge is malformed, type cannot be determined: {self!s}')
        return f'{outer_type}{ptype[1:]}'

    def connector_type(self) -> str | None:
        """Returns the type of the edge's connector.
        If the edge has no connector (i.e. it's an atom), then None is
        returned.
        """
        return self[0].type()  # type: ignore[no-any-return]

    def mtype(self) -> str:
        """Returns the main type of this edge as a string of one character.
        Type inference is performed.
        """
        return self.type()[0]

    def connector_mtype(self) -> str | None:
        """Returns the main type of the edge's connector.
        If the edge has no connector (i.e. it's an atom), then None is
        returned.
        """
        ct = self.connector_type()
        return ct[0] if ct else None

    def atom_with_type(self, atom_type: str) -> Atom | None:
        """Returns the first atom found in the edge that has the given
        'atom_type', or whose type starts with 'atom_type'.
        If no such atom is found, returns None.

        For example, given the edge (+/B a/Cn b/Cp) and the 'atom_type'
        C, this function returns:
        a/Cn
        If the 'atom_type' is 'Cp', then it will return:
        b/Cp
        """
        for item in self:
            atom: Atom | None = item.atom_with_type(atom_type)
            if atom:
                return atom
        return None

    def contains_atom_type(self, atom_type: str) -> bool:
        """Checks if the edge contains any atom with the given type.
        The edge is searched recursively, so the atom can appear at any depth.
        """
        return self.atom_with_type(atom_type) is not None

    def argroles(self) -> str:
        """Returns the argument roles string of the edge, if it exists.
        Otherwise returns empty string.

        Argument roles can be returned for the entire edge that they apply
        to, which can be a relation (R) or a concept (C). For example:

        ((not/M is/P.sc) bob/C sad/C) has argument roles "sc",
        (of/B.ma city/C berlin/C) has argument roles "ma".

        Argument roles can also be returned for the connectors that define
        the outer edge, which can be of type predicate (P) or builder (B). For
        example:

        (not/M is/P.sc) has argument roles "sc",
        of/B.ma has argument roles "ma".
        """
        et = self.mtype()
        if et in {'R', 'C'} and self[0].mtype() in {'B', 'P'}:
            return self[0].argroles()  # type: ignore[no-any-return]
        if et not in {'B', 'P'}:
            return ''
        return self[1].argroles()  # type: ignore[no-any-return]

    def has_argroles(self) -> bool:
        """Returns True if the edge has argroles, False otherwise."""
        return self.argroles() != ''

    def replace_argroles(self, argroles: str | None) -> Hyperedge:
        """Returns an edge with the argroles of the connector atom replaced
        with the provided string.
        Returns same edge if the atom does not contain a role part."""
        st = self.mtype()
        if st in {'C', 'R'}:
            # the roles live in the connector (first element)
            return Hyperedge((self[0].replace_argroles(argroles),) + self[1:])
        if st in {'P', 'B'}:
            # the edge is itself a connector: the roles live in its second element
            return Hyperedge((self[0], self[1].replace_argroles(argroles)) + self[2:])
        return self

    def insert_argrole(self, argrole: str, pos: int) -> Hyperedge:
        """Returns an edge with the given argrole inserted at the specified
        position in the argroles of the connector atom.
        Same restrictions as in replace_argroles() apply."""
        st = self.mtype()
        if st in {'C', 'R'}:
            return Hyperedge((self[0].insert_argrole(argrole, pos),) + self[1:])
        if st in {'P', 'B'}:
            return Hyperedge((self[0], self[1].insert_argrole(argrole, pos)) + self[2:])
        return self

    def insert_edge_with_argrole(self, edge: Hyperedge, argrole: str, pos: int) -> Hyperedge:
        """Returns a new edge with the provided edge and its argroles inserted
        at the specified position."""
        new_edge = self.insert_argrole(argrole, pos)
        # argument positions are shifted by one because of the connector
        split_at = pos + 1
        return Hyperedge(tuple(new_edge[:split_at]) + (edge,) + tuple(new_edge[split_at:]))

    def edges_with_argrole(self, argrole: str) -> list[Hyperedge]:
        """Returns the list of edges with the given argument role.

        The roles are read from the connector; surrounding braces and commas
        are ignored. Roles without a matching argument are skipped.
        """
        roles = self[0].argroles()
        if roles.startswith('{'):
            roles = roles[1:-1]
        roles = roles.replace(',', '')
        last_pos = len(self) - 1
        return [self[pos + 1]
                for pos, role in enumerate(roles)
                if role == argrole and pos < last_pos]

    def main_concepts(self) -> list[Hyperedge]:
        """Returns the list of main concepts in an concept edge.
        A main concept is a central concept in a built concept, e.g.:
        in ('s/Bp.am zimbabwe/Cp economy/Cn.s), economy/Cn.s is the main
        concept.

        If entity is not an edge, or its connector is not of type builder,
        or the builder does not contain concept role annotations, or no
        concept is annotated as the main one, then an empty list is
        returned.
        """
        if self[0].mtype() == 'B':
            return self.edges_with_argrole('m')
        return []

    def replace_main_concept(self, new_main: Hyperedge) -> Hyperedge | None:
        """Returns an edge in which the main concept is replaced by
        'new_main'.

        For a modifier connector the result is (connector new_main). For a
        builder with exactly two arguments and argroles 'ma' or 'am', the
        argument with role 'm' is replaced. In every other case, including
        edges whose main type is not C, None is returned.

        TODO: test
        """
        if self.mtype() != 'C':
            return None
        conn_mtype = self[0].mtype()
        if conn_mtype == 'M':
            return hedge((self[0], new_main))
        if conn_mtype == 'B' and len(self) == 3:
            roles = self[0].argroles()
            if roles == 'ma':
                return hedge((self[0], new_main, self[2]))
            if roles == 'am':
                return hedge((self[0], self[1], new_main))
        return None

    def check_correctness(self) -> dict[Hyperedge, list[tuple[str, str]]]:
        """Checks the structure of this edge and, recursively, of all its
        subedges.

        Returns a dictionary mapping every edge where a problem was found
        to a list of (error code, message) pairs. The dictionary is empty
        if no problem was found.
        """
        output: dict[Hyperedge, list[tuple[str, str]]] = {}
        errors: list[tuple[str, str]] = []

        def check_arg_types(allowed: set[str], code: str, kind: str) -> None:
            # record one error for every argument whose main type is not allowed
            for arg in self[1:]:
                at = arg.mtype()
                if at not in allowed:
                    message = '{} argument {} has incorrect type: {}'.format(kind, arg.to_str(), at)
                    errors.append((code, message))

        ct = self[0].mtype()
        # check if connector has valid type
        if ct not in {'P', 'M', 'B', 'T', 'J'}:
            errors.append(('conn-bad-type', 'connector has incorrect type: {}'.format(ct)))

        # check if modifier structure is correct
        if ct == 'M':
            if len(self) != 2:
                errors.append(('mod-1-arg', 'modifiers can only have one argument'))
        # check if builder structure is correct
        elif ct == 'B':
            if len(self) != 3:
                errors.append(('build-2-args', 'builders can only have two arguments'))
            check_arg_types({'C'}, 'build-arg-bad-type', 'builder')
        # check if trigger structure is correct
        elif ct == 'T':
            if len(self) != 2:
                errors.append(('trig-1-arg', 'triggers can only have one arguments'))
            check_arg_types({'C', 'R'}, 'trig-bad-arg-type', 'trigger')
        # check if predicate structure is correct
        elif ct == 'P':
            check_arg_types({'C', 'R', 'S'}, 'pred-arg-bad-type', 'predicate')
        # check if conjunction structure is correct
        elif ct == 'J':
            if len(self) < 3:
                errors.append(('conj-2-args-min', 'conjunctions must have at least two arguments'))

        # check argrole counts
        if ct in {'P', 'B'}:
            try:
                ars = self.argroles()
            except RuntimeError:
                # malformed edges are detected elsewhere
                pass
            else:
                if len(ars) > 0:
                    if ct == 'P':
                        valid_roles, code = valid_p_argroles, 'pred-bad-arg-role'
                    else:
                        valid_roles, code = valid_b_argroles, 'build-bad-arg-role'
                    for ar in ars:
                        if ar not in valid_roles:
                            errors.append((code, f'{ar} is not a valid argument role for connector of type {ct}'))

                    if len(ars) != len(self) - 1:
                        errors.append(('bad-num-argroles', 'number of argroles must match number of arguments'))

                    # these roles can appear at most once
                    ars_counts = Counter(ars)
                    for role in 'socipa':
                        if ars_counts[role] > 1:
                            errors.append((f'argrole-{role}-1-max', f'argrole {role} can only be used once'))
                else:
                    errors.append(('no-argroles', 'Connectors of type P or B must have argument roles'))

        if len(errors) > 0:
            output[self] = errors

        for subedge in self:
            output.update(subedge.check_correctness())

        return output

    def normalized(self) -> Hyperedge | None:
        """Returns a version of the edge in which, at every level, the
        arguments are sorted by the rank that argrole_order gives to their
        argument roles.

        Edges whose connector has no argument roles keep their argument
        order; their subedges are still normalized.
        """
        edge: Hyperedge = self
        conn = edge[0]
        ar = conn.argroles()
        if ar != '':
            if ar[0] == '{':
                ar = ar[1:-1]
            # sorted() is stable, so arguments with the same role keep their order
            by_role = sorted(zip(ar, edge[1:]), key=lambda role_edge: argrole_order[role_edge[0]])
            new_edge = hedge([conn] + [role_edge[1] for role_edge in by_role])
            if not new_edge:
                return None
            edge = new_edge
        return hedge([subedge.normalized() for subedge in edge])

    def __add__(self, other: Hyperedge | tuple[Any, ...] | list[Any]) -> Hyperedge:
        """Concatenation that always yields a Hyperedge.

        An atom is appended as a single element; any other operand
        contributes its items.
        """
        if isinstance(other, Hyperedge) and other.atom:
            tail: tuple[Any, ...] = (other,)
        else:
            tail = tuple(other)
        return Hyperedge(tuple.__add__(self, tail))

    def __str__(self) -> str:
        return self.to_str()

    def __repr__(self) -> str:
        return self.to_str()
|
|
694
|
+
|
|
695
|
+
|
|
696
|
+
# Store parens attribute in a dict by id since we can't add attributes to tuple subclasses
|
|
697
|
+
_atom_parens: dict[int, bool] = {}
|
|
698
|
+
|
|
699
|
+
|
|
700
|
+
class Atom(Hyperedge):
|
|
701
|
+
"""Atomic hyperedge."""
|
|
702
|
+
def __new__(cls, edge: tuple[str, ...] | Atom, parens: bool = False) -> Atom:
|
|
703
|
+
atom = super(Hyperedge, cls).__new__(cls, tuple(edge))
|
|
704
|
+
_atom_parens[id(atom)] = parens
|
|
705
|
+
return atom
|
|
706
|
+
|
|
707
|
+
@property
|
|
708
|
+
def parens(self) -> bool:
|
|
709
|
+
"""Whether this atom has parentheses."""
|
|
710
|
+
return _atom_parens.get(id(self), False)
|
|
711
|
+
|
|
712
|
+
@property
|
|
713
|
+
def atom(self) -> bool:
|
|
714
|
+
"""True if edge is an atom."""
|
|
715
|
+
return True
|
|
716
|
+
|
|
717
|
+
@property
|
|
718
|
+
def not_atom(self) -> bool:
|
|
719
|
+
"""True if edge is not an atom."""
|
|
720
|
+
return False
|
|
721
|
+
|
|
722
|
+
def is_atom(self) -> bool:
|
|
723
|
+
"""
|
|
724
|
+
.. deprecated:: 0.6.0
|
|
725
|
+
Please use the properties .atom and .not_atom instead.
|
|
726
|
+
|
|
727
|
+
Checks if edge is an atom.
|
|
728
|
+
"""
|
|
729
|
+
return True
|
|
730
|
+
|
|
731
|
+
def parts(self) -> list[str]:
|
|
732
|
+
"""Splits atom into its parts."""
|
|
733
|
+
return self[0].split('/') # type: ignore[no-any-return]
|
|
734
|
+
|
|
735
|
+
def root(self) -> str:
|
|
736
|
+
"""Extracts the root of an atom
|
|
737
|
+
(e.g. the root of hyperbase/C/1 is hyperbase)."""
|
|
738
|
+
return self.parts()[0]
|
|
739
|
+
|
|
740
|
+
def replace_atom_part(self, part_pos: int, part: str) -> Atom:
|
|
741
|
+
"""Build a new atom by replacing an atom part in a given atom."""
|
|
742
|
+
parts = self.parts()
|
|
743
|
+
parts[part_pos] = part
|
|
744
|
+
atom = '/'.join([part for part in parts if part])
|
|
745
|
+
return Atom((atom,))
|
|
746
|
+
|
|
747
|
+
def to_str(self, roots_only: bool = False) -> str:
|
|
748
|
+
"""Converts atom to its string representation.
|
|
749
|
+
|
|
750
|
+
Keyword argument:
|
|
751
|
+
roots_only -- only the roots of the atoms will be used to create
|
|
752
|
+
the string representation.
|
|
753
|
+
"""
|
|
754
|
+
if roots_only:
|
|
755
|
+
atom_str = self.root()
|
|
756
|
+
else:
|
|
757
|
+
atom_str = str(self[0])
|
|
758
|
+
if self.parens:
|
|
759
|
+
return '({})'.format(atom_str)
|
|
760
|
+
else:
|
|
761
|
+
return atom_str
|
|
762
|
+
|
|
763
|
+
def label(self) -> str:
|
|
764
|
+
"""Generate human-readable label from entity."""
|
|
765
|
+
label = self.root()
|
|
766
|
+
|
|
767
|
+
label = label.replace('%25', '%')
|
|
768
|
+
label = label.replace('%2f', '/')
|
|
769
|
+
label = label.replace('%20', ' ')
|
|
770
|
+
label = label.replace('%28', '(')
|
|
771
|
+
label = label.replace('%29', ')')
|
|
772
|
+
label = label.replace('%2e', '.')
|
|
773
|
+
label = label.replace('%2a', '*')
|
|
774
|
+
label = label.replace('%26', '&')
|
|
775
|
+
label = label.replace('%40', '@')
|
|
776
|
+
|
|
777
|
+
return label
|
|
778
|
+
|
|
779
|
+
def inner_atom(self) -> Atom:
    """Return the inner atom inside of a modifier structure.

    For example, consider:
    (red/M shoes/C)
    The inner atom is:
    shoes/C
    Or, the more complex case:
    ((and/J slow/M steady/M) go/P)
    Yields:
    go/P

    This method should not be used on structures that contain more than
    one inner atom, for example concepts constructed with builders or
    relations.

    An atom is its own inner atom, so this implementation returns self.
    """
    return self
|
|
798
|
+
|
|
799
|
+
def connector_atom(self) -> Atom | None:
    """Return the inner atom of the connector.

    For example, consider:
    (does/M (not/M like/P.so) john/C chess/C)
    The connector atom is:
    like/P.so

    An atom has no connector, so this implementation always returns None.
    """
    return None
|
|
810
|
+
|
|
811
|
+
def atoms(self) -> set[Atom]:
    """Return the set of atoms contained in the edge.

    For example, consider the edge:
    (the/Md (of/Br mayor/Cc (the/Md city/Cs)))
    in this case, edge.atoms() returns the set:
    {the/Md, of/Br, mayor/Cc, city/Cs}

    For an atom, the result is the one-element set holding the atom itself.
    """
    return {self}
|
|
820
|
+
|
|
821
|
+
def all_atoms(self) -> list[Atom]:
    """Return a list of all the atoms contained in the edge.

    Unlike atoms(), which does not return repeated atoms, all_atoms()
    does return repeated atoms if they are different objects.

    For example, consider the edge:
    (the/Md (of/Br mayor/Cc (the/Md city/Cs)))
    in this case, edge.all_atoms() returns:
    [the/Md, of/Br, mayor/Cc, the/Md, city/Cs]

    For an atom, the result is a one-element list holding the atom itself.
    """
    return [self]
|
|
832
|
+
|
|
833
|
+
def size(self) -> int:
    """Return the size of the edge: its total number of atoms at all depths.

    An atom always has size 1.
    """
    return 1
|
|
836
|
+
|
|
837
|
+
def depth(self) -> int:
    """Return the maximal depth of the edge; an atom has depth 0."""
    return 0
|
|
840
|
+
|
|
841
|
+
def roots(self) -> Atom:
    """Return a new atom that consists only of this atom's root."""
    root_str = self.root()
    return Atom((root_str,))
|
|
844
|
+
|
|
845
|
+
def contains(self, needle: str, deep: bool = False) -> bool:
    """Check if 'needle' is contained in the edge.

    For an atom this is an equality test between the atom string and
    'needle'.

    Keyword argument:
    deep -- search recursively (default: False). It is not used by
    this atom implementation.
    """
    atom_str = self[0]
    return atom_str == needle  # type: ignore[no-any-return]
|
|
851
|
+
|
|
852
|
+
def subedges(self) -> set[Hyperedge]:
    """Return all the subedges contained in the edge, including atoms
    and the edge itself.

    For an atom, the result is the one-element set holding the atom.
    """
    return {self}
|
|
857
|
+
|
|
858
|
+
def insert_first_argument(self, argument: Hyperedge) -> Hyperedge:
    """Return an edge built by placing 'argument' as the first item
    after the connector of this edge. If this edge is an atom, then
    it becomes the connector of the returned edge.

    For example, considering the 'edge' (a) and the 'argument' (b), this
    function returns:
    (a b)

    Considering the 'edge' (a b c) and the 'argument' (d e), it
    returns:
    (a (d e) b c)

    This atom implementation builds the two-element edge (self argument).
    """
    pair = (self, argument)
    return Hyperedge(pair)
|
|
872
|
+
|
|
873
|
+
def replace_atom(self, old: Atom, new: Hyperedge, unique: bool = False) -> Hyperedge:
    """Return the edge built by replacing every instance of 'old' in
    this edge with 'new'.

    For an atom, this returns 'new' when the atom matches 'old' and the
    atom itself otherwise.

    Keyword argument:
    unique -- match only the exact same instance of the atom, i.e.
    UniqueAtom(self) == UniqueAtom(old) (default: False)
    """
    if unique:
        matched = UniqueAtom(self) == UniqueAtom(old)
    else:
        matched = self == old
    return new if matched else self
|
|
888
|
+
|
|
889
|
+
def role(self) -> list[str]:
    """Return the role of this atom as a list of subrole strings.

    The role of an atom is its second part, right after the root.
    A dot notation is used to separate the subroles. For example,
    the role of hyperbase/Cp.s/1 is:

    Cp.s

    For this case, this function returns:

    ['Cp', 's']

    If the atom only has a root, it is assumed to be a conjunction.
    In this case, this function returns the role with just the
    generic conjunction type:

    ['J'].
    """
    pieces: list[str] = self[0].split('/')
    if len(pieces) >= 2:
        return pieces[1].split('.')
    # Root-only atom: fall back to the generic conjunction role.
    return ['J']
|
|
913
|
+
|
|
914
|
+
def simplify(self, subtypes: bool = False, argroles: bool = False, namespaces: bool = True) -> Atom:
    """Return a simplified version of the atom, for example removing
    subtypes, subroles or namespaces.

    An atom that only has a root is returned unchanged.

    Keyword arguments:
    subtypes -- include subtype (default: False).
    argroles -- include argroles (default: False).
    namespaces -- include namespaces (default: True).
    """
    components = self.parts()
    if len(components) < 2:
        return self

    # Full type when subtypes are requested, main type otherwise.
    new_role = self.type() if subtypes else self.mtype()

    if argroles:
        roles = self.argroles()
        if roles:
            new_role = '{}.{}'.format(new_role, roles)

    components[1] = new_role

    if not namespaces:
        # Keep only the root and the role.
        components = components[:2]

    return Atom(('/'.join(components),))
|
|
945
|
+
|
|
946
|
+
def type(self) -> str:
    """Return the type of the atom.

    The type of an atom is its first subrole. For example, the
    type of hyperbase/Cp.s/1 is 'Cp'.

    If the atom only has a root, it is assumed to be a conjunction.
    In this case, this function returns the generic conjunction type: 'J'.
    """
    subroles = self.role()
    return subroles[0]
|
|
956
|
+
|
|
957
|
+
def connector_type(self) -> str | None:
    """Return the type of the edge's connector.

    An atom has no connector, so this implementation always returns None.
    """
    return None
|
|
963
|
+
|
|
964
|
+
def atom_with_type(self, atom_type: str) -> Atom | None:
    """Return the first atom found in the edge that has the given
    'atom_type', or whose type starts with 'atom_type'.
    If no such atom is found, return None.

    For example, given the edge (+/B a/Cn b/Cp) and the 'atom_type'
    C, this function returns:
    a/Cn
    If the 'atom_type' is 'Cp', then it will return:
    b/Cp

    For an atom, the only candidate is the atom itself.
    """
    own_type = self.type()
    return self if own_type.startswith(atom_type) else None
|
|
981
|
+
|
|
982
|
+
def argroles(self) -> str:
    """Return the argument roles string of the edge, if it exists.
    Otherwise return the empty string.

    Argument roles can be returned for the entire edge that they apply
    to, which can be a relation (R) or a concept (C). For example:

    ((not/M is/P.sc) bob/C sad/C) has argument roles "sc",
    (of/B.ma city/C berlin/C) has argument roles "ma".

    Argument roles can also be returned for the connectors that define
    the outer edge, which can be of type predicate (P) or builder (B). For
    example:

    (not/M is/P.sc) has argument roles "sc",
    of/B.ma has argument roles "ma".

    For an atom, argument roles are only reported when its main type is
    'B' or 'P'; they are the second subrole of the atom's role.
    """
    if self.mtype() not in {'B', 'P'}:
        return ''
    subroles = self.role()
    return subroles[1] if len(subroles) >= 2 else ''
|
|
1006
|
+
|
|
1007
|
+
def replace_argroles(self, argroles: str | None) -> Atom:
    """Return an atom with the argroles replaced with the provided string.

    Passing None or the empty string removes the argroles instead. An
    atom that only has a root is returned unchanged.
    """
    if argroles in (None, ''):
        return self.remove_argroles()

    components = self[0].split('/')
    if len(components) < 2:
        return self

    subroles = components[1].split('.')
    if len(subroles) >= 2:
        # Overwrite the existing argroles.
        subroles[1] = argroles
    else:
        # The role had no argroles yet.
        subroles.append(argroles)

    components[1] = '.'.join(subroles)
    return Atom(('/'.join(components),))
|
|
1021
|
+
|
|
1022
|
+
def remove_argroles(self) -> Atom:
    """Return an atom with the argroles removed.

    Only the first subrole of the role is kept. An atom that only has
    a root is returned unchanged.
    """
    components = self[0].split('/')
    if len(components) < 2:
        return self
    # Everything after the first '.' in the role is discarded.
    components[1] = components[1].partition('.')[0]
    return Atom(('/'.join(components),))
|
|
1030
|
+
|
|
1031
|
+
def insert_argrole(self, argrole: str, pos: int) -> Atom:
    """Return an atom with the given argrole inserted at the specified
    position. Same restrictions as in replace_argroles() apply."""
    current = self.argroles()
    updated = ''.join((current[:pos], argrole, current[pos:]))
    return self.replace_argroles(updated)
|
|
1037
|
+
|
|
1038
|
+
def edges_with_argrole(self, argrole: str) -> list[Hyperedge]:
    """Return the list of edges with the given argument role.

    This atom implementation always returns an empty list.
    """
    return []
|
|
1041
|
+
|
|
1042
|
+
def main_concepts(self) -> list[Hyperedge]:
    """Return the list of main concepts in a concept edge.

    A main concept is a central concept in a built concept, e.g.:
    in ('s/Bp.am zimbabwe/Mp economy/Cn.s), economy/Cn.s is the main
    concept.

    If the entity is not an edge, or its connector is not of type builder,
    or the builder does not contain concept role annotations, or no
    concept is annotated as the main one, then an empty list is
    returned. This atom implementation therefore always returns [].
    """
    return []
|
|
1054
|
+
|
|
1055
|
+
def replace_main_concept(self, new_main: Hyperedge) -> Hyperedge | None:
    """Return 'new_main' if this atom's main type is 'C', otherwise None.

    TODO: document and test
    """
    return new_main if self.mtype() == 'C' else None
|
|
1061
|
+
|
|
1062
|
+
def check_correctness(self) -> dict[Hyperedge, list[tuple[str, str]]]:
    """Check that the atom has a valid main type.

    Returns a dict mapping the atom to a list of (error code, message)
    tuples. The dict is empty when the main type is one of
    'C', 'P', 'M', 'B', 'T' or 'J'.
    """
    atom_type = self.mtype()
    if atom_type in {'C', 'P', 'M', 'B', 'T', 'J'}:
        return {}
    message = '{} is not a valid atom type'.format(atom_type)
    return {self: [('bad-atom-type', message)]}
|
|
1074
|
+
|
|
1075
|
+
def normalized(self) -> Atom:
    """Return the atom with its argroles sorted into canonical order.

    Only atoms of main type 'B' or 'P' that have argroles are affected;
    any other atom is returned unchanged. The sort order is given by
    argrole_order. Argroles wrapped in curly braces are sorted inside
    the braces, and the braces are restored afterwards.
    """
    if self.mtype() not in {'B', 'P'}:
        return self

    roles = self.argroles()
    if not roles:
        return self

    braced = roles[0] == '{'
    if braced:
        # Strip the surrounding braces before sorting.
        roles = roles[1:-1]

    roles = ''.join(sorted(roles, key=argrole_order.__getitem__))

    if braced:
        roles = '{{{}}}'.format(roles)

    return self.replace_argroles(roles)
|
|
1089
|
+
|
|
1090
|
+
def __add__(self, other: Hyperedge | tuple[Any, ...] | list[Any]) -> Hyperedge:
    """Build a new edge that starts with this atom.

    If 'other' is an atom, the result is the two-element edge
    (self other). Otherwise the items of 'other' (a non-atomic edge,
    tuple or list) are appended after this atom.
    """
    if isinstance(other, Hyperedge) and other.atom:
        return Hyperedge((self, other))
    # Plain sequences and non-atomic edges are spliced in item by item.
    return Hyperedge((self,) + tuple(other))
|
|
1097
|
+
|
|
1098
|
+
|
|
1099
|
+
class UniqueAtom(Atom):
    """Wrapper around an atom that compares and hashes by object identity.

    Two UniqueAtom instances are equal only if they wrap the very same
    atom object.
    """

    def __init__(self, atom: Atom) -> None:
        # The wrapped atom object whose identity defines this instance.
        self.atom_obj = atom

    def __hash__(self) -> int:
        return id(self.atom_obj)

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, UniqueAtom):
            return False
        return self.atom_obj is other.atom_obj
|
|
1108
|
+
|
|
1109
|
+
|
|
1110
|
+
def unique(edge: Hyperedge) -> Hyperedge | None:
    """Return a copy of 'edge' in which every atom is a UniqueAtom.

    Atoms that already are UniqueAtom instances are kept as they are;
    non-atomic edges are rebuilt recursively through hedge().
    """
    if not edge.atom:
        return hedge([unique(subedge) for subedge in edge])
    if type(edge) is UniqueAtom:
        return edge
    return UniqueAtom(edge)  # type: ignore[arg-type]
|
|
1118
|
+
|
|
1119
|
+
|
|
1120
|
+
def non_unique(edge: Hyperedge) -> Hyperedge | None:
    """Return a copy of 'edge' in which every UniqueAtom is unwrapped.

    A UniqueAtom is replaced by the atom object it wraps, other atoms
    are returned unchanged, and non-atomic edges are rebuilt
    recursively through hedge().
    """
    if not edge.atom:
        return hedge([non_unique(subedge) for subedge in edge])
    if type(edge) is UniqueAtom:
        return edge.atom_obj
    return edge
|