hyperbase 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hyperbase/hyperedge.py ADDED
@@ -0,0 +1,1127 @@
1
+ from __future__ import annotations
2
+
3
+ from collections import Counter
4
+ from collections.abc import Iterable
5
+ from typing import Any
6
+
7
+
8
# Sort key for argument-role characters. Hyperedge.normalized() orders the
# arguments of an edge by the value associated with each argument's role,
# so roles with lower values come first.
argrole_order: dict[str, int] = {
    'm': -1,
    's': 0,
    'p': 1,
    'a': 2,
    'c': 3,
    'o': 4,
    'i': 5,
    't': 6,
    'j': 7,
    'x': 8,
    'r': 9,
    '?': 10
}


# Role characters that Hyperedge.check_correctness() accepts on connectors
# of main type 'P'; any other character is reported as 'pred-bad-arg-role'.
valid_p_argroles: set[str] = {'s', 'p', 'a', 'c', 'o', 'i', 't', 'j', 'x', 'r', '?'}
# Role characters that Hyperedge.check_correctness() accepts on connectors
# of main type 'B'; any other character is reported as 'build-bad-arg-role'.
valid_b_argroles: set[str] = {'m', 'a'}
26
+
27
+
28
# Characters that may not appear literally inside an atom, mapped to their
# percent-escapes. '%' itself is escaped so the encoding stays unambiguous.
_ATOM_ESCAPES = str.maketrans({
    '%': '%25',
    '/': '%2f',
    ' ': '%20',
    '(': '%28',
    ')': '%29',
    '.': '%2e',
    '*': '%2a',
    '&': '%26',
    '@': '%40',
    '\n': '%0a',
    '\r': '%0d',
})


def str2atom(s: str) -> str:
    """Converts a string into a valid atom.

    The string is lower-cased and every reserved character is replaced by
    its percent-escape in a single pass.
    """
    return s.lower().translate(_ATOM_ESCAPES)
45
+
46
+
47
+ def _edge_str_has_outer_parens(edge_str: str) -> bool:
48
+ """Check if string representation of edge is delimited by outer
49
+ parenthesis.
50
+ """
51
+ if len(edge_str) < 2:
52
+ return False
53
+ return edge_str[0] == '('
54
+
55
+
56
def split_edge_str(edge_str: str) -> tuple[str, ...] | None:
    """Shallow-split the string form of an edge (given without its outer
    parentheses) into top-level tokens.

    Parenthesised groups are returned as single tokens, parentheses
    included. Returns None when the parentheses are unbalanced.
    """
    tokens: list[str] = []
    token_start = 0
    nesting = 0
    in_token = False

    for pos, char in enumerate(edge_str):
        if char == ' ':
            # a space only terminates a token at the top level
            if in_token and nesting == 0:
                tokens.append(edge_str[token_start:pos])
                in_token = False
        elif char == '(':
            if nesting == 0:
                in_token = True
                token_start = pos
            nesting += 1
        elif char == ')':
            nesting -= 1
            if nesting == 0:
                tokens.append(edge_str[token_start:pos + 1])
                in_token = False
            elif nesting < 0:
                # closing parenthesis without a matching opening one
                return None
        elif not in_token:
            in_token = True
            token_start = pos

    if in_token:
        if nesting > 0:
            # input ended inside an unclosed group
            return None
        tokens.append(edge_str[token_start:])

    return tuple(tokens)
97
+
98
+
99
def _parsed_token(token: str) -> Hyperedge | None:
    """Parse one token produced by split_edge_str().

    Parenthesised tokens are parsed recursively with hedge(); anything else
    becomes an Atom wrapping the token text.
    """
    if not _edge_str_has_outer_parens(token):
        return Atom((token,))
    return hedge(token)
104
+
105
+
106
def hedge(source: str | Hyperedge | list[Any] | tuple[Any, ...]) -> Hyperedge | None:
    """Create a hyperedge.

    'source' may be:
    - a string representation, which is parsed;
    - a plain list or tuple, whose items are converted recursively;
    - an existing Hyperedge, Atom or UniqueAtom, which is returned as is.

    Returns None when the source cannot be converted (unsupported type,
    empty string or unbalanced parentheses).
    """
    kind = type(source)

    # Exact type checks on purpose: edges are tuple subclasses and must not
    # be treated as plain tuples.
    if kind is list or kind is tuple:
        return Hyperedge(tuple(hedge(item) for item in source))

    if kind is str:
        edge_str = source.strip().replace('\n', ' ')
        parens = _edge_str_has_outer_parens(edge_str)
        inner_str = edge_str[1:-1] if parens else edge_str

        tokens = split_edge_str(inner_str)
        if not tokens:
            return None

        edges = tuple(_parsed_token(token) for token in tokens)
        first = edges[0]
        if len(edges) > 1 or type(first) == Hyperedge:
            return Hyperedge(edges)
        if isinstance(first, Atom):
            # single atom; remember whether it was written in parentheses
            return Atom(first, parens)
        return None

    if kind in {Hyperedge, Atom, UniqueAtom}:
        return source  # type: ignore

    return None
132
+
133
+
134
def build_atom(text: str, *parts: str) -> Atom:
    """Build an atom from text and other parts.

    'text' is escaped with str2atom() and becomes the root; the non-empty
    'parts' are appended, separated by '/'.
    """
    root = str2atom(text)
    suffix = '/'.join(part for part in parts if part)
    atom_str = f'{root}/{suffix}' if suffix else root
    return Atom((atom_str,))
141
+
142
+
143
class Hyperedge(tuple):  # type: ignore[type-arg]
    """Non-atomic hyperedge.

    An edge is an immutable tuple of sub-edges. The first item is the
    connector and the remaining items are its arguments.
    """
    def __new__(cls, edges: Iterable[Hyperedge | None]) -> Hyperedge:
        return super(Hyperedge, cls).__new__(cls, tuple(edges))

    @property
    def atom(self) -> bool:
        """True if edge is an atom."""
        return False

    @property
    def not_atom(self) -> bool:
        """True if edge is not an atom."""
        return True

    @property
    def t(self) -> str:
        """Edge type.
        (this property is a shortcut for Hyperedge.type())
        """
        return self.type()

    @property
    def mt(self) -> str:
        """Edge main type.
        (this property is a shortcut for Hyperedge.mtype())
        """
        return self.mtype()

    @property
    def ct(self) -> str | None:
        """Edge connector type.
        (this property is a shortcut for Hyperedge.connector_type())
        """
        return self.connector_type()

    @property
    def cmt(self) -> str | None:
        """Edge connector main type.
        (this property is a shortcut for Hyperedge.connector_mtype())
        """
        return self.connector_mtype()

    def is_atom(self) -> bool:
        """
        .. deprecated:: 0.6.0
            Please use the properties .atom and .not_atom instead.

        Checks if edge is an atom.
        """
        return False

    def to_str(self, roots_only: bool = False) -> str:
        """Converts edge to its string representation.

        Keyword argument:
        roots_only -- only the roots of the atoms will be used to create
        the string representation.
        """
        # falsy items (e.g. None left by a failed parse) are skipped
        inner = ' '.join(edge.to_str(roots_only=roots_only) for edge in self if edge)
        return f'({inner})'

    def label(self) -> str:
        """Generate human-readable label for edge.

        Edges with exactly two items keep their order. If the last part of
        the connector atom is '.', the connector is dropped. Otherwise the
        connector is placed between the first and second arguments.
        """
        conn_atom = self.connector_atom()
        if len(self) == 2:
            items: tuple[Any, ...] = self
        elif conn_atom is not None and conn_atom.parts()[-1] == '.':
            items = self[1:]
        else:
            items = (self[1], self[0]) + self[2:]
        return ' '.join(item.label() for item in items)

    def inner_atom(self) -> Atom:
        """The inner atom inside of a modifier structure.

        For example, consider:
        (red/M shoes/C)
        The inner atom is:
        shoes/C
        Or, the more complex case:
        ((and/J slow/M steady/M) go/P)
        Yields:
        go/P

        This method should not be used on structures that contain more than
        one inner atom, for example concepts constructed with builders or
        relations.

        The inner atom of an atom is itself.
        """
        return self[1].inner_atom()  # type: ignore[no-any-return]

    def connector_atom(self) -> Atom | None:
        """The inner atom of the connector.

        For example, consider:
        (does/M (not/M like/P.so) john/C chess/C)
        The connector atom is:
        like/P.so

        The connector atom of an atom is None.
        """
        return self[0].inner_atom()  # type: ignore[no-any-return]

    def atoms(self) -> set[Atom]:
        """Returns the set of atoms contained in the edge.

        For example, consider the edge:
        (the/md (of/br mayor/cc (the/md city/cs)))
        in this case, edge.atoms() returns:
        [the/md, of/br, mayor/cc, city/cs]
        """
        atom_set: set[Atom] = set()
        for item in self:
            atom_set.update(item.atoms())
        return atom_set

    def all_atoms(self) -> list[Atom]:
        """Returns a list of all the atoms contained in the edge. Unlike
        atoms(), which does not return repeated atoms, all_atoms() does
        return repeated atoms if they are different objects.

        For example, consider the edge:
        (the/md (of/br mayor/cc (the/md city/cs)))
        in this case, edge.all_atoms() returns:
        [the/md, of/br, mayor/cc, the/md, city/cs]
        """
        atoms: list[Atom] = []
        for item in self:
            atoms.extend(item.all_atoms())
        return atoms

    def size(self) -> int:
        """The size of an edge is its total number of atoms, at all depths."""
        return sum(edge.size() for edge in self)

    def depth(self) -> int:
        """Returns maximal depth of edge, an atom has depth 0."""
        return 1 + max((item.depth() for item in self), default=0)

    def roots(self) -> Hyperedge:
        """Returns edge with root-only atoms."""
        return Hyperedge(item.roots() for item in self)

    def contains(self, needle: str, deep: bool = False) -> bool:
        """Checks if 'needle' is contained in edge.

        Keyword argument:
        deep -- search recursively (default False)"""
        for item in self:
            if item == needle:
                return True
            if deep and item.contains(needle, True):
                return True
        return False

    def subedges(self) -> set[Hyperedge]:
        """Returns all the subedges contained in the edge, including atoms
        and itself.
        """
        edges: set[Hyperedge] = {self}
        for item in self:
            edges.update(item.subedges())
        return edges

    def insert_first_argument(self, argument: Hyperedge) -> Hyperedge:
        """Returns an edge built by placing 'argument' as the first item
        after the connector of this edge. If this edge is an atom, then
        it becomes the connector of the returned edge.

        For example, considering the 'edge' (a) and the 'argument' (b), this
        function returns:
        (a b)

        Considering the 'edge' (a b c) and the 'argument' (d e), it
        returns:
        (a (d e) b c)
        """
        return Hyperedge((self[0], argument) + self[1:])

    def connect(self, arguments: tuple[Hyperedge, ...] | list[Hyperedge] | None) -> Hyperedge:
        """Returns an edge built by adding the items in 'arguments' to the
        end of this edge. 'arguments' must be a collection.

        For example, connecting the edge (a b) with the 'arguments'
        (c d) produces:
        (a b c d)
        """
        if arguments is None or len(arguments) == 0:
            return self
        return Hyperedge(self + arguments)

    def sequence(self, entity: Hyperedge, before: bool, flat: bool = True) -> Hyperedge:
        """Returns an edge built by sequencing the 'entity', if it's an
        atom, or the elements of 'entity' if it is an edge, either before
        or after the elements of this edge.

        If flat is False, then both this edge and 'entity' are treated as
        self-contained edges when building the new edge.

        For example, connecting the edge (a b) and the 'entity' c
        produces, if before is True:
        (c a b)
        and if before is False:
        (a b c)
        Connecting the edge (a b) and the 'entity' (c d)
        produces, if before is True:
        (c d a b)
        and if before is False:
        (a b c d)
        This last example, if 'flat' is False, becomes respectively:
        ((c d) (a b))
        ((a b) (c d))
        """
        if not flat:
            return Hyperedge((entity, self) if before else (self, entity))

        # An atom is a one-element tuple holding its own string, so it must
        # be wrapped rather than unpacked; otherwise the raw string would be
        # spliced into the new edge instead of the atom.
        own = (self,) if self.atom else tuple(self)
        other = (entity,) if getattr(entity, 'atom', False) else tuple(entity)
        return Hyperedge(other + own if before else own + other)

    def replace_atom(self, old: Atom, new: Hyperedge, unique: bool = False) -> Hyperedge:
        """Returns edge built by replacing every instance of 'old' in
        this edge with 'new'.

        Keyword argument:
        unique -- match only the exact same instance of the atom, i.e.
        UniqueAtom(self) == UniqueAtom(old) (default: False)
        """
        return Hyperedge(item.replace_atom(old, new, unique=unique) for item in self)

    def simplify(self, subtypes: bool = False, argroles: bool = False, namespaces: bool = True) -> Hyperedge | None:
        """Returns a version of the edge with simplified atoms, for example
        removing subtypes, subroles or namespaces.

        Keyword arguments:
        subtypes -- include subtypes (default: False).
        argroles -- include argroles (default: False).
        namespaces -- include namespaces (default: True).
        """
        return hedge([subedge.simplify(subtypes=subtypes,
                                       argroles=argroles,
                                       namespaces=namespaces)
                      for subedge in self])

    def type(self) -> str:
        """Returns the type of this edge as a string.
        Type inference is performed from the type of the connector.

        Raises RuntimeError if the connector type does not allow the edge
        type to be inferred, or if a modifier or conjunction has no
        argument.
        """
        ptype = self[0].type()
        lead = ptype[0]

        if lead in {'M', 'J'}:
            # modifiers and conjunctions take their type from the first argument
            if len(self) < 2:
                raise RuntimeError('Edge is malformed, type cannot be determined: {}'.format(str(self)))
            if lead == 'M':
                return self[1].type()  # type: ignore[no-any-return]
            return self[1].mtype()  # type: ignore[no-any-return]

        outer_type = {'P': 'R', 'T': 'S', 'B': 'C'}.get(lead)
        if outer_type is None:
            raise RuntimeError('Edge is malformed, type cannot be determined: {}'.format(str(self)))

        return '{}{}'.format(outer_type, ptype[1:])

    def connector_type(self) -> str | None:
        """Returns the type of the edge's connector.
        If the edge has no connector (i.e. it's an atom), then None is
        returned.
        """
        return self[0].type()  # type: ignore[no-any-return]

    def mtype(self) -> str:
        """Returns the main type of this edge as a string of one character.
        Type inference is performed.
        """
        return self.type()[0]

    def connector_mtype(self) -> str | None:
        """Returns the main type of the edge's connector.
        If the edge has no connector (i.e. it's an atom), then None is
        returned.
        """
        ct = self.connector_type()
        return ct[0] if ct else None

    def atom_with_type(self, atom_type: str) -> Atom | None:
        """Returns the first atom found in the edge that has the given
        'atom_type', or whose type starts with 'atom_type'.
        If no such atom is found, returns None.

        For example, given the edge (+/B a/Cn b/Cp) and the 'atom_type'
        C, this function returns:
        a/Cn
        If the 'atom_type' is 'Cp', then it will return:
        b/Cp
        """
        for item in self:
            atom: Atom | None = item.atom_with_type(atom_type)
            if atom:
                return atom
        return None

    def contains_atom_type(self, atom_type: str) -> bool:
        """Checks if the edge contains any atom with the given type.
        The edge is searched recursively, so the atom can appear at any depth.
        """
        return self.atom_with_type(atom_type) is not None

    def argroles(self) -> str:
        """Returns the argument roles string of the edge, if it exists.
        Otherwise returns empty string.

        Argument roles can be returned for the entire edge that they apply
        to, which can be a relation (R) or a concept (C). For example:

        ((not/M is/P.sc) bob/C sad/C) has argument roles "sc",
        (of/B.ma city/C berlin/C) has argument roles "ma".

        Argument roles can also be returned for the connectors that define
        the outer edge, which can be of type predicate (P) or builder (B). For
        example:

        (not/M is/P.sc) has argument roles "sc",
        of/B.ma has argument roles "ma".
        """
        et = self.mtype()
        if et in {'R', 'C'} and self[0].mtype() in {'B', 'P'}:
            return self[0].argroles()  # type: ignore[no-any-return]
        if et not in {'B', 'P'}:
            return ''
        return self[1].argroles()  # type: ignore[no-any-return]

    def has_argroles(self) -> bool:
        """Returns True if the edge has argroles, False otherwise."""
        return self.argroles() != ''

    def replace_argroles(self, argroles: str | None) -> Hyperedge:
        """Returns an edge with the argroles of the connector atom replaced
        with the provided string.
        Returns same edge if the atom does not contain a role part."""
        st = self.mtype()
        if st in {'C', 'R'}:
            # roles live on the connector
            return Hyperedge((self[0].replace_argroles(argroles),) + self[1:])
        if st in {'P', 'B'}:
            # this edge is itself a connector; roles live on its argument
            return Hyperedge((self[0], self[1].replace_argroles(argroles)) + self[2:])
        return self

    def insert_argrole(self, argrole: str, pos: int) -> Hyperedge:
        """Returns an edge with the given argrole inserted at the specified
        position in the argroles of the connector atom.
        Same restrictions as in replace_argroles() apply."""
        st = self.mtype()
        if st in {'C', 'R'}:
            return Hyperedge((self[0].insert_argrole(argrole, pos),) + self[1:])
        if st in {'P', 'B'}:
            return Hyperedge((self[0], self[1].insert_argrole(argrole, pos)) + self[2:])
        return self

    def insert_edge_with_argrole(self, edge: Hyperedge, argrole: str, pos: int) -> Hyperedge:
        """Returns a new edge with the provided edge and its argroles inserted
        at the specified position."""
        new_edge = self.insert_argrole(argrole, pos)
        # +1 skips the connector, which occupies index 0
        combined = tuple(new_edge[:pos + 1]) + (edge,) + tuple(new_edge[pos + 1:])
        return Hyperedge(combined)

    def edges_with_argrole(self, argrole: str) -> list[Hyperedge]:
        """Returns the list of edges with the given argument role."""
        argroles = self[0].argroles()
        if len(argroles) > 0 and argroles[0] == '{':
            # strip the surrounding braces
            argroles = argroles[1:-1]
        argroles = argroles.replace(',', '')

        last_arg = len(self) - 1
        return [self[pos + 1]
                for pos, role in enumerate(argroles)
                if role == argrole and pos < last_arg]

    def main_concepts(self) -> list[Hyperedge]:
        """Returns the list of main concepts in a concept edge.
        A main concept is a central concept in a built concept, e.g.:
        in ('s/Bp.am zimbabwe/Cp economy/Cn.s), economy/Cn.s is the main
        concept.

        If entity is not an edge, or its connector is not of type builder,
        or the builder does not contain concept role annotations, or no
        concept is annotated as the main one, then an empty list is
        returned.
        """
        if self[0].mtype() == 'B':
            return self.edges_with_argrole('m')
        return []

    def replace_main_concept(self, new_main: Hyperedge) -> Hyperedge | None:
        """Returns a copy of this concept edge with its main concept
        replaced by 'new_main'.

        Handled cases: a modifier connector (its single argument is
        replaced) and a builder connector with exactly two arguments and
        argroles 'ma' or 'am' (the argument with role 'm' is replaced).
        In every other case None is returned.
        """
        if self.mtype() != 'C':
            return None
        conn_mtype = self[0].mtype()
        if conn_mtype == 'M':
            return hedge((self[0], new_main))
        if conn_mtype == 'B' and len(self) == 3:
            roles = self[0].argroles()
            if roles == 'ma':
                return hedge((self[0], new_main, self[2]))
            if roles == 'am':
                return hedge((self[0], self[1], new_main))
        return None

    def check_correctness(self) -> dict[Hyperedge, list[tuple[str, str]]]:
        """Checks the structure of this edge and, recursively, of its
        subedges.

        Returns a dictionary mapping every (sub)edge in which a problem was
        found to a list of (error code, message) tuples. The dictionary is
        empty if no problem was found.
        """
        output: dict[Hyperedge, list[tuple[str, str]]] = {}
        errors: list[tuple[str, str]] = []

        ct = self[0].mtype()
        # check if connector has valid type
        if ct not in {'P', 'M', 'B', 'T', 'J'}:
            errors.append(('conn-bad-type', 'connector has incorrect type: {}'.format(ct)))
        # check if modifier structure is correct
        if ct == 'M':
            if len(self) != 2:
                errors.append(('mod-1-arg', 'modifiers can only have one argument'))
        # check if builder structure is correct
        elif ct == 'B':
            if len(self) != 3:
                errors.append(('build-2-args', 'builders can only have two arguments'))
            for arg in self[1:]:
                at = arg.mtype()
                if at != 'C':
                    e = 'builder argument {} has incorrect type: {}'.format(arg.to_str(), at)
                    errors.append(('build-arg-bad-type', e))
        # check if trigger structure is correct
        elif ct == 'T':
            if len(self) != 2:
                errors.append(('trig-1-arg', 'triggers can only have one arguments'))
            for arg in self[1:]:
                at = arg.mtype()
                if at not in {'C', 'R'}:
                    e = 'trigger argument {} has incorrect type: {}'.format(arg.to_str(), at)
                    errors.append(('trig-bad-arg-type', e))
        # check if predicate structure is correct
        elif ct == 'P':
            for arg in self[1:]:
                at = arg.mtype()
                if at not in {'C', 'R', 'S'}:
                    e = 'predicate argument {} has incorrect type: {}'.format(arg.to_str(), at)
                    errors.append(('pred-arg-bad-type', e))
        # check if conjunction structure is correct
        elif ct == 'J':
            if len(self) < 3:
                errors.append(('conj-2-args-min', 'conjunctions must have at least two arguments'))

        # check argrole counts
        if ct in {'P', 'B'}:
            try:
                ars: str | None = self.argroles()
            except RuntimeError:
                # malformed edges are detected elsewhere
                ars = None

            if ars is not None:
                if len(ars) > 0:
                    if ct == 'P':
                        valid_roles, code = valid_p_argroles, 'pred-bad-arg-role'
                    else:
                        valid_roles, code = valid_b_argroles, 'build-bad-arg-role'
                    for ar in ars:
                        if ar not in valid_roles:
                            errors.append((code, f'{ar} is not a valid argument role for connector of type {ct}'))

                    if len(ars) != len(self) - 1:
                        errors.append(('bad-num-argroles', 'number of argroles must match number of arguments'))

                    # these roles may appear at most once
                    ars_counts = Counter(ars)
                    for role in 'socipa':
                        if ars_counts[role] > 1:
                            errors.append((f'argrole-{role}-1-max', f'argrole {role} can only be used once'))
                else:
                    errors.append(('no-argroles', 'Connectors of type P or B must have argument roles'))

        if len(errors) > 0:
            output[self] = errors

        for subedge in self:
            output.update(subedge.check_correctness())

        return output

    def normalized(self) -> Hyperedge | None:
        """Returns a version of this edge in which the arguments are
        sorted by their argument role, following 'argrole_order', and in
        which every subedge is normalized as well.

        Arguments that have no corresponding role (connector with fewer
        roles than arguments) are kept, in their original order, after the
        sorted ones.
        """
        edge: Hyperedge = self
        conn = edge[0]
        ar = conn.argroles()
        if ar != '':
            if ar[0] == '{':
                ar = ar[1:-1]
            args = edge[1:]
            roles_edges_sorted = sorted(zip(ar, args), key=lambda role_edge: argrole_order[role_edge[0]])
            # zip() stops at the shorter input, so arguments beyond the
            # last role must be carried over explicitly or they are lost
            unassigned = list(args[len(ar):])
            new_edge = hedge([conn] + [arg for _, arg in roles_edges_sorted] + unassigned)
            if not new_edge:
                return None
            edge = new_edge
        return hedge([subedge.normalized() for subedge in edge])

    def __add__(self, other: Hyperedge | tuple[Any, ...] | list[Any]) -> Hyperedge:
        # an atom is appended as a single item; anything else is unpacked
        if isinstance(other, Hyperedge) and other.atom:
            tail: tuple[Any, ...] = (other,)
        else:
            tail = tuple(other)
        return Hyperedge(tuple.__add__(self, tail))

    def __str__(self) -> str:
        return self.to_str()

    def __repr__(self) -> str:
        return self.to_str()
694
+
695
+
696
# Side table holding the 'parens' flag of each atom, keyed by id(atom).
# Atom.__new__ writes the entry and the Atom.parens property reads it
# (defaulting to False when the id is not present).
# NOTE(review): nothing visible in this part of the file removes entries,
# so the table appears to grow with every atom created -- confirm.
# NOTE(review): tuple subclasses that do not define __slots__ normally
# accept instance attributes -- confirm this side table is still required.
_atom_parens: dict[int, bool] = {}
698
+
699
+
700
+ class Atom(Hyperedge):
701
+ """Atomic hyperedge."""
702
def __new__(cls, edge: tuple[str, ...] | Atom, parens: bool = False) -> Atom:
    """Create an atom from a one-element tuple (or another atom) and
    record whether it was written between parentheses.
    """
    # super(Hyperedge, cls) skips Hyperedge.__new__ and builds the tuple directly
    instance = super(Hyperedge, cls).__new__(cls, tuple(edge))
    # the flag is kept in the module-level table, keyed by object id
    _atom_parens[id(instance)] = parens
    return instance
706
+
707
@property
def parens(self) -> bool:
    """Whether this atom has parentheses.

    Looked up in the module-level table by object id; False if the atom
    has no entry.
    """
    key = id(self)
    return _atom_parens.get(key, False)
711
+
712
@property
def atom(self) -> bool:
    """Always True: this edge is an atom."""
    return True
716
+
717
@property
def not_atom(self) -> bool:
    """Always False: this edge is an atom."""
    return False
721
+
722
def is_atom(self) -> bool:
    """
    .. deprecated:: 0.6.0
        Use the properties .atom and .not_atom instead.

    Tells whether the edge is an atom (always True here).
    """
    return True
730
+
731
def parts(self) -> list[str]:
    """Return the '/'-separated parts of the atom string."""
    atom_str = self[0]
    return atom_str.split('/')  # type: ignore[no-any-return]
734
+
735
def root(self) -> str:
    """Return the root of the atom, i.e. its first part
    (e.g. the root of hyperbase/C/1 is hyperbase)."""
    first, *_ = self.parts()
    return first
739
+
740
def replace_atom_part(self, part_pos: int, part: str) -> Atom:
    """Build a new atom in which the part at position 'part_pos' is
    replaced by 'part'. Empty parts are dropped from the result.
    """
    pieces = self.parts()
    pieces[part_pos] = part
    return Atom(('/'.join(filter(None, pieces)),))
746
+
747
def to_str(self, roots_only: bool = False) -> str:
    """Converts atom to its string representation.

    Keyword argument:
    roots_only -- use only the root of the atom instead of the full
    atom string.

    The result is wrapped in parentheses if the atom was created with
    parens set.
    """
    text = self.root() if roots_only else str(self[0])
    return '({})'.format(text) if self.parens else text
762
+
763
def label(self) -> str:
    """Generate human-readable label from entity.

    The label is the root of the atom with the percent-escapes produced
    by str2atom() turned back into the characters they stand for.
    """
    label = self.root()

    # '%25' (the escaped '%') must be decoded last. Decoding it first turns
    # an escaped literal such as '%252f' into '%2f', which the following
    # replacement would then wrongly decode to '/'.
    for escape, char in (
        ('%2f', '/'),
        ('%20', ' '),
        ('%28', '('),
        ('%29', ')'),
        ('%2e', '.'),
        ('%2a', '*'),
        ('%26', '&'),
        ('%40', '@'),
        ('%0a', '\n'),
        ('%0d', '\r'),
        ('%25', '%'),
    ):
        label = label.replace(escape, char)

    return label
778
+
779
def inner_atom(self) -> Atom:
    """The inner atom inside of a modifier structure.

    For example, in:
    (red/M shoes/C)
    the inner atom is:
    shoes/C
    and in the more complex case:
    ((and/J slow/M steady/M) go/P)
    it is:
    go/P

    This method should not be used on structures that contain more than
    one inner atom, for example concepts constructed with builders or
    relations.

    The inner atom of an atom is the atom itself, which is what this
    implementation returns.
    """
    return self
798
+
799
def connector_atom(self) -> Atom | None:
    """The inner atom of the connector.

    For example, in:
    (does/M (not/M like/P.so) john/C chess/C)
    the connector atom is:
    like/P.so

    An atom has no connector, so None is returned.
    """
    return None
810
+
811
def atoms(self) -> set[Atom]:
    """Returns the set of atoms contained in the edge.

    For example, for the edge:
    (the/Md (of/Br mayor/Cc (the/Md city/Cs)))
    edge.atoms() returns:
    [the/Md, of/Br, mayor/Cc, city/Cs]

    For an atom, the set contains only the atom itself.
    """
    return {self}
820
+
821
def all_atoms(self) -> list[Atom]:
    """Returns a list of all the atoms contained in the edge. Unlike
    atoms(), repeated atoms are kept if they are different objects.

    For example, for the edge:
    (the/Md (of/Br mayor/Cc (the/Md city/Cs)))
    edge.all_atoms() returns:
    [the/Md, of/Br, mayor/Cc, the/Md, city/Cs]

    For an atom, the list contains only the atom itself.
    """
    return [self]
832
+
833
def size(self) -> int:
    """The size of an edge is its total number of atoms, at all depths;
    an atom therefore has size 1."""
    return 1
836
+
837
def depth(self) -> int:
    """Returns the maximal depth of the edge; an atom has depth 0."""
    return 0
840
+
841
def roots(self) -> Atom:
    """Returns a new atom made only of the root of this atom."""
    root_only = self.root()
    return Atom((root_only,))
844
+
845
def contains(self, needle: str, deep: bool = False) -> bool:
    """Checks if 'needle' is contained in edge. For an atom this means
    that the atom string is equal to 'needle'.

    Keyword argument:
    deep -- search recursively (default: False); it has no effect on an
    atom."""
    atom_str = self[0]
    return atom_str == needle  # type: ignore[no-any-return]
851
+
852
def subedges(self) -> set[Hyperedge]:
    """Returns all the subedges contained in the edge, including atoms
    and itself. For an atom this is just the atom.
    """
    return {self}
857
+
858
def insert_first_argument(self, argument: Hyperedge) -> Hyperedge:
    """Returns an edge built by placing 'argument' as the first item
    after the connector of this edge. Since this edge is an atom, it
    becomes the connector of the returned edge.

    For example, with the 'edge' (a) and the 'argument' (b), this
    function returns:
    (a b)

    With the 'edge' (a b c) and the 'argument' (d e), it returns:
    (a (d e) b c)
    """
    items = (self, argument)
    return Hyperedge(items)
872
+
873
def replace_atom(self, old: Atom, new: Hyperedge, unique: bool = False) -> Hyperedge:
    """Returns 'new' if this atom matches 'old', otherwise returns this
    atom unchanged.

    Keyword argument:
    unique -- match only the exact same instance of the atom, i.e.
    UniqueAtom(self) == UniqueAtom(old) (default: False)
    """
    if unique:
        matches = UniqueAtom(self) == UniqueAtom(old)
    else:
        matches = self == old
    return new if matches else self
888
+
889
def role(self) -> list[str]:
    """Returns the role of this atom as a list of the subrole strings.

    The role of an atom is its second part, right after the root, and
    its subroles are separated by dots. For example, the role of
    hyperbase/Cp.s/1 is:

    Cp.s

    and this function returns:

    ['Cp', 's']

    An atom that only has a root is assumed to be a conjunction, so the
    generic conjunction role is returned:

    ['J'].
    """
    _root, slash, remainder = self[0].partition('/')
    if not slash:
        return ['J']
    second_part = remainder.split('/', 1)[0]
    return second_part.split('.')
913
+
914
def simplify(self, subtypes: bool = False, argroles: bool = False, namespaces: bool = True) -> Atom:
    """Returns a simplified version of the atom, for example removing
    subtypes, subroles or namespaces. An atom that only has a root is
    returned unchanged.

    Keyword arguments:
    subtypes -- include subtype (default: False).
    argroles -- include argroles (default: False).
    namespaces -- include namespaces (default: True).
    """
    pieces = self.parts()
    if len(pieces) < 2:
        return self

    new_role = self.type() if subtypes else self.mtype()
    if argroles:
        roles = self.argroles()
        if roles:
            new_role = f'{new_role}.{roles}'
    pieces[1] = new_role

    # everything after the role part is dropped when namespaces are excluded
    kept = pieces if namespaces else pieces[:2]
    return Atom(('/'.join(kept),))
945
+
946
def type(self) -> str:
    """Returns the type of the atom, which is its first subrole. For
    example, the type of hyperbase/Cp.s/1 is 'Cp'.

    An atom that only has a root is assumed to be a conjunction, so the
    generic conjunction type 'J' is returned.
    """
    subroles = self.role()
    return subroles[0]
956
+
957
def connector_type(self) -> str | None:
    """Returns the type of the edge's connector.
    An atom has no connector, so None is returned.
    """
    return None
963
+
964
def atom_with_type(self, atom_type: str) -> Atom | None:
    """Returns the first atom found in the edge that has the given
    'atom_type', or whose type starts with 'atom_type'.
    If no such atom is found, returns None.

    For an atom, this is the atom itself when its type starts with
    'atom_type', and None otherwise. For example, a/Cn matches the
    'atom_type' C and the 'atom_type' Cn, but not Cp.
    """
    if self.type().startswith(atom_type):
        return self
    return None
981
+
982
+ def argroles(self) -> str:
983
+ """Returns the argument roles string of the edge, if it exists.
984
+ Otherwise returns empty string.
985
+
986
+ Argument roles can be return for the entire edge that they apply to,
987
+ which can be a relation (R) or a concept (C). For example:
988
+
989
+ ((not/M is/P.sc) bob/C sad/C) has argument roles "sc",
990
+ (of/B.ma city/C berlin/C) has argument roles "ma".
991
+
992
+ Argument roles can also be returned for the connectors that define
993
+ the outer edge, which can be of type predicate (P) or builder (B). For
994
+ example:
995
+
996
+ (not/M is/P.sc) has argument roles "sc",
997
+ of/B.ma has argument roles "ma".
998
+ """
999
+ et = self.mtype()
1000
+ if et not in {'B', 'P'}:
1001
+ return ''
1002
+ role = self.role()
1003
+ if len(role) < 2:
1004
+ return ''
1005
+ return role[1]
1006
+
1007
+ def replace_argroles(self, argroles: str | None) -> Atom:
1008
+ """Returns an atom with the argroles replaced with the provided string."""
1009
+ if argroles is None or argroles == '':
1010
+ return self.remove_argroles()
1011
+ parts = self[0].split('/')
1012
+ if len(parts) < 2:
1013
+ return self
1014
+ role = parts[1].split('.')
1015
+ if len(role) < 2:
1016
+ role.append(argroles)
1017
+ else:
1018
+ role[1] = argroles
1019
+ parts = [parts[0], '.'.join(role)] + parts[2:]
1020
+ return Atom(('/'.join(parts),))
1021
+
1022
+ def remove_argroles(self) -> Atom:
1023
+ """Returns an atom with the argroles removed."""
1024
+ parts = self[0].split('/')
1025
+ if len(parts) < 2:
1026
+ return self
1027
+ role = parts[1].split('.')
1028
+ parts[1] = role[0]
1029
+ return Atom(('/'.join(parts),))
1030
+
1031
+ def insert_argrole(self, argrole: str, pos: int) -> Atom:
1032
+ """Returns an atom with the given argrole inserted at the specified
1033
+ position. Same restrictions as in replace_argroles() apply."""
1034
+ argroles = self.argroles()
1035
+ argroles = argroles[:pos] + argrole + argroles[pos:]
1036
+ return self.replace_argroles(argroles)
1037
+
1038
+ def edges_with_argrole(self, argrole: str) -> list[Hyperedge]:
1039
+ """Returns the list of edges with the given argument role"""
1040
+ return []
1041
+
1042
+ def main_concepts(self) -> list[Hyperedge]:
1043
+ """Returns the list of main concepts in an concept edge.
1044
+ A main concept is a central concept in a built concept, e.g.:
1045
+ in ('s/Bp.am zimbabwe/Mp economy/Cn.s), economy/Cn.s is the main
1046
+ concept.
1047
+
1048
+ If entity is not an edge, or its connector is not of type builder,
1049
+ or the builder does not contain concept role annotations, or no
1050
+ concept is annotated as the main one, then an empty list is
1051
+ returned.
1052
+ """
1053
+ return []
1054
+
1055
+ def replace_main_concept(self, new_main: Hyperedge) -> Hyperedge | None:
1056
+ """TODO: document and test"""
1057
+ if self.mtype() != 'C':
1058
+ return None
1059
+
1060
+ return new_main
1061
+
1062
+ def check_correctness(self) -> dict[Hyperedge, list[tuple[str, str]]]:
1063
+ output: dict[Hyperedge, list[tuple[str, str]]] = {}
1064
+ errors: list[tuple[str, str]] = []
1065
+
1066
+ at = self.mtype()
1067
+ if at not in {'C', 'P', 'M', 'B', 'T', 'J'}:
1068
+ errors.append(('bad-atom-type', '{} is not a valid atom type'.format(at)))
1069
+
1070
+ if len(errors) > 0:
1071
+ output[self] = errors
1072
+
1073
+ return output
1074
+
1075
+ def normalized(self) -> Atom:
1076
+ if self.mtype() in {'B', 'P'}:
1077
+ ar = self.argroles()
1078
+ if len(ar) > 0:
1079
+ if ar[0] == '{':
1080
+ ar = ar[1:-1]
1081
+ unordered = True
1082
+ else:
1083
+ unordered = False
1084
+ ar = ''.join(sorted(ar, key=lambda argrole: argrole_order[argrole]))
1085
+ if unordered:
1086
+ ar = '{{{}}}'.format(ar)
1087
+ return self.replace_argroles(ar)
1088
+ return self
1089
+
1090
+ def __add__(self, other: Hyperedge | tuple[Any, ...] | list[Any]) -> Hyperedge:
1091
+ if isinstance(other, (list, tuple)) and not isinstance(other, Hyperedge):
1092
+ return Hyperedge(tuple.__add__((self,), tuple(other)))
1093
+ elif isinstance(other, Hyperedge) and other.atom:
1094
+ return Hyperedge((self, other))
1095
+ else:
1096
+ return Hyperedge(tuple.__add__((self,), tuple(other)))
1097
+
1098
+
1099
class UniqueAtom(Atom):
    """Atom wrapper with identity-based equality and hashing.

    Two UniqueAtom instances are equal only if they wrap the very same
    Atom object (same id()), no matter what the atoms look like. The
    wrapped object is kept in the 'atom_obj' attribute.
    """

    def __init__(self, atom: Atom) -> None:
        # The atom being wrapped; its identity defines equality and hash.
        self.atom_obj = atom

    def __hash__(self) -> int:
        return id(self.atom_obj)

    def __eq__(self, other: object) -> bool:
        return isinstance(other, UniqueAtom) and id(self.atom_obj) == id(other.atom_obj)

    def __ne__(self, other: object) -> bool:
        # NOTE(review): atoms appear to be tuple-based (they are built as
        # Atom((s,)) and read through self[0]) -- confirm. A base class
        # that supplies its own __ne__, as tuple does, would otherwise be
        # used for '!=' and compare contents instead of identity, making
        # '!=' disagree with __eq__ above.
        return not self.__eq__(other)
1108
+
1109
+
1110
def unique(edge: Hyperedge) -> Hyperedge | None:
    """Returns a copy of the edge in which every atom is a UniqueAtom.

    Atoms that are already exactly of type UniqueAtom are kept as they
    are; any other atom is wrapped in a new UniqueAtom. Non-atomic edges
    are rebuilt with hedge() from their recursively converted subedges.
    """
    if not edge.atom:
        return hedge([unique(child) for child in edge])
    if type(edge) is UniqueAtom:
        return edge
    return UniqueAtom(edge)  # type: ignore[arg-type]
1118
+
1119
+
1120
def non_unique(edge: Hyperedge) -> Hyperedge | None:
    """Returns a copy of the edge in which every UniqueAtom is replaced
    by the atom it wraps.

    Atoms that are not exactly of type UniqueAtom are kept as they are.
    Non-atomic edges are rebuilt with hedge() from their recursively
    converted subedges.
    """
    if not edge.atom:
        return hedge([non_unique(child) for child in edge])
    if type(edge) is UniqueAtom:
        return edge.atom_obj
    return edge