hyperbase 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hyperbase/__init__.py +6 -0
- hyperbase/constants.py +4 -0
- hyperbase/hyperedge.py +1127 -0
- hyperbase/parsers/__init__.py +39 -0
- hyperbase/parsers/correctness.py +265 -0
- hyperbase/parsers/parser.py +41 -0
- hyperbase/parsers/utils.py +19 -0
- hyperbase/patterns/__init__.py +29 -0
- hyperbase/patterns/argroles.py +142 -0
- hyperbase/patterns/atoms.py +98 -0
- hyperbase/patterns/common.py +172 -0
- hyperbase/patterns/counter.py +153 -0
- hyperbase/patterns/entrypoints.py +87 -0
- hyperbase/patterns/matcher.py +245 -0
- hyperbase/patterns/merge.py +52 -0
- hyperbase/patterns/properties.py +59 -0
- hyperbase/patterns/utils.py +118 -0
- hyperbase/patterns/variables.py +161 -0
- hyperbase-0.8.0.dist-info/METADATA +64 -0
- hyperbase-0.8.0.dist-info/RECORD +23 -0
- hyperbase-0.8.0.dist-info/WHEEL +4 -0
- hyperbase-0.8.0.dist-info/licenses/AUTHORS +5 -0
- hyperbase-0.8.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import Sequence
|
|
4
|
+
|
|
5
|
+
from hyperbase import hedge
|
|
6
|
+
from hyperbase.hyperedge import Hyperedge
|
|
7
|
+
from hyperbase.patterns.argroles import edge2rolemap, rolemap_pairings, rolemap2edge
|
|
8
|
+
from hyperbase.patterns.utils import more_general, is_valid
|
|
9
|
+
from hyperbase.patterns.variables import all_variables, is_variable, contains_variable
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def common_pattern_argroles(edge1: Hyperedge, edge2: Hyperedge) -> Hyperedge | None:
    """Find the best common pattern of two edges by pairing their argroles.

    Every pairing of the two role maps yielded by ``rolemap_pairings`` is
    tried. For each pairing both edges are rebuilt with ``rolemap2edge``,
    their sub-edges are generalised position by position with
    ``_common_pattern``, and the candidate is only considered when it
    contains exactly the variables found in the two input edges.

    Returns the normalised winning candidate, or ``None`` when no pairing
    produces a usable candidate.
    """
    rolemap1 = edge2rolemap(edge1)
    rolemap2 = edge2rolemap(edge2)

    required_vars = all_variables(edge1) | all_variables(edge2)
    winner: Hyperedge | None = None

    for paired1, paired2 in rolemap_pairings(rolemap1, rolemap2):
        left = rolemap2edge(edge1[0], paired1)
        right = rolemap2edge(edge2[0], paired2)

        parts = [_common_pattern(sub1, sub2) for sub1, sub2 in zip(left, right)]
        if any(part is None for part in parts):
            # One position could not be generalised: discard this pairing.
            continue

        connector_roles = left[0].argroles()
        if connector_roles == '':
            # deal with (*/P.{} or */B.{})
            candidate = hedge('*/{}'.format(left.mtype()))
        else:
            candidate = hedge(parts)
            if candidate is None:
                continue
            candidate = candidate.replace_argroles('{{{}}}'.format(connector_roles))

        if candidate is None or required_vars != all_variables(candidate):
            continue
        if winner is None or more_general(winner, candidate):
            winner = candidate

    return None if winner is None else winner.normalized()
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def common_type(edges: Sequence[Hyperedge]) -> str | None:
    """Return the type shared by all ``edges``, or ``None`` if there is none.

    The value of ``type()`` is tried first; when the edges disagree on it,
    the value of ``mtype()`` is tried. ``None`` is returned when neither is
    shared by every edge (which includes the case of an empty sequence).
    """
    for accessor in ('type', 'mtype'):
        found = [getattr(edge, accessor)() for edge in edges]
        if len(set(found)) == 1:
            return found[0]
    return None
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def common_pattern_atoms(atoms: Sequence[Hyperedge]) -> Hyperedge | None:
    """Build an atomic pattern that generalises all the given atoms.

    * The root is kept when every atom has the same root and that root is
      not ``'*'``; otherwise it becomes ``'*'``.
    * If the string form of any atom has no ``'/'`` part, the pattern is the
      bare root; otherwise the type comes from ``common_type`` (and the
      pattern is again the bare root when that returns ``None``).
    * Items 1 and 2 of each atom's ``role()`` are appended, dot separated,
      when all atoms agree on them.

    Returns whatever ``hedge`` makes of the assembled pattern string.
    """
    roots = [atom.root() for atom in atoms]  # type: ignore[attr-defined]
    same_root = len(set(roots)) == 1 and '*' not in roots
    root = roots[0] if same_root else '*'

    untyped = any(len(str(atom).split('/')) == 1 for atom in atoms)
    atype: str | None = None if untyped else common_type(atoms)

    roles = [atom.role() for atom in atoms]  # type: ignore[attr-defined]

    def shared_slot(index: int) -> str | None:
        # Value at ``index`` of every role, or None unless all atoms agree on it.
        slot = [role[index] if len(role) > index else None for role in roles]
        if len(set(slot)) == 1 and slot[0] is not None:
            return slot[0]
        return None

    role1 = shared_slot(1)
    role2 = shared_slot(2)

    if atype is None:
        return hedge(root)

    suffix = [atype] + [role for role in (role1, role2) if role is not None]
    return hedge('{}/{}'.format(root, '.'.join(suffix)))
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _common_pattern(edge1: Hyperedge, edge2: Hyperedge) -> Hyperedge | None:
    """Recursively compute a pattern that generalises both edges.

    Handled in order:

    1. Variables: if exactly one of the edges is a variable (per
       ``is_variable``), or both are but their names differ, the result is
       ``None``. Otherwise the wrapped edges are generalised and wrapped
       again in ``(var <pattern> <name>)``.
    2. Two atoms: delegated to ``common_pattern_atoms``.
    3. Two non-atoms with argroles and equal ``mt``: delegated to
       ``common_pattern_argroles``; a truthy result is returned.
    4. Two non-atoms without argroles and of equal length: generalised
       position by position.
    5. Anything else: ``None`` if either edge contains a variable, else
       ``*/<common type>`` or plain ``*``.
    """
    # variables
    name1 = edge1[2] if is_variable(edge1) else None
    if is_variable(edge2):
        name2 = edge2[2]
        if name1 is None:
            return None
    else:
        name2 = None
        if name1:
            return None

    if name1 or name2:
        # different variables on same position?
        if name1 and name2 and name1 != name2:
            return None

        var = None
        if name1:
            inner1, var = edge1[1], name1
        else:
            inner1 = edge1
        if name2:
            inner2, var = edge2[1], name2
        else:
            inner2 = edge2

        inner = _common_pattern(inner1, inner2)
        if inner is None or contains_variable(inner):
            return None
        return hedge(('var', inner, var))

    # both are atoms
    if edge1.atom and edge2.atom:
        return common_pattern_atoms((edge1, edge2))

    # at least one non-atom
    if (edge1.not_atom and edge2.not_atom
            and edge1.has_argroles() and edge2.has_argroles()
            and edge1.mt == edge2.mt):
        shared = common_pattern_argroles(edge1, edge2)
        if shared:
            return shared

    # do not combine edges with argroles and edges without them
    ordered = (not (edge1.not_atom and edge1.has_argroles())
               and not (edge2.not_atom and edge2.has_argroles()))

    # same length
    if ordered and edge1.not_atom and edge2.not_atom and len(edge1) == len(edge2):
        parts = [_common_pattern(sub1, sub2) for sub1, sub2 in zip(edge1, edge2)]
        if any(part is None for part in parts):
            return None
        return hedge(parts)

    # not same length
    if contains_variable(edge1) or contains_variable(edge2):
        return None
    etype = common_type((edge1, edge2))
    if etype:
        return hedge('*/{}'.format(etype))
    return hedge('*')
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def common_pattern(edge1: Hyperedge, edge2: Hyperedge) -> Hyperedge | None:
    """Return a pattern generalising both edges, or ``None``.

    The candidate computed by ``_common_pattern`` is returned only when
    ``is_valid`` accepts it; otherwise the result is ``None``.
    """
    candidate = _common_pattern(edge1, edge2)
    return candidate if is_valid(candidate) else None
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections import Counter
|
|
4
|
+
|
|
5
|
+
from hyperbase import hedge
|
|
6
|
+
from hyperbase.hyperedge import Hyperedge
|
|
7
|
+
from hyperbase.patterns.entrypoints import edge_matches_pattern
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class PatternCounter:
    """Counts the patterns derived from the edges passed to ``count``.

    Counts accumulate in ``patterns`` (a ``collections.Counter``).
    """

    def __init__(
        self,
        depth: int = 2,
        count_subedges: bool = True,
        expansions: set[str] | None = None,
        match_roots: set[str] | None = None,
        match_subtypes: set[str] | None = None
    ) -> None:
        """Store the configuration.

        ``expansions`` defaults to ``{'*'}``; ``match_roots`` and
        ``match_subtypes`` default to empty sets. Sets supplied by the
        caller are stored as-is (not copied).
        """
        self.patterns: Counter[Hyperedge | None] = Counter()
        self.depth = depth
        self.count_subedges = count_subedges
        self.expansions: set[str] = {'*'} if expansions is None else expansions
        self.match_roots: set[str] = set() if match_roots is None else match_roots
        self.match_subtypes: set[str] = set() if match_subtypes is None else match_subtypes

    def _matches_expansions(self, edge: Hyperedge) -> bool:
        """Tell whether ``edge`` matches at least one expansion pattern."""
        return any(edge_matches_pattern(edge, expansion) for expansion in self.expansions)

    def _force_subtypes(self, edge: Hyperedge) -> bool:
        """Tell whether ``edge`` matches at least one ``match_subtypes`` pattern."""
        # A list (not a generator) so that every pattern is tried.
        return any([edge_matches_pattern(edge, st_pattern) for st_pattern in self.match_subtypes])

    def _force_root_expansion(self, edge: Hyperedge) -> tuple[bool, bool]:
        """Return ``(force_root, force_expansion)`` for ``edge``.

        Both flags are set when ``edge`` itself matches a ``match_roots``
        pattern; only ``force_expansion`` is set when the pattern matches
        somewhere inside ``edge``.
        """
        root_forced = False
        expansion_forced = False
        for root_pattern in self.match_roots:
            if edge_matches_pattern(edge, root_pattern):
                root_forced = expansion_forced = True
            elif _inner_edge_matches_pattern(edge, root_pattern):
                expansion_forced = True
        return root_forced, expansion_forced

    def _list2patterns(
        self,
        ledge: list[Hyperedge],
        depth: int = 1,
        force_expansion: bool = False,
        force_root: bool = False,
        force_subtypes: bool = False
    ) -> list[list[Hyperedge | None]]:
        """Enumerate the pattern lists derivable from the items of ``ledge``.

        Each returned list has one entry per item of ``ledge``. An empty
        list is returned as soon as ``depth`` exceeds ``self.depth``.
        """
        if depth > self.depth:
            return []

        head = ledge[0]

        head_subtypes = force_subtypes | self._force_subtypes(head)
        head_root, head_expansion = self._force_root_expansion(head)
        head_root |= force_root
        head_expansion |= force_expansion

        # Alternatives for the first item: its flat pattern (unless expansion
        # is forced on a non-atom), followed by its expanded forms.
        alternatives: list[Hyperedge | None] = []
        if head.atom or not head_expansion:
            alternatives.append(_edge2pattern(head, root=head_root, subtype=head_subtypes))

        if not head.atom and (self._matches_expansions(head) or head_expansion):
            alternatives += self._list2patterns(  # type: ignore[arg-type]
                list(head),
                depth + 1,
                force_expansion=head_expansion,
                force_root=head_root,
                force_subtypes=head_subtypes,
            )

        if len(ledge) == 1:
            return [[alternative] for alternative in alternatives]

        # The remaining items are processed with the caller's flags, not
        # with the ones derived from the first item.
        tails = self._list2patterns(
            ledge[1:],
            depth=depth,
            force_expansion=force_expansion,
            force_root=force_root,
            force_subtypes=force_subtypes,
        )
        return [[alternative] + tail for tail in tails for alternative in alternatives]

    def _edge2patterns(self, edge: Hyperedge) -> list[Hyperedge | None]:
        """Return the patterns derived from ``edge`` (empty if it cannot be normalised)."""
        subtypes_forced = self._force_subtypes(edge)
        root_forced, _ = self._force_root_expansion(edge)
        normalized = edge.normalized()
        if normalized is None:
            return []
        expanded = self._list2patterns(
            list(normalized), force_subtypes=subtypes_forced, force_root=root_forced
        )
        return [hedge(pattern) for pattern in expanded]

    def count(self, edge: Hyperedge | str) -> None:
        """Add the patterns of ``edge`` to the counts.

        Atoms, and inputs for which ``hedge`` returns ``None``, are ignored.
        Patterns are only counted when the edge matches an expansion
        pattern; sub-edges are counted recursively when ``count_subedges``
        is set.
        """
        parsed = hedge(edge)
        if parsed is None or not parsed.not_atom:
            return
        if self._matches_expansions(parsed):
            self.patterns.update(self._edge2patterns(parsed))
        if self.count_subedges:
            for subedge in parsed:
                self.count(subedge)
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def _edge2pattern(edge: Hyperedge, root: bool = False, subtype: bool = False) -> Hyperedge | None:
    """Turn ``edge`` into a single atomic pattern.

    The root is ``edge.root()`` when ``root`` is set and the edge is an
    atom, ``'*'`` otherwise. The type is ``edge.type()`` when ``subtype``
    is set, ``edge.mtype()`` otherwise. The edge's argroles, when not
    empty, are appended after a dot.
    """
    root_str = edge.root() if root and edge.atom else '*'  # type: ignore[attr-defined]
    et = edge.type() if subtype else edge.mtype()
    pattern = '{}/{}'.format(root_str, et)

    ar = edge.argroles()
    if ar != '':
        pattern = '{}.{}'.format(pattern, ar)
    return hedge(pattern)
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def _inner_edge_matches_pattern(edge: Hyperedge, pattern: str, hg: object = None) -> bool:
|
|
145
|
+
if edge.atom:
|
|
146
|
+
return False
|
|
147
|
+
for subedge in edge:
|
|
148
|
+
if edge_matches_pattern(subedge, pattern, hg=hg):
|
|
149
|
+
return True
|
|
150
|
+
for subedge in edge:
|
|
151
|
+
if _inner_edge_matches_pattern(subedge, pattern, hg=hg):
|
|
152
|
+
return True
|
|
153
|
+
return False
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from hyperbase.hyperedge import Hyperedge, hedge
|
|
4
|
+
from hyperbase.patterns.matcher import Matcher
|
|
5
|
+
from hyperbase.patterns.utils import _normalize_fun_patterns
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def match_pattern(
    edge: Hyperedge | str | list[object] | tuple[object, ...],
    pattern: Hyperedge | str | list[object] | tuple[object, ...],
    curvars: dict[str, Hyperedge] | None = None
) -> list[dict[str, Hyperedge]]:
    """Match an edge against a pattern and return the variable bindings.

    The result is a list of dictionaries, one per way of matching:

    * no match: an empty list;
    * match, pattern without variables: a list with one empty dictionary;
    * match, pattern with variables: one dictionary per possible
      assignment, mapping each variable name to the hyperedge it captured.

    An empty list is also returned when ``hedge`` cannot build an edge from
    ``edge`` or from ``pattern``.

    Patterns are themselves edges. They can match families of edges
    by employing special atoms:

    -> '\\*' represents a general wildcard (matches any entity)

    -> '.' represents an atomic wildcard (matches any atom)

    -> '(\\*)' represents an edge wildcard (matches any edge)

    -> '...' at the end indicates an open-ended pattern.

    The wildcards ('\\*', '.' and '(\\*)') can be used to specify variables,
    for example '\\*x', '(CLAIM)' or '.ACTOR'. In case of a match, these
    variables are assigned the hyperedge they correspond to. For example,

    (1) the edge: (is/Pd (my/Mp name/Cn) mary/Cp)
    applied to the pattern: (is/Pd (my/Mp name/Cn) \\*NAME)
    produces the result: [{'NAME': mary/Cp}]

    (2) the edge: (is/Pd (my/Mp name/Cn) mary/Cp)
    applied to the pattern: (is/Pd (my/Mp name/Cn) (NAME))
    produces the result: [{}]

    (3) the edge: (is/Pd (my/Mp name/Cn) mary/Cp)
    applied to the pattern: (is/Pd . \\*NAME)
    produces the result: []
    """
    parsed_edge = hedge(edge)
    parsed_pattern = hedge(pattern)
    if parsed_edge is None or parsed_pattern is None:
        return []

    # The matcher runs the match in its constructor and exposes the
    # bindings through ``results``.
    return Matcher(
        edge=parsed_edge,
        pattern=_normalize_fun_patterns(parsed_pattern),
        curvars=curvars,
    ).results
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def edge_matches_pattern(
    edge: Hyperedge | str | list[object] | tuple[object, ...],
    pattern: Hyperedge | str | list[object] | tuple[object, ...],
    **kwargs: object
) -> bool:
    """Tell whether ``edge`` matches ``pattern``.

    This is ``True`` exactly when ``match_pattern`` returns a non-empty
    list. Extra keyword arguments are accepted but not used.

    Patterns are themselves edges. They can match families of edges
    by employing special atoms:

    -> '\\*' represents a general wildcard (matches any entity)

    -> '.' represents an atomic wildcard (matches any atom)

    -> '(\\*)' represents an edge wildcard (matches any edge)

    -> '...' at the end indicates an open-ended pattern.

    The pattern can be any valid hyperedge, including the above special atoms.
    Examples: (is/Pd hyperbase/C .)
    (says/Pd * ...)
    """
    return len(match_pattern(edge, pattern)) > 0
|
|
@@ -0,0 +1,245 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections import Counter
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
from hyperbase import hedge
|
|
7
|
+
from hyperbase.hyperedge import Atom, Hyperedge
|
|
8
|
+
from hyperbase.patterns.argroles import _match_by_argroles
|
|
9
|
+
from hyperbase.patterns.atoms import _matches_atomic_pattern
|
|
10
|
+
from hyperbase.patterns.properties import is_fun_pattern, is_pattern, FUNS
|
|
11
|
+
from hyperbase.patterns.utils import _defun_pattern_argroles, _atoms_and_tok_pos
|
|
12
|
+
from hyperbase.patterns.variables import _varname, _assign_edge_to_var
|
|
13
|
+
|
|
14
|
+
# tok_pos can be nested lists/ints matching the edge structure
|
|
15
|
+
TokPos = Any
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class Matcher:
    """Matches one edge against one pattern.

    The match is performed by the constructor; its outcome is stored in
    ``results`` as a list of variable-binding dictionaries (an empty list
    means that the edge does not match).
    """

    def __init__(
        self,
        edge: Hyperedge,
        pattern: Hyperedge,
        curvars: dict[str, Hyperedge] | None = None,
        tok_pos: TokPos = None
    ) -> None:
        """Run the match immediately and keep its bindings in ``results``."""
        self.results: list[dict[str, Hyperedge]] = self.match(edge, pattern, curvars=curvars, tok_pos=tok_pos)

    def match(
        self,
        edge: Hyperedge,
        pattern: Hyperedge,
        curvars: dict[str, Hyperedge] | None = None,
        tok_pos: TokPos = None
    ) -> list[dict[str, Hyperedge]]:
        """Match ``edge`` against ``pattern``.

        Args:
            edge: the edge to test.
            pattern: the pattern to test it against.
            curvars: bindings already established; they are merged into
                every returned dictionary.
            tok_pos: optional token positions, indexed in parallel with
                the items of ``edge``.

        Returns:
            The distinct binding dictionaries under which the edge
            matches; an empty list when it does not match.

        Raises:
            RuntimeError: if ``tok_pos`` is given but has no entry for a
                pattern position that holds a non-atomic item.
        """
        if curvars is None:
            curvars = {}

        # functional patterns
        if is_fun_pattern(pattern):
            return self._match_fun_pat(
                edge,
                pattern,
                curvars,
                tok_pos=tok_pos
            )

        # function pattern on edge can never match non-functional pattern
        if is_fun_pattern(edge):
            return []

        # atomic patterns
        if pattern.atom:
            if _matches_atomic_pattern(edge, pattern):
                variables: dict[str, Hyperedge] = {}
                if is_pattern(pattern):
                    varname = _varname(pattern)
                    if len(varname) > 0:
                        variables[varname] = _assign_edge_to_var({**curvars, **variables}, varname, edge)[varname]
                return [{**curvars, **variables}]
            else:
                return []

        min_len = len(pattern)
        max_len: int | float = min_len
        # open-ended?
        if pattern[-1].to_str() == '...':
            new_pattern = hedge(pattern[:-1])
            if new_pattern is None:
                return []
            pattern = new_pattern
            min_len -= 1
            max_len = float('inf')

        result: list[dict[str, Hyperedge]] = [{}]
        # Argroles of the connector, up to the first '-'.
        argroles_posopt = _defun_pattern_argroles(pattern)[0].argroles().split('-')[0]
        if len(argroles_posopt) > 0 and argroles_posopt[0] == '{':
            # Argroles wrapped in braces: match by argroles instead of by order.
            match_by_order = False
            argroles_posopt = argroles_posopt[1:-1]
        else:
            match_by_order = True
        # Roles before the first comma set the minimum length below; the
        # commas are simply dropped to obtain the full set of roles.
        argroles = argroles_posopt.split(',')[0]
        argroles_opt = argroles_posopt.replace(',', '')

        if len(argroles) > 0:
            min_len = 1 + len(argroles)
            max_len = float('inf')
        else:
            match_by_order = True

        if len(edge) < min_len or len(edge) > max_len:
            return []

        # match by order
        if match_by_order:
            # NOTE(review): when argroles lowered min_len above, the pattern
            # may be longer than the edge and edge[i] could then go out of
            # range - confirm whether callers can trigger this.
            for i, pitem in enumerate(pattern):
                eitem = edge[i]
                _result: list[dict[str, Hyperedge]] = []

                for variables in result:
                    if pitem.atom:
                        varname = _varname(pitem)
                        if _matches_atomic_pattern(eitem, pitem):
                            if len(varname) > 0 and varname[0].isupper():
                                variables[varname] = _assign_edge_to_var(
                                    {**curvars, **variables}, varname, eitem)[varname]
                        else:
                            continue
                        _result.append(variables)
                    else:
                        tok_pos_item = None
                        if tok_pos is not None:
                            # Explicit check rather than an assert, so the
                            # error is still raised under ``python -O``.
                            if len(tok_pos) <= i:
                                raise RuntimeError(f"Index '{i}' in tok_pos '{tok_pos}' is out of range")
                            tok_pos_item = tok_pos[i]
                        _result += self.match(
                            eitem,
                            pitem,
                            {**curvars, **variables},
                            tok_pos=tok_pos_item
                        )
                result = _result
        # match by argroles
        else:
            result = []
            # match connector first
            ctok_pos = tok_pos[0] if tok_pos else None
            if self.match(edge[0], pattern[0], curvars, tok_pos=ctok_pos):
                role_counts = Counter(argroles_opt).most_common()
                unknown_roles = (len(pattern) - 1) - len(argroles_opt)
                if unknown_roles > 0:
                    role_counts.append(('*', unknown_roles))
                # add connector pseudo-argrole
                role_counts = [('X', 1)] + role_counts
                result = _match_by_argroles(
                    self,
                    edge,
                    pattern,
                    role_counts,
                    len(argroles),
                    curvars=curvars,
                    tok_pos=tok_pos
                )

        # Merge in the incoming bindings and drop duplicates, keeping order.
        unique_vars: list[dict[str, Hyperedge]] = []
        for variables in result:
            v = {**curvars, **variables}
            if v not in unique_vars:
                unique_vars.append(v)
        return unique_vars

    def _match_atoms(
        self,
        atom_patterns: tuple[Hyperedge, ...],
        atoms: set[Atom] | list[Atom],
        curvars: dict[str, Hyperedge],
        atoms_tok_pos: list[Any] | None = None,
        matched_atoms: list[Atom] | None = None
    ) -> list[dict[str, Hyperedge]]:
        """Match every pattern in ``atom_patterns`` to a different atom.

        The first pattern is tried against each atom not yet listed in
        ``matched_atoms``; for every successful binding the remaining
        patterns are matched recursively against the remaining atoms.

        Returns ``[curvars]`` when there are no patterns left, otherwise
        the bindings of all complete assignments found.
        """
        if matched_atoms is None:
            matched_atoms = []

        if len(atom_patterns) == 0:
            return [curvars]

        results: list[dict[str, Hyperedge]] = []
        atom_pattern = atom_patterns[0]

        for atom_pos, atom in enumerate(atoms):
            if atom not in matched_atoms:
                tok_pos = atoms_tok_pos[atom_pos] if atoms_tok_pos else None
                svars = self.match(atom, atom_pattern, curvars, tok_pos=tok_pos)
                for variables in svars:
                    results += self._match_atoms(
                        atom_patterns[1:],
                        atoms,
                        {**curvars, **variables},
                        atoms_tok_pos=atoms_tok_pos,
                        matched_atoms=matched_atoms + [atom]
                    )

        return results

    def _match_fun_pat(
        self,
        edge: Hyperedge,
        fun_pattern: Hyperedge,
        curvars: dict[str, Hyperedge],
        tok_pos: TokPos = None
    ) -> list[dict[str, Hyperedge]]:
        """Match ``edge`` against a functional pattern.

        The function is named by the root of the pattern's first item:

        * ``var``: binds the variable named by the third item and matches
          the second item. If ``edge`` is itself a ``var`` edge with the
          same name, its wrapped edge is bound and matched instead.
        * ``atoms``: each remaining item must match a different atom of
          ``edge`` (see ``_match_atoms``).
        * ``any``: the bindings of the first remaining item that matches.

        Raises:
            ValueError: if the function is not in ``FUNS``.
            RuntimeError: if a ``var`` pattern does not have three items.
            NotImplementedError: if the function is in ``FUNS`` but not
                handled here.
        """
        fun = fun_pattern[0].root()

        # Explicit check rather than an assert, so the error is still
        # raised under ``python -O``.
        if fun not in FUNS:
            raise ValueError(f"Unknown pattern function: {fun}")

        if fun == 'var':
            if len(fun_pattern) != 3:
                raise RuntimeError('var pattern function must have two arguments')
            pattern = fun_pattern[1]
            var_name = fun_pattern[2].root()
            if edge.not_atom and str(edge[0]) == 'var' and len(edge) == 3 and str(edge[2]) == var_name:
                this_var = _assign_edge_to_var(curvars, var_name, edge[1])
                return self.match(
                    edge[1],
                    pattern,
                    curvars={**curvars, **this_var},
                    tok_pos=tok_pos
                )
            else:
                this_var = _assign_edge_to_var(curvars, var_name, edge)
                return self.match(
                    edge,
                    pattern,
                    curvars={**curvars, **this_var},
                    tok_pos=tok_pos
                )
        elif fun == 'atoms':
            if tok_pos:
                atoms_list, atoms_tok_pos = _atoms_and_tok_pos(edge, tok_pos)
            else:
                atoms_list = list(edge.atoms())
                atoms_tok_pos = None
            atom_patterns = fun_pattern[1:]
            return self._match_atoms(
                atom_patterns,
                atoms_list,
                curvars,
                atoms_tok_pos=atoms_tok_pos
            )
        elif fun == 'any':
            for pattern in fun_pattern[1:]:
                matches = self.match(
                    edge,
                    pattern,
                    curvars=curvars,
                    tok_pos=tok_pos
                )
                if len(matches) > 0:
                    return matches
            return []
        else:
            raise NotImplementedError(f"Pattern function '{fun}' not implemented.")
|