jaclang 0.8.4__py3-none-any.whl → 0.8.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of jaclang might be problematic. Click here for more details.
- jaclang/cli/cli.md +1 -0
- jaclang/cli/cli.py +109 -37
- jaclang/compiler/jac.lark +3 -3
- jaclang/compiler/larkparse/jac_parser.py +2 -2
- jaclang/compiler/parser.py +14 -21
- jaclang/compiler/passes/main/__init__.py +5 -1
- jaclang/compiler/passes/main/binder_pass.py +594 -0
- jaclang/compiler/passes/main/cfg_build_pass.py +21 -1
- jaclang/compiler/passes/main/import_pass.py +8 -256
- jaclang/compiler/passes/main/inheritance_pass.py +10 -3
- jaclang/compiler/passes/main/pyast_gen_pass.py +92 -77
- jaclang/compiler/passes/main/pyast_load_pass.py +24 -13
- jaclang/compiler/passes/main/sem_def_match_pass.py +1 -1
- jaclang/compiler/passes/main/sym_tab_build_pass.py +4 -0
- jaclang/compiler/passes/main/tests/fixtures/M1.jac +3 -0
- jaclang/compiler/passes/main/tests/fixtures/cfg_has_var.jac +12 -0
- jaclang/compiler/passes/main/tests/fixtures/cfg_if_no_else.jac +11 -0
- jaclang/compiler/passes/main/tests/fixtures/cfg_return.jac +9 -0
- jaclang/compiler/passes/main/tests/fixtures/checker_imported.jac +2 -0
- jaclang/compiler/passes/main/tests/fixtures/checker_importer.jac +6 -0
- jaclang/compiler/passes/main/tests/fixtures/data_spatial_types.jac +1 -1
- jaclang/compiler/passes/main/tests/fixtures/import_symbol_type_infer.jac +11 -0
- jaclang/compiler/passes/main/tests/fixtures/infer_type_assignment.jac +5 -0
- jaclang/compiler/passes/main/tests/fixtures/member_access_type_inferred.jac +13 -0
- jaclang/compiler/passes/main/tests/fixtures/member_access_type_resolve.jac +11 -0
- jaclang/compiler/passes/main/tests/fixtures/sym_binder.jac +47 -0
- jaclang/compiler/passes/main/tests/fixtures/type_annotation_assignment.jac +8 -0
- jaclang/compiler/passes/main/tests/test_binder_pass.py +111 -0
- jaclang/compiler/passes/main/tests/test_cfg_build_pass.py +62 -24
- jaclang/compiler/passes/main/tests/test_checker_pass.py +87 -0
- jaclang/compiler/passes/main/tests/test_pyast_gen_pass.py +13 -13
- jaclang/compiler/passes/main/tests/test_sem_def_match_pass.py +6 -6
- jaclang/compiler/passes/main/type_checker_pass.py +128 -0
- jaclang/compiler/passes/tool/doc_ir_gen_pass.py +2 -0
- jaclang/compiler/passes/tool/tests/fixtures/simple_walk_fmt.jac +3 -0
- jaclang/compiler/program.py +32 -11
- jaclang/compiler/tests/test_sr_errors.py +32 -0
- jaclang/compiler/type_system/__init__.py +1 -0
- jaclang/compiler/type_system/type_evaluator.py +421 -0
- jaclang/compiler/type_system/type_utils.py +41 -0
- jaclang/compiler/type_system/types.py +240 -0
- jaclang/compiler/unitree.py +36 -24
- jaclang/langserve/dev_engine.jac +645 -0
- jaclang/langserve/dev_server.jac +201 -0
- jaclang/langserve/engine.jac +24 -5
- jaclang/langserve/tests/server_test/test_lang_serve.py +2 -2
- jaclang/langserve/tests/test_dev_server.py +80 -0
- jaclang/langserve/tests/test_server.py +13 -0
- jaclang/runtimelib/builtin.py +28 -39
- jaclang/runtimelib/importer.py +34 -63
- jaclang/runtimelib/machine.py +48 -64
- jaclang/runtimelib/memory.py +23 -5
- jaclang/runtimelib/tests/fixtures/savable_object.jac +10 -2
- jaclang/runtimelib/utils.py +42 -6
- jaclang/tests/fixtures/edge_node_walk.jac +1 -1
- jaclang/tests/fixtures/edges_walk.jac +1 -1
- jaclang/tests/fixtures/gendot_bubble_sort.jac +1 -1
- jaclang/tests/fixtures/py_run.jac +8 -0
- jaclang/tests/fixtures/py_run.py +23 -0
- jaclang/tests/fixtures/pyfunc.py +2 -0
- jaclang/tests/fixtures/pyfunc_fmt.py +60 -0
- jaclang/tests/fixtures/pyfunc_fstr.py +25 -0
- jaclang/tests/fixtures/pyfunc_kwesc.py +33 -0
- jaclang/tests/fixtures/python_run_test.py +19 -0
- jaclang/tests/test_cli.py +107 -0
- jaclang/tests/test_language.py +106 -5
- jaclang/utils/lang_tools.py +6 -3
- jaclang/utils/module_resolver.py +90 -0
- jaclang/utils/symtable_test_helpers.py +125 -0
- jaclang/utils/test.py +3 -4
- jaclang/vendor/interegular/__init__.py +34 -0
- jaclang/vendor/interegular/comparator.py +163 -0
- jaclang/vendor/interegular/fsm.py +1015 -0
- jaclang/vendor/interegular/patterns.py +732 -0
- jaclang/vendor/interegular/py.typed +0 -0
- jaclang/vendor/interegular/utils/__init__.py +15 -0
- jaclang/vendor/interegular/utils/simple_parser.py +165 -0
- jaclang/vendor/interegular-0.3.3.dist-info/INSTALLER +1 -0
- jaclang/vendor/interegular-0.3.3.dist-info/LICENSE.txt +21 -0
- jaclang/vendor/interegular-0.3.3.dist-info/METADATA +64 -0
- jaclang/vendor/interegular-0.3.3.dist-info/RECORD +20 -0
- jaclang/vendor/interegular-0.3.3.dist-info/REQUESTED +0 -0
- jaclang/vendor/interegular-0.3.3.dist-info/WHEEL +5 -0
- jaclang/vendor/interegular-0.3.3.dist-info/top_level.txt +1 -0
- {jaclang-0.8.4.dist-info → jaclang-0.8.6.dist-info}/METADATA +2 -1
- {jaclang-0.8.4.dist-info → jaclang-0.8.6.dist-info}/RECORD +88 -43
- {jaclang-0.8.4.dist-info → jaclang-0.8.6.dist-info}/WHEEL +0 -0
- {jaclang-0.8.4.dist-info → jaclang-0.8.6.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,1015 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Finite state machine library, extracted from `greenery.fsm` and adapted by MegaIng
|
|
3
|
+
"""
|
|
4
|
+
from _collections import deque
|
|
5
|
+
from collections import defaultdict
|
|
6
|
+
from functools import total_ordering
|
|
7
|
+
from typing import Any, Set, Dict, Union, NewType, Mapping, Tuple, Iterable
|
|
8
|
+
|
|
9
|
+
from interegular.utils import soft_repr
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class _Marker(BaseException):
|
|
13
|
+
pass
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@total_ordering
|
|
17
|
+
class _AnythingElseCls:
|
|
18
|
+
"""
|
|
19
|
+
This is a surrogate symbol which you can use in your finite state machines
|
|
20
|
+
to represent "any symbol not in the official alphabet". For example, if your
|
|
21
|
+
state machine's alphabet is {"a", "b", "c", "d", fsm.anything_else}, then
|
|
22
|
+
you can pass "e" in as a symbol and it will be converted to
|
|
23
|
+
fsm.anything_else, then follow the appropriate transition.
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
def __str__(self):
|
|
27
|
+
return "anything_else"
|
|
28
|
+
|
|
29
|
+
def __repr__(self):
|
|
30
|
+
return "anything_else"
|
|
31
|
+
|
|
32
|
+
def __lt__(self, other):
|
|
33
|
+
return False
|
|
34
|
+
|
|
35
|
+
def __eq__(self, other):
|
|
36
|
+
return self is other
|
|
37
|
+
|
|
38
|
+
def __hash__(self):
|
|
39
|
+
return hash(id(self))
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# We use a class instance because that gives us control over how the special
|
|
43
|
+
# value gets serialised. Otherwise this would just be `object()`.
|
|
44
|
+
anything_else = _AnythingElseCls()
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def nice_char_group(chars: Iterable[Union[str, _AnythingElseCls]]):
    """
    Render a collection of symbols as a compact, comma-separated string.

    The symbols are sorted; consecutive code points (by `ord`) are gathered
    into runs. A run of two or more symbols is shown as "first-last", anything
    shorter is listed on its own. Every symbol is rendered with `soft_repr`.
    `anything_else` never extends a run.
    """
    pieces = []
    run = []

    def flush():
        # Emit the run collected so far, collapsed to a range when possible.
        if len(run) >= 2:
            pieces.append(f"{soft_repr(run[0])}-{soft_repr(run[-1])}")
        else:
            pieces.extend(map(soft_repr, run))

    for symbol in sorted(chars):
        extends_run = (
            symbol is not anything_else
            and run
            and ord(run[-1]) + 1 == ord(symbol)
        )
        if extends_run:
            run.append(symbol)
        else:
            flush()
            run = [symbol]

    # The last run is still pending once the loop is over.
    flush()
    return ','.join(pieces)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
State = NewType("State", int)
|
|
67
|
+
TransitionKey = NewType("TransitionKey", int)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class Alphabet(Mapping[Any, TransitionKey]):
    """
    Read-only mapping from symbols to the transition key they trigger.

    Several symbols may share one transition key; `by_transition` exposes the
    inverse view (transition key -> list of symbols).

    Looking up a symbol that is not part of the alphabet does not raise
    `KeyError`: it yields the key of `anything_else` when that surrogate is in
    the alphabet, and `None` otherwise.
    """

    def __init__(self, symbol_mapping: Dict[Union[str, _AnythingElseCls], TransitionKey]):
        """
        `symbol_mapping` maps each symbol to its transition key. The dict is
        stored by reference, not copied.
        """
        self._symbol_mapping = symbol_mapping
        by_transition = defaultdict(list)
        for symbol, key in symbol_mapping.items():
            by_transition[key].append(symbol)
        # Stored as a plain dict so that later lookups cannot create entries.
        self._by_transition = dict(by_transition)

    @property
    def by_transition(self):
        """Inverse view: transition key -> list of symbols sharing that key."""
        return self._by_transition

    def __str__(self):
        rows = [(nice_char_group(symbols), str(tk))
                for tk, symbols in sorted(self._by_transition.items())]
        # Pad the symbol column to the widest group so the bars line up.
        width = max((len(group) for group, _ in rows), default=0)
        return '\n'.join(f"{a:{width}} | {b}" for a, b in rows)

    def __repr__(self):
        return f"{type(self).__name__}({self._symbol_mapping!r})"

    def __len__(self) -> int:
        return len(self._symbol_mapping)

    def __iter__(self):
        return iter(self._symbol_mapping)

    def __getitem__(self, item):
        """
        Return the transition key for `item`. Unknown symbols fall back to the
        key of `anything_else` if present, else `None` (never `KeyError`).
        """
        if item in self._symbol_mapping:
            return self._symbol_mapping[item]
        if anything_else in self._symbol_mapping:
            return self._symbol_mapping[anything_else]
        return None

    def __contains__(self, item):
        # Strict membership: the `anything_else` fallback is NOT applied here.
        return item in self._symbol_mapping

    @staticmethod
    def _regroup(alphabets, symbols):
        """
        Build a common alphabet over `symbols` for the given `alphabets`.

        Two symbols end up with the same new transition key exactly when every
        input alphabet maps them to the same old key. Returns the new
        `Alphabet` and a tuple with, per input alphabet, a dict translating
        each new key back to that alphabet's old key (which may be `None`, see
        `__getitem__`).
        """
        symbol_to_keys = {symbol: tuple(a[symbol] for a in alphabets) for symbol in symbols}
        keys_to_symbols = defaultdict(list)
        for symbol, keys in symbol_to_keys.items():
            keys_to_symbols[keys].append(symbol)
        # New keys are numbered in first-seen order of the old-key tuples.
        keys_to_key = {k: i for i, k in enumerate(keys_to_symbols)}
        result = Alphabet({symbol: keys_to_key[keys]
                           for keys, grouped in keys_to_symbols.items()
                           for symbol in grouped})
        new_to_old_mappings = [{} for _ in alphabets]
        for keys, new_key in keys_to_key.items():
            for old_key, new_to_old in zip(keys, new_to_old_mappings):
                new_to_old[new_key] = old_key
        return result, tuple(new_to_old_mappings)

    def union(*alphabets: 'Alphabet') -> 'Tuple[Alphabet, Tuple[Dict[TransitionKey, TransitionKey], ...]]':
        """
        Merge any number of alphabets over the union of their symbols.

        Usable as `Alphabet.union(a, b, ...)` or `a.union(b, ...)`. Returns the
        merged alphabet plus one new-key -> old-key dict per input alphabet.
        """
        all_symbols = frozenset().union(*(a._symbol_mapping.keys() for a in alphabets))
        return Alphabet._regroup(alphabets, all_symbols)

    @classmethod
    def from_groups(cls, *groups):
        """
        Build an alphabet from groups of symbols: every symbol of the i-th
        group gets transition key `i`.
        """
        return cls({s: TransitionKey(i) for i, group in enumerate(groups) for s in group})

    def intersect(self, other: 'Alphabet') -> 'Tuple[Alphabet, Tuple[Dict[TransitionKey, TransitionKey], ...]]':
        """
        Merge `self` and `other` over the symbols present in both.

        Returns the merged alphabet plus a pair of new-key -> old-key dicts
        (first for `self`, second for `other`).
        """
        all_symbols = frozenset(self._symbol_mapping).intersection(other._symbol_mapping)
        return Alphabet._regroup((self, other), all_symbols)

    def copy(self):
        """Return a new `Alphabet` backed by a shallow copy of the symbol mapping."""
        return Alphabet(self._symbol_mapping.copy())
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
class OblivionError(Exception):
    """
    Raised by a `follow` callback while `crawl()`ing an FSM when the
    transition being followed leads to the oblivion state, i.e. there is
    nowhere left to go. One example is crawling two FSMs in parallel and
    hitting the oblivion state of both at the same time.

    It acts as an out-of-band signal that lets the map of the FSM under
    construction stay smaller.
    """
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
class FSM:
|
|
165
|
+
"""
|
|
166
|
+
A Finite State Machine or FSM has an alphabet and a set of states. At any
|
|
167
|
+
given moment, the FSM is in one state. When passed a symbol from the
|
|
168
|
+
alphabet, the FSM jumps to another state (or possibly the same state).
|
|
169
|
+
A map (Python dictionary) indicates where to jump.
|
|
170
|
+
One state is nominated as a starting state. Zero or more states are
|
|
171
|
+
nominated as final states. If, after consuming a string of symbols,
|
|
172
|
+
the FSM is in a final state, then it is said to "accept" the string.
|
|
173
|
+
This class also has some pretty powerful methods which allow FSMs to
|
|
174
|
+
be concatenated, alternated between, multiplied, looped (Kleene star
|
|
175
|
+
closure), intersected, and simplified.
|
|
176
|
+
The majority of these methods are available using operator overloads.
|
|
177
|
+
"""
|
|
178
|
+
alphabet: Alphabet
|
|
179
|
+
initial: State
|
|
180
|
+
states: Set[State]
|
|
181
|
+
finals: Set[State]
|
|
182
|
+
map: Dict[State, Dict[TransitionKey, State]]
|
|
183
|
+
|
|
184
|
+
def __setattr__(self, name, value):
|
|
185
|
+
"""Immutability prevents some potential problems."""
|
|
186
|
+
raise Exception("This object is immutable.")
|
|
187
|
+
|
|
188
|
+
def __init__(self, alphabet: Alphabet, states, initial, finals, map, *, __no_validation__=False):
    """
    Build a finite state machine.

    `alphabet` is the `Alphabet` of symbols the FSM can be fed.
    `states` is the collection of states of the FSM.
    `initial` is the starting state.
    `finals` is the collection of accepting states.
    `map` gives, per state, the target state for each transition key. It may
    be sparse: omitted transitions behave like a jump to a non-final
    "oblivion" state.
    `__no_validation__` (keyword only) skips all consistency checks.

    Raises `TypeError` when `alphabet` is not an `Alphabet`, and `Exception`
    when `initial`, `finals` or a transition target is not among `states`.
    """
    if not __no_validation__:
        # Instances are immutable, so checking once at construction is enough.
        if not isinstance(alphabet, Alphabet):
            raise TypeError("Expected an Alphabet instance")
        if initial not in states:
            raise Exception("Initial state " + repr(initial) + " must be one of " + repr(states))
        if not finals.issubset(states):
            raise Exception("Final states " + repr(finals) + " must be a subset of " + repr(states))
        for state in map:
            transitions = map[state]
            for symbol in transitions:
                target = transitions[symbol]
                if target not in states:
                    raise Exception(
                        "Transition for state " + repr(state) + " and symbol " + repr(symbol) + " leads to " + repr(
                            target) + ", which is not a state")

    # `__setattr__` always raises, so the attributes go straight into the
    # instance dictionary.
    self.__dict__.update(
        alphabet=alphabet,
        states=frozenset(states),
        initial=initial,
        finals=frozenset(finals),
        map=map,
    )
|
|
219
|
+
|
|
220
|
+
def accepts(self, input: str):
    """
    Tell whether this FSM accepts `input` (a string, or any iterable of
    symbols). Seen differently: treat `self` as a possibly infinite set of
    strings and test whether `input` belongs to it.

    When `fsm.anything_else` is part of the alphabet, every symbol that is
    not in the alphabet is replaced by `fsm.anything_else` first.
    """
    current = self.initial
    for symbol in input:
        if anything_else in self.alphabet and symbol not in self.alphabet:
            symbol = anything_else
        key = self.alphabet[symbol]

        # A transition that is absent from the map leads to the dead state.
        if current not in self.map:
            return False
        outgoing = self.map[current]
        if key not in outgoing:
            return False

        current = outgoing[key]
    return current in self.finals
|
|
241
|
+
|
|
242
|
+
def __contains__(self, string):
|
|
243
|
+
"""
|
|
244
|
+
This lets you use the syntax `"a" in fsm1` to see whether the string "a"
|
|
245
|
+
is in the set of strings accepted by `fsm1`.
|
|
246
|
+
"""
|
|
247
|
+
return self.accepts(string)
|
|
248
|
+
|
|
249
|
+
def reduce(self):
    """
    Return the result of reversing this FSM twice.

    Per a result by Brzozowski (1963), reversing a finite state machine
    twice yields a minimal machine equivalent to the original.
    """
    once = self.reversed()
    return once.reversed()
|
|
256
|
+
|
|
257
|
+
def __repr__(self):
|
|
258
|
+
string = "fsm("
|
|
259
|
+
string += "alphabet = " + repr(self.alphabet)
|
|
260
|
+
string += ", states = " + repr(self.states)
|
|
261
|
+
string += ", initial = " + repr(self.initial)
|
|
262
|
+
string += ", finals = " + repr(self.finals)
|
|
263
|
+
string += ", map = " + repr(self.map)
|
|
264
|
+
string += ")"
|
|
265
|
+
return string
|
|
266
|
+
|
|
267
|
+
def __str__(self):
    """
    Render the FSM as a text table: one header row, a dashed separator,
    then one row per state. The columns are an initial-state marker ("*"),
    the state name, whether the state is final, and the target state for
    each symbol of the alphabet (blank when there is no transition).
    """
    # Header row.
    # TODO maybe rework this to show transition groups instead of individual symbols
    header = ["", "name", "final?"]
    header.extend(soft_repr(symbol) for symbol in sorted(self.alphabet))
    table = [header]

    # One row per state.
    for state in self.states:
        line = [
            "*" if state == self.initial else "",
            str(state),
            "True" if state in self.finals else "False",
        ]
        for symbol, transition in sorted(self.alphabet.items()):
            if state in self.map and transition in self.map[state]:
                line.append(str(self.map[state][transition]))
            else:
                line.append("")
        table.append(line)

    # Each column is as wide as its longest cell, plus one space of padding.
    widths = [
        max(len(str(line[col])) for line in table) + 1
        for col in range(len(header))
    ]
    padded = [
        [cell.ljust(widths[col]) for col, cell in enumerate(line)]
        for line in table
    ]

    # Dashed separator right below the header.
    padded.insert(1, ["-" * width for width in widths])

    return "".join("".join(line) + "\n" for line in padded)
|
|
309
|
+
|
|
310
|
+
def concatenate(*fsms):
    """
    Concatenate arbitrarily many finite state machines, in order.

    Without arguments the result is `epsilon(Alphabet({}))`. Otherwise the
    alphabets are merged with `Alphabet.union` and the result is built by
    `crawl` over metastates, each a frozenset of (FSM index, state) pairs.
    """
    if not fsms:
        return epsilon(Alphabet({}))
    alphabet, new_to_old = Alphabet.union(*[fsm.alphabet for fsm in fsms])
    last_index = len(fsms) - 1
    last = fsms[-1]

    def connect_all(i, substate):
        """
        Return the set holding (i, substate) and, for as long as the state
        just added is final in its FSM, the initial state of the following
        FSM as well.
        """
        linked = {(i, substate)}
        while i < last_index and substate in fsms[i].finals:
            i += 1
            substate = fsms[i].initial
            linked.add((i, substate))
        return linked

    # The start metastate is the start of the first FSM; if that state is
    # final there, we are also at the start of the second FSM, and so on.
    initial = frozenset(connect_all(0, fsms[0].initial))

    def final(state):
        """A metastate is final when it holds a final state of the last FSM."""
        return any(
            i == last_index and substate in last.finals
            for (i, substate) in state
        )

    def follow(current, new_transition):
        """
        Advance every (FSM index, state) pair of `current` along
        `new_transition`, spilling over into the next FSM whenever a final
        state is reached.
        TODO: improve all follow() implementations to allow for dead metastates?
        """
        reached = set()
        for (i, substate) in current:
            machine = fsms[i]
            if substate not in machine.map:
                continue
            # Translate the merged-alphabet key back into this FSM's own key.
            old_transition = new_to_old[i][new_transition]
            if old_transition in machine.map[substate]:
                reached.update(connect_all(i, machine.map[substate][old_transition]))
        if not reached:
            raise OblivionError
        return frozenset(reached)

    return crawl(alphabet, initial, final, follow)
|
|
363
|
+
|
|
364
|
+
def __add__(self, other):
|
|
365
|
+
"""
|
|
366
|
+
Concatenate two finite state machines together.
|
|
367
|
+
For example, if self accepts "0*" and other accepts "1+(0|1)",
|
|
368
|
+
will return a finite state machine accepting "0*1+(0|1)".
|
|
369
|
+
Accomplished by effectively following non-deterministically.
|
|
370
|
+
Call using "fsm3 = fsm1 + fsm2"
|
|
371
|
+
"""
|
|
372
|
+
return self.concatenate(other)
|
|
373
|
+
|
|
374
|
+
def star(self):
    """
    Given that this FSM accepts X, return an FSM accepting X* (zero or more
    repetitions of X). Naively wiring the final states back to the initial
    state is NOT enough: see (b*ab)* for a counter-example.
    """
    alphabet = self.alphabet

    initial = {self.initial}

    def follow(state, transition):
        # Whether a fresh repetition could begin with this transition; it
        # does not depend on the substate, so work it out once.
        can_restart = self.initial in self.map and transition in self.map[self.initial]

        reached = set()
        for substate in state:
            if substate in self.map and transition in self.map[substate]:
                reached.add(self.map[substate][transition])

            # From a final substate we may also take the transitions that
            # leave the initial state of the original FSM.
            if can_restart and substate in self.finals:
                reached.add(self.map[self.initial][transition])

        if not reached:
            raise OblivionError

        return frozenset(reached)

    def final(state):
        for substate in state:
            if substate in self.finals:
                return True
        return False

    base = crawl(alphabet, initial, final, follow)
    # Zero repetitions must be accepted too, so the initial state of the
    # result is made final. `__setattr__` is blocked, hence the direct
    # write into the instance dictionary.
    base.__dict__['finals'] = base.finals | {base.initial}
    return base
|
|
408
|
+
|
|
409
|
+
def times(self, multiplier):
    """
    Return the multiplied FSM for this FSM and `multiplier`.

    Raises `Exception` when `multiplier` is negative.
    """
    if multiplier < 0:
        raise Exception("Can't multiply an FSM by " + repr(multiplier))

    alphabet = self.alphabet

    # A metastate is a collection of (state, iteration) pairs.
    initial = {(self.initial, 0)}

    def final(state):
        """If the initial state is final then multiplying doesn't alter that"""
        return any(
            substate == self.initial
            and (self.initial in self.finals or iteration == multiplier)
            for (substate, iteration) in state
        )

    def follow(current, transition):
        reached = []
        for (substate, iteration) in current:
            if iteration >= multiplier:
                continue
            if substate not in self.map or transition not in self.map[substate]:
                continue
            target = self.map[substate][transition]
            reached.append((target, iteration))
            # Landing on a final state of self also places us at the
            # initial state of the next iteration.
            if target in self.finals:
                reached.append((self.initial, iteration + 1))
        if not reached:
            raise OblivionError
        return frozenset(reached)

    return crawl(alphabet, initial, final, follow)
|
|
444
|
+
|
|
445
|
+
def __mul__(self, multiplier):
|
|
446
|
+
"""
|
|
447
|
+
Given an FSM and a multiplier, return the multiplied FSM.
|
|
448
|
+
"""
|
|
449
|
+
return self.times(multiplier)
|
|
450
|
+
|
|
451
|
+
def union(*fsms):
    """
    Return the union FSM of the given collection of FSMs, built by
    `parallel(fsms, any)`.

    Callable as `fsm1.union(fsm2, ...)` or `FSM.union(fsm1, fsm2, ...)`.
    """
    combined = parallel(fsms, any)
    return combined
|
|
458
|
+
|
|
459
|
+
def __or__(self, other):
|
|
460
|
+
"""
|
|
461
|
+
Alternation.
|
|
462
|
+
Return a finite state machine which accepts any sequence of symbols
|
|
463
|
+
that is accepted by either self or other. Note that the set of strings
|
|
464
|
+
recognised by the two FSMs undergoes a set union.
|
|
465
|
+
Call using "fsm3 = fsm1 | fsm2"
|
|
466
|
+
"""
|
|
467
|
+
return self.union(other)
|
|
468
|
+
|
|
469
|
+
def intersection(*fsms):
    """
    Treat the FSMs as sets of strings and return their intersection as a
    new FSM, built by `parallel(fsms, all)`: it accepts the sequences of
    symbols that every given FSM accepts.

    Callable as `fsm1.intersection(fsm2, ...)` or
    `FSM.intersection(fsm1, fsm2, ...)`.
    """
    common = parallel(fsms, all)
    return common
|
|
479
|
+
|
|
480
|
+
def __and__(self, other):
|
|
481
|
+
"""
|
|
482
|
+
Treat the FSMs as sets of strings and return the intersection of those
|
|
483
|
+
sets in the form of a new FSM. `fsm1.intersection(fsm2, ...)` or
|
|
484
|
+
`fsm.intersection(fsm1, ...)` are acceptable.
|
|
485
|
+
"""
|
|
486
|
+
return self.intersection(other)
|
|
487
|
+
|
|
488
|
+
def symmetric_difference(*fsms):
    """
    Treat `fsms` as a collection of sets of strings and return the symmetric
    difference of all of them, built with `parallel`. Python's own set
    method only handles two sets at a time; this one takes any number.
    """

    def odd_number_accept(accepts):
        # Symmetric difference: an odd number of the FSMs say "yes".
        return (accepts.count(True) % 2) == 1

    return parallel(fsms, odd_number_accept)
|
|
495
|
+
|
|
496
|
+
def __xor__(self, other):
|
|
497
|
+
"""
|
|
498
|
+
Symmetric difference. Returns an FSM which recognises only the strings
|
|
499
|
+
recognised by `self` or `other` but not both.
|
|
500
|
+
"""
|
|
501
|
+
return self.symmetric_difference(other)
|
|
502
|
+
|
|
503
|
+
def everythingbut(self):
    """
    Return the complement FSM: it accepts every string that `self` rejects
    and rejects every string that `self` accepts.

    Missing transitions make this harder, because the implicit "dead" state
    has to be reified. Metastates are therefore dicts: `{0: state}` while
    the original FSM is still alive, `{}` once it has fallen off its map.
    """
    alphabet = self.alphabet

    initial = {0: self.initial}

    def follow(current, transition):
        if 0 not in current:
            # Already dead: stay dead.
            return {}
        origin = current[0]
        if origin in self.map and transition in self.map[origin]:
            return {0: self.map[origin][transition]}
        return {}

    # A metastate is final unless it holds a final state of the original.
    def final(state):
        return 0 not in state or state[0] not in self.finals

    return crawl(alphabet, initial, final, follow)
|
|
525
|
+
|
|
526
|
+
def isdisjoint(self, other: 'FSM') -> bool:
    """
    Return True when no string is accepted by both `self` and `other`.

    Both machines are walked in lockstep over the intersection of their
    alphabets (`Alphabet.intersect`). The walk is abandoned through `_Marker`
    as soon as a pair of states that is final in both machines is found, in
    which case the result is False.
    """
    alphabet, new_to_old = self.alphabet.intersect(other.alphabet)
    initial = (self.initial, other.initial)

    # Takes a pair (state of self, state of other) and returns the pair
    # reached by following `transition` in both machines at once.
    def follow(current, transition):
        ss, os = current
        if ss in self.map and new_to_old[0][transition] in self.map[ss]:
            sn = self.map[ss][new_to_old[0][transition]]
        else:
            sn = None
        if os in other.map and new_to_old[1][transition] in other.map[os]:
            on = other.map[os][new_to_old[1][transition]]
        else:
            on = None
        # Compare with None explicitly: states are arbitrary values and 0 is
        # a perfectly valid one, so a truthiness test would mistake a
        # transition into state 0 for a missing transition and prune a path
        # that both machines can actually follow.
        if sn is None or on is None:
            raise OblivionError
        return sn, on

    def final(state):
        if state[0] in self.finals and state[1] in other.finals:
            # Both machines are in a final state: a common string exists.
            raise _Marker

    try:
        crawl_hash_no_result(alphabet, initial, final, follow)
    except _Marker:
        return False
    else:
        return True
|
|
557
|
+
|
|
558
|
+
def reversed(self):
    """
    Return a new FSM accepting the reverse of every string `self` accepts:
    if `self` accepts "beer", the result accepts "reeb".
    """
    alphabet = self.alphabet

    # The start is the composite "state-set" of all final states. With no
    # final states that set is empty and no further states get generated.
    initial = frozenset(self.finals)

    # Pre-compute (target state, transition) -> set of source states so that
    # follow() does not have to scan the whole map each time.
    reverse_map = {}
    for source, transition_map in self.map.items():
        for transition, target in transition_map.items():
            reverse_map.setdefault((target, transition), set()).add(source)

    # Collect every state from which the current state-set can be reached
    # using this transition.
    def follow(current, transition):
        predecessors = set()
        for state in current:
            predecessors.update(reverse_map.get((state, transition), set()))
        if not predecessors:
            raise OblivionError
        return frozenset(predecessors)

    # A state-set is final when it contains the original initial state.
    def final(state):
        return self.initial in state

    return crawl(alphabet, initial, final, follow)
|
|
594
|
+
|
|
595
|
+
# Do not reduce() the result, since reduce() calls us in turn
|
|
596
|
+
|
|
597
|
+
def __reversed__(self):
|
|
598
|
+
"""
|
|
599
|
+
Return a new FSM such that for every string that self accepts (e.g.
|
|
600
|
+
"beer", the new FSM accepts the reversed string ("reeb").
|
|
601
|
+
"""
|
|
602
|
+
return self.reversed()
|
|
603
|
+
|
|
604
|
+
def islive(self, state):
    """
    Tell whether `state` is "live", i.e. whether some final state can be
    reached from it (a final state is live by itself).
    """
    visited = {state}
    pending = [state]
    while pending:
        current = pending.pop()
        if current in self.finals:
            return True
        if current not in self.map:
            continue
        for transition in self.map[current]:
            target = self.map[current][transition]
            if target in visited:
                continue
            visited.add(target)
            pending.append(target)
    # Everything reachable has been explored without meeting a final state.
    return False
|
|
621
|
+
|
|
622
|
+
def empty(self):
    """
    Tell whether this FSM recognises no strings at all, which is the case
    exactly when its initial state is not live.

    An FSM can be arbitrarily complicated and own many final states yet
    still be empty, when none of them is reachable from the initial state.
    Conversely, an FSM with an empty alphabet is non-empty when its initial
    state is final.
    """
    initial_is_live = self.islive(self.initial)
    return not initial_is_live
|
|
631
|
+
|
|
632
|
+
def strings(self, max_iterations=None):
    """
    Lazily generate the strings (as lists of symbols) accepted by this FSM.

    The language may be infinite, hence a generator rather than a list.
    Results come out shortest first; within one length, transitions and
    then symbols are visited in sorted order. The work queue can grow
    without bound.

    `max_iterations` caps the number of queue entries processed. Once more
    than that many entries have been expanded a ValueError is raised; with
    the default of None the search is unbounded. The original notes
    estimate roughly 0.15 seconds per 10_000 iterations per 10 symbols.
    """
    # Dead states can never lead to acceptance, so prune them up front.
    live = {state for state in self.states if self.islive(state)}

    # Queue of (prefix, state reached by that prefix). Keeping the state
    # avoids re-running the machine from the start for every candidate.
    # A deque gives cheap appends at the back and pops at the front.
    queue = deque()

    start = self.initial
    prefix = []
    if start in live:
        if start in self.finals:
            yield prefix
        queue.append((prefix, start))

    processed = 0
    while queue:
        prefix, here = queue.popleft()
        processed += 1
        if here in self.map:
            outgoing = self.map[here]
            for transition in sorted(outgoing):
                target = outgoing[transition]
                if target not in live:
                    continue
                for symbol in sorted(self.alphabet.by_transition[transition]):
                    extended = [*prefix, symbol]
                    if target in self.finals:
                        yield extended
                    queue.append((extended, target))
        # Checked once per processed entry, after expanding it.
        if max_iterations is not None and processed > max_iterations:
            raise ValueError(f"Couldn't find an example within {max_iterations} iterations")
|
|
685
|
+
|
|
686
|
+
def __iter__(self):
    """
    Make the FSM iterable, so `for string in fsm1` (including inside a
    comprehension) walks the accepted strings produced by `self.strings()`.
    """
    accepted = self.strings()
    return accepted
|
|
691
|
+
|
|
692
|
+
def equivalent(self, other):
    """
    Report whether `self` and `other` recognise exactly the same strings.

    That is the case precisely when their symmetric difference
    (`self ^ other`) recognises no strings.
    """
    symmetric_difference = self ^ other
    return symmetric_difference.empty()
|
|
699
|
+
|
|
700
|
+
def __eq__(self, other):
    """
    Implement `fsm1 == fsm2`: true when both FSMs recognise the same
    strings, as decided by `self.equivalent(other)`.
    """
    same_language = self.equivalent(other)
    return same_language
|
|
706
|
+
|
|
707
|
+
def different(self, other):
    """
    Report whether `self` and `other` disagree on at least one string,
    i.e. whether their symmetric difference (`self ^ other`) is non-empty.
    """
    symmetric_difference = self ^ other
    return not symmetric_difference.empty()
|
|
713
|
+
|
|
714
|
+
def __ne__(self, other):
    """
    Implement `fsm1 != fsm2`: true when the two FSMs recognise different
    strings, as decided by `self.different(other)`.
    """
    languages_differ = self.different(other)
    return languages_differ
|
|
720
|
+
|
|
721
|
+
def difference(*fsms):
    """
    Build the difference of several FSMs.

    The result recognises exactly the strings recognised by the first FSM
    in `fsms` and by none of the remaining ones. The FSMs are crawled
    together via `parallel`, with the finality rule below.
    """

    def accepted_only_by_first(accepts):
        # accepts[k] is the finality status of the k-th FSM.
        return accepts[0] and not any(accepts[1:])

    return parallel(fsms, accepted_only_by_first)
|
|
727
|
+
|
|
728
|
+
def __sub__(self, other):
    """Implement `self - other` by delegating to `self.difference(other)`."""
    remainder = self.difference(other)
    return remainder
|
|
730
|
+
|
|
731
|
+
def cardinality(self):
    """
    Treat the FSM as a set of strings and return how many it contains.

    Raises OverflowError (carrying the offending state) when the set is
    infinite, which is detected as a live state being revisited while its
    own count is still being computed.
    """
    # state -> number of accepted strings starting there; None marks a
    # state whose count is currently being computed.
    counts = {}

    def count_from(state):
        # Dead states (many FSMs have at least one) contribute nothing.
        if not self.islive(state):
            counts[state] = 0
            return 0

        if state in counts:
            known = counts[state]
            if known is None:
                # Re-entered a state in progress: a cycle through live
                # states, hence infinitely many strings.
                raise OverflowError(state)
            return known

        counts[state] = None  # mark as "computing..."

        total = 1 if state in self.finals else 0
        if state in self.map:
            for transition, target in self.map[state].items():
                # Each symbol in the transition's class yields a distinct string.
                total += count_from(target) * len(self.alphabet.by_transition[transition])
        counts[state] = total
        return total

    return count_from(self.initial)
|
|
763
|
+
|
|
764
|
+
def __len__(self):
    """
    Implement `len(fsm)`: the number of strings in the FSM viewed as a set,
    as computed by `self.cardinality()`, which raises OverflowError when
    there are infinitely many.
    """
    size = self.cardinality()
    return size
|
|
770
|
+
|
|
771
|
+
def issubset(self, other):
    """
    Viewing both FSMs as sets of strings, report whether `self` is a subset
    of `other`: nothing is left over once `other` is subtracted from
    `self`.
    """
    leftover = self - other
    return leftover.empty()
|
|
777
|
+
|
|
778
|
+
def __le__(self, other):
    """
    Implement `self <= other` as the subset test on the sets of recognised
    strings; delegates to `self.issubset(other)`.
    """
    contained = self.issubset(other)
    return contained
|
|
784
|
+
|
|
785
|
+
def ispropersubset(self, other):
    """
    Viewing both FSMs as sets of strings, report whether `self` is a proper
    subset of `other`: a subset (`<=`) that is not equal (`!=`).
    """
    contained = self <= other
    # Short-circuit: the inequality is only evaluated for actual subsets.
    return contained and self != other
|
|
791
|
+
|
|
792
|
+
def __lt__(self, other):
    """
    Implement `self < other` as the strict-subset test on the sets of
    recognised strings; delegates to `self.ispropersubset(other)`.
    """
    strictly_contained = self.ispropersubset(other)
    return strictly_contained
|
|
798
|
+
|
|
799
|
+
def issuperset(self, other):
    """
    Viewing both FSMs as sets of strings, report whether `self` is a
    superset of `other`: nothing is left over once `self` is subtracted
    from `other`.
    """
    leftover = other - self
    return leftover.empty()
|
|
805
|
+
|
|
806
|
+
def __ge__(self, other):
    """
    Implement `self >= other` as the superset test on the sets of
    recognised strings; delegates to `self.issuperset(other)`.
    """
    contains = self.issuperset(other)
    return contains
|
|
812
|
+
|
|
813
|
+
def ispropersuperset(self, other):
    """
    Viewing both FSMs as sets of strings, report whether `self` is a proper
    superset of `other`: a superset (`>=`) that is not equal (`!=`).
    """
    contains = self >= other
    # Short-circuit: the inequality is only evaluated for actual supersets.
    return contains and self != other
|
|
819
|
+
|
|
820
|
+
def __gt__(self, other):
    """
    Implement `self > other` as the strict-superset test on the sets of
    recognised strings; delegates to `self.ispropersuperset(other)`.
    """
    strictly_contains = self.ispropersuperset(other)
    return strictly_contains
|
|
826
|
+
|
|
827
|
+
def copy(self):
    """
    Return a new FSM built from copies of this one's alphabet, states,
    finals and map (each obtained with that object's own `.copy()`), and
    with the same initial state. Validation is skipped.

    Provided for completeness, mirroring `set.copy()`; FSM objects are
    immutable, so there should rarely be a reason to need this.
    """
    alphabet = self.alphabet.copy()
    states = self.states.copy()
    initial = self.initial
    finals = self.finals.copy()
    transitions = self.map.copy()
    return FSM(
        alphabet=alphabet,
        states=states,
        initial=initial,
        finals=finals,
        map=transitions,
        __no_validation__=True,
    )
|
|
840
|
+
|
|
841
|
+
def derive(self, input):
    """
    Compute the Brzozowski derivative of this FSM with respect to `input`,
    a string (iterable) of symbols.
    <https://en.wikipedia.org/wiki/Brzozowski_derivative>

    The input is consumed from the initial state; the result is an FSM
    with the same alphabet, states, finals and map whose initial state is
    wherever the input led.

    A symbol outside the alphabet is treated as `anything_else` when the
    alphabet contains that; otherwise KeyError(symbol) is raised. If the
    input falls into oblivion (a missing transition), the derivative is an
    FSM accepting no strings.
    """
    try:
        # Consume the input string.
        state = self.initial
        for symbol in input:
            if symbol not in self.alphabet:
                if anything_else not in self.alphabet:
                    raise KeyError(symbol)
                symbol = anything_else

            # Missing transition = transition to dead state
            if state not in self.map:
                raise OblivionError
            transition = self.alphabet[symbol]
            outgoing = self.map[state]
            if transition not in outgoing:
                raise OblivionError

            state = outgoing[transition]

        # The state reached after the whole input becomes the new start.
        return FSM(
            alphabet=self.alphabet,
            states=self.states,
            initial=state,
            finals=self.finals,
            map=self.map,
            __no_validation__=True,
        )

    except OblivionError:
        # Fell out of the FSM. The derivative of this FSM is the empty FSM.
        return null(self.alphabet)
|
|
878
|
+
|
|
879
|
+
|
|
880
|
+
def null(alphabet):
    """
    Return an FSM over `alphabet` that accepts nothing, not even the empty
    string.

    It has a single non-final state 0 on which every transition in
    `alphabet.by_transition` loops back. This demonstrates that such an FSM
    is possible, and it is also extremely useful in some situations.
    """
    self_loops = dict.fromkeys(alphabet.by_transition, 0)
    return FSM(
        alphabet=alphabet,
        states={0},
        initial=0,
        finals=set(),
        map={0: self_loops},
        __no_validation__=True,
    )
|
|
896
|
+
|
|
897
|
+
|
|
898
|
+
def epsilon(alphabet):
    """
    Return an FSM over `alphabet` that matches the empty string, "", only.

    It consists of one state that is both initial and final and has no
    transitions at all. This is very useful in many situations.
    """
    only_state = 0
    return FSM(
        alphabet=alphabet,
        states={only_state},
        initial=only_state,
        finals={only_state},
        map={},
        __no_validation__=True,
    )
|
|
911
|
+
|
|
912
|
+
|
|
913
|
+
def parallel(fsms, test):
    """
    Crawl several FSMs in parallel, mapping the states of a larger
    meta-FSM.

    A state of the meta-FSM is a dict {index of FSM: its current state};
    an FSM that has no transition to follow is dropped from the dict. To
    decide whether a meta-state is final, the per-FSM finality statuses
    (e.g. [True, False, False]) are passed to `test`.
    """
    alphabet, new_to_old = Alphabet.union(*(machine.alphabet for machine in fsms))
    indexed = tuple(enumerate(fsms))

    initial = {idx: machine.initial for idx, machine in indexed}

    def follow(current, new_transition):
        # Given one meta-state, return the meta-state reached by taking
        # `new_transition` in the merged alphabet.
        successor = {}
        for idx, machine in indexed:
            old_transition = new_to_old[idx][new_transition]
            if idx not in current:
                continue
            here = current[idx]
            if here in machine.map and old_transition in machine.map[here]:
                successor[idx] = machine.map[here][old_transition]
        if not successor:
            # No FSM could follow the transition.
            raise OblivionError
        return successor

    def final(state):
        # Work out the "is final?" status of each substate, then let
        # `test` decide the finality of the overall meta-state.
        statuses = [idx in state and state[idx] in machine.finals for idx, machine in indexed]
        return test(statuses)

    return crawl(alphabet, initial, final, follow)
|
|
944
|
+
|
|
945
|
+
|
|
946
|
+
def crawl_hash_no_result(alphabet, initial, final, follow):
    """
    Explore every state reachable from `initial` without building an FSM.

    States are kept in sets, so they must be hashable. For each visited
    state `final(state)` is called (its result is discarded) and
    `follow(state, transition)` is called for every transition in
    `alphabet.by_transition`; a transition on which `follow` raises
    OblivionError is skipped. Returns None.
    """
    pending = {initial}
    done = set()

    while pending:
        state = pending.pop()
        done.add(state)

        # Invoked for whatever effect the callback has; nothing is recorded.
        final(state)

        for transition in alphabet.by_transition:
            try:
                successor = follow(state, transition)
            except OblivionError:
                # Reached an oblivion state. Don't list it.
                continue
            if successor not in done:
                pending.add(successor)
|
|
967
|
+
|
|
968
|
+
|
|
969
|
+
def crawl(alphabet, initial, final, follow):
    """
    Discover an unknown FSM by exploration and return it.

    Starting from `initial`, `follow(state, transition)` is applied for
    every transition in `alphabet.by_transition` to find new states, and
    `final(state)` decides which are final. Discovered states are numbered
    in order of discovery (the initial state is 0) and the returned FSM
    uses those numbers. A transition on which `follow` raises
    OblivionError is omitted from the map.

    This is a powerful procedure which could run forever if `follow` keeps
    producing new states.
    """
    discovered = [initial]
    accepting = set()
    table = {}

    # `discovered` grows while we walk it, so index it by hand.
    cursor = 0
    while cursor < len(discovered):
        state = discovered[cursor]

        if final(state):
            accepting.add(cursor)

        # Build this state's row of the transition table.
        row = table[cursor] = {}
        for transition in alphabet.by_transition:
            try:
                target = follow(state, transition)
            except OblivionError:
                # Reached an oblivion state. Don't list it.
                continue

            # Reuse the number of an already-known state (compared by
            # equality), otherwise assign the next free number.
            try:
                target_index = discovered.index(target)
            except ValueError:
                target_index = len(discovered)
                discovered.append(target)
            row[transition] = target_index

        cursor += 1

    return FSM(
        alphabet=alphabet,
        states=range(len(discovered)),
        initial=0,
        finals=accepting,
        map=table,
        __no_validation__=True,
    )
|