jaclang 0.8.4__py3-none-any.whl → 0.8.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of jaclang might be problematic. Click here for more details.

Files changed (88) hide show
  1. jaclang/cli/cli.md +1 -0
  2. jaclang/cli/cli.py +109 -37
  3. jaclang/compiler/jac.lark +3 -3
  4. jaclang/compiler/larkparse/jac_parser.py +2 -2
  5. jaclang/compiler/parser.py +14 -21
  6. jaclang/compiler/passes/main/__init__.py +5 -1
  7. jaclang/compiler/passes/main/binder_pass.py +594 -0
  8. jaclang/compiler/passes/main/cfg_build_pass.py +21 -1
  9. jaclang/compiler/passes/main/import_pass.py +8 -256
  10. jaclang/compiler/passes/main/inheritance_pass.py +10 -3
  11. jaclang/compiler/passes/main/pyast_gen_pass.py +92 -77
  12. jaclang/compiler/passes/main/pyast_load_pass.py +24 -13
  13. jaclang/compiler/passes/main/sem_def_match_pass.py +1 -1
  14. jaclang/compiler/passes/main/sym_tab_build_pass.py +4 -0
  15. jaclang/compiler/passes/main/tests/fixtures/M1.jac +3 -0
  16. jaclang/compiler/passes/main/tests/fixtures/cfg_has_var.jac +12 -0
  17. jaclang/compiler/passes/main/tests/fixtures/cfg_if_no_else.jac +11 -0
  18. jaclang/compiler/passes/main/tests/fixtures/cfg_return.jac +9 -0
  19. jaclang/compiler/passes/main/tests/fixtures/checker_imported.jac +2 -0
  20. jaclang/compiler/passes/main/tests/fixtures/checker_importer.jac +6 -0
  21. jaclang/compiler/passes/main/tests/fixtures/data_spatial_types.jac +1 -1
  22. jaclang/compiler/passes/main/tests/fixtures/import_symbol_type_infer.jac +11 -0
  23. jaclang/compiler/passes/main/tests/fixtures/infer_type_assignment.jac +5 -0
  24. jaclang/compiler/passes/main/tests/fixtures/member_access_type_inferred.jac +13 -0
  25. jaclang/compiler/passes/main/tests/fixtures/member_access_type_resolve.jac +11 -0
  26. jaclang/compiler/passes/main/tests/fixtures/sym_binder.jac +47 -0
  27. jaclang/compiler/passes/main/tests/fixtures/type_annotation_assignment.jac +8 -0
  28. jaclang/compiler/passes/main/tests/test_binder_pass.py +111 -0
  29. jaclang/compiler/passes/main/tests/test_cfg_build_pass.py +62 -24
  30. jaclang/compiler/passes/main/tests/test_checker_pass.py +87 -0
  31. jaclang/compiler/passes/main/tests/test_pyast_gen_pass.py +13 -13
  32. jaclang/compiler/passes/main/tests/test_sem_def_match_pass.py +6 -6
  33. jaclang/compiler/passes/main/type_checker_pass.py +128 -0
  34. jaclang/compiler/passes/tool/doc_ir_gen_pass.py +2 -0
  35. jaclang/compiler/passes/tool/tests/fixtures/simple_walk_fmt.jac +3 -0
  36. jaclang/compiler/program.py +32 -11
  37. jaclang/compiler/tests/test_sr_errors.py +32 -0
  38. jaclang/compiler/type_system/__init__.py +1 -0
  39. jaclang/compiler/type_system/type_evaluator.py +421 -0
  40. jaclang/compiler/type_system/type_utils.py +41 -0
  41. jaclang/compiler/type_system/types.py +240 -0
  42. jaclang/compiler/unitree.py +36 -24
  43. jaclang/langserve/dev_engine.jac +645 -0
  44. jaclang/langserve/dev_server.jac +201 -0
  45. jaclang/langserve/engine.jac +24 -5
  46. jaclang/langserve/tests/server_test/test_lang_serve.py +2 -2
  47. jaclang/langserve/tests/test_dev_server.py +80 -0
  48. jaclang/langserve/tests/test_server.py +13 -0
  49. jaclang/runtimelib/builtin.py +28 -39
  50. jaclang/runtimelib/importer.py +34 -63
  51. jaclang/runtimelib/machine.py +48 -64
  52. jaclang/runtimelib/memory.py +23 -5
  53. jaclang/runtimelib/tests/fixtures/savable_object.jac +10 -2
  54. jaclang/runtimelib/utils.py +42 -6
  55. jaclang/tests/fixtures/edge_node_walk.jac +1 -1
  56. jaclang/tests/fixtures/edges_walk.jac +1 -1
  57. jaclang/tests/fixtures/gendot_bubble_sort.jac +1 -1
  58. jaclang/tests/fixtures/py_run.jac +8 -0
  59. jaclang/tests/fixtures/py_run.py +23 -0
  60. jaclang/tests/fixtures/pyfunc.py +2 -0
  61. jaclang/tests/fixtures/pyfunc_fmt.py +60 -0
  62. jaclang/tests/fixtures/pyfunc_fstr.py +25 -0
  63. jaclang/tests/fixtures/pyfunc_kwesc.py +33 -0
  64. jaclang/tests/fixtures/python_run_test.py +19 -0
  65. jaclang/tests/test_cli.py +107 -0
  66. jaclang/tests/test_language.py +106 -5
  67. jaclang/utils/lang_tools.py +6 -3
  68. jaclang/utils/module_resolver.py +90 -0
  69. jaclang/utils/symtable_test_helpers.py +125 -0
  70. jaclang/utils/test.py +3 -4
  71. jaclang/vendor/interegular/__init__.py +34 -0
  72. jaclang/vendor/interegular/comparator.py +163 -0
  73. jaclang/vendor/interegular/fsm.py +1015 -0
  74. jaclang/vendor/interegular/patterns.py +732 -0
  75. jaclang/vendor/interegular/py.typed +0 -0
  76. jaclang/vendor/interegular/utils/__init__.py +15 -0
  77. jaclang/vendor/interegular/utils/simple_parser.py +165 -0
  78. jaclang/vendor/interegular-0.3.3.dist-info/INSTALLER +1 -0
  79. jaclang/vendor/interegular-0.3.3.dist-info/LICENSE.txt +21 -0
  80. jaclang/vendor/interegular-0.3.3.dist-info/METADATA +64 -0
  81. jaclang/vendor/interegular-0.3.3.dist-info/RECORD +20 -0
  82. jaclang/vendor/interegular-0.3.3.dist-info/REQUESTED +0 -0
  83. jaclang/vendor/interegular-0.3.3.dist-info/WHEEL +5 -0
  84. jaclang/vendor/interegular-0.3.3.dist-info/top_level.txt +1 -0
  85. {jaclang-0.8.4.dist-info → jaclang-0.8.6.dist-info}/METADATA +2 -1
  86. {jaclang-0.8.4.dist-info → jaclang-0.8.6.dist-info}/RECORD +88 -43
  87. {jaclang-0.8.4.dist-info → jaclang-0.8.6.dist-info}/WHEEL +0 -0
  88. {jaclang-0.8.4.dist-info → jaclang-0.8.6.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,1015 @@
1
+ """
2
+ Finite state machine library, extracted from `greenery.fsm` and adapted by MegaIng
3
+ """
4
+ from _collections import deque
5
+ from collections import defaultdict
6
+ from functools import total_ordering
7
+ from typing import Any, Set, Dict, Union, NewType, Mapping, Tuple, Iterable
8
+
9
+ from interegular.utils import soft_repr
10
+
11
+
12
class _Marker(BaseException):
    """Private signal raised to abort a state-space crawl early.

    `FSM.isdisjoint` raises it from its `final` callback and catches it
    around the crawl. It derives from `BaseException` rather than
    `Exception` (presumably so generic `except Exception` handlers inside
    the crawl cannot swallow it -- confirm against the crawl helpers).
    """
14
+
15
+
16
@total_ordering
class _AnythingElseCls:
    """
    Surrogate symbol standing for "any symbol not in the official alphabet".

    For example, if a state machine's alphabet is
    {"a", "b", "c", "d", fsm.anything_else}, then "e" may be passed in as a
    symbol; it is converted to fsm.anything_else and the matching transition
    is followed.

    Equality and hashing are identity based. `__lt__` always answers False,
    and `total_ordering` derives the remaining comparisons from `__lt__`
    and `__eq__`.
    """

    def __hash__(self):
        return hash(id(self))

    def __eq__(self, other):
        # Identity comparison: only the very same object is equal.
        return other is self

    def __lt__(self, other):
        # This symbol is never "less than" anything.
        return False

    def __repr__(self):
        return "anything_else"

    def __str__(self):
        return "anything_else"
40
+
41
+
42
# Module-level singleton for the "any other symbol" placeholder.
# A dedicated class instance (rather than a bare `object()`) is used so that
# the way this special value is serialised (its str/repr) is under our control.
anything_else = _AnythingElseCls()
45
+
46
+
47
def nice_char_group(chars: Iterable[Union[str, _AnythingElseCls]]):
    """Render a collection of symbols as a compact, comma-separated string.

    The symbols are sorted; runs of two or more characters with consecutive
    code points are collapsed into `first-last`, every other symbol is
    rendered on its own via `soft_repr`. `anything_else` never joins a run.
    """
    pieces = []
    run = []

    def flush():
        # Emit the pending run either as a range or as individual symbols.
        if len(run) >= 2:
            pieces.append(f"{soft_repr(run[0])}-{soft_repr(run[-1])}")
        else:
            pieces.extend(map(soft_repr, run))

    for symbol in sorted(chars):
        extends_run = (
            symbol is not anything_else
            and run
            and ord(run[-1]) + 1 == ord(symbol)
        )
        if extends_run:
            run.append(symbol)
        else:
            flush()
            run = [symbol]
    flush()
    return ','.join(pieces)
64
+
65
+
66
# Distinct static types for state identifiers and transition identifiers.
# Both are plain ints at runtime; NewType only helps static type checkers.
State = NewType("State", int)
TransitionKey = NewType("TransitionKey", int)
68
+
69
+
70
class Alphabet(Mapping[Any, TransitionKey]):
    """Mapping from input symbols to the transition key they share.

    Several symbols may map to the same key; `by_transition` exposes the
    inverse grouping (key -> list of symbols).

    Note that `__getitem__` does not raise `KeyError`: an unknown symbol
    resolves to the key of `anything_else` when that is part of the alphabet
    and to `None` otherwise. `union` and `intersect` rely on this.
    """

    def __init__(self, symbol_mapping: Dict[Union[str, _AnythingElseCls], TransitionKey]):
        self._symbol_mapping = symbol_mapping
        grouped = defaultdict(list)
        for symbol, key in symbol_mapping.items():
            grouped[key].append(symbol)
        self._by_transition = dict(grouped)

    @property
    def by_transition(self):
        """Dict mapping each transition key to the list of its symbols."""
        return self._by_transition

    def __str__(self):
        rows = [(nice_char_group(symbols), str(key))
                for key, symbols in sorted(self._by_transition.items())]
        width = max((len(group) for group, _ in rows), default=0)
        return '\n'.join(f"{group:{width}} | {key}" for group, key in rows)

    def __repr__(self):
        return f"{type(self).__name__}({self._symbol_mapping!r})"

    def __len__(self) -> int:
        return len(self._symbol_mapping)

    def __iter__(self):
        return iter(self._symbol_mapping)

    def __getitem__(self, item):
        mapping = self._symbol_mapping
        if item in mapping:
            return mapping[item]
        # Unknown symbol: fall back to `anything_else`, or None if absent.
        return mapping.get(anything_else)

    def __contains__(self, item):
        return item in self._symbol_mapping

    @staticmethod
    def _regroup(symbols, alphabets):
        """Build a combined alphabet over `symbols`.

        Symbols that have the same tuple of keys across all `alphabets`
        share one new key. Returns the combined alphabet and, per input
        alphabet, a dict translating each new key back to the old key.
        """
        keys_to_symbols = defaultdict(list)
        for symbol in symbols:
            keys_to_symbols[tuple(a[symbol] for a in alphabets)].append(symbol)
        keys_to_key = {keys: index for index, keys in enumerate(keys_to_symbols)}
        combined = Alphabet({symbol: keys_to_key[keys]
                             for keys, grouped in keys_to_symbols.items()
                             for symbol in grouped})
        new_to_old_mappings = [{} for _ in alphabets]
        for keys, new_key in keys_to_key.items():
            for old_key, new_to_old in zip(keys, new_to_old_mappings):
                new_to_old[new_key] = old_key
        return combined, tuple(new_to_old_mappings)

    def union(*alphabets: 'Alphabet') -> 'Tuple[Alphabet, Tuple[Dict[TransitionKey, TransitionKey], ...]]':
        """Combine alphabets over the union of their symbols.

        Usable as `Alphabet.union(a, b, ...)` or `a.union(b, ...)`.
        """
        all_symbols = frozenset().union(*(a._symbol_mapping.keys() for a in alphabets))
        return Alphabet._regroup(all_symbols, alphabets)

    @classmethod
    def from_groups(cls, *groups):
        """Create an alphabet in which the i-th group of symbols gets key i."""
        # Use `cls` so that subclasses construct instances of themselves.
        return cls({s: TransitionKey(i) for i, group in enumerate(groups) for s in group})

    def intersect(self, other: 'Alphabet') -> 'Tuple[Alphabet, Tuple[Dict[TransitionKey, TransitionKey], ...]]':
        """Combine `self` and `other` over the symbols both of them contain."""
        all_symbols = frozenset(self._symbol_mapping).intersection(other._symbol_mapping)
        return Alphabet._regroup(all_symbols, (self, other))

    def copy(self):
        """Return a new Alphabet built from a shallow copy of the mapping."""
        return Alphabet(self._symbol_mapping.copy())
152
+
153
+
154
class OblivionError(Exception):
    """
    Raised while `crawl()`ing an FSM when a transition leads to the
    oblivion state. For example, while crawling two FSMs in parallel we may
    transition to the oblivion state of both FSMs at once. This out-of-bound
    signal reduces the complexity of the new FSM's map.
    """
162
+
163
+
164
+ class FSM:
165
+ """
166
+ A Finite State Machine or FSM has an alphabet and a set of states. At any
167
+ given moment, the FSM is in one state. When passed a symbol from the
168
+ alphabet, the FSM jumps to another state (or possibly the same state).
169
+ A map (Python dictionary) indicates where to jump.
170
+ One state is nominated as a starting state. Zero or more states are
171
+ nominated as final states. If, after consuming a string of symbols,
172
+ the FSM is in a final state, then it is said to "accept" the string.
173
+ This class also has some pretty powerful methods which allow FSMs to
174
+ be concatenated, alternated between, multiplied, looped (Kleene star
175
+ closure), intersected, and simplified.
176
+ The majority of these methods are available using operator overloads.
177
+ """
178
+ alphabet: Alphabet
179
+ initial: State
180
+ states: Set[State]
181
+ finals: Set[State]
182
+ map: Dict[State, Dict[TransitionKey, State]]
183
+
184
+ def __setattr__(self, name, value):
185
+ """Immutability prevents some potential problems."""
186
+ raise Exception("This object is immutable.")
187
+
188
def __init__(self, alphabet: Alphabet, states, initial, finals, map, *, __no_validation__=False):
    """
    `alphabet` is the `Alphabet` instance describing the symbols the FSM can be fed.
    `states` is the set of states for the FSM
    `initial` is the initial state
    `finals` is the set of accepting states
    `map` may be sparse (i.e. it may omit transitions). In the case of omitted
    transitions, a non-final "oblivion" state is simulated.
    `__no_validation__` (keyword only) skips all consistency checks.

    Raises TypeError if `alphabet` is not an Alphabet, and Exception if the
    initial state, the final states or a transition target are not in `states`.
    """
    if not __no_validation__:
        # Thanks to immutability, validation only needs to happen once.
        if not isinstance(alphabet, Alphabet):
            raise TypeError("Expected an Alphabet instance")
        if initial not in states:
            raise Exception("Initial state " + repr(initial) + " must be one of " + repr(states))
        if not finals.issubset(states):
            raise Exception("Final states " + repr(finals) + " must be a subset of " + repr(states))
        for state, transitions in map.items():
            for symbol, target in transitions.items():
                if target not in states:
                    raise Exception(
                        "Transition for state " + repr(state) + " and symbol " + repr(symbol) + " leads to " + repr(
                            target) + ", which is not a state")

    # __setattr__ always raises, so write straight into the instance dict.
    self.__dict__.update(
        alphabet=alphabet,
        states=frozenset(states),
        initial=initial,
        finals=frozenset(finals),
        map=map,
    )
219
+
220
def accepts(self, input: str):
    """
    Test whether the present FSM accepts the supplied string (iterable of
    symbols). Equivalently, consider `self` as a possibly-infinite set of
    strings and test whether `input` is a member of it.
    This is actually mainly used for unit testing purposes.
    If `fsm.anything_else` is in your alphabet, then any symbol not in your
    alphabet will be converted to `fsm.anything_else`.
    """
    alphabet = self.alphabet
    has_fallback = anything_else in alphabet
    current = self.initial
    for symbol in input:
        if has_fallback and symbol not in alphabet:
            symbol = anything_else
        key = alphabet[symbol]

        # A missing transition means we fell into the dead state.
        if current not in self.map or key not in self.map[current]:
            return False

        current = self.map[current][key]
    return current in self.finals
241
+
242
+ def __contains__(self, string):
243
+ """
244
+ This lets you use the syntax `"a" in fsm1` to see whether the string "a"
245
+ is in the set of strings accepted by `fsm1`.
246
+ """
247
+ return self.accepts(string)
248
+
249
+ def reduce(self):
250
+ """
251
+ A result by Brzozowski (1963) shows that a minimal finite state machine
252
+ equivalent to the original can be obtained by reversing the original
253
+ twice.
254
+ """
255
+ return self.reversed().reversed()
256
+
257
+ def __repr__(self):
258
+ string = "fsm("
259
+ string += "alphabet = " + repr(self.alphabet)
260
+ string += ", states = " + repr(self.states)
261
+ string += ", initial = " + repr(self.initial)
262
+ string += ", finals = " + repr(self.finals)
263
+ string += ", map = " + repr(self.map)
264
+ string += ")"
265
+ return string
266
+
267
def __str__(self):
    """Render the FSM as a text table.

    One row per state with columns: initial marker (`*`), state name,
    whether it is final, then the target state for each symbol of the
    alphabet (blank when there is no transition). A dashed line separates
    the header from the state rows.
    """
    # TODO maybe rework this to show transition groups instead of individual symbols
    header = ["", "name", "final?"]
    header.extend(soft_repr(symbol) for symbol in sorted(self.alphabet))
    table = [header]

    columns = sorted(self.alphabet.items())
    for state in self.states:
        line = [
            "*" if state == self.initial else "",
            str(state),
            "True" if state in self.finals else "False",
        ]
        for _symbol, key in columns:
            if state in self.map and key in self.map[state]:
                line.append(str(self.map[state][key]))
            else:
                line.append("")
        table.append(line)

    # Each column is as wide as its longest cell plus one space of padding.
    widths = [max(len(str(line[col])) for line in table) + 1
              for col in range(len(header))]
    padded = [[cell.ljust(widths[col]) for col, cell in enumerate(line)]
              for line in table]

    # Horizontal rule right below the header row.
    padded.insert(1, ["-" * width for width in widths])

    return "".join("".join(line) + "\n" for line in padded)
309
+
310
def concatenate(*fsms):
    """
    Concatenate arbitrarily many finite state machines together.

    With no arguments, returns `epsilon` over an empty alphabet. Otherwise
    the alphabets are merged with `Alphabet.union` and the result is built by
    crawling metastates, each a frozenset of `(fsm index, substate)` pairs.
    """
    if not fsms:
        return epsilon(Alphabet({}))
    alphabet, new_to_old = Alphabet.union(*[fsm.alphabet for fsm in fsms])
    last_index = len(fsms) - 1
    last = fsms[last_index]

    def connect_all(i, substate):
        """
        Take a state in the numbered FSM and return a set containing it, plus
        (if it's final) the first state from the next FSM, plus (if that's
        final) the first state from the next but one FSM, plus...
        """
        linked = {(i, substate)}
        while i < last_index and substate in fsms[i].finals:
            i += 1
            substate = fsms[i].initial
            linked.add((i, substate))
        return linked

    # We start at the start of the first FSM. If this state is final in the
    # first FSM, then we are also at the start of the second FSM. And so on.
    initial = frozenset(connect_all(0, fsms[0].initial))

    def final(state):
        """If you're in a final state of the final FSM, it's final"""
        return any(i == last_index and substate in last.finals
                   for i, substate in state)

    def follow(current, new_transition):
        """
        Follow the collection of states through all FSMs at once, jumping to the
        next FSM if we reach the end of the current one
        TODO: improve all follow() implementations to allow for dead metastates?
        """
        reached = set()
        for i, substate in current:
            fsm = fsms[i]
            if substate not in fsm.map:
                continue
            # Translate the merged-alphabet key back to this FSM's own key.
            old_transition = new_to_old[i][new_transition]
            if old_transition in fsm.map[substate]:
                reached.update(connect_all(i, fsm.map[substate][old_transition]))
        if not reached:
            raise OblivionError
        return frozenset(reached)

    return crawl(alphabet, initial, final, follow)
363
+
364
+ def __add__(self, other):
365
+ """
366
+ Concatenate two finite state machines together.
367
+ For example, if self accepts "0*" and other accepts "1+(0|1)",
368
+ will return a finite state machine accepting "0*1+(0|1)".
369
+ Accomplished by effectively following non-deterministically.
370
+ Call using "fsm3 = fsm1 + fsm2"
371
+ """
372
+ return self.concatenate(other)
373
+
374
def star(self):
    """
    If the present FSM accepts X, returns an FSM accepting X* (i.e. 0 or
    more Xes). This is NOT as simple as naively connecting the final states
    back to the initial state: see (b*ab)* for example.
    """
    alphabet = self.alphabet
    initial = {self.initial}

    def follow(state, transition):
        targets = set()
        # Whether the original initial state has an edge for this transition.
        can_restart = (self.initial in self.map
                       and transition in self.map[self.initial])
        for substate in state:
            if substate in self.map and transition in self.map[substate]:
                targets.add(self.map[substate][transition])

            # If one of our substates is final, then we can also consider
            # transitions from the initial state of the original FSM.
            if can_restart and substate in self.finals:
                targets.add(self.map[self.initial][transition])

        if not targets:
            raise OblivionError

        return frozenset(targets)

    def final(state):
        return any(substate in self.finals for substate in state)

    base = crawl(alphabet, initial, final, follow)
    # The crawled FSM is immutable via __setattr__, so patch its dict directly:
    # the initial state must also accept (zero repetitions).
    base.__dict__['finals'] = base.finals | {base.initial}
    return base
408
+
409
def times(self, multiplier):
    """
    Given an FSM and a multiplier, return the multiplied FSM.

    Raises Exception when `multiplier` is negative.
    """
    if multiplier < 0:
        raise Exception("Can't multiply an FSM by " + repr(multiplier))

    alphabet = self.alphabet

    # A metastate is a collection of (substate, iteration) pairs.
    initial = {(self.initial, 0)}

    def final(state):
        """If the initial state is final then multiplying doesn't alter that"""
        return any(
            substate == self.initial
            and (self.initial in self.finals or iteration == multiplier)
            for substate, iteration in state
        )

    def follow(current, transition):
        reached = []
        for substate, iteration in current:
            if iteration >= multiplier or substate not in self.map:
                continue
            row = self.map[substate]
            if transition not in row:
                continue
            target = row[transition]
            reached.append((target, iteration))
            # final of self? merge with initial on next iteration
            if target in self.finals:
                reached.append((self.initial, iteration + 1))
        if not reached:
            raise OblivionError
        return frozenset(reached)

    return crawl(alphabet, initial, final, follow)
444
+
445
+ def __mul__(self, multiplier):
446
+ """
447
+ Given an FSM and a multiplier, return the multiplied FSM.
448
+ """
449
+ return self.times(multiplier)
450
+
451
def union(*fsms):
    """
    Treat `fsms` as a collection of arbitrary FSMs and return the union FSM.
    Can be used as `fsm1.union(fsm2, ...)` or `fsm.union(fsm1, ...)`. `fsms`
    may be empty.
    """
    # A combined state accepts as soon as any of the FSMs accepts.
    accept_rule = any
    return parallel(fsms, accept_rule)
458
+
459
+ def __or__(self, other):
460
+ """
461
+ Alternation.
462
+ Return a finite state machine which accepts any sequence of symbols
463
+ that is accepted by either self or other. Note that the set of strings
464
+ recognised by the two FSMs undergoes a set union.
465
+ Call using "fsm3 = fsm1 | fsm2"
466
+ """
467
+ return self.union(other)
468
+
469
def intersection(*fsms):
    """
    Intersection.
    Take FSMs and AND them together. That is, return an FSM which
    accepts any sequence of symbols that is accepted by all of the original
    FSMs. Note that the sets of strings recognised by the FSMs undergo
    a set intersection operation.
    Call using "fsm3 = fsm1 & fsm2"
    """
    # A combined state accepts only when every FSM accepts.
    accept_rule = all
    return parallel(fsms, accept_rule)
479
+
480
+ def __and__(self, other):
481
+ """
482
+ Treat the FSMs as sets of strings and return the intersection of those
483
+ sets in the form of a new FSM. `fsm1.intersection(fsm2, ...)` or
484
+ `fsm.intersection(fsm1, ...)` are acceptable.
485
+ """
486
+ return self.intersection(other)
487
+
488
def symmetric_difference(*fsms):
    """
    Treat `fsms` as a collection of sets of strings and compute the symmetric
    difference of them all. The python set method only allows two sets to be
    operated on at once, but we go the extra mile since it's not too hard.
    """

    def odd_number_accept(accepts):
        # Accept when an odd number of the FSMs accept.
        return (accepts.count(True) % 2) == 1

    return parallel(fsms, odd_number_accept)
495
+
496
+ def __xor__(self, other):
497
+ """
498
+ Symmetric difference. Returns an FSM which recognises only the strings
499
+ recognised by `self` or `other` but not both.
500
+ """
501
+ return self.symmetric_difference(other)
502
+
503
def everythingbut(self):
    """
    Return a finite state machine which will accept any string NOT
    accepted by self, and will not accept any string accepted by self.
    This is more complicated if there are missing transitions, because the
    missing "dead" state must now be reified.

    Metastates are dicts: `{0: state}` while the original FSM is still
    alive, and `{}` once it has fallen into the dead state.
    """
    alphabet = self.alphabet
    initial = {0: self.initial}

    def follow(current, transition):
        if 0 not in current:
            return {}
        origin = current[0]
        if origin in self.map and transition in self.map[origin]:
            return {0: self.map[origin][transition]}
        return {}

    # A state is final unless the original was.
    def final(state):
        return 0 not in state or state[0] not in self.finals

    return crawl(alphabet, initial, final, follow)
525
+
526
def isdisjoint(self, other: 'FSM') -> bool:
    """Return True when no string is accepted by both `self` and `other`.

    Both FSMs are walked in lockstep over the intersection of their
    alphabets. As soon as a pair of states is final in both machines the
    walk is aborted via `_Marker` and False is returned.
    """
    alphabet, new_to_old = self.alphabet.intersect(other.alphabet)
    initial = (self.initial, other.initial)

    # Accepts a pair of states and returns the pair obtained by following
    # this transition in both FSMs.
    def follow(current, transition):
        ss, os = current
        if ss not in self.map or os not in other.map:
            raise OblivionError
        self_key = new_to_old[0][transition]
        other_key = new_to_old[1][transition]
        # Check for the *absence* of a transition explicitly. Testing the
        # truthiness of the target state would wrongly treat the valid
        # state 0 as "no transition" and prune reachable state pairs.
        if self_key not in self.map[ss] or other_key not in other.map[os]:
            raise OblivionError
        return self.map[ss][self_key], other.map[os][other_key]

    def final(state):
        if state[0] in self.finals and state[1] in other.finals:
            # We found a situation where we are in a final state in both FSMs.
            raise _Marker

    try:
        crawl_hash_no_result(alphabet, initial, final, follow)
    except _Marker:
        return False
    else:
        return True
557
+
558
def reversed(self):
    """
    Return a new FSM such that for every string that self accepts (e.g.
    "beer"), the new FSM accepts the reversed string ("reeb").

    The result is deliberately not reduce()d, since reduce() calls this
    method in turn.
    """
    alphabet = self.alphabet

    # Start from a composite "state-set" consisting of all final states.
    # If there are no final states, this set is empty and we'll find that
    # no other states get generated.
    initial = frozenset(self.finals)

    # Pre-compute (target, transition) -> set of source states.
    predecessors = {}
    for state, transition_map in self.map.items():
        for transition, next_state in transition_map.items():
            predecessors.setdefault((next_state, transition), set()).add(state)

    # Find every possible way to reach the current state-set
    # using this symbol.
    def follow(current, transition):
        sources = set()
        for state in current:
            sources |= predecessors.get((state, transition), set())
        if not sources:
            raise OblivionError
        return frozenset(sources)

    # A state-set is final if the initial state is in it.
    def final(state):
        return self.initial in state

    return crawl(alphabet, initial, final, follow)
596
+
597
+ def __reversed__(self):
598
+ """
599
+ Return a new FSM such that for every string that self accepts (e.g.
600
+ "beer", the new FSM accepts the reversed string ("reeb").
601
+ """
602
+ return self.reversed()
603
+
604
+ def islive(self, state):
605
+ """A state is "live" if a final state can be reached from it."""
606
+ seen = {state}
607
+ reachable = [state]
608
+ i = 0
609
+ while i < len(reachable):
610
+ current = reachable[i]
611
+ if current in self.finals:
612
+ return True
613
+ if current in self.map:
614
+ for transition in self.map[current]:
615
+ next = self.map[current][transition]
616
+ if next not in seen:
617
+ reachable.append(next)
618
+ seen.add(next)
619
+ i += 1
620
+ return False
621
+
622
+ def empty(self):
623
+ """
624
+ An FSM is empty if it recognises no strings. An FSM may be arbitrarily
625
+ complicated and have arbitrarily many final states while still recognising
626
+ no strings because those final states may all be inaccessible from the
627
+ initial state. Equally, an FSM may be non-empty despite having an empty
628
+ alphabet if the initial state is final.
629
+ """
630
+ return not self.islive(self.initial)
631
+
632
+ def strings(self, max_iterations=None):
633
+ """
634
+ Generate strings (lists of symbols) that this FSM accepts. Since there may
635
+ be infinitely many of these we use a generator instead of constructing a
636
+ static list. Strings will be sorted in order of length and then lexically.
637
+ This procedure uses arbitrary amounts of memory but is very fast. There
638
+ may be more efficient ways to do this, that I haven't investigated yet.
639
+ You can use this in list comprehensions.
640
+
641
+ `max_iterations` controls how many attempts will be made to generate strings.
642
+ For complex FSM it can take minutes to actually find something.
643
+ If this isn't acceptable, provide a value to `max_iterations`.
644
+ The approximate time complexity is
645
+ 0.15 seconds per 10_000 iterations per 10 symbols
646
+ """
647
+
648
+ # Many FSMs have "dead states". Once you reach a dead state, you can no
649
+ # longer reach a final state. Since many strings may end up here, it's
650
+ # advantageous to constrain our search to live states only.
651
+ livestates = set(state for state in self.states if self.islive(state))
652
+
653
+ # We store a list of tuples. Each tuple consists of an input string and the
654
+ # state that this input string leads to. This means we don't have to run the
655
+ # state machine from the very beginning every time we want to check a new
656
+ # string.
657
+ # We use a deque instead of a list since we append to the end and pop from
658
+ # the beginning
659
+ strings = deque()
660
+
661
+ # Initial entry (or possibly not, in which case this is a short one)
662
+ cstate = self.initial
663
+ cstring = []
664
+ if cstate in livestates:
665
+ if cstate in self.finals:
666
+ yield cstring
667
+ strings.append((cstring, cstate))
668
+
669
+ # Fixed point calculation
670
+ i = 0
671
+ while strings:
672
+ (cstring, cstate) = strings.popleft()
673
+ i += 1
674
+ if cstate in self.map:
675
+ for transition in sorted(self.map[cstate]):
676
+ nstate = self.map[cstate][transition]
677
+ if nstate in livestates:
678
+ for symbol in sorted(self.alphabet.by_transition[transition]):
679
+ nstring = cstring + [symbol]
680
+ if nstate in self.finals:
681
+ yield nstring
682
+ strings.append((nstring, nstate))
683
+ if max_iterations is not None and i > max_iterations:
684
+ raise ValueError(f"Couldn't find an example within {max_iterations} iterations")
685
+
686
+ def __iter__(self):
687
+ """
688
+ This allows you to do `for string in fsm1` as a list comprehension!
689
+ """
690
+ return self.strings()
691
+
692
+ def equivalent(self, other):
693
+ """
694
+ Two FSMs are considered equivalent if they recognise the same strings.
695
+ Or, to put it another way, if their symmetric difference recognises no
696
+ strings.
697
+ """
698
+ return (self ^ other).empty()
699
+
700
+ def __eq__(self, other):
701
+ """
702
+ You can use `fsm1 == fsm2` to determine whether two FSMs recognise the
703
+ same strings.
704
+ """
705
+ return self.equivalent(other)
706
+
707
+ def different(self, other):
708
+ """
709
+ Two FSMs are considered different if they have a non-empty symmetric
710
+ difference.
711
+ """
712
+ return not (self ^ other).empty()
713
+
714
def __ne__(self, other):
    """
    Use `fsm1 != fsm2` to determine whether two FSMs recognise different
    strings. This is the operator form of `different`.
    """
    return self.different(other)
720
+
721
def difference(*fsms):
    """
    Difference. Build an FSM recognising the strings that the first FSM in
    `fsms` recognises and that none of the remaining ones do.

    When called as a method, the instance itself is `fsms[0]`.
    """

    def only_first(accepts):
        # `accepts` holds one finality flag per input FSM, in order.
        return accepts[0] and not any(accepts[1:])

    return parallel(fsms, only_first)
727
+
728
def __sub__(self, other):
    """
    Operator form of `difference`: `fsm1 - fsm2` returns whatever
    `fsm1.difference(fsm2)` returns.
    """
    return self.difference(other)
730
+
731
def cardinality(self):
    """
    Consider the FSM as a set of strings and return how many it contains.

    Counting starts at `self.initial`. An OverflowError (carrying the
    offending state) is raised when a live state is reached again while its
    own count is still being worked out, which means there are infinitely
    many strings.
    """
    # state -> number of strings accepted starting from it. None marks a
    # state whose count is still being computed further up the call stack.
    counts = {}

    def count_from(state):
        # Many FSMs have at least one oblivion state; nothing is accepted
        # from there, so there is no point descending any further.
        if not self.islive(state):
            counts[state] = 0
            return 0

        if state in counts:
            known = counts[state]
            if known is None:
                # Came back round to a state that is still in progress
                raise OverflowError(state)
            return known

        counts[state] = None  # in progress

        # The empty continuation counts if we may stop right here.
        total = 1 if state in self.finals else 0
        if state in self.map:
            for transition, target in self.map[state].items():
                # One transition stands for every symbol grouped under it.
                total += count_from(target) * len(self.alphabet.by_transition[transition])
        counts[state] = total
        return total

    return count_from(self.initial)
763
+
764
def __len__(self):
    """
    Consider the FSM as a set of strings and return the cardinality of that
    set, or raise an OverflowError if there are infinitely many. This is
    the `len(fsm)` form of `cardinality`.
    """
    return self.cardinality()
770
+
771
def issubset(self, other):
    """
    Treat `self` and `other` as sets of strings and report whether `self`
    is a subset of `other`, i.e. whether `self - other` recognises nothing.
    """
    leftover = self - other
    return leftover.empty()
777
+
778
def __le__(self, other):
    """
    Treat `self` and `other` as sets of strings and see if `self` is a subset
    of `other`... `self` recognises no strings which `other` doesn't.
    This is the `fsm1 <= fsm2` form of `issubset`.
    """
    return self.issubset(other)
784
+
785
def ispropersubset(self, other):
    """
    Treat `self` and `other` as sets of strings and report whether `self`
    is a proper subset of `other`: a subset (`<=`) that also differs (`!=`).
    """
    contained = self <= other
    if not contained:
        # Short-circuit exactly as `and` would, handing back the falsy value.
        return contained
    return self != other
791
+
792
def __lt__(self, other):
    """
    Treat `self` and `other` as sets of strings and see if `self` is a strict
    subset of `other`. This is the `fsm1 < fsm2` form of `ispropersubset`.
    """
    return self.ispropersubset(other)
798
+
799
def issuperset(self, other):
    """
    Treat `self` and `other` as sets of strings and report whether `self`
    is a superset of `other`, i.e. whether `other - self` recognises nothing.
    """
    leftover = other - self
    return leftover.empty()
805
+
806
def __ge__(self, other):
    """
    Treat `self` and `other` as sets of strings and see if `self` is a
    superset of `other`. This is the `fsm1 >= fsm2` form of `issuperset`.
    """
    return self.issuperset(other)
812
+
813
def ispropersuperset(self, other):
    """
    Treat `self` and `other` as sets of strings and report whether `self`
    is a proper superset of `other`: a superset (`>=`) that also differs
    (`!=`).
    """
    contains = self >= other
    if not contains:
        # Short-circuit exactly as `and` would, handing back the falsy value.
        return contains
    return self != other
819
+
820
def __gt__(self, other):
    """
    Treat `self` and `other` as sets of strings and see if `self` is a
    strict superset of `other`. This is the `fsm1 > fsm2` form of
    `ispropersuperset`.
    """
    return self.ispropersuperset(other)
826
+
827
def copy(self):
    """
    Return a new FSM built from copies of this one's parts.

    Provided for completeness only, since `set.copy()` also exists; FSM
    objects are immutable, so there should rarely be a reason to call it.
    Each container is duplicated with its own `.copy()`, and validation is
    skipped when the new FSM is constructed.
    """
    alphabet = self.alphabet.copy()
    states = self.states.copy()
    finals = self.finals.copy()
    transitions = self.map.copy()
    return FSM(
        alphabet=alphabet,
        states=states,
        initial=self.initial,
        finals=finals,
        map=transitions,
        __no_validation__=True,
    )
840
+
841
def derive(self, input):
    """
    Compute the Brzozowski derivative of this FSM with respect to `input`,
    an iterable of symbols.
    <https://en.wikipedia.org/wiki/Brzozowski_derivative>

    The result is an FSM sharing this one's alphabet, states, finals and
    map, but starting from wherever `input` leads. A symbol missing from
    the alphabet is treated as `anything_else` when the alphabet has it and
    is a KeyError otherwise. Falling into oblivion gives an FSM accepting
    no strings.
    """
    try:
        # Walk the input from the initial state.
        state = self.initial
        for symbol in input:
            if symbol not in self.alphabet:
                if anything_else not in self.alphabet:
                    raise KeyError(symbol)
                symbol = anything_else

            # A missing row or a missing entry is a move to the dead state.
            if state not in self.map:
                raise OblivionError
            transition = self.alphabet[symbol]
            row = self.map[state]
            if transition not in row:
                raise OblivionError

            state = row[transition]

        # Input consumed: the state we ended on becomes the new start.
        return FSM(
            alphabet=self.alphabet,
            states=self.states,
            initial=state,
            finals=self.finals,
            map=self.map,
            __no_validation__=True,
        )

    except OblivionError:
        # Fell out of the FSM. The derivative of this FSM is the empty FSM.
        return null(self.alphabet)
878
+
879
+
880
def null(alphabet):
    """
    An FSM accepting nothing (not even the empty string). This
    demonstrates that such an FSM is possible, and it is also extremely
    useful in some situations.

    It has a single non-final state, 0, which every transition of
    `alphabet` leads back to.
    """
    sink = 0
    loops = {transition: sink for transition in alphabet.by_transition}
    return FSM(
        alphabet=alphabet,
        states={sink},
        initial=sink,
        finals=set(),
        map={sink: loops},
        __no_validation__=True,
    )
896
+
897
+
898
def epsilon(alphabet):
    """
    Return an FSM matching the empty string, "", only.
    This is very useful in many situations.

    It has a single state, 0, which is both initial and final, and an
    empty transition map.
    """
    only_state = 0
    return FSM(
        alphabet=alphabet,
        states={only_state},
        initial=only_state,
        finals={only_state},
        map={},
        __no_validation__=True,
    )
911
+
912
+
913
def parallel(fsms, test):
    """
    Crawl several FSMs in parallel, mapping the states of a larger meta-FSM.
    To determine whether a state in the larger FSM is final, pass all of the
    finality statuses (e.g. [True, False, False]) to `test`.

    A state of the meta-FSM is a dict from the index of an input FSM to that
    FSM's current state; an FSM that has already fallen into oblivion simply
    has no entry.
    """
    alphabet, new_to_old = Alphabet.union(*[fsm.alphabet for fsm in fsms])

    # Pair every FSM with its position once; both helpers below walk this.
    indexed = tuple(enumerate(fsms))

    initial = {i: fsm.initial for (i, fsm) in indexed}

    def follow(current, new_transition):
        # Take a meta-state and return the meta-state reached by following
        # `new_transition` in the combined alphabet.
        successor = {}
        for i, f in indexed:
            old_transition = new_to_old[i][new_transition]
            if i not in current or current[i] not in f.map:
                continue
            row = f.map[current[i]]
            if old_transition in row:
                successor[i] = row[old_transition]
        if not successor:
            # Every component is dead, so the meta-state is oblivion.
            raise OblivionError
        return successor

    def final(state):
        # Work out "is final?" for each component, then let `test` decide
        # the finality of the meta-state as a whole.
        accepts = [i in state and state[i] in fsm.finals for (i, fsm) in indexed]
        return test(accepts)

    return crawl(alphabet, initial, final, follow)
944
+
945
+
946
def crawl_hash_no_result(alphabet, initial, final, follow):
    """
    Visit every state reachable from `initial` without building an FSM.

    `final` is called once on each state visited and its answer is
    discarded; `follow(state, transition)` supplies the successor for each
    transition in `alphabet.by_transition`, or raises OblivionError when
    there is none. States are stored in sets, so they must be hashable.
    Nothing is returned.
    """
    pending = {initial}
    seen = set()

    while pending:
        current = pending.pop()
        seen.add(current)

        # Evaluate finality; the result is not kept
        final(current)

        # Explore every outgoing transition of this state
        for transition in alphabet.by_transition:
            try:
                successor = follow(current, transition)
            except OblivionError:
                # Reached an oblivion state. Don't list it.
                continue
            if successor not in seen:
                pending.add(successor)
967
+
968
+
969
def crawl(alphabet, initial, final, follow):
    """
    Given the above conditions and instructions, crawl a new unknown FSM,
    mapping its states, final states and transitions. Return the new FSM.
    This is a pretty powerful procedure which could potentially go on
    forever if you supply an evil version of follow().

    States found by `follow` are numbered in order of discovery, starting
    with `initial` as 0; the returned FSM refers to them by those numbers.
    """
    discovered = [initial]
    accepting = set()
    transitions = {}

    # Walk the list by position while it keeps growing underneath us.
    position = 0
    while position < len(discovered):
        current = discovered[position]

        # Record finality
        if final(current):
            accepting.add(position)

        # Fill in the outgoing transitions of this state
        row = transitions[position] = {}
        for transition in alphabet.by_transition:
            try:
                successor = follow(current, transition)
            except OblivionError:
                # Reached an oblivion state. Don't list it.
                continue

            # Reuse the number of a known state, or hand out the next one.
            # Lookup is by equality because states need not be hashable.
            try:
                target = discovered.index(successor)
            except ValueError:
                target = len(discovered)
                discovered.append(successor)
            row[transition] = target

        position += 1

    return FSM(
        alphabet=alphabet,
        states=range(len(discovered)),
        initial=0,
        finals=accepting,
        map=transitions,
        __no_validation__=True,
    )