orca-runtime-python 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- orca_runtime_python/__init__.py +69 -0
- orca_runtime_python/bus.py +227 -0
- orca_runtime_python/effects.py +216 -0
- orca_runtime_python/logging.py +161 -0
- orca_runtime_python/machine.py +875 -0
- orca_runtime_python/parser.py +894 -0
- orca_runtime_python/persistence.py +83 -0
- orca_runtime_python/types.py +279 -0
- orca_runtime_python-0.1.0.dist-info/METADATA +246 -0
- orca_runtime_python-0.1.0.dist-info/RECORD +12 -0
- orca_runtime_python-0.1.0.dist-info/WHEEL +5 -0
- orca_runtime_python-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,894 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Orca Markdown parser.
|
|
3
|
+
|
|
4
|
+
Parses Orca machine definition text in markdown (.orca.md) format
|
|
5
|
+
into MachineDef objects. Supports hierarchical and parallel states.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import re
|
|
11
|
+
from dataclasses import dataclass, field
|
|
12
|
+
from typing import Any
|
|
13
|
+
|
|
14
|
+
from .types import (
|
|
15
|
+
MachineDef,
|
|
16
|
+
StateDef,
|
|
17
|
+
Transition,
|
|
18
|
+
GuardDef,
|
|
19
|
+
ActionSignature,
|
|
20
|
+
EffectDef,
|
|
21
|
+
GuardExpression,
|
|
22
|
+
GuardTrue,
|
|
23
|
+
GuardFalse,
|
|
24
|
+
GuardCompare,
|
|
25
|
+
GuardAnd,
|
|
26
|
+
GuardOr,
|
|
27
|
+
GuardNot,
|
|
28
|
+
GuardNullcheck,
|
|
29
|
+
VariableRef,
|
|
30
|
+
ValueRef,
|
|
31
|
+
RegionDef,
|
|
32
|
+
ParallelDef,
|
|
33
|
+
InvokeDef,
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class ParseError(Exception):
    """Signal that Orca machine-definition text could not be parsed."""
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# ============================================================
|
|
43
|
+
# Markdown (.orca.md) Parser
|
|
44
|
+
# ============================================================
|
|
45
|
+
|
|
46
|
+
@dataclass
|
|
47
|
+
class _MdHeading:
|
|
48
|
+
kind: str = "heading"
|
|
49
|
+
level: int = 0
|
|
50
|
+
text: str = ""
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@dataclass
|
|
54
|
+
class _MdTable:
|
|
55
|
+
kind: str = "table"
|
|
56
|
+
headers: list[str] = field(default_factory=list)
|
|
57
|
+
rows: list[list[str]] = field(default_factory=list)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
@dataclass
|
|
61
|
+
class _MdBulletList:
|
|
62
|
+
kind: str = "bullets"
|
|
63
|
+
items: list[str] = field(default_factory=list)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
@dataclass
|
|
67
|
+
class _MdBlockquote:
|
|
68
|
+
kind: str = "blockquote"
|
|
69
|
+
text: str = ""
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
@dataclass
|
|
73
|
+
class _MdSeparator:
|
|
74
|
+
"""Machine separator (--- between machines in multi-machine files)."""
|
|
75
|
+
kind: str = "separator"
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
# Union of every structural element class the markdown scan can emit.
_MdElement = (
    _MdHeading
    | _MdTable
    | _MdBulletList
    | _MdBlockquote
    | _MdSeparator
)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _parse_markdown_structure(source: str) -> list[_MdElement]:
    """Phase 1: turn markdown text into a flat list of structural elements.

    Recognised constructs are headings, ``---`` separators, blockquotes,
    pipe tables and ``- `` bullet lists. Fenced code blocks, blank lines and
    any other text are skipped. A table run of fewer than two lines is
    dropped.
    """
    raw_lines = source.split("\n")
    total = len(raw_lines)
    parsed: list[_MdElement] = []
    pos = 0

    def run_end(start: int, prefix: str) -> int:
        """Return the index just past the run of lines starting with *prefix*."""
        stop = start
        while stop < total and raw_lines[stop].strip().startswith(prefix):
            stop += 1
        return stop

    while pos < total:
        text = raw_lines[pos].strip()

        if text == "":
            pos += 1
            continue

        if text.startswith("```"):
            # Jump over the fence body; also consume the closing fence if any.
            pos += 1
            while pos < total and not raw_lines[pos].strip().startswith("```"):
                pos += 1
            pos = min(pos + 1, total)
            continue

        heading = re.match(r"^(#{1,6})\s+(.+)$", text)
        if heading is not None:
            hashes, title = heading.groups()
            parsed.append(_MdHeading(level=len(hashes), text=title.strip()))
            pos += 1
        elif re.match(r"^---+$", text) is not None:
            parsed.append(_MdSeparator())
            pos += 1
        elif text.startswith(">"):
            end = run_end(pos, ">")
            quoted = [re.sub(r"^>\s*", "", ln.strip()) for ln in raw_lines[pos:end]]
            parsed.append(_MdBlockquote(text="\n".join(quoted)))
            pos = end
        elif text.startswith("|"):
            end = run_end(pos, "|")
            block = [ln.strip() for ln in raw_lines[pos:end]]
            pos = end
            if len(block) >= 2:
                header_cells = _split_table_row(block[0])
                # The second line is usually the |---|---| alignment rule.
                has_rule = re.match(r"^\|[\s\-:|]+\|$", block[1]) is not None
                body = block[2:] if has_rule else block[1:]
                parsed.append(_MdTable(
                    headers=header_cells,
                    rows=[_split_table_row(row) for row in body],
                ))
        elif text.startswith("- "):
            end = run_end(pos, "- ")
            bullet_texts = [ln.strip()[2:].strip() for ln in raw_lines[pos:end]]
            parsed.append(_MdBulletList(items=bullet_texts))
            pos = end
        else:
            # Plain paragraph text carries no structure.
            pos += 1

    return parsed
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def _split_table_row(line: str) -> list[str]:
|
|
157
|
+
"""Split a markdown table row into cells."""
|
|
158
|
+
cells = [c.strip() for c in line.split("|")]
|
|
159
|
+
if cells and cells[0] == "":
|
|
160
|
+
cells.pop(0)
|
|
161
|
+
if cells and cells[-1] == "":
|
|
162
|
+
cells.pop()
|
|
163
|
+
return cells
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def _strip_backticks(text: str) -> str:
|
|
167
|
+
"""Remove surrounding backticks from a string."""
|
|
168
|
+
if text.startswith("`") and text.endswith("`"):
|
|
169
|
+
return text[1:-1]
|
|
170
|
+
return text
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def _find_column_index(headers: list[str], name: str) -> int:
|
|
174
|
+
"""Find column index by header name (case-insensitive)."""
|
|
175
|
+
name_lower = name.lower()
|
|
176
|
+
for idx, h in enumerate(headers):
|
|
177
|
+
if h.lower() == name_lower:
|
|
178
|
+
return idx
|
|
179
|
+
return -1
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def _parse_md_annotations(text: str) -> dict[str, Any]:
|
|
183
|
+
"""Parse state annotations like [initial, final, parallel, sync: all-final]."""
|
|
184
|
+
result: dict[str, Any] = {
|
|
185
|
+
"is_initial": False,
|
|
186
|
+
"is_final": False,
|
|
187
|
+
"is_parallel": False,
|
|
188
|
+
}
|
|
189
|
+
bracket_match = re.search(r"\[(.+)\]", text)
|
|
190
|
+
if bracket_match:
|
|
191
|
+
for part in [p.strip() for p in bracket_match.group(1).split(",")]:
|
|
192
|
+
if part == "initial":
|
|
193
|
+
result["is_initial"] = True
|
|
194
|
+
elif part == "final":
|
|
195
|
+
result["is_final"] = True
|
|
196
|
+
elif part == "parallel":
|
|
197
|
+
result["is_parallel"] = True
|
|
198
|
+
elif part.startswith("sync:"):
|
|
199
|
+
v = part[5:].strip().replace("_", "-")
|
|
200
|
+
if v in ("all-final", "any-final", "custom"):
|
|
201
|
+
result["sync_strategy"] = v
|
|
202
|
+
return result
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
@dataclass
|
|
206
|
+
class _MdStateEntry:
|
|
207
|
+
entry_type: str # 'state' or 'region'
|
|
208
|
+
level: int = 0
|
|
209
|
+
name: str = ""
|
|
210
|
+
is_initial: bool = False
|
|
211
|
+
is_final: bool = False
|
|
212
|
+
is_parallel: bool = False
|
|
213
|
+
sync_strategy: str | None = None
|
|
214
|
+
description: str | None = None
|
|
215
|
+
on_entry: str | None = None
|
|
216
|
+
on_exit: str | None = None
|
|
217
|
+
on_done: str | None = None
|
|
218
|
+
timeout: dict[str, str] | None = None
|
|
219
|
+
ignored_events: list[str] = field(default_factory=list)
|
|
220
|
+
invoke: InvokeDef | None = None
|
|
221
|
+
_pending_on_error: str | None = None # temp: on_error parsed before invoke
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def _parse_md_state_bullet(entry: _MdStateEntry, text: str) -> None:
|
|
225
|
+
"""Parse a bullet list item belonging to a state."""
|
|
226
|
+
if text.startswith("on_entry:"):
|
|
227
|
+
val = text[9:].strip()
|
|
228
|
+
if val.startswith("->"):
|
|
229
|
+
val = val[2:].strip()
|
|
230
|
+
entry.on_entry = val
|
|
231
|
+
elif text.startswith("on_exit:"):
|
|
232
|
+
val = text[8:].strip()
|
|
233
|
+
if val.startswith("->"):
|
|
234
|
+
val = val[2:].strip()
|
|
235
|
+
entry.on_exit = val
|
|
236
|
+
elif text.startswith("timeout:"):
|
|
237
|
+
rest = text[8:].strip()
|
|
238
|
+
arrow_idx = rest.find("->")
|
|
239
|
+
if arrow_idx != -1:
|
|
240
|
+
entry.timeout = {
|
|
241
|
+
"duration": rest[:arrow_idx].strip(),
|
|
242
|
+
"target": rest[arrow_idx + 2:].strip(),
|
|
243
|
+
}
|
|
244
|
+
elif text.startswith("ignore:"):
|
|
245
|
+
names = [e.strip() for e in text[7:].strip().split(",") if e.strip()]
|
|
246
|
+
entry.ignored_events.extend(names)
|
|
247
|
+
elif text.startswith("on_done:"):
|
|
248
|
+
val = text[8:].strip()
|
|
249
|
+
if val.startswith("->"):
|
|
250
|
+
val = val[2:].strip()
|
|
251
|
+
entry.on_done = val
|
|
252
|
+
# Also set on_done on invoke if invoke exists
|
|
253
|
+
if entry.invoke:
|
|
254
|
+
entry.invoke.on_done = val
|
|
255
|
+
elif text.startswith("on_error:"):
|
|
256
|
+
val = text[9:].strip()
|
|
257
|
+
if val.startswith("->"):
|
|
258
|
+
val = val[2:].strip()
|
|
259
|
+
# Store temporarily until invoke is parsed
|
|
260
|
+
entry._pending_on_error = val
|
|
261
|
+
if entry.invoke:
|
|
262
|
+
entry.invoke.on_error = val
|
|
263
|
+
elif text.startswith("invoke:"):
|
|
264
|
+
rest = text[7:].strip() # "MachineName" or "MachineName input: { ... }"
|
|
265
|
+
machine_name = rest
|
|
266
|
+
input_map: dict[str, str] | None = None
|
|
267
|
+
|
|
268
|
+
# Check for input mapping
|
|
269
|
+
input_match = re.search(r"input:\s*\{([^}]+)\}", rest)
|
|
270
|
+
if input_match:
|
|
271
|
+
machine_name = rest[:input_match.start()].strip()
|
|
272
|
+
input_str = input_match.group(1)
|
|
273
|
+
input_map = {}
|
|
274
|
+
for pair in input_str.split(","):
|
|
275
|
+
if ":" in pair:
|
|
276
|
+
key, value = pair.split(":", 1)
|
|
277
|
+
input_map[key.strip()] = value.strip()
|
|
278
|
+
|
|
279
|
+
entry.invoke = InvokeDef(machine=machine_name, input=input_map)
|
|
280
|
+
# Apply pending on_error if we already parsed it
|
|
281
|
+
if entry._pending_on_error:
|
|
282
|
+
entry.invoke.on_error = entry._pending_on_error
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
def _build_md_states_at_level(
    entries: list[_MdStateEntry], start_idx: int, level: int, parent_name: str | None = None
) -> tuple[list[StateDef], int]:
    """Turn the flat entry list into nested states for one heading level.

    Scanning starts at *start_idx* and stops at the first entry that is
    shallower than *level* or that is a region. Entries deeper than *level*
    that were not consumed as children are skipped.

    Returns the states built at this level and the index where scanning
    stopped.
    """
    built: list[StateDef] = []
    cursor = start_idx
    child_level = level + 1

    while cursor < len(entries):
        current = entries[cursor]
        if current.level < level or current.entry_type == "region":
            break
        if current.level > level:
            cursor += 1
            continue

        node = StateDef(
            name=current.name,
            is_initial=current.is_initial,
            is_final=current.is_final,
        )
        if parent_name:
            node.parent = parent_name
        # Copy optional settings only when the entry actually provides them.
        for attr in ("description", "on_entry", "on_exit", "on_done", "timeout"):
            provided = getattr(current, attr)
            if provided:
                setattr(node, attr, provided)
        if current.ignored_events:
            node.ignored_events = list(current.ignored_events)
        if current.invoke:
            node.invoke = current.invoke

        cursor += 1

        if current.is_parallel:
            node.parallel, cursor = _build_md_parallel_regions(
                entries, cursor, child_level, current.name, current.sync_strategy
            )
        else:
            has_child_state = (
                cursor < len(entries)
                and entries[cursor].level == child_level
                and entries[cursor].entry_type == "state"
            )
            if has_child_state:
                node.contains, cursor = _build_md_states_at_level(
                    entries, cursor, child_level, current.name
                )

        built.append(node)

    return built, cursor
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
def _build_md_parallel_regions(
    entries: list[_MdStateEntry], start_idx: int, region_level: int,
    parent_name: str, sync_strategy: str | None = None
) -> tuple[ParallelDef, int]:
    """Collect the regions of a parallel state from the flat entry list.

    Consumes consecutive region entries at exactly *region_level*, each
    followed by its state entries at ``region_level + 1``. Scanning stops at
    the first entry that fits neither shape.

    Returns the parallel definition and the index where scanning stopped.
    """
    collected: list[RegionDef] = []
    cursor = start_idx
    state_level = region_level + 1

    def is_region_header(item: _MdStateEntry) -> bool:
        return item.entry_type == "region" and item.level == region_level

    def is_region_state(item: _MdStateEntry) -> bool:
        return item.entry_type == "state" and item.level == state_level

    while cursor < len(entries) and is_region_header(entries[cursor]):
        region_name = entries[cursor].name
        cursor += 1

        members: list[StateDef] = []
        while cursor < len(entries) and is_region_state(entries[cursor]):
            src = entries[cursor]
            member = StateDef(
                name=src.name,
                is_initial=src.is_initial,
                is_final=src.is_final,
            )
            member.parent = f"{parent_name}.{region_name}"
            # Copy optional settings only when the entry actually provides them.
            for attr in ("description", "on_entry", "on_exit", "timeout"):
                provided = getattr(src, attr)
                if provided:
                    setattr(member, attr, provided)
            if src.ignored_events:
                member.ignored_events = list(src.ignored_events)
            if src.invoke:
                member.invoke = src.invoke
            if src.on_done:
                member.on_done = src.on_done
            members.append(member)
            cursor += 1

        collected.append(RegionDef(name=region_name, states=members))

    return ParallelDef(regions=collected, sync=sync_strategy), cursor
|
|
387
|
+
|
|
388
|
+
|
|
389
|
+
def _parse_md_action_signature(name: str, text: str) -> ActionSignature:
    """Parse an action signature such as ``(ctx, event) -> Context + Effect<T>``.

    Parameter names are taken from inside the parentheses, with any
    ``: type`` suffix dropped. The return type is the text after ``->`` up to
    an optional ``+``; an ``Effect<Name>`` after the ``+`` marks the action
    as effectful.

    Malformed input degrades gracefully. A missing parameter list yields no
    parameters, and a missing or empty return part keeps the default return
    type ``"Context"``, so the -1 returned by ``str.find`` never reaches a
    slice.
    """
    text = text.strip()

    open_idx = text.find("(")
    close_idx = text.find(")", open_idx + 1) if open_idx != -1 else -1
    if close_idx != -1:
        params_str = text[open_idx + 1:close_idx].strip()
        remainder = text[close_idx + 1:]
    else:
        # No complete parameter list: treat the whole text as the tail.
        params_str = ""
        remainder = text

    parameters: list[str] = []
    if params_str:
        for raw_param in params_str.split(","):
            param_name = raw_param.split(":")[0].strip()
            if param_name:
                parameters.append(param_name)

    # partition() leaves return_part empty when there is no arrow.
    _, _, return_part = remainder.partition("->")
    return_part = return_part.strip()

    return_type = "Context"
    has_effect = False
    effect_type: str | None = None

    if return_part:
        base, plus, effect_part = return_part.partition("+")
        if plus:
            return_type = base.strip() or return_type
            effect_match = re.search(r"Effect<(\w+)>", effect_part)
            if effect_match:
                has_effect = True
                effect_type = effect_match.group(1)
        else:
            return_type = return_part

    return ActionSignature(
        name=name,
        parameters=parameters,
        return_type=return_type,
        has_effect=has_effect,
        effect_type=effect_type,
    )
|
|
425
|
+
|
|
426
|
+
|
|
427
|
+
# --- Guard expression parser (shared) ---
# Grammar:
#   expr       = or_expr
#   or_expr    = and_expr ('or' and_expr)*
#   and_expr   = not_expr ('and' not_expr)*
#   not_expr   = 'not' primary | primary
#   primary    = '(' expr ')' | 'true' | 'false' | comparison
#   comparison = var_path ('is' 'not'? 'null' | op value)?
#   var_path   = IDENT ('.' IDENT)*
#   op         = '==' | '!=' | '<' | '>' | '<=' | '>='
#   value      = NUMBER | STRING | IDENT | 'true' | 'false' | 'null'
|
|
438
|
+
|
|
439
|
+
@dataclass
|
|
440
|
+
class _GToken:
|
|
441
|
+
type: str # ident, number, string, op, lparen, rparen, dot, eof
|
|
442
|
+
value: str
|
|
443
|
+
|
|
444
|
+
|
|
445
|
+
def _tokenize_guard(input_str: str) -> list[_GToken]:
    """Split a guard expression into tokens, ending with an ``eof`` token.

    Whitespace and unrecognised characters are skipped. String literals may
    use single or double quotes and are stored without the quotes; an
    unterminated string runs to the end of the input.
    """
    out: list[_GToken] = []
    length = len(input_str)
    cursor = 0
    punctuation = {"(": "lparen", ")": "rparen", ".": "dot"}

    while cursor < length:
        ch = input_str[cursor]

        if ch.isspace():
            cursor += 1
        elif ch in ('"', "'"):
            # Scan to the matching quote (or end of input) and slice once.
            body_start = cursor + 1
            body_end = body_start
            while body_end < length and input_str[body_end] != ch:
                body_end += 1
            out.append(_GToken("string", input_str[body_start:body_end]))
            cursor = body_end + 1  # step over the closing quote
        elif input_str[cursor:cursor + 2] in ("==", "!=", "<=", ">="):
            out.append(_GToken("op", input_str[cursor:cursor + 2]))
            cursor += 2
        elif ch in ("<", ">"):
            out.append(_GToken("op", ch))
            cursor += 1
        elif ch in punctuation:
            out.append(_GToken(punctuation[ch], ch))
            cursor += 1
        elif ch.isdigit() or (ch == "-" and input_str[cursor + 1:cursor + 2].isdigit()):
            # A '-' only starts a number when a digit follows immediately.
            end = cursor + 1
            while end < length and (input_str[end].isdigit() or input_str[end] == "."):
                end += 1
            out.append(_GToken("number", input_str[cursor:end]))
            cursor = end
        elif ch.isalpha() or ch == "_":
            end = cursor
            while end < length and (input_str[end].isalnum() or input_str[end] == "_"):
                end += 1
            out.append(_GToken("ident", input_str[cursor:end]))
            cursor = end
        else:
            # Unknown character: ignore it.
            cursor += 1

    out.append(_GToken("eof", ""))
    return out
|
|
522
|
+
|
|
523
|
+
|
|
524
|
+
def _parse_guard_expression(input_str: str) -> GuardExpression:
    """Parse a guard expression string into a GuardExpression AST.

    Implements a small recursive-descent parser: ``or`` binds loosest, then
    ``and``, then ``not``, then a primary (parenthesised expression,
    ``true``/``false`` literal, or a variable path optionally followed by
    ``is [not] null`` or a comparison). A bare variable path is a not-null
    check, and ``== null`` / ``!= null`` are turned into null checks.

    The token cursor never moves past the final ``eof`` token, so incomplete
    input such as ``"x =="`` yields a result instead of an ``IndexError``.

    Raises:
        ValueError: if a numeric literal with several dots (e.g. ``1.2.3``)
            is used as a comparison value.
    """
    tokens = _tokenize_guard(input_str)
    eof_index = len(tokens) - 1  # the tokenizer always appends an eof token
    pos = 0

    def peek() -> _GToken:
        return tokens[pos]

    def advance() -> _GToken:
        nonlocal pos
        tok = tokens[pos]
        # Stay on the eof token so later peek() calls remain in bounds.
        if pos < eof_index:
            pos += 1
        return tok

    def at_word(word: str) -> bool:
        """Return True when the current token is the identifier *word*."""
        tok = tokens[pos]
        return tok.type == "ident" and tok.value == word

    def parse_or() -> GuardExpression:
        left = parse_and()
        while at_word("or"):
            advance()
            left = GuardOr(left=left, right=parse_and())
        return left

    def parse_and() -> GuardExpression:
        left = parse_not()
        while at_word("and"):
            advance()
            left = GuardAnd(left=left, right=parse_not())
        return left

    def parse_not() -> GuardExpression:
        if at_word("not"):
            advance()
            return GuardNot(expr=parse_primary())
        return parse_primary()

    def parse_primary() -> GuardExpression:
        # Parenthesised expression; a missing ')' is tolerated.
        if peek().type == "lparen":
            advance()
            inner = parse_or()
            if peek().type == "rparen":
                advance()
            return inner

        # Boolean literals.
        if at_word("true"):
            advance()
            return GuardTrue()
        if at_word("false"):
            advance()
            return GuardFalse()

        # Variable path, possibly followed by a null test or a comparison.
        var_path = parse_var_path()

        if at_word("is"):
            advance()
            if at_word("not"):
                advance()
                if at_word("null"):
                    advance()
                    return GuardNullcheck(expr=var_path, is_null=False)
            if at_word("null"):
                advance()
                return GuardNullcheck(expr=var_path, is_null=True)

        if peek().type == "op":
            op = advance().value
            right = parse_value()
            # "== null" / "!= null" are null checks, not value comparisons.
            if right.type == "null":
                return GuardNullcheck(expr=var_path, is_null=(op == "=="))
            return GuardCompare(op=_map_op(op), left=var_path, right=right)

        # A bare variable means "is not null".
        return GuardNullcheck(expr=var_path, is_null=False)

    def parse_var_path() -> VariableRef:
        parts: list[str] = []
        if peek().type == "ident":
            parts.append(advance().value)
            while peek().type == "dot":
                advance()
                if peek().type == "ident":
                    parts.append(advance().value)
        return VariableRef(path=parts)

    def parse_value() -> ValueRef:
        tok = advance()
        if tok.type == "number":
            if "." not in tok.value:
                # Parse integers directly so large values keep full precision.
                return ValueRef(type="number", value=int(tok.value))
            num = float(tok.value)
            # Integral decimals such as "5.0" are normalised to int.
            return ValueRef(type="number", value=int(num) if num.is_integer() else num)
        if tok.type == "string":
            return ValueRef(type="string", value=tok.value)
        if tok.type == "ident":
            if tok.value == "null":
                return ValueRef(type="null", value=None)
            if tok.value == "true":
                return ValueRef(type="boolean", value=True)
            if tok.value == "false":
                return ValueRef(type="boolean", value=False)
            # Any other bare word is treated as a string value.
            return ValueRef(type="string", value=tok.value)
        # Anything else (including end of input) is treated as null.
        return ValueRef(type="null", value=None)

    return parse_or()
|
|
639
|
+
|
|
640
|
+
|
|
641
|
+
def _map_op(op: str) -> str:
|
|
642
|
+
"""Map operator string to internal op name."""
|
|
643
|
+
return {
|
|
644
|
+
"==": "eq",
|
|
645
|
+
"!=": "ne",
|
|
646
|
+
"<": "lt",
|
|
647
|
+
">": "gt",
|
|
648
|
+
"<=": "le",
|
|
649
|
+
">=": "ge",
|
|
650
|
+
}.get(op, "eq")
|
|
651
|
+
|
|
652
|
+
|
|
653
|
+
def _md_cell(row: list[str], idx: int) -> str:
    """Return the stripped cell at column *idx*, or "" when the column is missing."""
    if 0 <= idx < len(row):
        return row[idx].strip()
    return ""


def _coerce_context_default(raw: str) -> Any:
    """Convert the text of a context table default cell into a Python value.

    Empty text becomes None. Integers and decimals, optionally negative,
    become int or float. "true"/"false" become bools. Text starting with a
    quote loses its surrounding quote characters. Anything else is returned
    unchanged.
    """
    if not raw:
        return None
    # Regex rather than str.isdigit(): accepts a leading '-' and rejects
    # digit-like characters such as superscripts that int() cannot parse.
    if re.fullmatch(r"-?\d+", raw):
        return int(raw)
    if re.fullmatch(r"-?\d+\.\d+", raw):
        return float(raw)
    if raw in ("true", "false"):
        return raw == "true"
    if raw.startswith(('"', "'")):
        return raw.strip("\"'")
    return raw


def _parse_machine_elements(elements: list[_MdElement]) -> MachineDef:
    """Build one MachineDef from the structural elements of a single machine.

    A level-1 ``machine <name>`` heading names the machine (default
    ``"unknown"``). Headings titled context, events, transitions, guards,
    actions or effects (case-insensitive) are sections whose data is read
    from the element immediately following them. ``state <name> [...]`` and
    ``region <name>`` headings are recorded as flat entries and nested at the
    end; blockquotes and bullet lists after a state heading belong to that
    state.
    """
    machine_name = "unknown"
    context: dict[str, Any] = {}
    events: list[str] = []
    transitions: list[Transition] = []
    guards: dict[str, GuardExpression] = {}
    actions: list[ActionSignature] = []
    effects: list[EffectDef] = []
    state_entries: list[_MdStateEntry] = []
    current_state_entry: _MdStateEntry | None = None

    i = 0
    while i < len(elements):
        el = elements[i]

        if not isinstance(el, _MdHeading):
            # Non-heading content attaches to the most recent state heading.
            if current_state_entry is not None:
                if isinstance(el, _MdBlockquote):
                    current_state_entry.description = el.text
                elif isinstance(el, _MdBulletList):
                    for item in el.items:
                        _parse_md_state_bullet(current_state_entry, item)
            i += 1
            continue

        # Machine heading
        if el.level == 1 and el.text.startswith("machine "):
            machine_name = el.text[8:].strip()
            current_state_entry = None
            i += 1
            continue

        # Section headings
        section_name = el.text.lower()
        if section_name in ("context", "events", "transitions", "guards", "actions", "effects"):
            current_state_entry = None
            next_el = elements[i + 1] if i + 1 < len(elements) else None
            consumed_next = True

            if section_name == "context" and isinstance(next_el, _MdTable):
                fi = _find_column_index(next_el.headers, "field")
                di = _find_column_index(next_el.headers, "default")
                for row in next_el.rows:
                    context[_md_cell(row, fi)] = _coerce_context_default(_md_cell(row, di))

            elif section_name == "events" and isinstance(next_el, _MdBulletList):
                # Each bullet may list several comma-separated event names.
                for item in next_el.items:
                    events.extend(n.strip() for n in item.split(",") if n.strip())

            elif section_name == "transitions" and isinstance(next_el, _MdTable):
                si = _find_column_index(next_el.headers, "source")
                ei = _find_column_index(next_el.headers, "event")
                gi = _find_column_index(next_el.headers, "guard")
                ti = _find_column_index(next_el.headers, "target")
                ai = _find_column_index(next_el.headers, "action")
                for row in next_el.rows:
                    guard_str = _md_cell(row, gi)
                    action_str = _md_cell(row, ai)
                    transitions.append(Transition(
                        source=_md_cell(row, si),
                        event=_md_cell(row, ei),
                        guard=guard_str if guard_str else None,
                        target=_md_cell(row, ti),
                        # "_" in the action column means "no action".
                        action=action_str if action_str and action_str != "_" else None,
                    ))

            elif section_name == "guards" and isinstance(next_el, _MdTable):
                ni = _find_column_index(next_el.headers, "name")
                ei = _find_column_index(next_el.headers, "expression")
                for row in next_el.rows:
                    expr_str = _strip_backticks(_md_cell(row, ei))
                    guards[_md_cell(row, ni)] = _parse_guard_expression(expr_str)

            elif section_name == "actions" and isinstance(next_el, _MdTable):
                ni = _find_column_index(next_el.headers, "name")
                si = _find_column_index(next_el.headers, "signature")
                for row in next_el.rows:
                    sig = _strip_backticks(_md_cell(row, si))
                    actions.append(_parse_md_action_signature(_md_cell(row, ni), sig))

            elif section_name == "effects" and isinstance(next_el, _MdTable):
                ni = _find_column_index(next_el.headers, "name")
                ii = _find_column_index(next_el.headers, "input")
                oi = _find_column_index(next_el.headers, "output")
                for row in next_el.rows:
                    name = _md_cell(row, ni)
                    input_str = _strip_backticks(_md_cell(row, ii))
                    output_str = _strip_backticks(_md_cell(row, oi))
                    if name:
                        effects.append(EffectDef(name=name, input=input_str, output=output_str))

            else:
                # Section heading without the expected table/list after it.
                consumed_next = False

            i += 2 if consumed_next else 1
            continue

        # State heading
        state_match = re.match(r"^state\s+(\w+)(.*)", el.text)
        if state_match:
            annot = _parse_md_annotations(state_match.group(2).strip())
            current_state_entry = _MdStateEntry(
                entry_type="state",
                level=el.level,
                name=state_match.group(1),
                is_initial=annot["is_initial"],
                is_final=annot["is_final"],
                is_parallel=annot["is_parallel"],
                sync_strategy=annot.get("sync_strategy"),
            )
            state_entries.append(current_state_entry)
            i += 1
            continue

        # Region heading
        region_match = re.match(r"^region\s+(\w+)$", el.text)
        if region_match:
            current_state_entry = None
            state_entries.append(_MdStateEntry(
                entry_type="region",
                level=el.level,
                name=region_match.group(1),
            ))
            i += 1
            continue

        # Any other heading ends the current state's content.
        current_state_entry = None
        i += 1

    # Build state hierarchy; the first entry's heading level is the top level.
    base_level = state_entries[0].level if state_entries else 2
    states, _ = _build_md_states_at_level(state_entries, 0, base_level)

    return MachineDef(
        name=machine_name,
        context=context,
        events=events,
        states=states,
        transitions=transitions,
        guards=guards,
        actions=actions,
        effects=effects,
    )
|
|
827
|
+
|
|
828
|
+
|
|
829
|
+
def parse_orca_md(source: str) -> MachineDef:
    """
    Parse Orca markdown (.orca.md) text into a single MachineDef.

    In a multi-machine file (machines separated by ``---``) only the first
    machine is returned.
    """
    elements = _parse_markdown_structure(source)

    # Gather the first non-empty run of elements, stopping at the separator
    # that closes it. Separators before any content are ignored.
    first_machine: list[_MdElement] = []
    for el in elements:
        if not isinstance(el, _MdSeparator):
            first_machine.append(el)
        elif first_machine:
            break

    # With nothing but separators (or no elements at all), parse as-is.
    return _parse_machine_elements(first_machine if first_machine else elements)
|
|
854
|
+
|
|
855
|
+
|
|
856
|
+
def parse_orca_md_multi(source: str) -> list[MachineDef]:
    """
    Parse Orca markdown (.orca.md) text into a list of MachineDefs.

    Machines are separated by ``---`` lines. A file without separators
    yields a one-element list.
    """
    elements = _parse_markdown_structure(source)

    groups: list[list[_MdElement]] = []
    pending: list[_MdElement] = []
    saw_separator = False

    for el in elements:
        if not isinstance(el, _MdSeparator):
            pending.append(el)
            continue
        saw_separator = True
        # Consecutive separators produce no empty groups.
        if pending:
            groups.append(pending)
            pending = []

    if not saw_separator:
        return [_parse_machine_elements(elements)]

    if pending:
        groups.append(pending)
    return [_parse_machine_elements(group) for group in groups]
|
|
881
|
+
|
|
882
|
+
|
|
883
|
+
def parse_orca_auto(source: str, filename: str | None = None) -> MachineDef:
    """
    Detect the definition format and parse it.

    The filename extension is checked first when one is given; otherwise the
    content is sniffed for a ``# machine`` heading. Every path currently ends
    in the markdown parser, which is also the fallback.
    """
    if filename and filename.endswith((".orca.md", ".md")):
        return parse_orca_md(source)

    # Content sniffing: a markdown definition has a "# machine ..." heading.
    looks_like_markdown = re.search(r"^\s*#\s+machine\s+", source, re.MULTILINE) is not None
    if looks_like_markdown:
        return parse_orca_md(source)

    # Fallback to markdown for any other case.
    return parse_orca_md(source)
|