orca-runtime-python 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,894 @@
1
+ """
2
+ Orca Markdown parser.
3
+
4
+ Parses Orca machine definition text in markdown (.orca.md) format
5
+ into MachineDef objects. Supports hierarchical and parallel states.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import re
11
+ from dataclasses import dataclass, field
12
+ from typing import Any
13
+
14
+ from .types import (
15
+ MachineDef,
16
+ StateDef,
17
+ Transition,
18
+ GuardDef,
19
+ ActionSignature,
20
+ EffectDef,
21
+ GuardExpression,
22
+ GuardTrue,
23
+ GuardFalse,
24
+ GuardCompare,
25
+ GuardAnd,
26
+ GuardOr,
27
+ GuardNot,
28
+ GuardNullcheck,
29
+ VariableRef,
30
+ ValueRef,
31
+ RegionDef,
32
+ ParallelDef,
33
+ InvokeDef,
34
+ )
35
+
36
+
37
class ParseError(Exception):
    """Error type for Orca source text that cannot be parsed."""
40
+
41
+
42
+ # ============================================================
43
+ # Markdown (.orca.md) Parser
44
+ # ============================================================
45
+
46
+ @dataclass
47
+ class _MdHeading:
48
+ kind: str = "heading"
49
+ level: int = 0
50
+ text: str = ""
51
+
52
+
53
+ @dataclass
54
+ class _MdTable:
55
+ kind: str = "table"
56
+ headers: list[str] = field(default_factory=list)
57
+ rows: list[list[str]] = field(default_factory=list)
58
+
59
+
60
+ @dataclass
61
+ class _MdBulletList:
62
+ kind: str = "bullets"
63
+ items: list[str] = field(default_factory=list)
64
+
65
+
66
+ @dataclass
67
+ class _MdBlockquote:
68
+ kind: str = "blockquote"
69
+ text: str = ""
70
+
71
+
72
+ @dataclass
73
+ class _MdSeparator:
74
+ """Machine separator (--- between machines in multi-machine files)."""
75
+ kind: str = "separator"
76
+
77
+
78
+ # Union of every structural element the phase-1 markdown scan can emit.
+ _MdElement = _MdHeading | _MdTable | _MdBulletList | _MdBlockquote | _MdSeparator
79
+
80
+
81
def _parse_markdown_structure(source: str) -> list[_MdElement]:
    """Phase 1: scan markdown text and collect its structural elements.

    Lines are examined with surrounding whitespace removed. Headings,
    ``---`` separators, blockquotes, tables and ``- `` bullet lists are
    emitted in document order; fenced code blocks, blank lines and any
    other text are skipped.

    Args:
        source: Full markdown text.

    Returns:
        The recognised elements in the order they appear.
    """
    stripped = [raw.strip() for raw in source.split("\n")]
    total = len(stripped)
    found: list[_MdElement] = []

    def run_end(start: int, prefix: str) -> int:
        """Return the index just past the run of lines starting with prefix."""
        end = start
        while end < total and stripped[end].startswith(prefix):
            end += 1
        return end

    pos = 0
    while pos < total:
        line = stripped[pos]

        if line == "":
            pos += 1
            continue

        # Fenced code block: drop everything up to and including the
        # closing fence (or to end of input when the fence is never closed).
        if line.startswith("```"):
            pos += 1
            while pos < total and not stripped[pos].startswith("```"):
                pos += 1
            pos = min(pos + 1, total)
            continue

        heading = re.match(r"^(#{1,6})\s+(.+)$", line)
        if heading is not None:
            hashes, title = heading.groups()
            found.append(_MdHeading(level=len(hashes), text=title.strip()))
            pos += 1
            continue

        # Three or more dashes alone on a line separate machines.
        if re.match(r"^---+$", line):
            found.append(_MdSeparator())
            pos += 1
            continue

        if line.startswith(">"):
            stop = run_end(pos, ">")
            quoted = [re.sub(r"^>\s*", "", q) for q in stripped[pos:stop]]
            found.append(_MdBlockquote(text="\n".join(quoted)))
            pos = stop
            continue

        if line.startswith("|"):
            stop = run_end(pos, "|")
            block = stripped[pos:stop]
            pos = stop
            # A single table line carries no data rows and is discarded.
            if len(block) >= 2:
                has_divider = bool(re.match(r"^\|[\s\-:|]+\|$", block[1]))
                body_from = 2 if has_divider else 1
                found.append(_MdTable(
                    headers=_split_table_row(block[0]),
                    rows=[_split_table_row(r) for r in block[body_from:]],
                ))
            continue

        if line.startswith("- "):
            stop = run_end(pos, "- ")
            found.append(_MdBulletList(
                items=[bullet[2:].strip() for bullet in stripped[pos:stop]],
            ))
            pos = stop
            continue

        # Any other text is not structural.
        pos += 1

    return found
154
+
155
+
156
+ def _split_table_row(line: str) -> list[str]:
157
+ """Split a markdown table row into cells."""
158
+ cells = [c.strip() for c in line.split("|")]
159
+ if cells and cells[0] == "":
160
+ cells.pop(0)
161
+ if cells and cells[-1] == "":
162
+ cells.pop()
163
+ return cells
164
+
165
+
166
+ def _strip_backticks(text: str) -> str:
167
+ """Remove surrounding backticks from a string."""
168
+ if text.startswith("`") and text.endswith("`"):
169
+ return text[1:-1]
170
+ return text
171
+
172
+
173
+ def _find_column_index(headers: list[str], name: str) -> int:
174
+ """Find column index by header name (case-insensitive)."""
175
+ name_lower = name.lower()
176
+ for idx, h in enumerate(headers):
177
+ if h.lower() == name_lower:
178
+ return idx
179
+ return -1
180
+
181
+
182
+ def _parse_md_annotations(text: str) -> dict[str, Any]:
183
+ """Parse state annotations like [initial, final, parallel, sync: all-final]."""
184
+ result: dict[str, Any] = {
185
+ "is_initial": False,
186
+ "is_final": False,
187
+ "is_parallel": False,
188
+ }
189
+ bracket_match = re.search(r"\[(.+)\]", text)
190
+ if bracket_match:
191
+ for part in [p.strip() for p in bracket_match.group(1).split(",")]:
192
+ if part == "initial":
193
+ result["is_initial"] = True
194
+ elif part == "final":
195
+ result["is_final"] = True
196
+ elif part == "parallel":
197
+ result["is_parallel"] = True
198
+ elif part.startswith("sync:"):
199
+ v = part[5:].strip().replace("_", "-")
200
+ if v in ("all-final", "any-final", "custom"):
201
+ result["sync_strategy"] = v
202
+ return result
203
+
204
+
205
+ @dataclass
206
+ class _MdStateEntry:
207
+ entry_type: str # 'state' or 'region'
208
+ level: int = 0
209
+ name: str = ""
210
+ is_initial: bool = False
211
+ is_final: bool = False
212
+ is_parallel: bool = False
213
+ sync_strategy: str | None = None
214
+ description: str | None = None
215
+ on_entry: str | None = None
216
+ on_exit: str | None = None
217
+ on_done: str | None = None
218
+ timeout: dict[str, str] | None = None
219
+ ignored_events: list[str] = field(default_factory=list)
220
+ invoke: InvokeDef | None = None
221
+ _pending_on_error: str | None = None # temp: on_error parsed before invoke
222
+
223
+
224
+ def _parse_md_state_bullet(entry: _MdStateEntry, text: str) -> None:
225
+ """Parse a bullet list item belonging to a state."""
226
+ if text.startswith("on_entry:"):
227
+ val = text[9:].strip()
228
+ if val.startswith("->"):
229
+ val = val[2:].strip()
230
+ entry.on_entry = val
231
+ elif text.startswith("on_exit:"):
232
+ val = text[8:].strip()
233
+ if val.startswith("->"):
234
+ val = val[2:].strip()
235
+ entry.on_exit = val
236
+ elif text.startswith("timeout:"):
237
+ rest = text[8:].strip()
238
+ arrow_idx = rest.find("->")
239
+ if arrow_idx != -1:
240
+ entry.timeout = {
241
+ "duration": rest[:arrow_idx].strip(),
242
+ "target": rest[arrow_idx + 2:].strip(),
243
+ }
244
+ elif text.startswith("ignore:"):
245
+ names = [e.strip() for e in text[7:].strip().split(",") if e.strip()]
246
+ entry.ignored_events.extend(names)
247
+ elif text.startswith("on_done:"):
248
+ val = text[8:].strip()
249
+ if val.startswith("->"):
250
+ val = val[2:].strip()
251
+ entry.on_done = val
252
+ # Also set on_done on invoke if invoke exists
253
+ if entry.invoke:
254
+ entry.invoke.on_done = val
255
+ elif text.startswith("on_error:"):
256
+ val = text[9:].strip()
257
+ if val.startswith("->"):
258
+ val = val[2:].strip()
259
+ # Store temporarily until invoke is parsed
260
+ entry._pending_on_error = val
261
+ if entry.invoke:
262
+ entry.invoke.on_error = val
263
+ elif text.startswith("invoke:"):
264
+ rest = text[7:].strip() # "MachineName" or "MachineName input: { ... }"
265
+ machine_name = rest
266
+ input_map: dict[str, str] | None = None
267
+
268
+ # Check for input mapping
269
+ input_match = re.search(r"input:\s*\{([^}]+)\}", rest)
270
+ if input_match:
271
+ machine_name = rest[:input_match.start()].strip()
272
+ input_str = input_match.group(1)
273
+ input_map = {}
274
+ for pair in input_str.split(","):
275
+ if ":" in pair:
276
+ key, value = pair.split(":", 1)
277
+ input_map[key.strip()] = value.strip()
278
+
279
+ entry.invoke = InvokeDef(machine=machine_name, input=input_map)
280
+ # Apply pending on_error if we already parsed it
281
+ if entry._pending_on_error:
282
+ entry.invoke.on_error = entry._pending_on_error
283
+
284
+
285
def _build_md_states_at_level(
    entries: list[_MdStateEntry], start_idx: int, level: int, parent_name: str | None = None
) -> tuple[list[StateDef], int]:
    """Turn the flat entries at one heading level into nested StateDefs.

    Scanning starts at ``start_idx`` and stops at the first entry that is
    shallower than ``level`` or is a region. Entries deeper than ``level``
    that were not claimed as children are skipped.

    Args:
        entries: All state/region entries in document order.
        start_idx: Index of the first entry to consider.
        level: Heading level of the states to build.
        parent_name: Name recorded as ``parent`` on each built state.

    Returns:
        The built states and the index of the first unconsumed entry.
    """
    built: list[StateDef] = []
    cursor = start_idx
    total = len(entries)
    child_level = level + 1

    while cursor < total:
        current = entries[cursor]
        if current.level < level or current.entry_type == "region":
            break
        if current.level > level:
            cursor += 1
            continue

        node = StateDef(
            name=current.name,
            is_initial=current.is_initial,
            is_final=current.is_final,
        )
        if parent_name:
            node.parent = parent_name
        # Optional attributes are copied only when the entry set them.
        for attr in ("description", "on_entry", "on_exit", "on_done", "timeout"):
            value = getattr(current, attr)
            if value:
                setattr(node, attr, value)
        if current.ignored_events:
            node.ignored_events = list(current.ignored_events)
        if current.invoke:
            node.invoke = current.invoke

        cursor += 1

        if current.is_parallel:
            node.parallel, cursor = _build_md_parallel_regions(
                entries, cursor, child_level, current.name, current.sync_strategy
            )
        elif cursor < total:
            following = entries[cursor]
            if following.level == child_level and following.entry_type == "state":
                node.contains, cursor = _build_md_states_at_level(
                    entries, cursor, child_level, current.name
                )

        built.append(node)

    return built, cursor
338
+
339
+
340
def _build_md_parallel_regions(
    entries: list[_MdStateEntry], start_idx: int, region_level: int,
    parent_name: str, sync_strategy: str | None = None
) -> tuple[ParallelDef, int]:
    """Collect the regions of a parallel state from the flat entry list.

    A region is a "region" entry at exactly ``region_level``; its states
    are the "state" entries at ``region_level + 1`` that directly follow
    it. Scanning stops at the first entry that fits neither shape.

    Args:
        entries: All state/region entries in document order.
        start_idx: Index of the first entry after the parallel state.
        region_level: Heading level at which regions are declared.
        parent_name: Name of the owning parallel state.
        sync_strategy: Value passed through as ``ParallelDef.sync``.

    Returns:
        The parallel definition and the index of the first unconsumed entry.
    """
    total = len(entries)
    state_level = region_level + 1
    collected: list[RegionDef] = []
    cursor = start_idx

    def is_at(index: int, kind: str, depth: int) -> bool:
        """True when entries[index] exists with the given type and level."""
        return (
            index < total
            and entries[index].entry_type == kind
            and entries[index].level == depth
        )

    while is_at(cursor, "region", region_level):
        region_name = entries[cursor].name
        cursor += 1

        members: list[StateDef] = []
        while is_at(cursor, "state", state_level):
            src = entries[cursor]
            member = StateDef(
                name=src.name,
                is_initial=src.is_initial,
                is_final=src.is_final,
            )
            member.parent = f"{parent_name}.{region_name}"
            for attr in ("description", "on_entry", "on_exit", "timeout"):
                value = getattr(src, attr)
                if value:
                    setattr(member, attr, value)
            if src.ignored_events:
                member.ignored_events = list(src.ignored_events)
            if src.invoke:
                member.invoke = src.invoke
            if src.on_done:
                member.on_done = src.on_done
            members.append(member)
            cursor += 1

        collected.append(RegionDef(name=region_name, states=members))

    return ParallelDef(regions=collected, sync=sync_strategy), cursor
387
+
388
+
389
def _parse_md_action_signature(name: str, text: str) -> ActionSignature:
    """Parse an action signature such as '(ctx, event) -> Context + Effect<T>'.

    Malformed signatures are handled leniently instead of being sliced
    with a ``find()`` result of -1:

    * without a parenthesised parameter list, the action has no
      parameters and the whole text is treated as the return part;
    * without a ``->`` arrow, everything after the parameter list is
      treated as the return part;
    * an empty return type falls back to "Context".

    Args:
        name: Action name from the table row.
        text: Signature text (backticks already removed by the caller).

    Returns:
        An ActionSignature holding the parameter names (type suffixes
        after ':' dropped), the return type, and the effect type when an
        ``Effect<...>`` term follows a '+'.
    """
    text = text.strip()

    paren_start = text.find("(")
    paren_end = text.find(")", paren_start + 1) if paren_start != -1 else -1
    if paren_end != -1:
        params_str = text[paren_start + 1:paren_end].strip()
        after_paren = text[paren_end + 1:].strip()
    else:
        params_str = ""
        after_paren = text

    parameters: list[str] = []
    if params_str:
        # "ctx: Context" -> "ctx"
        parameters = [p.split(":")[0].strip() for p in params_str.split(",")]

    arrow_idx = after_paren.find("->")
    if arrow_idx != -1:
        return_part = after_paren[arrow_idx + 2:].strip()
    else:
        return_part = after_paren

    has_effect = False
    effect_type: str | None = None

    base, plus, effect_part = return_part.partition("+")
    if plus:
        return_type = base.strip() or "Context"
        effect_match = re.search(r"Effect<(\w+)>", effect_part)
        if effect_match:
            has_effect = True
            effect_type = effect_match.group(1)
    else:
        return_type = return_part or "Context"

    return ActionSignature(
        name=name,
        parameters=parameters,
        return_type=return_type,
        has_effect=has_effect,
        effect_type=effect_type,
    )
425
+
426
+
427
+ # --- Guard expression parser (shared) ---
428
+ # Grammar:
429
+ # expr = or_expr
430
+ # or_expr = and_expr ('or' and_expr)*
431
+ # and_expr = not_expr ('and' not_expr)*
432
+ # not_expr = 'not' primary | primary
433
+ # primary = '(' expr ')' | 'true' | 'false' | comparison
434
+ # comparison = var_path (op value | 'is' 'not'? 'null')?
435
+ # var_path = IDENT ('.' IDENT)*
436
+ # op = '==' | '!=' | '<' | '>' | '<=' | '>='
437
+ # value = NUMBER | STRING | 'true' | 'false' | 'null' | IDENT (a bare word is read as a string)
438
+
439
+ @dataclass
440
+ class _GToken:
441
+ type: str # ident, number, string, op, lparen, rparen, dot, eof
442
+ value: str
443
+
444
+
445
def _tokenize_guard(input_str: str) -> list[_GToken]:
    """Split a guard expression into tokens, ending with an ``eof`` token.

    Whitespace and characters that start no known token are skipped.
    String tokens hold the text between the quotes; an unterminated
    string runs to the end of the input.
    """
    length = len(input_str)
    out: list[_GToken] = []
    single_char_types = {
        "<": "op",
        ">": "op",
        "(": "lparen",
        ")": "rparen",
        ".": "dot",
    }

    def span_end(start: int, accept) -> int:
        """Return the index just past the run of accepted characters."""
        end = start
        while end < length and accept(input_str[end]):
            end += 1
        return end

    cursor = 0
    while cursor < length:
        ch = input_str[cursor]
        pair = input_str[cursor:cursor + 2]

        if ch.isspace():
            cursor += 1
        elif ch in ('"', "'"):
            closing = input_str.find(ch, cursor + 1)
            if closing == -1:
                closing = length
            out.append(_GToken("string", input_str[cursor + 1:closing]))
            cursor = closing + 1
        elif pair in ("==", "!=", "<=", ">="):
            # Two-character operators are tried before '<' and '>'.
            out.append(_GToken("op", pair))
            cursor += 2
        elif ch in single_char_types:
            out.append(_GToken(single_char_types[ch], ch))
            cursor += 1
        elif ch.isdigit() or (ch == "-" and input_str[cursor + 1:cursor + 2].isdigit()):
            # Digits and dots after the first character belong to the number.
            stop = span_end(cursor + 1, lambda k: k.isdigit() or k == ".")
            out.append(_GToken("number", input_str[cursor:stop]))
            cursor = stop
        elif ch.isalpha() or ch == "_":
            stop = span_end(cursor, lambda k: k.isalnum() or k == "_")
            out.append(_GToken("ident", input_str[cursor:stop]))
            cursor = stop
        else:
            cursor += 1

    out.append(_GToken("eof", ""))
    return out
522
+
523
+
524
def _parse_guard_expression(input_str: str) -> GuardExpression:
    """Parse a guard expression string into a GuardExpression AST.

    Recursive-descent parser with precedence or < and < not < primary.
    A primary is a parenthesised expression, ``true``/``false``, or a
    dotted variable path optionally followed by ``is null``,
    ``is not null`` or a comparison. A bare variable path is treated as
    a not-null check.

    The cursor never moves past the trailing ``eof`` token, so truncated
    input such as ``"x =="`` yields a result instead of an IndexError.

    Args:
        input_str: Guard expression source text.

    Returns:
        The root node of the parsed expression.
    """
    tokens = _tokenize_guard(input_str)
    eof_index = len(tokens) - 1  # the tokenizer always appends an eof token
    pos = 0

    def peek() -> _GToken:
        return tokens[pos]

    def advance() -> _GToken:
        """Return the current token and step forward, stopping at eof."""
        nonlocal pos
        tok = tokens[pos]
        if pos < eof_index:
            pos += 1
        return tok

    def at_word(word: str) -> bool:
        """True when the current token is the identifier ``word``."""
        tok = peek()
        return tok.type == "ident" and tok.value == word

    def parse_or() -> GuardExpression:
        left = parse_and()
        while at_word("or"):
            advance()
            right = parse_and()
            left = GuardOr(left=left, right=right)
        return left

    def parse_and() -> GuardExpression:
        left = parse_not()
        while at_word("and"):
            advance()
            right = parse_not()
            left = GuardAnd(left=left, right=right)
        return left

    def parse_not() -> GuardExpression:
        if at_word("not"):
            advance()
            return GuardNot(expr=parse_primary())
        return parse_primary()

    def parse_primary() -> GuardExpression:
        # Parenthesized expression; a missing ')' is tolerated.
        if peek().type == "lparen":
            advance()
            expr = parse_or()
            if peek().type == "rparen":
                advance()
            return expr

        # Literals
        if at_word("true"):
            advance()
            return GuardTrue()
        if at_word("false"):
            advance()
            return GuardFalse()

        # Variable path, possibly followed by a null check or comparison
        var_path = parse_var_path()

        # "is null" / "is not null"
        if at_word("is"):
            advance()
            if at_word("not"):
                advance()
                if at_word("null"):
                    advance()
                    return GuardNullcheck(expr=var_path, is_null=False)
            if at_word("null"):
                advance()
                return GuardNullcheck(expr=var_path, is_null=True)

        # Comparison operator
        if peek().type == "op":
            op = advance().value
            right = parse_value()
            # Any comparison against null becomes a null check; only "=="
            # means "is null".
            if right.type == "null":
                return GuardNullcheck(expr=var_path, is_null=(op == "=="))
            return GuardCompare(op=_map_op(op), left=var_path, right=right)

        # Bare variable = truthy check (not null)
        return GuardNullcheck(expr=var_path, is_null=False)

    def parse_var_path() -> VariableRef:
        parts: list[str] = []
        if peek().type == "ident":
            parts.append(advance().value)
        while peek().type == "dot":
            advance()
            if peek().type == "ident":
                parts.append(advance().value)
        return VariableRef(path=parts)

    def parse_value() -> ValueRef:
        tok = advance()
        if tok.type == "number":
            num = float(tok.value)
            # Whole numbers are stored as int rather than float.
            if num == int(num):
                num = int(num)
            return ValueRef(type="number", value=num)
        if tok.type == "string":
            return ValueRef(type="string", value=tok.value)
        if tok.type == "ident":
            if tok.value == "null":
                return ValueRef(type="null", value=None)
            if tok.value == "true":
                return ValueRef(type="boolean", value=True)
            if tok.value == "false":
                return ValueRef(type="boolean", value=False)
            # Any other bare word is read as a string value.
            return ValueRef(type="string", value=tok.value)
        # Anything else (including end of input) is read as null.
        return ValueRef(type="null", value=None)

    return parse_or()
639
+
640
+
641
+ def _map_op(op: str) -> str:
642
+ """Map operator string to internal op name."""
643
+ return {
644
+ "==": "eq",
645
+ "!=": "ne",
646
+ "<": "lt",
647
+ ">": "gt",
648
+ "<=": "le",
649
+ ">=": "ge",
650
+ }.get(op, "eq")
651
+
652
+
653
def _md_cell(row: list[str], index: int) -> str:
    """Return the trimmed cell at ``index``, or "" when the column is absent."""
    if 0 <= index < len(row):
        return row[index].strip()
    return ""


def _md_context_default(raw: str) -> Any:
    """Convert a context table's default cell to a Python value.

    Integers and decimals (optionally negative) become int/float,
    "true"/"false" become bool, a value starting with a quote has its
    quote characters stripped from both ends, an empty cell becomes None,
    and anything else stays a string.
    """
    if not raw:
        return None
    # \d matches only decimal digits, all of which int()/float() accept;
    # str.isdigit() would also accept characters such as "²" that int()
    # rejects.
    if re.fullmatch(r"-?\d+", raw):
        return int(raw)
    if re.fullmatch(r"-?\d+\.\d+", raw):
        return float(raw)
    if raw in ("true", "false"):
        return raw == "true"
    if raw.startswith(('"', "'")):
        return raw.strip("\"'")
    return raw


def _parse_machine_elements(elements: list[_MdElement]) -> MachineDef:
    """Parse a single machine from already-split structural elements.

    Headings drive the parse: a level-1 "machine <name>" heading names the
    machine; "context", "events", "transitions", "guards", "actions" and
    "effects" headings (any level, case-insensitive) consume the table or
    bullet list that directly follows them; "state ..." and "region ..."
    headings open entries whose blockquotes and bullets are attached until
    the next heading. The flat entries are then nested by heading level.

    Args:
        elements: Structural elements belonging to one machine.

    Returns:
        The assembled MachineDef; the name is "unknown" when no machine
        heading is present.
    """
    machine_name = "unknown"
    context: dict[str, Any] = {}
    events: list[str] = []
    transitions: list[Transition] = []
    guards: dict[str, GuardExpression] = {}
    actions: list[ActionSignature] = []
    effects: list[EffectDef] = []
    state_entries: list[_MdStateEntry] = []
    current_state_entry: _MdStateEntry | None = None

    section_names = ("context", "events", "transitions", "guards", "actions", "effects")

    i = 0
    while i < len(elements):
        el = elements[i]

        # Non-heading content belongs to the state opened most recently.
        if not isinstance(el, _MdHeading):
            if current_state_entry is not None:
                if isinstance(el, _MdBlockquote):
                    current_state_entry.description = el.text
                elif isinstance(el, _MdBulletList):
                    for item in el.items:
                        _parse_md_state_bullet(current_state_entry, item)
            i += 1
            continue

        # Machine heading
        if el.level == 1 and el.text.startswith("machine "):
            machine_name = el.text[8:].strip()
            current_state_entry = None
            i += 1
            continue

        # Section headings
        section_name = el.text.lower()
        if section_name in section_names:
            current_state_entry = None
            next_el = elements[i + 1] if i + 1 < len(elements) else None
            consumed_body = True

            if section_name == "context" and isinstance(next_el, _MdTable):
                fi = _find_column_index(next_el.headers, "field")
                di = _find_column_index(next_el.headers, "default")
                for row in next_el.rows:
                    context[_md_cell(row, fi)] = _md_context_default(_md_cell(row, di))

            elif section_name == "events" and isinstance(next_el, _MdBulletList):
                # A bullet may list several comma-separated event names.
                for item in next_el.items:
                    events.extend(n.strip() for n in item.split(",") if n.strip())

            elif section_name == "transitions" and isinstance(next_el, _MdTable):
                si = _find_column_index(next_el.headers, "source")
                ei = _find_column_index(next_el.headers, "event")
                gi = _find_column_index(next_el.headers, "guard")
                ti = _find_column_index(next_el.headers, "target")
                ai = _find_column_index(next_el.headers, "action")
                for row in next_el.rows:
                    guard_str = _md_cell(row, gi)
                    action_str = _md_cell(row, ai)
                    transitions.append(Transition(
                        source=_md_cell(row, si),
                        event=_md_cell(row, ei),
                        guard=guard_str or None,
                        target=_md_cell(row, ti),
                        # "_" in the action column means "no action".
                        action=action_str if action_str and action_str != "_" else None,
                    ))

            elif section_name == "guards" and isinstance(next_el, _MdTable):
                ni = _find_column_index(next_el.headers, "name")
                ei = _find_column_index(next_el.headers, "expression")
                for row in next_el.rows:
                    expr_str = _strip_backticks(_md_cell(row, ei))
                    guards[_md_cell(row, ni)] = _parse_guard_expression(expr_str)

            elif section_name == "actions" and isinstance(next_el, _MdTable):
                ni = _find_column_index(next_el.headers, "name")
                si = _find_column_index(next_el.headers, "signature")
                for row in next_el.rows:
                    sig = _strip_backticks(_md_cell(row, si))
                    actions.append(_parse_md_action_signature(_md_cell(row, ni), sig))

            elif section_name == "effects" and isinstance(next_el, _MdTable):
                ni = _find_column_index(next_el.headers, "name")
                ii = _find_column_index(next_el.headers, "input")
                oi = _find_column_index(next_el.headers, "output")
                for row in next_el.rows:
                    name = _md_cell(row, ni)
                    input_str = _strip_backticks(_md_cell(row, ii))
                    output_str = _strip_backticks(_md_cell(row, oi))
                    if name:
                        effects.append(EffectDef(name=name, input=input_str, output=output_str))

            else:
                # Section heading without the expected table/list after it.
                consumed_body = False

            i += 2 if consumed_body else 1
            continue

        # State heading
        state_match = re.match(r"^state\s+(\w+)(.*)", el.text)
        if state_match:
            annot = _parse_md_annotations(state_match.group(2).strip())
            current_state_entry = _MdStateEntry(
                entry_type="state",
                level=el.level,
                name=state_match.group(1),
                is_initial=annot["is_initial"],
                is_final=annot["is_final"],
                is_parallel=annot["is_parallel"],
                sync_strategy=annot.get("sync_strategy"),
            )
            state_entries.append(current_state_entry)
            i += 1
            continue

        # Region heading
        region_match = re.match(r"^region\s+(\w+)$", el.text)
        if region_match:
            current_state_entry = None
            state_entries.append(_MdStateEntry(
                entry_type="region",
                level=el.level,
                name=region_match.group(1),
            ))
            i += 1
            continue

        # Any other heading ends the current state's content.
        current_state_entry = None
        i += 1

    # Build state hierarchy; the first entry's level is the top level.
    base_level = state_entries[0].level if state_entries else 2
    states, _ = _build_md_states_at_level(state_entries, 0, base_level)

    return MachineDef(
        name=machine_name,
        context=context,
        events=events,
        states=states,
        transitions=transitions,
        guards=guards,
        actions=actions,
        effects=effects,
    )
827
+
828
+
829
def parse_orca_md(source: str) -> MachineDef:
    """
    Parse Orca markdown (.orca.md) text into a MachineDef.

    When the text holds several machines divided by ``---`` separators,
    only the first one is returned.
    """
    elements = _parse_markdown_structure(source)

    # Gather the first non-empty run of elements between separators.
    first_machine: list[_MdElement] = []
    for el in elements:
        if not isinstance(el, _MdSeparator):
            first_machine.append(el)
        elif first_machine:
            break

    # With nothing but separators (or no elements), parse the full list.
    return _parse_machine_elements(first_machine or elements)
854
+
855
+
856
def parse_orca_md_multi(source: str) -> list[MachineDef]:
    """
    Parse Orca markdown (.orca.md) text into a list of MachineDefs.

    Machines are divided by ``---`` separators. Text without any
    separator yields a one-element list.
    """
    elements = _parse_markdown_structure(source)

    if not any(isinstance(el, _MdSeparator) for el in elements):
        return [_parse_machine_elements(elements)]

    # Group elements into runs between separators; empty runs are dropped.
    groups: list[list[_MdElement]] = [[]]
    for el in elements:
        if not isinstance(el, _MdSeparator):
            groups[-1].append(el)
        elif groups[-1]:
            groups.append([])

    return [_parse_machine_elements(group) for group in groups if group]
881
+
882
+
883
def parse_orca_auto(source: str, filename: str | None = None) -> MachineDef:
    """
    Detect the input format and parse an Orca machine definition.

    The filename extension is checked first when a filename is given,
    then the content is sniffed for a "# machine ..." heading. Every
    path currently ends in the markdown parser.
    """
    named_markdown = bool(filename) and filename.endswith((".orca.md", ".md"))
    if named_markdown or re.search(r"^\s*#\s+machine\s+", source, re.MULTILINE):
        return parse_orca_md(source)
    # Fallback: markdown is the only format handled here.
    return parse_orca_md(source)