apc-model-parser 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,183 @@
1
+ """Parse ExprTk-style expression strings into the canonical expression IR.
2
+
3
+ This is a small, explicit tokenizer + precedence-climbing parser. It exists
4
+ instead of regex string rewrites so that operator precedence, associativity,
5
+ and conditional semantics are defined in *one* place (ADR 0003). The output is
6
+ an :data:`model_parser.ir.expr.Expr` tree that every backend lowers from.
7
+
8
+ Supported surface (the ExprTk subset used by the authoring INI format):
9
+
10
+ - numeric literals (``1``, ``1.5``, ``1e-3``);
11
+ - identifiers (symbol references);
12
+ - binary operators ``+ - * / ^`` and unary minus;
13
+ - comparisons ``< > <= >= == !=``;
14
+ - function calls ``f(a, b, ...)``;
15
+ - the ExprTk spellings ``pow(a, b)`` and ``if(c, a, b)``, normalized to ``^``
16
+ and ``ifelse`` respectively.
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import re
22
+ from dataclasses import dataclass
23
+
24
+ from model_parser.ir.expr import Call, Expr, Num, Sym
25
+
26
# ExprTk function/operator spellings that map onto canonical IR op names.
_FUNCTION_ALIASES: dict[str, str] = {"pow": "^", "if": "ifelse"}

# Binary operators: name -> (left binding power, right binding power).
# The parser keeps consuming an operator while its *left* power is at least the
# caller's minimum, then parses the right operand with the *right* power as the
# new minimum. A right power above the left therefore gives left-associativity;
# a right power below the left gives right-associativity (used for ``^``).
_BINARY: dict[str, tuple[int, int]] = {
    "==": (10, 11),
    "!=": (10, 11),
    "<": (10, 11),
    ">": (10, 11),
    "<=": (10, 11),
    ">=": (10, 11),
    "+": (20, 21),
    "-": (20, 21),
    "*": (30, 31),
    "/": (30, 31),
    "^": (51, 50),
}

# Minimum binding power used for the operand of a unary ``-`` / ``+``. It sits
# above ``* /`` and below ``^`` so that ``-a ^ b`` groups as ``-(a ^ b)`` while
# ``-a * b`` groups as ``(-a) * b``.
_UNARY_BP = 40

# One token per match. The number alternative accepts ``1``, ``1.5``, ``.5`` and
# an optional exponent on *any* of those forms (previously ``.5e-3`` lost its
# exponent and was split into ``.5`` followed by the identifier ``e``).
_TOKEN_RE = re.compile(
    r"""
    \s*(?:
        (?P<number>(?:\d+(?:\.\d+)?|\.\d+)(?:[eE][+-]?\d+)?)
      | (?P<ident>[A-Za-z_]\w*)
      | (?P<op><=|>=|==|!=|[-+*/^<>(),])
    )
    """,
    re.VERBOSE,
)
57
+
58
+
59
class ExprParseError(ValueError):
    """Signals that an expression string could not be tokenized or parsed.

    Subclasses :class:`ValueError` so callers that already guard against bad
    values keep working.
    """
61
+
62
+
63
@dataclass(frozen=True)
class _Token:
    """One immutable lexical token produced by the tokenizer."""

    # Token category: "number" | "ident" | "op" | "end".
    kind: str
    # Exact source text of the token (empty for the "end" sentinel).
    text: str
    # Offset of the token's first character in the source string.
    pos: int
68
+
69
+
70
def _tokenize(source: str) -> list[_Token]:
    """Split ``source`` into tokens, terminated by a single ``"end"`` token.

    Whitespace between tokens is skipped. Each token records the offset of its
    own first character (not of any whitespace in front of it).

    Raises:
        ExprParseError: if a non-whitespace character starts no known token.
    """
    length = len(source)
    out: list[_Token] = []
    cursor = 0
    while cursor < length:
        if source[cursor].isspace():
            cursor += 1
            continue
        found = _TOKEN_RE.match(source, cursor)
        # A zero-width match would never advance the cursor, so treat it as a failure.
        if found is None or found.end() == found.start():
            raise ExprParseError(
                f"unexpected character {source[cursor]!r} at position {cursor} in {source!r}"
            )
        group = found.lastgroup
        assert group is not None
        out.append(_Token(kind=group, text=found.group(group), pos=found.start(group)))
        cursor = found.end()
    out.append(_Token(kind="end", text="", pos=length))
    return out
90
+
91
+
92
class _Parser:
    """Precedence-climbing parser over the tokens of one expression string.

    The parser walks ``self.tokens`` with a single cursor (``self.index``) and
    builds ``Num`` / ``Sym`` / ``Call`` nodes. Operator grouping is driven
    entirely by the ``_BINARY`` binding-power table and ``_UNARY_BP``.
    """

    # Required argument counts for the ExprTk spellings that are rewritten to
    # fixed-arity canonical ops (``pow(a, b)`` -> ``^``, ``if(c, a, b)`` ->
    # ``ifelse``). Without this check ``pow(a)`` would silently produce a
    # one-argument ``^`` node.
    _ALIAS_ARITY: dict[str, int] = {"pow": 2, "if": 3}

    def __init__(self, source: str) -> None:
        """Tokenize ``source`` and position the cursor on the first token."""
        self.source = source
        self.tokens = _tokenize(source)
        self.index = 0

    @property
    def current(self) -> _Token:
        """The token under the cursor (not consumed)."""
        return self.tokens[self.index]

    def advance(self) -> _Token:
        """Consume and return the token under the cursor."""
        token = self.tokens[self.index]
        self.index += 1
        return token

    def expect_op(self, text: str) -> None:
        """Consume the operator ``text`` or raise :class:`ExprParseError`."""
        token = self.current
        if token.kind != "op" or token.text != text:
            raise ExprParseError(
                f"expected {text!r} but found {token.text!r} at position "
                f"{token.pos} in {self.source!r}"
            )
        self.advance()

    def parse(self) -> Expr:
        """Parse the whole token stream as one expression.

        Raises:
            ExprParseError: on any syntax error, including leftover tokens
                after a complete expression.
        """
        expr = self._parse_expr(0)
        if self.current.kind != "end":
            raise ExprParseError(
                f"unexpected trailing token {self.current.text!r} at position "
                f"{self.current.pos} in {self.source!r}"
            )
        return expr

    def _parse_expr(self, min_bp: int) -> Expr:
        """Parse an expression whose operators all bind at least as tightly as ``min_bp``."""
        left = self._parse_prefix()
        while True:
            token = self.current
            if token.kind != "op" or token.text not in _BINARY:
                break
            left_bp, right_bp = _BINARY[token.text]
            # A weaker operator belongs to an enclosing call of _parse_expr.
            if left_bp < min_bp:
                break
            self.advance()
            right = self._parse_expr(right_bp)
            left = Call(op=token.text, args=[left, right])
        return left

    def _parse_prefix(self) -> Expr:
        """Parse a unary operator, parenthesized group, literal, symbol, or call."""
        token = self.current
        if token.kind == "op" and token.text == "-":
            self.advance()
            operand = self._parse_expr(_UNARY_BP)
            return Call(op="neg", args=[operand])
        if token.kind == "op" and token.text == "+":
            # Unary plus is a no-op: return the operand unchanged.
            self.advance()
            return self._parse_expr(_UNARY_BP)
        if token.kind == "op" and token.text == "(":
            self.advance()
            inner = self._parse_expr(0)
            self.expect_op(")")
            return inner
        if token.kind == "number":
            self.advance()
            return Num(value=float(token.text))
        if token.kind == "ident":
            self.advance()
            # An identifier directly followed by "(" is a function call.
            if self.current.kind == "op" and self.current.text == "(":
                return self._parse_call(token.text)
            return Sym(name=token.text)
        raise ExprParseError(
            f"unexpected token {token.text!r} at position {token.pos} in {self.source!r}"
        )

    def _parse_call(self, name: str) -> Expr:
        """Parse ``(arg, ...)`` after function ``name`` and build the call node.

        ExprTk spellings listed in ``_FUNCTION_ALIASES`` are rewritten to their
        canonical op name; for those, the argument count is validated.

        Raises:
            ExprParseError: on malformed argument lists, or when ``pow`` / ``if``
                is called with the wrong number of arguments.
        """
        open_pos = self.current.pos
        self.expect_op("(")
        args: list[Expr] = []
        if not (self.current.kind == "op" and self.current.text == ")"):
            args.append(self._parse_expr(0))
            while self.current.kind == "op" and self.current.text == ",":
                self.advance()
                args.append(self._parse_expr(0))
        self.expect_op(")")
        expected = self._ALIAS_ARITY.get(name)
        if expected is not None and len(args) != expected:
            raise ExprParseError(
                f"{name!r} expects {expected} arguments but got {len(args)} at "
                f"position {open_pos} in {self.source!r}"
            )
        op = _FUNCTION_ALIASES.get(name, name)
        return Call(op=op, args=args)
176
+
177
+
178
def parse_expression(source: str) -> Expr:
    """Turn one ExprTk-style expression string into an IR expression tree.

    Surrounding whitespace and any trailing ``;`` terminators are ignored.

    Raises:
        ExprParseError: if nothing is left after trimming, or on a syntax error.
    """
    trimmed = source.strip()
    trimmed = trimmed.rstrip(";")
    trimmed = trimmed.strip()
    if trimmed == "":
        raise ExprParseError("empty expression")
    parser = _Parser(trimmed)
    return parser.parse()
@@ -0,0 +1,204 @@
1
+ """ExprTk INI frontend: parse an INI-style model file into the canonical IR.
2
+
3
+ This implements the ``parse`` + ``normalize`` transformations for the INI
4
+ authoring format used by the existing MPC / simulation toolchain. The grammar is
5
+ section-oriented:
6
+
7
+ - ``[ModelInfo]`` — ``Name``, ``Description``, ``Version`` key/values.
8
+ - ``[Dimensions]`` — ``num_states``, ``num_inputs``, ``num_outputs``.
9
+ - ``[Parameters]`` — ``name = value`` with optional trailing ``; comment``.
10
+ - ``[StateEquationLocals]`` — ``var name := expr;`` intermediate expressions.
11
+ - ``[StateEquations]`` — ``dxN = expr`` differential equations.
12
+ - ``[OutputEquations]`` — ``yN = expr`` plus inline ``name := expr`` locals.
13
+ - ``[x0]`` / ``[u0]`` — initial values. **Dropped from the scaffold** with a
14
+ warning: initial values belong to a scenario, not the model scaffold
15
+ (org contracts §3). They bootstrap simulation but are not IR semantics.
16
+
17
+ States are named ``x0..x{n-1}``, inputs ``u0..``, and outputs ``y0..`` to match
18
+ the authoring convention; locals keep their authored names.
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ import re
24
+ from dataclasses import dataclass, field
25
+ from datetime import UTC, datetime
26
+
27
+ from model_parser.frontends.expr_parser import parse_expression
28
+ from model_parser.ir import (
29
+ DiffEq,
30
+ Equations,
31
+ IRModel,
32
+ Local,
33
+ ModelInfo,
34
+ OutputEq,
35
+ Parameter,
36
+ Provenance,
37
+ Variable,
38
+ )
39
+
40
# Value recorded as the provenance ``source_format`` for IRs built by this frontend.
SOURCE_FORMAT = "exprtk-ini"

# ``[SectionName]`` occupying the whole (already comment-stripped) line.
_SECTION_RE = re.compile(r"^\[(.+)\]$")
# ``key = value``. The key excludes ``=`` so that it ends at the *first* equals
# sign; with a plain ``\S+`` key, ``k=a=b`` was read as key ``k=a``.
_KV_RE = re.compile(r"^([^\s=]+)\s*=\s*(.+)$")
# ``name := expr`` with an optional leading ``var`` keyword.
_LOCAL_RE = re.compile(r"^(?:var\s+)?(\w+)\s*:=\s*(.+)$")
# ``dxN = expr``; group 1 is the state index N.
_DIFF_RE = re.compile(r"^dx(\d+)\s*=\s*(.+)$")
# ``yN = expr``; group 1 is the output index N.
_OUTPUT_RE = re.compile(r"^y(\d+)\s*=\s*(.+)$")
47
+
48
+
49
class IniParseError(ValueError):
    """Signals that an INI model file is structurally invalid.

    Subclasses :class:`ValueError` so generic value-error handlers still catch it.
    """
51
+
52
+
53
@dataclass
class ParseResult:
    """Bundle returned by the INI frontend: the parsed IR and its diagnostics."""

    # The canonical IR built from the authoring file.
    ir: IRModel
    # Human-readable, non-fatal diagnostics collected while parsing.
    warnings: list[str] = field(default_factory=list)
59
+
60
+
61
def _strip_comment(line: str) -> str:
    """Return ``line`` without surrounding whitespace and without its ``;`` tail.

    Blank lines and lines whose first non-blank character is ``;`` yield the
    empty string. Otherwise the text before the first ``;`` is returned,
    trimmed. The ``;`` may be a statement terminator or start a comment; the
    authoring format never places one inside an expression.
    """
    text = line.strip()
    if text == "" or text.startswith(";"):
        return ""
    head, separator, _tail = text.partition(";")
    if not separator:
        # No ';' at all: the trimmed line is the content.
        return text
    return head.strip()
73
+
74
+
75
def parse_ini_sections(text: str) -> dict[str, list[str]]:
    """Group the content lines of INI ``text`` under their section names.

    Comments and blank lines are removed first. Lines that appear before the
    first ``[Section]`` header are discarded. A header that repeats an earlier
    name keeps appending to that section's list.
    """
    grouped: dict[str, list[str]] = {}
    active: str | None = None
    for raw_line in text.splitlines():
        content = _strip_comment(raw_line)
        if content == "":
            continue
        header = _SECTION_RE.match(content)
        if header is None:
            if active is not None:
                grouped[active].append(content)
            continue
        active = header.group(1)
        if active not in grouped:
            grouped[active] = []
    return grouped
90
+
91
+
92
def _parse_kv(lines: list[str]) -> dict[str, str]:
    """Collect ``key = value`` pairs from ``lines``.

    Lines that do not match ``_KV_RE`` are skipped. When a key repeats, the
    last value wins.
    """
    candidates = (_KV_RE.match(entry) for entry in lines)
    return {hit.group(1): hit.group(2).strip() for hit in candidates if hit is not None}
99
+
100
+
101
def parse_ini_text(text: str, *, source_file: str | None = None) -> ParseResult:
    """Parse INI model text into an :class:`IRModel` with diagnostics.

    Args:
        text: Full contents of the INI authoring file.
        source_file: Optional path recorded in the IR provenance.

    Returns:
        A :class:`ParseResult` holding the IR and any non-fatal warnings
        (skipped lines, dropped ``[x0]`` / ``[u0]`` sections).

    Raises:
        IniParseError: if a required ``[Dimensions]`` key is missing or not an
            integer, a parameter value is not numeric, or a
            ``[StateEquationLocals]`` line is malformed.
        ExprParseError: if an equation or local expression fails to parse
            (propagated from ``parse_expression``).
    """
    sections = parse_ini_sections(text)
    warnings: list[str] = []

    info = _parse_kv(sections.get("ModelInfo", []))
    dims = _parse_kv(sections.get("Dimensions", []))

    def _dim(key: str) -> int:
        if key not in dims:
            raise IniParseError(f"[Dimensions] missing required key {key!r}")
        raw = dims[key]
        try:
            return int(raw)
        except ValueError as err:
            # Re-raise with section/key context; IniParseError is still a ValueError.
            raise IniParseError(
                f"[Dimensions] key {key!r} must be an integer, got {raw!r}"
            ) from err

    n_states = _dim("num_states")
    n_inputs = _dim("num_inputs")
    n_outputs = _dim("num_outputs")

    states = [Variable(name=f"x{i}") for i in range(n_states)]
    inputs = [Variable(name=f"u{i}") for i in range(n_inputs)]
    outputs = [Variable(name=f"y{i}") for i in range(n_outputs)]

    parameters: list[Parameter] = []
    for line in sections.get("Parameters", []):
        match = _KV_RE.match(line)
        if match is None:
            warnings.append(f"ignored unparsable parameter line: {line!r}")
            continue
        param_name = match.group(1)
        raw_default = match.group(2).strip()
        try:
            default = float(raw_default)
        except ValueError as err:
            raise IniParseError(
                f"[Parameters] value for {param_name!r} must be numeric, got {raw_default!r}"
            ) from err
        parameters.append(Parameter(name=param_name, default=default))

    locals_list: list[Local] = []
    for line in sections.get("StateEquationLocals", []):
        match = _LOCAL_RE.match(line)
        if match is None:
            raise IniParseError(f"cannot parse local line: {line!r}")
        locals_list.append(Local(name=match.group(1), expr=parse_expression(match.group(2))))

    differential: list[DiffEq] = []
    for line in sections.get("StateEquations", []):
        match = _DIFF_RE.match(line)
        if match is None:
            warnings.append(f"ignored unparsable state equation: {line!r}")
            continue
        idx = int(match.group(1))
        differential.append(DiffEq(state=f"x{idx}", rhs=parse_expression(match.group(2))))

    output_eqs: list[OutputEq] = []
    for line in sections.get("OutputEquations", []):
        # Output equations take priority; anything else may be an inline local.
        out_match = _OUTPUT_RE.match(line)
        if out_match is not None:
            idx = int(out_match.group(1))
            output_eqs.append(OutputEq(output=f"y{idx}", rhs=parse_expression(out_match.group(2))))
            continue
        local_match = _LOCAL_RE.match(line)
        if local_match is not None:
            locals_list.append(
                Local(
                    name=local_match.group(1),
                    expr=parse_expression(local_match.group(2)),
                )
            )
            continue
        warnings.append(f"ignored unparsable output line: {line!r}")

    # Initial values are intentionally not carried into the IR; only warn.
    for section in ("x0", "u0"):
        if sections.get(section):
            warnings.append(
                f"dropped [{section}] initial values from the scaffold: initial "
                "values belong to a scenario, not the model IR"
            )

    ir = IRModel(
        model=ModelInfo(
            name=info.get("Name", "unnamed_model"),
            description=info.get("Description"),
            source_version=info.get("Version"),
        ),
        parameters=parameters,
        states=states,
        inputs=inputs,
        outputs=outputs,
        locals=locals_list,
        equations=Equations(differential=differential, outputs=output_eqs),
        provenance=Provenance(
            tool=_tool_id(),
            created_at=datetime.now(UTC).isoformat(),
            source_format=SOURCE_FORMAT,
            source_file=source_file,
        ),
    )
    return ParseResult(ir=ir, warnings=warnings)
191
+
192
+
193
def parse_ini_file(path: str) -> ParseResult:
    """Read the UTF-8 INI model file at ``path`` and parse it into an :class:`IRModel`.

    The path is recorded as the ``source_file`` passed to ``parse_ini_text``.
    """
    from pathlib import Path

    model_path = Path(path)
    contents = model_path.read_text(encoding="utf-8")
    return parse_ini_text(contents, source_file=str(path))
199
+
200
+
201
def _tool_id() -> str:
    """Return the provenance tool identifier, ``model-parser@<package version>``."""
    # Imported here rather than at module top, as in the original.
    from model_parser import __version__ as version

    return f"model-parser@{version}"
model_parser/io.py ADDED
@@ -0,0 +1,48 @@
1
+ """Load, save, and hash canonical IR files.
2
+
3
+ IR files are JSON. Each IR carries a content hash over its semantic body
4
+ (everything except the ``provenance`` block, which itself stores the hash) so
5
+ that downstream artifacts can reference a scaffold by hash rather than by path
6
+ (org contract: "hashes over file paths").
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import hashlib
12
+ import json
13
+ from pathlib import Path
14
+
15
+ from model_parser.ir import IRModel
16
+
17
+
18
def compute_content_hash(ir: IRModel) -> str:
    """Hash the IR's semantic body and return it as ``sha256:<hex digest>``.

    The ``provenance`` block is excluded. The body is serialized as compact
    JSON with sorted keys before hashing.
    """
    semantic_body = ir.model_dump(mode="json", exclude={"provenance"})
    payload = json.dumps(semantic_body, sort_keys=True, separators=(",", ":")).encode("utf-8")
    hasher = hashlib.sha256()
    hasher.update(payload)
    return f"sha256:{hasher.hexdigest()}"
23
+
24
+
25
def with_content_hash(ir: IRModel) -> IRModel:
    """Return a deep copy of ``ir`` whose provenance carries the content hash.

    If ``ir`` has no provenance block there is nowhere to store the hash and
    ``ir`` itself is returned unchanged.
    """
    digest = compute_content_hash(ir)
    if ir.provenance is not None:
        stamped = ir.model_copy(deep=True)
        stamped.provenance.content_hash = digest
        return stamped
    return ir
33
+
34
+
35
def dumps_ir(ir: IRModel) -> str:
    """Render ``ir`` as 2-space-indented JSON, keys in model order, newline-terminated."""
    payload = ir.model_dump(mode="json")
    rendered = json.dumps(payload, indent=2, sort_keys=False)
    return f"{rendered}\n"
38
+
39
+
40
def save_ir(ir: IRModel, path: str | Path) -> None:
    """Serialize ``ir`` with ``dumps_ir`` and write it to ``path`` as UTF-8 text."""
    target = Path(path)
    target.write_text(dumps_ir(ir), encoding="utf-8")
43
+
44
+
45
def load_ir(path: str | Path) -> IRModel:
    """Read the UTF-8 JSON file at ``path`` and validate it into an :class:`IRModel`."""
    raw_text = Path(path).read_text(encoding="utf-8")
    return IRModel.model_validate(json.loads(raw_text))
@@ -0,0 +1,55 @@
1
+ """Canonical IR: data model and expression tree."""
2
+
3
+ from model_parser.ir.expr import (
4
+ ALLOWED_OPS,
5
+ ARITHMETIC_OPS,
6
+ COMPARISON_OPS,
7
+ FUNCTIONS,
8
+ UNARY_OPS,
9
+ Call,
10
+ Expr,
11
+ Num,
12
+ Sym,
13
+ call,
14
+ free_symbols,
15
+ num,
16
+ sym,
17
+ )
18
+ from model_parser.ir.model import (
19
+ IR_VERSION,
20
+ DiffEq,
21
+ Equations,
22
+ IRModel,
23
+ Local,
24
+ ModelInfo,
25
+ OutputEq,
26
+ Parameter,
27
+ Provenance,
28
+ Variable,
29
+ )
30
+
31
# Public API of the IR package (same names and order as before, grouped by row).
__all__ = [
    # Operator vocabularies.
    "ALLOWED_OPS", "ARITHMETIC_OPS", "COMPARISON_OPS", "FUNCTIONS", "UNARY_OPS",
    # Node and model classes, plus the IR version constant.
    "Call", "DiffEq", "Equations", "Expr", "IRModel", "IR_VERSION",
    "Local", "ModelInfo", "Num", "OutputEq", "Parameter", "Provenance",
    "Sym", "Variable",
    # Lower-case helper functions.
    "call", "free_symbols", "num", "sym",
]
@@ -0,0 +1,87 @@
1
+ """Expression IR: a small, explicit, backend-independent expression tree.
2
+
3
+ The expression sub-language is represented as a tagged tree rather than as a
4
+ backend-specific string. This is the core of ADR 0003 (explicit expression IR):
5
+ every backend lowers from the *same* tree, so conditional and numeric semantics
6
+ are defined once instead of being re-derived per target via string rewrites.
7
+
8
+ Three node kinds exist:
9
+
10
+ - :class:`Num` — a numeric literal.
11
+ - :class:`Sym` — a reference to a declared symbol (state, input, output,
12
+ parameter, or local).
13
+ - :class:`Call` — an operator or function applied to argument expressions. All
14
+ arithmetic operators, comparisons, unary negation, and named functions are
15
+ represented uniformly as calls keyed by :attr:`Call.op`.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ from typing import Annotated, Literal
21
+
22
+ from pydantic import BaseModel, Field
23
+
24
# Canonical names allowed in ``Call.op``. Frontends translate authoring-format
# spellings (for example ExprTk ``pow`` / ``if``) into this single vocabulary;
# each backend maps the names to its own syntax. Keep in sync with
# ``docs/design/ir-specification.md``.
ARITHMETIC_OPS: frozenset[str] = frozenset(("+", "-", "*", "/", "^"))
UNARY_OPS: frozenset[str] = frozenset(("neg",))
COMPARISON_OPS: frozenset[str] = frozenset(("<", ">", "<=", ">=", "==", "!="))
FUNCTIONS: frozenset[str] = frozenset(
    ("max", "min", "sqrt", "exp", "log", "abs", "ifelse")
)
# Union of every category above.
ALLOWED_OPS: frozenset[str] = frozenset().union(
    ARITHMETIC_OPS, UNARY_OPS, COMPARISON_OPS, FUNCTIONS
)
33
+
34
+
35
class Num(BaseModel):
    """Expression leaf holding a numeric literal."""

    # Discriminator tag; always "num".
    kind: Literal["num"] = "num"
    # The literal's value.
    value: float
40
+
41
+
42
class Sym(BaseModel):
    """Expression leaf that refers to a declared symbol by its canonical name."""

    # Discriminator tag; always "sym".
    kind: Literal["sym"] = "sym"
    # Canonical name of the referenced symbol.
    name: str
47
+
48
+
49
class Call(BaseModel):
    """Interior expression node: an operator or function applied to arguments."""

    # Discriminator tag; always "call".
    kind: Literal["call"] = "call"
    # Operator or function name.
    op: str
    # Argument expressions in call order; empty when none are given.
    args: list[Expr] = Field(default_factory=list)


Expr = Annotated[Num | Sym | Call, Field(discriminator="kind")]
"""Union of all expression node types, selected by the ``kind`` tag."""

# ``Call.args`` refers to ``Expr`` before it is defined above; rebuild the
# model now that the name exists.
Call.model_rebuild()
61
+
62
+
63
def num(value: float) -> Num:
    """Build a :class:`Num` literal node holding ``value``."""
    node = Num(value=value)
    return node
66
+
67
+
68
def sym(name: str) -> Sym:
    """Build a :class:`Sym` node referring to the symbol ``name``."""
    node = Sym(name=name)
    return node
71
+
72
+
73
def call(op: str, *args: Expr) -> Call:
    """Build a :class:`Call` node applying ``op`` to ``args`` in the given order."""
    arguments = [*args]
    return Call(op=op, args=arguments)
76
+
77
+
78
def free_symbols(expr: Expr) -> set[str]:
    """Collect the names of every :class:`Sym` reachable from ``expr``.

    Numeric literals contribute nothing; calls contribute the symbols of all
    their arguments, at any depth.
    """
    names: set[str] = set()
    pending = [expr]
    # Walk the tree with an explicit stack instead of recursion.
    while pending:
        node = pending.pop()
        if isinstance(node, Sym):
            names.add(node.name)
        elif isinstance(node, Call):
            pending.extend(node.args)
    return names