modelwright 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
modelwright/oracles.py ADDED
@@ -0,0 +1,132 @@
1
+ """Workbook oracle interface records.
2
+
3
+ Oracle implementations evaluate source workbook outputs for validation. This
4
+ module defines the boundary only; backend-specific execution belongs in
5
+ separate modules.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from dataclasses import dataclass, field
11
+ from typing import Any, Protocol
12
+
13
+ from modelwright.validation import DiagnosticSeverity, JsonValue, ScenarioInput, ScenarioOutput
14
+
15
+
16
# String-keyed mapping of JSON-compatible values; this is the declared type of
# ``OracleResult.outputs``.
OracleOutputs = dict[str, JsonValue]
17
+
18
+
19
@dataclass(frozen=True)
class OracleDiagnostic:
    """Immutable diagnostic emitted while a workbook oracle is queried for observed values.

    Only ``diagnostic_code`` and ``message`` are mandatory; ``severity``
    defaults to ``"warning"`` and the remaining fields default to ``None``.
    """

    diagnostic_code: str
    message: str
    severity: DiagnosticSeverity = "warning"
    location: str | None = None
    raw_value: JsonValue = None

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "OracleDiagnostic":
        """Create a diagnostic from its dictionary form.

        Raises:
            KeyError: if ``diagnostic_code`` or ``message`` is absent.
        """
        code = data["diagnostic_code"]
        text = data["message"]
        level = data.get("severity", "warning")
        return cls(
            diagnostic_code=code,
            message=text,
            severity=level,
            location=data.get("location"),
            raw_value=data.get("raw_value"),
        )

    def to_dict(self) -> dict[str, JsonValue]:
        """Return the dictionary form, leaving out ``location``/``raw_value`` when they are ``None``."""
        always_present: dict[str, JsonValue] = {
            "diagnostic_code": self.diagnostic_code,
            "message": self.message,
            "severity": self.severity,
        }
        maybe_present = (("location", self.location), ("raw_value", self.raw_value))
        # Only ``None`` is dropped; other falsy values such as 0 or "" are kept.
        populated = {key: value for key, value in maybe_present if value is not None}
        return {**always_present, **populated}
50
+
51
+
52
@dataclass(frozen=True)
class OracleRequest:
    """Immutable request asking a single oracle backend for observed workbook outputs.

    ``inputs`` defaults to an empty tuple and ``options`` to an empty dict.
    """

    source_workbook: str
    outputs: tuple[ScenarioOutput, ...]
    inputs: tuple[ScenarioInput, ...] = field(default_factory=tuple)
    options: dict[str, JsonValue] = field(default_factory=dict)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "OracleRequest":
        """Create a request from its dictionary form.

        ``outputs`` and ``inputs`` entries are parsed with
        ``ScenarioOutput.from_dict`` / ``ScenarioInput.from_dict``; ``options``
        is shallow-copied.

        Raises:
            KeyError: if ``source_workbook`` or ``outputs`` is absent.
        """
        workbook = data["source_workbook"]
        parsed_outputs = tuple([ScenarioOutput.from_dict(entry) for entry in data["outputs"]])
        parsed_inputs = tuple([ScenarioInput.from_dict(entry) for entry in data.get("inputs", [])])
        copied_options = dict(data.get("options", {}))
        return cls(
            source_workbook=workbook,
            outputs=parsed_outputs,
            inputs=parsed_inputs,
            options=copied_options,
        )

    def to_dict(self) -> dict[str, JsonValue]:
        """Return the dictionary form; ``options`` is returned as a shallow copy."""
        serialized_outputs = [item.to_dict() for item in self.outputs]
        serialized_inputs = [item.to_dict() for item in self.inputs]
        return {
            "source_workbook": self.source_workbook,
            "outputs": serialized_outputs,
            "inputs": serialized_inputs,
            "options": dict(self.options),
        }
77
+
78
+
79
@dataclass(frozen=True)
class OracleResult:
    """Immutable result carrying the workbook output values an oracle backend observed.

    ``outputs`` defaults to an empty dict and ``diagnostics`` to an empty tuple.
    """

    backend: str
    source_workbook: str
    outputs: OracleOutputs = field(default_factory=dict)
    diagnostics: tuple[OracleDiagnostic, ...] = field(default_factory=tuple)

    @property
    def success(self) -> bool:
        """``True`` when no attached diagnostic has severity ``"error"``."""
        return all(entry.severity != "error" for entry in self.diagnostics)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "OracleResult":
        """Create a result from its dictionary form.

        Any ``"success"`` key in *data* is not read; success is always derived
        from the diagnostics.

        Raises:
            KeyError: if ``backend`` or ``source_workbook`` is absent.
        """
        backend_name = data["backend"]
        workbook = data["source_workbook"]
        observed = dict(data.get("outputs", {}))
        parsed_diagnostics = tuple(
            [OracleDiagnostic.from_dict(entry) for entry in data.get("diagnostics", [])]
        )
        return cls(
            backend=backend_name,
            source_workbook=workbook,
            outputs=observed,
            diagnostics=parsed_diagnostics,
        )

    def to_dict(self) -> dict[str, JsonValue]:
        """Return the dictionary form, including the derived ``success`` flag."""
        serialized_diagnostics = [entry.to_dict() for entry in self.diagnostics]
        return {
            "backend": self.backend,
            "source_workbook": self.source_workbook,
            "success": self.success,
            "outputs": dict(self.outputs),
            "diagnostics": serialized_diagnostics,
        }
112
+
113
+
114
class WorkbookOracle(Protocol):
    """Structural interface that source-workbook oracle backends implement."""

    # Declared attribute every implementation exposes; presumably the backend's
    # identifying name (confirm against concrete backends).
    backend_name: str

    def evaluate(self, request: OracleRequest) -> OracleResult:
        """Produce the observed workbook values for the outputs listed in *request*."""
122
+
123
+
124
def missing_optional_dependency_diagnostic(*, dependency: str, extra: str, backend: str) -> OracleDiagnostic:
    """Create the standard error diagnostic for an oracle backend whose optional dependency is missing.

    The message names the ``modelwright[<extra>]`` install extra and the
    backend; *dependency* is stored as the diagnostic's ``raw_value``.
    """

    install_hint = f"Install modelwright[{extra}] to use the {backend} oracle backend."
    return OracleDiagnostic(
        diagnostic_code="missing_optional_dependency",
        message=install_hint,
        severity="error",
        raw_value=dependency,
    )
@@ -0,0 +1,209 @@
1
+ """Workbook reference normalization records and helpers."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+ from dataclasses import dataclass
7
+ from typing import Any, Literal
8
+
9
+ from openpyxl.utils.cell import range_boundaries
10
+
11
+
12
+ JsonValue = str | int | float | bool | None | list[Any] | dict[str, Any]
13
+ ReferenceKind = Literal["cell", "range", "named_range", "structured", "external", "unresolved"]
14
+
15
+ _CELL_RE = re.compile(r"^\$?[A-Za-z]{1,3}\$?\d+$")
16
+ _SHEET_AND_COORD_RE = re.compile(r"^(?P<sheet>'[^']+'|[^!]+)!(?P<coord>.+)$")
17
+ _NAME_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_.]*$")
18
+ _EXTERNAL_WORKBOOK_RE = re.compile(r"\[[^\]]+\.(?:xlsx|xlsm|xlsb|xls|csv)\]", re.IGNORECASE)
19
+
20
+
21
@dataclass(frozen=True)
class WorkbookReference:
    """Immutable canonical form of a single workbook reference token.

    ``kind``, ``original`` and ``normalized`` are always set; every other
    field defaults to ``None``.
    """

    kind: ReferenceKind
    original: str
    normalized: str
    workbook: str | None = None
    sheet: str | None = None
    start_cell: str | None = None
    end_cell: str | None = None
    name: str | None = None
    diagnostic_code: str | None = None

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "WorkbookReference":
        """Create a reference from its dictionary form.

        Raises:
            KeyError: if ``kind``, ``original`` or ``normalized`` is absent.
        """
        optional_keys = ("workbook", "sheet", "start_cell", "end_cell", "name", "diagnostic_code")
        return cls(
            kind=data["kind"],
            original=data["original"],
            normalized=data["normalized"],
            **{key: data.get(key) for key in optional_keys},
        )

    def to_dict(self) -> dict[str, JsonValue]:
        """Return the dictionary form; every field is emitted, including those that are ``None``."""
        field_order = (
            "kind",
            "original",
            "normalized",
            "workbook",
            "sheet",
            "start_cell",
            "end_cell",
            "name",
            "diagnostic_code",
        )
        return {field_name: getattr(self, field_name) for field_name in field_order}
61
+
62
+
63
def normalize_reference(reference: str, *, current_sheet: str | None = None) -> WorkbookReference:
    """Classify and normalize a single workbook formula reference token.

    Checks run in this order on the whitespace-stripped token: empty,
    external workbook, structured reference, sheet-qualified coordinate,
    named range, and finally a bare coordinate resolved against
    *current_sheet*. Anything left over is returned as ``"unresolved"``.
    The returned record always keeps the unstripped *reference* as
    ``original``.
    """

    token = reference.strip()
    if not token:
        return _unresolved(reference, "empty_reference")

    # External references are tested before structured ones because both contain brackets.
    if _is_external_reference(token):
        return WorkbookReference(
            kind="external",
            original=reference,
            normalized=token,
            workbook=_external_workbook(token),
            diagnostic_code="external_reference",
        )

    if _is_structured_reference(token):
        return WorkbookReference(
            kind="structured",
            original=reference,
            normalized=token,
            diagnostic_code="unsupported_structured_reference",
        )

    sheet_name, coordinate = _split_sheet_and_coordinate(token)
    if sheet_name is None:
        if _is_named_range(token):
            return WorkbookReference(
                kind="named_range",
                original=reference,
                normalized=token,
                name=token,
            )
        # No sheet qualifier: fall back to the caller-supplied sheet (may still be None).
        sheet_name, coordinate = current_sheet, token

    if sheet_name is None or not _is_cell_or_range(coordinate):
        return _unresolved(reference, "unresolved_reference")

    return _cell_or_range_reference(
        original=reference,
        sheet_name=sheet_name,
        coordinate=coordinate,
    )
108
+
109
+
110
def normalize_cell_reference(reference: str, *, current_sheet: str | None = None) -> WorkbookReference:
    """Normalize a token that must identify exactly one cell.

    Any result whose kind is not ``"cell"`` is replaced by an unresolved
    reference carrying the ``"not_a_cell_reference"`` diagnostic code.
    """

    candidate = normalize_reference(reference, current_sheet=current_sheet)
    if candidate.kind != "cell":
        return _unresolved(reference, "not_a_cell_reference")
    return candidate
117
+
118
+
119
def _boundary_label(column: int | None, row: int | None) -> str:
    """Render one range endpoint, omitting whichever axis ``range_boundaries`` reported as ``None``."""
    column_part = "" if column is None else _column_name(column)
    row_part = "" if row is None else str(row)
    return f"{column_part}{row_part}"


def _cell_or_range_reference(*, original: str, sheet_name: str, coordinate: str) -> WorkbookReference:
    """Build a ``"cell"`` or ``"range"`` reference for *coordinate* on *sheet_name*.

    ``$`` markers are dropped and the coordinate is parsed with openpyxl's
    ``range_boundaries``. A coordinate it rejects yields an unresolved
    reference with the ``"unresolved_reference"`` diagnostic code.

    Whole-column (``A:C``) and whole-row (``1:5``) ranges come back from
    ``range_boundaries`` with ``None`` for the missing axis. Those are
    rendered without that axis (``Sheet!A:C``) and always classified as
    ranges, even when both endpoints are equal (``A:A``), rather than being
    formatted with a literal ``None`` or mistaken for a single cell.
    """

    clean_coordinate = coordinate.replace("$", "")
    try:
        min_col, min_row, max_col, max_row = range_boundaries(clean_coordinate)
    except ValueError:
        return _unresolved(original, "unresolved_reference")

    start_cell = _boundary_label(min_col, min_row)
    end_cell = _boundary_label(max_col, max_row)
    if not start_cell or not end_cell:
        # Nothing usable was parsed (no column and no row on an endpoint).
        return _unresolved(original, "unresolved_reference")

    # Only a fully specified coordinate (column and row) can be a single cell.
    is_single_cell = start_cell == end_cell and min_col is not None and min_row is not None
    if is_single_cell:
        return WorkbookReference(
            kind="cell",
            original=original,
            normalized=f"{sheet_name}!{start_cell}",
            sheet=sheet_name,
            start_cell=start_cell,
            end_cell=end_cell,
        )

    return WorkbookReference(
        kind="range",
        original=original,
        normalized=f"{sheet_name}!{start_cell}:{end_cell}",
        sheet=sheet_name,
        start_cell=start_cell,
        end_cell=end_cell,
    )
146
+
147
+
148
def _split_sheet_and_coordinate(reference: str) -> tuple[str | None, str]:
    """Split ``<sheet>!<coord>`` into an unquoted sheet name and the coordinate text.

    Returns ``(None, reference)`` when *reference* has no sheet qualifier.
    """
    parsed = _SHEET_AND_COORD_RE.match(reference)
    if parsed is None:
        return None, reference
    sheet_token, coordinate = parsed.group("sheet", "coord")
    return _unquote_sheet(sheet_token), coordinate
153
+
154
+
155
+ def _unquote_sheet(sheet_name: str) -> str:
156
+ if sheet_name.startswith("'") and sheet_name.endswith("'"):
157
+ return sheet_name[1:-1].replace("''", "'")
158
+ return sheet_name
159
+
160
+
161
def _is_cell_or_range(coordinate: str) -> bool:
    """Report whether *coordinate* is a single cell or a ``:``-separated range.

    ``$`` markers are ignored. A non-cell coordinate counts as a range only
    if openpyxl's ``range_boundaries`` accepts it and it contains a colon.
    """
    stripped = coordinate.replace("$", "")
    if _CELL_RE.match(stripped) is not None:
        return True
    try:
        range_boundaries(stripped)
    except ValueError:
        return False
    else:
        return ":" in stripped
170
+
171
+
172
def _is_named_range(reference: str) -> bool:
    """Report whether *reference* is identifier-like and is not itself a cell coordinate."""
    if _NAME_RE.match(reference) is None:
        return False
    return _CELL_RE.match(reference) is None
174
+
175
+
176
def _is_external_reference(reference: str) -> bool:
    """Report whether *reference* points into another workbook.

    True when it contains a bracketed workbook file name, or when it has the
    looser bracket-plus-``!`` shape checked by ``_looks_like_external_reference``.
    """
    if _EXTERNAL_WORKBOOK_RE.search(reference) is not None:
        return True
    return _looks_like_external_reference(reference)
178
+
179
+
180
+ def _is_structured_reference(reference: str) -> bool:
181
+ return "[" in reference and "]" in reference
182
+
183
+
184
+ def _looks_like_external_reference(reference: str) -> bool:
185
+ return "[" in reference and "]" in reference and "!" in reference
186
+
187
+
188
+ def _external_workbook(reference: str) -> str | None:
189
+ match = re.search(r"\[([^\]]+)\]", reference)
190
+ if match:
191
+ return match.group(1)
192
+ return None
193
+
194
+
195
def _unresolved(original: str, diagnostic_code: str) -> WorkbookReference:
    """Build an ``"unresolved"`` reference whose normalized form is the stripped *original*."""
    stripped = original.strip()
    return WorkbookReference(
        kind="unresolved",
        original=original,
        normalized=stripped,
        diagnostic_code=diagnostic_code,
    )
202
+
203
+
204
+ def _column_name(index: int) -> str:
205
+ name = ""
206
+ while index:
207
+ index, remainder = divmod(index - 1, 26)
208
+ name = chr(65 + remainder) + name
209
+ return name