modelwright 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,931 @@
1
+ """Conversion plan records and assembly helpers."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from collections import Counter
6
+ from collections.abc import Mapping
7
+ from dataclasses import dataclass, field
8
+ from datetime import UTC, datetime
9
+ from typing import Any, Literal
10
+
11
+ from modelwright.extraction import WorkbookRecord
12
+ from modelwright.formulas import FormulaExpression
13
+ from modelwright.generation import GenerationResult
14
+ from modelwright.graph import DependencyGraph
15
+ from modelwright.validation import ValidationReport
16
+
17
+
18
+ JsonValue = str | int | float | bool | None | list[Any] | dict[str, Any]
19
+ BenchmarkRole = Literal[
20
+ "primary_benchmark",
21
+ "stress_benchmark",
22
+ "broken_reference_regression",
23
+ "synthetic_fixture",
24
+ "ad_hoc_private",
25
+ ]
26
+ StageStatus = Literal["pass", "blocked", "not_run"]
27
+ ValidationStatus = Literal["pass", "fail", "blocked", "not_run"]
28
+ OverallStatus = Literal["complete", "partial", "blocked"]
29
+ DiagnosticSeverity = Literal["info", "warning", "error"]
30
+ BlockerCategory = Literal[
31
+ "source_workbook_defect",
32
+ "unsupported_formula_semantics",
33
+ "unsupported_reference_semantics",
34
+ "graph_semantics",
35
+ "generation_scope",
36
+ "validation_oracle",
37
+ "missing_cached_values",
38
+ "external_dependency",
39
+ "unknown",
40
+ ]
41
+ BlockerDisposition = Literal["resolved", "blocked_by_design", "deferred", "out_of_scope", "next_target"]
42
+
43
+
44
@dataclass(frozen=True)
class ConversionSource:
    """Identity of the source workbook together with its benchmark role."""

    workbook_id: str
    file_type: str
    benchmark_role: BenchmarkRole
    # Optional; None when no path is recorded.
    source_path: str | None = None
    sanitized: bool = True

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "ConversionSource":
        """Create a record from a plain dictionary.

        ``workbook_id``, ``file_type`` and ``benchmark_role`` are mandatory
        (a missing key raises ``KeyError``); ``source_path`` falls back to
        ``None`` and ``sanitized`` to ``True``.
        """
        mandatory = {
            key: data[key] for key in ("workbook_id", "file_type", "benchmark_role")
        }
        return cls(
            **mandatory,
            source_path=data.get("source_path"),
            sanitized=data.get("sanitized", True),
        )

    def to_dict(self) -> dict[str, JsonValue]:
        """Return the record as a dictionary keyed by field name."""
        ordered_keys = (
            "workbook_id",
            "file_type",
            "benchmark_role",
            "source_path",
            "sanitized",
        )
        return {key: getattr(self, key) for key in ordered_keys}
72
+
73
+
74
@dataclass(frozen=True)
class WorkflowStatus:
    """Per-stage status of the conversion workflow plus an overall verdict."""

    extraction: StageStatus = "not_run"
    dependency_graph: StageStatus = "not_run"
    formula_translation: StageStatus = "not_run"
    generation: StageStatus = "not_run"
    cached_validation: ValidationStatus = "not_run"
    oracle_validation: ValidationStatus = "not_run"
    overall: OverallStatus = "blocked"

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "WorkflowStatus":
        """Create a status record; absent stages default to ``"not_run"``
        and an absent ``overall`` defaults to ``"blocked"``."""
        stage_keys = (
            "extraction",
            "dependency_graph",
            "formula_translation",
            "generation",
            "cached_validation",
            "oracle_validation",
        )
        stages = {key: data.get(key, "not_run") for key in stage_keys}
        return cls(**stages, overall=data.get("overall", "blocked"))

    def to_dict(self) -> dict[str, JsonValue]:
        """Return every stage status and the overall status keyed by field name."""
        ordered_keys = (
            "extraction",
            "dependency_graph",
            "formula_translation",
            "generation",
            "cached_validation",
            "oracle_validation",
            "overall",
        )
        return {key: getattr(self, key) for key in ordered_keys}
108
+
109
+
110
@dataclass(frozen=True)
class CoverageSummary:
    """Counts describing how much of a workbook the conversion covers."""

    sheets: int
    cells: int
    value_cells: int
    formula_cells: int
    translated_formula_cells: int
    untranslated_formula_cells: int
    translation_coverage: float
    named_ranges: int
    dependency_edges: int
    semantic_edges: int
    execution_edges: int

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "CoverageSummary":
        """Create a summary from a dictionary; every field is mandatory and a
        missing key raises ``KeyError``."""
        ordered_keys = (
            "sheets",
            "cells",
            "value_cells",
            "formula_cells",
            "translated_formula_cells",
            "untranslated_formula_cells",
            "translation_coverage",
            "named_ranges",
            "dependency_edges",
            "semantic_edges",
            "execution_edges",
        )
        return cls(**{key: data[key] for key in ordered_keys})

    def to_dict(self) -> dict[str, JsonValue]:
        """Return all counts keyed by field name."""
        ordered_keys = (
            "sheets",
            "cells",
            "value_cells",
            "formula_cells",
            "translated_formula_cells",
            "untranslated_formula_cells",
            "translation_coverage",
            "named_ranges",
            "dependency_edges",
            "semantic_edges",
            "execution_edges",
        )
        return {key: getattr(self, key) for key in ordered_keys}
156
+
157
+
158
@dataclass(frozen=True)
class DiagnosticSummary:
    """Diagnostic counts by workflow stage.

    Each field maps a diagnostic code to the number of times it occurred in
    that stage.
    """

    workbook_extraction: dict[str, int] = field(default_factory=dict)
    named_ranges: dict[str, int] = field(default_factory=dict)
    formula_extraction: dict[str, int] = field(default_factory=dict)
    graph: dict[str, int] = field(default_factory=dict)
    translation: dict[str, int] = field(default_factory=dict)
    generation: dict[str, int] = field(default_factory=dict)
    cached_validation: dict[str, int] = field(default_factory=dict)
    oracle_validation: dict[str, int] = field(default_factory=dict)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "DiagnosticSummary":
        """Create a summary from a dictionary.

        Missing stages default to an empty mapping. Each stage mapping is
        copied so the record does not alias the caller's dictionaries.
        """
        return cls(
            workbook_extraction=dict(data.get("workbook_extraction", {})),
            named_ranges=dict(data.get("named_ranges", {})),
            formula_extraction=dict(data.get("formula_extraction", {})),
            graph=dict(data.get("graph", {})),
            translation=dict(data.get("translation", {})),
            generation=dict(data.get("generation", {})),
            cached_validation=dict(data.get("cached_validation", {})),
            oracle_validation=dict(data.get("oracle_validation", {})),
        )

    def to_dict(self) -> dict[str, JsonValue]:
        """Return the per-stage counts keyed by stage name.

        Each stage mapping is copied: the record is frozen, so handing out
        the internal dictionaries would let callers mutate it through the
        serialized form.
        """
        return {
            "workbook_extraction": dict(self.workbook_extraction),
            "named_ranges": dict(self.named_ranges),
            "formula_extraction": dict(self.formula_extraction),
            "graph": dict(self.graph),
            "translation": dict(self.translation),
            "generation": dict(self.generation),
            "cached_validation": dict(self.cached_validation),
            "oracle_validation": dict(self.oracle_validation),
        }
195
+
196
+
197
@dataclass(frozen=True)
class ResidualBlocker:
    """A classified blocker that remains after conversion planning."""

    blocker_id: str
    category: BlockerCategory
    diagnostic_code: str
    item: str
    count: int
    severity: DiagnosticSeverity
    disposition: BlockerDisposition
    next_action: str
    provenance: str

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "ResidualBlocker":
        """Create a blocker from a dictionary.

        Every key is mandatory (``KeyError`` when missing) except
        ``severity``, which falls back to ``"warning"``.
        """
        ordered_keys = (
            "blocker_id",
            "category",
            "diagnostic_code",
            "item",
            "count",
            "severity",
            "disposition",
            "next_action",
            "provenance",
        )
        values: dict[str, Any] = {}
        for key in ordered_keys:
            if key == "severity":
                values[key] = data.get(key, "warning")
            else:
                values[key] = data[key]
        return cls(**values)

    def to_dict(self) -> dict[str, JsonValue]:
        """Return the blocker as a dictionary keyed by field name."""
        ordered_keys = (
            "blocker_id",
            "category",
            "diagnostic_code",
            "item",
            "count",
            "severity",
            "disposition",
            "next_action",
            "provenance",
        )
        return {key: getattr(self, key) for key in ordered_keys}
237
+
238
+
239
@dataclass(frozen=True)
class GenerationSummary:
    """Summary of the generated model recorded on a conversion plan."""

    generated: bool = False
    generated_model_path: str | None = None
    selected_outputs: int = 0
    selected_input_dependencies: int = 0
    selection_strategy: str = "not_run"
    full_workbook_model: bool = False

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "GenerationSummary":
        """Create a summary from a dictionary, using the field defaults for
        any key that is absent."""
        fallbacks = (
            ("generated", False),
            ("generated_model_path", None),
            ("selected_outputs", 0),
            ("selected_input_dependencies", 0),
            ("selection_strategy", "not_run"),
            ("full_workbook_model", False),
        )
        return cls(**{key: data.get(key, fallback) for key, fallback in fallbacks})

    def to_dict(self) -> dict[str, JsonValue]:
        """Return the summary as a dictionary keyed by field name."""
        ordered_keys = (
            "generated",
            "generated_model_path",
            "selected_outputs",
            "selected_input_dependencies",
            "selection_strategy",
            "full_workbook_model",
        )
        return {key: getattr(self, key) for key in ordered_keys}
270
+
271
+
272
@dataclass(frozen=True)
class ValidationSummary:
    """Outcome of cached-value and oracle validation of generated outputs."""

    cached_validation_status: ValidationStatus = "not_run"
    cached_outputs: int = 0
    cached_mismatches: int = 0
    oracle_backend: str | None = None
    oracle_status: ValidationStatus = "not_run"
    oracle_mismatches: int = 0
    oracle_blockers: tuple[str, ...] = field(default_factory=tuple)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "ValidationSummary":
        """Create a summary from a dictionary, using the field defaults for
        absent keys; ``oracle_blockers`` is converted to a tuple."""
        blockers = tuple(data.get("oracle_blockers", []))
        scalar_fallbacks = (
            ("cached_validation_status", "not_run"),
            ("cached_outputs", 0),
            ("cached_mismatches", 0),
            ("oracle_backend", None),
            ("oracle_status", "not_run"),
            ("oracle_mismatches", 0),
        )
        scalars = {key: data.get(key, fallback) for key, fallback in scalar_fallbacks}
        return cls(**scalars, oracle_blockers=blockers)

    def to_dict(self) -> dict[str, JsonValue]:
        """Return the summary as a dictionary; ``oracle_blockers`` is emitted
        as a list."""
        scalar_keys = (
            "cached_validation_status",
            "cached_outputs",
            "cached_mismatches",
            "oracle_backend",
            "oracle_status",
            "oracle_mismatches",
        )
        payload: dict[str, JsonValue] = {key: getattr(self, key) for key in scalar_keys}
        payload["oracle_blockers"] = list(self.oracle_blockers)
        return payload
306
+
307
+
308
@dataclass(frozen=True)
class PlanRecommendation:
    """A prioritized next action suggested by a conversion plan."""

    priority: int
    action: str
    rationale: str
    target_issue: int | None = None

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "PlanRecommendation":
        """Create a recommendation from a dictionary.

        ``priority``, ``action`` and ``rationale`` are mandatory
        (``KeyError`` when missing); ``target_issue`` defaults to ``None``.
        """
        mandatory = {key: data[key] for key in ("priority", "action", "rationale")}
        return cls(**mandatory, target_issue=data.get("target_issue"))

    def to_dict(self) -> dict[str, JsonValue]:
        """Return the recommendation as a dictionary keyed by field name."""
        ordered_keys = ("priority", "action", "rationale", "target_issue")
        return {key: getattr(self, key) for key in ordered_keys}
333
+
334
+
335
@dataclass(frozen=True)
class PrivacyReview:
    """Flags recording which kinds of sensitive content a plan contains.

    Every flag defaults to ``False``.
    """

    contains_source_path: bool = False
    contains_sheet_names: bool = False
    contains_named_ranges: bool = False
    contains_raw_formulas: bool = False
    contains_raw_cell_values: bool = False
    contains_generated_source: bool = False

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "PrivacyReview":
        """Create a review from a dictionary; absent flags default to ``False``."""
        flag_keys = (
            "contains_source_path",
            "contains_sheet_names",
            "contains_named_ranges",
            "contains_raw_formulas",
            "contains_raw_cell_values",
            "contains_generated_source",
        )
        return cls(**{key: data.get(key, False) for key in flag_keys})

    def to_dict(self) -> dict[str, JsonValue]:
        """Return all flags keyed by field name."""
        flag_keys = (
            "contains_source_path",
            "contains_sheet_names",
            "contains_named_ranges",
            "contains_raw_formulas",
            "contains_raw_cell_values",
            "contains_generated_source",
        )
        return {key: getattr(self, key) for key in flag_keys}
366
+
367
+
368
@dataclass(frozen=True)
class ConversionPlan:
    """Inspectable record of a partial or complete workbook conversion.

    Aggregates the source identity, stage statuses, coverage counts,
    diagnostics, residual blockers, generation and validation summaries,
    recommendations and privacy flags.
    """

    plan_id: str
    created_at: str
    modelwright_commit: str
    source: ConversionSource
    workflow_status: WorkflowStatus
    coverage: CoverageSummary
    diagnostic_summary: DiagnosticSummary
    residual_blockers: tuple[ResidualBlocker, ...] = field(default_factory=tuple)
    generation: GenerationSummary = field(default_factory=GenerationSummary)
    validation: ValidationSummary = field(default_factory=ValidationSummary)
    recommendations: tuple[PlanRecommendation, ...] = field(default_factory=tuple)
    privacy_review: PrivacyReview = field(default_factory=PrivacyReview)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "ConversionPlan":
        """Rebuild a plan from its dictionary form.

        ``plan_id``, ``created_at``, ``modelwright_commit``, ``source``,
        ``workflow_status``, ``coverage`` and ``diagnostic_summary`` are
        mandatory (``KeyError`` when missing); the remaining sections fall
        back to empty collections or default-constructed summaries.
        """
        plan_id = data["plan_id"]
        created_at = data["created_at"]
        commit = data["modelwright_commit"]
        source = ConversionSource.from_dict(data["source"])
        status = WorkflowStatus.from_dict(data["workflow_status"])
        coverage = CoverageSummary.from_dict(data["coverage"])
        diagnostics = DiagnosticSummary.from_dict(data["diagnostic_summary"])
        blockers = tuple(
            ResidualBlocker.from_dict(entry) for entry in data.get("residual_blockers", [])
        )
        generation = GenerationSummary.from_dict(data.get("generation", {}))
        validation = ValidationSummary.from_dict(data.get("validation", {}))
        recommendations = tuple(
            PlanRecommendation.from_dict(entry) for entry in data.get("recommendations", [])
        )
        privacy = PrivacyReview.from_dict(data.get("privacy_review", {}))
        return cls(
            plan_id=plan_id,
            created_at=created_at,
            modelwright_commit=commit,
            source=source,
            workflow_status=status,
            coverage=coverage,
            diagnostic_summary=diagnostics,
            residual_blockers=blockers,
            generation=generation,
            validation=validation,
            recommendations=recommendations,
            privacy_review=privacy,
        )

    def to_dict(self) -> dict[str, JsonValue]:
        """Return the plan as nested dictionaries and lists, delegating to
        each section's own ``to_dict``."""
        payload: dict[str, JsonValue] = {
            "plan_id": self.plan_id,
            "created_at": self.created_at,
            "modelwright_commit": self.modelwright_commit,
        }
        payload["source"] = self.source.to_dict()
        payload["workflow_status"] = self.workflow_status.to_dict()
        payload["coverage"] = self.coverage.to_dict()
        payload["diagnostic_summary"] = self.diagnostic_summary.to_dict()
        payload["residual_blockers"] = [entry.to_dict() for entry in self.residual_blockers]
        payload["generation"] = self.generation.to_dict()
        payload["validation"] = self.validation.to_dict()
        payload["recommendations"] = [entry.to_dict() for entry in self.recommendations]
        payload["privacy_review"] = self.privacy_review.to_dict()
        return payload
421
+
422
+
423
def build_conversion_plan(
    *,
    plan_id: str,
    workbook: WorkbookRecord,
    graph: DependencyGraph,
    expressions: Mapping[str, FormulaExpression],
    benchmark_role: BenchmarkRole,
    modelwright_commit: str = "unknown",
    generation_result: GenerationResult | None = None,
    cached_validation_report: ValidationReport | None = None,
    oracle_validation_report: ValidationReport | None = None,
    oracle_blockers: tuple[str, ...] = (),
    generated_model_path: str | None = None,
    include_source_path: bool = False,
    full_workbook_model: bool = False,
) -> ConversionPlan:
    """Assemble a JSON-serializable conversion plan from workflow records.

    Coverage is derived from the workbook cells and the ``translated`` flag
    of each expression; a workbook without formula cells gets a translation
    coverage of ``1.0``. The source path is recorded only when
    ``include_source_path`` is true, in which case the source is marked as
    not sanitized and the privacy review flags the path. ``created_at`` is
    the current UTC time in ISO format.
    """

    total_cells = len(workbook.cells)
    formula_count = sum(1 for cell in workbook.cells if cell.formula is not None)
    translated_count = sum(1 for entry in expressions.values() if entry.translated)
    if formula_count:
        coverage_ratio = translated_count / formula_count
    else:
        coverage_ratio = 1.0

    diagnostics = _diagnostic_summary(
        workbook=workbook,
        graph=graph,
        expressions=expressions,
        generation_result=generation_result,
        cached_validation_report=cached_validation_report,
        oracle_validation_report=oracle_validation_report,
        oracle_blockers=oracle_blockers,
    )
    generation = _generation_summary(
        generation_result=generation_result,
        generated_model_path=generated_model_path,
        full_workbook_model=full_workbook_model,
    )
    validation = _validation_summary(
        cached_validation_report=cached_validation_report,
        oracle_validation_report=oracle_validation_report,
        oracle_blockers=oracle_blockers,
    )
    blockers = _residual_blockers(diagnostics)
    status = _workflow_status(
        translated_formula_cells=translated_count,
        formula_cells=formula_count,
        generation_summary=generation,
        validation_summary=validation,
        residual_blockers=blockers,
        full_workbook_model=full_workbook_model,
    )

    created_at = datetime.now(UTC).isoformat()
    source = ConversionSource(
        workbook_id=workbook.workbook_id,
        file_type=_file_type(workbook.source_path),
        benchmark_role=benchmark_role,
        # The path is only exposed when the caller explicitly opts in.
        source_path=workbook.source_path if include_source_path else None,
        sanitized=not include_source_path,
    )
    coverage = CoverageSummary(
        sheets=len(workbook.sheets),
        cells=total_cells,
        value_cells=total_cells - formula_count,
        formula_cells=formula_count,
        translated_formula_cells=translated_count,
        untranslated_formula_cells=formula_count - translated_count,
        translation_coverage=coverage_ratio,
        named_ranges=len(workbook.named_ranges),
        dependency_edges=len(graph.edges),
        semantic_edges=len(graph.semantic_edges),
        execution_edges=len(graph.execution_edges),
    )
    return ConversionPlan(
        plan_id=plan_id,
        created_at=created_at,
        modelwright_commit=modelwright_commit,
        source=source,
        workflow_status=status,
        coverage=coverage,
        diagnostic_summary=diagnostics,
        residual_blockers=blockers,
        generation=generation,
        validation=validation,
        recommendations=_recommendations(blockers, validation),
        privacy_review=PrivacyReview(contains_source_path=include_source_path),
    )
506
+
507
+
508
def _diagnostic_summary(
    *,
    workbook: WorkbookRecord,
    graph: DependencyGraph,
    expressions: Mapping[str, FormulaExpression],
    generation_result: GenerationResult | None,
    cached_validation_report: ValidationReport | None,
    oracle_validation_report: ValidationReport | None,
    oracle_blockers: tuple[str, ...],
) -> DiagnosticSummary:
    """Count diagnostics for each workflow stage.

    When ``oracle_blockers`` is non-empty those blockers are counted as the
    oracle-validation diagnostics instead of the oracle report's comparisons.
    """
    extraction_counts = _counter_dict(entry.code for entry in workbook.diagnostics)
    named_range_counts = _counter_dict(
        entry.code
        for named_range in workbook.named_ranges
        for entry in named_range.diagnostics
    )
    formula_counts = _counter_dict(
        entry.code
        for cell in workbook.cells
        if cell.formula is not None
        for entry in cell.formula.diagnostics
    )
    # NOTE(review): graph diagnostics are counted as-is rather than by
    # ``.code`` like the other stages - presumably they are already code
    # strings; confirm against DependencyGraph.
    graph_counts = _counter_dict(graph.diagnostics)
    translation_counts = _counter_dict(
        entry.code
        for expression in expressions.values()
        for entry in expression.diagnostics
    )

    if generation_result is None:
        generation_counts: dict[str, int] = {}
    else:
        generation_counts = _counter_dict(
            entry.code for entry in generation_result.diagnostics
        )

    cached_counts = _validation_diagnostic_counts(cached_validation_report)
    if oracle_blockers:
        oracle_counts = _counter_dict(oracle_blockers)
    else:
        oracle_counts = _validation_diagnostic_counts(oracle_validation_report)

    return DiagnosticSummary(
        workbook_extraction=extraction_counts,
        named_ranges=named_range_counts,
        formula_extraction=formula_counts,
        graph=graph_counts,
        translation=translation_counts,
        generation=generation_counts,
        cached_validation=cached_counts,
        oracle_validation=oracle_counts,
    )
547
+
548
+
549
def _generation_summary(
    *,
    generation_result: GenerationResult | None,
    generated_model_path: str | None,
    full_workbook_model: bool,
) -> GenerationSummary:
    """Summarize a generation result.

    Without a result the default (not-run) summary is returned and the other
    arguments are ignored. Otherwise output and input counts come from the
    result's contract and the strategy is ``"contract_outputs"``.
    """
    if generation_result is not None:
        contract = generation_result.contract
        return GenerationSummary(
            generated=generation_result.generated,
            generated_model_path=generated_model_path,
            selected_outputs=len(contract.output_refs),
            selected_input_dependencies=len(contract.input_refs),
            selection_strategy="contract_outputs",
            full_workbook_model=full_workbook_model,
        )
    return GenerationSummary()
565
+
566
+
567
def _validation_summary(
    *,
    cached_validation_report: ValidationReport | None,
    oracle_validation_report: ValidationReport | None,
    oracle_blockers: tuple[str, ...],
) -> ValidationSummary:
    """Summarize the cached and oracle validation reports.

    Missing reports contribute zero counts and a ``"not_run"`` status. Any
    oracle blockers force the oracle status to ``"blocked"`` regardless of
    the oracle report.
    """
    if cached_validation_report is None:
        cached_outputs = 0
        cached_mismatches = 0
    else:
        cached_outputs = len(cached_validation_report.comparisons)
        cached_mismatches = len(cached_validation_report.mismatches)

    if oracle_validation_report is None:
        oracle_backend = None
        oracle_mismatches = 0
    else:
        oracle_backend = oracle_validation_report.oracle_backend
        oracle_mismatches = len(oracle_validation_report.mismatches)

    if oracle_blockers:
        oracle_status: ValidationStatus = "blocked"
    else:
        oracle_status = _report_status(oracle_validation_report)

    return ValidationSummary(
        cached_validation_status=_report_status(cached_validation_report),
        cached_outputs=cached_outputs,
        cached_mismatches=cached_mismatches,
        oracle_backend=oracle_backend,
        oracle_status=oracle_status,
        oracle_mismatches=oracle_mismatches,
        oracle_blockers=oracle_blockers,
    )
583
+
584
+
585
def _workflow_status(
    *,
    translated_formula_cells: int,
    formula_cells: int,
    generation_summary: GenerationSummary,
    validation_summary: ValidationSummary,
    residual_blockers: tuple[ResidualBlocker, ...],
    full_workbook_model: bool,
) -> WorkflowStatus:
    """Derive stage statuses and the overall status of a conversion.

    Extraction and dependency-graph stages are always reported as ``"pass"``.
    Formula translation passes when at least one formula was translated, or
    when the workbook has no formula cells at all (nothing to translate).
    The overall status is ``"blocked"`` when translation is blocked,
    ``"complete"`` for a full-workbook model with no residual blockers and a
    passing oracle validation, and ``"partial"`` otherwise.
    """
    # A workbook without formulas has translation coverage 1.0, so it must
    # not be reported as blocked merely because zero formulas were translated.
    translation_ok = bool(translated_formula_cells) or formula_cells == 0
    formula_translation: StageStatus = "pass" if translation_ok else "blocked"
    generation: StageStatus = "pass" if generation_summary.generated else "not_run"
    if not translation_ok:
        overall: OverallStatus = "blocked"
    elif full_workbook_model and not residual_blockers and validation_summary.oracle_status == "pass":
        overall = "complete"
    else:
        overall = "partial"
    return WorkflowStatus(
        extraction="pass",
        dependency_graph="pass",
        formula_translation=formula_translation,
        generation=generation,
        cached_validation=validation_summary.cached_validation_status,
        oracle_validation=validation_summary.oracle_status,
        overall=overall,
    )
611
+
612
+
613
def _residual_blockers(diagnostics: DiagnosticSummary) -> tuple[ResidualBlocker, ...]:
    """Turn every diagnostic count into a classified residual blocker.

    Stages are visited in a fixed order (extraction, named ranges, formula
    extraction, graph, translation, generation, cached validation, oracle
    validation) and codes are sorted within each stage. The numeric part of
    each blocker id is the blocker's 1-based position in the whole result,
    not its position within the stage.
    """
    collected: list[ResidualBlocker] = []

    def record(
        prefix: str,
        code: str,
        count: int,
        category: BlockerCategory,
        disposition: BlockerDisposition,
        next_action: str,
        provenance: str,
    ) -> None:
        # The index keeps counting across stages.
        collected.append(
            _diagnostic_blocker(
                prefix=prefix,
                index=len(collected) + 1,
                code=code,
                count=count,
                category=category,
                disposition=disposition,
                next_action=next_action,
                provenance=provenance,
            )
        )

    for code, count in sorted(diagnostics.workbook_extraction.items()):
        record(
            "extraction",
            code,
            count,
            _extraction_blocker_category(code),
            _extraction_blocker_disposition(code),
            _extraction_next_action(code),
            "extraction",
        )

    for code, count in sorted(diagnostics.named_ranges.items()):
        record(
            "named-range",
            code,
            count,
            _named_range_blocker_category(code),
            _named_range_blocker_disposition(code),
            _named_range_next_action(code),
            "extraction",
        )

    for code, count in sorted(diagnostics.formula_extraction.items()):
        record(
            "formula-extraction",
            code,
            count,
            _formula_extraction_blocker_category(code),
            _formula_extraction_blocker_disposition(code, diagnostics),
            _formula_extraction_next_action(code, diagnostics),
            "extraction",
        )

    for code, count in sorted(diagnostics.graph.items()):
        record(
            "graph",
            code,
            count,
            _graph_blocker_category(code),
            _graph_blocker_disposition(code),
            _graph_next_action(code),
            "graph",
        )

    for code, count in sorted(diagnostics.translation.items()):
        record(
            "translation",
            code,
            count,
            _translation_blocker_category(code),
            _translation_blocker_disposition(code),
            _translation_next_action(code),
            "translation",
        )

    for code, count in sorted(diagnostics.generation.items()):
        record(
            "generation",
            code,
            count,
            "generation_scope",
            "next_target",
            "Refine generated-model scope or code generation support for this diagnostic.",
            "generation",
        )

    for code, count in sorted(diagnostics.cached_validation.items()):
        record(
            "cached-validation",
            code,
            count,
            "missing_cached_values" if "missing" in code else "unknown",
            "deferred",
            "Record cached-value limitation and select validation examples that can be compared.",
            "validation",
        )

    for code, count in sorted(diagnostics.oracle_validation.items()):
        record(
            "oracle-validation",
            code,
            count,
            "validation_oracle",
            "deferred",
            "Record oracle limitation and select a validation strategy.",
            "validation",
        )

    return tuple(collected)
720
+
721
+
722
def _diagnostic_blocker(
    *,
    prefix: str,
    index: int,
    code: str,
    count: int,
    category: BlockerCategory,
    disposition: BlockerDisposition,
    next_action: str,
    provenance: str,
) -> ResidualBlocker:
    """Build one residual blocker for a diagnostic code.

    The id is ``prefix`` plus the zero-padded three-digit ``index``; the code
    doubles as the blocker's ``item`` and severity is always ``"warning"``.
    """
    identifier = f"{prefix}-{index:03d}"
    return ResidualBlocker(
        blocker_id=identifier,
        category=category,
        diagnostic_code=code,
        item=code,
        count=count,
        severity="warning",
        disposition=disposition,
        next_action=next_action,
        provenance=provenance,
    )
744
+
745
+
746
def _recommendations(
    residual_blockers: tuple[ResidualBlocker, ...],
    validation_summary: ValidationSummary,
) -> tuple[PlanRecommendation, ...]:
    """Derive prioritized recommendations from blockers and validation state.

    Each blocker whose disposition is ``"next_target"`` yields one
    recommendation, numbered from 1 in blocker order. A blocked oracle
    validation appends one further recommendation with the next priority.
    """
    targets = [entry for entry in residual_blockers if entry.disposition == "next_target"]
    planned = [
        PlanRecommendation(
            priority=rank,
            action=entry.next_action,
            rationale=f"{entry.count} item(s) remain blocked by {entry.diagnostic_code}.",
        )
        for rank, entry in enumerate(targets, start=1)
    ]
    if validation_summary.oracle_status == "blocked":
        planned.append(
            PlanRecommendation(
                priority=len(planned) + 1,
                action="Choose full-workbook validation oracle strategy.",
                rationale="Cached-value subset validation is useful but does not prove workbook equivalence.",
            )
        )
    return tuple(planned)
769
+
770
+
771
def _validation_diagnostic_counts(report: ValidationReport | None) -> dict[str, int]:
    """Count the diagnostic codes attached to a report's comparisons.

    Returns an empty dict when there is no report; comparisons without a
    diagnostic code are skipped.
    """
    if report is not None:
        codes = (entry.diagnostic_code for entry in report.comparisons)
        return _counter_dict(code for code in codes if code is not None)
    return {}
779
+
780
+
781
+ def _report_status(report: ValidationReport | None) -> ValidationStatus:
782
+ if report is None:
783
+ return "not_run"
784
+ return report.status
785
+
786
+
787
+ def _counter_dict(values) -> dict[str, int]:
788
+ return dict(Counter(value for value in values if value))
789
+
790
+
791
+ def _file_type(source_path: str) -> str:
792
+ if "." not in source_path:
793
+ return ""
794
+ return f".{source_path.rsplit('.', 1)[1].lower()}"
795
+
796
+
797
+ def _extraction_blocker_category(code: str) -> BlockerCategory:
798
+ if "external" in code:
799
+ return "external_dependency"
800
+ return "unknown"
801
+
802
+
803
+ def _extraction_blocker_disposition(code: str) -> BlockerDisposition:
804
+ if "external" in code:
805
+ return "deferred"
806
+ return "next_target"
807
+
808
+
809
+ def _extraction_next_action(code: str) -> str:
810
+ if "external" in code:
811
+ return "Require explicit external workbook materialization, mock inputs, or rejection policy; do not inline external dependencies silently."
812
+ return "Classify this workbook-extraction diagnostic before claiming conversion readiness."
813
+
814
+
815
+ def _graph_blocker_category(code: str) -> BlockerCategory:
816
+ if "external" in code:
817
+ return "external_dependency"
818
+ return "graph_semantics"
819
+
820
+
821
+ def _graph_blocker_disposition(code: str) -> BlockerDisposition:
822
+ if "external" in code:
823
+ return "deferred"
824
+ return "next_target"
825
+
826
+
827
+ def _graph_next_action(code: str) -> str:
828
+ if "external" in code:
829
+ return "Require explicit external workbook materialization, mock inputs, or rejection policy before full conversion."
830
+ return "Define graph semantics or reporting policy for this workbook structure."
831
+
832
+
833
+ def _named_range_blocker_category(code: str) -> BlockerCategory:
834
+ if code == "named_range_source_error":
835
+ return "source_workbook_defect"
836
+ if "named_range" in code or "defined_name" in code:
837
+ return "unsupported_reference_semantics"
838
+ return "unknown"
839
+
840
+
841
+ def _named_range_blocker_disposition(code: str) -> BlockerDisposition:
842
+ if code == "named_range_source_error":
843
+ return "out_of_scope"
844
+ if "unresolved" in code:
845
+ return "next_target"
846
+ return "deferred"
847
+
848
+
849
+ def _named_range_next_action(code: str) -> str:
850
+ if code == "named_range_source_error":
851
+ return "Ignore stale source workbook defined-name errors unless referenced by formulas or validation rules."
852
+ if "unresolved" in code:
853
+ return "Resolve named-range semantics or document why the range is out of conversion scope."
854
+ return "Classify this named-range diagnostic before claiming conversion readiness."
855
+
856
+
857
+ def _formula_extraction_blocker_category(code: str) -> BlockerCategory:
858
+ if "external" in code:
859
+ return "external_dependency"
860
+ if code == "missing_cached_formula_value":
861
+ return "missing_cached_values"
862
+ if "structured_reference" in code:
863
+ return "unsupported_reference_semantics"
864
+ if "volatile" in code:
865
+ return "unsupported_formula_semantics"
866
+ return "unknown"
867
+
868
+
869
+ def _formula_extraction_blocker_disposition(
870
+ code: str,
871
+ diagnostics: DiagnosticSummary,
872
+ ) -> BlockerDisposition:
873
+ if code == "missing_cached_formula_value":
874
+ return "deferred"
875
+ if "external" in code:
876
+ return "deferred"
877
+ if "structured_reference" in code:
878
+ if not _has_unresolved_reference_diagnostics(diagnostics):
879
+ return "resolved"
880
+ return "deferred"
881
+ if "volatile" in code:
882
+ if not diagnostics.translation:
883
+ return "resolved"
884
+ return "deferred"
885
+ return "next_target"
886
+
887
+
888
+ def _formula_extraction_next_action(
889
+ code: str,
890
+ diagnostics: DiagnosticSummary,
891
+ ) -> str:
892
+ if code == "missing_cached_formula_value":
893
+ return "Not a generation blocker; use a recalculation oracle or select validation outputs with available cached values."
894
+ if "external" in code:
895
+ return "Require explicit external workbook materialization, mock inputs, or rejection policy; do not inline external dependencies silently."
896
+ if "structured_reference" in code:
897
+ if not _has_unresolved_reference_diagnostics(diagnostics):
898
+ return "No conversion action required; extraction diagnostic is provenance for structured references already resolved by graph and translation."
899
+ return "Keep structured-reference diagnostics visible and separate them from translation failures."
900
+ if "volatile" in code:
901
+ if not diagnostics.translation:
902
+ return "No formula-semantics action required while translation is clean; retain volatile-function provenance for validation risk review."
903
+ return "Record volatile formula risk and define deterministic handling where conversion needs it."
904
+ return "Classify this formula-extraction diagnostic before claiming conversion readiness."
905
+
906
+
907
+ def _has_unresolved_reference_diagnostics(diagnostics: DiagnosticSummary) -> bool:
908
+ reference_codes = tuple(diagnostics.graph) + tuple(diagnostics.translation)
909
+ return any("structured_reference" in code or "reference" in code for code in reference_codes)
910
+
911
+
912
+ def _translation_blocker_category(code: str) -> BlockerCategory:
913
+ if code == "unsupported_error_reference":
914
+ return "source_workbook_defect"
915
+ if "structured_reference" in code or "reference" in code:
916
+ return "unsupported_reference_semantics"
917
+ if code.startswith("unsupported_function") or code.startswith("unsupported_"):
918
+ return "unsupported_formula_semantics"
919
+ return "unknown"
920
+
921
+
922
+ def _translation_blocker_disposition(code: str) -> BlockerDisposition:
923
+ if code == "unsupported_error_reference":
924
+ return "blocked_by_design"
925
+ return "next_target"
926
+
927
+
928
+ def _translation_next_action(code: str) -> str:
929
+ if code == "unsupported_error_reference":
930
+ return "Report explicit source error references; do not silently generate normal Python behavior."
931
+ return "Implement support or a sharper diagnostic for this translation blocker."