modelwright 0.1.0a2__tar.gz → 0.1.0a3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {modelwright-0.1.0a2/src/modelwright.egg-info → modelwright-0.1.0a3}/PKG-INFO +2 -2
- {modelwright-0.1.0a2 → modelwright-0.1.0a3}/README.md +1 -1
- {modelwright-0.1.0a2 → modelwright-0.1.0a3}/pyproject.toml +1 -1
- {modelwright-0.1.0a2 → modelwright-0.1.0a3}/src/modelwright/__init__.py +1 -1
- {modelwright-0.1.0a2 → modelwright-0.1.0a3}/src/modelwright/generation.py +141 -32
- {modelwright-0.1.0a2 → modelwright-0.1.0a3/src/modelwright.egg-info}/PKG-INFO +2 -2
- {modelwright-0.1.0a2 → modelwright-0.1.0a3}/tests/test_generation_contract.py +1 -0
- {modelwright-0.1.0a2 → modelwright-0.1.0a3}/tests/test_import.py +1 -1
- {modelwright-0.1.0a2 → modelwright-0.1.0a3}/tests/test_python_generation.py +179 -0
- {modelwright-0.1.0a2 → modelwright-0.1.0a3}/LICENSE +0 -0
- {modelwright-0.1.0a2 → modelwright-0.1.0a3}/setup.cfg +0 -0
- {modelwright-0.1.0a2 → modelwright-0.1.0a3}/src/modelwright/cli.py +0 -0
- {modelwright-0.1.0a2 → modelwright-0.1.0a3}/src/modelwright/conversion.py +0 -0
- {modelwright-0.1.0a2 → modelwright-0.1.0a3}/src/modelwright/evaluation.py +0 -0
- {modelwright-0.1.0a2 → modelwright-0.1.0a3}/src/modelwright/execution.py +0 -0
- {modelwright-0.1.0a2 → modelwright-0.1.0a3}/src/modelwright/extraction.py +0 -0
- {modelwright-0.1.0a2 → modelwright-0.1.0a3}/src/modelwright/formulas.py +0 -0
- {modelwright-0.1.0a2 → modelwright-0.1.0a3}/src/modelwright/formulas_oracle.py +0 -0
- {modelwright-0.1.0a2 → modelwright-0.1.0a3}/src/modelwright/graph.py +0 -0
- {modelwright-0.1.0a2 → modelwright-0.1.0a3}/src/modelwright/oracle_validation.py +0 -0
- {modelwright-0.1.0a2 → modelwright-0.1.0a3}/src/modelwright/oracles.py +0 -0
- {modelwright-0.1.0a2 → modelwright-0.1.0a3}/src/modelwright/references.py +0 -0
- {modelwright-0.1.0a2 → modelwright-0.1.0a3}/src/modelwright/validation.py +0 -0
- {modelwright-0.1.0a2 → modelwright-0.1.0a3}/src/modelwright.egg-info/SOURCES.txt +0 -0
- {modelwright-0.1.0a2 → modelwright-0.1.0a3}/src/modelwright.egg-info/dependency_links.txt +0 -0
- {modelwright-0.1.0a2 → modelwright-0.1.0a3}/src/modelwright.egg-info/entry_points.txt +0 -0
- {modelwright-0.1.0a2 → modelwright-0.1.0a3}/src/modelwright.egg-info/requires.txt +0 -0
- {modelwright-0.1.0a2 → modelwright-0.1.0a3}/src/modelwright.egg-info/top_level.txt +0 -0
- {modelwright-0.1.0a2 → modelwright-0.1.0a3}/tests/test_cli.py +0 -0
- {modelwright-0.1.0a2 → modelwright-0.1.0a3}/tests/test_conversion_plan.py +0 -0
- {modelwright-0.1.0a2 → modelwright-0.1.0a3}/tests/test_dependency_graph.py +0 -0
- {modelwright-0.1.0a2 → modelwright-0.1.0a3}/tests/test_evaluation_orchestration.py +0 -0
- {modelwright-0.1.0a2 → modelwright-0.1.0a3}/tests/test_extraction_records.py +0 -0
- {modelwright-0.1.0a2 → modelwright-0.1.0a3}/tests/test_formula_expressions.py +0 -0
- {modelwright-0.1.0a2 → modelwright-0.1.0a3}/tests/test_formula_translation.py +0 -0
- {modelwright-0.1.0a2 → modelwright-0.1.0a3}/tests/test_formulas_oracle.py +0 -0
- {modelwright-0.1.0a2 → modelwright-0.1.0a3}/tests/test_generated_execution.py +0 -0
- {modelwright-0.1.0a2 → modelwright-0.1.0a3}/tests/test_materialize_fable_benchmarks.py +0 -0
- {modelwright-0.1.0a2 → modelwright-0.1.0a3}/tests/test_openpyxl_extraction.py +0 -0
- {modelwright-0.1.0a2 → modelwright-0.1.0a3}/tests/test_oracle_backed_validation.py +0 -0
- {modelwright-0.1.0a2 → modelwright-0.1.0a3}/tests/test_oracle_interface.py +0 -0
- {modelwright-0.1.0a2 → modelwright-0.1.0a3}/tests/test_public_api.py +0 -0
- {modelwright-0.1.0a2 → modelwright-0.1.0a3}/tests/test_references.py +0 -0
- {modelwright-0.1.0a2 → modelwright-0.1.0a3}/tests/test_scalar_comparison.py +0 -0
- {modelwright-0.1.0a2 → modelwright-0.1.0a3}/tests/test_supported_semantics_fixture.py +0 -0
- {modelwright-0.1.0a2 → modelwright-0.1.0a3}/tests/test_synthetic_fixture.py +0 -0
- {modelwright-0.1.0a2 → modelwright-0.1.0a3}/tests/test_validation.py +0 -0
- {modelwright-0.1.0a2 → modelwright-0.1.0a3}/tests/test_validation_regression.py +0 -0
- {modelwright-0.1.0a2 → modelwright-0.1.0a3}/tests/test_validation_report_builder.py +0 -0
- {modelwright-0.1.0a2 → modelwright-0.1.0a3}/tests/test_validation_scenario.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: modelwright
|
|
3
|
-
Version: 0.1.0a2
|
|
3
|
+
Version: 0.1.0a3
|
|
4
4
|
Summary: Tools for converting spreadsheet workbooks into transparent Python models.
|
|
5
5
|
Author: UBC FRESH Lab
|
|
6
6
|
License-Expression: MIT
|
|
@@ -135,7 +135,7 @@ Restore the public external FABLE benchmark workbooks into ignored local paths:
|
|
|
135
135
|
scripts/bootstrap_dev_env.sh --benchmarks
|
|
136
136
|
```
|
|
137
137
|
|
|
138
|
-
`modelwright` is pre-release. The current alpha line is `0.1.0a2`; alpha releases must not be described as full-workbook conversion guarantees.
|
|
138
|
+
`modelwright` is pre-release. The current alpha line is `0.1.0a3`; alpha releases must not be described as full-workbook conversion guarantees.
|
|
139
139
|
|
|
140
140
|
Check release artifacts locally:
|
|
141
141
|
|
|
@@ -84,7 +84,7 @@ Restore the public external FABLE benchmark workbooks into ignored local paths:
|
|
|
84
84
|
scripts/bootstrap_dev_env.sh --benchmarks
|
|
85
85
|
```
|
|
86
86
|
|
|
87
|
-
`modelwright` is pre-release. The current alpha line is `0.1.0a2`; alpha releases must not be described as full-workbook conversion guarantees.
|
|
87
|
+
`modelwright` is pre-release. The current alpha line is `0.1.0a3`; alpha releases must not be described as full-workbook conversion guarantees.
|
|
88
88
|
|
|
89
89
|
Check release artifacts locally:
|
|
90
90
|
|
|
@@ -17,7 +17,10 @@ from modelwright.references import WorkbookReference
|
|
|
17
17
|
|
|
18
18
|
|
|
19
19
|
JsonValue = str | int | float | bool | None | list[Any] | dict[str, Any]
|
|
20
|
+
DEFAULT_INLINE_PROVENANCE_COMMENT_LIMIT = 50_000
|
|
21
|
+
DEFAULT_INLINE_FORMULA_LAMBDA_LIMIT = 50_000
|
|
20
22
|
DiagnosticSeverity = Literal["info", "warning", "error"]
|
|
23
|
+
FormulaStorage = Literal["lambdas", "expression_source"]
|
|
21
24
|
GeneratedSymbolKind = Literal["input", "intermediate", "output"]
|
|
22
25
|
|
|
23
26
|
|
|
@@ -89,6 +92,7 @@ class GeneratedModuleContract:
|
|
|
89
92
|
output_refs: tuple[str, ...] = field(default_factory=tuple)
|
|
90
93
|
symbols: tuple[GeneratedSymbol, ...] = field(default_factory=tuple)
|
|
91
94
|
include_provenance_comments: bool = True
|
|
95
|
+
formula_storage: FormulaStorage = "lambdas"
|
|
92
96
|
|
|
93
97
|
@classmethod
|
|
94
98
|
def from_dict(cls, data: dict[str, Any]) -> "GeneratedModuleContract":
|
|
@@ -100,6 +104,7 @@ class GeneratedModuleContract:
|
|
|
100
104
|
output_refs=tuple(data.get("output_refs", [])),
|
|
101
105
|
symbols=tuple(GeneratedSymbol.from_dict(item) for item in data.get("symbols", [])),
|
|
102
106
|
include_provenance_comments=data.get("include_provenance_comments", True),
|
|
107
|
+
formula_storage=data.get("formula_storage", "lambdas"),
|
|
103
108
|
)
|
|
104
109
|
|
|
105
110
|
def to_dict(self) -> dict[str, JsonValue]:
|
|
@@ -111,6 +116,7 @@ class GeneratedModuleContract:
|
|
|
111
116
|
"output_refs": list(self.output_refs),
|
|
112
117
|
"symbols": [symbol.to_dict() for symbol in self.symbols],
|
|
113
118
|
"include_provenance_comments": self.include_provenance_comments,
|
|
119
|
+
"formula_storage": self.formula_storage,
|
|
114
120
|
}
|
|
115
121
|
|
|
116
122
|
|
|
@@ -186,6 +192,8 @@ def infer_generated_module_contract(
|
|
|
186
192
|
module_name: str,
|
|
187
193
|
input_refs: Sequence[str] = (),
|
|
188
194
|
progress: Callable[[str], None] | None = None,
|
|
195
|
+
inline_provenance_comment_limit: int | None = DEFAULT_INLINE_PROVENANCE_COMMENT_LIMIT,
|
|
196
|
+
inline_formula_lambda_limit: int | None = DEFAULT_INLINE_FORMULA_LAMBDA_LIMIT,
|
|
189
197
|
) -> GeneratedContractInferenceResult:
|
|
190
198
|
"""Infer a generated module contract by walking dependencies for selected outputs."""
|
|
191
199
|
|
|
@@ -242,7 +250,9 @@ def infer_generated_module_contract(
|
|
|
242
250
|
)
|
|
243
251
|
|
|
244
252
|
input_order: list[str] = []
|
|
253
|
+
input_seen: set[str] = set()
|
|
245
254
|
formula_order: list[str] = []
|
|
255
|
+
formula_seen: set[str] = set()
|
|
246
256
|
visiting: set[str] = set()
|
|
247
257
|
visited: set[str] = set()
|
|
248
258
|
circular_dependency_locations: set[str] = set()
|
|
@@ -255,12 +265,11 @@ def infer_generated_module_contract(
|
|
|
255
265
|
if isinstance(dependency, str):
|
|
256
266
|
refs.append(dependency)
|
|
257
267
|
continue
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
)
|
|
268
|
+
expanded = expanded_range_dependencies.get(dependency.normalized)
|
|
269
|
+
if expanded is None:
|
|
270
|
+
expanded = _expand_range_dependency(dependency)
|
|
271
|
+
expanded_range_dependencies[dependency.normalized] = expanded
|
|
272
|
+
refs.extend(expanded)
|
|
264
273
|
return tuple(refs)
|
|
265
274
|
|
|
266
275
|
def visit(root_ref: str) -> None:
|
|
@@ -273,8 +282,9 @@ def infer_generated_module_contract(
|
|
|
273
282
|
|
|
274
283
|
if dependencies_processed:
|
|
275
284
|
visiting.discard(cell_ref)
|
|
276
|
-
if cell_ref not in formula_order:
|
|
285
|
+
if cell_ref not in formula_seen:
|
|
277
286
|
formula_order.append(cell_ref)
|
|
287
|
+
formula_seen.add(cell_ref)
|
|
278
288
|
visited.add(cell_ref)
|
|
279
289
|
continue
|
|
280
290
|
|
|
@@ -303,14 +313,16 @@ def infer_generated_module_contract(
|
|
|
303
313
|
|
|
304
314
|
cell = cell_by_ref.get(cell_ref)
|
|
305
315
|
if cell is None:
|
|
306
|
-
if cell_ref not in input_order:
|
|
316
|
+
if cell_ref not in input_seen:
|
|
307
317
|
input_order.append(cell_ref)
|
|
318
|
+
input_seen.add(cell_ref)
|
|
308
319
|
visited.add(cell_ref)
|
|
309
320
|
continue
|
|
310
321
|
|
|
311
322
|
if cell_ref in explicit_inputs or cell.formula is None:
|
|
312
|
-
if cell_ref not in input_order:
|
|
323
|
+
if cell_ref not in input_seen:
|
|
313
324
|
input_order.append(cell_ref)
|
|
325
|
+
input_seen.add(cell_ref)
|
|
314
326
|
visited.add(cell_ref)
|
|
315
327
|
continue
|
|
316
328
|
|
|
@@ -368,6 +380,12 @@ def infer_generated_module_contract(
|
|
|
368
380
|
input_refs=tuple(input_order),
|
|
369
381
|
output_refs=selected_outputs,
|
|
370
382
|
symbols=symbols,
|
|
383
|
+
include_provenance_comments=(
|
|
384
|
+
inline_provenance_comment_limit is None or len(formula_order) <= inline_provenance_comment_limit
|
|
385
|
+
),
|
|
386
|
+
formula_storage="lambdas"
|
|
387
|
+
if inline_formula_lambda_limit is None or len(formula_order) <= inline_formula_lambda_limit
|
|
388
|
+
else "expression_source",
|
|
371
389
|
)
|
|
372
390
|
return GeneratedContractInferenceResult(
|
|
373
391
|
contract=contract,
|
|
@@ -478,6 +496,44 @@ def _render_module(
|
|
|
478
496
|
'"""',
|
|
479
497
|
"",
|
|
480
498
|
"import fnmatch",
|
|
499
|
+
"from functools import lru_cache",
|
|
500
|
+
"",
|
|
501
|
+
"",
|
|
502
|
+
"class _SfRangeView:",
|
|
503
|
+
" def __init__(self, sheet, min_col, min_row, max_col, max_row, get_value):",
|
|
504
|
+
" self.sheet = sheet",
|
|
505
|
+
" self.min_col = min_col",
|
|
506
|
+
" self.min_row = min_row",
|
|
507
|
+
" self.max_col = max_col",
|
|
508
|
+
" self.max_row = max_row",
|
|
509
|
+
" self._get_value = get_value",
|
|
510
|
+
" self._values = None",
|
|
511
|
+
" self._lazy_values = None",
|
|
512
|
+
" self._value_calls = 0",
|
|
513
|
+
" self._lazy_value_calls = 0",
|
|
514
|
+
"",
|
|
515
|
+
" def _refs(self):",
|
|
516
|
+
" for row in range(self.min_row, self.max_row + 1):",
|
|
517
|
+
" for column in range(self.min_col, self.max_col + 1):",
|
|
518
|
+
" yield f'{self.sheet}!{_sf_column_name(column)}{row}'",
|
|
519
|
+
"",
|
|
520
|
+
" def values(self):",
|
|
521
|
+
" if self._values is not None:",
|
|
522
|
+
" return self._values",
|
|
523
|
+
" values = tuple(self._get_value(ref) for ref in self._refs())",
|
|
524
|
+
" self._value_calls += 1",
|
|
525
|
+
" if self._value_calls > 1:",
|
|
526
|
+
" self._values = values",
|
|
527
|
+
" return values",
|
|
528
|
+
"",
|
|
529
|
+
" def lazy_values(self):",
|
|
530
|
+
" if self._lazy_values is not None:",
|
|
531
|
+
" return self._lazy_values",
|
|
532
|
+
" values = tuple(lambda ref=ref: self._get_value(ref) for ref in self._refs())",
|
|
533
|
+
" self._lazy_value_calls += 1",
|
|
534
|
+
" if self._lazy_value_calls > 1:",
|
|
535
|
+
" self._lazy_values = values",
|
|
536
|
+
" return values",
|
|
481
537
|
"",
|
|
482
538
|
"",
|
|
483
539
|
"def _sf_column_name(index):",
|
|
@@ -490,6 +546,9 @@ def _render_module(
|
|
|
490
546
|
"",
|
|
491
547
|
"def _sf_flatten(values):",
|
|
492
548
|
" for value in values:",
|
|
549
|
+
" if isinstance(value, _SfRangeView):",
|
|
550
|
+
" yield from value.values()",
|
|
551
|
+
" continue",
|
|
493
552
|
" if isinstance(value, (list, tuple)):",
|
|
494
553
|
" yield from _sf_flatten(value)",
|
|
495
554
|
" else:",
|
|
@@ -498,6 +557,9 @@ def _render_module(
|
|
|
498
557
|
"",
|
|
499
558
|
"def _sf_flatten_lazy(values):",
|
|
500
559
|
" for value in values:",
|
|
560
|
+
" if isinstance(value, _SfRangeView):",
|
|
561
|
+
" yield from value.lazy_values()",
|
|
562
|
+
" continue",
|
|
501
563
|
" if isinstance(value, (list, tuple)):",
|
|
502
564
|
" yield from _sf_flatten_lazy(value)",
|
|
503
565
|
" else:",
|
|
@@ -525,6 +587,7 @@ def _render_module(
|
|
|
525
587
|
" return value",
|
|
526
588
|
"",
|
|
527
589
|
"",
|
|
590
|
+
"@lru_cache(maxsize=4096)",
|
|
528
591
|
"def _sf_numeric_value(value):",
|
|
529
592
|
" if isinstance(value, bool):",
|
|
530
593
|
" return None",
|
|
@@ -605,17 +668,20 @@ def _render_module(
|
|
|
605
668
|
" raise ValueError(f'unsupported criteria operator: {operator}')",
|
|
606
669
|
"",
|
|
607
670
|
"",
|
|
608
|
-
"def
|
|
671
|
+
"def _sf_criteria_matcher(criteria):",
|
|
609
672
|
" if isinstance(criteria, str):",
|
|
610
673
|
" for operator in ('>=', '<=', '<>', '>', '<', '='):",
|
|
611
674
|
" if criteria.startswith(operator):",
|
|
612
|
-
"
|
|
613
|
-
" return _sf_compare_criteria(value, operator,
|
|
675
|
+
" raw_expected = criteria[len(operator):]",
|
|
676
|
+
" return lambda value: _sf_compare_criteria(value, operator, _sf_coerce_criteria(raw_expected, value))",
|
|
614
677
|
" if '*' in criteria or '?' in criteria:",
|
|
615
|
-
" return fnmatch.fnmatchcase(str(value), criteria)",
|
|
616
|
-
"
|
|
617
|
-
"
|
|
618
|
-
"
|
|
678
|
+
" return lambda value: fnmatch.fnmatchcase(str(value), criteria)",
|
|
679
|
+
" return lambda value: _sf_compare_criteria(value, '=', _sf_coerce_criteria(criteria, value))",
|
|
680
|
+
" return lambda value: _sf_compare_criteria(value, '=', criteria)",
|
|
681
|
+
"",
|
|
682
|
+
"",
|
|
683
|
+
"def _sf_matches_criteria(value, criteria):",
|
|
684
|
+
" return _sf_criteria_matcher(criteria)(value)",
|
|
619
685
|
"",
|
|
620
686
|
"",
|
|
621
687
|
"def _sf_lookup_equal(left, right):",
|
|
@@ -627,24 +693,26 @@ def _render_module(
|
|
|
627
693
|
"def _sf_sumif(criteria_range, criteria, sum_range=None):",
|
|
628
694
|
" criteria_values = tuple(_sf_flatten((criteria_range,)))",
|
|
629
695
|
" sum_values = criteria_values if sum_range is None else tuple(_sf_flatten_lazy((sum_range,)))",
|
|
696
|
+
" matcher = _sf_criteria_matcher(criteria)",
|
|
630
697
|
" total = 0",
|
|
631
698
|
" for criteria_value, sum_value in zip(criteria_values, sum_values):",
|
|
632
|
-
" if
|
|
699
|
+
" if matcher(criteria_value):",
|
|
633
700
|
" total += _sf_sum_value(sum_value)",
|
|
634
701
|
" return total",
|
|
635
702
|
"",
|
|
636
703
|
"",
|
|
637
704
|
"def _sf_countif(criteria_range, criteria):",
|
|
638
|
-
"
|
|
705
|
+
" matcher = _sf_criteria_matcher(criteria)",
|
|
706
|
+
" return sum(1 for value in _sf_flatten((criteria_range,)) if matcher(value))",
|
|
639
707
|
"",
|
|
640
708
|
"",
|
|
641
709
|
"def _sf_sumifs(sum_range, *criteria_pairs):",
|
|
642
710
|
" sum_values = tuple(_sf_flatten_lazy((sum_range,)))",
|
|
643
711
|
" criteria_ranges = [tuple(_sf_flatten((criteria_range,))) for criteria_range, _criteria in criteria_pairs]",
|
|
644
|
-
"
|
|
712
|
+
" criteria_matchers = tuple(_sf_criteria_matcher(criteria) for _range, criteria in criteria_pairs)",
|
|
645
713
|
" total = 0",
|
|
646
714
|
" for index, sum_value in enumerate(sum_values):",
|
|
647
|
-
" if all(
|
|
715
|
+
" if all(matcher(criteria_range[index]) for criteria_range, matcher in zip(criteria_ranges, criteria_matchers)):",
|
|
648
716
|
" total += _sf_sum_value(sum_value)",
|
|
649
717
|
" return total",
|
|
650
718
|
"",
|
|
@@ -653,11 +721,11 @@ def _render_module(
|
|
|
653
721
|
" criteria_ranges = [tuple(_sf_flatten((criteria_range,))) for criteria_range, _criteria in criteria_pairs]",
|
|
654
722
|
" if not criteria_ranges:",
|
|
655
723
|
" return 0",
|
|
656
|
-
"
|
|
724
|
+
" criteria_matchers = tuple(_sf_criteria_matcher(criteria) for _range, criteria in criteria_pairs)",
|
|
657
725
|
" return sum(",
|
|
658
726
|
" 1",
|
|
659
727
|
" for index in range(len(criteria_ranges[0]))",
|
|
660
|
-
" if all(
|
|
728
|
+
" if all(matcher(criteria_range[index]) for criteria_range, matcher in zip(criteria_ranges, criteria_matchers))",
|
|
661
729
|
" )",
|
|
662
730
|
"",
|
|
663
731
|
"",
|
|
@@ -699,6 +767,7 @@ def _render_module(
|
|
|
699
767
|
f"def {contract.entrypoint}(inputs=None):",
|
|
700
768
|
" inputs = {} if inputs is None else dict(inputs)",
|
|
701
769
|
" _cache = {}",
|
|
770
|
+
" _range_cache = {}",
|
|
702
771
|
" _stack = []",
|
|
703
772
|
" _evaluated_count = 0",
|
|
704
773
|
" _constants = {",
|
|
@@ -731,7 +800,7 @@ def _render_module(
|
|
|
731
800
|
" raise RuntimeError('circular dependency during generated model execution: ' + ' -> '.join(cycle))",
|
|
732
801
|
" _stack.append(cell_ref)",
|
|
733
802
|
" try:",
|
|
734
|
-
" value = formula
|
|
803
|
+
" value = _evaluate_formula(cell_ref, formula)",
|
|
735
804
|
" finally:",
|
|
736
805
|
" _stack.pop()",
|
|
737
806
|
" _cache[cell_ref] = value",
|
|
@@ -750,11 +819,12 @@ def _render_module(
|
|
|
750
819
|
" return value",
|
|
751
820
|
"",
|
|
752
821
|
" def _range(sheet, min_col, min_row, max_col, max_row):",
|
|
753
|
-
"
|
|
754
|
-
"
|
|
755
|
-
"
|
|
756
|
-
"
|
|
757
|
-
"
|
|
822
|
+
" key = (sheet, min_col, min_row, max_col, max_row)",
|
|
823
|
+
" view = _range_cache.get(key)",
|
|
824
|
+
" if view is None:",
|
|
825
|
+
" view = _SfRangeView(sheet, min_col, min_row, max_col, max_row, _get)",
|
|
826
|
+
" _range_cache[key] = view",
|
|
827
|
+
" return view",
|
|
758
828
|
"",
|
|
759
829
|
" def _table(sheet, min_col, min_row, max_col, max_row):",
|
|
760
830
|
" return tuple(",
|
|
@@ -762,6 +832,36 @@ def _render_module(
|
|
|
762
832
|
" for row in range(min_row, max_row + 1)",
|
|
763
833
|
" )",
|
|
764
834
|
"",
|
|
835
|
+
]
|
|
836
|
+
)
|
|
837
|
+
if contract.formula_storage == "lambdas":
|
|
838
|
+
lines.extend(
|
|
839
|
+
[
|
|
840
|
+
" def _evaluate_formula(_cell_ref, formula):",
|
|
841
|
+
" return formula()",
|
|
842
|
+
"",
|
|
843
|
+
]
|
|
844
|
+
)
|
|
845
|
+
elif contract.formula_storage == "expression_source":
|
|
846
|
+
lines.extend(
|
|
847
|
+
[
|
|
848
|
+
" _formula_globals = dict(globals())",
|
|
849
|
+
" _formula_globals.update({",
|
|
850
|
+
" '_get': _get,",
|
|
851
|
+
" '_range': _range,",
|
|
852
|
+
" '_table': _table,",
|
|
853
|
+
" })",
|
|
854
|
+
"",
|
|
855
|
+
" def _evaluate_formula(cell_ref, formula):",
|
|
856
|
+
" code = compile(formula, f'<modelwright formula {cell_ref}>', 'eval')",
|
|
857
|
+
" return eval(code, _formula_globals)",
|
|
858
|
+
"",
|
|
859
|
+
]
|
|
860
|
+
)
|
|
861
|
+
else:
|
|
862
|
+
raise ValueError(f"unsupported formula storage: {contract.formula_storage}")
|
|
863
|
+
lines.extend(
|
|
864
|
+
[
|
|
765
865
|
" _formulas = {",
|
|
766
866
|
]
|
|
767
867
|
)
|
|
@@ -771,7 +871,11 @@ def _render_module(
|
|
|
771
871
|
lines.append(f" # {symbol.cell_ref}" + (f": {symbol.raw_formula}" if symbol.raw_formula else ""))
|
|
772
872
|
|
|
773
873
|
expression = expressions[symbol.cell_ref]
|
|
774
|
-
|
|
874
|
+
rendered_formula = _render_formula_root(expression.root)
|
|
875
|
+
if contract.formula_storage == "lambdas":
|
|
876
|
+
lines.append(f" {symbol.cell_ref!r}: lambda: {rendered_formula},")
|
|
877
|
+
else:
|
|
878
|
+
lines.append(f" {symbol.cell_ref!r}: {rendered_formula!r},")
|
|
775
879
|
|
|
776
880
|
if index == 1 or index % 10000 == 0 or index == len(formula_symbols):
|
|
777
881
|
_progress(
|
|
@@ -781,12 +885,17 @@ def _render_module(
|
|
|
781
885
|
lines.extend(
|
|
782
886
|
[
|
|
783
887
|
" }",
|
|
784
|
-
"
|
|
888
|
+
" _output_refs = (",
|
|
785
889
|
]
|
|
786
890
|
)
|
|
787
891
|
for output_ref in contract.output_refs:
|
|
788
|
-
lines.append(f" {output_ref!r}
|
|
789
|
-
lines.
|
|
892
|
+
lines.append(f" {output_ref!r},")
|
|
893
|
+
lines.extend(
|
|
894
|
+
[
|
|
895
|
+
" )",
|
|
896
|
+
" return {cell_ref: _get(cell_ref) for cell_ref in _output_refs}",
|
|
897
|
+
]
|
|
898
|
+
)
|
|
790
899
|
lines.append("")
|
|
791
900
|
return "\n".join(lines)
|
|
792
901
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: modelwright
|
|
3
|
-
Version: 0.1.0a2
|
|
3
|
+
Version: 0.1.0a3
|
|
4
4
|
Summary: Tools for converting spreadsheet workbooks into transparent Python models.
|
|
5
5
|
Author: UBC FRESH Lab
|
|
6
6
|
License-Expression: MIT
|
|
@@ -135,7 +135,7 @@ Restore the public external FABLE benchmark workbooks into ignored local paths:
|
|
|
135
135
|
scripts/bootstrap_dev_env.sh --benchmarks
|
|
136
136
|
```
|
|
137
137
|
|
|
138
|
-
`modelwright` is pre-release. The current alpha line is `0.1.0a2`; alpha releases must not be described as full-workbook conversion guarantees.
|
|
138
|
+
`modelwright` is pre-release. The current alpha line is `0.1.0a3`; alpha releases must not be described as full-workbook conversion guarantees.
|
|
139
139
|
|
|
140
140
|
Check release artifacts locally:
|
|
141
141
|
|
|
@@ -44,6 +44,7 @@ def test_generated_module_contract_serializes_provenance() -> None:
|
|
|
44
44
|
assert payload["entrypoint"] == "calculate"
|
|
45
45
|
assert payload["output_refs"] == ["Summary!B2", "Summary!B3"]
|
|
46
46
|
assert payload["include_provenance_comments"] is True
|
|
47
|
+
assert payload["formula_storage"] == "lambdas"
|
|
47
48
|
assert payload["symbols"][0] == {
|
|
48
49
|
"cell_ref": "Summary!B2",
|
|
49
50
|
"symbol_name": "summary_b2",
|
|
@@ -119,6 +119,7 @@ def test_infer_generated_module_contract_for_synthetic_outputs(tmp_path: Path) -
|
|
|
119
119
|
assert result.diagnostics == ()
|
|
120
120
|
assert result.contract.input_refs == ("Inputs!B2", "Inputs!B3", "Inputs!B4")
|
|
121
121
|
assert result.constants == {"Inputs!B2": 100, "Inputs!B3": 0.08, "Inputs!B4": 0.65}
|
|
122
|
+
assert result.contract.include_provenance_comments is True
|
|
122
123
|
assert tuple(symbol.cell_ref for symbol in result.contract.symbols) == (
|
|
123
124
|
"Inputs!B2",
|
|
124
125
|
"Inputs!B3",
|
|
@@ -141,6 +142,107 @@ def test_infer_generated_module_contract_for_synthetic_outputs(tmp_path: Path) -
|
|
|
141
142
|
]
|
|
142
143
|
|
|
143
144
|
|
|
145
|
+
def test_infer_generated_module_contract_disables_large_inline_provenance_comments(tmp_path: Path) -> None:
|
|
146
|
+
workbook = extract_workbook(build_workbook(tmp_path / "synthetic_model.xlsx"))
|
|
147
|
+
graph = build_dependency_graph(workbook)
|
|
148
|
+
formula_cells = {cell.cell_ref: cell for cell in workbook.cells if cell.formula is not None}
|
|
149
|
+
expressions = {
|
|
150
|
+
cell_ref: translate_formula_cell(cell, graph)
|
|
151
|
+
for cell_ref, cell in formula_cells.items()
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
result = infer_generated_module_contract(
|
|
155
|
+
workbook=workbook,
|
|
156
|
+
graph=graph,
|
|
157
|
+
expressions=expressions,
|
|
158
|
+
output_refs=("Summary!B2", "Summary!B3"),
|
|
159
|
+
module_name="synthetic_model",
|
|
160
|
+
inline_provenance_comment_limit=2,
|
|
161
|
+
)
|
|
162
|
+
|
|
163
|
+
assert result.inferred is True
|
|
164
|
+
assert result.contract.include_provenance_comments is False
|
|
165
|
+
assert any(symbol.raw_formula == "=BaseVolume*(1+GrowthRate)" for symbol in result.contract.symbols)
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def test_infer_generated_module_contract_uses_expression_sources_for_large_formula_sets(tmp_path: Path) -> None:
|
|
169
|
+
workbook = extract_workbook(build_workbook(tmp_path / "synthetic_model.xlsx"))
|
|
170
|
+
graph = build_dependency_graph(workbook)
|
|
171
|
+
formula_cells = {cell.cell_ref: cell for cell in workbook.cells if cell.formula is not None}
|
|
172
|
+
expressions = {
|
|
173
|
+
cell_ref: translate_formula_cell(cell, graph)
|
|
174
|
+
for cell_ref, cell in formula_cells.items()
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
result = infer_generated_module_contract(
|
|
178
|
+
workbook=workbook,
|
|
179
|
+
graph=graph,
|
|
180
|
+
expressions=expressions,
|
|
181
|
+
output_refs=("Summary!B2", "Summary!B3"),
|
|
182
|
+
module_name="synthetic_model",
|
|
183
|
+
inline_formula_lambda_limit=2,
|
|
184
|
+
)
|
|
185
|
+
|
|
186
|
+
assert result.inferred is True
|
|
187
|
+
assert result.contract.formula_storage == "expression_source"
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def test_generate_python_module_can_omit_inline_provenance_comments(tmp_path: Path) -> None:
|
|
191
|
+
contract, expressions, constants = synthetic_generation_inputs(tmp_path)
|
|
192
|
+
compact_contract = GeneratedModuleContract(
|
|
193
|
+
workbook_id=contract.workbook_id,
|
|
194
|
+
module_name=contract.module_name,
|
|
195
|
+
entrypoint=contract.entrypoint,
|
|
196
|
+
input_refs=contract.input_refs,
|
|
197
|
+
output_refs=contract.output_refs,
|
|
198
|
+
symbols=contract.symbols,
|
|
199
|
+
include_provenance_comments=False,
|
|
200
|
+
)
|
|
201
|
+
output_path = tmp_path / "generated_compact_model.py"
|
|
202
|
+
|
|
203
|
+
result = generate_python_module(
|
|
204
|
+
contract=compact_contract,
|
|
205
|
+
expressions=expressions,
|
|
206
|
+
constants=constants,
|
|
207
|
+
output_path=output_path,
|
|
208
|
+
)
|
|
209
|
+
module = load_module(output_path)
|
|
210
|
+
|
|
211
|
+
assert result.generated is True
|
|
212
|
+
assert "# Calc!B2: =BaseVolume*(1+GrowthRate)" not in result.source_code
|
|
213
|
+
assert module.calculate() == {"Summary!B2": 70.2, "Summary!B3": "ok"}
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def test_generate_python_module_can_store_formula_expression_sources(tmp_path: Path) -> None:
|
|
217
|
+
contract, expressions, constants = synthetic_generation_inputs(tmp_path)
|
|
218
|
+
compact_contract = GeneratedModuleContract(
|
|
219
|
+
workbook_id=contract.workbook_id,
|
|
220
|
+
module_name=contract.module_name,
|
|
221
|
+
entrypoint=contract.entrypoint,
|
|
222
|
+
input_refs=contract.input_refs,
|
|
223
|
+
output_refs=contract.output_refs,
|
|
224
|
+
symbols=contract.symbols,
|
|
225
|
+
formula_storage="expression_source",
|
|
226
|
+
)
|
|
227
|
+
output_path = tmp_path / "generated_expression_source_model.py"
|
|
228
|
+
|
|
229
|
+
result = generate_python_module(
|
|
230
|
+
contract=compact_contract,
|
|
231
|
+
expressions=expressions,
|
|
232
|
+
constants=constants,
|
|
233
|
+
output_path=output_path,
|
|
234
|
+
)
|
|
235
|
+
module = load_module(output_path)
|
|
236
|
+
|
|
237
|
+
assert result.generated is True
|
|
238
|
+
assert " def _evaluate_formula(cell_ref, formula):" in result.source_code
|
|
239
|
+
assert "compile(formula, f'<modelwright formula {cell_ref}>', 'eval')" in result.source_code
|
|
240
|
+
assert "_formula_code_cache" not in result.source_code
|
|
241
|
+
assert "lambda: _sf_direct_reference" not in result.source_code
|
|
242
|
+
assert module.calculate() == {"Summary!B2": 70.2, "Summary!B3": "ok"}
|
|
243
|
+
assert module.calculate({"Inputs!B2": 10}) == {"Summary!B2": 7.02, "Summary!B3": "low"}
|
|
244
|
+
|
|
245
|
+
|
|
144
246
|
def test_infer_generated_module_contract_ignores_unreached_dependency_diagnostics(tmp_path: Path) -> None:
|
|
145
247
|
workbook = extract_workbook(build_workbook(tmp_path / "synthetic_model.xlsx"))
|
|
146
248
|
graph = build_dependency_graph(workbook)
|
|
@@ -317,6 +419,8 @@ def test_inferred_generated_module_runs_synthetic_model(tmp_path: Path) -> None:
|
|
|
317
419
|
module = load_module(output_path)
|
|
318
420
|
|
|
319
421
|
assert generation.generated is True
|
|
422
|
+
assert " _output_refs = (" in generation.source_code
|
|
423
|
+
assert " return {cell_ref: _get(cell_ref) for cell_ref in _output_refs}" in generation.source_code
|
|
320
424
|
assert module.calculate() == {"Summary!B2": 70.2, "Summary!B3": "ok"}
|
|
321
425
|
assert module.calculate({"Inputs!B2": 10}) == {"Summary!B2": 7.02, "Summary!B3": "low"}
|
|
322
426
|
|
|
@@ -623,6 +727,9 @@ def test_generate_python_module_renders_criteria_functions(tmp_path: Path) -> No
|
|
|
623
727
|
|
|
624
728
|
assert result.generated is True
|
|
625
729
|
assert "_sf_sumif" in result.source_code
|
|
730
|
+
assert "_sf_criteria_matcher" in result.source_code
|
|
731
|
+
assert "class _SfRangeView" in result.source_code
|
|
732
|
+
assert "_range_cache = {}" in result.source_code
|
|
626
733
|
assert module.calculate() == {
|
|
627
734
|
"Calc!B1": 4,
|
|
628
735
|
"Calc!B2": 1,
|
|
@@ -1190,3 +1297,75 @@ def test_generate_python_module_skips_excluded_sumifs_sum_cells(tmp_path: Path)
|
|
|
1190
1297
|
assert module.calculate() == {"Calc!C1": 5}
|
|
1191
1298
|
with pytest.raises(RuntimeError, match="Data!A2 -> Data!A2"):
|
|
1192
1299
|
module.calculate({"Data!B1": "skip", "Data!B2": "x"})
|
|
1300
|
+
|
|
1301
|
+
|
|
1302
|
+
def test_generate_python_module_reuses_range_views_without_changing_results(tmp_path: Path) -> None:
|
|
1303
|
+
contract = GeneratedModuleContract(
|
|
1304
|
+
workbook_id="range-cache.xlsx",
|
|
1305
|
+
module_name="range_cache",
|
|
1306
|
+
input_refs=("Data!A1", "Data!A2", "Data!B1", "Data!B2"),
|
|
1307
|
+
output_refs=("Calc!C1", "Calc!C2", "Calc!C3"),
|
|
1308
|
+
symbols=(
|
|
1309
|
+
GeneratedSymbol(cell_ref="Data!A1", symbol_name="data_a1", kind="input"),
|
|
1310
|
+
GeneratedSymbol(cell_ref="Data!A2", symbol_name="data_a2", kind="input"),
|
|
1311
|
+
GeneratedSymbol(cell_ref="Data!B1", symbol_name="data_b1", kind="input"),
|
|
1312
|
+
GeneratedSymbol(cell_ref="Data!B2", symbol_name="data_b2", kind="input"),
|
|
1313
|
+
GeneratedSymbol(cell_ref="Calc!C1", symbol_name="calc_c1", kind="output", raw_formula='=SUMIFS(A1:A2,B1:B2,"x")'),
|
|
1314
|
+
GeneratedSymbol(cell_ref="Calc!C2", symbol_name="calc_c2", kind="output", raw_formula='=SUMIFS(A1:A2,B1:B2,"y")'),
|
|
1315
|
+
GeneratedSymbol(cell_ref="Calc!C3", symbol_name="calc_c3", kind="output", raw_formula='=COUNTIFS(B1:B2,"x")'),
|
|
1316
|
+
),
|
|
1317
|
+
)
|
|
1318
|
+
amount_range = normalize_reference("Data!A1:A2")
|
|
1319
|
+
label_range = normalize_reference("Data!B1:B2")
|
|
1320
|
+
expressions = {
|
|
1321
|
+
"Calc!C1": formula_expression(
|
|
1322
|
+
"Calc!C1",
|
|
1323
|
+
'=SUMIFS(A1:A2,B1:B2,"x")',
|
|
1324
|
+
FormulaExpressionNode.function_call(
|
|
1325
|
+
"SUMIFS",
|
|
1326
|
+
(
|
|
1327
|
+
FormulaExpressionNode.reference_to(amount_range),
|
|
1328
|
+
FormulaExpressionNode.reference_to(label_range),
|
|
1329
|
+
FormulaExpressionNode.literal("x"),
|
|
1330
|
+
),
|
|
1331
|
+
),
|
|
1332
|
+
),
|
|
1333
|
+
"Calc!C2": formula_expression(
|
|
1334
|
+
"Calc!C2",
|
|
1335
|
+
'=SUMIFS(A1:A2,B1:B2,"y")',
|
|
1336
|
+
FormulaExpressionNode.function_call(
|
|
1337
|
+
"SUMIFS",
|
|
1338
|
+
(
|
|
1339
|
+
FormulaExpressionNode.reference_to(amount_range),
|
|
1340
|
+
FormulaExpressionNode.reference_to(label_range),
|
|
1341
|
+
FormulaExpressionNode.literal("y"),
|
|
1342
|
+
),
|
|
1343
|
+
),
|
|
1344
|
+
),
|
|
1345
|
+
"Calc!C3": formula_expression(
|
|
1346
|
+
"Calc!C3",
|
|
1347
|
+
'=COUNTIFS(B1:B2,"x")',
|
|
1348
|
+
FormulaExpressionNode.function_call(
|
|
1349
|
+
"COUNTIFS",
|
|
1350
|
+
(
|
|
1351
|
+
FormulaExpressionNode.reference_to(label_range),
|
|
1352
|
+
FormulaExpressionNode.literal("x"),
|
|
1353
|
+
),
|
|
1354
|
+
),
|
|
1355
|
+
),
|
|
1356
|
+
}
|
|
1357
|
+
output_path = tmp_path / "generated_range_cache.py"
|
|
1358
|
+
|
|
1359
|
+
result = generate_python_module(
|
|
1360
|
+
contract=contract,
|
|
1361
|
+
expressions=expressions,
|
|
1362
|
+
constants={"Data!A1": 2, "Data!A2": 5, "Data!B1": "x", "Data!B2": "y"},
|
|
1363
|
+
output_path=output_path,
|
|
1364
|
+
)
|
|
1365
|
+
module = load_module(output_path)
|
|
1366
|
+
|
|
1367
|
+
assert result.generated is True
|
|
1368
|
+
assert "_range_cache.get(key)" in result.source_code
|
|
1369
|
+
assert "def values(self):" in result.source_code
|
|
1370
|
+
assert "def lazy_values(self):" in result.source_code
|
|
1371
|
+
assert module.calculate() == {"Calc!C1": 2, "Calc!C2": 5, "Calc!C3": 1}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|