modelwright 0.1.0a2__tar.gz → 0.1.0a3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. {modelwright-0.1.0a2/src/modelwright.egg-info → modelwright-0.1.0a3}/PKG-INFO +2 -2
  2. {modelwright-0.1.0a2 → modelwright-0.1.0a3}/README.md +1 -1
  3. {modelwright-0.1.0a2 → modelwright-0.1.0a3}/pyproject.toml +1 -1
  4. {modelwright-0.1.0a2 → modelwright-0.1.0a3}/src/modelwright/__init__.py +1 -1
  5. {modelwright-0.1.0a2 → modelwright-0.1.0a3}/src/modelwright/generation.py +141 -32
  6. {modelwright-0.1.0a2 → modelwright-0.1.0a3/src/modelwright.egg-info}/PKG-INFO +2 -2
  7. {modelwright-0.1.0a2 → modelwright-0.1.0a3}/tests/test_generation_contract.py +1 -0
  8. {modelwright-0.1.0a2 → modelwright-0.1.0a3}/tests/test_import.py +1 -1
  9. {modelwright-0.1.0a2 → modelwright-0.1.0a3}/tests/test_python_generation.py +179 -0
  10. {modelwright-0.1.0a2 → modelwright-0.1.0a3}/LICENSE +0 -0
  11. {modelwright-0.1.0a2 → modelwright-0.1.0a3}/setup.cfg +0 -0
  12. {modelwright-0.1.0a2 → modelwright-0.1.0a3}/src/modelwright/cli.py +0 -0
  13. {modelwright-0.1.0a2 → modelwright-0.1.0a3}/src/modelwright/conversion.py +0 -0
  14. {modelwright-0.1.0a2 → modelwright-0.1.0a3}/src/modelwright/evaluation.py +0 -0
  15. {modelwright-0.1.0a2 → modelwright-0.1.0a3}/src/modelwright/execution.py +0 -0
  16. {modelwright-0.1.0a2 → modelwright-0.1.0a3}/src/modelwright/extraction.py +0 -0
  17. {modelwright-0.1.0a2 → modelwright-0.1.0a3}/src/modelwright/formulas.py +0 -0
  18. {modelwright-0.1.0a2 → modelwright-0.1.0a3}/src/modelwright/formulas_oracle.py +0 -0
  19. {modelwright-0.1.0a2 → modelwright-0.1.0a3}/src/modelwright/graph.py +0 -0
  20. {modelwright-0.1.0a2 → modelwright-0.1.0a3}/src/modelwright/oracle_validation.py +0 -0
  21. {modelwright-0.1.0a2 → modelwright-0.1.0a3}/src/modelwright/oracles.py +0 -0
  22. {modelwright-0.1.0a2 → modelwright-0.1.0a3}/src/modelwright/references.py +0 -0
  23. {modelwright-0.1.0a2 → modelwright-0.1.0a3}/src/modelwright/validation.py +0 -0
  24. {modelwright-0.1.0a2 → modelwright-0.1.0a3}/src/modelwright.egg-info/SOURCES.txt +0 -0
  25. {modelwright-0.1.0a2 → modelwright-0.1.0a3}/src/modelwright.egg-info/dependency_links.txt +0 -0
  26. {modelwright-0.1.0a2 → modelwright-0.1.0a3}/src/modelwright.egg-info/entry_points.txt +0 -0
  27. {modelwright-0.1.0a2 → modelwright-0.1.0a3}/src/modelwright.egg-info/requires.txt +0 -0
  28. {modelwright-0.1.0a2 → modelwright-0.1.0a3}/src/modelwright.egg-info/top_level.txt +0 -0
  29. {modelwright-0.1.0a2 → modelwright-0.1.0a3}/tests/test_cli.py +0 -0
  30. {modelwright-0.1.0a2 → modelwright-0.1.0a3}/tests/test_conversion_plan.py +0 -0
  31. {modelwright-0.1.0a2 → modelwright-0.1.0a3}/tests/test_dependency_graph.py +0 -0
  32. {modelwright-0.1.0a2 → modelwright-0.1.0a3}/tests/test_evaluation_orchestration.py +0 -0
  33. {modelwright-0.1.0a2 → modelwright-0.1.0a3}/tests/test_extraction_records.py +0 -0
  34. {modelwright-0.1.0a2 → modelwright-0.1.0a3}/tests/test_formula_expressions.py +0 -0
  35. {modelwright-0.1.0a2 → modelwright-0.1.0a3}/tests/test_formula_translation.py +0 -0
  36. {modelwright-0.1.0a2 → modelwright-0.1.0a3}/tests/test_formulas_oracle.py +0 -0
  37. {modelwright-0.1.0a2 → modelwright-0.1.0a3}/tests/test_generated_execution.py +0 -0
  38. {modelwright-0.1.0a2 → modelwright-0.1.0a3}/tests/test_materialize_fable_benchmarks.py +0 -0
  39. {modelwright-0.1.0a2 → modelwright-0.1.0a3}/tests/test_openpyxl_extraction.py +0 -0
  40. {modelwright-0.1.0a2 → modelwright-0.1.0a3}/tests/test_oracle_backed_validation.py +0 -0
  41. {modelwright-0.1.0a2 → modelwright-0.1.0a3}/tests/test_oracle_interface.py +0 -0
  42. {modelwright-0.1.0a2 → modelwright-0.1.0a3}/tests/test_public_api.py +0 -0
  43. {modelwright-0.1.0a2 → modelwright-0.1.0a3}/tests/test_references.py +0 -0
  44. {modelwright-0.1.0a2 → modelwright-0.1.0a3}/tests/test_scalar_comparison.py +0 -0
  45. {modelwright-0.1.0a2 → modelwright-0.1.0a3}/tests/test_supported_semantics_fixture.py +0 -0
  46. {modelwright-0.1.0a2 → modelwright-0.1.0a3}/tests/test_synthetic_fixture.py +0 -0
  47. {modelwright-0.1.0a2 → modelwright-0.1.0a3}/tests/test_validation.py +0 -0
  48. {modelwright-0.1.0a2 → modelwright-0.1.0a3}/tests/test_validation_regression.py +0 -0
  49. {modelwright-0.1.0a2 → modelwright-0.1.0a3}/tests/test_validation_report_builder.py +0 -0
  50. {modelwright-0.1.0a2 → modelwright-0.1.0a3}/tests/test_validation_scenario.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: modelwright
3
- Version: 0.1.0a2
3
+ Version: 0.1.0a3
4
4
  Summary: Tools for converting spreadsheet workbooks into transparent Python models.
5
5
  Author: UBC FRESH Lab
6
6
  License-Expression: MIT
@@ -135,7 +135,7 @@ Restore the public external FABLE benchmark workbooks into ignored local paths:
135
135
  scripts/bootstrap_dev_env.sh --benchmarks
136
136
  ```
137
137
 
138
- `modelwright` is pre-release. The current alpha line is `0.1.0a2`; alpha releases must not be described as full-workbook conversion guarantees.
138
+ `modelwright` is pre-release. The current alpha line is `0.1.0a3`; alpha releases must not be described as full-workbook conversion guarantees.
139
139
 
140
140
  Check release artifacts locally:
141
141
 
@@ -84,7 +84,7 @@ Restore the public external FABLE benchmark workbooks into ignored local paths:
84
84
  scripts/bootstrap_dev_env.sh --benchmarks
85
85
  ```
86
86
 
87
- `modelwright` is pre-release. The current alpha line is `0.1.0a2`; alpha releases must not be described as full-workbook conversion guarantees.
87
+ `modelwright` is pre-release. The current alpha line is `0.1.0a3`; alpha releases must not be described as full-workbook conversion guarantees.
88
88
 
89
89
  Check release artifacts locally:
90
90
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "modelwright"
7
- version = "0.1.0a2"
7
+ version = "0.1.0a3"
8
8
  description = "Tools for converting spreadsheet workbooks into transparent Python models."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -81,7 +81,7 @@ from modelwright.validation import (
81
81
  load_validation_scenario,
82
82
  )
83
83
 
84
- __version__ = "0.1.0a2"
84
+ __version__ = "0.1.0a3"
85
85
 
86
86
  __all__ = [
87
87
  "CellRecord",
@@ -17,7 +17,10 @@ from modelwright.references import WorkbookReference
17
17
 
18
18
 
19
19
  JsonValue = str | int | float | bool | None | list[Any] | dict[str, Any]
20
+ DEFAULT_INLINE_PROVENANCE_COMMENT_LIMIT = 50_000
21
+ DEFAULT_INLINE_FORMULA_LAMBDA_LIMIT = 50_000
20
22
  DiagnosticSeverity = Literal["info", "warning", "error"]
23
+ FormulaStorage = Literal["lambdas", "expression_source"]
21
24
  GeneratedSymbolKind = Literal["input", "intermediate", "output"]
22
25
 
23
26
 
@@ -89,6 +92,7 @@ class GeneratedModuleContract:
89
92
  output_refs: tuple[str, ...] = field(default_factory=tuple)
90
93
  symbols: tuple[GeneratedSymbol, ...] = field(default_factory=tuple)
91
94
  include_provenance_comments: bool = True
95
+ formula_storage: FormulaStorage = "lambdas"
92
96
 
93
97
  @classmethod
94
98
  def from_dict(cls, data: dict[str, Any]) -> "GeneratedModuleContract":
@@ -100,6 +104,7 @@ class GeneratedModuleContract:
100
104
  output_refs=tuple(data.get("output_refs", [])),
101
105
  symbols=tuple(GeneratedSymbol.from_dict(item) for item in data.get("symbols", [])),
102
106
  include_provenance_comments=data.get("include_provenance_comments", True),
107
+ formula_storage=data.get("formula_storage", "lambdas"),
103
108
  )
104
109
 
105
110
  def to_dict(self) -> dict[str, JsonValue]:
@@ -111,6 +116,7 @@ class GeneratedModuleContract:
111
116
  "output_refs": list(self.output_refs),
112
117
  "symbols": [symbol.to_dict() for symbol in self.symbols],
113
118
  "include_provenance_comments": self.include_provenance_comments,
119
+ "formula_storage": self.formula_storage,
114
120
  }
115
121
 
116
122
 
@@ -186,6 +192,8 @@ def infer_generated_module_contract(
186
192
  module_name: str,
187
193
  input_refs: Sequence[str] = (),
188
194
  progress: Callable[[str], None] | None = None,
195
+ inline_provenance_comment_limit: int | None = DEFAULT_INLINE_PROVENANCE_COMMENT_LIMIT,
196
+ inline_formula_lambda_limit: int | None = DEFAULT_INLINE_FORMULA_LAMBDA_LIMIT,
189
197
  ) -> GeneratedContractInferenceResult:
190
198
  """Infer a generated module contract by walking dependencies for selected outputs."""
191
199
 
@@ -242,7 +250,9 @@ def infer_generated_module_contract(
242
250
  )
243
251
 
244
252
  input_order: list[str] = []
253
+ input_seen: set[str] = set()
245
254
  formula_order: list[str] = []
255
+ formula_seen: set[str] = set()
246
256
  visiting: set[str] = set()
247
257
  visited: set[str] = set()
248
258
  circular_dependency_locations: set[str] = set()
@@ -255,12 +265,11 @@ def infer_generated_module_contract(
255
265
  if isinstance(dependency, str):
256
266
  refs.append(dependency)
257
267
  continue
258
- refs.extend(
259
- expanded_range_dependencies.setdefault(
260
- dependency.normalized,
261
- _expand_range_dependency(dependency),
262
- )
263
- )
268
+ expanded = expanded_range_dependencies.get(dependency.normalized)
269
+ if expanded is None:
270
+ expanded = _expand_range_dependency(dependency)
271
+ expanded_range_dependencies[dependency.normalized] = expanded
272
+ refs.extend(expanded)
264
273
  return tuple(refs)
265
274
 
266
275
  def visit(root_ref: str) -> None:
@@ -273,8 +282,9 @@ def infer_generated_module_contract(
273
282
 
274
283
  if dependencies_processed:
275
284
  visiting.discard(cell_ref)
276
- if cell_ref not in formula_order:
285
+ if cell_ref not in formula_seen:
277
286
  formula_order.append(cell_ref)
287
+ formula_seen.add(cell_ref)
278
288
  visited.add(cell_ref)
279
289
  continue
280
290
 
@@ -303,14 +313,16 @@ def infer_generated_module_contract(
303
313
 
304
314
  cell = cell_by_ref.get(cell_ref)
305
315
  if cell is None:
306
- if cell_ref not in input_order:
316
+ if cell_ref not in input_seen:
307
317
  input_order.append(cell_ref)
318
+ input_seen.add(cell_ref)
308
319
  visited.add(cell_ref)
309
320
  continue
310
321
 
311
322
  if cell_ref in explicit_inputs or cell.formula is None:
312
- if cell_ref not in input_order:
323
+ if cell_ref not in input_seen:
313
324
  input_order.append(cell_ref)
325
+ input_seen.add(cell_ref)
314
326
  visited.add(cell_ref)
315
327
  continue
316
328
 
@@ -368,6 +380,12 @@ def infer_generated_module_contract(
368
380
  input_refs=tuple(input_order),
369
381
  output_refs=selected_outputs,
370
382
  symbols=symbols,
383
+ include_provenance_comments=(
384
+ inline_provenance_comment_limit is None or len(formula_order) <= inline_provenance_comment_limit
385
+ ),
386
+ formula_storage="lambdas"
387
+ if inline_formula_lambda_limit is None or len(formula_order) <= inline_formula_lambda_limit
388
+ else "expression_source",
371
389
  )
372
390
  return GeneratedContractInferenceResult(
373
391
  contract=contract,
@@ -478,6 +496,44 @@ def _render_module(
478
496
  '"""',
479
497
  "",
480
498
  "import fnmatch",
499
+ "from functools import lru_cache",
500
+ "",
501
+ "",
502
+ "class _SfRangeView:",
503
+ " def __init__(self, sheet, min_col, min_row, max_col, max_row, get_value):",
504
+ " self.sheet = sheet",
505
+ " self.min_col = min_col",
506
+ " self.min_row = min_row",
507
+ " self.max_col = max_col",
508
+ " self.max_row = max_row",
509
+ " self._get_value = get_value",
510
+ " self._values = None",
511
+ " self._lazy_values = None",
512
+ " self._value_calls = 0",
513
+ " self._lazy_value_calls = 0",
514
+ "",
515
+ " def _refs(self):",
516
+ " for row in range(self.min_row, self.max_row + 1):",
517
+ " for column in range(self.min_col, self.max_col + 1):",
518
+ " yield f'{self.sheet}!{_sf_column_name(column)}{row}'",
519
+ "",
520
+ " def values(self):",
521
+ " if self._values is not None:",
522
+ " return self._values",
523
+ " values = tuple(self._get_value(ref) for ref in self._refs())",
524
+ " self._value_calls += 1",
525
+ " if self._value_calls > 1:",
526
+ " self._values = values",
527
+ " return values",
528
+ "",
529
+ " def lazy_values(self):",
530
+ " if self._lazy_values is not None:",
531
+ " return self._lazy_values",
532
+ " values = tuple(lambda ref=ref: self._get_value(ref) for ref in self._refs())",
533
+ " self._lazy_value_calls += 1",
534
+ " if self._lazy_value_calls > 1:",
535
+ " self._lazy_values = values",
536
+ " return values",
481
537
  "",
482
538
  "",
483
539
  "def _sf_column_name(index):",
@@ -490,6 +546,9 @@ def _render_module(
490
546
  "",
491
547
  "def _sf_flatten(values):",
492
548
  " for value in values:",
549
+ " if isinstance(value, _SfRangeView):",
550
+ " yield from value.values()",
551
+ " continue",
493
552
  " if isinstance(value, (list, tuple)):",
494
553
  " yield from _sf_flatten(value)",
495
554
  " else:",
@@ -498,6 +557,9 @@ def _render_module(
498
557
  "",
499
558
  "def _sf_flatten_lazy(values):",
500
559
  " for value in values:",
560
+ " if isinstance(value, _SfRangeView):",
561
+ " yield from value.lazy_values()",
562
+ " continue",
501
563
  " if isinstance(value, (list, tuple)):",
502
564
  " yield from _sf_flatten_lazy(value)",
503
565
  " else:",
@@ -525,6 +587,7 @@ def _render_module(
525
587
  " return value",
526
588
  "",
527
589
  "",
590
+ "@lru_cache(maxsize=4096)",
528
591
  "def _sf_numeric_value(value):",
529
592
  " if isinstance(value, bool):",
530
593
  " return None",
@@ -605,17 +668,20 @@ def _render_module(
605
668
  " raise ValueError(f'unsupported criteria operator: {operator}')",
606
669
  "",
607
670
  "",
608
- "def _sf_matches_criteria(value, criteria):",
671
+ "def _sf_criteria_matcher(criteria):",
609
672
  " if isinstance(criteria, str):",
610
673
  " for operator in ('>=', '<=', '<>', '>', '<', '='):",
611
674
  " if criteria.startswith(operator):",
612
- " expected = _sf_coerce_criteria(criteria[len(operator):], value)",
613
- " return _sf_compare_criteria(value, operator, expected)",
675
+ " raw_expected = criteria[len(operator):]",
676
+ " return lambda value: _sf_compare_criteria(value, operator, _sf_coerce_criteria(raw_expected, value))",
614
677
  " if '*' in criteria or '?' in criteria:",
615
- " return fnmatch.fnmatchcase(str(value), criteria)",
616
- " expected = _sf_coerce_criteria(criteria, value)",
617
- " return _sf_compare_criteria(value, '=', expected)",
618
- " return _sf_compare_criteria(value, '=', criteria)",
678
+ " return lambda value: fnmatch.fnmatchcase(str(value), criteria)",
679
+ " return lambda value: _sf_compare_criteria(value, '=', _sf_coerce_criteria(criteria, value))",
680
+ " return lambda value: _sf_compare_criteria(value, '=', criteria)",
681
+ "",
682
+ "",
683
+ "def _sf_matches_criteria(value, criteria):",
684
+ " return _sf_criteria_matcher(criteria)(value)",
619
685
  "",
620
686
  "",
621
687
  "def _sf_lookup_equal(left, right):",
@@ -627,24 +693,26 @@ def _render_module(
627
693
  "def _sf_sumif(criteria_range, criteria, sum_range=None):",
628
694
  " criteria_values = tuple(_sf_flatten((criteria_range,)))",
629
695
  " sum_values = criteria_values if sum_range is None else tuple(_sf_flatten_lazy((sum_range,)))",
696
+ " matcher = _sf_criteria_matcher(criteria)",
630
697
  " total = 0",
631
698
  " for criteria_value, sum_value in zip(criteria_values, sum_values):",
632
- " if _sf_matches_criteria(criteria_value, criteria):",
699
+ " if matcher(criteria_value):",
633
700
  " total += _sf_sum_value(sum_value)",
634
701
  " return total",
635
702
  "",
636
703
  "",
637
704
  "def _sf_countif(criteria_range, criteria):",
638
- " return sum(1 for value in _sf_flatten((criteria_range,)) if _sf_matches_criteria(value, criteria))",
705
+ " matcher = _sf_criteria_matcher(criteria)",
706
+ " return sum(1 for value in _sf_flatten((criteria_range,)) if matcher(value))",
639
707
  "",
640
708
  "",
641
709
  "def _sf_sumifs(sum_range, *criteria_pairs):",
642
710
  " sum_values = tuple(_sf_flatten_lazy((sum_range,)))",
643
711
  " criteria_ranges = [tuple(_sf_flatten((criteria_range,))) for criteria_range, _criteria in criteria_pairs]",
644
- " criteria_values = tuple(criteria for _range, criteria in criteria_pairs)",
712
+ " criteria_matchers = tuple(_sf_criteria_matcher(criteria) for _range, criteria in criteria_pairs)",
645
713
  " total = 0",
646
714
  " for index, sum_value in enumerate(sum_values):",
647
- " if all(_sf_matches_criteria(criteria_range[index], criteria) for criteria_range, criteria in zip(criteria_ranges, criteria_values)):",
715
+ " if all(matcher(criteria_range[index]) for criteria_range, matcher in zip(criteria_ranges, criteria_matchers)):",
648
716
  " total += _sf_sum_value(sum_value)",
649
717
  " return total",
650
718
  "",
@@ -653,11 +721,11 @@ def _render_module(
653
721
  " criteria_ranges = [tuple(_sf_flatten((criteria_range,))) for criteria_range, _criteria in criteria_pairs]",
654
722
  " if not criteria_ranges:",
655
723
  " return 0",
656
- " criteria_values = tuple(criteria for _range, criteria in criteria_pairs)",
724
+ " criteria_matchers = tuple(_sf_criteria_matcher(criteria) for _range, criteria in criteria_pairs)",
657
725
  " return sum(",
658
726
  " 1",
659
727
  " for index in range(len(criteria_ranges[0]))",
660
- " if all(_sf_matches_criteria(criteria_range[index], criteria) for criteria_range, criteria in zip(criteria_ranges, criteria_values))",
728
+ " if all(matcher(criteria_range[index]) for criteria_range, matcher in zip(criteria_ranges, criteria_matchers))",
661
729
  " )",
662
730
  "",
663
731
  "",
@@ -699,6 +767,7 @@ def _render_module(
699
767
  f"def {contract.entrypoint}(inputs=None):",
700
768
  " inputs = {} if inputs is None else dict(inputs)",
701
769
  " _cache = {}",
770
+ " _range_cache = {}",
702
771
  " _stack = []",
703
772
  " _evaluated_count = 0",
704
773
  " _constants = {",
@@ -731,7 +800,7 @@ def _render_module(
731
800
  " raise RuntimeError('circular dependency during generated model execution: ' + ' -> '.join(cycle))",
732
801
  " _stack.append(cell_ref)",
733
802
  " try:",
734
- " value = formula()",
803
+ " value = _evaluate_formula(cell_ref, formula)",
735
804
  " finally:",
736
805
  " _stack.pop()",
737
806
  " _cache[cell_ref] = value",
@@ -750,11 +819,12 @@ def _render_module(
750
819
  " return value",
751
820
  "",
752
821
  " def _range(sheet, min_col, min_row, max_col, max_row):",
753
- " return tuple(",
754
- " lambda ref=f'{sheet}!{_sf_column_name(column)}{row}': _get(ref)",
755
- " for row in range(min_row, max_row + 1)",
756
- " for column in range(min_col, max_col + 1)",
757
- " )",
822
+ " key = (sheet, min_col, min_row, max_col, max_row)",
823
+ " view = _range_cache.get(key)",
824
+ " if view is None:",
825
+ " view = _SfRangeView(sheet, min_col, min_row, max_col, max_row, _get)",
826
+ " _range_cache[key] = view",
827
+ " return view",
758
828
  "",
759
829
  " def _table(sheet, min_col, min_row, max_col, max_row):",
760
830
  " return tuple(",
@@ -762,6 +832,36 @@ def _render_module(
762
832
  " for row in range(min_row, max_row + 1)",
763
833
  " )",
764
834
  "",
835
+ ]
836
+ )
837
+ if contract.formula_storage == "lambdas":
838
+ lines.extend(
839
+ [
840
+ " def _evaluate_formula(_cell_ref, formula):",
841
+ " return formula()",
842
+ "",
843
+ ]
844
+ )
845
+ elif contract.formula_storage == "expression_source":
846
+ lines.extend(
847
+ [
848
+ " _formula_globals = dict(globals())",
849
+ " _formula_globals.update({",
850
+ " '_get': _get,",
851
+ " '_range': _range,",
852
+ " '_table': _table,",
853
+ " })",
854
+ "",
855
+ " def _evaluate_formula(cell_ref, formula):",
856
+ " code = compile(formula, f'<modelwright formula {cell_ref}>', 'eval')",
857
+ " return eval(code, _formula_globals)",
858
+ "",
859
+ ]
860
+ )
861
+ else:
862
+ raise ValueError(f"unsupported formula storage: {contract.formula_storage}")
863
+ lines.extend(
864
+ [
765
865
  " _formulas = {",
766
866
  ]
767
867
  )
@@ -771,7 +871,11 @@ def _render_module(
771
871
  lines.append(f" # {symbol.cell_ref}" + (f": {symbol.raw_formula}" if symbol.raw_formula else ""))
772
872
 
773
873
  expression = expressions[symbol.cell_ref]
774
- lines.append(f" {symbol.cell_ref!r}: lambda: {_render_formula_root(expression.root)},")
874
+ rendered_formula = _render_formula_root(expression.root)
875
+ if contract.formula_storage == "lambdas":
876
+ lines.append(f" {symbol.cell_ref!r}: lambda: {rendered_formula},")
877
+ else:
878
+ lines.append(f" {symbol.cell_ref!r}: {rendered_formula!r},")
775
879
 
776
880
  if index == 1 or index % 10000 == 0 or index == len(formula_symbols):
777
881
  _progress(
@@ -781,12 +885,17 @@ def _render_module(
781
885
  lines.extend(
782
886
  [
783
887
  " }",
784
- " return {",
888
+ " _output_refs = (",
785
889
  ]
786
890
  )
787
891
  for output_ref in contract.output_refs:
788
- lines.append(f" {output_ref!r}: _get({output_ref!r}),")
789
- lines.append(" }")
892
+ lines.append(f" {output_ref!r},")
893
+ lines.extend(
894
+ [
895
+ " )",
896
+ " return {cell_ref: _get(cell_ref) for cell_ref in _output_refs}",
897
+ ]
898
+ )
790
899
  lines.append("")
791
900
  return "\n".join(lines)
792
901
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: modelwright
3
- Version: 0.1.0a2
3
+ Version: 0.1.0a3
4
4
  Summary: Tools for converting spreadsheet workbooks into transparent Python models.
5
5
  Author: UBC FRESH Lab
6
6
  License-Expression: MIT
@@ -135,7 +135,7 @@ Restore the public external FABLE benchmark workbooks into ignored local paths:
135
135
  scripts/bootstrap_dev_env.sh --benchmarks
136
136
  ```
137
137
 
138
- `modelwright` is pre-release. The current alpha line is `0.1.0a2`; alpha releases must not be described as full-workbook conversion guarantees.
138
+ `modelwright` is pre-release. The current alpha line is `0.1.0a3`; alpha releases must not be described as full-workbook conversion guarantees.
139
139
 
140
140
  Check release artifacts locally:
141
141
 
@@ -44,6 +44,7 @@ def test_generated_module_contract_serializes_provenance() -> None:
44
44
  assert payload["entrypoint"] == "calculate"
45
45
  assert payload["output_refs"] == ["Summary!B2", "Summary!B3"]
46
46
  assert payload["include_provenance_comments"] is True
47
+ assert payload["formula_storage"] == "lambdas"
47
48
  assert payload["symbols"][0] == {
48
49
  "cell_ref": "Summary!B2",
49
50
  "symbol_name": "summary_b2",
@@ -2,4 +2,4 @@ import modelwright
2
2
 
3
3
 
4
4
  def test_package_imports() -> None:
5
- assert modelwright.__version__ == "0.1.0a2"
5
+ assert modelwright.__version__ == "0.1.0a3"
@@ -119,6 +119,7 @@ def test_infer_generated_module_contract_for_synthetic_outputs(tmp_path: Path) -
119
119
  assert result.diagnostics == ()
120
120
  assert result.contract.input_refs == ("Inputs!B2", "Inputs!B3", "Inputs!B4")
121
121
  assert result.constants == {"Inputs!B2": 100, "Inputs!B3": 0.08, "Inputs!B4": 0.65}
122
+ assert result.contract.include_provenance_comments is True
122
123
  assert tuple(symbol.cell_ref for symbol in result.contract.symbols) == (
123
124
  "Inputs!B2",
124
125
  "Inputs!B3",
@@ -141,6 +142,107 @@ def test_infer_generated_module_contract_for_synthetic_outputs(tmp_path: Path) -
141
142
  ]
142
143
 
143
144
 
145
+ def test_infer_generated_module_contract_disables_large_inline_provenance_comments(tmp_path: Path) -> None:
146
+ workbook = extract_workbook(build_workbook(tmp_path / "synthetic_model.xlsx"))
147
+ graph = build_dependency_graph(workbook)
148
+ formula_cells = {cell.cell_ref: cell for cell in workbook.cells if cell.formula is not None}
149
+ expressions = {
150
+ cell_ref: translate_formula_cell(cell, graph)
151
+ for cell_ref, cell in formula_cells.items()
152
+ }
153
+
154
+ result = infer_generated_module_contract(
155
+ workbook=workbook,
156
+ graph=graph,
157
+ expressions=expressions,
158
+ output_refs=("Summary!B2", "Summary!B3"),
159
+ module_name="synthetic_model",
160
+ inline_provenance_comment_limit=2,
161
+ )
162
+
163
+ assert result.inferred is True
164
+ assert result.contract.include_provenance_comments is False
165
+ assert any(symbol.raw_formula == "=BaseVolume*(1+GrowthRate)" for symbol in result.contract.symbols)
166
+
167
+
168
+ def test_infer_generated_module_contract_uses_expression_sources_for_large_formula_sets(tmp_path: Path) -> None:
169
+ workbook = extract_workbook(build_workbook(tmp_path / "synthetic_model.xlsx"))
170
+ graph = build_dependency_graph(workbook)
171
+ formula_cells = {cell.cell_ref: cell for cell in workbook.cells if cell.formula is not None}
172
+ expressions = {
173
+ cell_ref: translate_formula_cell(cell, graph)
174
+ for cell_ref, cell in formula_cells.items()
175
+ }
176
+
177
+ result = infer_generated_module_contract(
178
+ workbook=workbook,
179
+ graph=graph,
180
+ expressions=expressions,
181
+ output_refs=("Summary!B2", "Summary!B3"),
182
+ module_name="synthetic_model",
183
+ inline_formula_lambda_limit=2,
184
+ )
185
+
186
+ assert result.inferred is True
187
+ assert result.contract.formula_storage == "expression_source"
188
+
189
+
190
+ def test_generate_python_module_can_omit_inline_provenance_comments(tmp_path: Path) -> None:
191
+ contract, expressions, constants = synthetic_generation_inputs(tmp_path)
192
+ compact_contract = GeneratedModuleContract(
193
+ workbook_id=contract.workbook_id,
194
+ module_name=contract.module_name,
195
+ entrypoint=contract.entrypoint,
196
+ input_refs=contract.input_refs,
197
+ output_refs=contract.output_refs,
198
+ symbols=contract.symbols,
199
+ include_provenance_comments=False,
200
+ )
201
+ output_path = tmp_path / "generated_compact_model.py"
202
+
203
+ result = generate_python_module(
204
+ contract=compact_contract,
205
+ expressions=expressions,
206
+ constants=constants,
207
+ output_path=output_path,
208
+ )
209
+ module = load_module(output_path)
210
+
211
+ assert result.generated is True
212
+ assert "# Calc!B2: =BaseVolume*(1+GrowthRate)" not in result.source_code
213
+ assert module.calculate() == {"Summary!B2": 70.2, "Summary!B3": "ok"}
214
+
215
+
216
+ def test_generate_python_module_can_store_formula_expression_sources(tmp_path: Path) -> None:
217
+ contract, expressions, constants = synthetic_generation_inputs(tmp_path)
218
+ compact_contract = GeneratedModuleContract(
219
+ workbook_id=contract.workbook_id,
220
+ module_name=contract.module_name,
221
+ entrypoint=contract.entrypoint,
222
+ input_refs=contract.input_refs,
223
+ output_refs=contract.output_refs,
224
+ symbols=contract.symbols,
225
+ formula_storage="expression_source",
226
+ )
227
+ output_path = tmp_path / "generated_expression_source_model.py"
228
+
229
+ result = generate_python_module(
230
+ contract=compact_contract,
231
+ expressions=expressions,
232
+ constants=constants,
233
+ output_path=output_path,
234
+ )
235
+ module = load_module(output_path)
236
+
237
+ assert result.generated is True
238
+ assert " def _evaluate_formula(cell_ref, formula):" in result.source_code
239
+ assert "compile(formula, f'<modelwright formula {cell_ref}>', 'eval')" in result.source_code
240
+ assert "_formula_code_cache" not in result.source_code
241
+ assert "lambda: _sf_direct_reference" not in result.source_code
242
+ assert module.calculate() == {"Summary!B2": 70.2, "Summary!B3": "ok"}
243
+ assert module.calculate({"Inputs!B2": 10}) == {"Summary!B2": 7.02, "Summary!B3": "low"}
244
+
245
+
144
246
  def test_infer_generated_module_contract_ignores_unreached_dependency_diagnostics(tmp_path: Path) -> None:
145
247
  workbook = extract_workbook(build_workbook(tmp_path / "synthetic_model.xlsx"))
146
248
  graph = build_dependency_graph(workbook)
@@ -317,6 +419,8 @@ def test_inferred_generated_module_runs_synthetic_model(tmp_path: Path) -> None:
317
419
  module = load_module(output_path)
318
420
 
319
421
  assert generation.generated is True
422
+ assert " _output_refs = (" in generation.source_code
423
+ assert " return {cell_ref: _get(cell_ref) for cell_ref in _output_refs}" in generation.source_code
320
424
  assert module.calculate() == {"Summary!B2": 70.2, "Summary!B3": "ok"}
321
425
  assert module.calculate({"Inputs!B2": 10}) == {"Summary!B2": 7.02, "Summary!B3": "low"}
322
426
 
@@ -623,6 +727,9 @@ def test_generate_python_module_renders_criteria_functions(tmp_path: Path) -> No
623
727
 
624
728
  assert result.generated is True
625
729
  assert "_sf_sumif" in result.source_code
730
+ assert "_sf_criteria_matcher" in result.source_code
731
+ assert "class _SfRangeView" in result.source_code
732
+ assert "_range_cache = {}" in result.source_code
626
733
  assert module.calculate() == {
627
734
  "Calc!B1": 4,
628
735
  "Calc!B2": 1,
@@ -1190,3 +1297,75 @@ def test_generate_python_module_skips_excluded_sumifs_sum_cells(tmp_path: Path)
1190
1297
  assert module.calculate() == {"Calc!C1": 5}
1191
1298
  with pytest.raises(RuntimeError, match="Data!A2 -> Data!A2"):
1192
1299
  module.calculate({"Data!B1": "skip", "Data!B2": "x"})
1300
+
1301
+
1302
+ def test_generate_python_module_reuses_range_views_without_changing_results(tmp_path: Path) -> None:
1303
+ contract = GeneratedModuleContract(
1304
+ workbook_id="range-cache.xlsx",
1305
+ module_name="range_cache",
1306
+ input_refs=("Data!A1", "Data!A2", "Data!B1", "Data!B2"),
1307
+ output_refs=("Calc!C1", "Calc!C2", "Calc!C3"),
1308
+ symbols=(
1309
+ GeneratedSymbol(cell_ref="Data!A1", symbol_name="data_a1", kind="input"),
1310
+ GeneratedSymbol(cell_ref="Data!A2", symbol_name="data_a2", kind="input"),
1311
+ GeneratedSymbol(cell_ref="Data!B1", symbol_name="data_b1", kind="input"),
1312
+ GeneratedSymbol(cell_ref="Data!B2", symbol_name="data_b2", kind="input"),
1313
+ GeneratedSymbol(cell_ref="Calc!C1", symbol_name="calc_c1", kind="output", raw_formula='=SUMIFS(A1:A2,B1:B2,"x")'),
1314
+ GeneratedSymbol(cell_ref="Calc!C2", symbol_name="calc_c2", kind="output", raw_formula='=SUMIFS(A1:A2,B1:B2,"y")'),
1315
+ GeneratedSymbol(cell_ref="Calc!C3", symbol_name="calc_c3", kind="output", raw_formula='=COUNTIFS(B1:B2,"x")'),
1316
+ ),
1317
+ )
1318
+ amount_range = normalize_reference("Data!A1:A2")
1319
+ label_range = normalize_reference("Data!B1:B2")
1320
+ expressions = {
1321
+ "Calc!C1": formula_expression(
1322
+ "Calc!C1",
1323
+ '=SUMIFS(A1:A2,B1:B2,"x")',
1324
+ FormulaExpressionNode.function_call(
1325
+ "SUMIFS",
1326
+ (
1327
+ FormulaExpressionNode.reference_to(amount_range),
1328
+ FormulaExpressionNode.reference_to(label_range),
1329
+ FormulaExpressionNode.literal("x"),
1330
+ ),
1331
+ ),
1332
+ ),
1333
+ "Calc!C2": formula_expression(
1334
+ "Calc!C2",
1335
+ '=SUMIFS(A1:A2,B1:B2,"y")',
1336
+ FormulaExpressionNode.function_call(
1337
+ "SUMIFS",
1338
+ (
1339
+ FormulaExpressionNode.reference_to(amount_range),
1340
+ FormulaExpressionNode.reference_to(label_range),
1341
+ FormulaExpressionNode.literal("y"),
1342
+ ),
1343
+ ),
1344
+ ),
1345
+ "Calc!C3": formula_expression(
1346
+ "Calc!C3",
1347
+ '=COUNTIFS(B1:B2,"x")',
1348
+ FormulaExpressionNode.function_call(
1349
+ "COUNTIFS",
1350
+ (
1351
+ FormulaExpressionNode.reference_to(label_range),
1352
+ FormulaExpressionNode.literal("x"),
1353
+ ),
1354
+ ),
1355
+ ),
1356
+ }
1357
+ output_path = tmp_path / "generated_range_cache.py"
1358
+
1359
+ result = generate_python_module(
1360
+ contract=contract,
1361
+ expressions=expressions,
1362
+ constants={"Data!A1": 2, "Data!A2": 5, "Data!B1": "x", "Data!B2": "y"},
1363
+ output_path=output_path,
1364
+ )
1365
+ module = load_module(output_path)
1366
+
1367
+ assert result.generated is True
1368
+ assert "_range_cache.get(key)" in result.source_code
1369
+ assert "def values(self):" in result.source_code
1370
+ assert "def lazy_values(self):" in result.source_code
1371
+ assert module.calculate() == {"Calc!C1": 2, "Calc!C2": 5, "Calc!C3": 1}
File without changes
File without changes