modelwright 0.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,726 @@
1
+ """Generated Python module contracts."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+ from collections.abc import Mapping, Sequence
7
+ from dataclasses import dataclass, field
8
+ from pathlib import Path
9
+ from typing import Any, Literal
10
+
11
+ from openpyxl.utils.cell import get_column_letter, range_boundaries
12
+
13
+ from modelwright.extraction import WorkbookRecord
14
+ from modelwright.formulas import FormulaExpression, FormulaExpressionNode
15
+ from modelwright.graph import DependencyGraph
16
+
17
+
18
# Values that can appear in serialized payloads (diagnostic raw values,
# input constants, and the dictionaries returned by the ``to_dict`` methods).
JsonValue = str | int | float | bool | None | list[Any] | dict[str, Any]
# Severity labels a GenerationDiagnostic may carry.
DiagnosticSeverity = Literal["info", "warning", "error"]
# Role labels a GeneratedSymbol may carry.
GeneratedSymbolKind = Literal["input", "intermediate", "output"]
21
+
22
+
23
@dataclass(frozen=True)
class GenerationDiagnostic:
    """One issue raised during generation, with optional provenance.

    ``location`` and ``raw_value`` hold whatever the producer attached to the
    diagnostic; both default to ``None``.  ``severity`` defaults to
    ``"warning"``.
    """

    code: str
    message: str
    severity: DiagnosticSeverity = "warning"
    location: str | None = None
    raw_value: JsonValue = None

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "GenerationDiagnostic":
        """Rebuild a diagnostic from a mapping shaped like ``to_dict`` output.

        ``code`` and ``message`` are required keys (``KeyError`` otherwise);
        the remaining keys fall back to the field defaults.
        """
        code = data["code"]
        message = data["message"]
        severity = data.get("severity", "warning")
        location = data.get("location")
        raw_value = data.get("raw_value")
        return cls(code, message, severity, location, raw_value)

    def to_dict(self) -> dict[str, JsonValue]:
        """Return the diagnostic as a plain mapping keyed by field name."""
        return dict(
            code=self.code,
            message=self.message,
            severity=self.severity,
            location=self.location,
            raw_value=self.raw_value,
        )
51
+
52
+
53
@dataclass(frozen=True)
class GeneratedSymbol:
    """Pairing of a workbook cell ref with the Python name generated for it.

    ``kind`` labels the symbol as input, intermediate or output, and
    ``raw_formula`` optionally keeps the formula text the symbol came from.
    """

    cell_ref: str
    symbol_name: str
    kind: GeneratedSymbolKind
    raw_formula: str | None = None

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "GeneratedSymbol":
        """Rebuild a symbol from a mapping shaped like ``to_dict`` output.

        ``cell_ref``, ``symbol_name`` and ``kind`` are required keys;
        ``raw_formula`` is optional.
        """
        cell_ref = data["cell_ref"]
        symbol_name = data["symbol_name"]
        kind = data["kind"]
        return cls(cell_ref, symbol_name, kind, data.get("raw_formula"))

    def to_dict(self) -> dict[str, JsonValue]:
        """Return the symbol as a plain mapping keyed by field name."""
        return dict(
            cell_ref=self.cell_ref,
            symbol_name=self.symbol_name,
            kind=self.kind,
            raw_formula=self.raw_formula,
        )
78
+
79
+
80
+ @dataclass(frozen=True)
81
+ class GeneratedModuleContract:
82
+ """Contract for one generated standalone Python module."""
83
+
84
+ workbook_id: str
85
+ module_name: str
86
+ entrypoint: str = "calculate"
87
+ input_refs: tuple[str, ...] = field(default_factory=tuple)
88
+ output_refs: tuple[str, ...] = field(default_factory=tuple)
89
+ symbols: tuple[GeneratedSymbol, ...] = field(default_factory=tuple)
90
+ include_provenance_comments: bool = True
91
+
92
+ @classmethod
93
+ def from_dict(cls, data: dict[str, Any]) -> "GeneratedModuleContract":
94
+ return cls(
95
+ workbook_id=data["workbook_id"],
96
+ module_name=data["module_name"],
97
+ entrypoint=data.get("entrypoint", "calculate"),
98
+ input_refs=tuple(data.get("input_refs", [])),
99
+ output_refs=tuple(data.get("output_refs", [])),
100
+ symbols=tuple(GeneratedSymbol.from_dict(item) for item in data.get("symbols", [])),
101
+ include_provenance_comments=data.get("include_provenance_comments", True),
102
+ )
103
+
104
+ def to_dict(self) -> dict[str, JsonValue]:
105
+ return {
106
+ "workbook_id": self.workbook_id,
107
+ "module_name": self.module_name,
108
+ "entrypoint": self.entrypoint,
109
+ "input_refs": list(self.input_refs),
110
+ "output_refs": list(self.output_refs),
111
+ "symbols": [symbol.to_dict() for symbol in self.symbols],
112
+ "include_provenance_comments": self.include_provenance_comments,
113
+ }
114
+
115
+
116
@dataclass(frozen=True)
class GenerationResult:
    """Outcome of one module generation: contract, source text, diagnostics."""

    contract: GeneratedModuleContract
    source_code: str = ""
    diagnostics: tuple[GenerationDiagnostic, ...] = field(default_factory=tuple)

    @property
    def generated(self) -> bool:
        """True when source code is present and no diagnostic is an error."""
        if not self.source_code:
            return False
        return all(entry.severity != "error" for entry in self.diagnostics)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "GenerationResult":
        """Rebuild a result from a mapping shaped like ``to_dict`` output.

        ``contract`` is required; ``source_code`` and ``diagnostics`` are
        optional.  A ``generated`` key, if present, is ignored because the
        value is derived.
        """
        contract = GeneratedModuleContract.from_dict(data["contract"])
        source_code = data.get("source_code", "")
        diagnostics = tuple(map(GenerationDiagnostic.from_dict, data.get("diagnostics", [])))
        return cls(contract, source_code, diagnostics)

    def to_dict(self) -> dict[str, JsonValue]:
        """Return the result as a plain mapping, including ``generated``."""
        payload: dict[str, JsonValue] = {}
        payload["contract"] = self.contract.to_dict()
        payload["source_code"] = self.source_code
        payload["generated"] = self.generated
        payload["diagnostics"] = [entry.to_dict() for entry in self.diagnostics]
        return payload
143
+
144
+
145
@dataclass(frozen=True)
class GeneratedContractInferenceResult:
    """Outcome of inferring a module contract for selected workbook outputs.

    Carries the inferred contract, the formula expressions and input
    constants keyed by cell ref, and any diagnostics collected on the way.
    """

    contract: GeneratedModuleContract
    expressions: dict[str, FormulaExpression] = field(default_factory=dict)
    constants: dict[str, JsonValue] = field(default_factory=dict)
    diagnostics: tuple[GenerationDiagnostic, ...] = field(default_factory=tuple)

    @property
    def inferred(self) -> bool:
        """True when none of the diagnostics has severity ``"error"``."""
        return all(entry.severity != "error" for entry in self.diagnostics)

    def to_dict(self) -> dict[str, JsonValue]:
        """Return the result as a plain mapping, including ``inferred``."""
        payload: dict[str, JsonValue] = {}
        payload["contract"] = self.contract.to_dict()
        serialized_expressions = {}
        for cell_ref, expression in self.expressions.items():
            serialized_expressions[cell_ref] = expression.to_dict()
        payload["expressions"] = serialized_expressions
        payload["constants"] = self.constants
        payload["diagnostics"] = [entry.to_dict() for entry in self.diagnostics]
        payload["inferred"] = self.inferred
        return payload
166
+
167
+
168
def symbol_name_for_cell_ref(cell_ref: str) -> str:
    """Build a stable Python identifier from a canonical workbook cell ref.

    Every run of characters outside ``[0-9A-Za-z_]`` is collapsed to a single
    underscore, surrounding underscores are stripped and the result is
    lower-cased.

    Returns:
        ``"cell"`` when nothing usable remains; the cleaned name prefixed
        with ``"cell_"`` when it would start with a digit or would be a
        reserved Python keyword (neither can be used as an assignment
        target); otherwise the cleaned name itself.
    """

    # Function-scope import keeps this block self-contained.
    import keyword

    symbol = re.sub(r"[^0-9A-Za-z_]+", "_", cell_ref).strip("_").lower()
    if not symbol:
        return "cell"
    if symbol[0].isdigit() or keyword.iskeyword(symbol):
        return f"cell_{symbol}"
    return symbol
177
+
178
+
179
def infer_generated_module_contract(
    *,
    workbook: WorkbookRecord,
    graph: DependencyGraph,
    expressions: Mapping[str, FormulaExpression],
    output_refs: Sequence[str],
    module_name: str,
    input_refs: Sequence[str] = (),
) -> GeneratedContractInferenceResult:
    """Infer a generated module contract by walking dependencies for selected outputs.

    Starting from each ref in ``output_refs``, the edges of
    ``graph.execution_edges`` are followed depth-first.  Cells listed in
    ``input_refs`` and cells without a formula become inputs; every other
    reachable cell becomes a formula symbol, appended after the dependencies
    that were walked for it.  Problems found on the way are reported as
    error diagnostics on the result instead of being raised: edges carrying a
    diagnostic code, sources that are not concrete cells, cycles, cells
    missing from ``workbook`` and formula cells missing from ``expressions``.

    The walk keeps its own stack, so long dependency chains do not run into
    the interpreter recursion limit.
    """

    explicit_inputs = set(input_refs)
    selected_outputs = tuple(output_refs)
    cell_by_ref = {cell.cell_ref: cell for cell in workbook.cells}
    dependencies_by_target: dict[str, list[str]] = {}
    edge_diagnostics_by_target: dict[str, list[GenerationDiagnostic]] = {}
    diagnostics: list[GenerationDiagnostic] = []

    # Index usable edges by target; edges that cannot be honoured are kept as
    # diagnostics and only surface if their target is actually walked.
    for edge in graph.execution_edges:
        if edge.diagnostic_code is not None:
            edge_diagnostics_by_target.setdefault(edge.target.normalized, []).append(
                GenerationDiagnostic(
                    code="unsupported_dependency_edge",
                    message="dependency edge has a diagnostic and cannot be inferred silently",
                    severity="error",
                    location=edge.target.normalized,
                    raw_value=edge.diagnostic_code,
                )
            )
            continue
        if edge.source.kind != "cell":
            edge_diagnostics_by_target.setdefault(edge.target.normalized, []).append(
                GenerationDiagnostic(
                    code="unsupported_dependency_source",
                    message="dependency source is not a concrete cell reference",
                    severity="error",
                    location=edge.target.normalized,
                    raw_value=edge.source.normalized,
                )
            )
            continue
        dependencies_by_target.setdefault(edge.target.normalized, []).append(edge.source.normalized)

    input_order: list[str] = []
    formula_order: list[str] = []
    visiting: set[str] = set()
    visited: set[str] = set()

    def enter(cell_ref: str) -> bool:
        """Classify ``cell_ref``; True only if its dependencies must be walked."""
        if cell_ref in visited:
            return False
        if cell_ref in visiting:
            diagnostics.append(
                GenerationDiagnostic(
                    code="circular_dependency",
                    message="selected output dependency walk encountered a cycle",
                    severity="error",
                    location=cell_ref,
                )
            )
            return False

        cell = cell_by_ref.get(cell_ref)
        if cell is None:
            # Not marked visited: each further reference reports it again.
            diagnostics.append(
                GenerationDiagnostic(
                    code="missing_dependency_cell",
                    message="selected output depends on a cell that was not extracted",
                    severity="error",
                    location=cell_ref,
                )
            )
            return False

        if cell_ref in explicit_inputs or cell.formula is None:
            # ``visited`` already guarantees the ref is recorded only once.
            input_order.append(cell_ref)
            visited.add(cell_ref)
            return False

        visiting.add(cell_ref)
        diagnostics.extend(edge_diagnostics_by_target.get(cell_ref, ()))
        return True

    def visit(root_ref: str) -> None:
        """Depth-first walk from ``root_ref`` using an explicit stack."""
        if not enter(root_ref):
            return
        stack = [(root_ref, iter(dependencies_by_target.get(root_ref, ())))]
        while stack:
            cell_ref, pending = stack[-1]
            for dependency_ref in pending:
                if enter(dependency_ref):
                    # Descend; the parent's iterator resumes where it stopped.
                    stack.append((dependency_ref, iter(dependencies_by_target.get(dependency_ref, ()))))
                    break
            else:
                # All dependencies handled: emit the cell in post-order.
                stack.pop()
                visiting.remove(cell_ref)
                formula_order.append(cell_ref)
                visited.add(cell_ref)

    for output_ref in selected_outputs:
        visit(output_ref)

    selected_expressions: dict[str, FormulaExpression] = {}
    for cell_ref in formula_order:
        expression = expressions.get(cell_ref)
        if expression is None:
            diagnostics.append(
                GenerationDiagnostic(
                    code="missing_formula_expression",
                    message="inferred generated symbol has no translated formula expression",
                    severity="error",
                    location=cell_ref,
                )
            )
            continue
        selected_expressions[cell_ref] = expression

    constants = {cell_ref: cell_by_ref[cell_ref].raw_value for cell_ref in input_order if cell_ref in cell_by_ref}
    output_set = set(selected_outputs)
    symbols = tuple(
        GeneratedSymbol(cell_ref=cell_ref, symbol_name=symbol_name_for_cell_ref(cell_ref), kind="input")
        for cell_ref in input_order
    ) + tuple(
        GeneratedSymbol(
            cell_ref=cell_ref,
            symbol_name=symbol_name_for_cell_ref(cell_ref),
            kind="output" if cell_ref in output_set else "intermediate",
            raw_formula=cell_by_ref[cell_ref].formula.raw_formula if cell_by_ref[cell_ref].formula else None,
        )
        for cell_ref in formula_order
    )
    contract = GeneratedModuleContract(
        workbook_id=workbook.workbook_id,
        module_name=module_name,
        input_refs=tuple(input_order),
        output_refs=selected_outputs,
        symbols=symbols,
    )
    return GeneratedContractInferenceResult(
        contract=contract,
        expressions=selected_expressions,
        constants=constants,
        diagnostics=tuple(diagnostics),
    )
314
+
315
+
316
def generate_python_module(
    *,
    contract: GeneratedModuleContract,
    expressions: Mapping[str, FormulaExpression],
    constants: Mapping[str, JsonValue] | None = None,
    output_path: str | Path | None = None,
) -> GenerationResult:
    """Generate standalone Python source from translated formula expressions.

    The contract is checked first; if any check yields an error diagnostic,
    a result without source code is returned and nothing is written.
    Otherwise the module text is rendered and, when ``output_path`` is given,
    written there as UTF-8 (parent directories are created as needed).
    """

    found = tuple(_generation_diagnostics(contract, expressions))
    for diagnostic in found:
        if diagnostic.severity == "error":
            return GenerationResult(contract=contract, diagnostics=found)

    rendered = _render_module(
        contract=contract,
        expressions=expressions,
        constants=constants if constants else {},
    )

    if output_path is not None:
        target = Path(output_path)
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(rendered, encoding="utf-8")

    return GenerationResult(contract=contract, source_code=rendered, diagnostics=found)
337
+
338
+
339
+ def _generation_diagnostics(
340
+ contract: GeneratedModuleContract,
341
+ expressions: Mapping[str, FormulaExpression],
342
+ ) -> list[GenerationDiagnostic]:
343
+ diagnostics: list[GenerationDiagnostic] = []
344
+ for symbol in contract.symbols:
345
+ if symbol.kind == "input":
346
+ continue
347
+
348
+ expression = expressions.get(symbol.cell_ref)
349
+ if expression is None:
350
+ diagnostics.append(
351
+ GenerationDiagnostic(
352
+ code="missing_formula_expression",
353
+ message="generated symbol has no translated formula expression",
354
+ severity="error",
355
+ location=symbol.cell_ref,
356
+ raw_value=symbol.raw_formula,
357
+ )
358
+ )
359
+ continue
360
+
361
+ if not expression.translated:
362
+ diagnostics.append(
363
+ GenerationDiagnostic(
364
+ code="unsupported_formula",
365
+ message="formula expression could not be generated",
366
+ severity="error",
367
+ location=symbol.cell_ref,
368
+ raw_value=expression.raw_formula,
369
+ )
370
+ )
371
+ return diagnostics
372
+
373
+
374
+ def _render_module(
375
+ *,
376
+ contract: GeneratedModuleContract,
377
+ expressions: Mapping[str, FormulaExpression],
378
+ constants: Mapping[str, JsonValue],
379
+ ) -> str:
380
+ lines = [
381
+ '"""Generated Modelwright model.',
382
+ "",
383
+ f"Source workbook: {contract.workbook_id}",
384
+ '"""',
385
+ "",
386
+ "import fnmatch",
387
+ "",
388
+ "",
389
+ "def _sf_flatten(values):",
390
+ " for value in values:",
391
+ " if isinstance(value, (list, tuple)):",
392
+ " yield from _sf_flatten(value)",
393
+ " else:",
394
+ " yield value",
395
+ "",
396
+ "",
397
+ "def _sf_average(values):",
398
+ " values = list(values)",
399
+ " return sum(values) / len(values)",
400
+ "",
401
+ "",
402
+ "def _sf_iferror(value_fn, fallback):",
403
+ " try:",
404
+ " return value_fn()",
405
+ " except Exception:",
406
+ " return fallback",
407
+ "",
408
+ "",
409
+ "def _sf_ifna(value_fn, fallback):",
410
+ " try:",
411
+ " value = value_fn()",
412
+ " except LookupError:",
413
+ " return fallback",
414
+ " if value == '#N/A':",
415
+ " return fallback",
416
+ " return value",
417
+ "",
418
+ "",
419
+ "def _sf_coerce_criteria(raw, sample):",
420
+ " if isinstance(raw, str):",
421
+ " upper = raw.upper()",
422
+ " if upper == 'TRUE':",
423
+ " return True",
424
+ " if upper == 'FALSE':",
425
+ " return False",
426
+ " try:",
427
+ " number = float(raw)",
428
+ " except ValueError:",
429
+ " return raw",
430
+ " if number.is_integer():",
431
+ " return int(number)",
432
+ " return number",
433
+ " return raw",
434
+ "",
435
+ "",
436
+ "def _sf_compare_criteria(value, operator, expected):",
437
+ " if operator == '=':",
438
+ " return value == expected",
439
+ " if operator == '<>':",
440
+ " return value != expected",
441
+ " if operator == '>':",
442
+ " return value > expected",
443
+ " if operator == '>=':",
444
+ " return value >= expected",
445
+ " if operator == '<':",
446
+ " return value < expected",
447
+ " if operator == '<=':",
448
+ " return value <= expected",
449
+ " raise ValueError(f'unsupported criteria operator: {operator}')",
450
+ "",
451
+ "",
452
+ "def _sf_matches_criteria(value, criteria):",
453
+ " if isinstance(criteria, str):",
454
+ " for operator in ('>=', '<=', '<>', '>', '<', '='):",
455
+ " if criteria.startswith(operator):",
456
+ " expected = _sf_coerce_criteria(criteria[len(operator):], value)",
457
+ " return _sf_compare_criteria(value, operator, expected)",
458
+ " if '*' in criteria or '?' in criteria:",
459
+ " return fnmatch.fnmatchcase(str(value), criteria)",
460
+ " return value == criteria",
461
+ "",
462
+ "",
463
+ "def _sf_sumif(criteria_range, criteria, sum_range=None):",
464
+ " criteria_values = tuple(_sf_flatten((criteria_range,)))",
465
+ " sum_values = criteria_values if sum_range is None else tuple(_sf_flatten((sum_range,)))",
466
+ " return sum(",
467
+ " sum_value",
468
+ " for criteria_value, sum_value in zip(criteria_values, sum_values)",
469
+ " if _sf_matches_criteria(criteria_value, criteria)",
470
+ " )",
471
+ "",
472
+ "",
473
+ "def _sf_countif(criteria_range, criteria):",
474
+ " return sum(1 for value in _sf_flatten((criteria_range,)) if _sf_matches_criteria(value, criteria))",
475
+ "",
476
+ "",
477
+ "def _sf_sumifs(sum_range, *criteria_pairs):",
478
+ " sum_values = tuple(_sf_flatten((sum_range,)))",
479
+ " criteria_ranges = [tuple(_sf_flatten((criteria_range,))) for criteria_range, _criteria in criteria_pairs]",
480
+ " total = 0",
481
+ " for index, sum_value in enumerate(sum_values):",
482
+ " if all(_sf_matches_criteria(criteria_range[index], criteria) for criteria_range, criteria in zip(criteria_ranges, (criteria for _range, criteria in criteria_pairs))):",
483
+ " total += sum_value",
484
+ " return total",
485
+ "",
486
+ "",
487
+ "def _sf_countifs(*criteria_pairs):",
488
+ " criteria_ranges = [tuple(_sf_flatten((criteria_range,))) for criteria_range, _criteria in criteria_pairs]",
489
+ " if not criteria_ranges:",
490
+ " return 0",
491
+ " criteria_values = tuple(criteria for _range, criteria in criteria_pairs)",
492
+ " return sum(",
493
+ " 1",
494
+ " for index in range(len(criteria_ranges[0]))",
495
+ " if all(_sf_matches_criteria(criteria_range[index], criteria) for criteria_range, criteria in zip(criteria_ranges, criteria_values))",
496
+ " )",
497
+ "",
498
+ "",
499
+ "def _sf_range_lookup_enabled(range_lookup):",
500
+ " if isinstance(range_lookup, str):",
501
+ " return range_lookup.upper() not in {'FALSE', '0'}",
502
+ " return bool(range_lookup)",
503
+ "",
504
+ "",
505
+ "def _sf_vlookup(lookup_value, table_array, col_index_num, range_lookup=True):",
506
+ " column_index = int(col_index_num) - 1",
507
+ " if column_index < 0:",
508
+ " raise ValueError('VLOOKUP column index must be one-based')",
509
+ " rows = tuple(tuple(row) for row in table_array)",
510
+ " if not rows:",
511
+ " raise LookupError('VLOOKUP table is empty')",
512
+ " if any(column_index >= len(row) for row in rows):",
513
+ " raise IndexError('VLOOKUP column index is outside the table')",
514
+ " if not _sf_range_lookup_enabled(range_lookup):",
515
+ " for row in rows:",
516
+ " if row[0] == lookup_value:",
517
+ " return row[column_index]",
518
+ " raise LookupError('VLOOKUP exact match not found')",
519
+ " candidate = None",
520
+ " for row in rows:",
521
+ " try:",
522
+ " matched = row[0] <= lookup_value",
523
+ " except TypeError:",
524
+ " continue",
525
+ " if matched:",
526
+ " candidate = row",
527
+ " else:",
528
+ " break",
529
+ " if candidate is None:",
530
+ " raise LookupError('VLOOKUP approximate match not found')",
531
+ " return candidate[column_index]",
532
+ "",
533
+ "",
534
+ f"def {contract.entrypoint}(inputs=None):",
535
+ " inputs = {} if inputs is None else dict(inputs)",
536
+ ]
537
+
538
+ for symbol in contract.symbols:
539
+ if contract.include_provenance_comments:
540
+ lines.append(f" # {symbol.cell_ref}" + (f": {symbol.raw_formula}" if symbol.raw_formula else ""))
541
+
542
+ if symbol.kind == "input":
543
+ default_value = constants.get(symbol.cell_ref)
544
+ lines.append(
545
+ f" {symbol.symbol_name} = inputs.get({symbol.cell_ref!r}, {default_value!r})"
546
+ )
547
+ continue
548
+
549
+ expression = expressions[symbol.cell_ref]
550
+ lines.append(f" {symbol.symbol_name} = {_render_expression(expression.root)}")
551
+
552
+ lines.append(" return {")
553
+ for output_ref in contract.output_refs:
554
+ lines.append(f" {output_ref!r}: {symbol_name_for_cell_ref(output_ref)},")
555
+ lines.append(" }")
556
+ lines.append("")
557
+ return "\n".join(lines)
558
+
559
+
560
+ def _render_expression(node: FormulaExpressionNode | None) -> str:
561
+ if node is None:
562
+ raise ValueError("cannot render missing formula expression root")
563
+
564
+ if node.kind == "literal":
565
+ return repr(node.value)
566
+ if node.kind == "reference":
567
+ if node.reference is None:
568
+ raise ValueError("cannot render reference expression without reference")
569
+ if node.reference.kind == "range":
570
+ return _render_range_reference(node.reference)
571
+ return symbol_name_for_cell_ref(node.reference.normalized)
572
+ if node.kind == "unary":
573
+ (operand,) = node.operands
574
+ if node.operator == "-":
575
+ return f"(-{_render_expression(operand)})"
576
+ raise ValueError(f"unsupported unary operator: {node.operator}")
577
+ if node.kind == "binary":
578
+ left, right = node.operands
579
+ if node.operator == "^":
580
+ return f"({_render_expression(left)} ** {_render_expression(right)})"
581
+ if node.operator == "&":
582
+ return f"(str({_render_expression(left)}) + str({_render_expression(right)}))"
583
+ return f"({_render_expression(left)} {node.operator} {_render_expression(right)})"
584
+ if node.kind == "comparison":
585
+ left, right = node.operands
586
+ operator = _python_comparison_operator(node.operator)
587
+ return f"({_render_expression(left)} {operator} {_render_expression(right)})"
588
+ if node.kind == "function_call":
589
+ return _render_function_call(node)
590
+
591
+ raise ValueError(f"unsupported expression kind: {node.kind}")
592
+
593
+
594
+ def _render_function_call(node: FormulaExpressionNode) -> str:
595
+ if node.function_name == "ROUND":
596
+ if len(node.operands) != 2:
597
+ raise ValueError("ROUND requires two operands")
598
+ return f"round({_render_expression(node.operands[0])}, {_render_expression(node.operands[1])})"
599
+ if node.function_name == "IF":
600
+ if len(node.operands) != 3:
601
+ raise ValueError("IF requires three operands")
602
+ condition, true_value, false_value = node.operands
603
+ return f"({_render_expression(true_value)} if {_render_expression(condition)} else {_render_expression(false_value)})"
604
+ if node.function_name == "IFERROR":
605
+ if len(node.operands) != 2:
606
+ raise ValueError("IFERROR requires two operands")
607
+ value, fallback = node.operands
608
+ return f"_sf_iferror(lambda: {_render_expression(value)}, {_render_expression(fallback)})"
609
+ if node.function_name == "IFNA":
610
+ if len(node.operands) != 2:
611
+ raise ValueError("IFNA requires two operands")
612
+ value, fallback = node.operands
613
+ return f"_sf_ifna(lambda: {_render_expression(value)}, {_render_expression(fallback)})"
614
+ if node.function_name == "AND":
615
+ return f"all(_sf_flatten({_render_argument_tuple(node.operands)}))"
616
+ if node.function_name == "OR":
617
+ return f"any(_sf_flatten({_render_argument_tuple(node.operands)}))"
618
+ if node.function_name == "SUM":
619
+ return f"sum(_sf_flatten({_render_argument_tuple(node.operands)}))"
620
+ if node.function_name == "MIN":
621
+ return f"min(_sf_flatten({_render_argument_tuple(node.operands)}))"
622
+ if node.function_name == "MAX":
623
+ return f"max(_sf_flatten({_render_argument_tuple(node.operands)}))"
624
+ if node.function_name == "AVERAGE":
625
+ return f"_sf_average(_sf_flatten({_render_argument_tuple(node.operands)}))"
626
+ if node.function_name == "CONCATENATE":
627
+ return f"''.join(str(value) for value in _sf_flatten({_render_argument_tuple(node.operands)}))"
628
+ if node.function_name == "SUMIF":
629
+ if len(node.operands) not in {2, 3}:
630
+ raise ValueError("SUMIF requires two or three operands")
631
+ return f"_sf_sumif({_render_function_arguments(node.operands)})"
632
+ if node.function_name == "COUNTIF":
633
+ if len(node.operands) != 2:
634
+ raise ValueError("COUNTIF requires two operands")
635
+ return f"_sf_countif({_render_function_arguments(node.operands)})"
636
+ if node.function_name == "SUMIFS":
637
+ if len(node.operands) < 3 or len(node.operands) % 2 != 1:
638
+ raise ValueError("SUMIFS requires a sum range followed by criteria range/criteria pairs")
639
+ return f"_sf_sumifs({_render_criteria_function_arguments(node.operands)})"
640
+ if node.function_name == "COUNTIFS":
641
+ if len(node.operands) < 2 or len(node.operands) % 2 != 0:
642
+ raise ValueError("COUNTIFS requires criteria range/criteria pairs")
643
+ return f"_sf_countifs({_render_criteria_function_arguments(node.operands)})"
644
+ if node.function_name == "VLOOKUP":
645
+ if len(node.operands) not in {3, 4}:
646
+ raise ValueError("VLOOKUP requires three or four operands")
647
+ lookup_value, table_array, col_index_num, *range_lookup = node.operands
648
+ rendered_arguments = [
649
+ _render_expression(lookup_value),
650
+ _render_table_array(table_array),
651
+ _render_expression(col_index_num),
652
+ ]
653
+ if range_lookup:
654
+ rendered_arguments.append(_render_expression(range_lookup[0]))
655
+ return f"_sf_vlookup({', '.join(rendered_arguments)})"
656
+ raise ValueError(f"unsupported function call: {node.function_name}")
657
+
658
+
659
+ def _render_function_arguments(operands: tuple[FormulaExpressionNode, ...]) -> str:
660
+ return ", ".join(_render_expression(operand) for operand in operands)
661
+
662
+
663
+ def _render_criteria_function_arguments(operands: tuple[FormulaExpressionNode, ...]) -> str:
664
+ if len(operands) < 2:
665
+ return ""
666
+
667
+ rendered: list[str] = []
668
+ if len(operands) % 2 == 1:
669
+ rendered.append(_render_expression(operands[0]))
670
+ pair_operands = operands[1:]
671
+ else:
672
+ pair_operands = operands
673
+
674
+ rendered.extend(
675
+ f"({_render_expression(pair_operands[index])}, {_render_expression(pair_operands[index + 1])})"
676
+ for index in range(0, len(pair_operands), 2)
677
+ )
678
+ return ", ".join(rendered)
679
+
680
+
681
+ def _render_argument_tuple(operands: tuple[FormulaExpressionNode, ...]) -> str:
682
+ rendered = ", ".join(_render_expression(operand) for operand in operands)
683
+ if len(operands) == 1:
684
+ rendered = f"{rendered},"
685
+ return f"({rendered})"
686
+
687
+
688
+ def _render_range_reference(reference) -> str:
689
+ if reference.sheet is None or reference.start_cell is None or reference.end_cell is None:
690
+ raise ValueError(f"cannot render incomplete range reference: {reference.normalized}")
691
+
692
+ min_col, min_row, max_col, max_row = range_boundaries(f"{reference.start_cell}:{reference.end_cell}")
693
+ rendered_cells = [
694
+ symbol_name_for_cell_ref(f"{reference.sheet}!{get_column_letter(column)}{row}")
695
+ for row in range(min_row, max_row + 1)
696
+ for column in range(min_col, max_col + 1)
697
+ ]
698
+ return f"({', '.join(rendered_cells)}{',' if len(rendered_cells) == 1 else ''})"
699
+
700
+
701
+ def _render_table_array(node: FormulaExpressionNode) -> str:
702
+ if node.kind != "reference" or node.reference is None or node.reference.kind != "range":
703
+ raise ValueError("VLOOKUP table array must be a concrete range reference")
704
+ reference = node.reference
705
+ if reference.sheet is None or reference.start_cell is None or reference.end_cell is None:
706
+ raise ValueError(f"cannot render incomplete VLOOKUP table reference: {reference.normalized}")
707
+
708
+ min_col, min_row, max_col, max_row = range_boundaries(f"{reference.start_cell}:{reference.end_cell}")
709
+ rendered_rows = []
710
+ for row in range(min_row, max_row + 1):
711
+ rendered_cells = [
712
+ symbol_name_for_cell_ref(f"{reference.sheet}!{get_column_letter(column)}{row}")
713
+ for column in range(min_col, max_col + 1)
714
+ ]
715
+ rendered_rows.append(f"({', '.join(rendered_cells)}{',' if len(rendered_cells) == 1 else ''})")
716
+ return f"({', '.join(rendered_rows)}{',' if len(rendered_rows) == 1 else ''})"
717
+
718
+
719
+ def _python_comparison_operator(operator: str | None) -> str:
720
+ if operator == "=":
721
+ return "=="
722
+ if operator == "<>":
723
+ return "!="
724
+ if operator is None:
725
+ raise ValueError("missing comparison operator")
726
+ return operator