additory-0.1.0a4-py3-none-any.whl → additory-0.1.1a1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (121)
  1. additory/__init__.py +58 -14
  2. additory/common/__init__.py +31 -147
  3. additory/common/column_selector.py +255 -0
  4. additory/common/distributions.py +286 -613
  5. additory/common/extractors.py +313 -0
  6. additory/common/knn_imputation.py +332 -0
  7. additory/common/result.py +380 -0
  8. additory/common/strategy_parser.py +243 -0
  9. additory/common/unit_conversions.py +338 -0
  10. additory/common/validation.py +283 -103
  11. additory/core/__init__.py +34 -22
  12. additory/core/backend.py +258 -0
  13. additory/core/config.py +177 -305
  14. additory/core/logging.py +230 -24
  15. additory/core/memory_manager.py +157 -495
  16. additory/expressions/__init__.py +2 -23
  17. additory/expressions/compiler.py +457 -0
  18. additory/expressions/engine.py +264 -487
  19. additory/expressions/integrity.py +179 -0
  20. additory/expressions/loader.py +263 -0
  21. additory/expressions/parser.py +363 -167
  22. additory/expressions/resolver.py +274 -0
  23. additory/functions/__init__.py +1 -0
  24. additory/functions/analyze/__init__.py +144 -0
  25. additory/functions/analyze/cardinality.py +58 -0
  26. additory/functions/analyze/correlations.py +66 -0
  27. additory/functions/analyze/distributions.py +53 -0
  28. additory/functions/analyze/duplicates.py +49 -0
  29. additory/functions/analyze/features.py +61 -0
  30. additory/functions/analyze/imputation.py +66 -0
  31. additory/functions/analyze/outliers.py +65 -0
  32. additory/functions/analyze/patterns.py +65 -0
  33. additory/functions/analyze/presets.py +72 -0
  34. additory/functions/analyze/quality.py +59 -0
  35. additory/functions/analyze/timeseries.py +53 -0
  36. additory/functions/analyze/types.py +45 -0
  37. additory/functions/expressions/__init__.py +161 -0
  38. additory/functions/snapshot/__init__.py +82 -0
  39. additory/functions/snapshot/filter.py +119 -0
  40. additory/functions/synthetic/__init__.py +113 -0
  41. additory/functions/synthetic/mode_detector.py +47 -0
  42. additory/functions/synthetic/strategies/__init__.py +1 -0
  43. additory/functions/synthetic/strategies/advanced.py +35 -0
  44. additory/functions/synthetic/strategies/augmentative.py +160 -0
  45. additory/functions/synthetic/strategies/generative.py +168 -0
  46. additory/functions/synthetic/strategies/presets.py +116 -0
  47. additory/functions/to/__init__.py +188 -0
  48. additory/functions/to/lookup.py +351 -0
  49. additory/functions/to/merge.py +189 -0
  50. additory/functions/to/sort.py +91 -0
  51. additory/functions/to/summarize.py +170 -0
  52. additory/functions/transform/__init__.py +140 -0
  53. additory/functions/transform/datetime.py +79 -0
  54. additory/functions/transform/extract.py +85 -0
  55. additory/functions/transform/harmonize.py +105 -0
  56. additory/functions/transform/knn.py +62 -0
  57. additory/functions/transform/onehotencoding.py +68 -0
  58. additory/functions/transform/transpose.py +42 -0
  59. additory-0.1.1a1.dist-info/METADATA +83 -0
  60. additory-0.1.1a1.dist-info/RECORD +62 -0
  61. additory/analysis/__init__.py +0 -48
  62. additory/analysis/cardinality.py +0 -126
  63. additory/analysis/correlations.py +0 -124
  64. additory/analysis/distributions.py +0 -376
  65. additory/analysis/quality.py +0 -158
  66. additory/analysis/scan.py +0 -400
  67. additory/common/backend.py +0 -371
  68. additory/common/column_utils.py +0 -191
  69. additory/common/exceptions.py +0 -62
  70. additory/common/lists.py +0 -229
  71. additory/common/patterns.py +0 -240
  72. additory/common/resolver.py +0 -567
  73. additory/common/sample_data.py +0 -182
  74. additory/core/ast_builder.py +0 -165
  75. additory/core/backends/__init__.py +0 -23
  76. additory/core/backends/arrow_bridge.py +0 -483
  77. additory/core/backends/cudf_bridge.py +0 -355
  78. additory/core/column_positioning.py +0 -358
  79. additory/core/compiler_polars.py +0 -166
  80. additory/core/enhanced_cache_manager.py +0 -1119
  81. additory/core/enhanced_matchers.py +0 -473
  82. additory/core/enhanced_version_manager.py +0 -325
  83. additory/core/executor.py +0 -59
  84. additory/core/integrity_manager.py +0 -477
  85. additory/core/loader.py +0 -190
  86. additory/core/namespace_manager.py +0 -657
  87. additory/core/parser.py +0 -176
  88. additory/core/polars_expression_engine.py +0 -601
  89. additory/core/registry.py +0 -177
  90. additory/core/sample_data_manager.py +0 -492
  91. additory/core/user_namespace.py +0 -751
  92. additory/core/validator.py +0 -27
  93. additory/dynamic_api.py +0 -352
  94. additory/expressions/proxy.py +0 -549
  95. additory/expressions/registry.py +0 -313
  96. additory/expressions/samples.py +0 -492
  97. additory/synthetic/__init__.py +0 -13
  98. additory/synthetic/column_name_resolver.py +0 -149
  99. additory/synthetic/deduce.py +0 -259
  100. additory/synthetic/distributions.py +0 -22
  101. additory/synthetic/forecast.py +0 -1132
  102. additory/synthetic/linked_list_parser.py +0 -415
  103. additory/synthetic/namespace_lookup.py +0 -129
  104. additory/synthetic/smote.py +0 -320
  105. additory/synthetic/strategies.py +0 -926
  106. additory/synthetic/synthesizer.py +0 -713
  107. additory/utilities/__init__.py +0 -53
  108. additory/utilities/encoding.py +0 -600
  109. additory/utilities/games.py +0 -300
  110. additory/utilities/keys.py +0 -8
  111. additory/utilities/lookup.py +0 -103
  112. additory/utilities/matchers.py +0 -216
  113. additory/utilities/resolvers.py +0 -286
  114. additory/utilities/settings.py +0 -167
  115. additory/utilities/units.py +0 -749
  116. additory/utilities/validators.py +0 -153
  117. additory-0.1.0a4.dist-info/METADATA +0 -311
  118. additory-0.1.0a4.dist-info/RECORD +0 -72
  119. additory-0.1.0a4.dist-info/licenses/LICENSE +0 -21
  120. {additory-0.1.0a4.dist-info → additory-0.1.1a1.dist-info}/WHEEL +0 -0
  121. {additory-0.1.0a4.dist-info → additory-0.1.1a1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,380 @@
1
+ """
2
+ Result wrapper classes for Additory operations.
3
+
4
+ Provides DataFrameResult and AnalysisResult classes that wrap
5
+ operation results with metadata and helper methods.
6
+ """
7
+
8
+ from typing import Any, Dict, List, Optional
9
+ import polars as pl
10
+ import json
11
+ import time
12
+
13
+
14
class DataFrameResult:
    """
    Wrapper around a Polars DataFrame produced by an Additory operation.

    Used by: to, transform, snapshot, synthetic, expressions functions

    Besides the raw DataFrame (``df``) it records operation metadata and
    exposes helpers (``info``, ``summary``, ``explain``) plus conversions to
    pandas/Arrow. Attributes not found on the wrapper are delegated to the
    wrapped DataFrame, so it can mostly be used like a DataFrame.

    Attributes:
        df: Polars DataFrame (the actual result)
        operation: Operation name ('to', 'transform', 'snapshot', 'synthetic')
        metadata: Dictionary of operation metadata
        input_shape: Shape of the DataFrame before the operation
        output_shape: Shape of the DataFrame after the operation
        columns_added: Names of columns the operation added
        columns_removed: Names of columns the operation removed
        execution_time: Duration of the operation in seconds
    """

    # Names that live on the wrapper itself; __getattr__ must never try to
    # delegate these to self.df (guards against infinite recursion when one
    # of them is missing, e.g. during unpickling).
    _RESERVED = ('df', 'operation', 'metadata', 'input_shape', 'output_shape',
                 'columns_added', 'columns_removed', 'execution_time')

    def __init__(self, df: pl.DataFrame, operation: str, metadata: Dict[str, Any]):
        """
        Store the result DataFrame together with its operation metadata.

        Args:
            df: Result DataFrame
            operation: Operation name
            metadata: Operation metadata
        """
        self.df = df
        self.operation = operation
        self.metadata = metadata

        # Frequently used metadata fields, pulled out for direct access.
        self.output_shape = (df.height, df.width)
        self.input_shape = metadata.get('input_shape', (0, 0))
        self.columns_added = metadata.get('columns_added', [])
        self.columns_removed = metadata.get('columns_removed', [])
        self.execution_time = metadata.get('execution_time', 0.0)

    def info(self) -> Dict[str, Any]:
        """
        Summarize the result as a plain dictionary.

        Returns:
            Dict with operation name, row/column counts, column changes,
            timing, shapes, and the full metadata dict.

        Example:
            result = add.to(df, ...)
            print(result.info())
            # {'operation': 'to', 'rows': 1000, 'columns': 15, ...}
        """
        n_rows, n_cols = self.output_shape
        return {
            'operation': self.operation,
            'rows': n_rows,
            'columns': n_cols,
            'columns_added': self.columns_added,
            'columns_removed': self.columns_removed,
            'execution_time': self.execution_time,
            'input_shape': self.input_shape,
            'output_shape': self.output_shape,
            'metadata': self.metadata,
        }

    def summary(self) -> str:
        """
        Build a short human-readable summary of the operation.

        Returns:
            Formatted multi-line string

        Example:
            result = add.to(df, ...)
            print(result.summary())
            # "Added 1 column (price) to 1000 rows in 0.05s"
        """
        lines = [f"Operation: add.{self.operation}()"]

        if self.columns_added:
            lines.append(
                f"Added {len(self.columns_added)} column(s): {', '.join(self.columns_added)}"
            )
        if self.columns_removed:
            lines.append(
                f"Removed {len(self.columns_removed)} column(s): {', '.join(self.columns_removed)}"
            )

        # Only mention the shape when the operation actually changed it.
        if self.input_shape != self.output_shape:
            in_rows, in_cols = self.input_shape
            out_rows, out_cols = self.output_shape
            lines.append(f"Shape: {in_rows}x{in_cols} → {out_rows}x{out_cols}")

        lines.append(f"Time: {self.execution_time:.3f}s")
        return '\n'.join(lines)

    def explain(self) -> str:
        """
        Produce a detailed, line-per-fact explanation of the operation.

        Returns:
            Detailed explanation string

        Example:
            result = add.to(df, ...)
            print(result.explain())
            # "Operation: add.to()
            #   - Looked up 'price' from reference DataFrame
            #   ..."
        """
        # These metadata keys are reported separately below, so skip them
        # when dumping the raw metadata entries.
        skipped = ('input_shape', 'execution_time', 'columns_added', 'columns_removed')

        lines = [f"Operation: add.{self.operation}()"]
        lines.extend(
            f"  - {key}: {value}"
            for key, value in self.metadata.items()
            if key not in skipped
        )

        if self.columns_added:
            lines.append(f"  - Added columns: {', '.join(self.columns_added)}")
        if self.columns_removed:
            lines.append(f"  - Removed columns: {', '.join(self.columns_removed)}")
        lines.append(f"  - Execution time: {self.execution_time:.3f}s")

        return '\n'.join(lines)

    def to_polars(self) -> pl.DataFrame:
        """Return the underlying Polars DataFrame unchanged."""
        return self.df

    def to_pandas(self):
        """Return the result converted to a pandas DataFrame."""
        return self.df.to_pandas()

    def to_arrow(self):
        """Return the result converted to an Arrow Table."""
        return self.df.to_arrow()

    def __repr__(self) -> str:
        """Concise one-line representation for debugging."""
        return (
            f"DataFrameResult(operation='{self.operation}', "
            f"shape={self.output_shape}, "
            f"columns_added={len(self.columns_added)}, "
            f"columns_removed={len(self.columns_removed)})"
        )

    def __getattr__(self, name: str) -> Any:
        """
        Fall back to the wrapped DataFrame for unknown attributes.

        Args:
            name: Attribute name

        Returns:
            The attribute looked up on ``self.df``

        Example:
            result = add.to(df, ...)
            result.select(['name', 'age'])  # Delegates to df.select()
        """
        if name in type(self)._RESERVED:
            raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'")
        return getattr(self.df, name)
204
+
205
+
206
class AnalysisResult:
    """
    Result wrapper for the analyze() function.

    Used by: analyze function

    Each analysis section is exposed as an attribute (``None`` when that
    analysis was not run), and the full mapping is kept in ``_analyses``.

    Attributes:
        quality: Quality analysis results
        cardinality: Cardinality analysis results
        distributions: Distribution analysis results
        correlations: Correlation analysis results
        features: Feature analysis results
        types: Type analysis results
        patterns: Pattern analysis results
        outliers: Outlier analysis results
        duplicates: Duplicate analysis results
        timeseries: Time series analysis results (if applicable)
        imputation: Imputation recommendations
        metadata: Analysis metadata
    """

    # The known analysis sections, in presentation order.
    _SECTIONS = ('quality', 'cardinality', 'distributions', 'correlations',
                 'features', 'types', 'patterns', 'outliers', 'duplicates',
                 'timeseries', 'imputation')

    def __init__(self, analyses: Dict[str, Any], metadata: Dict[str, Any]):
        """
        Store the per-section analysis results and overall metadata.

        Args:
            analyses: Dictionary of analysis results
            metadata: Analysis metadata
        """
        self.metadata = metadata

        # Expose every known section as a direct attribute.
        for section in self._SECTIONS:
            setattr(self, section, analyses.get(section))

        # Keep the raw mapping for summary()/to_dict().
        self._analyses = analyses

    def summary(self) -> str:
        """
        Render a formatted overview of every analysis that produced output.

        Returns:
            Formatted summary string
        """
        performed = [name for name, res in self._analyses.items() if res is not None]

        lines = ["Analysis Summary", "=" * 50,
                 f"Analyses performed: {len(performed)}", ""]

        for name in performed:
            section = self._analyses[name]
            lines.append(f"{name.upper()}:")

            if isinstance(section, dict):
                # Scalars are shown verbatim; containers are abbreviated.
                for field, val in section.items():
                    if isinstance(val, (int, float, str, bool)):
                        lines.append(f"  {field}: {val}")
                    elif isinstance(val, list):
                        lines.append(f"  {field}: {len(val)} items")
                    elif isinstance(val, pl.DataFrame):
                        lines.append(f"  {field}: DataFrame ({val.height}x{val.width})")
            else:
                lines.append(f"  {section}")

            lines.append("")

        if self.metadata:
            lines.append("METADATA:")
            lines.extend(f"  {key}: {value}" for key, value in self.metadata.items())

        return '\n'.join(lines)

    def to_dict(self) -> Dict[str, Any]:
        """
        Flatten every non-empty analysis (plus metadata) into one dictionary.

        DataFrames — top-level or nested one level deep — are converted via
        ``DataFrame.to_dict()``.

        Returns:
            Dictionary of all analysis results
        """
        out: Dict[str, Any] = {}

        for name, section in self._analyses.items():
            if section is None:
                continue
            if isinstance(section, pl.DataFrame):
                out[name] = section.to_dict()
            elif isinstance(section, dict):
                out[name] = {
                    key: (val.to_dict() if isinstance(val, pl.DataFrame) else val)
                    for key, val in section.items()
                }
            else:
                out[name] = section

        out['metadata'] = self.metadata
        return out

    def to_json(self) -> str:
        """
        Serialize all results to a JSON string.

        Returns:
            JSON string (non-serializable values fall back to ``str``)
        """
        return json.dumps(self.to_dict(), indent=2, default=str)

    def __repr__(self) -> str:
        """Concise representation showing how many analyses produced output."""
        count = sum(1 for res in self._analyses.values() if res is not None)
        return f"AnalysisResult(analyses={count})"
339
+
340
+
341
def wrap_result(df: pl.DataFrame, operation: str, metadata: Dict[str, Any]) -> DataFrameResult:
    """
    Wrap a DataFrame in a DataFrameResult (convenience constructor).

    Called by: to, transform, snapshot, synthetic, expressions functions

    Args:
        df: DataFrame to wrap
        operation: Operation name
        metadata: Operation metadata

    Returns:
        DataFrameResult instance

    Example:
        result = wrap_result(df, 'to', {'columns_added': ['price']})
    """
    return DataFrameResult(df=df, operation=operation, metadata=metadata)
359
+
360
+
361
def wrap_analysis(analyses: Dict[str, Any], metadata: Dict[str, Any]) -> AnalysisResult:
    """
    Wrap analysis output in an AnalysisResult (convenience constructor).

    Called by: analyze function

    Args:
        analyses: Dictionary of analysis results
        metadata: Analysis metadata

    Returns:
        AnalysisResult instance

    Example:
        result = wrap_analysis(
            {'quality': {...}, 'cardinality': {...}},
            {'execution_time': 0.5}
        )
    """
    return AnalysisResult(analyses=analyses, metadata=metadata)
@@ -0,0 +1,243 @@
1
+ """
2
+ Strategy parsing and validation utilities for Additory.
3
+
4
+ Provides functions to parse and validate strategy dictionaries
5
+ used across multiple functions (to, transform, synthetic).
6
+ """
7
+
8
+ from typing import Any, Dict, List, Tuple
9
+ import polars as pl
10
+
11
+
12
def parse_strategy(strategy: Dict, context: str) -> Dict:
    """
    Parse and validate strategy dictionary for a given context.

    Args:
        strategy: Dictionary mapping column names to strategy configuration
            (either a dict of options or a simple value such as 'deduce:expr')
        context: Context string ('to', 'transform', 'synthetic')

    Returns:
        Validated and normalized strategy dictionary

    Raises:
        ValueError: If the context is unknown or a strategy contains
            keys not allowed for that context
        TypeError: If strategy is not a dictionary

    Example:
        strategy = {'price': {'mode': 'first', 'position': 'after:id'}}
        parsed = parse_strategy(strategy, context='to')
    """
    # Each calling context accepts a different set of strategy keys.
    allowed_keys_by_context = {
        'to': ['mode', 'position', 'default', 'deduce'],
        'transform': ['mode', 'from_unit', 'to_unit', 'features', 'deduce'],
        'synthetic': ['mode', 'distribution', 'min', 'max', 'mean', 'std',
                      'categories', 'deduce', 'correlation'],
    }

    if context not in allowed_keys_by_context:
        raise ValueError(f"Invalid context: {context}. Must be one of: {list(allowed_keys_by_context.keys())}")

    # Fail early with a clear message instead of an opaque AttributeError
    # when a caller passes e.g. None or a list.
    if not isinstance(strategy, dict):
        raise TypeError(f"strategy must be a dict, got {type(strategy)}")

    allowed_keys = allowed_keys_by_context[context]

    parsed = {}
    for column, column_strategy in strategy.items():
        if isinstance(column_strategy, dict):
            # Reject unknown keys before normalizing values.
            validate_strategy_keys(column_strategy, allowed_keys)
            parsed[column] = {
                key: normalize_strategy_value(value, key)
                for key, value in column_strategy.items()
            }
        else:
            # Simple value (e.g. 'deduce:expression') — no per-key schema.
            parsed[column] = normalize_strategy_value(column_strategy, 'simple')

    return parsed
58
+
59
+
60
def validate_strategy_keys(strategy: Dict, allowed_keys: List[str]) -> bool:
    """
    Ensure every key in a strategy dict belongs to the allowed set.

    Args:
        strategy: Strategy dictionary to validate
        allowed_keys: List of allowed keys for this context

    Returns:
        True if valid

    Raises:
        ValueError: If strategy contains keys outside allowed_keys
    """
    unexpected = set(strategy) - set(allowed_keys)
    if unexpected:
        raise ValueError(
            f"Invalid strategy keys: {unexpected}. "
            f"Allowed keys: {allowed_keys}"
        )
    return True
83
+
84
+
85
def normalize_strategy_value(value: Any, value_type: str) -> Any:
    """
    Normalize strategy value to expected type.

    String-valued, case-insensitive keys are lower-cased; numeric keys are
    coerced to float; list-valued keys are type-checked; 'default', 'simple'
    and unknown keys pass through unchanged.

    Args:
        value: Value to normalize
        value_type: Strategy key the value belongs to ('mode', 'position',
            'min', 'categories', ...)

    Returns:
        Normalized value

    Raises:
        ValueError: If the value's type does not match what the key expects
    """
    # Case-insensitive string keys.
    if value_type in ('mode', 'position', 'distribution'):
        if not isinstance(value, str):
            raise ValueError(f"{value_type.capitalize()} must be a string, got {type(value)}")
        return value.lower()

    # Expression / reference string — case is significant, keep as-is.
    if value_type == 'deduce':
        if not isinstance(value, str):
            raise ValueError(f"Deduce must be a string, got {type(value)}")
        return value

    # Numeric keys, coerced to float.
    if value_type in ('min', 'max', 'mean', 'std', 'correlation'):
        # BUG FIX: bool is a subclass of int, so True/False previously slipped
        # through and silently became 1.0/0.0 — reject it explicitly.
        if isinstance(value, bool) or not isinstance(value, (int, float)):
            raise ValueError(f"{value_type} must be numeric, got {type(value)}")
        return float(value)

    # List-valued keys.
    if value_type in ('categories', 'features'):
        if not isinstance(value, list):
            raise ValueError(f"{value_type.capitalize()} must be a list, got {type(value)}")
        return value

    # Unit names are matched case-insensitively downstream.
    if value_type in ('from_unit', 'to_unit'):
        if not isinstance(value, str):
            raise ValueError(f"{value_type} must be a string, got {type(value)}")
        return value.lower()

    # 'default' and 'simple' may be any type; unknown keys pass through.
    return value
155
+
156
+
157
def parse_deduce_strategy(strategy_value: str, df: pl.DataFrame) -> pl.Series:
    """
    Parse 'deduce:' strategy (inline expression or reference).

    Args:
        strategy_value: Strategy string starting with 'deduce:'
        df: DataFrame for expression evaluation

    Returns:
        Polars Series with computed values

    Raises:
        ValueError: If the 'deduce:' prefix is missing or the inline
            expression fails to evaluate
        NotImplementedError: For namespaced references (pending
            expressions.engine)

    Example:
        # Inline expression
        result = parse_deduce_strategy('deduce:weight / (height ** 2)', df)

        # Reference expression
        result = parse_deduce_strategy('deduce:inbuilt:bmi', df)
    """
    prefix = 'deduce:'
    if not strategy_value.startswith(prefix):
        raise ValueError(f"Strategy value must start with 'deduce:', got: {strategy_value}")

    expression_part = strategy_value[len(prefix):]

    if ':' in expression_part:
        # Reference like 'inbuilt:bmi' or 'myfolder:roi' — placeholder until
        # expressions.engine is available.
        namespace, expr_name = extract_namespace_from_reference(expression_part)
        raise NotImplementedError(
            f"Expression references not yet implemented. "
            f"Namespace: {namespace}, Expression: {expr_name}"
        )

    # Inline expression like 'weight / (height ** 2)'.
    try:
        # BUG FIX: the previous implementation wrapped the expression string
        # in pl.lit(), which produced a column holding the literal *text* of
        # the expression instead of evaluating it. Evaluate the expression
        # with each column name bound to pl.col(name) so it builds a real
        # Polars expression.
        # NOTE(security): this eval()s the expression string (builtins
        # disabled) — do not pass untrusted input; to be replaced by
        # expressions.engine.
        column_refs = {name: pl.col(name) for name in df.columns}
        expr = eval(expression_part, {'__builtins__': {}}, column_refs)
        if not isinstance(expr, pl.Expr):
            # Constant expressions (e.g. 'deduce:42') evaluate to a scalar.
            expr = pl.lit(expr)
        return df.select(expr.alias('result'))['result']
    except Exception as e:
        raise ValueError(
            f"Failed to evaluate inline expression: {expression_part}. "
            f"Error: {str(e)}"
        ) from e
205
+
206
+
207
def extract_namespace_from_reference(reference: str) -> Tuple[str, str]:
    """
    Extract namespace and expression name from reference.

    Only the first ':' separates the namespace from the expression name,
    so the name itself may contain ':'.

    Args:
        reference: Reference string like 'inbuilt:bmi' or 'myfolder:roi'

    Returns:
        Tuple of (namespace, expression_name), both stripped of surrounding
        whitespace

    Raises:
        ValueError: If the reference has no ':' or either part is empty

    Example:
        namespace, expr_name = extract_namespace_from_reference('inbuilt:bmi')
        # Returns: ('inbuilt', 'bmi')
    """
    # str.partition replaces the original split(':', 1) plus an unreachable
    # len(parts) != 2 check (after the ':' membership test, split always
    # produced exactly two parts).
    namespace, sep, expr_name = reference.partition(':')

    if not sep:
        raise ValueError(
            f"Invalid reference format: {reference}. "
            f"Expected format: 'namespace:expression_name'"
        )

    namespace = namespace.strip()
    expr_name = expr_name.strip()

    if not namespace or not expr_name:
        raise ValueError(
            f"Invalid reference format: {reference}. "
            f"Both namespace and expression name must be non-empty"
        )

    return namespace, expr_name