additory 0.1.0a3__py3-none-any.whl → 0.1.1a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- additory/__init__.py +58 -14
- additory/common/__init__.py +31 -147
- additory/common/column_selector.py +255 -0
- additory/common/distributions.py +286 -613
- additory/common/extractors.py +313 -0
- additory/common/knn_imputation.py +332 -0
- additory/common/result.py +380 -0
- additory/common/strategy_parser.py +243 -0
- additory/common/unit_conversions.py +338 -0
- additory/common/validation.py +283 -103
- additory/core/__init__.py +34 -22
- additory/core/backend.py +258 -0
- additory/core/config.py +177 -305
- additory/core/logging.py +230 -24
- additory/core/memory_manager.py +157 -495
- additory/expressions/__init__.py +2 -23
- additory/expressions/compiler.py +457 -0
- additory/expressions/engine.py +264 -487
- additory/expressions/integrity.py +179 -0
- additory/expressions/loader.py +263 -0
- additory/expressions/parser.py +363 -167
- additory/expressions/resolver.py +274 -0
- additory/functions/__init__.py +1 -0
- additory/functions/analyze/__init__.py +144 -0
- additory/functions/analyze/cardinality.py +58 -0
- additory/functions/analyze/correlations.py +66 -0
- additory/functions/analyze/distributions.py +53 -0
- additory/functions/analyze/duplicates.py +49 -0
- additory/functions/analyze/features.py +61 -0
- additory/functions/analyze/imputation.py +66 -0
- additory/functions/analyze/outliers.py +65 -0
- additory/functions/analyze/patterns.py +65 -0
- additory/functions/analyze/presets.py +72 -0
- additory/functions/analyze/quality.py +59 -0
- additory/functions/analyze/timeseries.py +53 -0
- additory/functions/analyze/types.py +45 -0
- additory/functions/expressions/__init__.py +161 -0
- additory/functions/snapshot/__init__.py +82 -0
- additory/functions/snapshot/filter.py +119 -0
- additory/functions/synthetic/__init__.py +113 -0
- additory/functions/synthetic/mode_detector.py +47 -0
- additory/functions/synthetic/strategies/__init__.py +1 -0
- additory/functions/synthetic/strategies/advanced.py +35 -0
- additory/functions/synthetic/strategies/augmentative.py +160 -0
- additory/functions/synthetic/strategies/generative.py +168 -0
- additory/functions/synthetic/strategies/presets.py +116 -0
- additory/functions/to/__init__.py +188 -0
- additory/functions/to/lookup.py +351 -0
- additory/functions/to/merge.py +189 -0
- additory/functions/to/sort.py +91 -0
- additory/functions/to/summarize.py +170 -0
- additory/functions/transform/__init__.py +140 -0
- additory/functions/transform/datetime.py +79 -0
- additory/functions/transform/extract.py +85 -0
- additory/functions/transform/harmonize.py +105 -0
- additory/functions/transform/knn.py +62 -0
- additory/functions/transform/onehotencoding.py +68 -0
- additory/functions/transform/transpose.py +42 -0
- additory-0.1.1a1.dist-info/METADATA +83 -0
- additory-0.1.1a1.dist-info/RECORD +62 -0
- additory/analysis/__init__.py +0 -48
- additory/analysis/cardinality.py +0 -126
- additory/analysis/correlations.py +0 -124
- additory/analysis/distributions.py +0 -376
- additory/analysis/quality.py +0 -158
- additory/analysis/scan.py +0 -400
- additory/common/backend.py +0 -371
- additory/common/column_utils.py +0 -191
- additory/common/exceptions.py +0 -62
- additory/common/lists.py +0 -229
- additory/common/patterns.py +0 -240
- additory/common/resolver.py +0 -567
- additory/common/sample_data.py +0 -182
- additory/core/ast_builder.py +0 -165
- additory/core/backends/__init__.py +0 -23
- additory/core/backends/arrow_bridge.py +0 -483
- additory/core/backends/cudf_bridge.py +0 -355
- additory/core/column_positioning.py +0 -358
- additory/core/compiler_polars.py +0 -166
- additory/core/enhanced_cache_manager.py +0 -1119
- additory/core/enhanced_matchers.py +0 -473
- additory/core/enhanced_version_manager.py +0 -325
- additory/core/executor.py +0 -59
- additory/core/integrity_manager.py +0 -477
- additory/core/loader.py +0 -190
- additory/core/namespace_manager.py +0 -657
- additory/core/parser.py +0 -176
- additory/core/polars_expression_engine.py +0 -601
- additory/core/registry.py +0 -176
- additory/core/sample_data_manager.py +0 -492
- additory/core/user_namespace.py +0 -751
- additory/core/validator.py +0 -27
- additory/dynamic_api.py +0 -304
- additory/expressions/proxy.py +0 -549
- additory/expressions/registry.py +0 -313
- additory/expressions/samples.py +0 -492
- additory/synthetic/__init__.py +0 -13
- additory/synthetic/column_name_resolver.py +0 -149
- additory/synthetic/distributions.py +0 -22
- additory/synthetic/forecast.py +0 -1132
- additory/synthetic/linked_list_parser.py +0 -415
- additory/synthetic/namespace_lookup.py +0 -129
- additory/synthetic/smote.py +0 -320
- additory/synthetic/strategies.py +0 -850
- additory/synthetic/synthesizer.py +0 -713
- additory/utilities/__init__.py +0 -53
- additory/utilities/encoding.py +0 -600
- additory/utilities/games.py +0 -300
- additory/utilities/keys.py +0 -8
- additory/utilities/lookup.py +0 -103
- additory/utilities/matchers.py +0 -216
- additory/utilities/resolvers.py +0 -286
- additory/utilities/settings.py +0 -167
- additory/utilities/units.py +0 -749
- additory/utilities/validators.py +0 -153
- additory-0.1.0a3.dist-info/METADATA +0 -288
- additory-0.1.0a3.dist-info/RECORD +0 -71
- additory-0.1.0a3.dist-info/licenses/LICENSE +0 -21
- {additory-0.1.0a3.dist-info → additory-0.1.1a1.dist-info}/WHEEL +0 -0
- {additory-0.1.0a3.dist-info → additory-0.1.1a1.dist-info}/top_level.txt +0 -0
additory/expressions/__init__.py
CHANGED
|
@@ -1,26 +1,5 @@
|
|
|
1
|
-
# additory/expressions/__init__.py
|
|
2
|
-
# Expression system - .add file driven functionality
|
|
3
|
-
|
|
4
1
|
"""
|
|
5
|
-
Expression
|
|
2
|
+
Expression engine for Additory.
|
|
6
3
|
|
|
7
|
-
|
|
8
|
-
- Expression parsing and compilation
|
|
9
|
-
- Polars-based expression execution
|
|
10
|
-
- Expression caching and versioning
|
|
11
|
-
- Sample data management
|
|
12
|
-
- Namespace support (builtin vs user)
|
|
4
|
+
Provides expression parsing, compilation, and evaluation.
|
|
13
5
|
"""
|
|
14
|
-
|
|
15
|
-
# Core expression functionality will be imported here after migration
|
|
16
|
-
# from .proxy import EnhancedExpressionProxy
|
|
17
|
-
# from .engine import PolarsExpressionEngine
|
|
18
|
-
# from .parser import ExpressionParser
|
|
19
|
-
# from .compiler import ExpressionCompiler
|
|
20
|
-
# from .executor import ExpressionExecutor
|
|
21
|
-
# from .registry import ExpressionRegistry
|
|
22
|
-
# from .samples import SampleDataManager
|
|
23
|
-
|
|
24
|
-
__all__ = [
|
|
25
|
-
# Will be populated after migration
|
|
26
|
-
]
|
|
@@ -0,0 +1,457 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Expression compiler for Additory.
|
|
3
|
+
|
|
4
|
+
Compiles Abstract Syntax Trees (AST) into Polars expressions.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import polars as pl
|
|
8
|
+
from typing import List
|
|
9
|
+
from additory.expressions.parser import ASTNode
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class ExpressionCompiler:
    """
    Compile AST nodes to Polars expressions.

    Supports:
    - Arithmetic operators: +, -, *, /, **, %
    - Comparison operators: ==, !=, >, <, >=, <=
    - Logical operators: AND, OR, NOT
    - Functions: sqrt, abs, log, if_else, etc.

    The compiler keeps no per-instance state: every method derives its result
    from its arguments alone.
    """

    # Node type -> name of the method compiling it. Methods are looked up by
    # name on ``self`` so subclass overrides are honoured. 'literal' is handled
    # separately because compile_literal() takes no DataFrame.
    _NODE_COMPILERS = {
        'identifier': 'compile_identifier',
        'binary_op': 'compile_binary_op',
        'unary_op': 'compile_unary_op',
        'function': 'compile_function',
    }

    # Operator token -> callable combining the already compiled operands.
    _BINARY_OPERATORS = {
        '+': lambda lhs, rhs: lhs + rhs,
        '-': lambda lhs, rhs: lhs - rhs,
        '*': lambda lhs, rhs: lhs * rhs,
        '/': lambda lhs, rhs: lhs / rhs,
        '**': lambda lhs, rhs: lhs ** rhs,
        '%': lambda lhs, rhs: lhs % rhs,
        '==': lambda lhs, rhs: lhs == rhs,
        '!=': lambda lhs, rhs: lhs != rhs,
        '>': lambda lhs, rhs: lhs > rhs,
        '<': lambda lhs, rhs: lhs < rhs,
        '>=': lambda lhs, rhs: lhs >= rhs,
        '<=': lambda lhs, rhs: lhs <= rhs,
        'AND': lambda lhs, rhs: lhs & rhs,
        'OR': lambda lhs, rhs: lhs | rhs,
    }

    _UNARY_OPERATORS = {
        '-': lambda operand: -operand,
        'NOT': lambda operand: ~operand,
    }

    # One-argument math functions whose expression method has the same name.
    _UNARY_MATH = ('sqrt', 'abs', 'log', 'log10', 'exp', 'floor', 'ceil')

    # One-argument string functions -> method on the ``.str`` namespace.
    _UNARY_STRING = {
        'lower': 'to_lowercase',
        'upper': 'to_uppercase',
        'trim': 'strip_chars',
        'length': 'len_chars',
    }

    # One-argument datetime functions -> method on the ``.dt`` namespace.
    _DATETIME_PARTS = {
        'year': 'year',
        'month': 'month',
        'day': 'day',
        'hour': 'hour',
        'minute': 'minute',
        'second': 'second',
        'day_of_week': 'weekday',
    }

    # One-argument aggregations whose expression method has the same name.
    _SIMPLE_AGGREGATIONS = ('sum', 'mean', 'median', 'min', 'max', 'std')

    # Function name -> name of the family compiler handling it.
    _FUNCTION_FAMILIES = {
        **dict.fromkeys(
            ('sqrt', 'abs', 'log', 'log10', 'exp', 'pow', 'round', 'floor', 'ceil'),
            'compile_math_function',
        ),
        **dict.fromkeys(
            ('lower', 'upper', 'trim', 'length', 'substring', 'replace', 'contains', 'matches'),
            'compile_string_function',
        ),
        **dict.fromkeys(
            ('year', 'month', 'day', 'hour', 'minute', 'second', 'day_of_week', 'time_of_day'),
            'compile_datetime_function',
        ),
        **dict.fromkeys(
            ('sum', 'mean', 'median', 'min', 'max', 'count', 'std'),
            'compile_aggregation_function',
        ),
        **dict.fromkeys(
            ('if_else', 'coalesce', 'is_null', 'is_not_null'),
            'compile_conditional_function',
        ),
    }

    @staticmethod
    def _require_args(name, args, *allowed):
        """Raise ValueError unless ``len(args)`` is one of the ``allowed`` counts."""
        if len(args) in allowed:
            return
        if len(allowed) == 1:
            noun = "argument" if allowed[0] == 1 else "arguments"
            expected = f"{allowed[0]} {noun}"
        else:
            expected = " or ".join(str(count) for count in allowed) + " arguments"
        raise ValueError(f"Function '{name}' expects {expected}, got {len(args)}")

    @staticmethod
    def _literal_decimals(node):
        """Return the non-negative integer held by a literal node (for ``round``)."""
        value = node.value if node.type == 'literal' else None
        # Accept integral floats (e.g. 2.0) in case the parser yields floats.
        if isinstance(value, float) and value.is_integer():
            value = int(value)
        if isinstance(value, bool) or not isinstance(value, int) or value < 0:
            raise ValueError(
                "Function 'round' expects a non-negative integer literal "
                "as its second argument"
            )
        return value

    def compile(self, ast: "ASTNode", df: "pl.DataFrame") -> "pl.Expr":
        """
        Compile AST to Polars expression.

        Args:
            ast: Root AST node
            df: DataFrame (for column validation)

        Returns:
            Polars expression

        Raises:
            ValueError: For unknown node types, operators or functions, wrong
                argument counts, or columns missing from ``df``.

        Example:
            compiler = ExpressionCompiler()
            expr = compiler.compile(ast, df)
            result = df.select(expr)
        """
        return self.compile_node(ast, df)

    def compile_node(self, node: "ASTNode", df: "pl.DataFrame") -> "pl.Expr":
        """
        Compile a single AST node by dispatching on ``node.type``.

        Args:
            node: AST node to compile
            df: DataFrame

        Returns:
            Polars expression

        Raises:
            ValueError: If ``node.type`` is not a known node type.
        """
        node_type = node.type
        if node_type == 'literal':
            return self.compile_literal(node)

        handler_name = self._NODE_COMPILERS.get(node_type)
        if handler_name is None:
            raise ValueError(f"Unknown node type: {node.type}")
        return getattr(self, handler_name)(node, df)

    def compile_literal(self, node: "ASTNode") -> "pl.Expr":
        """
        Compile literal value node.

        Args:
            node: Literal node; ``node.value`` is wrapped with ``pl.lit``

        Returns:
            Polars expression
        """
        return pl.lit(node.value)

    def compile_identifier(self, node: "ASTNode", df: "pl.DataFrame") -> "pl.Expr":
        """
        Compile column reference node.

        Args:
            node: Identifier node; ``node.value`` is the column name
            df: DataFrame whose columns the name is validated against

        Returns:
            Polars expression

        Raises:
            ValueError: If the column is not present in ``df``.
        """
        column_name = node.value

        # Validate column exists
        if column_name not in df.columns:
            raise ValueError(f"Column '{column_name}' not found in DataFrame")

        return pl.col(column_name)

    def compile_binary_op(self, node: "ASTNode", df: "pl.DataFrame") -> "pl.Expr":
        """
        Compile binary operator node.

        Args:
            node: Binary operator node (``node.left``, ``node.right``, and the
                operator token in ``node.value``)
            df: DataFrame

        Returns:
            Polars expression

        Raises:
            ValueError: If the operator token is not supported.
        """
        # Operands are compiled first, so errors inside them surface before an
        # unknown-operator error.
        left = self.compile_node(node.left, df)
        right = self.compile_node(node.right, df)

        operator = node.value
        combine = self._BINARY_OPERATORS.get(operator)
        if combine is None:
            raise ValueError(f"Unknown binary operator: {operator}")
        return combine(left, right)

    def compile_unary_op(self, node: "ASTNode", df: "pl.DataFrame") -> "pl.Expr":
        """
        Compile unary operator node.

        Args:
            node: Unary operator node; the operand is read from ``node.right``
                and the operator token from ``node.value``
            df: DataFrame

        Returns:
            Polars expression

        Raises:
            ValueError: If the operator token is not supported.
        """
        operand = self.compile_node(node.right, df)

        operator = node.value
        apply = self._UNARY_OPERATORS.get(operator)
        if apply is None:
            raise ValueError(f"Unknown unary operator: {operator}")
        return apply(operand)

    def compile_function(self, node: "ASTNode", df: "pl.DataFrame") -> "pl.Expr":
        """
        Compile function call node.

        Args:
            node: Function node (name in ``node.value``, arguments in
                ``node.children``, which may be None)
            df: DataFrame

        Returns:
            Polars expression

        Raises:
            ValueError: If the function is unknown, receives the wrong number
                of arguments, or ``round`` is given a non-literal precision.
        """
        func_name = node.value
        children = node.children or []

        if func_name == 'round' and len(children) == 2:
            # Expr.round() takes the number of decimals as a plain integer, so
            # the precision is read from the literal node instead of being
            # compiled into an expression.
            args = [
                self.compile_node(children[0], df),
                self._literal_decimals(children[1]),
            ]
        else:
            args = [self.compile_node(child, df) for child in children]

        # Dispatch to specific function compiler
        handler_name = self._FUNCTION_FAMILIES.get(func_name)
        if handler_name is None:
            raise ValueError(f"Unknown function: {func_name}")
        return getattr(self, handler_name)(func_name, args)

    def compile_math_function(self, name: str, args: List["pl.Expr"]) -> "pl.Expr":
        """
        Compile mathematical function.

        Args:
            name: Function name
            args: Compiled arguments. For ``round`` the optional second entry
                is the number of decimals as a plain integer (defaults to 0).

        Returns:
            Polars expression

        Raises:
            ValueError: On an unknown name or wrong argument count.
        """
        if name in self._UNARY_MATH:
            self._require_args(name, args, 1)
            return getattr(args[0], name)()

        if name == 'pow':
            self._require_args(name, args, 2)
            return args[0].pow(args[1])

        if name == 'round':
            self._require_args(name, args, 1, 2)
            decimals = args[1] if len(args) == 2 else 0
            return args[0].round(decimals)

        raise ValueError(f"Unknown math function: {name}")

    def compile_string_function(self, name: str, args: List["pl.Expr"]) -> "pl.Expr":
        """
        Compile string function.

        Args:
            name: Function name
            args: Compiled arguments

        Returns:
            Polars expression

        Raises:
            ValueError: On an unknown name or wrong argument count.
        """
        if name in self._UNARY_STRING:
            self._require_args(name, args, 1)
            return getattr(args[0].str, self._UNARY_STRING[name])()

        if name == 'substring':
            self._require_args(name, args, 3)
            return args[0].str.slice(args[1], args[2])

        if name == 'replace':
            self._require_args(name, args, 3)
            return args[0].str.replace(args[1], args[2])

        if name in ('contains', 'matches'):
            # Both names compile to the same ``str.contains`` call.
            self._require_args(name, args, 2)
            return args[0].str.contains(args[1])

        raise ValueError(f"Unknown string function: {name}")

    def compile_datetime_function(self, name: str, args: List["pl.Expr"]) -> "pl.Expr":
        """
        Compile datetime function.

        Args:
            name: Function name
            args: Compiled arguments

        Returns:
            Polars expression. ``time_of_day`` yields 'morning' (6-12),
            'afternoon' (12-18), 'evening' (18-22) or 'night' (22-6), and null
            when the hour itself is null.

        Raises:
            ValueError: On an unknown name or wrong argument count.
        """
        if name in self._DATETIME_PARTS:
            self._require_args(name, args, 1)
            return getattr(args[0].dt, self._DATETIME_PARTS[name])()

        if name == 'time_of_day':
            self._require_args(name, args, 1)
            hour = args[0].dt.hour()
            return (
                pl.when((hour >= 6) & (hour < 12)).then(pl.lit('morning'))
                .when((hour >= 12) & (hour < 18)).then(pl.lit('afternoon'))
                .when((hour >= 18) & (hour < 22)).then(pl.lit('evening'))
                # Only a known hour can be 'night'; a missing timestamp must
                # stay null rather than be labelled.
                .when(hour.is_not_null()).then(pl.lit('night'))
                .otherwise(None)
            )

        raise ValueError(f"Unknown datetime function: {name}")

    def compile_aggregation_function(self, name: str, args: List["pl.Expr"]) -> "pl.Expr":
        """
        Compile aggregation function.

        Args:
            name: Function name
            args: Compiled arguments (``count`` also accepts none)

        Returns:
            Polars expression

        Raises:
            ValueError: On an unknown name or wrong argument count.
        """
        if name in self._SIMPLE_AGGREGATIONS:
            self._require_args(name, args, 1)
            return getattr(args[0], name)()

        if name == 'count':
            self._require_args(name, args, 0, 1)
            if not args:
                # pl.len() supersedes the deprecated pl.count(); fall back for
                # Polars versions that do not provide it.
                return pl.len() if hasattr(pl, 'len') else pl.count()
            return args[0].count()

        raise ValueError(f"Unknown aggregation function: {name}")

    def compile_conditional_function(self, name: str, args: List["pl.Expr"]) -> "pl.Expr":
        """
        Compile conditional function.

        Args:
            name: Function name
            args: Compiled arguments

        Returns:
            Polars expression

        Raises:
            ValueError: On an unknown name or wrong argument count.
        """
        if name == 'if_else':
            self._require_args(name, args, 3)
            return pl.when(args[0]).then(args[1]).otherwise(args[2])

        if name == 'coalesce':
            if len(args) < 1:
                raise ValueError(f"Function 'coalesce' expects at least 1 argument, got {len(args)}")
            return pl.coalesce(*args)

        if name == 'is_null':
            self._require_args(name, args, 1)
            return args[0].is_null()

        if name == 'is_not_null':
            self._require_args(name, args, 1)
            return args[0].is_not_null()

        raise ValueError(f"Unknown conditional function: {name}")
|