additory 0.1.0a3__py3-none-any.whl → 0.1.1a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120)
  1. additory/__init__.py +58 -14
  2. additory/common/__init__.py +31 -147
  3. additory/common/column_selector.py +255 -0
  4. additory/common/distributions.py +286 -613
  5. additory/common/extractors.py +313 -0
  6. additory/common/knn_imputation.py +332 -0
  7. additory/common/result.py +380 -0
  8. additory/common/strategy_parser.py +243 -0
  9. additory/common/unit_conversions.py +338 -0
  10. additory/common/validation.py +283 -103
  11. additory/core/__init__.py +34 -22
  12. additory/core/backend.py +258 -0
  13. additory/core/config.py +177 -305
  14. additory/core/logging.py +230 -24
  15. additory/core/memory_manager.py +157 -495
  16. additory/expressions/__init__.py +2 -23
  17. additory/expressions/compiler.py +457 -0
  18. additory/expressions/engine.py +264 -487
  19. additory/expressions/integrity.py +179 -0
  20. additory/expressions/loader.py +263 -0
  21. additory/expressions/parser.py +363 -167
  22. additory/expressions/resolver.py +274 -0
  23. additory/functions/__init__.py +1 -0
  24. additory/functions/analyze/__init__.py +144 -0
  25. additory/functions/analyze/cardinality.py +58 -0
  26. additory/functions/analyze/correlations.py +66 -0
  27. additory/functions/analyze/distributions.py +53 -0
  28. additory/functions/analyze/duplicates.py +49 -0
  29. additory/functions/analyze/features.py +61 -0
  30. additory/functions/analyze/imputation.py +66 -0
  31. additory/functions/analyze/outliers.py +65 -0
  32. additory/functions/analyze/patterns.py +65 -0
  33. additory/functions/analyze/presets.py +72 -0
  34. additory/functions/analyze/quality.py +59 -0
  35. additory/functions/analyze/timeseries.py +53 -0
  36. additory/functions/analyze/types.py +45 -0
  37. additory/functions/expressions/__init__.py +161 -0
  38. additory/functions/snapshot/__init__.py +82 -0
  39. additory/functions/snapshot/filter.py +119 -0
  40. additory/functions/synthetic/__init__.py +113 -0
  41. additory/functions/synthetic/mode_detector.py +47 -0
  42. additory/functions/synthetic/strategies/__init__.py +1 -0
  43. additory/functions/synthetic/strategies/advanced.py +35 -0
  44. additory/functions/synthetic/strategies/augmentative.py +160 -0
  45. additory/functions/synthetic/strategies/generative.py +168 -0
  46. additory/functions/synthetic/strategies/presets.py +116 -0
  47. additory/functions/to/__init__.py +188 -0
  48. additory/functions/to/lookup.py +351 -0
  49. additory/functions/to/merge.py +189 -0
  50. additory/functions/to/sort.py +91 -0
  51. additory/functions/to/summarize.py +170 -0
  52. additory/functions/transform/__init__.py +140 -0
  53. additory/functions/transform/datetime.py +79 -0
  54. additory/functions/transform/extract.py +85 -0
  55. additory/functions/transform/harmonize.py +105 -0
  56. additory/functions/transform/knn.py +62 -0
  57. additory/functions/transform/onehotencoding.py +68 -0
  58. additory/functions/transform/transpose.py +42 -0
  59. additory-0.1.1a1.dist-info/METADATA +83 -0
  60. additory-0.1.1a1.dist-info/RECORD +62 -0
  61. additory/analysis/__init__.py +0 -48
  62. additory/analysis/cardinality.py +0 -126
  63. additory/analysis/correlations.py +0 -124
  64. additory/analysis/distributions.py +0 -376
  65. additory/analysis/quality.py +0 -158
  66. additory/analysis/scan.py +0 -400
  67. additory/common/backend.py +0 -371
  68. additory/common/column_utils.py +0 -191
  69. additory/common/exceptions.py +0 -62
  70. additory/common/lists.py +0 -229
  71. additory/common/patterns.py +0 -240
  72. additory/common/resolver.py +0 -567
  73. additory/common/sample_data.py +0 -182
  74. additory/core/ast_builder.py +0 -165
  75. additory/core/backends/__init__.py +0 -23
  76. additory/core/backends/arrow_bridge.py +0 -483
  77. additory/core/backends/cudf_bridge.py +0 -355
  78. additory/core/column_positioning.py +0 -358
  79. additory/core/compiler_polars.py +0 -166
  80. additory/core/enhanced_cache_manager.py +0 -1119
  81. additory/core/enhanced_matchers.py +0 -473
  82. additory/core/enhanced_version_manager.py +0 -325
  83. additory/core/executor.py +0 -59
  84. additory/core/integrity_manager.py +0 -477
  85. additory/core/loader.py +0 -190
  86. additory/core/namespace_manager.py +0 -657
  87. additory/core/parser.py +0 -176
  88. additory/core/polars_expression_engine.py +0 -601
  89. additory/core/registry.py +0 -176
  90. additory/core/sample_data_manager.py +0 -492
  91. additory/core/user_namespace.py +0 -751
  92. additory/core/validator.py +0 -27
  93. additory/dynamic_api.py +0 -304
  94. additory/expressions/proxy.py +0 -549
  95. additory/expressions/registry.py +0 -313
  96. additory/expressions/samples.py +0 -492
  97. additory/synthetic/__init__.py +0 -13
  98. additory/synthetic/column_name_resolver.py +0 -149
  99. additory/synthetic/distributions.py +0 -22
  100. additory/synthetic/forecast.py +0 -1132
  101. additory/synthetic/linked_list_parser.py +0 -415
  102. additory/synthetic/namespace_lookup.py +0 -129
  103. additory/synthetic/smote.py +0 -320
  104. additory/synthetic/strategies.py +0 -850
  105. additory/synthetic/synthesizer.py +0 -713
  106. additory/utilities/__init__.py +0 -53
  107. additory/utilities/encoding.py +0 -600
  108. additory/utilities/games.py +0 -300
  109. additory/utilities/keys.py +0 -8
  110. additory/utilities/lookup.py +0 -103
  111. additory/utilities/matchers.py +0 -216
  112. additory/utilities/resolvers.py +0 -286
  113. additory/utilities/settings.py +0 -167
  114. additory/utilities/units.py +0 -749
  115. additory/utilities/validators.py +0 -153
  116. additory-0.1.0a3.dist-info/METADATA +0 -288
  117. additory-0.1.0a3.dist-info/RECORD +0 -71
  118. additory-0.1.0a3.dist-info/licenses/LICENSE +0 -21
  119. {additory-0.1.0a3.dist-info → additory-0.1.1a1.dist-info}/WHEEL +0 -0
  120. {additory-0.1.0a3.dist-info → additory-0.1.1a1.dist-info}/top_level.txt +0 -0
@@ -1,26 +1,5 @@
1
- # additory/expressions/__init__.py
2
- # Expression system - .add file driven functionality
3
-
4
1
  """
5
- Expression System Module
2
+ Expression engine for Additory.
6
3
 
7
- This module handles .add file driven functionality including:
8
- - Expression parsing and compilation
9
- - Polars-based expression execution
10
- - Expression caching and versioning
11
- - Sample data management
12
- - Namespace support (builtin vs user)
4
+ Provides expression parsing, compilation, and evaluation.
13
5
  """
14
-
15
- # Core expression functionality will be imported here after migration
16
- # from .proxy import EnhancedExpressionProxy
17
- # from .engine import PolarsExpressionEngine
18
- # from .parser import ExpressionParser
19
- # from .compiler import ExpressionCompiler
20
- # from .executor import ExpressionExecutor
21
- # from .registry import ExpressionRegistry
22
- # from .samples import SampleDataManager
23
-
24
- __all__ = [
25
- # Will be populated after migration
26
- ]
@@ -0,0 +1,457 @@
1
+ """
2
+ Expression compiler for Additory.
3
+
4
+ Compiles Abstract Syntax Trees (AST) into Polars expressions.
5
+ """
6
+
7
+ import polars as pl
8
+ from typing import List
9
+ from additory.expressions.parser import ASTNode
10
+
11
+
12
class ExpressionCompiler:
    """
    Compile AST nodes to Polars expressions.

    Supports:
    - Arithmetic operators: +, -, *, /, **, %
    - Comparison operators: ==, !=, >, <, >=, <=
    - Logical operators: AND, OR, NOT
    - Functions: sqrt, abs, log, if_else, etc.

    Nodes are read through their ``type`` and ``value`` attributes plus,
    depending on the node type, ``left``/``right`` (operators) or
    ``children`` (function arguments).
    """

    # Binary operator symbol -> callable combining two compiled operands.
    _BINARY_OPERATORS = {
        '+': lambda lhs, rhs: lhs + rhs,
        '-': lambda lhs, rhs: lhs - rhs,
        '*': lambda lhs, rhs: lhs * rhs,
        '/': lambda lhs, rhs: lhs / rhs,
        '**': lambda lhs, rhs: lhs ** rhs,
        '%': lambda lhs, rhs: lhs % rhs,
        '==': lambda lhs, rhs: lhs == rhs,
        '!=': lambda lhs, rhs: lhs != rhs,
        '>': lambda lhs, rhs: lhs > rhs,
        '<': lambda lhs, rhs: lhs < rhs,
        '>=': lambda lhs, rhs: lhs >= rhs,
        '<=': lambda lhs, rhs: lhs <= rhs,
        'AND': lambda lhs, rhs: lhs & rhs,
        'OR': lambda lhs, rhs: lhs | rhs,
    }

    # Function families, used by compile_function to pick a sub-compiler.
    _MATH_FUNCTIONS = (
        'sqrt', 'abs', 'log', 'log10', 'exp', 'pow', 'round', 'floor', 'ceil',
    )
    _STRING_FUNCTIONS = (
        'lower', 'upper', 'trim', 'length', 'substring', 'replace',
        'contains', 'matches',
    )
    _DATETIME_FUNCTIONS = (
        'year', 'month', 'day', 'hour', 'minute', 'second', 'day_of_week',
        'time_of_day',
    )
    _AGGREGATION_FUNCTIONS = (
        'sum', 'mean', 'median', 'min', 'max', 'count', 'std',
    )
    _CONDITIONAL_FUNCTIONS = ('if_else', 'coalesce', 'is_null', 'is_not_null')

    # One-argument functions that map directly onto a zero-argument method.
    _SIMPLE_MATH = ('sqrt', 'abs', 'log', 'log10', 'exp', 'floor', 'ceil')
    _SIMPLE_STRING = {
        'lower': 'to_lowercase',
        'upper': 'to_uppercase',
        'trim': 'strip_chars',
        'length': 'len_chars',
    }
    _SIMPLE_DATETIME = {
        'year': 'year',
        'month': 'month',
        'day': 'day',
        'hour': 'hour',
        'minute': 'minute',
        'second': 'second',
        'day_of_week': 'weekday',
    }
    _SIMPLE_AGGREGATIONS = ('sum', 'mean', 'median', 'min', 'max', 'std')

    @staticmethod
    def _require_args(name, args, *allowed):
        """
        Raise ValueError unless ``len(args)`` is one of the allowed counts.

        Args:
            name: Function name (used in the error message)
            args: Compiled arguments
            *allowed: Accepted argument counts
        """
        count = len(args)
        if count in allowed:
            return
        if len(allowed) == 1:
            expected = f"{allowed[0]} argument" + ("" if allowed[0] == 1 else "s")
        else:
            expected = " or ".join(str(option) for option in allowed) + " arguments"
        raise ValueError(f"Function '{name}' expects {expected}, got {count}")

    @staticmethod
    def _literal_decimals(node):
        """
        Extract the ``decimals`` argument of ``round`` as a plain integer.

        Polars' ``Expr.round`` takes a Python int, not an expression, so the
        value has to be read from the AST literal instead of being compiled.

        Args:
            node: AST node given as the second argument of ``round``

        Returns:
            Non-negative integer number of decimals

        Raises:
            ValueError: If the node is not a non-negative integer literal
        """
        value = node.value if node.type == 'literal' else None
        # Accept integral floats such as 2.0 in case numbers are parsed as float.
        if isinstance(value, float) and value.is_integer():
            value = int(value)
        # bool is a subclass of int but is never a meaningful decimal count.
        if isinstance(value, bool) or not isinstance(value, int) or value < 0:
            raise ValueError(
                "Function 'round' expects a non-negative integer literal for decimals"
            )
        return value

    def compile(self, ast: ASTNode, df: pl.DataFrame) -> pl.Expr:
        """
        Compile AST to Polars expression.

        Args:
            ast: Root AST node
            df: DataFrame (for column validation)

        Returns:
            Polars expression

        Example:
            compiler = ExpressionCompiler()
            expr = compiler.compile(ast, df)
            result = df.select(expr)
        """
        return self.compile_node(ast, df)

    def compile_node(self, node: ASTNode, df: pl.DataFrame) -> pl.Expr:
        """
        Compile a single AST node by dispatching on ``node.type``.

        Args:
            node: AST node to compile
            df: DataFrame

        Returns:
            Polars expression

        Raises:
            ValueError: If the node type is not recognised
        """
        kind = node.type
        if kind == 'literal':
            return self.compile_literal(node)
        if kind == 'identifier':
            return self.compile_identifier(node, df)
        if kind == 'binary_op':
            return self.compile_binary_op(node, df)
        if kind == 'unary_op':
            return self.compile_unary_op(node, df)
        if kind == 'function':
            return self.compile_function(node, df)
        raise ValueError(f"Unknown node type: {node.type}")

    def compile_literal(self, node: ASTNode) -> pl.Expr:
        """
        Compile literal value node.

        Args:
            node: Literal node

        Returns:
            Polars expression
        """
        return pl.lit(node.value)

    def compile_identifier(self, node: ASTNode, df: pl.DataFrame) -> pl.Expr:
        """
        Compile column reference node.

        Args:
            node: Identifier node
            df: DataFrame

        Returns:
            Polars expression

        Raises:
            ValueError: If the referenced column is not in ``df``
        """
        column_name = node.value

        # Fail at compile time instead of when the expression is executed.
        if column_name not in df.columns:
            raise ValueError(f"Column '{column_name}' not found in DataFrame")

        return pl.col(column_name)

    def compile_binary_op(self, node: ASTNode, df: pl.DataFrame) -> pl.Expr:
        """
        Compile binary operator node.

        Args:
            node: Binary operator node
            df: DataFrame

        Returns:
            Polars expression

        Raises:
            ValueError: If the operator is not supported
        """
        # Operands are compiled first so their errors surface before an
        # unknown-operator error.
        left = self.compile_node(node.left, df)
        right = self.compile_node(node.right, df)

        operator = node.value
        combine = self._BINARY_OPERATORS.get(operator)
        if combine is None:
            raise ValueError(f"Unknown binary operator: {operator}")
        return combine(left, right)

    def compile_unary_op(self, node: ASTNode, df: pl.DataFrame) -> pl.Expr:
        """
        Compile unary operator node.

        Args:
            node: Unary operator node (operand is stored in ``node.right``)
            df: DataFrame

        Returns:
            Polars expression

        Raises:
            ValueError: If the operator is not supported
        """
        operand = self.compile_node(node.right, df)

        operator = node.value
        if operator == '-':
            return -operand
        if operator == 'NOT':
            return ~operand
        raise ValueError(f"Unknown unary operator: {operator}")

    def compile_function(self, node: ASTNode, df: pl.DataFrame) -> pl.Expr:
        """
        Compile function call node.

        Args:
            node: Function node
            df: DataFrame

        Returns:
            Polars expression

        Raises:
            ValueError: If the function is unknown or its arguments are invalid
        """
        func_name = node.value

        children = node.children or []
        args = [self.compile_node(child, df) for child in children]

        # round() needs its decimals as a plain int (see _literal_decimals),
        # so swap the compiled expression for the literal value.
        if func_name == 'round' and len(children) == 2:
            args[1] = self._literal_decimals(children[1])

        if func_name in self._MATH_FUNCTIONS:
            return self.compile_math_function(func_name, args)
        if func_name in self._STRING_FUNCTIONS:
            return self.compile_string_function(func_name, args)
        if func_name in self._DATETIME_FUNCTIONS:
            return self.compile_datetime_function(func_name, args)
        if func_name in self._AGGREGATION_FUNCTIONS:
            return self.compile_aggregation_function(func_name, args)
        if func_name in self._CONDITIONAL_FUNCTIONS:
            return self.compile_conditional_function(func_name, args)
        raise ValueError(f"Unknown function: {func_name}")

    def compile_math_function(self, name: str, args: List[pl.Expr]) -> pl.Expr:
        """
        Compile mathematical function.

        Args:
            name: Function name
            args: Compiled arguments. For ``round`` the optional second
                argument is the number of decimals as a plain int
                (compile_function supplies it that way).

        Returns:
            Polars expression

        Raises:
            ValueError: If the function is unknown or the argument count is wrong
        """
        if name in self._SIMPLE_MATH:
            self._require_args(name, args, 1)
            return getattr(args[0], name)()

        if name == 'pow':
            self._require_args(name, args, 2)
            return args[0].pow(args[1])

        if name == 'round':
            self._require_args(name, args, 1, 2)
            # Default must be an int: Expr.round() does not accept pl.lit(0).
            decimals = args[1] if len(args) == 2 else 0
            return args[0].round(decimals)

        raise ValueError(f"Unknown math function: {name}")

    def compile_string_function(self, name: str, args: List[pl.Expr]) -> pl.Expr:
        """
        Compile string function.

        Args:
            name: Function name
            args: Compiled arguments

        Returns:
            Polars expression

        Raises:
            ValueError: If the function is unknown or the argument count is wrong
        """
        if name in self._SIMPLE_STRING:
            self._require_args(name, args, 1)
            return getattr(args[0].str, self._SIMPLE_STRING[name])()

        if name == 'substring':
            self._require_args(name, args, 3)
            return args[0].str.slice(args[1], args[2])

        if name == 'replace':
            self._require_args(name, args, 3)
            return args[0].str.replace(args[1], args[2])

        # 'contains' and 'matches' are currently the same call.
        # NOTE(review): confirm whether 'contains' was meant to be a literal
        # (non-regex) match.
        if name in ('contains', 'matches'):
            self._require_args(name, args, 2)
            return args[0].str.contains(args[1])

        raise ValueError(f"Unknown string function: {name}")

    def compile_datetime_function(self, name: str, args: List[pl.Expr]) -> pl.Expr:
        """
        Compile datetime function.

        Args:
            name: Function name
            args: Compiled arguments

        Returns:
            Polars expression

        Raises:
            ValueError: If the function is unknown or the argument count is wrong
        """
        if name in self._SIMPLE_DATETIME:
            self._require_args(name, args, 1)
            return getattr(args[0].dt, self._SIMPLE_DATETIME[name])()

        if name == 'time_of_day':
            self._require_args(name, args, 1)
            # Custom time_of_day: morning (6-12), afternoon (12-18),
            # evening (18-22), night (22-6)
            hour = args[0].dt.hour()
            return (
                # A null hour fails every range test and would otherwise fall
                # through to 'night'; keep missing timestamps as null instead.
                pl.when(hour.is_null()).then(pl.lit(None, dtype=pl.Utf8))
                .when((hour >= 6) & (hour < 12)).then(pl.lit('morning'))
                .when((hour >= 12) & (hour < 18)).then(pl.lit('afternoon'))
                .when((hour >= 18) & (hour < 22)).then(pl.lit('evening'))
                .otherwise(pl.lit('night'))
            )

        raise ValueError(f"Unknown datetime function: {name}")

    def compile_aggregation_function(self, name: str, args: List[pl.Expr]) -> pl.Expr:
        """
        Compile aggregation function.

        Args:
            name: Function name
            args: Compiled arguments

        Returns:
            Polars expression

        Raises:
            ValueError: If the function is unknown or the argument count is wrong
        """
        if name in self._SIMPLE_AGGREGATIONS:
            self._require_args(name, args, 1)
            return getattr(args[0], name)()

        if name == 'count':
            self._require_args(name, args, 0, 1)
            if args:
                return args[0].count()
            # pl.count() without arguments is deprecated in favour of
            # pl.len(); fall back only when running on an older Polars.
            row_count = getattr(pl, 'len', None)
            return row_count() if row_count is not None else pl.count()

        raise ValueError(f"Unknown aggregation function: {name}")

    def compile_conditional_function(self, name: str, args: List[pl.Expr]) -> pl.Expr:
        """
        Compile conditional function.

        Args:
            name: Function name
            args: Compiled arguments

        Returns:
            Polars expression

        Raises:
            ValueError: If the function is unknown or the argument count is wrong
        """
        if name == 'if_else':
            self._require_args(name, args, 3)
            condition, when_true, when_false = args
            return pl.when(condition).then(when_true).otherwise(when_false)

        if name == 'coalesce':
            if len(args) < 1:
                raise ValueError(
                    f"Function 'coalesce' expects at least 1 argument, got {len(args)}"
                )
            return pl.coalesce(*args)

        if name == 'is_null':
            self._require_args(name, args, 1)
            return args[0].is_null()

        if name == 'is_not_null':
            self._require_args(name, args, 1)
            return args[0].is_not_null()

        raise ValueError(f"Unknown conditional function: {name}")