PyPI - additory - Versions diffs - 0.1.0a3__py3-none-any.whl → 0.1.1a1__py3-none-any.whl - Mend

additory 0.1.0a3__py3-none-any.whl → 0.1.1a1__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (120)

additory/__init__.py +58 -14
additory/common/__init__.py +31 -147
additory/common/column_selector.py +255 -0
additory/common/distributions.py +286 -613
additory/common/extractors.py +313 -0
additory/common/knn_imputation.py +332 -0
additory/common/result.py +380 -0
additory/common/strategy_parser.py +243 -0
additory/common/unit_conversions.py +338 -0
additory/common/validation.py +283 -103
additory/core/__init__.py +34 -22
additory/core/backend.py +258 -0
additory/core/config.py +177 -305
additory/core/logging.py +230 -24
additory/core/memory_manager.py +157 -495
additory/expressions/__init__.py +2 -23
additory/expressions/compiler.py +457 -0
additory/expressions/engine.py +264 -487
additory/expressions/integrity.py +179 -0
additory/expressions/loader.py +263 -0
additory/expressions/parser.py +363 -167
additory/expressions/resolver.py +274 -0
additory/functions/__init__.py +1 -0
additory/functions/analyze/__init__.py +144 -0
additory/functions/analyze/cardinality.py +58 -0
additory/functions/analyze/correlations.py +66 -0
additory/functions/analyze/distributions.py +53 -0
additory/functions/analyze/duplicates.py +49 -0
additory/functions/analyze/features.py +61 -0
additory/functions/analyze/imputation.py +66 -0
additory/functions/analyze/outliers.py +65 -0
additory/functions/analyze/patterns.py +65 -0
additory/functions/analyze/presets.py +72 -0
additory/functions/analyze/quality.py +59 -0
additory/functions/analyze/timeseries.py +53 -0
additory/functions/analyze/types.py +45 -0
additory/functions/expressions/__init__.py +161 -0
additory/functions/snapshot/__init__.py +82 -0
additory/functions/snapshot/filter.py +119 -0
additory/functions/synthetic/__init__.py +113 -0
additory/functions/synthetic/mode_detector.py +47 -0
additory/functions/synthetic/strategies/__init__.py +1 -0
additory/functions/synthetic/strategies/advanced.py +35 -0
additory/functions/synthetic/strategies/augmentative.py +160 -0
additory/functions/synthetic/strategies/generative.py +168 -0
additory/functions/synthetic/strategies/presets.py +116 -0
additory/functions/to/__init__.py +188 -0
additory/functions/to/lookup.py +351 -0
additory/functions/to/merge.py +189 -0
additory/functions/to/sort.py +91 -0
additory/functions/to/summarize.py +170 -0
additory/functions/transform/__init__.py +140 -0
additory/functions/transform/datetime.py +79 -0
additory/functions/transform/extract.py +85 -0
additory/functions/transform/harmonize.py +105 -0
additory/functions/transform/knn.py +62 -0
additory/functions/transform/onehotencoding.py +68 -0
additory/functions/transform/transpose.py +42 -0
additory-0.1.1a1.dist-info/METADATA +83 -0
additory-0.1.1a1.dist-info/RECORD +62 -0
additory/analysis/__init__.py +0 -48
additory/analysis/cardinality.py +0 -126
additory/analysis/correlations.py +0 -124
additory/analysis/distributions.py +0 -376
additory/analysis/quality.py +0 -158
additory/analysis/scan.py +0 -400
additory/common/backend.py +0 -371
additory/common/column_utils.py +0 -191
additory/common/exceptions.py +0 -62
additory/common/lists.py +0 -229
additory/common/patterns.py +0 -240
additory/common/resolver.py +0 -567
additory/common/sample_data.py +0 -182
additory/core/ast_builder.py +0 -165
additory/core/backends/__init__.py +0 -23
additory/core/backends/arrow_bridge.py +0 -483
additory/core/backends/cudf_bridge.py +0 -355
additory/core/column_positioning.py +0 -358
additory/core/compiler_polars.py +0 -166
additory/core/enhanced_cache_manager.py +0 -1119
additory/core/enhanced_matchers.py +0 -473
additory/core/enhanced_version_manager.py +0 -325
additory/core/executor.py +0 -59
additory/core/integrity_manager.py +0 -477
additory/core/loader.py +0 -190
additory/core/namespace_manager.py +0 -657
additory/core/parser.py +0 -176
additory/core/polars_expression_engine.py +0 -601
additory/core/registry.py +0 -176
additory/core/sample_data_manager.py +0 -492
additory/core/user_namespace.py +0 -751
additory/core/validator.py +0 -27
additory/dynamic_api.py +0 -304
additory/expressions/proxy.py +0 -549
additory/expressions/registry.py +0 -313
additory/expressions/samples.py +0 -492
additory/synthetic/__init__.py +0 -13
additory/synthetic/column_name_resolver.py +0 -149
additory/synthetic/distributions.py +0 -22
additory/synthetic/forecast.py +0 -1132
additory/synthetic/linked_list_parser.py +0 -415
additory/synthetic/namespace_lookup.py +0 -129
additory/synthetic/smote.py +0 -320
additory/synthetic/strategies.py +0 -850
additory/synthetic/synthesizer.py +0 -713
additory/utilities/__init__.py +0 -53
additory/utilities/encoding.py +0 -600
additory/utilities/games.py +0 -300
additory/utilities/keys.py +0 -8
additory/utilities/lookup.py +0 -103
additory/utilities/matchers.py +0 -216
additory/utilities/resolvers.py +0 -286
additory/utilities/settings.py +0 -167
additory/utilities/units.py +0 -749
additory/utilities/validators.py +0 -153
additory-0.1.0a3.dist-info/METADATA +0 -288
additory-0.1.0a3.dist-info/RECORD +0 -71
additory-0.1.0a3.dist-info/licenses/LICENSE +0 -21
{additory-0.1.0a3.dist-info → additory-0.1.1a1.dist-info}/WHEEL +0 -0
{additory-0.1.0a3.dist-info → additory-0.1.1a1.dist-info}/top_level.txt +0 -0

additory/expressions/engine.py CHANGED Viewed

@@ -1,551 +1,328 @@
-# polars_expression_engine.py
-# Polars-only expression processing engine for enhanced expressions system
+"""
+Core expression evaluation engine for Additory.
-import polars as pl
-from typing import Any, Dict, Optional, Union
-from dataclasses import dataclass
-from datetime import datetime
-from .enhanced_arrow_bridge import EnhancedArrowBridge, ArrowBridgeError
-from .ast_builder import build_ast_from_expression
-from .logging import log_info, log_warning
-from .memory_manager import get_memory_manager
+Main engine that ties together parser, compiler, loader, resolver, and integrity.
+"""
+import polars as pl
+from typing import Dict, List, Optional, Tuple
+from pathlib import Path
-@dataclass
-class ExpressionResult:
-    """Result of expression execution"""
-    dataframe: Any
-    execution_time_ms: float
-    rows_processed: int
-    columns_processed: int
-    backend_type: str
-    memory_used_mb: float
+from additory.expressions.parser import ExpressionParser
+from additory.expressions.compiler import ExpressionCompiler
+from additory.expressions.loader import load_expressions_from_file
+from additory.expressions.resolver import resolve_dependencies, check_circular_dependencies
+from additory.expressions.integrity import verify_sha
+from additory.core.logging import Logger
-class PolarsExpressionError(Exception):
-    """Raised when Polars expression processing fails"""
-    pass
+# Global engine instance
+_engine_instance: Optional['ExpressionEngine'] = None
-class PolarsExpressionEngine:
-    """Exclusive Polars-based expression processing engine"""
+class ExpressionEngine:
+    """
+    Main expression evaluation engine.
+    Singleton class that manages expression loading, parsing, compilation, and evaluation.
+    """
     def __init__(self):
-        self.arrow_bridge = EnhancedArrowBridge()
-        self.execution_stats = {
-            "total_executions": 0,
-            "total_time_ms": 0.0,
-            "total_rows_processed": 0,
-            "errors": 0
-        }
+        """Initialize expression engine."""
+        self.loaded_expressions: Dict[str, Dict] = {}
+        self.parser = ExpressionParser()
+        self.compiler = ExpressionCompiler()
+        self.logger = Logger()
-        # Register with memory manager for cleanup
-        self.memory_manager = get_memory_manager()
-        self.memory_manager.register_cleanup_callback(self._cleanup_callback)
+        # Load built-in expressions
+        self._load_inbuilt_expressions()
-    def execute_expression(self, df: Any, expression: str, output_column: str,
-                         backend_type: Optional[str] = None) -> ExpressionResult:
-        """
-        Execute expression using Polars exclusively
-        Args:
-            df: Input dataframe (any supported backend)
-            expression: Expression string to execute
-            output_column: Name for the output column
-            backend_type: Source backend type (auto-detected if None)
-        Returns:
-            ExpressionResult with processed dataframe and statistics
-        Raises:
-            PolarsExpressionError: If expression execution fails
-        """
-        start_time = datetime.now()
+    def _load_inbuilt_expressions(self):
+        """Load built-in expressions from bundled .add files."""
+        # Get the inbuilt expressions directory
+        # This would be in the package: additory/inbuilt_expressions/
+        inbuilt_dir = Path(__file__).parent.parent / 'inbuilt_expressions'
-        # Use memory context for monitoring
-        with self.memory_manager.memory_context(f"expression: {expression[:50]}..."):
-            try:
-                # Auto-detect backend if not specified
-                if backend_type is None:
-                    backend_type = self.arrow_bridge.detect_backend(df)
-                # Get memory usage before processing
-                memory_before = self.arrow_bridge._get_memory_usage_mb()
-                # 1. Convert input to Arrow
-                log_info(f"[polars_engine] Converting {backend_type} to Arrow")
-                arrow_table = self.arrow_bridge.to_arrow(df, backend_type)
-                # 2. Convert Arrow to Polars
-                log_info("[polars_engine] Converting Arrow to Polars")
-                polars_df = pl.from_arrow(arrow_table)
-                # 3. Execute expression in Polars
-                log_info(f"[polars_engine] Executing expression: {expression}")
-                result_df = self._execute_polars_expression(
-                    polars_df, expression, output_column
-                )
-                # 4. Convert back to Arrow
-                log_info("[polars_engine] Converting result to Arrow")
-                result_arrow = result_df.to_arrow()
-                # 5. Convert to original backend format
-                log_info(f"[polars_engine] Converting Arrow to {backend_type}")
-                final_result = self.arrow_bridge.from_arrow(result_arrow, backend_type)
-                # Calculate execution statistics
-                execution_time = (datetime.now() - start_time).total_seconds() * 1000
-                memory_after = self.arrow_bridge._get_memory_usage_mb()
-                memory_used = max(0, memory_after - memory_before)
-                # Update global statistics
-                self.execution_stats["total_executions"] += 1
-                self.execution_stats["total_time_ms"] += execution_time
-                self.execution_stats["total_rows_processed"] += result_df.height
-                log_info(f"[polars_engine] Expression executed successfully in {execution_time:.1f}ms")
-                return ExpressionResult(
-                    dataframe=final_result,
-                    execution_time_ms=execution_time,
-                    rows_processed=result_df.height,
-                    columns_processed=result_df.width,
-                    backend_type=backend_type,
-                    memory_used_mb=memory_used
-                )
-            except Exception as e:
-                self.execution_stats["errors"] += 1
-                raise PolarsExpressionError(f"Expression execution failed: {e}")
-            finally:
-                # 6. Always cleanup Arrow memory
-                self.arrow_bridge.cleanup_arrow_memory()
+        if inbuilt_dir.exists():
+            self.load_namespace('inbuilt', str(inbuilt_dir))
-    def _execute_polars_expression(self, polars_df: pl.DataFrame,
-                                 expression: str, output_column: str) -> pl.DataFrame:
+    def evaluate(self, df: pl.DataFrame, expression: str) -> pl.Series:
         """
-        Execute expression AST in Polars
+        Evaluate expression and return result.
         Args:
-            polars_df: Input Polars DataFrame
-            expression: Expression string
-            output_column: Name for output column
+            df: DataFrame to evaluate expression on
+            expression: Expression string (inline or reference)
         Returns:
-            Polars DataFrame with new column
-        Raises:
-            PolarsExpressionError: If expression execution fails
-        """
-        try:
-            # Clean up multiline expressions
-            cleaned_expression = ' '.join(line.strip() for line in expression.strip().split('\n') if line.strip())
-            # Build AST from expression
-            ast_tree = build_ast_from_expression(cleaned_expression)
-            if ast_tree is None:
-                raise PolarsExpressionError(f"Failed to parse expression: {expression}")
+            Polars Series with result
-            # Convert AST to Polars expression
-            polars_expr = self._ast_to_polars_expr(ast_tree)
+        Example:
+            # Inline expression
+            result = engine.evaluate(df, 'weight / (height ** 2)')
-            # Execute expression and add as new column
-            result_df = polars_df.with_columns([
-                polars_expr.alias(output_column)
-            ])
-            return result_df
-        except Exception as e:
-            raise PolarsExpressionError(f"Polars expression execution failed: {e}")
-    def _ast_to_polars_expr(self, ast_node: Dict[str, Any]) -> pl.Expr:
+            # Reference expression
+            result = engine.evaluate(df, 'inbuilt:bmi')
         """
-        Convert expression AST to Polars expression
+        # Check if this is a reference or inline expression
+        if is_reference(expression):
+            # Parse reference
+            namespace, name = parse_expression_reference(expression)
+            # Get expression definition
+            expr_def = self.get_expression(f"{namespace}:{name}")
+            # Get expression string
+            expr_string = expr_def['expression']
+            # Verify SHA integrity
+            if 'sha' in expr_def and expr_def['sha']:
+                is_valid = verify_sha(expr_string, expr_def['sha'])
+                if not is_valid:
+                    self.logger.warning(
+                        f"Expression '{name}' in namespace '{namespace}' failed integrity check"
+                    )
+            # Log evaluation
+            self.logger.info(f"Evaluating expression: {namespace}:{name}")
+        else:
+            # Inline expression
+            expr_string = expression
+            self.logger.info(f"Evaluating inline expression")
-        Args:
-            ast_node: AST node dictionary
-        Returns:
-            Polars expression
-        Raises:
-            PolarsExpressionError: If AST conversion fails
-        """
-        try:
-            node_type = ast_node.get("type")
-            if node_type == "column":
-                return pl.col(ast_node["name"])
-            elif node_type == "literal":
-                return pl.lit(ast_node["value"])
-            elif node_type == "binary":
-                left = self._ast_to_polars_expr(ast_node["left"])
-                right = self._ast_to_polars_expr(ast_node["right"])
-                op = ast_node["op"]
-                if op == "+":
-                    return left + right
-                elif op == "-":
-                    return left - right
-                elif op == "*":
-                    return left * right
-                elif op == "/":
-                    return left / right
-                elif op == "**":
-                    return left ** right
-                elif op == "%":
-                    return left % right
-                elif op == "//":
-                    return left // right
-                else:
-                    raise PolarsExpressionError(f"Unsupported binary operator: {op}")
-            elif node_type == "cmp":
-                left = self._ast_to_polars_expr(ast_node["left"])
-                right = self._ast_to_polars_expr(ast_node["right"])
-                op = ast_node["op"]
-                if op == "==":
-                    return left == right
-                elif op == "!=":
-                    return left != right
-                elif op == ">":
-                    return left > right
-                elif op == "<":
-                    return left < right
-                elif op == ">=":
-                    return left >= right
-                elif op == "<=":
-                    return left <= right
-                else:
-                    raise PolarsExpressionError(f"Unsupported comparison operator: {op}")
-            elif node_type == "bool_op":
-                op = ast_node["op"]
-                values = [self._ast_to_polars_expr(v) for v in ast_node["values"]]
-                if op == "and":
-                    result = values[0]
-                    for v in values[1:]:
-                        result = result & v
-                    return result
-                elif op == "or":
-                    result = values[0]
-                    for v in values[1:]:
-                        result = result | v
-                    return result
-                else:
-                    raise PolarsExpressionError(f"Unsupported boolean operator: {op}")
-            elif node_type == "unary_bool":
-                op = ast_node["op"]
-                value = self._ast_to_polars_expr(ast_node["value"])
-                if op == "not":
-                    return ~value
-                else:
-                    raise PolarsExpressionError(f"Unsupported unary boolean operator: {op}")
-            elif node_type == "if_expr":
-                # Ternary: a if cond else b
-                cond = self._ast_to_polars_expr(ast_node["cond"])
-                then_expr = self._ast_to_polars_expr(ast_node["then"])
-                else_expr = self._ast_to_polars_expr(ast_node["else"])
-                return pl.when(cond).then(then_expr).otherwise(else_expr)
-            elif node_type == "call":
-                # Function calls
-                func_name = ast_node["name"]
-                args = [self._ast_to_polars_expr(arg) for arg in ast_node["args"]]
-                return self._handle_function_call(func_name, args)
-            else:
-                raise PolarsExpressionError(f"Unsupported AST node type: {node_type}")
-        except Exception as e:
-            raise PolarsExpressionError(f"AST to Polars conversion failed: {e}")
+        # Parse expression to AST
+        ast = self.parser.parse(expr_string)
+        # Compile AST to Polars expression
+        polars_expr = self.compiler.compile(ast, df)
+        # Execute and return result
+        result = df.select(polars_expr.alias('result'))['result']
+        return result
-    def _handle_function_call(self, func_name: str, args: list) -> pl.Expr:
+    def load_namespace(self, namespace: str, folder_path: str):
         """
-        Handle function calls in expressions
+        Load expressions from a namespace folder.
         Args:
-            func_name: Name of the function
-            args: List of Polars expressions as arguments
-        Returns:
-            Polars expression for the function call
+            namespace: Namespace name
+            folder_path: Path to folder containing .add files
-        Raises:
-            PolarsExpressionError: If function is not supported
+        Example:
+            engine.load_namespace('inbuilt', '/path/to/inbuilt_expressions')
         """
-        if func_name == "min":
-            if len(args) == 1:
-                return args[0].min()
-            else:
-                # Element-wise minimum of multiple expressions
-                result = args[0]
-                for arg in args[1:]:
-                    result = pl.min_horizontal([result, arg])
-                return result
-        elif func_name == "max":
-            if len(args) == 1:
-                return args[0].max()
-            else:
-                # Element-wise maximum of multiple expressions
-                result = args[0]
-                for arg in args[1:]:
-                    result = pl.max_horizontal([result, arg])
-                return result
-        elif func_name == "abs":
-            if len(args) != 1:
-                raise PolarsExpressionError("abs() requires exactly 1 argument")
-            return args[0].abs()
-        elif func_name == "log":
-            if len(args) == 1:
-                return args[0].log()
-            elif len(args) == 2:
-                # log(value, base)
-                return args[0].log() / args[1].log()
-            else:
-                raise PolarsExpressionError("log() requires 1 or 2 arguments")
+        folder = Path(folder_path)
-        elif func_name == "exp":
-            if len(args) != 1:
-                raise PolarsExpressionError("exp() requires exactly 1 argument")
-            return args[0].exp()
+        if not folder.exists():
+            self.logger.warning(f"Namespace folder not found: {folder_path}")
+            return
-        elif func_name == "sqrt":
-            if len(args) != 1:
-                raise PolarsExpressionError("sqrt() requires exactly 1 argument")
-            return args[0].sqrt()
+        # Find all .add files
+        add_files = list(folder.glob('*.add'))
-        elif func_name == "pow":
-            if len(args) != 2:
-                raise PolarsExpressionError("pow() requires exactly 2 arguments")
-            return args[0] ** args[1]
+        if not add_files:
+            self.logger.info(f"No .add files found in {folder_path}")
+            return
-        elif func_name == "round":
-            if len(args) == 1:
-                return args[0].round(0)
-            elif len(args) == 2:
-                # For round with decimals, the second argument must be a literal integer
-                if hasattr(args[1], 'meta') and hasattr(args[1].meta, 'output_name'):
-                    # This is a column reference, not a literal
-                    raise PolarsExpressionError("round() decimals parameter must be a literal integer")
-                return args[0].round(args[1])
-            else:
-                raise PolarsExpressionError("round() requires 1 or 2 arguments")
-        elif func_name == "floor":
-            if len(args) != 1:
-                raise PolarsExpressionError("floor() requires exactly 1 argument")
-            return args[0].floor()
-        elif func_name == "ceil":
-            if len(args) != 1:
-                raise PolarsExpressionError("ceil() requires exactly 1 argument")
-            return args[0].ceil()
+        # Load expressions from each file
+        loaded_count = 0
+        for add_file in add_files:
+            try:
+                expressions = load_expressions_from_file(str(add_file), namespace)
+                # Store expressions
+                for name, expr_def in expressions.items():
+                    # Create full reference
+                    full_ref = f"{namespace}:{name}"
+                    # Check for duplicates
+                    if full_ref in self.loaded_expressions:
+                        self.logger.warning(
+                            f"Duplicate expression '{name}' in namespace '{namespace}' "
+                            f"(from {add_file.name})"
+                        )
+                        continue
+                    # Add source file info
+                    expr_def['source_file'] = add_file.name
+                    # Store expression
+                    self.loaded_expressions[full_ref] = expr_def
+                    loaded_count += 1
+            except Exception as e:
+                self.logger.error(f"Error loading {add_file.name}: {str(e)}")
-        else:
-            raise PolarsExpressionError(f"Unsupported function: {func_name}")
+        self.logger.info(
+            f"Loaded {loaded_count} expressions from namespace '{namespace}'"
+        )
-    def execute_with_ast(self, df: Any, ast_tree: Dict[str, Any], output_column: str,
-                        backend_type: Optional[str] = None) -> ExpressionResult:
+    def get_expression(self, reference: str) -> Dict:
         """
-        Execute expression using pre-built AST
+        Get expression definition from reference.
         Args:
-            df: Input dataframe
-            ast_tree: Pre-built AST tree
-            output_column: Name for output column
-            backend_type: Source backend type
+            reference: Expression reference ('inbuilt:bmi', 'myfolder:roi')
         Returns:
-            ExpressionResult with processed dataframe
-        """
-        start_time = datetime.now()
-        try:
-            # Auto-detect backend if not specified
-            if backend_type is None:
-                backend_type = self.arrow_bridge.detect_backend(df)
-            # Get memory usage before processing
-            memory_before = self.arrow_bridge._get_memory_usage_mb()
-            # Convert to Polars via Arrow
-            arrow_table = self.arrow_bridge.to_arrow(df, backend_type)
-            polars_df = pl.from_arrow(arrow_table)
-            # Execute using AST
-            polars_expr = self._ast_to_polars_expr(ast_tree)
-            result_df = polars_df.with_columns([polars_expr.alias(output_column)])
+            Dictionary with expression definition
-            # Convert back to original format
-            result_arrow = result_df.to_arrow()
-            final_result = self.arrow_bridge.from_arrow(result_arrow, backend_type)
-            # Calculate statistics
-            execution_time = (datetime.now() - start_time).total_seconds() * 1000
-            memory_after = self.arrow_bridge._get_memory_usage_mb()
-            memory_used = max(0, memory_after - memory_before)
-            # Update statistics
-            self.execution_stats["total_executions"] += 1
-            self.execution_stats["total_time_ms"] += execution_time
-            self.execution_stats["total_rows_processed"] += result_df.height
+        Raises:
+            ValueError: If expression not found
-            return ExpressionResult(
-                dataframe=final_result,
-                execution_time_ms=execution_time,
-                rows_processed=result_df.height,
-                columns_processed=result_df.width,
-                backend_type=backend_type,
-                memory_used_mb=memory_used
+        Example:
+            expr_def = engine.get_expression('inbuilt:bmi')
+        """
+        if reference not in self.loaded_expressions:
+            raise ValueError(
+                f"Expression '{reference}' not found. "
+                f"Available expressions: {list(self.loaded_expressions.keys())}"
             )
-        except Exception as e:
-            self.execution_stats["errors"] += 1
-            raise PolarsExpressionError(f"AST execution failed: {e}")
-        finally:
-            self.arrow_bridge.cleanup_arrow_memory()
+        return self.loaded_expressions[reference]
-    def validate_expression(self, expression: str) -> bool:
+    def list_expressions(self, namespace: Optional[str] = None) -> List[Dict]:
         """
-        Validate expression syntax without executing
+        List all available expressions.
         Args:
-            expression: Expression string to validate
+            namespace: Filter by namespace (None = all)
         Returns:
-            True if expression is valid
-        """
-        try:
-            # Clean up multiline expressions
-            cleaned_expression = ' '.join(line.strip() for line in expression.strip().split('\n') if line.strip())
+            List of expression dictionaries
-            ast_tree = build_ast_from_expression(cleaned_expression)
-            if ast_tree is None:
-                return False
+        Example:
+            # List all expressions
+            all_exprs = engine.list_expressions()
-            # Try to convert AST to Polars expression (dry run)
-            # This will catch unsupported functions and operators
-            self._ast_to_polars_expr(ast_tree)
-            return True
+            # List only inbuilt
+            inbuilt = engine.list_expressions('inbuilt')
+        """
+        if namespace is None:
+            return list(self.loaded_expressions.values())
+        # Filter by namespace
+        return [
+            expr_def for ref, expr_def in self.loaded_expressions.items()
+            if ref.startswith(f"{namespace}:")
+        ]
+    def reload_custom_namespace(self):
+        """
+        Reload custom namespace expressions.
+        Reloads all .add files from custom folder.
+        """
+        from additory.core.config import Config
+        config = Config()
+        custom_folder = config.get_expressions_folder()
+        if custom_folder:
+            # Clear existing custom expressions
+            self.clear_custom_namespace()
-        except Exception as e:
-            log_warning(f"[polars_engine] Expression validation failed: {e}")
-            return False
+            # Reload
+            self.load_namespace('user', custom_folder)
-    def get_execution_stats(self) -> Dict[str, Any]:
-        """Get execution statistics"""
-        stats = self.execution_stats.copy()
+    def clear_custom_namespace(self):
+        """Clear custom namespace."""
+        # Remove all expressions that don't start with 'inbuilt:'
+        to_remove = [
+            ref for ref in self.loaded_expressions.keys()
+            if not ref.startswith('inbuilt:')
+        ]
-        if stats["total_executions"] > 0:
-            stats["avg_time_ms"] = stats["total_time_ms"] / stats["total_executions"]
-            stats["avg_rows_per_execution"] = stats["total_rows_processed"] / stats["total_executions"]
-        else:
-            stats["avg_time_ms"] = 0.0
-            stats["avg_rows_per_execution"] = 0
+        for ref in to_remove:
+            del self.loaded_expressions[ref]
+        self.logger.info(f"Cleared {len(to_remove)} custom expressions")
+def get_engine() -> ExpressionEngine:
+    """
+    Get the global expression engine instance.
+    Returns:
+        Global ExpressionEngine instance
-        return stats
+    Example:
+        engine = get_engine()
+        result = engine.evaluate(df, 'inbuilt:bmi')
+    """
+    global _engine_instance
-    def reset_stats(self):
-        """Reset execution statistics"""
-        self.execution_stats = {
-            "total_executions": 0,
-            "total_time_ms": 0.0,
-            "total_rows_processed": 0,
-            "errors": 0
-        }
-        log_info("[polars_engine] Statistics reset")
+    if _engine_instance is None:
+        _engine_instance = ExpressionEngine()
-    def benchmark_expression(self, df: Any, expression: str, output_column: str,
-                           iterations: int = 3) -> Dict[str, Any]:
-        """
-        Benchmark expression execution performance
+    return _engine_instance
+def parse_expression_reference(expression: str) -> Tuple[str, str]:
+    """
+    Parse expression reference into namespace and name.
+    Args:
+        expression: Expression string
-        Args:
-            df: Input dataframe
-            expression: Expression to benchmark
-            output_column: Output column name
-            iterations: Number of iterations
-        Returns:
-            Benchmark results
-        """
-        times = []
-        backend_type = self.arrow_bridge.detect_backend(df)
+    Returns:
+        Tuple of (namespace, name)
-        for i in range(iterations):
-            try:
-                result = self.execute_expression(df, expression, output_column, backend_type)
-                times.append(result.execution_time_ms)
-            except Exception as e:
-                log_warning(f"[polars_engine] Benchmark iteration {i+1} failed: {e}")
-                continue
+    Raises:
+        ValueError: If not a valid reference
-        if not times:
-            return {"error": "All benchmark iterations failed"}
+    Example:
+        namespace, name = parse_expression_reference('inbuilt:bmi')
+        # Returns: ('inbuilt', 'bmi')
+    """
+    if ':' not in expression:
+        raise ValueError(f"Invalid expression reference: {expression}")
+    parts = expression.split(':', 1)
+    namespace = parts[0]
+    name = parts[1]
+    return namespace, name
+def is_reference(expression: str) -> bool:
+    """
+    Check if expression is a reference (not inline).
+    Args:
+        expression: Expression string
-        return {
-            "expression": expression,
-            "backend_type": backend_type,
-            "iterations": len(times),
-            "min_time_ms": min(times),
-            "max_time_ms": max(times),
-            "avg_time_ms": sum(times) / len(times),
-            "total_time_ms": sum(times)
-        }
+    Returns:
+        True if reference, False if inline
+    Example:
+        is_reference('inbuilt:bmi')  # True
+        is_reference('weight / height')  # False
+    """
+    # A reference has the format: namespace:name
+    # It should have exactly one colon and no spaces before the colon
+    if ':' not in expression:
+        return False
-    def get_supported_functions(self) -> list:
-        """Get list of supported functions"""
-        return [
-            "min", "max", "abs", "log", "exp", "sqrt", "pow",
-            "round", "floor", "ceil"
-        ]
+    # Check if it looks like a reference (namespace:name)
+    parts = expression.split(':', 1)
+    if len(parts) != 2:
+        return False
-    def get_supported_operators(self) -> Dict[str, list]:
-        """Get list of supported operators by category"""
-        return {
-            "arithmetic": ["+", "-", "*", "/", "**", "%", "//"],
-            "comparison": ["==", "!=", ">", "<", ">=", "<="],
-            "boolean": ["and", "or", "not"],
-            "conditional": ["if_else"]
-        }
+    namespace = parts[0].strip()
+    name = parts[1].strip()
-    def _cleanup_callback(self):
-        """Cleanup callback for memory manager"""
-        try:
-            # Cleanup Arrow bridge memory
-            self.arrow_bridge.cleanup_arrow_memory()
-            # Reset statistics if they get too large
-            if self.execution_stats["total_executions"] > 10000:
-                log_info("[polars_engine] Resetting statistics due to high execution count")
-                self.reset_stats()
-        except Exception as e:
-            log_warning(f"[polars_engine] Cleanup callback failed: {e}")
+    # Namespace and name should be valid identifiers (no spaces, operators, etc.)
+    if not namespace or not name:
+        return False
+    # Check if namespace looks like an identifier
+    if not namespace.replace('_', '').isalnum():
+        return False
+    # Check if name looks like an identifier
+    if not name.replace('_', '').isalnum():
+        return False
-    def __del__(self):
-        """Cleanup when engine is destroyed"""
-        try:
-            if hasattr(self, 'memory_manager'):
-                self.memory_manager.unregister_cleanup_callback(self._cleanup_callback)
-        except Exception:
-            pass
+    return True

additory 0.1.0a3__py3-none-any.whl → 0.1.1a1__py3-none-any.whl

additory 0.1.0a3py3-none-any.whl → 0.1.1a1py3-none-any.whl