PyPI - additory - Versions diffs - 0.1.0a4__py3-none-any.whl → 0.1.1a1__py3-none-any.whl - Mend

additory 0.1.0a4__py3-none-any.whl → 0.1.1a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (121) hide show

additory/__init__.py +58 -14
additory/common/__init__.py +31 -147
additory/common/column_selector.py +255 -0
additory/common/distributions.py +286 -613
additory/common/extractors.py +313 -0
additory/common/knn_imputation.py +332 -0
additory/common/result.py +380 -0
additory/common/strategy_parser.py +243 -0
additory/common/unit_conversions.py +338 -0
additory/common/validation.py +283 -103
additory/core/__init__.py +34 -22
additory/core/backend.py +258 -0
additory/core/config.py +177 -305
additory/core/logging.py +230 -24
additory/core/memory_manager.py +157 -495
additory/expressions/__init__.py +2 -23
additory/expressions/compiler.py +457 -0
additory/expressions/engine.py +264 -487
additory/expressions/integrity.py +179 -0
additory/expressions/loader.py +263 -0
additory/expressions/parser.py +363 -167
additory/expressions/resolver.py +274 -0
additory/functions/__init__.py +1 -0
additory/functions/analyze/__init__.py +144 -0
additory/functions/analyze/cardinality.py +58 -0
additory/functions/analyze/correlations.py +66 -0
additory/functions/analyze/distributions.py +53 -0
additory/functions/analyze/duplicates.py +49 -0
additory/functions/analyze/features.py +61 -0
additory/functions/analyze/imputation.py +66 -0
additory/functions/analyze/outliers.py +65 -0
additory/functions/analyze/patterns.py +65 -0
additory/functions/analyze/presets.py +72 -0
additory/functions/analyze/quality.py +59 -0
additory/functions/analyze/timeseries.py +53 -0
additory/functions/analyze/types.py +45 -0
additory/functions/expressions/__init__.py +161 -0
additory/functions/snapshot/__init__.py +82 -0
additory/functions/snapshot/filter.py +119 -0
additory/functions/synthetic/__init__.py +113 -0
additory/functions/synthetic/mode_detector.py +47 -0
additory/functions/synthetic/strategies/__init__.py +1 -0
additory/functions/synthetic/strategies/advanced.py +35 -0
additory/functions/synthetic/strategies/augmentative.py +160 -0
additory/functions/synthetic/strategies/generative.py +168 -0
additory/functions/synthetic/strategies/presets.py +116 -0
additory/functions/to/__init__.py +188 -0
additory/functions/to/lookup.py +351 -0
additory/functions/to/merge.py +189 -0
additory/functions/to/sort.py +91 -0
additory/functions/to/summarize.py +170 -0
additory/functions/transform/__init__.py +140 -0
additory/functions/transform/datetime.py +79 -0
additory/functions/transform/extract.py +85 -0
additory/functions/transform/harmonize.py +105 -0
additory/functions/transform/knn.py +62 -0
additory/functions/transform/onehotencoding.py +68 -0
additory/functions/transform/transpose.py +42 -0
additory-0.1.1a1.dist-info/METADATA +83 -0
additory-0.1.1a1.dist-info/RECORD +62 -0
additory/analysis/__init__.py +0 -48
additory/analysis/cardinality.py +0 -126
additory/analysis/correlations.py +0 -124
additory/analysis/distributions.py +0 -376
additory/analysis/quality.py +0 -158
additory/analysis/scan.py +0 -400
additory/common/backend.py +0 -371
additory/common/column_utils.py +0 -191
additory/common/exceptions.py +0 -62
additory/common/lists.py +0 -229
additory/common/patterns.py +0 -240
additory/common/resolver.py +0 -567
additory/common/sample_data.py +0 -182
additory/core/ast_builder.py +0 -165
additory/core/backends/__init__.py +0 -23
additory/core/backends/arrow_bridge.py +0 -483
additory/core/backends/cudf_bridge.py +0 -355
additory/core/column_positioning.py +0 -358
additory/core/compiler_polars.py +0 -166
additory/core/enhanced_cache_manager.py +0 -1119
additory/core/enhanced_matchers.py +0 -473
additory/core/enhanced_version_manager.py +0 -325
additory/core/executor.py +0 -59
additory/core/integrity_manager.py +0 -477
additory/core/loader.py +0 -190
additory/core/namespace_manager.py +0 -657
additory/core/parser.py +0 -176
additory/core/polars_expression_engine.py +0 -601
additory/core/registry.py +0 -177
additory/core/sample_data_manager.py +0 -492
additory/core/user_namespace.py +0 -751
additory/core/validator.py +0 -27
additory/dynamic_api.py +0 -352
additory/expressions/proxy.py +0 -549
additory/expressions/registry.py +0 -313
additory/expressions/samples.py +0 -492
additory/synthetic/__init__.py +0 -13
additory/synthetic/column_name_resolver.py +0 -149
additory/synthetic/deduce.py +0 -259
additory/synthetic/distributions.py +0 -22
additory/synthetic/forecast.py +0 -1132
additory/synthetic/linked_list_parser.py +0 -415
additory/synthetic/namespace_lookup.py +0 -129
additory/synthetic/smote.py +0 -320
additory/synthetic/strategies.py +0 -926
additory/synthetic/synthesizer.py +0 -713
additory/utilities/__init__.py +0 -53
additory/utilities/encoding.py +0 -600
additory/utilities/games.py +0 -300
additory/utilities/keys.py +0 -8
additory/utilities/lookup.py +0 -103
additory/utilities/matchers.py +0 -216
additory/utilities/resolvers.py +0 -286
additory/utilities/settings.py +0 -167
additory/utilities/units.py +0 -749
additory/utilities/validators.py +0 -153
additory-0.1.0a4.dist-info/METADATA +0 -311
additory-0.1.0a4.dist-info/RECORD +0 -72
additory-0.1.0a4.dist-info/licenses/LICENSE +0 -21
{additory-0.1.0a4.dist-info → additory-0.1.1a1.dist-info}/WHEEL +0 -0
{additory-0.1.0a4.dist-info → additory-0.1.1a1.dist-info}/top_level.txt +0 -0

additory/core/column_positioning.py DELETED Viewed

@@ -1,358 +0,0 @@
-# additory/core/column_positioning.py
-"""
-Column Positioning System for Smart Column Insertion
-This module provides intelligent column positioning capabilities for the add.to() function.
-Users can specify where new columns should be inserted in the target dataframe.
-Supported positioning options:
-- "end": Append at end (default)
-- "start": Insert at beginning
-- int: 0-based index position
-- "after:col_name": After specific column
-- "before:col_name": Before specific column
-Design Philosophy:
-- Intuitive positioning syntax
-- Robust error handling with helpful suggestions
-- Preserve dataframe structure and types
-- Support for multiple new columns with smart placement
-"""
-import logging
-from typing import Union, List, Optional, Dict, Any
-import pandas as pd
-logger = logging.getLogger(__name__)
-class ColumnPositioner:
-    """
-    Handles intelligent column positioning for dataframes
-    """
-    def __init__(self):
-        self._positioning_stats = {
-            'total_operations': 0,
-            'end_insertions': 0,
-            'start_insertions': 0,
-            'index_insertions': 0,
-            'relative_insertions': 0,
-            'errors_handled': 0
-        }
-    def position_columns(self,
-                        df: pd.DataFrame,
-                        new_columns: List[str],
-                        position: Union[str, int] = "end") -> pd.DataFrame:
-        """
-        Insert new columns at specified position in dataframe
-        Args:
-            df: Target dataframe with new columns already added at the end
-            new_columns: List of column names that were just added
-            position: Where to position the new columns
-        Returns:
-            DataFrame with columns repositioned as requested
-        """
-        self._positioning_stats['total_operations'] += 1
-        # Validate inputs
-        if not new_columns:
-            logger.warning("No new columns specified for positioning")
-            return df
-        # Check that new columns exist in dataframe
-        missing_cols = [col for col in new_columns if col not in df.columns]
-        if missing_cols:
-            logger.error(f"New columns not found in dataframe: {missing_cols}")
-            self._positioning_stats['errors_handled'] += 1
-            return df
-        # Handle different positioning options
-        try:
-            if position == "end":
-                # Already at end, no change needed
-                self._positioning_stats['end_insertions'] += 1
-                logger.debug("Columns positioned at end (default)")
-                return df
-            elif position == "start":
-                return self._position_at_start(df, new_columns)
-            elif isinstance(position, int):
-                return self._position_at_index(df, new_columns, position)
-            elif isinstance(position, str) and position.startswith("after:"):
-                reference_col = position[6:]  # Remove "after:" prefix
-                return self._position_after_column(df, new_columns, reference_col)
-            elif isinstance(position, str) and position.startswith("before:"):
-                reference_col = position[7:]  # Remove "before:" prefix
-                return self._position_before_column(df, new_columns, reference_col)
-            else:
-                logger.warning(f"Unknown position specification: '{position}'. Using default 'end'.")
-                self._positioning_stats['errors_handled'] += 1
-                return df
-        except Exception as e:
-            logger.error(f"Column positioning failed: {e}. Using default 'end'.")
-            self._positioning_stats['errors_handled'] += 1
-            return df
-    def _position_at_start(self, df: pd.DataFrame, new_columns: List[str]) -> pd.DataFrame:
-        """Position new columns at the start of the dataframe"""
-        self._positioning_stats['start_insertions'] += 1
-        # Get existing columns (excluding new ones)
-        existing_columns = [col for col in df.columns if col not in new_columns]
-        # Reorder: new columns first, then existing columns
-        new_order = new_columns + existing_columns
-        logger.debug(f"Positioning {len(new_columns)} columns at start")
-        return df[new_order]
-    def _position_at_index(self, df: pd.DataFrame, new_columns: List[str], index: int) -> pd.DataFrame:
-        """Position new columns at specific 0-based index"""
-        self._positioning_stats['index_insertions'] += 1
-        # Get existing columns (excluding new ones)
-        existing_columns = [col for col in df.columns if col not in new_columns]
-        # Validate index
-        max_index = len(existing_columns)
-        if index < 0:
-            # Convert negative index: -1 means before last column, -2 before second-to-last, etc.
-            index = max(0, max_index + index)
-        elif index > max_index:
-            logger.warning(f"Index {index} exceeds column count {max_index}. Using end position.")
-            index = max_index
-        # Insert new columns at specified index
-        new_order = existing_columns[:index] + new_columns + existing_columns[index:]
-        logger.debug(f"Positioning {len(new_columns)} columns at index {index}")
-        return df[new_order]
-    def _position_after_column(self, df: pd.DataFrame, new_columns: List[str],
-                              reference_col: str) -> pd.DataFrame:
-        """Position new columns after a specific reference column"""
-        self._positioning_stats['relative_insertions'] += 1
-        # Get existing columns (excluding new ones)
-        existing_columns = [col for col in df.columns if col not in new_columns]
-        # Check if reference column exists
-        if reference_col not in existing_columns:
-            available_cols = existing_columns[:5]  # Show first 5 for brevity
-            logger.warning(f"Reference column '{reference_col}' not found. "
-                          f"Available columns: {available_cols}{'...' if len(existing_columns) > 5 else ''}. "
-                          f"Using end position.")
-            return df
-        # Find position after reference column
-        ref_index = existing_columns.index(reference_col)
-        insert_index = ref_index + 1
-        # Insert new columns after reference column
-        new_order = (existing_columns[:insert_index] +
-                    new_columns +
-                    existing_columns[insert_index:])
-        logger.debug(f"Positioning {len(new_columns)} columns after '{reference_col}'")
-        return df[new_order]
-    def _position_before_column(self, df: pd.DataFrame, new_columns: List[str],
-                               reference_col: str) -> pd.DataFrame:
-        """Position new columns before a specific reference column"""
-        self._positioning_stats['relative_insertions'] += 1
-        # Get existing columns (excluding new ones)
-        existing_columns = [col for col in df.columns if col not in new_columns]
-        # Check if reference column exists
-        if reference_col not in existing_columns:
-            available_cols = existing_columns[:5]  # Show first 5 for brevity
-            logger.warning(f"Reference column '{reference_col}' not found. "
-                          f"Available columns: {available_cols}{'...' if len(existing_columns) > 5 else ''}. "
-                          f"Using end position.")
-            return df
-        # Find position before reference column
-        ref_index = existing_columns.index(reference_col)
-        # Insert new columns before reference column
-        new_order = (existing_columns[:ref_index] +
-                    new_columns +
-                    existing_columns[ref_index:])
-        logger.debug(f"Positioning {len(new_columns)} columns before '{reference_col}'")
-        return df[new_order]
-    def validate_position_syntax(self, position: Union[str, int]) -> Dict[str, Any]:
-        """
-        Validate position syntax and provide helpful feedback
-        Returns:
-            Dict with validation results and suggestions
-        """
-        result = {
-            'valid': True,
-            'position_type': None,
-            'parsed_value': None,
-            'warnings': [],
-            'suggestions': []
-        }
-        if position == "end":
-            result['position_type'] = 'end'
-        elif position == "start":
-            result['position_type'] = 'start'
-        elif isinstance(position, int):
-            result['position_type'] = 'index'
-            result['parsed_value'] = position
-            if position < 0:
-                result['warnings'].append("Negative index will be converted to positive")
-        elif isinstance(position, str) and position.startswith("after:"):
-            reference_col = position[6:]
-            if not reference_col:
-                result['valid'] = False
-                result['suggestions'].append("Specify column name after 'after:' (e.g., 'after:product_id')")
-            else:
-                result['position_type'] = 'after'
-                result['parsed_value'] = reference_col
-        elif isinstance(position, str) and position.startswith("before:"):
-            reference_col = position[7:]
-            if not reference_col:
-                result['valid'] = False
-                result['suggestions'].append("Specify column name after 'before:' (e.g., 'before:total')")
-            else:
-                result['position_type'] = 'before'
-                result['parsed_value'] = reference_col
-        else:
-            result['valid'] = False
-            result['suggestions'].extend([
-                "Valid position options:",
-                "  - 'end' (default)",
-                "  - 'start'",
-                "  - integer index (0-based)",
-                "  - 'after:column_name'",
-                "  - 'before:column_name'"
-            ])
-        return result
-    def get_column_suggestions(self, df: pd.DataFrame, partial_name: str = "") -> List[str]:
-        """
-        Get column name suggestions for positioning
-        Args:
-            df: Target dataframe
-            partial_name: Partial column name for filtering suggestions
-        Returns:
-            List of suggested column names
-        """
-        columns = list(df.columns)
-        if not partial_name:
-            return columns[:10]  # Return first 10 columns
-        # Filter columns that contain the partial name (case-insensitive)
-        partial_lower = partial_name.lower()
-        matches = [col for col in columns if partial_lower in col.lower()]
-        return matches[:10]  # Return up to 10 matches
-    def get_stats(self) -> Dict[str, Any]:
-        """Get column positioning statistics"""
-        return self._positioning_stats.copy()
-    def reset_stats(self):
-        """Reset positioning statistics"""
-        self._positioning_stats = {
-            'total_operations': 0,
-            'end_insertions': 0,
-            'start_insertions': 0,
-            'index_insertions': 0,
-            'relative_insertions': 0,
-            'errors_handled': 0
-        }
-# Global positioner instance
-_positioner = ColumnPositioner()
-# Convenience functions
-def position_columns(df: pd.DataFrame,
-                    new_columns: List[str],
-                    position: Union[str, int] = "end") -> pd.DataFrame:
-    """Position new columns in dataframe"""
-    return _positioner.position_columns(df, new_columns, position)
-def validate_position_syntax(position: Union[str, int]) -> Dict[str, Any]:
-    """Validate position syntax"""
-    return _positioner.validate_position_syntax(position)
-def get_column_suggestions(df: pd.DataFrame, partial_name: str = "") -> List[str]:
-    """Get column name suggestions"""
-    return _positioner.get_column_suggestions(df, partial_name)
-def get_positioning_stats() -> Dict[str, Any]:
-    """Get positioning statistics"""
-    return _positioner.get_stats()
-# Example usage and validation
-def demonstrate_positioning():
-    """Demonstrate column positioning capabilities"""
-    # Create sample dataframe
-    df = pd.DataFrame({
-        'id': [1, 2, 3],
-        'name': ['A', 'B', 'C'],
-        'category': ['X', 'Y', 'Z'],
-        'new_col1': [10, 20, 30],  # Simulated new columns
-        'new_col2': [100, 200, 300]
-    })
-    new_columns = ['new_col1', 'new_col2']
-    print("Original column order:", list(df.columns))
-    # Test different positioning options
-    positions = [
-        "start",
-        "end",
-        1,
-        "after:name",
-        "before:category"
-    ]
-    for pos in positions:
-        result = position_columns(df, new_columns, pos)
-        print(f"Position '{pos}': {list(result.columns)}")
-if __name__ == "__main__":
-    demonstrate_positioning()

additory/core/compiler_polars.py DELETED Viewed

@@ -1,166 +0,0 @@
-# compiler_polars.py
-import polars as pl
-def compile_polars(ast):
-    """
-    Convert AST → Polars expression.
-    Supports:
-      - column
-      - literal
-      - binary arithmetic
-      - comparisons
-      - boolean logic
-      - unary boolean
-      - ternary (if_expr)
-      - function calls (min, max, abs, log, exp)
-    """
-    node_type = ast["type"]
-    # ------------------------------------------------------------
-    # Column reference
-    # ------------------------------------------------------------
-    if node_type == "column":
-        return pl.col(ast["name"])
-    # ------------------------------------------------------------
-    # Literal
-    # ------------------------------------------------------------
-    if node_type == "literal":
-        return pl.lit(ast["value"])
-    # ------------------------------------------------------------
-    # Binary arithmetic: + - * / ** % //
-    # ------------------------------------------------------------
-    if node_type == "binary":
-        left = compile_polars(ast["left"])
-        right = compile_polars(ast["right"])
-        op = ast["op"]
-        if op == "+":
-            return left + right
-        if op == "-":
-            return left - right
-        if op == "*":
-            return left * right
-        if op == "/":
-            return left / right
-        if op == "**":
-            return left ** right
-        if op == "%":
-            return left % right
-        if op == "//":
-            return left // right
-        raise NotImplementedError(f"Unknown binary op: {op}")
-    # ------------------------------------------------------------
-    # Comparison: == != > < >= <=
-    # ------------------------------------------------------------
-    if node_type == "cmp":
-        left = compile_polars(ast["left"])
-        right = compile_polars(ast["right"])
-        op = ast["op"]
-        if op == "==":
-            return left == right
-        if op == "!=":
-            return left != right
-        if op == ">":
-            return left > right
-        if op == "<":
-            return left < right
-        if op == ">=":
-            return left >= right
-        if op == "<=":
-            return left <= right
-        raise NotImplementedError(f"Unknown comparison op: {op}")
-    # ------------------------------------------------------------
-    # Boolean operations: and/or
-    # ------------------------------------------------------------
-    if node_type == "bool_op":
-        op = ast["op"]
-        values = [compile_polars(v) for v in ast["values"]]
-        if op == "and":
-            expr = values[0]
-            for v in values[1:]:
-                expr = expr & v
-            return expr
-        if op == "or":
-            expr = values[0]
-            for v in values[1:]:
-                expr = expr | v
-            return expr
-        raise NotImplementedError(f"Unknown boolean op: {op}")
-    # ------------------------------------------------------------
-    # Unary boolean: not x
-    # ------------------------------------------------------------
-    if node_type == "unary_bool":
-        val = compile_polars(ast["value"])
-        return ~val
-    # ------------------------------------------------------------
-    # Ternary: a if cond else b
-    # ------------------------------------------------------------
-    if node_type == "if_expr":
-        cond = compile_polars(ast["cond"])
-        then = compile_polars(ast["then"])
-        els = compile_polars(ast["else"])
-        return pl.when(cond).then(then).otherwise(els)
-    # ------------------------------------------------------------
-    # Function calls: min, max, abs, log, exp, sqrt, sin, cos, tan, round, ceil, floor
-    # ------------------------------------------------------------
-    if node_type == "call":
-        name = ast["name"]
-        args = [compile_polars(a) for a in ast["args"]]
-        # Basic math functions
-        if name == "abs":
-            return args[0].abs()
-        if name == "log":
-            return args[0].log()
-        if name == "exp":
-            return args[0].exp()
-        if name == "sqrt":
-            return args[0].sqrt()
-        # Rounding functions
-        if name == "round":
-            if len(args) == 1:
-                return args[0].round(0)
-            else:
-                return args[0].round(args[1])
-        if name == "ceil":
-            return args[0].ceil()
-        if name == "floor":
-            return args[0].floor()
-        # Trigonometric functions
-        if name == "sin":
-            return args[0].sin()
-        if name == "cos":
-            return args[0].cos()
-        if name == "tan":
-            return args[0].tan()
-        # Aggregation functions (horizontal)
-        if name == "min":
-            return pl.min_horizontal(*args)
-        if name == "max":
-            return pl.max_horizontal(*args)
-        raise NotImplementedError(f"Unknown function: {name}")
-    # ------------------------------------------------------------
-    # Fallback
-    # ------------------------------------------------------------
-    raise NotImplementedError(f"Unsupported AST node: {ast}")

additory 0.1.0a4__py3-none-any.whl → 0.1.1a1__py3-none-any.whl

additory 0.1.0a4__py3-none-any.whl → 0.1.1a1__py3-none-any.whl