PyPI - additory - Version diff - additory-0.1.0a4-py3-none-any.whl → additory-0.1.1a1-py3-none-any.whl - Mend

additory-0.1.0a4-py3-none-any.whl → additory-0.1.1a1-py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (121)

additory/__init__.py +58 -14
additory/common/__init__.py +31 -147
additory/common/column_selector.py +255 -0
additory/common/distributions.py +286 -613
additory/common/extractors.py +313 -0
additory/common/knn_imputation.py +332 -0
additory/common/result.py +380 -0
additory/common/strategy_parser.py +243 -0
additory/common/unit_conversions.py +338 -0
additory/common/validation.py +283 -103
additory/core/__init__.py +34 -22
additory/core/backend.py +258 -0
additory/core/config.py +177 -305
additory/core/logging.py +230 -24
additory/core/memory_manager.py +157 -495
additory/expressions/__init__.py +2 -23
additory/expressions/compiler.py +457 -0
additory/expressions/engine.py +264 -487
additory/expressions/integrity.py +179 -0
additory/expressions/loader.py +263 -0
additory/expressions/parser.py +363 -167
additory/expressions/resolver.py +274 -0
additory/functions/__init__.py +1 -0
additory/functions/analyze/__init__.py +144 -0
additory/functions/analyze/cardinality.py +58 -0
additory/functions/analyze/correlations.py +66 -0
additory/functions/analyze/distributions.py +53 -0
additory/functions/analyze/duplicates.py +49 -0
additory/functions/analyze/features.py +61 -0
additory/functions/analyze/imputation.py +66 -0
additory/functions/analyze/outliers.py +65 -0
additory/functions/analyze/patterns.py +65 -0
additory/functions/analyze/presets.py +72 -0
additory/functions/analyze/quality.py +59 -0
additory/functions/analyze/timeseries.py +53 -0
additory/functions/analyze/types.py +45 -0
additory/functions/expressions/__init__.py +161 -0
additory/functions/snapshot/__init__.py +82 -0
additory/functions/snapshot/filter.py +119 -0
additory/functions/synthetic/__init__.py +113 -0
additory/functions/synthetic/mode_detector.py +47 -0
additory/functions/synthetic/strategies/__init__.py +1 -0
additory/functions/synthetic/strategies/advanced.py +35 -0
additory/functions/synthetic/strategies/augmentative.py +160 -0
additory/functions/synthetic/strategies/generative.py +168 -0
additory/functions/synthetic/strategies/presets.py +116 -0
additory/functions/to/__init__.py +188 -0
additory/functions/to/lookup.py +351 -0
additory/functions/to/merge.py +189 -0
additory/functions/to/sort.py +91 -0
additory/functions/to/summarize.py +170 -0
additory/functions/transform/__init__.py +140 -0
additory/functions/transform/datetime.py +79 -0
additory/functions/transform/extract.py +85 -0
additory/functions/transform/harmonize.py +105 -0
additory/functions/transform/knn.py +62 -0
additory/functions/transform/onehotencoding.py +68 -0
additory/functions/transform/transpose.py +42 -0
additory-0.1.1a1.dist-info/METADATA +83 -0
additory-0.1.1a1.dist-info/RECORD +62 -0
additory/analysis/__init__.py +0 -48
additory/analysis/cardinality.py +0 -126
additory/analysis/correlations.py +0 -124
additory/analysis/distributions.py +0 -376
additory/analysis/quality.py +0 -158
additory/analysis/scan.py +0 -400
additory/common/backend.py +0 -371
additory/common/column_utils.py +0 -191
additory/common/exceptions.py +0 -62
additory/common/lists.py +0 -229
additory/common/patterns.py +0 -240
additory/common/resolver.py +0 -567
additory/common/sample_data.py +0 -182
additory/core/ast_builder.py +0 -165
additory/core/backends/__init__.py +0 -23
additory/core/backends/arrow_bridge.py +0 -483
additory/core/backends/cudf_bridge.py +0 -355
additory/core/column_positioning.py +0 -358
additory/core/compiler_polars.py +0 -166
additory/core/enhanced_cache_manager.py +0 -1119
additory/core/enhanced_matchers.py +0 -473
additory/core/enhanced_version_manager.py +0 -325
additory/core/executor.py +0 -59
additory/core/integrity_manager.py +0 -477
additory/core/loader.py +0 -190
additory/core/namespace_manager.py +0 -657
additory/core/parser.py +0 -176
additory/core/polars_expression_engine.py +0 -601
additory/core/registry.py +0 -177
additory/core/sample_data_manager.py +0 -492
additory/core/user_namespace.py +0 -751
additory/core/validator.py +0 -27
additory/dynamic_api.py +0 -352
additory/expressions/proxy.py +0 -549
additory/expressions/registry.py +0 -313
additory/expressions/samples.py +0 -492
additory/synthetic/__init__.py +0 -13
additory/synthetic/column_name_resolver.py +0 -149
additory/synthetic/deduce.py +0 -259
additory/synthetic/distributions.py +0 -22
additory/synthetic/forecast.py +0 -1132
additory/synthetic/linked_list_parser.py +0 -415
additory/synthetic/namespace_lookup.py +0 -129
additory/synthetic/smote.py +0 -320
additory/synthetic/strategies.py +0 -926
additory/synthetic/synthesizer.py +0 -713
additory/utilities/__init__.py +0 -53
additory/utilities/encoding.py +0 -600
additory/utilities/games.py +0 -300
additory/utilities/keys.py +0 -8
additory/utilities/lookup.py +0 -103
additory/utilities/matchers.py +0 -216
additory/utilities/resolvers.py +0 -286
additory/utilities/settings.py +0 -167
additory/utilities/units.py +0 -749
additory/utilities/validators.py +0 -153
additory-0.1.0a4.dist-info/METADATA +0 -311
additory-0.1.0a4.dist-info/RECORD +0 -72
additory-0.1.0a4.dist-info/licenses/LICENSE +0 -21
{additory-0.1.0a4.dist-info → additory-0.1.1a1.dist-info}/WHEEL +0 -0
{additory-0.1.0a4.dist-info → additory-0.1.1a1.dist-info}/top_level.txt +0 -0

additory/utilities/resolvers.py (deleted)

@@ -1,286 +0,0 @@
-# additory/ops/resolvers.py
-import pandas as pd
-from typing import List, Dict, Any
-from collections import Counter
-def resolve_strict(matches, ref_df, cols):
-    """
-    Only accept exactly one match.
-    If 0 or >1 matches → return None for all columns.
-    """
-    if len(matches) != 1:
-        return {col: None for col in cols}
-    row = ref_df.iloc[matches[0]]
-    return {col: row[col] for col in cols}
-def resolve_first(matches, ref_df, cols):
-    """
-    Excel VLOOKUP behavior:
-    - If no matches → None
-    - If multiple → take the first
-    """
-    if not matches:
-        return {col: None for col in cols}
-    row = ref_df.iloc[matches[0]]
-    return {col: row[col] for col in cols}
-def resolve_last(matches, ref_df, cols):
-    """
-    Take the last match:
-    - If no matches → None
-    - If multiple → take the last
-    """
-    if not matches:
-        return {col: None for col in cols}
-    row = ref_df.iloc[matches[-1]]
-    return {col: row[col] for col in cols}
-def resolve_majority(matches, ref_df, cols):
-    """
-    For each column:
-    - pick the most frequent value among duplicates
-    - ties → first occurring
-    """
-    out = {}
-    for col in cols:
-        values = [ref_df.iloc[i][col] for i in matches]
-        if not values:
-            out[col] = None
-            continue
-        # Count frequency, handling NaN values
-        non_null_values = [v for v in values if pd.notna(v)]
-        if not non_null_values:
-            out[col] = None
-            continue
-        # Use Counter for frequency counting
-        freq = Counter(non_null_values)
-        # Pick the value with highest frequency (first in case of tie)
-        out[col] = freq.most_common(1)[0][0]
-    return out
-def resolve_max(matches, ref_df, cols):
-    """
-    For numeric or date-like columns:
-    - pick the maximum value
-    """
-    out = {}
-    for col in cols:
-        values = [ref_df.iloc[i][col] for i in matches]
-        if not values:
-            out[col] = None
-        else:
-            try:
-                # Filter out NaN values
-                non_null_values = [v for v in values if pd.notna(v)]
-                if non_null_values:
-                    out[col] = max(non_null_values)
-                else:
-                    out[col] = None
-            except (TypeError, ValueError):
-                out[col] = None
-    return out
-def resolve_min(matches, ref_df, cols):
-    """
-    For numeric or date-like columns:
-    - pick the minimum value
-    """
-    out = {}
-    for col in cols:
-        values = [ref_df.iloc[i][col] for i in matches]
-        if not values:
-            out[col] = None
-        else:
-            try:
-                # Filter out NaN values
-                non_null_values = [v for v in values if pd.notna(v)]
-                if non_null_values:
-                    out[col] = min(non_null_values)
-                else:
-                    out[col] = None
-            except (TypeError, ValueError):
-                out[col] = None
-    return out
-def resolve_longest(matches, ref_df, cols):
-    """
-    For text columns:
-    - pick the longest string representation
-    """
-    out = {}
-    for col in cols:
-        values = [ref_df.iloc[i][col] for i in matches]
-        if not values:
-            out[col] = None
-        else:
-            # Filter out NaN values and convert to string
-            non_null_values = [v for v in values if pd.notna(v)]
-            if non_null_values:
-                out[col] = max(non_null_values, key=lambda x: len(str(x)))
-            else:
-                out[col] = None
-    return out
-def resolve_shortest(matches, ref_df, cols):
-    """
-    For text columns:
-    - pick the shortest string representation
-    """
-    out = {}
-    for col in cols:
-        values = [ref_df.iloc[i][col] for i in matches]
-        if not values:
-            out[col] = None
-        else:
-            # Filter out NaN values and convert to string
-            non_null_values = [v for v in values if pd.notna(v)]
-            if non_null_values:
-                out[col] = min(non_null_values, key=lambda x: len(str(x)))
-            else:
-                out[col] = None
-    return out
-def resolve_sum(matches, ref_df, cols):
-    """
-    For numeric columns:
-    - sum all matching values
-    """
-    out = {}
-    for col in cols:
-        values = [ref_df.iloc[i][col] for i in matches]
-        if not values:
-            out[col] = None
-        else:
-            try:
-                # Filter out NaN values
-                non_null_values = [v for v in values if pd.notna(v)]
-                if non_null_values:
-                    # Try to sum numeric values
-                    numeric_values = [float(v) for v in non_null_values]
-                    out[col] = sum(numeric_values)
-                else:
-                    out[col] = None
-            except (TypeError, ValueError):
-                out[col] = None
-    return out
-def resolve_count(matches, ref_df, cols):
-    """
-    Count the number of matches for each column
-    """
-    out = {}
-    for col in cols:
-        values = [ref_df.iloc[i][col] for i in matches]
-        # Count non-null values
-        non_null_count = sum(1 for v in values if pd.notna(v))
-        out[col] = non_null_count
-    return out
-def resolve_avg(matches, ref_df, cols):
-    """
-    For numeric columns:
-    - calculate average of all matching values
-    """
-    out = {}
-    for col in cols:
-        values = [ref_df.iloc[i][col] for i in matches]
-        if not values:
-            out[col] = None
-        else:
-            try:
-                # Filter out NaN values
-                non_null_values = [v for v in values if pd.notna(v)]
-                if non_null_values:
-                    # Try to average numeric values
-                    numeric_values = [float(v) for v in non_null_values]
-                    out[col] = sum(numeric_values) / len(numeric_values)
-                else:
-                    out[col] = None
-            except (TypeError, ValueError):
-                out[col] = None
-    return out
-def resolve_concat(matches, ref_df, cols):
-    """
-    For text columns:
-    - concatenate all matching values with separator
-    """
-    out = {}
-    for col in cols:
-        values = [ref_df.iloc[i][col] for i in matches]
-        if not values:
-            out[col] = None
-        else:
-            # Filter out NaN values and convert to string
-            non_null_values = [str(v) for v in values if pd.notna(v)]
-            if non_null_values:
-                out[col] = "; ".join(non_null_values)
-            else:
-                out[col] = None
-    return out
-RESOLVERS = {
-    # Single value selection
-    "strict": resolve_strict,
-    "first": resolve_first,
-    "last": resolve_last,
-    "majority": resolve_majority,
-    # Numeric aggregation
-    "max": resolve_max,
-    "min": resolve_min,
-    "sum": resolve_sum,
-    "avg": resolve_avg,
-    "count": resolve_count,
-    # Text aggregation
-    "longest": resolve_longest,
-    "shortest": resolve_shortest,
-    "concat": resolve_concat,
-}

additory/utilities/settings.py (deleted)

@@ -1,167 +0,0 @@
-# additory/utilities/settings.py
-# Global settings management
-"""
-Settings Utilities Module
-This module provides global settings management for the additory library:
-- Backend preferences
-- Path configurations
-- Performance settings
-- User preferences
-"""
-from typing import Optional, Dict, Any
-import os
-# Global settings storage
-_global_settings = {
-    "backend": "auto",  # auto, pandas, polars, cudf
-    "precision": "auto",  # auto, float32, float64
-    "my_expressions_path": None,
-    "my_schemas_path": None,
-    "cache_enabled": True,
-    "memory_threshold_mb": 100,
-    "performance_mode": "balanced"  # fast, balanced, memory_optimized
-}
-def set_global_settings(**kwargs) -> Dict[str, Any]:
-    """
-    Set global settings for additory
-    Args:
-        backend: Preferred backend ("auto", "pandas", "polars", "cudf")
-        precision: Numeric precision ("auto", "float32", "float64")
-        my_expressions_path: Path to user expressions
-        my_schemas_path: Path to user schemas
-        cache_enabled: Enable/disable caching
-        memory_threshold_mb: Memory cleanup threshold
-        performance_mode: Performance mode ("fast", "balanced", "memory_optimized")
-    Returns:
-        Dictionary with updated settings
-    """
-    global _global_settings
-    valid_backends = ["auto", "pandas", "polars", "cudf"]
-    valid_precisions = ["auto", "float32", "float64"]
-    valid_performance_modes = ["fast", "balanced", "memory_optimized"]
-    for key, value in kwargs.items():
-        if key == "backend" and value not in valid_backends:
-            raise ValueError(f"Invalid backend: {value}. Must be one of {valid_backends}")
-        elif key == "precision" and value not in valid_precisions:
-            raise ValueError(f"Invalid precision: {value}. Must be one of {valid_precisions}")
-        elif key == "performance_mode" and value not in valid_performance_modes:
-            raise ValueError(f"Invalid performance_mode: {value}. Must be one of {valid_performance_modes}")
-        elif key in ["my_expressions_path", "my_schemas_path"] and value is not None:
-            if not os.path.exists(value):
-                raise ValueError(f"Path does not exist: {value}")
-        if key in _global_settings:
-            _global_settings[key] = value
-        else:
-            raise ValueError(f"Unknown setting: {key}")
-    return _global_settings.copy()
-def get_global_settings() -> Dict[str, Any]:
-    """
-    Get current global settings
-    Returns:
-        Dictionary with current settings
-    """
-    return _global_settings.copy()
-def get_setting(key: str, default: Any = None) -> Any:
-    """
-    Get a specific setting value
-    Args:
-        key: Setting key
-        default: Default value if key not found
-    Returns:
-        Setting value or default
-    """
-    return _global_settings.get(key, default)
-def reset_settings():
-    """Reset all settings to defaults"""
-    global _global_settings
-    _global_settings = {
-        "backend": "auto",
-        "precision": "auto",
-        "my_expressions_path": None,
-        "my_schemas_path": None,
-        "cache_enabled": True,
-        "memory_threshold_mb": 100,
-        "performance_mode": "balanced"
-    }
-def set_my_expressions_path(path: str):
-    """
-    Set path for user expressions
-    Args:
-        path: Path to user expressions directory
-    """
-    if not os.path.exists(path):
-        raise ValueError(f"Path does not exist: {path}")
-    _global_settings["my_expressions_path"] = path
-def set_my_schemas_path(path: str):
-    """
-    Set path for user schemas
-    Args:
-        path: Path to user schemas directory
-    """
-    if not os.path.exists(path):
-        raise ValueError(f"Path does not exist: {path}")
-    _global_settings["my_schemas_path"] = path
-def get_my_expressions_path() -> Optional[str]:
-    """Get current user expressions path"""
-    return _global_settings.get("my_expressions_path")
-def get_my_schemas_path() -> Optional[str]:
-    """Get current user schemas path"""
-    return _global_settings.get("my_schemas_path")
-# Convenience functions for common settings
-def set_backend(backend: str):
-    """Set preferred backend"""
-    set_global_settings(backend=backend)
-def set_precision(precision: str):
-    """Set numeric precision"""
-    set_global_settings(precision=precision)
-def enable_cache():
-    """Enable caching"""
-    set_global_settings(cache_enabled=True)
-def disable_cache():
-    """Disable caching"""
-    set_global_settings(cache_enabled=False)
-def set_performance_mode(mode: str):
-    """Set performance mode"""
-    set_global_settings(performance_mode=mode)

additory-0.1.0a4-py3-none-any.whl → additory-0.1.1a1-py3-none-any.whl

additory-0.1.0a4-py3-none-any.whl → additory-0.1.1a1-py3-none-any.whl