PyPI - additory - Versions diffs - 0.1.0a4__py3-none-any.whl → 0.1.1a1__py3-none-any.whl - Mend

additory 0.1.0a4-py3-none-any.whl → 0.1.1a1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (121)

additory/__init__.py +58 -14
additory/common/__init__.py +31 -147
additory/common/column_selector.py +255 -0
additory/common/distributions.py +286 -613
additory/common/extractors.py +313 -0
additory/common/knn_imputation.py +332 -0
additory/common/result.py +380 -0
additory/common/strategy_parser.py +243 -0
additory/common/unit_conversions.py +338 -0
additory/common/validation.py +283 -103
additory/core/__init__.py +34 -22
additory/core/backend.py +258 -0
additory/core/config.py +177 -305
additory/core/logging.py +230 -24
additory/core/memory_manager.py +157 -495
additory/expressions/__init__.py +2 -23
additory/expressions/compiler.py +457 -0
additory/expressions/engine.py +264 -487
additory/expressions/integrity.py +179 -0
additory/expressions/loader.py +263 -0
additory/expressions/parser.py +363 -167
additory/expressions/resolver.py +274 -0
additory/functions/__init__.py +1 -0
additory/functions/analyze/__init__.py +144 -0
additory/functions/analyze/cardinality.py +58 -0
additory/functions/analyze/correlations.py +66 -0
additory/functions/analyze/distributions.py +53 -0
additory/functions/analyze/duplicates.py +49 -0
additory/functions/analyze/features.py +61 -0
additory/functions/analyze/imputation.py +66 -0
additory/functions/analyze/outliers.py +65 -0
additory/functions/analyze/patterns.py +65 -0
additory/functions/analyze/presets.py +72 -0
additory/functions/analyze/quality.py +59 -0
additory/functions/analyze/timeseries.py +53 -0
additory/functions/analyze/types.py +45 -0
additory/functions/expressions/__init__.py +161 -0
additory/functions/snapshot/__init__.py +82 -0
additory/functions/snapshot/filter.py +119 -0
additory/functions/synthetic/__init__.py +113 -0
additory/functions/synthetic/mode_detector.py +47 -0
additory/functions/synthetic/strategies/__init__.py +1 -0
additory/functions/synthetic/strategies/advanced.py +35 -0
additory/functions/synthetic/strategies/augmentative.py +160 -0
additory/functions/synthetic/strategies/generative.py +168 -0
additory/functions/synthetic/strategies/presets.py +116 -0
additory/functions/to/__init__.py +188 -0
additory/functions/to/lookup.py +351 -0
additory/functions/to/merge.py +189 -0
additory/functions/to/sort.py +91 -0
additory/functions/to/summarize.py +170 -0
additory/functions/transform/__init__.py +140 -0
additory/functions/transform/datetime.py +79 -0
additory/functions/transform/extract.py +85 -0
additory/functions/transform/harmonize.py +105 -0
additory/functions/transform/knn.py +62 -0
additory/functions/transform/onehotencoding.py +68 -0
additory/functions/transform/transpose.py +42 -0
additory-0.1.1a1.dist-info/METADATA +83 -0
additory-0.1.1a1.dist-info/RECORD +62 -0
additory/analysis/__init__.py +0 -48
additory/analysis/cardinality.py +0 -126
additory/analysis/correlations.py +0 -124
additory/analysis/distributions.py +0 -376
additory/analysis/quality.py +0 -158
additory/analysis/scan.py +0 -400
additory/common/backend.py +0 -371
additory/common/column_utils.py +0 -191
additory/common/exceptions.py +0 -62
additory/common/lists.py +0 -229
additory/common/patterns.py +0 -240
additory/common/resolver.py +0 -567
additory/common/sample_data.py +0 -182
additory/core/ast_builder.py +0 -165
additory/core/backends/__init__.py +0 -23
additory/core/backends/arrow_bridge.py +0 -483
additory/core/backends/cudf_bridge.py +0 -355
additory/core/column_positioning.py +0 -358
additory/core/compiler_polars.py +0 -166
additory/core/enhanced_cache_manager.py +0 -1119
additory/core/enhanced_matchers.py +0 -473
additory/core/enhanced_version_manager.py +0 -325
additory/core/executor.py +0 -59
additory/core/integrity_manager.py +0 -477
additory/core/loader.py +0 -190
additory/core/namespace_manager.py +0 -657
additory/core/parser.py +0 -176
additory/core/polars_expression_engine.py +0 -601
additory/core/registry.py +0 -177
additory/core/sample_data_manager.py +0 -492
additory/core/user_namespace.py +0 -751
additory/core/validator.py +0 -27
additory/dynamic_api.py +0 -352
additory/expressions/proxy.py +0 -549
additory/expressions/registry.py +0 -313
additory/expressions/samples.py +0 -492
additory/synthetic/__init__.py +0 -13
additory/synthetic/column_name_resolver.py +0 -149
additory/synthetic/deduce.py +0 -259
additory/synthetic/distributions.py +0 -22
additory/synthetic/forecast.py +0 -1132
additory/synthetic/linked_list_parser.py +0 -415
additory/synthetic/namespace_lookup.py +0 -129
additory/synthetic/smote.py +0 -320
additory/synthetic/strategies.py +0 -926
additory/synthetic/synthesizer.py +0 -713
additory/utilities/__init__.py +0 -53
additory/utilities/encoding.py +0 -600
additory/utilities/games.py +0 -300
additory/utilities/keys.py +0 -8
additory/utilities/lookup.py +0 -103
additory/utilities/matchers.py +0 -216
additory/utilities/resolvers.py +0 -286
additory/utilities/settings.py +0 -167
additory/utilities/units.py +0 -749
additory/utilities/validators.py +0 -153
additory-0.1.0a4.dist-info/METADATA +0 -311
additory-0.1.0a4.dist-info/RECORD +0 -72
additory-0.1.0a4.dist-info/licenses/LICENSE +0 -21
{additory-0.1.0a4.dist-info → additory-0.1.1a1.dist-info}/WHEEL +0 -0
{additory-0.1.0a4.dist-info → additory-0.1.1a1.dist-info}/top_level.txt +0 -0

additory/utilities/games.py DELETED Viewed

@@ -1,300 +0,0 @@
-"""
-Easter Egg Games Module
-Hidden games for the curious. Not documented in main API docs.
-Reinforces row-column thinking - critical for DataFrame operations.
-Inspired by: Chrome dinosaur game, Python's antigravity, apt-get moo
-"""
-import random
-def print_board(board):
-    """Print a 3x3 tic-tac-toe board"""
-    print("\n")
-    for i in range(3):
-        row = " | ".join(board[i])
-        print(" " + row)
-        if i < 2:
-            print("---+---+---")
-    print("\n")
-def check_winner(board, player):
-    """Check if a player has won"""
-    win_states = [
-        [(0,0),(0,1),(0,2)],  # Row 1
-        [(1,0),(1,1),(1,2)],  # Row 2
-        [(2,0),(2,1),(2,2)],  # Row 3
-        [(0,0),(1,0),(2,0)],  # Col 1
-        [(0,1),(1,1),(2,1)],  # Col 2
-        [(0,2),(1,2),(2,2)],  # Col 3
-        [(0,0),(1,1),(2,2)],  # Diagonal \
-        [(0,2),(1,1),(2,0)]   # Diagonal /
-    ]
-    return any(all(board[r][c] == player for r, c in combo) for combo in win_states)
-def is_full(board):
-    """Check if board is full"""
-    return all(board[r][c] != " " for r in range(3) for c in range(3))
-def get_empty_cells(board):
-    """Get list of empty cells"""
-    return [(r, c) for r in range(3) for c in range(3) if board[r][c] == " "]
-def computer_move(board):
-    """AI move for tic-tac-toe"""
-    # 1. Try to win
-    for r, c in get_empty_cells(board):
-        board[r][c] = "O"
-        if check_winner(board, "O"):
-            return
-        board[r][c] = " "
-    # 2. Try to block user
-    for r, c in get_empty_cells(board):
-        board[r][c] = "X"
-        if check_winner(board, "X"):
-            board[r][c] = "O"
-            return
-        board[r][c] = " "
-    # 3. Otherwise pick random
-    r, c = random.choice(get_empty_cells(board))
-    board[r][c] = "O"
-def tictactoe():
-    """
-    Play Tic-Tac-Toe against the computer.
-    Reinforces row-column thinking - enter moves as "row col" (e.g., "2 3")
-    Just like DataFrame indexing: df.iloc[row, col]
-    """
-    board = [[" " for _ in range(3)] for _ in range(3)]
-    print("=" * 50)
-    print("Welcome to Tic Tac Toe!")
-    print("=" * 50)
-    print("You are X. Computer is O.")
-    print("Enter moves as: row col (e.g., '2 3' for row 2, column 3)")
-    print("Rows and columns are numbered 1-3")
-    print("Think of it like DataFrame indexing: df.iloc[row, col]")
-    print("=" * 50)
-    print_board(board)
-    while True:
-        # USER MOVE
-        try:
-            move = input("Your move (row col): ")
-            r, c = map(int, move.split())
-            r -= 1  # Convert to 0-indexed
-            c -= 1
-            if r not in range(3) or c not in range(3):
-                print("Invalid position. Choose row/col between 1 and 3.")
-                continue
-            if board[r][c] != " ":
-                print("That spot is already taken. Try again.")
-                continue
-            board[r][c] = "X"
-            print_board(board)
-            if check_winner(board, "X"):
-                print("🎉 You win!")
-                break
-            if is_full(board):
-                print("It's a draw!")
-                break
-            # COMPUTER MOVE
-            print("Computer is thinking...")
-            computer_move(board)
-            print_board(board)
-            if check_winner(board, "O"):
-                print("💻 Computer wins!")
-                break
-            if is_full(board):
-                print("It's a draw!")
-                break
-        except ValueError:
-            print("Invalid input. Enter row and column like: 2 3")
-        except KeyboardInterrupt:
-            print("\n\nGame interrupted. Thanks for playing!")
-            break
-# A simple valid completed Sudoku board
-BASE_BOARD = [
-    [5,3,4,6,7,8,9,1,2],
-    [6,7,2,1,9,5,3,4,8],
-    [1,9,8,3,4,2,5,6,7],
-    [8,5,9,7,6,1,4,2,3],
-    [4,2,6,8,5,3,7,9,1],
-    [7,1,3,9,2,4,8,5,6],
-    [9,6,1,5,3,7,2,8,4],
-    [2,8,7,4,1,9,6,3,5],
-    [3,4,5,2,8,6,1,7,9]
-]
-def remove_numbers(board, holes=40):
-    """Remove numbers from a completed Sudoku board to create a puzzle"""
-    puzzle = [row[:] for row in board]
-    removed = 0
-    while removed < holes:
-        r = random.randint(0, 8)
-        c = random.randint(0, 8)
-        if puzzle[r][c] != 0:
-            puzzle[r][c] = 0
-            removed += 1
-    return puzzle
-def print_sudoku_board(board):
-    """Print a Sudoku board with nice formatting"""
-    print("\nSudoku Board:")
-    for i, row in enumerate(board):
-        if i % 3 == 0 and i != 0:
-            print("------+-------+------")
-        row_str = ""
-        for j, val in enumerate(row):
-            if j % 3 == 0 and j != 0:
-                row_str += "| "
-            row_str += (str(val) if val != 0 else ".") + " "
-        print(row_str)
-    print()
-def is_valid_sudoku(board, r, c, num):
-    """Check if placing num at (r, c) is valid"""
-    # Check row
-    if num in board[r]:
-        return False
-    # Check column
-    for i in range(9):
-        if board[i][c] == num:
-            return False
-    # Check 3x3 box
-    br = (r // 3) * 3
-    bc = (c // 3) * 3
-    for i in range(br, br + 3):
-        for j in range(bc, bc + 3):
-            if board[i][j] == num:
-                return False
-    return True
-def is_solved(board):
-    """Check if Sudoku is completely solved"""
-    return all(all(cell != 0 for cell in row) for row in board)
-def sudoku():
-    """
-    Play Sudoku!
-    Reinforces row-column thinking - enter moves as "row col number" (e.g., "3 4 9")
-    Just like DataFrame operations: df.iloc[row, col] = value
-    """
-    print("=" * 50)
-    print("Welcome to Sudoku!")
-    print("=" * 50)
-    print("Enter moves as: row col number (e.g., '3 4 9')")
-    print("Rows and columns are numbered 1-9")
-    print("Think of it like DataFrame assignment: df.iloc[row, col] = value")
-    print("Type 'abort' or 'exit' to quit.")
-    print("=" * 50)
-    solution = BASE_BOARD
-    puzzle = remove_numbers(solution, holes=45)
-    board = [row[:] for row in puzzle]
-    print_sudoku_board(board)
-    while True:
-        move = input("Your move: ").strip().lower()
-        if move in ("abort", "exit"):
-            print("Game ended by user.")
-            break
-        try:
-            r, c, num = map(int, move.split())
-            r -= 1  # Convert to 0-indexed
-            c -= 1
-            if not (0 <= r < 9 and 0 <= c < 9):
-                print("Invalid position. Row/col must be 1–9.")
-                continue
-            if not (1 <= num <= 9):
-                print("Number must be between 1 and 9.")
-                continue
-            if puzzle[r][c] != 0:
-                print("This cell is fixed and cannot be changed.")
-                continue
-            if not is_valid_sudoku(board, r, c, num):
-                print("Invalid move. Violates Sudoku rules.")
-                continue
-            board[r][c] = num
-            print_sudoku_board(board)
-            if is_solved(board):
-                print("🎉 Congratulations! You solved the Sudoku.")
-                break
-        except ValueError:
-            print("Invalid input. Use: row col number (e.g., 2 5 7)")
-        except KeyboardInterrupt:
-            print("\n\nGame interrupted. Thanks for playing!")
-            break
-def play(game="tictactoe"):
-    """
-    Play a game! 🎮
-    Available games:
-    - 'tictactoe' or 'ttt': Play Tic-Tac-Toe
-    - 'sudoku': Play Sudoku
-    Both games reinforce row-column thinking - critical for DataFrame operations!
-    Args:
-        game: Name of the game to play (default: 'tictactoe')
-    Example:
-        >>> import additory
-        >>> additory.add.play('tictactoe')
-        >>> additory.add.play('sudoku')
-    """
-    game = game.lower().strip()
-    if game in ('tictactoe', 'ttt'):
-        tictactoe()
-    elif game == 'sudoku':
-        sudoku()
-    else:
-        print(f"Unknown game: {game}")
-        print("Available games: 'tictactoe' (or 'ttt'), 'sudoku'")
-        print("\nExample:")
-        print("  add.play('tictactoe')")
-        print("  add.play('sudoku')")

additory/utilities/keys.py DELETED Viewed

@@ -1,8 +0,0 @@
-# additory/ops/keys.py
-def build_keys(df, columns):
-    """
-    Build composite tuple keys for each row.
-    Vectorized where possible.
-    """
-    return list(zip(*[df[col] for col in columns]))

additory/utilities/lookup.py DELETED Viewed

@@ -1,103 +0,0 @@
-# additory/utilities/lookup.py
-# Consolidated lookup functionality (add.to)
-"""
-Lookup Utilities Module
-This module provides the add.to() functionality for adding columns from reference dataframes.
-"""
-import pandas as pd
-import polars as pl
-from typing import Union, List, Optional, Any
-def to(target_df: Union[pd.DataFrame, pl.DataFrame],
-      from_df: Optional[Union[pd.DataFrame, pl.DataFrame]] = None,
-      bring: Union[str, List[str]] = None,
-      against: Union[str, List[str]] = None,
-      **kwargs) -> Union[pd.DataFrame, pl.DataFrame]:
-    """
-    Add columns from reference dataframe to target dataframe
-    Args:
-        target_df: Target dataframe to add columns to
-        from_df: Reference dataframe to get columns from
-        bring: Column(s) to bring from reference dataframe
-        against: Column(s) to match on
-        **kwargs: Additional parameters
-    Returns:
-        Target dataframe with new columns added
-    Example:
-        result = add.to(orders_df, from_df=products_df, bring='price', against='product_id')
-    """
-    if from_df is None:
-        raise ValueError("from_df parameter is required")
-    if bring is None:
-        raise ValueError("bring parameter is required")
-    if against is None:
-        raise ValueError("against parameter is required")
-    # Convert single values to lists
-    if isinstance(bring, str):
-        bring = [bring]
-    if isinstance(against, str):
-        against = [against]
-    # Simple pandas-based implementation
-    if isinstance(target_df, pd.DataFrame) and isinstance(from_df, pd.DataFrame):
-        # Create a mapping from the reference dataframe
-        merge_cols = against
-        result = target_df.merge(
-            from_df[merge_cols + bring],
-            on=merge_cols,
-            how='left'
-        )
-        return result
-    # For other backends, convert to pandas, process, and convert back
-    # This is a simplified implementation
-    if hasattr(target_df, 'to_pandas'):
-        target_pd = target_df.to_pandas()
-    else:
-        target_pd = target_df
-    if hasattr(from_df, 'to_pandas'):
-        from_pd = from_df.to_pandas()
-    else:
-        from_pd = from_df
-    # Perform the merge
-    result_pd = target_pd.merge(
-        from_pd[against + bring],
-        on=against,
-        how='left'
-    )
-    # Convert back to original format if needed
-    if isinstance(target_df, pl.DataFrame):
-        return pl.from_pandas(result_pd)
-    return result_pd
-def fuzzy_lookup(target_df, lookup_df, **kwargs):
-    """Placeholder for fuzzy lookup - not implemented"""
-    raise NotImplementedError("Fuzzy lookup not yet implemented in restructured version")
-def aggregate_lookup(target_df, lookup_df, **kwargs):
-    """Placeholder for aggregate lookup - not implemented"""
-    raise NotImplementedError("Aggregate lookup not yet implemented in restructured version")
-# Re-export the main functions
-__all__ = [
-    'to',
-    'fuzzy_lookup',
-    'aggregate_lookup'
-]

additory/utilities/matchers.py DELETED Viewed

@@ -1,216 +0,0 @@
-# additory/ops/matchers.py
-import re
-from typing import List, Dict, Any, Tuple
-def match_exact(key, lookup):
-    """Exact match - case sensitive"""
-    return lookup.get(key, [])
-def match_iexact(key, lookup):
-    """Case-insensitive exact match"""
-    matches = []
-    key_lower = tuple(str(k).lower() if k is not None else k for k in key)
-    for k, rows in lookup.items():
-        k_lower = tuple(str(v).lower() if v is not None else v for v in k)
-        if key_lower == k_lower:
-            matches.extend(rows)
-    return matches
-def match_contains(key, lookup):
-    """Substring matching - case sensitive"""
-    matches = []
-    for k, rows in lookup.items():
-        if all(_safe_contains(str(a), str(b)) for a, b in zip(key, k) if a is not None and b is not None):
-            matches.extend(rows)
-    return matches
-def match_icontains(key, lookup):
-    """Case-insensitive substring matching"""
-    matches = []
-    for k, rows in lookup.items():
-        if all(_safe_contains(str(a).lower(), str(b).lower()) for a, b in zip(key, k) if a is not None and b is not None):
-            matches.extend(rows)
-    return matches
-def match_beginswith(key, lookup):
-    """Prefix matching - case sensitive"""
-    matches = []
-    for k, rows in lookup.items():
-        if all(_safe_startswith(str(b), str(a)) for a, b in zip(key, k) if a is not None and b is not None):
-            matches.extend(rows)
-    return matches
-def match_ibeginswith(key, lookup):
-    """Case-insensitive prefix matching"""
-    matches = []
-    for k, rows in lookup.items():
-        if all(_safe_startswith(str(b).lower(), str(a).lower()) for a, b in zip(key, k) if a is not None and b is not None):
-            matches.extend(rows)
-    return matches
-def match_endswith(key, lookup):
-    """Suffix matching - case sensitive"""
-    matches = []
-    for k, rows in lookup.items():
-        if all(_safe_endswith(str(b), str(a)) for a, b in zip(key, k) if a is not None and b is not None):
-            matches.extend(rows)
-    return matches
-def match_iendswith(key, lookup):
-    """Case-insensitive suffix matching"""
-    matches = []
-    for k, rows in lookup.items():
-        if all(_safe_endswith(str(b).lower(), str(a).lower()) for a, b in zip(key, k) if a is not None and b is not None):
-            matches.extend(rows)
-    return matches
-def match_regex(key, lookup):
-    """Regular expression matching"""
-    matches = []
-    try:
-        patterns = [re.compile(str(a)) if a is not None else None for a in key]
-    except re.error:
-        return []  # Invalid regex returns no matches
-    for k, rows in lookup.items():
-        try:
-            if all(pattern.search(str(b)) if pattern and b is not None else False
-                   for pattern, b in zip(patterns, k)):
-                matches.extend(rows)
-        except (TypeError, AttributeError):
-            continue
-    return matches
-def match_numeric_range(key, lookup):
-    """
-    Numeric range matching.
-    Key format: (min_val, max_val) matches values in lookup between min and max
-    """
-    matches = []
-    if len(key) != 2:
-        return matches
-    try:
-        min_val, max_val = float(key[0]), float(key[1])
-    except (ValueError, TypeError):
-        return matches
-    for k, rows in lookup.items():
-        if len(k) == 1:  # Single numeric value in lookup
-            try:
-                val = float(k[0])
-                if min_val <= val <= max_val:
-                    matches.extend(rows)
-            except (ValueError, TypeError):
-                continue
-    return matches
-def match_fuzzy(key, lookup):
-    """
-    Basic fuzzy matching using simple string similarity.
-    For V1, we'll use a simple approach. V2 will have semantic matching.
-    """
-    matches = []
-    threshold = 0.8  # Similarity threshold
-    for k, rows in lookup.items():
-        similarity = _calculate_similarity(key, k)
-        if similarity >= threshold:
-            matches.extend(rows)
-    return matches
-# Helper functions
-def _safe_contains(needle, haystack):
-    """Safe substring check"""
-    try:
-        return needle in haystack
-    except TypeError:
-        return False
-def _safe_startswith(text, prefix):
-    """Safe prefix check"""
-    try:
-        return text.startswith(prefix)
-    except (TypeError, AttributeError):
-        return False
-def _safe_endswith(text, suffix):
-    """Safe suffix check"""
-    try:
-        return text.endswith(suffix)
-    except (TypeError, AttributeError):
-        return False
-def _calculate_similarity(key1, key2):
-    """
-    Simple similarity calculation for fuzzy matching.
-    Uses Jaccard similarity on character sets.
-    """
-    if len(key1) != len(key2):
-        return 0.0
-    total_similarity = 0.0
-    for a, b in zip(key1, key2):
-        if a is None or b is None:
-            if a == b:  # Both None
-                total_similarity += 1.0
-            else:
-                total_similarity += 0.0
-        else:
-            str_a, str_b = str(a).lower(), str(b).lower()
-            if str_a == str_b:
-                total_similarity += 1.0
-            else:
-                # Character-level Jaccard similarity
-                set_a, set_b = set(str_a), set(str_b)
-                if len(set_a) == 0 and len(set_b) == 0:
-                    total_similarity += 1.0
-                else:
-                    intersection = len(set_a & set_b)
-                    union = len(set_a | set_b)
-                    total_similarity += intersection / union if union > 0 else 0.0
-    return total_similarity / len(key1)
-MATCHERS = {
-    # Exact matching
-    "exact": match_exact,
-    "iexact": match_iexact,
-    # Substring matching
-    "contains": match_contains,
-    "icontains": match_icontains,
-    # Prefix/suffix matching
-    "beginswith": match_beginswith,
-    "ibeginswith": match_ibeginswith,
-    "endswith": match_endswith,
-    "iendswith": match_iendswith,
-    # Pattern matching
-    "regex": match_regex,
-    # Numeric matching
-    "range": match_numeric_range,
-    # Fuzzy matching
-    "fuzzy": match_fuzzy,
-}

additory 0.1.0a4__py3-none-any.whl → 0.1.1a1__py3-none-any.whl

additory 0.1.0a4-py3-none-any.whl → 0.1.1a1-py3-none-any.whl