PyPI - additory - Versions diffs - 0.1.0a2__py3-none-any.whl → 0.1.0a4__py3-none-any.whl - Mend

additory 0.1.0a2__py3-none-any.whl → 0.1.0a4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (47)

additory/__init__.py +4 -0
additory/common/__init__.py +2 -2
additory/common/backend.py +20 -4
additory/common/distributions.py +1 -1
additory/common/sample_data.py +19 -19
additory/core/backends/arrow_bridge.py +7 -0
additory/core/config.py +3 -3
additory/core/polars_expression_engine.py +66 -16
additory/core/registry.py +4 -3
additory/dynamic_api.py +95 -51
additory/expressions/proxy.py +4 -1
additory/expressions/registry.py +3 -3
additory/synthetic/__init__.py +7 -95
additory/synthetic/column_name_resolver.py +149 -0
additory/synthetic/deduce.py +259 -0
additory/{augment → synthetic}/distributions.py +2 -2
additory/{augment → synthetic}/forecast.py +1 -1
additory/synthetic/linked_list_parser.py +415 -0
additory/synthetic/namespace_lookup.py +129 -0
additory/{augment → synthetic}/smote.py +1 -1
additory/{augment → synthetic}/strategies.py +87 -44
additory/{augment/augmentor.py → synthetic/synthesizer.py} +75 -15
additory/utilities/units.py +4 -1
{additory-0.1.0a2.dist-info → additory-0.1.0a4.dist-info}/METADATA +44 -28
{additory-0.1.0a2.dist-info → additory-0.1.0a4.dist-info}/RECORD +28 -43
{additory-0.1.0a2.dist-info → additory-0.1.0a4.dist-info}/WHEEL +1 -1
additory/augment/__init__.py +0 -24
additory/augment/builtin_lists.py +0 -430
additory/augment/list_registry.py +0 -177
additory/synthetic/api.py +0 -220
additory/synthetic/common_integration.py +0 -314
additory/synthetic/config.py +0 -262
additory/synthetic/engines.py +0 -529
additory/synthetic/exceptions.py +0 -180
additory/synthetic/file_managers.py +0 -518
additory/synthetic/generator.py +0 -702
additory/synthetic/generator_parser.py +0 -68
additory/synthetic/integration.py +0 -319
additory/synthetic/models.py +0 -241
additory/synthetic/pattern_resolver.py +0 -573
additory/synthetic/performance.py +0 -469
additory/synthetic/polars_integration.py +0 -464
additory/synthetic/proxy.py +0 -60
additory/synthetic/schema_parser.py +0 -685
additory/synthetic/validator.py +0 -553
{additory-0.1.0a2.dist-info → additory-0.1.0a4.dist-info}/licenses/LICENSE +0 -0
{additory-0.1.0a2.dist-info → additory-0.1.0a4.dist-info}/top_level.txt +0 -0

additory/__init__.py CHANGED Viewed

@@ -2,6 +2,9 @@
 from .dynamic_api import add as _api_instance
+# Version information
+__version__ = "0.1.0a4"
 # Expose the API instance normally
 add = _api_instance
@@ -12,4 +15,5 @@ def __getattr__(name):
 __all__ = [
     "add",
+    "__version__",
 ]

additory/common/__init__.py CHANGED Viewed

@@ -1,14 +1,14 @@
 """
 Common Utilities Module
-Shared functionality used by both augment and synthetic modules:
+Shared functionality used by both synthetic and expressions modules:
 - Distribution functions (normal, uniform, skewed, etc.)
 - List file management (.list format)
 - Pattern file management (.properties format)
 - Fallback resolution logic
 This module eliminates code duplication and provides consistent behavior
-across augment and synthetic data generation.
+across synthetic and expression data generation.
 """
 from .distributions import (

additory/common/backend.py CHANGED Viewed

@@ -180,11 +180,14 @@ def get_arrow_bridge():
         - Use for all cross-backend conversions
         - Handles pandas/polars/cuDF via Arrow
     """
-    from additory.core.backends.arrow_bridge import EnhancedArrowBridge
+    from additory.core.backends.arrow_bridge import EnhancedArrowBridge, ArrowBridgeError
     # Singleton pattern
     if not hasattr(get_arrow_bridge, '_instance'):
-        get_arrow_bridge._instance = EnhancedArrowBridge()
+        try:
+            get_arrow_bridge._instance = EnhancedArrowBridge()
+        except ArrowBridgeError:
+            get_arrow_bridge._instance = None
     return get_arrow_bridge._instance
@@ -194,7 +197,7 @@ def to_polars(df: Any, backend_type: BackendType = None) -> 'pl.DataFrame':
     Convert any dataframe to Polars via Arrow bridge.
     This is the primary conversion function for the Polars-only architecture.
-    All operations (expressions, augment, etc.) use this to convert input
+    All operations (expressions, synthetic, etc.) use this to convert input
     dataframes to Polars for processing.
     Args:
@@ -224,7 +227,7 @@ def to_polars(df: Any, backend_type: BackendType = None) -> 'pl.DataFrame':
         )
     # Fast path: already Polars
-    if isinstance(df, pl.DataFrame):
+    if HAS_POLARS and isinstance(df, pl.DataFrame):
         return df
     # Validate input
@@ -240,6 +243,13 @@ def to_polars(df: Any, backend_type: BackendType = None) -> 'pl.DataFrame':
     # Convert via Arrow bridge
     try:
         bridge = get_arrow_bridge()
+        if bridge is None:
+            # Fallback: direct conversion for pandas
+            if backend_type == "pandas":
+                if isinstance(df, pd.DataFrame):
+                    return pl.from_pandas(df)
+            raise RuntimeError("Arrow bridge not available and cannot convert non-pandas DataFrame")
         arrow_table = bridge.to_arrow(df, backend_type)
         pl_df = bridge.from_arrow(arrow_table, "polars")
         return pl_df
@@ -309,6 +319,12 @@ def from_polars(pl_df: 'pl.DataFrame', target_backend: BackendType) -> Any:
     # Convert via Arrow bridge
     try:
         bridge = get_arrow_bridge()
+        if bridge is None:
+            # Fallback: direct conversion for pandas
+            if target_backend == "pandas":
+                return pl_df.to_pandas()
+            raise RuntimeError("Arrow bridge not available and cannot convert to non-pandas DataFrame")
         arrow_table = bridge.to_arrow(pl_df, "polars")
         result_df = bridge.from_arrow(arrow_table, target_backend)
         return result_df

additory/common/distributions.py CHANGED Viewed

@@ -1,5 +1,5 @@
 """
-Distribution Strategies for Data Augmentation
+Distribution Strategies for Synthetic Data Generation
 Provides statistical distribution-based data generation:
 - Normal (Gaussian) distribution

additory/common/sample_data.py CHANGED Viewed

@@ -8,8 +8,8 @@ loaded on-demand using the existing .add file parser.
 Usage:
     from additory.common.sample_data import get_sample_dataset
-    # For augment
-    df = get_sample_dataset("augment", "sample")
+    # For synthetic
+    df = get_sample_dataset("synthetic", "sample")
     # For expressions (future)
     df = get_sample_dataset("expressions", "sample")
@@ -25,7 +25,7 @@ from additory.common.exceptions import ValidationError
 def get_sample_dataset(
-    module: str = "augment",
+    module: str = "synthetic",
     block: str = "sample",
     dataset_type: str = "clean"
 ) -> pl.DataFrame:
@@ -33,12 +33,12 @@ def get_sample_dataset(
     Load a sample dataset from .add files.
     This function provides centralized access to sample datasets across
-    all additory modules (augment, expressions, utilities). Sample datasets
+    all additory modules (synthetic, expressions, utilities). Sample datasets
     are stored as .add files in the reference/ directory structure.
     Args:
-        module: Module name ("augment", "expressions", "utilities")
-        block: Block name within the .add file ("sample" for augment)
+        module: Module name ("synthetic", "expressions", "utilities")
+        block: Block name within the .add file ("sample" for synthetic)
         dataset_type: Type of sample data ("clean" or "unclean")
     Returns:
@@ -48,8 +48,8 @@ def get_sample_dataset(
         ValidationError: If module, block, or dataset_type not found
     Examples:
-        >>> # Load augment sample dataset
-        >>> df = get_sample_dataset("augment", "sample")
+        >>> # Load synthetic sample dataset
+        >>> df = get_sample_dataset("synthetic", "sample")
         >>> print(df.shape)
         (50, 10)
@@ -57,7 +57,7 @@ def get_sample_dataset(
         >>> df = get_sample_dataset("expressions", "sample", "clean")
         >>> df_unclean = get_sample_dataset("expressions", "sample", "unclean")
-    Sample Dataset Structure (augment):
+    Sample Dataset Structure (synthetic):
         - id: Sequential numeric IDs (1-50)
         - emp_id: Employee IDs with pattern (EMP_001 - EMP_050)
         - order_id: Order IDs with different padding (ORD_0001 - ORD_0050)
@@ -72,8 +72,8 @@ def get_sample_dataset(
     # Construct path to .add file
     base_path = Path(__file__).parent.parent.parent / "reference"
-    if module == "augment":
-        add_file_path = base_path / "augment_definitions" / f"{block}_0.1.add"
+    if module == "synthetic":
+        add_file_path = base_path / "synthetic_definitions" / f"{block}_0.1.add"
     elif module == "expressions":
         add_file_path = base_path / "expressions_definitions" / f"{block}_0.1.add"
     elif module == "utilities":
@@ -81,7 +81,7 @@ def get_sample_dataset(
     else:
         raise ValidationError(
             f"Unknown module '{module}'. "
-            f"Valid modules: augment, expressions, utilities"
+            f"Valid modules: synthetic, expressions, utilities"
         )
     # Check if file exists
@@ -141,7 +141,7 @@ def list_available_samples() -> dict:
         >>> samples = list_available_samples()
         >>> print(samples)
         {
-            'augment': ['sample'],
+            'synthetic': ['sample'],
             'expressions': ['sample'],
             'utilities': []
         }
@@ -149,15 +149,15 @@ def list_available_samples() -> dict:
     base_path = Path(__file__).parent.parent.parent / "reference"
     available = {}
-    # Check augment
-    augment_path = base_path / "augment_definitions"
-    if augment_path.exists():
-        available['augment'] = [
+    # Check synthetic
+    synthetic_path = base_path / "synthetic_definitions"
+    if synthetic_path.exists():
+        available['synthetic'] = [
             f.stem.rsplit('_', 1)[0]  # Remove version suffix
-            for f in augment_path.glob("*.add")
+            for f in synthetic_path.glob("*.add")
         ]
     else:
-        available['augment'] = []
+        available['synthetic'] = []
     # Check expressions
     expressions_path = base_path / "expressions_definitions"

additory/core/backends/arrow_bridge.py CHANGED Viewed

@@ -16,6 +16,13 @@ try:
 except ImportError as e:
     ARROW_AVAILABLE = False
     IMPORT_ERROR = str(e)
+    # Create dummy classes for type annotations
+    class pa:
+        Table = Any
+    class pl:
+        DataFrame = Any
+    class pd:
+        DataFrame = Any
 from ..logging import log_info, log_warning
 from .cudf_bridge import get_cudf_bridge

additory/core/config.py CHANGED Viewed

@@ -329,14 +329,14 @@ def set_custom_formula_path(path):
 # backend preference setting
-_backend_preference: str | None = None  # "cpu", "gpu", or None
+_backend_preference: Optional[str] = None  # "cpu", "gpu", or None
-def set_backend_preference(mode: str | None):
+def set_backend_preference(mode: Optional[str]):
     global _backend_preference
     if mode not in (None, "cpu", "gpu"):
         raise ValueError("backend must be 'cpu', 'gpu', or None")
     _backend_preference = mode
-def get_backend_preference() -> str | None:
+def get_backend_preference() -> Optional[str]:
     return _backend_preference

additory/core/polars_expression_engine.py CHANGED Viewed

@@ -32,7 +32,10 @@ class PolarsExpressionEngine:
     """Exclusive Polars-based expression processing engine"""
     def __init__(self):
-        self.arrow_bridge = EnhancedArrowBridge()
+        try:
+            self.arrow_bridge = EnhancedArrowBridge()
+        except ArrowBridgeError:
+            self.arrow_bridge = None
         self.execution_stats = {
             "total_executions": 0,
             "total_time_ms": 0.0,
@@ -68,14 +71,28 @@ class PolarsExpressionEngine:
             try:
                 # Auto-detect backend if not specified
                 if backend_type is None:
-                    backend_type = self.arrow_bridge.detect_backend(df)
+                    if self.arrow_bridge:
+                        backend_type = self.arrow_bridge.detect_backend(df)
+                    else:
+                        backend_type = "pandas"  # fallback
                 # Get memory usage before processing
-                memory_before = self.arrow_bridge._get_memory_usage_mb()
+                if self.arrow_bridge:
+                    memory_before = self.arrow_bridge._get_memory_usage_mb()
+                else:
+                    memory_before = 0
                 # 1. Convert input to Arrow
                 log_info(f"[polars_engine] Converting {backend_type} to Arrow")
-                arrow_table = self.arrow_bridge.to_arrow(df, backend_type)
+                if self.arrow_bridge:
+                    arrow_table = self.arrow_bridge.to_arrow(df, backend_type)
+                else:
+                    # Fallback: assume pandas and convert directly
+                    import pandas as pd
+                    if isinstance(df, pd.DataFrame):
+                        arrow_table = pl.from_pandas(df).to_arrow()
+                    else:
+                        raise RuntimeError("Arrow bridge not available and input is not pandas DataFrame")
                 # 2. Convert Arrow to Polars
                 log_info("[polars_engine] Converting Arrow to Polars")
@@ -93,11 +110,18 @@ class PolarsExpressionEngine:
                 # 5. Convert to original backend format
                 log_info(f"[polars_engine] Converting Arrow to {backend_type}")
-                final_result = self.arrow_bridge.from_arrow(result_arrow, backend_type)
+                if self.arrow_bridge:
+                    final_result = self.arrow_bridge.from_arrow(result_arrow, backend_type)
+                else:
+                    # Fallback: convert back to pandas
+                    final_result = pl.from_arrow(result_arrow).to_pandas()
                 # Calculate execution statistics
                 execution_time = (datetime.now() - start_time).total_seconds() * 1000
-                memory_after = self.arrow_bridge._get_memory_usage_mb()
+                if self.arrow_bridge:
+                    memory_after = self.arrow_bridge._get_memory_usage_mb()
+                else:
+                    memory_after = 0
                 memory_used = max(0, memory_after - memory_before)
                 # Update global statistics
@@ -122,7 +146,8 @@ class PolarsExpressionEngine:
             finally:
                 # 6. Always cleanup Arrow memory
-                self.arrow_bridge.cleanup_arrow_memory()
+                if self.arrow_bridge:
+                    self.arrow_bridge.cleanup_arrow_memory()
     def _execute_polars_expression(self, polars_df: pl.DataFrame,
                                  expression: str, output_column: str) -> pl.DataFrame:
@@ -381,14 +406,28 @@ class PolarsExpressionEngine:
         try:
             # Auto-detect backend if not specified
             if backend_type is None:
-                backend_type = self.arrow_bridge.detect_backend(df)
+                if self.arrow_bridge:
+                    backend_type = self.arrow_bridge.detect_backend(df)
+                else:
+                    backend_type = "pandas"
             # Get memory usage before processing
-            memory_before = self.arrow_bridge._get_memory_usage_mb()
+            if self.arrow_bridge:
+                memory_before = self.arrow_bridge._get_memory_usage_mb()
+            else:
+                memory_before = 0
             # Convert to Polars via Arrow
-            arrow_table = self.arrow_bridge.to_arrow(df, backend_type)
-            polars_df = pl.from_arrow(arrow_table)
+            if self.arrow_bridge:
+                arrow_table = self.arrow_bridge.to_arrow(df, backend_type)
+                polars_df = pl.from_arrow(arrow_table)
+            else:
+                # Fallback: assume pandas
+                import pandas as pd
+                if isinstance(df, pd.DataFrame):
+                    polars_df = pl.from_pandas(df)
+                else:
+                    raise RuntimeError("Arrow bridge not available and input is not pandas DataFrame")
             # Execute using AST
             polars_expr = self._ast_to_polars_expr(ast_tree)
@@ -396,11 +435,17 @@ class PolarsExpressionEngine:
             # Convert back to original format
             result_arrow = result_df.to_arrow()
-            final_result = self.arrow_bridge.from_arrow(result_arrow, backend_type)
+            if self.arrow_bridge:
+                final_result = self.arrow_bridge.from_arrow(result_arrow, backend_type)
+            else:
+                final_result = pl.from_arrow(result_arrow).to_pandas()
             # Calculate statistics
             execution_time = (datetime.now() - start_time).total_seconds() * 1000
-            memory_after = self.arrow_bridge._get_memory_usage_mb()
+            if self.arrow_bridge:
+                memory_after = self.arrow_bridge._get_memory_usage_mb()
+            else:
+                memory_after = 0
             memory_used = max(0, memory_after - memory_before)
             # Update statistics
@@ -422,7 +467,8 @@ class PolarsExpressionEngine:
             raise PolarsExpressionError(f"AST execution failed: {e}")
         finally:
-            self.arrow_bridge.cleanup_arrow_memory()
+            if self.arrow_bridge:
+                self.arrow_bridge.cleanup_arrow_memory()
     def validate_expression(self, expression: str) -> bool:
         """
@@ -489,7 +535,10 @@ class PolarsExpressionEngine:
             Benchmark results
         """
         times = []
-        backend_type = self.arrow_bridge.detect_backend(df)
+        if self.arrow_bridge:
+            backend_type = self.arrow_bridge.detect_backend(df)
+        else:
+            backend_type = "pandas"
         for i in range(iterations):
             try:
@@ -532,7 +581,8 @@ class PolarsExpressionEngine:
         """Cleanup callback for memory manager"""
         try:
             # Cleanup Arrow bridge memory
-            self.arrow_bridge.cleanup_arrow_memory()
+            if self.arrow_bridge:
+                self.arrow_bridge.cleanup_arrow_memory()
             # Reset statistics if they get too large
             if self.execution_stats["total_executions"] > 10000:

additory/core/registry.py CHANGED Viewed

@@ -2,6 +2,7 @@
 # Versioned registry for additory
 from dataclasses import dataclass
+from typing import Optional
 import os
 import json
@@ -26,9 +27,9 @@ class ResolvedFormula:
     source: str
     version: str
     mode: str = "local"
-    ast: dict | None = None
-    sample_clean: dict | None = None
-    sample_unclean: dict | None = None
+    ast: Optional[dict] = None
+    sample_clean: Optional[dict] = None
+    sample_unclean: Optional[dict] = None
 # ------------------------------------------------------------

additory/dynamic_api.py CHANGED Viewed

@@ -15,9 +15,8 @@ class AdditoryAPI(SimpleNamespace):
     Main API class for Additory functionality.
     Provides access to:
-    - add.augment() - Data augmentation
+    - add.synthetic() - Synthetic data generation
     - add.to() - Lookup/join operations
-    - add.synth() - Synthetic data generation
     - add.scan() - Data profiling and analysis
     - add.my - User expressions
     - add.play() - Hidden games (for the curious 😉)
@@ -31,8 +30,15 @@ class AdditoryAPI(SimpleNamespace):
         self.my = ExpressionProxy(namespace="user")
         self._builtin_proxy = ExpressionProxy(namespace="builtin")
-        # Explicitly set the augment method to prevent namespace conflicts
-        self.augment = self._augment_method
+        # Explicitly set methods to prevent namespace conflicts
+        self.synthetic = self._synthetic_method
+        self.deduce = self._deduce_method
+        self.to = self._to_method
+        self.onehotencoding = self._onehotencoding_method
+        self.harmonize_units = self._harmonize_units_method
+        self.scan = self._scan_method
+        self.games = self._games_method
+        self.play = self._play_method
     def __getattr__(self, name):
         """
@@ -61,65 +67,65 @@ class AdditoryAPI(SimpleNamespace):
         except Exception:
             return False
-    def _augment_method(self, df, n_rows=5, strategy="auto", seed=None, output_format="pandas", **kwargs):
+    def _synthetic_method(self, df, n_rows=5, strategy="auto", seed=None, output_format="pandas", **kwargs):
         """
-        Augment a dataframe with additional rows or create data from scratch.
+        Generate synthetic data by extending a dataframe or creating from scratch.
         Three modes:
-        1. Augment mode: Pass a DataFrame to add rows
+        1. Extend mode: Pass a DataFrame to add synthetic rows
         2. Create mode: Pass "@new" to create data from scratch
         3. Sample mode: Pass "@sample" to load sample data
         Args:
-            df: DataFrame to augment, "@new" to create, or "@sample" for sample data
-            n_rows: Number of rows (int for create/sample, int/float/str for augment)
-            strategy: Strategy specification (dict for create, str/dict for augment)
+            df: DataFrame to extend, "@new" to create, or "@sample" for sample data
+            n_rows: Number of rows (int for create/sample, int/float/str for extend)
+            strategy: Strategy specification (dict for create, str/dict for extend)
             seed: Random seed for reproducibility
             output_format: Output format ("pandas", "polars", "cudf")
             **kwargs: Additional parameters
         Returns:
-            Augmented or generated DataFrame
+            Extended or generated DataFrame
         Examples:
-            # Augment existing data
-            result = add.augment(df, n_rows=100, strategy='auto')
+            # Extend existing data
+            result = add.synthetic(df, n_rows=100, strategy='auto')
             # Create from scratch
-            result = add.augment("@new", n_rows=100, strategy={'id': 'increment', 'age': 'range:18-65'})
+            result = add.synthetic("@new", n_rows=100, strategy={'id': 'increment', 'age': 'range:18-65'})
             # Load sample data
-            result = add.augment("@sample", n_rows=50)
+            result = add.synthetic("@sample", n_rows=50)
         """
         # Store reference to restore after import (in the correct namespace)
         import additory
-        original_augment = getattr(additory, 'augment', None)
+        original_synthetic = getattr(additory, 'synthetic', None)
         try:
             # Import and call the implementation
-            from additory.augment.augmentor import augment as augment_impl
-            result = augment_impl(df, n_rows=n_rows, strategy=strategy, seed=seed,
-                                 output_format=output_format, **kwargs)
+            from additory.synthetic.synthesizer import synthetic as synthetic_impl
+            result = synthetic_impl(df, n_rows=n_rows, strategy=strategy, seed=seed,
+                                   output_format=output_format, **kwargs)
             # Restore the method reference in the additory module namespace
-            # The import above will have overridden additory.augment with the module
+            # The import above will have overridden additory.synthetic with the module
             # We need to restore it to point to this method
-            if original_augment is not None:
-                additory.augment = original_augment
+            if original_synthetic is not None:
+                additory.synthetic = original_synthetic
             else:
-                # If there was no original augment, set it to this method
-                additory.augment = self._augment_method
+                # If there was no original synthetic, set it to this method
+                additory.synthetic = self._synthetic_method
             return result
         except Exception as e:
             # Restore the method reference even if there's an error
-            if original_augment is not None:
-                additory.augment = original_augment
+            if original_synthetic is not None:
+                additory.synthetic = original_synthetic
             else:
-                additory.augment = self._augment_method
+                additory.synthetic = self._synthetic_method
             raise
-    def to(self, target_df, from_df=None, bring=None, against=None, **kwargs):
+    def _to_method(self, target_df, from_df=None, bring=None, against=None, **kwargs):
         """
         Add columns from reference dataframe to target dataframe.
@@ -140,26 +146,7 @@ class AdditoryAPI(SimpleNamespace):
         from additory.utilities.lookup import to
         return to(target_df, from_df, bring=bring, against=against, **kwargs)
-    def synth(self, schema_path: str, rows: int = 1000, engine: Optional[str] = None):
-        """
-        Generate synthetic data from a schema file.
-        Args:
-            schema_path: Path to the .toml schema file
-            rows: Number of rows to generate (default: 1000)
-            engine: Output engine ("pandas" or "polars"). If None, uses default from config
-        Returns:
-            Generated DataFrame in the specified format
-        Example:
-            df = add.synth("customer.toml", rows=5000)
-            df = add.synth("customer.toml", rows=5000, engine="polars")
-        """
-        from additory.synthetic.api import synth as synth_impl
-        return synth_impl(schema_path, rows, engine)
-    def onehotencoding(self, df, columns=None, **kwargs):
+    def _onehotencoding_method(self, df, columns=None, **kwargs):
         """
         One-hot encode categorical columns.
@@ -174,7 +161,7 @@ class AdditoryAPI(SimpleNamespace):
         from additory.utilities.encoding import onehotencoding
         return onehotencoding(df, column=columns, **kwargs)
-    def harmonize_units(self, df, value_column, unit_column, target_unit=None, position="end", **kwargs):
+    def _harmonize_units_method(self, df, value_column, unit_column, target_unit=None, position="end", **kwargs):
         """
         Harmonize units in a dataframe.
@@ -196,7 +183,7 @@ class AdditoryAPI(SimpleNamespace):
         from additory.utilities.units import harmonize_units
         return harmonize_units(df, value_column, unit_column, target_unit, position, **kwargs)
-    def scan(
+    def _scan_method(
         self,
         df: Union[pl.DataFrame, pd.DataFrame, Any],
         preset: Optional[str] = None,
@@ -279,7 +266,64 @@ class AdditoryAPI(SimpleNamespace):
             verbose=verbose
         )
-    def play(self, game: str = "tictactoe"):
+    def _deduce_method(
+        self,
+        df: Union[pd.DataFrame, pl.DataFrame, Any],
+        from_column: Union[str, List[str]],
+        to_column: str
+    ) -> Union[pd.DataFrame, pl.DataFrame, Any]:
+        """
+        Deduce missing labels based on text similarity to labeled examples.
+        Uses cosine similarity on TF-IDF vectors. Pure Python, no LLMs, offline-first.
+        Requires at least 3 labeled examples to work.
+        When multiple source columns are provided, they are concatenated with
+        spaces before computing similarity.
+        Args:
+            df: DataFrame with some labeled and some unlabeled rows
+            from_column: Text column(s) to analyze
+                        - str: Single column (e.g., "comment")
+                        - List[str]: Multiple columns (e.g., ["comment", "notes"])
+            to_column: Label column to fill (e.g., "status")
+        Returns:
+            DataFrame with deduced labels filled in
+        Examples:
+            # Single column
+            >>> result = add.deduce(df, from_column="comment", to_column="status")
+            # Multiple columns (better accuracy)
+            >>> result = add.deduce(
+            ...     df,
+            ...     from_column=["comment", "notes", "description"],
+            ...     to_column="status"
+            ... )
+        Privacy: Your data never leaves your machine. No external connections.
+        """
+        from additory.synthetic.deduce import deduce as deduce_impl
+        return deduce_impl(df, from_column, to_column)
+    def _games_method(self):
+        """
+        List available games! 🎮
+        Returns a list of games you can play with add.play().
+        Returns:
+            List of available game names
+        Example:
+            >>> import additory
+            >>> additory.add.games()
+            ['tictactoe', 'sudoku']
+        """
+        return ['tictactoe', 'sudoku']
+    def _play_method(self, game: str = "tictactoe"):
         """
         Play a game! 🎮

additory/expressions/proxy.py CHANGED Viewed

@@ -287,7 +287,10 @@ class EnhancedExpressionProxy:
                 backend_type = "polars"
             else:
                 # Try to detect other types
-                backend_type = self.polars_engine.arrow_bridge.detect_backend(df)
+                if self.polars_engine.arrow_bridge:
+                    backend_type = self.polars_engine.arrow_bridge.detect_backend(df)
+                else:
+                    backend_type = "pandas"  # fallback
             # Execute using Polars engine
             result = self.polars_engine.execute_expression(

additory 0.1.0a2__py3-none-any.whl → 0.1.0a4__py3-none-any.whl

additory 0.1.0a2__py3-none-any.whl → 0.1.0a4__py3-none-any.whl