PyPI - phantom-ai - Versions diffs - 0.1.0__tar.gz - Mend

phantom-ai 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

phantom_ai-0.1.0/.env.example +4 -0
phantom_ai-0.1.0/.github/workflows/ci.yml +26 -0
phantom_ai-0.1.0/.gitignore +41 -0
phantom_ai-0.1.0/LICENSE +21 -0
phantom_ai-0.1.0/PKG-INFO +170 -0
phantom_ai-0.1.0/README.md +124 -0
phantom_ai-0.1.0/assets/logo.png +0 -0
phantom_ai-0.1.0/assets/logo.svg +27 -0
phantom_ai-0.1.0/phantom/__init__.py +108 -0
phantom_ai-0.1.0/phantom/_cache.py +256 -0
phantom_ai-0.1.0/phantom/_chat.py +311 -0
phantom_ai-0.1.0/phantom/_data.py +285 -0
phantom_ai-0.1.0/phantom/_errors.py +110 -0
phantom_ai-0.1.0/phantom/_graph.py +83 -0
phantom_ai-0.1.0/phantom/_inspect.py +68 -0
phantom_ai-0.1.0/phantom/_operation_set.py +155 -0
phantom_ai-0.1.0/phantom/_providers.py +644 -0
phantom_ai-0.1.0/phantom/_ref.py +134 -0
phantom_ai-0.1.0/phantom/_registry.py +299 -0
phantom_ai-0.1.0/phantom/_result.py +93 -0
phantom_ai-0.1.0/phantom/_security.py +279 -0
phantom_ai-0.1.0/phantom/_serialize.py +98 -0
phantom_ai-0.1.0/phantom/_session.py +1025 -0
phantom_ai-0.1.0/phantom/_system_prompt.py +129 -0
phantom_ai-0.1.0/phantom/py.typed +0 -0
phantom_ai-0.1.0/pyproject.toml +60 -0
phantom_ai-0.1.0/ruff.toml +12 -0

phantom_ai-0.1.0/.env.example ADDED Viewed

@@ -0,0 +1,4 @@
+# Copy this file to .env and fill in your API key
+# cp .env.example .env
+ANTHROPIC_API_KEY=sk-ant-api03-your-key-here

phantom_ai-0.1.0/.github/workflows/ci.yml ADDED Viewed

@@ -0,0 +1,26 @@
+name: CI
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+jobs:
+  lint:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install ruff
+      - name: Lint with ruff
+        run: ruff check .

phantom_ai-0.1.0/.gitignore ADDED Viewed

@@ -0,0 +1,41 @@
+# .env.example is tracked as documentation
+.env
+.env.*
+!.env.example
+# Byte-compiled / optimized
+__pycache__/
+*.py[cod]
+*$py.class
+# Distribution / packaging
+dist/
+build/
+*.egg-info/
+# Virtual environments
+.venv/
+venv/
+# Testing
+.pytest_cache/
+.coverage
+htmlcov/
+# Type checking / linting
+.mypy_cache/
+.ruff_cache/
+# IDE
+.idea/
+.vscode/
+*.swp
+# Claude Code
+.claude/
+# Dev notes
+TODO.md
+# Dev folders
+examples/

phantom_ai-0.1.0/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2025 James Wirth
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

phantom_ai-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,170 @@
+Metadata-Version: 2.4
+Name: phantom-ai
+Version: 0.1.0
+Summary: Sandboxed data analysis with LLMs, powered by DuckDB
+Project-URL: Homepage, https://github.com/James-Wirth/phantom
+Project-URL: Repository, https://github.com/James-Wirth/phantom
+Project-URL: Issues, https://github.com/James-Wirth/phantom/issues
+Author: James Wirth
+License-Expression: MIT
+License-File: LICENSE
+Keywords: data-analysis,duckdb,llm,sandbox,security,sql
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: Topic :: Database
+Classifier: Topic :: Scientific/Engineering
+Classifier: Typing :: Typed
+Requires-Python: >=3.11
+Requires-Dist: duckdb>=0.9
+Provides-Extra: all
+Requires-Dist: anthropic>=0.40; extra == 'all'
+Requires-Dist: google-genai>=1.0; extra == 'all'
+Requires-Dist: openai>=1.0; extra == 'all'
+Requires-Dist: pandas>=2.0; extra == 'all'
+Requires-Dist: polars>=0.20; extra == 'all'
+Provides-Extra: anthropic
+Requires-Dist: anthropic>=0.40; extra == 'anthropic'
+Provides-Extra: dev
+Requires-Dist: mypy>=1.8; extra == 'dev'
+Requires-Dist: pandas>=2.0; extra == 'dev'
+Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
+Requires-Dist: pytest>=8.0; extra == 'dev'
+Provides-Extra: google
+Requires-Dist: google-genai>=1.0; extra == 'google'
+Provides-Extra: openai
+Requires-Dist: openai>=1.0; extra == 'openai'
+Provides-Extra: pandas
+Requires-Dist: pandas>=2.0; extra == 'pandas'
+Provides-Extra: polars
+Requires-Dist: polars>=0.20; extra == 'polars'
+Description-Content-Type: text/markdown
+<h1 align="center">
+  <img src="https://raw.githubusercontent.com/James-Wirth/phantom/main/assets/logo.png" alt="Phantom" width="80">
+  <br>phantom
+</h1>
+<p align="center">
+  Sandboxed data analysis with LLMs (powered by DuckDB).
+  <br><br>
+  <a href="https://github.com/James-Wirth/phantom/actions/workflows/ci.yml"><img src="https://github.com/James-Wirth/phantom/actions/workflows/ci.yml/badge.svg" alt="CI"></a>
+  <a href="LICENSE"><img src="https://img.shields.io/badge/license-MIT-blue.svg" alt="License"></a>
+</p>
+Phantom is a Python framework for LLM-assisted data analysis. The LLM doesn't need to see the actual data. Phantom reasons with opaque **semantic references** (`@a3f2`), writes SQL, and executes the queries locally in a sandboxed [DuckDB](https://duckdb.org/) engine.
+## Quick Start
+```bash
+pip install git+https://github.com/James-Wirth/phantom.git
+```
+```python
+import phantom
+session = phantom.Session(allowed_dirs=["./data"])
+chat = phantom.Chat(
+    session,
+    provider="anthropic",
+    model="claude-sonnet-4-20250514",
+    system="You are an astrophysicist. Data files are in ./data/.",
+)
+response = chat.ask(
+    "Which habitable-zone exoplanets are within 50 light-years of Earth, "
+    "and what kind of stars do they orbit?"
+)
+```
+## How It Works
+Given two CSV files and the question *"Which habitable-zone exoplanets are within 50 light-years of Earth, and what kind of stars do they orbit?"*, Phantom produces this tool-call trace:
+```
+[0] read_csv("exoplanets.csv")            → @6a97
+[1] read_csv("stars.csv")                 → @cc35
+[2] query({p: @6a97})                     → @b1a0  -- habitable-zone filter
+[3] query({s: @cc35})                     → @f4e2  -- nearby stars (< 50 ly)
+[4] query({hz: @b1a0, nb: @f4e2})         → @31d7  -- join + rank by distance
+[5] export(@31d7)                         → [{name: "Proxima Cen b", ...}]
+```
+The semantic refs (`@6a97`, `@cc35`, ...) compose into a lazy execution graph:
+```
+@6a97 → @b1a0 ─┐
+                ├→ @31d7
+@cc35 → @f4e2 ─┘
+```
+Shared subgraphs are resolved once and cached. The query engine is [DuckDB](https://duckdb.org/), so JOINs, window functions, CTEs, and aggregations all work natively.
+Claude's answer (abridged):
+> | Planet | Distance | Star | Spectral type |
+> |:-------|:---------|:-----|:--------------|
+> | Proxima Cen b | 4.2 ly | Proxima Cen | M-dwarf (3,042 K) |
+> | Ross 128 b | 11 ly | Ross 128 | M-dwarf (3,192 K) |
+> | Teegarden b | 12 ly | Teegarden | M-dwarf (2,904 K) |
+> | TRAPPIST-1 e/f/g | 40 ly | TRAPPIST-1 | M-dwarf (2,566 K) |
+>
+> The nearest habitable-zone candidates overwhelmingly orbit **M-dwarf** stars — small, cool, and the most common type in the galaxy.
+## LLM Providers
+Built-in support for **Anthropic**, **OpenAI**, and **Google Gemini**:
+```bash
+pip install "phantom-ai[anthropic]"
+pip install "phantom-ai[openai]"
+pip install "phantom-ai[google]"
+```
+```python
+chat = phantom.Chat(
+    session,
+    provider="anthropic",
+    model="claude-sonnet-4-20250514"
+)
+chat = phantom.Chat(
+    session,
+    provider="openai",
+    model="gpt-4o"
+)
+chat = phantom.Chat(
+    session,
+    provider="google",
+    model="gemini-2.0-flash"
+)
+```
+Any **OpenAI-compatible** API (Groq, Together, Fireworks, Ollama, vLLM, ...) works via `base_url`:
+```python
+chat = phantom.Chat(
+    session,
+    provider=phantom.OpenAIProvider(
+        api_key="...",
+        base_url="https://api.groq.com/openai/v1",
+    ),
+    model="llama-3.1-70b-versatile",
+)
+```
+## Custom Operations
+Register domain-specific tools alongside the built-ins — the LLM can call them like any other operation:
+```python
+@session.op
+def fetch_lightcurve(target: str) -> dict:
+    """Fetch a lightcurve from the MAST archive."""
+    return mast_api.query(target)
+```

phantom_ai-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,124 @@
+<h1 align="center">
+  <img src="https://raw.githubusercontent.com/James-Wirth/phantom/main/assets/logo.png" alt="Phantom" width="80">
+  <br>phantom
+</h1>
+<p align="center">
+  Sandboxed data analysis with LLMs (powered by DuckDB).
+  <br><br>
+  <a href="https://github.com/James-Wirth/phantom/actions/workflows/ci.yml"><img src="https://github.com/James-Wirth/phantom/actions/workflows/ci.yml/badge.svg" alt="CI"></a>
+  <a href="LICENSE"><img src="https://img.shields.io/badge/license-MIT-blue.svg" alt="License"></a>
+</p>
+Phantom is a Python framework for LLM-assisted data analysis. The LLM doesn't need to see the actual data. Phantom reasons with opaque **semantic references** (`@a3f2`), writes SQL, and executes the queries locally in a sandboxed [DuckDB](https://duckdb.org/) engine.
+## Quick Start
+```bash
+pip install git+https://github.com/James-Wirth/phantom.git
+```
+```python
+import phantom
+session = phantom.Session(allowed_dirs=["./data"])
+chat = phantom.Chat(
+    session,
+    provider="anthropic",
+    model="claude-sonnet-4-20250514",
+    system="You are an astrophysicist. Data files are in ./data/.",
+)
+response = chat.ask(
+    "Which habitable-zone exoplanets are within 50 light-years of Earth, "
+    "and what kind of stars do they orbit?"
+)
+```
+## How It Works
+Given two CSV files and the question *"Which habitable-zone exoplanets are within 50 light-years of Earth, and what kind of stars do they orbit?"*, Phantom produces this tool-call trace:
+```
+[0] read_csv("exoplanets.csv")            → @6a97
+[1] read_csv("stars.csv")                 → @cc35
+[2] query({p: @6a97})                     → @b1a0  -- habitable-zone filter
+[3] query({s: @cc35})                     → @f4e2  -- nearby stars (< 50 ly)
+[4] query({hz: @b1a0, nb: @f4e2})         → @31d7  -- join + rank by distance
+[5] export(@31d7)                         → [{name: "Proxima Cen b", ...}]
+```
+The semantic refs (`@6a97`, `@cc35`, ...) compose into a lazy execution graph:
+```
+@6a97 → @b1a0 ─┐
+                ├→ @31d7
+@cc35 → @f4e2 ─┘
+```
+Shared subgraphs are resolved once and cached. The query engine is [DuckDB](https://duckdb.org/), so JOINs, window functions, CTEs, and aggregations all work natively.
+Claude's answer (abridged):
+> | Planet | Distance | Star | Spectral type |
+> |:-------|:---------|:-----|:--------------|
+> | Proxima Cen b | 4.2 ly | Proxima Cen | M-dwarf (3,042 K) |
+> | Ross 128 b | 11 ly | Ross 128 | M-dwarf (3,192 K) |
+> | Teegarden b | 12 ly | Teegarden | M-dwarf (2,904 K) |
+> | TRAPPIST-1 e/f/g | 40 ly | TRAPPIST-1 | M-dwarf (2,566 K) |
+>
+> The nearest habitable-zone candidates overwhelmingly orbit **M-dwarf** stars — small, cool, and the most common type in the galaxy.
+## LLM Providers
+Built-in support for **Anthropic**, **OpenAI**, and **Google Gemini**:
+```bash
+pip install "phantom-ai[anthropic]"
+pip install "phantom-ai[openai]"
+pip install "phantom-ai[google]"
+```
+```python
+chat = phantom.Chat(
+    session,
+    provider="anthropic",
+    model="claude-sonnet-4-20250514"
+)
+chat = phantom.Chat(
+    session,
+    provider="openai",
+    model="gpt-4o"
+)
+chat = phantom.Chat(
+    session,
+    provider="google",
+    model="gemini-2.0-flash"
+)
+```
+Any **OpenAI-compatible** API (Groq, Together, Fireworks, Ollama, vLLM, ...) works via `base_url`:
+```python
+chat = phantom.Chat(
+    session,
+    provider=phantom.OpenAIProvider(
+        api_key="...",
+        base_url="https://api.groq.com/openai/v1",
+    ),
+    model="llama-3.1-70b-versatile",
+)
+```
+## Custom Operations
+Register domain-specific tools alongside the built-ins — the LLM can call them like any other operation:
+```python
+@session.op
+def fetch_lightcurve(target: str) -> dict:
+    """Fetch a lightcurve from the MAST archive."""
+    return mast_api.query(target)
+```

phantom_ai-0.1.0/assets/logo.png ADDED Viewed

Binary file

phantom_ai-0.1.0/assets/logo.svg ADDED Viewed

@@ -0,0 +1,27 @@
+<svg width="70" height="70" viewBox="0 0 70 70" fill="none" xmlns="http://www.w3.org/2000/svg">
+  <defs>
+    <linearGradient id="grad" x1="0%" y1="0%" x2="100%" y2="100%">
+      <stop offset="0%" stop-color="#0EA5E9"/>
+      <stop offset="50%" stop-color="#3B82F6"/>
+      <stop offset="100%" stop-color="#6366F1"/>
+    </linearGradient>
+    <linearGradient id="textGrad" x1="0%" y1="0%" x2="100%" y2="0%">
+      <stop offset="0%" stop-color="#0EA5E9"/>
+      <stop offset="100%" stop-color="#6366F1"/>
+    </linearGradient>
+    <filter id="glow" x="-50%" y="-50%" width="200%" height="200%">
+      <feGaussianBlur stdDeviation="2" result="blur"/>
+      <feMerge>
+        <feMergeNode in="blur"/>
+        <feMergeNode in="SourceGraphic"/>
+      </feMerge>
+    </filter>
+  </defs>
+  <g transform="translate(2, 2)">
+    <rect x="0" y="0" width="66" height="66" rx="18" fill="url(#grad)"/>
+    <path d="M33 16C24.2 16 17 23.2 17 32V48C17 50.2 18.8 52 21 52H23V46C23 44.3 24.3 43 26 43C27.7 43 29 44.3 29 46V52H31V42C31 40.3 32.3 39 34 39C35.7 39 37 40.3 37 42V52H39V46C39 44.3 40.3 43 42 43C43.7 43 45 44.3 45 46V52H47C49.2 52 51 50.2 51 48V32C51 23.2 43.8 16 35 16H33Z" fill="white" opacity="0.95"/>
+  </g>
+</svg>

phantom_ai-0.1.0/phantom/__init__.py ADDED Viewed

@@ -0,0 +1,108 @@
+"""
+Phantom - The semantic-concrete bridge for LLM data pipelines.
+Phantom uses session-scoped operations for isolation and concurrency safety.
+Example:
+    import pandas as pd
+    import phantom
+    # Create a session
+    session = phantom.Session()
+    # Register operations with @session.op
+    @session.op
+    def load(source: str) -> pd.DataFrame:
+        return pd.read_parquet(source)
+    @session.op
+    def filter(data: pd.DataFrame, condition: str) -> pd.DataFrame:
+        return data.query(condition)
+    # Register custom inspectors with @session.inspector
+    @session.inspector(pd.DataFrame)
+    def inspect_df(df):
+        return {"shape": list(df.shape), "columns": list(df.columns)}
+    # Create refs (lazy - nothing executes yet)
+    sales = session.ref("load", source="sales.parquet")
+    filtered = session.ref("filter", data=sales, condition="amount > 100")
+    # Resolve when needed
+    df = session.resolve(filtered)
+    # Get tools for LLM integration
+    tools = session.get_tools()
+    # Save and load graphs
+    session.save_graph(filtered, "pipeline.json")
+    loaded = session.load_graph("pipeline.json")
+"""
+from importlib.metadata import PackageNotFoundError, version
+try:
+    __version__ = version("phantom")
+except PackageNotFoundError:
+    __version__ = "0.0.0-dev"
+from ._chat import Chat, ChatResponse
+from ._errors import CycleError, MaxTurnsError, ResolutionError, TypeValidationError
+from ._operation_set import OperationSet
+from ._providers import (
+    AnthropicProvider,
+    CallOptions,
+    GoogleProvider,
+    LLMProvider,
+    OpenAIProvider,
+    ProviderResponse,
+    ProviderToolCall,
+    Usage,
+    get_provider,
+    register_provider,
+)
+from ._ref import Ref
+from ._result import ToolResult
+from ._security import (
+    DEFAULT_DENY_PATTERNS,
+    FileSizeGuard,
+    Guard,
+    PathGuard,
+    SecurityError,
+    SecurityPolicy,
+)
+from ._session import Session
+__all__ = [
+    "__version__",
+    # Core types
+    "Ref",
+    "ToolResult",
+    "Session",
+    "OperationSet",
+    # LLM interface
+    "Chat",
+    "ChatResponse",
+    # Provider interface
+    "LLMProvider",
+    "AnthropicProvider",
+    "OpenAIProvider",
+    "GoogleProvider",
+    "CallOptions",
+    "Usage",
+    "ProviderResponse",
+    "ProviderToolCall",
+    "get_provider",
+    "register_provider",
+    # Security
+    "DEFAULT_DENY_PATTERNS",
+    "SecurityError",
+    "SecurityPolicy",
+    "Guard",
+    "PathGuard",
+    "FileSizeGuard",
+    # Errors
+    "ResolutionError",
+    "TypeValidationError",
+    "CycleError",
+    "MaxTurnsError",
+]