phantom-ai 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,4 @@
1
+ # Copy this file to .env and fill in your API key
2
+ # cp .env.example .env
3
+
4
+ ANTHROPIC_API_KEY=sk-ant-api03-your-key-here
@@ -0,0 +1,26 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ lint:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - uses: actions/checkout@v4
14
+
15
+ - name: Set up Python
16
+ uses: actions/setup-python@v5
17
+ with:
18
+ python-version: "3.11"
19
+
20
+ - name: Install dependencies
21
+ run: |
22
+ python -m pip install --upgrade pip
23
+ pip install ruff
24
+
25
+ - name: Lint with ruff
26
+ run: ruff check .
@@ -0,0 +1,41 @@
1
+ # .env.example is tracked as documentation
2
+ .env
3
+ .env.*
4
+ !.env.example
5
+
6
+ # Byte-compiled / optimized
7
+ __pycache__/
8
+ *.py[cod]
9
+ *$py.class
10
+
11
+ # Distribution / packaging
12
+ dist/
13
+ build/
14
+ *.egg-info/
15
+
16
+ # Virtual environments
17
+ .venv/
18
+ venv/
19
+
20
+ # Testing
21
+ .pytest_cache/
22
+ .coverage
23
+ htmlcov/
24
+
25
+ # Type checking / linting
26
+ .mypy_cache/
27
+ .ruff_cache/
28
+
29
+ # IDE
30
+ .idea/
31
+ .vscode/
32
+ *.swp
33
+
34
+ # Claude Code
35
+ .claude/
36
+
37
+ # Dev notes
38
+ TODO.md
39
+
40
+ # Dev folders
41
+ examples/
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 James Wirth
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,170 @@
1
+ Metadata-Version: 2.4
2
+ Name: phantom-ai
3
+ Version: 0.1.0
4
+ Summary: Sandboxed data analysis with LLMs, powered by DuckDB
5
+ Project-URL: Homepage, https://github.com/James-Wirth/phantom
6
+ Project-URL: Repository, https://github.com/James-Wirth/phantom
7
+ Project-URL: Issues, https://github.com/James-Wirth/phantom/issues
8
+ Author: James Wirth
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Keywords: data-analysis,duckdb,llm,sandbox,security,sql
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Topic :: Database
20
+ Classifier: Topic :: Scientific/Engineering
21
+ Classifier: Typing :: Typed
22
+ Requires-Python: >=3.11
23
+ Requires-Dist: duckdb>=0.9
24
+ Provides-Extra: all
25
+ Requires-Dist: anthropic>=0.40; extra == 'all'
26
+ Requires-Dist: google-genai>=1.0; extra == 'all'
27
+ Requires-Dist: openai>=1.0; extra == 'all'
28
+ Requires-Dist: pandas>=2.0; extra == 'all'
29
+ Requires-Dist: polars>=0.20; extra == 'all'
30
+ Provides-Extra: anthropic
31
+ Requires-Dist: anthropic>=0.40; extra == 'anthropic'
32
+ Provides-Extra: dev
33
+ Requires-Dist: mypy>=1.8; extra == 'dev'
34
+ Requires-Dist: pandas>=2.0; extra == 'dev'
35
+ Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
36
+ Requires-Dist: pytest>=8.0; extra == 'dev'
37
+ Provides-Extra: google
38
+ Requires-Dist: google-genai>=1.0; extra == 'google'
39
+ Provides-Extra: openai
40
+ Requires-Dist: openai>=1.0; extra == 'openai'
41
+ Provides-Extra: pandas
42
+ Requires-Dist: pandas>=2.0; extra == 'pandas'
43
+ Provides-Extra: polars
44
+ Requires-Dist: polars>=0.20; extra == 'polars'
45
+ Description-Content-Type: text/markdown
46
+
47
+ <h1>
48
+ <p align="center">
49
+ <img src="https://raw.githubusercontent.com/James-Wirth/phantom/main/assets/logo.png" alt="Phantom" width="80">
50
+ <br>phantom
51
+ </h1>
52
+ <p align="center">
53
+ Sandboxed data analysis with LLMs (powered by DuckDB).
54
+ <br><br>
55
+ <a href="https://github.com/James-Wirth/phantom/actions/workflows/ci.yml"><img src="https://github.com/James-Wirth/phantom/actions/workflows/ci.yml/badge.svg" alt="CI"></a>
56
+ <a href="LICENSE"><img src="https://img.shields.io/badge/license-MIT-blue.svg" alt="License"></a>
57
+ </p>
58
+ </p>
59
+
60
+ Phantom is a Python framework for LLM-assisted data analysis. The LLM doesn't need to see the actual data. Phantom reasons with opaque **semantic references** (`@a3f2`), writes SQL, and executes the queries locally in a sandboxed [DuckDB](https://duckdb.org/) engine.
61
+
62
+ ## Quick Start
63
+
64
+ ```bash
65
+ pip install git+https://github.com/James-Wirth/phantom.git
66
+ ```
67
+
68
+ ```python
69
+ import phantom
70
+
71
+ session = phantom.Session(allowed_dirs=["./data"])
72
+
73
+ chat = phantom.Chat(
74
+ session,
75
+ provider="anthropic",
76
+ model="claude-sonnet-4-20250514",
77
+ system="You are an astrophysicist. Data files are in ./data/.",
78
+ )
79
+
80
+ response = chat.ask(
81
+ "Which habitable-zone exoplanets are within 50 light-years of Earth, "
82
+ "and what kind of stars do they orbit?"
83
+ )
84
+ ```
85
+
86
+ ## How It Works
87
+
88
+ Given two CSV files and the question *"Which habitable-zone exoplanets are within 50 light-years of Earth, and what kind of stars do they orbit?"*, Phantom produces this tool-call trace:
89
+
90
+ ```
91
+ [0] read_csv("exoplanets.csv") → @6a97
92
+ [1] read_csv("stars.csv") → @cc35
93
+ [2] query({p: @6a97}) → @b1a0 -- habitable-zone filter
94
+ [3] query({s: @cc35}) → @f4e2 -- nearby stars (< 50 ly)
95
+ [4] query({hz: @b1a0, nb: @f4e2}) → @31d7 -- join + rank by distance
96
+ [5] export(@31d7) → [{name: "Proxima Cen b", ...}]
97
+ ```
98
+
99
+ The semantic refs (`@6a97`, `@cc35`, ...) compose into a lazy execution graph:
100
+
101
+ ```
102
+ @6a97 → @b1a0 ─┐
103
+ ├→ @31d7
104
+ @cc35 → @f4e2 ─┘
105
+ ```
106
+
107
+ Shared subgraphs are resolved once and cached. The query engine is [DuckDB](https://duckdb.org/), so JOINs, window functions, CTEs, and aggregations all work natively.
108
+
109
+ Claude's answer (abridged):
110
+
111
+ > | Planet | Distance | Star | Spectral type |
112
+ > |:-------|:---------|:-----|:--------------|
113
+ > | Proxima Cen b | 4.2 ly | Proxima Cen | M-dwarf (3,042 K) |
114
+ > | Ross 128 b | 11 ly | Ross 128 | M-dwarf (3,192 K) |
115
+ > | Teegarden b | 12 ly | Teegarden | M-dwarf (2,904 K) |
116
+ > | TRAPPIST-1 e/f/g | 40 ly | TRAPPIST-1 | M-dwarf (2,566 K) |
117
+ >
118
+ > The nearest habitable-zone candidates overwhelmingly orbit **M-dwarf** stars — small, cool, and the most common type in the galaxy.
119
+
120
+ ## LLM Providers
121
+
122
+ Built-in support for **Anthropic**, **OpenAI**, and **Google Gemini**:
123
+
124
+ ```bash
125
+ pip install "phantom-ai[anthropic]"
126
+ pip install "phantom-ai[openai]"
127
+ pip install "phantom-ai[google]"
128
+ ```
129
+
130
+ ```python
131
+ chat = phantom.Chat(
132
+ session,
133
+ provider="anthropic",
134
+ model="claude-sonnet-4-20250514"
135
+ )
136
+ chat = phantom.Chat(
137
+ session,
138
+ provider="openai",
139
+ model="gpt-4o"
140
+ )
141
+ chat = phantom.Chat(
142
+ session,
143
+ provider="google",
144
+ model="gemini-2.0-flash"
145
+ )
146
+ ```
147
+
148
+ Any **OpenAI-compatible** API (Groq, Together, Fireworks, Ollama, vLLM, ...) works via `base_url`:
149
+
150
+ ```python
151
+ chat = phantom.Chat(
152
+ session,
153
+ provider=phantom.OpenAIProvider(
154
+ api_key="...",
155
+ base_url="https://api.groq.com/openai/v1",
156
+ ),
157
+ model="llama-3.1-70b-versatile",
158
+ )
159
+ ```
160
+
161
+ ## Custom Operations
162
+
163
+ Register domain-specific tools alongside the built-ins — the LLM can call them like any other operation:
164
+
165
+ ```python
166
+ @session.op
167
+ def fetch_lightcurve(target: str) -> dict:
168
+ """Fetch a lightcurve from the MAST archive."""
169
+ return mast_api.query(target)
170
+ ```
@@ -0,0 +1,124 @@
1
+ <h1>
2
+ <p align="center">
3
+ <img src="https://raw.githubusercontent.com/James-Wirth/phantom/main/assets/logo.png" alt="Phantom" width="80">
4
+ <br>phantom
5
+ </h1>
6
+ <p align="center">
7
+ Sandboxed data analysis with LLMs (powered by DuckDB).
8
+ <br><br>
9
+ <a href="https://github.com/James-Wirth/phantom/actions/workflows/ci.yml"><img src="https://github.com/James-Wirth/phantom/actions/workflows/ci.yml/badge.svg" alt="CI"></a>
10
+ <a href="LICENSE"><img src="https://img.shields.io/badge/license-MIT-blue.svg" alt="License"></a>
11
+ </p>
12
+ </p>
13
+
14
+ Phantom is a Python framework for LLM-assisted data analysis. The LLM doesn't need to see the actual data. Phantom reasons with opaque **semantic references** (`@a3f2`), writes SQL, and executes the queries locally in a sandboxed [DuckDB](https://duckdb.org/) engine.
15
+
16
+ ## Quick Start
17
+
18
+ ```bash
19
+ pip install git+https://github.com/James-Wirth/phantom.git
20
+ ```
21
+
22
+ ```python
23
+ import phantom
24
+
25
+ session = phantom.Session(allowed_dirs=["./data"])
26
+
27
+ chat = phantom.Chat(
28
+ session,
29
+ provider="anthropic",
30
+ model="claude-sonnet-4-20250514",
31
+ system="You are an astrophysicist. Data files are in ./data/.",
32
+ )
33
+
34
+ response = chat.ask(
35
+ "Which habitable-zone exoplanets are within 50 light-years of Earth, "
36
+ "and what kind of stars do they orbit?"
37
+ )
38
+ ```
39
+
40
+ ## How It Works
41
+
42
+ Given two CSV files and the question *"Which habitable-zone exoplanets are within 50 light-years of Earth, and what kind of stars do they orbit?"*, Phantom produces this tool-call trace:
43
+
44
+ ```
45
+ [0] read_csv("exoplanets.csv") → @6a97
46
+ [1] read_csv("stars.csv") → @cc35
47
+ [2] query({p: @6a97}) → @b1a0 -- habitable-zone filter
48
+ [3] query({s: @cc35}) → @f4e2 -- nearby stars (< 50 ly)
49
+ [4] query({hz: @b1a0, nb: @f4e2}) → @31d7 -- join + rank by distance
50
+ [5] export(@31d7) → [{name: "Proxima Cen b", ...}]
51
+ ```
52
+
53
+ The semantic refs (`@6a97`, `@cc35`, ...) compose into a lazy execution graph:
54
+
55
+ ```
56
+ @6a97 → @b1a0 ─┐
57
+ ├→ @31d7
58
+ @cc35 → @f4e2 ─┘
59
+ ```
60
+
61
+ Shared subgraphs are resolved once and cached. The query engine is [DuckDB](https://duckdb.org/), so JOINs, window functions, CTEs, and aggregations all work natively.
62
+
63
+ Claude's answer (abridged):
64
+
65
+ > | Planet | Distance | Star | Spectral type |
66
+ > |:-------|:---------|:-----|:--------------|
67
+ > | Proxima Cen b | 4.2 ly | Proxima Cen | M-dwarf (3,042 K) |
68
+ > | Ross 128 b | 11 ly | Ross 128 | M-dwarf (3,192 K) |
69
+ > | Teegarden b | 12 ly | Teegarden | M-dwarf (2,904 K) |
70
+ > | TRAPPIST-1 e/f/g | 40 ly | TRAPPIST-1 | M-dwarf (2,566 K) |
71
+ >
72
+ > The nearest habitable-zone candidates overwhelmingly orbit **M-dwarf** stars — small, cool, and the most common type in the galaxy.
73
+
74
+ ## LLM Providers
75
+
76
+ Built-in support for **Anthropic**, **OpenAI**, and **Google Gemini**:
77
+
78
+ ```bash
79
+ pip install "phantom-ai[anthropic]"
80
+ pip install "phantom-ai[openai]"
81
+ pip install "phantom-ai[google]"
82
+ ```
83
+
84
+ ```python
85
+ chat = phantom.Chat(
86
+ session,
87
+ provider="anthropic",
88
+ model="claude-sonnet-4-20250514"
89
+ )
90
+ chat = phantom.Chat(
91
+ session,
92
+ provider="openai",
93
+ model="gpt-4o"
94
+ )
95
+ chat = phantom.Chat(
96
+ session,
97
+ provider="google",
98
+ model="gemini-2.0-flash"
99
+ )
100
+ ```
101
+
102
+ Any **OpenAI-compatible** API (Groq, Together, Fireworks, Ollama, vLLM, ...) works via `base_url`:
103
+
104
+ ```python
105
+ chat = phantom.Chat(
106
+ session,
107
+ provider=phantom.OpenAIProvider(
108
+ api_key="...",
109
+ base_url="https://api.groq.com/openai/v1",
110
+ ),
111
+ model="llama-3.1-70b-versatile",
112
+ )
113
+ ```
114
+
115
+ ## Custom Operations
116
+
117
+ Register domain-specific tools alongside the built-ins — the LLM can call them like any other operation:
118
+
119
+ ```python
120
+ @session.op
121
+ def fetch_lightcurve(target: str) -> dict:
122
+ """Fetch a lightcurve from the MAST archive."""
123
+ return mast_api.query(target)
124
+ ```
Binary file
@@ -0,0 +1,27 @@
1
+ <svg width="70" height="70" viewBox="0 0 70 70" fill="none" xmlns="http://www.w3.org/2000/svg">
2
+
3
+ <defs>
4
+ <linearGradient id="grad" x1="0%" y1="0%" x2="100%" y2="100%">
5
+ <stop offset="0%" stop-color="#0EA5E9"/>
6
+ <stop offset="50%" stop-color="#3B82F6"/>
7
+ <stop offset="100%" stop-color="#6366F1"/>
8
+ </linearGradient>
9
+ <linearGradient id="textGrad" x1="0%" y1="0%" x2="100%" y2="0%">
10
+ <stop offset="0%" stop-color="#0EA5E9"/>
11
+ <stop offset="100%" stop-color="#6366F1"/>
12
+ </linearGradient>
13
+ <filter id="glow" x="-50%" y="-50%" width="200%" height="200%">
14
+ <feGaussianBlur stdDeviation="2" result="blur"/>
15
+ <feMerge>
16
+ <feMergeNode in="blur"/>
17
+ <feMergeNode in="SourceGraphic"/>
18
+ </feMerge>
19
+ </filter>
20
+ </defs>
21
+
22
+ <g transform="translate(2, 2)">
23
+ <rect x="0" y="0" width="66" height="66" rx="18" fill="url(#grad)"/>
24
+ <path d="M33 16C24.2 16 17 23.2 17 32V48C17 50.2 18.8 52 21 52H23V46C23 44.3 24.3 43 26 43C27.7 43 29 44.3 29 46V52H31V42C31 40.3 32.3 39 34 39C35.7 39 37 40.3 37 42V52H39V46C39 44.3 40.3 43 42 43C43.7 43 45 44.3 45 46V52H47C49.2 52 51 50.2 51 48V32C51 23.2 43.8 16 35 16H33Z" fill="white" opacity="0.95"/>
25
+ </g>
26
+
27
+ </svg>
@@ -0,0 +1,108 @@
1
+ """
2
+ Phantom - The semantic-concrete bridge for LLM data pipelines.
3
+
4
+ Phantom uses session-scoped operations for isolation and concurrency safety.
5
+
6
+ Example:
7
+ import phantom
8
+
9
+ # Create a session
10
+ session = phantom.Session()
11
+
12
+ # Register operations with @session.op
13
+ @session.op
14
+ def load(source: str) -> pd.DataFrame:
15
+ return pd.read_parquet(source)
16
+
17
+ @session.op
18
+ def filter(data: pd.DataFrame, condition: str) -> pd.DataFrame:
19
+ return data.query(condition)
20
+
21
+ # Register custom inspectors with @session.inspector
22
+ @session.inspector(pd.DataFrame)
23
+ def inspect_df(df):
24
+ return {"shape": list(df.shape), "columns": list(df.columns)}
25
+
26
+ # Create refs (lazy - nothing executes yet)
27
+ sales = session.ref("load", source="sales.parquet")
28
+ filtered = session.ref("filter", data=sales, condition="amount > 100")
29
+
30
+ # Resolve when needed
31
+ df = session.resolve(filtered)
32
+
33
+ # Get tools for LLM integration
34
+ tools = session.get_tools()
35
+
36
+ # Save and load graphs
37
+ session.save_graph(filtered, "pipeline.json")
38
+ loaded = session.load_graph("pipeline.json")
39
+ """
40
+
41
+ from importlib.metadata import PackageNotFoundError, version
42
+
43
+ try:
44
+ __version__ = version("phantom")
45
+ except PackageNotFoundError:
46
+ __version__ = "0.0.0-dev"
47
+
48
+ from ._chat import Chat, ChatResponse
49
+ from ._errors import CycleError, MaxTurnsError, ResolutionError, TypeValidationError
50
+ from ._operation_set import OperationSet
51
+ from ._providers import (
52
+ AnthropicProvider,
53
+ CallOptions,
54
+ GoogleProvider,
55
+ LLMProvider,
56
+ OpenAIProvider,
57
+ ProviderResponse,
58
+ ProviderToolCall,
59
+ Usage,
60
+ get_provider,
61
+ register_provider,
62
+ )
63
+ from ._ref import Ref
64
+ from ._result import ToolResult
65
+ from ._security import (
66
+ DEFAULT_DENY_PATTERNS,
67
+ FileSizeGuard,
68
+ Guard,
69
+ PathGuard,
70
+ SecurityError,
71
+ SecurityPolicy,
72
+ )
73
+ from ._session import Session
74
+
75
+ __all__ = [
76
+ "__version__",
77
+ # Core types
78
+ "Ref",
79
+ "ToolResult",
80
+ "Session",
81
+ "OperationSet",
82
+ # LLM interface
83
+ "Chat",
84
+ "ChatResponse",
85
+ # Provider interface
86
+ "LLMProvider",
87
+ "AnthropicProvider",
88
+ "OpenAIProvider",
89
+ "GoogleProvider",
90
+ "CallOptions",
91
+ "Usage",
92
+ "ProviderResponse",
93
+ "ProviderToolCall",
94
+ "get_provider",
95
+ "register_provider",
96
+ # Security
97
+ "DEFAULT_DENY_PATTERNS",
98
+ "SecurityError",
99
+ "SecurityPolicy",
100
+ "Guard",
101
+ "PathGuard",
102
+ "FileSizeGuard",
103
+ # Errors
104
+ "ResolutionError",
105
+ "TypeValidationError",
106
+ "CycleError",
107
+ "MaxTurnsError",
108
+ ]