ghostfolio-ai-agent 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ghostfolio_ai_agent-0.2.0/LICENSE +21 -0
- ghostfolio_ai_agent-0.2.0/PKG-INFO +273 -0
- ghostfolio_ai_agent-0.2.0/README.md +235 -0
- ghostfolio_ai_agent-0.2.0/app/__init__.py +1 -0
- ghostfolio_ai_agent-0.2.0/app/agent.py +932 -0
- ghostfolio_ai_agent-0.2.0/app/config.py +36 -0
- ghostfolio_ai_agent-0.2.0/app/data_sources/__init__.py +13 -0
- ghostfolio_ai_agent-0.2.0/app/data_sources/base.py +13 -0
- ghostfolio_ai_agent-0.2.0/app/data_sources/ghostfolio_api_provider.py +217 -0
- ghostfolio_ai_agent-0.2.0/app/data_sources/mock_data/large_mock_data.json +7606 -0
- ghostfolio_ai_agent-0.2.0/app/data_sources/mock_file_provider.py +320 -0
- ghostfolio_ai_agent-0.2.0/app/data_sources/mock_provider.py +144 -0
- ghostfolio_ai_agent-0.2.0/app/ghostfolio_client.py +99 -0
- ghostfolio_ai_agent-0.2.0/app/llm.py +151 -0
- ghostfolio_ai_agent-0.2.0/app/main.py +145 -0
- ghostfolio_ai_agent-0.2.0/app/observability.py +219 -0
- ghostfolio_ai_agent-0.2.0/app/schemas.py +45 -0
- ghostfolio_ai_agent-0.2.0/app/telemetry.py +123 -0
- ghostfolio_ai_agent-0.2.0/app/tool_defs.py +147 -0
- ghostfolio_ai_agent-0.2.0/app/tools.py +273 -0
- ghostfolio_ai_agent-0.2.0/ghostfolio_ai_agent.egg-info/PKG-INFO +273 -0
- ghostfolio_ai_agent-0.2.0/ghostfolio_ai_agent.egg-info/SOURCES.txt +32 -0
- ghostfolio_ai_agent-0.2.0/ghostfolio_ai_agent.egg-info/dependency_links.txt +1 -0
- ghostfolio_ai_agent-0.2.0/ghostfolio_ai_agent.egg-info/requires.txt +15 -0
- ghostfolio_ai_agent-0.2.0/ghostfolio_ai_agent.egg-info/top_level.txt +1 -0
- ghostfolio_ai_agent-0.2.0/pyproject.toml +68 -0
- ghostfolio_ai_agent-0.2.0/setup.cfg +4 -0
- ghostfolio_ai_agent-0.2.0/tests/test_agent.py +199 -0
- ghostfolio_ai_agent-0.2.0/tests/test_app.py +8 -0
- ghostfolio_ai_agent-0.2.0/tests/test_data_sources.py +100 -0
- ghostfolio_ai_agent-0.2.0/tests/test_ghostfolio_api_provider.py +256 -0
- ghostfolio_ai_agent-0.2.0/tests/test_ghostfolio_client.py +159 -0
- ghostfolio_ai_agent-0.2.0/tests/test_llm_agent.py +289 -0
- ghostfolio_ai_agent-0.2.0/tests/test_telemetry.py +32 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Leszek Bartkowski
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,273 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: ghostfolio-ai-agent
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: AI-powered conversational portfolio assistant for Ghostfolio with tool calling, verification, and evaluation framework
|
|
5
|
+
Author: Leszek Bartkowski
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/leszekbar/ghostfolio-agent
|
|
8
|
+
Project-URL: Repository, https://github.com/leszekbar/ghostfolio-agent
|
|
9
|
+
Project-URL: Issues, https://github.com/leszekbar/ghostfolio-agent/issues
|
|
10
|
+
Keywords: ghostfolio,ai-agent,portfolio,finance,langchain,langgraph
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Intended Audience :: Financial and Insurance Industry
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
18
|
+
Classifier: Topic :: Office/Business :: Financial
|
|
19
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
20
|
+
Requires-Python: >=3.11
|
|
21
|
+
Description-Content-Type: text/markdown
|
|
22
|
+
License-File: LICENSE
|
|
23
|
+
Requires-Dist: fastapi>=0.116.0
|
|
24
|
+
Requires-Dist: httpx>=0.28.0
|
|
25
|
+
Requires-Dist: langchain-openai>=0.3.0
|
|
26
|
+
Requires-Dist: langchain-anthropic>=0.3.0
|
|
27
|
+
Requires-Dist: langfuse>=2.0.0
|
|
28
|
+
Requires-Dist: langgraph>=0.6.0
|
|
29
|
+
Requires-Dist: pydantic>=2.11.0
|
|
30
|
+
Requires-Dist: pydantic-settings>=2.10.0
|
|
31
|
+
Requires-Dist: streamlit>=1.49.0
|
|
32
|
+
Requires-Dist: uvicorn>=0.35.0
|
|
33
|
+
Provides-Extra: dev
|
|
34
|
+
Requires-Dist: pytest>=8.4.0; extra == "dev"
|
|
35
|
+
Requires-Dist: pytest-asyncio>=1.1.0; extra == "dev"
|
|
36
|
+
Requires-Dist: ruff>=0.9.0; extra == "dev"
|
|
37
|
+
Dynamic: license-file
|
|
38
|
+
|
|
39
|
+
# Ghostfolio AI Agent
|
|
40
|
+
|
|
41
|
+
AI-powered conversational portfolio assistant for [Ghostfolio](https://ghostfol.io). Ask natural-language questions about your portfolio and get verified, fact-grounded responses.
|
|
42
|
+
|
|
43
|
+
## Architecture
|
|
44
|
+
|
|
45
|
+
```mermaid
|
|
46
|
+
graph TD
|
|
47
|
+
A[Streamlit Chat UI] -->|HTTP| B[FastAPI Server]
|
|
48
|
+
B --> C{Agent}
|
|
49
|
+
C -->|Primary| D[LLM Mode<br/>OpenRouter / OpenAI / Anthropic]
|
|
50
|
+
C -->|Fallback| E[Rule-Based Mode]
|
|
51
|
+
D --> F[Tool Layer<br/>7 Tools]
|
|
52
|
+
E --> F
|
|
53
|
+
F --> G{Data Source}
|
|
54
|
+
G -->|Testing| H[Mock Provider]
|
|
55
|
+
G -->|Production| I[Ghostfolio API]
|
|
56
|
+
F --> J[Verification Layer]
|
|
57
|
+
J -->|Traces| K[Langfuse]
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
**Dual-mode agent**: LLM-powered tool calling with automatic rule-based fallback. Every response passes through fact-grounding, disclaimer enforcement, and confidence scoring.
|
|
61
|
+
|
|
62
|
+
## Features
|
|
63
|
+
|
|
64
|
+
- **7 portfolio tools**: Summary, performance, transactions, accounts, market data, allocation analysis, risk rules
|
|
65
|
+
- **LLM integration**: Configurable model via OpenRouter (GPT, Claude) with direct OpenAI/Anthropic fallback
|
|
66
|
+
- **Verification layer**: Fact grounding, financial disclaimer, trade advice refusal, prompt injection defense
|
|
67
|
+
- **Observability**: Langfuse tracing for tool calls, LLM invocations, and verification
|
|
68
|
+
- **50+ eval test cases**: Deterministic checks + LLM-as-judge scoring
|
|
69
|
+
- **Production-ready**: FastAPI + Streamlit, Railway deployment, CI/CD with linting and evals
|
|
70
|
+
|
|
71
|
+
## Quick Start
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
# 1. Setup
|
|
75
|
+
python -m venv .venv
|
|
76
|
+
source .venv/bin/activate
|
|
77
|
+
pip install -e ".[dev]"
|
|
78
|
+
|
|
79
|
+
# 2. Configure (optional — works with mock data by default)
|
|
80
|
+
cp .env.example .env # Add API keys if desired
|
|
81
|
+
|
|
82
|
+
# 3. Run API
|
|
83
|
+
uvicorn app.main:app --reload
|
|
84
|
+
|
|
85
|
+
# 4. Run Chat UI (new terminal)
|
|
86
|
+
streamlit run ui/streamlit_app.py
|
|
87
|
+
|
|
88
|
+
# 5. Test
|
|
89
|
+
pytest -v
|
|
90
|
+
|
|
91
|
+
# 6. Run evals
|
|
92
|
+
python evals/run_evals.py
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
## Configuration
|
|
96
|
+
|
|
97
|
+
Environment variables (prefix: `GHOSTFOLIO_`):
|
|
98
|
+
|
|
99
|
+
### Core
|
|
100
|
+
| Variable | Default | Description |
|
|
101
|
+
|----------|---------|-------------|
|
|
102
|
+
| `GHOSTFOLIO_DEFAULT_DATA_SOURCE` | `mock` | `mock` or `ghostfolio_api` |
|
|
103
|
+
| `GHOSTFOLIO_BASE_URL` | `https://ghostfol.io` | Ghostfolio instance URL |
|
|
104
|
+
| `GHOSTFOLIO_REQUEST_TIMEOUT_SECONDS` | `10` | HTTP timeout |
|
|
105
|
+
|
|
106
|
+
### LLM (OpenRouter — recommended)
|
|
107
|
+
|
|
108
|
+
The easiest way to configure the agent's LLM is via [OpenRouter](https://openrouter.ai), which provides a unified API for multiple providers. Set two env vars:
|
|
109
|
+
|
|
110
|
+
| Variable | Default | Description |
|
|
111
|
+
|----------|---------|-------------|
|
|
112
|
+
| `GHOSTFOLIO_OPENROUTER_API_KEY` | — | OpenRouter API key |
|
|
113
|
+
| `GHOSTFOLIO_AGENT_MODEL` | — | Model to use (see table below) |
|
|
114
|
+
|
|
115
|
+
Available models:
|
|
116
|
+
|
|
117
|
+
| `GHOSTFOLIO_AGENT_MODEL` value | Routed to |
|
|
118
|
+
|---------------------|-----------|
|
|
119
|
+
| `gpt-o` | `openai/gpt-5.2` |
|
|
120
|
+
| `gpt-mini` | `openai/gpt-5.1-chat` |
|
|
121
|
+
| `claude-sonnet` | `anthropic/claude-sonnet-4.6` |
|
|
122
|
+
| `claude-opus` | `anthropic/claude-opus-4.6` |
|
|
123
|
+
|
|
124
|
+
You can also pass a raw OpenRouter model ID (e.g. `anthropic/claude-haiku-4-5-20251001`) for any model available on OpenRouter.
|
|
125
|
+
|
|
126
|
+
### LLM (direct API keys — fallback)
|
|
127
|
+
|
|
128
|
+
If OpenRouter is not configured, the agent falls back to direct provider keys:
|
|
129
|
+
|
|
130
|
+
| Variable | Default | Description |
|
|
131
|
+
|----------|---------|-------------|
|
|
132
|
+
| `GHOSTFOLIO_OPENAI_API_KEY` | — | OpenAI API key |
|
|
133
|
+
| `GHOSTFOLIO_OPENAI_MODEL` | `gpt-4.1` | OpenAI model |
|
|
134
|
+
| `GHOSTFOLIO_ANTHROPIC_API_KEY` | — | Anthropic API key |
|
|
135
|
+
| `GHOSTFOLIO_ANTHROPIC_MODEL` | `claude-sonnet-4-20250514` | Anthropic model |
|
|
136
|
+
| `GHOSTFOLIO_LLM_ENABLED` | `true` | Enable/disable LLM mode |
|
|
137
|
+
|
|
138
|
+
**Priority**: OpenRouter > direct OpenAI > direct Anthropic > rule-based fallback.
|
|
139
|
+
|
|
140
|
+
### Observability
|
|
141
|
+
| Variable | Default | Description |
|
|
142
|
+
|----------|---------|-------------|
|
|
143
|
+
| `GHOSTFOLIO_LANGFUSE_PUBLIC_KEY` | — | Langfuse public key |
|
|
144
|
+
| `GHOSTFOLIO_LANGFUSE_SECRET_KEY` | — | Langfuse secret key |
|
|
145
|
+
| `GHOSTFOLIO_LANGFUSE_HOST` | `https://cloud.langfuse.com` | Langfuse host |
|
|
146
|
+
|
|
147
|
+
### Logging
|
|
148
|
+
| Variable | Default | Description |
|
|
149
|
+
|----------|---------|-------------|
|
|
150
|
+
| `GHOSTFOLIO_LOG_LEVEL` | `INFO` | `DEBUG\|INFO\|WARNING\|ERROR` |
|
|
151
|
+
| `GHOSTFOLIO_LOG_FORMAT` | `json` | `json` or `text` |
|
|
152
|
+
|
|
153
|
+
## Tools
|
|
154
|
+
|
|
155
|
+
| Tool | Description |
|
|
156
|
+
|------|-------------|
|
|
157
|
+
| `get_portfolio_summary` | Portfolio value, holdings, allocations |
|
|
158
|
+
| `get_performance` | Returns for time ranges (1d, ytd, 1y, 5y, max) |
|
|
159
|
+
| `get_transactions` | Buy/sell activity history |
|
|
160
|
+
| `get_account_details` | Linked brokerage accounts and balances |
|
|
161
|
+
| `get_market_data` | Current prices for stock/ETF symbols |
|
|
162
|
+
| `analyze_allocation` | Sector, region, asset class breakdown + risk flags |
|
|
163
|
+
| `check_risk_rules` | Concentration, diversification, asset class risk checks |
|
|
164
|
+
|
|
165
|
+
## Verification
|
|
166
|
+
|
|
167
|
+
Every response is verified before delivery:
|
|
168
|
+
- **Fact grounding**: Numerical claims traced to tool output
|
|
169
|
+
- **Disclaimer**: Financial disclaimer on every response
|
|
170
|
+
- **Trade advice refusal**: Buy/sell recommendations politely refused
|
|
171
|
+
- **Prompt injection defense**: Override attempts detected and blocked
|
|
172
|
+
- **Data freshness**: Warns when data is stale (more than 6 hours old)
|
|
173
|
+
- **Confidence scoring**: Ranges from 0.4 (low) to 0.95 (high)
|
|
174
|
+
|
|
175
|
+
## Evaluation
|
|
176
|
+
|
|
177
|
+
```bash
|
|
178
|
+
# Deterministic evals (50+ test cases, >80% gate)
|
|
179
|
+
python evals/run_evals.py
|
|
180
|
+
|
|
181
|
+
# LLM-as-judge (requires an OpenAI key; results are advisory only)
|
|
182
|
+
python evals/llm_judge.py
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
Categories: happy path (21), edge cases (10), adversarial (12), multi-step (10)
|
|
186
|
+
|
|
187
|
+
### Multi-Model Comparison
|
|
188
|
+
|
|
189
|
+
Compare agent configurations side-by-side across the full eval dataset. Runs each model against all 53 cases using `MockFileDataProvider` (48 holdings, 577 transactions, 5 accounts), scores with deterministic checks + LLM-as-judge, measures response time, and outputs a ranked comparison table.
|
|
190
|
+
|
|
191
|
+
Available models: `rule-based`, `gpt-o`, `gpt-mini`, `claude-haiku`, `claude-sonnet`, `claude-opus`.
|
|
192
|
+
|
|
193
|
+
```bash
|
|
194
|
+
# Quick smoke test (no API keys needed)
|
|
195
|
+
python evals/compare_models.py --models rule-based --no-judge
|
|
196
|
+
|
|
197
|
+
# Compare two LLM models with verbose per-case output
|
|
198
|
+
python evals/compare_models.py --models gpt-mini claude-sonnet -v
|
|
199
|
+
|
|
200
|
+
# Full run (all models except opus, with LLM judge)
|
|
201
|
+
python evals/compare_models.py
|
|
202
|
+
|
|
203
|
+
# Include opus (expensive)
|
|
204
|
+
python evals/compare_models.py --include-expensive
|
|
205
|
+
|
|
206
|
+
# Filter by eval category
|
|
207
|
+
python evals/compare_models.py --models gpt-o claude-sonnet --categories happy_path edge_cases
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
Output includes a ranked summary table, per-category breakdown, and a detailed JSON results file. Ranking composite score: `0.4 × det_pass_rate + 0.4 × (judge/5) + 0.2 × (1 − error_rate)`.
|
|
211
|
+
|
|
212
|
+
## Deployment (Railway)
|
|
213
|
+
|
|
214
|
+
1. Create a Railway project from this repo
|
|
215
|
+
2. Set environment variables:
|
|
216
|
+
- `GHOSTFOLIO_DEFAULT_DATA_SOURCE=mock` (or `ghostfolio_api`)
|
|
217
|
+
- `GHOSTFOLIO_OPENROUTER_API_KEY=sk-or-...` and `GHOSTFOLIO_AGENT_MODEL=claude-sonnet` (recommended)
|
|
218
|
+
- Or `GHOSTFOLIO_OPENAI_API_KEY=sk-...` (direct OpenAI, alternative)
|
|
219
|
+
- `GHOSTFOLIO_LANGFUSE_PUBLIC_KEY` / `GHOSTFOLIO_LANGFUSE_SECRET_KEY` (optional)
|
|
220
|
+
3. Deploy — Railway uses `Procfile`: `web: bash scripts/start.sh`
|
|
221
|
+
4. The Streamlit UI is the public entrypoint on `$PORT`
|
|
222
|
+
|
|
223
|
+
## Project Structure
|
|
224
|
+
|
|
225
|
+
```
|
|
226
|
+
ghostfolio-agent/
|
|
227
|
+
├── app/
|
|
228
|
+
│ ├── agent.py # Dual-mode LLM + rule-based agent
|
|
229
|
+
│ ├── config.py # Environment-based settings
|
|
230
|
+
│ ├── ghostfolio_client.py # HTTP client with retry
|
|
231
|
+
│ ├── llm.py # LLM factory (OpenRouter/OpenAI/Anthropic)
|
|
232
|
+
│ ├── main.py # FastAPI server
|
|
233
|
+
│ ├── observability.py # Langfuse tracing
|
|
234
|
+
│ ├── schemas.py # Pydantic models
|
|
235
|
+
│ ├── telemetry.py # Structured logging
|
|
236
|
+
│ ├── tool_defs.py # Tool schemas for LLM
|
|
237
|
+
│ ├── tools.py # 7 tool implementations
|
|
238
|
+
│ └── data_sources/
|
|
239
|
+
│ ├── base.py # Provider protocol
|
|
240
|
+
│ ├── mock_provider.py
|
|
241
|
+
│ ├── mock_file_provider.py # Large dataset provider
|
|
242
|
+
│ └── ghostfolio_api_provider.py
|
|
243
|
+
├── evals/
|
|
244
|
+
│ ├── eval_dataset.json # 50+ test cases
|
|
245
|
+
│ ├── run_evals.py # Deterministic eval runner
|
|
246
|
+
│ ├── llm_judge.py # LLM-as-judge scorer
|
|
247
|
+
│ └── compare_models.py # Multi-model comparison
|
|
248
|
+
├── tests/ # pytest test suite
|
|
249
|
+
├── ui/
|
|
250
|
+
│ └── streamlit_app.py # Chat interface
|
|
251
|
+
├── docs/
|
|
252
|
+
│ ├── architecture.md # Architecture documentation
|
|
253
|
+
│ └── cost_analysis.md # Cost projections
|
|
254
|
+
└── scripts/
|
|
255
|
+
└── start.sh # Railway startup script
|
|
256
|
+
```
|
|
257
|
+
|
|
258
|
+
## Development
|
|
259
|
+
|
|
260
|
+
```bash
|
|
261
|
+
# Run tests
|
|
262
|
+
pytest -v
|
|
263
|
+
|
|
264
|
+
# Lint
|
|
265
|
+
ruff check app/ tests/ evals/
|
|
266
|
+
|
|
267
|
+
# Format
|
|
268
|
+
ruff format app/ tests/ evals/
|
|
269
|
+
```
|
|
270
|
+
|
|
271
|
+
## License
|
|
272
|
+
|
|
273
|
+
See [LICENSE](LICENSE).
|
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
# Ghostfolio AI Agent
|
|
2
|
+
|
|
3
|
+
AI-powered conversational portfolio assistant for [Ghostfolio](https://ghostfol.io). Ask natural-language questions about your portfolio and get verified, fact-grounded responses.
|
|
4
|
+
|
|
5
|
+
## Architecture
|
|
6
|
+
|
|
7
|
+
```mermaid
|
|
8
|
+
graph TD
|
|
9
|
+
A[Streamlit Chat UI] -->|HTTP| B[FastAPI Server]
|
|
10
|
+
B --> C{Agent}
|
|
11
|
+
C -->|Primary| D[LLM Mode<br/>OpenRouter / OpenAI / Anthropic]
|
|
12
|
+
C -->|Fallback| E[Rule-Based Mode]
|
|
13
|
+
D --> F[Tool Layer<br/>7 Tools]
|
|
14
|
+
E --> F
|
|
15
|
+
F --> G{Data Source}
|
|
16
|
+
G -->|Testing| H[Mock Provider]
|
|
17
|
+
G -->|Production| I[Ghostfolio API]
|
|
18
|
+
F --> J[Verification Layer]
|
|
19
|
+
J -->|Traces| K[Langfuse]
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
**Dual-mode agent**: LLM-powered tool calling with automatic rule-based fallback. Every response passes through fact-grounding, disclaimer enforcement, and confidence scoring.
|
|
23
|
+
|
|
24
|
+
## Features
|
|
25
|
+
|
|
26
|
+
- **7 portfolio tools**: Summary, performance, transactions, accounts, market data, allocation analysis, risk rules
|
|
27
|
+
- **LLM integration**: Configurable model via OpenRouter (GPT, Claude) with direct OpenAI/Anthropic fallback
|
|
28
|
+
- **Verification layer**: Fact grounding, financial disclaimer, trade advice refusal, prompt injection defense
|
|
29
|
+
- **Observability**: Langfuse tracing for tool calls, LLM invocations, and verification
|
|
30
|
+
- **50+ eval test cases**: Deterministic checks + LLM-as-judge scoring
|
|
31
|
+
- **Production-ready**: FastAPI + Streamlit, Railway deployment, CI/CD with linting and evals
|
|
32
|
+
|
|
33
|
+
## Quick Start
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
# 1. Setup
|
|
37
|
+
python -m venv .venv
|
|
38
|
+
source .venv/bin/activate
|
|
39
|
+
pip install -e ".[dev]"
|
|
40
|
+
|
|
41
|
+
# 2. Configure (optional — works with mock data by default)
|
|
42
|
+
cp .env.example .env # Add API keys if desired
|
|
43
|
+
|
|
44
|
+
# 3. Run API
|
|
45
|
+
uvicorn app.main:app --reload
|
|
46
|
+
|
|
47
|
+
# 4. Run Chat UI (new terminal)
|
|
48
|
+
streamlit run ui/streamlit_app.py
|
|
49
|
+
|
|
50
|
+
# 5. Test
|
|
51
|
+
pytest -v
|
|
52
|
+
|
|
53
|
+
# 6. Run evals
|
|
54
|
+
python evals/run_evals.py
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
## Configuration
|
|
58
|
+
|
|
59
|
+
Environment variables (prefix: `GHOSTFOLIO_`):
|
|
60
|
+
|
|
61
|
+
### Core
|
|
62
|
+
| Variable | Default | Description |
|
|
63
|
+
|----------|---------|-------------|
|
|
64
|
+
| `GHOSTFOLIO_DEFAULT_DATA_SOURCE` | `mock` | `mock` or `ghostfolio_api` |
|
|
65
|
+
| `GHOSTFOLIO_BASE_URL` | `https://ghostfol.io` | Ghostfolio instance URL |
|
|
66
|
+
| `GHOSTFOLIO_REQUEST_TIMEOUT_SECONDS` | `10` | HTTP timeout |
|
|
67
|
+
|
|
68
|
+
### LLM (OpenRouter — recommended)
|
|
69
|
+
|
|
70
|
+
The easiest way to configure the agent's LLM is via [OpenRouter](https://openrouter.ai), which provides a unified API for multiple providers. Set two env vars:
|
|
71
|
+
|
|
72
|
+
| Variable | Default | Description |
|
|
73
|
+
|----------|---------|-------------|
|
|
74
|
+
| `GHOSTFOLIO_OPENROUTER_API_KEY` | — | OpenRouter API key |
|
|
75
|
+
| `GHOSTFOLIO_AGENT_MODEL` | — | Model to use (see table below) |
|
|
76
|
+
|
|
77
|
+
Available models:
|
|
78
|
+
|
|
79
|
+
| `GHOSTFOLIO_AGENT_MODEL` value | Routed to |
|
|
80
|
+
|---------------------|-----------|
|
|
81
|
+
| `gpt-o` | `openai/gpt-5.2` |
|
|
82
|
+
| `gpt-mini` | `openai/gpt-5.1-chat` |
|
|
83
|
+
| `claude-sonnet` | `anthropic/claude-sonnet-4.6` |
|
|
84
|
+
| `claude-opus` | `anthropic/claude-opus-4.6` |
|
|
85
|
+
|
|
86
|
+
You can also pass a raw OpenRouter model ID (e.g. `anthropic/claude-haiku-4-5-20251001`) for any model available on OpenRouter.
|
|
87
|
+
|
|
88
|
+
### LLM (direct API keys — fallback)
|
|
89
|
+
|
|
90
|
+
If OpenRouter is not configured, the agent falls back to direct provider keys:
|
|
91
|
+
|
|
92
|
+
| Variable | Default | Description |
|
|
93
|
+
|----------|---------|-------------|
|
|
94
|
+
| `GHOSTFOLIO_OPENAI_API_KEY` | — | OpenAI API key |
|
|
95
|
+
| `GHOSTFOLIO_OPENAI_MODEL` | `gpt-4.1` | OpenAI model |
|
|
96
|
+
| `GHOSTFOLIO_ANTHROPIC_API_KEY` | — | Anthropic API key |
|
|
97
|
+
| `GHOSTFOLIO_ANTHROPIC_MODEL` | `claude-sonnet-4-20250514` | Anthropic model |
|
|
98
|
+
| `GHOSTFOLIO_LLM_ENABLED` | `true` | Enable/disable LLM mode |
|
|
99
|
+
|
|
100
|
+
**Priority**: OpenRouter > direct OpenAI > direct Anthropic > rule-based fallback.
|
|
101
|
+
|
|
102
|
+
### Observability
|
|
103
|
+
| Variable | Default | Description |
|
|
104
|
+
|----------|---------|-------------|
|
|
105
|
+
| `GHOSTFOLIO_LANGFUSE_PUBLIC_KEY` | — | Langfuse public key |
|
|
106
|
+
| `GHOSTFOLIO_LANGFUSE_SECRET_KEY` | — | Langfuse secret key |
|
|
107
|
+
| `GHOSTFOLIO_LANGFUSE_HOST` | `https://cloud.langfuse.com` | Langfuse host |
|
|
108
|
+
|
|
109
|
+
### Logging
|
|
110
|
+
| Variable | Default | Description |
|
|
111
|
+
|----------|---------|-------------|
|
|
112
|
+
| `GHOSTFOLIO_LOG_LEVEL` | `INFO` | `DEBUG\|INFO\|WARNING\|ERROR` |
|
|
113
|
+
| `GHOSTFOLIO_LOG_FORMAT` | `json` | `json` or `text` |
|
|
114
|
+
|
|
115
|
+
## Tools
|
|
116
|
+
|
|
117
|
+
| Tool | Description |
|
|
118
|
+
|------|-------------|
|
|
119
|
+
| `get_portfolio_summary` | Portfolio value, holdings, allocations |
|
|
120
|
+
| `get_performance` | Returns for time ranges (1d, ytd, 1y, 5y, max) |
|
|
121
|
+
| `get_transactions` | Buy/sell activity history |
|
|
122
|
+
| `get_account_details` | Linked brokerage accounts and balances |
|
|
123
|
+
| `get_market_data` | Current prices for stock/ETF symbols |
|
|
124
|
+
| `analyze_allocation` | Sector, region, asset class breakdown + risk flags |
|
|
125
|
+
| `check_risk_rules` | Concentration, diversification, asset class risk checks |
|
|
126
|
+
|
|
127
|
+
## Verification
|
|
128
|
+
|
|
129
|
+
Every response is verified before delivery:
|
|
130
|
+
- **Fact grounding**: Numerical claims traced to tool output
|
|
131
|
+
- **Disclaimer**: Financial disclaimer on every response
|
|
132
|
+
- **Trade advice refusal**: Buy/sell recommendations politely refused
|
|
133
|
+
- **Prompt injection defense**: Override attempts detected and blocked
|
|
134
|
+
- **Data freshness**: Warns when data is stale (more than 6 hours old)
|
|
135
|
+
- **Confidence scoring**: Ranges from 0.4 (low) to 0.95 (high)
|
|
136
|
+
|
|
137
|
+
## Evaluation
|
|
138
|
+
|
|
139
|
+
```bash
|
|
140
|
+
# Deterministic evals (50+ test cases, >80% gate)
|
|
141
|
+
python evals/run_evals.py
|
|
142
|
+
|
|
143
|
+
# LLM-as-judge (requires an OpenAI key; results are advisory only)
|
|
144
|
+
python evals/llm_judge.py
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
Categories: happy path (21), edge cases (10), adversarial (12), multi-step (10)
|
|
148
|
+
|
|
149
|
+
### Multi-Model Comparison
|
|
150
|
+
|
|
151
|
+
Compare agent configurations side-by-side across the full eval dataset. Runs each model against all 53 cases using `MockFileDataProvider` (48 holdings, 577 transactions, 5 accounts), scores with deterministic checks + LLM-as-judge, measures response time, and outputs a ranked comparison table.
|
|
152
|
+
|
|
153
|
+
Available models: `rule-based`, `gpt-o`, `gpt-mini`, `claude-haiku`, `claude-sonnet`, `claude-opus`.
|
|
154
|
+
|
|
155
|
+
```bash
|
|
156
|
+
# Quick smoke test (no API keys needed)
|
|
157
|
+
python evals/compare_models.py --models rule-based --no-judge
|
|
158
|
+
|
|
159
|
+
# Compare two LLM models with verbose per-case output
|
|
160
|
+
python evals/compare_models.py --models gpt-mini claude-sonnet -v
|
|
161
|
+
|
|
162
|
+
# Full run (all models except opus, with LLM judge)
|
|
163
|
+
python evals/compare_models.py
|
|
164
|
+
|
|
165
|
+
# Include opus (expensive)
|
|
166
|
+
python evals/compare_models.py --include-expensive
|
|
167
|
+
|
|
168
|
+
# Filter by eval category
|
|
169
|
+
python evals/compare_models.py --models gpt-o claude-sonnet --categories happy_path edge_cases
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
Output includes a ranked summary table, per-category breakdown, and a detailed JSON results file. Ranking composite score: `0.4 × det_pass_rate + 0.4 × (judge/5) + 0.2 × (1 − error_rate)`.
|
|
173
|
+
|
|
174
|
+
## Deployment (Railway)
|
|
175
|
+
|
|
176
|
+
1. Create a Railway project from this repo
|
|
177
|
+
2. Set environment variables:
|
|
178
|
+
- `GHOSTFOLIO_DEFAULT_DATA_SOURCE=mock` (or `ghostfolio_api`)
|
|
179
|
+
- `GHOSTFOLIO_OPENROUTER_API_KEY=sk-or-...` and `GHOSTFOLIO_AGENT_MODEL=claude-sonnet` (recommended)
|
|
180
|
+
- Or `GHOSTFOLIO_OPENAI_API_KEY=sk-...` (direct OpenAI, alternative)
|
|
181
|
+
- `GHOSTFOLIO_LANGFUSE_PUBLIC_KEY` / `GHOSTFOLIO_LANGFUSE_SECRET_KEY` (optional)
|
|
182
|
+
3. Deploy — Railway uses `Procfile`: `web: bash scripts/start.sh`
|
|
183
|
+
4. The Streamlit UI is the public entrypoint on `$PORT`
|
|
184
|
+
|
|
185
|
+
## Project Structure
|
|
186
|
+
|
|
187
|
+
```
|
|
188
|
+
ghostfolio-agent/
|
|
189
|
+
├── app/
|
|
190
|
+
│ ├── agent.py # Dual-mode LLM + rule-based agent
|
|
191
|
+
│ ├── config.py # Environment-based settings
|
|
192
|
+
│ ├── ghostfolio_client.py # HTTP client with retry
|
|
193
|
+
│ ├── llm.py # LLM factory (OpenRouter/OpenAI/Anthropic)
|
|
194
|
+
│ ├── main.py # FastAPI server
|
|
195
|
+
│ ├── observability.py # Langfuse tracing
|
|
196
|
+
│ ├── schemas.py # Pydantic models
|
|
197
|
+
│ ├── telemetry.py # Structured logging
|
|
198
|
+
│ ├── tool_defs.py # Tool schemas for LLM
|
|
199
|
+
│ ├── tools.py # 7 tool implementations
|
|
200
|
+
│ └── data_sources/
|
|
201
|
+
│ ├── base.py # Provider protocol
|
|
202
|
+
│ ├── mock_provider.py
|
|
203
|
+
│ ├── mock_file_provider.py # Large dataset provider
|
|
204
|
+
│ └── ghostfolio_api_provider.py
|
|
205
|
+
├── evals/
|
|
206
|
+
│ ├── eval_dataset.json # 50+ test cases
|
|
207
|
+
│ ├── run_evals.py # Deterministic eval runner
|
|
208
|
+
│ ├── llm_judge.py # LLM-as-judge scorer
|
|
209
|
+
│ └── compare_models.py # Multi-model comparison
|
|
210
|
+
├── tests/ # pytest test suite
|
|
211
|
+
├── ui/
|
|
212
|
+
│ └── streamlit_app.py # Chat interface
|
|
213
|
+
├── docs/
|
|
214
|
+
│ ├── architecture.md # Architecture documentation
|
|
215
|
+
│ └── cost_analysis.md # Cost projections
|
|
216
|
+
└── scripts/
|
|
217
|
+
└── start.sh # Railway startup script
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
## Development
|
|
221
|
+
|
|
222
|
+
```bash
|
|
223
|
+
# Run tests
|
|
224
|
+
pytest -v
|
|
225
|
+
|
|
226
|
+
# Lint
|
|
227
|
+
ruff check app/ tests/ evals/
|
|
228
|
+
|
|
229
|
+
# Format
|
|
230
|
+
ruff format app/ tests/ evals/
|
|
231
|
+
```
|
|
232
|
+
|
|
233
|
+
## License
|
|
234
|
+
|
|
235
|
+
See [LICENSE](LICENSE).
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Ghostfolio agent application package."""
|