ghostfolio-ai-agent 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. ghostfolio_ai_agent-0.2.0/LICENSE +21 -0
  2. ghostfolio_ai_agent-0.2.0/PKG-INFO +273 -0
  3. ghostfolio_ai_agent-0.2.0/README.md +235 -0
  4. ghostfolio_ai_agent-0.2.0/app/__init__.py +1 -0
  5. ghostfolio_ai_agent-0.2.0/app/agent.py +932 -0
  6. ghostfolio_ai_agent-0.2.0/app/config.py +36 -0
  7. ghostfolio_ai_agent-0.2.0/app/data_sources/__init__.py +13 -0
  8. ghostfolio_ai_agent-0.2.0/app/data_sources/base.py +13 -0
  9. ghostfolio_ai_agent-0.2.0/app/data_sources/ghostfolio_api_provider.py +217 -0
  10. ghostfolio_ai_agent-0.2.0/app/data_sources/mock_data/large_mock_data.json +7606 -0
  11. ghostfolio_ai_agent-0.2.0/app/data_sources/mock_file_provider.py +320 -0
  12. ghostfolio_ai_agent-0.2.0/app/data_sources/mock_provider.py +144 -0
  13. ghostfolio_ai_agent-0.2.0/app/ghostfolio_client.py +99 -0
  14. ghostfolio_ai_agent-0.2.0/app/llm.py +151 -0
  15. ghostfolio_ai_agent-0.2.0/app/main.py +145 -0
  16. ghostfolio_ai_agent-0.2.0/app/observability.py +219 -0
  17. ghostfolio_ai_agent-0.2.0/app/schemas.py +45 -0
  18. ghostfolio_ai_agent-0.2.0/app/telemetry.py +123 -0
  19. ghostfolio_ai_agent-0.2.0/app/tool_defs.py +147 -0
  20. ghostfolio_ai_agent-0.2.0/app/tools.py +273 -0
  21. ghostfolio_ai_agent-0.2.0/ghostfolio_ai_agent.egg-info/PKG-INFO +273 -0
  22. ghostfolio_ai_agent-0.2.0/ghostfolio_ai_agent.egg-info/SOURCES.txt +32 -0
  23. ghostfolio_ai_agent-0.2.0/ghostfolio_ai_agent.egg-info/dependency_links.txt +1 -0
  24. ghostfolio_ai_agent-0.2.0/ghostfolio_ai_agent.egg-info/requires.txt +15 -0
  25. ghostfolio_ai_agent-0.2.0/ghostfolio_ai_agent.egg-info/top_level.txt +1 -0
  26. ghostfolio_ai_agent-0.2.0/pyproject.toml +68 -0
  27. ghostfolio_ai_agent-0.2.0/setup.cfg +4 -0
  28. ghostfolio_ai_agent-0.2.0/tests/test_agent.py +199 -0
  29. ghostfolio_ai_agent-0.2.0/tests/test_app.py +8 -0
  30. ghostfolio_ai_agent-0.2.0/tests/test_data_sources.py +100 -0
  31. ghostfolio_ai_agent-0.2.0/tests/test_ghostfolio_api_provider.py +256 -0
  32. ghostfolio_ai_agent-0.2.0/tests/test_ghostfolio_client.py +159 -0
  33. ghostfolio_ai_agent-0.2.0/tests/test_llm_agent.py +289 -0
  34. ghostfolio_ai_agent-0.2.0/tests/test_telemetry.py +32 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Leszek Bartkowski
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,273 @@
1
+ Metadata-Version: 2.4
2
+ Name: ghostfolio-ai-agent
3
+ Version: 0.2.0
4
+ Summary: AI-powered conversational portfolio assistant for Ghostfolio with tool calling, verification, and evaluation framework
5
+ Author: Leszek Bartkowski
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/leszekbar/ghostfolio-agent
8
+ Project-URL: Repository, https://github.com/leszekbar/ghostfolio-agent
9
+ Project-URL: Issues, https://github.com/leszekbar/ghostfolio-agent/issues
10
+ Keywords: ghostfolio,ai-agent,portfolio,finance,langchain,langgraph
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Intended Audience :: Financial and Insurance Industry
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Programming Language :: Python :: 3.13
18
+ Classifier: Topic :: Office/Business :: Financial
19
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
+ Requires-Python: >=3.11
21
+ Description-Content-Type: text/markdown
22
+ License-File: LICENSE
23
+ Requires-Dist: fastapi>=0.116.0
24
+ Requires-Dist: httpx>=0.28.0
25
+ Requires-Dist: langchain-openai>=0.3.0
26
+ Requires-Dist: langchain-anthropic>=0.3.0
27
+ Requires-Dist: langfuse>=2.0.0
28
+ Requires-Dist: langgraph>=0.6.0
29
+ Requires-Dist: pydantic>=2.11.0
30
+ Requires-Dist: pydantic-settings>=2.10.0
31
+ Requires-Dist: streamlit>=1.49.0
32
+ Requires-Dist: uvicorn>=0.35.0
33
+ Provides-Extra: dev
34
+ Requires-Dist: pytest>=8.4.0; extra == "dev"
35
+ Requires-Dist: pytest-asyncio>=1.1.0; extra == "dev"
36
+ Requires-Dist: ruff>=0.9.0; extra == "dev"
37
+ Dynamic: license-file
38
+
39
+ # Ghostfolio AI Agent
40
+
41
+ AI-powered conversational portfolio assistant for [Ghostfolio](https://ghostfol.io). Ask natural-language questions about your portfolio and get verified, fact-grounded responses.
42
+
43
+ ## Architecture
44
+
45
+ ```mermaid
46
+ graph TD
47
+ A[Streamlit Chat UI] -->|HTTP| B[FastAPI Server]
48
+ B --> C{Agent}
49
+ C -->|Primary| D[LLM Mode<br/>OpenRouter / OpenAI / Anthropic]
50
+ C -->|Fallback| E[Rule-Based Mode]
51
+ D --> F[Tool Layer<br/>7 Tools]
52
+ E --> F
53
+ F --> G{Data Source}
54
+ G -->|Testing| H[Mock Provider]
55
+ G -->|Production| I[Ghostfolio API]
56
+ F --> J[Verification Layer]
57
+ J -->|Traces| K[Langfuse]
58
+ ```
59
+
60
+ **Dual-mode agent**: LLM-powered tool calling with automatic rule-based fallback. Every response passes through fact-grounding, disclaimer enforcement, and confidence scoring.
61
+
62
+ ## Features
63
+
64
+ - **7 portfolio tools**: Summary, performance, transactions, accounts, market data, allocation analysis, risk rules
65
+ - **LLM integration**: Configurable model via OpenRouter (GPT, Claude) with direct OpenAI/Anthropic fallback
66
+ - **Verification layer**: Fact grounding, financial disclaimer, trade advice refusal, prompt injection defense
67
+ - **Observability**: Langfuse tracing for tool calls, LLM invocations, and verification
68
+ - **50+ eval test cases**: Deterministic checks + LLM-as-judge scoring
69
+ - **Production-ready**: FastAPI + Streamlit, Railway deployment, CI/CD with linting and evals
70
+
71
+ ## Quick Start
72
+
73
+ ```bash
74
+ # 1. Setup
75
+ python -m venv .venv
76
+ source .venv/bin/activate
77
+ pip install -e ".[dev]"
78
+
79
+ # 2. Configure (optional — works with mock data by default)
80
+ cp .env.example .env # Add API keys if desired
81
+
82
+ # 3. Run API
83
+ uvicorn app.main:app --reload
84
+
85
+ # 4. Run Chat UI (new terminal)
86
+ streamlit run ui/streamlit_app.py
87
+
88
+ # 5. Test
89
+ pytest -v
90
+
91
+ # 6. Run evals
92
+ python evals/run_evals.py
93
+ ```
94
+
95
+ ## Configuration
96
+
97
+ Environment variables (prefix: `GHOSTFOLIO_`):
98
+
99
+ ### Core
100
+ | Variable | Default | Description |
101
+ |----------|---------|-------------|
102
+ | `GHOSTFOLIO_DEFAULT_DATA_SOURCE` | `mock` | `mock` or `ghostfolio_api` |
103
+ | `GHOSTFOLIO_BASE_URL` | `https://ghostfol.io` | Ghostfolio instance URL |
104
+ | `GHOSTFOLIO_REQUEST_TIMEOUT_SECONDS` | `10` | HTTP timeout |
105
+
106
+ ### LLM (OpenRouter — recommended)
107
+
108
+ The easiest way to configure the agent's LLM is via [OpenRouter](https://openrouter.ai), which provides a unified API for multiple providers. Set two environment variables:
109
+
110
+ | Variable | Default | Description |
111
+ |----------|---------|-------------|
112
+ | `GHOSTFOLIO_OPENROUTER_API_KEY` | — | OpenRouter API key |
113
+ | `GHOSTFOLIO_AGENT_MODEL` | — | Model to use (see table below) |
114
+
115
+ Available models:
116
+
117
+ | `GHOSTFOLIO_AGENT_MODEL` value | Routed to |
118
+ |---------------------|-----------|
119
+ | `gpt-o` | `openai/gpt-5.2` |
120
+ | `gpt-mini` | `openai/gpt-5.1-chat` |
121
+ | `claude-sonnet` | `anthropic/claude-sonnet-4.6` |
122
+ | `claude-opus` | `anthropic/claude-opus-4.6` |
123
+
124
+ You can also pass a raw OpenRouter model ID (e.g. `anthropic/claude-haiku-4-5-20251001`) for any model available on OpenRouter.
125
+
126
+ ### LLM (direct API keys — fallback)
127
+
128
+ If OpenRouter is not configured, the agent falls back to direct provider keys:
129
+
130
+ | Variable | Default | Description |
131
+ |----------|---------|-------------|
132
+ | `GHOSTFOLIO_OPENAI_API_KEY` | — | OpenAI API key |
133
+ | `GHOSTFOLIO_OPENAI_MODEL` | `gpt-4.1` | OpenAI model |
134
+ | `GHOSTFOLIO_ANTHROPIC_API_KEY` | — | Anthropic API key |
135
+ | `GHOSTFOLIO_ANTHROPIC_MODEL` | `claude-sonnet-4-20250514` | Anthropic model |
136
+ | `GHOSTFOLIO_LLM_ENABLED` | `true` | Enable/disable LLM mode |
137
+
138
+ **Priority**: OpenRouter > direct OpenAI > direct Anthropic > rule-based fallback.
139
+
140
+ ### Observability
141
+ | Variable | Default | Description |
142
+ |----------|---------|-------------|
143
+ | `GHOSTFOLIO_LANGFUSE_PUBLIC_KEY` | — | Langfuse public key |
144
+ | `GHOSTFOLIO_LANGFUSE_SECRET_KEY` | — | Langfuse secret key |
145
+ | `GHOSTFOLIO_LANGFUSE_HOST` | `https://cloud.langfuse.com` | Langfuse host |
146
+
147
+ ### Logging
148
+ | Variable | Default | Description |
149
+ |----------|---------|-------------|
150
+ | `GHOSTFOLIO_LOG_LEVEL` | `INFO` | `DEBUG\|INFO\|WARNING\|ERROR` |
151
+ | `GHOSTFOLIO_LOG_FORMAT` | `json` | `json` or `text` |
152
+
153
+ ## Tools
154
+
155
+ | Tool | Description |
156
+ |------|-------------|
157
+ | `get_portfolio_summary` | Portfolio value, holdings, allocations |
158
+ | `get_performance` | Returns for time ranges (1d, ytd, 1y, 5y, max) |
159
+ | `get_transactions` | Buy/sell activity history |
160
+ | `get_account_details` | Linked brokerage accounts and balances |
161
+ | `get_market_data` | Current prices for stock/ETF symbols |
162
+ | `analyze_allocation` | Sector, region, asset class breakdown + risk flags |
163
+ | `check_risk_rules` | Concentration, diversification, asset class risk checks |
164
+
165
+ ## Verification
166
+
167
+ Every response is verified before delivery:
168
+ - **Fact grounding**: Numerical claims traced to tool output
169
+ - **Disclaimer**: Financial disclaimer on every response
170
+ - **Trade advice refusal**: Buy/sell recommendations politely refused
171
+ - **Prompt injection defense**: Override attempts detected and blocked
172
+ - **Data freshness**: Stale data warnings (>6h old)
173
+ - **Confidence scoring**: Scores range from 0.4 (low) to 0.95 (high)
174
+
175
+ ## Evaluation
176
+
177
+ ```bash
178
+ # Deterministic evals (50+ test cases, >80% gate)
179
+ python evals/run_evals.py
180
+
181
+ # LLM-as-judge scoring (requires an OpenAI API key; advisory only)
182
+ python evals/llm_judge.py
183
+ ```
184
+
185
+ Categories: happy path (21), edge cases (10), adversarial (12), multi-step (10)
186
+
187
+ ### Multi-Model Comparison
188
+
189
+ Compare agent configurations side-by-side across the full eval dataset. Runs each model against all 53 cases using `MockFileDataProvider` (48 holdings, 577 transactions, 5 accounts), scores with deterministic checks + LLM-as-judge, measures response time, and outputs a ranked comparison table.
190
+
191
+ Available models: `rule-based`, `gpt-o`, `gpt-mini`, `claude-haiku`, `claude-sonnet`, `claude-opus`.
192
+
193
+ ```bash
194
+ # Quick smoke test (no API keys needed)
195
+ python evals/compare_models.py --models rule-based --no-judge
196
+
197
+ # Compare two LLM models with verbose per-case output
198
+ python evals/compare_models.py --models gpt-mini claude-sonnet -v
199
+
200
+ # Full run (all models except opus, with LLM judge)
201
+ python evals/compare_models.py
202
+
203
+ # Include opus (expensive)
204
+ python evals/compare_models.py --include-expensive
205
+
206
+ # Filter by eval category
207
+ python evals/compare_models.py --models gpt-o claude-sonnet --categories happy_path edge_cases
208
+ ```
209
+
210
+ Output includes a ranked summary table, per-category breakdown, and a detailed JSON results file. Ranking composite score: `0.4 × det_pass_rate + 0.4 × (judge/5) + 0.2 × (1 − error_rate)`.
211
+
212
+ ## Deployment (Railway)
213
+
214
+ 1. Create a Railway project from this repo
215
+ 2. Set environment variables:
216
+ - `GHOSTFOLIO_DEFAULT_DATA_SOURCE=mock` (or `ghostfolio_api`)
217
+ - `GHOSTFOLIO_OPENROUTER_API_KEY=sk-or-...` and `GHOSTFOLIO_AGENT_MODEL=claude-sonnet` (recommended)
218
+ - Or `GHOSTFOLIO_OPENAI_API_KEY=sk-...` (direct OpenAI, alternative)
219
+ - `GHOSTFOLIO_LANGFUSE_PUBLIC_KEY` / `GHOSTFOLIO_LANGFUSE_SECRET_KEY` (optional)
220
+ 3. Deploy — Railway uses the `Procfile`: `web: bash scripts/start.sh`
221
+ 4. The Streamlit UI is the public entrypoint on `$PORT`
222
+
223
+ ## Project Structure
224
+
225
+ ```
226
+ ghostfolio-agent/
227
+ ├── app/
228
+ │ ├── agent.py # Dual-mode LLM + rule-based agent
229
+ │ ├── config.py # Environment-based settings
230
+ │ ├── ghostfolio_client.py # HTTP client with retry
231
+ │ ├── llm.py # LLM factory (OpenRouter/OpenAI/Anthropic)
232
+ │ ├── main.py # FastAPI server
233
+ │ ├── observability.py # Langfuse tracing
234
+ │ ├── schemas.py # Pydantic models
235
+ │ ├── telemetry.py # Structured logging
236
+ │ ├── tool_defs.py # Tool schemas for LLM
237
+ │ ├── tools.py # 7 tool implementations
238
+ │ └── data_sources/
239
+ │ ├── base.py # Provider protocol
240
+ │ ├── mock_provider.py
241
+ │ ├── mock_file_provider.py # Large dataset provider
242
+ │ └── ghostfolio_api_provider.py
243
+ ├── evals/
244
+ │ ├── eval_dataset.json # 50+ test cases
245
+ │ ├── run_evals.py # Deterministic eval runner
246
+ │ ├── llm_judge.py # LLM-as-judge scorer
247
+ │ └── compare_models.py # Multi-model comparison
248
+ ├── tests/ # pytest test suite
249
+ ├── ui/
250
+ │ └── streamlit_app.py # Chat interface
251
+ ├── docs/
252
+ │ ├── architecture.md # Architecture documentation
253
+ │ └── cost_analysis.md # Cost projections
254
+ └── scripts/
255
+ └── start.sh # Railway startup script
256
+ ```
257
+
258
+ ## Development
259
+
260
+ ```bash
261
+ # Run tests
262
+ pytest -v
263
+
264
+ # Lint
265
+ ruff check app/ tests/ evals/
266
+
267
+ # Format
268
+ ruff format app/ tests/ evals/
269
+ ```
270
+
271
+ ## License
272
+
273
+ See [LICENSE](LICENSE).
@@ -0,0 +1,235 @@
1
+ # Ghostfolio AI Agent
2
+
3
+ AI-powered conversational portfolio assistant for [Ghostfolio](https://ghostfol.io). Ask natural-language questions about your portfolio and get verified, fact-grounded responses.
4
+
5
+ ## Architecture
6
+
7
+ ```mermaid
8
+ graph TD
9
+ A[Streamlit Chat UI] -->|HTTP| B[FastAPI Server]
10
+ B --> C{Agent}
11
+ C -->|Primary| D[LLM Mode<br/>OpenRouter / OpenAI / Anthropic]
12
+ C -->|Fallback| E[Rule-Based Mode]
13
+ D --> F[Tool Layer<br/>7 Tools]
14
+ E --> F
15
+ F --> G{Data Source}
16
+ G -->|Testing| H[Mock Provider]
17
+ G -->|Production| I[Ghostfolio API]
18
+ F --> J[Verification Layer]
19
+ J -->|Traces| K[Langfuse]
20
+ ```
21
+
22
+ **Dual-mode agent**: LLM-powered tool calling with automatic rule-based fallback. Every response passes through fact-grounding, disclaimer enforcement, and confidence scoring.
23
+
24
+ ## Features
25
+
26
+ - **7 portfolio tools**: Summary, performance, transactions, accounts, market data, allocation analysis, risk rules
27
+ - **LLM integration**: Configurable model via OpenRouter (GPT, Claude) with direct OpenAI/Anthropic fallback
28
+ - **Verification layer**: Fact grounding, financial disclaimer, trade advice refusal, prompt injection defense
29
+ - **Observability**: Langfuse tracing for tool calls, LLM invocations, and verification
30
+ - **50+ eval test cases**: Deterministic checks + LLM-as-judge scoring
31
+ - **Production-ready**: FastAPI + Streamlit, Railway deployment, CI/CD with linting and evals
32
+
33
+ ## Quick Start
34
+
35
+ ```bash
36
+ # 1. Setup
37
+ python -m venv .venv
38
+ source .venv/bin/activate
39
+ pip install -e ".[dev]"
40
+
41
+ # 2. Configure (optional — works with mock data by default)
42
+ cp .env.example .env # Add API keys if desired
43
+
44
+ # 3. Run API
45
+ uvicorn app.main:app --reload
46
+
47
+ # 4. Run Chat UI (new terminal)
48
+ streamlit run ui/streamlit_app.py
49
+
50
+ # 5. Test
51
+ pytest -v
52
+
53
+ # 6. Run evals
54
+ python evals/run_evals.py
55
+ ```
56
+
57
+ ## Configuration
58
+
59
+ Environment variables (prefix: `GHOSTFOLIO_`):
60
+
61
+ ### Core
62
+ | Variable | Default | Description |
63
+ |----------|---------|-------------|
64
+ | `GHOSTFOLIO_DEFAULT_DATA_SOURCE` | `mock` | `mock` or `ghostfolio_api` |
65
+ | `GHOSTFOLIO_BASE_URL` | `https://ghostfol.io` | Ghostfolio instance URL |
66
+ | `GHOSTFOLIO_REQUEST_TIMEOUT_SECONDS` | `10` | HTTP timeout |
67
+
68
+ ### LLM (OpenRouter — recommended)
69
+
70
+ The easiest way to configure the agent's LLM is via [OpenRouter](https://openrouter.ai), which provides a unified API for multiple providers. Set two environment variables:
71
+
72
+ | Variable | Default | Description |
73
+ |----------|---------|-------------|
74
+ | `GHOSTFOLIO_OPENROUTER_API_KEY` | — | OpenRouter API key |
75
+ | `GHOSTFOLIO_AGENT_MODEL` | — | Model to use (see table below) |
76
+
77
+ Available models:
78
+
79
+ | `GHOSTFOLIO_AGENT_MODEL` value | Routed to |
80
+ |---------------------|-----------|
81
+ | `gpt-o` | `openai/gpt-5.2` |
82
+ | `gpt-mini` | `openai/gpt-5.1-chat` |
83
+ | `claude-sonnet` | `anthropic/claude-sonnet-4.6` |
84
+ | `claude-opus` | `anthropic/claude-opus-4.6` |
85
+
86
+ You can also pass a raw OpenRouter model ID (e.g. `anthropic/claude-haiku-4-5-20251001`) for any model available on OpenRouter.
87
+
88
+ ### LLM (direct API keys — fallback)
89
+
90
+ If OpenRouter is not configured, the agent falls back to direct provider keys:
91
+
92
+ | Variable | Default | Description |
93
+ |----------|---------|-------------|
94
+ | `GHOSTFOLIO_OPENAI_API_KEY` | — | OpenAI API key |
95
+ | `GHOSTFOLIO_OPENAI_MODEL` | `gpt-4.1` | OpenAI model |
96
+ | `GHOSTFOLIO_ANTHROPIC_API_KEY` | — | Anthropic API key |
97
+ | `GHOSTFOLIO_ANTHROPIC_MODEL` | `claude-sonnet-4-20250514` | Anthropic model |
98
+ | `GHOSTFOLIO_LLM_ENABLED` | `true` | Enable/disable LLM mode |
99
+
100
+ **Priority**: OpenRouter > direct OpenAI > direct Anthropic > rule-based fallback.
101
+
102
+ ### Observability
103
+ | Variable | Default | Description |
104
+ |----------|---------|-------------|
105
+ | `GHOSTFOLIO_LANGFUSE_PUBLIC_KEY` | — | Langfuse public key |
106
+ | `GHOSTFOLIO_LANGFUSE_SECRET_KEY` | — | Langfuse secret key |
107
+ | `GHOSTFOLIO_LANGFUSE_HOST` | `https://cloud.langfuse.com` | Langfuse host |
108
+
109
+ ### Logging
110
+ | Variable | Default | Description |
111
+ |----------|---------|-------------|
112
+ | `GHOSTFOLIO_LOG_LEVEL` | `INFO` | `DEBUG\|INFO\|WARNING\|ERROR` |
113
+ | `GHOSTFOLIO_LOG_FORMAT` | `json` | `json` or `text` |
114
+
115
+ ## Tools
116
+
117
+ | Tool | Description |
118
+ |------|-------------|
119
+ | `get_portfolio_summary` | Portfolio value, holdings, allocations |
120
+ | `get_performance` | Returns for time ranges (1d, ytd, 1y, 5y, max) |
121
+ | `get_transactions` | Buy/sell activity history |
122
+ | `get_account_details` | Linked brokerage accounts and balances |
123
+ | `get_market_data` | Current prices for stock/ETF symbols |
124
+ | `analyze_allocation` | Sector, region, asset class breakdown + risk flags |
125
+ | `check_risk_rules` | Concentration, diversification, asset class risk checks |
126
+
127
+ ## Verification
128
+
129
+ Every response is verified before delivery:
130
+ - **Fact grounding**: Numerical claims traced to tool output
131
+ - **Disclaimer**: Financial disclaimer on every response
132
+ - **Trade advice refusal**: Buy/sell recommendations politely refused
133
+ - **Prompt injection defense**: Override attempts detected and blocked
134
+ - **Data freshness**: Stale data warnings (>6h old)
135
+ - **Confidence scoring**: Scores range from 0.4 (low) to 0.95 (high)
136
+
137
+ ## Evaluation
138
+
139
+ ```bash
140
+ # Deterministic evals (50+ test cases, >80% gate)
141
+ python evals/run_evals.py
142
+
143
+ # LLM-as-judge scoring (requires an OpenAI API key; advisory only)
144
+ python evals/llm_judge.py
145
+ ```
146
+
147
+ Categories: happy path (21), edge cases (10), adversarial (12), multi-step (10)
148
+
149
+ ### Multi-Model Comparison
150
+
151
+ Compare agent configurations side-by-side across the full eval dataset. Runs each model against all 53 cases using `MockFileDataProvider` (48 holdings, 577 transactions, 5 accounts), scores with deterministic checks + LLM-as-judge, measures response time, and outputs a ranked comparison table.
152
+
153
+ Available models: `rule-based`, `gpt-o`, `gpt-mini`, `claude-haiku`, `claude-sonnet`, `claude-opus`.
154
+
155
+ ```bash
156
+ # Quick smoke test (no API keys needed)
157
+ python evals/compare_models.py --models rule-based --no-judge
158
+
159
+ # Compare two LLM models with verbose per-case output
160
+ python evals/compare_models.py --models gpt-mini claude-sonnet -v
161
+
162
+ # Full run (all models except opus, with LLM judge)
163
+ python evals/compare_models.py
164
+
165
+ # Include opus (expensive)
166
+ python evals/compare_models.py --include-expensive
167
+
168
+ # Filter by eval category
169
+ python evals/compare_models.py --models gpt-o claude-sonnet --categories happy_path edge_cases
170
+ ```
171
+
172
+ Output includes a ranked summary table, per-category breakdown, and a detailed JSON results file. Ranking composite score: `0.4 × det_pass_rate + 0.4 × (judge/5) + 0.2 × (1 − error_rate)`.
173
+
174
+ ## Deployment (Railway)
175
+
176
+ 1. Create a Railway project from this repo
177
+ 2. Set environment variables:
178
+ - `GHOSTFOLIO_DEFAULT_DATA_SOURCE=mock` (or `ghostfolio_api`)
179
+ - `GHOSTFOLIO_OPENROUTER_API_KEY=sk-or-...` and `GHOSTFOLIO_AGENT_MODEL=claude-sonnet` (recommended)
180
+ - Or `GHOSTFOLIO_OPENAI_API_KEY=sk-...` (direct OpenAI, alternative)
181
+ - `GHOSTFOLIO_LANGFUSE_PUBLIC_KEY` / `GHOSTFOLIO_LANGFUSE_SECRET_KEY` (optional)
182
+ 3. Deploy — Railway uses the `Procfile`: `web: bash scripts/start.sh`
183
+ 4. The Streamlit UI is the public entrypoint on `$PORT`
184
+
185
+ ## Project Structure
186
+
187
+ ```
188
+ ghostfolio-agent/
189
+ ├── app/
190
+ │ ├── agent.py # Dual-mode LLM + rule-based agent
191
+ │ ├── config.py # Environment-based settings
192
+ │ ├── ghostfolio_client.py # HTTP client with retry
193
+ │ ├── llm.py # LLM factory (OpenRouter/OpenAI/Anthropic)
194
+ │ ├── main.py # FastAPI server
195
+ │ ├── observability.py # Langfuse tracing
196
+ │ ├── schemas.py # Pydantic models
197
+ │ ├── telemetry.py # Structured logging
198
+ │ ├── tool_defs.py # Tool schemas for LLM
199
+ │ ├── tools.py # 7 tool implementations
200
+ │ └── data_sources/
201
+ │ ├── base.py # Provider protocol
202
+ │ ├── mock_provider.py
203
+ │ ├── mock_file_provider.py # Large dataset provider
204
+ │ └── ghostfolio_api_provider.py
205
+ ├── evals/
206
+ │ ├── eval_dataset.json # 50+ test cases
207
+ │ ├── run_evals.py # Deterministic eval runner
208
+ │ ├── llm_judge.py # LLM-as-judge scorer
209
+ │ └── compare_models.py # Multi-model comparison
210
+ ├── tests/ # pytest test suite
211
+ ├── ui/
212
+ │ └── streamlit_app.py # Chat interface
213
+ ├── docs/
214
+ │ ├── architecture.md # Architecture documentation
215
+ │ └── cost_analysis.md # Cost projections
216
+ └── scripts/
217
+ └── start.sh # Railway startup script
218
+ ```
219
+
220
+ ## Development
221
+
222
+ ```bash
223
+ # Run tests
224
+ pytest -v
225
+
226
+ # Lint
227
+ ruff check app/ tests/ evals/
228
+
229
+ # Format
230
+ ruff format app/ tests/ evals/
231
+ ```
232
+
233
+ ## License
234
+
235
+ See [LICENSE](LICENSE).
@@ -0,0 +1 @@
1
+ """Ghostfolio agent application package."""