agent-panorama 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_panorama-0.1.0/.github/workflows/ci.yml +40 -0
- agent_panorama-0.1.0/.github/workflows/publish.yml +29 -0
- agent_panorama-0.1.0/.gitignore +32 -0
- agent_panorama-0.1.0/LICENSE +21 -0
- agent_panorama-0.1.0/PKG-INFO +211 -0
- agent_panorama-0.1.0/README.md +182 -0
- agent_panorama-0.1.0/assets/logo.png +0 -0
- agent_panorama-0.1.0/config.example.yaml +27 -0
- agent_panorama-0.1.0/examples/langfuse_traces.json +143 -0
- agent_panorama-0.1.0/examples/langsmith_runs.json +56 -0
- agent_panorama-0.1.0/pyproject.toml +63 -0
- agent_panorama-0.1.0/src/agent_panorama/__init__.py +32 -0
- agent_panorama-0.1.0/src/agent_panorama/analysis.py +173 -0
- agent_panorama-0.1.0/src/agent_panorama/cli.py +84 -0
- agent_panorama-0.1.0/src/agent_panorama/config.py +90 -0
- agent_panorama-0.1.0/src/agent_panorama/core.py +76 -0
- agent_panorama-0.1.0/src/agent_panorama/models.py +146 -0
- agent_panorama-0.1.0/src/agent_panorama/parsers/__init__.py +34 -0
- agent_panorama-0.1.0/src/agent_panorama/parsers/common.py +240 -0
- agent_panorama-0.1.0/src/agent_panorama/parsers/langfuse.py +290 -0
- agent_panorama-0.1.0/src/agent_panorama/parsers/langsmith.py +163 -0
- agent_panorama-0.1.0/src/agent_panorama/render.py +82 -0
- agent_panorama-0.1.0/src/agent_panorama/templates/report.html.j2 +172 -0
- agent_panorama-0.1.0/src/agent_panorama/templates/report.md.j2 +65 -0
- agent_panorama-0.1.0/tests/_bootstrap.py +12 -0
- agent_panorama-0.1.0/tests/conftest.py +10 -0
- agent_panorama-0.1.0/tests/run_all_tests.py +24 -0
- agent_panorama-0.1.0/tests/test_analysis.py +84 -0
- agent_panorama-0.1.0/tests/test_common.py +73 -0
- agent_panorama-0.1.0/tests/test_core_cli.py +64 -0
- agent_panorama-0.1.0/tests/test_parsers.py +67 -0
- agent_panorama-0.1.0/tests/test_render.py +66 -0
- agent_panorama-0.1.0/uv.lock +584 -0
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
permissions:
|
|
10
|
+
contents: read
|
|
11
|
+
|
|
12
|
+
jobs:
|
|
13
|
+
test:
|
|
14
|
+
name: Lint & test (py${{ matrix.python-version }})
|
|
15
|
+
runs-on: ubuntu-latest
|
|
16
|
+
strategy:
|
|
17
|
+
fail-fast: false
|
|
18
|
+
matrix:
|
|
19
|
+
python-version: ["3.10", "3.11", "3.12"]
|
|
20
|
+
|
|
21
|
+
steps:
|
|
22
|
+
- name: Check out repository
|
|
23
|
+
uses: actions/checkout@v4
|
|
24
|
+
|
|
25
|
+
- name: Install uv
|
|
26
|
+
uses: astral-sh/setup-uv@v5
|
|
27
|
+
with:
|
|
28
|
+
enable-cache: true
|
|
29
|
+
|
|
30
|
+
- name: Sync dependencies
|
|
31
|
+
run: uv sync --extra dev --python ${{ matrix.python-version }}
|
|
32
|
+
|
|
33
|
+
- name: Ruff lint
|
|
34
|
+
run: uv run ruff check src tests
|
|
35
|
+
|
|
36
|
+
- name: Ruff format check
|
|
37
|
+
run: uv run ruff format --check src tests
|
|
38
|
+
|
|
39
|
+
- name: Run tests
|
|
40
|
+
run: uv run python tests/run_all_tests.py
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
release:
|
|
5
|
+
types: [published]
|
|
6
|
+
|
|
7
|
+
permissions:
|
|
8
|
+
contents: read
|
|
9
|
+
|
|
10
|
+
jobs:
|
|
11
|
+
publish:
|
|
12
|
+
name: Build and publish to PyPI
|
|
13
|
+
runs-on: ubuntu-latest
|
|
14
|
+
environment: pypi
|
|
15
|
+
permissions:
|
|
16
|
+
id-token: write # required for PyPI Trusted Publishing (OIDC)
|
|
17
|
+
|
|
18
|
+
steps:
|
|
19
|
+
- name: Check out repository
|
|
20
|
+
uses: actions/checkout@v4
|
|
21
|
+
|
|
22
|
+
- name: Install uv
|
|
23
|
+
uses: astral-sh/setup-uv@v5
|
|
24
|
+
|
|
25
|
+
- name: Build sdist and wheel
|
|
26
|
+
run: uv build
|
|
27
|
+
|
|
28
|
+
- name: Publish to PyPI (trusted publishing)
|
|
29
|
+
run: uv publish
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*.egg-info/
|
|
5
|
+
.eggs/
|
|
6
|
+
build/
|
|
7
|
+
dist/
|
|
8
|
+
.venv/
|
|
9
|
+
venv/
|
|
10
|
+
|
|
11
|
+
# Tooling
|
|
12
|
+
.mypy_cache/
|
|
13
|
+
.ruff_cache/
|
|
14
|
+
.pytest_cache/
|
|
15
|
+
|
|
16
|
+
# Generated reports
|
|
17
|
+
report.md
|
|
18
|
+
report.html
|
|
19
|
+
/report/
|
|
20
|
+
|
|
21
|
+
# Real trace data dropped in for local validation (keep the instructions file)
|
|
22
|
+
/traces/*
|
|
23
|
+
!/traces/PUT_YOUR_TRACE_HERE.md
|
|
24
|
+
|
|
25
|
+
# Never commit raw trace exports anywhere in the tree (may contain private data)
|
|
26
|
+
trace-*.json
|
|
27
|
+
*trace_export*.json
|
|
28
|
+
|
|
29
|
+
# OS / editors
|
|
30
|
+
.DS_Store
|
|
31
|
+
.idea/
|
|
32
|
+
.vscode/
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 agent-panorama contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,211 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: agent-panorama
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Turn Langfuse/LangSmith agent traces into human-readable Agent Activity Reports (Markdown + HTML).
|
|
5
|
+
Project-URL: Homepage, https://github.com/Idank96/agent-panorama
|
|
6
|
+
Project-URL: Repository, https://github.com/Idank96/agent-panorama
|
|
7
|
+
Project-URL: Issues, https://github.com/Idank96/agent-panorama/issues
|
|
8
|
+
Author: agent-panorama contributors
|
|
9
|
+
License: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: agents,langfuse,langsmith,llm,observability,reporting
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
19
|
+
Requires-Python: >=3.10
|
|
20
|
+
Requires-Dist: click>=8.1
|
|
21
|
+
Requires-Dist: jinja2>=3.1
|
|
22
|
+
Requires-Dist: pyyaml>=6.0
|
|
23
|
+
Provides-Extra: dev
|
|
24
|
+
Requires-Dist: mypy>=1.10; extra == 'dev'
|
|
25
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
26
|
+
Requires-Dist: ruff>=0.6; extra == 'dev'
|
|
27
|
+
Requires-Dist: types-pyyaml>=6.0; extra == 'dev'
|
|
28
|
+
Description-Content-Type: text/markdown
|
|
29
|
+
|
|
30
|
+
<p align="center">
|
|
31
|
+
<img src="https://raw.githubusercontent.com/Idank96/agent-panorama/main/assets/logo.png" alt="agent-panorama" width="320">
|
|
32
|
+
</p>
|
|
33
|
+
|
|
34
|
+
<h1 align="center">agent-panorama</h1>
|
|
35
|
+
|
|
36
|
+
<p align="center">
|
|
37
|
+
<a href="https://github.com/Idank96/agent-panorama/actions/workflows/ci.yml"><img src="https://github.com/Idank96/agent-panorama/actions/workflows/ci.yml/badge.svg" alt="CI"></a>
|
|
38
|
+
</p>
|
|
39
|
+
|
|
40
|
+
Turn raw LLM agent traces into a **human-readable Agent Activity Report** that a
|
|
41
|
+
non-engineer can actually read. Point it at a Langfuse (or LangSmith) trace
|
|
42
|
+
export and get clean Markdown + a self-contained HTML report that explains, in
|
|
43
|
+
business language, what your agents did, what they decided, and anything that
|
|
44
|
+
looks off.
|
|
45
|
+
|
|
46
|
+
## Why
|
|
47
|
+
|
|
48
|
+
Traces are great for engineers and terrible for everyone else. `agent-panorama`
|
|
49
|
+
translates tool calls, retries, token usage, and errors into plain English. It
|
|
50
|
+
also pulls the real user request and final answer out of LangGraph/LangChain
|
|
51
|
+
`messages` payloads, so the report reads like a story, not a JSON dump:
|
|
52
|
+
|
|
53
|
+
- `get_weather({"city": "Paris"})` → **"Looked up the weather"**
|
|
54
|
+
- 3 failed model calls → **"High retry count: 3 failed attempts before completing."**
|
|
55
|
+
- `human_handoff(...)` → run outcome **human-escalated**
|
|
56
|
+
|
|
57
|
+
> Cost/USD estimation is intentionally out of scope for now — the report shows
|
|
58
|
+
> token usage, not dollars.
|
|
59
|
+
|
|
60
|
+
## Install
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
pip install agent-panorama
|
|
64
|
+
# or, for local development:
|
|
65
|
+
uv pip install -e ".[dev]"
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
Requires Python 3.10+. Dependencies are intentionally minimal: `click`,
|
|
69
|
+
`jinja2`, `pyyaml`.
|
|
70
|
+
|
|
71
|
+
## CLI usage
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
agent-panorama generate --input traces.json --output ./report --format html
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
Options:
|
|
78
|
+
|
|
79
|
+
| Option | Description |
|
|
80
|
+
| --- | --- |
|
|
81
|
+
| `--input` | Path to the Langfuse/LangSmith JSON export (required). |
|
|
82
|
+
| `--output` | Output directory (default `./report`). |
|
|
83
|
+
| `--format` | `md`, `html`, or `both` (default `both`). |
|
|
84
|
+
| `--input-type` | `langfuse` or `langsmith` (default `langfuse`). |
|
|
85
|
+
| `--config` | Optional YAML config (tool naming, thresholds). |
|
|
86
|
+
|
|
87
|
+
Try it on the bundled example:
|
|
88
|
+
|
|
89
|
+
```bash
|
|
90
|
+
agent-panorama generate --input examples/langfuse_traces.json --output ./report
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
## Library usage
|
|
94
|
+
|
|
95
|
+
```python
|
|
96
|
+
from agent_panorama import generate_report
|
|
97
|
+
|
|
98
|
+
report = generate_report(
|
|
99
|
+
"traces.json",
|
|
100
|
+
output_dir="./report",
|
|
101
|
+
formats=["md", "html"],
|
|
102
|
+
input_type="langfuse",
|
|
103
|
+
config="config.yaml", # optional
|
|
104
|
+
)
|
|
105
|
+
|
|
106
|
+
print(report.total_runs, report.total_tokens)
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
`generate_report` returns the in-memory `Report`, so you can also inspect runs,
|
|
110
|
+
the decision log, and anomalies programmatically without re-reading the output (use
|
|
111
|
+
`build_report_from_file` if you want the report without writing files).
|
|
112
|
+
|
|
113
|
+
## What's in a report
|
|
114
|
+
|
|
115
|
+
- **Summary** — time range, total runs, total actions, total tokens.
|
|
116
|
+
- **Per-agent section** — what it was asked to do, what it decided/did (tool calls
|
|
117
|
+
in plain English), final outcome, and a confidence signal (retries / fallback).
|
|
118
|
+
- **Decision log** — a sortable table of every consequential action: timestamp,
|
|
119
|
+
agent, action, parameters summarized in plain English, outcome.
|
|
120
|
+
- **Anomalies** — high retry counts, slow runs, high activity, errors, fallbacks.
|
|
121
|
+
|
|
122
|
+
## Configuration
|
|
123
|
+
|
|
124
|
+
All configuration is optional. See [`config.example.yaml`](config.example.yaml)
|
|
125
|
+
for the full set. Highlights:
|
|
126
|
+
|
|
127
|
+
```yaml
|
|
128
|
+
tool_descriptions:
|
|
129
|
+
get_weather: "Looked up the weather"
|
|
130
|
+
|
|
131
|
+
consequential_tools: [send_email, human_handoff]
|
|
132
|
+
escalation_tools: [human_handoff, handoff_to_agent]
|
|
133
|
+
|
|
134
|
+
anomaly_thresholds:
|
|
135
|
+
max_retries: 2
|
|
136
|
+
max_latency_seconds: 30
|
|
137
|
+
max_tool_calls: 15
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
## Supported inputs
|
|
141
|
+
|
|
142
|
+
- **Langfuse** trace exports — a single trace dict, the single-trace
|
|
143
|
+
`{"trace": {...}, "observations": [...]}` shape, a list of traces, or the
|
|
144
|
+
`{"data": [...]}` list-API shape. Tool calls are read from `TOOL`
|
|
145
|
+
observations (falling back to tool spans), and from `toolCalls` / OpenAI-style
|
|
146
|
+
`tool_calls` declared on generations.
|
|
147
|
+
- **LangSmith** run exports — a flat list (or `{"runs": [...]}`) of run nodes;
|
|
148
|
+
each root run is flattened into one agent run.
|
|
149
|
+
|
|
150
|
+
Token usage is read from the trace (`inputUsage`/`outputUsage` or
|
|
151
|
+
`usage`/`usage_metadata`). Dollar-cost estimation is intentionally out of scope.
|
|
152
|
+
|
|
153
|
+
## Roadmap
|
|
154
|
+
|
|
155
|
+
`agent-panorama` starts as a report generator and is growing into an **oversight
|
|
156
|
+
layer for fleets of agents** — a single pane of glass for everything your agents
|
|
157
|
+
did, decided, and got wrong. More than logs, across more than one agent.
|
|
158
|
+
|
|
159
|
+
**✅ v0.1 — Read one run clearly _(today)_**
|
|
160
|
+
- Langfuse + LangSmith trace ingestion
|
|
161
|
+
- Plain-language per-agent summaries, decision log, anomalies
|
|
162
|
+
- Markdown + self-contained HTML output; CLI and library API
|
|
163
|
+
|
|
164
|
+
**🔜 v0.2 — See the whole fleet (the panorama view)**
|
|
165
|
+
- A unified **cross-agent activity feed** — one scannable timeline of what every
|
|
166
|
+
agent did, in plain English:
|
|
167
|
+
|
|
168
|
+
```text
|
|
169
|
+
Agent Activity — May 28, 14:30–15:00
|
|
170
|
+
|
|
171
|
+
research-assistant → searched the web, summarized 3 papers ✓ success
|
|
172
|
+
scheduling-assistant → checked the calendar, handed the task to a human ⤴ escalated
|
|
173
|
+
weather-assistant → looked up the weather (retried once), emailed it ✓ success
|
|
174
|
+
billing-agent → issued 2 refunds, flagged 1 for review ⚠ anomaly
|
|
175
|
+
```
|
|
176
|
+
- Aggregate many traces into one report (by session, time window, or file glob)
|
|
177
|
+
- Per-agent rollups: runs, actions, success / escalation / retry rates
|
|
178
|
+
- Cross-agent decision log spanning every agent in the window
|
|
179
|
+
|
|
180
|
+
**📈 v0.3 — Trends & regressions**
|
|
181
|
+
- Track rates over time, not just a point-in-time snapshot
|
|
182
|
+
- Flag regressions (escalations or retries spiking vs. a baseline)
|
|
183
|
+
- Period-over-period comparison ("this week vs. last")
|
|
184
|
+
|
|
185
|
+
**🔌 v0.4 — More sources & deeper detail**
|
|
186
|
+
- OpenTelemetry / OpenInference and raw OpenAI-style logs
|
|
187
|
+
- Optionally fetch full input/output from the Langfuse API to enrich
|
|
188
|
+
decision-log parameters
|
|
189
|
+
- Pluggable parser interface for custom trace formats
|
|
190
|
+
|
|
191
|
+
**🎯 The vision — Continuous oversight**
|
|
192
|
+
- A live dashboard: the activity feed above, always-on, filterable by agent /
|
|
193
|
+
outcome / time
|
|
194
|
+
- Scheduled/continuous reports instead of one-off runs
|
|
195
|
+
- Accountability views a non-engineer can sign off on (what happened, what needs
|
|
196
|
+
a human)
|
|
197
|
+
- Alerting on anomalies across the fleet
|
|
198
|
+
|
|
199
|
+
> Have a use case or a trace format you want supported? Open an issue.
|
|
200
|
+
|
|
201
|
+
## Development
|
|
202
|
+
|
|
203
|
+
```bash
|
|
204
|
+
uv pip install -e ".[dev]"
|
|
205
|
+
python tests/run_all_tests.py # run the full suite
|
|
206
|
+
ruff check . && ruff format --check .
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
## License
|
|
210
|
+
|
|
211
|
+
MIT — see [LICENSE](LICENSE).
|
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
<p align="center">
|
|
2
|
+
<img src="https://raw.githubusercontent.com/Idank96/agent-panorama/main/assets/logo.png" alt="agent-panorama" width="320">
|
|
3
|
+
</p>
|
|
4
|
+
|
|
5
|
+
<h1 align="center">agent-panorama</h1>
|
|
6
|
+
|
|
7
|
+
<p align="center">
|
|
8
|
+
<a href="https://github.com/Idank96/agent-panorama/actions/workflows/ci.yml"><img src="https://github.com/Idank96/agent-panorama/actions/workflows/ci.yml/badge.svg" alt="CI"></a>
|
|
9
|
+
</p>
|
|
10
|
+
|
|
11
|
+
Turn raw LLM agent traces into a **human-readable Agent Activity Report** that a
|
|
12
|
+
non-engineer can actually read. Point it at a Langfuse (or LangSmith) trace
|
|
13
|
+
export and get clean Markdown + a self-contained HTML report that explains, in
|
|
14
|
+
business language, what your agents did, what they decided, and anything that
|
|
15
|
+
looks off.
|
|
16
|
+
|
|
17
|
+
## Why
|
|
18
|
+
|
|
19
|
+
Traces are great for engineers and terrible for everyone else. `agent-panorama`
|
|
20
|
+
translates tool calls, retries, token usage, and errors into plain English. It
|
|
21
|
+
also pulls the real user request and final answer out of LangGraph/LangChain
|
|
22
|
+
`messages` payloads, so the report reads like a story, not a JSON dump:
|
|
23
|
+
|
|
24
|
+
- `get_weather({"city": "Paris"})` → **"Looked up the weather"**
|
|
25
|
+
- 3 failed model calls → **"High retry count: 3 failed attempts before completing."**
|
|
26
|
+
- `human_handoff(...)` → run outcome **human-escalated**
|
|
27
|
+
|
|
28
|
+
> Cost/USD estimation is intentionally out of scope for now — the report shows
|
|
29
|
+
> token usage, not dollars.
|
|
30
|
+
|
|
31
|
+
## Install
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
pip install agent-panorama
|
|
35
|
+
# or, for local development:
|
|
36
|
+
uv pip install -e ".[dev]"
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
Requires Python 3.10+. Dependencies are intentionally minimal: `click`,
|
|
40
|
+
`jinja2`, `pyyaml`.
|
|
41
|
+
|
|
42
|
+
## CLI usage
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
agent-panorama generate --input traces.json --output ./report --format html
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
Options:
|
|
49
|
+
|
|
50
|
+
| Option | Description |
|
|
51
|
+
| --- | --- |
|
|
52
|
+
| `--input` | Path to the Langfuse/LangSmith JSON export (required). |
|
|
53
|
+
| `--output` | Output directory (default `./report`). |
|
|
54
|
+
| `--format` | `md`, `html`, or `both` (default `both`). |
|
|
55
|
+
| `--input-type` | `langfuse` or `langsmith` (default `langfuse`). |
|
|
56
|
+
| `--config` | Optional YAML config (tool naming, thresholds). |
|
|
57
|
+
|
|
58
|
+
Try it on the bundled example:
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
agent-panorama generate --input examples/langfuse_traces.json --output ./report
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## Library usage
|
|
65
|
+
|
|
66
|
+
```python
|
|
67
|
+
from agent_panorama import generate_report
|
|
68
|
+
|
|
69
|
+
report = generate_report(
|
|
70
|
+
"traces.json",
|
|
71
|
+
output_dir="./report",
|
|
72
|
+
formats=["md", "html"],
|
|
73
|
+
input_type="langfuse",
|
|
74
|
+
config="config.yaml", # optional
|
|
75
|
+
)
|
|
76
|
+
|
|
77
|
+
print(report.total_runs, report.total_tokens)
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
`generate_report` returns the in-memory `Report`, so you can also inspect runs,
|
|
81
|
+
the decision log, and anomalies programmatically without re-reading the output (use
|
|
82
|
+
`build_report_from_file` if you want the report without writing files).
|
|
83
|
+
|
|
84
|
+
## What's in a report
|
|
85
|
+
|
|
86
|
+
- **Summary** — time range, total runs, total actions, total tokens.
|
|
87
|
+
- **Per-agent section** — what it was asked to do, what it decided/did (tool calls
|
|
88
|
+
in plain English), final outcome, and a confidence signal (retries / fallback).
|
|
89
|
+
- **Decision log** — a sortable table of every consequential action: timestamp,
|
|
90
|
+
agent, action, parameters summarized in plain English, outcome.
|
|
91
|
+
- **Anomalies** — high retry counts, slow runs, high activity, errors, fallbacks.
|
|
92
|
+
|
|
93
|
+
## Configuration
|
|
94
|
+
|
|
95
|
+
All configuration is optional. See [`config.example.yaml`](config.example.yaml)
|
|
96
|
+
for the full set. Highlights:
|
|
97
|
+
|
|
98
|
+
```yaml
|
|
99
|
+
tool_descriptions:
|
|
100
|
+
get_weather: "Looked up the weather"
|
|
101
|
+
|
|
102
|
+
consequential_tools: [send_email, human_handoff]
|
|
103
|
+
escalation_tools: [human_handoff, handoff_to_agent]
|
|
104
|
+
|
|
105
|
+
anomaly_thresholds:
|
|
106
|
+
max_retries: 2
|
|
107
|
+
max_latency_seconds: 30
|
|
108
|
+
max_tool_calls: 15
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
## Supported inputs
|
|
112
|
+
|
|
113
|
+
- **Langfuse** trace exports — a single trace dict, the single-trace
|
|
114
|
+
`{"trace": {...}, "observations": [...]}` shape, a list of traces, or the
|
|
115
|
+
`{"data": [...]}` list-API shape. Tool calls are read from `TOOL`
|
|
116
|
+
observations (falling back to tool spans), and from `toolCalls` / OpenAI-style
|
|
117
|
+
`tool_calls` declared on generations.
|
|
118
|
+
- **LangSmith** run exports — a flat list (or `{"runs": [...]}`) of run nodes;
|
|
119
|
+
each root run is flattened into one agent run.
|
|
120
|
+
|
|
121
|
+
Token usage is read from the trace (`inputUsage`/`outputUsage` or
|
|
122
|
+
`usage`/`usage_metadata`). Dollar-cost estimation is intentionally out of scope.
|
|
123
|
+
|
|
124
|
+
## Roadmap
|
|
125
|
+
|
|
126
|
+
`agent-panorama` starts as a report generator and is growing into an **oversight
|
|
127
|
+
layer for fleets of agents** — a single pane of glass for everything your agents
|
|
128
|
+
did, decided, and got wrong. More than logs, across more than one agent.
|
|
129
|
+
|
|
130
|
+
**✅ v0.1 — Read one run clearly _(today)_**
|
|
131
|
+
- Langfuse + LangSmith trace ingestion
|
|
132
|
+
- Plain-language per-agent summaries, decision log, anomalies
|
|
133
|
+
- Markdown + self-contained HTML output; CLI and library API
|
|
134
|
+
|
|
135
|
+
**🔜 v0.2 — See the whole fleet (the panorama view)**
|
|
136
|
+
- A unified **cross-agent activity feed** — one scannable timeline of what every
|
|
137
|
+
agent did, in plain English:
|
|
138
|
+
|
|
139
|
+
```text
|
|
140
|
+
Agent Activity — May 28, 14:30–15:00
|
|
141
|
+
|
|
142
|
+
research-assistant → searched the web, summarized 3 papers ✓ success
|
|
143
|
+
scheduling-assistant → checked the calendar, handed the task to a human ⤴ escalated
|
|
144
|
+
weather-assistant → looked up the weather (retried once), emailed it ✓ success
|
|
145
|
+
billing-agent → issued 2 refunds, flagged 1 for review ⚠ anomaly
|
|
146
|
+
```
|
|
147
|
+
- Aggregate many traces into one report (by session, time window, or file glob)
|
|
148
|
+
- Per-agent rollups: runs, actions, success / escalation / retry rates
|
|
149
|
+
- Cross-agent decision log spanning every agent in the window
|
|
150
|
+
|
|
151
|
+
**📈 v0.3 — Trends & regressions**
|
|
152
|
+
- Track rates over time, not just a point-in-time snapshot
|
|
153
|
+
- Flag regressions (escalations or retries spiking vs. a baseline)
|
|
154
|
+
- Period-over-period comparison ("this week vs. last")
|
|
155
|
+
|
|
156
|
+
**🔌 v0.4 — More sources & deeper detail**
|
|
157
|
+
- OpenTelemetry / OpenInference and raw OpenAI-style logs
|
|
158
|
+
- Optionally fetch full input/output from the Langfuse API to enrich
|
|
159
|
+
decision-log parameters
|
|
160
|
+
- Pluggable parser interface for custom trace formats
|
|
161
|
+
|
|
162
|
+
**🎯 The vision — Continuous oversight**
|
|
163
|
+
- A live dashboard: the activity feed above, always-on, filterable by agent /
|
|
164
|
+
outcome / time
|
|
165
|
+
- Scheduled/continuous reports instead of one-off runs
|
|
166
|
+
- Accountability views a non-engineer can sign off on (what happened, what needs
|
|
167
|
+
a human)
|
|
168
|
+
- Alerting on anomalies across the fleet
|
|
169
|
+
|
|
170
|
+
> Have a use case or a trace format you want supported? Open an issue.
|
|
171
|
+
|
|
172
|
+
## Development
|
|
173
|
+
|
|
174
|
+
```bash
|
|
175
|
+
uv pip install -e ".[dev]"
|
|
176
|
+
python tests/run_all_tests.py # run the full suite
|
|
177
|
+
ruff check . && ruff format --check .
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
## License
|
|
181
|
+
|
|
182
|
+
MIT — see [LICENSE](LICENSE).
|
|
Binary file
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# agent-panorama configuration (all sections optional).
|
|
2
|
+
#
|
|
3
|
+
# Pass with: agent-panorama generate --input traces.json --output ./report --config config.yaml
|
|
4
|
+
|
|
5
|
+
# Map raw tool names to readable descriptions used in the report.
|
|
6
|
+
tool_descriptions:
|
|
7
|
+
web_search: "Searched the web"
|
|
8
|
+
get_weather: "Looked up the weather"
|
|
9
|
+
send_email: "Sent an email"
|
|
10
|
+
human_handoff: "Handed the task off to a person"
|
|
11
|
+
|
|
12
|
+
# Tool names whose calls are considered "consequential" (side effects) and
|
|
13
|
+
# therefore listed in the Decision Log. If omitted, every tool call is listed.
|
|
14
|
+
consequential_tools:
|
|
15
|
+
- send_email
|
|
16
|
+
- human_handoff
|
|
17
|
+
|
|
18
|
+
# Tool names that signal a human escalation (sets run outcome to "escalated").
|
|
19
|
+
escalation_tools:
|
|
20
|
+
- human_handoff
|
|
21
|
+
- handoff_to_agent
|
|
22
|
+
|
|
23
|
+
# Thresholds for the Anomalies section.
|
|
24
|
+
anomaly_thresholds:
|
|
25
|
+
max_retries: 2 # more than this many retries is flagged
|
|
26
|
+
max_latency_seconds: 30 # runs slower than this are flagged
|
|
27
|
+
max_tool_calls: 15 # runs with more tool calls than this are flagged
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
[
|
|
2
|
+
{
|
|
3
|
+
"id": "trace-001",
|
|
4
|
+
"name": "research-assistant",
|
|
5
|
+
"timestamp": "2026-05-20T09:15:00.000Z",
|
|
6
|
+
"input": {"question": "What are the top AI papers this week?"},
|
|
7
|
+
"output": {"content": "Here are three notable AI papers from this week, each with a short summary."},
|
|
8
|
+
"observations": [
|
|
9
|
+
{
|
|
10
|
+
"id": "obs-001-a",
|
|
11
|
+
"type": "GENERATION",
|
|
12
|
+
"name": "plan-response",
|
|
13
|
+
"startTime": "2026-05-20T09:15:01.000Z",
|
|
14
|
+
"endTime": "2026-05-20T09:15:03.500Z",
|
|
15
|
+
"model": "gpt-4o",
|
|
16
|
+
"usage": {"input": 800, "output": 120, "unit": "TOKENS"},
|
|
17
|
+
"level": "DEFAULT",
|
|
18
|
+
"output": {"tool_calls": [{"function": {"name": "web_search", "arguments": "{\"query\": \"top AI papers this week\"}"}}]}
|
|
19
|
+
},
|
|
20
|
+
{
|
|
21
|
+
"id": "obs-001-b",
|
|
22
|
+
"type": "SPAN",
|
|
23
|
+
"name": "web_search",
|
|
24
|
+
"startTime": "2026-05-20T09:15:03.600Z",
|
|
25
|
+
"endTime": "2026-05-20T09:15:04.200Z",
|
|
26
|
+
"input": {"query": "top AI papers this week"},
|
|
27
|
+
"output": {"results": 3},
|
|
28
|
+
"level": "DEFAULT"
|
|
29
|
+
},
|
|
30
|
+
{
|
|
31
|
+
"id": "obs-001-c",
|
|
32
|
+
"type": "SPAN",
|
|
33
|
+
"name": "summarize_text",
|
|
34
|
+
"startTime": "2026-05-20T09:15:04.300Z",
|
|
35
|
+
"endTime": "2026-05-20T09:15:04.900Z",
|
|
36
|
+
"input": {"documents": 3, "max_words": 80},
|
|
37
|
+
"output": {"summary": "three short summaries"},
|
|
38
|
+
"level": "DEFAULT"
|
|
39
|
+
},
|
|
40
|
+
{
|
|
41
|
+
"id": "obs-001-d",
|
|
42
|
+
"type": "GENERATION",
|
|
43
|
+
"name": "final-answer",
|
|
44
|
+
"startTime": "2026-05-20T09:15:05.000Z",
|
|
45
|
+
"endTime": "2026-05-20T09:15:06.400Z",
|
|
46
|
+
"model": "gpt-4o",
|
|
47
|
+
"usage": {"input": 1000, "output": 100, "unit": "TOKENS"},
|
|
48
|
+
"level": "DEFAULT",
|
|
49
|
+
"output": {"content": "Here are three notable AI papers from this week."}
|
|
50
|
+
}
|
|
51
|
+
]
|
|
52
|
+
},
|
|
53
|
+
{
|
|
54
|
+
"id": "trace-002",
|
|
55
|
+
"name": "scheduling-assistant",
|
|
56
|
+
"timestamp": "2026-05-20T10:02:00.000Z",
|
|
57
|
+
"input": {"question": "Book a meeting with the design team next week."},
|
|
58
|
+
"output": {"content": "This request has been handed off to a human scheduler."},
|
|
59
|
+
"observations": [
|
|
60
|
+
{
|
|
61
|
+
"id": "obs-002-a",
|
|
62
|
+
"type": "GENERATION",
|
|
63
|
+
"name": "assess-request",
|
|
64
|
+
"startTime": "2026-05-20T10:02:01.000Z",
|
|
65
|
+
"endTime": "2026-05-20T10:02:04.000Z",
|
|
66
|
+
"model": "claude-3-5-sonnet-20241022",
|
|
67
|
+
"usage": {"input": 1200, "output": 200, "unit": "TOKENS"},
|
|
68
|
+
"level": "DEFAULT",
|
|
69
|
+
"output": {"content": "This needs a human to confirm availability."}
|
|
70
|
+
},
|
|
71
|
+
{
|
|
72
|
+
"id": "obs-002-b",
|
|
73
|
+
"type": "SPAN",
|
|
74
|
+
"name": "check_calendar",
|
|
75
|
+
"startTime": "2026-05-20T10:02:04.100Z",
|
|
76
|
+
"endTime": "2026-05-20T10:02:04.400Z",
|
|
77
|
+
"input": {"team": "design", "window": "next_week"},
|
|
78
|
+
"output": {"free_slots": 2},
|
|
79
|
+
"level": "DEFAULT"
|
|
80
|
+
},
|
|
81
|
+
{
|
|
82
|
+
"id": "obs-002-c",
|
|
83
|
+
"type": "SPAN",
|
|
84
|
+
"name": "human_handoff",
|
|
85
|
+
"startTime": "2026-05-20T10:02:04.500Z",
|
|
86
|
+
"endTime": "2026-05-20T10:02:04.800Z",
|
|
87
|
+
"input": {"reason": "needs human confirmation", "queue": "scheduling"},
|
|
88
|
+
"output": {"ticket_id": "hx-3320"},
|
|
89
|
+
"level": "DEFAULT"
|
|
90
|
+
}
|
|
91
|
+
]
|
|
92
|
+
},
|
|
93
|
+
{
|
|
94
|
+
"id": "trace-003",
|
|
95
|
+
"name": "weather-assistant",
|
|
96
|
+
"timestamp": "2026-05-20T11:30:00.000Z",
|
|
97
|
+
"input": {"task": "Email me tomorrow's forecast for Paris."},
|
|
98
|
+
"output": {"content": "Forecast sent after recovering from a provider timeout."},
|
|
99
|
+
"observations": [
|
|
100
|
+
{
|
|
101
|
+
"id": "obs-003-a",
|
|
102
|
+
"type": "GENERATION",
|
|
103
|
+
"name": "plan",
|
|
104
|
+
"startTime": "2026-05-20T11:30:01.000Z",
|
|
105
|
+
"endTime": "2026-05-20T11:30:02.000Z",
|
|
106
|
+
"model": "gpt-4o-mini",
|
|
107
|
+
"usage": {"input": 500, "output": 70, "unit": "TOKENS"},
|
|
108
|
+
"level": "DEFAULT"
|
|
109
|
+
},
|
|
110
|
+
{
|
|
111
|
+
"id": "obs-003-b",
|
|
112
|
+
"type": "SPAN",
|
|
113
|
+
"name": "get_weather",
|
|
114
|
+
"startTime": "2026-05-20T11:30:02.100Z",
|
|
115
|
+
"endTime": "2026-05-20T11:30:14.100Z",
|
|
116
|
+
"input": {"city": "Paris", "provider": "primary"},
|
|
117
|
+
"output": {"error": "provider timeout"},
|
|
118
|
+
"level": "ERROR",
|
|
119
|
+
"statusMessage": "Weather provider timed out after 12s"
|
|
120
|
+
},
|
|
121
|
+
{
|
|
122
|
+
"id": "obs-003-c",
|
|
123
|
+
"type": "SPAN",
|
|
124
|
+
"name": "get_weather",
|
|
125
|
+
"startTime": "2026-05-20T11:30:14.200Z",
|
|
126
|
+
"endTime": "2026-05-20T11:30:15.400Z",
|
|
127
|
+
"input": {"city": "Paris", "provider": "backup"},
|
|
128
|
+
"output": {"temp_c": 18, "summary": "partly cloudy"},
|
|
129
|
+
"level": "DEFAULT"
|
|
130
|
+
},
|
|
131
|
+
{
|
|
132
|
+
"id": "obs-003-d",
|
|
133
|
+
"type": "SPAN",
|
|
134
|
+
"name": "send_email",
|
|
135
|
+
"startTime": "2026-05-20T11:30:15.500Z",
|
|
136
|
+
"endTime": "2026-05-20T11:30:16.000Z",
|
|
137
|
+
"input": {"to": "user", "subject": "Paris forecast for tomorrow"},
|
|
138
|
+
"output": {"sent": true},
|
|
139
|
+
"level": "DEFAULT"
|
|
140
|
+
}
|
|
141
|
+
]
|
|
142
|
+
}
|
|
143
|
+
]
|