adktelemetry 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. adktelemetry-0.1.0/LICENSE +21 -0
  2. adktelemetry-0.1.0/PKG-INFO +253 -0
  3. adktelemetry-0.1.0/README.md +200 -0
  4. adktelemetry-0.1.0/adktelemetry/__init__.py +8 -0
  5. adktelemetry-0.1.0/adktelemetry/adk/__init__.py +1 -0
  6. adktelemetry-0.1.0/adktelemetry/adk/agent_wrapper.py +4 -0
  7. adktelemetry-0.1.0/adktelemetry/adk/loader_patch.py +28 -0
  8. adktelemetry-0.1.0/adktelemetry/agentelemetry.py +58 -0
  9. adktelemetry-0.1.0/adktelemetry/autoagent.py +31 -0
  10. adktelemetry-0.1.0/adktelemetry/config.py +24 -0
  11. adktelemetry-0.1.0/adktelemetry/context.py +17 -0
  12. adktelemetry-0.1.0/adktelemetry/exceptions.py +14 -0
  13. adktelemetry-0.1.0/adktelemetry/finops.py +140 -0
  14. adktelemetry-0.1.0/adktelemetry/gemini_pricing.yaml +88 -0
  15. adktelemetry-0.1.0/adktelemetry/hooks.py +69 -0
  16. adktelemetry-0.1.0/adktelemetry/live_notify.py +97 -0
  17. adktelemetry-0.1.0/adktelemetry/patch_cli.py +77 -0
  18. adktelemetry-0.1.0/adktelemetry/registry.py +37 -0
  19. adktelemetry-0.1.0/adktelemetry/runtime.py +19 -0
  20. adktelemetry-0.1.0/adktelemetry/server.py +1762 -0
  21. adktelemetry-0.1.0/adktelemetry/sse_telemetry.py +166 -0
  22. adktelemetry-0.1.0/adktelemetry/store.py +839 -0
  23. adktelemetry-0.1.0/adktelemetry/tools.py +43 -0
  24. adktelemetry-0.1.0/adktelemetry.egg-info/PKG-INFO +253 -0
  25. adktelemetry-0.1.0/adktelemetry.egg-info/SOURCES.txt +40 -0
  26. adktelemetry-0.1.0/adktelemetry.egg-info/dependency_links.txt +1 -0
  27. adktelemetry-0.1.0/adktelemetry.egg-info/requires.txt +5 -0
  28. adktelemetry-0.1.0/adktelemetry.egg-info/top_level.txt +1 -0
  29. adktelemetry-0.1.0/pyproject.toml +61 -0
  30. adktelemetry-0.1.0/setup.cfg +4 -0
  31. adktelemetry-0.1.0/tests/test_adk_compat.py +27 -0
  32. adktelemetry-0.1.0/tests/test_agentelemetry.py +11 -0
  33. adktelemetry-0.1.0/tests/test_autoagent.py +17 -0
  34. adktelemetry-0.1.0/tests/test_error_breakdown.py +75 -0
  35. adktelemetry-0.1.0/tests/test_error_inference.py +24 -0
  36. adktelemetry-0.1.0/tests/test_event_error_scan.py +79 -0
  37. adktelemetry-0.1.0/tests/test_finops.py +52 -0
  38. adktelemetry-0.1.0/tests/test_registry.py +28 -0
  39. adktelemetry-0.1.0/tests/test_session_detail.py +150 -0
  40. adktelemetry-0.1.0/tests/test_snapshot_filtered.py +82 -0
  41. adktelemetry-0.1.0/tests/test_sse_error_capture.py +40 -0
  42. adktelemetry-0.1.0/tests/test_sse_route_patch.py +63 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Zack
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,253 @@
1
+ Metadata-Version: 2.4
2
+ Name: adktelemetry
3
+ Version: 0.1.0
4
+ Summary: Library for Google ADK focused on observability, telemetry, session tracing, and operational cost analysis for AI agents.
5
+ Author-email: Zack Mariano <zack.cmariano@oraicle.ai>
6
+ License: MIT License
7
+
8
+ Copyright (c) 2026 Zack
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+
28
+ Project-URL: Homepage, https://github.com/zackcmariano/AdkTelemetry
29
+ Project-URL: Documentation, https://github.com/zackcmariano/AdkTelemetry
30
+ Project-URL: Repository, https://github.com/zackcmariano/AdkTelemetry
31
+ Project-URL: Issues, https://github.com/zackcmariano/AdkTelemetry/issues
32
+ Keywords: google-adk,adk,telemetry,observability,ai-agents,agent-observability,session-tracing,finops,llm-monitoring,multi-agent
33
+ Classifier: Development Status :: 3 - Alpha
34
+ Classifier: Intended Audience :: Developers
35
+ Classifier: Intended Audience :: Information Technology
36
+ Classifier: License :: OSI Approved :: MIT License
37
+ Classifier: Programming Language :: Python :: 3
38
+ Classifier: Programming Language :: Python :: 3.10
39
+ Classifier: Programming Language :: Python :: 3.11
40
+ Classifier: Programming Language :: Python :: 3.12
41
+ Classifier: Operating System :: OS Independent
42
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
43
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
44
+ Classifier: Topic :: System :: Monitoring
45
+ Requires-Python: >=3.10
46
+ Description-Content-Type: text/markdown
47
+ License-File: LICENSE
48
+ Requires-Dist: google-adk>=1.16.0
49
+ Requires-Dist: pyyaml>=6.0
50
+ Provides-Extra: dev
51
+ Requires-Dist: pytest>=7.0; extra == "dev"
52
+ Dynamic: license-file
53
+
54
+ # AdkTelemetry
55
+
56
+ ![AdkTelemetry](https://raw.githubusercontent.com/zackcmariano/AdkTelemetry/refs/heads/master/assets/adk-telemetry-lib.png)
57
+
58
+ > **Observability & FinOps for Google ADK agents - in real time.**
59
+
60
+ **AdkTelemetry** is the product name. On PyPI the distribution is **`adktelemetry`** (all lowercase): use `pip install adktelemetry` and `import adktelemetry` in code.
61
+
62
+ AdkTelemetry is a **Python library for Google ADK** that captures **runner events**, **token usage**, **estimated USD cost**, and **error signals**, then exposes them through a **built-in dashboard** and **JSON APIs**.
63
+
64
+ **Dashboard URL** (example with `adk web` on port 8080): `http://localhost:8080/adktelemetry`
65
+
66
+ ---
67
+
68
+ ## Installation
69
+
70
+ ```bash
71
+ pip install adktelemetry
72
+ ```
73
+
74
+ ---
75
+
76
+ ## Quick start (developer)
77
+
78
+ 1. **Enable telemetry in your agent module** (call once at import time, before runs):
79
+
80
+ ```python
81
+ from adktelemetry import agentelemetry
82
+
83
+ agentelemetry(
84
+ modelkey="YOUR_GEMINI_API_KEY", # required - use your app’s secret/config source
85
+ adkmodel="gemini-2.5-flash", # optional FinOps fallback when events lack model_version
86
+ )
87
+ ```
88
+
89
+ 2. **Register the dashboard with `adk web`** by adding a `services.py` next to your app (same **agents directory** root ADK loads). ADK imports it **before** the web server builds FastAPI:
90
+
91
+ ```python
92
+ from adktelemetry.patch_cli import ensure_adk_web_server_patch
93
+
94
+ ensure_adk_web_server_patch()
95
+ ```
96
+
97
+ 3. Run `adk web`, open `/adktelemetry`, and use the time range control to match the window you care about.
98
+
99
+ **Configuration in code:** the library does not read environment variables by itself. You normally pass `modelkey` (and optionally `adkmodel`) from your own configuration - for example `os.environ["GEMINI_API_KEY"]`, a secrets manager, or Django settings.
100
+
101
+ ---
102
+
103
+ ## What you get (how to read the data)
104
+
105
+ | Surface | Purpose |
106
+ |--------|---------|
107
+ | **Dashboard** | Human-readable charts and tables; **event-driven** updates over **Server-Sent Events** (`GET /adktelemetry/api/v1/stream`) whenever a new telemetry record is stored (with a short debounce for bursts). **Idle:** one long-poll-style wait plus an occasional comment line (~45s) so proxies do not drop the stream. |
108
+ | **`GET /adktelemetry/api/v1/stream`** | `text/event-stream` for the dashboard: `event: ready` on connect, then `event: update` after each coalesced batch of store writes. |
109
+ | **`GET /adktelemetry/api/v1/snapshot`** | Same aggregates the UI uses; optional `since` / `until` (Unix seconds) for a time window. The UI calls this **after** each `update` event (and once on load), not on a fixed timer. |
110
+ | **`GET /adktelemetry/api/v1/session_detail`** | Per-session brief from the in-memory buffer (`user_id`, `session_id` query params). |
111
+ | **`GET /adktelemetry/api/v1/pricing_catalog`** | Reference FinOps rates shown in the UI (USD per 10K tokens) plus a **catalog reference date** (month/year). |
112
+ | **`GET /adktelemetry/api/v1/error_breakdown`** | Error counts grouped by short label for the selected range (same query rules as snapshot). |
113
+
114
+ **Interpreting a snapshot (filtered mode):** `totals` rolls up only sessions that had at least one event in `[since, until]`. `sessions` and `model_distribution` are recomputed from stored events in that window. `applied_range` is `{ "since", "until" }` when filtering, or `null` for the full in-memory snapshot. `pricing_models` is the count of model IDs in the active FinOps catalog (informational; opens the catalog modal from the header).
115
+
116
+ **Events without a usable timestamp** are omitted when filtering by range, so very old or malformed records may not appear in windowed views.
117
+
118
+ ---
119
+
120
+ ## Dashboard guide (metrics & usability)
121
+
122
+ ![DashAdkTelemetry](https://raw.githubusercontent.com/zackcmariano/AdkTelemetry/refs/heads/master/assets/page_dash_adktelemetry.png)
123
+
124
+ The layout is a single page: **header** (summary pills + time range), then a **grid** of cards, then **Sessions Errors** and **Sessions** tables. All numeric cards respect the **selected time range** except when you load the snapshot API with **no** `since`/`until` (full store).
125
+
126
+ ### Time range (header, right)
127
+
128
+ | Control | Behavior |
129
+ |--------|----------|
130
+ | **15 / 30 minutes, 1 hour, 12 hours** | Sliding window ending at “now” on each refresh. |
131
+ | **Custom** | Start and end **dates** in the **local** timezone (start 00:00, end 23:59:59.999). Maximum span **31 days**. |
132
+
133
+ Changing the range updates the snapshot query, recomputes aggregates, and keeps drill-down modals aligned with the same window.
134
+
135
+ ### Header pills (clickable)
136
+
137
+ | Pill | Shows | Click opens |
138
+ |------|--------|-------------|
139
+ | **Sessions** | Count of sessions with activity in the current range (or all sessions in unfiltered snapshot mode). | **Sessions overview** modal: total sessions, input/output tokens, total estimated USD, **last interaction** timestamp (latest session activity in the window). |
140
+ | **Errors** | Sum of per-session error counts in the range. | **Error breakdown** modal: pie chart by **short error label**, legend with % and counts, and a **top category** callout with a representative message when available. |
141
+ | **Pricing models** | Number of models in the FinOps catalog used for estimates. | **Gemini FinOps catalog** modal: table of **input/output USD per 10,000 tokens** per model, unit disclaimer, link to official Google pricing, and the **catalog reference month/year** (see FinOps below). |
142
+
143
+ ### Invocations by model
144
+
145
+ - **Donut + legend** - share of **invocation counts** by resolved model key in the range.
146
+ - Counts come from each event’s `model_version`, with FinOps resolution and **fallback** to `adkmodel` when the event has no model.
147
+ - Legend lists up to **8** rows; the distribution object in JSON may contain more keys.
148
+
149
+ ### ADK events (Runner)
150
+
151
+ Four **horizontal bars** (not a single combined scale):
152
+
153
+ 1. **adk** - total **runner event** count in the range (`totals.events`).
154
+ 2. **errors** - total error count (`totals.errors`), same basis as the Errors pill.
155
+ 3. **in tok** / **out tok** - **prompt** and **candidates** token totals (`totals.total_input_tokens`, `totals.total_output_tokens`).
156
+
157
+ Bar length is **normalized within two groups**: events vs errors share one max; input vs output tokens share another. Use this card to compare **volume of ADK traffic**, **error load**, and **token volume** side by side.
158
+
159
+ ### Estimated cost by session (USD)
160
+
161
+ - One row per session in the snapshot list: **truncated session id**, **relative bar** (max = largest session cost in the list), **cost** to six decimals.
162
+ - **Scrolling** - the card shows roughly **seven** rows at a time; additional sessions scroll inside the card.
163
+ - **Footer** - **Total cost (all sessions)** = sum of `total_cost_usd` for **every** session in the current range’s payload (not only visible rows). This matches FinOps recomputation from events in the window.
164
+
165
+ ### Activity timeline (stacked)
166
+
167
+ - **24 equal-width time buckets** over the **selected dashboard range** (when filtering) or over min–max of buffered event timestamps (unfiltered full snapshot).
168
+ - Each bar’s **height** is relative to the **busiest bucket** (tooltip: event count + local time span for that bucket).
169
+ - **Axis labels** group **6 buckets** each (local start–end text).
170
+ - If timeline metadata is missing, the UI falls back to a **placeholder** layout (illustrative heights); with valid `activity_timeline.since` / `until` / `counts`, the chart is **wall-clock faithful** for the range.
171
+
172
+ ### Token trend (in + out)
173
+
174
+ - Uses up to the **14 most recent sessions** in the snapshot (by `last_timestamp`), **not** chronological chat order.
175
+ - **Blue** = input tokens, **green** = output tokens per session.
176
+ - Lines are drawn with horizontal inset so paths do not run over the **in** / **out** legend text.
177
+
178
+ ### Sessions Errors
179
+
180
+ - Columns: **Time**, **Session**, **Author**, **Code**, **Message**.
181
+ - Rows combine native **`LlmResponse`** error fields and **plain-text** failures (e.g. `Error: …`) on model/system content - same signals that increment the **Errors** column in **Sessions**.
182
+ - Up to **40** rows per refresh in the UI; the API may return more in `recent_errors` (still subject to the global in-memory cap - see limitations).
183
+
184
+ ### Sessions
185
+
186
+ - Columns: **Session** (link), **User**, **Events**, **Errors**, **In tok**, **Out tok**, **Cost USD** - all **recomputed for the selected range** when filtering.
187
+ - Rows with **Errors > 0** are highlighted.
188
+ - Up to **50** session rows per refresh.
189
+ - **Click the session id** to open **Session detail** (modal): session/user ids, first/last buffered event times, buffer stats (event count, author order, token totals from buffer), optional **errors brief**, and a short disclaimer that the brief is **deterministic** from the ring buffer (no LLM) and that old events may have rotated out.
190
+
191
+ ---
192
+
193
+ ## REST API summary
194
+
195
+ ### `GET /adktelemetry/api/v1/stream`
196
+
197
+ - **`Content-Type`:** `text/event-stream` (SSE).
198
+ - **`event: ready`** — sent once when the browser connects (dashboard does not depend on it for the first paint; it already runs an initial `snapshot` fetch).
199
+ - **`event: update`** — emitted after the in-memory store receives new telemetry (Runner events and captured SSE errors), with a **~80ms debounce** so a single model turn does not flood the client.
200
+ - **Comment lines** (`: keepalive`) — about every **45 seconds** while idle so intermediaries treat the connection as alive.
201
+
202
+ Open one stream per dashboard tab. If the connection drops, the UI reconnects after ~3 seconds.
203
+
204
+ ### `GET /adktelemetry/api/v1/snapshot`
205
+
206
+ | Query | Description |
207
+ |-------|-------------|
208
+ | `since` | Optional. Range start (**Unix seconds**). |
209
+ | `until` | Optional. Range end (**Unix seconds**). |
210
+
211
+ - If **both** are omitted → full in-memory aggregate; `applied_range` is `null`.
212
+ - If either is set → **`until`** defaults to now, **`since`** defaults to **15 minutes** before `until` if omitted.
213
+ - **`since` < `until`** required; range length must be **≤ 31 days**, otherwise the API responds with HTTP **400**.
214
+
215
+ ### `GET /adktelemetry/api/v1/session_detail`
216
+
217
+ | Query | Description |
218
+ |-------|-------------|
219
+ | `session_id` | Required. |
220
+ | `user_id` | Required. |
221
+
222
+ Returns **404** if the session is unknown to the store.
223
+
224
+ ### `GET /adktelemetry/api/v1/pricing_catalog`
225
+
226
+ Returns `models` (rows with `model_id`, `input_usd_per_10k`, `output_usd_per_10k`), `unit_label`, `catalog_updated` (MM/YY reference), and `pricing_doc_url`. Use this if you need the same reference the UI shows.
227
+
228
+ ### `GET /adktelemetry/api/v1/error_breakdown`
229
+
230
+ Same `since` / `until` rules as snapshot. Body includes `total`, `slices` (`label`, `count`, `percent`), and optional `top` with a longer message sample for the dominant label.
231
+
232
+ ---
233
+
234
+ ## FinOps (estimates)
235
+
236
+ - Session and total **USD** values are **estimates** from **token counts** and the library’s **shipped FinOps catalog** (list-style rates). They are **not** a substitute for your Google Cloud / Gemini **billing** exports.
237
+ - **Tiered pricing, modalities, or discounts** may differ; the UI and catalog modal note that official pricing may vary.
238
+ - The **catalog reference date** appears as **month/year** (e.g. in the page footer, the FinOps catalog modal, and the `catalog_updated` field from **`/adktelemetry/api/v1/pricing_catalog`**). **Always check that date** when comparing estimates to real invoices - the catalog is refreshed on a schedule by **support / operations**, not by each application team.
239
+ - For the latest public list prices, use the linked **[Gemini API pricing](https://ai.google.dev/gemini-api/docs/pricing)** documentation.
240
+
241
+ ---
242
+
243
+ ## Limitations (in-memory store)
244
+
245
+ - **Per-session ring buffer** of recent raw events (default **500**). Long custom ranges can be incomplete if events aged out.
246
+ - **Global error list** is capped (**200**); the dashboard shows up to **40** error rows per refresh from the filtered/recent list.
247
+ - **Not durable**: process restart clears telemetry.
248
+
249
+ ---
250
+
251
+ ## License
252
+
253
+ MIT License © 2026
@@ -0,0 +1,200 @@
1
+ # AdkTelemetry
2
+
3
+ ![AdkTelemetry](https://raw.githubusercontent.com/zackcmariano/AdkTelemetry/refs/heads/master/assets/adk-telemetry-lib.png)
4
+
5
+ > **Observability & FinOps for Google ADK agents - in real time.**
6
+
7
+ **AdkTelemetry** is the product name. On PyPI the distribution is **`adktelemetry`** (all lowercase): use `pip install adktelemetry` and `import adktelemetry` in code.
8
+
9
+ AdkTelemetry is a **Python library for Google ADK** that captures **runner events**, **token usage**, **estimated USD cost**, and **error signals**, then exposes them through a **built-in dashboard** and **JSON APIs**.
10
+
11
+ **Dashboard URL** (example with `adk web` on port 8080): `http://localhost:8080/adktelemetry`
12
+
13
+ ---
14
+
15
+ ## Installation
16
+
17
+ ```bash
18
+ pip install adktelemetry
19
+ ```
20
+
21
+ ---
22
+
23
+ ## Quick start (developer)
24
+
25
+ 1. **Enable telemetry in your agent module** (call once at import time, before runs):
26
+
27
+ ```python
28
+ from adktelemetry import agentelemetry
29
+
30
+ agentelemetry(
31
+ modelkey="YOUR_GEMINI_API_KEY", # required - use your app’s secret/config source
32
+ adkmodel="gemini-2.5-flash", # optional FinOps fallback when events lack model_version
33
+ )
34
+ ```
35
+
36
+ 2. **Register the dashboard with `adk web`** by adding a `services.py` next to your app (same **agents directory** root ADK loads). ADK imports it **before** the web server builds FastAPI:
37
+
38
+ ```python
39
+ from adktelemetry.patch_cli import ensure_adk_web_server_patch
40
+
41
+ ensure_adk_web_server_patch()
42
+ ```
43
+
44
+ 3. Run `adk web`, open `/adktelemetry`, and use the time range control to match the window you care about.
45
+
46
+ **Configuration in code:** the library does not read environment variables by itself. You normally pass `modelkey` (and optionally `adkmodel`) from your own configuration - for example `os.environ["GEMINI_API_KEY"]`, a secrets manager, or Django settings.
47
+
48
+ ---
49
+
50
+ ## What you get (how to read the data)
51
+
52
+ | Surface | Purpose |
53
+ |--------|---------|
54
+ | **Dashboard** | Human-readable charts and tables; **event-driven** updates over **Server-Sent Events** (`GET /adktelemetry/api/v1/stream`) whenever a new telemetry record is stored (with a short debounce for bursts). **Idle:** one long-poll-style wait plus an occasional comment line (~45s) so proxies do not drop the stream. |
55
+ | **`GET /adktelemetry/api/v1/stream`** | `text/event-stream` for the dashboard: `event: ready` on connect, then `event: update` after each coalesced batch of store writes. |
56
+ | **`GET /adktelemetry/api/v1/snapshot`** | Same aggregates the UI uses; optional `since` / `until` (Unix seconds) for a time window. The UI calls this **after** each `update` event (and once on load), not on a fixed timer. |
57
+ | **`GET /adktelemetry/api/v1/session_detail`** | Per-session brief from the in-memory buffer (`user_id`, `session_id` query params). |
58
+ | **`GET /adktelemetry/api/v1/pricing_catalog`** | Reference FinOps rates shown in the UI (USD per 10K tokens) plus a **catalog reference date** (month/year). |
59
+ | **`GET /adktelemetry/api/v1/error_breakdown`** | Error counts grouped by short label for the selected range (same query rules as snapshot). |
60
+
61
+ **Interpreting a snapshot (filtered mode):** `totals` rolls up only sessions that had at least one event in `[since, until]`. `sessions` and `model_distribution` are recomputed from stored events in that window. `applied_range` is `{ "since", "until" }` when filtering, or `null` for the full in-memory snapshot. `pricing_models` is the count of model IDs in the active FinOps catalog (informational; opens the catalog modal from the header).
62
+
63
+ **Events without a usable timestamp** are omitted when filtering by range, so very old or malformed records may not appear in windowed views.
64
+
65
+ ---
66
+
67
+ ## Dashboard guide (metrics & usability)
68
+
69
+ ![DashAdkTelemetry](https://raw.githubusercontent.com/zackcmariano/AdkTelemetry/refs/heads/master/assets/page_dash_adktelemetry.png)
70
+
71
+ The layout is a single page: **header** (summary pills + time range), then a **grid** of cards, then **Sessions Errors** and **Sessions** tables. All numeric cards respect the **selected time range** except when you load the snapshot API with **no** `since`/`until` (full store).
72
+
73
+ ### Time range (header, right)
74
+
75
+ | Control | Behavior |
76
+ |--------|----------|
77
+ | **15 / 30 minutes, 1 hour, 12 hours** | Sliding window ending at “now” on each refresh. |
78
+ | **Custom** | Start and end **dates** in the **local** timezone (start 00:00, end 23:59:59.999). Maximum span **31 days**. |
79
+
80
+ Changing the range updates the snapshot query, recomputes aggregates, and keeps drill-down modals aligned with the same window.
81
+
82
+ ### Header pills (clickable)
83
+
84
+ | Pill | Shows | Click opens |
85
+ |------|--------|-------------|
86
+ | **Sessions** | Count of sessions with activity in the current range (or all sessions in unfiltered snapshot mode). | **Sessions overview** modal: total sessions, input/output tokens, total estimated USD, **last interaction** timestamp (latest session activity in the window). |
87
+ | **Errors** | Sum of per-session error counts in the range. | **Error breakdown** modal: pie chart by **short error label**, legend with % and counts, and a **top category** callout with a representative message when available. |
88
+ | **Pricing models** | Number of models in the FinOps catalog used for estimates. | **Gemini FinOps catalog** modal: table of **input/output USD per 10,000 tokens** per model, unit disclaimer, link to official Google pricing, and the **catalog reference month/year** (see FinOps below). |
89
+
90
+ ### Invocations by model
91
+
92
+ - **Donut + legend** - share of **invocation counts** by resolved model key in the range.
93
+ - Counts come from each event’s `model_version`, with FinOps resolution and **fallback** to `adkmodel` when the event has no model.
94
+ - Legend lists up to **8** rows; the distribution object in JSON may contain more keys.
95
+
96
+ ### ADK events (Runner)
97
+
98
+ Four **horizontal bars** (not a single combined scale):
99
+
100
+ 1. **adk** - total **runner event** count in the range (`totals.events`).
101
+ 2. **errors** - total error count (`totals.errors`), same basis as the Errors pill.
102
+ 3. **in tok** / **out tok** - **prompt** and **candidates** token totals (`totals.total_input_tokens`, `totals.total_output_tokens`).
103
+
104
+ Bar length is **normalized within two groups**: events vs errors share one max; input vs output tokens share another. Use this card to compare **volume of ADK traffic**, **error load**, and **token volume** side by side.
105
+
106
+ ### Estimated cost by session (USD)
107
+
108
+ - One row per session in the snapshot list: **truncated session id**, **relative bar** (max = largest session cost in the list), **cost** to six decimals.
109
+ - **Scrolling** - the card shows roughly **seven** rows at a time; additional sessions scroll inside the card.
110
+ - **Footer** - **Total cost (all sessions)** = sum of `total_cost_usd` for **every** session in the current range’s payload (not only visible rows). This matches FinOps recomputation from events in the window.
111
+
112
+ ### Activity timeline (stacked)
113
+
114
+ - **24 equal-width time buckets** over the **selected dashboard range** (when filtering) or over min–max of buffered event timestamps (unfiltered full snapshot).
115
+ - Each bar’s **height** is relative to the **busiest bucket** (tooltip: event count + local time span for that bucket).
116
+ - **Axis labels** group **6 buckets** each (local start–end text).
117
+ - If timeline metadata is missing, the UI falls back to a **placeholder** layout (illustrative heights); with valid `activity_timeline.since` / `until` / `counts`, the chart is **wall-clock faithful** for the range.
118
+
119
+ ### Token trend (in + out)
120
+
121
+ - Uses up to the **14 most recent sessions** in the snapshot (by `last_timestamp`), **not** chronological chat order.
122
+ - **Blue** = input tokens, **green** = output tokens per session.
123
+ - Lines are drawn with horizontal inset so paths do not run over the **in** / **out** legend text.
124
+
125
+ ### Sessions Errors
126
+
127
+ - Columns: **Time**, **Session**, **Author**, **Code**, **Message**.
128
+ - Rows combine native **`LlmResponse`** error fields and **plain-text** failures (e.g. `Error: …`) on model/system content - same signals that increment the **Errors** column in **Sessions**.
129
+ - Up to **40** rows per refresh in the UI; the API may return more in `recent_errors` (still subject to the global in-memory cap - see limitations).
130
+
131
+ ### Sessions
132
+
133
+ - Columns: **Session** (link), **User**, **Events**, **Errors**, **In tok**, **Out tok**, **Cost USD** - all **recomputed for the selected range** when filtering.
134
+ - Rows with **Errors > 0** are highlighted.
135
+ - Up to **50** session rows per refresh.
136
+ - **Click the session id** to open **Session detail** (modal): session/user ids, first/last buffered event times, buffer stats (event count, author order, token totals from buffer), optional **errors brief**, and a short disclaimer that the brief is **deterministic** from the ring buffer (no LLM) and that old events may have rotated out.
137
+
138
+ ---
139
+
140
+ ## REST API summary
141
+
142
+ ### `GET /adktelemetry/api/v1/stream`
143
+
144
+ - **`Content-Type`:** `text/event-stream` (SSE).
145
+ - **`event: ready`** — sent once when the browser connects (dashboard does not depend on it for the first paint; it already runs an initial `snapshot` fetch).
146
+ - **`event: update`** — emitted after the in-memory store receives new telemetry (Runner events and captured SSE errors), with a **~80ms debounce** so a single model turn does not flood the client.
147
+ - **Comment lines** (`: keepalive`) — about every **45 seconds** while idle so intermediaries treat the connection as alive.
148
+
149
+ Open one stream per dashboard tab. If the connection drops, the UI reconnects after ~3 seconds.
150
+
151
+ ### `GET /adktelemetry/api/v1/snapshot`
152
+
153
+ | Query | Description |
154
+ |-------|-------------|
155
+ | `since` | Optional. Range start (**Unix seconds**). |
156
+ | `until` | Optional. Range end (**Unix seconds**). |
157
+
158
+ - If **both** are omitted → full in-memory aggregate; `applied_range` is `null`.
159
+ - If either is set → **`until`** defaults to now, **`since`** defaults to **15 minutes** before `until` if omitted.
160
+ - **`since` < `until`** required; range length must be **≤ 31 days**, otherwise the API responds with HTTP **400**.
161
+
162
+ ### `GET /adktelemetry/api/v1/session_detail`
163
+
164
+ | Query | Description |
165
+ |-------|-------------|
166
+ | `session_id` | Required. |
167
+ | `user_id` | Required. |
168
+
169
+ Returns **404** if the session is unknown to the store.
170
+
171
+ ### `GET /adktelemetry/api/v1/pricing_catalog`
172
+
173
+ Returns `models` (rows with `model_id`, `input_usd_per_10k`, `output_usd_per_10k`), `unit_label`, `catalog_updated` (MM/YY reference), and `pricing_doc_url`. Use this if you need the same reference the UI shows.
174
+
175
+ ### `GET /adktelemetry/api/v1/error_breakdown`
176
+
177
+ Same `since` / `until` rules as snapshot. Body includes `total`, `slices` (`label`, `count`, `percent`), and optional `top` with a longer message sample for the dominant label.
178
+
179
+ ---
180
+
181
+ ## FinOps (estimates)
182
+
183
+ - Session and total **USD** values are **estimates** from **token counts** and the library’s **shipped FinOps catalog** (list-style rates). They are **not** a substitute for your Google Cloud / Gemini **billing** exports.
184
+ - **Tiered pricing, modalities, or discounts** may differ; the UI and catalog modal note that official pricing may vary.
185
+ - The **catalog reference date** appears as **month/year** (e.g. in the page footer, the FinOps catalog modal, and the `catalog_updated` field from **`/adktelemetry/api/v1/pricing_catalog`**). **Always check that date** when comparing estimates to real invoices - the catalog is refreshed on a schedule by **support / operations**, not by each application team.
186
+ - For the latest public list prices, use the linked **[Gemini API pricing](https://ai.google.dev/gemini-api/docs/pricing)** documentation.
187
+
188
+ ---
189
+
190
+ ## Limitations (in-memory store)
191
+
192
+ - **Per-session ring buffer** of recent raw events (default **500**). Long custom ranges can be incomplete if events aged out.
193
+ - **Global error list** is capped (**200**); the dashboard shows up to **40** error rows per refresh from the filtered/recent list.
194
+ - **Not durable**: process restart clears telemetry.
195
+
196
+ ---
197
+
198
+ ## License
199
+
200
+ MIT License © 2026
@@ -0,0 +1,8 @@
1
from adktelemetry.runtime import ensure_project_root

# The project-root setup is invoked here, between the imports, so it runs
# before the two modules below are imported.
# NOTE(review): presumably those imports rely on it having run; confirm
# against adktelemetry/runtime.py before reordering.
ensure_project_root()

from adktelemetry.agentelemetry import agentelemetry
from adktelemetry.autoagent import autoagent

# Names re-exported as the package-level public API.
__all__ = ["agentelemetry", "autoagent", "ensure_project_root"]
@@ -0,0 +1 @@
1
+ """ADK compatibility helpers (semantic root resolution)."""
@@ -0,0 +1,4 @@
1
class OraicleAgent:
    """Pair an agent object with a flag saying whether it is a root agent.

    Attributes:
        agent: The wrapped agent object, stored exactly as passed in.
        is_root: Whether the wrapped agent is marked as a root agent.
    """

    def __init__(self, agent, is_root=False):
        # Both values are stored as given; nothing is validated or copied.
        self.is_root = is_root
        self.agent = agent
@@ -0,0 +1,28 @@
1
+ """
2
+ Semantic loader patch for Google ADK (multiple root agents).
3
+ """
4
+
5
+ from adktelemetry.exceptions import NoRootAgentRegistered
6
+ from adktelemetry.registry import AgentRegistry
7
+
8
+
9
def resolve_root_agent(agent_name: str | None = None):
    """Return the agent object of a registered root agent.

    Args:
        agent_name: Name of the root to look up. When falsy (``None`` or an
            empty string) the first root yielded by the registry mapping is
            used instead.

    Returns:
        The ``.agent`` attribute of the selected registry entry.

    Raises:
        NoRootAgentRegistered: If the registry holds no roots at all, or if
            ``agent_name`` was given but the registry has no entry for it.
    """
    registered_roots = AgentRegistry.all_roots()
    if not registered_roots:
        raise NoRootAgentRegistered("No root_agent registered. Use autoagent(agent).")

    if not agent_name:
        # No explicit name requested: fall back to the first registered root.
        first_entry = next(iter(registered_roots.values()))
        return first_entry.agent

    entry = AgentRegistry.get_root(agent_name)
    if not entry:
        raise NoRootAgentRegistered(f"Root agent '{agent_name}' not found in registry.")
    return entry.agent
23
+
24
+
25
# Resolve a module-level `root_agent` at import time. When nothing has been
# registered yet the attribute still exists, but is left as None.
root_agent = None
try:
    root_agent = resolve_root_agent()
except NoRootAgentRegistered:
    pass
@@ -0,0 +1,58 @@
1
+ """
2
+ AdkTelemetry entrypoint: install hooks and expose the `/adktelemetry` dashboard.
3
+
4
+ The background telemetry collector does not participate in user-facing agent
5
+ turns; it observes ADK `Runner` events and aggregates per-session FinOps and
6
+ error signals. Optional future use of `modelkey` may power LLM-based log
7
+ analysis without exposing the secret via the HTTP API.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import logging
13
+
14
+ from adktelemetry.config import TelemetryConfig, set_config
15
+ from adktelemetry.hooks import ensure_runner_patch
16
+ from adktelemetry.patch_cli import ensure_fast_api_patch
17
+
18
logger = logging.getLogger("adktelemetry")


def agentelemetry(
    *,
    modelkey: str,
    adkmodel: str = "gemini-2.5-flash",
    pricing_config_path: str | None = None,
) -> None:
    """
    Enable AdkTelemetry for the current process.

    Args:
        modelkey: Required. Gemini API key for the host application (stored in
            memory only; never served on `/adktelemetry` endpoints).
        adkmodel: Default Gemini model id used when an event has no
            `model_version` (FinOps fallback). Default: gemini-2.5-flash.
            Empty or whitespace-only values fall back to that default.
        pricing_config_path: Optional path to a YAML file with the same shape as
            `adktelemetry/gemini_pricing.yaml` for custom or updated rates.

    Raises:
        ValueError: If `modelkey` is empty or contains only whitespace.

    Call once at process startup (e.g. alongside your ADK app import) before
    `adk web` constructs the FastAPI app so the `get_fast_api_app` patch applies.
    """
    api_key = str(modelkey).strip() if modelkey else ""
    if not api_key:
        raise ValueError("agentelemetry(): modelkey is required and cannot be empty.")

    # Normalise once so the stored config and the log line agree. Stripping
    # before applying the fallback keeps a whitespace-only value from being
    # stored as an empty model id.
    effective_model = (adkmodel or "").strip() or "gemini-2.5-flash"

    set_config(
        TelemetryConfig(
            adkmodel=effective_model,
            modelkey=api_key,
            pricing_config_path=pricing_config_path,
        )
    )
    ensure_runner_patch()
    # Idempotent: often no-op for HTTP if `services.py` already called
    # `ensure_adk_web_server_patch()` before the FastAPI app was built.
    ensure_fast_api_patch()
    logger.info(
        "AdkTelemetry enabled (default model=%s). Dashboard: <port>/adktelemetry",
        effective_model,
    )
@@ -0,0 +1,31 @@
1
+ import inspect
2
+ from pathlib import Path
3
+
4
+ from adktelemetry.context import inject_app_root
5
+ from adktelemetry.registry import AgentRegistry
6
+
7
+
8
def autoagent(agent):
    """
    Promotes an Agent to a semantic root agent (Google ADK loader compatibility).

    The calling module is located from the call stack. Its directory is passed
    to `inject_app_root`, the agent is registered with `AgentRegistry` under
    the module's file path and name, and the agent is also assigned to the
    calling module as its `root_agent` attribute.

    Args:
        agent: The agent object to register as a root.

    Returns:
        The same `agent` object, so the call can wrap an assignment.

    Raises:
        RuntimeError: If the caller's module cannot be determined or has no
            usable `__file__`.
    """
    # Only the immediate caller is needed, so take it from f_back instead of
    # inspect.stack(), which builds FrameInfo records (including source
    # context) for every frame on the stack.
    this_frame = inspect.currentframe()
    caller_frame = this_frame.f_back if this_frame is not None else None
    try:
        caller_module = (
            inspect.getmodule(caller_frame) if caller_frame is not None else None
        )
    finally:
        # Drop frame references promptly to avoid reference cycles.
        del this_frame, caller_frame

    # `__file__` may exist but be None, so test the value rather than mere
    # attribute presence; otherwise Path(None) would raise TypeError.
    module_file = getattr(caller_module, "__file__", None)
    if caller_module is None or not module_file:
        raise RuntimeError("autoagent() must be called from a Python module file.")

    caller_file = Path(module_file).resolve()

    inject_app_root(caller_file.parent)

    AgentRegistry.register_root(
        agent,
        file_path=str(caller_file),
        module_name=caller_module.__name__,
    )

    caller_module.root_agent = agent

    return agent
@@ -0,0 +1,24 @@
1
+ """Runtime configuration set by `agentelemetry()`."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+
7
+
8
@dataclass
class TelemetryConfig:
    """Process-wide AdkTelemetry settings.

    The `modelkey` field holds a secret, so `repr()` masks it; this keeps the
    key out of logs and tracebacks that print the config. Field access and
    equality are unaffected.
    """

    # Default model id.
    adkmodel: str = "gemini-2.5-flash"
    # API key; kept out of repr() output.
    modelkey: str = ""
    # Optional path to a pricing file; None when not supplied.
    pricing_config_path: str | None = None

    def __repr__(self) -> str:
        # Show only whether a key is set, never its value.
        masked_key = "***" if self.modelkey else ""
        return (
            f"{type(self).__name__}("
            f"adkmodel={self.adkmodel!r}, "
            f"modelkey={masked_key!r}, "
            f"pricing_config_path={self.pricing_config_path!r})"
        )
13
+
14
+
15
# Module-level slot for the active configuration; None until set_config() runs.
_CONFIG: TelemetryConfig | None = None


def set_config(cfg: TelemetryConfig) -> None:
    """Store `cfg` as the module-wide configuration, replacing any earlier one."""
    global _CONFIG
    _CONFIG = cfg


def get_config() -> TelemetryConfig | None:
    """Return the configuration last passed to `set_config`, or None if unset."""
    current = _CONFIG
    return current
@@ -0,0 +1,17 @@
1
+ import sys
2
+ from pathlib import Path
3
+
4
# Entry names whose presence in a directory marks it as the application root.
_APP_ROOT_MARKERS = {"app", "config", "tools", "utils"}


def inject_app_root(start_path: Path):
    """Find the nearest application root at or above `start_path`.

    The resolved `start_path` and then each of its ancestors are examined in
    order, nearest first. The first directory containing an entry named after
    one of `_APP_ROOT_MARKERS` is put at the front of `sys.path` (unless it is
    already listed) and returned. The filesystem root itself is never examined.

    Args:
        start_path: Directory at which the upward search begins.

    Returns:
        The matching directory as a `Path`, or None if no examined directory
        contains a marker.
    """
    origin = start_path.resolve()
    # Resolved start followed by its ancestors; the final element is the
    # filesystem root, which is left out of the search.
    candidates = [origin, *origin.parents][:-1]

    for directory in candidates:
        if not any((directory / name).exists() for name in _APP_ROOT_MARKERS):
            continue
        entry = str(directory)
        if entry not in sys.path:
            sys.path.insert(0, entry)
        return directory

    return None