hermes-agent-a2a 3.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hermes_agent_a2a-3.2.2/.github/ISSUE_TEMPLATE/security-vulnerability.md +31 -0
- hermes_agent_a2a-3.2.2/.github/dependabot.yml +16 -0
- hermes_agent_a2a-3.2.2/.github/workflows/ci.yml +22 -0
- hermes_agent_a2a-3.2.2/.github/workflows/release.yml +22 -0
- hermes_agent_a2a-3.2.2/.gitignore +7 -0
- hermes_agent_a2a-3.2.2/CHANGELOG.md +137 -0
- hermes_agent_a2a-3.2.2/LICENSE +21 -0
- hermes_agent_a2a-3.2.2/METRICS_DESIGN.md +256 -0
- hermes_agent_a2a-3.2.2/PKG-INFO +393 -0
- hermes_agent_a2a-3.2.2/QUICKSTART.md +272 -0
- hermes_agent_a2a-3.2.2/README.md +376 -0
- hermes_agent_a2a-3.2.2/REFACTORING_PLAN.md +223 -0
- hermes_agent_a2a-3.2.2/RETRO-2026-05-17.md +47 -0
- hermes_agent_a2a-3.2.2/RETRO-LESSONS-2026-05-17.md +25 -0
- hermes_agent_a2a-3.2.2/SECURITY.md +132 -0
- hermes_agent_a2a-3.2.2/__init__.py +6 -0
- hermes_agent_a2a-3.2.2/hermes_agent_a2a/__init__.py +5 -0
- hermes_agent_a2a-3.2.2/hermes_agent_a2a/_mode2_worker.py +120 -0
- hermes_agent_a2a-3.2.2/hermes_agent_a2a/a2a_spec/__init__.py +67 -0
- hermes_agent_a2a-3.2.2/hermes_agent_a2a/a2a_spec/agent_card.py +112 -0
- hermes_agent_a2a-3.2.2/hermes_agent_a2a/a2a_spec/hermes_ext.py +24 -0
- hermes_agent_a2a-3.2.2/hermes_agent_a2a/a2a_spec/push.py +122 -0
- hermes_agent_a2a-3.2.2/hermes_agent_a2a/a2a_spec/tasks.py +201 -0
- hermes_agent_a2a-3.2.2/hermes_agent_a2a/hooks.py +344 -0
- hermes_agent_a2a-3.2.2/hermes_agent_a2a/identity.py +458 -0
- hermes_agent_a2a-3.2.2/hermes_agent_a2a/persistence.py +259 -0
- hermes_agent_a2a-3.2.2/hermes_agent_a2a/plugin.py +139 -0
- hermes_agent_a2a-3.2.2/hermes_agent_a2a/push_delivery.py +364 -0
- hermes_agent_a2a-3.2.2/hermes_agent_a2a/runtime_state.py +283 -0
- hermes_agent_a2a-3.2.2/hermes_agent_a2a/schemas.py +342 -0
- hermes_agent_a2a-3.2.2/hermes_agent_a2a/security.py +297 -0
- hermes_agent_a2a-3.2.2/hermes_agent_a2a/server.py +2206 -0
- hermes_agent_a2a-3.2.2/hermes_agent_a2a/sse_handler.py +383 -0
- hermes_agent_a2a-3.2.2/hermes_agent_a2a/subscription_store.py +118 -0
- hermes_agent_a2a-3.2.2/hermes_agent_a2a/tool_discovery.py +5 -0
- hermes_agent_a2a-3.2.2/hermes_agent_a2a/tool_handlers.py +1393 -0
- hermes_agent_a2a-3.2.2/hermes_agent_a2a/tool_help.py +5 -0
- hermes_agent_a2a-3.2.2/hermes_agent_a2a/tool_protocol.py +5 -0
- hermes_agent_a2a-3.2.2/hermes_agent_a2a/tool_registry.py +82 -0
- hermes_agent_a2a-3.2.2/hermes_agent_a2a/tool_sessions.py +5 -0
- hermes_agent_a2a-3.2.2/hermes_agent_a2a/tool_workers.py +5 -0
- hermes_agent_a2a-3.2.2/hermes_agent_a2a/tools.py +5 -0
- hermes_agent_a2a-3.2.2/hermes_agent_a2a/validators.py +83 -0
- hermes_agent_a2a-3.2.2/hermes_agent_a2a/worker_registry.py +55 -0
- hermes_agent_a2a-3.2.2/identity.yaml.example +23 -0
- hermes_agent_a2a-3.2.2/install.sh +100 -0
- hermes_agent_a2a-3.2.2/plugin.yaml +8 -0
- hermes_agent_a2a-3.2.2/pyproject.toml +33 -0
- hermes_agent_a2a-3.2.2/scripts/gateway-session-inject.patch +15 -0
- hermes_agent_a2a-3.2.2/templates/agent-config.yaml +16 -0
- hermes_agent_a2a-3.2.2/tests/__init__.py +0 -0
- hermes_agent_a2a-3.2.2/tests/conftest.py +42 -0
- hermes_agent_a2a-3.2.2/tests/smoke/__init__.py +0 -0
- hermes_agent_a2a-3.2.2/tests/smoke/test_mode2_mode3.py +108 -0
- hermes_agent_a2a-3.2.2/tests/smoke/test_smoke_suite.py +343 -0
- hermes_agent_a2a-3.2.2/tests/test_a2a_compliance.py +706 -0
- hermes_agent_a2a-3.2.2/tests/test_artifact_event.py +280 -0
- hermes_agent_a2a-3.2.2/tests/test_current_tools.py +1779 -0
- hermes_agent_a2a-3.2.2/tests/test_extended_agentcard.py +167 -0
- hermes_agent_a2a-3.2.2/tests/test_jws_signing.py +142 -0
- hermes_agent_a2a-3.2.2/tests/test_list_tasks.py +370 -0
- hermes_agent_a2a-3.2.2/tests/test_mode3_worker_subprocess.py +503 -0
- hermes_agent_a2a-3.2.2/tests/test_push_delivery.py +442 -0
- hermes_agent_a2a-3.2.2/tests/test_push_failure_logging.py +143 -0
- hermes_agent_a2a-3.2.2/tests/test_push_models.py +172 -0
- hermes_agent_a2a-3.2.2/tests/test_push_notifications.py +565 -0
- hermes_agent_a2a-3.2.2/tests/test_push_rest_handlers.py +600 -0
- hermes_agent_a2a-3.2.2/tests/test_push_schema_exports.py +57 -0
- hermes_agent_a2a-3.2.2/tests/test_rest_endpoints.py +641 -0
- hermes_agent_a2a-3.2.2/tests/test_security_push.py +464 -0
- hermes_agent_a2a-3.2.2/tests/test_sse_context_id.py +247 -0
- hermes_agent_a2a-3.2.2/tests/test_sse_event_names.py +250 -0
- hermes_agent_a2a-3.2.2/tests/test_sse_last_event_id.py +294 -0
- hermes_agent_a2a-3.2.2/tests/test_sse_shutdown.py +95 -0
- hermes_agent_a2a-3.2.2/tests/test_sse_streaming.py +409 -0
- hermes_agent_a2a-3.2.2/tests/test_sse_unclean_disconnect.py +351 -0
- hermes_agent_a2a-3.2.2/tests/test_subscription_store_ttl.py +116 -0
- hermes_agent_a2a-3.2.2/tests/test_task_queue_concurrent.py +351 -0
- hermes_agent_a2a-3.2.2/tests/test_task_state_enum.py +73 -0
- hermes_agent_a2a-3.2.2/tests/test_tasks_message_id.py +62 -0
- hermes_agent_a2a-3.2.2/tests/test_wave_c_perf_issue15.py +150 -0
- hermes_agent_a2a-3.2.2/vault/test-vault.yaml +13 -0
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: Security Vulnerability
|
|
3
|
+
about: Report a security issue in hermes-agent-a2a
|
|
4
|
+
title: "[SECURITY]"
|
|
5
|
+
labels: security
|
|
6
|
+
assignees: ''
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
**Describe the vulnerability**
|
|
10
|
+
|
|
11
|
+
<!-- What is the security issue? -->
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
**Versions affected**
|
|
15
|
+
|
|
16
|
+
<!-- e.g. 2.0.0 - 2.0.3 -->
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
**Impact**
|
|
20
|
+
|
|
21
|
+
<!-- What is the worst-case impact if exploited? -->
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
**Proof of concept**
|
|
25
|
+
|
|
26
|
+
<!-- Steps to reproduce, if applicable -->
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
**Suggested fix**
|
|
30
|
+
|
|
31
|
+
<!-- How should this be fixed? -->
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
version: 2
|
|
2
|
+
updates:
|
|
3
|
+
- package-ecosystem: "github-actions"
|
|
4
|
+
directory: "/"
|
|
5
|
+
schedule:
|
|
6
|
+
interval: "weekly"
|
|
7
|
+
labels:
|
|
8
|
+
- "dependencies"
|
|
9
|
+
- "CI"
|
|
10
|
+
|
|
11
|
+
- package-ecosystem: "pip"
|
|
12
|
+
directory: "/"
|
|
13
|
+
schedule:
|
|
14
|
+
interval: "weekly"
|
|
15
|
+
labels:
|
|
16
|
+
- "dependencies"
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
test:
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
strategy:
|
|
12
|
+
matrix:
|
|
13
|
+
python-version: ["3.11", "3.12"]
|
|
14
|
+
steps:
|
|
15
|
+
- uses: actions/checkout@v6
|
|
16
|
+
- uses: actions/setup-python@v6
|
|
17
|
+
with:
|
|
18
|
+
python-version: ${{ matrix.python-version }}
|
|
19
|
+
- run: python -m pip install --upgrade pip
|
|
20
|
+
- run: python -m pip install -e ".[dev]"
|
|
21
|
+
- run: python -m py_compile hermes_agent_a2a/*.py
|
|
22
|
+
- run: python -m pytest tests/ -v
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
name: Release
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags: ['v*']
|
|
6
|
+
|
|
7
|
+
permissions:
|
|
8
|
+
contents: write
|
|
9
|
+
|
|
10
|
+
jobs:
|
|
11
|
+
release:
|
|
12
|
+
runs-on: ubuntu-latest
|
|
13
|
+
steps:
|
|
14
|
+
- uses: actions/checkout@v6
|
|
15
|
+
- uses: actions/setup-python@v5
|
|
16
|
+
with:
|
|
17
|
+
python-version: "3.11"
|
|
18
|
+
- run: python -m pip install --upgrade pip build
|
|
19
|
+
- run: python -m build
|
|
20
|
+
- uses: softprops/action-gh-release@v3
|
|
21
|
+
with:
|
|
22
|
+
files: dist/*
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
## [3.2.2] - 2026-05-20
|
|
6
|
+
|
|
7
|
+
### Security Fixes
|
|
8
|
+
- **SEC-01: DNS timeout**: `socket.setdefaulttimeout(5.0)` added before `gethostbyname` in `is_safe_url` — prevents indefinite blocking on malicious DNS
|
|
9
|
+
- **SEC-02: Container auth bypass**: Localhost bypass now gated on `A2A_REQUIRE_AUTH=true` — loopback is not isolated in containers/shared namespaces
|
|
10
|
+
- **SEC-06: HMAC required on push config**: `_check_hmac_push(required=True)` — push subscription config now requires valid HMAC
|
|
11
|
+
|
|
12
|
+
### Documentation
|
|
13
|
+
- Description updated to reflect Hermes-specific A2A HTTP/JSON-RPC implementation
|
|
14
|
+
- `.gitignore` updated to exclude `dispatch/`
|
|
15
|
+
|
|
16
|
+
## [3.2.1] - 2026-05-19
|
|
17
|
+
|
|
18
|
+
### Bug Fixes (CRITICAL/HIGH from CODE_REVIEW.md)
|
|
19
|
+
|
|
20
|
+
#### Compliance Fixes
|
|
21
|
+
- **jsonrpc field missing in Mode3 timeout response**: `jsonrpc: "2.0"` added to tool_handlers.py:801
|
|
22
|
+
- **CORS hardcoded `*` at 5 locations**: Made configurable via `A2A_CORS_ORIGINS` env var (defaults to `*` for backward compat)
|
|
23
|
+
- **CORS method mismatch on POST-only endpoints**: Removed `GET` from `Access-Control-Allow-Methods` on POST-only endpoints
|
|
24
|
+
- **pushNotifications always returned boolean `True`**: Now returns `{webhookUrl: "..."}` object form when webhook is configured per A2A spec
|
|
25
|
+
|
|
26
|
+
#### Security Fixes
|
|
27
|
+
- **SSRF bypass via fleet-registry.yaml**: `_is_local_fleet_agent` now validates URL host is actually safe, not just loopback flag
|
|
28
|
+
- **Webhook secret exposure in error paths**: Generic error messages now used throughout
|
|
29
|
+
|
|
30
|
+
#### SSE / Streaming Fixes
|
|
31
|
+
- **SSE idle tracking used `created_at` (stream creation)**: Replaced with per-stream `_last_activity` updated on server-side activity (`push_event`). Client polling no longer resets idle timer.
|
|
32
|
+
- **SSE streams never expired**: Cleanup thread added with 300s idle timeout
|
|
33
|
+
|
|
34
|
+
#### Resource Leak Fixes
|
|
35
|
+
- **Daemon threads not joined on shutdown**: Plugin shutdown now joins SSE handler threads
|
|
36
|
+
- **Subscription store grew unbounded**: `add()`/`remove()` lifecycle now managed; TTL/cleanup logic added
|
|
37
|
+
|
|
38
|
+
#### Test Coverage
|
|
39
|
+
- **Mode3 worker subprocess tests**: Fixed 9 test failures — wrong patch targets (cleanup_zombie_processes in worker_registry, not tool_handlers), missing params, mock side_effect fixes
|
|
40
|
+
- **Mode2 worker tests**: Fixed os.path.isdir patching, TimeoutExpired exception handling
|
|
41
|
+
- **Concurrent TaskQueue access**: Tests added for race conditions
|
|
42
|
+
|
|
43
|
+
#### Other Fixes
|
|
44
|
+
- **Non-atomic metric recording**: `record_webhook_result(success: bool)` atomic API confirmed in place
|
|
45
|
+
- **Task loss between drain/requeue**: Safety comments and requeue logic verified
|
|
46
|
+
- **HMAC verification failure**: Proper exception handling in push_delivery.py
|
|
47
|
+
- **Path traversal check ordering**: Verified correct in tool_handlers.py
|
|
48
|
+
|
|
49
|
+
### Tests
|
|
50
|
+
- **535 tests passing**, 13 non-blocking teardown errors (mock subprocess cleanup in Mode3 tests — all assertions pass, and the errors do not affect other tests)
|
|
51
|
+
|
|
52
|
+
### v2 Deferred (API Design)
|
|
53
|
+
- Dual args/kwargs in `handle_send_session_message`
|
|
54
|
+
- Confusing token fallback chain
|
|
55
|
+
- Singleton reload behavior
|
|
56
|
+
|
|
57
|
+
---
|
|
58
|
+
|
|
59
|
+
## [3.2.0] - 2026-05-17
|
|
60
|
+
|
|
61
|
+
### Google A2A v1.0 Full Compliance
|
|
62
|
+
- **Idempotency keys**: `IdempotencyStore` singleton with 24h TTL. Same-key/same-payload → cached result. Same-key/different-payload → `-38004` error.
|
|
63
|
+
- **Full state machine**: `auth_required`, `authenticated`, `rejected` states added to `TaskQueue._TRANSITIONS`. Invalid transitions → `-38003` error.
|
|
64
|
+
- **Error schema alignment**: All 8 A2A error codes defined (`-32700`, `-32600`, `-32603`, `-38000` through `-38004`). All error responses use `{code, message, data}` format.
|
|
65
|
+
- **CORS headers**: `Access-Control-Allow-Origin: *` on all responses. `do_OPTIONS()` for preflight. Applied to GET, POST, OPTIONS, and error responses.
|
|
66
|
+
- **Agent card schema**: `agentId` field added. `skills[]` uses `{id, name}` per spec.
|
|
67
|
+
|
|
68
|
+
#### Wave 2 — Streaming & Push (P1)
|
|
69
|
+
- **SSE streaming** (`tasks/sendSubscribe`): Server-Sent Events stream of task state transitions. `SSEStreamer` singleton manages stream lifecycle.
|
|
70
|
+
- **Push notifications** (`tasks/pushNotification/subscribe` + unsubscribe): `SubscriptionStore` persists webhook subscriptions with HMAC key. `PushDelivery` delivers HMAC-SHA256 signed payloads with exponential backoff retry (3 attempts).
|
|
71
|
+
- **Hook wiring**: `TaskStateChangeHook.on_state_change()` broadcasts SSE events and delivers push webhooks on task state transitions.
|
|
72
|
+
|
|
73
|
+
#### Tests
|
|
74
|
+
- 161 tests passing (50 compliance + 24 SSE + 26 push + 79 current + 2 hybrid)
|
|
75
|
+
- Coverage: subscription_store 100%, sse_handler 94%, push_delivery 80%, server 68%, hooks 39%
|
|
76
|
+
|
|
77
|
+
## [3.1.3] - 2026-05-15
|
|
78
|
+
|
|
79
|
+
### Security Fixes (CRITICAL)
|
|
80
|
+
- CR-1: Simplified resolve_agent to return only safe fields (name, a2a_url, description, role)
|
|
81
|
+
- Removed _strip_secrets function entirely
|
|
82
|
+
- Transports with auth secrets are never included in response
|
|
83
|
+
- Simpler and more secure than stripping secrets from full dict
|
|
84
|
+
|
|
85
|
+
### Bug Fixes (HIGH)
|
|
86
|
+
- HIGH #6: Replaced queue traversal with atomic counter in TaskQueue
|
|
87
|
+
- pending_count() now counter-based: max(0, _enqueue_count - _complete_count - _cancel_count)
|
|
88
|
+
- No singleton access, no re-entrancy path
|
|
89
|
+
- Fixed counter increment timing to prevent drift on queue overflow eviction
|
|
90
|
+
|
|
91
|
+
### Bug Fixes (MEDIUM)
|
|
92
|
+
- MEDIUM #1: Fixed update_exchange placeholder matching in persistence.py
|
|
93
|
+
- MEDIUM #2: Fixed queue overflow race condition in server.py
|
|
94
|
+
- MEDIUM #3: Fixed metrics logger idempotency in runtime_state.py
|
|
95
|
+
- MEDIUM #4: Fixed to_dict mutability in runtime_state.py
|
|
96
|
+
- MEDIUM #5: Fixed persistence.py atomicity
|
|
97
|
+
- MEDIUM #7: Fixed DEFAULT_PORT collision detection with retry logic in plugin.py
|
|
98
|
+
- MEDIUM #8: Fixed A2A_WEBHOOK_SECRET fallback to WEBHOOK_SECRET with warning
|
|
99
|
+
- MEDIUM #9: Fixed path traversal prevention for card_path in webhook agent card retrieval
|
|
100
|
+
- MEDIUM #10: Fixed AuditLogger exception logging to use logger.warning
|
|
101
|
+
- MEDIUM #11: Fixed TOCTOU race condition in audit log rotation
|
|
102
|
+
- MEDIUM #12: Disabled email pattern redaction in filter_outbound (too broad)
|
|
103
|
+
- MEDIUM #13: Fixed regex capture group comment in hooks.py
|
|
104
|
+
|
|
105
|
+
### Code Quality (LOW)
|
|
106
|
+
- LOW #1: Added sort_keys=True to HMAC json.dumps for canonical signatures
|
|
107
|
+
- LOW #2: Removed dead proc.wait() call and fixed SyntaxError in _handle_call_mode2
|
|
108
|
+
- LOW #3: Removed redundant cleanup_zombie_processes() call in finally block
|
|
109
|
+
- LOW #4: Removed redundant json import from inline import statement
|
|
110
|
+
- LOW #5: Removed redundant logging import inside handle_send_session_message
|
|
111
|
+
- LOW #6: Added comment explaining GIL guarantee for double-checked locking
|
|
112
|
+
- LOW #7: Removed module-level task_queue variable that shadowed TaskQueue class
|
|
113
|
+
- LOW #8: Removed unused user_task parameter from handle_help() and handle_list()
|
|
114
|
+
- LOW #9: Added warning when hermes_cli.__version import fails
|
|
115
|
+
- LOW #10: Changed msg_id from task_id[:12] to the full task_id (avoids UUID truncation)
|
|
116
|
+
- LOW #11: Added comment documenting daemon thread metrics loss limitation
|
|
117
|
+
- LOW #13: Removed self-import in _get_queue_depth method
|
|
118
|
+
- LOW #14: Added force parameter to set_runtime_callbacks() to prevent overwriting on reload
|
|
119
|
+
|
|
120
|
+
### Tests
|
|
121
|
+
- All 62 tests pass
|
|
122
|
+
- Added tests for _derive_hermes_home fallback and error raising scenarios
|
|
123
|
+
|
|
124
|
+
## [3.1.2] - 2026-05-15
|
|
125
|
+
|
|
126
|
+
### Bug Fixes (HIGH)
|
|
127
|
+
- HIGH #6: Replaced queue traversal with atomic counter in TaskQueue
|
|
128
|
+
|
|
129
|
+
## [3.1.1] - Previous Release
|
|
130
|
+
|
|
131
|
+
## [3.1.0] - Previous Release
|
|
132
|
+
|
|
133
|
+
## [3.0.0] - Previous Release
|
|
134
|
+
|
|
135
|
+
## [2.0.1] - Previous Release
|
|
136
|
+
|
|
137
|
+
## [2.0.0] - Previous Release
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Linda, Britney, and the Hermes Fleet
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
# A2A Plugin Monitoring/Metrics Design
|
|
2
|
+
|
|
3
|
+
## Overview
|
|
4
|
+
Add monitoring and metrics for the A2A plugin to track queue depth, webhook success rates, and other operational metrics.
|
|
5
|
+
|
|
6
|
+
## Architecture
|
|
7
|
+
|
|
8
|
+
### 1. Metrics Storage
|
|
9
|
+
Store metrics in `A2ARuntimeState` singleton for thread-safe access across plugin components.
|
|
10
|
+
|
|
11
|
+
```python
|
|
12
|
+
class A2AMetrics:
|
|
13
|
+
"""Thread-safe metrics collector for A2A operations."""
|
|
14
|
+
|
|
15
|
+
def __init__(self):
|
|
16
|
+
self._lock = Lock()
|
|
17
|
+
self._webhook_attempts = 0
|
|
18
|
+
self._webhook_successes = 0
|
|
19
|
+
self._webhook_failures = 0
|
|
20
|
+
self._tasks_received = 0
|
|
21
|
+
self._tasks_completed = 0
|
|
22
|
+
self._tasks_canceled = 0
|
|
23
|
+
self._tasks_failed = 0
|
|
24
|
+
self._start_time = time.time()
|
|
25
|
+
|
|
26
|
+
def record_webhook_attempt(self):
|
|
27
|
+
with self._lock:
|
|
28
|
+
self._webhook_attempts += 1
|
|
29
|
+
|
|
30
|
+
def record_webhook_success(self):
|
|
31
|
+
with self._lock:
|
|
32
|
+
self._webhook_successes += 1
|
|
33
|
+
|
|
34
|
+
def record_webhook_failure(self):
|
|
35
|
+
with self._lock:
|
|
36
|
+
self._webhook_failures += 1
|
|
37
|
+
|
|
38
|
+
def record_task_received(self):
|
|
39
|
+
with self._lock:
|
|
40
|
+
self._tasks_received += 1
|
|
41
|
+
|
|
42
|
+
def record_task_completed(self):
|
|
43
|
+
with self._lock:
|
|
44
|
+
self._tasks_completed += 1
|
|
45
|
+
|
|
46
|
+
def record_task_canceled(self):
|
|
47
|
+
with self._lock:
|
|
48
|
+
self._tasks_canceled += 1
|
|
49
|
+
|
|
50
|
+
def record_task_failed(self):
|
|
51
|
+
with self._lock:
|
|
52
|
+
self._tasks_failed += 1
|
|
53
|
+
|
|
54
|
+
def get_metrics(self) -> dict:
|
|
55
|
+
with self._lock:
|
|
56
|
+
uptime = time.time() - self._start_time
|
|
57
|
+
webhook_success_rate = (
|
|
58
|
+
self._webhook_successes / self._webhook_attempts * 100
|
|
59
|
+
if self._webhook_attempts > 0 else 0
|
|
60
|
+
)
|
|
61
|
+
return {
|
|
62
|
+
"uptime_seconds": uptime,
|
|
63
|
+
"webhook": {
|
|
64
|
+
"attempts": self._webhook_attempts,
|
|
65
|
+
"successes": self._webhook_successes,
|
|
66
|
+
"failures": self._webhook_failures,
|
|
67
|
+
"success_rate_percent": round(webhook_success_rate, 2),
|
|
68
|
+
},
|
|
69
|
+
"tasks": {
|
|
70
|
+
"received": self._tasks_received,
|
|
71
|
+
"completed": self._tasks_completed,
|
|
72
|
+
"canceled": self._tasks_canceled,
|
|
73
|
+
"failed": self._tasks_failed,
|
|
74
|
+
},
|
|
75
|
+
"queue": {
|
|
76
|
+
"pending_count": self._get_queue_depth(),
|
|
77
|
+
},
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
def _get_queue_depth(self) -> int:
|
|
81
|
+
from .runtime_state import get_runtime_state as get_state
|
|
82
|
+
return get_state().get_task_queue().pending_count()
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
### 2. Tool: a2a_get_metrics
|
|
86
|
+
Expose metrics via a tool that the LLM can query directly.
|
|
87
|
+
|
|
88
|
+
```python
|
|
89
|
+
def handle_get_metrics() -> dict:
|
|
90
|
+
"""Get current A2A plugin metrics."""
|
|
91
|
+
from .runtime_state import get_runtime_state as get_state
|
|
92
|
+
state = get_state()
|
|
93
|
+
return state.get_metrics().get_metrics()
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
### 3. Periodic Logging
|
|
97
|
+
Log metrics at regular intervals. Logging is enabled with `A2A_METRICS_LOG_ENABLED`, and the interval is set with `A2A_METRICS_LOG_INTERVAL`.
|
|
98
|
+
|
|
99
|
+
```python
|
|
100
|
+
_METRICS_LOG_INTERVAL = int(os.getenv("A2A_METRICS_LOG_INTERVAL", "300")) # 5 minutes
|
|
101
|
+
|
|
102
|
+
def _start_metrics_logger():
|
|
103
|
+
"""Start background thread to log metrics periodically."""
|
|
104
|
+
if os.getenv("A2A_METRICS_LOG_ENABLED", "false").lower() != "true":
|
|
105
|
+
return
|
|
106
|
+
|
|
107
|
+
def log_metrics():
|
|
108
|
+
while True:
|
|
109
|
+
try:
|
|
110
|
+
from .runtime_state import get_runtime_state as get_state
|
|
111
|
+
metrics = get_state().get_metrics().get_metrics()
|
|
112
|
+
logger.info("[A2A Metrics] %s", json.dumps(metrics))
|
|
113
|
+
time.sleep(_METRICS_LOG_INTERVAL)
|
|
114
|
+
except Exception as exc:
|
|
115
|
+
logger.error("[A2A Metrics] Logger error: %s", exc)
|
|
116
|
+
time.sleep(_METRICS_LOG_INTERVAL)
|
|
117
|
+
|
|
118
|
+
threading.Thread(target=log_metrics, daemon=True).start()
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
### 4. Instrumentation Points
|
|
122
|
+
Add metric recording at key points in the code:
|
|
123
|
+
|
|
124
|
+
**server.py** - task queue operations:
|
|
125
|
+
- Record task received on enqueue
|
|
126
|
+
- Record task completed/canceled/failed
|
|
127
|
+
|
|
128
|
+
**tool_handlers.py** - webhook delivery:
|
|
129
|
+
- Record webhook attempt before delivery
|
|
130
|
+
- Record webhook success on successful delivery
|
|
131
|
+
- Record webhook failure on retry exhaustion
|
|
132
|
+
|
|
133
|
+
### 5. Telegram Slash Command (Optional)
|
|
134
|
+
Expose metrics through a Telegram slash command, `/a2a_metrics`, similar to `/lcm` from hermes-lcm.
|
|
135
|
+
|
|
136
|
+
**Challenge**: This requires Hermes gateway integration, not just plugin code.
|
|
137
|
+
|
|
138
|
+
**Options**:
|
|
139
|
+
|
|
140
|
+
**Option A: Gateway Customization (Recommended for hermes-lcm pattern)**
|
|
141
|
+
Add a custom command handler in the Hermes gateway's Telegram platform adapter:
|
|
142
|
+
```python
|
|
143
|
+
# In gateway/platforms/telegram.py (customization)
|
|
144
|
+
async def handle_a2a_metrics_command(update, context):
|
|
145
|
+
"""Handle /a2a_metrics command."""
|
|
146
|
+
from hermes_agent_a2a.runtime_state import get_runtime_state
|
|
147
|
+
metrics = get_runtime_state().get_metrics().get_metrics()
|
|
148
|
+
response = format_metrics_for_telegram(metrics)
|
|
149
|
+
await update.message.reply_text(response)
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
**Option B: Webhook-Backed Command (Plugin-Only)**
|
|
153
|
+
Register the command via the plugin's webhook endpoint:
|
|
154
|
+
- Send a message to the agent with the special prefix `/a2a_metrics`
|
|
155
|
+
- Tool handler detects command and returns metrics instead of processing as task
|
|
156
|
+
- Gateway routes response back to Telegram
|
|
157
|
+
|
|
158
|
+
```python
|
|
159
|
+
def handle_send_session_message(message, agent, ...):
|
|
160
|
+
# Detect metrics command
|
|
161
|
+
if message.strip().startswith("/a2a_metrics"):
|
|
162
|
+
from .runtime_state import get_runtime_state as get_state
|
|
163
|
+
metrics = get_state().get_metrics().get_metrics()
|
|
164
|
+
return {
|
|
165
|
+
"state": "completed",
|
|
166
|
+
"response": format_metrics_for_telegram(metrics),
|
|
167
|
+
"delivery": "command_response",
|
|
168
|
+
}
|
|
169
|
+
# ... normal processing
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
**Option C: Tool-Based Query (Simplest)**
|
|
173
|
+
The LLM queries metrics via the `a2a_get_metrics` tool, then formats the result for Telegram:
|
|
174
|
+
```
|
|
175
|
+
User: /a2a_metrics
|
|
176
|
+
LLM: [calls a2a_get_metrics tool]
|
|
177
|
+
LLM: [formats response and sends to Telegram]
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
### 6. Environment Variables
|
|
181
|
+
|
|
182
|
+
| Variable | Default | Description |
|
|
183
|
+
|----------|---------|-------------|
|
|
184
|
+
| `A2A_METRICS_LOG_ENABLED` | `false` | Enable periodic metrics logging |
|
|
185
|
+
| `A2A_METRICS_LOG_INTERVAL` | `300` | Logging interval in seconds |
|
|
186
|
+
| `A2A_METRICS_COMMAND_ENABLED` | `false` | Enable /a2a_metrics command (Option B) |
|
|
187
|
+
|
|
188
|
+
### 7. Implementation Priority
|
|
189
|
+
|
|
190
|
+
1. **Phase 1** (Core Metrics):
|
|
191
|
+
- Create A2AMetrics class
|
|
192
|
+
- Add to A2ARuntimeState
|
|
193
|
+
- Instrument webhook delivery points
|
|
194
|
+
- Instrument task queue operations
|
|
195
|
+
- Add a2a_get_metrics tool
|
|
196
|
+
|
|
197
|
+
2. **Phase 2** (Logging):
|
|
198
|
+
- Add periodic metrics logger
|
|
199
|
+
- Add configuration via env vars
|
|
200
|
+
|
|
201
|
+
3. **Phase 3** (Telegram Command - Optional):
|
|
202
|
+
- Implement Option B (webhook-backed command)
|
|
203
|
+
- Add formatting for Telegram
|
|
204
|
+
- Test command flow
|
|
205
|
+
|
|
206
|
+
## Example Output
|
|
207
|
+
|
|
208
|
+
### Tool Response (JSON)
|
|
209
|
+
```json
|
|
210
|
+
{
|
|
211
|
+
"uptime_seconds": 3600,
|
|
212
|
+
"webhook": {
|
|
213
|
+
"attempts": 150,
|
|
214
|
+
"successes": 142,
|
|
215
|
+
"failures": 8,
|
|
216
|
+
"success_rate_percent": 94.67
|
|
217
|
+
},
|
|
218
|
+
"tasks": {
|
|
219
|
+
"received": 150,
|
|
220
|
+
"completed": 142,
|
|
221
|
+
"canceled": 5,
|
|
222
|
+
"failed": 3
|
|
223
|
+
},
|
|
224
|
+
"queue": {
|
|
225
|
+
"pending_count": 0
|
|
226
|
+
}
|
|
227
|
+
}
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
### Telegram Formatted Response
|
|
231
|
+
```
|
|
232
|
+
📊 A2A Metrics
|
|
233
|
+
|
|
234
|
+
Uptime: 1h 0m
|
|
235
|
+
|
|
236
|
+
🔗 Webhook
|
|
237
|
+
Attempts: 150
|
|
238
|
+
✅ Success: 142 (94.67%)
|
|
239
|
+
❌ Failed: 8
|
|
240
|
+
|
|
241
|
+
📋 Tasks
|
|
242
|
+
Received: 150
|
|
243
|
+
Completed: 142
|
|
244
|
+
Canceled: 5
|
|
245
|
+
Failed: 3
|
|
246
|
+
|
|
247
|
+
📬 Queue
|
|
248
|
+
Pending: 0
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
## Testing
|
|
252
|
+
|
|
253
|
+
- Unit tests for metrics collection
|
|
254
|
+
- Integration tests for tool response
|
|
255
|
+
- Tests for periodic logging (mock time)
|
|
256
|
+
- Optional: Tests for Telegram command flow
|