hermes-agent-kit 0.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hermes_agent_kit-0.2.2/LICENSE +21 -0
- hermes_agent_kit-0.2.2/PKG-INFO +141 -0
- hermes_agent_kit-0.2.2/README.md +128 -0
- hermes_agent_kit-0.2.2/pyproject.toml +32 -0
- hermes_agent_kit-0.2.2/setup.cfg +4 -0
- hermes_agent_kit-0.2.2/src/hermes_agent_kit.egg-info/PKG-INFO +141 -0
- hermes_agent_kit-0.2.2/src/hermes_agent_kit.egg-info/SOURCES.txt +32 -0
- hermes_agent_kit-0.2.2/src/hermes_agent_kit.egg-info/dependency_links.txt +1 -0
- hermes_agent_kit-0.2.2/src/hermes_agent_kit.egg-info/entry_points.txt +2 -0
- hermes_agent_kit-0.2.2/src/hermes_agent_kit.egg-info/requires.txt +1 -0
- hermes_agent_kit-0.2.2/src/hermes_agent_kit.egg-info/top_level.txt +1 -0
- hermes_agent_kit-0.2.2/src/hermes_kit/__init__.py +1 -0
- hermes_agent_kit-0.2.2/src/hermes_kit/bridge.py +168 -0
- hermes_agent_kit-0.2.2/src/hermes_kit/cli.py +240 -0
- hermes_agent_kit-0.2.2/src/hermes_kit/hooks/cost_tracker/HOOK.yaml +5 -0
- hermes_agent_kit-0.2.2/src/hermes_kit/hooks/cost_tracker/cost_tracker.yaml +1 -0
- hermes_agent_kit-0.2.2/src/hermes_kit/hooks/cost_tracker/handler.py +30 -0
- hermes_agent_kit-0.2.2/src/hermes_kit/hooks/fallback/HOOK.yaml +4 -0
- hermes_agent_kit-0.2.2/src/hermes_kit/hooks/fallback/fallback_chain.yaml +5 -0
- hermes_agent_kit-0.2.2/src/hermes_kit/hooks/fallback/handler.py +23 -0
- hermes_agent_kit-0.2.2/src/hermes_kit/hooks/rate_limiter/HOOK.yaml +5 -0
- hermes_agent_kit-0.2.2/src/hermes_kit/hooks/rate_limiter/handler.py +48 -0
- hermes_agent_kit-0.2.2/src/hermes_kit/hooks/rate_limiter/rate_limits.yaml +8 -0
- hermes_agent_kit-0.2.2/src/hermes_kit/hooks/router/HOOK.yaml +5 -0
- hermes_agent_kit-0.2.2/src/hermes_kit/hooks/router/handler.py +47 -0
- hermes_agent_kit-0.2.2/src/hermes_kit/hooks/router/topic_router.yaml +7 -0
- hermes_agent_kit-0.2.2/src/hermes_kit/lib/__init__.py +0 -0
- hermes_agent_kit-0.2.2/src/hermes_kit/lib/hermes_api.py +6 -0
- hermes_agent_kit-0.2.2/tests/test_cli.py +24 -0
- hermes_agent_kit-0.2.2/tests/test_cost_tracker.py +62 -0
- hermes_agent_kit-0.2.2/tests/test_fallback.py +50 -0
- hermes_agent_kit-0.2.2/tests/test_integration.py +67 -0
- hermes_agent_kit-0.2.2/tests/test_rate_limiter.py +66 -0
- hermes_agent_kit-0.2.2/tests/test_router.py +124 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 srmdn
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: hermes-agent-kit
|
|
3
|
+
Version: 0.2.2
|
|
4
|
+
Summary: Production hardening pack for Hermes Agent — per-topic model routing, fallback chains, rate limiting, and cost tracking via gateway hooks
|
|
5
|
+
License: MIT
|
|
6
|
+
Project-URL: Repository, https://github.com/srmdn/hermes-agent-kit
|
|
7
|
+
Project-URL: Homepage, https://github.com/srmdn/hermes-agent-kit#readme
|
|
8
|
+
Requires-Python: >=3.11
|
|
9
|
+
Description-Content-Type: text/markdown
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Requires-Dist: pyyaml>=6
|
|
12
|
+
Dynamic: license-file
|
|
13
|
+
|
|
14
|
+
# hermes-kit
|
|
15
|
+
|
|
16
|
+
Production hardening pack for [Hermes Agent](https://github.com/NousResearch/hermes-agent).
|
|
17
|
+
|
|
18
|
+
Self-hosted Hermes gateways are powerful but built for single-user setups. Multi-user deployments hit walls: no per-topic model routing, API failures surface as hard errors, one heavy user can burn your API budget with no alert.
|
|
19
|
+
|
|
20
|
+
hermes-kit fills these gaps with production-grade hooks.
|
|
21
|
+
|
|
22
|
+
> ⚠️ **How it works**: hermes-kit monkey-patches Hermes Agent's internal model resolver at runtime. This is intentionally fragile — Hermes Agent updates may break your setup. We're working on an upstream PR to replace the patch with native hook return values. Until then, test after every Hermes upgrade.
|
|
23
|
+
|
|
24
|
+
## Prerequisites
|
|
25
|
+
|
|
26
|
+
- Python ≥ 3.11
|
|
27
|
+
- [Hermes Agent](https://github.com/NousResearch/hermes-agent) installed
|
|
28
|
+
- A configured gateway (Telegram, Discord, etc.)
|
|
29
|
+
|
|
30
|
+
## Install
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
pip install hermes-agent-kit
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
## Quickstart
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
# Install all hooks in one command
|
|
40
|
+
hermes-kit install router fallback rate-limiter cost-tracker
|
|
41
|
+
|
|
42
|
+
# Verify
|
|
43
|
+
hermes-kit doctor
|
|
44
|
+
|
|
45
|
+
# Start gateway with bridge auto-patched
|
|
46
|
+
hermes-kit gateway run --accept-hooks
|
|
47
|
+
|
|
48
|
+
# If new users get "I don't recognize you":
|
|
49
|
+
GATEWAY_ALLOW_ALL_USERS=true hermes-kit gateway run --accept-hooks
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
Hooks land in `~/.hermes/hooks/<name>/`. Hermes discovers them on restart.
|
|
53
|
+
|
|
54
|
+
## Modules
|
|
55
|
+
|
|
56
|
+
### router — Per-Topic Model Routing
|
|
57
|
+
|
|
58
|
+
Route Telegram topics to different AI models. Finance chat uses Qwen, coding chat uses DeepSeek, everything else falls back to GPT-4o-mini.
|
|
59
|
+
|
|
60
|
+
**Via CLI:**
|
|
61
|
+
```bash
|
|
62
|
+
hermes-kit router set-default --model opencode-go/gpt-4o-mini
|
|
63
|
+
hermes-kit router add 42 --model opencode-go/deepseek-v4-pro
|
|
64
|
+
hermes-kit router show
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
**Via YAML** (`~/.hermes/hooks/router/topic_router.yaml`):
|
|
68
|
+
```yaml
|
|
69
|
+
default:
|
|
70
|
+
model: "opencode-go/gpt-4o-mini"
|
|
71
|
+
|
|
72
|
+
topics:
|
|
73
|
+
"42":
|
|
74
|
+
model: "opencode-go/deepseek-v4-pro"
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
**Multi-provider** — route specific topics to native providers:
|
|
78
|
+
```bash
|
|
79
|
+
hermes-kit router add 42 --model gpt-4o --provider openai
|
|
80
|
+
hermes-kit router add 7 --model claude-sonnet-4-20250514 --provider anthropic
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
Hermes resolves API keys from `~/.hermes/.env` (`OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, etc.). See [providers guide](docs/providers.md) for all supported providers and model IDs.
|
|
84
|
+
|
|
85
|
+
### fallback — Automatic Fallback Chain
|
|
86
|
+
|
|
87
|
+
Define a chain of models to try when the primary fails.
|
|
88
|
+
|
|
89
|
+
**Via YAML** (`~/.hermes/hooks/fallback/fallback_chain.yaml`):
|
|
90
|
+
```yaml
|
|
91
|
+
chains:
|
|
92
|
+
global:
|
|
93
|
+
- "opencode-go/deepseek-v4-pro" # primary
|
|
94
|
+
- "opencode-go/claude-sonnet-4" # fallback
|
|
95
|
+
- "opencode-go/gpt-4o-mini" # last resort
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
After a failure, call `hermes_kit.bridge.retry_with_fallback(session_key)` to advance to the next model.
|
|
99
|
+
|
|
100
|
+
### rate-limiter — Per-User Rate Limiting
|
|
101
|
+
|
|
102
|
+
Prevent a single user or chat from draining your API budget.
|
|
103
|
+
|
|
104
|
+
**Via YAML** (`~/.hermes/hooks/rate-limiter/rate_limits.yaml`):
|
|
105
|
+
```yaml
|
|
106
|
+
limits:
|
|
107
|
+
global:
|
|
108
|
+
max_messages_per_window: 100
|
|
109
|
+
window_seconds: 3600
|
|
110
|
+
per_user:
|
|
111
|
+
"123456789":
|
|
112
|
+
max_messages_per_window: 50
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
> ⚠️ Rate limiter currently tracks usage but does not block messages. Enforcement is planned for an upcoming release.
|
|
116
|
+
|
|
117
|
+
### cost-tracker — Real-Time Cost Tracking
|
|
118
|
+
|
|
119
|
+
Track token costs per session and alert when thresholds are exceeded.
|
|
120
|
+
|
|
121
|
+
**Via YAML** (`~/.hermes/hooks/cost-tracker/cost_tracker.yaml`):
|
|
122
|
+
```yaml
|
|
123
|
+
alert_threshold_usd: 1.0
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
Set to `0` to disable alerts but continue tracking.
|
|
127
|
+
|
|
128
|
+
## Docs
|
|
129
|
+
|
|
130
|
+
- [Quickstart](docs/quickstart.md) — agent-driven and manual install
|
|
131
|
+
- [Providers](docs/providers.md) — supported AI providers and model lists
|
|
132
|
+
- Manual setup per module:
|
|
133
|
+
- [Router](docs/manual/router.md) — per-topic model routing
|
|
134
|
+
- [Fallback](docs/manual/fallback.md) — automatic retry chains
|
|
135
|
+
- [Rate Limiter](docs/manual/rate-limiter.md) — per-user quotas
|
|
136
|
+
- [Cost Tracker](docs/manual/cost-tracker.md) — budget alerts
|
|
137
|
+
- [Troubleshooting](docs/troubleshooting.md) — common issues
|
|
138
|
+
|
|
139
|
+
## License
|
|
140
|
+
|
|
141
|
+
MIT
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
# hermes-kit
|
|
2
|
+
|
|
3
|
+
Production hardening pack for [Hermes Agent](https://github.com/NousResearch/hermes-agent).
|
|
4
|
+
|
|
5
|
+
Self-hosted Hermes gateways are powerful but built for single-user setups. Multi-user deployments hit walls: no per-topic model routing, API failures surface as hard errors, one heavy user can burn your API budget with no alert.
|
|
6
|
+
|
|
7
|
+
hermes-kit fills these gaps with production-grade hooks.
|
|
8
|
+
|
|
9
|
+
> ⚠️ **How it works**: hermes-kit monkey-patches Hermes Agent's internal model resolver at runtime. This is intentionally fragile — Hermes Agent updates may break your setup. We're working on an upstream PR to replace the patch with native hook return values. Until then, test after every Hermes upgrade.
|
|
10
|
+
|
|
11
|
+
## Prerequisites
|
|
12
|
+
|
|
13
|
+
- Python ≥ 3.11
|
|
14
|
+
- [Hermes Agent](https://github.com/NousResearch/hermes-agent) installed
|
|
15
|
+
- A configured gateway (Telegram, Discord, etc.)
|
|
16
|
+
|
|
17
|
+
## Install
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
pip install hermes-agent-kit
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
## Quickstart
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
# Install all hooks in one command
|
|
27
|
+
hermes-kit install router fallback rate-limiter cost-tracker
|
|
28
|
+
|
|
29
|
+
# Verify
|
|
30
|
+
hermes-kit doctor
|
|
31
|
+
|
|
32
|
+
# Start gateway with bridge auto-patched
|
|
33
|
+
hermes-kit gateway run --accept-hooks
|
|
34
|
+
|
|
35
|
+
# If new users get "I don't recognize you":
|
|
36
|
+
GATEWAY_ALLOW_ALL_USERS=true hermes-kit gateway run --accept-hooks
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
Hooks land in `~/.hermes/hooks/<name>/`. Hermes discovers them on restart.
|
|
40
|
+
|
|
41
|
+
## Modules
|
|
42
|
+
|
|
43
|
+
### router — Per-Topic Model Routing
|
|
44
|
+
|
|
45
|
+
Route Telegram topics to different AI models. Finance chat uses Qwen, coding chat uses DeepSeek, everything else falls back to GPT-4o-mini.
|
|
46
|
+
|
|
47
|
+
**Via CLI:**
|
|
48
|
+
```bash
|
|
49
|
+
hermes-kit router set-default --model opencode-go/gpt-4o-mini
|
|
50
|
+
hermes-kit router add 42 --model opencode-go/deepseek-v4-pro
|
|
51
|
+
hermes-kit router show
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
**Via YAML** (`~/.hermes/hooks/router/topic_router.yaml`):
|
|
55
|
+
```yaml
|
|
56
|
+
default:
|
|
57
|
+
model: "opencode-go/gpt-4o-mini"
|
|
58
|
+
|
|
59
|
+
topics:
|
|
60
|
+
"42":
|
|
61
|
+
model: "opencode-go/deepseek-v4-pro"
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
**Multi-provider** — route specific topics to native providers:
|
|
65
|
+
```bash
|
|
66
|
+
hermes-kit router add 42 --model gpt-4o --provider openai
|
|
67
|
+
hermes-kit router add 7 --model claude-sonnet-4-20250514 --provider anthropic
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
Hermes resolves API keys from `~/.hermes/.env` (`OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, etc.). See [providers guide](docs/providers.md) for all supported providers and model IDs.
|
|
71
|
+
|
|
72
|
+
### fallback — Automatic Fallback Chain
|
|
73
|
+
|
|
74
|
+
Define a chain of models to try when the primary fails.
|
|
75
|
+
|
|
76
|
+
**Via YAML** (`~/.hermes/hooks/fallback/fallback_chain.yaml`):
|
|
77
|
+
```yaml
|
|
78
|
+
chains:
|
|
79
|
+
global:
|
|
80
|
+
- "opencode-go/deepseek-v4-pro" # primary
|
|
81
|
+
- "opencode-go/claude-sonnet-4" # fallback
|
|
82
|
+
- "opencode-go/gpt-4o-mini" # last resort
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
After a failure, call `hermes_kit.bridge.retry_with_fallback(session_key)` to advance to the next model.
|
|
86
|
+
|
|
87
|
+
### rate-limiter — Per-User Rate Limiting
|
|
88
|
+
|
|
89
|
+
Prevent a single user or chat from draining your API budget.
|
|
90
|
+
|
|
91
|
+
**Via YAML** (`~/.hermes/hooks/rate-limiter/rate_limits.yaml`):
|
|
92
|
+
```yaml
|
|
93
|
+
limits:
|
|
94
|
+
global:
|
|
95
|
+
max_messages_per_window: 100
|
|
96
|
+
window_seconds: 3600
|
|
97
|
+
per_user:
|
|
98
|
+
"123456789":
|
|
99
|
+
max_messages_per_window: 50
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
> ⚠️ Rate limiter currently tracks usage but does not block messages. Enforcement is planned for an upcoming release.
|
|
103
|
+
|
|
104
|
+
### cost-tracker — Real-Time Cost Tracking
|
|
105
|
+
|
|
106
|
+
Track token costs per session and alert when thresholds are exceeded.
|
|
107
|
+
|
|
108
|
+
**Via YAML** (`~/.hermes/hooks/cost-tracker/cost_tracker.yaml`):
|
|
109
|
+
```yaml
|
|
110
|
+
alert_threshold_usd: 1.0
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
Set to `0` to disable alerts but continue tracking.
|
|
114
|
+
|
|
115
|
+
## Docs
|
|
116
|
+
|
|
117
|
+
- [Quickstart](docs/quickstart.md) — agent-driven and manual install
|
|
118
|
+
- [Providers](docs/providers.md) — supported AI providers and model lists
|
|
119
|
+
- Manual setup per module:
|
|
120
|
+
- [Router](docs/manual/router.md) — per-topic model routing
|
|
121
|
+
- [Fallback](docs/manual/fallback.md) — automatic retry chains
|
|
122
|
+
- [Rate Limiter](docs/manual/rate-limiter.md) — per-user quotas
|
|
123
|
+
- [Cost Tracker](docs/manual/cost-tracker.md) — budget alerts
|
|
124
|
+
- [Troubleshooting](docs/troubleshooting.md) — common issues
|
|
125
|
+
|
|
126
|
+
## License
|
|
127
|
+
|
|
128
|
+
MIT
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "hermes-agent-kit"
|
|
3
|
+
version = "0.2.2"
|
|
4
|
+
description = "Production hardening pack for Hermes Agent — per-topic model routing, fallback chains, rate limiting, and cost tracking via gateway hooks"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
license = {text = "MIT"}
|
|
7
|
+
requires-python = ">=3.11"
|
|
8
|
+
dependencies = ["pyyaml>=6"]
|
|
9
|
+
|
|
10
|
+
[project.urls]
|
|
11
|
+
Repository = "https://github.com/srmdn/hermes-agent-kit"
|
|
12
|
+
Homepage = "https://github.com/srmdn/hermes-agent-kit#readme"
|
|
13
|
+
|
|
14
|
+
[dependency-groups]
|
|
15
|
+
dev = ["pytest>=8"]
|
|
16
|
+
|
|
17
|
+
[project.scripts]
|
|
18
|
+
hermes-kit = "hermes_kit.cli:main"
|
|
19
|
+
|
|
20
|
+
[build-system]
|
|
21
|
+
requires = ["setuptools>=75"]
|
|
22
|
+
build-backend = "setuptools.build_meta"
|
|
23
|
+
|
|
24
|
+
[tool.pytest.ini_options]
|
|
25
|
+
testpaths = ["tests"]
|
|
26
|
+
addopts = "-v"
|
|
27
|
+
|
|
28
|
+
[tool.setuptools.packages.find]
|
|
29
|
+
where = ["src"]
|
|
30
|
+
|
|
31
|
+
[tool.setuptools.package-data]
|
|
32
|
+
"hermes_kit.hooks" = ["*.yaml", "**/*.yaml"]
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: hermes-agent-kit
|
|
3
|
+
Version: 0.2.2
|
|
4
|
+
Summary: Production hardening pack for Hermes Agent — per-topic model routing, fallback chains, rate limiting, and cost tracking via gateway hooks
|
|
5
|
+
License: MIT
|
|
6
|
+
Project-URL: Repository, https://github.com/srmdn/hermes-agent-kit
|
|
7
|
+
Project-URL: Homepage, https://github.com/srmdn/hermes-agent-kit#readme
|
|
8
|
+
Requires-Python: >=3.11
|
|
9
|
+
Description-Content-Type: text/markdown
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Requires-Dist: pyyaml>=6
|
|
12
|
+
Dynamic: license-file
|
|
13
|
+
|
|
14
|
+
# hermes-kit
|
|
15
|
+
|
|
16
|
+
Production hardening pack for [Hermes Agent](https://github.com/NousResearch/hermes-agent).
|
|
17
|
+
|
|
18
|
+
Self-hosted Hermes gateways are powerful but built for single-user setups. Multi-user deployments hit walls: no per-topic model routing, API failures surface as hard errors, one heavy user can burn your API budget with no alert.
|
|
19
|
+
|
|
20
|
+
hermes-kit fills these gaps with production-grade hooks.
|
|
21
|
+
|
|
22
|
+
> ⚠️ **How it works**: hermes-kit monkey-patches Hermes Agent's internal model resolver at runtime. This is intentionally fragile — Hermes Agent updates may break your setup. We're working on an upstream PR to replace the patch with native hook return values. Until then, test after every Hermes upgrade.
|
|
23
|
+
|
|
24
|
+
## Prerequisites
|
|
25
|
+
|
|
26
|
+
- Python ≥ 3.11
|
|
27
|
+
- [Hermes Agent](https://github.com/NousResearch/hermes-agent) installed
|
|
28
|
+
- A configured gateway (Telegram, Discord, etc.)
|
|
29
|
+
|
|
30
|
+
## Install
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
pip install hermes-agent-kit
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
## Quickstart
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
# Install all hooks in one command
|
|
40
|
+
hermes-kit install router fallback rate-limiter cost-tracker
|
|
41
|
+
|
|
42
|
+
# Verify
|
|
43
|
+
hermes-kit doctor
|
|
44
|
+
|
|
45
|
+
# Start gateway with bridge auto-patched
|
|
46
|
+
hermes-kit gateway run --accept-hooks
|
|
47
|
+
|
|
48
|
+
# If new users get "I don't recognize you":
|
|
49
|
+
GATEWAY_ALLOW_ALL_USERS=true hermes-kit gateway run --accept-hooks
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
Hooks land in `~/.hermes/hooks/<name>/`. Hermes discovers them on restart.
|
|
53
|
+
|
|
54
|
+
## Modules
|
|
55
|
+
|
|
56
|
+
### router — Per-Topic Model Routing
|
|
57
|
+
|
|
58
|
+
Route Telegram topics to different AI models. Finance chat uses Qwen, coding chat uses DeepSeek, everything else falls back to GPT-4o-mini.
|
|
59
|
+
|
|
60
|
+
**Via CLI:**
|
|
61
|
+
```bash
|
|
62
|
+
hermes-kit router set-default --model opencode-go/gpt-4o-mini
|
|
63
|
+
hermes-kit router add 42 --model opencode-go/deepseek-v4-pro
|
|
64
|
+
hermes-kit router show
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
**Via YAML** (`~/.hermes/hooks/router/topic_router.yaml`):
|
|
68
|
+
```yaml
|
|
69
|
+
default:
|
|
70
|
+
model: "opencode-go/gpt-4o-mini"
|
|
71
|
+
|
|
72
|
+
topics:
|
|
73
|
+
"42":
|
|
74
|
+
model: "opencode-go/deepseek-v4-pro"
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
**Multi-provider** — route specific topics to native providers:
|
|
78
|
+
```bash
|
|
79
|
+
hermes-kit router add 42 --model gpt-4o --provider openai
|
|
80
|
+
hermes-kit router add 7 --model claude-sonnet-4-20250514 --provider anthropic
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
Hermes resolves API keys from `~/.hermes/.env` (`OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, etc.). See [providers guide](docs/providers.md) for all supported providers and model IDs.
|
|
84
|
+
|
|
85
|
+
### fallback — Automatic Fallback Chain
|
|
86
|
+
|
|
87
|
+
Define a chain of models to try when the primary fails.
|
|
88
|
+
|
|
89
|
+
**Via YAML** (`~/.hermes/hooks/fallback/fallback_chain.yaml`):
|
|
90
|
+
```yaml
|
|
91
|
+
chains:
|
|
92
|
+
global:
|
|
93
|
+
- "opencode-go/deepseek-v4-pro" # primary
|
|
94
|
+
- "opencode-go/claude-sonnet-4" # fallback
|
|
95
|
+
- "opencode-go/gpt-4o-mini" # last resort
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
After a failure, call `hermes_kit.bridge.retry_with_fallback(session_key)` to advance to the next model.
|
|
99
|
+
|
|
100
|
+
### rate-limiter — Per-User Rate Limiting
|
|
101
|
+
|
|
102
|
+
Prevent a single user or chat from draining your API budget.
|
|
103
|
+
|
|
104
|
+
**Via YAML** (`~/.hermes/hooks/rate-limiter/rate_limits.yaml`):
|
|
105
|
+
```yaml
|
|
106
|
+
limits:
|
|
107
|
+
global:
|
|
108
|
+
max_messages_per_window: 100
|
|
109
|
+
window_seconds: 3600
|
|
110
|
+
per_user:
|
|
111
|
+
"123456789":
|
|
112
|
+
max_messages_per_window: 50
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
> ⚠️ Rate limiter currently tracks usage but does not block messages. Enforcement is planned for an upcoming release.
|
|
116
|
+
|
|
117
|
+
### cost-tracker — Real-Time Cost Tracking
|
|
118
|
+
|
|
119
|
+
Track token costs per session and alert when thresholds are exceeded.
|
|
120
|
+
|
|
121
|
+
**Via YAML** (`~/.hermes/hooks/cost-tracker/cost_tracker.yaml`):
|
|
122
|
+
```yaml
|
|
123
|
+
alert_threshold_usd: 1.0
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
Set to `0` to disable alerts but continue tracking.
|
|
127
|
+
|
|
128
|
+
## Docs
|
|
129
|
+
|
|
130
|
+
- [Quickstart](docs/quickstart.md) — agent-driven and manual install
|
|
131
|
+
- [Providers](docs/providers.md) — supported AI providers and model lists
|
|
132
|
+
- Manual setup per module:
|
|
133
|
+
- [Router](docs/manual/router.md) — per-topic model routing
|
|
134
|
+
- [Fallback](docs/manual/fallback.md) — automatic retry chains
|
|
135
|
+
- [Rate Limiter](docs/manual/rate-limiter.md) — per-user quotas
|
|
136
|
+
- [Cost Tracker](docs/manual/cost-tracker.md) — budget alerts
|
|
137
|
+
- [Troubleshooting](docs/troubleshooting.md) — common issues
|
|
138
|
+
|
|
139
|
+
## License
|
|
140
|
+
|
|
141
|
+
MIT
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
src/hermes_agent_kit.egg-info/PKG-INFO
|
|
5
|
+
src/hermes_agent_kit.egg-info/SOURCES.txt
|
|
6
|
+
src/hermes_agent_kit.egg-info/dependency_links.txt
|
|
7
|
+
src/hermes_agent_kit.egg-info/entry_points.txt
|
|
8
|
+
src/hermes_agent_kit.egg-info/requires.txt
|
|
9
|
+
src/hermes_agent_kit.egg-info/top_level.txt
|
|
10
|
+
src/hermes_kit/__init__.py
|
|
11
|
+
src/hermes_kit/bridge.py
|
|
12
|
+
src/hermes_kit/cli.py
|
|
13
|
+
src/hermes_kit/hooks/cost_tracker/HOOK.yaml
|
|
14
|
+
src/hermes_kit/hooks/cost_tracker/cost_tracker.yaml
|
|
15
|
+
src/hermes_kit/hooks/cost_tracker/handler.py
|
|
16
|
+
src/hermes_kit/hooks/fallback/HOOK.yaml
|
|
17
|
+
src/hermes_kit/hooks/fallback/fallback_chain.yaml
|
|
18
|
+
src/hermes_kit/hooks/fallback/handler.py
|
|
19
|
+
src/hermes_kit/hooks/rate_limiter/HOOK.yaml
|
|
20
|
+
src/hermes_kit/hooks/rate_limiter/handler.py
|
|
21
|
+
src/hermes_kit/hooks/rate_limiter/rate_limits.yaml
|
|
22
|
+
src/hermes_kit/hooks/router/HOOK.yaml
|
|
23
|
+
src/hermes_kit/hooks/router/handler.py
|
|
24
|
+
src/hermes_kit/hooks/router/topic_router.yaml
|
|
25
|
+
src/hermes_kit/lib/__init__.py
|
|
26
|
+
src/hermes_kit/lib/hermes_api.py
|
|
27
|
+
tests/test_cli.py
|
|
28
|
+
tests/test_cost_tracker.py
|
|
29
|
+
tests/test_fallback.py
|
|
30
|
+
tests/test_integration.py
|
|
31
|
+
tests/test_rate_limiter.py
|
|
32
|
+
tests/test_router.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
pyyaml>=6
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
hermes_kit
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""hermes-kit — Production hardening hooks for Hermes Agent."""
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
import time
|
|
3
|
+
|
|
4
|
+
# Module-level, in-process state shared by the helper functions in this module.
# Everything is keyed by the session key string passed in by callers.

# session key -> {"model": ..., "provider": ...} as stored by set_override().
_model_overrides: dict[str, dict[str, Optional[str]]] = {}
# session key -> ordered list of model names stored by set_fallback_chain().
_fallback_chains: dict[str, list[str]] = {}
# session key -> current position inside that session's fallback chain.
_fallback_index: dict[str, int] = {}

# session key -> number of increments recorded by increment_rate_counter().
_rate_counters: dict[str, int] = {}
# session key -> time.time() value captured when counting (re)started.
_rate_windows: dict[str, float] = {}
# session keys currently flagged via set_rate_limited().
_rate_limited: set[str] = set()
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def set_override(session_key: str, model: str, provider: Optional[str] = None) -> None:
    """Store a model override for ``session_key``, replacing any previous one.

    Args:
        session_key: Key under which the override is stored.
        model: Model name to record.
        provider: Optional provider name; stored as ``None`` when omitted.
    """
    entry: dict[str, Optional[str]] = {"model": model, "provider": provider}
    _model_overrides[session_key] = entry
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def get_override(session_key: str) -> Optional[dict]:
    """Return the override stored for ``session_key``, or ``None`` if there is none."""
    try:
        return _model_overrides[session_key]
    except KeyError:
        return None
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def clear_override(session_key: str) -> None:
    """Forget the override stored for ``session_key``; a missing key is ignored."""
    if session_key in _model_overrides:
        del _model_overrides[session_key]
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def set_fallback_chain(session_key: str, chain: list[str]) -> None:
    """Register ``chain`` for ``session_key`` and rewind its position to the first entry.

    The list object is stored as given (not copied).
    """
    _fallback_index[session_key] = 0
    _fallback_chains[session_key] = chain
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def get_fallback_chain(session_key: str) -> list[str] | None:
    """Return the fallback chain registered for ``session_key``, or ``None``."""
    registered = _fallback_chains.get(session_key)
    return registered
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def advance_fallback(session_key: str) -> None:
    """Move ``session_key`` one step further along its fallback chain.

    Sessions without a recorded position are left untouched. The position is
    not clamped to the chain length.
    """
    try:
        _fallback_index[session_key] += 1
    except KeyError:
        # No chain was ever registered for this session: nothing to advance.
        pass
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def get_current_fallback(session_key: str) -> str | None:
    """Return the model at the session's current chain position.

    Returns ``None`` when the session has no chain, the chain is empty, or the
    position has moved past the last entry.
    """
    chain = _fallback_chains.get(session_key)
    if chain:
        position = _fallback_index.get(session_key, 0)
        if position < len(chain):
            return chain[position]
    return None
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def retry_with_fallback(session_key: str) -> str | None:
    """Advance to the next fallback model and install it as the session override.

    Returns:
        The model now selected, or ``None`` when the chain is exhausted or
        missing; in that case the existing override is left unchanged.
    """
    advance_fallback(session_key)
    next_model = get_current_fallback(session_key)
    if not next_model:
        return next_model
    # Only the model is passed, so the stored provider becomes None.
    set_override(session_key, model=next_model)
    return next_model
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def reset_rate_counter(session_key: str) -> None:
    """Start a fresh counting window for ``session_key``.

    Zeroes the counter, stamps the window start with the current time, and
    clears the rate-limited flag.
    """
    _rate_counters[session_key] = 0
    window_start = time.time()
    _rate_windows[session_key] = window_start
    _rate_limited.discard(session_key)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def increment_rate_counter(session_key: str) -> int:
    """Count one more event for ``session_key`` and return the new total.

    The window start time is recorded the first time a session is seen and is
    left alone afterwards.
    """
    updated = _rate_counters.get(session_key, 0) + 1
    _rate_counters[session_key] = updated
    if session_key not in _rate_windows:
        _rate_windows[session_key] = time.time()
    return updated
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def get_rate_window_start(session_key: str) -> float:
    """Return the recorded window start time, or ``0.0`` for an unknown session."""
    if session_key in _rate_windows:
        return _rate_windows[session_key]
    return 0.0
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def set_rate_limited(session_key: str) -> None:
    """Flag ``session_key`` as rate limited (cleared by ``reset_rate_counter``)."""
    _rate_limited.add(session_key)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def is_rate_limited(session_key: str) -> bool:
    """Report whether ``session_key`` is currently flagged as rate limited."""
    flagged = session_key in _rate_limited
    return flagged
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
# session key -> {model ID as passed to track_cost: accumulated cost}.
_session_costs: dict[str, dict[str, float]] = {}
# model name -> (input price, output price), applied per one million tokens.
_cost_pricing: dict[str, tuple[float, float]] = {
    "gpt-4o": (2.50, 10.00),
    "gpt-4o-mini": (0.15, 0.60),
    "claude-sonnet-4": (3.00, 15.00),
    "deepseek-chat": (0.14, 0.28),
    "qwen-3.6-plus": (0.40, 0.80),
}


def track_cost(session_key: str, model: str, prompt_tokens: int, completion_tokens: int) -> None:
    """Add the cost of one model call to the running total for ``session_key``.

    Prices are looked up by exact model name first. If that misses and the
    name carries a ``provider/`` prefix (e.g. ``"opencode-go/gpt-4o-mini"``),
    the part after the last ``/`` is tried as well, so prefixed IDs are priced
    instead of silently costing nothing. Models with no known price add 0.0.

    Args:
        session_key: Session whose total is updated.
        model: Model ID; the cost is accumulated under this exact string.
        prompt_tokens: Number of input tokens used.
        completion_tokens: Number of output tokens produced.
    """
    prices = _cost_pricing.get(model)
    if prices is None and isinstance(model, str) and "/" in model:
        # Retry without the provider prefix: "provider/name" -> "name".
        prices = _cost_pricing.get(model.rsplit("/", 1)[1])
    input_price, output_price = prices or (0.0, 0.0)

    cost = (prompt_tokens / 1_000_000) * input_price + (completion_tokens / 1_000_000) * output_price

    per_model = _session_costs.setdefault(session_key, {})
    per_model[model] = per_model.get(model, 0.0) + cost
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def get_session_cost(session_key: str) -> float:
    """Return the total cost accumulated for ``session_key`` across all models.

    Unknown sessions yield ``0.0``.
    """
    per_model = _session_costs.get(session_key)
    if per_model is None:
        return 0.0
    return sum(per_model.values())
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def get_session_cost_breakdown(session_key: str) -> dict[str, float]:
    """Return a per-model cost breakdown for ``session_key``.

    The result is always a fresh dict (empty for unknown sessions), so callers
    can modify it without altering the tracked totals. Previously a known
    session returned the live internal dict while an unknown one returned a
    detached ``{}``.
    """
    return dict(_session_costs.get(session_key, {}))
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def reset_session_cost(session_key: str) -> None:
    """Discard all cost data recorded for ``session_key``; unknown keys are ignored."""
    if session_key in _session_costs:
        del _session_costs[session_key]
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def alert_cost_exceeded(session_key: str, total: float, threshold: float) -> None:
    """Print a cost alert line for ``session_key`` to standard output.

    Args:
        session_key: Session the alert refers to.
        total: Accumulated cost, printed with four decimals.
        threshold: Configured limit, printed with two decimals.
    """
    message = f"[hermes-kit] COST ALERT: session {session_key} total ${total:.4f} exceeds threshold ${threshold:.2f}"
    print(message)
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
# Override keys that are copied into the resolver's runtime kwargs when set.
_RUNTIME_KEYS = ("provider", "api_key", "base_url", "api_mode")


def _apply_override(override: dict, model: str, runtime_kwargs: dict) -> tuple[str, dict]:
    """Merge a session override into a resolved ``(model, runtime_kwargs)`` pair.

    Args:
        override: Override mapping; may contain ``"model"`` and any of
            ``_RUNTIME_KEYS``. Entries that are missing or ``None`` are ignored.
        model: Model chosen by the original resolver; kept when the override
            carries no model.
        runtime_kwargs: Runtime kwargs from the original resolver. This dict
            is never mutated.

    Returns:
        ``(model, runtime_kwargs)`` with the override applied. When the
        override sets no runtime key, the original ``runtime_kwargs`` object
        is returned as-is.
    """
    # An override storing model=None (or "") must not wipe the resolved model.
    model = override.get("model") or model

    updates = {key: override[key] for key in _RUNTIME_KEYS if override.get(key) is not None}
    if updates:
        # Build a new dict so a dict shared or cached by the caller is not
        # modified with session-specific values.
        runtime_kwargs = {**runtime_kwargs, **updates}
    return model, runtime_kwargs
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def patch_gateway_resolver() -> None:
    """Wrap ``GatewayRunner._resolve_session_agent_runtime`` to honour overrides.

    The wrapper calls the original resolver, which is expected to return a
    ``(model, runtime_kwargs)`` pair, then applies the override registered for
    the session, if any. The session key is read from the ``session_key``
    keyword argument or, failing that, the first positional argument. Both
    coroutine and plain resolvers are supported.

    Calling this function again is a no-op once the resolver has been patched,
    so repeated calls do not stack wrappers.

    Raises:
        ImportError: If ``gateway.run`` cannot be imported.
    """
    import functools
    import inspect
    from gateway.run import GatewayRunner

    original = GatewayRunner._resolve_session_agent_runtime
    # "is True" (not truthiness) so objects that fabricate attributes on
    # access, such as mocks, are not mistaken for an already patched resolver.
    if getattr(original, "_hermes_kit_patched", False) is True:
        return

    def _with_override(result, args, kwargs):
        """Apply the session's override, if any, to the resolver's result."""
        model, runtime_kwargs = result
        session_key = kwargs.get("session_key") or (args[0] if args else None)
        if session_key:
            override = get_override(session_key)
            if override:
                model, runtime_kwargs = _apply_override(override, model, runtime_kwargs)
        return model, runtime_kwargs

    if inspect.iscoroutinefunction(original):
        @functools.wraps(original)
        async def patched_resolver(self, *args, **kwargs):
            return _with_override(await original(self, *args, **kwargs), args, kwargs)
    else:
        @functools.wraps(original)
        def patched_resolver(self, *args, **kwargs):
            return _with_override(original(self, *args, **kwargs), args, kwargs)

    patched_resolver._hermes_kit_patched = True
    GatewayRunner._resolve_session_agent_runtime = patched_resolver
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
# Install the resolver patch as soon as this module is imported.
try:
    patch_gateway_resolver()
except ImportError:
    # Best effort: if the gateway module cannot be imported, skip patching so
    # the rest of this module stays usable.
    pass
|