haon-agent 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- haon_agent-0.2.0/PKG-INFO +145 -0
- haon_agent-0.2.0/README.md +106 -0
- haon_agent-0.2.0/haon_agent/__init__.py +3 -0
- haon_agent-0.2.0/haon_agent/__main__.py +6 -0
- haon_agent-0.2.0/haon_agent/agent.py +233 -0
- haon_agent-0.2.0/haon_agent/api_client.py +182 -0
- haon_agent-0.2.0/haon_agent/broker_client.py +150 -0
- haon_agent-0.2.0/haon_agent/cli.py +250 -0
- haon_agent-0.2.0/haon_agent/config.py +134 -0
- haon_agent-0.2.0/haon_agent/credentials.py +102 -0
- haon_agent-0.2.0/haon_agent/hardware.py +192 -0
- haon_agent-0.2.0/haon_agent/pair.py +343 -0
- haon_agent-0.2.0/haon_agent/runtime/__init__.py +31 -0
- haon_agent-0.2.0/haon_agent/runtime/base.py +42 -0
- haon_agent-0.2.0/haon_agent/runtime/comfyui.py +33 -0
- haon_agent-0.2.0/haon_agent/runtime/custom_http.py +36 -0
- haon_agent-0.2.0/haon_agent/runtime/docker.py +197 -0
- haon_agent-0.2.0/haon_agent/runtime/docker_client.py +275 -0
- haon_agent-0.2.0/haon_agent/runtime/echo.py +37 -0
- haon_agent-0.2.0/haon_agent/runtime/ollama.py +42 -0
- haon_agent-0.2.0/haon_agent/runtime/sandbox.py +109 -0
- haon_agent-0.2.0/haon_agent/tick_emitter.py +125 -0
- haon_agent-0.2.0/haon_agent/usage.py +193 -0
- haon_agent-0.2.0/haon_agent.egg-info/PKG-INFO +145 -0
- haon_agent-0.2.0/haon_agent.egg-info/SOURCES.txt +29 -0
- haon_agent-0.2.0/haon_agent.egg-info/dependency_links.txt +1 -0
- haon_agent-0.2.0/haon_agent.egg-info/entry_points.txt +2 -0
- haon_agent-0.2.0/haon_agent.egg-info/requires.txt +12 -0
- haon_agent-0.2.0/haon_agent.egg-info/top_level.txt +1 -0
- haon_agent-0.2.0/pyproject.toml +84 -0
- haon_agent-0.2.0/setup.cfg +4 -0
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: haon-agent
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: HAON PowerHub — miner agent: rent out idle GPU time on a decentralized compute marketplace.
|
|
5
|
+
Author-email: HAON <alpha@haon.run>
|
|
6
|
+
Maintainer-email: HAON <alpha@haon.run>
|
|
7
|
+
License: MIT
|
|
8
|
+
Project-URL: Homepage, https://staging.haon.run
|
|
9
|
+
Project-URL: Documentation, https://staging.haon.run/docs/miner-install-quickstart.md
|
|
10
|
+
Project-URL: Issues, https://github.com/caiorlm/HAON-PowerHub-Marketplace/issues
|
|
11
|
+
Project-URL: Source, https://github.com/caiorlm/HAON-PowerHub-Marketplace/tree/main/apps/miner-agent
|
|
12
|
+
Keywords: gpu,marketplace,distributed-computing,llm,ollama,comfyui,ai-infrastructure,miner-agent
|
|
13
|
+
Classifier: Development Status :: 3 - Alpha
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: Intended Audience :: System Administrators
|
|
16
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
17
|
+
Classifier: Operating System :: MacOS
|
|
18
|
+
Classifier: Operating System :: Microsoft :: Windows
|
|
19
|
+
Classifier: Operating System :: POSIX :: Linux
|
|
20
|
+
Classifier: Programming Language :: Python :: 3
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
24
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
25
|
+
Classifier: Topic :: System :: Distributed Computing
|
|
26
|
+
Requires-Python: >=3.11
|
|
27
|
+
Description-Content-Type: text/markdown
|
|
28
|
+
Requires-Dist: click<9.0,>=8.1
|
|
29
|
+
Requires-Dist: httpx<0.28,>=0.27
|
|
30
|
+
Requires-Dist: websockets<14.0,>=13.0
|
|
31
|
+
Requires-Dist: psutil<7.0,>=6.0
|
|
32
|
+
Requires-Dist: keyring<26.0,>=25.0
|
|
33
|
+
Requires-Dist: structlog<25.0,>=24.0
|
|
34
|
+
Requires-Dist: pydantic<3.0,>=2.9
|
|
35
|
+
Provides-Extra: dev
|
|
36
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
37
|
+
Requires-Dist: pytest-asyncio>=0.24; extra == "dev"
|
|
38
|
+
Requires-Dist: pytest-httpx>=0.30; extra == "dev"
|
|
39
|
+
|
|
40
|
+
# haon-agent
|
|
41
|
+
|
|
42
|
+
The miner-side agent for the **HAON PowerHub** distributed GPU marketplace.
|
|
43
|
+
Run this on a machine with a GPU, register with the marketplace, and
|
|
44
|
+
earn credit every time a worker rents your compute time.
|
|
45
|
+
|
|
46
|
+
- **Website**: <https://staging.haon.run>
|
|
47
|
+
- **Quickstart**: <https://staging.haon.run/docs/miner-install-quickstart.md>
|
|
48
|
+
- **Supported OS**: macOS, Linux, Windows 10+ (Python 3.11+)
|
|
49
|
+
|
|
50
|
+
---
|
|
51
|
+
|
|
52
|
+
## One-command install
|
|
53
|
+
|
|
54
|
+
### macOS / Linux
|
|
55
|
+
```bash
|
|
56
|
+
curl -sSL https://staging.haon.run/install-miner.sh | bash
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
### Windows (PowerShell)
|
|
60
|
+
```powershell
|
|
61
|
+
iwr -useb https://staging.haon.run/install-miner.ps1 | iex
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
Either one-liner installs this package into a per-user venv at
|
|
65
|
+
`~/.haon/venv` (no admin required), exposes the `haon-agent` CLI, and
|
|
66
|
+
launches the pairing wizard.
|
|
67
|
+
|
|
68
|
+
---
|
|
69
|
+
|
|
70
|
+
## Manual install (prefer the one-liner for non-developers)
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
pip install haon-agent
|
|
74
|
+
haon-agent pair # email + password → marketplace registration
|
|
75
|
+
haon-agent run # starts the agent, heartbeats begin
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
Configuration lands in `~/.haon/agent.toml`; the API key is stored in
|
|
79
|
+
your OS keyring (with a 0600 file fallback).
|
|
80
|
+
|
|
81
|
+
---
|
|
82
|
+
|
|
83
|
+
## What the agent does
|
|
84
|
+
|
|
85
|
+
- Probes your hardware (CPU / RAM / GPU via `nvidia-smi`).
|
|
86
|
+
- Registers the machine with the HAON API and mints a scoped API key.
|
|
87
|
+
- Holds a WebSocket to the HAON broker (outbound, NAT-friendly).
|
|
88
|
+
- When a worker opens a session against your machine, the agent pairs
|
|
89
|
+
the tunnel + starts the requested runtime container (Ollama, ComfyUI,
|
|
90
|
+
custom HTTP).
|
|
91
|
+
- Emits usage ticks every 10 seconds; the server turns ticks into
|
|
92
|
+
earnings in your wallet.
|
|
93
|
+
|
|
94
|
+
The agent never opens inbound ports on your side — all traffic is
|
|
95
|
+
outbound WebSocket over TLS to `broker.staging.haon.run`. You can run it
|
|
96
|
+
behind NAT, corporate firewalls, or any other network that allows outbound TLS.
|
|
97
|
+
|
|
98
|
+
---
|
|
99
|
+
|
|
100
|
+
## Supported runtimes (alpha)
|
|
101
|
+
|
|
102
|
+
| Runtime | Status | Notes |
|
|
103
|
+
|---|---|---|
|
|
104
|
+
| `echo` | ✅ Works everywhere | CPU-only smoke test |
|
|
105
|
+
| `ollama` | 🚧 Docker-based (today) / native (next) | LLM inference |
|
|
106
|
+
| `comfyui` | 🚧 Docker-based | Image / video diffusion |
|
|
107
|
+
| `custom_http` | 🚧 Docker-based | Bring your own HTTP server |
|
|
108
|
+
|
|
109
|
+
Docker-based runtimes require Docker Desktop (Windows/macOS) or
|
|
110
|
+
Docker Engine (Linux) + the NVIDIA Container Toolkit if you want GPU
|
|
111
|
+
passthrough. The pairing wizard does not yet auto-install Docker — do
|
|
112
|
+
that step yourself before running compute-heavy runtimes.
|
|
113
|
+
|
|
114
|
+
---
|
|
115
|
+
|
|
116
|
+
## Uninstall
|
|
117
|
+
|
|
118
|
+
Everything the installer writes lives under `~/.haon`:
|
|
119
|
+
|
|
120
|
+
```bash
|
|
121
|
+
haon-agent logout # clears credentials from the keyring
|
|
122
|
+
rm -rf ~/.haon # removes the venv + config
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
No registry edits on Windows, no systemd unit on Linux.
|
|
126
|
+
|
|
127
|
+
---
|
|
128
|
+
|
|
129
|
+
## Security + privacy posture
|
|
130
|
+
|
|
131
|
+
- All API traffic is TLS 1.2+. Self-signed certs are rejected.
|
|
132
|
+
- Refresh tokens are rotated on every use (single-use).
|
|
133
|
+
- The agent never executes arbitrary code from workers — runtimes are
|
|
134
|
+
explicit containers you opt into.
|
|
135
|
+
- No telemetry beyond the heartbeats + usage ticks that the marketplace
|
|
136
|
+
needs for billing. Source is auditable — this package is published
|
|
137
|
+
from the same git history as the server.
|
|
138
|
+
|
|
139
|
+
Report security issues to `alpha@haon.run`.
|
|
140
|
+
|
|
141
|
+
---
|
|
142
|
+
|
|
143
|
+
## License
|
|
144
|
+
|
|
145
|
+
MIT. See `LICENSE` in the source repository.
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
# haon-agent
|
|
2
|
+
|
|
3
|
+
The miner-side agent for the **HAON PowerHub** distributed GPU marketplace.
|
|
4
|
+
Run this on a machine with a GPU, register with the marketplace, and
|
|
5
|
+
earn credit every time a worker rents your compute time.
|
|
6
|
+
|
|
7
|
+
- **Website**: <https://staging.haon.run>
|
|
8
|
+
- **Quickstart**: <https://staging.haon.run/docs/miner-install-quickstart.md>
|
|
9
|
+
- **Supported OS**: macOS, Linux, Windows 10+ (Python 3.11+)
|
|
10
|
+
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
## One-command install
|
|
14
|
+
|
|
15
|
+
### macOS / Linux
|
|
16
|
+
```bash
|
|
17
|
+
curl -sSL https://staging.haon.run/install-miner.sh | bash
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
### Windows (PowerShell)
|
|
21
|
+
```powershell
|
|
22
|
+
iwr -useb https://staging.haon.run/install-miner.ps1 | iex
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
Either one-liner installs this package into a per-user venv at
|
|
26
|
+
`~/.haon/venv` (no admin required), exposes the `haon-agent` CLI, and
|
|
27
|
+
launches the pairing wizard.
|
|
28
|
+
|
|
29
|
+
---
|
|
30
|
+
|
|
31
|
+
## Manual install (prefer the one-liner for non-developers)
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
pip install haon-agent
|
|
35
|
+
haon-agent pair # email + password → marketplace registration
|
|
36
|
+
haon-agent run # starts the agent, heartbeats begin
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
Configuration lands in `~/.haon/agent.toml`; the API key is stored in
|
|
40
|
+
your OS keyring (with a 0600 file fallback).
|
|
41
|
+
|
|
42
|
+
---
|
|
43
|
+
|
|
44
|
+
## What the agent does
|
|
45
|
+
|
|
46
|
+
- Probes your hardware (CPU / RAM / GPU via `nvidia-smi`).
|
|
47
|
+
- Registers the machine with the HAON API and mints a scoped API key.
|
|
48
|
+
- Holds a WebSocket to the HAON broker (outbound, NAT-friendly).
|
|
49
|
+
- When a worker opens a session against your machine, the agent pairs
|
|
50
|
+
the tunnel + starts the requested runtime container (Ollama, ComfyUI,
|
|
51
|
+
custom HTTP).
|
|
52
|
+
- Emits usage ticks every 10 seconds; the server turns ticks into
|
|
53
|
+
earnings in your wallet.
|
|
54
|
+
|
|
55
|
+
The agent never opens inbound ports on your side — all traffic is
|
|
56
|
+
outbound WebSocket over TLS to `broker.staging.haon.run`. You can run it
|
|
57
|
+
behind NAT, corporate firewalls, or any other network that allows outbound TLS.
|
|
58
|
+
|
|
59
|
+
---
|
|
60
|
+
|
|
61
|
+
## Supported runtimes (alpha)
|
|
62
|
+
|
|
63
|
+
| Runtime | Status | Notes |
|
|
64
|
+
|---|---|---|
|
|
65
|
+
| `echo` | ✅ Works everywhere | CPU-only smoke test |
|
|
66
|
+
| `ollama` | 🚧 Docker-based (today) / native (next) | LLM inference |
|
|
67
|
+
| `comfyui` | 🚧 Docker-based | Image / video diffusion |
|
|
68
|
+
| `custom_http` | 🚧 Docker-based | Bring your own HTTP server |
|
|
69
|
+
|
|
70
|
+
Docker-based runtimes require Docker Desktop (Windows/macOS) or
|
|
71
|
+
Docker Engine (Linux) + the NVIDIA Container Toolkit if you want GPU
|
|
72
|
+
passthrough. The pairing wizard does not yet auto-install Docker — do
|
|
73
|
+
that step yourself before running compute-heavy runtimes.
|
|
74
|
+
|
|
75
|
+
---
|
|
76
|
+
|
|
77
|
+
## Uninstall
|
|
78
|
+
|
|
79
|
+
Everything the installer writes lives under `~/.haon`:
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
haon-agent logout # clears credentials from the keyring
|
|
83
|
+
rm -rf ~/.haon # removes the venv + config
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
No registry edits on Windows, no systemd unit on Linux.
|
|
87
|
+
|
|
88
|
+
---
|
|
89
|
+
|
|
90
|
+
## Security + privacy posture
|
|
91
|
+
|
|
92
|
+
- All API traffic is TLS 1.2+. Self-signed certs are rejected.
|
|
93
|
+
- Refresh tokens are rotated on every use (single-use).
|
|
94
|
+
- The agent never executes arbitrary code from workers — runtimes are
|
|
95
|
+
explicit containers you opt into.
|
|
96
|
+
- No telemetry beyond the heartbeats + usage ticks that the marketplace
|
|
97
|
+
needs for billing. Source is auditable — this package is published
|
|
98
|
+
from the same git history as the server.
|
|
99
|
+
|
|
100
|
+
Report security issues to `alpha@haon.run`.
|
|
101
|
+
|
|
102
|
+
---
|
|
103
|
+
|
|
104
|
+
## License
|
|
105
|
+
|
|
106
|
+
MIT. See `LICENSE` in the source repository.
|
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
"""The agent's main orchestrator loop.
|
|
2
|
+
|
|
3
|
+
heartbeat (every N seconds)
|
|
4
|
+
│
|
|
5
|
+
├── if heartbeat.session_accepted_ids non-empty:
|
|
6
|
+
│ for each session_id:
|
|
7
|
+
│ accept_session(session_id)
|
|
8
|
+
│
|
|
9
|
+
└── sleep(heartbeat_interval)
|
|
10
|
+
|
|
11
|
+
accept_session(session_id):
|
|
12
|
+
call POST /v1/sessions/{id}/start → handoff
|
|
13
|
+
connect to broker with handoff.tunnel_token
|
|
14
|
+
wait for PAIRED
|
|
15
|
+
spawn runtime
|
|
16
|
+
concurrently:
|
|
17
|
+
- forward worker DATA frames to runtime.push
|
|
18
|
+
- forward runtime output to broker.send_data
|
|
19
|
+
on CLOSE / UNPAIRED / runtime stop: tear everything down, POST /close.
|
|
20
|
+
|
|
21
|
+
Recovery:
|
|
22
|
+
If heartbeat fails we keep retrying with exponential backoff. If broker
|
|
23
|
+
disconnects mid-session we close the session via API. No silent failures.
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
from __future__ import annotations
|
|
27
|
+
|
|
28
|
+
import asyncio
|
|
29
|
+
import contextlib
|
|
30
|
+
from dataclasses import dataclass
|
|
31
|
+
from typing import Awaitable, Callable
|
|
32
|
+
|
|
33
|
+
import structlog
|
|
34
|
+
|
|
35
|
+
from haon_agent.api_client import ApiClient, ApiError, SessionHandoff
|
|
36
|
+
from haon_agent.broker_client import BrokerClient, FrameType
|
|
37
|
+
from haon_agent.config import AgentConfig
|
|
38
|
+
from haon_agent.runtime import get_runtime
|
|
39
|
+
from haon_agent.runtime.base import Runtime
|
|
40
|
+
|
|
41
|
+
# Module-wide structured logger, created with the name "agent".
log = structlog.get_logger("agent")
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@dataclass
class AgentDeps:
    """Bundle of collaborators handed to :class:`Agent`.

    Both factories have defaults, so production code only needs to supply
    ``api`` and ``config``; tests can swap either factory for a fake.
    """

    # Client used for heartbeat / session start / session close calls.
    api: ApiClient
    # Agent configuration (API URL, machine id, heartbeat interval, ...).
    config: AgentConfig
    # Builds a BrokerClient from (broker_url, tunnel_token). Kept as a lambda
    # so ``BrokerClient`` is looked up at call time, not at class creation.
    broker_factory: Callable[[str, str], BrokerClient] = (
        lambda url, token: BrokerClient(url, token)
    )
    # Maps a runtime name to a Runtime instance; defaults to the
    # module-level registry lookup.
    runtime_factory: Callable[[str], Runtime] = get_runtime
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class Agent:
    """Heartbeat-driven orchestrator that runs one pipeline task per session.

    The main loop posts heartbeats; every session id the server returns in
    ``session_accepted_ids`` gets its own ``_run_session`` task, tracked in
    ``_active_sessions`` until it finishes or the agent shuts down.
    """

    def __init__(self, deps: AgentDeps) -> None:
        self._deps = deps
        self._running = False
        # session_id -> task executing ``_run_session`` for that session.
        self._active_sessions: dict[str, asyncio.Task[None]] = {}
        self._stop_event = asyncio.Event()

    async def run(self) -> None:
        """Run the heartbeat loop until a stop is requested, then cancel sessions.

        Raises:
            RuntimeError: if the configuration has no ``machine_id``.
        """
        self._running = True
        log.info("agent_starting", api_url=self._deps.config.api_url)
        try:
            await self._main_loop()
        finally:
            self._running = False
            # Snapshot first: each session task removes itself from the dict
            # in its own ``finally`` while we are awaiting it.
            pending = list(self._active_sessions.items())
            for sid, task in pending:
                log.info("session_task_cancelling", session_id=sid)
                task.cancel()
            for sid, task in pending:
                try:
                    await task
                except asyncio.CancelledError:
                    pass
                except Exception as e:  # noqa: BLE001
                    # One failed session must not stop us from reaping the rest.
                    log.warning(
                        "session_task_failed", session_id=sid, error=str(e)
                    )
            log.info("agent_stopped")

    def request_stop(self) -> None:
        """Signal the main loop to exit after its current iteration."""
        self._stop_event.set()

    async def _main_loop(self) -> None:
        """Heartbeat, spawn tasks for newly offered sessions, sleep, repeat."""
        cfg = self._deps.config
        if not cfg.machine_id:
            raise RuntimeError("machine_id not configured; run `haon-agent register` first")

        backoff = 1.0
        while not self._stop_event.is_set():
            try:
                resp = await self._deps.api.heartbeat(
                    cfg.machine_id,
                    runtime_ready=True,
                    pending_sessions=len(self._active_sessions),
                )
                backoff = 1.0  # reset on success
                offered = resp.get("session_accepted_ids") or []
                for sid in offered:
                    if sid in self._active_sessions:
                        continue
                    log.info("session_offered_received", session_id=sid)
                    self._active_sessions[sid] = asyncio.create_task(
                        self._run_session(sid)
                    )
                # Purge completed tasks.
                finished = [
                    s for s, t in self._active_sessions.items() if t.done()
                ]
                for done_sid in finished:
                    self._active_sessions.pop(done_sid, None)
            except ApiError as e:
                # NOTE(review): an ApiError leaves ``backoff`` untouched, so the
                # next attempt waits the regular heartbeat interval — confirm
                # this is intended given the module docstring's backoff promise.
                log.warning(
                    "heartbeat_failed",
                    code=e.code,
                    status_code=e.status_code,
                )
            except Exception as e:  # noqa: BLE001
                log.exception("heartbeat_unhandled_error", error=str(e))
                backoff = min(backoff * 2, 30.0)

            # Sleep until the next heartbeat, waking early on request_stop().
            delay = cfg.heartbeat_interval_seconds if backoff <= 1.0 else backoff
            with contextlib.suppress(asyncio.TimeoutError):
                await asyncio.wait_for(self._stop_event.wait(), timeout=delay)

    # ── Per-session pipeline ────────────────────────────────────────────

    async def _run_session(self, session_id: str) -> None:
        """Drive one session: start, pair the tunnel, pump frames, tear down.

        Teardown always runs, and each step is isolated so that a failure in
        one (tick emitter, runtime, broker) cannot skip the API close call or
        the removal from ``_active_sessions``.
        """
        try:
            handoff = await self._deps.api.start_session(session_id)
        except ApiError as e:
            log.warning(
                "session_start_rejected",
                session_id=session_id,
                code=e.code,
            )
            return
        except Exception as e:  # noqa: BLE001
            # Anything else (transport error, malformed body) would otherwise
            # kill the task with an exception nobody retrieves.
            log.exception(
                "session_start_unhandled_error",
                session_id=session_id,
                error=str(e),
            )
            return

        runtime_name = handoff.runtime or self._deps.config.default_runtime
        try:
            runtime = self._deps.runtime_factory(runtime_name)
        except KeyError:
            log.warning(
                "runtime_unsupported",
                session_id=session_id,
                runtime=runtime_name,
            )
            await self._safe_close(session_id, reason="miner_closed")
            return

        broker = self._deps.broker_factory(
            handoff.tunnel_broker_url, handoff.tunnel_token
        )
        close_reason = "miner_closed"
        tick_emitter = None
        paired_task: asyncio.Task | None = None
        try:
            await broker.connect()
            # Wait for PAIRED — the worker-client opens the other side of the tunnel.
            paired_task = asyncio.create_task(broker.await_paired(timeout=15))

            async def on_runtime_output(data: bytes) -> None:
                await broker.send_data(data)

            await runtime.start(on_runtime_output)
            await paired_task

            log.info("session_paired_and_running", session_id=session_id)

            # Phase 12: start the tick emitter for this session.
            from haon_agent.tick_emitter import TickEmitter

            container_name = None
            handle = getattr(runtime, "_handle", None)
            if handle is not None:
                container_name = getattr(handle, "container_name", None)
            tick_emitter = TickEmitter(
                api=self._deps.api,
                session_id=session_id,
                broker=broker,
                container_name=container_name,
            )
            tick_emitter.start()

            async for ev in broker.events():
                if ev.type is FrameType.DATA:
                    await runtime.push(ev.payload)
                elif ev.type is FrameType.UNPAIRED:
                    log.warning("session_unpaired", session_id=session_id)
                    # Worker dropped — Phase 10 treats it as session end.
                    # A reconnection-grace window can land later.
                    break
                elif ev.type is FrameType.CLOSE:
                    log.info(
                        "session_broker_closed",
                        session_id=session_id,
                        payload=ev.json(),
                    )
                    break
                elif ev.type is FrameType.ERROR:
                    log.error(
                        "session_broker_error",
                        session_id=session_id,
                        payload=ev.json(),
                    )
                    break
        except Exception as e:  # noqa: BLE001
            log.exception(
                "session_pipeline_error", session_id=session_id, error=str(e)
            )
            close_reason = "miner_closed"
        finally:
            # If runtime.start() failed, the pairing task is still pending;
            # cancel and reap it so it does not outlive the session.
            if paired_task is not None:
                if not paired_task.done():
                    paired_task.cancel()
                with contextlib.suppress(asyncio.CancelledError, Exception):
                    await paired_task
            if tick_emitter is not None:
                try:
                    await tick_emitter.stop()
                except Exception as e:  # noqa: BLE001
                    log.warning(
                        "session_tick_emitter_stop_failed",
                        session_id=session_id,
                        error=str(e),
                    )
            try:
                await runtime.stop()
            except Exception as e:  # noqa: BLE001
                log.warning(
                    "session_runtime_stop_failed",
                    session_id=session_id,
                    error=str(e),
                )
            try:
                await broker.close()
            except Exception as e:  # noqa: BLE001
                # Must not prevent the API close below.
                log.warning(
                    "session_broker_close_failed",
                    session_id=session_id,
                    error=str(e),
                )
            await self._safe_close(session_id, reason=close_reason)
            self._active_sessions.pop(session_id, None)

    async def _safe_close(self, session_id: str, *, reason: str) -> None:
        """Close the session via the API, logging instead of raising on failure."""
        try:
            await self._deps.api.close_session(session_id, reason=reason)
        except ApiError as e:
            log.warning(
                "session_close_failed", session_id=session_id, code=e.code
            )
        except Exception as e:  # noqa: BLE001
            log.warning(
                "session_close_unhandled", session_id=session_id, error=str(e)
            )
|
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
"""Thin HAON API client used by the agent.
|
|
2
|
+
|
|
3
|
+
One class, one responsibility per method. In tests we swap `httpx`'s transport
|
|
4
|
+
for an `ASGITransport` pointed at an in-process FastAPI app — no network, no
|
|
5
|
+
mocks.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from dataclasses import dataclass
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
import httpx
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class ApiError(Exception):
|
|
17
|
+
def __init__(self, status_code: int, code: str, message: str, details: dict | None = None) -> None:
|
|
18
|
+
super().__init__(f"{code}: {message}")
|
|
19
|
+
self.status_code = status_code
|
|
20
|
+
self.code = code
|
|
21
|
+
self.message = message
|
|
22
|
+
self.details = details or {}
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _raise_for_error(r: httpx.Response) -> None:
    """Raise :class:`ApiError` for any non-success response; otherwise return.

    The error fields come from the JSON body's ``"error"`` object. If the body
    cannot be read that way, the raw response text is used with the code
    ``"HTTP_ERROR"``.
    """
    if not r.is_success:
        try:
            err = r.json().get("error", {})
            code, message, details = (
                err.get("code", "UNKNOWN"),
                err.get("message", r.text),
                err.get("details", {}),
            )
        except Exception:  # noqa: BLE001
            # Non-JSON body or unexpected shape — fall back to the raw text.
            code, message, details = "HTTP_ERROR", r.text, {}
        raise ApiError(r.status_code, code, message, details)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@dataclass
class SessionHandoff:
    """Values returned by the session-start call that the agent needs to serve a session."""

    # Identity of the session and its two parties.
    session_id: str
    worker_id: str
    machine_id: str
    # Requested runtime name; an empty string when the API supplied none.
    runtime: str
    # Tunnel connection details taken from the response's "tunnel" object.
    tunnel_broker_url: str
    tunnel_token: str
    expires_at: str
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class ApiClient:
    """Token-auth HTTP client for the HAON API.

    Every request carries the API key in the ``X-API-Key`` header. Methods
    raise :class:`ApiError` on any non-success response.
    """

    def __init__(
        self,
        base_url: str,
        api_key: str,
        *,
        transport: httpx.AsyncBaseTransport | None = None,
        timeout: float = 10.0,
    ) -> None:
        """Create the underlying ``httpx.AsyncClient``.

        Args:
            base_url: API root; trailing slashes are stripped.
            api_key: Value sent in the ``X-API-Key`` header.
            transport: Optional transport override (used by tests).
            timeout: Timeout, in seconds, passed to the client.
        """
        self._client = httpx.AsyncClient(
            base_url=base_url.rstrip("/"),
            transport=transport,
            timeout=timeout,
            headers={"X-API-Key": api_key, "User-Agent": "haon-agent/0.2"},
        )

    async def aclose(self) -> None:
        """Close the underlying HTTP client."""
        await self._client.aclose()

    async def __aenter__(self) -> "ApiClient":
        return self

    async def __aexit__(self, *_exc: Any) -> None:
        # Delegate so there is a single place that defines how we shut down.
        await self.aclose()

    # ── Machines ────────────────────────────────────────────────────────

    async def register_machine(
        self,
        *,
        hub_id: str,
        name: str,
        cpu_model: str,
        cpu_cores: int,
        ram_gb: int,
        gpu_model: str | None,
        gpu_vram_gb: int | None,
        gpu_count: int,
        supported_runtimes: list[str],
        hardware_fingerprint: str,
        hardware_signature: str | None = None,
    ) -> dict:
        """POST ``/v1/machines`` and return the decoded JSON body.

        Optional fields (``gpu_model``, ``gpu_vram_gb``,
        ``hardware_signature``) are omitted from the payload when ``None``.

        Raises:
            ApiError: on a non-success response.
        """
        payload = {
            "hub_id": hub_id,
            "name": name,
            "cpu_model": cpu_model,
            "cpu_cores": cpu_cores,
            "ram_gb": ram_gb,
            "gpu_count": gpu_count,
            "supported_runtimes": supported_runtimes,
            "hardware_fingerprint": hardware_fingerprint,
        }
        optional = (
            ("gpu_model", gpu_model),
            ("gpu_vram_gb", gpu_vram_gb),
            ("hardware_signature", hardware_signature),
        )
        payload.update({k: v for k, v in optional if v is not None})

        # Agent needs a user JWT (not API key) to register a machine. In Phase 10
        # registration happens via `haon-agent register` which prompts for
        # email+password → JWT → register. The runtime agent uses API keys.
        # For now the method exists as a convenience; the CLI uses the login flow.
        r = await self._client.post("/v1/machines", json=payload)
        _raise_for_error(r)
        return r.json()

    async def heartbeat(
        self,
        machine_id: str,
        *,
        runtime_ready: bool,
        cpu_percent: float | None = None,
        ram_used_mb: int | None = None,
        gpu_utilization_percent: list[float] | None = None,
        pending_sessions: int | None = None,
    ) -> dict:
        """POST a heartbeat for ``machine_id`` and return the decoded JSON body.

        ``runtime_ready`` is always sent; the remaining metrics are included
        only when they are not ``None``.

        Raises:
            ApiError: on a non-success response.
        """
        payload: dict = {"runtime_ready": runtime_ready}
        optional = (
            ("cpu_percent", cpu_percent),
            ("ram_used_mb", ram_used_mb),
            ("gpu_utilization_percent", gpu_utilization_percent),
            ("pending_sessions", pending_sessions),
        )
        payload.update({k: v for k, v in optional if v is not None})
        r = await self._client.post(
            f"/v1/machines/{machine_id}/heartbeat", json=payload
        )
        _raise_for_error(r)
        return r.json()

    # ── Sessions ────────────────────────────────────────────────────────

    async def start_session(self, session_id: str) -> SessionHandoff:
        """POST ``/v1/sessions/{id}/start`` and parse the handoff.

        Raises:
            ApiError: on a non-success response, or with code
                ``"INVALID_RESPONSE"`` when a success response lacks the
                expected ``session`` / ``tunnel`` fields or is not JSON.
        """
        r = await self._client.post(f"/v1/sessions/{session_id}/start")
        _raise_for_error(r)
        try:
            body = r.json()
            sess = body["session"]
            tunnel = body["tunnel"]
            return SessionHandoff(
                session_id=sess["id"],
                worker_id=sess["worker_id"],
                machine_id=sess["machine_id"],
                runtime=sess.get("runtime") or "",
                tunnel_broker_url=tunnel["broker_url"],
                tunnel_token=tunnel["token"],
                expires_at=tunnel["expires_at"],
            )
        except (KeyError, TypeError, AttributeError, ValueError) as e:
            # Surface a malformed body as ApiError — the one failure type
            # callers of this client already handle.
            raise ApiError(
                r.status_code,
                "INVALID_RESPONSE",
                f"malformed session handoff: {e!r}",
            ) from e

    async def close_session(self, session_id: str, reason: str = "miner_closed") -> dict:
        """POST ``/v1/sessions/{id}/close`` with ``reason``; return the JSON body.

        Raises:
            ApiError: on a non-success response.
        """
        r = await self._client.post(
            f"/v1/sessions/{session_id}/close", json={"reason": reason}
        )
        _raise_for_error(r)
        return r.json()

    async def get_session(self, session_id: str) -> dict:
        """GET ``/v1/sessions/{id}`` and return the decoded JSON body.

        Raises:
            ApiError: on a non-success response.
        """
        r = await self._client.get(f"/v1/sessions/{session_id}")
        _raise_for_error(r)
        return r.json()

    # ── Usage ticks ─────────────────────────────────────────────────────

    async def post_ticks(self, ticks: list[dict]) -> dict:
        """Submit a batch (1..10) of UsageTick records.

        The batch size is not checked here.

        Raises:
            ApiError: on a non-success response.
        """
        r = await self._client.post("/v1/usage/ticks", json={"ticks": ticks})
        _raise_for_error(r)
        return r.json()
|