remoteRF-server-testing 0.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- remoteRF_server/__init__.py +0 -0
- remoteRF_server/common/__init__.py +0 -0
- remoteRF_server/common/grpc/__init__.py +1 -0
- remoteRF_server/common/grpc/grpc_host_pb2.py +63 -0
- remoteRF_server/common/grpc/grpc_host_pb2_grpc.py +97 -0
- remoteRF_server/common/grpc/grpc_pb2.py +59 -0
- remoteRF_server/common/grpc/grpc_pb2_grpc.py +97 -0
- remoteRF_server/common/idl/__init__.py +1 -0
- remoteRF_server/common/idl/device_schema.py +39 -0
- remoteRF_server/common/idl/pluto_schema.py +174 -0
- remoteRF_server/common/idl/schema.py +358 -0
- remoteRF_server/common/utils/__init__.py +6 -0
- remoteRF_server/common/utils/ansi_codes.py +120 -0
- remoteRF_server/common/utils/api_token.py +21 -0
- remoteRF_server/common/utils/db_connection.py +35 -0
- remoteRF_server/common/utils/db_location.py +24 -0
- remoteRF_server/common/utils/list_string.py +5 -0
- remoteRF_server/common/utils/process_arg.py +80 -0
- remoteRF_server/drivers/__init__.py +0 -0
- remoteRF_server/drivers/adalm_pluto/__init__.py +0 -0
- remoteRF_server/drivers/adalm_pluto/pluto_remote_server.py +105 -0
- remoteRF_server/host/__init__.py +0 -0
- remoteRF_server/host/host_auth_token.py +292 -0
- remoteRF_server/host/host_directory_store.py +142 -0
- remoteRF_server/host/host_tunnel_server.py +1388 -0
- remoteRF_server/server/__init__.py +0 -0
- remoteRF_server/server/acc_perms.py +317 -0
- remoteRF_server/server/cert_provider.py +184 -0
- remoteRF_server/server/device_manager.py +688 -0
- remoteRF_server/server/grpc_server.py +1023 -0
- remoteRF_server/server/reservation.py +811 -0
- remoteRF_server/server/rpc_manager.py +104 -0
- remoteRF_server/server/user_group_cli.py +723 -0
- remoteRF_server/server/user_group_handler.py +1120 -0
- remoteRF_server/serverrf_cli.py +1377 -0
- remoteRF_server/tools/__init__.py +191 -0
- remoteRF_server/tools/gen_certs.py +274 -0
- remoteRF_server/tools/gist_status.py +139 -0
- remoteRF_server/tools/gist_status_testing.py +67 -0
- remoterf_server_testing-0.0.0.dist-info/METADATA +612 -0
- remoterf_server_testing-0.0.0.dist-info/RECORD +44 -0
- remoterf_server_testing-0.0.0.dist-info/WHEEL +5 -0
- remoterf_server_testing-0.0.0.dist-info/entry_points.txt +2 -0
- remoterf_server_testing-0.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,1388 @@
|
|
|
1
|
+
# src/remoteRF_host/host/host_tunnel_server.py
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import sys
|
|
6
|
+
import threading
|
|
7
|
+
import queue
|
|
8
|
+
import time
|
|
9
|
+
import secrets
|
|
10
|
+
import traceback
|
|
11
|
+
import hashlib
|
|
12
|
+
import re
|
|
13
|
+
import uuid
|
|
14
|
+
|
|
15
|
+
from dataclasses import dataclass
|
|
16
|
+
from typing import Dict, Optional, Tuple, Any, List, Set, Iterator
|
|
17
|
+
|
|
18
|
+
from concurrent.futures import Future
|
|
19
|
+
from concurrent import futures
|
|
20
|
+
|
|
21
|
+
import grpc
|
|
22
|
+
|
|
23
|
+
from ..common.utils.process_arg import map_arg
|
|
24
|
+
from ..common.grpc import grpc_pb2 as generic_pb2
|
|
25
|
+
from ..common.grpc import grpc_host_pb2_grpc as host_tunnel_pb2_grpc
|
|
26
|
+
from ..common.grpc import grpc_host_pb2 as host_tunnel_pb2
|
|
27
|
+
|
|
28
|
+
from .host_directory_store import (
|
|
29
|
+
EnvStore,
|
|
30
|
+
DeviceIdConflictError,
|
|
31
|
+
now_ms,
|
|
32
|
+
sanitize_env_key,
|
|
33
|
+
csv_split,
|
|
34
|
+
cfg_dir_from_file,
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
from .host_auth_token import is_host_token_valid, list_hosts
|
|
38
|
+
|
|
39
|
+
# =============================================================================
|
|
40
|
+
# Hardcoded logging mode (NO env vars)
|
|
41
|
+
#
|
|
42
|
+
# Pick ONE:
|
|
43
|
+
# LOG_MODE = "OFF" -> show nothing
|
|
44
|
+
# LOG_MODE = "WARN" -> show only WARN + ERROR
|
|
45
|
+
# LOG_MODE = "ALL" -> show everything (INFO + WARN + ERROR)
|
|
46
|
+
# =============================================================================
|
|
47
|
+
|
|
48
|
+
# Active log filter for this module; _should_log() compares it case-insensitively
# against "OFF", "WARN" and "ALL" (any other value is treated like "WARN").
LOG_MODE = "OFF"
|
|
49
|
+
# Held by _log() while it prints, so lines written by different threads do not interleave.
_LOG_LOCK = threading.Lock()
|
|
50
|
+
# Pattern that _validate_host_id() requires a stripped host_id to fully match.
HOST_ID_RE = re.compile(r"^[A-Za-z0-9_.-]{1,64}$") # adjust if you want
|
|
51
|
+
|
|
52
|
+
def _grpc_err_summary(e: grpc.RpcError) -> str:
    """Render an RPC error as ``"<STATUS_NAME>: <details>"``.

    Both accessors are called defensively: if ``e.code()`` raises or returns a
    falsy value the status name is ``"UNKNOWN"``; if ``e.details()`` raises or
    returns a falsy value the details part is empty. Surrounding whitespace is
    stripped from the final string.
    """
    status_name = "UNKNOWN"
    try:
        status = e.code()
        if status:
            status_name = status.name
    except Exception:
        status_name = "UNKNOWN"

    try:
        detail_text = e.details() or ""
    except Exception:
        detail_text = ""

    return f"{status_name}: {detail_text}".strip()
|
|
63
|
+
|
|
64
|
+
def _host_status(host_id: str) -> str:
    """Return the normalized status recorded for *host_id*, or ``""``.

    Scans the ``(host_id, status)`` pairs yielded by ``list_hosts()`` and
    returns the first matching status, stripped and lower-cased. An unknown
    host, a falsy status, or any exception during the lookup yields ``""``.
    """
    try:
        # next() stops at the first match, like an early return from a loop.
        first_match = next(
            (status for known_id, status in list_hosts() if known_id == host_id),
            "",
        )
        return (first_match or "").strip().lower()
    except Exception:
        # Best-effort lookup: callers treat "" as "status unknown".
        return ""
|
|
72
|
+
|
|
73
|
+
def _auth_fail_code_and_details(host_id: str) -> tuple[str, str]:
    """Pick the status-code name and message for a failed host authentication.

    Returns a ``(code_name, details)`` pair based on ``_host_status(host_id)``:
    an approved host is reported as having an invalid token, a host with any
    other non-empty status as not approved, and a host with no status as
    unknown.
    """
    status = _host_status(host_id)

    if status == "approved":
        return ("UNAUTHENTICATED", f"Invalid host_token for host_id={host_id!r}.")
    if status:
        return ("PERMISSION_DENIED", f"host_id={host_id!r} not approved (status={status!r}).")
    return ("UNAUTHENTICATED", f"Unknown host_id={host_id!r}. Ask admin to provision a host token.")
|
|
80
|
+
|
|
81
|
+
class HostIdConflictError(RuntimeError):
    """Raised when a host_id is claimed while another live session still holds it.

    The constructor arguments are kept as attributes (``host_id``, ``age_ms``,
    ``replace_after_ms``) so callers can inspect them without parsing the message.
    """

    def __init__(self, host_id: str, *, age_ms: int, replace_after_ms: int) -> None:
        message = (
            f"host_id '{host_id}' already in use (last heartbeat {age_ms}ms ago). "
            f"Try again after {replace_after_ms}ms or fix the conflicting host config."
        )
        super().__init__(message)
        self.host_id, self.age_ms, self.replace_after_ms = host_id, age_ms, replace_after_ms
|
|
90
|
+
|
|
91
|
+
def _validate_host_id(raw: str) -> str:
    """Return *raw* stripped of surrounding whitespace if it is an acceptable host_id.

    Raises:
        ValueError: if the value is empty/None, equals the placeholder
            ``"unknown-host"``, or does not fully match ``HOST_ID_RE``.
    """
    candidate = (raw or "").strip()

    if candidate == "":
        raise ValueError("Missing host_id (set HOST_ID in host.env; do not auto-generate).")
    if candidate == "unknown-host":
        raise ValueError("host_id cannot be 'unknown-host' (set HOST_ID in host.env).")
    if HOST_ID_RE.fullmatch(candidate) is None:
        problem = "Invalid host_id. Allowed: 1..64 chars of [A-Za-z0-9_.-]. " f"Got: {candidate!r}"
        raise ValueError(problem)

    return candidate
|
|
103
|
+
|
|
104
|
+
def _ts() -> str:
    """Return the current local wall-clock time as ``HH:MM:SS.mmm``."""
    now = time.time()
    whole_seconds = int(now)
    # Fractional part of the current second, truncated (not rounded) to milliseconds.
    millis = int((now - whole_seconds) * 1000)
    clock = time.strftime("%H:%M:%S", time.localtime(whole_seconds))
    return clock + f".{millis:03d}"
|
|
110
|
+
|
|
111
|
+
def _should_log(level: str) -> bool:
    """Tell whether a message at *level* passes the module-wide ``LOG_MODE`` filter.

    Both the level and the mode are compared case-insensitively. ``"OFF"``
    suppresses everything, ``"ALL"`` admits INFO/WARN/ERROR, and ``"WARN"`` or
    any unrecognized mode admits only WARN/ERROR.
    """
    wanted = (level or "").upper()
    mode = (LOG_MODE or "").upper()

    if mode == "OFF":
        return False

    admitted = ("INFO", "WARN", "ERROR") if mode == "ALL" else ("WARN", "ERROR")
    return wanted in admitted
|
|
121
|
+
|
|
122
|
+
def _log(level: str, msg: str) -> None:
    """Write one timestamped, thread-tagged line to stderr if *level* is enabled.

    The line is built and printed while holding ``_LOG_LOCK`` and stderr is
    flushed on every call.
    """
    if _should_log(level):
        thread_name = threading.current_thread().name
        with _LOG_LOCK:
            line = f"{_ts()} {level:<5} [{thread_name}] hostrf.host_tunnel_server: {msg}"
            print(line, file=sys.stderr, flush=True)
|
|
128
|
+
|
|
129
|
+
def info(msg: str) -> None:
    """Emit *msg* through ``_log`` at INFO level."""
    _log(level="INFO", msg=msg)
|
|
131
|
+
|
|
132
|
+
def warn(msg: str) -> None:
    """Emit *msg* through ``_log`` at WARN level."""
    _log(level="WARN", msg=msg)
|
|
134
|
+
|
|
135
|
+
def error(msg: str) -> None:
    """Emit *msg* through ``_log`` at ERROR level."""
    _log(level="ERROR", msg=msg)
|
|
137
|
+
|
|
138
|
+
def exception(msg: str) -> None:
    """Log *msg* at ERROR level, followed by the traceback of the active exception.

    Intended to be called from inside an ``except`` block. The traceback
    (limited to 80 frames) goes to stderr like every other log line.

    Nothing at all is written when ``LOG_MODE`` filters out ERROR messages:
    previously the traceback was printed unconditionally, so ``LOG_MODE = "OFF"``
    ("show nothing") still produced stderr output.
    """
    if not _should_log("ERROR"):
        return

    _log("ERROR", msg)

    # _LOG_LOCK is a plain (non-reentrant) Lock that _log() acquires itself, so it
    # is taken only after _log() has returned. Holding it here keeps the traceback
    # from interleaving with log lines written by other threads.
    with _LOG_LOCK:
        traceback.print_exc(limit=80, file=sys.stderr)
|
|
141
|
+
|
|
142
|
+
# Helpers
|
|
143
|
+
|
|
144
|
+
def host_key(host_id: str) -> str:
    """Return a short, stable key for *host_id*.

    The key is the first 16 hex characters of the SHA-256 digest of the UTF-8
    encoded id; a falsy id is hashed as the empty string.
    """
    digest = hashlib.sha256()
    digest.update((host_id or "").encode("utf-8"))
    return digest.hexdigest()[:16]
|
|
147
|
+
|
|
148
|
+
def _safe_thread_tag(s: str) -> str:
    """Turn *s* into a tag made only of ``[A-Za-z0-9_.-]``, at most 64 characters long.

    A falsy input becomes ``"peer"``. Every run of disallowed characters is
    collapsed into a single underscore before the result is truncated.
    """
    source = s if s else "peer"
    cleaned = re.sub(r"[^A-Za-z0-9_.-]+", "_", source)
    return cleaned[:64]
|
|
151
|
+
|
|
152
|
+
def _copy_argument_map(dst_map: Any, src_map: Any) -> None:
    """Copy every entry of *src_map* into *dst_map* using ``CopyFrom``.

    Keys are converted with ``str()``; ``dst_map[key]`` is expected to hand back
    an object exposing ``CopyFrom``. A ``None`` source is a no-op. If direct
    iteration/indexing of the source fails for any reason, the copy is retried
    from a plain ``dict`` snapshot of it.
    """
    if src_map is not None:
        try:
            for key in src_map:
                slot = dst_map[str(key)]
                slot.CopyFrom(src_map[key])
        except Exception:
            snapshot = dict(src_map)
            for key in snapshot:
                slot = dst_map[str(key)]
                slot.CopyFrom(snapshot[key])
|
|
161
|
+
|
|
162
|
+
# Types
|
|
163
|
+
|
|
164
|
+
@dataclass(frozen=True)
class DeviceRoute:
    """Immutable routing entry: which host serves a device, and under which host-local id."""

    # Id of the host the device is routed to.
    host_id: str
    # The device's id as known on that host.
    host_local_id: int
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
@dataclass
class HostStatus:
    """Mutable liveness record for one host."""

    # Id of the host this record describes.
    host_id: str
    # Whether the host is currently considered connected.
    online: bool = False
    # Millisecond timestamp of the last status change/heartbeat; 0 means never seen.
    last_seen_ms: int = 0
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
@dataclass
class DeviceStatus:
    """Mutable liveness record for one device, including the host that serves it."""

    # Directory-wide device id (the key used in the registry's maps).
    device_id: str
    # Id of the host that announced the device.
    host_id: str
    # The device's id as known on that host.
    host_local_id: int
    # Whether the device is currently considered reachable.
    online: bool = False
    # Millisecond timestamp of the last status change/heartbeat; 0 means never seen.
    last_seen_ms: int = 0
|
|
184
|
+
|
|
185
|
+
# Session
|
|
186
|
+
|
|
187
|
+
class HostSession:
    """Per-connection state for one host tunnel.

    Holds the bounded outbound frame queue (``out_q``), the map of in-flight
    request futures keyed by request id (``inflight``, guarded by
    ``inflight_lock``), the last heartbeat timestamp and an ``alive`` flag.
    """

    def __init__(self, host_id: str, *, peer: str = "<?>", session_uuid: str = "", outbound_max: int = 2048) -> None:
        """Create a live session; a short random id is generated when *session_uuid* is empty."""
        capacity = int(outbound_max)

        self.host_id = host_id
        self.peer = peer
        self.session_uuid = session_uuid if session_uuid else uuid.uuid4().hex[:10]

        # A None item in the queue is the shutdown sentinel enqueued by close().
        self.out_q: "queue.Queue[Optional[host_tunnel_pb2.HostFrame]]" = queue.Queue(maxsize=capacity)
        self.inflight: Dict[str, Future] = {}
        self.inflight_lock = threading.Lock()
        self.last_heartbeat_ms = now_ms()
        self.alive = True
        self.outbound_max = capacity

        info(
            f"[session] created host_id={self.host_id!r} sess={self.session_uuid} "
            f"peer={self.peer} outbound_max={self.outbound_max}"
        )

    def send(self, frame: host_tunnel_pb2.HostFrame, *, timeout: float = 2.0) -> bool:
        """Queue *frame* for delivery to the host.

        Returns True once the frame is queued. Returns False (after logging)
        when the session is dead, when the queue stays full for *timeout*
        seconds, or when queuing fails for any other reason.
        """
        which = frame.WhichOneof("msg")

        if not self.alive:
            warn(f"[session] send refused (dead session) host_id={self.host_id!r} sess={self.session_uuid} type={which}")
            return False

        queued = False
        try:
            self.out_q.put(frame, timeout=timeout)
            info(f"[session] queued outbound host_id={self.host_id!r} sess={self.session_uuid} type={which}")
            queued = True
        except queue.Full:
            warn(
                f"[session] outbound queue FULL host_id={self.host_id!r} sess={self.session_uuid} "
                f"max={self.outbound_max} drop type={which}"
            )
        except Exception:
            exception(f"[session] send exception host_id={self.host_id!r} sess={self.session_uuid} type={which}")
        return queued

    def close(self) -> None:
        """Mark the session dead, enqueue the shutdown sentinel and fail pending requests.

        Calling it again on an already-closed session does nothing. Every
        in-flight future that is not yet done receives a RuntimeError, and the
        in-flight map is emptied.
        """
        if not self.alive:
            return

        info(f"[session] closing host_id={self.host_id!r} sess={self.session_uuid} peer={self.peer}")
        self.alive = False

        try:
            self.out_q.put_nowait(None)
        except Exception:
            exception(f"[session] close: failed to enqueue sentinel host_id={self.host_id!r} sess={self.session_uuid}")

        with self.inflight_lock:
            for req_id in list(self.inflight):
                pending = self.inflight[req_id]
                if pending.done():
                    continue
                pending.set_exception(RuntimeError(f"Host '{self.host_id}' disconnected (req_id={req_id})"))
            self.inflight.clear()

        info(f"[session] closed host_id={self.host_id!r} sess={self.session_uuid}")
|
|
242
|
+
|
|
243
|
+
# Registry
|
|
244
|
+
|
|
245
|
+
class HostTunnelRegistry:
|
|
246
|
+
|
|
247
|
+
def __init__(self, *, active_cache_ttl_ms: int = 250, dir_cache_ttl_ms: int = 1000) -> None:
    """Set up empty registry state, open the two env stores and load the persisted directory.

    Args:
        active_cache_ttl_ms: how long the set of active host ids may be reused.
        dir_cache_ttl_ms: default lifetime of the device-directory snapshot.
    """
    # Re-entrant, so a method holding the lock can call other locking methods.
    self._lock = threading.RLock()

    # Live sessions: host_id -> session.
    self._hosts: Dict[str, HostSession] = {}

    # Directory state.
    self._routes: Dict[str, DeviceRoute] = {}  # device_id -> route(host_id, local_id)
    self._host_devices: Dict[str, Set[str]] = {}  # host_id -> set(device_id)
    self._device_infos: Dict[str, host_tunnel_pb2.DeviceInfo] = {}  # device_id -> DeviceInfo

    # Online/offline bookkeeping.
    self._host_status: Dict[str, HostStatus] = {}
    self._device_status: Dict[str, DeviceStatus] = {}

    # Persistence: two env files in the config directory derived from this file's location.
    cfg_dir = cfg_dir_from_file(__file__)
    cfg_dir.mkdir(parents=True, exist_ok=True)
    self._env_lists = EnvStore(cfg_dir / "host_directory.env")
    self._env_meta = EnvStore(cfg_dir / "host_directory_meta.env")

    # Caches; a timestamp of 0 means "not built yet / invalidated".
    self._active_hosts_cache: Set[str] = set()
    self._active_hosts_cache_ts_ms: int = 0
    self._active_hosts_cache_ttl_ms: int = int(active_cache_ttl_ms)

    self._dir_cache: Dict[str, Tuple[str, Any, bool]] = {}
    self._dir_cache_ts_ms: int = 0
    self._dir_cache_ttl_ms: int = int(dir_cache_ttl_ms)

    info(f"[registry] init cfg_dir={cfg_dir}")
    self._load_persisted()
|
|
279
|
+
|
|
280
|
+
# caching helpers
|
|
281
|
+
|
|
282
|
+
def _invalidate_caches_locked(self) -> None:
    """Force both caches to be rebuilt on next use by zeroing their timestamps.

    The ``_locked`` suffix suggests the caller is expected to hold ``self._lock``.
    """
    self._active_hosts_cache_ts_ms = self._dir_cache_ts_ms = 0
|
|
285
|
+
|
|
286
|
+
def _refresh_active_hosts_cache_locked(self, now: int) -> None:
    """Rebuild the set of host ids with a live session unless the cached set is still fresh.

    The cache counts as fresh when its timestamp is non-zero and younger than
    the configured TTL relative to *now* (milliseconds).
    """
    built_at = self._active_hosts_cache_ts_ms
    still_fresh = bool(built_at) and (now - built_at) < self._active_hosts_cache_ttl_ms
    if not still_fresh:
        live_ids = set()
        for hid, sess in self._hosts.items():
            if sess is not None and sess.alive:
                live_ids.add(hid)
        self._active_hosts_cache = live_ids
        self._active_hosts_cache_ts_ms = now
|
|
291
|
+
|
|
292
|
+
# persistence load
|
|
293
|
+
|
|
294
|
+
def _load_persisted(self) -> None:
    """Rebuild the in-memory directory from the two persisted env stores.

    Reads ``KNOWN_HOSTS``, ``KNOWN_DEVICES`` and every ``HOST_*_DEVICES`` list
    from the list store, and the per-device ``DEVICE_<key>_*`` entries from the
    meta store. All in-memory directory/status maps are cleared first; every
    loaded host and device starts offline with ``last_seen_ms=0``. A device
    without a persisted host gets info/status entries but no route.

    Persisted device ids that are not canonical ASCII decimal strings are
    skipped with a warning. The check is ASCII-only on purpose: ``str.isdigit()``
    alone also accepts characters such as ``"²"`` that ``int()`` rejects, which
    used to raise ``ValueError`` here and abort registry construction because of
    a single corrupt entry.
    """
    kv_lists = self._env_lists.read_kv()
    kv_meta = self._env_meta.read_kv()

    known_hosts = csv_split(kv_lists.get("KNOWN_HOSTS", ""))
    known_devices = csv_split(kv_lists.get("KNOWN_DEVICES", ""))

    info(f"[registry] load persisted: known_hosts={len(known_hosts)} known_devices={len(known_devices)}")

    def meta_value(key: str, default: str = "") -> str:
        # Missing and empty values both fall back to *default*.
        return (kv_meta.get(key, "") or default).strip()

    with self._lock:
        self._routes.clear()
        self._host_devices.clear()
        self._device_infos.clear()
        self._host_status.clear()
        self._device_status.clear()

        # Seed every known host as offline.
        for hid in known_hosts:
            if hid:
                self._host_status[hid] = HostStatus(host_id=hid, online=False, last_seen_ms=0)
                self._host_devices.setdefault(hid, set())

        # Devices may be listed globally and/or in per-host lists; take the union.
        devices_from_host_lists: Set[str] = set()
        for k, v in kv_lists.items():
            if k.startswith("HOST_") and k.endswith("_DEVICES"):
                devices_from_host_lists.update(csv_split(v))

        all_devices: List[str] = sorted(set(known_devices) | devices_from_host_lists)

        for device_id in all_devices:
            device_id = (device_id or "").strip()
            if not device_id:
                continue

            # STRICT MODE: ignore any persisted non-numeric / non-canonical IDs.
            # Canonical means ASCII digits only with no leading zeros ("0" itself is fine).
            is_canonical = (
                device_id.isascii()
                and device_id.isdigit()
                and (device_id == "0" or not device_id.startswith("0"))
            )
            if not is_canonical:
                warn(f"[registry] skipping persisted non-numeric/non-canonical device_id={device_id!r}")
                continue

            dk = sanitize_env_key(device_id)

            host_id = meta_value(f"DEVICE_{dk}_HOST")
            try:
                local_id = int(meta_value(f"DEVICE_{dk}_LOCAL_ID", "0"))
            except Exception:
                # An unparsable local id falls back to 0 rather than dropping the device.
                local_id = 0

            label = meta_value(f"DEVICE_{dk}_LABEL")
            serial = meta_value(f"DEVICE_{dk}_SERIAL")
            kind = meta_value(f"DEVICE_{dk}_KIND")

            if host_id:
                if host_id not in self._host_status:
                    self._host_status[host_id] = HostStatus(host_id=host_id, online=False, last_seen_ms=0)
                self._host_devices.setdefault(host_id, set()).add(device_id)
                self._routes[device_id] = DeviceRoute(host_id=host_id, host_local_id=local_id)

            self._device_infos[device_id] = host_tunnel_pb2.DeviceInfo(
                device_id=device_id,
                local_id=local_id,
                label=label,
                serial=serial,
                kind=kind,
            )

            self._device_status[device_id] = DeviceStatus(
                device_id=device_id,
                host_id=host_id,
                host_local_id=local_id,
                online=False,
                last_seen_ms=0,
            )

        self._invalidate_caches_locked()

    info(
        f"[registry] persisted directory loaded: routes={len(self._routes)} "
        f"hosts={len(self._host_status)} devices={len(self._device_status)}"
    )
|
|
374
|
+
|
|
375
|
+
# persistence write helpers
|
|
376
|
+
|
|
377
|
+
def _persist_host_meta(self, host_id: str, *, host_name: str = "", platform: str = "", version: str = "") -> None:
    """Write host metadata to the meta store under keys derived from ``host_key(host_id)``.

    ``HOST_<key>_ID`` is written only if absent. NAME, PLATFORM and VERSION are
    upserted, each only when a non-empty value was supplied.
    """
    prefix = f"HOST_{host_key(host_id)}_"
    self._env_meta.set_kv_if_absent(prefix + "ID", host_id)

    optional_fields = (("NAME", host_name), ("PLATFORM", platform), ("VERSION", version))
    for suffix, value in optional_fields:
        if value:
            self._env_meta.upsert_kv(prefix + suffix, value)
|
|
386
|
+
|
|
387
|
+
def persist_host_meta_from_hello(self, hello: Any) -> None:
    """Persist the ``host_id``/``version`` carried by a hello message, best effort.

    A hello with no host_id is ignored. Any failure is logged with a traceback
    and swallowed; this method does not raise for persistence errors.
    """
    try:
        hid = str(getattr(hello, "host_id", "") or "").strip()
        ver = str(getattr(hello, "version", "") or "").strip()
        if not hid:
            return
        self._persist_host_meta(hid, version=ver)
        info(f"[registry] persisted host meta from hello: host_id={hid!r} version={ver!r}")
    except Exception:
        exception("[registry] persist_host_meta_from_hello failed")
|
|
396
|
+
|
|
397
|
+
def _persist_host(self, host_id: str) -> None:
    """Append *host_id* to the persisted ``KNOWN_HOSTS`` list and log it."""
    store = self._env_lists
    store.append_to_csv_list("KNOWN_HOSTS", host_id)
    info(f"[registry] persisted host list: host_id={host_id!r}")
|
|
400
|
+
|
|
401
|
+
def _persist_device(self, host_id: str, device_id: str, local_id: int, device_info: Any) -> None:
    """Persist one device: list membership, owning host, local id and descriptive fields.

    The device is appended to ``KNOWN_DEVICES`` and to the host's
    ``HOST_<key>_DEVICES`` list; ``DEVICE_<key>_HOST`` and ``_LOCAL_ID`` are
    always upserted, while ``_LABEL``, ``_SERIAL`` and ``_KIND`` are upserted
    only when *device_info* carries a non-empty value for them.
    """
    lists, meta = self._env_lists, self._env_meta

    lists.append_to_csv_list("KNOWN_DEVICES", device_id)
    hk = host_key(host_id)
    meta.set_kv_if_absent(f"HOST_{hk}_ID", host_id)
    lists.append_to_csv_list(f"HOST_{hk}_DEVICES", device_id)

    dk = sanitize_env_key(device_id)
    meta.upsert_kv(f"DEVICE_{dk}_HOST", host_id)
    meta.upsert_kv(f"DEVICE_{dk}_LOCAL_ID", str(int(local_id)))

    # Read all descriptive fields first, then write the non-empty ones in order.
    described = {
        name: str(getattr(device_info, name, "") or "").strip()
        for name in ("label", "serial", "kind")
    }
    for name, value in described.items():
        if value:
            meta.upsert_kv(f"DEVICE_{dk}_{name.upper()}", value)

    info(
        f"[registry] persisted device: host_id={host_id!r} device_id={device_id!r} local_id={int(local_id)} "
        f"label={described['label']!r} serial={described['serial']!r} kind={described['kind']!r}"
    )
|
|
426
|
+
|
|
427
|
+
# directory/status mutators
|
|
428
|
+
|
|
429
|
+
def register_host(self, host_id: str, session: HostSession, *, replace_if_stale_ms: int = 15_000) -> None:
    """Install *session* as the live session for *host_id* and mark the host online.

    If another live session already holds the id, its age is measured from its
    ``last_heartbeat_ms``: a session younger than *replace_if_stale_ms* wins
    and the call fails; an older one is closed and replaced.

    Raises:
        HostIdConflictError: when a sufficiently recent live session exists.
    """
    info(
        f"[registry] register_host: host_id={host_id!r} incoming_sess={session.session_uuid} "
        f"incoming_peer={session.peer}"
    )

    with self._lock:
        current = self._hosts.get(host_id)
        if current is not None and current.alive:
            last_beat = int(getattr(current, "last_heartbeat_ms", 0) or 0)
            age_ms = int(now_ms() - last_beat)

            # Shared tail of both warnings below.
            who = (
                f"incoming_sess={session.session_uuid} incoming_peer={session.peer} "
                f"existing_sess={getattr(current,'session_uuid','?')} existing_peer={getattr(current,'peer','?')}"
            )

            if age_ms < replace_if_stale_ms:
                warn(f"[registry] HostIdConflict host_id={host_id!r} age_ms={age_ms} " + who)
                raise HostIdConflictError(host_id, age_ms=age_ms, replace_after_ms=replace_if_stale_ms)

            warn(f"[registry] replacing STALE host_id={host_id!r} age_ms={age_ms} " + who)
            current.close()

        self._hosts[host_id] = session

        status = self._host_status.get(host_id)
        if not status:
            status = HostStatus(host_id=host_id)
        status.online = True
        status.last_seen_ms = now_ms()
        self._host_status[host_id] = status

        self._host_devices.setdefault(host_id, set())
        self._persist_host(host_id)
        self._invalidate_caches_locked()

    info(f"[registry] register_host done: host_id={host_id!r} online=True incoming_sess={session.session_uuid}")
|
|
467
|
+
|
|
468
|
+
def drop_host(self, host_id: str) -> None:
    """Remove and close whatever session is registered for *host_id*; mark host and devices offline.

    Works even when no session is registered: the host status is still created
    or updated, and the caches are invalidated.
    """
    info(f"[registry] drop_host: host_id={host_id!r}")

    with self._lock:
        removed = self._hosts.pop(host_id, None)
        if removed is not None:
            removed.close()

        status = self._host_status.get(host_id)
        if not status:
            status = HostStatus(host_id=host_id)
        status.online = False
        status.last_seen_ms = now_ms()
        self._host_status[host_id] = status

        for device_id in self._host_devices.get(host_id, ()):
            entry = self._device_status.get(device_id)
            if entry is None:
                continue
            entry.online = False
            entry.last_seen_ms = now_ms()

        self._invalidate_caches_locked()

    info(f"[registry] drop_host done: host_id={host_id!r} online=False")
|
|
489
|
+
|
|
490
|
+
def drop_host_if_match(self, host_id: str, session: HostSession) -> bool:
    """Drop *host_id* only if *session* is the exact object currently registered for it.

    Returns False without changing anything when a different session (or none)
    is registered; otherwise closes the session, marks the host and its devices
    offline, invalidates the caches and returns True.
    """
    with self._lock:
        if self._hosts.get(host_id) is not session:
            return False

        removed = self._hosts.pop(host_id, None)
        if removed is not None:
            removed.close()

        status = self._host_status.get(host_id)
        if not status:
            status = HostStatus(host_id=host_id)
        status.online = False
        status.last_seen_ms = now_ms()
        self._host_status[host_id] = status

        for device_id in self._host_devices.get(host_id, ()):
            entry = self._device_status.get(device_id)
            if entry is None:
                continue
            entry.online = False
            entry.last_seen_ms = now_ms()

        self._invalidate_caches_locked()

    info(f"[registry] drop_host_if_match done: host_id={host_id!r} online=False")
    return True
|
|
515
|
+
|
|
516
|
+
def heartbeat(self, host_id: str, unix_ms: int = 0) -> None:
    """Record a heartbeat: mark the host and all of its known devices online.

    Args:
        host_id: host the heartbeat belongs to.
        unix_ms: timestamp to record; a zero value means "use the current time".
    """
    stamp = int(unix_ms)
    if not stamp:
        stamp = now_ms()

    with self._lock:
        status = self._host_status.get(host_id)
        if not status:
            status = HostStatus(host_id=host_id)
        status.online = True
        status.last_seen_ms = stamp
        self._host_status[host_id] = status

        for device_id in self._host_devices.get(host_id, ()):
            entry = self._device_status.get(device_id)
            if entry is None:
                continue
            entry.online = True
            entry.last_seen_ms = stamp

        self._invalidate_caches_locked()

    info(f"[registry] heartbeat: host_id={host_id!r} unix_ms={stamp}")
|
|
533
|
+
|
|
534
|
+
def announce_devices(self, host_id: str, devices: list) -> None:
    """Validate and register the devices announced by *host_id*.

    STRICT MODE for device ids:
      - device_id MUST be a canonical ASCII decimal string (e.g. "10");
      - "010", "pluto:..." and duplicates within one announce are rejected;
      - entries with an empty device_id are skipped with a warning.

    The whole announce is validated before anything is applied. The conflict
    check against existing routes and the apply step run inside ONE critical
    section: previously the lock was released between them, so two hosts
    announcing the same device_id concurrently could both pass the check and
    the later one would silently overwrite the other's route.

    Each accepted device gets a route, is marked online, has a copy of its
    DeviceInfo stored under the canonical id, and is persisted.

    Raises:
        ValueError: for a non-numeric, non-canonical or duplicated device_id.
        DeviceIdConflictError: if a device_id is already routed to another host.
    """
    info(f"[registry] announce_devices: host_id={host_id!r} n={len(devices)}")

    pairs: List[Tuple[Any, str]] = []
    seen: Set[str] = set()
    empty_ids = 0

    for d in devices:
        raw = str(getattr(d, "device_id", "") or "").strip()
        if not raw:
            empty_ids += 1
            continue

        # ASCII-only: str.isdigit() alone also accepts characters such as "²",
        # which int() rejects with an unhelpful "invalid literal" message.
        if not (raw.isascii() and raw.isdigit()):
            raise ValueError(
                f"Host device_id must be a numeric gid string (e.g. '10'). "
                f"Got {raw!r} from host_id={host_id!r}."
            )

        canon = str(int(raw))
        if raw != canon:
            raise ValueError(
                f"Host device_id must be canonical decimal with no leading zeros. "
                f"Got {raw!r}; use {canon!r} (host_id={host_id!r})."
            )

        if canon in seen:
            raise ValueError(
                f"Duplicate device_id {canon!r} in device announce from host_id={host_id!r}."
            )
        seen.add(canon)
        pairs.append((d, canon))

    now = now_ms()
    with self._lock:
        # Conflict check first (no partial apply), under the same lock as the apply.
        for _, device_id in pairs:
            existing = self._routes.get(device_id)
            if existing is not None and existing.host_id != host_id:
                warn(
                    f"[registry] device_id conflict: device_id={device_id!r} "
                    f"existing_host={existing.host_id!r} new_host={host_id!r}"
                )
                raise DeviceIdConflictError(
                    device_id=device_id,
                    existing_host=existing.host_id,
                    new_host=host_id,
                )

        self._host_devices.setdefault(host_id, set())

        # Preserve old behavior: one warning per skipped empty id, emitted only
        # once the whole announce has passed validation.
        for _ in range(empty_ids):
            warn(f"[registry] announce_devices: skipping device with empty device_id host_id={host_id!r}")

        for d, device_id in pairs:
            try:
                local_id = int(getattr(d, "local_id", 0))
            except Exception:
                local_id = 0

            label = str(getattr(d, "label", "") or "")
            serial = str(getattr(d, "serial", "") or "")
            kind = str(getattr(d, "kind", "") or "")

            info(
                f"[registry] device: host_id={host_id!r} local_id={local_id} "
                f"device_id={device_id!r} label={label!r} serial={serial!r} kind={kind!r}"
            )

            self._routes[device_id] = DeviceRoute(host_id=host_id, host_local_id=local_id)
            self._host_devices[host_id].add(device_id)

            ds = self._device_status.get(device_id) or DeviceStatus(
                device_id=device_id,
                host_id=host_id,
                host_local_id=local_id,
            )
            ds.host_id = host_id
            ds.host_local_id = local_id
            ds.online = True
            ds.last_seen_ms = now
            self._device_status[device_id] = ds

            try:
                tmp = host_tunnel_pb2.DeviceInfo()
                tmp.CopyFrom(d)
                # Ensure the stored info reflects the canonical device_id key.
                tmp.device_id = str(device_id)
                self._device_infos[device_id] = tmp
            except Exception:
                # The announced object could not be copied; rebuild the info
                # from the individual fields read above.
                self._device_infos[device_id] = host_tunnel_pb2.DeviceInfo(
                    device_id=str(device_id),
                    local_id=int(local_id),
                    label=label,
                    serial=serial,
                    kind=kind,
                )

            self._persist_device(host_id, device_id, local_id, d)

        self._invalidate_caches_locked()
        # Read under the lock so the logged count matches this announce.
        route_count = len(self._routes)

    info(f"[registry] announce_devices done: host_id={host_id!r} routes={route_count}")
|
|
642
|
+
|
|
643
|
+
# FAST PATHS (use per-RPC)
|
|
644
|
+
|
|
645
|
+
def is_host_device(self, device_id: str) -> bool:
    """Return True if the (whitespace-stripped) *device_id* has a route in the directory.

    Empty or None ids are never host devices.
    """
    key = (device_id or "").strip()
    if key == "":
        return False
    with self._lock:
        known = key in self._routes
    return known
|
|
651
|
+
|
|
652
|
+
def is_host_device_active(self, device_id: str) -> bool:
    """Return True if *device_id* is routed to a host that currently has a live session.

    Liveness comes from the active-hosts cache, which is refreshed here when
    stale. Empty, None or unrouted ids yield False.
    """
    key = (device_id or "").strip()
    if key == "":
        return False

    checked_at = now_ms()
    with self._lock:
        route = self._routes.get(key)
        if route is None:
            return False
        self._refresh_active_hosts_cache_locked(checked_at)
        return route.host_id in self._active_hosts_cache
|
|
663
|
+
|
|
664
|
+
def get_host_for_device(self, device_id: str) -> Optional[str]:
    """Return the host_id that *device_id* is routed to, or None when there is no route.

    The id is whitespace-stripped first; empty or None ids yield None.
    """
    key = (device_id or "").strip()
    if key == "":
        return None
    with self._lock:
        route = self._routes.get(key)
        if route:
            return route.host_id
        return None
|
|
671
|
+
|
|
672
|
+
# UI/diagnostics snapshots
|
|
673
|
+
|
|
674
|
+
def device_directory_cached(self, *, ttl_ms: Optional[int] = None) -> Dict[str, Tuple[str, Any, bool]]:
    """Return a snapshot ``device_id -> (host_id, device_info, is_active)`` of all routes.

    A cached snapshot is reused while it is younger than *ttl_ms* (default:
    the registry's directory TTL); otherwise it is rebuilt. The returned dict
    is a fresh shallow copy on every call. ``device_info`` is a copy of the
    stored DeviceInfo (or the stored object itself if copying fails, or None
    when nothing is stored).
    """
    now = now_ms()
    if ttl_ms is None:
        ttl = int(self._dir_cache_ttl_ms)
    else:
        ttl = int(ttl_ms)

    with self._lock:
        built_at = self._dir_cache_ts_ms
        if built_at and (now - built_at) < ttl:
            return dict(self._dir_cache)

        self._refresh_active_hosts_cache_locked(now)
        active_hosts = self._active_hosts_cache

        snapshot: Dict[str, Tuple[str, Any, bool]] = {}
        for device_id, route in self._routes.items():
            stored = self._device_infos.get(device_id)
            entry_info = stored
            if stored is not None:
                clone = host_tunnel_pb2.DeviceInfo()
                try:
                    clone.CopyFrom(stored)
                except Exception:
                    # Fall back to sharing the stored object.
                    entry_info = stored
                else:
                    entry_info = clone

            snapshot[device_id] = (route.host_id, entry_info, route.host_id in active_hosts)

        self._dir_cache = snapshot
        self._dir_cache_ts_ms = now
        return dict(snapshot)
|
|
702
|
+
|
|
703
|
+
# misc accessors
|
|
704
|
+
|
|
705
|
+
def list_routes(self) -> Dict[str, Tuple[str, int]]:
    """Return a new dict mapping each device_id to its ``(host_id, host_local_id)`` pair."""
    routes: Dict[str, Tuple[str, int]] = {}
    with self._lock:
        for device_id, route in self._routes.items():
            routes[device_id] = (route.host_id, route.host_local_id)
    return routes
|
|
708
|
+
|
|
709
|
+
def list_hosts(self) -> Dict[str, HostStatus]:
    """Return a shallow copy of the host-status map (the status objects themselves are shared)."""
    with self._lock:
        copied = dict(self._host_status)
    return copied
|
|
712
|
+
|
|
713
|
+
def list_devices(self) -> Dict[str, DeviceStatus]:
    """Return a shallow copy of the device-status map (the status objects themselves are shared)."""
    with self._lock:
        copied = dict(self._device_status)
    return copied
|
|
716
|
+
|
|
717
|
+
# forwarding
|
|
718
|
+
|
|
719
|
+
def _get_session_and_route(self, device_id: str) -> Tuple[HostSession, DeviceRoute]:
    """Look up the route for *device_id* and the live session of the host it points to.

    Raises:
        KeyError: no route exists for the device id.
        RuntimeError: the routed host has no session, or its session is not alive.
    """
    with self._lock:
        route = self._routes.get(str(device_id))
        if route is None:
            raise KeyError(f"Unknown device_id={device_id}")

        session = self._hosts.get(route.host_id)
        connected = session is not None and session.alive
        if not connected:
            raise RuntimeError(f"Host '{route.host_id}' not connected for device {device_id}")

        return session, route
|
|
730
|
+
|
|
731
|
+
def forward_request(
    self,
    *,
    device_id: str,
    request: "generic_pb2.GenericRPCRequest",
    timeout_sec: float,
    deadline_unix_ms: int = 0,
    cancel_on_timeout: bool = False,
) -> "generic_pb2.GenericRPCResponse":
    """Forward a generic RPC to the host that owns ``device_id`` and wait for the reply.

    The request is wrapped in an ``RpcRequest`` frame, queued on the host's
    session, and the call blocks on a future that the inbound stream
    completes when the matching ``rpc_response`` arrives.

    Args:
        device_id: Device identifier used for the route lookup.
        request: The generic RPC request to forward.
        timeout_sec: Maximum time to wait for the host's response.
        deadline_unix_ms: Deadline copied into the frame (0 when unset).
        cancel_on_timeout: When true, a ``Cancel`` frame for this request is
            sent to the host if waiting for the response fails.

    Returns:
        The host's ``GenericRPCResponse`` on success. On any handled failure
        (route/session lookup, send failure, wait failure, host-reported
        error) a response with ``results["Ok"] = False`` and a
        ``results["Error"]`` message is returned instead of raising.
    """

    def _failure(message: str) -> "generic_pb2.GenericRPCResponse":
        # Uniform failure envelope used by every error path below.
        failed = generic_pb2.GenericRPCResponse()
        failed.results["Ok"].CopyFrom(map_arg(False))
        failed.results["Error"].CopyFrom(map_arg(message))
        return failed

    fn = str(getattr(request, "function_name", "") or "")
    info(f"[forward] device_id={device_id!r} fn={fn!r} timeout_sec={timeout_sec}")

    try:
        sess, route = self._get_session_and_route(str(device_id))
    except Exception as e:
        exception(f"[forward] route/session lookup failed device_id={device_id!r} err={e!r}")
        return _failure(f"Route/session lookup failed: {e}")

    req_id = secrets.token_hex(12)
    fut: Future = Future()

    # Register the waiter before sending so a fast response cannot be missed.
    with sess.inflight_lock:
        sess.inflight[req_id] = fut

    # Everything after registration runs under try/finally: the inflight
    # entry must be removed on every exit path, including an exception while
    # building or sending the frame (previously that leaked the entry).
    try:
        try:
            gid_u32 = int(str(device_id))  # strict mode: device_id is canonical gid string
        except Exception:
            gid_u32 = 0

        rpc_req = host_tunnel_pb2.RpcRequest(
            req_id=req_id,
            global_device_id=int(gid_u32),
            local_device_id=int(route.host_local_id),
            device_id=str(device_id),
            deadline_unix_ms=int(deadline_unix_ms or 0),
        )
        rpc_req.request.CopyFrom(request)

        frame = host_tunnel_pb2.HostFrame(rpc_request=rpc_req)

        info(f"[forward] -> host_id={sess.host_id!r} req_id={req_id} local_id={route.host_local_id}")

        if not sess.send(frame, timeout=2.0):
            return _failure(f"Host '{sess.host_id}' outbound queue full / not writable.")

        t0 = time.time()
        try:
            resp: host_tunnel_pb2.RpcResponse = fut.result(timeout=timeout_sec)
        except Exception as e:
            dt_ms = int((time.time() - t0) * 1000)
            warn(f"[forward] wait failed req_id={req_id} dt_ms={dt_ms} err={e!r}")

            if cancel_on_timeout:
                try:
                    ok = sess.send(host_tunnel_pb2.HostFrame(cancel=host_tunnel_pb2.Cancel(req_id=req_id)))
                    info(f"[forward] sent cancel req_id={req_id} ok={ok}")
                except Exception:
                    exception(f"[forward] failed sending cancel req_id={req_id}")

            # A timeout raised by Future.result() carries no message, so
            # str(e) is empty; fall back to a descriptive text so the caller
            # never receives a blank "Error" field.
            detail = str(e) or (
                f"{type(e).__name__} waiting for response from host "
                f"'{sess.host_id}' (timeout_sec={timeout_sec})"
            )
            return _failure(detail)

        dt_ms = int((time.time() - t0) * 1000)
        info(f"[forward] <- resp req_id={req_id} ok={bool(resp.ok)} dt_ms={dt_ms} err={str(resp.error or '')!r}")

        if not resp.ok:
            return _failure(resp.error or "Remote host error.")

        return resp.response
    finally:
        with sess.inflight_lock:
            sess.inflight.pop(req_id, None)
|
|
820
|
+
|
|
821
|
+
def forward_request_by_host_device(
    self,
    *,
    host_id: str,
    device_id: str,
    request: "generic_pb2.GenericRPCRequest",
    timeout_sec: float,
    deadline_unix_ms: int = 0,
    cancel_on_timeout: bool = False,
) -> "generic_pb2.GenericRPCResponse":
    """Forward a request to a device after checking it belongs to ``host_id``.

    If the device has no route, or its route points at a different host, a
    response with ``results["Ok"] = False`` and an explanatory
    ``results["Error"]`` is returned without contacting any host. Otherwise
    the call is delegated to ``forward_request`` with the same arguments.
    """
    with self._lock:
        route = self._routes.get(str(device_id))

    rejection: Optional[str] = None
    if route is None:
        warn(f"[forward] unknown device_id={device_id!r}")
        rejection = f"Unknown device_id={device_id}"
    elif route.host_id != host_id:
        warn(f"[forward] device_id={device_id!r} not on host_id={host_id!r} (actual={route.host_id!r})")
        rejection = f"device_id={device_id} is not on host_id={host_id}"

    if rejection is not None:
        refused = generic_pb2.GenericRPCResponse()
        refused.results["Ok"].CopyFrom(map_arg(False))
        refused.results["Error"].CopyFrom(map_arg(rejection))
        return refused

    return self.forward_request(
        device_id=device_id,
        request=request,
        timeout_sec=timeout_sec,
        deadline_unix_ms=deadline_unix_ms,
        cancel_on_timeout=cancel_on_timeout,
    )
|
|
854
|
+
|
|
855
|
+
# Convenience forwarders
|
|
856
|
+
|
|
857
|
+
def handle_host_device(
    registry: HostTunnelRegistry,
    *,
    host_id: str,
    device_id: str,
    function_name: str,
    args: Any,
    timeout_sec: float = 10.0,
) -> Dict[str, Any]:
    """Build a generic RPC from ``function_name``/``args`` and forward it to a host device.

    The request is sent through ``registry.forward_request_by_host_device``
    with ``cancel_on_timeout=True``.

    Returns:
        The response's ``results`` map converted to a plain dict.
    """
    info(f"[handle_host_device] host_id={host_id!r} device_id={device_id!r} fn={function_name!r}")

    rpc_request = generic_pb2.GenericRPCRequest(function_name=str(function_name))
    _copy_argument_map(rpc_request.args, args)

    forward_kwargs = {
        "host_id": host_id,
        "device_id": device_id,
        "request": rpc_request,
        "timeout_sec": timeout_sec,
        "cancel_on_timeout": True,
    }
    rpc_response = registry.forward_request_by_host_device(**forward_kwargs)
    return dict(rpc_response.results)
|
|
878
|
+
|
|
879
|
+
def handle_host_device_request(
    registry: HostTunnelRegistry,
    *,
    host_id: str,
    device_id: str,
    request: "generic_pb2.GenericRPCRequest",
    timeout_sec: float = 10.0,
) -> "generic_pb2.GenericRPCResponse":
    """Forward an already-built generic RPC request to a device on a specific host.

    Delegates to ``registry.forward_request_by_host_device`` with
    ``cancel_on_timeout=True`` and returns its response unchanged.
    """
    fn_name = str(getattr(request, "function_name", "") or "")
    info(f"[handle_host_device_request] host_id={host_id!r} device_id={device_id!r} fn={fn_name!r}")

    forward_kwargs = {
        "host_id": host_id,
        "device_id": device_id,
        "request": request,
        "timeout_sec": timeout_sec,
        "cancel_on_timeout": True,
    }
    return registry.forward_request_by_host_device(**forward_kwargs)
|
|
898
|
+
|
|
899
|
+
# Servicer
|
|
900
|
+
|
|
901
|
+
class HostTunnelServicer(host_tunnel_pb2_grpc.HostTunnelServicer):
    """gRPC servicer for the host tunnel's bidirectional ``Connect`` stream.

    For each stream, a background thread consumes inbound frames (HELLO,
    device announcements, RPC responses, metadata, heartbeats) and applies
    them to the registry, while the ``Connect`` generator itself yields the
    frames queued on the host's session back to the host.
    """

    def __init__(self, registry: HostTunnelRegistry) -> None:
        """Store the registry that host sessions and devices are recorded in."""
        self.registry = registry

    def Connect(self, request_iterator: Iterator[host_tunnel_pb2.HostFrame], context):
        """Serve one host tunnel stream.

        Args:
            request_iterator: Inbound ``HostFrame`` stream from the host.
            context: gRPC servicer context for this call.

        Yields:
            Outbound ``HostFrame`` messages taken from the session's queue.

        A failure recorded by the inbound thread (invalid HELLO, failed
        auth, host/device id conflict, invalid announce) terminates the RPC
        via ``context.abort`` with the matching status code.
        """
        try:
            peer = context.peer()
        except Exception:
            peer = "unknown-peer"

        peer_tag = _safe_thread_tag(peer)
        stream_uuid = uuid.uuid4().hex[:10]
        info(f"[Connect] new stream peer={peer} stream_sess={stream_uuid}")

        host_id: Optional[str] = None
        session: Optional[HostSession] = None
        stop = threading.Event()
        fatal: Dict[str, str] = {}  # {"code": "...", "details": "..."}

        # IMPORTANT: only drop the host if THIS stream successfully registered it,
        # and only if the registry still points at THIS session.
        registered = False

        cleanup_lock = threading.Lock()
        cleanup_done = False

        def _cleanup(reason: str, *, detail: str = "") -> None:
            """Tear the stream down once; later calls are no-ops."""
            nonlocal cleanup_done
            with cleanup_lock:
                if cleanup_done:
                    return
                cleanup_done = True

            # stop pumps first
            try:
                stop.set()
            except Exception:
                pass

            hid = host_id or "?"
            det = (detail or "").strip()
            if det:
                warn(f"[host] offline host_id={hid!r} reason={reason} detail={det}")
            else:
                warn(f"[host] offline host_id={hid!r} reason={reason}")

            # If we never registered, do NOT drop: it could be a conflict with a real live host.
            try:
                if registered and host_id is not None and session is not None:
                    self.registry.drop_host_if_match(host_id, session)
            except Exception:
                pass

            # close session object (no-op if already closed)
            try:
                if session is not None:
                    session.close()
            except Exception:
                pass

        def _fail(code: str, details: str) -> None:
            """Record the terminal status for the stream, then stop both pumps.

            Both keys are published in a single update and always before
            ``stop`` is set, so the outbound side never sees ``stop`` without
            the complete failure information.
            """
            fatal.update({"code": code, "details": details})
            stop.set()

        def _abort_if_fatal() -> None:
            """Abort the RPC with the recorded status, if a failure was recorded."""
            if not fatal:
                return

            code = fatal.get("code", "")
            details = fatal.get("details", "fatal")
            error(f"[Connect] aborting stream peer={peer} stream_sess={stream_uuid} code={code} details={details}")
            _cleanup(f"fatal:{code}", detail=details)

            if not context.is_active():
                # The peer is already gone; there is nobody to report a status to.
                return

            status_by_code = {
                "ALREADY_EXISTS": grpc.StatusCode.ALREADY_EXISTS,
                "INVALID_ARGUMENT": grpc.StatusCode.INVALID_ARGUMENT,
                "UNAUTHENTICATED": grpc.StatusCode.UNAUTHENTICATED,
                "PERMISSION_DENIED": grpc.StatusCode.PERMISSION_DENIED,
            }
            # context.abort raises, which ends the generator.
            context.abort(status_by_code.get(code, grpc.StatusCode.UNKNOWN), details)

        def _on_rpc_done():
            try:
                _cleanup("grpc-stream-done")
            except Exception:
                pass

        try:
            context.add_callback(_on_rpc_done)
        except Exception:
            pass

        def inbound_loop():
            """Consume inbound frames until the stream ends or a fatal error occurs."""
            nonlocal host_id, session, registered
            info(f"[inbound] start peer={peer} stream_sess={stream_uuid}")

            try:
                for frame in request_iterator:
                    which = frame.WhichOneof("msg")
                    if which is None:
                        warn(f"[inbound] frame with no msg peer={peer} stream_sess={stream_uuid}")
                        continue

                    info(f"[inbound] IN type={which} peer={peer} stream_sess={stream_uuid} host_id={host_id or '?'}")

                    if which == "hello":
                        # Guard: only accept one HELLO per stream.
                        if session is not None or host_id is not None:
                            warn(f"[inbound] duplicate HELLO ignored host_id={host_id!r} peer={peer} stream_sess={stream_uuid}")
                            continue

                        hello = frame.hello
                        raw_id = str(getattr(hello, "host_id", "") or "")
                        raw_tok = str(getattr(hello, "host_token", "") or "")  # NEW FIELD

                        try:
                            host_id = _validate_host_id(raw_id)
                        except ValueError as e:
                            error(f"[inbound] invalid hello.host_id peer={peer} stream_sess={stream_uuid} err={e}")
                            _fail("INVALID_ARGUMENT", str(e))
                            return

                        # Auth gate: MUST pass before we register the host_id.
                        if not is_host_token_valid(host_id, raw_tok, require_status="approved"):
                            code, details = _auth_fail_code_and_details(host_id)
                            error(
                                f"[inbound] auth failed host_id={host_id!r} peer={peer} stream_sess={stream_uuid} "
                                f"code={code} details={details}"
                            )
                            _fail(code, details)
                            return

                        ver = str(getattr(hello, "version", "") or "")
                        info(f"[inbound] HELLO host_id={host_id!r} version={ver!r} peer={peer} stream_sess={stream_uuid}")

                        # ONE session per stream
                        session = HostSession(host_id, peer=peer, session_uuid=stream_uuid)

                        # ONE register per stream
                        try:
                            self.registry.register_host(host_id, session, replace_if_stale_ms=15_000)
                            registered = True
                        except HostIdConflictError as e:
                            error(f"[inbound] HostIdConflictError host_id={host_id!r} stream_sess={stream_uuid} err={e}")
                            # Record the failure before closing the session so
                            # the outbound pump cannot finish without seeing it.
                            _fail("ALREADY_EXISTS", str(e))
                            try:
                                session.close()
                            except Exception:
                                pass
                            return

                        self.registry.persist_host_meta_from_hello(hello)

                        # Ask host for metadata immediately on join
                        try:
                            mrid = secrets.token_hex(12)
                            ok = session.send(
                                host_tunnel_pb2.HostFrame(
                                    meta_request=host_tunnel_pb2.MetaRequest(
                                        req_id=mrid,
                                        include_platform=True,
                                        include_env=True,
                                        include_devices=True,
                                    )
                                )
                            )
                            info(
                                f"[inbound] sent MetaRequest req_id={mrid} ok={ok} "
                                f"host_id={host_id!r} stream_sess={stream_uuid}"
                            )
                        except Exception:
                            exception("[inbound] FAILED sending MetaRequest")

                        # heartbeat ack
                        try:
                            ok = session.send(
                                host_tunnel_pb2.HostFrame(
                                    heartbeat=host_tunnel_pb2.Heartbeat(unix_ms=now_ms())
                                )
                            )
                            info(
                                f"[inbound] sent heartbeat ack ok={ok} "
                                f"host_id={host_id!r} stream_sess={stream_uuid}"
                            )
                        except Exception:
                            exception("[inbound] FAILED sending heartbeat ack")

                    elif which == "device_announce":
                        if session is None or host_id is None:
                            warn(f"[inbound] device_announce before hello/session: ignoring peer={peer} stream_sess={stream_uuid}")
                            continue

                        ann = frame.device_announce
                        n = len(getattr(ann, "devices", []))
                        info(
                            f"[inbound] DEVICE_ANNOUNCE host_id={host_id!r} n={n} stream_sess={stream_uuid} "
                            f"unix_ms={int(getattr(ann,'unix_ms',0) or 0)} full_snapshot={bool(getattr(ann,'full_snapshot',False))}"
                        )

                        for d in list(ann.devices):
                            info(
                                f"[inbound] device local_id={int(getattr(d,'local_id',0))} "
                                f"label={str(getattr(d,'label','') or '')!r} "
                                f"device_id={str(getattr(d,'device_id','') or '')!r} "
                                f"serial={str(getattr(d,'serial','') or '')!r} "
                                f"kind={str(getattr(d,'kind','') or '')!r}"
                            )

                        try:
                            self.registry.announce_devices(host_id, list(ann.devices))
                            info(f"[inbound] registry announce applied host_id={host_id!r} stream_sess={stream_uuid}")
                        except ValueError as e:
                            error(
                                f"[inbound] INVALID_ARGUMENT device announce host_id={host_id!r} "
                                f"stream_sess={stream_uuid} err={e}"
                            )
                            _fail("INVALID_ARGUMENT", str(e))
                            return
                        except DeviceIdConflictError as e:
                            error(f"[inbound] DeviceIdConflictError host_id={host_id!r} stream_sess={stream_uuid} err={e}")
                            _fail("ALREADY_EXISTS", str(e))
                            return
                        except Exception:
                            exception(f"[inbound] announce_devices crashed host_id={host_id!r} stream_sess={stream_uuid}")

                        # heartbeat ack after announce
                        try:
                            ok = session.send(
                                host_tunnel_pb2.HostFrame(
                                    heartbeat=host_tunnel_pb2.Heartbeat(unix_ms=now_ms())
                                )
                            )
                            info(
                                f"[inbound] sent heartbeat ack after announce ok={ok} "
                                f"host_id={host_id!r} stream_sess={stream_uuid}"
                            )
                        except Exception:
                            exception("[inbound] FAILED sending heartbeat ack after announce")

                    elif which == "rpc_response":
                        if session is None:
                            warn(f"[inbound] rpc_response before session: ignoring peer={peer} stream_sess={stream_uuid}")
                            continue

                        resp = frame.rpc_response
                        rid = str(getattr(resp, "req_id", "") or "")
                        ok = bool(getattr(resp, "ok", False))
                        err_s = str(getattr(resp, "error", "") or "")
                        info(f"[inbound] RPC_RESPONSE req_id={rid} ok={ok} error={err_s!r} stream_sess={stream_uuid}")

                        with session.inflight_lock:
                            fut = session.inflight.get(rid)

                        if fut is not None and not fut.done():
                            fut.set_result(resp)
                            info(f"[inbound] delivered rpc_response to waiter req_id={rid} stream_sess={stream_uuid}")
                        else:
                            warn(f"[inbound] rpc_response has no inflight waiter req_id={rid} stream_sess={stream_uuid}")

                    elif which == "meta_response":
                        # NEW proto: MetaResponse.meta.devices
                        if host_id is None or session is None:
                            warn(f"[inbound] meta_response before hello/session: ignoring peer={peer} stream_sess={stream_uuid}")
                            continue

                        mr = frame.meta_response
                        ok = bool(getattr(mr, "ok", False))
                        err_s = str(getattr(mr, "error", "") or "")
                        info(f"[inbound] META_RESPONSE ok={ok} error={err_s!r} stream_sess={stream_uuid}")

                        if ok:
                            try:
                                meta = getattr(mr, "meta", None)
                                devs = list(getattr(meta, "devices", []) or [])
                            except Exception:
                                devs = []

                            if devs:
                                info(f"[inbound] META_RESPONSE applying {len(devs)} devices to registry host_id={host_id!r}")
                                try:
                                    self.registry.announce_devices(host_id, devs)
                                    self.registry.heartbeat(host_id, now_ms())
                                except ValueError as e:
                                    error(f"[inbound] META_RESPONSE INVALID_ARGUMENT host_id={host_id!r} err={e}")
                                    _fail("INVALID_ARGUMENT", str(e))
                                    return
                                except DeviceIdConflictError as e:
                                    error(f"[inbound] META_RESPONSE DeviceIdConflictError host_id={host_id!r} err={e}")
                                    _fail("ALREADY_EXISTS", str(e))
                                    return
                                except Exception:
                                    exception("[inbound] META_RESPONSE announce_devices failed")
                            else:
                                info("[inbound] META_RESPONSE had no meta.devices (or empty)")

                    elif which == "heartbeat":
                        if session is not None and host_id is not None:
                            # Fall back to the server clock when the host sent no timestamp.
                            ms = int(getattr(frame.heartbeat, "unix_ms", 0) or 0) or now_ms()
                            session.last_heartbeat_ms = ms
                            self.registry.heartbeat(host_id, ms)
                            info(f"[inbound] HEARTBEAT host_id={host_id!r} unix_ms={ms} stream_sess={stream_uuid}")
                        else:
                            warn(
                                f"[inbound] heartbeat before hello/session: ignoring peer={peer} "
                                f"stream_sess={stream_uuid} host_id={(host_id or '?')!r}"
                            )

                    elif which == "cancel":
                        try:
                            rid = str(getattr(frame.cancel, "req_id", "") or "")
                            warn(f"[inbound] CANCEL received from host?? req_id={rid} peer={peer} stream_sess={stream_uuid}")
                        except Exception:
                            exception("[inbound] cancel frame parse failed")

                    else:
                        warn(f"[inbound] unknown frame type={which} peer={peer} stream_sess={stream_uuid}")

            except grpc.RpcError as e:
                warn(
                    f"[inbound] stream ended peer={peer} stream_sess={stream_uuid} "
                    f"host_id={host_id or '?'} grpc={_grpc_err_summary(e)}"
                )
            except Exception:
                exception(f"[inbound] loop crashed peer={peer} stream_sess={stream_uuid} host_id={host_id or '?'}")
            finally:
                info(f"[inbound] exiting peer={peer} stream_sess={stream_uuid} host_id={host_id or '?'}")
                _cleanup("inbound-exit")

        t = threading.Thread(target=inbound_loop, daemon=True, name=f"inbound_{peer_tag}")
        t.start()

        info(f"[Connect] outbound pump starting peer={peer} stream_sess={stream_uuid}")

        printed_waiting = False
        try:
            while not stop.is_set():
                if not context.is_active():
                    _cleanup("context-inactive")
                    break

                if session is None:
                    # NOTE(review): there is no upper bound on how long a peer may
                    # hold the stream open without sending HELLO.
                    if not printed_waiting:
                        info(f"[Connect] waiting for HELLO peer={peer} stream_sess={stream_uuid}")
                        printed_waiting = True
                    time.sleep(0.05)
                    continue

                try:
                    out = session.out_q.get(timeout=0.5)
                except queue.Empty:
                    continue
                except Exception:
                    exception(
                        f"[Connect] outbound queue get failed peer={peer} stream_sess={stream_uuid} "
                        f"host_id={host_id or '?'}"
                    )
                    continue

                if out is None:
                    info(f"[Connect] outbound sentinel -> end stream peer={peer} stream_sess={stream_uuid} host_id={host_id or '?'}")
                    _cleanup("outbound-sentinel")
                    break

                out_type = out.WhichOneof("msg")
                info(f"[Connect] OUT type={out_type} peer={peer} stream_sess={stream_uuid} host_id={host_id or '?'}")
                yield out

            # The inbound thread always sets `stop` (or closes the session)
            # right after recording a fatal error, so the pump loop has ended
            # by the time the failure is visible. Checking here, after the
            # loop, is what actually delivers the status code to the host;
            # checking only inside the loop let the stream end as OK.
            _abort_if_fatal()

        except grpc.RpcError as e:
            warn(
                f"[Connect] outbound ended peer={peer} stream_sess={stream_uuid} "
                f"host_id={host_id or '?'} grpc={_grpc_err_summary(e)}"
            )
            _cleanup("outbound-grpc", detail=_grpc_err_summary(e))
        except GeneratorExit:
            _cleanup("generator-exit")
            raise
        finally:
            _cleanup("connect-finally")
            info(f"[Connect] stream ended peer={peer} stream_sess={stream_uuid} host_id={host_id or '?'}")
|
|
1290
|
+
|
|
1291
|
+
# Credentials + server start
|
|
1292
|
+
|
|
1293
|
+
def build_server_credentials(
    *,
    private_key_pem: bytes,
    certificate_chain_pem: bytes,
    client_ca_pem: Optional[bytes] = None,
    require_client_auth: bool = True,
) -> grpc.ServerCredentials:
    """Build TLS server credentials for the host tunnel.

    Args:
        private_key_pem: PEM-encoded server private key.
        certificate_chain_pem: PEM-encoded server certificate chain.
        client_ca_pem: PEM-encoded CA bundle used to verify client
            certificates. When empty or ``None``, client certificates are
            not verified.
        require_client_auth: Passed to gRPC only when ``client_ca_pem`` is
            provided; it has no effect otherwise.

    Returns:
        The ``grpc.ServerCredentials`` to bind a secure port with.
    """
    info(f"[creds] build_server_credentials client_ca={'yes' if bool(client_ca_pem) else 'no'} require_client_auth={require_client_auth}")

    key_cert_pairs = ((private_key_pem, certificate_chain_pem),)

    if client_ca_pem:
        return grpc.ssl_server_credentials(
            key_cert_pairs,
            root_certificates=client_ca_pem,
            require_client_auth=require_client_auth,
        )

    if require_client_auth:
        # Without a CA bundle the request for client authentication cannot be
        # honoured; say so instead of silently downgrading to server-only TLS.
        warn("[creds] require_client_auth=True but no client_ca provided; client certificates will NOT be verified")

    return grpc.ssl_server_credentials(key_cert_pairs)
|
|
1308
|
+
|
|
1309
|
+
|
|
1310
|
+
def start_host_tunnel_server(
    *,
    host: str,
    port: int,
    server_credentials: grpc.ServerCredentials,
    registry: Optional[HostTunnelRegistry] = None,
    max_workers: int = 32,
) -> Tuple[grpc.Server, threading.Thread, HostTunnelRegistry]:
    """Create the host tunnel gRPC server and start it on a daemon thread.

    Args:
        host: Address to bind.
        port: Port to bind.
        server_credentials: TLS credentials for the secure port.
        registry: Registry to serve. When given it is installed as the
            process-wide singleton; otherwise the singleton is fetched
            (created if needed).
        max_workers: Size of the gRPC worker thread pool.

    Returns:
        ``(server, thread, registry)``. The server is started from the
        returned thread, so it may not be serving yet when this returns.

    Raises:
        RuntimeError: No registry is available, or the secure port could not
            be bound.
    """
    if registry is not None:
        reg = set_tunnel_registry(registry)
    else:
        reg = get_tunnel_registry(create=True)
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if reg is None:
        raise RuntimeError("host tunnel registry is not available")

    options = [
        ("grpc.max_send_message_length", 100 * 1024 * 1024),
        ("grpc.max_receive_message_length", 100 * 1024 * 1024),
        ("grpc.keepalive_time_ms", 30_000),
        ("grpc.keepalive_timeout_ms", 10_000),
        ("grpc.http2.max_pings_without_data", 0),
        ("grpc.keepalive_permit_without_calls", 1),
    ]

    bind_addr = f"{host}:{int(port)}"
    info(f"[server] creating grpc.server max_workers={max_workers} bind={bind_addr}")

    server = grpc.server(
        futures.ThreadPoolExecutor(max_workers=max_workers),
        options=options,
    )

    host_tunnel_pb2_grpc.add_HostTunnelServicer_to_server(HostTunnelServicer(reg), server)

    try:
        added = server.add_secure_port(bind_addr, server_credentials)
        info(f"[server] add_secure_port addr={bind_addr} -> {added}")
        # Some grpcio versions report a bind failure by returning 0 rather
        # than raising; do not start a server that is listening on nothing.
        if not added:
            raise RuntimeError(f"add_secure_port could not bind {bind_addr}")
    except Exception:
        exception(f"[server] add_secure_port failed addr={bind_addr}")
        raise

    def run():
        try:
            info(f"[server] starting on {bind_addr}")
            server.start()
            info(f"[server] started on {bind_addr} (waiting for termination)")
            server.wait_for_termination()
            info("[server] wait_for_termination returned")
        except Exception:
            exception("[server] run() crashed")
            raise

    th = threading.Thread(target=run, daemon=True, name="host_tunnel_server")
    th.start()
    return server, th, reg
|
|
1364
|
+
|
|
1365
|
+
# Singleton registry (process-wide)
|
|
1366
|
+
|
|
1367
|
+
# Guards creation and replacement of the process-wide registry below.
_TUNNEL_REGISTRY_LOCK = threading.Lock()

# Process-wide registry instance; stays None until it is created on demand by
# get_tunnel_registry() or installed explicitly with set_tunnel_registry().
TUNNEL_REGISTRY: Optional["HostTunnelRegistry"] = None
|
|
1369
|
+
|
|
1370
|
+
def get_tunnel_registry(*, create: bool = True) -> Optional["HostTunnelRegistry"]:
    """Return the process-wide tunnel registry.

    Args:
        create: When true (the default), a ``HostTunnelRegistry`` is created
            if none exists yet. When false, ``None`` is returned in that case.

    Returns:
        The singleton registry, or ``None`` if it does not exist and
        ``create`` is false.
    """
    global TUNNEL_REGISTRY

    existing = TUNNEL_REGISTRY
    if existing is not None or not create:
        # Fast path: already initialised, or the caller does not want creation.
        return existing

    with _TUNNEL_REGISTRY_LOCK:
        # Re-check under the lock: another thread may have created it meanwhile.
        if TUNNEL_REGISTRY is None:
            TUNNEL_REGISTRY = HostTunnelRegistry()
        return TUNNEL_REGISTRY
|
|
1382
|
+
|
|
1383
|
+
def set_tunnel_registry(reg: "HostTunnelRegistry") -> "HostTunnelRegistry":
    """Install ``reg`` as the process-wide registry, replacing any existing one.

    Returns:
        The same ``reg`` instance, for call-site convenience.
    """
    global TUNNEL_REGISTRY

    with _TUNNEL_REGISTRY_LOCK:
        TUNNEL_REGISTRY = reg
        return reg
|