observability-agent 0.1.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,6 @@
1
+ Metadata-Version: 2.4
2
+ Name: observability-agent
3
+ Version: 0.1.3
4
+ Summary: Opentrons observability relay agent
5
+ Requires-Python: >=3.10
6
+ Requires-Dist: httpx>=0.27.0
@@ -0,0 +1,6 @@
1
+ Metadata-Version: 2.4
2
+ Name: observability-agent
3
+ Version: 0.1.3
4
+ Summary: Opentrons observability relay agent
5
+ Requires-Python: >=3.10
6
+ Requires-Dist: httpx>=0.27.0
@@ -0,0 +1,8 @@
1
+ pyproject.toml
2
+ run_agent.py
3
+ observability_agent.egg-info/PKG-INFO
4
+ observability_agent.egg-info/SOURCES.txt
5
+ observability_agent.egg-info/dependency_links.txt
6
+ observability_agent.egg-info/entry_points.txt
7
+ observability_agent.egg-info/requires.txt
8
+ observability_agent.egg-info/top_level.txt
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ observability-agent = run_agent:main
@@ -0,0 +1,16 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "observability-agent"
7
+ version = "0.1.3"
8
+ description = "Opentrons observability relay agent"
9
+ requires-python = ">=3.10"
10
+ dependencies = ["httpx>=0.27.0"]
11
+
12
+ [project.scripts]
13
+ observability-agent = "run_agent:main"
14
+
15
+ [tool.setuptools]
16
+ py-modules = ["run_agent"]
@@ -0,0 +1,367 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Local relay agent: polls Opentrons robot(s) on the lab network and POSTs telemetry to the cloud.
4
+ Supports HTTP and HTTPS per robot (e.g. 198.51.100.73 and 203.0.113.198 over HTTPS, localhost over HTTP).
5
+
6
+ Usage:
7
+ python run_agent.py --lab-id=LAB_ID --agent-token=TOKEN --backend-url=https://your-api.com
8
+ python run_agent.py --config=agent_config.json
9
+
10
+ Robot addresses for production come from the cloud app (Fleet Manager): the agent calls
11
+ GET /api/agent/robot-poll-targets. Use --local-robots (or use_local_robots in JSON) only
12
+ for development without the cloud UI.
13
+
14
+ Example agent_config.json (production — no robots section):
15
+ {
16
+ "lab_id": "abc123",
17
+ "agent_token": "your-token",
18
+ "backend_url": "https://your-api.com",
19
+ "robot_poll_interval_seconds": 5
20
+ }
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+ import argparse
26
+ import json
27
+ import logging
28
+ import os
29
+ import sys
30
+ import time
31
+ from pathlib import Path
32
+
33
+ import httpx
34
+
35
# Request timeouts, in seconds: per-robot polling vs. calls to the cloud backend.
ROBOT_TIMEOUT = 10.0
BACKEND_TIMEOUT = 30.0

# Bounds (seconds) of the doubling retry delay used by the main loop after a failure.
MIN_BACKOFF = 5.0
MAX_BACKOFF = 60.0

# How often to refresh robot list from the cloud (when not using --local-robots).
TARGETS_REFRESH_SECONDS = 30.0

# Fallback robots for local mode when none are configured:
# two reached over HTTPS, plus localhost over plain HTTP.
DEFAULT_ROBOTS = [
    dict(ip="198.51.100.73", scheme="https", port=31950),
    dict(ip="203.0.113.198", scheme="https", port=31950),
    dict(ip="localhost", scheme="http", port=31950),
]

# Root logging is configured at import time; every message goes through the "agent" logger.
logging.basicConfig(
    datefmt="%Y-%m-%d %H:%M:%S",
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.INFO,
)
log = logging.getLogger("agent")
55
+
56
+
57
+ def _url(ip: str, path: str, scheme: str = "http", port: int = 31950) -> str:
58
+ path = path.strip("/")
59
+ return f"{scheme}://{ip}:{port}/{path}"
60
+
61
+
62
def fetch_robot_telemetry(
    ip: str,
    scheme: str = "http",
    port: int = 31950,
    timeout: float = ROBOT_TIMEOUT,
) -> dict | None:
    """Poll one robot's ``health``, ``runs`` and ``logs`` endpoints.

    Args:
        ip: Robot host.
        scheme: URL scheme used for all three requests.
        port: Robot HTTP port.
        timeout: Timeout passed to the HTTP client, in seconds.

    Returns:
        A dict with keys ``ip``, ``health``, ``runs``, ``logs`` and ``serial``.
        A section stays ``None`` when its endpoint did not answer 200.
        ``None`` is returned when any request raises (the error is logged as a
        warning), so callers can skip the robot for this cycle.
    """
    headers = {"Content-Type": "application/json", "Opentrons-Version": "*"}
    out = {"ip": ip, "health": None, "runs": None, "logs": None, "serial": None}
    try:
        with httpx.Client(timeout=timeout) as client:
            # Health
            r = client.get(_url(ip, "health", scheme, port), headers=headers)
            if r.status_code == 200:
                try:
                    body = r.json()
                except ValueError:
                    body = None
                if not isinstance(body, dict):
                    body = {}
                # Header values win when present, so existing behavior is kept.
                # When they are absent, fall back to the JSON body.
                # NOTE(review): the robot-server API documents `name` and
                # `robot_serial` in the /health body; confirm whether the
                # header path is ever populated in this deployment.
                name = r.headers.get("name")
                if name is None:
                    name = body.get("name")
                serial = r.headers.get("serial_number")
                if serial is None:
                    serial = body.get("robot_serial")
                out["health"] = {
                    "name": name,
                    "date": r.headers.get("date"),
                    "logs": r.headers.get("logs"),
                    "serial_number": serial,
                    "status": r.headers.get("status"),
                    "health_data": r.headers.get("health_data"),
                }
                out["serial"] = serial
            # Runs
            r = client.get(_url(ip, "runs", scheme, port), headers=headers)
            if r.status_code == 200:
                try:
                    out["runs"] = r.json()
                except ValueError:
                    # Unparseable body: report an empty result rather than
                    # dropping the whole robot.
                    out["runs"] = {}
            # Logs
            r = client.get(_url(ip, "logs", scheme, port), headers=headers)
            if r.status_code == 200:
                # NOTE(review): the value is read from a response header named
                # "logs", not from the body; confirm this is intended.
                out["logs"] = r.headers.get("logs") or ""
    except Exception as e:
        # Best-effort polling: any transport failure only skips this robot.
        log.warning("Robot %s (%s): %s", ip, scheme, e)
        return None
    return out
100
+
101
+
102
def build_telemetry_payload(robots_config: list, timeout: float = ROBOT_TIMEOUT) -> list:
    """Poll every configured robot and build the ``robots`` list for the POST body.

    Args:
        robots_config: Robot entries. Each is either a host string (polled over
            HTTP on port 31950) or a dict with ``ip`` and optional ``scheme``
            and ``port``.
        timeout: Per-robot request timeout in seconds.

    Returns:
        One dict per robot that answered, with keys ``ip``, ``robot_id``,
        ``serial``, ``health``, ``runs`` and ``logs``. Robots that could not be
        polled, and malformed entries, are left out.
    """
    payload_robots = []
    for entry in robots_config:
        if isinstance(entry, str):
            ip, scheme, port = entry.strip(), "http", 31950
        elif isinstance(entry, dict):
            raw_ip = entry.get("ip")
            ip = raw_ip.strip() if isinstance(raw_ip, str) else ""
            # Same normalisation as the cloud-provided targets: unknown or
            # non-string schemes and unparseable ports fall back to the defaults
            # instead of aborting the whole cycle.
            raw_scheme = entry.get("scheme")
            scheme = raw_scheme.lower() if isinstance(raw_scheme, str) else "http"
            if scheme not in ("http", "https"):
                scheme = "http"
            try:
                port = int(entry.get("port") or 31950)
            except (TypeError, ValueError):
                port = 31950
        else:
            log.warning("Ignoring malformed robot entry: %r", entry)
            continue
        if not ip:
            continue
        data = fetch_robot_telemetry(ip, scheme=scheme, port=port, timeout=timeout)
        if data is None:
            continue
        serial = data.get("serial")
        payload_robots.append({
            "ip": ip,
            "robot_id": serial,
            "serial": serial,
            "health": data.get("health"),
            "runs": data.get("runs"),
            "logs": data.get("logs"),
        })
    return payload_robots
126
+
127
+
128
def fetch_robot_poll_targets(
    backend_url: str,
    agent_token: str,
    timeout: float = BACKEND_TIMEOUT,
) -> list[dict] | None:
    """GET the list of robots to poll from the cloud backend.

    Args:
        backend_url: Base URL of the backend; a trailing slash is ignored.
        agent_token: Token sent as a ``Bearer`` Authorization header.
        timeout: Request timeout in seconds.

    Returns:
        A list of ``{"ip", "scheme", "port"}`` dicts (possibly empty), or
        ``None`` on a network error, a non-200 status, or a response without a
        ``robots`` list. Every failure is logged.
    """
    url = f"{backend_url.rstrip('/')}/api/agent/robot-poll-targets"
    headers = {
        "Authorization": f"Bearer {agent_token}",
        "Accept": "application/json",
    }
    try:
        with httpx.Client(timeout=timeout) as client:
            r = client.get(url, headers=headers)
            if r.status_code != 200:
                log.error("GET robot-poll-targets %s: %s", r.status_code, r.text[:200])
                return None
            data = r.json()
    except Exception as e:
        log.error("GET robot-poll-targets failed: %s", e)
        return None

    robots = data.get("robots") if isinstance(data, dict) else None
    if not isinstance(robots, list):
        log.error("GET robot-poll-targets: response has no 'robots' list")
        return None

    out: list[dict] = []
    for item in robots:
        if not isinstance(item, dict):
            continue
        # Validate each field on its own so one malformed entry is skipped or
        # defaulted instead of discarding the whole target list.
        raw_ip = item.get("ip")
        ip = raw_ip.strip() if isinstance(raw_ip, str) else ""
        if not ip:
            continue
        raw_scheme = item.get("scheme")
        scheme = raw_scheme.lower() if isinstance(raw_scheme, str) else "http"
        if scheme not in ("http", "https"):
            scheme = "http"
        try:
            port = int(item.get("port") or 31950)
        except (TypeError, ValueError):
            port = 31950
        out.append({"ip": ip, "scheme": scheme, "port": port})
    return out
168
+
169
+
170
def post_telemetry(
    backend_url: str,
    agent_token: str,
    lab_id: str,
    robots: list,
    timeout: float = BACKEND_TIMEOUT,
) -> bool:
    """POST one telemetry batch to the cloud backend.

    Args:
        backend_url: Base URL of the backend; a trailing slash is ignored.
        agent_token: Token sent as a ``Bearer`` Authorization header.
        lab_id: Lab identifier placed in the request body.
        robots: Per-robot telemetry dicts placed in the request body.
        timeout: Request timeout in seconds.

    Returns:
        ``True`` when the backend answers 200 or 201; ``False`` on any other
        status or on a request error (both are logged).
    """
    url = f"{backend_url.rstrip('/')}/api/agent/telemetry"
    headers = {
        "Authorization": f"Bearer {agent_token}",
        "Content-Type": "application/json",
    }
    body = {"lab_id": lab_id, "robots": robots}
    try:
        with httpx.Client(timeout=timeout) as client:
            r = client.post(url, json=body, headers=headers)
            if r.status_code in (200, 201):
                return True
            # Placeholders must match the argument count; a mismatch makes the
            # logging module emit its own formatting error instead of this line.
            log.error("Backend %s: %s", r.status_code, r.text[:200])
            return False
    except Exception as e:
        log.error("Backend POST failed: %s", e)
        return False
194
+
195
+
196
def load_config(path: str) -> dict:
    """Load the agent's JSON config file and fill in defaults.

    Args:
        path: Path to a UTF-8 JSON file whose top level is an object.

    Returns:
        The parsed dict with these keys guaranteed present: ``robots`` (a list),
        ``robot_poll_interval_seconds``, ``backend_url``, ``lab_id``,
        ``agent_token`` and ``use_local_robots``. Missing connection settings
        default to the ``BACKEND_URL`` / ``LAB_ID`` / ``AGENT_TOKEN``
        environment variables, or ``""`` when those are unset.

    Raises:
        ValueError: If the top level is not a JSON object, or if
            ``robot_poll_interval_seconds`` is not a non-negative number.
        OSError: If the file cannot be opened.
    """
    with open(path, encoding="utf-8") as f:
        data = json.load(f)
    if not isinstance(data, dict):
        raise ValueError("Config must be a JSON object")

    # A missing, null or non-list "robots" value is normalised to an empty list.
    if not isinstance(data.get("robots"), list):
        data["robots"] = []

    # Treat an explicit null like a missing key, and reject values that would
    # only fail later inside float() or time.sleep().
    interval = data.get("robot_poll_interval_seconds")
    if interval is None:
        data["robot_poll_interval_seconds"] = 5
    else:
        try:
            valid = float(interval) >= 0  # False for NaN as well
        except (TypeError, ValueError):
            valid = False
        if not valid:
            raise ValueError(
                "robot_poll_interval_seconds must be a non-negative number"
            )

    data.setdefault("backend_url", os.environ.get("BACKEND_URL", ""))
    data.setdefault("lab_id", os.environ.get("LAB_ID", ""))
    data.setdefault("agent_token", os.environ.get("AGENT_TOKEN", ""))
    data.setdefault("use_local_robots", False)
    return data
213
+
214
+
215
+ def _env_use_local_robots() -> bool:
216
+ v = os.environ.get("AGENT_USE_LOCAL_ROBOTS", "").strip().lower()
217
+ return v in ("1", "true", "yes")
218
+
219
+
220
+ def _config_use_local_robots(cfg: dict) -> bool:
221
+ v = cfg.get("use_local_robots")
222
+ if isinstance(v, bool):
223
+ return v
224
+ if isinstance(v, str):
225
+ return v.strip().lower() in ("1", "true", "yes")
226
+ return False
227
+
228
+
229
def main() -> int:
    """Run the relay agent: poll robots and POST their telemetry until interrupted.

    Settings come from the command line, the environment (``LAB_ID``,
    ``AGENT_TOKEN``, ``BACKEND_URL``, ``AGENT_USE_LOCAL_ROBOTS``) and, when
    ``--config`` is given, the JSON config file, whose non-empty values take
    precedence over the command line.

    In local mode the robot list comes from the config or ``--robot-ips``, with
    ``DEFAULT_ROBOTS`` as the fallback. Otherwise it is fetched from the backend
    and refreshed every ``TARGETS_REFRESH_SECONDS``.

    Returns:
        ``1`` when required settings are missing or the poll interval is
        invalid; ``0`` after a clean stop on Ctrl-C.
    """
    ap = argparse.ArgumentParser(description="Opentrons observability relay agent")
    ap.add_argument("--lab-id", default=os.environ.get("LAB_ID"), help="Lab ID")
    ap.add_argument("--agent-token", default=os.environ.get("AGENT_TOKEN"), help="Agent token")
    ap.add_argument("--backend-url", default=os.environ.get("BACKEND_URL"), help="Cloud backend URL")
    ap.add_argument("--robot-ips", help="With --local-robots: comma-separated robot IPs")
    ap.add_argument("--config", help="Path to agent_config.json")
    ap.add_argument("--interval", type=float, default=5, help="Poll interval in seconds")
    ap.add_argument("--https-ips", help="With --local-robots: comma-separated IPs to use HTTPS")
    ap.add_argument(
        "--local-robots",
        action="store_true",
        help="Use robots from config/--robot-ips instead of the cloud (dev only; production uses Fleet Manager)",
    )
    args = ap.parse_args()

    # Local mode can be switched on by the flag, the environment, or the config file.
    use_local = bool(args.local_robots) or _env_use_local_robots()

    if args.config:
        config = load_config(args.config)
        lab_id = config.get("lab_id") or args.lab_id
        agent_token = config.get("agent_token") or args.agent_token
        backend_url = config.get("backend_url") or args.backend_url
        raw_interval = config.get("robot_poll_interval_seconds", args.interval)
        if raw_interval is None:
            # An explicit null in the config means "use the command-line value".
            raw_interval = args.interval
        use_local = use_local or _config_use_local_robots(config)
        if use_local:
            robots_config = list(config.get("robots") or []) or list(DEFAULT_ROBOTS)
        else:
            robots_config = []
    else:
        lab_id = args.lab_id
        agent_token = args.agent_token
        backend_url = args.backend_url
        raw_interval = args.interval
        if not use_local:
            robots_config = []
        elif args.robot_ips:
            ips = [s.strip() for s in args.robot_ips.split(",") if s.strip()]
            if args.https_ips:
                https_ips = {s.strip() for s in args.https_ips.split(",") if s.strip()}
            else:
                # Without --https-ips, only these two hosts are polled over HTTPS.
                https_ips = {"198.51.100.73", "203.0.113.198"}
            robots_config = [
                {"ip": ip, "scheme": "https" if ip in https_ips else "http", "port": 31950}
                for ip in ips
            ]
        else:
            robots_config = list(DEFAULT_ROBOTS)

    if not lab_id or not agent_token or not backend_url:
        log.error("Provide --lab-id, --agent-token, and --backend-url (or set LAB_ID, AGENT_TOKEN, BACKEND_URL)")
        return 1

    # Validate once up front: a bad interval would otherwise only surface as a
    # crash in float() or time.sleep() after the loop has started.
    try:
        interval = float(raw_interval)
    except (TypeError, ValueError):
        log.error("Invalid poll interval %r; expected a number of seconds", raw_interval)
        return 1
    if not interval >= 0:  # also rejects NaN
        log.error("Invalid poll interval %r; must not be negative", raw_interval)
        return 1

    if use_local:
        log.info(
            "Lab %s; backend %s; LOCAL robots %s; interval %.1fs",
            lab_id,
            backend_url,
            [r.get("ip") if isinstance(r, dict) else r for r in robots_config],
            interval,
        )
    else:
        log.info(
            "Lab %s; backend %s; robot list from cloud (GET /api/agent/robot-poll-targets); interval %.1fs",
            lab_id,
            backend_url,
            interval,
        )

    backoff = MIN_BACKOFF
    cached_cloud_robots: list = []
    # Monotonic timestamp of the last refresh attempt; None forces the first fetch.
    last_targets_fetch: float | None = None
    have_cloud_targets_response = False

    # KeyboardInterrupt is handled around the whole loop so that Ctrl-C during
    # any sleep (interval or backoff) also stops cleanly.
    try:
        while True:
            try:
                if use_local:
                    active_robots = robots_config
                else:
                    # Monotonic clock: unaffected by wall-clock adjustments.
                    now = time.monotonic()
                    if (
                        last_targets_fetch is None
                        or now - last_targets_fetch >= TARGETS_REFRESH_SECONDS
                    ):
                        fetched = fetch_robot_poll_targets(backend_url, agent_token)
                        last_targets_fetch = now
                        if fetched is not None:
                            cached_cloud_robots = fetched
                            have_cloud_targets_response = True
                        elif not cached_cloud_robots:
                            log.warning(
                                "Could not load robot list from cloud yet; retry in %.0fs",
                                TARGETS_REFRESH_SECONDS,
                            )
                    # On a failed refresh the previously cached list keeps being used.
                    active_robots = cached_cloud_robots

                if not use_local and not active_robots:
                    if have_cloud_targets_response:
                        log.warning(
                            "No robot addresses in the cloud for this lab. Add them in Fleet Manager (web app)."
                        )
                    else:
                        log.warning(
                            "Waiting for robot list from the cloud API (GET /api/agent/robot-poll-targets)."
                        )
                    time.sleep(interval)
                    continue

                robots_payload = build_telemetry_payload(active_robots)
                if not robots_payload:
                    log.warning("No robot data collected this cycle")
                elif post_telemetry(backend_url, agent_token, lab_id, robots_payload):
                    log.info("POST ok (%d robot(s))", len(robots_payload))
                    backoff = MIN_BACKOFF
                else:
                    log.warning("POST failed; retry in %.0fs", backoff)
                    time.sleep(backoff)
                    backoff = min(backoff * 2, MAX_BACKOFF)
                    continue
            except Exception as e:
                # Top-level boundary: log, back off, and keep the agent alive.
                log.exception("Cycle error: %s", e)
                time.sleep(backoff)
                backoff = min(backoff * 2, MAX_BACKOFF)
                continue
            time.sleep(interval)
    except KeyboardInterrupt:
        log.info("Stopping")
    return 0
364
+
365
+
366
# Script entry point: run the agent and use main()'s return value as the exit status.
if __name__ == "__main__":
    sys.exit(main())
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+