hypercli-sdk 0.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- c3/__init__.py +57 -0
- c3/billing.py +72 -0
- c3/client.py +60 -0
- c3/config.py +70 -0
- c3/files.py +386 -0
- c3/http.py +217 -0
- c3/instances.py +211 -0
- c3/job/__init__.py +24 -0
- c3/job/base.py +249 -0
- c3/job/comfyui.py +1469 -0
- c3/jobs.py +285 -0
- c3/logs.py +273 -0
- c3/renders.py +339 -0
- c3/user.py +37 -0
- hypercli_sdk-0.4.2.dist-info/METADATA +141 -0
- hypercli_sdk-0.4.2.dist-info/RECORD +17 -0
- hypercli_sdk-0.4.2.dist-info/WHEEL +4 -0
c3/jobs.py
ADDED
|
@@ -0,0 +1,285 @@
|
|
|
1
|
+
"""Jobs API"""
|
|
2
|
+
import base64
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from typing import TYPE_CHECKING, Iterator
|
|
5
|
+
|
|
6
|
+
if TYPE_CHECKING:
|
|
7
|
+
from .http import HTTPClient
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass
|
|
11
|
+
class Job:
|
|
12
|
+
job_id: str
|
|
13
|
+
job_key: str
|
|
14
|
+
state: str
|
|
15
|
+
gpu_type: str
|
|
16
|
+
gpu_count: int
|
|
17
|
+
region: str
|
|
18
|
+
interruptible: bool
|
|
19
|
+
price_per_hour: float
|
|
20
|
+
price_per_second: float
|
|
21
|
+
docker_image: str
|
|
22
|
+
runtime: int
|
|
23
|
+
hostname: str | None = None
|
|
24
|
+
created_at: float | None = None
|
|
25
|
+
started_at: float | None = None
|
|
26
|
+
completed_at: float | None = None
|
|
27
|
+
|
|
28
|
+
@classmethod
|
|
29
|
+
def from_dict(cls, data: dict) -> "Job":
|
|
30
|
+
return cls(
|
|
31
|
+
job_id=data.get("job_id", ""),
|
|
32
|
+
job_key=data.get("job_key", ""),
|
|
33
|
+
state=data.get("state", ""),
|
|
34
|
+
gpu_type=data.get("gpu_type", ""),
|
|
35
|
+
gpu_count=data.get("gpu_count", 1),
|
|
36
|
+
region=data.get("region", ""),
|
|
37
|
+
interruptible=data.get("interruptible", True),
|
|
38
|
+
price_per_hour=data.get("price_per_hour", 0),
|
|
39
|
+
price_per_second=data.get("price_per_second", 0),
|
|
40
|
+
docker_image=data.get("docker_image", ""),
|
|
41
|
+
runtime=data.get("runtime", 0),
|
|
42
|
+
hostname=data.get("hostname"),
|
|
43
|
+
created_at=data.get("created_at"),
|
|
44
|
+
started_at=data.get("started_at"),
|
|
45
|
+
completed_at=data.get("completed_at"),
|
|
46
|
+
)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
@dataclass
class GPUMetrics:
    """Point-in-time metrics for a single GPU."""

    index: int
    name: str
    utilization: float
    memory_used: float
    memory_total: float
    temperature: int
    power_draw: float

    @classmethod
    def from_dict(cls, data: dict) -> "GPUMetrics":
        """Build GPUMetrics from an API payload (payload keys differ from field names)."""
        # field name -> (payload key, fallback when absent)
        mapping = {
            "index": ("index", 0),
            "name": ("name", ""),
            "utilization": ("utilization_gpu_percent", 0),
            "memory_used": ("memory_used_mb", 0),
            "memory_total": ("memory_total_mb", 0),
            "temperature": ("temperature_c", 0),
            "power_draw": ("power_draw_w", 0),
        }
        return cls(**{f: data.get(key, fb) for f, (key, fb) in mapping.items()})
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
@dataclass
class SystemMetrics:
    """Container-level CPU and memory metrics."""

    cpu_percent: float
    cpu_cores: float
    cpu_unix_percent: float
    memory_used: float
    memory_limit: float

    @classmethod
    def from_dict(cls, data: dict) -> "SystemMetrics":
        """Build SystemMetrics from an API payload."""
        cpu = data.get("cpu_percent", 0)
        return cls(
            cpu_percent=cpu,
            cpu_cores=data.get("cpu_cores", 1),
            # Payloads may lack cpu_unix_percent; fall back to cpu_percent.
            cpu_unix_percent=data.get("cpu_unix_percent", cpu),
            memory_used=data.get("memory_used_mb", 0),
            memory_limit=data.get("memory_limit_mb", 0),
        )
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
@dataclass
class JobMetrics:
    """Combined GPU and system metrics snapshot for a job."""

    gpus: list[GPUMetrics] = field(default_factory=list)
    system: SystemMetrics | None = None

    @classmethod
    def from_dict(cls, data: dict) -> "JobMetrics":
        """Build JobMetrics from an API payload; ``system`` may be absent."""
        raw_system = data.get("system")
        parsed_system = SystemMetrics.from_dict(raw_system) if raw_system else None
        parsed_gpus = [GPUMetrics.from_dict(entry) for entry in data.get("gpus", [])]
        return cls(gpus=parsed_gpus, system=parsed_system)
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
class Jobs:
    """Jobs API wrapper.

    Thin, synchronous wrapper over the ``/api/jobs`` REST endpoints; all
    requests go through the injected HTTP client.
    """

    def __init__(self, http: "HTTPClient"):
        self._http = http

    def list(self, state: str | None = None) -> list[Job]:
        """List all jobs.

        Args:
            state: Optional state filter passed as a query parameter.
        """
        params = {"state": state} if state else None
        data = self._http.get("/api/jobs", params=params)
        # API returns {"jobs": [...], "total_count": ...}; tolerate a bare list.
        jobs = data.get("jobs", []) if isinstance(data, dict) else data
        return [Job.from_dict(j) for j in jobs]

    def get(self, job_id: str) -> Job:
        """Get job details."""
        data = self._http.get(f"/api/jobs/{job_id}")
        return Job.from_dict(data)

    def create(
        self,
        image: str,
        command: str | None = None,
        gpu_type: str = "l40s",
        gpu_count: int = 1,
        region: str | None = None,
        runtime: int | None = None,
        interruptible: bool = True,
        env: dict[str, str] | None = None,
        ports: dict[str, int] | None = None,
        auth: bool = False,
    ) -> Job:
        """Create a new job.

        Args:
            image: Docker image to run
            command: Command to execute (base64 encoded internally)
            gpu_type: GPU type (e.g., "l40s", "a100")
            gpu_count: Number of GPUs
            region: Region to run in
            runtime: Max runtime in seconds
            interruptible: Allow spot/preemptible instances
            env: Environment variables
            ports: Ports to expose. Use {"lb": port} for HTTPS load balancer
            auth: Enable Bearer token auth on load balancer (use with ports={"lb": port})
        """
        payload = {
            "docker_image": image,
            "gpu_type": gpu_type,
            "gpu_count": gpu_count,
            "interruptible": interruptible,
            # The API expects the command base64-encoded; None becomes "".
            "command": base64.b64encode((command or "").encode()).decode(),
        }
        # Optional fields are only sent when truthy, so server defaults apply.
        if region:
            payload["region"] = region
        if runtime:
            payload["runtime"] = runtime
        if env:
            payload["env_vars"] = env
        if ports:
            payload["ports"] = ports
        if auth:
            payload["auth"] = auth

        data = self._http.post("/api/jobs", json=payload)
        return Job.from_dict(data)

    def cancel(self, job_id: str) -> dict:
        """Cancel a job; returns the raw API response."""
        return self._http.delete(f"/api/jobs/{job_id}")

    def extend(self, job_id: str, runtime: int) -> Job:
        """Extend job runtime (seconds)."""
        data = self._http.patch(f"/api/jobs/{job_id}", json={"runtime": runtime})
        return Job.from_dict(data)

    def logs(self, job_id: str) -> str:
        """Get job logs as a single string ("" when absent)."""
        data = self._http.get(f"/api/jobs/{job_id}/logs")
        return data.get("logs", "")

    def metrics(self, job_id: str) -> JobMetrics:
        """Get job GPU metrics."""
        data = self._http.get(f"/api/jobs/{job_id}/metrics")
        return JobMetrics.from_dict(data)

    def token(self, job_id: str) -> str:
        """Get job auth token ("" when absent)."""
        data = self._http.get(f"/api/jobs/{job_id}/token")
        return data.get("token", "")
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
# Utility functions for finding jobs
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def is_uuid(s: str) -> bool:
    """Check whether *s* is a well-formed UUID (job ID).

    A loose heuristic ("has a dash and is long") misclassifies long dashed
    hostnames as job IDs, which makes find_job() take the direct-lookup path
    and miss the hostname/IP search. Parse strictly instead; job IDs are
    assumed to be canonical UUIDs (TODO confirm against the API).
    """
    import uuid  # local import keeps module import-time behavior unchanged

    # Canonical UUIDs contain dashes; reject dash-less hex early so plain
    # 32-char hostnames/hashes are not treated as job IDs.
    if "-" not in s:
        return False
    try:
        uuid.UUID(s)
    except ValueError:
        return False
    return True
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def find_by_id(jobs: Jobs, job_id: str) -> Job | None:
    """Look up a job by its full UUID via a direct API call.

    Args:
        jobs: Jobs API instance
        job_id: Full job UUID

    Returns:
        The Job if the lookup succeeds, otherwise None.
    """
    result = None
    try:
        result = jobs.get(job_id)
    except Exception:
        # Best-effort lookup: any API/parse failure is reported as "not found".
        pass
    return result
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
def find_by_hostname(job_list: list[Job], hostname: str) -> Job | None:
    """Find the first job whose hostname matches exactly or by prefix.

    Args:
        job_list: List of Job objects to search
        hostname: Hostname to match (can be partial prefix)

    Returns:
        First matching Job or None
    """
    # startswith() is True for an exact match too, so a single check covers
    # both cases; falsy hostnames (None/empty) are skipped.
    matches = (
        job
        for job in job_list
        if job.hostname and job.hostname.startswith(hostname)
    )
    return next(matches, None)
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
def find_by_ip(job_list: list[Job], ip: str) -> Job | None:
    """Find the first job whose hostname resolves (via DNS) to *ip*.

    Args:
        job_list: List of Job objects to search
        ip: IP address to match

    Returns:
        First matching Job or None; hostnames that fail DNS are skipped.
    """
    import socket

    for candidate in job_list:
        hostname = candidate.hostname
        if not hostname:
            continue
        try:
            resolved = socket.gethostbyname(hostname)
        except socket.gaierror:
            # Unresolvable hostname - move on to the next job.
            continue
        if resolved == ip:
            return candidate
    return None
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
def find_job(jobs: Jobs, identifier: str, state: str = None) -> Job | None:
    """Find a job by UUID, hostname, or IP address.

    Args:
        jobs: Jobs API instance
        identifier: Job UUID, hostname (partial match), or IP address
        state: Optional state filter for listing jobs

    Returns:
        Matching Job or None
    """
    # UUID-looking identifiers are resolved with a single direct API call.
    if is_uuid(identifier):
        return find_by_id(jobs, identifier)

    # Otherwise fetch the job list once and search it locally.
    candidates = jobs.list(state=state)

    # Hostname prefix match first; only fall back to the slower
    # DNS-resolution-based IP match when that fails.
    match = find_by_hostname(candidates, identifier)
    return match if match else find_by_ip(candidates, identifier)
|
c3/logs.py
ADDED
|
@@ -0,0 +1,273 @@
|
|
|
1
|
+
"""Async log streaming for jobs"""
|
|
2
|
+
import asyncio
|
|
3
|
+
import json
|
|
4
|
+
from collections import deque
|
|
5
|
+
from typing import TYPE_CHECKING, AsyncIterator, Callable
|
|
6
|
+
|
|
7
|
+
import websockets
|
|
8
|
+
|
|
9
|
+
from .config import get_ws_url, WS_LOGS_PATH
|
|
10
|
+
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
from .client import C3
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# Default limits to prevent memory blowup
DEFAULT_MAX_INITIAL_LINES: int = 1000  # Max lines to fetch on initial REST call
DEFAULT_MAX_BUFFER: int = 5000  # Max lines to keep in memory buffer (deque maxlen; oldest dropped)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def fetch_logs(c3: "C3", job_id: str, tail: int = None) -> list[str]:
|
|
21
|
+
"""Fetch logs via REST API (one-time call).
|
|
22
|
+
|
|
23
|
+
Args:
|
|
24
|
+
c3: C3 client
|
|
25
|
+
job_id: Job ID
|
|
26
|
+
tail: Only return last N lines (default: all)
|
|
27
|
+
|
|
28
|
+
Returns:
|
|
29
|
+
List of log lines
|
|
30
|
+
"""
|
|
31
|
+
try:
|
|
32
|
+
logs = c3.jobs.logs(job_id)
|
|
33
|
+
if not logs:
|
|
34
|
+
return []
|
|
35
|
+
lines = logs.strip().split("\n")
|
|
36
|
+
if tail and len(lines) > tail:
|
|
37
|
+
return lines[-tail:]
|
|
38
|
+
return lines
|
|
39
|
+
except Exception:
|
|
40
|
+
return []
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class LogStream:
    """Async log streamer - websocket streaming with optional initial fetch.

    Usage:
        stream = LogStream(c3, job)
        await stream.connect()
        async for line in stream:
            print(line)
        await stream.close()

    This class guarantees:
    - Initial logs fetched ONCE on connect (limited to max_initial_lines)
    - All subsequent logs via websocket (NO polling)
    - Bounded buffer to prevent memory blowup
    - Proper cleanup on close
    """

    def __init__(
        self,
        c3: "C3",
        job_id: str,
        job_key: str | None = None,
        fetch_initial: bool = True,
        max_initial_lines: int = DEFAULT_MAX_INITIAL_LINES,
        max_buffer: int = DEFAULT_MAX_BUFFER,
    ):
        """
        Args:
            c3: C3 client
            job_id: Job ID for REST log fetch
            job_key: Job key for websocket (if None, fetched from job)
            fetch_initial: Whether to fetch existing logs on connect
            max_initial_lines: Max lines to fetch initially (prevents huge fetch)
            max_buffer: Max lines to keep in buffer (oldest dropped)
        """
        self.c3 = c3
        self.job_id = job_id
        self.job_key = job_key
        self.fetch_initial = fetch_initial
        self.max_initial_lines = max_initial_lines
        self.max_buffer = max_buffer

        # Internal state: websocket handle, bounded line buffer (deque maxlen
        # drops the oldest entries), and lifecycle flags.
        self._ws = None
        self._buffer: deque[str] = deque(maxlen=max_buffer)
        self._initial_fetched = False
        self._connected = False
        self._closed = False

    @property
    def status(self) -> str:
        """Connection status: disconnected, connecting, connected, closed"""
        # Check order matters: "closed" wins over any stale flags.
        if self._closed:
            return "closed"
        if self._connected:
            return "connected"
        if self._ws:
            return "connecting"
        return "disconnected"

    async def connect(self) -> list[str]:
        """Connect to log stream.

        Returns initial logs (if fetch_initial=True).
        After this, iterate with `async for line in stream`.

        Raises:
            RuntimeError: if the stream was already closed.
        """
        if self._closed:
            raise RuntimeError("LogStream is closed")

        initial_lines = []

        # Fetch initial logs ONCE (bounded)
        if self.fetch_initial and not self._initial_fetched:
            initial_lines = fetch_logs(self.c3, self.job_id, tail=self.max_initial_lines)
            for line in initial_lines:
                self._buffer.append(line)
            self._initial_fetched = True

        # Get job_key if not provided
        # NOTE(review): c3.jobs.get is a synchronous HTTP call inside an async
        # method - it blocks the event loop; consider an executor.
        if not self.job_key:
            job = self.c3.jobs.get(self.job_id)
            self.job_key = job.job_key

        # Connect websocket (skipped when the job exposes no job_key; the
        # stream then stays "disconnected" and iteration will raise).
        if self.job_key and not self._ws:
            ws_url = get_ws_url()
            full_url = f"{ws_url}{WS_LOGS_PATH}/{self.job_key}"
            self._ws = await websockets.connect(full_url)
            self._connected = True

        return initial_lines

    async def close(self):
        """Close the websocket connection (idempotent)"""
        self._closed = True
        self._connected = False
        if self._ws:
            await self._ws.close()
            self._ws = None

    def get_buffer(self) -> list[str]:
        """Get current buffer contents (bounded, oldest may be dropped)"""
        return list(self._buffer)

    def clear_buffer(self):
        """Clear the buffer"""
        self._buffer.clear()

    async def __aiter__(self) -> AsyncIterator[str]:
        """Async iterate over NEW log lines from websocket.

        Note: This does NOT yield initial logs. Call connect() first
        and handle the returned initial lines separately.

        Raises:
            RuntimeError: if connect() has not established a websocket.
        """
        # NOTE: `async def __aiter__` with yields returns an async generator,
        # which is a valid async iterator for `async for`.
        if not self._ws:
            raise RuntimeError("Not connected. Call connect() first.")

        try:
            async for message in self._ws:
                if self._closed:
                    break
                try:
                    # Messages are JSON envelopes; only "log" events carry lines.
                    data = json.loads(message)
                    if data.get("event") == "log" and data.get("log"):
                        for line in data["log"].splitlines():
                            if line:
                                self._buffer.append(line)
                                yield line
                except json.JSONDecodeError:
                    # Non-JSON frames are ignored.
                    continue
        except websockets.ConnectionClosed:
            # Server hung up; leave _ws as-is and just stop iterating.
            self._connected = False

    async def __aenter__(self):
        await self.connect()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        await self.close()
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
async def stream_logs(
    c3: "C3",
    job_id: str,
    on_line: Callable[[str], None],
    until_state: set[str] | None = None,
    poll_state_interval: float = 2.0,
    fetch_initial: bool = True,
    fetch_final: bool = True,
    max_initial_lines: int = DEFAULT_MAX_INITIAL_LINES,
) -> None:
    """Stream logs until job reaches a terminal state.

    Args:
        c3: C3 client
        job_id: Job ID to stream logs from
        on_line: Callback for each log line (called immediately, no buffering)
        until_state: States to stop on (default: terminal states)
        poll_state_interval: How often to check job STATE (NOT log polling!)
        fetch_initial: Fetch existing logs on start
        fetch_final: Fetch logs one more time after job terminates
        max_initial_lines: Max lines to fetch initially

    This function:
    - Fetches initial logs ONCE (bounded)
    - Streams via websocket (NO log polling)
    - Polls job STATE only (to detect termination)
    - Optionally fetches final logs ONCE when job terminates

    NOTE(review): c3.jobs.get is a synchronous HTTP call inside this
    coroutine - it blocks the event loop for the duration of each request.
    """
    if until_state is None:
        until_state = {"succeeded", "failed", "canceled", "terminated"}

    job = c3.jobs.get(job_id)
    initial_fetched = False
    ws = None

    try:
        # Wait for job to be assigned/running
        while job.state in ("pending", "queued"):
            await asyncio.sleep(poll_state_interval)
            job = c3.jobs.get(job_id)

        # Check for immediate terminal state
        if job.state in until_state:
            if fetch_final:
                for line in fetch_logs(c3, job_id, tail=max_initial_lines):
                    on_line(line)
            return

        # Fetch initial logs ONCE when running (bounded)
        if fetch_initial and job.state == "running" and not initial_fetched:
            for line in fetch_logs(c3, job_id, tail=max_initial_lines):
                on_line(line)
            initial_fetched = True

        # Connect websocket (only possible when the job has a job_key)
        if job.job_key:
            ws_url = get_ws_url()
            full_url = f"{ws_url}{WS_LOGS_PATH}/{job.job_key}"
            ws = await websockets.connect(full_url)

        # Stream logs while checking job state periodically
        while True:
            if ws is None:
                # BUGFIX: without a websocket, ws.recv() below would raise
                # AttributeError. Fall back to polling the job STATE only
                # until a terminal state is reached; final logs (if enabled)
                # are still fetched after the loop.
                await asyncio.sleep(poll_state_interval)
                job = c3.jobs.get(job_id)
                if job.state in until_state:
                    break
                continue
            try:
                # Wait for message with timeout to allow state checks
                message = await asyncio.wait_for(ws.recv(), timeout=poll_state_interval)
                try:
                    data = json.loads(message)
                    if data.get("event") == "log" and data.get("log"):
                        for line in data["log"].splitlines():
                            if line:
                                on_line(line)
                except json.JSONDecodeError:
                    # Ignore non-JSON frames.
                    continue
            except asyncio.TimeoutError:
                # Check job state (NOT polling logs!)
                job = c3.jobs.get(job_id)
                if job.state in until_state:
                    break
            except websockets.ConnectionClosed:
                break

        # Fetch final logs ONCE (may have missed some during shutdown)
        if fetch_final:
            # Small delay to let final logs flush
            await asyncio.sleep(0.5)
            for line in fetch_logs(c3, job_id, tail=max_initial_lines):
                on_line(line)

    finally:
        if ws:
            await ws.close()
|