ttsd-colabcli 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli.js +148 -0
- package/core/app/__init__.py +0 -0
- package/core/app/colab_cli/__init__.py +0 -0
- package/core/app/colab_cli/__pycache__/__init__.cpython-312.pyc +0 -0
- package/core/app/colab_cli/__pycache__/auth.cpython-312.pyc +0 -0
- package/core/app/colab_cli/__pycache__/auto_update.cpython-312.pyc +0 -0
- package/core/app/colab_cli/__pycache__/cli.cpython-312.pyc +0 -0
- package/core/app/colab_cli/__pycache__/client.cpython-312.pyc +0 -0
- package/core/app/colab_cli/__pycache__/common.cpython-312.pyc +0 -0
- package/core/app/colab_cli/__pycache__/console.cpython-312.pyc +0 -0
- package/core/app/colab_cli/__pycache__/contents.cpython-312.pyc +0 -0
- package/core/app/colab_cli/__pycache__/history.cpython-312.pyc +0 -0
- package/core/app/colab_cli/__pycache__/runtime.cpython-312.pyc +0 -0
- package/core/app/colab_cli/__pycache__/state.cpython-312.pyc +0 -0
- package/core/app/colab_cli/__pycache__/utils.cpython-312.pyc +0 -0
- package/core/app/colab_cli/auth.py +278 -0
- package/core/app/colab_cli/auto_update.py +248 -0
- package/core/app/colab_cli/cli.py +155 -0
- package/core/app/colab_cli/client.py +310 -0
- package/core/app/colab_cli/commands/__init__.py +14 -0
- package/core/app/colab_cli/commands/__pycache__/__init__.cpython-312.pyc +0 -0
- package/core/app/colab_cli/commands/__pycache__/automation.cpython-312.pyc +0 -0
- package/core/app/colab_cli/commands/__pycache__/execution.cpython-312.pyc +0 -0
- package/core/app/colab_cli/commands/__pycache__/files.cpython-312.pyc +0 -0
- package/core/app/colab_cli/commands/__pycache__/run.cpython-312.pyc +0 -0
- package/core/app/colab_cli/commands/__pycache__/session.cpython-312.pyc +0 -0
- package/core/app/colab_cli/commands/__pycache__/utility.cpython-312.pyc +0 -0
- package/core/app/colab_cli/commands/automation.py +265 -0
- package/core/app/colab_cli/commands/execution.py +362 -0
- package/core/app/colab_cli/commands/files.py +204 -0
- package/core/app/colab_cli/commands/run.py +477 -0
- package/core/app/colab_cli/commands/session.py +519 -0
- package/core/app/colab_cli/commands/utility.py +436 -0
- package/core/app/colab_cli/common.py +185 -0
- package/core/app/colab_cli/console.py +172 -0
- package/core/app/colab_cli/contents.py +93 -0
- package/core/app/colab_cli/converter.py +184 -0
- package/core/app/colab_cli/history.py +65 -0
- package/core/app/colab_cli/oauth_config.json +11 -0
- package/core/app/colab_cli/repl.py +173 -0
- package/core/app/colab_cli/runtime.py +262 -0
- package/core/app/colab_cli/state.py +156 -0
- package/core/app/colab_cli/utils.py +85 -0
- package/core/colab/worker.py +679 -0
- package/core/daemon.py +184 -0
- package/core/requirements.txt +8 -0
- package/package.json +22 -0
|
@@ -0,0 +1,519 @@
|
|
|
1
|
+
# Copyright 2026 Google LLC
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import os
|
|
16
|
+
import subprocess
|
|
17
|
+
import sys
|
|
18
|
+
import time
|
|
19
|
+
import uuid
|
|
20
|
+
from typing import Any, Dict, Optional
|
|
21
|
+
import typer
|
|
22
|
+
from typing_extensions import Annotated
|
|
23
|
+
|
|
24
|
+
from app.colab_cli.client import (
|
|
25
|
+
Accelerator,
|
|
26
|
+
ColabRequestError,
|
|
27
|
+
PostAssignmentResponse,
|
|
28
|
+
Variant,
|
|
29
|
+
)
|
|
30
|
+
from app.colab_cli.utils import get_status_code
|
|
31
|
+
from app.colab_cli.state import SessionState
|
|
32
|
+
from app.colab_cli.runtime import ColabRuntime
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _is_scope_error(e: Exception) -> bool:
|
|
36
|
+
"""True if a ColabRequestError's response body indicates a missing OAuth scope.
|
|
37
|
+
|
|
38
|
+
The frontend returns a `google.rpc.Status` with `code=7` (PERMISSION_DENIED)
|
|
39
|
+
and a `DebugInfo` payload mentioning `SCOPE_NOT_PERMITTED` /
|
|
40
|
+
"insufficient authentication scopes". Match on either substring so we
|
|
41
|
+
don't depend on the exact wording of one of them.
|
|
42
|
+
"""
|
|
43
|
+
body = getattr(e, "response_body", None) or ""
|
|
44
|
+
body_str = str(body)
|
|
45
|
+
return (
|
|
46
|
+
"SCOPE_NOT_PERMITTED" in body_str
|
|
47
|
+
or "insufficient authentication scopes" in body_str
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _scope_remediation_message(provider) -> str:
    """Build the user-facing fix-it text for a missing-scope failure.

    The first line always names the required OAuth scope; the remainder is
    tailored to the auth provider: ADC users get the exact `gcloud` command
    to re-authenticate with, every other provider is told to drop the cached
    token and re-run the consent flow.
    """
    # Imported here rather than at module level to avoid a circular import.
    from app.colab_cli.auth import AuthProvider

    scope_note = (
        "The Colab keep-alive RPC requires the "
        "'https://www.googleapis.com/auth/colaboratory' OAuth scope."
    )

    if provider == AuthProvider.ADC:
        scope_list = ",".join(
            (
                "openid",
                "https://www.googleapis.com/auth/cloud-platform",
                "https://www.googleapis.com/auth/userinfo.email",
                "https://www.googleapis.com/auth/colaboratory",
            )
        )
        steps = (
            "Re-authenticate ADC with both userinfo.email (required by the "
            "Colab session backend at colab.research.google.com) and "
            "colaboratory (required by the runtime service at "
            "colab.pa.googleapis.com). The cloud-platform and openid scopes "
            "are required by gcloud itself:\n"
            " gcloud auth application-default login \\\n"
            f" --scopes={scope_list}\n"
            "Then re-run `colab new`."
        )
    else:
        # OAuth2 and any provider added later share this fallback.
        steps = (
            "Delete the cached token at ~/.config/colab-cli/token.json and "
            "re-run `colab new` to trigger a fresh consent flow that includes "
            "the colaboratory scope."
        )

    return f"{scope_note}\n{steps}"
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _hardware_label(accelerator: str) -> str:
|
|
85
|
+
"""`NONE` -> `CPU`; everything else passes through."""
|
|
86
|
+
return "CPU" if accelerator == "NONE" else accelerator
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _format_session_line(
    name: str,
    endpoint: str,
    accelerator: str,
    variant: str,
    status: Optional[str] = None,
) -> str:
    """Render the one-line summary used everywhere a session is displayed.

    Layout: ``[name] endpoint | Hardware: X | Variant: Y[ | Status: Z]``.
    The status segment is appended only when ``status`` is not ``None``.
    Callers pass ``"?"`` as the name for server-side assignments that have
    no matching local state.
    """
    hardware = _hardware_label(accelerator)
    line = f"[{name}] {endpoint} | Hardware: {hardware} | Variant: {variant}"
    if status is None:
        return line
    return f"{line} | Status: {status}"
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def new(
    session: Annotated[
        Optional[str], typer.Option("-s", "--session", help="Session name")
    ] = None,
    tpu: Annotated[
        Optional[str],
        typer.Option(
            help="TPU accelerator variant. Supported: v5e1, v6e1.",
        ),
    ] = None,
    gpu: Annotated[
        Optional[str],
        typer.Option(
            help=(
                "GPU accelerator variant. Supported: T4, L4, G4, H100, A100."
                "\n\nIf omitted (along with --tpu), a CPU runtime is created."
                "\n\nAvailability varies by Colab subscription tier."
            ),
        ),
    ] = None,
):
    """Create a new session"""
    # NOTE: the docstring above is kept short on purpose; Typer renders it as
    # the command's help text.
    #
    # Flow: resolve the requested accelerator -> request an assignment ->
    # pre-flight the keep-alive RPC -> persist the session -> spawn the
    # detached keep-alive daemon -> persist again with the daemon PID.
    # Exits with code 1 (typer.Exit) on an unsupported accelerator name, on a
    # backend 400 for a requested accelerator, and on a missing-scope 403
    # during the pre-flight. Other ColabRequestErrors from `assign` propagate.
    from app.colab_cli.common import state

    # Accepted accelerator names, matched case-insensitively.
    tpu_choices = {
        "v5e1": Accelerator.V5E1,
        "v6e1": Accelerator.V6E1,
    }
    gpu_choices = {
        "a100": Accelerator.A100,
        "h100": Accelerator.H100,
        "l4": Accelerator.L4,
        "t4": Accelerator.T4,
        "g4": Accelerator.G4,
    }

    name = session or uuid.uuid4().hex[:6]
    variant = Variant.DEFAULT
    accelerator = Accelerator.NONE

    # An unrecognised name is rejected instead of silently falling back to
    # another (possibly far more expensive) accelerator.
    # --tpu takes precedence when both --tpu and --gpu are supplied.
    if tpu:
        variant = Variant.TPU
        accelerator = tpu_choices.get(tpu.lower())
        if accelerator is None:
            typer.echo(
                f"[colab] Unsupported TPU '{tpu}'. Supported: v5e1, v6e1.",
                err=True,
            )
            raise typer.Exit(code=1)
    elif gpu:
        variant = Variant.GPU
        accelerator = gpu_choices.get(gpu.lower())
        if accelerator is None:
            typer.echo(
                f"[colab] Unsupported GPU '{gpu}'. "
                "Supported: T4, L4, G4, H100, A100.",
                err=True,
            )
            raise typer.Exit(code=1)

    typer.echo(f"[colab] Creating session '{name}'...")
    try:
        res = state.client.assign(
            uuid.uuid4(), variant=variant, accelerator=accelerator
        )
    except ColabRequestError as e:
        # The Colab backend returns 400 when the caller is not entitled to the
        # requested accelerator (e.g. no A100 quota). Translate that to a
        # friendly, actionable message instead of a raw traceback. We only
        # interpret it this way when an accelerator was actually requested;
        # otherwise we re-raise so the user sees the real cause.
        if get_status_code(e) == 400 and accelerator != Accelerator.NONE:
            typer.echo(
                f"[colab] Backend rejected accelerator '{accelerator.value}'. "
                "You may not have quota or entitlement for this accelerator on "
                "your account. Try a different one (e.g. --gpu T4) or omit "
                "--gpu/--tpu for a CPU runtime.",
                err=True,
            )
            raise typer.Exit(code=1)
        raise

    # A PostAssignmentResponse (or any response exposing `runtime_proxy_info`)
    # carries the proxy token and URL there; other responses may only expose
    # a flat `runtime_proxy_token` and no URL.
    if isinstance(res, PostAssignmentResponse) or hasattr(res, "runtime_proxy_info"):
        token = res.runtime_proxy_info.token
        url = res.runtime_proxy_info.url
    else:
        token = getattr(res, "runtime_proxy_token", "")
        url = ""
    endpoint = res.endpoint

    s = SessionState(
        name=name,
        token=token,
        url=url,
        endpoint=endpoint,
        variant=variant.value,
        accelerator=accelerator.value,
    )

    # Pre-flight the keep-alive RPC once. If it returns 403 SCOPE_NOT_PERMITTED
    # we know the daemon will fail and the VM would be idle-pruned. Catch
    # it now so we (a) never leak a billable assignment, (b) surface an
    # actionable remediation instead of a "session quietly disappeared".
    try:
        state.client.keep_alive_assignment(endpoint)
    except ColabRequestError as e:
        if get_status_code(e) == 403 and _is_scope_error(e):
            typer.echo(
                "[colab] Keep-alive pre-flight failed: your OAuth "
                "credentials are missing the 'colaboratory' scope, which "
                "is required by the Colab RuntimeService.\n",
                err=True,
            )
            typer.echo(_scope_remediation_message(state.auth_provider), err=True)
            # Don't leak the assignment we just created. Releasing it is
            # best-effort, but a failure is reported so the user knows the
            # assignment may still exist.
            try:
                state.client.unassign(endpoint)
            except Exception as cleanup_error:
                typer.echo(
                    f"[colab] Warning: could not release assignment "
                    f"'{endpoint}': {cleanup_error}",
                    err=True,
                )
            raise typer.Exit(code=1)
        # Other failures: don't block session creation — the daemon will
        # retry and log via the existing keep_alive_error event path.

    # Persist the session BEFORE spawning the daemon so the daemon's
    # initial `state.store.get(session_name)` check doesn't race and
    # exit with `reason=session_not_found`. We re-persist below to also
    # capture the daemon PID.
    state.store.add(s)
    s.keep_alive_pid = spawn_keep_alive(
        endpoint,
        name,
        auth_provider=state.auth_provider,
        config_path=state.config_path,
    )

    state.store.add(s)
    state.history.log_event(
        name,
        "session_created",
        {
            "endpoint": endpoint,
            "variant": variant.value,
            "accelerator": accelerator.value,
        },
    )
    typer.echo("[colab] Session READY.")
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def restart_kernel(
    session: Annotated[
        Optional[str], typer.Option("-s", "--session", help="Session name")
    ] = None,
):
    """Restart a session's kernel"""
    # NOTE: the docstring above is kept short on purpose; Typer renders it as
    # the command's help text.
    from app.colab_cli.common import state

    name = state.resolve_session(session)
    s = state.store.get(name)
    if not s:
        # Without this guard a missing session surfaced as an AttributeError
        # on `s.url` below; report it the same way `stop` does, but fail the
        # command since no restart happened.
        typer.echo(f"[colab] Session '{name}' not found.", err=True)
        raise typer.Exit(code=1)

    # The runtime reports new kernel/session ids through these callbacks;
    # each one is written back to the store immediately.
    def on_started(kid):
        s.kernel_id = kid
        state.store.add(s)

    def on_sess_started(sid):
        s.session_id = sid
        state.store.add(s)

    runtime = ColabRuntime(
        s.url,
        s.token,
        kernel_id=s.kernel_id,
        session_id=s.session_id,
        on_kernel_started=on_started,
        on_session_started=on_sess_started,
    )

    try:
        runtime.restart()
    finally:
        # Always call stop() on the runtime, even when the restart raised.
        runtime.stop()
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
def sessions_command():
    """List all active sessions"""
    from app.colab_cli.common import state

    local_sessions, remote_assignments = state.sync_sessions()
    if not remote_assignments:
        typer.echo("[colab] No active sessions found on server.")
        return

    # Map each endpoint to its local session name so every line can lead
    # with the friendly name; assignments without local state show "?".
    friendly_names = {}
    for entry in local_sessions.values():
        friendly_names[entry.endpoint] = entry.name

    for assignment in remote_assignments:
        label = friendly_names.get(assignment.endpoint, "?")
        # `assignment.variant` is an int-valued AssignmentVariant
        # (DEFAULT=0/GPU=1/TPU=2); its `.name` matches the user-facing string
        # Variant enum, which is what `status` shows for locally-tracked
        # sessions.
        line = _format_session_line(
            name=label,
            endpoint=assignment.endpoint,
            accelerator=assignment.accelerator.value,
            variant=assignment.variant.name,
        )
        typer.echo(line)
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
def _print_status_for(s: SessionState) -> None:
    """Echo a session's summary line, then its last execution if one is recorded."""
    if s.running:
        activity = f"BUSY ({s.running})"
    else:
        activity = "IDLE"

    summary = _format_session_line(
        name=s.name,
        endpoint=s.endpoint,
        accelerator=s.accelerator,
        variant=s.variant,
        status=activity,
    )
    typer.echo(summary)

    if not s.last_execution:
        return

    # `last_execution` unpacks as (file, cell, time); the cell part is
    # omitted from the output when it is falsy.
    exec_file, exec_cell, exec_time = s.last_execution
    cell_part = ""
    if exec_cell:
        cell_part = f" | Cell: {exec_cell}"
    typer.echo(f" Last Execution: {exec_file}{cell_part} at {exec_time}")
|
|
325
|
+
|
|
326
|
+
|
|
327
|
+
def status(
    session: Annotated[
        Optional[str], typer.Option("-s", "--session", help="Session name")
    ] = None,
):
    """Show session status"""
    from app.colab_cli.common import state

    # Sync first in every case, whether or not a session name was given.
    known_sessions, _ = state.sync_sessions()

    if not session:
        # No name given: report every locally known session.
        if not known_sessions:
            typer.echo("[colab] No active sessions.")
            return
        for entry in known_sessions.values():
            _print_status_for(entry)
        return

    # A name was given: report just that session, or say it is unknown.
    match = state.store.get(session)
    if match:
        _print_status_for(match)
    else:
        typer.echo(f"[colab] Session '{session}' not found.")
|
|
349
|
+
|
|
350
|
+
|
|
351
|
+
def stop(
    session: Annotated[
        Optional[str], typer.Option("-s", "--session", help="Session name")
    ] = None,
):
    """Stop a session"""
    from app.colab_cli.common import state

    name = state.resolve_session(session)
    record = state.store.get(name)
    if not record:
        typer.echo(f"[colab] Session '{name}' not found.")
        return

    typer.echo(f"[colab] Stopping session '{name}'...")

    # Step 1: stop the detached keep-alive daemon, if one was recorded.
    daemon_pid = record.keep_alive_pid
    if daemon_pid:
        from app.colab_cli.common import kill_process

        kill_process(daemon_pid)

    # Step 2: best-effort kernel shutdown; any failure here is ignored so
    # the assignment is still released below.
    try:
        ColabRuntime(record.url, record.token, kernel_id=record.kernel_id).stop(
            shutdown_kernel=True
        )
    except Exception:
        pass

    # Step 3: release the assignment, drop local state, record the event.
    state.client.unassign(record.endpoint)
    state.store.remove(name)
    state.history.log_event(name, "session_terminated", {"reason": "user_requested"})
    typer.echo("[colab] Session terminated.")
|
|
381
|
+
|
|
382
|
+
|
|
383
|
+
def spawn_keep_alive(
    endpoint: str, session_name: str, auth_provider=None, config_path=None, token_path=None
):
    """Spawns a detached keep-alive process.

    Both `auth_provider` and `config_path` are propagated as global flags
    so the detached child uses the same authentication strategy AND the
    same session state file as the parent that invoked `colab new`.
    Without this, the child inherits Typer's defaults (`--auth=oauth2`,
    `--config=~/.config/colab-cli/sessions.json`), which causes:
    (a) wrong auth backend, and
    (b) the daemon's `state.store.get(session_name)` check finds nothing
    and exits with `reason=session_not_found` when the parent used
    `--config` to write to a non-default path.

    Args:
        endpoint: Endpoint ID passed to the child's `keep-alive` command.
        session_name: Session name passed to the child's `keep-alive` command.
        auth_provider: Optional provider; its `.value` becomes `--auth=...`.
        config_path: Optional path forwarded as `--config <path>`.
        token_path: Optional path exported to the child as the
            `COLAB_CLI_TOKEN_PATH` environment variable.

    Returns:
        The PID of the spawned child process.
    """
    # Use the project's app.colab_cli.cli instead of pip-installed colab_cli.cli.
    # The import root is the directory that CONTAINS this module's top-level
    # package, so walk up one directory per component of the dotted module
    # name (e.g. `app.colab_cli.commands.session` -> four levels up).
    import_root = os.path.abspath(__file__)
    for _ in range(__name__.count(".") + 1):
        import_root = os.path.dirname(import_root)

    cmd = [sys.executable, "-m", "app.colab_cli.cli"]
    if auth_provider is not None:
        cmd.append(f"--auth={auth_provider.value}")
    if config_path is not None:
        cmd.extend(["--config", os.fspath(config_path)])
    cmd.extend(["keep-alive", endpoint, session_name])

    # Child environment: the parent's environment plus the import root on
    # PYTHONPATH (joined with the platform separator) and, when given, the
    # token path.
    child_env = os.environ.copy()
    inherited_path = child_env.get("PYTHONPATH", "")
    child_env["PYTHONPATH"] = (
        os.pathsep.join([import_root, inherited_path]) if inherited_path else import_root
    )
    if token_path is not None:
        child_env["COLAB_CLI_TOKEN_PATH"] = os.fspath(token_path)

    # Detach process
    kwargs = {}
    if sys.platform != "win32":
        kwargs["start_new_session"] = True
    else:
        # https://stackoverflow.com/questions/1356540/how-can-i-make-a-python-script-run-in-the-background-as-a-service-on-windows
        CREATE_NEW_PROCESS_GROUP = 0x00000200
        DETACHED_PROCESS = 0x00000008
        kwargs["creationflags"] = DETACHED_PROCESS | CREATE_NEW_PROCESS_GROUP

    p = subprocess.Popen(
        cmd,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        stdin=subprocess.DEVNULL,
        # The prepared environment must be handed over explicitly; otherwise
        # the child only sees the parent's unmodified environment.
        env=child_env,
        **kwargs,
    )
    return p.pid
|
|
430
|
+
|
|
431
|
+
|
|
432
|
+
def keep_alive(
    endpoint: Annotated[str, typer.Argument(help="Endpoint ID")],
    session_name: Annotated[str, typer.Argument(help="Session name")],
):
    """Hidden command to run keep-alive loop. Terminate after 24h."""
    from app.colab_cli.common import state

    state.history.log_event(
        session_name,
        "keep_alive_started",
        {"endpoint": endpoint, "pid": os.getpid()},
    )

    began_at = time.time()
    # Hard ceiling on the daemon's lifetime: 24 hours.
    lifetime_seconds = 24 * 3600
    client_error_streak = 0
    iterations = 0
    last_error: Optional[Dict[str, Any]] = None

    # Default stop reason; overwritten when the loop breaks out early.
    reason = "time_limit_reached"
    extra: Dict[str, Any] = {}

    while time.time() - began_at < lifetime_seconds:
        iterations += 1

        # Stop as soon as the session is gone from local state or has been
        # re-pointed at a different endpoint.
        current = state.store.get(session_name)
        if not current:
            reason = "session_not_found"
            break
        if current.endpoint != endpoint:
            reason = "endpoint_mismatch"
            extra["expected_endpoint"] = endpoint
            extra["actual_endpoint"] = current.endpoint
            break

        try:
            state.client.keep_alive_assignment(endpoint)
        except Exception as exc:
            code = get_status_code(exc)
            body = getattr(exc, "response_body", None)
            last_error = {
                "status_code": code,
                "error_type": type(exc).__name__,
                "error": str(exc)[:500],
                "response_body": (str(body)[:1000] if body else None),
            }
            # Only 4xx responses count towards the give-up streak; other
            # errors (e.g. network) are logged and retried.
            is_client_error = code is not None and 400 <= code < 500
            if is_client_error:
                client_error_streak += 1
            state.history.log_event(
                session_name,
                "keep_alive_error",
                {
                    **last_error,
                    "iteration": iterations,
                    "consecutive_4xx": client_error_streak,
                },
            )
            if is_client_error and client_error_streak >= 2:
                reason = "consecutive_4xx_errors"
                break
        else:
            # A successful ping clears both the streak and the last error.
            client_error_streak = 0
            last_error = None

        time.sleep(60)

    payload: Dict[str, Any] = {
        "reason": reason,
        "iterations": iterations,
        "duration_seconds": round(time.time() - began_at, 2),
    }
    if last_error is not None:
        payload["last_error"] = last_error
    payload.update(extra)
    state.history.log_event(session_name, "keep_alive_stopped", payload)
|
|
511
|
+
|
|
512
|
+
|
|
513
|
+
def register(app: typer.Typer):
    """Attach this module's session commands to ``app``.

    Commands are registered in a fixed order; each entry pairs a handler
    with the keyword options passed to ``app.command``.
    """
    command_table = (
        (new, {}),
        (sessions_command, {"name": "sessions"}),
        (restart_kernel, {"name": "restart-kernel"}),
        (status, {}),
        (stop, {}),
        (keep_alive, {"hidden": True}),
    )
    for handler, options in command_table:
        app.command(**options)(handler)
|