colabctl 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. colabctl/__init__.py +71 -0
  2. colabctl/auth/__init__.py +21 -0
  3. colabctl/auth/adc.py +80 -0
  4. colabctl/auth/base.py +63 -0
  5. colabctl/backends/__init__.py +42 -0
  6. colabctl/backends/base.py +147 -0
  7. colabctl/backends/colab.py +166 -0
  8. colabctl/backends/factory.py +65 -0
  9. colabctl/backends/hf_backend.py +197 -0
  10. colabctl/backends/kaggle_backend.py +216 -0
  11. colabctl/backends/modal_backend.py +201 -0
  12. colabctl/backends/router.py +76 -0
  13. colabctl/backends/vertex_backend.py +237 -0
  14. colabctl/cli.py +327 -0
  15. colabctl/drive.py +218 -0
  16. colabctl/errors.py +147 -0
  17. colabctl/lifecycle.py +194 -0
  18. colabctl/mcp_server.py +209 -0
  19. colabctl/models.py +233 -0
  20. colabctl/observability.py +107 -0
  21. colabctl/sdk/__init__.py +8 -0
  22. colabctl/sdk/client.py +202 -0
  23. colabctl/sdk/remote.py +178 -0
  24. colabctl/secrets/__init__.py +37 -0
  25. colabctl/secrets/base.py +56 -0
  26. colabctl/secrets/encrypted_file.py +122 -0
  27. colabctl/secrets/keyring_store.py +107 -0
  28. colabctl/secrets/memory.py +24 -0
  29. colabctl/transport/__init__.py +18 -0
  30. colabctl/transport/base.py +122 -0
  31. colabctl/transport/browser/__init__.py +19 -0
  32. colabctl/transport/browser/bridge.py +260 -0
  33. colabctl/transport/cli/__init__.py +15 -0
  34. colabctl/transport/cli/adapter.py +252 -0
  35. colabctl/transport/cli/parser.py +192 -0
  36. colabctl/transport/native/__init__.py +49 -0
  37. colabctl/transport/native/adapter.py +260 -0
  38. colabctl/transport/native/client.py +371 -0
  39. colabctl/transport/native/kernel.py +259 -0
  40. colabctl-0.1.0.dist-info/METADATA +200 -0
  41. colabctl-0.1.0.dist-info/RECORD +44 -0
  42. colabctl-0.1.0.dist-info/WHEEL +4 -0
  43. colabctl-0.1.0.dist-info/entry_points.txt +3 -0
  44. colabctl-0.1.0.dist-info/licenses/LICENSE +201 -0
colabctl/__init__.py ADDED
@@ -0,0 +1,71 @@
1
+ """colabctl — programmatic control of Google Colab for developers and AI agents.
2
+
3
+ Public API is intentionally small and stable; everything else is an
4
+ implementation detail behind the transport/provider abstractions.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from colabctl.drive import DriveSync, drive_checkpoint_hooks
10
+ from colabctl.errors import (
11
+ AcceleratorUnavailableError,
12
+ AllocationError,
13
+ AuthError,
14
+ ColabctlError,
15
+ ConfigurationError,
16
+ ExecutionError,
17
+ FileTransferError,
18
+ KeepAliveError,
19
+ KernelError,
20
+ QuotaExceededError,
21
+ SecretStoreError,
22
+ TooManyAssignmentsError,
23
+ TransportError,
24
+ )
25
+ from colabctl.lifecycle import RuntimeLifecycleManager
26
+ from colabctl.models import (
27
+ Accelerator,
28
+ Assignment,
29
+ ExecutionResult,
30
+ MachineShape,
31
+ RuntimeProxyInfo,
32
+ RuntimeSpec,
33
+ SessionInfo,
34
+ SessionStatus,
35
+ Variant,
36
+ )
37
+ from colabctl.sdk import ColabClient, ColabSession, remote
38
+
39
+ __version__ = "0.1.0"
40
+
41
+ __all__ = [
42
+ "Accelerator",
43
+ "AcceleratorUnavailableError",
44
+ "AllocationError",
45
+ "Assignment",
46
+ "AuthError",
47
+ "ColabClient",
48
+ "ColabSession",
49
+ "ColabctlError",
50
+ "ConfigurationError",
51
+ "DriveSync",
52
+ "ExecutionError",
53
+ "ExecutionResult",
54
+ "FileTransferError",
55
+ "KeepAliveError",
56
+ "KernelError",
57
+ "MachineShape",
58
+ "QuotaExceededError",
59
+ "RuntimeLifecycleManager",
60
+ "RuntimeProxyInfo",
61
+ "RuntimeSpec",
62
+ "SecretStoreError",
63
+ "SessionInfo",
64
+ "SessionStatus",
65
+ "TooManyAssignmentsError",
66
+ "TransportError",
67
+ "Variant",
68
+ "__version__",
69
+ "drive_checkpoint_hooks",
70
+ "remote",
71
+ ]
@@ -0,0 +1,21 @@
1
+ """Authentication providers for the Colab backend."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from colabctl.auth.adc import ADCAuthProvider
6
+ from colabctl.auth.base import (
7
+ ADC_LOGIN_SCOPES,
8
+ COLAB_SCOPES,
9
+ AuthProvider,
10
+ StaticTokenProvider,
11
+ TokenCallable,
12
+ )
13
+
14
+ __all__ = [
15
+ "ADC_LOGIN_SCOPES",
16
+ "COLAB_SCOPES",
17
+ "ADCAuthProvider",
18
+ "AuthProvider",
19
+ "StaticTokenProvider",
20
+ "TokenCallable",
21
+ ]
colabctl/auth/adc.py ADDED
@@ -0,0 +1,80 @@
1
+ """ADC auth provider — the Phase 0-verified working path.
2
+
3
+ Uses Application Default Credentials (``gcloud auth application-default login
4
+ --scopes=…colaboratory``). ``google.auth`` is sync, so refreshes run in a thread;
5
+ a lock serializes concurrent refreshes. ``google-auth`` is imported lazily.
6
+
7
+ Setup (once, by the user):
8
+
9
+ gcloud auth application-default login \\
10
+ --scopes=openid,https://www.googleapis.com/auth/cloud-platform,\\
11
+ https://www.googleapis.com/auth/userinfo.email,\\
12
+ https://www.googleapis.com/auth/colaboratory,\\
13
+ https://www.googleapis.com/auth/drive.file
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import asyncio
19
+ import warnings
20
+ from typing import Any
21
+
22
+ from colabctl.auth.base import COLAB_SCOPES, AuthProvider
23
+ from colabctl.errors import AuthError
24
+
25
+
26
class ADCAuthProvider(AuthProvider):
    """Bearer tokens from Application Default Credentials with the Colab scopes.

    ``google.auth`` is synchronous, so credential loading and refreshing run in a
    worker thread via :func:`asyncio.to_thread`; an :class:`asyncio.Lock` serializes
    concurrent callers. Every failure mode visible here (missing package, no ADC
    configured, refresh error, empty token) is raised as :class:`AuthError`.
    """

    def __init__(self, *, scopes: tuple[str, ...] = COLAB_SCOPES) -> None:
        """Remember the requested scopes; credentials are loaded lazily on first use."""
        self._scopes = list(scopes)
        self._creds: Any | None = None
        self._lock = asyncio.Lock()

    async def token(self) -> str:
        """Return a bearer token, loading or refreshing credentials when needed.

        Raises:
            AuthError: if google-auth is missing, no ADC can be found, the refresh
                fails, or the credentials carry no access token.
        """
        async with self._lock:
            return await asyncio.to_thread(self._sync_token)

    async def email(self) -> str | None:
        """Return the credentials' ``service_account_email`` if present (best-effort).

        Returns ``None`` until :meth:`token` has loaded credentials at least once.
        """
        async with self._lock:
            creds = self._creds
        # Only credentials that carry a ``service_account_email`` attribute can
        # answer this; others yield None, so the result is best-effort.
        return getattr(creds, "service_account_email", None) if creds else None

    def _sync_token(self) -> str:
        """Blocking implementation of :meth:`token`; runs in a worker thread."""
        try:
            import google.auth
            from google.auth.exceptions import DefaultCredentialsError
            from google.auth.transport.requests import Request
        except ImportError as exc:  # pragma: no cover - only without the extra
            raise AuthError(
                "google-auth is not installed. Install with `pip install 'colabctl[native]'`."
            ) from exc

        # Typed Any: google-auth credential subclasses differ structurally
        # (only scopable creds have with_scopes; refresh is untyped upstream).
        creds: Any = self._creds
        if creds is None:
            # ADC user creds emit a noisy "no quota project" UserWarning on every
            # call; it's irrelevant here (Colab calls pin their own project), so
            # suppress just that one message.
            with warnings.catch_warnings():
                warnings.filterwarnings(
                    "ignore",
                    message=r"Your application has authenticated using end user credentials.*",
                    category=UserWarning,
                )
                try:
                    creds, _ = google.auth.default(scopes=self._scopes)
                except DefaultCredentialsError as exc:
                    # Missing ADC is the most common setup mistake; report it through
                    # the same AuthError channel as every other auth failure.
                    raise AuthError(
                        "No Application Default Credentials found. Run "
                        "`gcloud auth application-default login` with the Colab "
                        f"scopes first: {exc}"
                    ) from exc
            # User creds ignore scopes= in default(); re-apply when supported.
            if getattr(creds, "requires_scopes", False):
                creds = creds.with_scopes(self._scopes)
        if not creds.valid:
            try:
                creds.refresh(Request())
            except Exception as exc:
                raise AuthError(f"Failed to refresh ADC credentials: {exc}") from exc
        # Cache only after a successful load/refresh so a failed attempt is retried.
        self._creds = creds
        token = getattr(creds, "token", None)
        if not token:
            raise AuthError("ADC credentials produced no access token.")
        return str(token)
colabctl/auth/base.py ADDED
@@ -0,0 +1,63 @@
1
+ """Authentication contract.
2
+
3
+ An :class:`AuthProvider` yields a fresh OAuth bearer token (with the Colab scopes)
4
+ on demand. Transports depend only on this; concrete providers (ADC, OAuth2-loopback,
5
+ static) are swappable. The Colab scope set is the one verified in Phase 0 — note
6
+ that ``colaboratory`` is not third-party-grantable, so ADC (gcloud's client) is the
7
+ working path; ``cloud-platform`` is additionally required by gcloud itself.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import abc
13
+ from collections.abc import Awaitable, Callable
14
+
15
#: OAuth scopes the Colab backend requires (verified from CLI ``PUBLIC_SCOPES``).
#: This is the default scope set of ``ADCAuthProvider``.
COLAB_SCOPES: tuple[str, ...] = (
    "openid",
    "https://www.googleapis.com/auth/userinfo.profile",
    "https://www.googleapis.com/auth/userinfo.email",
    "https://www.googleapis.com/auth/colaboratory",
    "https://www.googleapis.com/auth/drive.file",
)

#: Scope set for ``gcloud auth application-default login``. Compared with
#: ``COLAB_SCOPES`` it carries ``cloud-platform`` (gcloud refuses otherwise) and
#: omits ``userinfo.profile``.
ADC_LOGIN_SCOPES: tuple[str, ...] = (
    "openid",
    "https://www.googleapis.com/auth/cloud-platform",
    "https://www.googleapis.com/auth/userinfo.email",
    "https://www.googleapis.com/auth/colaboratory",
    "https://www.googleapis.com/auth/drive.file",
)

#: Zero-argument async callable that resolves to a bearer token string.
TokenCallable = Callable[[], Awaitable[str]]
34
+
35
+
36
class AuthProvider(abc.ABC):
    """Source of fresh bearer tokens for the Colab backend.

    Subclasses must implement :meth:`token`; :meth:`email` is optional and
    defaults to "unknown" (``None``).
    """

    @abc.abstractmethod
    async def token(self) -> str:
        """Return a bearer token that is valid right now, refreshing it if necessary."""

    async def email(self) -> str | None:
        """Return the authenticated account's email when known, else ``None``."""
        return None

    def as_token_callable(self) -> TokenCallable:
        """Expose this provider as the ``TokenProvider`` callable the native client expects.

        The returned object is the bound :meth:`token` coroutine function itself.
        """
        bound_token: TokenCallable = self.token
        return bound_token
50
+
51
+
52
class StaticTokenProvider(AuthProvider):
    """An :class:`AuthProvider` that always hands back one fixed token.

    Intended for tests and for injecting a token obtained elsewhere; nothing is
    ever refreshed.
    """

    def __init__(self, token: str, *, email: str | None = None) -> None:
        """Store the fixed ``token`` and, optionally, the ``email`` to report."""
        self._email = email
        self._token = token

    async def token(self) -> str:
        """Return the token given at construction time, unchanged."""
        fixed = self._token
        return fixed

    async def email(self) -> str | None:
        """Return the email given at construction time (``None`` if not supplied)."""
        known = self._email
        return known
@@ -0,0 +1,42 @@
1
+ """Provider abstraction: pluggable batch backends + capability-based routing.
2
+
3
+ - :class:`Backend` — the submit/status/logs/result/cancel contract.
4
+ - :class:`ColabBackend` — Colab via an interactive transport (sanctioned default).
5
+ - :class:`ModalBackend` — gVisor GPU sandboxes (best for agent code).
6
+ - :class:`VertexBackend` — sanctioned, headless, deadline-bound GPU jobs.
7
+ - :class:`BackendRouter` — selects a backend by capability and fails over on infra errors.
8
+
9
+ HF Jobs / Kaggle / IaaS are registered-but-deferred (Phase 4).
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from colabctl.backends.base import (
15
+ Backend,
16
+ BackendCapabilities,
17
+ JobInfo,
18
+ JobResult,
19
+ JobSpec,
20
+ JobState,
21
+ )
22
+ from colabctl.backends.colab import ColabBackend
23
+ from colabctl.backends.hf_backend import HFJobsBackend
24
+ from colabctl.backends.kaggle_backend import KaggleBackend
25
+ from colabctl.backends.modal_backend import ModalBackend
26
+ from colabctl.backends.router import BackendRouter
27
+ from colabctl.backends.vertex_backend import VertexBackend
28
+
29
+ __all__ = [
30
+ "Backend",
31
+ "BackendCapabilities",
32
+ "BackendRouter",
33
+ "ColabBackend",
34
+ "HFJobsBackend",
35
+ "JobInfo",
36
+ "JobResult",
37
+ "JobSpec",
38
+ "JobState",
39
+ "KaggleBackend",
40
+ "ModalBackend",
41
+ "VertexBackend",
42
+ ]
@@ -0,0 +1,147 @@
1
+ """Provider abstraction: the batch-`Backend` contract.
2
+
3
+ Two complementary abstractions exist in colabctl:
4
+
5
+ - :class:`~colabctl.transport.base.TransportAdapter` — *interactive* runtimes
6
+ (allocate a warm GPU, run cells on a live kernel). Colab's native shape.
7
+ - :class:`Backend` (this module) — *batch jobs* (submit code → poll → fetch result →
8
+ cancel). The natural shape for Modal, Vertex, HF Jobs, etc.
9
+
10
+ The :class:`~colabctl.backends.router.BackendRouter` selects a backend by capability
11
+ and fails over between them, so a Colab outage/quota/ban degrades to Modal or Vertex
12
+ instead of failing. Colab is also exposed as a batch backend
13
+ (:class:`~colabctl.backends.colab.ColabBackend`) so callers can use one job API
14
+ across every provider.
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import abc
20
+ import enum
21
+
22
+ from pydantic import BaseModel, model_validator
23
+
24
+ from colabctl.models import Accelerator
25
+
26
+
27
+ class JobState(enum.StrEnum):
28
+ PENDING = "PENDING"
29
+ RUNNING = "RUNNING"
30
+ SUCCEEDED = "SUCCEEDED"
31
+ FAILED = "FAILED"
32
+ CANCELLED = "CANCELLED"
33
+ UNKNOWN = "UNKNOWN"
34
+
35
+ @property
36
+ def is_terminal(self) -> bool:
37
+ return self in {JobState.SUCCEEDED, JobState.FAILED, JobState.CANCELLED}
38
+
39
+
40
class JobSpec(BaseModel):
    """What to run on a backend. Provide exactly one of ``code`` or ``script_path``.

    "Provided" means non-empty: an empty string counts as absent, both for the
    validator and for :meth:`resolved_code`.
    """

    code: str | None = None  # inline source to run
    script_path: str | None = None  # local file whose contents are the source
    requirements: list[str] = []  # pip requirement strings installed before the run
    accelerator: Accelerator = Accelerator.T4
    env: dict[str, str] = {}
    timeout: int | None = None  # forwarded to the backend; presumably seconds — confirm
    name: str | None = None

    @model_validator(mode="after")
    def _exactly_one_source(self) -> JobSpec:
        """Reject specs where both or neither of ``code``/``script_path`` are non-empty."""
        if bool(self.code) == bool(self.script_path):
            raise ValueError("Provide exactly one of `code` or `script_path`.")
        return self

    def resolved_code(self) -> str:
        """Return the code to run (reads ``script_path`` if that's what was given).

        Raises:
            OSError: if ``script_path`` cannot be read.
        """
        # Truthiness, not ``is not None``: the validator accepts ``code=""`` together
        # with a ``script_path``, and in that case the script is the real source.
        if self.code:
            return self.code
        from pathlib import Path

        # Python source is UTF-8 by default; don't depend on the locale encoding.
        return Path(self.script_path or "").read_text(encoding="utf-8")
64
+
65
+
66
class JobInfo(BaseModel):
    """A lightweight view of a submitted job.

    Returned by ``Backend.submit`` and ``Backend.status``.
    """

    id: str  # job identifier; pass it to status/logs/result/cancel
    backend: str  # name of the backend that owns the job
    state: JobState  # current lifecycle state
    accelerator: Accelerator = Accelerator.NONE
    detail: str | None = None  # optional free-form detail text
74
+
75
+
76
class JobResult(BaseModel):
    """The outcome of a job.

    Returned by ``Backend.result`` and ``Backend.run``.
    """

    id: str  # job identifier
    backend: str  # name of the backend that ran the job
    state: JobState  # state the job was in when the result was built
    exit_code: int | None = None  # None when no exit code was recorded
    stdout: str = ""
    stderr: str = ""
    error: str | None = None  # failure summary, when one was recorded

    @property
    def ok(self) -> bool:
        """``True`` only when ``state`` is ``JobState.SUCCEEDED``."""
        return self.state is JobState.SUCCEEDED
90
+
91
+
92
class BackendCapabilities(BaseModel):
    """Describes what a backend can do; consumed for routing and honest disclosure."""

    name: str
    accelerators: list[str] = []  # supported accelerator values; empty = any/unknown
    interactive: bool = False
    streaming_logs: bool = False
    persistent: bool = False  # runtime survives between calls
    max_runtime_seconds: int | None = None
    requires_account: bool = True
    tos_posture: str = "sanctioned"  # "sanctioned" | "gray-area" | "prohibited"
    notes: list[str] = []

    def supports(self, accelerator: Accelerator) -> bool:
        """Tell whether this backend can serve ``accelerator``.

        An empty ``accelerators`` list means "any/unknown" and accepts everything;
        ``Accelerator.NONE`` is always accepted; otherwise the accelerator's value
        must be listed.
        """
        declared = self.accelerators
        return (
            not declared
            or accelerator.value in declared
            or accelerator is Accelerator.NONE
        )
109
+
110
+
111
class Backend(abc.ABC):
    """Contract for a pluggable batch-execution backend (Colab, Modal, Vertex, ...).

    Concrete backends implement ``capabilities`` plus the five job operations;
    :meth:`run` and :meth:`aclose` have default implementations.
    """

    name: str = "backend"

    @property
    @abc.abstractmethod
    def capabilities(self) -> BackendCapabilities:
        """Describe what this backend supports."""
        ...

    @abc.abstractmethod
    async def submit(self, spec: JobSpec) -> JobInfo:
        """Launch a job and hand back its handle without waiting for completion."""

    @abc.abstractmethod
    async def status(self, job_id: str) -> JobInfo:
        """Report the current state of the job identified by ``job_id``."""

    @abc.abstractmethod
    async def logs(self, job_id: str) -> str:
        """Return whatever log output the job has produced so far (best-effort)."""

    @abc.abstractmethod
    async def result(self, job_id: str) -> JobResult:
        """Block until the job has finished, then return its result."""

    @abc.abstractmethod
    async def cancel(self, job_id: str) -> None:
        """Cancel the job if it is still running."""

    async def run(self, spec: JobSpec) -> JobResult:
        """Convenience wrapper: :meth:`submit` the spec, then await :meth:`result`."""
        handle = await self.submit(spec)
        outcome = await self.result(handle.id)
        return outcome

    async def aclose(self) -> None:
        """Release backend-level resources; a no-op unless a subclass overrides it."""
@@ -0,0 +1,166 @@
1
+ """Colab as a batch :class:`Backend` (wraps a TransportAdapter).
2
+
3
+ Presents Colab's interactive transport through the unified job API: ``submit``
4
+ launches an in-process asyncio task that allocates a runtime, optionally pip-installs
5
+ requirements, runs the code, captures output, and releases the runtime. State/logs/
6
+ result read the in-memory job record.
7
+
8
+ Limitation: job tracking is in-process (if the host process dies, the record is lost).
9
+ Cross-process durability is the runtime-lifecycle manager's concern (checkpoint to
10
+ Drive/GCS); for the interactive, warm-GPU workflow use the SDK's ``ColabSession`` directly.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import asyncio
16
+ import contextlib
17
+ import uuid
18
+ from dataclasses import dataclass, field
19
+
20
+ from colabctl.backends.base import (
21
+ Backend,
22
+ BackendCapabilities,
23
+ JobInfo,
24
+ JobResult,
25
+ JobSpec,
26
+ JobState,
27
+ )
28
+ from colabctl.errors import ColabctlError
29
+ from colabctl.models import RuntimeSpec
30
+ from colabctl.transport.base import TransportAdapter
31
+
32
+
33
+ def _install_code(requirements: list[str]) -> str:
34
+ pkgs = ", ".join(repr(r) for r in requirements)
35
+ return (
36
+ "import subprocess, sys\n"
37
+ f"subprocess.run([sys.executable, '-m', 'pip', 'install', '-q', {pkgs}], check=True)\n"
38
+ )
39
+
40
+
41
@dataclass
class _Job:
    """In-memory record of one job tracked by ``ColabBackend``."""

    info: JobInfo  # public view; its ``state`` is updated as the job progresses
    spec: JobSpec  # what was submitted
    task: asyncio.Task[None] | None = None  # background task running the job
    session: str | None = None  # name of the allocated runtime session, once allocated
    stdout: str = ""
    stderr: str = ""
    exit_code: int | None = None  # 0 on success, 1 on failure, None until finished
    error: str | None = None  # failure summary, when one was recorded
    logbuf: list[str] = field(default_factory=list)  # chunks joined by ``logs()``
52
+
53
+
54
class ColabBackend(Backend):
    """Run batch jobs on Colab via an interactive transport.

    Each submitted job runs as an in-process :class:`asyncio.Task` that allocates a
    runtime, optionally pip-installs requirements, executes the code and always
    stops the runtime afterwards. Job records live only in this object's memory.
    """

    name = "colab"

    def __init__(self, transport: TransportAdapter) -> None:
        """Wrap ``transport``; the backend takes over closing it in :meth:`aclose`."""
        self._transport = transport
        self._jobs: dict[str, _Job] = {}

    @property
    def capabilities(self) -> BackendCapabilities:
        """Describe this backend, folding in the wrapped transport's capabilities."""
        caps = self._transport.capabilities
        return BackendCapabilities(
            name=self.name,
            accelerators=["T4", "L4", "G4", "A100", "H100"],
            interactive=caps.interactive,
            streaming_logs=False,
            persistent=True,
            requires_account=True,
            tos_posture="sanctioned" if self._transport.name == "cli" else "gray-area",
            notes=[f"via the {self._transport.name!r} transport", *caps.caveats],
        )

    async def submit(self, spec: JobSpec) -> JobInfo:
        """Register a job, start its background task and return its handle at once."""
        job_id = f"colab-{uuid.uuid4().hex[:10]}"
        info = JobInfo(
            id=job_id, backend=self.name, state=JobState.PENDING, accelerator=spec.accelerator
        )
        job = _Job(info=info, spec=spec)
        self._jobs[job_id] = job
        job.task = asyncio.create_task(self._execute(job))
        return info

    async def status(self, job_id: str) -> JobInfo:
        """Return the job's current info. Raises ``ColabctlError`` for unknown ids."""
        return self._require(job_id).info

    async def logs(self, job_id: str) -> str:
        """Return the output captured so far. Raises ``ColabctlError`` for unknown ids."""
        return "".join(self._require(job_id).logbuf)

    async def result(self, job_id: str) -> JobResult:
        """Wait for the job's task to finish and return its recorded outcome.

        A cancelled job yields a result in state ``CANCELLED`` rather than raising.
        Raises ``ColabctlError`` for unknown ids.
        """
        job = self._require(job_id)
        if job.task is not None:
            with contextlib.suppress(asyncio.CancelledError):
                await job.task
        return JobResult(
            id=job_id,
            backend=self.name,
            state=job.info.state,
            exit_code=job.exit_code,
            stdout=job.stdout,
            stderr=job.stderr,
            error=job.error,
        )

    async def cancel(self, job_id: str) -> None:
        """Cancel the job if it is still in flight; finished jobs keep their state.

        Raises ``ColabctlError`` for unknown ids.
        """
        job = self._require(job_id)
        if job.task is not None and not job.task.done():
            job.task.cancel()
            # Set the state here too: a task cancelled before its first step never
            # reaches the CancelledError handler in _execute.
            job.info.state = JobState.CANCELLED

    async def aclose(self) -> None:
        """Cancel in-flight jobs, let them release their runtimes, then close the transport."""
        in_flight = [
            job for job in self._jobs.values() if job.task is not None and not job.task.done()
        ]
        for job in in_flight:
            job.task.cancel()
        if in_flight:
            # Await the cancelled tasks so each one's ``finally`` can stop its runtime
            # while the transport is still open; closing first would leak runtimes.
            await asyncio.gather(*(job.task for job in in_flight), return_exceptions=True)
            for job in in_flight:
                if job.task.cancelled():
                    job.info.state = JobState.CANCELLED
        await self._transport.aclose()

    # -- internals ----------------------------------------------------------

    def _require(self, job_id: str) -> _Job:
        """Look up a job record, raising ``ColabctlError`` if the id is unknown."""
        job = self._jobs.get(job_id)
        if job is None:
            raise ColabctlError(f"No such job: {job_id!r}")
        return job

    async def _execute(self, job: _Job) -> None:
        """Task body: allocate, install, run, record the outcome, always stop the runtime.

        Every exit path leaves the job in a terminal state; only cancellation
        propagates out of the task.
        """
        job.info.state = JobState.RUNNING
        try:
            session = await self._transport.allocate(
                RuntimeSpec(accelerator=job.spec.accelerator, name=job.spec.name)
            )
            job.session = session.name
            if job.spec.requirements:
                install = await self._transport.execute(
                    session.name, _install_code(job.spec.requirements), timeout=job.spec.timeout
                )
                if not install.ok:
                    job.error = "pip install failed: " + (install.stderr or install.text)[:400]
                    job.info.state = JobState.FAILED
                    return
            result = await self._transport.execute(
                session.name, job.spec.resolved_code(), timeout=job.spec.timeout
            )
            job.stdout = result.text
            job.stderr = result.stderr
            job.logbuf.append(result.text)
            if result.ok:
                job.info.state = JobState.SUCCEEDED
                job.exit_code = 0
            else:
                job.info.state = JobState.FAILED
                job.exit_code = 1
                if result.error is not None:
                    job.error = f"{result.error.ename}: {result.error.evalue}"
        except asyncio.CancelledError:
            job.info.state = JobState.CANCELLED
            raise
        except ColabctlError as exc:
            job.info.state = JobState.FAILED
            job.error = str(exc)
        except Exception as exc:
            # Task boundary: anything else (e.g. OSError from an unreadable
            # script_path) must still end in a terminal state instead of leaving
            # the job stuck in RUNNING and leaking out of result().
            job.info.state = JobState.FAILED
            job.error = f"{type(exc).__name__}: {exc}"
        finally:
            if job.session is not None:
                # Best-effort release; a failed stop must not mask the job outcome.
                with contextlib.suppress(ColabctlError):
                    await self._transport.stop(job.session)
@@ -0,0 +1,65 @@
1
+ """Backend construction by name — the seam the CLI and MCP server build on.
2
+
3
+ Keeps backend wiring in one place so ``colabctl job run --backend modal`` and the
4
+ MCP ``run_job`` tool construct backends identically. The Colab backend is built over
5
+ the chosen transport (cli/native); Modal/Vertex read their own env/config.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from colabctl.backends.base import Backend
11
+ from colabctl.backends.modal_backend import ModalBackend
12
+ from colabctl.backends.router import BackendRouter
13
+ from colabctl.backends.vertex_backend import VertexBackend
14
+ from colabctl.errors import ConfigurationError
15
+
16
#: Backends available for selection. ``build_router`` uses this tuple, in this
#: order, when no explicit list of names is given.
BACKEND_NAMES: tuple[str, ...] = ("colab", "modal", "vertex", "hf", "kaggle")
18
+
19
+
20
+ def build_backend(
21
+ name: str,
22
+ *,
23
+ transport_name: str = "cli",
24
+ auth_mode: str = "adc",
25
+ colab_bin: str = "colab",
26
+ ) -> Backend:
27
+ """Construct a backend by name. Colab uses the chosen transport."""
28
+ key = name.lower()
29
+ if key == "colab":
30
+ from colabctl.backends.colab import ColabBackend
31
+ from colabctl.sdk.client import ColabClient
32
+
33
+ client = ColabClient(
34
+ transport_name=transport_name, auth_mode=auth_mode, colab_bin=colab_bin
35
+ )
36
+ return ColabBackend(client.transport)
37
+ if key == "modal":
38
+ return ModalBackend()
39
+ if key == "vertex":
40
+ return VertexBackend()
41
+ if key == "hf":
42
+ from colabctl.backends.hf_backend import HFJobsBackend
43
+
44
+ return HFJobsBackend()
45
+ if key == "kaggle":
46
+ from colabctl.backends.kaggle_backend import KaggleBackend
47
+
48
+ return KaggleBackend()
49
+ raise ConfigurationError(f"Unknown backend {name!r}. Choose from: {', '.join(BACKEND_NAMES)}.")
50
+
51
+
52
def build_router(
    names: list[str] | None = None,
    *,
    transport_name: str = "cli",
    auth_mode: str = "adc",
    colab_bin: str = "colab",
) -> BackendRouter:
    """Build a :class:`BackendRouter` over the named backends, in failover order.

    ``None`` or an empty list selects every entry of ``BACKEND_NAMES``. The keyword
    options are forwarded unchanged to :func:`build_backend` for each name.
    """
    chosen = names if names else list(BACKEND_NAMES)
    built: list[Backend] = []
    for backend_name in chosen:
        built.append(
            build_backend(
                backend_name,
                transport_name=transport_name,
                auth_mode=auth_mode,
                colab_bin=colab_bin,
            )
        )
    return BackendRouter(built, order=chosen)