@opengeni/runtime 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. package/dist/chunk-2PO56VAL.js +3478 -0
  2. package/dist/chunk-2PO56VAL.js.map +1 -0
  3. package/dist/index.d.ts +912 -0
  4. package/dist/index.js +3663 -0
  5. package/dist/index.js.map +1 -0
  6. package/dist/sandbox/index.d.ts +1738 -0
  7. package/dist/sandbox/index.js +187 -0
  8. package/dist/sandbox/index.js.map +1 -0
  9. package/package.json +49 -0
  10. package/src/bundled_hashicorp_terraform_skills/LICENSE +373 -0
  11. package/src/bundled_hashicorp_terraform_skills/README.md +18 -0
  12. package/src/bundled_hashicorp_terraform_skills/UPSTREAM_GIT_SHA +1 -0
  13. package/src/bundled_hashicorp_terraform_skills/azure-verified-modules/SKILL.md +613 -0
  14. package/src/bundled_hashicorp_terraform_skills/checkov/SKILL.md +43 -0
  15. package/src/bundled_hashicorp_terraform_skills/refactor-module/SKILL.md +538 -0
  16. package/src/bundled_hashicorp_terraform_skills/social-media-marketing/SKILL.md +35 -0
  17. package/src/bundled_hashicorp_terraform_skills/terraform-search-import/SKILL.md +372 -0
  18. package/src/bundled_hashicorp_terraform_skills/terraform-search-import/references/MANUAL-IMPORT.md +113 -0
  19. package/src/bundled_hashicorp_terraform_skills/terraform-search-import/scripts/list_resources.sh +38 -0
  20. package/src/bundled_hashicorp_terraform_skills/terraform-stacks/SKILL.md +480 -0
  21. package/src/bundled_hashicorp_terraform_skills/terraform-stacks/references/api-monitoring.md +543 -0
  22. package/src/bundled_hashicorp_terraform_skills/terraform-stacks/references/component-blocks.md +476 -0
  23. package/src/bundled_hashicorp_terraform_skills/terraform-stacks/references/deployment-blocks.md +391 -0
  24. package/src/bundled_hashicorp_terraform_skills/terraform-stacks/references/examples.md +1529 -0
  25. package/src/bundled_hashicorp_terraform_skills/terraform-stacks/references/linked-stacks.md +187 -0
  26. package/src/bundled_hashicorp_terraform_skills/terraform-stacks/references/troubleshooting.md +671 -0
  27. package/src/bundled_hashicorp_terraform_skills/terraform-style-guide/SKILL.md +353 -0
  28. package/src/bundled_hashicorp_terraform_skills/terraform-test/SKILL.md +451 -0
  29. package/src/bundled_hashicorp_terraform_skills/terraform-test/references/CI_CD.md +80 -0
  30. package/src/bundled_hashicorp_terraform_skills/terraform-test/references/EXAMPLES.md +314 -0
  31. package/src/bundled_hashicorp_terraform_skills/terraform-test/references/MOCK_PROVIDERS.md +171 -0
  32. package/src/codex-tool-search.ts +267 -0
  33. package/src/context-compaction.ts +538 -0
  34. package/src/history-sanitizer.ts +719 -0
  35. package/src/index.ts +3299 -0
  36. package/src/sandbox/capabilities.ts +69 -0
  37. package/src/sandbox/channel-a.ts +1031 -0
  38. package/src/sandbox/display-stack.ts +231 -0
  39. package/src/sandbox/errors.ts +34 -0
  40. package/src/sandbox/index.ts +832 -0
  41. package/src/sandbox/providers/blaxel.ts +35 -0
  42. package/src/sandbox/providers/cloudflare.ts +24 -0
  43. package/src/sandbox/providers/daytona.ts +34 -0
  44. package/src/sandbox/providers/docker.ts +17 -0
  45. package/src/sandbox/providers/e2b.ts +36 -0
  46. package/src/sandbox/providers/index.ts +107 -0
  47. package/src/sandbox/providers/local.ts +13 -0
  48. package/src/sandbox/providers/modal.ts +55 -0
  49. package/src/sandbox/providers/none.ts +13 -0
  50. package/src/sandbox/providers/runloop.ts +32 -0
  51. package/src/sandbox/providers/selfhosted.ts +96 -0
  52. package/src/sandbox/providers/types.ts +38 -0
  53. package/src/sandbox/providers/vercel.ts +29 -0
  54. package/src/sandbox/recording.ts +286 -0
  55. package/src/sandbox/routing/backend-resolver.ts +189 -0
  56. package/src/sandbox/routing/routing-session.ts +455 -0
  57. package/src/sandbox/select.ts +371 -0
  58. package/src/sandbox/selfhosted/capabilities.ts +255 -0
  59. package/src/sandbox/selfhosted/control-rpc.ts +351 -0
  60. package/src/sandbox/selfhosted/session.ts +930 -0
  61. package/src/sandbox/selfhosted/testing.ts +230 -0
  62. package/src/sandbox/stream-port.ts +185 -0
  63. package/src/sandbox/stream-token.ts +90 -0
  64. package/src/sandbox/terminal-server.ts +203 -0
  65. package/src/sandbox-computer.ts +835 -0
@@ -0,0 +1,351 @@
1
+ // The selfhosted CONTROL-PLANE transport seam (the M3/M4 layering boundary).
2
+ //
3
+ // `ControlRpc` is the ONE seam the `SelfhostedSession` depends on to reach a
4
+ // user's enrolled machine: request/reply addressed by the subject
5
+ // `agent.<workspaceId>.<agentId>.rpc`, payloads encoded/decoded via
6
+ // `@opengeni/agent-proto` (the single-source-of-truth wire types). The session
7
+ // knows NOTHING about NATS — it speaks only `ControlRpc`.
8
+ //
9
+ // M3 ships TWO implementors behind this interface:
10
+ // - `NatsControlRpc` — a thin wrapper over a NATS request/reply connection
11
+ // (the existing `@opengeni/events` bus connection). Constructed LAZILY: if
12
+ // NATS/the relay is not configured it surfaces `agent_offline` rather than
13
+ // throwing at construction, so boot never requires a live NATS.
14
+ // - `MockAgentResponder` — an in-process test double that answers
15
+ // exec/fs.read/fs.write/ping (and the rest of the op table) without any
16
+ // broker, so the session surface + the AgentError→reason mapping are unit-
17
+ // and integration-testable with no live NATS (that is M4).
18
+ //
19
+ // M4 will HARDEN/REPLACE `NatsControlRpc` (NATS Accounts, real request
20
+ // hardening, retries) behind this SAME `ControlRpc` interface — design for that,
21
+ // do not duplicate.
22
+
23
+ import {
24
+ AgentError,
25
+ ControlRequest,
26
+ ControlResponse,
27
+ ErrorCode,
28
+ } from "@opengeni/agent-proto";
29
+ import type { CapabilityUnavailableReason } from "@opengeni/contracts";
30
+
31
/**
 * The subset of `CapabilityUnavailableReason` (imported from
 * `@opengeni/contracts`) that this module's selfhosted error type can carry.
 * It is exported from here so that callers of the error mapping in this module
 * need a single import for both the error class and its reason union.
 */
export type SelfhostedUnavailableReason = Extract<
  CapabilityUnavailableReason,
  | "agent_offline"
  | "agent_reconnecting"
  | "consent_required"
  | "display_unavailable"
>;
39
+
40
/**
 * Transport seam for the selfhosted control plane. It exposes exactly one
 * operation, `request`: deliver a `ControlRequest` to the agent addressed by
 * `subject` (see `subjectFor(workspaceId, agentId)`) and resolve with the
 * agent's `ControlResponse`.
 *
 * Contract for every implementor (the M3 ruling):
 *  - A missing responder or a request timeout must NOT be reported as an
 *    exception that could be read as "not found".
 *  - Instead, resolve with a `ControlResponse` whose `error` is an `AgentError`
 *    with code `AGENT_OFFLINE` (nobody answered at all) or — when the
 *    implementor can tell it was only a transient blip — `TIMEOUT`, which the
 *    session surfaces as `agent_reconnecting`.
 *  - Agent-offline must never be allowed to look like a provider NotFound:
 *    that would cold-create a rival box for a user's real machine.
 */
export interface ControlRpc {
  /**
   * @param subject Control-plane subject of the target agent.
   * @param req     The request to deliver.
   * @param opts    `timeoutMs`: how long to wait for the reply, in milliseconds.
   * @returns The agent's response, or a synthesized error response as
   *          described in the interface contract.
   */
  request(
    subject: string,
    req: ControlRequest,
    opts: { timeoutMs: number },
  ): Promise<ControlResponse>;
}
60
+
61
/**
 * Build the control-plane RPC subject for an enrolled agent. The agent's
 * subscription on this subject IS the registry (the binding two-plane decision).
 *
 * @param workspaceId Workspace that owns the agent.
 * @param agentId     Identifier of the enrolled agent.
 * @returns `agent.<workspaceId>.<agentId>.rpc`
 */
export function subjectFor(workspaceId: string, agentId: string): string {
  const prefix = "agent.";
  const suffix = ".rpc";
  return prefix + workspaceId + "." + agentId + suffix;
}
66
+
67
+ // ── The runtime error taxonomy for a selfhosted control op ────────────────────
68
+
69
/**
 * Runtime-level error for a failed selfhosted control op, i.e. an op whose
 * `ControlResponse` carried an `AgentError` (including the synthesized
 * no-responder / timeout errors). Instances are plain value carriers; the
 * classification itself lives in `agentErrorToControlError`.
 *
 * Fields:
 *  - `code`         — the wire `ErrorCode` (single source of truth).
 *  - `reason`       — the capability/liveness reason (`agent_offline`,
 *                     `agent_reconnecting`, `consent_required`, ...), or
 *                     `null` for op-level errors that are not a
 *                     machine-liveness condition.
 *  - `retryable`    — whether the caller should re-resolve and retry.
 *  - `fenced`       — the op was rejected by the epoch fence (default false).
 *  - `draining`     — the agent is draining (default false).
 *  - `agentOffline` — the agent could not be reached (default false).
 *  - `osNotFound`   — an OS-level "path/ref does not exist" (default false).
 *                     This is NOT a "the box is gone" signal: a selfhosted
 *                     machine is not recreatable, so neither this flag nor
 *                     AGENT_OFFLINE may license a cold re-create.
 *  - `detail`       — free-form string key/values copied from the agent error
 *                     (default `{}`).
 */
export class SelfhostedControlError extends Error {
  readonly name = "SelfhostedControlError";
  readonly code: ErrorCode;
  readonly reason: SelfhostedUnavailableReason | null;
  readonly retryable: boolean;
  readonly fenced: boolean;
  readonly draining: boolean;
  readonly agentOffline: boolean;
  readonly osNotFound: boolean;
  readonly detail: Record<string, string>;

  /**
   * @param input Required: `message`, `code`, `reason`, `retryable`. The
   *              optional flags default to `false` and `detail` to `{}`.
   */
  constructor(input: {
    message: string;
    code: ErrorCode;
    reason: SelfhostedUnavailableReason | null;
    retryable: boolean;
    fenced?: boolean;
    draining?: boolean;
    agentOffline?: boolean;
    osNotFound?: boolean;
    detail?: Record<string, string>;
  }) {
    super(input.message);

    // Mandatory classification.
    const { code, reason, retryable } = input;
    this.code = code;
    this.reason = reason;
    this.retryable = retryable;

    // Optional flags: absent means "does not apply".
    const { fenced, draining, agentOffline, osNotFound, detail } = input;
    this.fenced = fenced ?? false;
    this.draining = draining ?? false;
    this.agentOffline = agentOffline ?? false;
    this.osNotFound = osNotFound ?? false;
    this.detail = detail ?? {};
  }
}
120
+
121
/**
 * Map an `AgentError` (taken from a `ControlResponse`) to the runtime
 * `SelfhostedControlError`. THE load-bearing mapping (the M3 ruling):
 *
 *  - AGENT_OFFLINE    → reason `agent_offline`, `agentOffline=true`,
 *                       `osNotFound=false` (NEVER a provider NotFound),
 *                       not retryable.
 *  - TIMEOUT          → reason `agent_reconnecting`, retryable (a transient
 *                       blip; the turn pauses, then retries against the
 *                       re-resolved active sandbox).
 *  - CONSENT_REQUIRED → reason `consent_required`, not retryable.
 *  - DRAINING         → no capability reason, `draining=true`, retryable.
 *  - FENCED           → no capability reason, `fenced=true`, retryable (the
 *                       caller re-resolves and retries).
 *  - NOT_FOUND        → an OS-level path/ref NotFound: `osNotFound=true`, no
 *                       liveness reason, retryable as reported by the agent.
 *                       This is not a box-gone NotFound and must never license
 *                       a cold re-create.
 *  - anything else    → op-level failure: no reason, retryable as reported by
 *                       the agent.
 *
 * The resulting message is `err.message` when it is non-empty. Otherwise a
 * code-specific default is used (or `agent error (<code>)` for the catch-all
 * branch), so an agent that sends an empty message still yields a readable
 * error.
 *
 * @param err The wire error to classify.
 * @returns The classified runtime error. It is returned, not thrown.
 */
export function agentErrorToControlError(err: AgentError): SelfhostedControlError {
  const { code } = err;
  const detail = err.detail ?? {};
  // Prefer the agent's own message; fall back only when it is empty/missing.
  const messageOr = (fallback: string): string => err.message || fallback;

  switch (code) {
    case ErrorCode.ERROR_CODE_AGENT_OFFLINE:
      return new SelfhostedControlError({
        message: messageOr("the enrolled agent is offline"),
        code,
        reason: "agent_offline",
        retryable: false,
        agentOffline: true,
        detail,
      });
    case ErrorCode.ERROR_CODE_TIMEOUT:
      // A timeout is a transient blip: the agent may be reconnecting.
      return new SelfhostedControlError({
        message: messageOr("the enrolled agent did not respond in time"),
        code,
        reason: "agent_reconnecting",
        retryable: true,
        detail,
      });
    case ErrorCode.ERROR_CODE_CONSENT_REQUIRED:
      return new SelfhostedControlError({
        message: messageOr("the op requires consent that has not been granted"),
        code,
        reason: "consent_required",
        retryable: false,
        detail,
      });
    case ErrorCode.ERROR_CODE_DRAINING:
      return new SelfhostedControlError({
        message: messageOr("the agent is draining and cannot accept new work"),
        code,
        reason: null,
        retryable: true,
        draining: true,
        detail,
      });
    case ErrorCode.ERROR_CODE_FENCED:
      return new SelfhostedControlError({
        message: messageOr("a stale op was fenced by the epoch guard; re-resolve and retry"),
        code,
        reason: null,
        retryable: true,
        fenced: true,
        detail,
      });
    case ErrorCode.ERROR_CODE_NOT_FOUND:
      // A real "the file/ref does not exist". osNotFound lets a fs-layer caller
      // 404 the path; it is deliberately not a machine-liveness condition.
      return new SelfhostedControlError({
        message: messageOr("the referenced path or ref does not exist"),
        code,
        reason: null,
        retryable: Boolean(err.retryable),
        osNotFound: true,
        detail,
      });
    default:
      // OS / UNSUPPORTED / STREAM / PROTOCOL / UNSPECIFIED — an op-level failure.
      return new SelfhostedControlError({
        message: messageOr(`agent error (${code})`),
        code,
        reason: null,
        retryable: Boolean(err.retryable),
        detail,
      });
  }
}
213
+
214
/**
 * Synthesize an AGENT_OFFLINE `AgentError`. The control plane uses it when no
 * agent answers on the subject at all.
 *
 * @param message Human-readable text; defaults to a generic "offline" message.
 * @returns A non-retryable `AgentError` with an empty `detail` map.
 */
export function offlineAgentError(message = "no agent responded (offline)"): AgentError {
  const synthesized: AgentError = {
    code: ErrorCode.ERROR_CODE_AGENT_OFFLINE,
    message,
    retryable: false,
    detail: {},
  };
  return synthesized;
}
224
+
225
/**
 * Synthesize a TIMEOUT `AgentError`. The control plane uses it when a
 * responder existed but the request timed out (a transient blip, surfaced as
 * "reconnecting").
 *
 * @param message Human-readable text; defaults to a generic timeout message.
 * @returns A retryable `AgentError` with an empty `detail` map.
 */
export function timeoutAgentError(message = "the agent did not respond in time"): AgentError {
  const synthesized: AgentError = {
    code: ErrorCode.ERROR_CODE_TIMEOUT,
    message,
    retryable: true,
    detail: {},
  };
  return synthesized;
}
235
+
236
+ // ── NatsControlRpc — the thin request/reply wrapper (M4 hardens this) ─────────
237
+
238
/**
 * The smallest NATS request/reply surface that `NatsControlRpc` relies on. It
 * mirrors the shape of the `nats` client's `request` call WITHOUT importing
 * `nats` into this runtime leaf; the API/worker injects the live connection
 * (the same `@opengeni/events` bus connection).
 *
 * The factory handed to `NatsControlRpc` may resolve to `null` when NATS is
 * not configured (boot must not require a live NATS). In that case
 * `NatsControlRpc` answers every request with an offline response instead of
 * throwing.
 */
export interface NatsRequestConnection {
  /**
   * @param subject Subject to publish the request on.
   * @param payload Encoded request bytes.
   * @param opts    `timeout`: reply deadline (NatsControlRpc passes its
   *                `timeoutMs` value through unchanged).
   * @returns The reply message; only its `data` bytes are used.
   */
  request(
    subject: string,
    payload: Uint8Array,
    opts: { timeout: number },
  ): Promise<{ data: Uint8Array }>;
}
253
+
254
/** The NATS error `code` that marks "no responder on the subject" (NATS 503).
 * The selfhosted control plane reads this as `agent_offline`, NEVER a NotFound. */
const NATS_NO_RESPONDERS_CODE = "503";

/**
 * Decide whether a thrown transport error means "nobody is subscribed on the
 * subject".
 *
 * An error matches when either:
 *  - its `code` property equals 503, as the string `"503"` or the number
 *    `503`; or
 *  - its message contains "no responders" (case-insensitive) or `503` as a
 *    standalone token.
 *
 * The `503` message match is anchored on word boundaries. Without them, any
 * message that merely contains those digits (a port such as `:15030`, a
 * duration such as `15030ms`) would be classified as "no responders". This
 * check runs before the timeout check, so it would also shadow a genuine
 * timeout.
 *
 * @param err Whatever the transport threw; may be a non-Error value.
 * @returns `true` when the error indicates no responder.
 */
function isNoRespondersError(err: unknown): boolean {
  const code = (err as { code?: unknown } | null | undefined)?.code;
  const codeIsComparable = typeof code === "string" || typeof code === "number";
  if (codeIsComparable && String(code) === NATS_NO_RESPONDERS_CODE) {
    return true;
  }
  const message = err instanceof Error ? err.message : String(err);
  return /no responders|\b503\b/i.test(message);
}
266
+
267
/**
 * Decide whether a thrown transport error is a request timeout.
 *
 * Matches when the error's string `code` contains "timeout" (nats.js uses
 * "TIMEOUT" for a request timeout) or, failing that, when its message contains
 * "timeout" or "timed out". Both checks are case-insensitive.
 *
 * @param err Whatever the transport threw; may be a non-Error value.
 * @returns `true` when the error looks like a timeout.
 */
function isRequestTimeoutError(err: unknown): boolean {
  const maybeCode = (err as { code?: unknown })?.code;
  const codeSaysTimeout = typeof maybeCode === "string" && /timeout/i.test(maybeCode);
  if (codeSaysTimeout) {
    return true;
  }
  const text = err instanceof Error ? err.message : String(err);
  return /timeout|timed out/i.test(text);
}
276
+
277
/**
 * A thin `ControlRpc` over a NATS request/reply connection.
 *
 * The connection comes from a LAZY async factory that is first invoked on the
 * first `request`, so boot never requires a live NATS. Outcomes of the factory:
 *  - resolves to a connection → memoized and reused;
 *  - resolves to `null` (NATS not configured) → memoized; every request is
 *    answered with an offline response;
 *  - throws/rejects → the current request is answered with an offline
 *    response and NOTHING is cached, so the next request dials again.
 *
 * A missing connection, a no-responder error, a request timeout, or any other
 * transport failure each resolve to a `ControlResponse` carrying a synthesized
 * `AgentError` (AGENT_OFFLINE / TIMEOUT) — never a thrown transport error and
 * never a NotFound.
 *
 * M4 replaces the factory's body with the Accounts-scoped, hardened
 * connection; this class's shape does not change.
 */
export class NatsControlRpc implements ControlRpc {
  private readonly connect: () => Promise<NatsRequestConnection | null>;
  /** `undefined` = not dialed yet (or must re-dial); `null` = NATS not configured. */
  private connection: NatsRequestConnection | null | undefined;

  /**
   * @param connect Lazy connection factory; may resolve to `null` when NATS is
   *                not configured.
   */
  constructor(connect: () => Promise<NatsRequestConnection | null>) {
    this.connect = connect;
  }

  /**
   * Return the memoized connection, dialing through the factory when nothing
   * is cached. A failed dial yields `null` for this call only.
   */
  private async resolveConnection(): Promise<NatsRequestConnection | null> {
    if (this.connection !== undefined) {
      return this.connection;
    }
    try {
      this.connection = await this.connect();
    } catch {
      // A connect failure is an OFFLINE condition, not a crash. Do not cache
      // it: leaving `connection` undefined lets the next request retry the
      // factory instead of staying offline for the life of this instance.
      return null;
    }
    return this.connection ?? null;
  }

  /**
   * Send `req` on `subject` and resolve with the decoded reply, or with a
   * synthesized offline/timeout response when the transport fails.
   *
   * @param subject Control-plane subject of the target agent.
   * @param req     Request to encode and send.
   * @param opts    `timeoutMs` is forwarded to the connection as `timeout`.
   */
  async request(
    subject: string,
    req: ControlRequest,
    opts: { timeoutMs: number },
  ): Promise<ControlResponse> {
    const conn = await this.resolveConnection();
    if (!conn) {
      // No NATS configured / not reachable → the agent is unaddressable → offline.
      return offlineControlResponse(req.requestId);
    }
    const payload = ControlRequest.encode(req).finish();
    try {
      const reply = await conn.request(subject, payload, { timeout: opts.timeoutMs });
      return ControlResponse.decode(reply.data);
    } catch (err) {
      if (isNoRespondersError(err)) {
        // No subscriber on the subject at all → the machine is offline.
        return offlineControlResponse(req.requestId);
      }
      if (isRequestTimeoutError(err)) {
        // A responder may exist but the request timed out → a transient blip.
        return timeoutControlResponse(req.requestId);
      }
      // Any other transport error → treat as offline (never a NotFound). The
      // cached connection may have been torn down, so drop it and let the next
      // request re-dial.
      this.connection = undefined;
      return offlineControlResponse(req.requestId);
    }
  }
}
342
+
343
/**
 * Build a `ControlResponse` for `requestId` that carries a synthesized
 * AGENT_OFFLINE error and no result.
 */
export function offlineControlResponse(requestId: string): ControlResponse {
  const error = offlineAgentError();
  return { requestId, error, result: undefined };
}
347
+
348
/**
 * Build a `ControlResponse` for `requestId` that carries a synthesized TIMEOUT
 * error (surfaced as "reconnecting") and no result.
 */
export function timeoutControlResponse(requestId: string): ControlResponse {
  const error = timeoutAgentError();
  return { requestId, error, result: undefined };
}