@shogo-ai/worker 1.7.4 → 1.7.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -33,11 +33,14 @@
33
33
  */
34
34
  import { type ChildProcess, spawn } from 'node:child_process';
35
35
  import { createHmac, randomBytes } from 'node:crypto';
36
- import { existsSync, mkdirSync } from 'node:fs';
36
+ import { existsSync, mkdirSync, readdirSync } from 'node:fs';
37
37
  import { tmpdir } from 'node:os';
38
- import { join } from 'node:path';
38
+ import { dirname, join } from 'node:path';
39
39
  import { resolveRuntime, type ResolvedRuntime } from './runtime-resolver.ts';
40
- import type { RuntimeResolver } from './tunnel.ts';
40
+ import type { ResolveRejection, RuntimeResolver } from './tunnel.ts';
41
+ import { CloudFileTransport } from '@shogo-ai/sdk';
42
+ import { CloudSyncWatcher } from './cloud-sync-watcher.ts';
43
+ import { cloneProject, gitIsAvailable, isGitRepo } from './git-cloner.ts';
41
44
 
42
45
  /** Port range for random allocation (mirrors apps/api desktop manager). */
43
46
  const PORT_RANGE_START = 37100;
@@ -83,8 +86,6 @@ export interface ProjectSpawnConfig {
83
86
  aiProxyToken?: string;
84
87
  /** Tech-stack id (for runtime to seed correct template if PROJECT_DIR is empty). */
85
88
  techStackId?: string;
86
- /** Template id passed through to the runtime. */
87
- templateId?: string;
88
89
  /** Friendly project name. */
89
90
  name?: string;
90
91
  /** Workspace id for this project. */
@@ -149,8 +150,55 @@ export interface WorkerRuntimeManagerOptions {
149
150
  * Prisma-derived secrets (AI proxy token, security policy, etc.).
150
151
  */
151
152
  enrichSpawnConfig?: (projectId: string, base: ProjectSpawnConfig) => Promise<ProjectSpawnConfig>;
153
+ /**
154
+ * Auto-pull configuration. When enabled, the manager will clone a
155
+ * project's workspace from Shogo Cloud into `<projectsDir>/<projectId>/`
156
+ * on first request, then keep edits in sync via a {@link CloudSyncWatcher}.
157
+ *
158
+ * This is what makes "pin a staging project to a paired VPS, send a
159
+ * webhook" work end-to-end without the user manually running
160
+ * `shogo project pull` first.
161
+ */
162
+ autoPull?: AutoPullOptions;
163
+ }
164
+
165
/**
 * Settings for the manager's auto-pull behaviour: cloning a project's
 * workspace into `<projectsDir>/<projectId>/` and (optionally) watching it
 * so local edits are pushed back to cloud.
 */
export interface AutoPullOptions {
  /**
   * Master switch. Off by default; the `worker start` command turns it on
   * for `cli_worker` instances unless `--no-auto-pull` is passed.
   */
  enabled: boolean;

  /**
   * Root directory holding one sub-directory per project workspace.
   * Required when auto-pull is enabled.
   */
  projectsDir: string;

  /**
   * Whether to watch the pulled workspace and push edits back to cloud.
   * Treated as `true` unless explicitly set to `false`.
   */
  watch?: boolean;

  /**
   * Whether to prefer the git smart-HTTP backend over the file transport
   * for the initial clone and the watcher's flush path.
   *
   * Treated as `true` unless explicitly `false`: the worker probes
   * `git --version` and falls back to the file transport when git is not
   * installed. Pass `false` (`--no-git` on `worker start`) to force the
   * file transport even when git is available — e.g. when outbound HTTPS
   * to git's pack RPC endpoints is blocked at the firewall.
   */
  useGit?: boolean;

  /** Logger override. When omitted the manager's own logger is used. */
  logger?: Pick<Console, 'log' | 'warn' | 'error'>;

  /**
   * Test seam: substitute fakes for individual git-cloner operations
   * without module-level mocking (which leaks across bun:test files).
   * Any operation left out falls back to the real implementation.
   */
  gitOps?: Partial<GitOpsAdapter>;
}
153
190
 
191
/**
 * The git-cloner operations the runtime manager invokes. Each member has
 * the exact signature of the real function it stands in for, so a fake can
 * be swapped in per-operation.
 */
export interface GitOpsAdapter {
  /** Clone a project's repository into a local directory. */
  cloneProject: typeof cloneProject;
  /** Probe whether a usable `git` is available. */
  gitIsAvailable: typeof gitIsAvailable;
  /** Report whether a directory already is a git repository. */
  isGitRepo: typeof isGitRepo;
}
197
+
198
/**
 * How a given project is kept in sync with cloud. Stored per project so
 * later consumers (the watcher, `stopAll`) can branch on the decision
 * without probing `git --version` again.
 */
export type SyncMode =
  | 'git'
  | 'files';
201
+
154
202
  /** Internal per-project runtime record. */
155
203
  interface InternalRuntime {
156
204
  projectId: string;
@@ -203,6 +251,81 @@ function splitPathAndQuery(pathWithQuery: string): { pathname: string; search: s
203
251
  return { pathname: pathWithQuery.slice(0, q), search: pathWithQuery.slice(q) };
204
252
  }
205
253
 
254
/**
 * Report whether `dir` has no entries.
 *
 * A directory that cannot be listed — because it does not exist, or for
 * any other reason `readdirSync` throws — is also reported as empty.
 */
function isDirEmpty(dir: string): boolean {
  let entryCount: number;
  try {
    entryCount = readdirSync(dir).length;
  } catch {
    // Listing failed: treat as "nothing there".
    return true;
  }
  return entryCount === 0;
}
263
+
264
/**
 * Build the multi-line, operator-facing error text raised when the worker
 * cannot determine an on-disk workspace for a project.
 *
 * The message is deliberately verbose: it states the reason for the
 * failure and then lists every remedy (flag, env var, manual
 * `shogo project pull`) so the operator's first sight of the failure also
 * contains the fix.
 *
 * @param projectId   Project the runtime was being spawned for.
 * @param reason      Which misconfiguration was detected.
 * @param expectedDir Path where a pre-pulled workspace was expected. Only
 *                    used for `'auto-pull-disabled'`; when `null` the
 *                    generic `<projectsDir>/<projectId>` placeholder is
 *                    shown instead of the literal text "null".
 * @returns The full message, lines joined with `\n`.
 */
export function formatWorkspaceMisconfigError(
  projectId: string,
  reason: 'no-auto-pull-config' | 'no-projects-dir' | 'auto-pull-disabled',
  expectedDir: string | null,
): string {
  const lines: string[] = [];
  lines.push(`Cannot spawn agent-runtime for project ${projectId}: no workspace directory available.`);
  lines.push('');
  switch (reason) {
    case 'no-auto-pull-config':
      lines.push(
        ' Reason: WorkerRuntimeManager was constructed without an `autoPull` config and ' +
          'no caller-provided `projectDir` was found on disk.',
      );
      lines.push(
        ' This usually means a programmatic embedder forgot to wire up enrichSpawnConfig ' +
          'or autoPull. CLI users should not see this — please file a bug.',
      );
      break;
    case 'no-projects-dir':
      lines.push(
        ' Reason: auto-pull is configured but `projectsDir` is empty. The worker needs a ' +
          'persistent root directory under which it can store cloned project workspaces.',
      );
      break;
    case 'auto-pull-disabled': {
      // `expectedDir` is nullable in the signature; never print "null" to
      // the operator — fall back to the canonical layout placeholder.
      const shownDir = expectedDir ?? `<projectsDir>/${projectId}`;
      lines.push(
        ' Reason: auto-pull was disabled (--no-auto-pull) and the expected pre-pulled ' +
          `workspace at ${shownDir} is missing or empty.`,
      );
      break;
    }
  }
  lines.push('');
  lines.push(' How to fix (pick one):');
  lines.push(' 1. Re-enable auto-pull (default). Drop the --no-auto-pull flag and restart');
  lines.push(' the worker. The first inbound request for this project will clone its');
  lines.push(' workspace from Shogo Cloud into <projectsDir>/<projectId>/.');
  lines.push('');
  lines.push(' 2. Pre-pull manually with `shogo project pull <projectId>` before starting');
  lines.push(' the worker. Use this when you want full control over when the clone runs');
  lines.push(' (slow links, scheduled maintenance windows, etc.).');
  lines.push('');
  lines.push(' 3. Point the worker at an existing workspace by setting either:');
  lines.push(' --projects-dir <path> (per-invocation flag)');
  lines.push(' SHOGO_PROJECTS_DIR=<path> (env var, persists across restarts)');
  lines.push(' shogo config set projectsDir <path>');
  lines.push(' Whichever path you pick must contain a subdirectory named after the ');
  lines.push(` project id (e.g. <path>/${projectId}/) populated with the project's source.`);
  lines.push('');
  lines.push(' Docs: https://shogo.ai/docs/self-hosted-worker#workspace-seeding');
  return lines.join('\n');
}
328
+
206
329
  export class WorkerRuntimeManager implements RuntimeResolver {
207
330
  private readonly opts: WorkerRuntimeManagerOptions;
208
331
  private readonly log: Pick<Console, 'log' | 'warn' | 'error'>;
@@ -212,6 +335,14 @@ export class WorkerRuntimeManager implements RuntimeResolver {
212
335
  private resolved: ResolvedRuntime | null = null;
213
336
  private stopped = false;
214
337
 
338
+ /** Active watchers per projectId, keyed by projectId. Stopped in stopAll. */
339
+ private readonly watchers = new Map<string, CloudSyncWatcher>();
340
+ /** Projects we've already pulled (or attempted to pull) this lifetime. */
341
+ private readonly pulledProjects = new Set<string>();
342
+ /** Which sync strategy each project ended up using. Used by the watcher
343
+ * to pick between git commit-push and file-transport flush modes. */
344
+ private readonly syncModes = new Map<string, SyncMode>();
345
+
215
346
  constructor(opts: WorkerRuntimeManagerOptions = {}) {
216
347
  this.opts = opts;
217
348
  this.log = opts.logger ?? console;
@@ -265,6 +396,42 @@ export class WorkerRuntimeManager implements RuntimeResolver {
265
396
  return deriveRuntimeToken(projectId);
266
397
  }
267
398
 
399
+ /**
400
+ * Tell the {@link WorkerTunnel} why we returned `null` from
401
+ * `resolveLocalUrl`. The tunnel echoes this into the structured 502
402
+ * body so a Studio client reading the response can tell whether the
403
+ * request hit a path the worker has no opinion about
404
+ * (`/api/projects`) versus an /agent path that lacked a project
405
+ * context.
406
+ *
407
+ * Stable codes:
408
+ * CLI_WORKER_HAS_NO_DATA_API — non-/agent path; cli-worker
409
+ * instances are execution targets,
410
+ * not data sources. Studio is
411
+ * expected to gate stateful API
412
+ * routing on `instance.kind` and
413
+ * fall back to cloud for these.
414
+ * CLI_WORKER_NO_PROJECT_FOR_PATH — /agent path arrived without a
415
+ * `projectId` and we don't have a
416
+ * single active project to fall
417
+ * back to.
418
+ */
419
+ describeRejection(pathWithQuery: string, projectId?: string): ResolveRejection {
420
+ const { pathname } = splitPathAndQuery(pathWithQuery);
421
+ if (!(pathname.startsWith('/agent/') || pathname === '/agent')) {
422
+ return {
423
+ code: 'CLI_WORKER_HAS_NO_DATA_API',
424
+ message: `cli-worker only serves /agent/* paths; tried: ${pathname}`,
425
+ };
426
+ }
427
+ return {
428
+ code: 'CLI_WORKER_NO_PROJECT_FOR_PATH',
429
+ message:
430
+ `cli-worker received an /agent path without a single active project; ` +
431
+ `projectId=${projectId ?? 'none'}, path=${pathname}`,
432
+ };
433
+ }
434
+
268
435
  private async spawnConfigFor(projectId: string): Promise<ProjectSpawnConfig | null> {
269
436
  const base = this.opts.defaultSpawnConfig;
270
437
  if (!base) return null;
@@ -283,10 +450,21 @@ export class WorkerRuntimeManager implements RuntimeResolver {
283
450
  /**
284
451
  * Idempotently ensure a runtime exists for this projectId. Concurrent
285
452
  * callers share the in-flight spawn promise.
453
+ *
454
+ * Side effect: if `opts.autoPull.enabled` is true and this is the first
455
+ * time we've seen this projectId, we'll clone the workspace from cloud
456
+ * BEFORE spawning the runtime. Failures are non-fatal — the runtime
457
+ * still spawns and falls back to template-seeded defaults so the worker
458
+ * never bricks because the cloud Files API was momentarily down. Misconfiguration (no usable workspace directory) still throws.
286
459
  */
287
460
  async ensureRunning(projectId: string, config: ProjectSpawnConfig): Promise<RuntimeStatusInfo> {
288
461
  if (this.stopped) throw new Error('WorkerRuntimeManager is stopped');
289
462
 
463
+ // Apply auto-pull before any runtime spawn so the runtime's PROJECT_DIR
464
+ // points at a fully-cloned workspace. Idempotent: subsequent calls hit
465
+ // the `pulledProjects` short-circuit.
466
+ config = await this.maybeAutoPull(projectId, config);
467
+
290
468
  const existing = this.runtimes.get(projectId);
291
469
  if (existing?.status === 'running') {
292
470
  this.touch(projectId);
@@ -309,6 +487,245 @@ export class WorkerRuntimeManager implements RuntimeResolver {
309
487
  }
310
488
  }
311
489
 
490
+ /**
491
+ * Public entry point for tests + the `worker start` command to pre-warm
492
+ * a project's workspace without spawning anything. Internally idempotent.
493
+ */
494
+ async ensurePulled(projectId: string, config: ProjectSpawnConfig): Promise<ProjectSpawnConfig> {
495
+ return this.maybeAutoPull(projectId, config);
496
+ }
497
+
498
+ private async maybeAutoPull(projectId: string, config: ProjectSpawnConfig): Promise<ProjectSpawnConfig> {
499
+ const auto = this.opts.autoPull;
500
+
501
+ // ── Workspace-locatability invariants for the cli-worker ──
502
+ //
503
+ // The agent-runtime hard-falls-back to `/app/workspace` (a Docker
504
+ // convention) when none of WORKSPACE_DIR / AGENT_DIR / PROJECT_DIR
505
+ // are set. On a self-hosted VPS that path doesn't exist, so the
506
+ // runtime boots but every project-aware route silently serves
507
+ // empty state. To keep that bug from ever shipping again, the
508
+ // worker spawn path is required to either:
509
+ //
510
+ // (a) populate `cfg.projectDir` itself before reaching here
511
+ // (desktop AGPL adapter does this through `enrichSpawnConfig`,
512
+ // cloud sets WORKSPACE_DIR via Knative env vars), OR
513
+ // (b) carry an `autoPull` config with a `projectsDir` so this
514
+ // method can synthesise a per-project workspace path on
515
+ // disk and clone the cloud snapshot into it.
516
+ //
517
+ // Anything else is a misconfiguration — the runtime would either
518
+ // not find the workspace or scribble into a co-tenant's tree —
519
+ // so we throw with a multi-line operator-facing message instead
520
+ // of letting the agent-runtime print a deceptively-mild warning.
521
+ //
522
+ // The bypass for `enrichSpawnConfig` callers is deliberate: the
523
+ // desktop AGPL adapter wires up its own per-project Prisma-derived
524
+ // workspace path and does NOT need autoPull. We honour `projectDir`
525
+ // when the directory actually exists.
526
+ if (config.projectDir && existsSync(config.projectDir)) {
527
+ return config;
528
+ }
529
+
530
+ if (!auto) {
531
+ throw new Error(formatWorkspaceMisconfigError(projectId, 'no-auto-pull-config', null));
532
+ }
533
+
534
+ if (!auto.projectsDir) {
535
+ throw new Error(formatWorkspaceMisconfigError(projectId, 'no-projects-dir', null));
536
+ }
537
+
538
+ if (!auto.enabled) {
539
+ // Operator opted out of auto-pull. Honour a pre-pulled workspace
540
+ // (`shogo project pull <id>` lays one down at the canonical path);
541
+ // anything else is a misconfig because the runtime has nothing to
542
+ // operate on.
543
+ const candidate = join(auto.projectsDir, projectId);
544
+ if (existsSync(candidate) && !isDirEmpty(candidate)) {
545
+ return { ...config, projectDir: candidate };
546
+ }
547
+ throw new Error(formatWorkspaceMisconfigError(projectId, 'auto-pull-disabled', candidate));
548
+ }
549
+
550
+ if (this.pulledProjects.has(projectId)) {
551
+ // Already attempted in this process — return the canonical path
552
+ // so a previous failure doesn't strand the runtime on the
553
+ // /app/workspace fallback.
554
+ return { ...config, projectDir: join(auto.projectsDir, projectId) };
555
+ }
556
+
557
+ const projectDir = join(auto.projectsDir, projectId);
558
+ const log = auto.logger ?? this.log;
559
+ const git = {
560
+ cloneProject: auto.gitOps?.cloneProject ?? cloneProject,
561
+ gitIsAvailable: auto.gitOps?.gitIsAvailable ?? gitIsAvailable,
562
+ isGitRepo: auto.gitOps?.isGitRepo ?? isGitRepo,
563
+ };
564
+
565
+ // Mark before attempting so failures don't cause repeated re-pulls
566
+ // on every single request — the runtime will still start with an
567
+ // empty WORKSPACE_DIR and seed templates as a fallback.
568
+ this.pulledProjects.add(projectId);
569
+
570
+ try {
571
+ mkdirSync(projectDir, { recursive: true });
572
+ const isEmpty = isDirEmpty(projectDir);
573
+ const alreadyGitRepo = git.isGitRepo(projectDir);
574
+
575
+ // Strategy:
576
+ // 1. If git is available AND the dir is empty (no .git, no files):
577
+ // clone via smart-HTTP, then top-up `.shogo/` (SQLite, gitignored)
578
+ // via the file transport.
579
+ // 2. If git is available AND the dir already has a .git/: trust
580
+ // the existing clone. The watcher / a later `shogo project
581
+ // checkout` brings refs forward.
582
+ // 3. Otherwise (git unavailable, useGit=false, OR the dir has
583
+ // non-empty content with no .git/): fall back to file transport.
584
+ const wantGit = auto.useGit !== false;
585
+ const gitAvailable = wantGit ? await git.gitIsAvailable() : false;
586
+ const mode: SyncMode = (gitAvailable && (isEmpty || alreadyGitRepo)) ? 'git' : 'files';
587
+ this.syncModes.set(projectId, mode);
588
+
589
+ if (mode === 'git') {
590
+ if (isEmpty) {
591
+ log.log(`[WorkerRuntimeManager] auto-pull: git clone project ${projectId} into ${projectDir}`);
592
+ try {
593
+ const res = await git.cloneProject({
594
+ apiUrl: config.cloudUrl,
595
+ apiKey: config.apiKey,
596
+ projectId,
597
+ localDir: projectDir,
598
+ shallow: true,
599
+ logger: log,
600
+ });
601
+ log.log(`[WorkerRuntimeManager] auto-pull: ${projectId} cloned at ${res.commitSha.slice(0, 8)}`);
602
+ } catch (err: any) {
603
+ // Git clone failed — try the file transport as a fallback.
604
+ // We DON'T retry git on subsequent runs: the mode flip is
605
+ // sticky for this projectId's lifetime to avoid bouncing.
606
+ log.warn(
607
+ `[WorkerRuntimeManager] auto-pull: git clone failed for ${projectId} (${err?.message ?? err}); ` +
608
+ `falling back to CloudFileTransport.downloadAll`,
609
+ );
610
+ this.syncModes.set(projectId, 'files');
611
+ await this.fileTransportClone(projectId, projectDir, config, log);
612
+ }
613
+ } else if (alreadyGitRepo) {
614
+ log.log(`[WorkerRuntimeManager] auto-pull: ${projectId} already has .git/; skipping clone`);
615
+ }
616
+
617
+ // After a git clone, top-up gitignored `.shogo/` SQLite state via
618
+ // the file transport. `.shogo/` is excluded from git but the
619
+ // agent-runtime requires it for state continuity across pins.
620
+ if (this.syncModes.get(projectId) === 'git') {
621
+ await this.topUpShogoState(projectId, projectDir, config, log);
622
+ }
623
+ } else if (isEmpty) {
624
+ // Pure file-transport path (git unavailable or disabled).
625
+ await this.fileTransportClone(projectId, projectDir, config, log);
626
+ } else {
627
+ log.log(`[WorkerRuntimeManager] auto-pull: ${projectId} workspace already populated; skipping clone`);
628
+ }
629
+
630
+ // Spin up a watcher so locally written files sync back to cloud.
631
+ // We only need ONE watcher per project regardless of how many
632
+ // runtimes spawn for it. The watcher's mode mirrors the chosen
633
+ // sync strategy: git → commit+push on flush, files → PUT per file.
634
+ if (auto.watch !== false && !this.watchers.has(projectId)) {
635
+ try {
636
+ const transport = new CloudFileTransport({
637
+ apiUrl: config.cloudUrl,
638
+ apiKey: config.apiKey,
639
+ projectId,
640
+ localDir: projectDir,
641
+ });
642
+ const finalMode = this.syncModes.get(projectId) ?? 'files';
643
+ const watcher = new CloudSyncWatcher({
644
+ rootDir: projectDir,
645
+ transport,
646
+ logger: log,
647
+ mode: finalMode,
648
+ git: finalMode === 'git'
649
+ ? {
650
+ apiUrl: config.cloudUrl,
651
+ apiKey: config.apiKey,
652
+ projectId,
653
+ }
654
+ : undefined,
655
+ });
656
+ watcher.start();
657
+ this.watchers.set(projectId, watcher);
658
+ } catch (err: any) {
659
+ log.warn(`[WorkerRuntimeManager] auto-pull: watcher start failed for ${projectId}: ${err?.message ?? err}`);
660
+ }
661
+ }
662
+ } catch (err: any) {
663
+ log.warn(
664
+ `[WorkerRuntimeManager] auto-pull: failed for ${projectId} — runtime will fall back to template defaults. ` +
665
+ `(${err?.message ?? err})`,
666
+ );
667
+ }
668
+
669
+ // Always set projectDir so the runtime points at the (possibly empty)
670
+ // persistent location instead of a tmpdir. This keeps the runtime
671
+ // crash-resilient — restarts find the same workspace.
672
+ return { ...config, projectDir };
673
+ }
674
+
675
+ /** File-transport clone of an entire project workspace into `projectDir`. */
676
+ private async fileTransportClone(
677
+ projectId: string,
678
+ projectDir: string,
679
+ config: ProjectSpawnConfig,
680
+ log: Pick<Console, 'log' | 'warn' | 'error'>,
681
+ ): Promise<void> {
682
+ log.log(`[WorkerRuntimeManager] auto-pull: file-transport clone of ${projectId} into ${projectDir}`);
683
+ const transport = new CloudFileTransport({
684
+ apiUrl: config.cloudUrl,
685
+ apiKey: config.apiKey,
686
+ projectId,
687
+ localDir: projectDir,
688
+ });
689
+ const stats = await transport.downloadAll();
690
+ log.log(
691
+ `[WorkerRuntimeManager] auto-pull: ${projectId} downloaded ${stats.downloaded} files ` +
692
+ `(${stats.errors.length} errors)`,
693
+ );
694
+ }
695
+
696
+ /**
697
+ * After a git clone, the worker's workspace is missing `.shogo/`
698
+ * (the per-project SQLite state directory) because `.shogo/` is
699
+ * gitignored. Pull just those entries via the file transport so the
700
+ * agent-runtime sees consistent DB state on first spawn.
701
+ */
702
+ private async topUpShogoState(
703
+ projectId: string,
704
+ projectDir: string,
705
+ config: ProjectSpawnConfig,
706
+ log: Pick<Console, 'log' | 'warn' | 'error'>,
707
+ ): Promise<void> {
708
+ try {
709
+ const transport = new CloudFileTransport({
710
+ apiUrl: config.cloudUrl,
711
+ apiKey: config.apiKey,
712
+ projectId,
713
+ localDir: projectDir,
714
+ });
715
+ const manifest = await transport.listManifest();
716
+ const shogoEntries = manifest.filter((e: { path: string }) => e.path === '.shogo' || e.path.startsWith('.shogo/'));
717
+ if (shogoEntries.length === 0) return;
718
+ const stats = await transport.downloadFiles(shogoEntries);
719
+ log.log(
720
+ `[WorkerRuntimeManager] auto-pull: ${projectId} .shogo/ top-up downloaded ${stats.downloaded} ` +
721
+ `files (${stats.errors.length} errors)`,
722
+ );
723
+ } catch (err: any) {
724
+ // Non-fatal: the runtime will create a fresh SQLite db if needed.
725
+ log.warn(`[WorkerRuntimeManager] auto-pull: .shogo top-up failed for ${projectId}: ${err?.message ?? err}`);
726
+ }
727
+ }
728
+
312
729
  status(projectId: string): RuntimeStatusInfo | null {
313
730
  const r = this.runtimes.get(projectId);
314
731
  return r ? this.snapshot(r) : null;
@@ -345,6 +762,21 @@ export class WorkerRuntimeManager implements RuntimeResolver {
345
762
 
346
763
  async stopAll(signal: NodeJS.Signals = 'SIGTERM'): Promise<void> {
347
764
  this.stopped = true;
765
+ // Stop watchers FIRST so their final flush has a chance to PUT before
766
+ // we tear down processes. We don't await individual stops in parallel
767
+ // with runtime stops because watcher.stop() does network IO and we
768
+ // want it to complete before the runtime kill.
769
+ const watcherIds = Array.from(this.watchers.keys());
770
+ await Promise.all(watcherIds.map(async (id) => {
771
+ const w = this.watchers.get(id);
772
+ this.watchers.delete(id);
773
+ if (w) {
774
+ try { await w.stop(); } catch (err: any) {
775
+ this.log.warn(`[WorkerRuntimeManager] watcher stop ${id}: ${err?.message ?? err}`);
776
+ }
777
+ }
778
+ }));
779
+
348
780
  const ids = Array.from(this.runtimes.keys());
349
781
  await Promise.all(ids.map((id) => this.stop(id, signal).catch((err) => {
350
782
  this.log.error(`[WorkerRuntimeManager] Failed to stop ${id}: ${err?.message ?? err}`);
@@ -383,7 +815,7 @@ export class WorkerRuntimeManager implements RuntimeResolver {
383
815
  slot.apiServerPort = slot.agentPort + API_PORT_OFFSET;
384
816
  }
385
817
 
386
- const env = this.buildEnv(slot);
818
+ const env = this.buildEnv(slot, resolved.path);
387
819
  const cwd = this.resolveCwd(slot);
388
820
  const { command, args } = this.spawnCommand(resolved.path);
389
821
 
@@ -441,7 +873,7 @@ export class WorkerRuntimeManager implements RuntimeResolver {
441
873
  }
442
874
  }
443
875
 
444
- private buildEnv(slot: InternalRuntime): NodeJS.ProcessEnv {
876
+ private buildEnv(slot: InternalRuntime, runtimeBinPath: string): NodeJS.ProcessEnv {
445
877
  const cfg = slot.spawnConfig;
446
878
  const env: NodeJS.ProcessEnv = {
447
879
  ...(this.opts.env ?? process.env),
@@ -461,13 +893,42 @@ export class WorkerRuntimeManager implements RuntimeResolver {
461
893
  env.PROJECT_DIR = cfg.projectDir;
462
894
  env.WORKSPACE_DIR = cfg.projectDir;
463
895
  }
896
+ // Tell the agent-runtime to skip its built-in S3Sync — the worker is
897
+ // already running a CloudFileTransport watcher against this WORKSPACE_DIR.
898
+ // Without this both sides upload the same files and the watcher loops on
899
+ // its own writes.
900
+ if (this.opts.autoPull?.enabled) {
901
+ env.SHOGO_CLOUD_SYNC = '1';
902
+ }
464
903
  if (cfg.aiProxyUrl) env.AI_PROXY_URL = cfg.aiProxyUrl;
465
904
  if (cfg.aiProxyToken) env.AI_PROXY_TOKEN = cfg.aiProxyToken;
466
905
  if (cfg.techStackId) env.TECH_STACK_ID = cfg.techStackId;
467
- if (cfg.templateId) env.TEMPLATE_ID = cfg.templateId;
468
906
  if (cfg.name) env.AGENT_NAME = cfg.name;
469
907
  if (cfg.workspaceId) env.WORKSPACE_ID = cfg.workspaceId;
470
908
 
909
+ // Belt-and-suspenders: explicitly point the spawned agent-runtime at
910
+ // the WASM sidecar that ships next to its binary. The runtime's own
911
+ // `code-extractor.ts:getWasmDir()` would derive the same path from
912
+ // `dirname(process.execPath)` as a fallback, but exporting it here:
913
+ //
914
+ // (a) makes the resolved location observable via `env | grep
915
+ // TREE_SITTER` for an operator debugging a self-hosted box,
916
+ // (b) survives a future build-script regression that breaks the
917
+ // sidecar copy on a per-platform basis (the env var still
918
+ // points to the expected directory, so the loud failure in
919
+ // `code-extractor.ts:getLanguage()` reports the right path),
920
+ // (c) keeps explicit operator overrides working — the runtime
921
+ // reads `process.env.TREE_SITTER_WASM_DIR` first, so an
922
+ // operator who sets it externally still wins.
923
+ //
924
+ // We do NOT verify the directory exists here. The runtime's
925
+ // resolver does that check; if it's missing we want the loud
926
+ // runtime error (which lists every override knob), not a silent
927
+ // worker-side `process.env` deletion that hides the bundling bug.
928
+ if (!env.TREE_SITTER_WASM_DIR) {
929
+ env.TREE_SITTER_WASM_DIR = join(dirname(runtimeBinPath), 'tree-sitter-wasm');
930
+ }
931
+
471
932
  if (cfg.extraEnv) Object.assign(env, cfg.extraEnv);
472
933
  return env;
473
934
  }
package/src/lib/tunnel.ts CHANGED
@@ -21,6 +21,20 @@
21
21
  */
22
22
  import { hostname as osHostname, platform, arch as osArch } from 'node:os';
23
23
 
24
/**
 * Why {@link RuntimeResolver.resolveLocalUrl} declined to forward a
 * tunneled request. The tunnel copies both fields into the 502 response
 * body so that someone reading only the response — without worker logs —
 * can tell what happened.
 */
export interface ResolveRejection {
  /** Stable, machine-readable identifier in UPPER_SNAKE_CASE. */
  code: string;

  /** Human-readable explanation; should mention the actual request path. */
  message: string;
}
37
+
24
38
  /**
25
39
  * Pluggable resolver for the tunnel — provided by whoever owns the local
26
40
  * services that the cloud's tunneled requests should be forwarded to.
@@ -55,6 +69,14 @@ export interface RuntimeResolver {
55
69
 
56
70
  /** Status snapshot for a single project — used in metadata payloads. */
57
71
  status(projectId: string): { status: string; agentPort?: number } | null;
72
+
73
+ /**
74
+ * Describe why a path was rejected. Called by the tunnel after a
75
+ * `resolveLocalUrl` returned null so the structured 502 body can
76
+ * carry an actionable code + message. Optional — when absent the
77
+ * tunnel falls back to a generic `NO_LOCAL_RUNTIME` payload.
78
+ */
79
+ describeRejection?(pathWithQuery: string, projectId?: string): ResolveRejection;
58
80
  }
59
81
 
60
82
  interface TunnelRequest {
@@ -391,11 +413,24 @@ export class WorkerTunnel {
391
413
  try {
392
414
  const url = await this.resolveLocalUrl(msg.path, msg.projectId);
393
415
  if (!url) {
416
+ // Structured 502 body so future debuggers reading the response
417
+ // (without access to worker logs) can tell what happened. The
418
+ // resolver provides the code/message; the tunnel always echoes
419
+ // back the original path so the operator doesn't have to
420
+ // correlate request-ids to figure out which fetch failed.
421
+ const rejection: ResolveRejection = this.opts.resolver.describeRejection
422
+ ? this.opts.resolver.describeRejection(msg.path, msg.projectId)
423
+ : { code: 'NO_LOCAL_RUNTIME', message: `no local runtime available for path: ${msg.path}` };
394
424
  this.sendFrame({
395
425
  type: 'response',
396
426
  requestId: msg.requestId,
397
427
  status: 502,
398
- body: JSON.stringify({ error: 'No local runtime available for path' }),
428
+ headers: { 'content-type': 'application/json' },
429
+ body: JSON.stringify({
430
+ code: rejection.code,
431
+ message: rejection.message,
432
+ path: msg.path,
433
+ }),
399
434
  });
400
435
  return;
401
436
  }
@@ -636,6 +671,10 @@ export class WorkerTunnel {
636
671
  heartbeatLoop: () => self.heartbeatLoop(),
637
672
  connectWs: () => self.connectWs(),
638
673
  cleanupWs: () => self.cleanupWs(),
674
+ handleRequest: (msg: TunnelRequest) => self.handleRequest(msg),
675
+ installFakeWs: (fake: WebSocket) => {
676
+ self.ws = fake;
677
+ },
639
678
  getCloudUrl: () => self.getCloudUrl(),
640
679
  getWsBaseUrl: () => self.getWsBaseUrl(),
641
680
  buildWsUrl: () => self.buildWsUrl(),