npm - @valescoagency/runway - Versions diffs - 0.3.0 → 0.5.0 - Mend

@valescoagency/runway 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

package/README.md +107 -10
package/dist/commands/doctor.js +203 -2
package/dist/commands/run.js +70 -15
package/dist/config.js +53 -61
package/dist/git.js +43 -29
package/dist/github.js +136 -21
package/dist/linear.js +295 -63
package/dist/orchestrator.js +407 -115
package/dist/policy.js +76 -0
package/dist/prompts.js +44 -1
package/dist/subprocess.js +40 -0
package/dist/telemetry.js +31 -0
package/package.json +10 -1
package/prompts/implement.md +46 -2
package/templates/Dockerfile.claude-code.base +24 -0

package/dist/policy.js ADDED Viewed

@@ -0,0 +1,76 @@
+import { existsSync, readFileSync } from "node:fs";
+import { join } from "node:path";
+import { parse as parseYaml } from "yaml";
+import { z } from "zod";
+/**
+ * VA-352: per-repo + per-run write-path policy for the impl agent.
+ *
+ * Defaults are conservative — secrets and sandbox-internals are denied
+ * unless a repo policy or CLI override removes them. Repos that need
+ * agents to touch CI workflows (the common case) opt in by creating
+ * `.runway/policy.yml` with `allowedPaths`, or by passing
+ * `--allow-paths=` for a single invocation.
+ *
+ * The policy is reflected back to the agent in the rendered prompt
+ * (`prompts/implement.md`'s "Working style" denylist sentence) so the
+ * sentence the agent sees matches what runway will enforce at review
+ * time. Enforcement itself (refusing to push a PR that touches a
+ * denied path) lives in the reviewer pass — out of scope for this
+ * change; the goal here is that the agent gets a correct denylist
+ * and surfaces `IMPL: BLOCKED` when an AC requires a denied path.
+ *
+ * `DEFAULT_FORBIDDEN_PATHS` is the starting denylist used by
+ * `loadPolicy`. Entries are glob-style pattern strings, but nothing in
+ * this module evaluates them as globs: they are only compared by exact
+ * string equality (when allowed entries are removed) and rendered
+ * verbatim into the prompt bullet.
+ */
+export const DEFAULT_FORBIDDEN_PATHS = [
+    ".github/workflows/**",
+    ".env*",
+    "*.pem",
+    "*.key",
+    "pnpm-lock.yaml",
+    ".sandcastle/**",
+];
+const PolicyFileSchema = z.object({ // Shape of the policy file; both keys are optional lists of strings.
+    allowedPaths: z.array(z.string()).optional(),
+    forbiddenPaths: z.array(z.string()).optional(),
+});
+const POLICY_RELATIVE_PATH = join(".runway", "policy.yml"); // Joined onto `cwd` in `loadPolicy`; also reused there as the `source` label.
+/**
+ * Resolve the effective policy for `cwd`. Reads `.runway/policy.yml`
+ * when present, then applies any `--allow-paths` CLI override.
+ *
+ * Resolution order:
+ *   1. Start from `DEFAULT_FORBIDDEN_PATHS`.
+ *   2. If the policy file exists and sets `forbiddenPaths`, that list
+ *      REPLACES the defaults wholesale (it is not merged with them).
+ *   3. Each `allowedPaths` entry from the file is removed from the set.
+ *   4. Each `opts.allowPathsOverride` entry is removed from the set.
+ *
+ * Removal is by exact string equality against the stored pattern
+ * (`Set.delete`), not by glob matching: allowing
+ * `.github/workflows/ci.yml` does NOT lift `.github/workflows/**`.
+ *
+ * Errors from reading the file, parsing the YAML, or validating it
+ * against `PolicyFileSchema` are not caught here and propagate to the
+ * caller.
+ *
+ * @param cwd  Directory that may contain `.runway/policy.yml`.
+ * @param opts Optional. `opts.allowPathsOverride` is a list of pattern
+ *             strings to remove from the denylist for this invocation.
+ * @returns `{ forbiddenPaths, source }` — the remaining patterns as an
+ *          array (insertion order of the set), and a label built by
+ *          joining the contributing sources with " + " (the relative
+ *          policy path or "defaults", optionally followed by
+ *          "--allow-paths").
+ */
+export function loadPolicy(cwd, opts = {}) {
+    const sources = [];
+    let forbidden = new Set(DEFAULT_FORBIDDEN_PATHS);
+    const policyPath = join(cwd, POLICY_RELATIVE_PATH);
+    if (existsSync(policyPath)) {
+        sources.push(POLICY_RELATIVE_PATH);
+        const raw = readFileSync(policyPath, "utf8");
+        const parsed = PolicyFileSchema.parse(parseYaml(raw) ?? {}); // a null/undefined parse result is treated as an empty policy object
+        if (parsed.forbiddenPaths) {
+            forbidden = new Set(parsed.forbiddenPaths); // replaces the defaults; does not extend them
+        }
+        for (const allow of parsed.allowedPaths ?? [])
+            forbidden.delete(allow); // exact-string removal
+    }
+    else {
+        sources.push("defaults");
+    }
+    if (opts.allowPathsOverride?.length) {
+        for (const allow of opts.allowPathsOverride)
+            forbidden.delete(allow); // exact-string removal, applied after the file
+        sources.push("--allow-paths");
+    }
+    return {
+        forbiddenPaths: [...forbidden],
+        source: sources.join(" + "),
+    };
+}
+/**
+ * Render the bullet sentence the impl prompt shows the agent. Stable
+ * formatting so a missing path is visible in a diff.
+ *
+ * @param policy Object with a `forbiddenPaths` array of pattern strings
+ *               (the shape returned by `loadPolicy`).
+ * @returns A single Markdown bullet line. With an empty list it is a
+ *          fixed "no restrictions" sentence; otherwise each pattern is
+ *          wrapped in backticks, joined with ", " in array order, and
+ *          followed by the `IMPL: BLOCKED` instruction.
+ */
+export function renderForbiddenPathsBullet(policy) {
+    if (policy.forbiddenPaths.length === 0) {
+        return "- (No write-path restrictions for this repo. Use judgment.)";
+    }
+    const quoted = policy.forbiddenPaths.map((p) => `\`${p}\``).join(", ");
+    return `- Never modify ${quoted}. If the issue's acceptance criteria require modifying one of these paths, **stop and emit \`IMPL: BLOCKED — issue requires modifying <path>, which working-style policy forbids\`** — do not silently skip the work.`;
+}

package/dist/prompts.js CHANGED Viewed

@@ -1,6 +1,7 @@
 import { readFile } from "node:fs/promises";
 import { fileURLToPath } from "node:url";
 import { dirname, join } from "node:path";
+import { renderForbiddenPathsBullet } from "./policy.js";
 const __dirname = dirname(fileURLToPath(import.meta.url)); // Directory of this module, derived from its file URL.
 // Prompts ship with the runway package, NOT in the target repo's
 // .sandcastle/. Runway substitutes {{KEY}} placeholders before passing
@@ -22,13 +23,55 @@ export async function loadReviewPrompt() {
 export function renderPrompt(template, vars) { // Substitutes every {{KEY}} (KEY = one or more \w characters) in `template` with `vars[KEY]`.
     return template.replace(/\{\{(\w+)\}\}/g, (_, k) => vars[k] ?? `{{${k}}}`); // a null/undefined value leaves the placeholder text in place; "" replaces it with nothing
 }
-export function implementVars(issue) {
+export function implementVars(issue, opts = {}) { // opts.previousIterations and opts.policy are both optional
     return {
         ISSUE_IDENTIFIER: issue.identifier,
         ISSUE_TITLE: issue.title,
         ISSUE_DESCRIPTION: issue.description || "(no description)", // any falsy description (including "") gets the placeholder text
+        // VA-349: empty for iteration 1, a structured summary for 2+.
+        PREVIOUS_ITERATIONS: opts.previousIterations ?? "", // only null/undefined fall back to ""
+        // VA-352: render the working-style denylist from the active policy
+        // so the agent never sees a hardcoded list that diverges from what
+        // runway actually enforces.
+        POLICY_FORBIDDEN_BULLET: opts.policy
+            ? renderForbiddenPathsBullet(opts.policy)
+            : "", // no policy supplied: the variable is an empty string
     };
 }
+/**
+ * VA-349: build the "## Previous iterations" block that gets prepended
+ * to iteration N+1's prompt. Carries the agent's commit log and the
+ * tail of its final message so the next iteration doesn't re-explore
+ * the repo from scratch.
+ *
+ * @param args Object with:
+ *   - `iterationsRun`: interpolated as-is into the "already completed
+ *     N iteration(s)" sentence.
+ *   - `commits`: string; trimmed and placed in a fenced block, or
+ *     "(no commits yet)" when it trims to empty.
+ *   - `finalMessageTail`: string; trimmed and placed in a fenced block,
+ *     or "(no output captured)" when it trims to empty.
+ * @returns The Markdown block as one string, lines joined with "\n"
+ *          and ending with a trailing newline.
+ *
+ * NOTE(review): the inputs are embedded inside triple-backtick fences
+ * without escaping; text that itself contains a fence line would
+ * presumably close the block early — confirm whether callers need to
+ * guard against that.
+ */
+export function buildIterationSummary(args) {
+    const { iterationsRun, commits, finalMessageTail } = args;
+    return [
+        "## Previous iterations",
+        "",
+        `You have already completed ${iterationsRun} iteration(s) on this issue.`,
+        "Do **not** re-explore the repository — pick up where the last iteration left off.",
+        "",
+        "### Commits so far on this branch",
+        "",
+        "```",
+        commits.trim() || "(no commits yet)",
+        "```",
+        "",
+        "### Tail of the last iteration's final message",
+        "",
+        "```",
+        finalMessageTail.trim() || "(no output captured)",
+        "```",
+        "",
+    ].join("\n");
+}
+/**
+ * Keep the tail of an iteration's stdout small enough to fit alongside the prompt.
+ *
+ * Returns `stdout` unchanged when its length is at most `maxChars`.
+ * Otherwise returns a truncation-marker line followed by the last
+ * `maxChars` UTF-16 code units of `stdout`, so the result is slightly
+ * longer than `maxChars`.
+ *
+ * NOTE(review): with `maxChars = 0` and non-empty input, `slice(-0)` is
+ * `slice(0)` and the whole string is returned after the marker; pass a
+ * positive limit.
+ *
+ * @param stdout   Text to truncate.
+ * @param maxChars Maximum number of trailing code units to keep (default 2000).
+ * @returns The original text or its marked tail.
+ */
+export function tailOfMessage(stdout, maxChars = 2000) {
+    if (stdout.length <= maxChars)
+        return stdout;
+    return `…(earlier output truncated)\n${stdout.slice(-maxChars)}`;
+}
 export function reviewVars(args) {
     return {
         ISSUE_IDENTIFIER: args.issue.identifier,

package/dist/subprocess.js ADDED Viewed

@@ -0,0 +1,40 @@
+import { Effect } from "effect";
+import { execa } from "execa";
+/**
+ * VA-358: scoped subprocess runner. Spawns a child via `execa`, awaits
+ * its result, and — critically — sends SIGKILL on Effect interruption
+ * (Ctrl-C, parent fiber failure, timeout, etc.). Without this, a hung
+ * `git push`, stalled `gh pr create`, or in-flight sandcastle agent
+ * would survive process exit as an orphan.
+ *
+ * `classifyError` translates the raw thrown value (an `ExecaError`,
+ * usually) into the caller's typed error ADT — this is the same hook
+ * we used in VA-356's `Effect.tryPromise({try, catch})`, kept here so
+ * the gateway methods can preserve their existing `GhCliMissing` /
+ * `PushFailed` / `PrCreateFailed` discrimination.
+ *
+ * Structure: `Effect.acquireUseRelease` with three steps —
+ *   - acquire: call `execa(bin, args, opts)` to start the child;
+ *   - use: await the value returned by `execa`, mapping a rejection
+ *     through `classifyError`;
+ *   - release: best-effort SIGKILL, only when the handle exposes
+ *     `kill`, its `exitCode` is still `null`, and it is not already
+ *     marked `killed`.
+ *
+ * @param bin  Executable passed as the first argument to `execa`.
+ * @param args Argument list passed to `execa`.
+ * @param opts Options object passed through to `execa` unchanged.
+ * @param classifyError Function used as the `catch` handler of
+ *        `Effect.tryPromise`; receives the thrown value.
+ * @returns The Effect built by `Effect.acquireUseRelease` as described
+ *          above.
+ */
+export const runExecaScoped = (bin, args, opts, classifyError) => Effect.acquireUseRelease(Effect.sync(() => execa(bin, args, opts)), (proc) => Effect.tryPromise({
+    try: () => proc,
+    catch: classifyError,
+}), (proc) => Effect.sync(() => {
+    // SIGKILL the child if it's still running when the Effect's
+    // fiber is interrupted, errors, or times out. `exitCode` is
+    // null until the child has settled.
+    //
+    // Defensive: execa's `ResultPromise` carries `.kill / .exitCode /
+    // .killed`, but vitest mocks frequently return a bare `Promise`
+    // (no kill handle). Skip the kill in that case — there's no
+    // child to clean up.
+    const killable = proc;
+    if (typeof killable.kill === "function" &&
+        killable.exitCode === null &&
+        !killable.killed) {
+        try {
+            killable.kill("SIGKILL");
+        }
+        catch {
+            // Race: child may have exited between the check and the
+            // kill. Best-effort cleanup; nothing more to do.
+        }
+    }
+}));

package/dist/telemetry.js ADDED Viewed

@@ -0,0 +1,31 @@
+import { NodeSdk } from "@effect/opentelemetry";
+import { OTLPTraceExporter } from "@opentelemetry/exporter-trace-otlp-http";
+import { BatchSpanProcessor } from "@opentelemetry/sdk-trace-base";
+/**
+ * VA-358: OpenTelemetry tracer for the orchestrator.
+ *
+ * Env-conditional: only wires the OTLP HTTP exporter when
+ * `OTEL_EXPORTER_OTLP_ENDPOINT` (or `OTEL_EXPORTER_OTLP_TRACES_ENDPOINT`)
+ * is set. Otherwise we provide `NodeSdk.layerEmpty` — `Effect.withSpan`
+ * still works in the program but the resulting spans get dropped on the
+ * floor rather than spamming the network. Dev-mode default: no traces,
+ * no warnings.
+ *
+ * Standard OpenTelemetry env vars apply (the exporter reads them
+ * itself): `OTEL_EXPORTER_OTLP_ENDPOINT`, `OTEL_EXPORTER_OTLP_HEADERS`,
+ * `OTEL_SERVICE_NAME`, etc.
+ *
+ * `isTracingEnabled` is a truthiness check on those two endpoint
+ * variables, so a variable that is present but set to the empty string
+ * counts as unset.
+ */
+const isTracingEnabled = () => {
+    return Boolean(process.env.OTEL_EXPORTER_OTLP_ENDPOINT ||
+        process.env.OTEL_EXPORTER_OTLP_TRACES_ENDPOINT);
+};
+const liveLayer = NodeSdk.layer(() => ({ // Layer used when tracing is enabled; the config is produced by this callback.
+    resource: {
+        serviceName: process.env.OTEL_SERVICE_NAME ?? "runway", // falls back to "runway" only when the variable is undefined
+        serviceVersion: process.env.RUNWAY_VERSION, // undefined when RUNWAY_VERSION is not set
+    },
+    spanProcessor: new BatchSpanProcessor(new OTLPTraceExporter()), // exporter is constructed with no explicit options
+}));
+export const TelemetryLive = isTracingEnabled() // evaluated once, when this module is first loaded; later env changes are not observed
+    ? liveLayer
+    : NodeSdk.layerEmpty;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@valescoagency/runway",
-  "version": "0.3.0",
+  "version": "0.5.0",
   "description": "Linear-driven orchestrator + scaffolder for coding agents on Sandcastle. `runway init` scaffolds a target repo (sandcastle + varlock + 1Password); `runway run` drains a Linear queue against it; `runway doctor`, `runway upgrade`, `runway upgrade-repo` round out the lifecycle.",
   "license": "MIT",
   "author": {
@@ -40,8 +40,17 @@
   ],
   "dependencies": {
     "@ai-hero/sandcastle": "^0.5.10",
+    "@effect/opentelemetry": "^0.63.0",
     "@linear/sdk": "^41.0.0",
+    "@opentelemetry/api": "^1.9.1",
+    "@opentelemetry/exporter-trace-otlp-http": "^0.217.0",
+    "@opentelemetry/resources": "^2.7.1",
+    "@opentelemetry/sdk-trace-base": "^2.7.1",
+    "@opentelemetry/sdk-trace-node": "^2.7.1",
+    "@opentelemetry/semantic-conventions": "^1.40.0",
+    "effect": "^3.21.2",
     "execa": "^9.5.2",
+    "yaml": "^2.9.0",
     "zod": "^3.23.8"
   },
   "devDependencies": {

package/prompts/implement.md CHANGED Viewed

@@ -6,6 +6,8 @@ You are an autonomous coding agent working on a single Linear issue.
 {{ISSUE_DESCRIPTION}}
+{{PREVIOUS_ITERATIONS}}
 # Repository context
 You are operating inside a clean checkout of the target repository on a
@@ -29,9 +31,51 @@ fresh branch named `agent/{{ISSUE_IDENTIFIER}}`. Branch off `main`.
 - If the issue is ambiguous and you can't make a reasonable judgment
   call, stop and explain what's missing in your final message — runway
   will route to a human.
-- Never modify `.github/workflows/**`, `.env*`, `*.pem`, `*.key`,
-  `pnpm-lock.yaml` (unless the task is a dep bump), or `.sandcastle/**`.
+{{POLICY_FORBIDDEN_BULLET}}
 # Stop conditions
 When all five "done" criteria pass, stop. Don't keep polishing.
+# Termination contract — REQUIRED
+End **every** response with exactly one of these markers, on its own
+line, as the **last non-empty line** of your message. Nothing after it.
+- `IMPL: DONE` — all five "done" criteria are met. The reviewer pass
+  will run next; no further iterations are needed.
+- `IMPL: BLOCKED — <one-line reason>` — you cannot proceed without
+  human input (issue is ambiguous, requires a decision outside the
+  agent's purview, conflicts with a working-style constraint, hits a
+  permission wall, etc.). Runway will route the issue to a human with
+  your reason attached and will not run the reviewer pass.
+- `IMPL: CONTINUE` — you made progress but the work isn't done yet.
+  Runway will run another iteration so you can pick up where you left
+  off.
+Examples:
+```
+…all tests pass, typecheck clean, lint clean. Commit pushed.
+IMPL: DONE
+```
+```
+…the issue's acceptance criteria require modifying
+`.github/workflows/release.yml`, which the working-style policy
+forbids. Cannot proceed.
+IMPL: BLOCKED — issue requires CI workflow changes that working-style policy forbids
+```
+```
+…added the migration and the RLS policy. Tests for the policy
+helper still need to be written next iteration.
+IMPL: CONTINUE
+```
+The marker is parsed mechanically by runway. A missing or malformed
+marker is treated as `CONTINUE` for back-compat, but **always** emit
+one explicitly — silent completions waste budget on re-exploration.

package/templates/Dockerfile.claude-code.base CHANGED Viewed

@@ -39,6 +39,30 @@ RUN if ! getent group $AGENT_GID >/dev/null; then \
       groupmod -g $AGENT_GID node; \
     fi \
  && usermod -u $AGENT_UID -g $AGENT_GID -d /home/agent -m -l agent node
+# VA-351: bake the container env up front so agents don't manually
+# work around host-path leaks, missing pnpm, or unset HOME on every
+# iteration. Without these, every agent run repeats the same
+# corepack/TURBO_CACHE_DIR/HOME setup commands — see VA-312's run log
+# for the receipts.
+ENV HOME=/home/agent
+ENV XDG_CACHE_HOME=/home/agent/.cache
+ENV TURBO_CACHE_DIR=/tmp/turbo-cache
+ENV npm_config_cache=/home/agent/.cache/npm
+# Pre-create the cache dirs with agent ownership so the first pnpm/turbo
+# run doesn't have to create or chown them: the XDG cache root, the npm
+# cache beneath it, and the turbo cache under /tmp.
+RUN mkdir -p /home/agent/.cache /home/agent/.cache/npm /tmp/turbo-cache \
+ && chown -R $AGENT_UID:$AGENT_GID /home/agent/.cache /tmp/turbo-cache
+# Bake pnpm via corepack at build time so `pnpm` is on PATH inside the
+# container before any agent command runs. Pin a default; target repos
+# can override at runtime via `packageManager` in package.json +
+# `corepack use`.
+RUN corepack enable \
+ && corepack prepare pnpm@10.0.0 --activate
 USER ${AGENT_UID}:${AGENT_GID}
 # Install Claude Code CLI