npm - solo-cto-agent - Versions diffs - 1.3.0 → 1.3.2 - Mend

solo-cto-agent 1.3.0 → 1.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/CHANGELOG.md CHANGED Viewed

@@ -121,6 +121,14 @@ non-interactive verify in CI, and tear it all down with one command.
 ## Unreleased
+* ci: add VS Code extension auto-publish to release workflow
+* docs: add hero banner to README, update test badge to 996
+* fix: rewrite managedAgentReview to match real Managed Agents API
+* chore: vscode extension packaging verified (icon, license, gitignore)
 * fix: routine.js readTier import from personalization (not core)
 * fix: resolve 2 hanging tests + add vitest timeout config

package/README.md CHANGED Viewed

@@ -1,9 +1,13 @@
+<p align="center">
+  <img src="docs/hero-banner.png" alt="solo-cto-agent — AI code review for solo founders" width="720" />
+</p>
 # solo-cto-agent
-**Your AI coding agent already writes code. This makes it think like a CTO.**
+**Dual-agent code review, secret detection, and circuit breakers for solo founders.**
 [![npm](https://img.shields.io/npm/v/solo-cto-agent)](https://www.npmjs.com/package/solo-cto-agent)
-[![Test](https://img.shields.io/badge/tests-894%20passing-brightgreen)](https://github.com/seunghunbae-3svs/solo-cto-agent/actions/workflows/test.yml)
+[![Test](https://img.shields.io/badge/tests-996%20passing-brightgreen)](https://github.com/seunghunbae-3svs/solo-cto-agent/actions/workflows/test.yml)
 [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
 [![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg)](CONTRIBUTING.md)
@@ -90,9 +94,9 @@ The point is simple:
 * less repetitive setup work
 * less context loss between sessions
-* less AI slop in code and design
-* more useful criticism before you commit to bad ideas
-* more initiative from the agent on low-risk work
+* two models cross-checking each other's review (not one model's opinion)
+* actual criticism before you commit to bad ideas
+* secrets caught before they leave your machine
 ## What changes in practice
@@ -116,12 +120,12 @@ This is running on three private repos (Next.js + Supabase, Vite + React, Next.j
 | PRs opened | 53 |
 | PRs merged | 48 |
 | Mean time to merge | 0.64 hours |
-| Test suite | 894 tests, 48 files, all passing |
+| Test suite | 996 tests, 57 files, all passing |
 | CLI commands | 25 subcommands |
 | Skills | 8 (44 reference docs) |
-| npm version | 1.2.0 |
+| npm version | 1.3.2 |
-What is not there yet: dual-agent cross-review metrics are still accumulating (rate shows 0 because the structured scoring pipeline was deployed recently). Decision tracking is wired but the decision queue has not produced enough data for meaningful stats. Those will fill in over the next few weeks of normal use.
+Dual-agent cross-review and Managed Agents deep review are live and tested against real diffs. Decision tracking is wired but the decision queue has not produced enough data for meaningful stats yet.
 ## Who this is for
@@ -246,7 +250,7 @@ solo-cto-agent/
     product-repo/             # product repo scaffold (workflows, STATE.md, .env.example)
     builder-defaults/         # routing-policy.json, agent-scores.json
     workflows/                # solo-cto-review.yml (3-pass auto-review)
-  tests/                      # 894 tests across 48 files
+  tests/                      # 996 tests across 57 files
   benchmarks/                 # effectiveness reports, metrics
   docs/                       # claude.md, tier-matrix, configuration, policies
   examples/                   # real-world flows: build, ship, review, founder-workflow

package/bin/engine/routine.js CHANGED Viewed

@@ -123,8 +123,76 @@ function buildRoutineSchedules() {
 }
 // ============================================================================
-// CLAUDE MANAGED AGENTS
+// CLAUDE MANAGED AGENTS (v2 — real API, April 2026)
 // ============================================================================
+//
+// Flow: create agent → create environment → create session → send event → poll
+// Docs: https://platform.claude.com/docs/en/managed-agents/overview
+// Beta header: managed-agents-2026-04-01
+// Endpoints: /v1/agents, /v1/environments, /v1/sessions, /v1/sessions/{id}/events
+// ============================================================================
+/**
+ * Helper: make an HTTPS JSON request to the Anthropic API.
+ * Returns { statusCode, body } where body is parsed JSON.
+ */
+function _apiRequest(method, urlPath, apiKey, payload) {
+  return new Promise((resolve) => {
+    const body = payload ? JSON.stringify(payload) : undefined;
+    const req = https.request({
+      hostname: C.API_HOSTS.anthropic,
+      path: urlPath,
+      method,
+      headers: {
+        "Content-Type": "application/json",
+        "x-api-key": apiKey,
+        "anthropic-version": C.ANTHROPIC_API_VERSION,
+        "anthropic-beta": C.BETA_HEADERS.managedAgents,
+      },
+    }, (res) => {
+      let data = "";
+      res.on("data", (chunk) => (data += chunk));
+      res.on("end", () => {
+        try {
+          resolve({ statusCode: res.statusCode, body: JSON.parse(data) });
+        } catch {
+          resolve({ statusCode: res.statusCode, body: { raw: data } });
+        }
+      });
+    });
+    req.on("error", (e) => resolve({ statusCode: 0, body: { error: e.message } }));
+    req.setTimeout(C.TIMEOUTS.managedAgent, () => {
+      req.destroy(new Error("request timeout"));
+    });
+    if (body) req.write(body);
+    req.end();
+  });
+}
+/**
+ * Poll session until status is "idle" (agent finished) or timeout.
+ * Returns the full session object on success, null on timeout/error.
+ */
+async function _pollSession(sessionId, apiKey, timeoutMs) {
+  const deadline = Date.now() + timeoutMs;
+  const pollInterval = 3000; // 3s
+  while (Date.now() < deadline) {
+    const { statusCode, body } = await _apiRequest("GET", `/v1/sessions/${sessionId}`, apiKey);
+    if (statusCode !== 200) {
+      logWarn(`Poll failed (${statusCode}): ${JSON.stringify(body).slice(0, 200)}`);
+      return null;
+    }
+    if (body.status === "idle") return body;
+    if (body.status === "error" || body.status === "failed") {
+      logError(`Session entered error state: ${body.status}`);
+      return null;
+    }
+    await new Promise((r) => setTimeout(r, pollInterval));
+  }
+  logError(`Session poll timed out after ${timeoutMs / 1000}s`);
+  return null;
+}
 async function managedAgentReview(options = {}) {
   const tier = readTier();
@@ -193,92 +261,142 @@ ${errorPatterns}
    [SUMMARY] ...
    [NEXT ACTION] ...`;
+  const timeoutMs = CONFIG.managedAgents.sessionTimeoutMs || C.TIMEOUTS.managedAgent;
   if (options.dryRun) {
     logSection("Managed Agent Review — DRY RUN");
     logInfo(`Model: ${model}`);
     logInfo(`Diff size: ${(Buffer.byteLength(diff, "utf8") / 1024).toFixed(0)}KB`);
-    logInfo(`Timeout: ${CONFIG.managedAgents.sessionTimeoutMs / 1000}s`);
-    logInfo(`Beta header: ${CONFIG.managedAgents.betaHeader}`);
+    logInfo(`Timeout: ${timeoutMs / 1000}s`);
+    logInfo(`Beta header: ${C.BETA_HEADERS.managedAgents}`);
     logInfo(`Cost: standard token rates + $0.08/session-hour`);
+    logInfo("API flow: create agent → create env → create session → send event → poll");
     return null;
   }
   logSection("Managed Agent Deep Review");
-  logInfo(`Model: ${model} | Timeout: ${CONFIG.managedAgents.sessionTimeoutMs / 1000}s`);
+  logInfo(`Model: ${model} | Timeout: ${timeoutMs / 1000}s`);
   logInfo("Cost: standard token rates + $0.08/session-hour active runtime");
   const startTime = Date.now();
-  return new Promise((resolve, reject) => {
-    const body = JSON.stringify({
-      model,
+  // ── Step 1: Create or reuse agent ──
+  let agentId = options.agentId || CONFIG.managedAgents.agentId;
+  if (!agentId) {
+    logInfo("Creating agent...");
+    const agentRes = await _apiRequest("POST", "/v1/agents", apiKey, {
+      name: "solo-cto-deep-reviewer",
+      description: "CTO-level deep code reviewer for solo-cto-agent CLI.",
+      model: { id: model },
       system: systemPrompt,
-      messages: [{ role: "user", content: `Review this diff:\n\`\`\`diff\n${diff}\n\`\`\`` }],
-      max_tokens: C.LIMITS.maxTokensDeep,
-      tools: [{ type: "computer_20250124", name: "computer" }],
+      tools: [{ type: "agent_toolset_20260401" }],
     });
+    if (agentRes.statusCode >= 400 || !agentRes.body.id) {
+      logError(`Failed to create agent (${agentRes.statusCode}): ${JSON.stringify(agentRes.body).slice(0, 300)}`);
+      return null;
+    }
+    agentId = agentRes.body.id;
+    logInfo(`Agent created: ${agentId}`);
+  }
-    const req = https.request({
-      hostname: C.API_HOSTS.anthropic,
-      path: "/v1/managed_agents/sessions",
-      method: "POST",
-      headers: {
-        "Content-Type": "application/json",
-        "x-api-key": apiKey,
-        "anthropic-beta": CONFIG.managedAgents.betaHeader,
-        "anthropic-version": C.ANTHROPIC_API_VERSION,
-      },
-    }, (res) => {
-      let data = "";
-      res.on("data", (chunk) => (data += chunk));
-      res.on("end", () => {
-        const elapsed = (Date.now() - startTime) / 1000;
-        const sessionHours = elapsed / 3600;
-        const runtimeCost = (sessionHours * 0.08).toFixed(4);
-        if (res.statusCode >= 400) {
-          logError(`Managed Agent failed (${res.statusCode}): ${data.slice(0, 300)}`);
-          return resolve(null);
-        }
-        try {
-          const reviewParser = require("../review-parser");
-          const parseReviewResponse = reviewParser.parseReviewResponse;
-          const parsed = JSON.parse(data);
-          const text = parsed.content?.map(b => b.text).filter(Boolean).join("\n") || data;
-          const review = parseReviewResponse(text);
-          const inputTokens = parsed.usage?.input_tokens || Math.ceil(body.length / 4);
-          const outputTokens = parsed.usage?.output_tokens || Math.ceil(text.length / 4);
-          const tokenCost = estimateCost(inputTokens, outputTokens, model);
-          const totalCost = (parseFloat(tokenCost) + parseFloat(runtimeCost)).toFixed(4);
-          logSuccess(`Deep review complete (${elapsed.toFixed(1)}s)`);
-          logInfo(`Runtime cost: $${runtimeCost} | Token cost: $${tokenCost} | Total: $${totalCost}`);
-          resolve({
-            ...review,
-            raw: text,
-            sessionHours,
-            tokens: { input: inputTokens, output: outputTokens },
-            cost: { token: tokenCost, runtime: runtimeCost, total: totalCost },
-          });
-        } catch (e) {
-          logWarn(`Managed Agent response unparseable: ${e.message}`);
-          resolve(null);
-        }
-      });
-    });
-    req.on("error", (e) => {
-      logError(`Managed Agent network error: ${e.message}`);
-      resolve(null);
-    });
-    req.setTimeout(CONFIG.managedAgents.sessionTimeoutMs, () => {
-      req.destroy(new Error(`Managed Agent timeout after ${CONFIG.managedAgents.sessionTimeoutMs / 1000}s`));
+  // ── Step 2: Create or reuse environment ──
+  let envId = options.environmentId || CONFIG.managedAgents.environmentId;
+  if (!envId) {
+    logInfo("Creating environment...");
+    const envRes = await _apiRequest("POST", "/v1/environments", apiKey, {
+      name: "solo-cto-review-env",
+      config: { type: "cloud", networking: { type: "unrestricted" } },
     });
-    req.write(body);
-    req.end();
+    if (envRes.statusCode >= 400 || !envRes.body.id) {
+      logError(`Failed to create environment (${envRes.statusCode}): ${JSON.stringify(envRes.body).slice(0, 300)}`);
+      return null;
+    }
+    envId = envRes.body.id;
+    logInfo(`Environment created: ${envId}`);
+  }
+  // ── Step 3: Create session ──
+  logInfo("Creating session...");
+  const sessionRes = await _apiRequest("POST", "/v1/sessions", apiKey, {
+    agent: agentId,
+    environment_id: envId,
+    title: `deep-review-${new Date().toISOString().slice(0, 19)}`,
   });
+  if (sessionRes.statusCode >= 400 || !sessionRes.body.id) {
+    logError(`Failed to create session (${sessionRes.statusCode}): ${JSON.stringify(sessionRes.body).slice(0, 300)}`);
+    return null;
+  }
+  const sessionId = sessionRes.body.id;
+  logInfo(`Session created: ${sessionId}`);
+  // ── Step 4: Send user message event ──
+  logInfo("Sending diff for review...");
+  const eventRes = await _apiRequest("POST", `/v1/sessions/${sessionId}/events`, apiKey, {
+    events: [{
+      type: "user.message",
+      content: [{
+        type: "text",
+        text: `Review this diff:\n\`\`\`diff\n${diff}\n\`\`\`\n\nOutput your review in the standard format:\n[VERDICT] APPROVE | REQUEST_CHANGES | COMMENT\n[ISSUES] list each issue\n[SUMMARY] one-line summary\n[NEXT ACTION] suggested next steps`,
+      }],
+    }],
+  });
+  if (eventRes.statusCode >= 400) {
+    logError(`Failed to send event (${eventRes.statusCode}): ${JSON.stringify(eventRes.body).slice(0, 300)}`);
+    return null;
+  }
+  logInfo("Event sent — waiting for agent to complete...");
+  // ── Step 5: Poll until idle ──
+  const finalSession = await _pollSession(sessionId, apiKey, timeoutMs);
+  if (!finalSession) return null;
+  const elapsed = (Date.now() - startTime) / 1000;
+  const activeSeconds = finalSession.stats?.active_seconds || 0;
+  const sessionHours = activeSeconds / 3600;
+  const runtimeCost = (sessionHours * (C.PRICING.managedAgentRuntime || 0.08)).toFixed(4);
+  // ── Step 6: Fetch events to extract agent response ──
+  const eventsRes = await _apiRequest("GET", `/v1/sessions/${sessionId}/events`, apiKey);
+  if (eventsRes.statusCode >= 400) {
+    logError(`Failed to fetch events: ${eventsRes.statusCode}`);
+    return null;
+  }
+  const events = eventsRes.body.data || [];
+  const agentMessages = events.filter((e) => e.type === "agent.message");
+  const text = agentMessages
+    .flatMap((e) => (e.content || []).filter((b) => b.type === "text").map((b) => b.text))
+    .join("\n");
+  if (!text) {
+    logWarn("Agent session completed but no text response found.");
+    return null;
+  }
+  const reviewParser = require("../review-parser");
+  const review = reviewParser.parseReviewResponse(text);
+  const inputTokens = finalSession.usage?.input_tokens || 0;
+  const outputTokens = finalSession.usage?.output_tokens || 0;
+  const cacheTokens = finalSession.usage?.cache_creation_input_tokens || 0;
+  const tokenCost = estimateCost(inputTokens + cacheTokens, outputTokens, model);
+  const totalCost = (parseFloat(tokenCost) + parseFloat(runtimeCost)).toFixed(4);
+  logSuccess(`Deep review complete (${elapsed.toFixed(1)}s wall, ${activeSeconds.toFixed(1)}s active)`);
+  logInfo(`Runtime cost: $${runtimeCost} | Token cost: $${tokenCost} | Total: $${totalCost}`);
+  logInfo(`Session: ${sessionId} | Agent: ${agentId} | Env: ${envId}`);
+  return {
+    ...review,
+    raw: text,
+    sessionId,
+    agentId,
+    environmentId: envId,
+    activeSeconds,
+    sessionHours,
+    tokens: { input: inputTokens, output: outputTokens, cache: cacheTokens },
+    cost: { token: tokenCost, runtime: runtimeCost, total: totalCost },
+  };
 }
 module.exports = {

package/docs/hero-banner.png ADDED Viewed

Binary file

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "solo-cto-agent",
-  "version": "1.3.0",
+  "version": "1.3.2",
   "private": false,
   "description": "CTO-level AI agent toolkit for solo founders. Dual-agent review, circuit breakers, design quality gates, and session memory for Claude Cowork + OpenAI Codex.",
   "author": "seunghunbae-3svs",