solo-cto-agent 1.3.0 → 1.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +8 -0
- package/README.md +13 -9
- package/bin/engine/routine.js +187 -69
- package/docs/hero-banner.png +0 -0
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -121,6 +121,14 @@ non-interactive verify in CI, and tear it all down with one command.
|
|
|
121
121
|
|
|
122
122
|
## Unreleased
|
|
123
123
|
|
|
124
|
+
* ci: add VS Code extension auto-publish to release workflow
|
|
125
|
+
|
|
126
|
+
* docs: add hero banner to README, update test badge to 996
|
|
127
|
+
|
|
128
|
+
* fix: rewrite managedAgentReview to match real Managed Agents API
|
|
129
|
+
|
|
130
|
+
* chore: vscode extension packaging verified (icon, license, gitignore)
|
|
131
|
+
|
|
124
132
|
* fix: routine.js readTier import from personalization (not core)
|
|
125
133
|
|
|
126
134
|
* fix: resolve 2 hanging tests + add vitest timeout config
|
package/README.md
CHANGED
|
@@ -1,9 +1,13 @@
|
|
|
1
|
+
<p align="center">
|
|
2
|
+
<img src="docs/hero-banner.png" alt="solo-cto-agent — AI code review for solo founders" width="720" />
|
|
3
|
+
</p>
|
|
4
|
+
|
|
1
5
|
# solo-cto-agent
|
|
2
6
|
|
|
3
|
-
**
|
|
7
|
+
**Dual-agent code review, secret detection, and circuit breakers for solo founders.**
|
|
4
8
|
|
|
5
9
|
[](https://www.npmjs.com/package/solo-cto-agent)
|
|
6
|
-
[](https://github.com/seunghunbae-3svs/solo-cto-agent/actions/workflows/test.yml)
|
|
7
11
|
[](LICENSE)
|
|
8
12
|
[](CONTRIBUTING.md)
|
|
9
13
|
|
|
@@ -90,9 +94,9 @@ The point is simple:
|
|
|
90
94
|
|
|
91
95
|
* less repetitive setup work
|
|
92
96
|
* less context loss between sessions
|
|
93
|
-
*
|
|
94
|
-
*
|
|
95
|
-
*
|
|
97
|
+
* two models cross-checking each other's review (not one model's opinion)
|
|
98
|
+
* actual criticism before you commit to bad ideas
|
|
99
|
+
* secrets caught before they leave your machine
|
|
96
100
|
|
|
97
101
|
## What changes in practice
|
|
98
102
|
|
|
@@ -116,12 +120,12 @@ This is running on three private repos (Next.js + Supabase, Vite + React, Next.j
|
|
|
116
120
|
| PRs opened | 53 |
|
|
117
121
|
| PRs merged | 48 |
|
|
118
122
|
| Mean time to merge | 0.64 hours |
|
|
119
|
-
| Test suite |
|
|
123
|
+
| Test suite | 996 tests, 57 files, all passing |
|
|
120
124
|
| CLI commands | 25 subcommands |
|
|
121
125
|
| Skills | 8 (44 reference docs) |
|
|
122
|
-
| npm version | 1.2
|
|
126
|
+
| npm version | 1.3.2 |
|
|
123
127
|
|
|
124
|
-
|
|
128
|
+
Dual-agent cross-review and Managed Agents deep review are live and tested against real diffs. Decision tracking is wired but the decision queue has not produced enough data for meaningful stats yet.
|
|
125
129
|
|
|
126
130
|
## Who this is for
|
|
127
131
|
|
|
@@ -246,7 +250,7 @@ solo-cto-agent/
|
|
|
246
250
|
product-repo/ # product repo scaffold (workflows, STATE.md, .env.example)
|
|
247
251
|
builder-defaults/ # routing-policy.json, agent-scores.json
|
|
248
252
|
workflows/ # solo-cto-review.yml (3-pass auto-review)
|
|
249
|
-
tests/ #
|
|
253
|
+
tests/ # 996 tests across 57 files
|
|
250
254
|
benchmarks/ # effectiveness reports, metrics
|
|
251
255
|
docs/ # claude.md, tier-matrix, configuration, policies
|
|
252
256
|
examples/ # real-world flows: build, ship, review, founder-workflow
|
package/bin/engine/routine.js
CHANGED
|
@@ -123,8 +123,76 @@ function buildRoutineSchedules() {
|
|
|
123
123
|
}
|
|
124
124
|
|
|
125
125
|
// ============================================================================
|
|
126
|
-
// CLAUDE MANAGED AGENTS
|
|
126
|
+
// CLAUDE MANAGED AGENTS (v2 — real API, April 2026)
|
|
127
127
|
// ============================================================================
|
|
128
|
+
//
|
|
129
|
+
// Flow: create agent → create environment → create session → send event → poll
|
|
130
|
+
// Docs: https://platform.claude.com/docs/en/managed-agents/overview
|
|
131
|
+
// Beta header: managed-agents-2026-04-01
|
|
132
|
+
// Endpoints: /v1/agents, /v1/environments, /v1/sessions, /v1/sessions/{id}/events
|
|
133
|
+
// ============================================================================
|
|
134
|
+
|
|
135
|
+
/**
 * Helper: make an HTTPS JSON request to the Anthropic API.
 *
 * The returned promise never rejects. It resolves with `{ statusCode, body }`:
 *   - `body` is the parsed JSON when the response text is valid JSON;
 *   - `body` is `{ raw: <response text> }` when it is not (including empty);
 *   - `statusCode` is 0 and `body` is `{ error: <message> }` when the request
 *     fails: network error, timeout, or a response that is closed before it
 *     has been fully received.
 *
 * @param {string} method   HTTP method, e.g. "GET" or "POST".
 * @param {string} urlPath  Request path, e.g. "/v1/sessions".
 * @param {string} apiKey   Sent as the `x-api-key` header.
 * @param {object} [payload] JSON-serialised and sent as the body when truthy.
 * @returns {Promise<{statusCode: number, body: object}>}
 */
function _apiRequest(method, urlPath, apiKey, payload) {
  return new Promise((resolve) => {
    // A promise settles only once, so whichever of end/error/close fires first
    // decides the result and the later calls to resolve() are ignored.
    const fail = (err) =>
      resolve({ statusCode: 0, body: { error: err?.message ?? "request failed" } });

    const body = payload ? JSON.stringify(payload) : undefined;
    const headers = {
      "Content-Type": "application/json",
      "x-api-key": apiKey,
      "anthropic-version": C.ANTHROPIC_API_VERSION,
      "anthropic-beta": C.BETA_HEADERS.managedAgents,
    };
    if (body) {
      // Byte length, not string length: the payload may contain non-ASCII text.
      headers["Content-Length"] = Buffer.byteLength(body);
    }

    const req = https.request({
      hostname: C.API_HOSTS.anthropic,
      path: urlPath,
      method,
      headers,
    }, (res) => {
      // Collect raw bytes and decode once at the end. Decoding chunk by chunk
      // corrupts any multi-byte UTF-8 character split across two chunks.
      const chunks = [];
      res.on("data", (chunk) => {
        chunks.push(typeof chunk === "string" ? Buffer.from(chunk) : chunk);
      });
      res.on("end", () => {
        const data = Buffer.concat(chunks).toString("utf8");
        let parsed;
        try {
          parsed = JSON.parse(data);
        } catch {
          parsed = { raw: data };
        }
        resolve({ statusCode: res.statusCode, body: parsed });
      });
      // Once the response has started, a dropped connection or a timeout
      // surfaces on `res`, not `req`. Without these handlers "end" would never
      // fire and the promise would stay pending forever.
      res.on("error", fail);
      res.on("close", () => fail(new Error("response closed before completion")));
    });

    req.on("error", fail);
    req.setTimeout(C.TIMEOUTS.managedAgent, () => {
      req.destroy(new Error("request timeout"));
    });
    if (body) req.write(body);
    req.end();
  });
}
|
|
171
|
+
|
|
172
|
+
/**
 * Poll session until status is "idle" (agent finished) or timeout.
 * Returns the full session object on success, null on timeout/error.
 *
 * Each iteration issues `GET /v1/sessions/{sessionId}` through `_apiRequest`.
 * Any non-200 response, or a session reporting a failure status, ends the
 * poll immediately with `null` (after logging). Otherwise the loop waits up
 * to 3 seconds and polls again until `timeoutMs` has elapsed.
 *
 * @param {string} sessionId  Session to poll.
 * @param {string} apiKey     Forwarded to `_apiRequest`.
 * @param {number} timeoutMs  Overall polling budget in milliseconds.
 * @returns {Promise<object|null>} The session body once idle, otherwise null.
 */
async function _pollSession(sessionId, apiKey, timeoutMs) {
  const deadline = Date.now() + timeoutMs;
  const pollInterval = 3000; // 3s
  // NOTE(review): "terminated" is included on the assumption that the sessions
  // API can report it as a terminal status — confirm against the API reference.
  // If it is never returned, the extra entry is simply never matched.
  const failureStatuses = new Set(["error", "failed", "terminated"]);

  while (Date.now() < deadline) {
    const { statusCode, body } = await _apiRequest("GET", `/v1/sessions/${sessionId}`, apiKey);
    if (statusCode !== 200) {
      logWarn(`Poll failed (${statusCode}): ${JSON.stringify(body).slice(0, 200)}`);
      return null;
    }

    // `body` is whatever JSON the server sent; guard against a literal `null`.
    const status = body?.status;
    if (status === "idle") return body;
    if (failureStatuses.has(status)) {
      logError(`Session entered error state: ${status}`);
      return null;
    }

    // Never sleep past the deadline: once the budget is spent no further poll
    // will happen, so a full interval would only delay the timeout report.
    const remainingMs = deadline - Date.now();
    if (remainingMs <= 0) break;
    await new Promise((r) => setTimeout(r, Math.min(pollInterval, remainingMs)));
  }

  logError(`Session poll timed out after ${timeoutMs / 1000}s`);
  return null;
}
|
|
128
196
|
|
|
129
197
|
async function managedAgentReview(options = {}) {
|
|
130
198
|
const tier = readTier();
|
|
@@ -193,92 +261,142 @@ ${errorPatterns}
|
|
|
193
261
|
[SUMMARY] ...
|
|
194
262
|
[NEXT ACTION] ...`;
|
|
195
263
|
|
|
264
|
+
const timeoutMs = CONFIG.managedAgents.sessionTimeoutMs || C.TIMEOUTS.managedAgent;
|
|
265
|
+
|
|
196
266
|
if (options.dryRun) {
|
|
197
267
|
logSection("Managed Agent Review — DRY RUN");
|
|
198
268
|
logInfo(`Model: ${model}`);
|
|
199
269
|
logInfo(`Diff size: ${(Buffer.byteLength(diff, "utf8") / 1024).toFixed(0)}KB`);
|
|
200
|
-
logInfo(`Timeout: ${
|
|
201
|
-
logInfo(`Beta header: ${
|
|
270
|
+
logInfo(`Timeout: ${timeoutMs / 1000}s`);
|
|
271
|
+
logInfo(`Beta header: ${C.BETA_HEADERS.managedAgents}`);
|
|
202
272
|
logInfo(`Cost: standard token rates + $0.08/session-hour`);
|
|
273
|
+
logInfo("API flow: create agent → create env → create session → send event → poll");
|
|
203
274
|
return null;
|
|
204
275
|
}
|
|
205
276
|
|
|
206
277
|
logSection("Managed Agent Deep Review");
|
|
207
|
-
logInfo(`Model: ${model} | Timeout: ${
|
|
278
|
+
logInfo(`Model: ${model} | Timeout: ${timeoutMs / 1000}s`);
|
|
208
279
|
logInfo("Cost: standard token rates + $0.08/session-hour active runtime");
|
|
209
280
|
|
|
210
281
|
const startTime = Date.now();
|
|
211
282
|
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
283
|
+
// ── Step 1: Create or reuse agent ──
|
|
284
|
+
let agentId = options.agentId || CONFIG.managedAgents.agentId;
|
|
285
|
+
if (!agentId) {
|
|
286
|
+
logInfo("Creating agent...");
|
|
287
|
+
const agentRes = await _apiRequest("POST", "/v1/agents", apiKey, {
|
|
288
|
+
name: "solo-cto-deep-reviewer",
|
|
289
|
+
description: "CTO-level deep code reviewer for solo-cto-agent CLI.",
|
|
290
|
+
model: { id: model },
|
|
215
291
|
system: systemPrompt,
|
|
216
|
-
|
|
217
|
-
max_tokens: C.LIMITS.maxTokensDeep,
|
|
218
|
-
tools: [{ type: "computer_20250124", name: "computer" }],
|
|
292
|
+
tools: [{ type: "agent_toolset_20260401" }],
|
|
219
293
|
});
|
|
294
|
+
if (agentRes.statusCode >= 400 || !agentRes.body.id) {
|
|
295
|
+
logError(`Failed to create agent (${agentRes.statusCode}): ${JSON.stringify(agentRes.body).slice(0, 300)}`);
|
|
296
|
+
return null;
|
|
297
|
+
}
|
|
298
|
+
agentId = agentRes.body.id;
|
|
299
|
+
logInfo(`Agent created: ${agentId}`);
|
|
300
|
+
}
|
|
220
301
|
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
"anthropic-beta": CONFIG.managedAgents.betaHeader,
|
|
229
|
-
"anthropic-version": C.ANTHROPIC_API_VERSION,
|
|
230
|
-
},
|
|
231
|
-
}, (res) => {
|
|
232
|
-
let data = "";
|
|
233
|
-
res.on("data", (chunk) => (data += chunk));
|
|
234
|
-
res.on("end", () => {
|
|
235
|
-
const elapsed = (Date.now() - startTime) / 1000;
|
|
236
|
-
const sessionHours = elapsed / 3600;
|
|
237
|
-
const runtimeCost = (sessionHours * 0.08).toFixed(4);
|
|
238
|
-
|
|
239
|
-
if (res.statusCode >= 400) {
|
|
240
|
-
logError(`Managed Agent failed (${res.statusCode}): ${data.slice(0, 300)}`);
|
|
241
|
-
return resolve(null);
|
|
242
|
-
}
|
|
243
|
-
try {
|
|
244
|
-
const reviewParser = require("../review-parser");
|
|
245
|
-
const parseReviewResponse = reviewParser.parseReviewResponse;
|
|
246
|
-
|
|
247
|
-
const parsed = JSON.parse(data);
|
|
248
|
-
const text = parsed.content?.map(b => b.text).filter(Boolean).join("\n") || data;
|
|
249
|
-
const review = parseReviewResponse(text);
|
|
250
|
-
|
|
251
|
-
const inputTokens = parsed.usage?.input_tokens || Math.ceil(body.length / 4);
|
|
252
|
-
const outputTokens = parsed.usage?.output_tokens || Math.ceil(text.length / 4);
|
|
253
|
-
const tokenCost = estimateCost(inputTokens, outputTokens, model);
|
|
254
|
-
const totalCost = (parseFloat(tokenCost) + parseFloat(runtimeCost)).toFixed(4);
|
|
255
|
-
|
|
256
|
-
logSuccess(`Deep review complete (${elapsed.toFixed(1)}s)`);
|
|
257
|
-
logInfo(`Runtime cost: $${runtimeCost} | Token cost: $${tokenCost} | Total: $${totalCost}`);
|
|
258
|
-
|
|
259
|
-
resolve({
|
|
260
|
-
...review,
|
|
261
|
-
raw: text,
|
|
262
|
-
sessionHours,
|
|
263
|
-
tokens: { input: inputTokens, output: outputTokens },
|
|
264
|
-
cost: { token: tokenCost, runtime: runtimeCost, total: totalCost },
|
|
265
|
-
});
|
|
266
|
-
} catch (e) {
|
|
267
|
-
logWarn(`Managed Agent response unparseable: ${e.message}`);
|
|
268
|
-
resolve(null);
|
|
269
|
-
}
|
|
270
|
-
});
|
|
271
|
-
});
|
|
272
|
-
req.on("error", (e) => {
|
|
273
|
-
logError(`Managed Agent network error: ${e.message}`);
|
|
274
|
-
resolve(null);
|
|
275
|
-
});
|
|
276
|
-
req.setTimeout(CONFIG.managedAgents.sessionTimeoutMs, () => {
|
|
277
|
-
req.destroy(new Error(`Managed Agent timeout after ${CONFIG.managedAgents.sessionTimeoutMs / 1000}s`));
|
|
302
|
+
// ── Step 2: Create or reuse environment ──
|
|
303
|
+
let envId = options.environmentId || CONFIG.managedAgents.environmentId;
|
|
304
|
+
if (!envId) {
|
|
305
|
+
logInfo("Creating environment...");
|
|
306
|
+
const envRes = await _apiRequest("POST", "/v1/environments", apiKey, {
|
|
307
|
+
name: "solo-cto-review-env",
|
|
308
|
+
config: { type: "cloud", networking: { type: "unrestricted" } },
|
|
278
309
|
});
|
|
279
|
-
|
|
280
|
-
|
|
310
|
+
if (envRes.statusCode >= 400 || !envRes.body.id) {
|
|
311
|
+
logError(`Failed to create environment (${envRes.statusCode}): ${JSON.stringify(envRes.body).slice(0, 300)}`);
|
|
312
|
+
return null;
|
|
313
|
+
}
|
|
314
|
+
envId = envRes.body.id;
|
|
315
|
+
logInfo(`Environment created: ${envId}`);
|
|
316
|
+
}
|
|
317
|
+
|
|
318
|
+
// ── Step 3: Create session ──
|
|
319
|
+
logInfo("Creating session...");
|
|
320
|
+
const sessionRes = await _apiRequest("POST", "/v1/sessions", apiKey, {
|
|
321
|
+
agent: agentId,
|
|
322
|
+
environment_id: envId,
|
|
323
|
+
title: `deep-review-${new Date().toISOString().slice(0, 19)}`,
|
|
281
324
|
});
|
|
325
|
+
if (sessionRes.statusCode >= 400 || !sessionRes.body.id) {
|
|
326
|
+
logError(`Failed to create session (${sessionRes.statusCode}): ${JSON.stringify(sessionRes.body).slice(0, 300)}`);
|
|
327
|
+
return null;
|
|
328
|
+
}
|
|
329
|
+
const sessionId = sessionRes.body.id;
|
|
330
|
+
logInfo(`Session created: ${sessionId}`);
|
|
331
|
+
|
|
332
|
+
// ── Step 4: Send user message event ──
|
|
333
|
+
logInfo("Sending diff for review...");
|
|
334
|
+
const eventRes = await _apiRequest("POST", `/v1/sessions/${sessionId}/events`, apiKey, {
|
|
335
|
+
events: [{
|
|
336
|
+
type: "user.message",
|
|
337
|
+
content: [{
|
|
338
|
+
type: "text",
|
|
339
|
+
text: `Review this diff:\n\`\`\`diff\n${diff}\n\`\`\`\n\nOutput your review in the standard format:\n[VERDICT] APPROVE | REQUEST_CHANGES | COMMENT\n[ISSUES] list each issue\n[SUMMARY] one-line summary\n[NEXT ACTION] suggested next steps`,
|
|
340
|
+
}],
|
|
341
|
+
}],
|
|
342
|
+
});
|
|
343
|
+
if (eventRes.statusCode >= 400) {
|
|
344
|
+
logError(`Failed to send event (${eventRes.statusCode}): ${JSON.stringify(eventRes.body).slice(0, 300)}`);
|
|
345
|
+
return null;
|
|
346
|
+
}
|
|
347
|
+
logInfo("Event sent — waiting for agent to complete...");
|
|
348
|
+
|
|
349
|
+
// ── Step 5: Poll until idle ──
|
|
350
|
+
const finalSession = await _pollSession(sessionId, apiKey, timeoutMs);
|
|
351
|
+
if (!finalSession) return null;
|
|
352
|
+
|
|
353
|
+
const elapsed = (Date.now() - startTime) / 1000;
|
|
354
|
+
const activeSeconds = finalSession.stats?.active_seconds || 0;
|
|
355
|
+
const sessionHours = activeSeconds / 3600;
|
|
356
|
+
const runtimeCost = (sessionHours * (C.PRICING.managedAgentRuntime || 0.08)).toFixed(4);
|
|
357
|
+
|
|
358
|
+
// ── Step 6: Fetch events to extract agent response ──
|
|
359
|
+
const eventsRes = await _apiRequest("GET", `/v1/sessions/${sessionId}/events`, apiKey);
|
|
360
|
+
if (eventsRes.statusCode >= 400) {
|
|
361
|
+
logError(`Failed to fetch events: ${eventsRes.statusCode}`);
|
|
362
|
+
return null;
|
|
363
|
+
}
|
|
364
|
+
|
|
365
|
+
const events = eventsRes.body.data || [];
|
|
366
|
+
const agentMessages = events.filter((e) => e.type === "agent.message");
|
|
367
|
+
const text = agentMessages
|
|
368
|
+
.flatMap((e) => (e.content || []).filter((b) => b.type === "text").map((b) => b.text))
|
|
369
|
+
.join("\n");
|
|
370
|
+
|
|
371
|
+
if (!text) {
|
|
372
|
+
logWarn("Agent session completed but no text response found.");
|
|
373
|
+
return null;
|
|
374
|
+
}
|
|
375
|
+
|
|
376
|
+
const reviewParser = require("../review-parser");
|
|
377
|
+
const review = reviewParser.parseReviewResponse(text);
|
|
378
|
+
|
|
379
|
+
const inputTokens = finalSession.usage?.input_tokens || 0;
|
|
380
|
+
const outputTokens = finalSession.usage?.output_tokens || 0;
|
|
381
|
+
const cacheTokens = finalSession.usage?.cache_creation_input_tokens || 0;
|
|
382
|
+
const tokenCost = estimateCost(inputTokens + cacheTokens, outputTokens, model);
|
|
383
|
+
const totalCost = (parseFloat(tokenCost) + parseFloat(runtimeCost)).toFixed(4);
|
|
384
|
+
|
|
385
|
+
logSuccess(`Deep review complete (${elapsed.toFixed(1)}s wall, ${activeSeconds.toFixed(1)}s active)`);
|
|
386
|
+
logInfo(`Runtime cost: $${runtimeCost} | Token cost: $${tokenCost} | Total: $${totalCost}`);
|
|
387
|
+
logInfo(`Session: ${sessionId} | Agent: ${agentId} | Env: ${envId}`);
|
|
388
|
+
|
|
389
|
+
return {
|
|
390
|
+
...review,
|
|
391
|
+
raw: text,
|
|
392
|
+
sessionId,
|
|
393
|
+
agentId,
|
|
394
|
+
environmentId: envId,
|
|
395
|
+
activeSeconds,
|
|
396
|
+
sessionHours,
|
|
397
|
+
tokens: { input: inputTokens, output: outputTokens, cache: cacheTokens },
|
|
398
|
+
cost: { token: tokenCost, runtime: runtimeCost, total: totalCost },
|
|
399
|
+
};
|
|
282
400
|
}
|
|
283
401
|
|
|
284
402
|
module.exports = {
|
|
Binary file
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "solo-cto-agent",
|
|
3
|
-
"version": "1.3.
|
|
3
|
+
"version": "1.3.2",
|
|
4
4
|
"private": false,
|
|
5
5
|
"description": "CTO-level AI agent toolkit for solo founders. Dual-agent review, circuit breakers, design quality gates, and session memory for Claude Cowork + OpenAI Codex.",
|
|
6
6
|
"author": "seunghunbae-3svs",
|