@gaberrb/polypus 0.4.16 → 0.4.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -196,6 +196,17 @@ agent is also instructed to talk back to you in the configured language.
196
196
  All file access is restricted to the workspace and the configured **allow-list**
197
197
  globs; the **deny-list** (e.g. `.git/**`, `**/.env`) always wins.
198
198
 
199
+ ### Timeout Configuration
200
+
201
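+ You can cap the total duration of a swarm session with the environment variable `POLYPUS_SWARM_OVERALL_TIMEOUT_MS`, given in milliseconds (default: `3600000`, i.e. 1 hour; non-numeric or non-positive values fall back to the default). When the limit is reached the session is aborted, so a stalled agent cannot hang it indefinitely: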
202
+
203
+ ```bash
204
+ export POLYPUS_SWARM_OVERALL_TIMEOUT_MS=1800000 # 30 minutes
205
+ polypus swarm "your task"
206
+ ```
207
+
208
+ Similarly, `POLYPUS_SWARM_IDLE_TIMEOUT_MS` controls the idle timeout for individual workers (default: 5 minutes).
209
+
199
210
  ## Swarm (parallel agents)
200
211
 
201
212
  ```bash
@@ -207,6 +218,48 @@ worktree (in `bypass` mode, since the worktree is throwaway), and the branches
207
218
  are merged back sequentially. Conflicting branches are kept for manual
208
219
  inspection rather than force-merged.
209
220
 
221
+ ## Autonomous agent — the tool self-improving 🤖
222
+
223
+ Polypus can run **itself** in CI to implement its own issues. Label an issue
224
+ `polypus-go` and the `agent.yml` workflow implements it headlessly, gates on the
225
+ local CI, patch-bumps the version + CHANGELOG, and opens a release-ready PR; when
226
+ you merge it, `auto-release.yml` cuts the GitHub Release and `release.yml`
227
+ publishes the new version to npm. The only human step is the **merge**.
228
+
229
+ ```
230
+ issue + label `polypus-go`
231
+ → agent.yml: implement (cheap model) → secret scan → CI gate → bump + CHANGELOG → open PR
232
+ → you merge the PR
233
+ → auto-release.yml → release.yml → npm publish
234
+ ```
235
+
236
+ **Setup (one-time):**
237
+
238
+ ```bash
239
+ # 1) the trigger label
240
+ gh label create polypus-go --color 5be4b1
241
+
242
+ # 2) the model key
243
+ gh secret set OPENROUTER_API_KEY --body "sk-or-v1-..."
244
+
245
+ # 3) a PAT (repo scope) so the agent can open the PR AND the release can publish
246
+ # (a Release made with the default GITHUB_TOKEN does NOT trigger release.yml)
247
+ gh secret set POLYPUS_PR_TOKEN --body "github_pat_..."
248
+
249
+ # 4) optional: cheap model + per-run budget
250
+ gh variable set POLYPUS_AGENT_MODEL --body "deepseek/deepseek-chat-v3-0324"
251
+ gh variable set POLYPUS_BUDGET_USD --body "0.50"
252
+ ```
253
+
254
+ Guard-rails: own-repo only, secret scan on the diff, mandatory CI gate, a spend
255
+ budget, and your merge as the final gate. Without `POLYPUS_PR_TOKEN` nothing
256
+ breaks — the agent still implements and pushes a branch, and comments the branch
257
+ link on the issue instead of failing.
258
+
259
+ 📖 **Full guide with a diagram, examples and the `POLYPUS_PR_TOKEN` walkthrough:**
260
+ [the autonomous-agent page](https://gaberrb.github.io/polypus/agent.html)
261
+ (`docs/agent.html`).
262
+
210
263
  ## Configuration
211
264
 
212
265
  Stored at `~/.polypus/config.json` (override the directory with `POLYPUS_HOME`).
package/dist/index.js CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  // src/cli/index.ts
4
4
  import { Command } from "commander";
5
- import pc15 from "picocolors";
5
+ import pc16 from "picocolors";
6
6
 
7
7
  // src/cli/commands/add-agent.ts
8
8
  import pc from "picocolors";
@@ -113,6 +113,8 @@ var en = {
113
113
  "cli.opt.verify": "after the agent finishes, run project checks (typecheck/build/test) and iterate until they pass",
114
114
  "cli.opt.budget": "stop the run when the estimated session cost reaches this USD amount (OpenRouter pricing)",
115
115
  "cli.cmd.usage": "Show token/cost analytics aggregated per day",
116
+ "cli.cmd.estimate": "Estimate the effort/cost to implement a task (no changes made)",
117
+ "cli.arg.estimateTask": "task to estimate",
116
118
  "cli.cmd.sessions": "List saved sessions that can be resumed",
117
119
  "cli.opt.continue": "resume the most recent saved session",
118
120
  "cli.opt.resume": "resume a specific saved session by id",
@@ -161,6 +163,12 @@ var en = {
161
163
  "usage.empty": "No usage recorded yet. Run a task to start tracking.",
162
164
  "usage.total": "total",
163
165
  "usage.runs": "runs",
166
+ // estimate
167
+ "estimate.header": "Effort estimate:",
168
+ "estimate.complexity": "complexity",
169
+ "estimate.steps": "steps",
170
+ "estimate.tokens": "tokens",
171
+ "estimate.cost": "estimated cost",
164
172
  // sessions
165
173
  "sessions.header": "Saved sessions (most recent first):",
166
174
  "sessions.empty": "No saved sessions yet.",
@@ -229,6 +237,7 @@ var en = {
229
237
  "swarm.conflictsHeader": "\u26A0 {n} branch(es) had merge conflicts (kept for inspection):",
230
238
  "swarm.statusDone": "done",
231
239
  "swarm.statusIncomplete": "incomplete",
240
+ "swarm.timeout": "\u26A0 Session timeout reached. Operation aborted.",
232
241
  // init
233
242
  "init.created": "\u2713 .poly scaffolded:",
234
243
  "init.skipped": "Kept (already existed):",
@@ -377,6 +386,8 @@ var ptBR = {
377
386
  "cli.opt.verify": "ap\xF3s o agente terminar, roda as checagens do projeto (typecheck/build/test) e itera at\xE9 passar",
378
387
  "cli.opt.budget": "interrompe a execu\xE7\xE3o quando o custo estimado da sess\xE3o atingir este valor em USD (pre\xE7os do OpenRouter)",
379
388
  "cli.cmd.usage": "Mostra analytics de tokens/custo agregados por dia",
389
+ "cli.cmd.estimate": "Estima o esfor\xE7o/custo para implementar uma tarefa (sem alterar nada)",
390
+ "cli.arg.estimateTask": "tarefa a estimar",
380
391
  "cli.cmd.sessions": "Lista as sess\xF5es salvas que podem ser retomadas",
381
392
  "cli.opt.continue": "retoma a sess\xE3o salva mais recente",
382
393
  "cli.opt.resume": "retoma uma sess\xE3o salva espec\xEDfica pelo id",
@@ -423,6 +434,12 @@ var ptBR = {
423
434
  "usage.empty": "Nenhum uso registrado ainda. Rode uma tarefa para come\xE7ar a medir.",
424
435
  "usage.total": "total",
425
436
  "usage.runs": "execu\xE7\xF5es",
437
+ // estimate
438
+ "estimate.header": "Estimativa de esfor\xE7o:",
439
+ "estimate.complexity": "complexidade",
440
+ "estimate.steps": "passos",
441
+ "estimate.tokens": "tokens",
442
+ "estimate.cost": "custo estimado",
426
443
  // sessions
427
444
  "sessions.header": "Sess\xF5es salvas (mais recentes primeiro):",
428
445
  "sessions.empty": "Nenhuma sess\xE3o salva ainda.",
@@ -489,6 +506,7 @@ var ptBR = {
489
506
  "swarm.conflictsHeader": "\u26A0 {n} branch(es) tiveram conflitos de merge (mantidos para inspe\xE7\xE3o):",
490
507
  "swarm.statusDone": "ok",
491
508
  "swarm.statusIncomplete": "incompleta",
509
+ "swarm.timeout": "\u26A0 O tempo m\xE1ximo da sess\xE3o foi atingido. A opera\xE7\xE3o foi interrompida.",
492
510
  // init
493
511
  "init.created": "\u2713 .poly criado:",
494
512
  "init.skipped": "Mantidos (j\xE1 existiam):",
@@ -3868,6 +3886,10 @@ function idleTimeoutMs() {
3868
3886
  const raw = Number(process.env.POLYPUS_SWARM_IDLE_TIMEOUT_MS);
3869
3887
  return Number.isFinite(raw) && raw > 0 ? raw : DEFAULT_IDLE_TIMEOUT_MS;
3870
3888
  }
3889
/**
 * Resolve the overall swarm-session timeout, in milliseconds.
 *
 * Reads `POLYPUS_SWARM_OVERALL_TIMEOUT_MS`. Anything that is not a finite,
 * positive number falls back to one hour. The result is capped at the largest
 * delay Node's timers accept (2^31 - 1 ms): the value is handed to
 * `setTimeout`, which resets any larger delay to 1 ms and would therefore
 * abort the session almost immediately.
 *
 * @returns {number} timeout in milliseconds, in the range (0, 2147483647]
 */
function overallTimeoutMs() {
  const defaultTimeoutMs = 36e5; // 1 hour
  const maxTimerDelayMs = 2147483647; // 2^31 - 1, the timers' upper bound
  const raw = Number(process.env.POLYPUS_SWARM_OVERALL_TIMEOUT_MS);
  if (!Number.isFinite(raw) || raw <= 0) return defaultTimeoutMs;
  return Math.min(raw, maxTimerDelayMs);
}
3871
3893
 
3872
3894
  // src/ui/swarm-view.ts
3873
3895
  var RESET2 = "\x1B[0m";
@@ -4092,6 +4114,10 @@ async function runSwarmSession(task, config, opts = {}) {
4092
4114
  const view = new SwarmView(resolved[0].config.name);
4093
4115
  view.start();
4094
4116
  let result;
4117
+ const sessionTimeout = setTimeout(() => {
4118
+ controller.abort();
4119
+ console.log(pc7.red(t("swarm.timeout")));
4120
+ }, overallTimeoutMs());
4095
4121
  try {
4096
4122
  result = await runSwarm({
4097
4123
  task,
@@ -4115,6 +4141,7 @@ async function runSwarmSession(task, config, opts = {}) {
4115
4141
  }
4116
4142
  });
4117
4143
  } finally {
4144
+ clearTimeout(sessionTimeout);
4118
4145
  view.stop();
4119
4146
  cancel2.dispose();
4120
4147
  }
@@ -4895,14 +4922,98 @@ async function sessions() {
4895
4922
  console.log(pc12.dim("\n" + t("sessions.hint")));
4896
4923
  }
4897
4924
 
4925
+ // src/cli/commands/estimate.ts
4926
+ import pc13 from "picocolors";
4927
+
4928
+ // src/core/agent/estimate.ts
4929
+ var SYSTEM = [
4930
+ "You estimate the effort for an autonomous coding agent (a ReAct loop that reads/edits files",
4931
+ "and runs commands over several steps) to implement a software task in an existing repo.",
4932
+ "Account for the loop re-sending growing context each step. Be realistic, not optimistic.",
4933
+ "Return ONLY a JSON object, no prose, with exactly these keys:",
4934
+ '{"complexity":"low|medium|high","estimatedSteps":<int>,"estimatedTokens":<int total across all steps>,',
4935
+ '"rationale":"<one sentence>","risks":"<one sentence>"}'
4936
+ ].join(" ");
4937
/**
 * Pull the first parseable JSON object out of free-form model output.
 *
 * Each `{` is tried in turn as a candidate start. The matching `}` is found
 * with a scan that skips over JSON string literals, so braces inside string
 * values (e.g. `"risks":"handle } edge"`) do not end the object early. A
 * candidate that is unbalanced or fails `JSON.parse` is skipped and the next
 * `{` is tried.
 *
 * @param {unknown} text2 raw text to search; non-strings yield `undefined`
 * @returns {object | undefined} the parsed object, or `undefined` if none parses
 */
function extractJsonObject(text2) {
  if (typeof text2 !== "string") return void 0;

  // Index of the `}` closing the object opened at `from`, or -1 if unbalanced.
  const findObjectEnd = (from) => {
    let depth = 0;
    let inString = false;
    for (let i = from; i < text2.length; i++) {
      const ch = text2[i];
      if (inString) {
        if (ch === "\\") i++; // skip the escaped character
        else if (ch === '"') inString = false;
      } else if (ch === '"') {
        inString = true;
      } else if (ch === "{") {
        depth++;
      } else if (ch === "}" && --depth === 0) {
        return i;
      }
    }
    return -1;
  };

  for (let start = text2.indexOf("{"); start !== -1; start = text2.indexOf("{", start + 1)) {
    const end = findObjectEnd(start);
    if (end === -1) continue;
    try {
      return JSON.parse(text2.slice(start, end + 1));
    } catch {
      // Balanced braces but not JSON (e.g. prose like "{see below}"): try the next candidate.
    }
  }
  return void 0;
}
4953
/**
 * Coerce `value` to an integer clamped to `[min, max]`.
 *
 * Only numbers and non-blank strings count as numeric input. `null`, `""`,
 * booleans, arrays and objects return `fallback`; `Number()` would otherwise
 * coerce several of them to 0 or 1, which would then be clamped to `min` as
 * if it were a real answer.
 *
 * @param {unknown} value candidate value (e.g. a field of parsed model output)
 * @param {number} min inclusive lower bound
 * @param {number} max inclusive upper bound
 * @param {number} fallback returned when `value` is not a finite number
 * @returns {number} the rounded, clamped integer, or `fallback`
 */
function clampInt(value, min, max, fallback) {
  const looksNumeric =
    typeof value === "number" || (typeof value === "string" && value.trim() !== "");
  if (!looksNumeric) return fallback;
  const n = Math.round(Number(value));
  if (!Number.isFinite(n)) return fallback;
  return Math.min(max, Math.max(min, n));
}
4958
/**
 * Ask the agent's model for a rough effort estimate of `task`.
 *
 * Sends the `SYSTEM` prompt plus the task in a single chat call at
 * temperature 0, extracts the JSON object from the reply, and normalises each
 * field. Missing or malformed fields fall back to defaults: complexity
 * "medium", 30 steps, 80,000 tokens, empty rationale/risks. A reply whose
 * `content` is missing or not a string is treated as empty, so it takes the
 * same defaults instead of throwing.
 *
 * @param {string} task the task description to estimate
 * @param {object} agent resolved agent; only `agent.provider.chat` is used here
 * @param {object} [pricing] model pricing handed to `estimateCost`; when falsy
 *   the cost is reported as unknown
 * @returns {Promise<object>} `{ complexity, estimatedSteps, estimatedTokens,
 *   rationale, risks, costUsd, costLabel }`; `costUsd` is `undefined` when no
 *   pricing is available
 */
async function estimateTask(task, agent, pricing) {
  const res = await agent.provider.chat({
    messages: [
      { role: "system", content: SYSTEM },
      { role: "user", content: `Task:\n${task}` }
    ],
    params: { temperature: 0 }
  });
  // Guard the reply: a null/absent content must not crash the extractor.
  const reply = typeof res?.content === "string" ? res.content : "";
  const parsed = extractJsonObject(reply) ?? {};
  const complexity = ["low", "medium", "high"].includes(parsed.complexity) ? parsed.complexity : "medium";
  const estimatedSteps = clampInt(parsed.estimatedSteps, 1, 300, 30);
  const estimatedTokens = clampInt(parsed.estimatedTokens, 1e3, 2e7, 8e4);
  const rationale = typeof parsed.rationale === "string" ? parsed.rationale : "";
  const risks = typeof parsed.risks === "string" ? parsed.risks : "";
  let costUsd;
  let costLabel = "unknown (no pricing for this model)";
  if (pricing) {
    // The total is split 80% prompt / 20% completion before pricing it.
    costUsd = estimateCost(
      { promptTokens: Math.round(estimatedTokens * 0.8), completionTokens: Math.round(estimatedTokens * 0.2) },
      pricing
    );
    costLabel = fmtUsd(costUsd);
  }
  return { complexity, estimatedSteps, estimatedTokens, rationale, risks, costUsd, costLabel };
}
4984
+
4985
+ // src/cli/commands/estimate.ts
4986
/**
 * CLI handler for `polypus estimate <task>`.
 *
 * Loads the config, resolves the agent selected by `opts.agent`, looks up the
 * model pricing and prints the estimate. With `opts.json` a single JSON line
 * `{ "estimate": ... }` is written to stdout; otherwise a short human-readable
 * summary is logged.
 *
 * @param {string} task the task to estimate
 * @param {{ agent?: string, json?: boolean }} opts parsed CLI options
 * @returns {Promise<void>}
 */
async function estimate(task, opts) {
  const cfg = await loadConfig();
  const provider = createProvider(resolveAgent(cfg, opts.agent));
  const modelPricing = await resolveModelPricing(provider.config);
  const result = await estimateTask(task, provider, modelPricing);

  if (opts.json) {
    process.stdout.write(JSON.stringify({ estimate: result }) + "\n");
    return;
  }

  const { complexity, estimatedSteps, estimatedTokens, costLabel, rationale, risks } = result;
  console.log(pc13.bold(t("estimate.header")));
  console.log(` ${t("estimate.complexity")}: ${pc13.cyan(complexity)}`);
  console.log(` ${t("estimate.steps")}: ~${estimatedSteps}`);
  console.log(` ${t("estimate.tokens")}: ~${fmtTokens3(estimatedTokens)}`);
  console.log(` ${t("estimate.cost")}: ${pc13.green(costLabel)}`);
  // Rationale and risks are optional: only printed when the model supplied them.
  if (rationale) {
    console.log(pc13.dim(` ${rationale}`));
  }
  if (risks) {
    console.log(pc13.dim(` \u26A0 ${risks}`));
  }
}
5004
/**
 * Format a token count compactly: plain below 1,000, then `k`, then `M`,
 * each with one decimal.
 *
 * The unit is chosen after rounding, so values just under a million that
 * round up (e.g. 999,999) render as "1.0M" rather than "1000.0k".
 *
 * @param {number} n token count
 * @returns {string} e.g. "999", "1.5k", "20.0M"
 */
function fmtTokens3(n) {
  if (n < 1e3) return String(n);
  const thousands = (n / 1e3).toFixed(1);
  // Promote to millions when the rounded "k" value would itself reach 1000.0.
  if (n < 1e6 && Number(thousands) < 1e3) return `${thousands}k`;
  return `${(n / 1e6).toFixed(1)}M`;
}
5008
+
4898
5009
  // src/cli/commands/prd.ts
4899
5010
  import { writeFile as writeFile6, readFile as readFile14 } from "fs/promises";
4900
5011
  import { execFile } from "child_process";
4901
5012
  import { promisify as promisify5 } from "util";
4902
- import pc13 from "picocolors";
5013
+ import pc14 from "picocolors";
4903
5014
 
4904
5015
  // src/core/agent/prd.ts
4905
- var SYSTEM = [
5016
+ var SYSTEM2 = [
4906
5017
  "You are a product analyst. You turn a GitHub issue into a concise, structured PRD",
4907
5018
  "(Product Requirements Document) in Markdown.",
4908
5019
  "Rules:",
@@ -4932,7 +5043,7 @@ ${comments}` : "",
4932
5043
  ].join("\n");
4933
5044
  }
4934
5045
  async function generatePrd(issue, provider, projectContext) {
4935
- const messages = [{ role: "system", content: SYSTEM }];
5046
+ const messages = [{ role: "system", content: SYSTEM2 }];
4936
5047
  if (projectContext) {
4937
5048
  messages.push({
4938
5049
  role: "system",
@@ -5028,7 +5139,7 @@ async function prd(issueRef, opts) {
5028
5139
  const markdown = await withRetry(() => generatePrd(issue, provider, guide));
5029
5140
  if (opts.out) {
5030
5141
  await writeFile6(opts.out, markdown + "\n", "utf8");
5031
- console.error(pc13.green(t("prd.wrote", { path: opts.out })));
5142
+ console.error(pc14.green(t("prd.wrote", { path: opts.out })));
5032
5143
  } else {
5033
5144
  process.stdout.write(markdown + "\n");
5034
5145
  }
@@ -5057,11 +5168,11 @@ function normalize2(raw) {
5057
5168
  import { writeFile as writeFile7, readFile as readFile15 } from "fs/promises";
5058
5169
  import { execFile as execFile2 } from "child_process";
5059
5170
  import { promisify as promisify6 } from "util";
5060
- import pc14 from "picocolors";
5171
+ import pc15 from "picocolors";
5061
5172
 
5062
5173
  // src/core/agent/review.ts
5063
5174
  var MAX_DIFF_CHARS = Number(process.env.POLYPUS_MAX_DIFF_CHARS) || 6e4;
5064
- var SYSTEM2 = [
5175
+ var SYSTEM3 = [
5065
5176
  "You are a senior code reviewer. Review the pull request diff below and report",
5066
5177
  "concrete findings in Markdown.",
5067
5178
  "Rules:",
@@ -5095,7 +5206,7 @@ function buildReviewPrompt(diff, meta) {
5095
5206
  }
5096
5207
  async function reviewDiff(diff, meta, provider, projectGuide) {
5097
5208
  if (!diff.trim()) return "_Sem altera\xE7\xF5es no diff para revisar._";
5098
- const messages = [{ role: "system", content: SYSTEM2 }];
5209
+ const messages = [{ role: "system", content: SYSTEM3 }];
5099
5210
  if (projectGuide) {
5100
5211
  messages.push({
5101
5212
  role: "system",
@@ -5124,7 +5235,7 @@ async function review(prRef, opts) {
5124
5235
  const markdown = await withRetry(() => reviewDiff(diff, meta, provider, guide));
5125
5236
  if (opts.out) {
5126
5237
  await writeFile7(opts.out, markdown + "\n", "utf8");
5127
- console.error(pc14.green(t("review.wrote", { path: opts.out })));
5238
+ console.error(pc15.green(t("review.wrote", { path: opts.out })));
5128
5239
  } else {
5129
5240
  process.stdout.write(markdown + "\n");
5130
5241
  }
@@ -5175,7 +5286,7 @@ async function launchInteractive() {
5175
5286
  const config = await loadConfig();
5176
5287
  if (config.agents.length === 0) {
5177
5288
  console.log(banner());
5178
- console.log(" " + pc15.yellow(t("welcome.firstRun")) + "\n");
5289
+ console.log(" " + pc16.yellow(t("welcome.firstRun")) + "\n");
5179
5290
  await setup();
5180
5291
  }
5181
5292
  await run(void 0, {});
@@ -5206,6 +5317,7 @@ function buildProgram() {
5206
5317
  program.command("swarm").argument("<task>", t("cli.arg.swarmTask")).option("--agents <names>", t("cli.opt.agents")).option("--max-subtasks <n>", t("cli.opt.maxSubtasks")).description(t("cli.cmd.swarm")).action((task, opts) => swarm(task, opts));
5207
5318
  program.command("models").option("--search <text>", t("cli.opt.search")).option("--tools", t("cli.opt.toolsOnly")).option("--free", t("cli.opt.free")).option("--max-price <usd>", t("cli.opt.maxPrice")).option("--sort <order>", t("cli.opt.sort")).option("--limit <n>", t("cli.opt.limit")).description(t("cli.cmd.models")).action((opts) => models(opts));
5208
5319
  program.command("usage").description(t("cli.cmd.usage")).action(() => usage());
5320
+ program.command("estimate").argument("<task>", t("cli.arg.estimateTask")).option("--agent <name>", t("cli.opt.agent")).option("--json", t("cli.opt.json")).description(t("cli.cmd.estimate")).action((task, opts) => estimate(task, opts));
5209
5321
  program.command("sessions").description(t("cli.cmd.sessions")).action(() => sessions());
5210
5322
  program.command("prd").argument("<issue>", t("cli.arg.prdIssue")).option("--out <file>", t("cli.opt.out")).option("--model <model>", t("cli.opt.model")).option("--input <file>", t("cli.opt.input")).description(t("cli.cmd.prd")).action((issue, opts) => prd(issue, opts));
5211
5323
  program.command("review").argument("<pr>", t("cli.arg.reviewPr")).option("--out <file>", t("cli.opt.out")).option("--model <model>", t("cli.opt.model")).option("--input <file>", t("cli.opt.input")).description(t("cli.cmd.review")).action((pr, opts) => review(pr, opts));
@@ -5217,7 +5329,7 @@ async function main() {
5217
5329
  await resolveLocale();
5218
5330
  await buildProgram().parseAsync(process.argv);
5219
5331
  } catch (err) {
5220
- console.error(pc15.red(`\u2717 ${err.message}`));
5332
+ console.error(pc16.red(`\u2717 ${err.message}`));
5221
5333
  process.exitCode = 1;
5222
5334
  }
5223
5335
  }