@agentplate/cli 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +54 -0
- package/LICENSE +21 -0
- package/README.md +206 -0
- package/agents/architect.md +108 -0
- package/agents/builder.md +97 -0
- package/agents/coordinator.md +113 -0
- package/agents/deployer.md +117 -0
- package/agents/devops.md +114 -0
- package/agents/lead.md +107 -0
- package/agents/merger.md +103 -0
- package/agents/reviewer.md +90 -0
- package/agents/scout.md +95 -0
- package/agents/verifier.md +106 -0
- package/package.json +64 -0
- package/src/agents/guard-rules.ts +55 -0
- package/src/agents/identity.test.ts +161 -0
- package/src/agents/identity.ts +229 -0
- package/src/agents/manifest.test.ts +260 -0
- package/src/agents/manifest.ts +286 -0
- package/src/agents/overlay.test.ts +190 -0
- package/src/agents/overlay.ts +212 -0
- package/src/agents/system-prompt.test.ts +53 -0
- package/src/agents/system-prompt.ts +95 -0
- package/src/agents/turn-runner.ts +79 -0
- package/src/commands/coordinator.test.ts +75 -0
- package/src/commands/coordinator.ts +259 -0
- package/src/commands/deploy.test.ts +504 -0
- package/src/commands/deploy.ts +874 -0
- package/src/commands/doctor.test.ts +106 -0
- package/src/commands/doctor.ts +208 -0
- package/src/commands/init.ts +71 -0
- package/src/commands/log.ts +51 -0
- package/src/commands/mail.ts +197 -0
- package/src/commands/merge.ts +127 -0
- package/src/commands/model.ts +58 -0
- package/src/commands/prime.ts +61 -0
- package/src/commands/reap.ts +87 -0
- package/src/commands/serve.ts +61 -0
- package/src/commands/setup.ts +48 -0
- package/src/commands/ship.test.ts +106 -0
- package/src/commands/ship.ts +202 -0
- package/src/commands/skill.test.ts +458 -0
- package/src/commands/skill.ts +730 -0
- package/src/commands/sling.ts +365 -0
- package/src/commands/status.ts +60 -0
- package/src/commands/stop.ts +56 -0
- package/src/commands/tui.ts +199 -0
- package/src/commands/worktree.ts +77 -0
- package/src/config.test.ts +92 -0
- package/src/config.ts +202 -0
- package/src/db/sqlite.test.ts +77 -0
- package/src/db/sqlite.ts +102 -0
- package/src/deploy/audit.test.ts +233 -0
- package/src/deploy/audit.ts +245 -0
- package/src/deploy/context.test.ts +243 -0
- package/src/deploy/context.ts +72 -0
- package/src/deploy/registry.test.ts +101 -0
- package/src/deploy/registry.ts +86 -0
- package/src/deploy/secrets.test.ts +129 -0
- package/src/deploy/secrets.ts +69 -0
- package/src/deploy/targets/docker-gha.test.ts +323 -0
- package/src/deploy/targets/docker-gha.ts +841 -0
- package/src/deploy/types.ts +153 -0
- package/src/errors.test.ts +42 -0
- package/src/errors.ts +69 -0
- package/src/events/store.test.ts +183 -0
- package/src/events/store.ts +201 -0
- package/src/index.ts +137 -0
- package/src/insights/quality-gates.ts +73 -0
- package/src/json.test.ts +28 -0
- package/src/json.ts +50 -0
- package/src/logging/color.ts +62 -0
- package/src/logging/logger.ts +60 -0
- package/src/logging/sanitizer.test.ts +36 -0
- package/src/logging/sanitizer.ts +57 -0
- package/src/mail/client.test.ts +192 -0
- package/src/mail/client.ts +188 -0
- package/src/mail/store.test.ts +279 -0
- package/src/mail/store.ts +311 -0
- package/src/merge/lock.test.ts +88 -0
- package/src/merge/lock.ts +84 -0
- package/src/merge/queue.test.ts +136 -0
- package/src/merge/queue.ts +177 -0
- package/src/merge/resolver.test.ts +219 -0
- package/src/merge/resolver.ts +274 -0
- package/src/paths.ts +36 -0
- package/src/providers/apply.test.ts +90 -0
- package/src/providers/apply.ts +66 -0
- package/src/providers/registry.test.ts +74 -0
- package/src/providers/registry.ts +254 -0
- package/src/runtimes/claude.ts +313 -0
- package/src/runtimes/codex.ts +280 -0
- package/src/runtimes/cursor.ts +247 -0
- package/src/runtimes/gemini.ts +173 -0
- package/src/runtimes/mock.ts +71 -0
- package/src/runtimes/opencode.ts +259 -0
- package/src/runtimes/registry.test.ts +924 -0
- package/src/runtimes/registry.ts +63 -0
- package/src/runtimes/resolve.ts +45 -0
- package/src/runtimes/types.ts +97 -0
- package/src/scaffold.ts +68 -0
- package/src/secrets.test.ts +51 -0
- package/src/secrets.ts +78 -0
- package/src/serve/api.ts +667 -0
- package/src/serve/server.test.ts +433 -0
- package/src/serve/server.ts +271 -0
- package/src/serve/system.ts +90 -0
- package/src/serve/weather.ts +140 -0
- package/src/sessions/reaper.test.ts +162 -0
- package/src/sessions/reaper.ts +149 -0
- package/src/sessions/store.test.ts +351 -0
- package/src/sessions/store.ts +350 -0
- package/src/skills/distiller.test.ts +498 -0
- package/src/skills/distiller.ts +426 -0
- package/src/skills/feedback.test.ts +300 -0
- package/src/skills/feedback.ts +168 -0
- package/src/skills/lifecycle.ts +169 -0
- package/src/skills/retrieval.test.ts +421 -0
- package/src/skills/retrieval.ts +365 -0
- package/src/skills/safety.test.ts +335 -0
- package/src/skills/safety.ts +216 -0
- package/src/skills/store.test.ts +425 -0
- package/src/skills/store.ts +684 -0
- package/src/skills/types.ts +107 -0
- package/src/types.ts +442 -0
- package/src/utils/detect.test.ts +35 -0
- package/src/utils/detect.ts +82 -0
- package/src/version.test.ts +19 -0
- package/src/version.ts +7 -0
- package/src/wizard/setup.ts +254 -0
- package/src/worktree/manager.test.ts +181 -0
- package/src/worktree/manager.ts +229 -0
- package/templates/overlay.md.tmpl +102 -0
- package/ui/dist/assets/index-C7rXIMER.css +1 -0
- package/ui/dist/assets/index-W4kbr4by.js +4526 -0
- package/ui/dist/favicon.svg +21 -0
- package/ui/dist/index.html +16 -0
- package/ui/dist/logo-clay.svg +21 -0
- package/ui/dist/logo.svg +18 -0
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
# Verifier Agent
|
|
2
|
+
|
|
3
|
+
You are a **verifier** in the Agentplate delivery pipeline. Your job is to **prove the
|
|
4
|
+
deployment actually works**: smoke-test the live URL and health endpoints the
|
|
5
|
+
deployer produced, report a pass/fail backed by concrete evidence, and — on failure
|
|
6
|
+
— optionally request a rollback. You are a **leaf node** — you never spawn other
|
|
7
|
+
agents.
|
|
8
|
+
|
|
9
|
+
The reusable HOW lives in this file. The per-task WHAT (your task ID, the target,
|
|
10
|
+
environment, the deployment's URLs/`deploymentId`, your agent name, parent) comes
|
|
11
|
+
from the overlay `CLAUDE.md` in your worktree. Read it first; it overrides anything
|
|
12
|
+
generic here.
|
|
13
|
+
|
|
14
|
+
## Core Discipline: Read-Only, But Networked
|
|
15
|
+
|
|
16
|
+
You **never modify source files** — no edits, no writes to the repo, no commits,
|
|
17
|
+
no config. Your output is a *verdict*, delivered as mail.
|
|
18
|
+
|
|
19
|
+
What you *do* get is **network access**: you reach out to the deployed URL and
|
|
20
|
+
probe it for real. Verification is empirical — you confirm the running deployment,
|
|
21
|
+
not the plan or the config.
|
|
22
|
+
|
|
23
|
+
The only things you may write are scratch notes under `/tmp` (never inside the
|
|
24
|
+
repo).
|
|
25
|
+
|
|
26
|
+
### Failure Mode (avoid this above all)
|
|
27
|
+
|
|
28
|
+
- **FALSE_GREEN** — reporting `healthy` / `verify_done` without a real probe. This
|
|
29
|
+
is the one failure that defeats the whole pipeline: it lets a broken deploy look
|
|
30
|
+
shipped. **Never** assert health you did not observe. Every "ok" in your verdict
|
|
31
|
+
must be backed by an actual response (a status code, a body match, a latency).
|
|
32
|
+
If you could not reach the deployment, that is a **fail**, not an unknown-pass.
|
|
33
|
+
|
|
34
|
+
## When to Act Immediately
|
|
35
|
+
|
|
36
|
+
Begin the moment you are spawned.
|
|
37
|
+
|
|
38
|
+
1. Read your overlay `CLAUDE.md` for the target, environment, and the
|
|
39
|
+
deployment's URLs and `deploymentId`.
|
|
40
|
+
2. `agentplate mail check` for the deployer's `deploy_done` (with the live URLs) and
|
|
41
|
+
any specific checks your parent wants run.
|
|
42
|
+
3. Probe the deployment.
|
|
43
|
+
|
|
44
|
+
## How to Verify
|
|
45
|
+
|
|
46
|
+
Drive verification through the engine; it invokes the target adapter's `verify()`
|
|
47
|
+
(read-only, health/smoke checks) and returns a `VerifyResult` (`healthy`, a list
|
|
48
|
+
of named `checks` with `ok` + `detail`, and the `probedUrl`).
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
agentplate verify --target <target> --env <environment>
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
Probe like you mean it:
|
|
55
|
+
|
|
56
|
+
- **Hit the real URL.** Request the deployed endpoint(s); confirm an actual
|
|
57
|
+
`2xx`/expected status, not just DNS resolving.
|
|
58
|
+
- **Check the health endpoint** if the app exposes one; confirm the body, not only
|
|
59
|
+
the code.
|
|
60
|
+
- **Exercise a representative path** when the overlay names one (a critical route,
|
|
61
|
+
an API ping), so "healthy" means *serving*, not merely *listening*.
|
|
62
|
+
- **Record the evidence** — status codes, body snippets, latencies — so each
|
|
63
|
+
check's `detail` shows *why* it passed or failed.
|
|
64
|
+
|
|
65
|
+
## Optional: Request Rollback on Failure
|
|
66
|
+
|
|
67
|
+
You do **not** roll back yourself — that is an outward-facing mutation, the
|
|
68
|
+
deployer's gated job. If the deployment is unhealthy and your overlay/parent wants
|
|
69
|
+
the environment restored, **request** a rollback via mail (include the
|
|
70
|
+
`deploymentId` so the deployer can target it), then let the deployer execute it.
|
|
71
|
+
|
|
72
|
+
## Communication Protocol
|
|
73
|
+
|
|
74
|
+
You report to your parent via mail. Lead with the verdict, then the evidence.
|
|
75
|
+
|
|
76
|
+
- **Progress** — `--type status` for interim notes during a longer probe sweep.
|
|
77
|
+
- **Rollback request** — `--type escalation` when the deployment is unhealthy and
|
|
78
|
+
the environment should be rolled back; name the `deploymentId` and the failing
|
|
79
|
+
checks so the deployer can act.
|
|
80
|
+
|
|
81
|
+
## Completion Protocol
|
|
82
|
+
|
|
83
|
+
Your terminal mail is **`verify_done`** when the deployment is genuinely healthy or
|
|
84
|
+
**`verify_failed`** when it is not — this is what the runner watches to close your
|
|
85
|
+
session.
|
|
86
|
+
|
|
87
|
+
**On a verified pass** (real probes, all green):
|
|
88
|
+
|
|
89
|
+
```bash
|
|
90
|
+
agentplate mail send --to <parent> \
|
|
91
|
+
--subject "Verify done: <taskId>" \
|
|
92
|
+
--body "Healthy. Probed https://app-staging.example.com → 200 (12ms); /health → 200 {\"status\":\"ok\"}; GET /api/ping → 200. All checks green." \
|
|
93
|
+
--type verify_done
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
**On a fail** (a probe failed, or the deployment was unreachable):
|
|
97
|
+
|
|
98
|
+
```bash
|
|
99
|
+
agentplate mail send --to <parent> \
|
|
100
|
+
--subject "Verify failed: <taskId>" \
|
|
101
|
+
--body "Unhealthy. https://app-staging.example.com → 502 on /; /health unreachable. Rollback requested (deploymentId sha256:abc123) — see escalation." \
|
|
102
|
+
--type verify_failed
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
Send exactly one terminal mail (`verify_done` **or** `verify_failed`), grounded in
|
|
106
|
+
probes you actually ran, then stop. Never green a deployment you did not observe.
|
package/package.json
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@agentplate/cli",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"publishConfig": {
|
|
5
|
+
"access": "public"
|
|
6
|
+
},
|
|
7
|
+
"description": "Self-improving multi-agent orchestration that takes you from build → CI/CD → deploy. Interactive setup wizard, pluggable AI providers, agent swarms in git worktrees.",
|
|
8
|
+
"type": "module",
|
|
9
|
+
"license": "MIT",
|
|
10
|
+
"repository": {
|
|
11
|
+
"type": "git",
|
|
12
|
+
"url": "git+https://github.com/agentplate/agentplate.git"
|
|
13
|
+
},
|
|
14
|
+
"homepage": "https://github.com/agentplate/agentplate",
|
|
15
|
+
"keywords": [
|
|
16
|
+
"ai",
|
|
17
|
+
"agents",
|
|
18
|
+
"orchestration",
|
|
19
|
+
"multi-agent",
|
|
20
|
+
"swarm",
|
|
21
|
+
"deploy",
|
|
22
|
+
"ci-cd",
|
|
23
|
+
"self-improving",
|
|
24
|
+
"cli",
|
|
25
|
+
"developer-tools"
|
|
26
|
+
],
|
|
27
|
+
"bin": {
|
|
28
|
+
"agentplate": "src/index.ts",
|
|
29
|
+
"ap": "src/index.ts"
|
|
30
|
+
},
|
|
31
|
+
"main": "src/index.ts",
|
|
32
|
+
"files": [
|
|
33
|
+
"src",
|
|
34
|
+
"agents",
|
|
35
|
+
"templates",
|
|
36
|
+
"ui/dist",
|
|
37
|
+
"LICENSE",
|
|
38
|
+
"CHANGELOG.md"
|
|
39
|
+
],
|
|
40
|
+
"engines": {
|
|
41
|
+
"bun": ">=1.0"
|
|
42
|
+
},
|
|
43
|
+
"scripts": {
|
|
44
|
+
"test": "bun test",
|
|
45
|
+
"lint": "biome check .",
|
|
46
|
+
"lint:fix": "biome check --write .",
|
|
47
|
+
"typecheck": "tsc --noEmit",
|
|
48
|
+
"check": "bun test && biome check . && tsc --noEmit",
|
|
49
|
+
"build:ui": "cd ui && bun install && bun run build",
|
|
50
|
+
"prepack": "bun run build:ui"
|
|
51
|
+
},
|
|
52
|
+
"dependencies": {
|
|
53
|
+
"@clack/prompts": "^0.11.0",
|
|
54
|
+
"chalk": "^5.6.2",
|
|
55
|
+
"commander": "^14.0.3",
|
|
56
|
+
"js-yaml": "^4.1.1"
|
|
57
|
+
},
|
|
58
|
+
"devDependencies": {
|
|
59
|
+
"@biomejs/biome": "2.3.15",
|
|
60
|
+
"@types/bun": "latest",
|
|
61
|
+
"@types/js-yaml": "^4.0.9",
|
|
62
|
+
"typescript": "^5.9.0"
|
|
63
|
+
}
|
|
64
|
+
}
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared guard rules.
|
|
3
|
+
*
|
|
4
|
+
* Single source of truth for "what is a dangerous shell command" and similar
|
|
5
|
+
* safety constants. Used by skill safety scrubbing (Phase 3) and, later, by
|
|
6
|
+
* agent tool guards and deploy guards (Phase 4). Centralized so the definition
|
|
7
|
+
* of "dangerous" never drifts between subsystems.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
/**
 * Regexes matching shell commands that must never appear in a distilled skill's
 * snippets: destructive operations, network-pipe-to-shell, privilege
 * escalation, and outward-facing mutations that belong only to a gated
 * deployer.
 *
 * Every pattern is non-global, so `RegExp.prototype.test` keeps no `lastIndex`
 * state between calls and the same array can be reused safely.
 */
export const DANGEROUS_BASH_PATTERNS: RegExp[] = [
  // Recursive delete in any flag spelling: -r, -R, -rf, -fr, -rfv, "-f -r",
  // and the long form --recursive. Leading option tokens are skipped so the
  // recursive flag is found wherever it sits among the options.
  /\brm\s+(?:-[\w-]+\s+)*(?:-[a-z]*r[a-z]*|--recursive)\b/i,
  /\b(curl|wget)\b[^\n|]*\|\s*(sudo\s+)?(ba)?sh\b/i, // curl … | sh
  /\bsudo\b/i, // privilege escalation
  /\bgit\s+push\b/i, // pushing from inside a skill
  /\bgit\s+reset\s+--hard\b/i, // destructive reset
  /\b(mkfs|dd)\b/i, // disk-level operations
  /\bchmod\s+-R\s+0?777\b/i, // world-writable recursion
  /:\(\)\s*\{\s*:\|:&\s*\}\s*;/, // fork bomb
  />\s*\/dev\/sd[a-z]/i, // writing to a raw disk
  /\beval\b\s+["'`$]/i, // eval of dynamic input
];
|
|
27
|
+
|
|
28
|
+
/**
 * Regexes for outward-facing deploy/apply verbs. These are reserved for the
 * gated deployer (Phase 4); all matching is case-insensitive.
 */
export const DEPLOY_VERB_PATTERNS: RegExp[] = [
  /\bterraform\s+apply\b/i, // infrastructure apply
  /\bkubectl\s+apply\b/i, // cluster manifest apply
  /\bhelm\s+(install|upgrade)\b/i, // chart install / upgrade
  /\bdocker\s+push\b/i, // publishing an image
  /\bvercel\b[^\n]*--prod\b/i, // vercel invocation carrying --prod on the same line
];
|
|
36
|
+
|
|
37
|
+
/**
 * Report whether `text` matches at least one entry of
 * {@link DANGEROUS_BASH_PATTERNS}. Stops at the first pattern that matches.
 */
export function hasDangerousCommand(text: string): boolean {
  for (const pattern of DANGEROUS_BASH_PATTERNS) {
    if (pattern.test(text)) return true;
  }
  return false;
}
|
|
41
|
+
|
|
42
|
+
/**
 * Collect the offending text for every dangerous pattern that matches.
 *
 * Each pattern contributes at most one entry: the substring of its first match
 * in `text`. Results follow the order of {@link DANGEROUS_BASH_PATTERNS}, and
 * the list is empty when nothing matches.
 */
export function findDangerousCommands(text: string): string[] {
  return DANGEROUS_BASH_PATTERNS.flatMap((pattern) => {
    const found = text.match(pattern);
    return found ? [found[0]] : [];
  });
}
|
|
51
|
+
|
|
52
|
+
/**
 * Report whether `text` matches at least one entry of
 * {@link DEPLOY_VERB_PATTERNS}. Stops at the first pattern that matches.
 */
export function hasDeployVerb(text: string): boolean {
  for (const pattern of DEPLOY_VERB_PATTERNS) {
    if (pattern.test(text)) return true;
  }
  return false;
}
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
import { afterEach, beforeEach, describe, expect, test } from "bun:test";
|
|
2
|
+
import { existsSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from "node:fs";
|
|
3
|
+
import { tmpdir } from "node:os";
|
|
4
|
+
import { join } from "node:path";
|
|
5
|
+
import yaml from "js-yaml";
|
|
6
|
+
import { ConfigError } from "../errors.ts";
|
|
7
|
+
import { type AgentIdentity, createIdentity, loadIdentity, updateIdentity } from "./identity.ts";
|
|
8
|
+
|
|
9
|
+
// These tests run against the real filesystem — nothing is mocked. Each test
// gets its own throwaway directory; a bare temp root (no git repository) is
// used as the project root.
let root: string;

// Fresh temp root for every test.
beforeEach(() => {
  const prefix = join(tmpdir(), "agentplate-identity-");
  root = mkdtempSync(prefix);
});

// Remove the temp root and everything written beneath it.
afterEach(() => {
  const removal = { recursive: true, force: true };
  rmSync(root, removal);
});
|
|
20
|
+
|
|
21
|
+
/**
 * Expected on-disk location of an agent's CV inside the current temp root:
 * `<root>/.agentplate/agents/<name>/identity.yaml`.
 */
function identityFile(name: string): string {
  const agentDir = join(root, ".agentplate", "agents", name);
  return join(agentDir, "identity.yaml");
}
|
|
25
|
+
|
|
26
|
+
describe("createIdentity", () => {
  test("creates the directory and file with sane defaults", () => {
    const fresh = createIdentity(root, "alice", "builder");

    // Caller-supplied fields come back verbatim.
    expect(fresh.name).toBe("alice");
    expect(fresh.capability).toBe("builder");
    // Counter and histories start out empty.
    expect(fresh.sessionsCompleted).toBe(0);
    expect(fresh.expertiseDomains).toEqual([]);
    expect(fresh.recentTasks).toEqual([]);
    // `created` survives a Date round-trip unchanged, i.e. it is ISO-8601.
    const reparsed = new Date(fresh.created).toISOString();
    expect(reparsed).toBe(fresh.created);
    // The CV was actually persisted.
    expect(existsSync(identityFile("alice"))).toBe(true);
  });

  test("is idempotent: returns existing identity without clobbering history", () => {
    // Seed a CV that already has one recorded session.
    createIdentity(root, "bob", "scout");
    updateIdentity(root, "bob", { taskId: "t-1", summary: "did a thing" });

    // A second create with a different capability must return the stored CV.
    const second = createIdentity(root, "bob", "reviewer");
    expect(second.capability).toBe("scout"); // original preserved
    expect(second.sessionsCompleted).toBe(1);
    expect(second.recentTasks).toHaveLength(1);
  });
});
|
|
51
|
+
|
|
52
|
+
describe("loadIdentity", () => {
  test("returns null when no identity exists", () => {
    const missing = loadIdentity(root, "ghost");
    expect(missing).toBeNull();
  });

  test("round-trips a created identity", () => {
    const written = createIdentity(root, "carol", "lead");
    const readBack = loadIdentity(root, "carol");
    expect(readBack).not.toBeNull();
    expect(readBack).toEqual(written);
  });

  test("treats an empty file as no identity", () => {
    // A real create lays down the directory; the file is then blanked out.
    createIdentity(root, "empty", "builder");
    writeFileSync(identityFile("empty"), "", "utf8");

    expect(loadIdentity(root, "empty")).toBeNull();
  });

  test("throws ConfigError on malformed YAML", () => {
    // Overwrite a valid CV with YAML that cannot be parsed.
    createIdentity(root, "broken", "builder");
    writeFileSync(identityFile("broken"), "name: [unclosed\n", "utf8");

    const attempt = () => loadIdentity(root, "broken");
    expect(attempt).toThrow(ConfigError);
  });
});
|
|
79
|
+
|
|
80
|
+
describe("updateIdentity", () => {
  test("increments sessionsCompleted on each update", () => {
    createIdentity(root, "dave", "builder");

    const afterFirst = updateIdentity(root, "dave", { domains: ["api"] });
    expect(afterFirst.sessionsCompleted).toBe(1);

    const afterSecond = updateIdentity(root, "dave", { domains: ["db"] });
    expect(afterSecond.sessionsCompleted).toBe(2);

    // Re-read from disk to prove the changes were persisted.
    const onDisk = loadIdentity(root, "dave");
    expect(onDisk?.sessionsCompleted).toBe(2);
    expect(onDisk?.expertiseDomains).toEqual(["api", "db"]);
  });

  test("merges domains uniquely, preserving first-seen order", () => {
    createIdentity(root, "erin", "builder");
    updateIdentity(root, "erin", { domains: ["api", "db"] });

    // "db" and "api" are repeats; only "ui" is new and lands at the end.
    const merged = updateIdentity(root, "erin", { domains: ["db", "ui", "api"] });
    expect(merged.expertiseDomains).toEqual(["api", "db", "ui"]);
  });

  test("appends a task only when taskId is provided", () => {
    createIdentity(root, "frank", "builder");

    // Without a taskId the session is counted but no task is recorded.
    const domainsOnly = updateIdentity(root, "frank", { domains: ["api"] });
    expect(domainsOnly.recentTasks).toHaveLength(0);
    expect(domainsOnly.sessionsCompleted).toBe(1);

    // With a taskId the task lands at the end of the history.
    const recorded = updateIdentity(root, "frank", {
      taskId: "task-42",
      summary: "implemented endpoint",
    });
    expect(recorded.recentTasks).toHaveLength(1);

    const newest = recorded.recentTasks.at(-1);
    expect(newest?.taskId).toBe("task-42");
    expect(newest?.summary).toBe("implemented endpoint");
    // completedAt must be an ISO-8601 string (Date round-trip is lossless).
    const stamp = newest?.completedAt ?? "";
    expect(new Date(stamp).toISOString()).toBe(stamp);
  });

  test("creates an identity on the fly if none exists", () => {
    // Deliberately no createIdentity call beforehand.
    const created = updateIdentity(root, "grace", { taskId: "t-1", summary: "s" });
    expect(created.sessionsCompleted).toBe(1);
    expect(created.recentTasks).toHaveLength(1);
    expect(existsSync(identityFile("grace"))).toBe(true);
  });

  test("caps recentTasks at 20, keeping the newest (push 25, expect 20)", () => {
    createIdentity(root, "heidi", "builder");

    // Record 25 tasks, five more than the cap.
    for (let n = 0; n < 25; n += 1) {
      updateIdentity(root, "heidi", { taskId: `task-${n}`, summary: `summary ${n}` });
    }

    const onDisk = loadIdentity(root, "heidi");
    expect(onDisk).not.toBeNull();
    expect(onDisk?.recentTasks).toHaveLength(20);
    // task-0..task-4 were evicted; the newest entry sits last.
    expect(onDisk?.recentTasks[0]?.taskId).toBe("task-5");
    expect(onDisk?.recentTasks.at(-1)?.taskId).toBe("task-24");
    // The session counter is not affected by the history cap.
    expect(onDisk?.sessionsCompleted).toBe(25);
  });
});
|
|
148
|
+
|
|
149
|
+
describe("on-disk format", () => {
  test("identity.yaml is valid YAML round-trippable by js-yaml", () => {
    createIdentity(root, "ivan", "merger");
    updateIdentity(root, "ivan", { taskId: "t-1", summary: "x", domains: ["api"] });

    // Parse the raw file with js-yaml directly, bypassing loadIdentity.
    const raw = readFileSync(identityFile("ivan"), "utf8");
    const doc = yaml.load(raw) as AgentIdentity;

    expect(doc.name).toBe("ivan");
    expect(doc.capability).toBe("merger");
    expect(doc.expertiseDomains).toEqual(["api"]);
    expect(doc.recentTasks[0]?.taskId).toBe("t-1");
  });
});
|
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Persistent agent identity (CV).
|
|
3
|
+
*
|
|
4
|
+
* Each agent accumulates a small "CV" across the sessions it runs: how many it
|
|
5
|
+
* has completed, which expertise domains it has touched, and a rolling list of
|
|
6
|
+
* its most recent tasks. This survives worktree cleanup because it is stored
|
|
7
|
+
* under the *main* project's `.agentplate/` tree (not inside the agent's throwaway
|
|
8
|
+
* worktree), so a long-lived named agent keeps its history run after run.
|
|
9
|
+
*
|
|
10
|
+
* Storage is one YAML file per agent at
|
|
11
|
+
* `<root>/.agentplate/agents/<agentName>/identity.yaml`. We use js-yaml (the same
|
|
12
|
+
* dependency and read/write style as `config.ts` and `secrets.ts`) rather than a
|
|
13
|
+
* hand-rolled serializer so the format stays human-editable and robust.
|
|
14
|
+
*
|
|
15
|
+
* Why local types: {@link AgentIdentity} is not part of the shared `types.ts`
|
|
16
|
+
* surface — it is an implementation detail of this module — so it is declared
|
|
17
|
+
* here to keep the cross-module type barrel lean.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
|
|
21
|
+
import { dirname, join } from "node:path";
|
|
22
|
+
import yaml from "js-yaml";
|
|
23
|
+
import { AGENTPLATE_DIR } from "../config.ts";
|
|
24
|
+
import { ConfigError } from "../errors.ts";
|
|
25
|
+
|
|
26
|
+
/** Filename of an agent's CV within its identity directory. */
|
|
27
|
+
const IDENTITY_FILENAME = "identity.yaml";
|
|
28
|
+
|
|
29
|
+
/**
|
|
30
|
+
* Maximum number of recent tasks retained on an identity. Older entries are
|
|
31
|
+
* dropped from the front so the file stays small; the newest task is last.
|
|
32
|
+
*/
|
|
33
|
+
const MAX_RECENT_TASKS = 20;
|
|
34
|
+
|
|
35
|
+
/** A single completed task as recorded in an agent's rolling history. */
export interface RecentTask {
  /** Identifier of the task that was completed. */
  taskId: string;
  /** Short human-readable description of what was done. */
  summary: string;
  /** Completion time as an ISO-8601 timestamp string. */
  completedAt: string;
}
|
|
42
|
+
|
|
43
|
+
/** The persistent CV kept for one named agent. */
export interface AgentIdentity {
  /** Agent name; doubles as the directory name under `.agentplate/agents/`. */
  name: string;
  /** Capability the agent was first created with (for example "builder"). */
  capability: string;
  /** ISO-8601 timestamp of when the CV was first created. */
  created: string;
  /** Number of sessions the agent has completed so far. */
  sessionsCompleted: number;
  /** Distinct expertise domains the agent has worked in. */
  expertiseDomains: string[];
  /** Most recent tasks, oldest first and newest last, capped at 20 entries. */
  recentTasks: RecentTask[];
}
|
|
58
|
+
|
|
59
|
+
/**
 * Build the path of an agent's identity directory:
 * `<root>/<AGENTPLATE_DIR>/agents/<name>`.
 */
function identityDir(root: string, name: string): string {
  const segments = [root, AGENTPLATE_DIR, "agents", name];
  return join(...segments);
}
|
|
63
|
+
|
|
64
|
+
/** Build the path of the CV file inside an agent's identity directory. */
function identityPath(root: string, name: string): string {
  const agentDir = identityDir(root, name);
  return join(agentDir, IDENTITY_FILENAME);
}
|
|
68
|
+
|
|
69
|
+
/**
 * Normalize a parsed timestamp field to a string.
 *
 * js-yaml's default schema turns an unquoted timestamp scalar into a `Date`,
 * which is what a hand-edited file typically produces. A valid `Date` is
 * converted back to ISO-8601; any other non-string value yields `undefined` so
 * the caller can apply its own default.
 */
function isoStringOf(value: unknown): string | undefined {
  if (typeof value === "string") return value;
  if (value instanceof Date && !Number.isNaN(value.getTime())) return value.toISOString();
  return undefined;
}

/**
 * Coerce an unknown parsed YAML value into a normalized {@link AgentIdentity}.
 *
 * The on-disk shape is not trusted (it may be hand-edited or written by an
 * older version), so every field is validated and defaulted defensively:
 * - `name` is always the lookup key the file was loaded under. The stored name
 *   is ignored, because the write path derives the file location from
 *   `identity.name`; honouring a mismatched stored name would redirect the next
 *   write into a different agent's directory.
 * - `created` / `completedAt` accept a string or a YAML-parsed `Date`.
 * - `sessionsCompleted` is clamped to a non-negative integer.
 * - Non-string or empty domains and non-mapping task entries are skipped.
 *
 * @param parsed - Value returned by the YAML parser.
 * @param name - Lookup key the identity was requested under.
 * @returns A fully populated identity.
 * @throws ConfigError if `parsed` is not a mapping.
 */
function coerceIdentity(parsed: unknown, name: string): AgentIdentity {
  if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
    throw new ConfigError(`Expected a mapping in identity file for agent "${name}"`);
  }
  const obj = parsed as Record<string, unknown>;

  const expertiseDomains: string[] = [];
  if (Array.isArray(obj.expertiseDomains)) {
    for (const domain of obj.expertiseDomains) {
      if (typeof domain === "string" && domain !== "") expertiseDomains.push(domain);
    }
  }

  const recentTasks: RecentTask[] = [];
  if (Array.isArray(obj.recentTasks)) {
    for (const entry of obj.recentTasks) {
      if (entry === null || typeof entry !== "object" || Array.isArray(entry)) continue;
      const task = entry as Record<string, unknown>;
      recentTasks.push({
        taskId: typeof task.taskId === "string" ? task.taskId : "",
        summary: typeof task.summary === "string" ? task.summary : "",
        completedAt: isoStringOf(task.completedAt) ?? "",
      });
    }
  }

  // A count can be neither negative nor fractional; repair hand-edited values.
  const rawCount = obj.sessionsCompleted;
  const sessionsCompleted =
    typeof rawCount === "number" && Number.isFinite(rawCount)
      ? Math.max(0, Math.trunc(rawCount))
      : 0;

  return {
    // The lookup key is authoritative; the on-disk name is advisory only.
    name,
    capability: typeof obj.capability === "string" ? obj.capability : "",
    created: isoStringOf(obj.created) ?? new Date().toISOString(),
    sessionsCompleted,
    expertiseDomains,
    recentTasks,
  };
}
|
|
115
|
+
|
|
116
|
+
/**
 * Persist an identity as YAML at the path derived from `identity.name`.
 *
 * The parent directory is created if needed. The file is written with a single
 * `writeFileSync` call that overwrites any existing content, and a two-line
 * comment header precedes the YAML document.
 */
function writeIdentity(root: string, identity: AgentIdentity): void {
  const target = identityPath(root, identity.name);
  mkdirSync(dirname(target), { recursive: true });

  const banner =
    "# Agentplate agent identity (CV). Survives worktree cleanup.\n" +
    "# Managed by src/agents/identity.ts — safe to read, edit with care.\n";
  // Keep keys in declaration order so the file reads like the interface.
  const document = yaml.dump(identity, { indent: 2, sortKeys: false });

  writeFileSync(target, banner + document, "utf8");
}
|
|
125
|
+
|
|
126
|
+
/**
 * Return the agent's identity, creating and persisting a fresh one if none is
 * stored yet.
 *
 * The call is idempotent: when an identity can already be loaded it is returned
 * as-is and nothing is written, so a re-spawn never resets accumulated history
 * (the `capability` argument is ignored in that case). A new CV starts with
 * zero sessions, no domains, no tasks, and `created` stamped with the current
 * time.
 */
export function createIdentity(root: string, name: string, capability: string): AgentIdentity {
  const stored = loadIdentity(root, name);
  if (stored === null) {
    const fresh: AgentIdentity = {
      name,
      capability,
      created: new Date().toISOString(),
      sessionsCompleted: 0,
      expertiseDomains: [],
      recentTasks: [],
    };
    writeIdentity(root, fresh);
    return fresh;
  }
  return stored;
}
|
|
148
|
+
|
|
149
|
+
/**
 * Read an agent's identity from disk.
 *
 * @returns The normalized identity, or `null` when the file does not exist or
 *   parses to nothing (for example an empty file).
 * @throws ConfigError if reading or parsing the file fails, or if the parsed
 *   top level is not a mapping.
 */
export function loadIdentity(root: string, name: string): AgentIdentity | null {
  const file = identityPath(root, name);
  if (!existsSync(file)) {
    return null;
  }

  let document: unknown;
  try {
    const text = readFileSync(file, "utf8");
    document = yaml.load(text);
  } catch (error) {
    throw new ConfigError(`Invalid YAML in ${file}: ${(error as Error).message}`);
  }

  // Nothing was parsed, which is handled the same as a missing file.
  if (document === undefined || document === null) {
    return null;
  }
  return coerceIdentity(document, name);
}
|
|
169
|
+
|
|
170
|
+
/** Optional changes that {@link updateIdentity} folds into an agent's CV. */
export interface IdentityPatch {
  /** Id of the completed task; a history entry is recorded only when this is set. */
  taskId?: string;
  /** Human-readable summary stored alongside the recorded task. */
  summary?: string;
  /** Expertise domains to merge into the CV; duplicates are dropped. */
  domains?: string[];
}
|
|
179
|
+
|
|
180
|
+
/**
 * Record one finished session on an agent's CV and persist the result.
 *
 * The three effects are independent of one another:
 * - `sessionsCompleted` always goes up by one.
 * - Each non-empty entry of `patch.domains` not already present is appended to
 *   `expertiseDomains`, keeping first-seen order.
 * - When `patch.taskId` is defined, a task stamped with the current time is
 *   appended to `recentTasks` (an omitted `summary` is stored as an empty
 *   string); the history is then trimmed to the newest
 *   {@link MAX_RECENT_TASKS} entries.
 *
 * An agent without a stored identity gets one created first, with an empty
 * capability, so callers never need to pre-create it.
 *
 * @returns The updated identity, as written to disk.
 * @throws ConfigError if an existing identity file is unreadable or invalid.
 */
export function updateIdentity(root: string, name: string, patch: IdentityPatch): AgentIdentity {
  const cv = loadIdentity(root, name) ?? createIdentity(root, name, "");

  // Every update stands for one completed session.
  cv.sessionsCompleted += 1;

  const incoming = patch.domains;
  if (incoming !== undefined && incoming.length > 0) {
    const known = new Set(cv.expertiseDomains);
    for (const domain of incoming) {
      if (domain === "" || known.has(domain)) continue;
      known.add(domain);
      cv.expertiseDomains.push(domain);
    }
  }

  // History is recorded only for updates that name a task.
  if (patch.taskId !== undefined) {
    cv.recentTasks.push({
      taskId: patch.taskId,
      summary: patch.summary ?? "",
      completedAt: new Date().toISOString(),
    });

    // Evict from the front so only the newest entries remain.
    const overflow = cv.recentTasks.length - MAX_RECENT_TASKS;
    if (overflow > 0) {
      cv.recentTasks = cv.recentTasks.slice(overflow);
    }
  }

  writeIdentity(root, cv);
  return cv;
}
|