typeclaw 0.8.0 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -6
- package/package.json +5 -3
- package/scripts/require-parallel.ts +41 -0
- package/src/agent/index.ts +55 -6
- package/src/agent/live-sessions.ts +34 -0
- package/src/agent/plugin-tools.ts +2 -0
- package/src/agent/session-meta.ts +21 -2
- package/src/agent/subagent-completion-reminder.ts +89 -0
- package/src/agent/subagents.ts +3 -2
- package/src/agent/system-prompt.ts +10 -8
- package/src/bundled-plugins/explorer/explorer.ts +2 -2
- package/src/bundled-plugins/guard/index.ts +14 -1
- package/src/bundled-plugins/guard/policies/managed-config.ts +43 -13
- package/src/bundled-plugins/guard/policies/memory-retrieval-cache-write.ts +37 -0
- package/src/bundled-plugins/guard/policies/memory-topics-delete.ts +67 -0
- package/src/bundled-plugins/guard/policies/memory-topics-write.ts +33 -0
- package/src/bundled-plugins/guard/policies/non-workspace-write.ts +8 -2
- package/src/bundled-plugins/guard/policy.ts +7 -0
- package/src/bundled-plugins/memory/README.md +76 -62
- package/src/bundled-plugins/memory/append-tool.ts +3 -2
- package/src/bundled-plugins/memory/citation-superset.ts +49 -11
- package/src/bundled-plugins/memory/citations.ts +19 -8
- package/src/bundled-plugins/memory/delete-tool.ts +57 -0
- package/src/bundled-plugins/memory/dreaming-state.ts +1 -1
- package/src/bundled-plugins/memory/dreaming.ts +364 -146
- package/src/bundled-plugins/memory/frontmatter.ts +165 -0
- package/src/bundled-plugins/memory/index.ts +236 -16
- package/src/bundled-plugins/memory/injection-plan.ts +15 -0
- package/src/bundled-plugins/memory/load-memory.ts +102 -103
- package/src/bundled-plugins/memory/load-shards.ts +156 -0
- package/src/bundled-plugins/memory/memory-logger.ts +16 -15
- package/src/bundled-plugins/memory/memory-retrieval.ts +105 -0
- package/src/bundled-plugins/memory/migration.ts +282 -1
- package/src/bundled-plugins/memory/paths.ts +42 -0
- package/src/bundled-plugins/memory/search-tool.ts +232 -0
- package/src/bundled-plugins/memory/secret-detector.ts +2 -2
- package/src/bundled-plugins/memory/shard-snapshot.ts +51 -0
- package/src/bundled-plugins/memory/slug.ts +59 -0
- package/src/bundled-plugins/memory/stream-io.ts +110 -1
- package/src/bundled-plugins/memory/strength.ts +3 -3
- package/src/bundled-plugins/memory/topics.ts +70 -16
- package/src/bundled-plugins/security/index.ts +24 -0
- package/src/bundled-plugins/security/permissions.ts +4 -0
- package/src/bundled-plugins/security/policies/cron-promotion.ts +349 -0
- package/src/bundled-plugins/security/policies/git-exfil.ts +2 -0
- package/src/bundled-plugins/security/policies/prompt-injection.ts +3 -0
- package/src/bundled-plugins/security/policies/role-promotion.ts +419 -0
- package/src/bundled-plugins/security/policies/system-prompt-leak.ts +1 -0
- package/src/channels/adapters/kakaotalk-attachment.ts +7 -17
- package/src/channels/adapters/kakaotalk.ts +64 -37
- package/src/channels/adapters/slack-bot-classify.ts +2 -27
- package/src/channels/index.ts +5 -0
- package/src/channels/router.ts +201 -17
- package/src/channels/subagent-completion-bridge.ts +84 -0
- package/src/cli/builtins.ts +1 -0
- package/src/cli/index.ts +1 -0
- package/src/cli/init.ts +122 -14
- package/src/cli/inspect.ts +151 -0
- package/src/cron/consumer.ts +1 -1
- package/src/init/dockerfile.ts +268 -4
- package/src/init/hatching.ts +5 -6
- package/src/init/kakaotalk-auth.ts +6 -47
- package/src/init/validate-api-key.ts +121 -0
- package/src/inspect/index.ts +213 -0
- package/src/inspect/label.ts +50 -0
- package/src/inspect/live.ts +221 -0
- package/src/inspect/render.ts +163 -0
- package/src/inspect/replay.ts +265 -0
- package/src/inspect/session-list.ts +160 -0
- package/src/inspect/types.ts +110 -0
- package/src/plugin/hooks.ts +23 -1
- package/src/plugin/index.ts +2 -0
- package/src/plugin/manager.ts +1 -1
- package/src/plugin/registry.ts +1 -1
- package/src/plugin/types.ts +10 -0
- package/src/run/channel-session-factory.ts +7 -1
- package/src/run/index.ts +87 -21
- package/src/secrets/kakao-renewal.ts +3 -47
- package/src/server/index.ts +241 -60
- package/src/shared/index.ts +3 -0
- package/src/shared/protocol.ts +49 -0
- package/src/skills/typeclaw-channel-kakaotalk/SKILL.md +9 -9
- package/src/skills/typeclaw-claude-code/SKILL.md +57 -39
- package/src/skills/typeclaw-claude-code/references/stop-hook.md +2 -0
- package/src/skills/typeclaw-claude-code/references/tmux-driving.md +102 -16
- package/src/skills/typeclaw-config/SKILL.md +1 -1
- package/src/skills/typeclaw-cron/SKILL.md +1 -1
- package/src/skills/typeclaw-memory/SKILL.md +16 -163
- package/src/skills/typeclaw-permissions/SKILL.md +2 -2
- package/src/skills/typeclaw-plugins/SKILL.md +25 -14
- package/src/test-helpers/wait-for.ts +7 -1
- package/typeclaw.schema.json +7 -0
package/README.md
CHANGED
|
@@ -19,7 +19,7 @@ TypeClaw is the agent I wanted to use:
|
|
|
19
19
|
- **TypeScript end to end** — agent core, plugins, channel adapters, CLI, TUI all in one language
|
|
20
20
|
- **Bun-native plugins** — plugins are just TS modules; no IPC, no FFI, hot-reloadable config
|
|
21
21
|
- **Docker-friendly by default** — every agent runs in its own container; the host CLI is purely a launcher
|
|
22
|
-
- **Self-improving** — the agent observes its own work, distills it into long-term memory and reusable skills, and gets sharper over time without you writing prompts for it
|
|
22
|
+
- **Self-improving** — the agent observes its own work, distills it into sharded long-term memory and reusable skills, and gets sharper over time without you writing prompts for it
|
|
23
23
|
|
|
24
24
|
If you're like me, TypeClaw is the right choice. If not, that's fine too.
|
|
25
25
|
|
|
@@ -31,18 +31,18 @@ If you're like me, TypeClaw is the right choice. If not, that's fine too.
|
|
|
31
31
|
- ⏰ **Cron** — schedule prompts or shell commands; per-job coalescing so slow jobs don't pile up
|
|
32
32
|
- 📚 **Skills on demand** — markdown procedures the agent loads only when relevant; zero token cost until used
|
|
33
33
|
- 🔎 **Web research** — bundled `scout` subagent plus first-class `websearch` and `webfetch` tools (DuckDuckGo via curl-impersonate, Wikipedia)
|
|
34
|
-
- 🛡 **Security guards** — bundled `tool.before` policies catch secret exfil, SSRF, prompt injection,
|
|
35
|
-
- 📊 **Usage
|
|
34
|
+
- 🛡 **Security guards** — bundled `tool.before` policies catch secret exfil, SSRF, prompt injection, tainted git remotes, and silent privilege escalation (role/cron promotion) before they fire
|
|
35
|
+
- 📊 **Usage, inspect, doctor** — `typeclaw usage` reports token/$ spend per session, model, or day; `typeclaw inspect` replays a session transcript and tails live activity; `typeclaw doctor` diagnoses host, agent folder, and plugin state
|
|
36
36
|
|
|
37
37
|
## Where it goes further
|
|
38
38
|
|
|
39
|
-
- 🌱 **Self-improving** — bundled `memory` plugin
|
|
39
|
+
- 🌱 **Self-improving** — bundled `memory` plugin logs sessions to daily streams, then a `dreaming` subagent distills them into sharded long-term memory (`memory/topics/`) on its own schedule; no prompts to write
|
|
40
40
|
- 🧠 **Muscle memory** — repeated procedures get distilled into reusable skills the agent writes for itself and loads on later runs
|
|
41
41
|
- 💾 **Auto-backup** — the bundled `backup` plugin commits session logs and memory on every idle window with an LLM-generated commit subject
|
|
42
42
|
- 🪄 **Subagents** — first-class child sessions with their own system prompt, payload schema, and per-payload coalescing; cron and the main agent fire them through one in-process Stream
|
|
43
43
|
- 🪪 **Roles and permissions** — `owner` / `trusted` / `member` / `guest` with first-message match rules per channel; gates `channel.respond`, cron scheduling, and security bypasses, so a Slack stranger can't tell the agent to push to main
|
|
44
44
|
- 👥 **Group chat awareness** — knows who's in the room, distinguishes humans from bots, and stays engaged after a reply without re-mentioning
|
|
45
|
-
- 🧱 **Managed-file guards** — `typeclaw.json`, `cron.json`,
|
|
45
|
+
- 🧱 **Managed-file guards** — `typeclaw.json`, `cron.json`, memory shards, and bundled skills are protected from accidental rewrites; invalid config writes and silent role/cron privilege grants are rejected at the tool boundary
|
|
46
46
|
- 🌐 **Headed browser inside the container** — bundled `agent-browser` plugin ships Chrome under Xvfb so the agent can drive real web pages past bot fingerprinting
|
|
47
47
|
- 🌍 **Tunnels and auto port-forward** — dev servers inside the container appear on `localhost` (even loopback-only ones); public URLs via Cloudflare Quick (zero signup) or your own external URL, with GitHub webhooks self-registered at the resulting URL
|
|
48
48
|
- 🔄 **Hot reload** — change `typeclaw.json`, run `typeclaw reload` — no restart for most fields
|
|
@@ -78,7 +78,7 @@ See `typeclaw --help` for the full command surface, or [typeclaw.dev](https://ty
|
|
|
78
78
|
git clone https://github.com/typeclaw/typeclaw
|
|
79
79
|
cd typeclaw
|
|
80
80
|
bun install
|
|
81
|
-
bun test
|
|
81
|
+
bun run test
|
|
82
82
|
```
|
|
83
83
|
|
|
84
84
|
Pre-commit checks (all must pass — no exceptions):
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "typeclaw",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.9.0",
|
|
4
4
|
"homepage": "https://github.com/typeclaw/typeclaw#readme",
|
|
5
5
|
"bugs": {
|
|
6
6
|
"url": "https://github.com/typeclaw/typeclaw/issues"
|
|
@@ -36,7 +36,7 @@
|
|
|
36
36
|
"format": "oxfmt --write .",
|
|
37
37
|
"format:check": "oxfmt --check .",
|
|
38
38
|
"check": "bun run typecheck && bun run lint && bun run format:check",
|
|
39
|
-
"test": "bun test",
|
|
39
|
+
"test": "bun test --parallel",
|
|
40
40
|
"generate:schema": "bun run scripts/generate-schema.ts",
|
|
41
41
|
"debug:prompt": "bun run scripts/dump-system-prompt.ts",
|
|
42
42
|
"postinstall": "bun run scripts/generate-schema.ts"
|
|
@@ -46,7 +46,7 @@
|
|
|
46
46
|
"@mariozechner/pi-coding-agent": "^0.67.3",
|
|
47
47
|
"@mariozechner/pi-tui": "^0.67.3",
|
|
48
48
|
"@mozilla/readability": "^0.6.0",
|
|
49
|
-
"agent-messenger": "2.
|
|
49
|
+
"agent-messenger": "2.17.0",
|
|
50
50
|
"cheerio": "^1.2.0",
|
|
51
51
|
"citty": "^0.2.2",
|
|
52
52
|
"cron-parser": "^5.5.0",
|
|
@@ -56,9 +56,11 @@
|
|
|
56
56
|
"zod": "^4.3.6"
|
|
57
57
|
},
|
|
58
58
|
"devDependencies": {
|
|
59
|
+
"@sinonjs/fake-timers": "^15.4.0",
|
|
59
60
|
"@types/bun": "latest",
|
|
60
61
|
"@types/jsdom": "^28.0.1",
|
|
61
62
|
"@types/proper-lockfile": "^4.1.4",
|
|
63
|
+
"@types/sinonjs__fake-timers": "^15.0.1",
|
|
62
64
|
"@types/turndown": "^5.0.6",
|
|
63
65
|
"@types/ws": "^8.18.1",
|
|
64
66
|
"@typescript/native-preview": "^7.0.0-dev.20260416.1",
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
// Preloaded by bunfig.toml `[test] preload`. Denies `bun test` without
|
|
2
|
+
// --parallel. Serial runs are ~3.4x slower (44s → 13s, see commit
|
|
3
|
+
// 1c66d5e), and Bun has no bunfig knob for the flag yet (verified
|
|
4
|
+
// against bunfig.zig in oven-sh/bun main, May 2026). Without this
|
|
5
|
+
// guard, IDE test runners and ad-hoc shells silently fall back to the
|
|
6
|
+
// slow path.
|
|
7
|
+
//
|
|
8
|
+
// Detection: Bun strips CLI flags from `Bun.argv` before invoking the
|
|
9
|
+
// preload, so we can't scrape the flag directly. Instead we look for
|
|
10
|
+
// BUN_TEST_WORKER_ID, which Bun sets in the preload env exactly when
|
|
11
|
+
// `--parallel` is active (the variable carries the worker index for
|
|
12
|
+
// the IPC handshake between coordinator and workers). Empirically
|
|
13
|
+
// verified against bun 1.3.14: present under --parallel, absent under
|
|
14
|
+
// serial. If a future Bun version renames this var, the guard fails
|
|
15
|
+
// closed (treats every run as serial → always denies), which is the
|
|
16
|
+
// safe direction.
|
|
17
|
+
//
|
|
18
|
+
// Bypass with TYPECLAW_ALLOW_SERIAL_TESTS=1 when debugging a flaky
|
|
19
|
+
// test where worker contention obscures the failure.
|
|
20
|
+
|
|
21
|
+
// Gate for `bun test`: a run that carries BUN_TEST_WORKER_ID is treated as a
// parallel worker and passes straight through; an explicit opt-in permits a
// serial run with a warning; every other invocation is rejected with exit 1.
const isParallelWorker = typeof process.env.BUN_TEST_WORKER_ID === 'string'
const serialRunAllowed = process.env.TYPECLAW_ALLOW_SERIAL_TESTS === '1'

if (!isParallelWorker) {
  if (serialRunAllowed) {
    console.warn('[require-parallel] Running serially — TYPECLAW_ALLOW_SERIAL_TESTS=1 set.')
  } else {
    // One entry per stderr line; empty strings are the blank spacer lines.
    const denialLines = [
      '',
      ' ✗ `bun test` without --parallel is denied in this repo.',
      '',
      ' Serial runs take ~46s; --parallel cuts that to ~14s on a multi-core',
      ' machine and is what CI uses. Bun does not (yet) accept `[test] parallel`',
      ' in bunfig.toml, so we enforce it via this preload.',
      '',
      ' Use one of:',
      ' bun run test # preferred',
      ' bun test --parallel # direct',
      ' TYPECLAW_ALLOW_SERIAL_TESTS=1 bun test # intentional serial run',
      '',
    ]
    for (const line of denialLines) {
      console.error(line)
    }
    process.exit(1)
  }
}
|
package/src/agent/index.ts
CHANGED
|
@@ -765,7 +765,40 @@ export async function createResourceLoader(options: CreateResourceLoaderOptions
|
|
|
765
765
|
const agentDir = options.agentDir ?? process.cwd()
|
|
766
766
|
const mode: SystemPromptMode = options.mode ?? deriveSystemPromptMode(options.origin)
|
|
767
767
|
const basePrompt = mode === 'slim' ? SLIM_SYSTEM_PROMPT : DEFAULT_SYSTEM_PROMPT
|
|
768
|
-
|
|
768
|
+
|
|
769
|
+
// Kick off the three independent I/O paths concurrently. Sequential awaits
|
|
770
|
+
// here used to be the dominant cold-start cost amplifier: loadSelf is 2
|
|
771
|
+
// file reads, renderGitNudge spawns a subprocess, loadMemory reads N topic
|
|
772
|
+
// shards. None of them depend on each other, so we run them in parallel.
|
|
773
|
+
// The plugin hook (runSessionPrompt) only needs `self`, so it can overlap
|
|
774
|
+
// with the gitNudge subprocess and the shard reads while `self` is in
|
|
775
|
+
// flight too.
|
|
776
|
+
//
|
|
777
|
+
// Plugin-hook contract: `runSessionPrompt` runs AFTER gitNudge/memory I/O
|
|
778
|
+
// has been kicked off. A hook that mutates `memory/topics/` or git-tracked
|
|
779
|
+
// files during its body races those in-flight reads -- mutations may or
|
|
780
|
+
// may not be reflected in the resulting prompt. The bundled hooks only
|
|
781
|
+
// mutate the prompt string itself; third-party plugins that need to mutate
|
|
782
|
+
// disk before the suffix sections see it must do so before/outside the
|
|
783
|
+
// session-prompt hook.
|
|
784
|
+
//
|
|
785
|
+
// We wrap gitNudge and memory promises in `settled` shells so any
|
|
786
|
+
// rejection from them cannot surface as an unhandled rejection during the
|
|
787
|
+
// window where we're awaiting selfPromise + runSessionPrompt. Production
|
|
788
|
+
// callers don't reject (renderGitNudge swallows internally, loadMemory
|
|
789
|
+
// catches ENOENT) but a non-ENOENT fs error (EACCES/EIO) on the agent
|
|
790
|
+
// folder would otherwise terminate the process before we reach the
|
|
791
|
+
// gather point.
|
|
792
|
+
const selfPromise = loadSelf(agentDir)
|
|
793
|
+
const gitNudgeSettled = mode === 'slim' ? Promise.resolve(ok('')) : settle(renderGitNudge(agentDir))
|
|
794
|
+
const memorySettled = settle(
|
|
795
|
+
loadMemory(agentDir, {
|
|
796
|
+
...(options.origin !== undefined ? { origin: options.origin } : {}),
|
|
797
|
+
...(options.plugins?.sessionId !== undefined ? { currentSessionId: options.plugins.sessionId } : {}),
|
|
798
|
+
}),
|
|
799
|
+
)
|
|
800
|
+
|
|
801
|
+
let self = await selfPromise
|
|
769
802
|
|
|
770
803
|
if (options.plugins) {
|
|
771
804
|
// The plugin hook receives the partially-assembled prompt (base + identity)
|
|
@@ -788,11 +821,9 @@ export async function createResourceLoader(options: CreateResourceLoaderOptions
|
|
|
788
821
|
// commit guidance the nudge points back to is itself excluded from the slim
|
|
789
822
|
// base prompt. Memory is still included so cron jobs that depend on MEMORY.md
|
|
790
823
|
// context (e.g. "send today's standup summary") keep working.
|
|
791
|
-
const
|
|
792
|
-
const
|
|
793
|
-
|
|
794
|
-
...(options.plugins?.sessionId !== undefined ? { currentSessionId: options.plugins.sessionId } : {}),
|
|
795
|
-
})
|
|
824
|
+
const [gitNudgeResult, memoryResult] = await Promise.all([gitNudgeSettled, memorySettled])
|
|
825
|
+
const gitNudge = unwrapSettled(gitNudgeResult)
|
|
826
|
+
const memorySection = unwrapSettled(memoryResult)
|
|
796
827
|
|
|
797
828
|
const systemPrompt = composeSystemPrompt({
|
|
798
829
|
mode,
|
|
@@ -872,3 +903,21 @@ function resolveRoleContext(
|
|
|
872
903
|
export function getBundledSkillsDir(): string {
|
|
873
904
|
return join(dirname(fileURLToPath(import.meta.url)), '..', 'skills')
|
|
874
905
|
}
|
|
906
|
+
|
|
907
|
+
/** Outcome of a promise captured as a plain value instead of a thrown rejection. */
type Settled<T> = { ok: true; value: T } | { ok: false; error: unknown }

/** Wrap an already-available value as a successful `Settled`. */
function ok<T>(value: T): Settled<T> {
  const success: Settled<T> = { ok: true, value }
  return success
}

/**
 * Turn a promise into one that never rejects.
 *
 * Both handlers are attached in the same call, so a rejection of `promise`
 * is always observed here and is reported as `{ ok: false, error }`.
 */
function settle<T>(promise: Promise<T>): Promise<Settled<T>> {
  const onFulfilled = (value: T): Settled<T> => ({ ok: true, value })
  const onRejected = (error: unknown): Settled<T> => ({ ok: false, error })
  return promise.then(onFulfilled, onRejected)
}

/**
 * Recover the value from a `Settled`, re-throwing the captured error
 * unchanged when the original promise had rejected.
 */
function unwrapSettled<T>(result: Settled<T>): T {
  if (!result.ok) {
    throw result.error
  }
  return result.value
}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import type { AgentSession } from './index'
|
|
2
|
+
|
|
3
|
+
/** A running agent session, exposed only through its id and its `subscribe` member. */
export type LiveAgentSession = {
  sessionId: string
  session: Pick<AgentSession, 'subscribe'>
}

/**
 * In-memory index of live agent sessions keyed by session id.
 *
 * Registering an id that is already present replaces the earlier entry.
 */
export class LiveSessionRegistry {
  private readonly byId: Map<string, LiveAgentSession> = new Map()

  /** Store `live` under its own `sessionId`. */
  register(live: LiveAgentSession): void {
    const { sessionId } = live
    this.byId.set(sessionId, live)
  }

  /** Drop the entry for `sessionId`; a missing id is a no-op. */
  unregister(sessionId: string): void {
    this.byId.delete(sessionId)
  }

  /** Look up a session by id, or `undefined` when none is registered. */
  get(sessionId: string): LiveAgentSession | undefined {
    return this.byId.get(sessionId)
  }

  /** Whether a session with this id is currently registered. */
  has(sessionId: string): boolean {
    return this.byId.has(sessionId)
  }

  /** Number of registered sessions. */
  size(): number {
    return this.byId.size
  }

  /** Remove every registered session. */
  clear(): void {
    this.byId.clear()
  }
}
|
|
@@ -40,6 +40,8 @@ const ACKNOWLEDGE_GUARDS_SCHEMA = Type.Optional(
|
|
|
40
40
|
Type.Object(
|
|
41
41
|
{
|
|
42
42
|
nonWorkspaceWrite: Type.Optional(Type.Boolean()),
|
|
43
|
+
rolePromotion: Type.Optional(Type.Boolean()),
|
|
44
|
+
cronPromotion: Type.Optional(Type.Boolean()),
|
|
43
45
|
},
|
|
44
46
|
{ additionalProperties: false },
|
|
45
47
|
),
|
|
@@ -9,12 +9,29 @@ export type SessionMetaPayload = {
|
|
|
9
9
|
export type MinimalSessionOrigin =
|
|
10
10
|
| { kind: 'tui' }
|
|
11
11
|
| { kind: 'cron'; jobId: string; jobKind: 'prompt' | 'exec' | 'subagent' | 'handler' }
|
|
12
|
-
| {
|
|
12
|
+
| {
|
|
13
|
+
kind: 'channel'
|
|
14
|
+
adapter: string
|
|
15
|
+
workspace: string
|
|
16
|
+
// Optional human-readable names persisted alongside IDs so offline
|
|
17
|
+
// tooling (`typeclaw inspect`, future report commands) can render
|
|
18
|
+
// sessions as `Slack acme-corp/#general` instead of bare IDs without
|
|
19
|
+
// re-querying the adapter at runtime. Workspace/chat NAMES are not
|
|
20
|
+
// secrets — they are visible to any participant — and they are
|
|
21
|
+
// stable across reopens, so the tradeoff is one-time write cost for
|
|
22
|
+
// permanent offline readability. Author handles, participant lists,
|
|
23
|
+
// and membership counts remain dropped (those carry author identity
|
|
24
|
+
// and would land in `sessions/`'s auto-backup git history).
|
|
25
|
+
workspaceName?: string
|
|
26
|
+
chat: string
|
|
27
|
+
chatName?: string
|
|
28
|
+
thread: string | null
|
|
29
|
+
}
|
|
13
30
|
| { kind: 'subagent'; subagent: string; parentSessionId: string }
|
|
14
31
|
|
|
15
32
|
// Reduce a full SessionOrigin to the minimum projection persisted to disk.
|
|
16
33
|
// Drops participant lists, membership counts, recursive provenance, and
|
|
17
|
-
//
|
|
34
|
+
// author identifiers — none of which `typeclaw usage` reads, and all of
|
|
18
35
|
// which would otherwise land in git history when sessions/ is auto-backed-up.
|
|
19
36
|
// Kept as a separate function so the boundary between "data the LLM sees in
|
|
20
37
|
// the system prompt" (full origin) and "data persisted for usage reporting"
|
|
@@ -34,7 +51,9 @@ function minimalOrigin(origin: SessionOrigin): MinimalSessionOrigin {
|
|
|
34
51
|
kind: 'channel',
|
|
35
52
|
adapter: origin.adapter,
|
|
36
53
|
workspace: origin.workspace,
|
|
54
|
+
...(origin.workspaceName !== undefined ? { workspaceName: origin.workspaceName } : {}),
|
|
37
55
|
chat: origin.chat,
|
|
56
|
+
...(origin.chatName !== undefined ? { chatName: origin.chatName } : {}),
|
|
38
57
|
thread: origin.thread,
|
|
39
58
|
}
|
|
40
59
|
case 'subagent':
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
// Shared renderer for the `<system-reminder>` block injected into a parent
|
|
2
|
+
// session's prompt queue when one of its backgrounded subagents finishes.
|
|
3
|
+
// Used by the TUI route in src/server/index.ts and the channel-router
|
|
4
|
+
// bridge so the model sees identical wording across origins. The
|
|
5
|
+
// `channel` knob is the only per-origin difference: channel sessions
|
|
6
|
+
// need the "end your reply via channel_reply" nudge because plain-text
|
|
7
|
+
// output is invisible there AND the reminder is not a user message —
|
|
8
|
+
// the channel origin block's MUST-call-channel_reply rule is keyed to
|
|
9
|
+
// user messages, so a model that reads the spec literally would
|
|
10
|
+
// otherwise leave the reply un-sent.
|
|
11
|
+
|
|
12
|
+
/** Inputs for the completion reminder shown to a parent session. */
export type CompletionReminderArgs = {
  subagent: string
  taskId: string
  ok: boolean
  durationMs: number
  // Failure detail; rendered as 'unknown error' when absent.
  error?: string
  // When true, the channel-reply nudge is appended to the reminder.
  channel?: boolean
}

// Extra sentence(s) appended for channel sessions; the pieces are joined with
// single spaces into one line.
const CHANNEL_REPLY_NUDGE = [
  'This reminder is a system message, not a user inbound — but you are in a channel session,',
  'so end your turn via `channel_reply` (or `channel_send`) to surface the result.',
  'Plain-text output is invisible here. If there is genuinely nothing to surface, end with `NO_REPLY`.',
].join(' ')

/**
 * Build the `<system-reminder>` block announcing that a subagent finished.
 *
 * The success and failure variants share the same envelope and differ only
 * in the sentence describing the outcome.
 */
export function renderSubagentCompletionReminder(args: CompletionReminderArgs): string {
  const elapsed = formatReminderDuration(args.durationMs)
  const subject = `Subagent \`${args.subagent}\` (${args.taskId})`
  const nudge = args.channel === true ? ` ${CHANNEL_REPLY_NUDGE}` : ''

  let outcome: string
  if (args.ok) {
    outcome = `${subject} completed in ${elapsed}. Use subagent_output to fetch the result.`
  } else {
    const reason = args.error ?? 'unknown error'
    outcome = `${subject} FAILED after ${elapsed}: ${reason}. Use subagent_output to inspect.`
  }

  return ['<system-reminder>', `${outcome}${nudge}`, '</system-reminder>'].join('\n')
}
|
|
45
|
+
|
|
46
|
+
/**
 * Render an elapsed duration compactly: `123ms` below one second, `42s`
 * below one minute, otherwise `2m5s`.
 *
 * @param ms - Elapsed time in milliseconds. Fractional values are rounded to
 *   the nearest millisecond; non-finite or negative values are treated as 0
 *   so the reminder never shows `NaNmNaNs` or a negative duration.
 * @returns The formatted duration string.
 */
export function formatReminderDuration(ms: number): string {
  // Normalise first so every branch below works on a non-negative integer.
  const wholeMs = Number.isFinite(ms) && ms > 0 ? Math.round(ms) : 0
  if (wholeMs < 1000) return `${wholeMs}ms`
  const totalSec = Math.floor(wholeMs / 1000)
  if (totalSec < 60) return `${totalSec}s`
  const min = Math.floor(totalSec / 60)
  const sec = totalSec % 60
  return `${min}m${sec}s`
}
|
|
54
|
+
|
|
55
|
+
/** Normalised shape of a `subagent.completed` broadcast payload. */
export type SubagentCompletedPayload = {
  taskId: string
  subagent: string
  parentSessionId: string
  ok: boolean
  durationMs: number
  error?: string
}

/**
 * Type guard for the `subagent.completed` broadcast payload. Subscribers
 * to `target: { kind: 'broadcast' }` see every broadcast; this guard
 * filters and narrows in one place so callers don't repeat the
 * typeof-checking dance.
 *
 * @param payload - Arbitrary broadcast payload.
 * @returns The normalised payload, or `null` when `payload` is not an object,
 *   is not tagged `kind: 'subagent.completed'`, or has no string
 *   `parentSessionId`. Other fields fall back to defaults: `taskId` to
 *   `'<unknown>'`, `subagent` to `'subagent'`, `ok` to `false` unless it is
 *   exactly `true`, and `durationMs` to `0` unless it is a finite number.
 *   `error` is included only when it is a string.
 */
export function parseSubagentCompletedPayload(payload: unknown): SubagentCompletedPayload | null {
  if (payload === null || typeof payload !== 'object') return null
  const p = payload as Record<string, unknown>
  if (p.kind !== 'subagent.completed') return null
  if (typeof p.parentSessionId !== 'string') return null

  // `typeof NaN === 'number'`, so require finiteness as well; otherwise a
  // NaN/Infinity duration would flow through to whoever formats it.
  const durationMs = typeof p.durationMs === 'number' && Number.isFinite(p.durationMs) ? p.durationMs : 0

  return {
    taskId: typeof p.taskId === 'string' ? p.taskId : '<unknown>',
    subagent: typeof p.subagent === 'string' ? p.subagent : 'subagent',
    parentSessionId: p.parentSessionId,
    ok: p.ok === true,
    durationMs,
    ...(typeof p.error === 'string' ? { error: p.error } : {}),
  }
}
|
package/src/agent/subagents.ts
CHANGED
|
@@ -206,12 +206,13 @@ export async function invokeSubagent(name: string, options: InvokeSubagentOption
|
|
|
206
206
|
hooks && sessionId !== undefined && agentDir !== undefined
|
|
207
207
|
? { sessionId, agentDir, ...(origin !== undefined ? { origin } : {}) }
|
|
208
208
|
: undefined
|
|
209
|
+
const userPromptForTurn = override?.userPrompt ?? options.userPrompt
|
|
209
210
|
try {
|
|
210
211
|
if (hooks && turnEvent !== undefined) {
|
|
211
|
-
await hooks.runSessionTurnStart(turnEvent)
|
|
212
|
+
await hooks.runSessionTurnStart({ ...turnEvent, userPrompt: userPromptForTurn })
|
|
212
213
|
}
|
|
213
214
|
try {
|
|
214
|
-
await session.prompt(
|
|
215
|
+
await session.prompt(userPromptForTurn)
|
|
215
216
|
} finally {
|
|
216
217
|
if (hooks && turnEvent !== undefined) {
|
|
217
218
|
await hooks.runSessionTurnEnd(turnEvent)
|
|
@@ -10,15 +10,15 @@ TypeClaw is domain-agnostic — your purpose is defined by \`IDENTITY.md\`, your
|
|
|
10
10
|
- **SOUL.md** *(always injected below)* — your character, tone, voice. Edit rarely.
|
|
11
11
|
- **USER.md** *(read on demand)* — what you know about the user. Update as you learn.
|
|
12
12
|
- **AGENTS.md** *(read on demand)* — your operating manual. Read at the start of any non-trivial task and re-read whenever process is unclear.
|
|
13
|
-
-
|
|
13
|
+
- **\`memory/topics/\`** *(always injected below, READ-ONLY)* — sharded long-term memory, owned by the dreaming subagent. To capture something memorable, surface it in your reply or let the memory-logger append to \`memory/streams/\`; never edit memory shards directly.
|
|
14
14
|
|
|
15
|
-
If a task reveals durable guidance or identity/user context, update the owning file (IDENTITY / SOUL / USER / AGENTS) — never
|
|
15
|
+
If a task reveals durable guidance or identity/user context, update the owning file (IDENTITY / SOUL / USER / AGENTS) — never memory shards.
|
|
16
16
|
|
|
17
17
|
## Your workspace
|
|
18
18
|
|
|
19
19
|
- **\`workspace/\`** — your free-write zone for drafts, scratch work, generated artifacts. Do not create files at the agent-folder root unless the user explicitly asks.
|
|
20
20
|
- **\`sessions/\`** — transcripts of past conversations. Runtime-managed; don't write here.
|
|
21
|
-
- **\`memory/\`** *(
|
|
21
|
+
- **\`memory/streams/\`** *(not injected — reach via \`memory_search\`)* — dated streams written by the memory-logger between sessions. Runtime-owned. Undreamed observations are searchable on demand instead of injected into every prompt.
|
|
22
22
|
- **\`memory/skills/\`** — muscle-memory skills written by the dreaming subagent. Auto-loaded; don't write here directly.
|
|
23
23
|
- **\`.agents/skills/\`** — user-installed skills.
|
|
24
24
|
|
|
@@ -47,7 +47,7 @@ Your agent folder is a git repository.
|
|
|
47
47
|
## How to behave
|
|
48
48
|
|
|
49
49
|
- Match the user's register. If SOUL.md specifies a voice, use it. Otherwise, be concise and direct, without filler or flattery.
|
|
50
|
-
- Prefer reading files over guessing — IDENTITY / SOUL / USER /
|
|
50
|
+
- Prefer reading files over guessing — IDENTITY / SOUL / USER / memory topics / AGENTS or the workspace. Follow AGENTS.md in whatever role IDENTITY.md assigns you; propose additions to AGENTS.md when you find gaps worth codifying.
|
|
51
51
|
- Answer questions. Do work. Don't over-explain unless asked.
|
|
52
52
|
- If a request is ambiguous in a way that doubles the effort, ask one clarifying question; otherwise proceed with a reasonable default.
|
|
53
53
|
- Never suppress errors to make things "work", and never fabricate results. Report failures clearly.
|
|
@@ -62,7 +62,7 @@ There are two delegation modes. Pick deliberately.
|
|
|
62
62
|
|
|
63
63
|
When you need information to answer the user and the search is broad, fire 2-5 subagents in parallel with \`run_in_background: true\` covering different angles. End your response after spawning. The system will deliver a \`<system-reminder>\` for each completion; gather results then answer the user. Do NOT poll \`subagent_output\` in a tight loop.
|
|
64
64
|
|
|
65
|
-
The bundled \`explorer\` subagent is the right tool for **local** reconnaissance — anything reachable on the agent's filesystem: code, past sessions (\`sessions/*.jsonl\`),
|
|
65
|
+
The bundled \`explorer\` subagent is the right tool for **local** reconnaissance — anything reachable on the agent's filesystem: code, past sessions (\`sessions/*.jsonl\`), memory topic shards and daily memory streams, skills, cron jobs, config, git history, mounts, channels state. It is read-only and runs on a fast/cheap model, so fire liberally. Do NOT ask it to plan, decide, or write code — it finds and reports.
|
|
66
66
|
|
|
67
67
|
The bundled \`scout\` subagent is its external counterpart — web research only. Use it when you need information from public sources (docs, library references, vendor changelogs, news, anything not already in this agent's folder). Scout runs \`websearch\` and \`webfetch\` in a fresh context window so the search churn does not pollute yours; it returns a citation-backed answer with a confidence rating. Prefer scout over running \`websearch\`/\`webfetch\` yourself when the research is non-trivial (more than 1-2 queries) or when you want to save your context for the synthesis step.
|
|
68
68
|
|
|
@@ -70,6 +70,8 @@ The bundled \`scout\` subagent is its external counterpart — web research only
|
|
|
70
70
|
|
|
71
71
|
When the user hands you a task that will take minutes (a multi-step browser session, a long build, a complex external operation), acknowledge in plain language ("Alright, running that in the background — I'll let you know when it's done"), spawn one subagent with \`run_in_background: true\`, then KEEP TALKING. Stay available for follow-ups, related questions, parallel small tasks. When the completion reminder lands, weave the result into your next reply naturally. If the conversation has gone idle, proactively message the user with the result rather than waiting.
|
|
72
72
|
|
|
73
|
+
In a channel session, the completion \`<system-reminder>\` is NOT a user message — the channel origin's "you MUST call \`channel_reply\` for every user message" rule does not literally apply, but the underlying constraint does: plain-text output is invisible in a channel. Surface the result via \`channel_reply\` (or \`channel_send\`) so the user actually sees it. Failures need surfacing too: when a delegated task didn't complete, the user needs the outcome and whatever partial progress you got. \`NO_REPLY\` is the escape hatch only when the user has already seen the substantive answer — typically because you posted it via \`channel_reply\` in the same turn that spawned the subagent, and the reminder is purely confirming completion of a step the user is already tracking. Otherwise, post the result.
|
|
74
|
+
|
|
73
75
|
Before you run a tool chain that returns bulky intermediate output you won't need again — multiple \`webfetch\` calls, a \`websearch\` round you'll iterate on, a \`bash\` command that scrapes a site or dumps a large response, an \`agent-browser\` session, a \`claude\` (Claude Code) delegation driven through tmux, any "fetch N things and synthesize" loop — delegate it to a subagent. \`scout\` (for research) or \`operator\` (for actions with side effects) runs the noisy work in its own context window and returns a distilled summary; your session carries the *answer*, not the raw material you derived it from. This is about context economy, not latency: even a fast operation belongs in a subagent when the byproducts are large and disposable (three quick news searches across different outlets still dumps three SERPs and three article bodies into your context forever). The exception is exactly one call whose result you'll cite directly — one \`webfetch\` of a known URL, one \`websearch\` query whose top result is the answer. Two of either, or any "across multiple sources" framing, is delegation territory.
|
|
74
76
|
|
|
75
77
|
The bundled \`operator\` subagent is the right tool for this mode. It is write-capable (read, write, edit, bash with side effects) and runs on the default model. Use it for: browser sessions, multi-file refactors, deploys, batch API calls, Claude Code delegations (the tmux driving loop, the multi-turn polling, the worktree teardown — all of it inside operator), anything that involves taking action on behalf of the user over multiple steps. The operator returns a structured final report (outcome, what changed, what was observed); surface it naturally rather than copy-pasting. Operator is gated by a separate permission (\`subagent.spawn.operator\`) so write-capable spawns are restricted to owner-tier and trusted-tier callers — if the gate denies, fall back to doing the work in your own session rather than reporting failure to the user.
|
|
@@ -165,10 +167,10 @@ Session started at \`${iso}\` (${zone}). This is a session-creation snapshot, no
|
|
|
165
167
|
// plugin does not catch this.
|
|
166
168
|
// 4. Output discipline — keeps tool-call narration from bloating the
|
|
167
169
|
// ever-growing transcript that the next memory-logger pass has to read.
|
|
168
|
-
// 5. Filesystem hygiene — workspace boundary,
|
|
170
|
+
// 5. Filesystem hygiene — workspace boundary, memory-shard ownership, and
|
|
169
171
|
// runtime-managed paths (secrets.json / .env / sessions/ / memory/ / workspace/). The
|
|
170
172
|
// guard plugin blocks non-workspace writes for write/edit, but it
|
|
171
|
-
//
|
|
173
|
+
// does not gate bash/git on the
|
|
172
174
|
// runtime-managed paths.
|
|
173
175
|
//
|
|
174
176
|
// What does NOT live here, by design:
|
|
@@ -189,6 +191,6 @@ Never suppress errors to make things "work", and never fabricate results. If som
|
|
|
189
191
|
|
|
190
192
|
Do not narrate routine, low-risk tool calls — just call the tool. Do not over-explain what you did unless asked.
|
|
191
193
|
|
|
192
|
-
Your free-write zone is \`workspace/\`. Do not create files at the root of the agent folder unless the prompt names another path. Do not edit \`
|
|
194
|
+
Your free-write zone is \`workspace/\`. Do not create files at the root of the agent folder unless the prompt names another path. Do not edit \`memory/topics/\` directly — the dreaming subagent owns it; to capture something memorable, surface it in your reply or let the memory-logger append to \`memory/streams/\`. Never stage or commit \`secrets.json\`, \`.env\`, \`sessions/\`, \`memory/\`, or \`workspace/\` — those are runtime- or user-managed.
|
|
193
195
|
|
|
194
196
|
See the session-origin block below for what kind of session this is and what's expected of you.`
|
|
@@ -9,7 +9,7 @@ You are STRICTLY PROHIBITED from:
|
|
|
9
9
|
- Creating, modifying, or deleting files
|
|
10
10
|
- Using bash for: mkdir, touch, rm, cp, mv, git add, git commit, npm install, pip install, or any write operation
|
|
11
11
|
- Starting long-running background processes
|
|
12
|
-
- Writing to
|
|
12
|
+
- Writing to memory/topics/, memory/streams/, sessions/, workspace/, or any other runtime-managed path
|
|
13
13
|
- Spawning further subagents — you are at the end of the delegation chain
|
|
14
14
|
|
|
15
15
|
Your role is EXCLUSIVELY to search and analyze existing local state.
|
|
@@ -32,7 +32,7 @@ The agent folder is mounted at \`/agent\` inside the container. Search the narro
|
|
|
32
32
|
|
|
33
33
|
1. **Codebase** — \`/agent/\` root and subdirs (excluding the runtime-managed paths below). Source files, docs, identity files (\`IDENTITY.md\`, \`SOUL.md\`, \`USER.md\`, \`AGENTS.md\`).
|
|
34
34
|
2. **Sessions** — \`/agent/sessions/*.jsonl\` — conversation transcripts. Each line is a JSON event (user message, tool call, tool result, assistant message). Filename pattern \`\${ISO_TIMESTAMP}_\${UUID}.jsonl\`. \`grep\` works directly on the JSONL.
|
|
35
|
-
3. **Memory** — \`/agent/
|
|
35
|
+
3. **Memory** — \`/agent/memory/topics/*.md\` (long-term topic shards) and \`/agent/memory/streams/yyyy-MM-dd.jsonl\` (daily fragment streams written by the memory-logger subagent). \`memory/.dreaming-state.json\` tracks the dreaming watermark. Do NOT edit any of these — they are runtime-owned.
|
|
36
36
|
4. **Muscle-memory skills** — \`/agent/memory/skills/<name>/SKILL.md\` — procedures the dreaming subagent distilled from repeated work.
|
|
37
37
|
5. **User-installed skills** — \`/agent/.agents/skills/<name>/SKILL.md\` — hand-authored or downloaded skills.
|
|
38
38
|
6. **Workspace** — \`/agent/workspace/\` — the agent's free-write zone. Drafts, scratch work, generated artifacts.
|
|
@@ -2,6 +2,7 @@ import { definePlugin } from '@/plugin'
|
|
|
2
2
|
|
|
3
3
|
import {
|
|
4
4
|
checkManagedConfigGuard,
|
|
5
|
+
checkMemoryTopicsDeleteGuard,
|
|
5
6
|
checkNonWorkspaceWriteGuard,
|
|
6
7
|
checkSkillAuthoringGuard,
|
|
7
8
|
checkUncommittedChangesAdvice,
|
|
@@ -23,7 +24,19 @@ export default definePlugin({
|
|
|
23
24
|
agentDir: ctx.agentDir,
|
|
24
25
|
})
|
|
25
26
|
if (skillResult) return skillResult
|
|
26
|
-
|
|
27
|
+
const memoryTopicsDeleteResult = checkMemoryTopicsDeleteGuard({
|
|
28
|
+
tool: event.tool,
|
|
29
|
+
args: event.args,
|
|
30
|
+
agentDir: ctx.agentDir,
|
|
31
|
+
origin: event.origin,
|
|
32
|
+
})
|
|
33
|
+
if (memoryTopicsDeleteResult) return memoryTopicsDeleteResult
|
|
34
|
+
return checkNonWorkspaceWriteGuard({
|
|
35
|
+
tool: event.tool,
|
|
36
|
+
args: event.args,
|
|
37
|
+
agentDir: ctx.agentDir,
|
|
38
|
+
origin: event.origin,
|
|
39
|
+
})
|
|
27
40
|
},
|
|
28
41
|
'tool.after': async (event, ctx) => {
|
|
29
42
|
await checkUncommittedChangesAdvice({
|
|
@@ -39,19 +39,31 @@ export async function checkManagedConfigGuard(options: {
|
|
|
39
39
|
}
|
|
40
40
|
}
|
|
41
41
|
|
|
42
|
+
// Oracle PR #305 findings #5 and #6: identity-based managed-file
|
|
43
|
+
// detection. The earlier shape compared `basename(realpath(target))` to
|
|
44
|
+
// the managed-file list, which missed two attacks: (5) a symlink at
|
|
45
|
+
// agent root `typeclaw.json -> workspace/tc.json` realpathed to a name
|
|
46
|
+
// outside the managed list, and (6) on case-insensitive filesystems,
|
|
47
|
+
// `TYPECLAW.JSON` addresses the same file as `typeclaw.json` but
|
|
48
|
+
// basename string-equality missed the casing variant.
|
|
49
|
+
//
|
|
50
|
+
// New shape: for each managed-file name, compute the canonical agent-
|
|
51
|
+
// root path and compare against the target. We accept if EITHER the
|
|
52
|
+
// paths realpath to the same file OR match lexically. Branch (a)
|
|
53
|
+
// covers symlinks and case-aliased filesystems; branch (b) keeps the
|
|
54
|
+
// canonical lexical name authoritative even before the file exists
|
|
55
|
+
// (first-init writes).
|
|
42
56
|
async function resolveManagedTarget(agentDir: string, targetPath: string): Promise<{ file: ManagedFile } | undefined> {
|
|
43
57
|
const resolvedAgentDir = path.resolve(agentDir)
|
|
44
|
-
const
|
|
45
|
-
const
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
}
|
|
52
|
-
|
|
53
|
-
function isManagedFile(basename: string): basename is ManagedFile {
|
|
54
|
-
return MANAGED_FILES.has(basename as ManagedFile)
|
|
58
|
+
const resolvedTarget = path.resolve(targetPath)
|
|
59
|
+
for (const file of MANAGED_FILES) {
|
|
60
|
+
const canonical = path.join(resolvedAgentDir, file)
|
|
61
|
+
if (canonical === resolvedTarget) return { file }
|
|
62
|
+
const realCanonical = await resolveRealIntendedPath(canonical)
|
|
63
|
+
const realTarget = await resolveRealIntendedPath(resolvedTarget)
|
|
64
|
+
if (realCanonical === realTarget) return { file }
|
|
65
|
+
}
|
|
66
|
+
return undefined
|
|
55
67
|
}
|
|
56
68
|
|
|
57
69
|
function validateManagedContent(file: ManagedFile, content: string): { ok: true } | { ok: false; reason: string } {
|
|
@@ -81,6 +93,20 @@ async function intendedContent(
|
|
|
81
93
|
return blockReason(tool, targetPath, 'edit calls must include an edits array')
|
|
82
94
|
}
|
|
83
95
|
|
|
96
|
+
// Oracle PR #305 finding #4: refuse multi-edit on managed files to
|
|
97
|
+
// avoid simulator-vs-pi divergence. The canonical workflow for
|
|
98
|
+
// typeclaw.json / cron.json is read + modify in memory + write the
|
|
99
|
+
// whole file back; multi-edit is not required and the divergence
|
|
100
|
+
// would let an attacker validate a different final file here than
|
|
101
|
+
// the one pi actually writes.
|
|
102
|
+
if (edits.length > 1) {
|
|
103
|
+
return blockReason(
|
|
104
|
+
tool,
|
|
105
|
+
targetPath,
|
|
106
|
+
'multi-edit calls on managed files are refused — use `write` with full content instead',
|
|
107
|
+
)
|
|
108
|
+
}
|
|
109
|
+
|
|
84
110
|
let content: string
|
|
85
111
|
try {
|
|
86
112
|
content = await readFile(targetPath, 'utf8')
|
|
@@ -100,10 +126,14 @@ async function intendedContent(
|
|
|
100
126
|
if (oldText.length === 0) {
|
|
101
127
|
return blockReason(tool, targetPath, 'edit oldText must not be empty')
|
|
102
128
|
}
|
|
103
|
-
|
|
129
|
+
const firstIdx = content.indexOf(oldText)
|
|
130
|
+
if (firstIdx === -1) {
|
|
104
131
|
return blockReason(tool, targetPath, 'edit oldText was not found in existing file')
|
|
105
132
|
}
|
|
106
|
-
|
|
133
|
+
if (content.indexOf(oldText, firstIdx + 1) !== -1) {
|
|
134
|
+
return blockReason(tool, targetPath, 'edit oldText is not unique in the existing file')
|
|
135
|
+
}
|
|
136
|
+
content = content.slice(0, firstIdx) + newText + content.slice(firstIdx + oldText.length)
|
|
107
137
|
}
|
|
108
138
|
return { content }
|
|
109
139
|
}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import path from 'node:path'
|
|
2
|
+
|
|
3
|
+
import type { SessionOrigin } from '@/agent/session-origin'
|
|
4
|
+
import type { SecuritySeverity } from '@/bundled-plugins/security/permissions'
|
|
5
|
+
|
|
6
|
+
export const GUARD_MEMORY_RETRIEVAL_CACHE_WRITE = 'memoryRetrievalCacheWrite'
|
|
7
|
+
export const GUARD_MEMORY_RETRIEVAL_CACHE_WRITE_SEVERITY: SecuritySeverity = 'low'
|
|
8
|
+
|
|
9
|
+
const SESSION_ID_REGEX = /^[A-Za-z0-9._-]{1,128}$/
|
|
10
|
+
|
|
11
|
+
export async function isMemoryRetrievalCacheWriteAllowed(options: {
|
|
12
|
+
tool: string
|
|
13
|
+
args: Record<string, unknown>
|
|
14
|
+
agentDir: string
|
|
15
|
+
origin?: SessionOrigin
|
|
16
|
+
}): Promise<boolean> {
|
|
17
|
+
const { tool, args, agentDir, origin } = options
|
|
18
|
+
if (tool !== 'write') return false
|
|
19
|
+
if (origin?.kind !== 'subagent' || origin.subagent !== 'memory-retrieval') return false
|
|
20
|
+
|
|
21
|
+
const rawPath = args.path
|
|
22
|
+
if (typeof rawPath !== 'string') return false
|
|
23
|
+
|
|
24
|
+
const targetPath = path.resolve(agentDir, rawPath)
|
|
25
|
+
const expectedDir = path.resolve(agentDir, 'memory', '.retrieval-cache')
|
|
26
|
+
const relative = path.relative(expectedDir, targetPath)
|
|
27
|
+
if (relative === '' || relative.startsWith('..') || path.isAbsolute(relative)) return false
|
|
28
|
+
|
|
29
|
+
const parts = relative.split(path.sep).filter(Boolean)
|
|
30
|
+
if (parts.length !== 1) return false
|
|
31
|
+
|
|
32
|
+
const fileName = parts[0]!
|
|
33
|
+
if (!fileName.endsWith('.md')) return false
|
|
34
|
+
|
|
35
|
+
const sessionId = fileName.slice(0, -3)
|
|
36
|
+
return SESSION_ID_REGEX.test(sessionId)
|
|
37
|
+
}
|