typeclaw 0.37.3 → 0.37.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/README.md +69 -46
  2. package/package.json +1 -1
  3. package/src/agent/compaction.ts +24 -15
  4. package/src/agent/doctor.ts +6 -1
  5. package/src/agent/session-origin.ts +101 -173
  6. package/src/agent/subagents.ts +146 -14
  7. package/src/agent/system-prompt.ts +46 -48
  8. package/src/agent/todo/scope.ts +4 -2
  9. package/src/agent/tools/channel-reply.ts +7 -9
  10. package/src/bundled-plugins/memory/index.ts +33 -33
  11. package/src/bundled-plugins/memory/load-memory.ts +92 -35
  12. package/src/bundled-plugins/memory/slug.ts +19 -0
  13. package/src/bundled-plugins/memory/turn-dedup.ts +32 -29
  14. package/src/bundled-plugins/security/policies/private-surface-read.ts +4 -1
  15. package/src/bundled-plugins/tool-result-cap/README.md +7 -7
  16. package/src/bundled-plugins/tool-result-cap/index.ts +1 -1
  17. package/src/channels/adapters/discord-bot.ts +11 -4
  18. package/src/channels/adapters/github/inbound.ts +68 -43
  19. package/src/channels/adapters/github/index.ts +57 -9
  20. package/src/channels/adapters/github/recover-failed-deliveries.ts +270 -0
  21. package/src/channels/adapters/kakaotalk.ts +5 -1
  22. package/src/channels/adapters/mention-hints.ts +75 -0
  23. package/src/channels/adapters/slack-bot.ts +8 -2
  24. package/src/channels/continuation-willingness.ts +216 -68
  25. package/src/channels/router.ts +149 -15
  26. package/src/cli/dreams.ts +2 -2
  27. package/src/cli/init.ts +41 -7
  28. package/src/cli/inspect.ts +2 -2
  29. package/src/cli/logs.ts +2 -2
  30. package/src/cli/qr.ts +4 -3
  31. package/src/cli/require-agent-dir.ts +31 -0
  32. package/src/cli/shell.ts +2 -2
  33. package/src/cli/stop.ts +2 -2
  34. package/src/cli/tui.ts +20 -6
  35. package/src/cli/ui.ts +8 -4
  36. package/src/container/shared.ts +18 -0
  37. package/src/container/start.ts +1 -1
  38. package/src/doctor/checks.ts +145 -2
  39. package/src/hostd/client.ts +48 -52
  40. package/src/hostd/daemon.ts +82 -39
  41. package/src/hostd/paths.ts +22 -2
  42. package/src/hostd/spawn.ts +7 -0
  43. package/src/hostd/tailscale.ts +12 -1
  44. package/src/init/index.ts +35 -8
  45. package/src/init/kakaotalk-auth.ts +2 -2
  46. package/src/init/packagejson.ts +2 -2
  47. package/src/init/run-bun-install.ts +71 -37
  48. package/src/inspect/transcript-view.ts +15 -2
  49. package/src/plugin/loader.ts +7 -4
  50. package/src/portbroker/hostd-client.ts +32 -6
  51. package/src/sandbox/session-tmp.ts +6 -1
  52. package/src/secrets/export-claude-credentials-file.ts +2 -2
  53. package/src/shared/index.ts +4 -0
  54. package/src/shared/platform.ts +11 -0
  55. package/src/shared/wsl.ts +139 -0
  56. package/src/tui/index.ts +26 -8
  57. package/src/tui/terminal-guard.ts +139 -0
  58. package/typeclaw.schema.json +2 -2
package/README.md CHANGED
@@ -1,60 +1,85 @@
1
- # TypeClaw
2
-
3
1
  <p align="center">
4
2
  <img src="./docs/public/typeclaw-transparent.png" alt="TypeClaw logo" width="240" />
5
3
  </p>
6
4
 
7
- <h3 align="center">The agent for perfectionists</h3>
5
+ <h3 align="center">TypeClaw: The agent for perfectionists</h3>
8
6
  <p align="center">Crafted in every detail – it behaves in your team's chat and<br />gets sharper the longer it runs. Sandboxed and self-managing.</p>
9
7
 
10
- ## Why?
8
+ <br />
9
+ <br />
10
+
11
+ ## Self-improving — a learning loop, not a black box
12
+
13
+ - 🌱 **Memory** — logs its own work to a daily stream as it goes
14
+ - 💤 **Dreaming** — a subagent distills each day's work into long-term memory, committed to git as plain files you can read, diff, and revert
15
+ - 🧠 **Muscle memory** — recurring procedures become reusable skills it writes for itself and loads on later runs
16
+ - 🔎 **Optional embedding recall** — hybrid keyword-and-embedding search over the same markdown memory, off by default; the plain files remain the durable source of truth
17
+
18
+ ## Group chat — knows when not to talk
19
+
20
+ - 👥 **Room awareness** — knows who's present and tells humans from bots, so it stays quiet when people are talking to each other rather than chiming in on messages it wasn't part of
21
+ - 💬 **Sticky engagement** — holds an ongoing thread after replying without needing to be re-mentioned, then steps back when the conversation moves on; multilingual continuation detection, peer-bot loop guards, and flood filters keep it from spiraling
22
+
23
+ ## Channels — one agent, many inboxes
24
+
25
+ - 📨 **Supported channels** — Slack, Discord, Telegram, LINE, KakaoTalk, GitHub, and a websocket TUI, driven by the same agent
26
+ - ✅ **Pull-request review** — treats a GitHub PR as a conversation, reviewing as a participant, with guards against claiming a verdict it didn't actually post and against leaving a PR stranded
27
+
28
+ ## Web & research — reads the web like a person
29
+
30
+ - 🔍 **Live web search & fetch** — pull a page as a readable article, a JSON query, a selected slice, a grep, or raw
31
+ - 🪪 **Browser-like fetching** — replays a Chrome-like TLS/HTTP fingerprint to get past many generic-client blocks; CAPTCHA and IP-reputation gates can still fail
32
+ - 🌐 **Interactive browser sessions** — drives a browser on live pages, with a dashboard you can step into for logins, 2FA, or CAPTCHA
33
+
34
+ ## Security — defense-in-depth for risky actions
35
+
36
+ - 🛡 **Layered guards** — stop secret exfiltration, SSRF, prompt injection, rogue git pushes, and silent privilege escalation before they fire
37
+ - 🪪 **Roles** — owner, trusted, member, and guest gate privileged actions
38
+ - 🔑 **Permissions** — per-channel match rules decide who can ask for what; an untrusted channel user can't trigger privileged behavior
39
+ - 🔒 **Encryption at rest** — sensitive channel passwords are sealed with authenticated encryption; the key is host-held and isn't passed into the container during normal operation
40
+
41
+ ## Isolation & sandbox — runs clean, stays out of each other's way
42
+
43
+ - 🐳 **No machine clutter** — agent runtime state lives in its own folder and container; apart from the TypeClaw CLI install, it doesn't scatter services or config across your machine, and stopping it shuts the running pieces down, leaving a folder you can keep, copy, or delete
44
+ - 🧩 **No cross-agent interference** — run as many as you like; each gets its own container, files, memory, and even its own browser, so one can read a page while another drives a different one
45
+ - 📁 **Self-contained folder** — settings, memory, and connections live together in the agent's folder, kept as a version history you can review, undo, or back up
46
+
47
+ ## Subagents — delegation in a fresh context
48
+
49
+ - 🪄 **A bench of specialists** — it hands off research, planning, code review, and hands-on execution to focused child sessions, each with its own prompt, tools, and model
50
+ - 🔀 **Sync or background** — spawn and block for a result, or spawn in the background and collect completions later; coalescing prevents duplicate concurrent runs and depth limits keep delegation chains bounded
11
51
 
12
- There are great agents out there. None of them were quite the shape I wanted:
52
+ ## Extensibility — teach it new tricks in TypeScript
13
53
 
14
- - **OpenClaw** — feature-rich, but heavy
15
- - **NanoClaw** — simple, but no plugin system
16
- - **PicoClaw** — fast, but Go (so plugins live outside the runtime)
17
- - **ZeroClaw** — light, but Rust (same problem, different ecosystem)
18
- - **Hermes Agent** — awesome, but Python
54
+ - 🔌 **Plugins are just imports** — a plugin is a plain TypeScript file that imports the runtime and adds tools, skills, channels, and commands; no IPC, no FFI, no DSL, distributed as packages and resolved like any dependency
55
+ - 🛰 **MCP support** — connect external MCP servers over stdio or HTTP; their tools become the agent's tools
56
+ - 📚 **Skills on demand** — markdown procedures load lazily when selected, so they avoid prompt-token cost until used; skills layer from bundled, your own, and what the agent learns
57
+ - ⚙️ **Typed config with hot reload** — most config changes take effect live; boot-only fields are flagged restart-required
19
58
 
20
- None of that matters to most people. It matters to me. If you're like me, TypeClaw is the right choice.
59
+ ## Connectivity — reachable wherever you need it
21
60
 
22
- TypeClaw is the agent I wanted to use:
61
+ - 🌍 **Auto port-forward** — services inside the container appear on your `localhost`, including loopback-only ones
62
+ - 🚇 **Public tunnels** — a zero-signup public URL out of the box, or bring your own; webhooks self-register at the resulting URL
63
+ - 🔗 **Private network access** — forwarded ports can publish to a private network when configured
23
64
 
24
- - **TypeScript end to end** — agent core, plugins, channel adapters, CLI, TUI all in one language
25
- - **Bun-native plugins** — plugins are just TS modules; no IPC, no FFI, hot-reloadable config
26
- - **Docker-friendly by default** — every agent runs in its own container; the host CLI is purely a launcher
27
- - **Self-improving** — the agent observes its own work, distills it into sharded long-term memory and reusable skills, and gets sharper over time without you writing prompts for it
65
+ ## Self-managing — operational autonomy, on a budget
28
66
 
29
- If you're like me, TypeClaw is the right choice. If not, that's fine too.
67
+ - 💾 **Self-backup** — commits and pushes its own state during idle windows, with a generated commit message
68
+ - 🔁 **Self-restart** — can rebuild and restart its own container when it needs to, through the host daemon
69
+ - ♻️ **Self-continuation** — keeps working through an unfinished task list when you step away, bounded by a turn, token, and wall-clock budget
30
70
 
31
- ## What you'd expect
71
+ ## Operator CLI — see what it's doing and what it costs
32
72
 
33
- - 🐳 **Sandboxed by default** — every agent runs in its own Docker container with `.env` injection and bind-mounted host folders
34
- - 🔌 **Plugin system** — plain TypeScript modules contribute tools, skills, subagents, channels, commands, and typed config
35
- - 💬 **Multi-channel** — Slack, Discord, Telegram, LINE, KakaoTalk, GitHub webhooks, and a websocket TUI; one agent, many inboxes
36
- - **Cron** — schedule prompts or shell commands; per-job coalescing so slow jobs don't pile up
37
- - 📚 **Skills on demand** — markdown procedures the agent loads only when relevant; zero token cost until used
38
- - 🔎 **Web research** — bundled `scout` subagent plus first-class `web_search` and `web_fetch` tools (DuckDuckGo via curl-impersonate, Wikipedia)
39
- - 🛡 **Security guards** — bundled `tool.before` policies catch secret exfil, SSRF, prompt injection, tainted git remotes, and silent privilege escalation (role/cron promotion) before they fire
40
- - 📊 **Usage, inspect, doctor** — `typeclaw usage` reports token/$ spend per session, model, or day; `typeclaw inspect` replays a session transcript and tails live activity; `typeclaw doctor` diagnoses host, agent folder, and plugin state
73
+ - 🩺 **doctor** — diagnoses host, agent folder, config, and channels, with auto-fix for managed files
74
+ - 📊 **usage** — reports token and dollar spend by day, model, session, or origin
75
+ - 🔍 **inspect** — replays a session transcript and tails live activity
76
+ - 📜 **logs** — streams container logs with local-time prefixes
41
77
 
42
- ## Where it goes further
78
+ ## Compose — manage a fleet from the CLI
43
79
 
44
- - 🌱 **Self-improving** — bundled `memory` plugin logs sessions to daily streams, then a `dreaming` subagent distills them into sharded long-term memory (`memory/topics/`) on its own schedule; no prompts to write
45
- - 🧠 **Muscle memory** — repeated procedures get distilled into reusable skills the agent writes for itself and loads on later runs
46
- - 💾 **Auto-backup** — the bundled `backup` plugin commits session logs and memory on every idle window with an LLM-generated commit subject
47
- - 🪄 **Subagents** — first-class child sessions with their own system prompt, payload schema, and per-payload coalescing; cron and the main agent fire them through one in-process Stream
48
- - 🪪 **Roles and permissions** — `owner` / `trusted` / `member` / `guest` with first-message match rules per channel; gates `channel.respond`, cron scheduling, and security bypasses, so a Slack stranger can't tell the agent to push to main
49
- - 👥 **Group chat awareness** — knows who's in the room, distinguishes humans from bots, and stays engaged after a reply without re-mentioning
50
- - 🧱 **Managed-file guards** — `typeclaw.json`, `cron.json`, memory shards, and bundled skills are protected from accidental rewrites; invalid config writes and silent role/cron privilege grants are rejected at the tool boundary
51
- - 🌐 **Headed browser inside the container** — bundled `agent-browser` plugin ships Chrome under Xvfb so the agent can drive real web pages past bot fingerprinting
52
- - 🌍 **Tunnels and auto port-forward** — dev servers inside the container appear on `localhost` (even loopback-only ones); public URLs via Cloudflare Quick (zero signup) or your own external URL, with GitHub webhooks self-registered at the resulting URL
53
- - 🔄 **Hot reload** — change `typeclaw.json`, run `typeclaw reload` — no restart for most fields
54
- - 🔁 **Self-restart** — the agent can bounce its own container when it updates itself
55
- - 🎼 **Compose** — orchestrate multiple agents across multiple folders
80
+ - 🎼 **Fleet operations** — discover agent folders and start, stop, restart, check status, tail logs, report usage, and run diagnostics across them from the command line
56
81
 
57
- Memory loop and subagent architecture are covered in detail in [AGENTS.md](./AGENTS.md) and [`src/bundled-plugins/memory/README.md`](./src/bundled-plugins/memory/README.md).
82
+ Memory loop and subagent architecture are covered in detail in the [Internals docs](https://typeclaw.dev/docs/internals) and [`src/bundled-plugins/memory/README.md`](./src/bundled-plugins/memory/README.md).
58
83
 
59
84
  ## Install
60
85
 
@@ -68,14 +93,12 @@ Requires Bun ≥ 1.1 and Docker (or OrbStack) on the host.
68
93
 
69
94
  ```sh
70
95
  mkdir my-agent && cd my-agent
71
- typeclaw init # scaffold typeclaw.json, .env, Dockerfile, package.json
72
- typeclaw start # build + run the container
73
- typeclaw tui # attach a terminal UI to the running agent
96
+ typeclaw init # scaffold, build, run the container, and attach a TUI
74
97
  ```
75
98
 
76
- That's it. The agent is now alive, listening on a websocket, ready to receive prompts from the TUI or any wired channel.
99
+ That's it. `init` hatches the agent end to end — it scaffolds the folder (`typeclaw.json`, `.env`, `Dockerfile`, `package.json`), builds and runs the container, then drops you into a terminal UI. The agent is now alive, listening on a websocket, ready to receive prompts from the TUI or any wired channel.
77
100
 
78
- See `typeclaw --help` for the full command surface, or [typeclaw.dev](https://typeclaw.dev) for guides and configuration reference.
101
+ For later sessions, `typeclaw start` runs the container and `typeclaw tui` re-attaches. See `typeclaw --help` for the full command surface, or [typeclaw.dev](https://typeclaw.dev) for guides and configuration reference.
79
102
 
80
103
  ## Development
81
104
 
@@ -94,7 +117,7 @@ bun run lint
94
117
  bun run format
95
118
  ```
96
119
 
97
- See [CONTRIBUTING.md](./CONTRIBUTING.md) for the recommended local dev loop (`bun link` → `typeclaw init`), commit and PR conventions, and where to ask questions. See [AGENTS.md](./AGENTS.md) for the long-form architecture notes — stages, hostd internals, message stream, plugin contracts, and the testing philosophy. The docs site at [typeclaw.dev](https://typeclaw.dev) lives in [`docs/`](./docs/).
120
+ See [CONTRIBUTING.md](./CONTRIBUTING.md) for the recommended local dev loop (`bun link` → `typeclaw init`), commit and PR conventions, and where to ask questions. The [Internals docs](https://typeclaw.dev/docs/internals) cover the long-form architecture notes — stages, hostd internals, message stream, plugin contracts, and the testing philosophy. The docs site at [typeclaw.dev](https://typeclaw.dev) lives in [`docs/`](./docs/).
98
121
 
99
122
  ## Acknowledgments
100
123
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "typeclaw",
3
- "version": "0.37.3",
3
+ "version": "0.37.5",
4
4
  "homepage": "https://github.com/typeclaw/typeclaw#readme",
5
5
  "bugs": {
6
6
  "url": "https://github.com/typeclaw/typeclaw/issues"
package/src/agent/compaction.ts CHANGED
@@ -1,27 +1,36 @@
1
1
  import type { KnownApi, Model } from '@mariozechner/pi-ai'
2
2
  import { SettingsManager } from '@mariozechner/pi-coding-agent'
3
3
 
4
- // Compaction trigger threshold expressed as a percentage of the model's
5
- // context window. pi-coding-agent's auto-compaction fires when
6
- // `contextTokens > contextWindow - reserveTokens`. To honor a percentage-
7
- // based intent across models with very different window sizes (200K Claude
8
- // vs. 1M Gemini vs. 256K Kimi), we derive `reserveTokens` per-model from
9
- // the model's `contextWindow`. SDK defaults (16384 reserve) are a fixed
10
- // number of tokens that drift in relative terms across models — at 256K
11
- // that's ~6% headroom (94% trigger), at 1M it's ~1.6% (98% trigger). A
12
- // percentage-derived reserve trips at the same fraction regardless of
13
- // model, which is what we actually want.
4
+ // Compaction trigger expressed as a fraction of the model's context window.
5
+ // pi-coding-agent auto-compaction fires when `contextTokens > contextWindow -
6
+ // reserveTokens`; deriving `reserveTokens` from the window keeps the trigger at
7
+ // the same fraction across models with very different windows (200K Claude vs.
8
+ // 1M Gemini vs. 256K Kimi) instead of the SDK's fixed 16384 reserve, which
9
+ // drifts to ~94% on a 256K window and ~98% on 1M.
14
10
  export const COMPACTION_TRIGGER_PERCENT = 0.8
15
11
 
12
+ // Absolute ceiling on the compaction trigger, independent of window size. The
13
+ // window-relative trigger alone optimizes for overflow avoidance, not token
14
+ // cost: at 80% of a large window a session accumulates ~160K (200K window) to
15
+ // ~800K (1M window) tokens of history that get re-shipped as `cacheRead` every
16
+ // turn before compaction ever fires. Capping the trigger bounds that
17
+ // steady-state re-read on big-window models; `min()` keeps the 80% behavior on
18
+ // small ones. 64K is 3x keepRecent (invariant asserted in the test), leaving
19
+ // growth room after a compaction so it does not retrigger immediately.
20
+ export const COMPACTION_ABSOLUTE_TRIGGER_TOKENS = 64_000
21
+
16
22
  // Tokens to keep in the recent window after compaction. Fixed (not a
17
- // percentage) because "recent context" is a property of conversation
18
- // shape, not model capacity — the same recent ~20K is roughly the right
19
- // amount of history regardless of whether the model has 200K or 1M total.
20
- // Mirrors pi's DEFAULT_COMPACTION_SETTINGS.keepRecentTokens.
23
+ // percentage) because "recent context" is a property of conversation shape, not
24
+ // model capacity. Mirrors pi's DEFAULT_COMPACTION_SETTINGS.keepRecentTokens.
21
25
  export const COMPACTION_KEEP_RECENT_TOKENS = 20_000
22
26
 
27
+ export function compactionTriggerTokens<TApi extends KnownApi>(model: Model<TApi>): number {
28
+ const windowRelative = Math.round(model.contextWindow * COMPACTION_TRIGGER_PERCENT)
29
+ return Math.min(windowRelative, COMPACTION_ABSOLUTE_TRIGGER_TOKENS)
30
+ }
31
+
23
32
  export function reserveTokensForModel<TApi extends KnownApi>(model: Model<TApi>): number {
24
- return Math.max(1, Math.round(model.contextWindow * (1 - COMPACTION_TRIGGER_PERCENT)))
33
+ return Math.max(1, model.contextWindow - compactionTriggerTokens(model))
25
34
  }
26
35
 
27
36
  export function createCompactionSettingsManager<TApi extends KnownApi>(model: Model<TApi>): SettingsManager {
package/src/agent/doctor.ts CHANGED
@@ -1,4 +1,9 @@
1
- import { isAbsolute, normalize } from 'node:path'
1
+ import { posix } from 'node:path'
2
+
3
+ // changedPaths are a wire format: agentDir-relative POSIX paths the container
4
+ // emits and the host re-validates. Resolved with `path.posix` so a win32 test
5
+ // runner keeps `/`-separators instead of rewriting `memory/x.md` to `memory\x.md`.
6
+ const { isAbsolute, normalize } = posix
2
7
 
3
8
  import type {
4
9
  PluginCheckResult,