typeclaw 0.29.0 → 0.30.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/scripts/verify-realproc-sandbox.sh +58 -0
- package/src/agent/index.ts +6 -0
- package/src/agent/live-subagents.ts +5 -0
- package/src/agent/plugin-tools.ts +79 -10
- package/src/agent/subagent-drain.ts +150 -0
- package/src/agent/subagents.ts +34 -3
- package/src/agent/system-prompt.ts +1 -1
- package/src/agent/tools/spawn-subagent.ts +13 -1
- package/src/bundled-plugins/bun-hygiene/README.md +12 -11
- package/src/bundled-plugins/bun-hygiene/policy.ts +8 -3
- package/src/bundled-plugins/github-cli-auth/approve-idempotency.ts +116 -35
- package/src/bundled-plugins/github-cli-auth/effective-approval.ts +14 -9
- package/src/bundled-plugins/github-cli-auth/index.ts +3 -3
- package/src/bundled-plugins/planner/planner.ts +2 -1
- package/src/bundled-plugins/researcher/researcher.ts +9 -2
- package/src/bundled-plugins/reviewer/reviewer.ts +2 -1
- package/src/channels/adapters/discord-bot-format.ts +191 -0
- package/src/channels/adapters/discord-bot.ts +2 -1
- package/src/channels/adapters/github/inbound.ts +88 -30
- package/src/channels/adapters/github/review-state.ts +27 -0
- package/src/channels/github-review-claim.ts +15 -3
- package/src/channels/outbound-flood-filter.ts +70 -3
- package/src/channels/router.ts +53 -0
- package/src/compose/discover.ts +5 -1
- package/src/config/config.ts +38 -0
- package/src/container/start.ts +14 -0
- package/src/migrations/index.ts +35 -0
- package/src/migrations/secrets-v1-to-v2.ts +344 -0
- package/src/run/index.ts +13 -0
- package/src/sandbox/availability.ts +12 -0
- package/src/sandbox/build.ts +53 -9
- package/src/sandbox/index.ts +1 -1
- package/src/sandbox/policy.ts +17 -1
- package/typeclaw.schema.json +24 -0
|
@@ -3,7 +3,12 @@ import { ACKNOWLEDGE_GUARDS, type GuardBlock, isGuardAcknowledged } from '../gua
|
|
|
3
3
|
export const GUARD_GLOBAL_INSTALL = 'globalInstall'
|
|
4
4
|
export const GUARD_NON_BUN_PACKAGE_MANAGER = 'nonBunPackageManager'
|
|
5
5
|
|
|
6
|
-
|
|
6
|
+
// Only install managers are blocked. The ephemeral runners npx/pnpx (and bunx,
|
|
7
|
+
// which is `bun`) are intentionally absent: they run a tool once without
|
|
8
|
+
// touching the dependency tree or writing a competing lockfile, so they don't
|
|
9
|
+
// undermine the bun-standardization this set protects. classify() skips any
|
|
10
|
+
// command word not in here, so leaving them out is what allows them.
|
|
11
|
+
const NON_BUN_MANAGERS = new Set(['npm', 'pnpm', 'yarn'])
|
|
7
12
|
const INSTALL_SUBCOMMANDS = new Set(['install', 'i', 'add'])
|
|
8
13
|
|
|
9
14
|
export function checkBunHygieneGuard(options: { tool: string; args: Record<string, unknown> }): GuardBlock | undefined {
|
|
@@ -310,8 +315,8 @@ function blockNonBunManager(manager: string, args: Record<string, unknown>): Gua
|
|
|
310
315
|
return {
|
|
311
316
|
block: true,
|
|
312
317
|
reason: [
|
|
313
|
-
`Guard \`${GUARD_NON_BUN_PACKAGE_MANAGER}\` blocked \`${manager}\`. This container standardizes on bun.`,
|
|
314
|
-
'Use `bun install` / `bun add <pkg>` instead of npm/pnpm/yarn
|
|
318
|
+
`Guard \`${GUARD_NON_BUN_PACKAGE_MANAGER}\` blocked \`${manager}\`. This container standardizes on bun for dependency management.`,
|
|
319
|
+
'Use `bun install` / `bun add <pkg>` instead of npm/pnpm/yarn. Ephemeral runners (`bunx`, `npx`, `pnpx`) are allowed for one-off tool execution.',
|
|
315
320
|
`Retry with \`${ACKNOWLEDGE_GUARDS}.${GUARD_NON_BUN_PACKAGE_MANAGER}: true\` if this package manager is genuinely required (e.g. a project pinned to a different lockfile).`,
|
|
316
321
|
].join(' '),
|
|
317
322
|
}
|
|
@@ -1,13 +1,17 @@
|
|
|
1
1
|
import type { ReviewVerdict } from '@/channels/github-review-turn-ledger'
|
|
2
2
|
|
|
3
|
+
// `NONE` covers "never reviewed" and "last decisive review was DISMISSED" — both
|
|
4
|
+
// mean a fresh verdict is legitimate (not a duplicate).
|
|
5
|
+
export type EffectiveVerdict = 'APPROVED' | 'CHANGES_REQUESTED' | 'NONE'
|
|
6
|
+
|
|
3
7
|
export type EffectiveApprovalResolver = (target: {
|
|
4
8
|
workspace: string
|
|
5
9
|
prNumber: number
|
|
6
|
-
}) => Promise<{ ok: true;
|
|
10
|
+
}) => Promise<{ ok: true; effective: EffectiveVerdict } | { ok: false }>
|
|
7
11
|
|
|
8
12
|
export type ApproveBlock = { block: true; reason: string }
|
|
9
13
|
|
|
10
|
-
export type
|
|
14
|
+
export type ReviewVerdictGuard = {
|
|
11
15
|
guard: (args: {
|
|
12
16
|
callId: string
|
|
13
17
|
workspace: string
|
|
@@ -17,58 +21,135 @@ export type ApproveIdempotencyGuard = {
|
|
|
17
21
|
release: (args: { callId: string; succeeded: boolean }) => void
|
|
18
22
|
}
|
|
19
23
|
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
24
|
+
// Back-compat alias: the guard now covers REQUEST_CHANGES too, not just APPROVE.
|
|
25
|
+
export type ApproveIdempotencyGuard = ReviewVerdictGuard
|
|
26
|
+
|
|
27
|
+
/**
 * Builds the block message used when a formal verdict would only repeat the
 * review this bot already holds on the pull request.
 *
 * @param verdict - The verdict being attempted. `'APPROVE'` selects the
 *   approval wording; any other value gets the REQUEST_CHANGES wording.
 * @returns The human-readable reason attached to the block.
 */
function duplicateReason(verdict: ReviewVerdict): string {
  const approveParts = [
    'This bot already holds a standing APPROVED review on this pull request. A second APPROVE would ',
    'post a redundant review. If you intended to change your verdict, request changes or dismiss the ',
    'prior review instead of re-approving.',
  ]
  const requestChangesParts = [
    'This bot already holds a standing CHANGES_REQUESTED review on this pull request. A second ',
    'REQUEST_CHANGES would post a redundant blocking review. The prior review is still live — push a fix ',
    'and APPROVE, or reply in the existing thread, instead of re-requesting changes.',
  ]
  const parts = verdict === 'APPROVE' ? approveParts : requestChangesParts
  return parts.join('')
}
|
|
41
|
+
|
|
42
|
+
const CONCURRENT_REASON =
|
|
43
|
+
'Another session in this agent is already submitting a formal review verdict for this pull request. ' +
|
|
44
|
+
'Only one verdict may land per PR — do not submit a second review; the in-flight one will post.'
|
|
45
|
+
|
|
46
|
+
/**
 * Reports whether a fresh verdict would merely repeat the standing one:
 * APPROVE repeats a standing APPROVED, and any other verdict repeats a
 * standing CHANGES_REQUESTED.
 *
 * @param verdict - The verdict about to be submitted.
 * @param effective - The standing verdict it is compared against.
 * @returns `true` when the submission would be a duplicate.
 */
function duplicatesStanding(verdict: ReviewVerdict, effective: EffectiveVerdict): boolean {
  if (verdict === 'APPROVE') {
    return effective === 'APPROVED'
  }
  return effective === 'CHANGES_REQUESTED'
}
|
|
51
|
+
|
|
52
|
+
// How long a reservation may sit before it is treated as abandoned. A normal
|
|
53
|
+
// `gh` review submit completes in seconds; this only guards against a tool.after
|
|
54
|
+
// that never fires (crash mid-command), so it must outlast a slow command yet
|
|
55
|
+
// never strand a PR for long.
|
|
56
|
+
const LEASE_TTL_MS = 5 * 60_000
|
|
57
|
+
|
|
58
|
+
type Reservation = { key: string; token: number; createdAt: number }
|
|
59
|
+
|
|
60
|
+
// MODULE-LEVEL singletons, shared by every plugin instance in this process. The
|
|
61
|
+
// github-cli-auth plugin's `plugin: async (ctx) => ...` factory may run once per
|
|
62
|
+
// session, giving each its own closure — but all of those closures import THIS
|
|
63
|
+
// module, so they coordinate through one Map. A closure-local Set (the prior
|
|
64
|
+
// design) could not see a concurrent session's in-flight verdict, which is how
|
|
65
|
+
// three sessions each landed an APPROVE on the same PR within ten seconds.
|
|
66
|
+
const inFlightByPr = new Map<string, Reservation>()
|
|
67
|
+
const reservationByCall = new Map<string, Reservation>()
|
|
68
|
+
let tokenSeq = 0
|
|
69
|
+
|
|
70
|
+
// Makes a formal `gh ... event=APPROVE|REQUEST_CHANGES` idempotent per PR across
|
|
71
|
+
// turns, sessions, and (in-process) concurrent fan-out. Two layers:
|
|
72
|
+
//
|
|
73
|
+
// 1. A process-wide in-flight lease keyed by `workspace#prNumber`, held from
|
|
74
|
+
// tool.before through tool.after. While one verdict is mid-flight, every
|
|
75
|
+
// other session's verdict for the same PR is blocked — even though GitHub
|
|
76
|
+
// has not yet recorded the in-flight review. This is the layer the old
|
|
77
|
+
// closure-local Set could not provide: separate plugin instances meant
|
|
78
|
+
// separate Sets, so concurrent sessions never saw each other.
|
|
79
|
+
//
|
|
80
|
+
// 2. The authoritative GitHub effective-state read, consulted AFTER the lease
|
|
81
|
+
// is acquired. It catches the cross-restart case (lease lost) and tracks
|
|
82
|
+
// supersession: a later CHANGES_REQUESTED/DISMISSED demotes an earlier
|
|
83
|
+
// APPROVED, so a genuine re-verdict is allowed. Reads fail OPEN — a
|
|
84
|
+
// transient error must never strand a genuine first verdict; the lease
|
|
85
|
+
// still covers the concurrent case while the command runs.
|
|
86
|
+
//
|
|
87
|
+
// The lease is released only in release() (tool.after) or on a terminal block,
|
|
88
|
+
// never after the remote read — releasing early reopens the TOCTOU the lease
|
|
89
|
+
// exists to close. Release is keyed by a per-call token so a late/stale
|
|
90
|
+
// tool.after for a superseded reservation cannot drop a newer session's lease.
|
|
33
91
|
/**
 * Creates the guard that keeps a formal APPROVE / REQUEST_CHANGES verdict
 * idempotent per pull request.
 *
 * `guard()` first takes the module-level in-flight lease for the PR, then asks
 * `deps.resolveEffectiveApproval` for the standing verdict. `release()` drops
 * the lease once the command has finished.
 *
 * @param deps.resolveEffectiveApproval - Reads the bot's standing verdict for
 *   a PR. An `{ ok: false }` result — or a thrown error — is treated as
 *   "unknown" and the verdict is allowed through (fail open).
 * @param deps.now - Clock used for lease timestamps; defaults to `Date.now`.
 * @returns The guard with its `guard` and `release` hooks.
 */
export function createApproveIdempotencyGuard(deps: {
  resolveEffectiveApproval: EffectiveApprovalResolver
  now?: () => number
}): ReviewVerdictGuard {
  const now = deps.now ?? Date.now

  return {
    async guard(args): Promise<ApproveBlock | null> {
      if (args.verdict !== 'APPROVE' && args.verdict !== 'REQUEST_CHANGES') return null
      const key = prKey(args.workspace, args.prNumber)

      // Reserve BEFORE the await so two calls racing into guard() for the same PR
      // cannot both observe an empty map: the loser sees the winner's in-flight
      // lease and is blocked.
      const held = inFlightByPr.get(key)
      if (held !== undefined) {
        if (now() - held.createdAt < LEASE_TTL_MS) {
          return { block: true, reason: CONCURRENT_REASON }
        }
        // The lease expired, so it is reclaimed below. Drop the abandoned call's
        // index entry as well: if its tool.after never fires, nothing else would
        // ever remove it from reservationByCall.
        for (const [staleCallId, stale] of reservationByCall) {
          if (stale === held) reservationByCall.delete(staleCallId)
        }
      }
      const reservation: Reservation = { key, token: ++tokenSeq, createdAt: now() }
      inFlightByPr.set(key, reservation)
      reservationByCall.set(args.callId, reservation)

      // The read fails OPEN. A resolver that throws is handled exactly like one
      // that returns { ok: false }: letting the exception escape here would leave
      // the lease held with no tool.after to release it, blocking the PR until
      // the TTL expires.
      let remote: { ok: true; effective: EffectiveVerdict } | { ok: false }
      try {
        remote = await deps.resolveEffectiveApproval({ workspace: args.workspace, prNumber: args.prNumber })
      } catch {
        remote = { ok: false }
      }

      if (remote.ok && duplicatesStanding(args.verdict, remote.effective)) {
        // Standing verdict upstream already matches. Block, and release the lease
        // now: a blocked command never reaches tool.after, so release() won't run
        // for this callId.
        releaseReservation(args.callId, reservation)
        return { block: true, reason: duplicateReason(args.verdict) }
      }

      return null
    },

    release(args): void {
      const reservation = reservationByCall.get(args.callId)
      if (reservation === undefined) return
      releaseReservation(args.callId, reservation)
    },
  }
}
|
|
71
134
|
|
|
135
|
+
/**
 * Forgets the call's reservation and drops the PR lease, but only while that
 * same reservation still owns the lease. The token comparison keeps a stale
 * release for an already-superseded reservation from removing a newer one.
 *
 * @param callId - Call whose entry is removed from `reservationByCall`.
 * @param reservation - The reservation that call was holding.
 */
function releaseReservation(callId: string, reservation: Reservation): void {
  reservationByCall.delete(callId)

  const owner = inFlightByPr.get(reservation.key)
  if (owner?.token !== reservation.token) return

  inFlightByPr.delete(reservation.key)
}
|
|
145
|
+
|
|
72
146
|
/**
 * Builds the lease key for a pull request: the workspace and the PR number
 * joined by `#`.
 */
function prKey(workspace: string, prNumber: number): string {
  return [workspace, prNumber].join('#')
}
|
|
149
|
+
|
|
150
|
+
/**
 * Test-only helper: restores the module-level lease state to its initial
 * values (token counter at zero, both maps empty) so cases do not leak into
 * each other.
 */
export function __resetReviewVerdictGuardForTest(): void {
  tokenSeq = 0
  reservationByCall.clear()
  inFlightByPr.clear()
}
|
|
@@ -1,13 +1,12 @@
|
|
|
1
1
|
import { GITHUB_API_BASE, githubJsonHeaders } from '@/channels/adapters/github/auth-pat'
|
|
2
2
|
|
|
3
|
-
import type { EffectiveApprovalResolver } from './approve-idempotency'
|
|
4
|
-
|
|
5
|
-
// Resolves
|
|
6
|
-
//
|
|
7
|
-
//
|
|
8
|
-
//
|
|
9
|
-
//
|
|
10
|
-
// first approval.
|
|
3
|
+
import type { EffectiveApprovalResolver, EffectiveVerdict } from './approve-idempotency'
|
|
4
|
+
|
|
5
|
+
// Resolves THIS bot's standing decisive review on a PR, used by the review
|
|
6
|
+
// verdict guard to stop a second formal verdict after a restart (the in-process
|
|
7
|
+
// lease covers the same-container case but is lost when the container bounces).
|
|
8
|
+
// Every failure returns { ok: false } so the guard fails open — a transient read
|
|
9
|
+
// error must never permanently block a genuine first verdict.
|
|
11
10
|
export function createGithubEffectiveApprovalResolver(deps: {
|
|
12
11
|
resolveToken: (workspace: string) => Promise<string | null>
|
|
13
12
|
fetchImpl?: typeof fetch
|
|
@@ -27,10 +26,16 @@ export function createGithubEffectiveApprovalResolver(deps: {
|
|
|
27
26
|
if (reviews === null) return { ok: false }
|
|
28
27
|
|
|
29
28
|
const lastDecisive = reviews.filter((r) => isSelf(r.login, r.isBot, self) && isDecisive(r.state)).at(-1)
|
|
30
|
-
return { ok: true,
|
|
29
|
+
return { ok: true, effective: toEffective(lastDecisive?.state) }
|
|
31
30
|
}
|
|
32
31
|
}
|
|
33
32
|
|
|
33
|
+
/**
 * Maps a raw review state string onto the effective verdict. Only `APPROVED`
 * and `CHANGES_REQUESTED` are carried through; every other state, including
 * `undefined`, becomes `NONE`.
 */
function toEffective(state: string | undefined): EffectiveVerdict {
  switch (state) {
    case 'APPROVED':
      return 'APPROVED'
    case 'CHANGES_REQUESTED':
      return 'CHANGES_REQUESTED'
    default:
      return 'NONE'
  }
}
|
|
38
|
+
|
|
34
39
|
// A bot's effective review is its LATEST decisive one. COMMENTED/PENDING are
|
|
35
40
|
// non-deciding noise that must not clear an earlier APPROVED/CHANGES_REQUESTED;
|
|
36
41
|
// a later CHANGES_REQUESTED or DISMISSED supersedes an earlier APPROVED. The
|
|
@@ -11,7 +11,7 @@ import { classifyGhToken } from './token-class'
|
|
|
11
11
|
export default definePlugin({
|
|
12
12
|
plugin: async (ctx) => {
|
|
13
13
|
const resolveTokenForRepo = ctx.github.resolveTokenForRepo
|
|
14
|
-
const
|
|
14
|
+
const verdictGuard = createApproveIdempotencyGuard({
|
|
15
15
|
resolveEffectiveApproval: createGithubEffectiveApprovalResolver({
|
|
16
16
|
resolveToken: async (workspace) => {
|
|
17
17
|
const result = await resolveTokenForRepo(workspace)
|
|
@@ -28,7 +28,7 @@ export default definePlugin({
|
|
|
28
28
|
|
|
29
29
|
const review = await noteReviewCommand({ callId: event.callId, command })
|
|
30
30
|
if (review.detected !== null) {
|
|
31
|
-
const block = await
|
|
31
|
+
const block = await verdictGuard.guard({
|
|
32
32
|
callId: event.callId,
|
|
33
33
|
workspace: review.detected.workspace,
|
|
34
34
|
prNumber: review.detected.prNumber,
|
|
@@ -70,7 +70,7 @@ export default definePlugin({
|
|
|
70
70
|
callId: event.callId,
|
|
71
71
|
result: event.result,
|
|
72
72
|
})
|
|
73
|
-
|
|
73
|
+
verdictGuard.release({ callId: event.callId, succeeded: committed })
|
|
74
74
|
},
|
|
75
75
|
},
|
|
76
76
|
}
|
|
@@ -78,7 +78,7 @@ PREFER the two purpose-built research workers for any quick search or gathering
|
|
|
78
78
|
- \`scout\` — web research. Spawn it for ANYTHING that lives on the public internet: prices, schedules, opening hours, standard timelines, prevailing practice, vendor docs, prior art, "what are the options for X". It returns a focused, citation-backed answer. This is your default for the research-resolvable facts a plan rests on.
|
|
79
79
|
- \`explorer\` — local filesystem search. Spawn it to understand the existing code, config, sessions, memory, or git history on this agent — "what does this module do", "where is X configured", "summarize the shape of this system" — before planning a change to it.
|
|
80
80
|
|
|
81
|
-
Lean on these liberally. A quick \`scout\` for real prices or a quick \`explorer\` for the actual shape of a module turns an assumption-laden plan into a grounded one, and it costs you almost nothing because the heavy reading happens in their context, not yours.
|
|
81
|
+
Lean on these liberally. A quick \`scout\` for real prices or a quick \`explorer\` for the actual shape of a module turns an assumption-laden plan into a grounded one, and it costs you almost nothing because the heavy reading happens in their context, not yours. When a plan depends on multiple independent facts, **fan out in parallel**: either emit all the independent \`spawn_subagent\` calls (sync, the default) in a SINGLE turn so they run concurrently and return together, or spawn them with \`run_in_background=true\` and fold each result in as its \`<system-reminder>\` arrives (your session stays alive until every child reports back). Either way, fold the distilled results into your single planning pass; do NOT spawn one, wait, then spawn the next unless the second genuinely depends on the first — that serializes what should be parallel.
|
|
82
82
|
|
|
83
83
|
- Spawn these workers for context-heavy GATHERING, not for forming the plan. The decomposition, the sequencing, and the verdict are YOURS — never delegate the judgment.
|
|
84
84
|
- Each delegated task must be self-contained: the worker does not see this conversation or the goal. Put everything it needs in the prompt.
|
|
@@ -269,6 +269,7 @@ If none of the listed skills fit the goal, load \`general\`. Keep the skill-sele
|
|
|
269
269
|
rosterDescription:
|
|
270
270
|
'turns a goal — a trip, a launch, a migration, a feature — into an actionable, sequenced, risk-aware plan, writes it to a file, and returns a structured signal; domain-neutral and reasoning-heavy, for any multi-step goal worth thinking through before acting; consider a `reviewer` pass on the plan it produces',
|
|
271
271
|
canSpawnSubagents: true,
|
|
272
|
+
canBackgroundSpawnSubagents: true,
|
|
272
273
|
timeoutMs: PLANNER_SPAWN_TIMEOUT_MS,
|
|
273
274
|
inFlightKey: (payload) => payload?.requestId ?? `anon-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`,
|
|
274
275
|
toolResultBudget: {
|
|
@@ -64,14 +64,20 @@ The \`write_report\` tool enforces these limits in code: it accepts exactly one
|
|
|
64
64
|
|
|
65
65
|
You run on a deliberately expensive model. Every search result page and every fetched article you pull into YOUR context spends that budget on grunt work and crowds out the thinking only you can do. So your DEFAULT for gathering is to delegate — not just for big sweeps, but for routine fetches too.
|
|
66
66
|
|
|
67
|
-
**Delegate first; fetch yourself only as a last resort.** Before you reach for \`web_search\`, \`web_fetch\`, \`read\`, or \`grep\`, ask: "could \`scout\` or \`explorer\` get this for me and hand back just the distilled answer?" If yes — which is almost always — spawn the worker with \`spawn_subagent\`.
|
|
67
|
+
**Delegate first; fetch yourself only as a last resort.** Before you reach for \`web_search\`, \`web_fetch\`, \`read\`, or \`grep\`, ask: "could \`scout\` or \`explorer\` get this for me and hand back just the distilled answer?" If yes — which is almost always — spawn the worker with \`spawn_subagent\`.
|
|
68
|
+
|
|
69
|
+
**Fan out in parallel.** For a gathering round, emit several \`scout\`/\`explorer\` \`spawn_subagent\` calls together in a SINGLE turn so they run concurrently rather than one-at-a-time. You have two equivalent ways to do this, both of which deliver every worker's findings back to you:
|
|
70
|
+
- **Synchronous batch (simplest):** emit the calls with \`run_in_background=false\` (the default) in one assistant message. They execute concurrently and all results return together before your next turn, where you fold them into one synthesis pass.
|
|
71
|
+
- **Background:** emit them with \`run_in_background=true\`; each returns a task_id immediately and you receive a \`<system-reminder>\` as each completes, then fetch the result with \`subagent_output\`. Use this when you want to start synthesizing on early results while slower workers finish. Your session stays alive until every background child you spawned has reported back, so no result is lost.
|
|
72
|
+
|
|
73
|
+
Either way, do NOT spawn one, wait for it, then spawn the next unless the second task genuinely depends on the first's result — that serializes what should be parallel.
|
|
68
74
|
|
|
69
75
|
- \`scout\` — web gathering. Hand it any web question, quick or broad ("latest figure for X", "find the primary source for Y", "sweep for every source on Z"); it does the searching and fetching and returns citation-backed findings, so the raw pages never touch your context.
|
|
70
76
|
- \`explorer\` — local gathering. Hand it any filesystem/git/memory question; it returns the paths and excerpts you need without you grepping the tree yourself.
|
|
71
77
|
- The synthesis, the cross-validation, and the confidence call are YOURS. Delegate the gathering, never the conclusion.
|
|
72
78
|
- Each delegated task is self-contained: the worker does not see this conversation. Put everything it needs in the prompt.
|
|
73
79
|
- The chain is depth-limited: a worker you spawn cannot spawn again. Keep delegation one level deep.
|
|
74
|
-
- \`subagent_output\`/\`subagent_cancel\` reach only the tasks YOU spawned.
|
|
80
|
+
- \`subagent_output\`/\`subagent_cancel\` reach only the tasks YOU spawned. Whether you spawn synchronously or in the background, fold every worker's result into your single report before you finish.
|
|
75
81
|
|
|
76
82
|
When IS it right to use your own \`web_search\`/\`web_fetch\`/\`read\`/\`grep\`? Only for the surgical, decisive touch: re-reading one specific passage a worker flagged, resolving a contradiction between two workers' findings, or a single fetch so central you must read it verbatim. If you find yourself doing more than a couple of direct fetches, stop and delegate the rest.
|
|
77
83
|
|
|
@@ -210,6 +216,7 @@ If none of the listed skills fit the question, load \`general\`. Keep the skill-
|
|
|
210
216
|
// warrant operator's owner/trusted-only gate; any caller that can spawn a
|
|
211
217
|
// subagent can spawn the researcher.
|
|
212
218
|
canSpawnSubagents: true,
|
|
219
|
+
canBackgroundSpawnSubagents: true,
|
|
213
220
|
timeoutMs: RESEARCHER_SPAWN_TIMEOUT_MS,
|
|
214
221
|
inFlightKey: (payload) => payload?.requestId ?? `anon-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`,
|
|
215
222
|
toolResultBudget: {
|
|
@@ -79,7 +79,7 @@ You run on a deliberately expensive model. Reading a sprawling file tree, a gian
|
|
|
79
79
|
- Spawn read-only/research workers for context-heavy gathering, not for forming the verdict. The findings and the \`<review>\` block are YOURS — never delegate the judgment.
|
|
80
80
|
- Each delegated task must be self-contained: the worker does not see this conversation or the target. Put everything it needs in the prompt.
|
|
81
81
|
- The chain is depth-limited: a worker you spawn cannot spawn again. Keep delegation one level deep.
|
|
82
|
-
- \`subagent_output\`/\`subagent_cancel\` reach only the tasks YOU spawned.
|
|
82
|
+
- \`subagent_output\`/\`subagent_cancel\` reach only the tasks YOU spawned. To gather in parallel, either emit all the independent \`spawn_subagent\` calls (sync, the default) in a SINGLE turn so they run concurrently and return together, or spawn them with \`run_in_background=true\` and fold each result in as its \`<system-reminder>\` arrives (your session stays alive until every child reports back). Either way, fold the results into your single review pass before you finish.
|
|
83
83
|
|
|
84
84
|
## Tools
|
|
85
85
|
|
|
@@ -199,6 +199,7 @@ If none of the listed skills fit the target, load \`general\`. Keep the skill-se
|
|
|
199
199
|
rosterDescription:
|
|
200
200
|
'deep read-only code/PR/plan review in a fresh context, returns a structured verdict; it does NOT post — you act on its findings',
|
|
201
201
|
canSpawnSubagents: true,
|
|
202
|
+
canBackgroundSpawnSubagents: true,
|
|
202
203
|
timeoutMs: REVIEWER_SPAWN_TIMEOUT_MS,
|
|
203
204
|
inFlightKey: (payload) => payload?.requestId ?? `anon-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`,
|
|
204
205
|
toolResultBudget: {
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
// Discord renders no GitHub-flavored Markdown tables — a `| a | b |` block
|
|
2
|
+
// shows up as literal pipes and dashes, so an agent reply that leans on a table
|
|
3
|
+
// (very common) becomes unreadable. Discord DOES preserve whitespace verbatim
|
|
4
|
+
// inside inline code spans, so we re-emit each table row as a single
|
|
5
|
+
// backtick-wrapped line with columns padded to a fixed width. Columns line up
|
|
6
|
+
// because every row is the same monospaced inline-code span. The header row is
|
|
7
|
+
// additionally wrapped in `**...**` so it reads as a bold caption above the body.
|
|
8
|
+
//
|
|
9
|
+
// This is a line-walker, not a Markdown parser: it only touches blocks that
|
|
10
|
+
// match the pipe-table shape (a `|`-bearing line followed by a `|---|` alignment
|
|
11
|
+
// row) and leaves every other byte — prose, code fences, lists — untouched.
|
|
12
|
+
|
|
13
|
+
const TABLE_SEP_RE = /^\s*\|?\s*:?-{3,}:?\s*(\|\s*:?-{3,}:?\s*)+\|?\s*$/
|
|
14
|
+
const FENCE_RE = /^(\s*)(```+|~~~+)(.*)$/
|
|
15
|
+
|
|
16
|
+
/**
 * Rewrites each pipe table in `input` through `renderTable` and returns every
 * other line unchanged.
 *
 * A table is a `|`-bearing line immediately followed by an alignment row
 * matching `TABLE_SEP_RE`, plus every following line that still contains a
 * `|`. Lines inside a fenced code block are never treated as tables.
 *
 * @param input - The outbound message text.
 * @returns The text with tables converted; `input` itself when it holds no `|`.
 */
export function convertDiscordTables(input: string): string {
  if (input === '') return ''
  if (!input.includes('|')) return input

  const lines = input.split('\n')
  const out: string[] = []
  let i = 0
  let openFence: string | null = null

  while (i < lines.length) {
    const line = lines[i]!
    const fence = FENCE_RE.exec(line)

    // Inside a fence everything is literal text. The fence closes on a marker
    // that uses the same fence char and is at least as long as the opener.
    if (openFence !== null) {
      if (fence !== null) {
        const marker = fence[2]!
        if (marker[0] === openFence[0] && marker.length >= openFence.length) {
          openFence = null
        }
      }
      out.push(line)
      i++
      continue
    }

    if (fence !== null) {
      const marker = fence[2]!
      const info = fence[3]!
      // CommonMark: the info string of a backtick fence may not contain a
      // backtick. Without this check a line that merely starts with an inline
      // span such as ```x``` would count as an opener and switch off table
      // conversion for everything after it.
      const opensFence = marker[0] !== '`' || !info.includes('`')
      if (opensFence) {
        openFence = marker
        out.push(line)
        i++
        continue
      }
    }

    // A table needs a `|`-bearing header line immediately followed by the
    // alignment row, so a stray `|` in prose is not mistaken for a table.
    if (line.includes('|') && i + 1 < lines.length && TABLE_SEP_RE.test(lines[i + 1]!)) {
      const start = i
      i += 2
      // Body rows run until the first line without a pipe; a blank line has
      // none, so it ends the table too.
      while (i < lines.length && lines[i]!.includes('|')) {
        i++
      }
      out.push(renderTable(lines.slice(start, i)))
      continue
    }

    out.push(line)
    i++
  }

  return out.join('\n')
}
|
|
69
|
+
|
|
70
|
+
/**
 * Renders one detected table as text: each row becomes a padded inline-code
 * line, and the header line is additionally wrapped in `**…**`.
 *
 * @param tableLines - Header line, alignment row, then the body rows. The
 *   alignment row (index 1) is not rendered.
 * @returns The rendered rows joined with newlines.
 */
function renderTable(tableLines: string[]): string {
  const head = splitRow(tableLines[0]!)
  const body = tableLines.slice(2).map(splitRow)
  const columnWidths = computeWidths([head, ...body])

  const formatRow = (cells: string[]): string => wrapCode(padRow(cells, columnWidths))

  const rendered = [`**${formatRow(head)}**`]
  for (const cells of body) {
    rendered.push(formatRow(cells))
  }
  return rendered.join('\n')
}
|
|
79
|
+
|
|
80
|
+
/**
 * Splits one table row into trimmed cell strings.
 *
 * One optional leading and one optional trailing delimiter pipe are dropped,
 * then the row is cut at every unescaped `|`. A backslash-escaped pipe (`\|`)
 * is cell content, as in GFM tables: it does not start a new column and is
 * returned as a plain `|`.
 *
 * @param row - The raw table line.
 * @returns The row's cells, whitespace-trimmed, in order.
 */
function splitRow(row: string): string[] {
  let trimmed = row.trim()
  if (trimmed.startsWith('|')) trimmed = trimmed.slice(1)
  // A row ending in `\|` ends in an escaped pipe that belongs to the last cell,
  // so only a bare trailing pipe is removed.
  if (trimmed.endsWith('|') && !trimmed.endsWith('\\|')) trimmed = trimmed.slice(0, -1)

  return trimmed
    .split(/(?<!\\)\|/) // cut only at pipes not preceded by a backslash
    .map((cell) => cell.trim().replace(/\\\|/g, '|'))
}
|
|
89
|
+
|
|
90
|
+
/**
 * Computes, for each column index, the largest `displayWidth` of any cell in
 * that column across all rows.
 *
 * @param rows - Table rows as arrays of cell strings; rows may differ in length.
 * @returns One width per column, as long as the longest row.
 */
function computeWidths(rows: string[][]): number[] {
  const widest: number[] = []
  rows.forEach((cells) => {
    cells.forEach((cell, column) => {
      widest[column] = Math.max(widest[column] ?? 0, displayWidth(cell))
    })
  })
  return widest
}
|
|
102
|
+
|
|
103
|
+
/**
 * Pads each cell of a row to its column width and joins the columns.
 *
 * @param cells - The row's cells; a missing cell is rendered as an empty string.
 * @param widths - Target width per column; one output column per entry.
 * @returns The padded row as a single line.
 */
function padRow(cells: string[], widths: number[]): string {
  const padded = widths.map((width, c) => padToWidth(cells[c] ?? '', width))
  // Two spaces between columns keeps them visually distinct inside the
  // monospaced span without a vertical-bar separator. A single space would be
  // indistinguishable from a space inside a cell.
  return padded.join('  ')
}
|
|
109
|
+
|
|
110
|
+
/**
 * Right-pads `cell` with spaces until its `displayWidth` reaches `width`.
 * A cell that is already that wide or wider is returned unchanged.
 */
function padToWidth(cell: string, width: number): string {
  const missing = width - displayWidth(cell)
  if (missing > 0) {
    return cell + ' '.repeat(missing)
  }
  return cell
}
|
|
114
|
+
|
|
115
|
+
// Discord's monospaced inline-code font renders CJK ideographs, full-width
|
|
116
|
+
// punctuation, and most emoji at two columns, while combining/zero-width marks
|
|
117
|
+
// take none. `String.prototype.padEnd` counts UTF-16 code units, so padding by
|
|
118
|
+
// `.length` leaves wide-character tables visually ragged. We iterate by code
|
|
119
|
+
// point and sum per-glyph column widths so every cell pads to the same VISUAL
|
|
120
|
+
// width. The ranges below are the standard East-Asian-Wide / Wide blocks plus
|
|
121
|
+
// the common emoji planes; this is the same wcwidth approximation editors use.
|
|
122
|
+
/**
 * Sums the column width of `text`, one code point at a time.
 *
 * @param text - The string to measure.
 * @returns The total of `charWidth` over every code point in `text`.
 */
export function displayWidth(text: string): number {
  // Array.from splits by code point, so a surrogate pair is measured once.
  return Array.from(text).reduce((total, glyph) => total + charWidth(glyph.codePointAt(0)!), 0)
}
|
|
129
|
+
|
|
130
|
+
/**
 * Column width of a single code point: 0 for zero-width marks, 2 for wide
 * glyphs, 1 otherwise. The zero-width test takes precedence.
 */
function charWidth(cp: number): number {
  return isZeroWidth(cp) ? 0 : isWide(cp) ? 2 : 1
}
|
|
135
|
+
|
|
136
|
+
/**
 * Reports whether a code point occupies no column of its own: combining marks
 * and invisible format characters.
 *
 * U+20D0–20FF is deliberately left out. Counting U+20E3 (combining enclosing
 * keycap) as one column makes a keycap sequence such as `1` + U+FE0F + U+20E3
 * total two columns.
 *
 * @param cp - Unicode code point.
 * @returns `true` when the code point is counted as zero columns.
 */
function isZeroWidth(cp: number): boolean {
  return (
    (cp >= 0x0300 && cp <= 0x036f) || // combining diacritical marks
    (cp >= 0x1ab0 && cp <= 0x1aff) || // combining diacritical marks extended
    (cp >= 0x1dc0 && cp <= 0x1dff) || // combining diacritical marks supplement
    (cp >= 0x200b && cp <= 0x200f) || // ZWSP, ZWNJ/ZWJ, directional marks
    (cp >= 0x2060 && cp <= 0x2064) || // word joiner, invisible operators
    (cp >= 0xfe00 && cp <= 0xfe0f) || // variation selectors
    (cp >= 0xfe20 && cp <= 0xfe2f) || // combining half marks
    cp === 0xfeff // zero-width no-break space / BOM
  )
}
|
|
144
|
+
|
|
145
|
+
/**
 * Reports whether a code point falls in one of the ranges this module counts
 * as two columns wide. Ranges are checked in ascending order.
 *
 * @param cp - Unicode code point.
 * @returns `true` when the code point is inside any listed range.
 */
function isWide(cp: number): boolean {
  if (cp >= 0x1100 && cp <= 0x115f) return true // Hangul Jamo
  if (cp >= 0x2600 && cp <= 0x26ff) return true // Miscellaneous Symbols (☀ ♻ ⚠ …)
  if (cp >= 0x2700 && cp <= 0x27bf) return true // Dingbats (✅ ✔ ✨ ➡ …)
  if (cp >= 0x2b00 && cp <= 0x2bff) return true // Misc Symbols and Arrows (⭐ …)
  if (cp >= 0x2e80 && cp <= 0x303e) return true // CJK radicals, Kangxi
  if (cp >= 0x3041 && cp <= 0x33ff) return true // Hiragana, Katakana, CJK symbols
  if (cp >= 0x3400 && cp <= 0x4dbf) return true // CJK Ext A
  if (cp >= 0x4e00 && cp <= 0x9fff) return true // CJK Unified Ideographs
  if (cp >= 0xa000 && cp <= 0xa4cf) return true // Yi
  if (cp >= 0xac00 && cp <= 0xd7a3) return true // Hangul Syllables
  if (cp >= 0xf900 && cp <= 0xfaff) return true // CJK Compatibility Ideographs
  if (cp >= 0xfe30 && cp <= 0xfe4f) return true // CJK Compatibility Forms
  if (cp >= 0xff00 && cp <= 0xff60) return true // Fullwidth Forms
  if (cp >= 0xffe0 && cp <= 0xffe6) return true // Fullwidth signs
  if (cp >= 0x1f300 && cp <= 0x1faff) return true // emoji, symbols, pictographs
  if (cp >= 0x20000 && cp <= 0x3fffd) return true // CJK Ext B+ (supplementary ideographic)
  return false
}
|
|
165
|
+
|
|
166
|
+
// CommonMark inline code: the delimiter must be a backtick run LONGER than any
|
|
167
|
+
// run inside the content, otherwise an embedded `` ` `` (e.g. a cell holding
|
|
168
|
+
// `bun test`) closes the span early and corrupts the row. When the content
|
|
169
|
+
// begins or ends with a backtick, one space of padding is inserted on each side
|
|
170
|
+
// so the delimiter is not adjacent to a content backtick; CommonMark strips that
|
|
171
|
+
// single padding space on render, leaving our column widths intact.
|
|
172
|
+
/**
 * Wraps `text` in an inline-code span. The backtick delimiter is one longer
 * than the longest backtick run in `text`, and a single space is added on both
 * sides when `text` starts or ends with a backtick, so the delimiter never
 * touches a content backtick.
 */
function wrapCode(text: string): string {
  const delimiter = '`'.repeat(1 + longestBacktickRun(text))
  const touchesBacktick = text.startsWith('`') || text.endsWith('`')
  const inner = touchesBacktick ? ` ${text} ` : text
  return delimiter + inner + delimiter
}
|
|
178
|
+
|
|
179
|
+
/**
 * Returns the length of the longest run of consecutive backticks in `text`,
 * or 0 when it contains none.
 */
function longestBacktickRun(text: string): number {
  let best = 0
  // Each match is one maximal run of backticks.
  for (const run of text.matchAll(/`+/g)) {
    best = Math.max(best, run[0].length)
  }
  return best
}
|
|
@@ -39,6 +39,7 @@ import {
|
|
|
39
39
|
type InboundDropReason,
|
|
40
40
|
renderPlaceholder,
|
|
41
41
|
} from './discord-bot-classify'
|
|
42
|
+
import { convertDiscordTables } from './discord-bot-format'
|
|
42
43
|
import { createDiscordReactionCallback, createDiscordRemoveReactionCallback } from './discord-bot-reactions'
|
|
43
44
|
import { enrichDiscordMessageReferences } from './discord-bot-reference'
|
|
44
45
|
import {
|
|
@@ -647,7 +648,7 @@ export function createOutboundCallback(deps: {
|
|
|
647
648
|
if (msg.adapter !== 'discord-bot') {
|
|
648
649
|
return { ok: false, error: `unknown adapter: ${msg.adapter}` }
|
|
649
650
|
}
|
|
650
|
-
const text = msg.text ?? ''
|
|
651
|
+
const text = convertDiscordTables(msg.text ?? '')
|
|
651
652
|
const attachments = msg.attachments ?? []
|
|
652
653
|
if (text === '' && attachments.length === 0) {
|
|
653
654
|
return { ok: false, error: 'message has neither text nor attachments' }
|