typeclaw 0.30.0 → 0.31.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,6 +9,7 @@ import { removeRequestedReviewer } from './decoy-reviewer'
9
9
  import type { DeliveryDedup } from './dedup'
10
10
  import { isGithubEventAllowed } from './event-allowlist'
11
11
  import { encodeGithubReactionRef, type GithubReactionTarget } from './reactions'
12
+ import { fetchSelfReviewBlocking } from './review-state'
12
13
  import { listUnresolvedSelfReviewThreads } from './review-thread-resolver'
13
14
 
14
15
  export type GithubInboundLogger = { info: (m: string) => void; warn: (m: string) => void; error: (m: string) => void }
@@ -83,14 +84,16 @@ export function createGithubWebhookHandler(options: GithubWebhookHandlerOptions)
83
84
  }
84
85
 
85
86
  // A push to an open PR (`synchronize`) is not a message to react to — it is
86
- // a trigger to re-check whether the new commits addressed the bot's own
87
- // still-open review threads. The check needs a GraphQL round-trip, so it
88
- // runs OFF the ACK path (like the decoy-reviewer drop) and only wakes a
89
- // session when there is at least one such thread. Returning here also keeps
87
+ // a trigger to re-evaluate the bot's own outstanding review obligations on
88
+ // this PR: unresolved review threads it authored AND a sticky
89
+ // CHANGES_REQUESTED block (which leaves no threads when filed as a top-level
90
+ // verdict — the black hole this path closes). Both need an API round-trip,
91
+ // so it runs OFF the ACK path (like the decoy-reviewer drop) and only wakes a
92
+ // session when an obligation is outstanding. Returning here also keeps
90
93
  // synchronize out of the generic awareness-only fallthrough below.
91
94
  if (event === 'pull_request' && action === 'synchronize') {
92
95
  if (delivery !== '') options.dedup.add(delivery)
93
- scheduleReviewThreadRecheck({ payload, selfLogin, options })
96
+ scheduleReviewFollowup({ payload, selfLogin, options })
94
97
  return ok()
95
98
  }
96
99
 
@@ -187,7 +190,7 @@ function defaultScheduleBackgroundTask(task: () => Promise<void>): void {
187
190
  void task().catch(() => {})
188
191
  }
189
192
 
190
- function scheduleReviewThreadRecheck(input: {
193
+ function scheduleReviewFollowup(input: {
191
194
  payload: Record<string, unknown>
192
195
  selfLogin: string | null
193
196
  options: GithubWebhookHandlerOptions
@@ -203,13 +206,27 @@ function scheduleReviewThreadRecheck(input: {
203
206
  if (repository === null || pullNumber === null) return
204
207
  const headSha = readString(readRecord(pr?.head), 'sha')
205
208
 
209
+ // The same head SHA can arrive on several deliveries (a multi-commit push
210
+ // emits one synchronize per ref update). Dedup the follow-up on the head SHA
211
+ // so a single push wakes at most one re-review, distinct from the per-delivery
212
+ // dedup above. When headSha is absent we cannot dedup, so we skip the follow-up
213
+ // rather than risk a re-review storm.
214
+ if (headSha === null) {
215
+ options.logger.warn(`[github] synchronize for ${repository.owner}/${repository.name}#${pullNumber} has no head sha`)
216
+ return
217
+ }
218
+ const followupKey = `synchronize-followup:${repository.owner}/${repository.name}#${pullNumber}:${headSha}`
219
+ if (options.dedup.has(followupKey)) return
220
+ options.dedup.add(followupKey)
221
+
222
+ const reviewOn = options.reviewOn?.() ?? 'review_requested'
206
223
  const fetchImpl = options.fetchImpl ?? fetch
207
224
  const schedule = options.scheduleBackgroundTask ?? defaultScheduleBackgroundTask
208
225
  const target = `${repository.owner}/${repository.name}#${pullNumber}`
209
226
  schedule(async () => {
210
227
  try {
211
228
  const token = await authToken({ repoSlug: `${repository.owner}/${repository.name}` })
212
- const result = await listUnresolvedSelfReviewThreads({
229
+ const threads = await listUnresolvedSelfReviewThreads({
213
230
  token,
214
231
  selfLogin,
215
232
  owner: repository.owner,
@@ -217,46 +234,63 @@ function scheduleReviewThreadRecheck(input: {
217
234
  prNumber: pullNumber,
218
235
  fetchImpl,
219
236
  })
220
- if (!result.ok) {
221
- options.logger.warn(`[github] review-thread recheck failed for ${target}: ${result.error}`)
237
+ if (!threads.ok) {
238
+ options.logger.warn(`[github] review-thread recheck failed for ${target}: ${threads.error}`)
222
239
  return
223
240
  }
224
- if (result.threads.length === 0) return
241
+
242
+ // A held CHANGES_REQUESTED is the bot's own obligation regardless of how
243
+ // reviews are triggered, so re-evaluate it on push unless review is off.
244
+ let selfBlocking = false
245
+ if (reviewOn !== 'off') {
246
+ const blocking = await fetchSelfReviewBlocking({
247
+ token,
248
+ selfLogin,
249
+ owner: repository.owner,
250
+ repo: repository.name,
251
+ prNumber: pullNumber,
252
+ fetchImpl,
253
+ })
254
+ if (blocking.ok) selfBlocking = blocking.selfBlocking
255
+ else options.logger.warn(`[github] review-state recheck failed for ${target}: ${blocking.error}`)
256
+ }
257
+
258
+ const rootCommentIds = threads.threads.map((t) => t.rootCommentId)
259
+ if (rootCommentIds.length === 0 && !selfBlocking) return
225
260
  options.route(
226
- buildRecheckInbound({
227
- repository,
228
- pullNumber,
229
- headSha,
230
- rootCommentIds: result.threads.map((t) => t.rootCommentId),
231
- title: readString(pr, 'title'),
232
- }),
261
+ withApprovalPolicy(
262
+ buildReviewFollowupInbound({
263
+ repository,
264
+ pullNumber,
265
+ headSha,
266
+ rootCommentIds,
267
+ selfBlocking,
268
+ title: readString(pr, 'title'),
269
+ }),
270
+ options.allowApprove?.() ?? true,
271
+ ),
233
272
  )
234
273
  } catch (err) {
235
274
  options.logger.warn(
236
- `[github] review-thread recheck failed for ${target}: ${err instanceof Error ? err.message : String(err)}`,
275
+ `[github] review followup failed for ${target}: ${err instanceof Error ? err.message : String(err)}`,
237
276
  )
238
277
  }
239
278
  })
240
279
  }
241
280
 
242
- function buildRecheckInbound(input: {
281
+ function buildReviewFollowupInbound(input: {
243
282
  repository: { owner: string; name: string }
244
283
  pullNumber: number
245
- headSha: string | null
284
+ headSha: string
246
285
  rootCommentIds: readonly number[]
286
+ selfBlocking: boolean
247
287
  title: string | null
248
288
  }): InboundMessage {
249
- const { repository, pullNumber, headSha, rootCommentIds, title } = input
289
+ const { repository, pullNumber, headSha, rootCommentIds, selfBlocking, title } = input
250
290
  const titleSegment = title !== null && title.trim() !== '' ? `: "${title}"` : ''
251
- const shaSegment = headSha !== null ? ` (now at ${headSha.slice(0, 7)})` : ''
252
- const idList = rootCommentIds.join(', ')
253
291
  const text =
254
- `PR #${pullNumber}${titleSegment} received new commits${shaSegment}. ` +
255
- `You have ${rootCommentIds.length} unresolved review thread(s) you authored on this PR ` +
256
- `(root comment id(s): ${idList}). For each, check whether the new commits addressed your ` +
257
- `concern. If addressed, reply on that thread via channel_send with a short acknowledgement ` +
258
- `and resolve_review_thread: true (the thread id is the root comment id). If not addressed, ` +
259
- `leave it open. If none are addressed, end your turn without replying.`
292
+ `PR #${pullNumber}${titleSegment} received new commits (now at ${headSha.slice(0, 7)}). ` +
293
+ followupInstruction(rootCommentIds, selfBlocking)
260
294
 
261
295
  return {
262
296
  adapter: 'github',
@@ -264,7 +298,7 @@ function buildRecheckInbound(input: {
264
298
  chat: `pr:${pullNumber}`,
265
299
  thread: null,
266
300
  text,
267
- externalMessageId: `pr-${pullNumber}-recheck-${headSha ?? 'unknown'}`,
301
+ externalMessageId: `pr-${pullNumber}-recheck-${headSha}`,
268
302
  authorId: 'github-system',
269
303
  authorName: 'github',
270
304
  authorIsBot: false,
@@ -277,6 +311,30 @@ function buildRecheckInbound(input: {
277
311
  }
278
312
  }
279
313
 
314
+ function followupInstruction(rootCommentIds: readonly number[], selfBlocking: boolean): string {
315
+ const threadPart =
316
+ rootCommentIds.length > 0
317
+ ? `You have ${rootCommentIds.length} unresolved review thread(s) you authored on this PR ` +
318
+ `(root comment id(s): ${rootCommentIds.join(', ')}). For each, check whether the new commits ` +
319
+ `addressed your concern. If addressed, reply on that thread via channel_send with a short ` +
320
+ `acknowledgement and resolve_review_thread: true (the thread id is the root comment id); ` +
321
+ `if not, leave it open. `
322
+ : ''
323
+ // A held CHANGES_REQUESTED never clears itself: GitHub keeps the block until a
324
+ // fresh APPROVE/COMMENT/dismiss, so a blocking follow-up must always end with a
325
+ // submitted verdict — the "end without replying" escape hatch is reserved for
326
+ // the thread-only path, where leaving every thread open is a valid no-op.
327
+ const blockingPart = selfBlocking
328
+ ? `Your latest review on this PR is still CHANGES_REQUESTED, which keeps the PR blocked until you ` +
329
+ `submit a fresh review. Re-review the current head against the concerns from that blocking review ` +
330
+ `and always end with a new verdict: if the commits resolve your concerns, submit an APPROVE ` +
331
+ `(or COMMENT if approval is disabled) to clear the block; if concerns remain, submit a new ` +
332
+ `CHANGES_REQUESTED explaining what is still blocking. `
333
+ : ''
334
+ const tail = selfBlocking ? '' : 'If none are addressed, end your turn without replying.'
335
+ return `${threadPart}${blockingPart}${tail}`
336
+ }
337
+
280
338
  export async function verifySignature(body: string, secret: string, sigHeader: string): Promise<boolean> {
281
339
  const expected = `sha256=${createHmac('sha256', secret).update(body).digest('hex')}`
282
340
  const a = Buffer.from(expected)
@@ -48,6 +48,33 @@ export function createGithubReviewStateResolver(deps: {
48
48
  }
49
49
  }
50
50
 
51
+ export type SelfReviewBlockingResult =
52
+ | { ok: true; selfBlocking: boolean }
53
+ | { ok: false; error: string; code: 'not-found' | 'permission-denied' | 'transient' }
54
+
55
+ // Is the bot's last DECISIVE review CHANGES_REQUESTED? (COMMENTED/PENDING are
56
+ // ignored, as in createGithubReviewStateResolver.) Kept standalone so the
57
+ // synchronize follow-up skips the reviewDecision round-trip, which the stranding guard needs but this check does not.
58
+ export async function fetchSelfReviewBlocking(deps: {
59
+ token: string
60
+ selfLogin: string
61
+ owner: string
62
+ repo: string
63
+ prNumber: number
64
+ fetchImpl?: typeof fetch
65
+ }): Promise<SelfReviewBlockingResult> {
66
+ const fetchImpl = deps.fetchImpl ?? fetch
67
+ const reviews = await fetchSelfReviews(
68
+ fetchImpl,
69
+ deps.token,
70
+ { owner: deps.owner, repo: deps.repo, prNumber: deps.prNumber },
71
+ deps.selfLogin,
72
+ )
73
+ if (!reviews.ok) return { ok: false, error: reviews.error, code: reviews.code }
74
+ const lastDecisive = reviews.states.filter(isDecisive).at(-1) ?? null
75
+ return { ok: true, selfBlocking: lastDecisive === 'CHANGES_REQUESTED' }
76
+ }
77
+
51
78
  type Target = { owner: string; repo: string; prNumber: number }
52
79
 
53
80
  function parseTarget(workspace: string, chat: string): Target | null {
@@ -3,9 +3,34 @@ export type OutboundFloodCheckResult = { ok: true } | { ok: false; reason: strin
3
3
  const MIN_LENGTH = 40
4
4
  const MAX_RUN = 30
5
5
  const MIN_LONG_LENGTH = 80
6
- const MIN_UNIQUE_RATIO = 0.05
7
6
  const MAX_DOMINANCE = 0.9
8
7
 
8
+ // Contiguous-span detector for multi-character floods ("lollol...", "ababab...",
9
+ // repeated emoji pairs) — including a flood body buried inside otherwise-varied
10
+ // text, which a whole-message periodicity test misses. Strict equality (no
11
+ // mismatch budget) and a large span floor keep it clear of incidental prose
12
+ // repetition ("---", "....", "hahaha", code indentation, table separators).
13
+ const MAX_REPEATING_PERIOD = 32
14
+ // Span floor is deliberately a flood boundary, not a "never-deny" guarantee: it
15
+ // catches obvious short-period floods like "ab".repeat(300) (600 chars) and
16
+ // "lol".repeat(300) (900). Hundreds of byte-identical rows or box-art lines also
17
+ // trip it — that output is information-poor and flood-like, and raising the floor
18
+ // to clear it would let those real floods through. Tables/diagrams with varying
19
+ // cells break periodicity and pass.
20
+ const MIN_PERIODIC_SPAN = 384
21
+ const MIN_PERIODIC_REPETITIONS = 24
22
+
23
+ // Narrow last resort: structured text (code, tables, logs) is often lower-
24
+ // entropy than prose, so this only fires on a tiny alphabet at real length.
25
+ const MIN_ENTROPY_LENGTH = 200
26
+ const MAX_TINY_ALPHABET_SIZE = 4
27
+ const VERY_LOW_ENTROPY_BITS = 1.25
28
+
29
+ // Replaces the old `uniqueRatio = distinctChars / length` gate, which was
30
+ // length-coupled: natural language draws from a fixed alphabet, so any reply
31
+ // past ~(alphabet/0.05) chars failed it regardless of variety — a 2.9KB
32
+ // markdown report was silently dropped. Every check below is bounded-run or
33
+ // length-independent, so length alone never makes a reply look like a flood.
9
34
  export function checkOutboundFlood(text: string): OutboundFloodCheckResult {
10
35
  if (text.length < MIN_LENGTH) return { ok: true }
11
36
 
@@ -18,12 +43,18 @@ export function checkOutboundFlood(text: string): OutboundFloodCheckResult {
18
43
  if (graphemes.length < MIN_LONG_LENGTH) return { ok: true }
19
44
 
20
45
  const counts = countGraphemes(graphemes)
21
- const uniqueRatio = counts.size / graphemes.length
22
- if (uniqueRatio < MIN_UNIQUE_RATIO) return { ok: false, reason: `low-unique-ratio:${uniqueRatio.toFixed(3)}` }
23
46
 
24
47
  const dominance = maxValue(counts) / graphemes.length
25
48
  if (dominance > MAX_DOMINANCE) return { ok: false, reason: `char-dominance:${dominance.toFixed(2)}` }
26
49
 
50
+ const span = findLongestPeriodicSpan(graphemes)
51
+ if (span !== undefined) return { ok: false, reason: `repeated-pattern-span:${span.period}:${span.spanLength}` }
52
+
53
+ if (graphemes.length >= MIN_ENTROPY_LENGTH && counts.size <= MAX_TINY_ALPHABET_SIZE) {
54
+ const entropy = shannonEntropyBitsPerGrapheme(counts, graphemes.length)
55
+ if (entropy < VERY_LOW_ENTROPY_BITS) return { ok: false, reason: `low-entropy:${entropy.toFixed(2)}` }
56
+ }
57
+
27
58
  return { ok: true }
28
59
  }
29
60
 
@@ -42,6 +73,42 @@ function findLongestRun(graphemes: readonly string[]): number {
42
73
  return longest
43
74
  }
44
75
 
76
+ // Longest contiguous span (in graphemes) that is exactly periodic at some
77
+ // period 2..32, or undefined when no span clears the flood floor. Period 1 is
78
+ // left to the run check above. A span must reach MIN_PERIODIC_SPAN graphemes
79
+ // AND repeat its unit MIN_PERIODIC_REPETITIONS times — the larger bound wins,
80
+ // so a 32-period unit needs 768 graphemes, not three echoes of a 32-char line.
81
+ function findLongestPeriodicSpan(graphemes: readonly string[]): { period: number; spanLength: number } | undefined {
82
+ const maxPeriod = Math.min(MAX_REPEATING_PERIOD, Math.floor(graphemes.length / MIN_PERIODIC_REPETITIONS))
83
+ let best: { period: number; spanLength: number } | undefined
84
+ for (let period = 2; period <= maxPeriod; period++) {
85
+ let matches = 0
86
+ let longestForPeriod = 0
87
+ for (let i = period; i < graphemes.length; i++) {
88
+ if (graphemes[i] === graphemes[i - period]) {
89
+ matches++
90
+ const spanLength = matches + period
91
+ if (spanLength > longestForPeriod) longestForPeriod = spanLength
92
+ } else {
93
+ matches = 0
94
+ }
95
+ }
96
+ const requiredSpan = Math.max(MIN_PERIODIC_SPAN, period * MIN_PERIODIC_REPETITIONS)
97
+ if (longestForPeriod < requiredSpan) continue
98
+ if (best === undefined || longestForPeriod > best.spanLength) best = { period, spanLength: longestForPeriod }
99
+ }
100
+ return best
101
+ }
102
+
103
+ function shannonEntropyBitsPerGrapheme(counts: Map<string, number>, length: number): number {
104
+ let entropy = 0
105
+ for (const count of counts.values()) {
106
+ const probability = count / length
107
+ entropy -= probability * Math.log2(probability)
108
+ }
109
+ return entropy
110
+ }
111
+
45
112
  function countGraphemes(graphemes: readonly string[]): Map<string, number> {
46
113
  const counts = new Map<string, number>()
47
114
  for (const grapheme of graphemes) counts.set(grapheme, (counts.get(grapheme) ?? 0) + 1)
@@ -183,6 +183,18 @@ export const MAX_POLICY_DENIED_CHANNEL_SENDS_PER_TURN = 3
183
183
  // including reasoning). Deliberately NOT lowered in `providers.ts`, where
184
184
  // `maxTokens` is the model's true capability that compaction math reads.
185
185
  export const CHANNEL_MAX_OUTPUT_TOKENS = 4096
186
+ // Raised output-token budget threaded into the ONE re-prompt that follows a
187
+ // `stopReason:'length'` empty turn. The default 4096 backstop bounds kimi's
188
+ // degenerate repetition loop, but it is the same ceiling a *legitimate*
189
+ // reasoning-heavy turn hits when it spends the whole pool thinking and emits no
190
+ // prose — re-prompting under the identical cap reproduces the truncation. A
191
+ // `length` truncation that the byte-identical loop guard did NOT catch is
192
+ // evidence of genuine reasoning starved for room, not a repetition loop, so the
193
+ // retry grants 4x headroom for thinking + a reply. Bounded (not 32000) so a
194
+ // turn that IS looping still can't burn the full pi-ai default. Consumed
195
+ // one-shot via `LiveSession.nextPromptMaxTokens`, then reset at the next real
196
+ // user turn so the raised budget never leaks past the turn that needed it.
197
+ export const CHANNEL_EMPTY_TURN_RETRY_MAX_OUTPUT_TOKENS = 16384
186
198
  // Ceiling on automatic re-prompts for a turn that ended with NO user-facing
187
199
  // reply AND no attempted send — the pure "the model burned its budget thinking
188
200
  // and produced nothing" failure. The canonical trigger is Fireworks'
@@ -200,18 +212,24 @@ export const CHANNEL_MAX_OUTPUT_TOKENS = 4096
200
212
  export const MAX_EMPTY_TURN_RETRIES = 2
201
213
  // Reminder-only nudge injected before an empty-turn retry. Uses the repo's
202
214
  // SYSTEM MESSAGE framing (see composeTurnPrompt) so persona-rich models do not
203
- // reply to the notice itself. Neutral by design: it asks for a direct reply
204
- // without prescribing length or tone, matching the chosen "just retry" posture.
215
+ // reply to the notice itself. Names the actual failure (the prior turn ran out
216
+ // of its output budget mid-reasoning and produced no reply) and asks the model
217
+ // to keep its thinking short and answer directly — the empty turn was budget
218
+ // exhaustion, not a forgotten tool call, so a "reply directly" nudge alone
219
+ // would re-loop. The matching retry re-prompt also runs with a raised budget
220
+ // (CHANNEL_EMPTY_TURN_RETRY_MAX_OUTPUT_TOKENS) so the room actually exists.
205
221
  export const EMPTY_TURN_RETRY_NUDGE = [
206
222
  '---',
207
223
  '**[SYSTEM MESSAGE — not from a human]**',
208
224
  '',
209
- 'Your previous turn ended without sending any reply to the channel. This is',
225
+ 'Your previous turn ran out of its output budget before sending a reply — it',
226
+ 'spent the whole turn thinking and produced nothing for the channel. This is',
210
227
  'an automated signal from the channel router, not a message from anyone in',
211
228
  'the chat. **Do not acknowledge or reply to this notice itself.**',
212
229
  '',
213
- 'Respond to the last user message now with a direct answer via your channel',
214
- 'reply tool. If you genuinely have nothing to say, reply with `NO_REPLY`.',
230
+ 'Answer the last user message now: keep any reasoning brief and send a direct',
231
+ 'reply via your channel reply tool. If you genuinely have nothing to say,',
232
+ 'reply with `NO_REPLY`.',
215
233
  '',
216
234
  '---',
217
235
  ].join('\n')
@@ -532,6 +550,13 @@ type LiveSession = {
532
550
  // increments it before injecting EMPTY_TURN_RETRY_NUDGE and reads it to decide
533
551
  // retry-vs-fallback. See the candidate===null branch.
534
552
  emptyTurnRetries: number
553
+ // One-shot output-token budget for the NEXT `session.prompt()` only.
554
+ // `installChannelOutputCap` reads and clears it per stream call, so it
555
+ // overrides the default backstop for exactly one re-prompt. Set by the
556
+ // empty-turn length-retry branch to CHANNEL_EMPTY_TURN_RETRY_MAX_OUTPUT_TOKENS
557
+ // and reset to undefined at each fresh user turn so the raised budget cannot
558
+ // leak past the turn that needed it.
559
+ nextPromptMaxTokens: number | undefined
535
560
  // Stamped by `markTurnSkipped` (called from the `skip_response` tool)
536
561
  // with the current `turnSeq`. Read at the top of `validateChannelTurn`:
537
562
  // if it matches the just-completed turn, recovery is skipped entirely
@@ -1417,6 +1442,7 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
1417
1442
  inFlightToolSends: new Map(),
1418
1443
  policyDeniedToolSendsThisTurn: new Map(),
1419
1444
  emptyTurnRetries: 0,
1445
+ nextPromptMaxTokens: undefined,
1420
1446
  skippedTurn: null,
1421
1447
  skipLockedSendTurn: null,
1422
1448
  pendingQuoteCandidate: null,
@@ -1704,14 +1730,22 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
1704
1730
  // Override pi-ai's hidden `Math.min(model.maxTokens, 32000)` output cap for
1705
1731
  // channel sessions by threading an explicit `maxTokens` into every stream
1706
1732
  // call. See CHANNEL_MAX_OUTPUT_TOKENS for why. Composes the existing streamFn
1707
- // (pi's default `streamSimple` unless a proxy was installed) and only fills
1708
- // `maxTokens` when the caller left it unset, so an explicit per-call value
1709
- // still wins.
1733
+ // (pi's default `streamSimple` unless a proxy was installed). Precedence:
1734
+ // an explicit per-call `maxTokens` always wins; otherwise a one-shot
1735
+ // `live.nextPromptMaxTokens` (set by the empty-turn length-retry) is consumed
1736
+ // and cleared so the raised budget applies to exactly one stream call;
1737
+ // otherwise the default backstop.
1710
1738
  const installChannelOutputCap = (live: LiveSession): void => {
1711
1739
  const { agent } = live.session
1712
1740
  const inner = agent.streamFn
1713
- agent.streamFn = (model, context, options) =>
1714
- inner(model, context, { ...options, maxTokens: options?.maxTokens ?? CHANNEL_MAX_OUTPUT_TOKENS })
1741
+ agent.streamFn = (model, context, options) => {
1742
+ let maxTokens = options?.maxTokens
1743
+ if (maxTokens === undefined && live.nextPromptMaxTokens !== undefined) {
1744
+ maxTokens = live.nextPromptMaxTokens
1745
+ live.nextPromptMaxTokens = undefined
1746
+ }
1747
+ return inner(model, context, { ...options, maxTokens: maxTokens ?? CHANNEL_MAX_OUTPUT_TOKENS })
1748
+ }
1715
1749
  }
1716
1750
 
1717
1751
  const startTypingHeartbeat = (live: LiveSession): void => {
@@ -1904,10 +1938,13 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
1904
1938
  live.lastSentText.clear()
1905
1939
  live.pendingQuoteCandidate = captureQuoteCandidate(live.key.adapter, batch, observed)
1906
1940
  // A real user batch starts a fresh logical turn → restore the full
1907
- // empty-turn retry budget. Reset here (batch.length > 0) and NOT in
1908
- // the per-prompt block below, so the reminder-only iterations the
1909
- // retry itself queues do not refill the budget and loop forever.
1941
+ // empty-turn retry budget and drop any raised output-token budget left
1942
+ // over from a prior turn's length-retry. Reset here (batch.length > 0)
1943
+ // and NOT in the per-prompt block below, so the reminder-only
1944
+ // iterations the retry itself queues do not refill the budget and loop
1945
+ // forever (and the raised cap stays scoped to the turn that set it).
1910
1946
  live.emptyTurnRetries = 0
1947
+ live.nextPromptMaxTokens = undefined
1911
1948
  } else if (live.lastTurnAuthorId !== null) {
1912
1949
  live.currentTurnEngageReactions = []
1913
1950
  // Reminder-only turn (batch.length === 0, reminders.length > 0):
@@ -3037,8 +3074,18 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
3037
3074
  }
3038
3075
  if (!attemptedSendThisTurn && live.emptyTurnRetries < MAX_EMPTY_TURN_RETRIES) {
3039
3076
  live.emptyTurnRetries++
3077
+ // Raise the re-prompt's budget ONLY for a `length` truncation: that is
3078
+ // the budget-exhaustion case (reasoning ate the whole pool before any
3079
+ // prose), so the retry needs room to finish thinking AND reply. `error`
3080
+ // and `aborted` are not budget exhaustion — an upstream failure or the
3081
+ // terminal-reply abort — so they retry under the default backstop.
3082
+ // Consumed one-shot by installChannelOutputCap on the next prompt().
3083
+ if (assistantLeafStopReason(live.session) === 'length') {
3084
+ live.nextPromptMaxTokens = CHANNEL_EMPTY_TURN_RETRY_MAX_OUTPUT_TOKENS
3085
+ }
3040
3086
  logger.warn(
3041
- `[channels] ${live.keyId} empty_turn_retry attempt=${live.emptyTurnRetries}/${MAX_EMPTY_TURN_RETRIES}`,
3087
+ `[channels] ${live.keyId} empty_turn_retry attempt=${live.emptyTurnRetries}/${MAX_EMPTY_TURN_RETRIES} ` +
3088
+ `max_tokens=${live.nextPromptMaxTokens ?? CHANNEL_MAX_OUTPUT_TOKENS}`,
3042
3089
  )
3043
3090
  live.pendingSystemReminders.push(EMPTY_TURN_RETRY_NUDGE)
3044
3091
  return
@@ -4355,18 +4402,25 @@ function recoverableAssistantText(
4355
4402
  return null
4356
4403
  }
4357
4404
 
4358
- // True only when the leaf is an assistant message that was CUT OFF mid-output:
4359
- // `length` (hit the token cap the canonical kimi reasoning-loop), `error`, or
4360
- // `aborted`. This is the precise signature of "the model was producing but got
4361
- // truncated", as distinct from a turn that produced no assistant message at all
4362
- // (leaf undefined / a non-assistant entry), which is a benign empty/cold turn —
4363
- // NOT something to re-prompt. The empty-turn retry guard keys off this so it
4364
- // fires for real degenerations and stays silent for cold sessions.
4365
- function assistantLeafTruncated(session: AgentSession): boolean {
4405
+ // The truncation stop reason when the leaf is an assistant message that was CUT
4406
+ // OFF mid-output — `length` (hit the token cap, the canonical kimi reasoning-
4407
+ // loop), `error`, or `aborted` — else undefined. This is the precise signature
4408
+ // of "the model was producing but got truncated", as distinct from a turn that
4409
+ // produced no assistant message at all (leaf undefined / a non-assistant
4410
+ // entry), which is a benign empty/cold turn. Callers that only need the boolean
4411
+ // use `assistantLeafTruncated`; the retry guard reads the reason itself because
4412
+ // the raised reasoning budget is justified ONLY for `length` (budget
4413
+ // exhaustion), not for `error`/`aborted`.
4414
+ function assistantLeafStopReason(session: AgentSession): 'length' | 'error' | 'aborted' | undefined {
4366
4415
  const leaf = session.sessionManager.getLeafEntry()
4367
- if (!leaf || leaf.type !== 'message' || leaf.message.role !== 'assistant') return false
4416
+ if (!leaf || leaf.type !== 'message' || leaf.message.role !== 'assistant') return undefined
4368
4417
  const stop = leaf.message.stopReason
4369
- return stop === 'length' || stop === 'error' || stop === 'aborted'
4418
+ if (stop === 'length' || stop === 'error' || stop === 'aborted') return stop
4419
+ return undefined
4420
+ }
4421
+
4422
+ function assistantLeafTruncated(session: AgentSession): boolean {
4423
+ return assistantLeafStopReason(session) !== undefined
4370
4424
  }
4371
4425
 
4372
4426
  function visibleAssistantText(message: AssistantMessage): string {
@@ -1,6 +1,7 @@
1
1
  import { readdirSync } from 'node:fs'
2
2
  import { join, resolve } from 'node:path'
3
3
 
4
+ import { loadConfigSyncOrDefaults } from '@/config'
4
5
  import { containerNameFromCwd } from '@/container'
5
6
  import { isInitialized } from '@/init'
6
7
 
@@ -17,7 +18,9 @@ export type AgentEntry = {
17
18
  //
18
19
  // Underscore-prefixed names are also skipped so operators can park a disabled
19
20
  // or in-progress agent next to live ones (e.g. `_archived-coder/`) without
20
- // compose touching it.
21
+ // compose touching it. Agents with `compose.exclude: true` in typeclaw.json
22
+ // are skipped too — the in-config opt-out for operators who don't want to rename
23
+ // the folder.
21
24
  //
22
25
  // Returns an empty array when rootCwd doesn't exist or is empty — discovery is
23
26
  // not the place to fail; the caller decides what to do with zero agents.
@@ -40,6 +43,7 @@ export function discoverAgents(rootCwd: string): AgentEntry[] {
40
43
  if (entry.name.startsWith('_')) continue
41
44
  const cwd = join(root, entry.name)
42
45
  if (!isInitialized(cwd)) continue
46
+ if (loadConfigSyncOrDefaults(cwd).compose.exclude) continue
43
47
  agents.push({ name: entry.name, cwd, containerName: containerNameFromCwd(cwd) })
44
48
  }
45
49
 
@@ -338,6 +338,39 @@ export const networkSchema = z
338
338
 
339
339
  export type NetworkConfig = z.infer<typeof networkSchema>
340
340
 
341
+ // `realProc` opts the per-tool bwrap sandbox into the 'real-proc' strategy
342
+ // (src/sandbox/build.ts): a fresh procfs scoped to a new PID namespace so
343
+ // external-package runners (`bunx`, `bun add <pkg>`, `bun run <pkg-bin>`) get a
344
+ // working /proc/self/{fd,maps} and stop aborting with Bun's "NotDir". Default
345
+ // `false` keeps the universally-portable '--tmpfs /proc' profile, under which
346
+ // sandboxed external-package execution is unsupported by design. Turning it on
347
+ // makes `typeclaw start` grant the container CAP_SYS_ADMIN (required to mount
348
+ // proc for the new PID namespace), which is a deliberate posture change on the
349
+ // single-tenant outer boundary — see docs/internals/sandbox.mdx. PID isolation
350
+ // and the /proc/N/environ leak guard are both preserved; the trade is the
351
+ // CAP_SYS_ADMIN grant, not sandbox strength.
352
+ export const sandboxSchema = z
353
+ .object({
354
+ realProc: z.boolean().default(false),
355
+ })
356
+ .default({ realProc: false })
357
+
358
+ export type SandboxConfig = z.infer<typeof sandboxSchema>
359
+
360
+ // Host-stage `typeclaw compose` knobs. `exclude: true` skips this agent during
361
+ // compose discovery (same effect as parking it under an `_`-prefixed dir, but
362
+ // without renaming the folder). The container never reads this block — it's a
363
+ // pure compose CLI hint, so omitting it keeps the agent in every compose
364
+ // operation. Namespaced under `compose` so future compose-only settings have a
365
+ // home without crowding the top level.
366
+ export const composeSchema = z
367
+ .object({
368
+ exclude: z.boolean().default(false),
369
+ })
370
+ .default({ exclude: false })
371
+
372
+ export type ComposeConfig = z.infer<typeof composeSchema>
373
+
341
374
  // Reverse-proxy tunnels expose a container-private port to the public internet
342
375
  // via a managed subprocess (cloudflared) or a user-supplied external URL.
343
376
  // See AGENTS.md `## Tunnels`. Keeping the enum scoped to what's implemented
@@ -490,9 +523,11 @@ export const configSchema = z
490
523
  // time. Defaults to `[]`. Hatching appends the agent's chosen name
491
524
  // here, so a freshly-hatched bot already has its identity wired up.
492
525
  alias: z.array(z.string().trim().min(1)).default([]),
526
+ compose: composeSchema,
493
527
  channels: channelsSchema,
494
528
  portForward: portForwardSchema,
495
529
  network: networkSchema,
530
+ sandbox: sandboxSchema,
496
531
  docker: dockerSchema,
497
532
  git: gitSchema,
498
533
  roles: rolesConfigSchema.optional(),
@@ -632,9 +667,11 @@ export const FIELD_EFFECTS: Record<string, FieldEffect> = {
632
667
  mcpServers: 'restart-required',
633
668
  plugins: 'restart-required',
634
669
  alias: 'applied',
670
+ compose: 'ignored',
635
671
  channels: 'applied',
636
672
  portForward: 'restart-required',
637
673
  network: 'restart-required',
674
+ sandbox: 'restart-required',
638
675
  tunnels: 'restart-required',
639
676
  'docker.file': 'restart-required',
640
677
  'git.ignore': 'restart-required',
@@ -723,6 +760,7 @@ export function extractPluginConfigs(raw: unknown): Record<string, unknown> {
723
760
  'mounts',
724
761
  'plugins',
725
762
  'alias',
763
+ 'compose',
726
764
  'channels',
727
765
  'portForward',
728
766
  'network',
@@ -514,6 +514,20 @@ export async function planStart({
514
514
  }
515
515
  }
516
516
 
517
+ // sandbox.realProc opts the per-tool bwrap sandbox into the 'real-proc'
518
+ // strategy (src/sandbox/build.ts), which prefixes the sandbox with
519
+ // `unshare --pid --fork --mount --mount-proc`. Mounting a fresh procfs for the
520
+ // new PID namespace needs real CAP_SYS_ADMIN — seccomp=unconfined alone is not
521
+ // enough (it only unblocks the unshare/clone SYSCALLS; the kernel still
522
+ // rejects mount(2) of proc without the capability). This is the deliberate
523
+ // posture change documented in docs/internals/sandbox.mdx: the default keeps
524
+ // the narrower seccomp-only profile, and the operator grants the broad
525
+ // "new root" capability ONLY by opting into real-proc. Placed before the
526
+ // image tag (like --cap-add=NET_ADMIN) so docker applies it at run time.
527
+ if (cfg.sandbox.realProc) {
528
+ runArgs.push('--cap-add=SYS_ADMIN')
529
+ }
530
+
517
531
  if (hostdControl) {
518
532
  runArgs.push('--add-host', HOST_GATEWAY_ALIAS)
519
533
  }