typeclaw 0.30.0 → 0.31.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/scripts/verify-realproc-sandbox.sh +58 -0
- package/src/agent/plugin-tools.ts +13 -0
- package/src/agent/system-prompt.ts +1 -1
- package/src/bundled-plugins/github-cli-auth/approve-idempotency.ts +113 -52
- package/src/bundled-plugins/github-cli-auth/effective-approval.ts +14 -9
- package/src/bundled-plugins/github-cli-auth/index.ts +3 -3
- package/src/channels/adapters/discord-bot-format.ts +191 -0
- package/src/channels/adapters/discord-bot.ts +2 -1
- package/src/channels/adapters/github/inbound.ts +88 -30
- package/src/channels/adapters/github/review-state.ts +27 -0
- package/src/channels/outbound-flood-filter.ts +70 -3
- package/src/channels/router.ts +78 -24
- package/src/compose/discover.ts +5 -1
- package/src/config/config.ts +38 -0
- package/src/container/start.ts +14 -0
- package/src/sandbox/build.ts +41 -9
- package/src/sandbox/policy.ts +9 -1
- package/src/skills/typeclaw-markdown-pdf/SKILL.md +327 -0
- package/typeclaw.schema.json +24 -0
|
@@ -9,6 +9,7 @@ import { removeRequestedReviewer } from './decoy-reviewer'
|
|
|
9
9
|
import type { DeliveryDedup } from './dedup'
|
|
10
10
|
import { isGithubEventAllowed } from './event-allowlist'
|
|
11
11
|
import { encodeGithubReactionRef, type GithubReactionTarget } from './reactions'
|
|
12
|
+
import { fetchSelfReviewBlocking } from './review-state'
|
|
12
13
|
import { listUnresolvedSelfReviewThreads } from './review-thread-resolver'
|
|
13
14
|
|
|
14
15
|
export type GithubInboundLogger = { info: (m: string) => void; warn: (m: string) => void; error: (m: string) => void }
|
|
@@ -83,14 +84,16 @@ export function createGithubWebhookHandler(options: GithubWebhookHandlerOptions)
|
|
|
83
84
|
}
|
|
84
85
|
|
|
85
86
|
// A push to an open PR (`synchronize`) is not a message to react to — it is
|
|
86
|
-
// a trigger to re-
|
|
87
|
-
//
|
|
88
|
-
//
|
|
89
|
-
//
|
|
87
|
+
// a trigger to re-evaluate the bot's own outstanding review obligations on
|
|
88
|
+
// this PR: unresolved review threads it authored AND a sticky
|
|
89
|
+
// CHANGES_REQUESTED block (which leaves no threads when filed as a top-level
|
|
90
|
+
// verdict — the black hole this path closes). Both need an API round-trip,
|
|
91
|
+
// so it runs OFF the ACK path (like the decoy-reviewer drop) and only wakes a
|
|
92
|
+
// session when an obligation is outstanding. Returning here also keeps
|
|
90
93
|
// synchronize out of the generic awareness-only fallthrough below.
|
|
91
94
|
if (event === 'pull_request' && action === 'synchronize') {
|
|
92
95
|
if (delivery !== '') options.dedup.add(delivery)
|
|
93
|
-
|
|
96
|
+
scheduleReviewFollowup({ payload, selfLogin, options })
|
|
94
97
|
return ok()
|
|
95
98
|
}
|
|
96
99
|
|
|
@@ -187,7 +190,7 @@ function defaultScheduleBackgroundTask(task: () => Promise<void>): void {
|
|
|
187
190
|
void task().catch(() => {})
|
|
188
191
|
}
|
|
189
192
|
|
|
190
|
-
function
|
|
193
|
+
function scheduleReviewFollowup(input: {
|
|
191
194
|
payload: Record<string, unknown>
|
|
192
195
|
selfLogin: string | null
|
|
193
196
|
options: GithubWebhookHandlerOptions
|
|
@@ -203,13 +206,27 @@ function scheduleReviewThreadRecheck(input: {
|
|
|
203
206
|
if (repository === null || pullNumber === null) return
|
|
204
207
|
const headSha = readString(readRecord(pr?.head), 'sha')
|
|
205
208
|
|
|
209
|
+
// Same webhook head SHA can arrive on several deliveries (a multi-commit push
|
|
210
|
+
// emits one synchronize per ref update). Dedup the follow-up on the head SHA
|
|
211
|
+
// so a single push wakes at most one re-review, distinct from the per-delivery
|
|
212
|
+
// dedup above. When headSha is absent we cannot dedup, so we skip the followup
|
|
213
|
+
// rather than risk a re-review storm.
|
|
214
|
+
if (headSha === null) {
|
|
215
|
+
options.logger.warn(`[github] synchronize for ${repository.owner}/${repository.name}#${pullNumber} has no head sha`)
|
|
216
|
+
return
|
|
217
|
+
}
|
|
218
|
+
const followupKey = `synchronize-followup:${repository.owner}/${repository.name}#${pullNumber}:${headSha}`
|
|
219
|
+
if (options.dedup.has(followupKey)) return
|
|
220
|
+
options.dedup.add(followupKey)
|
|
221
|
+
|
|
222
|
+
const reviewOn = options.reviewOn?.() ?? 'review_requested'
|
|
206
223
|
const fetchImpl = options.fetchImpl ?? fetch
|
|
207
224
|
const schedule = options.scheduleBackgroundTask ?? defaultScheduleBackgroundTask
|
|
208
225
|
const target = `${repository.owner}/${repository.name}#${pullNumber}`
|
|
209
226
|
schedule(async () => {
|
|
210
227
|
try {
|
|
211
228
|
const token = await authToken({ repoSlug: `${repository.owner}/${repository.name}` })
|
|
212
|
-
const
|
|
229
|
+
const threads = await listUnresolvedSelfReviewThreads({
|
|
213
230
|
token,
|
|
214
231
|
selfLogin,
|
|
215
232
|
owner: repository.owner,
|
|
@@ -217,46 +234,63 @@ function scheduleReviewThreadRecheck(input: {
|
|
|
217
234
|
prNumber: pullNumber,
|
|
218
235
|
fetchImpl,
|
|
219
236
|
})
|
|
220
|
-
if (!
|
|
221
|
-
options.logger.warn(`[github] review-thread recheck failed for ${target}: ${
|
|
237
|
+
if (!threads.ok) {
|
|
238
|
+
options.logger.warn(`[github] review-thread recheck failed for ${target}: ${threads.error}`)
|
|
222
239
|
return
|
|
223
240
|
}
|
|
224
|
-
|
|
241
|
+
|
|
242
|
+
// A held CHANGES_REQUESTED is the bot's own obligation regardless of how
|
|
243
|
+
// reviews are triggered, so re-evaluate it on push unless review is off.
|
|
244
|
+
let selfBlocking = false
|
|
245
|
+
if (reviewOn !== 'off') {
|
|
246
|
+
const blocking = await fetchSelfReviewBlocking({
|
|
247
|
+
token,
|
|
248
|
+
selfLogin,
|
|
249
|
+
owner: repository.owner,
|
|
250
|
+
repo: repository.name,
|
|
251
|
+
prNumber: pullNumber,
|
|
252
|
+
fetchImpl,
|
|
253
|
+
})
|
|
254
|
+
if (blocking.ok) selfBlocking = blocking.selfBlocking
|
|
255
|
+
else options.logger.warn(`[github] review-state recheck failed for ${target}: ${blocking.error}`)
|
|
256
|
+
}
|
|
257
|
+
|
|
258
|
+
const rootCommentIds = threads.threads.map((t) => t.rootCommentId)
|
|
259
|
+
if (rootCommentIds.length === 0 && !selfBlocking) return
|
|
225
260
|
options.route(
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
261
|
+
withApprovalPolicy(
|
|
262
|
+
buildReviewFollowupInbound({
|
|
263
|
+
repository,
|
|
264
|
+
pullNumber,
|
|
265
|
+
headSha,
|
|
266
|
+
rootCommentIds,
|
|
267
|
+
selfBlocking,
|
|
268
|
+
title: readString(pr, 'title'),
|
|
269
|
+
}),
|
|
270
|
+
options.allowApprove?.() ?? true,
|
|
271
|
+
),
|
|
233
272
|
)
|
|
234
273
|
} catch (err) {
|
|
235
274
|
options.logger.warn(
|
|
236
|
-
`[github] review
|
|
275
|
+
`[github] review followup failed for ${target}: ${err instanceof Error ? err.message : String(err)}`,
|
|
237
276
|
)
|
|
238
277
|
}
|
|
239
278
|
})
|
|
240
279
|
}
|
|
241
280
|
|
|
242
|
-
function
|
|
281
|
+
function buildReviewFollowupInbound(input: {
|
|
243
282
|
repository: { owner: string; name: string }
|
|
244
283
|
pullNumber: number
|
|
245
|
-
headSha: string
|
|
284
|
+
headSha: string
|
|
246
285
|
rootCommentIds: readonly number[]
|
|
286
|
+
selfBlocking: boolean
|
|
247
287
|
title: string | null
|
|
248
288
|
}): InboundMessage {
|
|
249
|
-
const { repository, pullNumber, headSha, rootCommentIds, title } = input
|
|
289
|
+
const { repository, pullNumber, headSha, rootCommentIds, selfBlocking, title } = input
|
|
250
290
|
const titleSegment = title !== null && title.trim() !== '' ? `: "${title}"` : ''
|
|
251
|
-
const shaSegment = headSha !== null ? ` (now at ${headSha.slice(0, 7)})` : ''
|
|
252
|
-
const idList = rootCommentIds.join(', ')
|
|
253
291
|
const text =
|
|
254
|
-
`PR #${pullNumber}${titleSegment} received new commits${
|
|
255
|
-
|
|
256
|
-
`(root comment id(s): ${idList}). For each, check whether the new commits addressed your ` +
|
|
257
|
-
`concern. If addressed, reply on that thread via channel_send with a short acknowledgement ` +
|
|
258
|
-
`and resolve_review_thread: true (the thread id is the root comment id). If not addressed, ` +
|
|
259
|
-
`leave it open. If none are addressed, end your turn without replying.`
|
|
292
|
+
`PR #${pullNumber}${titleSegment} received new commits (now at ${headSha.slice(0, 7)}). ` +
|
|
293
|
+
followupInstruction(rootCommentIds, selfBlocking)
|
|
260
294
|
|
|
261
295
|
return {
|
|
262
296
|
adapter: 'github',
|
|
@@ -264,7 +298,7 @@ function buildRecheckInbound(input: {
|
|
|
264
298
|
chat: `pr:${pullNumber}`,
|
|
265
299
|
thread: null,
|
|
266
300
|
text,
|
|
267
|
-
externalMessageId: `pr-${pullNumber}-recheck-${headSha
|
|
301
|
+
externalMessageId: `pr-${pullNumber}-recheck-${headSha}`,
|
|
268
302
|
authorId: 'github-system',
|
|
269
303
|
authorName: 'github',
|
|
270
304
|
authorIsBot: false,
|
|
@@ -277,6 +311,30 @@ function buildRecheckInbound(input: {
|
|
|
277
311
|
}
|
|
278
312
|
}
|
|
279
313
|
|
|
314
|
+
function followupInstruction(rootCommentIds: readonly number[], selfBlocking: boolean): string {
|
|
315
|
+
const threadPart =
|
|
316
|
+
rootCommentIds.length > 0
|
|
317
|
+
? `You have ${rootCommentIds.length} unresolved review thread(s) you authored on this PR ` +
|
|
318
|
+
`(root comment id(s): ${rootCommentIds.join(', ')}). For each, check whether the new commits ` +
|
|
319
|
+
`addressed your concern. If addressed, reply on that thread via channel_send with a short ` +
|
|
320
|
+
`acknowledgement and resolve_review_thread: true (the thread id is the root comment id); ` +
|
|
321
|
+
`if not, leave it open. `
|
|
322
|
+
: ''
|
|
323
|
+
// A held CHANGES_REQUESTED never clears itself: GitHub keeps the block until a
|
|
324
|
+
// fresh APPROVE/COMMENT/dismiss, so a blocking follow-up must always end with a
|
|
325
|
+
// submitted verdict — the "end without replying" escape hatch is reserved for
|
|
326
|
+
// the thread-only path, where leaving every thread open is a valid no-op.
|
|
327
|
+
const blockingPart = selfBlocking
|
|
328
|
+
? `Your latest review on this PR is still CHANGES_REQUESTED, which keeps the PR blocked until you ` +
|
|
329
|
+
`submit a fresh review. Re-review the current head against the concerns from that blocking review ` +
|
|
330
|
+
`and always end with a new verdict: if the commits resolve your concerns, submit an APPROVE ` +
|
|
331
|
+
`(or COMMENT if approval is disabled) to clear the block; if concerns remain, submit a new ` +
|
|
332
|
+
`CHANGES_REQUESTED explaining what is still blocking. `
|
|
333
|
+
: ''
|
|
334
|
+
const tail = selfBlocking ? '' : 'If none are addressed, end your turn without replying.'
|
|
335
|
+
return `${threadPart}${blockingPart}${tail}`
|
|
336
|
+
}
|
|
337
|
+
|
|
280
338
|
export async function verifySignature(body: string, secret: string, sigHeader: string): Promise<boolean> {
|
|
281
339
|
const expected = `sha256=${createHmac('sha256', secret).update(body).digest('hex')}`
|
|
282
340
|
const a = Buffer.from(expected)
|
|
@@ -48,6 +48,33 @@ export function createGithubReviewStateResolver(deps: {
|
|
|
48
48
|
}
|
|
49
49
|
}
|
|
50
50
|
|
|
51
|
+
export type SelfReviewBlockingResult =
|
|
52
|
+
| { ok: true; selfBlocking: boolean }
|
|
53
|
+
| { ok: false; error: string; code: 'not-found' | 'permission-denied' | 'transient' }
|
|
54
|
+
|
|
55
|
+
// Last DECISIVE self review == CHANGES_REQUESTED? (COMMENTED/PENDING ignored, as
|
|
56
|
+
// in createGithubReviewStateResolver.) Standalone so the synchronize follow-up
|
|
57
|
+
// skips the reviewDecision round-trip the stranding guard needs but this doesn't.
|
|
58
|
+
export async function fetchSelfReviewBlocking(deps: {
|
|
59
|
+
token: string
|
|
60
|
+
selfLogin: string
|
|
61
|
+
owner: string
|
|
62
|
+
repo: string
|
|
63
|
+
prNumber: number
|
|
64
|
+
fetchImpl?: typeof fetch
|
|
65
|
+
}): Promise<SelfReviewBlockingResult> {
|
|
66
|
+
const fetchImpl = deps.fetchImpl ?? fetch
|
|
67
|
+
const reviews = await fetchSelfReviews(
|
|
68
|
+
fetchImpl,
|
|
69
|
+
deps.token,
|
|
70
|
+
{ owner: deps.owner, repo: deps.repo, prNumber: deps.prNumber },
|
|
71
|
+
deps.selfLogin,
|
|
72
|
+
)
|
|
73
|
+
if (!reviews.ok) return { ok: false, error: reviews.error, code: reviews.code }
|
|
74
|
+
const lastDecisive = reviews.states.filter(isDecisive).at(-1) ?? null
|
|
75
|
+
return { ok: true, selfBlocking: lastDecisive === 'CHANGES_REQUESTED' }
|
|
76
|
+
}
|
|
77
|
+
|
|
51
78
|
type Target = { owner: string; repo: string; prNumber: number }
|
|
52
79
|
|
|
53
80
|
function parseTarget(workspace: string, chat: string): Target | null {
|
|
@@ -3,9 +3,34 @@ export type OutboundFloodCheckResult = { ok: true } | { ok: false; reason: strin
|
|
|
3
3
|
const MIN_LENGTH = 40
|
|
4
4
|
const MAX_RUN = 30
|
|
5
5
|
const MIN_LONG_LENGTH = 80
|
|
6
|
-
const MIN_UNIQUE_RATIO = 0.05
|
|
7
6
|
const MAX_DOMINANCE = 0.9
|
|
8
7
|
|
|
8
|
+
// Contiguous-span detector for multi-character floods ("lollol...", "ababab...",
|
|
9
|
+
// repeated emoji pairs) — including a flood body buried inside otherwise-varied
|
|
10
|
+
// text, which a whole-message periodicity test misses. Strict equality (no
|
|
11
|
+
// mismatch budget) and a large span floor keep it clear of incidental prose
|
|
12
|
+
// repetition ("---", "....", "hahaha", code indentation, table separators).
|
|
13
|
+
const MAX_REPEATING_PERIOD = 32
|
|
14
|
+
// Span floor is deliberately a flood boundary, not a "never-deny" guarantee: it
|
|
15
|
+
// catches obvious short-period floods like "ab".repeat(300) (600 chars) and
|
|
16
|
+
// "lol".repeat(300) (900). Hundreds of byte-identical rows or box-art lines also
|
|
17
|
+
// trip it — that output is information-poor and flood-like, and raising the floor
|
|
18
|
+
// to clear it would let those real floods through. Tables/diagrams with varying
|
|
19
|
+
// cells break periodicity and pass.
|
|
20
|
+
const MIN_PERIODIC_SPAN = 384
|
|
21
|
+
const MIN_PERIODIC_REPETITIONS = 24
|
|
22
|
+
|
|
23
|
+
// Narrow last resort: structured text (code, tables, logs) is often lower-
|
|
24
|
+
// entropy than prose, so this only fires on a tiny alphabet at real length.
|
|
25
|
+
const MIN_ENTROPY_LENGTH = 200
|
|
26
|
+
const MAX_TINY_ALPHABET_SIZE = 4
|
|
27
|
+
const VERY_LOW_ENTROPY_BITS = 1.25
|
|
28
|
+
|
|
29
|
+
// Replaces the old `uniqueRatio = distinctChars / length` gate, which was
|
|
30
|
+
// length-coupled: natural language draws from a fixed alphabet, so any reply
|
|
31
|
+
// past ~(alphabet/0.05) chars failed it regardless of variety — a 2.9KB
|
|
32
|
+
// markdown report was silently dropped. Every check below is bounded-run or
|
|
33
|
+
// length-independent, so length alone never makes a reply look like a flood.
|
|
9
34
|
export function checkOutboundFlood(text: string): OutboundFloodCheckResult {
|
|
10
35
|
if (text.length < MIN_LENGTH) return { ok: true }
|
|
11
36
|
|
|
@@ -18,12 +43,18 @@ export function checkOutboundFlood(text: string): OutboundFloodCheckResult {
|
|
|
18
43
|
if (graphemes.length < MIN_LONG_LENGTH) return { ok: true }
|
|
19
44
|
|
|
20
45
|
const counts = countGraphemes(graphemes)
|
|
21
|
-
const uniqueRatio = counts.size / graphemes.length
|
|
22
|
-
if (uniqueRatio < MIN_UNIQUE_RATIO) return { ok: false, reason: `low-unique-ratio:${uniqueRatio.toFixed(3)}` }
|
|
23
46
|
|
|
24
47
|
const dominance = maxValue(counts) / graphemes.length
|
|
25
48
|
if (dominance > MAX_DOMINANCE) return { ok: false, reason: `char-dominance:${dominance.toFixed(2)}` }
|
|
26
49
|
|
|
50
|
+
const span = findLongestPeriodicSpan(graphemes)
|
|
51
|
+
if (span !== undefined) return { ok: false, reason: `repeated-pattern-span:${span.period}:${span.spanLength}` }
|
|
52
|
+
|
|
53
|
+
if (graphemes.length >= MIN_ENTROPY_LENGTH && counts.size <= MAX_TINY_ALPHABET_SIZE) {
|
|
54
|
+
const entropy = shannonEntropyBitsPerGrapheme(counts, graphemes.length)
|
|
55
|
+
if (entropy < VERY_LOW_ENTROPY_BITS) return { ok: false, reason: `low-entropy:${entropy.toFixed(2)}` }
|
|
56
|
+
}
|
|
57
|
+
|
|
27
58
|
return { ok: true }
|
|
28
59
|
}
|
|
29
60
|
|
|
@@ -42,6 +73,42 @@ function findLongestRun(graphemes: readonly string[]): number {
|
|
|
42
73
|
return longest
|
|
43
74
|
}
|
|
44
75
|
|
|
76
|
+
// Longest contiguous span (in graphemes) that is exactly periodic at some
|
|
77
|
+
// period 2..32, or undefined when no span clears the flood floor. Period 1 is
|
|
78
|
+
// left to the run check above. A span must reach MIN_PERIODIC_SPAN graphemes
|
|
79
|
+
// AND repeat its unit MIN_PERIODIC_REPETITIONS times — the larger bound wins,
|
|
80
|
+
// so a 32-period unit needs 768 graphemes, not three echoes of a 32-char line.
|
|
81
|
+
function findLongestPeriodicSpan(graphemes: readonly string[]): { period: number; spanLength: number } | undefined {
|
|
82
|
+
const maxPeriod = Math.min(MAX_REPEATING_PERIOD, Math.floor(graphemes.length / MIN_PERIODIC_REPETITIONS))
|
|
83
|
+
let best: { period: number; spanLength: number } | undefined
|
|
84
|
+
for (let period = 2; period <= maxPeriod; period++) {
|
|
85
|
+
let matches = 0
|
|
86
|
+
let longestForPeriod = 0
|
|
87
|
+
for (let i = period; i < graphemes.length; i++) {
|
|
88
|
+
if (graphemes[i] === graphemes[i - period]) {
|
|
89
|
+
matches++
|
|
90
|
+
const spanLength = matches + period
|
|
91
|
+
if (spanLength > longestForPeriod) longestForPeriod = spanLength
|
|
92
|
+
} else {
|
|
93
|
+
matches = 0
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
const requiredSpan = Math.max(MIN_PERIODIC_SPAN, period * MIN_PERIODIC_REPETITIONS)
|
|
97
|
+
if (longestForPeriod < requiredSpan) continue
|
|
98
|
+
if (best === undefined || longestForPeriod > best.spanLength) best = { period, spanLength: longestForPeriod }
|
|
99
|
+
}
|
|
100
|
+
return best
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
function shannonEntropyBitsPerGrapheme(counts: Map<string, number>, length: number): number {
|
|
104
|
+
let entropy = 0
|
|
105
|
+
for (const count of counts.values()) {
|
|
106
|
+
const probability = count / length
|
|
107
|
+
entropy -= probability * Math.log2(probability)
|
|
108
|
+
}
|
|
109
|
+
return entropy
|
|
110
|
+
}
|
|
111
|
+
|
|
45
112
|
function countGraphemes(graphemes: readonly string[]): Map<string, number> {
|
|
46
113
|
const counts = new Map<string, number>()
|
|
47
114
|
for (const grapheme of graphemes) counts.set(grapheme, (counts.get(grapheme) ?? 0) + 1)
|
package/src/channels/router.ts
CHANGED
|
@@ -183,6 +183,18 @@ export const MAX_POLICY_DENIED_CHANNEL_SENDS_PER_TURN = 3
|
|
|
183
183
|
// including reasoning). Deliberately NOT lowered in `providers.ts`, where
|
|
184
184
|
// `maxTokens` is the model's true capability that compaction math reads.
|
|
185
185
|
export const CHANNEL_MAX_OUTPUT_TOKENS = 4096
|
|
186
|
+
// Raised output-token budget threaded into the ONE re-prompt that follows a
|
|
187
|
+
// `stopReason:'length'` empty turn. The default 4096 backstop bounds kimi's
|
|
188
|
+
// degenerate repetition loop, but it is the same ceiling a *legitimate*
|
|
189
|
+
// reasoning-heavy turn hits when it spends the whole pool thinking and emits no
|
|
190
|
+
// prose — re-prompting under the identical cap reproduces the truncation. A
|
|
191
|
+
// `length` truncation that the byte-identical loop guard did NOT catch is
|
|
192
|
+
// evidence of genuine reasoning starved for room, not a repetition loop, so the
|
|
193
|
+
// retry grants 4x headroom for thinking + a reply. Bounded (not 32000) so a
|
|
194
|
+
// turn that IS looping still can't burn the full pi-ai default. Consumed
|
|
195
|
+
// one-shot via `LiveSession.nextPromptMaxTokens`, then reset at the next real
|
|
196
|
+
// user turn so the raised budget never leaks past the turn that needed it.
|
|
197
|
+
export const CHANNEL_EMPTY_TURN_RETRY_MAX_OUTPUT_TOKENS = 16384
|
|
186
198
|
// Ceiling on automatic re-prompts for a turn that ended with NO user-facing
|
|
187
199
|
// reply AND no attempted send — the pure "the model burned its budget thinking
|
|
188
200
|
// and produced nothing" failure. The canonical trigger is Fireworks'
|
|
@@ -200,18 +212,24 @@ export const CHANNEL_MAX_OUTPUT_TOKENS = 4096
|
|
|
200
212
|
export const MAX_EMPTY_TURN_RETRIES = 2
|
|
201
213
|
// Reminder-only nudge injected before an empty-turn retry. Uses the repo's
|
|
202
214
|
// SYSTEM MESSAGE framing (see composeTurnPrompt) so persona-rich models do not
|
|
203
|
-
// reply to the notice itself.
|
|
204
|
-
//
|
|
215
|
+
// reply to the notice itself. Names the actual failure (the prior turn ran out
|
|
216
|
+
// of its output budget mid-reasoning and produced no reply) and asks the model
|
|
217
|
+
// to keep its thinking short and answer directly — the empty turn was budget
|
|
218
|
+
// exhaustion, not a forgotten tool call, so a "reply directly" nudge alone
|
|
219
|
+
// would re-loop. The matching retry re-prompt also runs with a raised budget
|
|
220
|
+
// (CHANNEL_EMPTY_TURN_RETRY_MAX_OUTPUT_TOKENS) so the room actually exists.
|
|
205
221
|
export const EMPTY_TURN_RETRY_NUDGE = [
|
|
206
222
|
'---',
|
|
207
223
|
'**[SYSTEM MESSAGE — not from a human]**',
|
|
208
224
|
'',
|
|
209
|
-
'Your previous turn
|
|
225
|
+
'Your previous turn ran out of its output budget before sending a reply — it',
|
|
226
|
+
'spent the whole turn thinking and produced nothing for the channel. This is',
|
|
210
227
|
'an automated signal from the channel router, not a message from anyone in',
|
|
211
228
|
'the chat. **Do not acknowledge or reply to this notice itself.**',
|
|
212
229
|
'',
|
|
213
|
-
'
|
|
214
|
-
'reply tool. If you genuinely have nothing to say,
|
|
230
|
+
'Answer the last user message now: keep any reasoning brief and send a direct',
|
|
231
|
+
'reply via your channel reply tool. If you genuinely have nothing to say,',
|
|
232
|
+
'reply with `NO_REPLY`.',
|
|
215
233
|
'',
|
|
216
234
|
'---',
|
|
217
235
|
].join('\n')
|
|
@@ -532,6 +550,13 @@ type LiveSession = {
|
|
|
532
550
|
// increments it before injecting EMPTY_TURN_RETRY_NUDGE and reads it to decide
|
|
533
551
|
// retry-vs-fallback. See the candidate===null branch.
|
|
534
552
|
emptyTurnRetries: number
|
|
553
|
+
// One-shot output-token budget for the NEXT `session.prompt()` only.
|
|
554
|
+
// `installChannelOutputCap` reads and clears it per stream call, so it
|
|
555
|
+
// overrides the default backstop for exactly one re-prompt. Set by the
|
|
556
|
+
// empty-turn length-retry branch to CHANNEL_EMPTY_TURN_RETRY_MAX_OUTPUT_TOKENS
|
|
557
|
+
// and reset to undefined at each fresh user turn so the raised budget cannot
|
|
558
|
+
// leak past the turn that needed it.
|
|
559
|
+
nextPromptMaxTokens: number | undefined
|
|
535
560
|
// Stamped by `markTurnSkipped` (called from the `skip_response` tool)
|
|
536
561
|
// with the current `turnSeq`. Read at the top of `validateChannelTurn`:
|
|
537
562
|
// if it matches the just-completed turn, recovery is skipped entirely
|
|
@@ -1417,6 +1442,7 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
|
|
|
1417
1442
|
inFlightToolSends: new Map(),
|
|
1418
1443
|
policyDeniedToolSendsThisTurn: new Map(),
|
|
1419
1444
|
emptyTurnRetries: 0,
|
|
1445
|
+
nextPromptMaxTokens: undefined,
|
|
1420
1446
|
skippedTurn: null,
|
|
1421
1447
|
skipLockedSendTurn: null,
|
|
1422
1448
|
pendingQuoteCandidate: null,
|
|
@@ -1704,14 +1730,22 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
|
|
|
1704
1730
|
// Override pi-ai's hidden `Math.min(model.maxTokens, 32000)` output cap for
|
|
1705
1731
|
// channel sessions by threading an explicit `maxTokens` into every stream
|
|
1706
1732
|
// call. See CHANNEL_MAX_OUTPUT_TOKENS for why. Composes the existing streamFn
|
|
1707
|
-
// (pi's default `streamSimple` unless a proxy was installed)
|
|
1708
|
-
// `maxTokens`
|
|
1709
|
-
//
|
|
1733
|
+
// (pi's default `streamSimple` unless a proxy was installed). Precedence:
|
|
1734
|
+
// an explicit per-call `maxTokens` always wins; otherwise a one-shot
|
|
1735
|
+
// `live.nextPromptMaxTokens` (set by the empty-turn length-retry) is consumed
|
|
1736
|
+
// and cleared so the raised budget applies to exactly one stream call;
|
|
1737
|
+
// otherwise the default backstop.
|
|
1710
1738
|
const installChannelOutputCap = (live: LiveSession): void => {
|
|
1711
1739
|
const { agent } = live.session
|
|
1712
1740
|
const inner = agent.streamFn
|
|
1713
|
-
agent.streamFn = (model, context, options) =>
|
|
1714
|
-
|
|
1741
|
+
agent.streamFn = (model, context, options) => {
|
|
1742
|
+
let maxTokens = options?.maxTokens
|
|
1743
|
+
if (maxTokens === undefined && live.nextPromptMaxTokens !== undefined) {
|
|
1744
|
+
maxTokens = live.nextPromptMaxTokens
|
|
1745
|
+
live.nextPromptMaxTokens = undefined
|
|
1746
|
+
}
|
|
1747
|
+
return inner(model, context, { ...options, maxTokens: maxTokens ?? CHANNEL_MAX_OUTPUT_TOKENS })
|
|
1748
|
+
}
|
|
1715
1749
|
}
|
|
1716
1750
|
|
|
1717
1751
|
const startTypingHeartbeat = (live: LiveSession): void => {
|
|
@@ -1904,10 +1938,13 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
|
|
|
1904
1938
|
live.lastSentText.clear()
|
|
1905
1939
|
live.pendingQuoteCandidate = captureQuoteCandidate(live.key.adapter, batch, observed)
|
|
1906
1940
|
// A real user batch starts a fresh logical turn → restore the full
|
|
1907
|
-
// empty-turn retry budget
|
|
1908
|
-
//
|
|
1909
|
-
//
|
|
1941
|
+
// empty-turn retry budget and drop any raised output-token budget left
|
|
1942
|
+
// over from a prior turn's length-retry. Reset here (batch.length > 0)
|
|
1943
|
+
// and NOT in the per-prompt block below, so the reminder-only
|
|
1944
|
+
// iterations the retry itself queues do not refill the budget and loop
|
|
1945
|
+
// forever (and the raised cap stays scoped to the turn that set it).
|
|
1910
1946
|
live.emptyTurnRetries = 0
|
|
1947
|
+
live.nextPromptMaxTokens = undefined
|
|
1911
1948
|
} else if (live.lastTurnAuthorId !== null) {
|
|
1912
1949
|
live.currentTurnEngageReactions = []
|
|
1913
1950
|
// Reminder-only turn (batch.length === 0, reminders.length > 0):
|
|
@@ -3037,8 +3074,18 @@ export function createChannelRouter(options: CreateChannelRouterOptions): Channe
|
|
|
3037
3074
|
}
|
|
3038
3075
|
if (!attemptedSendThisTurn && live.emptyTurnRetries < MAX_EMPTY_TURN_RETRIES) {
|
|
3039
3076
|
live.emptyTurnRetries++
|
|
3077
|
+
// Raise the re-prompt's budget ONLY for a `length` truncation: that is
|
|
3078
|
+
// the budget-exhaustion case (reasoning ate the whole pool before any
|
|
3079
|
+
// prose), so the retry needs room to finish thinking AND reply. `error`
|
|
3080
|
+
// and `aborted` are not budget exhaustion — an upstream failure or the
|
|
3081
|
+
// terminal-reply abort — so they retry under the default backstop.
|
|
3082
|
+
// Consumed one-shot by installChannelOutputCap on the next prompt().
|
|
3083
|
+
if (assistantLeafStopReason(live.session) === 'length') {
|
|
3084
|
+
live.nextPromptMaxTokens = CHANNEL_EMPTY_TURN_RETRY_MAX_OUTPUT_TOKENS
|
|
3085
|
+
}
|
|
3040
3086
|
logger.warn(
|
|
3041
|
-
`[channels] ${live.keyId} empty_turn_retry attempt=${live.emptyTurnRetries}/${MAX_EMPTY_TURN_RETRIES}
|
|
3087
|
+
`[channels] ${live.keyId} empty_turn_retry attempt=${live.emptyTurnRetries}/${MAX_EMPTY_TURN_RETRIES} ` +
|
|
3088
|
+
`max_tokens=${live.nextPromptMaxTokens ?? CHANNEL_MAX_OUTPUT_TOKENS}`,
|
|
3042
3089
|
)
|
|
3043
3090
|
live.pendingSystemReminders.push(EMPTY_TURN_RETRY_NUDGE)
|
|
3044
3091
|
return
|
|
@@ -4355,18 +4402,25 @@ function recoverableAssistantText(
|
|
|
4355
4402
|
return null
|
|
4356
4403
|
}
|
|
4357
4404
|
|
|
4358
|
-
//
|
|
4359
|
-
// `length` (hit the token cap
|
|
4360
|
-
// `aborted
|
|
4361
|
-
// truncated", as distinct from a turn that
|
|
4362
|
-
// (leaf undefined / a non-assistant
|
|
4363
|
-
//
|
|
4364
|
-
//
|
|
4365
|
-
|
|
4405
|
+
// The truncation stop reason when the leaf is an assistant message that was CUT
|
|
4406
|
+
// OFF mid-output — `length` (hit the token cap, the canonical kimi reasoning-
|
|
4407
|
+
// loop), `error`, or `aborted` — else undefined. This is the precise signature
|
|
4408
|
+
// of "the model was producing but got truncated", as distinct from a turn that
|
|
4409
|
+
// produced no assistant message at all (leaf undefined / a non-assistant
|
|
4410
|
+
// entry), which is a benign empty/cold turn. Callers that only need the boolean
|
|
4411
|
+
// use `assistantLeafTruncated`; the retry guard reads the reason itself because
|
|
4412
|
+
// the raised reasoning budget is justified ONLY for `length` (budget
|
|
4413
|
+
// exhaustion), not for `error`/`aborted`.
|
|
4414
|
+
function assistantLeafStopReason(session: AgentSession): 'length' | 'error' | 'aborted' | undefined {
|
|
4366
4415
|
const leaf = session.sessionManager.getLeafEntry()
|
|
4367
|
-
if (!leaf || leaf.type !== 'message' || leaf.message.role !== 'assistant') return
|
|
4416
|
+
if (!leaf || leaf.type !== 'message' || leaf.message.role !== 'assistant') return undefined
|
|
4368
4417
|
const stop = leaf.message.stopReason
|
|
4369
|
-
|
|
4418
|
+
if (stop === 'length' || stop === 'error' || stop === 'aborted') return stop
|
|
4419
|
+
return undefined
|
|
4420
|
+
}
|
|
4421
|
+
|
|
4422
|
+
function assistantLeafTruncated(session: AgentSession): boolean {
|
|
4423
|
+
return assistantLeafStopReason(session) !== undefined
|
|
4370
4424
|
}
|
|
4371
4425
|
|
|
4372
4426
|
function visibleAssistantText(message: AssistantMessage): string {
|
package/src/compose/discover.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { readdirSync } from 'node:fs'
|
|
2
2
|
import { join, resolve } from 'node:path'
|
|
3
3
|
|
|
4
|
+
import { loadConfigSyncOrDefaults } from '@/config'
|
|
4
5
|
import { containerNameFromCwd } from '@/container'
|
|
5
6
|
import { isInitialized } from '@/init'
|
|
6
7
|
|
|
@@ -17,7 +18,9 @@ export type AgentEntry = {
|
|
|
17
18
|
//
|
|
18
19
|
// Underscore-prefixed names are also skipped so operators can park a disabled
|
|
19
20
|
// or in-progress agent next to live ones (e.g. `_archived-coder/`) without
|
|
20
|
-
// compose touching it.
|
|
21
|
+
// compose touching it. Agents with `compose.exclude: true` in typeclaw.json
|
|
22
|
+
// are skipped too — the in-config opt-out for operators who don't want to rename
|
|
23
|
+
// the folder.
|
|
21
24
|
//
|
|
22
25
|
// Returns an empty array when rootCwd doesn't exist or is empty — discovery is
|
|
23
26
|
// not the place to fail; the caller decides what to do with zero agents.
|
|
@@ -40,6 +43,7 @@ export function discoverAgents(rootCwd: string): AgentEntry[] {
|
|
|
40
43
|
if (entry.name.startsWith('_')) continue
|
|
41
44
|
const cwd = join(root, entry.name)
|
|
42
45
|
if (!isInitialized(cwd)) continue
|
|
46
|
+
if (loadConfigSyncOrDefaults(cwd).compose.exclude) continue
|
|
43
47
|
agents.push({ name: entry.name, cwd, containerName: containerNameFromCwd(cwd) })
|
|
44
48
|
}
|
|
45
49
|
|
package/src/config/config.ts
CHANGED
|
@@ -338,6 +338,39 @@ export const networkSchema = z
|
|
|
338
338
|
|
|
339
339
|
export type NetworkConfig = z.infer<typeof networkSchema>
|
|
340
340
|
|
|
341
|
+
// `realProc` opts the per-tool bwrap sandbox into the 'real-proc' strategy
|
|
342
|
+
// (src/sandbox/build.ts): a fresh procfs scoped to a new PID namespace so
|
|
343
|
+
// external-package runners (`bunx`, `bun add <pkg>`, `bun run <pkg-bin>`) get a
|
|
344
|
+
// working /proc/self/{fd,maps} and stop aborting with Bun's "NotDir". Default
|
|
345
|
+
// `false` keeps the universally-portable '--tmpfs /proc' profile, under which
|
|
346
|
+
// sandboxed external-package execution is unsupported by design. Turning it on
|
|
347
|
+
// makes `typeclaw start` grant the container CAP_SYS_ADMIN (required to mount
|
|
348
|
+
// proc for the new PID namespace), which is a deliberate posture change on the
|
|
349
|
+
// single-tenant outer boundary — see docs/internals/sandbox.mdx. PID isolation
|
|
350
|
+
// and the /proc/N/environ leak guard are both preserved; the trade is the
|
|
351
|
+
// CAP_SYS_ADMIN grant, not sandbox strength.
|
|
352
|
+
export const sandboxSchema = z
|
|
353
|
+
.object({
|
|
354
|
+
realProc: z.boolean().default(false),
|
|
355
|
+
})
|
|
356
|
+
.default({ realProc: false })
|
|
357
|
+
|
|
358
|
+
export type SandboxConfig = z.infer<typeof sandboxSchema>
|
|
359
|
+
|
|
360
|
+
// Host-stage `typeclaw compose` knobs. `exclude: true` skips this agent during
|
|
361
|
+
// compose discovery (same effect as parking it under an `_`-prefixed dir, but
|
|
362
|
+
// without renaming the folder). The container never reads this block — it's a
|
|
363
|
+
// pure compose CLI hint. Omitting it (default `exclude: false`) keeps the agent
|
|
364
|
+
// in every compose operation. Namespaced under `compose` so future compose-only
|
|
365
|
+
// settings have a home without crowding the top level.
|
|
366
|
+
export const composeSchema = z
|
|
367
|
+
.object({
|
|
368
|
+
exclude: z.boolean().default(false),
|
|
369
|
+
})
|
|
370
|
+
.default({ exclude: false })
|
|
371
|
+
|
|
372
|
+
export type ComposeConfig = z.infer<typeof composeSchema>
|
|
373
|
+
|
|
341
374
|
// Reverse-proxy tunnels expose a container-private port to the public internet
|
|
342
375
|
// via a managed subprocess (cloudflared) or a user-supplied external URL.
|
|
343
376
|
// See AGENTS.md `## Tunnels`. Keeping the enum scoped to what's implemented
|
|
@@ -490,9 +523,11 @@ export const configSchema = z
|
|
|
490
523
|
// time. Defaults to `[]`. Hatching appends the agent's chosen name
|
|
491
524
|
// here, so a freshly-hatched bot already has its identity wired up.
|
|
492
525
|
alias: z.array(z.string().trim().min(1)).default([]),
|
|
526
|
+
compose: composeSchema,
|
|
493
527
|
channels: channelsSchema,
|
|
494
528
|
portForward: portForwardSchema,
|
|
495
529
|
network: networkSchema,
|
|
530
|
+
sandbox: sandboxSchema,
|
|
496
531
|
docker: dockerSchema,
|
|
497
532
|
git: gitSchema,
|
|
498
533
|
roles: rolesConfigSchema.optional(),
|
|
@@ -632,9 +667,11 @@ export const FIELD_EFFECTS: Record<string, FieldEffect> = {
|
|
|
632
667
|
mcpServers: 'restart-required',
|
|
633
668
|
plugins: 'restart-required',
|
|
634
669
|
alias: 'applied',
|
|
670
|
+
compose: 'ignored',
|
|
635
671
|
channels: 'applied',
|
|
636
672
|
portForward: 'restart-required',
|
|
637
673
|
network: 'restart-required',
|
|
674
|
+
sandbox: 'restart-required',
|
|
638
675
|
tunnels: 'restart-required',
|
|
639
676
|
'docker.file': 'restart-required',
|
|
640
677
|
'git.ignore': 'restart-required',
|
|
@@ -723,6 +760,7 @@ export function extractPluginConfigs(raw: unknown): Record<string, unknown> {
|
|
|
723
760
|
'mounts',
|
|
724
761
|
'plugins',
|
|
725
762
|
'alias',
|
|
763
|
+
'compose',
|
|
726
764
|
'channels',
|
|
727
765
|
'portForward',
|
|
728
766
|
'network',
|
package/src/container/start.ts
CHANGED
|
@@ -514,6 +514,20 @@ export async function planStart({
|
|
|
514
514
|
}
|
|
515
515
|
}
|
|
516
516
|
|
|
517
|
+
// sandbox.realProc opts the per-tool bwrap sandbox into the 'real-proc'
|
|
518
|
+
// strategy (src/sandbox/build.ts), which prefixes the sandbox with
|
|
519
|
+
// `unshare --pid --fork --mount --mount-proc`. Mounting a fresh procfs for the
|
|
520
|
+
// new PID namespace needs real CAP_SYS_ADMIN — seccomp=unconfined alone is not
|
|
521
|
+
// enough (it only unblocks the unshare/clone SYSCALLS; the kernel still
|
|
522
|
+
// rejects mount(2) of proc without the capability). This is the deliberate
|
|
523
|
+
// posture change documented in docs/internals/sandbox.mdx: the default keeps
|
|
524
|
+
// the narrower seccomp-only profile, and the operator grants the broad
|
|
525
|
+
// "new root" capability ONLY by opting into real-proc. Placed before the
|
|
526
|
+
// image tag (like --cap-add=NET_ADMIN) so docker parses it as a run option.
|
|
527
|
+
if (cfg.sandbox.realProc) {
|
|
528
|
+
runArgs.push('--cap-add=SYS_ADMIN')
|
|
529
|
+
}
|
|
530
|
+
|
|
517
531
|
if (hostdControl) {
|
|
518
532
|
runArgs.push('--add-host', HOST_GATEWAY_ALIAS)
|
|
519
533
|
}
|