bloby-bot 0.70.12 → 0.70.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +11 -3
- package/dist-bloby/assets/{bloby-DSNB0g4w.js → bloby-CU9KhQdP.js} +4 -4
- package/dist-bloby/assets/globals-DlPtwiZL.css +2 -0
- package/dist-bloby/assets/{globals-B3cTbITX.js → globals-mGpojCOe.js} +1 -1
- package/dist-bloby/assets/{highlighted-body-OFNGDK62-BLforpkr.js → highlighted-body-OFNGDK62-D0Tm_wgU.js} +1 -1
- package/dist-bloby/assets/mermaid-GHXKKRXX-B95J3s3s.js +1 -0
- package/dist-bloby/assets/{onboard-Dn2Ws_G2.js → onboard-GfjHF9nm.js} +1 -1
- package/dist-bloby/bloby.html +3 -3
- package/dist-bloby/onboard.html +3 -3
- package/package.json +2 -2
- package/scripts/install +15 -7
- package/scripts/install.ps1 +35 -14
- package/scripts/install.sh +15 -7
- package/shared/relay.ts +3 -1
- package/supervisor/channels/manager.ts +16 -11
- package/supervisor/chat/OnboardWizard.tsx +0 -15
- package/supervisor/harnesses/pi/index.ts +320 -100
- package/supervisor/harnesses/pi/providers/humanize-error.ts +2 -2
- package/supervisor/harnesses/pi/providers/retry.ts +31 -0
- package/supervisor/harnesses/pi/providers/stream-anthropic.ts +23 -3
- package/supervisor/harnesses/pi/providers/stream-google.ts +21 -3
- package/supervisor/harnesses/pi/providers/stream-openai-completions.ts +17 -3
- package/supervisor/harnesses/pi/providers/types.ts +11 -0
- package/supervisor/harnesses/pi/session.ts +116 -3
- package/supervisor/harnesses/pi/test-completion.ts +56 -0
- package/supervisor/harnesses/pi/tools/bash.ts +198 -22
- package/supervisor/harnesses/pi/tools/glob.ts +79 -0
- package/supervisor/harnesses/pi/tools/grep.ts +0 -0
- package/supervisor/harnesses/pi/tools/registry.ts +18 -6
- package/supervisor/harnesses/pi/tools/todo-write.ts +45 -0
- package/supervisor/harnesses/pi/tools/web-fetch.ts +129 -0
- package/supervisor/index.ts +36 -2
- package/worker/index.ts +18 -1
- package/worker/prompts/bloby-system-prompt-codex.txt +1 -1
- package/worker/prompts/bloby-system-prompt-pi.txt +6 -24
- package/worker/prompts/bloby-system-prompt.txt +1 -1
- package/workspace/client/src/components/Dashboard/DashboardPage.tsx +4 -117
- package/workspace/client/src/components/Dashboard/deleteme_placeholders.tsx +194 -0
- package/workspace/client/src/components/Layout/Sidebar.tsx +52 -30
- package/workspace/client/src/components/deleteme_onboarding/WorkspaceTour.tsx +25 -15
- package/workspace/client/src/components/deleteme_onboarding/tour-theme.css +24 -0
- package/workspace/skills/mac/SKILL.md +13 -4
- package/dist-bloby/assets/globals-DyeW509Y.css +0 -2
- package/dist-bloby/assets/mermaid-GHXKKRXX-C1H_fSCU.js +0 -1
|
@@ -20,7 +20,7 @@ import type {
|
|
|
20
20
|
PiStopReason,
|
|
21
21
|
PiUsage,
|
|
22
22
|
} from './types.js';
|
|
23
|
-
import { fetchWithRetry } from './retry.js';
|
|
23
|
+
import { fetchWithRetry, readWithIdleTimeout } from './retry.js';
|
|
24
24
|
import { classifyPiError, classifyPiNetworkError } from './humanize-error.js';
|
|
25
25
|
|
|
26
26
|
/* ── SSE parser (shares the LF/CRLF-tolerant pattern from the other providers) ── */
|
|
@@ -32,7 +32,7 @@ async function* parseSse(res: Response): AsyncIterable<any> {
|
|
|
32
32
|
let buffer = '';
|
|
33
33
|
try {
|
|
34
34
|
while (true) {
|
|
35
|
-
const { value, done } = await reader
|
|
35
|
+
const { value, done } = await readWithIdleTimeout(reader, 'Anthropic');
|
|
36
36
|
if (done) break;
|
|
37
37
|
buffer += decoder.decode(value, { stream: true });
|
|
38
38
|
let idx;
|
|
@@ -79,6 +79,10 @@ function toAnthropicContent(blocks: PiContentBlock[]): any[] {
|
|
|
79
79
|
const out: any[] = [];
|
|
80
80
|
for (const b of blocks) {
|
|
81
81
|
if (b.type === 'text') {
|
|
82
|
+
// The Messages API rejects empty/whitespace-only text blocks ("text
|
|
83
|
+
// content blocks must be non-empty") — drop them; an all-empty message
|
|
84
|
+
// is then filtered by the content-length guards in toAnthropicMessages.
|
|
85
|
+
if (!b.text || !b.text.trim()) continue;
|
|
82
86
|
out.push({ type: 'text', text: b.text });
|
|
83
87
|
} else if (b.type === 'image') {
|
|
84
88
|
out.push({
|
|
@@ -105,13 +109,19 @@ function toAnthropicContent(blocks: PiContentBlock[]): any[] {
|
|
|
105
109
|
}
|
|
106
110
|
|
|
107
111
|
function toAnthropicMessages(pi: PiMessage[]): any[] {
|
|
108
|
-
|
|
112
|
+
const msgs = pi
|
|
109
113
|
.filter((m) => m.content.length > 0)
|
|
110
114
|
.map((m) => ({
|
|
111
115
|
role: m.role === 'assistant' ? 'assistant' : 'user',
|
|
112
116
|
content: toAnthropicContent(m.content),
|
|
113
117
|
}))
|
|
114
118
|
.filter((m) => m.content.length > 0);
|
|
119
|
+
// The Messages API requires the first message to be user-role. Rolling
|
|
120
|
+
// history windows (customer buffers) are trimmed user-first at the source
|
|
121
|
+
// (channels/manager.ts trimCustomerBuffer), but defend here too — a leading
|
|
122
|
+
// assistant message 400s the whole request (audit C-7).
|
|
123
|
+
while (msgs.length > 0 && msgs[0].role !== 'user') msgs.shift();
|
|
124
|
+
return msgs;
|
|
115
125
|
}
|
|
116
126
|
|
|
117
127
|
function toAnthropicTools(tools: { name: string; description: string; inputSchema: Record<string, any> }[]) {
|
|
@@ -166,6 +176,9 @@ export async function* streamAnthropic(req: PiStreamRequest): AsyncIterable<PiSt
|
|
|
166
176
|
if (req.tools && req.tools.length > 0) {
|
|
167
177
|
body.tools = toAnthropicTools(req.tools);
|
|
168
178
|
body.tools[body.tools.length - 1].cache_control = { type: 'ephemeral' };
|
|
179
|
+
// Round-cap wrap-up: forbid further tool calls; tools stay declared so
|
|
180
|
+
// tool_use/tool_result blocks in history remain valid.
|
|
181
|
+
if (req.toolChoice === 'none') body.tool_choice = { type: 'none' };
|
|
169
182
|
}
|
|
170
183
|
if (Array.isArray(body.messages) && body.messages.length > 0) {
|
|
171
184
|
const lastContent = body.messages[body.messages.length - 1].content;
|
|
@@ -213,6 +226,7 @@ export async function* streamAnthropic(req: PiStreamRequest): AsyncIterable<PiSt
|
|
|
213
226
|
let usage: PiUsage | undefined;
|
|
214
227
|
let chunkCount = 0;
|
|
215
228
|
let firstChunkSummary = '';
|
|
229
|
+
let thinkingEmitted = false;
|
|
216
230
|
|
|
217
231
|
try {
|
|
218
232
|
for await (const evt of parseSse(res)) {
|
|
@@ -250,6 +264,12 @@ export async function* streamAnthropic(req: PiStreamRequest): AsyncIterable<PiSt
|
|
|
250
264
|
toolArgsBuf: '',
|
|
251
265
|
});
|
|
252
266
|
} else {
|
|
267
|
+
// Extended-thinking blocks (not requested today, future-proofed):
|
|
268
|
+
// one liveness pulse, text never forwarded.
|
|
269
|
+
if (block.type === 'thinking' && !thinkingEmitted) {
|
|
270
|
+
thinkingEmitted = true;
|
|
271
|
+
yield { type: 'thinking' };
|
|
272
|
+
}
|
|
253
273
|
blocks.set(idx, { kind: 'other' });
|
|
254
274
|
}
|
|
255
275
|
break;
|
|
@@ -18,7 +18,7 @@ import type {
|
|
|
18
18
|
PiStopReason,
|
|
19
19
|
PiUsage,
|
|
20
20
|
} from './types.js';
|
|
21
|
-
import { fetchWithRetry } from './retry.js';
|
|
21
|
+
import { fetchWithRetry, readWithIdleTimeout } from './retry.js';
|
|
22
22
|
import { classifyPiError, classifyPiNetworkError } from './humanize-error.js';
|
|
23
23
|
|
|
24
24
|
/** Walk an SSE byte stream and yield each parsed JSON event. */
|
|
@@ -30,7 +30,7 @@ async function* parseSse(res: Response, dbg: { firstBytes: string }): AsyncItera
|
|
|
30
30
|
let totalBytes = 0;
|
|
31
31
|
try {
|
|
32
32
|
while (true) {
|
|
33
|
-
const { value, done } = await reader
|
|
33
|
+
const { value, done } = await readWithIdleTimeout(reader, 'Google Gemini');
|
|
34
34
|
if (done) break;
|
|
35
35
|
if (value) totalBytes += value.byteLength;
|
|
36
36
|
buffer += decoder.decode(value, { stream: true });
|
|
@@ -203,11 +203,25 @@ export async function* streamGoogle(req: PiStreamRequest): AsyncIterable<PiStrea
|
|
|
203
203
|
maxOutputTokens: req.maxOutputTokens ?? 32768,
|
|
204
204
|
},
|
|
205
205
|
};
|
|
206
|
+
// Thinking-capable families (2.5+/3.x): ask for thought summaries so the
|
|
207
|
+
// harness can emit a liveness pulse — without this, Gemini 3 burns its
|
|
208
|
+
// output budget on invisible reasoning and the chat looks hung. Gated by
|
|
209
|
+
// model id; unknown/dynamic ids skip it (older models reject the field).
|
|
210
|
+
// The rolling aliases (gemini-flash-latest / gemini-flash-lite-latest)
|
|
211
|
+
// resolve to 2.5+/3.x thinking models too (review PI-D-2).
|
|
212
|
+
if (/gemini-(2\.5|[3-9]|flash(-lite)?-latest)/i.test(req.modelId)) {
|
|
213
|
+
body.generationConfig.thinkingConfig = { includeThoughts: true };
|
|
214
|
+
}
|
|
206
215
|
if (req.systemPrompt?.trim()) {
|
|
207
216
|
body.systemInstruction = { parts: [{ text: req.systemPrompt }] };
|
|
208
217
|
}
|
|
209
218
|
if (req.tools && req.tools.length > 0) {
|
|
210
219
|
body.tools = toGeminiTools(req.tools);
|
|
220
|
+
// Round-cap wrap-up: forbid further function calls; tools stay declared so
|
|
221
|
+
// functionCall/functionResponse parts in history remain valid.
|
|
222
|
+
if (req.toolChoice === 'none') {
|
|
223
|
+
body.toolConfig = { functionCallingConfig: { mode: 'NONE' } };
|
|
224
|
+
}
|
|
211
225
|
}
|
|
212
226
|
|
|
213
227
|
let res: Response;
|
|
@@ -263,7 +277,11 @@ export async function* streamGoogle(req: PiStreamRequest): AsyncIterable<PiStrea
|
|
|
263
277
|
for (const part of parts) {
|
|
264
278
|
// Thinking models emit reasoning parts with `thought: true`. They
|
|
265
279
|
// shouldn't be shown to the user as part of the visible answer.
|
|
266
|
-
if (part?.thought) {
|
|
280
|
+
if (part?.thought) {
|
|
281
|
+
thoughtPartCount++;
|
|
282
|
+
if (thoughtPartCount === 1) yield { type: 'thinking' };
|
|
283
|
+
continue;
|
|
284
|
+
}
|
|
267
285
|
if (part?.functionCall && typeof part.functionCall.name === 'string') {
|
|
268
286
|
// Gemini doesn't surface a tool-call id of its own; bake the tool
|
|
269
287
|
// name into the id so the session can echo it back as a
|
|
@@ -18,7 +18,7 @@ import type {
|
|
|
18
18
|
PiStopReason,
|
|
19
19
|
PiUsage,
|
|
20
20
|
} from './types.js';
|
|
21
|
-
import { fetchWithRetry } from './retry.js';
|
|
21
|
+
import { fetchWithRetry, readWithIdleTimeout } from './retry.js';
|
|
22
22
|
import { classifyPiError, classifyPiNetworkError } from './humanize-error.js';
|
|
23
23
|
|
|
24
24
|
/* ── SSE parser (LF or CRLF tolerant, flushes the trailing event) ── */
|
|
@@ -30,7 +30,7 @@ async function* parseSse(res: Response): AsyncIterable<any> {
|
|
|
30
30
|
let buffer = '';
|
|
31
31
|
try {
|
|
32
32
|
while (true) {
|
|
33
|
-
const { value, done } = await reader
|
|
33
|
+
const { value, done } = await readWithIdleTimeout(reader, 'OpenAI-compat');
|
|
34
34
|
if (done) break;
|
|
35
35
|
buffer += decoder.decode(value, { stream: true });
|
|
36
36
|
let idx;
|
|
@@ -203,7 +203,10 @@ export async function* streamOpenAICompletions(req: PiStreamRequest): AsyncItera
|
|
|
203
203
|
}
|
|
204
204
|
if (req.tools && req.tools.length > 0) {
|
|
205
205
|
body.tools = toOpenAITools(req.tools);
|
|
206
|
-
|
|
206
|
+
// 'none' = the round-cap wrap-up round: the model must summarize, not
|
|
207
|
+
// start more work. Tools stay declared so histories containing tool calls
|
|
208
|
+
// remain valid.
|
|
209
|
+
body.tool_choice = req.toolChoice === 'none' ? 'none' : 'auto';
|
|
207
210
|
}
|
|
208
211
|
|
|
209
212
|
let res: Response;
|
|
@@ -243,6 +246,7 @@ export async function* streamOpenAICompletions(req: PiStreamRequest): AsyncItera
|
|
|
243
246
|
const toolCallsByIndex = new Map<number, PartialToolCall>();
|
|
244
247
|
let chunkCount = 0;
|
|
245
248
|
let firstChunkSummary = '';
|
|
249
|
+
let thinkingEmitted = false;
|
|
246
250
|
|
|
247
251
|
// Vendors disagree on where streamed usage lives: spec says a final
|
|
248
252
|
// choice-less chunk's `usage`, Groq defaults to nesting under `x_groq.usage`,
|
|
@@ -267,6 +271,16 @@ export async function* streamOpenAICompletions(req: PiStreamRequest): AsyncItera
|
|
|
267
271
|
readUsage(choice?.usage);
|
|
268
272
|
const delta = choice.delta || {};
|
|
269
273
|
|
|
274
|
+
// Reasoning models stream hidden thinking under vendor-specific fields
|
|
275
|
+
// (DeepSeek/OpenRouter: reasoning_content; others: reasoning /
|
|
276
|
+
// reasoning_text — upstream pi's field priority). Emit ONE liveness
|
|
277
|
+
// pulse so the chat doesn't look hung; never forward the text itself.
|
|
278
|
+
const reasoningDelta = delta.reasoning_content ?? delta.reasoning ?? delta.reasoning_text;
|
|
279
|
+
if (!thinkingEmitted && typeof reasoningDelta === 'string' && reasoningDelta.length > 0) {
|
|
280
|
+
thinkingEmitted = true;
|
|
281
|
+
yield { type: 'thinking' };
|
|
282
|
+
}
|
|
283
|
+
|
|
270
284
|
if (typeof delta.content === 'string' && delta.content.length > 0) {
|
|
271
285
|
accumulated += delta.content;
|
|
272
286
|
yield { type: 'text_delta', delta: delta.content };
|
|
@@ -56,6 +56,13 @@ export interface PiStreamRequest {
|
|
|
56
56
|
* that 422 on the `stream_options.include_usage` opt-in. Default true.
|
|
57
57
|
*/
|
|
58
58
|
includeStreamUsage?: boolean;
|
|
59
|
+
/**
|
|
60
|
+
* 'none' forbids tool calls for this request (mapped per flavor: OpenAI
|
|
61
|
+
* tool_choice:'none', Anthropic {type:'none'}, Gemini functionCallingConfig
|
|
62
|
+
* mode NONE). Used by the session's round-cap wrap-up round, where the model
|
|
63
|
+
* must summarize instead of starting more work.
|
|
64
|
+
*/
|
|
65
|
+
toolChoice?: 'auto' | 'none';
|
|
59
66
|
/** Optional abort signal so the session can interrupt in-flight requests. */
|
|
60
67
|
signal?: AbortSignal;
|
|
61
68
|
}
|
|
@@ -72,6 +79,10 @@ export type PiErrorKind = 'auth' | 'context-overflow' | 'rate-limit' | 'billing'
|
|
|
72
79
|
export type PiStreamEvent =
|
|
73
80
|
| { type: 'text_delta'; delta: string }
|
|
74
81
|
| { type: 'text_end'; text: string }
|
|
82
|
+
/** Emitted when the model starts (visibly) reasoning — a liveness pulse for
|
|
83
|
+
* thinking models so the chat doesn't look hung. Reasoning TEXT is never
|
|
84
|
+
* forwarded (it would corrupt the streamed-text == response contract). */
|
|
85
|
+
| { type: 'thinking' }
|
|
75
86
|
| { type: 'tool_use'; id: string; name: string; input: any; thoughtSignature?: string }
|
|
76
87
|
| { type: 'done'; stopReason: PiStopReason; usage?: PiUsage }
|
|
77
88
|
| { type: 'error'; error: string; status?: number; kind?: PiErrorKind; retryable?: boolean };
|
|
@@ -44,6 +44,8 @@ export type PiSessionEvent =
|
|
|
44
44
|
| { type: 'turn_started' }
|
|
45
45
|
| { type: 'text_delta'; delta: string }
|
|
46
46
|
| { type: 'text_end'; text: string }
|
|
47
|
+
/** Liveness pulse: the model is reasoning (thinking models) — no text attached. */
|
|
48
|
+
| { type: 'thinking' }
|
|
47
49
|
| { type: 'tool_use'; id: string; name: string; input: any }
|
|
48
50
|
| { type: 'tool_result'; toolUseId: string; name: string; isError?: boolean }
|
|
49
51
|
| {
|
|
@@ -78,6 +80,10 @@ export interface PiSessionAuth {
|
|
|
78
80
|
includeStreamUsage?: boolean;
|
|
79
81
|
/** Model context window from the catalog — reported on turn_complete for the recycler. */
|
|
80
82
|
contextWindow?: number;
|
|
83
|
+
/** False when the catalog says the model is text-only — image blocks are
|
|
84
|
+
* downgraded to placeholders on send so one screenshot can't 400-poison
|
|
85
|
+
* the session (audit C-8). Undefined (dynamic models) ⇒ assume vision. */
|
|
86
|
+
supportsImages?: boolean;
|
|
81
87
|
}
|
|
82
88
|
|
|
83
89
|
export interface PiSessionInit {
|
|
@@ -117,6 +123,51 @@ export interface PiSession {
|
|
|
117
123
|
getMessages(): PiMessage[];
|
|
118
124
|
}
|
|
119
125
|
|
|
126
|
+
/** Transform-on-send for text-only models (audit C-8): image blocks become
|
|
127
|
+
* placeholders in the REQUEST only — the stored history keeps the images, so
|
|
128
|
+
* switching to a vision model later restores them. */
|
|
129
|
+
function downgradeImages(messages: PiMessage[]): PiMessage[] {
|
|
130
|
+
let any = false;
|
|
131
|
+
const out = messages.map((m) => {
|
|
132
|
+
if (!m.content.some((b) => b.type === 'image')) return m;
|
|
133
|
+
any = true;
|
|
134
|
+
return {
|
|
135
|
+
...m,
|
|
136
|
+
content: m.content.map((b): PiContentBlock =>
|
|
137
|
+
b.type === 'image'
|
|
138
|
+
? { type: 'text', text: '[An image was attached here, but the current model cannot view images. Tell the user to switch to a vision-capable model if the image matters.]' }
|
|
139
|
+
: b,
|
|
140
|
+
),
|
|
141
|
+
};
|
|
142
|
+
});
|
|
143
|
+
return any ? out : messages;
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
/** Emergency in-turn context relief (audit D2-6): when occupancy crosses the
|
|
147
|
+
* threshold MID-turn (recycling only acts between idle turns), stub out the
|
|
148
|
+
* oldest large tool_result payloads — never user/assistant text, never the
|
|
149
|
+
* protected tail (the current round's results). Cruder than real compaction,
|
|
150
|
+
* but the turn finishes instead of 400ing on the context wall. */
|
|
151
|
+
function trimOldToolResults(messages: PiMessage[], charsToFree: number, protectTail: number): number {
|
|
152
|
+
let freed = 0;
|
|
153
|
+
const limit = Math.max(0, messages.length - protectTail);
|
|
154
|
+
for (let i = 0; i < limit && freed < charsToFree; i++) {
|
|
155
|
+
const m = messages[i];
|
|
156
|
+
if (m.role !== 'user') continue;
|
|
157
|
+
for (const b of m.content) {
|
|
158
|
+
if (b.type === 'tool_result' && typeof b.content === 'string' && b.content.length > 2048) {
|
|
159
|
+
freed += b.content.length;
|
|
160
|
+
b.content = `[tool output trimmed to fit the context window — ~${Math.round(b.content.length / 1024)} KB removed]`;
|
|
161
|
+
if (freed >= charsToFree) break;
|
|
162
|
+
}
|
|
163
|
+
}
|
|
164
|
+
}
|
|
165
|
+
return freed;
|
|
166
|
+
}
|
|
167
|
+
|
|
168
|
+
const ROUND_CAP_NOTICE =
|
|
169
|
+
'[System: the tool budget for this turn is exhausted. Stop working now. In 2-3 sentences, summarize what you completed, what remains, and the exact next step.]';
|
|
170
|
+
|
|
120
171
|
const FILE_TOOL_NAMES = new Set(['Write', 'Edit', 'MultiEdit', 'NotebookEdit', 'write', 'edit', 'multiEdit', 'notebookEdit']);
|
|
121
172
|
const MAX_TOOL_ROUNDS = 25;
|
|
122
173
|
/** Transparent re-runs of a failed round that produced nothing (audit D6-1). */
|
|
@@ -142,7 +193,7 @@ export function createPiSession(init: PiSessionInit): PiSession {
|
|
|
142
193
|
retryable?: boolean;
|
|
143
194
|
}
|
|
144
195
|
|
|
145
|
-
async function runOneRound(emitSeparatorFirst: boolean): Promise<RoundResult> {
|
|
196
|
+
async function runOneRound(emitSeparatorFirst: boolean, opts?: { wrapUp?: boolean }): Promise<RoundResult> {
|
|
146
197
|
const result: RoundResult = { text: '', toolUses: [], errored: false };
|
|
147
198
|
let firstDelta = true;
|
|
148
199
|
try {
|
|
@@ -153,8 +204,9 @@ export function createPiSession(init: PiSessionInit): PiSession {
|
|
|
153
204
|
baseUrl: auth.baseUrl,
|
|
154
205
|
apiKey: auth.apiKey,
|
|
155
206
|
systemPrompt: init.systemPrompt,
|
|
156
|
-
messages,
|
|
207
|
+
messages: auth.supportsImages === false ? downgradeImages(messages) : messages,
|
|
157
208
|
tools: init.tools,
|
|
209
|
+
toolChoice: opts?.wrapUp ? 'none' : undefined,
|
|
158
210
|
maxOutputTokens: auth.maxOutputTokens,
|
|
159
211
|
maxTokensField: auth.maxTokensField,
|
|
160
212
|
includeStreamUsage: auth.includeStreamUsage,
|
|
@@ -182,6 +234,9 @@ export function createPiSession(init: PiSessionInit): PiSession {
|
|
|
182
234
|
// at the end of the whole turn so the UI doesn't show half-answers.
|
|
183
235
|
result.text = evt.text;
|
|
184
236
|
break;
|
|
237
|
+
case 'thinking':
|
|
238
|
+
init.onEvent({ type: 'thinking' });
|
|
239
|
+
break;
|
|
185
240
|
case 'tool_use':
|
|
186
241
|
result.toolUses.push({
|
|
187
242
|
id: evt.id,
|
|
@@ -189,7 +244,12 @@ export function createPiSession(init: PiSessionInit): PiSession {
|
|
|
189
244
|
input: evt.input,
|
|
190
245
|
thoughtSignature: evt.thoughtSignature,
|
|
191
246
|
});
|
|
192
|
-
|
|
247
|
+
// Wrap-up rounds forbid tools (toolChoice 'none'); if a vendor
|
|
248
|
+
// ignores that, swallow the phantom call silently — it is never
|
|
249
|
+
// executed or persisted.
|
|
250
|
+
if (!opts?.wrapUp) {
|
|
251
|
+
init.onEvent({ type: 'tool_use', id: evt.id, name: evt.name, input: evt.input });
|
|
252
|
+
}
|
|
193
253
|
break;
|
|
194
254
|
case 'error':
|
|
195
255
|
result.errored = true;
|
|
@@ -331,10 +391,63 @@ export function createPiSession(init: PiSessionInit): PiSession {
|
|
|
331
391
|
messages.push({ role: 'user', content: toolResultBlocks });
|
|
332
392
|
}
|
|
333
393
|
|
|
394
|
+
// Emergency in-turn context relief (audit D2-6): recycling only acts
|
|
395
|
+
// between idle turns, so a single heavy tool loop could cross the wall
|
|
396
|
+
// mid-turn. Above 85% occupancy, stub the oldest large tool outputs to
|
|
397
|
+
// bring the next request back toward 70%.
|
|
398
|
+
if (lastContextWindow && lastUsage) {
|
|
399
|
+
const occupancy =
|
|
400
|
+
(lastUsage.inputTokens || 0) + (lastUsage.cacheReadTokens || 0) + (lastUsage.cacheCreationTokens || 0);
|
|
401
|
+
if (occupancy > 0.85 * lastContextWindow) {
|
|
402
|
+
const charsToFree = (occupancy - Math.floor(0.7 * lastContextWindow)) * 4; // ~4 chars/token
|
|
403
|
+
const freed = trimOldToolResults(messages, charsToFree, 4);
|
|
404
|
+
if (freed > 0) {
|
|
405
|
+
log.info(`[pi/session] context at ${occupancy}/${lastContextWindow} tok mid-turn — trimmed ~${Math.round(freed / 1024)} KB of old tool output`);
|
|
406
|
+
}
|
|
407
|
+
}
|
|
408
|
+
}
|
|
409
|
+
|
|
334
410
|
// No tool calls ⇒ the model is done with this turn.
|
|
335
411
|
if (toolUses.length === 0) { roundCapHit = false; break; }
|
|
336
412
|
}
|
|
337
413
|
|
|
414
|
+
// Round-cap wrap-up (audit D5-8): the budget ran out with the model still
|
|
415
|
+
// mid-task. Run ONE final no-tools round so the turn ends with an honest
|
|
416
|
+
// status summary instead of silent truncation. roundCapHit stays true on
|
|
417
|
+
// turn_complete — consumers still know the work is incomplete.
|
|
418
|
+
if (roundCapHit && !turnErrored && !init.abortController.signal.aborted) {
|
|
419
|
+
log.info(`[pi/session] tool-round budget (${maxRounds}) exhausted — running a no-tools wrap-up round`);
|
|
420
|
+
messages.push({ role: 'user', content: [{ type: 'text', text: ROUND_CAP_NOTICE }] });
|
|
421
|
+
const needsSeparator = accumulatedText.length > 0 && !accumulatedText.endsWith('\n');
|
|
422
|
+
const res = await runOneRound(needsSeparator, { wrapUp: true });
|
|
423
|
+
if (res.text) {
|
|
424
|
+
if (needsSeparator) accumulatedText += '\n\n';
|
|
425
|
+
accumulatedText += res.text;
|
|
426
|
+
messages.push({ role: 'assistant', content: [{ type: 'text', text: res.text }] });
|
|
427
|
+
} else {
|
|
428
|
+
// The notice was never answered — pop it so the NEXT turn doesn't
|
|
429
|
+
// open under a stale "stop working now" instruction (review PI-D-1).
|
|
430
|
+
const last = messages[messages.length - 1];
|
|
431
|
+
if (last?.role === 'user' && last.content.length === 1 &&
|
|
432
|
+
last.content[0].type === 'text' && last.content[0].text === ROUND_CAP_NOTICE) {
|
|
433
|
+
messages.pop();
|
|
434
|
+
}
|
|
435
|
+
}
|
|
436
|
+
// Fatal wrap-up failures (dead key / context wall) must still tear the
|
|
437
|
+
// session down, and a cap-hit turn with NO text at all must not end in
|
|
438
|
+
// total silence — claude surfaces error_max_turns and pi's one-shot
|
|
439
|
+
// paths guard this state too (PI-C-2). Set the turn-error fields so the
|
|
440
|
+
// standard emission below handles both (review PI-D-1).
|
|
441
|
+
if (res.errored && (res.errorKind === 'auth' || res.errorKind === 'context-overflow')) {
|
|
442
|
+
turnErrored = true;
|
|
443
|
+
turnErrorMsg = res.errorMsg;
|
|
444
|
+
turnErrorKind = res.errorKind;
|
|
445
|
+
} else if (!accumulatedText) {
|
|
446
|
+
turnErrored = true;
|
|
447
|
+
turnErrorMsg = `I hit my tool budget for this turn (${maxRounds} rounds) before finishing — say "continue" and I'll pick up where I left off.`;
|
|
448
|
+
}
|
|
449
|
+
}
|
|
450
|
+
|
|
338
451
|
// Turn-end emission order (audit D6-2, mirrors claude.ts:394-401):
|
|
339
452
|
// 1. text_end whenever ANY text streamed — even on errored turns, so the
|
|
340
453
|
// partial the user watched is committed, persisted, and consumes its
|
|
@@ -11,6 +11,8 @@
|
|
|
11
11
|
* - google-gemini → POST {baseUrl}/models/{modelId}:generateContent
|
|
12
12
|
*/
|
|
13
13
|
import { getPiSubProvider, type PiApiFlavor } from './sub-providers.js';
|
|
14
|
+
import { streamProvider } from './providers/stream.js';
|
|
15
|
+
import { toolDefsForProvider } from './tools/registry.js';
|
|
14
16
|
|
|
15
17
|
export interface PiTestCompletionInput {
|
|
16
18
|
subProvider: string;
|
|
@@ -88,6 +90,60 @@ export async function runPiTestCompletion(input: PiTestCompletionInput): Promise
|
|
|
88
90
|
}
|
|
89
91
|
}
|
|
90
92
|
|
|
93
|
+
/**
|
|
94
|
+
* Streaming + tools probe (audit C-4). The non-streaming, tool-less test above
|
|
95
|
+
* validates a contract no real turn uses — free-form model ids (Ollama, LM
|
|
96
|
+
* Studio, custom, OpenRouter) could pass it and then fail the first actual
|
|
97
|
+
* message, which streams SSE with the full tool schema attached. This probe
|
|
98
|
+
* exercises the REAL wire shape in one cheap request: success = any
|
|
99
|
+
* text/tool-call event arrives before an error does.
|
|
100
|
+
*/
|
|
101
|
+
export async function runPiStreamProbe(input: PiTestCompletionInput): Promise<PiTestCompletionResult> {
|
|
102
|
+
const provider = getPiSubProvider(input.subProvider);
|
|
103
|
+
if (!provider) return { ok: false, error: `Unknown sub-provider: ${input.subProvider}` };
|
|
104
|
+
const baseUrl = pickBaseUrl(input);
|
|
105
|
+
if (!baseUrl) return { ok: false, error: 'Missing base URL' };
|
|
106
|
+
const modelId = pickModelId(input);
|
|
107
|
+
if (!modelId) return { ok: false, error: 'Missing model ID' };
|
|
108
|
+
|
|
109
|
+
const ctl = new AbortController();
|
|
110
|
+
const timer = setTimeout(() => ctl.abort(), REQUEST_TIMEOUT_MS);
|
|
111
|
+
try {
|
|
112
|
+
const stream = streamProvider(provider.flavor, {
|
|
113
|
+
modelId,
|
|
114
|
+
baseUrl,
|
|
115
|
+
apiKey: input.apiKey?.trim() || '',
|
|
116
|
+
systemPrompt: 'You are a connectivity probe. Reply with the single word OK.',
|
|
117
|
+
messages: [{ role: 'user', content: [{ type: 'text', text: input.prompt || 'Reply with the single word OK.' }] }],
|
|
118
|
+
// withTask: the live conversation's schema is the superset every real
|
|
119
|
+
// turn sends — probe with the same shape (review PI-D-4).
|
|
120
|
+
tools: toolDefsForProvider({ withTask: true }),
|
|
121
|
+
// Generous: reasoning models burn output budget on hidden thinking first.
|
|
122
|
+
maxOutputTokens: 2048,
|
|
123
|
+
maxTokensField: provider.maxTokensField,
|
|
124
|
+
includeStreamUsage: provider.noStreamUsage ? false : undefined,
|
|
125
|
+
signal: ctl.signal,
|
|
126
|
+
});
|
|
127
|
+
for await (const evt of stream) {
|
|
128
|
+
if (evt.type === 'text_delta' || evt.type === 'tool_use') {
|
|
129
|
+
return { ok: true, text: 'stream OK', modelId, subProvider: provider.id };
|
|
130
|
+
}
|
|
131
|
+
if (evt.type === 'error') {
|
|
132
|
+
return { ok: false, error: evt.error, modelId, subProvider: provider.id };
|
|
133
|
+
}
|
|
134
|
+
}
|
|
135
|
+
if (ctl.signal.aborted) {
|
|
136
|
+
return { ok: false, error: `Stream probe timed out after ${REQUEST_TIMEOUT_MS / 1000}s.`, modelId, subProvider: provider.id };
|
|
137
|
+
}
|
|
138
|
+
return { ok: false, error: 'The stream ended without producing any output.', modelId, subProvider: provider.id };
|
|
139
|
+
} catch (err: any) {
|
|
140
|
+
const msg = err?.name === 'AbortError' ? `Stream probe timed out after ${REQUEST_TIMEOUT_MS / 1000}s.` : err?.message || String(err);
|
|
141
|
+
return { ok: false, error: msg, modelId, subProvider: provider.id };
|
|
142
|
+
} finally {
|
|
143
|
+
clearTimeout(timer);
|
|
144
|
+
}
|
|
145
|
+
}
|
|
146
|
+
|
|
91
147
|
interface DispatchArgs {
|
|
92
148
|
baseUrl: string;
|
|
93
149
|
modelId: string;
|