bloby-bot 0.70.12 → 0.71.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. package/bin/cli.js +234 -48
  2. package/dist-bloby/assets/{bloby-DSNB0g4w.js → bloby-es6cZJzs.js} +6 -6
  3. package/dist-bloby/assets/globals-DBqwNiJV.css +2 -0
  4. package/dist-bloby/assets/{globals-B3cTbITX.js → globals-DN3F0CQE.js} +1 -1
  5. package/dist-bloby/assets/{highlighted-body-OFNGDK62-BLforpkr.js → highlighted-body-OFNGDK62-8PiOHw9p.js} +1 -1
  6. package/dist-bloby/assets/mermaid-GHXKKRXX-BJWX8urU.js +1 -0
  7. package/dist-bloby/assets/{onboard-Dn2Ws_G2.js → onboard-BKgy17OU.js} +1 -1
  8. package/dist-bloby/bloby.html +3 -3
  9. package/dist-bloby/onboard.html +3 -3
  10. package/package.json +3 -4
  11. package/scripts/install +156 -41
  12. package/scripts/install.ps1 +146 -29
  13. package/scripts/install.sh +156 -41
  14. package/shared/config.ts +37 -2
  15. package/shared/relay.ts +3 -1
  16. package/supervisor/channels/manager.ts +84 -44
  17. package/supervisor/channels/telegram.ts +57 -16
  18. package/supervisor/channels/types.ts +4 -1
  19. package/supervisor/channels/whatsapp.ts +57 -10
  20. package/supervisor/chat/OnboardWizard.tsx +0 -15
  21. package/supervisor/chat/src/components/Chat/AudioBubble.tsx +1 -1
  22. package/supervisor/chat/src/components/Chat/AuthedImage.tsx +16 -3
  23. package/supervisor/chat/src/components/Chat/BlobyImageCard.tsx +2 -2
  24. package/supervisor/chat/src/components/Chat/ImageLightbox.tsx +25 -8
  25. package/supervisor/chat/src/components/Chat/InputBar.tsx +62 -7
  26. package/supervisor/chat/src/components/Chat/MessageBubble.tsx +37 -18
  27. package/supervisor/chat/src/components/Chat/MessageList.tsx +3 -3
  28. package/supervisor/chat/src/hooks/useChat.ts +52 -0
  29. package/supervisor/chat/src/lib/authedFile.ts +24 -12
  30. package/supervisor/file-saver.ts +92 -19
  31. package/supervisor/harnesses/attachment-policy.ts +111 -0
  32. package/supervisor/harnesses/claude.ts +62 -15
  33. package/supervisor/harnesses/codex.ts +69 -43
  34. package/supervisor/harnesses/pi/index.ts +367 -112
  35. package/supervisor/harnesses/pi/providers/humanize-error.ts +27 -2
  36. package/supervisor/harnesses/pi/providers/retry.ts +31 -0
  37. package/supervisor/harnesses/pi/providers/stream-anthropic.ts +31 -3
  38. package/supervisor/harnesses/pi/providers/stream-google.ts +26 -3
  39. package/supervisor/harnesses/pi/providers/stream-openai-completions.ts +32 -9
  40. package/supervisor/harnesses/pi/providers/types.ts +29 -1
  41. package/supervisor/harnesses/pi/session.ts +143 -3
  42. package/supervisor/harnesses/pi/test-completion.ts +56 -0
  43. package/supervisor/harnesses/pi/tools/bash.ts +198 -22
  44. package/supervisor/harnesses/pi/tools/glob.ts +79 -0
  45. package/supervisor/harnesses/pi/tools/grep.ts +0 -0
  46. package/supervisor/harnesses/pi/tools/registry.ts +18 -6
  47. package/supervisor/harnesses/pi/tools/todo-write.ts +45 -0
  48. package/supervisor/harnesses/pi/tools/web-fetch.ts +129 -0
  49. package/supervisor/index.ts +93 -18
  50. package/supervisor/widget.js +19 -5
  51. package/worker/db.ts +2 -0
  52. package/worker/index.ts +18 -1
  53. package/worker/prompts/bloby-system-prompt-codex.txt +1 -1
  54. package/worker/prompts/bloby-system-prompt-pi.txt +6 -24
  55. package/worker/prompts/bloby-system-prompt.txt +1 -1
  56. package/workspace/client/src/components/Dashboard/DashboardPage.tsx +4 -117
  57. package/workspace/client/src/components/Dashboard/deleteme_placeholders.tsx +194 -0
  58. package/workspace/client/src/components/Layout/Sidebar.tsx +52 -30
  59. package/workspace/client/src/components/deleteme_onboarding/WorkspaceTour.tsx +25 -15
  60. package/workspace/client/src/components/deleteme_onboarding/tour-theme.css +24 -0
  61. package/workspace/skills/mac/SKILL.md +13 -4
  62. package/dist-bloby/assets/globals-DyeW509Y.css +0 -2
  63. package/dist-bloby/assets/mermaid-GHXKKRXX-C1H_fSCU.js +0 -1
  64. package/supervisor/public/headphones_spritesheet.webp +0 -0
  65. package/supervisor/public/spritesheet.webp +0 -0
@@ -29,12 +29,20 @@ import type {
29
29
  export type { RecentMessage, AgentAttachment };
30
30
 
31
31
  import { buildSkillsIndex } from '../skills.js';
32
+ import {
33
+ routeAttachment,
34
+ buildSavedFilesNote,
35
+ normalizeImageMediaType,
36
+ approxBase64Bytes,
37
+ MAX_INLINE_IMAGE_BYTES,
38
+ INLINE_TEXT_PER_FILE_CHARS,
39
+ INLINE_TEXT_TOTAL_CHARS,
40
+ } from '../attachment-policy.js';
32
41
  import { createAsyncQueue, type AsyncQueue } from './async-queue.js';
33
42
  import { createPiSession, type PiSessionEvent, type PiSessionAuth } from './session.js';
34
- import { getPiSubProvider, getCatalogModel } from './sub-providers.js';
43
+ import { getPiSubProvider, getCatalogModel, type PiApiFlavor } from './sub-providers.js';
35
44
  import { readPiAuth } from './auth-storage.js';
36
- import { streamProvider } from './providers/stream.js';
37
- import type { PiMessage } from './providers/types.js';
45
+ import type { PiMessage, PiContentBlock } from './providers/types.js';
38
46
  import { toolDefsForProvider } from './tools/registry.js';
39
47
  import type { PiTaskHost } from './tools/types.js';
40
48
 
@@ -264,6 +272,11 @@ function resolveAuth(): { ok: true; auth: PiSessionAuth } | { ok: false; error:
264
272
  maxTokensField: sub.maxTokensField,
265
273
  includeStreamUsage: sub.noStreamUsage ? false : undefined,
266
274
  contextWindow,
275
+ // Text-only models 400 on image blocks AND the stuck image re-fails
276
+ // every later message (audit C-8) — the session downgrades images to
277
+ // placeholders when the catalog says no vision. Unknown (dynamic
278
+ // sub-providers) ⇒ undefined ⇒ assume vision.
279
+ supportsImages: catalog?.input ? catalog.input.includes('image') : undefined,
267
280
  },
268
281
  };
269
282
  }
@@ -349,7 +362,7 @@ function createTaskHost(conv: LiveConversation, getAuth: () => PiSessionAuth): P
349
362
  // Honor the agent config's tool restrictions (claude applies these via
350
363
  // the SDK's tools/disallowedTools options — e.g. a future researcher
351
364
  // agent with disallowedTools: ['Write','Edit']).
352
- let childTools = toolDefsForProvider({ forSubagent: true });
365
+ let childTools = toolDefsForProvider();
353
366
  if (Array.isArray(cfg.tools) && cfg.tools.length > 0) {
354
367
  childTools = childTools.filter((t) => cfg.tools.includes(t.name));
355
368
  }
@@ -491,27 +504,82 @@ function recentToPiMessages(messages: RecentMessage[] | undefined): PiMessage[]
491
504
  }));
492
505
  }
493
506
 
494
- /** Wrap a raw user input into a PiMessage with text + optional image blocks. */
495
- function buildUserMessage(text: string, attachments?: AgentAttachment[], savedFiles?: SavedFile[]): PiMessage {
496
- const content: PiMessage['content'] = [];
507
+ /** Native PDF document blocks reach only the flavors that render them the
508
+ * Anthropic Messages API and Gemini both ingest application/pdf inline
509
+ * (base64 document source / inlineData). openai-completions has no document
510
+ * type, so a PDF there falls back to the saved-files disk pointer. Matches the
511
+ * shared attachment-policy routing rule. */
512
+ function canNativeDocumentForFlavor(flavor: PiApiFlavor): boolean {
513
+ return flavor === 'anthropic-messages' || flavor === 'google-gemini';
514
+ }
515
+
516
+ /** Build a PiContentBlock[] from raw text + attachments, MEDIA-FIRST then the
517
+ * prompt text last (parity with claude.ts and the other pi providers). Routing
518
+ * is delegated to the shared attachment-policy so all three harnesses ingest
519
+ * identically; canNativeDocument is the active provider's PDF capability. */
520
+ function buildAttachmentBlocks(
521
+ text: string,
522
+ canNativeDocument: boolean,
523
+ attachments?: AgentAttachment[],
524
+ savedFiles?: SavedFile[],
525
+ ): PiContentBlock[] {
526
+ const content: PiContentBlock[] = [];
497
527
  if (attachments?.length) {
528
+ // Running budget so the cross-file inline-text total never exceeds the cap.
529
+ let inlineTextBudget = INLINE_TEXT_TOTAL_CHARS;
498
530
  for (const att of attachments) {
499
- if (att.type === 'image') {
500
- content.push({ type: 'image', mediaType: att.mediaType, data: att.data });
501
- } else {
502
- // Documents aren't directly supported across all sub-providers yet.
503
- // Surface their existence in the text body instead.
504
- content.push({ type: 'text', text: `[Attached document: ${att.name} (${att.mediaType})]` });
531
+ switch (routeAttachment(att, { canNativeDocument })) {
532
+ case 'image': {
533
+ // Drop the inline copy when it would bloat every stateless resend —
534
+ // the file is on disk and buildSavedFilesNote points the tools at it.
535
+ if (approxBase64Bytes(att.data) > MAX_INLINE_IMAGE_BYTES) break;
536
+ content.push({ type: 'image', mediaType: normalizeImageMediaType(att.mediaType), data: att.data });
537
+ break;
538
+ }
539
+ case 'native-document': {
540
+ // PDF on a flavor that renders it natively (anthropic / gemini).
541
+ content.push({ type: 'document', mediaType: 'application/pdf', data: att.data, name: att.name });
542
+ break;
543
+ }
544
+ case 'inline-text': {
545
+ if (inlineTextBudget <= 0) break;
546
+ let decoded = '';
547
+ try { decoded = Buffer.from(att.data, 'base64').toString('utf-8'); }
548
+ catch { break; } // undecodable → rely on the saved-files note
549
+ const cap = Math.min(INLINE_TEXT_PER_FILE_CHARS, inlineTextBudget);
550
+ const slice = decoded.slice(0, cap);
551
+ inlineTextBudget -= slice.length;
552
+ content.push({ type: 'text', text: `--- ${att.name} ---\n${slice}` });
553
+ break;
554
+ }
555
+ case 'reference-only':
556
+ default:
557
+ // Binary we can't inline (docx/xlsx/zip/…), a PDF on a flavor without
558
+ // native documents, or an unexpected route — no provider block; the
559
+ // saved-files note below carries the disk pointer. Never emit a
560
+ // malformed block (defensive default, review PI-E).
561
+ break;
505
562
  }
506
563
  }
507
564
  }
565
+
508
566
  let prompt = text || '(attached files)';
509
567
  if (savedFiles?.length) {
510
- const lines = savedFiles.map((f) => `- ${f.name} -> ${f.relPath}`);
511
- prompt += `\n\n[Attached files saved to disk]\n${lines.join('\n')}\nYou can read or reference these files using the paths above (relative to your cwd).`;
568
+ const note = buildSavedFilesNote(savedFiles);
569
+ if (note) prompt += `\n\n${note}`;
512
570
  }
513
571
  content.push({ type: 'text', text: prompt });
514
- return { role: 'user', content };
572
+ return content;
573
+ }
574
+
575
+ /** Wrap a raw user input into a PiMessage with text + optional media blocks. */
576
+ function buildUserMessage(
577
+ text: string,
578
+ canNativeDocument: boolean,
579
+ attachments?: AgentAttachment[],
580
+ savedFiles?: SavedFile[],
581
+ ): PiMessage {
582
+ return { role: 'user', content: buildAttachmentBlocks(text, canNativeDocument, attachments, savedFiles) };
515
583
  }
516
584
 
517
585
  // ── Live Conversation API ──────────────────────────────────────────────────
@@ -582,7 +650,7 @@ export async function startConversation(
582
650
  const session = createPiSession({
583
651
  getAuth,
584
652
  systemPrompt,
585
- tools: toolDefsForProvider(),
653
+ tools: toolDefsForProvider({ withTask: true }),
586
654
  cwd: WORKSPACE_DIR,
587
655
  abortController,
588
656
  taskHost: createTaskHost(conv, getAuth),
@@ -647,8 +715,17 @@ function translateAndEmit(conv: LiveConversation, evt: PiSessionEvent) {
647
715
  conv.onMessage('bot:tool', { conversationId: conv.id, name: toolName, input: evt.input, ...syn });
648
716
  break;
649
717
  }
718
+ case 'thinking':
719
+ // Reasoning-model liveness pulse (house standard, codex M1 analog) —
720
+ // the UI dedups repeated name+running entries, channels get a chunk
721
+ // flush opportunity. Reasoning TEXT is never forwarded.
722
+ conv.onMessage('bot:tool', { conversationId: conv.id, name: 'thinking', status: 'running', ...syn });
723
+ break;
650
724
  case 'tool_result':
651
- // Not surfaced yet (Phase D: translate to a bot:tool progress pulse).
725
+ // Progress pulse between tool rounds (audit D1-7): claude punctuates
726
+ // long tasks with tool_progress events; this is pi's equivalent —
727
+ // commits dashboard bubbles and flushes channel chunks mid-task.
728
+ conv.onMessage('bot:tool', { conversationId: conv.id, name: evt.name, status: 'running', ...syn });
652
729
  break;
653
730
  case 'turn_complete': {
654
731
  conv.busy = false;
@@ -723,7 +800,13 @@ export function pushMessage(
723
800
  conv.busy = true;
724
801
  conv.pendingCount += 1;
725
802
  conv.turnOrigins.push('user');
726
- conv.inputQueue.push(buildUserMessage(content, attachments, savedFiles));
803
+ // Resolve the active flavor at push time (the session re-resolves auth every
804
+ // round, so a wizard provider switch mid-session is honored). Unreadable auth
805
+ // ⇒ no native documents — the conservative route sends a PDF to the disk
806
+ // pointer rather than emitting a block the provider can't render.
807
+ const resolved = resolveAuth();
808
+ const canNativeDocument = resolved.ok ? canNativeDocumentForFlavor(resolved.auth.flavor) : false;
809
+ conv.inputQueue.push(buildUserMessage(content, canNativeDocument, attachments, savedFiles));
727
810
  conv.onMessage('bot:typing', { conversationId });
728
811
  return true;
729
812
  }
@@ -795,9 +878,20 @@ export function anyOneShotActive(): boolean {
795
878
  }
796
879
 
797
880
  /**
798
- * One-shot text query — used by customer WhatsApp + scheduler. Uses the
799
- * provider stream directly (no async queue), drains it, emits the same
800
- * bloby events the live path does.
881
+ * One-shot agentic query — used by customer WhatsApp + scheduler (pulse/cron).
882
+ *
883
+ * Phase C (audit D5-1/D3-1): runs the SAME tool loop as the live path — a
884
+ * single-message `createPiSession` — so pulse/cron runs can actually edit
885
+ * files, run Bash, and read skills, and the tool-advertising system prompt is
886
+ * finally true (a tool-less request under that prompt made Gemini emit
887
+ * MALFORMED_FUNCTION_CALL — PI-HARNESS.md gotcha #3). No task host: background
888
+ * sub-agents stay a live-conversation feature (claude parity), so the Task def
889
+ * is excluded from the tool list and a hallucinated call fails gracefully.
890
+ *
891
+ * Guarantees preserved: finally-emitted bot:done, 5-min non-resetting
892
+ * watchdog, activeQueries registration AFTER the awaited prompt build
893
+ * (leak-ordering, claude.ts), supportPrompt bypasses the owner prompt +
894
+ * skills index entirely.
801
895
  */
802
896
  export async function startBlobyAgentQuery(
803
897
  conversationId: string,
@@ -809,7 +903,7 @@ export async function startBlobyAgentQuery(
809
903
  names?: { botName: string; humanName: string },
810
904
  recentMessages?: RecentMessage[],
811
905
  supportPrompt?: string,
812
- _maxTurns?: number,
906
+ maxTurns?: number,
813
907
  ): Promise<void> {
814
908
  const resolved = resolveAuth();
815
909
  if (!resolved.ok) {
@@ -820,7 +914,6 @@ export async function startBlobyAgentQuery(
820
914
  onMessage('bot:done', { conversationId, usedFileTools: false });
821
915
  return;
822
916
  }
823
- const auth = resolved.auth;
824
917
 
825
918
  // Build the prompt BEFORE registering in activeQueries / arming the watchdog
826
919
  // (claude.ts ordering): if anything in here ever rejected after registration,
@@ -830,15 +923,21 @@ export async function startBlobyAgentQuery(
830
923
  if (supportPrompt) {
831
924
  systemPrompt = supportPrompt;
832
925
  } else {
833
- systemPrompt = await buildSystemPrompt(names, recentMessages);
926
+ // History rides ONLY as structured messages (initialMessages below).
927
+ // Passing it here too duplicated every prior turn into the system prompt
928
+ // (audit D3-6).
929
+ systemPrompt = await buildSystemPrompt(names, undefined);
930
+ // The base prompt routes heavy coding to the Agent tool, which only LIVE
931
+ // conversations have (one-shots have no task host) — keep the model
932
+ // honest so it doesn't chase a tool that isn't declared (review PI-C-4).
933
+ systemPrompt +=
934
+ '\n\n---\n# One-shot run\nThis is a scheduled/one-shot run: the Agent tool is NOT available here. ' +
935
+ 'Do any heavy work yourself, directly with Read, Write, Edit, and Bash.';
834
936
  }
835
937
 
836
- const messages: PiMessage[] = recentToPiMessages(recentMessages);
837
- messages.push(buildUserMessage(prompt, attachments, savedFiles));
838
-
839
938
  const abortController = new AbortController();
840
939
  activeQueries.set(conversationId, abortController);
841
- // Hard watchdog — a hung provider stream would otherwise pin this query forever (finally never
940
+ // Hard watchdog — a hung turn would otherwise pin this query forever (finally never
842
941
  // runs, bot:done never fires). Abort after 5 min; cleared in the finally on normal completion.
843
942
  const watchdog = setTimeout(() => {
844
943
  log.warn(`[pi/bloby-agent] one-shot timed out (5m) — aborting conv=${conversationId}`);
@@ -847,76 +946,107 @@ export async function startBlobyAgentQuery(
847
946
 
848
947
  onMessage('bot:typing', { conversationId });
849
948
 
850
- let accumulated = '';
949
+ let usedFileTools = false;
950
+ // Track tool names LIVE (not only via turn_complete): an aborted run never
951
+ // emits turn_complete, and files written in earlier rounds must still flag
952
+ // usedFileTools on bot:done or the backend serves stale code
953
+ // (review PI-C-1; mirrors claude.ts:723-760 and runAgentQuery below).
851
954
  const usedTools = new Set<string>();
852
- // Errors are stashed, not emitted inline — at the end, partial text wins
853
- // over the error bubble (audit D3-5/D6-2, claude.ts:730-737 precedence).
854
- let errorMsg: string | null = null;
955
+ let sawResponse = false;
956
+ let capHit = false;
855
957
  const batcher = createTokenBatcher((text) => onMessage('bot:token', { conversationId, token: text }));
856
958
 
959
+ // Re-resolve auth per round, same as the live path — a key/model fix in the
960
+ // wizard applies to the next round of an in-flight pulse run too.
961
+ let currentAuth: PiSessionAuth = resolved.auth;
962
+ const getAuth = (): PiSessionAuth => {
963
+ const fresh = resolveAuth();
964
+ if (fresh.ok) currentAuth = fresh.auth;
965
+ return currentAuth;
966
+ };
967
+
857
968
  try {
858
- const stream = streamProvider(auth.flavor, {
859
- modelId: auth.modelId,
860
- baseUrl: auth.baseUrl,
861
- apiKey: auth.apiKey,
969
+ const session = createPiSession({
970
+ getAuth,
862
971
  systemPrompt,
863
- messages,
864
- maxOutputTokens: auth.maxOutputTokens,
865
- maxTokensField: auth.maxTokensField,
866
- includeStreamUsage: auth.includeStreamUsage,
867
- signal: abortController.signal,
972
+ initialMessages: recentToPiMessages(recentMessages),
973
+ tools: toolDefsForProvider(), // no Task — one-shots have no task host
974
+ cwd: WORKSPACE_DIR,
975
+ abortController,
976
+ maxToolRounds: maxTurns ?? 50, // claude one-shot default (claude.ts:677)
977
+ onEvent: (evt: PiSessionEvent) => {
978
+ switch (evt.type) {
979
+ case 'text_delta':
980
+ batcher.add(evt.delta);
981
+ break;
982
+ case 'text_end':
983
+ // Session precedence (D6-2): emitted even on errored turns when
984
+ // partial text streamed — the partial reaches the customer/pulse.
985
+ batcher.flush();
986
+ sawResponse = true;
987
+ onMessage('bot:response', { conversationId, content: evt.text });
988
+ break;
989
+ case 'tool_use': {
990
+ batcher.flush();
991
+ usedTools.add(evt.name);
992
+ const toolName = evt.name === 'Agent' || evt.name === 'agent' ? 'Task' : evt.name;
993
+ onMessage('bot:tool', { conversationId, name: toolName, input: evt.input });
994
+ break;
995
+ }
996
+ case 'thinking':
997
+ batcher.flush();
998
+ onMessage('bot:tool', { conversationId, name: 'thinking', status: 'running' });
999
+ break;
1000
+ case 'tool_result':
1001
+ batcher.flush();
1002
+ onMessage('bot:tool', { conversationId, name: evt.name, status: 'running' });
1003
+ break;
1004
+ case 'error':
1005
+ // Fires only when the turn produced no text, or fatally (D6-2).
1006
+ batcher.flush();
1007
+ sawResponse = true; // the caller got a terminal signal for this turn
1008
+ onMessage('bot:error', { conversationId, error: evt.error });
1009
+ break;
1010
+ case 'turn_complete':
1011
+ usedFileTools = usedFileTools || evt.usedFileTools;
1012
+ if (evt.roundCapHit) capHit = true;
1013
+ break;
1014
+ }
1015
+ },
868
1016
  });
869
1017
 
870
- for await (const evt of stream) {
871
- if (abortController.signal.aborted) break;
872
- switch (evt.type) {
873
- case 'text_delta':
874
- accumulated += evt.delta;
875
- batcher.add(evt.delta);
876
- break;
877
- case 'text_end':
878
- batcher.flush();
879
- accumulated = evt.text;
880
- break;
881
- case 'tool_use':
882
- batcher.flush();
883
- usedTools.add(evt.name);
884
- onMessage('bot:tool', { conversationId, name: evt.name, input: evt.input });
885
- break;
886
- case 'error':
887
- batcher.flush();
888
- errorMsg = evt.error;
889
- break;
890
- }
891
- }
892
- // Abort guard (audit D3-8): a watchdog-aborted run must not surface a
893
- // truncated reply — a stopped pulse could otherwise still fire <Message>
894
- // pushes with half-finished content.
895
- if (!abortController.signal.aborted) {
1018
+ const queue = createAsyncQueue<PiMessage>();
1019
+ queue.push(buildUserMessage(prompt, canNativeDocumentForFlavor(resolved.auth.flavor), attachments, savedFiles));
1020
+ queue.end();
1021
+ await session.run(queue);
1022
+
1023
+ // Round-cap exhaustion with no terminal signal: the model was still
1024
+ // mid-task when the budget ran out and no text streamed — without this the
1025
+ // customer/pulse gets dead silence (review PI-C-2; claude surfaces an
1026
+ // error_max_turns result on the same path).
1027
+ if (!abortController.signal.aborted && capHit && !sawResponse) {
896
1028
  batcher.flush();
897
- if (accumulated) {
898
- onMessage('bot:response', { conversationId, content: accumulated });
899
- } else if (errorMsg) {
900
- onMessage('bot:error', { conversationId, error: errorMsg });
901
- }
1029
+ onMessage('bot:error', {
1030
+ conversationId,
1031
+ error: `The run hit its ${maxTurns ?? 50}-round tool limit before producing a reply. Try a narrower request.`,
1032
+ });
902
1033
  }
903
1034
  } catch (err: any) {
1035
+ // session.run contains per-turn error handling; a throw here is unexpected.
904
1036
  if (!abortController.signal.aborted) {
905
1037
  log.warn(`[pi/bloby-agent] one-shot error: ${err?.message || err}`);
906
1038
  batcher.flush();
907
- if (accumulated) {
908
- onMessage('bot:response', { conversationId, content: accumulated });
909
- } else {
910
- onMessage('bot:error', { conversationId, error: err?.message || String(err) });
911
- }
1039
+ onMessage('bot:error', { conversationId, error: err?.message || String(err) });
912
1040
  }
913
1041
  } finally {
1042
+ // Aborted-run stragglers must not surface (audit D3-8) — discard, never flush.
914
1043
  batcher.discard();
915
1044
  clearTimeout(watchdog);
916
1045
  activeQueries.delete(conversationId);
917
- const FILE_TOOL_NAMES = ['Write', 'Edit', 'write', 'edit'];
918
- const usedFileTools = FILE_TOOL_NAMES.some((t) => usedTools.has(t));
919
- onMessage('bot:done', { conversationId, usedFileTools });
1046
+ // Live tool tracking covers aborted runs whose turn_complete never fired —
1047
+ // files already written must still trigger the backend restart (PI-C-1).
1048
+ const fileToolsUsed = usedFileTools || ['Write', 'Edit', 'write', 'edit'].some((t) => usedTools.has(t));
1049
+ onMessage('bot:done', { conversationId, usedFileTools: fileToolsUsed });
920
1050
  }
921
1051
  }
922
1052
 
@@ -930,45 +1060,126 @@ export function stopBlobyAgentQuery(conversationId: string): void {
930
1060
 
931
1061
  // ── Workspace agent endpoint (POST /api/agent/query) ──────────────────────
932
1062
 
1063
+ /** Minimal coding-agent prompt for /api/agent/query when the caller supplies
1064
+ * none — claude falls back to its native `claude_code` preset; pi's
1065
+ * equivalent advertises ONLY the tools that actually exist, and never the
1066
+ * Bloby owner persona (agent-API callers are workspace apps, not the bot). */
1067
+ const PI_CODING_AGENT_PROMPT =
1068
+ 'You are a coding agent operating non-interactively inside a project workspace. ' +
1069
+ 'Complete the request fully using your tools, then reply with a concise summary of what you did. ' +
1070
+ 'Tools: Read (file contents), Write (create/overwrite a file), Edit (exact string replacement), ' +
1071
+ 'Bash (shell commands; cwd is the workspace root). Paths are relative to the workspace root. ' +
1072
+ 'Do the work — never claim to have done something without actually using the tools.';
1073
+
1074
+ /** In-memory session store for the agent API (audit D2-7/D3-3). Process-
1075
+ * lifetime only — AGENT-API.md documents that sessions die on supervisor
1076
+ * restart, and claude's resume has the same practical bound. */
1077
+ interface StoredAgentSession { messages: PiMessage[]; lastUsed: number }
1078
+ const agentSessions = new Map<string, StoredAgentSession>();
1079
+ const AGENT_SESSION_CAP = 50;
1080
+ const AGENT_SESSION_TTL_MS = 24 * 60 * 60_000;
1081
+ const AGENT_SESSION_MAX_MESSAGES = 40;
1082
+
1083
+ function sweepAgentSessions(): void {
1084
+ const now = Date.now();
1085
+ for (const [id, s] of agentSessions) {
1086
+ if (now - s.lastUsed > AGENT_SESSION_TTL_MS) agentSessions.delete(id);
1087
+ }
1088
+ if (agentSessions.size > AGENT_SESSION_CAP) {
1089
+ const byAge = [...agentSessions.entries()].sort((a, b) => a[1].lastUsed - b[1].lastUsed);
1090
+ for (const [id] of byAge.slice(0, agentSessions.size - AGENT_SESSION_CAP)) {
1091
+ agentSessions.delete(id);
1092
+ }
1093
+ }
1094
+ }
1095
+
1096
+ /** Trim resumed history at a clean turn boundary: the window must start on a
1097
+ * REAL user message (not a tool_result carrier) — an orphaned tool_result or
1098
+ * a leading assistant message makes Anthropic/Gemini reject the request. */
1099
+ function trimAgentHistory(messages: PiMessage[]): PiMessage[] {
1100
+ if (messages.length <= AGENT_SESSION_MAX_MESSAGES) return messages;
1101
+ const isRealUser = (m: PiMessage) =>
1102
+ m.role === 'user' && !m.content.some((b) => b.type === 'tool_result');
1103
+ const windowStart = messages.length - AGENT_SESSION_MAX_MESSAGES;
1104
+ for (let i = windowStart; i < messages.length; i++) {
1105
+ if (isRealUser(messages[i])) return messages.slice(i);
1106
+ }
1107
+ // No clean boundary inside the window — a single tool-heavy turn (each round
1108
+ // adds an assistant + a tool_result message) exceeds the cap by itself.
1109
+ // Fall back BACKWARD to that turn's own user message: the window overshoots
1110
+ // the cap (bounded by the turn's size) instead of silently wiping the whole
1111
+ // history to [] (review PI-C-3 — total session amnesia).
1112
+ for (let j = windowStart - 1; j >= 0; j--) {
1113
+ if (isRealUser(messages[j])) return messages.slice(j);
1114
+ }
1115
+ return messages;
1116
+ }
1117
+
1118
+ /** Per-sessionId serialization (review PI-C-SESS-2): two concurrent resumes of
1119
+ * the same session would both read the same stored history and last-write-win
1120
+ * the store, silently erasing one call's turn. Chaining the second behind the
1121
+ * first keeps the linear-history contract; each run is bounded by its own
1122
+ * timeout (≤300s), so the wait is too. */
1123
+ const agentSessionLocks = new Map<string, Promise<void>>();
1124
+
933
1125
  export async function runAgentQuery(req: AgentQueryRequest): Promise<AgentQueryResult> {
1126
+ if (!req.sessionId) return runAgentQueryInner(req);
1127
+ const id = req.sessionId;
1128
+ const prev = agentSessionLocks.get(id) ?? Promise.resolve();
1129
+ let release!: () => void;
1130
+ const gate = new Promise<void>((r) => { release = r; });
1131
+ const chained = prev.then(() => gate);
1132
+ agentSessionLocks.set(id, chained);
1133
+ await prev;
1134
+ try {
1135
+ return await runAgentQueryInner(req);
1136
+ } finally {
1137
+ release();
1138
+ if (agentSessionLocks.get(id) === chained) agentSessionLocks.delete(id);
1139
+ }
1140
+ }
1141
+
1142
+ async function runAgentQueryInner(req: AgentQueryRequest): Promise<AgentQueryResult> {
934
1143
  const resolved = resolveAuth();
935
1144
  if (!resolved.ok) return { ok: false, error: resolved.error };
936
- const auth = resolved.auth;
937
1145
 
938
1146
  const timeout = Math.min(Math.max(req.timeout || 120_000, 5_000), 300_000);
1147
+ // Same clamp as claude.ts:781 — maxTurns maps onto the session's tool-round budget.
1148
+ const maxTurns = Math.min(Math.max(req.maxTurns || 25, 1), 50);
939
1149
  const abortController = new AbortController();
940
1150
  const timeoutHandle = setTimeout(() => abortController.abort(), timeout);
941
1151
 
942
- const systemPrompt = req.systemPrompt ?? '';
943
- const messages: PiMessage[] = [{
944
- role: 'user',
945
- content: [{ type: 'text', text: req.message }],
946
- }];
1152
+ const systemPrompt = req.systemPrompt?.trim() ? req.systemPrompt : PI_CODING_AGENT_PROMPT;
1153
+
1154
+ sweepAgentSessions();
1155
+ const resumed = req.sessionId ? agentSessions.get(req.sessionId) : undefined;
1156
+ const sessionId = resumed ? req.sessionId! : crypto.randomUUID();
1157
+ if (resumed) resumed.lastUsed = Date.now();
947
1158
 
948
1159
  let fullText = '';
949
1160
  const usedTools = new Set<string>();
950
1161
  let errored = false;
951
1162
  let errorMsg = '';
1163
+ let usedFileTools = false;
1164
+ let capHit = false;
952
1165
 
953
- try {
954
- const stream = streamProvider(auth.flavor, {
955
- modelId: auth.modelId,
956
- baseUrl: auth.baseUrl,
957
- apiKey: auth.apiKey,
958
- systemPrompt,
959
- messages,
960
- maxOutputTokens: auth.maxOutputTokens,
961
- maxTokensField: auth.maxTokensField,
962
- includeStreamUsage: auth.includeStreamUsage,
963
- signal: abortController.signal,
964
- });
1166
+ let currentAuth: PiSessionAuth = resolved.auth;
1167
+ const getAuth = (): PiSessionAuth => {
1168
+ const fresh = resolveAuth();
1169
+ if (fresh.ok) currentAuth = fresh.auth;
1170
+ return currentAuth;
1171
+ };
965
1172
 
966
- for await (const evt of stream) {
967
- if (abortController.signal.aborted) break;
1173
+ const session = createPiSession({
1174
+ getAuth,
1175
+ systemPrompt,
1176
+ initialMessages: resumed ? trimAgentHistory(resumed.messages) : undefined,
1177
+ tools: toolDefsForProvider(), // no Task — no task host on this path
1178
+ cwd: WORKSPACE_DIR,
1179
+ abortController,
1180
+ maxToolRounds: maxTurns,
1181
+ onEvent: (evt: PiSessionEvent) => {
968
1182
  switch (evt.type) {
969
- case 'text_delta':
970
- fullText += evt.delta;
971
- break;
972
1183
  case 'text_end':
973
1184
  fullText = evt.text;
974
1185
  break;
@@ -979,22 +1190,66 @@ export async function runAgentQuery(req: AgentQueryRequest): Promise<AgentQueryR
979
1190
  errored = true;
980
1191
  errorMsg = evt.error;
981
1192
  break;
1193
+ case 'turn_complete':
1194
+ usedFileTools = usedFileTools || evt.usedFileTools;
1195
+ // The error EVENT is suppressed when partial text streamed (D6-2) —
1196
+ // read the outcome fields so a failed turn isn't reported clean.
1197
+ if (evt.errored) {
1198
+ errored = true;
1199
+ errorMsg = errorMsg || evt.errorMsg || '';
1200
+ }
1201
+ if (evt.roundCapHit) capHit = true;
1202
+ break;
982
1203
  }
983
- }
1204
+ },
1205
+ });
1206
+
1207
+ try {
1208
+ log.info(`[pi/agent-api] Query: msg="${req.message.slice(0, 80)}..." maxTurns=${maxTurns} timeout=${timeout}ms resume=${resumed ? sessionId : 'none'}`);
1209
+ const queue = createAsyncQueue<PiMessage>();
1210
+ queue.push({ role: 'user', content: [{ type: 'text', text: req.message }] });
1211
+ queue.end();
1212
+ await session.run(queue);
984
1213
  } catch (err: any) {
985
- if (abortController.signal.aborted) {
986
- return { ok: false, error: 'Query timed out.' };
987
- }
988
- return { ok: false, error: err?.message || String(err) };
1214
+ if (abortController.signal.aborted) return { ok: false, error: 'Query timed out.', sessionId };
1215
+ return { ok: false, error: err?.message || String(err), sessionId };
989
1216
  } finally {
990
1217
  clearTimeout(timeoutHandle);
991
1218
  }
992
1219
 
1220
+ if (abortController.signal.aborted) {
1221
+ // Timed-out histories can hold a dangling tool_use (aborted mid-round) —
1222
+ // don't persist them for resume.
1223
+ return { ok: false, error: 'Query timed out.', sessionId };
1224
+ }
1225
+
1226
+ // Round-cap exhaustion with no answer: the model was still mid-task when the
1227
+ // budget ran out (claude maps the same state to an error_max_turns result —
1228
+ // review PI-C-2; an ok:true empty response reads as a silent blank bubble in
1229
+ // the documented maxTurns:1 aichat pattern). Don't persist the half-done
1230
+ // turn either — a fresh retry beats resuming into unanswered tool results.
1231
+ if (capHit && !fullText) {
1232
+ return {
1233
+ ok: false,
1234
+ error: `Agent hit its turn limit (maxTurns=${maxTurns}) before producing a response — raise maxTurns or narrow the request.`,
1235
+ sessionId,
1236
+ toolsUsed: Array.from(usedTools),
1237
+ };
1238
+ }
1239
+
1240
+ // Trim at store time too — otherwise a long-lived session's stored history
1241
+ // grows unboundedly across resumes (the resume-side trim only caps what the
1242
+ // provider sees, not what we keep in memory).
1243
+ agentSessions.set(sessionId, { messages: trimAgentHistory(session.getMessages()), lastUsed: Date.now() });
1244
+
993
1245
  // Partial-text precedence (claude parity, audit D6-2): if the model streamed
994
1246
  // anything before failing, return it as a successful (truncated) response —
995
1247
  // claude's runAgentQuery only reports the error when nothing streamed.
996
- if (errored && !fullText) return { ok: false, error: errorMsg || 'Agent query failed' };
1248
+ if (errored && !fullText) {
1249
+ return { ok: false, error: errorMsg || 'Agent query failed', sessionId, toolsUsed: Array.from(usedTools) };
1250
+ }
997
1251
 
998
- const usedFileTools = ['Write', 'Edit', 'write', 'edit'].some((t) => usedTools.has(t));
999
- return { ok: true, response: fullText, toolsUsed: Array.from(usedTools), usedFileTools };
1252
+ const fileToolsUsed = usedFileTools || ['Write', 'Edit', 'write', 'edit'].some((t) => usedTools.has(t));
1253
+ log.info(`[pi/agent-api] Done: ${fullText.length} chars, tools=[${Array.from(usedTools).join(',')}], session=${sessionId}`);
1254
+ return { ok: true, response: fullText, sessionId, toolsUsed: Array.from(usedTools), usedFileTools: fileToolsUsed };
1000
1255
  }