@stevederico/dotbot 0.28.0 → 0.31.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,8 @@
1
1
  /**
2
2
  * Cron task handler for dotbot.
3
3
  *
4
- * Extracted from dottie-os server.js to provide a reusable cron task executor
5
- * that handles session resolution, stale user gates, task injection, and
6
- * notification hooks.
4
+ * Reusable cron task executor that handles session resolution, stale user
5
+ * gates, task injection, and notification hooks.
7
6
  */
8
7
 
9
8
  import { compactMessages } from './compaction.js';
@@ -18,6 +17,7 @@ import { compactMessages } from './compaction.js';
18
17
  * @param {Object} options.memoryStore - Memory store instance (optional)
19
18
  * @param {Object} options.providers - Provider API keys for compaction
20
19
  * @param {number} [options.staleThresholdMs=86400000] - Skip heartbeat if user idle longer than this (default: 24h)
20
+ * @param {string} [options.notificationTitle='Assistant'] - Title used when dispatching notifications via hooks.onNotification
21
21
  * @param {Object} [options.hooks] - Host-specific hooks
22
22
  * @param {Function} [options.hooks.onNotification] - async (userId, { title, body, type }) => void
23
23
  * @param {Function} [options.hooks.taskFetcher] - async (userId, taskId) => task object
@@ -31,6 +31,7 @@ export function createCronHandler({
31
31
  memoryStore,
32
32
  providers = {},
33
33
  staleThresholdMs = 24 * 60 * 60 * 1000,
34
+ notificationTitle = 'Assistant',
34
35
  hooks = {},
35
36
  }) {
36
37
  // Agent reference - will be set after init() creates the agent
@@ -139,7 +140,7 @@ export function createCronHandler({
139
140
  if (trimmed && trimmed.length > 10 && updatedSession.owner && hooks.onNotification) {
140
141
  try {
141
142
  await hooks.onNotification(updatedSession.owner, {
142
- title: 'Dottie',
143
+ title: notificationTitle,
143
144
  body: trimmed.slice(0, 500),
144
145
  type: task.name === 'heartbeat' ? 'heartbeat' : 'cron',
145
146
  });
@@ -224,33 +225,43 @@ export function createCronHandler({
224
225
  tasks = await taskStore.findTasks(session.owner, { status: ['pending', 'in_progress'] });
225
226
  }
226
227
 
227
- if (tasks.length > 0) {
228
- // Check if any task is in auto mode with pending steps
229
- const autoTask = tasks.find(t => t.mode === 'auto' && t.steps?.some(s => !s.done));
230
- if (autoTask) {
231
- const doneCount = autoTask.steps.filter(s => s.done).length;
232
- const nextStep = autoTask.steps.find(s => !s.done);
233
- taskContent = `[Heartbeat] Auto-mode task "${autoTask.description}" has pending steps (${doneCount}/${autoTask.steps.length} done). Call task_work with task_id "${autoTask._id || autoTask.id}" to execute: "${nextStep.text}"`;
234
- } else {
235
- // List all active tasks
236
- const lines = tasks.map(t => {
237
- let line = `• [${t.priority}] ${t.description}`;
238
- if (t.mode) line += ` [${t.mode}]`;
239
- if (t.deadline) line += ` (due: ${t.deadline})`;
240
- if (t.steps && t.steps.length > 0) {
241
- const done = t.steps.filter(s => s.done).length;
242
- line += ` (${done}/${t.steps.length} steps)`;
243
- for (const step of t.steps) {
244
- line += `\n ${step.done ? '[x]' : '[ ]'} ${step.text}`;
245
- }
228
+ // Skip the LLM call entirely when there's nothing to discuss. A heartbeat
229
+ // with no active tasks is a waste of tokens on every provider (and is
230
+ // especially expensive on cloud providers that charge per call). The
231
+ // caller at handleTaskFire() treats a null return as "skip this tick".
232
+ if (tasks.length === 0) {
233
+ console.log(`[cron] heartbeat for ${session.owner}: no active tasks, skipping AI call`);
234
+ return null;
235
+ }
236
+
237
+ // Check if any task is in auto mode with pending steps
238
+ const autoTask = tasks.find(t => t.mode === 'auto' && t.steps?.some(s => !s.done));
239
+ if (autoTask) {
240
+ const doneCount = autoTask.steps.filter(s => s.done).length;
241
+ const nextStep = autoTask.steps.find(s => !s.done);
242
+ taskContent = `[Heartbeat] Auto-mode task "${autoTask.description}" has pending steps (${doneCount}/${autoTask.steps.length} done). Call task_work with task_id "${autoTask._id || autoTask.id}" to execute: "${nextStep.text}"`;
243
+ } else {
244
+ // List all active tasks
245
+ const lines = tasks.map(t => {
246
+ let line = `• [${t.priority}] ${t.description}`;
247
+ if (t.mode) line += ` [${t.mode}]`;
248
+ if (t.deadline) line += ` (due: ${t.deadline})`;
249
+ if (t.steps && t.steps.length > 0) {
250
+ const done = t.steps.filter(s => s.done).length;
251
+ line += ` (${done}/${t.steps.length} steps)`;
252
+ for (const step of t.steps) {
253
+ line += `\n ${step.done ? '[x]' : '[ ]'} ${step.text}`;
246
254
  }
247
- return line;
248
- });
249
- taskContent += `\n\nActive tasks:\n${lines.join('\n')}`;
250
- }
255
+ }
256
+ return line;
257
+ });
258
+ taskContent += `\n\nActive tasks:\n${lines.join('\n')}`;
251
259
  }
252
260
  } catch (err) {
261
+ // Fail closed: if we can't fetch tasks, skip this heartbeat rather
262
+ // than call the LLM with a meaningless default prompt.
253
263
  console.error('[cron] failed to fetch tasks for heartbeat:', err.message);
264
+ return null;
254
265
  }
255
266
 
256
267
  return taskContent;
package/core/init.js CHANGED
@@ -25,6 +25,7 @@ import { createTriggerHandler } from './trigger_handler.js';
25
25
  * @param {Object} [options.providers] - Provider API keys: { anthropic: { apiKey }, openai: { apiKey }, xai: { apiKey } }
26
26
  * @param {Array} [options.tools] - Tool definitions (default: coreTools)
27
27
  * @param {number} [options.staleThresholdMs=86400000] - Skip heartbeat if user idle longer than this (default: 24h)
28
+ * @param {string} [options.notificationTitle='Assistant'] - Title used when cron/trigger handlers dispatch notifications
28
29
  * @param {Function} [options.systemPrompt] - System prompt builder function
29
30
  * @param {Function} [options.screenshotUrlPattern] - Screenshot URL pattern function
30
31
  * @param {Object} [options.compaction] - Compaction settings
@@ -42,6 +43,7 @@ export async function init({
42
43
  providers = {},
43
44
  tools = coreTools,
44
45
  staleThresholdMs = 24 * 60 * 60 * 1000,
46
+ notificationTitle = 'Assistant',
45
47
  systemPrompt,
46
48
  screenshotUrlPattern,
47
49
  compaction = { enabled: true },
@@ -68,7 +70,8 @@ export async function init({
68
70
  memory: memoryStore,
69
71
  };
70
72
 
71
- // For stores-only mode (e.g., dottie-desktop), skip session/cron/agent setup
73
+ // For stores-only mode (host manages sessions/cron/agent itself),
74
+ // skip session/cron/agent setup
72
75
  if (storesOnly) {
73
76
  return {
74
77
  stores,
@@ -101,6 +104,7 @@ export async function init({
101
104
  memoryStore,
102
105
  providers,
103
106
  staleThresholdMs,
107
+ notificationTitle,
104
108
  hooks,
105
109
  });
106
110
 
@@ -134,6 +138,7 @@ export async function init({
134
138
  triggerStore,
135
139
  memoryStore,
136
140
  providers,
141
+ notificationTitle,
137
142
  hooks,
138
143
  });
139
144
 
@@ -1,8 +1,8 @@
1
1
  /**
2
2
  * Trigger handler for dotbot.
3
3
  *
4
- * Extracted from dottie-os server.js to provide a reusable trigger executor
5
- * that handles event matching, firing, and notification hooks.
4
+ * Reusable trigger executor that handles event matching, firing, and
5
+ * notification hooks.
6
6
  */
7
7
 
8
8
  import { compactMessages } from './compaction.js';
@@ -16,6 +16,7 @@ import { compactMessages } from './compaction.js';
16
16
  * @param {Object} options.triggerStore - Trigger store instance
17
17
  * @param {Object} options.memoryStore - Memory store instance (optional)
18
18
  * @param {Object} options.providers - Provider API keys for compaction
19
+ * @param {string} [options.notificationTitle='Assistant'] - Title used when dispatching notifications via hooks.onNotification
19
20
  * @param {Object} [options.hooks] - Host-specific hooks
20
21
  * @param {Function} [options.hooks.onNotification] - async (userId, { title, body, type }) => void
21
22
  * @returns {Function} Async function: (eventType, userId, eventData?) => Promise<void>
@@ -26,6 +27,7 @@ export function createTriggerHandler({
26
27
  triggerStore,
27
28
  memoryStore,
28
29
  providers = {},
30
+ notificationTitle = 'Assistant',
29
31
  hooks = {},
30
32
  }) {
31
33
  /**
@@ -133,7 +135,7 @@ export function createTriggerHandler({
133
135
  if (trimmed && trimmed.length > 10 && updatedSession.owner && hooks.onNotification) {
134
136
  try {
135
137
  await hooks.onNotification(updatedSession.owner, {
136
- title: 'Dottie',
138
+ title: notificationTitle,
137
139
  body: trimmed.slice(0, 500),
138
140
  type: 'trigger',
139
141
  });
package/docs/core.md CHANGED
@@ -50,7 +50,7 @@ Standard AI Agent Tools (Industry Common)
50
50
  14. Notifications - Push alerts to users
51
51
  15. Weather - Current conditions/forecasts
52
52
 
53
- Your Library (@dottie/agent) Has:
53
+ dotbot Has:
54
54
 
55
55
  ✅ Memory (6 tools)
56
56
  ✅ Web (3 tools)
@@ -48,12 +48,12 @@ Absolutely! Yes, you should definitely protect .ssh and similar system-level dir
48
48
  ~/Library/Application Support/Firefox/
49
49
 
50
50
 
51
- Dottie-Specific (Your App)
51
+ Host-App Data (Example)
52
52
 
53
- ~/.dottie/logs/ # May contain user conversations
54
- ~/.dottie/chat_history.json
55
- ~/.dottie/*.db
53
+ ~/.myapp/logs/ # May contain user conversations
54
+ ~/.myapp/chat_history.json
55
+ ~/.myapp/*.db
56
56
 
57
57
 
58
- Bottom line: Any path under ~ (home directory) that contains credentials, personal data, command history, or configuration files should be protected. The general rule is: never search/glob/grep from ~ or /Users/sd root — only within specific project directories.
58
+ Bottom line: Any path under ~ (home directory) that contains credentials, personal data, command history, or configuration files should be protected. The general rule is: never search/glob/grep from the home directory root — only within specific project directories.
59
59
  ctrl+q to copy · 6 snippets
package/index.js CHANGED
@@ -18,10 +18,8 @@ import {
18
18
  notifyTools,
19
19
  createBrowserTools,
20
20
  taskTools,
21
- goalTools,
22
21
  triggerTools,
23
22
  jobTools,
24
- cronTools,
25
23
  eventTools,
26
24
  appgenTools,
27
25
  } from './tools/index.js';
@@ -40,9 +38,6 @@ export {
40
38
  runWithConcurrency,
41
39
  TaskStore,
42
40
  SQLiteTaskStore,
43
- // Backwards compatibility aliases
44
- GoalStore,
45
- SQLiteGoalStore,
46
41
  TriggerStore,
47
42
  SQLiteTriggerStore,
48
43
  SQLiteMemoryStore,
@@ -65,10 +60,8 @@ export {
65
60
  browserTools,
66
61
  createBrowserTools,
67
62
  taskTools,
68
- goalTools, // backwards compatibility alias
69
63
  triggerTools,
70
64
  jobTools,
71
- cronTools, // backwards compatibility alias
72
65
  eventTools,
73
66
  appgenTools,
74
67
  } from './tools/index.js';
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@stevederico/dotbot",
3
- "version": "0.28.0",
3
+ "version": "0.31.0",
4
4
  "description": "AI agent CLI and library for Node.js — streaming, multi-provider, tool execution, autonomous tasks",
5
5
  "type": "module",
6
6
  "main": "index.js",
@@ -11,7 +11,7 @@ import { toStandardFormat } from '../core/normalize.js';
11
11
  * @param {string} options.agentPersonality - Personality description
12
12
  * @returns {string} System prompt
13
13
  */
14
- export function defaultSystemPrompt({ agentName = 'Dottie', agentPersonality = '' } = {}) {
14
+ export function defaultSystemPrompt({ agentName = 'Assistant', agentPersonality = '' } = {}) {
15
15
  const now = new Date().toISOString();
16
16
  return `You are a helpful personal AI assistant called ${agentName}.${agentPersonality ? `\nYour personality and tone: ${agentPersonality}. Embody this in all responses.` : ''}
17
17
  You have access to tools for searching the web, reading/writing files, fetching URLs, running code, long-term memory, and scheduled tasks.
@@ -220,16 +220,7 @@ export class SQLiteCronStore extends CronStore {
220
220
  "SELECT * FROM cron_tasks WHERE session_id = ? AND name != 'heartbeat' ORDER BY next_run_at ASC"
221
221
  ).all(sessionId || 'default');
222
222
 
223
- return rows.map(r => ({
224
- id: r.id,
225
- name: r.name,
226
- prompt: r.prompt,
227
- nextRunAt: new Date(r.next_run_at),
228
- recurring: !!r.recurring,
229
- intervalMs: r.interval_ms,
230
- enabled: !!r.enabled,
231
- lastRunAt: r.last_run_at ? new Date(r.last_run_at) : null,
232
- }));
223
+ return rows.map(r => this._rowToTask(r));
233
224
  }
234
225
 
235
226
  /**
@@ -257,18 +248,7 @@ export class SQLiteCronStore extends CronStore {
257
248
 
258
249
  const rows = this.db.prepare(query).all(...params);
259
250
 
260
- return rows.map(r => ({
261
- id: r.id,
262
- name: r.name,
263
- prompt: r.prompt,
264
- sessionId: r.session_id,
265
- nextRunAt: new Date(r.next_run_at),
266
- recurring: !!r.recurring,
267
- intervalMs: r.interval_ms,
268
- enabled: !!r.enabled,
269
- lastRunAt: r.last_run_at ? new Date(r.last_run_at) : null,
270
- createdAt: new Date(r.created_at),
271
- }));
251
+ return rows.map(r => this._rowToTask(r));
272
252
  }
273
253
 
274
254
  /**
@@ -374,53 +354,6 @@ export class SQLiteCronStore extends CronStore {
374
354
  return null;
375
355
  }
376
356
 
377
- /**
378
- * Ensure a Morning Brief job exists for the user (disabled by default).
379
- * Creates a daily recurring job at 8:00 AM if not present.
380
- *
381
- * @param {string} userId - User ID
382
- * @returns {Promise<Object|null>} Created task or null if already exists
383
- */
384
- async ensureMorningBrief(userId) {
385
- if (!this.db || !userId) return null;
386
-
387
- // Check if Morning Brief already exists for this user
388
- const existing = this.db.prepare(
389
- `SELECT id FROM cron_tasks WHERE user_id = ? AND name = 'Morning Brief' LIMIT 1`
390
- ).get(userId);
391
- if (existing) return null;
392
-
393
- const DAY_MS = 24 * 60 * 60 * 1000;
394
- const MORNING_BRIEF_PROMPT = `Good morning! Give me a brief summary to start my day:
395
- 1. What's on my calendar today?
396
- 2. Any important reminders or tasks due?
397
- 3. A quick weather update for my location.
398
- Keep it concise and actionable.`;
399
-
400
- // Calculate next 8:00 AM
401
- const now = new Date();
402
- const today8AM = new Date(now.getFullYear(), now.getMonth(), now.getDate(), 8, 0, 0, 0);
403
- const nextRun = now.getTime() < today8AM.getTime()
404
- ? today8AM.getTime()
405
- : today8AM.getTime() + DAY_MS;
406
-
407
- const id = crypto.randomUUID();
408
- const nowMs = Date.now();
409
-
410
- const result = this.db.prepare(`
411
- INSERT OR IGNORE INTO cron_tasks (id, name, prompt, session_id, user_id, next_run_at, interval_ms, recurring, enabled, created_at, last_run_at)
412
- VALUES (?, 'Morning Brief', ?, 'default', ?, ?, ?, 1, 0, ?, NULL)
413
- `).run(id, MORNING_BRIEF_PROMPT, userId, nextRun, DAY_MS, nowMs);
414
-
415
- if (result.changes > 0) {
416
- const runTime = new Date(nextRun);
417
- console.log(`[cron] created Morning Brief for user ${userId}, next run at ${runTime.toLocaleTimeString()} (disabled by default)`);
418
- return { id };
419
- }
420
-
421
- return null;
422
- }
423
-
424
357
  /**
425
358
  * Get heartbeat status for a user
426
359
  *
@@ -455,35 +388,18 @@ Keep it concise and actionable.`;
455
388
  async resetHeartbeat(userId) {
456
389
  if (!this.db || !userId) return null;
457
390
 
458
- const deleted = this.db.prepare(
391
+ this.db.prepare(
459
392
  "DELETE FROM cron_tasks WHERE user_id = ? AND name = 'heartbeat'"
460
393
  ).run(userId);
461
394
  console.log(`[cron] deleted existing heartbeat(s) for user ${userId}`);
462
395
 
463
- const jitter = Math.floor(Math.random() * HEARTBEAT_INTERVAL_MS);
464
- const now = Date.now();
465
- const id = crypto.randomUUID();
396
+ const result = await this.ensureHeartbeat(userId);
466
397
 
467
- this.db.prepare(`
468
- INSERT INTO cron_tasks (id, name, prompt, session_id, user_id, next_run_at, interval_ms, recurring, enabled, created_at, last_run_at)
469
- VALUES (?, 'heartbeat', ?, 'default', ?, ?, ?, 1, 1, ?, NULL)
470
- `).run(id, HEARTBEAT_PROMPT, userId, now + jitter, HEARTBEAT_INTERVAL_MS, now);
471
-
472
- console.log(`[cron] created new heartbeat for user ${userId}, first run in ${Math.round(jitter / 60000)}m`);
398
+ if (!result) return null;
473
399
 
474
- return {
475
- id,
476
- name: 'heartbeat',
477
- prompt: HEARTBEAT_PROMPT,
478
- userId,
479
- sessionId: 'default',
480
- nextRunAt: new Date(now + jitter),
481
- intervalMs: HEARTBEAT_INTERVAL_MS,
482
- recurring: true,
483
- enabled: true,
484
- createdAt: new Date(now),
485
- lastRunAt: null,
486
- };
400
+ // Return the full task object for the newly created heartbeat
401
+ const row = this.db.prepare('SELECT * FROM cron_tasks WHERE id = ?').get(result.id);
402
+ return row ? this._rowToTask(row) : null;
487
403
  }
488
404
 
489
405
  /**
package/storage/index.js CHANGED
@@ -5,9 +5,6 @@ export { CronStore } from './CronStore.js';
5
5
  export { SQLiteCronStore, parseInterval, HEARTBEAT_INTERVAL_MS, HEARTBEAT_PROMPT } from './SQLiteCronAdapter.js';
6
6
  export { TaskStore } from './TaskStore.js';
7
7
  export { SQLiteTaskStore } from './SQLiteTaskAdapter.js';
8
- // Backwards compatibility aliases
9
- export { TaskStore as GoalStore } from './TaskStore.js';
10
- export { SQLiteTaskStore as SQLiteGoalStore } from './SQLiteTaskAdapter.js';
11
8
  export { TriggerStore } from './TriggerStore.js';
12
9
  export { SQLiteTriggerStore } from './SQLiteTriggerAdapter.js';
13
10
  export { SQLiteMemoryStore } from './SQLiteMemoryAdapter.js';
@@ -0,0 +1,192 @@
1
+ import { test, describe, beforeEach, afterEach } from 'node:test';
2
+ import assert from 'node:assert';
3
+ import { agentLoop } from '../core/agent.js';
4
+
5
+ /**
6
+ * Regression tests for the mlx_local provider branch of agentLoop.
7
+ *
8
+ * These cover the flush branch added in 0.30 that handles short plain-text
9
+ * responses from local models that never emit gpt-oss channel tokens
10
+ * (Gemma 4 E2B, LFM2.5, SmolLM). Without the flush, the rawBuffer was
11
+ * silently discarded on stream end and the downstream consumer received
12
+ * zero text_delta events — empty assistant bubbles in the UI.
13
+ */
14
+
15
+ /**
16
+ * Build a minimal mlx_local-style provider for agentLoop tests.
17
+ * The `id` must be "mlx_local" to hit the buffered-parsing branch,
18
+ * and `local: true` skips the failover path for a direct fetch.
19
+ */
20
+ function makeLocalProvider() {
21
+ return {
22
+ id: 'mlx_local',
23
+ name: 'Test Local',
24
+ apiUrl: 'http://127.0.0.1:1316/v1',
25
+ endpoint: '/chat/completions',
26
+ local: true,
27
+ headers: () => ({ 'Content-Type': 'application/json' }),
28
+ };
29
+ }
30
+
31
+ /**
32
+ * Mock a fetch Response carrying an OpenAI-style SSE stream.
33
+ * Accepts an array of {content?, finish_reason?} deltas. Each becomes one
34
+ * SSE data line. A final "data: [DONE]" terminator is appended automatically.
35
+ */
36
+ function mockSSEResponse(deltas) {
37
+ const encoder = new TextEncoder();
38
+ const body = new ReadableStream({
39
+ start(controller) {
40
+ for (const delta of deltas) {
41
+ const chunk = { choices: [{ delta }] };
42
+ controller.enqueue(encoder.encode(`data: ${JSON.stringify(chunk)}\n\n`));
43
+ }
44
+ controller.enqueue(encoder.encode('data: [DONE]\n\n'));
45
+ controller.close();
46
+ },
47
+ });
48
+ return new Response(body, {
49
+ status: 200,
50
+ headers: { 'Content-Type': 'text/event-stream' },
51
+ });
52
+ }
53
+
54
+ /**
55
+ * Replace globalThis.fetch with a mock that returns the given Response
56
+ * for every call. Returns a restore function to put the original back.
57
+ */
58
+ function stubFetch(response) {
59
+ const original = globalThis.fetch;
60
+ globalThis.fetch = async () => response;
61
+ return () => { globalThis.fetch = original; };
62
+ }
63
+
64
+ describe('agentLoop — mlx_local short plain-text response flush', () => {
65
+ let restoreFetch;
66
+
67
+ afterEach(() => {
68
+ if (restoreFetch) {
69
+ restoreFetch();
70
+ restoreFetch = null;
71
+ }
72
+ });
73
+
74
+ test('yields text_delta for a <200-char greeting that never hits passthrough threshold', async () => {
75
+ // Gemma 4 E2B greetings are 30-150 chars and emit no <|channel|> markers.
76
+ // Pre-0.30: rawBuffer accumulated silently, never yielded, full response 0 chars.
77
+ // Post-0.30: the stream-done handler flushes the buffer to a text_delta.
78
+ restoreFetch = stubFetch(mockSSEResponse([
79
+ { content: 'Hi' },
80
+ { content: ' there!' },
81
+ { content: ' How can I help?' },
82
+ { finish_reason: 'stop' },
83
+ ]));
84
+
85
+ const gen = agentLoop({
86
+ model: 'test-model',
87
+ messages: [
88
+ { role: 'system', content: 'test' },
89
+ { role: 'user', content: 'hi' },
90
+ ],
91
+ tools: [],
92
+ provider: makeLocalProvider(),
93
+ });
94
+
95
+ const events = [];
96
+ let fullResponse = '';
97
+ for await (const event of gen) {
98
+ events.push(event);
99
+ if (event.type === 'text_delta' && event.text) {
100
+ fullResponse += event.text;
101
+ }
102
+ if (event.type === 'done') break;
103
+ }
104
+
105
+ assert.strictEqual(fullResponse, 'Hi there! How can I help?');
106
+ const textDeltas = events.filter((e) => e.type === 'text_delta');
107
+ assert.ok(textDeltas.length >= 1, 'expected at least one text_delta event');
108
+ const doneEvents = events.filter((e) => e.type === 'done');
109
+ assert.strictEqual(doneEvents.length, 1);
110
+ });
111
+
112
+ test('does not flush when the buffer contains tool call markers', async () => {
113
+ // Guards against false-positive text emission when the model emits a
114
+ // text-based tool call — those are handled by the post-loop parseToolCalls()
115
+ // branch, not the flush path.
116
+ restoreFetch = stubFetch(mockSSEResponse([
117
+ { content: '<tool_call>' },
118
+ { content: '{"name":"web_search","arguments":{"query":"weather"}}' },
119
+ { content: '</tool_call>' },
120
+ { finish_reason: 'stop' },
121
+ ]));
122
+
123
+ const gen = agentLoop({
124
+ model: 'test-model',
125
+ messages: [
126
+ { role: 'system', content: 'test' },
127
+ { role: 'user', content: 'weather?' },
128
+ ],
129
+ tools: [
130
+ {
131
+ name: 'web_search',
132
+ description: 'Search',
133
+ parameters: { type: 'object' },
134
+ execute: async () => 'sunny',
135
+ },
136
+ ],
137
+ provider: makeLocalProvider(),
138
+ maxTurns: 1, // Cap after the first iteration so the loop exits
139
+ });
140
+
141
+ const events = [];
142
+ for await (const event of gen) {
143
+ events.push(event);
144
+ if (events.length > 20) break; // Safety cap in case tool loop misbehaves
145
+ }
146
+
147
+ // Critical assertion: no text_delta should carry the raw <tool_call> markup.
148
+ // If the flush branch fires unguarded, the user would see literal
149
+ // "<tool_call>..." in their chat bubble.
150
+ const textWithMarkers = events
151
+ .filter((e) => e.type === 'text_delta')
152
+ .filter((e) => e.text && e.text.includes('<tool_call>'));
153
+ assert.strictEqual(textWithMarkers.length, 0,
154
+ 'tool_call markup must not leak through the flush branch');
155
+ });
156
+
157
+ test('end-to-end text accumulation matches the realtime consumer pattern', async () => {
158
+ // Simulates a streaming consumer (e.g. a WebSocket bridge): accumulate
159
+ // text from text_delta events, break on done. Pre-0.30 the accumulated
160
+ // string was empty. Post-0.30 it matches the model's full utterance.
161
+ restoreFetch = stubFetch(mockSSEResponse([
162
+ { content: 'Hello' },
163
+ { content: '!' },
164
+ { finish_reason: 'stop' },
165
+ ]));
166
+
167
+ const gen = agentLoop({
168
+ model: 'test-model',
169
+ messages: [{ role: 'user', content: 'hi' }],
170
+ tools: [],
171
+ provider: makeLocalProvider(),
172
+ });
173
+
174
+ let fullResponse = '';
175
+ let textDeltaCount = 0;
176
+ let sawDone = false;
177
+ for await (const event of gen) {
178
+ if (event.type === 'text_delta') {
179
+ fullResponse += event.text;
180
+ textDeltaCount++;
181
+ }
182
+ if (event.type === 'done') {
183
+ sawDone = true;
184
+ break;
185
+ }
186
+ }
187
+
188
+ assert.strictEqual(fullResponse, 'Hello!');
189
+ assert.ok(textDeltaCount > 0, 'expected at least one text_delta');
190
+ assert.strictEqual(sawDone, true);
191
+ });
192
+ });