@stevederico/dotbot 0.28.0 → 0.31.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +32 -0
- package/README.md +65 -24
- package/bin/dotbot.js +63 -93
- package/core/agent.js +30 -13
- package/core/cdp.js +5 -58
- package/core/compaction.js +1 -1
- package/core/cron_handler.js +38 -27
- package/core/init.js +6 -1
- package/core/trigger_handler.js +5 -3
- package/docs/core.md +1 -1
- package/docs/protected-files.md +5 -5
- package/index.js +0 -7
- package/package.json +1 -1
- package/storage/SQLiteAdapter.js +1 -1
- package/storage/SQLiteCronAdapter.js +8 -92
- package/storage/index.js +0 -3
- package/test/agent.test.js +192 -0
- package/test/cron_handler.test.js +116 -0
- package/tools/appgen.js +1 -10
- package/tools/browser.js +0 -15
- package/tools/code.js +0 -28
- package/tools/images.js +0 -10
- package/tools/index.js +2 -4
- package/tools/jobs.js +0 -2
- package/tools/memory.js +1 -1
- package/tools/tasks.js +0 -2
- package/tools/web.js +0 -36
- package/utils/providers.js +21 -0
- package/.claude/settings.local.json +0 -7
- package/dotbot.db +0 -0
- package/examples/sqlite-session-example.js +0 -69
- package/observer/index.js +0 -164
package/core/cron_handler.js
CHANGED
|
@@ -1,9 +1,8 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Cron task handler for dotbot.
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
* notification hooks.
|
|
4
|
+
* Reusable cron task executor that handles session resolution, stale user
|
|
5
|
+
* gates, task injection, and notification hooks.
|
|
7
6
|
*/
|
|
8
7
|
|
|
9
8
|
import { compactMessages } from './compaction.js';
|
|
@@ -18,6 +17,7 @@ import { compactMessages } from './compaction.js';
|
|
|
18
17
|
* @param {Object} options.memoryStore - Memory store instance (optional)
|
|
19
18
|
* @param {Object} options.providers - Provider API keys for compaction
|
|
20
19
|
* @param {number} [options.staleThresholdMs=86400000] - Skip heartbeat if user idle longer than this (default: 24h)
|
|
20
|
+
* @param {string} [options.notificationTitle='Assistant'] - Title used when dispatching notifications via hooks.onNotification
|
|
21
21
|
* @param {Object} [options.hooks] - Host-specific hooks
|
|
22
22
|
* @param {Function} [options.hooks.onNotification] - async (userId, { title, body, type }) => void
|
|
23
23
|
* @param {Function} [options.hooks.taskFetcher] - async (userId, taskId) => task object
|
|
@@ -31,6 +31,7 @@ export function createCronHandler({
|
|
|
31
31
|
memoryStore,
|
|
32
32
|
providers = {},
|
|
33
33
|
staleThresholdMs = 24 * 60 * 60 * 1000,
|
|
34
|
+
notificationTitle = 'Assistant',
|
|
34
35
|
hooks = {},
|
|
35
36
|
}) {
|
|
36
37
|
// Agent reference - will be set after init() creates the agent
|
|
@@ -139,7 +140,7 @@ export function createCronHandler({
|
|
|
139
140
|
if (trimmed && trimmed.length > 10 && updatedSession.owner && hooks.onNotification) {
|
|
140
141
|
try {
|
|
141
142
|
await hooks.onNotification(updatedSession.owner, {
|
|
142
|
-
title:
|
|
143
|
+
title: notificationTitle,
|
|
143
144
|
body: trimmed.slice(0, 500),
|
|
144
145
|
type: task.name === 'heartbeat' ? 'heartbeat' : 'cron',
|
|
145
146
|
});
|
|
@@ -224,33 +225,43 @@ export function createCronHandler({
|
|
|
224
225
|
tasks = await taskStore.findTasks(session.owner, { status: ['pending', 'in_progress'] });
|
|
225
226
|
}
|
|
226
227
|
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
228
|
+
// Skip the LLM call entirely when there's nothing to discuss. A heartbeat
|
|
229
|
+
// with no active tasks is a waste of tokens on every provider (and is
|
|
230
|
+
// especially expensive on cloud providers that charge per call). The
|
|
231
|
+
// caller at handleTaskFire() treats a null return as "skip this tick".
|
|
232
|
+
if (tasks.length === 0) {
|
|
233
|
+
console.log(`[cron] heartbeat for ${session.owner}: no active tasks, skipping AI call`);
|
|
234
|
+
return null;
|
|
235
|
+
}
|
|
236
|
+
|
|
237
|
+
// Check if any task is in auto mode with pending steps
|
|
238
|
+
const autoTask = tasks.find(t => t.mode === 'auto' && t.steps?.some(s => !s.done));
|
|
239
|
+
if (autoTask) {
|
|
240
|
+
const doneCount = autoTask.steps.filter(s => s.done).length;
|
|
241
|
+
const nextStep = autoTask.steps.find(s => !s.done);
|
|
242
|
+
taskContent = `[Heartbeat] Auto-mode task "${autoTask.description}" has pending steps (${doneCount}/${autoTask.steps.length} done). Call task_work with task_id "${autoTask._id || autoTask.id}" to execute: "${nextStep.text}"`;
|
|
243
|
+
} else {
|
|
244
|
+
// List all active tasks
|
|
245
|
+
const lines = tasks.map(t => {
|
|
246
|
+
let line = `• [${t.priority}] ${t.description}`;
|
|
247
|
+
if (t.mode) line += ` [${t.mode}]`;
|
|
248
|
+
if (t.deadline) line += ` (due: ${t.deadline})`;
|
|
249
|
+
if (t.steps && t.steps.length > 0) {
|
|
250
|
+
const done = t.steps.filter(s => s.done).length;
|
|
251
|
+
line += ` (${done}/${t.steps.length} steps)`;
|
|
252
|
+
for (const step of t.steps) {
|
|
253
|
+
line += `\n ${step.done ? '[x]' : '[ ]'} ${step.text}`;
|
|
246
254
|
}
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
}
|
|
255
|
+
}
|
|
256
|
+
return line;
|
|
257
|
+
});
|
|
258
|
+
taskContent += `\n\nActive tasks:\n${lines.join('\n')}`;
|
|
251
259
|
}
|
|
252
260
|
} catch (err) {
|
|
261
|
+
// Fail closed: if we can't fetch tasks, skip this heartbeat rather
|
|
262
|
+
// than call the LLM with a meaningless default prompt.
|
|
253
263
|
console.error('[cron] failed to fetch tasks for heartbeat:', err.message);
|
|
264
|
+
return null;
|
|
254
265
|
}
|
|
255
266
|
|
|
256
267
|
return taskContent;
|
package/core/init.js
CHANGED
|
@@ -25,6 +25,7 @@ import { createTriggerHandler } from './trigger_handler.js';
|
|
|
25
25
|
* @param {Object} [options.providers] - Provider API keys: { anthropic: { apiKey }, openai: { apiKey }, xai: { apiKey } }
|
|
26
26
|
* @param {Array} [options.tools] - Tool definitions (default: coreTools)
|
|
27
27
|
* @param {number} [options.staleThresholdMs=86400000] - Skip heartbeat if user idle longer than this (default: 24h)
|
|
28
|
+
* @param {string} [options.notificationTitle='Assistant'] - Title used when cron/trigger handlers dispatch notifications
|
|
28
29
|
* @param {Function} [options.systemPrompt] - System prompt builder function
|
|
29
30
|
* @param {Function} [options.screenshotUrlPattern] - Screenshot URL pattern function
|
|
30
31
|
* @param {Object} [options.compaction] - Compaction settings
|
|
@@ -42,6 +43,7 @@ export async function init({
|
|
|
42
43
|
providers = {},
|
|
43
44
|
tools = coreTools,
|
|
44
45
|
staleThresholdMs = 24 * 60 * 60 * 1000,
|
|
46
|
+
notificationTitle = 'Assistant',
|
|
45
47
|
systemPrompt,
|
|
46
48
|
screenshotUrlPattern,
|
|
47
49
|
compaction = { enabled: true },
|
|
@@ -68,7 +70,8 @@ export async function init({
|
|
|
68
70
|
memory: memoryStore,
|
|
69
71
|
};
|
|
70
72
|
|
|
71
|
-
// For stores-only mode (
|
|
73
|
+
// For stores-only mode (host manages sessions/cron/agent itself),
|
|
74
|
+
// skip session/cron/agent setup
|
|
72
75
|
if (storesOnly) {
|
|
73
76
|
return {
|
|
74
77
|
stores,
|
|
@@ -101,6 +104,7 @@ export async function init({
|
|
|
101
104
|
memoryStore,
|
|
102
105
|
providers,
|
|
103
106
|
staleThresholdMs,
|
|
107
|
+
notificationTitle,
|
|
104
108
|
hooks,
|
|
105
109
|
});
|
|
106
110
|
|
|
@@ -134,6 +138,7 @@ export async function init({
|
|
|
134
138
|
triggerStore,
|
|
135
139
|
memoryStore,
|
|
136
140
|
providers,
|
|
141
|
+
notificationTitle,
|
|
137
142
|
hooks,
|
|
138
143
|
});
|
|
139
144
|
|
package/core/trigger_handler.js
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Trigger handler for dotbot.
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
4
|
+
* Reusable trigger executor that handles event matching, firing, and
|
|
5
|
+
* notification hooks.
|
|
6
6
|
*/
|
|
7
7
|
|
|
8
8
|
import { compactMessages } from './compaction.js';
|
|
@@ -16,6 +16,7 @@ import { compactMessages } from './compaction.js';
|
|
|
16
16
|
* @param {Object} options.triggerStore - Trigger store instance
|
|
17
17
|
* @param {Object} options.memoryStore - Memory store instance (optional)
|
|
18
18
|
* @param {Object} options.providers - Provider API keys for compaction
|
|
19
|
+
* @param {string} [options.notificationTitle='Assistant'] - Title used when dispatching notifications via hooks.onNotification
|
|
19
20
|
* @param {Object} [options.hooks] - Host-specific hooks
|
|
20
21
|
* @param {Function} [options.hooks.onNotification] - async (userId, { title, body, type }) => void
|
|
21
22
|
* @returns {Function} Async function: (eventType, userId, eventData?) => Promise<void>
|
|
@@ -26,6 +27,7 @@ export function createTriggerHandler({
|
|
|
26
27
|
triggerStore,
|
|
27
28
|
memoryStore,
|
|
28
29
|
providers = {},
|
|
30
|
+
notificationTitle = 'Assistant',
|
|
29
31
|
hooks = {},
|
|
30
32
|
}) {
|
|
31
33
|
/**
|
|
@@ -133,7 +135,7 @@ export function createTriggerHandler({
|
|
|
133
135
|
if (trimmed && trimmed.length > 10 && updatedSession.owner && hooks.onNotification) {
|
|
134
136
|
try {
|
|
135
137
|
await hooks.onNotification(updatedSession.owner, {
|
|
136
|
-
title:
|
|
138
|
+
title: notificationTitle,
|
|
137
139
|
body: trimmed.slice(0, 500),
|
|
138
140
|
type: 'trigger',
|
|
139
141
|
});
|
package/docs/core.md
CHANGED
package/docs/protected-files.md
CHANGED
|
@@ -48,12 +48,12 @@ Absolutely! Yes, you should definitely protect .ssh and similar system-level dir
|
|
|
48
48
|
~/Library/Application Support/Firefox/
|
|
49
49
|
|
|
50
50
|
|
|
51
|
-
|
|
51
|
+
Host-App Data (Example)
|
|
52
52
|
|
|
53
|
-
~/.
|
|
54
|
-
~/.
|
|
55
|
-
~/.
|
|
53
|
+
~/.myapp/logs/ # May contain user conversations
|
|
54
|
+
~/.myapp/chat_history.json
|
|
55
|
+
~/.myapp/*.db
|
|
56
56
|
|
|
57
57
|
|
|
58
|
-
Bottom line: Any path under ~ (home directory) that contains credentials, personal data, command history, or configuration files should be protected. The general rule is: never search/glob/grep from
|
|
58
|
+
Bottom line: Any path under ~ (home directory) that contains credentials, personal data, command history, or configuration files should be protected. The general rule is: never search/glob/grep from the home directory root — only within specific project directories.
|
|
59
59
|
ctrl+q to copy · 6 snippets
|
package/index.js
CHANGED
|
@@ -18,10 +18,8 @@ import {
|
|
|
18
18
|
notifyTools,
|
|
19
19
|
createBrowserTools,
|
|
20
20
|
taskTools,
|
|
21
|
-
goalTools,
|
|
22
21
|
triggerTools,
|
|
23
22
|
jobTools,
|
|
24
|
-
cronTools,
|
|
25
23
|
eventTools,
|
|
26
24
|
appgenTools,
|
|
27
25
|
} from './tools/index.js';
|
|
@@ -40,9 +38,6 @@ export {
|
|
|
40
38
|
runWithConcurrency,
|
|
41
39
|
TaskStore,
|
|
42
40
|
SQLiteTaskStore,
|
|
43
|
-
// Backwards compatibility aliases
|
|
44
|
-
GoalStore,
|
|
45
|
-
SQLiteGoalStore,
|
|
46
41
|
TriggerStore,
|
|
47
42
|
SQLiteTriggerStore,
|
|
48
43
|
SQLiteMemoryStore,
|
|
@@ -65,10 +60,8 @@ export {
|
|
|
65
60
|
browserTools,
|
|
66
61
|
createBrowserTools,
|
|
67
62
|
taskTools,
|
|
68
|
-
goalTools, // backwards compatibility alias
|
|
69
63
|
triggerTools,
|
|
70
64
|
jobTools,
|
|
71
|
-
cronTools, // backwards compatibility alias
|
|
72
65
|
eventTools,
|
|
73
66
|
appgenTools,
|
|
74
67
|
} from './tools/index.js';
|
package/package.json
CHANGED
package/storage/SQLiteAdapter.js
CHANGED
|
@@ -11,7 +11,7 @@ import { toStandardFormat } from '../core/normalize.js';
|
|
|
11
11
|
* @param {string} options.agentPersonality - Personality description
|
|
12
12
|
* @returns {string} System prompt
|
|
13
13
|
*/
|
|
14
|
-
export function defaultSystemPrompt({ agentName = '
|
|
14
|
+
export function defaultSystemPrompt({ agentName = 'Assistant', agentPersonality = '' } = {}) {
|
|
15
15
|
const now = new Date().toISOString();
|
|
16
16
|
return `You are a helpful personal AI assistant called ${agentName}.${agentPersonality ? `\nYour personality and tone: ${agentPersonality}. Embody this in all responses.` : ''}
|
|
17
17
|
You have access to tools for searching the web, reading/writing files, fetching URLs, running code, long-term memory, and scheduled tasks.
|
|
package/storage/SQLiteCronAdapter.js
CHANGED
|
@@ -220,16 +220,7 @@ export class SQLiteCronStore extends CronStore {
|
|
|
220
220
|
"SELECT * FROM cron_tasks WHERE session_id = ? AND name != 'heartbeat' ORDER BY next_run_at ASC"
|
|
221
221
|
).all(sessionId || 'default');
|
|
222
222
|
|
|
223
|
-
return rows.map(r => (
|
|
224
|
-
id: r.id,
|
|
225
|
-
name: r.name,
|
|
226
|
-
prompt: r.prompt,
|
|
227
|
-
nextRunAt: new Date(r.next_run_at),
|
|
228
|
-
recurring: !!r.recurring,
|
|
229
|
-
intervalMs: r.interval_ms,
|
|
230
|
-
enabled: !!r.enabled,
|
|
231
|
-
lastRunAt: r.last_run_at ? new Date(r.last_run_at) : null,
|
|
232
|
-
}));
|
|
223
|
+
return rows.map(r => this._rowToTask(r));
|
|
233
224
|
}
|
|
234
225
|
|
|
235
226
|
/**
|
|
@@ -257,18 +248,7 @@ export class SQLiteCronStore extends CronStore {
|
|
|
257
248
|
|
|
258
249
|
const rows = this.db.prepare(query).all(...params);
|
|
259
250
|
|
|
260
|
-
return rows.map(r => (
|
|
261
|
-
id: r.id,
|
|
262
|
-
name: r.name,
|
|
263
|
-
prompt: r.prompt,
|
|
264
|
-
sessionId: r.session_id,
|
|
265
|
-
nextRunAt: new Date(r.next_run_at),
|
|
266
|
-
recurring: !!r.recurring,
|
|
267
|
-
intervalMs: r.interval_ms,
|
|
268
|
-
enabled: !!r.enabled,
|
|
269
|
-
lastRunAt: r.last_run_at ? new Date(r.last_run_at) : null,
|
|
270
|
-
createdAt: new Date(r.created_at),
|
|
271
|
-
}));
|
|
251
|
+
return rows.map(r => this._rowToTask(r));
|
|
272
252
|
}
|
|
273
253
|
|
|
274
254
|
/**
|
|
@@ -374,53 +354,6 @@ export class SQLiteCronStore extends CronStore {
|
|
|
374
354
|
return null;
|
|
375
355
|
}
|
|
376
356
|
|
|
377
|
-
/**
|
|
378
|
-
* Ensure a Morning Brief job exists for the user (disabled by default).
|
|
379
|
-
* Creates a daily recurring job at 8:00 AM if not present.
|
|
380
|
-
*
|
|
381
|
-
* @param {string} userId - User ID
|
|
382
|
-
* @returns {Promise<Object|null>} Created task or null if already exists
|
|
383
|
-
*/
|
|
384
|
-
async ensureMorningBrief(userId) {
|
|
385
|
-
if (!this.db || !userId) return null;
|
|
386
|
-
|
|
387
|
-
// Check if Morning Brief already exists for this user
|
|
388
|
-
const existing = this.db.prepare(
|
|
389
|
-
`SELECT id FROM cron_tasks WHERE user_id = ? AND name = 'Morning Brief' LIMIT 1`
|
|
390
|
-
).get(userId);
|
|
391
|
-
if (existing) return null;
|
|
392
|
-
|
|
393
|
-
const DAY_MS = 24 * 60 * 60 * 1000;
|
|
394
|
-
const MORNING_BRIEF_PROMPT = `Good morning! Give me a brief summary to start my day:
|
|
395
|
-
1. What's on my calendar today?
|
|
396
|
-
2. Any important reminders or tasks due?
|
|
397
|
-
3. A quick weather update for my location.
|
|
398
|
-
Keep it concise and actionable.`;
|
|
399
|
-
|
|
400
|
-
// Calculate next 8:00 AM
|
|
401
|
-
const now = new Date();
|
|
402
|
-
const today8AM = new Date(now.getFullYear(), now.getMonth(), now.getDate(), 8, 0, 0, 0);
|
|
403
|
-
const nextRun = now.getTime() < today8AM.getTime()
|
|
404
|
-
? today8AM.getTime()
|
|
405
|
-
: today8AM.getTime() + DAY_MS;
|
|
406
|
-
|
|
407
|
-
const id = crypto.randomUUID();
|
|
408
|
-
const nowMs = Date.now();
|
|
409
|
-
|
|
410
|
-
const result = this.db.prepare(`
|
|
411
|
-
INSERT OR IGNORE INTO cron_tasks (id, name, prompt, session_id, user_id, next_run_at, interval_ms, recurring, enabled, created_at, last_run_at)
|
|
412
|
-
VALUES (?, 'Morning Brief', ?, 'default', ?, ?, ?, 1, 0, ?, NULL)
|
|
413
|
-
`).run(id, MORNING_BRIEF_PROMPT, userId, nextRun, DAY_MS, nowMs);
|
|
414
|
-
|
|
415
|
-
if (result.changes > 0) {
|
|
416
|
-
const runTime = new Date(nextRun);
|
|
417
|
-
console.log(`[cron] created Morning Brief for user ${userId}, next run at ${runTime.toLocaleTimeString()} (disabled by default)`);
|
|
418
|
-
return { id };
|
|
419
|
-
}
|
|
420
|
-
|
|
421
|
-
return null;
|
|
422
|
-
}
|
|
423
|
-
|
|
424
357
|
/**
|
|
425
358
|
* Get heartbeat status for a user
|
|
426
359
|
*
|
|
@@ -455,35 +388,18 @@ Keep it concise and actionable.`;
|
|
|
455
388
|
async resetHeartbeat(userId) {
|
|
456
389
|
if (!this.db || !userId) return null;
|
|
457
390
|
|
|
458
|
-
|
|
391
|
+
this.db.prepare(
|
|
459
392
|
"DELETE FROM cron_tasks WHERE user_id = ? AND name = 'heartbeat'"
|
|
460
393
|
).run(userId);
|
|
461
394
|
console.log(`[cron] deleted existing heartbeat(s) for user ${userId}`);
|
|
462
395
|
|
|
463
|
-
const
|
|
464
|
-
const now = Date.now();
|
|
465
|
-
const id = crypto.randomUUID();
|
|
396
|
+
const result = await this.ensureHeartbeat(userId);
|
|
466
397
|
|
|
467
|
-
|
|
468
|
-
INSERT INTO cron_tasks (id, name, prompt, session_id, user_id, next_run_at, interval_ms, recurring, enabled, created_at, last_run_at)
|
|
469
|
-
VALUES (?, 'heartbeat', ?, 'default', ?, ?, ?, 1, 1, ?, NULL)
|
|
470
|
-
`).run(id, HEARTBEAT_PROMPT, userId, now + jitter, HEARTBEAT_INTERVAL_MS, now);
|
|
471
|
-
|
|
472
|
-
console.log(`[cron] created new heartbeat for user ${userId}, first run in ${Math.round(jitter / 60000)}m`);
|
|
398
|
+
if (!result) return null;
|
|
473
399
|
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
prompt: HEARTBEAT_PROMPT,
|
|
478
|
-
userId,
|
|
479
|
-
sessionId: 'default',
|
|
480
|
-
nextRunAt: new Date(now + jitter),
|
|
481
|
-
intervalMs: HEARTBEAT_INTERVAL_MS,
|
|
482
|
-
recurring: true,
|
|
483
|
-
enabled: true,
|
|
484
|
-
createdAt: new Date(now),
|
|
485
|
-
lastRunAt: null,
|
|
486
|
-
};
|
|
400
|
+
// Return the full task object for the newly created heartbeat
|
|
401
|
+
const row = this.db.prepare('SELECT * FROM cron_tasks WHERE id = ?').get(result.id);
|
|
402
|
+
return row ? this._rowToTask(row) : null;
|
|
487
403
|
}
|
|
488
404
|
|
|
489
405
|
/**
|
package/storage/index.js
CHANGED
|
@@ -5,9 +5,6 @@ export { CronStore } from './CronStore.js';
|
|
|
5
5
|
export { SQLiteCronStore, parseInterval, HEARTBEAT_INTERVAL_MS, HEARTBEAT_PROMPT } from './SQLiteCronAdapter.js';
|
|
6
6
|
export { TaskStore } from './TaskStore.js';
|
|
7
7
|
export { SQLiteTaskStore } from './SQLiteTaskAdapter.js';
|
|
8
|
-
// Backwards compatibility aliases
|
|
9
|
-
export { TaskStore as GoalStore } from './TaskStore.js';
|
|
10
|
-
export { SQLiteTaskStore as SQLiteGoalStore } from './SQLiteTaskAdapter.js';
|
|
11
8
|
export { TriggerStore } from './TriggerStore.js';
|
|
12
9
|
export { SQLiteTriggerStore } from './SQLiteTriggerAdapter.js';
|
|
13
10
|
export { SQLiteMemoryStore } from './SQLiteMemoryAdapter.js';
|
|
package/test/agent.test.js
ADDED
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
import { test, describe, beforeEach, afterEach } from 'node:test';
|
|
2
|
+
import assert from 'node:assert';
|
|
3
|
+
import { agentLoop } from '../core/agent.js';
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* Regression tests for the mlx_local provider branch of agentLoop.
|
|
7
|
+
*
|
|
8
|
+
* These cover the flush branch added in 0.30 that handles short plain-text
|
|
9
|
+
* responses from local models that never emit gpt-oss channel tokens
|
|
10
|
+
* (Gemma 4 E2B, LFM2.5, SmolLM). Without the flush, the rawBuffer was
|
|
11
|
+
* silently discarded on stream end and the downstream consumer received
|
|
12
|
+
* zero text_delta events — empty assistant bubbles in the UI.
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
/**
|
|
16
|
+
* Build a minimal mlx_local-style provider for agentLoop tests.
|
|
17
|
+
* The `id` must be "mlx_local" to hit the buffered-parsing branch,
|
|
18
|
+
* and `local: true` skips the failover path for a direct fetch.
|
|
19
|
+
*/
|
|
20
|
+
function makeLocalProvider() {
|
|
21
|
+
return {
|
|
22
|
+
id: 'mlx_local',
|
|
23
|
+
name: 'Test Local',
|
|
24
|
+
apiUrl: 'http://127.0.0.1:1316/v1',
|
|
25
|
+
endpoint: '/chat/completions',
|
|
26
|
+
local: true,
|
|
27
|
+
headers: () => ({ 'Content-Type': 'application/json' }),
|
|
28
|
+
};
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
/**
|
|
32
|
+
* Mock a fetch Response carrying an OpenAI-style SSE stream.
|
|
33
|
+
* Accepts an array of {content?, finish_reason?} deltas. Each becomes one
|
|
34
|
+
* SSE data line. A final "data: [DONE]" terminator is appended automatically.
|
|
35
|
+
*/
|
|
36
|
+
function mockSSEResponse(deltas) {
|
|
37
|
+
const encoder = new TextEncoder();
|
|
38
|
+
const body = new ReadableStream({
|
|
39
|
+
start(controller) {
|
|
40
|
+
for (const delta of deltas) {
|
|
41
|
+
const chunk = { choices: [{ delta }] };
|
|
42
|
+
controller.enqueue(encoder.encode(`data: ${JSON.stringify(chunk)}\n\n`));
|
|
43
|
+
}
|
|
44
|
+
controller.enqueue(encoder.encode('data: [DONE]\n\n'));
|
|
45
|
+
controller.close();
|
|
46
|
+
},
|
|
47
|
+
});
|
|
48
|
+
return new Response(body, {
|
|
49
|
+
status: 200,
|
|
50
|
+
headers: { 'Content-Type': 'text/event-stream' },
|
|
51
|
+
});
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
/**
|
|
55
|
+
* Replace globalThis.fetch with a mock that returns the given Response
|
|
56
|
+
* for every call. Returns a restore function to put the original back.
|
|
57
|
+
*/
|
|
58
|
+
function stubFetch(response) {
|
|
59
|
+
const original = globalThis.fetch;
|
|
60
|
+
globalThis.fetch = async () => response;
|
|
61
|
+
return () => { globalThis.fetch = original; };
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
describe('agentLoop — mlx_local short plain-text response flush', () => {
|
|
65
|
+
let restoreFetch;
|
|
66
|
+
|
|
67
|
+
afterEach(() => {
|
|
68
|
+
if (restoreFetch) {
|
|
69
|
+
restoreFetch();
|
|
70
|
+
restoreFetch = null;
|
|
71
|
+
}
|
|
72
|
+
});
|
|
73
|
+
|
|
74
|
+
test('yields text_delta for a <200-char greeting that never hits passthrough threshold', async () => {
|
|
75
|
+
// Gemma 4 E2B greetings are 30-150 chars and emit no <|channel|> markers.
|
|
76
|
+
// Pre-0.30: rawBuffer accumulated silently, never yielded, full response 0 chars.
|
|
77
|
+
// Post-0.30: the stream-done handler flushes the buffer to a text_delta.
|
|
78
|
+
restoreFetch = stubFetch(mockSSEResponse([
|
|
79
|
+
{ content: 'Hi' },
|
|
80
|
+
{ content: ' there!' },
|
|
81
|
+
{ content: ' How can I help?' },
|
|
82
|
+
{ finish_reason: 'stop' },
|
|
83
|
+
]));
|
|
84
|
+
|
|
85
|
+
const gen = agentLoop({
|
|
86
|
+
model: 'test-model',
|
|
87
|
+
messages: [
|
|
88
|
+
{ role: 'system', content: 'test' },
|
|
89
|
+
{ role: 'user', content: 'hi' },
|
|
90
|
+
],
|
|
91
|
+
tools: [],
|
|
92
|
+
provider: makeLocalProvider(),
|
|
93
|
+
});
|
|
94
|
+
|
|
95
|
+
const events = [];
|
|
96
|
+
let fullResponse = '';
|
|
97
|
+
for await (const event of gen) {
|
|
98
|
+
events.push(event);
|
|
99
|
+
if (event.type === 'text_delta' && event.text) {
|
|
100
|
+
fullResponse += event.text;
|
|
101
|
+
}
|
|
102
|
+
if (event.type === 'done') break;
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
assert.strictEqual(fullResponse, 'Hi there! How can I help?');
|
|
106
|
+
const textDeltas = events.filter((e) => e.type === 'text_delta');
|
|
107
|
+
assert.ok(textDeltas.length >= 1, 'expected at least one text_delta event');
|
|
108
|
+
const doneEvents = events.filter((e) => e.type === 'done');
|
|
109
|
+
assert.strictEqual(doneEvents.length, 1);
|
|
110
|
+
});
|
|
111
|
+
|
|
112
|
+
test('does not flush when the buffer contains tool call markers', async () => {
|
|
113
|
+
// Guards against false-positive text emission when the model emits a
|
|
114
|
+
// text-based tool call — those are handled by the post-loop parseToolCalls()
|
|
115
|
+
// branch, not the flush path.
|
|
116
|
+
restoreFetch = stubFetch(mockSSEResponse([
|
|
117
|
+
{ content: '<tool_call>' },
|
|
118
|
+
{ content: '{"name":"web_search","arguments":{"query":"weather"}}' },
|
|
119
|
+
{ content: '</tool_call>' },
|
|
120
|
+
{ finish_reason: 'stop' },
|
|
121
|
+
]));
|
|
122
|
+
|
|
123
|
+
const gen = agentLoop({
|
|
124
|
+
model: 'test-model',
|
|
125
|
+
messages: [
|
|
126
|
+
{ role: 'system', content: 'test' },
|
|
127
|
+
{ role: 'user', content: 'weather?' },
|
|
128
|
+
],
|
|
129
|
+
tools: [
|
|
130
|
+
{
|
|
131
|
+
name: 'web_search',
|
|
132
|
+
description: 'Search',
|
|
133
|
+
parameters: { type: 'object' },
|
|
134
|
+
execute: async () => 'sunny',
|
|
135
|
+
},
|
|
136
|
+
],
|
|
137
|
+
provider: makeLocalProvider(),
|
|
138
|
+
maxTurns: 1, // Cap after the first iteration so the loop exits
|
|
139
|
+
});
|
|
140
|
+
|
|
141
|
+
const events = [];
|
|
142
|
+
for await (const event of gen) {
|
|
143
|
+
events.push(event);
|
|
144
|
+
if (events.length > 20) break; // Safety cap in case tool loop misbehaves
|
|
145
|
+
}
|
|
146
|
+
|
|
147
|
+
// Critical assertion: no text_delta should carry the raw <tool_call> markup.
|
|
148
|
+
// If the flush branch fires unguarded, the user would see literal
|
|
149
|
+
// "<tool_call>..." in their chat bubble.
|
|
150
|
+
const textWithMarkers = events
|
|
151
|
+
.filter((e) => e.type === 'text_delta')
|
|
152
|
+
.filter((e) => e.text && e.text.includes('<tool_call>'));
|
|
153
|
+
assert.strictEqual(textWithMarkers.length, 0,
|
|
154
|
+
'tool_call markup must not leak through the flush branch');
|
|
155
|
+
});
|
|
156
|
+
|
|
157
|
+
test('end-to-end text accumulation matches the realtime consumer pattern', async () => {
|
|
158
|
+
// Simulates a streaming consumer (e.g. a WebSocket bridge): accumulate
|
|
159
|
+
// text from text_delta events, break on done. Pre-0.30 the accumulated
|
|
160
|
+
// string was empty. Post-0.30 it matches the model's full utterance.
|
|
161
|
+
restoreFetch = stubFetch(mockSSEResponse([
|
|
162
|
+
{ content: 'Hello' },
|
|
163
|
+
{ content: '!' },
|
|
164
|
+
{ finish_reason: 'stop' },
|
|
165
|
+
]));
|
|
166
|
+
|
|
167
|
+
const gen = agentLoop({
|
|
168
|
+
model: 'test-model',
|
|
169
|
+
messages: [{ role: 'user', content: 'hi' }],
|
|
170
|
+
tools: [],
|
|
171
|
+
provider: makeLocalProvider(),
|
|
172
|
+
});
|
|
173
|
+
|
|
174
|
+
let fullResponse = '';
|
|
175
|
+
let textDeltaCount = 0;
|
|
176
|
+
let sawDone = false;
|
|
177
|
+
for await (const event of gen) {
|
|
178
|
+
if (event.type === 'text_delta') {
|
|
179
|
+
fullResponse += event.text;
|
|
180
|
+
textDeltaCount++;
|
|
181
|
+
}
|
|
182
|
+
if (event.type === 'done') {
|
|
183
|
+
sawDone = true;
|
|
184
|
+
break;
|
|
185
|
+
}
|
|
186
|
+
}
|
|
187
|
+
|
|
188
|
+
assert.strictEqual(fullResponse, 'Hello!');
|
|
189
|
+
assert.ok(textDeltaCount > 0, 'expected at least one text_delta');
|
|
190
|
+
assert.strictEqual(sawDone, true);
|
|
191
|
+
});
|
|
192
|
+
});
|