visionclaw 0.1.35 → 0.1.37

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (120):
  1. package/CHANGELOG.md +30 -0
  2. package/dist/agent/client-factory.d.ts +32 -0
  3. package/dist/agent/client-factory.d.ts.map +1 -0
  4. package/dist/agent/client-factory.js +68 -0
  5. package/dist/agent/client-factory.js.map +1 -0
  6. package/dist/agent/fast-responder.d.ts +2 -1
  7. package/dist/agent/fast-responder.d.ts.map +1 -1
  8. package/dist/agent/fast-responder.js +4 -6
  9. package/dist/agent/fast-responder.js.map +1 -1
  10. package/dist/agent/gather-context.d.ts +15 -0
  11. package/dist/agent/gather-context.d.ts.map +1 -0
  12. package/dist/agent/gather-context.js +55 -0
  13. package/dist/agent/gather-context.js.map +1 -0
  14. package/dist/agent/interrupt-handler.d.ts +30 -0
  15. package/dist/agent/interrupt-handler.d.ts.map +1 -0
  16. package/dist/agent/interrupt-handler.js +141 -0
  17. package/dist/agent/interrupt-handler.js.map +1 -0
  18. package/dist/agent/loop.d.ts +3 -18
  19. package/dist/agent/loop.d.ts.map +1 -1
  20. package/dist/agent/loop.js +67 -532
  21. package/dist/agent/loop.js.map +1 -1
  22. package/dist/agent/mailbox.d.ts +28 -0
  23. package/dist/agent/mailbox.d.ts.map +1 -0
  24. package/dist/agent/mailbox.js +73 -0
  25. package/dist/agent/mailbox.js.map +1 -0
  26. package/dist/agent/onboarding-gate.d.ts +14 -0
  27. package/dist/agent/onboarding-gate.d.ts.map +1 -0
  28. package/dist/agent/onboarding-gate.js +70 -0
  29. package/dist/agent/onboarding-gate.js.map +1 -0
  30. package/dist/agent/session-manager.d.ts +69 -0
  31. package/dist/agent/session-manager.d.ts.map +1 -0
  32. package/dist/agent/session-manager.js +160 -0
  33. package/dist/agent/session-manager.js.map +1 -0
  34. package/dist/agent/session.d.ts +0 -1
  35. package/dist/agent/session.d.ts.map +1 -1
  36. package/dist/agent/session.js +5 -11
  37. package/dist/agent/session.js.map +1 -1
  38. package/dist/agent/state.d.ts +21 -0
  39. package/dist/agent/state.d.ts.map +1 -0
  40. package/dist/agent/state.js +11 -0
  41. package/dist/agent/state.js.map +1 -0
  42. package/dist/agent/stream-handler.d.ts +10 -0
  43. package/dist/agent/stream-handler.d.ts.map +1 -0
  44. package/dist/agent/stream-handler.js +232 -0
  45. package/dist/agent/stream-handler.js.map +1 -0
  46. package/dist/agent/system-prompt.d.ts.map +1 -1
  47. package/dist/agent/system-prompt.js +21 -67
  48. package/dist/agent/system-prompt.js.map +1 -1
  49. package/dist/channels/discord.d.ts +1 -0
  50. package/dist/channels/discord.d.ts.map +1 -1
  51. package/dist/channels/discord.js +30 -0
  52. package/dist/channels/discord.js.map +1 -1
  53. package/dist/channels/interface.d.ts +19 -0
  54. package/dist/channels/interface.d.ts.map +1 -1
  55. package/dist/channels/manager.d.ts +4 -0
  56. package/dist/channels/manager.d.ts.map +1 -1
  57. package/dist/channels/manager.js +15 -0
  58. package/dist/channels/manager.js.map +1 -1
  59. package/dist/channels/telegram.d.ts +1 -0
  60. package/dist/channels/telegram.d.ts.map +1 -1
  61. package/dist/channels/telegram.js +32 -0
  62. package/dist/channels/telegram.js.map +1 -1
  63. package/dist/config/types.d.ts +16 -1
  64. package/dist/config/types.d.ts.map +1 -1
  65. package/dist/config/types.js +9 -1
  66. package/dist/config/types.js.map +1 -1
  67. package/dist/email/gmail-email.d.ts +4 -0
  68. package/dist/email/gmail-email.d.ts.map +1 -1
  69. package/dist/email/gmail-email.js +8 -0
  70. package/dist/email/gmail-email.js.map +1 -1
  71. package/dist/index.js +7 -1
  72. package/dist/index.js.map +1 -1
  73. package/dist/obs/notify.js +1 -1
  74. package/dist/obs/notify.js.map +1 -1
  75. package/dist/onboarding/index.d.ts.map +1 -1
  76. package/dist/onboarding/index.js +84 -10
  77. package/dist/onboarding/index.js.map +1 -1
  78. package/dist/onboarding/onboarding-session.d.ts.map +1 -1
  79. package/dist/onboarding/onboarding-session.js +4 -7
  80. package/dist/onboarding/onboarding-session.js.map +1 -1
  81. package/dist/onboarding/onboarding-tools.d.ts.map +1 -1
  82. package/dist/onboarding/onboarding-tools.js +2 -1
  83. package/dist/onboarding/onboarding-tools.js.map +1 -1
  84. package/dist/onboarding/telegram-onboarding.d.ts.map +1 -1
  85. package/dist/onboarding/telegram-onboarding.js +1 -5
  86. package/dist/onboarding/telegram-onboarding.js.map +1 -1
  87. package/dist/reconfigure.d.ts.map +1 -1
  88. package/dist/reconfigure.js +151 -8
  89. package/dist/reconfigure.js.map +1 -1
  90. package/dist/tools/calendar.js +1 -1
  91. package/dist/tools/calendar.js.map +1 -1
  92. package/dist/tools/computer-use.d.ts.map +1 -1
  93. package/dist/tools/computer-use.js +21 -19
  94. package/dist/tools/computer-use.js.map +1 -1
  95. package/dist/tools/coordinate-resolver.d.ts +2 -1
  96. package/dist/tools/coordinate-resolver.d.ts.map +1 -1
  97. package/dist/tools/coordinate-resolver.js +4 -5
  98. package/dist/tools/coordinate-resolver.js.map +1 -1
  99. package/dist/tools/drive.js +1 -1
  100. package/dist/tools/drive.js.map +1 -1
  101. package/dist/tools/email.d.ts +2 -0
  102. package/dist/tools/email.d.ts.map +1 -1
  103. package/dist/tools/email.js +16 -7
  104. package/dist/tools/email.js.map +1 -1
  105. package/dist/tools/index.d.ts.map +1 -1
  106. package/dist/tools/index.js +0 -2
  107. package/dist/tools/index.js.map +1 -1
  108. package/dist/tools/memory.d.ts +1 -1
  109. package/dist/tools/notify.js +1 -1
  110. package/dist/tools/notify.js.map +1 -1
  111. package/dist/tools/skill.d.ts +1 -1
  112. package/dist/tools/switch-session.d.ts +10 -0
  113. package/dist/tools/switch-session.d.ts.map +1 -0
  114. package/dist/tools/switch-session.js +31 -0
  115. package/dist/tools/switch-session.js.map +1 -0
  116. package/dist/tools/web-search.d.ts +2 -0
  117. package/dist/tools/web-search.d.ts.map +1 -0
  118. package/dist/tools/web-search.js +2 -0
  119. package/dist/tools/web-search.js.map +1 -0
  120. package/package.json +4 -3
@@ -1,169 +1,41 @@
1
1
  import { buildSystemPrompt } from "./system-prompt.js";
2
2
  import { AgentSession } from "./session.js";
3
- import { buildWakeContext, buildInterruptContext, buildInterruptWithBriefing } from "./context.js";
3
+ import { buildWakeContext } from "./context.js";
4
4
  import { createToolServer } from "../tools/index.js";
5
5
  import { MessageQueue } from "../channels/queue.js";
6
6
  import { ChannelManager } from "../channels/manager.js";
7
- import { takeScreenshot } from "../tools/screenshot.js";
8
- import { getCalendarSummary } from "../calendar/scheduler.js";
9
7
  import { ActivityTracker } from "./activity-tracker.js";
10
- import { fastRespond } from "./fast-responder.js";
8
+ import { shouldNotifyUpdate } from "../utils/version-check.js";
9
+ import { loadOwnerConfig, saveOwnerConfig } from "../config/index.js";
11
10
  import { logger } from "../logger.js";
12
- import { checkForUpdate, shouldNotifyUpdate, recordUpdateNotification } from "../utils/version-check.js";
13
- import { isOnboardingComplete, loadOwnerConfig, ownerConfigExists } from "../config/index.js";
14
- import { TelegramOnboardingHandler } from "../onboarding/telegram-onboarding.js";
15
- import { sendOnboardingEmail } from "../onboarding/send-invite.js";
16
- // Module-level state so tools can access it
17
- let agentState = null;
18
- export function getAgentState() {
19
- if (!agentState) {
20
- throw new Error("Agent state not initialized");
21
- }
22
- return agentState;
23
- }
24
- /**
25
- * Log an SDK result message (success or error).
26
- */
27
- function logResult(msg) {
28
- const cost = msg.total_cost_usd.toFixed(4);
29
- logger.result(msg.num_turns, msg.usage.input_tokens, msg.usage.output_tokens, cost);
30
- if (msg.is_error) {
31
- const errors = "errors" in msg ? msg.errors.join("; ") : msg.subtype;
32
- logger.err(`Agent ended with error (${msg.subtype}): ${errors}`);
33
- }
34
- }
35
- function captureUsageSnapshotFromResult(session, msg) {
36
- try {
37
- logger.debug("Capturing model usage snapshot from result message...");
38
- const modelUsage = msg.modelUsage ??
39
- msg.model_usage;
40
- if (!modelUsage || typeof modelUsage !== "object") {
41
- logger.debug("[modelUsage] missing on result message");
42
- return;
43
- }
44
- // Prefer the primary model when present, otherwise pick the first entry.
45
- const byModel = modelUsage;
46
- const primary = typeof msg.model === "string"
47
- ? (msg.model)
48
- : undefined;
49
- const usageObjUnknown = (primary && byModel[primary] !== undefined ? byModel[primary] : undefined) ??
50
- Object.values(byModel)[0];
51
- if (!usageObjUnknown || typeof usageObjUnknown !== "object")
52
- return;
53
- const u = usageObjUnknown;
54
- const contextWindow = typeof u.contextWindow === "number" ? u.contextWindow : undefined;
55
- if (!contextWindow || contextWindow <= 0) {
56
- logger.debug("[modelUsage] missing/invalid contextWindow", { contextWindow });
57
- return;
58
- }
59
- const inputTokens = typeof u.inputTokens === "number" ? u.inputTokens : 0;
60
- const cacheReadInputTokens = typeof u.cacheReadInputTokens === "number" ? u.cacheReadInputTokens : 0;
61
- const cacheCreationInputTokens = typeof u.cacheCreationInputTokens === "number" ? u.cacheCreationInputTokens : 0;
62
- const usedInputTokens = inputTokens + cacheReadInputTokens + cacheCreationInputTokens;
63
- const usedPct = usedInputTokens / contextWindow;
64
- if (!Number.isFinite(usedPct) || usedPct < 0) {
65
- logger.debug("[modelUsage] computed invalid usedPct", {
66
- usedPct,
67
- usedInputTokens,
68
- contextWindow,
69
- });
70
- return;
71
- }
72
- session.captureUsageSnapshot({
73
- usedInputTokens,
74
- contextWindow,
75
- usedPct,
76
- });
77
- logger.debug(`[modelUsage] snapshot: usedPct=${(usedPct * 100).toFixed(1)}% used=${usedInputTokens} window=${contextWindow} (in=${inputTokens} cache_read=${cacheReadInputTokens} cache_create=${cacheCreationInputTokens})`);
78
- }
79
- catch {
80
- // ignore
81
- }
82
- }
11
+ import { initAgentState } from "./state.js";
12
+ import { runOnboardingGate } from "./onboarding-gate.js";
13
+ import { InterruptHandler } from "./interrupt-handler.js";
14
+ import { processAgentStream } from "./stream-handler.js";
15
+ import { gatherWakeContext } from "./gather-context.js";
16
+ export { getAgentState } from "./state.js";
83
17
  /**
84
18
  * Main agent loop. This is the entry point that runs forever.
85
19
  */
86
20
  export async function startAgentLoop(config) {
87
- // --- Initialize components ---
88
21
  const messageQueue = new MessageQueue();
89
22
  const channelManager = new ChannelManager(config, messageQueue);
90
23
  const activityTracker = new ActivityTracker();
91
- // Start all enabled channels (needed for both onboarding and normal operation)
92
24
  await channelManager.startAll();
93
25
  // --- Onboarding gate ---
94
- // If owner onboarding is not complete, run the restricted Telegram onboarding
95
- // loop before entering the normal agent loop.
96
- if (!isOnboardingComplete()) {
97
- if (!ownerConfigExists()) {
98
- logger.system("No owner configured. Run 'visionclaw set-owner --email <email>' to set up the owner.");
99
- process.exit(1);
100
- }
101
- const partialOwner = loadOwnerConfig();
102
- logger.system("Owner onboarding not complete. Entering onboarding mode...");
103
- // Only send invitation email if the verification code hasn't been used yet
104
- if (!partialOwner.onboardingCodeVerified) {
105
- try {
106
- await sendOnboardingEmail(config, partialOwner);
107
- }
108
- catch (err) {
109
- logger.err(`Failed to send onboarding email: ${err instanceof Error ? err.message : String(err)}`);
110
- }
111
- }
112
- else {
113
- logger.system("Code already verified — resuming onboarding conversation...");
114
- }
115
- // Run restricted loop: only process Telegram messages for onboarding
116
- const botToken = config.channels.telegram?.botToken ?? "";
117
- const handler = new TelegramOnboardingHandler(partialOwner, config, botToken, channelManager);
118
- const sendReply = async (chatId, text) => {
119
- await channelManager.sendMessage("telegram", String(chatId), text);
120
- };
121
- await new Promise((resolve) => {
122
- channelManager.on("message", () => {
123
- void (async () => {
124
- while (!messageQueue.isEmpty()) {
125
- const msg = messageQueue.dequeue();
126
- if (!msg)
127
- continue;
128
- if (msg.channel !== "telegram") {
129
- logger.debug(`Onboarding: ignoring ${msg.channel} message`);
130
- continue;
131
- }
132
- logger.log("info", "incoming", `[onboarding] [${msg.channel}] ${msg.sender}: ${msg.text}`, {
133
- channel: msg.channel,
134
- sender: msg.sender,
135
- text: msg.text,
136
- });
137
- const done = await handler.handleMessage(msg, sendReply);
138
- if (done) {
139
- resolve();
140
- return;
141
- }
142
- }
143
- })();
144
- });
145
- });
146
- logger.system("Owner onboarding complete! Restarting to clean up onboarding context...\n");
147
- // Stop all channels before restart so listeners are torn down cleanly.
148
- await channelManager.stopAll();
149
- // Restart the process so the normal agent loop starts with a clean slate
150
- // (no leftover onboarding session state, event listeners, etc.).
151
- const { scheduleRestart } = await import("../utils/restart.js");
152
- scheduleRestart(2_000);
26
+ if (await runOnboardingGate(config, channelManager, messageQueue)) {
153
27
  return;
154
28
  }
155
29
  // --- Normal operation (onboarding complete) ---
156
30
  const ownerConfig = loadOwnerConfig();
157
- // Apply Telegram allowed chat IDs from owner config
158
31
  if (ownerConfig.allowedTelegramChatIds.length > 0) {
159
32
  channelManager.setTelegramAllowedChatIds(ownerConfig.allowedTelegramChatIds);
160
33
  }
161
- // Apply Discord allowed channel IDs from owner config
162
34
  if (ownerConfig.allowedDiscordChannelIds.length > 0) {
163
35
  channelManager.setDiscordAllowedChannelIds(ownerConfig.allowedDiscordChannelIds);
164
36
  }
165
37
  let requestSessionStop = () => { };
166
- agentState = {
38
+ const agentState = {
167
39
  config,
168
40
  ownerConfig,
169
41
  channelManager,
@@ -172,21 +44,18 @@ export async function startAgentLoop(config) {
172
44
  busy: false,
173
45
  stopRequested: false,
174
46
  requestStop: () => {
175
- if (agentState)
176
- agentState.stopRequested = true;
47
+ agentState.stopRequested = true;
177
48
  requestSessionStop();
178
49
  },
179
50
  };
51
+ initAgentState(agentState);
180
52
  logger.system(`Agent: ${ownerConfig.agentName} <${config.gmail}>`);
181
53
  logger.system(`Owner: ${ownerConfig.ownerName} <${ownerConfig.ownerEmail}>`);
182
54
  logger.system(`Heartbeat interval: ${ownerConfig.heartbeatIntervalMs / 1000}s`);
183
55
  logger.system("");
184
56
  // Create the in-process MCP tool server
185
57
  const toolServer = createToolServer();
186
- // Create the agent session (uses V1 query API with full tool support)
187
- // Pass a builder function so the system prompt is regenerated fresh
188
- // each wake cycle (keeps current time / heartbeat interval accurate).
189
- const session = new AgentSession(config, () => buildSystemPrompt(config, ownerConfig), {
58
+ const mcpServers = {
190
59
  visionclaw: toolServer,
191
60
  playwright: {
192
61
  command: "npx",
@@ -196,149 +65,39 @@ export async function startAgentLoop(config) {
196
65
  process.env.PLAYWRIGHT_MCP_CDP_ENDPOINT ?? "http://127.0.0.1:9222",
197
66
  ],
198
67
  },
199
- });
68
+ };
69
+ if (config.provider === "bedrock" && config.serpApiKey) {
70
+ mcpServers.serpapi = {
71
+ type: "http",
72
+ url: `https://mcp.serpapi.com/${config.serpApiKey}/mcp`,
73
+ };
74
+ }
75
+ const session = new AgentSession(config, () => buildSystemPrompt(config, ownerConfig), mcpServers);
200
76
  requestSessionStop = () => session.requestStop();
201
- // Listen for incoming messages to wake the agent or inject interrupts
77
+ // --- Interrupt handling ---
202
78
  let wakeResolver = null;
203
- // Debounce + concurrency guard for interrupt handling.
204
- // When messages arrive in quick succession while the agent is busy, we
205
- // wait a short window to batch them, then process the batch in a single
206
- // fast-responder call. Only one interrupt batch runs at a time; messages
207
- // arriving while a batch is in-flight are picked up by the next batch.
208
- const INTERRUPT_DEBOUNCE_MS = 10000;
209
- const FAST_RESPONDER_TIMEOUT_MS = 8000;
210
- let interruptTimer = null;
211
- let interruptInFlight = false;
212
- function requeue(batch) {
213
- logger.warn(`Session closed before inject — re-queuing ${batch.length} message(s) for next wake cycle`);
214
- for (const msg of batch) {
215
- messageQueue.enqueue(msg);
216
- }
217
- }
218
- async function processInterruptBatch() {
219
- if (interruptInFlight || !agentState?.busy)
220
- return;
221
- interruptInFlight = true;
222
- try {
223
- // Drain everything currently in the queue
224
- const batch = [];
225
- while (!messageQueue.isEmpty()) {
226
- const msg = messageQueue.dequeue();
227
- if (msg)
228
- batch.push(msg);
229
- }
230
- if (batch.length === 0)
231
- return;
232
- // Update last-message tracking
233
- const last = batch[batch.length - 1];
234
- agentState.lastMessageChannel = last.channel;
235
- agentState.lastMessageSender = last.sender;
236
- // Log each interrupt message
237
- for (const msg of batch) {
238
- logger.log("info", "incoming", `[interrupt] [${msg.channel}] ${msg.sender}: ${msg.text}`, {
239
- channel: msg.channel,
240
- sender: msg.sender,
241
- text: msg.text,
242
- id: msg.id,
243
- interrupt: true,
244
- });
245
- }
246
- // Single fast-responder call for the entire batch
247
- const fr = await Promise.race([
248
- fastRespond({
249
- systemPrompt: buildSystemPrompt(config, ownerConfig),
250
- messages: batch,
251
- activitySummary: activityTracker.getSummary(),
252
- apiKey: config.anthropicApiKey,
253
- }),
254
- new Promise((resolve) => setTimeout(() => resolve(null), FAST_RESPONDER_TIMEOUT_MS)),
255
- ]);
256
- if (fr) {
257
- // Build a lookup from the per-recipient replies
258
- const replyMap = new Map();
259
- for (const r of fr.replies) {
260
- replyMap.set(`${r.channel}::${r.sender}`, r.reply);
261
- }
262
- // Send each recipient their personalized reply
263
- const sent = new Set();
264
- for (const msg of batch) {
265
- const key = `${msg.channel}::${msg.sender}`;
266
- if (sent.has(key))
267
- continue;
268
- sent.add(key);
269
- const reply = replyMap.get(key);
270
- if (!reply)
271
- continue;
272
- try {
273
- await channelManager.sendMessage(msg.channel, msg.sender, reply);
274
- }
275
- catch (sendErr) {
276
- logger.warn(`Failed to send fast response: ${sendErr instanceof Error ? sendErr.message : String(sendErr)}`);
277
- }
278
- }
279
- logger.log("info", "fast_response", `Replied to ${sent.size} recipient(s) for ${batch.length} message(s)`, {
280
- messageCount: batch.length,
281
- recipientCount: sent.size,
282
- needsPrimaryAction: fr.needsPrimaryAction,
283
- });
284
- // Inject briefing into the primary session if follow-up is needed
285
- if (fr.needsPrimaryAction) {
286
- const briefingBlocks = buildInterruptWithBriefing({
287
- messages: batch,
288
- fastResponse: fr,
289
- });
290
- if (!session.injectMessage(briefingBlocks)) {
291
- requeue(batch);
292
- }
293
- }
294
- else {
295
- logger.log("info", "fast_response", `No follow-up needed for ${batch.length} message(s)`, {
296
- messageCount: batch.length,
297
- recipientCount: sent.size,
298
- needsPrimaryAction: fr.needsPrimaryAction,
299
- });
300
- }
301
- }
302
- else {
303
- // Fast responder failed — fall back to the original interrupt path
304
- const interruptBlocks = await buildInterruptContext({ messages: batch });
305
- if (!session.injectMessage(interruptBlocks)) {
306
- requeue(batch);
307
- }
308
- }
309
- }
310
- finally {
311
- interruptInFlight = false;
312
- // If more messages arrived while we were processing, schedule another batch
313
- // Re-check module-level state since it may have changed during async work.
314
- // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition -- agentState is module-level and may change across awaits
315
- if (!messageQueue.isEmpty() && agentState?.busy) {
316
- scheduleInterruptBatch();
317
- }
318
- }
319
- }
320
- function scheduleInterruptBatch() {
321
- if (interruptTimer !== null)
322
- return; // already scheduled
323
- interruptTimer = setTimeout(() => {
324
- interruptTimer = null;
325
- void processInterruptBatch();
326
- }, INTERRUPT_DEBOUNCE_MS);
327
- }
79
+ const interruptHandler = new InterruptHandler({
80
+ session,
81
+ config,
82
+ ownerConfig,
83
+ channelManager,
84
+ messageQueue,
85
+ activityTracker,
86
+ getAgentState: () => agentState,
87
+ });
328
88
  channelManager.on("message", () => {
329
- // If agent is sleeping, wake it up immediately
330
89
  if (wakeResolver !== null) {
331
90
  wakeResolver();
332
91
  wakeResolver = null;
333
92
  return;
334
93
  }
335
- // If agent is busy, schedule a debounced interrupt batch
336
- if (agentState?.busy) {
337
- scheduleInterruptBatch();
94
+ if (agentState.busy) {
95
+ interruptHandler.schedule();
338
96
  }
339
97
  });
98
+ // --- Channel command handlers ---
340
99
  channelManager.on("stop_request", (req) => {
341
- if (agentState?.busy && !agentState.stopRequested) {
100
+ if (agentState.busy && !agentState.stopRequested) {
342
101
  agentState.requestStop();
343
102
  logger.system(`Agent stop requested via ${req.channel} by ${req.sender}`);
344
103
  channelManager
@@ -355,6 +114,29 @@ export async function startAgentLoop(config) {
355
114
  });
356
115
  }
357
116
  });
117
+ channelManager.on("sleep_request", (req) => {
118
+ const minutes = Math.max(1, Math.min(1440, req.minutes));
119
+ const oldMs = ownerConfig.heartbeatIntervalMs;
120
+ ownerConfig.heartbeatIntervalMs = minutes * 60 * 1000;
121
+ saveOwnerConfig(ownerConfig);
122
+ logger.system(`Sleep interval changed from ${oldMs / 60000}m to ${minutes}m by ${req.sender} via ${req.channel}`);
123
+ channelManager
124
+ .sendMessage(req.channel, req.sender, `Sleep interval changed from ${oldMs / 60000}m to ${minutes}m.`)
125
+ .catch((err) => {
126
+ logger.warn(`Failed to send sleep confirmation: ${err instanceof Error ? err.message : String(err)}`);
127
+ });
128
+ });
129
+ channelManager.on("console_request", (req) => {
130
+ const url = agentState.consoleUrl;
131
+ const reply = url
132
+ ? `${url}/console`
133
+ : "Console URL is not available yet (tunnel may still be starting).";
134
+ channelManager
135
+ .sendMessage(req.channel, req.sender, reply)
136
+ .catch((err) => {
137
+ logger.warn(`Failed to send console URL: ${err instanceof Error ? err.message : String(err)}`);
138
+ });
139
+ });
358
140
  logger.system("Agent loop started. Waiting for messages or heartbeat...");
359
141
  // --- Main loop ---
360
142
  let firstIteration = true;
@@ -364,12 +146,10 @@ export async function startAgentLoop(config) {
364
146
  agentState.stopRequested = false;
365
147
  session.clearStop();
366
148
  activityTracker.clear();
367
- // Fire immediately on startup; sleep on subsequent iterations
368
149
  if (firstIteration) {
369
150
  firstIteration = false;
370
151
  }
371
152
  else {
372
- // Sleep until heartbeat or new message
373
153
  await new Promise((resolve) => {
374
154
  wakeResolver = resolve;
375
155
  const timer = setTimeout(() => {
@@ -387,33 +167,25 @@ export async function startAgentLoop(config) {
387
167
  }
388
168
  agentState.busy = true;
389
169
  activityTracker.clear();
390
- // Determine trigger
391
170
  const hasMessages = !messageQueue.isEmpty();
392
171
  const trigger = hasMessages ? "message" : "heartbeat";
393
- // Dequeue all pending messages
394
172
  const messages = [];
395
173
  while (!messageQueue.isEmpty()) {
396
174
  const msg = messageQueue.dequeue();
397
175
  if (msg)
398
176
  messages.push(msg);
399
177
  }
400
- // Track last message for notify_user
401
178
  if (messages.length > 0) {
402
179
  const last = messages[messages.length - 1];
403
180
  agentState.lastMessageChannel = last.channel;
404
181
  agentState.lastMessageSender = last.sender;
405
182
  }
406
- // Start tracking this wake cycle's activity
407
183
  const taskDesc = messages.length > 0
408
184
  ? messages.map((m) => `[${m.channel}] ${m.sender}: ${m.text}`).join("; ")
409
185
  : "heartbeat";
410
186
  activityTracker.startTask(taskDesc);
411
- // Log the wake event
412
187
  logger.wake(trigger, messages.length);
413
- // If this is a heartbeat wake (no new messages), it's a good time to compact.
414
188
  if (trigger === "heartbeat") {
415
- // After a turn completes we have a fresh usage snapshot; it's a safe time
416
- // to consider compaction.
417
189
  try {
418
190
  await session.maybeCompactByTokens();
419
191
  }
@@ -421,7 +193,6 @@ export async function startAgentLoop(config) {
421
193
  logger.warn(`Compaction request failed: ${err instanceof Error ? err.message : String(err)}`);
422
194
  }
423
195
  }
424
- // Log each incoming message
425
196
  for (const msg of messages) {
426
197
  logger.log("info", "incoming", `[${msg.channel}] ${msg.sender}: ${msg.text}`, {
427
198
  channel: msg.channel,
@@ -431,50 +202,8 @@ export async function startAgentLoop(config) {
431
202
  });
432
203
  }
433
204
  // --- Gather context ---
434
- // Gather screenshot, calendar, and version check in parallel
435
205
  const doVersionCheck = trigger === "heartbeat" && shouldNotifyUpdate();
436
- const [screenshotResult, calendarResult, versionResult] = await Promise.allSettled([
437
- (async () => {
438
- const t0 = Date.now();
439
- const base64 = await takeScreenshot();
440
- logger.debug(`Screenshot captured in ${Date.now() - t0}ms`);
441
- return { base64 };
442
- })(),
443
- (async () => {
444
- const t0 = Date.now();
445
- const summary = await getCalendarSummary(config);
446
- logger.debug(`Calendar fetched in ${Date.now() - t0}ms`);
447
- return summary;
448
- })(),
449
- (async () => {
450
- if (!doVersionCheck)
451
- return undefined;
452
- const t0 = Date.now();
453
- const result = await checkForUpdate();
454
- logger.debug(`Version check completed in ${Date.now() - t0}ms`);
455
- if (result.hasUpdate) {
456
- recordUpdateNotification(result.latestVersion);
457
- logger.system(`Update available: ${result.currentVersion} → ${result.latestVersion}`);
458
- }
459
- return result;
460
- })(),
461
- ]);
462
- const screenshot = screenshotResult.status === "fulfilled" ? screenshotResult.value : undefined;
463
- if (screenshotResult.status === "rejected") {
464
- logger.warn(`Screenshot failed: ${screenshotResult.reason instanceof Error ? screenshotResult.reason.message : String(screenshotResult.reason)}`);
465
- }
466
- if (screenshot?.base64) {
467
- logger.screenshot(screenshot.base64);
468
- }
469
- const calendarSummary = calendarResult.status === "fulfilled" ? calendarResult.value : "";
470
- if (calendarResult.status === "rejected") {
471
- logger.warn(`Calendar fetch failed: ${calendarResult.reason instanceof Error ? calendarResult.reason.message : String(calendarResult.reason)}`);
472
- }
473
- const versionUpdate = versionResult.status === "fulfilled" ? versionResult.value : undefined;
474
- if (versionResult.status === "rejected") {
475
- logger.debug(`Version check failed: ${versionResult.reason instanceof Error ? versionResult.reason.message : String(versionResult.reason)}`);
476
- }
477
- // Build multimodal content blocks
206
+ const { screenshot, calendarSummary, versionUpdate } = await gatherWakeContext(config, doVersionCheck);
478
207
  const contentBlocks = await buildWakeContext({
479
208
  config,
480
209
  trigger,
@@ -483,219 +212,25 @@ export async function startAgentLoop(config) {
483
212
  screenshot,
484
213
  versionUpdate,
485
214
  });
486
- // Map tool_use_id -> tool name for correlating results (per wake cycle)
487
- const toolNameById = new Map();
488
- // Log the multimodal message being sent (text + image summary)
489
215
  logger.sendMultimodal(contentBlocks);
216
+ if (trigger === "message") {
217
+ // send typing indicator to all channels
218
+ for (const msg of messages) {
219
+ channelManager.sendTypingIndicator(msg.channel, msg.sender).catch(() => { });
220
+ }
221
+ }
490
222
  try {
491
- // Send the multimodal content and stream the response.
492
- // Cast through unknown because our ContentBlock type may not
493
- // exactly match MessageParam["content"].
494
223
  const messageContent = contentBlocks;
495
224
  const queryStream = session.sendAndStream(messageContent);
496
- // Track Task -> subagent mapping so we can annotate logs for subagent activity.
497
- // Per Claude Agent SDK docs, subagents are invoked via the Task tool, and messages
498
- // emitted from within a subagent execution context include parent_tool_use_id.
499
- const subagentByTaskToolUseId = new Map();
500
- // Process the response stream.
501
- // We don't break on finish -- breaking calls generator.return() which
502
- // can hang waiting for the SDK child process to exit. Instead we
503
- // drain the stream naturally and skip logging after finish.
504
- let finishCalled = false;
505
- for await (const msg of queryStream) {
506
- // Always capture session ID
507
- if ("session_id" in msg && msg.session_id) {
508
- session.captureSessionId(msg.session_id);
509
- }
510
- // After finish, only log the result message and skip everything else
511
- if (finishCalled) {
512
- if (msg.type === "result") {
513
- logResult(msg);
514
- captureUsageSnapshotFromResult(session, msg);
515
- }
516
- continue;
517
- }
518
- // Handle different message types
519
- switch (msg.type) {
520
- case "user": {
521
- // In Claude Agent SDK v2, tool execution results are surfaced back
522
- // into the stream as "user" messages (not as a separate tool_result
523
- // message type). Capture and log them for OBS.
524
- const um = msg;
525
- const parentToolUseId = typeof um.parent_tool_use_id === "string" && um.parent_tool_use_id.length > 0
526
- ? um.parent_tool_use_id
527
- : undefined;
528
- const subagentMeta = parentToolUseId
529
- ? subagentByTaskToolUseId.get(parentToolUseId)
530
- : undefined;
531
- // Tool results in this SDK are encoded as user message content blocks:
532
- // [{ type: "tool_result", tool_use_id, content: [...] }]
533
- const blocks = um.message?.content;
534
- if (Array.isArray(blocks)) {
535
- for (const b of blocks) {
536
- const block = b;
537
- if (block.type !== "tool_result" || !block.tool_use_id)
538
- continue;
539
- // If this is the result of a Task tool invocation, extract agentId for correlation.
540
- // The SDK docs show agentId appearing in Task tool results; store it so subsequent
541
- // subagent events (which reference parent_tool_use_id) can include it.
542
- const maybeTask = subagentByTaskToolUseId.get(block.tool_use_id);
543
- if (maybeTask) {
544
- const contentStr = block.content === undefined
545
- ? ""
546
- : typeof block.content === "string"
547
- ? block.content
548
- : JSON.stringify(block.content);
549
- const re = /agentId:\s*([a-f0-9-]+)/i;
550
- const match = re.exec(contentStr);
551
- if (match?.[1]) {
552
- subagentByTaskToolUseId.set(block.tool_use_id, {
553
- ...maybeTask,
554
- agentId: match[1],
555
- });
556
- }
557
- }
558
- const out = block.content;
559
- const summary = out === undefined
560
- ? ""
561
- : typeof out === "string"
562
- ? out
563
- : JSON.stringify(out);
564
- const resolvedName = toolNameById.get(block.tool_use_id) ?? "tool";
565
- logger.toolResult(resolvedName, summary, {
566
- tool_use_id: block.tool_use_id,
567
- is_error: block.is_error,
568
- source: "sdk_user_message_block",
569
- ...(parentToolUseId ? { parent_tool_use_id: parentToolUseId } : {}),
570
- ...(subagentMeta?.subagent_type
571
- ? { subagent_type: subagentMeta.subagent_type }
572
- : {}),
573
- ...(subagentMeta?.agentId ? { subagent_agent_id: subagentMeta.agentId } : {}),
574
- });
575
- if (!parentToolUseId) {
576
- activityTracker.recordToolResult(resolvedName, summary);
577
- }
578
- }
579
- }
580
- break;
581
- }
582
- case "assistant": {
583
- const assistantMsg = msg;
584
- const content = assistantMsg.message.content;
585
- const parentToolUseIdRaw = assistantMsg
586
- .parent_tool_use_id;
587
- const parentToolUseId = typeof parentToolUseIdRaw === "string" && parentToolUseIdRaw.length > 0
588
- ? parentToolUseIdRaw
589
- : undefined;
590
- const subagentMeta = parentToolUseId
591
- ? subagentByTaskToolUseId.get(parentToolUseId)
592
- : undefined;
593
- for (const block of content) {
594
- switch (block.type) {
595
- case "text":
596
- if (block.text) {
597
- // If this text is produced inside a subagent, annotate it.
598
- if (parentToolUseId) {
599
- const tag = subagentMeta?.subagent_type
600
- ? `subagent:${subagentMeta.subagent_type}`
601
- : "subagent";
602
- logger.assistant(`[${tag}] ${block.text}`);
603
- }
604
- else {
605
- logger.assistant(block.text);
606
- activityTracker.recordAssistantText(block.text);
607
- }
608
- }
609
- break;
610
- case "thinking":
611
- logger.debug(`[thinking] ${block.thinking.substring(0, 200)}${block.thinking.length > 200 ? "..." : ""}`);
612
- break;
613
- case "tool_use":
614
- {
615
- const toolUseId = block.id ??
616
- block.tool_use_id;
617
- if (toolUseId) {
618
- toolNameById.set(toolUseId, block.name);
619
- }
620
- // Detect subagent invocation via Task tool_use blocks.
621
- if (block.name === "Task" && toolUseId) {
622
- const input = block.input;
623
- const subagent_type = typeof input.subagent_type === "string"
624
- ? (input.subagent_type)
625
- : undefined;
626
- subagentByTaskToolUseId.set(toolUseId, { subagent_type });
627
- }
628
- logger.toolCall(block.name, block.input, {
629
- tool_use_id: toolUseId,
630
- ...(parentToolUseId ? { parent_tool_use_id: parentToolUseId } : {}),
631
- ...(subagentMeta?.subagent_type
632
- ? { subagent_type: subagentMeta.subagent_type }
633
- : {}),
634
- ...(subagentMeta?.agentId ? { subagent_agent_id: subagentMeta.agentId } : {}),
635
- });
636
- if (!parentToolUseId) {
637
- const inputStr = block.input ? JSON.stringify(block.input) : "";
638
- activityTracker.recordToolCall(block.name, inputStr);
639
- }
640
- }
641
- if (block.name === "finish" || block.name === "mcp__visionclaw__finish") {
642
- finishCalled = true;
643
- session.closeInput();
644
- }
645
- break;
646
- default:
647
- // Log other block types (server_tool_use, mcp_tool_use, etc.)
648
- logger.debug(`Assistant block: ${block.type}`);
649
- break;
650
- }
651
- }
652
- // Log assistant-level errors
653
- if (assistantMsg.error) {
654
- logger.err(`Assistant error: ${assistantMsg.error}`);
655
- }
656
- break;
657
- }
658
- case "result":
659
- logResult(msg);
660
- captureUsageSnapshotFromResult(session, msg);
661
- break;
662
- case "system":
663
- logger.debug(`System message: ${JSON.stringify(msg)}`, {
664
- subtype: msg.subtype,
665
- });
666
- break;
667
- default:
668
- // stream_event, status, task_notification, etc. — skip silently
669
- // Also handle tool_result even if the SDK type union doesn't include it.
670
- if (msg.type === "tool_result") {
671
- const tr = msg;
672
- const name = tr.name ?? "tool";
673
- const summary = tr.content === undefined
674
- ? ""
675
- : typeof tr.content === "string"
676
- ? tr.content
677
- : JSON.stringify(tr.content);
678
- logger.toolResult(name, summary, {
679
- tool_use_id: tr.tool_use_id,
680
- is_error: tr.is_error,
681
- });
682
- }
683
- break;
684
- }
685
- }
686
- // Ensure generator is closed after stream ends
687
- session.closeInput();
225
+ await processAgentStream(queryStream, session, activityTracker);
688
226
  }
689
227
  catch (err) {
690
- // Ensure generator is closed on error too
691
228
  session.closeInput();
692
- // When the user requested a stop, the SDK throws an AbortError — this is expected.
693
229
  if (session.isStopRequested()) {
694
230
  logger.system("Agent stopped by user request");
695
231
  }
696
232
  else {
697
233
  logger.err(`Agent loop error: ${err instanceof Error ? err.message : String(err)}`, { stack: err instanceof Error ? err.stack : undefined });
698
- // Wait a bit before retrying to avoid tight error loops
699
234
  await new Promise((r) => setTimeout(r, 5000));
700
235
  }
701
236
  }