@dotsetlabs/dotclaw 1.5.2 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. package/README.md +6 -3
  2. package/config-examples/runtime.json +9 -8
  3. package/config-examples/tool-policy.json +6 -0
  4. package/container/agent-runner/package-lock.json +2 -2
  5. package/container/agent-runner/package.json +1 -1
  6. package/container/agent-runner/src/agent-config.ts +6 -13
  7. package/container/agent-runner/src/container-protocol.ts +0 -6
  8. package/container/agent-runner/src/id.ts +4 -0
  9. package/container/agent-runner/src/index.ts +175 -178
  10. package/container/agent-runner/src/ipc.ts +3 -15
  11. package/container/agent-runner/src/prompt-packs.ts +5 -209
  12. package/container/agent-runner/src/tools.ts +6 -5
  13. package/dist/agent-execution.d.ts +0 -6
  14. package/dist/agent-execution.d.ts.map +1 -1
  15. package/dist/agent-execution.js +2 -2
  16. package/dist/agent-execution.js.map +1 -1
  17. package/dist/background-jobs.d.ts +1 -0
  18. package/dist/background-jobs.d.ts.map +1 -1
  19. package/dist/background-jobs.js +18 -3
  20. package/dist/background-jobs.js.map +1 -1
  21. package/dist/behavior-config.d.ts +0 -1
  22. package/dist/behavior-config.d.ts.map +1 -1
  23. package/dist/behavior-config.js +0 -3
  24. package/dist/behavior-config.js.map +1 -1
  25. package/dist/cli.js +294 -41
  26. package/dist/cli.js.map +1 -1
  27. package/dist/config.d.ts +1 -0
  28. package/dist/config.d.ts.map +1 -1
  29. package/dist/config.js +1 -0
  30. package/dist/config.js.map +1 -1
  31. package/dist/container-protocol.d.ts +0 -6
  32. package/dist/container-protocol.d.ts.map +1 -1
  33. package/dist/container-runner.d.ts +5 -0
  34. package/dist/container-runner.d.ts.map +1 -1
  35. package/dist/container-runner.js +44 -2
  36. package/dist/container-runner.js.map +1 -1
  37. package/dist/dashboard.js +1 -1
  38. package/dist/dashboard.js.map +1 -1
  39. package/dist/db.d.ts +19 -1
  40. package/dist/db.d.ts.map +1 -1
  41. package/dist/db.js +130 -28
  42. package/dist/db.js.map +1 -1
  43. package/dist/id.d.ts +2 -0
  44. package/dist/id.d.ts.map +1 -0
  45. package/dist/id.js +4 -0
  46. package/dist/id.js.map +1 -0
  47. package/dist/index.js +152 -272
  48. package/dist/index.js.map +1 -1
  49. package/dist/json-helpers.d.ts +1 -0
  50. package/dist/json-helpers.d.ts.map +1 -1
  51. package/dist/json-helpers.js +33 -1
  52. package/dist/json-helpers.js.map +1 -1
  53. package/dist/maintenance.d.ts +1 -0
  54. package/dist/maintenance.d.ts.map +1 -1
  55. package/dist/maintenance.js +13 -3
  56. package/dist/maintenance.js.map +1 -1
  57. package/dist/memory-embeddings.d.ts +1 -0
  58. package/dist/memory-embeddings.d.ts.map +1 -1
  59. package/dist/memory-embeddings.js +10 -1
  60. package/dist/memory-embeddings.js.map +1 -1
  61. package/dist/memory-store.d.ts.map +1 -1
  62. package/dist/memory-store.js +2 -1
  63. package/dist/memory-store.js.map +1 -1
  64. package/dist/metrics.d.ts +1 -0
  65. package/dist/metrics.d.ts.map +1 -1
  66. package/dist/metrics.js +16 -2
  67. package/dist/metrics.js.map +1 -1
  68. package/dist/paths.d.ts +4 -2
  69. package/dist/paths.d.ts.map +1 -1
  70. package/dist/paths.js +4 -2
  71. package/dist/paths.js.map +1 -1
  72. package/dist/runtime-config.d.ts +4 -7
  73. package/dist/runtime-config.d.ts.map +1 -1
  74. package/dist/runtime-config.js +13 -16
  75. package/dist/runtime-config.js.map +1 -1
  76. package/dist/task-scheduler.d.ts +1 -0
  77. package/dist/task-scheduler.d.ts.map +1 -1
  78. package/dist/task-scheduler.js +10 -1
  79. package/dist/task-scheduler.js.map +1 -1
  80. package/dist/types.d.ts +14 -0
  81. package/dist/types.d.ts.map +1 -1
  82. package/package.json +6 -1
package/dist/index.js CHANGED
@@ -3,31 +3,32 @@ import { Telegraf } from 'telegraf';
3
3
  import { execSync } from 'child_process';
4
4
  import fs from 'fs';
5
5
  import path from 'path';
6
- import { DATA_DIR, MAIN_GROUP_FOLDER, GROUPS_DIR, IPC_POLL_INTERVAL, TIMEZONE, CONTAINER_MODE, WARM_START_ENABLED, ENV_PATH } from './config.js';
6
+ import { DATA_DIR, MAIN_GROUP_FOLDER, GROUPS_DIR, IPC_POLL_INTERVAL, TIMEZONE, CONTAINER_MODE, WARM_START_ENABLED, ENV_PATH, BATCH_WINDOW_MS } from './config.js';
7
7
  // Load .env from the canonical location (~/.dotclaw/.env)
8
8
  dotenv.config({ path: ENV_PATH });
9
- import { initDatabase, storeMessage, upsertChat, getMessagesSinceCursor, getChatState, updateChatState, createTask, updateTask, deleteTask, getTaskById, getAllGroupSessions, setGroupSession, deleteGroupSession, pauseTasksForGroup, getBackgroundJobQueuePosition, getBackgroundJobQueueDepth, linkMessageToTrace, getTraceIdForMessage, recordUserFeedback } from './db.js';
10
- import { startSchedulerLoop, runTaskNow } from './task-scheduler.js';
11
- import { startBackgroundJobLoop, spawnBackgroundJob, getBackgroundJobStatus, listBackgroundJobsForGroup, cancelBackgroundJob, recordBackgroundJobUpdate } from './background-jobs.js';
9
+ import { initDatabase, closeDatabase, storeMessage, upsertChat, getMessagesSinceCursor, getChatState, updateChatState, createTask, updateTask, deleteTask, getTaskById, getAllGroupSessions, setGroupSession, deleteGroupSession, pauseTasksForGroup, getBackgroundJobQueuePosition, getBackgroundJobQueueDepth, linkMessageToTrace, getTraceIdForMessage, recordUserFeedback, enqueueMessageItem, claimBatchForChat, completeQueuedMessages, failQueuedMessages, getChatsWithPendingMessages, resetStalledMessages } from './db.js';
10
+ import { startSchedulerLoop, stopSchedulerLoop, runTaskNow } from './task-scheduler.js';
11
+ import { startBackgroundJobLoop, stopBackgroundJobLoop, spawnBackgroundJob, getBackgroundJobStatus, listBackgroundJobsForGroup, cancelBackgroundJob, recordBackgroundJobUpdate } from './background-jobs.js';
12
12
  import { loadJson, saveJson, isSafeGroupFolder } from './utils.js';
13
13
  import { writeTrace } from './trace-writer.js';
14
14
  import { formatTelegramMessage, TELEGRAM_PARSE_MODE } from './telegram-format.js';
15
15
  import { initMemoryStore, getMemoryStats, upsertMemoryItems, searchMemories, listMemories, forgetMemories, cleanupExpiredMemories } from './memory-store.js';
16
- import { startEmbeddingWorker } from './memory-embeddings.js';
16
+ import { startEmbeddingWorker, stopEmbeddingWorker } from './memory-embeddings.js';
17
17
  import { createProgressManager, DEFAULT_PROGRESS_MESSAGES, DEFAULT_PROGRESS_STAGES, formatProgressWithPlan, formatPlanStepList } from './progress.js';
18
18
  import { parseAdminCommand } from './admin-commands.js';
19
19
  import { loadModelRegistry, saveModelRegistry } from './model-registry.js';
20
- import { startMetricsServer, recordMessage, recordError, recordRoutingDecision, recordStageLatency } from './metrics.js';
21
- import { startMaintenanceLoop } from './maintenance.js';
22
- import { warmGroupContainer, startDaemonHealthCheckLoop } from './container-runner.js';
20
+ import { startMetricsServer, stopMetricsServer, recordMessage, recordError, recordRoutingDecision, recordStageLatency } from './metrics.js';
21
+ import { startMaintenanceLoop, stopMaintenanceLoop } from './maintenance.js';
22
+ import { warmGroupContainer, startDaemonHealthCheckLoop, stopDaemonHealthCheckLoop, cleanupInstanceContainers } from './container-runner.js';
23
23
  import { loadRuntimeConfig } from './runtime-config.js';
24
24
  import { createTraceBase, executeAgentRun, recordAgentTelemetry, AgentExecutionError } from './agent-execution.js';
25
25
  import { logger } from './logger.js';
26
- import { startDashboard, setTelegramConnected, setLastMessageTime, setMessageQueueDepth } from './dashboard.js';
26
+ import { startDashboard, stopDashboard, setTelegramConnected, setLastMessageTime, setMessageQueueDepth } from './dashboard.js';
27
27
  import { humanizeError } from './error-messages.js';
28
28
  import { classifyBackgroundJob } from './background-job-classifier.js';
29
29
  import { routeRequest, routePrompt } from './request-router.js';
30
30
  import { probePlanner } from './planner-probe.js';
31
+ import { generateId } from './id.js';
31
32
  const runtime = loadRuntimeConfig();
32
33
  function buildTriggerRegex(pattern) {
33
34
  if (!pattern)
@@ -142,9 +143,6 @@ setInterval(cleanupRateLimiter, 60_000);
142
143
  const TELEGRAM_HANDLER_TIMEOUT_MS = runtime.host.telegram.handlerTimeoutMs;
143
144
  const TELEGRAM_SEND_RETRIES = runtime.host.telegram.sendRetries;
144
145
  const TELEGRAM_SEND_RETRY_DELAY_MS = runtime.host.telegram.sendRetryDelayMs;
145
- const TELEGRAM_STREAM_MODE = runtime.host.telegram.streamMode.toLowerCase();
146
- const TELEGRAM_STREAM_MIN_INTERVAL_MS = runtime.host.telegram.streamMinIntervalMs;
147
- const TELEGRAM_STREAM_MIN_CHARS = runtime.host.telegram.streamMinChars;
148
146
  const MEMORY_RECALL_MAX_RESULTS = runtime.host.memory.recall.maxResults;
149
147
  const MEMORY_RECALL_MAX_TOKENS = runtime.host.memory.recall.maxTokens;
150
148
  const INPUT_MESSAGE_MAX_CHARS = runtime.host.telegram.inputMessageMaxChars;
@@ -170,7 +168,7 @@ let sessions = {};
170
168
  let registeredGroups = {};
171
169
  const TELEGRAM_MAX_MESSAGE_LENGTH = 4000;
172
170
  const TELEGRAM_SEND_DELAY_MS = 250;
173
- const messageQueues = new Map();
171
+ const activeDrains = new Set();
174
172
  const activeRuns = new Map();
175
173
  function isCancelMessage(content) {
176
174
  if (!content)
@@ -229,21 +227,6 @@ function inferPlanStepIndex(stage, totalSteps) {
229
227
  return 1;
230
228
  }
231
229
  }
232
- const draftSessions = new Map();
233
- function parseTelegramStreamMode(value) {
234
- const normalized = value.trim().toLowerCase();
235
- if (normalized === 'draft' || normalized === 'edit' || normalized === 'auto' || normalized === 'off') {
236
- return normalized;
237
- }
238
- return 'off';
239
- }
240
- function getDraftKey(chatId, draftId) {
241
- return `${chatId}:${draftId}`;
242
- }
243
- function createDraftId() {
244
- const max = 2_147_483_647;
245
- return Math.floor(Math.random() * (max - 1)) + 1;
246
- }
247
230
  async function setTyping(chatId) {
248
231
  try {
249
232
  await telegrafBot.telegram.sendChatAction(chatId, 'typing');
@@ -252,34 +235,6 @@ async function setTyping(chatId) {
252
235
  logger.debug({ chatId, err }, 'Failed to set typing indicator');
253
236
  }
254
237
  }
255
- function canUseTelegramDraft(msg) {
256
- return msg.chatType === 'private' && Number.isFinite(msg.messageThreadId);
257
- }
258
- function registerDraftSession(msg) {
259
- const mode = parseTelegramStreamMode(TELEGRAM_STREAM_MODE);
260
- if (mode === 'off')
261
- return null;
262
- if (msg.chatType !== 'private')
263
- return null;
264
- const supportsDraft = canUseTelegramDraft(msg);
265
- const resolvedMode = mode === 'auto'
266
- ? (supportsDraft ? 'draft' : 'edit')
267
- : (mode === 'draft' ? (supportsDraft ? 'draft' : 'edit') : (mode === 'edit' ? 'edit' : null));
268
- if (!resolvedMode)
269
- return null;
270
- const draftId = createDraftId();
271
- draftSessions.set(getDraftKey(msg.chatId, draftId), {
272
- mode: resolvedMode,
273
- messageThreadId: msg.messageThreadId,
274
- started: false,
275
- lastSentAt: 0,
276
- lastChunk: undefined
277
- });
278
- return { mode: resolvedMode, draftId };
279
- }
280
- function clearDraftSession(chatId, draftId) {
281
- draftSessions.delete(getDraftKey(chatId, draftId));
282
- }
283
238
  function sleep(ms) {
284
239
  return new Promise(resolve => setTimeout(resolve, ms));
285
240
  }
@@ -511,113 +466,6 @@ async function sendMessage(chatId, text, options) {
511
466
  return { success: false };
512
467
  }
513
468
  }
514
- async function sendDraftUpdate(chatId, draftId, text) {
515
- const key = getDraftKey(chatId, draftId);
516
- const session = draftSessions.get(key);
517
- if (!session)
518
- return;
519
- if (!text || !text.trim())
520
- return;
521
- const now = Date.now();
522
- if (now - session.lastSentAt < TELEGRAM_STREAM_MIN_INTERVAL_MS)
523
- return;
524
- session.lastSentAt = now;
525
- const chunk = splitPlainText(text, TELEGRAM_MAX_MESSAGE_LENGTH)[0] ?? '';
526
- if (!chunk)
527
- return;
528
- if (session.lastChunk === chunk)
529
- return;
530
- session.lastChunk = chunk;
531
- if (session.mode === 'draft') {
532
- try {
533
- await telegrafBot.telegram
534
- .callApi('sendMessageDraft', {
535
- chat_id: chatId,
536
- draft_id: draftId,
537
- text: chunk,
538
- message_thread_id: session.messageThreadId
539
- });
540
- session.started = true;
541
- return;
542
- }
543
- catch (err) {
544
- logger.warn({ chatId, err }, 'sendMessageDraft failed; switching to edit fallback');
545
- session.mode = 'edit';
546
- }
547
- }
548
- if (!session.messageId) {
549
- try {
550
- const payload = {};
551
- if (session.messageThreadId)
552
- payload.message_thread_id = session.messageThreadId;
553
- const sent = await telegrafBot.telegram.sendMessage(chatId, chunk, payload);
554
- session.messageId = sent.message_id;
555
- session.started = true;
556
- return;
557
- }
558
- catch (err) {
559
- logger.warn({ chatId, err }, 'Failed to send draft placeholder');
560
- return;
561
- }
562
- }
563
- try {
564
- await telegrafBot.telegram.editMessageText(chatId, session.messageId, undefined, chunk);
565
- session.started = true;
566
- }
567
- catch (err) {
568
- logger.debug({ chatId, err }, 'Failed to edit draft message');
569
- }
570
- }
571
- function isTelegramNotModifiedError(err) {
572
- const description = err?.response?.description;
573
- if (typeof description === 'string' && description.toLowerCase().includes('message is not modified')) {
574
- return true;
575
- }
576
- return false;
577
- }
578
- async function finalizeStreamedMessage(msg, draftId, text) {
579
- if (!draftId) {
580
- await sendMessage(msg.chatId, text, { messageThreadId: msg.messageThreadId });
581
- return;
582
- }
583
- const key = getDraftKey(msg.chatId, draftId);
584
- const session = draftSessions.get(key);
585
- if (!session) {
586
- await sendMessage(msg.chatId, text, { messageThreadId: msg.messageThreadId });
587
- return;
588
- }
589
- if (session.mode === 'edit' && session.messageId) {
590
- const chunks = formatTelegramMessage(text, TELEGRAM_MAX_MESSAGE_LENGTH);
591
- if (chunks.length === 0) {
592
- clearDraftSession(msg.chatId, draftId);
593
- return;
594
- }
595
- const firstChunk = chunks[0];
596
- const firstChunkMatches = session.lastChunk === firstChunk;
597
- try {
598
- if (!firstChunkMatches) {
599
- await telegrafBot.telegram.editMessageText(msg.chatId, session.messageId, undefined, firstChunk, { parse_mode: TELEGRAM_PARSE_MODE });
600
- }
601
- for (let i = firstChunkMatches ? 1 : 1; i < chunks.length; i += 1) {
602
- await sendMessage(msg.chatId, chunks[i], { messageThreadId: msg.messageThreadId });
603
- }
604
- clearDraftSession(msg.chatId, draftId);
605
- return;
606
- }
607
- catch (err) {
608
- if (isTelegramNotModifiedError(err)) {
609
- for (let i = firstChunkMatches ? 1 : 1; i < chunks.length; i += 1) {
610
- await sendMessage(msg.chatId, chunks[i], { messageThreadId: msg.messageThreadId });
611
- }
612
- clearDraftSession(msg.chatId, draftId);
613
- return;
614
- }
615
- logger.warn({ chatId: msg.chatId, err }, 'Failed to finalize streamed edit; sending new message');
616
- }
617
- }
618
- await sendMessage(msg.chatId, text, { messageThreadId: msg.messageThreadId });
619
- clearDraftSession(msg.chatId, draftId);
620
- }
621
469
  function enqueueMessage(msg) {
622
470
  if (isCancelMessage(msg.content)) {
623
471
  const controller = activeRuns.get(msg.chatId);
@@ -630,43 +478,59 @@ function enqueueMessage(msg) {
630
478
  void sendMessage(msg.chatId, 'There is no active request to cancel.', { messageThreadId: msg.messageThreadId });
631
479
  return;
632
480
  }
633
- const existing = messageQueues.get(msg.chatId);
634
- if (existing) {
635
- existing.pendingMessage = msg;
636
- if (!existing.inFlight) {
637
- void drainQueue(msg.chatId);
638
- }
639
- setMessageQueueDepth(messageQueues.size);
640
- return;
481
+ enqueueMessageItem({
482
+ chat_jid: msg.chatId,
483
+ message_id: msg.messageId,
484
+ sender_id: msg.senderId,
485
+ sender_name: msg.senderName,
486
+ content: msg.content,
487
+ timestamp: msg.timestamp,
488
+ is_group: msg.isGroup,
489
+ chat_type: msg.chatType,
490
+ message_thread_id: msg.messageThreadId
491
+ });
492
+ setMessageQueueDepth(activeDrains.size);
493
+ if (!activeDrains.has(msg.chatId)) {
494
+ void drainQueue(msg.chatId);
641
495
  }
642
- messageQueues.set(msg.chatId, { inFlight: false, pendingMessage: msg });
643
- setMessageQueueDepth(messageQueues.size);
644
- void drainQueue(msg.chatId);
645
496
  }
646
497
  async function drainQueue(chatId) {
647
- const state = messageQueues.get(chatId);
648
- if (!state || state.inFlight)
498
+ if (activeDrains.has(chatId))
649
499
  return;
650
- state.inFlight = true;
500
+ activeDrains.add(chatId);
501
+ setMessageQueueDepth(activeDrains.size);
651
502
  try {
652
- while (state.pendingMessage) {
653
- const next = state.pendingMessage;
654
- state.pendingMessage = undefined;
655
- await processMessage(next);
503
+ while (true) {
504
+ const batch = claimBatchForChat(chatId, BATCH_WINDOW_MS);
505
+ if (batch.length === 0)
506
+ break;
507
+ const last = batch[batch.length - 1];
508
+ const triggerMsg = {
509
+ chatId: last.chat_jid,
510
+ messageId: last.message_id,
511
+ senderId: last.sender_id,
512
+ senderName: last.sender_name,
513
+ content: last.content,
514
+ timestamp: last.timestamp,
515
+ isGroup: last.is_group === 1,
516
+ chatType: last.chat_type,
517
+ messageThreadId: last.message_thread_id ?? undefined
518
+ };
519
+ const batchIds = batch.map(b => b.id);
520
+ try {
521
+ await processMessage(triggerMsg);
522
+ completeQueuedMessages(batchIds);
523
+ }
524
+ catch (err) {
525
+ const errMsg = err instanceof Error ? err.message : String(err);
526
+ failQueuedMessages(batchIds, errMsg);
527
+ logger.error({ chatId, err }, 'Error processing message batch');
528
+ }
656
529
  }
657
530
  }
658
- catch (err) {
659
- logger.error({ chatId, err }, 'Error draining message queue');
660
- }
661
531
  finally {
662
- state.inFlight = false;
663
- if (state.pendingMessage) {
664
- void drainQueue(chatId);
665
- }
666
- else {
667
- messageQueues.delete(chatId);
668
- }
669
- setMessageQueueDepth(messageQueues.size);
532
+ activeDrains.delete(chatId);
533
+ setMessageQueueDepth(activeDrains.size);
670
534
  }
671
535
  }
672
536
  async function processMessage(msg) {
@@ -677,9 +541,12 @@ async function processMessage(msg) {
677
541
  }
678
542
  recordMessage('telegram');
679
543
  setLastMessageTime(msg.timestamp);
680
- // Get all messages since last agent interaction so the session has full context
544
+ // Get messages since last agent interaction, filtered to only include
545
+ // messages up to and including the triggering message (not future queued ones)
681
546
  const chatState = getChatState(msg.chatId);
682
547
  let missedMessages = getMessagesSinceCursor(msg.chatId, chatState?.last_agent_timestamp || null, chatState?.last_agent_message_id || null);
548
+ missedMessages = missedMessages.filter(m => m.timestamp < msg.timestamp ||
549
+ (m.timestamp === msg.timestamp && m.id <= msg.messageId));
683
550
  if (missedMessages.length === 0) {
684
551
  logger.warn({ chatId: msg.chatId }, 'No missed messages found; falling back to current message');
685
552
  missedMessages = [{
@@ -729,9 +596,6 @@ ${lines.join('\n')}
729
596
  logger.info({ group: group.name, messageCount: missedMessages.length }, 'Processing message');
730
597
  await setTyping(msg.chatId);
731
598
  const recallQuery = missedMessages.map(entry => entry.content).join('\n');
732
- const draftSession = registerDraftSession(msg);
733
- const draftId = draftSession?.draftId ?? null;
734
- const streamingEnabled = Boolean(draftSession && draftId);
735
599
  let output = null;
736
600
  let context = null;
737
601
  let errorMessage = null;
@@ -799,12 +663,7 @@ ${lines.join('\n')}
799
663
  : '';
800
664
  const planLine = planPreview ? `\n\nPlanned steps:\n${planPreview}` : '';
801
665
  await sendMessage(msg.chatId, `Queued this as background job ${result.jobId}. I'll report back when it's done. You can keep chatting while it runs.${queueLine}${etaLine}${detailLine}${planLine}`, { messageThreadId: msg.messageThreadId });
802
- if (lastMessage) {
803
- updateChatState(msg.chatId, lastMessage.timestamp, lastMessage.id);
804
- }
805
- if (draftId) {
806
- clearDraftSession(msg.chatId, draftId);
807
- }
666
+ updateChatState(msg.chatId, msg.timestamp, msg.messageId);
808
667
  return true;
809
668
  };
810
669
  let plannerProbeTools = [];
@@ -897,7 +756,7 @@ ${lines.join('\n')}
897
756
  });
898
757
  const planStepIndex = inferPlanStepIndex(predictedStage, plannerProbeSteps.length);
899
758
  const progressManager = createProgressManager({
900
- enabled: routingDecision.progress.enabled && !streamingEnabled,
759
+ enabled: routingDecision.progress.enabled,
901
760
  initialDelayMs: routingDecision.progress.initialMs,
902
761
  intervalMs: routingDecision.progress.intervalMs,
903
762
  maxUpdates: routingDecision.progress.maxUpdates,
@@ -911,7 +770,7 @@ ${lines.join('\n')}
911
770
  });
912
771
  progressManager.start();
913
772
  let sentPlan = false;
914
- if (predictedMs && predictedMs >= 10_000 && routingDecision.progress.enabled && !streamingEnabled) {
773
+ if (predictedMs && predictedMs >= 10_000 && routingDecision.progress.enabled) {
915
774
  if (plannerProbeSteps.length > 0) {
916
775
  const planMessage = formatProgressWithPlan({
917
776
  steps: plannerProbeSteps,
@@ -966,14 +825,6 @@ ${lines.join('\n')}
966
825
  toolDeny: routingDecision.toolDeny,
967
826
  sessionId: sessions[group.folder],
968
827
  onSessionUpdate: (sessionId) => { sessions[group.folder] = sessionId; },
969
- streaming: streamingEnabled && draftId
970
- ? {
971
- enabled: true,
972
- draftId,
973
- minIntervalMs: TELEGRAM_STREAM_MIN_INTERVAL_MS,
974
- minChars: TELEGRAM_STREAM_MIN_CHARS
975
- }
976
- : undefined,
977
828
  availableGroups: buildAvailableGroupsSnapshot(),
978
829
  modelOverride: routingDecision.modelOverride,
979
830
  modelMaxOutputTokens: routingDecision.maxOutputTokens,
@@ -1053,9 +904,6 @@ ${lines.join('\n')}
1053
904
  }
1054
905
  const userMessage = humanizeError(errorMessage || 'Unknown error');
1055
906
  await sendMessage(msg.chatId, userMessage, { messageThreadId: msg.messageThreadId });
1056
- if (draftId) {
1057
- clearDraftSession(msg.chatId, draftId);
1058
- }
1059
907
  return false;
1060
908
  }
1061
909
  if (output.status === 'error') {
@@ -1081,24 +929,12 @@ ${lines.join('\n')}
1081
929
  }
1082
930
  const userMessage = humanizeError(errorText);
1083
931
  await sendMessage(msg.chatId, userMessage, { messageThreadId: msg.messageThreadId });
1084
- if (draftId) {
1085
- clearDraftSession(msg.chatId, draftId);
1086
- }
1087
932
  return false;
1088
933
  }
1089
- if (lastMessage) {
1090
- updateChatState(msg.chatId, lastMessage.timestamp, lastMessage.id);
1091
- }
934
+ updateChatState(msg.chatId, msg.timestamp, msg.messageId);
1092
935
  if (output.result && output.result.trim()) {
1093
- let sentMessageId;
1094
- if (streamingEnabled && draftId) {
1095
- await finalizeStreamedMessage(msg, draftId, output.result);
1096
- // Note: streaming doesn't easily give us the message ID
1097
- }
1098
- else {
1099
- const sendResult = await sendMessage(msg.chatId, output.result, { messageThreadId: msg.messageThreadId });
1100
- sentMessageId = sendResult.messageId;
1101
- }
936
+ const sendResult = await sendMessage(msg.chatId, output.result, { messageThreadId: msg.messageThreadId });
937
+ const sentMessageId = sendResult.messageId;
1102
938
  // Link the sent message to the trace for feedback tracking
1103
939
  if (sentMessageId) {
1104
940
  try {
@@ -1128,15 +964,10 @@ ${lines.join('\n')}
1128
964
  }
1129
965
  }
1130
966
  await sendMessage(msg.chatId, 'I hit my tool-call step limit before I could finish. If you want me to keep going, please narrow the scope or ask for a specific subtask.', { messageThreadId: msg.messageThreadId });
1131
- if (draftId) {
1132
- clearDraftSession(msg.chatId, draftId);
1133
- }
1134
967
  }
1135
968
  else {
1136
969
  logger.warn({ chatId: msg.chatId }, 'Agent returned empty/whitespace response');
1137
- if (draftId) {
1138
- clearDraftSession(msg.chatId, draftId);
1139
- }
970
+ await sendMessage(msg.chatId, "I wasn't able to generate a response. Please try rephrasing your message.", { messageThreadId: msg.messageThreadId });
1140
971
  }
1141
972
  if (context) {
1142
973
  recordAgentTelemetry({
@@ -1150,12 +981,26 @@ ${lines.join('\n')}
1150
981
  }
1151
982
  return true;
1152
983
  }
984
+ let ipcWatcher = null;
985
+ let ipcPollingTimer = null;
986
+ let ipcStopped = false;
987
+ function stopIpcWatcher() {
988
+ ipcStopped = true;
989
+ if (ipcWatcher) {
990
+ ipcWatcher.close();
991
+ ipcWatcher = null;
992
+ }
993
+ if (ipcPollingTimer) {
994
+ clearTimeout(ipcPollingTimer);
995
+ ipcPollingTimer = null;
996
+ }
997
+ }
1153
998
  function startIpcWatcher() {
1154
999
  const ipcBaseDir = path.join(DATA_DIR, 'ipc');
1155
1000
  fs.mkdirSync(ipcBaseDir, { recursive: true });
1001
+ ipcStopped = false;
1156
1002
  let processing = false;
1157
1003
  let scheduled = false;
1158
- let pollingTimer = null;
1159
1004
  const processIpcFiles = async () => {
1160
1005
  if (processing)
1161
1006
  return;
@@ -1187,23 +1032,12 @@ function startIpcWatcher() {
1187
1032
  const filePath = path.join(messagesDir, file);
1188
1033
  try {
1189
1034
  const data = JSON.parse(fs.readFileSync(filePath, 'utf-8'));
1190
- if ((data.type === 'message' || data.type === 'message_draft') && data.chatJid && data.text) {
1035
+ if (data.type === 'message' && data.chatJid && data.text) {
1191
1036
  // Authorization: verify this group can send to this chatJid
1192
1037
  const targetGroup = registeredGroups[data.chatJid];
1193
1038
  if (isMain || (targetGroup && targetGroup.folder === sourceGroup)) {
1194
- if (data.type === 'message_draft') {
1195
- const draftId = Number.isFinite(data.draftId) ? Number(data.draftId) : NaN;
1196
- if (!Number.isFinite(draftId)) {
1197
- logger.warn({ chatJid: data.chatJid, sourceGroup }, 'IPC draft missing draftId');
1198
- }
1199
- else {
1200
- await sendDraftUpdate(data.chatJid, draftId, data.text);
1201
- }
1202
- }
1203
- else {
1204
- await sendMessage(data.chatJid, data.text);
1205
- logger.info({ chatJid: data.chatJid, sourceGroup }, 'IPC message sent');
1206
- }
1039
+ await sendMessage(data.chatJid, data.text);
1040
+ logger.info({ chatJid: data.chatJid, sourceGroup }, 'IPC message sent');
1207
1041
  }
1208
1042
  else {
1209
1043
  logger.warn({ chatJid: data.chatJid, sourceGroup }, 'Unauthorized IPC message attempt blocked');
@@ -1279,27 +1113,30 @@ function startIpcWatcher() {
1279
1113
  processing = false;
1280
1114
  };
1281
1115
  const scheduleProcess = () => {
1282
- if (scheduled)
1116
+ if (scheduled || ipcStopped)
1283
1117
  return;
1284
1118
  scheduled = true;
1285
1119
  setTimeout(async () => {
1286
1120
  scheduled = false;
1287
- await processIpcFiles();
1121
+ if (!ipcStopped)
1122
+ await processIpcFiles();
1288
1123
  }, 100);
1289
1124
  };
1290
1125
  let watcherActive = false;
1291
- let watcher = null;
1292
1126
  try {
1293
- watcher = fs.watch(ipcBaseDir, { recursive: true }, () => {
1127
+ ipcWatcher = fs.watch(ipcBaseDir, { recursive: true }, () => {
1294
1128
  scheduleProcess();
1295
1129
  });
1296
- watcher.on('error', (err) => {
1130
+ ipcWatcher.on('error', (err) => {
1297
1131
  logger.warn({ err }, 'IPC watcher error; falling back to polling');
1298
- watcher?.close();
1299
- if (!pollingTimer) {
1132
+ ipcWatcher?.close();
1133
+ ipcWatcher = null;
1134
+ if (!ipcPollingTimer && !ipcStopped) {
1300
1135
  const poll = () => {
1136
+ if (ipcStopped)
1137
+ return;
1301
1138
  scheduleProcess();
1302
- pollingTimer = setTimeout(poll, IPC_POLL_INTERVAL);
1139
+ ipcPollingTimer = setTimeout(poll, IPC_POLL_INTERVAL);
1303
1140
  };
1304
1141
  poll();
1305
1142
  }
@@ -1311,15 +1148,17 @@ function startIpcWatcher() {
1311
1148
  }
1312
1149
  if (!watcherActive) {
1313
1150
  const poll = () => {
1151
+ if (ipcStopped)
1152
+ return;
1314
1153
  scheduleProcess();
1315
- pollingTimer = setTimeout(poll, IPC_POLL_INTERVAL);
1154
+ ipcPollingTimer = setTimeout(poll, IPC_POLL_INTERVAL);
1316
1155
  };
1317
1156
  poll();
1318
1157
  }
1319
1158
  else {
1320
1159
  scheduleProcess();
1321
1160
  }
1322
- if (pollingTimer) {
1161
+ if (ipcPollingTimer) {
1323
1162
  logger.info('IPC watcher started (polling)');
1324
1163
  }
1325
1164
  else {
@@ -1425,17 +1264,26 @@ async function runHeartbeatOnce() {
1425
1264
  });
1426
1265
  }
1427
1266
  }
1267
+ let heartbeatStopped = false;
1268
+ function stopHeartbeatLoop() {
1269
+ heartbeatStopped = true;
1270
+ }
1428
1271
  function startHeartbeatLoop() {
1429
1272
  if (!HEARTBEAT_ENABLED)
1430
1273
  return;
1274
+ heartbeatStopped = false;
1431
1275
  const loop = async () => {
1276
+ if (heartbeatStopped)
1277
+ return;
1432
1278
  try {
1433
1279
  await runHeartbeatOnce();
1434
1280
  }
1435
1281
  catch (err) {
1436
1282
  logger.error({ err }, 'Heartbeat run failed');
1437
1283
  }
1438
- setTimeout(loop, HEARTBEAT_INTERVAL_MS);
1284
+ if (!heartbeatStopped) {
1285
+ setTimeout(loop, HEARTBEAT_INTERVAL_MS);
1286
+ }
1439
1287
  };
1440
1288
  loop();
1441
1289
  }
@@ -1484,7 +1332,7 @@ async function processTaskIpc(data, sourceGroup, isMain) {
1484
1332
  }
1485
1333
  nextRun = scheduled.toISOString();
1486
1334
  }
1487
- const taskId = `task-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
1335
+ const taskId = generateId('task');
1488
1336
  const contextMode = (data.context_mode === 'group' || data.context_mode === 'isolated')
1489
1337
  ? data.context_mode
1490
1338
  : 'isolated';
@@ -2253,6 +2101,7 @@ function ensureDockerRunning() {
2253
2101
  }
2254
2102
  catch {
2255
2103
  logger.error('Docker daemon is not running');
2104
+ // Intentionally using console.error for maximum visibility on fatal exit
2256
2105
  console.error('\n╔════════════════════════════════════════════════════════════════╗');
2257
2106
  console.error('║ FATAL: Docker is not running ║');
2258
2107
  console.error('║ ║');
@@ -2286,6 +2135,10 @@ async function main() {
2286
2135
  }
2287
2136
  ensureDockerRunning();
2288
2137
  initDatabase();
2138
+ const resetCount = resetStalledMessages();
2139
+ if (resetCount > 0) {
2140
+ logger.info({ resetCount }, 'Reset stalled queue messages to pending');
2141
+ }
2289
2142
  initMemoryStore();
2290
2143
  startEmbeddingWorker();
2291
2144
  const expiredMemories = cleanupExpiredMemories();
@@ -2307,6 +2160,14 @@ async function main() {
2307
2160
  }
2308
2161
  }
2309
2162
  }
2163
+ // Resume any pending message queues from before restart
2164
+ const pendingChats = getChatsWithPendingMessages();
2165
+ for (const chatId of pendingChats) {
2166
+ if (registeredGroups[chatId]) {
2167
+ logger.info({ chatId }, 'Resuming message queue drain after restart');
2168
+ void drainQueue(chatId);
2169
+ }
2170
+ }
2310
2171
  // Set up Telegram message handlers
2311
2172
  setupTelegramHandlers();
2312
2173
  // Start dashboard
@@ -2317,16 +2178,35 @@ async function main() {
2317
2178
  setTelegramConnected(true);
2318
2179
  logger.info('Telegram bot started');
2319
2180
  // Graceful shutdown
2320
- process.once('SIGINT', () => {
2321
- logger.info('Shutting down Telegram bot');
2322
- setTelegramConnected(false);
2323
- telegrafBot.stop('SIGINT');
2324
- });
2325
- process.once('SIGTERM', () => {
2326
- logger.info('Shutting down Telegram bot');
2181
+ let shuttingDown = false;
2182
+ const gracefulShutdown = (signal) => {
2183
+ if (shuttingDown)
2184
+ return;
2185
+ shuttingDown = true;
2186
+ logger.info({ signal }, 'Graceful shutdown initiated');
2187
+ // 1. Stop accepting new work
2327
2188
  setTelegramConnected(false);
2328
- telegrafBot.stop('SIGTERM');
2329
- });
2189
+ telegrafBot.stop(signal);
2190
+ // 2. Stop all loops and watchers
2191
+ stopSchedulerLoop();
2192
+ stopBackgroundJobLoop();
2193
+ stopIpcWatcher();
2194
+ stopMaintenanceLoop();
2195
+ stopHeartbeatLoop();
2196
+ stopDaemonHealthCheckLoop();
2197
+ stopEmbeddingWorker();
2198
+ // 3. Stop HTTP servers
2199
+ stopMetricsServer();
2200
+ stopDashboard();
2201
+ // 4. Clean up Docker containers for this instance
2202
+ cleanupInstanceContainers();
2203
+ // 5. Close database
2204
+ closeDatabase();
2205
+ logger.info('Shutdown complete');
2206
+ process.exit(0);
2207
+ };
2208
+ process.once('SIGINT', () => gracefulShutdown('SIGINT'));
2209
+ process.once('SIGTERM', () => gracefulShutdown('SIGTERM'));
2330
2210
  // Start scheduler and IPC watcher
2331
2211
  // Wrapper that matches the scheduler's expected interface (Promise<void>)
2332
2212
  const sendMessageForScheduler = async (jid, text) => {