cli-claw-kit 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +245 -0
- package/config/default-groups.json +1 -0
- package/config/global-agents-md.template.md +37 -0
- package/config/mount-allowlist.json +11 -0
- package/container/Dockerfile +160 -0
- package/container/agent-runner/dist/.tsbuildinfo +1 -0
- package/container/agent-runner/dist/agent-definitions.js +22 -0
- package/container/agent-runner/dist/channel-prefixes.js +16 -0
- package/container/agent-runner/dist/codex-config.js +29 -0
- package/container/agent-runner/dist/image-detector.js +96 -0
- package/container/agent-runner/dist/index.js +2587 -0
- package/container/agent-runner/dist/mcp-tools.js +1076 -0
- package/container/agent-runner/dist/stream-event.types.js +5 -0
- package/container/agent-runner/dist/stream-processor.js +867 -0
- package/container/agent-runner/dist/types.js +6 -0
- package/container/agent-runner/dist/utils.js +115 -0
- package/container/agent-runner/package.json +36 -0
- package/container/agent-runner/prompts/security-rules.md +31 -0
- package/container/agent-runner/src/agent-definitions.ts +27 -0
- package/container/agent-runner/src/channel-prefixes.ts +16 -0
- package/container/agent-runner/src/codex-config.ts +40 -0
- package/container/agent-runner/src/image-detector.ts +116 -0
- package/container/agent-runner/src/index.ts +3107 -0
- package/container/agent-runner/src/mcp-tools.ts +1295 -0
- package/container/agent-runner/src/stream-event.types.ts +10 -0
- package/container/agent-runner/src/stream-processor.ts +932 -0
- package/container/agent-runner/src/types.ts +75 -0
- package/container/agent-runner/src/utils.ts +114 -0
- package/container/agent-runner/tsconfig.json +17 -0
- package/container/build.sh +28 -0
- package/container/entrypoint.sh +64 -0
- package/container/skills/agent-browser/SKILL.md +159 -0
- package/container/skills/install-skill/SKILL.md +64 -0
- package/container/skills/post-test-cleanup/SKILL.md +121 -0
- package/dist/.tsbuildinfo +1 -0
- package/dist/agent-output-parser.js +459 -0
- package/dist/app-root.js +52 -0
- package/dist/assistant-meta-footer.js +1 -0
- package/dist/auth.js +91 -0
- package/dist/billing.js +694 -0
- package/dist/channel-prefixes.js +16 -0
- package/dist/cli.js +86 -0
- package/dist/commands.js +79 -0
- package/dist/config.js +120 -0
- package/dist/container-runner.js +981 -0
- package/dist/daily-summary.js +210 -0
- package/dist/db.js +3683 -0
- package/dist/dingtalk.js +1347 -0
- package/dist/feishu-markdown-style.js +97 -0
- package/dist/feishu-streaming-card.js +1875 -0
- package/dist/feishu.js +1628 -0
- package/dist/file-manager.js +270 -0
- package/dist/group-queue.js +1070 -0
- package/dist/group-runtime.js +35 -0
- package/dist/host-workspace-cwd.js +85 -0
- package/dist/im-channel.js +384 -0
- package/dist/im-command-utils.js +142 -0
- package/dist/im-downloader.js +45 -0
- package/dist/im-manager.js +527 -0
- package/dist/im-utils.js +53 -0
- package/dist/image-detector.js +96 -0
- package/dist/index.js +5828 -0
- package/dist/logger.js +22 -0
- package/dist/mcp-utils.js +66 -0
- package/dist/message-attachments.js +69 -0
- package/dist/message-notifier.js +36 -0
- package/dist/middleware/auth.js +85 -0
- package/dist/mount-security.js +315 -0
- package/dist/permissions.js +67 -0
- package/dist/project-memory.js +6 -0
- package/dist/provider-pool.js +189 -0
- package/dist/qq.js +826 -0
- package/dist/reset-admin.js +42 -0
- package/dist/routes/admin.js +543 -0
- package/dist/routes/agent-definitions.js +241 -0
- package/dist/routes/agents.js +533 -0
- package/dist/routes/auth.js +675 -0
- package/dist/routes/billing.js +490 -0
- package/dist/routes/browse.js +210 -0
- package/dist/routes/bug-report.js +387 -0
- package/dist/routes/config.js +1868 -0
- package/dist/routes/files.js +671 -0
- package/dist/routes/groups.js +1367 -0
- package/dist/routes/mcp-servers.js +320 -0
- package/dist/routes/memory.js +523 -0
- package/dist/routes/monitor.js +307 -0
- package/dist/routes/skills.js +777 -0
- package/dist/routes/tasks.js +509 -0
- package/dist/routes/usage.js +64 -0
- package/dist/routes/workspace-config.js +458 -0
- package/dist/runtime-build.js +112 -0
- package/dist/runtime-command-handler.js +189 -0
- package/dist/runtime-command-registry.js +1 -0
- package/dist/runtime-config.js +1777 -0
- package/dist/runtime-identity.js +52 -0
- package/dist/schemas.js +590 -0
- package/dist/script-runner.js +64 -0
- package/dist/sdk-query.js +82 -0
- package/dist/skill-utils.js +145 -0
- package/dist/sqlite-compat.js +19 -0
- package/dist/stream-event.types.js +5 -0
- package/dist/streaming-runtime-meta.js +29 -0
- package/dist/task-scheduler.js +695 -0
- package/dist/task-utils.js +13 -0
- package/dist/telegram-pairing.js +59 -0
- package/dist/telegram.js +897 -0
- package/dist/terminal-manager.js +307 -0
- package/dist/tool-step-display.js +1 -0
- package/dist/types.js +1 -0
- package/dist/utils.js +85 -0
- package/dist/web-context.js +161 -0
- package/dist/web.js +1377 -0
- package/dist/wechat-crypto.js +182 -0
- package/dist/wechat.js +589 -0
- package/dist/workspace-runtime-reset.js +35 -0
- package/package.json +107 -0
- package/shared/assistant-meta-footer.ts +127 -0
- package/shared/channel-prefixes.ts +16 -0
- package/shared/dist/assistant-meta-footer.d.ts +29 -0
- package/shared/dist/assistant-meta-footer.js +85 -0
- package/shared/dist/channel-prefixes.d.ts +4 -0
- package/shared/dist/channel-prefixes.js +16 -0
- package/shared/dist/image-detector.d.ts +20 -0
- package/shared/dist/image-detector.js +96 -0
- package/shared/dist/runtime-command-registry.d.ts +38 -0
- package/shared/dist/runtime-command-registry.js +185 -0
- package/shared/dist/stream-event.d.ts +65 -0
- package/shared/dist/stream-event.js +8 -0
- package/shared/dist/tool-step-display.d.ts +4 -0
- package/shared/dist/tool-step-display.js +11 -0
- package/shared/image-detector.ts +116 -0
- package/shared/runtime-command-registry.ts +252 -0
- package/shared/stream-event.ts +67 -0
- package/shared/tool-step-display.ts +21 -0
- package/shared/tsconfig.json +24 -0
- package/web/dist/assets/BillingPage-B1wBR_o-.js +52 -0
- package/web/dist/assets/ChatPage-6GBZ9nXN.css +32 -0
- package/web/dist/assets/ChatPage-BOJcXtaj.js +161 -0
- package/web/dist/assets/KaTeX_AMS-Regular-BQhdFMY1.woff2 +0 -0
- package/web/dist/assets/KaTeX_AMS-Regular-DMm9YOAa.woff +0 -0
- package/web/dist/assets/KaTeX_AMS-Regular-DRggAlZN.ttf +0 -0
- package/web/dist/assets/KaTeX_Caligraphic-Bold-ATXxdsX0.ttf +0 -0
- package/web/dist/assets/KaTeX_Caligraphic-Bold-BEiXGLvX.woff +0 -0
- package/web/dist/assets/KaTeX_Caligraphic-Bold-Dq_IR9rO.woff2 +0 -0
- package/web/dist/assets/KaTeX_Caligraphic-Regular-CTRA-rTL.woff +0 -0
- package/web/dist/assets/KaTeX_Caligraphic-Regular-Di6jR-x-.woff2 +0 -0
- package/web/dist/assets/KaTeX_Caligraphic-Regular-wX97UBjC.ttf +0 -0
- package/web/dist/assets/KaTeX_Fraktur-Bold-BdnERNNW.ttf +0 -0
- package/web/dist/assets/KaTeX_Fraktur-Bold-BsDP51OF.woff +0 -0
- package/web/dist/assets/KaTeX_Fraktur-Bold-CL6g_b3V.woff2 +0 -0
- package/web/dist/assets/KaTeX_Fraktur-Regular-CB_wures.ttf +0 -0
- package/web/dist/assets/KaTeX_Fraktur-Regular-CTYiF6lA.woff2 +0 -0
- package/web/dist/assets/KaTeX_Fraktur-Regular-Dxdc4cR9.woff +0 -0
- package/web/dist/assets/KaTeX_Main-Bold-Cx986IdX.woff2 +0 -0
- package/web/dist/assets/KaTeX_Main-Bold-Jm3AIy58.woff +0 -0
- package/web/dist/assets/KaTeX_Main-Bold-waoOVXN0.ttf +0 -0
- package/web/dist/assets/KaTeX_Main-BoldItalic-DxDJ3AOS.woff2 +0 -0
- package/web/dist/assets/KaTeX_Main-BoldItalic-DzxPMmG6.ttf +0 -0
- package/web/dist/assets/KaTeX_Main-BoldItalic-SpSLRI95.woff +0 -0
- package/web/dist/assets/KaTeX_Main-Italic-3WenGoN9.ttf +0 -0
- package/web/dist/assets/KaTeX_Main-Italic-BMLOBm91.woff +0 -0
- package/web/dist/assets/KaTeX_Main-Italic-NWA7e6Wa.woff2 +0 -0
- package/web/dist/assets/KaTeX_Main-Regular-B22Nviop.woff2 +0 -0
- package/web/dist/assets/KaTeX_Main-Regular-Dr94JaBh.woff +0 -0
- package/web/dist/assets/KaTeX_Main-Regular-ypZvNtVU.ttf +0 -0
- package/web/dist/assets/KaTeX_Math-BoldItalic-B3XSjfu4.ttf +0 -0
- package/web/dist/assets/KaTeX_Math-BoldItalic-CZnvNsCZ.woff2 +0 -0
- package/web/dist/assets/KaTeX_Math-BoldItalic-iY-2wyZ7.woff +0 -0
- package/web/dist/assets/KaTeX_Math-Italic-DA0__PXp.woff +0 -0
- package/web/dist/assets/KaTeX_Math-Italic-flOr_0UB.ttf +0 -0
- package/web/dist/assets/KaTeX_Math-Italic-t53AETM-.woff2 +0 -0
- package/web/dist/assets/KaTeX_SansSerif-Bold-CFMepnvq.ttf +0 -0
- package/web/dist/assets/KaTeX_SansSerif-Bold-D1sUS0GD.woff2 +0 -0
- package/web/dist/assets/KaTeX_SansSerif-Bold-DbIhKOiC.woff +0 -0
- package/web/dist/assets/KaTeX_SansSerif-Italic-C3H0VqGB.woff2 +0 -0
- package/web/dist/assets/KaTeX_SansSerif-Italic-DN2j7dab.woff +0 -0
- package/web/dist/assets/KaTeX_SansSerif-Italic-YYjJ1zSn.ttf +0 -0
- package/web/dist/assets/KaTeX_SansSerif-Regular-BNo7hRIc.ttf +0 -0
- package/web/dist/assets/KaTeX_SansSerif-Regular-CS6fqUqJ.woff +0 -0
- package/web/dist/assets/KaTeX_SansSerif-Regular-DDBCnlJ7.woff2 +0 -0
- package/web/dist/assets/KaTeX_Script-Regular-C5JkGWo-.ttf +0 -0
- package/web/dist/assets/KaTeX_Script-Regular-D3wIWfF6.woff2 +0 -0
- package/web/dist/assets/KaTeX_Script-Regular-D5yQViql.woff +0 -0
- package/web/dist/assets/KaTeX_Size1-Regular-C195tn64.woff +0 -0
- package/web/dist/assets/KaTeX_Size1-Regular-Dbsnue_I.ttf +0 -0
- package/web/dist/assets/KaTeX_Size1-Regular-mCD8mA8B.woff2 +0 -0
- package/web/dist/assets/KaTeX_Size2-Regular-B7gKUWhC.ttf +0 -0
- package/web/dist/assets/KaTeX_Size2-Regular-Dy4dx90m.woff2 +0 -0
- package/web/dist/assets/KaTeX_Size2-Regular-oD1tc_U0.woff +0 -0
- package/web/dist/assets/KaTeX_Size3-Regular-CTq5MqoE.woff +0 -0
- package/web/dist/assets/KaTeX_Size3-Regular-DgpXs0kz.ttf +0 -0
- package/web/dist/assets/KaTeX_Size4-Regular-BF-4gkZK.woff +0 -0
- package/web/dist/assets/KaTeX_Size4-Regular-DWFBv043.ttf +0 -0
- package/web/dist/assets/KaTeX_Size4-Regular-Dl5lxZxV.woff2 +0 -0
- package/web/dist/assets/KaTeX_Typewriter-Regular-C0xS9mPB.woff +0 -0
- package/web/dist/assets/KaTeX_Typewriter-Regular-CO6r4hn1.woff2 +0 -0
- package/web/dist/assets/KaTeX_Typewriter-Regular-D3Ib7_Hf.ttf +0 -0
- package/web/dist/assets/SettingsPage-DoY7FoZ_.js +153 -0
- package/web/dist/assets/ShareImageDialog-C1ga8b7l.js +22 -0
- package/web/dist/assets/TasksPage-CRivnNsx.js +14 -0
- package/web/dist/assets/_basePickBy-Bf-bSoS9.js +1 -0
- package/web/dist/assets/_baseUniq-zAOaCuKw.js +1 -0
- package/web/dist/assets/arc-Dm9mVQ9U.js +1 -0
- package/web/dist/assets/architectureDiagram-2XIMDMQ5-BLmzX1wr.js +36 -0
- package/web/dist/assets/band-CquvqAHh.js +1 -0
- package/web/dist/assets/blockDiagram-WCTKOSBZ-B9pcqm3j.js +132 -0
- package/web/dist/assets/c4Diagram-IC4MRINW-Cytx1q3b.js +10 -0
- package/web/dist/assets/channel-BOVj73LR.js +1 -0
- package/web/dist/assets/channel-meta-CQD0Pei-.js +41 -0
- package/web/dist/assets/chunk-4BX2VUAB-0ToDr6RE.js +1 -0
- package/web/dist/assets/chunk-55IACEB6-DQDjnXfS.js +1 -0
- package/web/dist/assets/chunk-FMBD7UC4-Di8ABm6c.js +15 -0
- package/web/dist/assets/chunk-JSJVCQXG-BZQN6rnX.js +1 -0
- package/web/dist/assets/chunk-KX2RTZJC-zBbcpaN_.js +1 -0
- package/web/dist/assets/chunk-NQ4KR5QH-BCrLoU88.js +220 -0
- package/web/dist/assets/chunk-QZHKN3VN-Bqk8juan.js +1 -0
- package/web/dist/assets/chunk-WL4C6EOR-D2YX-MHY.js +189 -0
- package/web/dist/assets/classDiagram-VBA2DB6C-DUUoMyaK.js +1 -0
- package/web/dist/assets/classDiagram-v2-RAHNMMFH-DUUoMyaK.js +1 -0
- package/web/dist/assets/clone-BmaCesfa.js +1 -0
- package/web/dist/assets/cose-bilkent-S5V4N54A-CTsv6qQA.js +1 -0
- package/web/dist/assets/cytoscape.esm-BQaXIfA_.js +331 -0
- package/web/dist/assets/dagre-KLK3FWXG-Ci4Jh9nu.js +4 -0
- package/web/dist/assets/defaultLocale-DX6XiGOO.js +1 -0
- package/web/dist/assets/diagram-E7M64L7V-BFRnfTI2.js +24 -0
- package/web/dist/assets/diagram-IFDJBPK2-B7Zhnp0b.js +43 -0
- package/web/dist/assets/diagram-P4PSJMXO-BVyP7nwq.js +24 -0
- package/web/dist/assets/erDiagram-INFDFZHY-NorKdTOF.js +70 -0
- package/web/dist/assets/error-CGD5mp5f.js +1 -0
- package/web/dist/assets/flowDiagram-PKNHOUZH-Ch97nABF.js +162 -0
- package/web/dist/assets/ganttDiagram-A5KZAMGK-BQ2pLWsy.js +292 -0
- package/web/dist/assets/gitGraphDiagram-K3NZZRJ6-bcvnBsD2.js +65 -0
- package/web/dist/assets/graph-CeAEckur.js +1 -0
- package/web/dist/assets/index-CPnL1_qC.js +768 -0
- package/web/dist/assets/index-DVevCbcO.css +10 -0
- package/web/dist/assets/infoDiagram-LFFYTUFH-CcsrFdj-.js +2 -0
- package/web/dist/assets/init-Dmth1JHB.js +1 -0
- package/web/dist/assets/ishikawaDiagram-PHBUUO56-1upyMfHN.js +70 -0
- package/web/dist/assets/journeyDiagram-4ABVD52K-CKUi-V0c.js +139 -0
- package/web/dist/assets/kanban-definition-K7BYSVSG-DOnQwXfL.js +89 -0
- package/web/dist/assets/layout-BmMMqTnJ.js +1 -0
- package/web/dist/assets/linear-DiaJloY5.js +1 -0
- package/web/dist/assets/mermaid.core-BWLV1B2v.js +254 -0
- package/web/dist/assets/mindmap-definition-YRQLILUH-BeAKHVWP.js +68 -0
- package/web/dist/assets/ordinal-DILIJJjt.js +1 -0
- package/web/dist/assets/pieDiagram-SKSYHLDU-DfiMSfWo.js +30 -0
- package/web/dist/assets/quadrantDiagram-337W2JSQ-wZxZOJxd.js +7 -0
- package/web/dist/assets/requirementDiagram-Z7DCOOCP-BK4HHm17.js +73 -0
- package/web/dist/assets/sankeyDiagram-WA2Y5GQK-BX6t2avX.js +10 -0
- package/web/dist/assets/sequenceDiagram-2WXFIKYE-BPQlkbAa.js +145 -0
- package/web/dist/assets/sheet-rI0FfB1g.js +6 -0
- package/web/dist/assets/sliders-horizontal-CuijWFNK.js +6 -0
- package/web/dist/assets/sparkles-BsMYXJoT.js +11 -0
- package/web/dist/assets/square-0CqMX1Q3.js +11 -0
- package/web/dist/assets/stateDiagram-RAJIS63D-DxkV0Vwd.js +1 -0
- package/web/dist/assets/stateDiagram-v2-FVOUBMTO-qLYoiOPe.js +1 -0
- package/web/dist/assets/step-D51IIHGA.js +1 -0
- package/web/dist/assets/tasks-D8JjBTwx.js +1 -0
- package/web/dist/assets/time-O8zIGux3.js +1 -0
- package/web/dist/assets/timeline-definition-YZTLITO2-kNp1DyFc.js +61 -0
- package/web/dist/assets/treemap-KZPCXAKY-CkrClVhk.js +162 -0
- package/web/dist/assets/utils-KGAn0XTg.js +11 -0
- package/web/dist/assets/vennDiagram-LZ73GAT5-CgdzEZz4.js +34 -0
- package/web/dist/assets/xychartDiagram-JWTSCODW-DfYGPfNB.js +7 -0
- package/web/dist/assets/zap-_hKJYy7J.js +6 -0
- package/web/dist/favicon.svg +332 -0
- package/web/dist/fonts/AlibabaPuHuiTi-3-55-Regular.woff2 +0 -0
- package/web/dist/fonts/AlibabaPuHuiTi-3-65-Medium.woff2 +0 -0
- package/web/dist/fonts/AlibabaPuHuiTi-3-75-SemiBold.woff2 +0 -0
- package/web/dist/fonts/DMSans-latin-ext.woff2 +0 -0
- package/web/dist/fonts/DMSans-latin.woff2 +0 -0
- package/web/dist/icons/README.md +20 -0
- package/web/dist/icons/apple-touch-icon-180.png +0 -0
- package/web/dist/icons/icon-128.png +0 -0
- package/web/dist/icons/icon-144.png +0 -0
- package/web/dist/icons/icon-152.png +0 -0
- package/web/dist/icons/icon-192.png +0 -0
- package/web/dist/icons/icon-192.svg +332 -0
- package/web/dist/icons/icon-384.png +0 -0
- package/web/dist/icons/icon-48.png +0 -0
- package/web/dist/icons/icon-512-maskable.png +0 -0
- package/web/dist/icons/icon-512.png +0 -0
- package/web/dist/icons/icon-512.svg +332 -0
- package/web/dist/icons/icon-72.png +0 -0
- package/web/dist/icons/icon-96.png +0 -0
- package/web/dist/icons/loading-logo.svg +332 -0
- package/web/dist/icons/logo-1024.png +0 -0
- package/web/dist/icons/logo-icon.svg +332 -0
- package/web/dist/icons/logo-text.svg +332 -0
- package/web/dist/index.html +30 -0
- package/web/dist/manifest.webmanifest +1 -0
- package/web/dist/registerSW.js +1 -0
- package/web/dist/sw.js +1 -0
- package/web/dist/workbox-08d6266a.js +1 -0
|
@@ -0,0 +1,1070 @@
|
|
|
1
|
+
import { execFile } from 'child_process';
|
|
2
|
+
import fs from 'fs';
|
|
3
|
+
import path from 'path';
|
|
4
|
+
import { DATA_DIR } from './config.js';
|
|
5
|
+
import { killProcessTree } from './container-runner.js';
|
|
6
|
+
import { getTaskById } from './db.js';
|
|
7
|
+
import { getSystemSettings } from './runtime-config.js';
|
|
8
|
+
import { logger } from './logger.js';
|
|
9
|
+
const MAX_RETRIES = 5;
|
|
10
|
+
const BASE_RETRY_MS = 5000;
|
|
11
|
+
export class GroupQueue {
|
|
12
|
+
groups = new Map();
|
|
13
|
+
activeCount = 0;
|
|
14
|
+
activeContainerCount = 0;
|
|
15
|
+
activeHostProcessCount = 0;
|
|
16
|
+
waitingGroups = new Set();
|
|
17
|
+
contextOverflowGroups = new Set(); // 跟踪发生上下文溢出的 group
|
|
18
|
+
processMessagesFn = null;
|
|
19
|
+
shuttingDown = false;
|
|
20
|
+
hostModeChecker = null;
|
|
21
|
+
serializationKeyResolver = null;
|
|
22
|
+
onMaxRetriesExceededFn = null;
|
|
23
|
+
onContainerExitFn = null;
|
|
24
|
+
onRunnerStateChangeFn = null;
|
|
25
|
+
userConcurrentLimitFn = null;
|
|
26
|
+
onUnconsumedAgentIpcFn = null;
|
|
27
|
+
getGroup(groupJid) {
|
|
28
|
+
let state = this.groups.get(groupJid);
|
|
29
|
+
if (!state) {
|
|
30
|
+
state = {
|
|
31
|
+
active: false,
|
|
32
|
+
activeRunnerIsTask: false,
|
|
33
|
+
lastActivityAt: null,
|
|
34
|
+
queryInFlight: false,
|
|
35
|
+
pendingMessages: false,
|
|
36
|
+
pendingTasks: [],
|
|
37
|
+
process: null,
|
|
38
|
+
containerName: null,
|
|
39
|
+
displayName: null,
|
|
40
|
+
groupFolder: null,
|
|
41
|
+
agentId: null,
|
|
42
|
+
taskRunId: null,
|
|
43
|
+
retryCount: 0,
|
|
44
|
+
retryTimer: null,
|
|
45
|
+
restarting: false,
|
|
46
|
+
drainSentinelWritten: false,
|
|
47
|
+
hasIpcInjectedMessages: false,
|
|
48
|
+
};
|
|
49
|
+
this.groups.set(groupJid, state);
|
|
50
|
+
}
|
|
51
|
+
return state;
|
|
52
|
+
}
|
|
53
|
+
setProcessMessagesFn(fn) {
|
|
54
|
+
this.processMessagesFn = fn;
|
|
55
|
+
}
|
|
56
|
+
setHostModeChecker(fn) {
|
|
57
|
+
this.hostModeChecker = fn;
|
|
58
|
+
}
|
|
59
|
+
setSerializationKeyResolver(fn) {
|
|
60
|
+
this.serializationKeyResolver = fn;
|
|
61
|
+
}
|
|
62
|
+
setOnMaxRetriesExceeded(fn) {
|
|
63
|
+
this.onMaxRetriesExceededFn = fn;
|
|
64
|
+
}
|
|
65
|
+
setOnContainerExit(fn) {
|
|
66
|
+
this.onContainerExitFn = fn;
|
|
67
|
+
}
|
|
68
|
+
setOnRunnerStateChange(fn) {
|
|
69
|
+
this.onRunnerStateChangeFn = fn;
|
|
70
|
+
}
|
|
71
|
+
setUserConcurrentLimitChecker(fn) {
|
|
72
|
+
this.userConcurrentLimitFn = fn;
|
|
73
|
+
}
|
|
74
|
+
/**
|
|
75
|
+
* Called when an agent runner exits with unconsumed IPC message files.
|
|
76
|
+
* The callback should re-enqueue processAgentConversation for the agent.
|
|
77
|
+
* See GitHub issue #240.
|
|
78
|
+
*/
|
|
79
|
+
setOnUnconsumedAgentIpc(fn) {
|
|
80
|
+
this.onUnconsumedAgentIpcFn = fn;
|
|
81
|
+
}
|
|
82
|
+
/**
|
|
83
|
+
* 标记 group 发生了上下文溢出错误,跳过指数退避重试
|
|
84
|
+
*/
|
|
85
|
+
markContextOverflow(groupJid) {
|
|
86
|
+
this.contextOverflowGroups.add(groupJid);
|
|
87
|
+
logger.warn({ groupJid }, 'Marked group as context overflow - will skip retry backoff');
|
|
88
|
+
}
|
|
89
|
+
clearRetryTimer(state) {
|
|
90
|
+
if (state.retryTimer !== null) {
|
|
91
|
+
clearTimeout(state.retryTimer);
|
|
92
|
+
state.retryTimer = null;
|
|
93
|
+
}
|
|
94
|
+
state.retryCount = 0;
|
|
95
|
+
}
|
|
96
|
+
isHostMode(groupJid) {
|
|
97
|
+
return this.hostModeChecker?.(groupJid) ?? false;
|
|
98
|
+
}
|
|
99
|
+
getSerializationKey(groupJid) {
|
|
100
|
+
const key = this.serializationKeyResolver?.(groupJid)?.trim();
|
|
101
|
+
return key || groupJid;
|
|
102
|
+
}
|
|
103
|
+
findActiveRunnerFor(groupJid) {
|
|
104
|
+
const key = this.getSerializationKey(groupJid);
|
|
105
|
+
for (const [jid, state] of this.groups.entries()) {
|
|
106
|
+
if (!state.active)
|
|
107
|
+
continue;
|
|
108
|
+
if (this.getSerializationKey(jid) === key)
|
|
109
|
+
return jid;
|
|
110
|
+
}
|
|
111
|
+
return null;
|
|
112
|
+
}
|
|
113
|
+
hasCapacityFor(groupJid) {
|
|
114
|
+
const isHost = this.isHostMode(groupJid);
|
|
115
|
+
const systemCapacity = isHost
|
|
116
|
+
? this.activeHostProcessCount <
|
|
117
|
+
getSystemSettings().maxConcurrentHostProcesses
|
|
118
|
+
: this.activeContainerCount < getSystemSettings().maxConcurrentContainers;
|
|
119
|
+
if (!systemCapacity)
|
|
120
|
+
return false;
|
|
121
|
+
// User-level concurrent container limit (billing)
|
|
122
|
+
if (this.userConcurrentLimitFn) {
|
|
123
|
+
const result = this.userConcurrentLimitFn(groupJid);
|
|
124
|
+
if (!result.allowed)
|
|
125
|
+
return false;
|
|
126
|
+
}
|
|
127
|
+
return true;
|
|
128
|
+
}
|
|
129
|
+
resolveActiveState(groupJid) {
|
|
130
|
+
const own = this.getGroup(groupJid);
|
|
131
|
+
if (own.active && own.groupFolder)
|
|
132
|
+
return own;
|
|
133
|
+
const activeRunner = this.findActiveRunnerFor(groupJid);
|
|
134
|
+
if (!activeRunner)
|
|
135
|
+
return null;
|
|
136
|
+
const shared = this.getGroup(activeRunner);
|
|
137
|
+
if (!shared.active || !shared.groupFolder)
|
|
138
|
+
return null;
|
|
139
|
+
return shared;
|
|
140
|
+
}
|
|
141
|
+
/**
|
|
142
|
+
* Write a single _drain sentinel to the actual active main-agent runner that
|
|
143
|
+
* owns this serialization key. This must target the runner state rather than
|
|
144
|
+
* the caller's group state because sibling JIDs can share one process.
|
|
145
|
+
*/
|
|
146
|
+
requestDrainForActiveRunner(groupJid, reason) {
|
|
147
|
+
const activeRunner = this.findActiveRunnerFor(groupJid);
|
|
148
|
+
if (!activeRunner)
|
|
149
|
+
return false;
|
|
150
|
+
const runnerState = this.getGroup(activeRunner);
|
|
151
|
+
if (!runnerState.active ||
|
|
152
|
+
!runnerState.groupFolder ||
|
|
153
|
+
runnerState.agentId !== null) {
|
|
154
|
+
return false;
|
|
155
|
+
}
|
|
156
|
+
if (runnerState.drainSentinelWritten) {
|
|
157
|
+
return true;
|
|
158
|
+
}
|
|
159
|
+
const wrote = this.writeDrainSentinel(runnerState);
|
|
160
|
+
if (!wrote)
|
|
161
|
+
return false;
|
|
162
|
+
runnerState.drainSentinelWritten = true;
|
|
163
|
+
logger.info({ groupJid, activeRunner }, reason);
|
|
164
|
+
return true;
|
|
165
|
+
}
|
|
166
|
+
/** 检查指定 JID 是否有自己直接启动的活跃 runner(非通过 folder 共享匹配) */
|
|
167
|
+
hasDirectActiveRunner(groupJid) {
|
|
168
|
+
const state = this.groups.get(groupJid);
|
|
169
|
+
return state?.active === true;
|
|
170
|
+
}
|
|
171
|
+
/** Count active task runners whose JID starts with the given base JID + '#task:' */
|
|
172
|
+
countActiveTaskRunners(baseJid) {
|
|
173
|
+
const prefix = baseJid + '#task:';
|
|
174
|
+
let count = 0;
|
|
175
|
+
for (const [jid, state] of this.groups.entries()) {
|
|
176
|
+
if (state.active && jid.startsWith(prefix)) {
|
|
177
|
+
count++;
|
|
178
|
+
}
|
|
179
|
+
}
|
|
180
|
+
return count;
|
|
181
|
+
}
|
|
182
|
+
/**
|
|
183
|
+
* Returns true if the active runner for this group (or its serialization
|
|
184
|
+
* sibling) is currently executing a scheduled task rather than user messages.
|
|
185
|
+
* Used by the message loop to avoid prematurely interrupting task containers.
|
|
186
|
+
*/
|
|
187
|
+
isActiveRunnerTask(groupJid) {
|
|
188
|
+
const state = this.resolveActiveState(groupJid);
|
|
189
|
+
return state?.activeRunnerIsTask === true;
|
|
190
|
+
}
|
|
191
|
+
markRunnerActivity(groupJid) {
|
|
192
|
+
const state = this.resolveActiveState(groupJid);
|
|
193
|
+
if (!state?.active)
|
|
194
|
+
return;
|
|
195
|
+
state.lastActivityAt = Date.now();
|
|
196
|
+
}
|
|
197
|
+
/**
|
|
198
|
+
* Mark that a message was IPC-injected into the running agent.
|
|
199
|
+
* The caller (web.ts) has already advanced the per-group cursor for this
|
|
200
|
+
* message. If the agent crashes without processing it, the close handler
|
|
201
|
+
* uses this flag to force pendingMessages so drainGroup re-reads from DB.
|
|
202
|
+
*/
|
|
203
|
+
markIpcInjectedMessage(groupJid) {
|
|
204
|
+
const state = this.resolveActiveState(groupJid);
|
|
205
|
+
if (!state?.active)
|
|
206
|
+
return;
|
|
207
|
+
state.hasIpcInjectedMessages = true;
|
|
208
|
+
}
|
|
209
|
+
markRunnerQueryIdle(groupJid) {
|
|
210
|
+
const state = this.resolveActiveState(groupJid);
|
|
211
|
+
if (!state?.active)
|
|
212
|
+
return;
|
|
213
|
+
state.queryInFlight = false;
|
|
214
|
+
}
|
|
215
|
+
getStuckPendingGroups(idleThresholdMs) {
|
|
216
|
+
const now = Date.now();
|
|
217
|
+
const stuck = [];
|
|
218
|
+
for (const [jid, state] of this.groups.entries()) {
|
|
219
|
+
if (!state.active)
|
|
220
|
+
continue;
|
|
221
|
+
if (state.activeRunnerIsTask)
|
|
222
|
+
continue;
|
|
223
|
+
if (!state.pendingMessages)
|
|
224
|
+
continue;
|
|
225
|
+
if (state.agentId !== null)
|
|
226
|
+
continue;
|
|
227
|
+
if (state.restarting)
|
|
228
|
+
continue;
|
|
229
|
+
const lastActivityAt = state.lastActivityAt ?? 0;
|
|
230
|
+
if (lastActivityAt <= 0)
|
|
231
|
+
continue;
|
|
232
|
+
const idleMs = now - lastActivityAt;
|
|
233
|
+
if (idleMs < idleThresholdMs)
|
|
234
|
+
continue;
|
|
235
|
+
stuck.push({ jid, idleMs });
|
|
236
|
+
}
|
|
237
|
+
return stuck;
|
|
238
|
+
}
|
|
239
|
+
enqueueMessageCheck(groupJid) {
|
|
240
|
+
if (this.shuttingDown)
|
|
241
|
+
return;
|
|
242
|
+
const state = this.getGroup(groupJid);
|
|
243
|
+
const activeRunner = this.findActiveRunnerFor(groupJid);
|
|
244
|
+
if (state.active || (activeRunner && activeRunner !== groupJid)) {
|
|
245
|
+
state.pendingMessages = true;
|
|
246
|
+
this.waitingGroups.add(groupJid);
|
|
247
|
+
// Write _drain to the actual active runner so sibling JIDs sharing one
|
|
248
|
+
// folder also unblock immediately instead of waiting for idle timeout.
|
|
249
|
+
this.requestDrainForActiveRunner(groupJid, 'Drain sentinel written during enqueueMessageCheck to unblock pending messages');
|
|
250
|
+
logger.debug({ groupJid, activeRunner: activeRunner || groupJid }, 'Group runner active, message queued');
|
|
251
|
+
return;
|
|
252
|
+
}
|
|
253
|
+
if (!this.hasCapacityFor(groupJid)) {
|
|
254
|
+
const isHost = this.isHostMode(groupJid);
|
|
255
|
+
state.pendingMessages = true;
|
|
256
|
+
this.waitingGroups.add(groupJid);
|
|
257
|
+
logger.debug({
|
|
258
|
+
groupJid,
|
|
259
|
+
activeContainerCount: this.activeContainerCount,
|
|
260
|
+
activeHostProcessCount: this.activeHostProcessCount,
|
|
261
|
+
mode: isHost ? 'host' : 'container',
|
|
262
|
+
}, 'At concurrency limit, message queued');
|
|
263
|
+
return;
|
|
264
|
+
}
|
|
265
|
+
this.waitingGroups.delete(groupJid);
|
|
266
|
+
this.runForGroup(groupJid, 'messages');
|
|
267
|
+
}
|
|
268
|
+
enqueueTask(groupJid, taskId, fn) {
|
|
269
|
+
if (this.shuttingDown)
|
|
270
|
+
return;
|
|
271
|
+
const state = this.getGroup(groupJid);
|
|
272
|
+
// Prevent double-queuing of the same task
|
|
273
|
+
if (state.pendingTasks.some((t) => t.id === taskId)) {
|
|
274
|
+
logger.debug({ groupJid, taskId }, 'Task already queued, skipping');
|
|
275
|
+
return;
|
|
276
|
+
}
|
|
277
|
+
const activeRunner = this.findActiveRunnerFor(groupJid);
|
|
278
|
+
if (state.active || (activeRunner && activeRunner !== groupJid)) {
|
|
279
|
+
state.pendingTasks.push({ id: taskId, groupJid, fn });
|
|
280
|
+
this.waitingGroups.add(groupJid);
|
|
281
|
+
logger.debug({ groupJid, taskId, activeRunner: activeRunner || groupJid }, 'Group runner active, task queued');
|
|
282
|
+
return;
|
|
283
|
+
}
|
|
284
|
+
if (!this.hasCapacityFor(groupJid)) {
|
|
285
|
+
const isHost = this.isHostMode(groupJid);
|
|
286
|
+
state.pendingTasks.push({ id: taskId, groupJid, fn });
|
|
287
|
+
this.waitingGroups.add(groupJid);
|
|
288
|
+
logger.debug({
|
|
289
|
+
groupJid,
|
|
290
|
+
taskId,
|
|
291
|
+
activeContainerCount: this.activeContainerCount,
|
|
292
|
+
activeHostProcessCount: this.activeHostProcessCount,
|
|
293
|
+
mode: isHost ? 'host' : 'container',
|
|
294
|
+
}, 'At concurrency limit, task queued');
|
|
295
|
+
return;
|
|
296
|
+
}
|
|
297
|
+
// Run immediately
|
|
298
|
+
this.waitingGroups.delete(groupJid);
|
|
299
|
+
this.runTask(groupJid, { id: taskId, groupJid, fn });
|
|
300
|
+
}
|
|
301
|
+
registerProcess(groupJid, proc, containerName, groupFolder, displayName, agentId, taskRunId) {
|
|
302
|
+
const state = this.getGroup(groupJid);
|
|
303
|
+
state.process = proc;
|
|
304
|
+
state.containerName = containerName;
|
|
305
|
+
state.displayName = displayName || null;
|
|
306
|
+
if (groupFolder)
|
|
307
|
+
state.groupFolder = groupFolder;
|
|
308
|
+
state.agentId = agentId || null;
|
|
309
|
+
state.taskRunId = taskRunId || null;
|
|
310
|
+
}
|
|
311
|
+
/**
|
|
312
|
+
* Resolve IPC input directory for a group state.
|
|
313
|
+
* Sub-agents use a nested path: ~/.cli-claw/ipc/{folder}/agents/{agentId}/input/
|
|
314
|
+
*/
|
|
315
|
+
resolveIpcInputDir(state) {
|
|
316
|
+
if (state.taskRunId) {
|
|
317
|
+
return path.join(DATA_DIR, 'ipc', state.groupFolder, 'tasks-run', state.taskRunId, 'input');
|
|
318
|
+
}
|
|
319
|
+
if (state.agentId) {
|
|
320
|
+
return path.join(DATA_DIR, 'ipc', state.groupFolder, 'agents', state.agentId, 'input');
|
|
321
|
+
}
|
|
322
|
+
return path.join(DATA_DIR, 'ipc', state.groupFolder, 'input');
|
|
323
|
+
}
|
|
324
|
+
/**
|
|
325
|
+
* Send a follow-up message to the active container via IPC file.
|
|
326
|
+
*
|
|
327
|
+
* Returns:
|
|
328
|
+
* - 'sent': message written to IPC (包括 queryInFlight 时的排队写入)
|
|
329
|
+
* - 'no_active': no active container/process for this group
|
|
330
|
+
*/
|
|
331
|
+
sendMessage(groupJid, text, images, onInjected) {
|
|
332
|
+
const state = this.resolveActiveState(groupJid);
|
|
333
|
+
if (!state)
|
|
334
|
+
return 'no_active';
|
|
335
|
+
// If the active runner is a scheduled task (not a user-message handler),
|
|
336
|
+
// do NOT pipe user messages into it. The task container has no knowledge
|
|
337
|
+
// of the user conversation context, so any IPC message injected here would
|
|
338
|
+
// be silently consumed (or confusingly processed) by the task agent and the
|
|
339
|
+
// reply would never reach the user. Returning 'no_active' causes the
|
|
340
|
+
// caller to enqueue a fresh message-processing run that will execute once
|
|
341
|
+
// the task finishes. See GitHub issue riba2534/happyclaw#151.
|
|
342
|
+
//
|
|
343
|
+
// Exception: conversation agent tasks (virtual JIDs with #agent:) are
|
|
344
|
+
// user-message handlers started via enqueueTask. They DO accept IPC
|
|
345
|
+
// messages — blocking them causes a deadlock where the agent waits for
|
|
346
|
+
// IPC input that never arrives.
|
|
347
|
+
if (state.activeRunnerIsTask && !groupJid.includes('#agent:')) {
|
|
348
|
+
logger.debug({ groupJid }, 'Active runner is a scheduled task; deferring user message until task completes');
|
|
349
|
+
return 'no_active';
|
|
350
|
+
}
|
|
351
|
+
// queryInFlight=true:当前 query 正在执行,将消息写入 IPC 文件排队。
|
|
352
|
+
// 当前 query 完成后 waitForIpcMessage() → drainIpcInput() 会合并所有
|
|
353
|
+
// 待处理的 IPC 消息为一个 prompt,实现自然聚合(如飞书转发+评论场景)。
|
|
354
|
+
// 不再写 _drain:容器无需退出重启,复用当前进程即可。
|
|
355
|
+
const inputDir = this.resolveIpcInputDir(state);
|
|
356
|
+
try {
|
|
357
|
+
fs.mkdirSync(inputDir, { recursive: true });
|
|
358
|
+
const filename = `${Date.now()}-${Math.random().toString(36).slice(2, 6)}.json`;
|
|
359
|
+
const filepath = path.join(inputDir, filename);
|
|
360
|
+
const tempPath = `${filepath}.tmp`;
|
|
361
|
+
fs.writeFileSync(tempPath, JSON.stringify({ type: 'message', text, images }));
|
|
362
|
+
fs.renameSync(tempPath, filepath);
|
|
363
|
+
state.queryInFlight = true;
|
|
364
|
+
onInjected?.();
|
|
365
|
+
return 'sent';
|
|
366
|
+
}
|
|
367
|
+
catch {
|
|
368
|
+
return 'no_active';
|
|
369
|
+
}
|
|
370
|
+
}
|
|
371
|
+
/**
|
|
372
|
+
* Signal the active container to wind down by writing a close sentinel.
|
|
373
|
+
*/
|
|
374
|
+
closeStdin(groupJid) {
|
|
375
|
+
const state = this.resolveActiveState(groupJid);
|
|
376
|
+
if (!state)
|
|
377
|
+
return;
|
|
378
|
+
const inputDir = this.resolveIpcInputDir(state);
|
|
379
|
+
try {
|
|
380
|
+
fs.mkdirSync(inputDir, { recursive: true });
|
|
381
|
+
fs.writeFileSync(path.join(inputDir, '_close'), '');
|
|
382
|
+
}
|
|
383
|
+
catch {
|
|
384
|
+
// ignore
|
|
385
|
+
}
|
|
386
|
+
}
|
|
387
|
+
/**
|
|
388
|
+
* Remove leftover _drain and _close sentinel files from the IPC input
|
|
389
|
+
* directory. Called in finally blocks after a runner exits so that a
|
|
390
|
+
* subsequent runner for the same folder does not immediately see stale
|
|
391
|
+
* sentinels and exit prematurely.
|
|
392
|
+
*/
|
|
393
|
+
cleanupIpcSentinels(groupFolder, agentId, taskRunId) {
|
|
394
|
+
const inputDir = taskRunId
|
|
395
|
+
? path.join(DATA_DIR, 'ipc', groupFolder, 'tasks-run', taskRunId, 'input')
|
|
396
|
+
: agentId
|
|
397
|
+
? path.join(DATA_DIR, 'ipc', groupFolder, 'agents', agentId, 'input')
|
|
398
|
+
: path.join(DATA_DIR, 'ipc', groupFolder, 'input');
|
|
399
|
+
for (const name of ['_drain', '_close']) {
|
|
400
|
+
try {
|
|
401
|
+
fs.unlinkSync(path.join(inputDir, name));
|
|
402
|
+
}
|
|
403
|
+
catch {
|
|
404
|
+
// file may not exist – that's fine
|
|
405
|
+
}
|
|
406
|
+
}
|
|
407
|
+
}
|
|
408
|
+
/**
 * Check for unconsumed IPC message files (.json) after agent/task exit and
 * recover them. Handles the race where sendMessage() wrote a message file
 * into the IPC input directory but the process exited before reading it
 * (GitHub issue #240).
 *
 * @param {string} groupJid - Chat whose runner just exited.
 * @param {object} state - Group state; groupFolder/agentId/taskRunId select
 *   which IPC input directory to inspect.
 * @param {string} context - Short label for logging ('agent exit', 'task exit').
 */
recoverUnconsumedIpc(groupJid, state, context) {
    // Without a groupFolder there is no IPC directory to inspect.
    if (!state.groupFolder)
        return;
    try {
        if (!this.hasRemainingIpcMessages(state.groupFolder, state.agentId, state.taskRunId))
            return;
        // Agent conversations: hand the leftover messages back to the
        // registered callback so the agent run can be re-enqueued.
        if (state.agentId && this.onUnconsumedAgentIpcFn) {
            logger.warn({ groupJid, agentId: state.agentId }, `Unconsumed IPC messages found after ${context}, re-enqueuing`);
            this.onUnconsumedAgentIpcFn(groupJid, state.agentId);
        }
        // Plain group runs (not task runs): mark pending so drainGroup
        // schedules a fresh processing pass. Task runs get no recovery here.
        else if (!state.taskRunId) {
            state.pendingMessages = true;
            logger.warn({ groupJid }, `Unconsumed IPC messages found after ${context}, marking pending`);
        }
    }
    catch (err) {
        // Recovery is best-effort; log and move on rather than failing the
        // caller's finally block.
        logger.warn({ groupJid, err }, 'Failed to check remaining IPC messages');
    }
}
|
|
438
|
+
hasRemainingIpcMessages(groupFolder, agentId, taskRunId) {
|
|
439
|
+
const inputDir = taskRunId
|
|
440
|
+
? path.join(DATA_DIR, 'ipc', groupFolder, 'tasks-run', taskRunId, 'input')
|
|
441
|
+
: agentId
|
|
442
|
+
? path.join(DATA_DIR, 'ipc', groupFolder, 'agents', agentId, 'input')
|
|
443
|
+
: path.join(DATA_DIR, 'ipc', groupFolder, 'input');
|
|
444
|
+
try {
|
|
445
|
+
const files = fs.readdirSync(inputDir);
|
|
446
|
+
return files.some((f) => f.endsWith('.json'));
|
|
447
|
+
}
|
|
448
|
+
catch {
|
|
449
|
+
return false;
|
|
450
|
+
}
|
|
451
|
+
}
|
|
452
|
+
/**
|
|
453
|
+
* Signal the active container to finish the current query and then exit.
|
|
454
|
+
* Unlike _close which exits immediately from waitForIpcMessage, _drain
|
|
455
|
+
* is only checked after the current query completes, ensuring one-question-
|
|
456
|
+
* one-answer semantics.
|
|
457
|
+
*/
|
|
458
|
+
writeDrainSentinel(state) {
|
|
459
|
+
const inputDir = this.resolveIpcInputDir(state);
|
|
460
|
+
try {
|
|
461
|
+
fs.mkdirSync(inputDir, { recursive: true });
|
|
462
|
+
fs.writeFileSync(path.join(inputDir, '_drain'), '');
|
|
463
|
+
return true;
|
|
464
|
+
}
|
|
465
|
+
catch {
|
|
466
|
+
return false;
|
|
467
|
+
}
|
|
468
|
+
}
|
|
469
|
+
/**
|
|
470
|
+
* Close all active containers/processes so they restart with fresh credentials.
|
|
471
|
+
* Called after OAuth token refresh to ensure running agents pick up new tokens.
|
|
472
|
+
*/
|
|
473
|
+
closeAllActiveForCredentialRefresh() {
|
|
474
|
+
let closed = 0;
|
|
475
|
+
for (const [jid, state] of this.groups) {
|
|
476
|
+
if (state.active && state.groupFolder) {
|
|
477
|
+
const inputDir = this.resolveIpcInputDir(state);
|
|
478
|
+
try {
|
|
479
|
+
fs.mkdirSync(inputDir, { recursive: true });
|
|
480
|
+
fs.writeFileSync(path.join(inputDir, '_close'), '');
|
|
481
|
+
closed++;
|
|
482
|
+
logger.info({ groupJid: jid, groupFolder: state.groupFolder }, 'Sent close signal for credential refresh');
|
|
483
|
+
}
|
|
484
|
+
catch {
|
|
485
|
+
// ignore
|
|
486
|
+
}
|
|
487
|
+
}
|
|
488
|
+
}
|
|
489
|
+
if (closed > 0) {
|
|
490
|
+
logger.info({ closed }, 'Closed active containers/processes for credential refresh');
|
|
491
|
+
}
|
|
492
|
+
return closed;
|
|
493
|
+
}
|
|
494
|
+
/**
|
|
495
|
+
* Interrupt the current query for the same chat only (do not cross-interrupt
|
|
496
|
+
* sibling chats that share a serialized runner/folder).
|
|
497
|
+
*
|
|
498
|
+
* Writes a _interrupt sentinel that agent-runner detects and calls
|
|
499
|
+
* query.interrupt(). The container stays alive and accepts new messages.
|
|
500
|
+
*/
|
|
501
|
+
interruptQuery(groupJid) {
|
|
502
|
+
// Use resolveActiveState so sibling JIDs (feishu/telegram sharing the
|
|
503
|
+
// same folder as a web group) are correctly resolved to the active runner.
|
|
504
|
+
const state = this.resolveActiveState(groupJid);
|
|
505
|
+
if (!state)
|
|
506
|
+
return false;
|
|
507
|
+
this.clearRetryTimer(state);
|
|
508
|
+
const inputDir = this.resolveIpcInputDir(state);
|
|
509
|
+
try {
|
|
510
|
+
fs.mkdirSync(inputDir, { recursive: true });
|
|
511
|
+
try {
|
|
512
|
+
fs.chmodSync(inputDir, 0o777);
|
|
513
|
+
}
|
|
514
|
+
catch {
|
|
515
|
+
/* ignore */
|
|
516
|
+
}
|
|
517
|
+
fs.writeFileSync(path.join(inputDir, '_interrupt'), '');
|
|
518
|
+
logger.info({ groupJid, inputDir }, 'Interrupt sentinel written');
|
|
519
|
+
return true;
|
|
520
|
+
}
|
|
521
|
+
catch (err) {
|
|
522
|
+
logger.warn({ groupJid, inputDir, err }, 'Failed to write interrupt sentinel');
|
|
523
|
+
return false;
|
|
524
|
+
}
|
|
525
|
+
}
|
|
526
|
+
/**
 * Force-stop a group's active container and clear queued work.
 * Returns a promise that resolves when the container has fully exited
 * (state.active becomes false), not just when docker stop completes.
 *
 * @param {string} groupJid - Chat whose runner should be stopped; sibling
 *   JIDs sharing a serialized runner are resolved via findActiveRunnerFor.
 * @param {{force?: boolean}} [options] - force=true skips the graceful
 *   docker-stop/SIGTERM phase and kills immediately.
 * @throws {Error} when the runner is still marked active after force-kill.
 */
async stopGroup(groupJid, options) {
    const force = options?.force ?? false;
    // Clear queued work on the state the caller named, even if the actual
    // runner lives under a different (sibling) JID.
    const requestedState = this.getGroup(groupJid);
    requestedState.pendingMessages = false;
    requestedState.pendingTasks = [];
    this.clearRetryTimer(requestedState);
    const activeRunner = this.findActiveRunnerFor(groupJid);
    const targetJid = activeRunner || groupJid;
    const state = this.getGroup(targetJid);
    // When the runner JID differs, clear its queued work too.
    if (targetJid !== groupJid) {
        state.pendingMessages = false;
        state.pendingTasks = [];
        this.clearRetryTimer(state);
    }
    this.waitingGroups.delete(groupJid);
    this.waitingGroups.delete(targetJid);
    // Ask the runner to wind down via the _close sentinel first.
    if (state.groupFolder) {
        this.closeStdin(targetJid);
    }
    if (force) {
        // Force mode: skip graceful stop, go straight to kill
        if (state.containerName) {
            const name = state.containerName;
            await new Promise((resolve) => {
                // Errors are ignored: the container may already be gone.
                execFile('docker', ['kill', name], { timeout: 5000 }, () => resolve());
            });
        }
        else if (state.process && !state.process.killed) {
            killProcessTree(state.process, 'SIGKILL');
        }
        // Poll until the run's finally block flips state.active, up to 5s.
        if (state.active) {
            const start = Date.now();
            while (state.active && Date.now() - start < 5000) {
                await new Promise((r) => setTimeout(r, 100));
            }
        }
    }
    else {
        // Graceful mode: try SIGTERM/docker stop first
        if (state.containerName) {
            const name = state.containerName;
            await new Promise((resolve) => {
                execFile('docker', ['stop', name], { timeout: 10000 }, () => resolve());
            });
        }
        else if (state.process && !state.process.killed) {
            killProcessTree(state.process, 'SIGTERM');
        }
        // Wait for state.active to become false (runForGroup/runTask finally block)
        if (state.active) {
            const maxWait = 10000;
            const start = Date.now();
            while (state.active && Date.now() - start < maxWait) {
                await new Promise((r) => setTimeout(r, 100));
            }
        }
        // Graceful stop timed out — force-kill the container
        if (state.active && state.containerName) {
            const killName = state.containerName;
            logger.warn({ groupJid: targetJid, containerName: killName }, 'Graceful stop timed out, force-killing container');
            await new Promise((resolve) => {
                execFile('docker', ['kill', killName], { timeout: 5000 }, () => resolve());
            });
            // Brief poll for the exit handler to observe the kill.
            const killStart = Date.now();
            while (state.active && Date.now() - killStart < 5000) {
                await new Promise((r) => setTimeout(r, 100));
            }
        }
        else if (state.active && state.process) {
            // Host-process fallback: escalate to SIGKILL and poll briefly.
            killProcessTree(state.process, 'SIGKILL');
            const killStart = Date.now();
            while (state.active && Date.now() - killStart < 5000) {
                await new Promise((r) => setTimeout(r, 100));
            }
        }
    }
    // Surface the failure to the caller rather than pretending success.
    if (state.active) {
        logger.error({ groupJid: targetJid }, 'Container still active after force-kill in stopGroup');
        throw new Error(`Failed to stop container for group ${targetJid}`);
    }
}
|
|
612
|
+
/**
 * Stop the running container, wait for it to finish, then start a new one.
 *
 * @param {string} groupJid - Chat to restart; sibling JIDs sharing a
 *   serialized runner are resolved via findActiveRunnerFor.
 * @throws {Error} when the old runner is still active after force-kill.
 */
async restartGroup(groupJid) {
    const activeRunner = this.findActiveRunnerFor(groupJid);
    const targetJid = activeRunner || groupJid;
    const state = this.getGroup(targetJid);
    // Re-entrancy guard: only one restart per runner at a time.
    if (state.restarting) {
        logger.warn({ groupJid: targetJid }, 'Restart already in progress, skipping');
        return;
    }
    state.restarting = true;
    try {
        // Ask the runner to wind down gracefully via the _close sentinel.
        if (state.groupFolder) {
            this.closeStdin(targetJid);
        }
        // Give agent-runner time to detect _close sentinel and exit gracefully
        // before sending SIGTERM. The IPC poll interval is 500ms, so 2s is
        // generous enough for the agent to finish its current operation and
        // emit the final session ID. (Host-process mode only; docker stop
        // already grants its own grace period.)
        if (state.groupFolder && !state.containerName) {
            const graceStart = Date.now();
            while (state.active && Date.now() - graceStart < 2000) {
                await new Promise((r) => setTimeout(r, 200));
            }
        }
        // Stop docker container / host process
        if (state.containerName) {
            const name = state.containerName;
            await new Promise((resolve) => {
                // Errors ignored: container may already have exited.
                execFile('docker', ['stop', name], { timeout: 15000 }, () => resolve());
            });
        }
        else if (state.active && state.process && !state.process.killed) {
            killProcessTree(state.process, 'SIGTERM');
        }
        // Wait for runForGroup to finish and reset state
        const maxWait = 20000;
        const start = Date.now();
        while (state.active && Date.now() - start < maxWait) {
            await new Promise((r) => setTimeout(r, 200));
        }
        if (state.active) {
            logger.warn({ groupJid: targetJid }, 'Timeout waiting for container to stop, force-killing');
            // Force-kill the container to avoid conflicts with the new one
            if (state.containerName) {
                const killName = state.containerName;
                await new Promise((resolve) => {
                    execFile('docker', ['kill', killName], { timeout: 5000 }, () => resolve());
                });
                // Brief wait for process cleanup after force-kill
                const killStart = Date.now();
                while (state.active && Date.now() - killStart < 5000) {
                    await new Promise((r) => setTimeout(r, 200));
                }
            }
            else if (state.process) {
                killProcessTree(state.process, 'SIGKILL');
                const killStart = Date.now();
                while (state.active && Date.now() - killStart < 5000) {
                    await new Promise((r) => setTimeout(r, 200));
                }
            }
        }
        // If the old runner refuses to die, starting a replacement would
        // conflict with it — fail loudly instead.
        if (state.active) {
            logger.error({ groupJid: targetJid }, 'Container still active after force-kill in restartGroup');
            throw new Error(`Failed to restart container for group ${targetJid}`);
        }
        // Trigger a fresh container start
        logger.info({ groupJid: targetJid }, 'Restarting container');
        this.enqueueMessageCheck(groupJid);
    }
    finally {
        // Always release the re-entrancy guard, even on throw.
        state.restarting = false;
    }
}
|
|
688
|
+
/**
 * Run one message-processing pass for a group: claim an execution slot,
 * invoke processMessagesFn, then unconditionally tear the per-run state
 * down in the finally block so the slot is released even on failure.
 *
 * @param {string} groupJid - Group whose queued messages are processed.
 * @param {string} reason - Trigger label ('drain', retry, …), logging only.
 */
async runForGroup(groupJid, reason) {
    const state = this.getGroup(groupJid);
    const isHostMode = this.isHostMode(groupJid);
    // Claim the slot synchronously so capacity checks made before the first
    // await already see this run.
    state.active = true;
    state.activeRunnerIsTask = false;
    state.lastActivityAt = Date.now();
    state.queryInFlight = true;
    state.pendingMessages = false;
    this.waitingGroups.delete(groupJid);
    this.activeCount++;
    // Host-process and container runs are counted in separate pools.
    if (isHostMode) {
        this.activeHostProcessCount++;
    }
    else {
        this.activeContainerCount++;
    }
    logger.debug({
        groupJid,
        reason,
        activeCount: this.activeCount,
        activeContainerCount: this.activeContainerCount,
    }, 'Starting container for group');
    // Notify listeners; their failures must not abort the run.
    try {
        this.onRunnerStateChangeFn?.(groupJid, 'running');
    }
    catch (err) {
        logger.error({ groupJid, err }, 'onRunnerStateChange(running) failed');
    }
    try {
        if (this.processMessagesFn) {
            const success = await this.processMessagesFn(groupJid);
            if (success) {
                state.retryCount = 0;
                // Defensive: clear any lingering retry timer from a previous failed
                // run that was superseded by a successful drain-triggered run.
                this.clearRetryTimer(state);
            }
            else {
                this.scheduleRetry(groupJid, state);
            }
        }
    }
    catch (err) {
        logger.error({ groupJid, err }, 'Error processing messages for group');
        this.scheduleRetry(groupJid, state);
    }
    finally {
        // Clean up stale sentinel files before clearing groupFolder/agentId
        if (state.groupFolder) {
            try {
                this.cleanupIpcSentinels(state.groupFolder, state.agentId, state.taskRunId);
            }
            catch (err) {
                logger.warn({ groupJid, err }, 'Failed to clean up IPC sentinels');
            }
            this.recoverUnconsumedIpc(groupJid, state, 'agent exit');
        }
        // If messages were IPC-injected during this run, always mark pending
        // so drainGroup triggers a fresh processGroupMessages. If the agent
        // already replied to them, processGroupMessages will find 0 new messages
        // (cursor was committed) and return immediately — harmless. If the
        // agent crashed, this ensures the messages are re-read from DB.
        if (state.hasIpcInjectedMessages) {
            state.pendingMessages = true;
            logger.debug({ groupJid }, 'IPC-injected messages detected, marking pending for safety re-check');
        }
        // Reset all per-run fields so the next run starts from a clean slate.
        state.active = false;
        state.drainSentinelWritten = false;
        state.hasIpcInjectedMessages = false;
        state.lastActivityAt = null;
        state.queryInFlight = false;
        state.process = null;
        state.containerName = null;
        state.displayName = null;
        state.groupFolder = null;
        state.agentId = null;
        state.taskRunId = null;
        this.activeCount--;
        if (isHostMode) {
            this.activeHostProcessCount--;
        }
        else {
            this.activeContainerCount--;
        }
        // Each callback below is isolated so one failure cannot block the
        // others or leave the queue stuck.
        try {
            this.onRunnerStateChangeFn?.(groupJid, 'idle');
        }
        catch (err) {
            logger.error({ groupJid, err }, 'onRunnerStateChange(idle) failed');
        }
        try {
            this.onContainerExitFn?.(groupJid);
        }
        catch (err) {
            logger.error({ groupJid, err }, 'onContainerExit callback failed');
        }
        // Hand the freed slot to this group's remaining work or to waiters.
        try {
            this.drainGroup(groupJid);
        }
        catch (err) {
            logger.error({ groupJid, err }, 'drainGroup failed');
        }
    }
}
|
|
792
|
+
/**
 * Execute a queued task (scheduled job or dynamic agent conversation) for a
 * group, mirroring runForGroup's slot accounting and finally-block teardown.
 *
 * @param {string} groupJid - Group the task belongs to.
 * @param {{id: string|number, fn: () => Promise<unknown>}} task - Task with
 *   an id (logging / DB lookup) and the async function to run.
 */
async runTask(groupJid, task) {
    const state = this.getGroup(groupJid);
    const isHostMode = this.isHostMode(groupJid);
    // Claim the slot synchronously; activeRunnerIsTask lets sendMessage
    // defer user messages while a scheduled task holds the runner.
    state.active = true;
    state.activeRunnerIsTask = true;
    state.lastActivityAt = Date.now();
    state.queryInFlight = false;
    this.waitingGroups.delete(groupJid);
    this.activeCount++;
    // Host-process and container runs are counted in separate pools.
    if (isHostMode) {
        this.activeHostProcessCount++;
    }
    else {
        this.activeContainerCount++;
    }
    logger.debug({
        groupJid,
        taskId: task.id,
        activeCount: this.activeCount,
        activeContainerCount: this.activeContainerCount,
    }, 'Running queued task');
    // Notify listeners; their failures must not abort the task.
    try {
        this.onRunnerStateChangeFn?.(groupJid, 'running');
    }
    catch (err) {
        logger.error({ groupJid, err }, 'onRunnerStateChange(running) failed');
    }
    try {
        await task.fn();
    }
    catch (err) {
        logger.error({ groupJid, taskId: task.id, err }, 'Error running task');
    }
    finally {
        // Clean up stale sentinel files before clearing groupFolder/agentId
        if (state.groupFolder) {
            try {
                this.cleanupIpcSentinels(state.groupFolder, state.agentId, state.taskRunId);
            }
            catch (err) {
                logger.warn({ groupJid, err }, 'Failed to clean up IPC sentinels');
            }
            this.recoverUnconsumedIpc(groupJid, state, 'task exit');
        }
        // Reset all per-run fields so the next run starts from a clean slate.
        state.active = false;
        state.activeRunnerIsTask = false;
        state.drainSentinelWritten = false;
        state.lastActivityAt = null;
        state.queryInFlight = false;
        state.process = null;
        state.containerName = null;
        state.displayName = null;
        state.groupFolder = null;
        state.agentId = null;
        state.taskRunId = null;
        this.activeCount--;
        if (isHostMode) {
            this.activeHostProcessCount--;
        }
        else {
            this.activeContainerCount--;
        }
        // Each callback below is isolated so one failure cannot block the
        // others or leave the queue stuck.
        try {
            this.onRunnerStateChangeFn?.(groupJid, 'idle');
        }
        catch (err) {
            logger.error({ groupJid, err }, 'onRunnerStateChange(idle) failed');
        }
        try {
            this.onContainerExitFn?.(groupJid);
        }
        catch (err) {
            logger.error({ groupJid, err }, 'onContainerExit callback failed');
        }
        // Hand the freed slot to this group's remaining work or to waiters.
        try {
            this.drainGroup(groupJid);
        }
        catch (err) {
            logger.error({ groupJid, err }, 'drainGroup failed');
        }
    }
}
|
|
874
|
+
scheduleRetry(groupJid, state) {
|
|
875
|
+
// 清除可能存在的旧定时器(不重置 retryCount,因为这里在递增)
|
|
876
|
+
if (state.retryTimer !== null) {
|
|
877
|
+
clearTimeout(state.retryTimer);
|
|
878
|
+
state.retryTimer = null;
|
|
879
|
+
}
|
|
880
|
+
// 检查是否为上下文溢出错误,如果是则跳过重试
|
|
881
|
+
if (this.contextOverflowGroups.has(groupJid)) {
|
|
882
|
+
logger.warn({ groupJid }, 'Skipping retry for context overflow error (agent already retried 3 times)');
|
|
883
|
+
state.retryCount = 0;
|
|
884
|
+
this.contextOverflowGroups.delete(groupJid); // 清除标记
|
|
885
|
+
return;
|
|
886
|
+
}
|
|
887
|
+
state.retryCount++;
|
|
888
|
+
if (state.retryCount > MAX_RETRIES) {
|
|
889
|
+
logger.error({ groupJid, retryCount: state.retryCount }, 'Max retries exceeded, dropping messages (will retry on next incoming message)');
|
|
890
|
+
state.retryCount = 0;
|
|
891
|
+
try {
|
|
892
|
+
this.onMaxRetriesExceededFn?.(groupJid);
|
|
893
|
+
}
|
|
894
|
+
catch (err) {
|
|
895
|
+
logger.error({ groupJid, err }, 'onMaxRetriesExceeded callback failed');
|
|
896
|
+
}
|
|
897
|
+
return;
|
|
898
|
+
}
|
|
899
|
+
const delayMs = BASE_RETRY_MS * Math.pow(2, state.retryCount - 1);
|
|
900
|
+
logger.info({ groupJid, retryCount: state.retryCount, delayMs }, 'Scheduling retry with backoff');
|
|
901
|
+
state.retryTimer = setTimeout(() => {
|
|
902
|
+
state.retryTimer = null;
|
|
903
|
+
if (!this.shuttingDown) {
|
|
904
|
+
this.enqueueMessageCheck(groupJid);
|
|
905
|
+
}
|
|
906
|
+
}, delayMs);
|
|
907
|
+
}
|
|
908
|
+
drainGroup(groupJid) {
|
|
909
|
+
if (this.shuttingDown)
|
|
910
|
+
return;
|
|
911
|
+
const state = this.getGroup(groupJid);
|
|
912
|
+
const activeRunner = this.findActiveRunnerFor(groupJid);
|
|
913
|
+
if (activeRunner && activeRunner !== groupJid) {
|
|
914
|
+
this.waitingGroups.add(groupJid);
|
|
915
|
+
return;
|
|
916
|
+
}
|
|
917
|
+
if (!this.hasCapacityFor(groupJid)) {
|
|
918
|
+
this.waitingGroups.add(groupJid);
|
|
919
|
+
return;
|
|
920
|
+
}
|
|
921
|
+
// Tasks first (they won't be re-discovered from SQLite like messages)
|
|
922
|
+
while (state.pendingTasks.length > 0) {
|
|
923
|
+
const task = state.pendingTasks.shift();
|
|
924
|
+
// Check if scheduled task is still active before occupying a slot.
|
|
925
|
+
// Only skip tasks that exist in the DB and are no longer active.
|
|
926
|
+
// Dynamic tasks (agent conversations, etc.) don't have DB entries
|
|
927
|
+
// and must always be allowed to run.
|
|
928
|
+
const dbTask = getTaskById(task.id);
|
|
929
|
+
if (dbTask && dbTask.status !== 'active') {
|
|
930
|
+
logger.info({ groupJid, taskId: task.id }, 'Skipping cancelled/deleted task during drain');
|
|
931
|
+
continue;
|
|
932
|
+
}
|
|
933
|
+
this.runTask(groupJid, task);
|
|
934
|
+
return;
|
|
935
|
+
}
|
|
936
|
+
// Then pending messages — but NOT if a retry timer is already scheduled.
|
|
937
|
+
// When processMessagesFn() fails, both scheduleRetry() and drainGroup() fire.
|
|
938
|
+
// Without this guard, drainGroup would start a new container while the retry
|
|
939
|
+
// timer later starts another, causing duplicate processing of the same messages.
|
|
940
|
+
if (state.pendingMessages && !state.retryTimer) {
|
|
941
|
+
this.runForGroup(groupJid, 'drain');
|
|
942
|
+
return;
|
|
943
|
+
}
|
|
944
|
+
this.waitingGroups.delete(groupJid);
|
|
945
|
+
// Nothing pending for this group; check if other groups are waiting for a slot
|
|
946
|
+
this.drainWaiting();
|
|
947
|
+
}
|
|
948
|
+
drainWaiting() {
|
|
949
|
+
// Drain waiting groups one at a time, re-checking capacity after each launch.
|
|
950
|
+
// runTask/runForGroup increment counters synchronously, so capacity checks
|
|
951
|
+
// stay accurate even though the async work is not awaited.
|
|
952
|
+
const candidates = [...this.waitingGroups];
|
|
953
|
+
for (const jid of candidates) {
|
|
954
|
+
const activeRunner = this.findActiveRunnerFor(jid);
|
|
955
|
+
if (activeRunner && activeRunner !== jid)
|
|
956
|
+
continue;
|
|
957
|
+
if (!this.hasCapacityFor(jid))
|
|
958
|
+
continue;
|
|
959
|
+
this.waitingGroups.delete(jid);
|
|
960
|
+
const state = this.getGroup(jid);
|
|
961
|
+
// Prioritize tasks over messages
|
|
962
|
+
if (state.pendingTasks.length > 0) {
|
|
963
|
+
// Skip cancelled/deleted scheduled tasks (but allow dynamic tasks
|
|
964
|
+
// like agent conversations that have no DB entry).
|
|
965
|
+
let validTask;
|
|
966
|
+
while (state.pendingTasks.length > 0) {
|
|
967
|
+
const candidate = state.pendingTasks.shift();
|
|
968
|
+
const dbTask = getTaskById(candidate.id);
|
|
969
|
+
if (dbTask && dbTask.status !== 'active') {
|
|
970
|
+
logger.info({ groupJid: jid, taskId: candidate.id }, 'Skipping cancelled/deleted task during drainWaiting');
|
|
971
|
+
continue;
|
|
972
|
+
}
|
|
973
|
+
validTask = candidate;
|
|
974
|
+
break;
|
|
975
|
+
}
|
|
976
|
+
if (validTask) {
|
|
977
|
+
this.runTask(jid, validTask);
|
|
978
|
+
}
|
|
979
|
+
else if (state.pendingMessages && !state.retryTimer) {
|
|
980
|
+
// All tasks were stale, fall through to messages
|
|
981
|
+
// (skip if retry timer is pending to avoid duplicate processing)
|
|
982
|
+
this.runForGroup(jid, 'drain');
|
|
983
|
+
}
|
|
984
|
+
}
|
|
985
|
+
else if (state.pendingMessages && !state.retryTimer) {
|
|
986
|
+
// Skip if retry timer is pending to avoid duplicate processing
|
|
987
|
+
this.runForGroup(jid, 'drain');
|
|
988
|
+
}
|
|
989
|
+
// If neither pending, skip this group
|
|
990
|
+
}
|
|
991
|
+
}
|
|
992
|
+
getStatus() {
|
|
993
|
+
const groups = [];
|
|
994
|
+
for (const [jid, state] of this.groups) {
|
|
995
|
+
groups.push({
|
|
996
|
+
jid,
|
|
997
|
+
active: state.active,
|
|
998
|
+
pendingMessages: state.pendingMessages,
|
|
999
|
+
pendingTasks: state.pendingTasks.length,
|
|
1000
|
+
containerName: state.containerName,
|
|
1001
|
+
displayName: state.displayName,
|
|
1002
|
+
});
|
|
1003
|
+
}
|
|
1004
|
+
return {
|
|
1005
|
+
activeCount: this.activeCount,
|
|
1006
|
+
activeContainerCount: this.activeContainerCount,
|
|
1007
|
+
activeHostProcessCount: this.activeHostProcessCount,
|
|
1008
|
+
waitingCount: this.waitingGroups.size,
|
|
1009
|
+
waitingGroupJids: Array.from(this.waitingGroups),
|
|
1010
|
+
groups,
|
|
1011
|
+
};
|
|
1012
|
+
}
|
|
1013
|
+
/**
 * Shut the queue down: stop launching new work, wait up to gracePeriodMs
 * for active runs to finish naturally, then force-stop whatever is left
 * (docker stop for containers, SIGTERM→SIGKILL for host processes).
 *
 * @param {number} gracePeriodMs - How long to wait before force-stopping.
 */
async shutdown(gracePeriodMs) {
    this.shuttingDown = true;
    // Clear all pending retry timers so no container restarts mid-shutdown.
    for (const state of this.groups.values()) {
        this.clearRetryTimer(state);
    }
    logger.info({
        activeCount: this.activeCount,
        activeContainerCount: this.activeContainerCount,
        gracePeriodMs,
    }, 'GroupQueue shutting down, waiting for containers');
    // Wait for activeCount to reach zero or timeout
    const startTime = Date.now();
    while (this.activeCount > 0 && Date.now() - startTime < gracePeriodMs) {
        await new Promise((resolve) => setTimeout(resolve, 100));
    }
    // If still active after grace period, force stop all containers
    if (this.activeCount > 0) {
        logger.warn({
            activeCount: this.activeCount,
            activeContainerCount: this.activeContainerCount,
        }, 'Grace period expired, force stopping containers');
        const stopPromises = [];
        for (const [jid, state] of this.groups) {
            if (state.containerName) {
                const containerName = state.containerName;
                // docker stop with a 5s in-container grace; the outer timeout
                // bounds the docker CLI call itself.
                const promise = new Promise((resolve) => {
                    execFile('docker', ['stop', '-t', '5', containerName], { timeout: 10000 }, (err) => {
                        if (err) {
                            logger.error({ jid, containerName, err }, 'Failed to stop container');
                        }
                        resolve();
                    });
                });
                stopPromises.push(promise);
            }
            else if (state.process && !state.process.killed) {
                const proc = state.process;
                // SIGTERM first; escalate to SIGKILL after 3s if the process
                // has neither exited nor been signalled.
                const promise = new Promise((resolve) => {
                    if (!killProcessTree(proc, 'SIGTERM')) {
                        resolve();
                        return;
                    }
                    setTimeout(() => {
                        if (proc.exitCode === null && proc.signalCode === null) {
                            killProcessTree(proc, 'SIGKILL');
                        }
                        resolve();
                    }, 3000);
                });
                stopPromises.push(promise);
            }
        }
        await Promise.all(stopPromises);
    }
    logger.info({ activeCount: this.activeCount }, 'GroupQueue shutdown complete');
}
|
|
1070
|
+
}
|