dominds 1.16.4 → 1.16.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109)
  1. package/dist/dialog-display-state.d.ts +1 -0
  2. package/dist/dialog-display-state.js +71 -2
  3. package/dist/docs/mcp-support.md +6 -2
  4. package/dist/llm/api-quirks.js +34 -6
  5. package/dist/llm/client.d.ts +1 -1
  6. package/dist/llm/defaults.yaml +6 -3
  7. package/dist/llm/gen/failure-classifier.d.ts +0 -2
  8. package/dist/llm/gen/failure-classifier.js +92 -25
  9. package/dist/llm/kernel-driver/drive.js +8 -8
  10. package/dist/llm/kernel-driver/runtime.d.ts +1 -1
  11. package/dist/llm/kernel-driver/runtime.js +63 -34
  12. package/dist/server/websocket-handler.js +90 -3
  13. package/dist/tools/team_mgmt.js +2 -2
  14. package/package.json +3 -3
  15. package/webapp/dist/assets/{_basePickBy-BKLfvXfr.js → _basePickBy-B7FV6Gnn.js} +3 -3
  16. package/webapp/dist/assets/{_basePickBy-BKLfvXfr.js.map → _basePickBy-B7FV6Gnn.js.map} +1 -1
  17. package/webapp/dist/assets/{_baseUniq-DeO2MBcA.js → _baseUniq-CmVnLJpw.js} +2 -2
  18. package/webapp/dist/assets/{_baseUniq-DeO2MBcA.js.map → _baseUniq-CmVnLJpw.js.map} +1 -1
  19. package/webapp/dist/assets/{arc-Bez5-ouI.js → arc-DSJlh9AU.js} +2 -2
  20. package/webapp/dist/assets/{arc-Bez5-ouI.js.map → arc-DSJlh9AU.js.map} +1 -1
  21. package/webapp/dist/assets/{architectureDiagram-2XIMDMQ5-BiDh8CGJ.js → architectureDiagram-2XIMDMQ5-Cd2cTHzo.js} +7 -7
  22. package/webapp/dist/assets/{architectureDiagram-2XIMDMQ5-BiDh8CGJ.js.map → architectureDiagram-2XIMDMQ5-Cd2cTHzo.js.map} +1 -1
  23. package/webapp/dist/assets/{blockDiagram-WCTKOSBZ-fSZbZ3PY.js → blockDiagram-WCTKOSBZ-C0uEp_Tz.js} +7 -7
  24. package/webapp/dist/assets/{blockDiagram-WCTKOSBZ-fSZbZ3PY.js.map → blockDiagram-WCTKOSBZ-C0uEp_Tz.js.map} +1 -1
  25. package/webapp/dist/assets/{c4Diagram-IC4MRINW-C-WxkPD_.js → c4Diagram-IC4MRINW-B37U92JK.js} +3 -3
  26. package/webapp/dist/assets/{c4Diagram-IC4MRINW-C-WxkPD_.js.map → c4Diagram-IC4MRINW-B37U92JK.js.map} +1 -1
  27. package/webapp/dist/assets/{channel-CbXK2-c_.js → channel-DLHnjnQf.js} +2 -2
  28. package/webapp/dist/assets/{channel-CbXK2-c_.js.map → channel-DLHnjnQf.js.map} +1 -1
  29. package/webapp/dist/assets/{chunk-4BX2VUAB-D6CgMaUm.js → chunk-4BX2VUAB-DnZvfQyp.js} +2 -2
  30. package/webapp/dist/assets/{chunk-4BX2VUAB-D6CgMaUm.js.map → chunk-4BX2VUAB-DnZvfQyp.js.map} +1 -1
  31. package/webapp/dist/assets/{chunk-55IACEB6-CafQjmEn.js → chunk-55IACEB6-BG-cjz3M.js} +2 -2
  32. package/webapp/dist/assets/{chunk-55IACEB6-CafQjmEn.js.map → chunk-55IACEB6-BG-cjz3M.js.map} +1 -1
  33. package/webapp/dist/assets/{chunk-FMBD7UC4-CZRe1oW9.js → chunk-FMBD7UC4-B1L8cPfl.js} +2 -2
  34. package/webapp/dist/assets/{chunk-FMBD7UC4-CZRe1oW9.js.map → chunk-FMBD7UC4-B1L8cPfl.js.map} +1 -1
  35. package/webapp/dist/assets/{chunk-JSJVCQXG-C3KJDde2.js → chunk-JSJVCQXG-C65w23ZF.js} +2 -2
  36. package/webapp/dist/assets/{chunk-JSJVCQXG-C3KJDde2.js.map → chunk-JSJVCQXG-C65w23ZF.js.map} +1 -1
  37. package/webapp/dist/assets/{chunk-KX2RTZJC-1gHfMJyM.js → chunk-KX2RTZJC-_4YSMrEL.js} +2 -2
  38. package/webapp/dist/assets/{chunk-KX2RTZJC-1gHfMJyM.js.map → chunk-KX2RTZJC-_4YSMrEL.js.map} +1 -1
  39. package/webapp/dist/assets/{chunk-NQ4KR5QH-CEZ0VIio.js → chunk-NQ4KR5QH-ComURSQb.js} +4 -4
  40. package/webapp/dist/assets/{chunk-NQ4KR5QH-CEZ0VIio.js.map → chunk-NQ4KR5QH-ComURSQb.js.map} +1 -1
  41. package/webapp/dist/assets/{chunk-QZHKN3VN-BE5nbumW.js → chunk-QZHKN3VN-BrxvuRI6.js} +2 -2
  42. package/webapp/dist/assets/{chunk-QZHKN3VN-BE5nbumW.js.map → chunk-QZHKN3VN-BrxvuRI6.js.map} +1 -1
  43. package/webapp/dist/assets/{chunk-WL4C6EOR-CY1FogYe.js → chunk-WL4C6EOR-CW7tAF_z.js} +6 -6
  44. package/webapp/dist/assets/{chunk-WL4C6EOR-CY1FogYe.js.map → chunk-WL4C6EOR-CW7tAF_z.js.map} +1 -1
  45. package/webapp/dist/assets/{classDiagram-VBA2DB6C-DIAo1m4U.js → classDiagram-VBA2DB6C-BWrPbwfd.js} +7 -7
  46. package/webapp/dist/assets/{classDiagram-VBA2DB6C-DIAo1m4U.js.map → classDiagram-VBA2DB6C-BWrPbwfd.js.map} +1 -1
  47. package/webapp/dist/assets/{classDiagram-v2-RAHNMMFH-DIAo1m4U.js → classDiagram-v2-RAHNMMFH-BWrPbwfd.js} +7 -7
  48. package/webapp/dist/assets/{classDiagram-v2-RAHNMMFH-DIAo1m4U.js.map → classDiagram-v2-RAHNMMFH-BWrPbwfd.js.map} +1 -1
  49. package/webapp/dist/assets/{clone-BzZfwhKW.js → clone-B-TYPsxN.js} +2 -2
  50. package/webapp/dist/assets/{clone-BzZfwhKW.js.map → clone-B-TYPsxN.js.map} +1 -1
  51. package/webapp/dist/assets/{cose-bilkent-S5V4N54A-DIRdD9UY.js → cose-bilkent-S5V4N54A-C1bfZcVY.js} +2 -2
  52. package/webapp/dist/assets/{cose-bilkent-S5V4N54A-DIRdD9UY.js.map → cose-bilkent-S5V4N54A-C1bfZcVY.js.map} +1 -1
  53. package/webapp/dist/assets/{dagre-KLK3FWXG-BCEgv7zL.js → dagre-KLK3FWXG-BfD08d2e.js} +7 -7
  54. package/webapp/dist/assets/{dagre-KLK3FWXG-BCEgv7zL.js.map → dagre-KLK3FWXG-BfD08d2e.js.map} +1 -1
  55. package/webapp/dist/assets/{diagram-E7M64L7V-CwNdHUlg.js → diagram-E7M64L7V-Dyd9OucT.js} +8 -8
  56. package/webapp/dist/assets/{diagram-E7M64L7V-CwNdHUlg.js.map → diagram-E7M64L7V-Dyd9OucT.js.map} +1 -1
  57. package/webapp/dist/assets/{diagram-IFDJBPK2-DBtRFFBv.js → diagram-IFDJBPK2-BN-JCceb.js} +7 -7
  58. package/webapp/dist/assets/{diagram-IFDJBPK2-DBtRFFBv.js.map → diagram-IFDJBPK2-BN-JCceb.js.map} +1 -1
  59. package/webapp/dist/assets/{diagram-P4PSJMXO-BJRf8VnU.js → diagram-P4PSJMXO-C2jh_Kry.js} +7 -7
  60. package/webapp/dist/assets/{diagram-P4PSJMXO-BJRf8VnU.js.map → diagram-P4PSJMXO-C2jh_Kry.js.map} +1 -1
  61. package/webapp/dist/assets/{erDiagram-INFDFZHY-BoZdWdP2.js → erDiagram-INFDFZHY-C3Pj10OJ.js} +5 -5
  62. package/webapp/dist/assets/{erDiagram-INFDFZHY-BoZdWdP2.js.map → erDiagram-INFDFZHY-C3Pj10OJ.js.map} +1 -1
  63. package/webapp/dist/assets/{flowDiagram-PKNHOUZH-Dptcw76l.js → flowDiagram-PKNHOUZH-a5vzSSCo.js} +7 -7
  64. package/webapp/dist/assets/{flowDiagram-PKNHOUZH-Dptcw76l.js.map → flowDiagram-PKNHOUZH-a5vzSSCo.js.map} +1 -1
  65. package/webapp/dist/assets/{ganttDiagram-A5KZAMGK-qM3zsgxI.js → ganttDiagram-A5KZAMGK-DIua0Qjr.js} +3 -3
  66. package/webapp/dist/assets/{ganttDiagram-A5KZAMGK-qM3zsgxI.js.map → ganttDiagram-A5KZAMGK-DIua0Qjr.js.map} +1 -1
  67. package/webapp/dist/assets/{gitGraphDiagram-K3NZZRJ6-125S1YW0.js → gitGraphDiagram-K3NZZRJ6-CJt16FXK.js} +8 -8
  68. package/webapp/dist/assets/{gitGraphDiagram-K3NZZRJ6-125S1YW0.js.map → gitGraphDiagram-K3NZZRJ6-CJt16FXK.js.map} +1 -1
  69. package/webapp/dist/assets/{graph-PACfG8qk.js → graph-lQSuHjYm.js} +3 -3
  70. package/webapp/dist/assets/{graph-PACfG8qk.js.map → graph-lQSuHjYm.js.map} +1 -1
  71. package/webapp/dist/assets/{index-D8Klc1n-.js → index-B7llu28V.js} +198 -44
  72. package/webapp/dist/assets/{index-D8Klc1n-.js.map → index-B7llu28V.js.map} +1 -1
  73. package/webapp/dist/assets/{infoDiagram-LFFYTUFH-B9vrFy_9.js → infoDiagram-LFFYTUFH-DzJuXz5H.js} +6 -6
  74. package/webapp/dist/assets/{infoDiagram-LFFYTUFH-B9vrFy_9.js.map → infoDiagram-LFFYTUFH-DzJuXz5H.js.map} +1 -1
  75. package/webapp/dist/assets/{ishikawaDiagram-PHBUUO56-CzcXR0Tc.js → ishikawaDiagram-PHBUUO56-BP_s17vw.js} +2 -2
  76. package/webapp/dist/assets/{ishikawaDiagram-PHBUUO56-CzcXR0Tc.js.map → ishikawaDiagram-PHBUUO56-BP_s17vw.js.map} +1 -1
  77. package/webapp/dist/assets/{journeyDiagram-4ABVD52K-BzoWs6ft.js → journeyDiagram-4ABVD52K-Lic1mhBM.js} +5 -5
  78. package/webapp/dist/assets/{journeyDiagram-4ABVD52K-BzoWs6ft.js.map → journeyDiagram-4ABVD52K-Lic1mhBM.js.map} +1 -1
  79. package/webapp/dist/assets/{kanban-definition-K7BYSVSG-TJm1UiSH.js → kanban-definition-K7BYSVSG-Baf2kCwQ.js} +3 -3
  80. package/webapp/dist/assets/{kanban-definition-K7BYSVSG-TJm1UiSH.js.map → kanban-definition-K7BYSVSG-Baf2kCwQ.js.map} +1 -1
  81. package/webapp/dist/assets/{layout-D-kg27bk.js → layout-DUMDc8rv.js} +5 -5
  82. package/webapp/dist/assets/{layout-D-kg27bk.js.map → layout-DUMDc8rv.js.map} +1 -1
  83. package/webapp/dist/assets/{linear-l0qAHpRW.js → linear-CZMoHeVH.js} +2 -2
  84. package/webapp/dist/assets/{linear-l0qAHpRW.js.map → linear-CZMoHeVH.js.map} +1 -1
  85. package/webapp/dist/assets/{mindmap-definition-YRQLILUH-D4282T7u.js → mindmap-definition-YRQLILUH-DB2sQ--_.js} +4 -4
  86. package/webapp/dist/assets/{mindmap-definition-YRQLILUH-D4282T7u.js.map → mindmap-definition-YRQLILUH-DB2sQ--_.js.map} +1 -1
  87. package/webapp/dist/assets/{pieDiagram-SKSYHLDU-BKJYIUkU.js → pieDiagram-SKSYHLDU-Dgdrlric.js} +8 -8
  88. package/webapp/dist/assets/{pieDiagram-SKSYHLDU-BKJYIUkU.js.map → pieDiagram-SKSYHLDU-Dgdrlric.js.map} +1 -1
  89. package/webapp/dist/assets/{quadrantDiagram-337W2JSQ-yjekONzR.js → quadrantDiagram-337W2JSQ-CJcFYfqf.js} +3 -3
  90. package/webapp/dist/assets/{quadrantDiagram-337W2JSQ-yjekONzR.js.map → quadrantDiagram-337W2JSQ-CJcFYfqf.js.map} +1 -1
  91. package/webapp/dist/assets/{requirementDiagram-Z7DCOOCP-DMH1wutn.js → requirementDiagram-Z7DCOOCP-CDsT-ac7.js} +4 -4
  92. package/webapp/dist/assets/{requirementDiagram-Z7DCOOCP-DMH1wutn.js.map → requirementDiagram-Z7DCOOCP-CDsT-ac7.js.map} +1 -1
  93. package/webapp/dist/assets/{sankeyDiagram-WA2Y5GQK-Cs4ACtdq.js → sankeyDiagram-WA2Y5GQK-DiO55skm.js} +2 -2
  94. package/webapp/dist/assets/{sankeyDiagram-WA2Y5GQK-Cs4ACtdq.js.map → sankeyDiagram-WA2Y5GQK-DiO55skm.js.map} +1 -1
  95. package/webapp/dist/assets/{sequenceDiagram-2WXFIKYE-4sriOpV9.js → sequenceDiagram-2WXFIKYE-CNHjdBNC.js} +4 -4
  96. package/webapp/dist/assets/{sequenceDiagram-2WXFIKYE-4sriOpV9.js.map → sequenceDiagram-2WXFIKYE-CNHjdBNC.js.map} +1 -1
  97. package/webapp/dist/assets/{stateDiagram-RAJIS63D-BUMObt6W.js → stateDiagram-RAJIS63D-CtS3TXEd.js} +9 -9
  98. package/webapp/dist/assets/{stateDiagram-RAJIS63D-BUMObt6W.js.map → stateDiagram-RAJIS63D-CtS3TXEd.js.map} +1 -1
  99. package/webapp/dist/assets/{stateDiagram-v2-FVOUBMTO-DazzpfnH.js → stateDiagram-v2-FVOUBMTO-BdjJA1de.js} +5 -5
  100. package/webapp/dist/assets/{stateDiagram-v2-FVOUBMTO-DazzpfnH.js.map → stateDiagram-v2-FVOUBMTO-BdjJA1de.js.map} +1 -1
  101. package/webapp/dist/assets/{timeline-definition-YZTLITO2-CdChFPnp.js → timeline-definition-YZTLITO2-D3AiTIhK.js} +3 -3
  102. package/webapp/dist/assets/{timeline-definition-YZTLITO2-CdChFPnp.js.map → timeline-definition-YZTLITO2-D3AiTIhK.js.map} +1 -1
  103. package/webapp/dist/assets/{treemap-KZPCXAKY-DW9mBchB.js → treemap-KZPCXAKY-F6nRvLGK.js} +5 -5
  104. package/webapp/dist/assets/{treemap-KZPCXAKY-DW9mBchB.js.map → treemap-KZPCXAKY-F6nRvLGK.js.map} +1 -1
  105. package/webapp/dist/assets/{vennDiagram-LZ73GAT5-Bu9N_8Cu.js → vennDiagram-LZ73GAT5-BoukZEuo.js} +2 -2
  106. package/webapp/dist/assets/{vennDiagram-LZ73GAT5-Bu9N_8Cu.js.map → vennDiagram-LZ73GAT5-BoukZEuo.js.map} +1 -1
  107. package/webapp/dist/assets/{xychartDiagram-JWTSCODW-BI_N4JiZ.js → xychartDiagram-JWTSCODW-ByfGkhZz.js} +3 -3
  108. package/webapp/dist/assets/{xychartDiagram-JWTSCODW-BI_N4JiZ.js.map → xychartDiagram-JWTSCODW-ByfGkhZz.js.map} +1 -1
  109. package/webapp/dist/index.html +1 -1
@@ -51,6 +51,7 @@ export declare function broadcastDisplayStateMarker(dialogId: DialogID, marker:
51
51
  reason?: DialogInterruptionReason;
52
52
  }): void;
53
53
  export declare function computeIdleDisplayState(dlg: Dialog): Promise<DialogDisplayState>;
54
+ export declare function refreshRunControlProjectionFromPersistenceFacts(dialogId: DialogID, trigger: 'resume_dialog' | 'resume_all' | 'run_control_snapshot' | 'pending_subdialogs_changed' | 'q4h_changed'): Promise<DialogLatestFile | null>;
54
55
  export declare function reconcileDisplayStatesAfterRestart(): Promise<void>;
55
56
  export declare function requestInterruptDialog(dialogId: DialogID, reason: StopRequestedReason): Promise<{
56
57
  applied: boolean;
@@ -39,6 +39,7 @@ exports.clearDialogInterruptedExecutionMarker = clearDialogInterruptedExecutionM
39
39
  exports.setDialogDisplayState = setDialogDisplayState;
40
40
  exports.broadcastDisplayStateMarker = broadcastDisplayStateMarker;
41
41
  exports.computeIdleDisplayState = computeIdleDisplayState;
42
+ exports.refreshRunControlProjectionFromPersistenceFacts = refreshRunControlProjectionFromPersistenceFacts;
42
43
  exports.reconcileDisplayStatesAfterRestart = reconcileDisplayStatesAfterRestart;
43
44
  exports.requestInterruptDialog = requestInterruptDialog;
44
45
  exports.requestEmergencyStopAll = requestEmergencyStopAll;
@@ -84,6 +85,9 @@ function isDialogLatestResumable(latest) {
84
85
  latest.displayState.continueEnabled &&
85
86
  latest.executionMarker?.kind === 'interrupted');
86
87
  }
88
+ function isSameDisplayState(left, right) {
89
+ return JSON.stringify(left) === JSON.stringify(right);
90
+ }
87
91
  function classifyRunControlBucket(state) {
88
92
  if (!state)
89
93
  return 'none';
@@ -122,8 +126,13 @@ async function getRunControlCountsSnapshot() {
122
126
  if (latest?.generating === true) {
123
127
  proceeding++;
124
128
  }
125
- else if (isDialogLatestResumable(latest)) {
126
- resumable++;
129
+ else if (latest?.executionMarker?.kind === 'interrupted' &&
130
+ isStoppedReasonResumable(latest.executionMarker.reason)) {
131
+ const q4h = await persistence_1.DialogPersistence.loadQuestions4HumanState(dialogId, 'running');
132
+ const pendingSubdialogs = await persistence_1.DialogPersistence.loadPendingSubdialogs(dialogId, 'running');
133
+ if (q4h.length === 0 && pendingSubdialogs.length === 0) {
134
+ resumable++;
135
+ }
127
136
  }
128
137
  }
129
138
  catch (error) {
@@ -444,6 +453,66 @@ async function computeIdleDisplayStateFromPersistence(dialogId) {
444
453
  }
445
454
  return { kind: 'idle_waiting_user' };
446
455
  }
456
+ async function refreshRunControlProjectionFromPersistenceFacts(dialogId, trigger) {
457
+ const latest = await persistence_1.DialogPersistence.loadDialogLatest(dialogId, 'running');
458
+ if (!latest) {
459
+ return null;
460
+ }
461
+ if (latest.generating === true) {
462
+ return latest;
463
+ }
464
+ if (hasActiveRun(dialogId)) {
465
+ return latest;
466
+ }
467
+ const desired = await (async () => {
468
+ if (dialogId.selfId !== dialogId.rootId &&
469
+ latest.executionMarker &&
470
+ latest.executionMarker.kind === 'dead') {
471
+ return { kind: 'dead', reason: latest.executionMarker.reason };
472
+ }
473
+ const q4h = await persistence_1.DialogPersistence.loadQuestions4HumanState(dialogId, 'running');
474
+ const pendingSubdialogs = await persistence_1.DialogPersistence.loadPendingSubdialogs(dialogId, 'running');
475
+ const hasQ4H = q4h.length > 0;
476
+ const hasSubdialogs = pendingSubdialogs.length > 0;
477
+ if (hasQ4H && hasSubdialogs) {
478
+ return { kind: 'blocked', reason: { kind: 'needs_human_input_and_subdialogs' } };
479
+ }
480
+ if (hasQ4H) {
481
+ return { kind: 'blocked', reason: { kind: 'needs_human_input' } };
482
+ }
483
+ if (hasSubdialogs) {
484
+ return { kind: 'blocked', reason: { kind: 'waiting_for_subdialogs' } };
485
+ }
486
+ if (latest.executionMarker?.kind === 'interrupted') {
487
+ return {
488
+ kind: 'stopped',
489
+ reason: latest.executionMarker.reason,
490
+ continueEnabled: isStoppedReasonResumable(latest.executionMarker.reason),
491
+ };
492
+ }
493
+ return { kind: 'idle_waiting_user' };
494
+ })();
495
+ const executionMarkerNeedsHealing = desired.kind === 'stopped'
496
+ ? latest.executionMarker?.kind !== 'interrupted' ||
497
+ JSON.stringify(latest.executionMarker.reason) !== JSON.stringify(desired.reason)
498
+ : desired.kind === 'dead'
499
+ ? latest.executionMarker?.kind !== 'dead' ||
500
+ JSON.stringify(latest.executionMarker.reason) !== JSON.stringify(desired.reason)
501
+ : latest.executionMarker?.kind === 'interrupted';
502
+ const displayStateNeedsHealing = !isSameDisplayState(latest.displayState, desired);
503
+ if (!displayStateNeedsHealing && !executionMarkerNeedsHealing) {
504
+ return latest;
505
+ }
506
+ log.warn('Healing stale run-control projection from persistence facts', undefined, {
507
+ dialogId: dialogId.valueOf(),
508
+ trigger,
509
+ previousDisplayState: latest.displayState ?? null,
510
+ previousExecutionMarker: latest.executionMarker ?? null,
511
+ healedDisplayState: desired,
512
+ });
513
+ await setDialogDisplayState(dialogId, desired);
514
+ return await persistence_1.DialogPersistence.loadDialogLatest(dialogId, 'running');
515
+ }
447
516
  async function computeIdleDisplayStateForReconciliation(dialogId) {
448
517
  try {
449
518
  return await computeIdleDisplayStateFromPersistence(dialogId);
@@ -335,9 +335,13 @@ Policy:
335
335
  reduce tool set, etc.).
336
336
  - **Network/retriable errors**: Dominds may auto-retry only for clearly retriable classes such as
337
337
  transient network failures/timeouts and provider transient errors (e.g. rate limits or 5xx), using
338
- bounded backoff and a max retry count.
338
+ strategy-specific backoff. Explicit short transport failures may use a brief aggressive burst, explicit
339
+ rate-limit signals may use smart-rate backoff, and everything else should default to conservative
340
+ keepalive retry instead of stopping just because a fixed retry-count ceiling was reached. Only
341
+ high-confidence non-progress cases should stop automatic retry.
339
342
 
340
- This keeps the system responsive and avoids infinite “retry loops caused by invalid tool schemas.
343
+ This keeps the system responsive while still avoiding pointless automatic loops caused by invalid tool
344
+ schemas or other high-confidence non-retriable failures.
341
345
 
342
346
  ## Environment Variables (`env`)
343
347
 
@@ -7,6 +7,7 @@ const DOMINDS_LLM_EMPTY_RESPONSE_ERROR_CODE = 'DOMINDS_LLM_EMPTY_RESPONSE';
7
7
  const XCODE_BEST_EMPTY_RESPONSE_SINGLE_RETRY_DELAY_MS = 3000;
8
8
  const XCODE_BEST_EMPTY_RESPONSE_GIVE_UP_THRESHOLD = 5;
9
9
  const XCODE_BEST_GATEWAY_HTML_502_RETRY_MESSAGE = 'xcode.best gateway returned an HTML 502 Bad Gateway page; retrying conservatively.';
10
+ const XCODE_BEST_AUTH_UNAVAILABLE_RETRY_MESSAGE = 'xcode.best upstream returned 500 auth_unavailable: no auth available; treating it as an infrastructure failure and retrying conservatively.';
10
11
  const XCODE_BEST_UNEXPECTED_EOF_RETRY_MESSAGE = 'xcode.best upstream stream ended unexpectedly (unexpected EOF); retrying conservatively.';
11
12
  const LOCAL_FILE_IO_ERROR_CODES = new Set(['ENOENT', 'ENOTDIR', 'EISDIR', 'EACCES', 'EPERM']);
12
13
  const LOCAL_FILE_IO_SYSCALLS = new Set([
@@ -42,6 +43,21 @@ function isXcodeBestUnexpectedEofFailure(failure, error) {
42
43
  failure.message.toLowerCase().includes('unexpected eof')) &&
43
44
  !hasLikelyLocalFileErrorContext(error));
44
45
  }
46
+ function isXcodeBestAuthUnavailableFailure(failure, error) {
47
+ const status = failure.status ?? readErrorStatus(error);
48
+ if (status !== 500) {
49
+ return false;
50
+ }
51
+ const code = (failure.code ?? readErrorCode(error))?.trim().toLowerCase();
52
+ if (code === 'auth_unavailable') {
53
+ return true;
54
+ }
55
+ const message = (readErrorMessage(error) ?? failure.message).toLowerCase();
56
+ if (message.includes('auth_unavailable')) {
57
+ return true;
58
+ }
59
+ return code === 'internal_server_error' && message.includes('no auth available');
60
+ }
45
61
  function getErrorChain(error) {
46
62
  const queue = [error];
47
63
  const visited = new Set();
@@ -248,12 +264,12 @@ function buildXcodeBestEmptyResponseGiveUpText(providerConfig, provider) {
248
264
  const providerName = providerConfig.name.trim().length > 0 ? providerConfig.name : provider;
249
265
  const summaryTextI18n = {
250
266
  zh: `${providerName} 在同一对话上下文中连续返回 empty response。` +
251
- `Dominds 已在 ${String(XCODE_BEST_EMPTY_RESPONSE_GIVE_UP_THRESHOLD)} 次 empty response 后停止继续重试,因为这通常表示 provider 侧该对话上下文已经卡住;` +
252
- '如果直接点继续,大概率仍然无真实进展;更建议结合真实情况灵活尝试多种新的指令,例如改写问题、补充上下文、换一个切入方式。',
267
+ `Dominds 已在 ${String(XCODE_BEST_EMPTY_RESPONSE_GIVE_UP_THRESHOLD)} 次 empty response 后停止沿用同一上下文继续自动重试,因为这通常表示 provider 侧该对话上下文已经卡住;` +
268
+ '如果不引入新的信息或新的指令,直接点继续大概率仍然无真实进展;更建议补充上下文、改写问题、换一个切入方式,或在确实需要人类判断时调用 askHuman。',
253
269
  en: `${providerName} returned empty responses repeatedly for the same dialog context. ` +
254
- `Dominds stopped retrying after ${String(XCODE_BEST_EMPTY_RESPONSE_GIVE_UP_THRESHOLD)} empty responses because this usually means the provider-side conversation ` +
255
- 'context is stuck; simply pressing Continue is still unlikely to make real progress, ' +
256
- 'so it is better to try different fresh instructions based on the real situation, such as reframing the ask, adding context, or changing the angle.',
270
+ `Dominds stopped repeating the same-context automatic retry path after ${String(XCODE_BEST_EMPTY_RESPONSE_GIVE_UP_THRESHOLD)} empty responses because this usually means the provider-side conversation ` +
271
+ 'context is stuck; simply pressing Continue without new information or fresh instructions is still unlikely to make real progress, ' +
272
+ 'so it is better to add context, reframe the ask, change the angle, or call askHuman when human judgment is genuinely needed.',
257
273
  };
258
274
  return {
259
275
  providerName,
@@ -269,6 +285,11 @@ function createXcodeBestFailureQuirkHandlerSession(providerConfig) {
269
285
  onFailure(args) {
270
286
  const { providerName, summaryTextI18n, recoveryAction } = buildXcodeBestEmptyResponseGiveUpText(providerConfig, args.provider);
271
287
  if (args.failure.code === DOMINDS_LLM_EMPTY_RESPONSE_ERROR_CODE) {
288
+ // xcode.best can enter a same-context deadlock where the upstream keeps returning empty
289
+ // responses forever until the dialog context changes materially. A short burst of
290
+ // temporary retries is still worthwhile for transient glitches, but once the streak reaches
291
+ // the threshold we must stop repeating the exact same automatic path and require fresh
292
+ // information / fresh instructions instead of hiding the deadlock behind slow retries.
272
293
  consecutiveEmptyResponseCount += 1;
273
294
  if (consecutiveEmptyResponseCount < XCODE_BEST_EMPTY_RESPONSE_GIVE_UP_THRESHOLD) {
274
295
  return {
@@ -279,7 +300,7 @@ function createXcodeBestFailureQuirkHandlerSession(providerConfig) {
279
300
  return {
280
301
  kind: 'give_up',
281
302
  message: `${providerName} returned empty responses repeatedly for the same dialog context; ` +
282
- 'automatic retries were stopped; simply continuing is still unlikely to make real progress, so it is better to flexibly try different fresh instructions based on the real situation.',
303
+ 'Dominds stopped repeating the same-context automatic retry path; continuing without new information is still unlikely to make real progress, so it is better to introduce fresh instructions or new context based on the real situation.',
283
304
  summaryTextI18n,
284
305
  recoveryAction: consumedDiligencePushRecoverySinceLastSuccess
285
306
  ? { kind: 'none' }
@@ -301,6 +322,13 @@ function createXcodeBestFailureQuirkHandlerSession(providerConfig) {
301
322
  message: XCODE_BEST_GATEWAY_HTML_502_RETRY_MESSAGE,
302
323
  };
303
324
  }
325
+ if (isXcodeBestAuthUnavailableFailure(args.failure, args.error)) {
326
+ return {
327
+ kind: 'retry_strategy',
328
+ retryStrategy: 'conservative',
329
+ message: XCODE_BEST_AUTH_UNAVAILABLE_RETRY_MESSAGE,
330
+ };
331
+ }
304
332
  return { kind: 'default' };
305
333
  },
306
334
  onRequestSucceeded() {
@@ -56,7 +56,7 @@ export type ProviderConfig = {
56
56
  baseUrl: string;
57
57
  apiKeyEnvVar: string;
58
58
  tool_result_max_chars?: number;
59
- llm_retry_max_retries?: number;
59
+ llm_retry_aggressive_max_retries?: number;
60
60
  llm_retry_initial_delay_ms?: number;
61
61
  llm_retry_conservative_delay_ms?: number;
62
62
  llm_retry_backoff_multiplier?: number;
@@ -1,5 +1,7 @@
1
1
  # Optional provider-level retry controls for kernel dialog driving:
2
- # - llm_retry_max_retries: extra retries after the initial attempt (default 99; ~41.2h total retry window with current backoff defaults).
2
+ # - llm_retry_aggressive_max_retries: front-loaded fast retries before Dominds automatically downgrades
3
+ # to conservative slow retry (default 3). Automatic retries no longer stop just because a retry
4
+ # count budget was exhausted.
3
5
  # - llm_retry_initial_delay_ms: delay before the 1st retry (default 1000).
4
6
  # - llm_retry_conservative_delay_ms: base delay for provider-classified conservative retries (default 30000; fixed for first 10 retries, then ramps by x1.5 with the normal max-delay cap).
5
7
  # - llm_retry_backoff_multiplier: exponential factor between retries (default 1.5).
@@ -7,8 +9,9 @@
7
9
  # - apiQuirks: optional provider/gateway quirk profile(s) for non-standard transport behavior
8
10
  # and provider-specific failure handling that can override the driver's default retry disposition.
9
11
  # Example: `apiQuirks: xcode.best` currently covers both keepalive/heartbeat stream quirks and
10
- # repeated empty-response handling for the same dialog context, plus conservative retry
11
- # classification for gateway-returned HTML 502 Bad Gateway pages.
12
+ # repeated empty-response deadlock detection for the same unchanged dialog context, plus conservative retry
13
+ # classification for gateway-returned HTML 502 Bad Gateway pages and
14
+ # `500 auth_unavailable: no auth available` infrastructure failures.
12
15
  # - tool_result_max_chars: optional transport-level cap for a single tool-result text payload
13
16
  # before Dominds projects it into the provider request. Use this when a provider/gateway enforces
14
17
  # a stricter per-item string limit than Dominds' built-in defaults.
@@ -3,8 +3,6 @@ export declare function readErrorStatus(error: unknown): number | undefined;
3
3
  export declare function readErrorCode(error: unknown): string | undefined;
4
4
  export declare function readErrorMessage(error: unknown): string | undefined;
5
5
  export declare function readProviderSuggestedRetryAfterMs(error: unknown): number | undefined;
6
- export declare function isConservativeRetryMessage(lowerMessage: string): boolean;
7
- export declare function isOpenAiLikeOverloadFailure(error: unknown): boolean;
8
6
  export declare function isOpenAiLikeRateLimitFailure(error: unknown): boolean;
9
7
  export declare function classifyOpenAiLikeFailure(error: unknown): LlmFailureDisposition | undefined;
10
8
  export declare function classifyAnthropicFailure(error: unknown): LlmFailureDisposition | undefined;
@@ -4,8 +4,6 @@ exports.readErrorStatus = readErrorStatus;
4
4
  exports.readErrorCode = readErrorCode;
5
5
  exports.readErrorMessage = readErrorMessage;
6
6
  exports.readProviderSuggestedRetryAfterMs = readProviderSuggestedRetryAfterMs;
7
- exports.isConservativeRetryMessage = isConservativeRetryMessage;
8
- exports.isOpenAiLikeOverloadFailure = isOpenAiLikeOverloadFailure;
9
7
  exports.isOpenAiLikeRateLimitFailure = isOpenAiLikeRateLimitFailure;
10
8
  exports.classifyOpenAiLikeFailure = classifyOpenAiLikeFailure;
11
9
  exports.classifyAnthropicFailure = classifyAnthropicFailure;
@@ -195,28 +193,46 @@ function isOpenAiRetriableProcessingFailureMessage(lowerMessage) {
195
193
  }
196
194
  return lowerMessage.includes('help.openai.com') && lowerMessage.includes('request id');
197
195
  }
198
- function isConservativeRetryMessage(lowerMessage) {
199
- if (lowerMessage.includes('servers are currently overloaded')) {
200
- return true;
201
- }
202
- if (lowerMessage.includes('server is currently overloaded')) {
196
+ const OPENAI_LIKE_AGGRESSIVE_TRANSPORT_CODES = new Set([
197
+ 'ETIMEDOUT',
198
+ 'ECONNRESET',
199
+ 'ECONNREFUSED',
200
+ 'EAI_AGAIN',
201
+ 'ENOTFOUND',
202
+ 'ENETUNREACH',
203
+ 'EHOSTUNREACH',
204
+ 'UND_ERR_CONNECT_TIMEOUT',
205
+ 'UND_ERR_HEADERS_TIMEOUT',
206
+ 'UND_ERR_BODY_TIMEOUT',
207
+ 'UND_ERR_SOCKET',
208
+ ]);
209
+ function isOpenAiLikeAggressiveTransportFailure(error, lowerMessage) {
210
+ const code = readErrorCode(error);
211
+ if (typeof code === 'string' && OPENAI_LIKE_AGGRESSIVE_TRANSPORT_CODES.has(code)) {
203
212
  return true;
204
213
  }
205
- if (lowerMessage.includes('currently overloaded')) {
214
+ if (lowerMessage.includes('fetch failed') || lowerMessage.includes('socket hang up')) {
206
215
  return true;
207
216
  }
208
- if (lowerMessage.includes('temporarily overloaded')) {
217
+ if (lowerMessage.includes('terminated')) {
209
218
  return true;
210
219
  }
211
- if (lowerMessage.includes('service unavailable')) {
220
+ if (lowerMessage.includes('timeout') || lowerMessage.includes('timed out')) {
212
221
  return true;
213
222
  }
214
- return lowerMessage.includes('overloaded') && lowerMessage.includes('try again later');
223
+ return false;
215
224
  }
216
- function isOpenAiLikeOverloadFailure(error) {
217
- const lowerMessage = buildFailureMessage(error).toLowerCase();
225
+ function isHighConfidenceRejectedStatus(status) {
226
+ return (status === 400 ||
227
+ status === 401 ||
228
+ status === 403 ||
229
+ status === 404 ||
230
+ status === 413 ||
231
+ status === 422);
232
+ }
233
+ function isOpenAiLikeRejectedFailure(error) {
218
234
  const status = readErrorStatus(error);
219
- return status === 503 || status === 529 || isConservativeRetryMessage(lowerMessage);
235
+ return isHighConfidenceRejectedStatus(status);
220
236
  }
221
237
  function isOpenAiLikeRateLimitFailure(error) {
222
238
  const lowerMessage = buildFailureMessage(error).toLowerCase();
@@ -251,6 +267,14 @@ function classifyOpenAiLikeFailure(error) {
251
267
  const lowerMessage = message.toLowerCase();
252
268
  const status = readErrorStatus(error);
253
269
  const code = readErrorCode(error);
270
+ if (isOpenAiLikeRejectedFailure(error)) {
271
+ return {
272
+ kind: 'rejected',
273
+ message,
274
+ status,
275
+ code,
276
+ };
277
+ }
254
278
  if (code === 'OPENAI_MALFORMED_BATCH_OUTPUT_ITEM') {
255
279
  return {
256
280
  kind: 'fatal',
@@ -268,23 +292,23 @@ function classifyOpenAiLikeFailure(error) {
268
292
  retryStrategy: 'aggressive',
269
293
  };
270
294
  }
271
- if (isOpenAiLikeOverloadFailure(error)) {
295
+ if (isOpenAiLikeRateLimitFailure(error)) {
272
296
  return {
273
297
  kind: 'retriable',
274
298
  message,
275
299
  status,
276
300
  code,
277
- retryStrategy: 'conservative',
301
+ retryStrategy: 'smart_rate',
302
+ retryAfterMs: readProviderSuggestedRetryAfterMs(error),
278
303
  };
279
304
  }
280
- if (isOpenAiLikeRateLimitFailure(error)) {
305
+ if (isOpenAiLikeAggressiveTransportFailure(error, lowerMessage)) {
281
306
  return {
282
307
  kind: 'retriable',
283
308
  message,
284
309
  status,
285
310
  code,
286
- retryStrategy: 'smart_rate',
287
- retryAfterMs: readProviderSuggestedRetryAfterMs(error),
311
+ retryStrategy: 'aggressive',
288
312
  };
289
313
  }
290
314
  if (isOpenAiRetriableProcessingFailureMessage(lowerMessage)) {
@@ -293,20 +317,63 @@ function classifyOpenAiLikeFailure(error) {
293
317
  message,
294
318
  status,
295
319
  code,
296
- retryStrategy: 'aggressive',
320
+ retryStrategy: 'conservative',
321
+ };
322
+ }
323
+ if (status !== undefined || code !== undefined) {
324
+ return {
325
+ kind: 'retriable',
326
+ message,
327
+ status,
328
+ code,
329
+ retryStrategy: 'conservative',
297
330
  };
298
331
  }
299
332
  return undefined;
300
333
  }
334
+ function isAnthropicRejectedFailure(error) {
335
+ const status = readErrorStatus(error);
336
+ const errorType = readErrorType(error);
337
+ if (isHighConfidenceRejectedStatus(status)) {
338
+ return true;
339
+ }
340
+ return (errorType === 'invalid_request_error' ||
341
+ errorType === 'authentication_error' ||
342
+ errorType === 'permission_error' ||
343
+ errorType === 'not_found_error');
344
+ }
345
+ function isAnthropicRateLimitFailure(error) {
346
+ const status = readErrorStatus(error);
347
+ const errorType = readErrorType(error);
348
+ const lowerMessage = buildFailureMessage(error).toLowerCase();
349
+ if (status === 429 || errorType === 'rate_limit_error') {
350
+ return true;
351
+ }
352
+ return lowerMessage.includes('rate limit');
353
+ }
301
354
  function classifyAnthropicFailure(error) {
302
355
  const message = buildFailureMessage(error);
303
- const lowerMessage = message.toLowerCase();
304
356
  const status = readErrorStatus(error);
305
357
  const code = readErrorCode(error);
306
- const errorType = readErrorType(error);
307
- if (errorType === 'overloaded_error' ||
308
- status === 529 ||
309
- isConservativeRetryMessage(lowerMessage)) {
358
+ if (isAnthropicRejectedFailure(error)) {
359
+ return {
360
+ kind: 'rejected',
361
+ message,
362
+ status,
363
+ code,
364
+ };
365
+ }
366
+ if (isAnthropicRateLimitFailure(error)) {
367
+ return {
368
+ kind: 'retriable',
369
+ message,
370
+ status,
371
+ code,
372
+ retryStrategy: 'smart_rate',
373
+ retryAfterMs: readProviderSuggestedRetryAfterMs(error),
374
+ };
375
+ }
376
+ if (status !== undefined || readErrorType(error) !== undefined) {
310
377
  return {
311
378
  kind: 'retriable',
312
379
  message,
@@ -33,7 +33,7 @@ const reply_guidance_1 = require("./reply-guidance");
33
33
  const runtime_1 = require("./runtime");
34
34
  const tellask_special_1 = require("./tellask-special");
35
35
  const KERNEL_DRIVER_DEFAULT_RETRY_POLICY = {
36
- maxRetries: 99, // long total retry window to survive major down-time by llm providers
36
+ aggressiveMaxRetries: 3, // short fast burst; persistent failures automatically downgrade to conservative
37
37
  initialDelayMs: 1000,
38
38
  conservativeDelayMs: 30000,
39
39
  backoffMultiplier: 1.5,
@@ -169,13 +169,13 @@ function isUserOriginPrompt(prompt) {
169
169
  function resolveModelInfo(providerCfg, model) {
170
170
  return providerCfg.models[model];
171
171
  }
172
- function resolveRetryMaxRetries(raw) {
172
+ function resolveRetryAggressiveMaxRetries(raw) {
173
173
  if (typeof raw !== 'number' || !Number.isFinite(raw)) {
174
- return KERNEL_DRIVER_DEFAULT_RETRY_POLICY.maxRetries;
174
+ return KERNEL_DRIVER_DEFAULT_RETRY_POLICY.aggressiveMaxRetries;
175
175
  }
176
176
  const normalized = Math.floor(raw);
177
177
  if (normalized < 0) {
178
- return KERNEL_DRIVER_DEFAULT_RETRY_POLICY.maxRetries;
178
+ return KERNEL_DRIVER_DEFAULT_RETRY_POLICY.aggressiveMaxRetries;
179
179
  }
180
180
  return normalized;
181
181
  }
@@ -219,13 +219,13 @@ function resolveRetryMaxDelayMs(raw) {
219
219
  return normalized;
220
220
  }
221
221
  function resolveKernelDriverRetryPolicy(providerCfg) {
222
- const maxRetries = resolveRetryMaxRetries(providerCfg.llm_retry_max_retries);
222
+ const aggressiveMaxRetries = resolveRetryAggressiveMaxRetries(providerCfg.llm_retry_aggressive_max_retries);
223
223
  const initialDelayMs = resolveRetryInitialDelayMs(providerCfg.llm_retry_initial_delay_ms);
224
224
  const conservativeDelayMs = resolveRetryConservativeDelayMs(providerCfg.llm_retry_conservative_delay_ms);
225
225
  const backoffMultiplier = resolveRetryBackoffMultiplier(providerCfg.llm_retry_backoff_multiplier);
226
226
  const maxDelayMs = resolveRetryMaxDelayMs(providerCfg.llm_retry_max_delay_ms);
227
227
  return {
228
- maxRetries,
228
+ aggressiveMaxRetries,
229
229
  initialDelayMs,
230
230
  conservativeDelayMs: Math.max(initialDelayMs, conservativeDelayMs),
231
231
  backoffMultiplier,
@@ -1584,7 +1584,7 @@ async function driveDialogStreamCore(dlg, callbacks, humanPrompt, driveOptions)
1584
1584
  modelId: model,
1585
1585
  providerConfig: providerCfg,
1586
1586
  abortSignal,
1587
- maxRetries: retryPolicy.maxRetries,
1587
+ aggressiveRetryMaxRetries: retryPolicy.aggressiveMaxRetries,
1588
1588
  retryInitialDelayMs: retryPolicy.initialDelayMs,
1589
1589
  retryConservativeDelayMs: retryPolicy.conservativeDelayMs,
1590
1590
  retryBackoffMultiplier: retryPolicy.backoffMultiplier,
@@ -1792,7 +1792,7 @@ async function driveDialogStreamCore(dlg, callbacks, humanPrompt, driveOptions)
1792
1792
  modelId: model,
1793
1793
  providerConfig: providerCfg,
1794
1794
  abortSignal,
1795
- maxRetries: retryPolicy.maxRetries,
1795
+ aggressiveRetryMaxRetries: retryPolicy.aggressiveMaxRetries,
1796
1796
  retryInitialDelayMs: retryPolicy.initialDelayMs,
1797
1797
  retryConservativeDelayMs: retryPolicy.conservativeDelayMs,
1798
1798
  retryBackoffMultiplier: retryPolicy.backoffMultiplier,
@@ -56,7 +56,7 @@ export declare function runLlmRequestWithRetry<T>(params: {
56
56
  modelId: string;
57
57
  providerConfig: ProviderConfig;
58
58
  abortSignal?: AbortSignal;
59
- maxRetries: number;
59
+ aggressiveRetryMaxRetries: number;
60
60
  retryInitialDelayMs: number;
61
61
  retryConservativeDelayMs: number;
62
62
  retryBackoffMultiplier: number;