npm - @swarmclawai/swarmclaw - Versions diffs - 1.9.37 → 1.9.39 - Mend

@swarmclawai/swarmclaw 1.9.37 → 1.9.39

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32)

package/README.md +43 -1
package/package.json +2 -2
package/src/app/api/chats/[id]/context-status/route.ts +2 -0
package/src/app/api/chats/context-status-route.test.ts +59 -0
package/src/app/api/setup/check-provider/route.test.ts +12 -0
package/src/app/api/setup/check-provider/route.ts +6 -0
package/src/lib/providers/index.ts +23 -0
package/src/lib/server/autonomy/supervisor-reflection.test.ts +10 -1
package/src/lib/server/connectors/outbox.ts +22 -2
package/src/lib/server/context-manager.ts +4 -0
package/src/lib/server/openrouter-model-context.test.ts +205 -0
package/src/lib/server/openrouter-model-context.ts +169 -0
package/src/lib/server/provider-health.ts +1 -0
package/src/lib/server/runtime/queue/core.ts +160 -18
package/src/lib/server/runtime/queue/orphan-recovery.test.ts +49 -0
package/src/lib/server/runtime/queue/orphan-recovery.ts +32 -0
package/src/lib/server/runtime/scheduled-run-preflight.test.ts +73 -0
package/src/lib/server/runtime/scheduled-run-preflight.ts +83 -0
package/src/lib/server/schedules/schedule-lifecycle.test.ts +44 -0
package/src/lib/server/schedules/schedule-lifecycle.ts +27 -0
package/src/lib/server/storage-normalization.ts +13 -0
package/src/lib/server/tasks/task-followups.test.ts +124 -41
package/src/lib/server/tasks/task-followups.ts +28 -3
package/src/lib/server/tasks/task-lifecycle.test.ts +25 -0
package/src/lib/server/tasks/task-lifecycle.ts +6 -0
package/src/lib/server/tasks/task-result.test.ts +25 -1
package/src/lib/server/tasks/task-result.ts +22 -0
package/src/lib/server/workspace-paths.test.ts +72 -0
package/src/lib/server/workspace-paths.ts +60 -0
package/src/lib/setup-defaults.test.ts +10 -1
package/src/lib/setup-defaults.ts +20 -0
package/src/types/provider.ts +1 -1

package/src/lib/server/openrouter-model-context.ts ADDED Viewed

@@ -0,0 +1,169 @@
+import fs from 'node:fs/promises'
+import path from 'node:path'
+import { fetchWithTimeout } from '@/lib/fetch-timeout'
+import { DATA_DIR } from '@/lib/server/data-dir'
+/**
+ * One model record from the OpenRouter models listing, reduced to the fields
+ * this module reads. Every field is optional; values are only copied in after
+ * a runtime type check.
+ */
+interface OpenRouterModelEntry {
+  id?: string
+  context_length?: number
+  top_provider?: {
+    context_length?: number
+  }
+}
+/** Parsed shape of the models listing body: the entries live under `data`. */
+interface OpenRouterModelsResponse {
+  data?: OpenRouterModelEntry[]
+}
+/**
+ * Snapshot of context lengths keyed by model id. `loadedAt` is the
+ * `Date.now()` value recorded when the snapshot was built and is what the
+ * freshness check compares against the TTL.
+ */
+interface OpenRouterModelContextCache {
+  loadedAt: number
+  models: Record<string, number>
+}
+// Endpoint requested for the OpenRouter model listing.
+const OPENROUTER_MODELS_URL = 'https://openrouter.ai/api/v1/models'
+// Snapshots older than 24 hours are treated as stale.
+const CACHE_TTL_MS = 24 * 60 * 60 * 1000
+// Time limit handed to fetchWithTimeout for the listing request (presumably milliseconds, per the name).
+const FETCH_TIMEOUT_MS = 2_000
+// On-disk copy of the snapshot, stored under DATA_DIR.
+const CACHE_PATH = path.join(DATA_DIR, 'openrouter-model-context.json')
+// In-memory snapshot; stays null until a load succeeds.
+let cache: OpenRouterModelContextCache | null = null
+// In-flight load shared by concurrent callers; reset to null once it settles.
+let loading: Promise<void> | null = null
+/** Type guard: true for any non-null object that is not an array. */
+function isRecord(value: unknown): value is Record<string, unknown> {
+  if (value === null || typeof value !== 'object') return false
+  return !Array.isArray(value)
+}
+/**
+ * Copies the recognised, correctly typed fields of a raw listing entry into a
+ * typed object.
+ *
+ * @param value Untrusted value taken from the listing's `data` array.
+ * @returns The typed entry, or null when `value` is not a plain object.
+ */
+function parseModelEntry(value: unknown): OpenRouterModelEntry | null {
+  if (!isRecord(value)) return null
+  const { id, context_length: listedLength, top_provider: rawTopProvider } = value
+  const parsed: OpenRouterModelEntry = {}
+  if (typeof id === 'string') parsed.id = id
+  if (typeof listedLength === 'number') parsed.context_length = listedLength
+  if (isRecord(rawTopProvider)) {
+    const providerLength = rawTopProvider.context_length
+    // top_provider is kept (possibly empty) whenever the raw value is an object.
+    parsed.top_provider = typeof providerLength === 'number' ? { context_length: providerLength } : {}
+  }
+  return parsed
+}
+/**
+ * Validates the listing body and keeps only the entries that parse.
+ *
+ * @param value Untrusted JSON body.
+ * @returns `{ data }` with the parsed entries, or `{}` when the body is not an
+ *   object carrying a `data` array.
+ */
+function parseModelsResponse(value: unknown): OpenRouterModelsResponse {
+  if (!isRecord(value)) return {}
+  const rawEntries = value.data
+  if (!Array.isArray(rawEntries)) return {}
+  const data: OpenRouterModelEntry[] = []
+  for (const rawEntry of rawEntries) {
+    const entry = parseModelEntry(rawEntry)
+    if (entry !== null) data.push(entry)
+  }
+  return { data }
+}
+/**
+ * Validates a decoded cache file.
+ *
+ * @param value Untrusted value, typically the result of JSON.parse.
+ * @returns A snapshot holding only the positive, finite context lengths, or
+ *   null when `loadedAt` is not a number or `models` is not an object.
+ */
+function parseCache(value: unknown): OpenRouterModelContextCache | null {
+  if (!isRecord(value)) return null
+  const { loadedAt, models: rawModels } = value
+  if (typeof loadedAt !== 'number' || !isRecord(rawModels)) return null
+  const models: Record<string, number> = {}
+  for (const id of Object.keys(rawModels)) {
+    const candidate = rawModels[id]
+    // Skip anything that is not a usable context length.
+    if (typeof candidate !== 'number' || !Number.isFinite(candidate) || candidate <= 0) continue
+    models[id] = candidate
+  }
+  return { loadedAt, models }
+}
+/** Type guard: true when a snapshot exists, has a finite timestamp, and is no older than CACHE_TTL_MS. */
+function isFreshCache(value: OpenRouterModelContextCache | null): value is OpenRouterModelContextCache {
+  if (value === null) return false
+  if (!Number.isFinite(value.loadedAt)) return false
+  const ageMs = Date.now() - value.loadedAt
+  return ageMs <= CACHE_TTL_MS
+}
+/**
+ * Loads the snapshot stored at CACHE_PATH.
+ *
+ * @returns The snapshot when the file can be read, parses, validates and is
+ *   still fresh; null in every other case, including any thrown error.
+ */
+async function readCache(): Promise<OpenRouterModelContextCache | null> {
+  try {
+    const text = await fs.readFile(CACHE_PATH, 'utf8')
+    const candidate = parseCache(JSON.parse(text))
+    if (isFreshCache(candidate)) return candidate
+  } catch {
+    // Missing or unreadable file, or invalid JSON: behave as if there is no cache.
+  }
+  return null
+}
+/**
+ * Persists a snapshot to CACHE_PATH, creating DATA_DIR first if needed.
+ * Failures are swallowed on purpose: the disk copy is an optimisation only.
+ */
+async function writeCache(nextCache: OpenRouterModelContextCache): Promise<void> {
+  try {
+    await fs.mkdir(DATA_DIR, { recursive: true })
+    const serialized = JSON.stringify(nextCache)
+    await fs.writeFile(CACHE_PATH, serialized, 'utf8')
+  } catch {
+    // Best-effort: callers must keep working when the disk write fails.
+  }
+}
+/**
+ * Flattens a parsed listing into a `model id -> context length` table.
+ *
+ * The `top_provider` length wins when it is truthy; otherwise the entry's own
+ * `context_length` is used. Entries without an id, or whose chosen length is
+ * not a positive finite number, are left out.
+ */
+function buildModelContextMap(response: OpenRouterModelsResponse): Record<string, number> {
+  const contextById: Record<string, number> = {}
+  for (const { id, context_length: listedLength, top_provider: topProvider } of response.data || []) {
+    if (!id) continue
+    const chosen = topProvider?.context_length || listedLength
+    if (typeof chosen !== 'number' || !Number.isFinite(chosen) || chosen <= 0) continue
+    contextById[id] = chosen
+  }
+  return contextById
+}
+/**
+ * Requests the OpenRouter model listing and converts it into a cache snapshot.
+ *
+ * @returns A snapshot stamped with the current time, or null when the request
+ *   or body decoding throws, the status is not OK, or the body yields no
+ *   usable model entries.
+ */
+async function fetchOpenRouterModels(): Promise<OpenRouterModelContextCache | null> {
+  try {
+    const response = await fetchWithTimeout(OPENROUTER_MODELS_URL, {}, FETCH_TIMEOUT_MS)
+    if (!response.ok) return null
+    const payload = parseModelsResponse(await response.json())
+    const models = buildModelContextMap(payload)
+    // An empty table means the body was not a usable listing. Reporting it as a
+    // failure keeps the caller from storing it as a "fresh" snapshot that
+    // would answer every lookup with null until the TTL runs out.
+    if (Object.keys(models).length === 0) return null
+    return {
+      loadedAt: Date.now(),
+      models,
+    }
+  } catch {
+    return null
+  }
+}
+/**
+ * Fills the in-memory snapshot, preferring the disk copy over the network.
+ * A snapshot obtained from the network is also written back to disk. When
+ * neither source yields a snapshot the in-memory value is left untouched.
+ */
+async function loadOpenRouterModelContextCache(): Promise<void> {
+  const fromDisk = await readCache()
+  if (fromDisk !== null) {
+    cache = fromDisk
+    return
+  }
+  const fromNetwork = await fetchOpenRouterModels()
+  if (fromNetwork === null) return
+  cache = fromNetwork
+  await writeCache(fromNetwork)
+}
+/**
+ * Looks up a model's context length in the in-memory snapshot without doing
+ * any I/O.
+ *
+ * @param provider Provider id; anything other than 'openrouter' yields null.
+ * @param model Model id, either fully qualified ("vendor/name") or bare.
+ * @returns The context length for an exact id match. For a bare name, the
+ *   length of the single cached id ending in "/<model>". Null when the
+ *   snapshot is missing or stale, nothing matches, or a bare name is
+ *   ambiguous.
+ */
+export function getCachedOpenRouterContextWindow(provider: string, model: string): number | null {
+  if (provider !== 'openrouter' || !isFreshCache(cache)) return null
+  const models = cache.models
+  // `models` is a plain object, so a bare index with a name such as
+  // "constructor" would read an inherited Object.prototype member and return a
+  // non-number. Only own keys count as cached models.
+  if (Object.prototype.hasOwnProperty.call(models, model)) {
+    const exactMatch = models[model]
+    if (exactMatch) return exactMatch
+  }
+  // A qualified id that missed exactly cannot match by suffix.
+  if (model.includes('/')) return null
+  const suffixMatches = Object.entries(models)
+    .filter(([id]) => id.endsWith(`/${model}`))
+    .map(([, contextLength]) => contextLength)
+  // Several vendors may publish the same bare name; refuse to guess between them.
+  return suffixMatches.length === 1 ? suffixMatches[0] : null
+}
+/**
+ * Makes sure a load of the snapshot has been attempted for OpenRouter.
+ * Returns immediately for other providers or when the in-memory snapshot is
+ * still fresh. Concurrent callers await the same in-flight load.
+ */
+export async function ensureOpenRouterModelContextCache(provider: string): Promise<void> {
+  if (provider !== 'openrouter') return
+  if (isFreshCache(cache)) return
+  // Start a load only when none is running; clear the slot once it settles.
+  loading ??= loadOpenRouterModelContextCache().finally(() => {
+    loading = null
+  })
+  await loading
+}

package/src/lib/server/provider-health.ts CHANGED Viewed

@@ -261,6 +261,7 @@ async function parseErrorMessage(res: Response, fallback: string): Promise<strin
 export const OPENAI_COMPATIBLE_DEFAULTS: Record<string, { name: string; defaultEndpoint: string }> = {
   openai: { name: 'OpenAI', defaultEndpoint: 'https://api.openai.com/v1' },
   openrouter: { name: 'OpenRouter', defaultEndpoint: 'https://openrouter.ai/api/v1' },
+  tokenmix: { name: 'TokenMix', defaultEndpoint: 'https://api.tokenmix.ai/v1' },
   google: { name: 'Google Gemini', defaultEndpoint: 'https://generativelanguage.googleapis.com/v1beta/openai' },
   deepseek: { name: 'DeepSeek', defaultEndpoint: 'https://api.deepseek.com/v1' },
   groq: { name: 'Groq', defaultEndpoint: 'https://api.groq.com/openai/v1' },

package/src/lib/server/runtime/queue/core.ts CHANGED Viewed

@@ -8,7 +8,8 @@ import { logActivity } from '@/lib/server/activity/activity-log'
 import { loadAgents } from '@/lib/server/agents/agent-repository'
 import { withTransaction } from '@/lib/server/persistence/transaction'
 import { loadQueue, saveQueue } from '@/lib/server/runtime/queue-repository'
-import { loadSchedules, saveSchedules } from '@/lib/server/schedules/schedule-repository'
+import { loadSchedules, saveSchedules, upsertSchedule } from '@/lib/server/schedules/schedule-repository'
+import { applyScheduleRunOutcome } from '@/lib/server/schedules/schedule-lifecycle'
 import { loadSessions, saveSessions } from '@/lib/server/sessions/session-repository'
 import { loadSettings } from '@/lib/server/settings/settings-repository'
 import { loadTasks, saveTasks } from '@/lib/server/tasks/task-repository'
@@ -16,13 +17,20 @@ import { notify } from '@/lib/server/ws-hub'
 import { getMessages, getLastMessage, appendMessage } from '@/lib/server/messages/message-repository'
 import { perf } from '@/lib/server/runtime/perf'
 import { WORKSPACE_DIR } from '@/lib/server/data-dir'
+import { normalizeLegacyWorkspacePath } from '@/lib/server/workspace-paths'
+import {
+  MAX_ORPHAN_RECOVERY_ATTEMPTS,
+  pruneOrphanRecovery,
+  trackOrphanRecovery,
+} from '@/lib/server/runtime/queue/orphan-recovery'
+import { preflightProviderCredential } from '@/lib/server/runtime/scheduled-run-preflight'
 import { createAgentTaskSession } from '@/lib/server/agents/task-session'
 import { formatValidationFailure } from '@/lib/server/tasks/task-validation'
 import { pushMainLoopEventToMainSessions } from '@/lib/server/agents/main-agent-loop'
 import type { ExecuteChatTurnResult } from '@/lib/server/chat-execution/chat-execution-types'
 import { checkAgentBudgetLimits } from '@/lib/server/cost'
 import { enqueueExecution } from '@/lib/server/execution-engine'
-import { extractTaskResult, formatResultBody } from '@/lib/server/tasks/task-result'
+import { classifyEmptyRunOutcome, EMPTY_RUN_OUTCOME_MESSAGE, extractTaskResult, formatResultBody } from '@/lib/server/tasks/task-result'
 import { checkoutTask } from '@/lib/server/tasks/task-checkout'
 import { queueSwarmFeedTaskCompletionWake } from '@/lib/server/swarmfeed-runtime'
 import {
@@ -64,6 +72,7 @@ const _queueState = hmrSingleton('__swarmclaw_queue__', () => ({
   activeCount: 0,
   maxConcurrent: 3,
   pendingKick: false,
+  orphanRecoveryAttempts: {} as Record<string, number>,
 }))
 function normalizeInt(value: unknown, fallback: number, min: number, max: number): number {
@@ -499,7 +508,10 @@ function inferWorkspaceProjectCwd(task: Pick<BoardTask, 'title' | 'description'
 function resolveTaskExecutionCwd(task: ScheduleTaskMeta, sessions: Record<string, SessionLike>): string {
   const workspaceRoot = path.resolve(WORKSPACE_DIR)
-  const explicitCwd = normalizeDirCandidate(task.cwd, workspaceRoot)
+  const explicitCwd = normalizeDirCandidate(
+    normalizeLegacyWorkspacePath(typeof task.cwd === 'string' ? task.cwd : '', { workspaceRoot, taskId: task.id }),
+    workspaceRoot,
+  )
   if (explicitCwd) return explicitCwd
   const projectId = typeof task.projectId === 'string' ? task.projectId.trim() : ''
@@ -520,13 +532,19 @@ function resolveTaskExecutionCwd(task: ScheduleTaskMeta, sessions: Record<string
   const sourceSessionId = typeof task.createdInSessionId === 'string' ? task.createdInSessionId.trim() : ''
   const sourceSessionCwd = sourceSessionId
-    ? normalizeDirCandidate(sessions[sourceSessionId]?.cwd, workspaceRoot)
+    ? normalizeDirCandidate(
+        normalizeLegacyWorkspacePath(sessions[sourceSessionId]?.cwd, { workspaceRoot, taskId: task.id }),
+        workspaceRoot,
+      )
     : null
   if (sourceSessionCwd && path.resolve(sourceSessionCwd) !== workspaceRoot) return sourceSessionCwd
   const runSessionId = typeof task.sessionId === 'string' ? task.sessionId.trim() : ''
   const runSessionCwd = runSessionId
-    ? normalizeDirCandidate(sessions[runSessionId]?.cwd, workspaceRoot)
+    ? normalizeDirCandidate(
+        normalizeLegacyWorkspacePath(sessions[runSessionId]?.cwd, { workspaceRoot, taskId: task.id }),
+        workspaceRoot,
+      )
     : null
   if (runSessionCwd && path.resolve(runSessionCwd) !== workspaceRoot) return runSessionCwd
@@ -708,6 +726,7 @@ export function reconcileFinishedRunningTasks(): { reconciled: number; deadLette
     if (!fallbackText && !task.result) {
       task.status = 'failed'
       task.result = 'Agent session finished without producing output.'
+      task.error = EMPTY_RUN_OUTCOME_MESSAGE.slice(0, 500)
       task.checkoutRunId = null
       task.updatedAt = now
       tasksDirty = true
@@ -854,7 +873,20 @@ function deliverTaskConnectorFollowups(task: BoardTask, sessions: Record<string,
   })
 }
+/**
+ * Writes a finished task's outcome back onto the schedule named by the task's
+ * `sourceScheduleId`. Does nothing when the task carries no schedule id, the
+ * schedule is not found, or applyScheduleRunOutcome returns a falsy result;
+ * otherwise the schedule is upserted and a 'schedules' notification is sent.
+ */
+function recordScheduleRunOutcome(task: BoardTask): void {
+  const { sourceScheduleId: rawScheduleId } = task as ScheduleTaskMeta
+  const scheduleId = typeof rawScheduleId === 'string' ? rawScheduleId.trim() : ''
+  if (!scheduleId) return
+  const schedule = loadSchedules()[scheduleId]
+  if (!schedule) return
+  const applied = applyScheduleRunOutcome(schedule, task, Date.now())
+  if (!applied) return
+  upsertSchedule(scheduleId, schedule)
+  notify('schedules')
+}
 function handleTerminalTaskResultDeliveries(task: BoardTask): void {
+  recordScheduleRunOutcome(task)
   const sessions = loadSessions() as Record<string, SessionLike>
   pushUserFacingTaskResult(task, sessions)
   deliverTaskConnectorFollowups(task, sessions)
@@ -1114,25 +1146,68 @@ export async function processNext() {
       const allTasks = loadTasks()
       const currentQueue = loadQueue()
       const queueSet = new Set(currentQueue)
+      // Backfill for hmrSingleton state created before this field existed
+      _queueState.orphanRecoveryAttempts ??= {}
+      const orphanAttempts = _queueState.orphanRecoveryAttempts
+      const stillOrphanedIds = new Set<string>()
+      const deadLetteredOrphans: BoardTask[] = []
       let recovered = false
       let tasksDirty = false
       for (const [id, t] of Object.entries(allTasks) as [string, BoardTask][]) {
-        if (t.status === 'queued' && !queueSet.has(id)) {
+        if (t.status !== 'queued' || queueSet.has(id)) continue
+        const decision = trackOrphanRecovery(orphanAttempts, id)
+        if (decision.action === 'dead_letter') {
+          // Recovery keeps re-queueing this task but it never starts. Stop the
+          // loop with one terminal reason instead of spamming recovery forever.
+          const now = Date.now()
+          t.status = 'failed'
+          t.deadLetteredAt = now
+          t.retryScheduledAt = null
+          t.checkoutRunId = null
+          t.updatedAt = now
+          t.error = `Orphan recovery exhausted after ${MAX_ORPHAN_RECOVERY_ATTEMPTS} attempts: task repeatedly returned to "queued" without starting.`
+          if (!t.comments) t.comments = []
+          t.comments.push({
+            id: genId(),
+            author: 'System',
+            text: t.error,
+            createdAt: now,
+          })
+          delete orphanAttempts[id]
+          tasksDirty = true
+          deadLetteredOrphans.push(t)
+          log.warn(TAG, `[queue] Dead-lettered orphaned queued task after ${decision.attempt - 1} recovery attempts: "${t.title}" (${id})`)
+          continue
+        }
+        stillOrphanedIds.add(id)
+        if (decision.firstAttempt) {
           log.info(TAG, `[queue] Recovering orphaned queued task: "${t.title}" (${id})`)
-          // Defence in depth: a queued task must not carry a stale checkoutRunId
-          // (left over from pre-1.5.38 retries). If it does, checkoutTask() will
-          // reject every attempt and this orphan-recovery loop will spin at 100%
-          // CPU re-queueing a task that can never run.
-          if (t.checkoutRunId) {
-            t.checkoutRunId = null
-            tasksDirty = true
-          }
-          pushQueueUnique(currentQueue, id)
-          recovered = true
+        } else {
+          log.debug(TAG, `[queue] Re-recovering orphaned queued task (attempt ${decision.attempt}): "${t.title}" (${id})`)
+        }
+        // Defence in depth: a queued task must not carry a stale checkoutRunId
+        // (left over from pre-1.5.38 retries). If it does, checkoutTask() will
+        // reject every attempt and this orphan-recovery loop will spin at 100%
+        // CPU re-queueing a task that can never run.
+        if (t.checkoutRunId) {
+          t.checkoutRunId = null
+          tasksDirty = true
         }
+        pushQueueUnique(currentQueue, id)
+        recovered = true
       }
+      pruneOrphanRecovery(orphanAttempts, stillOrphanedIds)
       if (tasksDirty) saveTasks(allTasks)
       if (recovered) saveQueue(currentQueue)
+      for (const t of deadLetteredOrphans) {
+        notify('tasks')
+        logActivity({ entityType: 'task', entityId: t.id, action: 'failed', actor: 'system', actorId: t.agentId, summary: `Task failed: "${t.title}" (orphan recovery exhausted)` })
+        pushMainLoopEventToMainSessions({
+          type: 'task_failed',
+          text: `Task failed: "${t.title}" (${t.id}): orphan recovery exhausted.`,
+        })
+        handleTerminalTaskResultDeliveries(t)
+      }
     }
     // Process ONE task per invocation (no while loop)
@@ -1261,6 +1336,61 @@ export async function processNext() {
         } catch {}
       }
+      // Credential preflight for scheduled runs: fail fast with an actionable
+      // error instead of letting the schedule die on a 401 deep in execution.
+      // Retries cannot succeed without a key, so this dead-letters immediately.
+      if ((task as ScheduleTaskMeta).sourceType === 'schedule') {
+        const preflight = preflightProviderCredential({
+          provider: typedAgent.provider,
+          ollamaMode: typedAgent.ollamaMode ?? null,
+          credentialId: typedAgent.credentialId ?? null,
+          fallbackCredentialIds: typedAgent.fallbackCredentialIds || null,
+        })
+        if (!preflight.ok) {
+          const now = Date.now()
+          task.status = 'failed'
+          task.deadLetteredAt = now
+          task.retryScheduledAt = null
+          task.checkoutRunId = null
+          task.error = preflight.error.slice(0, 500)
+          task.updatedAt = now
+          if (!task.comments) task.comments = []
+          task.comments.push({
+            id: genId(),
+            author: 'System',
+            text: preflight.error,
+            createdAt: now,
+          })
+          saveTasks(latestTasks)
+          notify('tasks')
+          const failure = classifyRuntimeFailure({ source: 'task', message: preflight.error })
+          recordSupervisorIncident({
+            runId: task.id,
+            sessionId: task.sessionId || '',
+            taskId: task.id,
+            agentId: typedAgent.id,
+            source: 'task',
+            kind: 'runtime_failure',
+            severity: failure.severity,
+            summary: `Scheduled run blocked by credential preflight: ${preflight.error}`.slice(0, 320),
+            details: preflight.error,
+            failureFamily: failure.family,
+            remediation: failure.remediation,
+            repairPrompt: failure.repairPrompt,
+            autoAction: null,
+          })
+          logActivity({ entityType: 'task', entityId: task.id, action: 'failed', actor: 'system', actorId: typedAgent.id, summary: `Task failed credential preflight: "${task.title}"` })
+          pushMainLoopEventToMainSessions({
+            type: 'task_failed',
+            text: `Task failed: "${task.title}" (${task.id}): ${preflight.error.slice(0, 200)}`,
+          })
+          handleTerminalTaskResultDeliveries(task)
+          cleanupTerminalOneOffSchedule(task)
+          log.warn(TAG, `[queue] Scheduled task "${task.title}" (${taskId}) failed credential preflight: ${preflight.error}`)
+          return
+        }
+      }
       // Atomic checkout — prevents two runners from starting the same task
       const runId = genId()
       task = checkoutTask(taskId, runId) as BoardTask | undefined
@@ -1296,8 +1426,17 @@ export async function processNext() {
           : ''
         if (existingSessionId) {
           const sessions = loadSessions()
-          if (sessions[existingSessionId]) {
+          const existingSession = sessions[existingSessionId]
+          if (existingSession) {
             sessionId = existingSessionId
+            // Rebind sessions still pinned to a legacy workspace root (e.g. a
+            // pre-migration ~/.swarmclaw/workspace path) onto the current root.
+            const sessionCwd = typeof existingSession.cwd === 'string' ? existingSession.cwd : ''
+            if (sessionCwd && normalizeLegacyWorkspacePath(sessionCwd, { taskId: task.id }) !== sessionCwd) {
+              existingSession.cwd = taskCwd
+              saveSessions(sessions)
+              log.info(TAG, `[queue] Rebound stale schedule session cwd to ${taskCwd} (session ${existingSessionId})`)
+            }
           }
         }
         if (!sessionId) {
@@ -1467,7 +1606,10 @@ export async function processNext() {
               createdAt: now,
             })
           } else {
-            const failureReason = formatValidationFailure(validation.reasons).slice(0, 500)
+            // A run with no text, no tool calls, and no error gets an actionable
+            // reason instead of the generic "Result summary is empty." message.
+            const emptyRunReason = classifyEmptyRunOutcome(taskRun)
+            const failureReason = (emptyRunReason || formatValidationFailure(validation.reasons)).slice(0, 500)
             const retryState = scheduleRetryOrDeadLetter(t2[taskId], failureReason)
             t2[taskId].completedAt = retryState === 'dead_lettered' ? null : t2[taskId].completedAt
             t2[taskId].comments!.push({

package/src/lib/server/runtime/queue/orphan-recovery.test.ts ADDED Viewed

@@ -0,0 +1,49 @@
+import test from 'node:test'
+import assert from 'node:assert/strict'
+import {
+  MAX_ORPHAN_RECOVERY_ATTEMPTS,
+  pruneOrphanRecovery,
+  trackOrphanRecovery,
+} from './orphan-recovery'
+// Attempts 1 to 3 stay within the default cap; only attempt 1 reports firstAttempt.
+test('allows recovery for the first attempts and flags only the first one', () => {
+  const attempts: Record<string, number> = {}
+  const first = trackOrphanRecovery(attempts, 'task-1')
+  assert.deepEqual(first, { action: 'recover', attempt: 1, firstAttempt: true })
+  const second = trackOrphanRecovery(attempts, 'task-1')
+  assert.deepEqual(second, { action: 'recover', attempt: 2, firstAttempt: false })
+  const third = trackOrphanRecovery(attempts, 'task-1')
+  assert.deepEqual(third, { action: 'recover', attempt: 3, firstAttempt: false })
+})
+// Seeds the counter at the cap so the very next call is the one that exceeds it.
+test('dead-letters once the attempt cap is exceeded', () => {
+  const attempts: Record<string, number> = { 'task-1': MAX_ORPHAN_RECOVERY_ATTEMPTS }
+  const decision = trackOrphanRecovery(attempts, 'task-1')
+  assert.deepEqual(decision, { action: 'dead_letter', attempt: MAX_ORPHAN_RECOVERY_ATTEMPTS + 1 })
+})
+// Counters are keyed by task id: activity on task-1 must not advance task-2.
+test('tracks tasks independently', () => {
+  const attempts: Record<string, number> = {}
+  trackOrphanRecovery(attempts, 'task-1')
+  trackOrphanRecovery(attempts, 'task-1')
+  const other = trackOrphanRecovery(attempts, 'task-2')
+  assert.equal(other.action, 'recover')
+  assert.equal(other.attempt, 1)
+})
+// Only ids present in the "still orphaned" set keep their counters.
+test('prune drops counters for tasks no longer orphaned', () => {
+  const attempts: Record<string, number> = { 'task-1': 2, 'task-2': 1 }
+  pruneOrphanRecovery(attempts, new Set(['task-2']))
+  assert.deepEqual(attempts, { 'task-2': 1 })
+})
+// With max = 1 the first call recovers and the second call dead-letters.
+test('honors a custom max', () => {
+  const attempts: Record<string, number> = {}
+  assert.equal(trackOrphanRecovery(attempts, 'task-1', 1).action, 'recover')
+  assert.equal(trackOrphanRecovery(attempts, 'task-1', 1).action, 'dead_letter')
+})

package/src/lib/server/runtime/queue/orphan-recovery.ts ADDED Viewed

@@ -0,0 +1,32 @@
+/** Default number of recovery attempts allowed before a task is dead-lettered. */
+export const MAX_ORPHAN_RECOVERY_ATTEMPTS = 3
+/**
+ * Outcome of one recovery check, discriminated by `action`. `attempt` is the
+ * 1-based count including the current call; `firstAttempt` exists only on the
+ * 'recover' variant.
+ */
+export type OrphanRecoveryDecision =
+  | { action: 'recover'; attempt: number; firstAttempt: boolean }
+  | { action: 'dead_letter'; attempt: number }
+/**
+ * Counts one more recovery attempt for an orphaned queued task and decides
+ * what should happen to it.
+ *
+ * The counter in `attempts` is incremented in place on every call. While the
+ * new count is within `max` the task may be re-queued; once it exceeds `max`
+ * the caller should dead-letter the task with a single terminal reason rather
+ * than keep recovering it.
+ *
+ * @param attempts Mutable map of task id to attempts recorded so far.
+ * @param taskId Id of the orphaned task being checked.
+ * @param max Highest attempt number that is still allowed to recover.
+ * @returns The decision together with the updated attempt number.
+ */
+export function trackOrphanRecovery(
+  attempts: Record<string, number>,
+  taskId: string,
+  max: number = MAX_ORPHAN_RECOVERY_ATTEMPTS,
+): OrphanRecoveryDecision {
+  const previous = attempts[taskId] || 0
+  const attempt = previous + 1
+  attempts[taskId] = attempt
+  return attempt > max
+    ? { action: 'dead_letter', attempt }
+    : { action: 'recover', attempt, firstAttempt: attempt === 1 }
+}
+/**
+ * Removes, in place, every counter whose task id is absent from
+ * `stillOrphanedIds`, so a task that becomes orphaned again later starts
+ * counting from zero.
+ */
+export function pruneOrphanRecovery(
+  attempts: Record<string, number>,
+  stillOrphanedIds: ReadonlySet<string>,
+): void {
+  Object.keys(attempts)
+    .filter((taskId) => !stillOrphanedIds.has(taskId))
+    .forEach((taskId) => {
+      delete attempts[taskId]
+    })
+}

package/src/lib/server/runtime/scheduled-run-preflight.test.ts ADDED Viewed

@@ -0,0 +1,73 @@
+import test from 'node:test'
+import assert from 'node:assert/strict'
+import {
+  preflightProviderCredential,
+  type ProviderCredentialPreflightDeps,
+} from './scheduled-run-preflight'
+/**
+ * Builds the dependency bundle passed to preflightProviderCredential in these
+ * tests. The defaults describe a provider that requires an API key, resolve
+ * the credential id to the primary id (or null), and never find a secret;
+ * `overrides` replaces individual functions per test.
+ */
+function makeDeps(overrides: Partial<ProviderCredentialPreflightDeps> = {}): ProviderCredentialPreflightDeps {
+  const defaults: ProviderCredentialPreflightDeps = {
+    getProvider: () => ({ requiresApiKey: true }),
+    resolveProviderCredentialId: (input) => input.credentialId || null,
+    resolveCredentialSecret: () => null,
+  }
+  return { ...defaults, ...overrides }
+}
+// Providers that need no API key pass even though the default deps find no secret.
+test('passes when the provider does not require an API key', () => {
+  const result = preflightProviderCredential(
+    { provider: 'ollama' },
+    makeDeps({ getProvider: () => ({ requiresApiKey: false }) }),
+  )
+  assert.deepEqual(result, { ok: true })
+})
+// getProvider returning null (unknown provider) is not treated as a failure.
+test('passes when the provider is unknown', () => {
+  const result = preflightProviderCredential(
+    { provider: 'mystery' },
+    makeDeps({ getProvider: () => null }),
+  )
+  assert.deepEqual(result, { ok: true })
+})
+// An empty provider string passes as well.
+test('passes when no provider is set', () => {
+  assert.deepEqual(preflightProviderCredential({ provider: '' }, makeDeps()), { ok: true })
+})
+// Happy path: the primary credential id resolves to a secret.
+test('passes when the resolved credential has a secret', () => {
+  const result = preflightProviderCredential(
+    { provider: 'openai', credentialId: 'cred-1' },
+    makeDeps({ resolveCredentialSecret: (id) => (id === 'cred-1' ? 'sk-test' : null) }),
+  )
+  assert.deepEqual(result, { ok: true })
+})
+// The primary id has no secret, but one of the fallback ids does.
+test('passes when a fallback credential rescues a dead primary', () => {
+  const result = preflightProviderCredential(
+    { provider: 'openai', credentialId: 'cred-dead', fallbackCredentialIds: ['cred-live'] },
+    makeDeps({ resolveCredentialSecret: (id) => (id === 'cred-live' ? 'sk-test' : null) }),
+  )
+  assert.deepEqual(result, { ok: true })
+})
+// The resolver stub returns 'cred-auto' only when called without a credentialId;
+// that id is the only one with a secret.
+test('passes when auto-matching finds another credential for the provider', () => {
+  const result = preflightProviderCredential(
+    { provider: 'openai', credentialId: 'cred-dead' },
+    makeDeps({
+      resolveProviderCredentialId: (input) => (input.credentialId ? input.credentialId : 'cred-auto'),
+      resolveCredentialSecret: (id) => (id === 'cred-auto' ? 'sk-test' : null),
+    }),
+  )
+  assert.deepEqual(result, { ok: true })
+})
+// Default deps never yield a secret, so the run must be rejected with a message
+// that names the provider and points at Settings.
+test('fails with an actionable error naming the provider when nothing resolves', () => {
+  const result = preflightProviderCredential({ provider: 'openai', credentialId: 'cred-dead' }, makeDeps())
+  assert.equal(result.ok, false)
+  if (!result.ok) {
+    assert.match(result.error, /Provider authentication preflight failed/)
+    assert.match(result.error, /"openai"/)
+    assert.match(result.error, /Settings/)
+  }
+})