npm - @biaoo/tiangong-wiki - Versions diffs - 0.3.2 → 0.3.3 - Mend

@biaoo/tiangong-wiki 0.3.2 → 0.3.3

This diff shows the changes between two publicly released versions of the package, as they appear in the public registry they were published to. It is provided for informational purposes only.

Files changed (7)

package/dist/core/db.js +1 -0
package/dist/core/vault-processing.js +393 -173
package/dist/core/vault.js +10 -0
package/dist/daemon/server.js +1 -0
package/dist/operations/dashboard.js +5 -0
package/package.json +1 -1
package/references/troubleshooting.md +3 -1

package/dist/core/db.js (changed)

@@ -177,6 +177,7 @@ function ensureBaseTables(db, embeddingDimensions) {
         decision: "TEXT",
         result_manifest_path: "TEXT",
         last_error_at: "TEXT",
+        last_error_code: "TEXT",
         retry_after: "TEXT",
         created_page_ids: "TEXT",
         updated_page_ids: "TEXT",

package/dist/core/vault-processing.js (changed)

@@ -9,8 +9,12 @@ import { buildVaultWorkflowPrompt, ensureWorkflowArtifactSet, getWorkflowArtifac
 import { readWorkflowResult } from "./workflow-result.js";
 import { AppError } from "../utils/errors.js";
 import { readTextFileSync } from "../utils/fs.js";
-import { toOffsetIso } from "../utils/time.js";
+import { addSeconds, toOffsetIso } from "../utils/time.js";
 const INLINE_WORKFLOW_ATTEMPTS = 2;
+const MAX_QUEUE_ERROR_RETRIES = 3;
+const QUEUE_FULL_RETRY_DELAY_SECONDS = 300;
+const WORKFLOW_TIMEOUT_RETRY_DELAY_SECONDS = 120;
+const NON_RETRYABLE_QUEUE_ERROR_CODES = new Set(["config_error", "invalid_request"]);
 function buildFileIdFilterClause(filterFileIds) {
     if (!filterFileIds || filterFileIds.length === 0) {
         return { clause: "", params: [] };
@@ -20,6 +24,16 @@ function buildFileIdFilterClause(filterFileIds) {
         params: filterFileIds,
     };
 }
+function buildExcludedFileIdClause(excludedFileIds) {
+    const params = Array.from(excludedFileIds ?? []).filter((value) => value.trim().length > 0);
+    if (params.length === 0) {
+        return { clause: "", params: [] };
+    }
+    return {
+        clause: ` AND vault_processing_queue.file_id NOT IN (${params.map(() => "?").join(", ")})`,
+        params,
+    };
+}
 function parseOptionalStringArray(value) {
     if (Array.isArray(value)) {
         return value
@@ -43,9 +57,11 @@ function parseOptionalStringArray(value) {
     }
 }
 function mapQueueRow(row) {
+    const attempts = Number(row.attempts ?? 0);
+    const status = row.status;
     return {
         fileId: String(row.fileId),
-        status: row.status,
+        status,
         priority: Number(row.priority ?? 0),
         queuedAt: String(row.queuedAt),
         claimedAt: typeof row.claimedAt === "string" ? row.claimedAt : null,
@@ -53,13 +69,15 @@ function mapQueueRow(row) {
         processedAt: typeof row.processedAt === "string" ? row.processedAt : null,
         resultPageId: typeof row.resultPageId === "string" ? row.resultPageId : null,
         errorMessage: typeof row.errorMessage === "string" ? row.errorMessage : null,
-        attempts: Number(row.attempts ?? 0),
+        attempts,
         threadId: typeof row.threadId === "string" ? row.threadId : null,
         workflowVersion: typeof row.workflowVersion === "string" ? row.workflowVersion : null,
         decision: typeof row.decision === "string" ? row.decision : null,
         resultManifestPath: typeof row.resultManifestPath === "string" ? row.resultManifestPath : null,
         lastErrorAt: typeof row.lastErrorAt === "string" ? row.lastErrorAt : null,
+        lastErrorCode: typeof row.lastErrorCode === "string" ? row.lastErrorCode : null,
         retryAfter: typeof row.retryAfter === "string" ? row.retryAfter : null,
+        autoRetryExhausted: status === "error" && attempts > MAX_QUEUE_ERROR_RETRIES,
         createdPageIds: parseOptionalStringArray(row.createdPageIds),
         updatedPageIds: parseOptionalStringArray(row.updatedPageIds),
         appliedTypeNames: parseOptionalStringArray(row.appliedTypeNames),
@@ -72,8 +90,20 @@ function mapQueueRow(row) {
         filePath: typeof row.filePath === "string" ? row.filePath : undefined,
     };
 }
-function claimQueueItems(db, limit, filterFileIds) {
-    const filter = buildFileIdFilterClause(filterFileIds);
+function claimQueueItems(db, limit, options = {}) {
+    const filter = buildFileIdFilterClause(options.filterFileIds);
+    const exclude = buildExcludedFileIdClause(options.excludeFileIds);
+    const manualClaim = Boolean(options.filterFileIds && options.filterFileIds.length > 0);
+    const errorEligibility = manualClaim
+        ? "vault_processing_queue.status = 'error'"
+        : [
+            "vault_processing_queue.status = 'error'",
+            `vault_processing_queue.attempts <= ${MAX_QUEUE_ERROR_RETRIES}`,
+            `COALESCE(vault_processing_queue.last_error_code, '') NOT IN (${Array.from(NON_RETRYABLE_QUEUE_ERROR_CODES)
+                .map((code) => `'${code}'`)
+                .join(", ")})`,
+            "(vault_processing_queue.retry_after IS NULL OR julianday(vault_processing_queue.retry_after) <= julianday(?))",
+        ].join("\n          AND ");
     const select = db.prepare(`
       SELECT
         file_id AS fileId,
@@ -91,6 +121,7 @@ function claimQueueItems(db, limit, filterFileIds) {
         decision,
         result_manifest_path AS resultManifestPath,
         last_error_at AS lastErrorAt,
+        last_error_code AS lastErrorCode,
         retry_after AS retryAfter,
         created_page_ids AS createdPageIds,
         updated_page_ids AS updatedPageIds,
@@ -104,7 +135,12 @@ function claimQueueItems(db, limit, filterFileIds) {
         vault_files.file_path AS filePath
       FROM vault_processing_queue
       LEFT JOIN vault_files ON vault_files.id = vault_processing_queue.file_id
-      WHERE status IN ('pending', 'error')${filter.clause}
+      WHERE (
+        vault_processing_queue.status = 'pending'
+        OR (
+          ${errorEligibility}
+        )
+      )${filter.clause}${exclude.clause}
       ORDER BY priority DESC, queued_at ASC
       LIMIT ?
     `);
@@ -114,12 +150,16 @@ function claimQueueItems(db, limit, filterFileIds) {
         status = 'processing',
         claimed_at = @claimed_at,
         started_at = @started_at,
-        error_message = NULL
+        error_message = NULL,
+        retry_after = NULL
       WHERE file_id = @file_id AND status IN ('pending', 'error')
     `);
     return db.transaction((claimLimit, claimFilterParams) => {
         const startedAt = toOffsetIso();
-        const items = select.all(...claimFilterParams, claimLimit).map(mapQueueRow);
+        const selectParams = manualClaim
+            ? [...claimFilterParams, claimLimit]
+            : [startedAt, ...claimFilterParams, claimLimit];
+        const items = select.all(...selectParams).map(mapQueueRow);
         for (const item of items) {
             markProcessing.run({
                 file_id: item.fileId,
@@ -132,7 +172,7 @@ function claimQueueItems(db, limit, filterFileIds) {
             claimedAt: startedAt,
             startedAt,
         }));
-    })(limit, filter.params);
+    })(limit, [...filter.params, ...exclude.params]);
 }
 function fetchQueueItemsByStatus(db, status) {
     const rows = db.prepare(`
@@ -152,6 +192,7 @@ function fetchQueueItemsByStatus(db, status) {
         decision,
         result_manifest_path AS resultManifestPath,
         last_error_at AS lastErrorAt,
+        last_error_code AS lastErrorCode,
         retry_after AS retryAfter,
         created_page_ids AS createdPageIds,
         updated_page_ids AS updatedPageIds,
@@ -188,6 +229,7 @@ function fetchQueueItemByFileId(db, fileId) {
         decision,
         result_manifest_path AS resultManifestPath,
         last_error_at AS lastErrorAt,
+        last_error_code AS lastErrorCode,
         retry_after AS retryAfter,
         created_page_ids AS createdPageIds,
         updated_page_ids AS updatedPageIds,
@@ -262,10 +304,112 @@ function serializeArray(value) {
 function formatManifestLogFields(manifest) {
     return `decision=${manifest.decision} skills=${manifest.skillsUsed.join(",") || "-"} created=${manifest.createdPageIds.join(",") || "-"} updated=${manifest.updatedPageIds.join(",") || "-"} proposed=${manifest.proposedTypes.map((item) => item.name).join(",") || "-"}`;
 }
-function applyWorkflowManifest(db, fileId, manifest, resultManifestPath) {
+function extractErrorDetailsCode(error) {
+    if (!(error instanceof AppError)) {
+        return null;
+    }
+    if (typeof error.details !== "object" || error.details === null || Array.isArray(error.details)) {
+        return null;
+    }
+    const code = error.details.code;
+    return typeof code === "string" && code.trim() ? code.trim() : null;
+}
+function inferWorkflowErrorCode(message) {
+    const normalized = message.toLowerCase();
+    if (normalized.includes("queue_full") || normalized.includes("write queue is full")) {
+        return "queue_full";
+    }
+    if (normalized.includes("timed out")) {
+        return "workflow_timeout";
+    }
+    return null;
+}
+function buildRetryAfter(seconds) {
+    return toOffsetIso(addSeconds(new Date(), seconds));
+}
+function buildQueueFailureState(message, options = {}) {
+    const inferredCode = inferWorkflowErrorCode(message);
+    const errorCode = inferredCode ?? options.explicitCode ?? (options.errorType === "config" ? "config_error" : null);
+    if (errorCode && NON_RETRYABLE_QUEUE_ERROR_CODES.has(errorCode)) {
+        return {
+            errorCode,
+            retryAfter: null,
+            autoRetryEligible: false,
+        };
+    }
+    if (errorCode === "queue_full") {
+        return {
+            errorCode,
+            retryAfter: buildRetryAfter(QUEUE_FULL_RETRY_DELAY_SECONDS),
+            autoRetryEligible: true,
+        };
+    }
+    if (errorCode === "workflow_timeout") {
+        return {
+            errorCode,
+            retryAfter: buildRetryAfter(WORKFLOW_TIMEOUT_RETRY_DELAY_SECONDS),
+            autoRetryEligible: true,
+        };
+    }
+    return {
+        errorCode,
+        retryAfter: null,
+        autoRetryEligible: true,
+    };
+}
+function formatQueueErrorMessage(message, autoRetryExhausted) {
+    const autoRetrySuffix = autoRetryExhausted
+        ? ` Auto retry limit reached after ${MAX_QUEUE_ERROR_RETRIES} retries; use manual retry or requeue after the vault file changes.`
+        : "";
+    return `${message}${autoRetrySuffix}`.slice(0, 1_000);
+}
+function applyWorkflowManifest(db, fileId, manifest, resultManifestPath, currentAttempts) {
     const resultPageId = manifest.createdPageIds[0] ?? manifest.updatedPageIds[0] ?? null;
     const status = manifest.status;
     const processedAt = toOffsetIso();
+    if (status === "error") {
+        const failureState = buildQueueFailureState(manifest.reason);
+        const nextAttempts = currentAttempts + 1;
+        const autoRetryExhausted = failureState.autoRetryEligible && nextAttempts > MAX_QUEUE_ERROR_RETRIES;
+        db.prepare(`
+        UPDATE vault_processing_queue
+        SET
+          status = 'error',
+          processed_at = @processed_at,
+          result_page_id = @result_page_id,
+          error_message = @error_message,
+          attempts = attempts + 1,
+          workflow_version = @workflow_version,
+          decision = @decision,
+          result_manifest_path = @result_manifest_path,
+          last_error_at = @last_error_at,
+          last_error_code = @last_error_code,
+          retry_after = @retry_after,
+          created_page_ids = @created_page_ids,
+          updated_page_ids = @updated_page_ids,
+          applied_type_names = @applied_type_names,
+          proposed_type_names = @proposed_type_names,
+          skills_used = @skills_used
+        WHERE file_id = @file_id
+      `).run({
+            file_id: fileId,
+            processed_at: processedAt,
+            result_page_id: resultPageId,
+            error_message: formatQueueErrorMessage(manifest.reason, autoRetryExhausted),
+            workflow_version: CODEX_WORKFLOW_VERSION,
+            decision: manifest.decision,
+            result_manifest_path: resultManifestPath,
+            last_error_at: processedAt,
+            last_error_code: failureState.errorCode,
+            retry_after: autoRetryExhausted ? null : failureState.retryAfter,
+            created_page_ids: serializeArray(manifest.createdPageIds),
+            updated_page_ids: serializeArray(manifest.updatedPageIds),
+            applied_type_names: serializeArray(manifest.appliedTypeNames),
+            proposed_type_names: serializeArray(manifest.proposedTypes.map((item) => item.name)),
+            skills_used: serializeArray(manifest.skillsUsed),
+        });
+        return { status, pageId: resultPageId };
+    }
     db.prepare(`
       UPDATE vault_processing_queue
       SET
@@ -277,6 +421,7 @@ function applyWorkflowManifest(db, fileId, manifest, resultManifestPath) {
         decision = @decision,
         result_manifest_path = @result_manifest_path,
         last_error_at = NULL,
+        last_error_code = NULL,
         retry_after = NULL,
         created_page_ids = @created_page_ids,
         updated_page_ids = @updated_page_ids,
@@ -425,16 +570,20 @@ function updateQueueWorkflowError(db, fileId, payload) {
         thread_id = COALESCE(@thread_id, thread_id),
         workflow_version = @workflow_version,
         result_manifest_path = COALESCE(@result_manifest_path, result_manifest_path),
-        last_error_at = @last_error_at
+        last_error_at = @last_error_at,
+        last_error_code = @last_error_code,
+        retry_after = @retry_after
       WHERE file_id = @file_id
     `).run({
         file_id: fileId,
         processed_at: processedAt,
-        error_message: payload.errorMessage.slice(0, 1_000),
+        error_message: formatQueueErrorMessage(payload.errorMessage, payload.autoRetryExhausted === true),
         thread_id: payload.threadId ?? null,
         workflow_version: CODEX_WORKFLOW_VERSION,
         result_manifest_path: payload.resultManifestPath ?? null,
         last_error_at: processedAt,
+        last_error_code: payload.errorCode ?? null,
+        retry_after: payload.autoRetryExhausted ? null : payload.retryAfter ?? null,
     });
 }
 function prepareCodexWorkflowInput(paths, item, file, localFilePath, env, allowTemplateEvolution) {
@@ -484,6 +633,185 @@ function prepareCodexWorkflowInput(paths, item, file, localFilePath, env, allowT
         },
     };
 }
+async function processClaimedQueueItem(input) {
+    const { db, env, paths, item, workflowRunner, templateEvolution, maxWorkflowAttempts, workflowTimeoutMs } = input;
+    input.log?.(`${item.fileId}: start processing attempt=${item.attempts + 1} queuedAt=${item.queuedAt} thread=${item.threadId ?? "-"}`);
+    const file = fetchVaultFile(db, item.fileId);
+    if (!file) {
+        updateQueueStatus(db, item.fileId, {
+            status: "error",
+            processedAt: toOffsetIso(),
+            errorMessage: `Vault file missing from index: ${item.fileId}`,
+            incrementAttempts: true,
+        });
+        input.log?.(`${item.fileId}: error thread=- result=- message=Vault file missing from index`);
+        return {
+            status: "error",
+            item: {
+                fileId: item.fileId,
+                status: "error",
+                reason: "Vault file missing from index",
+            },
+        };
+    }
+    let threadId = item.threadId ?? null;
+    let resultManifestPath = null;
+    try {
+        const localFilePath = await ensureLocalVaultFile(file, paths.vaultPath, env);
+        const { artifacts, input: workflowInput } = prepareCodexWorkflowInput(paths, item, file, localFilePath, env, templateEvolution.canApply);
+        resultManifestPath = artifacts.resultPath;
+        let finalOutcome = null;
+        let lastWorkflowError;
+        for (let attempt = 1; attempt <= maxWorkflowAttempts; attempt += 1) {
+            try {
+                const mode = threadId ? "resume" : "start";
+                const workflowController = new AbortController();
+                let loggedStartedThreadId = null;
+                const attemptInput = {
+                    ...workflowInput,
+                    signal: workflowController.signal,
+                    onThreadStarted: (startedThreadId) => {
+                        if (loggedStartedThreadId === startedThreadId) {
+                            return;
+                        }
+                        loggedStartedThreadId = startedThreadId;
+                        threadId = startedThreadId;
+                        updateQueueWorkflowTracking(db, item.fileId, {
+                            threadId: startedThreadId,
+                            resultManifestPath: artifacts.resultPath,
+                        });
+                        input.log?.(`${item.fileId}: workflow started mode=${mode} attempt=${attempt}/${maxWorkflowAttempts} thread=${startedThreadId} result=${artifacts.resultPath}`);
+                    },
+                };
+                input.log?.(`${item.fileId}: launching workflow mode=${mode} attempt=${attempt}/${maxWorkflowAttempts} timeout=${Math.ceil(workflowTimeoutMs / 1000)}s result=${artifacts.resultPath}`);
+                const handle = threadId
+                    ? await runWithWorkflowTimeout("resumeWorkflow", workflowTimeoutMs, workflowController, () => workflowRunner.resumeWorkflow(threadId, attemptInput))
+                    : await runWithWorkflowTimeout("startWorkflow", workflowTimeoutMs, workflowController, () => workflowRunner.startWorkflow(attemptInput));
+                threadId = handle.threadId;
+                if (loggedStartedThreadId !== handle.threadId) {
+                    loggedStartedThreadId = handle.threadId;
+                    input.log?.(`${item.fileId}: workflow started mode=${mode} attempt=${attempt}/${maxWorkflowAttempts} thread=${handle.threadId} result=${artifacts.resultPath}`);
+                }
+                updateQueueWorkflowTracking(db, item.fileId, {
+                    threadId: handle.threadId,
+                    resultManifestPath: artifacts.resultPath,
+                });
+                input.log?.(`${item.fileId}: waiting for workflow result thread=${handle.threadId} attempt=${attempt}/${maxWorkflowAttempts} result=${artifacts.resultPath}`);
+                const collectController = new AbortController();
+                const manifest = await runWithWorkflowTimeout("collectResult", workflowTimeoutMs, collectController, () => workflowRunner.collectResult(handle, {
+                    ...workflowInput,
+                    signal: collectController.signal,
+                }));
+                assertTemplateEvolutionAllowed(manifest, templateEvolution);
+                finalOutcome = {
+                    outcome: applyWorkflowManifest(db, item.fileId, manifest, artifacts.resultPath, item.attempts),
+                    manifest,
+                    handleThreadId: handle.threadId,
+                };
+                break;
+            }
+            catch (error) {
+                lastWorkflowError = error;
+                threadId = readPersistedWorkflowThreadId(artifacts.queueItemPath) ?? threadId;
+                if (threadId) {
+                    updateQueueWorkflowTracking(db, item.fileId, {
+                        threadId,
+                        resultManifestPath: artifacts.resultPath,
+                    });
+                }
+                const recoveredManifest = shouldAttemptManifestRecovery(error)
+                    ? readRecoverableWorkflowResult(artifacts.resultPath, threadId)
+                    : null;
+                if (recoveredManifest) {
+                    assertTemplateEvolutionAllowed(recoveredManifest, templateEvolution);
+                    finalOutcome = {
+                        outcome: applyWorkflowManifest(db, item.fileId, recoveredManifest, artifacts.resultPath, item.attempts),
+                        manifest: recoveredManifest,
+                        handleThreadId: recoveredManifest.threadId,
+                    };
+                    input.log?.(`${item.fileId}: recovered persisted workflow result status=${recoveredManifest.status} thread=${recoveredManifest.threadId} ${formatManifestLogFields(recoveredManifest)} result=${artifacts.resultPath} message=${formatWorkflowError(error)}`);
+                    break;
+                }
+                if (!shouldRetryWorkflowAttempt(error, attempt, maxWorkflowAttempts)) {
+                    throw error;
+                }
+                input.log?.(`${item.fileId}: retrying workflow attempt ${attempt + 1}/${maxWorkflowAttempts} thread=${threadId ?? "-"} result=${artifacts.resultPath} message=${formatWorkflowError(error)}`);
+            }
+        }
+        if (!finalOutcome) {
+            throw (lastWorkflowError ?? new AppError("Workflow completed without a result", "runtime"));
+        }
+        input.log?.(`${item.fileId}: ${finalOutcome.outcome.status} thread=${finalOutcome.handleThreadId} ${formatManifestLogFields(finalOutcome.manifest)} result=${artifacts.resultPath}`);
+        return {
+            status: finalOutcome.outcome.status,
+            item: {
+                fileId: item.fileId,
+                status: finalOutcome.outcome.status,
+                pageId: finalOutcome.outcome.pageId,
+                reason: finalOutcome.manifest.reason,
+                threadId: finalOutcome.handleThreadId,
+                decision: finalOutcome.manifest.decision,
+                skillsUsed: finalOutcome.manifest.skillsUsed,
+                createdPageIds: finalOutcome.manifest.createdPageIds,
+                updatedPageIds: finalOutcome.manifest.updatedPageIds,
+                proposedTypeNames: finalOutcome.manifest.proposedTypes.map((entry) => entry.name),
+                resultManifestPath: artifacts.resultPath,
+            },
+        };
+    }
+    catch (error) {
+        const recoveredManifest = shouldAttemptManifestRecovery(error)
+            ? readRecoverableWorkflowResult(resultManifestPath, threadId)
+            : null;
+        if (recoveredManifest && resultManifestPath) {
+            assertTemplateEvolutionAllowed(recoveredManifest, templateEvolution);
+            const recoveredOutcome = applyWorkflowManifest(db, item.fileId, recoveredManifest, resultManifestPath, item.attempts);
+            input.log?.(`${item.fileId}: recovered persisted workflow result after terminal failure status=${recoveredOutcome.status} thread=${recoveredManifest.threadId} ${formatManifestLogFields(recoveredManifest)} result=${resultManifestPath} message=${formatWorkflowError(error)}`);
+            return {
+                status: recoveredOutcome.status,
+                item: {
+                    fileId: item.fileId,
+                    status: recoveredOutcome.status,
+                    pageId: recoveredOutcome.pageId,
+                    reason: recoveredManifest.reason,
+                    threadId: recoveredManifest.threadId,
+                    decision: recoveredManifest.decision,
+                    skillsUsed: recoveredManifest.skillsUsed,
+                    createdPageIds: recoveredManifest.createdPageIds,
+                    updatedPageIds: recoveredManifest.updatedPageIds,
+                    proposedTypeNames: recoveredManifest.proposedTypes.map((entry) => entry.name),
+                    resultManifestPath,
+                },
+            };
+        }
+        const message = formatWorkflowError(error);
+        const failureState = buildQueueFailureState(message, {
+            explicitCode: extractErrorDetailsCode(error),
+            errorType: error instanceof AppError ? error.type : null,
+        });
+        const autoRetryExhausted = failureState.autoRetryEligible && item.attempts >= MAX_QUEUE_ERROR_RETRIES;
+        updateQueueWorkflowError(db, item.fileId, {
+            errorMessage: message,
+            errorCode: failureState.errorCode,
+            retryAfter: failureState.retryAfter,
+            threadId,
+            resultManifestPath,
+            autoRetryExhausted,
+        });
+        input.log?.(`${item.fileId}: error thread=${threadId ?? "-"} result=${resultManifestPath ?? "-"} message=${message}${autoRetryExhausted ? ` autoRetryLimit=${MAX_QUEUE_ERROR_RETRIES}` : ""}`);
+        return {
+            status: "error",
+            item: {
+                fileId: item.fileId,
+                status: "error",
+                pageId: item.resultPageId ?? null,
+                reason: message,
+                threadId,
+                resultManifestPath,
+            },
+        };
+    }
+}
 export function getVaultQueueSnapshot(env = process.env, status) {
     const paths = resolveRuntimePaths(env);
     const config = loadConfig(paths.configPath);
@@ -539,7 +867,6 @@ export async function processVaultQueueBatch(env = process.env, options = {}) {
     const config = loadConfig(paths.configPath);
     const { db } = openDb(paths.dbPath, config, Number.parseInt(env.EMBEDDING_DIMENSIONS ?? "384", 10) || 384);
     try {
-        const items = claimQueueItems(db, options.maxItems ?? agentSettings.batchSize, options.filterFileIds);
         const result = {
             enabled: true,
             processed: 0,
@@ -552,9 +879,10 @@ export async function processVaultQueueBatch(env = process.env, options = {}) {
         const templateEvolution = resolveTemplateEvolutionSettings(env);
         const maxWorkflowAttempts = isInlineRetryCapable(workflowRunner) ? INLINE_WORKFLOW_ATTEMPTS : 1;
         const workflowTimeoutMs = agentSettings.workflowTimeoutSeconds * 1000;
-        if (items.length > 0) {
-            options.log?.(`claimed ${items.length} items: ${items.map((item) => item.fileId).join(", ")}`);
-        }
+        const workerSlots = Math.max(0, options.maxItems ?? agentSettings.batchSize);
+        const attemptedFileIds = new Set();
+        const orderedItems = [];
+        let nextSequence = 0;
         const countOutcome = (status) => {
             if (status === "done") {
                 result.done += 1;
@@ -567,169 +895,61 @@ export async function processVaultQueueBatch(env = process.env, options = {}) {
             }
             result.processed += 1;
         };
-        for (const item of items) {
-            options.log?.(`${item.fileId}: start processing attempt=${item.attempts + 1} queuedAt=${item.queuedAt} thread=${item.threadId ?? "-"}`);
-            const file = fetchVaultFile(db, item.fileId);
-            if (!file) {
-                updateQueueStatus(db, item.fileId, {
-                    status: "error",
-                    processedAt: toOffsetIso(),
-                    errorMessage: `Vault file missing from index: ${item.fileId}`,
-                    incrementAttempts: true,
-                });
-                countOutcome("error");
-                options.log?.(`${item.fileId}: error thread=- result=- message=Vault file missing from index`);
-                result.items.push({
-                    fileId: item.fileId,
-                    status: "error",
-                    reason: "Vault file missing from index",
-                });
-                continue;
+        const claimNextQueueItem = () => {
+            if (options.shouldStop?.() === true) {
+                return null;
             }
-            let threadId = item.threadId ?? null;
-            let resultManifestPath = null;
-            try {
-                const localFilePath = await ensureLocalVaultFile(file, paths.vaultPath, env);
-                const { artifacts, input } = prepareCodexWorkflowInput(paths, item, file, localFilePath, env, templateEvolution.canApply);
-                resultManifestPath = artifacts.resultPath;
-                let finalOutcome = null;
-                let lastWorkflowError;
-                for (let attempt = 1; attempt <= maxWorkflowAttempts; attempt += 1) {
-                    try {
-                        const mode = threadId ? "resume" : "start";
-                        const workflowController = new AbortController();
-                        let loggedStartedThreadId = null;
-                        const attemptInput = {
-                            ...input,
-                            signal: workflowController.signal,
-                            onThreadStarted: (startedThreadId) => {
-                                if (loggedStartedThreadId === startedThreadId) {
-                                    return;
-                                }
-                                loggedStartedThreadId = startedThreadId;
-                                threadId = startedThreadId;
-                                updateQueueWorkflowTracking(db, item.fileId, {
-                                    threadId: startedThreadId,
-                                    resultManifestPath: artifacts.resultPath,
-                                });
-                                options.log?.(`${item.fileId}: workflow started mode=${mode} attempt=${attempt}/${maxWorkflowAttempts} thread=${startedThreadId} result=${artifacts.resultPath}`);
-                            },
-                        };
-                        options.log?.(`${item.fileId}: launching workflow mode=${mode} attempt=${attempt}/${maxWorkflowAttempts} timeout=${agentSettings.workflowTimeoutSeconds}s result=${artifacts.resultPath}`);
-                        const handle = threadId
-                            ? await runWithWorkflowTimeout("resumeWorkflow", workflowTimeoutMs, workflowController, () => workflowRunner.resumeWorkflow(threadId, attemptInput))
-                            : await runWithWorkflowTimeout("startWorkflow", workflowTimeoutMs, workflowController, () => workflowRunner.startWorkflow(attemptInput));
-                        threadId = handle.threadId;
-                        if (loggedStartedThreadId !== handle.threadId) {
-                            loggedStartedThreadId = handle.threadId;
-                            options.log?.(`${item.fileId}: workflow started mode=${mode} attempt=${attempt}/${maxWorkflowAttempts} thread=${handle.threadId} result=${artifacts.resultPath}`);
-                        }
-                        updateQueueWorkflowTracking(db, item.fileId, {
-                            threadId: handle.threadId,
-                            resultManifestPath: artifacts.resultPath,
-                        });
-                        options.log?.(`${item.fileId}: waiting for workflow result thread=${handle.threadId} attempt=${attempt}/${maxWorkflowAttempts} result=${artifacts.resultPath}`);
-                        const collectController = new AbortController();
-                        const manifest = await runWithWorkflowTimeout("collectResult", workflowTimeoutMs, collectController, () => workflowRunner.collectResult(handle, {
-                            ...input,
-                            signal: collectController.signal,
-                        }));
-                        assertTemplateEvolutionAllowed(manifest, templateEvolution);
-                        finalOutcome = {
-                            outcome: applyWorkflowManifest(db, item.fileId, manifest, artifacts.resultPath),
-                            manifest,
-                            handleThreadId: handle.threadId,
-                        };
-                        break;
-                    }
-                    catch (error) {
-                        lastWorkflowError = error;
-                        threadId = readPersistedWorkflowThreadId(artifacts.queueItemPath) ?? threadId;
-                        if (threadId) {
-                            updateQueueWorkflowTracking(db, item.fileId, {
-                                threadId,
-                                resultManifestPath: artifacts.resultPath,
-                            });
-                        }
-                        const recoveredManifest = shouldAttemptManifestRecovery(error)
-                            ? readRecoverableWorkflowResult(artifacts.resultPath, threadId)
-                            : null;
-                        if (recoveredManifest) {
-                            assertTemplateEvolutionAllowed(recoveredManifest, templateEvolution);
-                            finalOutcome = {
-                                outcome: applyWorkflowManifest(db, item.fileId, recoveredManifest, artifacts.resultPath),
-                                manifest: recoveredManifest,
-                                handleThreadId: recoveredManifest.threadId,
-                            };
-                            options.log?.(`${item.fileId}: recovered persisted workflow result status=${recoveredManifest.status} thread=${recoveredManifest.threadId} ${formatManifestLogFields(recoveredManifest)} result=${artifacts.resultPath} message=${formatWorkflowError(error)}`);
-                            break;
-                        }
-                        if (!shouldRetryWorkflowAttempt(error, attempt, maxWorkflowAttempts)) {
-                            throw error;
-                        }
-                        options.log?.(`${item.fileId}: retrying workflow attempt ${attempt + 1}/${maxWorkflowAttempts} thread=${threadId ?? "-"} result=${artifacts.resultPath} message=${formatWorkflowError(error)}`);
-                    }
-                }
-                if (!finalOutcome) {
-                    throw (lastWorkflowError ?? new AppError("Workflow completed without a result", "runtime"));
-                }
-                options.log?.(`${item.fileId}: ${finalOutcome.outcome.status} thread=${finalOutcome.handleThreadId} ${formatManifestLogFields(finalOutcome.manifest)} result=${artifacts.resultPath}`);
-                countOutcome(finalOutcome.outcome.status);
-                result.items.push({
-                    fileId: item.fileId,
-                    status: finalOutcome.outcome.status,
-                    pageId: finalOutcome.outcome.pageId,
-                    reason: finalOutcome.manifest.reason,
-                    threadId: finalOutcome.handleThreadId,
-                    decision: finalOutcome.manifest.decision,
-                    skillsUsed: finalOutcome.manifest.skillsUsed,
-                    createdPageIds: finalOutcome.manifest.createdPageIds,
-                    updatedPageIds: finalOutcome.manifest.updatedPageIds,
-                    proposedTypeNames: finalOutcome.manifest.proposedTypes.map((entry) => entry.name),
-                    resultManifestPath: artifacts.resultPath,
-                });
+            const remainingFilterFileIds = options.filterFileIds?.filter((fileId) => !attemptedFileIds.has(fileId));
+            if (options.filterFileIds && remainingFilterFileIds?.length === 0) {
+                return null;
             }
-            catch (error) {
-                const recoveredManifest = shouldAttemptManifestRecovery(error)
-                    ? readRecoverableWorkflowResult(resultManifestPath, threadId)
-                    : null;
-                if (recoveredManifest && resultManifestPath) {
-                    assertTemplateEvolutionAllowed(recoveredManifest, templateEvolution);
-                    const recoveredOutcome = applyWorkflowManifest(db, item.fileId, recoveredManifest, resultManifestPath);
-                    options.log?.(`${item.fileId}: recovered persisted workflow result after terminal failure status=${recoveredOutcome.status} thread=${recoveredManifest.threadId} ${formatManifestLogFields(recoveredManifest)} result=${resultManifestPath} message=${formatWorkflowError(error)}`);
-                    countOutcome(recoveredOutcome.status);
-                    result.items.push({
-                        fileId: item.fileId,
-                        status: recoveredOutcome.status,
-                        pageId: recoveredOutcome.pageId,
-                        reason: recoveredManifest.reason,
-                        threadId: recoveredManifest.threadId,
-                        decision: recoveredManifest.decision,
-                        skillsUsed: recoveredManifest.skillsUsed,
-                        createdPageIds: recoveredManifest.createdPageIds,
-                        updatedPageIds: recoveredManifest.updatedPageIds,
-                        proposedTypeNames: recoveredManifest.proposedTypes.map((entry) => entry.name),
-                        resultManifestPath,
-                    });
-                    continue;
+            const item = claimQueueItems(db, 1, {
+                filterFileIds: remainingFilterFileIds,
+                excludeFileIds: attemptedFileIds,
+            })[0];
+            if (!item) {
+                return null;
+            }
+            attemptedFileIds.add(item.fileId);
+            options.log?.(`claimed 1 items: ${item.fileId}`);
+            return {
+                sequence: nextSequence++,
+                item,
+            };
+        };
+        const workerCount = options.filterFileIds
+            ? Math.min(workerSlots, options.filterFileIds.length)
+            : workerSlots;
+        const workers = Array.from({ length: workerCount }, async () => {
+            while (true) {
+                const claimed = claimNextQueueItem();
+                if (!claimed) {
+                    return;
                 }
-                const message = formatWorkflowError(error);
-                updateQueueWorkflowError(db, item.fileId, {
-                    errorMessage: message,
-                    threadId,
-                    resultManifestPath,
+                const processed = await processClaimedQueueItem({
+                    db,
+                    env,
+                    paths,
+                    item: claimed.item,
+                    log: options.log,
+                    workflowRunner,
+                    templateEvolution,
+                    maxWorkflowAttempts,
+                    workflowTimeoutMs,
                 });
-                options.log?.(`${item.fileId}: error thread=${threadId ?? "-"} result=${resultManifestPath ?? "-"} message=${message}`);
-                countOutcome("error");
-                result.items.push({
-                    fileId: item.fileId,
-                    status: "error",
-                    pageId: item.resultPageId ?? null,
-                    reason: message,
-                    threadId,
-                    resultManifestPath,
+                countOutcome(processed.status);
+                orderedItems.push({
+                    sequence: claimed.sequence,
+                    item: processed.item,
                 });
             }
+        });
+        await Promise.all(workers);
+        result.items = orderedItems
+            .sort((left, right) => left.sequence - right.sequence)
+            .map((entry) => entry.item);
+        if (result.items.length > 0) {
+            options.log?.(`processed ${result.items.length} queue items with workerPool=${workerCount}`);
         }
         return result;
     }

package/dist/core/vault.js CHANGED Viewed

@@ -441,6 +441,7 @@ export function syncVaultIndex(db, currentFiles, syncId) {
         decision,
         result_manifest_path,
         last_error_at,
+        last_error_code,
         retry_after,
         created_page_ids,
         updated_page_ids,
@@ -468,6 +469,7 @@ export function syncVaultIndex(db, currentFiles, syncId) {
         NULL,
         NULL,
         NULL,
+        NULL,
         NULL
       )
       ON CONFLICT(file_id) DO UPDATE SET
@@ -493,6 +495,10 @@ export function syncVaultIndex(db, currentFiles, syncId) {
           WHEN vault_processing_queue.status = 'processing' THEN vault_processing_queue.error_message
           ELSE NULL
         END,
+        attempts = CASE
+          WHEN vault_processing_queue.status = 'processing' THEN vault_processing_queue.attempts
+          ELSE 0
+        END,
         thread_id = CASE
           WHEN vault_processing_queue.status = 'processing' THEN vault_processing_queue.thread_id
           ELSE NULL
@@ -513,6 +519,10 @@ export function syncVaultIndex(db, currentFiles, syncId) {
           WHEN vault_processing_queue.status = 'processing' THEN vault_processing_queue.last_error_at
           ELSE NULL
         END,
+        last_error_code = CASE
+          WHEN vault_processing_queue.status = 'processing' THEN vault_processing_queue.last_error_code
+          ELSE NULL
+        END,
         retry_after = CASE
           WHEN vault_processing_queue.status = 'processing' THEN vault_processing_queue.retry_after
           ELSE NULL

package/dist/daemon/server.js CHANGED Viewed

@@ -511,6 +511,7 @@ export async function runDaemonServer(options) {
         while (!stopping) {
             const batchResult = await processVaultQueueBatch(env, {
                 log: (message) => logInfo(`queue ${message}`),
+                shouldStop: () => stopping,
             });
             if (!batchResult.enabled) {
                 break;

package/dist/operations/dashboard.js CHANGED Viewed

@@ -254,6 +254,7 @@ function buildQueueTiming(item) {
         startedAt: item.startedAt ?? null,
         processedAt: item.processedAt,
         lastErrorAt: item.lastErrorAt ?? null,
+        lastErrorCode: item.lastErrorCode ?? null,
         retryAfter: item.retryAfter ?? null,
         queueAgeMs: Number.isFinite(queuedAt) ? now - queuedAt : null,
         waitDurationMs: Number.isFinite(claimedAt) && Number.isFinite(queuedAt) ? claimedAt - queuedAt : null,
@@ -271,8 +272,10 @@ function buildQueueListItem(item) {
         status: item.status,
         priority: item.priority,
         attempts: item.attempts,
+        autoRetryExhausted: item.autoRetryExhausted ?? false,
         resultPageId: item.resultPageId,
         errorMessage: item.errorMessage,
+        lastErrorCode: item.lastErrorCode ?? null,
         threadId: item.threadId ?? null,
         decision: item.decision ?? null,
         workflowVersion: item.workflowVersion ?? null,
@@ -790,11 +793,13 @@ export function retryDashboardQueueItem(env = process.env, fileId) {
           processed_at = NULL,
           result_page_id = NULL,
           error_message = NULL,
+          attempts = 0,
           thread_id = NULL,
           workflow_version = NULL,
           decision = NULL,
           result_manifest_path = NULL,
           last_error_at = NULL,
+          last_error_code = NULL,
           retry_after = NULL,
           created_page_ids = NULL,
           updated_page_ids = NULL,

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@biaoo/tiangong-wiki",
-  "version": "0.3.2",
+  "version": "0.3.3",
   "description": "Local-first wiki index and query engine for Markdown knowledge pages (Tiangong Wiki).",
   "type": "module",
   "publishConfig": {

package/references/troubleshooting.md CHANGED Viewed

@@ -87,12 +87,14 @@ The agent uses [Codex SDK](https://www.npmjs.com/package/@openai/codex-sdk) to p
 | `WIKI_AGENT_BASE_URL` | No | LLM API base URL (e.g. `https://api.openai.com/v1`). When set, overrides global Codex config |
 | `WIKI_AGENT_API_KEY` | If enabled | API key for the LLM provider |
 | `WIKI_AGENT_MODEL` | No | Model name (e.g. `gpt-5.4`, `Qwen/Qwen3.5-397B-A17B-GPTQ-Int4`) |
-| `WIKI_AGENT_BATCH_SIZE` | No | Max concurrent vault items per batch (default: `5`) |
+| `WIKI_AGENT_BATCH_SIZE` | No | Max concurrent vault queue workers per cycle (default: `5`) |
 | `WIKI_AGENT_SANDBOX_MODE` | No | Codex sandbox mode: `danger-full-access` (default) or `workspace-write` |
 | `WIKI_PARSER_SKILLS` | No | Comma-separated parser skill list (e.g. `pdf,docx,pptx,xlsx`) |
 `tiangong-wiki setup` now prompts for `WIKI_AGENT_SANDBOX_MODE` when automatic vault processing is enabled. The default is `danger-full-access`, and the setup wizard highlights that this mode grants full runtime access.
+Queue items that fail workflow execution are auto-retried up to 3 times. After that they remain in `error` until you manually retry them from the dashboard / queue tooling, or a later vault sync requeues the file because the source changed.
 ---
 ## Common Issues