npm - aiex-cli - Versions diffs - 0.0.5-beta.6 → 0.0.6-beta.1 - Mend

aiex-cli 0.0.5-beta.6 → 0.0.6-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

package/README.md +0 -11
package/dist/cli.mjs +197 -900
package/dist/{doctor-collector-BpqhXNcO.mjs → doctor-collector-hWEvJ4lw.mjs} +89 -44
package/dist/index.d.mts +88 -91
package/dist/index.mjs +1 -1
package/dist/web/assets/AISettings-BlyTFIIy.js +272 -0
package/dist/web/assets/{DataBrowser-BGkZb9FV.js → DataBrowser-GAA-pGq0.js} +1 -1
package/dist/web/assets/ExtractionViewer-DqIrBGNK.js +1 -0
package/dist/web/assets/{api-client-gQAAOw0v.js → api-client-b4ZBXpNH.js} +1 -1
package/dist/web/assets/index-CvY9TGny.css +2 -0
package/dist/web/assets/{index-BQKZKzzP.js → index-Dlze68g1.js} +3 -3
package/dist/web/index.html +3 -3
package/dist/{zh-CN-DkillGHx.mjs → zh-CN-Qcn0DHFh.mjs} +22 -16
package/package.json +1 -3
package/dist/web/assets/AISettings-sVI4PTNB.js +0 -264
package/dist/web/assets/ExtractionViewer-DNrkSECj.js +0 -1
package/dist/web/assets/index-BU58oIRd.css +0 -2

package/dist/cli.mjs CHANGED Viewed

@@ -1,4 +1,4 @@
-import { C as name, D as doctorDiagnosticsTableRows, O as formatDoctorDiagnosticsJson, S as description, T as version, _ as PLACEHOLDER_SCHEMA, a as parseJsonSchema, b as createConfig, c as recognizeImageText, d as getDefaultAIConfig, f as readAIConfig, g as DEFAULT_PROMPT_CONFIG, h as DEFAULT_MINERU_CONFIG, i as JsonSchemaDefinitionSchema, l as initI18n, m as DEFAULT_MINERU_API_CONFIG, n as createMigrationConfig, o as toSnakeCase, p as writeAIConfig, s as generateDrizzleSchema, t as collectDoctorDiagnostics, u as t, v as PLACEHOLDER_TEXT, w as package_default, x as seedConfig, y as AIConfigSchema } from "./doctor-collector-BpqhXNcO.mjs";
+import { A as doctorDiagnosticsTableRows, C as createConfig, D as package_default, E as name, O as version, S as AIConfigSchema, T as description, _ as DEFAULT_MINERU_API_CONFIG, a as parseJsonSchema, b as PLACEHOLDER_SCHEMA, c as recognizeImageText, d as t, f as getDefaultAIConfig, g as DEFAULT_MARKITDOWN_CONFIG, h as DEFAULT_MARKER_CONFIG, i as JsonSchemaDefinitionSchema, j as formatDoctorDiagnosticsJson, l as shouldUseImageOcrFallback, m as writeAIConfig, n as createMigrationConfig, o as toSnakeCase, p as readAIConfig, s as generateDrizzleSchema, t as collectDoctorDiagnostics, u as initI18n, v as DEFAULT_MINERU_CONFIG, w as seedConfig, x as PLACEHOLDER_TEXT, y as DEFAULT_PROMPT_CONFIG } from "./doctor-collector-hWEvJ4lw.mjs";
 import { createRequire } from "node:module";
 import fs from "node:fs/promises";
 import os from "node:os";
@@ -17,14 +17,13 @@ import Database from "better-sqlite3";
 import pc from "picocolors";
 import { Buffer } from "node:buffer";
 import * as XLSX from "xlsx";
-import { getEncoding } from "js-tiktoken";
 import { createOpenAICompatible } from "@ai-sdk/openai-compatible";
 import { APICallError, Output, generateText, jsonSchema } from "ai";
 import pRetry from "p-retry";
+import mime from "mime";
 import { jsonrepair } from "jsonrepair";
 import { LangfuseSpanProcessor } from "@langfuse/otel";
 import { NodeTracerProvider } from "@opentelemetry/sdk-trace-node";
-import { marked } from "marked";
 import crypto from "node:crypto";
 import { Client, extractNotionId } from "@notionhq/client";
 import { execa } from "execa";
@@ -12860,6 +12859,28 @@ async function withRetry(fn, onRetry, maxRetries = 5) {
 	});
 }
+//#endregion
+//#region src/core/ai-extraction/file-utils.ts
+function detectMimeType(filePath) {
+	return mime.getType(filePath) ?? "application/octet-stream";
+}
+async function readFilePart(filePath) {
+	const mimeStr = detectMimeType(filePath);
+	const buffer = await fs.readFile(filePath);
+	const name$1 = path.basename(filePath);
+	if (mimeStr.startsWith("image/")) return {
+		type: "image",
+		image: buffer,
+		mimeType: mimeStr
+	};
+	return {
+		type: "file",
+		data: buffer,
+		mediaType: mimeStr,
+		filename: name$1
+	};
+}
 //#endregion
 //#region src/core/ai-extraction/json-utils.ts
 function parseJsonLike(text$1) {
@@ -12920,10 +12941,25 @@ function filterCompatible(models, inputTokens, outputTokens) {
 	});
 }
 function selectModel(input) {
-	const { models, inputTokens, outputTokens } = input;
+	const { models, isImage, fileName, inputTokens, outputTokens } = input;
 	if (models.length === 0) throw new Error(t("errors.ai.noModels"));
 	let candidates = filterCompatible(models, inputTokens, outputTokens);
 	if (candidates.length === 0) candidates = models;
+	if (isImage) {
+		const visionModel = candidates.find((m) => m.capabilities.vision);
+		if (!visionModel) {
+			const hint = fileName ? ` (${fileName})` : "";
+			const msg = inputTokens ? t("errors.ai.noVisionModelContext", {
+				tokens: inputTokens,
+				hint
+			}) : t("errors.ai.noVisionModel", { hint });
+			throw new Error(msg + t("errors.ai.addSuitableModel"));
+		}
+		return {
+			name: visionModel.name,
+			capabilities: visionModel.capabilities
+		};
+	}
 	const soModel = candidates.find((m) => m.capabilities.structuredOutput);
 	if (soModel) return {
 		name: soModel.name,
@@ -12937,46 +12973,36 @@ function selectModel(input) {
 //#endregion
 //#region src/core/ai-extraction/prompt-generator.ts
-const CAMEL_CASE_BOUNDARY_RE = /([a-z0-9])([A-Z])/g;
-const IDENTIFIER_SEPARATOR_RE = /[\s_-]+/;
-function splitIdentifier(name$1) {
-	return name$1.replace(CAMEL_CASE_BOUNDARY_RE, "$1 $2").split(IDENTIFIER_SEPARATOR_RE).map((part) => part.trim().toLowerCase()).filter(Boolean);
-}
-function propertyToDescription(name$1, prop, indent = "", required = false) {
+function propertyToDescription(name$1, prop, indent = "") {
 	const lines = [];
 	let typeStr = prop.type;
 	if (prop.type === "array" && prop.items) typeStr = `array of ${prop.items.type}`;
-	lines.push(`${indent}- ${name$1}: ${typeStr}${required ? " (required)" : ""}`);
-	const terms = splitIdentifier(name$1);
-	if (terms.length > 1) lines.push(`${indent}  search terms: ${terms.join(", ")}`);
-	if (prop.description) lines.push(`${indent}  description: ${prop.description}`);
+	lines.push(`${indent}- ${name$1}: ${typeStr}`);
 	if (prop.minLength !== void 0 || prop.maxLength !== void 0) lines.push(`${indent}  length: ${prop.minLength ?? 0} - ${prop.maxLength ?? "unlimited"}`);
-	if (prop.minimum !== void 0 || prop.maximum !== void 0) lines.push(`${indent}  range: ${prop.minimum ?? "-∞"} - ${prop.maximum ?? "+∞"}`);
 	if (prop.format) lines.push(`${indent}  format: ${prop.format}`);
 	if (prop.unique) lines.push(`${indent}  unique: true`);
 	if (prop.default !== void 0) lines.push(`${indent}  default: ${JSON.stringify(prop.default)}`);
 	return lines.join("\n");
 }
-function nestedPropertyToDescription(name$1, prop, indent = "", requiredFields = []) {
+function nestedPropertyToDescription(name$1, prop, indent = "") {
 	const lines = [];
-	const isRequired = requiredFields.includes(name$1);
 	if (prop.nested?.enabled && prop.type === "object") {
 		const relation = prop.nested.relation || "has-one";
-		lines.push(`${indent}- ${name$1}: object (related table, ${relation})${isRequired ? " (required)" : ""}`);
-		if (prop.properties) for (const [childName, childProp] of Object.entries(prop.properties)) lines.push(nestedPropertyToDescription(childName, childProp, `${indent}  `, prop.required ?? []));
+		lines.push(`${indent}- ${name$1}: object (related table, ${relation})`);
+		if (prop.properties) for (const [childName, childProp] of Object.entries(prop.properties)) lines.push(nestedPropertyToDescription(childName, childProp, `${indent}  `));
 		return lines.join("\n");
 	}
 	if (prop.type === "array" && prop.items?.nested?.enabled) {
 		const relation = prop.items.nested.relation || "has-many";
-		lines.push(`${indent}- ${name$1}: array of object (related table, ${relation})${isRequired ? " (required)" : ""}`);
-		if (prop.items.properties) for (const [childName, childProp] of Object.entries(prop.items.properties)) lines.push(nestedPropertyToDescription(childName, childProp, `${indent}  `, prop.items.required ?? []));
+		lines.push(`${indent}- ${name$1}: array of object (related table, ${relation})`);
+		if (prop.items.properties) for (const [childName, childProp] of Object.entries(prop.items.properties)) lines.push(nestedPropertyToDescription(childName, childProp, `${indent}  `));
 		return lines.join("\n");
 	}
-	lines.push(propertyToDescription(name$1, prop, indent, isRequired));
-	if (prop.type === "object" && prop.properties) for (const [childName, childProp] of Object.entries(prop.properties)) lines.push(nestedPropertyToDescription(childName, childProp, `${indent}  `, prop.required ?? []));
+	lines.push(propertyToDescription(name$1, prop, indent));
+	if (prop.type === "object" && prop.properties) for (const [childName, childProp] of Object.entries(prop.properties)) lines.push(nestedPropertyToDescription(childName, childProp, `${indent}  `));
 	if (prop.type === "array" && prop.items?.properties && !prop.items?.nested?.enabled) {
 		lines.push(`${indent}  item fields:`);
-		for (const [childName, childProp] of Object.entries(prop.items.properties)) lines.push(nestedPropertyToDescription(childName, childProp, `${indent}    `, prop.items.required ?? []));
+		for (const [childName, childProp] of Object.entries(prop.items.properties)) lines.push(nestedPropertyToDescription(childName, childProp, `${indent}    `));
 	}
 	return lines.join("\n");
 }
@@ -12988,7 +13014,7 @@ function schemaToDescription(schema) {
 	lines.push("Fields:");
 	for (const [name$1, prop] of Object.entries(schema.properties)) {
 		const property = prop;
-		lines.push(nestedPropertyToDescription(name$1, property, "", schema.required ?? []));
+		lines.push(nestedPropertyToDescription(name$1, property));
 	}
 	if (schema.examples && schema.examples.length > 0) {
 		lines.push("");
@@ -13033,6 +13059,33 @@ function generatePromptSnapshot(schema, promptConfig = DEFAULT_PROMPT_CONFIG) {
 	].join("\n");
 }
+//#endregion
+//#region src/core/ai-extraction/snapshot.ts
+const SYSTEM_PROMPT_REGEX = /## System Prompt\n([\s\S]*?)(?=## User Prompt|$)/;
+const USER_PROMPT_REGEX = /## User Prompt Template\n([\s\S]*)$/;
+async function loadPromptSnapshot(aiexDir, tableName) {
+	const snapshotPath = path.join(aiexDir, "extracted", `${tableName}.prompt.md`);
+	try {
+		const content = await fs.readFile(snapshotPath, "utf-8");
+		const systemMatch = content.match(SYSTEM_PROMPT_REGEX);
+		const userMatch = content.match(USER_PROMPT_REGEX);
+		if (systemMatch && userMatch) return {
+			system: systemMatch[1].trim(),
+			user: userMatch[1].trim()
+		};
+	} catch {}
+	return null;
+}
+async function savePromptSnapshot(schema, aiexDir) {
+	const content = generatePromptSnapshot(schema, (await readAIConfig(aiexDir))?.prompt ?? DEFAULT_PROMPT_CONFIG);
+	const outputDir = path.join(aiexDir, "extracted");
+	await fs.mkdir(outputDir, { recursive: true });
+	const fileName = `${schema.table.name}.prompt.md`;
+	const outputPath = path.join(outputDir, fileName);
+	await fs.writeFile(outputPath, content);
+	return outputPath;
+}
 //#endregion
 //#region src/core/ai-extraction/telemetry.ts
 let langfuseInitialized = false;
@@ -13075,7 +13128,7 @@ function propertyToExtractionSchema(property) {
 	}
 	return { type: nullableType(property.type) };
 }
-function isRecord$2(value) {
+function isRecord(value) {
 	return typeof value === "object" && value !== null && !Array.isArray(value);
 }
 function schemaToExtractionOutputSchema(schema) {
@@ -13113,7 +13166,7 @@ function validatePropertyValue(path$1, property, value, issues) {
 			}
 			return;
 		case "object":
-			if (!isRecord$2(value)) {
+			if (!isRecord(value)) {
 				issues.push(`${path$1}: expected object or null`);
 				return;
 			}
@@ -13136,7 +13189,7 @@ function validateProperties(basePath, properties, data, issues) {
 	}
 }
 function validateExtractedData(schema, data) {
-	if (!isRecord$2(data)) return {
+	if (!isRecord(data)) return {
 		success: false,
 		error: "Extracted data must be a JSON object."
 	};
@@ -13153,11 +13206,13 @@ function validateExtractedData(schema, data) {
 //#region src/core/ai-extraction/extractor.ts
 const OPENAI_COMPATIBLE_PROVIDER_NAME = "openai-compatible";
 async function extractStructuredData(input) {
-	const { config, schema, text: text$1, modelOverride } = input;
+	const { config, schema, text: text$1, aiexDir, file, modelOverride } = input;
 	if (!config.provider.apiKey) return {
 		success: false,
 		error: t("errors.ai.apiKeyMissing")
 	};
+	const useFileContent = !!file;
+	const isImageFile = useFileContent && detectMimeType(file).startsWith("image/");
 	const inputTokens = text$1 ? Math.ceil(text$1.length / 2) : void 0;
 	const fieldCount = schema.properties ? Object.keys(schema.properties).length : 0;
 	const outputTokens = fieldCount > 0 ? fieldCount * 80 : void 0;
@@ -13165,6 +13220,8 @@ async function extractStructuredData(input) {
 	try {
 		selected = modelOverride ?? selectModel({
 			models: config.provider.models,
+			isImage: isImageFile,
+			fileName: file,
 			inputTokens,
 			outputTokens
 		});
@@ -13184,7 +13241,18 @@ async function extractStructuredData(input) {
 			apiKey: config.provider.apiKey,
 			supportsStructuredOutputs: useStructuredOutput
 		});
-		const { system, user } = generateExtractionPrompt(schema, text$1, config.prompt ?? DEFAULT_PROMPT_CONFIG);
+		let system;
+		let user;
+		const snapshot = await loadPromptSnapshot(aiexDir, schema.table.name);
+		const promptText = file ? PLACEHOLDER_TEXT : text$1;
+		if (snapshot) {
+			system = snapshot.system;
+			user = snapshot.user.replaceAll(PLACEHOLDER_TEXT, promptText);
+		} else {
+			const generated = generateExtractionPrompt(schema, promptText, config.prompt ?? DEFAULT_PROMPT_CONFIG);
+			system = generated.system;
+			user = generated.user;
+		}
 		const outputSchema = jsonSchema(schemaToExtractionOutputSchema(schema));
 		const timeoutMs = (config.provider.timeout ?? 300) * 1e3;
 		let systemPrompt = system;
@@ -13199,16 +13267,38 @@ async function extractStructuredData(input) {
 			let parseError;
 			let validationError;
 			try {
-				const textOpts = {
-					model: provider.chatModel(selected.name),
-					system: systemPrompt,
-					prompt: userPrompt,
-					abortSignal: AbortSignal.timeout(timeoutMs),
-					maxRetries: 0,
-					experimental_telemetry: { isEnabled: useTelemetry }
-				};
-				if (useStructuredOutput) textOpts.output = Output.object({ schema: outputSchema });
-				result = await withRetry(() => generateText(textOpts), input.onRetry);
+				if (useFileContent) {
+					const filePart = await readFilePart(file);
+					const fileName = filePart.type === "file" ? filePart.filename : path.basename(file);
+					const contentParts = [{
+						type: "text",
+						text: userPrompt.includes(PLACEHOLDER_TEXT) ? userPrompt.replaceAll(PLACEHOLDER_TEXT, text$1 || `Data is contained in the attached file: ${fileName}`) : userPrompt
+					}, filePart];
+					const fileOpts = {
+						model: provider.chatModel(selected.name),
+						system: systemPrompt,
+						messages: [{
+							role: "user",
+							content: contentParts
+						}],
+						abortSignal: AbortSignal.timeout(timeoutMs),
+						maxRetries: 0,
+						experimental_telemetry: { isEnabled: useTelemetry }
+					};
+					if (useStructuredOutput) fileOpts.output = Output.object({ schema: outputSchema });
+					result = await withRetry(() => generateText(fileOpts), input.onRetry);
+				} else {
+					const textOpts = {
+						model: provider.chatModel(selected.name),
+						system: systemPrompt,
+						prompt: userPrompt,
+						abortSignal: AbortSignal.timeout(timeoutMs),
+						maxRetries: 0,
+						experimental_telemetry: { isEnabled: useTelemetry }
+					};
+					if (useStructuredOutput) textOpts.output = Output.object({ schema: outputSchema });
+					result = await withRetry(() => generateText(textOpts), input.onRetry);
+				}
 				if (result.usage) {
 					totalPromptTokens += result.usage.inputTokens ?? 0;
 					totalCompletionTokens += result.usage.outputTokens ?? 0;
@@ -13224,16 +13314,27 @@ async function extractStructuredData(input) {
 			}
 			if (!parseError && data !== void 0) {
 				const validation = validateExtractedData(schema, data);
-				if (validation.success) return {
-					success: true,
-					data,
-					tokensUsed: {
-						prompt: totalPromptTokens,
-						completion: totalCompletionTokens,
-						total: totalPromptTokens + totalCompletionTokens
-					}
-				};
-				else validationError = validation.error;
+				if (validation.success) {
+					const outputDir = path.resolve(aiexDir, config.extraction.outputDir.replace(".aiex/", ""));
+					await fs.mkdir(outputDir, { recursive: true });
+					const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
+					const outputFileName = `${schema.table.name}-${timestamp}.json`;
+					const outputPath = path.join(outputDir, outputFileName);
+					await writeFile(outputPath, data, {
+						spaces: 2,
+						EOL: "\n"
+					});
+					return {
+						success: true,
+						outputPath,
+						data,
+						tokensUsed: {
+							prompt: totalPromptTokens,
+							completion: totalCompletionTokens,
+							total: totalPromptTokens + totalCompletionTokens
+						}
+					};
+				} else validationError = validation.error;
 			}
 			const errorMsg = parseError || validationError || "Unknown validation error";
 			lastError = errorMsg;
@@ -13244,14 +13345,11 @@ async function extractStructuredData(input) {
 CRITICAL RULES:
 1. Only correct the fields that failed validation.
 2. Preserve all other correctly extracted fields and their values exactly.
-3. Use only values supported by the original text. If a value cannot be confirmed, set it to null.
-4. Remove any fields not defined by the JSON Schema.
-5. Normalize values to the expected JSON type without changing the intended meaning.
-6. Return ONLY the corrected JSON object. No explanations, no markdown blocks other than JSON.`;
+3. Return ONLY the corrected JSON object. No explanations, no markdown blocks other than JSON.`;
 				userPrompt = `The JSON data you generated previously failed validation. Please correct it.
 [Original Text]
-${text$1 || "Original text is empty."}
+${text$1 || "Data is contained in the attached file."}
 [JSON Schema Definition]
 ${JSON.stringify(schemaToExtractionOutputSchema(schema), null, 2)}
@@ -13262,11 +13360,6 @@ ${invalidJson}
 [Validation Error Details]
 ${errorMsg}
-Correction checklist:
-- Fix each field path mentioned in the validation error.
-- Keep schema-valid fields unchanged.
-- Do not invent missing facts; use null when the original text does not support a value.
 Please output the corrected JSON object now:`;
 			}
 		}
@@ -13419,343 +13512,6 @@ function insertExtractedData(db, schema, data) {
 	}
 }
-//#endregion
-//#region src/core/ai-extraction/json-merger.ts
-function isRecord$1(value) {
-	return typeof value === "object" && value !== null && !Array.isArray(value);
-}
-function stableKey(value) {
-	if (!isRecord$1(value)) return JSON.stringify(value);
-	return JSON.stringify(Object.keys(value).sort().reduce((acc, key) => {
-		acc[key] = value[key];
-		return acc;
-	}, {}));
-}
-function isBlankString(value) {
-	return typeof value === "string" && value.trim() === "";
-}
-function isPlaceholderString$1(value) {
-	if (typeof value !== "string") return false;
-	const normalized = value.trim().toLowerCase();
-	return normalized === "" || normalized === "n/a" || normalized === "na" || normalized === "none" || normalized === "null" || normalized === "unknown" || normalized === "tbd" || normalized === "-" || normalized === "--";
-}
-function pickPrimitiveValue(values) {
-	const meaningful = values.filter((v) => !isBlankString(v) && !isPlaceholderString$1(v));
-	if (meaningful.length === 0) return null;
-	if (typeof meaningful[0] === "boolean") {
-		const trueCount = meaningful.filter(Boolean).length;
-		return trueCount >= meaningful.length - trueCount;
-	}
-	return meaningful[0];
-}
-function mergePropertyValue(property, values) {
-	const nonNullValues = values.filter((v) => v !== null && v !== void 0);
-	if (nonNullValues.length === 0) return null;
-	if (property.type === "array") {
-		const concatenated = [];
-		const seen = /* @__PURE__ */ new Set();
-		for (const val of nonNullValues) if (Array.isArray(val)) for (const item of val) {
-			const key = stableKey(item);
-			if (!seen.has(key)) {
-				seen.add(key);
-				concatenated.push(item);
-			}
-		}
-		return concatenated;
-	}
-	if (property.type === "object") {
-		const childProperties = property.properties;
-		if (!childProperties) {
-			const mergedObj$1 = {};
-			for (const val of nonNullValues) if (isRecord$1(val)) Object.assign(mergedObj$1, val);
-			return mergedObj$1;
-		}
-		const mergedObj = {};
-		for (const [propName, propDef] of Object.entries(childProperties)) mergedObj[propName] = mergePropertyValue(propDef, nonNullValues.map((v) => isRecord$1(v) ? v[propName] : void 0));
-		return mergedObj;
-	}
-	return pickPrimitiveValue(nonNullValues);
-}
-/**
-* Merges structured extraction outputs from multiple document chunks
-* according to the schema properties.
-*/
-function mergeExtractionResults(schema, results) {
-	if (results.length === 0) return {};
-	if (results.length === 1) return results[0];
-	const merged = {};
-	for (const [propName, propDef] of Object.entries(schema.properties)) {
-		if (propDef.primary && propDef.autoIncrement) continue;
-		merged[propName] = mergePropertyValue(propDef, results.map((r) => r[propName]));
-	}
-	return merged;
-}
-//#endregion
-//#region src/core/ai-extraction/snapshot.ts
-async function savePromptSnapshot(schema, aiexDir) {
-	const content = generatePromptSnapshot(schema, (await readAIConfig(aiexDir))?.prompt ?? DEFAULT_PROMPT_CONFIG);
-	const outputDir = path.join(aiexDir, "extracted");
-	await fs.mkdir(outputDir, { recursive: true });
-	const fileName = `${schema.table.name}.prompt.md`;
-	const outputPath = path.join(outputDir, fileName);
-	await fs.writeFile(outputPath, content);
-	return outputPath;
-}
-//#endregion
-//#region src/core/ai-extraction/text-splitter.ts
-const encoding$1 = getEncoding("cl100k_base");
-const MAX_OVERLAP_RATIO = .15;
-const MAX_EFFECTIVE_OVERLAP_TOKENS = 1200;
-const TABLE_SEPARATOR_CELL_RE = /^:?-{3,}:?$/;
-const LEADING_TABLE_PIPE_RE = /^\|/;
-const TRAILING_TABLE_PIPE_RE = /\|$/;
-function countTokens(text$1) {
-	return encoding$1.encode(text$1).length;
-}
-function calculateChunkTokenBudget(options = {}) {
-	const configuredMaxTokens = options.configuredMaxTokens ?? 8e3;
-	const modelMaxTokens = options.modelMaxTokens;
-	if (!modelMaxTokens) return configuredMaxTokens;
-	const outputReserveTokens = options.outputReserveTokens ?? 2e3;
-	const promptReserveTokens = options.promptReserveTokens ?? 1200;
-	const safetyBufferTokens = options.safetyBufferTokens ?? Math.min(1e3, Math.floor(modelMaxTokens * .1));
-	const available = modelMaxTokens - outputReserveTokens - promptReserveTokens - safetyBufferTokens;
-	return Math.max(512, Math.min(configuredMaxTokens, available));
-}
-function formatHeadingContext(headings) {
-	const active = headings.filter(Boolean);
-	if (active.length === 0) return "";
-	return `> **[Context]** Belong to: ${active.join(" > ")}\n\n`;
-}
-function getMetadata(headings) {
-	return {
-		h1: headings[0] || void 0,
-		h2: headings[1] || void 0,
-		h3: headings[2] || void 0,
-		h4: headings[3] || void 0
-	};
-}
-function getHeadingPath(metadata) {
-	return [
-		metadata.h1,
-		metadata.h2,
-		metadata.h3,
-		metadata.h4
-	].filter(Boolean);
-}
-function finalizeChunks(chunks, sourceText) {
-	let searchStart = 0;
-	const totalChunks = chunks.length;
-	return chunks.map((chunk, index) => {
-		const tokenCount = countTokens(chunk.pageContent);
-		let charStart = sourceText.indexOf(chunk.pageContent, searchStart);
-		if (charStart === -1) charStart = sourceText.indexOf(chunk.pageContent);
-		const charEnd = charStart >= 0 ? charStart + chunk.pageContent.length : void 0;
-		if (charStart >= 0 && charEnd !== void 0) searchStart = charEnd;
-		return {
-			...chunk,
-			chunkIndex: index,
-			totalChunks,
-			tokenCount,
-			headingPath: getHeadingPath(chunk.metadata),
-			charStart: charStart >= 0 ? charStart : void 0,
-			charEnd
-		};
-	});
-}
-function getEffectiveOverlapTokens(maxTokens, overlapTokens) {
-	return Math.floor(Math.min(overlapTokens, Math.max(64, maxTokens * MAX_OVERLAP_RATIO), MAX_EFFECTIVE_OVERLAP_TOKENS));
-}
-function splitMarkdownTable(tableText, maxTokens) {
-	if (countTokens(tableText) <= maxTokens) return [tableText];
-	const lines = tableText.split("\n");
-	const headerIndex = lines.findIndex((line) => line.trim().startsWith("|"));
-	const separatorIndex = lines.findIndex((line, index) => {
-		if (index <= headerIndex) return false;
-		const cells = line.trim().replace(LEADING_TABLE_PIPE_RE, "").replace(TRAILING_TABLE_PIPE_RE, "").split("|").map((cell) => cell.trim());
-		return cells.length > 0 && cells.every((cell) => TABLE_SEPARATOR_CELL_RE.test(cell));
-	});
-	if (headerIndex === -1 || separatorIndex === -1) return splitTextRecursively(tableText, maxTokens, ["\n"]);
-	const prefix = lines.slice(0, headerIndex);
-	const header = lines[headerIndex];
-	const separator = lines[separatorIndex];
-	const rows = lines.slice(separatorIndex + 1).filter((line) => line.trim() !== "");
-	const chunks = [];
-	let currentRows = [];
-	const buildTable = (tableRows) => {
-		return [
-			...prefix,
-			header,
-			separator,
-			...tableRows
-		].join("\n");
-	};
-	for (const row of rows) {
-		const candidateRows = [...currentRows, row];
-		if (currentRows.length > 0 && countTokens(buildTable(candidateRows)) > maxTokens) {
-			chunks.push(buildTable(currentRows));
-			currentRows = [row];
-		} else currentRows = candidateRows;
-	}
-	if (currentRows.length > 0) chunks.push(buildTable(currentRows));
-	return chunks.length > 0 ? chunks : [tableText];
-}
-/**
-* Splits text recursively using a list of separators.
-* Preserves the separators when re-joining.
-*/
-function splitTextRecursively(text$1, maxTokens, separators = [
-	"\n\n",
-	"\n",
-	"。",
-	". ",
-	" "
-]) {
-	if (countTokens(text$1) <= maxTokens) return [text$1];
-	if (separators.length === 0) {
-		const chunks = [];
-		let current = "";
-		for (const char of text$1) if (countTokens(current + char) > maxTokens) {
-			chunks.push(current);
-			current = char;
-		} else current += char;
-		if (current) chunks.push(current);
-		return chunks;
-	}
-	const separator = separators[0];
-	const nextSeparators = separators.slice(1);
-	const parts = text$1.split(separator);
-	const result = [];
-	let currentChunk = [];
-	let currentChunkTokens = 0;
-	for (let i = 0; i < parts.length; i++) {
-		const part = parts[i];
-		const itemText = part + (i < parts.length - 1 ? separator : "");
-		const partTokens = countTokens(itemText);
-		if (partTokens > maxTokens) {
-			if (currentChunk.length > 0) {
-				result.push(currentChunk.join(""));
-				currentChunk = [];
-				currentChunkTokens = 0;
-			}
-			const subParts = splitTextRecursively(part, maxTokens, nextSeparators);
-			for (let j = 0; j < subParts.length; j++) {
-				const finalSub = subParts[j] + (j === subParts.length - 1 && i < parts.length - 1 ? separator : "");
-				result.push(finalSub);
-			}
-		} else if (currentChunkTokens + partTokens > maxTokens) {
-			result.push(currentChunk.join(""));
-			currentChunk = [itemText];
-			currentChunkTokens = partTokens;
-		} else {
-			currentChunk.push(itemText);
-			currentChunkTokens += partTokens;
-		}
-	}
-	if (currentChunk.length > 0) result.push(currentChunk.join(""));
-	return result;
-}
-/**
-* Splits a Markdown document into chunks based on heading contexts, AST block parsing, and token limits.
-* Protects tables, list items, and code blocks from being broken.
-*/
-function splitMarkdown(text$1, maxTokens = 8e3, overlapTokens = 1e3) {
-	const tokens = marked.lexer(text$1);
-	const chunks = [];
-	const effectiveOverlapTokens = getEffectiveOverlapTokens(maxTokens, overlapTokens);
-	let currentHeadings = [];
-	let currentChunkList = [];
-	let accumulatedTokens = 0;
-	const flushCurrentChunk = (isHeadingChange = false) => {
-		if (currentChunkList.length === 0) return;
-		const pageContent = currentChunkList.map((item) => item.text).join("");
-		const firstHeadings = currentChunkList[0].headings;
-		chunks.push({
-			pageContent,
-			metadata: getMetadata(firstHeadings)
-		});
-		if (isHeadingChange || effectiveOverlapTokens <= 0) {
-			currentChunkList = [];
-			accumulatedTokens = 0;
-		} else {
-			const overlapItems = [];
-			let currentOverlapTokens = 0;
-			for (let i = currentChunkList.length - 1; i >= 0; i--) {
-				const item = currentChunkList[i];
-				const itemTokens = countTokens(item.text);
-				if (currentOverlapTokens + itemTokens > effectiveOverlapTokens && overlapItems.length > 0) break;
-				overlapItems.unshift(item);
-				currentOverlapTokens += itemTokens;
-			}
-			currentChunkList = [...overlapItems];
-			accumulatedTokens = currentOverlapTokens;
-		}
-	};
-	for (const token of tokens) {
-		if (token.type === "space") {
-			if (currentChunkList.length > 0) {
-				currentChunkList[currentChunkList.length - 1].text += token.raw;
-				accumulatedTokens += countTokens(token.raw);
-			}
-			continue;
-		}
-		if (token.type === "heading") {
-			flushCurrentChunk(true);
-			const depth = token.depth;
-			const title = token.text.trim();
-			currentHeadings = currentHeadings.slice(0, depth - 1);
-			currentHeadings[depth - 1] = title;
-		}
-		const rawText = token.raw;
-		if (token.type === "list" && countTokens(rawText) > maxTokens) for (const item of token.items) processTextBlock(item.raw, currentHeadings);
-		else {
-			const isAtomic = token.type === "table" || token.type === "code";
-			processTextBlock(rawText, currentHeadings, isAtomic);
-		}
-	}
-	flushCurrentChunk(true);
-	return finalizeChunks(chunks, text$1);
-	function processTextBlock(blockText, headings, isAtomic = false) {
-		const blockTokens = countTokens(blockText);
-		const contextTokens = countTokens(formatHeadingContext(headings));
-		const safetyBuffer = Math.min(100, Math.max(2, Math.floor(maxTokens * .1)));
-		const budgetLimit = Math.max(5, maxTokens - contextTokens - safetyBuffer);
-		if (blockTokens > budgetLimit) if (isAtomic) {
-			flushCurrentChunk(false);
-			const atomicBlocks = blockTokens <= maxTokens ? [blockText] : blockText.includes("|") ? splitMarkdownTable(blockText, budgetLimit) : splitTextRecursively(blockText, budgetLimit, ["\n"]);
-			for (const block of atomicBlocks) {
-				currentChunkList.push({
-					text: block,
-					headings: [...headings]
-				});
-				accumulatedTokens = countTokens(block);
-				flushCurrentChunk(false);
-			}
-		} else {
-			flushCurrentChunk(false);
-			const subBlocks = splitTextRecursively(blockText, budgetLimit);
-			for (const sub of subBlocks) {
-				currentChunkList.push({
-					text: sub,
-					headings: [...headings]
-				});
-				accumulatedTokens += countTokens(sub);
-				if (accumulatedTokens > budgetLimit) flushCurrentChunk(false);
-			}
-		}
-		else {
-			if (accumulatedTokens + blockTokens + contextTokens > maxTokens && currentChunkList.length > 0) flushCurrentChunk(false);
-			currentChunkList.push({
-				text: blockText,
-				headings: [...headings]
-			});
-			accumulatedTokens += blockTokens;
-		}
-	}
-}
 //#endregion
 //#region src/core/extraction-audit.ts
 const AUDIT_ID_RE = /^[\w.-]+$/;
@@ -13906,276 +13662,6 @@ function getFileHash(filePath) {
 	});
 }
-//#endregion
-//#region src/core/ai-extraction/evidence.ts
-const JSON_FILE_SUFFIX_RE$1 = /\.json$/i;
-const FIELD_PATH_PREFIX_RE = /^\$\./;
-function isRecord(value) {
-	return typeof value === "object" && value !== null && !Array.isArray(value);
-}
-function stableValueKey(value) {
-	return JSON.stringify(value);
-}
-function isPlaceholderString(value) {
-	if (typeof value !== "string") return false;
-	const normalized = value.trim().toLowerCase();
-	return normalized === "" || normalized === "n/a" || normalized === "na" || normalized === "none" || normalized === "null" || normalized === "unknown" || normalized === "tbd" || normalized === "-" || normalized === "--";
-}
-function primitiveToText(value) {
-	if (value === null || value === void 0) return null;
-	if (typeof value === "string") return value.trim() || null;
-	if (typeof value === "number" || typeof value === "boolean") return String(value);
-	return null;
-}
-function isMeaningfulValue(value) {
-	return primitiveToText(value) !== null && !isPlaceholderString(value);
-}
-function normalizeText(value) {
-	return value.toLowerCase().replace(/\s+/g, " ").trim();
-}
-function quoteAround(text$1, start, length) {
-	const before = Math.max(0, start - 80);
-	const after = Math.min(text$1.length, start + length + 80);
-	return text$1.slice(before, after).replace(/\s+/g, " ").trim();
-}
-function findEvidence(value, chunks) {
-	const searchText = primitiveToText(value);
-	if (!searchText) return null;
-	const normalizedSearchText = normalizeText(searchText);
-	if (!normalizedSearchText) return null;
-	for (const chunk of chunks) {
-		if (normalizeText(chunk.text).indexOf(normalizedSearchText) === -1) continue;
-		const rawIndex = chunk.text.toLowerCase().indexOf(searchText.toLowerCase());
-		const quoteIndex = rawIndex >= 0 ? rawIndex : 0;
-		return {
-			chunkIndex: chunk.chunkIndex,
-			headingPath: chunk.headingPath,
-			quote: quoteAround(chunk.text, quoteIndex, searchText.length)
-		};
-	}
-	return null;
-}
-function addEvidenceForProperty(fields, path$1, property, value, chunks) {
-	if (property.type === "object" && property.properties) {
-		const record = isRecord(value) ? value : {};
-		for (const [childName, childProperty] of Object.entries(property.properties)) addEvidenceForProperty(fields, `${path$1}.${childName}`, childProperty, record[childName], chunks);
-		return;
-	}
-	if (property.type === "array") {
-		if (!Array.isArray(value) || value.length === 0) {
-			fields.push({
-				fieldPath: path$1,
-				status: "missing",
-				value: null,
-				confidence: 0,
-				note: "Array field is empty or missing."
-			});
-			return;
-		}
-		value.forEach((item, index) => {
-			if (property.items?.type === "object" && property.items.properties) {
-				const record = isRecord(item) ? item : {};
-				for (const [childName, childProperty] of Object.entries(property.items.properties)) addEvidenceForProperty(fields, `${path$1}[${index}].${childName}`, childProperty, record[childName], chunks);
-			} else addPrimitiveEvidence(fields, `${path$1}[${index}]`, item, chunks);
-		});
-		return;
-	}
-	addPrimitiveEvidence(fields, path$1, value, chunks);
-}
-function addPrimitiveEvidence(fields, fieldPath, value, chunks) {
-	if (value === null || value === void 0 || value === "") {
-		fields.push({
-			fieldPath,
-			status: "missing",
-			value: null,
-			confidence: 0,
-			note: "Field is null or empty in final extraction."
-		});
-		return;
-	}
-	const found = findEvidence(value, chunks);
-	if (found) {
-		fields.push({
-			fieldPath,
-			status: "found",
-			value,
-			confidence: .8,
-			...found
-		});
-		return;
-	}
-	fields.push({
-		fieldPath,
-		status: "inferred",
-		value,
-		confidence: .35,
-		note: "Final value was not found verbatim in the available source text."
-	});
-}
-function sourceChunksFromText(text$1) {
-	return text$1 ? [{
-		text: text$1,
-		chunkIndex: 0,
-		headingPath: []
-	}] : [];
-}
-function sourceChunksFromMarkdownChunks(chunks) {
-	return chunks.map((chunk, index) => ({
-		text: chunk.pageContent,
-		chunkIndex: chunk.chunkIndex ?? index,
-		headingPath: chunk.headingPath ?? []
-	}));
-}
-function getPathParts(fieldPath) {
-	return fieldPath.replace(FIELD_PATH_PREFIX_RE, "").split(".").filter(Boolean);
-}
-function getValueAtPath$1(data, fieldPath) {
-	let current = data;
-	for (const part of getPathParts(fieldPath)) {
-		if (!isRecord(current)) return void 0;
-		current = current[part];
-	}
-	return current;
-}
-function setValueAtPath(data, fieldPath, value) {
-	const parts = getPathParts(fieldPath);
-	let current = data;
-	for (let i = 0; i < parts.length - 1; i++) {
-		const part = parts[i];
-		if (!isRecord(current[part])) current[part] = {};
-		current = current[part];
-	}
-	current[parts[parts.length - 1]] = value;
-}
-function collectScalarFields(fields, fieldPath, property) {
-	if (property.type === "object" && property.properties) {
-		for (const [name$1, childProperty] of Object.entries(property.properties)) collectScalarFields(fields, `${fieldPath}.${name$1}`, childProperty);
-		return;
-	}
-	if (property.type !== "array") fields.push({
-		fieldPath,
-		property
-	});
-}
-function candidateScore(candidate) {
-	return (candidate.status === "found" ? 100 : 0) + Math.round(candidate.confidence * 10) + candidate.chunkIndex;
-}
-function selectCandidatesForField(candidates) {
-	if (candidates.length === 0) return null;
-	candidates.sort((a, b) => candidateScore(b) - candidateScore(a));
-	const selected = candidates[0];
-	selected.selected = true;
-	for (const candidate of candidates.slice(1)) {
-		candidate.selected = false;
-		candidate.rejectionReason = "Lower evidence score or earlier chunk position.";
-	}
-	const distinctValues = /* @__PURE__ */ new Map();
-	for (const candidate of candidates) distinctValues.set(stableValueKey(candidate.value), candidate.value);
-	if (distinctValues.size <= 1) return null;
-	return {
-		fieldPath: selected.fieldPath,
-		selectedValue: selected.value,
-		rejectedValues: candidates.slice(1).map((candidate) => candidate.value),
-		candidates: [...candidates]
-	};
-}
-function buildCandidateMergeReport(input) {
-	const scalarFields = [];
-	for (const [name$1, property] of Object.entries(input.schema.properties)) {
-		if (property.primary && property.autoIncrement) continue;
-		collectScalarFields(scalarFields, `$.${name$1}`, property);
-	}
-	const sourceChunks = sourceChunksFromMarkdownChunks(input.chunks);
-	const candidatesByPath = /* @__PURE__ */ new Map();
-	for (const { fieldPath } of scalarFields) for (let chunkIndex = 0; chunkIndex < input.chunkResults.length; chunkIndex++) {
-		const value = getValueAtPath$1(input.chunkResults[chunkIndex], fieldPath);
-		if (!isMeaningfulValue(value)) continue;
-		const sourceChunk = sourceChunks[chunkIndex] ?? {
-			text: "",
-			chunkIndex
-		};
-		const found = findEvidence(value, [sourceChunk]);
-		const candidate = {
-			fieldPath,
-			value,
-			chunkIndex: sourceChunk.chunkIndex ?? chunkIndex,
-			headingPath: sourceChunk.headingPath,
-			status: found ? "found" : "inferred",
-			quote: found?.quote,
-			confidence: found ? .85 : .35
-		};
-		const candidates = candidatesByPath.get(fieldPath) ?? [];
-		candidates.push(candidate);
-		candidatesByPath.set(fieldPath, candidates);
-	}
-	const allCandidates = [];
-	const conflicts = [];
-	for (const candidates of candidatesByPath.values()) {
-		const conflict = selectCandidatesForField(candidates);
-		allCandidates.push(...candidates);
-		if (conflict) conflicts.push(conflict);
-	}
-	return {
-		candidates: allCandidates,
-		conflicts
-	};
-}
-function applySelectedCandidates(data, report) {
-	const merged = structuredClone(data);
-	for (const candidate of report.candidates) if (candidate.selected) setValueAtPath(merged, candidate.fieldPath, candidate.value);
-	return merged;
-}
-function buildExtractionEvidence(input) {
-	const data = isRecord(input.data) ? input.data : {};
-	const chunks = input.chunks ? sourceChunksFromMarkdownChunks(input.chunks) : sourceChunksFromText(input.text ?? "");
-	const fields = [];
-	for (const [name$1, property] of Object.entries(input.schema.properties)) {
-		if (property.primary && property.autoIncrement) continue;
-		addEvidenceForProperty(fields, `$.${name$1}`, property, data[name$1], chunks);
-	}
-	const inferredIssues = fields.filter((field) => field.status === "inferred").map((field) => ({
-		fieldPath: field.fieldPath,
-		message: field.note ?? "Field value lacks source evidence."
-	}));
-	const conflictIssues = (input.candidateReport?.conflicts ?? []).map((conflict) => ({
-		fieldPath: conflict.fieldPath,
-		message: "Multiple chunk candidates disagree for this field."
-	}));
-	const issues = [...inferredIssues, ...conflictIssues];
-	return {
-		coverage: {
-			path: input.outputPath ? evidencePathForOutput(input.outputPath) : void 0,
-			fieldCount: fields.length,
-			evidenceCount: fields.filter((field) => field.status === "found").length,
-			foundCount: fields.filter((field) => field.status === "found").length,
-			missingCount: fields.filter((field) => field.status === "missing").length,
-			inferredCount: fields.filter((field) => field.status === "inferred").length,
-			conflictCount: input.candidateReport?.conflicts.length ?? 0,
-			issueCount: issues.length
-		},
-		fields,
-		candidates: input.candidateReport?.candidates,
-		conflicts: input.candidateReport?.conflicts,
-		issues
-	};
-}
-function evidencePathForOutput(outputPath) {
-	return outputPath.replace(JSON_FILE_SUFFIX_RE$1, ".evidence.json");
-}
-async function writeExtractionEvidence(input) {
-	const report = buildExtractionEvidence(input);
-	const evidencePath = evidencePathForOutput(input.outputPath);
-	report.coverage.path = evidencePath;
-	await writeFile(evidencePath, report, {
-		spaces: 2,
-		EOL: "\n"
-	});
-	return {
-		...report.coverage,
-		path: path.resolve(evidencePath)
-	};
-}
 //#endregion
 //#region src/core/notion-sink.ts
 const RICH_TEXT_LIMIT = 2e3;
@@ -14461,36 +13947,6 @@ async function triggerWebhook(aiConfig, auditId, schemaName, event, source, data
 	}
 }
-//#endregion
-//#region src/core/ai-extraction/transcriber.ts
-const TRANSCRIPTION_PROMPT = "Transcribe all visible text from this image accurately. Preserve the layout and line breaks as much as possible.";
-async function transcribeImageWithVision(imagePath, baseURL, apiKey, modelName, timeoutMs) {
-	const provider = createOpenAICompatible({
-		baseURL,
-		name: "openai-compatible",
-		apiKey
-	});
-	const buffer = await fs.readFile(imagePath);
-	const effectiveTimeout = timeoutMs ?? 3e5;
-	return {
-		text: (await generateText({
-			model: provider.chatModel(modelName),
-			messages: [{
-				role: "user",
-				content: [{
-					type: "text",
-					text: TRANSCRIPTION_PROMPT
-				}, {
-					type: "image",
-					image: buffer
-				}]
-			}],
-			abortSignal: AbortSignal.timeout(effectiveTimeout)
-		})).text,
-		modelName
-	};
-}
 //#endregion
 //#region src/core/file-constants.ts
 const MAX_UPLOAD_SIZE = 30 * 1024 * 1024;
@@ -14824,6 +14280,14 @@ function createPdfConverter(config) {
 			return withFallback(new ExternalCommandPdfConverter("mineru", mineruConfig), mineruConfig);
 		}
 		if (config.converter === "mineru_api") return new MineruApiPdfConverter(config.mineruApi ?? DEFAULT_MINERU_API_CONFIG);
+		if (config.converter === "markitdown") {
+			const markitdownConfig = config.markitdown ?? DEFAULT_MARKITDOWN_CONFIG;
+			return withFallback(new ExternalCommandPdfConverter("markitdown", markitdownConfig), markitdownConfig);
+		}
+		if (config.converter === "marker") {
+			const markerConfig = config.marker ?? DEFAULT_MARKER_CONFIG;
+			return withFallback(new ExternalCommandPdfConverter("marker", markerConfig), markerConfig);
+		}
 		if (config.converter === "external") {
 			if (!config.external) throw new Error(t("errors.pdf.externalNotConfigured"));
 			return new ExternalCommandPdfConverter("external", config.external);
@@ -14851,7 +14315,7 @@ const FILE_PART_EXTENSIONS = new Set([
 	"svg"
 ]);
 const PDF_EXT_RE = /\.pdf$/i;
-async function readExtractFileInput(filePath, aiConfig) {
+async function readExtractFileInput(filePath, aiConfig, modelOverride) {
 	const stat = fs$1.statSync(filePath);
 	if (stat.size > MAX_UPLOAD_SIZE) throw new Error(t("errors.file.sizeExceeded", {
 		size: bytesToMB(stat.size).toFixed(1),
@@ -14860,22 +14324,15 @@ async function readExtractFileInput(filePath, aiConfig) {
 	}));
 	const ext = path.extname(filePath).toLowerCase().replace(".", "");
 	if (FILE_PART_EXTENSIONS.has(ext)) {
-		const image = aiConfig?.image;
-		if (image?.imageConversion === "vision" && image.imageModelName && aiConfig) {
-			const baseURL = image.visionBaseURL || aiConfig.provider.baseURL;
-			const apiKey = image.visionApiKey || aiConfig.provider.apiKey;
-			const timeout = (aiConfig.provider.timeout ?? 300) * 1e3;
-			try {
-				const result$1 = await transcribeImageWithVision(filePath, baseURL, apiKey, image.imageModelName, timeout);
-				consola.info(t("command.extract.file.visionTranscribed", { model: result$1.modelName }));
-				return { text: result$1.text };
-			} catch {
-				consola.warn(t("command.extract.file.visionTranscribeFailed", { model: image.imageModelName }));
-			}
+		if (shouldUseImageOcrFallback(aiConfig, modelOverride)) {
+			const result = await recognizeImageText(filePath, aiConfig?.image);
+			consola.info(t("command.extract.file.ocrText", { confidence: (result.confidence * 100).toFixed(1) }));
+			return { text: result.text };
 		}
-		const result = await recognizeImageText(filePath, aiConfig?.image);
-		consola.info(t("command.extract.file.ocrText", { confidence: (result.confidence * 100).toFixed(1) }));
-		return { text: result.text };
+		return {
+			text: "",
+			filePath
+		};
 	}
 	if (ext === "pdf") {
 		const buffer = await fs.readFile(filePath);
@@ -14996,21 +14453,7 @@ async function runBatchExtraction(aiexDir, config, aiConfig, schemaName, dir, gl
 //#endregion
 //#region src/core/extract-runner.ts
-const encoding = getEncoding("cl100k_base");
 const JSON_EXT_RE$1 = /\.json$/;
-async function limitConcurrency(concurrency, items, fn) {
-	const results = Array.from({ length: items.length });
-	let nextIndex = 0;
-	async function worker() {
-		while (nextIndex < items.length) {
-			const currentIndex = nextIndex++;
-			results[currentIndex] = await fn(items[currentIndex], currentIndex);
-		}
-	}
-	const workers = Array.from({ length: Math.min(concurrency, items.length) }, worker);
-	await Promise.all(workers);
-	return results;
-}
 async function ensureDatabaseReady(dbPath, schema) {
 	try {
 		await fs.access(dbPath);
@@ -15082,146 +14525,34 @@ async function extractSingle(aiexDir, config, aiConfig, schemaName, text$1, file
 	}
 	const s = spinner();
 	if (!options?.quiet) s.start(filePath ? t("command.extract.file.extractedFrom", { file: path.basename(filePath) }) : t("command.extract.file.extracting"));
-	const maxTokens = calculateChunkTokenBudget({
-		configuredMaxTokens: aiConfig.extraction?.maxTokens ?? 8e3,
-		modelMaxTokens: modelOverride?.capabilities.maxTokens
-	});
-	const overlapTokens = aiConfig.extraction?.overlapSize ?? 1e3;
-	const totalTokens = text$1 ? encoding.encode(text$1).length : 0;
-	if (text$1 && totalTokens > maxTokens && !options?.quiet) consola.info(t("command.extract.file.chunking", {
-		length: totalTokens,
-		limit: maxTokens
-	}));
-	const processedDocs = text$1 && totalTokens > maxTokens ? splitMarkdown(text$1, maxTokens, overlapTokens) : [{
-		pageContent: text$1 ?? "",
-		metadata: {},
-		chunkIndex: 0,
-		totalChunks: 1,
-		tokenCount: totalTokens,
-		headingPath: [],
-		charStart: 0,
-		charEnd: text$1?.length ?? 0
-	}];
-	if (text$1 && totalTokens > maxTokens && !options?.quiet) consola.info(t("command.extract.file.chunksCount", { count: processedDocs.length }));
-	const chunkResults = Array.from({ length: processedDocs.length });
-	const accumulatedTokens = {
-		prompt: 0,
-		completion: 0,
-		total: 0
-	};
-	let success = true;
-	let errorMsg = "";
-	const extractionTasks = processedDocs.map((doc, i) => {
-		return async () => {
-			if (!success) return;
-			const headings = doc.headingPath?.length ? doc.headingPath : [
-				doc.metadata.h1,
-				doc.metadata.h2,
-				doc.metadata.h3,
-				doc.metadata.h4
-			].filter(Boolean);
-			let chunkText = doc.pageContent;
-			if (headings.length > 0) chunkText = `> **[Context]** Belong to: ${headings.join(" > ")}\n\n${chunkText}`;
-			const chunkResult = await extractStructuredData({
-				config: aiConfig,
-				schema: schemaLoad.schema,
-				text: chunkText,
-				aiexDir,
-				modelOverride,
-				onRetry(info) {
-					if (!options?.quiet) s.message(t("command.extract.file.extractRetryChunk", {
-						current: i + 1,
-						total: processedDocs.length,
-						code: info.statusCode,
-						delay: info.delayMs / 1e3,
-						attempt: info.attempt,
-						max: info.maxRetries
-					}));
-				}
-			});
-			if (!chunkResult.success) {
-				success = false;
-				errorMsg = chunkResult.error || t("common.unknownError");
-				if (!options?.quiet) {
-					s.stop(t("command.extract.file.extractFailChunk", { current: i + 1 }));
-					consola.error(errorMsg);
-				}
-				return;
-			}
-			if (chunkResult.data) chunkResults[i] = chunkResult.data;
-			if (chunkResult.tokensUsed) {
-				accumulatedTokens.prompt += chunkResult.tokensUsed.prompt ?? 0;
-				accumulatedTokens.completion += chunkResult.tokensUsed.completion ?? 0;
-				accumulatedTokens.total += chunkResult.tokensUsed.total ?? 0;
-			}
-		};
-	});
-	const concurrency = Math.min(aiConfig.extraction?.concurrency ?? 2, 2);
-	if (!options?.quiet && processedDocs.length > 0) s.message(t("command.extract.file.extractingChunk", {
-		current: 1,
-		total: processedDocs.length
-	}));
-	try {
-		await limitConcurrency(concurrency, extractionTasks, async (task, idx) => {
-			if (!options?.quiet && success) s.message(t("command.extract.file.extractingChunk", {
-				current: idx + 1,
-				total: processedDocs.length
-			}));
-			await task();
-		});
-	} catch (e) {
-		success = false;
-		errorMsg = e instanceof Error ? e.message : String(e);
-	}
-	if (!success) return {
-		success: false,
-		error: errorMsg
-	};
-	const successfulChunkResults = chunkResults.filter((chunkResult) => !!chunkResult);
-	const candidateReport = buildCandidateMergeReport({
+	const result = await extractStructuredData({
+		config: aiConfig,
 		schema: schemaLoad.schema,
-		chunkResults: successfulChunkResults,
-		chunks: processedDocs
+		text: text$1 ?? "",
+		aiexDir,
+		file: filePath,
+		modelOverride,
+		onRetry(info) {
+			if (!options?.quiet) s.message(t("command.extract.file.extractRetry", {
+				code: info.statusCode,
+				delay: info.delayMs / 1e3,
+				attempt: info.attempt,
+				max: info.maxRetries
+			}));
+		}
 	});
-	const mergedData = applySelectedCandidates(mergeExtractionResults(schemaLoad.schema, successfulChunkResults), candidateReport);
-	const validation = validateExtractedData(schemaLoad.schema, mergedData);
-	if (!validation.success) {
-		const valError = validation.error || "Merged data validation failed";
+	if (!result.success) {
 		if (!options?.quiet) {
-			s.stop(t("command.extract.file.validationFail"));
-			consola.error(valError);
+			s.stop(t("command.extract.file.extractFail"));
+			consola.error(result.error || t("common.unknownError"));
 		}
 		return {
 			success: false,
-			error: valError
+			error: result.error || t("common.unknownError")
 		};
 	}
-	const outputDir = path.resolve(aiexDir, aiConfig.extraction?.outputDir?.replace(".aiex/", "") ?? "extracted");
-	await fs.mkdir(outputDir, { recursive: true });
-	const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
-	const outputFileName = `${schemaLoad.schema.table.name}-${timestamp}.json`;
-	const outputPath = path.join(outputDir, outputFileName);
-	await fs.writeFile(outputPath, JSON.stringify(mergedData, null, 2));
-	const result = {
-		success: true,
-		data: mergedData,
-		tokensUsed: accumulatedTokens,
-		outputPath,
-		evidenceSummary: await writeExtractionEvidence({
-			schema: schemaLoad.schema,
-			data: mergedData,
-			outputPath,
-			chunks: processedDocs,
-			candidateReport
-		})
-	};
 	if (!options?.quiet) s.stop(t("command.extract.file.extractComplete"));
 	if (result.outputPath && !options?.quiet) consola.success(t("command.extract.file.resultSaved", { path: pc.cyan(result.outputPath) }));
-	if (result.evidenceSummary && !options?.quiet) {
-		const summary = result.evidenceSummary;
-		const issueText = summary.issueCount > 0 ? pc.yellow(String(summary.issueCount)) : pc.green("0");
-		consola.info(pc.gray(`Evidence coverage: ${summary.evidenceCount}/${summary.fieldCount} fields, found ${summary.foundCount}, inferred ${summary.inferredCount}, missing ${summary.missingCount}, conflicts ${summary.conflictCount ?? 0}, issues ${issueText}`));
-	}
 	if (result.tokensUsed && !options?.quiet) consola.info(pc.gray(t("command.extract.file.tokenUsage", {
 		prompt: result.tokensUsed.prompt,
 		completion: result.tokensUsed.completion,
@@ -15250,7 +14581,6 @@ async function extractSingle(aiexDir, config, aiConfig, schemaName, text$1, file
 						outputPath: result.outputPath,
 						data: result.data,
 						tablesInserted: insertResult.tablesInserted,
-						evidenceSummary: result.evidenceSummary,
 						tokensUsed: result.tokensUsed
 					};
 				} else {
@@ -15277,7 +14607,6 @@ async function extractSingle(aiexDir, config, aiConfig, schemaName, text$1, file
 		success: true,
 		outputPath: result.outputPath,
 		data: result.data,
-		evidenceSummary: result.evidenceSummary,
 		tokensUsed: result.tokensUsed
 	};
 }
@@ -15342,9 +14671,13 @@ async function runAuditedExtraction(options) {
 	});
 	try {
 		let text$1 = "";
-		if (source.type === "file") text$1 = (await readExtractFileInput(source.filePath, aiConfig)).text;
-		else text$1 = source.text;
-		const r = await extractSingle(aiexDir, config, aiConfig, schemaName, text$1, source.type === "file" ? source.filePath : void 0, modelOverride, {
+		let filePath;
+		if (source.type === "file") {
+			const input = await readExtractFileInput(source.filePath, aiConfig, modelOverride);
+			text$1 = input.text;
+			filePath = input.filePath;
+		} else text$1 = source.text;
+		const r = await extractSingle(aiexDir, config, aiConfig, schemaName, text$1, filePath, modelOverride, {
 			quiet,
 			insert
 		});
@@ -15386,7 +14719,6 @@ async function runAuditedExtraction(options) {
 				outputName: updated.outputName,
 				tablesInserted: updated.tablesInserted,
 				notionPages: updated.notionPages,
-				evidenceSummary: r.evidenceSummary,
 				tokensUsed: updated.tokensUsed,
 				auditId: updated.id,
 				fileHash
@@ -16514,7 +15846,6 @@ function aiRoutes(config) {
 //#endregion
 //#region src/core/data-service.ts
 const FILE_REGEX = /\.json$/;
-const EVIDENCE_FILE_SUFFIX = ".evidence.json";
 const EXTRACTION_TIMESTAMP_RE = /-\d{4}-\d{2}-\d{2}T/;
 const INTERNAL_ROWID_COLUMN = "__aiex_rowid";
 const TIMESTAMP_CLEANUP = /(\d{2})-(\d{2})-(\d{2})/;
@@ -16530,24 +15861,6 @@ function getAuditNotionStatus(record) {
 	if (record.status === "failed") return "failed";
 	return "not_synced";
 }
-async function readEvidenceSummary(extractedDir, outputName) {
-	const evidencePath = path.join(extractedDir, outputName.replace(FILE_REGEX, EVIDENCE_FILE_SUFFIX));
-	try {
-		const coverage = (await readFile(evidencePath))?.coverage;
-		if (!coverage || typeof coverage !== "object") return void 0;
-		return {
-			path: evidencePath,
-			fieldCount: Number(coverage.fieldCount) || 0,
-			evidenceCount: Number(coverage.evidenceCount) || 0,
-			foundCount: Number(coverage.foundCount) || 0,
-			missingCount: Number(coverage.missingCount) || 0,
-			inferredCount: Number(coverage.inferredCount) || 0,
-			issueCount: Number(coverage.issueCount) || 0
-		};
-	} catch {
-		return;
-	}
-}
 async function getRowExtractionActions(aiexDir, tableName) {
 	const actions = /* @__PURE__ */ new Map();
 	const auditRecords = await listExtractionAuditRecords(aiexDir);
@@ -16575,7 +15888,7 @@ async function listExtractions(config) {
 	const aiexDir = path.dirname(config.schemaPath);
 	const extractedDir = path.join(aiexDir, "extracted");
 	await fs.mkdir(extractedDir, { recursive: true });
-	const jsonFiles = (await fs.readdir(extractedDir)).filter((f) => f.endsWith(".json") && !f.endsWith(".prompt.md") && !f.endsWith(EVIDENCE_FILE_SUFFIX));
+	const jsonFiles = (await fs.readdir(extractedDir)).filter((f) => f.endsWith(".json") && !f.endsWith(".prompt.md"));
 	const auditRecords = await listExtractionAuditRecords(aiexDir);
 	const auditByOutputName = new Map(auditRecords.map((record) => [record.outputName, record]));
 	const records = [];
@@ -16594,7 +15907,6 @@ async function listExtractions(config) {
 				timestamp,
 				fileSize: stat.size,
 				modifiedAt: stat.mtime.toISOString(),
-				evidenceSummary: await readEvidenceSummary(extractedDir, file),
 				notionStatus: notionPages ? "synced" : audit?.status === "failed" ? "failed" : "not_synced",
 				notionPages,
 				notionError: !notionPages && audit?.status === "failed" ? audit.error : void 0
@@ -16774,7 +16086,6 @@ async function retryNotionSync(config, fileName) {
 //#endregion
 //#region src/server/routes/data.ts
-const JSON_FILE_SUFFIX_RE = /\.json$/;
 const tableParamSchema = z.object({ name: z.string().regex(/^[a-z][a-z0-9_]*$/) });
 const extractionFileParamSchema = z.object({ name: z.string().regex(/^[\w.-]+\.json$/).refine((name$1) => name$1 === path.basename(name$1) && !name$1.includes("..")) });
 const tableQuerySchema = z.object({
@@ -16827,22 +16138,10 @@ function dataRoutes(config) {
 		const filePath = path.join(extractedDir, name$1);
 		try {
 			const content = await fs.readFile(filePath, "utf-8");
-			const evidencePath = path.join(extractedDir, name$1.replace(JSON_FILE_SUFFIX_RE, ".evidence.json"));
-			let evidenceSummary;
-			try {
-				const evidence = JSON.parse(await fs.readFile(evidencePath, "utf-8"));
-				evidenceSummary = evidence?.coverage ? {
-					...evidence.coverage,
-					path: evidencePath
-				} : void 0;
-			} catch {
-				evidenceSummary = void 0;
-			}
 			return c.json({
 				success: true,
 				content,
-				name: name$1,
-				evidenceSummary
+				name: name$1
 			});
 		} catch {
 			return c.json({ error: t("server.extractionNotFound") }, 404);
@@ -16986,7 +16285,6 @@ function extractRoutes(config) {
 				outputName: result.outputName,
 				tablesInserted: result.tablesInserted,
 				notionPages: result.notionPages,
-				evidenceSummary: result.evidenceSummary,
 				tokensUsed: result.tokensUsed,
 				auditId: result.auditId
 			}, 200);
@@ -17054,7 +16352,6 @@ function extractRoutes(config) {
 			outputName: result.outputName,
 			tablesInserted: result.tablesInserted,
 			notionPages: result.notionPages,
-			evidenceSummary: result.evidenceSummary,
 			tokensUsed: result.tokensUsed,
 			auditId: result.auditId
 		}, 200);