npm - aiex-cli - Versions diffs - 0.0.5-beta.5 → 0.0.6-beta.1 - Mend

aiex-cli 0.0.5-beta.5 → 0.0.6-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

package/README.md +0 -11
package/dist/cli.mjs +7 -449
package/dist/{doctor-collector-NTNBFeBw.mjs → doctor-collector-hWEvJ4lw.mjs} +4 -24
package/dist/index.d.mts +88 -91
package/dist/index.mjs +1 -1
package/dist/web/assets/ExtractionViewer-DqIrBGNK.js +1 -0
package/dist/web/assets/index-CvY9TGny.css +2 -0
package/dist/web/assets/{index-CKV2X6sS.js → index-Dlze68g1.js} +2 -2
package/dist/web/index.html +2 -2
package/dist/{zh-CN-Ca-Dv775.mjs → zh-CN-Qcn0DHFh.mjs} +0 -7
package/package.json +1 -3
package/dist/web/assets/ExtractionViewer-BhhWrBs2.js +0 -1
package/dist/web/assets/index-Csdgio76.css +0 -2

package/README.md CHANGED Viewed

@@ -202,17 +202,6 @@ aiex completion fish | source
 <br>
-## 📄 Large Document Processing
-When processing very large documents (exceeding `40,000` characters), `aiex` runs an optimized **Pipeline Mode** to handle context window limits and control API costs:
-- **Token-Aware AST Splitting**: Parses structural Markdown elements (headings, paragraphs, lists) using an AST-based parser (`marked.lexer`) and splits them using precise token counters (`js-tiktoken`). Active heading hierarchies are tracked and prepended to each chunk as context. Tables and code blocks are kept intact (atomic blocks) to avoid syntax corruption.
-- **Concurrency Limiting**: To respect strict model rate limits, chunk extractions are processed in parallel with a strict concurrency limit (capped at 2 concurrent requests).
-- **Pre-filtering**: Integrates hybrid search-based pre-filtering to score and select only the most relevant document chunks based on schema queries, preventing unnecessary token usage on unrelated sections.
-- **Recursive Merging**: The final extracted JSON objects from each chunk are recursively merged, concatenating lists and deduplicating primitive fields.
-<br>
 ## 🔧 AI Configuration
 aiex works with any OpenAI-compatible API provider. Configure in the Web UI (AI Settings panel):

package/dist/cli.mjs CHANGED Viewed

@@ -1,4 +1,4 @@
-import { A as doctorDiagnosticsTableRows, C as createConfig, D as package_default, E as name, O as version, S as AIConfigSchema, T as description, _ as DEFAULT_MINERU_API_CONFIG, a as parseJsonSchema, b as PLACEHOLDER_SCHEMA, c as recognizeImageText, d as t, f as getDefaultAIConfig, g as DEFAULT_MARKITDOWN_CONFIG, h as DEFAULT_MARKER_CONFIG, i as JsonSchemaDefinitionSchema, j as formatDoctorDiagnosticsJson, l as shouldUseImageOcrFallback, m as writeAIConfig, n as createMigrationConfig, o as toSnakeCase, p as readAIConfig, s as generateDrizzleSchema, t as collectDoctorDiagnostics, u as initI18n, v as DEFAULT_MINERU_CONFIG, w as seedConfig, x as PLACEHOLDER_TEXT, y as DEFAULT_PROMPT_CONFIG } from "./doctor-collector-NTNBFeBw.mjs";
+import { A as doctorDiagnosticsTableRows, C as createConfig, D as package_default, E as name, O as version, S as AIConfigSchema, T as description, _ as DEFAULT_MINERU_API_CONFIG, a as parseJsonSchema, b as PLACEHOLDER_SCHEMA, c as recognizeImageText, d as t, f as getDefaultAIConfig, g as DEFAULT_MARKITDOWN_CONFIG, h as DEFAULT_MARKER_CONFIG, i as JsonSchemaDefinitionSchema, j as formatDoctorDiagnosticsJson, l as shouldUseImageOcrFallback, m as writeAIConfig, n as createMigrationConfig, o as toSnakeCase, p as readAIConfig, s as generateDrizzleSchema, t as collectDoctorDiagnostics, u as initI18n, v as DEFAULT_MINERU_CONFIG, w as seedConfig, x as PLACEHOLDER_TEXT, y as DEFAULT_PROMPT_CONFIG } from "./doctor-collector-hWEvJ4lw.mjs";
 import { createRequire } from "node:module";
 import fs from "node:fs/promises";
 import os from "node:os";
@@ -17,7 +17,6 @@ import Database from "better-sqlite3";
 import pc from "picocolors";
 import { Buffer } from "node:buffer";
 import * as XLSX from "xlsx";
-import { getEncoding } from "js-tiktoken";
 import { createOpenAICompatible } from "@ai-sdk/openai-compatible";
 import { APICallError, Output, generateText, jsonSchema } from "ai";
 import pRetry from "p-retry";
@@ -25,7 +24,6 @@ import mime from "mime";
 import { jsonrepair } from "jsonrepair";
 import { LangfuseSpanProcessor } from "@langfuse/otel";
 import { NodeTracerProvider } from "@opentelemetry/sdk-trace-node";
-import { marked } from "marked";
 import crypto from "node:crypto";
 import { Client, extractNotionId } from "@notionhq/client";
 import { execa } from "execa";
@@ -13130,7 +13128,7 @@ function propertyToExtractionSchema(property) {
 	}
 	return { type: nullableType(property.type) };
 }
-function isRecord$1(value) {
+function isRecord(value) {
 	return typeof value === "object" && value !== null && !Array.isArray(value);
 }
 function schemaToExtractionOutputSchema(schema) {
@@ -13168,7 +13166,7 @@ function validatePropertyValue(path$1, property, value, issues) {
 			}
 			return;
 		case "object":
-			if (!isRecord$1(value)) {
+			if (!isRecord(value)) {
 				issues.push(`${path$1}: expected object or null`);
 				return;
 			}
@@ -13191,7 +13189,7 @@ function validateProperties(basePath, properties, data, issues) {
 	}
 }
 function validateExtractedData(schema, data) {
-	if (!isRecord$1(data)) return {
+	if (!isRecord(data)) return {
 		success: false,
 		error: "Extracted data must be a JSON object."
 	};
@@ -13514,220 +13512,6 @@ function insertExtractedData(db, schema, data) {
 	}
 }
-//#endregion
-//#region src/core/ai-extraction/json-merger.ts
-function isRecord(value) {
-	return typeof value === "object" && value !== null && !Array.isArray(value);
-}
-function mergePropertyValue(property, values) {
-	const nonNullValues = values.filter((v) => v !== null && v !== void 0);
-	if (nonNullValues.length === 0) return null;
-	if (property.type === "array") {
-		const concatenated = [];
-		for (const val of nonNullValues) if (Array.isArray(val)) concatenated.push(...val);
-		return concatenated;
-	}
-	if (property.type === "object") {
-		const childProperties = property.properties;
-		if (!childProperties) {
-			const mergedObj$1 = {};
-			for (const val of nonNullValues) if (isRecord(val)) Object.assign(mergedObj$1, val);
-			return mergedObj$1;
-		}
-		const mergedObj = {};
-		for (const [propName, propDef] of Object.entries(childProperties)) mergedObj[propName] = mergePropertyValue(propDef, nonNullValues.map((v) => isRecord(v) ? v[propName] : void 0));
-		return mergedObj;
-	}
-	const bestValue = nonNullValues.find((v) => {
-		if (typeof v === "string") return v.trim() !== "";
-		return true;
-	});
-	return bestValue !== void 0 ? bestValue : null;
-}
-/**
-* Merges structured extraction outputs from multiple document chunks
-* according to the schema properties.
-*/
-function mergeExtractionResults(schema, results) {
-	if (results.length === 0) return {};
-	if (results.length === 1) return results[0];
-	const merged = {};
-	for (const [propName, propDef] of Object.entries(schema.properties)) {
-		if (propDef.primary && propDef.autoIncrement) continue;
-		merged[propName] = mergePropertyValue(propDef, results.map((r) => r[propName]));
-	}
-	return merged;
-}
-//#endregion
-//#region src/core/ai-extraction/text-splitter.ts
-const encoding$1 = getEncoding("cl100k_base");
-function countTokens(text$1) {
-	return encoding$1.encode(text$1).length;
-}
-function formatHeadingContext(headings) {
-	const active = headings.filter(Boolean);
-	if (active.length === 0) return "";
-	return `> **[Context]** Belong to: ${active.join(" > ")}\n\n`;
-}
-function getMetadata(headings) {
-	return {
-		h1: headings[0] || void 0,
-		h2: headings[1] || void 0,
-		h3: headings[2] || void 0,
-		h4: headings[3] || void 0
-	};
-}
-/**
-* Splits text recursively using a list of separators.
-* Preserves the separators when re-joining.
-*/
-function splitTextRecursively(text$1, maxTokens, separators = [
-	"\n\n",
-	"\n",
-	"。",
-	". ",
-	" "
-]) {
-	if (countTokens(text$1) <= maxTokens) return [text$1];
-	if (separators.length === 0) {
-		const chunks = [];
-		let current = "";
-		for (const char of text$1) if (countTokens(current + char) > maxTokens) {
-			chunks.push(current);
-			current = char;
-		} else current += char;
-		if (current) chunks.push(current);
-		return chunks;
-	}
-	const separator = separators[0];
-	const nextSeparators = separators.slice(1);
-	const parts = text$1.split(separator);
-	const result = [];
-	let currentChunk = [];
-	let currentChunkTokens = 0;
-	for (let i = 0; i < parts.length; i++) {
-		const part = parts[i];
-		const itemText = part + (i < parts.length - 1 ? separator : "");
-		const partTokens = countTokens(itemText);
-		if (partTokens > maxTokens) {
-			if (currentChunk.length > 0) {
-				result.push(currentChunk.join(""));
-				currentChunk = [];
-				currentChunkTokens = 0;
-			}
-			const subParts = splitTextRecursively(part, maxTokens, nextSeparators);
-			for (let j = 0; j < subParts.length; j++) {
-				const finalSub = subParts[j] + (j === subParts.length - 1 && i < parts.length - 1 ? separator : "");
-				result.push(finalSub);
-			}
-		} else if (currentChunkTokens + partTokens > maxTokens) {
-			result.push(currentChunk.join(""));
-			currentChunk = [itemText];
-			currentChunkTokens = partTokens;
-		} else {
-			currentChunk.push(itemText);
-			currentChunkTokens += partTokens;
-		}
-	}
-	if (currentChunk.length > 0) result.push(currentChunk.join(""));
-	return result;
-}
-/**
-* Splits a Markdown document into chunks based on heading contexts, AST block parsing, and token limits.
-* Protects tables, list items, and code blocks from being broken.
-*/
-function splitMarkdown(text$1, maxTokens = 8e3, overlapTokens = 1e3) {
-	const tokens = marked.lexer(text$1);
-	const chunks = [];
-	let currentHeadings = [];
-	let currentChunkList = [];
-	let accumulatedTokens = 0;
-	const flushCurrentChunk = (isHeadingChange = false) => {
-		if (currentChunkList.length === 0) return;
-		const pageContent = currentChunkList.map((item) => item.text).join("");
-		const firstHeadings = currentChunkList[0].headings;
-		chunks.push({
-			pageContent,
-			metadata: getMetadata(firstHeadings)
-		});
-		if (isHeadingChange || overlapTokens <= 0) {
-			currentChunkList = [];
-			accumulatedTokens = 0;
-		} else {
-			const overlapItems = [];
-			let currentOverlapTokens = 0;
-			for (let i = currentChunkList.length - 1; i >= 0; i--) {
-				const item = currentChunkList[i];
-				const itemTokens = countTokens(item.text);
-				if (currentOverlapTokens + itemTokens > overlapTokens && overlapItems.length > 0) break;
-				overlapItems.unshift(item);
-				currentOverlapTokens += itemTokens;
-			}
-			currentChunkList = [...overlapItems];
-			accumulatedTokens = currentOverlapTokens;
-		}
-	};
-	for (const token of tokens) {
-		if (token.type === "space") {
-			if (currentChunkList.length > 0) {
-				currentChunkList[currentChunkList.length - 1].text += token.raw;
-				accumulatedTokens += countTokens(token.raw);
-			}
-			continue;
-		}
-		if (token.type === "heading") {
-			flushCurrentChunk(true);
-			const depth = token.depth;
-			const title = token.text.trim();
-			currentHeadings = currentHeadings.slice(0, depth - 1);
-			currentHeadings[depth - 1] = title;
-		}
-		const rawText = token.raw;
-		if (token.type === "list" && countTokens(rawText) > maxTokens) for (const item of token.items) processTextBlock(item.raw, currentHeadings);
-		else {
-			const isAtomic = token.type === "table" || token.type === "code";
-			processTextBlock(rawText, currentHeadings, isAtomic);
-		}
-	}
-	flushCurrentChunk(true);
-	return chunks;
-	function processTextBlock(blockText, headings, isAtomic = false) {
-		const blockTokens = countTokens(blockText);
-		const contextTokens = countTokens(formatHeadingContext(headings));
-		const safetyBuffer = Math.min(100, Math.max(2, Math.floor(maxTokens * .1)));
-		const budgetLimit = Math.max(5, maxTokens - contextTokens - safetyBuffer);
-		if (blockTokens > budgetLimit) if (isAtomic) {
-			flushCurrentChunk(false);
-			currentChunkList.push({
-				text: blockText,
-				headings: [...headings]
-			});
-			accumulatedTokens = blockTokens;
-			flushCurrentChunk(false);
-		} else {
-			flushCurrentChunk(false);
-			const subBlocks = splitTextRecursively(blockText, budgetLimit);
-			for (const sub of subBlocks) {
-				currentChunkList.push({
-					text: sub,
-					headings: [...headings]
-				});
-				accumulatedTokens += countTokens(sub);
-				if (accumulatedTokens > budgetLimit) flushCurrentChunk(false);
-			}
-		}
-		else {
-			if (accumulatedTokens + blockTokens + contextTokens > maxTokens && currentChunkList.length > 0) flushCurrentChunk(false);
-			currentChunkList.push({
-				text: blockText,
-				headings: [...headings]
-			});
-			accumulatedTokens += blockTokens;
-		}
-	}
-}
 //#endregion
 //#region src/core/extraction-audit.ts
 const AUDIT_ID_RE = /^[\w.-]+$/;
@@ -14669,44 +14453,7 @@ async function runBatchExtraction(aiexDir, config, aiConfig, schemaName, dir, gl
 //#endregion
 //#region src/core/extract-runner.ts
-const encoding = getEncoding("cl100k_base");
 const JSON_EXT_RE$1 = /\.json$/;
-async function limitConcurrency(concurrency, items, fn) {
-	const results = Array.from({ length: items.length });
-	let nextIndex = 0;
-	async function worker() {
-		while (nextIndex < items.length) {
-			const currentIndex = nextIndex++;
-			results[currentIndex] = await fn(items[currentIndex], currentIndex);
-		}
-	}
-	const workers = Array.from({ length: Math.min(concurrency, items.length) }, worker);
-	await Promise.all(workers);
-	return results;
-}
-function getSchemaKeywords(schema) {
-	const keywords = /* @__PURE__ */ new Set();
-	function walk(properties) {
-		if (!properties) return;
-		for (const [name$1, prop] of Object.entries(properties)) {
-			keywords.add(name$1.toLowerCase());
-			const parts = name$1.replace(/([a-z0-9])([A-Z])/g, "$1 $2").split(/[\s._:/\\-]+/g);
-			for (const part of parts) if (part.length > 1) keywords.add(part.toLowerCase());
-			if (prop && typeof prop === "object") {
-				const p = prop;
-				if (typeof p.title === "string") keywords.add(p.title.toLowerCase());
-				if (typeof p.description === "string") {
-					const descParts = p.description.toLowerCase().match(/[\p{L}\p{N}_-]+/gu) ?? [];
-					for (const d of descParts) if (d.length > 2) keywords.add(d);
-				}
-				if (p.type === "object") walk(p.properties);
-				if (p.type === "array" && p.items?.type === "object") walk(p.items.properties);
-			}
-		}
-	}
-	walk(schema.properties);
-	return Array.from(keywords);
-}
 async function ensureDatabaseReady(dbPath, schema) {
 	try {
 		await fs.access(dbPath);
@@ -14778,153 +14525,7 @@ async function extractSingle(aiexDir, config, aiConfig, schemaName, text$1, file
 	}
 	const s = spinner();
 	if (!options?.quiet) s.start(filePath ? t("command.extract.file.extractedFrom", { file: path.basename(filePath) }) : t("command.extract.file.extracting"));
-	const maxTokens = aiConfig.extraction?.maxTokens ?? 8e3;
-	const overlapTokens = aiConfig.extraction?.overlapSize ?? 1e3;
-	let result;
-	const totalTokens = text$1 ? encoding.encode(text$1).length : 0;
-	if (text$1 && totalTokens > maxTokens) {
-		if (!options?.quiet) consola.info(t("command.extract.file.chunking", {
-			length: totalTokens,
-			limit: maxTokens
-		}));
-		const finalDocs = splitMarkdown(text$1, maxTokens, overlapTokens);
-		if (!options?.quiet) consola.info(t("command.extract.file.chunksCount", { count: finalDocs.length }));
-		let processedDocs = finalDocs;
-		if (!!aiConfig.extraction?.preFiltering && finalDocs.length > 1) {
-			const preFilteringLimit = aiConfig.extraction?.preFilteringLimit ?? 5;
-			const keywords = getSchemaKeywords(schemaLoad.schema);
-			const scoredChunks = finalDocs.map((doc, idx) => {
-				if (idx === 0) return {
-					index: idx,
-					score: Number.POSITIVE_INFINITY
-				};
-				let score = 0;
-				const docTextLower = doc.pageContent.toLowerCase();
-				for (const kw of keywords) {
-					let pos = docTextLower.indexOf(kw);
-					while (pos !== -1) {
-						score++;
-						pos = docTextLower.indexOf(kw, pos + kw.length);
-					}
-				}
-				return {
-					index: idx,
-					score
-				};
-			}).slice(1).sort((a, b) => b.score - a.score);
-			const selectedIndices = new Set([0]);
-			let keptCount = 0;
-			for (const sc of scoredChunks) if (sc.score > 0 && keptCount < preFilteringLimit) {
-				selectedIndices.add(sc.index);
-				keptCount++;
-			}
-			processedDocs = finalDocs.filter((_, idx) => selectedIndices.has(idx));
-			if (!options?.quiet) consola.info(t("command.extract.file.preFiltering", {
-				original: finalDocs.length,
-				filtered: processedDocs.length
-			}));
-		}
-		const chunkResults = [];
-		const accumulatedTokens = {
-			prompt: 0,
-			completion: 0,
-			total: 0
-		};
-		let success = true;
-		let errorMsg = "";
-		const extractionTasks = processedDocs.map((doc, i) => {
-			return async () => {
-				if (!success) return;
-				const headings = [];
-				if (doc.metadata) {
-					if (doc.metadata.h1) headings.push(doc.metadata.h1);
-					if (doc.metadata.h2) headings.push(doc.metadata.h2);
-					if (doc.metadata.h3) headings.push(doc.metadata.h3);
-					if (doc.metadata.h4) headings.push(doc.metadata.h4);
-				}
-				let chunkText = doc.pageContent;
-				if (headings.length > 0) chunkText = `> **[Context]** Belong to: ${headings.join(" > ")}\n\n${chunkText}`;
-				const chunkResult = await extractStructuredData({
-					config: aiConfig,
-					schema: schemaLoad.schema,
-					text: chunkText,
-					aiexDir,
-					modelOverride,
-					onRetry(info) {
-						if (!options?.quiet) s.message(t("command.extract.file.extractRetryChunk", {
-							current: i + 1,
-							total: processedDocs.length,
-							code: info.statusCode,
-							delay: info.delayMs / 1e3,
-							attempt: info.attempt,
-							max: info.maxRetries
-						}));
-					}
-				});
-				if (!chunkResult.success) {
-					success = false;
-					errorMsg = chunkResult.error || t("common.unknownError");
-					if (!options?.quiet) {
-						s.stop(t("command.extract.file.extractFailChunk", { current: i + 1 }));
-						consola.error(errorMsg);
-					}
-					return;
-				}
-				if (chunkResult.data) chunkResults.push(chunkResult.data);
-				if (chunkResult.tokensUsed) {
-					accumulatedTokens.prompt += chunkResult.tokensUsed.prompt ?? 0;
-					accumulatedTokens.completion += chunkResult.tokensUsed.completion ?? 0;
-					accumulatedTokens.total += chunkResult.tokensUsed.total ?? 0;
-				}
-			};
-		});
-		const concurrency = Math.min(aiConfig.extraction?.concurrency ?? 2, 2);
-		if (!options?.quiet && processedDocs.length > 0) s.message(t("command.extract.file.extractingChunk", {
-			current: 1,
-			total: processedDocs.length
-		}));
-		try {
-			await limitConcurrency(concurrency, extractionTasks, async (task, idx) => {
-				if (!options?.quiet && success) s.message(t("command.extract.file.extractingChunk", {
-					current: idx + 1,
-					total: processedDocs.length
-				}));
-				await task();
-			});
-		} catch (e) {
-			success = false;
-			errorMsg = e instanceof Error ? e.message : String(e);
-		}
-		if (!success) return {
-			success: false,
-			error: errorMsg
-		};
-		const mergedData = mergeExtractionResults(schemaLoad.schema, chunkResults);
-		const validation = validateExtractedData(schemaLoad.schema, mergedData);
-		if (!validation.success) {
-			const valError = validation.error || "Merged data validation failed";
-			if (!options?.quiet) {
-				s.stop(t("command.extract.file.validationFail"));
-				consola.error(valError);
-			}
-			return {
-				success: false,
-				error: valError
-			};
-		}
-		const outputDir = path.resolve(aiexDir, aiConfig.extraction?.outputDir?.replace(".aiex/", "") ?? "extracted");
-		await fs.mkdir(outputDir, { recursive: true });
-		const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
-		const outputFileName = `${schemaLoad.schema.table.name}-${timestamp}.json`;
-		const finalMergedOutputPath = path.join(outputDir, outputFileName);
-		await fs.writeFile(finalMergedOutputPath, JSON.stringify(mergedData, null, 2));
-		result = {
-			success: true,
-			data: mergedData,
-			tokensUsed: accumulatedTokens,
-			outputPath: finalMergedOutputPath
-		};
-	} else result = await extractStructuredData({
+	const result = await extractStructuredData({
 		config: aiConfig,
 		schema: schemaLoad.schema,
 		text: text$1 ?? "",
@@ -14952,11 +14553,6 @@ async function extractSingle(aiexDir, config, aiConfig, schemaName, text$1, file
 	}
 	if (!options?.quiet) s.stop(t("command.extract.file.extractComplete"));
 	if (result.outputPath && !options?.quiet) consola.success(t("command.extract.file.resultSaved", { path: pc.cyan(result.outputPath) }));
-	if (result.evidenceSummary && !options?.quiet) {
-		const summary = result.evidenceSummary;
-		const issueText = summary.issueCount > 0 ? pc.yellow(String(summary.issueCount)) : pc.green("0");
-		consola.info(pc.gray(`Evidence coverage: ${summary.evidenceCount}/${summary.fieldCount} fields, found ${summary.foundCount}, inferred ${summary.inferredCount}, missing ${summary.missingCount}, issues ${issueText}`));
-	}
 	if (result.tokensUsed && !options?.quiet) consola.info(pc.gray(t("command.extract.file.tokenUsage", {
 		prompt: result.tokensUsed.prompt,
 		completion: result.tokensUsed.completion,
@@ -14985,7 +14581,6 @@ async function extractSingle(aiexDir, config, aiConfig, schemaName, text$1, file
 						outputPath: result.outputPath,
 						data: result.data,
 						tablesInserted: insertResult.tablesInserted,
-						evidenceSummary: result.evidenceSummary,
 						tokensUsed: result.tokensUsed
 					};
 				} else {
@@ -15012,7 +14607,6 @@ async function extractSingle(aiexDir, config, aiConfig, schemaName, text$1, file
 		success: true,
 		outputPath: result.outputPath,
 		data: result.data,
-		evidenceSummary: result.evidenceSummary,
 		tokensUsed: result.tokensUsed
 	};
 }
@@ -15125,7 +14719,6 @@ async function runAuditedExtraction(options) {
 				outputName: updated.outputName,
 				tablesInserted: updated.tablesInserted,
 				notionPages: updated.notionPages,
-				evidenceSummary: r.evidenceSummary,
 				tokensUsed: updated.tokensUsed,
 				auditId: updated.id,
 				fileHash
@@ -16253,7 +15846,6 @@ function aiRoutes(config) {
 //#endregion
 //#region src/core/data-service.ts
 const FILE_REGEX = /\.json$/;
-const EVIDENCE_FILE_SUFFIX = ".evidence.json";
 const EXTRACTION_TIMESTAMP_RE = /-\d{4}-\d{2}-\d{2}T/;
 const INTERNAL_ROWID_COLUMN = "__aiex_rowid";
 const TIMESTAMP_CLEANUP = /(\d{2})-(\d{2})-(\d{2})/;
@@ -16269,24 +15861,6 @@ function getAuditNotionStatus(record) {
 	if (record.status === "failed") return "failed";
 	return "not_synced";
 }
-async function readEvidenceSummary(extractedDir, outputName) {
-	const evidencePath = path.join(extractedDir, outputName.replace(FILE_REGEX, EVIDENCE_FILE_SUFFIX));
-	try {
-		const coverage = (await readFile(evidencePath))?.coverage;
-		if (!coverage || typeof coverage !== "object") return void 0;
-		return {
-			path: evidencePath,
-			fieldCount: Number(coverage.fieldCount) || 0,
-			evidenceCount: Number(coverage.evidenceCount) || 0,
-			foundCount: Number(coverage.foundCount) || 0,
-			missingCount: Number(coverage.missingCount) || 0,
-			inferredCount: Number(coverage.inferredCount) || 0,
-			issueCount: Number(coverage.issueCount) || 0
-		};
-	} catch {
-		return;
-	}
-}
 async function getRowExtractionActions(aiexDir, tableName) {
 	const actions = /* @__PURE__ */ new Map();
 	const auditRecords = await listExtractionAuditRecords(aiexDir);
@@ -16314,7 +15888,7 @@ async function listExtractions(config) {
 	const aiexDir = path.dirname(config.schemaPath);
 	const extractedDir = path.join(aiexDir, "extracted");
 	await fs.mkdir(extractedDir, { recursive: true });
-	const jsonFiles = (await fs.readdir(extractedDir)).filter((f) => f.endsWith(".json") && !f.endsWith(".prompt.md") && !f.endsWith(EVIDENCE_FILE_SUFFIX));
+	const jsonFiles = (await fs.readdir(extractedDir)).filter((f) => f.endsWith(".json") && !f.endsWith(".prompt.md"));
 	const auditRecords = await listExtractionAuditRecords(aiexDir);
 	const auditByOutputName = new Map(auditRecords.map((record) => [record.outputName, record]));
 	const records = [];
@@ -16333,7 +15907,6 @@ async function listExtractions(config) {
 				timestamp,
 				fileSize: stat.size,
 				modifiedAt: stat.mtime.toISOString(),
-				evidenceSummary: await readEvidenceSummary(extractedDir, file),
 				notionStatus: notionPages ? "synced" : audit?.status === "failed" ? "failed" : "not_synced",
 				notionPages,
 				notionError: !notionPages && audit?.status === "failed" ? audit.error : void 0
@@ -16513,7 +16086,6 @@ async function retryNotionSync(config, fileName) {
 //#endregion
 //#region src/server/routes/data.ts
-const JSON_FILE_SUFFIX_RE = /\.json$/;
 const tableParamSchema = z.object({ name: z.string().regex(/^[a-z][a-z0-9_]*$/) });
 const extractionFileParamSchema = z.object({ name: z.string().regex(/^[\w.-]+\.json$/).refine((name$1) => name$1 === path.basename(name$1) && !name$1.includes("..")) });
 const tableQuerySchema = z.object({
@@ -16566,22 +16138,10 @@ function dataRoutes(config) {
 		const filePath = path.join(extractedDir, name$1);
 		try {
 			const content = await fs.readFile(filePath, "utf-8");
-			const evidencePath = path.join(extractedDir, name$1.replace(JSON_FILE_SUFFIX_RE, ".evidence.json"));
-			let evidenceSummary;
-			try {
-				const evidence = JSON.parse(await fs.readFile(evidencePath, "utf-8"));
-				evidenceSummary = evidence?.coverage ? {
-					...evidence.coverage,
-					path: evidencePath
-				} : void 0;
-			} catch {
-				evidenceSummary = void 0;
-			}
 			return c.json({
 				success: true,
 				content,
-				name: name$1,
-				evidenceSummary
+				name: name$1
 			});
 		} catch {
 			return c.json({ error: t("server.extractionNotFound") }, 404);
@@ -16725,7 +16285,6 @@ function extractRoutes(config) {
 				outputName: result.outputName,
 				tablesInserted: result.tablesInserted,
 				notionPages: result.notionPages,
-				evidenceSummary: result.evidenceSummary,
 				tokensUsed: result.tokensUsed,
 				auditId: result.auditId
 			}, 200);
@@ -16793,7 +16352,6 @@ function extractRoutes(config) {
 			outputName: result.outputName,
 			tablesInserted: result.tablesInserted,
 			notionPages: result.notionPages,
-			evidenceSummary: result.evidenceSummary,
 			tokensUsed: result.tokensUsed,
 			auditId: result.auditId
 		}, 200);

package/dist/{doctor-collector-NTNBFeBw.mjs → doctor-collector-hWEvJ4lw.mjs} RENAMED Viewed

@@ -74,7 +74,7 @@ function doctorDiagnosticsTableRows(d) {
 //#endregion
 //#region package.json
 var name = "aiex-cli";
-var version = "0.0.5-beta.5";
+var version = "0.0.6-beta.1";
 var description = "JSON Schema → SQLite with AI-powered data extraction";
 var package_default = {
 	name,
@@ -158,11 +158,9 @@ var package_default = {
 		"hono": "catalog:",
 		"i18next": "catalog:",
 		"i18next-fs-backend": "catalog:",
-		"js-tiktoken": "catalog:",
 		"jsonfile": "catalog:",
 		"jsonrepair": "catalog:",
 		"kysely": "catalog:",
-		"marked": "catalog:",
 		"mime": "catalog:",
 		"open": "catalog:",
 		"p-retry": "catalog:",
@@ -230,15 +228,7 @@ const PromptConfigSchema = z.object({
 	systemTemplate: z.string().min(1),
 	userTemplate: z.string().min(1)
 });
-const ExtractionConfigSchema = z.object({
-	outputDir: z.string().min(1),
-	mode: z.enum(["pipeline"]).default("pipeline").optional(),
-	concurrency: z.number().int().min(1).optional(),
-	maxTokens: z.number().int().positive().default(8e3).optional(),
-	overlapSize: z.number().int().nonnegative().optional(),
-	preFiltering: z.boolean().optional(),
-	preFilteringLimit: z.number().int().min(1).optional()
-});
+const ExtractionConfigSchema = z.object({ outputDir: z.string().min(1) });
 const ImageOcrConfigSchema = z.object({
 	ocrFallback: z.enum([
 		"auto",
@@ -345,10 +335,7 @@ Extraction requirements:
 	userTemplate: `Please extract data from the following text:
 {text}`
 };
-const DEFAULT_EXTRACTION_CONFIG = {
-	outputDir: ".aiex/extracted",
-	mode: "pipeline"
-};
+const DEFAULT_EXTRACTION_CONFIG = { outputDir: ".aiex/extracted" };
 const DEFAULT_IMAGE_OCR_CONFIG = {
 	ocrFallback: "auto",
 	ocrLanguages: "en-US, zh-Hans",
@@ -580,13 +567,6 @@ const en = {
 				extractFail: "Extraction failed",
 				extractComplete: "Extraction complete",
 				extractRetry: "API responded with {{code}}, retrying in {{delay}}s ({{attempt}}/{{max}})",
-				chunking: "Input text ({{length}} tokens) exceeds limit ({{limit}} tokens). Splitting into chunks...",
-				chunksCount: "Split into {{count}} chunk(s).",
-				preFiltering: "Hybrid pre-filtering: selected {{filtered}} out of {{original}} chunks based on schema relevance.",
-				extractingChunk: "Extracting chunk {{current}}/{{total}}...",
-				extractRetryChunk: "Chunk {{current}}/{{total}} API responded with {{code}}, retrying in {{delay}}s ({{attempt}}/{{max}})",
-				extractFailChunk: "Extraction failed for chunk {{current}}/{{total}}",
-				validationFail: "Merged data validation failed",
 				resultSaved: "Result saved: {{path}}",
 				tokenUsage: "Token usage: prompt={{prompt}}, completion={{completion}}, total={{total}}",
 				insertingDb: "Inserting into database...",
@@ -976,7 +956,7 @@ async function initI18n(lng) {
 			fallbackLng: "en",
 			resources: {
 				"en": { translation: en },
-				"zh-CN": { translation: await import("./zh-CN-Ca-Dv775.mjs").then((m) => m.zhCN) }
+				"zh-CN": { translation: await import("./zh-CN-Qcn0DHFh.mjs").then((m) => m.zhCN) }
 			},
 			interpolation: { escapeValue: false },
 			returnNull: false