npm - aiex-cli - Versions diffs - 0.0.5-beta.5 → 0.0.5-beta.6 - Mend

aiex-cli 0.0.5-beta.5 → 0.0.5-beta.6

This diff shows the changes between two publicly available versions of the package as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (15)

package/README.md +4 -4
package/dist/cli.mjs +638 -377
package/dist/{doctor-collector-NTNBFeBw.mjs → doctor-collector-BpqhXNcO.mjs} +26 -91
package/dist/index.mjs +1 -1
package/dist/web/assets/AISettings-sVI4PTNB.js +264 -0
package/dist/web/assets/{DataBrowser-GAA-pGq0.js → DataBrowser-BGkZb9FV.js} +1 -1
package/dist/web/assets/{ExtractionViewer-BhhWrBs2.js → ExtractionViewer-DNrkSECj.js} +1 -1
package/dist/web/assets/{api-client-b4ZBXpNH.js → api-client-gQAAOw0v.js} +1 -1
package/dist/web/assets/{index-CKV2X6sS.js → index-BQKZKzzP.js} +3 -3
package/dist/web/assets/index-BU58oIRd.css +2 -0
package/dist/web/index.html +3 -3
package/dist/{zh-CN-Ca-Dv775.mjs → zh-CN-DkillGHx.mjs} +10 -23
package/package.json +1 -1
package/dist/web/assets/AISettings-BlyTFIIy.js +0 -272
package/dist/web/assets/index-Csdgio76.css +0 -2

package/dist/{doctor-collector-NTNBFeBw.mjs → doctor-collector-BpqhXNcO.mjs} RENAMED Viewed

@@ -74,7 +74,7 @@ function doctorDiagnosticsTableRows(d) {
 //#endregion
 //#region package.json
 var name = "aiex-cli";
-var version = "0.0.5-beta.5";
+var version = "0.0.5-beta.6";
 var description = "JSON Schema → SQLite with AI-powered data extraction";
 var package_default = {
 	name,
@@ -211,7 +211,6 @@ function seedConfig(config = createConfig()) {
 //#endregion
 //#region src/core/ai-extraction/schemas.ts
 const ModelCapabilitiesSchema = z.object({
-	vision: z.boolean(),
 	structuredOutput: z.boolean(),
 	maxTokens: z.number().int().positive().optional(),
 	maxOutputTokens: z.number().int().positive().optional()
@@ -235,17 +234,14 @@ const ExtractionConfigSchema = z.object({
 	mode: z.enum(["pipeline"]).default("pipeline").optional(),
 	concurrency: z.number().int().min(1).optional(),
 	maxTokens: z.number().int().positive().default(8e3).optional(),
-	overlapSize: z.number().int().nonnegative().optional(),
-	preFiltering: z.boolean().optional(),
-	preFilteringLimit: z.number().int().min(1).optional()
+	overlapSize: z.number().int().nonnegative().optional()
 });
 const ImageOcrConfigSchema = z.object({
-	ocrFallback: z.enum([
-		"auto",
-		"off",
-		"local"
-	]).default("auto").optional(),
-	ocrLanguages: z.string().min(1).optional(),
+	imageConversion: z.enum(["vision", "local"]).default("local").optional(),
+	visionBaseURL: z.string().url().optional(),
+	visionApiKey: z.string().optional(),
+	imageModelName: z.string().min(1).optional(),
+	ocrLanguages: z.string().optional(),
 	ocrMinConfidence: z.number().min(0).max(1).optional()
 });
 const ExternalPdfConverterConfigSchema = z.object({
@@ -268,14 +264,10 @@ const PdfConfigSchema = z.object({
 		"unpdf",
 		"mineru",
 		"mineru_api",
-		"markitdown",
-		"marker",
 		"external"
 	]),
 	mineru: ExternalPdfConverterConfigSchema.optional(),
 	mineruApi: MineruApiPdfConverterConfigSchema.optional(),
-	markitdown: ExternalPdfConverterConfigSchema.optional(),
-	marker: ExternalPdfConverterConfigSchema.optional(),
 	external: ExternalPdfConverterConfigSchema.optional()
 });
 const LangfuseConfigSchema = z.object({
@@ -315,16 +307,7 @@ const PLACEHOLDER_SCHEMA = "{schema}";
 const PLACEHOLDER_TEXT = "{text}";
 const DEFAULT_MODELS = [{
 	name: "qwen-plus",
-	capabilities: {
-		vision: false,
-		structuredOutput: true
-	}
-}, {
-	name: "qwen-vl-plus",
-	capabilities: {
-		vision: true,
-		structuredOutput: true
-	}
+	capabilities: { structuredOutput: true }
 }];
 const DEFAULT_PROVIDER_CONFIG = {
 	baseURL: "https://dashscope.aliyuncs.com/compatible-mode/v1",
@@ -341,7 +324,10 @@ Extraction requirements:
 1. Extract strictly according to the field names and types defined in the structure
 2. If the text lacks information for a field, set that field to null
 3. Do not add fields that do not exist in the structure definition
-4. Maintain data accuracy and completeness`,
+4. Use only facts present in the source text; do not infer, guess, or complete missing values from outside knowledge
+5. Normalize values to the target type: numbers as JSON numbers, booleans as true/false, dates and formatted strings exactly as requested by the field format
+6. For repeated or conflicting mentions, prefer the most specific final value in the source text and ignore placeholder values such as N/A, unknown, TBD, or empty strings
+7. Maintain data accuracy and completeness`,
 	userTemplate: `Please extract data from the following text:
 {text}`
 };
@@ -350,7 +336,7 @@ const DEFAULT_EXTRACTION_CONFIG = {
 	mode: "pipeline"
 };
 const DEFAULT_IMAGE_OCR_CONFIG = {
-	ocrFallback: "auto",
+	imageConversion: "local",
 	ocrLanguages: "en-US, zh-Hans",
 	ocrMinConfidence: 0
 };
@@ -365,26 +351,6 @@ const DEFAULT_MINERU_CONFIG = {
 	timeout: 600,
 	fallbackToUnpdf: true
 };
-const DEFAULT_MARKITDOWN_CONFIG = {
-	command: "markitdown",
-	args: [
-		"{input}",
-		"-o",
-		"{outputDir}/{basename}.md"
-	],
-	timeout: 600,
-	fallbackToUnpdf: true
-};
-const DEFAULT_MARKER_CONFIG = {
-	command: "marker_single",
-	args: [
-		"{input}",
-		"--output_dir",
-		"{outputDir}"
-	],
-	timeout: 600,
-	fallbackToUnpdf: true
-};
 const DEFAULT_MINERU_API_CONFIG = {
 	token: "",
 	baseURL: "https://mineru.net/api/v4",
@@ -396,9 +362,7 @@ const DEFAULT_MINERU_API_CONFIG = {
 const DEFAULT_PDF_CONFIG = {
 	converter: "unpdf",
 	mineru: DEFAULT_MINERU_CONFIG,
-	mineruApi: DEFAULT_MINERU_API_CONFIG,
-	markitdown: DEFAULT_MARKITDOWN_CONFIG,
-	marker: DEFAULT_MARKER_CONFIG
+	mineruApi: DEFAULT_MINERU_API_CONFIG
 };
 const DEFAULT_AI_CONFIG = {
 	provider: DEFAULT_PROVIDER_CONFIG,
@@ -570,6 +534,8 @@ const en = {
 			file: {
 				hashWarning: "Failed to calculate file hash for {{file}}: {{error}}",
 				alreadyProcessed: "File {{file}} (hash: {{hash}}) has already been processed successfully. Skipping.",
+				visionTranscribed: "Transcribed image text via AI vision model ({{model}})",
+				visionTranscribeFailed: "Vision model transcription failed for {{model}}, falling back to local OCR",
 				ocrText: "Extracted image text via local OCR (confidence: {{confidence}}%)",
 				pdfFallback: "Fell back to unpdf — {{count}} page(s) extracted",
 				pdfConverted: "Converted PDF via {{name}}, {{count}} page(s)",
@@ -582,7 +548,6 @@ const en = {
 				extractRetry: "API responded with {{code}}, retrying in {{delay}}s ({{attempt}}/{{max}})",
 				chunking: "Input text ({{length}} tokens) exceeds limit ({{limit}} tokens). Splitting into chunks...",
 				chunksCount: "Split into {{count}} chunk(s).",
-				preFiltering: "Hybrid pre-filtering: selected {{filtered}} out of {{original}} chunks based on schema relevance.",
 				extractingChunk: "Extracting chunk {{current}}/{{total}}...",
 				extractRetryChunk: "Chunk {{current}}/{{total}} API responded with {{code}}, retrying in {{delay}}s ({{attempt}}/{{max}})",
 				extractFailChunk: "Extraction failed for chunk {{current}}/{{total}}",
@@ -694,14 +659,10 @@ const en = {
 		ai: {
 			apiKeyMissing: "API Key not configured. Please configure AI settings in the web UI.",
 			extractionNotObject: "Extraction result is not an object and cannot be written to Notion.",
-			noModels: "No AI models configured. Please add at least one model in AI Settings.",
-			noVisionModel: "Image input requires a model with vision capability{{hint}}.",
-			noVisionModelContext: "No vision-capable model with sufficient context window (≥{{tokens}} tokens) found{{hint}}.",
-			addSuitableModel: " Please add a suitable vision-capable model in AI Settings."
+			noModels: "No AI models configured. Please add at least one model in AI Settings."
 		},
 		ocr: {
 			platformUnsupported: "Local OCR is only available on macOS or Windows. Current platform: {{platform}}.",
-			disabled: "Image OCR fallback is disabled in AI settings.",
 			unavailable: "Local OCR is unavailable. Install optional dependency @napi-rs/system-ocr and approve its native build scripts. {{error}}",
 			noText: "Local OCR did not recognize any text in the image.",
 			lowConfidence: "Local OCR confidence {{confidence}}% is below the configured minimum {{min}}%."
@@ -803,26 +764,19 @@ const en = {
 		models: "Models",
 		addModel: "Add Model",
 		modelName: "Model name (e.g. gpt-4o)",
-		structuredOutput: "Structured Output",
-		textOnlyOutput: "Text-only Output",
-		visionSupported: "Vision Supported",
-		visionUnsupported: "Vision Unsupported",
 		subscribe: "Registry",
 		imageInput: "Image Input",
 		imageInputSummary: {
-			visionModel: "Image files will use your configured vision model first.",
-			ocrFallback: "No vision model is configured, and local OCR fallback is disabled.",
-			ocrLocal: "No vision model is configured. Image text will require local OCR on macOS or Windows.",
-			ocrAuto: "No vision model is configured. On macOS or Windows, local OCR will be tried automatically for text-heavy images."
+			visionModel: "Image text is transcribed via {{model}} before structured extraction.",
+			ocrNoModel: "No vision model selected. Image text will be read through local OCR.",
+			ocrLocal: "Image text will be read through local OCR."
 		},
-		visionModelConfigured: "Vision model configured",
-		noVisionModel: "No vision model",
 		advancedImageSettings: "Advanced image settings",
 		hideAdvancedImageSettings: "Hide advanced image settings",
-		ocrFallback: "OCR fallback",
+		ocrFallback: "Image input mode",
 		ocrLanguages: "Languages",
 		ocrMinConfidence: "Minimum confidence",
-		ocrHint: "Image extraction always prefers a vision model. OCR fallback is only used when no vision model is available.",
+		ocrHint: "Images are converted to text before structured extraction.",
 		pdfConversion: "PDF Conversion",
 		converter: "Converter",
 		command: "Command",
@@ -897,14 +851,11 @@ const en = {
 		converterOptions: {
 			unpdf: "Built-in text extraction (unpdf)",
 			mineru: "MinerU (mineru)",
-			markitdown: "MarkItDown (markitdown)",
-			marker: "Marker (marker_single)",
 			external: "Custom External Command"
 		},
 		ocrFallbackOptions: {
-			auto: "Auto on macOS or Windows when no vision model exists",
-			off: "Off",
-			local: "Require local OCR"
+			vision: "Vision model (fallback to OCR)",
+			local: "Local OCR only"
 		}
 	},
 	prompt: {
@@ -976,7 +927,7 @@ async function initI18n(lng) {
 			fallbackLng: "en",
 			resources: {
 				"en": { translation: en },
-				"zh-CN": { translation: await import("./zh-CN-Ca-Dv775.mjs").then((m) => m.zhCN) }
+				"zh-CN": { translation: await import("./zh-CN-DkillGHx.mjs").then((m) => m.zhCN) }
 			},
 			interpolation: { escapeValue: false },
 			returnNull: false
@@ -1000,20 +951,6 @@ const defaultRuntime = {
 		return await import("@napi-rs/system-ocr");
 	}
 };
-function imageOcrMode(config) {
-	return config?.ocrFallback ?? "auto";
-}
-function hasVisionModel(aiConfig, modelOverride) {
-	if (modelOverride) return modelOverride.capabilities.vision;
-	return aiConfig?.provider.models.some((model) => model.capabilities.vision) ?? true;
-}
-function shouldUseImageOcrFallback(aiConfig, modelOverride, runtime = defaultRuntime) {
-	if (hasVisionModel(aiConfig, modelOverride)) return false;
-	const mode = imageOcrMode(aiConfig?.image);
-	if (mode === "off") return false;
-	if (mode === "local") return true;
-	return isLocalOcrPlatform(runtime.platform);
-}
 function isLocalOcrPlatform(platform) {
 	return platform === "darwin" || platform === "win32";
 }
@@ -1021,9 +958,7 @@ function parseOcrLanguages(languages) {
 	return (languages ?? DEFAULT_OCR_LANGUAGES).split(",").map((language) => language.trim()).filter(Boolean);
 }
 async function recognizeImageText(imagePath, config, runtime = defaultRuntime) {
-	const mode = imageOcrMode(config);
 	if (!isLocalOcrPlatform(runtime.platform)) throw new Error(t("errors.ocr.platformUnsupported", { platform: runtime.platform }));
-	if (mode === "off") throw new Error(t("errors.ocr.disabled"));
 	let localOcr;
 	try {
 		localOcr = await runtime.loadLocalOcr();
@@ -1556,4 +1491,4 @@ async function collectDoctorDiagnostics(options = {}) {
 }
 //#endregion
-export { doctorDiagnosticsTableRows as A, createConfig as C, package_default as D, name as E, version as O, AIConfigSchema as S, description as T, DEFAULT_MINERU_API_CONFIG as _, parseJsonSchema as a, PLACEHOLDER_SCHEMA as b, recognizeImageText as c, t as d, getDefaultAIConfig as f, DEFAULT_MARKITDOWN_CONFIG as g, DEFAULT_MARKER_CONFIG as h, JsonSchemaDefinitionSchema as i, formatDoctorDiagnosticsJson as j, buildDoctorDiagnostics as k, shouldUseImageOcrFallback as l, writeAIConfig as m, createMigrationConfig as n, toSnakeCase as o, readAIConfig as p, generateDrizzleConfig as r, generateDrizzleSchema as s, collectDoctorDiagnostics as t, initI18n as u, DEFAULT_MINERU_CONFIG as v, seedConfig as w, PLACEHOLDER_TEXT as x, DEFAULT_PROMPT_CONFIG as y };
+export { name as C, doctorDiagnosticsTableRows as D, buildDoctorDiagnostics as E, formatDoctorDiagnosticsJson as O, description as S, version as T, PLACEHOLDER_SCHEMA as _, parseJsonSchema as a, createConfig as b, recognizeImageText as c, getDefaultAIConfig as d, readAIConfig as f, DEFAULT_PROMPT_CONFIG as g, DEFAULT_MINERU_CONFIG as h, JsonSchemaDefinitionSchema as i, initI18n as l, DEFAULT_MINERU_API_CONFIG as m, createMigrationConfig as n, toSnakeCase as o, writeAIConfig as p, generateDrizzleConfig as r, generateDrizzleSchema as s, collectDoctorDiagnostics as t, t as u, PLACEHOLDER_TEXT as v, package_default as w, seedConfig as x, AIConfigSchema as y };

package/dist/index.mjs CHANGED Viewed

@@ -1,3 +1,3 @@
-import { A as doctorDiagnosticsTableRows, a as parseJsonSchema, i as JsonSchemaDefinitionSchema, j as formatDoctorDiagnosticsJson, k as buildDoctorDiagnostics, n as createMigrationConfig, r as generateDrizzleConfig, s as generateDrizzleSchema, t as collectDoctorDiagnostics } from "./doctor-collector-NTNBFeBw.mjs";
+import { D as doctorDiagnosticsTableRows, E as buildDoctorDiagnostics, O as formatDoctorDiagnosticsJson, a as parseJsonSchema, i as JsonSchemaDefinitionSchema, n as createMigrationConfig, r as generateDrizzleConfig, s as generateDrizzleSchema, t as collectDoctorDiagnostics } from "./doctor-collector-BpqhXNcO.mjs";
 export { JsonSchemaDefinitionSchema, buildDoctorDiagnostics, collectDoctorDiagnostics, createMigrationConfig, doctorDiagnosticsTableRows, formatDoctorDiagnosticsJson, generateDrizzleConfig, generateDrizzleSchema, parseJsonSchema };