aiex-cli 0.0.3-beta.1 → 0.0.3-beta.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.mjs +124 -155
- package/dist/core/schema-sqlite/migrate-helper.mjs +1 -1
- package/dist/{doctor-collector-CjFTz8p4.mjs → doctor-collector-Cz7mb2L8.mjs} +151 -2
- package/dist/index.d.mts +10 -0
- package/dist/index.mjs +1 -1
- package/dist/logo.png +0 -0
- package/dist/web/assets/AISettings-YW-fn5h5.js +346 -0
- package/dist/web/assets/index-BHM3EpP-.css +2 -0
- package/dist/web/assets/{index-BpxCXucL.js → index-fSfuQz4G.js} +2 -2
- package/dist/web/index.html +2 -2
- package/package.json +4 -1
- package/dist/web/assets/AISettings-DwXkpWZU.js +0 -346
- package/dist/web/assets/index-Lx807is2.css +0 -2
package/dist/cli.mjs
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { C as package_default, D as formatDoctorDiagnosticsJson, E as doctorDiagnosticsTableRows, S as name, _ as PLACEHOLDER_TEXT, a as parseJsonSchema, b as seedConfig, c as recognizeImageText, d as readAIConfig, f as writeAIConfig, g as PLACEHOLDER_SCHEMA, h as DEFAULT_PROMPT_CONFIG, i as JsonSchemaDefinitionSchema, l as shouldUseImageOcrFallback, m as DEFAULT_MINERU_CONFIG, n as createMigrationConfig, o as toSnakeCase, p as DEFAULT_MARKITDOWN_CONFIG, s as generateDrizzleSchema, t as collectDoctorDiagnostics, u as getDefaultAIConfig, v as AIConfigSchema, w as version, x as description, y as createConfig } from "./doctor-collector-Cz7mb2L8.mjs";
|
|
2
2
|
import { createRequire } from "node:module";
|
|
3
3
|
import fs from "node:fs/promises";
|
|
4
4
|
import os from "node:os";
|
|
5
5
|
import path from "node:path";
|
|
6
6
|
import process from "node:process";
|
|
7
|
+
import { fileURLToPath } from "node:url";
|
|
7
8
|
import { readFile, writeFile } from "jsonfile";
|
|
8
9
|
import { ZodError, z } from "zod";
|
|
9
|
-
import { fileURLToPath } from "node:url";
|
|
10
10
|
import { defineCommand, runMain } from "citty";
|
|
11
11
|
import { consola } from "consola";
|
|
12
12
|
import updateNotifier from "update-notifier";
|
|
@@ -13513,6 +13513,73 @@ async function deleteExtractionAuditRecord(aiexDir, id) {
|
|
|
13513
13513
|
return true;
|
|
13514
13514
|
}
|
|
13515
13515
|
|
|
13516
|
+
//#endregion
|
|
13517
|
+
//#region src/core/file-constants.ts
|
|
13518
|
+
const MAX_UPLOAD_SIZE = 150 * 1024 * 1024;
|
|
13519
|
+
const MAX_UPLOAD_SIZE_TEXT = "150MB";
|
|
13520
|
+
const SUPPORTED_FILE_TYPES_TEXT = "images, PDF, text, markdown, CSV, JSON, HTML, XML, YAML";
|
|
13521
|
+
const MISSING_UPLOAD_FILE_TEXT = "Uploaded file is no longer available. Re-run extraction with the original file.";
|
|
13522
|
+
const SUPPORTED_MIME_TYPES = new Set([
|
|
13523
|
+
"image/png",
|
|
13524
|
+
"image/jpeg",
|
|
13525
|
+
"image/gif",
|
|
13526
|
+
"image/webp",
|
|
13527
|
+
"image/bmp",
|
|
13528
|
+
"image/svg+xml",
|
|
13529
|
+
"application/pdf",
|
|
13530
|
+
"text/plain",
|
|
13531
|
+
"text/markdown",
|
|
13532
|
+
"text/csv",
|
|
13533
|
+
"application/json",
|
|
13534
|
+
"text/html",
|
|
13535
|
+
"text/xml",
|
|
13536
|
+
"application/x-yaml",
|
|
13537
|
+
"text/yaml"
|
|
13538
|
+
]);
|
|
13539
|
+
const MIME_TO_EXT = {
|
|
13540
|
+
"image/png": "png",
|
|
13541
|
+
"image/jpeg": "jpg",
|
|
13542
|
+
"image/gif": "gif",
|
|
13543
|
+
"image/webp": "webp",
|
|
13544
|
+
"image/bmp": "bmp",
|
|
13545
|
+
"image/svg+xml": "svg",
|
|
13546
|
+
"application/pdf": "pdf",
|
|
13547
|
+
"text/plain": "txt",
|
|
13548
|
+
"text/markdown": "md",
|
|
13549
|
+
"text/csv": "csv",
|
|
13550
|
+
"application/json": "json",
|
|
13551
|
+
"text/html": "html",
|
|
13552
|
+
"text/xml": "xml",
|
|
13553
|
+
"application/x-yaml": "yaml",
|
|
13554
|
+
"text/yaml": "yaml"
|
|
13555
|
+
};
|
|
13556
|
+
function bytesToMB(bytes) {
|
|
13557
|
+
return bytes / (1024 * 1024);
|
|
13558
|
+
}
|
|
13559
|
+
function getExtensionFromMime(mimeType) {
|
|
13560
|
+
return MIME_TO_EXT[mimeType];
|
|
13561
|
+
}
|
|
13562
|
+
function isAllowedMimeType(mimeType) {
|
|
13563
|
+
return SUPPORTED_MIME_TYPES.has(mimeType);
|
|
13564
|
+
}
|
|
13565
|
+
function unsupportedFileTypeMessage(mimeType) {
|
|
13566
|
+
return `Unsupported file type "${mimeType}". Supported: ${SUPPORTED_FILE_TYPES_TEXT}.`;
|
|
13567
|
+
}
|
|
13568
|
+
function isMissingUploadFileError(error) {
|
|
13569
|
+
return !!error && typeof error === "object" && error.code === "ENOENT";
|
|
13570
|
+
}
|
|
13571
|
+
var FileValidationError = class extends Error {
|
|
13572
|
+
constructor(message) {
|
|
13573
|
+
super(message);
|
|
13574
|
+
this.name = "FileValidationError";
|
|
13575
|
+
}
|
|
13576
|
+
};
|
|
13577
|
+
function validateFileUpload(file) {
|
|
13578
|
+
if (file.size === 0) throw new FileValidationError("Uploaded file is empty");
|
|
13579
|
+
if (file.size > MAX_UPLOAD_SIZE) throw new FileValidationError(`File size (${bytesToMB(file.size).toFixed(1)}MB) exceeds ${MAX_UPLOAD_SIZE_TEXT} limit`);
|
|
13580
|
+
if (!isAllowedMimeType(file.type)) throw new FileValidationError(unsupportedFileTypeMessage(file.type));
|
|
13581
|
+
}
|
|
13582
|
+
|
|
13516
13583
|
//#endregion
|
|
13517
13584
|
//#region src/core/notion-sink.ts
|
|
13518
13585
|
const RICH_TEXT_LIMIT = 2e3;
|
|
@@ -14011,17 +14078,21 @@ async function listSchemas(aiexDir) {
|
|
|
14011
14078
|
return [];
|
|
14012
14079
|
}
|
|
14013
14080
|
}
|
|
14014
|
-
function
|
|
14015
|
-
|
|
14016
|
-
}
|
|
14017
|
-
async function readExtractFileInput(filePath, aiConfig) {
|
|
14018
|
-
const sizeMB = getFileSizeMB(filePath);
|
|
14019
|
-
if (sizeMB > 150) throw new Error(`File size (${sizeMB.toFixed(1)}MB) exceeds 150MB limit: ${filePath}`);
|
|
14081
|
+
async function readExtractFileInput(filePath, aiConfig, modelOverride) {
|
|
14082
|
+
const stat = fs$1.statSync(filePath);
|
|
14083
|
+
if (stat.size > MAX_UPLOAD_SIZE) throw new Error(`File size (${bytesToMB(stat.size).toFixed(1)}MB) exceeds ${MAX_UPLOAD_SIZE_TEXT} limit: ${filePath}`);
|
|
14020
14084
|
const ext = path.extname(filePath).toLowerCase().replace(".", "");
|
|
14021
|
-
if (FILE_PART_EXTENSIONS.has(ext))
|
|
14022
|
-
|
|
14023
|
-
|
|
14024
|
-
|
|
14085
|
+
if (FILE_PART_EXTENSIONS.has(ext)) {
|
|
14086
|
+
if (shouldUseImageOcrFallback(aiConfig, modelOverride)) {
|
|
14087
|
+
const result = await recognizeImageText(filePath, aiConfig?.image);
|
|
14088
|
+
consola.info(`Extracted image text via local OCR (confidence: ${(result.confidence * 100).toFixed(1)}%)`);
|
|
14089
|
+
return { text: result.text };
|
|
14090
|
+
}
|
|
14091
|
+
return {
|
|
14092
|
+
text: "",
|
|
14093
|
+
filePath
|
|
14094
|
+
};
|
|
14095
|
+
}
|
|
14025
14096
|
if (ext === "pdf") {
|
|
14026
14097
|
const buffer = await fs.readFile(filePath);
|
|
14027
14098
|
const converter = createPdfConverter(aiConfig?.pdf);
|
|
@@ -14139,7 +14210,7 @@ async function processOneFile(aiexDir, config, aiConfig, schemaName, filePath, m
|
|
|
14139
14210
|
}
|
|
14140
14211
|
});
|
|
14141
14212
|
try {
|
|
14142
|
-
const input = await readExtractFileInput(filePath, aiConfig);
|
|
14213
|
+
const input = await readExtractFileInput(filePath, aiConfig, modelOverride);
|
|
14143
14214
|
const r = await extractSingle(aiexDir, config, aiConfig, schemaName, input.text, input.filePath, modelOverride, {
|
|
14144
14215
|
quiet: false,
|
|
14145
14216
|
insert: options?.insert
|
|
@@ -14412,6 +14483,10 @@ const retryCommand = defineCommand({
|
|
|
14412
14483
|
}
|
|
14413
14484
|
outro("Done!");
|
|
14414
14485
|
} catch (error) {
|
|
14486
|
+
if (isMissingUploadFileError(error)) {
|
|
14487
|
+
failCommand(MISSING_UPLOAD_FILE_TEXT);
|
|
14488
|
+
return;
|
|
14489
|
+
}
|
|
14415
14490
|
failCommand(error instanceof Error ? error.message : String(error));
|
|
14416
14491
|
}
|
|
14417
14492
|
}
|
|
@@ -14464,7 +14539,7 @@ const extractCommand = defineCommand({
|
|
|
14464
14539
|
file: {
|
|
14465
14540
|
type: "string",
|
|
14466
14541
|
alias: "f",
|
|
14467
|
-
description:
|
|
14542
|
+
description: `File path to extract from. Supported: ${SUPPORTED_FILE_TYPES_TEXT}.`
|
|
14468
14543
|
},
|
|
14469
14544
|
model: {
|
|
14470
14545
|
type: "string",
|
|
@@ -14538,7 +14613,7 @@ const extractCommand = defineCommand({
|
|
|
14538
14613
|
let text$1 = "";
|
|
14539
14614
|
let filePath;
|
|
14540
14615
|
if (args.file) try {
|
|
14541
|
-
const input = await readExtractFileInput(args.file, aiConfig);
|
|
14616
|
+
const input = await readExtractFileInput(args.file, aiConfig, modelOverride);
|
|
14542
14617
|
text$1 = input.text;
|
|
14543
14618
|
filePath = input.filePath;
|
|
14544
14619
|
} catch (e) {
|
|
@@ -14647,7 +14722,7 @@ async function runInteractive(aiexDir, config, aiConfig, modelOverride) {
|
|
|
14647
14722
|
}
|
|
14648
14723
|
const fp = filePathStr;
|
|
14649
14724
|
try {
|
|
14650
|
-
const input = await readExtractFileInput(fp, aiConfig);
|
|
14725
|
+
const input = await readExtractFileInput(fp, aiConfig, modelOverride);
|
|
14651
14726
|
return runAuditedSingleExtraction({
|
|
14652
14727
|
aiexDir,
|
|
14653
14728
|
config,
|
|
@@ -15268,62 +15343,6 @@ function dataRoutes(config) {
|
|
|
15268
15343
|
return app;
|
|
15269
15344
|
}
|
|
15270
15345
|
|
|
15271
|
-
//#endregion
|
|
15272
|
-
//#region src/core/file-constants.ts
|
|
15273
|
-
const MAX_UPLOAD_SIZE = 150 * 1024 * 1024;
|
|
15274
|
-
const MAX_UPLOAD_SIZE_TEXT = "150MB";
|
|
15275
|
-
const SUPPORTED_MIME_TYPES = new Set([
|
|
15276
|
-
"image/png",
|
|
15277
|
-
"image/jpeg",
|
|
15278
|
-
"image/gif",
|
|
15279
|
-
"image/webp",
|
|
15280
|
-
"image/bmp",
|
|
15281
|
-
"image/svg+xml",
|
|
15282
|
-
"application/pdf",
|
|
15283
|
-
"text/plain",
|
|
15284
|
-
"text/markdown",
|
|
15285
|
-
"text/csv",
|
|
15286
|
-
"application/json",
|
|
15287
|
-
"text/html",
|
|
15288
|
-
"text/xml",
|
|
15289
|
-
"application/x-yaml",
|
|
15290
|
-
"text/yaml"
|
|
15291
|
-
]);
|
|
15292
|
-
const MIME_TO_EXT = {
|
|
15293
|
-
"image/png": "png",
|
|
15294
|
-
"image/jpeg": "jpg",
|
|
15295
|
-
"image/gif": "gif",
|
|
15296
|
-
"image/webp": "webp",
|
|
15297
|
-
"image/bmp": "bmp",
|
|
15298
|
-
"image/svg+xml": "svg",
|
|
15299
|
-
"application/pdf": "pdf",
|
|
15300
|
-
"text/plain": "txt",
|
|
15301
|
-
"text/markdown": "md",
|
|
15302
|
-
"text/csv": "csv",
|
|
15303
|
-
"application/json": "json",
|
|
15304
|
-
"text/html": "html",
|
|
15305
|
-
"text/xml": "xml",
|
|
15306
|
-
"application/x-yaml": "yaml",
|
|
15307
|
-
"text/yaml": "yaml"
|
|
15308
|
-
};
|
|
15309
|
-
function getExtensionFromMime(mimeType) {
|
|
15310
|
-
return MIME_TO_EXT[mimeType];
|
|
15311
|
-
}
|
|
15312
|
-
function isAllowedMimeType(mimeType) {
|
|
15313
|
-
return SUPPORTED_MIME_TYPES.has(mimeType);
|
|
15314
|
-
}
|
|
15315
|
-
var FileValidationError = class extends Error {
|
|
15316
|
-
constructor(message) {
|
|
15317
|
-
super(message);
|
|
15318
|
-
this.name = "FileValidationError";
|
|
15319
|
-
}
|
|
15320
|
-
};
|
|
15321
|
-
function validateFileUpload(file) {
|
|
15322
|
-
if (file.size === 0) throw new FileValidationError("Uploaded file is empty");
|
|
15323
|
-
if (file.size > MAX_UPLOAD_SIZE) throw new FileValidationError(`File size (${(file.size / 1024 / 1024).toFixed(1)}MB) exceeds ${MAX_UPLOAD_SIZE_TEXT} limit`);
|
|
15324
|
-
if (!isAllowedMimeType(file.type)) throw new FileValidationError(`Unsupported file type "${file.type}". Supported types: ${[...SUPPORTED_MIME_TYPES].join(", ")}`);
|
|
15325
|
-
}
|
|
15326
|
-
|
|
15327
15346
|
//#endregion
|
|
15328
15347
|
//#region src/server/routes/extract.ts
|
|
15329
15348
|
function getFormString(value) {
|
|
@@ -15340,9 +15359,26 @@ function safeUploadName(name$1) {
|
|
|
15340
15359
|
function safeUploadNameForMime(file) {
|
|
15341
15360
|
const safeName = safeUploadName(file.name);
|
|
15342
15361
|
const ext = getExtensionFromMime(file.type);
|
|
15343
|
-
if (!ext) throw new FileValidationError(
|
|
15362
|
+
if (!ext) throw new FileValidationError(unsupportedFileTypeMessage(file.type));
|
|
15344
15363
|
return `${path.parse(safeName).name || "upload"}.${ext}`;
|
|
15345
15364
|
}
|
|
15365
|
+
function jsonResponse(body, status) {
|
|
15366
|
+
return new Response(JSON.stringify(body), {
|
|
15367
|
+
status,
|
|
15368
|
+
headers: { "content-type": "application/json" }
|
|
15369
|
+
});
|
|
15370
|
+
}
|
|
15371
|
+
async function auditFailureResponse(aiexDir, auditId, error, status) {
|
|
15372
|
+
const record = await updateExtractionAuditRecord(aiexDir, auditId, {
|
|
15373
|
+
status: "failed",
|
|
15374
|
+
error
|
|
15375
|
+
});
|
|
15376
|
+
return jsonResponse({
|
|
15377
|
+
success: false,
|
|
15378
|
+
error: record.error,
|
|
15379
|
+
auditId: record.id
|
|
15380
|
+
}, status);
|
|
15381
|
+
}
|
|
15346
15382
|
async function saveUploadToFile(file, uploadsDir, id) {
|
|
15347
15383
|
validateFileUpload(file);
|
|
15348
15384
|
await fs.mkdir(uploadsDir, { recursive: true });
|
|
@@ -15353,85 +15389,23 @@ async function saveUploadToFile(file, uploadsDir, id) {
|
|
|
15353
15389
|
}
|
|
15354
15390
|
async function executeAuditedExtraction(input) {
|
|
15355
15391
|
const aiConfig = await readAIConfig(input.aiexDir);
|
|
15356
|
-
if (!aiConfig)
|
|
15357
|
-
|
|
15358
|
-
|
|
15359
|
-
error: "AI configuration not found. Configure AI settings first."
|
|
15360
|
-
});
|
|
15361
|
-
return new Response(JSON.stringify({
|
|
15362
|
-
success: false,
|
|
15363
|
-
error: record$1.error,
|
|
15364
|
-
auditId: record$1.id
|
|
15365
|
-
}), {
|
|
15366
|
-
status: 400,
|
|
15367
|
-
headers: { "content-type": "application/json" }
|
|
15368
|
-
});
|
|
15369
|
-
}
|
|
15370
|
-
if (!aiConfig.provider.apiKey) {
|
|
15371
|
-
const record$1 = await updateExtractionAuditRecord(input.aiexDir, input.auditId, {
|
|
15372
|
-
status: "failed",
|
|
15373
|
-
error: "API Key not configured. Configure AI settings first."
|
|
15374
|
-
});
|
|
15375
|
-
return new Response(JSON.stringify({
|
|
15376
|
-
success: false,
|
|
15377
|
-
error: record$1.error,
|
|
15378
|
-
auditId: record$1.id
|
|
15379
|
-
}), {
|
|
15380
|
-
status: 400,
|
|
15381
|
-
headers: { "content-type": "application/json" }
|
|
15382
|
-
});
|
|
15383
|
-
}
|
|
15384
|
-
if (!aiConfig.provider.models?.length) {
|
|
15385
|
-
const record$1 = await updateExtractionAuditRecord(input.aiexDir, input.auditId, {
|
|
15386
|
-
status: "failed",
|
|
15387
|
-
error: "No models configured. Add at least one model in AI Settings."
|
|
15388
|
-
});
|
|
15389
|
-
return new Response(JSON.stringify({
|
|
15390
|
-
success: false,
|
|
15391
|
-
error: record$1.error,
|
|
15392
|
-
auditId: record$1.id
|
|
15393
|
-
}), {
|
|
15394
|
-
status: 400,
|
|
15395
|
-
headers: { "content-type": "application/json" }
|
|
15396
|
-
});
|
|
15397
|
-
}
|
|
15392
|
+
if (!aiConfig) return auditFailureResponse(input.aiexDir, input.auditId, "AI configuration not found. Configure AI settings first.", 400);
|
|
15393
|
+
if (!aiConfig.provider.apiKey) return auditFailureResponse(input.aiexDir, input.auditId, "API Key not configured. Configure AI settings first.", 400);
|
|
15394
|
+
if (!aiConfig.provider.models?.length) return auditFailureResponse(input.aiexDir, input.auditId, "No models configured. Add at least one model in AI Settings.", 400);
|
|
15398
15395
|
const modelOverride = input.modelName ? aiConfig.provider.models.find((model) => model.name === input.modelName) : void 0;
|
|
15399
|
-
if (input.modelName && !modelOverride) {
|
|
15400
|
-
const record$1 = await updateExtractionAuditRecord(input.aiexDir, input.auditId, {
|
|
15401
|
-
status: "failed",
|
|
15402
|
-
error: `Model "${input.modelName}" not found in AI settings`
|
|
15403
|
-
});
|
|
15404
|
-
return new Response(JSON.stringify({
|
|
15405
|
-
success: false,
|
|
15406
|
-
error: record$1.error,
|
|
15407
|
-
auditId: record$1.id
|
|
15408
|
-
}), {
|
|
15409
|
-
status: 400,
|
|
15410
|
-
headers: { "content-type": "application/json" }
|
|
15411
|
-
});
|
|
15412
|
-
}
|
|
15396
|
+
if (input.modelName && !modelOverride) return auditFailureResponse(input.aiexDir, input.auditId, `Model "${input.modelName}" not found in AI settings`, 400);
|
|
15413
15397
|
let inputText = input.text;
|
|
15414
15398
|
let inputFilePath = input.filePath;
|
|
15415
|
-
if (input.filePath) {
|
|
15399
|
+
if (input.filePath) try {
|
|
15416
15400
|
const source = await readExtractFileInput(input.filePath, aiConfig);
|
|
15417
15401
|
inputText = source.text;
|
|
15418
15402
|
inputFilePath = source.filePath;
|
|
15403
|
+
} catch (error) {
|
|
15404
|
+
if (isMissingUploadFileError(error)) return auditFailureResponse(input.aiexDir, input.auditId, MISSING_UPLOAD_FILE_TEXT, 400);
|
|
15405
|
+
throw error;
|
|
15419
15406
|
}
|
|
15420
15407
|
const result = await extractSingle(input.aiexDir, input.config, aiConfig, input.schemaName, inputText, inputFilePath, modelOverride, { quiet: true });
|
|
15421
|
-
if (!result.success)
|
|
15422
|
-
const record$1 = await updateExtractionAuditRecord(input.aiexDir, input.auditId, {
|
|
15423
|
-
status: "failed",
|
|
15424
|
-
error: result.error || "Extraction failed"
|
|
15425
|
-
});
|
|
15426
|
-
return new Response(JSON.stringify({
|
|
15427
|
-
success: false,
|
|
15428
|
-
error: record$1.error,
|
|
15429
|
-
auditId: record$1.id
|
|
15430
|
-
}), {
|
|
15431
|
-
status: 500,
|
|
15432
|
-
headers: { "content-type": "application/json" }
|
|
15433
|
-
});
|
|
15434
|
-
}
|
|
15408
|
+
if (!result.success) return auditFailureResponse(input.aiexDir, input.auditId, result.error || "Extraction failed", 500);
|
|
15435
15409
|
const notionPages = [];
|
|
15436
15410
|
if (aiConfig.notion?.enabled && aiConfig.notion.schemas?.[input.schemaName]?.databaseId?.trim()) try {
|
|
15437
15411
|
if (!result.data || typeof result.data !== "object" || Array.isArray(result.data)) throw new Error("Extraction result is not an object and cannot be written to Notion.");
|
|
@@ -15445,14 +15419,11 @@ async function executeAuditedExtraction(input) {
|
|
|
15445
15419
|
tokensUsed: result.tokensUsed,
|
|
15446
15420
|
error: error instanceof Error ? error.message : String(error)
|
|
15447
15421
|
});
|
|
15448
|
-
return
|
|
15422
|
+
return jsonResponse({
|
|
15449
15423
|
success: false,
|
|
15450
15424
|
error: record$1.error,
|
|
15451
15425
|
auditId: record$1.id
|
|
15452
|
-
}
|
|
15453
|
-
status: 500,
|
|
15454
|
-
headers: { "content-type": "application/json" }
|
|
15455
|
-
});
|
|
15426
|
+
}, 500);
|
|
15456
15427
|
}
|
|
15457
15428
|
const record = await updateExtractionAuditRecord(input.aiexDir, input.auditId, {
|
|
15458
15429
|
status: "succeeded",
|
|
@@ -15462,7 +15433,7 @@ async function executeAuditedExtraction(input) {
|
|
|
15462
15433
|
notionPages: notionPages.length > 0 ? notionPages : void 0,
|
|
15463
15434
|
tokensUsed: result.tokensUsed
|
|
15464
15435
|
});
|
|
15465
|
-
return
|
|
15436
|
+
return jsonResponse({
|
|
15466
15437
|
success: true,
|
|
15467
15438
|
outputPath: record.outputPath,
|
|
15468
15439
|
outputName: record.outputName,
|
|
@@ -15470,10 +15441,7 @@ async function executeAuditedExtraction(input) {
|
|
|
15470
15441
|
notionPages: record.notionPages,
|
|
15471
15442
|
tokensUsed: record.tokensUsed,
|
|
15472
15443
|
auditId: record.id
|
|
15473
|
-
}
|
|
15474
|
-
status: 200,
|
|
15475
|
-
headers: { "content-type": "application/json" }
|
|
15476
|
-
});
|
|
15444
|
+
}, 200);
|
|
15477
15445
|
}
|
|
15478
15446
|
function extractRoutes(config) {
|
|
15479
15447
|
const app = new Hono();
|
|
@@ -15524,7 +15492,8 @@ function extractRoutes(config) {
|
|
|
15524
15492
|
});
|
|
15525
15493
|
return c.json({
|
|
15526
15494
|
success: false,
|
|
15527
|
-
error: e.message
|
|
15495
|
+
error: e.message,
|
|
15496
|
+
auditId: audit.id
|
|
15528
15497
|
}, 400);
|
|
15529
15498
|
}
|
|
15530
15499
|
throw e;
|
|
@@ -2,8 +2,8 @@ import { createRequire } from "node:module";
|
|
|
2
2
|
import fs from "node:fs/promises";
|
|
3
3
|
import path from "node:path";
|
|
4
4
|
import process from "node:process";
|
|
5
|
-
import { readFile, writeFile } from "jsonfile";
|
|
6
5
|
import { fileURLToPath } from "node:url";
|
|
6
|
+
import { readFile, writeFile } from "jsonfile";
|
|
7
7
|
import Database from "better-sqlite3";
|
|
8
8
|
import * as esbuild from "esbuild";
|
|
9
9
|
import lockfile from "proper-lockfile";
|
|
@@ -2,6 +2,7 @@ import fs from "node:fs/promises";
|
|
|
2
2
|
import os from "node:os";
|
|
3
3
|
import path from "node:path";
|
|
4
4
|
import process from "node:process";
|
|
5
|
+
import { fileURLToPath } from "node:url";
|
|
5
6
|
import Conf from "conf";
|
|
6
7
|
import { readFile, writeFile } from "jsonfile";
|
|
7
8
|
import { z } from "zod";
|
|
@@ -25,6 +26,7 @@ function buildDoctorDiagnostics(input) {
|
|
|
25
26
|
os: `${input.osType} ${input.osRelease}`,
|
|
26
27
|
cwd: input.cwd
|
|
27
28
|
},
|
|
29
|
+
imageOcr: { ...input.imageOcr },
|
|
28
30
|
config: {
|
|
29
31
|
path: input.configPath,
|
|
30
32
|
keys: [...input.configStoreKeys].sort()
|
|
@@ -56,6 +58,13 @@ function doctorDiagnosticsTableRows(d) {
|
|
|
56
58
|
rows.push(["aiModels", p.aiModelCount ? p.aiModels.join(", ") : "none"]);
|
|
57
59
|
rows.push(["aiProvider", p.aiProvider ?? "none"]);
|
|
58
60
|
rows.push(["aiConnectionOk", p.aiConnectionOk === null ? "not tested" : String(p.aiConnectionOk)]);
|
|
61
|
+
rows.push(["imageOcrPlatform", String(d.imageOcr.platformSupported)]);
|
|
62
|
+
rows.push(["imageOcrDependency", String(d.imageOcr.dependencyLoaded)]);
|
|
63
|
+
rows.push(["imageOcrOk", d.imageOcr.ocrOk === null ? "not tested" : String(d.imageOcr.ocrOk)]);
|
|
64
|
+
if (d.imageOcr.imagePath) rows.push(["imageOcrImage", d.imageOcr.imagePath]);
|
|
65
|
+
if (d.imageOcr.recognizedText) rows.push(["imageOcrText", d.imageOcr.recognizedText]);
|
|
66
|
+
if (typeof d.imageOcr.confidence === "number") rows.push(["imageOcrConfidence", `${(d.imageOcr.confidence * 100).toFixed(1)}%`]);
|
|
67
|
+
if (d.imageOcr.error) rows.push(["imageOcrError", d.imageOcr.error]);
|
|
59
68
|
rows.push(["hasDatabase", String(p.hasDatabase)]);
|
|
60
69
|
rows.push(["migrations", String(p.migrationCount)]);
|
|
61
70
|
for (const err of p.errors) rows.push(["error", err]);
|
|
@@ -65,7 +74,7 @@ function doctorDiagnosticsTableRows(d) {
|
|
|
65
74
|
//#endregion
|
|
66
75
|
//#region package.json
|
|
67
76
|
var name = "aiex-cli";
|
|
68
|
-
var version = "0.0.3-beta.
|
|
77
|
+
var version = "0.0.3-beta.3";
|
|
69
78
|
var description = "JSON Schema → SQLite with AI-powered data extraction";
|
|
70
79
|
var package_default = {
|
|
71
80
|
name,
|
|
@@ -156,6 +165,7 @@ var package_default = {
|
|
|
156
165
|
"update-notifier": "catalog:",
|
|
157
166
|
"zod": "catalog:"
|
|
158
167
|
},
|
|
168
|
+
optionalDependencies: { "@napi-rs/system-ocr": "catalog:" },
|
|
159
169
|
devDependencies: {
|
|
160
170
|
"@antfu/eslint-config": "catalog:cli",
|
|
161
171
|
"@antfu/ni": "catalog:cli",
|
|
@@ -210,6 +220,15 @@ const PromptConfigSchema = z.object({
|
|
|
210
220
|
userTemplate: z.string().min(1)
|
|
211
221
|
});
|
|
212
222
|
const ExtractionConfigSchema = z.object({ outputDir: z.string().min(1) });
|
|
223
|
+
const ImageOcrConfigSchema = z.object({
|
|
224
|
+
ocrFallback: z.enum([
|
|
225
|
+
"auto",
|
|
226
|
+
"off",
|
|
227
|
+
"local"
|
|
228
|
+
]).default("auto").optional(),
|
|
229
|
+
ocrLanguages: z.string().min(1).optional(),
|
|
230
|
+
ocrMinConfidence: z.number().min(0).max(1).optional()
|
|
231
|
+
});
|
|
213
232
|
const ExternalPdfConverterConfigSchema = z.object({
|
|
214
233
|
command: z.string().min(1),
|
|
215
234
|
args: z.array(z.string()),
|
|
@@ -248,6 +267,7 @@ const AIConfigSchema = z.object({
|
|
|
248
267
|
provider: AIProviderConfigSchema,
|
|
249
268
|
prompt: PromptConfigSchema,
|
|
250
269
|
extraction: ExtractionConfigSchema,
|
|
270
|
+
image: ImageOcrConfigSchema.optional(),
|
|
251
271
|
pdf: PdfConfigSchema.optional(),
|
|
252
272
|
langfuse: LangfuseConfigSchema.optional(),
|
|
253
273
|
notion: NotionConfigSchema.optional()
|
|
@@ -290,6 +310,11 @@ Extraction requirements:
|
|
|
290
310
|
{text}`
|
|
291
311
|
};
|
|
292
312
|
const DEFAULT_EXTRACTION_CONFIG = { outputDir: ".aiex/extracted" };
|
|
313
|
+
const DEFAULT_IMAGE_OCR_CONFIG = {
|
|
314
|
+
ocrFallback: "auto",
|
|
315
|
+
ocrLanguages: "en-US, zh-Hans",
|
|
316
|
+
ocrMinConfidence: 0
|
|
317
|
+
};
|
|
293
318
|
const DEFAULT_MINERU_CONFIG = {
|
|
294
319
|
command: "mineru",
|
|
295
320
|
args: [
|
|
@@ -323,6 +348,7 @@ const DEFAULT_AI_CONFIG = {
|
|
|
323
348
|
provider: DEFAULT_PROVIDER_CONFIG,
|
|
324
349
|
prompt: DEFAULT_PROMPT_CONFIG,
|
|
325
350
|
extraction: DEFAULT_EXTRACTION_CONFIG,
|
|
351
|
+
image: DEFAULT_IMAGE_OCR_CONFIG,
|
|
326
352
|
pdf: DEFAULT_PDF_CONFIG
|
|
327
353
|
};
|
|
328
354
|
|
|
@@ -364,6 +390,111 @@ async function addToGitignore(aiexDir, fileName) {
|
|
|
364
390
|
}
|
|
365
391
|
}
|
|
366
392
|
|
|
393
|
+
//#endregion
|
|
394
|
+
//#region src/core/image-ocr/index.ts
|
|
395
|
+
const DEFAULT_OCR_LANGUAGES = "en-US, zh-Hans";
|
|
396
|
+
const SELF_CHECK_EXPECTED_TEXT = "AIEX";
|
|
397
|
+
const defaultRuntime = {
|
|
398
|
+
platform: process.platform,
|
|
399
|
+
async loadLocalOcr() {
|
|
400
|
+
return await import("@napi-rs/system-ocr");
|
|
401
|
+
}
|
|
402
|
+
};
|
|
403
|
+
function imageOcrMode(config) {
|
|
404
|
+
return config?.ocrFallback ?? "auto";
|
|
405
|
+
}
|
|
406
|
+
function hasVisionModel(aiConfig, modelOverride) {
|
|
407
|
+
if (modelOverride) return modelOverride.capabilities.vision;
|
|
408
|
+
return aiConfig?.provider.models.some((model) => model.capabilities.vision) ?? true;
|
|
409
|
+
}
|
|
410
|
+
function shouldUseImageOcrFallback(aiConfig, modelOverride, runtime = defaultRuntime) {
|
|
411
|
+
if (hasVisionModel(aiConfig, modelOverride)) return false;
|
|
412
|
+
const mode = imageOcrMode(aiConfig?.image);
|
|
413
|
+
if (mode === "off") return false;
|
|
414
|
+
if (mode === "local") return true;
|
|
415
|
+
return isLocalOcrPlatform(runtime.platform);
|
|
416
|
+
}
|
|
417
|
+
function isLocalOcrPlatform(platform) {
|
|
418
|
+
return platform === "darwin" || platform === "win32";
|
|
419
|
+
}
|
|
420
|
+
function parseOcrLanguages(languages) {
|
|
421
|
+
return (languages ?? DEFAULT_OCR_LANGUAGES).split(",").map((language) => language.trim()).filter(Boolean);
|
|
422
|
+
}
|
|
423
|
+
async function recognizeImageText(imagePath, config, runtime = defaultRuntime) {
|
|
424
|
+
const mode = imageOcrMode(config);
|
|
425
|
+
if (!isLocalOcrPlatform(runtime.platform)) throw new Error(`Local OCR is only available on macOS or Windows. Current platform: ${runtime.platform}.`);
|
|
426
|
+
if (mode === "off") throw new Error("Image OCR fallback is disabled in AI settings.");
|
|
427
|
+
let localOcr;
|
|
428
|
+
try {
|
|
429
|
+
localOcr = await runtime.loadLocalOcr();
|
|
430
|
+
} catch (error) {
|
|
431
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
432
|
+
throw new Error(`Local OCR is unavailable. Install optional dependency @napi-rs/system-ocr and approve its native build scripts. ${message}`);
|
|
433
|
+
}
|
|
434
|
+
const result = await localOcr.recognize(imagePath, localOcr.OcrAccuracy.Accurate, parseOcrLanguages(config?.ocrLanguages));
|
|
435
|
+
const text = result.text.trim();
|
|
436
|
+
if (!text) throw new Error("Local OCR did not recognize any text in the image.");
|
|
437
|
+
const confidence = result.confidence;
|
|
438
|
+
const minConfidence = config?.ocrMinConfidence ?? 0;
|
|
439
|
+
if (confidence < minConfidence) throw new Error(`Local OCR confidence ${(confidence * 100).toFixed(1)}% is below the configured minimum ${(minConfidence * 100).toFixed(1)}%.`);
|
|
440
|
+
return {
|
|
441
|
+
text,
|
|
442
|
+
confidence
|
|
443
|
+
};
|
|
444
|
+
}
|
|
445
|
+
function normalizeOcrText(text) {
|
|
446
|
+
return text.replace(/\s+/g, "").trim().toUpperCase();
|
|
447
|
+
}
|
|
448
|
+
async function checkImageOcrAvailability(imagePath, runtime = defaultRuntime) {
|
|
449
|
+
if (!isLocalOcrPlatform(runtime.platform)) return {
|
|
450
|
+
platformSupported: false,
|
|
451
|
+
dependencyLoaded: false,
|
|
452
|
+
ocrOk: null,
|
|
453
|
+
imagePath,
|
|
454
|
+
error: `Local OCR is only available on macOS or Windows. Current platform: ${runtime.platform}.`
|
|
455
|
+
};
|
|
456
|
+
let localOcr;
|
|
457
|
+
try {
|
|
458
|
+
localOcr = await runtime.loadLocalOcr();
|
|
459
|
+
} catch (error) {
|
|
460
|
+
return {
|
|
461
|
+
platformSupported: true,
|
|
462
|
+
dependencyLoaded: false,
|
|
463
|
+
ocrOk: null,
|
|
464
|
+
imagePath,
|
|
465
|
+
error: error instanceof Error ? error.message : String(error)
|
|
466
|
+
};
|
|
467
|
+
}
|
|
468
|
+
if (!imagePath) return {
|
|
469
|
+
platformSupported: true,
|
|
470
|
+
dependencyLoaded: true,
|
|
471
|
+
ocrOk: null,
|
|
472
|
+
error: "No OCR self-check image was found."
|
|
473
|
+
};
|
|
474
|
+
try {
|
|
475
|
+
const result = await localOcr.recognize(imagePath, localOcr.OcrAccuracy.Accurate, ["en-US"]);
|
|
476
|
+
const recognizedText = result.text.trim();
|
|
477
|
+
const ocrOk = normalizeOcrText(recognizedText).includes(SELF_CHECK_EXPECTED_TEXT);
|
|
478
|
+
return {
|
|
479
|
+
platformSupported: true,
|
|
480
|
+
dependencyLoaded: true,
|
|
481
|
+
ocrOk,
|
|
482
|
+
imagePath,
|
|
483
|
+
recognizedText,
|
|
484
|
+
confidence: result.confidence,
|
|
485
|
+
error: ocrOk ? void 0 : `Expected OCR text "${SELF_CHECK_EXPECTED_TEXT}" was not recognized.`
|
|
486
|
+
};
|
|
487
|
+
} catch (error) {
|
|
488
|
+
return {
|
|
489
|
+
platformSupported: true,
|
|
490
|
+
dependencyLoaded: true,
|
|
491
|
+
ocrOk: false,
|
|
492
|
+
imagePath,
|
|
493
|
+
error: error instanceof Error ? error.message : String(error)
|
|
494
|
+
};
|
|
495
|
+
}
|
|
496
|
+
}
|
|
497
|
+
|
|
367
498
|
//#endregion
|
|
368
499
|
//#region src/core/schema-sqlite/generator.ts
|
|
369
500
|
function generateColumnDefinition(column) {
|
|
@@ -715,6 +846,7 @@ function generateDrizzleConfig() {
|
|
|
715
846
|
//#endregion
|
|
716
847
|
//#region src/core/doctor-collector.ts
|
|
717
848
|
const V1_SUFFIX_RE = /\/v1\/?$/;
|
|
849
|
+
const MODULE_DIR = path.dirname(fileURLToPath(import.meta.url));
|
|
718
850
|
async function checkConnection(baseURL) {
|
|
719
851
|
try {
|
|
720
852
|
const base = baseURL.replace(V1_SUFFIX_RE, "");
|
|
@@ -723,10 +855,26 @@ async function checkConnection(baseURL) {
|
|
|
723
855
|
return false;
|
|
724
856
|
}
|
|
725
857
|
}
|
|
858
|
+
async function findImageOcrSelfCheckLogo() {
|
|
859
|
+
const candidates = [
|
|
860
|
+
path.resolve(MODULE_DIR, "logo.png"),
|
|
861
|
+
path.resolve(MODULE_DIR, "assets/logo.png"),
|
|
862
|
+
path.resolve(MODULE_DIR, "../../assets/logo.png"),
|
|
863
|
+
path.resolve(MODULE_DIR, "../../../web/public/logo.png"),
|
|
864
|
+
path.resolve(MODULE_DIR, "../../web/public/logo.png"),
|
|
865
|
+
path.resolve(MODULE_DIR, "../../dist/web/logo.png"),
|
|
866
|
+
path.resolve(MODULE_DIR, "web/logo.png")
|
|
867
|
+
];
|
|
868
|
+
for (const candidate of candidates) try {
|
|
869
|
+
await fs.access(candidate);
|
|
870
|
+
return candidate;
|
|
871
|
+
} catch {}
|
|
872
|
+
}
|
|
726
873
|
async function collectDoctorDiagnostics(options = {}) {
|
|
727
874
|
const config = options.config ?? createConfig();
|
|
728
875
|
const cwd = process.cwd();
|
|
729
876
|
const errors = [];
|
|
877
|
+
const imageOcrLogoPath = await findImageOcrSelfCheckLogo();
|
|
730
878
|
const migConfig = createMigrationConfig(cwd);
|
|
731
879
|
const aiexDir = path.dirname(migConfig.schemaPath);
|
|
732
880
|
const dirExists = await fs.stat(aiexDir).then((s) => s.isDirectory()).catch(() => false);
|
|
@@ -778,6 +926,7 @@ async function collectDoctorDiagnostics(options = {}) {
|
|
|
778
926
|
osType: os.type(),
|
|
779
927
|
osRelease: os.release(),
|
|
780
928
|
cwd,
|
|
929
|
+
imageOcr: await checkImageOcrAvailability(imageOcrLogoPath),
|
|
781
930
|
configPath: config.path,
|
|
782
931
|
configStoreKeys: Object.keys(config.store),
|
|
783
932
|
project: {
|
|
@@ -799,4 +948,4 @@ async function collectDoctorDiagnostics(options = {}) {
|
|
|
799
948
|
}
|
|
800
949
|
|
|
801
950
|
//#endregion
|
|
802
|
-
export {
|
|
951
|
+
export { package_default as C, formatDoctorDiagnosticsJson as D, doctorDiagnosticsTableRows as E, name as S, buildDoctorDiagnostics as T, PLACEHOLDER_TEXT as _, parseJsonSchema as a, seedConfig as b, recognizeImageText as c, readAIConfig as d, writeAIConfig as f, PLACEHOLDER_SCHEMA as g, DEFAULT_PROMPT_CONFIG as h, JsonSchemaDefinitionSchema as i, shouldUseImageOcrFallback as l, DEFAULT_MINERU_CONFIG as m, createMigrationConfig as n, toSnakeCase as o, DEFAULT_MARKITDOWN_CONFIG as p, generateDrizzleConfig as r, generateDrizzleSchema as s, collectDoctorDiagnostics as t, getDefaultAIConfig as u, AIConfigSchema as v, version as w, description as x, createConfig as y };
|
package/dist/index.d.mts
CHANGED
|
@@ -19,6 +19,15 @@ interface DoctorDiagnostics {
|
|
|
19
19
|
os: string;
|
|
20
20
|
cwd: string;
|
|
21
21
|
};
|
|
22
|
+
imageOcr: {
|
|
23
|
+
platformSupported: boolean;
|
|
24
|
+
dependencyLoaded: boolean;
|
|
25
|
+
ocrOk: boolean | null;
|
|
26
|
+
imagePath?: string;
|
|
27
|
+
recognizedText?: string;
|
|
28
|
+
confidence?: number;
|
|
29
|
+
error?: string;
|
|
30
|
+
};
|
|
22
31
|
config: {
|
|
23
32
|
path: string;
|
|
24
33
|
keys: string[];
|
|
@@ -53,6 +62,7 @@ declare function buildDoctorDiagnostics(input: {
|
|
|
53
62
|
osType: string;
|
|
54
63
|
osRelease: string;
|
|
55
64
|
cwd: string;
|
|
65
|
+
imageOcr: DoctorDiagnostics['imageOcr'];
|
|
56
66
|
configPath: string;
|
|
57
67
|
configStoreKeys: string[];
|
|
58
68
|
project: DoctorDiagnostics['project'];
|
package/dist/index.mjs
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { D as formatDoctorDiagnosticsJson, E as doctorDiagnosticsTableRows, T as buildDoctorDiagnostics, a as parseJsonSchema, i as JsonSchemaDefinitionSchema, n as createMigrationConfig, r as generateDrizzleConfig, s as generateDrizzleSchema, t as collectDoctorDiagnostics } from "./doctor-collector-Cz7mb2L8.mjs";
|
|
2
2
|
|
|
3
3
|
export { JsonSchemaDefinitionSchema, buildDoctorDiagnostics, collectDoctorDiagnostics, createMigrationConfig, doctorDiagnosticsTableRows, formatDoctorDiagnosticsJson, generateDrizzleConfig, generateDrizzleSchema, parseJsonSchema };
|