aiex-cli 0.0.3-beta.2 → 0.0.3-beta.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.mjs CHANGED
@@ -1,12 +1,12 @@
1
- import { S as version, T as formatDoctorDiagnosticsJson, _ as createConfig, a as parseJsonSchema, b as name, c as getDefaultAIConfig, d as DEFAULT_MARKITDOWN_CONFIG, f as DEFAULT_MINERU_CONFIG, g as AIConfigSchema, h as PLACEHOLDER_TEXT, i as JsonSchemaDefinitionSchema, l as readAIConfig, m as PLACEHOLDER_SCHEMA, n as createMigrationConfig, o as toSnakeCase, p as DEFAULT_PROMPT_CONFIG, s as generateDrizzleSchema, t as collectDoctorDiagnostics, u as writeAIConfig, v as seedConfig, w as doctorDiagnosticsTableRows, x as package_default, y as description } from "./doctor-collector-Bnkbl48V.mjs";
1
+ import { C as package_default, D as formatDoctorDiagnosticsJson, E as doctorDiagnosticsTableRows, S as name, _ as PLACEHOLDER_TEXT, a as parseJsonSchema, b as seedConfig, c as recognizeImageText, d as readAIConfig, f as writeAIConfig, g as PLACEHOLDER_SCHEMA, h as DEFAULT_PROMPT_CONFIG, i as JsonSchemaDefinitionSchema, l as shouldUseImageOcrFallback, m as DEFAULT_MINERU_CONFIG, n as createMigrationConfig, o as toSnakeCase, p as DEFAULT_MARKITDOWN_CONFIG, s as generateDrizzleSchema, t as collectDoctorDiagnostics, u as getDefaultAIConfig, v as AIConfigSchema, w as version, x as description, y as createConfig } from "./doctor-collector-nMMG_h-w.mjs";
2
2
  import { createRequire } from "node:module";
3
3
  import fs from "node:fs/promises";
4
4
  import os from "node:os";
5
5
  import path from "node:path";
6
6
  import process from "node:process";
7
+ import { fileURLToPath } from "node:url";
7
8
  import { readFile, writeFile } from "jsonfile";
8
9
  import { ZodError, z } from "zod";
9
- import { fileURLToPath } from "node:url";
10
10
  import { defineCommand, runMain } from "citty";
11
11
  import { consola } from "consola";
12
12
  import updateNotifier from "update-notifier";
@@ -14078,14 +14078,21 @@ async function listSchemas(aiexDir) {
14078
14078
  return [];
14079
14079
  }
14080
14080
  }
14081
- async function readExtractFileInput(filePath, aiConfig) {
14081
+ async function readExtractFileInput(filePath, aiConfig, modelOverride) {
14082
14082
  const stat = fs$1.statSync(filePath);
14083
14083
  if (stat.size > MAX_UPLOAD_SIZE) throw new Error(`File size (${bytesToMB(stat.size).toFixed(1)}MB) exceeds ${MAX_UPLOAD_SIZE_TEXT} limit: ${filePath}`);
14084
14084
  const ext = path.extname(filePath).toLowerCase().replace(".", "");
14085
- if (FILE_PART_EXTENSIONS.has(ext)) return {
14086
- text: "",
14087
- filePath
14088
- };
14085
+ if (FILE_PART_EXTENSIONS.has(ext)) {
14086
+ if (shouldUseImageOcrFallback(aiConfig, modelOverride)) {
14087
+ const result = await recognizeImageText(filePath, aiConfig?.image);
14088
+ consola.info(`Extracted image text via local OCR (confidence: ${(result.confidence * 100).toFixed(1)}%)`);
14089
+ return { text: result.text };
14090
+ }
14091
+ return {
14092
+ text: "",
14093
+ filePath
14094
+ };
14095
+ }
14089
14096
  if (ext === "pdf") {
14090
14097
  const buffer = await fs.readFile(filePath);
14091
14098
  const converter = createPdfConverter(aiConfig?.pdf);
@@ -14203,7 +14210,7 @@ async function processOneFile(aiexDir, config, aiConfig, schemaName, filePath, m
14203
14210
  }
14204
14211
  });
14205
14212
  try {
14206
- const input = await readExtractFileInput(filePath, aiConfig);
14213
+ const input = await readExtractFileInput(filePath, aiConfig, modelOverride);
14207
14214
  const r = await extractSingle(aiexDir, config, aiConfig, schemaName, input.text, input.filePath, modelOverride, {
14208
14215
  quiet: false,
14209
14216
  insert: options?.insert
@@ -14606,7 +14613,7 @@ const extractCommand = defineCommand({
14606
14613
  let text$1 = "";
14607
14614
  let filePath;
14608
14615
  if (args.file) try {
14609
- const input = await readExtractFileInput(args.file, aiConfig);
14616
+ const input = await readExtractFileInput(args.file, aiConfig, modelOverride);
14610
14617
  text$1 = input.text;
14611
14618
  filePath = input.filePath;
14612
14619
  } catch (e) {
@@ -14715,7 +14722,7 @@ async function runInteractive(aiexDir, config, aiConfig, modelOverride) {
14715
14722
  }
14716
14723
  const fp = filePathStr;
14717
14724
  try {
14718
- const input = await readExtractFileInput(fp, aiConfig);
14725
+ const input = await readExtractFileInput(fp, aiConfig, modelOverride);
14719
14726
  return runAuditedSingleExtraction({
14720
14727
  aiexDir,
14721
14728
  config,
@@ -2,8 +2,8 @@ import { createRequire } from "node:module";
2
2
  import fs from "node:fs/promises";
3
3
  import path from "node:path";
4
4
  import process from "node:process";
5
- import { readFile, writeFile } from "jsonfile";
6
5
  import { fileURLToPath } from "node:url";
6
+ import { readFile, writeFile } from "jsonfile";
7
7
  import Database from "better-sqlite3";
8
8
  import * as esbuild from "esbuild";
9
9
  import lockfile from "proper-lockfile";
@@ -2,6 +2,7 @@ import fs from "node:fs/promises";
2
2
  import os from "node:os";
3
3
  import path from "node:path";
4
4
  import process from "node:process";
5
+ import { fileURLToPath } from "node:url";
5
6
  import Conf from "conf";
6
7
  import { readFile, writeFile } from "jsonfile";
7
8
  import { z } from "zod";
@@ -25,6 +26,7 @@ function buildDoctorDiagnostics(input) {
25
26
  os: `${input.osType} ${input.osRelease}`,
26
27
  cwd: input.cwd
27
28
  },
29
+ imageOcr: { ...input.imageOcr },
28
30
  config: {
29
31
  path: input.configPath,
30
32
  keys: [...input.configStoreKeys].sort()
@@ -56,6 +58,13 @@ function doctorDiagnosticsTableRows(d) {
56
58
  rows.push(["aiModels", p.aiModelCount ? p.aiModels.join(", ") : "none"]);
57
59
  rows.push(["aiProvider", p.aiProvider ?? "none"]);
58
60
  rows.push(["aiConnectionOk", p.aiConnectionOk === null ? "not tested" : String(p.aiConnectionOk)]);
61
+ rows.push(["imageOcrPlatform", String(d.imageOcr.platformSupported)]);
62
+ rows.push(["imageOcrDependency", String(d.imageOcr.dependencyLoaded)]);
63
+ rows.push(["imageOcrOk", d.imageOcr.ocrOk === null ? "not tested" : String(d.imageOcr.ocrOk)]);
64
+ if (d.imageOcr.imagePath) rows.push(["imageOcrImage", d.imageOcr.imagePath]);
65
+ if (d.imageOcr.recognizedText) rows.push(["imageOcrText", d.imageOcr.recognizedText]);
66
+ if (typeof d.imageOcr.confidence === "number") rows.push(["imageOcrConfidence", `${(d.imageOcr.confidence * 100).toFixed(1)}%`]);
67
+ if (d.imageOcr.error) rows.push(["imageOcrError", d.imageOcr.error]);
59
68
  rows.push(["hasDatabase", String(p.hasDatabase)]);
60
69
  rows.push(["migrations", String(p.migrationCount)]);
61
70
  for (const err of p.errors) rows.push(["error", err]);
@@ -65,7 +74,7 @@ function doctorDiagnosticsTableRows(d) {
65
74
  //#endregion
66
75
  //#region package.json
67
76
  var name = "aiex-cli";
68
- var version = "0.0.3-beta.2";
77
+ var version = "0.0.3-beta.4";
69
78
  var description = "JSON Schema → SQLite with AI-powered data extraction";
70
79
  var package_default = {
71
80
  name,
@@ -102,7 +111,10 @@ var package_default = {
102
111
  main: "./dist/index.mjs",
103
112
  module: "./dist/index.mjs",
104
113
  types: "./dist/index.d.mts",
105
- bin: { "aiex": "./bin/cli.mjs" },
114
+ bin: {
115
+ "aiex": "./bin/cli.mjs",
116
+ "aiex-cli": "./bin/cli.mjs"
117
+ },
106
118
  files: [
107
119
  "bin",
108
120
  "dist",
@@ -156,6 +168,7 @@ var package_default = {
156
168
  "update-notifier": "catalog:",
157
169
  "zod": "catalog:"
158
170
  },
171
+ optionalDependencies: { "@napi-rs/system-ocr": "catalog:" },
159
172
  devDependencies: {
160
173
  "@antfu/eslint-config": "catalog:cli",
161
174
  "@antfu/ni": "catalog:cli",
@@ -210,6 +223,15 @@ const PromptConfigSchema = z.object({
210
223
  userTemplate: z.string().min(1)
211
224
  });
212
225
  const ExtractionConfigSchema = z.object({ outputDir: z.string().min(1) });
226
+ const ImageOcrConfigSchema = z.object({
227
+ ocrFallback: z.enum([
228
+ "auto",
229
+ "off",
230
+ "local"
231
+ ]).default("auto").optional(),
232
+ ocrLanguages: z.string().min(1).optional(),
233
+ ocrMinConfidence: z.number().min(0).max(1).optional()
234
+ });
213
235
  const ExternalPdfConverterConfigSchema = z.object({
214
236
  command: z.string().min(1),
215
237
  args: z.array(z.string()),
@@ -248,6 +270,7 @@ const AIConfigSchema = z.object({
248
270
  provider: AIProviderConfigSchema,
249
271
  prompt: PromptConfigSchema,
250
272
  extraction: ExtractionConfigSchema,
273
+ image: ImageOcrConfigSchema.optional(),
251
274
  pdf: PdfConfigSchema.optional(),
252
275
  langfuse: LangfuseConfigSchema.optional(),
253
276
  notion: NotionConfigSchema.optional()
@@ -290,6 +313,11 @@ Extraction requirements:
290
313
  {text}`
291
314
  };
292
315
  const DEFAULT_EXTRACTION_CONFIG = { outputDir: ".aiex/extracted" };
316
+ const DEFAULT_IMAGE_OCR_CONFIG = {
317
+ ocrFallback: "auto",
318
+ ocrLanguages: "en-US, zh-Hans",
319
+ ocrMinConfidence: 0
320
+ };
293
321
  const DEFAULT_MINERU_CONFIG = {
294
322
  command: "mineru",
295
323
  args: [
@@ -323,6 +351,7 @@ const DEFAULT_AI_CONFIG = {
323
351
  provider: DEFAULT_PROVIDER_CONFIG,
324
352
  prompt: DEFAULT_PROMPT_CONFIG,
325
353
  extraction: DEFAULT_EXTRACTION_CONFIG,
354
+ image: DEFAULT_IMAGE_OCR_CONFIG,
326
355
  pdf: DEFAULT_PDF_CONFIG
327
356
  };
328
357
 
@@ -364,6 +393,111 @@ async function addToGitignore(aiexDir, fileName) {
364
393
  }
365
394
  }
366
395
 
396
+ //#endregion
397
+ //#region src/core/image-ocr/index.ts
398
+ const DEFAULT_OCR_LANGUAGES = "en-US, zh-Hans";
399
+ const SELF_CHECK_EXPECTED_TEXT = "AIEX";
400
+ const defaultRuntime = {
401
+ platform: process.platform,
402
+ async loadLocalOcr() {
403
+ return await import("@napi-rs/system-ocr");
404
+ }
405
+ };
406
+ function imageOcrMode(config) {
407
+ return config?.ocrFallback ?? "auto";
408
+ }
409
+ function hasVisionModel(aiConfig, modelOverride) {
410
+ if (modelOverride) return modelOverride.capabilities.vision;
411
+ return aiConfig?.provider.models.some((model) => model.capabilities.vision) ?? true;
412
+ }
413
+ function shouldUseImageOcrFallback(aiConfig, modelOverride, runtime = defaultRuntime) {
414
+ if (hasVisionModel(aiConfig, modelOverride)) return false;
415
+ const mode = imageOcrMode(aiConfig?.image);
416
+ if (mode === "off") return false;
417
+ if (mode === "local") return true;
418
+ return isLocalOcrPlatform(runtime.platform);
419
+ }
420
+ function isLocalOcrPlatform(platform) {
421
+ return platform === "darwin" || platform === "win32";
422
+ }
423
+ function parseOcrLanguages(languages) {
424
+ return (languages ?? DEFAULT_OCR_LANGUAGES).split(",").map((language) => language.trim()).filter(Boolean);
425
+ }
426
+ async function recognizeImageText(imagePath, config, runtime = defaultRuntime) {
427
+ const mode = imageOcrMode(config);
428
+ if (!isLocalOcrPlatform(runtime.platform)) throw new Error(`Local OCR is only available on macOS or Windows. Current platform: ${runtime.platform}.`);
429
+ if (mode === "off") throw new Error("Image OCR fallback is disabled in AI settings.");
430
+ let localOcr;
431
+ try {
432
+ localOcr = await runtime.loadLocalOcr();
433
+ } catch (error) {
434
+ const message = error instanceof Error ? error.message : String(error);
435
+ throw new Error(`Local OCR is unavailable. Install optional dependency @napi-rs/system-ocr and approve its native build scripts. ${message}`);
436
+ }
437
+ const result = await localOcr.recognize(imagePath, localOcr.OcrAccuracy.Accurate, parseOcrLanguages(config?.ocrLanguages));
438
+ const text = result.text.trim();
439
+ if (!text) throw new Error("Local OCR did not recognize any text in the image.");
440
+ const confidence = result.confidence;
441
+ const minConfidence = config?.ocrMinConfidence ?? 0;
442
+ if (confidence < minConfidence) throw new Error(`Local OCR confidence ${(confidence * 100).toFixed(1)}% is below the configured minimum ${(minConfidence * 100).toFixed(1)}%.`);
443
+ return {
444
+ text,
445
+ confidence
446
+ };
447
+ }
448
+ function normalizeOcrText(text) {
449
+ return text.replace(/\s+/g, "").trim().toUpperCase();
450
+ }
451
+ async function checkImageOcrAvailability(imagePath, runtime = defaultRuntime) {
452
+ if (!isLocalOcrPlatform(runtime.platform)) return {
453
+ platformSupported: false,
454
+ dependencyLoaded: false,
455
+ ocrOk: null,
456
+ imagePath,
457
+ error: `Local OCR is only available on macOS or Windows. Current platform: ${runtime.platform}.`
458
+ };
459
+ let localOcr;
460
+ try {
461
+ localOcr = await runtime.loadLocalOcr();
462
+ } catch (error) {
463
+ return {
464
+ platformSupported: true,
465
+ dependencyLoaded: false,
466
+ ocrOk: null,
467
+ imagePath,
468
+ error: error instanceof Error ? error.message : String(error)
469
+ };
470
+ }
471
+ if (!imagePath) return {
472
+ platformSupported: true,
473
+ dependencyLoaded: true,
474
+ ocrOk: null,
475
+ error: "No OCR self-check image was found."
476
+ };
477
+ try {
478
+ const result = await localOcr.recognize(imagePath, localOcr.OcrAccuracy.Accurate, ["en-US"]);
479
+ const recognizedText = result.text.trim();
480
+ const ocrOk = normalizeOcrText(recognizedText).includes(SELF_CHECK_EXPECTED_TEXT);
481
+ return {
482
+ platformSupported: true,
483
+ dependencyLoaded: true,
484
+ ocrOk,
485
+ imagePath,
486
+ recognizedText,
487
+ confidence: result.confidence,
488
+ error: ocrOk ? void 0 : `Expected OCR text "${SELF_CHECK_EXPECTED_TEXT}" was not recognized.`
489
+ };
490
+ } catch (error) {
491
+ return {
492
+ platformSupported: true,
493
+ dependencyLoaded: true,
494
+ ocrOk: false,
495
+ imagePath,
496
+ error: error instanceof Error ? error.message : String(error)
497
+ };
498
+ }
499
+ }
500
+
367
501
  //#endregion
368
502
  //#region src/core/schema-sqlite/generator.ts
369
503
  function generateColumnDefinition(column) {
@@ -715,6 +849,7 @@ function generateDrizzleConfig() {
715
849
  //#endregion
716
850
  //#region src/core/doctor-collector.ts
717
851
  const V1_SUFFIX_RE = /\/v1\/?$/;
852
+ const MODULE_DIR = path.dirname(fileURLToPath(import.meta.url));
718
853
  async function checkConnection(baseURL) {
719
854
  try {
720
855
  const base = baseURL.replace(V1_SUFFIX_RE, "");
@@ -723,10 +858,26 @@ async function checkConnection(baseURL) {
723
858
  return false;
724
859
  }
725
860
  }
861
+ async function findImageOcrSelfCheckLogo() {
862
+ const candidates = [
863
+ path.resolve(MODULE_DIR, "logo.png"),
864
+ path.resolve(MODULE_DIR, "assets/logo.png"),
865
+ path.resolve(MODULE_DIR, "../../assets/logo.png"),
866
+ path.resolve(MODULE_DIR, "../../../web/public/logo.png"),
867
+ path.resolve(MODULE_DIR, "../../web/public/logo.png"),
868
+ path.resolve(MODULE_DIR, "../../dist/web/logo.png"),
869
+ path.resolve(MODULE_DIR, "web/logo.png")
870
+ ];
871
+ for (const candidate of candidates) try {
872
+ await fs.access(candidate);
873
+ return candidate;
874
+ } catch {}
875
+ }
726
876
  async function collectDoctorDiagnostics(options = {}) {
727
877
  const config = options.config ?? createConfig();
728
878
  const cwd = process.cwd();
729
879
  const errors = [];
880
+ const imageOcrLogoPath = await findImageOcrSelfCheckLogo();
730
881
  const migConfig = createMigrationConfig(cwd);
731
882
  const aiexDir = path.dirname(migConfig.schemaPath);
732
883
  const dirExists = await fs.stat(aiexDir).then((s) => s.isDirectory()).catch(() => false);
@@ -778,6 +929,7 @@ async function collectDoctorDiagnostics(options = {}) {
778
929
  osType: os.type(),
779
930
  osRelease: os.release(),
780
931
  cwd,
932
+ imageOcr: await checkImageOcrAvailability(imageOcrLogoPath),
781
933
  configPath: config.path,
782
934
  configStoreKeys: Object.keys(config.store),
783
935
  project: {
@@ -799,4 +951,4 @@ async function collectDoctorDiagnostics(options = {}) {
799
951
  }
800
952
 
801
953
  //#endregion
802
- export { buildDoctorDiagnostics as C, version as S, formatDoctorDiagnosticsJson as T, createConfig as _, parseJsonSchema as a, name as b, getDefaultAIConfig as c, DEFAULT_MARKITDOWN_CONFIG as d, DEFAULT_MINERU_CONFIG as f, AIConfigSchema as g, PLACEHOLDER_TEXT as h, JsonSchemaDefinitionSchema as i, readAIConfig as l, PLACEHOLDER_SCHEMA as m, createMigrationConfig as n, toSnakeCase as o, DEFAULT_PROMPT_CONFIG as p, generateDrizzleConfig as r, generateDrizzleSchema as s, collectDoctorDiagnostics as t, writeAIConfig as u, seedConfig as v, doctorDiagnosticsTableRows as w, package_default as x, description as y };
954
+ export { package_default as C, formatDoctorDiagnosticsJson as D, doctorDiagnosticsTableRows as E, name as S, buildDoctorDiagnostics as T, PLACEHOLDER_TEXT as _, parseJsonSchema as a, seedConfig as b, recognizeImageText as c, readAIConfig as d, writeAIConfig as f, PLACEHOLDER_SCHEMA as g, DEFAULT_PROMPT_CONFIG as h, JsonSchemaDefinitionSchema as i, shouldUseImageOcrFallback as l, DEFAULT_MINERU_CONFIG as m, createMigrationConfig as n, toSnakeCase as o, DEFAULT_MARKITDOWN_CONFIG as p, generateDrizzleConfig as r, generateDrizzleSchema as s, collectDoctorDiagnostics as t, getDefaultAIConfig as u, AIConfigSchema as v, version as w, description as x, createConfig as y };
package/dist/index.d.mts CHANGED
@@ -19,6 +19,15 @@ interface DoctorDiagnostics {
19
19
  os: string;
20
20
  cwd: string;
21
21
  };
22
+ imageOcr: {
23
+ platformSupported: boolean;
24
+ dependencyLoaded: boolean;
25
+ ocrOk: boolean | null;
26
+ imagePath?: string;
27
+ recognizedText?: string;
28
+ confidence?: number;
29
+ error?: string;
30
+ };
22
31
  config: {
23
32
  path: string;
24
33
  keys: string[];
@@ -53,6 +62,7 @@ declare function buildDoctorDiagnostics(input: {
53
62
  osType: string;
54
63
  osRelease: string;
55
64
  cwd: string;
65
+ imageOcr: DoctorDiagnostics['imageOcr'];
56
66
  configPath: string;
57
67
  configStoreKeys: string[];
58
68
  project: DoctorDiagnostics['project'];
package/dist/index.mjs CHANGED
@@ -1,3 +1,3 @@
1
- import { C as buildDoctorDiagnostics, T as formatDoctorDiagnosticsJson, a as parseJsonSchema, i as JsonSchemaDefinitionSchema, n as createMigrationConfig, r as generateDrizzleConfig, s as generateDrizzleSchema, t as collectDoctorDiagnostics, w as doctorDiagnosticsTableRows } from "./doctor-collector-Bnkbl48V.mjs";
1
+ import { D as formatDoctorDiagnosticsJson, E as doctorDiagnosticsTableRows, T as buildDoctorDiagnostics, a as parseJsonSchema, i as JsonSchemaDefinitionSchema, n as createMigrationConfig, r as generateDrizzleConfig, s as generateDrizzleSchema, t as collectDoctorDiagnostics } from "./doctor-collector-nMMG_h-w.mjs";
2
2
 
3
3
  export { JsonSchemaDefinitionSchema, buildDoctorDiagnostics, collectDoctorDiagnostics, createMigrationConfig, doctorDiagnosticsTableRows, formatDoctorDiagnosticsJson, generateDrizzleConfig, generateDrizzleSchema, parseJsonSchema };
package/dist/logo.png ADDED
Binary file