aiex-cli 0.0.5-beta.5 → 0.0.5-beta.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -4
- package/dist/cli.mjs +638 -377
- package/dist/{doctor-collector-NTNBFeBw.mjs → doctor-collector-BpqhXNcO.mjs} +26 -91
- package/dist/index.mjs +1 -1
- package/dist/web/assets/AISettings-sVI4PTNB.js +264 -0
- package/dist/web/assets/{DataBrowser-GAA-pGq0.js → DataBrowser-BGkZb9FV.js} +1 -1
- package/dist/web/assets/{ExtractionViewer-BhhWrBs2.js → ExtractionViewer-DNrkSECj.js} +1 -1
- package/dist/web/assets/{api-client-b4ZBXpNH.js → api-client-gQAAOw0v.js} +1 -1
- package/dist/web/assets/{index-CKV2X6sS.js → index-BQKZKzzP.js} +3 -3
- package/dist/web/assets/index-BU58oIRd.css +2 -0
- package/dist/web/index.html +3 -3
- package/dist/{zh-CN-Ca-Dv775.mjs → zh-CN-DkillGHx.mjs} +10 -23
- package/package.json +1 -1
- package/dist/web/assets/AISettings-BlyTFIIy.js +0 -272
- package/dist/web/assets/index-Csdgio76.css +0 -2
|
@@ -74,7 +74,7 @@ function doctorDiagnosticsTableRows(d) {
|
|
|
74
74
|
//#endregion
|
|
75
75
|
//#region package.json
|
|
76
76
|
var name = "aiex-cli";
|
|
77
|
-
var version = "0.0.5-beta.5";
|
|
77
|
+
var version = "0.0.5-beta.6";
|
|
78
78
|
var description = "JSON Schema → SQLite with AI-powered data extraction";
|
|
79
79
|
var package_default = {
|
|
80
80
|
name,
|
|
@@ -211,7 +211,6 @@ function seedConfig(config = createConfig()) {
|
|
|
211
211
|
//#endregion
|
|
212
212
|
//#region src/core/ai-extraction/schemas.ts
|
|
213
213
|
const ModelCapabilitiesSchema = z.object({
|
|
214
|
-
vision: z.boolean(),
|
|
215
214
|
structuredOutput: z.boolean(),
|
|
216
215
|
maxTokens: z.number().int().positive().optional(),
|
|
217
216
|
maxOutputTokens: z.number().int().positive().optional()
|
|
@@ -235,17 +234,14 @@ const ExtractionConfigSchema = z.object({
|
|
|
235
234
|
mode: z.enum(["pipeline"]).default("pipeline").optional(),
|
|
236
235
|
concurrency: z.number().int().min(1).optional(),
|
|
237
236
|
maxTokens: z.number().int().positive().default(8e3).optional(),
|
|
238
|
-
overlapSize: z.number().int().nonnegative().optional(),
|
|
239
|
-
preFiltering: z.boolean().optional(),
|
|
240
|
-
preFilteringLimit: z.number().int().min(1).optional()
|
|
237
|
+
overlapSize: z.number().int().nonnegative().optional()
|
|
241
238
|
});
|
|
242
239
|
const ImageOcrConfigSchema = z.object({
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
ocrLanguages: z.string().min(1).optional(),
|
|
240
|
+
imageConversion: z.enum(["vision", "local"]).default("local").optional(),
|
|
241
|
+
visionBaseURL: z.string().url().optional(),
|
|
242
|
+
visionApiKey: z.string().optional(),
|
|
243
|
+
imageModelName: z.string().min(1).optional(),
|
|
244
|
+
ocrLanguages: z.string().optional(),
|
|
249
245
|
ocrMinConfidence: z.number().min(0).max(1).optional()
|
|
250
246
|
});
|
|
251
247
|
const ExternalPdfConverterConfigSchema = z.object({
|
|
@@ -268,14 +264,10 @@ const PdfConfigSchema = z.object({
|
|
|
268
264
|
"unpdf",
|
|
269
265
|
"mineru",
|
|
270
266
|
"mineru_api",
|
|
271
|
-
"markitdown",
|
|
272
|
-
"marker",
|
|
273
267
|
"external"
|
|
274
268
|
]),
|
|
275
269
|
mineru: ExternalPdfConverterConfigSchema.optional(),
|
|
276
270
|
mineruApi: MineruApiPdfConverterConfigSchema.optional(),
|
|
277
|
-
markitdown: ExternalPdfConverterConfigSchema.optional(),
|
|
278
|
-
marker: ExternalPdfConverterConfigSchema.optional(),
|
|
279
271
|
external: ExternalPdfConverterConfigSchema.optional()
|
|
280
272
|
});
|
|
281
273
|
const LangfuseConfigSchema = z.object({
|
|
@@ -315,16 +307,7 @@ const PLACEHOLDER_SCHEMA = "{schema}";
|
|
|
315
307
|
const PLACEHOLDER_TEXT = "{text}";
|
|
316
308
|
const DEFAULT_MODELS = [{
|
|
317
309
|
name: "qwen-plus",
|
|
318
|
-
capabilities: {
|
|
319
|
-
vision: false,
|
|
320
|
-
structuredOutput: true
|
|
321
|
-
}
|
|
322
|
-
}, {
|
|
323
|
-
name: "qwen-vl-plus",
|
|
324
|
-
capabilities: {
|
|
325
|
-
vision: true,
|
|
326
|
-
structuredOutput: true
|
|
327
|
-
}
|
|
310
|
+
capabilities: { structuredOutput: true }
|
|
328
311
|
}];
|
|
329
312
|
const DEFAULT_PROVIDER_CONFIG = {
|
|
330
313
|
baseURL: "https://dashscope.aliyuncs.com/compatible-mode/v1",
|
|
@@ -341,7 +324,10 @@ Extraction requirements:
|
|
|
341
324
|
1. Extract strictly according to the field names and types defined in the structure
|
|
342
325
|
2. If the text lacks information for a field, set that field to null
|
|
343
326
|
3. Do not add fields that do not exist in the structure definition
|
|
344
|
-
4.
|
|
327
|
+
4. Use only facts present in the source text; do not infer, guess, or complete missing values from outside knowledge
|
|
328
|
+
5. Normalize values to the target type: numbers as JSON numbers, booleans as true/false, dates and formatted strings exactly as requested by the field format
|
|
329
|
+
6. For repeated or conflicting mentions, prefer the most specific final value in the source text and ignore placeholder values such as N/A, unknown, TBD, or empty strings
|
|
330
|
+
7. Maintain data accuracy and completeness`,
|
|
345
331
|
userTemplate: `Please extract data from the following text:
|
|
346
332
|
{text}`
|
|
347
333
|
};
|
|
@@ -350,7 +336,7 @@ const DEFAULT_EXTRACTION_CONFIG = {
|
|
|
350
336
|
mode: "pipeline"
|
|
351
337
|
};
|
|
352
338
|
const DEFAULT_IMAGE_OCR_CONFIG = {
|
|
353
|
-
|
|
339
|
+
imageConversion: "local",
|
|
354
340
|
ocrLanguages: "en-US, zh-Hans",
|
|
355
341
|
ocrMinConfidence: 0
|
|
356
342
|
};
|
|
@@ -365,26 +351,6 @@ const DEFAULT_MINERU_CONFIG = {
|
|
|
365
351
|
timeout: 600,
|
|
366
352
|
fallbackToUnpdf: true
|
|
367
353
|
};
|
|
368
|
-
const DEFAULT_MARKITDOWN_CONFIG = {
|
|
369
|
-
command: "markitdown",
|
|
370
|
-
args: [
|
|
371
|
-
"{input}",
|
|
372
|
-
"-o",
|
|
373
|
-
"{outputDir}/{basename}.md"
|
|
374
|
-
],
|
|
375
|
-
timeout: 600,
|
|
376
|
-
fallbackToUnpdf: true
|
|
377
|
-
};
|
|
378
|
-
const DEFAULT_MARKER_CONFIG = {
|
|
379
|
-
command: "marker_single",
|
|
380
|
-
args: [
|
|
381
|
-
"{input}",
|
|
382
|
-
"--output_dir",
|
|
383
|
-
"{outputDir}"
|
|
384
|
-
],
|
|
385
|
-
timeout: 600,
|
|
386
|
-
fallbackToUnpdf: true
|
|
387
|
-
};
|
|
388
354
|
const DEFAULT_MINERU_API_CONFIG = {
|
|
389
355
|
token: "",
|
|
390
356
|
baseURL: "https://mineru.net/api/v4",
|
|
@@ -396,9 +362,7 @@ const DEFAULT_MINERU_API_CONFIG = {
|
|
|
396
362
|
const DEFAULT_PDF_CONFIG = {
|
|
397
363
|
converter: "unpdf",
|
|
398
364
|
mineru: DEFAULT_MINERU_CONFIG,
|
|
399
|
-
mineruApi: DEFAULT_MINERU_API_CONFIG,
|
|
400
|
-
markitdown: DEFAULT_MARKITDOWN_CONFIG,
|
|
401
|
-
marker: DEFAULT_MARKER_CONFIG
|
|
365
|
+
mineruApi: DEFAULT_MINERU_API_CONFIG
|
|
402
366
|
};
|
|
403
367
|
const DEFAULT_AI_CONFIG = {
|
|
404
368
|
provider: DEFAULT_PROVIDER_CONFIG,
|
|
@@ -570,6 +534,8 @@ const en = {
|
|
|
570
534
|
file: {
|
|
571
535
|
hashWarning: "Failed to calculate file hash for {{file}}: {{error}}",
|
|
572
536
|
alreadyProcessed: "File {{file}} (hash: {{hash}}) has already been processed successfully. Skipping.",
|
|
537
|
+
visionTranscribed: "Transcribed image text via AI vision model ({{model}})",
|
|
538
|
+
visionTranscribeFailed: "Vision model transcription failed for {{model}}, falling back to local OCR",
|
|
573
539
|
ocrText: "Extracted image text via local OCR (confidence: {{confidence}}%)",
|
|
574
540
|
pdfFallback: "Fell back to unpdf — {{count}} page(s) extracted",
|
|
575
541
|
pdfConverted: "Converted PDF via {{name}}, {{count}} page(s)",
|
|
@@ -582,7 +548,6 @@ const en = {
|
|
|
582
548
|
extractRetry: "API responded with {{code}}, retrying in {{delay}}s ({{attempt}}/{{max}})",
|
|
583
549
|
chunking: "Input text ({{length}} tokens) exceeds limit ({{limit}} tokens). Splitting into chunks...",
|
|
584
550
|
chunksCount: "Split into {{count}} chunk(s).",
|
|
585
|
-
preFiltering: "Hybrid pre-filtering: selected {{filtered}} out of {{original}} chunks based on schema relevance.",
|
|
586
551
|
extractingChunk: "Extracting chunk {{current}}/{{total}}...",
|
|
587
552
|
extractRetryChunk: "Chunk {{current}}/{{total}} API responded with {{code}}, retrying in {{delay}}s ({{attempt}}/{{max}})",
|
|
588
553
|
extractFailChunk: "Extraction failed for chunk {{current}}/{{total}}",
|
|
@@ -694,14 +659,10 @@ const en = {
|
|
|
694
659
|
ai: {
|
|
695
660
|
apiKeyMissing: "API Key not configured. Please configure AI settings in the web UI.",
|
|
696
661
|
extractionNotObject: "Extraction result is not an object and cannot be written to Notion.",
|
|
697
|
-
noModels: "No AI models configured. Please add at least one model in AI Settings.",
|
|
698
|
-
noVisionModel: "Image input requires a model with vision capability{{hint}}.",
|
|
699
|
-
noVisionModelContext: "No vision-capable model with sufficient context window (≥{{tokens}} tokens) found{{hint}}.",
|
|
700
|
-
addSuitableModel: " Please add a suitable vision-capable model in AI Settings."
|
|
662
|
+
noModels: "No AI models configured. Please add at least one model in AI Settings."
|
|
701
663
|
},
|
|
702
664
|
ocr: {
|
|
703
665
|
platformUnsupported: "Local OCR is only available on macOS or Windows. Current platform: {{platform}}.",
|
|
704
|
-
disabled: "Image OCR fallback is disabled in AI settings.",
|
|
705
666
|
unavailable: "Local OCR is unavailable. Install optional dependency @napi-rs/system-ocr and approve its native build scripts. {{error}}",
|
|
706
667
|
noText: "Local OCR did not recognize any text in the image.",
|
|
707
668
|
lowConfidence: "Local OCR confidence {{confidence}}% is below the configured minimum {{min}}%."
|
|
@@ -803,26 +764,19 @@ const en = {
|
|
|
803
764
|
models: "Models",
|
|
804
765
|
addModel: "Add Model",
|
|
805
766
|
modelName: "Model name (e.g. gpt-4o)",
|
|
806
|
-
structuredOutput: "Structured Output",
|
|
807
|
-
textOnlyOutput: "Text-only Output",
|
|
808
|
-
visionSupported: "Vision Supported",
|
|
809
|
-
visionUnsupported: "Vision Unsupported",
|
|
810
767
|
subscribe: "Registry",
|
|
811
768
|
imageInput: "Image Input",
|
|
812
769
|
imageInputSummary: {
|
|
813
|
-
visionModel: "Image
|
|
814
|
-
|
|
815
|
-
ocrLocal: "
|
|
816
|
-
ocrAuto: "No vision model is configured. On macOS or Windows, local OCR will be tried automatically for text-heavy images."
|
|
770
|
+
visionModel: "Image text is transcribed via {{model}} before structured extraction.",
|
|
771
|
+
ocrNoModel: "No vision model selected. Image text will be read through local OCR.",
|
|
772
|
+
ocrLocal: "Image text will be read through local OCR."
|
|
817
773
|
},
|
|
818
|
-
visionModelConfigured: "Vision model configured",
|
|
819
|
-
noVisionModel: "No vision model",
|
|
820
774
|
advancedImageSettings: "Advanced image settings",
|
|
821
775
|
hideAdvancedImageSettings: "Hide advanced image settings",
|
|
822
|
-
ocrFallback: "
|
|
776
|
+
ocrFallback: "Image input mode",
|
|
823
777
|
ocrLanguages: "Languages",
|
|
824
778
|
ocrMinConfidence: "Minimum confidence",
|
|
825
|
-
ocrHint: "
|
|
779
|
+
ocrHint: "Images are converted to text before structured extraction.",
|
|
826
780
|
pdfConversion: "PDF Conversion",
|
|
827
781
|
converter: "Converter",
|
|
828
782
|
command: "Command",
|
|
@@ -897,14 +851,11 @@ const en = {
|
|
|
897
851
|
converterOptions: {
|
|
898
852
|
unpdf: "Built-in text extraction (unpdf)",
|
|
899
853
|
mineru: "MinerU (mineru)",
|
|
900
|
-
markitdown: "MarkItDown (markitdown)",
|
|
901
|
-
marker: "Marker (marker_single)",
|
|
902
854
|
external: "Custom External Command"
|
|
903
855
|
},
|
|
904
856
|
ocrFallbackOptions: {
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
local: "Require local OCR"
|
|
857
|
+
vision: "Vision model (fallback to OCR)",
|
|
858
|
+
local: "Local OCR only"
|
|
908
859
|
}
|
|
909
860
|
},
|
|
910
861
|
prompt: {
|
|
@@ -976,7 +927,7 @@ async function initI18n(lng) {
|
|
|
976
927
|
fallbackLng: "en",
|
|
977
928
|
resources: {
|
|
978
929
|
"en": { translation: en },
|
|
979
|
-
"zh-CN": { translation: await import("./zh-CN-Ca-Dv775.mjs").then((m) => m.zhCN) }
|
|
930
|
+
"zh-CN": { translation: await import("./zh-CN-DkillGHx.mjs").then((m) => m.zhCN) }
|
|
980
931
|
},
|
|
981
932
|
interpolation: { escapeValue: false },
|
|
982
933
|
returnNull: false
|
|
@@ -1000,20 +951,6 @@ const defaultRuntime = {
|
|
|
1000
951
|
return await import("@napi-rs/system-ocr");
|
|
1001
952
|
}
|
|
1002
953
|
};
|
|
1003
|
-
function imageOcrMode(config) {
|
|
1004
|
-
return config?.ocrFallback ?? "auto";
|
|
1005
|
-
}
|
|
1006
|
-
function hasVisionModel(aiConfig, modelOverride) {
|
|
1007
|
-
if (modelOverride) return modelOverride.capabilities.vision;
|
|
1008
|
-
return aiConfig?.provider.models.some((model) => model.capabilities.vision) ?? true;
|
|
1009
|
-
}
|
|
1010
|
-
function shouldUseImageOcrFallback(aiConfig, modelOverride, runtime = defaultRuntime) {
|
|
1011
|
-
if (hasVisionModel(aiConfig, modelOverride)) return false;
|
|
1012
|
-
const mode = imageOcrMode(aiConfig?.image);
|
|
1013
|
-
if (mode === "off") return false;
|
|
1014
|
-
if (mode === "local") return true;
|
|
1015
|
-
return isLocalOcrPlatform(runtime.platform);
|
|
1016
|
-
}
|
|
1017
954
|
function isLocalOcrPlatform(platform) {
|
|
1018
955
|
return platform === "darwin" || platform === "win32";
|
|
1019
956
|
}
|
|
@@ -1021,9 +958,7 @@ function parseOcrLanguages(languages) {
|
|
|
1021
958
|
return (languages ?? DEFAULT_OCR_LANGUAGES).split(",").map((language) => language.trim()).filter(Boolean);
|
|
1022
959
|
}
|
|
1023
960
|
async function recognizeImageText(imagePath, config, runtime = defaultRuntime) {
|
|
1024
|
-
const mode = imageOcrMode(config);
|
|
1025
961
|
if (!isLocalOcrPlatform(runtime.platform)) throw new Error(t("errors.ocr.platformUnsupported", { platform: runtime.platform }));
|
|
1026
|
-
if (mode === "off") throw new Error(t("errors.ocr.disabled"));
|
|
1027
962
|
let localOcr;
|
|
1028
963
|
try {
|
|
1029
964
|
localOcr = await runtime.loadLocalOcr();
|
|
@@ -1556,4 +1491,4 @@ async function collectDoctorDiagnostics(options = {}) {
|
|
|
1556
1491
|
}
|
|
1557
1492
|
|
|
1558
1493
|
//#endregion
|
|
1559
|
-
export {
|
|
1494
|
+
export { name as C, doctorDiagnosticsTableRows as D, buildDoctorDiagnostics as E, formatDoctorDiagnosticsJson as O, description as S, version as T, PLACEHOLDER_SCHEMA as _, parseJsonSchema as a, createConfig as b, recognizeImageText as c, getDefaultAIConfig as d, readAIConfig as f, DEFAULT_PROMPT_CONFIG as g, DEFAULT_MINERU_CONFIG as h, JsonSchemaDefinitionSchema as i, initI18n as l, DEFAULT_MINERU_API_CONFIG as m, createMigrationConfig as n, toSnakeCase as o, writeAIConfig as p, generateDrizzleConfig as r, generateDrizzleSchema as s, collectDoctorDiagnostics as t, t as u, PLACEHOLDER_TEXT as v, package_default as w, seedConfig as x, AIConfigSchema as y };
|
package/dist/index.mjs
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { D as doctorDiagnosticsTableRows, E as buildDoctorDiagnostics, O as formatDoctorDiagnosticsJson, a as parseJsonSchema, i as JsonSchemaDefinitionSchema, n as createMigrationConfig, r as generateDrizzleConfig, s as generateDrizzleSchema, t as collectDoctorDiagnostics } from "./doctor-collector-BpqhXNcO.mjs";
|
|
2
2
|
|
|
3
3
|
export { JsonSchemaDefinitionSchema, buildDoctorDiagnostics, collectDoctorDiagnostics, createMigrationConfig, doctorDiagnosticsTableRows, formatDoctorDiagnosticsJson, generateDrizzleConfig, generateDrizzleSchema, parseJsonSchema };
|