aiex-cli 0.0.6-beta.2 → 0.0.6-beta.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.mjs +200 -37
- package/dist/{doctor-collector-CGo5dgHm.mjs → doctor-collector-abgpqc5T.mjs} +1 -1
- package/dist/index.mjs +1 -1
- package/dist/web/assets/ExtractionViewer-BEYHgPw2.js +1 -0
- package/dist/web/assets/index-D7eI2nAX.js +882 -0
- package/dist/web/index.html +1 -1
- package/package.json +1 -1
- package/dist/web/assets/ExtractionViewer-CrQMLtX7.js +0 -1
- package/dist/web/assets/index-CdQgz6dJ.js +0 -882
package/dist/cli.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { C as description, E as version, O as doctorDiagnosticsTableRows, S as seedConfig, T as package_default, _ as DEFAULT_PROMPT_CONFIG, a as parseJsonSchema, b as AIConfigSchema, c as recognizeImageText, d as t, f as getDefaultAIConfig, g as DEFAULT_MINERU_CONFIG, h as DEFAULT_MINERU_API_CONFIG, i as JsonSchemaDefinitionSchema, k as formatDoctorDiagnosticsJson, l as shouldUseImageOcrFallback, m as writeAIConfig, n as createMigrationConfig, o as toSnakeCase, p as readAIConfig, s as generateDrizzleSchema, t as collectDoctorDiagnostics, u as initI18n, v as PLACEHOLDER_SCHEMA, w as name, x as createConfig, y as PLACEHOLDER_TEXT } from "./doctor-collector-CGo5dgHm.mjs";
|
|
1
|
+
import { C as description, E as version, O as doctorDiagnosticsTableRows, S as seedConfig, T as package_default, _ as DEFAULT_PROMPT_CONFIG, a as parseJsonSchema, b as AIConfigSchema, c as recognizeImageText, d as t, f as getDefaultAIConfig, g as DEFAULT_MINERU_CONFIG, h as DEFAULT_MINERU_API_CONFIG, i as JsonSchemaDefinitionSchema, k as formatDoctorDiagnosticsJson, l as shouldUseImageOcrFallback, m as writeAIConfig, n as createMigrationConfig, o as toSnakeCase, p as readAIConfig, s as generateDrizzleSchema, t as collectDoctorDiagnostics, u as initI18n, v as PLACEHOLDER_SCHEMA, w as name, x as createConfig, y as PLACEHOLDER_TEXT } from "./doctor-collector-abgpqc5T.mjs";
|
|
2
2
|
import { createRequire } from "node:module";
|
|
3
3
|
import fs from "node:fs/promises";
|
|
4
4
|
import os from "node:os";
|
|
@@ -13258,11 +13258,45 @@ function validateExtractedData(schema, data) {
|
|
|
13258
13258
|
//#endregion
|
|
13259
13259
|
//#region src/core/ai-extraction/extractor.ts
|
|
13260
13260
|
const OPENAI_COMPATIBLE_PROVIDER_NAME = "openai-compatible";
|
|
13261
|
+
function expectedExtractionFields(schema) {
|
|
13262
|
+
return Object.entries(schema.properties).filter(([, prop]) => !(prop.primary && prop.autoIncrement)).map(([name$1]) => name$1);
|
|
13263
|
+
}
|
|
13264
|
+
function calculateMissingFields(schema, data) {
|
|
13265
|
+
const expected = expectedExtractionFields(schema);
|
|
13266
|
+
if (expected.length === 0) return {
|
|
13267
|
+
fields: [],
|
|
13268
|
+
rate: 0
|
|
13269
|
+
};
|
|
13270
|
+
if (!data || typeof data !== "object" || Array.isArray(data)) return {
|
|
13271
|
+
fields: expected,
|
|
13272
|
+
rate: 1
|
|
13273
|
+
};
|
|
13274
|
+
const record = data;
|
|
13275
|
+
const fields = expected.filter((field) => {
|
|
13276
|
+
const value = record[field];
|
|
13277
|
+
return value === void 0 || value === null || value === "";
|
|
13278
|
+
});
|
|
13279
|
+
return {
|
|
13280
|
+
fields,
|
|
13281
|
+
rate: fields.length / expected.length
|
|
13282
|
+
};
|
|
13283
|
+
}
|
|
13261
13284
|
async function extractStructuredData(input) {
|
|
13262
13285
|
const { config, schema, text: text$1, aiexDir, file, modelOverride } = input;
|
|
13286
|
+
let apiRetryCount = 0;
|
|
13287
|
+
const onApiRetry = (info) => {
|
|
13288
|
+
apiRetryCount += 1;
|
|
13289
|
+
input.onRetry?.(info);
|
|
13290
|
+
};
|
|
13263
13291
|
if (!config.provider.apiKey) return {
|
|
13264
13292
|
success: false,
|
|
13265
|
-
error: t("errors.ai.apiKeyMissing")
|
|
13293
|
+
error: t("errors.ai.apiKeyMissing"),
|
|
13294
|
+
quality: { ai: {
|
|
13295
|
+
validationPassed: false,
|
|
13296
|
+
attempts: 0,
|
|
13297
|
+
selfCorrectionCount: 0,
|
|
13298
|
+
apiRetryCount
|
|
13299
|
+
} }
|
|
13266
13300
|
};
|
|
13267
13301
|
const useFileContent = !!file;
|
|
13268
13302
|
const isImageFile = (useFileContent ? await detectMimeType(file) : "").startsWith("image/");
|
|
@@ -13281,7 +13315,13 @@ async function extractStructuredData(input) {
|
|
|
13281
13315
|
} catch (e) {
|
|
13282
13316
|
return {
|
|
13283
13317
|
success: false,
|
|
13284
|
-
error: e.message
|
|
13318
|
+
error: e.message,
|
|
13319
|
+
quality: { ai: {
|
|
13320
|
+
validationPassed: false,
|
|
13321
|
+
attempts: 0,
|
|
13322
|
+
selfCorrectionCount: 0,
|
|
13323
|
+
apiRetryCount
|
|
13324
|
+
} }
|
|
13285
13325
|
};
|
|
13286
13326
|
}
|
|
13287
13327
|
const useStructuredOutput = selected.capabilities.structuredOutput;
|
|
@@ -13339,7 +13379,7 @@ async function extractStructuredData(input) {
|
|
|
13339
13379
|
experimental_telemetry: { isEnabled: useTelemetry }
|
|
13340
13380
|
};
|
|
13341
13381
|
if (useStructuredOutput) fileOpts.output = Output.object({ schema: outputSchema });
|
|
13342
|
-
result = await withRetry(() => generateText(fileOpts), input.onRetry);
|
|
13382
|
+
result = await withRetry(() => generateText(fileOpts), onApiRetry);
|
|
13343
13383
|
} else {
|
|
13344
13384
|
const textOpts = {
|
|
13345
13385
|
model: provider.chatModel(selected.name),
|
|
@@ -13350,7 +13390,7 @@ async function extractStructuredData(input) {
|
|
|
13350
13390
|
experimental_telemetry: { isEnabled: useTelemetry }
|
|
13351
13391
|
};
|
|
13352
13392
|
if (useStructuredOutput) textOpts.output = Output.object({ schema: outputSchema });
|
|
13353
|
-
result = await withRetry(() => generateText(textOpts), input.onRetry);
|
|
13393
|
+
result = await withRetry(() => generateText(textOpts), onApiRetry);
|
|
13354
13394
|
}
|
|
13355
13395
|
if (result.usage) {
|
|
13356
13396
|
totalPromptTokens += result.usage.inputTokens ?? 0;
|
|
@@ -13368,6 +13408,7 @@ async function extractStructuredData(input) {
|
|
|
13368
13408
|
if (!parseError && data !== void 0) {
|
|
13369
13409
|
const validation = validateExtractedData(schema, data);
|
|
13370
13410
|
if (validation.success) {
|
|
13411
|
+
const missing = calculateMissingFields(schema, data);
|
|
13371
13412
|
const outputDir = path.resolve(aiexDir, config.extraction.outputDir.replace(".aiex/", ""));
|
|
13372
13413
|
await fs.mkdir(outputDir, { recursive: true });
|
|
13373
13414
|
const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
|
|
@@ -13385,7 +13426,15 @@ async function extractStructuredData(input) {
|
|
|
13385
13426
|
prompt: totalPromptTokens,
|
|
13386
13427
|
completion: totalCompletionTokens,
|
|
13387
13428
|
total: totalPromptTokens + totalCompletionTokens
|
|
13388
|
-
}
|
|
13429
|
+
},
|
|
13430
|
+
quality: { ai: {
|
|
13431
|
+
validationPassed: true,
|
|
13432
|
+
attempts: attempt,
|
|
13433
|
+
selfCorrectionCount: attempt - 1,
|
|
13434
|
+
apiRetryCount,
|
|
13435
|
+
missingFields: missing.fields,
|
|
13436
|
+
missingFieldRate: missing.rate
|
|
13437
|
+
} }
|
|
13389
13438
|
};
|
|
13390
13439
|
} else validationError = validation.error;
|
|
13391
13440
|
}
|
|
@@ -13418,12 +13467,26 @@ Please output the corrected JSON object now:`;
|
|
|
13418
13467
|
}
|
|
13419
13468
|
return {
|
|
13420
13469
|
success: false,
|
|
13421
|
-
error: lastError || "Extraction failed after self-reflection retries"
|
|
13470
|
+
error: lastError || "Extraction failed after self-reflection retries",
|
|
13471
|
+
quality: { ai: {
|
|
13472
|
+
validationPassed: false,
|
|
13473
|
+
attempts: maxAttempts,
|
|
13474
|
+
selfCorrectionCount: maxAttempts - 1,
|
|
13475
|
+
apiRetryCount,
|
|
13476
|
+
validationError: lastError
|
|
13477
|
+
} }
|
|
13422
13478
|
};
|
|
13423
13479
|
} catch (error) {
|
|
13424
13480
|
return {
|
|
13425
13481
|
success: false,
|
|
13426
|
-
error: getErrorMessage(error)
|
|
13482
|
+
error: getErrorMessage(error),
|
|
13483
|
+
quality: { ai: {
|
|
13484
|
+
validationPassed: false,
|
|
13485
|
+
attempts: 0,
|
|
13486
|
+
selfCorrectionCount: 0,
|
|
13487
|
+
apiRetryCount,
|
|
13488
|
+
validationError: getErrorMessage(error)
|
|
13489
|
+
} }
|
|
13427
13490
|
};
|
|
13428
13491
|
}
|
|
13429
13492
|
}
|
|
@@ -14376,13 +14439,24 @@ async function readExtractFileInput(filePath, aiConfig, modelOverride) {
|
|
|
14376
14439
|
consola.info(t("command.extract.file.ocrText", { confidence: (result.confidence * 100).toFixed(1) }));
|
|
14377
14440
|
return {
|
|
14378
14441
|
text: result.text,
|
|
14379
|
-
inputProcessing
|
|
14442
|
+
inputProcessing,
|
|
14443
|
+
quality: { input: {
|
|
14444
|
+
kind: "image",
|
|
14445
|
+
textLength: result.text.length,
|
|
14446
|
+
emptyText: result.text.trim().length === 0,
|
|
14447
|
+
ocr: {
|
|
14448
|
+
confidence: result.confidence,
|
|
14449
|
+
textLength: result.text.length,
|
|
14450
|
+
platform: process.platform
|
|
14451
|
+
}
|
|
14452
|
+
} }
|
|
14380
14453
|
};
|
|
14381
14454
|
}
|
|
14382
14455
|
return {
|
|
14383
14456
|
text: "",
|
|
14384
14457
|
filePath,
|
|
14385
|
-
inputProcessing
|
|
14458
|
+
inputProcessing,
|
|
14459
|
+
quality: { input: { kind: "image" } }
|
|
14386
14460
|
};
|
|
14387
14461
|
}
|
|
14388
14462
|
if (inputProcessing.kind === "pdf") {
|
|
@@ -14403,15 +14477,36 @@ async function readExtractFileInput(filePath, aiConfig, modelOverride) {
|
|
|
14403
14477
|
await fs.writeFile(fallbackMd, result.text);
|
|
14404
14478
|
consola.info(t("command.extract.file.markdownSaved", { path: fallbackMd }));
|
|
14405
14479
|
}
|
|
14480
|
+
const textLength = result.text.length;
|
|
14406
14481
|
return {
|
|
14407
14482
|
text: result.text,
|
|
14408
|
-
inputProcessing
|
|
14483
|
+
inputProcessing,
|
|
14484
|
+
quality: { input: {
|
|
14485
|
+
kind: "pdf",
|
|
14486
|
+
textLength,
|
|
14487
|
+
emptyText: result.text.trim().length === 0,
|
|
14488
|
+
pdf: {
|
|
14489
|
+
pageCount: result.pageCount,
|
|
14490
|
+
textLength,
|
|
14491
|
+
emptyText: result.text.trim().length === 0,
|
|
14492
|
+
fallbackUsed: result.metadata?.fallback === "true",
|
|
14493
|
+
converter: result.metadata?.converter ?? converter.name
|
|
14494
|
+
}
|
|
14495
|
+
} }
|
|
14496
|
+
};
|
|
14497
|
+
}
|
|
14498
|
+
if (inputProcessing.kind === "text") {
|
|
14499
|
+
const text$1 = await fs.readFile(filePath, "utf-8");
|
|
14500
|
+
return {
|
|
14501
|
+
text: text$1,
|
|
14502
|
+
inputProcessing,
|
|
14503
|
+
quality: { input: {
|
|
14504
|
+
kind: "text",
|
|
14505
|
+
textLength: text$1.length,
|
|
14506
|
+
emptyText: text$1.trim().length === 0
|
|
14507
|
+
} }
|
|
14409
14508
|
};
|
|
14410
14509
|
}
|
|
14411
|
-
if (inputProcessing.kind === "text") return {
|
|
14412
|
-
text: await fs.readFile(filePath, "utf-8"),
|
|
14413
|
-
inputProcessing
|
|
14414
|
-
};
|
|
14415
14510
|
throw new Error(unsupportedFileTypeMessage(inputProcessing.mime ?? "application/octet-stream"));
|
|
14416
14511
|
}
|
|
14417
14512
|
|
|
@@ -14606,7 +14701,9 @@ async function extractSingle(aiexDir, config, aiConfig, schemaName, text$1, file
|
|
|
14606
14701
|
}
|
|
14607
14702
|
return {
|
|
14608
14703
|
success: false,
|
|
14609
|
-
error: result.error || t("common.unknownError")
|
|
14704
|
+
error: result.error || t("common.unknownError"),
|
|
14705
|
+
quality: result.quality,
|
|
14706
|
+
failureStage: "ai_extraction"
|
|
14610
14707
|
};
|
|
14611
14708
|
}
|
|
14612
14709
|
if (!options?.quiet) s.stop(t("command.extract.file.extractComplete"));
|
|
@@ -14625,7 +14722,9 @@ async function extractSingle(aiexDir, config, aiConfig, schemaName, text$1, file
|
|
|
14625
14722
|
consola.error(dbError);
|
|
14626
14723
|
return {
|
|
14627
14724
|
success: false,
|
|
14628
|
-
error: dbError
|
|
14725
|
+
error: dbError,
|
|
14726
|
+
quality: result.quality,
|
|
14727
|
+
failureStage: "db_insert"
|
|
14629
14728
|
};
|
|
14630
14729
|
}
|
|
14631
14730
|
try {
|
|
@@ -14639,14 +14738,17 @@ async function extractSingle(aiexDir, config, aiConfig, schemaName, text$1, file
|
|
|
14639
14738
|
outputPath: result.outputPath,
|
|
14640
14739
|
data: result.data,
|
|
14641
14740
|
tablesInserted: insertResult.tablesInserted,
|
|
14642
|
-
tokensUsed: result.tokensUsed
|
|
14741
|
+
tokensUsed: result.tokensUsed,
|
|
14742
|
+
quality: result.quality
|
|
14643
14743
|
};
|
|
14644
14744
|
} else {
|
|
14645
14745
|
if (!options?.quiet) s2.stop(t("command.extract.file.dbInsertFail"));
|
|
14646
14746
|
consola.error(insertResult.error || t("common.unknownError"));
|
|
14647
14747
|
return {
|
|
14648
14748
|
success: false,
|
|
14649
|
-
error: insertResult.error
|
|
14749
|
+
error: insertResult.error,
|
|
14750
|
+
quality: result.quality,
|
|
14751
|
+
failureStage: "db_insert"
|
|
14650
14752
|
};
|
|
14651
14753
|
}
|
|
14652
14754
|
} finally {
|
|
@@ -14657,7 +14759,9 @@ async function extractSingle(aiexDir, config, aiConfig, schemaName, text$1, file
|
|
|
14657
14759
|
consola.error(e instanceof Error ? e.message : String(e));
|
|
14658
14760
|
return {
|
|
14659
14761
|
success: false,
|
|
14660
|
-
error: String(e)
|
|
14762
|
+
error: String(e),
|
|
14763
|
+
quality: result.quality,
|
|
14764
|
+
failureStage: "db_insert"
|
|
14661
14765
|
};
|
|
14662
14766
|
}
|
|
14663
14767
|
}
|
|
@@ -14665,13 +14769,29 @@ async function extractSingle(aiexDir, config, aiConfig, schemaName, text$1, file
|
|
|
14665
14769
|
success: true,
|
|
14666
14770
|
outputPath: result.outputPath,
|
|
14667
14771
|
data: result.data,
|
|
14668
|
-
tokensUsed: result.tokensUsed
|
|
14772
|
+
tokensUsed: result.tokensUsed,
|
|
14773
|
+
quality: result.quality
|
|
14669
14774
|
};
|
|
14670
14775
|
}
|
|
14671
14776
|
function formatInputProcessing$1(input) {
|
|
14672
14777
|
const handler = input.converter ? `${input.handler}(${input.converter})` : input.handler;
|
|
14673
14778
|
return `${input.mime ?? input.kind} -> ${handler}`;
|
|
14674
14779
|
}
|
|
14780
|
+
function mergeQuality(inputQuality, aiQuality) {
|
|
14781
|
+
if (!inputQuality && !aiQuality) return void 0;
|
|
14782
|
+
return {
|
|
14783
|
+
input: inputQuality?.input,
|
|
14784
|
+
ai: aiQuality?.ai
|
|
14785
|
+
};
|
|
14786
|
+
}
|
|
14787
|
+
function classifyInputError(error, inputProcessing) {
|
|
14788
|
+
if (inputProcessing?.handler === "pdf_converter") return "file_conversion";
|
|
14789
|
+
if (inputProcessing?.handler === "image_local_ocr") return "ocr";
|
|
14790
|
+
const message = (error instanceof Error ? error.message : String(error)).toLowerCase();
|
|
14791
|
+
if (message.includes("ocr")) return "ocr";
|
|
14792
|
+
if (message.includes("pdf") || message.includes("converter")) return "file_conversion";
|
|
14793
|
+
return "input_detection";
|
|
14794
|
+
}
|
|
14675
14795
|
async function runAuditedExtraction(options) {
|
|
14676
14796
|
const { aiexDir, config, aiConfig, schemaName, source, modelOverride, retryOf, insert, force, quiet = false } = options;
|
|
14677
14797
|
let fileHash;
|
|
@@ -14713,7 +14833,9 @@ async function runAuditedExtraction(options) {
|
|
|
14713
14833
|
tablesInserted: existing.tablesInserted,
|
|
14714
14834
|
notionPages: existing.notionPages,
|
|
14715
14835
|
tokensUsed: existing.tokensUsed,
|
|
14716
|
-
inputProcessing: existing.inputProcessing
|
|
14836
|
+
inputProcessing: existing.inputProcessing,
|
|
14837
|
+
quality: existing.quality,
|
|
14838
|
+
failureStage: existing.failureStage
|
|
14717
14839
|
};
|
|
14718
14840
|
}
|
|
14719
14841
|
}
|
|
@@ -14732,17 +14854,22 @@ async function runAuditedExtraction(options) {
|
|
|
14732
14854
|
},
|
|
14733
14855
|
retryOf
|
|
14734
14856
|
});
|
|
14857
|
+
let inputProcessing;
|
|
14858
|
+
let inputQuality;
|
|
14735
14859
|
try {
|
|
14736
14860
|
let text$1 = "";
|
|
14737
14861
|
let filePath;
|
|
14738
|
-
let inputProcessing;
|
|
14739
14862
|
if (source.type === "file") {
|
|
14740
14863
|
const input = await readExtractFileInput(source.filePath, aiConfig, modelOverride);
|
|
14741
14864
|
text$1 = input.text;
|
|
14742
14865
|
filePath = input.filePath;
|
|
14743
14866
|
inputProcessing = input.inputProcessing;
|
|
14867
|
+
inputQuality = input.quality;
|
|
14744
14868
|
if (!quiet) consola.info(`Input: ${formatInputProcessing$1(inputProcessing)}`);
|
|
14745
|
-
await updateExtractionAuditRecord(aiexDir, audit.id, { inputProcessing });
|
|
14869
|
+
await updateExtractionAuditRecord(aiexDir, audit.id, {
|
|
14870
|
+
inputProcessing,
|
|
14871
|
+
quality: inputQuality
|
|
14872
|
+
});
|
|
14746
14873
|
} else text$1 = source.text;
|
|
14747
14874
|
const r = await extractSingle(aiexDir, config, aiConfig, schemaName, text$1, filePath, modelOverride, {
|
|
14748
14875
|
quiet,
|
|
@@ -14760,6 +14887,8 @@ async function runAuditedExtraction(options) {
|
|
|
14760
14887
|
outputName: r.outputPath ? path.basename(r.outputPath) : void 0,
|
|
14761
14888
|
tablesInserted: r.tablesInserted,
|
|
14762
14889
|
tokensUsed: r.tokensUsed,
|
|
14890
|
+
quality: mergeQuality(inputQuality, r.quality),
|
|
14891
|
+
failureStage: "integration",
|
|
14763
14892
|
error: error instanceof Error ? error.message : String(error)
|
|
14764
14893
|
});
|
|
14765
14894
|
if (!quiet) consola.error(t("command.extract.file.notionSyncFail", { error: error instanceof Error ? error.message : String(error) }));
|
|
@@ -14769,7 +14898,9 @@ async function runAuditedExtraction(options) {
|
|
|
14769
14898
|
error: error instanceof Error ? error.message : String(error),
|
|
14770
14899
|
auditId: audit.id,
|
|
14771
14900
|
fileHash,
|
|
14772
|
-
inputProcessing
|
|
14901
|
+
inputProcessing,
|
|
14902
|
+
quality: mergeQuality(inputQuality, r.quality),
|
|
14903
|
+
failureStage: "integration"
|
|
14773
14904
|
};
|
|
14774
14905
|
}
|
|
14775
14906
|
const updated = await updateExtractionAuditRecord(aiexDir, audit.id, {
|
|
@@ -14778,7 +14909,8 @@ async function runAuditedExtraction(options) {
|
|
|
14778
14909
|
outputName: r.outputPath ? path.basename(r.outputPath) : void 0,
|
|
14779
14910
|
tablesInserted: r.tablesInserted,
|
|
14780
14911
|
notionPages,
|
|
14781
|
-
tokensUsed: r.tokensUsed
|
|
14912
|
+
tokensUsed: r.tokensUsed,
|
|
14913
|
+
quality: mergeQuality(inputQuality, r.quality)
|
|
14782
14914
|
});
|
|
14783
14915
|
await triggerWebhook(aiConfig, audit.id, schemaName, "extraction.success", source, r.data, void 0, r.tokensUsed, quiet);
|
|
14784
14916
|
return {
|
|
@@ -14790,12 +14922,16 @@ async function runAuditedExtraction(options) {
|
|
|
14790
14922
|
tokensUsed: updated.tokensUsed,
|
|
14791
14923
|
auditId: updated.id,
|
|
14792
14924
|
fileHash,
|
|
14793
|
-
inputProcessing: updated.inputProcessing
|
|
14925
|
+
inputProcessing: updated.inputProcessing,
|
|
14926
|
+
quality: updated.quality,
|
|
14927
|
+
failureStage: updated.failureStage
|
|
14794
14928
|
};
|
|
14795
14929
|
} else {
|
|
14796
14930
|
await updateExtractionAuditRecord(aiexDir, audit.id, {
|
|
14797
14931
|
status: "failed",
|
|
14798
|
-
error: r.error || "Extraction failed"
|
|
14932
|
+
error: r.error || "Extraction failed",
|
|
14933
|
+
quality: mergeQuality(inputQuality, r.quality),
|
|
14934
|
+
failureStage: r.failureStage ?? "ai_extraction"
|
|
14799
14935
|
});
|
|
14800
14936
|
if (!quiet) consola.error(t("command.extract.file.extractionFailed", { error: r.error }));
|
|
14801
14937
|
await triggerWebhook(aiConfig, audit.id, schemaName, "extraction.failed", source, void 0, r.error || "Extraction failed", void 0, quiet);
|
|
@@ -14804,13 +14940,18 @@ async function runAuditedExtraction(options) {
|
|
|
14804
14940
|
error: r.error,
|
|
14805
14941
|
auditId: audit.id,
|
|
14806
14942
|
fileHash,
|
|
14807
|
-
inputProcessing
|
|
14943
|
+
inputProcessing,
|
|
14944
|
+
quality: mergeQuality(inputQuality, r.quality),
|
|
14945
|
+
failureStage: r.failureStage ?? "ai_extraction"
|
|
14808
14946
|
};
|
|
14809
14947
|
}
|
|
14810
14948
|
} catch (e) {
|
|
14949
|
+
const failureStage = classifyInputError(e, inputProcessing);
|
|
14811
14950
|
await updateExtractionAuditRecord(aiexDir, audit.id, {
|
|
14812
14951
|
status: "failed",
|
|
14813
|
-
error: e instanceof Error ? e.message : String(e)
|
|
14952
|
+
error: e instanceof Error ? e.message : String(e),
|
|
14953
|
+
quality: inputQuality,
|
|
14954
|
+
failureStage
|
|
14814
14955
|
});
|
|
14815
14956
|
if (!quiet) {
|
|
14816
14957
|
const name$1 = source.type === "file" ? path.basename(source.filePath) : "text input";
|
|
@@ -14824,7 +14965,10 @@ async function runAuditedExtraction(options) {
|
|
|
14824
14965
|
success: false,
|
|
14825
14966
|
error: e instanceof Error ? e.message : String(e),
|
|
14826
14967
|
auditId: audit.id,
|
|
14827
|
-
fileHash
|
|
14968
|
+
fileHash,
|
|
14969
|
+
inputProcessing,
|
|
14970
|
+
quality: inputQuality,
|
|
14971
|
+
failureStage
|
|
14828
14972
|
};
|
|
14829
14973
|
}
|
|
14830
14974
|
}
|
|
@@ -15013,6 +15157,13 @@ function formatInputProcessing(input) {
|
|
|
15013
15157
|
const handler = input.converter ? `${input.handler}(${input.converter})` : input.handler;
|
|
15014
15158
|
return ` [${input.mime ?? input.kind} -> ${handler}]`;
|
|
15015
15159
|
}
|
|
15160
|
+
function formatQuality(quality, failureStage) {
|
|
15161
|
+
if (failureStage) return ` [failed:${failureStage}]`;
|
|
15162
|
+
if (quality?.input?.pdf) return ` [pdf:${quality.input.pdf.pageCount}p/${quality.input.pdf.textLength}chars${quality.input.pdf.fallbackUsed ? "/fallback" : ""}]`;
|
|
15163
|
+
if (quality?.input?.ocr) return ` [ocr:${Math.round(quality.input.ocr.confidence * 100)}%/${quality.input.ocr.textLength}chars]`;
|
|
15164
|
+
if (quality?.ai?.missingFieldRate !== void 0) return ` [missing:${Math.round(quality.ai.missingFieldRate * 100)}%]`;
|
|
15165
|
+
return "";
|
|
15166
|
+
}
|
|
15016
15167
|
async function loadConfiguredAI(aiexDir) {
|
|
15017
15168
|
const aiConfig = await readAIConfig(aiexDir);
|
|
15018
15169
|
if (!aiConfig) {
|
|
@@ -15055,7 +15206,7 @@ const historyCommand = defineCommand({
|
|
|
15055
15206
|
}
|
|
15056
15207
|
for (const record of records) {
|
|
15057
15208
|
const suffix = record.error ? ` — ${record.error}` : record.outputName ? ` — ${record.outputName}` : "";
|
|
15058
|
-
consola.info(`${record.status.padEnd(9)} ${record.id} ${record.schemaName} ${formatSource(record.source)}${formatInputProcessing(record.inputProcessing)}${suffix}`);
|
|
15209
|
+
consola.info(`${record.status.padEnd(9)} ${record.id} ${record.schemaName} ${formatSource(record.source)}${formatInputProcessing(record.inputProcessing)}${formatQuality(record.quality, record.failureStage)}${suffix}`);
|
|
15059
15210
|
}
|
|
15060
15211
|
}
|
|
15061
15212
|
});
|
|
@@ -15982,7 +16133,9 @@ async function listExtractions(config) {
|
|
|
15982
16133
|
notionStatus: notionPages ? "synced" : audit?.status === "failed" ? "failed" : "not_synced",
|
|
15983
16134
|
notionPages,
|
|
15984
16135
|
notionError: !notionPages && audit?.status === "failed" ? audit.error : void 0,
|
|
15985
|
-
inputProcessing: audit?.inputProcessing
|
|
16136
|
+
inputProcessing: audit?.inputProcessing,
|
|
16137
|
+
quality: audit?.quality,
|
|
16138
|
+
failureStage: audit?.failureStage
|
|
15986
16139
|
});
|
|
15987
16140
|
} catch {
|
|
15988
16141
|
continue;
|
|
@@ -16349,7 +16502,10 @@ function extractRoutes(config) {
|
|
|
16349
16502
|
if (!result.success) return jsonResponse({
|
|
16350
16503
|
success: false,
|
|
16351
16504
|
error: result.error,
|
|
16352
|
-
auditId: result.auditId
|
|
16505
|
+
auditId: result.auditId,
|
|
16506
|
+
inputProcessing: result.inputProcessing,
|
|
16507
|
+
quality: result.quality,
|
|
16508
|
+
failureStage: result.failureStage
|
|
16353
16509
|
}, 500);
|
|
16354
16510
|
return jsonResponse({
|
|
16355
16511
|
success: true,
|
|
@@ -16359,7 +16515,9 @@ function extractRoutes(config) {
|
|
|
16359
16515
|
notionPages: result.notionPages,
|
|
16360
16516
|
tokensUsed: result.tokensUsed,
|
|
16361
16517
|
auditId: result.auditId,
|
|
16362
|
-
inputProcessing: result.inputProcessing
|
|
16518
|
+
inputProcessing: result.inputProcessing,
|
|
16519
|
+
quality: result.quality,
|
|
16520
|
+
failureStage: result.failureStage
|
|
16363
16521
|
}, 200);
|
|
16364
16522
|
} catch (error) {
|
|
16365
16523
|
if (isMissingUploadFileError(error)) return c.json({
|
|
@@ -16417,7 +16575,10 @@ function extractRoutes(config) {
|
|
|
16417
16575
|
if (!result.success) return jsonResponse({
|
|
16418
16576
|
success: false,
|
|
16419
16577
|
error: result.error,
|
|
16420
|
-
auditId: result.auditId
|
|
16578
|
+
auditId: result.auditId,
|
|
16579
|
+
inputProcessing: result.inputProcessing,
|
|
16580
|
+
quality: result.quality,
|
|
16581
|
+
failureStage: result.failureStage
|
|
16421
16582
|
}, 500);
|
|
16422
16583
|
return jsonResponse({
|
|
16423
16584
|
success: true,
|
|
@@ -16427,7 +16588,9 @@ function extractRoutes(config) {
|
|
|
16427
16588
|
notionPages: result.notionPages,
|
|
16428
16589
|
tokensUsed: result.tokensUsed,
|
|
16429
16590
|
auditId: result.auditId,
|
|
16430
|
-
inputProcessing: result.inputProcessing
|
|
16591
|
+
inputProcessing: result.inputProcessing,
|
|
16592
|
+
quality: result.quality,
|
|
16593
|
+
failureStage: result.failureStage
|
|
16431
16594
|
}, 200);
|
|
16432
16595
|
});
|
|
16433
16596
|
app.delete("/extract/records/:id", async (c) => {
|
|
@@ -74,7 +74,7 @@ function doctorDiagnosticsTableRows(d) {
|
|
|
74
74
|
//#endregion
|
|
75
75
|
//#region package.json
|
|
76
76
|
var name = "aiex-cli";
|
|
77
|
-
var version = "0.0.6-beta.2";
|
|
77
|
+
var version = "0.0.6-beta.3";
|
|
78
78
|
var description = "JSON Schema → SQLite with AI-powered data extraction";
|
|
79
79
|
var package_default = {
|
|
80
80
|
name,
|
package/dist/index.mjs
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
import { D as buildDoctorDiagnostics, O as doctorDiagnosticsTableRows, a as parseJsonSchema, i as JsonSchemaDefinitionSchema, k as formatDoctorDiagnosticsJson, n as createMigrationConfig, r as generateDrizzleConfig, s as generateDrizzleSchema, t as collectDoctorDiagnostics } from "./doctor-collector-CGo5dgHm.mjs";
|
|
1
|
+
import { D as buildDoctorDiagnostics, O as doctorDiagnosticsTableRows, a as parseJsonSchema, i as JsonSchemaDefinitionSchema, k as formatDoctorDiagnosticsJson, n as createMigrationConfig, r as generateDrizzleConfig, s as generateDrizzleSchema, t as collectDoctorDiagnostics } from "./doctor-collector-abgpqc5T.mjs";
|
|
2
2
|
|
|
3
3
|
export { JsonSchemaDefinitionSchema, buildDoctorDiagnostics, collectDoctorDiagnostics, createMigrationConfig, doctorDiagnosticsTableRows, formatDoctorDiagnosticsJson, generateDrizzleConfig, generateDrizzleSchema, parseJsonSchema };
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
import{A as e,H as t,O as n,S as r,T as i,W as a,gt as o,ht as s,lt as c,n as l,r as u,tt as d,w as f,x as p,yt as m}from"./vue-i18n-Du42D0vb.js";import{_ as h,p as g,r as _}from"./api-client-b4ZBXpNH.js";var v={class:`flex h-full min-w-0 overflow-hidden`},y={key:0,class:`flex-1 flex flex-col items-center justify-center text-muted-foreground`},b={class:`text-sm`},x={key:1,class:`flex-1 flex items-center justify-center text-muted-foreground`},S={key:2,class:`flex min-h-0 min-w-0 flex-1 flex-col overflow-hidden p-4`},C={class:`mb-4 flex shrink-0 flex-wrap items-center justify-between gap-2`},w={class:`m-0 text-lg font-semibold text-foreground`},T={class:`flex shrink-0 flex-wrap items-center justify-end gap-2`},E={key:0,class:`rounded bg-secondary px-2 py-1 text-xs font-medium text-muted-foreground`},D={key:1,class:`rounded bg-secondary px-2 py-1 text-xs font-medium text-muted-foreground`},O={class:`flex-1 min-h-0 overflow-auto`},k={class:`text-sm font-mono whitespace-pre-wrap text-foreground bg-secondary border border-border rounded-lg p-4`},A=e({__name:`ExtractionViewer`,props:{extractionName:{},record:{}},emits:[`notionSynced`],setup(e,{emit:A}){let j=e,M=A,{t:N}=l(),P=c(``),F=c(!1),I=c(!1),L=p(()=>j.record?.notionStatus===`synced`?N(`app.notionSynced`):j.record?.notionStatus===`failed`?N(`app.retryNotion`):N(`app.syncNotion`)),R=p(()=>G(j.record?.inputProcessing)),z=p(()=>q(j.record));async function B(){if(j.extractionName){F.value=!0,P.value=``;try{let e=await _(j.extractionName);e.success&&e.content?P.value=e.content:h.error(e.error||N(`app.failedToLoadExtraction`))}catch{h.error(N(`app.failedToLoadExtraction`))}F.value=!1}}function V(){if(!j.extractionName||!P.value)return;let e=new Blob([P.value],{type:`application/json`}),t=URL.createObjectURL(e),n=document.createElement(`a`);n.href=t,n.download=j.extractionName,document.body.appendChild(n),n.click(),document.body.removeChild(n),URL.revokeObjectURL(t)}async function H(){if(j.extractionName){I.value=!0;try{let 
e=await g(j.extractionName);h.success(N(`app.notionSyncedToNotionDetail`,{count:e.notionPages?.length??0})),M(`notionSynced`)}catch(e){h.error(e instanceof Error?e.message:N(`app.notionSyncFailed`))}I.value=!1}}function U(e){return N(e===`synced`?`app.notionStatusSynced`:e===`failed`?`app.notionStatusFailed`:`app.notionStatusNotSynced`)}function W(e){return e.handler===`image_vision`?`Vision`:e.handler===`image_local_ocr`?`Local OCR`:e.handler===`pdf_converter`?e.converter?`PDF ${e.converter}`:`PDF converter`:`Text`}function G(e){return e?`${e.mime??e.kind} -> ${W(e)}`:``}function K(e){return`${Math.round(e*100)}%`}function q(e){let t=[],n=e?.quality;if(n?.input?.pdf){let e=n.input.pdf;t.push(`PDF ${e.pageCount}p/${e.textLength} chars${e.fallbackUsed?`/fallback`:``}`)}else n?.input?.ocr?t.push(`OCR ${K(n.input.ocr.confidence)}/${n.input.ocr.textLength} chars`):n?.input?.textLength!==void 0&&t.push(`Text ${n.input.textLength} chars`);return n?.ai&&(t.push(`AI ${n.ai.attempts} attempt${n.ai.attempts===1?``:`s`}`),n.ai.missingFieldRate!==void 0&&t.push(`missing ${K(n.ai.missingFieldRate)}`)),e?.failureStage&&t.push(`failed at ${e.failureStage}`),t.join(` · `)}function J(e){try{return JSON.stringify(JSON.parse(e),null,2)}catch{return e}}return d(()=>j.extractionName,B),t(B),(t,c)=>(a(),i(`div`,v,[e.extractionName?F.value?(a(),i(`div`,x,m(t.$t(`app.loading`)),1)):(a(),i(`div`,S,[r(`div`,C,[r(`h2`,w,m(e.extractionName),1),r(`div`,T,[R.value?(a(),i(`span`,E,m(R.value),1)):f(``,!0),z.value?(a(),i(`span`,D,m(z.value),1)):f(``,!0),e.record?(a(),i(`span`,{key:2,class:o([`rounded px-2 py-1 text-xs font-medium`,[e.record.notionStatus===`synced`?`bg-green-500/10 text-green-700`:e.record.notionStatus===`failed`?`bg-red-500/10 text-red-700`:`bg-secondary text-muted-foreground`]])},m(U(e.record.notionStatus)),3)):f(``,!0),n(s(u),{icon:`pi 
pi-refresh`,label:L.value,severity:`secondary`,size:`small`,loading:I.value,disabled:e.record?.notionStatus===`synced`,onClick:H},null,8,[`label`,`loading`,`disabled`]),n(s(u),{icon:`pi pi-download`,label:t.$t(`app.download`),severity:`secondary`,size:`small`,onClick:V},null,8,[`label`])])]),r(`div`,O,[r(`pre`,k,m(J(P.value)),1)])])):(a(),i(`div`,y,[c[0]||=r(`i`,{class:`pi pi-file text-4xl mb-3 opacity-50`},null,-1),r(`p`,b,m(t.$t(`app.selectExtraction`)),1)]))]))}});export{A as default};
|