aiex-cli 0.0.1-beta.28 → 0.0.1-beta.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.mjs +151 -26
- package/dist/core/schema-sqlite/migrate-helper.mjs +15 -28
- package/dist/{doctor-collector-D2q6iD_e.mjs → doctor-collector-j2dG7dG1.mjs} +41 -4
- package/dist/index.mjs +1 -1
- package/dist/web/assets/AISettings-Dn58ZHhM.js +339 -0
- package/dist/web/assets/DataBrowser-B6WECCZM.js +6 -0
- package/dist/web/assets/{ExtractionViewer-WMUdXeyU.js → ExtractionViewer-CTS1RXzc.js} +1 -1
- package/dist/web/assets/JsonSchemaEditor-Dpgu6HPz.js +570 -0
- package/dist/web/assets/api-client-CbQEkaKT.js +1 -0
- package/dist/web/assets/dialog-CUkPLPNP.js +109 -0
- package/dist/web/assets/{index-CuOQk7nB.js → index-g2pWXPQZ.js} +38 -38
- package/dist/web/assets/object-utils-DPPzLQjH.js +1 -0
- package/dist/web/assets/select-DyjIzt-v.js +439 -0
- package/dist/web/index.html +5 -5
- package/package.json +5 -1
- package/src/core/schema-sqlite/migrate-helper.ts +15 -40
- package/dist/web/assets/AISettings-DFi-nXIi.js +0 -334
- package/dist/web/assets/DataBrowser-BWSX8O2h.js +0 -5
- package/dist/web/assets/JsonSchemaEditor-B57coz1O.js +0 -929
- package/dist/web/assets/api-client-By2rWtpv.js +0 -1
- package/dist/web/assets/dialog-dMXSeJQQ.js +0 -108
- package/dist/web/assets/overlayeventbus-CRKW6UCj.js +0 -80
- package/dist/web/assets/table-schema-C90NJyfq.js +0 -2
- /package/dist/web/assets/{runtime-dom.esm-bundler-DmdkgxQM.js → runtime-dom.esm-bundler-ei_N7Xjw.js} +0 -0
package/dist/cli.mjs
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
|
-
import { C as
|
|
1
|
+
import { C as doctorDiagnosticsTableRows, _ as seedConfig, a as parseJsonSchema, b as package_default, c as getDefaultAIConfig, d as DEFAULT_MINERU_CONFIG, f as DEFAULT_PROMPT_CONFIG, g as createConfig, h as AIConfigSchema, i as JsonSchemaDefinitionSchema, l as readAIConfig, m as PLACEHOLDER_TEXT, n as createMigrationConfig, o as toSnakeCase, p as PLACEHOLDER_SCHEMA, s as generateDrizzleSchema, t as collectDoctorDiagnostics, u as writeAIConfig, v as description, w as formatDoctorDiagnosticsJson, x as version, y as name } from "./doctor-collector-j2dG7dG1.mjs";
|
|
2
2
|
import { createRequire } from "node:module";
|
|
3
3
|
import fs from "node:fs/promises";
|
|
4
|
+
import os from "node:os";
|
|
4
5
|
import path from "node:path";
|
|
5
6
|
import process from "node:process";
|
|
6
7
|
import { ZodError } from "zod";
|
|
@@ -15,9 +16,11 @@ import { createOpenAICompatible } from "@ai-sdk/openai-compatible";
|
|
|
15
16
|
import { LangfuseSpanProcessor } from "@langfuse/otel";
|
|
16
17
|
import { NodeTracerProvider } from "@opentelemetry/sdk-trace-node";
|
|
17
18
|
import { APICallError, Output, generateText, jsonSchema } from "ai";
|
|
19
|
+
import pRetry from "p-retry";
|
|
18
20
|
import fs$1 from "node:fs";
|
|
19
21
|
import Database from "better-sqlite3";
|
|
20
22
|
import picomatch from "picomatch";
|
|
23
|
+
import { execa } from "execa";
|
|
21
24
|
import { Buffer } from "node:buffer";
|
|
22
25
|
import { extractText, getMeta } from "unpdf";
|
|
23
26
|
import { execFile } from "node:child_process";
|
|
@@ -12780,23 +12783,25 @@ function lookupModelCapabilities(modelName) {
|
|
|
12780
12783
|
//#endregion
|
|
12781
12784
|
//#region src/utils/retry.ts
|
|
12782
12785
|
/**
 * Runs `fn` through p-retry, retrying only failures that are retryable
 * `APICallError`s.
 *
 * @param {() => unknown} fn - Operation to attempt; invoked with no arguments.
 * @param {(info: { attempt: number, maxRetries: number, delayMs: number, statusCode: unknown }) => void} [onRetry]
 *   Optional observer, called after a retryable failure while retries remain.
 * @param {number} [maxRetries=5] - Passed to p-retry as `retries`.
 * @returns {Promise<unknown>} Whatever `pRetry` resolves to.
 */
async function withRetry(fn, onRetry, maxRetries = 5) {
	// Single definition of "retryable", shared by both p-retry callbacks.
	const canRetry = (failure) => failure instanceof APICallError && failure.isRetryable;

	const reportFailedAttempt = ({ error, attemptNumber, retriesLeft }) => {
		if (!canRetry(error) || retriesLeft <= 0) return;
		onRetry?.({
			attempt: attemptNumber,
			maxRetries,
			// Base exponential schedule (1s, 2s, 4s, ...). `randomize` is enabled
			// below, so the real wait chosen by p-retry may differ from this value.
			delayMs: 1e3 * 2 ** (attemptNumber - 1),
			statusCode: error.statusCode
		});
	};

	return pRetry(async () => fn(), {
		retries: maxRetries,
		factor: 2,
		minTimeout: 1e3,
		randomize: true,
		onFailedAttempt: reportFailedAttempt,
		shouldRetry: ({ error }) => canRetry(error)
	});
}
|
|
12801
12806
|
|
|
12802
12807
|
//#endregion
|
|
@@ -13373,6 +13378,97 @@ async function savePromptSnapshot(schema, aiexDir) {
|
|
|
13373
13378
|
return outputPath;
|
|
13374
13379
|
}
|
|
13375
13380
|
|
|
13381
|
+
//#endregion
|
|
13382
|
+
//#region src/core/pdf-converter/external.ts
|
|
13383
|
+
/**
 * Expands the `{input}`, `{outputDir}` and `{basename}` placeholders in a
 * command template string.
 *
 * All placeholders are substituted in a single pass with a replacer function:
 * - a string replacement would interpret `$$`, `$&`, `$1`, ... inside the
 *   substituted path, corrupting file names that contain `$`;
 * - chained replacements would re-expand placeholder text that happens to
 *   appear inside an already substituted value.
 *
 * @param {string} value - Template text (a command argument or output path).
 * @param {{ input: string, outputDir: string, basename: string }} context - Substitution values.
 * @returns {string} `value` with every known placeholder replaced; unknown `{...}` tokens are left as-is.
 */
function applyTemplate(value, context) {
	return value.replace(/\{(input|outputDir|basename)\}/g, (_placeholder, key) => String(context[key]));
}
|
|
13386
|
+
/**
 * Reports whether a caught value is an `Error` instance.
 *
 * @param {unknown} error - Any thrown or rejected value.
 * @returns {boolean} `true` when `error instanceof Error`.
 */
function isError(error) {
	const inheritsFromError = error instanceof Error;
	return inheritsFromError;
}
|
|
13389
|
+
/**
 * Checks whether `filePath` can be accessed.
 *
 * @param {string} filePath - Path handed to `fs.access`.
 * @returns {Promise<boolean>} `true` when `fs.access` succeeds, `false` when it rejects for any reason.
 */
async function pathExists(filePath) {
	return fs.access(filePath).then(
		() => true,
		() => false
	);
}
|
|
13397
|
+
/**
 * Collects every `.md` file (extension matched case-insensitively) below `dir`,
 * descending into sub-directories.
 *
 * @param {string} dir - Directory to scan.
 * @returns {Promise<string[]>} Matching paths in default `Array.prototype.sort` order.
 */
async function collectMarkdownFiles(dir) {
	const markdownPaths = [];
	const pendingDirs = [dir];
	while (pendingDirs.length > 0) {
		const currentDir = pendingDirs.pop();
		const children = await fs.readdir(currentDir, { withFileTypes: true });
		const childDirs = [];
		for (const child of children) {
			const childPath = path.join(currentDir, child.name);
			if (child.isDirectory()) childDirs.push(childPath);
			else if (child.isFile() && child.name.toLowerCase().endsWith(".md")) markdownPaths.push(childPath);
		}
		// Push in reverse so the first listed sub-directory is popped (visited) next.
		pendingDirs.push(...childDirs.reverse());
	}
	return markdownPaths.sort();
}
|
|
13410
|
+
/**
 * Picks the markdown file produced by an external converter run.
 *
 * Prefers a file named `<basename>.md` (compared case-insensitively); when no
 * such file exists, the first collected file is used.
 *
 * @param {string} outputDir - Directory the converter wrote into.
 * @param {string} basename - Input file name without its extension.
 * @returns {Promise<string>} Path of the selected markdown file.
 * @throws {Error} When no markdown file exists under `outputDir`.
 */
async function selectMarkdownFile(outputDir, basename) {
	const candidates = await collectMarkdownFiles(outputDir);
	if (candidates.length === 0) throw new Error(`External PDF converter did not produce a markdown file in ${outputDir}`);
	const wantedName = `${basename}.md`.toLowerCase();
	for (const candidate of candidates) {
		if (path.basename(candidate).toLowerCase() === wantedName) return candidate;
	}
	return candidates[0];
}
|
|
13416
|
+
/**
 * Wraps a failure from the external converter run in an `Error` whose message
 * lists the command and whatever diagnostics the failure carries.
 *
 * The original failure is attached as `cause` so its stack and extra fields
 * stay available to callers and debuggers.
 *
 * @param {unknown} error - Value caught around the converter invocation.
 * @param {string} command$1 - Human-readable command line, used in the message only.
 * @returns {Error} New error; for non-`Error` inputs the message is `String(error)`.
 */
function formatCommandError(error, command$1) {
	if (!(error instanceof Error)) return new Error(String(error), { cause: error });
	const details = [`External PDF converter failed: ${command$1}`];
	if (typeof error.exitCode === "number") details.push(`exitCode=${error.exitCode}`);
	if (error.signal) details.push(`signal=${String(error.signal)}`);
	// The message is dropped whenever stderr is present, so surface a timeout
	// explicitly; otherwise a timed-out run that printed to stderr would look
	// like an ordinary crash.
	if (error.timedOut === true) details.push("timedOut=true");
	const stderrText = typeof error.stderr === "string" ? error.stderr.trim() : "";
	if (stderrText) details.push(stderrText);
	else if (error.message) details.push(error.message);
	return new Error(details.join("\n"), { cause: error });
}
|
|
13425
|
+
/**
 * PDF converter that shells out to a configured command and reads back the
 * markdown file it produces.
 *
 * `config` supplies `command`, `args` (templates expanded by `applyTemplate`),
 * and optionally `outputFile` and `timeout` (seconds).
 */
var ExternalCommandPdfConverter = class {
	name;
	/**
	 * @param {string} name$1 - Converter name reported in result metadata.
	 * @param {object} config - External command configuration.
	 */
	constructor(name$1, config) {
		this.config = config;
		this.name = name$1;
	}
	/**
	 * Runs the external command for one PDF and returns the produced markdown.
	 *
	 * @param {*} input - PDF bytes; only written to disk when no usable `filePath` is given.
	 * @param {string} [filePath] - Existing PDF path handed to the command as `{input}`.
	 * @returns {Promise<{ text: string, pageCount: number, metadata: { converter: string, outputPath: string } }>}
	 *   `pageCount` is always 0 because this converter does not determine it.
	 * @throws {Error} Command and output failures are wrapped by `formatCommandError`.
	 */
	async convert(input, filePath) {
		const tempRoot = await fs.mkdtemp(path.join(os.tmpdir(), "aiex-pdf-"));
		// Everything after mkdtemp runs under `finally`, so a failing mkdir or
		// writeFile can no longer leave the temp directory behind.
		try {
			const outputDir = path.join(tempRoot, "output");
			await fs.mkdir(outputDir, { recursive: true });
			// `||` rather than `??`: an empty-string path is unusable and must take the
			// same "write a temp copy" branch as a missing one.
			const inputPath = filePath || path.join(tempRoot, "input.pdf");
			if (inputPath !== filePath) await fs.writeFile(inputPath, input);
			const basename = path.basename(inputPath, path.extname(inputPath));
			const context = {
				input: inputPath,
				outputDir,
				basename
			};
			const args = this.config.args.map((arg) => applyTemplate(arg, context));
			// `timeout` is configured in seconds; execa expects milliseconds.
			const timeoutMs = (this.config.timeout ?? 600) * 1e3;
			try {
				await execa(this.config.command, args, {
					shell: false,
					timeout: timeoutMs,
					maxBuffer: 1024 * 1024 * 20
				});
				const outputPath = this.config.outputFile ? applyTemplate(this.config.outputFile, context) : await selectMarkdownFile(outputDir, basename);
				if (!await pathExists(outputPath)) throw new Error(`External PDF converter output was not found: ${outputPath}`);
				return {
					text: await fs.readFile(outputPath, "utf-8"),
					pageCount: 0,
					metadata: {
						converter: this.name,
						// NOTE(review): when the file lives under the temp output directory this
						// path no longer exists once `convert` returns.
						outputPath
					}
				};
			} catch (error) {
				throw formatCommandError(error, `${this.config.command} ${args.join(" ")}`);
			}
		} finally {
			await fs.rm(tempRoot, {
				recursive: true,
				force: true
			});
		}
	}
};
|
|
13471
|
+
|
|
13376
13472
|
//#endregion
|
|
13377
13473
|
//#region src/core/pdf-converter/unpdf.ts
|
|
13378
13474
|
var UnpdfConverter = class {
|
|
@@ -13391,10 +13487,40 @@ var UnpdfConverter = class {
|
|
|
13391
13487
|
//#endregion
|
|
13392
13488
|
//#region src/core/pdf-converter/factory.ts
|
|
13393
13489
|
const registry = /* @__PURE__ */ new Map();
|
|
13394
|
-
|
|
13395
|
-
|
|
13490
|
+
/**
 * Converter decorator: tries `primary` first and, if it throws, retries the
 * same input with `fallback`. Reports itself under the primary's name.
 */
var FallbackPdfConverter = class {
	name;
	/**
	 * @param {{ name: string, convert: Function }} primary - Preferred converter.
	 * @param {{ name: string, convert: Function }} fallback - Converter used when `primary` fails.
	 */
	constructor(primary, fallback) {
		this.primary = primary;
		this.fallback = fallback;
		this.name = primary.name;
	}
	/**
	 * @param {*} input - PDF bytes, forwarded unchanged.
	 * @param {string} [filePath] - Source path, forwarded unchanged.
	 * @returns {Promise<object>} Result of whichever converter succeeded.
	 * @throws Whatever `fallback.convert` throws when both converters fail.
	 */
	async convert(input, filePath) {
		try {
			return await this.primary.convert(input, filePath);
		} catch (error) {
			// Do not fail silently: a misconfigured primary converter would otherwise
			// be indistinguishable from a working one.
			const reason = error instanceof Error ? error.message : String(error);
			consola.warn(`PDF converter "${this.primary.name}" failed, falling back to "${this.fallback.name}": ${reason}`);
			return await this.fallback.convert(input, filePath);
		}
	}
};
|
|
13505
|
+
/**
 * Optionally wraps `converter` so that failures fall back to `UnpdfConverter`.
 *
 * @param {object} converter - Converter to use as the primary.
 * @param {{ fallbackToUnpdf?: boolean }} config - Converter configuration.
 * @returns {object} `converter` itself when `fallbackToUnpdf` is falsy, otherwise a `FallbackPdfConverter`.
 */
function withFallback(converter, config) {
	return config.fallbackToUnpdf ? new FallbackPdfConverter(converter, new UnpdfConverter()) : converter;
}
|
|
13509
|
+
function createPdfConverter(config) {
|
|
13510
|
+
if (typeof config === "object") {
|
|
13511
|
+
if (config.converter === "mineru") {
|
|
13512
|
+
const mineruConfig = config.mineru ?? DEFAULT_MINERU_CONFIG;
|
|
13513
|
+
return withFallback(new ExternalCommandPdfConverter("mineru", mineruConfig), mineruConfig);
|
|
13514
|
+
}
|
|
13515
|
+
if (config.converter === "external") {
|
|
13516
|
+
if (!config.external) throw new Error("External PDF converter is selected but no external command is configured.");
|
|
13517
|
+
return withFallback(new ExternalCommandPdfConverter("external", config.external), config.external);
|
|
13518
|
+
}
|
|
13519
|
+
}
|
|
13520
|
+
const key = typeof config === "string" ? config : "unpdf";
|
|
13396
13521
|
let instance = registry.get(key);
|
|
13397
13522
|
if (!instance) {
|
|
13523
|
+
if (key !== "unpdf") throw new Error(`PDF converter "${key}" requires configuration.`);
|
|
13398
13524
|
instance = new UnpdfConverter();
|
|
13399
13525
|
registry.set(key, instance);
|
|
13400
13526
|
}
|
|
@@ -13425,7 +13551,6 @@ const SUPPORTED_EXTENSIONS = new Set([
|
|
|
13425
13551
|
"yml"
|
|
13426
13552
|
]);
|
|
13427
13553
|
const JSON_EXT_RE = /\.json$/;
|
|
13428
|
-
const PDF_CONVERTER = createPdfConverter();
|
|
13429
13554
|
async function ensureDatabaseReady(dbPath, schema) {
|
|
13430
13555
|
try {
|
|
13431
13556
|
await fs.access(dbPath);
|
|
@@ -13490,7 +13615,7 @@ async function listSchemas(aiexDir) {
|
|
|
13490
13615
|
return [];
|
|
13491
13616
|
}
|
|
13492
13617
|
}
|
|
13493
|
-
async function readExtractFileInput(filePath) {
|
|
13618
|
+
async function readExtractFileInput(filePath, aiConfig) {
|
|
13494
13619
|
const ext = path.extname(filePath).toLowerCase().replace(".", "");
|
|
13495
13620
|
if (FILE_PART_EXTENSIONS.has(ext)) return {
|
|
13496
13621
|
text: "",
|
|
@@ -13498,7 +13623,7 @@ async function readExtractFileInput(filePath) {
|
|
|
13498
13623
|
};
|
|
13499
13624
|
if (ext === "pdf") {
|
|
13500
13625
|
const buffer = await fs.readFile(filePath);
|
|
13501
|
-
const result = await
|
|
13626
|
+
const result = await createPdfConverter(aiConfig?.pdf).convert(buffer, filePath);
|
|
13502
13627
|
consola.info(`Extracted ${result.pageCount} page(s) from PDF`);
|
|
13503
13628
|
return { text: result.text };
|
|
13504
13629
|
}
|
|
@@ -13581,7 +13706,7 @@ async function extractSingle(aiexDir, config, aiConfig, schemaName, text$1, file
|
|
|
13581
13706
|
}
|
|
13582
13707
|
async function processOneFile(aiexDir, config, aiConfig, schemaName, filePath, modelOverride) {
|
|
13583
13708
|
try {
|
|
13584
|
-
const input = await readExtractFileInput(filePath);
|
|
13709
|
+
const input = await readExtractFileInput(filePath, aiConfig);
|
|
13585
13710
|
const r = await extractSingle(aiexDir, config, aiConfig, schemaName, input.text, input.filePath, modelOverride, { quiet: false });
|
|
13586
13711
|
if (r.success) {
|
|
13587
13712
|
consola.success(`Processed: ${path.basename(filePath)}`);
|
|
@@ -13739,7 +13864,7 @@ const extractCommand = defineCommand({
|
|
|
13739
13864
|
let text$1 = "";
|
|
13740
13865
|
let filePath;
|
|
13741
13866
|
if (args.file) try {
|
|
13742
|
-
const input = await readExtractFileInput(args.file);
|
|
13867
|
+
const input = await readExtractFileInput(args.file, aiConfig);
|
|
13743
13868
|
text$1 = input.text;
|
|
13744
13869
|
filePath = input.filePath;
|
|
13745
13870
|
} catch (e) {
|
|
@@ -13820,7 +13945,7 @@ async function runInteractive(aiexDir, config, aiConfig, modelOverride) {
|
|
|
13820
13945
|
}
|
|
13821
13946
|
const fp = filePathStr;
|
|
13822
13947
|
try {
|
|
13823
|
-
const input = await readExtractFileInput(fp);
|
|
13948
|
+
const input = await readExtractFileInput(fp, aiConfig);
|
|
13824
13949
|
return (await extractSingle(aiexDir, config, aiConfig, schemaName, input.text, input.filePath, modelOverride)).success;
|
|
13825
13950
|
} catch (e) {
|
|
13826
13951
|
consola.error(`Cannot read file: ${fp} — ${e instanceof Error ? e.message : String(e)}`);
|
|
@@ -5,6 +5,7 @@ import process from "node:process";
|
|
|
5
5
|
import { fileURLToPath } from "node:url";
|
|
6
6
|
import Database from "better-sqlite3";
|
|
7
7
|
import * as esbuild from "esbuild";
|
|
8
|
+
import lockfile from "proper-lockfile";
|
|
8
9
|
|
|
9
10
|
//#region src/core/schema-sqlite/migration-name.ts
|
|
10
11
|
function sanitizeMigrationName(name) {
|
|
@@ -119,35 +120,22 @@ function applyMigrationWithTransaction(dbPath, sqlStatements) {
|
|
|
119
120
|
}
|
|
120
121
|
}
|
|
121
122
|
const LOCK_FILE = ".migrate.lock";
|
|
122
|
-
async function
|
|
123
|
-
const lockPath = path.join(aiexDir, LOCK_FILE);
|
|
123
|
+
/**
 * Takes the migration lock for `aiexDir` via proper-lockfile, creating the
 * directory first if needed.
 *
 * The lock is attempted once (`retries: 0`), so a held lock fails immediately
 * instead of blocking.
 *
 * @param {string} aiexDir - Directory that holds the lock file.
 * @returns {Promise<Function>} Value resolved by `lockfile.lock`; the caller awaits it to release the lock.
 * @throws {Error} When the lock cannot be acquired; the underlying error is kept as `cause`.
 */
async function acquireMigrationLock(aiexDir) {
	await fs.mkdir(aiexDir, { recursive: true });
	const lockPath = path.join(aiexDir, LOCK_FILE);
	try {
		return await lockfile.lock(aiexDir, {
			lockfilePath: lockPath,
			realpath: false,
			stale: 3e5,
			update: 1e4,
			retries: 0
		});
	} catch (error) {
		const message = error instanceof Error ? error.message : String(error);
		// Keep the original error reachable (code, stack) rather than only its text.
		throw new Error(`Migration is already running or the lock could not be acquired. Wait for it to complete or remove ${lockPath} if stale. ${message}`, { cause: error });
	}
}
|
|
147
|
-
async function releaseLock(aiexDir) {
|
|
148
|
-
const lockPath = path.join(aiexDir, LOCK_FILE);
|
|
149
|
-
await fs.unlink(lockPath).catch(() => {});
|
|
150
|
-
}
|
|
151
139
|
async function main() {
|
|
152
140
|
const args = process.argv.slice(2);
|
|
153
141
|
const schemaPath = args[0];
|
|
@@ -159,8 +147,7 @@ async function main() {
|
|
|
159
147
|
process.exit(1);
|
|
160
148
|
}
|
|
161
149
|
try {
|
|
162
|
-
const
|
|
163
|
-
await acquireLock(aiexDir);
|
|
150
|
+
const releaseLock = await acquireMigrationLock(path.dirname(path.dirname(migrationsPath)));
|
|
164
151
|
try {
|
|
165
152
|
const exports = await loadSchemaExports(schemaPath);
|
|
166
153
|
let dbMissing = false;
|
|
@@ -188,7 +175,7 @@ async function main() {
|
|
|
188
175
|
tag
|
|
189
176
|
}));
|
|
190
177
|
} finally {
|
|
191
|
-
await releaseLock(
|
|
178
|
+
await releaseLock();
|
|
192
179
|
}
|
|
193
180
|
} catch (error) {
|
|
194
181
|
const message = error instanceof Error ? error.message : String(error);
|
|
@@ -64,7 +64,7 @@ function doctorDiagnosticsTableRows(d) {
|
|
|
64
64
|
//#endregion
|
|
65
65
|
//#region package.json
|
|
66
66
|
var name = "aiex-cli";
|
|
67
|
-
var version = "0.0.1-beta.
|
|
67
|
+
var version = "0.0.1-beta.29";
|
|
68
68
|
var description = "JSON Schema → SQLite with AI-powered data extraction";
|
|
69
69
|
var package_default = {
|
|
70
70
|
name,
|
|
@@ -137,9 +137,12 @@ var package_default = {
|
|
|
137
137
|
"drizzle-orm": "catalog:",
|
|
138
138
|
"es-toolkit": "catalog:",
|
|
139
139
|
"esbuild": "catalog:",
|
|
140
|
+
"execa": "catalog:",
|
|
140
141
|
"hono": "catalog:",
|
|
142
|
+
"p-retry": "catalog:",
|
|
141
143
|
"picocolors": "catalog:",
|
|
142
144
|
"picomatch": "catalog:",
|
|
145
|
+
"proper-lockfile": "catalog:",
|
|
143
146
|
"tsx": "catalog:cli",
|
|
144
147
|
"unpdf": "catalog:",
|
|
145
148
|
"update-notifier": "catalog:",
|
|
@@ -151,6 +154,7 @@ var package_default = {
|
|
|
151
154
|
"@types/better-sqlite3": "catalog:types",
|
|
152
155
|
"@types/node": "catalog:types",
|
|
153
156
|
"@types/picomatch": "catalog:",
|
|
157
|
+
"@types/proper-lockfile": "catalog:",
|
|
154
158
|
"@types/update-notifier": "catalog:",
|
|
155
159
|
"@vitest/coverage-v8": "catalog:testing",
|
|
156
160
|
"eslint": "catalog:cli",
|
|
@@ -198,6 +202,22 @@ const PromptConfigSchema = z.object({
|
|
|
198
202
|
userTemplate: z.string().min(1)
|
|
199
203
|
});
|
|
200
204
|
const ExtractionConfigSchema = z.object({ outputDir: z.string().min(1) });
|
|
205
|
+
/**
 * Settings for a PDF converter that is run as an external command.
 * `args` and `outputFile` may contain `{input}`, `{outputDir}` and
 * `{basename}` placeholders.
 */
const ExternalPdfConverterConfigSchema = z.object({
	command: z.string().min(1),
	args: z.string().array(),
	outputFile: z.string().min(1).optional(),
	// NOTE(review): whether `.default(600)` still applies once wrapped in
	// `.optional()` depends on the Zod major version — confirm. Consumers already
	// fall back with `timeout ?? 600`.
	timeout: z.number().int().positive().default(600).optional(),
	fallbackToUnpdf: z.boolean().optional()
});
/**
 * PDF section of the AI config: which converter to use, plus optional
 * command settings for the `mineru` and `external` converters.
 */
const PdfConfigSchema = z.object({
	converter: z.enum(["unpdf", "mineru", "external"]),
	mineru: ExternalPdfConverterConfigSchema.optional(),
	external: ExternalPdfConverterConfigSchema.optional()
});
|
|
201
221
|
const LangfuseConfigSchema = z.object({
|
|
202
222
|
publicKey: z.string(),
|
|
203
223
|
secretKey: z.string(),
|
|
@@ -207,6 +227,7 @@ const AIConfigSchema = z.object({
|
|
|
207
227
|
provider: AIProviderConfigSchema,
|
|
208
228
|
prompt: PromptConfigSchema,
|
|
209
229
|
extraction: ExtractionConfigSchema,
|
|
230
|
+
pdf: PdfConfigSchema.optional(),
|
|
210
231
|
langfuse: LangfuseConfigSchema.optional()
|
|
211
232
|
});
|
|
212
233
|
|
|
@@ -247,10 +268,26 @@ Extraction requirements:
|
|
|
247
268
|
{text}`
|
|
248
269
|
};
|
|
249
270
|
const DEFAULT_EXTRACTION_CONFIG = { outputDir: ".aiex/extracted" };
|
|
271
|
+
/**
 * Default command settings for the `mineru` converter:
 * `mineru -p {input} -o {outputDir}`, a 600 second timeout, and fallback to
 * unpdf enabled.
 */
const DEFAULT_MINERU_CONFIG = {
	command: "mineru",
	args: ["-p", "{input}", "-o", "{outputDir}"],
	timeout: 600,
	fallbackToUnpdf: true
};
|
|
282
|
+
/** Default PDF settings: use the `unpdf` converter, with the mineru defaults attached. */
const DEFAULT_PDF_CONFIG = { converter: "unpdf", mineru: DEFAULT_MINERU_CONFIG };
/** Complete default AI configuration, assembled from the per-section defaults. */
const DEFAULT_AI_CONFIG = {
	provider: DEFAULT_PROVIDER_CONFIG,
	prompt: DEFAULT_PROMPT_CONFIG,
	extraction: DEFAULT_EXTRACTION_CONFIG,
	pdf: DEFAULT_PDF_CONFIG
};
|
|
255
292
|
|
|
256
293
|
//#endregion
|
|
@@ -274,7 +311,7 @@ async function writeAIConfig(aiexDir, config) {
|
|
|
274
311
|
await addToGitignore(aiexDir, CONFIG_FILE_NAME);
|
|
275
312
|
}
|
|
276
313
|
/**
 * Returns a deep copy of the default AI configuration, so callers can modify
 * the result without touching the shared `DEFAULT_AI_CONFIG` object.
 *
 * @returns {object} A `structuredClone` of `DEFAULT_AI_CONFIG`.
 */
function getDefaultAIConfig() {
	const independentCopy = structuredClone(DEFAULT_AI_CONFIG);
	return independentCopy;
}
|
|
279
316
|
async function addToGitignore(aiexDir, fileName) {
|
|
280
317
|
const projectRoot = path.dirname(aiexDir);
|
|
@@ -724,4 +761,4 @@ async function collectDoctorDiagnostics(options = {}) {
|
|
|
724
761
|
}
|
|
725
762
|
|
|
726
763
|
//#endregion
|
|
727
|
-
export {
|
|
764
|
+
export { doctorDiagnosticsTableRows as C, buildDoctorDiagnostics as S, seedConfig as _, parseJsonSchema as a, package_default as b, getDefaultAIConfig as c, DEFAULT_MINERU_CONFIG as d, DEFAULT_PROMPT_CONFIG as f, createConfig as g, AIConfigSchema as h, JsonSchemaDefinitionSchema as i, readAIConfig as l, PLACEHOLDER_TEXT as m, createMigrationConfig as n, toSnakeCase as o, PLACEHOLDER_SCHEMA as p, generateDrizzleConfig as r, generateDrizzleSchema as s, collectDoctorDiagnostics as t, writeAIConfig as u, description as v, formatDoctorDiagnosticsJson as w, version as x, name as y };
|
package/dist/index.mjs
CHANGED
|
@@ -1,3 +1,3 @@
|
|
|
1
|
-
import { C as
|
|
1
|
+
import { C as doctorDiagnosticsTableRows, S as buildDoctorDiagnostics, a as parseJsonSchema, i as JsonSchemaDefinitionSchema, n as createMigrationConfig, r as generateDrizzleConfig, s as generateDrizzleSchema, t as collectDoctorDiagnostics, w as formatDoctorDiagnosticsJson } from "./doctor-collector-j2dG7dG1.mjs";
|
|
2
2
|
|
|
3
3
|
export { JsonSchemaDefinitionSchema, buildDoctorDiagnostics, collectDoctorDiagnostics, createMigrationConfig, doctorDiagnosticsTableRows, formatDoctorDiagnosticsJson, generateDrizzleConfig, generateDrizzleSchema, parseJsonSchema };
|