aiex-cli 0.0.7-beta.1 → 0.1.0-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.mjs +93 -13
- package/dist/{generate-drizzle-schema-D0o_j12G.mjs → generate-drizzle-schema-BAMq_Ufp.mjs} +199 -72
- package/dist/index.d.mts +24 -3
- package/dist/index.mjs +2 -2
- package/dist/{core/schema-sqlite → infrastructure/schema}/migrate-helper.mjs +2 -2
- package/dist/table-schema.json +18 -0
- package/dist/web/assets/AISettings-CxoghpZX.js +264 -0
- package/dist/web/assets/{DataBrowser-GAA-pGq0.js → DataBrowser-BqmnFDWC.js} +2 -2
- package/dist/web/assets/{ExtractionViewer-BEYHgPw2.js → ExtractionViewer-Bs8c6xa2.js} +1 -1
- package/dist/web/assets/{JsonSchemaEditor-CfPzcMKJ.js → JsonSchemaEditor-ITVm2zG1.js} +1 -1
- package/dist/web/assets/{api-client-b4ZBXpNH.js → api-client-EJKabzZK.js} +1 -1
- package/dist/web/assets/{index-D7eI2nAX.js → index-DhL7jaO_.js} +3 -3
- package/dist/web/assets/object-utils-CqCiBqJ4.js +1 -0
- package/dist/web/index.html +3 -3
- package/dist/{zh-CN-wEUNhuHM.mjs → zh-CN-B2yrInX9.mjs} +2 -0
- package/package.json +6 -4
- package/src/{core/schema-sqlite → infrastructure/schema}/migrate-helper.ts +1 -1
- package/src/infrastructure/schema/migration-name.ts +14 -0
- package/dist/web/assets/AISettings-Dbma0Oku.js +0 -264
- package/dist/web/assets/object-utils-C6FkG7fw.js +0 -1
- package/src/core/schema-sqlite/migration-name.ts +0 -1
- /package/dist/{core/schema-sqlite → infrastructure/schema}/migrate-helper.d.mts +0 -0
package/dist/cli.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { C as
|
|
1
|
+
import { A as package_default, C as DEFAULT_PROMPT_CONFIG, D as seedConfig, E as createConfig, O as description, S as DEFAULT_MINERU_CONFIG, T as PLACEHOLDER_TEXT, _ as doctorDiagnosticsSeverityRows, a as recognizeImageText, b as DEFAULT_LITEPARSE_CONFIG, c as t, d as writeAIConfig, f as AIConfigSchema, h as toSnakeCase, j as version, k as name, l as getDefaultAIConfig, m as parseJsonSchema, n as collectDoctorDiagnostics, o as shouldUseImageOcrFallback, p as JsonSchemaDefinitionSchema, r as createMigrationConfig, s as initI18n, t as generateDrizzleSchema, u as readAIConfig, v as doctorDiagnosticsTableRows, w as PLACEHOLDER_SCHEMA, x as DEFAULT_MINERU_API_CONFIG, y as formatDoctorDiagnosticsJson } from "./generate-drizzle-schema-BAMq_Ufp.mjs";
|
|
2
2
|
import { createRequire } from "node:module";
|
|
3
3
|
import fs from "node:fs/promises";
|
|
4
4
|
import os from "node:os";
|
|
@@ -128,6 +128,16 @@ const doctorCommand = defineCommand({
|
|
|
128
128
|
});
|
|
129
129
|
table.push(...doctorDiagnosticsTableRows(diagnostics));
|
|
130
130
|
process.stdout.write(`${table.toString()}\n`);
|
|
131
|
+
const severityRows = doctorDiagnosticsSeverityRows(diagnostics);
|
|
132
|
+
if (severityRows.length) {
|
|
133
|
+
const summary = new CliTable3({
|
|
134
|
+
head: ["status", "diagnostic"],
|
|
135
|
+
colAligns: ["right", "left"],
|
|
136
|
+
style: { compact: true }
|
|
137
|
+
});
|
|
138
|
+
summary.push(...severityRows);
|
|
139
|
+
process.stdout.write(`${summary.toString()}\n`);
|
|
140
|
+
}
|
|
131
141
|
} catch (err) {
|
|
132
142
|
consola.error(t("command.doctor.diagnosticsFailed", { error: err }));
|
|
133
143
|
}
|
|
@@ -609,6 +619,66 @@ var ExternalCommandPdfConverter = class {
|
|
|
609
619
|
}
|
|
610
620
|
};
|
|
611
621
|
|
|
622
|
+
//#endregion
|
|
623
|
+
//#region src/infrastructure/pdf/liteparse-converter.ts
|
|
624
|
+
const TESSERACT_FAILURE_RE = /tesseract|tessdata|traineddata|language/i;
|
|
625
|
+
function textFromPages(pages = []) {
|
|
626
|
+
return pages.map((page) => {
|
|
627
|
+
if (typeof page.text === "string") return page.text;
|
|
628
|
+
return page.textItems?.map((item) => item.text).filter(Boolean).join("\n") ?? "";
|
|
629
|
+
}).filter(Boolean).join("\n\n");
|
|
630
|
+
}
|
|
631
|
+
function hasBoundingBoxes(pages = []) {
|
|
632
|
+
return pages.some((page) => page.textItems?.some((item) => typeof item.x === "number" && typeof item.y === "number" && typeof item.width === "number" && typeof item.height === "number"));
|
|
633
|
+
}
|
|
634
|
+
async function loadLiteParse() {
|
|
635
|
+
try {
|
|
636
|
+
return (await import("@llamaindex/liteparse")).LiteParse;
|
|
637
|
+
} catch (error) {
|
|
638
|
+
const details = error instanceof Error ? error.message : String(error);
|
|
639
|
+
throw new Error(`LiteParse is selected but @llamaindex/liteparse is not available. Install optional dependencies or switch the PDF converter to unpdf, mineru, mineru_api, or external. ${details}`);
|
|
640
|
+
}
|
|
641
|
+
}
|
|
642
|
+
function formatLiteparseError(error, config) {
|
|
643
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
644
|
+
if (!config.ocrEnabled) return new Error(message);
|
|
645
|
+
if (!TESSERACT_FAILURE_RE.test(message)) return new Error(message);
|
|
646
|
+
return /* @__PURE__ */ new Error(`LiteParse OCR is enabled but Tesseract language data could not be loaded. Install the traineddata file for "${config.ocrLanguage ?? DEFAULT_LITEPARSE_CONFIG.ocrLanguage}" and set pdf.liteparse.tessdataPath to the directory that contains it, or disable pdf.liteparse.ocrEnabled. Original error: ${message}`);
|
|
647
|
+
}
|
|
648
|
+
var LiteparsePdfConverter = class {
|
|
649
|
+
name = "liteparse";
|
|
650
|
+
constructor(config = DEFAULT_LITEPARSE_CONFIG) {
|
|
651
|
+
this.config = config;
|
|
652
|
+
}
|
|
653
|
+
async convert(input, filePath) {
|
|
654
|
+
const LiteParse = await loadLiteParse();
|
|
655
|
+
const config = {
|
|
656
|
+
...DEFAULT_LITEPARSE_CONFIG,
|
|
657
|
+
...this.config
|
|
658
|
+
};
|
|
659
|
+
const result = await new LiteParse({
|
|
660
|
+
ocrEnabled: config.ocrEnabled,
|
|
661
|
+
ocrLanguage: config.ocrLanguage,
|
|
662
|
+
ocrServerUrl: config.ocrServerUrl,
|
|
663
|
+
tessdataPath: config.tessdataPath,
|
|
664
|
+
quiet: true
|
|
665
|
+
}).parse(filePath ?? input).catch((error) => {
|
|
666
|
+
throw formatLiteparseError(error, config);
|
|
667
|
+
});
|
|
668
|
+
const pages = Array.isArray(result.pages) ? result.pages : [];
|
|
669
|
+
return {
|
|
670
|
+
text: typeof result.text === "string" ? result.text : textFromPages(pages),
|
|
671
|
+
pageCount: pages.length,
|
|
672
|
+
metadata: {
|
|
673
|
+
converter: this.name,
|
|
674
|
+
ocrEnabled: String(config.ocrEnabled ?? false),
|
|
675
|
+
...config.ocrLanguage ? { ocrLanguage: config.ocrLanguage } : {},
|
|
676
|
+
hasBoundingBoxes: String(hasBoundingBoxes(pages))
|
|
677
|
+
}
|
|
678
|
+
};
|
|
679
|
+
}
|
|
680
|
+
};
|
|
681
|
+
|
|
612
682
|
//#endregion
|
|
613
683
|
//#region src/infrastructure/pdf/mineru-api-converter.ts
|
|
614
684
|
const TRAILING_SLASH_REGEXP = /\/+$/;
|
|
@@ -770,6 +840,7 @@ function withFallback(converter, config) {
|
|
|
770
840
|
}
|
|
771
841
|
function createPdfConverter(config) {
|
|
772
842
|
if (typeof config === "object") {
|
|
843
|
+
if (config.converter === "liteparse") return new LiteparsePdfConverter(config.liteparse ?? DEFAULT_LITEPARSE_CONFIG);
|
|
773
844
|
if (config.converter === "mineru") {
|
|
774
845
|
const mineruConfig = config.mineru ?? DEFAULT_MINERU_CONFIG;
|
|
775
846
|
return withFallback(new ExternalCommandPdfConverter("mineru", mineruConfig), mineruConfig);
|
|
@@ -783,8 +854,9 @@ function createPdfConverter(config) {
|
|
|
783
854
|
const key = typeof config === "string" ? config : "unpdf";
|
|
784
855
|
let instance = registry$1.get(key);
|
|
785
856
|
if (!instance) {
|
|
786
|
-
if (key
|
|
787
|
-
instance = new UnpdfConverter();
|
|
857
|
+
if (key === "liteparse") instance = new LiteparsePdfConverter();
|
|
858
|
+
else if (key === "unpdf") instance = new UnpdfConverter();
|
|
859
|
+
else throw new Error(t("errors.pdf.converterRequiresConfig", { name: key }));
|
|
788
860
|
registry$1.set(key, instance);
|
|
789
861
|
}
|
|
790
862
|
return instance;
|
|
@@ -1500,11 +1572,23 @@ function propertyToDescription(name$1, prop, indent = "") {
|
|
|
1500
1572
|
const lines = [];
|
|
1501
1573
|
let typeStr = prop.type;
|
|
1502
1574
|
if (prop.type === "array" && prop.items) typeStr = `array of ${prop.items.type}`;
|
|
1503
|
-
|
|
1575
|
+
const tags = [];
|
|
1576
|
+
if (prop.primary) tags.push("primary key");
|
|
1577
|
+
const tagStr = tags.length > 0 ? ` (${tags.join(", ")})` : "";
|
|
1578
|
+
lines.push(`${indent}- ${name$1}: ${typeStr}${tagStr}`);
|
|
1579
|
+
if (prop.description) lines.push(`${indent} description: ${prop.description}`);
|
|
1580
|
+
if (prop.enum && prop.enum.length > 0) lines.push(`${indent} allowed values: ${prop.enum.map((v) => JSON.stringify(v)).join(", ")}`);
|
|
1581
|
+
if (prop.pattern) lines.push(`${indent} pattern: ${prop.pattern}`);
|
|
1504
1582
|
if (prop.minLength !== void 0 || prop.maxLength !== void 0) lines.push(`${indent} length: ${prop.minLength ?? 0} - ${prop.maxLength ?? "unlimited"}`);
|
|
1583
|
+
if (prop.minimum !== void 0 || prop.maximum !== void 0) lines.push(`${indent} range: ${prop.minimum ?? "-∞"} - ${prop.maximum ?? "∞"}`);
|
|
1505
1584
|
if (prop.format) lines.push(`${indent} format: ${prop.format}`);
|
|
1506
1585
|
if (prop.unique) lines.push(`${indent} unique: true`);
|
|
1507
1586
|
if (prop.default !== void 0) lines.push(`${indent} default: ${JSON.stringify(prop.default)}`);
|
|
1587
|
+
if (prop.examples && prop.examples.length > 0) {
|
|
1588
|
+
const rendered = prop.examples.map((v) => JSON.stringify(v)).join(", ");
|
|
1589
|
+
lines.push(`${indent} examples: ${rendered}`);
|
|
1590
|
+
}
|
|
1591
|
+
if (prop.xPrompt) lines.push(`${indent} extraction hint: ${prop.xPrompt}`);
|
|
1508
1592
|
return lines.join("\n");
|
|
1509
1593
|
}
|
|
1510
1594
|
function nestedPropertyToDescription(name$1, prop, indent = "") {
|
|
@@ -2084,13 +2168,9 @@ Please output the corrected JSON object now:`;
|
|
|
2084
2168
|
|
|
2085
2169
|
//#endregion
|
|
2086
2170
|
//#region src/infrastructure/extraction/insert-extracted-data.ts
|
|
2087
|
-
const DRIZZLE_MODE_RE = /mode:\s*'(\w+)'/;
|
|
2088
|
-
function extractDrizzleMode(column) {
|
|
2089
|
-
return column.drizzleType.match(DRIZZLE_MODE_RE)?.[1];
|
|
2090
|
-
}
|
|
2091
2171
|
function convertValue(value, column) {
|
|
2092
2172
|
if (value === null || value === void 0) return null;
|
|
2093
|
-
const mode =
|
|
2173
|
+
const mode = column.columnType.class !== "real" ? column.columnType.mode : void 0;
|
|
2094
2174
|
if (mode === "json") return typeof value === "string" ? value : JSON.stringify(value);
|
|
2095
2175
|
if (mode === "boolean") return value ? 1 : 0;
|
|
2096
2176
|
if (mode === "timestamp" || mode === "timestamp_ms") {
|
|
@@ -2110,9 +2190,9 @@ function buildInsertSql(table, data) {
|
|
|
2110
2190
|
if (col.isAutoIncrement) continue;
|
|
2111
2191
|
const value = data[col.name];
|
|
2112
2192
|
if (value === void 0) {
|
|
2113
|
-
if (col.
|
|
2193
|
+
if (col.default !== void 0) {
|
|
2114
2194
|
columns.push(col.name);
|
|
2115
|
-
values.push(convertValue(
|
|
2195
|
+
values.push(convertValue(col.default, col));
|
|
2116
2196
|
}
|
|
2117
2197
|
continue;
|
|
2118
2198
|
}
|
|
@@ -3053,9 +3133,9 @@ function resolveTsxPath() {
|
|
|
3053
3133
|
}
|
|
3054
3134
|
function resolveHelperPath() {
|
|
3055
3135
|
try {
|
|
3056
|
-
return path.join(resolvePackageRoot(), "src/
|
|
3136
|
+
return path.join(resolvePackageRoot(), "src/infrastructure/schema/migrate-helper.ts");
|
|
3057
3137
|
} catch {
|
|
3058
|
-
return path.join(__dirname, "
|
|
3138
|
+
return path.join(__dirname, "../schema/migrate-helper.ts");
|
|
3059
3139
|
}
|
|
3060
3140
|
}
|
|
3061
3141
|
|
|
@@ -11,7 +11,7 @@ import { z } from "zod";
|
|
|
11
11
|
|
|
12
12
|
//#region package.json
|
|
13
13
|
var name = "aiex-cli";
|
|
14
|
-
var version = "0.0
|
|
14
|
+
var version = "0.1.0-beta.1";
|
|
15
15
|
var description = "JSON Schema → SQLite with AI-powered data extraction";
|
|
16
16
|
var package_default = {
|
|
17
17
|
name,
|
|
@@ -42,7 +42,6 @@ var package_default = {
|
|
|
42
42
|
exports: {
|
|
43
43
|
".": "./dist/index.mjs",
|
|
44
44
|
"./cli": "./dist/cli.mjs",
|
|
45
|
-
"./core/schema-sqlite/migrate-helper": "./dist/core/schema-sqlite/migrate-helper.mjs",
|
|
46
45
|
"./package.json": "./package.json"
|
|
47
46
|
},
|
|
48
47
|
main: "./dist/index.mjs",
|
|
@@ -55,8 +54,8 @@ var package_default = {
|
|
|
55
54
|
files: [
|
|
56
55
|
"bin",
|
|
57
56
|
"dist",
|
|
58
|
-
"src/
|
|
59
|
-
"src/
|
|
57
|
+
"src/infrastructure/schema/migrate-helper.ts",
|
|
58
|
+
"src/infrastructure/schema/migration-name.ts"
|
|
60
59
|
],
|
|
61
60
|
scripts: {
|
|
62
61
|
"build": "tsdown && pnpm --filter aiex-web build",
|
|
@@ -64,6 +63,7 @@ var package_default = {
|
|
|
64
63
|
"start": "tsx src/index.ts",
|
|
65
64
|
"test": "vitest",
|
|
66
65
|
"coverage": "vitest --coverage",
|
|
66
|
+
"smoke:package": "node scripts/package-smoke.mjs",
|
|
67
67
|
"typecheck": "tsc",
|
|
68
68
|
"lint": "eslint .",
|
|
69
69
|
"prepack": "cp ../../README.md . && node scripts/generate-completions.mjs",
|
|
@@ -111,7 +111,10 @@ var package_default = {
|
|
|
111
111
|
"xlsx": "catalog:",
|
|
112
112
|
"zod": "catalog:"
|
|
113
113
|
},
|
|
114
|
-
optionalDependencies: {
|
|
114
|
+
optionalDependencies: {
|
|
115
|
+
"@llamaindex/liteparse": "catalog:",
|
|
116
|
+
"@napi-rs/system-ocr": "catalog:"
|
|
117
|
+
},
|
|
115
118
|
devDependencies: {
|
|
116
119
|
"@antfu/eslint-config": "catalog:cli",
|
|
117
120
|
"@antfu/ni": "catalog:cli",
|
|
@@ -146,6 +149,13 @@ function seedConfig(config = createConfig()) {
|
|
|
146
149
|
|
|
147
150
|
//#endregion
|
|
148
151
|
//#region src/domain/ai/types.ts
|
|
152
|
+
const PDF_CONVERTER_KINDS = [
|
|
153
|
+
"unpdf",
|
|
154
|
+
"liteparse",
|
|
155
|
+
"mineru",
|
|
156
|
+
"mineru_api",
|
|
157
|
+
"external"
|
|
158
|
+
];
|
|
149
159
|
const PLACEHOLDER_SCHEMA = "{schema}";
|
|
150
160
|
const PLACEHOLDER_TEXT = "{text}";
|
|
151
161
|
const DEFAULT_MODELS = [{
|
|
@@ -192,6 +202,10 @@ const DEFAULT_MINERU_CONFIG = {
|
|
|
192
202
|
timeout: 600,
|
|
193
203
|
fallbackToUnpdf: true
|
|
194
204
|
};
|
|
205
|
+
const DEFAULT_LITEPARSE_CONFIG = {
|
|
206
|
+
ocrEnabled: false,
|
|
207
|
+
ocrLanguage: "eng"
|
|
208
|
+
};
|
|
195
209
|
const DEFAULT_MINERU_API_CONFIG = {
|
|
196
210
|
token: "",
|
|
197
211
|
baseURL: "https://mineru.net/api/v4",
|
|
@@ -202,6 +216,7 @@ const DEFAULT_MINERU_API_CONFIG = {
|
|
|
202
216
|
};
|
|
203
217
|
const DEFAULT_PDF_CONFIG = {
|
|
204
218
|
converter: "unpdf",
|
|
219
|
+
liteparse: DEFAULT_LITEPARSE_CONFIG,
|
|
205
220
|
mineru: DEFAULT_MINERU_CONFIG,
|
|
206
221
|
mineruApi: DEFAULT_MINERU_API_CONFIG
|
|
207
222
|
};
|
|
@@ -290,63 +305,112 @@ function doctorDiagnosticsTableRows(d) {
|
|
|
290
305
|
for (const err of p.errors) rows.push(["error", err]);
|
|
291
306
|
return rows;
|
|
292
307
|
}
|
|
308
|
+
function doctorDiagnosticsSeverityRows(d) {
|
|
309
|
+
const rows = [];
|
|
310
|
+
const p = d.project;
|
|
311
|
+
rows.push([p.dirExists ? "ok" : "warn", p.dirExists ? "Project directory exists" : "Project directory is not initialized"]);
|
|
312
|
+
rows.push([p.aiConfig ? "ok" : "warn", p.aiConfig ? "AI config exists" : "AI config is missing"]);
|
|
313
|
+
if (p.aiConfig) rows.push([p.aiApiKeySet ? "ok" : "warn", p.aiApiKeySet ? "AI API key is set" : "AI API key is empty"]);
|
|
314
|
+
if (p.aiConnectionOk !== null) rows.push([p.aiConnectionOk ? "ok" : "warn", p.aiConnectionOk ? "AI provider connection succeeded" : "AI provider connection failed"]);
|
|
315
|
+
if (p.pdfConverterOk !== null) rows.push([p.pdfConverterOk ? "ok" : "error", p.pdfConverterOk ? `PDF converter is available: ${p.pdfConverter ?? "none"}` : `PDF converter is unavailable: ${p.pdfConverter ?? "none"}${p.pdfConverterError ? ` (${p.pdfConverterError})` : ""}`]);
|
|
316
|
+
else if (p.pdfConverterError) rows.push(["warn", p.pdfConverterError]);
|
|
317
|
+
if (d.imageOcr.ocrOk !== null) rows.push([d.imageOcr.ocrOk ? "ok" : "warn", d.imageOcr.ocrOk ? "Image OCR self-check passed" : `Image OCR self-check failed${d.imageOcr.error ? `: ${d.imageOcr.error}` : ""}`]);
|
|
318
|
+
if (p.databaseTablesOk !== null) rows.push([p.databaseTablesOk ? "ok" : "error", p.databaseTablesOk ? "Database tables match schemas" : `Database tables are missing: ${p.missingDatabaseTables.join(", ") || "unknown"}`]);
|
|
319
|
+
for (const invalid of p.invalidSchemas) rows.push(["error", `Invalid schema ${invalid.file}: ${invalid.error}`]);
|
|
320
|
+
for (const err of p.errors) rows.push(["error", err]);
|
|
321
|
+
return rows;
|
|
322
|
+
}
|
|
293
323
|
|
|
294
324
|
//#endregion
|
|
295
325
|
//#region src/domain/schema/parser.ts
|
|
296
326
|
function toSnakeCase(str) {
|
|
297
327
|
return str.replace(/[A-Z]/g, (letter) => `_${letter.toLowerCase()}`);
|
|
298
328
|
}
|
|
299
|
-
function
|
|
300
|
-
const snakeName = toSnakeCase(name$1);
|
|
301
|
-
let drizzleType;
|
|
302
|
-
const isPrimary = property.primary ?? false;
|
|
303
|
-
const isAutoIncrement = property.autoIncrement ?? false;
|
|
329
|
+
function mapColumnType(property) {
|
|
304
330
|
switch (property.type) {
|
|
305
331
|
case "string": {
|
|
306
332
|
const format = property.format;
|
|
307
|
-
if (format === "date-time" || property.drizzle?.mode === "timestamp")
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
333
|
+
if (format === "date-time" || property.drizzle?.mode === "timestamp") return {
|
|
334
|
+
class: "integer",
|
|
335
|
+
mode: "timestamp"
|
|
336
|
+
};
|
|
337
|
+
if (format === "json" || property.drizzle?.mode === "json") return {
|
|
338
|
+
class: "text",
|
|
339
|
+
mode: "json"
|
|
340
|
+
};
|
|
341
|
+
return { class: "text" };
|
|
311
342
|
}
|
|
312
343
|
case "integer": {
|
|
313
344
|
const mode = property.drizzle?.mode;
|
|
314
|
-
if (mode === "boolean"
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
345
|
+
if (mode === "boolean" || mode === "timestamp" || mode === "timestamp_ms" || mode === "bigint") return {
|
|
346
|
+
class: "integer",
|
|
347
|
+
mode
|
|
348
|
+
};
|
|
349
|
+
return { class: "integer" };
|
|
319
350
|
}
|
|
320
|
-
case "number":
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
break;
|
|
351
|
+
case "number": return { class: "real" };
|
|
352
|
+
case "boolean": return {
|
|
353
|
+
class: "integer",
|
|
354
|
+
mode: "boolean"
|
|
355
|
+
};
|
|
326
356
|
case "object":
|
|
327
|
-
case "array":
|
|
328
|
-
|
|
329
|
-
|
|
357
|
+
case "array": return {
|
|
358
|
+
class: "text",
|
|
359
|
+
mode: "json"
|
|
360
|
+
};
|
|
330
361
|
case "null":
|
|
331
|
-
|
|
332
|
-
break;
|
|
333
|
-
default: drizzleType = "text()";
|
|
362
|
+
default: return { class: "text" };
|
|
334
363
|
}
|
|
364
|
+
}
|
|
365
|
+
function mapPropertyToColumn(name$1, property, isRequired) {
|
|
335
366
|
return {
|
|
336
|
-
name:
|
|
337
|
-
|
|
338
|
-
isPrimary,
|
|
339
|
-
isAutoIncrement,
|
|
340
|
-
isNullable: !isRequired && !
|
|
367
|
+
name: toSnakeCase(name$1),
|
|
368
|
+
columnType: mapColumnType(property),
|
|
369
|
+
isPrimary: property.primary ?? false,
|
|
370
|
+
isAutoIncrement: property.autoIncrement ?? false,
|
|
371
|
+
isNullable: !isRequired && !property.primary,
|
|
341
372
|
isUnique: property.unique ?? false,
|
|
342
|
-
|
|
373
|
+
default: property.default,
|
|
343
374
|
isForeignKey: property.foreignKey !== void 0,
|
|
344
375
|
foreignKeyRef: property.foreignKey ?? void 0
|
|
345
376
|
};
|
|
346
377
|
}
|
|
378
|
+
function getColumnChecks(prop, colName) {
|
|
379
|
+
const checks = [];
|
|
380
|
+
if (prop.type === "string") {
|
|
381
|
+
if (prop.minLength !== void 0 && prop.minLength > 0) checks.push({
|
|
382
|
+
name: `${colName}_min_length`,
|
|
383
|
+
column: colName,
|
|
384
|
+
kind: "min_length",
|
|
385
|
+
value: prop.minLength
|
|
386
|
+
});
|
|
387
|
+
if (prop.maxLength !== void 0) checks.push({
|
|
388
|
+
name: `${colName}_max_length`,
|
|
389
|
+
column: colName,
|
|
390
|
+
kind: "max_length",
|
|
391
|
+
value: prop.maxLength
|
|
392
|
+
});
|
|
393
|
+
}
|
|
394
|
+
if (prop.type === "integer" || prop.type === "number") {
|
|
395
|
+
if (prop.minimum !== void 0) checks.push({
|
|
396
|
+
name: `${colName}_min`,
|
|
397
|
+
column: colName,
|
|
398
|
+
kind: "min_value",
|
|
399
|
+
value: prop.minimum
|
|
400
|
+
});
|
|
401
|
+
if (prop.maximum !== void 0) checks.push({
|
|
402
|
+
name: `${colName}_max`,
|
|
403
|
+
column: colName,
|
|
404
|
+
kind: "max_value",
|
|
405
|
+
value: prop.maximum
|
|
406
|
+
});
|
|
407
|
+
}
|
|
408
|
+
return checks;
|
|
409
|
+
}
|
|
347
410
|
function parseObjectToTable(schema, _warnings) {
|
|
348
411
|
const tableName = schema.table.name;
|
|
349
412
|
const columns = [];
|
|
413
|
+
const checks = [];
|
|
350
414
|
const requiredFields = new Set(schema.required ?? []);
|
|
351
415
|
const autoColumns = /* @__PURE__ */ new Set();
|
|
352
416
|
if (schema.table.timestamps) {
|
|
@@ -361,37 +425,42 @@ function parseObjectToTable(schema, _warnings) {
|
|
|
361
425
|
if (autoColumns.has(snakeName)) continue;
|
|
362
426
|
const column = mapPropertyToColumn(propName, prop, requiredFields.has(propName));
|
|
363
427
|
columns.push(column);
|
|
428
|
+
checks.push(...getColumnChecks(prop, column.name));
|
|
364
429
|
}
|
|
365
430
|
if (schema.table.timestamps) {
|
|
366
|
-
|
|
431
|
+
const tsCol = {
|
|
367
432
|
name: "created_at",
|
|
368
|
-
|
|
433
|
+
columnType: {
|
|
434
|
+
class: "integer",
|
|
435
|
+
mode: "timestamp"
|
|
436
|
+
},
|
|
369
437
|
isPrimary: false,
|
|
370
438
|
isAutoIncrement: false,
|
|
371
439
|
isNullable: false,
|
|
372
|
-
isUnique: false
|
|
373
|
-
|
|
374
|
-
|
|
440
|
+
isUnique: false
|
|
441
|
+
};
|
|
442
|
+
columns.push(tsCol);
|
|
375
443
|
columns.push({
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
isPrimary: false,
|
|
379
|
-
isAutoIncrement: false,
|
|
380
|
-
isNullable: false,
|
|
381
|
-
isUnique: false,
|
|
382
|
-
defaultValue: void 0
|
|
444
|
+
...tsCol,
|
|
445
|
+
name: "updated_at"
|
|
383
446
|
});
|
|
384
447
|
}
|
|
385
448
|
if (schema.table.softDelete) columns.push({
|
|
386
449
|
name: "deleted_at",
|
|
387
|
-
|
|
450
|
+
columnType: {
|
|
451
|
+
class: "integer",
|
|
452
|
+
mode: "timestamp"
|
|
453
|
+
},
|
|
388
454
|
isPrimary: false,
|
|
389
455
|
isAutoIncrement: false,
|
|
390
456
|
isNullable: true,
|
|
391
|
-
isUnique: false
|
|
392
|
-
defaultValue: void 0
|
|
457
|
+
isUnique: false
|
|
393
458
|
});
|
|
394
|
-
return {
|
|
459
|
+
return checks.length > 0 ? {
|
|
460
|
+
name: tableName,
|
|
461
|
+
columns,
|
|
462
|
+
checks
|
|
463
|
+
} : {
|
|
395
464
|
name: tableName,
|
|
396
465
|
columns
|
|
397
466
|
};
|
|
@@ -399,24 +468,23 @@ function parseObjectToTable(schema, _warnings) {
|
|
|
399
468
|
function parseNestedObject(propName, property, parentTableName, warnings) {
|
|
400
469
|
const nestedTableName = `${parentTableName}_${toSnakeCase(propName)}`;
|
|
401
470
|
const columns = [];
|
|
471
|
+
const checks = [];
|
|
402
472
|
const relationType = property.nested?.relation === "has-many" ? "has-many" : "has-one";
|
|
403
473
|
columns.push({
|
|
404
474
|
name: "id",
|
|
405
|
-
|
|
475
|
+
columnType: { class: "integer" },
|
|
406
476
|
isPrimary: true,
|
|
407
477
|
isAutoIncrement: true,
|
|
408
478
|
isNullable: false,
|
|
409
|
-
isUnique: false
|
|
410
|
-
defaultValue: void 0
|
|
479
|
+
isUnique: false
|
|
411
480
|
});
|
|
412
481
|
columns.push({
|
|
413
482
|
name: `${parentTableName}_id`,
|
|
414
|
-
|
|
483
|
+
columnType: { class: "integer" },
|
|
415
484
|
isPrimary: false,
|
|
416
485
|
isAutoIncrement: false,
|
|
417
486
|
isNullable: false,
|
|
418
487
|
isUnique: false,
|
|
419
|
-
defaultValue: void 0,
|
|
420
488
|
isForeignKey: true,
|
|
421
489
|
foreignKeyRef: {
|
|
422
490
|
table: parentTableName,
|
|
@@ -430,6 +498,7 @@ function parseNestedObject(propName, property, parentTableName, warnings) {
|
|
|
430
498
|
}
|
|
431
499
|
const column = mapPropertyToColumn(childName, childProp, false);
|
|
432
500
|
columns.push(column);
|
|
501
|
+
checks.push(...getColumnChecks(childProp, column.name));
|
|
433
502
|
}
|
|
434
503
|
const relation = {
|
|
435
504
|
fromTable: nestedTableName,
|
|
@@ -445,7 +514,11 @@ function parseNestedObject(propName, property, parentTableName, warnings) {
|
|
|
445
514
|
name: toSnakeCase(propName)
|
|
446
515
|
};
|
|
447
516
|
return {
|
|
448
|
-
table: {
|
|
517
|
+
table: checks.length > 0 ? {
|
|
518
|
+
name: nestedTableName,
|
|
519
|
+
columns,
|
|
520
|
+
checks
|
|
521
|
+
} : {
|
|
449
522
|
name: nestedTableName,
|
|
450
523
|
columns
|
|
451
524
|
},
|
|
@@ -512,6 +585,8 @@ const JsonSchemaPropertySchema = z.lazy(() => z.object({
|
|
|
512
585
|
"null"
|
|
513
586
|
]),
|
|
514
587
|
format: z.string().optional(),
|
|
588
|
+
pattern: z.string().optional(),
|
|
589
|
+
enum: z.array(z.union([z.string(), z.number()])).optional(),
|
|
515
590
|
primary: z.boolean().optional(),
|
|
516
591
|
autoIncrement: z.boolean().optional(),
|
|
517
592
|
unique: z.boolean().optional(),
|
|
@@ -520,6 +595,8 @@ const JsonSchemaPropertySchema = z.lazy(() => z.object({
|
|
|
520
595
|
minLength: z.number().int().nonnegative().optional(),
|
|
521
596
|
minimum: z.number().optional(),
|
|
522
597
|
maximum: z.number().optional(),
|
|
598
|
+
examples: z.array(z.unknown()).optional(),
|
|
599
|
+
xPrompt: z.string().optional(),
|
|
523
600
|
drizzle: DrizzleExtensionSchema,
|
|
524
601
|
nested: NestedConfigSchema.optional(),
|
|
525
602
|
foreignKey: ForeignKeyRefSchema.optional(),
|
|
@@ -606,6 +683,15 @@ const MineruApiPdfConverterConfigSchema = z.object({
|
|
|
606
683
|
enableFormula: z.boolean().optional(),
|
|
607
684
|
enableTable: z.boolean().optional()
|
|
608
685
|
});
|
|
686
|
+
const LiteparsePdfConverterConfigSchema = z.object({
|
|
687
|
+
ocrEnabled: z.boolean().default(false),
|
|
688
|
+
ocrLanguage: z.string().min(1).default("eng"),
|
|
689
|
+
tessdataPath: z.string().min(1).optional(),
|
|
690
|
+
ocrServerUrl: z.string().url().refine((value) => {
|
|
691
|
+
const url = new URL(value);
|
|
692
|
+
return url.protocol === "http:" || url.protocol === "https:";
|
|
693
|
+
}, { message: "ocrServerUrl must use http or https" }).optional()
|
|
694
|
+
});
|
|
609
695
|
const PdfConfigSchema = z.preprocess((value) => {
|
|
610
696
|
if (!value || typeof value !== "object") return value;
|
|
611
697
|
const config = { ...value };
|
|
@@ -620,12 +706,8 @@ const PdfConfigSchema = z.preprocess((value) => {
|
|
|
620
706
|
delete config.marker;
|
|
621
707
|
return config;
|
|
622
708
|
}, z.object({
|
|
623
|
-
converter: z.enum(
|
|
624
|
-
|
|
625
|
-
"mineru",
|
|
626
|
-
"mineru_api",
|
|
627
|
-
"external"
|
|
628
|
-
]),
|
|
709
|
+
converter: z.enum(PDF_CONVERTER_KINDS),
|
|
710
|
+
liteparse: LiteparsePdfConverterConfigSchema.optional(),
|
|
629
711
|
mineru: ExternalPdfConverterConfigSchema.optional(),
|
|
630
712
|
mineruApi: MineruApiPdfConverterConfigSchema.optional(),
|
|
631
713
|
external: ExternalPdfConverterConfigSchema.optional()
|
|
@@ -1137,7 +1219,9 @@ const en = {
|
|
|
1137
1219
|
pdf: {
|
|
1138
1220
|
converterOptions: {
|
|
1139
1221
|
unpdf: "Built-in text extraction (unpdf)",
|
|
1222
|
+
liteparse: "Built-in layout parsing (liteparse)",
|
|
1140
1223
|
mineru: "MinerU (mineru)",
|
|
1224
|
+
mineru_api: "MinerU API (mineru_api)",
|
|
1141
1225
|
external: "Custom External Command"
|
|
1142
1226
|
},
|
|
1143
1227
|
ocrFallbackOptions: { localAuto: "Vision model or local OCR" }
|
|
@@ -1211,7 +1295,7 @@ async function initI18n(lng) {
|
|
|
1211
1295
|
fallbackLng: "en",
|
|
1212
1296
|
resources: {
|
|
1213
1297
|
"en": { translation: en },
|
|
1214
|
-
"zh-CN": { translation: await import("./zh-CN-
|
|
1298
|
+
"zh-CN": { translation: await import("./zh-CN-B2yrInX9.mjs").then((m) => m.zhCN) }
|
|
1215
1299
|
},
|
|
1216
1300
|
interpolation: { escapeValue: false },
|
|
1217
1301
|
returnNull: false
|
|
@@ -1372,6 +1456,14 @@ async function commandAvailable(command) {
|
|
|
1372
1456
|
return false;
|
|
1373
1457
|
}
|
|
1374
1458
|
}
|
|
1459
|
+
async function liteparseAvailable() {
|
|
1460
|
+
try {
|
|
1461
|
+
await import("@llamaindex/liteparse");
|
|
1462
|
+
return true;
|
|
1463
|
+
} catch {
|
|
1464
|
+
return false;
|
|
1465
|
+
}
|
|
1466
|
+
}
|
|
1375
1467
|
async function findImageOcrSelfCheckLogo() {
|
|
1376
1468
|
const candidates = [
|
|
1377
1469
|
path.resolve(MODULE_DIR, "logo.png"),
|
|
@@ -1426,7 +1518,11 @@ async function collectDoctorDiagnostics(options = {}) {
|
|
|
1426
1518
|
aiConnectionOk = await checkConnection(cfg.provider.baseURL);
|
|
1427
1519
|
pdfConverter = cfg.pdf?.converter ?? "unpdf";
|
|
1428
1520
|
if (pdfConverter === "unpdf") pdfConverterOk = true;
|
|
1429
|
-
else if (pdfConverter === "
|
|
1521
|
+
else if (pdfConverter === "liteparse") {
|
|
1522
|
+
pdfConverterOk = await liteparseAvailable();
|
|
1523
|
+
if (!pdfConverterOk) pdfConverterError = "@llamaindex/liteparse optional dependency is not installed or cannot be loaded";
|
|
1524
|
+
else if (cfg.pdf?.liteparse?.ocrEnabled && !cfg.pdf.liteparse.tessdataPath) pdfConverterError = "LiteParse OCR is enabled. If OCR fails, install Tesseract traineddata and configure pdf.liteparse.tessdataPath.";
|
|
1525
|
+
} else if (pdfConverter === "mineru") {
|
|
1430
1526
|
const command = cfg.pdf?.mineru?.command ?? DEFAULT_MINERU_CONFIG.command;
|
|
1431
1527
|
pdfConverterOk = await commandAvailable(command);
|
|
1432
1528
|
if (!pdfConverterOk) pdfConverterError = `Command not found: ${command}`;
|
|
@@ -1531,19 +1627,50 @@ async function collectDoctorDiagnostics(options = {}) {
|
|
|
1531
1627
|
|
|
1532
1628
|
//#endregion
|
|
1533
1629
|
//#region src/infrastructure/schema/generate-drizzle-schema.ts
|
|
1630
|
+
function renderColumnType(ct) {
|
|
1631
|
+
switch (ct.class) {
|
|
1632
|
+
case "text": return ct.mode === "json" ? `text({ mode: 'json' })` : "text()";
|
|
1633
|
+
case "integer": return ct.mode ? `integer({ mode: '${ct.mode}' })` : "integer()";
|
|
1634
|
+
case "real": return "real()";
|
|
1635
|
+
}
|
|
1636
|
+
}
|
|
1637
|
+
function renderDefaultValue(value) {
|
|
1638
|
+
return JSON.stringify(value);
|
|
1639
|
+
}
|
|
1534
1640
|
function generateColumnDefinition(column) {
|
|
1535
1641
|
if (column.isPrimary && column.isAutoIncrement) return ` ${column.name}: integer().primaryKey({ autoIncrement: true })`;
|
|
1536
|
-
let def = ` ${column.name}: ${column.
|
|
1642
|
+
let def = ` ${column.name}: ${renderColumnType(column.columnType)}`;
|
|
1537
1643
|
if (column.isPrimary) def += ".primaryKey()";
|
|
1538
1644
|
if (!column.isNullable && !column.isPrimary) def += ".notNull()";
|
|
1539
1645
|
if (column.isUnique && !column.isPrimary) def += ".unique()";
|
|
1540
|
-
if (column.
|
|
1646
|
+
if (column.default !== void 0) def += `.default(${renderDefaultValue(column.default)})`;
|
|
1541
1647
|
if (column.isForeignKey && column.foreignKeyRef) def += `.references(() => ${column.foreignKeyRef.table}.${column.foreignKeyRef.column})`;
|
|
1542
1648
|
return def;
|
|
1543
1649
|
}
|
|
1650
|
+
function renderCheckToDrizzle(check, tableVar) {
|
|
1651
|
+
const colRef = `\${${tableVar}.${check.column}}`;
|
|
1652
|
+
let expr;
|
|
1653
|
+
switch (check.kind) {
|
|
1654
|
+
case "min_length":
|
|
1655
|
+
expr = `length(${colRef}) >= ${check.value}`;
|
|
1656
|
+
break;
|
|
1657
|
+
case "max_length":
|
|
1658
|
+
expr = `length(${colRef}) <= ${check.value}`;
|
|
1659
|
+
break;
|
|
1660
|
+
case "min_value":
|
|
1661
|
+
expr = `${colRef} >= ${check.value}`;
|
|
1662
|
+
break;
|
|
1663
|
+
case "max_value":
|
|
1664
|
+
expr = `${colRef} <= ${check.value}`;
|
|
1665
|
+
break;
|
|
1666
|
+
}
|
|
1667
|
+
return ` ${check.name}: check('${check.name}', sql\`${expr}\`)`;
|
|
1668
|
+
}
|
|
1544
1669
|
function generateTableDefinition(table) {
|
|
1545
1670
|
const columns = table.columns.map(generateColumnDefinition);
|
|
1546
|
-
return `export const ${table.name} = sqliteTable('${table.name}', {\n${columns.join(",\n")}\n})`;
|
|
1671
|
+
if (!table.checks?.length) return `export const ${table.name} = sqliteTable('${table.name}', {\n${columns.join(",\n")}\n})`;
|
|
1672
|
+
const checkLines = table.checks.map((c) => renderCheckToDrizzle(c, "table"));
|
|
1673
|
+
return `export const ${table.name} = sqliteTable('${table.name}', {\n${columns.join(",\n")}\n}, (table) => ({\n${checkLines.join(",\n")}\n}))`;
|
|
1547
1674
|
}
|
|
1548
1675
|
function generateRelationDefinitions(relations, reverseRelations) {
|
|
1549
1676
|
if (relations.length === 0 && reverseRelations.length === 0) return "";
|
|
@@ -1579,7 +1706,7 @@ function generateRelationDefinitions(relations, reverseRelations) {
|
|
|
1579
1706
|
return definitions.join("\n\n");
|
|
1580
1707
|
}
|
|
1581
1708
|
function generateDrizzleSchema(result) {
|
|
1582
|
-
const imports = `import { sqliteTable, text, integer, real } from 'drizzle-orm/sqlite-core'\nimport { relations } from 'drizzle-orm'`;
|
|
1709
|
+
const imports = `import { ${`sqliteTable, text, integer, real${result.tables.some((t$1) => t$1.checks?.length) ? ", check, sql" : ""}`} } from 'drizzle-orm/sqlite-core'\nimport { relations } from 'drizzle-orm'`;
|
|
1583
1710
|
const tableDefs = result.tables.map(generateTableDefinition).join("\n\n");
|
|
1584
1711
|
const relationDefs = generateRelationDefinitions(result.relations, result.reverseRelations);
|
|
1585
1712
|
const parts = [
|
|
@@ -1593,4 +1720,4 @@ function generateDrizzleSchema(result) {
|
|
|
1593
1720
|
}
|
|
1594
1721
|
|
|
1595
1722
|
//#endregion
|
|
1596
|
-
export {
|
|
1723
|
+
export { package_default as A, DEFAULT_PROMPT_CONFIG as C, seedConfig as D, createConfig as E, description as O, DEFAULT_MINERU_CONFIG as S, PLACEHOLDER_TEXT as T, doctorDiagnosticsSeverityRows as _, recognizeImageText as a, DEFAULT_LITEPARSE_CONFIG as b, t as c, writeAIConfig as d, AIConfigSchema as f, buildDoctorDiagnostics as g, toSnakeCase as h, generateDrizzleConfig as i, version as j, name as k, getDefaultAIConfig as l, parseJsonSchema as m, collectDoctorDiagnostics as n, shouldUseImageOcrFallback as o, JsonSchemaDefinitionSchema as p, createMigrationConfig as r, initI18n as s, generateDrizzleSchema as t, readAIConfig as u, doctorDiagnosticsTableRows as v, PLACEHOLDER_SCHEMA as w, DEFAULT_MINERU_API_CONFIG as x, formatDoctorDiagnosticsJson as y };
|