aiex-cli 0.0.7-beta.1 → 0.1.0-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { C as PLACEHOLDER_TEXT, D as name, E as description, O as package_default, S as PLACEHOLDER_SCHEMA, T as seedConfig, _ as doctorDiagnosticsTableRows, a as recognizeImageText, b as DEFAULT_MINERU_CONFIG, c as t, d as writeAIConfig, f as AIConfigSchema, h as toSnakeCase, k as version, l as getDefaultAIConfig, m as parseJsonSchema, n as collectDoctorDiagnostics, o as shouldUseImageOcrFallback, p as JsonSchemaDefinitionSchema, r as createMigrationConfig, s as initI18n, t as generateDrizzleSchema, u as readAIConfig, v as formatDoctorDiagnosticsJson, w as createConfig, x as DEFAULT_PROMPT_CONFIG, y as DEFAULT_MINERU_API_CONFIG } from "./generate-drizzle-schema-D0o_j12G.mjs";
1
+ import { A as package_default, C as DEFAULT_PROMPT_CONFIG, D as seedConfig, E as createConfig, O as description, S as DEFAULT_MINERU_CONFIG, T as PLACEHOLDER_TEXT, _ as doctorDiagnosticsSeverityRows, a as recognizeImageText, b as DEFAULT_LITEPARSE_CONFIG, c as t, d as writeAIConfig, f as AIConfigSchema, h as toSnakeCase, j as version, k as name, l as getDefaultAIConfig, m as parseJsonSchema, n as collectDoctorDiagnostics, o as shouldUseImageOcrFallback, p as JsonSchemaDefinitionSchema, r as createMigrationConfig, s as initI18n, t as generateDrizzleSchema, u as readAIConfig, v as doctorDiagnosticsTableRows, w as PLACEHOLDER_SCHEMA, x as DEFAULT_MINERU_API_CONFIG, y as formatDoctorDiagnosticsJson } from "./generate-drizzle-schema-BAMq_Ufp.mjs";
2
2
  import { createRequire } from "node:module";
3
3
  import fs from "node:fs/promises";
4
4
  import os from "node:os";
@@ -128,6 +128,16 @@ const doctorCommand = defineCommand({
128
128
  });
129
129
  table.push(...doctorDiagnosticsTableRows(diagnostics));
130
130
  process.stdout.write(`${table.toString()}\n`);
131
+ const severityRows = doctorDiagnosticsSeverityRows(diagnostics);
132
+ if (severityRows.length) {
133
+ const summary = new CliTable3({
134
+ head: ["status", "diagnostic"],
135
+ colAligns: ["right", "left"],
136
+ style: { compact: true }
137
+ });
138
+ summary.push(...severityRows);
139
+ process.stdout.write(`${summary.toString()}\n`);
140
+ }
131
141
  } catch (err) {
132
142
  consola.error(t("command.doctor.diagnosticsFailed", { error: err }));
133
143
  }
@@ -609,6 +619,66 @@ var ExternalCommandPdfConverter = class {
609
619
  }
610
620
  };
611
621
 
622
+ //#endregion
623
+ //#region src/infrastructure/pdf/liteparse-converter.ts
624
+ const TESSERACT_FAILURE_RE = /tesseract|tessdata|traineddata|language/i;
625
+ function textFromPages(pages = []) {
626
+ return pages.map((page) => {
627
+ if (typeof page.text === "string") return page.text;
628
+ return page.textItems?.map((item) => item.text).filter(Boolean).join("\n") ?? "";
629
+ }).filter(Boolean).join("\n\n");
630
+ }
631
+ function hasBoundingBoxes(pages = []) {
632
+ return pages.some((page) => page.textItems?.some((item) => typeof item.x === "number" && typeof item.y === "number" && typeof item.width === "number" && typeof item.height === "number"));
633
+ }
634
+ async function loadLiteParse() {
635
+ try {
636
+ return (await import("@llamaindex/liteparse")).LiteParse;
637
+ } catch (error) {
638
+ const details = error instanceof Error ? error.message : String(error);
639
+ throw new Error(`LiteParse is selected but @llamaindex/liteparse is not available. Install optional dependencies or switch the PDF converter to unpdf, mineru, mineru_api, or external. ${details}`);
640
+ }
641
+ }
642
+ function formatLiteparseError(error, config) {
643
+ const message = error instanceof Error ? error.message : String(error);
644
+ if (!config.ocrEnabled) return new Error(message);
645
+ if (!TESSERACT_FAILURE_RE.test(message)) return new Error(message);
646
+ return /* @__PURE__ */ new Error(`LiteParse OCR is enabled but Tesseract language data could not be loaded. Install the traineddata file for "${config.ocrLanguage ?? DEFAULT_LITEPARSE_CONFIG.ocrLanguage}" and set pdf.liteparse.tessdataPath to the directory that contains it, or disable pdf.liteparse.ocrEnabled. Original error: ${message}`);
647
+ }
648
+ var LiteparsePdfConverter = class {
649
+ name = "liteparse";
650
+ constructor(config = DEFAULT_LITEPARSE_CONFIG) {
651
+ this.config = config;
652
+ }
653
+ async convert(input, filePath) {
654
+ const LiteParse = await loadLiteParse();
655
+ const config = {
656
+ ...DEFAULT_LITEPARSE_CONFIG,
657
+ ...this.config
658
+ };
659
+ const result = await new LiteParse({
660
+ ocrEnabled: config.ocrEnabled,
661
+ ocrLanguage: config.ocrLanguage,
662
+ ocrServerUrl: config.ocrServerUrl,
663
+ tessdataPath: config.tessdataPath,
664
+ quiet: true
665
+ }).parse(filePath ?? input).catch((error) => {
666
+ throw formatLiteparseError(error, config);
667
+ });
668
+ const pages = Array.isArray(result.pages) ? result.pages : [];
669
+ return {
670
+ text: typeof result.text === "string" ? result.text : textFromPages(pages),
671
+ pageCount: pages.length,
672
+ metadata: {
673
+ converter: this.name,
674
+ ocrEnabled: String(config.ocrEnabled ?? false),
675
+ ...config.ocrLanguage ? { ocrLanguage: config.ocrLanguage } : {},
676
+ hasBoundingBoxes: String(hasBoundingBoxes(pages))
677
+ }
678
+ };
679
+ }
680
+ };
681
+
612
682
  //#endregion
613
683
  //#region src/infrastructure/pdf/mineru-api-converter.ts
614
684
  const TRAILING_SLASH_REGEXP = /\/+$/;
@@ -770,6 +840,7 @@ function withFallback(converter, config) {
770
840
  }
771
841
  function createPdfConverter(config) {
772
842
  if (typeof config === "object") {
843
+ if (config.converter === "liteparse") return new LiteparsePdfConverter(config.liteparse ?? DEFAULT_LITEPARSE_CONFIG);
773
844
  if (config.converter === "mineru") {
774
845
  const mineruConfig = config.mineru ?? DEFAULT_MINERU_CONFIG;
775
846
  return withFallback(new ExternalCommandPdfConverter("mineru", mineruConfig), mineruConfig);
@@ -783,8 +854,9 @@ function createPdfConverter(config) {
783
854
  const key = typeof config === "string" ? config : "unpdf";
784
855
  let instance = registry$1.get(key);
785
856
  if (!instance) {
786
- if (key !== "unpdf") throw new Error(t("errors.pdf.converterRequiresConfig", { name: key }));
787
- instance = new UnpdfConverter();
857
+ if (key === "liteparse") instance = new LiteparsePdfConverter();
858
+ else if (key === "unpdf") instance = new UnpdfConverter();
859
+ else throw new Error(t("errors.pdf.converterRequiresConfig", { name: key }));
788
860
  registry$1.set(key, instance);
789
861
  }
790
862
  return instance;
@@ -1500,11 +1572,23 @@ function propertyToDescription(name$1, prop, indent = "") {
1500
1572
  const lines = [];
1501
1573
  let typeStr = prop.type;
1502
1574
  if (prop.type === "array" && prop.items) typeStr = `array of ${prop.items.type}`;
1503
- lines.push(`${indent}- ${name$1}: ${typeStr}`);
1575
+ const tags = [];
1576
+ if (prop.primary) tags.push("primary key");
1577
+ const tagStr = tags.length > 0 ? ` (${tags.join(", ")})` : "";
1578
+ lines.push(`${indent}- ${name$1}: ${typeStr}${tagStr}`);
1579
+ if (prop.description) lines.push(`${indent} description: ${prop.description}`);
1580
+ if (prop.enum && prop.enum.length > 0) lines.push(`${indent} allowed values: ${prop.enum.map((v) => JSON.stringify(v)).join(", ")}`);
1581
+ if (prop.pattern) lines.push(`${indent} pattern: ${prop.pattern}`);
1504
1582
  if (prop.minLength !== void 0 || prop.maxLength !== void 0) lines.push(`${indent} length: ${prop.minLength ?? 0} - ${prop.maxLength ?? "unlimited"}`);
1583
+ if (prop.minimum !== void 0 || prop.maximum !== void 0) lines.push(`${indent} range: ${prop.minimum ?? "-∞"} - ${prop.maximum ?? "∞"}`);
1505
1584
  if (prop.format) lines.push(`${indent} format: ${prop.format}`);
1506
1585
  if (prop.unique) lines.push(`${indent} unique: true`);
1507
1586
  if (prop.default !== void 0) lines.push(`${indent} default: ${JSON.stringify(prop.default)}`);
1587
+ if (prop.examples && prop.examples.length > 0) {
1588
+ const rendered = prop.examples.map((v) => JSON.stringify(v)).join(", ");
1589
+ lines.push(`${indent} examples: ${rendered}`);
1590
+ }
1591
+ if (prop.xPrompt) lines.push(`${indent} extraction hint: ${prop.xPrompt}`);
1508
1592
  return lines.join("\n");
1509
1593
  }
1510
1594
  function nestedPropertyToDescription(name$1, prop, indent = "") {
@@ -2084,13 +2168,9 @@ Please output the corrected JSON object now:`;
2084
2168
 
2085
2169
  //#endregion
2086
2170
  //#region src/infrastructure/extraction/insert-extracted-data.ts
2087
- const DRIZZLE_MODE_RE = /mode:\s*'(\w+)'/;
2088
- function extractDrizzleMode(column) {
2089
- return column.drizzleType.match(DRIZZLE_MODE_RE)?.[1];
2090
- }
2091
2171
  function convertValue(value, column) {
2092
2172
  if (value === null || value === void 0) return null;
2093
- const mode = extractDrizzleMode(column);
2173
+ const mode = column.columnType.class !== "real" ? column.columnType.mode : void 0;
2094
2174
  if (mode === "json") return typeof value === "string" ? value : JSON.stringify(value);
2095
2175
  if (mode === "boolean") return value ? 1 : 0;
2096
2176
  if (mode === "timestamp" || mode === "timestamp_ms") {
@@ -2110,9 +2190,9 @@ function buildInsertSql(table, data) {
2110
2190
  if (col.isAutoIncrement) continue;
2111
2191
  const value = data[col.name];
2112
2192
  if (value === void 0) {
2113
- if (col.defaultValue !== void 0) {
2193
+ if (col.default !== void 0) {
2114
2194
  columns.push(col.name);
2115
- values.push(convertValue(JSON.parse(col.defaultValue), col));
2195
+ values.push(convertValue(col.default, col));
2116
2196
  }
2117
2197
  continue;
2118
2198
  }
@@ -3053,9 +3133,9 @@ function resolveTsxPath() {
3053
3133
  }
3054
3134
  function resolveHelperPath() {
3055
3135
  try {
3056
- return path.join(resolvePackageRoot(), "src/core/schema-sqlite/migrate-helper.ts");
3136
+ return path.join(resolvePackageRoot(), "src/infrastructure/schema/migrate-helper.ts");
3057
3137
  } catch {
3058
- return path.join(__dirname, "../../core/schema-sqlite/migrate-helper.ts");
3138
+ return path.join(__dirname, "../schema/migrate-helper.ts");
3059
3139
  }
3060
3140
  }
3061
3141
 
@@ -11,7 +11,7 @@ import { z } from "zod";
11
11
 
12
12
  //#region package.json
13
13
  var name = "aiex-cli";
14
- var version = "0.0.7-beta.1";
14
+ var version = "0.1.0-beta.1";
15
15
  var description = "JSON Schema → SQLite with AI-powered data extraction";
16
16
  var package_default = {
17
17
  name,
@@ -42,7 +42,6 @@ var package_default = {
42
42
  exports: {
43
43
  ".": "./dist/index.mjs",
44
44
  "./cli": "./dist/cli.mjs",
45
- "./core/schema-sqlite/migrate-helper": "./dist/core/schema-sqlite/migrate-helper.mjs",
46
45
  "./package.json": "./package.json"
47
46
  },
48
47
  main: "./dist/index.mjs",
@@ -55,8 +54,8 @@ var package_default = {
55
54
  files: [
56
55
  "bin",
57
56
  "dist",
58
- "src/core/schema-sqlite/migrate-helper.ts",
59
- "src/core/schema-sqlite/migration-name.ts"
57
+ "src/infrastructure/schema/migrate-helper.ts",
58
+ "src/infrastructure/schema/migration-name.ts"
60
59
  ],
61
60
  scripts: {
62
61
  "build": "tsdown && pnpm --filter aiex-web build",
@@ -64,6 +63,7 @@ var package_default = {
64
63
  "start": "tsx src/index.ts",
65
64
  "test": "vitest",
66
65
  "coverage": "vitest --coverage",
66
+ "smoke:package": "node scripts/package-smoke.mjs",
67
67
  "typecheck": "tsc",
68
68
  "lint": "eslint .",
69
69
  "prepack": "cp ../../README.md . && node scripts/generate-completions.mjs",
@@ -111,7 +111,10 @@ var package_default = {
111
111
  "xlsx": "catalog:",
112
112
  "zod": "catalog:"
113
113
  },
114
- optionalDependencies: { "@napi-rs/system-ocr": "catalog:" },
114
+ optionalDependencies: {
115
+ "@llamaindex/liteparse": "catalog:",
116
+ "@napi-rs/system-ocr": "catalog:"
117
+ },
115
118
  devDependencies: {
116
119
  "@antfu/eslint-config": "catalog:cli",
117
120
  "@antfu/ni": "catalog:cli",
@@ -146,6 +149,13 @@ function seedConfig(config = createConfig()) {
146
149
 
147
150
  //#endregion
148
151
  //#region src/domain/ai/types.ts
152
+ const PDF_CONVERTER_KINDS = [
153
+ "unpdf",
154
+ "liteparse",
155
+ "mineru",
156
+ "mineru_api",
157
+ "external"
158
+ ];
149
159
  const PLACEHOLDER_SCHEMA = "{schema}";
150
160
  const PLACEHOLDER_TEXT = "{text}";
151
161
  const DEFAULT_MODELS = [{
@@ -192,6 +202,10 @@ const DEFAULT_MINERU_CONFIG = {
192
202
  timeout: 600,
193
203
  fallbackToUnpdf: true
194
204
  };
205
+ const DEFAULT_LITEPARSE_CONFIG = {
206
+ ocrEnabled: false,
207
+ ocrLanguage: "eng"
208
+ };
195
209
  const DEFAULT_MINERU_API_CONFIG = {
196
210
  token: "",
197
211
  baseURL: "https://mineru.net/api/v4",
@@ -202,6 +216,7 @@ const DEFAULT_MINERU_API_CONFIG = {
202
216
  };
203
217
  const DEFAULT_PDF_CONFIG = {
204
218
  converter: "unpdf",
219
+ liteparse: DEFAULT_LITEPARSE_CONFIG,
205
220
  mineru: DEFAULT_MINERU_CONFIG,
206
221
  mineruApi: DEFAULT_MINERU_API_CONFIG
207
222
  };
@@ -290,63 +305,112 @@ function doctorDiagnosticsTableRows(d) {
290
305
  for (const err of p.errors) rows.push(["error", err]);
291
306
  return rows;
292
307
  }
308
+ function doctorDiagnosticsSeverityRows(d) {
309
+ const rows = [];
310
+ const p = d.project;
311
+ rows.push([p.dirExists ? "ok" : "warn", p.dirExists ? "Project directory exists" : "Project directory is not initialized"]);
312
+ rows.push([p.aiConfig ? "ok" : "warn", p.aiConfig ? "AI config exists" : "AI config is missing"]);
313
+ if (p.aiConfig) rows.push([p.aiApiKeySet ? "ok" : "warn", p.aiApiKeySet ? "AI API key is set" : "AI API key is empty"]);
314
+ if (p.aiConnectionOk !== null) rows.push([p.aiConnectionOk ? "ok" : "warn", p.aiConnectionOk ? "AI provider connection succeeded" : "AI provider connection failed"]);
315
+ if (p.pdfConverterOk !== null) rows.push([p.pdfConverterOk ? "ok" : "error", p.pdfConverterOk ? `PDF converter is available: ${p.pdfConverter ?? "none"}` : `PDF converter is unavailable: ${p.pdfConverter ?? "none"}${p.pdfConverterError ? ` (${p.pdfConverterError})` : ""}`]);
316
+ else if (p.pdfConverterError) rows.push(["warn", p.pdfConverterError]);
317
+ if (d.imageOcr.ocrOk !== null) rows.push([d.imageOcr.ocrOk ? "ok" : "warn", d.imageOcr.ocrOk ? "Image OCR self-check passed" : `Image OCR self-check failed${d.imageOcr.error ? `: ${d.imageOcr.error}` : ""}`]);
318
+ if (p.databaseTablesOk !== null) rows.push([p.databaseTablesOk ? "ok" : "error", p.databaseTablesOk ? "Database tables match schemas" : `Database tables are missing: ${p.missingDatabaseTables.join(", ") || "unknown"}`]);
319
+ for (const invalid of p.invalidSchemas) rows.push(["error", `Invalid schema ${invalid.file}: ${invalid.error}`]);
320
+ for (const err of p.errors) rows.push(["error", err]);
321
+ return rows;
322
+ }
293
323
 
294
324
  //#endregion
295
325
  //#region src/domain/schema/parser.ts
296
326
  function toSnakeCase(str) {
297
327
  return str.replace(/[A-Z]/g, (letter) => `_${letter.toLowerCase()}`);
298
328
  }
299
- function mapPropertyToColumn(name$1, property, isRequired) {
300
- const snakeName = toSnakeCase(name$1);
301
- let drizzleType;
302
- const isPrimary = property.primary ?? false;
303
- const isAutoIncrement = property.autoIncrement ?? false;
329
+ function mapColumnType(property) {
304
330
  switch (property.type) {
305
331
  case "string": {
306
332
  const format = property.format;
307
- if (format === "date-time" || property.drizzle?.mode === "timestamp") drizzleType = `integer({ mode: 'timestamp' })`;
308
- else if (format === "json" || property.drizzle?.mode === "json") drizzleType = `text({ mode: 'json' })`;
309
- else drizzleType = "text()";
310
- break;
333
+ if (format === "date-time" || property.drizzle?.mode === "timestamp") return {
334
+ class: "integer",
335
+ mode: "timestamp"
336
+ };
337
+ if (format === "json" || property.drizzle?.mode === "json") return {
338
+ class: "text",
339
+ mode: "json"
340
+ };
341
+ return { class: "text" };
311
342
  }
312
343
  case "integer": {
313
344
  const mode = property.drizzle?.mode;
314
- if (mode === "boolean") drizzleType = `integer({ mode: 'boolean' })`;
315
- else if (mode === "timestamp" || mode === "timestamp_ms") drizzleType = `integer({ mode: '${mode}' })`;
316
- else if (mode === "bigint") drizzleType = `integer({ mode: 'bigint' })`;
317
- else drizzleType = "integer()";
318
- break;
345
+ if (mode === "boolean" || mode === "timestamp" || mode === "timestamp_ms" || mode === "bigint") return {
346
+ class: "integer",
347
+ mode
348
+ };
349
+ return { class: "integer" };
319
350
  }
320
- case "number":
321
- drizzleType = "real()";
322
- break;
323
- case "boolean":
324
- drizzleType = `integer({ mode: 'boolean' })`;
325
- break;
351
+ case "number": return { class: "real" };
352
+ case "boolean": return {
353
+ class: "integer",
354
+ mode: "boolean"
355
+ };
326
356
  case "object":
327
- case "array":
328
- drizzleType = `text({ mode: 'json' })`;
329
- break;
357
+ case "array": return {
358
+ class: "text",
359
+ mode: "json"
360
+ };
330
361
  case "null":
331
- drizzleType = "text()";
332
- break;
333
- default: drizzleType = "text()";
362
+ default: return { class: "text" };
334
363
  }
364
+ }
365
+ function mapPropertyToColumn(name$1, property, isRequired) {
335
366
  return {
336
- name: snakeName,
337
- drizzleType,
338
- isPrimary,
339
- isAutoIncrement,
340
- isNullable: !isRequired && !isPrimary,
367
+ name: toSnakeCase(name$1),
368
+ columnType: mapColumnType(property),
369
+ isPrimary: property.primary ?? false,
370
+ isAutoIncrement: property.autoIncrement ?? false,
371
+ isNullable: !isRequired && !property.primary,
341
372
  isUnique: property.unique ?? false,
342
- defaultValue: property.default !== void 0 ? JSON.stringify(property.default) : void 0,
373
+ default: property.default,
343
374
  isForeignKey: property.foreignKey !== void 0,
344
375
  foreignKeyRef: property.foreignKey ?? void 0
345
376
  };
346
377
  }
378
+ function getColumnChecks(prop, colName) {
379
+ const checks = [];
380
+ if (prop.type === "string") {
381
+ if (prop.minLength !== void 0 && prop.minLength > 0) checks.push({
382
+ name: `${colName}_min_length`,
383
+ column: colName,
384
+ kind: "min_length",
385
+ value: prop.minLength
386
+ });
387
+ if (prop.maxLength !== void 0) checks.push({
388
+ name: `${colName}_max_length`,
389
+ column: colName,
390
+ kind: "max_length",
391
+ value: prop.maxLength
392
+ });
393
+ }
394
+ if (prop.type === "integer" || prop.type === "number") {
395
+ if (prop.minimum !== void 0) checks.push({
396
+ name: `${colName}_min`,
397
+ column: colName,
398
+ kind: "min_value",
399
+ value: prop.minimum
400
+ });
401
+ if (prop.maximum !== void 0) checks.push({
402
+ name: `${colName}_max`,
403
+ column: colName,
404
+ kind: "max_value",
405
+ value: prop.maximum
406
+ });
407
+ }
408
+ return checks;
409
+ }
347
410
  function parseObjectToTable(schema, _warnings) {
348
411
  const tableName = schema.table.name;
349
412
  const columns = [];
413
+ const checks = [];
350
414
  const requiredFields = new Set(schema.required ?? []);
351
415
  const autoColumns = /* @__PURE__ */ new Set();
352
416
  if (schema.table.timestamps) {
@@ -361,37 +425,42 @@ function parseObjectToTable(schema, _warnings) {
361
425
  if (autoColumns.has(snakeName)) continue;
362
426
  const column = mapPropertyToColumn(propName, prop, requiredFields.has(propName));
363
427
  columns.push(column);
428
+ checks.push(...getColumnChecks(prop, column.name));
364
429
  }
365
430
  if (schema.table.timestamps) {
366
- columns.push({
431
+ const tsCol = {
367
432
  name: "created_at",
368
- drizzleType: `integer({ mode: 'timestamp' })`,
433
+ columnType: {
434
+ class: "integer",
435
+ mode: "timestamp"
436
+ },
369
437
  isPrimary: false,
370
438
  isAutoIncrement: false,
371
439
  isNullable: false,
372
- isUnique: false,
373
- defaultValue: void 0
374
- });
440
+ isUnique: false
441
+ };
442
+ columns.push(tsCol);
375
443
  columns.push({
376
- name: "updated_at",
377
- drizzleType: `integer({ mode: 'timestamp' })`,
378
- isPrimary: false,
379
- isAutoIncrement: false,
380
- isNullable: false,
381
- isUnique: false,
382
- defaultValue: void 0
444
+ ...tsCol,
445
+ name: "updated_at"
383
446
  });
384
447
  }
385
448
  if (schema.table.softDelete) columns.push({
386
449
  name: "deleted_at",
387
- drizzleType: `integer({ mode: 'timestamp' })`,
450
+ columnType: {
451
+ class: "integer",
452
+ mode: "timestamp"
453
+ },
388
454
  isPrimary: false,
389
455
  isAutoIncrement: false,
390
456
  isNullable: true,
391
- isUnique: false,
392
- defaultValue: void 0
457
+ isUnique: false
393
458
  });
394
- return {
459
+ return checks.length > 0 ? {
460
+ name: tableName,
461
+ columns,
462
+ checks
463
+ } : {
395
464
  name: tableName,
396
465
  columns
397
466
  };
@@ -399,24 +468,23 @@ function parseObjectToTable(schema, _warnings) {
399
468
  function parseNestedObject(propName, property, parentTableName, warnings) {
400
469
  const nestedTableName = `${parentTableName}_${toSnakeCase(propName)}`;
401
470
  const columns = [];
471
+ const checks = [];
402
472
  const relationType = property.nested?.relation === "has-many" ? "has-many" : "has-one";
403
473
  columns.push({
404
474
  name: "id",
405
- drizzleType: "integer()",
475
+ columnType: { class: "integer" },
406
476
  isPrimary: true,
407
477
  isAutoIncrement: true,
408
478
  isNullable: false,
409
- isUnique: false,
410
- defaultValue: void 0
479
+ isUnique: false
411
480
  });
412
481
  columns.push({
413
482
  name: `${parentTableName}_id`,
414
- drizzleType: "integer()",
483
+ columnType: { class: "integer" },
415
484
  isPrimary: false,
416
485
  isAutoIncrement: false,
417
486
  isNullable: false,
418
487
  isUnique: false,
419
- defaultValue: void 0,
420
488
  isForeignKey: true,
421
489
  foreignKeyRef: {
422
490
  table: parentTableName,
@@ -430,6 +498,7 @@ function parseNestedObject(propName, property, parentTableName, warnings) {
430
498
  }
431
499
  const column = mapPropertyToColumn(childName, childProp, false);
432
500
  columns.push(column);
501
+ checks.push(...getColumnChecks(childProp, column.name));
433
502
  }
434
503
  const relation = {
435
504
  fromTable: nestedTableName,
@@ -445,7 +514,11 @@ function parseNestedObject(propName, property, parentTableName, warnings) {
445
514
  name: toSnakeCase(propName)
446
515
  };
447
516
  return {
448
- table: {
517
+ table: checks.length > 0 ? {
518
+ name: nestedTableName,
519
+ columns,
520
+ checks
521
+ } : {
449
522
  name: nestedTableName,
450
523
  columns
451
524
  },
@@ -512,6 +585,8 @@ const JsonSchemaPropertySchema = z.lazy(() => z.object({
512
585
  "null"
513
586
  ]),
514
587
  format: z.string().optional(),
588
+ pattern: z.string().optional(),
589
+ enum: z.array(z.union([z.string(), z.number()])).optional(),
515
590
  primary: z.boolean().optional(),
516
591
  autoIncrement: z.boolean().optional(),
517
592
  unique: z.boolean().optional(),
@@ -520,6 +595,8 @@ const JsonSchemaPropertySchema = z.lazy(() => z.object({
520
595
  minLength: z.number().int().nonnegative().optional(),
521
596
  minimum: z.number().optional(),
522
597
  maximum: z.number().optional(),
598
+ examples: z.array(z.unknown()).optional(),
599
+ xPrompt: z.string().optional(),
523
600
  drizzle: DrizzleExtensionSchema,
524
601
  nested: NestedConfigSchema.optional(),
525
602
  foreignKey: ForeignKeyRefSchema.optional(),
@@ -606,6 +683,15 @@ const MineruApiPdfConverterConfigSchema = z.object({
606
683
  enableFormula: z.boolean().optional(),
607
684
  enableTable: z.boolean().optional()
608
685
  });
686
+ const LiteparsePdfConverterConfigSchema = z.object({
687
+ ocrEnabled: z.boolean().default(false),
688
+ ocrLanguage: z.string().min(1).default("eng"),
689
+ tessdataPath: z.string().min(1).optional(),
690
+ ocrServerUrl: z.string().url().refine((value) => {
691
+ const url = new URL(value);
692
+ return url.protocol === "http:" || url.protocol === "https:";
693
+ }, { message: "ocrServerUrl must use http or https" }).optional()
694
+ });
609
695
  const PdfConfigSchema = z.preprocess((value) => {
610
696
  if (!value || typeof value !== "object") return value;
611
697
  const config = { ...value };
@@ -620,12 +706,8 @@ const PdfConfigSchema = z.preprocess((value) => {
620
706
  delete config.marker;
621
707
  return config;
622
708
  }, z.object({
623
- converter: z.enum([
624
- "unpdf",
625
- "mineru",
626
- "mineru_api",
627
- "external"
628
- ]),
709
+ converter: z.enum(PDF_CONVERTER_KINDS),
710
+ liteparse: LiteparsePdfConverterConfigSchema.optional(),
629
711
  mineru: ExternalPdfConverterConfigSchema.optional(),
630
712
  mineruApi: MineruApiPdfConverterConfigSchema.optional(),
631
713
  external: ExternalPdfConverterConfigSchema.optional()
@@ -1137,7 +1219,9 @@ const en = {
1137
1219
  pdf: {
1138
1220
  converterOptions: {
1139
1221
  unpdf: "Built-in text extraction (unpdf)",
1222
+ liteparse: "Built-in layout parsing (liteparse)",
1140
1223
  mineru: "MinerU (mineru)",
1224
+ mineru_api: "MinerU API (mineru_api)",
1141
1225
  external: "Custom External Command"
1142
1226
  },
1143
1227
  ocrFallbackOptions: { localAuto: "Vision model or local OCR" }
@@ -1211,7 +1295,7 @@ async function initI18n(lng) {
1211
1295
  fallbackLng: "en",
1212
1296
  resources: {
1213
1297
  "en": { translation: en },
1214
- "zh-CN": { translation: await import("./zh-CN-wEUNhuHM.mjs").then((m) => m.zhCN) }
1298
+ "zh-CN": { translation: await import("./zh-CN-B2yrInX9.mjs").then((m) => m.zhCN) }
1215
1299
  },
1216
1300
  interpolation: { escapeValue: false },
1217
1301
  returnNull: false
@@ -1372,6 +1456,14 @@ async function commandAvailable(command) {
1372
1456
  return false;
1373
1457
  }
1374
1458
  }
1459
+ async function liteparseAvailable() {
1460
+ try {
1461
+ await import("@llamaindex/liteparse");
1462
+ return true;
1463
+ } catch {
1464
+ return false;
1465
+ }
1466
+ }
1375
1467
  async function findImageOcrSelfCheckLogo() {
1376
1468
  const candidates = [
1377
1469
  path.resolve(MODULE_DIR, "logo.png"),
@@ -1426,7 +1518,11 @@ async function collectDoctorDiagnostics(options = {}) {
1426
1518
  aiConnectionOk = await checkConnection(cfg.provider.baseURL);
1427
1519
  pdfConverter = cfg.pdf?.converter ?? "unpdf";
1428
1520
  if (pdfConverter === "unpdf") pdfConverterOk = true;
1429
- else if (pdfConverter === "mineru") {
1521
+ else if (pdfConverter === "liteparse") {
1522
+ pdfConverterOk = await liteparseAvailable();
1523
+ if (!pdfConverterOk) pdfConverterError = "@llamaindex/liteparse optional dependency is not installed or cannot be loaded";
1524
+ else if (cfg.pdf?.liteparse?.ocrEnabled && !cfg.pdf.liteparse.tessdataPath) pdfConverterError = "LiteParse OCR is enabled. If OCR fails, install Tesseract traineddata and configure pdf.liteparse.tessdataPath.";
1525
+ } else if (pdfConverter === "mineru") {
1430
1526
  const command = cfg.pdf?.mineru?.command ?? DEFAULT_MINERU_CONFIG.command;
1431
1527
  pdfConverterOk = await commandAvailable(command);
1432
1528
  if (!pdfConverterOk) pdfConverterError = `Command not found: ${command}`;
@@ -1531,19 +1627,50 @@ async function collectDoctorDiagnostics(options = {}) {
1531
1627
 
1532
1628
  //#endregion
1533
1629
  //#region src/infrastructure/schema/generate-drizzle-schema.ts
1630
+ function renderColumnType(ct) {
1631
+ switch (ct.class) {
1632
+ case "text": return ct.mode === "json" ? `text({ mode: 'json' })` : "text()";
1633
+ case "integer": return ct.mode ? `integer({ mode: '${ct.mode}' })` : "integer()";
1634
+ case "real": return "real()";
1635
+ }
1636
+ }
1637
+ function renderDefaultValue(value) {
1638
+ return JSON.stringify(value);
1639
+ }
1534
1640
  function generateColumnDefinition(column) {
1535
1641
  if (column.isPrimary && column.isAutoIncrement) return ` ${column.name}: integer().primaryKey({ autoIncrement: true })`;
1536
- let def = ` ${column.name}: ${column.drizzleType}`;
1642
+ let def = ` ${column.name}: ${renderColumnType(column.columnType)}`;
1537
1643
  if (column.isPrimary) def += ".primaryKey()";
1538
1644
  if (!column.isNullable && !column.isPrimary) def += ".notNull()";
1539
1645
  if (column.isUnique && !column.isPrimary) def += ".unique()";
1540
- if (column.defaultValue !== void 0) def += `.default(${column.defaultValue})`;
1646
+ if (column.default !== void 0) def += `.default(${renderDefaultValue(column.default)})`;
1541
1647
  if (column.isForeignKey && column.foreignKeyRef) def += `.references(() => ${column.foreignKeyRef.table}.${column.foreignKeyRef.column})`;
1542
1648
  return def;
1543
1649
  }
1650
+ function renderCheckToDrizzle(check, tableVar) {
1651
+ const colRef = `\${${tableVar}.${check.column}}`;
1652
+ let expr;
1653
+ switch (check.kind) {
1654
+ case "min_length":
1655
+ expr = `length(${colRef}) >= ${check.value}`;
1656
+ break;
1657
+ case "max_length":
1658
+ expr = `length(${colRef}) <= ${check.value}`;
1659
+ break;
1660
+ case "min_value":
1661
+ expr = `${colRef} >= ${check.value}`;
1662
+ break;
1663
+ case "max_value":
1664
+ expr = `${colRef} <= ${check.value}`;
1665
+ break;
1666
+ }
1667
+ return ` ${check.name}: check('${check.name}', sql\`${expr}\`)`;
1668
+ }
1544
1669
  function generateTableDefinition(table) {
1545
1670
  const columns = table.columns.map(generateColumnDefinition);
1546
- return `export const ${table.name} = sqliteTable('${table.name}', {\n${columns.join(",\n")}\n})`;
1671
+ if (!table.checks?.length) return `export const ${table.name} = sqliteTable('${table.name}', {\n${columns.join(",\n")}\n})`;
1672
+ const checkLines = table.checks.map((c) => renderCheckToDrizzle(c, "table"));
1673
+ return `export const ${table.name} = sqliteTable('${table.name}', {\n${columns.join(",\n")}\n}, (table) => ({\n${checkLines.join(",\n")}\n}))`;
1547
1674
  }
1548
1675
  function generateRelationDefinitions(relations, reverseRelations) {
1549
1676
  if (relations.length === 0 && reverseRelations.length === 0) return "";
@@ -1579,7 +1706,7 @@ function generateRelationDefinitions(relations, reverseRelations) {
1579
1706
  return definitions.join("\n\n");
1580
1707
  }
1581
1708
  function generateDrizzleSchema(result) {
1582
- const imports = `import { sqliteTable, text, integer, real } from 'drizzle-orm/sqlite-core'\nimport { relations } from 'drizzle-orm'`;
1709
+ const imports = `import { ${`sqliteTable, text, integer, real${result.tables.some((t$1) => t$1.checks?.length) ? ", check, sql" : ""}`} } from 'drizzle-orm/sqlite-core'\nimport { relations } from 'drizzle-orm'`;
1583
1710
  const tableDefs = result.tables.map(generateTableDefinition).join("\n\n");
1584
1711
  const relationDefs = generateRelationDefinitions(result.relations, result.reverseRelations);
1585
1712
  const parts = [
@@ -1593,4 +1720,4 @@ function generateDrizzleSchema(result) {
1593
1720
  }
1594
1721
 
1595
1722
  //#endregion
1596
- export { PLACEHOLDER_TEXT as C, name as D, description as E, package_default as O, PLACEHOLDER_SCHEMA as S, seedConfig as T, doctorDiagnosticsTableRows as _, recognizeImageText as a, DEFAULT_MINERU_CONFIG as b, t as c, writeAIConfig as d, AIConfigSchema as f, buildDoctorDiagnostics as g, toSnakeCase as h, generateDrizzleConfig as i, version as k, getDefaultAIConfig as l, parseJsonSchema as m, collectDoctorDiagnostics as n, shouldUseImageOcrFallback as o, JsonSchemaDefinitionSchema as p, createMigrationConfig as r, initI18n as s, generateDrizzleSchema as t, readAIConfig as u, formatDoctorDiagnosticsJson as v, createConfig as w, DEFAULT_PROMPT_CONFIG as x, DEFAULT_MINERU_API_CONFIG as y };
1723
+ export { package_default as A, DEFAULT_PROMPT_CONFIG as C, seedConfig as D, createConfig as E, description as O, DEFAULT_MINERU_CONFIG as S, PLACEHOLDER_TEXT as T, doctorDiagnosticsSeverityRows as _, recognizeImageText as a, DEFAULT_LITEPARSE_CONFIG as b, t as c, writeAIConfig as d, AIConfigSchema as f, buildDoctorDiagnostics as g, toSnakeCase as h, generateDrizzleConfig as i, version as j, name as k, getDefaultAIConfig as l, parseJsonSchema as m, collectDoctorDiagnostics as n, shouldUseImageOcrFallback as o, JsonSchemaDefinitionSchema as p, createMigrationConfig as r, initI18n as s, generateDrizzleSchema as t, readAIConfig as u, doctorDiagnosticsTableRows as v, PLACEHOLDER_SCHEMA as w, DEFAULT_MINERU_API_CONFIG as x, formatDoctorDiagnosticsJson as y };