aiex-cli 0.0.1 → 0.0.2-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116) hide show
  1. package/README.md +19 -2
  2. package/dist/cli.mjs +840 -35
  3. package/dist/{doctor-collector-CykRm0fC.mjs → doctor-collector-Cbqgwx__.mjs} +31 -4
  4. package/dist/index.mjs +1 -1
  5. package/dist/web/assets/AISettings-Cm8fAfaG.js +205 -0
  6. package/dist/web/assets/{DataBrowser-CwcTG80-.js → DataBrowser-DQwOvooY.js} +3 -3
  7. package/dist/web/assets/ExtractRunner-CzfLlCCf.js +1 -0
  8. package/dist/web/assets/{ExtractionViewer-CsdK1kKK.js → ExtractionViewer-D7tghmR-.js} +1 -1
  9. package/dist/web/assets/{JsonSchemaEditor-D477lV5a.js → JsonSchemaEditor-DXhYW-Un.js} +3 -3
  10. package/dist/web/assets/{api-client-D2Y_-4JM.js → api-client-CIC2X8_4.js} +1 -1
  11. package/dist/web/assets/baseinput-B7PU5-38.js +2 -0
  12. package/dist/web/assets/checkbox-CCQwMfLd.js +142 -0
  13. package/dist/web/assets/{cssMode-CPThwItX.js → cssMode-CCz1uj8b.js} +1 -1
  14. package/dist/web/assets/dialog-8xaIXBKO.js +108 -0
  15. package/dist/web/assets/{editor.main-BnOkwRFv.js → editor.main-DOue-nGf.js} +2 -2
  16. package/dist/web/assets/{freemarker2-DWDTYVJR.js → freemarker2-C87H7V8i.js} +1 -1
  17. package/dist/web/assets/{handlebars-D4DzjGQ7.js → handlebars-TLnoktZW.js} +1 -1
  18. package/dist/web/assets/{html-DnzhKSoD.js → html-Rewbtgcp.js} +1 -1
  19. package/dist/web/assets/{htmlMode-CR7UKfEH.js → htmlMode-Cr9y3YUi.js} +1 -1
  20. package/dist/web/assets/index-BEorm4a5.css +2 -0
  21. package/dist/web/assets/{index-DVDVw-GK.js → index-CWy8KajQ.js} +38 -38
  22. package/dist/web/assets/{javascript-D2srszZ8.js → javascript-BO8DPECx.js} +1 -1
  23. package/dist/web/assets/{jsonMode-B4jaPYEr.js → jsonMode-B78m_Y1l.js} +1 -1
  24. package/dist/web/assets/{liquid-CIT2Wl_l.js → liquid-CDp_8YUE.js} +1 -1
  25. package/dist/web/assets/{mdx-CWLaEOFy.js → mdx-CeR1GULE.js} +1 -1
  26. package/dist/web/assets/{monaco.contribution-DDv5ldfS.js → monaco.contribution-tCitzj1_.js} +2 -2
  27. package/dist/web/assets/{python-6CGfpCNq.js → python-DWmB1hQ6.js} +1 -1
  28. package/dist/web/assets/{razor-DEMMh3TD.js → razor-BVk762Lq.js} +1 -1
  29. package/dist/web/assets/runtime-dom.esm-bundler-DZwuSnld.js +1 -0
  30. package/dist/web/assets/select-DKPPF403.js +438 -0
  31. package/dist/web/assets/{tsMode-Cm1NtjPs.js → tsMode-Gjs5D1gt.js} +1 -1
  32. package/dist/web/assets/{typescript-BM9aPEFg.js → typescript-Cw6YtwrM.js} +1 -1
  33. package/dist/web/assets/utils-CZhxh2Ya.js +2 -0
  34. package/dist/web/assets/{xml-CoSbvcg5.js → xml-guZq0YZJ.js} +1 -1
  35. package/dist/web/assets/{yaml-56GOgy8k.js → yaml-B9TbU-LX.js} +1 -1
  36. package/dist/web/index.html +9 -8
  37. package/package.json +2 -1
  38. package/dist/web/assets/AISettings-CI6Lgx0p.js +0 -339
  39. package/dist/web/assets/dialog-CUkPLPNP.js +0 -109
  40. package/dist/web/assets/index-C9N8oWt4.css +0 -2
  41. package/dist/web/assets/runtime-dom.esm-bundler-ei_N7Xjw.js +0 -1
  42. package/dist/web/assets/select-BGex2SPs.js +0 -439
  43. /package/dist/web/assets/{abap-Bgec7Keq.js → abap-C3UM4cME.js} +0 -0
  44. /package/dist/web/assets/{apex-VBlPwEoQ.js → apex-BQBZvQmN.js} +0 -0
  45. /package/dist/web/assets/{azcli-DKqrEFBx.js → azcli-Dn9Awrok.js} +0 -0
  46. /package/dist/web/assets/{bat-DdgQWy_0.js → bat-JgEezSDo.js} +0 -0
  47. /package/dist/web/assets/{bicep-CRMM43EB.js → bicep-vcQeC7wE.js} +0 -0
  48. /package/dist/web/assets/{cameligo-UatALtML.js → cameligo-C0cr0T3L.js} +0 -0
  49. /package/dist/web/assets/{clojure-D8JU08RA.js → clojure-Brc1-rbW.js} +0 -0
  50. /package/dist/web/assets/{coffee-C56wu358.js → coffee-BiZYU83a.js} +0 -0
  51. /package/dist/web/assets/{cpp-CyZLvhJG.js → cpp-ikWHCInz.js} +0 -0
  52. /package/dist/web/assets/{csharp-BJl3ixva.js → csharp-BRWs_LfH.js} +0 -0
  53. /package/dist/web/assets/{csp-CxEKxmO-.js → csp-CcAumoJw.js} +0 -0
  54. /package/dist/web/assets/{css-B0t_muXd.js → css-BZm6paiA.js} +0 -0
  55. /package/dist/web/assets/{cypher-D1hqiMFD.js → cypher-CDQMONdb.js} +0 -0
  56. /package/dist/web/assets/{dart-Bz550Pyv.js → dart-Ci4SZdF1.js} +0 -0
  57. /package/dist/web/assets/{dist-9yHVMqQ0.js → dist-B5Yem2Jn.js} +0 -0
  58. /package/dist/web/assets/{dockerfile-CIXgVAuA.js → dockerfile-BV0tAr-M.js} +0 -0
  59. /package/dist/web/assets/{ecl-D9qbvZoA.js → ecl-CP7nM2KN.js} +0 -0
  60. /package/dist/web/assets/{editor.api-C8BHpRhn.js → editor.api-BU_q4v8i.js} +0 -0
  61. /package/dist/web/assets/{elixir-b2M38fAy.js → elixir-GcA6wFiI.js} +0 -0
  62. /package/dist/web/assets/{flow9-Dq1UYMkt.js → flow9-CIb9youF.js} +0 -0
  63. /package/dist/web/assets/{fsharp-BaeLhgfq.js → fsharp-BVaBE4co.js} +0 -0
  64. /package/dist/web/assets/{go-Bd-NFKIC.js → go-Bbqf306x.js} +0 -0
  65. /package/dist/web/assets/{graphql-DZVerJfy.js → graphql-DJPrC4l-.js} +0 -0
  66. /package/dist/web/assets/{hcl-CAVzrZfH.js → hcl-QyfWVWpM.js} +0 -0
  67. /package/dist/web/assets/{ini-CyXdX58t.js → ini-CgstZeS8.js} +0 -0
  68. /package/dist/web/assets/{java-B5pNgvhy.js → java-D4AG88ZY.js} +0 -0
  69. /package/dist/web/assets/{julia-XRhmV3AN.js → julia-CN8U9648.js} +0 -0
  70. /package/dist/web/assets/{kotlin-DOd3J5vr.js → kotlin-gNNgpJhY.js} +0 -0
  71. /package/dist/web/assets/{less-veZSnyw6.js → less-C3SY2L8t.js} +0 -0
  72. /package/dist/web/assets/{lexon-QWGkuK0H.js → lexon-CznnqzUX.js} +0 -0
  73. /package/dist/web/assets/{lua-CYGpjuO5.js → lua-f3xyJgy5.js} +0 -0
  74. /package/dist/web/assets/{m3-yNnrZkdc.js → m3-Cl7J89p-.js} +0 -0
  75. /package/dist/web/assets/{markdown-BCSWEPSX.js → markdown-Bv2fnzzT.js} +0 -0
  76. /package/dist/web/assets/{mips-OpYmcC30.js → mips-D6rXUTWa.js} +0 -0
  77. /package/dist/web/assets/{msdax-2oxoTO9Z.js → msdax-Bb1N2x5J.js} +0 -0
  78. /package/dist/web/assets/{mysql-5KlC-K_9.js → mysql-DXSr6oD7.js} +0 -0
  79. /package/dist/web/assets/{object-utils-I4gWdSnS.js → object-utils-Cm2M3fPO.js} +0 -0
  80. /package/dist/web/assets/{objective-c-CcDCgtLx.js → objective-c-CEJiVkDa.js} +0 -0
  81. /package/dist/web/assets/{pascal-BZGsbaEV.js → pascal-BtkMEIba.js} +0 -0
  82. /package/dist/web/assets/{pascaligo-DtD5qU3G.js → pascaligo-C7FAwqk7.js} +0 -0
  83. /package/dist/web/assets/{perl-C1jNNS3E.js → perl-D9kqkBbN.js} +0 -0
  84. /package/dist/web/assets/{pgsql-CT0fhiZa.js → pgsql-BjGTBL1W.js} +0 -0
  85. /package/dist/web/assets/{php-D6DrXoPM.js → php-BN0c0noA.js} +0 -0
  86. /package/dist/web/assets/{pla-b3-HN2pF.js → pla-B94QTqOt.js} +0 -0
  87. /package/dist/web/assets/{postiats-Bin2ApVS.js → postiats-DH91dqBs.js} +0 -0
  88. /package/dist/web/assets/{powerquery-7ASnn-ZG.js → powerquery-D7P0oUen.js} +0 -0
  89. /package/dist/web/assets/{powershell-t4p7sU1H.js → powershell-CCVHmJax.js} +0 -0
  90. /package/dist/web/assets/{preload-helper-Dd-HcVz_.js → preload-helper-DgFuoWHe.js} +0 -0
  91. /package/dist/web/assets/{protobuf-BUGeWa_j.js → protobuf-BIP7pixC.js} +0 -0
  92. /package/dist/web/assets/{pug-BuKcgC9s.js → pug-DcbLK7HH.js} +0 -0
  93. /package/dist/web/assets/{qsharp-DxLLX8mo.js → qsharp-B-VY_WOG.js} +0 -0
  94. /package/dist/web/assets/{r-DMlFgn7A.js → r-DwRtsJsj.js} +0 -0
  95. /package/dist/web/assets/{redis-cXItkC5u.js → redis-CaW0tkwu.js} +0 -0
  96. /package/dist/web/assets/{redshift-BZVbW7HE.js → redshift-3tS8G0ME.js} +0 -0
  97. /package/dist/web/assets/{restructuredtext-BzjxwS8h.js → restructuredtext-_TNyGyK0.js} +0 -0
  98. /package/dist/web/assets/{ruby-C5nyLV4l.js → ruby-A-MwVfO4.js} +0 -0
  99. /package/dist/web/assets/{rust-BcmMsHdf.js → rust-oemlUIvG.js} +0 -0
  100. /package/dist/web/assets/{sb-Dnb1iy6B.js → sb-BDZuaI3W.js} +0 -0
  101. /package/dist/web/assets/{scala-anMIFYpA.js → scala-Bfo2loK4.js} +0 -0
  102. /package/dist/web/assets/{scheme-BItQTe08.js → scheme-N2eo7rjB.js} +0 -0
  103. /package/dist/web/assets/{scss-BOv51BJ5.js → scss-vjjSCTgN.js} +0 -0
  104. /package/dist/web/assets/{shell-BsRYRTNN.js → shell-Bfb9Yq6w.js} +0 -0
  105. /package/dist/web/assets/{solidity-BtuLgGDx.js → solidity-C9RbukzG.js} +0 -0
  106. /package/dist/web/assets/{sophia-B0Vkc5MF.js → sophia-DWV_MWOg.js} +0 -0
  107. /package/dist/web/assets/{sparql-B7lvkZQM.js → sparql-iMXILWhh.js} +0 -0
  108. /package/dist/web/assets/{sql-DvP5MpA3.js → sql-CJDj31JM.js} +0 -0
  109. /package/dist/web/assets/{st-GVUeyB3U.js → st-BG9AQ1OO.js} +0 -0
  110. /package/dist/web/assets/{swift-DSPIoCjm.js → swift-B579DvHm.js} +0 -0
  111. /package/dist/web/assets/{systemverilog-Icj2-k23.js → systemverilog-BNgaF3ZX.js} +0 -0
  112. /package/dist/web/assets/{tcl-Cd8KQcm-.js → tcl-grdtJiUA.js} +0 -0
  113. /package/dist/web/assets/{twig-CBHmt8z3.js → twig-JAsFXBZw.js} +0 -0
  114. /package/dist/web/assets/{typespec-Ckc037mq.js → typespec-D3hIQXEU.js} +0 -0
  115. /package/dist/web/assets/{vb-B97GW9Wb.js → vb-H38jRcEz.js} +0 -0
  116. /package/dist/web/assets/{wgsl-DIKmb3YH.js → wgsl-BC5Grc5r.js} +0 -0
package/dist/cli.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { C as doctorDiagnosticsTableRows, _ as seedConfig, a as parseJsonSchema, b as package_default, c as getDefaultAIConfig, d as DEFAULT_MINERU_CONFIG, f as DEFAULT_PROMPT_CONFIG, g as createConfig, h as AIConfigSchema, i as JsonSchemaDefinitionSchema, l as readAIConfig, m as PLACEHOLDER_TEXT, n as createMigrationConfig, o as toSnakeCase, p as PLACEHOLDER_SCHEMA, s as generateDrizzleSchema, t as collectDoctorDiagnostics, u as writeAIConfig, v as description, w as formatDoctorDiagnosticsJson, x as version, y as name } from "./doctor-collector-CykRm0fC.mjs";
1
+ import { S as version, T as formatDoctorDiagnosticsJson, _ as createConfig, a as parseJsonSchema, b as name, c as getDefaultAIConfig, d as DEFAULT_MARKITDOWN_CONFIG, f as DEFAULT_MINERU_CONFIG, g as AIConfigSchema, h as PLACEHOLDER_TEXT, i as JsonSchemaDefinitionSchema, l as readAIConfig, m as PLACEHOLDER_SCHEMA, n as createMigrationConfig, o as toSnakeCase, p as DEFAULT_PROMPT_CONFIG, s as generateDrizzleSchema, t as collectDoctorDiagnostics, u as writeAIConfig, v as seedConfig, w as doctorDiagnosticsTableRows, x as package_default, y as description } from "./doctor-collector-Cbqgwx__.mjs";
2
2
  import { createRequire } from "node:module";
3
3
  import fs from "node:fs/promises";
4
4
  import os from "node:os";
@@ -33,6 +33,7 @@ import open from "open";
33
33
  import { serveStatic } from "@hono/node-server/serve-static";
34
34
  import { Hono } from "hono";
35
35
  import { cors } from "hono/cors";
36
+ import { Client } from "@notionhq/client";
36
37
  import { zValidator } from "@hono/zod-validator";
37
38
  import { Kysely, SqliteDialect, sql } from "kysely";
38
39
 
@@ -13381,6 +13382,111 @@ async function savePromptSnapshot(schema, aiexDir) {
13381
13382
  return outputPath;
13382
13383
  }
13383
13384
 
13385
+ //#endregion
13386
+ //#region src/core/extraction-audit.ts
13387
+ const AUDIT_ID_RE = /^[\w.-]+$/;
13388
+ const STALE_AFTER_MS = 1800 * 1e3;
13389
+ function auditDir(aiexDir) {
13390
+ return path.join(aiexDir, "extracted", "_audit");
13391
+ }
13392
+ function auditPath(aiexDir, id) {
13393
+ return path.join(auditDir(aiexDir), `${id}.json`);
13394
+ }
13395
+ function createAuditId(schemaName) {
13396
+ return `${schemaName}-${(/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-")}-${Math.random().toString(36).slice(2, 8)}`;
13397
+ }
13398
+ async function createExtractionAuditRecord(aiexDir, input) {
13399
+ const now = (/* @__PURE__ */ new Date()).toISOString();
13400
+ const record = {
13401
+ id: createAuditId(input.schemaName),
13402
+ status: "running",
13403
+ schemaName: input.schemaName,
13404
+ modelName: input.modelName || void 0,
13405
+ source: input.source,
13406
+ retryOf: input.retryOf,
13407
+ createdAt: now,
13408
+ updatedAt: now
13409
+ };
13410
+ await fs.mkdir(auditDir(aiexDir), { recursive: true });
13411
+ await writeFile(auditPath(aiexDir, record.id), record, {
13412
+ spaces: 2,
13413
+ EOL: "\n"
13414
+ });
13415
+ return record;
13416
+ }
13417
+ async function updateExtractionAuditRecord(aiexDir, id, patch) {
13418
+ const current = await readExtractionAuditRecord(aiexDir, id);
13419
+ if (!current) throw new Error(`Extraction audit record not found: ${id}`);
13420
+ const record = {
13421
+ ...current,
13422
+ ...patch,
13423
+ source: patch.source ?? current.source,
13424
+ updatedAt: (/* @__PURE__ */ new Date()).toISOString()
13425
+ };
13426
+ await fs.mkdir(auditDir(aiexDir), { recursive: true });
13427
+ await writeFile(auditPath(aiexDir, id), record, {
13428
+ spaces: 2,
13429
+ EOL: "\n"
13430
+ });
13431
+ return record;
13432
+ }
13433
+ async function readExtractionAuditRecord(aiexDir, id) {
13434
+ if (!AUDIT_ID_RE.test(id)) return null;
13435
+ try {
13436
+ return await readFile(auditPath(aiexDir, id));
13437
+ } catch {
13438
+ return null;
13439
+ }
13440
+ }
13441
+ function isStale(record) {
13442
+ if (record.status !== "running") return false;
13443
+ const updated = Date.parse(record.updatedAt);
13444
+ return !Number.isNaN(updated) && Date.now() - updated > STALE_AFTER_MS;
13445
+ }
13446
+ async function markStaleIfNeeded(aiexDir, record) {
13447
+ if (!isStale(record)) return record;
13448
+ const staleRecord = {
13449
+ ...record,
13450
+ status: "stale",
13451
+ error: record.error ?? "Extraction did not finish. It may have been interrupted.",
13452
+ updatedAt: (/* @__PURE__ */ new Date()).toISOString()
13453
+ };
13454
+ await writeFile(auditPath(aiexDir, staleRecord.id), staleRecord, {
13455
+ spaces: 2,
13456
+ EOL: "\n"
13457
+ });
13458
+ return staleRecord;
13459
+ }
13460
+ async function listExtractionAuditRecords(aiexDir) {
13461
+ try {
13462
+ const dir = auditDir(aiexDir);
13463
+ const files = await fs.readdir(dir);
13464
+ return (await Promise.all(files.filter((file) => file.endsWith(".json")).map(async (file) => {
13465
+ try {
13466
+ return await markStaleIfNeeded(aiexDir, await readFile(path.join(dir, file)));
13467
+ } catch {
13468
+ return null;
13469
+ }
13470
+ }))).filter((record) => !!record).sort((a, b) => b.createdAt.localeCompare(a.createdAt));
13471
+ } catch {
13472
+ return [];
13473
+ }
13474
+ }
13475
+ function isPathInside(childPath, parentPath) {
13476
+ const relative = path.relative(path.resolve(parentPath), path.resolve(childPath));
13477
+ return !!relative && !relative.startsWith("..") && !path.isAbsolute(relative);
13478
+ }
13479
+ async function deleteExtractionAuditRecord(aiexDir, id) {
13480
+ const record = await readExtractionAuditRecord(aiexDir, id);
13481
+ if (!record) return false;
13482
+ const uploadsDir = path.join(aiexDir, "uploads");
13483
+ if (record.source.type === "file" && record.source.filePath && isPathInside(record.source.filePath, uploadsDir)) await fs.unlink(record.source.filePath).catch(() => {});
13484
+ const uploadFiles = await fs.readdir(uploadsDir).catch(() => []);
13485
+ await Promise.all(uploadFiles.filter((file) => file.startsWith(`${id}-`)).map((file) => fs.unlink(path.join(uploadsDir, file)).catch(() => {})));
13486
+ await fs.unlink(auditPath(aiexDir, id)).catch(() => {});
13487
+ return true;
13488
+ }
13489
+
13384
13490
  //#endregion
13385
13491
  //#region src/core/pdf-converter/external.ts
13386
13492
  function applyTemplate(value, context) {
@@ -13433,7 +13539,7 @@ var ExternalCommandPdfConverter = class {
13433
13539
  this.name = name$1;
13434
13540
  }
13435
13541
  async convert(input, filePath) {
13436
- const tempRoot = await fs.mkdtemp(path.join(os.tmpdir(), "aiex-mineru-"));
13542
+ const tempRoot = await fs.mkdtemp(path.join(os.tmpdir(), `aiex-${this.name}-`));
13437
13543
  const outputDir = path.join(tempRoot, "output");
13438
13544
  await fs.mkdir(outputDir, { recursive: true });
13439
13545
  const inputPath = filePath ?? path.join(tempRoot, "input.pdf");
@@ -13530,6 +13636,10 @@ function createPdfConverter(config) {
13530
13636
  const mineruConfig = config.mineru ?? DEFAULT_MINERU_CONFIG;
13531
13637
  return withFallback(new ExternalCommandPdfConverter("mineru", mineruConfig), mineruConfig);
13532
13638
  }
13639
+ if (config.converter === "markitdown") {
13640
+ const markitdownConfig = config.markitdown ?? DEFAULT_MARKITDOWN_CONFIG;
13641
+ return withFallback(new ExternalCommandPdfConverter("markitdown", markitdownConfig), markitdownConfig);
13642
+ }
13533
13643
  if (config.converter === "external") {
13534
13644
  if (!config.external) throw new Error("External PDF converter is selected but no external command is configured.");
13535
13645
  return withFallback(new ExternalCommandPdfConverter("external", config.external), config.external);
@@ -13569,7 +13679,7 @@ const SUPPORTED_EXTENSIONS = new Set([
13569
13679
  "yml"
13570
13680
  ]);
13571
13681
  const PDF_EXT_RE = /\.pdf$/i;
13572
- const JSON_EXT_RE = /\.json$/;
13682
+ const JSON_EXT_RE$1 = /\.json$/;
13573
13683
  const SUPPORTED_FILE_PATTERN = `*.{${[...SUPPORTED_EXTENSIONS].join(",")}}`;
13574
13684
  async function ensureDatabaseReady(dbPath, schema) {
13575
13685
  try {
@@ -13628,7 +13738,7 @@ async function loadSchema(config, schemaName) {
13628
13738
  async function listSchemas(aiexDir) {
13629
13739
  try {
13630
13740
  const dir = path.join(aiexDir, "schema");
13631
- return (await fs.readdir(dir)).filter((f) => f.endsWith(".json")).map((f) => f.replace(JSON_EXT_RE, "")).sort();
13741
+ return (await fs.readdir(dir)).filter((f) => f.endsWith(".json")).map((f) => f.replace(JSON_EXT_RE$1, "")).sort();
13632
13742
  } catch {
13633
13743
  return [];
13634
13744
  }
@@ -13693,7 +13803,7 @@ async function extractSingle(aiexDir, config, aiConfig, schemaName, text$1, file
13693
13803
  if (!options?.quiet) s.stop("Extraction complete");
13694
13804
  if (result.outputPath && !options?.quiet) consola.success(`Result saved: ${pc.cyan(result.outputPath)}`);
13695
13805
  if (result.tokensUsed && !options?.quiet) consola.info(pc.gray(`Token usage: prompt=${result.tokensUsed.prompt}, completion=${result.tokensUsed.completion}, total=${result.tokensUsed.total}`));
13696
- if (result.data) {
13806
+ if (result.data && options?.insert !== false) {
13697
13807
  const s2 = spinner();
13698
13808
  if (!options?.quiet) s2.start("Inserting into database...");
13699
13809
  const dbError = await ensureDatabaseReady(config.databasePath, schemaLoad.schema);
@@ -13711,6 +13821,13 @@ async function extractSingle(aiexDir, config, aiConfig, schemaName, text$1, file
13711
13821
  const insertResult = insertExtractedData(db, schemaLoad.schema, result.data);
13712
13822
  if (insertResult.success) {
13713
13823
  if (!options?.quiet) s2.stop(`Inserted into ${insertResult.tablesInserted.length} table(s)`);
13824
+ return {
13825
+ success: true,
13826
+ outputPath: result.outputPath,
13827
+ data: result.data,
13828
+ tablesInserted: insertResult.tablesInserted,
13829
+ tokensUsed: result.tokensUsed
13830
+ };
13714
13831
  } else {
13715
13832
  if (!options?.quiet) s2.stop("Database insert failed");
13716
13833
  consola.error(insertResult.error || "Unknown error");
@@ -13731,25 +13848,57 @@ async function extractSingle(aiexDir, config, aiConfig, schemaName, text$1, file
13731
13848
  };
13732
13849
  }
13733
13850
  }
13734
- return { success: true };
13851
+ return {
13852
+ success: true,
13853
+ outputPath: result.outputPath,
13854
+ data: result.data,
13855
+ tokensUsed: result.tokensUsed
13856
+ };
13735
13857
  }
13736
- async function processOneFile(aiexDir, config, aiConfig, schemaName, filePath, modelOverride) {
13858
+ async function processOneFile(aiexDir, config, aiConfig, schemaName, filePath, modelOverride, options) {
13859
+ const audit = await createExtractionAuditRecord(aiexDir, {
13860
+ schemaName,
13861
+ modelName: modelOverride?.name,
13862
+ source: {
13863
+ type: "file",
13864
+ filePath,
13865
+ fileName: path.basename(filePath)
13866
+ }
13867
+ });
13737
13868
  try {
13738
13869
  const input = await readExtractFileInput(filePath, aiConfig);
13739
- const r = await extractSingle(aiexDir, config, aiConfig, schemaName, input.text, input.filePath, modelOverride, { quiet: false });
13870
+ const r = await extractSingle(aiexDir, config, aiConfig, schemaName, input.text, input.filePath, modelOverride, {
13871
+ quiet: false,
13872
+ insert: options?.insert
13873
+ });
13740
13874
  if (r.success) {
13875
+ await updateExtractionAuditRecord(aiexDir, audit.id, {
13876
+ status: "succeeded",
13877
+ outputPath: r.outputPath,
13878
+ outputName: r.outputPath ? path.basename(r.outputPath) : void 0,
13879
+ tablesInserted: r.tablesInserted,
13880
+ tokensUsed: r.tokensUsed
13881
+ });
13741
13882
  consola.success(`Processed: ${path.basename(filePath)}`);
13742
13883
  return true;
13743
13884
  } else {
13885
+ await updateExtractionAuditRecord(aiexDir, audit.id, {
13886
+ status: "failed",
13887
+ error: r.error || "Extraction failed"
13888
+ });
13744
13889
  consola.error(`Failed: ${r.error}`);
13745
13890
  return false;
13746
13891
  }
13747
13892
  } catch (e) {
13893
+ await updateExtractionAuditRecord(aiexDir, audit.id, {
13894
+ status: "failed",
13895
+ error: e instanceof Error ? e.message : String(e)
13896
+ });
13748
13897
  consola.error(`Error processing ${path.basename(filePath)}: ${e instanceof Error ? e.message : String(e)}`);
13749
13898
  return false;
13750
13899
  }
13751
13900
  }
13752
- async function runBatchExtraction(aiexDir, config, aiConfig, schemaName, dir, globPattern, modelOverride) {
13901
+ async function runBatchExtraction(aiexDir, config, aiConfig, schemaName, dir, globPattern, modelOverride, options) {
13753
13902
  consola.info(`Scanning ${pc.cyan(dir)} for supported files...`);
13754
13903
  let files;
13755
13904
  try {
@@ -13774,7 +13923,7 @@ async function runBatchExtraction(aiexDir, config, aiConfig, schemaName, dir, gl
13774
13923
  for (let i = 0; i < files.length; i++) {
13775
13924
  const file = files[i];
13776
13925
  consola.info(`\n[${i + 1}/${files.length}] Processing: ${pc.cyan(path.basename(file))}`);
13777
- if (await processOneFile(aiexDir, config, aiConfig, schemaName, file, modelOverride)) successCount++;
13926
+ if (await processOneFile(aiexDir, config, aiConfig, schemaName, file, modelOverride, options)) successCount++;
13778
13927
  else failCount++;
13779
13928
  }
13780
13929
  consola.info(`\nBatch complete: ${pc.green(`${successCount} succeeded`)}, ${pc.red(`${failCount} failed`)}, ${files.length} total`);
@@ -13787,11 +13936,199 @@ async function runBatchExtraction(aiexDir, config, aiConfig, schemaName, dir, gl
13787
13936
 
13788
13937
  //#endregion
13789
13938
  //#region src/commands/extract.ts
13939
+ function getIdArg(args) {
13940
+ if (typeof args.id === "string") return args.id;
13941
+ const positional = args._;
13942
+ return Array.isArray(positional) && typeof positional[0] === "string" ? positional[0] : "";
13943
+ }
13944
+ function formatSource(source) {
13945
+ return source.type === "file" ? source.fileName || "file" : "text";
13946
+ }
13947
+ async function loadConfiguredAI(aiexDir) {
13948
+ const aiConfig = await readAIConfig(aiexDir);
13949
+ if (!aiConfig) {
13950
+ failCommand("AI configuration not found. Please run \"aiex web\" to configure AI settings first");
13951
+ return null;
13952
+ }
13953
+ if (!aiConfig.provider.apiKey) {
13954
+ failCommand("API Key not configured. Please configure AI settings in the Web interface first");
13955
+ return null;
13956
+ }
13957
+ if (!aiConfig.provider.models?.length) {
13958
+ failCommand("No models configured. Please add at least one model in AI Settings");
13959
+ return null;
13960
+ }
13961
+ return aiConfig;
13962
+ }
13963
+ function resolveModelOverride(aiConfig, modelName) {
13964
+ if (!modelName) return void 0;
13965
+ const matched = aiConfig.provider.models.find((m) => m.name === modelName);
13966
+ if (!matched) {
13967
+ failCommand(`Model "${modelName}" not found in configuration. Available models: ${aiConfig.provider.models.map((m) => m.name).join(", ")}`);
13968
+ return null;
13969
+ }
13970
+ return matched;
13971
+ }
13972
+ async function runAuditedSingleExtraction(input) {
13973
+ const audit = await createExtractionAuditRecord(input.aiexDir, {
13974
+ schemaName: input.schemaName,
13975
+ modelName: input.modelOverride?.name,
13976
+ source: input.source,
13977
+ retryOf: input.retryOf
13978
+ });
13979
+ const result = await extractSingle(input.aiexDir, input.config, input.aiConfig, input.schemaName, input.text, input.filePath, input.modelOverride, { insert: input.insert });
13980
+ if (!result.success) {
13981
+ await updateExtractionAuditRecord(input.aiexDir, audit.id, {
13982
+ status: "failed",
13983
+ error: result.error || "Extraction failed"
13984
+ });
13985
+ return false;
13986
+ }
13987
+ await updateExtractionAuditRecord(input.aiexDir, audit.id, {
13988
+ status: "succeeded",
13989
+ outputPath: result.outputPath,
13990
+ outputName: result.outputPath ? path.basename(result.outputPath) : void 0,
13991
+ tablesInserted: result.tablesInserted,
13992
+ tokensUsed: result.tokensUsed
13993
+ });
13994
+ return true;
13995
+ }
13996
+ const historyCommand = defineCommand({
13997
+ meta: {
13998
+ name: "history",
13999
+ description: "List extraction audit records"
14000
+ },
14001
+ async run() {
14002
+ const config = createMigrationConfig(process.cwd());
14003
+ const records = await listExtractionAuditRecords(path.dirname(config.schemaPath));
14004
+ if (records.length === 0) {
14005
+ consola.info("No extraction history found");
14006
+ return;
14007
+ }
14008
+ for (const record of records) {
14009
+ const suffix = record.error ? ` — ${record.error}` : record.outputName ? ` — ${record.outputName}` : "";
14010
+ consola.info(`${record.status.padEnd(9)} ${record.id} ${record.schemaName} ${formatSource(record.source)}${suffix}`);
14011
+ }
14012
+ }
14013
+ });
14014
+ const showCommand = defineCommand({
14015
+ meta: {
14016
+ name: "show",
14017
+ description: "Show an extraction audit record"
14018
+ },
14019
+ args: { id: {
14020
+ type: "string",
14021
+ description: "Audit record id"
14022
+ } },
14023
+ async run({ args }) {
14024
+ const id = getIdArg(args);
14025
+ if (!id) {
14026
+ failCommand("Audit record id is required");
14027
+ return;
14028
+ }
14029
+ const config = createMigrationConfig(process.cwd());
14030
+ const record = await readExtractionAuditRecord(path.dirname(config.schemaPath), id);
14031
+ if (!record) {
14032
+ failCommand(`Extraction record not found: ${id}`);
14033
+ return;
14034
+ }
14035
+ consola.info(JSON.stringify(record, null, 2));
14036
+ }
14037
+ });
14038
+ const retryCommand = defineCommand({
14039
+ meta: {
14040
+ name: "retry",
14041
+ description: "Retry an extraction audit record"
14042
+ },
14043
+ args: {
14044
+ id: {
14045
+ type: "string",
14046
+ description: "Audit record id"
14047
+ },
14048
+ noInsert: {
14049
+ type: "boolean",
14050
+ description: "Extract and save JSON without inserting into SQLite",
14051
+ default: false
14052
+ }
14053
+ },
14054
+ async run({ args }) {
14055
+ intro(pc.inverse(" aiex extract retry "));
14056
+ const id = getIdArg(args);
14057
+ if (!id) {
14058
+ failCommand("Audit record id is required");
14059
+ return;
14060
+ }
14061
+ const config = createMigrationConfig(process.cwd());
14062
+ const aiexDir = path.dirname(config.schemaPath);
14063
+ const record = await readExtractionAuditRecord(aiexDir, id);
14064
+ if (!record) {
14065
+ failCommand(`Extraction record not found: ${id}`);
14066
+ return;
14067
+ }
14068
+ const aiConfig = await loadConfiguredAI(aiexDir);
14069
+ if (!aiConfig) return;
14070
+ const modelOverride = resolveModelOverride(aiConfig, record.modelName);
14071
+ if (modelOverride === null) return;
14072
+ try {
14073
+ const sourceInput = record.source.type === "file" ? await readExtractFileInput(record.source.filePath || "", aiConfig) : {
14074
+ text: record.source.text || "",
14075
+ filePath: void 0
14076
+ };
14077
+ if (!await runAuditedSingleExtraction({
14078
+ aiexDir,
14079
+ config,
14080
+ aiConfig,
14081
+ schemaName: record.schemaName,
14082
+ text: sourceInput.text,
14083
+ filePath: sourceInput.filePath,
14084
+ source: record.source,
14085
+ modelOverride,
14086
+ retryOf: record.id,
14087
+ insert: !args.noInsert
14088
+ })) {
14089
+ failCommand();
14090
+ return;
14091
+ }
14092
+ outro("Done!");
14093
+ } catch (error) {
14094
+ failCommand(error instanceof Error ? error.message : String(error));
14095
+ }
14096
+ }
14097
+ });
14098
+ const rmCommand = defineCommand({
14099
+ meta: {
14100
+ name: "rm",
14101
+ description: "Delete an extraction audit record and cached upload"
14102
+ },
14103
+ args: { id: {
14104
+ type: "string",
14105
+ description: "Audit record id"
14106
+ } },
14107
+ async run({ args }) {
14108
+ const id = getIdArg(args);
14109
+ if (!id) {
14110
+ failCommand("Audit record id is required");
14111
+ return;
14112
+ }
14113
+ const config = createMigrationConfig(process.cwd());
14114
+ if (!await deleteExtractionAuditRecord(path.dirname(config.schemaPath), id)) {
14115
+ failCommand(`Extraction record not found: ${id}`);
14116
+ return;
14117
+ }
14118
+ consola.success(`Deleted extraction record: ${id}`);
14119
+ }
14120
+ });
13790
14121
  const extractCommand = defineCommand({
13791
14122
  meta: {
13792
14123
  name: "extract",
13793
14124
  description: "Extract structured data from text, images, or PDFs"
13794
14125
  },
14126
+ subCommands: {
14127
+ history: historyCommand,
14128
+ show: showCommand,
14129
+ retry: retryCommand,
14130
+ rm: rmCommand
14131
+ },
13795
14132
  args: {
13796
14133
  schema: {
13797
14134
  type: "string",
@@ -13822,6 +14159,11 @@ const extractCommand = defineCommand({
13822
14159
  type: "string",
13823
14160
  alias: "g",
13824
14161
  description: "Glob pattern to filter files in batch mode (e.g. \"*.pdf\")"
14162
+ },
14163
+ noInsert: {
14164
+ type: "boolean",
14165
+ description: "Extract and save JSON without inserting into SQLite",
14166
+ default: false
13825
14167
  }
13826
14168
  },
13827
14169
  async run({ args }) {
@@ -13836,29 +14178,10 @@ const extractCommand = defineCommand({
13836
14178
  failCommand("Cannot combine -f/--file with -d/--dir");
13837
14179
  return;
13838
14180
  }
13839
- const aiConfig = await readAIConfig(aiexDir);
13840
- if (!aiConfig) {
13841
- failCommand("AI configuration not found. Please run \"aiex web\" to configure AI settings first");
13842
- return;
13843
- }
13844
- if (!aiConfig.provider.apiKey) {
13845
- failCommand("API Key not configured. Please configure AI settings in the Web interface first");
13846
- return;
13847
- }
13848
- if (!aiConfig.provider.models?.length) {
13849
- failCommand("No models configured. Please add at least one model in AI Settings");
13850
- return;
13851
- }
13852
- let modelOverride;
13853
- if (args.model) {
13854
- const matched = aiConfig.provider.models.find((m) => m.name === args.model);
13855
- if (!matched) {
13856
- const available = aiConfig.provider.models.map((m) => m.name).join(", ");
13857
- failCommand(`Model "${args.model}" not found in configuration. Available models: ${available}`);
13858
- return;
13859
- }
13860
- modelOverride = matched;
13861
- }
14181
+ const aiConfig = await loadConfiguredAI(aiexDir);
14182
+ if (!aiConfig) return;
14183
+ const modelOverride = resolveModelOverride(aiConfig, args.model);
14184
+ if (modelOverride === null) return;
13862
14185
  if (!args.schema && !args.text && !args.file && !args.dir) {
13863
14186
  if (await runInteractive(aiexDir, config, aiConfig, modelOverride)) outro("Done!");
13864
14187
  return;
@@ -13868,7 +14191,7 @@ const extractCommand = defineCommand({
13868
14191
  failCommand("Schema name (-s) is required in batch mode");
13869
14192
  return;
13870
14193
  }
13871
- const result = await runBatchExtraction(aiexDir, config, aiConfig, args.schema, args.dir, args.glob, modelOverride);
14194
+ const result = await runBatchExtraction(aiexDir, config, aiConfig, args.schema, args.dir, args.glob, modelOverride, { insert: !args.noInsert });
13872
14195
  if (!result.ok) {
13873
14196
  failCommand(result.error);
13874
14197
  return;
@@ -13901,7 +14224,24 @@ const extractCommand = defineCommand({
13901
14224
  return;
13902
14225
  }
13903
14226
  else if (args.text) text$1 = args.text;
13904
- if (!(await extractSingle(aiexDir, config, aiConfig, args.schema, text$1, filePath, modelOverride)).success) {
14227
+ if (!await runAuditedSingleExtraction({
14228
+ aiexDir,
14229
+ config,
14230
+ aiConfig,
14231
+ schemaName: args.schema,
14232
+ text: text$1,
14233
+ filePath,
14234
+ source: filePath ? {
14235
+ type: "file",
14236
+ filePath: args.file,
14237
+ fileName: path.basename(args.file)
14238
+ } : {
14239
+ type: "text",
14240
+ text: text$1
14241
+ },
14242
+ modelOverride,
14243
+ insert: !args.noInsert
14244
+ })) {
13905
14245
  failCommand();
13906
14246
  return;
13907
14247
  }
@@ -14181,8 +14521,192 @@ const schemaCommand = defineCommand({
14181
14521
  }
14182
14522
  });
14183
14523
 
14524
+ //#endregion
14525
+ //#region src/core/notion-sink.ts
14526
// Upper bound (in UTF-16 code units) that truncateText applies to stringified values.
+ const RICH_TEXT_LIMIT = 2e3;
14527
// Matches a string that is exactly 32 hexadecimal characters (no hyphens), case-insensitive.
+ const UUID_RE = /^[0-9a-f]{32}$/i;
14528
// Matches a string that is exactly a hyphenated 8-4-4-4-12 hexadecimal ID, case-insensitive.
+ const HYPHENATED_UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
14529
// Matches the first run of 32 hexadecimal characters anywhere inside a longer string.
+ const UUID_IN_TEXT_RE = /[0-9a-f]{32}/i;
14530
/**
 * Caps a string at `limit` UTF-16 code units.
 *
 * The cut never ends on a lone high surrogate: when the last kept unit is
 * the first half of a surrogate pair it is dropped as well, so the result
 * is always well-formed text.
 *
 * @param {string} value - Text to cap.
 * @param {number} [limit=RICH_TEXT_LIMIT] - Maximum number of code units to keep.
 * @returns {string} `value` unchanged when short enough, otherwise its prefix.
 */
function truncateText(value, limit = RICH_TEXT_LIMIT) {
  if (value.length <= limit) return value;
  let end = limit;
  const lastKept = value.charCodeAt(end - 1);
  // 0xD800-0xDBFF is the high-surrogate range; keeping it would split a pair.
  if (lastKept >= 0xd800 && lastKept <= 0xdbff) end -= 1;
  return value.slice(0, end);
}
14533
/**
 * Converts an arbitrary extracted value to a string and always returns a string.
 *
 * - `null` / `undefined` become `""`.
 * - Strings are returned as-is; numbers, booleans and bigints go through `String()`.
 * - Valid `Date` objects become their ISO string (without the surrounding
 *   quotes `JSON.stringify` would add); invalid dates become `""`.
 * - Everything else is JSON-encoded. Values `JSON.stringify` cannot represent
 *   (functions, symbols) become `""`, and values it throws on (cyclic
 *   structures) fall back to `String(value)`.
 *
 * @param {unknown} value - Value to convert.
 * @returns {string} String form of `value`.
 */
function stringifyValue(value) {
  if (value === null || value === void 0) return "";
  switch (typeof value) {
    case "string":
      return value;
    case "number":
    case "boolean":
    case "bigint":
      return String(value);
    default:
      break;
  }
  if (value instanceof Date) return Number.isNaN(value.getTime()) ? "" : value.toISOString();
  try {
    // JSON.stringify returns undefined for functions and symbols; callers call
    // string methods on the result, so normalize that to an empty string.
    return JSON.stringify(value) ?? "";
  } catch {
    // Cyclic structures (or nested bigints) make JSON.stringify throw.
    return String(value);
  }
}
14539
/**
 * Coerces a value to a finite number.
 *
 * Numbers are accepted when finite; non-blank strings are parsed with
 * `Number()`. Anything else, or any non-finite result, yields `null`.
 *
 * @param {unknown} value - Candidate value.
 * @returns {number | null} The finite number, or `null`.
 */
function asNumber(value) {
  let candidate = NaN;
  if (typeof value === "number") {
    candidate = value;
  } else if (typeof value === "string" && value.trim() !== "") {
    candidate = Number(value);
  }
  return Number.isFinite(candidate) ? candidate : null;
}
14547
/**
 * Coerces a value to a boolean.
 *
 * - Booleans are returned unchanged.
 * - Numbers are `true` unless they are `0` or `NaN` (matching JavaScript
 *   truthiness; `NaN` is not a meaningful "checked" value).
 * - Strings are `true` only for "true", "yes", "1" or "y"
 *   (trimmed, case-insensitive).
 * - Anything else uses normal truthiness.
 *
 * @param {unknown} value - Candidate value.
 * @returns {boolean} The coerced boolean.
 */
function asBoolean(value) {
  if (typeof value === "boolean") return value;
  if (typeof value === "number") return value !== 0 && !Number.isNaN(value);
  if (typeof value === "string") {
    const normalized = value.trim().toLowerCase();
    return ["true", "yes", "1", "y"].includes(normalized);
  }
  return Boolean(value);
}
14561
/**
 * Converts a value to an ISO 8601 string usable as a date `start`.
 *
 * - Valid `Date` objects and finite epoch-millisecond numbers become a full
 *   ISO timestamp.
 * - A string that is exactly a valid `YYYY-MM-DD` calendar date is returned
 *   as that date (trimmed) instead of being expanded to midnight UTC, so a
 *   date-only value stays date-only.
 * - Any other non-blank string is parsed with `Date.parse` and returned as a
 *   full ISO timestamp.
 * - Everything else, and anything unparsable, yields `null`.
 *
 * @param {unknown} value - Candidate value.
 * @returns {string | null} ISO date or datetime string, or `null`.
 */
function asDateStart(value) {
  if (value instanceof Date) {
    return Number.isNaN(value.getTime()) ? null : value.toISOString();
  }
  if (typeof value === "number" && Number.isFinite(value)) {
    const fromEpoch = new Date(value);
    return Number.isNaN(fromEpoch.getTime()) ? null : fromEpoch.toISOString();
  }
  if (typeof value !== "string") return null;
  const trimmed = value.trim();
  if (!trimmed) return null;
  if (/^\d{4}-\d{2}-\d{2}$/.test(trimmed)) {
    const dayMs = Date.parse(trimmed);
    // The round trip rejects dates the parser rolls over (e.g. Feb 30);
    // those fall through to the general path below.
    if (!Number.isNaN(dayMs) && new Date(dayMs).toISOString().slice(0, 10) === trimmed) return trimmed;
  }
  const ms = Date.parse(value);
  return Number.isNaN(ms) ? null : new Date(ms).toISOString();
}
14574
/**
 * Normalizes a value into a list of non-empty, trimmed strings.
 *
 * An array contributes one entry per element; any other value is treated as
 * a single-element list. Entries that stringify to blank text are dropped.
 *
 * @param {unknown} value - Array or single value.
 * @returns {string[]} Trimmed, non-empty string entries.
 */
function asStringArray(value) {
  const items = Array.isArray(value) ? value : [value];
  const labels = [];
  for (const item of items) {
    const label = stringifyValue(item).trim();
    if (label) labels.push(label);
  }
  return labels;
}
14579
/**
 * Builds the Notion page-property payload for one value.
 *
 * Supported property types: title, rich_text, number, checkbox, date, select,
 * multi_select, url, email and phone_number. Any other type returns `null`
 * so the caller can skip the property.
 *
 * Option names for `select` / `multi_select` must not contain commas (the
 * Notion API rejects them), so:
 * - `select`: commas are replaced by a single space;
 * - `multi_select`: comma-separated strings are split into separate options,
 *   commas inside non-string items are replaced by spaces, and duplicate
 *   names are removed.
 *
 * @param {string} type - Notion property type.
 * @param {unknown} value - Extracted value to convert.
 * @returns {object | null} Property payload, or `null` for unsupported types.
 */
function buildPropertyValue(type, value) {
  const content = truncateText(stringifyValue(value));
  switch (type) {
    case "title":
      return { title: content ? [{ text: { content } }] : [] };
    case "rich_text":
      return { rich_text: content ? [{ text: { content } }] : [] };
    case "number":
      return { number: asNumber(value) };
    case "checkbox":
      return { checkbox: asBoolean(value) };
    case "date": {
      const start = asDateStart(value);
      return { date: start ? { start } : null };
    }
    case "select": {
      const option = stringifyValue(value).replace(/\s*,\s*/g, " ").trim();
      return { select: option ? { name: option } : null };
    }
    case "multi_select": {
      const rawItems = Array.isArray(value) ? value : [value];
      const options = rawItems
        // Only plain strings are treated as comma-separated lists; other
        // items (objects, numbers) are stringified as a single option.
        .flatMap((item) => (typeof item === "string" ? item.split(",") : asStringArray(item)))
        .map((option) => option.replace(/,/g, " ").trim())
        .filter(Boolean);
      return { multi_select: [...new Set(options)].map((option) => ({ name: option })) };
    }
    case "url":
      return { url: content || null };
    case "email":
      return { email: content || null };
    case "phone_number":
      return { phone_number: content || null };
    default:
      return null;
  }
}
14601
/**
 * Finds the name of the title property in a database property map.
 *
 * The preferred name wins when it refers to a property of type "title";
 * otherwise the first property of type "title" is used.
 *
 * @param {Record<string, {type?: string} | null | undefined>} properties - Property map keyed by name.
 * @param {string} [preferred] - Property name to try first.
 * @returns {string | null} Title property name, or `null` when none exists.
 */
function findTitleProperty(properties, preferred) {
  const isTitle = (propertyName) => properties[propertyName]?.type === "title";
  if (preferred && isTitle(preferred)) return preferred;
  for (const propertyName of Object.keys(properties)) {
    if (isTitle(propertyName)) return propertyName;
  }
  return null;
}
14605
/**
 * Formats a 32-character hexadecimal ID as a hyphenated 8-4-4-4-12 ID.
 *
 * Existing hyphens are ignored. When the input (minus hyphens) is not exactly
 * 32 hex characters it is returned unchanged.
 *
 * @param {string} value - Raw or already-hyphenated ID.
 * @returns {string} Hyphenated ID, or `value` unchanged.
 */
function hyphenateDatabaseId(value) {
  const hex = value.replace(/-/g, "");
  if (!/^[0-9a-f]{32}$/i.test(hex)) return value;
  return [
    hex.slice(0, 8),
    hex.slice(8, 12),
    hex.slice(12, 16),
    hex.slice(16, 20),
    hex.slice(20)
  ].join("-");
}

/**
 * Extracts a database ID from user input (a bare ID, a hyphenated ID, or a
 * URL / text containing one) and returns it in hyphenated form.
 *
 * Lookup order:
 * 1. the whole input is a hyphenated ID (returned as-is);
 * 2. the whole input is a 32-hex ID;
 * 3. the first ID inside the text that is not glued to other hex characters.
 *    This keeps hex letters at the end of a URL slug (e.g. `Cafe-<id>`) from
 *    being absorbed into the ID;
 * 4. legacy fallback: strip every hyphen and take the first 32-hex run, for
 *    inputs with non-standard hyphenation.
 *
 * @param {string} value - Database URL or ID.
 * @returns {string} Hyphenated ID, `""` for blank input, or the trimmed input
 *   unchanged when no ID can be found.
 */
function parseNotionDatabaseId(value) {
  const input = value.trim();
  if (!input) return "";
  if (/^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i.test(input)) return input;
  if (/^[0-9a-f]{32}$/i.test(input)) return hyphenateDatabaseId(input);
  const delimited = input.match(/(?<![0-9a-f])(?:[0-9a-f]{32}|[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})(?![0-9a-f])/i);
  if (delimited) return hyphenateDatabaseId(delimited[0]);
  const loose = input.replace(/-/g, "").match(/[0-9a-f]{32}/i);
  return loose ? hyphenateDatabaseId(loose[0]) : input;
}
14618
/**
 * Reduces a field or property name to a comparison key: NFKC-normalized,
 * lower-cased, with every character that is not a Unicode letter or number
 * removed.
 *
 * @param {string} value - Name to normalize.
 * @returns {string} Comparison key (may be empty).
 */
function normalizeFieldName(value) {
  const folded = value.normalize("NFKC").toLowerCase();
  return folded.replace(/[^\p{Letter}\p{Number}]+/gu, "");
}
14621
/**
 * Collects the comparison keys for a schema field, in priority order:
 * its name, then its title, then its description. Blank candidates and
 * candidates that normalize to an empty key are skipped.
 *
 * @param {{name?: string, title?: string, description?: string}} field - Schema field.
 * @returns {string[]} Normalized keys, most specific first.
 */
function buildMatchKeys(field) {
  const keys = [];
  for (const candidate of [field.name, field.title, field.description]) {
    if (!candidate?.trim()) continue;
    const key = normalizeFieldName(candidate);
    if (key) keys.push(key);
  }
  return keys;
}
14628
/**
 * Suggests a schema-field → database-property mapping by comparing
 * normalized names.
 *
 * Each field is mapped to the property matching the first of its match keys
 * that has a counterpart; fields without a match are left out. When several
 * property names normalize to the same key, the last one wins.
 *
 * @param {Array<{name: string, title?: string, description?: string}>} schemaFields - Schema fields.
 * @param {Record<string, unknown>} databaseProperties - Database properties keyed by name.
 * @returns {Record<string, string>} Map from field name to property name.
 */
function suggestFieldMap(schemaFields, databaseProperties) {
  const propertyByKey = new Map(
    Object.keys(databaseProperties).map((propertyName) => [normalizeFieldName(propertyName), propertyName])
  );
  const fieldMap = {};
  for (const field of schemaFields) {
    const matched = buildMatchKeys(field)
      .map((key) => propertyByKey.get(key))
      .find(Boolean);
    if (matched) fieldMap[field.name] = matched;
  }
  return fieldMap;
}
14641
/**
 * Retrieves a database through the Notion client and summarizes it.
 *
 * @param {{token: string, databaseId: string, schemaFields: Array<object>}} input
 *   Integration token, database URL or ID, and the schema fields to match.
 * @returns {Promise<{databaseId: string, titleProperty: (string|undefined), properties: Array<{name: string, type: string}>, suggestedFieldMap: Record<string, string>}>}
 *   The normalized database ID, the title property name (if any), all
 *   properties sorted by name, and a suggested field mapping.
 * @throws {Error} When the token or database ID is blank; errors from the
 *   client call propagate unchanged.
 */
async function inspectNotionDatabase(input) {
  if (!input.token.trim()) throw new Error("Notion integration token is required.");
  const databaseId = parseNotionDatabaseId(input.databaseId);
  if (!databaseId) throw new Error("Notion database URL or ID is required.");
  const notion = new Client({ auth: input.token });
  const database = await notion.databases.retrieve({ database_id: databaseId });
  const databaseProperties = database.properties;
  const properties = [];
  for (const [propertyName, property] of Object.entries(databaseProperties)) {
    properties.push({
      name: propertyName,
      type: property?.type ?? "unknown"
    });
  }
  properties.sort((left, right) => left.name.localeCompare(right.name));
  return {
    databaseId,
    titleProperty: findTitleProperty(databaseProperties) ?? void 0,
    properties,
    suggestedFieldMap: suggestFieldMap(input.schemaFields, databaseProperties)
  };
}
14656
/**
 * Checks that a Notion export configuration is usable.
 *
 * A missing or non-string token is reported as a validation message rather
 * than surfacing as a TypeError.
 *
 * @param {{enabled?: boolean, token?: unknown} | null | undefined} config - Notion settings.
 * @returns {string | null} A human-readable problem description, or `null`
 *   when the configuration is enabled and has a non-blank token.
 */
function validateNotionConfig(config) {
  if (!config?.enabled) return "Notion export is not enabled. Configure Notion settings first.";
  if (typeof config.token !== "string" || !config.token.trim()) return "Notion integration token is required.";
  return null;
}
14661
/**
 * Creates a page in the Notion database configured for a schema.
 *
 * Each entry of `data` is written to the database property named by the
 * schema's `fieldMap` (falling back to the field's own name); entries with no
 * matching property or an unsupported property type are skipped. When the
 * title property received no value, it is filled with the first non-blank
 * string in `data`, or with the schema name.
 *
 * The configured database ID is normalized with `parseNotionDatabaseId`, the
 * same way `inspectNotionDatabase` does, so a stored URL or un-hyphenated ID
 * also works.
 *
 * @param {object} config - Notion settings (`enabled`, `token`, `schemas`).
 * @param {string} schemaName - Schema whose database should receive the page.
 * @param {Record<string, unknown>} data - Extracted field values.
 * @returns {Promise<{pageId: string, databaseId: string}>} Created page ID and
 *   the (normalized) database ID.
 * @throws {Error} When the configuration is invalid, the schema has no
 *   database configured, or no property could be populated; errors from the
 *   client calls propagate unchanged.
 */
async function writeNotionPage(config, schemaName, data) {
  const configError = validateNotionConfig(config);
  if (configError) throw new Error(configError);
  const schemaConfig = config.schemas?.[schemaName];
  if (!schemaConfig) throw new Error(`Notion database is not configured for schema "${schemaName}".`);
  const databaseId = typeof schemaConfig.databaseId === "string" ? parseNotionDatabaseId(schemaConfig.databaseId) : "";
  if (!databaseId) throw new Error(`Notion database ID is required for schema "${schemaName}".`);
  const notion = new Client({ auth: config.token });
  const databaseProperties = (await notion.databases.retrieve({ database_id: databaseId })).properties;
  const fieldMap = schemaConfig.fieldMap ?? {};
  const properties = {};
  for (const [sourceField, sourceValue] of Object.entries(data)) {
    const notionPropertyName = fieldMap[sourceField] ?? sourceField;
    const notionProperty = databaseProperties[notionPropertyName];
    if (!notionProperty) continue;
    const propertyValue = buildPropertyValue(notionProperty.type, sourceValue);
    if (propertyValue) properties[notionPropertyName] = propertyValue;
  }
  const titleProperty = findTitleProperty(databaseProperties, schemaConfig.titleProperty);
  if (titleProperty && !properties[titleProperty]) {
    const firstText = Object.values(data).find((value) => typeof value === "string" && value.trim());
    properties[titleProperty] = buildPropertyValue("title", firstText ?? schemaName);
  }
  // NOTE(review): this check runs after the title fallback, so it can only
  // fire for a database without a title property — confirm that is intended.
  if (Object.keys(properties).length === 0) throw new Error("No extracted fields matched Notion database properties.");
  const page = await notion.pages.create({
    parent: { database_id: databaseId },
    properties
  });
  return {
    pageId: page.id,
    databaseId
  };
}
14690
+
14184
14691
  //#endregion
14185
14692
  //#region src/server/routes/ai.ts
14693
// Matches a trailing ".json" extension, case-insensitive.
+ const JSON_EXT_RE = /\.json$/i;
14694
/**
 * Lists the top-level fields of a schema object.
 *
 * Properties flagged as nested (`nested.enabled`), and array properties whose
 * items are flagged as nested, are left out. `title` and `description` are
 * only carried over when they are strings.
 *
 * @param {{properties?: Record<string, any>} | null | undefined} schema - Schema object.
 * @returns {Array<{name: string, title: (string|undefined), description: (string|undefined)}>}
 *   One entry per remaining property; empty when the schema has no property map.
 */
function extractSchemaFields(schema) {
  const schemaProperties = schema?.properties;
  if (!schemaProperties || typeof schemaProperties !== "object") return [];
  const fields = [];
  for (const [fieldName, property] of Object.entries(schemaProperties)) {
    const isNested = property?.nested?.enabled
      || (property?.type === "array" && property?.items?.nested?.enabled);
    if (isNested) continue;
    fields.push({
      name: fieldName,
      title: typeof property?.title === "string" ? property.title : void 0,
      description: typeof property?.description === "string" ? property.description : void 0
    });
  }
  return fields;
}
14706
/**
 * Reads the schema file for `schemaName` from `config.schemaPath` and returns
 * its top-level fields.
 *
 * Only the base name of `schemaName` is used, and a trailing ".json" is
 * tolerated, so the lookup always targets `<schemaPath>/<name>.json`.
 *
 * @param {{schemaPath: string}} config - Configuration holding the schema directory.
 * @param {string} schemaName - Schema name, with or without ".json".
 * @returns {Promise<Array<object>>} Fields as produced by `extractSchemaFields`.
 */
async function loadSchemaFields(config, schemaName) {
  const baseName = path.basename(schemaName);
  const schemaFile = `${baseName.replace(JSON_EXT_RE, "")}.json`;
  const schema = await readFile(path.join(config.schemaPath, schemaFile));
  return extractSchemaFields(schema);
}
14186
14710
  function aiRoutes(config) {
14187
14711
  const app = new Hono();
14188
14712
  const aiexDir = path.dirname(config.schemaPath);
@@ -14209,6 +14733,32 @@ function aiRoutes(config) {
14209
14733
  return c.json({});
14210
14734
  }
14211
14735
  });
14736
+ app.post("/ai/notion/inspect", async (c) => {
14737
+ try {
14738
+ const body = await c.req.json();
14739
+ const token = typeof body.token === "string" ? body.token : "";
14740
+ const databaseId = typeof body.databaseId === "string" ? body.databaseId : "";
14741
+ const schemaName = typeof body.schemaName === "string" ? body.schemaName : "";
14742
+ if (!schemaName) return c.json({
14743
+ success: false,
14744
+ error: "Schema is required"
14745
+ }, 400);
14746
+ const result = await inspectNotionDatabase({
14747
+ token,
14748
+ databaseId,
14749
+ schemaFields: await loadSchemaFields(config, schemaName)
14750
+ });
14751
+ return c.json({
14752
+ success: true,
14753
+ ...result
14754
+ });
14755
+ } catch (error) {
14756
+ return c.json({
14757
+ success: false,
14758
+ error: getErrorMessage(error)
14759
+ }, 400);
14760
+ }
14761
+ });
14212
14762
  app.put("/ai/config", async (c) => {
14213
14763
  try {
14214
14764
  const body = await c.req.json();
@@ -14226,6 +14776,19 @@ function aiRoutes(config) {
14226
14776
  success: false,
14227
14777
  error: "At least one model must be configured"
14228
14778
  }, 400);
14779
+ if (body.notion?.enabled) {
14780
+ if (!body.notion.token?.trim()) return c.json({
14781
+ success: false,
14782
+ error: "Notion token is required when Notion export is enabled"
14783
+ }, 400);
14784
+ for (const [schemaName, schemaConfig] of Object.entries(body.notion.schemas ?? {})) {
14785
+ if (typeof schemaConfig.databaseId === "string") schemaConfig.databaseId = parseNotionDatabaseId(schemaConfig.databaseId);
14786
+ if (!schemaConfig.databaseId?.trim()) return c.json({
14787
+ success: false,
14788
+ error: `Notion database ID is required for schema "${schemaName}"`
14789
+ }, 400);
14790
+ }
14791
+ }
14229
14792
  await writeAIConfig(aiexDir, AIConfigSchema.parse(body));
14230
14793
  return c.json({ success: true });
14231
14794
  } catch (error) {
@@ -14408,6 +14971,247 @@ function dataRoutes(config) {
14408
14971
  return app;
14409
14972
  }
14410
14973
 
14974
+ //#endregion
14975
+ //#region src/server/routes/extract.ts
14976
/**
 * Reads a form field as a trimmed string.
 *
 * Arrays are unwrapped to their first element (repeatedly, for nested
 * arrays). Non-string values yield `""`.
 *
 * @param {unknown} value - Parsed form value.
 * @returns {string} Trimmed string, or `""`.
 */
function getFormString(value) {
  let current = value;
  while (Array.isArray(current)) current = current[0];
  return typeof current === "string" ? current.trim() : "";
}
14980
/**
 * Reads a form field as an uploaded file.
 *
 * Arrays are unwrapped to their first element (repeatedly, for nested
 * arrays). Only non-empty `File` instances are accepted.
 *
 * @param {unknown} value - Parsed form value.
 * @returns {File | null} The file, or `null` when absent or empty.
 */
function getFormFile(value) {
  let current = value;
  while (Array.isArray(current)) current = current[0];
  if (!(current instanceof File)) return null;
  return current.size > 0 ? current : null;
}
14984
/**
 * Derives a file-system-safe name from an uploaded file's name: directory
 * parts are dropped and every character other than word characters, "." and
 * "-" is replaced by "_". An empty result becomes "upload.txt".
 *
 * @param {string} name$1 - Client-supplied file name.
 * @returns {string} Sanitized file name.
 */
function safeUploadName(name$1) {
  const sanitized = path.basename(name$1).replace(/[^\w.-]/g, "_");
  return sanitized === "" ? "upload.txt" : sanitized;
}
14987
/**
 * Writes an uploaded file into the uploads directory, creating the directory
 * when needed. The stored name is `<id>-<sanitized original name>`.
 *
 * @param {File} file - Uploaded file.
 * @param {string} uploadsDir - Directory that receives uploads.
 * @param {string} id - Prefix for the stored file name.
 * @returns {Promise<string>} Path of the written file.
 */
async function saveUploadToFile(file, uploadsDir, id) {
  await fs.mkdir(uploadsDir, { recursive: true });
  const targetPath = path.join(uploadsDir, `${id}-${safeUploadName(file.name)}`);
  const bytes = await file.arrayBuffer();
  await fs.writeFile(targetPath, Buffer.from(bytes));
  return targetPath;
}
14994
/**
 * Builds a JSON `Response` with the given status code.
 *
 * @param {object} payload - Body to serialize.
 * @param {number} status - HTTP status code.
 * @returns {Response} JSON response.
 */
function auditJsonResponse(payload, status) {
  return new Response(JSON.stringify(payload), {
    status,
    headers: { "content-type": "application/json" }
  });
}

/**
 * Marks the audit record as failed and builds the matching error response.
 *
 * @param {{aiexDir: string, auditId: string}} input - Extraction request.
 * @param {number} status - HTTP status code for the response.
 * @param {string} error - Error message to store and return.
 * @param {object} [details] - Extra fields to store on the audit record.
 * @returns {Promise<Response>} `{ success: false, error, auditId }` response.
 */
async function failAuditedExtraction(input, status, error, details = {}) {
  const record = await updateExtractionAuditRecord(input.aiexDir, input.auditId, {
    status: "failed",
    ...details,
    error
  });
  return auditJsonResponse({
    success: false,
    error: record.error,
    auditId: record.id
  }, status);
}

/**
 * Runs one extraction for an existing audit record and finalizes that record.
 *
 * Steps: load the AI configuration and validate it (400 on problems), resolve
 * the optional model override (400 when unknown), read the input file when a
 * file path is given, run `extractSingle`, optionally write the result to
 * Notion, then store the outcome on the audit record.
 *
 * Exceptions thrown while reading the input file or running the extraction
 * are converted into a failed audit record and a 500 response, so the record
 * is not left without a final status.
 *
 * @param {object} input - Request description.
 * @param {string} input.aiexDir - Project data directory.
 * @param {object} input.config - Application configuration passed to `extractSingle`.
 * @param {string} input.auditId - Audit record to update.
 * @param {string} input.schemaName - Schema to extract with.
 * @param {string} [input.text] - Inline text input.
 * @param {string} [input.filePath] - Input file path (takes precedence over `text`).
 * @param {string} [input.modelName] - Name of a configured model to use.
 * @param {boolean} [input.syncNotion] - Whether to write the result to Notion.
 * @returns {Promise<Response>} JSON response: 200 on success, 400 for
 *   configuration problems, 500 when extraction or the Notion write fails.
 */
async function executeAuditedExtraction(input) {
  const aiConfig = await readAIConfig(input.aiexDir);
  if (!aiConfig) return failAuditedExtraction(input, 400, "AI configuration not found. Configure AI settings first.");
  if (!aiConfig.provider.apiKey) return failAuditedExtraction(input, 400, "API Key not configured. Configure AI settings first.");
  if (!aiConfig.provider.models?.length) return failAuditedExtraction(input, 400, "No models configured. Add at least one model in AI Settings.");
  const modelOverride = input.modelName ? aiConfig.provider.models.find((model) => model.name === input.modelName) : void 0;
  if (input.modelName && !modelOverride) return failAuditedExtraction(input, 400, `Model "${input.modelName}" not found in AI settings`);
  let result;
  try {
    let inputText = input.text;
    let inputFilePath = input.filePath;
    if (input.filePath) {
      const source = await readExtractFileInput(input.filePath, aiConfig);
      inputText = source.text;
      inputFilePath = source.filePath;
    }
    result = await extractSingle(input.aiexDir, input.config, aiConfig, input.schemaName, inputText, inputFilePath, modelOverride, { quiet: true });
  } catch (error) {
    return failAuditedExtraction(input, 500, error instanceof Error ? error.message : String(error));
  }
  if (!result.success) return failAuditedExtraction(input, 500, result.error || "Extraction failed");
  const outputName = result.outputPath ? path.basename(result.outputPath) : void 0;
  const notionPages = [];
  if (input.syncNotion) {
    try {
      if (!result.data || typeof result.data !== "object" || Array.isArray(result.data)) throw new Error("Extraction result is not an object and cannot be written to Notion.");
      notionPages.push(await writeNotionPage(aiConfig.notion, input.schemaName, result.data));
    } catch (error) {
      // The extraction itself succeeded, so its output details are still
      // recorded alongside the Notion failure.
      return failAuditedExtraction(input, 500, error instanceof Error ? error.message : String(error), {
        outputPath: result.outputPath,
        outputName,
        tablesInserted: result.tablesInserted,
        tokensUsed: result.tokensUsed
      });
    }
  }
  const record = await updateExtractionAuditRecord(input.aiexDir, input.auditId, {
    status: "succeeded",
    outputPath: result.outputPath,
    outputName,
    tablesInserted: result.tablesInserted,
    notionPages: notionPages.length > 0 ? notionPages : void 0,
    tokensUsed: result.tokensUsed
  });
  return auditJsonResponse({
    success: true,
    outputPath: record.outputPath,
    outputName: record.outputName,
    tablesInserted: record.tablesInserted,
    notionPages: record.notionPages,
    tokensUsed: record.tokensUsed,
    auditId: record.id
  }, 200);
}
15118
/**
 * Builds the extraction routes:
 *
 * - `GET /extract/records` lists audit records.
 * - `POST /extract` runs an extraction from form data (`schema`, plus either
 *   `text` or `file`; optional `model` and `notion`).
 * - `POST /extract/records/:id/retry` re-runs the input of an existing record
 *   under a new audit record.
 * - `DELETE /extract/records/:id` removes an audit record.
 *
 * The extraction call is awaited inside each `try` block so that rejections
 * reach the `catch`; a plain `return promise` would bypass it. When a handler
 * fails after its audit record was created, that record is marked as failed
 * on a best-effort basis.
 *
 * @param {{schemaPath: string}} config - Application configuration.
 * @returns {Hono} Router to mount under the API prefix.
 */
function extractRoutes(config) {
  const app = new Hono();
  const aiexDir = path.dirname(config.schemaPath);
  const uploadsDir = path.join(aiexDir, "uploads");
  // Shared failure path: report the error as JSON and close the audit record.
  const failRoute = async (c, error, auditId) => {
    const message = error instanceof Error ? error.message : String(error);
    if (auditId) {
      try {
        await updateExtractionAuditRecord(aiexDir, auditId, {
          status: "failed",
          error: message
        });
      } catch {
        // Best effort only: the client should still see the original error.
      }
    }
    return c.json({
      success: false,
      error: message,
      ...(auditId ? { auditId } : {})
    }, 500);
  };
  app.get("/extract/records", async (c) => {
    return c.json(await listExtractionAuditRecords(aiexDir));
  });
  app.post("/extract", async (c) => {
    let auditId;
    try {
      const body = await c.req.parseBody();
      const schemaName = getFormString(body.schema);
      const inputText = getFormString(body.text);
      const modelName = getFormString(body.model);
      const syncNotion = getFormString(body.notion) === "true";
      const file = getFormFile(body.file);
      if (!schemaName) return c.json({
        success: false,
        error: "Schema is required"
      }, 400);
      if (!inputText && !file) return c.json({
        success: false,
        error: "Provide text or upload a file to extract"
      }, 400);
      if (inputText && file) return c.json({
        success: false,
        error: "Text and file input cannot be used together"
      }, 400);
      const audit = await createExtractionAuditRecord(aiexDir, {
        schemaName,
        modelName,
        source: file ? {
          type: "file",
          fileName: safeUploadName(file.name)
        } : {
          type: "text",
          text: inputText
        }
      });
      auditId = audit.id;
      let filePath;
      if (file) {
        filePath = await saveUploadToFile(file, uploadsDir, audit.id);
        await updateExtractionAuditRecord(aiexDir, audit.id, { source: {
          type: "file",
          filePath,
          fileName: safeUploadName(file.name)
        } });
      }
      return await executeAuditedExtraction({
        aiexDir,
        config,
        auditId: audit.id,
        schemaName,
        text: inputText,
        filePath,
        modelName,
        syncNotion
      });
    } catch (error) {
      return failRoute(c, error, auditId);
    }
  });
  app.post("/extract/records/:id/retry", async (c) => {
    let auditId;
    try {
      const original = await readExtractionAuditRecord(aiexDir, c.req.param("id"));
      if (!original) return c.json({
        success: false,
        error: "Extraction record not found"
      }, 404);
      const audit = await createExtractionAuditRecord(aiexDir, {
        schemaName: original.schemaName,
        modelName: original.modelName,
        source: original.source,
        retryOf: original.id
      });
      auditId = audit.id;
      // NOTE(review): the retry does not pass syncNotion, so a retried run
      // never writes to Notion — confirm that is intended.
      return await executeAuditedExtraction({
        aiexDir,
        config,
        auditId: audit.id,
        schemaName: original.schemaName,
        text: original.source.type === "text" ? original.source.text ?? "" : "",
        filePath: original.source.type === "file" ? original.source.filePath : void 0,
        modelName: original.modelName
      });
    } catch (error) {
      return failRoute(c, error, auditId);
    }
  });
  app.delete("/extract/records/:id", async (c) => {
    try {
      const id = c.req.param("id");
      if (!await readExtractionAuditRecord(aiexDir, id)) return c.json({
        success: false,
        error: "Extraction record not found"
      }, 404);
      await deleteExtractionAuditRecord(aiexDir, id);
      return c.json({ success: true });
    } catch (error) {
      return failRoute(c, error);
    }
  });
  return app;
}
15214
+
14411
15215
  //#endregion
14412
15216
  //#region src/server/routes/schema.ts
14413
15217
  const schemaFileNameSchema = z.string().regex(/^[\w.-]+\.json$/).refine((name$1) => name$1 === path.basename(name$1) && !name$1.includes(".."));
@@ -14530,6 +15334,7 @@ function createApp(config, staticDir) {
14530
15334
  app.use("*", cors({ origin: (origin) => LOCAL_ORIGIN_RE.test(origin) ? origin : null }));
14531
15335
  app.route("/api", schemaRoutes(config));
14532
15336
  app.route("/api", aiRoutes(config));
15337
+ app.route("/api", extractRoutes(config));
14533
15338
  app.route("/api", dataRoutes(config));
14534
15339
  app.use("/*", serveStatic({
14535
15340
  root: staticDir,