aiex-cli 0.0.1-beta.9 → 0.0.2-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127) hide show
  1. package/README.md +50 -26
  2. package/dist/cli.mjs +1776 -626
  3. package/dist/{completions-ygS1okck.mjs → completions-C3rmTwXZ.mjs} +2 -2
  4. package/dist/core/schema-sqlite/migrate-helper.mjs +35 -37
  5. package/dist/{doctor-BiU1lDp-.mjs → doctor-collector-Cbqgwx__.mjs} +309 -223
  6. package/dist/index.d.mts +15 -15
  7. package/dist/index.mjs +1 -1
  8. package/dist/table-schema.json +4 -0
  9. package/dist/web/assets/AISettings-Cm8fAfaG.js +205 -0
  10. package/dist/web/assets/DataBrowser-DQwOvooY.js +6 -0
  11. package/dist/web/assets/ExtractRunner-CzfLlCCf.js +1 -0
  12. package/dist/web/assets/ExtractionViewer-D7tghmR-.js +1 -0
  13. package/dist/web/assets/JsonSchemaEditor-DXhYW-Un.js +570 -0
  14. package/dist/web/assets/api-client-CIC2X8_4.js +1 -0
  15. package/dist/web/assets/baseinput-B7PU5-38.js +2 -0
  16. package/dist/web/assets/button-Cdgr9Igy.js +927 -0
  17. package/dist/web/assets/checkbox-CCQwMfLd.js +142 -0
  18. package/dist/web/assets/{cssMode-DAbG0CMn.js → cssMode-CCz1uj8b.js} +1 -1
  19. package/dist/web/assets/dialog-8xaIXBKO.js +108 -0
  20. package/dist/web/assets/dist-B5Yem2Jn.js +1 -0
  21. package/dist/web/assets/{editor.main-BqhfoHxy.js → editor.main-DOue-nGf.js} +2 -2
  22. package/dist/web/assets/{freemarker2-B9_5ct2b.js → freemarker2-C87H7V8i.js} +1 -1
  23. package/dist/web/assets/{handlebars-TY59WcoQ.js → handlebars-TLnoktZW.js} +1 -1
  24. package/dist/web/assets/{html-CLULsh27.js → html-Rewbtgcp.js} +1 -1
  25. package/dist/web/assets/{htmlMode-BvG7RNbU.js → htmlMode-Cr9y3YUi.js} +1 -1
  26. package/dist/web/assets/index-BEorm4a5.css +2 -0
  27. package/dist/web/assets/{index-CKUXTDYj.js → index-CWy8KajQ.js} +38 -38
  28. package/dist/web/assets/{javascript-DHrLp6gu.js → javascript-BO8DPECx.js} +1 -1
  29. package/dist/web/assets/{jsonMode-DBDhdzl1.js → jsonMode-B78m_Y1l.js} +1 -1
  30. package/dist/web/assets/{liquid-tGeb-nqF.js → liquid-CDp_8YUE.js} +1 -1
  31. package/dist/web/assets/{mdx-Cmdz78VU.js → mdx-CeR1GULE.js} +1 -1
  32. package/dist/web/assets/{monaco.contribution-CroYPUF5.js → monaco.contribution-tCitzj1_.js} +2 -2
  33. package/dist/web/assets/object-utils-Cm2M3fPO.js +1 -0
  34. package/dist/web/assets/{python-Dmfz4iDE.js → python-DWmB1hQ6.js} +1 -1
  35. package/dist/web/assets/{razor-BJicZHJs.js → razor-BVk762Lq.js} +1 -1
  36. package/dist/web/assets/runtime-dom.esm-bundler-DZwuSnld.js +1 -0
  37. package/dist/web/assets/select-DKPPF403.js +438 -0
  38. package/dist/web/assets/{tsMode-DYqTyE66.js → tsMode-Gjs5D1gt.js} +1 -1
  39. package/dist/web/assets/{typescript-DLnTe9Hf.js → typescript-Cw6YtwrM.js} +1 -1
  40. package/dist/web/assets/utils-CZhxh2Ya.js +2 -0
  41. package/dist/web/assets/{xml-BIYqLORk.js → xml-guZq0YZJ.js} +1 -1
  42. package/dist/web/assets/{yaml-BjmulkMX.js → yaml-B9TbU-LX.js} +1 -1
  43. package/dist/web/index.html +11 -8
  44. package/package.json +17 -1
  45. package/src/core/schema-sqlite/migrate-helper.ts +32 -46
  46. package/dist/web/assets/AISettings-DOzonIux.js +0 -334
  47. package/dist/web/assets/DataBrowser-rznfVRaV.js +0 -3
  48. package/dist/web/assets/JsonSchemaEditor-C9iyQs7N.js +0 -929
  49. package/dist/web/assets/api-client-Dsg4WOM9.js +0 -1
  50. package/dist/web/assets/button-kTMweGMc.js +0 -927
  51. package/dist/web/assets/dialog-CWuu7WjI.js +0 -108
  52. package/dist/web/assets/index-DDFnprdM.css +0 -2
  53. package/dist/web/assets/lib-C30cIFrm.js +0 -1
  54. package/dist/web/assets/overlayeventbus-AtOpmI6n.js +0 -80
  55. package/dist/web/assets/table-schema-mJrrf9qw.js +0 -2
  56. /package/dist/web/assets/{abap-DrZwwXZX.js → abap-C3UM4cME.js} +0 -0
  57. /package/dist/web/assets/{apex-CrCz0btt.js → apex-BQBZvQmN.js} +0 -0
  58. /package/dist/web/assets/{azcli-BapzKHay.js → azcli-Dn9Awrok.js} +0 -0
  59. /package/dist/web/assets/{bat-C_NRAiA1.js → bat-JgEezSDo.js} +0 -0
  60. /package/dist/web/assets/{bicep-C7pp2CNk.js → bicep-vcQeC7wE.js} +0 -0
  61. /package/dist/web/assets/{cameligo-BhhK9vxZ.js → cameligo-C0cr0T3L.js} +0 -0
  62. /package/dist/web/assets/{clojure-D0ujmUyE.js → clojure-Brc1-rbW.js} +0 -0
  63. /package/dist/web/assets/{coffee-DHEl7Jbb.js → coffee-BiZYU83a.js} +0 -0
  64. /package/dist/web/assets/{cpp-Iil-3nzZ.js → cpp-ikWHCInz.js} +0 -0
  65. /package/dist/web/assets/{csharp-Dh0Ee7SY.js → csharp-BRWs_LfH.js} +0 -0
  66. /package/dist/web/assets/{csp-mwzjw0JL.js → csp-CcAumoJw.js} +0 -0
  67. /package/dist/web/assets/{css-COIa8ZTR.js → css-BZm6paiA.js} +0 -0
  68. /package/dist/web/assets/{cypher-GVc17FC4.js → cypher-CDQMONdb.js} +0 -0
  69. /package/dist/web/assets/{dart-phiCaE7_.js → dart-Ci4SZdF1.js} +0 -0
  70. /package/dist/web/assets/{dockerfile-BMaDhdim.js → dockerfile-BV0tAr-M.js} +0 -0
  71. /package/dist/web/assets/{ecl-Cj47kvqp.js → ecl-CP7nM2KN.js} +0 -0
  72. /package/dist/web/assets/{editor.api-DLXGyrN1.js → editor.api-BU_q4v8i.js} +0 -0
  73. /package/dist/web/assets/{elixir-DBbstcE1.js → elixir-GcA6wFiI.js} +0 -0
  74. /package/dist/web/assets/{flow9-ChHb1adO.js → flow9-CIb9youF.js} +0 -0
  75. /package/dist/web/assets/{fsharp-CMk2OIJN.js → fsharp-BVaBE4co.js} +0 -0
  76. /package/dist/web/assets/{go-BrMkuJg0.js → go-Bbqf306x.js} +0 -0
  77. /package/dist/web/assets/{graphql-PSR1UKGv.js → graphql-DJPrC4l-.js} +0 -0
  78. /package/dist/web/assets/{hcl-DAQrbDOW.js → hcl-QyfWVWpM.js} +0 -0
  79. /package/dist/web/assets/{ini-0TG5BxW0.js → ini-CgstZeS8.js} +0 -0
  80. /package/dist/web/assets/{java-rgorz17v.js → java-D4AG88ZY.js} +0 -0
  81. /package/dist/web/assets/{julia-C8VMdHm8.js → julia-CN8U9648.js} +0 -0
  82. /package/dist/web/assets/{kotlin-CllWo3gX.js → kotlin-gNNgpJhY.js} +0 -0
  83. /package/dist/web/assets/{less-Cgca25AP.js → less-C3SY2L8t.js} +0 -0
  84. /package/dist/web/assets/{lexon-D0GHdBaw.js → lexon-CznnqzUX.js} +0 -0
  85. /package/dist/web/assets/{lua-DmRsNG-P.js → lua-f3xyJgy5.js} +0 -0
  86. /package/dist/web/assets/{m3-BgL5dNKT.js → m3-Cl7J89p-.js} +0 -0
  87. /package/dist/web/assets/{markdown-BuJfycGS.js → markdown-Bv2fnzzT.js} +0 -0
  88. /package/dist/web/assets/{mips-C9m_93PR.js → mips-D6rXUTWa.js} +0 -0
  89. /package/dist/web/assets/{msdax-CpFHC9OI.js → msdax-Bb1N2x5J.js} +0 -0
  90. /package/dist/web/assets/{mysql-qFvltsqN.js → mysql-DXSr6oD7.js} +0 -0
  91. /package/dist/web/assets/{objective-c-Bnmr858J.js → objective-c-CEJiVkDa.js} +0 -0
  92. /package/dist/web/assets/{pascal-WP0_D5AO.js → pascal-BtkMEIba.js} +0 -0
  93. /package/dist/web/assets/{pascaligo-Blom4Rij.js → pascaligo-C7FAwqk7.js} +0 -0
  94. /package/dist/web/assets/{perl-B-vk8g64.js → perl-D9kqkBbN.js} +0 -0
  95. /package/dist/web/assets/{pgsql-Cgvz6v67.js → pgsql-BjGTBL1W.js} +0 -0
  96. /package/dist/web/assets/{php-8a3Lrw9m.js → php-BN0c0noA.js} +0 -0
  97. /package/dist/web/assets/{pla-DuFqEZ8V.js → pla-B94QTqOt.js} +0 -0
  98. /package/dist/web/assets/{postiats-DkLtSgkp.js → postiats-DH91dqBs.js} +0 -0
  99. /package/dist/web/assets/{powerquery-BJ1aNepW.js → powerquery-D7P0oUen.js} +0 -0
  100. /package/dist/web/assets/{powershell-rE98k687.js → powershell-CCVHmJax.js} +0 -0
  101. /package/dist/web/assets/{preload-helper-DWTEM3RW.js → preload-helper-DgFuoWHe.js} +0 -0
  102. /package/dist/web/assets/{protobuf-CUheFacr.js → protobuf-BIP7pixC.js} +0 -0
  103. /package/dist/web/assets/{pug-LDcAMD8w.js → pug-DcbLK7HH.js} +0 -0
  104. /package/dist/web/assets/{qsharp-IHfqKOfK.js → qsharp-B-VY_WOG.js} +0 -0
  105. /package/dist/web/assets/{r-D-QApv87.js → r-DwRtsJsj.js} +0 -0
  106. /package/dist/web/assets/{redis-SXdDyWR9.js → redis-CaW0tkwu.js} +0 -0
  107. /package/dist/web/assets/{redshift-Y6lsCryn.js → redshift-3tS8G0ME.js} +0 -0
  108. /package/dist/web/assets/{restructuredtext-edObr9a8.js → restructuredtext-_TNyGyK0.js} +0 -0
  109. /package/dist/web/assets/{ruby-CNnUfF-8.js → ruby-A-MwVfO4.js} +0 -0
  110. /package/dist/web/assets/{rust-IHUZWzBr.js → rust-oemlUIvG.js} +0 -0
  111. /package/dist/web/assets/{sb-DrUvY44N.js → sb-BDZuaI3W.js} +0 -0
  112. /package/dist/web/assets/{scala-B4hbXGLM.js → scala-Bfo2loK4.js} +0 -0
  113. /package/dist/web/assets/{scheme-BGrd12j3.js → scheme-N2eo7rjB.js} +0 -0
  114. /package/dist/web/assets/{scss-x5G1ES4U.js → scss-vjjSCTgN.js} +0 -0
  115. /package/dist/web/assets/{shell-DOehe2Y8.js → shell-Bfb9Yq6w.js} +0 -0
  116. /package/dist/web/assets/{solidity-BeRvcwWV.js → solidity-C9RbukzG.js} +0 -0
  117. /package/dist/web/assets/{sophia-DZbkUNjy.js → sophia-DWV_MWOg.js} +0 -0
  118. /package/dist/web/assets/{sparql-B7_oi5-h.js → sparql-iMXILWhh.js} +0 -0
  119. /package/dist/web/assets/{sql-CTlsFWVE.js → sql-CJDj31JM.js} +0 -0
  120. /package/dist/web/assets/{st-DJVEJdPE.js → st-BG9AQ1OO.js} +0 -0
  121. /package/dist/web/assets/{swift-CwhT3fYa.js → swift-B579DvHm.js} +0 -0
  122. /package/dist/web/assets/{systemverilog-BQN63pkN.js → systemverilog-BNgaF3ZX.js} +0 -0
  123. /package/dist/web/assets/{tcl-DqwfpskA.js → tcl-grdtJiUA.js} +0 -0
  124. /package/dist/web/assets/{twig-BiyenUgc.js → twig-JAsFXBZw.js} +0 -0
  125. /package/dist/web/assets/{typespec-CWOJribt.js → typespec-D3hIQXEU.js} +0 -0
  126. /package/dist/web/assets/{vb-Cq5F87m3.js → vb-H38jRcEz.js} +0 -0
  127. /package/dist/web/assets/{wgsl-BAvW2lVr.js → wgsl-BC5Grc5r.js} +0 -0
package/dist/cli.mjs CHANGED
@@ -1,25 +1,41 @@
1
- import { C as doctorDiagnosticsTableRows, a as writeAIConfig, b as toSnakeCase, c as PLACEHOLDER_TEXT, d as seedConfig, f as description, g as createMigrationConfig, h as version, i as readAIConfig, l as AIConfigSchema, m as package_default, n as getDefaultAIConfig, o as DEFAULT_PROMPT_CONFIG, p as name, r as maskApiKey, s as PLACEHOLDER_SCHEMA, t as collectDoctorDiagnostics, u as createConfig, v as JsonSchemaDefinitionSchema, w as formatDoctorDiagnosticsJson, x as generateDrizzleSchema, y as parseJsonSchema } from "./doctor-BiU1lDp-.mjs";
1
+ import { S as version, T as formatDoctorDiagnosticsJson, _ as createConfig, a as parseJsonSchema, b as name, c as getDefaultAIConfig, d as DEFAULT_MARKITDOWN_CONFIG, f as DEFAULT_MINERU_CONFIG, g as AIConfigSchema, h as PLACEHOLDER_TEXT, i as JsonSchemaDefinitionSchema, l as readAIConfig, m as PLACEHOLDER_SCHEMA, n as createMigrationConfig, o as toSnakeCase, p as DEFAULT_PROMPT_CONFIG, s as generateDrizzleSchema, t as collectDoctorDiagnostics, u as writeAIConfig, v as seedConfig, w as doctorDiagnosticsTableRows, x as package_default, y as description } from "./doctor-collector-Cbqgwx__.mjs";
2
2
  import { createRequire } from "node:module";
3
+ import fs from "node:fs/promises";
4
+ import os from "node:os";
3
5
  import path from "node:path";
4
6
  import process from "node:process";
7
+ import { readFile, writeFile } from "jsonfile";
8
+ import { ZodError, z } from "zod";
5
9
  import { fileURLToPath } from "node:url";
6
- import { ZodError } from "zod";
7
- import fs from "node:fs/promises";
8
10
  import { defineCommand, runMain } from "citty";
9
11
  import { consola } from "consola";
10
12
  import updateNotifier from "update-notifier";
11
13
  import CliTable3 from "cli-table3";
12
- import { intro, outro, spinner } from "@clack/prompts";
13
- import Database from "better-sqlite3";
14
+ import { intro, isCancel, outro, select, spinner, text } from "@clack/prompts";
14
15
  import pc from "picocolors";
15
16
  import { createOpenAICompatible } from "@ai-sdk/openai-compatible";
17
+ import { LangfuseSpanProcessor } from "@langfuse/otel";
18
+ import { NodeTracerProvider } from "@opentelemetry/sdk-trace-node";
16
19
  import { APICallError, Output, generateText, jsonSchema } from "ai";
17
- import { exec, execFile } from "node:child_process";
20
+ import mime from "mime";
21
+ import pRetry from "p-retry";
22
+ import { jsonrepair } from "jsonrepair";
23
+ import fs$1 from "node:fs";
24
+ import Database from "better-sqlite3";
25
+ import { glob, globSync } from "tinyglobby";
26
+ import { execa } from "execa";
27
+ import { extractText, getDocumentProxy, getMeta } from "unpdf";
28
+ import { Buffer } from "node:buffer";
29
+ import { execFile } from "node:child_process";
18
30
  import { promisify } from "node:util";
19
31
  import { serve } from "@hono/node-server";
32
+ import open from "open";
20
33
  import { serveStatic } from "@hono/node-server/serve-static";
21
34
  import { Hono } from "hono";
22
35
  import { cors } from "hono/cors";
36
+ import { Client } from "@notionhq/client";
37
+ import { zValidator } from "@hono/zod-validator";
38
+ import { Kysely, SqliteDialect, sql } from "kysely";
23
39
 
24
40
  //#region src/core/schema-sqlite/helpers.ts
25
41
  const __filename = fileURLToPath(import.meta.url);
@@ -96,7 +112,7 @@ function parseAllSchemas(entries) {
96
112
  }
97
113
 
98
114
  //#endregion
99
- //#region src/commands/completion.ts
115
+ //#region src/core/completion-scripts.ts
100
116
  function bashScript(name$1) {
101
117
  return `# ${name$1} bash completion
102
118
  _${name$1}() {
@@ -123,7 +139,7 @@ function fishScript(name$1) {
123
139
  complete -c ${name$1} -f -a '(${name$1} _complete (commandline -cp) 2>/dev/null)'
124
140
  `;
125
141
  }
126
- function generateScript(name$1, shell) {
142
+ function generateCompletionScript(name$1, shell) {
127
143
  switch (shell) {
128
144
  case "bash": return bashScript(name$1);
129
145
  case "zsh": return zshScript(name$1);
@@ -131,6 +147,9 @@ function generateScript(name$1, shell) {
131
147
  default: throw new Error(`Unsupported shell: ${shell}. Use bash, zsh, or fish.`);
132
148
  }
133
149
  }
150
+
151
+ //#endregion
152
+ //#region src/commands/completion.ts
134
153
  const completionCommand = defineCommand({
135
154
  meta: {
136
155
  name: "completion",
@@ -145,7 +164,7 @@ const completionCommand = defineCommand({
145
164
  const name$1 = "aiex";
146
165
  const shell = args.shell;
147
166
  try {
148
- process.stdout.write(generateScript(name$1, shell));
167
+ process.stdout.write(generateCompletionScript(name$1, shell));
149
168
  } catch (error) {
150
169
  process.stderr.write(`Error: ${error instanceof Error ? error.message : String(error)}\n`);
151
170
  process.exit(1);
@@ -185,6 +204,14 @@ const doctorCommand = defineCommand({
185
204
  }
186
205
  });
187
206
 
207
+ //#endregion
208
+ //#region src/commands/utils.ts
209
+ function failCommand(message) {
210
+ if (message) consola.error(message);
211
+ outro("Failed!");
212
+ process.exitCode = 1;
213
+ }
214
+
188
215
  //#endregion
189
216
  //#region src/core/ai-extraction/model-capabilities.json
190
217
  var model_capabilities_default = {
@@ -12727,26 +12754,26 @@ var model_capabilities_default = {
12727
12754
 
12728
12755
  //#endregion
12729
12756
  //#region src/core/ai-extraction/model-registry.ts
12730
- const registry = model_capabilities_default;
12757
+ const registry$1 = model_capabilities_default;
12731
12758
  function normalize(name$1) {
12732
12759
  return name$1.toLowerCase().replace(/[-_. ]/g, "");
12733
12760
  }
12734
12761
  const normalizedCache = /* @__PURE__ */ new Map();
12735
12762
  function buildNormalizedCache() {
12736
12763
  if (normalizedCache.size > 0) return;
12737
- for (const key of Object.keys(registry)) {
12764
+ for (const key of Object.keys(registry$1)) {
12738
12765
  const nk = normalize(key);
12739
12766
  if (!normalizedCache.has(nk)) normalizedCache.set(nk, key);
12740
12767
  }
12741
12768
  }
12742
12769
  function lookupModel(name$1) {
12743
- const exact = registry[name$1];
12770
+ const exact = registry$1[name$1];
12744
12771
  if (exact) return { ...exact };
12745
12772
  buildNormalizedCache();
12746
12773
  const nk = normalize(name$1);
12747
12774
  const matched = normalizedCache.get(nk);
12748
12775
  if (matched) {
12749
- const entry = registry[matched];
12776
+ const entry = registry$1[matched];
12750
12777
  return entry ? { ...entry } : null;
12751
12778
  }
12752
12779
  return null;
@@ -12763,29 +12790,40 @@ function lookupModelCapabilities(modelName) {
12763
12790
  //#endregion
12764
12791
  //#region src/utils/retry.ts
12765
12792
  async function withRetry(fn, onRetry, maxRetries = 5) {
12766
- let lastError;
12767
- for (let attempt = 0; attempt <= maxRetries; attempt++) try {
12768
- return await fn();
12769
- } catch (error) {
12770
- const err = error instanceof Error ? error : new Error(String(error));
12771
- lastError = err;
12772
- if (!(err instanceof APICallError && err.isRetryable && attempt < maxRetries)) throw err;
12773
- const delayMs = 1e3 * 2 ** attempt + Math.round(Math.random() * 500);
12774
- onRetry?.({
12775
- attempt: attempt + 1,
12776
- maxRetries,
12777
- delayMs,
12778
- statusCode: err.statusCode
12779
- });
12780
- await new Promise((resolve) => setTimeout(resolve, delayMs));
12781
- }
12782
- throw lastError ?? /* @__PURE__ */ new Error("Retry failed after all attempts");
12793
+ return pRetry(async () => fn(), {
12794
+ retries: maxRetries,
12795
+ factor: 2,
12796
+ minTimeout: 1e3,
12797
+ randomize: true,
12798
+ onFailedAttempt({ error, attemptNumber, retriesLeft }) {
12799
+ if (!(error instanceof APICallError) || !error.isRetryable || retriesLeft <= 0) return;
12800
+ const baseDelayMs = 1e3 * 2 ** (attemptNumber - 1);
12801
+ onRetry?.({
12802
+ attempt: attemptNumber,
12803
+ maxRetries,
12804
+ delayMs: baseDelayMs,
12805
+ statusCode: error.statusCode
12806
+ });
12807
+ },
12808
+ shouldRetry({ error }) {
12809
+ return error instanceof APICallError && error.isRetryable;
12810
+ }
12811
+ });
12783
12812
  }
12784
12813
 
12785
12814
  //#endregion
12786
12815
  //#region src/core/ai-extraction/json-utils.ts
12787
- function stripFences(text) {
12788
- const trimmed = text.trim();
12816
+ function parseJsonLike(text$1) {
12817
+ const trimmed = text$1.trim();
12818
+ try {
12819
+ return JSON.parse(trimmed);
12820
+ } catch {
12821
+ if (!trimmed.startsWith("{") && !trimmed.startsWith("[")) throw new SyntaxError("JSON candidate must start with an object or array");
12822
+ return JSON.parse(jsonrepair(trimmed));
12823
+ }
12824
+ }
12825
+ function stripFences(text$1) {
12826
+ const trimmed = text$1.trim();
12789
12827
  if (!trimmed.startsWith("```")) return null;
12790
12828
  const endIndex = trimmed.lastIndexOf("```");
12791
12829
  if (endIndex <= 3) return null;
@@ -12794,8 +12832,8 @@ function stripFences(text) {
12794
12832
  if (firstNewline === -1) return null;
12795
12833
  return inside.slice(firstNewline + 1).trim();
12796
12834
  }
12797
- function extractFirstJSON(text) {
12798
- const trimmed = text.trim();
12835
+ function extractFirstJSON(text$1) {
12836
+ const trimmed = text$1.trim();
12799
12837
  const firstBrace = trimmed.indexOf("{");
12800
12838
  const firstBracket = trimmed.indexOf("[");
12801
12839
  let start = -1;
@@ -12806,20 +12844,20 @@ function extractFirstJSON(text) {
12806
12844
  if (end <= start) return null;
12807
12845
  return trimmed.slice(start, end);
12808
12846
  }
12809
- function safeParseJSON(text) {
12810
- const cleaned = text.trim();
12847
+ function safeParseJSON(text$1) {
12848
+ const cleaned = text$1.trim();
12811
12849
  try {
12812
12850
  return JSON.parse(cleaned);
12813
12851
  } catch {}
12814
12852
  const fromFence = stripFences(cleaned);
12815
12853
  if (fromFence) try {
12816
- return JSON.parse(fromFence);
12854
+ return parseJsonLike(fromFence);
12817
12855
  } catch {}
12818
12856
  const extracted = extractFirstJSON(cleaned);
12819
12857
  if (extracted) try {
12820
- return JSON.parse(extracted);
12858
+ return parseJsonLike(extracted);
12821
12859
  } catch {}
12822
- const truncated = text.length > 200 ? `${text.slice(0, 200)}...` : text;
12860
+ const truncated = text$1.length > 200 ? `${text$1.slice(0, 200)}...` : text$1;
12823
12861
  throw new Error(`Failed to parse JSON from model output. Expected a valid JSON object or array but received unparseable text. Raw output: ${truncated}`);
12824
12862
  }
12825
12863
 
@@ -12907,11 +12945,11 @@ function schemaToDescription(schema) {
12907
12945
  }
12908
12946
  return lines.join("\n");
12909
12947
  }
12910
- function generateExtractionPrompt(schema, text, promptConfig = DEFAULT_PROMPT_CONFIG) {
12948
+ function generateExtractionPrompt(schema, text$1, promptConfig = DEFAULT_PROMPT_CONFIG) {
12911
12949
  const schemaDescription = schemaToDescription(schema);
12912
12950
  return {
12913
12951
  system: promptConfig.systemTemplate.replaceAll(PLACEHOLDER_SCHEMA, schemaDescription),
12914
- user: promptConfig.userTemplate.replaceAll(PLACEHOLDER_TEXT, text)
12952
+ user: promptConfig.userTemplate.replaceAll(PLACEHOLDER_TEXT, text$1)
12915
12953
  };
12916
12954
  }
12917
12955
  function generatePromptSnapshot(schema, promptConfig = DEFAULT_PROMPT_CONFIG) {
@@ -12934,39 +12972,40 @@ function generatePromptSnapshot(schema, promptConfig = DEFAULT_PROMPT_CONFIG) {
12934
12972
 
12935
12973
  //#endregion
12936
12974
  //#region src/core/ai-extraction/extractor.ts
12975
+ let langfuseInitialized = false;
12976
+ function initLangfuse(config) {
12977
+ if (!config.langfuse?.publicKey || !config.langfuse.secretKey) return;
12978
+ if (langfuseInitialized) return;
12979
+ langfuseInitialized = true;
12980
+ try {
12981
+ new NodeTracerProvider({ spanProcessors: [new LangfuseSpanProcessor({
12982
+ publicKey: config.langfuse.publicKey,
12983
+ secretKey: config.langfuse.secretKey,
12984
+ baseUrl: config.langfuse.host || "https://us.cloud.langfuse.com",
12985
+ exportMode: "immediate"
12986
+ })] }).register();
12987
+ } catch (e) {
12988
+ console.warn("[Langfuse] Failed to initialize tracing:", e instanceof Error ? e.message : e);
12989
+ }
12990
+ }
12937
12991
  const SYSTEM_PROMPT_REGEX = /## System Prompt\n([\s\S]*?)(?=## User Prompt|$)/;
12938
12992
  const USER_PROMPT_REGEX = /## User Prompt Template\n([\s\S]*)$/;
12939
- const MIME_TYPES = {
12940
- png: "image/png",
12941
- jpg: "image/jpeg",
12942
- jpeg: "image/jpeg",
12943
- gif: "image/gif",
12944
- webp: "image/webp",
12945
- bmp: "image/bmp",
12946
- svg: "image/svg+xml",
12947
- pdf: "application/pdf",
12948
- txt: "text/plain",
12949
- csv: "text/csv",
12950
- json: "application/json",
12951
- md: "text/markdown",
12952
- html: "text/html"
12953
- };
12954
12993
  function detectMimeType(filePath) {
12955
- return MIME_TYPES[path.extname(filePath).toLowerCase().replace(".", "")] || "application/octet-stream";
12994
+ return mime.getType(filePath) ?? "application/octet-stream";
12956
12995
  }
12957
12996
  async function readFilePart(filePath) {
12958
- const mime = detectMimeType(filePath);
12997
+ const mime$1 = detectMimeType(filePath);
12959
12998
  const buffer = await fs.readFile(filePath);
12960
12999
  const name$1 = path.basename(filePath);
12961
- if (mime.startsWith("image/")) return {
13000
+ if (mime$1.startsWith("image/")) return {
12962
13001
  type: "image",
12963
13002
  image: buffer,
12964
- mimeType: mime
13003
+ mimeType: mime$1
12965
13004
  };
12966
13005
  return {
12967
13006
  type: "file",
12968
13007
  data: buffer,
12969
- mediaType: mime,
13008
+ mediaType: mime$1,
12970
13009
  filename: name$1
12971
13010
  };
12972
13011
  }
@@ -13078,25 +13117,35 @@ async function loadPromptSnapshot(aiexDir, tableName) {
13078
13117
  return null;
13079
13118
  }
13080
13119
  async function extractStructuredData(input) {
13081
- const { config, schema, text, aiexDir, file, modelOverride } = input;
13120
+ const { config, schema, text: text$1, aiexDir, file, modelOverride } = input;
13082
13121
  if (!config.provider.apiKey) return {
13083
13122
  success: false,
13084
13123
  error: "API Key not configured. Please configure AI settings in the web UI."
13085
13124
  };
13086
13125
  const useFileContent = !!file;
13087
13126
  const isImageFile = useFileContent && detectMimeType(file).startsWith("image/");
13088
- const inputTokens = text ? Math.ceil(text.length / 2) : void 0;
13127
+ const inputTokens = text$1 ? Math.ceil(text$1.length / 2) : void 0;
13089
13128
  const fieldCount = schema.properties ? Object.keys(schema.properties).length : 0;
13090
13129
  const outputTokens = fieldCount > 0 ? fieldCount * 80 : void 0;
13091
- const selected = modelOverride ?? selectModel({
13092
- models: config.provider.models,
13093
- isImage: isImageFile,
13094
- fileName: file,
13095
- inputTokens,
13096
- outputTokens
13097
- });
13130
+ let selected;
13131
+ try {
13132
+ selected = modelOverride ?? selectModel({
13133
+ models: config.provider.models,
13134
+ isImage: isImageFile,
13135
+ fileName: file,
13136
+ inputTokens,
13137
+ outputTokens
13138
+ });
13139
+ } catch (e) {
13140
+ return {
13141
+ success: false,
13142
+ error: e.message
13143
+ };
13144
+ }
13098
13145
  const useStructuredOutput = selected.capabilities.structuredOutput;
13146
+ const useTelemetry = !!(config.langfuse?.publicKey && config.langfuse.secretKey);
13099
13147
  try {
13148
+ if (useTelemetry) initLangfuse(config);
13100
13149
  const provider = createOpenAICompatible({
13101
13150
  baseURL: config.provider.baseURL,
13102
13151
  name: "qwen",
@@ -13105,7 +13154,7 @@ async function extractStructuredData(input) {
13105
13154
  let system;
13106
13155
  let user;
13107
13156
  const snapshot = await loadPromptSnapshot(aiexDir, schema.table.name);
13108
- const promptText = file ? PLACEHOLDER_TEXT : text;
13157
+ const promptText = file ? PLACEHOLDER_TEXT : text$1;
13109
13158
  if (snapshot) {
13110
13159
  system = snapshot.system;
13111
13160
  user = snapshot.user.replaceAll(PLACEHOLDER_TEXT, promptText);
@@ -13116,12 +13165,13 @@ async function extractStructuredData(input) {
13116
13165
  }
13117
13166
  const outputSchema = jsonSchema(schemaToExtractionOutputSchema(schema));
13118
13167
  let result;
13168
+ const timeoutMs = (config.provider.timeout ?? 300) * 1e3;
13119
13169
  if (useFileContent) {
13120
13170
  const filePart = await readFilePart(file);
13121
13171
  const fileName = filePart.type === "file" ? filePart.filename : path.basename(file);
13122
13172
  const contentParts = [{
13123
13173
  type: "text",
13124
- text: user.includes(PLACEHOLDER_TEXT) ? user.replaceAll(PLACEHOLDER_TEXT, text || `Data is contained in the attached file: ${fileName}`) : user
13174
+ text: user.includes(PLACEHOLDER_TEXT) ? user.replaceAll(PLACEHOLDER_TEXT, text$1 || `Data is contained in the attached file: ${fileName}`) : user
13125
13175
  }, filePart];
13126
13176
  const fileOpts = {
13127
13177
  model: provider.chatModel(selected.name),
@@ -13130,8 +13180,9 @@ async function extractStructuredData(input) {
13130
13180
  role: "user",
13131
13181
  content: contentParts
13132
13182
  }],
13133
- abortSignal: AbortSignal.timeout(12e4),
13134
- maxRetries: 0
13183
+ abortSignal: AbortSignal.timeout(timeoutMs),
13184
+ maxRetries: 0,
13185
+ experimental_telemetry: { isEnabled: useTelemetry }
13135
13186
  };
13136
13187
  if (useStructuredOutput) fileOpts.output = Output.object({ schema: outputSchema });
13137
13188
  result = await withRetry(() => generateText(fileOpts), input.onRetry);
@@ -13140,8 +13191,9 @@ async function extractStructuredData(input) {
13140
13191
  model: provider.chatModel(selected.name),
13141
13192
  system,
13142
13193
  prompt: user,
13143
- abortSignal: AbortSignal.timeout(6e4),
13144
- maxRetries: 0
13194
+ abortSignal: AbortSignal.timeout(timeoutMs),
13195
+ maxRetries: 0,
13196
+ experimental_telemetry: { isEnabled: useTelemetry }
13145
13197
  };
13146
13198
  if (useStructuredOutput) textOpts.output = Output.object({ schema: outputSchema });
13147
13199
  result = await withRetry(() => generateText(textOpts), input.onRetry);
@@ -13159,7 +13211,10 @@ async function extractStructuredData(input) {
13159
13211
  const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
13160
13212
  const outputFileName = `${schema.table.name}-${timestamp}.json`;
13161
13213
  const outputPath = path.join(outputDir, outputFileName);
13162
- await fs.writeFile(outputPath, `${JSON.stringify(data, null, 2)}\n`);
13214
+ await writeFile(outputPath, data, {
13215
+ spaces: 2,
13216
+ EOL: "\n"
13217
+ });
13163
13218
  return {
13164
13219
  success: true,
13165
13220
  outputPath,
@@ -13224,8 +13279,8 @@ function buildInsertSql(table, data) {
13224
13279
  function insertTableRow({ db, table, data, parentRowId, foreignKeyColumn }) {
13225
13280
  const rowData = { ...data };
13226
13281
  if (parentRowId !== void 0 && foreignKeyColumn) rowData[foreignKeyColumn] = parentRowId;
13227
- const { sql, values } = buildInsertSql(table, rowData);
13228
- const info = db.prepare(sql).run(...values);
13282
+ const { sql: sql$1, values } = buildInsertSql(table, rowData);
13283
+ const info = db.prepare(sql$1).run(...values);
13229
13284
  return Number(info.lastInsertRowid);
13230
13285
  }
13231
13286
  function parseDataByColumns(data, schema, table) {
@@ -13328,7 +13383,280 @@ async function savePromptSnapshot(schema, aiexDir) {
13328
13383
  }
13329
13384
 
13330
13385
  //#endregion
13331
- //#region src/commands/extract.ts
13386
+ //#region src/core/extraction-audit.ts
13387
+ const AUDIT_ID_RE = /^[\w.-]+$/;
13388
+ const STALE_AFTER_MS = 1800 * 1e3;
13389
+ function auditDir(aiexDir) {
13390
+ return path.join(aiexDir, "extracted", "_audit");
13391
+ }
13392
+ function auditPath(aiexDir, id) {
13393
+ return path.join(auditDir(aiexDir), `${id}.json`);
13394
+ }
13395
+ function createAuditId(schemaName) {
13396
+ return `${schemaName}-${(/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-")}-${Math.random().toString(36).slice(2, 8)}`;
13397
+ }
13398
+ async function createExtractionAuditRecord(aiexDir, input) {
13399
+ const now = (/* @__PURE__ */ new Date()).toISOString();
13400
+ const record = {
13401
+ id: createAuditId(input.schemaName),
13402
+ status: "running",
13403
+ schemaName: input.schemaName,
13404
+ modelName: input.modelName || void 0,
13405
+ source: input.source,
13406
+ retryOf: input.retryOf,
13407
+ createdAt: now,
13408
+ updatedAt: now
13409
+ };
13410
+ await fs.mkdir(auditDir(aiexDir), { recursive: true });
13411
+ await writeFile(auditPath(aiexDir, record.id), record, {
13412
+ spaces: 2,
13413
+ EOL: "\n"
13414
+ });
13415
+ return record;
13416
+ }
13417
+ async function updateExtractionAuditRecord(aiexDir, id, patch) {
13418
+ const current = await readExtractionAuditRecord(aiexDir, id);
13419
+ if (!current) throw new Error(`Extraction audit record not found: ${id}`);
13420
+ const record = {
13421
+ ...current,
13422
+ ...patch,
13423
+ source: patch.source ?? current.source,
13424
+ updatedAt: (/* @__PURE__ */ new Date()).toISOString()
13425
+ };
13426
+ await fs.mkdir(auditDir(aiexDir), { recursive: true });
13427
+ await writeFile(auditPath(aiexDir, id), record, {
13428
+ spaces: 2,
13429
+ EOL: "\n"
13430
+ });
13431
+ return record;
13432
+ }
13433
+ async function readExtractionAuditRecord(aiexDir, id) {
13434
+ if (!AUDIT_ID_RE.test(id)) return null;
13435
+ try {
13436
+ return await readFile(auditPath(aiexDir, id));
13437
+ } catch {
13438
+ return null;
13439
+ }
13440
+ }
13441
+ function isStale(record) {
13442
+ if (record.status !== "running") return false;
13443
+ const updated = Date.parse(record.updatedAt);
13444
+ return !Number.isNaN(updated) && Date.now() - updated > STALE_AFTER_MS;
13445
+ }
13446
+ async function markStaleIfNeeded(aiexDir, record) {
13447
+ if (!isStale(record)) return record;
13448
+ const staleRecord = {
13449
+ ...record,
13450
+ status: "stale",
13451
+ error: record.error ?? "Extraction did not finish. It may have been interrupted.",
13452
+ updatedAt: (/* @__PURE__ */ new Date()).toISOString()
13453
+ };
13454
+ await writeFile(auditPath(aiexDir, staleRecord.id), staleRecord, {
13455
+ spaces: 2,
13456
+ EOL: "\n"
13457
+ });
13458
+ return staleRecord;
13459
+ }
13460
+ async function listExtractionAuditRecords(aiexDir) {
13461
+ try {
13462
+ const dir = auditDir(aiexDir);
13463
+ const files = await fs.readdir(dir);
13464
+ return (await Promise.all(files.filter((file) => file.endsWith(".json")).map(async (file) => {
13465
+ try {
13466
+ return await markStaleIfNeeded(aiexDir, await readFile(path.join(dir, file)));
13467
+ } catch {
13468
+ return null;
13469
+ }
13470
+ }))).filter((record) => !!record).sort((a, b) => b.createdAt.localeCompare(a.createdAt));
13471
+ } catch {
13472
+ return [];
13473
+ }
13474
+ }
13475
+ function isPathInside(childPath, parentPath) {
13476
+ const relative = path.relative(path.resolve(parentPath), path.resolve(childPath));
13477
+ return !!relative && !relative.startsWith("..") && !path.isAbsolute(relative);
13478
+ }
13479
+ async function deleteExtractionAuditRecord(aiexDir, id) {
13480
+ const record = await readExtractionAuditRecord(aiexDir, id);
13481
+ if (!record) return false;
13482
+ const uploadsDir = path.join(aiexDir, "uploads");
13483
+ if (record.source.type === "file" && record.source.filePath && isPathInside(record.source.filePath, uploadsDir)) await fs.unlink(record.source.filePath).catch(() => {});
13484
+ const uploadFiles = await fs.readdir(uploadsDir).catch(() => []);
13485
+ await Promise.all(uploadFiles.filter((file) => file.startsWith(`${id}-`)).map((file) => fs.unlink(path.join(uploadsDir, file)).catch(() => {})));
13486
+ await fs.unlink(auditPath(aiexDir, id)).catch(() => {});
13487
+ return true;
13488
+ }
13489
+
13490
+ //#endregion
13491
+ //#region src/core/pdf-converter/external.ts
13492
+ function applyTemplate(value, context) {
13493
+ return value.replaceAll("{input}", context.input).replaceAll("{outputDir}", context.outputDir).replaceAll("{basename}", context.basename);
13494
+ }
13495
+ function isError(error) {
13496
+ return error instanceof Error;
13497
+ }
13498
+ async function pathExists(filePath) {
13499
+ try {
13500
+ await fs.access(filePath);
13501
+ return true;
13502
+ } catch {
13503
+ return false;
13504
+ }
13505
+ }
13506
+ async function collectMarkdownFiles(dir) {
13507
+ return (await glob("**/*.md", {
13508
+ cwd: dir,
13509
+ absolute: true,
13510
+ onlyFiles: true
13511
+ })).sort();
13512
+ }
13513
+ async function selectMarkdownFile(outputDir, basename) {
13514
+ const files = await collectMarkdownFiles(outputDir);
13515
+ if (files.length === 0) throw new Error(`External PDF converter did not produce a markdown file in ${outputDir}`);
13516
+ const preferredName = `${basename}.md`.toLowerCase();
13517
+ return files.find((file) => path.basename(file).toLowerCase() === preferredName) ?? files[0];
13518
+ }
13519
+ function formatCommandError(error, command$1) {
13520
+ if (!isError(error)) return new Error(String(error));
13521
+ const details = [`External PDF converter failed: ${command$1}`];
13522
+ if ("exitCode" in error && typeof error.exitCode === "number") details.push(`exitCode=${error.exitCode}`);
13523
+ if ("signal" in error && error.signal) details.push(`signal=${String(error.signal)}`);
13524
+ if ("stderr" in error && typeof error.stderr === "string" && error.stderr.trim()) details.push(error.stderr.trim());
13525
+ else if (error.message) details.push(error.message);
13526
+ return new Error(details.join("\n"));
13527
+ }
13528
+ async function countPdfPages(input) {
13529
+ try {
13530
+ return (await getDocumentProxy(input)).numPages;
13531
+ } catch {
13532
+ return 0;
13533
+ }
13534
+ }
13535
+ var ExternalCommandPdfConverter = class {
13536
+ name;
13537
+ constructor(name$1, config) {
13538
+ this.config = config;
13539
+ this.name = name$1;
13540
+ }
13541
+ async convert(input, filePath) {
13542
+ const tempRoot = await fs.mkdtemp(path.join(os.tmpdir(), `aiex-${this.name}-`));
13543
+ const outputDir = path.join(tempRoot, "output");
13544
+ await fs.mkdir(outputDir, { recursive: true });
13545
+ const inputPath = filePath ?? path.join(tempRoot, "input.pdf");
13546
+ if (!filePath) await fs.writeFile(inputPath, input);
13547
+ const pageCount = await countPdfPages(input);
13548
+ const basename = path.basename(inputPath, path.extname(inputPath));
13549
+ const context = {
13550
+ input: inputPath,
13551
+ outputDir,
13552
+ basename
13553
+ };
13554
+ const args = this.config.args.map((arg) => applyTemplate(arg, context));
13555
+ const timeoutMs = (this.config.timeout ?? 600) * 1e3;
13556
+ try {
13557
+ await execa(this.config.command, args, {
13558
+ shell: false,
13559
+ timeout: timeoutMs,
13560
+ maxBuffer: 1024 * 1024 * 20
13561
+ });
13562
+ const outputPath = this.config.outputFile ? applyTemplate(this.config.outputFile, context) : await selectMarkdownFile(outputDir, basename);
13563
+ if (!await pathExists(outputPath)) throw new Error(`External PDF converter output was not found: ${outputPath}`);
13564
+ return {
13565
+ text: await fs.readFile(outputPath, "utf-8"),
13566
+ pageCount,
13567
+ metadata: {
13568
+ converter: this.name,
13569
+ outputPath,
13570
+ ...this.config.keepOutput ? { outputDir } : {}
13571
+ }
13572
+ };
13573
+ } catch (error) {
13574
+ throw formatCommandError(error, `${this.config.command} ${args.join(" ")}`);
13575
+ } finally {
13576
+ if (!this.config.keepOutput) await fs.rm(tempRoot, {
13577
+ recursive: true,
13578
+ force: true
13579
+ }).catch(() => {});
13580
+ }
13581
+ }
13582
+ };
13583
+
13584
+ //#endregion
13585
+ //#region src/core/pdf-converter/unpdf.ts
13586
+ var UnpdfConverter = class {
13587
+ name = "unpdf";
13588
+ async convert(input) {
13589
+ const data = Buffer.isBuffer(input) ? new Uint8Array(input) : input;
13590
+ const [textResult, meta] = await Promise.all([extractText(data, { mergePages: true }), getMeta(data).catch(() => null)]);
13591
+ return {
13592
+ text: textResult.text,
13593
+ pageCount: textResult.totalPages,
13594
+ metadata: {
13595
+ converter: this.name,
13596
+ ...meta?.info ? Object.fromEntries(Object.entries(meta.info).map(([k, v]) => [k, String(v)])) : {}
13597
+ }
13598
+ };
13599
+ }
13600
+ };
13601
+
13602
+ //#endregion
13603
+ //#region src/core/pdf-converter/factory.ts
13604
+ const registry = /* @__PURE__ */ new Map();
13605
+ var FallbackPdfConverter = class {
13606
+ name;
13607
+ constructor(primary, fallback) {
13608
+ this.primary = primary;
13609
+ this.fallback = fallback;
13610
+ this.name = primary.name;
13611
+ }
13612
+ async convert(input, filePath) {
13613
+ try {
13614
+ return await this.primary.convert(input, filePath);
13615
+ } catch (err) {
13616
+ consola.warn(`${this.primary.name} failed: ${err instanceof Error ? err.message : String(err)}`);
13617
+ consola.info(`Falling back to ${this.fallback.name}`);
13618
+ const result = await this.fallback.convert(input, filePath);
13619
+ return {
13620
+ ...result,
13621
+ metadata: {
13622
+ ...result.metadata,
13623
+ fallback: "true"
13624
+ }
13625
+ };
13626
+ }
13627
+ }
13628
+ };
13629
+ function withFallback(converter, config) {
13630
+ if (!config.fallbackToUnpdf) return converter;
13631
+ return new FallbackPdfConverter(converter, new UnpdfConverter());
13632
+ }
13633
+ function createPdfConverter(config) {
13634
+ if (typeof config === "object") {
13635
+ if (config.converter === "mineru") {
13636
+ const mineruConfig = config.mineru ?? DEFAULT_MINERU_CONFIG;
13637
+ return withFallback(new ExternalCommandPdfConverter("mineru", mineruConfig), mineruConfig);
13638
+ }
13639
+ if (config.converter === "markitdown") {
13640
+ const markitdownConfig = config.markitdown ?? DEFAULT_MARKITDOWN_CONFIG;
13641
+ return withFallback(new ExternalCommandPdfConverter("markitdown", markitdownConfig), markitdownConfig);
13642
+ }
13643
+ if (config.converter === "external") {
13644
+ if (!config.external) throw new Error("External PDF converter is selected but no external command is configured.");
13645
+ return withFallback(new ExternalCommandPdfConverter("external", config.external), config.external);
13646
+ }
13647
+ }
13648
+ const key = typeof config === "string" ? config : "unpdf";
13649
+ let instance = registry.get(key);
13650
+ if (!instance) {
13651
+ if (key !== "unpdf") throw new Error(`PDF converter "${key}" requires configuration.`);
13652
+ instance = new UnpdfConverter();
13653
+ registry.set(key, instance);
13654
+ }
13655
+ return instance;
13656
+ }
13657
+
13658
+ //#endregion
13659
+ //#region src/core/extract-runner.ts
13332
13660
  const FILE_PART_EXTENSIONS = new Set([
13333
13661
  "png",
13334
13662
  "jpg",
@@ -13338,11 +13666,21 @@ const FILE_PART_EXTENSIONS = new Set([
13338
13666
  "bmp",
13339
13667
  "svg"
13340
13668
  ]);
13341
- function fail$1(message) {
13342
- if (message) consola.error(message);
13343
- outro("Failed!");
13344
- process.exitCode = 1;
13345
- }
13669
+ const SUPPORTED_EXTENSIONS = new Set([
13670
+ ...FILE_PART_EXTENSIONS,
13671
+ "pdf",
13672
+ "txt",
13673
+ "md",
13674
+ "csv",
13675
+ "json",
13676
+ "html",
13677
+ "xml",
13678
+ "yaml",
13679
+ "yml"
13680
+ ]);
13681
+ const PDF_EXT_RE = /\.pdf$/i;
13682
+ const JSON_EXT_RE$1 = /\.json$/;
13683
+ const SUPPORTED_FILE_PATTERN = `*.{${[...SUPPORTED_EXTENSIONS].join(",")}}`;
13346
13684
  async function ensureDatabaseReady(dbPath, schema) {
13347
13685
  try {
13348
13686
  await fs.access(dbPath);
@@ -13362,194 +13700,706 @@ async function ensureDatabaseReady(dbPath, schema) {
13362
13700
  }
13363
13701
  return null;
13364
13702
  }
13365
- const extractCommand = defineCommand({
13366
- meta: {
13367
- name: "extract",
13368
- description: "Extract structured data from text or image files"
13369
- },
13370
- args: {
13371
- schema: {
13372
- type: "string",
13373
- alias: "s",
13374
- description: "Schema name (without .json extension)",
13375
- required: true
13376
- },
13377
- text: {
13378
- type: "string",
13379
- alias: "t",
13380
- description: "Text content to extract"
13381
- },
13382
- file: {
13383
- type: "string",
13384
- alias: "f",
13385
- description: "File path (image) to extract from"
13386
- },
13387
- model: {
13388
- type: "string",
13389
- alias: "m",
13390
- description: "AI model to use for extraction (overrides auto-selection)"
13391
- },
13392
- db: {
13393
- type: "boolean",
13394
- alias: "d",
13395
- description: "Insert extracted data into SQLite database",
13396
- default: false
13397
- }
13398
- },
13399
- async run({ args }) {
13400
- intro(pc.inverse(" aiex extract "));
13401
- const config = createMigrationConfig(process.cwd());
13402
- const aiexDir = path.dirname(config.schemaPath);
13403
- if (!args.text && !args.file) {
13404
- fail$1("Please provide text (-t) or a file (-f) to extract from");
13405
- return;
13406
- }
13407
- if (args.text && args.file) {
13408
- fail$1("-t and -f cannot be used together");
13409
- return;
13410
- }
13411
- const aiConfig = await readAIConfig(aiexDir);
13412
- if (!aiConfig) {
13413
- fail$1("AI configuration not found. Please configure AI settings in the Web interface first");
13414
- return;
13415
- }
13416
- if (!aiConfig.provider.apiKey) {
13417
- fail$1("API Key not configured. Please configure AI settings in the Web interface first");
13418
- return;
13419
- }
13420
- if (!aiConfig.provider.models?.length) {
13421
- fail$1("No models configured. Please add at least one model in AI Settings");
13422
- return;
13423
- }
13424
- let modelOverride;
13425
- if (args.model) {
13426
- const matched = aiConfig.provider.models.find((m) => m.name === args.model);
13427
- if (!matched) {
13428
- const available = aiConfig.provider.models.map((m) => m.name).join(", ");
13429
- fail$1(`Model "${args.model}" not found in configuration. Available models: ${available}`);
13430
- return;
13431
- }
13432
- modelOverride = matched;
13433
- }
13434
- let text = "";
13435
- let filePath;
13436
- if (args.file) {
13437
- const ext = path.extname(args.file).toLowerCase().replace(".", "");
13438
- if (FILE_PART_EXTENSIONS.has(ext)) filePath = args.file;
13439
- else text = await fs.readFile(args.file, "utf-8");
13440
- } else if (args.text) text = args.text;
13441
- const schemaName = args.schema;
13442
- const schemaPath = path.join(config.schemaPath, `${schemaName}.json`);
13443
- let schema;
13703
+ function listSupportedFiles(dir, pattern) {
13704
+ if (!fs$1.statSync(dir).isDirectory()) throw new Error(`Not a directory: ${dir}`);
13705
+ return globSync(pattern ?? SUPPORTED_FILE_PATTERN, {
13706
+ cwd: dir,
13707
+ absolute: true,
13708
+ onlyFiles: true
13709
+ }).filter((file) => {
13710
+ const ext = path.extname(file).toLowerCase().replace(".", "");
13711
+ return SUPPORTED_EXTENSIONS.has(ext);
13712
+ }).sort();
13713
+ }
13714
+ async function loadSchema(config, schemaName) {
13715
+ const schemaPath = path.join(config.schemaPath, `${schemaName}.json`);
13716
+ try {
13717
+ const parsed = await readFile(schemaPath);
13718
+ return { schema: JsonSchemaDefinitionSchema.parse(parsed) };
13719
+ } catch (e) {
13720
+ if (e instanceof ZodError) return {
13721
+ schema: null,
13722
+ error: `Schema validation failed: ${schemaName}.json\n${e.issues.map((i) => ` - ${i.path.join(".")}: ${i.message}`).join("\n")}`
13723
+ };
13724
+ if (e.code === "ENOENT") return {
13725
+ schema: null,
13726
+ error: `Cannot read schema file: ${schemaName}.json`
13727
+ };
13728
+ if (e instanceof SyntaxError) return {
13729
+ schema: null,
13730
+ error: `Invalid JSON in schema file: ${schemaName}.json`
13731
+ };
13732
+ return {
13733
+ schema: null,
13734
+ error: String(e)
13735
+ };
13736
+ }
13737
+ }
13738
+ async function listSchemas(aiexDir) {
13739
+ try {
13740
+ const dir = path.join(aiexDir, "schema");
13741
+ return (await fs.readdir(dir)).filter((f) => f.endsWith(".json")).map((f) => f.replace(JSON_EXT_RE$1, "")).sort();
13742
+ } catch {
13743
+ return [];
13744
+ }
13745
+ }
13746
+ async function readExtractFileInput(filePath, aiConfig) {
13747
+ const ext = path.extname(filePath).toLowerCase().replace(".", "");
13748
+ if (FILE_PART_EXTENSIONS.has(ext)) return {
13749
+ text: "",
13750
+ filePath
13751
+ };
13752
+ if (ext === "pdf") {
13753
+ const buffer = await fs.readFile(filePath);
13754
+ const converter = createPdfConverter(aiConfig?.pdf);
13755
+ const result = await converter.convert(buffer, filePath);
13756
+ if (result.metadata?.fallback === "true") consola.info(`Fell back to unpdf — ${result.pageCount} page(s) extracted`);
13757
+ else consola.info(`Converted PDF via ${converter.name}, ${result.pageCount} page(s)`);
13758
+ const mdPath = filePath.replace(PDF_EXT_RE, ".md");
13444
13759
  try {
13445
- const content = await fs.readFile(schemaPath, "utf-8");
13446
- schema = JSON.parse(content);
13760
+ await fs.writeFile(mdPath, result.text);
13761
+ consola.info(`Markdown saved: ${mdPath}`);
13447
13762
  } catch {
13448
- fail$1(`Cannot read schema file: ${schemaName}.json`);
13449
- return;
13763
+ const fallbackMd = path.join(os.tmpdir(), `${path.basename(filePath, ".pdf")}.md`);
13764
+ await fs.writeFile(fallbackMd, result.text);
13765
+ consola.info(`Markdown saved: ${fallbackMd}`);
13450
13766
  }
13451
- try {
13452
- schema = JsonSchemaDefinitionSchema.parse(schema);
13453
- } catch (e) {
13454
- if (e instanceof ZodError) {
13455
- consola.error(`Schema validation failed: ${schemaName}.json`);
13456
- for (const issue of e.issues) consola.error(` - ${issue.path.join(".")}: ${issue.message}`);
13457
- }
13458
- fail$1();
13459
- return;
13767
+ return { text: result.text };
13768
+ }
13769
+ return { text: await fs.readFile(filePath, "utf-8") };
13770
+ }
13771
+ async function extractSingle(aiexDir, config, aiConfig, schemaName, text$1, filePath, modelOverride, options) {
13772
+ const schemaLoad = await loadSchema(config, schemaName);
13773
+ if (!schemaLoad.schema) {
13774
+ if (!options?.quiet) consola.error(schemaLoad.error);
13775
+ return {
13776
+ success: false,
13777
+ error: schemaLoad.error
13778
+ };
13779
+ }
13780
+ const s = spinner();
13781
+ if (!options?.quiet) s.start(filePath ? `Extracting from ${path.basename(filePath)}...` : "Extracting data...");
13782
+ const result = await extractStructuredData({
13783
+ config: aiConfig,
13784
+ schema: schemaLoad.schema,
13785
+ text: text$1 ?? "",
13786
+ aiexDir,
13787
+ file: filePath,
13788
+ modelOverride,
13789
+ onRetry(info) {
13790
+ if (!options?.quiet) s.message(`API responded with ${info.statusCode}, retrying in ${info.delayMs / 1e3}s (${info.attempt}/${info.maxRetries})...`);
13460
13791
  }
13461
- const s = spinner();
13462
- s.start(filePath ? "Extracting data from image..." : "Extracting data...");
13463
- const result = await extractStructuredData({
13464
- config: aiConfig,
13465
- schema,
13466
- text,
13467
- aiexDir,
13468
- file: filePath,
13469
- modelOverride,
13470
- onRetry(info) {
13471
- s.message(`API responded with ${info.statusCode}, retrying in ${info.delayMs / 1e3}s (${info.attempt}/${info.maxRetries})...`);
13472
- }
13473
- });
13474
- if (!result.success) {
13792
+ });
13793
+ if (!result.success) {
13794
+ if (!options?.quiet) {
13475
13795
  s.stop("Extraction failed");
13476
- fail$1(result.error || "Unknown error");
13477
- return;
13796
+ consola.error(result.error || "Unknown error");
13478
13797
  }
13479
- s.stop("Extraction complete");
13480
- if (result.outputPath) consola.success(`Result saved: ${pc.cyan(result.outputPath)}`);
13481
- if (result.tokensUsed) consola.info(pc.gray(`Token usage: prompt=${result.tokensUsed.prompt}, completion=${result.tokensUsed.completion}, total=${result.tokensUsed.total}`));
13482
- if (args.db && result.data) {
13483
- const s2 = spinner();
13484
- s2.start("Inserting into database...");
13485
- const dbError = await ensureDatabaseReady(config.databasePath, schema);
13486
- if (dbError) {
13487
- s2.stop("Database not ready");
13488
- fail$1(dbError);
13489
- return;
13490
- }
13798
+ return {
13799
+ success: false,
13800
+ error: result.error || "Unknown error"
13801
+ };
13802
+ }
13803
+ if (!options?.quiet) s.stop("Extraction complete");
13804
+ if (result.outputPath && !options?.quiet) consola.success(`Result saved: ${pc.cyan(result.outputPath)}`);
13805
+ if (result.tokensUsed && !options?.quiet) consola.info(pc.gray(`Token usage: prompt=${result.tokensUsed.prompt}, completion=${result.tokensUsed.completion}, total=${result.tokensUsed.total}`));
13806
+ if (result.data && options?.insert !== false) {
13807
+ const s2 = spinner();
13808
+ if (!options?.quiet) s2.start("Inserting into database...");
13809
+ const dbError = await ensureDatabaseReady(config.databasePath, schemaLoad.schema);
13810
+ if (dbError) {
13811
+ if (!options?.quiet) s2.stop("Database not ready");
13812
+ consola.error(dbError);
13813
+ return {
13814
+ success: false,
13815
+ error: dbError
13816
+ };
13817
+ }
13818
+ try {
13819
+ const db = new Database(config.databasePath);
13491
13820
  try {
13492
- const db = new Database(config.databasePath);
13493
- try {
13494
- const insertResult = insertExtractedData(db, schema, result.data);
13495
- if (insertResult.success) s2.stop(`Inserted into ${insertResult.tablesInserted.length} table(s)`);
13496
- else {
13497
- s2.stop("Database insert failed");
13498
- fail$1(insertResult.error || "Unknown error");
13499
- return;
13500
- }
13501
- } finally {
13502
- db.close();
13821
+ const insertResult = insertExtractedData(db, schemaLoad.schema, result.data);
13822
+ if (insertResult.success) {
13823
+ if (!options?.quiet) s2.stop(`Inserted into ${insertResult.tablesInserted.length} table(s)`);
13824
+ return {
13825
+ success: true,
13826
+ outputPath: result.outputPath,
13827
+ data: result.data,
13828
+ tablesInserted: insertResult.tablesInserted,
13829
+ tokensUsed: result.tokensUsed
13830
+ };
13831
+ } else {
13832
+ if (!options?.quiet) s2.stop("Database insert failed");
13833
+ consola.error(insertResult.error || "Unknown error");
13834
+ return {
13835
+ success: false,
13836
+ error: insertResult.error
13837
+ };
13503
13838
  }
13504
- } catch (e) {
13505
- s2.stop("Database insert failed");
13506
- fail$1(e instanceof Error ? e.message : String(e));
13507
- return;
13839
+ } finally {
13840
+ db.close();
13508
13841
  }
13842
+ } catch (e) {
13843
+ if (!options?.quiet) s2.stop("Database insert failed");
13844
+ consola.error(e instanceof Error ? e.message : String(e));
13845
+ return {
13846
+ success: false,
13847
+ error: String(e)
13848
+ };
13509
13849
  }
13510
- outro("Done!");
13511
- }
13512
- });
13513
-
13514
- //#endregion
13515
- //#region schemas/table-schema.json
13516
- var $id = "https://raw.githubusercontent.com/OSpoon/aiex-cli/main/app/cli/schemas/table-schema.json";
13517
-
13518
- //#endregion
13519
- //#region src/commands/schema.ts
13520
- const execFileAsync$1 = promisify(execFile);
13521
- function fail(message) {
13522
- if (message) consola.error(message);
13523
- outro("Failed!");
13524
- process.exitCode = 1;
13525
- }
13526
- async function writeJsonIfAbsent(filePath, data) {
13527
- try {
13528
- await fs.writeFile(filePath, `${JSON.stringify(data, null, 2)}\n`, { flag: "wx" });
13529
- return "created";
13530
- } catch (error) {
13531
- if (error.code === "EEXIST") return "skipped";
13532
- throw error;
13533
13850
  }
13851
+ return {
13852
+ success: true,
13853
+ outputPath: result.outputPath,
13854
+ data: result.data,
13855
+ tokensUsed: result.tokensUsed
13856
+ };
13534
13857
  }
13535
- async function generateFromFiles(schemaFiles, config) {
13536
- const result = parseAllSchemas(await Promise.all(schemaFiles.map(async (filePath) => {
13537
- return {
13858
+ async function processOneFile(aiexDir, config, aiConfig, schemaName, filePath, modelOverride, options) {
13859
+ const audit = await createExtractionAuditRecord(aiexDir, {
13860
+ schemaName,
13861
+ modelName: modelOverride?.name,
13862
+ source: {
13863
+ type: "file",
13538
13864
  filePath,
13539
- content: await fs.readFile(filePath, "utf-8")
13540
- };
13541
- })));
13865
+ fileName: path.basename(filePath)
13866
+ }
13867
+ });
13868
+ try {
13869
+ const input = await readExtractFileInput(filePath, aiConfig);
13870
+ const r = await extractSingle(aiexDir, config, aiConfig, schemaName, input.text, input.filePath, modelOverride, {
13871
+ quiet: false,
13872
+ insert: options?.insert
13873
+ });
13874
+ if (r.success) {
13875
+ await updateExtractionAuditRecord(aiexDir, audit.id, {
13876
+ status: "succeeded",
13877
+ outputPath: r.outputPath,
13878
+ outputName: r.outputPath ? path.basename(r.outputPath) : void 0,
13879
+ tablesInserted: r.tablesInserted,
13880
+ tokensUsed: r.tokensUsed
13881
+ });
13882
+ consola.success(`Processed: ${path.basename(filePath)}`);
13883
+ return true;
13884
+ } else {
13885
+ await updateExtractionAuditRecord(aiexDir, audit.id, {
13886
+ status: "failed",
13887
+ error: r.error || "Extraction failed"
13888
+ });
13889
+ consola.error(`Failed: ${r.error}`);
13890
+ return false;
13891
+ }
13892
+ } catch (e) {
13893
+ await updateExtractionAuditRecord(aiexDir, audit.id, {
13894
+ status: "failed",
13895
+ error: e instanceof Error ? e.message : String(e)
13896
+ });
13897
+ consola.error(`Error processing ${path.basename(filePath)}: ${e instanceof Error ? e.message : String(e)}`);
13898
+ return false;
13899
+ }
13900
+ }
13901
+ async function runBatchExtraction(aiexDir, config, aiConfig, schemaName, dir, globPattern, modelOverride, options) {
13902
+ consola.info(`Scanning ${pc.cyan(dir)} for supported files...`);
13903
+ let files;
13904
+ try {
13905
+ files = listSupportedFiles(dir, globPattern);
13906
+ } catch {
13907
+ return {
13908
+ ok: false,
13909
+ successCount: 0,
13910
+ failCount: 0,
13911
+ error: `Cannot read directory: ${dir}`
13912
+ };
13913
+ }
13914
+ if (files.length === 0) return {
13915
+ ok: false,
13916
+ successCount: 0,
13917
+ failCount: 0,
13918
+ error: `No supported files found in ${dir}`
13919
+ };
13920
+ consola.info(`Found ${files.length} file(s) to process`);
13921
+ let successCount = 0;
13922
+ let failCount = 0;
13923
+ for (let i = 0; i < files.length; i++) {
13924
+ const file = files[i];
13925
+ consola.info(`\n[${i + 1}/${files.length}] Processing: ${pc.cyan(path.basename(file))}`);
13926
+ if (await processOneFile(aiexDir, config, aiConfig, schemaName, file, modelOverride, options)) successCount++;
13927
+ else failCount++;
13928
+ }
13929
+ consola.info(`\nBatch complete: ${pc.green(`${successCount} succeeded`)}, ${pc.red(`${failCount} failed`)}, ${files.length} total`);
13930
+ return {
13931
+ ok: true,
13932
+ successCount,
13933
+ failCount
13934
+ };
13935
+ }
13936
+
13937
+ //#endregion
13938
+ //#region src/commands/extract.ts
13939
+ function getIdArg(args) {
13940
+ if (typeof args.id === "string") return args.id;
13941
+ const positional = args._;
13942
+ return Array.isArray(positional) && typeof positional[0] === "string" ? positional[0] : "";
13943
+ }
13944
+ function formatSource(source) {
13945
+ return source.type === "file" ? source.fileName || "file" : "text";
13946
+ }
13947
+ async function loadConfiguredAI(aiexDir) {
13948
+ const aiConfig = await readAIConfig(aiexDir);
13949
+ if (!aiConfig) {
13950
+ failCommand("AI configuration not found. Please run \"aiex web\" to configure AI settings first");
13951
+ return null;
13952
+ }
13953
+ if (!aiConfig.provider.apiKey) {
13954
+ failCommand("API Key not configured. Please configure AI settings in the Web interface first");
13955
+ return null;
13956
+ }
13957
+ if (!aiConfig.provider.models?.length) {
13958
+ failCommand("No models configured. Please add at least one model in AI Settings");
13959
+ return null;
13960
+ }
13961
+ return aiConfig;
13962
+ }
13963
+ function resolveModelOverride(aiConfig, modelName) {
13964
+ if (!modelName) return void 0;
13965
+ const matched = aiConfig.provider.models.find((m) => m.name === modelName);
13966
+ if (!matched) {
13967
+ failCommand(`Model "${modelName}" not found in configuration. Available models: ${aiConfig.provider.models.map((m) => m.name).join(", ")}`);
13968
+ return null;
13969
+ }
13970
+ return matched;
13971
+ }
13972
+ async function runAuditedSingleExtraction(input) {
13973
+ const audit = await createExtractionAuditRecord(input.aiexDir, {
13974
+ schemaName: input.schemaName,
13975
+ modelName: input.modelOverride?.name,
13976
+ source: input.source,
13977
+ retryOf: input.retryOf
13978
+ });
13979
+ const result = await extractSingle(input.aiexDir, input.config, input.aiConfig, input.schemaName, input.text, input.filePath, input.modelOverride, { insert: input.insert });
13542
13980
  if (!result.success) {
13543
- consola.error(result.error);
13981
+ await updateExtractionAuditRecord(input.aiexDir, audit.id, {
13982
+ status: "failed",
13983
+ error: result.error || "Extraction failed"
13984
+ });
13544
13985
  return false;
13545
13986
  }
13546
- for (const warning of result.data.warnings) consola.warn(warning);
13547
- await fs.mkdir(path.dirname(config.drizzleSchemaPath), { recursive: true });
13548
- await fs.writeFile(config.drizzleSchemaPath, result.data.drizzleCode);
13549
- consola.success(`Generated ${pc.cyan(".aiex/drizzle/schema.ts")} from ${schemaFiles.length} schema file(s)`);
13987
+ await updateExtractionAuditRecord(input.aiexDir, audit.id, {
13988
+ status: "succeeded",
13989
+ outputPath: result.outputPath,
13990
+ outputName: result.outputPath ? path.basename(result.outputPath) : void 0,
13991
+ tablesInserted: result.tablesInserted,
13992
+ tokensUsed: result.tokensUsed
13993
+ });
13550
13994
  return true;
13551
13995
  }
13552
- async function migrate(config, migrationName) {
13996
+ const historyCommand = defineCommand({
13997
+ meta: {
13998
+ name: "history",
13999
+ description: "List extraction audit records"
14000
+ },
14001
+ async run() {
14002
+ const config = createMigrationConfig(process.cwd());
14003
+ const records = await listExtractionAuditRecords(path.dirname(config.schemaPath));
14004
+ if (records.length === 0) {
14005
+ consola.info("No extraction history found");
14006
+ return;
14007
+ }
14008
+ for (const record of records) {
14009
+ const suffix = record.error ? ` — ${record.error}` : record.outputName ? ` — ${record.outputName}` : "";
14010
+ consola.info(`${record.status.padEnd(9)} ${record.id} ${record.schemaName} ${formatSource(record.source)}${suffix}`);
14011
+ }
14012
+ }
14013
+ });
14014
/**
 * `show` subcommand: prints a single extraction audit record as
 * pretty-printed JSON (2-space indent).
 *
 * Fails the command when no id is supplied or when no record is found
 * for that id.
 */
const showCommand = defineCommand({
  meta: {
    name: "show",
    description: "Show an extraction audit record"
  },
  args: {
    id: {
      type: "string",
      description: "Audit record id"
    }
  },
  async run({ args }) {
    const recordId = getIdArg(args);
    if (!recordId) {
      failCommand("Audit record id is required");
      return;
    }
    const { schemaPath } = createMigrationConfig(process.cwd());
    const auditRecord = await readExtractionAuditRecord(path.dirname(schemaPath), recordId);
    if (!auditRecord) {
      failCommand(`Extraction record not found: ${recordId}`);
      return;
    }
    const rendered = JSON.stringify(auditRecord, null, 2);
    consola.info(rendered);
  }
});
14038
/**
 * `retry` subcommand: re-runs the extraction described by an existing
 * audit record.
 *
 * The input is rebuilt from the record's `source`: file sources are re-read
 * through `readExtractFileInput`, any other source reuses the stored text.
 * The new run is linked to the original via `retryOf`. `--noInsert` is
 * forwarded as `insert: false`.
 */
const retryCommand = defineCommand({
  meta: {
    name: "retry",
    description: "Retry an extraction audit record"
  },
  args: {
    id: {
      type: "string",
      description: "Audit record id"
    },
    noInsert: {
      type: "boolean",
      description: "Extract and save JSON without inserting into SQLite",
      default: false
    }
  },
  async run({ args }) {
    intro(pc.inverse(" aiex extract retry "));
    const recordId = getIdArg(args);
    if (!recordId) {
      failCommand("Audit record id is required");
      return;
    }
    const config = createMigrationConfig(process.cwd());
    const aiexDir = path.dirname(config.schemaPath);
    const original = await readExtractionAuditRecord(aiexDir, recordId);
    if (!original) {
      failCommand(`Extraction record not found: ${recordId}`);
      return;
    }
    const aiConfig = await loadConfiguredAI(aiexDir);
    if (!aiConfig) return;
    // The retry reuses the model recorded on the original run.
    const modelOverride = resolveModelOverride(aiConfig, original.modelName);
    if (modelOverride === null) return;
    try {
      let replayInput;
      if (original.source.type === "file") {
        replayInput = await readExtractFileInput(original.source.filePath || "", aiConfig);
      } else {
        replayInput = {
          text: original.source.text || "",
          filePath: undefined
        };
      }
      const succeeded = await runAuditedSingleExtraction({
        aiexDir,
        config,
        aiConfig,
        schemaName: original.schemaName,
        text: replayInput.text,
        filePath: replayInput.filePath,
        source: original.source,
        modelOverride,
        retryOf: original.id,
        insert: !args.noInsert
      });
      if (!succeeded) {
        failCommand();
        return;
      }
      outro("Done!");
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      failCommand(message);
    }
  }
});
14098
/**
 * `rm` subcommand: deletes an extraction audit record by id via
 * `deleteExtractionAuditRecord`.
 *
 * Fails the command when the id is missing or when the delete helper
 * reports a falsy result (reported to the user as "not found").
 */
const rmCommand = defineCommand({
  meta: {
    name: "rm",
    description: "Delete an extraction audit record and cached upload"
  },
  args: {
    id: {
      type: "string",
      description: "Audit record id"
    }
  },
  async run({ args }) {
    const recordId = getIdArg(args);
    if (!recordId) {
      failCommand("Audit record id is required");
      return;
    }
    const { schemaPath } = createMigrationConfig(process.cwd());
    const removed = await deleteExtractionAuditRecord(path.dirname(schemaPath), recordId);
    if (!removed) {
      failCommand(`Extraction record not found: ${recordId}`);
      return;
    }
    consola.success(`Deleted extraction record: ${recordId}`);
  }
});
14121
/**
 * `extract` command: extracts structured data for a schema from text,
 * a single file, or a directory of files.
 *
 * Modes, checked in this order after the AI configuration is loaded:
 *   1. no schema/text/file/dir flag at all -> interactive prompts;
 *   2. `--dir`                             -> batch extraction (schema required);
 *   3. otherwise                           -> single extraction from exactly
 *                                             one of `--text` or `--file`.
 * `--noInsert` is forwarded as `insert: false` in batch and single modes.
 */
const extractCommand = defineCommand({
  meta: {
    name: "extract",
    description: "Extract structured data from text, images, or PDFs"
  },
  subCommands: {
    history: historyCommand,
    show: showCommand,
    retry: retryCommand,
    rm: rmCommand
  },
  args: {
    schema: {
      type: "string",
      alias: "s",
      description: "Schema name (without .json extension)"
    },
    text: {
      type: "string",
      alias: "t",
      description: "Text content to extract"
    },
    file: {
      type: "string",
      alias: "f",
      description: "File path (image/PDF) to extract from"
    },
    model: {
      type: "string",
      alias: "m",
      description: "AI model to use for extraction (overrides auto-selection)"
    },
    dir: {
      type: "string",
      alias: "d",
      description: "Directory containing files to batch extract"
    },
    glob: {
      type: "string",
      alias: "g",
      description: "Glob pattern to filter files in batch mode (e.g. \"*.pdf\")"
    },
    noInsert: {
      type: "boolean",
      description: "Extract and save JSON without inserting into SQLite",
      default: false
    }
  },
  async run({ args }) {
    intro(pc.inverse(" aiex extract "));
    const config = createMigrationConfig(process.cwd());
    const aiexDir = path.dirname(config.schemaPath);

    // --dir cannot be combined with either single-input flag.
    if (args.dir && args.text) {
      failCommand("Cannot combine -t/--text with -d/--dir");
      return;
    }
    if (args.dir && args.file) {
      failCommand("Cannot combine -f/--file with -d/--dir");
      return;
    }

    const aiConfig = await loadConfiguredAI(aiexDir);
    if (!aiConfig) return;
    const modelOverride = resolveModelOverride(aiConfig, args.model);
    if (modelOverride === null) return;

    // Interactive mode: none of the input-selecting flags were given.
    const hasInputFlag = args.schema || args.text || args.file || args.dir;
    if (!hasInputFlag) {
      const finished = await runInteractive(aiexDir, config, aiConfig, modelOverride);
      if (finished) outro("Done!");
      return;
    }

    // Batch mode.
    if (args.dir) {
      if (!args.schema) {
        failCommand("Schema name (-s) is required in batch mode");
        return;
      }
      const batch = await runBatchExtraction(aiexDir, config, aiConfig, args.schema, args.dir, args.glob, modelOverride, { insert: !args.noInsert });
      if (!batch.ok) {
        failCommand(batch.error);
        return;
      }
      if (batch.failCount > 0) {
        // Partial failure: non-zero exit code, but the run is still summarised.
        process.exitCode = 1;
        outro(`Completed with failures (${batch.failCount} failed)`);
      } else {
        outro("Done!");
      }
      return;
    }

    // Single extraction: needs a schema and exactly one of text / file.
    if (!args.schema) {
      failCommand("Please provide a schema name (-s) to extract from");
      return;
    }
    if (!args.text && !args.file) {
      failCommand("Please provide text (-t) or a file (-f) to extract from");
      return;
    }
    if (args.text && args.file) {
      failCommand("-t and -f cannot be used together");
      return;
    }

    let extractText = "";
    let resolvedFilePath;
    if (args.file) {
      try {
        const fileInput = await readExtractFileInput(args.file, aiConfig);
        extractText = fileInput.text;
        resolvedFilePath = fileInput.filePath;
      } catch (readError) {
        failCommand(`Cannot read file: ${args.file} — ${readError instanceof Error ? readError.message : String(readError)}`);
        return;
      }
    } else if (args.text) {
      extractText = args.text;
    }

    // The audit source is recorded as a file only when the reader returned a file path.
    const source = resolvedFilePath
      ? {
        type: "file",
        filePath: args.file,
        fileName: path.basename(args.file)
      }
      : {
        type: "text",
        text: extractText
      };
    const succeeded = await runAuditedSingleExtraction({
      aiexDir,
      config,
      aiConfig,
      schemaName: args.schema,
      text: extractText,
      filePath: resolvedFilePath,
      source,
      modelOverride,
      insert: !args.noInsert
    });
    if (!succeeded) {
      failCommand();
      return;
    }
    outro("Done!");
  }
});
14251
/**
 * Interactive extraction flow: prompts for a schema, then for an input
 * source (text, single file, or batch directory), and runs the matching
 * extraction.
 *
 * @param aiexDir directory passed to `listSchemas` and the extraction helpers
 * @param config migration config forwarded to the extraction helpers
 * @param aiConfig loaded AI configuration
 * @param modelOverride model override forwarded to the extraction helpers
 * @returns {Promise<boolean>} true only when the chosen extraction succeeded
 *   (for batch mode: ok and zero failures); false on cancel, on missing
 *   schemas, or on failure
 */
async function runInteractive(aiexDir, config, aiConfig, modelOverride) {
  const availableSchemas = await listSchemas(aiexDir);
  if (availableSchemas.length === 0) {
    failCommand(`No schema files found in ${pc.cyan(".aiex/schema/")}. Run ${pc.cyan("aiex web")} to create and configure schemas first.`);
    return false;
  }

  // Builds a prompt validator that rejects empty or whitespace-only answers.
  const requireAnswer = (message) => (value) => {
    if (!value || value.trim().length === 0) return message;
  };

  const schemaName = await select({
    message: "Select a schema to extract data for:",
    options: availableSchemas.map((name) => ({
      label: name,
      value: name
    }))
  });
  if (isCancel(schemaName)) {
    cancel("Cancelled");
    return false;
  }

  const inputSource = await select({
    message: "Choose input source:",
    options: [
      {
        label: "Text content",
        value: "text",
        hint: "Paste or type text directly"
      },
      {
        label: "Single file",
        value: "file",
        hint: "Extract from a file (txt, pdf, image)"
      },
      {
        label: "Batch directory",
        value: "dir",
        hint: "Extract all supported files in a directory"
      }
    ]
  });
  if (isCancel(inputSource)) {
    cancel("Cancelled");
    return false;
  }

  switch (inputSource) {
    case "text": {
      const textContent = await text({
        message: "Enter text content to extract:",
        validate: requireAnswer("Please enter some text")
      });
      if (isCancel(textContent)) {
        cancel("Cancelled");
        return false;
      }
      const outcome = await extractSingle(aiexDir, config, aiConfig, schemaName, textContent, undefined, modelOverride);
      return outcome.success;
    }
    case "file": {
      const chosenPath = await text({
        message: "Enter file path:",
        validate: requireAnswer("Please enter a file path")
      });
      if (isCancel(chosenPath)) {
        cancel("Cancelled");
        return false;
      }
      try {
        const fileInput = await readExtractFileInput(chosenPath, aiConfig);
        const outcome = await extractSingle(aiexDir, config, aiConfig, schemaName, fileInput.text, fileInput.filePath, modelOverride);
        return outcome.success;
      } catch (e) {
        consola.error(`Cannot read file: ${chosenPath} — ${e instanceof Error ? e.message : String(e)}`);
        return false;
      }
    }
    case "dir": {
      const dirPath = await text({
        message: "Enter directory path:",
        validate: requireAnswer("Please enter a directory path")
      });
      if (isCancel(dirPath)) {
        cancel("Cancelled");
        return false;
      }
      const batch = await runBatchExtraction(aiexDir, config, aiConfig, schemaName, dirPath, undefined, modelOverride);
      if (!batch.ok) failCommand(batch.error);
      return batch.ok && batch.failCount === 0;
    }
    default:
      return false;
  }
}
14340
/**
 * Reports a user-initiated cancellation: logs `msg` at info level, closes
 * the prompt session with "Cancelled", and sets the process exit code to 0.
 *
 * @param {string} msg message logged before the closing outro
 */
function cancel(msg) {
  consola.info(msg);
  outro("Cancelled");
  // Cancelling counts as a clean exit.
  process.exitCode = 0;
}
14345
+
14346
+ //#endregion
14347
+ //#region src/core/schema-runner.ts
14348
// Promise-returning wrapper around child_process.execFile.
const execFileAsync = promisify(execFile);

/**
 * Lists the `.json` files directly inside `schemaDir`.
 *
 * @param {string} schemaDir directory to scan
 * @returns {Promise<string[]>} paths (schemaDir joined with each file name),
 *   sorted with the default string sort; an empty array when the directory
 *   cannot be read for any reason
 */
async function listSchemaFiles(schemaDir) {
  try {
    const entries = await fs.readdir(schemaDir);
    const schemaPaths = [];
    for (const entry of entries) {
      if (entry.endsWith(".json")) schemaPaths.push(path.join(schemaDir, entry));
    }
    return schemaPaths.sort();
  } catch {
    // Best effort: an unreadable or missing directory is treated as "no schemas".
    return [];
  }
}
14356
/**
 * Reads every schema file, parses them together with `parseAllSchemas`,
 * and writes the generated Drizzle code to `config.drizzleSchemaPath`
 * (creating its parent directory when needed).
 *
 * @param {string[]} schemaFiles paths of the JSON schema files to read
 * @param config object providing `drizzleSchemaPath`
 * @returns {Promise<object>} on parse failure
 *   `{ success: false, error, warnings: [], schemaCount, tables: 0, relations: 0 }`;
 *   on success `{ success: true, warnings, schemaCount, tables, relations }`,
 *   where `relations` counts forward plus reverse relations
 */
async function generateSchemaFromFiles(schemaFiles, config) {
  const readJobs = schemaFiles.map(async (filePath) => {
    const content = await fs.readFile(filePath, "utf-8");
    return { filePath, content };
  });
  const parsed = parseAllSchemas(await Promise.all(readJobs));
  if (!parsed.success) {
    return {
      success: false,
      error: parsed.error,
      warnings: [],
      schemaCount: schemaFiles.length,
      tables: 0,
      relations: 0
    };
  }
  const { tables, relations, reverseRelations, warnings, drizzleCode } = parsed.data;
  const outputDir = path.dirname(config.drizzleSchemaPath);
  await fs.mkdir(outputDir, { recursive: true });
  await fs.writeFile(config.drizzleSchemaPath, drizzleCode);
  const relationCount = relations.length + reverseRelations.length;
  return {
    success: true,
    warnings,
    schemaCount: schemaFiles.length,
    tables: tables.length,
    relations: relationCount
  };
}
14382
/**
 * Extracts the migration helper's JSON result from its stdout.
 *
 * The result is the first line that, once trimmed, starts with "{" and ends
 * with "}". Lines are split on LF or CRLF and trimmed individually, so
 * Windows line endings or indentation around the JSON line do not hide it.
 *
 * @param {string} stdout helper standard output
 * @param {string} stderr helper standard error, used as the error text when
 *   the JSON line cannot be parsed
 * @returns {object} the parsed result when it reports success; otherwise
 *   `{ success: false, error }`
 */
function parseMigrationOutput(stdout, stderr) {
  try {
    const jsonLine = stdout
      .split(/\r?\n/)
      .map((line) => line.trim())
      .find((line) => line.startsWith("{") && line.endsWith("}"));
    if (!jsonLine) return {
      success: false,
      error: "Migration helper did not return valid output"
    };
    const result = JSON.parse(jsonLine);
    if (!result.success) return {
      success: false,
      error: result.error || "Migration failed"
    };
    return result;
  } catch {
    // Unparseable output: surface whatever the helper printed.
    return {
      success: false,
      error: stderr || stdout || "Migration helper failed"
    };
  }
}
14402
+ async function runSchemaMigration(config, migrationName) {
13553
14403
  const helperPath = resolveHelperPath();
13554
14404
  const helperArgs = [
13555
14405
  resolveTsxPath(),
@@ -13560,40 +14410,62 @@ async function migrate(config, migrationName) {
13560
14410
  ];
13561
14411
  if (migrationName) helperArgs.push(migrationName);
13562
14412
  try {
13563
- const { stdout } = await execFileAsync$1(process.execPath, helperArgs, { cwd: process.cwd() });
13564
- const result = JSON.parse(stdout.trim());
13565
- if (!result.success) {
13566
- consola.error("Failed to generate migration");
13567
- consola.error(result.error);
13568
- return false;
13569
- }
13570
- if (result.changes === 0) {
13571
- consola.info(pc.gray("No changes detected"));
13572
- return true;
13573
- }
13574
- consola.success(pc.green("Migration files generated"));
13575
- consola.success(pc.green("Database migrated"));
13576
- return true;
14413
+ const { stdout, stderr } = await execFileAsync(process.execPath, helperArgs, { cwd: process.cwd() });
14414
+ return parseMigrationOutput(stdout, stderr);
13577
14415
  } catch (error) {
13578
- consola.error("Failed to generate migration");
13579
14416
  const execError = error;
13580
- if (execError.stderr) consola.error(execError.stderr);
13581
- else consola.error(execError.message || String(error));
13582
- return false;
14417
+ return {
14418
+ success: false,
14419
+ error: execError.stderr || execError.stdout || execError.message || String(error)
14420
+ };
13583
14421
  }
13584
14422
  }
14423
/**
 * Runs the schema pipeline without any console output: list schema files,
 * generate the Drizzle schema, then (unless `options.generateOnly`) run the
 * migration helper.
 *
 * @param config object providing `schemaPath`, forwarded to the generate and migrate steps
 * @param {{ generateOnly?: boolean, migrationName?: string }} [options]
 * @returns {Promise<object>} `{ success, error?, warnings, schemaCount, tables,
 *   relations, migration? }`; `migration` is present only when the migration
 *   step ran
 */
async function runSchemaSync(config, options = {}) {
  const schemaFiles = await listSchemaFiles(config.schemaPath);
  if (schemaFiles.length === 0) {
    return {
      success: false,
      error: "No schema files found",
      warnings: [],
      schemaCount: 0,
      tables: 0,
      relations: 0
    };
  }

  const generated = await generateSchemaFromFiles(schemaFiles, config);
  // Generation statistics shared by every remaining outcome.
  const summary = {
    warnings: generated.warnings,
    schemaCount: generated.schemaCount,
    tables: generated.tables,
    relations: generated.relations
  };
  if (!generated.success) {
    return { success: false, error: generated.error, ...summary };
  }
  if (options.generateOnly) {
    return { success: true, ...summary };
  }

  const migration = await runSchemaMigration(config, options.migrationName);
  return {
    success: migration.success,
    error: migration.error,
    ...summary,
    migration
  };
}
14460
+
14461
+ //#endregion
14462
+ //#region src/commands/schema.ts
13585
14463
  const schemaCommand = defineCommand({
13586
14464
  meta: {
13587
14465
  name: "schema",
13588
14466
  description: "Sync JSON Schema to SQLite database"
13589
14467
  },
13590
14468
  args: {
13591
- init: {
13592
- type: "boolean",
13593
- alias: "i",
13594
- description: "Only initialize .aiex/ directory with example schema",
13595
- default: false
13596
- },
13597
14469
  generate: {
13598
14470
  type: "boolean",
13599
14471
  alias: "g",
@@ -13608,226 +14480,21 @@ const schemaCommand = defineCommand({
13608
14480
  async run({ args }) {
13609
14481
  intro(pc.inverse(" aiex schema "));
13610
14482
  const config = createMigrationConfig(process.cwd());
13611
- if (args.init) {
13612
- await fs.mkdir(config.schemaPath, { recursive: true });
13613
- await fs.mkdir(path.dirname(config.drizzleSchemaPath), { recursive: true });
13614
- await fs.mkdir(config.migrationsPath, { recursive: true });
13615
- const userSchema = {
13616
- $schema: $id,
13617
- title: "User",
13618
- type: "object",
13619
- table: {
13620
- name: "users",
13621
- timestamps: true,
13622
- softDelete: true
13623
- },
13624
- properties: {
13625
- id: {
13626
- type: "integer",
13627
- primary: true,
13628
- autoIncrement: true
13629
- },
13630
- email: {
13631
- type: "string",
13632
- format: "email",
13633
- unique: true
13634
- },
13635
- username: {
13636
- type: "string",
13637
- minLength: 3,
13638
- maxLength: 50,
13639
- unique: true
13640
- },
13641
- displayName: {
13642
- type: "string",
13643
- maxLength: 100
13644
- },
13645
- bio: {
13646
- type: "string",
13647
- maxLength: 500
13648
- },
13649
- avatarUrl: {
13650
- type: "string",
13651
- format: "uri"
13652
- },
13653
- role: {
13654
- type: "string",
13655
- default: "member"
13656
- },
13657
- isActive: {
13658
- type: "boolean",
13659
- default: true
13660
- },
13661
- lastLoginAt: {
13662
- type: "string",
13663
- format: "date-time"
13664
- },
13665
- profile: {
13666
- type: "object",
13667
- drizzle: { mode: "json" },
13668
- properties: {
13669
- website: { type: "string" },
13670
- location: { type: "string" },
13671
- socialLinks: {
13672
- type: "array",
13673
- items: {
13674
- type: "object",
13675
- properties: {
13676
- platform: { type: "string" },
13677
- url: { type: "string" }
13678
- }
13679
- }
13680
- }
13681
- }
13682
- },
13683
- preferences: {
13684
- type: "object",
13685
- nested: {
13686
- enabled: true,
13687
- relation: "has-one"
13688
- },
13689
- properties: {
13690
- theme: {
13691
- type: "string",
13692
- default: "light"
13693
- },
13694
- language: {
13695
- type: "string",
13696
- default: "en"
13697
- },
13698
- emailNotifications: {
13699
- type: "boolean",
13700
- default: true
13701
- },
13702
- pushNotifications: {
13703
- type: "boolean",
13704
- default: false
13705
- }
13706
- }
13707
- }
13708
- },
13709
- required: ["email", "username"]
13710
- };
13711
- const postSchema = {
13712
- $schema: $id,
13713
- title: "Post",
13714
- type: "object",
13715
- table: {
13716
- name: "posts",
13717
- timestamps: true,
13718
- softDelete: true
13719
- },
13720
- properties: {
13721
- id: {
13722
- type: "integer",
13723
- primary: true,
13724
- autoIncrement: true
13725
- },
13726
- title: {
13727
- type: "string",
13728
- minLength: 5,
13729
- maxLength: 200
13730
- },
13731
- slug: {
13732
- type: "string",
13733
- maxLength: 250,
13734
- unique: true
13735
- },
13736
- content: { type: "string" },
13737
- excerpt: {
13738
- type: "string",
13739
- maxLength: 300
13740
- },
13741
- authorId: { type: "integer" },
13742
- status: {
13743
- type: "string",
13744
- default: "draft"
13745
- },
13746
- viewCount: {
13747
- type: "integer",
13748
- default: 0,
13749
- minimum: 0
13750
- },
13751
- likeCount: {
13752
- type: "integer",
13753
- default: 0,
13754
- minimum: 0
13755
- },
13756
- publishedAt: {
13757
- type: "string",
13758
- format: "date-time"
13759
- },
13760
- tags: {
13761
- type: "array",
13762
- items: { type: "string" }
13763
- },
13764
- metadata: {
13765
- type: "object",
13766
- drizzle: { mode: "json" },
13767
- properties: {
13768
- featuredImage: { type: "string" },
13769
- readingTime: { type: "integer" },
13770
- seoTitle: { type: "string" },
13771
- seoDescription: { type: "string" }
13772
- }
13773
- },
13774
- comments: {
13775
- type: "array",
13776
- items: {
13777
- type: "object",
13778
- nested: {
13779
- enabled: true,
13780
- relation: "has-many"
13781
- },
13782
- properties: {
13783
- content: {
13784
- type: "string",
13785
- minLength: 1,
13786
- maxLength: 1e3
13787
- },
13788
- authorId: { type: "integer" },
13789
- status: {
13790
- type: "string",
13791
- default: "pending"
13792
- },
13793
- parentId: { type: "integer" }
13794
- }
13795
- }
13796
- }
13797
- },
13798
- required: [
13799
- "title",
13800
- "slug",
13801
- "authorId"
13802
- ]
13803
- };
13804
- const userStatus = await writeJsonIfAbsent(path.join(config.schemaPath, "user.json"), userSchema);
13805
- const postStatus = await writeJsonIfAbsent(path.join(config.schemaPath, "post.json"), postSchema);
13806
- consola.success(`Initialized ${pc.cyan(".aiex/")} with example schemas`);
13807
- if (userStatus === "skipped") consola.warn(`${pc.cyan(".aiex/schema/user.json")} already exists, skipped`);
13808
- if (postStatus === "skipped") consola.warn(`${pc.cyan(".aiex/schema/post.json")} already exists, skipped`);
13809
- consola.info("Example includes: User (with preferences has-one), Post (with comments has-many)");
13810
- outro("Run: aiex schema");
13811
- return;
13812
- }
13813
- let schemaFiles;
13814
- try {
13815
- schemaFiles = await fs.readdir(config.schemaPath);
13816
- schemaFiles = schemaFiles.filter((f) => f.endsWith(".json")).map((f) => path.join(config.schemaPath, f));
13817
- } catch {
13818
- schemaFiles = [];
13819
- }
14483
+ const schemaFiles = await listSchemaFiles(config.schemaPath);
13820
14484
  if (schemaFiles.length === 0) {
13821
- consola.info("Use --init to initialize with an example schema");
13822
- fail(`No schema files found in ${pc.cyan(".aiex/schema/")}`);
14485
+ consola.info(`Run ${pc.cyan("aiex web")} to create and configure schemas in the Web UI`);
14486
+ failCommand(`No schema files found in ${pc.cyan(".aiex/schema/")}`);
13823
14487
  return;
13824
14488
  }
13825
14489
  const s1 = spinner();
13826
14490
  s1.start("Generating Drizzle schema...");
13827
- const genOk = await generateFromFiles(schemaFiles, config);
13828
- s1.stop(genOk ? "Schema generated" : "Generation failed");
13829
- if (!genOk) {
13830
- fail();
14491
+ const generated = await generateSchemaFromFiles(schemaFiles, config);
14492
+ for (const warning of generated.warnings) consola.warn(warning);
14493
+ if (generated.success) consola.success(`Generated ${pc.cyan(".aiex/drizzle/schema.ts")} from ${generated.schemaCount} schema file(s)`);
14494
+ else if (generated.error) consola.error(generated.error);
14495
+ s1.stop(generated.success ? "Schema generated" : "Generation failed");
14496
+ if (!generated.success) {
14497
+ failCommand();
13831
14498
  return;
13832
14499
  }
13833
14500
  if (args.generate) {
@@ -13836,18 +14503,210 @@ const schemaCommand = defineCommand({
13836
14503
  }
13837
14504
  const s2 = spinner();
13838
14505
  s2.start("Running migrations...");
13839
- const migOk = await migrate(config, args.name);
13840
- s2.stop(migOk ? "Migrations applied" : "Migration failed");
13841
- if (!migOk) {
13842
- fail();
14506
+ const migration = await runSchemaMigration(config, args.name);
14507
+ if (!migration.success) {
14508
+ consola.error("Failed to generate migration");
14509
+ consola.error(migration.error || "Migration failed");
14510
+ } else if (migration.changes === 0) consola.info(pc.gray("No changes detected"));
14511
+ else {
14512
+ consola.success(pc.green("Migration files generated"));
14513
+ consola.success(pc.green("Database migrated"));
14514
+ }
14515
+ s2.stop(migration.success ? "Migrations applied" : "Migration failed");
14516
+ if (!migration.success) {
14517
+ failCommand();
13843
14518
  return;
13844
14519
  }
13845
14520
  outro("Done!");
13846
14521
  }
13847
14522
  });
13848
14523
 
14524
+ //#endregion
14525
+ //#region src/core/notion-sink.ts
14526
// Maximum string length kept by truncateText.
const RICH_TEXT_LIMIT = 2000;
// Exactly 32 hexadecimal characters (an un-hyphenated id), case-insensitive.
const UUID_RE = /^[0-9a-f]{32}$/i;
// Exactly an 8-4-4-4-12 hyphenated hexadecimal id, case-insensitive.
const HYPHENATED_UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
// First run of 32 hexadecimal characters anywhere inside a longer string.
const UUID_IN_TEXT_RE = /[0-9a-f]{32}/i;
14530
/**
 * Shortens `value` to at most RICH_TEXT_LIMIT UTF-16 code units.
 *
 * When the cut would land between the two halves of a surrogate pair, the
 * dangling high surrogate is dropped as well, so the result never ends in
 * half of a character (for example half of an emoji).
 *
 * @param {string} value text to shorten
 * @returns {string} `value` unchanged when it already fits, otherwise its
 *   truncated prefix
 */
function truncateText(value) {
  if (value.length <= RICH_TEXT_LIMIT) return value;
  let end = RICH_TEXT_LIMIT;
  const lastKept = value.charCodeAt(end - 1);
  // 0xD800-0xDBFF is the high-surrogate range; its partner would be cut off.
  if (lastKept >= 0xd800 && lastKept <= 0xdbff) end -= 1;
  return value.slice(0, end);
}
14533
/**
 * Converts any value to a string and always returns a string.
 *
 * - null / undefined -> ""
 * - strings are returned as-is
 * - numbers, booleans and bigints use `String(value)`
 * - everything else is JSON-encoded; values JSON cannot represent
 *   (functions, symbols) become "" instead of `undefined`
 *
 * @param {*} value value to convert
 * @returns {string}
 */
function stringifyValue(value) {
  if (value === null || value === void 0) return "";
  if (typeof value === "string") return value;
  // bigint is handled here because JSON.stringify throws on it.
  if (typeof value === "number" || typeof value === "boolean" || typeof value === "bigint") return String(value);
  // JSON.stringify yields undefined for functions/symbols; callers expect a string.
  return JSON.stringify(value) ?? "";
}
14539
/**
 * Coerces a value to a finite number.
 *
 * @param {*} value a number, or a non-blank string converted with `Number()`
 * @returns {number|null} the finite number, or null for anything else
 *   (non-finite numbers, blank or non-numeric strings, other types)
 */
function asNumber(value) {
  let candidate;
  if (typeof value === "number") {
    candidate = value;
  } else if (typeof value === "string" && value.trim()) {
    candidate = Number(value);
  } else {
    return null;
  }
  return Number.isFinite(candidate) ? candidate : null;
}
14547
/**
 * Coerces a value to a boolean.
 *
 * - booleans are returned unchanged
 * - numbers are true unless they are exactly 0
 * - strings are true only when, trimmed and lower-cased, they equal
 *   "true", "yes", "1" or "y"
 * - any other value uses ordinary truthiness
 *
 * @param {*} value value to coerce
 * @returns {boolean}
 */
function asBoolean(value) {
  switch (typeof value) {
    case "boolean":
      return value;
    case "number":
      return value !== 0;
    case "string": {
      const truthyWords = new Set(["true", "yes", "1", "y"]);
      return truthyWords.has(value.trim().toLowerCase());
    }
    default:
      return !!value;
  }
}
14561
/**
 * Converts a value into the `start` string of a date property.
 *
 * - valid `Date` objects and finite epoch-millisecond numbers become a full
 *   ISO 8601 timestamp (`toISOString()`)
 * - a valid calendar date written as `YYYY-MM-DD` is returned as that
 *   date-only string, so an all-day date is not turned into a UTC-midnight
 *   timestamp
 * - any other non-blank string is parsed with `Date.parse` and returned as
 *   a full ISO 8601 timestamp
 *
 * @param {*} value Date, epoch milliseconds, or date string
 * @returns {string|null} the date string, or null when the value cannot be
 *   interpreted as a date
 */
function asDateStart(value) {
  if (value instanceof Date && !Number.isNaN(value.getTime())) return value.toISOString();
  if (typeof value === "number" && Number.isFinite(value)) {
    const date = new Date(value);
    return Number.isNaN(date.getTime()) ? null : date.toISOString();
  }
  if (typeof value === "string" && value.trim()) {
    const trimmed = value.trim();
    if (/^\d{4}-\d{2}-\d{2}$/.test(trimmed)) {
      const dayMs = Date.parse(trimmed);
      // Round-trip check: only keep the date-only form for real calendar dates.
      if (!Number.isNaN(dayMs) && new Date(dayMs).toISOString().slice(0, 10) === trimmed) return trimmed;
    }
    const ms = Date.parse(value);
    if (Number.isNaN(ms)) return null;
    return new Date(ms).toISOString();
  }
  return null;
}
14574
/**
 * Normalises a value into an array of non-empty, trimmed strings.
 *
 * Arrays are converted element by element with `stringifyValue`; any other
 * value becomes a one-element array, or an empty array when it stringifies
 * to blank text.
 *
 * @param {*} value array or single value
 * @returns {string[]}
 */
function asStringArray(value) {
  const candidates = Array.isArray(value) ? value : [value];
  const cleaned = [];
  for (const candidate of candidates) {
    const label = stringifyValue(candidate).trim();
    if (label) cleaned.push(label);
  }
  return cleaned;
}
14579
/**
 * Builds the property payload for one value, shaped by the property type.
 *
 * Handled types: title, rich_text, number, checkbox, date, select,
 * multi_select, url, email, phone_number. Empty text becomes an empty array
 * (title / rich_text) or null (select, url, email, phone_number).
 *
 * @param {string} type property type
 * @param {*} value raw value to convert
 * @returns {object|null} the payload object, or null for any other type
 */
function buildPropertyValue(type, value) {
  // Text form is computed up front, exactly once, for every type.
  const content = truncateText(stringifyValue(value));
  const asRichText = () => (content ? [{ text: { content } }] : []);

  if (type === "title") return { title: asRichText() };
  if (type === "rich_text") return { rich_text: asRichText() };
  if (type === "number") return { number: asNumber(value) };
  if (type === "checkbox") return { checkbox: asBoolean(value) };
  if (type === "date") {
    const start = asDateStart(value);
    return { date: start ? { start } : null };
  }
  if (type === "select") {
    // Select names use the untruncated text.
    const optionName = stringifyValue(value).trim();
    return { select: optionName ? { name: optionName } : null };
  }
  if (type === "multi_select") {
    return { multi_select: asStringArray(value).map((optionName) => ({ name: optionName })) };
  }
  if (type === "url" || type === "email" || type === "phone_number") {
    return { [type]: content || null };
  }
  return null;
}
14601
/**
 * Finds the name of the property whose type is "title".
 *
 * @param {object} properties map of property name to property definition
 * @param {string} [preferred] name returned directly when it refers to a
 *   title property
 * @returns {string|null} `preferred` when it is a title property, otherwise
 *   the first title property in enumeration order, otherwise null
 */
function findTitleProperty(properties, preferred) {
  const isTitle = (name) => properties[name]?.type === "title";
  if (preferred && isTitle(preferred)) return preferred;
  for (const name of Object.keys(properties)) {
    if (isTitle(name)) return name;
  }
  return null;
}
14605
/**
 * Formats a 32-character hexadecimal id as an 8-4-4-4-12 hyphenated id.
 *
 * Existing hyphens are ignored when checking the id. Input that does not
 * contain exactly 32 hexadecimal characters once hyphens are removed is
 * returned unchanged.
 *
 * @param {string} value id with or without hyphens
 * @returns {string}
 */
function hyphenateDatabaseId(value) {
  const compact = value.replace(/-/g, "");
  if (!UUID_RE.test(compact)) return value;
  const groups = [
    compact.slice(0, 8),
    compact.slice(8, 12),
    compact.slice(12, 16),
    compact.slice(16, 20),
    compact.slice(20)
  ];
  return groups.join("-");
}
14610
/**
 * Extracts a database id from user input (a bare id or a URL containing
 * one) and returns it in hyphenated 8-4-4-4-12 form.
 *
 * Resolution order:
 *   1. blank input                       -> ""
 *   2. already a hyphenated id           -> returned unchanged
 *   3. exactly 32 hex characters         -> hyphenated
 *   4. first id inside the text that is delimited by non-hex characters,
 *      either 32 hex characters or the hyphenated form
 *   5. fallback: strip every hyphen and take the first 32-hex run
 *   6. nothing found                     -> the trimmed input unchanged
 *
 * Step 4 runs before hyphens are stripped. Stripping first would merge a
 * URL slug that ends in hex-looking characters (e.g. "Invoices-2024-<id>"
 * or "My-DB-<id>") with the id and return the wrong 32 characters.
 *
 * @param {string} value database id or URL
 * @returns {string}
 */
function parseNotionDatabaseId(value) {
  const input = value.trim();
  if (!input) return "";
  if (HYPHENATED_UUID_RE.test(input)) return input;
  if (UUID_RE.test(input)) return hyphenateDatabaseId(input);
  // An id bounded on both sides by a non-hex character or the string edge.
  const boundedIdRe = /(?:^|[^0-9a-f])([0-9a-f]{32}|[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})(?![0-9a-f])/i;
  const bounded = input.match(boundedIdRe);
  if (bounded) return hyphenateDatabaseId(bounded[1]);
  // Fallback for ids hyphenated in some other grouping.
  const match = input.replace(/-/g, "").match(UUID_IN_TEXT_RE);
  return match ? hyphenateDatabaseId(match[0]) : input;
}
14618
/**
 * Reduces a field or property name to a comparison key: NFKC-normalised,
 * lower-cased, with every run of characters that are neither letters nor
 * numbers removed.
 *
 * @param {string} value name to normalise
 * @returns {string} comparison key (possibly empty)
 */
function normalizeFieldName(value) {
  const folded = value.normalize("NFKC").toLowerCase();
  return folded.replace(/[^\p{Letter}\p{Number}]+/gu, "");
}
14621
/**
 * Produces the comparison keys for a schema field, derived in order from
 * its name, title and description.
 *
 * Blank or missing values are skipped, and so are values whose normalised
 * key is empty.
 *
 * @param {{ name: string, title?: string, description?: string }} field
 * @returns {string[]} normalised keys, in name / title / description order
 */
function buildMatchKeys(field) {
  const keys = [];
  for (const label of [field.name, field.title, field.description]) {
    if (!label?.trim()) continue;
    const key = normalizeFieldName(label);
    if (key) keys.push(key);
  }
  return keys;
}
14628
/**
 * Suggests a mapping from schema field names to database property names by
 * comparing normalised keys.
 *
 * Each field is matched on the first of its keys (see `buildMatchKeys`)
 * that equals the normalised name of a database property. Fields without a
 * match are left out of the result.
 *
 * @param {Array<{ name: string, title?: string, description?: string }>} schemaFields
 * @param {object} databaseProperties map keyed by database property name
 * @returns {Object<string, string>} field name -> database property name
 */
function suggestFieldMap(schemaFields, databaseProperties) {
  // When two property names normalise to the same key, the later one wins.
  const propertyByKey = new Map(
    Object.keys(databaseProperties).map((propertyName) => [normalizeFieldName(propertyName), propertyName])
  );
  const fieldMap = {};
  for (const field of schemaFields) {
    const matchedKey = buildMatchKeys(field).find((key) => propertyByKey.get(key));
    if (matchedKey !== undefined) fieldMap[field.name] = propertyByKey.get(matchedKey);
  }
  return fieldMap;
}
14641
/**
 * Retrieves a database through the Notion client and summarises its
 * properties, together with a suggested field mapping for the given schema
 * fields.
 *
 * @param {{ token: string, databaseId: string, schemaFields: Array<object> }} input
 * @returns {Promise<{ databaseId: string, titleProperty: (string|undefined),
 *   properties: Array<{ name: string, type: string }>, suggestedFieldMap: object }>}
 *   `properties` is sorted by name with `localeCompare`; a property without
 *   a type is reported as "unknown"
 * @throws {Error} when the token is blank or no database id can be derived
 */
async function inspectNotionDatabase(input) {
  if (!input.token.trim()) throw new Error("Notion integration token is required.");
  const databaseId = parseNotionDatabaseId(input.databaseId);
  if (!databaseId) throw new Error("Notion database URL or ID is required.");

  const notion = new Client({ auth: input.token });
  const database = await notion.databases.retrieve({ database_id: databaseId });
  const databaseProperties = database.properties;

  const titleProperty = findTitleProperty(databaseProperties) ?? undefined;
  const propertySummaries = Object.entries(databaseProperties).map(([propertyName, property]) => ({
    name: propertyName,
    type: property?.type ?? "unknown"
  }));
  propertySummaries.sort((left, right) => left.name.localeCompare(right.name));

  return {
    databaseId,
    titleProperty,
    properties: propertySummaries,
    suggestedFieldMap: suggestFieldMap(input.schemaFields, databaseProperties)
  };
}
14656
/**
 * Checks whether a Notion export configuration is usable.
 *
 * A token that is missing or not a string is reported with the same message
 * as a blank token, rather than raising a TypeError.
 *
 * @param {{ enabled?: boolean, token?: string }|null|undefined} config
 * @returns {string|null} a user-facing error message, or null when the
 *   configuration is enabled and has a non-blank token
 */
function validateNotionConfig(config) {
  if (!config?.enabled) return "Notion export is not enabled. Configure Notion settings first.";
  if (typeof config.token !== "string" || !config.token.trim()) return "Notion integration token is required.";
  return null;
}
14661
/**
 * Creates a page in the database configured for `schemaName`, filling its
 * properties from the extracted `data`.
 *
 * Each data field is written to the property named by the schema's
 * `fieldMap`, or to the property with the field's own name. Fields with no
 * matching database property, or whose property type `buildPropertyValue`
 * does not handle, are skipped. When the title property received no value,
 * it is filled with the first non-blank string in `data`, or with
 * `schemaName`.
 *
 * @param {object} config Notion configuration (`enabled`, `token`, `schemas`)
 * @param {string} schemaName key into `config.schemas`
 * @param {object} data extracted field values keyed by field name
 * @returns {Promise<{ pageId: string, databaseId: string }>}
 * @throws {Error} when the configuration is invalid, when the schema has no
 *   database configuration or no database id, or when no property could be
 *   filled
 */
async function writeNotionPage(config, schemaName, data) {
  const configError = validateNotionConfig(config);
  if (configError) throw new Error(configError);
  const notionConfig = config;
  // Optional chaining: a config without `schemas` is "not configured", not a TypeError.
  const schemaConfig = notionConfig.schemas?.[schemaName];
  if (!schemaConfig) throw new Error(`Notion database is not configured for schema "${schemaName}".`);
  // A missing or non-string database id gets the same error as a blank one.
  if (typeof schemaConfig.databaseId !== "string" || !schemaConfig.databaseId.trim()) throw new Error(`Notion database ID is required for schema "${schemaName}".`);
  const notion = new Client({ auth: notionConfig.token });
  const databaseProperties = (await notion.databases.retrieve({ database_id: schemaConfig.databaseId })).properties;
  const fieldMap = schemaConfig.fieldMap ?? {};
  const properties = {};
  for (const [sourceField, sourceValue] of Object.entries(data)) {
    const notionPropertyName = fieldMap[sourceField] ?? sourceField;
    const notionProperty = databaseProperties[notionPropertyName];
    if (!notionProperty) continue;
    const propertyValue = buildPropertyValue(notionProperty.type, sourceValue);
    if (propertyValue) properties[notionPropertyName] = propertyValue;
  }
  const titleProperty = findTitleProperty(databaseProperties, schemaConfig.titleProperty);
  if (titleProperty && !properties[titleProperty]) {
    const firstText = Object.entries(data).find(([, value]) => typeof value === "string" && value.trim())?.[1];
    properties[titleProperty] = buildPropertyValue("title", firstText ?? schemaName);
  }
  if (Object.keys(properties).length === 0) throw new Error("No extracted fields matched Notion database properties.");
  const page = await notion.pages.create({
    parent: { database_id: schemaConfig.databaseId },
    properties
  });
  return {
    pageId: page.id,
    databaseId: schemaConfig.databaseId
  };
}
14690
+
13849
14691
  //#endregion
13850
14692
  //#region src/server/routes/ai.ts
14693
// Trailing ".json" extension, case-insensitive.
const JSON_EXT_RE = /\.json$/i;

/**
 * Lists the top-level fields of a schema.
 *
 * Properties marked `nested.enabled` are skipped, as are array properties
 * whose `items` are marked `nested.enabled`.
 *
 * @param {object|null|undefined} schema schema with an optional `properties` object
 * @returns {Array<{ name: string, title: (string|undefined), description: (string|undefined) }>}
 *   one entry per remaining property; `title` and `description` are kept
 *   only when they are strings. Empty when `properties` is missing or not
 *   an object.
 */
function extractSchemaFields(schema) {
  const definitions = schema?.properties;
  if (!definitions || typeof definitions !== "object") return [];
  const stringOrUndefined = (candidate) => (typeof candidate === "string" ? candidate : undefined);
  const fields = [];
  for (const [fieldName, definition] of Object.entries(definitions)) {
    if (definition?.nested?.enabled) continue;
    if (definition?.type === "array" && definition?.items?.nested?.enabled) continue;
    fields.push({
      name: fieldName,
      title: stringOrUndefined(definition?.title),
      description: stringOrUndefined(definition?.description)
    });
  }
  return fields;
}
14706
/**
 * Loads the schema named `schemaName` from `config.schemaPath` and returns
 * its top-level fields.
 *
 * Only the base name of `schemaName` is used, with any trailing ".json"
 * removed, so the file read is always `<schemaPath>/<name>.json`.
 *
 * @param config object providing `schemaPath`
 * @param {string} schemaName schema name, with or without ".json"
 * @returns {Promise<Array<object>>} result of `extractSchemaFields` for the loaded schema
 */
async function loadSchemaFields(config, schemaName) {
  // basename() drops any directory components from the requested name.
  const baseName = path.basename(schemaName);
  const safeName = baseName.replace(JSON_EXT_RE, "");
  const schemaFile = path.join(config.schemaPath, `${safeName}.json`);
  const schema = await readFile(schemaFile);
  return extractSchemaFields(schema);
}
13851
14710
  function aiRoutes(config) {
13852
14711
  const app = new Hono();
13853
14712
  const aiexDir = path.dirname(config.schemaPath);
@@ -13863,13 +14722,7 @@ function aiRoutes(config) {
13863
14722
  }
13864
14723
  });
13865
14724
  }
13866
- return c.json({
13867
- ...aiConfig,
13868
- provider: {
13869
- ...aiConfig.provider,
13870
- apiKey: maskApiKey(aiConfig.provider.apiKey)
13871
- }
13872
- });
14725
+ return c.json(aiConfig);
13873
14726
  });
13874
14727
  app.post("/ai/registry-lookup", async (c) => {
13875
14728
  try {
@@ -13880,6 +14733,32 @@ function aiRoutes(config) {
13880
14733
  return c.json({});
13881
14734
  }
13882
14735
  });
14736
+ app.post("/ai/notion/inspect", async (c) => {
14737
+ try {
14738
+ const body = await c.req.json();
14739
+ const token = typeof body.token === "string" ? body.token : "";
14740
+ const databaseId = typeof body.databaseId === "string" ? body.databaseId : "";
14741
+ const schemaName = typeof body.schemaName === "string" ? body.schemaName : "";
14742
+ if (!schemaName) return c.json({
14743
+ success: false,
14744
+ error: "Schema is required"
14745
+ }, 400);
14746
+ const result = await inspectNotionDatabase({
14747
+ token,
14748
+ databaseId,
14749
+ schemaFields: await loadSchemaFields(config, schemaName)
14750
+ });
14751
+ return c.json({
14752
+ success: true,
14753
+ ...result
14754
+ });
14755
+ } catch (error) {
14756
+ return c.json({
14757
+ success: false,
14758
+ error: getErrorMessage(error)
14759
+ }, 400);
14760
+ }
14761
+ });
13883
14762
  app.put("/ai/config", async (c) => {
13884
14763
  try {
13885
14764
  const body = await c.req.json();
@@ -13897,10 +14776,18 @@ function aiRoutes(config) {
13897
14776
  success: false,
13898
14777
  error: "At least one model must be configured"
13899
14778
  }, 400);
13900
- if (body.provider?.apiKey?.startsWith("sk-***")) {
13901
- const existing = await readAIConfig(aiexDir);
13902
- if (existing) body.provider.apiKey = existing.provider.apiKey;
13903
- else body.provider.apiKey = "";
14779
+ if (body.notion?.enabled) {
14780
+ if (!body.notion.token?.trim()) return c.json({
14781
+ success: false,
14782
+ error: "Notion token is required when Notion export is enabled"
14783
+ }, 400);
14784
+ for (const [schemaName, schemaConfig] of Object.entries(body.notion.schemas ?? {})) {
14785
+ if (typeof schemaConfig.databaseId === "string") schemaConfig.databaseId = parseNotionDatabaseId(schemaConfig.databaseId);
14786
+ if (!schemaConfig.databaseId?.trim()) return c.json({
14787
+ success: false,
14788
+ error: `Notion database ID is required for schema "${schemaName}"`
14789
+ }, 400);
14790
+ }
13904
14791
  }
13905
14792
  await writeAIConfig(aiexDir, AIConfigSchema.parse(body));
13906
14793
  return c.json({ success: true });
@@ -13917,10 +14804,25 @@ function aiRoutes(config) {
13917
14804
  //#endregion
13918
14805
  //#region src/server/routes/data.ts
13919
14806
  const FILE_REGEX = /\.json$/;
13920
- const EXTRACTION_FILE_RE = /^[\w.-]+\.json$/;
13921
- const TABLE_NAME_RE$1 = /^[a-z][a-z0-9_]*$/;
13922
14807
  const TIMESTAMP_CLEANUP = /(\d{2})-(\d{2})-(\d{2})/;
13923
14808
  const TIMESTAMP_TZ = /(\d{3})Z/;
14809
+ const tableParamSchema = z.object({ name: z.string().regex(/^[a-z][a-z0-9_]*$/) });
14810
+ const extractionFileParamSchema = z.object({ name: z.string().regex(/^[\w.-]+\.json$/).refine((name$1) => name$1 === path.basename(name$1) && !name$1.includes("..")) });
14811
+ const tableQuerySchema = z.object({
14812
+ page: z.coerce.number().int().min(1).catch(1),
14813
+ pageSize: z.coerce.number().int().min(1).max(500).catch(50),
14814
+ search: z.string().catch(""),
14815
+ sortField: z.string().optional(),
14816
+ sortOrder: z.preprocess((value) => typeof value === "string" ? value.toLowerCase() : value, z.enum(["asc", "desc"]).catch("asc"))
14817
+ });
14818
+ function invalidParamResponse$1(message) {
14819
+ return (result, c) => {
14820
+ if (!result.success) return c.json({ error: message }, 400);
14821
+ };
14822
+ }
14823
+ function createReadonlyQueryDb(databasePath) {
14824
+ return new Kysely({ dialect: new SqliteDialect({ database: new Database(databasePath, { readonly: true }) }) });
14825
+ }
13924
14826
  function dataRoutes(config) {
13925
14827
  const app = new Hono();
13926
14828
  const aiexDir = path.dirname(config.schemaPath);
@@ -13968,15 +14870,19 @@ function dataRoutes(config) {
13968
14870
  let db = null;
13969
14871
  let dbTables = [];
13970
14872
  try {
13971
- db = new Database(config.databasePath, { readonly: true });
13972
- dbTables = db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%' AND name NOT LIKE '_%' ORDER BY name`).all().map((r) => r.name);
14873
+ db = createReadonlyQueryDb(config.databasePath);
14874
+ dbTables = (await sql`
14875
+ select name
14876
+ from sqlite_master
14877
+ where type = 'table' and name not like 'sqlite_%' and name not like '_%'
14878
+ order by name
14879
+ `.execute(db)).rows.map((row) => row.name);
13973
14880
  } catch {} finally {
13974
- db?.close();
14881
+ await db?.destroy();
13975
14882
  }
13976
14883
  const tables = [];
13977
14884
  for (const file of schemaFiles) try {
13978
- const content = await fs.readFile(path.join(schemaDir, file), "utf-8");
13979
- const schema = JSON.parse(content);
14885
+ const schema = await readFile(path.join(schemaDir, file));
13980
14886
  const tableName = schema.table?.name;
13981
14887
  if (!tableName) continue;
13982
14888
  tables.push({
@@ -13992,46 +14898,64 @@ function dataRoutes(config) {
13992
14898
  return c.json({ error: error instanceof Error ? error.message : String(error) }, 500);
13993
14899
  }
13994
14900
  });
13995
- app.get("/data/tables/:name", async (c) => {
13996
- const tableName = c.req.param("name");
13997
- if (!TABLE_NAME_RE$1.test(tableName)) return c.json({ error: "Invalid table name" }, 400);
13998
- const sortField = c.req.query("sortField");
13999
- const sortOrder = c.req.query("sortOrder") || "asc";
14901
+ app.get("/data/tables/:name", zValidator("param", tableParamSchema, invalidParamResponse$1("Invalid table name")), zValidator("query", tableQuerySchema), async (c) => {
14902
+ const { name: tableName } = c.req.valid("param");
14903
+ const { page, pageSize, search, sortField, sortOrder } = c.req.valid("query");
14000
14904
  let db;
14001
14905
  try {
14002
- db = new Database(config.databasePath, { readonly: true });
14906
+ db = createReadonlyQueryDb(config.databasePath);
14003
14907
  } catch {
14004
14908
  return c.json({ error: "Database not found. Run `aiex schema` first." }, 400);
14005
14909
  }
14006
14910
  try {
14007
- if (!db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name=?`).get(tableName)) {
14008
- db.close();
14009
- return c.json({ error: `Table "${tableName}" not found in database` }, 404);
14010
- }
14011
- const columns = db.prepare(`PRAGMA table_info(\`${tableName}\`)`).all().map((col) => ({
14911
+ if ((await sql`
14912
+ select name
14913
+ from sqlite_master
14914
+ where type = 'table' and name = ${tableName}
14915
+ `.execute(db)).rows.length === 0) return c.json({ error: `Table "${tableName}" not found in database` }, 404);
14916
+ const columns = (await sql`
14917
+ pragma table_info(${sql.table(tableName)})
14918
+ `.execute(db)).rows.map((col) => ({
14012
14919
  name: col.name,
14013
14920
  type: col.type,
14014
14921
  notNull: !!col.notnull,
14015
14922
  pk: !!col.pk
14016
14923
  }));
14017
- let orderClause = "";
14018
- if (sortField && columns.some((c$1) => c$1.name === sortField)) orderClause = ` ORDER BY \`${sortField}\` ${sortOrder.toLowerCase() === "desc" ? "DESC" : "ASC"}`;
14019
- const total = db.prepare(`SELECT COUNT(*) as count FROM \`${tableName}\``).get().count;
14020
- const rows = db.prepare(`SELECT * FROM \`${tableName}\`${orderClause} LIMIT 200`).all();
14924
+ const searchConditions = columns.map((col) => sql`${sql.ref(col.name)} like ${`%${search}%`}`);
14925
+ const searchCondition = search ? sql`where ${sql.join(searchConditions, sql` or `)}` : sql``;
14926
+ const sortColumn = columns.find((col) => col.name === sortField);
14927
+ const orderBy = sortColumn ? sql`order by ${sql.ref(sortColumn.name)} ${sql.raw(sortOrder === "desc" ? "desc" : "asc")}` : sql``;
14928
+ const total = (await sql`
14929
+ select count(*) as count
14930
+ from ${sql.table(tableName)}
14931
+ ${searchCondition}
14932
+ `.execute(db)).rows[0]?.count ?? 0;
14933
+ const offset = (page - 1) * pageSize;
14934
+ const totalPages = Math.max(1, Math.ceil(total / pageSize));
14935
+ const result = await sql`
14936
+ select *
14937
+ from ${sql.table(tableName)}
14938
+ ${searchCondition}
14939
+ ${orderBy}
14940
+ limit ${pageSize}
14941
+ offset ${offset}
14942
+ `.execute(db);
14021
14943
  return c.json({
14022
14944
  columns,
14023
- rows,
14024
- total
14945
+ rows: result.rows,
14946
+ total,
14947
+ page,
14948
+ pageSize,
14949
+ totalPages
14025
14950
  });
14026
14951
  } catch (error) {
14027
14952
  return c.json({ error: error instanceof Error ? error.message : String(error) }, 500);
14028
14953
  } finally {
14029
- db.close();
14954
+ await db.destroy();
14030
14955
  }
14031
14956
  });
14032
- app.get("/data/:name", async (c) => {
14033
- const name$1 = c.req.param("name");
14034
- if (name$1 !== path.basename(name$1) || !EXTRACTION_FILE_RE.test(name$1) || name$1.includes("..")) return c.json({ error: "Invalid extraction file name" }, 400);
14957
+ app.get("/data/:name", zValidator("param", extractionFileParamSchema, invalidParamResponse$1("Invalid extraction file name")), async (c) => {
14958
+ const { name: name$1 } = c.req.valid("param");
14035
14959
  const filePath = path.join(extractedDir, name$1);
14036
14960
  try {
14037
14961
  const content = await fs.readFile(filePath, "utf-8");
@@ -14047,14 +14971,256 @@ function dataRoutes(config) {
14047
14971
  return app;
14048
14972
  }
14049
14973
 
14974
+ //#endregion
14975
+ //#region src/server/routes/extract.ts
14976
+ function getFormString(value) {
14977
+ if (Array.isArray(value)) return getFormString(value[0]);
14978
+ return typeof value === "string" ? value.trim() : "";
14979
+ }
14980
+ function getFormFile(value) {
14981
+ if (Array.isArray(value)) return getFormFile(value[0]);
14982
+ return value instanceof File && value.size > 0 ? value : null;
14983
+ }
14984
+ function safeUploadName(name$1) {
14985
+ return path.basename(name$1).replace(/[^\w.-]/g, "_") || "upload.txt";
14986
+ }
14987
+ async function saveUploadToFile(file, uploadsDir, id) {
14988
+ await fs.mkdir(uploadsDir, { recursive: true });
14989
+ const filePath = path.join(uploadsDir, `${id}-${safeUploadName(file.name)}`);
14990
+ const buffer = Buffer.from(await file.arrayBuffer());
14991
+ await fs.writeFile(filePath, buffer);
14992
+ return filePath;
14993
+ }
14994
+ async function executeAuditedExtraction(input) {
14995
+ const aiConfig = await readAIConfig(input.aiexDir);
14996
+ if (!aiConfig) {
14997
+ const record$1 = await updateExtractionAuditRecord(input.aiexDir, input.auditId, {
14998
+ status: "failed",
14999
+ error: "AI configuration not found. Configure AI settings first."
15000
+ });
15001
+ return new Response(JSON.stringify({
15002
+ success: false,
15003
+ error: record$1.error,
15004
+ auditId: record$1.id
15005
+ }), {
15006
+ status: 400,
15007
+ headers: { "content-type": "application/json" }
15008
+ });
15009
+ }
15010
+ if (!aiConfig.provider.apiKey) {
15011
+ const record$1 = await updateExtractionAuditRecord(input.aiexDir, input.auditId, {
15012
+ status: "failed",
15013
+ error: "API Key not configured. Configure AI settings first."
15014
+ });
15015
+ return new Response(JSON.stringify({
15016
+ success: false,
15017
+ error: record$1.error,
15018
+ auditId: record$1.id
15019
+ }), {
15020
+ status: 400,
15021
+ headers: { "content-type": "application/json" }
15022
+ });
15023
+ }
15024
+ if (!aiConfig.provider.models?.length) {
15025
+ const record$1 = await updateExtractionAuditRecord(input.aiexDir, input.auditId, {
15026
+ status: "failed",
15027
+ error: "No models configured. Add at least one model in AI Settings."
15028
+ });
15029
+ return new Response(JSON.stringify({
15030
+ success: false,
15031
+ error: record$1.error,
15032
+ auditId: record$1.id
15033
+ }), {
15034
+ status: 400,
15035
+ headers: { "content-type": "application/json" }
15036
+ });
15037
+ }
15038
+ const modelOverride = input.modelName ? aiConfig.provider.models.find((model) => model.name === input.modelName) : void 0;
15039
+ if (input.modelName && !modelOverride) {
15040
+ const record$1 = await updateExtractionAuditRecord(input.aiexDir, input.auditId, {
15041
+ status: "failed",
15042
+ error: `Model "${input.modelName}" not found in AI settings`
15043
+ });
15044
+ return new Response(JSON.stringify({
15045
+ success: false,
15046
+ error: record$1.error,
15047
+ auditId: record$1.id
15048
+ }), {
15049
+ status: 400,
15050
+ headers: { "content-type": "application/json" }
15051
+ });
15052
+ }
15053
+ let inputText = input.text;
15054
+ let inputFilePath = input.filePath;
15055
+ if (input.filePath) {
15056
+ const source = await readExtractFileInput(input.filePath, aiConfig);
15057
+ inputText = source.text;
15058
+ inputFilePath = source.filePath;
15059
+ }
15060
+ const result = await extractSingle(input.aiexDir, input.config, aiConfig, input.schemaName, inputText, inputFilePath, modelOverride, { quiet: true });
15061
+ if (!result.success) {
15062
+ const record$1 = await updateExtractionAuditRecord(input.aiexDir, input.auditId, {
15063
+ status: "failed",
15064
+ error: result.error || "Extraction failed"
15065
+ });
15066
+ return new Response(JSON.stringify({
15067
+ success: false,
15068
+ error: record$1.error,
15069
+ auditId: record$1.id
15070
+ }), {
15071
+ status: 500,
15072
+ headers: { "content-type": "application/json" }
15073
+ });
15074
+ }
15075
+ const notionPages = [];
15076
+ if (input.syncNotion) try {
15077
+ if (!result.data || typeof result.data !== "object" || Array.isArray(result.data)) throw new Error("Extraction result is not an object and cannot be written to Notion.");
15078
+ notionPages.push(await writeNotionPage(aiConfig.notion, input.schemaName, result.data));
15079
+ } catch (error) {
15080
+ const record$1 = await updateExtractionAuditRecord(input.aiexDir, input.auditId, {
15081
+ status: "failed",
15082
+ outputPath: result.outputPath,
15083
+ outputName: result.outputPath ? path.basename(result.outputPath) : void 0,
15084
+ tablesInserted: result.tablesInserted,
15085
+ tokensUsed: result.tokensUsed,
15086
+ error: error instanceof Error ? error.message : String(error)
15087
+ });
15088
+ return new Response(JSON.stringify({
15089
+ success: false,
15090
+ error: record$1.error,
15091
+ auditId: record$1.id
15092
+ }), {
15093
+ status: 500,
15094
+ headers: { "content-type": "application/json" }
15095
+ });
15096
+ }
15097
+ const record = await updateExtractionAuditRecord(input.aiexDir, input.auditId, {
15098
+ status: "succeeded",
15099
+ outputPath: result.outputPath,
15100
+ outputName: result.outputPath ? path.basename(result.outputPath) : void 0,
15101
+ tablesInserted: result.tablesInserted,
15102
+ notionPages: notionPages.length > 0 ? notionPages : void 0,
15103
+ tokensUsed: result.tokensUsed
15104
+ });
15105
+ return new Response(JSON.stringify({
15106
+ success: true,
15107
+ outputPath: record.outputPath,
15108
+ outputName: record.outputName,
15109
+ tablesInserted: record.tablesInserted,
15110
+ notionPages: record.notionPages,
15111
+ tokensUsed: record.tokensUsed,
15112
+ auditId: record.id
15113
+ }), {
15114
+ status: 200,
15115
+ headers: { "content-type": "application/json" }
15116
+ });
15117
+ }
15118
+ function extractRoutes(config) {
15119
+ const app = new Hono();
15120
+ const aiexDir = path.dirname(config.schemaPath);
15121
+ const uploadsDir = path.join(aiexDir, "uploads");
15122
+ app.get("/extract/records", async (c) => {
15123
+ return c.json(await listExtractionAuditRecords(aiexDir));
15124
+ });
15125
+ app.post("/extract", async (c) => {
15126
+ try {
15127
+ const body = await c.req.parseBody();
15128
+ const schemaName = getFormString(body.schema);
15129
+ const text$1 = getFormString(body.text);
15130
+ const modelName = getFormString(body.model);
15131
+ const syncNotion = getFormString(body.notion) === "true";
15132
+ const file = getFormFile(body.file);
15133
+ if (!schemaName) return c.json({
15134
+ success: false,
15135
+ error: "Schema is required"
15136
+ }, 400);
15137
+ if (!text$1 && !file) return c.json({
15138
+ success: false,
15139
+ error: "Provide text or upload a file to extract"
15140
+ }, 400);
15141
+ if (text$1 && file) return c.json({
15142
+ success: false,
15143
+ error: "Text and file input cannot be used together"
15144
+ }, 400);
15145
+ const audit = await createExtractionAuditRecord(aiexDir, {
15146
+ schemaName,
15147
+ modelName,
15148
+ source: file ? {
15149
+ type: "file",
15150
+ fileName: safeUploadName(file.name)
15151
+ } : {
15152
+ type: "text",
15153
+ text: text$1
15154
+ }
15155
+ });
15156
+ let filePath;
15157
+ if (file) {
15158
+ filePath = await saveUploadToFile(file, uploadsDir, audit.id);
15159
+ await updateExtractionAuditRecord(aiexDir, audit.id, { source: {
15160
+ type: "file",
15161
+ filePath,
15162
+ fileName: safeUploadName(file.name)
15163
+ } });
15164
+ }
15165
+ return executeAuditedExtraction({
15166
+ aiexDir,
15167
+ config,
15168
+ auditId: audit.id,
15169
+ schemaName,
15170
+ text: text$1,
15171
+ filePath,
15172
+ modelName,
15173
+ syncNotion
15174
+ });
15175
+ } catch (error) {
15176
+ return c.json({
15177
+ success: false,
15178
+ error: error instanceof Error ? error.message : String(error)
15179
+ }, 500);
15180
+ }
15181
+ });
15182
+ app.post("/extract/records/:id/retry", async (c) => {
15183
+ const original = await readExtractionAuditRecord(aiexDir, c.req.param("id"));
15184
+ if (!original) return c.json({
15185
+ success: false,
15186
+ error: "Extraction record not found"
15187
+ }, 404);
15188
+ return executeAuditedExtraction({
15189
+ aiexDir,
15190
+ config,
15191
+ auditId: (await createExtractionAuditRecord(aiexDir, {
15192
+ schemaName: original.schemaName,
15193
+ modelName: original.modelName,
15194
+ source: original.source,
15195
+ retryOf: original.id
15196
+ })).id,
15197
+ schemaName: original.schemaName,
15198
+ text: original.source.type === "text" ? original.source.text ?? "" : "",
15199
+ filePath: original.source.type === "file" ? original.source.filePath : void 0,
15200
+ modelName: original.modelName
15201
+ });
15202
+ });
15203
+ app.delete("/extract/records/:id", async (c) => {
15204
+ const id = c.req.param("id");
15205
+ if (!await readExtractionAuditRecord(aiexDir, id)) return c.json({
15206
+ success: false,
15207
+ error: "Extraction record not found"
15208
+ }, 404);
15209
+ await deleteExtractionAuditRecord(aiexDir, id);
15210
+ return c.json({ success: true });
15211
+ });
15212
+ return app;
15213
+ }
15214
+
14050
15215
  //#endregion
14051
15216
  //#region src/server/routes/schema.ts
14052
- const execFileAsync = promisify(execFile);
14053
- const SCHEMA_FILE_RE = /^[\w.-]+\.json$/;
14054
- const TABLE_NAME_RE = /^[a-z][a-z0-9_]*$/;
14055
- function resolveSchemaFile(schemaDir, name$1) {
14056
- if (name$1 !== path.basename(name$1) || !SCHEMA_FILE_RE.test(name$1) || name$1.includes("..")) return null;
14057
- return path.join(schemaDir, name$1);
15217
+ const schemaFileNameSchema = z.string().regex(/^[\w.-]+\.json$/).refine((name$1) => name$1 === path.basename(name$1) && !name$1.includes(".."));
15218
+ const schemaFileParamSchema = z.object({ name: schemaFileNameSchema });
15219
+ const tableNameParamSchema = z.object({ name: z.string().regex(/^[a-z][a-z0-9_]*$/) });
15220
+ function invalidParamResponse(message) {
15221
+ return (result, c) => {
15222
+ if (!result.success) return c.json({ error: message }, 400);
15223
+ };
14058
15224
  }
14059
15225
  function schemaRoutes(config) {
14060
15226
  const app = new Hono();
@@ -14067,23 +15233,25 @@ function schemaRoutes(config) {
14067
15233
  const jsonFiles = (await fs.readdir(schemaDir)).filter((f) => f.endsWith(".json"));
14068
15234
  return c.json(jsonFiles);
14069
15235
  });
14070
- app.get("/schema/:name", async (c) => {
14071
- const filePath = resolveSchemaFile(schemaDir, c.req.param("name"));
14072
- if (!filePath) return c.json({ error: "Invalid schema file name" }, 400);
15236
+ app.get("/schema/:name", zValidator("param", schemaFileParamSchema, invalidParamResponse("Invalid schema file name")), async (c) => {
15237
+ const { name: name$1 } = c.req.valid("param");
15238
+ const filePath = path.join(schemaDir, name$1);
14073
15239
  try {
14074
- const content = await fs.readFile(filePath, "utf-8");
14075
- return c.json(JSON.parse(content));
15240
+ return c.json(await readFile(filePath));
14076
15241
  } catch {
14077
15242
  return c.json({ error: "Schema not found" }, 404);
14078
15243
  }
14079
15244
  });
14080
- app.post("/schema/:name", async (c) => {
14081
- const filePath = resolveSchemaFile(schemaDir, c.req.param("name"));
14082
- if (!filePath) return c.json({ error: "Invalid schema file name" }, 400);
15245
+ app.post("/schema/:name", zValidator("param", schemaFileParamSchema, invalidParamResponse("Invalid schema file name")), async (c) => {
15246
+ const { name: name$1 } = c.req.valid("param");
15247
+ const filePath = path.join(schemaDir, name$1);
14083
15248
  try {
14084
15249
  const body = await c.req.json();
14085
15250
  await ensureDir();
14086
- await fs.writeFile(filePath, `${JSON.stringify(body, null, 2)}\n`);
15251
+ await writeFile(filePath, body, {
15252
+ spaces: 2,
15253
+ EOL: "\n"
15254
+ });
14087
15255
  const aiexDir = path.dirname(schemaDir);
14088
15256
  try {
14089
15257
  await savePromptSnapshot(JsonSchemaDefinitionSchema.parse(body), aiexDir);
@@ -14093,12 +15261,8 @@ function schemaRoutes(config) {
14093
15261
  return c.json({ error: "Failed to save schema" }, 500);
14094
15262
  }
14095
15263
  });
14096
- app.get("/prompt-snapshot/:name", async (c) => {
14097
- const name$1 = c.req.param("name");
14098
- if (!TABLE_NAME_RE.test(name$1)) return c.json({
14099
- success: false,
14100
- error: "Invalid table name"
14101
- }, 400);
15264
+ app.get("/prompt-snapshot/:name", zValidator("param", tableNameParamSchema, invalidParamResponse("Invalid table name")), async (c) => {
15265
+ const { name: name$1 } = c.req.valid("param");
14102
15266
  const aiexDir = path.dirname(schemaDir);
14103
15267
  const snapshotPath = path.join(aiexDir, "extracted", `${name$1}.prompt.md`);
14104
15268
  try {
@@ -14114,14 +15278,13 @@ function schemaRoutes(config) {
14114
15278
  }, 404);
14115
15279
  }
14116
15280
  });
14117
- app.delete("/schema/:name", async (c) => {
14118
- const filePath = resolveSchemaFile(schemaDir, c.req.param("name"));
14119
- if (!filePath) return c.json({ error: "Invalid schema file name" }, 400);
15281
+ app.delete("/schema/:name", zValidator("param", schemaFileParamSchema, invalidParamResponse("Invalid schema file name")), async (c) => {
15282
+ const { name: name$1 } = c.req.valid("param");
15283
+ const filePath = path.join(schemaDir, name$1);
14120
15284
  try {
14121
15285
  const aiexDir = path.dirname(schemaDir);
14122
15286
  try {
14123
- const content = await fs.readFile(filePath, "utf-8");
14124
- const parsed = JsonSchemaDefinitionSchema.safeParse(JSON.parse(content));
15287
+ const parsed = JsonSchemaDefinitionSchema.safeParse(await readFile(filePath));
14125
15288
  if (parsed.success) {
14126
15289
  const tableName = parsed.data.table.name;
14127
15290
  const snapshotPath = path.join(aiexDir, "extracted", `${tableName}.prompt.md`);
@@ -14137,59 +15300,21 @@ function schemaRoutes(config) {
14137
15300
  app.post("/migrate", async (c) => {
14138
15301
  try {
14139
15302
  await ensureDir();
14140
- await fs.mkdir(path.dirname(config.drizzleSchemaPath), { recursive: true });
14141
- const jsonFiles = (await fs.readdir(schemaDir)).filter((f) => f.endsWith(".json"));
14142
- if (jsonFiles.length === 0) return c.json({
14143
- success: false,
14144
- error: "No schema files found"
14145
- }, 400);
14146
- const parsedResult = parseAllSchemas(await Promise.all(jsonFiles.map(async (fileName) => {
14147
- const filePath = path.join(schemaDir, fileName);
14148
- return {
14149
- filePath,
14150
- content: await fs.readFile(filePath, "utf-8")
14151
- };
14152
- })));
14153
- if (!parsedResult.success) return c.json({
14154
- success: false,
14155
- error: parsedResult.error
14156
- }, 400);
14157
- const { tables, relations, reverseRelations, warnings, drizzleCode } = parsedResult.data;
14158
- await fs.writeFile(config.drizzleSchemaPath, drizzleCode);
14159
- const helperPath = resolveHelperPath();
14160
- const tsxPath = resolveTsxPath();
14161
- const { stdout, stderr } = await execFileAsync(process.execPath, [
14162
- tsxPath,
14163
- helperPath,
14164
- config.drizzleSchemaPath,
14165
- config.migrationsPath,
14166
- config.databasePath
14167
- ], { cwd: process.cwd() });
14168
- let migrationResult;
14169
- try {
14170
- const jsonLine = stdout.trim().split("\n").find((l) => l.startsWith("{") && l.endsWith("}"));
14171
- if (!jsonLine) return c.json({
14172
- success: false,
14173
- error: "Migration helper did not return valid output"
14174
- }, 500);
14175
- migrationResult = JSON.parse(jsonLine);
14176
- } catch {
15303
+ const result = await runSchemaSync(config);
15304
+ if (!result.success) {
15305
+ const status = result.schemaCount === 0 ? 400 : 500;
14177
15306
  return c.json({
14178
15307
  success: false,
14179
- error: stderr || stdout || "Migration helper failed"
14180
- }, 500);
15308
+ error: result.error || "Migration failed"
15309
+ }, status);
14181
15310
  }
14182
- if (!migrationResult.success) return c.json({
14183
- success: false,
14184
- error: migrationResult.error || "Migration failed"
14185
- }, 500);
14186
15311
  return c.json({
14187
15312
  success: true,
14188
- changes: migrationResult.changes ?? 0,
14189
- tag: migrationResult.tag,
14190
- tables: tables.length,
14191
- relations: relations.length + reverseRelations.length,
14192
- warnings
15313
+ changes: result.migration?.changes ?? 0,
15314
+ tag: result.migration?.tag,
15315
+ tables: result.tables,
15316
+ relations: result.relations,
15317
+ warnings: result.warnings
14193
15318
  });
14194
15319
  } catch (error) {
14195
15320
  return c.json({
@@ -14209,6 +15334,7 @@ function createApp(config, staticDir) {
14209
15334
  app.use("*", cors({ origin: (origin) => LOCAL_ORIGIN_RE.test(origin) ? origin : null }));
14210
15335
  app.route("/api", schemaRoutes(config));
14211
15336
  app.route("/api", aiRoutes(config));
15337
+ app.route("/api", extractRoutes(config));
14212
15338
  app.route("/api", dataRoutes(config));
14213
15339
  app.use("/*", serveStatic({
14214
15340
  root: staticDir,
@@ -14237,9 +15363,36 @@ function createApp(config, staticDir) {
14237
15363
  return app;
14238
15364
  }
14239
15365
 
15366
+ //#endregion
15367
+ //#region src/core/web-runner.ts
15368
+ function resolveWebStaticDir() {
15369
+ return path.join(resolvePackageRoot(), "dist/web");
15370
+ }
15371
+ async function openBrowser(url) {
15372
+ await open(url);
15373
+ }
15374
+ async function startWebServer(input) {
15375
+ const { config, port } = input;
15376
+ const staticDir = input.staticDir ?? resolveWebStaticDir();
15377
+ const url = `http://localhost:${port}`;
15378
+ serve({
15379
+ fetch: createApp(config, staticDir).fetch,
15380
+ port
15381
+ }, () => {
15382
+ input.onStarted?.({
15383
+ url,
15384
+ schemaPath: config.schemaPath
15385
+ });
15386
+ if (input.open === false) return;
15387
+ openBrowser(url).catch(() => {
15388
+ input.onOpenFailed?.(url);
15389
+ });
15390
+ });
15391
+ await new Promise(() => {});
15392
+ }
15393
+
14240
15394
  //#endregion
14241
15395
  //#region src/commands/web.ts
14242
- const execAsync = promisify(exec);
14243
15396
  const webCommand = defineCommand({
14244
15397
  meta: {
14245
15398
  name: "web",
@@ -14256,23 +15409,20 @@ const webCommand = defineCommand({
14256
15409
  const cwd = process.cwd();
14257
15410
  const port = Number(args.port) || 13e3;
14258
15411
  const config = createMigrationConfig(cwd);
14259
- const packageRoot = resolvePackageRoot();
14260
- const staticDir = path.join(packageRoot, "dist/web");
14261
15412
  const s = spinner();
14262
15413
  s.start("Starting web server...");
14263
- serve({
14264
- fetch: createApp(config, staticDir).fetch,
14265
- port
14266
- }, () => {
14267
- s.stop(`Server running at ${pc.cyan(`http://localhost:${port}`)}`);
14268
- consola.info(`Schema directory: ${pc.dim(config.schemaPath)}`);
14269
- consola.info("Press Ctrl+C to stop");
14270
- const url = `http://localhost:${port}`;
14271
- execAsync(`${process.platform === "darwin" ? "open" : process.platform === "win32" ? "start" : "xdg-open"} ${url}`).catch(() => {
15414
+ await startWebServer({
15415
+ config,
15416
+ port,
15417
+ onStarted(info) {
15418
+ s.stop(`Server running at ${pc.cyan(info.url)}`);
15419
+ consola.info(`Schema directory: ${pc.dim(info.schemaPath)}`);
15420
+ consola.info("Press Ctrl+C to stop");
15421
+ },
15422
+ onOpenFailed(url) {
14272
15423
  consola.warn(`Could not open browser. Visit ${url} manually.`);
14273
- });
15424
+ }
14274
15425
  });
14275
- await new Promise(() => {});
14276
15426
  }
14277
15427
  });
14278
15428
 
@@ -14300,7 +15450,7 @@ process.on("unhandledRejection", (reason) => {
14300
15450
  process.exit(1);
14301
15451
  });
14302
15452
  if (process.argv[2] === "_complete") {
14303
- const { getCompletions } = await import("./completions-ygS1okck.mjs");
15453
+ const { getCompletions } = await import("./completions-C3rmTwXZ.mjs");
14304
15454
  const suggestions = getCompletions(subCommands, process.argv.slice(3));
14305
15455
  for (const s of suggestions) process.stdout.write(`${s}\n`);
14306
15456
  process.exit(0);