aiex-cli 0.0.4-beta.1 → 0.0.4-beta.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { A as formatDoctorDiagnosticsJson, C as seedConfig, D as version, E as package_default, S as createConfig, T as name, _ as DEFAULT_MINERU_CONFIG, a as parseJsonSchema, b as PLACEHOLDER_TEXT, c as recognizeImageText, d as t, f as getDefaultAIConfig, g as DEFAULT_MARKITDOWN_CONFIG, h as DEFAULT_MARKER_CONFIG, i as JsonSchemaDefinitionSchema, k as doctorDiagnosticsTableRows, l as shouldUseImageOcrFallback, m as writeAIConfig, n as createMigrationConfig, o as toSnakeCase, p as readAIConfig, s as generateDrizzleSchema, t as collectDoctorDiagnostics, u as initI18n, v as DEFAULT_PROMPT_CONFIG, w as description, x as AIConfigSchema, y as PLACEHOLDER_SCHEMA } from "./doctor-collector-xRnW5Rj3.mjs";
1
+ import { A as formatDoctorDiagnosticsJson, C as seedConfig, D as version, E as package_default, S as createConfig, T as name, _ as DEFAULT_MINERU_CONFIG, a as parseJsonSchema, b as PLACEHOLDER_TEXT, c as recognizeImageText, d as t, f as getDefaultAIConfig, g as DEFAULT_MARKITDOWN_CONFIG, h as DEFAULT_MARKER_CONFIG, i as JsonSchemaDefinitionSchema, k as doctorDiagnosticsTableRows, l as shouldUseImageOcrFallback, m as writeAIConfig, n as createMigrationConfig, o as toSnakeCase, p as readAIConfig, s as generateDrizzleSchema, t as collectDoctorDiagnostics, u as initI18n, v as DEFAULT_PROMPT_CONFIG, w as description, x as AIConfigSchema, y as PLACEHOLDER_SCHEMA } from "./doctor-collector-8fLyh9lK.mjs";
2
2
  import { createRequire } from "node:module";
3
3
  import fs from "node:fs/promises";
4
4
  import os from "node:os";
@@ -15,20 +15,20 @@ import fs$1 from "node:fs";
15
15
  import { intro, isCancel, outro, select, spinner, text } from "@clack/prompts";
16
16
  import Database from "better-sqlite3";
17
17
  import pc from "picocolors";
18
+ import { Buffer } from "node:buffer";
18
19
  import * as XLSX from "xlsx";
19
- import { glob, globSync } from "tinyglobby";
20
20
  import { createOpenAICompatible } from "@ai-sdk/openai-compatible";
21
- import { LangfuseSpanProcessor } from "@langfuse/otel";
22
- import { NodeTracerProvider } from "@opentelemetry/sdk-trace-node";
23
21
  import { APICallError, Output, generateText, jsonSchema } from "ai";
24
- import mime from "mime";
25
22
  import pRetry from "p-retry";
23
+ import mime from "mime";
26
24
  import { jsonrepair } from "jsonrepair";
25
+ import { LangfuseSpanProcessor } from "@langfuse/otel";
26
+ import { NodeTracerProvider } from "@opentelemetry/sdk-trace-node";
27
+ import crypto from "node:crypto";
27
28
  import { Client, extractNotionId } from "@notionhq/client";
28
- import { Buffer } from "node:buffer";
29
29
  import { execa } from "execa";
30
+ import { glob, globSync } from "tinyglobby";
30
31
  import { extractText, getDocumentProxy, getMeta } from "unpdf";
31
- import crypto from "node:crypto";
32
32
  import { execFile } from "node:child_process";
33
33
  import { promisify } from "node:util";
34
34
  import * as chokidar from "chokidar";
@@ -215,6 +215,50 @@ function failCommand(message) {
215
215
  process.exitCode = 1;
216
216
  }
217
217
 
218
+ //#endregion
219
+ //#region src/core/export-manager.ts
220
+ function formatRowsConformingToSchema(rows, columns, schema, format) {
221
+ return rows.map((row) => {
222
+ const newRow = {};
223
+ columns.forEach((col) => {
224
+ const colName = col.name;
225
+ const val = row[colName];
226
+ const type = (schema?.properties?.[colName])?.type || "";
227
+ if (val === null || val === void 0) newRow[colName] = "";
228
+ else if (type === "boolean") if (format === "xlsx") newRow[colName] = val === 1 || val === "1" || val === true;
229
+ else newRow[colName] = val === 1 || val === "1" || val === true ? "true" : "false";
230
+ else if (type === "number" || type === "integer") if (val === "") newRow[colName] = "";
231
+ else {
232
+ const num = Number(val);
233
+ newRow[colName] = Number.isNaN(num) ? val : num;
234
+ }
235
+ else if (typeof val === "object") newRow[colName] = JSON.stringify(val);
236
+ else {
237
+ const dbType = (col.type || "").toLowerCase();
238
+ if ((dbType.includes("int") || dbType.includes("real") || dbType.includes("num") || dbType.includes("double") || dbType.includes("float")) && typeof val === "string" && val !== "") {
239
+ const num = Number(val);
240
+ newRow[colName] = Number.isNaN(num) ? val : num;
241
+ } else newRow[colName] = val;
242
+ }
243
+ });
244
+ return newRow;
245
+ });
246
+ }
247
+ function generateExportBuffer(tableName, formattedRows, columns, format) {
248
+ const ws = XLSX.utils.json_to_sheet(formattedRows, { header: columns.map((col) => col.name) });
249
+ if (format === "xlsx") {
250
+ const wb = XLSX.utils.book_new();
251
+ XLSX.utils.book_append_sheet(wb, ws, tableName.slice(0, 31));
252
+ return XLSX.write(wb, {
253
+ bookType: "xlsx",
254
+ type: "buffer"
255
+ });
256
+ } else {
257
+ const csv = XLSX.utils.sheet_to_csv(ws);
258
+ return Buffer.from("" + csv, "utf8");
259
+ }
260
+ }
261
+
218
262
  //#endregion
219
263
  //#region src/core/ai-extraction/model-capabilities.json
220
264
  var model_capabilities_default = {
@@ -12814,6 +12858,28 @@ async function withRetry(fn, onRetry, maxRetries = 5) {
12814
12858
  });
12815
12859
  }
12816
12860
 
12861
+ //#endregion
12862
+ //#region src/core/ai-extraction/file-utils.ts
12863
+ function detectMimeType(filePath) {
12864
+ return mime.getType(filePath) ?? "application/octet-stream";
12865
+ }
12866
+ async function readFilePart(filePath) {
12867
+ const mimeStr = detectMimeType(filePath);
12868
+ const buffer = await fs.readFile(filePath);
12869
+ const name$1 = path.basename(filePath);
12870
+ if (mimeStr.startsWith("image/")) return {
12871
+ type: "image",
12872
+ image: buffer,
12873
+ mimeType: mimeStr
12874
+ };
12875
+ return {
12876
+ type: "file",
12877
+ data: buffer,
12878
+ mediaType: mimeStr,
12879
+ filename: name$1
12880
+ };
12881
+ }
12882
+
12817
12883
  //#endregion
12818
12884
  //#region src/core/ai-extraction/json-utils.ts
12819
12885
  function parseJsonLike(text$1) {
@@ -12993,7 +13059,34 @@ function generatePromptSnapshot(schema, promptConfig = DEFAULT_PROMPT_CONFIG) {
12993
13059
  }
12994
13060
 
12995
13061
  //#endregion
12996
- //#region src/core/ai-extraction/extractor.ts
13062
+ //#region src/core/ai-extraction/snapshot.ts
13063
+ const SYSTEM_PROMPT_REGEX = /## System Prompt\n([\s\S]*?)(?=## User Prompt|$)/;
13064
+ const USER_PROMPT_REGEX = /## User Prompt Template\n([\s\S]*)$/;
13065
+ async function loadPromptSnapshot(aiexDir, tableName) {
13066
+ const snapshotPath = path.join(aiexDir, "extracted", `${tableName}.prompt.md`);
13067
+ try {
13068
+ const content = await fs.readFile(snapshotPath, "utf-8");
13069
+ const systemMatch = content.match(SYSTEM_PROMPT_REGEX);
13070
+ const userMatch = content.match(USER_PROMPT_REGEX);
13071
+ if (systemMatch && userMatch) return {
13072
+ system: systemMatch[1].trim(),
13073
+ user: userMatch[1].trim()
13074
+ };
13075
+ } catch {}
13076
+ return null;
13077
+ }
13078
+ async function savePromptSnapshot(schema, aiexDir) {
13079
+ const content = generatePromptSnapshot(schema, (await readAIConfig(aiexDir))?.prompt ?? DEFAULT_PROMPT_CONFIG);
13080
+ const outputDir = path.join(aiexDir, "extracted");
13081
+ await fs.mkdir(outputDir, { recursive: true });
13082
+ const fileName = `${schema.table.name}.prompt.md`;
13083
+ const outputPath = path.join(outputDir, fileName);
13084
+ await fs.writeFile(outputPath, content);
13085
+ return outputPath;
13086
+ }
13087
+
13088
+ //#endregion
13089
+ //#region src/core/ai-extraction/telemetry.ts
12997
13090
  let langfuseInitialized = false;
12998
13091
  function initLangfuse(config) {
12999
13092
  if (!config.langfuse?.publicKey || !config.langfuse.secretKey) return;
@@ -13010,28 +13103,9 @@ function initLangfuse(config) {
13010
13103
  console.warn("[Langfuse] Failed to initialize tracing:", e instanceof Error ? e.message : e);
13011
13104
  }
13012
13105
  }
13013
- const SYSTEM_PROMPT_REGEX = /## System Prompt\n([\s\S]*?)(?=## User Prompt|$)/;
13014
- const USER_PROMPT_REGEX = /## User Prompt Template\n([\s\S]*)$/;
13015
- const OPENAI_COMPATIBLE_PROVIDER_NAME = "openai-compatible";
13016
- function detectMimeType(filePath) {
13017
- return mime.getType(filePath) ?? "application/octet-stream";
13018
- }
13019
- async function readFilePart(filePath) {
13020
- const mime$1 = detectMimeType(filePath);
13021
- const buffer = await fs.readFile(filePath);
13022
- const name$1 = path.basename(filePath);
13023
- if (mime$1.startsWith("image/")) return {
13024
- type: "image",
13025
- image: buffer,
13026
- mimeType: mime$1
13027
- };
13028
- return {
13029
- type: "file",
13030
- data: buffer,
13031
- mediaType: mime$1,
13032
- filename: name$1
13033
- };
13034
- }
13106
+
13107
+ //#endregion
13108
+ //#region src/core/ai-extraction/validator.ts
13035
13109
  function nullableType(type) {
13036
13110
  return type === "null" ? ["null"] : [type, "null"];
13037
13111
  }
@@ -13126,19 +13200,10 @@ function validateExtractedData(schema, data) {
13126
13200
  };
13127
13201
  return { success: true };
13128
13202
  }
13129
- async function loadPromptSnapshot(aiexDir, tableName) {
13130
- const snapshotPath = path.join(aiexDir, "extracted", `${tableName}.prompt.md`);
13131
- try {
13132
- const content = await fs.readFile(snapshotPath, "utf-8");
13133
- const systemMatch = content.match(SYSTEM_PROMPT_REGEX);
13134
- const userMatch = content.match(USER_PROMPT_REGEX);
13135
- if (systemMatch && userMatch) return {
13136
- system: systemMatch[1].trim(),
13137
- user: userMatch[1].trim()
13138
- };
13139
- } catch {}
13140
- return null;
13141
- }
13203
+
13204
+ //#endregion
13205
+ //#region src/core/ai-extraction/extractor.ts
13206
+ const OPENAI_COMPATIBLE_PROVIDER_NAME = "openai-compatible";
13142
13207
  async function extractStructuredData(input) {
13143
13208
  const { config, schema, text: text$1, aiexDir, file, modelOverride } = input;
13144
13209
  if (!config.provider.apiKey) return {
@@ -13188,66 +13253,118 @@ async function extractStructuredData(input) {
13188
13253
  user = generated.user;
13189
13254
  }
13190
13255
  const outputSchema = jsonSchema(schemaToExtractionOutputSchema(schema));
13191
- let result;
13192
13256
  const timeoutMs = (config.provider.timeout ?? 300) * 1e3;
13193
- if (useFileContent) {
13194
- const filePart = await readFilePart(file);
13195
- const fileName = filePart.type === "file" ? filePart.filename : path.basename(file);
13196
- const contentParts = [{
13197
- type: "text",
13198
- text: user.includes(PLACEHOLDER_TEXT) ? user.replaceAll(PLACEHOLDER_TEXT, text$1 || `Data is contained in the attached file: ${fileName}`) : user
13199
- }, filePart];
13200
- const fileOpts = {
13201
- model: provider.chatModel(selected.name),
13202
- system,
13203
- messages: [{
13204
- role: "user",
13205
- content: contentParts
13206
- }],
13207
- abortSignal: AbortSignal.timeout(timeoutMs),
13208
- maxRetries: 0,
13209
- experimental_telemetry: { isEnabled: useTelemetry }
13210
- };
13211
- if (useStructuredOutput) fileOpts.output = Output.object({ schema: outputSchema });
13212
- result = await withRetry(() => generateText(fileOpts), input.onRetry);
13213
- } else {
13214
- const textOpts = {
13215
- model: provider.chatModel(selected.name),
13216
- system,
13217
- prompt: user,
13218
- abortSignal: AbortSignal.timeout(timeoutMs),
13219
- maxRetries: 0,
13220
- experimental_telemetry: { isEnabled: useTelemetry }
13221
- };
13222
- if (useStructuredOutput) textOpts.output = Output.object({ schema: outputSchema });
13223
- result = await withRetry(() => generateText(textOpts), input.onRetry);
13257
+ let systemPrompt = system;
13258
+ let userPrompt = user;
13259
+ const maxAttempts = 3;
13260
+ let lastError = "";
13261
+ let totalPromptTokens = 0;
13262
+ let totalCompletionTokens = 0;
13263
+ for (let attempt = 1; attempt <= maxAttempts; attempt++) {
13264
+ let result = null;
13265
+ let data;
13266
+ let parseError;
13267
+ let validationError;
13268
+ try {
13269
+ if (useFileContent) {
13270
+ const filePart = await readFilePart(file);
13271
+ const fileName = filePart.type === "file" ? filePart.filename : path.basename(file);
13272
+ const contentParts = [{
13273
+ type: "text",
13274
+ text: userPrompt.includes(PLACEHOLDER_TEXT) ? userPrompt.replaceAll(PLACEHOLDER_TEXT, text$1 || `Data is contained in the attached file: ${fileName}`) : userPrompt
13275
+ }, filePart];
13276
+ const fileOpts = {
13277
+ model: provider.chatModel(selected.name),
13278
+ system: systemPrompt,
13279
+ messages: [{
13280
+ role: "user",
13281
+ content: contentParts
13282
+ }],
13283
+ abortSignal: AbortSignal.timeout(timeoutMs),
13284
+ maxRetries: 0,
13285
+ experimental_telemetry: { isEnabled: useTelemetry }
13286
+ };
13287
+ if (useStructuredOutput) fileOpts.output = Output.object({ schema: outputSchema });
13288
+ result = await withRetry(() => generateText(fileOpts), input.onRetry);
13289
+ } else {
13290
+ const textOpts = {
13291
+ model: provider.chatModel(selected.name),
13292
+ system: systemPrompt,
13293
+ prompt: userPrompt,
13294
+ abortSignal: AbortSignal.timeout(timeoutMs),
13295
+ maxRetries: 0,
13296
+ experimental_telemetry: { isEnabled: useTelemetry }
13297
+ };
13298
+ if (useStructuredOutput) textOpts.output = Output.object({ schema: outputSchema });
13299
+ result = await withRetry(() => generateText(textOpts), input.onRetry);
13300
+ }
13301
+ if (result.usage) {
13302
+ totalPromptTokens += result.usage.inputTokens ?? 0;
13303
+ totalCompletionTokens += result.usage.outputTokens ?? 0;
13304
+ }
13305
+ if (useStructuredOutput) data = result.output;
13306
+ else try {
13307
+ data = safeParseJSON(result.text);
13308
+ } catch (e) {
13309
+ parseError = e instanceof Error ? e.message : String(e);
13310
+ }
13311
+ } catch (error) {
13312
+ parseError = getErrorMessage(error);
13313
+ }
13314
+ if (!parseError && data !== void 0) {
13315
+ const validation = validateExtractedData(schema, data);
13316
+ if (validation.success) {
13317
+ const outputDir = path.resolve(aiexDir, config.extraction.outputDir.replace(".aiex/", ""));
13318
+ await fs.mkdir(outputDir, { recursive: true });
13319
+ const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
13320
+ const outputFileName = `${schema.table.name}-${timestamp}.json`;
13321
+ const outputPath = path.join(outputDir, outputFileName);
13322
+ await writeFile(outputPath, data, {
13323
+ spaces: 2,
13324
+ EOL: "\n"
13325
+ });
13326
+ return {
13327
+ success: true,
13328
+ outputPath,
13329
+ data,
13330
+ tokensUsed: {
13331
+ prompt: totalPromptTokens,
13332
+ completion: totalCompletionTokens,
13333
+ total: totalPromptTokens + totalCompletionTokens
13334
+ }
13335
+ };
13336
+ } else validationError = validation.error;
13337
+ }
13338
+ const errorMsg = parseError || validationError || "Unknown validation error";
13339
+ lastError = errorMsg;
13340
+ if (attempt < maxAttempts) {
13341
+ const invalidJson = data !== void 0 ? JSON.stringify(data, null, 2) : result ? result.text : "";
13342
+ systemPrompt = `You are a precise data correction assistant. Your task is to correct validation errors in a previously generated JSON object to make it comply with the provided JSON Schema.
13343
+
13344
+ CRITICAL RULES:
13345
+ 1. Only correct the fields that failed validation.
13346
+ 2. Preserve all other correctly extracted fields and their values exactly.
13347
+ 3. Return ONLY the corrected JSON object. No explanations, no markdown blocks other than JSON.`;
13348
+ userPrompt = `The JSON data you generated previously failed validation. Please correct it.
13349
+
13350
+ [Original Text]
13351
+ ${text$1 || "Data is contained in the attached file."}
13352
+
13353
+ [JSON Schema Definition]
13354
+ ${JSON.stringify(schemaToExtractionOutputSchema(schema), null, 2)}
13355
+
13356
+ [Previously Generated Invalid JSON]
13357
+ ${invalidJson}
13358
+
13359
+ [Validation Error Details]
13360
+ ${errorMsg}
13361
+
13362
+ Please output the corrected JSON object now:`;
13363
+ }
13224
13364
  }
13225
- let data;
13226
- if (useStructuredOutput) data = result.output;
13227
- else data = safeParseJSON(result.text);
13228
- const validation = validateExtractedData(schema, data);
13229
- if (!validation.success) return {
13230
- success: false,
13231
- error: validation.error
13232
- };
13233
- const outputDir = path.resolve(aiexDir, config.extraction.outputDir.replace(".aiex/", ""));
13234
- await fs.mkdir(outputDir, { recursive: true });
13235
- const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
13236
- const outputFileName = `${schema.table.name}-${timestamp}.json`;
13237
- const outputPath = path.join(outputDir, outputFileName);
13238
- await writeFile(outputPath, data, {
13239
- spaces: 2,
13240
- EOL: "\n"
13241
- });
13242
13365
  return {
13243
- success: true,
13244
- outputPath,
13245
- data,
13246
- tokensUsed: result.usage ? {
13247
- prompt: result.usage.inputTokens ?? 0,
13248
- completion: result.usage.outputTokens ?? 0,
13249
- total: (result.usage.inputTokens ?? 0) + (result.usage.outputTokens ?? 0)
13250
- } : void 0
13366
+ success: false,
13367
+ error: lastError || "Extraction failed after self-reflection retries"
13251
13368
  };
13252
13369
  } catch (error) {
13253
13370
  return {
@@ -13394,18 +13511,6 @@ function insertExtractedData(db, schema, data) {
13394
13511
  }
13395
13512
  }
13396
13513
 
13397
- //#endregion
13398
- //#region src/core/ai-extraction/snapshot.ts
13399
- async function savePromptSnapshot(schema, aiexDir) {
13400
- const content = generatePromptSnapshot(schema, (await readAIConfig(aiexDir))?.prompt ?? DEFAULT_PROMPT_CONFIG);
13401
- const outputDir = path.join(aiexDir, "extracted");
13402
- await fs.mkdir(outputDir, { recursive: true });
13403
- const fileName = `${schema.table.name}.prompt.md`;
13404
- const outputPath = path.join(outputDir, fileName);
13405
- await fs.writeFile(outputPath, content);
13406
- return outputPath;
13407
- }
13408
-
13409
13514
  //#endregion
13410
13515
  //#region src/core/extraction-audit.ts
13411
13516
  const AUDIT_ID_RE = /^[\w.-]+$/;
@@ -13542,78 +13647,19 @@ async function findSucceededAuditByHash(aiexDir, schemaName, fileHash) {
13542
13647
  }
13543
13648
 
13544
13649
  //#endregion
13545
- //#region src/core/file-constants.ts
13546
- const MAX_UPLOAD_SIZE = 30 * 1024 * 1024;
13547
- const MAX_UPLOAD_SIZE_TEXT = "30MB";
13548
- const SUPPORTED_FILE_TYPES_TEXT = "images, PDF, text, markdown, CSV, JSON, HTML, XML, YAML";
13549
- const MISSING_UPLOAD_FILE_TEXT = t("errors.file.missingUpload");
13550
- const SUPPORTED_MIME_TYPES = new Set([
13551
- "image/png",
13552
- "image/jpeg",
13553
- "image/gif",
13554
- "image/webp",
13555
- "image/bmp",
13556
- "image/svg+xml",
13557
- "application/pdf",
13558
- "text/plain",
13559
- "text/markdown",
13560
- "text/csv",
13561
- "application/json",
13562
- "text/html",
13563
- "text/xml",
13564
- "application/x-yaml",
13565
- "text/yaml"
13566
- ]);
13567
- const MIME_TO_EXT = {
13568
- "image/png": "png",
13569
- "image/jpeg": "jpg",
13570
- "image/gif": "gif",
13571
- "image/webp": "webp",
13572
- "image/bmp": "bmp",
13573
- "image/svg+xml": "svg",
13574
- "application/pdf": "pdf",
13575
- "text/plain": "txt",
13576
- "text/markdown": "md",
13577
- "text/csv": "csv",
13578
- "application/json": "json",
13579
- "text/html": "html",
13580
- "text/xml": "xml",
13581
- "application/x-yaml": "yaml",
13582
- "text/yaml": "yaml"
13583
- };
13584
- function bytesToMB(bytes) {
13585
- return bytes / (1024 * 1024);
13586
- }
13587
- function getExtensionFromMime(mimeType) {
13588
- return MIME_TO_EXT[mimeType];
13589
- }
13590
- function isAllowedMimeType(mimeType) {
13591
- return SUPPORTED_MIME_TYPES.has(mimeType);
13592
- }
13593
- function unsupportedFileTypeMessage(mimeType) {
13594
- return t("errors.file.unsupportedType", {
13595
- type: mimeType,
13596
- supported: SUPPORTED_FILE_TYPES_TEXT
13650
+ //#region src/utils/hash.ts
13651
+ /**
13652
+ * Helper to compute SHA-256 hash of a file asynchronously.
13653
+ */
13654
+ function getFileHash(filePath) {
13655
+ return new Promise((resolve, reject) => {
13656
+ const hash = crypto.createHash("sha256");
13657
+ const stream = fs$1.createReadStream(filePath);
13658
+ stream.on("data", (data) => hash.update(data));
13659
+ stream.on("end", () => resolve(hash.digest("hex")));
13660
+ stream.on("error", (err) => reject(err));
13597
13661
  });
13598
13662
  }
13599
- function isMissingUploadFileError(error) {
13600
- return !!error && typeof error === "object" && error.code === "ENOENT";
13601
- }
13602
- var FileValidationError = class extends Error {
13603
- constructor(message) {
13604
- super(message);
13605
- this.name = "FileValidationError";
13606
- }
13607
- };
13608
- function validateFileUpload(file) {
13609
- if (file.size === 0) throw new FileValidationError(t("errors.file.empty"));
13610
- if (file.size > MAX_UPLOAD_SIZE) throw new FileValidationError(t("errors.file.sizeExceeded", {
13611
- size: bytesToMB(file.size).toFixed(1),
13612
- limit: MAX_UPLOAD_SIZE_TEXT,
13613
- file: file.name
13614
- }));
13615
- if (!isAllowedMimeType(file.type)) throw new FileValidationError(unsupportedFileTypeMessage(file.type));
13616
- }
13617
13663
 
13618
13664
  //#endregion
13619
13665
  //#region src/core/notion-sink.ts
@@ -13847,20 +13893,148 @@ async function writeNotionPage(config, schemaName, data) {
13847
13893
  }
13848
13894
 
13849
13895
  //#endregion
13850
- //#region src/core/pdf-converter/external.ts
13851
- function applyTemplate(value, context) {
13852
- return value.replaceAll("{input}", context.input).replaceAll("{outputDir}", context.outputDir).replaceAll("{basename}", context.basename);
13896
+ //#region src/core/webhook-sink.ts
13897
+ async function sendWebhook(config, payload) {
13898
+ if (!config || !config.enabled || !config.url) return;
13899
+ const body = JSON.stringify(payload);
13900
+ const headers = {
13901
+ "Content-Type": "application/json",
13902
+ "User-Agent": "aiex-webhook-dispatcher"
13903
+ };
13904
+ if (config.secret) headers["X-Aiex-Signature"] = `sha256=${crypto.createHmac("sha256", config.secret).update(body).digest("hex")}`;
13905
+ const response = await fetch(config.url, {
13906
+ method: "POST",
13907
+ headers,
13908
+ body
13909
+ });
13910
+ if (!response.ok) throw new Error(`Webhook request failed with status: ${response.status} ${response.statusText}`);
13853
13911
  }
13854
- function isError(error) {
13855
- return error instanceof Error;
13912
+
13913
+ //#endregion
13914
+ //#region src/core/integration/dispatcher.ts
13915
+ async function syncResultToNotion(aiConfig, schemaName, data) {
13916
+ if (!data || typeof data !== "object" || Array.isArray(data)) throw new Error(t("errors.ai.extractionNotObject"));
13917
+ const page = await writeNotionPage(aiConfig.notion, schemaName, data);
13918
+ return [{
13919
+ databaseId: page.databaseId,
13920
+ pageId: page.pageId
13921
+ }];
13856
13922
  }
13857
- async function pathExists(filePath) {
13858
- try {
13859
- await fs.access(filePath);
13860
- return true;
13861
- } catch {
13862
- return false;
13863
- }
13923
+ function shouldSyncNotion(aiConfig, schemaName) {
13924
+ return !!aiConfig.notion?.enabled && !!aiConfig.notion.schemas?.[schemaName]?.databaseId?.trim();
13925
+ }
13926
+ async function triggerWebhook(aiConfig, auditId, schemaName, event, source, data, error, tokensUsed, quiet = false) {
13927
+ if (!aiConfig.webhook?.enabled) return;
13928
+ try {
13929
+ await sendWebhook(aiConfig.webhook, {
13930
+ event,
13931
+ schemaName,
13932
+ auditId,
13933
+ timestamp: (/* @__PURE__ */ new Date()).toISOString(),
13934
+ source: {
13935
+ type: source.type,
13936
+ fileName: source.filePath ? path.basename(source.filePath) : void 0,
13937
+ filePath: source.filePath
13938
+ },
13939
+ data,
13940
+ error,
13941
+ tokensUsed
13942
+ });
13943
+ if (!quiet) consola.success(t("extract.file.webhookSynced"));
13944
+ } catch (err) {
13945
+ if (!quiet) consola.error(t("extract.file.webhookSyncFail", { error: err instanceof Error ? err.message : String(err) }));
13946
+ }
13947
+ }
13948
+
13949
+ //#endregion
13950
+ //#region src/core/file-constants.ts
13951
+ const MAX_UPLOAD_SIZE = 30 * 1024 * 1024;
13952
+ const MAX_UPLOAD_SIZE_TEXT = "30MB";
13953
+ const SUPPORTED_FILE_TYPES_TEXT = "images, PDF, text, markdown, CSV, JSON, HTML, XML, YAML";
13954
+ const MISSING_UPLOAD_FILE_TEXT = t("errors.file.missingUpload");
13955
+ const SUPPORTED_MIME_TYPES = new Set([
13956
+ "image/png",
13957
+ "image/jpeg",
13958
+ "image/gif",
13959
+ "image/webp",
13960
+ "image/bmp",
13961
+ "image/svg+xml",
13962
+ "application/pdf",
13963
+ "text/plain",
13964
+ "text/markdown",
13965
+ "text/csv",
13966
+ "application/json",
13967
+ "text/html",
13968
+ "text/xml",
13969
+ "application/x-yaml",
13970
+ "text/yaml"
13971
+ ]);
13972
+ const MIME_TO_EXT = {
13973
+ "image/png": "png",
13974
+ "image/jpeg": "jpg",
13975
+ "image/gif": "gif",
13976
+ "image/webp": "webp",
13977
+ "image/bmp": "bmp",
13978
+ "image/svg+xml": "svg",
13979
+ "application/pdf": "pdf",
13980
+ "text/plain": "txt",
13981
+ "text/markdown": "md",
13982
+ "text/csv": "csv",
13983
+ "application/json": "json",
13984
+ "text/html": "html",
13985
+ "text/xml": "xml",
13986
+ "application/x-yaml": "yaml",
13987
+ "text/yaml": "yaml"
13988
+ };
13989
+ function bytesToMB(bytes) {
13990
+ return bytes / (1024 * 1024);
13991
+ }
13992
+ function getExtensionFromMime(mimeType) {
13993
+ return MIME_TO_EXT[mimeType];
13994
+ }
13995
+ function isAllowedMimeType(mimeType) {
13996
+ return SUPPORTED_MIME_TYPES.has(mimeType);
13997
+ }
13998
+ function unsupportedFileTypeMessage(mimeType) {
13999
+ return t("errors.file.unsupportedType", {
14000
+ type: mimeType,
14001
+ supported: SUPPORTED_FILE_TYPES_TEXT
14002
+ });
14003
+ }
14004
+ function isMissingUploadFileError(error) {
14005
+ return !!error && typeof error === "object" && error.code === "ENOENT";
14006
+ }
14007
+ var FileValidationError = class extends Error {
14008
+ constructor(message) {
14009
+ super(message);
14010
+ this.name = "FileValidationError";
14011
+ }
14012
+ };
14013
+ function validateFileUpload(file) {
14014
+ if (file.size === 0) throw new FileValidationError(t("errors.file.empty"));
14015
+ if (file.size > MAX_UPLOAD_SIZE) throw new FileValidationError(t("errors.file.sizeExceeded", {
14016
+ size: bytesToMB(file.size).toFixed(1),
14017
+ limit: MAX_UPLOAD_SIZE_TEXT,
14018
+ file: file.name
14019
+ }));
14020
+ if (!isAllowedMimeType(file.type)) throw new FileValidationError(unsupportedFileTypeMessage(file.type));
14021
+ }
14022
+
14023
+ //#endregion
14024
+ //#region src/core/pdf-converter/external.ts
14025
+ function applyTemplate(value, context) {
14026
+ return value.replaceAll("{input}", context.input).replaceAll("{outputDir}", context.outputDir).replaceAll("{basename}", context.basename);
14027
+ }
14028
+ function isError(error) {
14029
+ return error instanceof Error;
14030
+ }
14031
+ async function pathExists(filePath) {
14032
+ try {
14033
+ await fs.access(filePath);
14034
+ return true;
14035
+ } catch {
14036
+ return false;
14037
+ }
13864
14038
  }
13865
14039
  async function collectMarkdownFiles(dir) {
13866
14040
  return (await glob("**/*.md", {
@@ -14022,22 +14196,7 @@ function createPdfConverter(config) {
14022
14196
  }
14023
14197
 
14024
14198
  //#endregion
14025
- //#region src/utils/hash.ts
14026
- /**
14027
- * Helper to compute SHA-256 hash of a file asynchronously.
14028
- */
14029
- function getFileHash(filePath) {
14030
- return new Promise((resolve, reject) => {
14031
- const hash = crypto.createHash("sha256");
14032
- const stream = fs$1.createReadStream(filePath);
14033
- stream.on("data", (data) => hash.update(data));
14034
- stream.on("end", () => resolve(hash.digest("hex")));
14035
- stream.on("error", (err) => reject(err));
14036
- });
14037
- }
14038
-
14039
- //#endregion
14040
- //#region src/core/extract-runner.ts
14199
+ //#region src/core/pdf-converter/orchestrator.ts
14041
14200
  const FILE_PART_EXTENSIONS = new Set([
14042
14201
  "png",
14043
14202
  "jpg",
@@ -14047,6 +14206,51 @@ const FILE_PART_EXTENSIONS = new Set([
14047
14206
  "bmp",
14048
14207
  "svg"
14049
14208
  ]);
14209
+ const PDF_EXT_RE = /\.pdf$/i;
14210
+ async function readExtractFileInput(filePath, aiConfig, modelOverride) {
14211
+ const stat = fs$1.statSync(filePath);
14212
+ if (stat.size > MAX_UPLOAD_SIZE) throw new Error(t("errors.file.sizeExceeded", {
14213
+ size: bytesToMB(stat.size).toFixed(1),
14214
+ limit: MAX_UPLOAD_SIZE_TEXT,
14215
+ file: filePath
14216
+ }));
14217
+ const ext = path.extname(filePath).toLowerCase().replace(".", "");
14218
+ if (FILE_PART_EXTENSIONS.has(ext)) {
14219
+ if (shouldUseImageOcrFallback(aiConfig, modelOverride)) {
14220
+ const result = await recognizeImageText(filePath, aiConfig?.image);
14221
+ consola.info(t("extract.file.ocrText", { confidence: (result.confidence * 100).toFixed(1) }));
14222
+ return { text: result.text };
14223
+ }
14224
+ return {
14225
+ text: "",
14226
+ filePath
14227
+ };
14228
+ }
14229
+ if (ext === "pdf") {
14230
+ const buffer = await fs.readFile(filePath);
14231
+ const converter = createPdfConverter(aiConfig?.pdf);
14232
+ const result = await converter.convert(buffer, filePath);
14233
+ if (result.metadata?.fallback === "true") consola.info(t("extract.file.pdfFallback", { count: result.pageCount }));
14234
+ else consola.info(t("extract.file.pdfConverted", {
14235
+ name: converter.name,
14236
+ count: result.pageCount
14237
+ }));
14238
+ const mdPath = filePath.replace(PDF_EXT_RE, ".md");
14239
+ try {
14240
+ await fs.writeFile(mdPath, result.text);
14241
+ consola.info(t("extract.file.markdownSaved", { path: mdPath }));
14242
+ } catch {
14243
+ const fallbackMd = path.join(os.tmpdir(), `${path.basename(filePath, ".pdf")}.md`);
14244
+ await fs.writeFile(fallbackMd, result.text);
14245
+ consola.info(t("extract.file.markdownSaved", { path: fallbackMd }));
14246
+ }
14247
+ return { text: result.text };
14248
+ }
14249
+ return { text: await fs.readFile(filePath, "utf-8") };
14250
+ }
14251
+
14252
+ //#endregion
14253
+ //#region src/core/batch/batch-processor.ts
14050
14254
  const SUPPORTED_EXTENSIONS$1 = new Set([
14051
14255
  ...FILE_PART_EXTENSIONS,
14052
14256
  "pdf",
@@ -14059,20 +14263,89 @@ const SUPPORTED_EXTENSIONS$1 = new Set([
14059
14263
  "yaml",
14060
14264
  "yml"
14061
14265
  ]);
14062
- const PDF_EXT_RE = /\.pdf$/i;
14063
- const JSON_EXT_RE$1 = /\.json$/;
14064
14266
  const SUPPORTED_FILE_PATTERN = `*.{${[...SUPPORTED_EXTENSIONS$1].join(",")}}`;
14065
- async function syncResultToNotion(aiConfig, schemaName, data) {
14066
- if (!data || typeof data !== "object" || Array.isArray(data)) throw new Error(t("errors.ai.extractionNotObject"));
14067
- const page = await writeNotionPage(aiConfig.notion, schemaName, data);
14068
- return [{
14069
- databaseId: page.databaseId,
14070
- pageId: page.pageId
14071
- }];
14267
+ function listSupportedFiles(dir, pattern) {
14268
+ if (!fs$1.statSync(dir).isDirectory()) throw new Error(t("errors.file.notADirectory", { dir }));
14269
+ return globSync(pattern ?? SUPPORTED_FILE_PATTERN, {
14270
+ cwd: dir,
14271
+ absolute: true,
14272
+ onlyFiles: true
14273
+ }).filter((file) => {
14274
+ const ext = path.extname(file).toLowerCase().replace(".", "");
14275
+ return SUPPORTED_EXTENSIONS$1.has(ext);
14276
+ }).sort();
14072
14277
  }
14073
- function shouldSyncNotion(aiConfig, schemaName) {
14074
- return !!aiConfig.notion?.enabled && !!aiConfig.notion.schemas?.[schemaName]?.databaseId?.trim();
14278
+ async function processOneFile(aiexDir, config, aiConfig, schemaName, filePath, modelOverride, options) {
14279
+ const result = await runAuditedExtraction({
14280
+ aiexDir,
14281
+ config,
14282
+ aiConfig,
14283
+ schemaName,
14284
+ source: {
14285
+ type: "file",
14286
+ filePath
14287
+ },
14288
+ modelOverride,
14289
+ insert: options?.insert,
14290
+ force: options?.force,
14291
+ quiet: false
14292
+ });
14293
+ if (result.success) {
14294
+ if (!result.skipped) consola.success(t("extract.file.processSuccess", { file: path.basename(filePath) }));
14295
+ return true;
14296
+ }
14297
+ return false;
14298
+ }
14299
+ async function runBatchExtraction(aiexDir, config, aiConfig, schemaName, dir, globPattern, modelOverride, options) {
14300
+ consola.info(t("extract.batch.scanning", { dir: pc.cyan(dir) }));
14301
+ let files;
14302
+ try {
14303
+ files = listSupportedFiles(dir, globPattern);
14304
+ } catch {
14305
+ return {
14306
+ ok: false,
14307
+ successCount: 0,
14308
+ failCount: 0,
14309
+ error: t("extract.batch.errors.cannotReadDir", { dir })
14310
+ };
14311
+ }
14312
+ if (files.length === 0) return {
14313
+ ok: false,
14314
+ successCount: 0,
14315
+ failCount: 0,
14316
+ error: t("extract.batch.errors.noSupportedFiles", { dir })
14317
+ };
14318
+ consola.info(t("extract.batch.found", { count: files.length }));
14319
+ let successCount = 0;
14320
+ let failCount = 0;
14321
+ for (let i = 0; i < files.length; i++) {
14322
+ const file = files[i];
14323
+ consola.info(`\n${t("extract.batch.processing", {
14324
+ current: i + 1,
14325
+ total: files.length,
14326
+ file: pc.cyan(path.basename(file))
14327
+ })}`);
14328
+ if (await processOneFile(aiexDir, config, aiConfig, schemaName, file, modelOverride, {
14329
+ insert: options?.insert,
14330
+ force: options?.force
14331
+ })) successCount++;
14332
+ else failCount++;
14333
+ }
14334
+ consola.info(`\n${t("extract.batch.complete", {
14335
+ success: pc.green(successCount),
14336
+ fail: pc.red(failCount),
14337
+ total: files.length
14338
+ })}`);
14339
+ return {
14340
+ ok: true,
14341
+ successCount,
14342
+ failCount
14343
+ };
14075
14344
  }
14345
+
14346
+ //#endregion
14347
+ //#region src/core/extract-runner.ts
14348
+ const JSON_EXT_RE$1 = /\.json$/;
14076
14349
  async function ensureDatabaseReady(dbPath, schema) {
14077
14350
  try {
14078
14351
  await fs.access(dbPath);
@@ -14098,17 +14371,6 @@ async function ensureDatabaseReady(dbPath, schema) {
14098
14371
  }
14099
14372
  return null;
14100
14373
  }
14101
- function listSupportedFiles(dir, pattern) {
14102
- if (!fs$1.statSync(dir).isDirectory()) throw new Error(t("errors.file.notADirectory", { dir }));
14103
- return globSync(pattern ?? SUPPORTED_FILE_PATTERN, {
14104
- cwd: dir,
14105
- absolute: true,
14106
- onlyFiles: true
14107
- }).filter((file) => {
14108
- const ext = path.extname(file).toLowerCase().replace(".", "");
14109
- return SUPPORTED_EXTENSIONS$1.has(ext);
14110
- }).sort();
14111
- }
14112
14374
  async function loadSchema(config, schemaName) {
14113
14375
  const schemaPath = path.join(config.schemaPath, `${schemaName}.json`);
14114
14376
  try {
@@ -14122,68 +14384,27 @@ async function loadSchema(config, schemaName) {
14122
14384
  issues: e.issues.map((i) => ` - ${i.path.join(".")}: ${i.message}`).join("\n")
14123
14385
  })
14124
14386
  };
14125
- if (e.code === "ENOENT") return {
14126
- schema: null,
14127
- error: t("errors.schema.cannotRead", { name: `${schemaName}.json` })
14128
- };
14129
- if (e instanceof SyntaxError) return {
14130
- schema: null,
14131
- error: t("errors.schema.invalidJson", { name: `${schemaName}.json` })
14132
- };
14133
- return {
14134
- schema: null,
14135
- error: String(e)
14136
- };
14137
- }
14138
- }
14139
- async function listSchemas(aiexDir) {
14140
- try {
14141
- const dir = path.join(aiexDir, "schema");
14142
- return (await fs.readdir(dir)).filter((f) => f.endsWith(".json")).map((f) => f.replace(JSON_EXT_RE$1, "")).sort();
14143
- } catch {
14144
- return [];
14145
- }
14146
- }
14147
- async function readExtractFileInput(filePath, aiConfig, modelOverride) {
14148
- const stat = fs$1.statSync(filePath);
14149
- if (stat.size > MAX_UPLOAD_SIZE) throw new Error(t("errors.file.sizeExceeded", {
14150
- size: bytesToMB(stat.size).toFixed(1),
14151
- limit: MAX_UPLOAD_SIZE_TEXT,
14152
- file: filePath
14153
- }));
14154
- const ext = path.extname(filePath).toLowerCase().replace(".", "");
14155
- if (FILE_PART_EXTENSIONS.has(ext)) {
14156
- if (shouldUseImageOcrFallback(aiConfig, modelOverride)) {
14157
- const result = await recognizeImageText(filePath, aiConfig?.image);
14158
- consola.info(t("extract.file.ocrText", { confidence: (result.confidence * 100).toFixed(1) }));
14159
- return { text: result.text };
14160
- }
14161
- return {
14162
- text: "",
14163
- filePath
14164
- };
14165
- }
14166
- if (ext === "pdf") {
14167
- const buffer = await fs.readFile(filePath);
14168
- const converter = createPdfConverter(aiConfig?.pdf);
14169
- const result = await converter.convert(buffer, filePath);
14170
- if (result.metadata?.fallback === "true") consola.info(t("extract.file.pdfFallback", { count: result.pageCount }));
14171
- else consola.info(t("extract.file.pdfConverted", {
14172
- name: converter.name,
14173
- count: result.pageCount
14174
- }));
14175
- const mdPath = filePath.replace(PDF_EXT_RE, ".md");
14176
- try {
14177
- await fs.writeFile(mdPath, result.text);
14178
- consola.info(t("extract.file.markdownSaved", { path: mdPath }));
14179
- } catch {
14180
- const fallbackMd = path.join(os.tmpdir(), `${path.basename(filePath, ".pdf")}.md`);
14181
- await fs.writeFile(fallbackMd, result.text);
14182
- consola.info(t("extract.file.markdownSaved", { path: fallbackMd }));
14183
- }
14184
- return { text: result.text };
14387
+ if (e.code === "ENOENT") return {
14388
+ schema: null,
14389
+ error: t("errors.schema.cannotRead", { name: `${schemaName}.json` })
14390
+ };
14391
+ if (e instanceof SyntaxError) return {
14392
+ schema: null,
14393
+ error: t("errors.schema.invalidJson", { name: `${schemaName}.json` })
14394
+ };
14395
+ return {
14396
+ schema: null,
14397
+ error: String(e)
14398
+ };
14399
+ }
14400
+ }
14401
+ async function listSchemas(aiexDir) {
14402
+ try {
14403
+ const dir = path.join(aiexDir, "schema");
14404
+ return (await fs.readdir(dir)).filter((f) => f.endsWith(".json")).map((f) => f.replace(JSON_EXT_RE$1, "")).sort();
14405
+ } catch {
14406
+ return [];
14185
14407
  }
14186
- return { text: await fs.readFile(filePath, "utf-8") };
14187
14408
  }
14188
14409
  async function extractSingle(aiexDir, config, aiConfig, schemaName, text$1, filePath, modelOverride, options) {
14189
14410
  const schemaLoad = await loadSchema(config, schemaName);
@@ -14367,6 +14588,7 @@ async function runAuditedExtraction(options) {
14367
14588
  error: error instanceof Error ? error.message : String(error)
14368
14589
  });
14369
14590
  if (!quiet) consola.error(t("extract.file.notionSyncFail", { error: error instanceof Error ? error.message : String(error) }));
14591
+ await triggerWebhook(aiConfig, audit.id, schemaName, "extraction.failed", source, r.data, error instanceof Error ? error.message : String(error), r.tokensUsed, quiet);
14370
14592
  return {
14371
14593
  success: false,
14372
14594
  error: error instanceof Error ? error.message : String(error),
@@ -14382,6 +14604,7 @@ async function runAuditedExtraction(options) {
14382
14604
  notionPages,
14383
14605
  tokensUsed: r.tokensUsed
14384
14606
  });
14607
+ await triggerWebhook(aiConfig, audit.id, schemaName, "extraction.success", source, r.data, void 0, r.tokensUsed, quiet);
14385
14608
  return {
14386
14609
  success: true,
14387
14610
  outputPath: updated.outputPath,
@@ -14398,6 +14621,7 @@ async function runAuditedExtraction(options) {
14398
14621
  error: r.error || "Extraction failed"
14399
14622
  });
14400
14623
  if (!quiet) consola.error(t("extract.file.extractionFailed", { error: r.error }));
14624
+ await triggerWebhook(aiConfig, audit.id, schemaName, "extraction.failed", source, void 0, r.error || "Extraction failed", void 0, quiet);
14401
14625
  return {
14402
14626
  success: false,
14403
14627
  error: r.error,
@@ -14417,6 +14641,7 @@ async function runAuditedExtraction(options) {
14417
14641
  error: e instanceof Error ? e.message : String(e)
14418
14642
  }));
14419
14643
  }
14644
+ await triggerWebhook(aiConfig, audit.id, schemaName, "extraction.failed", source, void 0, e instanceof Error ? e.message : String(e), void 0, quiet);
14420
14645
  return {
14421
14646
  success: false,
14422
14647
  error: e instanceof Error ? e.message : String(e),
@@ -14425,73 +14650,6 @@ async function runAuditedExtraction(options) {
14425
14650
  };
14426
14651
  }
14427
14652
  }
14428
- async function processOneFile(aiexDir, config, aiConfig, schemaName, filePath, modelOverride, options) {
14429
- const result = await runAuditedExtraction({
14430
- aiexDir,
14431
- config,
14432
- aiConfig,
14433
- schemaName,
14434
- source: {
14435
- type: "file",
14436
- filePath
14437
- },
14438
- modelOverride,
14439
- insert: options?.insert,
14440
- force: options?.force,
14441
- quiet: false
14442
- });
14443
- if (result.success) {
14444
- if (!result.skipped) consola.success(t("extract.file.processSuccess", { file: path.basename(filePath) }));
14445
- return true;
14446
- }
14447
- return false;
14448
- }
14449
- async function runBatchExtraction(aiexDir, config, aiConfig, schemaName, dir, globPattern, modelOverride, options) {
14450
- consola.info(t("extract.batch.scanning", { dir: pc.cyan(dir) }));
14451
- let files;
14452
- try {
14453
- files = listSupportedFiles(dir, globPattern);
14454
- } catch {
14455
- return {
14456
- ok: false,
14457
- successCount: 0,
14458
- failCount: 0,
14459
- error: t("extract.batch.errors.cannotReadDir", { dir })
14460
- };
14461
- }
14462
- if (files.length === 0) return {
14463
- ok: false,
14464
- successCount: 0,
14465
- failCount: 0,
14466
- error: t("extract.batch.errors.noSupportedFiles", { dir })
14467
- };
14468
- consola.info(t("extract.batch.found", { count: files.length }));
14469
- let successCount = 0;
14470
- let failCount = 0;
14471
- for (let i = 0; i < files.length; i++) {
14472
- const file = files[i];
14473
- consola.info(`\n${t("extract.batch.processing", {
14474
- current: i + 1,
14475
- total: files.length,
14476
- file: pc.cyan(path.basename(file))
14477
- })}`);
14478
- if (await processOneFile(aiexDir, config, aiConfig, schemaName, file, modelOverride, {
14479
- insert: options?.insert,
14480
- force: options?.force
14481
- })) successCount++;
14482
- else failCount++;
14483
- }
14484
- consola.info(`\n${t("extract.batch.complete", {
14485
- success: pc.green(successCount),
14486
- fail: pc.red(failCount),
14487
- total: files.length
14488
- })}`);
14489
- return {
14490
- ok: true,
14491
- successCount,
14492
- failCount
14493
- };
14494
- }
14495
14653
 
14496
14654
  //#endregion
14497
14655
  //#region src/commands/dump.ts
@@ -14620,49 +14778,25 @@ const dumpCommand = defineCommand({
14620
14778
  } else s.stop(t("command.dump.loaded", { count: rows.length }));
14621
14779
  const s2 = spinner();
14622
14780
  s2.start(t("command.dump.formatting"));
14623
- const formattedRows = rows.map((row) => {
14624
- const newRow = {};
14625
- columns.forEach((col) => {
14626
- const colName = col.name;
14627
- const val = row[colName];
14628
- const type = (schema?.properties?.[colName])?.type || "";
14629
- if (val === null || val === void 0) newRow[colName] = "";
14630
- else if (type === "boolean") if (format === "xlsx") newRow[colName] = val === 1 || val === "1" || val === true;
14631
- else newRow[colName] = val === 1 || val === "1" || val === true ? "true" : "false";
14632
- else if (type === "number" || type === "integer") if (val === "") newRow[colName] = "";
14633
- else {
14634
- const num = Number(val);
14635
- newRow[colName] = Number.isNaN(num) ? val : num;
14636
- }
14637
- else if (typeof val === "object") newRow[colName] = JSON.stringify(val);
14638
- else {
14639
- const dbType = (col.type || "").toLowerCase();
14640
- if ((dbType.includes("int") || dbType.includes("real") || dbType.includes("num") || dbType.includes("double") || dbType.includes("float")) && typeof val === "string" && val !== "") {
14641
- const num = Number(val);
14642
- newRow[colName] = Number.isNaN(num) ? val : num;
14643
- } else newRow[colName] = val;
14644
- }
14645
- });
14646
- return newRow;
14647
- });
14648
- s2.stop(t("command.dump.formatted"));
14781
+ let formattedRows;
14782
+ try {
14783
+ formattedRows = formatRowsConformingToSchema(rows, columns, schema, format);
14784
+ s2.stop(t("command.dump.formatted"));
14785
+ } catch (error) {
14786
+ s2.stop(t("command.dump.dbQueryFailed"));
14787
+ failCommand(error instanceof Error ? error.message : String(error));
14788
+ return;
14789
+ }
14649
14790
  const s3 = spinner();
14650
14791
  s3.start(t("command.dump.writing", {
14651
14792
  format: format.toUpperCase(),
14652
14793
  path: resolvedOutput
14653
14794
  }));
14654
14795
  try {
14655
- const ws = XLSX.utils.json_to_sheet(formattedRows, { header: columns.map((col) => col.name) });
14796
+ const buffer = generateExportBuffer(tableName, formattedRows, columns, format);
14656
14797
  const outputDir = path.dirname(resolvedOutput);
14657
14798
  if (!fs$1.existsSync(outputDir)) fs$1.mkdirSync(outputDir, { recursive: true });
14658
- if (format === "xlsx") {
14659
- const wb = XLSX.utils.book_new();
14660
- XLSX.utils.book_append_sheet(wb, ws, tableName.slice(0, 31));
14661
- XLSX.writeFile(wb, resolvedOutput);
14662
- } else {
14663
- const csv = XLSX.utils.sheet_to_csv(ws);
14664
- fs$1.writeFileSync(resolvedOutput, "" + csv, "utf8");
14665
- }
14799
+ fs$1.writeFileSync(resolvedOutput, buffer);
14666
14800
  s3.stop(t("command.dump.dumpCompleted"));
14667
14801
  consola.success(t("command.dump.successMsg", {
14668
14802
  count: rows.length,
@@ -15584,26 +15718,17 @@ function aiRoutes(config) {
15584
15718
  }
15585
15719
 
15586
15720
  //#endregion
15587
- //#region src/server/routes/data.ts
15721
+ //#region src/core/data-service.ts
15588
15722
  const FILE_REGEX = /\.json$/;
15589
15723
  const EXTRACTION_TIMESTAMP_RE = /-\d{4}-\d{2}-\d{2}T/;
15590
15724
  const INTERNAL_ROWID_COLUMN = "__aiex_rowid";
15591
15725
  const TIMESTAMP_CLEANUP = /(\d{2})-(\d{2})-(\d{2})/;
15592
15726
  const TIMESTAMP_TZ = /(\d{3})Z/;
15593
- const tableParamSchema = z.object({ name: z.string().regex(/^[a-z][a-z0-9_]*$/) });
15594
- const extractionFileParamSchema = z.object({ name: z.string().regex(/^[\w.-]+\.json$/).refine((name$1) => name$1 === path.basename(name$1) && !name$1.includes("..")) });
15595
- const tableQuerySchema = z.object({
15596
- page: z.coerce.number().int().min(1).catch(1),
15597
- pageSize: z.coerce.number().int().min(1).max(500).catch(50),
15598
- search: z.string().catch(""),
15599
- sortField: z.string().optional(),
15600
- sortOrder: z.preprocess((value) => typeof value === "string" ? value.toLowerCase() : value, z.enum(["asc", "desc"]).catch("asc")),
15601
- all: z.preprocess((value) => value === "true" || value === true, z.boolean().catch(false))
15602
- });
15603
- function invalidParamResponse$1(message) {
15604
- return (result, c) => {
15605
- if (!result.success) return c.json({ error: message }, 400);
15606
- };
15727
+ function schemaNameFromExtractionFile(name$1) {
15728
+ const stem = name$1.replace(FILE_REGEX, "");
15729
+ const match = stem.match(EXTRACTION_TIMESTAMP_RE);
15730
+ if (!match || typeof match.index !== "number" || match.index <= 0) return null;
15731
+ return stem.slice(0, match.index);
15607
15732
  }
15608
15733
  function getAuditNotionStatus(record) {
15609
15734
  if (record.notionPages?.length) return "synced";
@@ -15630,50 +15755,233 @@ async function getRowExtractionActions(aiexDir, tableName) {
15630
15755
  }
15631
15756
  return actions;
15632
15757
  }
15633
- function schemaNameFromExtractionFile(name$1) {
15634
- const stem = name$1.replace(FILE_REGEX, "");
15635
- const match = stem.match(EXTRACTION_TIMESTAMP_RE);
15636
- if (!match || typeof match.index !== "number" || match.index <= 0) return null;
15637
- return stem.slice(0, match.index);
15638
- }
15639
15758
  function createReadonlyQueryDb(databasePath) {
15640
15759
  return new Kysely({ dialect: new SqliteDialect({ database: new Database(databasePath, { readonly: true }) }) });
15641
15760
  }
15761
+ async function listExtractions(config) {
15762
+ const aiexDir = path.dirname(config.schemaPath);
15763
+ const extractedDir = path.join(aiexDir, "extracted");
15764
+ await fs.mkdir(extractedDir, { recursive: true });
15765
+ const jsonFiles = (await fs.readdir(extractedDir)).filter((f) => f.endsWith(".json") && !f.endsWith(".prompt.md"));
15766
+ const auditRecords = await listExtractionAuditRecords(aiexDir);
15767
+ const auditByOutputName = new Map(auditRecords.map((record) => [record.outputName, record]));
15768
+ const records = [];
15769
+ for (const file of jsonFiles) {
15770
+ const schemaName = schemaNameFromExtractionFile(file);
15771
+ if (!schemaName) continue;
15772
+ const timestamp = file.replace(FILE_REGEX, "").slice(schemaName.length + 1).replace(/-/g, (d, i) => i === 4 || i === 7 ? "-" : d).replace(TIMESTAMP_CLEANUP, (_, h, m, s) => `${h}:${m}:${s}`).replace(TIMESTAMP_TZ, ".$1Z");
15773
+ const filePath = path.join(extractedDir, file);
15774
+ try {
15775
+ const stat = await fs.stat(filePath);
15776
+ const audit = auditByOutputName.get(file);
15777
+ const notionPages = audit?.notionPages?.length ? audit.notionPages : void 0;
15778
+ records.push({
15779
+ name: file,
15780
+ schemaName,
15781
+ timestamp,
15782
+ fileSize: stat.size,
15783
+ modifiedAt: stat.mtime.toISOString(),
15784
+ notionStatus: notionPages ? "synced" : audit?.status === "failed" ? "failed" : "not_synced",
15785
+ notionPages,
15786
+ notionError: !notionPages && audit?.status === "failed" ? audit.error : void 0
15787
+ });
15788
+ } catch {
15789
+ continue;
15790
+ }
15791
+ }
15792
+ records.sort((a, b) => b.timestamp.localeCompare(a.timestamp));
15793
+ return records;
15794
+ }
15795
+ async function listTables(config) {
15796
+ const schemaDir = config.schemaPath;
15797
+ let schemaFiles = [];
15798
+ try {
15799
+ schemaFiles = (await fs.readdir(schemaDir)).filter((f) => f.endsWith(".json"));
15800
+ } catch {
15801
+ schemaFiles = [];
15802
+ }
15803
+ let db = null;
15804
+ let dbTables = [];
15805
+ try {
15806
+ db = createReadonlyQueryDb(config.databasePath);
15807
+ dbTables = (await sql`
15808
+ select name
15809
+ from sqlite_master
15810
+ where type = 'table' and name not like 'sqlite_%' and name not like '_%'
15811
+ order by name
15812
+ `.execute(db)).rows.map((row) => row.name);
15813
+ } catch {} finally {
15814
+ await db?.destroy();
15815
+ }
15816
+ const tables = [];
15817
+ for (const file of schemaFiles) try {
15818
+ const schema = await readFile(path.join(schemaDir, file));
15819
+ const tableName = schema.table?.name;
15820
+ if (!tableName) continue;
15821
+ tables.push({
15822
+ name: tableName,
15823
+ title: schema.title || tableName,
15824
+ hasData: dbTables.includes(tableName)
15825
+ });
15826
+ } catch {
15827
+ continue;
15828
+ }
15829
+ return tables;
15830
+ }
15831
+ async function getTableData(config, tableName, query) {
15832
+ const { page, pageSize, search, sortField, sortOrder, all } = query;
15833
+ const aiexDir = path.dirname(config.schemaPath);
15834
+ let db;
15835
+ try {
15836
+ db = createReadonlyQueryDb(config.databasePath);
15837
+ } catch {
15838
+ throw new Error(t("server.dbNotFound"));
15839
+ }
15840
+ try {
15841
+ if ((await sql`
15842
+ select name
15843
+ from sqlite_master
15844
+ where type = 'table' and name = ${tableName}
15845
+ `.execute(db)).rows.length === 0) throw new Error(t("server.tableNotFound", { name: tableName }));
15846
+ const columns = (await sql`
15847
+ pragma table_info(${sql.table(tableName)})
15848
+ `.execute(db)).rows.map((col) => ({
15849
+ name: col.name,
15850
+ type: col.type,
15851
+ notNull: !!col.notnull,
15852
+ pk: !!col.pk
15853
+ }));
15854
+ const searchConditions = columns.map((col) => sql`${sql.ref(col.name)} like ${`%${search}%`}`);
15855
+ const searchCondition = search ? sql`where ${sql.join(searchConditions, sql` or `)}` : sql``;
15856
+ const sortColumn = columns.find((col) => col.name === sortField);
15857
+ const orderBy = sortColumn ? sql`order by ${sql.ref(sortColumn.name)} ${sql.raw(sortOrder === "desc" ? "desc" : "asc")}` : sql``;
15858
+ const total = (await sql`
15859
+ select count(*) as count
15860
+ from ${sql.table(tableName)}
15861
+ ${searchCondition}
15862
+ `.execute(db)).rows[0]?.count ?? 0;
15863
+ const offset = (page - 1) * pageSize;
15864
+ const totalPages = all ? 1 : Math.max(1, Math.ceil(total / pageSize));
15865
+ const result = all ? await sql`
15866
+ select rowid as ${sql.raw(INTERNAL_ROWID_COLUMN)}, *
15867
+ from ${sql.table(tableName)}
15868
+ ${searchCondition}
15869
+ ${orderBy}
15870
+ `.execute(db) : await sql`
15871
+ select rowid as ${sql.raw(INTERNAL_ROWID_COLUMN)}, *
15872
+ from ${sql.table(tableName)}
15873
+ ${searchCondition}
15874
+ ${orderBy}
15875
+ limit ${pageSize}
15876
+ offset ${offset}
15877
+ `.execute(db);
15878
+ const actionsByRowId = await getRowExtractionActions(aiexDir, tableName);
15879
+ const rowActions = Object.fromEntries(result.rows.map((row, index) => {
15880
+ const rowId = row[INTERNAL_ROWID_COLUMN];
15881
+ const action = rowId === null || rowId === void 0 ? void 0 : actionsByRowId.get(String(rowId));
15882
+ return action ? [String(index), action] : null;
15883
+ }).filter((entry) => !!entry));
15884
+ const rows = result.rows.map(({ [INTERNAL_ROWID_COLUMN]: _rowid, ...row }) => row);
15885
+ const schemaDir = config.schemaPath;
15886
+ let schema = null;
15887
+ try {
15888
+ const schemaFiles = (await fs.readdir(schemaDir)).filter((f) => f.endsWith(".json"));
15889
+ for (const file of schemaFiles) {
15890
+ const s = await readFile(path.join(schemaDir, file));
15891
+ if (s.table?.name === tableName) {
15892
+ schema = s;
15893
+ break;
15894
+ }
15895
+ }
15896
+ } catch {}
15897
+ return {
15898
+ columns,
15899
+ rows,
15900
+ rowActions,
15901
+ total,
15902
+ page: all ? 1 : page,
15903
+ pageSize: all ? total : pageSize,
15904
+ totalPages,
15905
+ schema
15906
+ };
15907
+ } finally {
15908
+ await db.destroy();
15909
+ }
15910
+ }
15911
+ async function retryNotionSync(config, fileName) {
15912
+ const aiexDir = path.dirname(config.schemaPath);
15913
+ const extractedDir = path.join(aiexDir, "extracted");
15914
+ const filePath = path.join(extractedDir, fileName);
15915
+ const schemaName = schemaNameFromExtractionFile(fileName);
15916
+ if (!schemaName) throw new Error(t("server.cannotInferSchema"));
15917
+ const aiConfig = await readAIConfig(aiexDir);
15918
+ if (!aiConfig?.notion?.enabled) throw new Error(t("errors.notion.notEnabled"));
15919
+ if (!aiConfig.notion.schemas?.[schemaName]?.databaseId?.trim()) throw new Error(t("errors.notion.noSchemaConfig", { name: schemaName }));
15920
+ try {
15921
+ const data = await readFile(filePath);
15922
+ if (!data || typeof data !== "object" || Array.isArray(data)) throw new Error(t("errors.ai.extractionNotObject"));
15923
+ const page = await writeNotionPage(aiConfig.notion, schemaName, data);
15924
+ const notionPages = [{
15925
+ databaseId: page.databaseId,
15926
+ pageId: page.pageId
15927
+ }];
15928
+ let record = (await listExtractionAuditRecords(aiexDir)).find((record$1) => record$1.outputName === fileName);
15929
+ if (!record) record = await createExtractionAuditRecord(aiexDir, {
15930
+ schemaName,
15931
+ source: {
15932
+ type: "file",
15933
+ filePath,
15934
+ fileName
15935
+ }
15936
+ });
15937
+ if (record) await updateExtractionAuditRecord(aiexDir, record.id, {
15938
+ status: "succeeded",
15939
+ outputPath: filePath,
15940
+ outputName: fileName,
15941
+ notionPages,
15942
+ error: void 0
15943
+ });
15944
+ return {
15945
+ success: true,
15946
+ notionPages
15947
+ };
15948
+ } catch (error) {
15949
+ const message = error instanceof Error ? error.message : String(error);
15950
+ const record = (await listExtractionAuditRecords(aiexDir)).find((record$1) => record$1.outputName === fileName);
15951
+ if (record) await updateExtractionAuditRecord(aiexDir, record.id, {
15952
+ status: "failed",
15953
+ outputPath: filePath,
15954
+ outputName: fileName,
15955
+ error: message
15956
+ });
15957
+ throw error;
15958
+ }
15959
+ }
15960
+
15961
+ //#endregion
15962
+ //#region src/server/routes/data.ts
15963
+ const tableParamSchema = z.object({ name: z.string().regex(/^[a-z][a-z0-9_]*$/) });
15964
+ const extractionFileParamSchema = z.object({ name: z.string().regex(/^[\w.-]+\.json$/).refine((name$1) => name$1 === path.basename(name$1) && !name$1.includes("..")) });
15965
+ const tableQuerySchema = z.object({
15966
+ page: z.coerce.number().int().min(1).catch(1),
15967
+ pageSize: z.coerce.number().int().min(1).max(500).catch(50),
15968
+ search: z.string().catch(""),
15969
+ sortField: z.string().optional(),
15970
+ sortOrder: z.preprocess((value) => typeof value === "string" ? value.toLowerCase() : value, z.enum(["asc", "desc"]).catch("asc")),
15971
+ all: z.preprocess((value) => value === "true" || value === true, z.boolean().catch(false))
15972
+ });
15973
+ function invalidParamResponse$1(message) {
15974
+ return (result, c) => {
15975
+ if (!result.success) return c.json({ error: message }, 400);
15976
+ };
15977
+ }
15642
15978
  function dataRoutes(config) {
15643
15979
  const app = new Hono();
15644
15980
  const aiexDir = path.dirname(config.schemaPath);
15645
15981
  const extractedDir = path.join(aiexDir, "extracted");
15646
15982
  app.get("/data", async (c) => {
15647
15983
  try {
15648
- await fs.mkdir(extractedDir, { recursive: true });
15649
- const jsonFiles = (await fs.readdir(extractedDir)).filter((f) => f.endsWith(".json") && !f.endsWith(".prompt.md"));
15650
- const auditRecords = await listExtractionAuditRecords(aiexDir);
15651
- const auditByOutputName = new Map(auditRecords.map((record) => [record.outputName, record]));
15652
- const records = [];
15653
- for (const file of jsonFiles) {
15654
- const schemaName = schemaNameFromExtractionFile(file);
15655
- if (!schemaName) continue;
15656
- const timestamp = file.replace(FILE_REGEX, "").slice(schemaName.length + 1).replace(/-/g, (d, i) => i === 4 || i === 7 ? "-" : d).replace(TIMESTAMP_CLEANUP, (_, h, m, s) => `${h}:${m}:${s}`).replace(TIMESTAMP_TZ, ".$1Z");
15657
- const filePath = path.join(extractedDir, file);
15658
- try {
15659
- const stat = await fs.stat(filePath);
15660
- const audit = auditByOutputName.get(file);
15661
- const notionPages = audit?.notionPages?.length ? audit.notionPages : void 0;
15662
- records.push({
15663
- name: file,
15664
- schemaName,
15665
- timestamp,
15666
- fileSize: stat.size,
15667
- modifiedAt: stat.mtime.toISOString(),
15668
- notionStatus: notionPages ? "synced" : audit?.status === "failed" ? "failed" : "not_synced",
15669
- notionPages,
15670
- notionError: !notionPages && audit?.status === "failed" ? audit.error : void 0
15671
- });
15672
- } catch {
15673
- continue;
15674
- }
15675
- }
15676
- records.sort((a, b) => b.timestamp.localeCompare(a.timestamp));
15984
+ const records = await listExtractions(config);
15677
15985
  return c.json(records);
15678
15986
  } catch (error) {
15679
15987
  return c.json({ error: error instanceof Error ? error.message : String(error) }, 500);
@@ -15681,39 +15989,7 @@ function dataRoutes(config) {
15681
15989
  });
15682
15990
  app.get("/data/tables", async (c) => {
15683
15991
  try {
15684
- const schemaDir = config.schemaPath;
15685
- let schemaFiles = [];
15686
- try {
15687
- schemaFiles = (await fs.readdir(schemaDir)).filter((f) => f.endsWith(".json"));
15688
- } catch {
15689
- schemaFiles = [];
15690
- }
15691
- let db = null;
15692
- let dbTables = [];
15693
- try {
15694
- db = createReadonlyQueryDb(config.databasePath);
15695
- dbTables = (await sql`
15696
- select name
15697
- from sqlite_master
15698
- where type = 'table' and name not like 'sqlite_%' and name not like '_%'
15699
- order by name
15700
- `.execute(db)).rows.map((row) => row.name);
15701
- } catch {} finally {
15702
- await db?.destroy();
15703
- }
15704
- const tables = [];
15705
- for (const file of schemaFiles) try {
15706
- const schema = await readFile(path.join(schemaDir, file));
15707
- const tableName = schema.table?.name;
15708
- if (!tableName) continue;
15709
- tables.push({
15710
- name: tableName,
15711
- title: schema.title || tableName,
15712
- hasData: dbTables.includes(tableName)
15713
- });
15714
- } catch {
15715
- continue;
15716
- }
15992
+ const tables = await listTables(config);
15717
15993
  return c.json(tables);
15718
15994
  } catch (error) {
15719
15995
  return c.json({ error: error instanceof Error ? error.message : String(error) }, 500);
@@ -15721,84 +15997,14 @@ function dataRoutes(config) {
15721
15997
  });
15722
15998
  app.get("/data/tables/:name", zValidator("param", tableParamSchema, invalidParamResponse$1(t("server.invalidTableName"))), zValidator("query", tableQuerySchema), async (c) => {
15723
15999
  const { name: tableName } = c.req.valid("param");
15724
- const { page, pageSize, search, sortField, sortOrder, all } = c.req.valid("query");
15725
- let db;
16000
+ const query = c.req.valid("query");
15726
16001
  try {
15727
- db = createReadonlyQueryDb(config.databasePath);
15728
- } catch {
15729
- return c.json({ error: t("server.dbNotFound") }, 400);
15730
- }
15731
- try {
15732
- if ((await sql`
15733
- select name
15734
- from sqlite_master
15735
- where type = 'table' and name = ${tableName}
15736
- `.execute(db)).rows.length === 0) return c.json({ error: t("server.tableNotFound", { name: tableName }) }, 404);
15737
- const columns = (await sql`
15738
- pragma table_info(${sql.table(tableName)})
15739
- `.execute(db)).rows.map((col) => ({
15740
- name: col.name,
15741
- type: col.type,
15742
- notNull: !!col.notnull,
15743
- pk: !!col.pk
15744
- }));
15745
- const searchConditions = columns.map((col) => sql`${sql.ref(col.name)} like ${`%${search}%`}`);
15746
- const searchCondition = search ? sql`where ${sql.join(searchConditions, sql` or `)}` : sql``;
15747
- const sortColumn = columns.find((col) => col.name === sortField);
15748
- const orderBy = sortColumn ? sql`order by ${sql.ref(sortColumn.name)} ${sql.raw(sortOrder === "desc" ? "desc" : "asc")}` : sql``;
15749
- const total = (await sql`
15750
- select count(*) as count
15751
- from ${sql.table(tableName)}
15752
- ${searchCondition}
15753
- `.execute(db)).rows[0]?.count ?? 0;
15754
- const offset = (page - 1) * pageSize;
15755
- const totalPages = all ? 1 : Math.max(1, Math.ceil(total / pageSize));
15756
- const result = all ? await sql`
15757
- select rowid as ${sql.raw(INTERNAL_ROWID_COLUMN)}, *
15758
- from ${sql.table(tableName)}
15759
- ${searchCondition}
15760
- ${orderBy}
15761
- `.execute(db) : await sql`
15762
- select rowid as ${sql.raw(INTERNAL_ROWID_COLUMN)}, *
15763
- from ${sql.table(tableName)}
15764
- ${searchCondition}
15765
- ${orderBy}
15766
- limit ${pageSize}
15767
- offset ${offset}
15768
- `.execute(db);
15769
- const actionsByRowId = await getRowExtractionActions(aiexDir, tableName);
15770
- const rowActions = Object.fromEntries(result.rows.map((row, index) => {
15771
- const rowId = row[INTERNAL_ROWID_COLUMN];
15772
- const action = rowId === null || rowId === void 0 ? void 0 : actionsByRowId.get(String(rowId));
15773
- return action ? [String(index), action] : null;
15774
- }).filter((entry) => !!entry));
15775
- const rows = result.rows.map(({ [INTERNAL_ROWID_COLUMN]: _rowid, ...row }) => row);
15776
- const schemaDir = config.schemaPath;
15777
- let schema = null;
15778
- try {
15779
- const schemaFiles = (await fs.readdir(schemaDir)).filter((f) => f.endsWith(".json"));
15780
- for (const file of schemaFiles) {
15781
- const s = await readFile(path.join(schemaDir, file));
15782
- if (s.table?.name === tableName) {
15783
- schema = s;
15784
- break;
15785
- }
15786
- }
15787
- } catch {}
15788
- return c.json({
15789
- columns,
15790
- rows,
15791
- rowActions,
15792
- total,
15793
- page: all ? 1 : page,
15794
- pageSize: all ? total : pageSize,
15795
- totalPages,
15796
- schema
15797
- });
16002
+ const result = await getTableData(config, tableName, query);
16003
+ return c.json(result);
15798
16004
  } catch (error) {
15799
- return c.json({ error: error instanceof Error ? error.message : String(error) }, 500);
15800
- } finally {
15801
- await db.destroy();
16005
+ const errMessage = error instanceof Error ? error.message : String(error);
16006
+ const status = errMessage.includes("not found") ? 404 : 500;
16007
+ return c.json({ error: errMessage }, status);
15802
16008
  }
15803
16009
  });
15804
16010
  app.get("/data/:name", zValidator("param", extractionFileParamSchema, invalidParamResponse$1(t("server.invalidFileName"))), async (c) => {
@@ -15817,61 +16023,15 @@ function dataRoutes(config) {
15817
16023
  });
15818
16024
  app.post("/data/:name/notion/retry", zValidator("param", extractionFileParamSchema, invalidParamResponse$1(t("server.invalidFileName"))), async (c) => {
15819
16025
  const { name: name$1 } = c.req.valid("param");
15820
- const filePath = path.join(extractedDir, name$1);
15821
- const schemaName = schemaNameFromExtractionFile(name$1);
15822
- if (!schemaName) return c.json({
16026
+ if (!schemaNameFromExtractionFile(name$1)) return c.json({
15823
16027
  success: false,
15824
16028
  error: t("server.cannotInferSchema")
15825
16029
  }, 400);
15826
- const aiConfig = await readAIConfig(aiexDir);
15827
- if (!aiConfig?.notion?.enabled) return c.json({
15828
- success: false,
15829
- error: t("errors.notion.notEnabled")
15830
- }, 400);
15831
- if (!aiConfig.notion.schemas?.[schemaName]?.databaseId?.trim()) return c.json({
15832
- success: false,
15833
- error: t("errors.notion.noSchemaConfig", { name: schemaName })
15834
- }, 400);
15835
16030
  try {
15836
- const data = await readFile(filePath);
15837
- if (!data || typeof data !== "object" || Array.isArray(data)) return c.json({
15838
- success: false,
15839
- error: t("errors.ai.extractionNotObject")
15840
- }, 400);
15841
- const page = await writeNotionPage(aiConfig.notion, schemaName, data);
15842
- const notionPages = [{
15843
- databaseId: page.databaseId,
15844
- pageId: page.pageId
15845
- }];
15846
- let record = (await listExtractionAuditRecords(aiexDir)).find((record$1) => record$1.outputName === name$1);
15847
- if (!record) record = await createExtractionAuditRecord(aiexDir, {
15848
- schemaName,
15849
- source: {
15850
- type: "file",
15851
- filePath,
15852
- fileName: name$1
15853
- }
15854
- });
15855
- if (record) await updateExtractionAuditRecord(aiexDir, record.id, {
15856
- status: "succeeded",
15857
- outputPath: filePath,
15858
- outputName: name$1,
15859
- notionPages,
15860
- error: void 0
15861
- });
15862
- return c.json({
15863
- success: true,
15864
- notionPages
15865
- });
16031
+ const result = await retryNotionSync(config, name$1);
16032
+ return c.json(result);
15866
16033
  } catch (error) {
15867
16034
  const message = error instanceof Error ? error.message : String(error);
15868
- const record = (await listExtractionAuditRecords(aiexDir)).find((record$1) => record$1.outputName === name$1);
15869
- if (record) await updateExtractionAuditRecord(aiexDir, record.id, {
15870
- status: "failed",
15871
- outputPath: filePath,
15872
- outputName: name$1,
15873
- error: message
15874
- });
15875
16035
  return c.json({
15876
16036
  success: false,
15877
16037
  error: message