aiex-cli 0.0.4-beta.1 → 0.0.4-beta.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.mjs +763 -603
- package/dist/{doctor-collector-xRnW5Rj3.mjs → doctor-collector-8fLyh9lK.mjs} +17 -4
- package/dist/index.mjs +1 -1
- package/dist/web/assets/AISettings-DfoDfxk9.js +272 -0
- package/dist/web/assets/{index-CGZLSwt2.js → index-sK43vSj1.js} +2 -2
- package/dist/web/index.html +1 -1
- package/dist/{zh-CN-DAlmQ2hb.mjs → zh-CN-B5QVQVm-.mjs} +2 -0
- package/package.json +1 -1
- package/dist/web/assets/AISettings-BmCr8Kj4.js +0 -272
package/dist/cli.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { A as formatDoctorDiagnosticsJson, C as seedConfig, D as version, E as package_default, S as createConfig, T as name, _ as DEFAULT_MINERU_CONFIG, a as parseJsonSchema, b as PLACEHOLDER_TEXT, c as recognizeImageText, d as t, f as getDefaultAIConfig, g as DEFAULT_MARKITDOWN_CONFIG, h as DEFAULT_MARKER_CONFIG, i as JsonSchemaDefinitionSchema, k as doctorDiagnosticsTableRows, l as shouldUseImageOcrFallback, m as writeAIConfig, n as createMigrationConfig, o as toSnakeCase, p as readAIConfig, s as generateDrizzleSchema, t as collectDoctorDiagnostics, u as initI18n, v as DEFAULT_PROMPT_CONFIG, w as description, x as AIConfigSchema, y as PLACEHOLDER_SCHEMA } from "./doctor-collector-
|
|
1
|
+
import { A as formatDoctorDiagnosticsJson, C as seedConfig, D as version, E as package_default, S as createConfig, T as name, _ as DEFAULT_MINERU_CONFIG, a as parseJsonSchema, b as PLACEHOLDER_TEXT, c as recognizeImageText, d as t, f as getDefaultAIConfig, g as DEFAULT_MARKITDOWN_CONFIG, h as DEFAULT_MARKER_CONFIG, i as JsonSchemaDefinitionSchema, k as doctorDiagnosticsTableRows, l as shouldUseImageOcrFallback, m as writeAIConfig, n as createMigrationConfig, o as toSnakeCase, p as readAIConfig, s as generateDrizzleSchema, t as collectDoctorDiagnostics, u as initI18n, v as DEFAULT_PROMPT_CONFIG, w as description, x as AIConfigSchema, y as PLACEHOLDER_SCHEMA } from "./doctor-collector-8fLyh9lK.mjs";
|
|
2
2
|
import { createRequire } from "node:module";
|
|
3
3
|
import fs from "node:fs/promises";
|
|
4
4
|
import os from "node:os";
|
|
@@ -15,20 +15,20 @@ import fs$1 from "node:fs";
|
|
|
15
15
|
import { intro, isCancel, outro, select, spinner, text } from "@clack/prompts";
|
|
16
16
|
import Database from "better-sqlite3";
|
|
17
17
|
import pc from "picocolors";
|
|
18
|
+
import { Buffer } from "node:buffer";
|
|
18
19
|
import * as XLSX from "xlsx";
|
|
19
|
-
import { glob, globSync } from "tinyglobby";
|
|
20
20
|
import { createOpenAICompatible } from "@ai-sdk/openai-compatible";
|
|
21
|
-
import { LangfuseSpanProcessor } from "@langfuse/otel";
|
|
22
|
-
import { NodeTracerProvider } from "@opentelemetry/sdk-trace-node";
|
|
23
21
|
import { APICallError, Output, generateText, jsonSchema } from "ai";
|
|
24
|
-
import mime from "mime";
|
|
25
22
|
import pRetry from "p-retry";
|
|
23
|
+
import mime from "mime";
|
|
26
24
|
import { jsonrepair } from "jsonrepair";
|
|
25
|
+
import { LangfuseSpanProcessor } from "@langfuse/otel";
|
|
26
|
+
import { NodeTracerProvider } from "@opentelemetry/sdk-trace-node";
|
|
27
|
+
import crypto from "node:crypto";
|
|
27
28
|
import { Client, extractNotionId } from "@notionhq/client";
|
|
28
|
-
import { Buffer } from "node:buffer";
|
|
29
29
|
import { execa } from "execa";
|
|
30
|
+
import { glob, globSync } from "tinyglobby";
|
|
30
31
|
import { extractText, getDocumentProxy, getMeta } from "unpdf";
|
|
31
|
-
import crypto from "node:crypto";
|
|
32
32
|
import { execFile } from "node:child_process";
|
|
33
33
|
import { promisify } from "node:util";
|
|
34
34
|
import * as chokidar from "chokidar";
|
|
@@ -215,6 +215,50 @@ function failCommand(message) {
|
|
|
215
215
|
process.exitCode = 1;
|
|
216
216
|
}
|
|
217
217
|
|
|
218
|
+
//#endregion
|
|
219
|
+
//#region src/core/export-manager.ts
|
|
220
|
+
function formatRowsConformingToSchema(rows, columns, schema, format) {
|
|
221
|
+
return rows.map((row) => {
|
|
222
|
+
const newRow = {};
|
|
223
|
+
columns.forEach((col) => {
|
|
224
|
+
const colName = col.name;
|
|
225
|
+
const val = row[colName];
|
|
226
|
+
const type = (schema?.properties?.[colName])?.type || "";
|
|
227
|
+
if (val === null || val === void 0) newRow[colName] = "";
|
|
228
|
+
else if (type === "boolean") if (format === "xlsx") newRow[colName] = val === 1 || val === "1" || val === true;
|
|
229
|
+
else newRow[colName] = val === 1 || val === "1" || val === true ? "true" : "false";
|
|
230
|
+
else if (type === "number" || type === "integer") if (val === "") newRow[colName] = "";
|
|
231
|
+
else {
|
|
232
|
+
const num = Number(val);
|
|
233
|
+
newRow[colName] = Number.isNaN(num) ? val : num;
|
|
234
|
+
}
|
|
235
|
+
else if (typeof val === "object") newRow[colName] = JSON.stringify(val);
|
|
236
|
+
else {
|
|
237
|
+
const dbType = (col.type || "").toLowerCase();
|
|
238
|
+
if ((dbType.includes("int") || dbType.includes("real") || dbType.includes("num") || dbType.includes("double") || dbType.includes("float")) && typeof val === "string" && val !== "") {
|
|
239
|
+
const num = Number(val);
|
|
240
|
+
newRow[colName] = Number.isNaN(num) ? val : num;
|
|
241
|
+
} else newRow[colName] = val;
|
|
242
|
+
}
|
|
243
|
+
});
|
|
244
|
+
return newRow;
|
|
245
|
+
});
|
|
246
|
+
}
|
|
247
|
+
function generateExportBuffer(tableName, formattedRows, columns, format) {
|
|
248
|
+
const ws = XLSX.utils.json_to_sheet(formattedRows, { header: columns.map((col) => col.name) });
|
|
249
|
+
if (format === "xlsx") {
|
|
250
|
+
const wb = XLSX.utils.book_new();
|
|
251
|
+
XLSX.utils.book_append_sheet(wb, ws, tableName.slice(0, 31));
|
|
252
|
+
return XLSX.write(wb, {
|
|
253
|
+
bookType: "xlsx",
|
|
254
|
+
type: "buffer"
|
|
255
|
+
});
|
|
256
|
+
} else {
|
|
257
|
+
const csv = XLSX.utils.sheet_to_csv(ws);
|
|
258
|
+
return Buffer.from("" + csv, "utf8");
|
|
259
|
+
}
|
|
260
|
+
}
|
|
261
|
+
|
|
218
262
|
//#endregion
|
|
219
263
|
//#region src/core/ai-extraction/model-capabilities.json
|
|
220
264
|
var model_capabilities_default = {
|
|
@@ -12814,6 +12858,28 @@ async function withRetry(fn, onRetry, maxRetries = 5) {
|
|
|
12814
12858
|
});
|
|
12815
12859
|
}
|
|
12816
12860
|
|
|
12861
|
+
//#endregion
|
|
12862
|
+
//#region src/core/ai-extraction/file-utils.ts
|
|
12863
|
+
function detectMimeType(filePath) {
|
|
12864
|
+
return mime.getType(filePath) ?? "application/octet-stream";
|
|
12865
|
+
}
|
|
12866
|
+
async function readFilePart(filePath) {
|
|
12867
|
+
const mimeStr = detectMimeType(filePath);
|
|
12868
|
+
const buffer = await fs.readFile(filePath);
|
|
12869
|
+
const name$1 = path.basename(filePath);
|
|
12870
|
+
if (mimeStr.startsWith("image/")) return {
|
|
12871
|
+
type: "image",
|
|
12872
|
+
image: buffer,
|
|
12873
|
+
mimeType: mimeStr
|
|
12874
|
+
};
|
|
12875
|
+
return {
|
|
12876
|
+
type: "file",
|
|
12877
|
+
data: buffer,
|
|
12878
|
+
mediaType: mimeStr,
|
|
12879
|
+
filename: name$1
|
|
12880
|
+
};
|
|
12881
|
+
}
|
|
12882
|
+
|
|
12817
12883
|
//#endregion
|
|
12818
12884
|
//#region src/core/ai-extraction/json-utils.ts
|
|
12819
12885
|
function parseJsonLike(text$1) {
|
|
@@ -12993,7 +13059,34 @@ function generatePromptSnapshot(schema, promptConfig = DEFAULT_PROMPT_CONFIG) {
|
|
|
12993
13059
|
}
|
|
12994
13060
|
|
|
12995
13061
|
//#endregion
|
|
12996
|
-
//#region src/core/ai-extraction/
|
|
13062
|
+
//#region src/core/ai-extraction/snapshot.ts
|
|
13063
|
+
const SYSTEM_PROMPT_REGEX = /## System Prompt\n([\s\S]*?)(?=## User Prompt|$)/;
|
|
13064
|
+
const USER_PROMPT_REGEX = /## User Prompt Template\n([\s\S]*)$/;
|
|
13065
|
+
async function loadPromptSnapshot(aiexDir, tableName) {
|
|
13066
|
+
const snapshotPath = path.join(aiexDir, "extracted", `${tableName}.prompt.md`);
|
|
13067
|
+
try {
|
|
13068
|
+
const content = await fs.readFile(snapshotPath, "utf-8");
|
|
13069
|
+
const systemMatch = content.match(SYSTEM_PROMPT_REGEX);
|
|
13070
|
+
const userMatch = content.match(USER_PROMPT_REGEX);
|
|
13071
|
+
if (systemMatch && userMatch) return {
|
|
13072
|
+
system: systemMatch[1].trim(),
|
|
13073
|
+
user: userMatch[1].trim()
|
|
13074
|
+
};
|
|
13075
|
+
} catch {}
|
|
13076
|
+
return null;
|
|
13077
|
+
}
|
|
13078
|
+
async function savePromptSnapshot(schema, aiexDir) {
|
|
13079
|
+
const content = generatePromptSnapshot(schema, (await readAIConfig(aiexDir))?.prompt ?? DEFAULT_PROMPT_CONFIG);
|
|
13080
|
+
const outputDir = path.join(aiexDir, "extracted");
|
|
13081
|
+
await fs.mkdir(outputDir, { recursive: true });
|
|
13082
|
+
const fileName = `${schema.table.name}.prompt.md`;
|
|
13083
|
+
const outputPath = path.join(outputDir, fileName);
|
|
13084
|
+
await fs.writeFile(outputPath, content);
|
|
13085
|
+
return outputPath;
|
|
13086
|
+
}
|
|
13087
|
+
|
|
13088
|
+
//#endregion
|
|
13089
|
+
//#region src/core/ai-extraction/telemetry.ts
|
|
12997
13090
|
let langfuseInitialized = false;
|
|
12998
13091
|
function initLangfuse(config) {
|
|
12999
13092
|
if (!config.langfuse?.publicKey || !config.langfuse.secretKey) return;
|
|
@@ -13010,28 +13103,9 @@ function initLangfuse(config) {
|
|
|
13010
13103
|
console.warn("[Langfuse] Failed to initialize tracing:", e instanceof Error ? e.message : e);
|
|
13011
13104
|
}
|
|
13012
13105
|
}
|
|
13013
|
-
|
|
13014
|
-
|
|
13015
|
-
|
|
13016
|
-
function detectMimeType(filePath) {
|
|
13017
|
-
return mime.getType(filePath) ?? "application/octet-stream";
|
|
13018
|
-
}
|
|
13019
|
-
async function readFilePart(filePath) {
|
|
13020
|
-
const mime$1 = detectMimeType(filePath);
|
|
13021
|
-
const buffer = await fs.readFile(filePath);
|
|
13022
|
-
const name$1 = path.basename(filePath);
|
|
13023
|
-
if (mime$1.startsWith("image/")) return {
|
|
13024
|
-
type: "image",
|
|
13025
|
-
image: buffer,
|
|
13026
|
-
mimeType: mime$1
|
|
13027
|
-
};
|
|
13028
|
-
return {
|
|
13029
|
-
type: "file",
|
|
13030
|
-
data: buffer,
|
|
13031
|
-
mediaType: mime$1,
|
|
13032
|
-
filename: name$1
|
|
13033
|
-
};
|
|
13034
|
-
}
|
|
13106
|
+
|
|
13107
|
+
//#endregion
|
|
13108
|
+
//#region src/core/ai-extraction/validator.ts
|
|
13035
13109
|
function nullableType(type) {
|
|
13036
13110
|
return type === "null" ? ["null"] : [type, "null"];
|
|
13037
13111
|
}
|
|
@@ -13126,19 +13200,10 @@ function validateExtractedData(schema, data) {
|
|
|
13126
13200
|
};
|
|
13127
13201
|
return { success: true };
|
|
13128
13202
|
}
|
|
13129
|
-
|
|
13130
|
-
|
|
13131
|
-
|
|
13132
|
-
|
|
13133
|
-
const systemMatch = content.match(SYSTEM_PROMPT_REGEX);
|
|
13134
|
-
const userMatch = content.match(USER_PROMPT_REGEX);
|
|
13135
|
-
if (systemMatch && userMatch) return {
|
|
13136
|
-
system: systemMatch[1].trim(),
|
|
13137
|
-
user: userMatch[1].trim()
|
|
13138
|
-
};
|
|
13139
|
-
} catch {}
|
|
13140
|
-
return null;
|
|
13141
|
-
}
|
|
13203
|
+
|
|
13204
|
+
//#endregion
|
|
13205
|
+
//#region src/core/ai-extraction/extractor.ts
|
|
13206
|
+
const OPENAI_COMPATIBLE_PROVIDER_NAME = "openai-compatible";
|
|
13142
13207
|
async function extractStructuredData(input) {
|
|
13143
13208
|
const { config, schema, text: text$1, aiexDir, file, modelOverride } = input;
|
|
13144
13209
|
if (!config.provider.apiKey) return {
|
|
@@ -13188,66 +13253,118 @@ async function extractStructuredData(input) {
|
|
|
13188
13253
|
user = generated.user;
|
|
13189
13254
|
}
|
|
13190
13255
|
const outputSchema = jsonSchema(schemaToExtractionOutputSchema(schema));
|
|
13191
|
-
let result;
|
|
13192
13256
|
const timeoutMs = (config.provider.timeout ?? 300) * 1e3;
|
|
13193
|
-
|
|
13194
|
-
|
|
13195
|
-
|
|
13196
|
-
|
|
13197
|
-
|
|
13198
|
-
|
|
13199
|
-
|
|
13200
|
-
|
|
13201
|
-
|
|
13202
|
-
|
|
13203
|
-
|
|
13204
|
-
|
|
13205
|
-
|
|
13206
|
-
|
|
13207
|
-
|
|
13208
|
-
|
|
13209
|
-
|
|
13210
|
-
|
|
13211
|
-
|
|
13212
|
-
|
|
13213
|
-
|
|
13214
|
-
|
|
13215
|
-
|
|
13216
|
-
|
|
13217
|
-
|
|
13218
|
-
|
|
13219
|
-
|
|
13220
|
-
|
|
13221
|
-
|
|
13222
|
-
|
|
13223
|
-
|
|
13257
|
+
let systemPrompt = system;
|
|
13258
|
+
let userPrompt = user;
|
|
13259
|
+
const maxAttempts = 3;
|
|
13260
|
+
let lastError = "";
|
|
13261
|
+
let totalPromptTokens = 0;
|
|
13262
|
+
let totalCompletionTokens = 0;
|
|
13263
|
+
for (let attempt = 1; attempt <= maxAttempts; attempt++) {
|
|
13264
|
+
let result = null;
|
|
13265
|
+
let data;
|
|
13266
|
+
let parseError;
|
|
13267
|
+
let validationError;
|
|
13268
|
+
try {
|
|
13269
|
+
if (useFileContent) {
|
|
13270
|
+
const filePart = await readFilePart(file);
|
|
13271
|
+
const fileName = filePart.type === "file" ? filePart.filename : path.basename(file);
|
|
13272
|
+
const contentParts = [{
|
|
13273
|
+
type: "text",
|
|
13274
|
+
text: userPrompt.includes(PLACEHOLDER_TEXT) ? userPrompt.replaceAll(PLACEHOLDER_TEXT, text$1 || `Data is contained in the attached file: ${fileName}`) : userPrompt
|
|
13275
|
+
}, filePart];
|
|
13276
|
+
const fileOpts = {
|
|
13277
|
+
model: provider.chatModel(selected.name),
|
|
13278
|
+
system: systemPrompt,
|
|
13279
|
+
messages: [{
|
|
13280
|
+
role: "user",
|
|
13281
|
+
content: contentParts
|
|
13282
|
+
}],
|
|
13283
|
+
abortSignal: AbortSignal.timeout(timeoutMs),
|
|
13284
|
+
maxRetries: 0,
|
|
13285
|
+
experimental_telemetry: { isEnabled: useTelemetry }
|
|
13286
|
+
};
|
|
13287
|
+
if (useStructuredOutput) fileOpts.output = Output.object({ schema: outputSchema });
|
|
13288
|
+
result = await withRetry(() => generateText(fileOpts), input.onRetry);
|
|
13289
|
+
} else {
|
|
13290
|
+
const textOpts = {
|
|
13291
|
+
model: provider.chatModel(selected.name),
|
|
13292
|
+
system: systemPrompt,
|
|
13293
|
+
prompt: userPrompt,
|
|
13294
|
+
abortSignal: AbortSignal.timeout(timeoutMs),
|
|
13295
|
+
maxRetries: 0,
|
|
13296
|
+
experimental_telemetry: { isEnabled: useTelemetry }
|
|
13297
|
+
};
|
|
13298
|
+
if (useStructuredOutput) textOpts.output = Output.object({ schema: outputSchema });
|
|
13299
|
+
result = await withRetry(() => generateText(textOpts), input.onRetry);
|
|
13300
|
+
}
|
|
13301
|
+
if (result.usage) {
|
|
13302
|
+
totalPromptTokens += result.usage.inputTokens ?? 0;
|
|
13303
|
+
totalCompletionTokens += result.usage.outputTokens ?? 0;
|
|
13304
|
+
}
|
|
13305
|
+
if (useStructuredOutput) data = result.output;
|
|
13306
|
+
else try {
|
|
13307
|
+
data = safeParseJSON(result.text);
|
|
13308
|
+
} catch (e) {
|
|
13309
|
+
parseError = e instanceof Error ? e.message : String(e);
|
|
13310
|
+
}
|
|
13311
|
+
} catch (error) {
|
|
13312
|
+
parseError = getErrorMessage(error);
|
|
13313
|
+
}
|
|
13314
|
+
if (!parseError && data !== void 0) {
|
|
13315
|
+
const validation = validateExtractedData(schema, data);
|
|
13316
|
+
if (validation.success) {
|
|
13317
|
+
const outputDir = path.resolve(aiexDir, config.extraction.outputDir.replace(".aiex/", ""));
|
|
13318
|
+
await fs.mkdir(outputDir, { recursive: true });
|
|
13319
|
+
const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
|
|
13320
|
+
const outputFileName = `${schema.table.name}-${timestamp}.json`;
|
|
13321
|
+
const outputPath = path.join(outputDir, outputFileName);
|
|
13322
|
+
await writeFile(outputPath, data, {
|
|
13323
|
+
spaces: 2,
|
|
13324
|
+
EOL: "\n"
|
|
13325
|
+
});
|
|
13326
|
+
return {
|
|
13327
|
+
success: true,
|
|
13328
|
+
outputPath,
|
|
13329
|
+
data,
|
|
13330
|
+
tokensUsed: {
|
|
13331
|
+
prompt: totalPromptTokens,
|
|
13332
|
+
completion: totalCompletionTokens,
|
|
13333
|
+
total: totalPromptTokens + totalCompletionTokens
|
|
13334
|
+
}
|
|
13335
|
+
};
|
|
13336
|
+
} else validationError = validation.error;
|
|
13337
|
+
}
|
|
13338
|
+
const errorMsg = parseError || validationError || "Unknown validation error";
|
|
13339
|
+
lastError = errorMsg;
|
|
13340
|
+
if (attempt < maxAttempts) {
|
|
13341
|
+
const invalidJson = data !== void 0 ? JSON.stringify(data, null, 2) : result ? result.text : "";
|
|
13342
|
+
systemPrompt = `You are a precise data correction assistant. Your task is to correct validation errors in a previously generated JSON object to make it comply with the provided JSON Schema.
|
|
13343
|
+
|
|
13344
|
+
CRITICAL RULES:
|
|
13345
|
+
1. Only correct the fields that failed validation.
|
|
13346
|
+
2. Preserve all other correctly extracted fields and their values exactly.
|
|
13347
|
+
3. Return ONLY the corrected JSON object. No explanations, no markdown blocks other than JSON.`;
|
|
13348
|
+
userPrompt = `The JSON data you generated previously failed validation. Please correct it.
|
|
13349
|
+
|
|
13350
|
+
[Original Text]
|
|
13351
|
+
${text$1 || "Data is contained in the attached file."}
|
|
13352
|
+
|
|
13353
|
+
[JSON Schema Definition]
|
|
13354
|
+
${JSON.stringify(schemaToExtractionOutputSchema(schema), null, 2)}
|
|
13355
|
+
|
|
13356
|
+
[Previously Generated Invalid JSON]
|
|
13357
|
+
${invalidJson}
|
|
13358
|
+
|
|
13359
|
+
[Validation Error Details]
|
|
13360
|
+
${errorMsg}
|
|
13361
|
+
|
|
13362
|
+
Please output the corrected JSON object now:`;
|
|
13363
|
+
}
|
|
13224
13364
|
}
|
|
13225
|
-
let data;
|
|
13226
|
-
if (useStructuredOutput) data = result.output;
|
|
13227
|
-
else data = safeParseJSON(result.text);
|
|
13228
|
-
const validation = validateExtractedData(schema, data);
|
|
13229
|
-
if (!validation.success) return {
|
|
13230
|
-
success: false,
|
|
13231
|
-
error: validation.error
|
|
13232
|
-
};
|
|
13233
|
-
const outputDir = path.resolve(aiexDir, config.extraction.outputDir.replace(".aiex/", ""));
|
|
13234
|
-
await fs.mkdir(outputDir, { recursive: true });
|
|
13235
|
-
const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
|
|
13236
|
-
const outputFileName = `${schema.table.name}-${timestamp}.json`;
|
|
13237
|
-
const outputPath = path.join(outputDir, outputFileName);
|
|
13238
|
-
await writeFile(outputPath, data, {
|
|
13239
|
-
spaces: 2,
|
|
13240
|
-
EOL: "\n"
|
|
13241
|
-
});
|
|
13242
13365
|
return {
|
|
13243
|
-
success:
|
|
13244
|
-
|
|
13245
|
-
data,
|
|
13246
|
-
tokensUsed: result.usage ? {
|
|
13247
|
-
prompt: result.usage.inputTokens ?? 0,
|
|
13248
|
-
completion: result.usage.outputTokens ?? 0,
|
|
13249
|
-
total: (result.usage.inputTokens ?? 0) + (result.usage.outputTokens ?? 0)
|
|
13250
|
-
} : void 0
|
|
13366
|
+
success: false,
|
|
13367
|
+
error: lastError || "Extraction failed after self-reflection retries"
|
|
13251
13368
|
};
|
|
13252
13369
|
} catch (error) {
|
|
13253
13370
|
return {
|
|
@@ -13394,18 +13511,6 @@ function insertExtractedData(db, schema, data) {
|
|
|
13394
13511
|
}
|
|
13395
13512
|
}
|
|
13396
13513
|
|
|
13397
|
-
//#endregion
|
|
13398
|
-
//#region src/core/ai-extraction/snapshot.ts
|
|
13399
|
-
async function savePromptSnapshot(schema, aiexDir) {
|
|
13400
|
-
const content = generatePromptSnapshot(schema, (await readAIConfig(aiexDir))?.prompt ?? DEFAULT_PROMPT_CONFIG);
|
|
13401
|
-
const outputDir = path.join(aiexDir, "extracted");
|
|
13402
|
-
await fs.mkdir(outputDir, { recursive: true });
|
|
13403
|
-
const fileName = `${schema.table.name}.prompt.md`;
|
|
13404
|
-
const outputPath = path.join(outputDir, fileName);
|
|
13405
|
-
await fs.writeFile(outputPath, content);
|
|
13406
|
-
return outputPath;
|
|
13407
|
-
}
|
|
13408
|
-
|
|
13409
13514
|
//#endregion
|
|
13410
13515
|
//#region src/core/extraction-audit.ts
|
|
13411
13516
|
const AUDIT_ID_RE = /^[\w.-]+$/;
|
|
@@ -13542,78 +13647,19 @@ async function findSucceededAuditByHash(aiexDir, schemaName, fileHash) {
|
|
|
13542
13647
|
}
|
|
13543
13648
|
|
|
13544
13649
|
//#endregion
|
|
13545
|
-
//#region src/
|
|
13546
|
-
|
|
13547
|
-
|
|
13548
|
-
|
|
13549
|
-
|
|
13550
|
-
|
|
13551
|
-
|
|
13552
|
-
|
|
13553
|
-
|
|
13554
|
-
|
|
13555
|
-
|
|
13556
|
-
"image/svg+xml",
|
|
13557
|
-
"application/pdf",
|
|
13558
|
-
"text/plain",
|
|
13559
|
-
"text/markdown",
|
|
13560
|
-
"text/csv",
|
|
13561
|
-
"application/json",
|
|
13562
|
-
"text/html",
|
|
13563
|
-
"text/xml",
|
|
13564
|
-
"application/x-yaml",
|
|
13565
|
-
"text/yaml"
|
|
13566
|
-
]);
|
|
13567
|
-
const MIME_TO_EXT = {
|
|
13568
|
-
"image/png": "png",
|
|
13569
|
-
"image/jpeg": "jpg",
|
|
13570
|
-
"image/gif": "gif",
|
|
13571
|
-
"image/webp": "webp",
|
|
13572
|
-
"image/bmp": "bmp",
|
|
13573
|
-
"image/svg+xml": "svg",
|
|
13574
|
-
"application/pdf": "pdf",
|
|
13575
|
-
"text/plain": "txt",
|
|
13576
|
-
"text/markdown": "md",
|
|
13577
|
-
"text/csv": "csv",
|
|
13578
|
-
"application/json": "json",
|
|
13579
|
-
"text/html": "html",
|
|
13580
|
-
"text/xml": "xml",
|
|
13581
|
-
"application/x-yaml": "yaml",
|
|
13582
|
-
"text/yaml": "yaml"
|
|
13583
|
-
};
|
|
13584
|
-
function bytesToMB(bytes) {
|
|
13585
|
-
return bytes / (1024 * 1024);
|
|
13586
|
-
}
|
|
13587
|
-
function getExtensionFromMime(mimeType) {
|
|
13588
|
-
return MIME_TO_EXT[mimeType];
|
|
13589
|
-
}
|
|
13590
|
-
function isAllowedMimeType(mimeType) {
|
|
13591
|
-
return SUPPORTED_MIME_TYPES.has(mimeType);
|
|
13592
|
-
}
|
|
13593
|
-
function unsupportedFileTypeMessage(mimeType) {
|
|
13594
|
-
return t("errors.file.unsupportedType", {
|
|
13595
|
-
type: mimeType,
|
|
13596
|
-
supported: SUPPORTED_FILE_TYPES_TEXT
|
|
13650
|
+
//#region src/utils/hash.ts
|
|
13651
|
+
/**
|
|
13652
|
+
* Helper to compute SHA-256 hash of a file asynchronously.
|
|
13653
|
+
*/
|
|
13654
|
+
function getFileHash(filePath) {
|
|
13655
|
+
return new Promise((resolve, reject) => {
|
|
13656
|
+
const hash = crypto.createHash("sha256");
|
|
13657
|
+
const stream = fs$1.createReadStream(filePath);
|
|
13658
|
+
stream.on("data", (data) => hash.update(data));
|
|
13659
|
+
stream.on("end", () => resolve(hash.digest("hex")));
|
|
13660
|
+
stream.on("error", (err) => reject(err));
|
|
13597
13661
|
});
|
|
13598
13662
|
}
|
|
13599
|
-
function isMissingUploadFileError(error) {
|
|
13600
|
-
return !!error && typeof error === "object" && error.code === "ENOENT";
|
|
13601
|
-
}
|
|
13602
|
-
var FileValidationError = class extends Error {
|
|
13603
|
-
constructor(message) {
|
|
13604
|
-
super(message);
|
|
13605
|
-
this.name = "FileValidationError";
|
|
13606
|
-
}
|
|
13607
|
-
};
|
|
13608
|
-
function validateFileUpload(file) {
|
|
13609
|
-
if (file.size === 0) throw new FileValidationError(t("errors.file.empty"));
|
|
13610
|
-
if (file.size > MAX_UPLOAD_SIZE) throw new FileValidationError(t("errors.file.sizeExceeded", {
|
|
13611
|
-
size: bytesToMB(file.size).toFixed(1),
|
|
13612
|
-
limit: MAX_UPLOAD_SIZE_TEXT,
|
|
13613
|
-
file: file.name
|
|
13614
|
-
}));
|
|
13615
|
-
if (!isAllowedMimeType(file.type)) throw new FileValidationError(unsupportedFileTypeMessage(file.type));
|
|
13616
|
-
}
|
|
13617
13663
|
|
|
13618
13664
|
//#endregion
|
|
13619
13665
|
//#region src/core/notion-sink.ts
|
|
@@ -13847,20 +13893,148 @@ async function writeNotionPage(config, schemaName, data) {
|
|
|
13847
13893
|
}
|
|
13848
13894
|
|
|
13849
13895
|
//#endregion
|
|
13850
|
-
//#region src/core/
|
|
13851
|
-
function
|
|
13852
|
-
|
|
13896
|
+
//#region src/core/webhook-sink.ts
|
|
13897
|
+
async function sendWebhook(config, payload) {
|
|
13898
|
+
if (!config || !config.enabled || !config.url) return;
|
|
13899
|
+
const body = JSON.stringify(payload);
|
|
13900
|
+
const headers = {
|
|
13901
|
+
"Content-Type": "application/json",
|
|
13902
|
+
"User-Agent": "aiex-webhook-dispatcher"
|
|
13903
|
+
};
|
|
13904
|
+
if (config.secret) headers["X-Aiex-Signature"] = `sha256=${crypto.createHmac("sha256", config.secret).update(body).digest("hex")}`;
|
|
13905
|
+
const response = await fetch(config.url, {
|
|
13906
|
+
method: "POST",
|
|
13907
|
+
headers,
|
|
13908
|
+
body
|
|
13909
|
+
});
|
|
13910
|
+
if (!response.ok) throw new Error(`Webhook request failed with status: ${response.status} ${response.statusText}`);
|
|
13853
13911
|
}
|
|
13854
|
-
|
|
13855
|
-
|
|
13912
|
+
|
|
13913
|
+
//#endregion
|
|
13914
|
+
//#region src/core/integration/dispatcher.ts
|
|
13915
|
+
async function syncResultToNotion(aiConfig, schemaName, data) {
|
|
13916
|
+
if (!data || typeof data !== "object" || Array.isArray(data)) throw new Error(t("errors.ai.extractionNotObject"));
|
|
13917
|
+
const page = await writeNotionPage(aiConfig.notion, schemaName, data);
|
|
13918
|
+
return [{
|
|
13919
|
+
databaseId: page.databaseId,
|
|
13920
|
+
pageId: page.pageId
|
|
13921
|
+
}];
|
|
13856
13922
|
}
|
|
13857
|
-
|
|
13858
|
-
|
|
13859
|
-
|
|
13860
|
-
|
|
13861
|
-
|
|
13862
|
-
|
|
13863
|
-
|
|
13923
|
+
function shouldSyncNotion(aiConfig, schemaName) {
|
|
13924
|
+
return !!aiConfig.notion?.enabled && !!aiConfig.notion.schemas?.[schemaName]?.databaseId?.trim();
|
|
13925
|
+
}
|
|
13926
|
+
async function triggerWebhook(aiConfig, auditId, schemaName, event, source, data, error, tokensUsed, quiet = false) {
|
|
13927
|
+
if (!aiConfig.webhook?.enabled) return;
|
|
13928
|
+
try {
|
|
13929
|
+
await sendWebhook(aiConfig.webhook, {
|
|
13930
|
+
event,
|
|
13931
|
+
schemaName,
|
|
13932
|
+
auditId,
|
|
13933
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
13934
|
+
source: {
|
|
13935
|
+
type: source.type,
|
|
13936
|
+
fileName: source.filePath ? path.basename(source.filePath) : void 0,
|
|
13937
|
+
filePath: source.filePath
|
|
13938
|
+
},
|
|
13939
|
+
data,
|
|
13940
|
+
error,
|
|
13941
|
+
tokensUsed
|
|
13942
|
+
});
|
|
13943
|
+
if (!quiet) consola.success(t("extract.file.webhookSynced"));
|
|
13944
|
+
} catch (err) {
|
|
13945
|
+
if (!quiet) consola.error(t("extract.file.webhookSyncFail", { error: err instanceof Error ? err.message : String(err) }));
|
|
13946
|
+
}
|
|
13947
|
+
}
|
|
13948
|
+
|
|
13949
|
+
//#endregion
|
|
13950
|
+
//#region src/core/file-constants.ts
|
|
13951
|
+
const MAX_UPLOAD_SIZE = 30 * 1024 * 1024;
|
|
13952
|
+
const MAX_UPLOAD_SIZE_TEXT = "30MB";
|
|
13953
|
+
const SUPPORTED_FILE_TYPES_TEXT = "images, PDF, text, markdown, CSV, JSON, HTML, XML, YAML";
|
|
13954
|
+
const MISSING_UPLOAD_FILE_TEXT = t("errors.file.missingUpload");
|
|
13955
|
+
const SUPPORTED_MIME_TYPES = new Set([
|
|
13956
|
+
"image/png",
|
|
13957
|
+
"image/jpeg",
|
|
13958
|
+
"image/gif",
|
|
13959
|
+
"image/webp",
|
|
13960
|
+
"image/bmp",
|
|
13961
|
+
"image/svg+xml",
|
|
13962
|
+
"application/pdf",
|
|
13963
|
+
"text/plain",
|
|
13964
|
+
"text/markdown",
|
|
13965
|
+
"text/csv",
|
|
13966
|
+
"application/json",
|
|
13967
|
+
"text/html",
|
|
13968
|
+
"text/xml",
|
|
13969
|
+
"application/x-yaml",
|
|
13970
|
+
"text/yaml"
|
|
13971
|
+
]);
|
|
13972
|
+
const MIME_TO_EXT = {
|
|
13973
|
+
"image/png": "png",
|
|
13974
|
+
"image/jpeg": "jpg",
|
|
13975
|
+
"image/gif": "gif",
|
|
13976
|
+
"image/webp": "webp",
|
|
13977
|
+
"image/bmp": "bmp",
|
|
13978
|
+
"image/svg+xml": "svg",
|
|
13979
|
+
"application/pdf": "pdf",
|
|
13980
|
+
"text/plain": "txt",
|
|
13981
|
+
"text/markdown": "md",
|
|
13982
|
+
"text/csv": "csv",
|
|
13983
|
+
"application/json": "json",
|
|
13984
|
+
"text/html": "html",
|
|
13985
|
+
"text/xml": "xml",
|
|
13986
|
+
"application/x-yaml": "yaml",
|
|
13987
|
+
"text/yaml": "yaml"
|
|
13988
|
+
};
|
|
13989
|
+
function bytesToMB(bytes) {
|
|
13990
|
+
return bytes / (1024 * 1024);
|
|
13991
|
+
}
|
|
13992
|
+
function getExtensionFromMime(mimeType) {
|
|
13993
|
+
return MIME_TO_EXT[mimeType];
|
|
13994
|
+
}
|
|
13995
|
+
function isAllowedMimeType(mimeType) {
|
|
13996
|
+
return SUPPORTED_MIME_TYPES.has(mimeType);
|
|
13997
|
+
}
|
|
13998
|
+
function unsupportedFileTypeMessage(mimeType) {
|
|
13999
|
+
return t("errors.file.unsupportedType", {
|
|
14000
|
+
type: mimeType,
|
|
14001
|
+
supported: SUPPORTED_FILE_TYPES_TEXT
|
|
14002
|
+
});
|
|
14003
|
+
}
|
|
14004
|
+
function isMissingUploadFileError(error) {
|
|
14005
|
+
return !!error && typeof error === "object" && error.code === "ENOENT";
|
|
14006
|
+
}
|
|
14007
|
+
var FileValidationError = class extends Error {
|
|
14008
|
+
constructor(message) {
|
|
14009
|
+
super(message);
|
|
14010
|
+
this.name = "FileValidationError";
|
|
14011
|
+
}
|
|
14012
|
+
};
|
|
14013
|
+
function validateFileUpload(file) {
|
|
14014
|
+
if (file.size === 0) throw new FileValidationError(t("errors.file.empty"));
|
|
14015
|
+
if (file.size > MAX_UPLOAD_SIZE) throw new FileValidationError(t("errors.file.sizeExceeded", {
|
|
14016
|
+
size: bytesToMB(file.size).toFixed(1),
|
|
14017
|
+
limit: MAX_UPLOAD_SIZE_TEXT,
|
|
14018
|
+
file: file.name
|
|
14019
|
+
}));
|
|
14020
|
+
if (!isAllowedMimeType(file.type)) throw new FileValidationError(unsupportedFileTypeMessage(file.type));
|
|
14021
|
+
}
|
|
14022
|
+
|
|
14023
|
+
//#endregion
|
|
14024
|
+
//#region src/core/pdf-converter/external.ts
|
|
14025
|
+
function applyTemplate(value, context) {
|
|
14026
|
+
return value.replaceAll("{input}", context.input).replaceAll("{outputDir}", context.outputDir).replaceAll("{basename}", context.basename);
|
|
14027
|
+
}
|
|
14028
|
+
function isError(error) {
|
|
14029
|
+
return error instanceof Error;
|
|
14030
|
+
}
|
|
14031
|
+
async function pathExists(filePath) {
|
|
14032
|
+
try {
|
|
14033
|
+
await fs.access(filePath);
|
|
14034
|
+
return true;
|
|
14035
|
+
} catch {
|
|
14036
|
+
return false;
|
|
14037
|
+
}
|
|
13864
14038
|
}
|
|
13865
14039
|
async function collectMarkdownFiles(dir) {
|
|
13866
14040
|
return (await glob("**/*.md", {
|
|
@@ -14022,22 +14196,7 @@ function createPdfConverter(config) {
|
|
|
14022
14196
|
}
|
|
14023
14197
|
|
|
14024
14198
|
//#endregion
|
|
14025
|
-
//#region src/
|
|
14026
|
-
/**
|
|
14027
|
-
* Helper to compute SHA-256 hash of a file asynchronously.
|
|
14028
|
-
*/
|
|
14029
|
-
function getFileHash(filePath) {
|
|
14030
|
-
return new Promise((resolve, reject) => {
|
|
14031
|
-
const hash = crypto.createHash("sha256");
|
|
14032
|
-
const stream = fs$1.createReadStream(filePath);
|
|
14033
|
-
stream.on("data", (data) => hash.update(data));
|
|
14034
|
-
stream.on("end", () => resolve(hash.digest("hex")));
|
|
14035
|
-
stream.on("error", (err) => reject(err));
|
|
14036
|
-
});
|
|
14037
|
-
}
|
|
14038
|
-
|
|
14039
|
-
//#endregion
|
|
14040
|
-
//#region src/core/extract-runner.ts
|
|
14199
|
+
//#region src/core/pdf-converter/orchestrator.ts
|
|
14041
14200
|
const FILE_PART_EXTENSIONS = new Set([
|
|
14042
14201
|
"png",
|
|
14043
14202
|
"jpg",
|
|
@@ -14047,6 +14206,51 @@ const FILE_PART_EXTENSIONS = new Set([
|
|
|
14047
14206
|
"bmp",
|
|
14048
14207
|
"svg"
|
|
14049
14208
|
]);
|
|
14209
|
+
const PDF_EXT_RE = /\.pdf$/i;
|
|
14210
|
+
async function readExtractFileInput(filePath, aiConfig, modelOverride) {
|
|
14211
|
+
const stat = fs$1.statSync(filePath);
|
|
14212
|
+
if (stat.size > MAX_UPLOAD_SIZE) throw new Error(t("errors.file.sizeExceeded", {
|
|
14213
|
+
size: bytesToMB(stat.size).toFixed(1),
|
|
14214
|
+
limit: MAX_UPLOAD_SIZE_TEXT,
|
|
14215
|
+
file: filePath
|
|
14216
|
+
}));
|
|
14217
|
+
const ext = path.extname(filePath).toLowerCase().replace(".", "");
|
|
14218
|
+
if (FILE_PART_EXTENSIONS.has(ext)) {
|
|
14219
|
+
if (shouldUseImageOcrFallback(aiConfig, modelOverride)) {
|
|
14220
|
+
const result = await recognizeImageText(filePath, aiConfig?.image);
|
|
14221
|
+
consola.info(t("extract.file.ocrText", { confidence: (result.confidence * 100).toFixed(1) }));
|
|
14222
|
+
return { text: result.text };
|
|
14223
|
+
}
|
|
14224
|
+
return {
|
|
14225
|
+
text: "",
|
|
14226
|
+
filePath
|
|
14227
|
+
};
|
|
14228
|
+
}
|
|
14229
|
+
if (ext === "pdf") {
|
|
14230
|
+
const buffer = await fs.readFile(filePath);
|
|
14231
|
+
const converter = createPdfConverter(aiConfig?.pdf);
|
|
14232
|
+
const result = await converter.convert(buffer, filePath);
|
|
14233
|
+
if (result.metadata?.fallback === "true") consola.info(t("extract.file.pdfFallback", { count: result.pageCount }));
|
|
14234
|
+
else consola.info(t("extract.file.pdfConverted", {
|
|
14235
|
+
name: converter.name,
|
|
14236
|
+
count: result.pageCount
|
|
14237
|
+
}));
|
|
14238
|
+
const mdPath = filePath.replace(PDF_EXT_RE, ".md");
|
|
14239
|
+
try {
|
|
14240
|
+
await fs.writeFile(mdPath, result.text);
|
|
14241
|
+
consola.info(t("extract.file.markdownSaved", { path: mdPath }));
|
|
14242
|
+
} catch {
|
|
14243
|
+
const fallbackMd = path.join(os.tmpdir(), `${path.basename(filePath, ".pdf")}.md`);
|
|
14244
|
+
await fs.writeFile(fallbackMd, result.text);
|
|
14245
|
+
consola.info(t("extract.file.markdownSaved", { path: fallbackMd }));
|
|
14246
|
+
}
|
|
14247
|
+
return { text: result.text };
|
|
14248
|
+
}
|
|
14249
|
+
return { text: await fs.readFile(filePath, "utf-8") };
|
|
14250
|
+
}
|
|
14251
|
+
|
|
14252
|
+
//#endregion
|
|
14253
|
+
//#region src/core/batch/batch-processor.ts
|
|
14050
14254
|
const SUPPORTED_EXTENSIONS$1 = new Set([
|
|
14051
14255
|
...FILE_PART_EXTENSIONS,
|
|
14052
14256
|
"pdf",
|
|
@@ -14059,20 +14263,89 @@ const SUPPORTED_EXTENSIONS$1 = new Set([
|
|
|
14059
14263
|
"yaml",
|
|
14060
14264
|
"yml"
|
|
14061
14265
|
]);
|
|
14062
|
-
const PDF_EXT_RE = /\.pdf$/i;
|
|
14063
|
-
const JSON_EXT_RE$1 = /\.json$/;
|
|
14064
14266
|
const SUPPORTED_FILE_PATTERN = `*.{${[...SUPPORTED_EXTENSIONS$1].join(",")}}`;
|
|
14065
|
-
|
|
14066
|
-
if (!
|
|
14067
|
-
|
|
14068
|
-
|
|
14069
|
-
|
|
14070
|
-
|
|
14071
|
-
}
|
|
14267
|
+
function listSupportedFiles(dir, pattern) {
|
|
14268
|
+
if (!fs$1.statSync(dir).isDirectory()) throw new Error(t("errors.file.notADirectory", { dir }));
|
|
14269
|
+
return globSync(pattern ?? SUPPORTED_FILE_PATTERN, {
|
|
14270
|
+
cwd: dir,
|
|
14271
|
+
absolute: true,
|
|
14272
|
+
onlyFiles: true
|
|
14273
|
+
}).filter((file) => {
|
|
14274
|
+
const ext = path.extname(file).toLowerCase().replace(".", "");
|
|
14275
|
+
return SUPPORTED_EXTENSIONS$1.has(ext);
|
|
14276
|
+
}).sort();
|
|
14072
14277
|
}
|
|
14073
|
-
function
|
|
14074
|
-
|
|
14278
|
+
async function processOneFile(aiexDir, config, aiConfig, schemaName, filePath, modelOverride, options) {
|
|
14279
|
+
const result = await runAuditedExtraction({
|
|
14280
|
+
aiexDir,
|
|
14281
|
+
config,
|
|
14282
|
+
aiConfig,
|
|
14283
|
+
schemaName,
|
|
14284
|
+
source: {
|
|
14285
|
+
type: "file",
|
|
14286
|
+
filePath
|
|
14287
|
+
},
|
|
14288
|
+
modelOverride,
|
|
14289
|
+
insert: options?.insert,
|
|
14290
|
+
force: options?.force,
|
|
14291
|
+
quiet: false
|
|
14292
|
+
});
|
|
14293
|
+
if (result.success) {
|
|
14294
|
+
if (!result.skipped) consola.success(t("extract.file.processSuccess", { file: path.basename(filePath) }));
|
|
14295
|
+
return true;
|
|
14296
|
+
}
|
|
14297
|
+
return false;
|
|
14298
|
+
}
|
|
14299
|
+
async function runBatchExtraction(aiexDir, config, aiConfig, schemaName, dir, globPattern, modelOverride, options) {
|
|
14300
|
+
consola.info(t("extract.batch.scanning", { dir: pc.cyan(dir) }));
|
|
14301
|
+
let files;
|
|
14302
|
+
try {
|
|
14303
|
+
files = listSupportedFiles(dir, globPattern);
|
|
14304
|
+
} catch {
|
|
14305
|
+
return {
|
|
14306
|
+
ok: false,
|
|
14307
|
+
successCount: 0,
|
|
14308
|
+
failCount: 0,
|
|
14309
|
+
error: t("extract.batch.errors.cannotReadDir", { dir })
|
|
14310
|
+
};
|
|
14311
|
+
}
|
|
14312
|
+
if (files.length === 0) return {
|
|
14313
|
+
ok: false,
|
|
14314
|
+
successCount: 0,
|
|
14315
|
+
failCount: 0,
|
|
14316
|
+
error: t("extract.batch.errors.noSupportedFiles", { dir })
|
|
14317
|
+
};
|
|
14318
|
+
consola.info(t("extract.batch.found", { count: files.length }));
|
|
14319
|
+
let successCount = 0;
|
|
14320
|
+
let failCount = 0;
|
|
14321
|
+
for (let i = 0; i < files.length; i++) {
|
|
14322
|
+
const file = files[i];
|
|
14323
|
+
consola.info(`\n${t("extract.batch.processing", {
|
|
14324
|
+
current: i + 1,
|
|
14325
|
+
total: files.length,
|
|
14326
|
+
file: pc.cyan(path.basename(file))
|
|
14327
|
+
})}`);
|
|
14328
|
+
if (await processOneFile(aiexDir, config, aiConfig, schemaName, file, modelOverride, {
|
|
14329
|
+
insert: options?.insert,
|
|
14330
|
+
force: options?.force
|
|
14331
|
+
})) successCount++;
|
|
14332
|
+
else failCount++;
|
|
14333
|
+
}
|
|
14334
|
+
consola.info(`\n${t("extract.batch.complete", {
|
|
14335
|
+
success: pc.green(successCount),
|
|
14336
|
+
fail: pc.red(failCount),
|
|
14337
|
+
total: files.length
|
|
14338
|
+
})}`);
|
|
14339
|
+
return {
|
|
14340
|
+
ok: true,
|
|
14341
|
+
successCount,
|
|
14342
|
+
failCount
|
|
14343
|
+
};
|
|
14075
14344
|
}
|
|
14345
|
+
|
|
14346
|
+
//#endregion
|
|
14347
|
+
//#region src/core/extract-runner.ts
|
|
14348
|
+
const JSON_EXT_RE$1 = /\.json$/;
|
|
14076
14349
|
async function ensureDatabaseReady(dbPath, schema) {
|
|
14077
14350
|
try {
|
|
14078
14351
|
await fs.access(dbPath);
|
|
@@ -14098,17 +14371,6 @@ async function ensureDatabaseReady(dbPath, schema) {
|
|
|
14098
14371
|
}
|
|
14099
14372
|
return null;
|
|
14100
14373
|
}
|
|
14101
|
-
function listSupportedFiles(dir, pattern) {
|
|
14102
|
-
if (!fs$1.statSync(dir).isDirectory()) throw new Error(t("errors.file.notADirectory", { dir }));
|
|
14103
|
-
return globSync(pattern ?? SUPPORTED_FILE_PATTERN, {
|
|
14104
|
-
cwd: dir,
|
|
14105
|
-
absolute: true,
|
|
14106
|
-
onlyFiles: true
|
|
14107
|
-
}).filter((file) => {
|
|
14108
|
-
const ext = path.extname(file).toLowerCase().replace(".", "");
|
|
14109
|
-
return SUPPORTED_EXTENSIONS$1.has(ext);
|
|
14110
|
-
}).sort();
|
|
14111
|
-
}
|
|
14112
14374
|
async function loadSchema(config, schemaName) {
|
|
14113
14375
|
const schemaPath = path.join(config.schemaPath, `${schemaName}.json`);
|
|
14114
14376
|
try {
|
|
@@ -14122,68 +14384,27 @@ async function loadSchema(config, schemaName) {
|
|
|
14122
14384
|
issues: e.issues.map((i) => ` - ${i.path.join(".")}: ${i.message}`).join("\n")
|
|
14123
14385
|
})
|
|
14124
14386
|
};
|
|
14125
|
-
if (e.code === "ENOENT") return {
|
|
14126
|
-
schema: null,
|
|
14127
|
-
error: t("errors.schema.cannotRead", { name: `${schemaName}.json` })
|
|
14128
|
-
};
|
|
14129
|
-
if (e instanceof SyntaxError) return {
|
|
14130
|
-
schema: null,
|
|
14131
|
-
error: t("errors.schema.invalidJson", { name: `${schemaName}.json` })
|
|
14132
|
-
};
|
|
14133
|
-
return {
|
|
14134
|
-
schema: null,
|
|
14135
|
-
error: String(e)
|
|
14136
|
-
};
|
|
14137
|
-
}
|
|
14138
|
-
}
|
|
14139
|
-
async function listSchemas(aiexDir) {
|
|
14140
|
-
try {
|
|
14141
|
-
const dir = path.join(aiexDir, "schema");
|
|
14142
|
-
return (await fs.readdir(dir)).filter((f) => f.endsWith(".json")).map((f) => f.replace(JSON_EXT_RE$1, "")).sort();
|
|
14143
|
-
} catch {
|
|
14144
|
-
return [];
|
|
14145
|
-
}
|
|
14146
|
-
}
|
|
14147
|
-
async function readExtractFileInput(filePath, aiConfig, modelOverride) {
|
|
14148
|
-
const stat = fs$1.statSync(filePath);
|
|
14149
|
-
if (stat.size > MAX_UPLOAD_SIZE) throw new Error(t("errors.file.sizeExceeded", {
|
|
14150
|
-
size: bytesToMB(stat.size).toFixed(1),
|
|
14151
|
-
limit: MAX_UPLOAD_SIZE_TEXT,
|
|
14152
|
-
file: filePath
|
|
14153
|
-
}));
|
|
14154
|
-
const ext = path.extname(filePath).toLowerCase().replace(".", "");
|
|
14155
|
-
if (FILE_PART_EXTENSIONS.has(ext)) {
|
|
14156
|
-
if (shouldUseImageOcrFallback(aiConfig, modelOverride)) {
|
|
14157
|
-
const result = await recognizeImageText(filePath, aiConfig?.image);
|
|
14158
|
-
consola.info(t("extract.file.ocrText", { confidence: (result.confidence * 100).toFixed(1) }));
|
|
14159
|
-
return { text: result.text };
|
|
14160
|
-
}
|
|
14161
|
-
return {
|
|
14162
|
-
text: "",
|
|
14163
|
-
filePath
|
|
14164
|
-
};
|
|
14165
|
-
}
|
|
14166
|
-
if (ext === "pdf") {
|
|
14167
|
-
const buffer = await fs.readFile(filePath);
|
|
14168
|
-
const converter = createPdfConverter(aiConfig?.pdf);
|
|
14169
|
-
const result = await converter.convert(buffer, filePath);
|
|
14170
|
-
if (result.metadata?.fallback === "true") consola.info(t("extract.file.pdfFallback", { count: result.pageCount }));
|
|
14171
|
-
else consola.info(t("extract.file.pdfConverted", {
|
|
14172
|
-
name: converter.name,
|
|
14173
|
-
count: result.pageCount
|
|
14174
|
-
}));
|
|
14175
|
-
const mdPath = filePath.replace(PDF_EXT_RE, ".md");
|
|
14176
|
-
try {
|
|
14177
|
-
await fs.writeFile(mdPath, result.text);
|
|
14178
|
-
consola.info(t("extract.file.markdownSaved", { path: mdPath }));
|
|
14179
|
-
} catch {
|
|
14180
|
-
const fallbackMd = path.join(os.tmpdir(), `${path.basename(filePath, ".pdf")}.md`);
|
|
14181
|
-
await fs.writeFile(fallbackMd, result.text);
|
|
14182
|
-
consola.info(t("extract.file.markdownSaved", { path: fallbackMd }));
|
|
14183
|
-
}
|
|
14184
|
-
return { text: result.text };
|
|
14387
|
+
if (e.code === "ENOENT") return {
|
|
14388
|
+
schema: null,
|
|
14389
|
+
error: t("errors.schema.cannotRead", { name: `${schemaName}.json` })
|
|
14390
|
+
};
|
|
14391
|
+
if (e instanceof SyntaxError) return {
|
|
14392
|
+
schema: null,
|
|
14393
|
+
error: t("errors.schema.invalidJson", { name: `${schemaName}.json` })
|
|
14394
|
+
};
|
|
14395
|
+
return {
|
|
14396
|
+
schema: null,
|
|
14397
|
+
error: String(e)
|
|
14398
|
+
};
|
|
14399
|
+
}
|
|
14400
|
+
}
|
|
14401
|
+
async function listSchemas(aiexDir) {
|
|
14402
|
+
try {
|
|
14403
|
+
const dir = path.join(aiexDir, "schema");
|
|
14404
|
+
return (await fs.readdir(dir)).filter((f) => f.endsWith(".json")).map((f) => f.replace(JSON_EXT_RE$1, "")).sort();
|
|
14405
|
+
} catch {
|
|
14406
|
+
return [];
|
|
14185
14407
|
}
|
|
14186
|
-
return { text: await fs.readFile(filePath, "utf-8") };
|
|
14187
14408
|
}
|
|
14188
14409
|
async function extractSingle(aiexDir, config, aiConfig, schemaName, text$1, filePath, modelOverride, options) {
|
|
14189
14410
|
const schemaLoad = await loadSchema(config, schemaName);
|
|
@@ -14367,6 +14588,7 @@ async function runAuditedExtraction(options) {
|
|
|
14367
14588
|
error: error instanceof Error ? error.message : String(error)
|
|
14368
14589
|
});
|
|
14369
14590
|
if (!quiet) consola.error(t("extract.file.notionSyncFail", { error: error instanceof Error ? error.message : String(error) }));
|
|
14591
|
+
await triggerWebhook(aiConfig, audit.id, schemaName, "extraction.failed", source, r.data, error instanceof Error ? error.message : String(error), r.tokensUsed, quiet);
|
|
14370
14592
|
return {
|
|
14371
14593
|
success: false,
|
|
14372
14594
|
error: error instanceof Error ? error.message : String(error),
|
|
@@ -14382,6 +14604,7 @@ async function runAuditedExtraction(options) {
|
|
|
14382
14604
|
notionPages,
|
|
14383
14605
|
tokensUsed: r.tokensUsed
|
|
14384
14606
|
});
|
|
14607
|
+
await triggerWebhook(aiConfig, audit.id, schemaName, "extraction.success", source, r.data, void 0, r.tokensUsed, quiet);
|
|
14385
14608
|
return {
|
|
14386
14609
|
success: true,
|
|
14387
14610
|
outputPath: updated.outputPath,
|
|
@@ -14398,6 +14621,7 @@ async function runAuditedExtraction(options) {
|
|
|
14398
14621
|
error: r.error || "Extraction failed"
|
|
14399
14622
|
});
|
|
14400
14623
|
if (!quiet) consola.error(t("extract.file.extractionFailed", { error: r.error }));
|
|
14624
|
+
await triggerWebhook(aiConfig, audit.id, schemaName, "extraction.failed", source, void 0, r.error || "Extraction failed", void 0, quiet);
|
|
14401
14625
|
return {
|
|
14402
14626
|
success: false,
|
|
14403
14627
|
error: r.error,
|
|
@@ -14417,6 +14641,7 @@ async function runAuditedExtraction(options) {
|
|
|
14417
14641
|
error: e instanceof Error ? e.message : String(e)
|
|
14418
14642
|
}));
|
|
14419
14643
|
}
|
|
14644
|
+
await triggerWebhook(aiConfig, audit.id, schemaName, "extraction.failed", source, void 0, e instanceof Error ? e.message : String(e), void 0, quiet);
|
|
14420
14645
|
return {
|
|
14421
14646
|
success: false,
|
|
14422
14647
|
error: e instanceof Error ? e.message : String(e),
|
|
@@ -14425,73 +14650,6 @@ async function runAuditedExtraction(options) {
|
|
|
14425
14650
|
};
|
|
14426
14651
|
}
|
|
14427
14652
|
}
|
|
14428
|
-
async function processOneFile(aiexDir, config, aiConfig, schemaName, filePath, modelOverride, options) {
|
|
14429
|
-
const result = await runAuditedExtraction({
|
|
14430
|
-
aiexDir,
|
|
14431
|
-
config,
|
|
14432
|
-
aiConfig,
|
|
14433
|
-
schemaName,
|
|
14434
|
-
source: {
|
|
14435
|
-
type: "file",
|
|
14436
|
-
filePath
|
|
14437
|
-
},
|
|
14438
|
-
modelOverride,
|
|
14439
|
-
insert: options?.insert,
|
|
14440
|
-
force: options?.force,
|
|
14441
|
-
quiet: false
|
|
14442
|
-
});
|
|
14443
|
-
if (result.success) {
|
|
14444
|
-
if (!result.skipped) consola.success(t("extract.file.processSuccess", { file: path.basename(filePath) }));
|
|
14445
|
-
return true;
|
|
14446
|
-
}
|
|
14447
|
-
return false;
|
|
14448
|
-
}
|
|
14449
|
-
async function runBatchExtraction(aiexDir, config, aiConfig, schemaName, dir, globPattern, modelOverride, options) {
|
|
14450
|
-
consola.info(t("extract.batch.scanning", { dir: pc.cyan(dir) }));
|
|
14451
|
-
let files;
|
|
14452
|
-
try {
|
|
14453
|
-
files = listSupportedFiles(dir, globPattern);
|
|
14454
|
-
} catch {
|
|
14455
|
-
return {
|
|
14456
|
-
ok: false,
|
|
14457
|
-
successCount: 0,
|
|
14458
|
-
failCount: 0,
|
|
14459
|
-
error: t("extract.batch.errors.cannotReadDir", { dir })
|
|
14460
|
-
};
|
|
14461
|
-
}
|
|
14462
|
-
if (files.length === 0) return {
|
|
14463
|
-
ok: false,
|
|
14464
|
-
successCount: 0,
|
|
14465
|
-
failCount: 0,
|
|
14466
|
-
error: t("extract.batch.errors.noSupportedFiles", { dir })
|
|
14467
|
-
};
|
|
14468
|
-
consola.info(t("extract.batch.found", { count: files.length }));
|
|
14469
|
-
let successCount = 0;
|
|
14470
|
-
let failCount = 0;
|
|
14471
|
-
for (let i = 0; i < files.length; i++) {
|
|
14472
|
-
const file = files[i];
|
|
14473
|
-
consola.info(`\n${t("extract.batch.processing", {
|
|
14474
|
-
current: i + 1,
|
|
14475
|
-
total: files.length,
|
|
14476
|
-
file: pc.cyan(path.basename(file))
|
|
14477
|
-
})}`);
|
|
14478
|
-
if (await processOneFile(aiexDir, config, aiConfig, schemaName, file, modelOverride, {
|
|
14479
|
-
insert: options?.insert,
|
|
14480
|
-
force: options?.force
|
|
14481
|
-
})) successCount++;
|
|
14482
|
-
else failCount++;
|
|
14483
|
-
}
|
|
14484
|
-
consola.info(`\n${t("extract.batch.complete", {
|
|
14485
|
-
success: pc.green(successCount),
|
|
14486
|
-
fail: pc.red(failCount),
|
|
14487
|
-
total: files.length
|
|
14488
|
-
})}`);
|
|
14489
|
-
return {
|
|
14490
|
-
ok: true,
|
|
14491
|
-
successCount,
|
|
14492
|
-
failCount
|
|
14493
|
-
};
|
|
14494
|
-
}
|
|
14495
14653
|
|
|
14496
14654
|
//#endregion
|
|
14497
14655
|
//#region src/commands/dump.ts
|
|
@@ -14620,49 +14778,25 @@ const dumpCommand = defineCommand({
|
|
|
14620
14778
|
} else s.stop(t("command.dump.loaded", { count: rows.length }));
|
|
14621
14779
|
const s2 = spinner();
|
|
14622
14780
|
s2.start(t("command.dump.formatting"));
|
|
14623
|
-
|
|
14624
|
-
|
|
14625
|
-
|
|
14626
|
-
|
|
14627
|
-
|
|
14628
|
-
|
|
14629
|
-
|
|
14630
|
-
|
|
14631
|
-
|
|
14632
|
-
else if (type === "number" || type === "integer") if (val === "") newRow[colName] = "";
|
|
14633
|
-
else {
|
|
14634
|
-
const num = Number(val);
|
|
14635
|
-
newRow[colName] = Number.isNaN(num) ? val : num;
|
|
14636
|
-
}
|
|
14637
|
-
else if (typeof val === "object") newRow[colName] = JSON.stringify(val);
|
|
14638
|
-
else {
|
|
14639
|
-
const dbType = (col.type || "").toLowerCase();
|
|
14640
|
-
if ((dbType.includes("int") || dbType.includes("real") || dbType.includes("num") || dbType.includes("double") || dbType.includes("float")) && typeof val === "string" && val !== "") {
|
|
14641
|
-
const num = Number(val);
|
|
14642
|
-
newRow[colName] = Number.isNaN(num) ? val : num;
|
|
14643
|
-
} else newRow[colName] = val;
|
|
14644
|
-
}
|
|
14645
|
-
});
|
|
14646
|
-
return newRow;
|
|
14647
|
-
});
|
|
14648
|
-
s2.stop(t("command.dump.formatted"));
|
|
14781
|
+
let formattedRows;
|
|
14782
|
+
try {
|
|
14783
|
+
formattedRows = formatRowsConformingToSchema(rows, columns, schema, format);
|
|
14784
|
+
s2.stop(t("command.dump.formatted"));
|
|
14785
|
+
} catch (error) {
|
|
14786
|
+
s2.stop(t("command.dump.dbQueryFailed"));
|
|
14787
|
+
failCommand(error instanceof Error ? error.message : String(error));
|
|
14788
|
+
return;
|
|
14789
|
+
}
|
|
14649
14790
|
const s3 = spinner();
|
|
14650
14791
|
s3.start(t("command.dump.writing", {
|
|
14651
14792
|
format: format.toUpperCase(),
|
|
14652
14793
|
path: resolvedOutput
|
|
14653
14794
|
}));
|
|
14654
14795
|
try {
|
|
14655
|
-
const
|
|
14796
|
+
const buffer = generateExportBuffer(tableName, formattedRows, columns, format);
|
|
14656
14797
|
const outputDir = path.dirname(resolvedOutput);
|
|
14657
14798
|
if (!fs$1.existsSync(outputDir)) fs$1.mkdirSync(outputDir, { recursive: true });
|
|
14658
|
-
|
|
14659
|
-
const wb = XLSX.utils.book_new();
|
|
14660
|
-
XLSX.utils.book_append_sheet(wb, ws, tableName.slice(0, 31));
|
|
14661
|
-
XLSX.writeFile(wb, resolvedOutput);
|
|
14662
|
-
} else {
|
|
14663
|
-
const csv = XLSX.utils.sheet_to_csv(ws);
|
|
14664
|
-
fs$1.writeFileSync(resolvedOutput, "" + csv, "utf8");
|
|
14665
|
-
}
|
|
14799
|
+
fs$1.writeFileSync(resolvedOutput, buffer);
|
|
14666
14800
|
s3.stop(t("command.dump.dumpCompleted"));
|
|
14667
14801
|
consola.success(t("command.dump.successMsg", {
|
|
14668
14802
|
count: rows.length,
|
|
@@ -15584,26 +15718,17 @@ function aiRoutes(config) {
|
|
|
15584
15718
|
}
|
|
15585
15719
|
|
|
15586
15720
|
//#endregion
|
|
15587
|
-
//#region src/
|
|
15721
|
+
//#region src/core/data-service.ts
|
|
15588
15722
|
const FILE_REGEX = /\.json$/;
|
|
15589
15723
|
const EXTRACTION_TIMESTAMP_RE = /-\d{4}-\d{2}-\d{2}T/;
|
|
15590
15724
|
const INTERNAL_ROWID_COLUMN = "__aiex_rowid";
|
|
15591
15725
|
const TIMESTAMP_CLEANUP = /(\d{2})-(\d{2})-(\d{2})/;
|
|
15592
15726
|
const TIMESTAMP_TZ = /(\d{3})Z/;
|
|
15593
|
-
|
|
15594
|
-
const
|
|
15595
|
-
const
|
|
15596
|
-
|
|
15597
|
-
|
|
15598
|
-
search: z.string().catch(""),
|
|
15599
|
-
sortField: z.string().optional(),
|
|
15600
|
-
sortOrder: z.preprocess((value) => typeof value === "string" ? value.toLowerCase() : value, z.enum(["asc", "desc"]).catch("asc")),
|
|
15601
|
-
all: z.preprocess((value) => value === "true" || value === true, z.boolean().catch(false))
|
|
15602
|
-
});
|
|
15603
|
-
function invalidParamResponse$1(message) {
|
|
15604
|
-
return (result, c) => {
|
|
15605
|
-
if (!result.success) return c.json({ error: message }, 400);
|
|
15606
|
-
};
|
|
15727
|
+
function schemaNameFromExtractionFile(name$1) {
|
|
15728
|
+
const stem = name$1.replace(FILE_REGEX, "");
|
|
15729
|
+
const match = stem.match(EXTRACTION_TIMESTAMP_RE);
|
|
15730
|
+
if (!match || typeof match.index !== "number" || match.index <= 0) return null;
|
|
15731
|
+
return stem.slice(0, match.index);
|
|
15607
15732
|
}
|
|
15608
15733
|
function getAuditNotionStatus(record) {
|
|
15609
15734
|
if (record.notionPages?.length) return "synced";
|
|
@@ -15630,50 +15755,233 @@ async function getRowExtractionActions(aiexDir, tableName) {
|
|
|
15630
15755
|
}
|
|
15631
15756
|
return actions;
|
|
15632
15757
|
}
|
|
15633
|
-
function schemaNameFromExtractionFile(name$1) {
|
|
15634
|
-
const stem = name$1.replace(FILE_REGEX, "");
|
|
15635
|
-
const match = stem.match(EXTRACTION_TIMESTAMP_RE);
|
|
15636
|
-
if (!match || typeof match.index !== "number" || match.index <= 0) return null;
|
|
15637
|
-
return stem.slice(0, match.index);
|
|
15638
|
-
}
|
|
15639
15758
|
function createReadonlyQueryDb(databasePath) {
|
|
15640
15759
|
return new Kysely({ dialect: new SqliteDialect({ database: new Database(databasePath, { readonly: true }) }) });
|
|
15641
15760
|
}
|
|
15761
|
+
async function listExtractions(config) {
|
|
15762
|
+
const aiexDir = path.dirname(config.schemaPath);
|
|
15763
|
+
const extractedDir = path.join(aiexDir, "extracted");
|
|
15764
|
+
await fs.mkdir(extractedDir, { recursive: true });
|
|
15765
|
+
const jsonFiles = (await fs.readdir(extractedDir)).filter((f) => f.endsWith(".json") && !f.endsWith(".prompt.md"));
|
|
15766
|
+
const auditRecords = await listExtractionAuditRecords(aiexDir);
|
|
15767
|
+
const auditByOutputName = new Map(auditRecords.map((record) => [record.outputName, record]));
|
|
15768
|
+
const records = [];
|
|
15769
|
+
for (const file of jsonFiles) {
|
|
15770
|
+
const schemaName = schemaNameFromExtractionFile(file);
|
|
15771
|
+
if (!schemaName) continue;
|
|
15772
|
+
const timestamp = file.replace(FILE_REGEX, "").slice(schemaName.length + 1).replace(/-/g, (d, i) => i === 4 || i === 7 ? "-" : d).replace(TIMESTAMP_CLEANUP, (_, h, m, s) => `${h}:${m}:${s}`).replace(TIMESTAMP_TZ, ".$1Z");
|
|
15773
|
+
const filePath = path.join(extractedDir, file);
|
|
15774
|
+
try {
|
|
15775
|
+
const stat = await fs.stat(filePath);
|
|
15776
|
+
const audit = auditByOutputName.get(file);
|
|
15777
|
+
const notionPages = audit?.notionPages?.length ? audit.notionPages : void 0;
|
|
15778
|
+
records.push({
|
|
15779
|
+
name: file,
|
|
15780
|
+
schemaName,
|
|
15781
|
+
timestamp,
|
|
15782
|
+
fileSize: stat.size,
|
|
15783
|
+
modifiedAt: stat.mtime.toISOString(),
|
|
15784
|
+
notionStatus: notionPages ? "synced" : audit?.status === "failed" ? "failed" : "not_synced",
|
|
15785
|
+
notionPages,
|
|
15786
|
+
notionError: !notionPages && audit?.status === "failed" ? audit.error : void 0
|
|
15787
|
+
});
|
|
15788
|
+
} catch {
|
|
15789
|
+
continue;
|
|
15790
|
+
}
|
|
15791
|
+
}
|
|
15792
|
+
records.sort((a, b) => b.timestamp.localeCompare(a.timestamp));
|
|
15793
|
+
return records;
|
|
15794
|
+
}
|
|
15795
|
+
async function listTables(config) {
|
|
15796
|
+
const schemaDir = config.schemaPath;
|
|
15797
|
+
let schemaFiles = [];
|
|
15798
|
+
try {
|
|
15799
|
+
schemaFiles = (await fs.readdir(schemaDir)).filter((f) => f.endsWith(".json"));
|
|
15800
|
+
} catch {
|
|
15801
|
+
schemaFiles = [];
|
|
15802
|
+
}
|
|
15803
|
+
let db = null;
|
|
15804
|
+
let dbTables = [];
|
|
15805
|
+
try {
|
|
15806
|
+
db = createReadonlyQueryDb(config.databasePath);
|
|
15807
|
+
dbTables = (await sql`
|
|
15808
|
+
select name
|
|
15809
|
+
from sqlite_master
|
|
15810
|
+
where type = 'table' and name not like 'sqlite_%' and name not like '_%'
|
|
15811
|
+
order by name
|
|
15812
|
+
`.execute(db)).rows.map((row) => row.name);
|
|
15813
|
+
} catch {} finally {
|
|
15814
|
+
await db?.destroy();
|
|
15815
|
+
}
|
|
15816
|
+
const tables = [];
|
|
15817
|
+
for (const file of schemaFiles) try {
|
|
15818
|
+
const schema = await readFile(path.join(schemaDir, file));
|
|
15819
|
+
const tableName = schema.table?.name;
|
|
15820
|
+
if (!tableName) continue;
|
|
15821
|
+
tables.push({
|
|
15822
|
+
name: tableName,
|
|
15823
|
+
title: schema.title || tableName,
|
|
15824
|
+
hasData: dbTables.includes(tableName)
|
|
15825
|
+
});
|
|
15826
|
+
} catch {
|
|
15827
|
+
continue;
|
|
15828
|
+
}
|
|
15829
|
+
return tables;
|
|
15830
|
+
}
|
|
15831
|
+
async function getTableData(config, tableName, query) {
|
|
15832
|
+
const { page, pageSize, search, sortField, sortOrder, all } = query;
|
|
15833
|
+
const aiexDir = path.dirname(config.schemaPath);
|
|
15834
|
+
let db;
|
|
15835
|
+
try {
|
|
15836
|
+
db = createReadonlyQueryDb(config.databasePath);
|
|
15837
|
+
} catch {
|
|
15838
|
+
throw new Error(t("server.dbNotFound"));
|
|
15839
|
+
}
|
|
15840
|
+
try {
|
|
15841
|
+
if ((await sql`
|
|
15842
|
+
select name
|
|
15843
|
+
from sqlite_master
|
|
15844
|
+
where type = 'table' and name = ${tableName}
|
|
15845
|
+
`.execute(db)).rows.length === 0) throw new Error(t("server.tableNotFound", { name: tableName }));
|
|
15846
|
+
const columns = (await sql`
|
|
15847
|
+
pragma table_info(${sql.table(tableName)})
|
|
15848
|
+
`.execute(db)).rows.map((col) => ({
|
|
15849
|
+
name: col.name,
|
|
15850
|
+
type: col.type,
|
|
15851
|
+
notNull: !!col.notnull,
|
|
15852
|
+
pk: !!col.pk
|
|
15853
|
+
}));
|
|
15854
|
+
const searchConditions = columns.map((col) => sql`${sql.ref(col.name)} like ${`%${search}%`}`);
|
|
15855
|
+
const searchCondition = search ? sql`where ${sql.join(searchConditions, sql` or `)}` : sql``;
|
|
15856
|
+
const sortColumn = columns.find((col) => col.name === sortField);
|
|
15857
|
+
const orderBy = sortColumn ? sql`order by ${sql.ref(sortColumn.name)} ${sql.raw(sortOrder === "desc" ? "desc" : "asc")}` : sql``;
|
|
15858
|
+
const total = (await sql`
|
|
15859
|
+
select count(*) as count
|
|
15860
|
+
from ${sql.table(tableName)}
|
|
15861
|
+
${searchCondition}
|
|
15862
|
+
`.execute(db)).rows[0]?.count ?? 0;
|
|
15863
|
+
const offset = (page - 1) * pageSize;
|
|
15864
|
+
const totalPages = all ? 1 : Math.max(1, Math.ceil(total / pageSize));
|
|
15865
|
+
const result = all ? await sql`
|
|
15866
|
+
select rowid as ${sql.raw(INTERNAL_ROWID_COLUMN)}, *
|
|
15867
|
+
from ${sql.table(tableName)}
|
|
15868
|
+
${searchCondition}
|
|
15869
|
+
${orderBy}
|
|
15870
|
+
`.execute(db) : await sql`
|
|
15871
|
+
select rowid as ${sql.raw(INTERNAL_ROWID_COLUMN)}, *
|
|
15872
|
+
from ${sql.table(tableName)}
|
|
15873
|
+
${searchCondition}
|
|
15874
|
+
${orderBy}
|
|
15875
|
+
limit ${pageSize}
|
|
15876
|
+
offset ${offset}
|
|
15877
|
+
`.execute(db);
|
|
15878
|
+
const actionsByRowId = await getRowExtractionActions(aiexDir, tableName);
|
|
15879
|
+
const rowActions = Object.fromEntries(result.rows.map((row, index) => {
|
|
15880
|
+
const rowId = row[INTERNAL_ROWID_COLUMN];
|
|
15881
|
+
const action = rowId === null || rowId === void 0 ? void 0 : actionsByRowId.get(String(rowId));
|
|
15882
|
+
return action ? [String(index), action] : null;
|
|
15883
|
+
}).filter((entry) => !!entry));
|
|
15884
|
+
const rows = result.rows.map(({ [INTERNAL_ROWID_COLUMN]: _rowid, ...row }) => row);
|
|
15885
|
+
const schemaDir = config.schemaPath;
|
|
15886
|
+
let schema = null;
|
|
15887
|
+
try {
|
|
15888
|
+
const schemaFiles = (await fs.readdir(schemaDir)).filter((f) => f.endsWith(".json"));
|
|
15889
|
+
for (const file of schemaFiles) {
|
|
15890
|
+
const s = await readFile(path.join(schemaDir, file));
|
|
15891
|
+
if (s.table?.name === tableName) {
|
|
15892
|
+
schema = s;
|
|
15893
|
+
break;
|
|
15894
|
+
}
|
|
15895
|
+
}
|
|
15896
|
+
} catch {}
|
|
15897
|
+
return {
|
|
15898
|
+
columns,
|
|
15899
|
+
rows,
|
|
15900
|
+
rowActions,
|
|
15901
|
+
total,
|
|
15902
|
+
page: all ? 1 : page,
|
|
15903
|
+
pageSize: all ? total : pageSize,
|
|
15904
|
+
totalPages,
|
|
15905
|
+
schema
|
|
15906
|
+
};
|
|
15907
|
+
} finally {
|
|
15908
|
+
await db.destroy();
|
|
15909
|
+
}
|
|
15910
|
+
}
|
|
15911
|
+
async function retryNotionSync(config, fileName) {
|
|
15912
|
+
const aiexDir = path.dirname(config.schemaPath);
|
|
15913
|
+
const extractedDir = path.join(aiexDir, "extracted");
|
|
15914
|
+
const filePath = path.join(extractedDir, fileName);
|
|
15915
|
+
const schemaName = schemaNameFromExtractionFile(fileName);
|
|
15916
|
+
if (!schemaName) throw new Error(t("server.cannotInferSchema"));
|
|
15917
|
+
const aiConfig = await readAIConfig(aiexDir);
|
|
15918
|
+
if (!aiConfig?.notion?.enabled) throw new Error(t("errors.notion.notEnabled"));
|
|
15919
|
+
if (!aiConfig.notion.schemas?.[schemaName]?.databaseId?.trim()) throw new Error(t("errors.notion.noSchemaConfig", { name: schemaName }));
|
|
15920
|
+
try {
|
|
15921
|
+
const data = await readFile(filePath);
|
|
15922
|
+
if (!data || typeof data !== "object" || Array.isArray(data)) throw new Error(t("errors.ai.extractionNotObject"));
|
|
15923
|
+
const page = await writeNotionPage(aiConfig.notion, schemaName, data);
|
|
15924
|
+
const notionPages = [{
|
|
15925
|
+
databaseId: page.databaseId,
|
|
15926
|
+
pageId: page.pageId
|
|
15927
|
+
}];
|
|
15928
|
+
let record = (await listExtractionAuditRecords(aiexDir)).find((record$1) => record$1.outputName === fileName);
|
|
15929
|
+
if (!record) record = await createExtractionAuditRecord(aiexDir, {
|
|
15930
|
+
schemaName,
|
|
15931
|
+
source: {
|
|
15932
|
+
type: "file",
|
|
15933
|
+
filePath,
|
|
15934
|
+
fileName
|
|
15935
|
+
}
|
|
15936
|
+
});
|
|
15937
|
+
if (record) await updateExtractionAuditRecord(aiexDir, record.id, {
|
|
15938
|
+
status: "succeeded",
|
|
15939
|
+
outputPath: filePath,
|
|
15940
|
+
outputName: fileName,
|
|
15941
|
+
notionPages,
|
|
15942
|
+
error: void 0
|
|
15943
|
+
});
|
|
15944
|
+
return {
|
|
15945
|
+
success: true,
|
|
15946
|
+
notionPages
|
|
15947
|
+
};
|
|
15948
|
+
} catch (error) {
|
|
15949
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
15950
|
+
const record = (await listExtractionAuditRecords(aiexDir)).find((record$1) => record$1.outputName === fileName);
|
|
15951
|
+
if (record) await updateExtractionAuditRecord(aiexDir, record.id, {
|
|
15952
|
+
status: "failed",
|
|
15953
|
+
outputPath: filePath,
|
|
15954
|
+
outputName: fileName,
|
|
15955
|
+
error: message
|
|
15956
|
+
});
|
|
15957
|
+
throw error;
|
|
15958
|
+
}
|
|
15959
|
+
}
|
|
15960
|
+
|
|
15961
|
+
//#endregion
|
|
15962
|
+
//#region src/server/routes/data.ts
|
|
15963
|
+
const tableParamSchema = z.object({ name: z.string().regex(/^[a-z][a-z0-9_]*$/) });
|
|
15964
|
+
const extractionFileParamSchema = z.object({ name: z.string().regex(/^[\w.-]+\.json$/).refine((name$1) => name$1 === path.basename(name$1) && !name$1.includes("..")) });
|
|
15965
|
+
const tableQuerySchema = z.object({
|
|
15966
|
+
page: z.coerce.number().int().min(1).catch(1),
|
|
15967
|
+
pageSize: z.coerce.number().int().min(1).max(500).catch(50),
|
|
15968
|
+
search: z.string().catch(""),
|
|
15969
|
+
sortField: z.string().optional(),
|
|
15970
|
+
sortOrder: z.preprocess((value) => typeof value === "string" ? value.toLowerCase() : value, z.enum(["asc", "desc"]).catch("asc")),
|
|
15971
|
+
all: z.preprocess((value) => value === "true" || value === true, z.boolean().catch(false))
|
|
15972
|
+
});
|
|
15973
|
+
function invalidParamResponse$1(message) {
|
|
15974
|
+
return (result, c) => {
|
|
15975
|
+
if (!result.success) return c.json({ error: message }, 400);
|
|
15976
|
+
};
|
|
15977
|
+
}
|
|
15642
15978
|
function dataRoutes(config) {
|
|
15643
15979
|
const app = new Hono();
|
|
15644
15980
|
const aiexDir = path.dirname(config.schemaPath);
|
|
15645
15981
|
const extractedDir = path.join(aiexDir, "extracted");
|
|
15646
15982
|
app.get("/data", async (c) => {
|
|
15647
15983
|
try {
|
|
15648
|
-
|
|
15649
|
-
const jsonFiles = (await fs.readdir(extractedDir)).filter((f) => f.endsWith(".json") && !f.endsWith(".prompt.md"));
|
|
15650
|
-
const auditRecords = await listExtractionAuditRecords(aiexDir);
|
|
15651
|
-
const auditByOutputName = new Map(auditRecords.map((record) => [record.outputName, record]));
|
|
15652
|
-
const records = [];
|
|
15653
|
-
for (const file of jsonFiles) {
|
|
15654
|
-
const schemaName = schemaNameFromExtractionFile(file);
|
|
15655
|
-
if (!schemaName) continue;
|
|
15656
|
-
const timestamp = file.replace(FILE_REGEX, "").slice(schemaName.length + 1).replace(/-/g, (d, i) => i === 4 || i === 7 ? "-" : d).replace(TIMESTAMP_CLEANUP, (_, h, m, s) => `${h}:${m}:${s}`).replace(TIMESTAMP_TZ, ".$1Z");
|
|
15657
|
-
const filePath = path.join(extractedDir, file);
|
|
15658
|
-
try {
|
|
15659
|
-
const stat = await fs.stat(filePath);
|
|
15660
|
-
const audit = auditByOutputName.get(file);
|
|
15661
|
-
const notionPages = audit?.notionPages?.length ? audit.notionPages : void 0;
|
|
15662
|
-
records.push({
|
|
15663
|
-
name: file,
|
|
15664
|
-
schemaName,
|
|
15665
|
-
timestamp,
|
|
15666
|
-
fileSize: stat.size,
|
|
15667
|
-
modifiedAt: stat.mtime.toISOString(),
|
|
15668
|
-
notionStatus: notionPages ? "synced" : audit?.status === "failed" ? "failed" : "not_synced",
|
|
15669
|
-
notionPages,
|
|
15670
|
-
notionError: !notionPages && audit?.status === "failed" ? audit.error : void 0
|
|
15671
|
-
});
|
|
15672
|
-
} catch {
|
|
15673
|
-
continue;
|
|
15674
|
-
}
|
|
15675
|
-
}
|
|
15676
|
-
records.sort((a, b) => b.timestamp.localeCompare(a.timestamp));
|
|
15984
|
+
const records = await listExtractions(config);
|
|
15677
15985
|
return c.json(records);
|
|
15678
15986
|
} catch (error) {
|
|
15679
15987
|
return c.json({ error: error instanceof Error ? error.message : String(error) }, 500);
|
|
@@ -15681,39 +15989,7 @@ function dataRoutes(config) {
|
|
|
15681
15989
|
});
|
|
15682
15990
|
app.get("/data/tables", async (c) => {
|
|
15683
15991
|
try {
|
|
15684
|
-
const
|
|
15685
|
-
let schemaFiles = [];
|
|
15686
|
-
try {
|
|
15687
|
-
schemaFiles = (await fs.readdir(schemaDir)).filter((f) => f.endsWith(".json"));
|
|
15688
|
-
} catch {
|
|
15689
|
-
schemaFiles = [];
|
|
15690
|
-
}
|
|
15691
|
-
let db = null;
|
|
15692
|
-
let dbTables = [];
|
|
15693
|
-
try {
|
|
15694
|
-
db = createReadonlyQueryDb(config.databasePath);
|
|
15695
|
-
dbTables = (await sql`
|
|
15696
|
-
select name
|
|
15697
|
-
from sqlite_master
|
|
15698
|
-
where type = 'table' and name not like 'sqlite_%' and name not like '_%'
|
|
15699
|
-
order by name
|
|
15700
|
-
`.execute(db)).rows.map((row) => row.name);
|
|
15701
|
-
} catch {} finally {
|
|
15702
|
-
await db?.destroy();
|
|
15703
|
-
}
|
|
15704
|
-
const tables = [];
|
|
15705
|
-
for (const file of schemaFiles) try {
|
|
15706
|
-
const schema = await readFile(path.join(schemaDir, file));
|
|
15707
|
-
const tableName = schema.table?.name;
|
|
15708
|
-
if (!tableName) continue;
|
|
15709
|
-
tables.push({
|
|
15710
|
-
name: tableName,
|
|
15711
|
-
title: schema.title || tableName,
|
|
15712
|
-
hasData: dbTables.includes(tableName)
|
|
15713
|
-
});
|
|
15714
|
-
} catch {
|
|
15715
|
-
continue;
|
|
15716
|
-
}
|
|
15992
|
+
const tables = await listTables(config);
|
|
15717
15993
|
return c.json(tables);
|
|
15718
15994
|
} catch (error) {
|
|
15719
15995
|
return c.json({ error: error instanceof Error ? error.message : String(error) }, 500);
|
|
@@ -15721,84 +15997,14 @@ function dataRoutes(config) {
|
|
|
15721
15997
|
});
|
|
15722
15998
|
app.get("/data/tables/:name", zValidator("param", tableParamSchema, invalidParamResponse$1(t("server.invalidTableName"))), zValidator("query", tableQuerySchema), async (c) => {
|
|
15723
15999
|
const { name: tableName } = c.req.valid("param");
|
|
15724
|
-
const
|
|
15725
|
-
let db;
|
|
16000
|
+
const query = c.req.valid("query");
|
|
15726
16001
|
try {
|
|
15727
|
-
|
|
15728
|
-
|
|
15729
|
-
return c.json({ error: t("server.dbNotFound") }, 400);
|
|
15730
|
-
}
|
|
15731
|
-
try {
|
|
15732
|
-
if ((await sql`
|
|
15733
|
-
select name
|
|
15734
|
-
from sqlite_master
|
|
15735
|
-
where type = 'table' and name = ${tableName}
|
|
15736
|
-
`.execute(db)).rows.length === 0) return c.json({ error: t("server.tableNotFound", { name: tableName }) }, 404);
|
|
15737
|
-
const columns = (await sql`
|
|
15738
|
-
pragma table_info(${sql.table(tableName)})
|
|
15739
|
-
`.execute(db)).rows.map((col) => ({
|
|
15740
|
-
name: col.name,
|
|
15741
|
-
type: col.type,
|
|
15742
|
-
notNull: !!col.notnull,
|
|
15743
|
-
pk: !!col.pk
|
|
15744
|
-
}));
|
|
15745
|
-
const searchConditions = columns.map((col) => sql`${sql.ref(col.name)} like ${`%${search}%`}`);
|
|
15746
|
-
const searchCondition = search ? sql`where ${sql.join(searchConditions, sql` or `)}` : sql``;
|
|
15747
|
-
const sortColumn = columns.find((col) => col.name === sortField);
|
|
15748
|
-
const orderBy = sortColumn ? sql`order by ${sql.ref(sortColumn.name)} ${sql.raw(sortOrder === "desc" ? "desc" : "asc")}` : sql``;
|
|
15749
|
-
const total = (await sql`
|
|
15750
|
-
select count(*) as count
|
|
15751
|
-
from ${sql.table(tableName)}
|
|
15752
|
-
${searchCondition}
|
|
15753
|
-
`.execute(db)).rows[0]?.count ?? 0;
|
|
15754
|
-
const offset = (page - 1) * pageSize;
|
|
15755
|
-
const totalPages = all ? 1 : Math.max(1, Math.ceil(total / pageSize));
|
|
15756
|
-
const result = all ? await sql`
|
|
15757
|
-
select rowid as ${sql.raw(INTERNAL_ROWID_COLUMN)}, *
|
|
15758
|
-
from ${sql.table(tableName)}
|
|
15759
|
-
${searchCondition}
|
|
15760
|
-
${orderBy}
|
|
15761
|
-
`.execute(db) : await sql`
|
|
15762
|
-
select rowid as ${sql.raw(INTERNAL_ROWID_COLUMN)}, *
|
|
15763
|
-
from ${sql.table(tableName)}
|
|
15764
|
-
${searchCondition}
|
|
15765
|
-
${orderBy}
|
|
15766
|
-
limit ${pageSize}
|
|
15767
|
-
offset ${offset}
|
|
15768
|
-
`.execute(db);
|
|
15769
|
-
const actionsByRowId = await getRowExtractionActions(aiexDir, tableName);
|
|
15770
|
-
const rowActions = Object.fromEntries(result.rows.map((row, index) => {
|
|
15771
|
-
const rowId = row[INTERNAL_ROWID_COLUMN];
|
|
15772
|
-
const action = rowId === null || rowId === void 0 ? void 0 : actionsByRowId.get(String(rowId));
|
|
15773
|
-
return action ? [String(index), action] : null;
|
|
15774
|
-
}).filter((entry) => !!entry));
|
|
15775
|
-
const rows = result.rows.map(({ [INTERNAL_ROWID_COLUMN]: _rowid, ...row }) => row);
|
|
15776
|
-
const schemaDir = config.schemaPath;
|
|
15777
|
-
let schema = null;
|
|
15778
|
-
try {
|
|
15779
|
-
const schemaFiles = (await fs.readdir(schemaDir)).filter((f) => f.endsWith(".json"));
|
|
15780
|
-
for (const file of schemaFiles) {
|
|
15781
|
-
const s = await readFile(path.join(schemaDir, file));
|
|
15782
|
-
if (s.table?.name === tableName) {
|
|
15783
|
-
schema = s;
|
|
15784
|
-
break;
|
|
15785
|
-
}
|
|
15786
|
-
}
|
|
15787
|
-
} catch {}
|
|
15788
|
-
return c.json({
|
|
15789
|
-
columns,
|
|
15790
|
-
rows,
|
|
15791
|
-
rowActions,
|
|
15792
|
-
total,
|
|
15793
|
-
page: all ? 1 : page,
|
|
15794
|
-
pageSize: all ? total : pageSize,
|
|
15795
|
-
totalPages,
|
|
15796
|
-
schema
|
|
15797
|
-
});
|
|
16002
|
+
const result = await getTableData(config, tableName, query);
|
|
16003
|
+
return c.json(result);
|
|
15798
16004
|
} catch (error) {
|
|
15799
|
-
|
|
15800
|
-
|
|
15801
|
-
|
|
16005
|
+
const errMessage = error instanceof Error ? error.message : String(error);
|
|
16006
|
+
const status = errMessage.includes("not found") ? 404 : 500;
|
|
16007
|
+
return c.json({ error: errMessage }, status);
|
|
15802
16008
|
}
|
|
15803
16009
|
});
|
|
15804
16010
|
app.get("/data/:name", zValidator("param", extractionFileParamSchema, invalidParamResponse$1(t("server.invalidFileName"))), async (c) => {
|
|
@@ -15817,61 +16023,15 @@ function dataRoutes(config) {
|
|
|
15817
16023
|
});
|
|
15818
16024
|
app.post("/data/:name/notion/retry", zValidator("param", extractionFileParamSchema, invalidParamResponse$1(t("server.invalidFileName"))), async (c) => {
|
|
15819
16025
|
const { name: name$1 } = c.req.valid("param");
|
|
15820
|
-
|
|
15821
|
-
const schemaName = schemaNameFromExtractionFile(name$1);
|
|
15822
|
-
if (!schemaName) return c.json({
|
|
16026
|
+
if (!schemaNameFromExtractionFile(name$1)) return c.json({
|
|
15823
16027
|
success: false,
|
|
15824
16028
|
error: t("server.cannotInferSchema")
|
|
15825
16029
|
}, 400);
|
|
15826
|
-
const aiConfig = await readAIConfig(aiexDir);
|
|
15827
|
-
if (!aiConfig?.notion?.enabled) return c.json({
|
|
15828
|
-
success: false,
|
|
15829
|
-
error: t("errors.notion.notEnabled")
|
|
15830
|
-
}, 400);
|
|
15831
|
-
if (!aiConfig.notion.schemas?.[schemaName]?.databaseId?.trim()) return c.json({
|
|
15832
|
-
success: false,
|
|
15833
|
-
error: t("errors.notion.noSchemaConfig", { name: schemaName })
|
|
15834
|
-
}, 400);
|
|
15835
16030
|
try {
|
|
15836
|
-
const
|
|
15837
|
-
|
|
15838
|
-
success: false,
|
|
15839
|
-
error: t("errors.ai.extractionNotObject")
|
|
15840
|
-
}, 400);
|
|
15841
|
-
const page = await writeNotionPage(aiConfig.notion, schemaName, data);
|
|
15842
|
-
const notionPages = [{
|
|
15843
|
-
databaseId: page.databaseId,
|
|
15844
|
-
pageId: page.pageId
|
|
15845
|
-
}];
|
|
15846
|
-
let record = (await listExtractionAuditRecords(aiexDir)).find((record$1) => record$1.outputName === name$1);
|
|
15847
|
-
if (!record) record = await createExtractionAuditRecord(aiexDir, {
|
|
15848
|
-
schemaName,
|
|
15849
|
-
source: {
|
|
15850
|
-
type: "file",
|
|
15851
|
-
filePath,
|
|
15852
|
-
fileName: name$1
|
|
15853
|
-
}
|
|
15854
|
-
});
|
|
15855
|
-
if (record) await updateExtractionAuditRecord(aiexDir, record.id, {
|
|
15856
|
-
status: "succeeded",
|
|
15857
|
-
outputPath: filePath,
|
|
15858
|
-
outputName: name$1,
|
|
15859
|
-
notionPages,
|
|
15860
|
-
error: void 0
|
|
15861
|
-
});
|
|
15862
|
-
return c.json({
|
|
15863
|
-
success: true,
|
|
15864
|
-
notionPages
|
|
15865
|
-
});
|
|
16031
|
+
const result = await retryNotionSync(config, name$1);
|
|
16032
|
+
return c.json(result);
|
|
15866
16033
|
} catch (error) {
|
|
15867
16034
|
const message = error instanceof Error ? error.message : String(error);
|
|
15868
|
-
const record = (await listExtractionAuditRecords(aiexDir)).find((record$1) => record$1.outputName === name$1);
|
|
15869
|
-
if (record) await updateExtractionAuditRecord(aiexDir, record.id, {
|
|
15870
|
-
status: "failed",
|
|
15871
|
-
outputPath: filePath,
|
|
15872
|
-
outputName: name$1,
|
|
15873
|
-
error: message
|
|
15874
|
-
});
|
|
15875
16035
|
return c.json({
|
|
15876
16036
|
success: false,
|
|
15877
16037
|
error: message
|