aiex-cli 0.0.3-beta.4 → 0.0.3-beta.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +43 -16
- package/dist/cli.mjs +754 -316
- package/dist/{doctor-collector-nMMG_h-w.mjs → doctor-collector-DnH1Qu1e.mjs} +26 -4
- package/dist/index.d.mts +43 -0
- package/dist/index.mjs +1 -1
- package/dist/web/assets/AISettings-D6EpB8tt.js +272 -0
- package/dist/web/assets/DataBrowser-N77fBaoa.js +97 -0
- package/dist/web/assets/{ExtractionViewer-DNcRCmNK.js → ExtractionViewer-BSZycwgL.js} +1 -1
- package/dist/web/assets/{JsonSchemaEditor-D477lV5a.js → JsonSchemaEditor-DfHs5bc0.js} +15 -14
- package/dist/web/assets/{api-client-CG1VV5gz.js → api-client-BsgtGnzl.js} +1 -1
- package/dist/web/assets/{index-fSfuQz4G.js → index-CPjJbU4i.js} +3 -3
- package/dist/web/assets/index-DcTjZeUT.css +2 -0
- package/dist/web/assets/textarea-DEQMRfG8.js +522 -0
- package/dist/web/index.html +3 -3
- package/package.json +3 -1
- package/dist/web/assets/AISettings-YW-fn5h5.js +0 -346
- package/dist/web/assets/DataBrowser-Dzdc0ESt.js +0 -6
- package/dist/web/assets/index-BHM3EpP-.css +0 -2
- package/dist/web/assets/select-BGex2SPs.js +0 -439
package/dist/cli.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { C as
|
|
1
|
+
import { C as name, D as doctorDiagnosticsTableRows, O as formatDoctorDiagnosticsJson, S as description, T as version, _ as PLACEHOLDER_SCHEMA, a as parseJsonSchema, b as createConfig, c as recognizeImageText, d as readAIConfig, f as writeAIConfig, g as DEFAULT_PROMPT_CONFIG, h as DEFAULT_MINERU_CONFIG, i as JsonSchemaDefinitionSchema, l as shouldUseImageOcrFallback, m as DEFAULT_MARKITDOWN_CONFIG, n as createMigrationConfig, o as toSnakeCase, p as DEFAULT_MARKER_CONFIG, s as generateDrizzleSchema, t as collectDoctorDiagnostics, u as getDefaultAIConfig, v as PLACEHOLDER_TEXT, w as package_default, x as seedConfig, y as AIConfigSchema } from "./doctor-collector-DnH1Qu1e.mjs";
|
|
2
2
|
import { createRequire } from "node:module";
|
|
3
3
|
import fs from "node:fs/promises";
|
|
4
4
|
import os from "node:os";
|
|
@@ -11,8 +11,12 @@ import { defineCommand, runMain } from "citty";
|
|
|
11
11
|
import { consola } from "consola";
|
|
12
12
|
import updateNotifier from "update-notifier";
|
|
13
13
|
import CliTable3 from "cli-table3";
|
|
14
|
+
import fs$1 from "node:fs";
|
|
14
15
|
import { intro, isCancel, outro, select, spinner, text } from "@clack/prompts";
|
|
16
|
+
import Database from "better-sqlite3";
|
|
15
17
|
import pc from "picocolors";
|
|
18
|
+
import * as XLSX from "xlsx";
|
|
19
|
+
import { glob, globSync } from "tinyglobby";
|
|
16
20
|
import { createOpenAICompatible } from "@ai-sdk/openai-compatible";
|
|
17
21
|
import { LangfuseSpanProcessor } from "@langfuse/otel";
|
|
18
22
|
import { NodeTracerProvider } from "@opentelemetry/sdk-trace-node";
|
|
@@ -20,15 +24,14 @@ import { APICallError, Output, generateText, jsonSchema } from "ai";
|
|
|
20
24
|
import mime from "mime";
|
|
21
25
|
import pRetry from "p-retry";
|
|
22
26
|
import { jsonrepair } from "jsonrepair";
|
|
23
|
-
import fs$1 from "node:fs";
|
|
24
|
-
import Database from "better-sqlite3";
|
|
25
|
-
import { glob, globSync } from "tinyglobby";
|
|
26
27
|
import { Client, extractNotionId } from "@notionhq/client";
|
|
27
28
|
import { Buffer } from "node:buffer";
|
|
28
29
|
import { execa } from "execa";
|
|
29
30
|
import { extractText, getDocumentProxy, getMeta } from "unpdf";
|
|
31
|
+
import crypto from "node:crypto";
|
|
30
32
|
import { execFile } from "node:child_process";
|
|
31
33
|
import { promisify } from "node:util";
|
|
34
|
+
import * as chokidar from "chokidar";
|
|
32
35
|
import { serve } from "@hono/node-server";
|
|
33
36
|
import open from "open";
|
|
34
37
|
import { serveStatic } from "@hono/node-server/serve-static";
|
|
@@ -12943,6 +12946,22 @@ function schemaToDescription(schema) {
|
|
|
12943
12946
|
const property = prop;
|
|
12944
12947
|
lines.push(nestedPropertyToDescription(name$1, property));
|
|
12945
12948
|
}
|
|
12949
|
+
if (schema.examples && schema.examples.length > 0) {
|
|
12950
|
+
lines.push("");
|
|
12951
|
+
lines.push("Examples / Few-shot Cases:");
|
|
12952
|
+
schema.examples.forEach((example, idx) => {
|
|
12953
|
+
lines.push("");
|
|
12954
|
+
lines.push(`Example ${idx + 1}:`);
|
|
12955
|
+
lines.push("Input text:");
|
|
12956
|
+
lines.push("\"\"\"");
|
|
12957
|
+
lines.push(example.text);
|
|
12958
|
+
lines.push("\"\"\"");
|
|
12959
|
+
lines.push("Expected JSON output:");
|
|
12960
|
+
lines.push("```json");
|
|
12961
|
+
lines.push(JSON.stringify(example.output, null, 2));
|
|
12962
|
+
lines.push("```");
|
|
12963
|
+
});
|
|
12964
|
+
}
|
|
12946
12965
|
return lines.join("\n");
|
|
12947
12966
|
}
|
|
12948
12967
|
function generateExtractionPrompt(schema, text$1, promptConfig = DEFAULT_PROMPT_CONFIG) {
|
|
@@ -13512,11 +13531,17 @@ async function deleteExtractionAuditRecord(aiexDir, id) {
|
|
|
13512
13531
|
clearRecordCache(aiexDir);
|
|
13513
13532
|
return true;
|
|
13514
13533
|
}
|
|
13534
|
+
/**
 * Finds the first succeeded extraction audit record matching a schema and file hash.
 *
 * @param {string} aiexDir - Directory handed to `listExtractionAuditRecords`.
 * @param {string} schemaName - Schema name the record must have been created for.
 * @param {string} fileHash - Hash compared against each record's `source.fileHash`.
 * @returns {Promise<object | null>} The first matching record, or `null` when
 *   nothing matches or no hash was supplied.
 */
async function findSucceededAuditByHash(aiexDir, schemaName, fileHash) {
	// Without a hash there is nothing to match on. Comparing `undefined` against
	// records that carry no `fileHash` (text sources are stored without one)
	// would otherwise report a bogus "already processed" hit.
	if (!fileHash) return null;
	const records = await listExtractionAuditRecords(aiexDir);
	const match = records.find((record) => record.schemaName === schemaName && record.status === "succeeded" && record.source?.fileHash === fileHash);
	return match ?? null;
}
|
|
13515
13540
|
|
|
13516
13541
|
//#endregion
|
|
13517
13542
|
//#region src/core/file-constants.ts
|
|
13518
|
-
const MAX_UPLOAD_SIZE =
|
|
13519
|
-
const MAX_UPLOAD_SIZE_TEXT = "
|
|
13543
|
+
const MAX_UPLOAD_SIZE = 30 * 1024 * 1024;
|
|
13544
|
+
const MAX_UPLOAD_SIZE_TEXT = "30MB";
|
|
13520
13545
|
const SUPPORTED_FILE_TYPES_TEXT = "images, PDF, text, markdown, CSV, JSON, HTML, XML, YAML";
|
|
13521
13546
|
const MISSING_UPLOAD_FILE_TEXT = "Uploaded file is no longer available. Re-run extraction with the original file.";
|
|
13522
13547
|
const SUPPORTED_MIME_TYPES = new Set([
|
|
@@ -13964,6 +13989,10 @@ function createPdfConverter(config) {
|
|
|
13964
13989
|
const markitdownConfig = config.markitdown ?? DEFAULT_MARKITDOWN_CONFIG;
|
|
13965
13990
|
return withFallback(new ExternalCommandPdfConverter("markitdown", markitdownConfig), markitdownConfig);
|
|
13966
13991
|
}
|
|
13992
|
+
if (config.converter === "marker") {
|
|
13993
|
+
const markerConfig = config.marker ?? DEFAULT_MARKER_CONFIG;
|
|
13994
|
+
return withFallback(new ExternalCommandPdfConverter("marker", markerConfig), markerConfig);
|
|
13995
|
+
}
|
|
13967
13996
|
if (config.converter === "external") {
|
|
13968
13997
|
if (!config.external) throw new Error("External PDF converter is selected but no external command is configured.");
|
|
13969
13998
|
return withFallback(new ExternalCommandPdfConverter("external", config.external), config.external);
|
|
@@ -13979,6 +14008,21 @@ function createPdfConverter(config) {
|
|
|
13979
14008
|
return instance;
|
|
13980
14009
|
}
|
|
13981
14010
|
|
|
14011
|
+
//#endregion
|
|
14012
|
+
//#region src/utils/hash.ts
|
|
14013
|
+
/**
 * Streams a file from disk and resolves with the hex-encoded SHA-256 digest
 * of its contents. The returned promise rejects when the read stream fails
 * (for example when the file cannot be opened).
 */
async function getFileHash(filePath) {
	const digest = crypto.createHash("sha256");
	// Async iteration consumes the stream chunk by chunk and surfaces stream
	// errors as a rejection of this function's promise.
	for await (const chunk of fs$1.createReadStream(filePath)) digest.update(chunk);
	return digest.digest("hex");
}
|
|
14025
|
+
|
|
13982
14026
|
//#endregion
|
|
13983
14027
|
//#region src/core/extract-runner.ts
|
|
13984
14028
|
const FILE_PART_EXTENSIONS = new Set([
|
|
@@ -13990,7 +14034,7 @@ const FILE_PART_EXTENSIONS = new Set([
|
|
|
13990
14034
|
"bmp",
|
|
13991
14035
|
"svg"
|
|
13992
14036
|
]);
|
|
13993
|
-
const SUPPORTED_EXTENSIONS = new Set([
|
|
14037
|
+
const SUPPORTED_EXTENSIONS$1 = new Set([
|
|
13994
14038
|
...FILE_PART_EXTENSIONS,
|
|
13995
14039
|
"pdf",
|
|
13996
14040
|
"txt",
|
|
@@ -14004,7 +14048,7 @@ const SUPPORTED_EXTENSIONS = new Set([
|
|
|
14004
14048
|
]);
|
|
14005
14049
|
const PDF_EXT_RE = /\.pdf$/i;
|
|
14006
14050
|
const JSON_EXT_RE$1 = /\.json$/;
|
|
14007
|
-
const SUPPORTED_FILE_PATTERN = `*.{${[...SUPPORTED_EXTENSIONS].join(",")}}`;
|
|
14051
|
+
const SUPPORTED_FILE_PATTERN = `*.{${[...SUPPORTED_EXTENSIONS$1].join(",")}}`;
|
|
14008
14052
|
async function syncResultToNotion(aiConfig, schemaName, data) {
|
|
14009
14053
|
if (!data || typeof data !== "object" || Array.isArray(data)) throw new Error("Extraction result is not an object and cannot be written to Notion.");
|
|
14010
14054
|
const page = await writeNotionPage(aiConfig.notion, schemaName, data);
|
|
@@ -14043,7 +14087,7 @@ function listSupportedFiles(dir, pattern) {
|
|
|
14043
14087
|
onlyFiles: true
|
|
14044
14088
|
}).filter((file) => {
|
|
14045
14089
|
const ext = path.extname(file).toLowerCase().replace(".", "");
|
|
14046
|
-
return SUPPORTED_EXTENSIONS.has(ext);
|
|
14090
|
+
return SUPPORTED_EXTENSIONS$1.has(ext);
|
|
14047
14091
|
}).sort();
|
|
14048
14092
|
}
|
|
14049
14093
|
async function loadSchema(config, schemaName) {
|
|
@@ -14199,27 +14243,76 @@ async function extractSingle(aiexDir, config, aiConfig, schemaName, text$1, file
|
|
|
14199
14243
|
tokensUsed: result.tokensUsed
|
|
14200
14244
|
};
|
|
14201
14245
|
}
|
|
14202
|
-
async function
|
|
14246
|
+
async function runAuditedExtraction(options) {
|
|
14247
|
+
const { aiexDir, config, aiConfig, schemaName, source, modelOverride, retryOf, insert, force, quiet = false } = options;
|
|
14248
|
+
let fileHash;
|
|
14249
|
+
let isPlainTextFile = false;
|
|
14250
|
+
if (source.type === "file") {
|
|
14251
|
+
const ext = path.extname(source.filePath).toLowerCase().replace(".", "");
|
|
14252
|
+
isPlainTextFile = [
|
|
14253
|
+
"txt",
|
|
14254
|
+
"md",
|
|
14255
|
+
"csv",
|
|
14256
|
+
"json",
|
|
14257
|
+
"html",
|
|
14258
|
+
"xml",
|
|
14259
|
+
"yaml",
|
|
14260
|
+
"yml"
|
|
14261
|
+
].includes(ext);
|
|
14262
|
+
try {
|
|
14263
|
+
fileHash = await getFileHash(source.filePath);
|
|
14264
|
+
} catch (e) {
|
|
14265
|
+
if (!quiet) consola.warn(`Failed to calculate file hash for ${path.basename(source.filePath)}: ${e instanceof Error ? e.message : String(e)}`);
|
|
14266
|
+
}
|
|
14267
|
+
if (fileHash && !isPlainTextFile && !force) {
|
|
14268
|
+
const existing = await findSucceededAuditByHash(aiexDir, schemaName, fileHash);
|
|
14269
|
+
if (existing) {
|
|
14270
|
+
if (!quiet) consola.info(`File ${pc.cyan(path.basename(source.filePath))} (hash: ${fileHash.slice(0, 8)}) has already been processed successfully. Skipping.`);
|
|
14271
|
+
return {
|
|
14272
|
+
success: true,
|
|
14273
|
+
skipped: true,
|
|
14274
|
+
auditId: existing.id,
|
|
14275
|
+
fileHash,
|
|
14276
|
+
outputPath: existing.outputPath,
|
|
14277
|
+
outputName: existing.outputName,
|
|
14278
|
+
tablesInserted: existing.tablesInserted,
|
|
14279
|
+
notionPages: existing.notionPages,
|
|
14280
|
+
tokensUsed: existing.tokensUsed
|
|
14281
|
+
};
|
|
14282
|
+
}
|
|
14283
|
+
}
|
|
14284
|
+
}
|
|
14203
14285
|
const audit = await createExtractionAuditRecord(aiexDir, {
|
|
14204
14286
|
schemaName,
|
|
14205
14287
|
modelName: modelOverride?.name,
|
|
14206
|
-
source: {
|
|
14288
|
+
source: source.type === "file" ? {
|
|
14207
14289
|
type: "file",
|
|
14208
|
-
filePath,
|
|
14209
|
-
fileName: path.basename(filePath)
|
|
14210
|
-
|
|
14290
|
+
filePath: source.filePath,
|
|
14291
|
+
fileName: path.basename(source.filePath),
|
|
14292
|
+
fileHash
|
|
14293
|
+
} : {
|
|
14294
|
+
type: "text",
|
|
14295
|
+
text: source.text
|
|
14296
|
+
},
|
|
14297
|
+
retryOf
|
|
14211
14298
|
});
|
|
14212
14299
|
try {
|
|
14213
|
-
|
|
14214
|
-
|
|
14215
|
-
|
|
14216
|
-
|
|
14300
|
+
let text$1 = "";
|
|
14301
|
+
let filePath;
|
|
14302
|
+
if (source.type === "file") {
|
|
14303
|
+
const input = await readExtractFileInput(source.filePath, aiConfig, modelOverride);
|
|
14304
|
+
text$1 = input.text;
|
|
14305
|
+
filePath = input.filePath;
|
|
14306
|
+
} else text$1 = source.text;
|
|
14307
|
+
const r = await extractSingle(aiexDir, config, aiConfig, schemaName, text$1, filePath, modelOverride, {
|
|
14308
|
+
quiet,
|
|
14309
|
+
insert
|
|
14217
14310
|
});
|
|
14218
14311
|
if (r.success) {
|
|
14219
14312
|
let notionPages;
|
|
14220
14313
|
if (shouldSyncNotion(aiConfig, schemaName)) try {
|
|
14221
14314
|
notionPages = await syncResultToNotion(aiConfig, schemaName, r.data);
|
|
14222
|
-
consola.success(`Synced to Notion: ${notionPages.length} page(s)`);
|
|
14315
|
+
if (!quiet) consola.success(`Synced to Notion: ${notionPages.length} page(s)`);
|
|
14223
14316
|
} catch (error) {
|
|
14224
14317
|
await updateExtractionAuditRecord(aiexDir, audit.id, {
|
|
14225
14318
|
status: "failed",
|
|
@@ -14229,10 +14322,15 @@ async function processOneFile(aiexDir, config, aiConfig, schemaName, filePath, m
|
|
|
14229
14322
|
tokensUsed: r.tokensUsed,
|
|
14230
14323
|
error: error instanceof Error ? error.message : String(error)
|
|
14231
14324
|
});
|
|
14232
|
-
consola.error(`Notion sync failed: ${error instanceof Error ? error.message : String(error)}`);
|
|
14233
|
-
return
|
|
14325
|
+
if (!quiet) consola.error(`Notion sync failed: ${error instanceof Error ? error.message : String(error)}`);
|
|
14326
|
+
return {
|
|
14327
|
+
success: false,
|
|
14328
|
+
error: error instanceof Error ? error.message : String(error),
|
|
14329
|
+
auditId: audit.id,
|
|
14330
|
+
fileHash
|
|
14331
|
+
};
|
|
14234
14332
|
}
|
|
14235
|
-
await updateExtractionAuditRecord(aiexDir, audit.id, {
|
|
14333
|
+
const updated = await updateExtractionAuditRecord(aiexDir, audit.id, {
|
|
14236
14334
|
status: "succeeded",
|
|
14237
14335
|
outputPath: r.outputPath,
|
|
14238
14336
|
outputName: r.outputPath ? path.basename(r.outputPath) : void 0,
|
|
@@ -14240,24 +14338,66 @@ async function processOneFile(aiexDir, config, aiConfig, schemaName, filePath, m
|
|
|
14240
14338
|
notionPages,
|
|
14241
14339
|
tokensUsed: r.tokensUsed
|
|
14242
14340
|
});
|
|
14243
|
-
|
|
14244
|
-
|
|
14341
|
+
return {
|
|
14342
|
+
success: true,
|
|
14343
|
+
outputPath: updated.outputPath,
|
|
14344
|
+
outputName: updated.outputName,
|
|
14345
|
+
tablesInserted: updated.tablesInserted,
|
|
14346
|
+
notionPages: updated.notionPages,
|
|
14347
|
+
tokensUsed: updated.tokensUsed,
|
|
14348
|
+
auditId: updated.id,
|
|
14349
|
+
fileHash
|
|
14350
|
+
};
|
|
14245
14351
|
} else {
|
|
14246
14352
|
await updateExtractionAuditRecord(aiexDir, audit.id, {
|
|
14247
14353
|
status: "failed",
|
|
14248
14354
|
error: r.error || "Extraction failed"
|
|
14249
14355
|
});
|
|
14250
|
-
consola.error(`Failed: ${r.error}`);
|
|
14251
|
-
return
|
|
14356
|
+
if (!quiet) consola.error(`Failed: ${r.error}`);
|
|
14357
|
+
return {
|
|
14358
|
+
success: false,
|
|
14359
|
+
error: r.error,
|
|
14360
|
+
auditId: audit.id,
|
|
14361
|
+
fileHash
|
|
14362
|
+
};
|
|
14252
14363
|
}
|
|
14253
14364
|
} catch (e) {
|
|
14254
14365
|
await updateExtractionAuditRecord(aiexDir, audit.id, {
|
|
14255
14366
|
status: "failed",
|
|
14256
14367
|
error: e instanceof Error ? e.message : String(e)
|
|
14257
14368
|
});
|
|
14258
|
-
|
|
14259
|
-
|
|
14369
|
+
if (!quiet) {
|
|
14370
|
+
const name$1 = source.type === "file" ? path.basename(source.filePath) : "text input";
|
|
14371
|
+
consola.error(`Error processing ${name$1}: ${e instanceof Error ? e.message : String(e)}`);
|
|
14372
|
+
}
|
|
14373
|
+
return {
|
|
14374
|
+
success: false,
|
|
14375
|
+
error: e instanceof Error ? e.message : String(e),
|
|
14376
|
+
auditId: audit.id,
|
|
14377
|
+
fileHash
|
|
14378
|
+
};
|
|
14379
|
+
}
|
|
14380
|
+
}
|
|
14381
|
+
/**
 * Runs an audited extraction for a single file and reports the outcome as a boolean.
 *
 * Delegates to `runAuditedExtraction` with a file source and `quiet: false`,
 * forwarding `options.insert` and `options.force` when options are given.
 * Logs a "Processed" success line unless the extraction was skipped.
 *
 * @returns {Promise<boolean>} `true` when the extraction succeeded (including
 *   the skipped case), `false` otherwise.
 */
async function processOneFile(aiexDir, config, aiConfig, schemaName, filePath, modelOverride, options) {
	const { success, skipped } = await runAuditedExtraction({
		aiexDir,
		config,
		aiConfig,
		schemaName,
		source: {
			type: "file",
			filePath
		},
		modelOverride,
		insert: options?.insert,
		force: options?.force,
		quiet: false
	});
	if (!success) return false;
	if (!skipped) consola.success(`Processed: ${path.basename(filePath)}`);
	return true;
}
|
|
14262
14402
|
async function runBatchExtraction(aiexDir, config, aiConfig, schemaName, dir, globPattern, modelOverride, options) {
|
|
14263
14403
|
consola.info(`Scanning ${pc.cyan(dir)} for supported files...`);
|
|
@@ -14284,7 +14424,10 @@ async function runBatchExtraction(aiexDir, config, aiConfig, schemaName, dir, gl
|
|
|
14284
14424
|
for (let i = 0; i < files.length; i++) {
|
|
14285
14425
|
const file = files[i];
|
|
14286
14426
|
consola.info(`\n[${i + 1}/${files.length}] Processing: ${pc.cyan(path.basename(file))}`);
|
|
14287
|
-
if (await processOneFile(aiexDir, config, aiConfig, schemaName, file, modelOverride,
|
|
14427
|
+
if (await processOneFile(aiexDir, config, aiConfig, schemaName, file, modelOverride, {
|
|
14428
|
+
insert: options?.insert,
|
|
14429
|
+
force: options?.force
|
|
14430
|
+
})) successCount++;
|
|
14288
14431
|
else failCount++;
|
|
14289
14432
|
}
|
|
14290
14433
|
consola.info(`\nBatch complete: ${pc.green(`${successCount} succeeded`)}, ${pc.red(`${failCount} failed`)}, ${files.length} total`);
|
|
@@ -14295,6 +14438,174 @@ async function runBatchExtraction(aiexDir, config, aiConfig, schemaName, dir, gl
|
|
|
14295
14438
|
};
|
|
14296
14439
|
}
|
|
14297
14440
|
|
|
14441
|
+
//#endregion
|
|
14442
|
+
//#region src/commands/export.ts
|
|
14443
|
+
/**
 * Wraps a SQLite identifier in double quotes, doubling any embedded quote, so
 * names containing spaces, punctuation or reserved words can be interpolated
 * into a statement without breaking or altering it.
 */
function quoteSqliteIdentifier(identifier) {
	return `"${String(identifier).replace(/"/g, "\"\"")}"`;
}

/**
 * Converts one database cell into the value written to the export sheet.
 *
 * @param {unknown} value - Raw value read from the row.
 * @param {{ type?: string }} column - Column entry from `table_info`; its
 *   declared type is used when the schema gives no type for the column.
 * @param {string} schemaType - JSON-schema type of the property, or "" when unknown.
 * @param {string} format - "csv" or "xlsx"; only affects boolean rendering.
 * @returns {unknown} Value to place in the exported row.
 */
function formatExportCell(value, column, schemaType, format) {
	if (value === null || value === void 0) return "";
	if (schemaType === "boolean") {
		const isTrue = value === 1 || value === "1" || value === true;
		// XLSX keeps a real boolean cell; CSV gets the literal text.
		if (format === "xlsx") return isTrue;
		return isTrue ? "true" : "false";
	}
	if (schemaType === "number" || schemaType === "integer") {
		if (value === "") return "";
		const num = Number(value);
		return Number.isNaN(num) ? value : num;
	}
	if (typeof value === "object") return JSON.stringify(value);
	// No schema type: fall back to the column's declared database type and
	// coerce numeric-looking strings stored in numeric columns.
	const dbType = (column.type || "").toLowerCase();
	const isNumericColumn = [
		"int",
		"real",
		"num",
		"double",
		"float"
	].some((keyword) => dbType.includes(keyword));
	if (isNumericColumn && typeof value === "string" && value !== "") {
		const num = Number(value);
		return Number.isNaN(num) ? value : num;
	}
	return value;
}

/**
 * `aiex export` command: dumps one SQLite table to a CSV or XLSX file.
 *
 * The table is selected with `--table` or derived from `--schema` (the schema's
 * `table.name`); when both are given they must agree. The format comes from
 * `--format`, otherwise from the `--output` extension, otherwise "csv". Every
 * validation or I/O failure is reported through `failCommand` and ends the run.
 */
const exportCommand = defineCommand({
	meta: {
		name: "export",
		description: "Export SQLite database table to Excel (.xlsx) or CSV (.csv)"
	},
	args: {
		table: {
			type: "string",
			alias: "t",
			description: "SQLite table name to export"
		},
		schema: {
			type: "string",
			alias: "s",
			description: "Schema name (without .json extension) to export"
		},
		format: {
			type: "string",
			alias: "f",
			description: "Export format: csv or xlsx (default: inferred from output or csv)"
		},
		output: {
			type: "string",
			alias: "o",
			description: "Output file path (default: ./<tableName>.<format>)"
		}
	},
	async run({ args }) {
		intro(pc.inverse(" aiex export "));
		if (!args.table && !args.schema) {
			failCommand("Either table name (--table / -t) or schema name (--schema / -s) is required");
			return;
		}
		const cwd = process.cwd();
		const config = createMigrationConfig(cwd);
		const schemaDir = config.schemaPath;
		let tableName = args.table || "";
		let schema = null;
		if (args.schema) {
			const schemaLoad = await loadSchema(config, args.schema);
			if (!schemaLoad.schema) {
				failCommand(schemaLoad.error || `Schema file for "${args.schema}" not found`);
				return;
			}
			schema = schemaLoad.schema;
			const tName = schema.table?.name;
			if (!tName) {
				failCommand(`Schema "${args.schema}" does not define a database table name.`);
				return;
			}
			if (tableName && tableName !== tName) {
				failCommand(`Specified table name "${tableName}" does not match schema table name "${tName}"`);
				return;
			}
			tableName = tName;
		} else {
			// Only a table name was given: look for a schema describing it so the
			// property types can guide cell formatting.
			try {
				if (fs$1.existsSync(schemaDir)) {
					const files = fs$1.readdirSync(schemaDir).filter((f) => f.endsWith(".json"));
					for (const file of files) {
						const candidate = await readFile(path.join(schemaDir, file));
						if (candidate.table?.name === tableName) {
							schema = candidate;
							break;
						}
					}
				}
			} catch {
				// Deliberately best-effort: the schema is optional here. Without it
				// the export falls back to the column types reported by SQLite.
			}
		}
		let format = args.format?.toLowerCase();
		const outputPathArg = args.output;
		if (outputPathArg && !format) {
			const ext = path.extname(outputPathArg).toLowerCase();
			if (ext === ".xlsx") format = "xlsx";
			else if (ext === ".csv") format = "csv";
		}
		if (!format) format = "csv";
		if (format !== "csv" && format !== "xlsx") {
			failCommand(`Unsupported export format: "${format}". Supported formats: csv, xlsx`);
			return;
		}
		const resolvedOutput = outputPathArg ? path.resolve(outputPathArg) : path.resolve(cwd, `${tableName}.${format}`);
		if (!fs$1.existsSync(config.databasePath)) {
			failCommand(`Database file not found at ${config.databasePath}. Please run "aiex schema" to create the database first.`);
			return;
		}
		const s = spinner();
		s.start(`Loading data from table "${tableName}"...`);
		let columns = [];
		let rows = [];
		let db;
		try {
			db = new Database(config.databasePath, { readonly: true });
			const tableExists = db.prepare(`
				select name from sqlite_master
				where type = 'table' and name = ?
			`).get(tableName);
			if (!tableExists) {
				s.stop("Database query failed");
				failCommand(`Table "${tableName}" not found in database. Run "aiex schema" first to migrate.`);
				return;
			}
			// Identifiers cannot be bound as parameters, so quote the name before
			// interpolating it into the statements below.
			const quotedTable = quoteSqliteIdentifier(tableName);
			columns = db.pragma(`table_info(${quotedTable})`);
			rows = db.prepare(`select * from ${quotedTable}`).all();
		} catch (error) {
			s.stop("Database query failed");
			failCommand(error instanceof Error ? error.message : String(error));
			return;
		} finally {
			// Release the handle on every path, including when a query throws.
			db?.close();
		}
		if (rows.length === 0) {
			s.stop("Empty table");
			consola.warn(`Table "${tableName}" is empty. Exporting empty file...`);
		} else s.stop(`Loaded ${rows.length} row(s)`);
		const s2 = spinner();
		s2.start("Formatting data...");
		const formattedRows = rows.map((row) => {
			const newRow = {};
			for (const col of columns) {
				const schemaType = schema?.properties?.[col.name]?.type || "";
				newRow[col.name] = formatExportCell(row[col.name], col, schemaType, format);
			}
			return newRow;
		});
		s2.stop("Data formatted");
		const s3 = spinner();
		s3.start(`Writing ${format.toUpperCase()} file to ${resolvedOutput}...`);
		try {
			// Passing the header explicitly fixes the column order and still emits
			// a header row when the table has no rows.
			const ws = XLSX.utils.json_to_sheet(formattedRows, { header: columns.map((col) => col.name) });
			const outputDir = path.dirname(resolvedOutput);
			if (!fs$1.existsSync(outputDir)) fs$1.mkdirSync(outputDir, { recursive: true });
			if (format === "xlsx") {
				const wb = XLSX.utils.book_new();
				// Sheet name is cut to 31 characters before being appended.
				XLSX.utils.book_append_sheet(wb, ws, tableName.slice(0, 31));
				XLSX.writeFile(wb, resolvedOutput);
			} else {
				const csv = XLSX.utils.sheet_to_csv(ws);
				// NOTE(review): the prefix literal renders as empty in the reviewed
				// text, which would make the concatenation a no-op; it presumably
				// holds an invisible U+FEFF byte-order mark (lets spreadsheet apps
				// detect UTF-8). Spelled as an escape so it is visible — confirm.
				fs$1.writeFileSync(resolvedOutput, "\uFEFF" + csv, "utf8");
			}
			s3.stop("Export completed successfully");
			consola.success(`Successfully exported ${rows.length} row(s) to ${pc.cyan(resolvedOutput)}`);
		} catch (error) {
			s3.stop("File write failed");
			failCommand(error instanceof Error ? error.message : String(error));
			return;
		}
		outro("Done!");
	}
});
|
|
14608
|
+
|
|
14298
14609
|
//#endregion
|
|
14299
14610
|
//#region src/commands/extract.ts
|
|
14300
14611
|
function getIdArg(args) {
|
|
@@ -14312,7 +14623,7 @@ function isExtractSubCommand(rawArgs) {
|
|
|
14312
14623
|
].includes(arg));
|
|
14313
14624
|
}
|
|
14314
14625
|
/**
 * Builds the display label for an audit record's source: the stored file name
 * (or "file" when it is missing) for file sources, "unknown" for anything else.
 */
function formatSource(source) {
	if (source.type !== "file") return "unknown";
	return source.fileName || "file";
}
|
|
14317
14628
|
async function loadConfiguredAI(aiexDir) {
|
|
14318
14629
|
const aiConfig = await readAIConfig(aiexDir);
|
|
@@ -14339,52 +14650,6 @@ function resolveModelOverride(aiConfig, modelName) {
|
|
|
14339
14650
|
}
|
|
14340
14651
|
return matched;
|
|
14341
14652
|
}
|
|
14342
|
-
async function runAuditedSingleExtraction(input) {
|
|
14343
|
-
const audit = await createExtractionAuditRecord(input.aiexDir, {
|
|
14344
|
-
schemaName: input.schemaName,
|
|
14345
|
-
modelName: input.modelOverride?.name,
|
|
14346
|
-
source: input.source,
|
|
14347
|
-
retryOf: input.retryOf
|
|
14348
|
-
});
|
|
14349
|
-
const result = await extractSingle(input.aiexDir, input.config, input.aiConfig, input.schemaName, input.text, input.filePath, input.modelOverride, { insert: input.insert });
|
|
14350
|
-
if (!result.success) {
|
|
14351
|
-
await updateExtractionAuditRecord(input.aiexDir, audit.id, {
|
|
14352
|
-
status: "failed",
|
|
14353
|
-
error: result.error || "Extraction failed"
|
|
14354
|
-
});
|
|
14355
|
-
return false;
|
|
14356
|
-
}
|
|
14357
|
-
let notionPages;
|
|
14358
|
-
if (input.aiConfig.notion?.enabled && input.aiConfig.notion.schemas?.[input.schemaName]?.databaseId?.trim()) try {
|
|
14359
|
-
if (!result.data || typeof result.data !== "object" || Array.isArray(result.data)) throw new Error("Extraction result is not an object and cannot be written to Notion.");
|
|
14360
|
-
const page = await writeNotionPage(input.aiConfig.notion, input.schemaName, result.data);
|
|
14361
|
-
notionPages = [{
|
|
14362
|
-
databaseId: page.databaseId,
|
|
14363
|
-
pageId: page.pageId
|
|
14364
|
-
}];
|
|
14365
|
-
consola.success(`Synced to Notion: ${notionPages.length} page(s)`);
|
|
14366
|
-
} catch (error) {
|
|
14367
|
-
await updateExtractionAuditRecord(input.aiexDir, audit.id, {
|
|
14368
|
-
status: "failed",
|
|
14369
|
-
outputPath: result.outputPath,
|
|
14370
|
-
outputName: result.outputPath ? path.basename(result.outputPath) : void 0,
|
|
14371
|
-
tablesInserted: result.tablesInserted,
|
|
14372
|
-
tokensUsed: result.tokensUsed,
|
|
14373
|
-
error: error instanceof Error ? error.message : String(error)
|
|
14374
|
-
});
|
|
14375
|
-
consola.error(`Notion sync failed: ${error instanceof Error ? error.message : String(error)}`);
|
|
14376
|
-
return false;
|
|
14377
|
-
}
|
|
14378
|
-
await updateExtractionAuditRecord(input.aiexDir, audit.id, {
|
|
14379
|
-
status: "succeeded",
|
|
14380
|
-
outputPath: result.outputPath,
|
|
14381
|
-
outputName: result.outputPath ? path.basename(result.outputPath) : void 0,
|
|
14382
|
-
tablesInserted: result.tablesInserted,
|
|
14383
|
-
notionPages,
|
|
14384
|
-
tokensUsed: result.tokensUsed
|
|
14385
|
-
});
|
|
14386
|
-
return true;
|
|
14387
|
-
}
|
|
14388
14653
|
const historyCommand = defineCommand({
|
|
14389
14654
|
meta: {
|
|
14390
14655
|
name: "history",
|
|
@@ -14462,23 +14727,19 @@ const retryCommand = defineCommand({
|
|
|
14462
14727
|
const modelOverride = resolveModelOverride(aiConfig, record.modelName);
|
|
14463
14728
|
if (modelOverride === null) return;
|
|
14464
14729
|
try {
|
|
14465
|
-
const
|
|
14466
|
-
text: record.source.text || "",
|
|
14467
|
-
filePath: void 0
|
|
14468
|
-
};
|
|
14469
|
-
if (!await runAuditedSingleExtraction({
|
|
14730
|
+
const result = await runAuditedExtraction({
|
|
14470
14731
|
aiexDir,
|
|
14471
14732
|
config,
|
|
14472
14733
|
aiConfig,
|
|
14473
14734
|
schemaName: record.schemaName,
|
|
14474
|
-
text: sourceInput.text,
|
|
14475
|
-
filePath: sourceInput.filePath,
|
|
14476
14735
|
source: record.source,
|
|
14477
14736
|
modelOverride,
|
|
14478
14737
|
retryOf: record.id,
|
|
14479
|
-
insert: !args.noInsert
|
|
14480
|
-
|
|
14481
|
-
|
|
14738
|
+
insert: !args.noInsert,
|
|
14739
|
+
force: true
|
|
14740
|
+
});
|
|
14741
|
+
if (!result.success) {
|
|
14742
|
+
failCommand(result.error);
|
|
14482
14743
|
return;
|
|
14483
14744
|
}
|
|
14484
14745
|
outro("Done!");
|
|
@@ -14531,11 +14792,6 @@ const extractCommand = defineCommand({
|
|
|
14531
14792
|
alias: "s",
|
|
14532
14793
|
description: "Schema name (without .json extension)"
|
|
14533
14794
|
},
|
|
14534
|
-
text: {
|
|
14535
|
-
type: "string",
|
|
14536
|
-
alias: "t",
|
|
14537
|
-
description: "Text content to extract"
|
|
14538
|
-
},
|
|
14539
14795
|
file: {
|
|
14540
14796
|
type: "string",
|
|
14541
14797
|
alias: "f",
|
|
@@ -14560,6 +14816,11 @@ const extractCommand = defineCommand({
|
|
|
14560
14816
|
type: "boolean",
|
|
14561
14817
|
description: "Extract and save JSON without inserting into SQLite",
|
|
14562
14818
|
default: false
|
|
14819
|
+
},
|
|
14820
|
+
force: {
|
|
14821
|
+
type: "boolean",
|
|
14822
|
+
description: "Force re-extraction even if the file has already been processed successfully",
|
|
14823
|
+
default: false
|
|
14563
14824
|
}
|
|
14564
14825
|
},
|
|
14565
14826
|
async run({ args, rawArgs }) {
|
|
@@ -14567,10 +14828,6 @@ const extractCommand = defineCommand({
|
|
|
14567
14828
|
intro(pc.inverse(" aiex extract "));
|
|
14568
14829
|
const config = createMigrationConfig(process.cwd());
|
|
14569
14830
|
const aiexDir = path.dirname(config.schemaPath);
|
|
14570
|
-
if (args.dir && args.text) {
|
|
14571
|
-
failCommand("Cannot combine -t/--text with -d/--dir");
|
|
14572
|
-
return;
|
|
14573
|
-
}
|
|
14574
14831
|
if (args.dir && args.file) {
|
|
14575
14832
|
failCommand("Cannot combine -f/--file with -d/--dir");
|
|
14576
14833
|
return;
|
|
@@ -14579,7 +14836,7 @@ const extractCommand = defineCommand({
|
|
|
14579
14836
|
if (!aiConfig) return;
|
|
14580
14837
|
const modelOverride = resolveModelOverride(aiConfig, args.model);
|
|
14581
14838
|
if (modelOverride === null) return;
|
|
14582
|
-
if (!args.schema && !args.
|
|
14839
|
+
if (!args.schema && !args.file && !args.dir) {
|
|
14583
14840
|
if (await runInteractive(aiexDir, config, aiConfig, modelOverride)) outro("Done!");
|
|
14584
14841
|
return;
|
|
14585
14842
|
}
|
|
@@ -14588,13 +14845,16 @@ const extractCommand = defineCommand({
|
|
|
14588
14845
|
failCommand("Schema name (-s) is required in batch mode");
|
|
14589
14846
|
return;
|
|
14590
14847
|
}
|
|
14591
|
-
const result = await runBatchExtraction(aiexDir, config, aiConfig, args.schema, args.dir, args.glob, modelOverride, {
|
|
14592
|
-
|
|
14593
|
-
|
|
14848
|
+
const result$1 = await runBatchExtraction(aiexDir, config, aiConfig, args.schema, args.dir, args.glob, modelOverride, {
|
|
14849
|
+
insert: !args.noInsert,
|
|
14850
|
+
force: args.force
|
|
14851
|
+
});
|
|
14852
|
+
if (!result$1.ok) {
|
|
14853
|
+
failCommand(result$1.error);
|
|
14594
14854
|
return;
|
|
14595
14855
|
}
|
|
14596
|
-
if (result.failCount > 0) process.exitCode = 1;
|
|
14597
|
-
if (result.failCount > 0) outro(`Completed with failures (${result.failCount} failed)`);
|
|
14856
|
+
if (result$1.failCount > 0) process.exitCode = 1;
|
|
14857
|
+
if (result$1.failCount > 0) outro(`Completed with failures (${result$1.failCount} failed)`);
|
|
14598
14858
|
else outro("Done!");
|
|
14599
14859
|
return;
|
|
14600
14860
|
}
|
|
@@ -14602,44 +14862,26 @@ const extractCommand = defineCommand({
|
|
|
14602
14862
|
failCommand("Please provide a schema name (-s) to extract from");
|
|
14603
14863
|
return;
|
|
14604
14864
|
}
|
|
14605
|
-
if (!args.
|
|
14606
|
-
failCommand("Please provide
|
|
14865
|
+
if (!args.file) {
|
|
14866
|
+
failCommand("Please provide a file (-f) to extract from");
|
|
14607
14867
|
return;
|
|
14608
14868
|
}
|
|
14609
|
-
|
|
14610
|
-
failCommand("-t and -f cannot be used together");
|
|
14611
|
-
return;
|
|
14612
|
-
}
|
|
14613
|
-
let text$1 = "";
|
|
14614
|
-
let filePath;
|
|
14615
|
-
if (args.file) try {
|
|
14616
|
-
const input = await readExtractFileInput(args.file, aiConfig, modelOverride);
|
|
14617
|
-
text$1 = input.text;
|
|
14618
|
-
filePath = input.filePath;
|
|
14619
|
-
} catch (e) {
|
|
14620
|
-
failCommand(`Cannot read file: ${args.file} — ${e instanceof Error ? e.message : String(e)}`);
|
|
14621
|
-
return;
|
|
14622
|
-
}
|
|
14623
|
-
else if (args.text) text$1 = args.text;
|
|
14624
|
-
if (!await runAuditedSingleExtraction({
|
|
14869
|
+
const result = await runAuditedExtraction({
|
|
14625
14870
|
aiexDir,
|
|
14626
14871
|
config,
|
|
14627
14872
|
aiConfig,
|
|
14628
14873
|
schemaName: args.schema,
|
|
14629
|
-
|
|
14630
|
-
filePath,
|
|
14631
|
-
source: filePath ? {
|
|
14874
|
+
source: {
|
|
14632
14875
|
type: "file",
|
|
14633
|
-
filePath: args.file
|
|
14634
|
-
fileName: path.basename(args.file)
|
|
14635
|
-
} : {
|
|
14636
|
-
type: "text",
|
|
14637
|
-
text: text$1
|
|
14876
|
+
filePath: args.file
|
|
14638
14877
|
},
|
|
14639
14878
|
modelOverride,
|
|
14640
|
-
insert: !args.noInsert
|
|
14641
|
-
|
|
14642
|
-
|
|
14879
|
+
insert: !args.noInsert,
|
|
14880
|
+
force: args.force,
|
|
14881
|
+
quiet: false
|
|
14882
|
+
});
|
|
14883
|
+
if (!result.success) {
|
|
14884
|
+
failCommand(result.error);
|
|
14643
14885
|
return;
|
|
14644
14886
|
}
|
|
14645
14887
|
outro("Done!");
|
|
@@ -14664,83 +14906,42 @@ async function runInteractive(aiexDir, config, aiConfig, modelOverride) {
|
|
|
14664
14906
|
}
|
|
14665
14907
|
const inputSource = await select({
|
|
14666
14908
|
message: "Choose input source:",
|
|
14667
|
-
options: [
|
|
14668
|
-
|
|
14669
|
-
|
|
14670
|
-
|
|
14671
|
-
|
|
14672
|
-
|
|
14673
|
-
|
|
14674
|
-
|
|
14675
|
-
|
|
14676
|
-
hint: "Extract from a file (txt, pdf, image)"
|
|
14677
|
-
},
|
|
14678
|
-
{
|
|
14679
|
-
label: "Batch directory",
|
|
14680
|
-
value: "dir",
|
|
14681
|
-
hint: "Extract all supported files in a directory"
|
|
14682
|
-
}
|
|
14683
|
-
]
|
|
14909
|
+
options: [{
|
|
14910
|
+
label: "Single file",
|
|
14911
|
+
value: "file",
|
|
14912
|
+
hint: "Extract from a file (txt, pdf, image)"
|
|
14913
|
+
}, {
|
|
14914
|
+
label: "Batch directory",
|
|
14915
|
+
value: "dir",
|
|
14916
|
+
hint: "Extract all supported files in a directory"
|
|
14917
|
+
}]
|
|
14684
14918
|
});
|
|
14685
14919
|
if (isCancel(inputSource)) {
|
|
14686
14920
|
cancel("Cancelled");
|
|
14687
14921
|
return false;
|
|
14688
14922
|
}
|
|
14689
|
-
if (inputSource === "
|
|
14690
|
-
const
|
|
14691
|
-
message: "Enter
|
|
14923
|
+
if (inputSource === "file") {
|
|
14924
|
+
const filePathStr = await text({
|
|
14925
|
+
message: "Enter file path:",
|
|
14692
14926
|
validate(value) {
|
|
14693
|
-
if (!value || value.trim().length === 0) return "Please enter
|
|
14927
|
+
if (!value || value.trim().length === 0) return "Please enter a file path";
|
|
14694
14928
|
}
|
|
14695
14929
|
});
|
|
14696
|
-
if (isCancel(
|
|
14930
|
+
if (isCancel(filePathStr)) {
|
|
14697
14931
|
cancel("Cancelled");
|
|
14698
14932
|
return false;
|
|
14699
14933
|
}
|
|
14700
|
-
return
|
|
14934
|
+
return (await runAuditedExtraction({
|
|
14701
14935
|
aiexDir,
|
|
14702
14936
|
config,
|
|
14703
14937
|
aiConfig,
|
|
14704
14938
|
schemaName,
|
|
14705
|
-
text: textContent,
|
|
14706
14939
|
source: {
|
|
14707
|
-
type: "
|
|
14708
|
-
|
|
14940
|
+
type: "file",
|
|
14941
|
+
filePath: filePathStr
|
|
14709
14942
|
},
|
|
14710
14943
|
modelOverride
|
|
14711
|
-
});
|
|
14712
|
-
} else if (inputSource === "file") {
|
|
14713
|
-
const filePathStr = await text({
|
|
14714
|
-
message: "Enter file path:",
|
|
14715
|
-
validate(value) {
|
|
14716
|
-
if (!value || value.trim().length === 0) return "Please enter a file path";
|
|
14717
|
-
}
|
|
14718
|
-
});
|
|
14719
|
-
if (isCancel(filePathStr)) {
|
|
14720
|
-
cancel("Cancelled");
|
|
14721
|
-
return false;
|
|
14722
|
-
}
|
|
14723
|
-
const fp = filePathStr;
|
|
14724
|
-
try {
|
|
14725
|
-
const input = await readExtractFileInput(fp, aiConfig, modelOverride);
|
|
14726
|
-
return runAuditedSingleExtraction({
|
|
14727
|
-
aiexDir,
|
|
14728
|
-
config,
|
|
14729
|
-
aiConfig,
|
|
14730
|
-
schemaName,
|
|
14731
|
-
text: input.text,
|
|
14732
|
-
filePath: input.filePath,
|
|
14733
|
-
source: {
|
|
14734
|
-
type: "file",
|
|
14735
|
-
filePath: fp,
|
|
14736
|
-
fileName: path.basename(fp)
|
|
14737
|
-
},
|
|
14738
|
-
modelOverride
|
|
14739
|
-
});
|
|
14740
|
-
} catch (e) {
|
|
14741
|
-
consola.error(`Cannot read file: ${fp} — ${e instanceof Error ? e.message : String(e)}`);
|
|
14742
|
-
return false;
|
|
14743
|
-
}
|
|
14944
|
+
})).success;
|
|
14744
14945
|
} else if (inputSource === "dir") {
|
|
14745
14946
|
const dirPath = await text({
|
|
14746
14947
|
message: "Enter directory path:",
|
|
@@ -14942,6 +15143,231 @@ const schemaCommand = defineCommand({
|
|
|
14942
15143
|
}
|
|
14943
15144
|
});
|
|
14944
15145
|
|
|
15146
|
+
//#endregion
|
|
15147
|
+
//#region src/core/watch-service.ts
|
|
15148
|
+
const PDF_EXT_REGEXP = /\.pdf$/i;
|
|
15149
|
+
const SUPPORTED_EXTENSIONS = new Set([
|
|
15150
|
+
"png",
|
|
15151
|
+
"jpg",
|
|
15152
|
+
"jpeg",
|
|
15153
|
+
"gif",
|
|
15154
|
+
"webp",
|
|
15155
|
+
"bmp",
|
|
15156
|
+
"svg",
|
|
15157
|
+
"pdf",
|
|
15158
|
+
"txt",
|
|
15159
|
+
"md",
|
|
15160
|
+
"csv",
|
|
15161
|
+
"json",
|
|
15162
|
+
"html",
|
|
15163
|
+
"xml",
|
|
15164
|
+
"yaml",
|
|
15165
|
+
"yml"
|
|
15166
|
+
]);
|
|
15167
|
+
var WatchRegistry = class {
|
|
15168
|
+
registryPath;
|
|
15169
|
+
constructor(aiexDir) {
|
|
15170
|
+
this.registryPath = path.join(aiexDir, "watch-registry.json");
|
|
15171
|
+
}
|
|
15172
|
+
async load() {
|
|
15173
|
+
try {
|
|
15174
|
+
const content = await fs.readFile(this.registryPath, "utf-8");
|
|
15175
|
+
return JSON.parse(content);
|
|
15176
|
+
} catch {
|
|
15177
|
+
return { processed: {} };
|
|
15178
|
+
}
|
|
15179
|
+
}
|
|
15180
|
+
async save(data) {
|
|
15181
|
+
await fs.mkdir(path.dirname(this.registryPath), { recursive: true });
|
|
15182
|
+
await fs.writeFile(this.registryPath, JSON.stringify(data, null, 2), "utf-8");
|
|
15183
|
+
}
|
|
15184
|
+
async markSucceeded(hash, filePath) {
|
|
15185
|
+
const data = await this.load();
|
|
15186
|
+
data.processed[hash] = {
|
|
15187
|
+
filePath,
|
|
15188
|
+
fileName: path.basename(filePath),
|
|
15189
|
+
processedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
15190
|
+
status: "succeeded"
|
|
15191
|
+
};
|
|
15192
|
+
await this.save(data);
|
|
15193
|
+
}
|
|
15194
|
+
async markFailed(hash, filePath, error) {
|
|
15195
|
+
const data = await this.load();
|
|
15196
|
+
data.processed[hash] = {
|
|
15197
|
+
filePath,
|
|
15198
|
+
fileName: path.basename(filePath),
|
|
15199
|
+
processedAt: (/* @__PURE__ */ new Date()).toISOString(),
|
|
15200
|
+
status: "failed",
|
|
15201
|
+
error
|
|
15202
|
+
};
|
|
15203
|
+
await this.save(data);
|
|
15204
|
+
}
|
|
15205
|
+
async getStatus(hash) {
|
|
15206
|
+
return (await this.load()).processed[hash]?.status ?? null;
|
|
15207
|
+
}
|
|
15208
|
+
};
|
|
15209
|
+
async function notifySuccess(fileName) {
|
|
15210
|
+
if (process.platform === "darwin") try {
|
|
15211
|
+
await execa("osascript", ["-e", `display notification "Successfully processed and inserted data." with title "AIEX Watch: ${fileName}"`]);
|
|
15212
|
+
await execa("afplay", ["/System/Library/Sounds/Glass.aiff"]);
|
|
15213
|
+
} catch {}
|
|
15214
|
+
else process.stdout.write("\x07");
|
|
15215
|
+
}
|
|
15216
|
+
async function notifyFailure(fileName, errorMessage) {
|
|
15217
|
+
if (process.platform === "darwin") try {
|
|
15218
|
+
await execa("osascript", ["-e", `display notification "${errorMessage.replace(/"/g, "\\\"")}" with title "AIEX Watch Failed: ${fileName}"`]);
|
|
15219
|
+
await execa("afplay", ["/System/Library/Sounds/Basso.aiff"]);
|
|
15220
|
+
} catch {}
|
|
15221
|
+
else process.stdout.write("\x07\x07");
|
|
15222
|
+
}
|
|
15223
|
+
function startWatcher(options) {
|
|
15224
|
+
const { aiexDir, config, aiConfig, schemaName, watchDir, modelOverride, insert } = options;
|
|
15225
|
+
const queueDirActive = path.join(aiexDir, "watch-queue", "active");
|
|
15226
|
+
const queueDirFailed = path.join(aiexDir, "watch-queue", "failed");
|
|
15227
|
+
const registry$2 = new WatchRegistry(aiexDir);
|
|
15228
|
+
fs$1.mkdirSync(queueDirActive, { recursive: true });
|
|
15229
|
+
fs$1.mkdirSync(queueDirFailed, { recursive: true });
|
|
15230
|
+
consola.info(pc.green(`Starting watch on folder: ${pc.cyan(watchDir)}`));
|
|
15231
|
+
consola.info(pc.green(`Schema: ${pc.cyan(schemaName)}`));
|
|
15232
|
+
if (modelOverride) consola.info(pc.green(`Model Override: ${pc.cyan(modelOverride.name)}`));
|
|
15233
|
+
const watcher = chokidar.watch(watchDir, {
|
|
15234
|
+
persistent: true,
|
|
15235
|
+
ignoreInitial: false,
|
|
15236
|
+
awaitWriteFinish: {
|
|
15237
|
+
stabilityThreshold: 2e3,
|
|
15238
|
+
pollInterval: 500
|
|
15239
|
+
}
|
|
15240
|
+
});
|
|
15241
|
+
watcher.on("add", async (filePath) => {
|
|
15242
|
+
const resolvedPath = path.resolve(filePath);
|
|
15243
|
+
if (resolvedPath.startsWith(path.resolve(aiexDir))) return;
|
|
15244
|
+
const stat = await fs.stat(resolvedPath).catch(() => null);
|
|
15245
|
+
if (!stat || !stat.isFile()) return;
|
|
15246
|
+
const ext = path.extname(resolvedPath).toLowerCase().replace(".", "");
|
|
15247
|
+
if (!SUPPORTED_EXTENSIONS.has(ext)) {
|
|
15248
|
+
consola.warn(`[Watcher] Skipped unsupported file type: ${path.basename(resolvedPath)}`);
|
|
15249
|
+
return;
|
|
15250
|
+
}
|
|
15251
|
+
const fileName = path.basename(resolvedPath);
|
|
15252
|
+
consola.info(`[Watcher] New file detected: ${pc.cyan(fileName)}. Processing...`);
|
|
15253
|
+
try {
|
|
15254
|
+
const hash = await getFileHash(resolvedPath);
|
|
15255
|
+
if (await registry$2.getStatus(hash) === "succeeded") {
|
|
15256
|
+
consola.info(`[Watcher] File ${pc.cyan(fileName)} (hash: ${hash.slice(0, 8)}) has already been processed successfully. Skipping.`);
|
|
15257
|
+
return;
|
|
15258
|
+
}
|
|
15259
|
+
const activeQueuePath = path.join(queueDirActive, `${hash}.${ext}`);
|
|
15260
|
+
await fs.copyFile(resolvedPath, activeQueuePath);
|
|
15261
|
+
if (await processOneFile(aiexDir, config, aiConfig, schemaName, activeQueuePath, modelOverride, { insert })) {
|
|
15262
|
+
await registry$2.markSucceeded(hash, resolvedPath);
|
|
15263
|
+
await fs.rm(activeQueuePath, { force: true }).catch(() => {});
|
|
15264
|
+
await fs.rm(activeQueuePath.replace(PDF_EXT_REGEXP, ".md"), { force: true }).catch(() => {});
|
|
15265
|
+
consola.success(`[Watcher] File processed successfully: ${pc.green(fileName)}`);
|
|
15266
|
+
await notifySuccess(fileName);
|
|
15267
|
+
} else {
|
|
15268
|
+
const errorMsg = "Extraction failed. See extraction audit history.";
|
|
15269
|
+
await registry$2.markFailed(hash, resolvedPath, errorMsg);
|
|
15270
|
+
const failedQueuePath = path.join(queueDirFailed, `${hash}-${Date.now()}.${ext}`);
|
|
15271
|
+
await fs.rename(activeQueuePath, failedQueuePath).catch(() => {});
|
|
15272
|
+
await fs.rm(activeQueuePath.replace(PDF_EXT_REGEXP, ".md"), { force: true }).catch(() => {});
|
|
15273
|
+
consola.error(`[Watcher] Processing failed for: ${pc.red(fileName)}`);
|
|
15274
|
+
await notifyFailure(fileName, errorMsg);
|
|
15275
|
+
}
|
|
15276
|
+
} catch (e) {
|
|
15277
|
+
const errorMsg = e instanceof Error ? e.message : String(e);
|
|
15278
|
+
consola.error(`[Watcher] Error processing file ${fileName}: ${errorMsg}`);
|
|
15279
|
+
await notifyFailure(fileName, errorMsg);
|
|
15280
|
+
}
|
|
15281
|
+
});
|
|
15282
|
+
watcher.on("error", (error) => {
|
|
15283
|
+
consola.error(`[Watcher] Watcher error: ${error?.message || String(error)}`);
|
|
15284
|
+
});
|
|
15285
|
+
return watcher;
|
|
15286
|
+
}
|
|
15287
|
+
|
|
15288
|
+
//#endregion
|
|
15289
|
+
//#region src/commands/watch.ts
|
|
15290
|
+
const watchCommand = defineCommand({
|
|
15291
|
+
meta: {
|
|
15292
|
+
name: "watch",
|
|
15293
|
+
description: "Watch a directory for new files and automatically extract data"
|
|
15294
|
+
},
|
|
15295
|
+
args: {
|
|
15296
|
+
schema: {
|
|
15297
|
+
type: "string",
|
|
15298
|
+
alias: "s",
|
|
15299
|
+
description: "Schema name (without .json extension) to use for extraction"
|
|
15300
|
+
},
|
|
15301
|
+
dir: {
|
|
15302
|
+
type: "string",
|
|
15303
|
+
alias: "d",
|
|
15304
|
+
description: "Directory path to watch for incoming files"
|
|
15305
|
+
},
|
|
15306
|
+
model: {
|
|
15307
|
+
type: "string",
|
|
15308
|
+
alias: "m",
|
|
15309
|
+
description: "AI model to use for extraction (overrides default/auto-selected model)"
|
|
15310
|
+
},
|
|
15311
|
+
noInsert: {
|
|
15312
|
+
type: "boolean",
|
|
15313
|
+
description: "Extract and save JSON without inserting into SQLite database",
|
|
15314
|
+
default: false
|
|
15315
|
+
}
|
|
15316
|
+
},
|
|
15317
|
+
async run({ args }) {
|
|
15318
|
+
intro(pc.inverse(" aiex watch "));
|
|
15319
|
+
if (!args.schema) {
|
|
15320
|
+
failCommand("Schema name (-s) is required");
|
|
15321
|
+
return;
|
|
15322
|
+
}
|
|
15323
|
+
if (!args.dir) {
|
|
15324
|
+
failCommand("Watch directory path (-d) is required");
|
|
15325
|
+
return;
|
|
15326
|
+
}
|
|
15327
|
+
const config = createMigrationConfig(process.cwd());
|
|
15328
|
+
const aiexDir = path.dirname(config.schemaPath);
|
|
15329
|
+
const schemaLoad = await loadSchema(config, args.schema);
|
|
15330
|
+
if (!schemaLoad.schema) {
|
|
15331
|
+
failCommand(schemaLoad.error || `Schema file for "${args.schema}" not found`);
|
|
15332
|
+
return;
|
|
15333
|
+
}
|
|
15334
|
+
let watchDirStat;
|
|
15335
|
+
try {
|
|
15336
|
+
watchDirStat = fs$1.statSync(args.dir);
|
|
15337
|
+
} catch (e) {
|
|
15338
|
+
failCommand(`Watch directory does not exist: ${args.dir} — ${e instanceof Error ? e.message : String(e)}`);
|
|
15339
|
+
return;
|
|
15340
|
+
}
|
|
15341
|
+
if (!watchDirStat.isDirectory()) {
|
|
15342
|
+
failCommand(`Watch path is not a directory: ${args.dir}`);
|
|
15343
|
+
return;
|
|
15344
|
+
}
|
|
15345
|
+
const watchDirAbs = path.resolve(args.dir);
|
|
15346
|
+
const aiConfig = await loadConfiguredAI(aiexDir);
|
|
15347
|
+
if (!aiConfig) return;
|
|
15348
|
+
const modelOverride = resolveModelOverride(aiConfig, args.model);
|
|
15349
|
+
if (modelOverride === null) return;
|
|
15350
|
+
const watcher = startWatcher({
|
|
15351
|
+
aiexDir,
|
|
15352
|
+
config,
|
|
15353
|
+
aiConfig,
|
|
15354
|
+
schemaName: args.schema,
|
|
15355
|
+
watchDir: watchDirAbs,
|
|
15356
|
+
modelOverride,
|
|
15357
|
+
insert: !args.noInsert
|
|
15358
|
+
});
|
|
15359
|
+
const cleanup = async () => {
|
|
15360
|
+
consola.info("\nStopping watch directory daemon...");
|
|
15361
|
+
await watcher.close();
|
|
15362
|
+
consola.success("Daemon stopped.");
|
|
15363
|
+
process.exit(0);
|
|
15364
|
+
};
|
|
15365
|
+
process.on("SIGINT", cleanup);
|
|
15366
|
+
process.on("SIGTERM", cleanup);
|
|
15367
|
+
consola.info("Press Ctrl+C to stop");
|
|
15368
|
+
}
|
|
15369
|
+
});
|
|
15370
|
+
|
|
14945
15371
|
//#endregion
|
|
14946
15372
|
//#region src/server/routes/ai.ts
|
|
14947
15373
|
const JSON_EXT_RE = /\.json$/i;
|
|
@@ -15078,7 +15504,8 @@ const tableQuerySchema = z.object({
|
|
|
15078
15504
|
pageSize: z.coerce.number().int().min(1).max(500).catch(50),
|
|
15079
15505
|
search: z.string().catch(""),
|
|
15080
15506
|
sortField: z.string().optional(),
|
|
15081
|
-
sortOrder: z.preprocess((value) => typeof value === "string" ? value.toLowerCase() : value, z.enum(["asc", "desc"]).catch("asc"))
|
|
15507
|
+
sortOrder: z.preprocess((value) => typeof value === "string" ? value.toLowerCase() : value, z.enum(["asc", "desc"]).catch("asc")),
|
|
15508
|
+
all: z.preprocess((value) => value === "true" || value === true, z.boolean().catch(false))
|
|
15082
15509
|
});
|
|
15083
15510
|
function invalidParamResponse$1(message) {
|
|
15084
15511
|
return (result, c) => {
|
|
@@ -15201,7 +15628,7 @@ function dataRoutes(config) {
|
|
|
15201
15628
|
});
|
|
15202
15629
|
app.get("/data/tables/:name", zValidator("param", tableParamSchema, invalidParamResponse$1("Invalid table name")), zValidator("query", tableQuerySchema), async (c) => {
|
|
15203
15630
|
const { name: tableName } = c.req.valid("param");
|
|
15204
|
-
const { page, pageSize, search, sortField, sortOrder } = c.req.valid("query");
|
|
15631
|
+
const { page, pageSize, search, sortField, sortOrder, all } = c.req.valid("query");
|
|
15205
15632
|
let db;
|
|
15206
15633
|
try {
|
|
15207
15634
|
db = createReadonlyQueryDb(config.databasePath);
|
|
@@ -15232,15 +15659,20 @@ function dataRoutes(config) {
|
|
|
15232
15659
|
${searchCondition}
|
|
15233
15660
|
`.execute(db)).rows[0]?.count ?? 0;
|
|
15234
15661
|
const offset = (page - 1) * pageSize;
|
|
15235
|
-
const totalPages = Math.max(1, Math.ceil(total / pageSize));
|
|
15236
|
-
const result = await sql`
|
|
15237
|
-
|
|
15238
|
-
|
|
15239
|
-
|
|
15240
|
-
|
|
15241
|
-
|
|
15242
|
-
|
|
15243
|
-
|
|
15662
|
+
const totalPages = all ? 1 : Math.max(1, Math.ceil(total / pageSize));
|
|
15663
|
+
const result = all ? await sql`
|
|
15664
|
+
select rowid as ${sql.raw(INTERNAL_ROWID_COLUMN)}, *
|
|
15665
|
+
from ${sql.table(tableName)}
|
|
15666
|
+
${searchCondition}
|
|
15667
|
+
${orderBy}
|
|
15668
|
+
`.execute(db) : await sql`
|
|
15669
|
+
select rowid as ${sql.raw(INTERNAL_ROWID_COLUMN)}, *
|
|
15670
|
+
from ${sql.table(tableName)}
|
|
15671
|
+
${searchCondition}
|
|
15672
|
+
${orderBy}
|
|
15673
|
+
limit ${pageSize}
|
|
15674
|
+
offset ${offset}
|
|
15675
|
+
`.execute(db);
|
|
15244
15676
|
const actionsByRowId = await getRowExtractionActions(aiexDir, tableName);
|
|
15245
15677
|
const rowActions = Object.fromEntries(result.rows.map((row, index) => {
|
|
15246
15678
|
const rowId = row[INTERNAL_ROWID_COLUMN];
|
|
@@ -15248,14 +15680,27 @@ function dataRoutes(config) {
|
|
|
15248
15680
|
return action ? [String(index), action] : null;
|
|
15249
15681
|
}).filter((entry) => !!entry));
|
|
15250
15682
|
const rows = result.rows.map(({ [INTERNAL_ROWID_COLUMN]: _rowid, ...row }) => row);
|
|
15683
|
+
const schemaDir = config.schemaPath;
|
|
15684
|
+
let schema = null;
|
|
15685
|
+
try {
|
|
15686
|
+
const schemaFiles = (await fs.readdir(schemaDir)).filter((f) => f.endsWith(".json"));
|
|
15687
|
+
for (const file of schemaFiles) {
|
|
15688
|
+
const s = await readFile(path.join(schemaDir, file));
|
|
15689
|
+
if (s.table?.name === tableName) {
|
|
15690
|
+
schema = s;
|
|
15691
|
+
break;
|
|
15692
|
+
}
|
|
15693
|
+
}
|
|
15694
|
+
} catch {}
|
|
15251
15695
|
return c.json({
|
|
15252
15696
|
columns,
|
|
15253
15697
|
rows,
|
|
15254
15698
|
rowActions,
|
|
15255
15699
|
total,
|
|
15256
|
-
page,
|
|
15257
|
-
pageSize,
|
|
15258
|
-
totalPages
|
|
15700
|
+
page: all ? 1 : page,
|
|
15701
|
+
pageSize: all ? total : pageSize,
|
|
15702
|
+
totalPages,
|
|
15703
|
+
schema
|
|
15259
15704
|
});
|
|
15260
15705
|
} catch (error) {
|
|
15261
15706
|
return c.json({ error: error instanceof Error ? error.message : String(error) }, 500);
|
|
@@ -15368,17 +15813,6 @@ function jsonResponse(body, status) {
|
|
|
15368
15813
|
headers: { "content-type": "application/json" }
|
|
15369
15814
|
});
|
|
15370
15815
|
}
|
|
15371
|
-
async function auditFailureResponse(aiexDir, auditId, error, status) {
|
|
15372
|
-
const record = await updateExtractionAuditRecord(aiexDir, auditId, {
|
|
15373
|
-
status: "failed",
|
|
15374
|
-
error
|
|
15375
|
-
});
|
|
15376
|
-
return jsonResponse({
|
|
15377
|
-
success: false,
|
|
15378
|
-
error: record.error,
|
|
15379
|
-
auditId: record.id
|
|
15380
|
-
}, status);
|
|
15381
|
-
}
|
|
15382
15816
|
async function saveUploadToFile(file, uploadsDir, id) {
|
|
15383
15817
|
validateFileUpload(file);
|
|
15384
15818
|
await fs.mkdir(uploadsDir, { recursive: true });
|
|
@@ -15387,62 +15821,6 @@ async function saveUploadToFile(file, uploadsDir, id) {
|
|
|
15387
15821
|
await fs.writeFile(filePath, buffer);
|
|
15388
15822
|
return filePath;
|
|
15389
15823
|
}
|
|
15390
|
-
async function executeAuditedExtraction(input) {
|
|
15391
|
-
const aiConfig = await readAIConfig(input.aiexDir);
|
|
15392
|
-
if (!aiConfig) return auditFailureResponse(input.aiexDir, input.auditId, "AI configuration not found. Configure AI settings first.", 400);
|
|
15393
|
-
if (!aiConfig.provider.apiKey) return auditFailureResponse(input.aiexDir, input.auditId, "API Key not configured. Configure AI settings first.", 400);
|
|
15394
|
-
if (!aiConfig.provider.models?.length) return auditFailureResponse(input.aiexDir, input.auditId, "No models configured. Add at least one model in AI Settings.", 400);
|
|
15395
|
-
const modelOverride = input.modelName ? aiConfig.provider.models.find((model) => model.name === input.modelName) : void 0;
|
|
15396
|
-
if (input.modelName && !modelOverride) return auditFailureResponse(input.aiexDir, input.auditId, `Model "${input.modelName}" not found in AI settings`, 400);
|
|
15397
|
-
let inputText = input.text;
|
|
15398
|
-
let inputFilePath = input.filePath;
|
|
15399
|
-
if (input.filePath) try {
|
|
15400
|
-
const source = await readExtractFileInput(input.filePath, aiConfig);
|
|
15401
|
-
inputText = source.text;
|
|
15402
|
-
inputFilePath = source.filePath;
|
|
15403
|
-
} catch (error) {
|
|
15404
|
-
if (isMissingUploadFileError(error)) return auditFailureResponse(input.aiexDir, input.auditId, MISSING_UPLOAD_FILE_TEXT, 400);
|
|
15405
|
-
throw error;
|
|
15406
|
-
}
|
|
15407
|
-
const result = await extractSingle(input.aiexDir, input.config, aiConfig, input.schemaName, inputText, inputFilePath, modelOverride, { quiet: true });
|
|
15408
|
-
if (!result.success) return auditFailureResponse(input.aiexDir, input.auditId, result.error || "Extraction failed", 500);
|
|
15409
|
-
const notionPages = [];
|
|
15410
|
-
if (aiConfig.notion?.enabled && aiConfig.notion.schemas?.[input.schemaName]?.databaseId?.trim()) try {
|
|
15411
|
-
if (!result.data || typeof result.data !== "object" || Array.isArray(result.data)) throw new Error("Extraction result is not an object and cannot be written to Notion.");
|
|
15412
|
-
notionPages.push(await writeNotionPage(aiConfig.notion, input.schemaName, result.data));
|
|
15413
|
-
} catch (error) {
|
|
15414
|
-
const record$1 = await updateExtractionAuditRecord(input.aiexDir, input.auditId, {
|
|
15415
|
-
status: "failed",
|
|
15416
|
-
outputPath: result.outputPath,
|
|
15417
|
-
outputName: result.outputPath ? path.basename(result.outputPath) : void 0,
|
|
15418
|
-
tablesInserted: result.tablesInserted,
|
|
15419
|
-
tokensUsed: result.tokensUsed,
|
|
15420
|
-
error: error instanceof Error ? error.message : String(error)
|
|
15421
|
-
});
|
|
15422
|
-
return jsonResponse({
|
|
15423
|
-
success: false,
|
|
15424
|
-
error: record$1.error,
|
|
15425
|
-
auditId: record$1.id
|
|
15426
|
-
}, 500);
|
|
15427
|
-
}
|
|
15428
|
-
const record = await updateExtractionAuditRecord(input.aiexDir, input.auditId, {
|
|
15429
|
-
status: "succeeded",
|
|
15430
|
-
outputPath: result.outputPath,
|
|
15431
|
-
outputName: result.outputPath ? path.basename(result.outputPath) : void 0,
|
|
15432
|
-
tablesInserted: result.tablesInserted,
|
|
15433
|
-
notionPages: notionPages.length > 0 ? notionPages : void 0,
|
|
15434
|
-
tokensUsed: result.tokensUsed
|
|
15435
|
-
});
|
|
15436
|
-
return jsonResponse({
|
|
15437
|
-
success: true,
|
|
15438
|
-
outputPath: record.outputPath,
|
|
15439
|
-
outputName: record.outputName,
|
|
15440
|
-
tablesInserted: record.tablesInserted,
|
|
15441
|
-
notionPages: record.notionPages,
|
|
15442
|
-
tokensUsed: record.tokensUsed,
|
|
15443
|
-
auditId: record.id
|
|
15444
|
-
}, 200);
|
|
15445
|
-
}
|
|
15446
15824
|
function extractRoutes(config) {
|
|
15447
15825
|
const app = new Hono();
|
|
15448
15826
|
const aiexDir = path.dirname(config.schemaPath);
|
|
@@ -15469,51 +15847,73 @@ function extractRoutes(config) {
|
|
|
15469
15847
|
success: false,
|
|
15470
15848
|
error: "Text and file input cannot be used together"
|
|
15471
15849
|
}, 400);
|
|
15472
|
-
|
|
15473
|
-
schemaName,
|
|
15474
|
-
modelName,
|
|
15475
|
-
source: file ? {
|
|
15476
|
-
type: "file",
|
|
15477
|
-
fileName: safeUploadName(file.name)
|
|
15478
|
-
} : {
|
|
15479
|
-
type: "text",
|
|
15480
|
-
text: text$1
|
|
15481
|
-
}
|
|
15482
|
-
});
|
|
15483
|
-
let filePath;
|
|
15850
|
+
let source;
|
|
15484
15851
|
if (file) {
|
|
15852
|
+
const uploadId = `upload-${Date.now()}`;
|
|
15853
|
+
let filePath;
|
|
15485
15854
|
try {
|
|
15486
|
-
filePath = await saveUploadToFile(file, uploadsDir,
|
|
15855
|
+
filePath = await saveUploadToFile(file, uploadsDir, uploadId);
|
|
15487
15856
|
} catch (e) {
|
|
15488
|
-
if (e instanceof FileValidationError) {
|
|
15489
|
-
|
|
15490
|
-
|
|
15491
|
-
|
|
15492
|
-
});
|
|
15493
|
-
return c.json({
|
|
15494
|
-
success: false,
|
|
15495
|
-
error: e.message,
|
|
15496
|
-
auditId: audit.id
|
|
15497
|
-
}, 400);
|
|
15498
|
-
}
|
|
15857
|
+
if (e instanceof FileValidationError) return c.json({
|
|
15858
|
+
success: false,
|
|
15859
|
+
error: e.message
|
|
15860
|
+
}, 400);
|
|
15499
15861
|
throw e;
|
|
15500
15862
|
}
|
|
15501
|
-
|
|
15863
|
+
source = {
|
|
15502
15864
|
type: "file",
|
|
15503
|
-
filePath
|
|
15504
|
-
|
|
15505
|
-
|
|
15506
|
-
|
|
15507
|
-
|
|
15865
|
+
filePath
|
|
15866
|
+
};
|
|
15867
|
+
} else source = {
|
|
15868
|
+
type: "text",
|
|
15869
|
+
text: text$1
|
|
15870
|
+
};
|
|
15871
|
+
const aiConfig = await readAIConfig(aiexDir);
|
|
15872
|
+
if (!aiConfig) return c.json({
|
|
15873
|
+
success: false,
|
|
15874
|
+
error: "AI configuration not found. Configure AI settings first."
|
|
15875
|
+
}, 400);
|
|
15876
|
+
if (!aiConfig.provider.apiKey) return c.json({
|
|
15877
|
+
success: false,
|
|
15878
|
+
error: "API Key not configured. Configure AI settings first."
|
|
15879
|
+
}, 400);
|
|
15880
|
+
if (!aiConfig.provider.models?.length) return c.json({
|
|
15881
|
+
success: false,
|
|
15882
|
+
error: "No models configured. Add at least one model in AI Settings."
|
|
15883
|
+
}, 400);
|
|
15884
|
+
const modelOverride = modelName ? aiConfig.provider.models.find((model) => model.name === modelName) : void 0;
|
|
15885
|
+
if (modelName && !modelOverride) return c.json({
|
|
15886
|
+
success: false,
|
|
15887
|
+
error: `Model "${modelName}" not found in AI settings`
|
|
15888
|
+
}, 400);
|
|
15889
|
+
const result = await runAuditedExtraction({
|
|
15508
15890
|
aiexDir,
|
|
15509
|
-
config,
|
|
15510
|
-
|
|
15891
|
+
config: createMigrationConfig(path.dirname(aiexDir)),
|
|
15892
|
+
aiConfig,
|
|
15511
15893
|
schemaName,
|
|
15512
|
-
|
|
15513
|
-
|
|
15514
|
-
|
|
15894
|
+
source,
|
|
15895
|
+
modelOverride,
|
|
15896
|
+
quiet: true
|
|
15515
15897
|
});
|
|
15898
|
+
if (!result.success) return jsonResponse({
|
|
15899
|
+
success: false,
|
|
15900
|
+
error: result.error,
|
|
15901
|
+
auditId: result.auditId
|
|
15902
|
+
}, 500);
|
|
15903
|
+
return jsonResponse({
|
|
15904
|
+
success: true,
|
|
15905
|
+
outputPath: result.outputPath,
|
|
15906
|
+
outputName: result.outputName,
|
|
15907
|
+
tablesInserted: result.tablesInserted,
|
|
15908
|
+
notionPages: result.notionPages,
|
|
15909
|
+
tokensUsed: result.tokensUsed,
|
|
15910
|
+
auditId: result.auditId
|
|
15911
|
+
}, 200);
|
|
15516
15912
|
} catch (error) {
|
|
15913
|
+
if (isMissingUploadFileError(error)) return c.json({
|
|
15914
|
+
success: false,
|
|
15915
|
+
error: MISSING_UPLOAD_FILE_TEXT
|
|
15916
|
+
}, 400);
|
|
15517
15917
|
return c.json({
|
|
15518
15918
|
success: false,
|
|
15519
15919
|
error: error instanceof Error ? error.message : String(error)
|
|
@@ -15526,20 +15926,56 @@ function extractRoutes(config) {
|
|
|
15526
15926
|
success: false,
|
|
15527
15927
|
error: "Extraction record not found"
|
|
15528
15928
|
}, 404);
|
|
15529
|
-
|
|
15929
|
+
const aiConfig = await readAIConfig(aiexDir);
|
|
15930
|
+
if (!aiConfig) return c.json({
|
|
15931
|
+
success: false,
|
|
15932
|
+
error: "AI configuration not found. Configure AI settings first."
|
|
15933
|
+
}, 400);
|
|
15934
|
+
if (!aiConfig.provider.apiKey) return c.json({
|
|
15935
|
+
success: false,
|
|
15936
|
+
error: "API Key not configured. Configure AI settings first."
|
|
15937
|
+
}, 400);
|
|
15938
|
+
if (!aiConfig.provider.models?.length) return c.json({
|
|
15939
|
+
success: false,
|
|
15940
|
+
error: "No models configured. Add at least one model in AI Settings."
|
|
15941
|
+
}, 400);
|
|
15942
|
+
const modelOverride = original.modelName ? aiConfig.provider.models.find((m) => m.name === original.modelName) : void 0;
|
|
15943
|
+
if (original.modelName && !modelOverride) return c.json({
|
|
15944
|
+
success: false,
|
|
15945
|
+
error: `Model "${original.modelName}" not found in AI settings`
|
|
15946
|
+
}, 400);
|
|
15947
|
+
const source = original.source.type === "file" && original.source.filePath ? {
|
|
15948
|
+
type: "file",
|
|
15949
|
+
filePath: original.source.filePath
|
|
15950
|
+
} : {
|
|
15951
|
+
type: "text",
|
|
15952
|
+
text: original.source.text ?? ""
|
|
15953
|
+
};
|
|
15954
|
+
const result = await runAuditedExtraction({
|
|
15530
15955
|
aiexDir,
|
|
15531
|
-
config,
|
|
15532
|
-
|
|
15533
|
-
schemaName: original.schemaName,
|
|
15534
|
-
modelName: original.modelName,
|
|
15535
|
-
source: original.source,
|
|
15536
|
-
retryOf: original.id
|
|
15537
|
-
})).id,
|
|
15956
|
+
config: createMigrationConfig(path.dirname(aiexDir)),
|
|
15957
|
+
aiConfig,
|
|
15538
15958
|
schemaName: original.schemaName,
|
|
15539
|
-
|
|
15540
|
-
|
|
15541
|
-
|
|
15959
|
+
source,
|
|
15960
|
+
modelOverride,
|
|
15961
|
+
retryOf: original.id,
|
|
15962
|
+
force: true,
|
|
15963
|
+
quiet: true
|
|
15542
15964
|
});
|
|
15965
|
+
if (!result.success) return jsonResponse({
|
|
15966
|
+
success: false,
|
|
15967
|
+
error: result.error,
|
|
15968
|
+
auditId: result.auditId
|
|
15969
|
+
}, 500);
|
|
15970
|
+
return jsonResponse({
|
|
15971
|
+
success: true,
|
|
15972
|
+
outputPath: result.outputPath,
|
|
15973
|
+
outputName: result.outputName,
|
|
15974
|
+
tablesInserted: result.tablesInserted,
|
|
15975
|
+
notionPages: result.notionPages,
|
|
15976
|
+
tokensUsed: result.tokensUsed,
|
|
15977
|
+
auditId: result.auditId
|
|
15978
|
+
}, 200);
|
|
15543
15979
|
});
|
|
15544
15980
|
app.delete("/extract/records/:id", async (c) => {
|
|
15545
15981
|
const id = c.req.param("id");
|
|
@@ -15773,6 +16209,8 @@ const subCommands = {
|
|
|
15773
16209
|
web: webCommand,
|
|
15774
16210
|
schema: schemaCommand,
|
|
15775
16211
|
extract: extractCommand,
|
|
16212
|
+
watch: watchCommand,
|
|
16213
|
+
export: exportCommand,
|
|
15776
16214
|
completion: completionCommand,
|
|
15777
16215
|
doctor: doctorCommand
|
|
15778
16216
|
};
|