aiex-cli 0.0.3-beta.4 → 0.0.3-beta.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { C as package_default, D as formatDoctorDiagnosticsJson, E as doctorDiagnosticsTableRows, S as name, _ as PLACEHOLDER_TEXT, a as parseJsonSchema, b as seedConfig, c as recognizeImageText, d as readAIConfig, f as writeAIConfig, g as PLACEHOLDER_SCHEMA, h as DEFAULT_PROMPT_CONFIG, i as JsonSchemaDefinitionSchema, l as shouldUseImageOcrFallback, m as DEFAULT_MINERU_CONFIG, n as createMigrationConfig, o as toSnakeCase, p as DEFAULT_MARKITDOWN_CONFIG, s as generateDrizzleSchema, t as collectDoctorDiagnostics, u as getDefaultAIConfig, v as AIConfigSchema, w as version, x as description, y as createConfig } from "./doctor-collector-nMMG_h-w.mjs";
1
+ import { C as name, D as doctorDiagnosticsTableRows, O as formatDoctorDiagnosticsJson, S as description, T as version, _ as PLACEHOLDER_SCHEMA, a as parseJsonSchema, b as createConfig, c as recognizeImageText, d as readAIConfig, f as writeAIConfig, g as DEFAULT_PROMPT_CONFIG, h as DEFAULT_MINERU_CONFIG, i as JsonSchemaDefinitionSchema, l as shouldUseImageOcrFallback, m as DEFAULT_MARKITDOWN_CONFIG, n as createMigrationConfig, o as toSnakeCase, p as DEFAULT_MARKER_CONFIG, s as generateDrizzleSchema, t as collectDoctorDiagnostics, u as getDefaultAIConfig, v as PLACEHOLDER_TEXT, w as package_default, x as seedConfig, y as AIConfigSchema } from "./doctor-collector-DnH1Qu1e.mjs";
2
2
  import { createRequire } from "node:module";
3
3
  import fs from "node:fs/promises";
4
4
  import os from "node:os";
@@ -11,8 +11,12 @@ import { defineCommand, runMain } from "citty";
11
11
  import { consola } from "consola";
12
12
  import updateNotifier from "update-notifier";
13
13
  import CliTable3 from "cli-table3";
14
+ import fs$1 from "node:fs";
14
15
  import { intro, isCancel, outro, select, spinner, text } from "@clack/prompts";
16
+ import Database from "better-sqlite3";
15
17
  import pc from "picocolors";
18
+ import * as XLSX from "xlsx";
19
+ import { glob, globSync } from "tinyglobby";
16
20
  import { createOpenAICompatible } from "@ai-sdk/openai-compatible";
17
21
  import { LangfuseSpanProcessor } from "@langfuse/otel";
18
22
  import { NodeTracerProvider } from "@opentelemetry/sdk-trace-node";
@@ -20,15 +24,14 @@ import { APICallError, Output, generateText, jsonSchema } from "ai";
20
24
  import mime from "mime";
21
25
  import pRetry from "p-retry";
22
26
  import { jsonrepair } from "jsonrepair";
23
- import fs$1 from "node:fs";
24
- import Database from "better-sqlite3";
25
- import { glob, globSync } from "tinyglobby";
26
27
  import { Client, extractNotionId } from "@notionhq/client";
27
28
  import { Buffer } from "node:buffer";
28
29
  import { execa } from "execa";
29
30
  import { extractText, getDocumentProxy, getMeta } from "unpdf";
31
+ import crypto from "node:crypto";
30
32
  import { execFile } from "node:child_process";
31
33
  import { promisify } from "node:util";
34
+ import * as chokidar from "chokidar";
32
35
  import { serve } from "@hono/node-server";
33
36
  import open from "open";
34
37
  import { serveStatic } from "@hono/node-server/serve-static";
@@ -12943,6 +12946,22 @@ function schemaToDescription(schema) {
12943
12946
  const property = prop;
12944
12947
  lines.push(nestedPropertyToDescription(name$1, property));
12945
12948
  }
12949
+ if (schema.examples && schema.examples.length > 0) {
12950
+ lines.push("");
12951
+ lines.push("Examples / Few-shot Cases:");
12952
+ schema.examples.forEach((example, idx) => {
12953
+ lines.push("");
12954
+ lines.push(`Example ${idx + 1}:`);
12955
+ lines.push("Input text:");
12956
+ lines.push("\"\"\"");
12957
+ lines.push(example.text);
12958
+ lines.push("\"\"\"");
12959
+ lines.push("Expected JSON output:");
12960
+ lines.push("```json");
12961
+ lines.push(JSON.stringify(example.output, null, 2));
12962
+ lines.push("```");
12963
+ });
12964
+ }
12946
12965
  return lines.join("\n");
12947
12966
  }
12948
12967
  function generateExtractionPrompt(schema, text$1, promptConfig = DEFAULT_PROMPT_CONFIG) {
@@ -13512,11 +13531,17 @@ async function deleteExtractionAuditRecord(aiexDir, id) {
13512
13531
  clearRecordCache(aiexDir);
13513
13532
  return true;
13514
13533
  }
/**
 * Finds the first succeeded extraction audit record matching a schema and file hash.
 *
 * @param {string} aiexDir - Directory passed through to `listExtractionAuditRecords`.
 * @param {string} schemaName - Schema name the record must have been produced for.
 * @param {string | undefined} fileHash - Hash to look for in `record.source.fileHash`.
 * @returns {Promise<object | null>} The first matching record, or `null` when none matches.
 */
async function findSucceededAuditByHash(aiexDir, schemaName, fileHash) {
	// Without a hash there is nothing to match on. Comparing `undefined === undefined`
	// would otherwise "find" any succeeded record that simply has no hash stored.
	if (!fileHash) return null;
	const records = await listExtractionAuditRecords(aiexDir);
	const match = records.find((record) => {
		if (record.status !== "succeeded") return false;
		if (record.schemaName !== schemaName) return false;
		// `source` is read defensively so a malformed record cannot abort the lookup.
		return record.source?.fileHash === fileHash;
	});
	return match ?? null;
}
13515
13540
 
13516
13541
  //#endregion
13517
13542
  //#region src/core/file-constants.ts
13518
- const MAX_UPLOAD_SIZE = 150 * 1024 * 1024;
13519
- const MAX_UPLOAD_SIZE_TEXT = "150MB";
13543
+ const MAX_UPLOAD_SIZE = 30 * 1024 * 1024;
13544
+ const MAX_UPLOAD_SIZE_TEXT = "30MB";
13520
13545
  const SUPPORTED_FILE_TYPES_TEXT = "images, PDF, text, markdown, CSV, JSON, HTML, XML, YAML";
13521
13546
  const MISSING_UPLOAD_FILE_TEXT = "Uploaded file is no longer available. Re-run extraction with the original file.";
13522
13547
  const SUPPORTED_MIME_TYPES = new Set([
@@ -13964,6 +13989,10 @@ function createPdfConverter(config) {
13964
13989
  const markitdownConfig = config.markitdown ?? DEFAULT_MARKITDOWN_CONFIG;
13965
13990
  return withFallback(new ExternalCommandPdfConverter("markitdown", markitdownConfig), markitdownConfig);
13966
13991
  }
13992
+ if (config.converter === "marker") {
13993
+ const markerConfig = config.marker ?? DEFAULT_MARKER_CONFIG;
13994
+ return withFallback(new ExternalCommandPdfConverter("marker", markerConfig), markerConfig);
13995
+ }
13967
13996
  if (config.converter === "external") {
13968
13997
  if (!config.external) throw new Error("External PDF converter is selected but no external command is configured.");
13969
13998
  return withFallback(new ExternalCommandPdfConverter("external", config.external), config.external);
@@ -13979,6 +14008,21 @@ function createPdfConverter(config) {
13979
14008
  return instance;
13980
14009
  }
13981
14010
 
14011
+ //#endregion
14012
+ //#region src/utils/hash.ts
/**
 * Computes the hex digest of a file by streaming its contents through a hash.
 *
 * @param {string} filePath - Path of the file to hash.
 * @param {string} [algorithm="sha256"] - Digest algorithm name accepted by `crypto.createHash`.
 * @returns {Promise<string>} Hex-encoded digest of the file contents.
 * @throws Rejects when the stream reports an error (for example a missing file)
 *   or when `crypto.createHash` rejects the algorithm name.
 */
async function getFileHash(filePath, algorithm = "sha256") {
	const hash = crypto.createHash(algorithm);
	// Async iteration surfaces stream errors as a rejection, so no manual
	// "data"/"end"/"error" listener wiring is needed.
	for await (const chunk of fs$1.createReadStream(filePath)) hash.update(chunk);
	return hash.digest("hex");
}
14025
+
13982
14026
  //#endregion
13983
14027
  //#region src/core/extract-runner.ts
13984
14028
  const FILE_PART_EXTENSIONS = new Set([
@@ -13990,7 +14034,7 @@ const FILE_PART_EXTENSIONS = new Set([
13990
14034
  "bmp",
13991
14035
  "svg"
13992
14036
  ]);
13993
- const SUPPORTED_EXTENSIONS = new Set([
14037
+ const SUPPORTED_EXTENSIONS$1 = new Set([
13994
14038
  ...FILE_PART_EXTENSIONS,
13995
14039
  "pdf",
13996
14040
  "txt",
@@ -14004,7 +14048,7 @@ const SUPPORTED_EXTENSIONS = new Set([
14004
14048
  ]);
14005
14049
  const PDF_EXT_RE = /\.pdf$/i;
14006
14050
  const JSON_EXT_RE$1 = /\.json$/;
14007
- const SUPPORTED_FILE_PATTERN = `*.{${[...SUPPORTED_EXTENSIONS].join(",")}}`;
14051
+ const SUPPORTED_FILE_PATTERN = `*.{${[...SUPPORTED_EXTENSIONS$1].join(",")}}`;
14008
14052
  async function syncResultToNotion(aiConfig, schemaName, data) {
14009
14053
  if (!data || typeof data !== "object" || Array.isArray(data)) throw new Error("Extraction result is not an object and cannot be written to Notion.");
14010
14054
  const page = await writeNotionPage(aiConfig.notion, schemaName, data);
@@ -14043,7 +14087,7 @@ function listSupportedFiles(dir, pattern) {
14043
14087
  onlyFiles: true
14044
14088
  }).filter((file) => {
14045
14089
  const ext = path.extname(file).toLowerCase().replace(".", "");
14046
- return SUPPORTED_EXTENSIONS.has(ext);
14090
+ return SUPPORTED_EXTENSIONS$1.has(ext);
14047
14091
  }).sort();
14048
14092
  }
14049
14093
  async function loadSchema(config, schemaName) {
@@ -14199,27 +14243,76 @@ async function extractSingle(aiexDir, config, aiConfig, schemaName, text$1, file
14199
14243
  tokensUsed: result.tokensUsed
14200
14244
  };
14201
14245
  }
14202
- async function processOneFile(aiexDir, config, aiConfig, schemaName, filePath, modelOverride, options) {
14246
+ async function runAuditedExtraction(options) {
14247
+ const { aiexDir, config, aiConfig, schemaName, source, modelOverride, retryOf, insert, force, quiet = false } = options;
14248
+ let fileHash;
14249
+ let isPlainTextFile = false;
14250
+ if (source.type === "file") {
14251
+ const ext = path.extname(source.filePath).toLowerCase().replace(".", "");
14252
+ isPlainTextFile = [
14253
+ "txt",
14254
+ "md",
14255
+ "csv",
14256
+ "json",
14257
+ "html",
14258
+ "xml",
14259
+ "yaml",
14260
+ "yml"
14261
+ ].includes(ext);
14262
+ try {
14263
+ fileHash = await getFileHash(source.filePath);
14264
+ } catch (e) {
14265
+ if (!quiet) consola.warn(`Failed to calculate file hash for ${path.basename(source.filePath)}: ${e instanceof Error ? e.message : String(e)}`);
14266
+ }
14267
+ if (fileHash && !isPlainTextFile && !force) {
14268
+ const existing = await findSucceededAuditByHash(aiexDir, schemaName, fileHash);
14269
+ if (existing) {
14270
+ if (!quiet) consola.info(`File ${pc.cyan(path.basename(source.filePath))} (hash: ${fileHash.slice(0, 8)}) has already been processed successfully. Skipping.`);
14271
+ return {
14272
+ success: true,
14273
+ skipped: true,
14274
+ auditId: existing.id,
14275
+ fileHash,
14276
+ outputPath: existing.outputPath,
14277
+ outputName: existing.outputName,
14278
+ tablesInserted: existing.tablesInserted,
14279
+ notionPages: existing.notionPages,
14280
+ tokensUsed: existing.tokensUsed
14281
+ };
14282
+ }
14283
+ }
14284
+ }
14203
14285
  const audit = await createExtractionAuditRecord(aiexDir, {
14204
14286
  schemaName,
14205
14287
  modelName: modelOverride?.name,
14206
- source: {
14288
+ source: source.type === "file" ? {
14207
14289
  type: "file",
14208
- filePath,
14209
- fileName: path.basename(filePath)
14210
- }
14290
+ filePath: source.filePath,
14291
+ fileName: path.basename(source.filePath),
14292
+ fileHash
14293
+ } : {
14294
+ type: "text",
14295
+ text: source.text
14296
+ },
14297
+ retryOf
14211
14298
  });
14212
14299
  try {
14213
- const input = await readExtractFileInput(filePath, aiConfig, modelOverride);
14214
- const r = await extractSingle(aiexDir, config, aiConfig, schemaName, input.text, input.filePath, modelOverride, {
14215
- quiet: false,
14216
- insert: options?.insert
14300
+ let text$1 = "";
14301
+ let filePath;
14302
+ if (source.type === "file") {
14303
+ const input = await readExtractFileInput(source.filePath, aiConfig, modelOverride);
14304
+ text$1 = input.text;
14305
+ filePath = input.filePath;
14306
+ } else text$1 = source.text;
14307
+ const r = await extractSingle(aiexDir, config, aiConfig, schemaName, text$1, filePath, modelOverride, {
14308
+ quiet,
14309
+ insert
14217
14310
  });
14218
14311
  if (r.success) {
14219
14312
  let notionPages;
14220
14313
  if (shouldSyncNotion(aiConfig, schemaName)) try {
14221
14314
  notionPages = await syncResultToNotion(aiConfig, schemaName, r.data);
14222
- consola.success(`Synced to Notion: ${notionPages.length} page(s)`);
14315
+ if (!quiet) consola.success(`Synced to Notion: ${notionPages.length} page(s)`);
14223
14316
  } catch (error) {
14224
14317
  await updateExtractionAuditRecord(aiexDir, audit.id, {
14225
14318
  status: "failed",
@@ -14229,10 +14322,15 @@ async function processOneFile(aiexDir, config, aiConfig, schemaName, filePath, m
14229
14322
  tokensUsed: r.tokensUsed,
14230
14323
  error: error instanceof Error ? error.message : String(error)
14231
14324
  });
14232
- consola.error(`Notion sync failed: ${error instanceof Error ? error.message : String(error)}`);
14233
- return false;
14325
+ if (!quiet) consola.error(`Notion sync failed: ${error instanceof Error ? error.message : String(error)}`);
14326
+ return {
14327
+ success: false,
14328
+ error: error instanceof Error ? error.message : String(error),
14329
+ auditId: audit.id,
14330
+ fileHash
14331
+ };
14234
14332
  }
14235
- await updateExtractionAuditRecord(aiexDir, audit.id, {
14333
+ const updated = await updateExtractionAuditRecord(aiexDir, audit.id, {
14236
14334
  status: "succeeded",
14237
14335
  outputPath: r.outputPath,
14238
14336
  outputName: r.outputPath ? path.basename(r.outputPath) : void 0,
@@ -14240,24 +14338,66 @@ async function processOneFile(aiexDir, config, aiConfig, schemaName, filePath, m
14240
14338
  notionPages,
14241
14339
  tokensUsed: r.tokensUsed
14242
14340
  });
14243
- consola.success(`Processed: ${path.basename(filePath)}`);
14244
- return true;
14341
+ return {
14342
+ success: true,
14343
+ outputPath: updated.outputPath,
14344
+ outputName: updated.outputName,
14345
+ tablesInserted: updated.tablesInserted,
14346
+ notionPages: updated.notionPages,
14347
+ tokensUsed: updated.tokensUsed,
14348
+ auditId: updated.id,
14349
+ fileHash
14350
+ };
14245
14351
  } else {
14246
14352
  await updateExtractionAuditRecord(aiexDir, audit.id, {
14247
14353
  status: "failed",
14248
14354
  error: r.error || "Extraction failed"
14249
14355
  });
14250
- consola.error(`Failed: ${r.error}`);
14251
- return false;
14356
+ if (!quiet) consola.error(`Failed: ${r.error}`);
14357
+ return {
14358
+ success: false,
14359
+ error: r.error,
14360
+ auditId: audit.id,
14361
+ fileHash
14362
+ };
14252
14363
  }
14253
14364
  } catch (e) {
14254
14365
  await updateExtractionAuditRecord(aiexDir, audit.id, {
14255
14366
  status: "failed",
14256
14367
  error: e instanceof Error ? e.message : String(e)
14257
14368
  });
14258
- consola.error(`Error processing ${path.basename(filePath)}: ${e instanceof Error ? e.message : String(e)}`);
14259
- return false;
14369
+ if (!quiet) {
14370
+ const name$1 = source.type === "file" ? path.basename(source.filePath) : "text input";
14371
+ consola.error(`Error processing ${name$1}: ${e instanceof Error ? e.message : String(e)}`);
14372
+ }
14373
+ return {
14374
+ success: false,
14375
+ error: e instanceof Error ? e.message : String(e),
14376
+ auditId: audit.id,
14377
+ fileHash
14378
+ };
14379
+ }
14380
+ }
/**
 * Runs an audited extraction for a single file and reports the outcome as a boolean.
 *
 * @param aiexDir - Forwarded to `runAuditedExtraction`.
 * @param config - Forwarded to `runAuditedExtraction`.
 * @param aiConfig - Forwarded to `runAuditedExtraction`.
 * @param schemaName - Schema to extract with.
 * @param filePath - File to extract from; its base name is used in the success message.
 * @param modelOverride - Forwarded to `runAuditedExtraction`.
 * @param options - Optional `{ insert, force }` flags, forwarded as-is.
 * @returns {Promise<boolean>} `true` when the extraction result reports success
 *   (including results flagged as skipped), `false` otherwise.
 */
async function processOneFile(aiexDir, config, aiConfig, schemaName, filePath, modelOverride, options) {
	const source = {
		type: "file",
		filePath
	};
	const outcome = await runAuditedExtraction({
		aiexDir,
		config,
		aiConfig,
		schemaName,
		source,
		modelOverride,
		insert: options?.insert,
		force: options?.force,
		quiet: false
	});
	if (!outcome.success) return false;
	// A skipped file still counts as success, but no "Processed" line is printed for it.
	if (!outcome.skipped) consola.success(`Processed: ${path.basename(filePath)}`);
	return true;
}
14262
14402
  async function runBatchExtraction(aiexDir, config, aiConfig, schemaName, dir, globPattern, modelOverride, options) {
14263
14403
  consola.info(`Scanning ${pc.cyan(dir)} for supported files...`);
@@ -14284,7 +14424,10 @@ async function runBatchExtraction(aiexDir, config, aiConfig, schemaName, dir, gl
14284
14424
  for (let i = 0; i < files.length; i++) {
14285
14425
  const file = files[i];
14286
14426
  consola.info(`\n[${i + 1}/${files.length}] Processing: ${pc.cyan(path.basename(file))}`);
14287
- if (await processOneFile(aiexDir, config, aiConfig, schemaName, file, modelOverride, options)) successCount++;
14427
+ if (await processOneFile(aiexDir, config, aiConfig, schemaName, file, modelOverride, {
14428
+ insert: options?.insert,
14429
+ force: options?.force
14430
+ })) successCount++;
14288
14431
  else failCount++;
14289
14432
  }
14290
14433
  consola.info(`\nBatch complete: ${pc.green(`${successCount} succeeded`)}, ${pc.red(`${failCount} failed`)}, ${files.length} total`);
@@ -14295,6 +14438,174 @@ async function runBatchExtraction(aiexDir, config, aiConfig, schemaName, dir, gl
14295
14438
  };
14296
14439
  }
14297
14440
 
14441
+ //#endregion
14442
+ //#region src/commands/export.ts
/**
 * Wraps a SQLite identifier in double quotes, doubling any embedded quote,
 * so that names which are reserved words or contain special characters can be
 * interpolated into SQL text safely.
 */
function quoteExportIdentifier(identifier) {
	return `"${String(identifier).replace(/"/g, "\"\"")}"`;
}
/**
 * Converts one database cell into the value written to the export sheet.
 *
 * @param value - Raw value read from the row.
 * @param schemaType - JSON-schema `type` of the column, or "" when no schema property is known.
 * @param columnType - Declared column type reported by `table_info` (may be empty).
 * @param format - "csv" or "xlsx".
 */
function formatExportCell(value, schemaType, columnType, format) {
	if (value === null || value === void 0) return "";
	if (schemaType === "boolean") {
		const truthy = value === 1 || value === "1" || value === true;
		// XLSX gets a real boolean cell; CSV gets the literal text.
		if (format === "xlsx") return truthy;
		return truthy ? "true" : "false";
	}
	if (schemaType === "number" || schemaType === "integer") {
		if (value === "") return "";
		const num = Number(value);
		return Number.isNaN(num) ? value : num;
	}
	if (typeof value === "object") return JSON.stringify(value);
	// No schema hint: fall back to the declared column type to decide whether a
	// string value should be exported as a number.
	const dbType = (columnType || "").toLowerCase();
	const looksNumeric = dbType.includes("int") || dbType.includes("real") || dbType.includes("num") || dbType.includes("double") || dbType.includes("float");
	if (looksNumeric && typeof value === "string" && value !== "") {
		const num = Number(value);
		return Number.isNaN(num) ? value : num;
	}
	return value;
}
/**
 * `aiex export` — dumps one SQLite table to a CSV or XLSX file.
 *
 * The table is chosen either directly (`--table`) or through a schema file
 * (`--schema`), whose `table.name` is then used. The format comes from
 * `--format`, else from the `--output` extension, else defaults to CSV.
 * Failures are reported through `failCommand` and the command returns early.
 */
const exportCommand = defineCommand({
	meta: {
		name: "export",
		description: "Export SQLite database table to Excel (.xlsx) or CSV (.csv)"
	},
	args: {
		table: {
			type: "string",
			alias: "t",
			description: "SQLite table name to export"
		},
		schema: {
			type: "string",
			alias: "s",
			description: "Schema name (without .json extension) to export"
		},
		format: {
			type: "string",
			alias: "f",
			description: "Export format: csv or xlsx (default: inferred from output or csv)"
		},
		output: {
			type: "string",
			alias: "o",
			description: "Output file path (default: ./<tableName>.<format>)"
		}
	},
	async run({ args }) {
		intro(pc.inverse(" aiex export "));
		if (!args.table && !args.schema) {
			failCommand("Either table name (--table / -t) or schema name (--schema / -s) is required");
			return;
		}
		const cwd = process.cwd();
		const config = createMigrationConfig(cwd);
		const schemaDir = config.schemaPath;
		let tableName = args.table || "";
		let schema = null;
		if (args.schema) {
			const schemaLoad = await loadSchema(config, args.schema);
			if (!schemaLoad.schema) {
				failCommand(schemaLoad.error || `Schema file for "${args.schema}" not found`);
				return;
			}
			schema = schemaLoad.schema;
			const tName = schema.table?.name;
			if (!tName) {
				failCommand(`Schema "${args.schema}" does not define a database table name.`);
				return;
			}
			if (tableName && tableName !== tName) {
				failCommand(`Specified table name "${tableName}" does not match schema table name "${tName}"`);
				return;
			}
			tableName = tName;
		} else {
			// Best-effort lookup of a schema describing this table; it only refines
			// cell typing, so failures here must never abort the export.
			let schemaFiles = [];
			try {
				if (fs$1.existsSync(schemaDir)) schemaFiles = fs$1.readdirSync(schemaDir).filter((f) => f.endsWith(".json"));
			} catch (error) {
				consola.debug(`Could not list schema directory ${schemaDir}: ${error instanceof Error ? error.message : String(error)}`);
			}
			for (const file of schemaFiles) try {
				const candidate = await readFile(path.join(schemaDir, file));
				if (candidate?.table?.name === tableName) {
					schema = candidate;
					break;
				}
			} catch (error) {
				// One unreadable schema file should not hide the remaining ones.
				consola.debug(`Skipping schema file ${file}: ${error instanceof Error ? error.message : String(error)}`);
			}
		}
		let format = args.format?.toLowerCase();
		const outputPathArg = args.output;
		if (!format && outputPathArg) {
			const ext = path.extname(outputPathArg).toLowerCase();
			if (ext === ".xlsx") format = "xlsx";
			else if (ext === ".csv") format = "csv";
		}
		if (!format) format = "csv";
		if (format !== "csv" && format !== "xlsx") {
			failCommand(`Unsupported export format: "${format}". Supported formats: csv, xlsx`);
			return;
		}
		const resolvedOutput = outputPathArg ? path.resolve(outputPathArg) : path.resolve(cwd, `${tableName}.${format}`);
		if (!fs$1.existsSync(config.databasePath)) {
			failCommand(`Database file not found at ${config.databasePath}. Please run "aiex schema" to create the database first.`);
			return;
		}
		const s = spinner();
		s.start(`Loading data from table "${tableName}"...`);
		let columns = [];
		let rows = [];
		let db;
		try {
			db = new Database(config.databasePath, { readonly: true });
			const tableExists = db.prepare("select name from sqlite_master where type = 'table' and name = ?").get(tableName);
			if (!tableExists) {
				s.stop("Database query failed");
				failCommand(`Table "${tableName}" not found in database. Run "aiex schema" first to migrate.`);
				return;
			}
			// Identifiers cannot be bound as parameters, so quote the name instead of
			// splicing it in raw.
			const quotedTable = quoteExportIdentifier(tableName);
			columns = db.pragma(`table_info(${quotedTable})`);
			rows = db.prepare(`select * from ${quotedTable}`).all();
		} catch (error) {
			s.stop("Database query failed");
			failCommand(error instanceof Error ? error.message : String(error));
			return;
		} finally {
			// Release the handle on every path, including query failures.
			db?.close();
		}
		if (rows.length === 0) {
			s.stop("Empty table");
			consola.warn(`Table "${tableName}" is empty. Exporting empty file...`);
		} else s.stop(`Loaded ${rows.length} row(s)`);
		const s2 = spinner();
		s2.start("Formatting data...");
		const formattedRows = rows.map((row) => {
			const newRow = {};
			for (const col of columns) {
				const colName = col.name;
				const schemaType = schema?.properties?.[colName]?.type || "";
				newRow[colName] = formatExportCell(row[colName], schemaType, col.type, format);
			}
			return newRow;
		});
		s2.stop("Data formatted");
		const s3 = spinner();
		s3.start(`Writing ${format.toUpperCase()} file to ${resolvedOutput}...`);
		try {
			// Passing the header explicitly keeps the column order and still emits a
			// header row when the table has no rows.
			const ws = XLSX.utils.json_to_sheet(formattedRows, { header: columns.map((col) => col.name) });
			// `recursive: true` is a no-op when the directory already exists.
			fs$1.mkdirSync(path.dirname(resolvedOutput), { recursive: true });
			if (format === "xlsx") {
				const wb = XLSX.utils.book_new();
				// Worksheet names are limited to 31 characters and may not contain : \ / ? * [ ]
				const sheetName = tableName.replace(/[:\\\/?*[\]]/g, "_").slice(0, 31) || "Sheet1";
				XLSX.utils.book_append_sheet(wb, ws, sheetName);
				XLSX.writeFile(wb, resolvedOutput);
			} else {
				const csv = XLSX.utils.sheet_to_csv(ws);
				// NOTE(review): the original prefix rendered as an empty string literal;
				// presumably it held a literal (invisible) UTF-8 byte-order mark so that
				// spreadsheet apps detect the encoding. Written as an escape here — confirm.
				fs$1.writeFileSync(resolvedOutput, "\uFEFF" + csv, "utf8");
			}
			s3.stop("Export completed successfully");
			consola.success(`Successfully exported ${rows.length} row(s) to ${pc.cyan(resolvedOutput)}`);
		} catch (error) {
			s3.stop("File write failed");
			failCommand(error instanceof Error ? error.message : String(error));
			return;
		}
		outro("Done!");
	}
});
14608
+
14298
14609
  //#endregion
14299
14610
  //#region src/commands/extract.ts
14300
14611
  function getIdArg(args) {
@@ -14312,7 +14623,7 @@ function isExtractSubCommand(rawArgs) {
14312
14623
  ].includes(arg));
14313
14624
  }
14314
14625
  function formatSource(source) {
14315
- return source.type === "file" ? source.fileName || "file" : "text";
14626
+ return source.type === "file" ? source.fileName || "file" : "unknown";
14316
14627
  }
14317
14628
  async function loadConfiguredAI(aiexDir) {
14318
14629
  const aiConfig = await readAIConfig(aiexDir);
@@ -14339,52 +14650,6 @@ function resolveModelOverride(aiConfig, modelName) {
14339
14650
  }
14340
14651
  return matched;
14341
14652
  }
14342
- async function runAuditedSingleExtraction(input) {
14343
- const audit = await createExtractionAuditRecord(input.aiexDir, {
14344
- schemaName: input.schemaName,
14345
- modelName: input.modelOverride?.name,
14346
- source: input.source,
14347
- retryOf: input.retryOf
14348
- });
14349
- const result = await extractSingle(input.aiexDir, input.config, input.aiConfig, input.schemaName, input.text, input.filePath, input.modelOverride, { insert: input.insert });
14350
- if (!result.success) {
14351
- await updateExtractionAuditRecord(input.aiexDir, audit.id, {
14352
- status: "failed",
14353
- error: result.error || "Extraction failed"
14354
- });
14355
- return false;
14356
- }
14357
- let notionPages;
14358
- if (input.aiConfig.notion?.enabled && input.aiConfig.notion.schemas?.[input.schemaName]?.databaseId?.trim()) try {
14359
- if (!result.data || typeof result.data !== "object" || Array.isArray(result.data)) throw new Error("Extraction result is not an object and cannot be written to Notion.");
14360
- const page = await writeNotionPage(input.aiConfig.notion, input.schemaName, result.data);
14361
- notionPages = [{
14362
- databaseId: page.databaseId,
14363
- pageId: page.pageId
14364
- }];
14365
- consola.success(`Synced to Notion: ${notionPages.length} page(s)`);
14366
- } catch (error) {
14367
- await updateExtractionAuditRecord(input.aiexDir, audit.id, {
14368
- status: "failed",
14369
- outputPath: result.outputPath,
14370
- outputName: result.outputPath ? path.basename(result.outputPath) : void 0,
14371
- tablesInserted: result.tablesInserted,
14372
- tokensUsed: result.tokensUsed,
14373
- error: error instanceof Error ? error.message : String(error)
14374
- });
14375
- consola.error(`Notion sync failed: ${error instanceof Error ? error.message : String(error)}`);
14376
- return false;
14377
- }
14378
- await updateExtractionAuditRecord(input.aiexDir, audit.id, {
14379
- status: "succeeded",
14380
- outputPath: result.outputPath,
14381
- outputName: result.outputPath ? path.basename(result.outputPath) : void 0,
14382
- tablesInserted: result.tablesInserted,
14383
- notionPages,
14384
- tokensUsed: result.tokensUsed
14385
- });
14386
- return true;
14387
- }
14388
14653
  const historyCommand = defineCommand({
14389
14654
  meta: {
14390
14655
  name: "history",
@@ -14462,23 +14727,19 @@ const retryCommand = defineCommand({
14462
14727
  const modelOverride = resolveModelOverride(aiConfig, record.modelName);
14463
14728
  if (modelOverride === null) return;
14464
14729
  try {
14465
- const sourceInput = record.source.type === "file" ? await readExtractFileInput(record.source.filePath || "", aiConfig) : {
14466
- text: record.source.text || "",
14467
- filePath: void 0
14468
- };
14469
- if (!await runAuditedSingleExtraction({
14730
+ const result = await runAuditedExtraction({
14470
14731
  aiexDir,
14471
14732
  config,
14472
14733
  aiConfig,
14473
14734
  schemaName: record.schemaName,
14474
- text: sourceInput.text,
14475
- filePath: sourceInput.filePath,
14476
14735
  source: record.source,
14477
14736
  modelOverride,
14478
14737
  retryOf: record.id,
14479
- insert: !args.noInsert
14480
- })) {
14481
- failCommand();
14738
+ insert: !args.noInsert,
14739
+ force: true
14740
+ });
14741
+ if (!result.success) {
14742
+ failCommand(result.error);
14482
14743
  return;
14483
14744
  }
14484
14745
  outro("Done!");
@@ -14531,11 +14792,6 @@ const extractCommand = defineCommand({
14531
14792
  alias: "s",
14532
14793
  description: "Schema name (without .json extension)"
14533
14794
  },
14534
- text: {
14535
- type: "string",
14536
- alias: "t",
14537
- description: "Text content to extract"
14538
- },
14539
14795
  file: {
14540
14796
  type: "string",
14541
14797
  alias: "f",
@@ -14560,6 +14816,11 @@ const extractCommand = defineCommand({
14560
14816
  type: "boolean",
14561
14817
  description: "Extract and save JSON without inserting into SQLite",
14562
14818
  default: false
14819
+ },
14820
+ force: {
14821
+ type: "boolean",
14822
+ description: "Force re-extraction even if the file has already been processed successfully",
14823
+ default: false
14563
14824
  }
14564
14825
  },
14565
14826
  async run({ args, rawArgs }) {
@@ -14567,10 +14828,6 @@ const extractCommand = defineCommand({
14567
14828
  intro(pc.inverse(" aiex extract "));
14568
14829
  const config = createMigrationConfig(process.cwd());
14569
14830
  const aiexDir = path.dirname(config.schemaPath);
14570
- if (args.dir && args.text) {
14571
- failCommand("Cannot combine -t/--text with -d/--dir");
14572
- return;
14573
- }
14574
14831
  if (args.dir && args.file) {
14575
14832
  failCommand("Cannot combine -f/--file with -d/--dir");
14576
14833
  return;
@@ -14579,7 +14836,7 @@ const extractCommand = defineCommand({
14579
14836
  if (!aiConfig) return;
14580
14837
  const modelOverride = resolveModelOverride(aiConfig, args.model);
14581
14838
  if (modelOverride === null) return;
14582
- if (!args.schema && !args.text && !args.file && !args.dir) {
14839
+ if (!args.schema && !args.file && !args.dir) {
14583
14840
  if (await runInteractive(aiexDir, config, aiConfig, modelOverride)) outro("Done!");
14584
14841
  return;
14585
14842
  }
@@ -14588,13 +14845,16 @@ const extractCommand = defineCommand({
14588
14845
  failCommand("Schema name (-s) is required in batch mode");
14589
14846
  return;
14590
14847
  }
14591
- const result = await runBatchExtraction(aiexDir, config, aiConfig, args.schema, args.dir, args.glob, modelOverride, { insert: !args.noInsert });
14592
- if (!result.ok) {
14593
- failCommand(result.error);
14848
+ const result$1 = await runBatchExtraction(aiexDir, config, aiConfig, args.schema, args.dir, args.glob, modelOverride, {
14849
+ insert: !args.noInsert,
14850
+ force: args.force
14851
+ });
14852
+ if (!result$1.ok) {
14853
+ failCommand(result$1.error);
14594
14854
  return;
14595
14855
  }
14596
- if (result.failCount > 0) process.exitCode = 1;
14597
- if (result.failCount > 0) outro(`Completed with failures (${result.failCount} failed)`);
14856
+ if (result$1.failCount > 0) process.exitCode = 1;
14857
+ if (result$1.failCount > 0) outro(`Completed with failures (${result$1.failCount} failed)`);
14598
14858
  else outro("Done!");
14599
14859
  return;
14600
14860
  }
@@ -14602,44 +14862,26 @@ const extractCommand = defineCommand({
14602
14862
  failCommand("Please provide a schema name (-s) to extract from");
14603
14863
  return;
14604
14864
  }
14605
- if (!args.text && !args.file) {
14606
- failCommand("Please provide text (-t) or a file (-f) to extract from");
14865
+ if (!args.file) {
14866
+ failCommand("Please provide a file (-f) to extract from");
14607
14867
  return;
14608
14868
  }
14609
- if (args.text && args.file) {
14610
- failCommand("-t and -f cannot be used together");
14611
- return;
14612
- }
14613
- let text$1 = "";
14614
- let filePath;
14615
- if (args.file) try {
14616
- const input = await readExtractFileInput(args.file, aiConfig, modelOverride);
14617
- text$1 = input.text;
14618
- filePath = input.filePath;
14619
- } catch (e) {
14620
- failCommand(`Cannot read file: ${args.file} — ${e instanceof Error ? e.message : String(e)}`);
14621
- return;
14622
- }
14623
- else if (args.text) text$1 = args.text;
14624
- if (!await runAuditedSingleExtraction({
14869
+ const result = await runAuditedExtraction({
14625
14870
  aiexDir,
14626
14871
  config,
14627
14872
  aiConfig,
14628
14873
  schemaName: args.schema,
14629
- text: text$1,
14630
- filePath,
14631
- source: filePath ? {
14874
+ source: {
14632
14875
  type: "file",
14633
- filePath: args.file,
14634
- fileName: path.basename(args.file)
14635
- } : {
14636
- type: "text",
14637
- text: text$1
14876
+ filePath: args.file
14638
14877
  },
14639
14878
  modelOverride,
14640
- insert: !args.noInsert
14641
- })) {
14642
- failCommand();
14879
+ insert: !args.noInsert,
14880
+ force: args.force,
14881
+ quiet: false
14882
+ });
14883
+ if (!result.success) {
14884
+ failCommand(result.error);
14643
14885
  return;
14644
14886
  }
14645
14887
  outro("Done!");
@@ -14664,83 +14906,42 @@ async function runInteractive(aiexDir, config, aiConfig, modelOverride) {
14664
14906
  }
14665
14907
  const inputSource = await select({
14666
14908
  message: "Choose input source:",
14667
- options: [
14668
- {
14669
- label: "Text content",
14670
- value: "text",
14671
- hint: "Paste or type text directly"
14672
- },
14673
- {
14674
- label: "Single file",
14675
- value: "file",
14676
- hint: "Extract from a file (txt, pdf, image)"
14677
- },
14678
- {
14679
- label: "Batch directory",
14680
- value: "dir",
14681
- hint: "Extract all supported files in a directory"
14682
- }
14683
- ]
14909
+ options: [{
14910
+ label: "Single file",
14911
+ value: "file",
14912
+ hint: "Extract from a file (txt, pdf, image)"
14913
+ }, {
14914
+ label: "Batch directory",
14915
+ value: "dir",
14916
+ hint: "Extract all supported files in a directory"
14917
+ }]
14684
14918
  });
14685
14919
  if (isCancel(inputSource)) {
14686
14920
  cancel("Cancelled");
14687
14921
  return false;
14688
14922
  }
14689
- if (inputSource === "text") {
14690
- const textContent = await text({
14691
- message: "Enter text content to extract:",
14923
+ if (inputSource === "file") {
14924
+ const filePathStr = await text({
14925
+ message: "Enter file path:",
14692
14926
  validate(value) {
14693
- if (!value || value.trim().length === 0) return "Please enter some text";
14927
+ if (!value || value.trim().length === 0) return "Please enter a file path";
14694
14928
  }
14695
14929
  });
14696
- if (isCancel(textContent)) {
14930
+ if (isCancel(filePathStr)) {
14697
14931
  cancel("Cancelled");
14698
14932
  return false;
14699
14933
  }
14700
- return runAuditedSingleExtraction({
14934
+ return (await runAuditedExtraction({
14701
14935
  aiexDir,
14702
14936
  config,
14703
14937
  aiConfig,
14704
14938
  schemaName,
14705
- text: textContent,
14706
14939
  source: {
14707
- type: "text",
14708
- text: textContent
14940
+ type: "file",
14941
+ filePath: filePathStr
14709
14942
  },
14710
14943
  modelOverride
14711
- });
14712
- } else if (inputSource === "file") {
14713
- const filePathStr = await text({
14714
- message: "Enter file path:",
14715
- validate(value) {
14716
- if (!value || value.trim().length === 0) return "Please enter a file path";
14717
- }
14718
- });
14719
- if (isCancel(filePathStr)) {
14720
- cancel("Cancelled");
14721
- return false;
14722
- }
14723
- const fp = filePathStr;
14724
- try {
14725
- const input = await readExtractFileInput(fp, aiConfig, modelOverride);
14726
- return runAuditedSingleExtraction({
14727
- aiexDir,
14728
- config,
14729
- aiConfig,
14730
- schemaName,
14731
- text: input.text,
14732
- filePath: input.filePath,
14733
- source: {
14734
- type: "file",
14735
- filePath: fp,
14736
- fileName: path.basename(fp)
14737
- },
14738
- modelOverride
14739
- });
14740
- } catch (e) {
14741
- consola.error(`Cannot read file: ${fp} — ${e instanceof Error ? e.message : String(e)}`);
14742
- return false;
14743
- }
14944
+ })).success;
14744
14945
  } else if (inputSource === "dir") {
14745
14946
  const dirPath = await text({
14746
14947
  message: "Enter directory path:",
@@ -14942,6 +15143,231 @@ const schemaCommand = defineCommand({
14942
15143
  }
14943
15144
  });
14944
15145
 
15146
+ //#endregion
15147
+ //#region src/core/watch-service.ts
15148
/** Matches a trailing `.pdf` extension, ignoring case. */
const PDF_EXT_REGEXP = /\.pdf$/i;
/**
 * Lower-case file extensions (without the leading dot) that the watcher accepts.
 * Listed as images first, then PDF, then text-like formats.
 */
const SUPPORTED_EXTENSIONS = new Set(
	"png jpg jpeg gif webp bmp svg pdf txt md csv json html xml yaml yml".split(" ")
);
15167
/**
 * JSON-file-backed record of the files the watcher has handled, keyed by
 * content hash and stored at `<aiexDir>/watch-registry.json`.
 *
 * Every mutation is a read-modify-write of the whole file. Mutations are
 * therefore serialised on an internal promise chain so that overlapping
 * `markSucceeded` / `markFailed` calls cannot overwrite each other's entries.
 */
var WatchRegistry = class {
	registryPath;
	/** Tail of the update chain; never rejects, so one failure cannot block later updates. */
	pendingUpdate = Promise.resolve();
	constructor(aiexDir) {
		this.registryPath = path.join(aiexDir, "watch-registry.json");
	}
	/**
	 * Reads the registry from disk.
	 * @returns {Promise<{processed: Record<string, object>}>} The stored registry, or an
	 *   empty one when the file is missing, unparsable, or lacks a `processed` object.
	 */
	async load() {
		try {
			const parsed = JSON.parse(await fs.readFile(this.registryPath, "utf-8"));
			const processed = parsed?.processed;
			// Only trust content with the expected shape; callers index into `processed`.
			if (processed !== null && typeof processed === "object" && !Array.isArray(processed)) return parsed;
		} catch {
			// Missing or corrupt registry is deliberately treated as "nothing processed yet".
		}
		return { processed: {} };
	}
	/**
	 * Writes the registry to disk, creating the parent directory if needed.
	 * @param {{processed: Record<string, object>}} data Registry content to persist.
	 */
	async save(data) {
		await fs.mkdir(path.dirname(this.registryPath), { recursive: true });
		await fs.writeFile(this.registryPath, JSON.stringify(data, null, 2), "utf-8");
	}
	/**
	 * Records a successful run for the file with the given content hash.
	 * @param {string} hash Content hash used as the registry key.
	 * @param {string} filePath Path of the processed file.
	 */
	async markSucceeded(hash, filePath) {
		await this.enqueueUpdate(hash, {
			filePath,
			fileName: path.basename(filePath),
			processedAt: (/* @__PURE__ */ new Date()).toISOString(),
			status: "succeeded"
		});
	}
	/**
	 * Records a failed run for the file with the given content hash.
	 * @param {string} hash Content hash used as the registry key.
	 * @param {string} filePath Path of the file that failed.
	 * @param {string} error Message describing the failure.
	 */
	async markFailed(hash, filePath, error) {
		await this.enqueueUpdate(hash, {
			filePath,
			fileName: path.basename(filePath),
			processedAt: (/* @__PURE__ */ new Date()).toISOString(),
			status: "failed",
			error
		});
	}
	/**
	 * Looks up the recorded status for a content hash.
	 * @param {string} hash Content hash used as the registry key.
	 * @returns {Promise<string | null>} The stored status, or `null` when the hash is unknown.
	 */
	async getStatus(hash) {
		// Let in-flight writes finish first so the read never races a rewrite of the file.
		await this.pendingUpdate;
		return (await this.load()).processed[hash]?.status ?? null;
	}
	/**
	 * Queues one load → set entry → save cycle behind all previously queued cycles.
	 * @param {string} hash Registry key to write.
	 * @param {object} entry Entry to store under `hash`.
	 * @returns {Promise<void>} Settles with the outcome of this particular update.
	 */
	enqueueUpdate(hash, entry) {
		const run = this.pendingUpdate.then(async () => {
			const data = await this.load();
			data.processed[hash] = entry;
			await this.save(data);
		});
		this.pendingUpdate = run.catch(() => {});
		return run;
	}
};
15209
/**
 * Builds AppleScript source for a `display notification` statement.
 * Backslashes and double quotes are escaped in both values so that arbitrary
 * text (file names, error messages) stays inside its string literal.
 * @param {string} message Notification body.
 * @param {string} title Notification title.
 * @returns {string} AppleScript source suitable for `osascript -e`.
 */
function buildNotificationScript(message, title) {
	const quote = (value) => `"${String(value).replace(/\\/g, "\\\\").replace(/"/g, "\\\"")}"`;
	return `display notification ${quote(message)} with title ${quote(title)}`;
}
/**
 * Best-effort success notification: on macOS runs `osascript` and `afplay`,
 * elsewhere writes a single bell character to stdout.
 * @param {string} fileName Name of the processed file, shown in the title.
 */
async function notifySuccess(fileName) {
	if (process.platform !== "darwin") {
		process.stdout.write("\x07");
		return;
	}
	try {
		await execa("osascript", ["-e", buildNotificationScript("Successfully processed and inserted data.", `AIEX Watch: ${fileName}`)]);
		await execa("afplay", ["/System/Library/Sounds/Glass.aiff"]);
	} catch {
		// Notifications are optional; a failure here must not affect processing.
	}
}
/**
 * Best-effort failure notification: on macOS runs `osascript` and `afplay`,
 * elsewhere writes two bell characters to stdout.
 * @param {string} fileName Name of the file that failed, shown in the title.
 * @param {string} errorMessage Failure description, shown as the body.
 */
async function notifyFailure(fileName, errorMessage) {
	if (process.platform !== "darwin") {
		process.stdout.write("\x07\x07");
		return;
	}
	try {
		await execa("osascript", ["-e", buildNotificationScript(errorMessage, `AIEX Watch Failed: ${fileName}`)]);
		await execa("afplay", ["/System/Library/Sounds/Basso.aiff"]);
	} catch {
		// Notifications are optional; a failure here must not affect processing.
	}
}
15223
/**
 * Reports whether `candidate` is `parentDir` itself or lies beneath it.
 * Works on path segments, so a sibling that merely shares a name prefix
 * (`/p/.aiex-backup` vs `/p/.aiex`) is not treated as inside.
 * @param {string} candidate Path to test.
 * @param {string} parentDir Directory to test against.
 * @returns {boolean} `true` when `candidate` is inside (or equal to) `parentDir`.
 */
function isPathInside(candidate, parentDir) {
	const relative = path.relative(path.resolve(parentDir), path.resolve(candidate));
	if (relative === "") return true;
	// On Windows a path on another drive comes back absolute.
	if (path.isAbsolute(relative)) return false;
	return relative !== ".." && !relative.startsWith(`..${path.sep}`);
}
/**
 * Starts a chokidar watcher on `options.watchDir` and runs an extraction for
 * every supported file it reports.
 *
 * For each file: its content hash is looked up in the watch registry and the
 * file is skipped when already marked "succeeded". Otherwise it is copied to
 * `<aiexDir>/watch-queue/active/<hash>.<ext>` and handed to `processOneFile`.
 * On success the queue copy is removed; on a falsy result it is moved to
 * `watch-queue/failed`. In both cases the outcome is written to the registry
 * and a desktop notification is attempted.
 *
 * @param {object} options
 * @param {string} options.aiexDir Directory holding the registry and queue folders.
 * @param {object} options.config Passed through to `processOneFile`.
 * @param {object} options.aiConfig Passed through to `processOneFile`.
 * @param {string} options.schemaName Schema used for extraction.
 * @param {string} options.watchDir Directory to watch.
 * @param {object} [options.modelOverride] Optional model; its `name` is logged at start.
 * @param {boolean} options.insert Forwarded to `processOneFile` as `{ insert }`.
 * @returns The chokidar watcher, so the caller can `close()` it.
 */
function startWatcher(options) {
	const { aiexDir, config, aiConfig, schemaName, watchDir, modelOverride, insert } = options;
	const queueDirActive = path.join(aiexDir, "watch-queue", "active");
	const queueDirFailed = path.join(aiexDir, "watch-queue", "failed");
	const registry$2 = new WatchRegistry(aiexDir);
	fs$1.mkdirSync(queueDirActive, { recursive: true });
	fs$1.mkdirSync(queueDirFailed, { recursive: true });
	consola.info(pc.green(`Starting watch on folder: ${pc.cyan(watchDir)}`));
	consola.info(pc.green(`Schema: ${pc.cyan(schemaName)}`));
	if (modelOverride) consola.info(pc.green(`Model Override: ${pc.cyan(modelOverride.name)}`));
	const watcher = chokidar.watch(watchDir, {
		persistent: true,
		ignoreInitial: false,
		awaitWriteFinish: {
			stabilityThreshold: 2e3,
			pollInterval: 500
		}
	});
	watcher.on("add", async (filePath) => {
		const resolvedPath = path.resolve(filePath);
		// Never react to our own queue/registry files when aiexDir sits inside watchDir.
		if (isPathInside(resolvedPath, aiexDir)) return;
		const stat = await fs.stat(resolvedPath).catch(() => null);
		if (!stat || !stat.isFile()) return;
		const fileName = path.basename(resolvedPath);
		const ext = path.extname(resolvedPath).toLowerCase().replace(".", "");
		if (!SUPPORTED_EXTENSIONS.has(ext)) {
			consola.warn(`[Watcher] Skipped unsupported file type: ${fileName}`);
			return;
		}
		consola.info(`[Watcher] New file detected: ${pc.cyan(fileName)}. Processing...`);
		let activeQueuePath;
		// Removes the queued copy and, for PDFs, the sibling `.md` file next to it.
		const removeQueueCopy = async () => {
			if (!activeQueuePath) return;
			await fs.rm(activeQueuePath, { force: true }).catch(() => {});
			await fs.rm(activeQueuePath.replace(PDF_EXT_REGEXP, ".md"), { force: true }).catch(() => {});
		};
		try {
			const hash = await getFileHash(resolvedPath);
			if (await registry$2.getStatus(hash) === "succeeded") {
				consola.info(`[Watcher] File ${pc.cyan(fileName)} (hash: ${hash.slice(0, 8)}) has already been processed successfully. Skipping.`);
				return;
			}
			activeQueuePath = path.join(queueDirActive, `${hash}.${ext}`);
			await fs.copyFile(resolvedPath, activeQueuePath);
			if (await processOneFile(aiexDir, config, aiConfig, schemaName, activeQueuePath, modelOverride, { insert })) {
				await registry$2.markSucceeded(hash, resolvedPath);
				await removeQueueCopy();
				consola.success(`[Watcher] File processed successfully: ${pc.green(fileName)}`);
				await notifySuccess(fileName);
			} else {
				const errorMsg = "Extraction failed. See extraction audit history.";
				await registry$2.markFailed(hash, resolvedPath, errorMsg);
				const failedQueuePath = path.join(queueDirFailed, `${hash}-${Date.now()}.${ext}`);
				// Keep the input for inspection; whatever is left in the active queue is then dropped.
				await fs.rename(activeQueuePath, failedQueuePath).catch(() => {});
				await removeQueueCopy();
				consola.error(`[Watcher] Processing failed for: ${pc.red(fileName)}`);
				await notifyFailure(fileName, errorMsg);
			}
		} catch (e) {
			const errorMsg = e instanceof Error ? e.message : String(e);
			// An exception must not leave a stale copy behind in the active queue.
			await removeQueueCopy();
			consola.error(`[Watcher] Error processing file ${fileName}: ${errorMsg}`);
			await notifyFailure(fileName, errorMsg);
		}
	});
	watcher.on("error", (error) => {
		consola.error(`[Watcher] Watcher error: ${error?.message || String(error)}`);
	});
	return watcher;
}
15287
+
15288
+ //#endregion
15289
+ //#region src/commands/watch.ts
15290
/**
 * `aiex watch` — validates the schema and directory arguments, starts the
 * file watcher, and keeps running until SIGINT or SIGTERM is received.
 */
const watchCommand = defineCommand({
	meta: {
		name: "watch",
		description: "Watch a directory for new files and automatically extract data"
	},
	args: {
		schema: {
			type: "string",
			alias: "s",
			description: "Schema name (without .json extension) to use for extraction"
		},
		dir: {
			type: "string",
			alias: "d",
			description: "Directory path to watch for incoming files"
		},
		model: {
			type: "string",
			alias: "m",
			description: "AI model to use for extraction (overrides default/auto-selected model)"
		},
		noInsert: {
			type: "boolean",
			description: "Extract and save JSON without inserting into SQLite database",
			default: false
		}
	},
	async run({ args }) {
		intro(pc.inverse(" aiex watch "));
		if (!args.schema) {
			failCommand("Schema name (-s) is required");
			return;
		}
		if (!args.dir) {
			failCommand("Watch directory path (-d) is required");
			return;
		}
		const config = createMigrationConfig(process.cwd());
		const aiexDir = path.dirname(config.schemaPath);
		// Loaded only to fail early when the schema is missing or invalid.
		const schemaLoad = await loadSchema(config, args.schema);
		if (!schemaLoad.schema) {
			failCommand(schemaLoad.error || `Schema file for "${args.schema}" not found`);
			return;
		}
		let watchDirStat;
		try {
			watchDirStat = fs$1.statSync(args.dir);
		} catch (e) {
			failCommand(`Watch directory does not exist: ${args.dir} — ${e instanceof Error ? e.message : String(e)}`);
			return;
		}
		if (!watchDirStat.isDirectory()) {
			failCommand(`Watch path is not a directory: ${args.dir}`);
			return;
		}
		const watchDirAbs = path.resolve(args.dir);
		const aiConfig = await loadConfiguredAI(aiexDir);
		if (!aiConfig) return;
		const modelOverride = resolveModelOverride(aiConfig, args.model);
		if (modelOverride === null) return;
		const watcher = startWatcher({
			aiexDir,
			config,
			aiConfig,
			schemaName: args.schema,
			watchDir: watchDirAbs,
			modelOverride,
			insert: !args.noInsert
		});
		let shuttingDown = false;
		const cleanup = async () => {
			// A second Ctrl+C (or SIGTERM after SIGINT) must not start a second shutdown.
			if (shuttingDown) return;
			shuttingDown = true;
			let exitCode = 0;
			consola.info("\nStopping watch directory daemon...");
			try {
				await watcher.close();
				consola.success("Daemon stopped.");
			} catch (e) {
				// Still exit, but report that the watcher did not close cleanly.
				exitCode = 1;
				consola.error(`Failed to stop watcher cleanly: ${e instanceof Error ? e.message : String(e)}`);
			}
			process.exit(exitCode);
		};
		process.on("SIGINT", cleanup);
		process.on("SIGTERM", cleanup);
		consola.info("Press Ctrl+C to stop");
	}
});
15370
+
14945
15371
  //#endregion
14946
15372
  //#region src/server/routes/ai.ts
14947
15373
  const JSON_EXT_RE = /\.json$/i;
@@ -15078,7 +15504,8 @@ const tableQuerySchema = z.object({
15078
15504
  pageSize: z.coerce.number().int().min(1).max(500).catch(50),
15079
15505
  search: z.string().catch(""),
15080
15506
  sortField: z.string().optional(),
15081
- sortOrder: z.preprocess((value) => typeof value === "string" ? value.toLowerCase() : value, z.enum(["asc", "desc"]).catch("asc"))
15507
+ sortOrder: z.preprocess((value) => typeof value === "string" ? value.toLowerCase() : value, z.enum(["asc", "desc"]).catch("asc")),
15508
+ all: z.preprocess((value) => value === "true" || value === true, z.boolean().catch(false))
15082
15509
  });
15083
15510
  function invalidParamResponse$1(message) {
15084
15511
  return (result, c) => {
@@ -15201,7 +15628,7 @@ function dataRoutes(config) {
15201
15628
  });
15202
15629
  app.get("/data/tables/:name", zValidator("param", tableParamSchema, invalidParamResponse$1("Invalid table name")), zValidator("query", tableQuerySchema), async (c) => {
15203
15630
  const { name: tableName } = c.req.valid("param");
15204
- const { page, pageSize, search, sortField, sortOrder } = c.req.valid("query");
15631
+ const { page, pageSize, search, sortField, sortOrder, all } = c.req.valid("query");
15205
15632
  let db;
15206
15633
  try {
15207
15634
  db = createReadonlyQueryDb(config.databasePath);
@@ -15232,15 +15659,20 @@ function dataRoutes(config) {
15232
15659
  ${searchCondition}
15233
15660
  `.execute(db)).rows[0]?.count ?? 0;
15234
15661
  const offset = (page - 1) * pageSize;
15235
- const totalPages = Math.max(1, Math.ceil(total / pageSize));
15236
- const result = await sql`
15237
- select rowid as ${sql.raw(INTERNAL_ROWID_COLUMN)}, *
15238
- from ${sql.table(tableName)}
15239
- ${searchCondition}
15240
- ${orderBy}
15241
- limit ${pageSize}
15242
- offset ${offset}
15243
- `.execute(db);
15662
+ const totalPages = all ? 1 : Math.max(1, Math.ceil(total / pageSize));
15663
+ const result = all ? await sql`
15664
+ select rowid as ${sql.raw(INTERNAL_ROWID_COLUMN)}, *
15665
+ from ${sql.table(tableName)}
15666
+ ${searchCondition}
15667
+ ${orderBy}
15668
+ `.execute(db) : await sql`
15669
+ select rowid as ${sql.raw(INTERNAL_ROWID_COLUMN)}, *
15670
+ from ${sql.table(tableName)}
15671
+ ${searchCondition}
15672
+ ${orderBy}
15673
+ limit ${pageSize}
15674
+ offset ${offset}
15675
+ `.execute(db);
15244
15676
  const actionsByRowId = await getRowExtractionActions(aiexDir, tableName);
15245
15677
  const rowActions = Object.fromEntries(result.rows.map((row, index) => {
15246
15678
  const rowId = row[INTERNAL_ROWID_COLUMN];
@@ -15248,14 +15680,27 @@ function dataRoutes(config) {
15248
15680
  return action ? [String(index), action] : null;
15249
15681
  }).filter((entry) => !!entry));
15250
15682
  const rows = result.rows.map(({ [INTERNAL_ROWID_COLUMN]: _rowid, ...row }) => row);
15683
+ const schemaDir = config.schemaPath;
15684
+ let schema = null;
15685
+ try {
15686
+ const schemaFiles = (await fs.readdir(schemaDir)).filter((f) => f.endsWith(".json"));
15687
+ for (const file of schemaFiles) {
15688
+ const s = await readFile(path.join(schemaDir, file));
15689
+ if (s.table?.name === tableName) {
15690
+ schema = s;
15691
+ break;
15692
+ }
15693
+ }
15694
+ } catch {}
15251
15695
  return c.json({
15252
15696
  columns,
15253
15697
  rows,
15254
15698
  rowActions,
15255
15699
  total,
15256
- page,
15257
- pageSize,
15258
- totalPages
15700
+ page: all ? 1 : page,
15701
+ pageSize: all ? total : pageSize,
15702
+ totalPages,
15703
+ schema
15259
15704
  });
15260
15705
  } catch (error) {
15261
15706
  return c.json({ error: error instanceof Error ? error.message : String(error) }, 500);
@@ -15368,17 +15813,6 @@ function jsonResponse(body, status) {
15368
15813
  headers: { "content-type": "application/json" }
15369
15814
  });
15370
15815
  }
15371
- async function auditFailureResponse(aiexDir, auditId, error, status) {
15372
- const record = await updateExtractionAuditRecord(aiexDir, auditId, {
15373
- status: "failed",
15374
- error
15375
- });
15376
- return jsonResponse({
15377
- success: false,
15378
- error: record.error,
15379
- auditId: record.id
15380
- }, status);
15381
- }
15382
15816
  async function saveUploadToFile(file, uploadsDir, id) {
15383
15817
  validateFileUpload(file);
15384
15818
  await fs.mkdir(uploadsDir, { recursive: true });
@@ -15387,62 +15821,6 @@ async function saveUploadToFile(file, uploadsDir, id) {
15387
15821
  await fs.writeFile(filePath, buffer);
15388
15822
  return filePath;
15389
15823
  }
15390
- async function executeAuditedExtraction(input) {
15391
- const aiConfig = await readAIConfig(input.aiexDir);
15392
- if (!aiConfig) return auditFailureResponse(input.aiexDir, input.auditId, "AI configuration not found. Configure AI settings first.", 400);
15393
- if (!aiConfig.provider.apiKey) return auditFailureResponse(input.aiexDir, input.auditId, "API Key not configured. Configure AI settings first.", 400);
15394
- if (!aiConfig.provider.models?.length) return auditFailureResponse(input.aiexDir, input.auditId, "No models configured. Add at least one model in AI Settings.", 400);
15395
- const modelOverride = input.modelName ? aiConfig.provider.models.find((model) => model.name === input.modelName) : void 0;
15396
- if (input.modelName && !modelOverride) return auditFailureResponse(input.aiexDir, input.auditId, `Model "${input.modelName}" not found in AI settings`, 400);
15397
- let inputText = input.text;
15398
- let inputFilePath = input.filePath;
15399
- if (input.filePath) try {
15400
- const source = await readExtractFileInput(input.filePath, aiConfig);
15401
- inputText = source.text;
15402
- inputFilePath = source.filePath;
15403
- } catch (error) {
15404
- if (isMissingUploadFileError(error)) return auditFailureResponse(input.aiexDir, input.auditId, MISSING_UPLOAD_FILE_TEXT, 400);
15405
- throw error;
15406
- }
15407
- const result = await extractSingle(input.aiexDir, input.config, aiConfig, input.schemaName, inputText, inputFilePath, modelOverride, { quiet: true });
15408
- if (!result.success) return auditFailureResponse(input.aiexDir, input.auditId, result.error || "Extraction failed", 500);
15409
- const notionPages = [];
15410
- if (aiConfig.notion?.enabled && aiConfig.notion.schemas?.[input.schemaName]?.databaseId?.trim()) try {
15411
- if (!result.data || typeof result.data !== "object" || Array.isArray(result.data)) throw new Error("Extraction result is not an object and cannot be written to Notion.");
15412
- notionPages.push(await writeNotionPage(aiConfig.notion, input.schemaName, result.data));
15413
- } catch (error) {
15414
- const record$1 = await updateExtractionAuditRecord(input.aiexDir, input.auditId, {
15415
- status: "failed",
15416
- outputPath: result.outputPath,
15417
- outputName: result.outputPath ? path.basename(result.outputPath) : void 0,
15418
- tablesInserted: result.tablesInserted,
15419
- tokensUsed: result.tokensUsed,
15420
- error: error instanceof Error ? error.message : String(error)
15421
- });
15422
- return jsonResponse({
15423
- success: false,
15424
- error: record$1.error,
15425
- auditId: record$1.id
15426
- }, 500);
15427
- }
15428
- const record = await updateExtractionAuditRecord(input.aiexDir, input.auditId, {
15429
- status: "succeeded",
15430
- outputPath: result.outputPath,
15431
- outputName: result.outputPath ? path.basename(result.outputPath) : void 0,
15432
- tablesInserted: result.tablesInserted,
15433
- notionPages: notionPages.length > 0 ? notionPages : void 0,
15434
- tokensUsed: result.tokensUsed
15435
- });
15436
- return jsonResponse({
15437
- success: true,
15438
- outputPath: record.outputPath,
15439
- outputName: record.outputName,
15440
- tablesInserted: record.tablesInserted,
15441
- notionPages: record.notionPages,
15442
- tokensUsed: record.tokensUsed,
15443
- auditId: record.id
15444
- }, 200);
15445
- }
15446
15824
  function extractRoutes(config) {
15447
15825
  const app = new Hono();
15448
15826
  const aiexDir = path.dirname(config.schemaPath);
@@ -15469,51 +15847,73 @@ function extractRoutes(config) {
15469
15847
  success: false,
15470
15848
  error: "Text and file input cannot be used together"
15471
15849
  }, 400);
15472
- const audit = await createExtractionAuditRecord(aiexDir, {
15473
- schemaName,
15474
- modelName,
15475
- source: file ? {
15476
- type: "file",
15477
- fileName: safeUploadName(file.name)
15478
- } : {
15479
- type: "text",
15480
- text: text$1
15481
- }
15482
- });
15483
- let filePath;
15850
+ let source;
15484
15851
  if (file) {
15852
+ const uploadId = `upload-${Date.now()}`;
15853
+ let filePath;
15485
15854
  try {
15486
- filePath = await saveUploadToFile(file, uploadsDir, audit.id);
15855
+ filePath = await saveUploadToFile(file, uploadsDir, uploadId);
15487
15856
  } catch (e) {
15488
- if (e instanceof FileValidationError) {
15489
- await updateExtractionAuditRecord(aiexDir, audit.id, {
15490
- status: "failed",
15491
- error: e.message
15492
- });
15493
- return c.json({
15494
- success: false,
15495
- error: e.message,
15496
- auditId: audit.id
15497
- }, 400);
15498
- }
15857
+ if (e instanceof FileValidationError) return c.json({
15858
+ success: false,
15859
+ error: e.message
15860
+ }, 400);
15499
15861
  throw e;
15500
15862
  }
15501
- await updateExtractionAuditRecord(aiexDir, audit.id, { source: {
15863
+ source = {
15502
15864
  type: "file",
15503
- filePath,
15504
- fileName: path.basename(filePath)
15505
- } });
15506
- }
15507
- return executeAuditedExtraction({
15865
+ filePath
15866
+ };
15867
+ } else source = {
15868
+ type: "text",
15869
+ text: text$1
15870
+ };
15871
+ const aiConfig = await readAIConfig(aiexDir);
15872
+ if (!aiConfig) return c.json({
15873
+ success: false,
15874
+ error: "AI configuration not found. Configure AI settings first."
15875
+ }, 400);
15876
+ if (!aiConfig.provider.apiKey) return c.json({
15877
+ success: false,
15878
+ error: "API Key not configured. Configure AI settings first."
15879
+ }, 400);
15880
+ if (!aiConfig.provider.models?.length) return c.json({
15881
+ success: false,
15882
+ error: "No models configured. Add at least one model in AI Settings."
15883
+ }, 400);
15884
+ const modelOverride = modelName ? aiConfig.provider.models.find((model) => model.name === modelName) : void 0;
15885
+ if (modelName && !modelOverride) return c.json({
15886
+ success: false,
15887
+ error: `Model "${modelName}" not found in AI settings`
15888
+ }, 400);
15889
+ const result = await runAuditedExtraction({
15508
15890
  aiexDir,
15509
- config,
15510
- auditId: audit.id,
15891
+ config: createMigrationConfig(path.dirname(aiexDir)),
15892
+ aiConfig,
15511
15893
  schemaName,
15512
- text: text$1,
15513
- filePath,
15514
- modelName
15894
+ source,
15895
+ modelOverride,
15896
+ quiet: true
15515
15897
  });
15898
+ if (!result.success) return jsonResponse({
15899
+ success: false,
15900
+ error: result.error,
15901
+ auditId: result.auditId
15902
+ }, 500);
15903
+ return jsonResponse({
15904
+ success: true,
15905
+ outputPath: result.outputPath,
15906
+ outputName: result.outputName,
15907
+ tablesInserted: result.tablesInserted,
15908
+ notionPages: result.notionPages,
15909
+ tokensUsed: result.tokensUsed,
15910
+ auditId: result.auditId
15911
+ }, 200);
15516
15912
  } catch (error) {
15913
+ if (isMissingUploadFileError(error)) return c.json({
15914
+ success: false,
15915
+ error: MISSING_UPLOAD_FILE_TEXT
15916
+ }, 400);
15517
15917
  return c.json({
15518
15918
  success: false,
15519
15919
  error: error instanceof Error ? error.message : String(error)
@@ -15526,20 +15926,56 @@ function extractRoutes(config) {
15526
15926
  success: false,
15527
15927
  error: "Extraction record not found"
15528
15928
  }, 404);
15529
- return executeAuditedExtraction({
15929
+ const aiConfig = await readAIConfig(aiexDir);
15930
+ if (!aiConfig) return c.json({
15931
+ success: false,
15932
+ error: "AI configuration not found. Configure AI settings first."
15933
+ }, 400);
15934
+ if (!aiConfig.provider.apiKey) return c.json({
15935
+ success: false,
15936
+ error: "API Key not configured. Configure AI settings first."
15937
+ }, 400);
15938
+ if (!aiConfig.provider.models?.length) return c.json({
15939
+ success: false,
15940
+ error: "No models configured. Add at least one model in AI Settings."
15941
+ }, 400);
15942
+ const modelOverride = original.modelName ? aiConfig.provider.models.find((m) => m.name === original.modelName) : void 0;
15943
+ if (original.modelName && !modelOverride) return c.json({
15944
+ success: false,
15945
+ error: `Model "${original.modelName}" not found in AI settings`
15946
+ }, 400);
15947
+ const source = original.source.type === "file" && original.source.filePath ? {
15948
+ type: "file",
15949
+ filePath: original.source.filePath
15950
+ } : {
15951
+ type: "text",
15952
+ text: original.source.text ?? ""
15953
+ };
15954
+ const result = await runAuditedExtraction({
15530
15955
  aiexDir,
15531
- config,
15532
- auditId: (await createExtractionAuditRecord(aiexDir, {
15533
- schemaName: original.schemaName,
15534
- modelName: original.modelName,
15535
- source: original.source,
15536
- retryOf: original.id
15537
- })).id,
15956
+ config: createMigrationConfig(path.dirname(aiexDir)),
15957
+ aiConfig,
15538
15958
  schemaName: original.schemaName,
15539
- text: original.source.type === "text" ? original.source.text ?? "" : "",
15540
- filePath: original.source.type === "file" ? original.source.filePath : void 0,
15541
- modelName: original.modelName
15959
+ source,
15960
+ modelOverride,
15961
+ retryOf: original.id,
15962
+ force: true,
15963
+ quiet: true
15542
15964
  });
15965
+ if (!result.success) return jsonResponse({
15966
+ success: false,
15967
+ error: result.error,
15968
+ auditId: result.auditId
15969
+ }, 500);
15970
+ return jsonResponse({
15971
+ success: true,
15972
+ outputPath: result.outputPath,
15973
+ outputName: result.outputName,
15974
+ tablesInserted: result.tablesInserted,
15975
+ notionPages: result.notionPages,
15976
+ tokensUsed: result.tokensUsed,
15977
+ auditId: result.auditId
15978
+ }, 200);
15543
15979
  });
15544
15980
  app.delete("/extract/records/:id", async (c) => {
15545
15981
  const id = c.req.param("id");
@@ -15773,6 +16209,8 @@ const subCommands = {
15773
16209
  web: webCommand,
15774
16210
  schema: schemaCommand,
15775
16211
  extract: extractCommand,
16212
+ watch: watchCommand,
16213
+ export: exportCommand,
15776
16214
  completion: completionCommand,
15777
16215
  doctor: doctorCommand
15778
16216
  };