aiex-cli 0.0.1 → 0.0.2-beta.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.mjs CHANGED
@@ -1,4 +1,4 @@
1
- import { C as doctorDiagnosticsTableRows, _ as seedConfig, a as parseJsonSchema, b as package_default, c as getDefaultAIConfig, d as DEFAULT_MINERU_CONFIG, f as DEFAULT_PROMPT_CONFIG, g as createConfig, h as AIConfigSchema, i as JsonSchemaDefinitionSchema, l as readAIConfig, m as PLACEHOLDER_TEXT, n as createMigrationConfig, o as toSnakeCase, p as PLACEHOLDER_SCHEMA, s as generateDrizzleSchema, t as collectDoctorDiagnostics, u as writeAIConfig, v as description, w as formatDoctorDiagnosticsJson, x as version, y as name } from "./doctor-collector-CykRm0fC.mjs";
1
+ import { S as version, T as formatDoctorDiagnosticsJson, _ as createConfig, a as parseJsonSchema, b as name, c as getDefaultAIConfig, d as DEFAULT_MARKITDOWN_CONFIG, f as DEFAULT_MINERU_CONFIG, g as AIConfigSchema, h as PLACEHOLDER_TEXT, i as JsonSchemaDefinitionSchema, l as readAIConfig, m as PLACEHOLDER_SCHEMA, n as createMigrationConfig, o as toSnakeCase, p as DEFAULT_PROMPT_CONFIG, s as generateDrizzleSchema, t as collectDoctorDiagnostics, u as writeAIConfig, v as seedConfig, w as doctorDiagnosticsTableRows, x as package_default, y as description } from "./doctor-collector-DlG_mJKG.mjs";
2
2
  import { createRequire } from "node:module";
3
3
  import fs from "node:fs/promises";
4
4
  import os from "node:os";
@@ -23,9 +23,10 @@ import { jsonrepair } from "jsonrepair";
23
23
  import fs$1 from "node:fs";
24
24
  import Database from "better-sqlite3";
25
25
  import { glob, globSync } from "tinyglobby";
26
+ import { Client, extractNotionId } from "@notionhq/client";
27
+ import { Buffer } from "node:buffer";
26
28
  import { execa } from "execa";
27
29
  import { extractText, getDocumentProxy, getMeta } from "unpdf";
28
- import { Buffer } from "node:buffer";
29
30
  import { execFile } from "node:child_process";
30
31
  import { promisify } from "node:util";
31
32
  import { serve } from "@hono/node-server";
@@ -12989,6 +12990,7 @@ function initLangfuse(config) {
12989
12990
  }
12990
12991
  const SYSTEM_PROMPT_REGEX = /## System Prompt\n([\s\S]*?)(?=## User Prompt|$)/;
12991
12992
  const USER_PROMPT_REGEX = /## User Prompt Template\n([\s\S]*)$/;
12993
+ const OPENAI_COMPATIBLE_PROVIDER_NAME = "openai-compatible";
12992
12994
  function detectMimeType(filePath) {
12993
12995
  return mime.getType(filePath) ?? "application/octet-stream";
12994
12996
  }
@@ -13147,8 +13149,9 @@ async function extractStructuredData(input) {
13147
13149
  if (useTelemetry) initLangfuse(config);
13148
13150
  const provider = createOpenAICompatible({
13149
13151
  baseURL: config.provider.baseURL,
13150
- name: "qwen",
13151
- apiKey: config.provider.apiKey
13152
+ name: OPENAI_COMPATIBLE_PROVIDER_NAME,
13153
+ apiKey: config.provider.apiKey,
13154
+ supportsStructuredOutputs: useStructuredOutput
13152
13155
  });
13153
13156
  let system;
13154
13157
  let user;
@@ -13381,6 +13384,342 @@ async function savePromptSnapshot(schema, aiexDir) {
13381
13384
  return outputPath;
13382
13385
  }
13383
13386
 
13387
+ //#endregion
13388
+ //#region src/core/extraction-audit.ts
13389
/** Audit ids are used as file names (`<id>.json`), so only word characters, dots and hyphens are accepted. */
const AUDIT_ID_RE = /^[\w.-]+$/;
/** Age in milliseconds after which a record still marked "running" counts as stale (30 minutes). */
const STALE_AFTER_MS = 30 * 60 * 1000;
13391
/**
 * Resolve the directory that stores extraction audit records.
 * @param {string} aiexDir - Root aiex directory.
 * @returns {string} `<aiexDir>/extracted/_audit`.
 */
function auditDir(aiexDir) {
  const segments = ["extracted", "_audit"];
  return path.join(aiexDir, ...segments);
}
13394
/**
 * Resolve the JSON file path of a single audit record.
 * @param {string} aiexDir - Root aiex directory.
 * @param {string} id - Audit record id; used verbatim as the file stem.
 * @returns {string} Path to `<audit dir>/<id>.json`.
 */
function auditPath(aiexDir, id) {
  const fileName = `${id}.json`;
  return path.join(auditDir(aiexDir), fileName);
}
13397
/**
 * Build a unique audit record id of the form `<schema>-<timestamp>-<random>`.
 *
 * The id doubles as a file name and is later validated against `/^[\w.-]+$/`
 * when a record is read back, so every run of characters outside that set in
 * the schema name (spaces, path separators, non-ASCII letters, ...) is replaced
 * with "_". Without this, a record created for such a schema could be written
 * but never read or updated again.
 *
 * @param {string} schemaName - Name of the schema being extracted.
 * @returns {string} An id that only contains word characters, dots and hyphens.
 */
function createAuditId(schemaName) {
  const safeName = String(schemaName).replace(/[^\w.-]+/g, "_");
  // ISO timestamps contain ":" and ".", which are swapped for "-" to stay file-name friendly.
  const timestamp = new Date().toISOString().replace(/[:.]/g, "-");
  const suffix = Math.random().toString(36).slice(2, 8);
  return `${safeName}-${timestamp}-${suffix}`;
}
13400
/**
 * Create and persist a new audit record in the "running" state.
 * @param {string} aiexDir - Root aiex directory.
 * @param {object} input - Reads `schemaName`, `modelName`, `source` and `retryOf`.
 * @returns {Promise<object>} The record that was written.
 */
async function createExtractionAuditRecord(aiexDir, input) {
  const timestamp = new Date().toISOString();
  const { schemaName, source, retryOf } = input;
  const record = {
    id: createAuditId(schemaName),
    status: "running",
    schemaName,
    // Falsy model names are stored as undefined.
    modelName: input.modelName ? input.modelName : void 0,
    source,
    retryOf,
    createdAt: timestamp,
    updatedAt: timestamp
  };
  const targetPath = auditPath(aiexDir, record.id);
  await fs.mkdir(auditDir(aiexDir), { recursive: true });
  await writeFile(targetPath, record, { spaces: 2, EOL: "\n" });
  return record;
}
13419
/**
 * Merge a patch into an existing audit record and write it back.
 * @param {string} aiexDir - Root aiex directory.
 * @param {string} id - Audit record id.
 * @param {object} patch - Fields to overwrite; a nullish `source` keeps the stored one.
 * @returns {Promise<object>} The merged record with a refreshed `updatedAt`.
 * @throws {Error} When no record with this id can be read.
 */
async function updateExtractionAuditRecord(aiexDir, id, patch) {
  const existing = await readExtractionAuditRecord(aiexDir, id);
  if (!existing) throw new Error(`Extraction audit record not found: ${id}`);
  const merged = Object.assign({}, existing, patch, {
    source: patch.source ?? existing.source,
    updatedAt: new Date().toISOString()
  });
  await fs.mkdir(auditDir(aiexDir), { recursive: true });
  const targetPath = auditPath(aiexDir, id);
  await writeFile(targetPath, merged, { spaces: 2, EOL: "\n" });
  return merged;
}
13435
/**
 * Read one audit record from disk.
 * @param {string} aiexDir - Root aiex directory.
 * @param {string} id - Audit record id; ids failing `AUDIT_ID_RE` are rejected without touching the file system.
 * @returns {Promise<object|null>} The stored record, or null when the id is invalid or reading fails.
 */
async function readExtractionAuditRecord(aiexDir, id) {
  const isValidId = AUDIT_ID_RE.test(id);
  if (isValidId) {
    try {
      const record = await readFile(auditPath(aiexDir, id));
      return record;
    } catch {
      // Any read failure is reported as "no record".
    }
  }
  return null;
}
13443
/**
 * Tell whether a record is still "running" but has not been updated for longer than `STALE_AFTER_MS`.
 * @param {object} record - Audit record; reads `status` and `updatedAt`.
 * @returns {boolean} False for non-running records and for unparseable timestamps.
 */
function isStale(record) {
  if (record.status !== "running") return false;
  const updatedMs = Date.parse(record.updatedAt);
  if (Number.isNaN(updatedMs)) return false;
  const ageMs = Date.now() - updatedMs;
  return ageMs > STALE_AFTER_MS;
}
13448
/**
 * Persist a "stale" status for a record that `isStale` flags; other records pass through untouched.
 * @param {string} aiexDir - Root aiex directory.
 * @param {object} record - Audit record to check.
 * @returns {Promise<object>} The same record, or a rewritten copy marked stale.
 */
async function markStaleIfNeeded(aiexDir, record) {
  if (!isStale(record)) return record;
  const staleRecord = Object.assign({}, record, {
    status: "stale",
    // Keep an existing error message; otherwise explain why the run is considered dead.
    error: record.error ?? "Extraction did not finish. It may have been interrupted.",
    updatedAt: new Date().toISOString()
  });
  const targetPath = auditPath(aiexDir, staleRecord.id);
  await writeFile(targetPath, staleRecord, { spaces: 2, EOL: "\n" });
  return staleRecord;
}
13462
/**
 * Load every audit record in the audit directory, newest first.
 *
 * Files that cannot be read are skipped, and stale records are rewritten via
 * `markStaleIfNeeded` as a side effect.
 *
 * @param {string} aiexDir - Root aiex directory.
 * @returns {Promise<object[]>} Records sorted by `createdAt` descending; an empty array on any directory-level failure.
 */
async function listExtractionAuditRecords(aiexDir) {
  const loadRecord = async (filePath) => {
    try {
      const raw = await readFile(filePath);
      return await markStaleIfNeeded(aiexDir, raw);
    } catch {
      return null;
    }
  };
  try {
    const dir = auditDir(aiexDir);
    const entries = await fs.readdir(dir);
    const jsonFiles = entries.filter((entry) => entry.endsWith(".json"));
    const loaded = await Promise.all(jsonFiles.map((entry) => loadRecord(path.join(dir, entry))));
    const records = loaded.filter(Boolean);
    records.sort((a, b) => b.createdAt.localeCompare(a.createdAt));
    return records;
  } catch {
    return [];
  }
}
13477
/**
 * Check whether `childPath` lies strictly inside `parentPath`.
 *
 * Only a relative path that IS ".." or whose first segment is ".." escapes the
 * parent. A plain `startsWith("..")` test would also reject legitimate
 * children whose first segment merely begins with two dots (e.g. "..cache").
 *
 * @param {string} childPath - Candidate path.
 * @param {string} parentPath - Directory that must contain the candidate.
 * @returns {boolean} True when the child is a descendant; false for the parent itself or anything outside it.
 */
function isPathInside(childPath, parentPath) {
  const relative = path.relative(path.resolve(parentPath), path.resolve(childPath));
  // Empty means "same path"; absolute means a different root/drive.
  if (!relative || path.isAbsolute(relative)) return false;
  const escapesParent = relative === ".." || relative.startsWith(`..${path.sep}`);
  return !escapesParent;
}
13481
/**
 * Delete an audit record together with its uploaded files.
 *
 * The recorded source file is removed only when it lives inside the uploads
 * directory; any upload whose name starts with `<id>-` is removed as well.
 * Individual unlink failures are ignored.
 *
 * @param {string} aiexDir - Root aiex directory.
 * @param {string} id - Audit record id.
 * @returns {Promise<boolean>} False when the record cannot be read, true otherwise.
 */
async function deleteExtractionAuditRecord(aiexDir, id) {
  const record = await readExtractionAuditRecord(aiexDir, id);
  if (!record) return false;
  const ignoreFailure = () => {};
  const uploadsDir = path.join(aiexDir, "uploads");
  const { source } = record;
  const ownsUploadedSource = source.type === "file" && source.filePath && isPathInside(source.filePath, uploadsDir);
  if (ownsUploadedSource) await fs.unlink(source.filePath).catch(ignoreFailure);
  const uploadFiles = await fs.readdir(uploadsDir).catch(() => []);
  const prefix = `${id}-`;
  const leftovers = uploadFiles.filter((file) => file.startsWith(prefix));
  await Promise.all(leftovers.map((file) => fs.unlink(path.join(uploadsDir, file)).catch(ignoreFailure)));
  await fs.unlink(auditPath(aiexDir, id)).catch(ignoreFailure);
  return true;
}
13491
+
13492
+ //#endregion
13493
+ //#region src/core/notion-sink.ts
13494
/** Maximum number of UTF-16 code units of text kept for a single Notion text value. */
const RICH_TEXT_LIMIT = 2000;
/** A UUID written as 32 hex digits without hyphens. */
const UUID_RE = /^[0-9a-f]{32}$/i;
/** A UUID in the canonical 8-4-4-4-12 hyphenated form. */
const HYPHENATED_UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
13497
/**
 * Cut a string down to at most `RICH_TEXT_LIMIT` UTF-16 code units.
 *
 * When the cut would land between the two halves of a surrogate pair, the
 * dangling high surrogate is dropped as well, so the result never ends in a
 * lone surrogate (which is not valid text once serialized).
 *
 * @param {string} value - Text to shorten.
 * @returns {string} The original string when it already fits, otherwise its truncated prefix.
 */
function truncateText(value) {
  if (value.length <= RICH_TEXT_LIMIT) return value;
  const lastUnit = value.charCodeAt(RICH_TEXT_LIMIT - 1);
  const endsOnHighSurrogate = lastUnit >= 0xd800 && lastUnit <= 0xdbff;
  return value.slice(0, endsOnHighSurrogate ? RICH_TEXT_LIMIT - 1 : RICH_TEXT_LIMIT);
}
13500
/**
 * Convert any value to a string for use in a text-like property.
 *
 * Always returns a string: callers immediately call `.trim()` / `.length` on
 * the result, so values for which `JSON.stringify` yields `undefined`
 * (functions, symbols) map to "" and BigInt values (on which `JSON.stringify`
 * throws) are rendered with `String`.
 *
 * @param {unknown} value - Value to convert.
 * @returns {string} "" for null/undefined, the string itself, `String(value)` for primitives, JSON otherwise.
 */
function stringifyValue(value) {
  if (value === null || value === void 0) return "";
  switch (typeof value) {
    case "string":
      return value;
    case "number":
    case "boolean":
    case "bigint":
      return String(value);
    default:
      return JSON.stringify(value) ?? "";
  }
}
13506
/**
 * Coerce a value to a finite number.
 * @param {unknown} value - A number or a numeric string.
 * @returns {number|null} The finite number, or null for anything else (blank strings, NaN, Infinity, other types).
 */
function asNumber(value) {
  switch (typeof value) {
    case "number":
      return Number.isFinite(value) ? value : null;
    case "string": {
      if (!value.trim()) return null;
      const parsed = Number(value);
      return Number.isFinite(parsed) ? parsed : null;
    }
    default:
      return null;
  }
}
13514
/**
 * Coerce a value to a boolean for a checkbox property.
 *
 * Numbers are true unless they are 0 or NaN (NaN is falsy everywhere else in
 * JavaScript, including the generic fallback below, so it must not count as
 * checked). Strings are true only for "true", "yes", "1" or "y"
 * (case-insensitive, surrounding whitespace ignored).
 *
 * @param {unknown} value - Value to interpret.
 * @returns {boolean} The interpreted flag.
 */
function asBoolean(value) {
  if (typeof value === "boolean") return value;
  if (typeof value === "number") return value !== 0 && !Number.isNaN(value);
  if (typeof value === "string") {
    const normalized = value.trim().toLowerCase();
    return ["true", "yes", "1", "y"].includes(normalized);
  }
  return !!value;
}
13528
/**
 * Convert a value to an ISO 8601 string usable as the `start` of a date property.
 *
 * A valid date-only string ("YYYY-MM-DD") is returned unchanged rather than
 * being expanded to a midnight-UTC timestamp, so a plain calendar date stays a
 * plain date instead of gaining a time component.
 *
 * @param {unknown} value - A Date, an epoch-milliseconds number, or a parseable date string.
 * @returns {string|null} ISO date or datetime string, or null when the value is not a usable date.
 */
function asDateStart(value) {
  if (value instanceof Date) return Number.isNaN(value.getTime()) ? null : value.toISOString();
  if (typeof value === "number") {
    if (!Number.isFinite(value)) return null;
    const date = new Date(value);
    return Number.isNaN(date.getTime()) ? null : date.toISOString();
  }
  if (typeof value === "string" && value.trim()) {
    const ms = Date.parse(value);
    if (Number.isNaN(ms)) return null;
    const iso = new Date(ms).toISOString();
    const text = value.trim();
    // The prefix check rejects dates the parser rolled over (e.g. day 31 of a 30-day month).
    if (/^\d{4}-\d{2}-\d{2}$/.test(text) && iso.startsWith(text)) return text;
    return iso;
  }
  return null;
}
13541
/**
 * Turn a value into a list of non-empty, trimmed strings.
 * @param {unknown} value - An array (each item is stringified) or a single value.
 * @returns {string[]} Trimmed strings with empty entries removed; a non-array yields zero or one entry.
 */
function asStringArray(value) {
  const toTrimmed = (item) => stringifyValue(item).trim();
  if (!Array.isArray(value)) {
    const single = toTrimmed(value);
    return single ? [single] : [];
  }
  return value.map((item) => toTrimmed(item)).filter((entry) => entry !== "");
}
13546
/**
 * Look up a value in `data` by key or by dot-separated path.
 *
 * A key that exists literally on `data` always wins, even when it contains
 * dots; otherwise a dotted path is walked through nested plain objects.
 * Without the literal check, a top-level key such as "a.b" could never be
 * resolved.
 *
 * @param {object} data - Object to read from.
 * @param {string} path$1 - Property name or dot-separated path.
 * @returns {{found: boolean, value: unknown}} `found` is true when every segment exists as an own property
 *   (the value itself may still be undefined); arrays are not traversed.
 */
function getValueAtPath(data, path$1) {
  const hit = (value) => ({ found: true, value });
  const miss = () => ({ found: false, value: void 0 });
  if (!path$1.includes(".")) return Object.hasOwn(data, path$1) ? hit(data[path$1]) : miss();
  const isPlainContainer = (candidate) => !!candidate && typeof candidate === "object" && !Array.isArray(candidate);
  // Literal key containing dots takes precedence over path traversal.
  if (isPlainContainer(data) && Object.hasOwn(data, path$1)) return hit(data[path$1]);
  let current = data;
  for (const part of path$1.split(".")) {
    if (!isPlainContainer(current)) return miss();
    if (!Object.hasOwn(current, part)) return miss();
    current = current[part];
  }
  return hit(current);
}
13572
/**
 * Build the request payload for one Notion property from an extracted value.
 * @param {string} type - Notion property type (e.g. "title", "number", "select").
 * @param {unknown} value - Extracted value to convert.
 * @returns {object|null} The property payload, or null for property types this function does not handle.
 */
function buildPropertyValue(type, value) {
  // Computed up front for every type, exactly once.
  const text$1 = truncateText(stringifyValue(value));
  const richText = () => (text$1 ? [{ text: { content: text$1 } }] : []);
  const textOrNull = () => text$1 || null;
  if (type === "title") return { title: richText() };
  if (type === "rich_text") return { rich_text: richText() };
  if (type === "number") return { number: asNumber(value) };
  if (type === "checkbox") return { checkbox: asBoolean(value) };
  if (type === "date") {
    const start = asDateStart(value);
    return { date: start ? { start } : null };
  }
  if (type === "select") {
    const optionName = stringifyValue(value).trim();
    return { select: optionName ? { name: optionName } : null };
  }
  if (type === "multi_select") {
    return { multi_select: asStringArray(value).map((optionName) => ({ name: optionName })) };
  }
  if (type === "url") return { url: textOrNull() };
  if (type === "email") return { email: textOrNull() };
  if (type === "phone_number") return { phone_number: textOrNull() };
  return null;
}
13594
/**
 * Find the name of the title property in a Notion property map.
 * @param {object} properties - Map of property name to property definition.
 * @param {string} [preferred] - Name to use when that property exists and is of type "title".
 * @returns {string|null} The preferred name, else the first property of type "title", else null.
 */
function findTitleProperty(properties, preferred) {
  const isTitle = (property) => property?.type === "title";
  if (preferred && isTitle(properties[preferred])) return preferred;
  for (const [propertyName, property] of Object.entries(properties)) {
    if (isTitle(property)) return propertyName;
  }
  return null;
}
13598
/**
 * Format a 32-hex-digit id as a hyphenated UUID (8-4-4-4-12).
 * @param {string} value - Id with or without hyphens.
 * @returns {string} The hyphenated form, or `value` unchanged when it is not 32 hex digits after removing hyphens.
 */
function hyphenateDatabaseId(value) {
  const compact = value.replace(/-/g, "");
  if (!UUID_RE.test(compact)) return value;
  const groups = [
    compact.slice(0, 8),
    compact.slice(8, 12),
    compact.slice(12, 16),
    compact.slice(16, 20),
    compact.slice(20)
  ];
  return groups.join("-");
}
13603
/**
 * Normalize user input (URL or id) into a Notion id.
 * @param {string} value - Raw URL or id as entered by the user.
 * @returns {string} "" for blank input; the id found by `extractNotionId` when there is one; a compact
 *   32-hex id in hyphenated form; otherwise the trimmed input unchanged.
 */
function parseNotionDatabaseId(value) {
  const input = value.trim();
  if (!input) return "";
  const extracted = extractNotionId(input);
  if (extracted) return extracted;
  const isCompactUuid = !HYPHENATED_UUID_RE.test(input) && UUID_RE.test(input);
  return isCompactUuid ? hyphenateDatabaseId(input) : input;
}
13612
/**
 * Reduce a field or property name to a comparison key.
 * @param {string} value - Name to normalize.
 * @returns {string} The NFKC-normalized, lower-cased name with everything except Unicode letters and numbers removed.
 */
function normalizeFieldName(value) {
  const folded = value.normalize("NFKC").toLowerCase();
  return folded.replace(/[^\p{Letter}\p{Number}]+/gu, "");
}
13615
/**
 * Collect the normalized keys under which a schema field may match a database property.
 * @param {{name?: string, title?: string, description?: string}} field - Schema field.
 * @returns {string[]} Non-empty normalized keys, in the order name, title, description.
 */
function buildMatchKeys(field) {
  const keys = [];
  for (const candidate of [field.name, field.title, field.description]) {
    if (!candidate?.trim()) continue;
    const key = normalizeFieldName(candidate);
    if (key) keys.push(key);
  }
  return keys;
}
13622
/**
 * Suggest a mapping from schema field names to database property names.
 *
 * Property names are indexed by their normalized form; each field takes the
 * first of its match keys that hits the index.
 *
 * @param {Array<object>} schemaFields - Schema fields (see `buildMatchKeys` for the properties read).
 * @param {object} databaseProperties - Map keyed by database property name.
 * @returns {Object<string, string>} Map of `field.name` to the matched property name; unmatched fields are omitted.
 */
function suggestFieldMap(schemaFields, databaseProperties) {
  // When two property names normalize to the same key, the later one wins.
  const propertyByKey = new Map(
    Object.keys(databaseProperties).map((propertyName) => [normalizeFieldName(propertyName), propertyName])
  );
  const fieldMap = {};
  for (const field of schemaFields) {
    const matchedKey = buildMatchKeys(field).find((key) => propertyByKey.get(key));
    if (matchedKey !== void 0) fieldMap[field.name] = propertyByKey.get(matchedKey);
  }
  return fieldMap;
}
13635
/**
 * Check that a value is an object carrying an object-valued `properties` member.
 * @param {unknown} value - Candidate response object.
 * @returns {boolean} True when both `value` and `value.properties` are non-null objects.
 */
function hasProperties(value) {
  if (!value || typeof value !== "object") return false;
  const { properties } = value;
  return !!properties && typeof properties === "object";
}
13638
/**
 * Pick the id of the first usable data source listed on a database object.
 * @param {object} [database] - Object with an optional `data_sources` array.
 * @returns {string|undefined} The first `id` that is a non-blank string, or undefined when there is none.
 */
function firstDataSourceId(database) {
  const sources = database?.data_sources;
  if (!Array.isArray(sources)) return void 0;
  for (const source of sources) {
    if (typeof source?.id === "string" && source.id.trim()) return source.id;
  }
  return void 0;
}
13641
/**
 * Resolve a user-supplied URL/id to a Notion data source and its properties.
 *
 * The id is first tried as a data source id. If that call fails or returns no
 * properties, the id is treated as a database id and the database's first
 * data source is retrieved instead.
 *
 * @param {object} notion - Client exposing `dataSources.retrieve` and `databases.retrieve`.
 * @param {string} inputId - Database or data source URL/id.
 * @returns {Promise<{databaseId: string, dataSourceId: string, properties: object, parent: {data_source_id: string}}>}
 * @throws {Error} When the id is blank, the database has no data source, or the data source returns no properties.
 *   Errors from the database-level lookups propagate unchanged.
 */
async function resolveNotionDataSource(notion, inputId) {
  const id = parseNotionDatabaseId(inputId);
  if (!id) throw new Error("Notion database or data source URL/ID is required.");
  const toResolved = (source, databaseId, fallbackId) => {
    const resolvedId = source.id ?? fallbackId;
    return {
      databaseId,
      dataSourceId: resolvedId,
      properties: source.properties,
      parent: { data_source_id: resolvedId }
    };
  };
  try {
    const direct = await notion.dataSources.retrieve({ data_source_id: id });
    if (hasProperties(direct)) {
      const parentDatabaseId = direct.parent?.database_id;
      return toResolved(direct, typeof parentDatabaseId === "string" ? parentDatabaseId : id, id);
    }
  } catch {
    // Not retrievable as a data source: fall back to treating the id as a database id.
  }
  const database = await notion.databases.retrieve({ database_id: id });
  const dataSourceId = firstDataSourceId(database);
  if (!dataSourceId) throw new Error("No data source found for this Notion database. Copy the data source link from Notion, or share the source database with the integration.");
  const dataSource = await notion.dataSources.retrieve({ data_source_id: dataSourceId });
  if (!hasProperties(dataSource)) throw new Error("Notion data source did not return properties. Make sure it is shared with the integration and is not a linked data source.");
  return toResolved(dataSource, database.id ?? id, dataSourceId);
}
13665
/**
 * Inspect a Notion database/data source and suggest a field mapping for a schema.
 * @param {{token: string, databaseId: string, schemaFields: Array<object>}} input - Integration token, target URL/id and schema fields.
 * @returns {Promise<object>} `databaseId`, `dataSourceId`, `titleProperty` (undefined when none),
 *   `properties` (name/type pairs sorted by name) and `suggestedFieldMap`.
 * @throws {Error} When the token or id is blank, or when the data source cannot be resolved.
 */
async function inspectNotionDatabase(input) {
  if (!input.token.trim()) throw new Error("Notion integration token is required.");
  const id = parseNotionDatabaseId(input.databaseId);
  if (!id) throw new Error("Notion database or data source URL/ID is required.");
  const notion = new Client({ auth: input.token });
  const { databaseId, dataSourceId, properties: databaseProperties } = await resolveNotionDataSource(notion, id);
  const titleProperty = findTitleProperty(databaseProperties) ?? void 0;
  const properties = Object.entries(databaseProperties).map(([propertyName, property]) => ({
    name: propertyName,
    type: property?.type ?? "unknown"
  }));
  properties.sort((a, b) => a.name.localeCompare(b.name));
  return {
    databaseId,
    dataSourceId,
    titleProperty,
    properties,
    suggestedFieldMap: suggestFieldMap(input.schemaFields, databaseProperties)
  };
}
13683
/**
 * Validate the Notion export configuration.
 *
 * A missing or non-string token is reported as a validation message rather
 * than surfacing as a TypeError from calling `.trim()` on undefined.
 *
 * @param {{enabled?: boolean, token?: string}} [config] - Notion configuration, possibly absent.
 * @returns {string|null} A human-readable problem description, or null when the configuration is usable.
 */
function validateNotionConfig(config) {
  if (!config?.enabled) return "Notion export is not enabled. Configure Notion settings first.";
  if (typeof config.token !== "string" || !config.token.trim()) return "Notion integration token is required.";
  return null;
}
13688
/**
 * Create a Notion page from extracted data for the given schema.
 *
 * Each key of `data` and each key of the schema's `fieldMap` is resolved with
 * `getValueAtPath`; values whose target property exists in the data source are
 * converted with `buildPropertyValue`. If the title property received no
 * value, the schema name is used as the title.
 *
 * `schemas` and `databaseId` are read defensively so that an incomplete
 * configuration produces the descriptive errors below instead of a TypeError.
 *
 * @param {object} config - Notion configuration (`enabled`, `token`, `schemas`).
 * @param {string} schemaName - Schema whose Notion target should be used.
 * @param {object} data - Extracted data object.
 * @returns {Promise<{pageId: string, databaseId: string, dataSourceId: string}>} Ids of the created page and its target.
 * @throws {Error} When the configuration is invalid, the schema has no database configured,
 *   no field matches a database property, or a Notion request fails.
 */
async function writeNotionPage(config, schemaName, data) {
  const configError = validateNotionConfig(config);
  if (configError) throw new Error(configError);
  const notionConfig = config;
  const schemaConfig = notionConfig.schemas?.[schemaName];
  if (!schemaConfig) throw new Error(`Notion database is not configured for schema "${schemaName}".`);
  if (!schemaConfig.databaseId?.trim()) throw new Error(`Notion database ID is required for schema "${schemaName}".`);
  const notion = new Client({ auth: notionConfig.token });
  const resolved = await resolveNotionDataSource(notion, schemaConfig.databaseId);
  const databaseProperties = resolved.properties;
  const fieldMap = schemaConfig.fieldMap ?? {};
  const properties = {};
  // Mapped source paths are included so nested values (e.g. "a.b") can be exported too.
  const sourceFields = new Set([...Object.keys(data), ...Object.keys(fieldMap)]);
  for (const sourceField of sourceFields) {
    const source = getValueAtPath(data, sourceField);
    if (!source.found) continue;
    // Unmapped fields target the property that shares their name.
    const notionPropertyName = fieldMap[sourceField] ?? sourceField;
    const notionProperty = databaseProperties[notionPropertyName];
    if (!notionProperty) continue;
    const propertyValue = buildPropertyValue(notionProperty.type, source.value);
    if (propertyValue) properties[notionPropertyName] = propertyValue;
  }
  const titleProperty = findTitleProperty(databaseProperties, schemaConfig.titleProperty);
  if (titleProperty && !properties[titleProperty]) properties[titleProperty] = buildPropertyValue("title", schemaName);
  if (Object.keys(properties).length === 0) throw new Error("No extracted fields matched Notion database properties.");
  const page = await notion.pages.create({
    parent: resolved.parent,
    properties
  });
  return {
    pageId: page.id,
    databaseId: resolved.databaseId,
    dataSourceId: resolved.dataSourceId
  };
}
13722
+
13384
13723
  //#endregion
13385
13724
  //#region src/core/pdf-converter/external.ts
13386
13725
  function applyTemplate(value, context) {
@@ -13421,7 +13760,7 @@ function formatCommandError(error, command$1) {
13421
13760
  }
13422
13761
  async function countPdfPages(input) {
13423
13762
  try {
13424
- return (await getDocumentProxy(input)).numPages;
13763
+ return (await getDocumentProxy(Buffer.isBuffer(input) ? new Uint8Array(input) : input)).numPages;
13425
13764
  } catch {
13426
13765
  return 0;
13427
13766
  }
@@ -13433,7 +13772,7 @@ var ExternalCommandPdfConverter = class {
13433
13772
  this.name = name$1;
13434
13773
  }
13435
13774
  async convert(input, filePath) {
13436
- const tempRoot = await fs.mkdtemp(path.join(os.tmpdir(), "aiex-mineru-"));
13775
+ const tempRoot = await fs.mkdtemp(path.join(os.tmpdir(), `aiex-${this.name}-`));
13437
13776
  const outputDir = path.join(tempRoot, "output");
13438
13777
  await fs.mkdir(outputDir, { recursive: true });
13439
13778
  const inputPath = filePath ?? path.join(tempRoot, "input.pdf");
@@ -13530,6 +13869,10 @@ function createPdfConverter(config) {
13530
13869
  const mineruConfig = config.mineru ?? DEFAULT_MINERU_CONFIG;
13531
13870
  return withFallback(new ExternalCommandPdfConverter("mineru", mineruConfig), mineruConfig);
13532
13871
  }
13872
+ if (config.converter === "markitdown") {
13873
+ const markitdownConfig = config.markitdown ?? DEFAULT_MARKITDOWN_CONFIG;
13874
+ return withFallback(new ExternalCommandPdfConverter("markitdown", markitdownConfig), markitdownConfig);
13875
+ }
13533
13876
  if (config.converter === "external") {
13534
13877
  if (!config.external) throw new Error("External PDF converter is selected but no external command is configured.");
13535
13878
  return withFallback(new ExternalCommandPdfConverter("external", config.external), config.external);
@@ -13569,8 +13912,19 @@ const SUPPORTED_EXTENSIONS = new Set([
13569
13912
  "yml"
13570
13913
  ]);
13571
13914
  const PDF_EXT_RE = /\.pdf$/i;
13572
- const JSON_EXT_RE = /\.json$/;
13915
+ const JSON_EXT_RE$1 = /\.json$/;
13573
13916
  const SUPPORTED_FILE_PATTERN = `*.{${[...SUPPORTED_EXTENSIONS].join(",")}}`;
13917
/**
 * Write one extraction result to Notion.
 * @param {object} aiConfig - AI configuration; its `notion` member is passed to `writeNotionPage`.
 * @param {string} schemaName - Schema the result belongs to.
 * @param {unknown} data - Extraction result; must be a non-array object.
 * @returns {Promise<Array<{databaseId: string, pageId: string}>>} A single-element list describing the created page.
 * @throws {Error} When `data` is not a plain object, or when writing the page fails.
 */
async function syncResultToNotion(aiConfig, schemaName, data) {
  const isRecord = !!data && typeof data === "object" && !Array.isArray(data);
  if (!isRecord) throw new Error("Extraction result is not an object and cannot be written to Notion.");
  const { databaseId, pageId } = await writeNotionPage(aiConfig.notion, schemaName, data);
  return [{ databaseId, pageId }];
}
13925
/**
 * Decide whether results for a schema should be synced to Notion.
 * @param {object} aiConfig - AI configuration with an optional `notion` member.
 * @param {string} schemaName - Schema being extracted.
 * @returns {boolean} True when Notion export is enabled and the schema has a non-blank `databaseId`.
 */
function shouldSyncNotion(aiConfig, schemaName) {
  const notion = aiConfig.notion;
  if (!notion?.enabled) return false;
  const databaseId = notion.schemas?.[schemaName]?.databaseId;
  return !!databaseId?.trim();
}
13574
13928
  async function ensureDatabaseReady(dbPath, schema) {
13575
13929
  try {
13576
13930
  await fs.access(dbPath);
@@ -13628,7 +13982,7 @@ async function loadSchema(config, schemaName) {
13628
13982
  async function listSchemas(aiexDir) {
13629
13983
  try {
13630
13984
  const dir = path.join(aiexDir, "schema");
13631
- return (await fs.readdir(dir)).filter((f) => f.endsWith(".json")).map((f) => f.replace(JSON_EXT_RE, "")).sort();
13985
+ return (await fs.readdir(dir)).filter((f) => f.endsWith(".json")).map((f) => f.replace(JSON_EXT_RE$1, "")).sort();
13632
13986
  } catch {
13633
13987
  return [];
13634
13988
  }
@@ -13693,7 +14047,7 @@ async function extractSingle(aiexDir, config, aiConfig, schemaName, text$1, file
13693
14047
  if (!options?.quiet) s.stop("Extraction complete");
13694
14048
  if (result.outputPath && !options?.quiet) consola.success(`Result saved: ${pc.cyan(result.outputPath)}`);
13695
14049
  if (result.tokensUsed && !options?.quiet) consola.info(pc.gray(`Token usage: prompt=${result.tokensUsed.prompt}, completion=${result.tokensUsed.completion}, total=${result.tokensUsed.total}`));
13696
- if (result.data) {
14050
+ if (result.data && options?.insert !== false) {
13697
14051
  const s2 = spinner();
13698
14052
  if (!options?.quiet) s2.start("Inserting into database...");
13699
14053
  const dbError = await ensureDatabaseReady(config.databasePath, schemaLoad.schema);
@@ -13711,6 +14065,13 @@ async function extractSingle(aiexDir, config, aiConfig, schemaName, text$1, file
13711
14065
  const insertResult = insertExtractedData(db, schemaLoad.schema, result.data);
13712
14066
  if (insertResult.success) {
13713
14067
  if (!options?.quiet) s2.stop(`Inserted into ${insertResult.tablesInserted.length} table(s)`);
14068
+ return {
14069
+ success: true,
14070
+ outputPath: result.outputPath,
14071
+ data: result.data,
14072
+ tablesInserted: insertResult.tablesInserted,
14073
+ tokensUsed: result.tokensUsed
14074
+ };
13714
14075
  } else {
13715
14076
  if (!options?.quiet) s2.stop("Database insert failed");
13716
14077
  consola.error(insertResult.error || "Unknown error");
@@ -13731,25 +14092,74 @@ async function extractSingle(aiexDir, config, aiConfig, schemaName, text$1, file
13731
14092
  };
13732
14093
  }
13733
14094
  }
13734
- return { success: true };
14095
+ return {
14096
+ success: true,
14097
+ outputPath: result.outputPath,
14098
+ data: result.data,
14099
+ tokensUsed: result.tokensUsed
14100
+ };
13735
14101
  }
13736
/**
 * Extract one file, optionally sync the result to Notion, and track the run in an audit record.
 *
 * Failure-path audit updates are best-effort: if writing the "failed" status
 * itself fails, that secondary problem is logged and the function still
 * reports the original error and returns false, so one unwritable audit record
 * cannot reject this function and abort a surrounding batch loop.
 *
 * @param {string} aiexDir - Root aiex directory.
 * @param {object} config - Project configuration passed through to `extractSingle`.
 * @param {object} aiConfig - AI configuration (also consulted for Notion sync).
 * @param {string} schemaName - Schema to extract with.
 * @param {string} filePath - File to process.
 * @param {object} [modelOverride] - Model to use instead of the default; its `name` is recorded in the audit.
 * @param {{insert?: boolean}} [options] - `insert` is forwarded to `extractSingle`.
 * @returns {Promise<boolean>} True when extraction (and Notion sync, if enabled) succeeded.
 */
async function processOneFile(aiexDir, config, aiConfig, schemaName, filePath, modelOverride, options) {
  const fileName = path.basename(filePath);
  const describeError = (error) => (error instanceof Error ? error.message : String(error));
  const audit = await createExtractionAuditRecord(aiexDir, {
    schemaName,
    modelName: modelOverride?.name,
    source: {
      type: "file",
      filePath,
      fileName
    }
  });
  // Mark the audit record as failed without letting an audit write error mask the real one.
  const recordFailure = async (patch) => {
    try {
      await updateExtractionAuditRecord(aiexDir, audit.id, { status: "failed", ...patch });
    } catch (auditError) {
      consola.error(`Could not update extraction audit record ${audit.id}: ${describeError(auditError)}`);
    }
  };
  try {
    const input = await readExtractFileInput(filePath, aiConfig);
    const r = await extractSingle(aiexDir, config, aiConfig, schemaName, input.text, input.filePath, modelOverride, {
      quiet: false,
      insert: options?.insert
    });
    if (!r.success) {
      await recordFailure({ error: r.error || "Extraction failed" });
      consola.error(`Failed: ${r.error}`);
      return false;
    }
    const outputName = r.outputPath ? path.basename(r.outputPath) : void 0;
    let notionPages;
    if (shouldSyncNotion(aiConfig, schemaName)) {
      try {
        notionPages = await syncResultToNotion(aiConfig, schemaName, r.data);
        consola.success(`Synced to Notion: ${notionPages.length} page(s)`);
      } catch (error) {
        // Extraction itself worked, so its outputs are kept on the failed record.
        const message = describeError(error);
        await recordFailure({
          outputPath: r.outputPath,
          outputName,
          tablesInserted: r.tablesInserted,
          tokensUsed: r.tokensUsed,
          error: message
        });
        consola.error(`Notion sync failed: ${message}`);
        return false;
      }
    }
    await updateExtractionAuditRecord(aiexDir, audit.id, {
      status: "succeeded",
      outputPath: r.outputPath,
      outputName,
      tablesInserted: r.tablesInserted,
      notionPages,
      tokensUsed: r.tokensUsed
    });
    consola.success(`Processed: ${fileName}`);
    return true;
  } catch (e) {
    const message = describeError(e);
    await recordFailure({ error: message });
    consola.error(`Error processing ${fileName}: ${message}`);
    return false;
  }
}
13752
- async function runBatchExtraction(aiexDir, config, aiConfig, schemaName, dir, globPattern, modelOverride) {
14162
+ async function runBatchExtraction(aiexDir, config, aiConfig, schemaName, dir, globPattern, modelOverride, options) {
13753
14163
  consola.info(`Scanning ${pc.cyan(dir)} for supported files...`);
13754
14164
  let files;
13755
14165
  try {
@@ -13774,7 +14184,7 @@ async function runBatchExtraction(aiexDir, config, aiConfig, schemaName, dir, gl
13774
14184
  for (let i = 0; i < files.length; i++) {
13775
14185
  const file = files[i];
13776
14186
  consola.info(`\n[${i + 1}/${files.length}] Processing: ${pc.cyan(path.basename(file))}`);
13777
- if (await processOneFile(aiexDir, config, aiConfig, schemaName, file, modelOverride)) successCount++;
14187
+ if (await processOneFile(aiexDir, config, aiConfig, schemaName, file, modelOverride, options)) successCount++;
13778
14188
  else failCount++;
13779
14189
  }
13780
14190
  consola.info(`\nBatch complete: ${pc.green(`${successCount} succeeded`)}, ${pc.red(`${failCount} failed`)}, ${files.length} total`);
@@ -13787,11 +14197,230 @@ async function runBatchExtraction(aiexDir, config, aiConfig, schemaName, dir, gl
13787
14197
 
13788
14198
  //#endregion
13789
14199
  //#region src/commands/extract.ts
14200
+ function getIdArg(args) {
14201
+ if (typeof args.id === "string") return args.id;
14202
+ const positional = args._;
14203
+ return Array.isArray(positional) && typeof positional[0] === "string" ? positional[0] : "";
14204
+ }
14205
+ function isExtractSubCommand(rawArgs) {
14206
+ if (!Array.isArray(rawArgs)) return false;
14207
+ return rawArgs.some((arg) => typeof arg === "string" && [
14208
+ "history",
14209
+ "show",
14210
+ "retry",
14211
+ "rm"
14212
+ ].includes(arg));
14213
+ }
14214
+ function formatSource(source) {
14215
+ return source.type === "file" ? source.fileName || "file" : "text";
14216
+ }
14217
+ async function loadConfiguredAI(aiexDir) {
14218
+ const aiConfig = await readAIConfig(aiexDir);
14219
+ if (!aiConfig) {
14220
+ failCommand("AI configuration not found. Please run \"aiex web\" to configure AI settings first");
14221
+ return null;
14222
+ }
14223
+ if (!aiConfig.provider.apiKey) {
14224
+ failCommand("API Key not configured. Please configure AI settings in the Web interface first");
14225
+ return null;
14226
+ }
14227
+ if (!aiConfig.provider.models?.length) {
14228
+ failCommand("No models configured. Please add at least one model in AI Settings");
14229
+ return null;
14230
+ }
14231
+ return aiConfig;
14232
+ }
14233
+ function resolveModelOverride(aiConfig, modelName) {
14234
+ if (!modelName) return void 0;
14235
+ const matched = aiConfig.provider.models.find((m) => m.name === modelName);
14236
+ if (!matched) {
14237
+ failCommand(`Model "${modelName}" not found in configuration. Available models: ${aiConfig.provider.models.map((m) => m.name).join(", ")}`);
14238
+ return null;
14239
+ }
14240
+ return matched;
14241
+ }
14242
+ async function runAuditedSingleExtraction(input) {
14243
+ const audit = await createExtractionAuditRecord(input.aiexDir, {
14244
+ schemaName: input.schemaName,
14245
+ modelName: input.modelOverride?.name,
14246
+ source: input.source,
14247
+ retryOf: input.retryOf
14248
+ });
14249
+ const result = await extractSingle(input.aiexDir, input.config, input.aiConfig, input.schemaName, input.text, input.filePath, input.modelOverride, { insert: input.insert });
14250
+ if (!result.success) {
14251
+ await updateExtractionAuditRecord(input.aiexDir, audit.id, {
14252
+ status: "failed",
14253
+ error: result.error || "Extraction failed"
14254
+ });
14255
+ return false;
14256
+ }
14257
+ let notionPages;
14258
+ if (input.aiConfig.notion?.enabled && input.aiConfig.notion.schemas?.[input.schemaName]?.databaseId?.trim()) try {
14259
+ if (!result.data || typeof result.data !== "object" || Array.isArray(result.data)) throw new Error("Extraction result is not an object and cannot be written to Notion.");
14260
+ const page = await writeNotionPage(input.aiConfig.notion, input.schemaName, result.data);
14261
+ notionPages = [{
14262
+ databaseId: page.databaseId,
14263
+ pageId: page.pageId
14264
+ }];
14265
+ consola.success(`Synced to Notion: ${notionPages.length} page(s)`);
14266
+ } catch (error) {
14267
+ await updateExtractionAuditRecord(input.aiexDir, audit.id, {
14268
+ status: "failed",
14269
+ outputPath: result.outputPath,
14270
+ outputName: result.outputPath ? path.basename(result.outputPath) : void 0,
14271
+ tablesInserted: result.tablesInserted,
14272
+ tokensUsed: result.tokensUsed,
14273
+ error: error instanceof Error ? error.message : String(error)
14274
+ });
14275
+ consola.error(`Notion sync failed: ${error instanceof Error ? error.message : String(error)}`);
14276
+ return false;
14277
+ }
14278
+ await updateExtractionAuditRecord(input.aiexDir, audit.id, {
14279
+ status: "succeeded",
14280
+ outputPath: result.outputPath,
14281
+ outputName: result.outputPath ? path.basename(result.outputPath) : void 0,
14282
+ tablesInserted: result.tablesInserted,
14283
+ notionPages,
14284
+ tokensUsed: result.tokensUsed
14285
+ });
14286
+ return true;
14287
+ }
14288
+ const historyCommand = defineCommand({
14289
+ meta: {
14290
+ name: "history",
14291
+ description: "List extraction audit records"
14292
+ },
14293
+ async run() {
14294
+ const config = createMigrationConfig(process.cwd());
14295
+ const records = await listExtractionAuditRecords(path.dirname(config.schemaPath));
14296
+ if (records.length === 0) {
14297
+ consola.info("No extraction history found");
14298
+ return;
14299
+ }
14300
+ for (const record of records) {
14301
+ const suffix = record.error ? ` — ${record.error}` : record.outputName ? ` — ${record.outputName}` : "";
14302
+ consola.info(`${record.status.padEnd(9)} ${record.id} ${record.schemaName} ${formatSource(record.source)}${suffix}`);
14303
+ }
14304
+ }
14305
+ });
14306
+ const showCommand = defineCommand({
14307
+ meta: {
14308
+ name: "show",
14309
+ description: "Show an extraction audit record"
14310
+ },
14311
+ args: { id: {
14312
+ type: "string",
14313
+ description: "Audit record id"
14314
+ } },
14315
+ async run({ args }) {
14316
+ const id = getIdArg(args);
14317
+ if (!id) {
14318
+ failCommand("Audit record id is required");
14319
+ return;
14320
+ }
14321
+ const config = createMigrationConfig(process.cwd());
14322
+ const record = await readExtractionAuditRecord(path.dirname(config.schemaPath), id);
14323
+ if (!record) {
14324
+ failCommand(`Extraction record not found: ${id}`);
14325
+ return;
14326
+ }
14327
+ consola.info(JSON.stringify(record, null, 2));
14328
+ }
14329
+ });
14330
+ const retryCommand = defineCommand({
14331
+ meta: {
14332
+ name: "retry",
14333
+ description: "Retry an extraction audit record"
14334
+ },
14335
+ args: {
14336
+ id: {
14337
+ type: "string",
14338
+ description: "Audit record id"
14339
+ },
14340
+ noInsert: {
14341
+ type: "boolean",
14342
+ description: "Extract and save JSON without inserting into SQLite",
14343
+ default: false
14344
+ }
14345
+ },
14346
+ async run({ args }) {
14347
+ intro(pc.inverse(" aiex extract retry "));
14348
+ const id = getIdArg(args);
14349
+ if (!id) {
14350
+ failCommand("Audit record id is required");
14351
+ return;
14352
+ }
14353
+ const config = createMigrationConfig(process.cwd());
14354
+ const aiexDir = path.dirname(config.schemaPath);
14355
+ const record = await readExtractionAuditRecord(aiexDir, id);
14356
+ if (!record) {
14357
+ failCommand(`Extraction record not found: ${id}`);
14358
+ return;
14359
+ }
14360
+ const aiConfig = await loadConfiguredAI(aiexDir);
14361
+ if (!aiConfig) return;
14362
+ const modelOverride = resolveModelOverride(aiConfig, record.modelName);
14363
+ if (modelOverride === null) return;
14364
+ try {
14365
+ const sourceInput = record.source.type === "file" ? await readExtractFileInput(record.source.filePath || "", aiConfig) : {
14366
+ text: record.source.text || "",
14367
+ filePath: void 0
14368
+ };
14369
+ if (!await runAuditedSingleExtraction({
14370
+ aiexDir,
14371
+ config,
14372
+ aiConfig,
14373
+ schemaName: record.schemaName,
14374
+ text: sourceInput.text,
14375
+ filePath: sourceInput.filePath,
14376
+ source: record.source,
14377
+ modelOverride,
14378
+ retryOf: record.id,
14379
+ insert: !args.noInsert
14380
+ })) {
14381
+ failCommand();
14382
+ return;
14383
+ }
14384
+ outro("Done!");
14385
+ } catch (error) {
14386
+ failCommand(error instanceof Error ? error.message : String(error));
14387
+ }
14388
+ }
14389
+ });
14390
+ const rmCommand = defineCommand({
14391
+ meta: {
14392
+ name: "rm",
14393
+ description: "Delete an extraction audit record and cached upload"
14394
+ },
14395
+ args: { id: {
14396
+ type: "string",
14397
+ description: "Audit record id"
14398
+ } },
14399
+ async run({ args }) {
14400
+ const id = getIdArg(args);
14401
+ if (!id) {
14402
+ failCommand("Audit record id is required");
14403
+ return;
14404
+ }
14405
+ const config = createMigrationConfig(process.cwd());
14406
+ if (!await deleteExtractionAuditRecord(path.dirname(config.schemaPath), id)) {
14407
+ failCommand(`Extraction record not found: ${id}`);
14408
+ return;
14409
+ }
14410
+ consola.success(`Deleted extraction record: ${id}`);
14411
+ }
14412
+ });
13790
14413
  const extractCommand = defineCommand({
13791
14414
  meta: {
13792
14415
  name: "extract",
13793
14416
  description: "Extract structured data from text, images, or PDFs"
13794
14417
  },
14418
+ subCommands: {
14419
+ history: historyCommand,
14420
+ show: showCommand,
14421
+ retry: retryCommand,
14422
+ rm: rmCommand
14423
+ },
13795
14424
  args: {
13796
14425
  schema: {
13797
14426
  type: "string",
@@ -13822,9 +14451,15 @@ const extractCommand = defineCommand({
13822
14451
  type: "string",
13823
14452
  alias: "g",
13824
14453
  description: "Glob pattern to filter files in batch mode (e.g. \"*.pdf\")"
14454
+ },
14455
+ noInsert: {
14456
+ type: "boolean",
14457
+ description: "Extract and save JSON without inserting into SQLite",
14458
+ default: false
13825
14459
  }
13826
14460
  },
13827
- async run({ args }) {
14461
+ async run({ args, rawArgs }) {
14462
+ if (isExtractSubCommand(rawArgs)) return;
13828
14463
  intro(pc.inverse(" aiex extract "));
13829
14464
  const config = createMigrationConfig(process.cwd());
13830
14465
  const aiexDir = path.dirname(config.schemaPath);
@@ -13836,29 +14471,10 @@ const extractCommand = defineCommand({
13836
14471
  failCommand("Cannot combine -f/--file with -d/--dir");
13837
14472
  return;
13838
14473
  }
13839
- const aiConfig = await readAIConfig(aiexDir);
13840
- if (!aiConfig) {
13841
- failCommand("AI configuration not found. Please run \"aiex web\" to configure AI settings first");
13842
- return;
13843
- }
13844
- if (!aiConfig.provider.apiKey) {
13845
- failCommand("API Key not configured. Please configure AI settings in the Web interface first");
13846
- return;
13847
- }
13848
- if (!aiConfig.provider.models?.length) {
13849
- failCommand("No models configured. Please add at least one model in AI Settings");
13850
- return;
13851
- }
13852
- let modelOverride;
13853
- if (args.model) {
13854
- const matched = aiConfig.provider.models.find((m) => m.name === args.model);
13855
- if (!matched) {
13856
- const available = aiConfig.provider.models.map((m) => m.name).join(", ");
13857
- failCommand(`Model "${args.model}" not found in configuration. Available models: ${available}`);
13858
- return;
13859
- }
13860
- modelOverride = matched;
13861
- }
14474
+ const aiConfig = await loadConfiguredAI(aiexDir);
14475
+ if (!aiConfig) return;
14476
+ const modelOverride = resolveModelOverride(aiConfig, args.model);
14477
+ if (modelOverride === null) return;
13862
14478
  if (!args.schema && !args.text && !args.file && !args.dir) {
13863
14479
  if (await runInteractive(aiexDir, config, aiConfig, modelOverride)) outro("Done!");
13864
14480
  return;
@@ -13868,7 +14484,7 @@ const extractCommand = defineCommand({
13868
14484
  failCommand("Schema name (-s) is required in batch mode");
13869
14485
  return;
13870
14486
  }
13871
- const result = await runBatchExtraction(aiexDir, config, aiConfig, args.schema, args.dir, args.glob, modelOverride);
14487
+ const result = await runBatchExtraction(aiexDir, config, aiConfig, args.schema, args.dir, args.glob, modelOverride, { insert: !args.noInsert });
13872
14488
  if (!result.ok) {
13873
14489
  failCommand(result.error);
13874
14490
  return;
@@ -13901,7 +14517,24 @@ const extractCommand = defineCommand({
13901
14517
  return;
13902
14518
  }
13903
14519
  else if (args.text) text$1 = args.text;
13904
- if (!(await extractSingle(aiexDir, config, aiConfig, args.schema, text$1, filePath, modelOverride)).success) {
14520
+ if (!await runAuditedSingleExtraction({
14521
+ aiexDir,
14522
+ config,
14523
+ aiConfig,
14524
+ schemaName: args.schema,
14525
+ text: text$1,
14526
+ filePath,
14527
+ source: filePath ? {
14528
+ type: "file",
14529
+ filePath: args.file,
14530
+ fileName: path.basename(args.file)
14531
+ } : {
14532
+ type: "text",
14533
+ text: text$1
14534
+ },
14535
+ modelOverride,
14536
+ insert: !args.noInsert
14537
+ })) {
13905
14538
  failCommand();
13906
14539
  return;
13907
14540
  }
@@ -13960,7 +14593,18 @@ async function runInteractive(aiexDir, config, aiConfig, modelOverride) {
13960
14593
  cancel("Cancelled");
13961
14594
  return false;
13962
14595
  }
13963
- return (await extractSingle(aiexDir, config, aiConfig, schemaName, textContent, void 0, modelOverride)).success;
14596
+ return runAuditedSingleExtraction({
14597
+ aiexDir,
14598
+ config,
14599
+ aiConfig,
14600
+ schemaName,
14601
+ text: textContent,
14602
+ source: {
14603
+ type: "text",
14604
+ text: textContent
14605
+ },
14606
+ modelOverride
14607
+ });
13964
14608
  } else if (inputSource === "file") {
13965
14609
  const filePathStr = await text({
13966
14610
  message: "Enter file path:",
@@ -13975,7 +14619,20 @@ async function runInteractive(aiexDir, config, aiConfig, modelOverride) {
13975
14619
  const fp = filePathStr;
13976
14620
  try {
13977
14621
  const input = await readExtractFileInput(fp, aiConfig);
13978
- return (await extractSingle(aiexDir, config, aiConfig, schemaName, input.text, input.filePath, modelOverride)).success;
14622
+ return runAuditedSingleExtraction({
14623
+ aiexDir,
14624
+ config,
14625
+ aiConfig,
14626
+ schemaName,
14627
+ text: input.text,
14628
+ filePath: input.filePath,
14629
+ source: {
14630
+ type: "file",
14631
+ filePath: fp,
14632
+ fileName: path.basename(fp)
14633
+ },
14634
+ modelOverride
14635
+ });
13979
14636
  } catch (e) {
13980
14637
  consola.error(`Cannot read file: ${fp} — ${e instanceof Error ? e.message : String(e)}`);
13981
14638
  return false;
@@ -14183,6 +14840,32 @@ const schemaCommand = defineCommand({
14183
14840
 
14184
14841
  //#endregion
14185
14842
  //#region src/server/routes/ai.ts
14843
+ const JSON_EXT_RE = /\.json$/i;
14844
+ function extractSchemaFields(schema) {
14845
+ if (!schema?.properties || typeof schema.properties !== "object") return [];
14846
+ const fields = [];
14847
+ function visitProperties(properties, prefix = "") {
14848
+ for (const [name$1, property] of Object.entries(properties)) {
14849
+ const fieldName = prefix ? `${prefix}.${name$1}` : name$1;
14850
+ if (property?.type === "object" && property?.properties && typeof property.properties === "object") {
14851
+ visitProperties(property.properties, fieldName);
14852
+ continue;
14853
+ }
14854
+ if (property?.type === "array" && property?.items?.type === "object") continue;
14855
+ fields.push({
14856
+ name: fieldName,
14857
+ title: typeof property?.title === "string" ? property.title : void 0,
14858
+ description: typeof property?.description === "string" ? property.description : void 0
14859
+ });
14860
+ }
14861
+ }
14862
+ visitProperties(schema.properties);
14863
+ return fields;
14864
+ }
14865
+ async function loadSchemaFields(config, schemaName) {
14866
+ const safeName = path.basename(schemaName).replace(JSON_EXT_RE, "");
14867
+ return extractSchemaFields(await readFile(path.join(config.schemaPath, `${safeName}.json`)));
14868
+ }
14186
14869
  function aiRoutes(config) {
14187
14870
  const app = new Hono();
14188
14871
  const aiexDir = path.dirname(config.schemaPath);
@@ -14209,6 +14892,32 @@ function aiRoutes(config) {
14209
14892
  return c.json({});
14210
14893
  }
14211
14894
  });
14895
+ app.post("/ai/notion/inspect", async (c) => {
14896
+ try {
14897
+ const body = await c.req.json();
14898
+ const token = typeof body.token === "string" ? body.token : "";
14899
+ const databaseId = typeof body.databaseId === "string" ? body.databaseId : "";
14900
+ const schemaName = typeof body.schemaName === "string" ? body.schemaName : "";
14901
+ if (!schemaName) return c.json({
14902
+ success: false,
14903
+ error: "Schema is required"
14904
+ }, 400);
14905
+ const result = await inspectNotionDatabase({
14906
+ token,
14907
+ databaseId,
14908
+ schemaFields: await loadSchemaFields(config, schemaName)
14909
+ });
14910
+ return c.json({
14911
+ success: true,
14912
+ ...result
14913
+ });
14914
+ } catch (error) {
14915
+ return c.json({
14916
+ success: false,
14917
+ error: getErrorMessage(error)
14918
+ }, 400);
14919
+ }
14920
+ });
14212
14921
  app.put("/ai/config", async (c) => {
14213
14922
  try {
14214
14923
  const body = await c.req.json();
@@ -14226,6 +14935,19 @@ function aiRoutes(config) {
14226
14935
  success: false,
14227
14936
  error: "At least one model must be configured"
14228
14937
  }, 400);
14938
+ if (body.notion?.enabled) {
14939
+ if (!body.notion.token?.trim()) return c.json({
14940
+ success: false,
14941
+ error: "Notion token is required when Notion export is enabled"
14942
+ }, 400);
14943
+ for (const [schemaName, schemaConfig] of Object.entries(body.notion.schemas ?? {})) {
14944
+ if (typeof schemaConfig.databaseId === "string") schemaConfig.databaseId = parseNotionDatabaseId(schemaConfig.databaseId);
14945
+ if (!schemaConfig.databaseId?.trim()) return c.json({
14946
+ success: false,
14947
+ error: `Notion database ID is required for schema "${schemaName}"`
14948
+ }, 400);
14949
+ }
14950
+ }
14229
14951
  await writeAIConfig(aiexDir, AIConfigSchema.parse(body));
14230
14952
  return c.json({ success: true });
14231
14953
  } catch (error) {
@@ -14241,6 +14963,7 @@ function aiRoutes(config) {
14241
14963
  //#endregion
14242
14964
  //#region src/server/routes/data.ts
14243
14965
  const FILE_REGEX = /\.json$/;
14966
+ const EXTRACTION_TIMESTAMP_RE = /-\d{4}-\d{2}-\d{2}T/;
14244
14967
  const TIMESTAMP_CLEANUP = /(\d{2})-(\d{2})-(\d{2})/;
14245
14968
  const TIMESTAMP_TZ = /(\d{3})Z/;
14246
14969
  const tableParamSchema = z.object({ name: z.string().regex(/^[a-z][a-z0-9_]*$/) });
@@ -14257,6 +14980,12 @@ function invalidParamResponse$1(message) {
14257
14980
  if (!result.success) return c.json({ error: message }, 400);
14258
14981
  };
14259
14982
  }
14983
+ function schemaNameFromExtractionFile(name$1) {
14984
+ const stem = name$1.replace(FILE_REGEX, "");
14985
+ const match = stem.match(EXTRACTION_TIMESTAMP_RE);
14986
+ if (!match || typeof match.index !== "number" || match.index <= 0) return null;
14987
+ return stem.slice(0, match.index);
14988
+ }
14260
14989
  function createReadonlyQueryDb(databasePath) {
14261
14990
  return new Kysely({ dialect: new SqliteDialect({ database: new Database(databasePath, { readonly: true }) }) });
14262
14991
  }
@@ -14268,22 +14997,27 @@ function dataRoutes(config) {
14268
14997
  try {
14269
14998
  await fs.mkdir(extractedDir, { recursive: true });
14270
14999
  const jsonFiles = (await fs.readdir(extractedDir)).filter((f) => f.endsWith(".json") && !f.endsWith(".prompt.md"));
15000
+ const auditRecords = await listExtractionAuditRecords(aiexDir);
15001
+ const auditByOutputName = new Map(auditRecords.map((record) => [record.outputName, record]));
14271
15002
  const records = [];
14272
15003
  for (const file of jsonFiles) {
14273
- const name$1 = file.replace(FILE_REGEX, "");
14274
- const idx = name$1.lastIndexOf("-");
14275
- if (idx === -1) continue;
14276
- const schemaName = name$1.slice(0, idx);
14277
- const timestamp = name$1.slice(idx + 1).replace(/-/g, (d, i) => i === 4 || i === 7 ? "-" : d).replace(TIMESTAMP_CLEANUP, (_, h, m, s) => `${h}:${m}:${s}`).replace(TIMESTAMP_TZ, ".$1Z");
15004
+ const schemaName = schemaNameFromExtractionFile(file);
15005
+ if (!schemaName) continue;
15006
+ const timestamp = file.replace(FILE_REGEX, "").slice(schemaName.length + 1).replace(/-/g, (d, i) => i === 4 || i === 7 ? "-" : d).replace(TIMESTAMP_CLEANUP, (_, h, m, s) => `${h}:${m}:${s}`).replace(TIMESTAMP_TZ, ".$1Z");
14278
15007
  const filePath = path.join(extractedDir, file);
14279
15008
  try {
14280
15009
  const stat = await fs.stat(filePath);
15010
+ const audit = auditByOutputName.get(file);
15011
+ const notionPages = audit?.notionPages?.length ? audit.notionPages : void 0;
14281
15012
  records.push({
14282
15013
  name: file,
14283
15014
  schemaName,
14284
15015
  timestamp,
14285
15016
  fileSize: stat.size,
14286
- modifiedAt: stat.mtime.toISOString()
15017
+ modifiedAt: stat.mtime.toISOString(),
15018
+ notionStatus: notionPages ? "synced" : audit?.status === "failed" ? "failed" : "not_synced",
15019
+ notionPages,
15020
+ notionError: !notionPages && audit?.status === "failed" ? audit.error : void 0
14287
15021
  });
14288
15022
  } catch {
14289
15023
  continue;
@@ -14405,6 +15139,308 @@ function dataRoutes(config) {
14405
15139
  return c.json({ error: "Extraction result not found" }, 404);
14406
15140
  }
14407
15141
  });
15142
+ app.post("/data/:name/notion/retry", zValidator("param", extractionFileParamSchema, invalidParamResponse$1("Invalid extraction file name")), async (c) => {
15143
+ const { name: name$1 } = c.req.valid("param");
15144
+ const filePath = path.join(extractedDir, name$1);
15145
+ const schemaName = schemaNameFromExtractionFile(name$1);
15146
+ if (!schemaName) return c.json({
15147
+ success: false,
15148
+ error: "Cannot infer schema name from extraction file name"
15149
+ }, 400);
15150
+ const aiConfig = await readAIConfig(aiexDir);
15151
+ if (!aiConfig?.notion?.enabled) return c.json({
15152
+ success: false,
15153
+ error: "Notion export is not enabled. Configure Notion settings first."
15154
+ }, 400);
15155
+ if (!aiConfig.notion.schemas?.[schemaName]?.databaseId?.trim()) return c.json({
15156
+ success: false,
15157
+ error: `Notion database is not configured for schema "${schemaName}".`
15158
+ }, 400);
15159
+ try {
15160
+ const data = await readFile(filePath);
15161
+ if (!data || typeof data !== "object" || Array.isArray(data)) return c.json({
15162
+ success: false,
15163
+ error: "Extraction result is not a JSON object and cannot be written to Notion."
15164
+ }, 400);
15165
+ const page = await writeNotionPage(aiConfig.notion, schemaName, data);
15166
+ const notionPages = [{
15167
+ databaseId: page.databaseId,
15168
+ pageId: page.pageId
15169
+ }];
15170
+ let record = (await listExtractionAuditRecords(aiexDir)).find((record$1) => record$1.outputName === name$1);
15171
+ if (!record) record = await createExtractionAuditRecord(aiexDir, {
15172
+ schemaName,
15173
+ source: {
15174
+ type: "file",
15175
+ filePath,
15176
+ fileName: name$1
15177
+ }
15178
+ });
15179
+ if (record) await updateExtractionAuditRecord(aiexDir, record.id, {
15180
+ status: "succeeded",
15181
+ outputPath: filePath,
15182
+ outputName: name$1,
15183
+ notionPages,
15184
+ error: void 0
15185
+ });
15186
+ return c.json({
15187
+ success: true,
15188
+ notionPages
15189
+ });
15190
+ } catch (error) {
15191
+ const message = error instanceof Error ? error.message : String(error);
15192
+ const record = (await listExtractionAuditRecords(aiexDir)).find((record$1) => record$1.outputName === name$1);
15193
+ if (record) await updateExtractionAuditRecord(aiexDir, record.id, {
15194
+ status: "failed",
15195
+ outputPath: filePath,
15196
+ outputName: name$1,
15197
+ error: message
15198
+ });
15199
+ return c.json({
15200
+ success: false,
15201
+ error: message
15202
+ }, 500);
15203
+ }
15204
+ });
15205
+ return app;
15206
+ }
15207
+
15208
+ //#endregion
15209
+ //#region src/server/routes/extract.ts
15210
+ function getFormString(value) {
15211
+ if (Array.isArray(value)) return getFormString(value[0]);
15212
+ return typeof value === "string" ? value.trim() : "";
15213
+ }
15214
+ function getFormFile(value) {
15215
+ if (Array.isArray(value)) return getFormFile(value[0]);
15216
+ return value instanceof File && value.size > 0 ? value : null;
15217
+ }
15218
+ function safeUploadName(name$1) {
15219
+ return path.basename(name$1).replace(/[^\w.-]/g, "_") || "upload.txt";
15220
+ }
15221
+ async function saveUploadToFile(file, uploadsDir, id) {
15222
+ await fs.mkdir(uploadsDir, { recursive: true });
15223
+ const filePath = path.join(uploadsDir, `${id}-${safeUploadName(file.name)}`);
15224
+ const buffer = Buffer.from(await file.arrayBuffer());
15225
+ await fs.writeFile(filePath, buffer);
15226
+ return filePath;
15227
+ }
15228
+ async function executeAuditedExtraction(input) {
15229
+ const aiConfig = await readAIConfig(input.aiexDir);
15230
+ if (!aiConfig) {
15231
+ const record$1 = await updateExtractionAuditRecord(input.aiexDir, input.auditId, {
15232
+ status: "failed",
15233
+ error: "AI configuration not found. Configure AI settings first."
15234
+ });
15235
+ return new Response(JSON.stringify({
15236
+ success: false,
15237
+ error: record$1.error,
15238
+ auditId: record$1.id
15239
+ }), {
15240
+ status: 400,
15241
+ headers: { "content-type": "application/json" }
15242
+ });
15243
+ }
15244
+ if (!aiConfig.provider.apiKey) {
15245
+ const record$1 = await updateExtractionAuditRecord(input.aiexDir, input.auditId, {
15246
+ status: "failed",
15247
+ error: "API Key not configured. Configure AI settings first."
15248
+ });
15249
+ return new Response(JSON.stringify({
15250
+ success: false,
15251
+ error: record$1.error,
15252
+ auditId: record$1.id
15253
+ }), {
15254
+ status: 400,
15255
+ headers: { "content-type": "application/json" }
15256
+ });
15257
+ }
15258
+ if (!aiConfig.provider.models?.length) {
15259
+ const record$1 = await updateExtractionAuditRecord(input.aiexDir, input.auditId, {
15260
+ status: "failed",
15261
+ error: "No models configured. Add at least one model in AI Settings."
15262
+ });
15263
+ return new Response(JSON.stringify({
15264
+ success: false,
15265
+ error: record$1.error,
15266
+ auditId: record$1.id
15267
+ }), {
15268
+ status: 400,
15269
+ headers: { "content-type": "application/json" }
15270
+ });
15271
+ }
15272
+ const modelOverride = input.modelName ? aiConfig.provider.models.find((model) => model.name === input.modelName) : void 0;
15273
+ if (input.modelName && !modelOverride) {
15274
+ const record$1 = await updateExtractionAuditRecord(input.aiexDir, input.auditId, {
15275
+ status: "failed",
15276
+ error: `Model "${input.modelName}" not found in AI settings`
15277
+ });
15278
+ return new Response(JSON.stringify({
15279
+ success: false,
15280
+ error: record$1.error,
15281
+ auditId: record$1.id
15282
+ }), {
15283
+ status: 400,
15284
+ headers: { "content-type": "application/json" }
15285
+ });
15286
+ }
15287
+ let inputText = input.text;
15288
+ let inputFilePath = input.filePath;
15289
+ if (input.filePath) {
15290
+ const source = await readExtractFileInput(input.filePath, aiConfig);
15291
+ inputText = source.text;
15292
+ inputFilePath = source.filePath;
15293
+ }
15294
+ const result = await extractSingle(input.aiexDir, input.config, aiConfig, input.schemaName, inputText, inputFilePath, modelOverride, { quiet: true });
15295
+ if (!result.success) {
15296
+ const record$1 = await updateExtractionAuditRecord(input.aiexDir, input.auditId, {
15297
+ status: "failed",
15298
+ error: result.error || "Extraction failed"
15299
+ });
15300
+ return new Response(JSON.stringify({
15301
+ success: false,
15302
+ error: record$1.error,
15303
+ auditId: record$1.id
15304
+ }), {
15305
+ status: 500,
15306
+ headers: { "content-type": "application/json" }
15307
+ });
15308
+ }
15309
+ const notionPages = [];
15310
+ if (aiConfig.notion?.enabled && aiConfig.notion.schemas?.[input.schemaName]?.databaseId?.trim()) try {
15311
+ if (!result.data || typeof result.data !== "object" || Array.isArray(result.data)) throw new Error("Extraction result is not an object and cannot be written to Notion.");
15312
+ notionPages.push(await writeNotionPage(aiConfig.notion, input.schemaName, result.data));
15313
+ } catch (error) {
15314
+ const record$1 = await updateExtractionAuditRecord(input.aiexDir, input.auditId, {
15315
+ status: "failed",
15316
+ outputPath: result.outputPath,
15317
+ outputName: result.outputPath ? path.basename(result.outputPath) : void 0,
15318
+ tablesInserted: result.tablesInserted,
15319
+ tokensUsed: result.tokensUsed,
15320
+ error: error instanceof Error ? error.message : String(error)
15321
+ });
15322
+ return new Response(JSON.stringify({
15323
+ success: false,
15324
+ error: record$1.error,
15325
+ auditId: record$1.id
15326
+ }), {
15327
+ status: 500,
15328
+ headers: { "content-type": "application/json" }
15329
+ });
15330
+ }
15331
+ const record = await updateExtractionAuditRecord(input.aiexDir, input.auditId, {
15332
+ status: "succeeded",
15333
+ outputPath: result.outputPath,
15334
+ outputName: result.outputPath ? path.basename(result.outputPath) : void 0,
15335
+ tablesInserted: result.tablesInserted,
15336
+ notionPages: notionPages.length > 0 ? notionPages : void 0,
15337
+ tokensUsed: result.tokensUsed
15338
+ });
15339
+ return new Response(JSON.stringify({
15340
+ success: true,
15341
+ outputPath: record.outputPath,
15342
+ outputName: record.outputName,
15343
+ tablesInserted: record.tablesInserted,
15344
+ notionPages: record.notionPages,
15345
+ tokensUsed: record.tokensUsed,
15346
+ auditId: record.id
15347
+ }), {
15348
+ status: 200,
15349
+ headers: { "content-type": "application/json" }
15350
+ });
15351
+ }
15352
+ function extractRoutes(config) {
15353
+ const app = new Hono();
15354
+ const aiexDir = path.dirname(config.schemaPath);
15355
+ const uploadsDir = path.join(aiexDir, "uploads");
15356
+ app.get("/extract/records", async (c) => {
15357
+ return c.json(await listExtractionAuditRecords(aiexDir));
15358
+ });
15359
+ app.post("/extract", async (c) => {
15360
+ try {
15361
+ const body = await c.req.parseBody();
15362
+ const schemaName = getFormString(body.schema);
15363
+ const text$1 = getFormString(body.text);
15364
+ const modelName = getFormString(body.model);
15365
+ const file = getFormFile(body.file);
15366
+ if (!schemaName) return c.json({
15367
+ success: false,
15368
+ error: "Schema is required"
15369
+ }, 400);
15370
+ if (!text$1 && !file) return c.json({
15371
+ success: false,
15372
+ error: "Provide text or upload a file to extract"
15373
+ }, 400);
15374
+ if (text$1 && file) return c.json({
15375
+ success: false,
15376
+ error: "Text and file input cannot be used together"
15377
+ }, 400);
15378
+ const audit = await createExtractionAuditRecord(aiexDir, {
15379
+ schemaName,
15380
+ modelName,
15381
+ source: file ? {
15382
+ type: "file",
15383
+ fileName: safeUploadName(file.name)
15384
+ } : {
15385
+ type: "text",
15386
+ text: text$1
15387
+ }
15388
+ });
15389
+ let filePath;
15390
+ if (file) {
15391
+ filePath = await saveUploadToFile(file, uploadsDir, audit.id);
15392
+ await updateExtractionAuditRecord(aiexDir, audit.id, { source: {
15393
+ type: "file",
15394
+ filePath,
15395
+ fileName: safeUploadName(file.name)
15396
+ } });
15397
+ }
15398
+ return executeAuditedExtraction({
15399
+ aiexDir,
15400
+ config,
15401
+ auditId: audit.id,
15402
+ schemaName,
15403
+ text: text$1,
15404
+ filePath,
15405
+ modelName
15406
+ });
15407
+ } catch (error) {
15408
+ return c.json({
15409
+ success: false,
15410
+ error: error instanceof Error ? error.message : String(error)
15411
+ }, 500);
15412
+ }
15413
+ });
15414
+ app.post("/extract/records/:id/retry", async (c) => {
15415
+ const original = await readExtractionAuditRecord(aiexDir, c.req.param("id"));
15416
+ if (!original) return c.json({
15417
+ success: false,
15418
+ error: "Extraction record not found"
15419
+ }, 404);
15420
+ return executeAuditedExtraction({
15421
+ aiexDir,
15422
+ config,
15423
+ auditId: (await createExtractionAuditRecord(aiexDir, {
15424
+ schemaName: original.schemaName,
15425
+ modelName: original.modelName,
15426
+ source: original.source,
15427
+ retryOf: original.id
15428
+ })).id,
15429
+ schemaName: original.schemaName,
15430
+ text: original.source.type === "text" ? original.source.text ?? "" : "",
15431
+ filePath: original.source.type === "file" ? original.source.filePath : void 0,
15432
+ modelName: original.modelName
15433
+ });
15434
+ });
15435
+ app.delete("/extract/records/:id", async (c) => {
15436
+ const id = c.req.param("id");
15437
+ if (!await readExtractionAuditRecord(aiexDir, id)) return c.json({
15438
+ success: false,
15439
+ error: "Extraction record not found"
15440
+ }, 404);
15441
+ await deleteExtractionAuditRecord(aiexDir, id);
15442
+ return c.json({ success: true });
15443
+ });
14408
15444
  return app;
14409
15445
  }
14410
15446
 
@@ -14530,6 +15566,7 @@ function createApp(config, staticDir) {
14530
15566
  app.use("*", cors({ origin: (origin) => LOCAL_ORIGIN_RE.test(origin) ? origin : null }));
14531
15567
  app.route("/api", schemaRoutes(config));
14532
15568
  app.route("/api", aiRoutes(config));
15569
+ app.route("/api", extractRoutes(config));
14533
15570
  app.route("/api", dataRoutes(config));
14534
15571
  app.use("/*", serveStatic({
14535
15572
  root: staticDir,