aiex-cli 0.0.1 → 0.0.2-beta.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +22 -4
- package/dist/cli.mjs +1085 -48
- package/dist/{doctor-collector-CykRm0fC.mjs → doctor-collector-DlG_mJKG.mjs} +31 -4
- package/dist/index.mjs +1 -1
- package/dist/web/assets/AISettings-DwXkpWZU.js +346 -0
- package/dist/web/assets/ExtractionViewer-DNcRCmNK.js +1 -0
- package/dist/web/assets/{api-client-D2Y_-4JM.js → api-client-CG1VV5gz.js} +1 -1
- package/dist/web/assets/index-C2Nbrhs2.css +2 -0
- package/dist/web/assets/{index-DVDVw-GK.js → index-CWSsEI38.js} +38 -38
- package/dist/web/index.html +3 -3
- package/package.json +2 -1
- package/dist/web/assets/AISettings-CI6Lgx0p.js +0 -339
- package/dist/web/assets/ExtractionViewer-CsdK1kKK.js +0 -1
- package/dist/web/assets/index-C9N8oWt4.css +0 -2
package/dist/cli.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { S as version, T as formatDoctorDiagnosticsJson, _ as createConfig, a as parseJsonSchema, b as name, c as getDefaultAIConfig, d as DEFAULT_MARKITDOWN_CONFIG, f as DEFAULT_MINERU_CONFIG, g as AIConfigSchema, h as PLACEHOLDER_TEXT, i as JsonSchemaDefinitionSchema, l as readAIConfig, m as PLACEHOLDER_SCHEMA, n as createMigrationConfig, o as toSnakeCase, p as DEFAULT_PROMPT_CONFIG, s as generateDrizzleSchema, t as collectDoctorDiagnostics, u as writeAIConfig, v as seedConfig, w as doctorDiagnosticsTableRows, x as package_default, y as description } from "./doctor-collector-DlG_mJKG.mjs";
|
|
2
2
|
import { createRequire } from "node:module";
|
|
3
3
|
import fs from "node:fs/promises";
|
|
4
4
|
import os from "node:os";
|
|
@@ -23,9 +23,10 @@ import { jsonrepair } from "jsonrepair";
|
|
|
23
23
|
import fs$1 from "node:fs";
|
|
24
24
|
import Database from "better-sqlite3";
|
|
25
25
|
import { glob, globSync } from "tinyglobby";
|
|
26
|
+
import { Client, extractNotionId } from "@notionhq/client";
|
|
27
|
+
import { Buffer } from "node:buffer";
|
|
26
28
|
import { execa } from "execa";
|
|
27
29
|
import { extractText, getDocumentProxy, getMeta } from "unpdf";
|
|
28
|
-
import { Buffer } from "node:buffer";
|
|
29
30
|
import { execFile } from "node:child_process";
|
|
30
31
|
import { promisify } from "node:util";
|
|
31
32
|
import { serve } from "@hono/node-server";
|
|
@@ -12989,6 +12990,7 @@ function initLangfuse(config) {
|
|
|
12989
12990
|
}
|
|
12990
12991
|
const SYSTEM_PROMPT_REGEX = /## System Prompt\n([\s\S]*?)(?=## User Prompt|$)/;
|
|
12991
12992
|
const USER_PROMPT_REGEX = /## User Prompt Template\n([\s\S]*)$/;
|
|
12993
|
+
const OPENAI_COMPATIBLE_PROVIDER_NAME = "openai-compatible";
|
|
12992
12994
|
function detectMimeType(filePath) {
|
|
12993
12995
|
return mime.getType(filePath) ?? "application/octet-stream";
|
|
12994
12996
|
}
|
|
@@ -13147,8 +13149,9 @@ async function extractStructuredData(input) {
|
|
|
13147
13149
|
if (useTelemetry) initLangfuse(config);
|
|
13148
13150
|
const provider = createOpenAICompatible({
|
|
13149
13151
|
baseURL: config.provider.baseURL,
|
|
13150
|
-
name:
|
|
13151
|
-
apiKey: config.provider.apiKey
|
|
13152
|
+
name: OPENAI_COMPATIBLE_PROVIDER_NAME,
|
|
13153
|
+
apiKey: config.provider.apiKey,
|
|
13154
|
+
supportsStructuredOutputs: useStructuredOutput
|
|
13152
13155
|
});
|
|
13153
13156
|
let system;
|
|
13154
13157
|
let user;
|
|
@@ -13381,6 +13384,342 @@ async function savePromptSnapshot(schema, aiexDir) {
|
|
|
13381
13384
|
return outputPath;
|
|
13382
13385
|
}
|
|
13383
13386
|
|
|
13387
|
+
//#endregion
|
|
13388
|
+
//#region src/core/extraction-audit.ts
|
|
13389
|
+
// Shape an audit id must have before readExtractionAuditRecord will look it up on disk:
// one or more word characters, dots or hyphens.
const AUDIT_ID_RE = /^[\w.-]+$/;
// A record still marked "running" whose last update is older than this (30 minutes, in ms)
// is treated as stale by isStale.
const STALE_AFTER_MS = 30 * 60 * 1000;
|
|
13391
|
+
/**
 * Resolve the directory that holds extraction audit records.
 *
 * @param {string} aiexDir - Root aiex working directory.
 * @returns {string} The "extracted/_audit" directory below `aiexDir`.
 */
function auditDir(aiexDir) {
	const segments = [aiexDir, "extracted", "_audit"];
	return path.join(...segments);
}
|
|
13394
|
+
/**
 * Resolve the JSON file path of a single audit record.
 *
 * @param {string} aiexDir - Root aiex working directory.
 * @param {string} id - Audit record id; used verbatim as the file base name.
 * @returns {string} Path of `{id}.json` inside the audit directory.
 */
function auditPath(aiexDir, id) {
	const fileName = `${id}.json`;
	return path.join(auditDir(aiexDir), fileName);
}
|
|
13397
|
+
/**
 * Build a unique, file-name-safe id for a new extraction audit record.
 *
 * The id has the form `{schemaName}-{timestamp}-{suffix}` where the timestamp is the current
 * ISO time with ":" and "." replaced by "-" and the suffix is a short random base-36 string.
 *
 * Characters of `schemaName` outside `[\w.-]` are replaced with "_". Without this, a schema
 * name containing e.g. a space or a path separator would produce an id that is written to disk
 * but rejected by the `[\w.-]+` id check applied when records are read back, and could place
 * the audit file outside the audit directory.
 *
 * @param {string} schemaName - Name of the schema the extraction runs against.
 * @returns {string} The generated audit id.
 */
function createAuditId(schemaName) {
	const safeSchemaName = String(schemaName).replace(/[^\w.-]/g, "_");
	const timestamp = (/* @__PURE__ */ new Date()).toISOString().replace(/[:.]/g, "-");
	const suffix = Math.random().toString(36).slice(2, 8);
	return `${safeSchemaName}-${timestamp}-${suffix}`;
}
|
|
13400
|
+
/**
 * Create a new audit record in the "running" state and persist it.
 *
 * The audit directory is created if missing, then the record is written through the
 * file's `writeFile` helper (defined outside this block) with 2-space indentation and
 * "\n" line endings.
 *
 * @param {string} aiexDir - Root aiex working directory.
 * @param {object} input - Record seed: `schemaName`, optional `modelName`, `source`, optional `retryOf`.
 * @returns {Promise<object>} The record that was written.
 */
async function createExtractionAuditRecord(aiexDir, input) {
	const timestamp = (/* @__PURE__ */ new Date()).toISOString();
	const id = createAuditId(input.schemaName);
	const record = {
		id,
		status: "running",
		schemaName: input.schemaName,
		// An empty model name is stored as "absent" rather than as an empty string.
		modelName: input.modelName || void 0,
		source: input.source,
		retryOf: input.retryOf,
		createdAt: timestamp,
		updatedAt: timestamp
	};
	const writeOptions = {
		spaces: 2,
		EOL: "\n"
	};
	await fs.mkdir(auditDir(aiexDir), { recursive: true });
	await writeFile(auditPath(aiexDir, id), record, writeOptions);
	return record;
}
|
|
13419
|
+
/**
 * Merge a patch into an existing audit record and write it back.
 *
 * Patch keys override stored keys, except that a missing `patch.source` keeps the stored
 * source. `updatedAt` is always refreshed to the current time.
 *
 * @param {string} aiexDir - Root aiex working directory.
 * @param {string} id - Id of the record to update.
 * @param {object} patch - Fields to overwrite on the record.
 * @returns {Promise<object>} The merged record that was written.
 * @throws {Error} When no record can be read for `id`.
 */
async function updateExtractionAuditRecord(aiexDir, id, patch) {
	const existing = await readExtractionAuditRecord(aiexDir, id);
	if (!existing) {
		throw new Error(`Extraction audit record not found: ${id}`);
	}
	const source = patch.source ?? existing.source;
	const updatedAt = (/* @__PURE__ */ new Date()).toISOString();
	const record = {
		...existing,
		...patch,
		source,
		updatedAt
	};
	const writeOptions = {
		spaces: 2,
		EOL: "\n"
	};
	await fs.mkdir(auditDir(aiexDir), { recursive: true });
	await writeFile(auditPath(aiexDir, id), record, writeOptions);
	return record;
}
|
|
13435
|
+
/**
 * Read one audit record from disk.
 *
 * @param {string} aiexDir - Root aiex working directory.
 * @param {string} id - Audit record id; must match AUDIT_ID_RE.
 * @returns {Promise<object|null>} The value returned by the file's `readFile` helper, or
 *   `null` when the id is rejected or reading fails for any reason.
 */
async function readExtractionAuditRecord(aiexDir, id) {
	const idIsAcceptable = AUDIT_ID_RE.test(id);
	if (idIsAcceptable) {
		try {
			const record = await readFile(auditPath(aiexDir, id));
			return record;
		} catch {
			// Any read failure is reported as "no record".
		}
	}
	return null;
}
|
|
13443
|
+
/**
 * Decide whether a record looks abandoned.
 *
 * @param {object} record - Audit record with `status` and `updatedAt`.
 * @returns {boolean} `true` only for a "running" record whose `updatedAt` parses to a time
 *   more than STALE_AFTER_MS milliseconds in the past.
 */
function isStale(record) {
	if (record.status === "running") {
		const updatedMs = Date.parse(record.updatedAt);
		// An unparseable timestamp never counts as stale.
		if (Number.isNaN(updatedMs)) return false;
		const ageMs = Date.now() - updatedMs;
		return ageMs > STALE_AFTER_MS;
	}
	return false;
}
|
|
13448
|
+
/**
 * Convert an abandoned "running" record into a persisted "stale" record.
 *
 * Records for which `isStale` is false are returned untouched and nothing is written.
 *
 * @param {string} aiexDir - Root aiex working directory.
 * @param {object} record - Audit record as read from disk.
 * @returns {Promise<object>} The original record, or the rewritten stale copy.
 */
async function markStaleIfNeeded(aiexDir, record) {
	if (isStale(record)) {
		const staleRecord = {
			...record,
			status: "stale",
			// Keep an error that was already recorded; otherwise explain why the run is stale.
			error: record.error ?? "Extraction did not finish. It may have been interrupted.",
			updatedAt: (/* @__PURE__ */ new Date()).toISOString()
		};
		const target = auditPath(aiexDir, staleRecord.id);
		await writeFile(target, staleRecord, {
			spaces: 2,
			EOL: "\n"
		});
		return staleRecord;
	}
	return record;
}
|
|
13462
|
+
/**
 * List all readable audit records, newest first.
 *
 * Every `*.json` file in the audit directory is read and passed through `markStaleIfNeeded`.
 * Files that cannot be read are skipped. Entries that are not objects with a string
 * `createdAt` are skipped as well: previously a single such file (for example one containing
 * just `5`) made the sort comparator throw, and the outer catch then discarded the entire
 * history by returning an empty list.
 *
 * @param {string} aiexDir - Root aiex working directory.
 * @returns {Promise<object[]>} Records sorted by `createdAt` descending; `[]` when the audit
 *   directory cannot be listed.
 */
async function listExtractionAuditRecords(aiexDir) {
	try {
		const dir = auditDir(aiexDir);
		const files = await fs.readdir(dir);
		const loaded = await Promise.all(files.filter((file) => file.endsWith(".json")).map(async (file) => {
			try {
				return await markStaleIfNeeded(aiexDir, await readFile(path.join(dir, file)));
			} catch {
				// One unreadable file must not hide the others.
				return null;
			}
		}));
		return loaded
			.filter((record) => !!record && typeof record === "object" && typeof record.createdAt === "string")
			.sort((a, b) => b.createdAt.localeCompare(a.createdAt));
	} catch {
		return [];
	}
}
|
|
13477
|
+
/**
 * Check whether `childPath` lies strictly inside `parentPath`.
 *
 * Both paths are resolved first. The parent itself does not count as "inside".
 * A relative path escapes the parent only when it is exactly ".." or starts with a ".."
 * path segment; a plain `startsWith("..")` test would also reject legitimate children whose
 * name merely begins with two dots (for example "..draft.pdf").
 *
 * @param {string} childPath - Path to test.
 * @param {string} parentPath - Directory that should contain `childPath`.
 * @returns {boolean} `true` when `childPath` is a descendant of `parentPath`.
 */
function isPathInside(childPath, parentPath) {
	const relative = path.relative(path.resolve(parentPath), path.resolve(childPath));
	// Empty means "same path"; absolute means "different root" (e.g. another drive).
	if (!relative || path.isAbsolute(relative)) return false;
	return relative !== ".." && !relative.startsWith(`..${path.sep}`);
}
|
|
13481
|
+
/**
 * Delete an audit record together with the upload files that belong to it.
 *
 * Removed, in order: the record's source file (only when it is a file source located inside
 * the "uploads" directory), every file in "uploads" whose name starts with `{id}-`, and the
 * audit JSON itself. Individual unlink/readdir failures are ignored.
 *
 * @param {string} aiexDir - Root aiex working directory.
 * @param {string} id - Audit record id.
 * @returns {Promise<boolean>} `false` when no record could be read for `id`, otherwise `true`.
 */
async function deleteExtractionAuditRecord(aiexDir, id) {
	const record = await readExtractionAuditRecord(aiexDir, id);
	if (!record) return false;
	const uploadsDir = path.join(aiexDir, "uploads");
	const { source } = record;
	// Never delete a user's original file that lives outside the uploads directory.
	const ownsSourceFile = source.type === "file" && source.filePath && isPathInside(source.filePath, uploadsDir);
	if (ownsSourceFile) {
		await fs.unlink(source.filePath).catch(() => {});
	}
	const uploadFiles = await fs.readdir(uploadsDir).catch(() => []);
	const prefix = `${id}-`;
	const removals = [];
	for (const file of uploadFiles) {
		if (file.startsWith(prefix)) {
			removals.push(fs.unlink(path.join(uploadsDir, file)).catch(() => {}));
		}
	}
	await Promise.all(removals);
	await fs.unlink(auditPath(aiexDir, id)).catch(() => {});
	return true;
}
|
|
13491
|
+
|
|
13492
|
+
//#endregion
|
|
13493
|
+
//#region src/core/notion-sink.ts
|
|
13494
|
+
// Longest text (in string length units) that truncateText lets through unchanged.
const RICH_TEXT_LIMIT = 2000;
// 32 hexadecimal digits without separators.
const UUID_RE = /^[0-9a-f]{32}$/i;
// The same id written in 8-4-4-4-12 hyphenated form.
const HYPHENATED_UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
|
|
13497
|
+
/**
 * Cut a string down to at most RICH_TEXT_LIMIT UTF-16 code units.
 *
 * When the cut would land between the two halves of a surrogate pair (e.g. in the middle of
 * an emoji), the dangling high surrogate is dropped as well, so the result never ends in a
 * lone surrogate.
 *
 * @param {string} value - Text to limit.
 * @returns {string} `value` itself when it is short enough, otherwise a prefix of it.
 */
function truncateText(value) {
	if (value.length <= RICH_TEXT_LIMIT) return value;
	let end = RICH_TEXT_LIMIT;
	const lastKeptUnit = value.charCodeAt(end - 1);
	// 0xD800-0xDBFF are high surrogates: the first half of a two-unit character.
	if (lastKeptUnit >= 0xd800 && lastKeptUnit <= 0xdbff) end -= 1;
	return value.slice(0, end);
}
|
|
13500
|
+
/**
 * Turn an arbitrary extracted value into text.
 *
 * - `null` / `undefined` become "".
 * - Strings are returned unchanged.
 * - Numbers, booleans and bigints go through `String()` (bigint would make
 *   `JSON.stringify` throw).
 * - Everything else is JSON-encoded. `JSON.stringify` yields `undefined` for values it
 *   cannot represent (functions, symbols); that case is mapped to "" so callers can always
 *   treat the result as a string.
 *
 * @param {unknown} value - Value to convert.
 * @returns {string} Text form of `value`.
 */
function stringifyValue(value) {
	if (value === null || value === void 0) return "";
	if (typeof value === "string") return value;
	if (typeof value === "number" || typeof value === "boolean" || typeof value === "bigint") return String(value);
	return JSON.stringify(value) ?? "";
}
|
|
13506
|
+
/**
 * Coerce a value to a finite number.
 *
 * @param {unknown} value - Candidate value.
 * @returns {number|null} The number itself when it is finite, the `Number()` conversion of a
 *   non-blank string when that is finite, otherwise `null`.
 */
function asNumber(value) {
	switch (typeof value) {
		case "number": return Number.isFinite(value) ? value : null;
		case "string": {
			// Blank strings would convert to 0; treat them as "no number" instead.
			if (!value.trim()) return null;
			const parsed = Number(value);
			return Number.isFinite(parsed) ? parsed : null;
		}
		default: return null;
	}
}
|
|
13514
|
+
/**
 * Coerce a value to a boolean.
 *
 * @param {unknown} value - Candidate value.
 * @returns {boolean} Booleans pass through; numbers are `true` unless equal to 0; strings are
 *   `true` only when, trimmed and lower-cased, they equal "true", "yes", "1" or "y"; any other
 *   value uses its truthiness.
 */
function asBoolean(value) {
	switch (typeof value) {
		case "boolean": return value;
		case "number": return value !== 0;
		case "string": {
			const affirmative = ["true", "yes", "1", "y"];
			return affirmative.includes(value.trim().toLowerCase());
		}
		default: return !!value;
	}
}
|
|
13528
|
+
/**
 * Convert a value into the `start` string of a date property.
 *
 * - Valid `Date` instances and finite numbers (epoch milliseconds) become full ISO timestamps.
 * - A string that is exactly a valid calendar date in `YYYY-MM-DD` form is returned as that
 *   date (trimmed). Expanding it to a midnight-UTC timestamp would turn a plain date into a
 *   date-with-time.
 * - Any other non-blank string is parsed with `Date.parse` and returned as a full ISO
 *   timestamp.
 *
 * @param {unknown} value - Candidate value.
 * @returns {string|null} ISO date or timestamp, or `null` when nothing usable can be derived.
 */
function asDateStart(value) {
	if (value instanceof Date) return Number.isNaN(value.getTime()) ? null : value.toISOString();
	if (typeof value === "number") {
		if (!Number.isFinite(value)) return null;
		const date = new Date(value);
		// Finite numbers outside the representable date range still yield an invalid Date.
		return Number.isNaN(date.getTime()) ? null : date.toISOString();
	}
	if (typeof value !== "string") return null;
	const text = value.trim();
	if (!text) return null;
	if (/^\d{4}-\d{2}-\d{2}$/.test(text)) {
		const dayMs = Date.parse(text);
		// The round trip rejects impossible days that a lenient parser may roll over.
		if (!Number.isNaN(dayMs) && new Date(dayMs).toISOString().startsWith(text)) return text;
	}
	const ms = Date.parse(value);
	if (Number.isNaN(ms)) return null;
	return new Date(ms).toISOString();
}
|
|
13541
|
+
/**
 * Convert a value into a list of non-empty, trimmed strings.
 *
 * Arrays are converted item by item; any other value is treated as a single item.
 * Items whose text form is empty after trimming are dropped.
 *
 * @param {unknown} value - Array or single value.
 * @returns {string[]} Trimmed, non-empty text items.
 */
function asStringArray(value) {
	const items = Array.isArray(value) ? value : [value];
	const texts = [];
	for (const item of items) {
		const text$1 = stringifyValue(item).trim();
		if (text$1) texts.push(text$1);
	}
	return texts;
}
|
|
13546
|
+
/**
 * Look up a value in `data` by key or by dot-separated path.
 *
 * Resolution order:
 * 1. A path without "." is an own-property lookup on `data`.
 * 2. A path with "." is first tried as a literal own key of `data` (so a top-level key such
 *    as "v1.0" can be found at all).
 * 3. Otherwise the path is split on "." and walked through nested plain objects. The walk
 *    stops as "not found" at any null/primitive/array step or missing own key.
 *
 * @param {object} data - Object to read from.
 * @param {string} path$1 - Key or dotted path.
 * @returns {{found: boolean, value: unknown}} `found` tells a present `undefined` apart from
 *   a missing key.
 */
function getValueAtPath(data, path$1) {
	const hit = (value) => ({
		found: true,
		value
	});
	const miss = () => ({
		found: false,
		value: void 0
	});
	if (!path$1.includes(".")) return Object.hasOwn(data, path$1) ? hit(data[path$1]) : miss();
	const isPlainContainer = (candidate) => !!candidate && typeof candidate === "object" && !Array.isArray(candidate);
	// A literal key containing dots takes precedence over the nested interpretation.
	if (isPlainContainer(data) && Object.hasOwn(data, path$1)) return hit(data[path$1]);
	let current = data;
	for (const part of path$1.split(".")) {
		if (!isPlainContainer(current) || !Object.hasOwn(current, part)) return miss();
		current = current[part];
	}
	return hit(current);
}
|
|
13572
|
+
/**
 * Build the property payload for one value, shaped by the target property type.
 *
 * Handled types: title, rich_text, number, checkbox, date, select, multi_select, url, email,
 * phone_number. Text-like types use the value's text form limited by `truncateText`; an empty
 * text becomes an empty rich-text list or `null`, depending on the type.
 *
 * @param {string} type - Property type name.
 * @param {unknown} value - Extracted value.
 * @returns {object|null} Payload keyed by the property type, or `null` for an unhandled type.
 */
function buildPropertyValue(type, value) {
	// Computed up front for every type, exactly once.
	const text$1 = truncateText(stringifyValue(value));
	const richText = () => text$1 ? [{ text: { content: text$1 } }] : [];
	const builders = {
		title: () => ({ title: richText() }),
		rich_text: () => ({ rich_text: richText() }),
		number: () => ({ number: asNumber(value) }),
		checkbox: () => ({ checkbox: asBoolean(value) }),
		date: () => {
			const start = asDateStart(value);
			return { date: start ? { start } : null };
		},
		select: () => {
			const name$1 = stringifyValue(value).trim();
			return { select: name$1 ? { name: name$1 } : null };
		},
		multi_select: () => ({ multi_select: asStringArray(value).map((name$1) => ({ name: name$1 })) }),
		url: () => ({ url: text$1 || null }),
		email: () => ({ email: text$1 || null }),
		phone_number: () => ({ phone_number: text$1 || null })
	};
	const isKnownType = typeof type === "string" && Object.hasOwn(builders, type);
	return isKnownType ? builders[type]() : null;
}
|
|
13594
|
+
/**
 * Find the name of the title property in a property map.
 *
 * @param {object} properties - Map of property name to property definition (with `type`).
 * @param {string} [preferred] - Name to use when it refers to a title-typed property.
 * @returns {string|null} `preferred` when valid, else the first property of type "title",
 *   else `null`.
 */
function findTitleProperty(properties, preferred) {
	if (preferred && properties[preferred]?.type === "title") return preferred;
	for (const [propertyName, property] of Object.entries(properties)) {
		if (property?.type === "title") return propertyName;
	}
	return null;
}
|
|
13598
|
+
/**
 * Rewrite a 32-hex-digit id in hyphenated 8-4-4-4-12 form.
 *
 * @param {string} value - Id with or without hyphens.
 * @returns {string} The hyphenated id, or `value` unchanged when, after removing hyphens, it
 *   is not exactly 32 hexadecimal digits.
 */
function hyphenateDatabaseId(value) {
	const compact = value.replace(/-/g, "");
	if (!UUID_RE.test(compact)) return value;
	const groups = [
		compact.slice(0, 8),
		compact.slice(8, 12),
		compact.slice(12, 16),
		compact.slice(16, 20),
		compact.slice(20)
	];
	return groups.join("-");
}
|
|
13603
|
+
/**
 * Normalize user input (URL or id) into an id string.
 *
 * The trimmed input is first handed to `extractNotionId` (imported from "@notionhq/client");
 * a truthy result is returned as is. Otherwise a bare 32-hex id is hyphenated, and anything
 * else, including an already hyphenated id, is returned trimmed but otherwise unchanged.
 *
 * @param {string} value - Raw URL or id text.
 * @returns {string} Normalized id, or "" for blank input.
 */
function parseNotionDatabaseId(value) {
	const input = value.trim();
	if (input === "") return "";
	const extracted = extractNotionId(input);
	if (extracted) return extracted;
	const needsHyphens = !HYPHENATED_UUID_RE.test(input) && UUID_RE.test(input);
	return needsHyphens ? hyphenateDatabaseId(input) : input;
}
|
|
13612
|
+
/**
 * Reduce a field or property name to a comparison key.
 *
 * The name is NFKC-normalized, lower-cased, and stripped of every character that is not a
 * Unicode letter or number.
 *
 * @param {string} value - Name to normalize.
 * @returns {string} Comparison key; may be empty.
 */
function normalizeFieldName(value) {
	const folded = value.normalize("NFKC").toLowerCase();
	return folded.replace(/[^\p{Letter}\p{Number}]+/gu, "");
}
|
|
13615
|
+
/**
 * Collect the comparison keys under which a schema field may match a property name.
 *
 * Candidates are the field's `name`, `title` and `description`, in that order. Blank
 * candidates and candidates whose normalized key is empty are left out.
 *
 * @param {{name?: string, title?: string, description?: string}} field - Schema field.
 * @returns {string[]} Normalized keys in priority order.
 */
function buildMatchKeys(field) {
	const candidates = [
		field.name,
		field.title,
		field.description
	];
	const keys = [];
	for (const candidate of candidates) {
		if (!candidate?.trim()) continue;
		const key = normalizeFieldName(candidate);
		if (key) keys.push(key);
	}
	return keys;
}
|
|
13622
|
+
/**
 * Propose a mapping from schema field names to database property names.
 *
 * Property names are indexed by their normalized key (a later property with the same key
 * replaces an earlier one). Each field is mapped to the property found for the first of its
 * match keys that has one; fields without a match are omitted.
 *
 * @param {Array<{name: string, title?: string, description?: string}>} schemaFields - Fields to map.
 * @param {object} databaseProperties - Map keyed by property name.
 * @returns {Object<string, string>} Field name to property name.
 */
function suggestFieldMap(schemaFields, databaseProperties) {
	const propertyByKey = new Map(Object.keys(databaseProperties).map((propertyName) => [normalizeFieldName(propertyName), propertyName]));
	const fieldMap = {};
	for (const field of schemaFields) {
		const matchedKey = buildMatchKeys(field).find((key) => propertyByKey.get(key));
		if (matchedKey !== void 0) fieldMap[field.name] = propertyByKey.get(matchedKey);
	}
	return fieldMap;
}
|
|
13635
|
+
/**
 * Check that a value is an object carrying an object-valued `properties` member.
 *
 * @param {unknown} value - Value to inspect.
 * @returns {boolean} `true` when both `value` and `value.properties` are truthy objects.
 */
function hasProperties(value) {
	if (!value || typeof value !== "object") return false;
	const { properties } = value;
	return !!properties && typeof properties === "object";
}
|
|
13638
|
+
/**
 * Pick the id of the first usable entry in `database.data_sources`.
 *
 * @param {object} [database] - Object that may carry a `data_sources` array.
 * @returns {string|undefined} The first entry id that is a non-blank string, or `undefined`
 *   when `data_sources` is not an array or holds no such entry.
 */
function firstDataSourceId(database) {
	const sources = database?.data_sources;
	if (!Array.isArray(sources)) return void 0;
	for (const source of sources) {
		if (typeof source?.id === "string" && source.id.trim()) return source.id;
	}
	return void 0;
}
|
|
13641
|
+
/**
 * Resolve user input to a data source with its properties.
 *
 * Strategy:
 * 1. Treat the parsed id as a data source id and retrieve it. If that succeeds and the result
 *    carries properties, use it. Any error in this step is deliberately ignored.
 * 2. Otherwise treat the id as a database id, take the first data source listed on the
 *    database, and retrieve that data source.
 *
 * @param {object} notion - Client exposing `dataSources.retrieve` and `databases.retrieve`.
 * @param {string} inputId - URL or id entered by the user.
 * @returns {Promise<{databaseId: string, dataSourceId: string, properties: object, parent: {data_source_id: string}}>}
 * @throws {Error} When the input is blank, the database lists no data source, or the data
 *   source has no properties. Errors from `databases.retrieve` and from the second
 *   `dataSources.retrieve` call propagate.
 */
async function resolveNotionDataSource(notion, inputId) {
	const id = parseNotionDatabaseId(inputId);
	if (!id) throw new Error("Notion database or data source URL/ID is required.");
	try {
		const direct = await notion.dataSources.retrieve({ data_source_id: id });
		if (hasProperties(direct)) {
			const parentDatabaseId = direct.parent?.database_id;
			const directId = direct.id ?? id;
			return {
				databaseId: typeof parentDatabaseId === "string" ? parentDatabaseId : id,
				dataSourceId: directId,
				properties: direct.properties,
				parent: { data_source_id: directId }
			};
		}
	} catch {}
	// Fallback: the id did not work as a data source id, so try it as a database id.
	const database = await notion.databases.retrieve({ database_id: id });
	const listedId = firstDataSourceId(database);
	if (!listedId) throw new Error("No data source found for this Notion database. Copy the data source link from Notion, or share the source database with the integration.");
	const dataSource = await notion.dataSources.retrieve({ data_source_id: listedId });
	if (!hasProperties(dataSource)) throw new Error("Notion data source did not return properties. Make sure it is shared with the integration and is not a linked data source.");
	const resolvedId = dataSource.id ?? listedId;
	return {
		databaseId: database.id ?? id,
		dataSourceId: resolvedId,
		properties: dataSource.properties,
		parent: { data_source_id: resolvedId }
	};
}
|
|
13665
|
+
/**
 * Inspect a database / data source and report its properties plus a suggested field mapping.
 *
 * @param {{token: string, databaseId: string, schemaFields: Array<object>}} input - Integration
 *   token, database or data source URL/id, and the schema fields to match.
 * @returns {Promise<{databaseId: string, dataSourceId: string, titleProperty: (string|undefined),
 *   properties: Array<{name: string, type: string}>, suggestedFieldMap: object}>} Properties are
 *   sorted by name; a property without a type is reported as "unknown".
 * @throws {Error} When the token or the id is blank, or when resolving the data source fails.
 */
async function inspectNotionDatabase(input) {
	if (!input.token.trim()) throw new Error("Notion integration token is required.");
	const id = parseNotionDatabaseId(input.databaseId);
	if (!id) throw new Error("Notion database or data source URL/ID is required.");
	const client = new Client({ auth: input.token });
	const resolved = await resolveNotionDataSource(client, id);
	const databaseProperties = resolved.properties;
	const titleProperty = findTitleProperty(databaseProperties) ?? void 0;
	const properties = [];
	for (const [name$1, property] of Object.entries(databaseProperties)) {
		properties.push({
			name: name$1,
			type: property?.type ?? "unknown"
		});
	}
	properties.sort((a, b) => a.name.localeCompare(b.name));
	return {
		databaseId: resolved.databaseId,
		dataSourceId: resolved.dataSourceId,
		titleProperty,
		properties,
		suggestedFieldMap: suggestFieldMap(input.schemaFields, databaseProperties)
	};
}
|
|
13683
|
+
/**
 * Validate the Notion section of the AI configuration.
 *
 * A missing or non-string token is reported through the same message as a blank one; calling
 * `.trim()` on it directly would throw a TypeError instead of returning a message.
 *
 * @param {{enabled?: boolean, token?: string}|null|undefined} config - Notion configuration.
 * @returns {string|null} A human-readable problem description, or `null` when the
 *   configuration is usable.
 */
function validateNotionConfig(config) {
	if (!config?.enabled) return "Notion export is not enabled. Configure Notion settings first.";
	const token = typeof config.token === "string" ? config.token.trim() : "";
	if (!token) return "Notion integration token is required.";
	return null;
}
|
|
13688
|
+
/**
 * Create a page from extracted data in the database configured for `schemaName`.
 *
 * Steps:
 * 1. Validate the configuration and the per-schema entry. A missing `schemas` map or a missing
 *    `databaseId` produces the same descriptive errors as an absent or blank entry (the
 *    lookups are guarded the same way `shouldSyncNotion` guards them) instead of a TypeError.
 * 2. Resolve the target data source and its properties.
 * 3. For every key of `data` and every key of the schema's `fieldMap`, look up the value
 *    (dotted paths allowed), map it to a property name (`fieldMap` entry, else the field name
 *    itself) and convert it according to the property's type. Fields without a value, without
 *    a matching property, or with an unhandled property type are skipped.
 * 4. If the title property received no value, it is filled with the schema name.
 *
 * @param {object} config - Notion configuration (`enabled`, `token`, `schemas`).
 * @param {string} schemaName - Schema whose Notion target should be used.
 * @param {object} data - Extracted data object.
 * @returns {Promise<{pageId: string, databaseId: string, dataSourceId: string}>}
 * @throws {Error} On invalid configuration, when no field matches any property, or when
 *   resolving the data source or creating the page fails.
 */
async function writeNotionPage(config, schemaName, data) {
	const configError = validateNotionConfig(config);
	if (configError) throw new Error(configError);
	const notionConfig = config;
	const schemaConfig = notionConfig.schemas?.[schemaName];
	if (!schemaConfig) throw new Error(`Notion database is not configured for schema "${schemaName}".`);
	if (!schemaConfig.databaseId?.trim()) throw new Error(`Notion database ID is required for schema "${schemaName}".`);
	const notion = new Client({ auth: notionConfig.token });
	const resolved = await resolveNotionDataSource(notion, schemaConfig.databaseId);
	const databaseProperties = resolved.properties;
	const fieldMap = schemaConfig.fieldMap ?? {};
	const properties = {};
	// fieldMap keys are included so mapped dotted paths (nested values) are considered too.
	const sourceFields = new Set([...Object.keys(data), ...Object.keys(fieldMap)]);
	for (const sourceField of sourceFields) {
		const source = getValueAtPath(data, sourceField);
		if (!source.found) continue;
		const notionPropertyName = fieldMap[sourceField] ?? sourceField;
		const notionProperty = databaseProperties[notionPropertyName];
		if (!notionProperty) continue;
		const propertyValue = buildPropertyValue(notionProperty.type, source.value);
		if (propertyValue) properties[notionPropertyName] = propertyValue;
	}
	const titleProperty = findTitleProperty(databaseProperties, schemaConfig.titleProperty);
	if (titleProperty && !properties[titleProperty]) properties[titleProperty] = buildPropertyValue("title", schemaName);
	if (Object.keys(properties).length === 0) throw new Error("No extracted fields matched Notion database properties.");
	const page = await notion.pages.create({
		parent: resolved.parent,
		properties
	});
	return {
		pageId: page.id,
		databaseId: resolved.databaseId,
		dataSourceId: resolved.dataSourceId
	};
}
|
|
13722
|
+
|
|
13384
13723
|
//#endregion
|
|
13385
13724
|
//#region src/core/pdf-converter/external.ts
|
|
13386
13725
|
function applyTemplate(value, context) {
|
|
@@ -13421,7 +13760,7 @@ function formatCommandError(error, command$1) {
|
|
|
13421
13760
|
}
|
|
13422
13761
|
async function countPdfPages(input) {
|
|
13423
13762
|
try {
|
|
13424
|
-
return (await getDocumentProxy(input)).numPages;
|
|
13763
|
+
return (await getDocumentProxy(Buffer.isBuffer(input) ? new Uint8Array(input) : input)).numPages;
|
|
13425
13764
|
} catch {
|
|
13426
13765
|
return 0;
|
|
13427
13766
|
}
|
|
@@ -13433,7 +13772,7 @@ var ExternalCommandPdfConverter = class {
|
|
|
13433
13772
|
this.name = name$1;
|
|
13434
13773
|
}
|
|
13435
13774
|
async convert(input, filePath) {
|
|
13436
|
-
const tempRoot = await fs.mkdtemp(path.join(os.tmpdir(),
|
|
13775
|
+
const tempRoot = await fs.mkdtemp(path.join(os.tmpdir(), `aiex-${this.name}-`));
|
|
13437
13776
|
const outputDir = path.join(tempRoot, "output");
|
|
13438
13777
|
await fs.mkdir(outputDir, { recursive: true });
|
|
13439
13778
|
const inputPath = filePath ?? path.join(tempRoot, "input.pdf");
|
|
@@ -13530,6 +13869,10 @@ function createPdfConverter(config) {
|
|
|
13530
13869
|
const mineruConfig = config.mineru ?? DEFAULT_MINERU_CONFIG;
|
|
13531
13870
|
return withFallback(new ExternalCommandPdfConverter("mineru", mineruConfig), mineruConfig);
|
|
13532
13871
|
}
|
|
13872
|
+
if (config.converter === "markitdown") {
|
|
13873
|
+
const markitdownConfig = config.markitdown ?? DEFAULT_MARKITDOWN_CONFIG;
|
|
13874
|
+
return withFallback(new ExternalCommandPdfConverter("markitdown", markitdownConfig), markitdownConfig);
|
|
13875
|
+
}
|
|
13533
13876
|
if (config.converter === "external") {
|
|
13534
13877
|
if (!config.external) throw new Error("External PDF converter is selected but no external command is configured.");
|
|
13535
13878
|
return withFallback(new ExternalCommandPdfConverter("external", config.external), config.external);
|
|
@@ -13569,8 +13912,19 @@ const SUPPORTED_EXTENSIONS = new Set([
|
|
|
13569
13912
|
"yml"
|
|
13570
13913
|
]);
|
|
13571
13914
|
const PDF_EXT_RE = /\.pdf$/i;
|
|
13572
|
-
const JSON_EXT_RE = /\.json$/;
|
|
13915
|
+
const JSON_EXT_RE$1 = /\.json$/;
|
|
13573
13916
|
const SUPPORTED_FILE_PATTERN = `*.{${[...SUPPORTED_EXTENSIONS].join(",")}}`;
|
|
13917
|
+
/**
 * Write one extraction result to Notion and report the created page.
 *
 * @param {object} aiConfig - AI configuration; its `notion` section is passed on.
 * @param {string} schemaName - Schema the result belongs to.
 * @param {unknown} data - Extraction result; must be a non-array object.
 * @returns {Promise<Array<{databaseId: string, pageId: string}>>} Single-element list
 *   describing the created page.
 * @throws {Error} When `data` is not a plain object, or when writing the page fails.
 */
async function syncResultToNotion(aiConfig, schemaName, data) {
	const isObjectResult = !!data && typeof data === "object" && !Array.isArray(data);
	if (!isObjectResult) {
		throw new Error("Extraction result is not an object and cannot be written to Notion.");
	}
	const { databaseId, pageId } = await writeNotionPage(aiConfig.notion, schemaName, data);
	return [{
		databaseId,
		pageId
	}];
}
|
|
13925
|
+
/**
 * Tell whether results for `schemaName` should be synced to Notion.
 *
 * @param {object} aiConfig - AI configuration with an optional `notion` section.
 * @param {string} schemaName - Schema being extracted.
 * @returns {boolean} `true` when Notion is enabled and the schema has a non-blank `databaseId`.
 */
function shouldSyncNotion(aiConfig, schemaName) {
	const notion = aiConfig.notion;
	if (!notion?.enabled) return false;
	const databaseId = notion.schemas?.[schemaName]?.databaseId;
	return !!databaseId?.trim();
}
|
|
13574
13928
|
async function ensureDatabaseReady(dbPath, schema) {
|
|
13575
13929
|
try {
|
|
13576
13930
|
await fs.access(dbPath);
|
|
@@ -13628,7 +13982,7 @@ async function loadSchema(config, schemaName) {
|
|
|
13628
13982
|
async function listSchemas(aiexDir) {
|
|
13629
13983
|
try {
|
|
13630
13984
|
const dir = path.join(aiexDir, "schema");
|
|
13631
|
-
return (await fs.readdir(dir)).filter((f) => f.endsWith(".json")).map((f) => f.replace(JSON_EXT_RE, "")).sort();
|
|
13985
|
+
return (await fs.readdir(dir)).filter((f) => f.endsWith(".json")).map((f) => f.replace(JSON_EXT_RE$1, "")).sort();
|
|
13632
13986
|
} catch {
|
|
13633
13987
|
return [];
|
|
13634
13988
|
}
|
|
@@ -13693,7 +14047,7 @@ async function extractSingle(aiexDir, config, aiConfig, schemaName, text$1, file
|
|
|
13693
14047
|
if (!options?.quiet) s.stop("Extraction complete");
|
|
13694
14048
|
if (result.outputPath && !options?.quiet) consola.success(`Result saved: ${pc.cyan(result.outputPath)}`);
|
|
13695
14049
|
if (result.tokensUsed && !options?.quiet) consola.info(pc.gray(`Token usage: prompt=${result.tokensUsed.prompt}, completion=${result.tokensUsed.completion}, total=${result.tokensUsed.total}`));
|
|
13696
|
-
if (result.data) {
|
|
14050
|
+
if (result.data && options?.insert !== false) {
|
|
13697
14051
|
const s2 = spinner();
|
|
13698
14052
|
if (!options?.quiet) s2.start("Inserting into database...");
|
|
13699
14053
|
const dbError = await ensureDatabaseReady(config.databasePath, schemaLoad.schema);
|
|
@@ -13711,6 +14065,13 @@ async function extractSingle(aiexDir, config, aiConfig, schemaName, text$1, file
|
|
|
13711
14065
|
const insertResult = insertExtractedData(db, schemaLoad.schema, result.data);
|
|
13712
14066
|
if (insertResult.success) {
|
|
13713
14067
|
if (!options?.quiet) s2.stop(`Inserted into ${insertResult.tablesInserted.length} table(s)`);
|
|
14068
|
+
return {
|
|
14069
|
+
success: true,
|
|
14070
|
+
outputPath: result.outputPath,
|
|
14071
|
+
data: result.data,
|
|
14072
|
+
tablesInserted: insertResult.tablesInserted,
|
|
14073
|
+
tokensUsed: result.tokensUsed
|
|
14074
|
+
};
|
|
13714
14075
|
} else {
|
|
13715
14076
|
if (!options?.quiet) s2.stop("Database insert failed");
|
|
13716
14077
|
consola.error(insertResult.error || "Unknown error");
|
|
@@ -13731,25 +14092,74 @@ async function extractSingle(aiexDir, config, aiConfig, schemaName, text$1, file
|
|
|
13731
14092
|
};
|
|
13732
14093
|
}
|
|
13733
14094
|
}
|
|
13734
|
-
return {
|
|
14095
|
+
return {
|
|
14096
|
+
success: true,
|
|
14097
|
+
outputPath: result.outputPath,
|
|
14098
|
+
data: result.data,
|
|
14099
|
+
tokensUsed: result.tokensUsed
|
|
14100
|
+
};
|
|
13735
14101
|
}
|
|
13736
|
-
/**
 * Extract one file as part of a batch run and track the attempt in an audit record.
 *
 * An audit record is created first (status "running"). The file is then read and extracted;
 * on success the result is optionally synced to Notion. The audit record ends as "succeeded"
 * or "failed"; a Notion sync failure counts as a failed run even though the extraction output
 * is recorded on the audit entry.
 *
 * @param {string} aiexDir - Root aiex working directory.
 * @param {object} config - Project configuration handed to `extractSingle`.
 * @param {object} aiConfig - AI configuration.
 * @param {string} schemaName - Schema to extract with.
 * @param {string} filePath - File to process.
 * @param {object} [modelOverride] - Model to use instead of the default; its `name` is audited.
 * @param {{insert?: boolean}} [options] - `insert` is forwarded to `extractSingle`.
 * @returns {Promise<boolean>} `true` when the file was processed successfully.
 */
async function processOneFile(aiexDir, config, aiConfig, schemaName, filePath, modelOverride, options) {
	const messageOf = (failure) => failure instanceof Error ? failure.message : String(failure);
	const audit = await createExtractionAuditRecord(aiexDir, {
		schemaName,
		modelName: modelOverride?.name,
		source: {
			type: "file",
			filePath,
			fileName: path.basename(filePath)
		}
	});
	try {
		const input = await readExtractFileInput(filePath, aiConfig);
		const r = await extractSingle(aiexDir, config, aiConfig, schemaName, input.text, input.filePath, modelOverride, {
			quiet: false,
			insert: options?.insert
		});
		if (!r.success) {
			await updateExtractionAuditRecord(aiexDir, audit.id, {
				status: "failed",
				error: r.error || "Extraction failed"
			});
			consola.error(`Failed: ${r.error}`);
			return false;
		}
		const outputName = r.outputPath ? path.basename(r.outputPath) : void 0;
		let notionPages;
		if (shouldSyncNotion(aiConfig, schemaName)) {
			try {
				notionPages = await syncResultToNotion(aiConfig, schemaName, r.data);
				consola.success(`Synced to Notion: ${notionPages.length} page(s)`);
			} catch (error) {
				// Keep the extraction output on the record even though the run is marked failed.
				await updateExtractionAuditRecord(aiexDir, audit.id, {
					status: "failed",
					outputPath: r.outputPath,
					outputName,
					tablesInserted: r.tablesInserted,
					tokensUsed: r.tokensUsed,
					error: messageOf(error)
				});
				consola.error(`Notion sync failed: ${messageOf(error)}`);
				return false;
			}
		}
		await updateExtractionAuditRecord(aiexDir, audit.id, {
			status: "succeeded",
			outputPath: r.outputPath,
			outputName,
			tablesInserted: r.tablesInserted,
			notionPages,
			tokensUsed: r.tokensUsed
		});
		consola.success(`Processed: ${path.basename(filePath)}`);
		return true;
	} catch (e) {
		await updateExtractionAuditRecord(aiexDir, audit.id, {
			status: "failed",
			error: messageOf(e)
		});
		consola.error(`Error processing ${path.basename(filePath)}: ${messageOf(e)}`);
		return false;
	}
}
|
|
13752
|
-
async function runBatchExtraction(aiexDir, config, aiConfig, schemaName, dir, globPattern, modelOverride) {
|
|
14162
|
+
async function runBatchExtraction(aiexDir, config, aiConfig, schemaName, dir, globPattern, modelOverride, options) {
|
|
13753
14163
|
consola.info(`Scanning ${pc.cyan(dir)} for supported files...`);
|
|
13754
14164
|
let files;
|
|
13755
14165
|
try {
|
|
@@ -13774,7 +14184,7 @@ async function runBatchExtraction(aiexDir, config, aiConfig, schemaName, dir, gl
|
|
|
13774
14184
|
for (let i = 0; i < files.length; i++) {
|
|
13775
14185
|
const file = files[i];
|
|
13776
14186
|
consola.info(`\n[${i + 1}/${files.length}] Processing: ${pc.cyan(path.basename(file))}`);
|
|
13777
|
-
if (await processOneFile(aiexDir, config, aiConfig, schemaName, file, modelOverride)) successCount++;
|
|
14187
|
+
if (await processOneFile(aiexDir, config, aiConfig, schemaName, file, modelOverride, options)) successCount++;
|
|
13778
14188
|
else failCount++;
|
|
13779
14189
|
}
|
|
13780
14190
|
consola.info(`\nBatch complete: ${pc.green(`${successCount} succeeded`)}, ${pc.red(`${failCount} failed`)}, ${files.length} total`);
|
|
@@ -13787,11 +14197,230 @@ async function runBatchExtraction(aiexDir, config, aiConfig, schemaName, dir, gl
|
|
|
13787
14197
|
|
|
13788
14198
|
//#endregion
|
|
13789
14199
|
//#region src/commands/extract.ts
|
|
14200
|
+
/**
 * Read the record id from parsed command arguments.
 *
 * @param {{id?: unknown, _?: unknown}} args - Parsed arguments.
 * @returns {string} `args.id` when it is a string; otherwise the first positional argument
 *   (`args._[0]`) when that is a string; otherwise "".
 */
function getIdArg(args) {
	const { id, _: positional } = args;
	if (typeof id === "string") return id;
	if (!Array.isArray(positional)) return "";
	const first = positional[0];
	return typeof first === "string" ? first : "";
}
|
|
14205
|
+
/**
 * Detect whether the raw argument list names one of the extract sub-commands.
 *
 * @param {unknown} rawArgs - Raw command-line arguments.
 * @returns {boolean} `true` when `rawArgs` is an array containing the string "history",
 *   "show", "retry" or "rm" at any position.
 */
function isExtractSubCommand(rawArgs) {
	if (!Array.isArray(rawArgs)) return false;
	const subCommands = new Set([
		"history",
		"show",
		"retry",
		"rm"
	]);
	for (const arg of rawArgs) {
		if (typeof arg === "string" && subCommands.has(arg)) return true;
	}
	return false;
}
|
|
14214
|
+
/**
 * Produce a short label for an audit record source.
 *
 * @param {{type: string, fileName?: string}} source - Record source.
 * @returns {string} The file name (or "file" when it is empty) for file sources, "text" for
 *   everything else.
 */
function formatSource(source) {
	if (source.type !== "file") return "text";
	return source.fileName || "file";
}
|
|
14217
|
+
/**
 * Load the AI configuration and make sure it is complete enough to run an extraction.
 *
 * Checks, in order: the configuration exists, the provider has an API key, and at least one
 * model is configured. The first failing check is reported through `failCommand`.
 *
 * @param {string} aiexDir - Root aiex working directory.
 * @returns {Promise<object|null>} The configuration, or `null` after a failure was reported.
 */
async function loadConfiguredAI(aiexDir) {
	const aiConfig = await readAIConfig(aiexDir);
	let problem = null;
	if (!aiConfig) problem = "AI configuration not found. Please run \"aiex web\" to configure AI settings first";
	else if (!aiConfig.provider.apiKey) problem = "API Key not configured. Please configure AI settings in the Web interface first";
	else if (!aiConfig.provider.models?.length) problem = "No models configured. Please add at least one model in AI Settings";
	if (problem === null) return aiConfig;
	failCommand(problem);
	return null;
}
|
|
14233
|
+
/**
 * Look up a model by name in the configured model list.
 *
 * @param {object} aiConfig AI configuration; `provider.models` is searched.
 * @param {string|undefined} modelName Requested model name; falsy means "no override".
 * @returns {object|undefined|null} The matching model, undefined when no name was
 *   given, or null after reporting an unknown name through `failCommand`.
 */
function resolveModelOverride(aiConfig, modelName) {
  if (!modelName) return void 0;
  const { models } = aiConfig.provider;
  for (const candidate of models) {
    if (candidate.name === modelName) return candidate;
  }
  const available = models.map((model) => model.name).join(", ");
  failCommand(`Model "${modelName}" not found in configuration. Available models: ${available}`);
  return null;
}
|
|
14242
|
+
/**
 * Run one extraction and record its outcome in an audit record.
 *
 * Steps: create the audit record, call `extractSingle`, optionally write the
 * result to Notion (only when Notion is enabled and a database id is
 * configured for the schema), then mark the audit record as succeeded.
 * A failed extraction or a failed Notion write marks the record as failed.
 *
 * @param {object} input Extraction request: `aiexDir`, `config`, `aiConfig`,
 *   `schemaName`, `text`, `filePath`, `source`, `modelOverride`, `retryOf`, `insert`.
 * @returns {Promise<boolean>} True on full success, false when extraction or
 *   the Notion sync failed.
 */
async function runAuditedSingleExtraction(input) {
  const { aiexDir, aiConfig, schemaName, modelOverride } = input;

  const audit = await createExtractionAuditRecord(aiexDir, {
    schemaName,
    modelName: modelOverride?.name,
    source: input.source,
    retryOf: input.retryOf
  });
  const patchAudit = (changes) => updateExtractionAuditRecord(aiexDir, audit.id, changes);

  const result = await extractSingle(aiexDir, input.config, aiConfig, schemaName, input.text, input.filePath, modelOverride, { insert: input.insert });
  if (!result.success) {
    await patchAudit({
      status: "failed",
      error: result.error || "Extraction failed"
    });
    return false;
  }

  // Evaluated lazily, at the point each audit update is built.
  const outputNameOf = () => (result.outputPath ? path.basename(result.outputPath) : void 0);

  let notionPages;
  const notionConfigured = aiConfig.notion?.enabled && aiConfig.notion.schemas?.[schemaName]?.databaseId?.trim();
  if (notionConfigured) {
    try {
      const data = result.data;
      if (!data || typeof data !== "object" || Array.isArray(data)) {
        throw new Error("Extraction result is not an object and cannot be written to Notion.");
      }
      const page = await writeNotionPage(aiConfig.notion, schemaName, data);
      notionPages = [{
        databaseId: page.databaseId,
        pageId: page.pageId
      }];
      consola.success(`Synced to Notion: ${notionPages.length} page(s)`);
    } catch (error) {
      // The extraction output is kept on the record even though the sync failed.
      await patchAudit({
        status: "failed",
        outputPath: result.outputPath,
        outputName: outputNameOf(),
        tablesInserted: result.tablesInserted,
        tokensUsed: result.tokensUsed,
        error: error instanceof Error ? error.message : String(error)
      });
      consola.error(`Notion sync failed: ${error instanceof Error ? error.message : String(error)}`);
      return false;
    }
  }

  await patchAudit({
    status: "succeeded",
    outputPath: result.outputPath,
    outputName: outputNameOf(),
    tablesInserted: result.tablesInserted,
    notionPages,
    tokensUsed: result.tokensUsed
  });
  return true;
}
|
|
14288
|
+
/**
 * `extract history`: print one line per extraction audit record
 * (status, id, schema, source label and either the error or the output name).
 */
const historyCommand = defineCommand({
  meta: {
    name: "history",
    description: "List extraction audit records"
  },
  async run() {
    const { schemaPath } = createMigrationConfig(process.cwd());
    const records = await listExtractionAuditRecords(path.dirname(schemaPath));
    if (records.length === 0) {
      consola.info("No extraction history found");
      return;
    }
    for (const record of records) {
      // The error takes precedence over the output name in the trailing detail.
      const detail = record.error || record.outputName;
      const suffix = detail ? ` — ${detail}` : "";
      consola.info(`${record.status.padEnd(9)} ${record.id} ${record.schemaName} ${formatSource(record.source)}${suffix}`);
    }
  }
});
|
|
14306
|
+
/**
 * `extract show <id>`: print a single extraction audit record as
 * pretty-printed JSON, or fail when the id is missing or unknown.
 */
const showCommand = defineCommand({
  meta: {
    name: "show",
    description: "Show an extraction audit record"
  },
  args: {
    id: {
      type: "string",
      description: "Audit record id"
    }
  },
  async run(context) {
    const id = getIdArg(context.args);
    if (!id) {
      failCommand("Audit record id is required");
      return;
    }

    const { schemaPath } = createMigrationConfig(process.cwd());
    const found = await readExtractionAuditRecord(path.dirname(schemaPath), id);
    if (!found) {
      failCommand(`Extraction record not found: ${id}`);
      return;
    }

    consola.info(JSON.stringify(found, null, 2));
  }
});
|
|
14330
|
+
/**
 * `extract retry <id>`: re-run the extraction described by an existing audit
 * record, using the record's schema, source and model name. The new run is
 * audited with `retryOf` pointing at the original record.
 */
const retryCommand = defineCommand({
  meta: {
    name: "retry",
    description: "Retry an extraction audit record"
  },
  args: {
    id: {
      type: "string",
      description: "Audit record id"
    },
    noInsert: {
      type: "boolean",
      description: "Extract and save JSON without inserting into SQLite",
      default: false
    }
  },
  async run({ args }) {
    intro(pc.inverse(" aiex extract retry "));

    const id = getIdArg(args);
    if (!id) {
      failCommand("Audit record id is required");
      return;
    }

    const config = createMigrationConfig(process.cwd());
    const aiexDir = path.dirname(config.schemaPath);
    const record = await readExtractionAuditRecord(aiexDir, id);
    if (!record) {
      failCommand(`Extraction record not found: ${id}`);
      return;
    }

    const aiConfig = await loadConfiguredAI(aiexDir);
    if (!aiConfig) return;

    // null means the recorded model is no longer configured (already reported).
    const modelOverride = resolveModelOverride(aiConfig, record.modelName);
    if (modelOverride === null) return;

    try {
      const { source } = record;
      let sourceInput;
      if (source.type === "file") {
        sourceInput = await readExtractFileInput(source.filePath || "", aiConfig);
      } else {
        sourceInput = {
          text: source.text || "",
          filePath: void 0
        };
      }

      const succeeded = await runAuditedSingleExtraction({
        aiexDir,
        config,
        aiConfig,
        schemaName: record.schemaName,
        text: sourceInput.text,
        filePath: sourceInput.filePath,
        source: record.source,
        modelOverride,
        retryOf: record.id,
        insert: !args.noInsert
      });
      if (!succeeded) {
        failCommand();
        return;
      }
      outro("Done!");
    } catch (error) {
      failCommand(error instanceof Error ? error.message : String(error));
    }
  }
});
|
|
14390
|
+
/**
 * `extract rm <id>`: delete an extraction audit record via
 * `deleteExtractionAuditRecord`, failing when the id is missing or unknown.
 */
const rmCommand = defineCommand({
  meta: {
    name: "rm",
    description: "Delete an extraction audit record and cached upload"
  },
  args: {
    id: {
      type: "string",
      description: "Audit record id"
    }
  },
  async run(context) {
    const id = getIdArg(context.args);
    if (!id) {
      failCommand("Audit record id is required");
      return;
    }

    const { schemaPath } = createMigrationConfig(process.cwd());
    const removed = await deleteExtractionAuditRecord(path.dirname(schemaPath), id);
    if (!removed) {
      failCommand(`Extraction record not found: ${id}`);
      return;
    }

    consola.success(`Deleted extraction record: ${id}`);
  }
});
|
|
13790
14413
|
const extractCommand = defineCommand({
|
|
13791
14414
|
meta: {
|
|
13792
14415
|
name: "extract",
|
|
13793
14416
|
description: "Extract structured data from text, images, or PDFs"
|
|
13794
14417
|
},
|
|
14418
|
+
subCommands: {
|
|
14419
|
+
history: historyCommand,
|
|
14420
|
+
show: showCommand,
|
|
14421
|
+
retry: retryCommand,
|
|
14422
|
+
rm: rmCommand
|
|
14423
|
+
},
|
|
13795
14424
|
args: {
|
|
13796
14425
|
schema: {
|
|
13797
14426
|
type: "string",
|
|
@@ -13822,9 +14451,15 @@ const extractCommand = defineCommand({
|
|
|
13822
14451
|
type: "string",
|
|
13823
14452
|
alias: "g",
|
|
13824
14453
|
description: "Glob pattern to filter files in batch mode (e.g. \"*.pdf\")"
|
|
14454
|
+
},
|
|
14455
|
+
noInsert: {
|
|
14456
|
+
type: "boolean",
|
|
14457
|
+
description: "Extract and save JSON without inserting into SQLite",
|
|
14458
|
+
default: false
|
|
13825
14459
|
}
|
|
13826
14460
|
},
|
|
13827
|
-
async run({ args }) {
|
|
14461
|
+
async run({ args, rawArgs }) {
|
|
14462
|
+
if (isExtractSubCommand(rawArgs)) return;
|
|
13828
14463
|
intro(pc.inverse(" aiex extract "));
|
|
13829
14464
|
const config = createMigrationConfig(process.cwd());
|
|
13830
14465
|
const aiexDir = path.dirname(config.schemaPath);
|
|
@@ -13836,29 +14471,10 @@ const extractCommand = defineCommand({
|
|
|
13836
14471
|
failCommand("Cannot combine -f/--file with -d/--dir");
|
|
13837
14472
|
return;
|
|
13838
14473
|
}
|
|
13839
|
-
const aiConfig = await
|
|
13840
|
-
if (!aiConfig)
|
|
13841
|
-
|
|
13842
|
-
|
|
13843
|
-
}
|
|
13844
|
-
if (!aiConfig.provider.apiKey) {
|
|
13845
|
-
failCommand("API Key not configured. Please configure AI settings in the Web interface first");
|
|
13846
|
-
return;
|
|
13847
|
-
}
|
|
13848
|
-
if (!aiConfig.provider.models?.length) {
|
|
13849
|
-
failCommand("No models configured. Please add at least one model in AI Settings");
|
|
13850
|
-
return;
|
|
13851
|
-
}
|
|
13852
|
-
let modelOverride;
|
|
13853
|
-
if (args.model) {
|
|
13854
|
-
const matched = aiConfig.provider.models.find((m) => m.name === args.model);
|
|
13855
|
-
if (!matched) {
|
|
13856
|
-
const available = aiConfig.provider.models.map((m) => m.name).join(", ");
|
|
13857
|
-
failCommand(`Model "${args.model}" not found in configuration. Available models: ${available}`);
|
|
13858
|
-
return;
|
|
13859
|
-
}
|
|
13860
|
-
modelOverride = matched;
|
|
13861
|
-
}
|
|
14474
|
+
const aiConfig = await loadConfiguredAI(aiexDir);
|
|
14475
|
+
if (!aiConfig) return;
|
|
14476
|
+
const modelOverride = resolveModelOverride(aiConfig, args.model);
|
|
14477
|
+
if (modelOverride === null) return;
|
|
13862
14478
|
if (!args.schema && !args.text && !args.file && !args.dir) {
|
|
13863
14479
|
if (await runInteractive(aiexDir, config, aiConfig, modelOverride)) outro("Done!");
|
|
13864
14480
|
return;
|
|
@@ -13868,7 +14484,7 @@ const extractCommand = defineCommand({
|
|
|
13868
14484
|
failCommand("Schema name (-s) is required in batch mode");
|
|
13869
14485
|
return;
|
|
13870
14486
|
}
|
|
13871
|
-
const result = await runBatchExtraction(aiexDir, config, aiConfig, args.schema, args.dir, args.glob, modelOverride);
|
|
14487
|
+
const result = await runBatchExtraction(aiexDir, config, aiConfig, args.schema, args.dir, args.glob, modelOverride, { insert: !args.noInsert });
|
|
13872
14488
|
if (!result.ok) {
|
|
13873
14489
|
failCommand(result.error);
|
|
13874
14490
|
return;
|
|
@@ -13901,7 +14517,24 @@ const extractCommand = defineCommand({
|
|
|
13901
14517
|
return;
|
|
13902
14518
|
}
|
|
13903
14519
|
else if (args.text) text$1 = args.text;
|
|
13904
|
-
if (!
|
|
14520
|
+
if (!await runAuditedSingleExtraction({
|
|
14521
|
+
aiexDir,
|
|
14522
|
+
config,
|
|
14523
|
+
aiConfig,
|
|
14524
|
+
schemaName: args.schema,
|
|
14525
|
+
text: text$1,
|
|
14526
|
+
filePath,
|
|
14527
|
+
source: filePath ? {
|
|
14528
|
+
type: "file",
|
|
14529
|
+
filePath: args.file,
|
|
14530
|
+
fileName: path.basename(args.file)
|
|
14531
|
+
} : {
|
|
14532
|
+
type: "text",
|
|
14533
|
+
text: text$1
|
|
14534
|
+
},
|
|
14535
|
+
modelOverride,
|
|
14536
|
+
insert: !args.noInsert
|
|
14537
|
+
})) {
|
|
13905
14538
|
failCommand();
|
|
13906
14539
|
return;
|
|
13907
14540
|
}
|
|
@@ -13960,7 +14593,18 @@ async function runInteractive(aiexDir, config, aiConfig, modelOverride) {
|
|
|
13960
14593
|
cancel("Cancelled");
|
|
13961
14594
|
return false;
|
|
13962
14595
|
}
|
|
13963
|
-
return (
|
|
14596
|
+
return runAuditedSingleExtraction({
|
|
14597
|
+
aiexDir,
|
|
14598
|
+
config,
|
|
14599
|
+
aiConfig,
|
|
14600
|
+
schemaName,
|
|
14601
|
+
text: textContent,
|
|
14602
|
+
source: {
|
|
14603
|
+
type: "text",
|
|
14604
|
+
text: textContent
|
|
14605
|
+
},
|
|
14606
|
+
modelOverride
|
|
14607
|
+
});
|
|
13964
14608
|
} else if (inputSource === "file") {
|
|
13965
14609
|
const filePathStr = await text({
|
|
13966
14610
|
message: "Enter file path:",
|
|
@@ -13975,7 +14619,20 @@ async function runInteractive(aiexDir, config, aiConfig, modelOverride) {
|
|
|
13975
14619
|
const fp = filePathStr;
|
|
13976
14620
|
try {
|
|
13977
14621
|
const input = await readExtractFileInput(fp, aiConfig);
|
|
13978
|
-
return (
|
|
14622
|
+
return runAuditedSingleExtraction({
|
|
14623
|
+
aiexDir,
|
|
14624
|
+
config,
|
|
14625
|
+
aiConfig,
|
|
14626
|
+
schemaName,
|
|
14627
|
+
text: input.text,
|
|
14628
|
+
filePath: input.filePath,
|
|
14629
|
+
source: {
|
|
14630
|
+
type: "file",
|
|
14631
|
+
filePath: fp,
|
|
14632
|
+
fileName: path.basename(fp)
|
|
14633
|
+
},
|
|
14634
|
+
modelOverride
|
|
14635
|
+
});
|
|
13979
14636
|
} catch (e) {
|
|
13980
14637
|
consola.error(`Cannot read file: ${fp} — ${e instanceof Error ? e.message : String(e)}`);
|
|
13981
14638
|
return false;
|
|
@@ -14183,6 +14840,32 @@ const schemaCommand = defineCommand({
|
|
|
14183
14840
|
|
|
14184
14841
|
//#endregion
|
|
14185
14842
|
//#region src/server/routes/ai.ts
|
|
14843
|
+
// Matches a trailing ".json" extension, case-insensitively.
const JSON_EXT_RE = /\.json$/i;
|
|
14844
|
+
/**
 * Flatten a JSON schema's `properties` into a list of leaf fields.
 *
 * Nested object properties are expanded into dotted names (`parent.child`);
 * arrays whose items are objects are skipped entirely.
 *
 * @param {object|null|undefined} schema JSON schema with an optional `properties` map.
 * @returns {Array<{name: string, title: (string|undefined), description: (string|undefined)}>}
 *   Fields in declaration order; empty when the schema has no usable `properties`.
 */
function extractSchemaFields(schema) {
  const isObjectLike = (value) => Boolean(value) && typeof value === "object";
  const stringOrUndefined = (value) => (typeof value === "string" ? value : void 0);

  if (!isObjectLike(schema?.properties)) return [];

  const collect = (properties, prefix) =>
    Object.entries(properties).flatMap(([key, property]) => {
      const fieldName = prefix ? `${prefix}.${key}` : key;
      if (property?.type === "object" && isObjectLike(property?.properties)) {
        return collect(property.properties, fieldName);
      }
      if (property?.type === "array" && property?.items?.type === "object") return [];
      return [{
        name: fieldName,
        title: stringOrUndefined(property?.title),
        description: stringOrUndefined(property?.description)
      }];
    });

  return collect(schema.properties, "");
}
|
|
14865
|
+
/**
 * Read the schema file for `schemaName` and return its flattened fields.
 *
 * Only the base name of `schemaName` is used and a trailing ".json" is
 * stripped before the file name is rebuilt under `config.schemaPath`.
 *
 * @param {{ schemaPath: string }} config Configuration holding the schema directory.
 * @param {string} schemaName Schema name, with or without a ".json" extension.
 * @returns {Promise<Array<object>>} Result of `extractSchemaFields` for the loaded schema.
 */
async function loadSchemaFields(config, schemaName) {
  const baseName = path.basename(schemaName);
  const schemaFile = `${baseName.replace(JSON_EXT_RE, "")}.json`;
  const schema = await readFile(path.join(config.schemaPath, schemaFile));
  return extractSchemaFields(schema);
}
|
|
14186
14869
|
function aiRoutes(config) {
|
|
14187
14870
|
const app = new Hono();
|
|
14188
14871
|
const aiexDir = path.dirname(config.schemaPath);
|
|
@@ -14209,6 +14892,32 @@ function aiRoutes(config) {
|
|
|
14209
14892
|
return c.json({});
|
|
14210
14893
|
}
|
|
14211
14894
|
});
|
|
14895
|
+
app.post("/ai/notion/inspect", async (c) => {
|
|
14896
|
+
try {
|
|
14897
|
+
const body = await c.req.json();
|
|
14898
|
+
const token = typeof body.token === "string" ? body.token : "";
|
|
14899
|
+
const databaseId = typeof body.databaseId === "string" ? body.databaseId : "";
|
|
14900
|
+
const schemaName = typeof body.schemaName === "string" ? body.schemaName : "";
|
|
14901
|
+
if (!schemaName) return c.json({
|
|
14902
|
+
success: false,
|
|
14903
|
+
error: "Schema is required"
|
|
14904
|
+
}, 400);
|
|
14905
|
+
const result = await inspectNotionDatabase({
|
|
14906
|
+
token,
|
|
14907
|
+
databaseId,
|
|
14908
|
+
schemaFields: await loadSchemaFields(config, schemaName)
|
|
14909
|
+
});
|
|
14910
|
+
return c.json({
|
|
14911
|
+
success: true,
|
|
14912
|
+
...result
|
|
14913
|
+
});
|
|
14914
|
+
} catch (error) {
|
|
14915
|
+
return c.json({
|
|
14916
|
+
success: false,
|
|
14917
|
+
error: getErrorMessage(error)
|
|
14918
|
+
}, 400);
|
|
14919
|
+
}
|
|
14920
|
+
});
|
|
14212
14921
|
app.put("/ai/config", async (c) => {
|
|
14213
14922
|
try {
|
|
14214
14923
|
const body = await c.req.json();
|
|
@@ -14226,6 +14935,19 @@ function aiRoutes(config) {
|
|
|
14226
14935
|
success: false,
|
|
14227
14936
|
error: "At least one model must be configured"
|
|
14228
14937
|
}, 400);
|
|
14938
|
+
if (body.notion?.enabled) {
|
|
14939
|
+
if (!body.notion.token?.trim()) return c.json({
|
|
14940
|
+
success: false,
|
|
14941
|
+
error: "Notion token is required when Notion export is enabled"
|
|
14942
|
+
}, 400);
|
|
14943
|
+
for (const [schemaName, schemaConfig] of Object.entries(body.notion.schemas ?? {})) {
|
|
14944
|
+
if (typeof schemaConfig.databaseId === "string") schemaConfig.databaseId = parseNotionDatabaseId(schemaConfig.databaseId);
|
|
14945
|
+
if (!schemaConfig.databaseId?.trim()) return c.json({
|
|
14946
|
+
success: false,
|
|
14947
|
+
error: `Notion database ID is required for schema "${schemaName}"`
|
|
14948
|
+
}, 400);
|
|
14949
|
+
}
|
|
14950
|
+
}
|
|
14229
14951
|
await writeAIConfig(aiexDir, AIConfigSchema.parse(body));
|
|
14230
14952
|
return c.json({ success: true });
|
|
14231
14953
|
} catch (error) {
|
|
@@ -14241,6 +14963,7 @@ function aiRoutes(config) {
|
|
|
14241
14963
|
//#endregion
|
|
14242
14964
|
//#region src/server/routes/data.ts
|
|
14243
14965
|
const FILE_REGEX = /\.json$/;
|
|
14966
|
+
// Matches the start of a "-YYYY-MM-DDT" timestamp segment inside a file name.
const EXTRACTION_TIMESTAMP_RE = /-\d{4}-\d{2}-\d{2}T/;
|
|
14244
14967
|
const TIMESTAMP_CLEANUP = /(\d{2})-(\d{2})-(\d{2})/;
|
|
14245
14968
|
const TIMESTAMP_TZ = /(\d{3})Z/;
|
|
14246
14969
|
const tableParamSchema = z.object({ name: z.string().regex(/^[a-z][a-z0-9_]*$/) });
|
|
@@ -14257,6 +14980,12 @@ function invalidParamResponse$1(message) {
|
|
|
14257
14980
|
if (!result.success) return c.json({ error: message }, 400);
|
|
14258
14981
|
};
|
|
14259
14982
|
}
|
|
14983
|
+
/**
 * Derive the schema name from an extraction output file name.
 *
 * The schema name is everything before the first "-YYYY-MM-DDT" timestamp
 * segment, after the ".json" extension has been removed.
 *
 * @param {string} name$1 Extraction file name.
 * @returns {string|null} The schema name, or null when no timestamp is found
 *   or nothing precedes it.
 */
function schemaNameFromExtractionFile(name$1) {
  const stem = name$1.replace(FILE_REGEX, "");
  const timestampAt = stem.search(EXTRACTION_TIMESTAMP_RE);
  // -1 means no timestamp; 0 means there is no schema name in front of it.
  return timestampAt > 0 ? stem.slice(0, timestampAt) : null;
}
|
|
14260
14989
|
/**
 * Build a Kysely instance backed by a SQLite database opened with `readonly: true`.
 *
 * @param {string} databasePath Path of the SQLite database file.
 * @returns {Kysely} Query builder bound to the read-only connection.
 */
function createReadonlyQueryDb(databasePath) {
  const database = new Database(databasePath, { readonly: true });
  const dialect = new SqliteDialect({ database });
  return new Kysely({ dialect });
}
|
|
@@ -14268,22 +14997,27 @@ function dataRoutes(config) {
|
|
|
14268
14997
|
try {
|
|
14269
14998
|
await fs.mkdir(extractedDir, { recursive: true });
|
|
14270
14999
|
const jsonFiles = (await fs.readdir(extractedDir)).filter((f) => f.endsWith(".json") && !f.endsWith(".prompt.md"));
|
|
15000
|
+
const auditRecords = await listExtractionAuditRecords(aiexDir);
|
|
15001
|
+
const auditByOutputName = new Map(auditRecords.map((record) => [record.outputName, record]));
|
|
14271
15002
|
const records = [];
|
|
14272
15003
|
for (const file of jsonFiles) {
|
|
14273
|
-
const
|
|
14274
|
-
|
|
14275
|
-
|
|
14276
|
-
const schemaName = name$1.slice(0, idx);
|
|
14277
|
-
const timestamp = name$1.slice(idx + 1).replace(/-/g, (d, i) => i === 4 || i === 7 ? "-" : d).replace(TIMESTAMP_CLEANUP, (_, h, m, s) => `${h}:${m}:${s}`).replace(TIMESTAMP_TZ, ".$1Z");
|
|
15004
|
+
const schemaName = schemaNameFromExtractionFile(file);
|
|
15005
|
+
if (!schemaName) continue;
|
|
15006
|
+
const timestamp = file.replace(FILE_REGEX, "").slice(schemaName.length + 1).replace(/-/g, (d, i) => i === 4 || i === 7 ? "-" : d).replace(TIMESTAMP_CLEANUP, (_, h, m, s) => `${h}:${m}:${s}`).replace(TIMESTAMP_TZ, ".$1Z");
|
|
14278
15007
|
const filePath = path.join(extractedDir, file);
|
|
14279
15008
|
try {
|
|
14280
15009
|
const stat = await fs.stat(filePath);
|
|
15010
|
+
const audit = auditByOutputName.get(file);
|
|
15011
|
+
const notionPages = audit?.notionPages?.length ? audit.notionPages : void 0;
|
|
14281
15012
|
records.push({
|
|
14282
15013
|
name: file,
|
|
14283
15014
|
schemaName,
|
|
14284
15015
|
timestamp,
|
|
14285
15016
|
fileSize: stat.size,
|
|
14286
|
-
modifiedAt: stat.mtime.toISOString()
|
|
15017
|
+
modifiedAt: stat.mtime.toISOString(),
|
|
15018
|
+
notionStatus: notionPages ? "synced" : audit?.status === "failed" ? "failed" : "not_synced",
|
|
15019
|
+
notionPages,
|
|
15020
|
+
notionError: !notionPages && audit?.status === "failed" ? audit.error : void 0
|
|
14287
15021
|
});
|
|
14288
15022
|
} catch {
|
|
14289
15023
|
continue;
|
|
@@ -14405,6 +15139,308 @@ function dataRoutes(config) {
|
|
|
14405
15139
|
return c.json({ error: "Extraction result not found" }, 404);
|
|
14406
15140
|
}
|
|
14407
15141
|
});
|
|
15142
|
+
app.post("/data/:name/notion/retry", zValidator("param", extractionFileParamSchema, invalidParamResponse$1("Invalid extraction file name")), async (c) => {
|
|
15143
|
+
const { name: name$1 } = c.req.valid("param");
|
|
15144
|
+
const filePath = path.join(extractedDir, name$1);
|
|
15145
|
+
const schemaName = schemaNameFromExtractionFile(name$1);
|
|
15146
|
+
if (!schemaName) return c.json({
|
|
15147
|
+
success: false,
|
|
15148
|
+
error: "Cannot infer schema name from extraction file name"
|
|
15149
|
+
}, 400);
|
|
15150
|
+
const aiConfig = await readAIConfig(aiexDir);
|
|
15151
|
+
if (!aiConfig?.notion?.enabled) return c.json({
|
|
15152
|
+
success: false,
|
|
15153
|
+
error: "Notion export is not enabled. Configure Notion settings first."
|
|
15154
|
+
}, 400);
|
|
15155
|
+
if (!aiConfig.notion.schemas?.[schemaName]?.databaseId?.trim()) return c.json({
|
|
15156
|
+
success: false,
|
|
15157
|
+
error: `Notion database is not configured for schema "${schemaName}".`
|
|
15158
|
+
}, 400);
|
|
15159
|
+
try {
|
|
15160
|
+
const data = await readFile(filePath);
|
|
15161
|
+
if (!data || typeof data !== "object" || Array.isArray(data)) return c.json({
|
|
15162
|
+
success: false,
|
|
15163
|
+
error: "Extraction result is not a JSON object and cannot be written to Notion."
|
|
15164
|
+
}, 400);
|
|
15165
|
+
const page = await writeNotionPage(aiConfig.notion, schemaName, data);
|
|
15166
|
+
const notionPages = [{
|
|
15167
|
+
databaseId: page.databaseId,
|
|
15168
|
+
pageId: page.pageId
|
|
15169
|
+
}];
|
|
15170
|
+
let record = (await listExtractionAuditRecords(aiexDir)).find((record$1) => record$1.outputName === name$1);
|
|
15171
|
+
if (!record) record = await createExtractionAuditRecord(aiexDir, {
|
|
15172
|
+
schemaName,
|
|
15173
|
+
source: {
|
|
15174
|
+
type: "file",
|
|
15175
|
+
filePath,
|
|
15176
|
+
fileName: name$1
|
|
15177
|
+
}
|
|
15178
|
+
});
|
|
15179
|
+
if (record) await updateExtractionAuditRecord(aiexDir, record.id, {
|
|
15180
|
+
status: "succeeded",
|
|
15181
|
+
outputPath: filePath,
|
|
15182
|
+
outputName: name$1,
|
|
15183
|
+
notionPages,
|
|
15184
|
+
error: void 0
|
|
15185
|
+
});
|
|
15186
|
+
return c.json({
|
|
15187
|
+
success: true,
|
|
15188
|
+
notionPages
|
|
15189
|
+
});
|
|
15190
|
+
} catch (error) {
|
|
15191
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
15192
|
+
const record = (await listExtractionAuditRecords(aiexDir)).find((record$1) => record$1.outputName === name$1);
|
|
15193
|
+
if (record) await updateExtractionAuditRecord(aiexDir, record.id, {
|
|
15194
|
+
status: "failed",
|
|
15195
|
+
outputPath: filePath,
|
|
15196
|
+
outputName: name$1,
|
|
15197
|
+
error: message
|
|
15198
|
+
});
|
|
15199
|
+
return c.json({
|
|
15200
|
+
success: false,
|
|
15201
|
+
error: message
|
|
15202
|
+
}, 500);
|
|
15203
|
+
}
|
|
15204
|
+
});
|
|
15205
|
+
return app;
|
|
15206
|
+
}
|
|
15207
|
+
|
|
15208
|
+
//#endregion
|
|
15209
|
+
//#region src/server/routes/extract.ts
|
|
15210
|
+
/**
 * Read a form field as a trimmed string.
 *
 * Arrays are unwrapped to their first element (repeatedly, if nested).
 *
 * @param {unknown} value Raw form value.
 * @returns {string} The trimmed string, or "" when the value is not a string.
 */
function getFormString(value) {
  let current = value;
  while (Array.isArray(current)) current = current[0];
  return typeof current === "string" ? current.trim() : "";
}
|
|
15214
|
+
/**
 * Read a form field as an uploaded file.
 *
 * Arrays are unwrapped to their first element (repeatedly, if nested).
 *
 * @param {unknown} value Raw form value.
 * @returns {File|null} The file when it is a non-empty `File`, otherwise null.
 */
function getFormFile(value) {
  let current = value;
  while (Array.isArray(current)) current = current[0];
  if (!(current instanceof File)) return null;
  return current.size > 0 ? current : null;
}
|
|
15218
|
+
/**
 * Turn an uploaded file name into a safe base name.
 *
 * Directory components are dropped and every character outside
 * `[A-Za-z0-9_.-]` becomes "_".
 *
 * @param {string} name$1 Client-supplied file name.
 * @returns {string} The sanitised name, or "upload.txt" when nothing is left.
 */
function safeUploadName(name$1) {
  const cleaned = path.basename(name$1).replace(/[^\w.-]/g, "_");
  return cleaned === "" ? "upload.txt" : cleaned;
}
|
|
15221
|
+
/**
 * Persist an uploaded file under `uploadsDir` as `<id>-<sanitised name>`.
 *
 * @param {File} file Uploaded file (its `name` and `arrayBuffer()` are used).
 * @param {string} uploadsDir Target directory; created recursively if missing.
 * @param {string} id Prefix for the stored file name.
 * @returns {Promise<string>} Path of the written file.
 */
async function saveUploadToFile(file, uploadsDir, id) {
  await fs.mkdir(uploadsDir, { recursive: true });
  const storedName = `${id}-${safeUploadName(file.name)}`;
  const destination = path.join(uploadsDir, storedName);
  const contents = await file.arrayBuffer();
  await fs.writeFile(destination, Buffer.from(contents));
  return destination;
}
|
|
15228
|
+
/**
 * Execute an extraction for an existing audit record and answer with a JSON
 * `Response`.
 *
 * Configuration problems (no AI config, no API key, no models, unknown model)
 * mark the audit record as failed and yield HTTP 400. A failed extraction or
 * a failed Notion write marks the record as failed and yields HTTP 500.
 * On success the record is marked as succeeded and HTTP 200 is returned.
 *
 * @param {object} input Request data: `aiexDir`, `config`, `auditId`,
 *   `schemaName`, `text`, `filePath`, `modelName`.
 * @returns {Promise<Response>} JSON response carrying `success` and `auditId`.
 */
async function executeAuditedExtraction(input) {
  const { aiexDir, auditId, schemaName } = input;

  const jsonResponse = (httpStatus, payload) => new Response(JSON.stringify(payload), {
    status: httpStatus,
    headers: { "content-type": "application/json" }
  });
  // Mark the audit record as failed, then report the stored error back to the caller.
  const rejectWith = async (httpStatus, changes) => {
    const failed = await updateExtractionAuditRecord(aiexDir, auditId, {
      status: "failed",
      ...changes
    });
    return jsonResponse(httpStatus, {
      success: false,
      error: failed.error,
      auditId: failed.id
    });
  };

  const aiConfig = await readAIConfig(aiexDir);
  let configProblem;
  if (!aiConfig) {
    configProblem = "AI configuration not found. Configure AI settings first.";
  } else if (!aiConfig.provider.apiKey) {
    configProblem = "API Key not configured. Configure AI settings first.";
  } else if (!aiConfig.provider.models?.length) {
    configProblem = "No models configured. Add at least one model in AI Settings.";
  }
  if (configProblem !== undefined) return rejectWith(400, { error: configProblem });

  let modelOverride;
  if (input.modelName) {
    modelOverride = aiConfig.provider.models.find((model) => model.name === input.modelName);
    if (!modelOverride) {
      return rejectWith(400, { error: `Model "${input.modelName}" not found in AI settings` });
    }
  }

  // File input replaces the text/path pair with whatever readExtractFileInput returns.
  let inputText = input.text;
  let inputFilePath = input.filePath;
  if (input.filePath) {
    const source = await readExtractFileInput(input.filePath, aiConfig);
    inputText = source.text;
    inputFilePath = source.filePath;
  }

  const result = await extractSingle(aiexDir, input.config, aiConfig, schemaName, inputText, inputFilePath, modelOverride, { quiet: true });
  if (!result.success) {
    return rejectWith(500, { error: result.error || "Extraction failed" });
  }

  const outputNameOf = () => (result.outputPath ? path.basename(result.outputPath) : void 0);

  const notionPages = [];
  const notionConfigured = aiConfig.notion?.enabled && aiConfig.notion.schemas?.[schemaName]?.databaseId?.trim();
  if (notionConfigured) {
    try {
      const data = result.data;
      if (!data || typeof data !== "object" || Array.isArray(data)) {
        throw new Error("Extraction result is not an object and cannot be written to Notion.");
      }
      notionPages.push(await writeNotionPage(aiConfig.notion, schemaName, data));
    } catch (error) {
      return rejectWith(500, {
        outputPath: result.outputPath,
        outputName: outputNameOf(),
        tablesInserted: result.tablesInserted,
        tokensUsed: result.tokensUsed,
        error: error instanceof Error ? error.message : String(error)
      });
    }
  }

  const record = await updateExtractionAuditRecord(aiexDir, auditId, {
    status: "succeeded",
    outputPath: result.outputPath,
    outputName: outputNameOf(),
    tablesInserted: result.tablesInserted,
    notionPages: notionPages.length > 0 ? notionPages : void 0,
    tokensUsed: result.tokensUsed
  });
  return jsonResponse(200, {
    success: true,
    outputPath: record.outputPath,
    outputName: record.outputName,
    tablesInserted: record.tablesInserted,
    notionPages: record.notionPages,
    tokensUsed: record.tokensUsed,
    auditId: record.id
  });
}
|
|
15352
|
+
/**
 * Builds the Hono sub-application that serves the extraction endpoints.
 *
 * Registered routes (relative to wherever the returned app is mounted):
 *   - GET    /extract/records            list extraction audit records
 *   - POST   /extract                    run an extraction from form text or an uploaded file
 *   - POST   /extract/records/:id/retry  re-run a recorded extraction under a new audit record
 *   - DELETE /extract/records/:id        delete an audit record
 *
 * The audit directory is the directory containing `config.schemaPath`;
 * uploaded files are saved in its `uploads` sub-directory.
 *
 * Every handler is wrapped so that an unexpected exception is answered with
 * the same `{ success: false, error }` JSON payload and HTTP 500.
 *
 * @param {object} config - Server configuration; `schemaPath` is read here and
 *   the whole object is forwarded to `executeAuditedExtraction`.
 * @returns {Hono} The configured sub-application.
 */
function extractRoutes(config) {
	const app = new Hono();
	const aiexDir = path.dirname(config.schemaPath);
	const uploadsDir = path.join(aiexDir, "uploads");
	const errorMessage = (error) => error instanceof Error ? error.message : String(error);
	const notFound = (c) => c.json({
		success: false,
		error: "Extraction record not found"
	}, 404);
	// Converts any exception escaping a handler into the JSON error envelope
	// instead of letting it fall through to the framework's default handling.
	const guarded = (handler) => async (c) => {
		try {
			return await handler(c);
		} catch (error) {
			return c.json({
				success: false,
				error: errorMessage(error)
			}, 500);
		}
	};
	app.get("/extract/records", guarded(async (c) => {
		return c.json(await listExtractionAuditRecords(aiexDir));
	}));
	app.post("/extract", guarded(async (c) => {
		const body = await c.req.parseBody();
		const schemaName = getFormString(body.schema);
		const text$1 = getFormString(body.text);
		const modelName = getFormString(body.model);
		const file = getFormFile(body.file);
		if (!schemaName) return c.json({
			success: false,
			error: "Schema is required"
		}, 400);
		if (!text$1 && !file) return c.json({
			success: false,
			error: "Provide text or upload a file to extract"
		}, 400);
		if (text$1 && file) return c.json({
			success: false,
			error: "Text and file input cannot be used together"
		}, 400);
		const fileName = file ? safeUploadName(file.name) : void 0;
		const audit = await createExtractionAuditRecord(aiexDir, {
			schemaName,
			modelName,
			source: file ? {
				type: "file",
				fileName
			} : {
				type: "text",
				text: text$1
			}
		});
		let filePath;
		if (file) try {
			filePath = await saveUploadToFile(file, uploadsDir, audit.id);
			await updateExtractionAuditRecord(aiexDir, audit.id, { source: {
				type: "file",
				filePath,
				fileName
			} });
		} catch (error) {
			// The audit record already exists at this point: close it out as
			// failed so it is not left in its initial status forever.
			const message = errorMessage(error);
			try {
				await updateExtractionAuditRecord(aiexDir, audit.id, {
					status: "failed",
					error: message
				});
			} catch {
				// Best effort only: the upload failure is the error worth reporting.
			}
			return c.json({
				success: false,
				error: message,
				auditId: audit.id
			}, 500);
		}
		return executeAuditedExtraction({
			aiexDir,
			config,
			auditId: audit.id,
			schemaName,
			text: text$1,
			filePath,
			modelName
		});
	}));
	app.post("/extract/records/:id/retry", guarded(async (c) => {
		const original = await readExtractionAuditRecord(aiexDir, c.req.param("id"));
		if (!original) return notFound(c);
		// A retry gets its own audit record, linked to the original via `retryOf`.
		const retryAudit = await createExtractionAuditRecord(aiexDir, {
			schemaName: original.schemaName,
			modelName: original.modelName,
			source: original.source,
			retryOf: original.id
		});
		return executeAuditedExtraction({
			aiexDir,
			config,
			auditId: retryAudit.id,
			schemaName: original.schemaName,
			text: original.source.type === "text" ? original.source.text ?? "" : "",
			filePath: original.source.type === "file" ? original.source.filePath : void 0,
			modelName: original.modelName
		});
	}));
	app.delete("/extract/records/:id", guarded(async (c) => {
		const id = c.req.param("id");
		if (!await readExtractionAuditRecord(aiexDir, id)) return notFound(c);
		await deleteExtractionAuditRecord(aiexDir, id);
		return c.json({ success: true });
	}));
	return app;
}
|
|
14410
15446
|
|
|
@@ -14530,6 +15566,7 @@ function createApp(config, staticDir) {
|
|
|
14530
15566
|
app.use("*", cors({ origin: (origin) => LOCAL_ORIGIN_RE.test(origin) ? origin : null }));
|
|
14531
15567
|
app.route("/api", schemaRoutes(config));
|
|
14532
15568
|
app.route("/api", aiRoutes(config));
|
|
15569
|
+
app.route("/api", extractRoutes(config));
|
|
14533
15570
|
app.route("/api", dataRoutes(config));
|
|
14534
15571
|
app.use("/*", serveStatic({
|
|
14535
15572
|
root: staticDir,
|