aiex-cli 0.0.2-beta.1 → 0.0.2-beta.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -7
- package/dist/cli.mjs +460 -195
- package/dist/{doctor-collector-Cbqgwx__.mjs → doctor-collector-BEyduuZS.mjs} +1 -1
- package/dist/index.mjs +1 -1
- package/dist/web/assets/AISettings-DwXkpWZU.js +346 -0
- package/dist/web/assets/DataBrowser-BVXdTA8A.js +6 -0
- package/dist/web/assets/ExtractionViewer-DNcRCmNK.js +1 -0
- package/dist/web/assets/{JsonSchemaEditor-DXhYW-Un.js → JsonSchemaEditor-D477lV5a.js} +3 -3
- package/dist/web/assets/{api-client-CIC2X8_4.js → api-client-CG1VV5gz.js} +1 -1
- package/dist/web/assets/{cssMode-CCz1uj8b.js → cssMode-CPThwItX.js} +1 -1
- package/dist/web/assets/dialog-CUkPLPNP.js +109 -0
- package/dist/web/assets/{editor.main-DOue-nGf.js → editor.main-BnOkwRFv.js} +2 -2
- package/dist/web/assets/{freemarker2-C87H7V8i.js → freemarker2-DWDTYVJR.js} +1 -1
- package/dist/web/assets/{handlebars-TLnoktZW.js → handlebars-D4DzjGQ7.js} +1 -1
- package/dist/web/assets/{html-Rewbtgcp.js → html-DnzhKSoD.js} +1 -1
- package/dist/web/assets/{htmlMode-Cr9y3YUi.js → htmlMode-CR7UKfEH.js} +1 -1
- package/dist/web/assets/{index-CWy8KajQ.js → index-CxdBxxXb.js} +7 -7
- package/dist/web/assets/index-D4eIQ8eU.css +2 -0
- package/dist/web/assets/{javascript-BO8DPECx.js → javascript-D2srszZ8.js} +1 -1
- package/dist/web/assets/{jsonMode-B78m_Y1l.js → jsonMode-B4jaPYEr.js} +1 -1
- package/dist/web/assets/{liquid-CDp_8YUE.js → liquid-CIT2Wl_l.js} +1 -1
- package/dist/web/assets/{mdx-CeR1GULE.js → mdx-CWLaEOFy.js} +1 -1
- package/dist/web/assets/{monaco.contribution-tCitzj1_.js → monaco.contribution-DDv5ldfS.js} +2 -2
- package/dist/web/assets/{python-DWmB1hQ6.js → python-6CGfpCNq.js} +1 -1
- package/dist/web/assets/{razor-BVk762Lq.js → razor-DEMMh3TD.js} +1 -1
- package/dist/web/assets/runtime-dom.esm-bundler-ei_N7Xjw.js +1 -0
- package/dist/web/assets/select-BGex2SPs.js +439 -0
- package/dist/web/assets/{tsMode-Gjs5D1gt.js → tsMode-Cm1NtjPs.js} +1 -1
- package/dist/web/assets/{typescript-Cw6YtwrM.js → typescript-BM9aPEFg.js} +1 -1
- package/dist/web/assets/{xml-guZq0YZJ.js → xml-CoSbvcg5.js} +1 -1
- package/dist/web/assets/{yaml-B9TbU-LX.js → yaml-56GOgy8k.js} +1 -1
- package/dist/web/index.html +8 -9
- package/package.json +1 -1
- package/dist/web/assets/AISettings-Cm8fAfaG.js +0 -205
- package/dist/web/assets/DataBrowser-DQwOvooY.js +0 -6
- package/dist/web/assets/ExtractRunner-CzfLlCCf.js +0 -1
- package/dist/web/assets/ExtractionViewer-D7tghmR-.js +0 -1
- package/dist/web/assets/baseinput-B7PU5-38.js +0 -2
- package/dist/web/assets/checkbox-CCQwMfLd.js +0 -142
- package/dist/web/assets/dialog-8xaIXBKO.js +0 -108
- package/dist/web/assets/index-BEorm4a5.css +0 -2
- package/dist/web/assets/runtime-dom.esm-bundler-DZwuSnld.js +0 -1
- package/dist/web/assets/select-DKPPF403.js +0 -438
- package/dist/web/assets/utils-CZhxh2Ya.js +0 -2
- /package/dist/web/assets/{abap-C3UM4cME.js → abap-Bgec7Keq.js} +0 -0
- /package/dist/web/assets/{apex-BQBZvQmN.js → apex-VBlPwEoQ.js} +0 -0
- /package/dist/web/assets/{azcli-Dn9Awrok.js → azcli-DKqrEFBx.js} +0 -0
- /package/dist/web/assets/{bat-JgEezSDo.js → bat-DdgQWy_0.js} +0 -0
- /package/dist/web/assets/{bicep-vcQeC7wE.js → bicep-CRMM43EB.js} +0 -0
- /package/dist/web/assets/{cameligo-C0cr0T3L.js → cameligo-UatALtML.js} +0 -0
- /package/dist/web/assets/{clojure-Brc1-rbW.js → clojure-D8JU08RA.js} +0 -0
- /package/dist/web/assets/{coffee-BiZYU83a.js → coffee-C56wu358.js} +0 -0
- /package/dist/web/assets/{cpp-ikWHCInz.js → cpp-CyZLvhJG.js} +0 -0
- /package/dist/web/assets/{csharp-BRWs_LfH.js → csharp-BJl3ixva.js} +0 -0
- /package/dist/web/assets/{csp-CcAumoJw.js → csp-CxEKxmO-.js} +0 -0
- /package/dist/web/assets/{css-BZm6paiA.js → css-B0t_muXd.js} +0 -0
- /package/dist/web/assets/{cypher-CDQMONdb.js → cypher-D1hqiMFD.js} +0 -0
- /package/dist/web/assets/{dart-Ci4SZdF1.js → dart-Bz550Pyv.js} +0 -0
- /package/dist/web/assets/{dist-B5Yem2Jn.js → dist-9yHVMqQ0.js} +0 -0
- /package/dist/web/assets/{dockerfile-BV0tAr-M.js → dockerfile-CIXgVAuA.js} +0 -0
- /package/dist/web/assets/{ecl-CP7nM2KN.js → ecl-D9qbvZoA.js} +0 -0
- /package/dist/web/assets/{editor.api-BU_q4v8i.js → editor.api-C8BHpRhn.js} +0 -0
- /package/dist/web/assets/{elixir-GcA6wFiI.js → elixir-b2M38fAy.js} +0 -0
- /package/dist/web/assets/{flow9-CIb9youF.js → flow9-Dq1UYMkt.js} +0 -0
- /package/dist/web/assets/{fsharp-BVaBE4co.js → fsharp-BaeLhgfq.js} +0 -0
- /package/dist/web/assets/{go-Bbqf306x.js → go-Bd-NFKIC.js} +0 -0
- /package/dist/web/assets/{graphql-DJPrC4l-.js → graphql-DZVerJfy.js} +0 -0
- /package/dist/web/assets/{hcl-QyfWVWpM.js → hcl-CAVzrZfH.js} +0 -0
- /package/dist/web/assets/{ini-CgstZeS8.js → ini-CyXdX58t.js} +0 -0
- /package/dist/web/assets/{java-D4AG88ZY.js → java-B5pNgvhy.js} +0 -0
- /package/dist/web/assets/{julia-CN8U9648.js → julia-XRhmV3AN.js} +0 -0
- /package/dist/web/assets/{kotlin-gNNgpJhY.js → kotlin-DOd3J5vr.js} +0 -0
- /package/dist/web/assets/{less-C3SY2L8t.js → less-veZSnyw6.js} +0 -0
- /package/dist/web/assets/{lexon-CznnqzUX.js → lexon-QWGkuK0H.js} +0 -0
- /package/dist/web/assets/{lua-f3xyJgy5.js → lua-CYGpjuO5.js} +0 -0
- /package/dist/web/assets/{m3-Cl7J89p-.js → m3-yNnrZkdc.js} +0 -0
- /package/dist/web/assets/{markdown-Bv2fnzzT.js → markdown-BCSWEPSX.js} +0 -0
- /package/dist/web/assets/{mips-D6rXUTWa.js → mips-OpYmcC30.js} +0 -0
- /package/dist/web/assets/{msdax-Bb1N2x5J.js → msdax-2oxoTO9Z.js} +0 -0
- /package/dist/web/assets/{mysql-DXSr6oD7.js → mysql-5KlC-K_9.js} +0 -0
- /package/dist/web/assets/{object-utils-Cm2M3fPO.js → object-utils-I4gWdSnS.js} +0 -0
- /package/dist/web/assets/{objective-c-CEJiVkDa.js → objective-c-CcDCgtLx.js} +0 -0
- /package/dist/web/assets/{pascal-BtkMEIba.js → pascal-BZGsbaEV.js} +0 -0
- /package/dist/web/assets/{pascaligo-C7FAwqk7.js → pascaligo-DtD5qU3G.js} +0 -0
- /package/dist/web/assets/{perl-D9kqkBbN.js → perl-C1jNNS3E.js} +0 -0
- /package/dist/web/assets/{pgsql-BjGTBL1W.js → pgsql-CT0fhiZa.js} +0 -0
- /package/dist/web/assets/{php-BN0c0noA.js → php-D6DrXoPM.js} +0 -0
- /package/dist/web/assets/{pla-B94QTqOt.js → pla-b3-HN2pF.js} +0 -0
- /package/dist/web/assets/{postiats-DH91dqBs.js → postiats-Bin2ApVS.js} +0 -0
- /package/dist/web/assets/{powerquery-D7P0oUen.js → powerquery-7ASnn-ZG.js} +0 -0
- /package/dist/web/assets/{powershell-CCVHmJax.js → powershell-t4p7sU1H.js} +0 -0
- /package/dist/web/assets/{preload-helper-DgFuoWHe.js → preload-helper-Dd-HcVz_.js} +0 -0
- /package/dist/web/assets/{protobuf-BIP7pixC.js → protobuf-BUGeWa_j.js} +0 -0
- /package/dist/web/assets/{pug-DcbLK7HH.js → pug-BuKcgC9s.js} +0 -0
- /package/dist/web/assets/{qsharp-B-VY_WOG.js → qsharp-DxLLX8mo.js} +0 -0
- /package/dist/web/assets/{r-DwRtsJsj.js → r-DMlFgn7A.js} +0 -0
- /package/dist/web/assets/{redis-CaW0tkwu.js → redis-cXItkC5u.js} +0 -0
- /package/dist/web/assets/{redshift-3tS8G0ME.js → redshift-BZVbW7HE.js} +0 -0
- /package/dist/web/assets/{restructuredtext-_TNyGyK0.js → restructuredtext-BzjxwS8h.js} +0 -0
- /package/dist/web/assets/{ruby-A-MwVfO4.js → ruby-C5nyLV4l.js} +0 -0
- /package/dist/web/assets/{rust-oemlUIvG.js → rust-BcmMsHdf.js} +0 -0
- /package/dist/web/assets/{sb-BDZuaI3W.js → sb-Dnb1iy6B.js} +0 -0
- /package/dist/web/assets/{scala-Bfo2loK4.js → scala-anMIFYpA.js} +0 -0
- /package/dist/web/assets/{scheme-N2eo7rjB.js → scheme-BItQTe08.js} +0 -0
- /package/dist/web/assets/{scss-vjjSCTgN.js → scss-BOv51BJ5.js} +0 -0
- /package/dist/web/assets/{shell-Bfb9Yq6w.js → shell-BsRYRTNN.js} +0 -0
- /package/dist/web/assets/{solidity-C9RbukzG.js → solidity-BtuLgGDx.js} +0 -0
- /package/dist/web/assets/{sophia-DWV_MWOg.js → sophia-B0Vkc5MF.js} +0 -0
- /package/dist/web/assets/{sparql-iMXILWhh.js → sparql-B7lvkZQM.js} +0 -0
- /package/dist/web/assets/{sql-CJDj31JM.js → sql-DvP5MpA3.js} +0 -0
- /package/dist/web/assets/{st-BG9AQ1OO.js → st-GVUeyB3U.js} +0 -0
- /package/dist/web/assets/{swift-B579DvHm.js → swift-DSPIoCjm.js} +0 -0
- /package/dist/web/assets/{systemverilog-BNgaF3ZX.js → systemverilog-Icj2-k23.js} +0 -0
- /package/dist/web/assets/{tcl-grdtJiUA.js → tcl-Cd8KQcm-.js} +0 -0
- /package/dist/web/assets/{twig-JAsFXBZw.js → twig-CBHmt8z3.js} +0 -0
- /package/dist/web/assets/{typespec-D3hIQXEU.js → typespec-Ckc037mq.js} +0 -0
- /package/dist/web/assets/{vb-H38jRcEz.js → vb-B97GW9Wb.js} +0 -0
- /package/dist/web/assets/{wgsl-BC5Grc5r.js → wgsl-DIKmb3YH.js} +0 -0
package/dist/cli.mjs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { S as version, T as formatDoctorDiagnosticsJson, _ as createConfig, a as parseJsonSchema, b as name, c as getDefaultAIConfig, d as DEFAULT_MARKITDOWN_CONFIG, f as DEFAULT_MINERU_CONFIG, g as AIConfigSchema, h as PLACEHOLDER_TEXT, i as JsonSchemaDefinitionSchema, l as readAIConfig, m as PLACEHOLDER_SCHEMA, n as createMigrationConfig, o as toSnakeCase, p as DEFAULT_PROMPT_CONFIG, s as generateDrizzleSchema, t as collectDoctorDiagnostics, u as writeAIConfig, v as seedConfig, w as doctorDiagnosticsTableRows, x as package_default, y as description } from "./doctor-collector-Cbqgwx__.mjs";
|
|
1
|
+
import { S as version, T as formatDoctorDiagnosticsJson, _ as createConfig, a as parseJsonSchema, b as name, c as getDefaultAIConfig, d as DEFAULT_MARKITDOWN_CONFIG, f as DEFAULT_MINERU_CONFIG, g as AIConfigSchema, h as PLACEHOLDER_TEXT, i as JsonSchemaDefinitionSchema, l as readAIConfig, m as PLACEHOLDER_SCHEMA, n as createMigrationConfig, o as toSnakeCase, p as DEFAULT_PROMPT_CONFIG, s as generateDrizzleSchema, t as collectDoctorDiagnostics, u as writeAIConfig, v as seedConfig, w as doctorDiagnosticsTableRows, x as package_default, y as description } from "./doctor-collector-BEyduuZS.mjs";
|
|
2
2
|
import { createRequire } from "node:module";
|
|
3
3
|
import fs from "node:fs/promises";
|
|
4
4
|
import os from "node:os";
|
|
@@ -23,9 +23,10 @@ import { jsonrepair } from "jsonrepair";
|
|
|
23
23
|
import fs$1 from "node:fs";
|
|
24
24
|
import Database from "better-sqlite3";
|
|
25
25
|
import { glob, globSync } from "tinyglobby";
|
|
26
|
+
import { Client, extractNotionId } from "@notionhq/client";
|
|
27
|
+
import { Buffer } from "node:buffer";
|
|
26
28
|
import { execa } from "execa";
|
|
27
29
|
import { extractText, getDocumentProxy, getMeta } from "unpdf";
|
|
28
|
-
import { Buffer } from "node:buffer";
|
|
29
30
|
import { execFile } from "node:child_process";
|
|
30
31
|
import { promisify } from "node:util";
|
|
31
32
|
import { serve } from "@hono/node-server";
|
|
@@ -33,7 +34,6 @@ import open from "open";
|
|
|
33
34
|
import { serveStatic } from "@hono/node-server/serve-static";
|
|
34
35
|
import { Hono } from "hono";
|
|
35
36
|
import { cors } from "hono/cors";
|
|
36
|
-
import { Client } from "@notionhq/client";
|
|
37
37
|
import { zValidator } from "@hono/zod-validator";
|
|
38
38
|
import { Kysely, SqliteDialect, sql } from "kysely";
|
|
39
39
|
|
|
@@ -12990,6 +12990,7 @@ function initLangfuse(config) {
|
|
|
12990
12990
|
}
|
|
12991
12991
|
const SYSTEM_PROMPT_REGEX = /## System Prompt\n([\s\S]*?)(?=## User Prompt|$)/;
|
|
12992
12992
|
const USER_PROMPT_REGEX = /## User Prompt Template\n([\s\S]*)$/;
|
|
12993
|
+
const OPENAI_COMPATIBLE_PROVIDER_NAME = "openai-compatible";
|
|
12993
12994
|
function detectMimeType(filePath) {
|
|
12994
12995
|
return mime.getType(filePath) ?? "application/octet-stream";
|
|
12995
12996
|
}
|
|
@@ -13148,8 +13149,9 @@ async function extractStructuredData(input) {
|
|
|
13148
13149
|
if (useTelemetry) initLangfuse(config);
|
|
13149
13150
|
const provider = createOpenAICompatible({
|
|
13150
13151
|
baseURL: config.provider.baseURL,
|
|
13151
|
-
name:
|
|
13152
|
-
apiKey: config.provider.apiKey
|
|
13152
|
+
name: OPENAI_COMPATIBLE_PROVIDER_NAME,
|
|
13153
|
+
apiKey: config.provider.apiKey,
|
|
13154
|
+
supportsStructuredOutputs: useStructuredOutput
|
|
13153
13155
|
});
|
|
13154
13156
|
let system;
|
|
13155
13157
|
let user;
|
|
@@ -13487,6 +13489,237 @@ async function deleteExtractionAuditRecord(aiexDir, id) {
|
|
|
13487
13489
|
return true;
|
|
13488
13490
|
}
|
|
13489
13491
|
|
|
13492
|
+
//#endregion
|
|
13493
|
+
//#region src/core/notion-sink.ts
|
|
13494
|
+
const RICH_TEXT_LIMIT = 2e3;
|
|
13495
|
+
const UUID_RE = /^[0-9a-f]{32}$/i;
|
|
13496
|
+
const HYPHENATED_UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
|
|
13497
|
+
function truncateText(value) {
|
|
13498
|
+
return value.length > RICH_TEXT_LIMIT ? value.slice(0, RICH_TEXT_LIMIT) : value;
|
|
13499
|
+
}
|
|
13500
|
+
function stringifyValue(value) {
|
|
13501
|
+
if (value === null || value === void 0) return "";
|
|
13502
|
+
if (typeof value === "string") return value;
|
|
13503
|
+
if (typeof value === "number" || typeof value === "boolean") return String(value);
|
|
13504
|
+
return JSON.stringify(value);
|
|
13505
|
+
}
|
|
13506
|
+
function asNumber(value) {
|
|
13507
|
+
if (typeof value === "number" && Number.isFinite(value)) return value;
|
|
13508
|
+
if (typeof value === "string" && value.trim()) {
|
|
13509
|
+
const parsed = Number(value);
|
|
13510
|
+
return Number.isFinite(parsed) ? parsed : null;
|
|
13511
|
+
}
|
|
13512
|
+
return null;
|
|
13513
|
+
}
|
|
13514
|
+
function asBoolean(value) {
|
|
13515
|
+
if (typeof value === "boolean") return value;
|
|
13516
|
+
if (typeof value === "number") return value !== 0;
|
|
13517
|
+
if (typeof value === "string") {
|
|
13518
|
+
const normalized = value.trim().toLowerCase();
|
|
13519
|
+
return [
|
|
13520
|
+
"true",
|
|
13521
|
+
"yes",
|
|
13522
|
+
"1",
|
|
13523
|
+
"y"
|
|
13524
|
+
].includes(normalized);
|
|
13525
|
+
}
|
|
13526
|
+
return !!value;
|
|
13527
|
+
}
|
|
13528
|
+
function asDateStart(value) {
|
|
13529
|
+
if (value instanceof Date && !Number.isNaN(value.getTime())) return value.toISOString();
|
|
13530
|
+
if (typeof value === "number" && Number.isFinite(value)) {
|
|
13531
|
+
const date = new Date(value);
|
|
13532
|
+
return Number.isNaN(date.getTime()) ? null : date.toISOString();
|
|
13533
|
+
}
|
|
13534
|
+
if (typeof value === "string" && value.trim()) {
|
|
13535
|
+
const ms = Date.parse(value);
|
|
13536
|
+
if (Number.isNaN(ms)) return null;
|
|
13537
|
+
return new Date(ms).toISOString();
|
|
13538
|
+
}
|
|
13539
|
+
return null;
|
|
13540
|
+
}
|
|
13541
|
+
function asStringArray(value) {
|
|
13542
|
+
if (Array.isArray(value)) return value.map((item) => stringifyValue(item).trim()).filter(Boolean);
|
|
13543
|
+
const text$1 = stringifyValue(value).trim();
|
|
13544
|
+
return text$1 ? [text$1] : [];
|
|
13545
|
+
}
|
|
13546
|
+
function getValueAtPath(data, path$1) {
|
|
13547
|
+
if (!path$1.includes(".")) return Object.hasOwn(data, path$1) ? {
|
|
13548
|
+
found: true,
|
|
13549
|
+
value: data[path$1]
|
|
13550
|
+
} : {
|
|
13551
|
+
found: false,
|
|
13552
|
+
value: void 0
|
|
13553
|
+
};
|
|
13554
|
+
let current = data;
|
|
13555
|
+
for (const part of path$1.split(".")) {
|
|
13556
|
+
if (!current || typeof current !== "object" || Array.isArray(current)) return {
|
|
13557
|
+
found: false,
|
|
13558
|
+
value: void 0
|
|
13559
|
+
};
|
|
13560
|
+
const record = current;
|
|
13561
|
+
if (!Object.hasOwn(record, part)) return {
|
|
13562
|
+
found: false,
|
|
13563
|
+
value: void 0
|
|
13564
|
+
};
|
|
13565
|
+
current = record[part];
|
|
13566
|
+
}
|
|
13567
|
+
return {
|
|
13568
|
+
found: true,
|
|
13569
|
+
value: current
|
|
13570
|
+
};
|
|
13571
|
+
}
|
|
13572
|
+
function buildPropertyValue(type, value) {
|
|
13573
|
+
const text$1 = truncateText(stringifyValue(value));
|
|
13574
|
+
switch (type) {
|
|
13575
|
+
case "title": return { title: text$1 ? [{ text: { content: text$1 } }] : [] };
|
|
13576
|
+
case "rich_text": return { rich_text: text$1 ? [{ text: { content: text$1 } }] : [] };
|
|
13577
|
+
case "number": return { number: asNumber(value) };
|
|
13578
|
+
case "checkbox": return { checkbox: asBoolean(value) };
|
|
13579
|
+
case "date": {
|
|
13580
|
+
const start = asDateStart(value);
|
|
13581
|
+
return { date: start ? { start } : null };
|
|
13582
|
+
}
|
|
13583
|
+
case "select": {
|
|
13584
|
+
const name$1 = stringifyValue(value).trim();
|
|
13585
|
+
return { select: name$1 ? { name: name$1 } : null };
|
|
13586
|
+
}
|
|
13587
|
+
case "multi_select": return { multi_select: asStringArray(value).map((name$1) => ({ name: name$1 })) };
|
|
13588
|
+
case "url": return { url: text$1 || null };
|
|
13589
|
+
case "email": return { email: text$1 || null };
|
|
13590
|
+
case "phone_number": return { phone_number: text$1 || null };
|
|
13591
|
+
default: return null;
|
|
13592
|
+
}
|
|
13593
|
+
}
|
|
13594
|
+
function findTitleProperty(properties, preferred) {
|
|
13595
|
+
if (preferred && properties[preferred]?.type === "title") return preferred;
|
|
13596
|
+
return Object.entries(properties).find(([, property]) => property?.type === "title")?.[0] ?? null;
|
|
13597
|
+
}
|
|
13598
|
+
function hyphenateDatabaseId(value) {
|
|
13599
|
+
const id = value.replace(/-/g, "");
|
|
13600
|
+
if (!UUID_RE.test(id)) return value;
|
|
13601
|
+
return `${id.slice(0, 8)}-${id.slice(8, 12)}-${id.slice(12, 16)}-${id.slice(16, 20)}-${id.slice(20)}`;
|
|
13602
|
+
}
|
|
13603
|
+
function parseNotionDatabaseId(value) {
|
|
13604
|
+
const input = value.trim();
|
|
13605
|
+
if (!input) return "";
|
|
13606
|
+
const extracted = extractNotionId(input);
|
|
13607
|
+
if (extracted) return extracted;
|
|
13608
|
+
if (HYPHENATED_UUID_RE.test(input)) return input;
|
|
13609
|
+
if (UUID_RE.test(input)) return hyphenateDatabaseId(input);
|
|
13610
|
+
return input;
|
|
13611
|
+
}
|
|
13612
|
+
function normalizeFieldName(value) {
|
|
13613
|
+
return value.normalize("NFKC").toLowerCase().replace(/[^\p{Letter}\p{Number}]+/gu, "");
|
|
13614
|
+
}
|
|
13615
|
+
function buildMatchKeys(field) {
|
|
13616
|
+
return [
|
|
13617
|
+
field.name,
|
|
13618
|
+
field.title,
|
|
13619
|
+
field.description
|
|
13620
|
+
].filter((value) => !!value?.trim()).map(normalizeFieldName).filter(Boolean);
|
|
13621
|
+
}
|
|
13622
|
+
function suggestFieldMap(schemaFields, databaseProperties) {
|
|
13623
|
+
const propertyByKey = /* @__PURE__ */ new Map();
|
|
13624
|
+
for (const propertyName of Object.keys(databaseProperties)) propertyByKey.set(normalizeFieldName(propertyName), propertyName);
|
|
13625
|
+
const fieldMap = {};
|
|
13626
|
+
for (const field of schemaFields) for (const key of buildMatchKeys(field)) {
|
|
13627
|
+
const propertyName = propertyByKey.get(key);
|
|
13628
|
+
if (propertyName) {
|
|
13629
|
+
fieldMap[field.name] = propertyName;
|
|
13630
|
+
break;
|
|
13631
|
+
}
|
|
13632
|
+
}
|
|
13633
|
+
return fieldMap;
|
|
13634
|
+
}
|
|
13635
|
+
function hasProperties(value) {
|
|
13636
|
+
return !!value && typeof value === "object" && !!value.properties && typeof value.properties === "object";
|
|
13637
|
+
}
|
|
13638
|
+
function firstDataSourceId(database) {
|
|
13639
|
+
return (Array.isArray(database?.data_sources) ? database.data_sources : []).find((source) => typeof source?.id === "string" && source.id.trim())?.id;
|
|
13640
|
+
}
|
|
13641
|
+
async function resolveNotionDataSource(notion, inputId) {
|
|
13642
|
+
const id = parseNotionDatabaseId(inputId);
|
|
13643
|
+
if (!id) throw new Error("Notion database or data source URL/ID is required.");
|
|
13644
|
+
try {
|
|
13645
|
+
const dataSource$1 = await notion.dataSources.retrieve({ data_source_id: id });
|
|
13646
|
+
if (hasProperties(dataSource$1)) return {
|
|
13647
|
+
databaseId: typeof dataSource$1.parent?.database_id === "string" ? dataSource$1.parent.database_id : id,
|
|
13648
|
+
dataSourceId: dataSource$1.id ?? id,
|
|
13649
|
+
properties: dataSource$1.properties,
|
|
13650
|
+
parent: { data_source_id: dataSource$1.id ?? id }
|
|
13651
|
+
};
|
|
13652
|
+
} catch {}
|
|
13653
|
+
const database = await notion.databases.retrieve({ database_id: id });
|
|
13654
|
+
const dataSourceId = firstDataSourceId(database);
|
|
13655
|
+
if (!dataSourceId) throw new Error("No data source found for this Notion database. Copy the data source link from Notion, or share the source database with the integration.");
|
|
13656
|
+
const dataSource = await notion.dataSources.retrieve({ data_source_id: dataSourceId });
|
|
13657
|
+
if (!hasProperties(dataSource)) throw new Error("Notion data source did not return properties. Make sure it is shared with the integration and is not a linked data source.");
|
|
13658
|
+
return {
|
|
13659
|
+
databaseId: database.id ?? id,
|
|
13660
|
+
dataSourceId: dataSource.id ?? dataSourceId,
|
|
13661
|
+
properties: dataSource.properties,
|
|
13662
|
+
parent: { data_source_id: dataSource.id ?? dataSourceId }
|
|
13663
|
+
};
|
|
13664
|
+
}
|
|
13665
|
+
async function inspectNotionDatabase(input) {
|
|
13666
|
+
if (!input.token.trim()) throw new Error("Notion integration token is required.");
|
|
13667
|
+
const id = parseNotionDatabaseId(input.databaseId);
|
|
13668
|
+
if (!id) throw new Error("Notion database or data source URL/ID is required.");
|
|
13669
|
+
const resolved = await resolveNotionDataSource(new Client({ auth: input.token }), id);
|
|
13670
|
+
const databaseProperties = resolved.properties;
|
|
13671
|
+
const titleProperty = findTitleProperty(databaseProperties) ?? void 0;
|
|
13672
|
+
return {
|
|
13673
|
+
databaseId: resolved.databaseId,
|
|
13674
|
+
dataSourceId: resolved.dataSourceId,
|
|
13675
|
+
titleProperty,
|
|
13676
|
+
properties: Object.entries(databaseProperties).map(([name$1, property]) => ({
|
|
13677
|
+
name: name$1,
|
|
13678
|
+
type: property?.type ?? "unknown"
|
|
13679
|
+
})).sort((a, b) => a.name.localeCompare(b.name)),
|
|
13680
|
+
suggestedFieldMap: suggestFieldMap(input.schemaFields, databaseProperties)
|
|
13681
|
+
};
|
|
13682
|
+
}
|
|
13683
|
+
function validateNotionConfig(config) {
|
|
13684
|
+
if (!config?.enabled) return "Notion export is not enabled. Configure Notion settings first.";
|
|
13685
|
+
if (!config.token.trim()) return "Notion integration token is required.";
|
|
13686
|
+
return null;
|
|
13687
|
+
}
|
|
13688
|
+
async function writeNotionPage(config, schemaName, data) {
|
|
13689
|
+
const configError = validateNotionConfig(config);
|
|
13690
|
+
if (configError) throw new Error(configError);
|
|
13691
|
+
const notionConfig = config;
|
|
13692
|
+
const schemaConfig = notionConfig.schemas[schemaName];
|
|
13693
|
+
if (!schemaConfig) throw new Error(`Notion database is not configured for schema "${schemaName}".`);
|
|
13694
|
+
if (!schemaConfig.databaseId.trim()) throw new Error(`Notion database ID is required for schema "${schemaName}".`);
|
|
13695
|
+
const notion = new Client({ auth: notionConfig.token });
|
|
13696
|
+
const resolved = await resolveNotionDataSource(notion, schemaConfig.databaseId);
|
|
13697
|
+
const databaseProperties = resolved.properties;
|
|
13698
|
+
const fieldMap = schemaConfig.fieldMap ?? {};
|
|
13699
|
+
const properties = {};
|
|
13700
|
+
const sourceFields = new Set([...Object.keys(data), ...Object.keys(fieldMap)]);
|
|
13701
|
+
for (const sourceField of sourceFields) {
|
|
13702
|
+
const source = getValueAtPath(data, sourceField);
|
|
13703
|
+
if (!source.found) continue;
|
|
13704
|
+
const notionPropertyName = fieldMap[sourceField] ?? sourceField;
|
|
13705
|
+
const notionProperty = databaseProperties[notionPropertyName];
|
|
13706
|
+
if (!notionProperty) continue;
|
|
13707
|
+
const propertyValue = buildPropertyValue(notionProperty.type, source.value);
|
|
13708
|
+
if (propertyValue) properties[notionPropertyName] = propertyValue;
|
|
13709
|
+
}
|
|
13710
|
+
const titleProperty = findTitleProperty(databaseProperties, schemaConfig.titleProperty);
|
|
13711
|
+
if (titleProperty && !properties[titleProperty]) properties[titleProperty] = buildPropertyValue("title", schemaName);
|
|
13712
|
+
if (Object.keys(properties).length === 0) throw new Error("No extracted fields matched Notion database properties.");
|
|
13713
|
+
return {
|
|
13714
|
+
pageId: (await notion.pages.create({
|
|
13715
|
+
parent: resolved.parent,
|
|
13716
|
+
properties
|
|
13717
|
+
})).id,
|
|
13718
|
+
databaseId: resolved.databaseId,
|
|
13719
|
+
dataSourceId: resolved.dataSourceId
|
|
13720
|
+
};
|
|
13721
|
+
}
|
|
13722
|
+
|
|
13490
13723
|
//#endregion
|
|
13491
13724
|
//#region src/core/pdf-converter/external.ts
|
|
13492
13725
|
function applyTemplate(value, context) {
|
|
@@ -13527,7 +13760,7 @@ function formatCommandError(error, command$1) {
|
|
|
13527
13760
|
}
|
|
13528
13761
|
async function countPdfPages(input) {
|
|
13529
13762
|
try {
|
|
13530
|
-
return (await getDocumentProxy(input)).numPages;
|
|
13763
|
+
return (await getDocumentProxy(Buffer.isBuffer(input) ? new Uint8Array(input) : input)).numPages;
|
|
13531
13764
|
} catch {
|
|
13532
13765
|
return 0;
|
|
13533
13766
|
}
|
|
@@ -13681,6 +13914,17 @@ const SUPPORTED_EXTENSIONS = new Set([
|
|
|
13681
13914
|
const PDF_EXT_RE = /\.pdf$/i;
|
|
13682
13915
|
const JSON_EXT_RE$1 = /\.json$/;
|
|
13683
13916
|
const SUPPORTED_FILE_PATTERN = `*.{${[...SUPPORTED_EXTENSIONS].join(",")}}`;
|
|
13917
|
+
async function syncResultToNotion(aiConfig, schemaName, data) {
|
|
13918
|
+
if (!data || typeof data !== "object" || Array.isArray(data)) throw new Error("Extraction result is not an object and cannot be written to Notion.");
|
|
13919
|
+
const page = await writeNotionPage(aiConfig.notion, schemaName, data);
|
|
13920
|
+
return [{
|
|
13921
|
+
databaseId: page.databaseId,
|
|
13922
|
+
pageId: page.pageId
|
|
13923
|
+
}];
|
|
13924
|
+
}
|
|
13925
|
+
function shouldSyncNotion(aiConfig, schemaName) {
|
|
13926
|
+
return !!aiConfig.notion?.enabled && !!aiConfig.notion.schemas?.[schemaName]?.databaseId?.trim();
|
|
13927
|
+
}
|
|
13684
13928
|
async function ensureDatabaseReady(dbPath, schema) {
|
|
13685
13929
|
try {
|
|
13686
13930
|
await fs.access(dbPath);
|
|
@@ -13872,11 +14116,28 @@ async function processOneFile(aiexDir, config, aiConfig, schemaName, filePath, m
|
|
|
13872
14116
|
insert: options?.insert
|
|
13873
14117
|
});
|
|
13874
14118
|
if (r.success) {
|
|
14119
|
+
let notionPages;
|
|
14120
|
+
if (shouldSyncNotion(aiConfig, schemaName)) try {
|
|
14121
|
+
notionPages = await syncResultToNotion(aiConfig, schemaName, r.data);
|
|
14122
|
+
consola.success(`Synced to Notion: ${notionPages.length} page(s)`);
|
|
14123
|
+
} catch (error) {
|
|
14124
|
+
await updateExtractionAuditRecord(aiexDir, audit.id, {
|
|
14125
|
+
status: "failed",
|
|
14126
|
+
outputPath: r.outputPath,
|
|
14127
|
+
outputName: r.outputPath ? path.basename(r.outputPath) : void 0,
|
|
14128
|
+
tablesInserted: r.tablesInserted,
|
|
14129
|
+
tokensUsed: r.tokensUsed,
|
|
14130
|
+
error: error instanceof Error ? error.message : String(error)
|
|
14131
|
+
});
|
|
14132
|
+
consola.error(`Notion sync failed: ${error instanceof Error ? error.message : String(error)}`);
|
|
14133
|
+
return false;
|
|
14134
|
+
}
|
|
13875
14135
|
await updateExtractionAuditRecord(aiexDir, audit.id, {
|
|
13876
14136
|
status: "succeeded",
|
|
13877
14137
|
outputPath: r.outputPath,
|
|
13878
14138
|
outputName: r.outputPath ? path.basename(r.outputPath) : void 0,
|
|
13879
14139
|
tablesInserted: r.tablesInserted,
|
|
14140
|
+
notionPages,
|
|
13880
14141
|
tokensUsed: r.tokensUsed
|
|
13881
14142
|
});
|
|
13882
14143
|
consola.success(`Processed: ${path.basename(filePath)}`);
|
|
@@ -13941,6 +14202,15 @@ function getIdArg(args) {
|
|
|
13941
14202
|
const positional = args._;
|
|
13942
14203
|
return Array.isArray(positional) && typeof positional[0] === "string" ? positional[0] : "";
|
|
13943
14204
|
}
|
|
14205
|
+
function isExtractSubCommand(rawArgs) {
|
|
14206
|
+
if (!Array.isArray(rawArgs)) return false;
|
|
14207
|
+
return rawArgs.some((arg) => typeof arg === "string" && [
|
|
14208
|
+
"history",
|
|
14209
|
+
"show",
|
|
14210
|
+
"retry",
|
|
14211
|
+
"rm"
|
|
14212
|
+
].includes(arg));
|
|
14213
|
+
}
|
|
13944
14214
|
function formatSource(source) {
|
|
13945
14215
|
return source.type === "file" ? source.fileName || "file" : "text";
|
|
13946
14216
|
}
|
|
@@ -13984,11 +14254,33 @@ async function runAuditedSingleExtraction(input) {
|
|
|
13984
14254
|
});
|
|
13985
14255
|
return false;
|
|
13986
14256
|
}
|
|
14257
|
+
let notionPages;
|
|
14258
|
+
if (input.aiConfig.notion?.enabled && input.aiConfig.notion.schemas?.[input.schemaName]?.databaseId?.trim()) try {
|
|
14259
|
+
if (!result.data || typeof result.data !== "object" || Array.isArray(result.data)) throw new Error("Extraction result is not an object and cannot be written to Notion.");
|
|
14260
|
+
const page = await writeNotionPage(input.aiConfig.notion, input.schemaName, result.data);
|
|
14261
|
+
notionPages = [{
|
|
14262
|
+
databaseId: page.databaseId,
|
|
14263
|
+
pageId: page.pageId
|
|
14264
|
+
}];
|
|
14265
|
+
consola.success(`Synced to Notion: ${notionPages.length} page(s)`);
|
|
14266
|
+
} catch (error) {
|
|
14267
|
+
await updateExtractionAuditRecord(input.aiexDir, audit.id, {
|
|
14268
|
+
status: "failed",
|
|
14269
|
+
outputPath: result.outputPath,
|
|
14270
|
+
outputName: result.outputPath ? path.basename(result.outputPath) : void 0,
|
|
14271
|
+
tablesInserted: result.tablesInserted,
|
|
14272
|
+
tokensUsed: result.tokensUsed,
|
|
14273
|
+
error: error instanceof Error ? error.message : String(error)
|
|
14274
|
+
});
|
|
14275
|
+
consola.error(`Notion sync failed: ${error instanceof Error ? error.message : String(error)}`);
|
|
14276
|
+
return false;
|
|
14277
|
+
}
|
|
13987
14278
|
await updateExtractionAuditRecord(input.aiexDir, audit.id, {
|
|
13988
14279
|
status: "succeeded",
|
|
13989
14280
|
outputPath: result.outputPath,
|
|
13990
14281
|
outputName: result.outputPath ? path.basename(result.outputPath) : void 0,
|
|
13991
14282
|
tablesInserted: result.tablesInserted,
|
|
14283
|
+
notionPages,
|
|
13992
14284
|
tokensUsed: result.tokensUsed
|
|
13993
14285
|
});
|
|
13994
14286
|
return true;
|
|
@@ -14166,7 +14458,8 @@ const extractCommand = defineCommand({
|
|
|
14166
14458
|
default: false
|
|
14167
14459
|
}
|
|
14168
14460
|
},
|
|
14169
|
-
async run({ args }) {
|
|
14461
|
+
async run({ args, rawArgs }) {
|
|
14462
|
+
if (isExtractSubCommand(rawArgs)) return;
|
|
14170
14463
|
intro(pc.inverse(" aiex extract "));
|
|
14171
14464
|
const config = createMigrationConfig(process.cwd());
|
|
14172
14465
|
const aiexDir = path.dirname(config.schemaPath);
|
|
@@ -14300,7 +14593,18 @@ async function runInteractive(aiexDir, config, aiConfig, modelOverride) {
|
|
|
14300
14593
|
cancel("Cancelled");
|
|
14301
14594
|
return false;
|
|
14302
14595
|
}
|
|
14303
|
-
return (
|
|
14596
|
+
return runAuditedSingleExtraction({
|
|
14597
|
+
aiexDir,
|
|
14598
|
+
config,
|
|
14599
|
+
aiConfig,
|
|
14600
|
+
schemaName,
|
|
14601
|
+
text: textContent,
|
|
14602
|
+
source: {
|
|
14603
|
+
type: "text",
|
|
14604
|
+
text: textContent
|
|
14605
|
+
},
|
|
14606
|
+
modelOverride
|
|
14607
|
+
});
|
|
14304
14608
|
} else if (inputSource === "file") {
|
|
14305
14609
|
const filePathStr = await text({
|
|
14306
14610
|
message: "Enter file path:",
|
|
@@ -14315,7 +14619,20 @@ async function runInteractive(aiexDir, config, aiConfig, modelOverride) {
|
|
|
14315
14619
|
const fp = filePathStr;
|
|
14316
14620
|
try {
|
|
14317
14621
|
const input = await readExtractFileInput(fp, aiConfig);
|
|
14318
|
-
return (
|
|
14622
|
+
return runAuditedSingleExtraction({
|
|
14623
|
+
aiexDir,
|
|
14624
|
+
config,
|
|
14625
|
+
aiConfig,
|
|
14626
|
+
schemaName,
|
|
14627
|
+
text: input.text,
|
|
14628
|
+
filePath: input.filePath,
|
|
14629
|
+
source: {
|
|
14630
|
+
type: "file",
|
|
14631
|
+
filePath: fp,
|
|
14632
|
+
fileName: path.basename(fp)
|
|
14633
|
+
},
|
|
14634
|
+
modelOverride
|
|
14635
|
+
});
|
|
14319
14636
|
} catch (e) {
|
|
14320
14637
|
consola.error(`Cannot read file: ${fp} — ${e instanceof Error ? e.message : String(e)}`);
|
|
14321
14638
|
return false;
|
|
@@ -14521,187 +14838,29 @@ const schemaCommand = defineCommand({
|
|
|
14521
14838
|
}
|
|
14522
14839
|
});
|
|
14523
14840
|
|
|
14524
|
-
//#endregion
|
|
14525
|
-
//#region src/core/notion-sink.ts
|
|
14526
|
-
const RICH_TEXT_LIMIT = 2e3;
|
|
14527
|
-
const UUID_RE = /^[0-9a-f]{32}$/i;
|
|
14528
|
-
const HYPHENATED_UUID_RE = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
|
|
14529
|
-
const UUID_IN_TEXT_RE = /[0-9a-f]{32}/i;
|
|
14530
|
-
function truncateText(value) {
|
|
14531
|
-
return value.length > RICH_TEXT_LIMIT ? value.slice(0, RICH_TEXT_LIMIT) : value;
|
|
14532
|
-
}
|
|
14533
|
-
function stringifyValue(value) {
|
|
14534
|
-
if (value === null || value === void 0) return "";
|
|
14535
|
-
if (typeof value === "string") return value;
|
|
14536
|
-
if (typeof value === "number" || typeof value === "boolean") return String(value);
|
|
14537
|
-
return JSON.stringify(value);
|
|
14538
|
-
}
|
|
14539
|
-
function asNumber(value) {
|
|
14540
|
-
if (typeof value === "number" && Number.isFinite(value)) return value;
|
|
14541
|
-
if (typeof value === "string" && value.trim()) {
|
|
14542
|
-
const parsed = Number(value);
|
|
14543
|
-
return Number.isFinite(parsed) ? parsed : null;
|
|
14544
|
-
}
|
|
14545
|
-
return null;
|
|
14546
|
-
}
|
|
14547
|
-
function asBoolean(value) {
|
|
14548
|
-
if (typeof value === "boolean") return value;
|
|
14549
|
-
if (typeof value === "number") return value !== 0;
|
|
14550
|
-
if (typeof value === "string") {
|
|
14551
|
-
const normalized = value.trim().toLowerCase();
|
|
14552
|
-
return [
|
|
14553
|
-
"true",
|
|
14554
|
-
"yes",
|
|
14555
|
-
"1",
|
|
14556
|
-
"y"
|
|
14557
|
-
].includes(normalized);
|
|
14558
|
-
}
|
|
14559
|
-
return !!value;
|
|
14560
|
-
}
|
|
14561
|
-
function asDateStart(value) {
|
|
14562
|
-
if (value instanceof Date && !Number.isNaN(value.getTime())) return value.toISOString();
|
|
14563
|
-
if (typeof value === "number" && Number.isFinite(value)) {
|
|
14564
|
-
const date = new Date(value);
|
|
14565
|
-
return Number.isNaN(date.getTime()) ? null : date.toISOString();
|
|
14566
|
-
}
|
|
14567
|
-
if (typeof value === "string" && value.trim()) {
|
|
14568
|
-
const ms = Date.parse(value);
|
|
14569
|
-
if (Number.isNaN(ms)) return null;
|
|
14570
|
-
return new Date(ms).toISOString();
|
|
14571
|
-
}
|
|
14572
|
-
return null;
|
|
14573
|
-
}
|
|
14574
|
-
function asStringArray(value) {
|
|
14575
|
-
if (Array.isArray(value)) return value.map((item) => stringifyValue(item).trim()).filter(Boolean);
|
|
14576
|
-
const text$1 = stringifyValue(value).trim();
|
|
14577
|
-
return text$1 ? [text$1] : [];
|
|
14578
|
-
}
|
|
14579
|
-
function buildPropertyValue(type, value) {
|
|
14580
|
-
const text$1 = truncateText(stringifyValue(value));
|
|
14581
|
-
switch (type) {
|
|
14582
|
-
case "title": return { title: text$1 ? [{ text: { content: text$1 } }] : [] };
|
|
14583
|
-
case "rich_text": return { rich_text: text$1 ? [{ text: { content: text$1 } }] : [] };
|
|
14584
|
-
case "number": return { number: asNumber(value) };
|
|
14585
|
-
case "checkbox": return { checkbox: asBoolean(value) };
|
|
14586
|
-
case "date": {
|
|
14587
|
-
const start = asDateStart(value);
|
|
14588
|
-
return { date: start ? { start } : null };
|
|
14589
|
-
}
|
|
14590
|
-
case "select": {
|
|
14591
|
-
const name$1 = stringifyValue(value).trim();
|
|
14592
|
-
return { select: name$1 ? { name: name$1 } : null };
|
|
14593
|
-
}
|
|
14594
|
-
case "multi_select": return { multi_select: asStringArray(value).map((name$1) => ({ name: name$1 })) };
|
|
14595
|
-
case "url": return { url: text$1 || null };
|
|
14596
|
-
case "email": return { email: text$1 || null };
|
|
14597
|
-
case "phone_number": return { phone_number: text$1 || null };
|
|
14598
|
-
default: return null;
|
|
14599
|
-
}
|
|
14600
|
-
}
|
|
14601
|
-
function findTitleProperty(properties, preferred) {
|
|
14602
|
-
if (preferred && properties[preferred]?.type === "title") return preferred;
|
|
14603
|
-
return Object.entries(properties).find(([, property]) => property?.type === "title")?.[0] ?? null;
|
|
14604
|
-
}
|
|
14605
|
-
function hyphenateDatabaseId(value) {
|
|
14606
|
-
const id = value.replace(/-/g, "");
|
|
14607
|
-
if (!UUID_RE.test(id)) return value;
|
|
14608
|
-
return `${id.slice(0, 8)}-${id.slice(8, 12)}-${id.slice(12, 16)}-${id.slice(16, 20)}-${id.slice(20)}`;
|
|
14609
|
-
}
|
|
14610
|
-
function parseNotionDatabaseId(value) {
|
|
14611
|
-
const input = value.trim();
|
|
14612
|
-
if (!input) return "";
|
|
14613
|
-
if (HYPHENATED_UUID_RE.test(input)) return input;
|
|
14614
|
-
if (UUID_RE.test(input)) return hyphenateDatabaseId(input);
|
|
14615
|
-
const match = input.replace(/-/g, "").match(UUID_IN_TEXT_RE);
|
|
14616
|
-
return match ? hyphenateDatabaseId(match[0]) : input;
|
|
14617
|
-
}
|
|
14618
|
-
function normalizeFieldName(value) {
|
|
14619
|
-
return value.normalize("NFKC").toLowerCase().replace(/[^\p{Letter}\p{Number}]+/gu, "");
|
|
14620
|
-
}
|
|
14621
|
-
function buildMatchKeys(field) {
|
|
14622
|
-
return [
|
|
14623
|
-
field.name,
|
|
14624
|
-
field.title,
|
|
14625
|
-
field.description
|
|
14626
|
-
].filter((value) => !!value?.trim()).map(normalizeFieldName).filter(Boolean);
|
|
14627
|
-
}
|
|
14628
|
-
function suggestFieldMap(schemaFields, databaseProperties) {
|
|
14629
|
-
const propertyByKey = /* @__PURE__ */ new Map();
|
|
14630
|
-
for (const propertyName of Object.keys(databaseProperties)) propertyByKey.set(normalizeFieldName(propertyName), propertyName);
|
|
14631
|
-
const fieldMap = {};
|
|
14632
|
-
for (const field of schemaFields) for (const key of buildMatchKeys(field)) {
|
|
14633
|
-
const propertyName = propertyByKey.get(key);
|
|
14634
|
-
if (propertyName) {
|
|
14635
|
-
fieldMap[field.name] = propertyName;
|
|
14636
|
-
break;
|
|
14637
|
-
}
|
|
14638
|
-
}
|
|
14639
|
-
return fieldMap;
|
|
14640
|
-
}
|
|
14641
|
-
async function inspectNotionDatabase(input) {
|
|
14642
|
-
if (!input.token.trim()) throw new Error("Notion integration token is required.");
|
|
14643
|
-
const databaseId = parseNotionDatabaseId(input.databaseId);
|
|
14644
|
-
if (!databaseId) throw new Error("Notion database URL or ID is required.");
|
|
14645
|
-
const databaseProperties = (await new Client({ auth: input.token }).databases.retrieve({ database_id: databaseId })).properties;
|
|
14646
|
-
return {
|
|
14647
|
-
databaseId,
|
|
14648
|
-
titleProperty: findTitleProperty(databaseProperties) ?? void 0,
|
|
14649
|
-
properties: Object.entries(databaseProperties).map(([name$1, property]) => ({
|
|
14650
|
-
name: name$1,
|
|
14651
|
-
type: property?.type ?? "unknown"
|
|
14652
|
-
})).sort((a, b) => a.name.localeCompare(b.name)),
|
|
14653
|
-
suggestedFieldMap: suggestFieldMap(input.schemaFields, databaseProperties)
|
|
14654
|
-
};
|
|
14655
|
-
}
|
|
14656
|
-
function validateNotionConfig(config) {
|
|
14657
|
-
if (!config?.enabled) return "Notion export is not enabled. Configure Notion settings first.";
|
|
14658
|
-
if (!config.token.trim()) return "Notion integration token is required.";
|
|
14659
|
-
return null;
|
|
14660
|
-
}
|
|
14661
|
-
async function writeNotionPage(config, schemaName, data) {
|
|
14662
|
-
const configError = validateNotionConfig(config);
|
|
14663
|
-
if (configError) throw new Error(configError);
|
|
14664
|
-
const notionConfig = config;
|
|
14665
|
-
const schemaConfig = notionConfig.schemas[schemaName];
|
|
14666
|
-
if (!schemaConfig) throw new Error(`Notion database is not configured for schema "${schemaName}".`);
|
|
14667
|
-
if (!schemaConfig.databaseId.trim()) throw new Error(`Notion database ID is required for schema "${schemaName}".`);
|
|
14668
|
-
const notion = new Client({ auth: notionConfig.token });
|
|
14669
|
-
const databaseProperties = (await notion.databases.retrieve({ database_id: schemaConfig.databaseId })).properties;
|
|
14670
|
-
const fieldMap = schemaConfig.fieldMap ?? {};
|
|
14671
|
-
const properties = {};
|
|
14672
|
-
for (const [sourceField, sourceValue] of Object.entries(data)) {
|
|
14673
|
-
const notionPropertyName = fieldMap[sourceField] ?? sourceField;
|
|
14674
|
-
const notionProperty = databaseProperties[notionPropertyName];
|
|
14675
|
-
if (!notionProperty) continue;
|
|
14676
|
-
const propertyValue = buildPropertyValue(notionProperty.type, sourceValue);
|
|
14677
|
-
if (propertyValue) properties[notionPropertyName] = propertyValue;
|
|
14678
|
-
}
|
|
14679
|
-
const titleProperty = findTitleProperty(databaseProperties, schemaConfig.titleProperty);
|
|
14680
|
-
if (titleProperty && !properties[titleProperty]) properties[titleProperty] = buildPropertyValue("title", Object.entries(data).find(([, value]) => typeof value === "string" && value.trim())?.[1] ?? schemaName);
|
|
14681
|
-
if (Object.keys(properties).length === 0) throw new Error("No extracted fields matched Notion database properties.");
|
|
14682
|
-
return {
|
|
14683
|
-
pageId: (await notion.pages.create({
|
|
14684
|
-
parent: { database_id: schemaConfig.databaseId },
|
|
14685
|
-
properties
|
|
14686
|
-
})).id,
|
|
14687
|
-
databaseId: schemaConfig.databaseId
|
|
14688
|
-
};
|
|
14689
|
-
}
|
|
14690
|
-
|
|
14691
14841
|
//#endregion
|
|
14692
14842
|
//#region src/server/routes/ai.ts
|
|
14693
14843
|
const JSON_EXT_RE = /\.json$/i;
|
|
14694
14844
|
function extractSchemaFields(schema) {
|
|
14695
14845
|
if (!schema?.properties || typeof schema.properties !== "object") return [];
|
|
14696
|
-
|
|
14697
|
-
|
|
14698
|
-
|
|
14699
|
-
|
|
14700
|
-
|
|
14701
|
-
|
|
14702
|
-
|
|
14703
|
-
|
|
14704
|
-
|
|
14846
|
+
const fields = [];
|
|
14847
|
+
function visitProperties(properties, prefix = "") {
|
|
14848
|
+
for (const [name$1, property] of Object.entries(properties)) {
|
|
14849
|
+
const fieldName = prefix ? `${prefix}.${name$1}` : name$1;
|
|
14850
|
+
if (property?.type === "object" && property?.properties && typeof property.properties === "object") {
|
|
14851
|
+
visitProperties(property.properties, fieldName);
|
|
14852
|
+
continue;
|
|
14853
|
+
}
|
|
14854
|
+
if (property?.type === "array" && property?.items?.type === "object") continue;
|
|
14855
|
+
fields.push({
|
|
14856
|
+
name: fieldName,
|
|
14857
|
+
title: typeof property?.title === "string" ? property.title : void 0,
|
|
14858
|
+
description: typeof property?.description === "string" ? property.description : void 0
|
|
14859
|
+
});
|
|
14860
|
+
}
|
|
14861
|
+
}
|
|
14862
|
+
visitProperties(schema.properties);
|
|
14863
|
+
return fields;
|
|
14705
14864
|
}
|
|
14706
14865
|
async function loadSchemaFields(config, schemaName) {
|
|
14707
14866
|
const safeName = path.basename(schemaName).replace(JSON_EXT_RE, "");
|
|
@@ -14804,6 +14963,7 @@ function aiRoutes(config) {
|
|
|
14804
14963
|
//#endregion
|
|
14805
14964
|
//#region src/server/routes/data.ts
|
|
14806
14965
|
const FILE_REGEX = /\.json$/;
|
|
14966
|
+
const EXTRACTION_TIMESTAMP_RE = /-\d{4}-\d{2}-\d{2}T/;
|
|
14807
14967
|
const TIMESTAMP_CLEANUP = /(\d{2})-(\d{2})-(\d{2})/;
|
|
14808
14968
|
const TIMESTAMP_TZ = /(\d{3})Z/;
|
|
14809
14969
|
const tableParamSchema = z.object({ name: z.string().regex(/^[a-z][a-z0-9_]*$/) });
|
|
@@ -14820,6 +14980,37 @@ function invalidParamResponse$1(message) {
|
|
|
14820
14980
|
if (!result.success) return c.json({ error: message }, 400);
|
|
14821
14981
|
};
|
|
14822
14982
|
}
|
|
14983
|
+
function getAuditNotionStatus(record) {
|
|
14984
|
+
if (record.notionPages?.length) return "synced";
|
|
14985
|
+
if (record.status === "failed") return "failed";
|
|
14986
|
+
return "not_synced";
|
|
14987
|
+
}
|
|
14988
|
+
async function getRowExtractionActions(aiexDir, tableName) {
|
|
14989
|
+
const actions = /* @__PURE__ */ new Map();
|
|
14990
|
+
const auditRecords = await listExtractionAuditRecords(aiexDir);
|
|
14991
|
+
for (const record of auditRecords) {
|
|
14992
|
+
if (!record.outputName) continue;
|
|
14993
|
+
for (const inserted of record.tablesInserted ?? []) {
|
|
14994
|
+
if (inserted.table !== tableName) continue;
|
|
14995
|
+
const key = String(inserted.rowId);
|
|
14996
|
+
if (actions.has(key)) continue;
|
|
14997
|
+
const notionPages = record.notionPages?.length ? record.notionPages : void 0;
|
|
14998
|
+
actions.set(key, {
|
|
14999
|
+
extractionName: record.outputName,
|
|
15000
|
+
notionStatus: getAuditNotionStatus(record),
|
|
15001
|
+
notionPages,
|
|
15002
|
+
notionError: !notionPages && record.status === "failed" ? record.error : void 0
|
|
15003
|
+
});
|
|
15004
|
+
}
|
|
15005
|
+
}
|
|
15006
|
+
return actions;
|
|
15007
|
+
}
|
|
15008
|
+
function schemaNameFromExtractionFile(name$1) {
|
|
15009
|
+
const stem = name$1.replace(FILE_REGEX, "");
|
|
15010
|
+
const match = stem.match(EXTRACTION_TIMESTAMP_RE);
|
|
15011
|
+
if (!match || typeof match.index !== "number" || match.index <= 0) return null;
|
|
15012
|
+
return stem.slice(0, match.index);
|
|
15013
|
+
}
|
|
14823
15014
|
function createReadonlyQueryDb(databasePath) {
|
|
14824
15015
|
return new Kysely({ dialect: new SqliteDialect({ database: new Database(databasePath, { readonly: true }) }) });
|
|
14825
15016
|
}
|
|
@@ -14831,22 +15022,27 @@ function dataRoutes(config) {
|
|
|
14831
15022
|
try {
|
|
14832
15023
|
await fs.mkdir(extractedDir, { recursive: true });
|
|
14833
15024
|
const jsonFiles = (await fs.readdir(extractedDir)).filter((f) => f.endsWith(".json") && !f.endsWith(".prompt.md"));
|
|
15025
|
+
const auditRecords = await listExtractionAuditRecords(aiexDir);
|
|
15026
|
+
const auditByOutputName = new Map(auditRecords.map((record) => [record.outputName, record]));
|
|
14834
15027
|
const records = [];
|
|
14835
15028
|
for (const file of jsonFiles) {
|
|
14836
|
-
const
|
|
14837
|
-
|
|
14838
|
-
|
|
14839
|
-
const schemaName = name$1.slice(0, idx);
|
|
14840
|
-
const timestamp = name$1.slice(idx + 1).replace(/-/g, (d, i) => i === 4 || i === 7 ? "-" : d).replace(TIMESTAMP_CLEANUP, (_, h, m, s) => `${h}:${m}:${s}`).replace(TIMESTAMP_TZ, ".$1Z");
|
|
15029
|
+
const schemaName = schemaNameFromExtractionFile(file);
|
|
15030
|
+
if (!schemaName) continue;
|
|
15031
|
+
const timestamp = file.replace(FILE_REGEX, "").slice(schemaName.length + 1).replace(/-/g, (d, i) => i === 4 || i === 7 ? "-" : d).replace(TIMESTAMP_CLEANUP, (_, h, m, s) => `${h}:${m}:${s}`).replace(TIMESTAMP_TZ, ".$1Z");
|
|
14841
15032
|
const filePath = path.join(extractedDir, file);
|
|
14842
15033
|
try {
|
|
14843
15034
|
const stat = await fs.stat(filePath);
|
|
15035
|
+
const audit = auditByOutputName.get(file);
|
|
15036
|
+
const notionPages = audit?.notionPages?.length ? audit.notionPages : void 0;
|
|
14844
15037
|
records.push({
|
|
14845
15038
|
name: file,
|
|
14846
15039
|
schemaName,
|
|
14847
15040
|
timestamp,
|
|
14848
15041
|
fileSize: stat.size,
|
|
14849
|
-
modifiedAt: stat.mtime.toISOString()
|
|
15042
|
+
modifiedAt: stat.mtime.toISOString(),
|
|
15043
|
+
notionStatus: notionPages ? "synced" : audit?.status === "failed" ? "failed" : "not_synced",
|
|
15044
|
+
notionPages,
|
|
15045
|
+
notionError: !notionPages && audit?.status === "failed" ? audit.error : void 0
|
|
14850
15046
|
});
|
|
14851
15047
|
} catch {
|
|
14852
15048
|
continue;
|
|
@@ -14940,9 +15136,17 @@ function dataRoutes(config) {
|
|
|
14940
15136
|
limit ${pageSize}
|
|
14941
15137
|
offset ${offset}
|
|
14942
15138
|
`.execute(db);
|
|
15139
|
+
const primaryKey = columns.find((column) => column.pk);
|
|
15140
|
+
const actionsByRowId = await getRowExtractionActions(aiexDir, tableName);
|
|
15141
|
+
const rowActions = primaryKey ? Object.fromEntries(result.rows.map((row, index) => {
|
|
15142
|
+
const rowId = row[primaryKey.name];
|
|
15143
|
+
const action = rowId === null || rowId === void 0 ? void 0 : actionsByRowId.get(String(rowId));
|
|
15144
|
+
return action ? [String(index), action] : null;
|
|
15145
|
+
}).filter((entry) => !!entry)) : {};
|
|
14943
15146
|
return c.json({
|
|
14944
15147
|
columns,
|
|
14945
15148
|
rows: result.rows,
|
|
15149
|
+
rowActions,
|
|
14946
15150
|
total,
|
|
14947
15151
|
page,
|
|
14948
15152
|
pageSize,
|
|
@@ -14968,6 +15172,69 @@ function dataRoutes(config) {
|
|
|
14968
15172
|
return c.json({ error: "Extraction result not found" }, 404);
|
|
14969
15173
|
}
|
|
14970
15174
|
});
|
|
15175
|
+
app.post("/data/:name/notion/retry", zValidator("param", extractionFileParamSchema, invalidParamResponse$1("Invalid extraction file name")), async (c) => {
|
|
15176
|
+
const { name: name$1 } = c.req.valid("param");
|
|
15177
|
+
const filePath = path.join(extractedDir, name$1);
|
|
15178
|
+
const schemaName = schemaNameFromExtractionFile(name$1);
|
|
15179
|
+
if (!schemaName) return c.json({
|
|
15180
|
+
success: false,
|
|
15181
|
+
error: "Cannot infer schema name from extraction file name"
|
|
15182
|
+
}, 400);
|
|
15183
|
+
const aiConfig = await readAIConfig(aiexDir);
|
|
15184
|
+
if (!aiConfig?.notion?.enabled) return c.json({
|
|
15185
|
+
success: false,
|
|
15186
|
+
error: "Notion export is not enabled. Configure Notion settings first."
|
|
15187
|
+
}, 400);
|
|
15188
|
+
if (!aiConfig.notion.schemas?.[schemaName]?.databaseId?.trim()) return c.json({
|
|
15189
|
+
success: false,
|
|
15190
|
+
error: `Notion database is not configured for schema "${schemaName}".`
|
|
15191
|
+
}, 400);
|
|
15192
|
+
try {
|
|
15193
|
+
const data = await readFile(filePath);
|
|
15194
|
+
if (!data || typeof data !== "object" || Array.isArray(data)) return c.json({
|
|
15195
|
+
success: false,
|
|
15196
|
+
error: "Extraction result is not a JSON object and cannot be written to Notion."
|
|
15197
|
+
}, 400);
|
|
15198
|
+
const page = await writeNotionPage(aiConfig.notion, schemaName, data);
|
|
15199
|
+
const notionPages = [{
|
|
15200
|
+
databaseId: page.databaseId,
|
|
15201
|
+
pageId: page.pageId
|
|
15202
|
+
}];
|
|
15203
|
+
let record = (await listExtractionAuditRecords(aiexDir)).find((record$1) => record$1.outputName === name$1);
|
|
15204
|
+
if (!record) record = await createExtractionAuditRecord(aiexDir, {
|
|
15205
|
+
schemaName,
|
|
15206
|
+
source: {
|
|
15207
|
+
type: "file",
|
|
15208
|
+
filePath,
|
|
15209
|
+
fileName: name$1
|
|
15210
|
+
}
|
|
15211
|
+
});
|
|
15212
|
+
if (record) await updateExtractionAuditRecord(aiexDir, record.id, {
|
|
15213
|
+
status: "succeeded",
|
|
15214
|
+
outputPath: filePath,
|
|
15215
|
+
outputName: name$1,
|
|
15216
|
+
notionPages,
|
|
15217
|
+
error: void 0
|
|
15218
|
+
});
|
|
15219
|
+
return c.json({
|
|
15220
|
+
success: true,
|
|
15221
|
+
notionPages
|
|
15222
|
+
});
|
|
15223
|
+
} catch (error) {
|
|
15224
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
15225
|
+
const record = (await listExtractionAuditRecords(aiexDir)).find((record$1) => record$1.outputName === name$1);
|
|
15226
|
+
if (record) await updateExtractionAuditRecord(aiexDir, record.id, {
|
|
15227
|
+
status: "failed",
|
|
15228
|
+
outputPath: filePath,
|
|
15229
|
+
outputName: name$1,
|
|
15230
|
+
error: message
|
|
15231
|
+
});
|
|
15232
|
+
return c.json({
|
|
15233
|
+
success: false,
|
|
15234
|
+
error: message
|
|
15235
|
+
}, 500);
|
|
15236
|
+
}
|
|
15237
|
+
});
|
|
14971
15238
|
return app;
|
|
14972
15239
|
}
|
|
14973
15240
|
|
|
@@ -15073,7 +15340,7 @@ async function executeAuditedExtraction(input) {
|
|
|
15073
15340
|
});
|
|
15074
15341
|
}
|
|
15075
15342
|
const notionPages = [];
|
|
15076
|
-
if (input.
|
|
15343
|
+
if (aiConfig.notion?.enabled && aiConfig.notion.schemas?.[input.schemaName]?.databaseId?.trim()) try {
|
|
15077
15344
|
if (!result.data || typeof result.data !== "object" || Array.isArray(result.data)) throw new Error("Extraction result is not an object and cannot be written to Notion.");
|
|
15078
15345
|
notionPages.push(await writeNotionPage(aiConfig.notion, input.schemaName, result.data));
|
|
15079
15346
|
} catch (error) {
|
|
@@ -15128,7 +15395,6 @@ function extractRoutes(config) {
|
|
|
15128
15395
|
const schemaName = getFormString(body.schema);
|
|
15129
15396
|
const text$1 = getFormString(body.text);
|
|
15130
15397
|
const modelName = getFormString(body.model);
|
|
15131
|
-
const syncNotion = getFormString(body.notion) === "true";
|
|
15132
15398
|
const file = getFormFile(body.file);
|
|
15133
15399
|
if (!schemaName) return c.json({
|
|
15134
15400
|
success: false,
|
|
@@ -15169,8 +15435,7 @@ function extractRoutes(config) {
|
|
|
15169
15435
|
schemaName,
|
|
15170
15436
|
text: text$1,
|
|
15171
15437
|
filePath,
|
|
15172
|
-
modelName
|
|
15173
|
-
syncNotion
|
|
15438
|
+
modelName
|
|
15174
15439
|
});
|
|
15175
15440
|
} catch (error) {
|
|
15176
15441
|
return c.json({
|