unrag 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/index.js +150 -4
- package/package.json +1 -1
- package/registry/connectors/notion/client.ts +22 -0
- package/registry/connectors/notion/ids.ts +39 -0
- package/registry/connectors/notion/index.ts +7 -0
- package/registry/connectors/notion/render.ts +98 -0
- package/registry/connectors/notion/sync.ts +222 -0
- package/registry/connectors/notion/types.ts +56 -0
package/dist/cli/index.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
|
|
3
3
|
// cli/run.ts
|
|
4
|
-
import { intro, outro as
|
|
4
|
+
import { intro, outro as outro3 } from "@clack/prompts";
|
|
5
5
|
|
|
6
6
|
// cli/commands/init.ts
|
|
7
7
|
import { cancel as cancel2, isCancel as isCancel2, outro, select, text } from "@clack/prompts";
|
|
@@ -47,6 +47,23 @@ async function findUp(startDir, filename) {
|
|
|
47
47
|
current = parent;
|
|
48
48
|
}
|
|
49
49
|
}
|
|
50
|
+
/**
 * Collects the paths of all files found beneath `dir`, descending into
 * subdirectories in the order `readdir` reports them.
 *
 * Entries that are neither a directory nor a regular file according to their
 * dirent are checked with `stat`; they are included only when `stat` reports a
 * file. A failing `stat` is treated as "not a file" rather than an error.
 *
 * @param dir Directory to scan.
 * @returns Paths built by joining `dir` with each entry name.
 */
async function listFilesRecursive(dir) {
  const found = [];
  for (const dirent of await readdir(dir, { withFileTypes: true })) {
    const entryPath = path.join(dir, dirent.name);
    if (dirent.isDirectory()) {
      const nested = await listFilesRecursive(entryPath);
      found.push(...nested);
      continue;
    }
    if (dirent.isFile()) {
      found.push(entryPath);
      continue;
    }
    // Neither directory nor file per the dirent: ask stat for the final word.
    const info = await stat(entryPath).catch(() => null);
    if (info?.isFile()) {
      found.push(entryPath);
    }
  }
  return found;
}
|
|
50
67
|
|
|
51
68
|
// cli/lib/registry.ts
|
|
52
69
|
var readText = (filePath) => readFile(filePath, "utf8");
|
|
@@ -209,6 +226,42 @@ async function copyRegistryFiles(selection) {
|
|
|
209
226
|
await writeText(mapping.dest, content);
|
|
210
227
|
}
|
|
211
228
|
}
|
|
229
|
+
/**
 * Copies every file of one connector from the registry into the project.
 *
 * Source:      <registryRoot>/connectors/<connector>/**
 * Destination: <projectRoot>/<installDir>/connectors/<connector>/**
 *
 * Existing destination files are skipped without asking when `selection.yes`
 * is set or stdin is not a TTY; otherwise the user is asked per file whether
 * to overwrite (default: no).
 *
 * Note: when the user cancels a prompt this function prints "Cancelled." and
 * returns normally, so the caller cannot tell a cancelled run from a complete one.
 *
 * @param selection Object with `projectRoot`, `registryRoot`, `installDir`,
 *   `connector` and optional `yes`.
 * @throws Error when the connector directory does not exist in the registry,
 *   or a listed registry file has disappeared.
 */
async function copyConnectorFiles(selection) {
  const installBase = path2.join(selection.projectRoot, selection.installDir);
  const sourceRoot = path2.join(selection.registryRoot, "connectors", selection.connector);
  if (!await exists(sourceRoot)) {
    throw new Error(`Unknown connector registry: ${path2.relative(selection.registryRoot, sourceRoot)}`);
  }

  const sourceFiles = await listFilesRecursive(sourceRoot);
  const targetRoot = path2.join(installBase, "connectors", selection.connector);
  const skipPrompts = Boolean(selection.yes) || !process.stdin.isTTY;

  for (const sourcePath of sourceFiles) {
    if (!await exists(sourcePath)) {
      throw new Error(`Registry file missing: ${sourcePath}`);
    }

    // Mirror the file's position inside the connector directory.
    const relativePath = path2.relative(sourceRoot, sourcePath);
    const targetPath = path2.join(targetRoot, relativePath);

    if (await exists(targetPath)) {
      if (skipPrompts) {
        continue;
      }
      const overwrite = await confirm({
        message: `Overwrite ${path2.relative(selection.projectRoot, targetPath)}?`,
        initialValue: false
      });
      if (isCancel(overwrite)) {
        cancel("Cancelled.");
        return;
      }
      if (!overwrite) {
        continue;
      }
    }

    await writeText(targetPath, await readText(sourcePath));
  }
}
|
|
212
265
|
|
|
213
266
|
// cli/lib/json.ts
|
|
214
267
|
import { readFile as readFile2, writeFile as writeFile2 } from "node:fs/promises";
|
|
@@ -286,6 +339,14 @@ function depsForAdapter(adapter) {
|
|
|
286
339
|
}
|
|
287
340
|
return { deps, devDeps };
|
|
288
341
|
}
|
|
342
|
+
/**
 * Returns the npm packages a connector needs in the host project.
 *
 * @param connector Connector name; only "notion" currently adds anything.
 * @returns `{ deps, devDeps }`, each a name → version-range map (possibly empty).
 */
function depsForConnector(connector) {
  const requirements = { deps: {}, devDeps: {} };
  switch (connector) {
    case "notion":
      requirements.deps = { "@notionhq/client": "^2.2.16" };
      break;
    default:
      break;
  }
  return requirements;
}
|
|
289
350
|
function installCmd(pm) {
|
|
290
351
|
if (pm === "bun")
|
|
291
352
|
return "bun install";
|
|
@@ -488,7 +549,8 @@ async function initCommand(args) {
|
|
|
488
549
|
installDir,
|
|
489
550
|
storeAdapter: storeAdapterAnswer,
|
|
490
551
|
aliasBase,
|
|
491
|
-
version: CONFIG_VERSION
|
|
552
|
+
version: CONFIG_VERSION,
|
|
553
|
+
connectors: existing?.connectors ?? []
|
|
492
554
|
};
|
|
493
555
|
await writeJsonFile(path5.join(root, CONFIG_FILE), config);
|
|
494
556
|
const pm = await detectPackageManager(root);
|
|
@@ -512,19 +574,103 @@ async function initCommand(args) {
|
|
|
512
574
|
`));
|
|
513
575
|
}
|
|
514
576
|
|
|
577
|
+
// cli/commands/add.ts
|
|
578
|
+
import { outro as outro2 } from "@clack/prompts";
|
|
579
|
+
import path6 from "node:path";
|
|
580
|
+
import { fileURLToPath as fileURLToPath2 } from "node:url";
|
|
581
|
+
var CONFIG_FILE2 = "unrag.json";
|
|
582
|
+
var __filename3 = fileURLToPath2(import.meta.url);
|
|
583
|
+
var __dirname3 = path6.dirname(__filename3);
|
|
584
|
+
/**
 * Parses the arguments of `unrag add`.
 *
 * - `--yes` / `-y` sets `yes: true`.
 * - The first argument that does not start with "-" becomes `connector`
 *   (an empty-string connector may be replaced by a later positional).
 * - Everything else is ignored.
 *
 * @param args Arguments following the `add` command.
 * @returns Object containing only the keys that were actually supplied.
 */
var parseAddArgs = (args) => {
  let yes;
  let connector;
  for (const arg of args) {
    if (arg === "--yes" || arg === "-y") {
      yes = true;
    } else if (!connector && !arg.startsWith("-")) {
      connector = arg;
    }
  }
  return {
    ...(yes === undefined ? {} : { yes }),
    ...(connector === undefined ? {} : { connector })
  };
};
|
|
599
|
+
/**
 * Implements `unrag add <connector>`.
 *
 * Steps: locate the project root, validate the connector name, read
 * `unrag.json`, copy the connector's registry files into the project, merge
 * the connector's npm dependencies into package.json, record the connector in
 * `unrag.json`, and print a summary.
 *
 * @param args Arguments following the `add` command.
 * @throws Error when no project root is found, when `unrag.json` has no
 *   `installDir`, or when the CLI's own package root cannot be located.
 */
async function addCommand(args) {
  const root = await tryFindProjectRoot(process.cwd());
  if (!root) {
    throw new Error("Could not find a project root (no package.json found).");
  }

  const parsed = parseAddArgs(args);
  const connector = parsed.connector;
  if (!connector) {
    outro2("Usage: unrag add <connector>\n\nAvailable connectors: notion");
    return;
  }
  if (connector !== "notion") {
    outro2(`Unknown connector: ${connector}\n\nAvailable connectors: notion`);
    // Report failure to the shell, the same way `run` does for an unknown command.
    process.exitCode = 1;
    return;
  }

  const configPath = path6.join(root, CONFIG_FILE2);
  const config = await readJsonFile(configPath);
  if (!config?.installDir) {
    throw new Error(`Missing ${CONFIG_FILE2}. Run \`unrag init\` first.`);
  }

  // The registry ships inside the CLI package, next to its package.json.
  const cliPackageRoot = await findUp(__dirname3, "package.json");
  if (!cliPackageRoot) {
    throw new Error("Could not locate CLI package root (package.json not found).");
  }
  const registryRoot = path6.join(cliPackageRoot, "registry");

  const nonInteractive = parsed.yes || !process.stdin.isTTY;
  await copyConnectorFiles({
    projectRoot: root,
    registryRoot,
    installDir: config.installDir,
    connector,
    yes: nonInteractive
  });

  const pkg = await readPackageJson(root);
  const { deps, devDeps } = depsForConnector(connector);
  const merged = mergeDeps(pkg, deps, devDeps);
  if (merged.changes.length > 0) {
    await writePackageJson(root, merged.pkg);
  }

  // De-duplicate and sort so repeated `add` runs leave a stable config.
  const connectors = Array.from(new Set([...config.connectors ?? [], connector])).sort();
  await writeJsonFile(configPath, { ...config, connectors });

  const summary = [
    `Installed connector: ${connector}.`,
    "",
    `- Code: ${path6.join(config.installDir, "connectors", connector)}`,
    `- Docs: /docs/connectors/${connector}`,
    "",
    merged.changes.length > 0 ? `Added deps: ${merged.changes.map((c) => c.name).join(", ")}` : "Added deps: none"
  ];
  // Only the tip is optional; the empty strings above are intentional blank
  // lines and must not be filtered away.
  if (!nonInteractive) {
    summary.push("Tip: keep NOTION_TOKEN server-side only (env var).");
  }
  outro2(summary.join("\n"));
}
|
|
655
|
+
|
|
515
656
|
// cli/run.ts
|
|
516
657
|
/**
 * CLI entry point: prints the banner, then dispatches on the first argument
 * after the node binary and script path.
 *
 * No command or `help` / `--help` / `-h` prints usage. `init` and `add` are
 * forwarded their remaining arguments. Anything else is reported as unknown
 * and sets a non-zero exit code.
 *
 * @param argv Full process argument vector.
 */
async function run(argv) {
  const [, , command, ...rest] = argv;
  intro("unrag");

  const wantsHelp = !command || ["help", "--help", "-h"].includes(command);
  if (wantsHelp) {
    outro3(["Usage:", "", "- unrag init", "- unrag add <connector>"].join("\n"));
    return;
  }

  switch (command) {
    case "init":
      await initCommand(rest);
      return;
    case "add":
      await addCommand(rest);
      return;
    default:
      outro3(`Unknown command: ${command}`);
      process.exitCode = 1;
  }
}
|
|
530
676
|
|
package/package.json
CHANGED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import { Client } from "@notionhq/client";
|
|
2
|
+
|
|
3
|
+
/** Instance type of the Notion SDK client used by this connector. */
export type NotionClient = Client;

/** Options accepted by {@link createNotionClient}. */
export type CreateNotionClientInput = {
  /** Notion integration token. Surrounding whitespace is trimmed. */
  token: string;
  /** Request timeout in milliseconds; 30 000 when omitted. */
  timeoutMs?: number;
};

/**
 * Creates a Notion SDK client authenticated with the given token.
 *
 * The options object is passed without a cast so the compiler checks it
 * against the SDK's constructor options.
 *
 * @param input Token and optional timeout.
 * @returns A configured SDK client.
 * @throws Error when the token is missing or blank.
 */
export function createNotionClient(input: CreateNotionClientInput): NotionClient {
  const token = input.token?.trim();
  if (!token) throw new Error("NOTION token is required");

  return new Client({
    auth: token,
    timeoutMs: input.timeoutMs ?? 30_000,
  });
}
|
|
21
|
+
|
|
22
|
+
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/** Matches exactly 32 hexadecimal characters (a UUID without hyphens). */
const UUID_32_RE = /^[0-9a-f]{32}$/i;
/** Matches a UUID in 8-4-4-4-12 hyphenated form. */
const UUID_HYPHEN_RE =
  /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;

// Unanchored variants, used to pull an id out of a URL or other mixed text.
const EMBEDDED_HEX_32_RE = /[0-9a-f]{32}/i;
const EMBEDDED_UUID_RE =
  /[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/i;

/**
 * Removes every hyphen. Implemented with a global regex instead of
 * `String.prototype.replaceAll` so the file also compiles and runs where the
 * ES2021 string methods are unavailable.
 */
const stripHyphens = (value: string): string => value.replace(/-/g, "");

/**
 * Normalizes a Notion id (bare, hyphenated, or embedded in a URL/string) to
 * 32 lowercase hex characters.
 *
 * The first run of 32 hex characters wins; failing that, the first hyphenated
 * UUID; failing that, the whole trimmed input is used.
 *
 * @param input Id, hyphenated UUID, or text containing one.
 * @returns The id as 32 lowercase hex characters.
 * @throws Error when the input is empty or no valid id can be derived.
 */
export function normalizeNotionId32(input: string): string {
  const raw = String(input ?? "").trim();
  if (!raw) throw new Error("Notion id is required");

  const token =
    raw.match(EMBEDDED_HEX_32_RE)?.[0] ??
    raw.match(EMBEDDED_UUID_RE)?.[0] ??
    raw;

  const normalized = stripHyphens(token).toLowerCase();
  if (!UUID_32_RE.test(normalized)) {
    throw new Error(`Invalid Notion id: ${input}`);
  }
  return normalized;
}

/**
 * Formats an id as a hyphenated 8-4-4-4-12 UUID.
 *
 * @param id32 Any input accepted by {@link normalizeNotionId32}.
 * @returns Lowercase hyphenated UUID.
 * @throws Error when the input cannot be normalized.
 */
export function toUuidHyphenated(id32: string): string {
  const n = normalizeNotionId32(id32);
  const groups = [n.slice(0, 8), n.slice(8, 12), n.slice(12, 16), n.slice(16, 20), n.slice(20)];
  return groups.join("-");
}

/**
 * Page-specific alias of {@link normalizeNotionId32}.
 *
 * @param pageIdOrUrl Page id or page URL.
 * @returns The page id as 32 lowercase hex characters.
 */
export function normalizeNotionPageId32(pageIdOrUrl: string): string {
  return normalizeNotionId32(pageIdOrUrl);
}

/**
 * Reports whether the trimmed input is 32 hex characters once hyphens are
 * ignored, or a hyphenated UUID.
 *
 * @param input Candidate string.
 */
export function isUuidLike(input: string): boolean {
  const s = String(input ?? "").trim();
  return UUID_32_RE.test(stripHyphens(s)) || UUID_HYPHEN_RE.test(s);
}
|
|
38
|
+
|
|
39
|
+
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
export { createNotionClient } from "./client";
|
|
2
|
+
export { normalizeNotionId32, normalizeNotionPageId32, toUuidHyphenated } from "./ids";
|
|
3
|
+
export { renderNotionBlocksToText } from "./render";
|
|
4
|
+
export { buildNotionPageIngestInput, loadNotionPageDocument, syncNotionPages } from "./sync";
|
|
5
|
+
export * from "./types";
|
|
6
|
+
|
|
7
|
+
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
/** The only part of a Notion rich-text item this renderer reads. */
type RichText = { plain_text?: string };

/** A Notion block; its type-specific payload lives under the key named by `type`. */
export type NotionBlock = {
  id: string;
  type: string;
  has_children?: boolean;
  // Payload shape varies per block type, so it is left untyped here.
  [key: string]: unknown;
};

/** A block together with its already-fetched child blocks. */
export type NotionBlockNode = {
  block: NotionBlock;
  children: NotionBlockNode[];
};

/** Concatenates the `plain_text` of every rich-text item; non-arrays yield "". */
const rt = (value: unknown): string => {
  if (!Array.isArray(value)) return "";
  let text = "";
  for (const item of value as RichText[]) {
    text += item?.plain_text ?? "";
  }
  return text;
};

/** Leading whitespace for a list item: one space per nesting level. */
const indent = (n: number) => (n > 0 ? " ".repeat(n) : "");

/**
 * Renders a block tree to markdown-flavoured plain text.
 *
 * Handles paragraphs, headings 1-3, bulleted/numbered/to-do items, quotes,
 * callouts, code and dividers. Other block types emit nothing themselves, but
 * their children are still visited. Text-bearing blocks with blank text are
 * skipped. An empty line follows each top-level node, and the final result is
 * trimmed.
 *
 * @param nodes Top-level block nodes.
 * @param opts `maxDepth` (default 6): nodes deeper than this are not rendered;
 *   top-level nodes are depth 0.
 * @returns The rendered text.
 */
export function renderNotionBlocksToText(
  nodes: NotionBlockNode[],
  opts: { maxDepth?: number } = {}
): string {
  const maxDepth = opts.maxDepth ?? 6;
  const output: string[] = [];

  /** Lines contributed by a single block, excluding its children. */
  const renderBlock = (block: NotionBlock, listDepth: number): string[] => {
    const payloadOf = (key: string) => (block as any)[key];
    const whenText = (key: string, format: (text: string) => string): string[] => {
      const text = rt(payloadOf(key)?.rich_text);
      return text.trim() ? [format(text)] : [];
    };

    switch (block.type) {
      case "paragraph":
        return whenText("paragraph", (text) => text);
      case "heading_1":
        return whenText("heading_1", (text) => `# ${text}`);
      case "heading_2":
        return whenText("heading_2", (text) => `## ${text}`);
      case "heading_3":
        return whenText("heading_3", (text) => `### ${text}`);
      case "bulleted_list_item":
        return whenText("bulleted_list_item", (text) => `${indent(listDepth)}- ${text}`);
      case "numbered_list_item":
        return whenText("numbered_list_item", (text) => `${indent(listDepth)}- ${text}`);
      case "to_do": {
        const mark = Boolean(payloadOf("to_do")?.checked) ? "x" : " ";
        return whenText("to_do", (text) => `${indent(listDepth)}- [${mark}] ${text}`);
      }
      case "quote":
        return whenText("quote", (text) => `> ${text}`);
      case "callout":
        return whenText("callout", (text) => text);
      case "code": {
        // The fence is always emitted, even around an empty code block.
        const language = String(payloadOf("code")?.language ?? "").trim();
        return ["```" + language, ...whenText("code", (text) => text), "```"];
      }
      case "divider":
        return ["---"];
      default:
        // Unsupported block types contribute no text of their own.
        return [];
    }
  };

  const LIST_TYPES = ["bulleted_list_item", "numbered_list_item", "to_do"];

  function visit(node: NotionBlockNode, depth: number, listDepth: number): void {
    if (depth > maxDepth) return;

    output.push(...renderBlock(node.block, listDepth));

    // Children of list items are indented one level further.
    const childListDepth = LIST_TYPES.includes(node.block.type) ? listDepth + 1 : listDepth;
    node.children.forEach((child) => visit(child, depth + 1, childListDepth));
  }

  nodes.forEach((node) => {
    visit(node, 0, 0);
    output.push("");
  });

  return output.join("\n").trim();
}
|
|
97
|
+
|
|
98
|
+
|
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
import type { ContextEngine } from "../../core";
|
|
2
|
+
import type { IngestResult } from "../../core/types";
|
|
3
|
+
import { createNotionClient, type NotionClient } from "./client";
|
|
4
|
+
import { normalizeNotionPageId32, toUuidHyphenated } from "./ids";
|
|
5
|
+
import { renderNotionBlocksToText, type NotionBlock, type NotionBlockNode } from "./render";
|
|
6
|
+
import type {
|
|
7
|
+
BuildNotionPageIngestInputArgs,
|
|
8
|
+
NotionPageDocument,
|
|
9
|
+
NotionSyncProgressEvent,
|
|
10
|
+
SyncNotionPagesInput,
|
|
11
|
+
SyncNotionPagesResult,
|
|
12
|
+
} from "./types";
|
|
13
|
+
|
|
14
|
+
/**
 * Prepends an optional namespace prefix to `rest`, separated by exactly one ":".
 *
 * The prefix is trimmed first; a missing or blank prefix returns `rest`
 * unchanged, and a prefix already ending in ":" gets no extra separator.
 */
const joinPrefix = (prefix: string | undefined, rest: string) => {
  const namespace = prefix?.trim() ?? "";
  if (namespace === "") return rest;
  const separator = namespace.endsWith(":") ? "" : ":";
  return `${namespace}${separator}${rest}`;
};
|
|
19
|
+
|
|
20
|
+
/**
 * Builds the ingest payload for a single Notion page.
 *
 * The source id is `notion:page:<pageId>`, optionally namespaced with
 * `sourceIdPrefix`. Missing metadata becomes an empty object.
 *
 * @param args Page id, content, optional metadata and optional prefix.
 * @returns `{ sourceId, content, metadata }`.
 */
export function buildNotionPageIngestInput(
  args: BuildNotionPageIngestInputArgs
) {
  const unprefixedId = `notion:page:${args.pageId}`;

  return {
    sourceId: joinPrefix(args.sourceIdPrefix, unprefixedId),
    content: args.content,
    metadata: args.metadata ?? {},
  };
}
|
|
34
|
+
|
|
35
|
+
/**
 * Flattens a Notion rich-text array into one string by concatenating each
 * item's `plain_text`. Non-array input and items without `plain_text`
 * contribute nothing.
 */
const richTextToText = (richText: any[] | undefined) => {
  if (!Array.isArray(richText)) return "";
  let text = "";
  for (const segment of richText) {
    text += String(segment?.plain_text ?? "");
  }
  return text;
};
|
|
39
|
+
|
|
40
|
+
/**
 * Extracts a page's title as plain text.
 *
 * Looks through `page.properties` for the first property whose `type` is
 * "title" and flattens its rich text. Returns "" when there is none.
 */
const getNotionPageTitle = (page: any): string => {
  const properties: any[] = Object.values(page?.properties ?? {});
  const titleProperty = properties.find((property) => property?.type === "title");
  return titleProperty === undefined ? "" : richTextToText(titleProperty?.title);
};
|
|
50
|
+
|
|
51
|
+
/**
 * Fetches every direct child of a block, following pagination cursors.
 *
 * Requests pages of 100 and stops once the response reports no more results
 * or supplies no next cursor.
 *
 * @param notion Notion client.
 * @param blockId Id of the parent block (or page).
 * @returns All child blocks in the order returned by the API.
 */
async function listAllBlockChildren(
  notion: NotionClient,
  blockId: string
): Promise<NotionBlock[]> {
  const collected: NotionBlock[] = [];
  let nextCursor: string | undefined;

  do {
    const response: any = await notion.blocks.children.list({
      block_id: blockId,
      start_cursor: nextCursor,
      page_size: 100,
    });

    collected.push(...((response?.results ?? []) as NotionBlock[]));
    nextCursor = response?.has_more ? (response?.next_cursor ?? undefined) : undefined;
  } while (nextCursor);

  return collected;
}
|
|
73
|
+
|
|
74
|
+
/**
 * Recursively loads the block tree beneath `rootBlockId`.
 *
 * A block's children are fetched only when it reports `has_children` and the
 * current `depth` is below `maxDepth`; otherwise the node has no children.
 * Blocks are processed one after another.
 *
 * @param notion Notion client.
 * @param rootBlockId Block (or page) whose children form this level.
 * @param depth Depth of this level; callers start at 0.
 * @param maxDepth Deepest level from which further children are fetched.
 * @returns One node per child block, in API order.
 */
async function buildBlockTree(
  notion: NotionClient,
  rootBlockId: string,
  depth: number,
  maxDepth: number
): Promise<NotionBlockNode[]> {
  const tree: NotionBlockNode[] = [];

  for (const block of await listAllBlockChildren(notion, rootBlockId)) {
    const shouldDescend = Boolean(block.has_children) && depth < maxDepth;
    const children = shouldDescend
      ? await buildBlockTree(notion, block.id, depth + 1, maxDepth)
      : [];
    tree.push({ block, children });
  }

  return tree;
}
|
|
93
|
+
|
|
94
|
+
/**
 * Loads one Notion page and converts it into an ingestable document.
 *
 * Retrieves the page, fetches its block tree down to `maxDepth`, renders the
 * tree to text, and puts the page title (when present) above the body. The
 * same `maxDepth` is passed to the renderer so every fetched level is also
 * rendered, rather than being cut off at the renderer's own default.
 *
 * @param args.notion Notion client.
 * @param args.pageIdOrUrl Page id or page URL.
 * @param args.sourceIdPrefix Optional namespace prefix for the source id.
 * @param args.maxDepth Maximum block nesting depth to fetch and render (default 4).
 * @returns Source id, text content, and metadata (`connector`, `kind`,
 *   `pageId`, `url`, `title`, `lastEditedTime`).
 * @throws Error when the id cannot be normalized; errors from the Notion
 *   client are propagated.
 */
export async function loadNotionPageDocument(args: {
  notion: NotionClient;
  pageIdOrUrl: string;
  sourceIdPrefix?: string;
  maxDepth?: number;
}): Promise<NotionPageDocument> {
  const pageId = normalizeNotionPageId32(args.pageIdOrUrl);
  // API calls below use the hyphenated form; source id and metadata keep the 32-hex form.
  const apiId = toUuidHyphenated(pageId);
  const maxDepth = args.maxDepth ?? 4;

  const page: any = await args.notion.pages.retrieve({ page_id: apiId });
  const title = getNotionPageTitle(page);
  const url = String(page?.url ?? "");
  const lastEditedTime = String(page?.last_edited_time ?? "");

  const tree = await buildBlockTree(args.notion, apiId, 0, maxDepth);
  const body = renderNotionBlocksToText(tree, { maxDepth });
  const content = [title.trim(), body.trim()].filter(Boolean).join("\n\n");

  const metadata = {
    connector: "notion",
    kind: "page",
    pageId,
    url,
    title,
    lastEditedTime,
  } as const;

  const ingest = buildNotionPageIngestInput({
    pageId,
    content,
    metadata: metadata as any,
    sourceIdPrefix: args.sourceIdPrefix,
  });

  return {
    sourceId: ingest.sourceId,
    content: ingest.content,
    metadata: ingest.metadata ?? {},
  };
}
|
|
134
|
+
|
|
135
|
+
/**
 * Heuristically decides whether an error means "not found".
 *
 * True when the first defined value among `status`, `statusCode` and `code`
 * converts to 404, or when the message contains "could not find"
 * (case-insensitive).
 */
const isNotFound = (err: any) => {
  const rawStatus = err?.status ?? err?.statusCode ?? err?.code;
  if (Number(rawStatus) === 404) return true;
  return String(err?.message ?? "").toLowerCase().includes("could not find");
};
|
|
141
|
+
|
|
142
|
+
/**
 * Loads each listed Notion page and ingests it into the engine, one page at a
 * time, reporting progress through `input.onProgress`.
 *
 * Per page:
 * - load + ingest succeed → `succeeded` is incremented, `page:success` is emitted;
 * - loading fails with a not-found error → `page:not-found` is emitted and,
 *   when `deleteOnNotFound` is set, the stored document with that source id is
 *   deleted (`deleted` is incremented; a failing delete counts as `failed`);
 * - any other failure → `failed` is incremented, the error is recorded and
 *   `page:error` is emitted.
 *
 * Only errors raised while loading from Notion are checked for "not found".
 * Errors thrown by `engine.ingest` are always plain failures, so a storage
 * error can never trigger deletion of a stored document.
 *
 * A not-found page without `deleteOnNotFound` is counted in none of
 * `succeeded`, `failed` or `deleted`.
 *
 * @param input Engine, token, page ids/URLs and options.
 * @returns Counters plus the list of per-page errors.
 * @throws Error when the token is blank or a page id cannot be normalized;
 *   these reject the whole call rather than being recorded per page.
 */
export async function syncNotionPages(
  input: SyncNotionPagesInput
): Promise<SyncNotionPagesResult> {
  const deleteOnNotFound = input.deleteOnNotFound ?? false;

  const notion = createNotionClient({ token: input.token });
  const errors: SyncNotionPagesResult["errors"] = [];

  let succeeded = 0;
  let failed = 0;
  let deleted = 0;

  // Progress reporting is best-effort: a throwing handler must not abort the sync.
  const emit = (event: NotionSyncProgressEvent) => {
    try {
      input.onProgress?.(event);
    } catch {
      // ignore progress handler errors
    }
  };

  for (const rawId of input.pageIds) {
    const pageId = normalizeNotionPageId32(rawId);
    const sourceId = joinPrefix(
      input.sourceIdPrefix,
      `notion:page:${pageId}`
    );

    const recordFailure = (error: unknown) => {
      failed += 1;
      errors.push({ pageId, sourceId, error });
      emit({ type: "page:error", pageId, sourceId, error });
    };

    emit({ type: "page:start", pageId, sourceId });

    // Step 1: load from Notion. Only failures here may mean "page not found".
    let doc: NotionPageDocument;
    try {
      doc = await loadNotionPageDocument({
        notion,
        pageIdOrUrl: pageId,
        sourceIdPrefix: input.sourceIdPrefix,
      });
    } catch (err) {
      if (!isNotFound(err)) {
        recordFailure(err);
        continue;
      }

      emit({ type: "page:not-found", pageId, sourceId });
      if (deleteOnNotFound) {
        try {
          await input.engine.delete({ sourceId });
          deleted += 1;
        } catch (deleteErr) {
          recordFailure(deleteErr);
        }
      }
      continue;
    }

    // Step 2: ingest. Any failure here is an ordinary error.
    try {
      const result: IngestResult = await input.engine.ingest({
        sourceId: doc.sourceId,
        content: doc.content,
        metadata: doc.metadata as any,
      });

      // Read the result before counting, so a malformed result is recorded
      // as a failure only, not as both a success and a failure.
      const chunkCount = result.chunkCount;
      succeeded += 1;
      emit({ type: "page:success", pageId, sourceId, chunkCount });
    } catch (err) {
      recordFailure(err);
    }
  }

  return {
    pageCount: input.pageIds.length,
    succeeded,
    failed,
    deleted,
    errors,
  };
}
|
|
221
|
+
|
|
222
|
+
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import type { ContextEngine } from "../../core";
|
|
2
|
+
import type { IngestInput } from "../../core/types";
|
|
3
|
+
|
|
4
|
+
/** Identifies the page an event or error refers to. */
type NotionPageRef = {
  pageId: string;
  sourceId: string;
};

/** Progress notifications passed to `onProgress`, discriminated by `type`. */
export type NotionSyncProgressEvent =
  | (NotionPageRef & { type: "page:start" })
  | (NotionPageRef & { type: "page:success"; chunkCount: number })
  | (NotionPageRef & { type: "page:error"; error: unknown })
  | (NotionPageRef & { type: "page:not-found" });

/** Input for syncing a list of Notion pages into an engine. */
export type SyncNotionPagesInput = {
  /** Engine that receives the ingested documents. */
  engine: ContextEngine;
  /**
   * Notion integration token.
   * Server-side only; supply it from an environment variable.
   */
  token: string;
  /** Pages to sync, given as Notion page IDs or page URLs. */
  pageIds: string[];
  /**
   * Optional namespace prefix for source ids, e.g. for multi-tenant apps:
   * `tenant:acme:` -> `tenant:acme:notion:page:<id>`
   */
  sourceIdPrefix?: string;
  /**
   * When true, a page that is not found/accessible causes the previously
   * ingested document with that exact sourceId to be deleted.
   */
  deleteOnNotFound?: boolean;
  /** Optional callback receiving progress events. */
  onProgress?: (event: NotionSyncProgressEvent) => void;
};

/** Summary of a sync run. */
export type SyncNotionPagesResult = {
  /** Number of page ids that were requested. */
  pageCount: number;
  succeeded: number;
  failed: number;
  deleted: number;
  /** One entry per recorded failure. */
  errors: Array<NotionPageRef & { error: unknown }>;
};

/** A Notion page converted into ingestable form. */
export type NotionPageDocument = {
  sourceId: string;
  content: string;
  metadata: Record<string, unknown>;
};

/** Arguments for building the ingest payload of one page. */
export type BuildNotionPageIngestInputArgs = {
  /** Normalized page id: 32 hex characters, no dashes. */
  pageId: string;
  content: string;
  metadata?: Record<string, unknown>;
  sourceIdPrefix?: string;
};

/** Result type of the ingest-input builder; the engine's own ingest input. */
export type BuildNotionPageIngestInputResult = IngestInput;
|
|
55
|
+
|
|
56
|
+
|