unrag 0.1.1 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/index.js +150 -4
- package/package.json +1 -1
- package/registry/connectors/notion/client.ts +22 -0
- package/registry/connectors/notion/ids.ts +39 -0
- package/registry/connectors/notion/index.ts +7 -0
- package/registry/connectors/notion/render.ts +98 -0
- package/registry/connectors/notion/sync.ts +222 -0
- package/registry/connectors/notion/types.ts +56 -0
- package/registry/core/context-engine.ts +6 -0
- package/registry/core/delete.ts +19 -0
- package/registry/core/index.ts +1 -0
- package/registry/core/types.ts +23 -0
- package/registry/store/drizzle-postgres-pgvector/store.ts +16 -1
- package/registry/store/prisma-postgres-pgvector/store.ts +17 -0
- package/registry/store/raw-sql-postgres-pgvector/store.ts +21 -0
package/dist/cli/index.js
CHANGED

@@ -1,7 +1,7 @@
 #!/usr/bin/env node
 
 // cli/run.ts
-import { intro, outro as
+import { intro, outro as outro3 } from "@clack/prompts";
 
 // cli/commands/init.ts
 import { cancel as cancel2, isCancel as isCancel2, outro, select, text } from "@clack/prompts";

@@ -47,6 +47,23 @@ async function findUp(startDir, filename) {
     current = parent;
   }
 }
+async function listFilesRecursive(dir) {
+  const entries = await readdir(dir, { withFileTypes: true });
+  const out = [];
+  for (const entry of entries) {
+    const full = path.join(dir, entry.name);
+    if (entry.isDirectory()) {
+      out.push(...await listFilesRecursive(full));
+    } else if (entry.isFile()) {
+      out.push(full);
+    } else {
+      const s = await stat(full).catch(() => null);
+      if (s?.isFile())
+        out.push(full);
+    }
+  }
+  return out;
+}
 
 // cli/lib/registry.ts
 var readText = (filePath) => readFile(filePath, "utf8");

@@ -209,6 +226,42 @@ async function copyRegistryFiles(selection) {
     await writeText(mapping.dest, content);
   }
 }
+async function copyConnectorFiles(selection) {
+  const toAbs = (projectRelative) => path2.join(selection.projectRoot, projectRelative);
+  const installBaseAbs = toAbs(selection.installDir);
+  const connectorRegistryAbs = path2.join(selection.registryRoot, "connectors", selection.connector);
+  if (!await exists(connectorRegistryAbs)) {
+    throw new Error(`Unknown connector registry: ${path2.relative(selection.registryRoot, connectorRegistryAbs)}`);
+  }
+  const files = await listFilesRecursive(connectorRegistryAbs);
+  const destRootAbs = path2.join(installBaseAbs, "connectors", selection.connector);
+  const nonInteractive = Boolean(selection.yes) || !process.stdin.isTTY;
+  for (const src of files) {
+    if (!await exists(src)) {
+      throw new Error(`Registry file missing: ${src}`);
+    }
+    const rel = path2.relative(connectorRegistryAbs, src);
+    const dest = path2.join(destRootAbs, rel);
+    if (await exists(dest)) {
+      if (nonInteractive) {
+        continue;
+      }
+      const answer = await confirm({
+        message: `Overwrite ${path2.relative(selection.projectRoot, dest)}?`,
+        initialValue: false
+      });
+      if (isCancel(answer)) {
+        cancel("Cancelled.");
+        return;
+      }
+      if (!answer) {
+        continue;
+      }
+    }
+    const raw = await readText(src);
+    await writeText(dest, raw);
+  }
+}
 
 // cli/lib/json.ts
 import { readFile as readFile2, writeFile as writeFile2 } from "node:fs/promises";

@@ -286,6 +339,14 @@ function depsForAdapter(adapter) {
   }
   return { deps, devDeps };
 }
+function depsForConnector(connector) {
+  const deps = {};
+  const devDeps = {};
+  if (connector === "notion") {
+    deps["@notionhq/client"] = "^2.2.16";
+  }
+  return { deps, devDeps };
+}
 function installCmd(pm) {
   if (pm === "bun")
     return "bun install";

@@ -488,7 +549,8 @@ async function initCommand(args) {
     installDir,
     storeAdapter: storeAdapterAnswer,
     aliasBase,
-    version: CONFIG_VERSION
+    version: CONFIG_VERSION,
+    connectors: existing?.connectors ?? []
   };
   await writeJsonFile(path5.join(root, CONFIG_FILE), config);
   const pm = await detectPackageManager(root);

@@ -512,19 +574,103 @@ async function initCommand(args) {
 `));
 }
 
+// cli/commands/add.ts
+import { outro as outro2 } from "@clack/prompts";
+import path6 from "node:path";
+import { fileURLToPath as fileURLToPath2 } from "node:url";
+var CONFIG_FILE2 = "unrag.json";
+var __filename3 = fileURLToPath2(import.meta.url);
+var __dirname3 = path6.dirname(__filename3);
+var parseAddArgs = (args) => {
+  const out = {};
+  for (let i = 0; i < args.length; i++) {
+    const a = args[i];
+    if (a === "--yes" || a === "-y") {
+      out.yes = true;
+      continue;
+    }
+    if (!out.connector && !a.startsWith("-")) {
+      out.connector = a;
+      continue;
+    }
+  }
+  return out;
+};
+async function addCommand(args) {
+  const root = await tryFindProjectRoot(process.cwd());
+  if (!root) {
+    throw new Error("Could not find a project root (no package.json found).");
+  }
+  const parsed = parseAddArgs(args);
+  const connector = parsed.connector;
+  if (!connector) {
+    outro2(`Usage: unrag add <connector>
+
+Available connectors: notion`);
+    return;
+  }
+  if (connector !== "notion") {
+    outro2(`Unknown connector: ${connector}
+
+Available connectors: notion`);
+    return;
+  }
+  const configPath = path6.join(root, CONFIG_FILE2);
+  const config = await readJsonFile(configPath);
+  if (!config?.installDir) {
+    throw new Error(`Missing ${CONFIG_FILE2}. Run \`unrag init\` first.`);
+  }
+  const cliPackageRoot = await findUp(__dirname3, "package.json");
+  if (!cliPackageRoot) {
+    throw new Error("Could not locate CLI package root (package.json not found).");
+  }
+  const registryRoot = path6.join(cliPackageRoot, "registry");
+  const nonInteractive = parsed.yes || !process.stdin.isTTY;
+  await copyConnectorFiles({
+    projectRoot: root,
+    registryRoot,
+    installDir: config.installDir,
+    connector,
+    yes: nonInteractive
+  });
+  const pkg = await readPackageJson(root);
+  const { deps, devDeps } = depsForConnector(connector);
+  const merged = mergeDeps(pkg, deps, devDeps);
+  if (merged.changes.length > 0) {
+    await writePackageJson(root, merged.pkg);
+  }
+  const connectors = Array.from(new Set([...config.connectors ?? [], connector])).sort();
+  await writeJsonFile(configPath, { ...config, connectors });
+  outro2([
+    `Installed connector: ${connector}.`,
+    "",
+    `- Code: ${path6.join(config.installDir, "connectors", connector)}`,
+    `- Docs: /docs/connectors/${connector}`,
+    "",
+    merged.changes.length > 0 ? `Added deps: ${merged.changes.map((c) => c.name).join(", ")}` : "Added deps: none",
+    nonInteractive ? "" : "Tip: keep NOTION_TOKEN server-side only (env var)."
+  ].filter(Boolean).join(`
+`));
+}
+
 // cli/run.ts
 async function run(argv) {
   const [, , command, ...rest] = argv;
   intro("unrag");
   if (!command || command === "help" || command === "--help" || command === "-h") {
-
+    outro3(["Usage:", "", "- unrag init", "- unrag add <connector>"].join(`
+`));
     return;
   }
   if (command === "init") {
     await initCommand(rest);
     return;
   }
-
+  if (command === "add") {
+    await addCommand(rest);
+    return;
+  }
+  outro3(`Unknown command: ${command}`);
   process.exitCode = 1;
 }
 
package/package.json
CHANGED

package/registry/connectors/notion/client.ts
ADDED

@@ -0,0 +1,22 @@
+import { Client } from "@notionhq/client";
+
+export type NotionClient = Client;
+
+export type CreateNotionClientInput = {
+  token: string;
+  timeoutMs?: number;
+};
+
+export function createNotionClient(input: CreateNotionClientInput): NotionClient {
+  const token = input.token?.trim();
+  if (!token) throw new Error("NOTION token is required");
+
+  return new Client({
+    auth: token,
+    // @notionhq/client uses undici/fetch under the hood; timeout is supported.
+    // If unsupported in a future version, callers can wrap requests.
+    timeoutMs: input.timeoutMs ?? 30_000,
+  } as any);
+}
+
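A minimal usage sketch for the new client factory (the import path assumes the default install layout created by `unrag init`, and the `NOTION_TOKEN` env var follows the CLI's server-side tip; both are assumptions, not part of the diff):

```ts
import { createNotionClient } from "./registry/connectors/notion/client";

// The factory throws on a missing/blank token, so this fails fast at startup.
const notion = createNotionClient({
  token: process.env.NOTION_TOKEN ?? "",
  timeoutMs: 15_000, // optional; the factory defaults to 30s
});
```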
package/registry/connectors/notion/ids.ts
ADDED

@@ -0,0 +1,39 @@
+const UUID_32_RE = /^[0-9a-f]{32}$/i;
+const UUID_HYPHEN_RE =
+  /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
+
+export function normalizeNotionId32(input: string): string {
+  const raw = String(input ?? "").trim();
+  if (!raw) throw new Error("Notion id is required");
+
+  // Try to extract UUID-like tokens from URLs or mixed strings.
+  const token =
+    raw.match(/[0-9a-fA-F]{32}/)?.[0] ??
+    raw.match(/[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}/)?.[0] ??
+    raw;
+
+  const normalized = token.replaceAll("-", "").toLowerCase();
+  if (!UUID_32_RE.test(normalized)) {
+    throw new Error(`Invalid Notion id: ${input}`);
+  }
+  return normalized;
+}
+
+export function toUuidHyphenated(id32: string): string {
+  const n = normalizeNotionId32(id32);
+  return `${n.slice(0, 8)}-${n.slice(8, 12)}-${n.slice(12, 16)}-${n.slice(
+    16,
+    20
+  )}-${n.slice(20)}`;
+}
+
+export function normalizeNotionPageId32(pageIdOrUrl: string): string {
+  return normalizeNotionId32(pageIdOrUrl);
+}
+
+export function isUuidLike(input: string): boolean {
+  const s = String(input ?? "").trim();
+  return UUID_32_RE.test(s.replaceAll("-", "")) || UUID_HYPHEN_RE.test(s);
+}
+
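The id helpers accept either a bare page id or a full page URL and normalize to the 32-hex form; a sketch with a hypothetical URL:

```ts
import { normalizeNotionPageId32, toUuidHyphenated } from "./registry/connectors/notion/ids";

// The 32-hex token embedded in the URL is extracted and lowercased.
const id32 = normalizeNotionPageId32(
  "https://www.notion.so/Example-Page-0123456789abcdef0123456789abcdef"
);
// => "0123456789abcdef0123456789abcdef"

// The Notion API expects the hyphenated UUID form.
const apiId = toUuidHyphenated(id32);
// => "01234567-89ab-cdef-0123-456789abcdef"
```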
package/registry/connectors/notion/index.ts
ADDED

@@ -0,0 +1,7 @@
+export { createNotionClient } from "./client";
+export { normalizeNotionId32, normalizeNotionPageId32, toUuidHyphenated } from "./ids";
+export { renderNotionBlocksToText } from "./render";
+export { buildNotionPageIngestInput, loadNotionPageDocument, syncNotionPages } from "./sync";
+export * from "./types";
+
package/registry/connectors/notion/render.ts
ADDED

@@ -0,0 +1,98 @@
+type RichText = { plain_text?: string };
+
+export type NotionBlock = {
+  id: string;
+  type: string;
+  has_children?: boolean;
+  // Notion block payload is keyed by `type`; we keep it loose to stay stable.
+  [key: string]: unknown;
+};
+
+export type NotionBlockNode = {
+  block: NotionBlock;
+  children: NotionBlockNode[];
+};
+
+const rt = (value: unknown): string => {
+  const items = Array.isArray(value) ? (value as RichText[]) : [];
+  return items.map((t) => t?.plain_text ?? "").join("");
+};
+
+const indent = (n: number) => (n > 0 ? " ".repeat(n) : "");
+
+export function renderNotionBlocksToText(
+  nodes: NotionBlockNode[],
+  opts: { maxDepth?: number } = {}
+): string {
+  const maxDepth = opts.maxDepth ?? 6;
+  const lines: string[] = [];
+
+  const walk = (node: NotionBlockNode, depth: number, listDepth: number) => {
+    if (depth > maxDepth) return;
+    const b = node.block;
+
+    const t = b.type;
+
+    if (t === "paragraph") {
+      const text = rt((b as any).paragraph?.rich_text);
+      if (text.trim()) lines.push(text);
+    } else if (t === "heading_1") {
+      const text = rt((b as any).heading_1?.rich_text);
+      if (text.trim()) lines.push(`# ${text}`);
+    } else if (t === "heading_2") {
+      const text = rt((b as any).heading_2?.rich_text);
+      if (text.trim()) lines.push(`## ${text}`);
+    } else if (t === "heading_3") {
+      const text = rt((b as any).heading_3?.rich_text);
+      if (text.trim()) lines.push(`### ${text}`);
+    } else if (t === "bulleted_list_item") {
+      const text = rt((b as any).bulleted_list_item?.rich_text);
+      if (text.trim()) lines.push(`${indent(listDepth)}- ${text}`);
+    } else if (t === "numbered_list_item") {
+      const text = rt((b as any).numbered_list_item?.rich_text);
+      if (text.trim()) lines.push(`${indent(listDepth)}- ${text}`);
+    } else if (t === "to_do") {
+      const text = rt((b as any).to_do?.rich_text);
+      const checked = Boolean((b as any).to_do?.checked);
+      if (text.trim()) lines.push(`${indent(listDepth)}- [${checked ? "x" : " "}] ${text}`);
+    } else if (t === "quote") {
+      const text = rt((b as any).quote?.rich_text);
+      if (text.trim()) lines.push(`> ${text}`);
+    } else if (t === "callout") {
+      const text = rt((b as any).callout?.rich_text);
+      if (text.trim()) lines.push(text);
+    } else if (t === "code") {
+      const text = rt((b as any).code?.rich_text);
+      const lang = String((b as any).code?.language ?? "").trim();
+      lines.push("```" + lang);
+      if (text.trim()) lines.push(text);
+      lines.push("```");
+    } else if (t === "divider") {
+      lines.push("---");
+    } else {
+      // Unsupported block types are ignored for v1.
+      // This keeps the output focused and avoids surprises.
+    }
+
+    // Render children (nested blocks). For list items, increase listDepth.
+    const nextListDepth =
+      t === "bulleted_list_item" ||
+      t === "numbered_list_item" ||
+      t === "to_do"
+        ? listDepth + 1
+        : listDepth;
+
+    for (const child of node.children) {
+      walk(child, depth + 1, nextListDepth);
+    }
+  };
+
+  for (const node of nodes) {
+    walk(node, 0, 0);
+    lines.push("");
+  }
+
+  return lines.join("\n").trim();
+}
+
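A small sketch of the renderer on a hand-built block tree (ids and text are made up); nested children are rendered recursively, with list items indenting their descendants by `listDepth`:

```ts
import {
  renderNotionBlocksToText,
  type NotionBlockNode,
} from "./registry/connectors/notion/render";

// Hypothetical, minimal tree: a heading, a bullet, and a checked to-do.
const nodes: NotionBlockNode[] = [
  {
    block: { id: "b1", type: "heading_1", heading_1: { rich_text: [{ plain_text: "Release notes" }] } },
    children: [],
  },
  {
    block: { id: "b2", type: "bulleted_list_item", bulleted_list_item: { rich_text: [{ plain_text: "Ship v0.2.1" }] } },
    children: [],
  },
  {
    block: { id: "b3", type: "to_do", to_do: { rich_text: [{ plain_text: "Write docs" }], checked: true } },
    children: [],
  },
];

renderNotionBlocksToText(nodes);
// => "# Release notes\n\n- Ship v0.2.1\n\n- [x] Write docs"
```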
package/registry/connectors/notion/sync.ts
ADDED

@@ -0,0 +1,222 @@
+import type { ContextEngine } from "../../core";
+import type { IngestResult } from "../../core/types";
+import { createNotionClient, type NotionClient } from "./client";
+import { normalizeNotionPageId32, toUuidHyphenated } from "./ids";
+import { renderNotionBlocksToText, type NotionBlock, type NotionBlockNode } from "./render";
+import type {
+  BuildNotionPageIngestInputArgs,
+  NotionPageDocument,
+  NotionSyncProgressEvent,
+  SyncNotionPagesInput,
+  SyncNotionPagesResult,
+} from "./types";
+
+const joinPrefix = (prefix: string | undefined, rest: string) => {
+  const p = (prefix ?? "").trim();
+  if (!p) return rest;
+  return p.endsWith(":") ? p + rest : p + ":" + rest;
+};
+
+export function buildNotionPageIngestInput(
+  args: BuildNotionPageIngestInputArgs
+) {
+  const sourceId = joinPrefix(
+    args.sourceIdPrefix,
+    `notion:page:${args.pageId}`
+  );
+
+  return {
+    sourceId,
+    content: args.content,
+    metadata: args.metadata ?? {},
+  };
+}
+
+const richTextToText = (richText: any[] | undefined) =>
+  (Array.isArray(richText) ? richText : [])
+    .map((t) => String(t?.plain_text ?? ""))
+    .join("");
+
+const getNotionPageTitle = (page: any): string => {
+  const props = page?.properties ?? {};
+  for (const key of Object.keys(props)) {
+    const p = props[key];
+    if (p?.type === "title") {
+      return richTextToText(p?.title);
+    }
+  }
+  return "";
+};
+
+async function listAllBlockChildren(
+  notion: NotionClient,
+  blockId: string
+): Promise<NotionBlock[]> {
+  const blocks: NotionBlock[] = [];
+  let cursor: string | undefined = undefined;
+
+  while (true) {
+    const res: any = await notion.blocks.children.list({
+      block_id: blockId,
+      start_cursor: cursor,
+      page_size: 100,
+    });
+
+    blocks.push(...((res?.results ?? []) as NotionBlock[]));
+    if (!res?.has_more) break;
+    cursor = res?.next_cursor ?? undefined;
+    if (!cursor) break;
+  }
+
+  return blocks;
+}
+
+async function buildBlockTree(
+  notion: NotionClient,
+  rootBlockId: string,
+  depth: number,
+  maxDepth: number
+): Promise<NotionBlockNode[]> {
+  const children = await listAllBlockChildren(notion, rootBlockId);
+  const nodes: NotionBlockNode[] = [];
+
+  for (const block of children) {
+    let grandChildren: NotionBlockNode[] = [];
+    if (block.has_children && depth < maxDepth) {
+      grandChildren = await buildBlockTree(notion, block.id, depth + 1, maxDepth);
+    }
+    nodes.push({ block, children: grandChildren });
+  }
+
+  return nodes;
+}
+
+export async function loadNotionPageDocument(args: {
+  notion: NotionClient;
+  pageIdOrUrl: string;
+  sourceIdPrefix?: string;
+  maxDepth?: number;
+}): Promise<NotionPageDocument> {
+  const pageId = normalizeNotionPageId32(args.pageIdOrUrl);
+  const apiId = toUuidHyphenated(pageId);
+
+  const page: any = await args.notion.pages.retrieve({ page_id: apiId });
+  const title = getNotionPageTitle(page);
+  const url = String(page?.url ?? "");
+  const lastEditedTime = String(page?.last_edited_time ?? "");
+
+  const tree = await buildBlockTree(args.notion, apiId, 0, args.maxDepth ?? 4);
+  const body = renderNotionBlocksToText(tree);
+  const content = [title.trim(), body.trim()].filter(Boolean).join("\n\n");
+
+  const metadata = {
+    connector: "notion",
+    kind: "page",
+    pageId,
+    url,
+    title,
+    lastEditedTime,
+  } as const;
+
+  const ingest = buildNotionPageIngestInput({
+    pageId,
+    content,
+    metadata: metadata as any,
+    sourceIdPrefix: args.sourceIdPrefix,
+  });
+
+  return {
+    sourceId: ingest.sourceId,
+    content: ingest.content,
+    metadata: ingest.metadata ?? {},
+  };
+}
+
+const isNotFound = (err: any) => {
+  const status = Number(err?.status ?? err?.statusCode ?? err?.code);
+  if (status === 404) return true;
+  const msg = String(err?.message ?? "");
+  return msg.toLowerCase().includes("could not find");
+};
+
+export async function syncNotionPages(
+  input: SyncNotionPagesInput
+): Promise<SyncNotionPagesResult> {
+  const deleteOnNotFound = input.deleteOnNotFound ?? false;
+
+  const notion = createNotionClient({ token: input.token });
+  const errors: SyncNotionPagesResult["errors"] = [];
+
+  let succeeded = 0;
+  let failed = 0;
+  let deleted = 0;
+
+  for (const rawId of input.pageIds) {
+    const pageId = normalizeNotionPageId32(rawId);
+    const sourceId = joinPrefix(
+      input.sourceIdPrefix,
+      `notion:page:${pageId}`
+    );
+
+    const emit = (event: NotionSyncProgressEvent) => {
+      try {
+        input.onProgress?.(event);
+      } catch {
+        // ignore progress handler errors
+      }
+    };
+
+    emit({ type: "page:start", pageId, sourceId });
+
+    try {
+      const doc = await loadNotionPageDocument({
+        notion,
+        pageIdOrUrl: pageId,
+        sourceIdPrefix: input.sourceIdPrefix,
+      });
+
+      const result: IngestResult = await input.engine.ingest({
+        sourceId: doc.sourceId,
+        content: doc.content,
+        metadata: doc.metadata as any,
+      });
+
+      succeeded += 1;
+      emit({
+        type: "page:success",
+        pageId,
+        sourceId,
+        chunkCount: result.chunkCount,
+      });
+    } catch (err) {
+      if (isNotFound(err)) {
+        emit({ type: "page:not-found", pageId, sourceId });
+        if (deleteOnNotFound) {
+          try {
+            await input.engine.delete({ sourceId });
+            deleted += 1;
+          } catch (deleteErr) {
+            failed += 1;
+            errors.push({ pageId, sourceId, error: deleteErr });
+            emit({ type: "page:error", pageId, sourceId, error: deleteErr });
+          }
+        }
+        continue;
+      }
+
+      failed += 1;
+      errors.push({ pageId, sourceId, error: err });
+      emit({ type: "page:error", pageId, sourceId, error: err });
+    }
+  }
+
+  return {
+    pageCount: input.pageIds.length,
+    succeeded,
+    failed,
+    deleted,
+    errors,
+  };
+}
+
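How the sync entry point might be wired up (a sketch; `engineConfig` stands in for an existing embedder/store configuration, and the page URL is hypothetical):

```ts
import { createContextEngine } from "./registry/core";
import { syncNotionPages } from "./registry/connectors/notion";

// Placeholder for your existing Unrag configuration.
declare const engineConfig: Parameters<typeof createContextEngine>[0];
const engine = createContextEngine(engineConfig);

const result = await syncNotionPages({
  engine,
  token: process.env.NOTION_TOKEN!, // server-side integration token
  // Page ids or full page URLs; both are normalized before fetching.
  pageIds: ["https://www.notion.so/Example-0123456789abcdef0123456789abcdef"],
  sourceIdPrefix: "tenant:acme:", // documents land under tenant:acme:notion:page:<id>
  deleteOnNotFound: true,         // prune pages that were removed or un-shared
  onProgress: (event) => console.log(event.type, event.pageId),
});

console.log(`${result.succeeded}/${result.pageCount} ingested, ${result.deleted} deleted, ${result.failed} failed`);
```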
package/registry/connectors/notion/types.ts
ADDED

@@ -0,0 +1,56 @@
+import type { ContextEngine } from "../../core";
+import type { IngestInput } from "../../core/types";
+
+export type NotionSyncProgressEvent =
+  | { type: "page:start"; pageId: string; sourceId: string }
+  | { type: "page:success"; pageId: string; sourceId: string; chunkCount: number }
+  | { type: "page:error"; pageId: string; sourceId: string; error: unknown }
+  | { type: "page:not-found"; pageId: string; sourceId: string };
+
+export type SyncNotionPagesInput = {
+  engine: ContextEngine;
+  /**
+   * Server-side Notion integration token.
+   * Keep this server-only (env var).
+   */
+  token: string;
+  /** Notion page IDs or page URLs. */
+  pageIds: string[];
+  /**
+   * Optional namespace prefix, useful for multi-tenant apps:
+   * `tenant:acme:` -> `tenant:acme:notion:page:<id>`
+   */
+  sourceIdPrefix?: string;
+  /**
+   * When true, if a page is not found/accessible, delete the previously ingested
+   * document for that page (exact sourceId).
+   */
+  deleteOnNotFound?: boolean;
+  /** Optional progress callback. */
+  onProgress?: (event: NotionSyncProgressEvent) => void;
+};
+
+export type SyncNotionPagesResult = {
+  pageCount: number;
+  succeeded: number;
+  failed: number;
+  deleted: number;
+  errors: Array<{ pageId: string; sourceId: string; error: unknown }>;
+};
+
+export type NotionPageDocument = {
+  sourceId: string;
+  content: string;
+  metadata: Record<string, unknown>;
+};
+
+export type BuildNotionPageIngestInputArgs = {
+  pageId: string; // normalized 32-hex (no dashes)
+  content: string;
+  metadata?: Record<string, unknown>;
+  sourceIdPrefix?: string;
+};
+
+export type BuildNotionPageIngestInputResult = IngestInput;
+
package/registry/core/context-engine.ts
CHANGED

@@ -1,8 +1,10 @@
+import { deleteDocuments } from "./delete";
 import { ingest } from "./ingest";
 import { retrieve } from "./retrieve";
 import { defineConfig, resolveConfig } from "./config";
 import type {
   ContextEngineConfig,
+  DeleteInput,
   IngestInput,
   IngestResult,
   ResolvedContextEngineConfig,

@@ -24,6 +26,10 @@ export class ContextEngine {
   async retrieve(input: RetrieveInput): Promise<RetrieveResult> {
     return retrieve(this.config, input);
   }
+
+  async delete(input: DeleteInput): Promise<void> {
+    return deleteDocuments(this.config, input);
+  }
 }
 
 export const createContextEngine = (config: ContextEngineConfig) =>
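The engine-level delete in use (a sketch; `engine` is whatever `createContextEngine` returned in your setup, and the sourceIds follow the connector's `notion:page:<id>` convention):

```ts
import type { ContextEngine } from "./registry/core";

declare const engine: ContextEngine; // your configured engine instance

// Remove a single ingested page by exact sourceId...
await engine.delete({ sourceId: "tenant:acme:notion:page:0123456789abcdef0123456789abcdef" });

// ...or everything under a namespace prefix.
await engine.delete({ sourceIdPrefix: "tenant:acme:" });

// Passing both keys (or neither) throws:
// 'Provide exactly one of "sourceId" or "sourceIdPrefix".'
```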
package/registry/core/delete.ts
ADDED

@@ -0,0 +1,19 @@
+import type { DeleteInput, ResolvedContextEngineConfig } from "./types";
+
+export const deleteDocuments = async (
+  config: ResolvedContextEngineConfig,
+  input: DeleteInput
+): Promise<void> => {
+  const hasSourceId = "sourceId" in input && typeof input.sourceId === "string";
+  const hasPrefix =
+    "sourceIdPrefix" in input && typeof input.sourceIdPrefix === "string";
+
+  if (hasSourceId === hasPrefix) {
+    // Both true or both false.
+    throw new Error('Provide exactly one of "sourceId" or "sourceIdPrefix".');
+  }
+
+  await config.store.delete(input);
+};
+
package/registry/core/index.ts
CHANGED

@@ -1,4 +1,5 @@
 export { ContextEngine, createContextEngine, defineConfig } from "./context-engine";
+export { deleteDocuments } from "./delete";
 export { ingest } from "./ingest";
 export { retrieve } from "./retrieve";
 export { defaultChunker, resolveChunkingOptions } from "./chunking";
package/registry/core/types.ts
CHANGED

@@ -44,7 +44,29 @@ export type EmbeddingProvider = {
   embed: (input: EmbeddingInput) => Promise<number[]>;
 };
 
+export type DeleteInput =
+  | {
+      /** Delete a single logical document by exact `sourceId`. */
+      sourceId: string;
+      sourceIdPrefix?: never;
+    }
+  | {
+      /**
+       * Delete all logical documents whose `sourceId` starts with the prefix.
+       * This matches Unrag's prefix scoping behavior in retrieval.
+       */
+      sourceId?: never;
+      sourceIdPrefix: string;
+    };
+
 export type VectorStore = {
+  /**
+   * Persist (replace) a single document's chunks.
+   *
+   * The store treats `chunks[0].sourceId` as the logical identifier for the document.
+   * Calling `upsert()` multiple times with the same `sourceId` replaces the previously
+   * stored content for that document (including when the chunk count changes).
+   */
   upsert: (chunks: Chunk[]) => Promise<void>;
   query: (params: {
     embedding: number[];

@@ -53,6 +75,7 @@ export type VectorStore = {
       sourceId?: string;
     };
   }) => Promise<Array<Chunk & { score: number }>>;
+  delete: (input: DeleteInput) => Promise<void>;
 };
 
 export type IngestInput = {
package/registry/store/drizzle-postgres-pgvector/store.ts
CHANGED

@@ -1,6 +1,6 @@
 import { documents, chunks, embeddings } from "./schema";
 import type { Chunk, VectorStore } from "../../core/types";
-import { sql, type SQL } from "drizzle-orm";
+import { eq, like, sql, type SQL } from "drizzle-orm";
 import type { PgDatabase } from "drizzle-orm/pg-core";
 
 type DrizzleDb = PgDatabase<any, any, any>;

@@ -44,6 +44,10 @@ export const createDrizzleVectorStore = (db: DrizzleDb): VectorStore => ({
     const head = chunkItems[0]!;
     const documentRow = toDocumentRow(head);
 
+    // Replace-by-sourceId: delete any previously stored document(s) for this logical id.
+    // Cascades to chunks and embeddings.
+    await tx.delete(documents).where(eq(documents.sourceId, head.sourceId));
+
     await tx
       .insert(documents)
       .values(documentRow)

@@ -140,6 +144,17 @@ export const createDrizzleVectorStore = (db: DrizzleDb): VectorStore => ({
       score: Number(row.score),
     }));
   },
+
+  delete: async (input) => {
+    if ("sourceId" in input) {
+      await db.delete(documents).where(eq(documents.sourceId, input.sourceId));
+      return;
+    }
+
+    await db
+      .delete(documents)
+      .where(like(documents.sourceId, input.sourceIdPrefix + "%"));
+  },
 });
 
package/registry/store/prisma-postgres-pgvector/store.ts
CHANGED

@@ -21,6 +21,10 @@ export const createPrismaVectorStore = (prisma: PrismaClient): VectorStore => ({
     const documentMetadata = sanitizeMetadata(head.metadata);
 
     await prisma.$transaction(async (tx: { $executeRaw: (query: unknown) => Promise<unknown> }) => {
+      // Replace-by-sourceId: delete any previously stored document(s) for this logical id.
+      // Cascade removes chunks and embeddings.
+      await tx.$executeRaw(sql`delete from documents where source_id = ${head.sourceId}`);
+
       await tx.$executeRaw(
         sql`
           insert into documents (id, source_id, content, metadata)

@@ -128,6 +132,19 @@ export const createPrismaVectorStore = (prisma: PrismaClient): VectorStore => ({
       score: Number(row.score),
     }));
   },
+
+  delete: async (input) => {
+    if ("sourceId" in input) {
+      await prisma.$executeRaw(
+        sql`delete from documents where source_id = ${input.sourceId}`
+      );
+      return;
+    }
+
+    await prisma.$executeRaw(
+      sql`delete from documents where source_id like ${input.sourceIdPrefix + "%"}`
+    );
+  },
 });
 
package/registry/store/raw-sql-postgres-pgvector/store.ts
CHANGED

@@ -42,6 +42,12 @@ export const createRawSqlVectorStore = (pool: Pool): VectorStore => ({
     const head = chunkItems[0]!;
     const documentMetadata = sanitizeMetadata(head.metadata);
 
+    // Replace-by-sourceId: delete any previously stored document(s) for this logical id.
+    // Cascades to chunks and embeddings.
+    await client.query(`delete from documents where source_id = $1`, [
+      head.sourceId,
+    ]);
+
     await client.query(
       `
         insert into documents (id, source_id, content, metadata)

@@ -149,6 +155,21 @@ export const createRawSqlVectorStore = (pool: Pool): VectorStore => ({
       score: Number(row.score),
     }));
   },
+
+  delete: async (input) => {
+    await withTx(pool, async (client) => {
+      if ("sourceId" in input) {
+        await client.query(`delete from documents where source_id = $1`, [
+          input.sourceId,
+        ]);
+        return;
+      }
+
+      await client.query(`delete from documents where source_id like $1`, [
+        input.sourceIdPrefix + "%",
+      ]);
+    });
+  },
 });
 
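All three store adapters now implement the same `delete` contract; a sketch against the raw-SQL adapter (the `pg` pool setup and the import path are assumptions, not part of the diff):

```ts
import { Pool } from "pg";
import { createRawSqlVectorStore } from "./registry/store/raw-sql-postgres-pgvector/store";

const pool = new Pool({ connectionString: process.env.DATABASE_URL });
const store = createRawSqlVectorStore(pool);

// Exact-match delete removes one logical document; the schema cascades to chunks and embeddings.
await store.delete({ sourceId: "notion:page:0123456789abcdef0123456789abcdef" });

// Prefix delete clears a whole namespace, mirroring retrieval's prefix scoping.
await store.delete({ sourceIdPrefix: "tenant:acme:" });
```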