unrag 0.1.1 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/index.js CHANGED
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env node
2
2
 
3
3
  // cli/run.ts
4
- import { intro, outro as outro2 } from "@clack/prompts";
4
+ import { intro, outro as outro3 } from "@clack/prompts";
5
5
 
6
6
  // cli/commands/init.ts
7
7
  import { cancel as cancel2, isCancel as isCancel2, outro, select, text } from "@clack/prompts";
@@ -47,6 +47,23 @@ async function findUp(startDir, filename) {
47
47
  current = parent;
48
48
  }
49
49
  }
50
/**
 * Collect the paths of all files found beneath `dir`, descending into subdirectories.
 *
 * Entries reported by `readdir` as neither a directory nor a regular file
 * (such as symbolic links) are checked with `stat` and kept only when
 * `stat` reports a file; a failing `stat` drops the entry silently.
 *
 * @param dir Directory to walk.
 * @returns Paths built by joining `dir` with each entry name, in traversal order.
 */
async function listFilesRecursive(dir) {
  const found = [];
  const dirents = await readdir(dir, { withFileTypes: true });
  for (const dirent of dirents) {
    const entryPath = path.join(dir, dirent.name);
    if (dirent.isDirectory()) {
      const nested = await listFilesRecursive(entryPath);
      found.push(...nested);
      continue;
    }
    if (dirent.isFile()) {
      found.push(entryPath);
      continue;
    }
    // Not a plain file or directory according to the dirent: ask stat() to decide.
    const info = await stat(entryPath).catch(() => null);
    if (info?.isFile()) {
      found.push(entryPath);
    }
  }
  return found;
}
50
67
 
51
68
  // cli/lib/registry.ts
52
69
  var readText = (filePath) => readFile(filePath, "utf8");
@@ -209,6 +226,42 @@ async function copyRegistryFiles(selection) {
209
226
  await writeText(mapping.dest, content);
210
227
  }
211
228
  }
229
/**
 * Copy the files of one connector from the CLI registry into the project.
 *
 * Sources are read from `<registryRoot>/connectors/<connector>` and written to
 * `<projectRoot>/<installDir>/connectors/<connector>`, keeping their relative layout.
 * A destination that already exists is skipped when running non-interactively
 * (`selection.yes` is truthy or stdin is not a TTY); otherwise the user is asked
 * whether to overwrite it. Cancelling that prompt stops the copy and returns.
 *
 * @param selection Object providing `projectRoot`, `registryRoot`, `installDir`,
 *   `connector` and an optional `yes` flag.
 * @throws Error when the connector directory does not exist in the registry, or
 *   when a listed registry file is no longer present.
 */
async function copyConnectorFiles(selection) {
  const { projectRoot, registryRoot, connector } = selection;
  const installBase = path2.join(projectRoot, selection.installDir);
  const sourceRoot = path2.join(registryRoot, "connectors", connector);
  if (!(await exists(sourceRoot))) {
    throw new Error(`Unknown connector registry: ${path2.relative(registryRoot, sourceRoot)}`);
  }

  const sources = await listFilesRecursive(sourceRoot);
  const targetRoot = path2.join(installBase, "connectors", connector);
  const skipPrompts = Boolean(selection.yes) || !process.stdin.isTTY;

  for (const source of sources) {
    if (!(await exists(source))) {
      throw new Error(`Registry file missing: ${source}`);
    }
    const target = path2.join(targetRoot, path2.relative(sourceRoot, source));

    if (await exists(target)) {
      // Existing files are never overwritten without an explicit confirmation.
      if (skipPrompts) continue;
      const overwrite = await confirm({
        message: `Overwrite ${path2.relative(projectRoot, target)}?`,
        initialValue: false
      });
      if (isCancel(overwrite)) {
        cancel("Cancelled.");
        return;
      }
      if (!overwrite) continue;
    }

    const contents = await readText(source);
    await writeText(target, contents);
  }
}
212
265
 
213
266
  // cli/lib/json.ts
214
267
  import { readFile as readFile2, writeFile as writeFile2 } from "node:fs/promises";
@@ -286,6 +339,14 @@ function depsForAdapter(adapter) {
286
339
  }
287
340
  return { deps, devDeps };
288
341
  }
342
/**
 * Package dependencies that must be added to the project for a connector.
 *
 * @param connector Connector name; only "notion" contributes a dependency.
 * @returns `{ deps, devDeps }` maps of package name to version range.
 */
function depsForConnector(connector) {
  const deps = connector === "notion" ? { "@notionhq/client": "^2.2.16" } : {};
  return { deps, devDeps: {} };
}
289
350
  function installCmd(pm) {
290
351
  if (pm === "bun")
291
352
  return "bun install";
@@ -488,7 +549,8 @@ async function initCommand(args) {
488
549
  installDir,
489
550
  storeAdapter: storeAdapterAnswer,
490
551
  aliasBase,
491
- version: CONFIG_VERSION
552
+ version: CONFIG_VERSION,
553
+ connectors: existing?.connectors ?? []
492
554
  };
493
555
  await writeJsonFile(path5.join(root, CONFIG_FILE), config);
494
556
  const pm = await detectPackageManager(root);
@@ -512,19 +574,103 @@ async function initCommand(args) {
512
574
  `));
513
575
  }
514
576
 
577
+ // cli/commands/add.ts
578
+ import { outro as outro2 } from "@clack/prompts";
579
+ import path6 from "node:path";
580
+ import { fileURLToPath as fileURLToPath2 } from "node:url";
581
+ var CONFIG_FILE2 = "unrag.json";
582
+ var __filename3 = fileURLToPath2(import.meta.url);
583
+ var __dirname3 = path6.dirname(__filename3);
584
/**
 * Parse the arguments of `unrag add`.
 *
 * `--yes` / `-y` sets `yes: true`. The first argument that does not start with
 * "-" becomes `connector`; further positionals and unknown flags are ignored.
 * Keys are only present on the result when the corresponding argument was seen.
 */
var parseAddArgs = (args) => {
  // A Map keeps first-insertion order, mirroring property order on a plain object.
  const seen = new Map();
  for (const arg of args) {
    if (arg === "--yes" || arg === "-y") {
      seen.set("yes", true);
    } else if (!seen.get("connector") && !arg.startsWith("-")) {
      seen.set("connector", arg);
    }
  }
  return Object.fromEntries(seen);
};
599
/**
 * Implementation of `unrag add <connector>`.
 *
 * Steps, in order:
 *  1. locate the project root from the current working directory;
 *  2. validate the requested connector name;
 *  3. read `unrag.json` to learn the install directory;
 *  4. copy the connector's registry files into the project;
 *  5. merge the connector's dependencies into `package.json`;
 *  6. record the connector in `unrag.json` and print a summary.
 *
 * An unknown connector name prints the available connectors and sets
 * `process.exitCode = 1`, matching how `run` reports an unknown command.
 * Calling the command without a connector only prints usage.
 *
 * @param args Arguments following `add` on the command line.
 * @throws Error when no project root is found, when `unrag.json` is missing
 *   (or has no `installDir`), or when the CLI package root cannot be located.
 */
async function addCommand(args) {
  // Single source of truth for validation and for the help text.
  const availableConnectors = ["notion"];
  const availableLine = `Available connectors: ${availableConnectors.join(", ")}`;

  const root = await tryFindProjectRoot(process.cwd());
  if (!root) {
    throw new Error("Could not find a project root (no package.json found).");
  }

  const parsed = parseAddArgs(args);
  const connector = parsed.connector;
  if (!connector) {
    outro2(["Usage: unrag add <connector>", "", availableLine].join("\n"));
    return;
  }
  if (!availableConnectors.includes(connector)) {
    outro2([`Unknown connector: ${connector}`, "", availableLine].join("\n"));
    // Signal failure to the shell, like `run` does for an unknown command.
    process.exitCode = 1;
    return;
  }

  const configPath = path6.join(root, CONFIG_FILE2);
  const config = await readJsonFile(configPath);
  if (!config?.installDir) {
    throw new Error(`Missing ${CONFIG_FILE2}. Run \`unrag init\` first.`);
  }

  const cliPackageRoot = await findUp(__dirname3, "package.json");
  if (!cliPackageRoot) {
    throw new Error("Could not locate CLI package root (package.json not found).");
  }
  const registryRoot = path6.join(cliPackageRoot, "registry");

  const nonInteractive = parsed.yes || !process.stdin.isTTY;
  await copyConnectorFiles({
    projectRoot: root,
    registryRoot,
    installDir: config.installDir,
    connector,
    yes: nonInteractive
  });

  const pkg = await readPackageJson(root);
  const { deps, devDeps } = depsForConnector(connector);
  const merged = mergeDeps(pkg, deps, devDeps);
  // Only rewrite package.json when the merge actually changed something.
  if (merged.changes.length > 0) {
    await writePackageJson(root, merged.pkg);
  }

  // Keep the recorded connector list de-duplicated and sorted.
  const connectors = Array.from(new Set([...config.connectors ?? [], connector])).sort();
  await writeJsonFile(configPath, { ...config, connectors });

  const addedDeps = merged.changes.length > 0
    ? `Added deps: ${merged.changes.map((c) => c.name).join(", ")}`
    : "Added deps: none";
  outro2([
    `Installed connector: ${connector}.`,
    "",
    `- Code: ${path6.join(config.installDir, "connectors", connector)}`,
    `- Docs: /docs/connectors/${connector}`,
    "",
    addedDeps,
    nonInteractive ? "" : "Tip: keep NOTION_TOKEN server-side only (env var)."
  ].filter(Boolean).join("\n"));
}
655
+
515
656
  // cli/run.ts
516
657
  async function run(argv) {
517
658
  const [, , command, ...rest] = argv;
518
659
  intro("unrag");
519
660
  if (!command || command === "help" || command === "--help" || command === "-h") {
520
- outro2("Usage: unrag init");
661
+ outro3(["Usage:", "", "- unrag init", "- unrag add <connector>"].join(`
662
+ `));
521
663
  return;
522
664
  }
523
665
  if (command === "init") {
524
666
  await initCommand(rest);
525
667
  return;
526
668
  }
527
- outro2(`Unknown command: ${command}`);
669
+ if (command === "add") {
670
+ await addCommand(rest);
671
+ return;
672
+ }
673
+ outro3(`Unknown command: ${command}`);
528
674
  process.exitCode = 1;
529
675
  }
530
676
 
package/package.json CHANGED
@@ -4,7 +4,7 @@
4
4
  "bin": {
5
5
  "unrag": "./dist/cli/index.js"
6
6
  },
7
- "version": "0.1.1",
7
+ "version": "0.2.1",
8
8
  "private": false,
9
9
  "license": "Apache-2.0",
10
10
  "devDependencies": {
@@ -0,0 +1,22 @@
1
+ import { Client } from "@notionhq/client";
2
+
3
/** Notion SDK client instance, as produced by `createNotionClient`. */
export type NotionClient = Client;

/** Options accepted by `createNotionClient`. */
export type CreateNotionClientInput = {
  /** Notion integration token. Surrounding whitespace is ignored. */
  token: string;
  /** Request timeout in milliseconds; defaults to 30 000 when omitted. */
  timeoutMs?: number;
};

/**
 * Create a Notion SDK client authenticated with the given token.
 *
 * @param input Token and optional request timeout.
 * @returns A configured `Client`.
 * @throws Error when the token is missing or blank.
 */
export function createNotionClient(input: CreateNotionClientInput): NotionClient {
  const token = input.token?.trim();
  if (!token) throw new Error("NOTION token is required");

  // `timeoutMs` is a documented client option, so the options object is passed
  // fully typed instead of being cast to `any`.
  return new Client({
    auth: token,
    timeoutMs: input.timeoutMs ?? 30_000,
  });
}
21
+
22
+
@@ -0,0 +1,39 @@
1
const UUID_32_RE = /^[0-9a-f]{32}$/i;
const UUID_HYPHEN_RE =
  /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;

// Unanchored variants used to pull an id out of a URL or other mixed string.
const EMBEDDED_ID_32_RE = /[0-9a-f]{32}/i;
const EMBEDDED_ID_HYPHEN_RE =
  /[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}/i;

// Global regex replace instead of `String.prototype.replaceAll`, which needs an
// ES2021 lib/runtime that projects receiving this file may not target.
const stripHyphens = (value: string): string => value.replace(/-/g, "");

/**
 * Normalize a Notion id (or a string containing one, such as a page URL) to
 * 32 lowercase hex characters without hyphens.
 *
 * The first run of 32 hex characters is used if present, otherwise the first
 * hyphenated UUID, otherwise the trimmed input itself.
 *
 * @throws Error when the input is empty or no valid id can be derived from it.
 */
export function normalizeNotionId32(input: string): string {
  const raw = String(input ?? "").trim();
  if (!raw) throw new Error("Notion id is required");

  const token =
    raw.match(EMBEDDED_ID_32_RE)?.[0] ??
    raw.match(EMBEDDED_ID_HYPHEN_RE)?.[0] ??
    raw;

  const normalized = stripHyphens(token).toLowerCase();
  if (!UUID_32_RE.test(normalized)) {
    throw new Error(`Invalid Notion id: ${input}`);
  }
  return normalized;
}

/**
 * Format a Notion id as a hyphenated UUID (8-4-4-4-12).
 * The input is normalized first, so URLs and hyphenated ids are accepted too.
 *
 * @throws Error when the input cannot be normalized to a Notion id.
 */
export function toUuidHyphenated(id32: string): string {
  const n = normalizeNotionId32(id32);
  const groups = [n.slice(0, 8), n.slice(8, 12), n.slice(12, 16), n.slice(16, 20), n.slice(20)];
  return groups.join("-");
}

/** Normalize a Notion page id or page URL; same rules as `normalizeNotionId32`. */
export function normalizeNotionPageId32(pageIdOrUrl: string): string {
  return normalizeNotionId32(pageIdOrUrl);
}

/**
 * Whether the trimmed input consists of 32 hex characters once hyphens are
 * removed. Hyphen placement is not validated by the first check, so the
 * canonical hyphenated form is a subset of what is accepted.
 */
export function isUuidLike(input: string): boolean {
  const s = String(input ?? "").trim();
  return UUID_32_RE.test(stripHyphens(s)) || UUID_HYPHEN_RE.test(s);
}
38
+
39
+
@@ -0,0 +1,7 @@
1
+ export { createNotionClient } from "./client";
2
+ export { normalizeNotionId32, normalizeNotionPageId32, toUuidHyphenated } from "./ids";
3
+ export { renderNotionBlocksToText } from "./render";
4
+ export { buildNotionPageIngestInput, loadNotionPageDocument, syncNotionPages } from "./sync";
5
+ export * from "./types";
6
+
7
+
@@ -0,0 +1,98 @@
1
type RichText = { plain_text?: string };

/** A Notion block as returned by the API; the payload lives under the key named by `type`. */
export type NotionBlock = {
  id: string;
  type: string;
  has_children?: boolean;
  // Notion block payload is keyed by `type`; we keep it loose to stay stable.
  [key: string]: unknown;
};

/** A block together with its already-loaded child blocks. */
export type NotionBlockNode = {
  block: NotionBlock;
  children: NotionBlockNode[];
};

/** Concatenate the `plain_text` of a rich-text array; anything that is not an array yields "". */
const rt = (value: unknown): string => {
  if (!Array.isArray(value)) return "";
  return (value as RichText[]).map((part) => part?.plain_text ?? "").join("");
};

/** Leading whitespace for a nested list item: one space per nesting level. */
const indent = (n: number) => (n > 0 ? " ".repeat(n) : "");

/** Read one field of the block's type-specific payload (`block[block.type][key]`). */
const payloadField = (block: NotionBlock, key: string): unknown =>
  (block[block.type] as Record<string, unknown> | null | undefined)?.[key];

/**
 * Render a tree of Notion blocks as plain text with light Markdown-style markers.
 *
 * Supported block types: paragraph, heading_1..3, bulleted/numbered list items,
 * to_do, quote, callout, code and divider. Other types produce no line of their
 * own, but their children are still visited. Blocks whose text is blank are
 * skipped (code blocks still emit their fences). A blank line is emitted after
 * each top-level node and the final result is trimmed.
 *
 * @param nodes Top-level block nodes.
 * @param opts `maxDepth` limits how deep children are rendered (default 6; top level is depth 0).
 */
export function renderNotionBlocksToText(
  nodes: NotionBlockNode[],
  opts: { maxDepth?: number } = {}
): string {
  const depthLimit = opts.maxDepth ?? 6;
  const out: string[] = [];

  const visit = (node: NotionBlockNode, depth: number, listDepth: number): void => {
    if (depth > depthLimit) return;

    const block = node.block;
    const kind = block.type;
    const readText = () => rt(payloadField(block, "rich_text"));
    // Emit one formatted line, but only when the block actually has visible text.
    const emitText = (format: (text: string) => string) => {
      const text = readText();
      if (text.trim()) out.push(format(text));
    };

    switch (kind) {
      case "paragraph":
      case "callout":
        emitText((text) => text);
        break;
      case "heading_1":
        emitText((text) => `# ${text}`);
        break;
      case "heading_2":
        emitText((text) => `## ${text}`);
        break;
      case "heading_3":
        emitText((text) => `### ${text}`);
        break;
      case "bulleted_list_item":
      case "numbered_list_item":
        emitText((text) => `${indent(listDepth)}- ${text}`);
        break;
      case "to_do": {
        const mark = Boolean(payloadField(block, "checked")) ? "x" : " ";
        emitText((text) => `${indent(listDepth)}- [${mark}] ${text}`);
        break;
      }
      case "quote":
        emitText((text) => `> ${text}`);
        break;
      case "code": {
        const source = readText();
        const language = String(payloadField(block, "language") ?? "").trim();
        out.push("```" + language);
        if (source.trim()) out.push(source);
        out.push("```");
        break;
      }
      case "divider":
        out.push("---");
        break;
      default:
        // Unsupported block types are ignored for v1.
        break;
    }

    // Children of list-like items are indented one level deeper.
    const isListLike =
      kind === "bulleted_list_item" || kind === "numbered_list_item" || kind === "to_do";
    const childListDepth = isListLike ? listDepth + 1 : listDepth;
    for (const child of node.children) {
      visit(child, depth + 1, childListDepth);
    }
  };

  for (const node of nodes) {
    visit(node, 0, 0);
    out.push("");
  }

  return out.join("\n").trim();
}
97
+
98
+
@@ -0,0 +1,222 @@
1
+ import type { ContextEngine } from "../../core";
2
+ import type { IngestResult } from "../../core/types";
3
+ import { createNotionClient, type NotionClient } from "./client";
4
+ import { normalizeNotionPageId32, toUuidHyphenated } from "./ids";
5
+ import { renderNotionBlocksToText, type NotionBlock, type NotionBlockNode } from "./render";
6
+ import type {
7
+ BuildNotionPageIngestInputArgs,
8
+ NotionPageDocument,
9
+ NotionSyncProgressEvent,
10
+ SyncNotionPagesInput,
11
+ SyncNotionPagesResult,
12
+ } from "./types";
13
+
14
/**
 * Prepend an optional namespace prefix to `rest`, separated by exactly one ":".
 * A missing or blank prefix returns `rest` unchanged; the prefix is trimmed first.
 */
const joinPrefix = (prefix: string | undefined, rest: string) => {
  const trimmed = (prefix ?? "").trim();
  if (trimmed === "") return rest;
  const separator = trimmed.endsWith(":") ? "" : ":";
  return `${trimmed}${separator}${rest}`;
};
19
+
20
/**
 * Build the ingest payload for one Notion page.
 *
 * The `sourceId` is `notion:page:<pageId>`, prefixed with `sourceIdPrefix` when
 * one is given. `metadata` defaults to an empty object.
 */
export function buildNotionPageIngestInput(
  args: BuildNotionPageIngestInputArgs
) {
  const { pageId, content, metadata, sourceIdPrefix } = args;
  return {
    sourceId: joinPrefix(sourceIdPrefix, `notion:page:${pageId}`),
    content,
    metadata: metadata ?? {},
  };
}
34
+
35
/** Concatenate the `plain_text` of each rich-text item; non-array input yields "". */
const richTextToText = (richText: any[] | undefined) => {
  if (!Array.isArray(richText)) return "";
  return richText.map((part) => String(part?.plain_text ?? "")).join("");
};
39
+
40
/**
 * Extract the page title: the text of the first property whose `type` is
 * "title". Returns "" when the page has no such property.
 */
const getNotionPageTitle = (page: any): string => {
  const properties: any[] = Object.values(page?.properties ?? {});
  const titleProperty = properties.find((property) => property?.type === "title");
  return titleProperty ? richTextToText(titleProperty?.title) : "";
};
50
+
51
/**
 * Fetch all direct children of a block, following pagination.
 *
 * Requests pages of up to 100 blocks and keeps going while the response
 * reports `has_more` and supplies a `next_cursor`.
 */
async function listAllBlockChildren(
  notion: NotionClient,
  blockId: string
): Promise<NotionBlock[]> {
  const collected: NotionBlock[] = [];
  let nextCursor: string | undefined = undefined;

  do {
    const page: any = await notion.blocks.children.list({
      block_id: blockId,
      start_cursor: nextCursor,
      page_size: 100,
    });

    collected.push(...((page?.results ?? []) as NotionBlock[]));
    // Stop when there is nothing more, or when no cursor was provided to continue with.
    nextCursor = page?.has_more ? (page?.next_cursor ?? undefined) : undefined;
  } while (nextCursor);

  return collected;
}
73
+
74
/**
 * Load the children of `rootBlockId` and, recursively, their descendants.
 *
 * A block's children are only fetched when it reports `has_children` and the
 * current `depth` is still below `maxDepth`; otherwise the node gets an empty
 * `children` array. Blocks are processed one after another, in API order.
 */
async function buildBlockTree(
  notion: NotionClient,
  rootBlockId: string,
  depth: number,
  maxDepth: number
): Promise<NotionBlockNode[]> {
  const blocks = await listAllBlockChildren(notion, rootBlockId);
  const tree: NotionBlockNode[] = [];

  for (const block of blocks) {
    const shouldDescend = block.has_children && depth < maxDepth;
    const children: NotionBlockNode[] = shouldDescend
      ? await buildBlockTree(notion, block.id, depth + 1, maxDepth)
      : [];
    tree.push({ block, children });
  }

  return tree;
}
93
+
94
/**
 * Load one Notion page and turn it into an ingestable document.
 *
 * Retrieves the page, loads its block tree (nested blocks are fetched while the
 * depth is below `maxDepth`, default 4), renders the blocks to text and
 * prepends the page title. The returned metadata records the connector, kind,
 * normalized page id, URL, title and last-edited time.
 *
 * @param args `notion` client, `pageIdOrUrl`, optional `sourceIdPrefix` and `maxDepth`.
 * @returns `sourceId`, `content` and `metadata` for the page.
 */
export async function loadNotionPageDocument(args: {
  notion: NotionClient;
  pageIdOrUrl: string;
  sourceIdPrefix?: string;
  maxDepth?: number;
}): Promise<NotionPageDocument> {
  const { notion, pageIdOrUrl, sourceIdPrefix } = args;
  const treeDepth = args.maxDepth ?? 4;

  // The API is called with the hyphenated form; the 32-hex form is what gets stored.
  const pageId = normalizeNotionPageId32(pageIdOrUrl);
  const apiId = toUuidHyphenated(pageId);

  const page: any = await notion.pages.retrieve({ page_id: apiId });
  const title = getNotionPageTitle(page);
  const metadata: Record<string, unknown> = {
    connector: "notion",
    kind: "page",
    pageId,
    url: String(page?.url ?? ""),
    title,
    lastEditedTime: String(page?.last_edited_time ?? ""),
  };

  const blocks = await buildBlockTree(notion, apiId, 0, treeDepth);
  const sections = [title.trim(), renderNotionBlocksToText(blocks).trim()];
  // Drop empty sections so a missing title or body leaves no stray blank lines.
  const content = sections.filter(Boolean).join("\n\n");

  const ingest = buildNotionPageIngestInput({
    pageId,
    content,
    metadata,
    sourceIdPrefix,
  });

  return {
    sourceId: ingest.sourceId,
    content: ingest.content,
    metadata: ingest.metadata ?? {},
  };
}
134
+
135
/**
 * Whether an error thrown while loading a page means "page not found".
 *
 * Recognized signals, in order:
 *  - the Notion API error code `object_not_found`;
 *  - a numeric 404 in `status`, `statusCode` or `code`;
 *  - a message containing "could not find" (case-insensitive).
 *
 * Any other value, including `null`/`undefined`, yields `false`.
 */
const isNotFound = (err: unknown): boolean => {
  const e = (err ?? {}) as {
    status?: unknown;
    statusCode?: unknown;
    code?: unknown;
    message?: unknown;
  };

  // Notion reports missing/inaccessible objects with this string code; check it
  // directly so detection does not depend on a numeric status being present.
  if (e.code === "object_not_found") return true;

  const status = Number(e.status ?? e.statusCode ?? e.code);
  if (status === 404) return true;

  const message = String(e.message ?? "");
  return message.toLowerCase().includes("could not find");
};
141
+
142
/**
 * Sync a list of Notion pages into the context engine, one page at a time.
 *
 * For every entry in `input.pageIds` the page is loaded and ingested. Failures
 * are collected per page instead of aborting the batch:
 *  - an id that cannot be normalized is recorded as a failure for that entry;
 *  - a "not found" page emits `page:not-found` and, when `deleteOnNotFound` is
 *    true, its previously ingested document is deleted (a failing delete is
 *    recorded as a failure);
 *  - any other error is recorded as a failure.
 *
 * A not-found page without `deleteOnNotFound` counts as neither succeeded nor
 * failed. Exceptions thrown by `onProgress` are ignored.
 *
 * @returns Counters plus the list of per-page errors.
 * @throws Error when the Notion client cannot be created (e.g. blank token).
 */
export async function syncNotionPages(
  input: SyncNotionPagesInput
): Promise<SyncNotionPagesResult> {
  const deleteOnNotFound = input.deleteOnNotFound ?? false;

  const notion = createNotionClient({ token: input.token });
  const errors: SyncNotionPagesResult["errors"] = [];

  let succeeded = 0;
  let failed = 0;
  let deleted = 0;

  // Progress reporting is best-effort: a throwing handler must not break the sync.
  const emit = (event: NotionSyncProgressEvent) => {
    try {
      input.onProgress?.(event);
    } catch {
      // ignore progress handler errors
    }
  };

  const recordFailure = (pageId: string, sourceId: string, error: unknown) => {
    failed += 1;
    errors.push({ pageId, sourceId, error });
    emit({ type: "page:error", pageId, sourceId, error });
  };

  for (const rawId of input.pageIds) {
    let pageId: string;
    try {
      pageId = normalizeNotionPageId32(rawId);
    } catch (err) {
      // A malformed id only fails this entry; report it under the raw value
      // because no normalized id exists.
      const rawLabel = String(rawId ?? "").trim();
      recordFailure(
        rawLabel,
        joinPrefix(input.sourceIdPrefix, `notion:page:${rawLabel}`),
        err
      );
      continue;
    }

    const sourceId = joinPrefix(
      input.sourceIdPrefix,
      `notion:page:${pageId}`
    );

    emit({ type: "page:start", pageId, sourceId });

    try {
      const doc = await loadNotionPageDocument({
        notion,
        pageIdOrUrl: pageId,
        sourceIdPrefix: input.sourceIdPrefix,
      });

      const result: IngestResult = await input.engine.ingest({
        sourceId: doc.sourceId,
        content: doc.content,
        metadata: doc.metadata as any,
      });

      succeeded += 1;
      emit({
        type: "page:success",
        pageId,
        sourceId,
        chunkCount: result.chunkCount,
      });
    } catch (err) {
      if (!isNotFound(err)) {
        recordFailure(pageId, sourceId, err);
        continue;
      }

      emit({ type: "page:not-found", pageId, sourceId });
      if (deleteOnNotFound) {
        try {
          await input.engine.delete({ sourceId });
          deleted += 1;
        } catch (deleteErr) {
          recordFailure(pageId, sourceId, deleteErr);
        }
      }
    }
  }

  return {
    pageCount: input.pageIds.length,
    succeeded,
    failed,
    deleted,
    errors,
  };
}
221
+
222
+
@@ -0,0 +1,56 @@
1
+ import type { ContextEngine } from "../../core";
2
+ import type { IngestInput } from "../../core/types";
3
+
4
/** Fields shared by everything that refers to one synced page. */
type NotionPageRef = {
  /** Page id as used in the `sourceId`. */
  pageId: string;
  /** Identifier of the ingested document for this page. */
  sourceId: string;
};

/** Progress notifications emitted while syncing pages, discriminated by `type`. */
export type NotionSyncProgressEvent =
  | (NotionPageRef & { type: "page:start" })
  | (NotionPageRef & { type: "page:success"; chunkCount: number })
  | (NotionPageRef & { type: "page:error"; error: unknown })
  | (NotionPageRef & { type: "page:not-found" });

/** Input of `syncNotionPages`. */
export type SyncNotionPagesInput = {
  /** Engine used to ingest (and optionally delete) documents. */
  engine: ContextEngine;
  /**
   * Server-side Notion integration token.
   * Keep this server-only (env var).
   */
  token: string;
  /** Notion page IDs or page URLs. */
  pageIds: string[];
  /**
   * Optional namespace prefix, useful for multi-tenant apps:
   * `tenant:acme:` -> `tenant:acme:notion:page:<id>`
   */
  sourceIdPrefix?: string;
  /**
   * When true, if a page is not found/accessible, delete the previously ingested
   * document for that page (exact sourceId).
   */
  deleteOnNotFound?: boolean;
  /** Optional progress callback. */
  onProgress?: (event: NotionSyncProgressEvent) => void;
};

/** Summary returned by `syncNotionPages`. */
export type SyncNotionPagesResult = {
  /** Number of entries in the requested `pageIds`. */
  pageCount: number;
  succeeded: number;
  failed: number;
  deleted: number;
  /** One entry per recorded failure. */
  errors: Array<NotionPageRef & { error: unknown }>;
};

/** A Notion page rendered into an ingestable document. */
export type NotionPageDocument = {
  sourceId: string;
  content: string;
  metadata: Record<string, unknown>;
};

/** Arguments of `buildNotionPageIngestInput`. */
export type BuildNotionPageIngestInputArgs = {
  /** Normalized 32-hex page id (no dashes). */
  pageId: string;
  content: string;
  metadata?: Record<string, unknown>;
  sourceIdPrefix?: string;
};

export type BuildNotionPageIngestInputResult = IngestInput;
55
+
56
+
@@ -1,8 +1,10 @@
1
+ import { deleteDocuments } from "./delete";
1
2
  import { ingest } from "./ingest";
2
3
  import { retrieve } from "./retrieve";
3
4
  import { defineConfig, resolveConfig } from "./config";
4
5
  import type {
5
6
  ContextEngineConfig,
7
+ DeleteInput,
6
8
  IngestInput,
7
9
  IngestResult,
8
10
  ResolvedContextEngineConfig,
@@ -24,6 +26,10 @@ export class ContextEngine {
24
26
  async retrieve(input: RetrieveInput): Promise<RetrieveResult> {
25
27
  return retrieve(this.config, input);
26
28
  }
29
+
30
+ async delete(input: DeleteInput): Promise<void> {
31
+ return deleteDocuments(this.config, input);
32
+ }
27
33
  }
28
34
 
29
35
  export const createContextEngine = (config: ContextEngineConfig) =>
@@ -0,0 +1,19 @@
1
+ import type { DeleteInput, ResolvedContextEngineConfig } from "./types";
2
+
3
/**
 * Delete documents from the configured store.
 *
 * Exactly one selector must be given: `sourceId` (exact match) or
 * `sourceIdPrefix` (every document whose `sourceId` starts with the prefix).
 *
 * @throws Error when both or neither selector is provided as a string, or when
 *   `sourceIdPrefix` is the empty string.
 */
export const deleteDocuments = async (
  config: ResolvedContextEngineConfig,
  input: DeleteInput
): Promise<void> => {
  const hasSourceId = "sourceId" in input && typeof input.sourceId === "string";
  const hasPrefix =
    "sourceIdPrefix" in input && typeof input.sourceIdPrefix === "string";

  if (hasSourceId === hasPrefix) {
    // Both true or both false.
    throw new Error('Provide exactly one of "sourceId" or "sourceIdPrefix".');
  }

  // An empty prefix matches every sourceId, so an accidental "" (e.g. from an
  // unset variable) would wipe the whole store. Refuse it explicitly.
  if (hasPrefix && input.sourceIdPrefix === "") {
    throw new Error(
      '"sourceIdPrefix" must not be empty: it would match every document.'
    );
  }

  await config.store.delete(input);
};
18
+
19
+
@@ -1,4 +1,5 @@
1
1
  export { ContextEngine, createContextEngine, defineConfig } from "./context-engine";
2
+ export { deleteDocuments } from "./delete";
2
3
  export { ingest } from "./ingest";
3
4
  export { retrieve } from "./retrieve";
4
5
  export { defaultChunker, resolveChunkingOptions } from "./chunking";
@@ -44,7 +44,29 @@ export type EmbeddingProvider = {
44
44
  embed: (input: EmbeddingInput) => Promise<number[]>;
45
45
  };
46
46
 
47
/** Delete a single logical document by exact `sourceId`. */
type DeleteBySourceId = {
  sourceId: string;
  sourceIdPrefix?: never;
};

/**
 * Delete all logical documents whose `sourceId` starts with the prefix.
 * This matches Unrag's prefix scoping behavior in retrieval.
 */
type DeleteBySourceIdPrefix = {
  sourceId?: never;
  sourceIdPrefix: string;
};

/** Selector for a delete: exactly one of `sourceId` or `sourceIdPrefix`. */
export type DeleteInput = DeleteBySourceId | DeleteBySourceIdPrefix;
61
+
47
62
  export type VectorStore = {
63
+ /**
64
+ * Persist (replace) a single document's chunks.
65
+ *
66
+ * The store treats `chunks[0].sourceId` as the logical identifier for the document.
67
+ * Calling `upsert()` multiple times with the same `sourceId` replaces the previously
68
+ * stored content for that document (including when the chunk count changes).
69
+ */
48
70
  upsert: (chunks: Chunk[]) => Promise<void>;
49
71
  query: (params: {
50
72
  embedding: number[];
@@ -53,6 +75,7 @@ export type VectorStore = {
53
75
  sourceId?: string;
54
76
  };
55
77
  }) => Promise<Array<Chunk & { score: number }>>;
78
+ delete: (input: DeleteInput) => Promise<void>;
56
79
  };
57
80
 
58
81
  export type IngestInput = {
@@ -1,6 +1,6 @@
1
1
  import { documents, chunks, embeddings } from "./schema";
2
2
  import type { Chunk, VectorStore } from "../../core/types";
3
- import { sql, type SQL } from "drizzle-orm";
3
+ import { eq, like, sql, type SQL } from "drizzle-orm";
4
4
  import type { PgDatabase } from "drizzle-orm/pg-core";
5
5
 
6
6
  type DrizzleDb = PgDatabase<any, any, any>;
@@ -44,6 +44,10 @@ export const createDrizzleVectorStore = (db: DrizzleDb): VectorStore => ({
44
44
  const head = chunkItems[0]!;
45
45
  const documentRow = toDocumentRow(head);
46
46
 
47
+ // Replace-by-sourceId: delete any previously stored document(s) for this logical id.
48
+ // Cascades to chunks and embeddings.
49
+ await tx.delete(documents).where(eq(documents.sourceId, head.sourceId));
50
+
47
51
  await tx
48
52
  .insert(documents)
49
53
  .values(documentRow)
@@ -140,6 +144,17 @@ export const createDrizzleVectorStore = (db: DrizzleDb): VectorStore => ({
140
144
  score: Number(row.score),
141
145
  }));
142
146
  },
147
+
148
+ delete: async (input) => {
149
+ if ("sourceId" in input) {
150
+ await db.delete(documents).where(eq(documents.sourceId, input.sourceId));
151
+ return;
152
+ }
153
+
154
+ await db
155
+ .delete(documents)
156
+ .where(like(documents.sourceId, input.sourceIdPrefix + "%"));
157
+ },
143
158
  });
144
159
 
145
160
 
@@ -21,6 +21,10 @@ export const createPrismaVectorStore = (prisma: PrismaClient): VectorStore => ({
21
21
  const documentMetadata = sanitizeMetadata(head.metadata);
22
22
 
23
23
  await prisma.$transaction(async (tx: { $executeRaw: (query: unknown) => Promise<unknown> }) => {
24
+ // Replace-by-sourceId: delete any previously stored document(s) for this logical id.
25
+ // Cascade removes chunks and embeddings.
26
+ await tx.$executeRaw(sql`delete from documents where source_id = ${head.sourceId}`);
27
+
24
28
  await tx.$executeRaw(
25
29
  sql`
26
30
  insert into documents (id, source_id, content, metadata)
@@ -128,6 +132,19 @@ export const createPrismaVectorStore = (prisma: PrismaClient): VectorStore => ({
128
132
  score: Number(row.score),
129
133
  }));
130
134
  },
135
+
136
+ delete: async (input) => {
137
+ if ("sourceId" in input) {
138
+ await prisma.$executeRaw(
139
+ sql`delete from documents where source_id = ${input.sourceId}`
140
+ );
141
+ return;
142
+ }
143
+
144
+ await prisma.$executeRaw(
145
+ sql`delete from documents where source_id like ${input.sourceIdPrefix + "%"}`
146
+ );
147
+ },
131
148
  });
132
149
 
133
150
 
@@ -42,6 +42,12 @@ export const createRawSqlVectorStore = (pool: Pool): VectorStore => ({
42
42
  const head = chunkItems[0]!;
43
43
  const documentMetadata = sanitizeMetadata(head.metadata);
44
44
 
45
+ // Replace-by-sourceId: delete any previously stored document(s) for this logical id.
46
+ // Cascades to chunks and embeddings.
47
+ await client.query(`delete from documents where source_id = $1`, [
48
+ head.sourceId,
49
+ ]);
50
+
45
51
  await client.query(
46
52
  `
47
53
  insert into documents (id, source_id, content, metadata)
@@ -149,6 +155,21 @@ export const createRawSqlVectorStore = (pool: Pool): VectorStore => ({
149
155
  score: Number(row.score),
150
156
  }));
151
157
  },
158
+
159
+ delete: async (input) => {
160
+ await withTx(pool, async (client) => {
161
+ if ("sourceId" in input) {
162
+ await client.query(`delete from documents where source_id = $1`, [
163
+ input.sourceId,
164
+ ]);
165
+ return;
166
+ }
167
+
168
+ await client.query(`delete from documents where source_id like $1`, [
169
+ input.sourceIdPrefix + "%",
170
+ ]);
171
+ });
172
+ },
152
173
  });
153
174
 
154
175