unrag 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -4,7 +4,7 @@
4
4
  "bin": {
5
5
  "unrag": "./dist/cli/index.js"
6
6
  },
7
- "version": "0.1.1",
7
+ "version": "0.2.0",
8
8
  "private": false,
9
9
  "license": "Apache-2.0",
10
10
  "devDependencies": {
@@ -1,8 +1,10 @@
1
+ import { deleteDocuments } from "./delete";
1
2
  import { ingest } from "./ingest";
2
3
  import { retrieve } from "./retrieve";
3
4
  import { defineConfig, resolveConfig } from "./config";
4
5
  import type {
5
6
  ContextEngineConfig,
7
+ DeleteInput,
6
8
  IngestInput,
7
9
  IngestResult,
8
10
  ResolvedContextEngineConfig,
@@ -24,6 +26,10 @@ export class ContextEngine {
24
26
/**
 * Runs a retrieval against this engine.
 *
 * Forwards the engine's config together with `input` to the module-level
 * `retrieve` function and resolves with whatever it produces.
 */
async retrieve(input: RetrieveInput): Promise<RetrieveResult> {
  const { config } = this;
  return retrieve(config, input);
}
29
+
30
/**
 * Deletes stored documents selected by `input`.
 *
 * Forwards the engine's config together with `input` to `deleteDocuments`
 * and resolves once that call has completed.
 */
async delete(input: DeleteInput): Promise<void> {
  const { config } = this;
  await deleteDocuments(config, input);
}
27
33
  }
28
34
 
29
35
  export const createContextEngine = (config: ContextEngineConfig) =>
@@ -0,0 +1,19 @@
1
+ import type { DeleteInput, ResolvedContextEngineConfig } from "./types";
2
+
3
/**
 * Deletes stored documents through `config.store.delete`.
 *
 * Exactly one selector must be supplied as a string:
 * - `sourceId` targets an exact source id;
 * - `sourceIdPrefix` targets every source id starting with the prefix.
 *
 * The store receives a canonical object holding only the selected key.
 * Store adapters may branch on key presence (`"sourceId" in input`), so an
 * explicitly-undefined key (which `sourceId?: never` permits) must not be
 * forwarded, or the wrong branch could run.
 *
 * @param config - Resolved engine configuration; only `config.store` is used.
 * @param input - Selector describing what to delete.
 * @throws Error when neither or both of `sourceId` / `sourceIdPrefix` are strings.
 */
export const deleteDocuments = async (
  config: ResolvedContextEngineConfig,
  input: DeleteInput
): Promise<void> => {
  const { sourceId, sourceIdPrefix } = input;

  if (typeof sourceId === "string" && typeof sourceIdPrefix !== "string") {
    await config.store.delete({ sourceId });
    return;
  }

  if (typeof sourceIdPrefix === "string" && typeof sourceId !== "string") {
    await config.store.delete({ sourceIdPrefix });
    return;
  }

  // Both selectors present or both missing.
  throw new Error('Provide exactly one of "sourceId" or "sourceIdPrefix".');
};
18
+
19
+
@@ -1,4 +1,5 @@
1
1
  export { ContextEngine, createContextEngine, defineConfig } from "./context-engine";
2
+ export { deleteDocuments } from "./delete";
2
3
  export { ingest } from "./ingest";
3
4
  export { retrieve } from "./retrieve";
4
5
  export { defaultChunker, resolveChunkingOptions } from "./chunking";
@@ -44,7 +44,29 @@ export type EmbeddingProvider = {
44
44
  embed: (input: EmbeddingInput) => Promise<number[]>;
45
45
  };
46
46
 
47
/** Selector that removes one logical document by its exact `sourceId`. */
type DeleteBySourceId = {
  /** Delete a single logical document by exact `sourceId`. */
  sourceId: string;
  sourceIdPrefix?: never;
};

/** Selector that removes every logical document under a `sourceId` prefix. */
type DeleteBySourceIdPrefix = {
  sourceId?: never;
  /**
   * Delete all logical documents whose `sourceId` starts with the prefix.
   * This matches Unrag's prefix scoping behavior in retrieval.
   */
  sourceIdPrefix: string;
};

/**
 * Input accepted by delete operations: exactly one of `sourceId` or
 * `sourceIdPrefix`. The `?: never` members make supplying both a type error.
 */
export type DeleteInput = DeleteBySourceId | DeleteBySourceIdPrefix;
61
+
47
62
  export type VectorStore = {
63
+ /**
64
+ * Persist (replace) a single document's chunks.
65
+ *
66
+ * The store treats `chunks[0].sourceId` as the logical identifier for the document.
67
+ * Calling `upsert()` multiple times with the same `sourceId` replaces the previously
68
+ * stored content for that document (including when the chunk count changes).
69
+ */
48
70
  upsert: (chunks: Chunk[]) => Promise<void>;
49
71
  query: (params: {
50
72
  embedding: number[];
@@ -53,6 +75,7 @@ export type VectorStore = {
53
75
  sourceId?: string;
54
76
  };
55
77
  }) => Promise<Array<Chunk & { score: number }>>;
78
+ delete: (input: DeleteInput) => Promise<void>;
56
79
  };
57
80
 
58
81
  export type IngestInput = {
@@ -1,6 +1,6 @@
1
1
  import { documents, chunks, embeddings } from "./schema";
2
2
  import type { Chunk, VectorStore } from "../../core/types";
3
- import { sql, type SQL } from "drizzle-orm";
3
+ import { eq, like, sql, type SQL } from "drizzle-orm";
4
4
  import type { PgDatabase } from "drizzle-orm/pg-core";
5
5
 
6
6
  type DrizzleDb = PgDatabase<any, any, any>;
@@ -44,6 +44,10 @@ export const createDrizzleVectorStore = (db: DrizzleDb): VectorStore => ({
44
44
  const head = chunkItems[0]!;
45
45
  const documentRow = toDocumentRow(head);
46
46
 
47
+ // Replace-by-sourceId: delete any previously stored document(s) for this logical id.
48
+ // Cascades to chunks and embeddings.
49
+ await tx.delete(documents).where(eq(documents.sourceId, head.sourceId));
50
+
47
51
  await tx
48
52
  .insert(documents)
49
53
  .values(documentRow)
@@ -140,6 +144,17 @@ export const createDrizzleVectorStore = (db: DrizzleDb): VectorStore => ({
140
144
  score: Number(row.score),
141
145
  }));
142
146
  },
147
+
148
/**
 * Deletes rows from `documents` by exact `sourceId`, or by `sourceIdPrefix`.
 *
 * The selector is chosen by value (`typeof … === "string"`) rather than key
 * presence, so an explicitly-undefined `sourceId` does not take the exact-id
 * branch.
 *
 * @throws Error when neither selector is a string.
 */
delete: async (input) => {
  const { sourceId, sourceIdPrefix } = input;

  if (typeof sourceId === "string") {
    await db.delete(documents).where(eq(documents.sourceId, sourceId));
    return;
  }

  if (typeof sourceIdPrefix !== "string") {
    throw new Error('Provide exactly one of "sourceId" or "sourceIdPrefix".');
  }

  // Escape LIKE metacharacters so the prefix is matched literally; otherwise
  // `_` or `%` inside a source id would act as wildcards and delete documents
  // that do not actually start with the prefix. Backslash is PostgreSQL's
  // default LIKE escape character.
  const pattern = sourceIdPrefix.replace(/[\\%_]/g, "\\$&") + "%";

  await db.delete(documents).where(like(documents.sourceId, pattern));
},
143
158
  });
144
159
 
145
160
 
@@ -21,6 +21,10 @@ export const createPrismaVectorStore = (prisma: PrismaClient): VectorStore => ({
21
21
  const documentMetadata = sanitizeMetadata(head.metadata);
22
22
 
23
23
  await prisma.$transaction(async (tx: { $executeRaw: (query: unknown) => Promise<unknown> }) => {
24
+ // Replace-by-sourceId: delete any previously stored document(s) for this logical id.
25
+ // Cascade removes chunks and embeddings.
26
+ await tx.$executeRaw(sql`delete from documents where source_id = ${head.sourceId}`);
27
+
24
28
  await tx.$executeRaw(
25
29
  sql`
26
30
  insert into documents (id, source_id, content, metadata)
@@ -128,6 +132,19 @@ export const createPrismaVectorStore = (prisma: PrismaClient): VectorStore => ({
128
132
  score: Number(row.score),
129
133
  }));
130
134
  },
135
+
136
/**
 * Deletes rows from `documents` by exact `sourceId`, or by `sourceIdPrefix`.
 *
 * The selector is chosen by value (`typeof … === "string"`) rather than key
 * presence, so an explicitly-undefined `sourceId` does not take the exact-id
 * branch.
 *
 * @throws Error when neither selector is a string.
 */
delete: async (input) => {
  const { sourceId, sourceIdPrefix } = input;

  if (typeof sourceId === "string") {
    await prisma.$executeRaw(
      sql`delete from documents where source_id = ${sourceId}`
    );
    return;
  }

  if (typeof sourceIdPrefix !== "string") {
    throw new Error('Provide exactly one of "sourceId" or "sourceIdPrefix".');
  }

  // Escape LIKE metacharacters so the prefix is matched literally; otherwise
  // `_` or `%` inside a source id would act as wildcards and delete documents
  // that do not actually start with the prefix. Backslash is PostgreSQL's
  // default LIKE escape character.
  const pattern = sourceIdPrefix.replace(/[\\%_]/g, "\\$&") + "%";

  await prisma.$executeRaw(
    sql`delete from documents where source_id like ${pattern}`
  );
},
131
148
  });
132
149
 
133
150
 
@@ -42,6 +42,12 @@ export const createRawSqlVectorStore = (pool: Pool): VectorStore => ({
42
42
  const head = chunkItems[0]!;
43
43
  const documentMetadata = sanitizeMetadata(head.metadata);
44
44
 
45
+ // Replace-by-sourceId: delete any previously stored document(s) for this logical id.
46
+ // Cascades to chunks and embeddings.
47
+ await client.query(`delete from documents where source_id = $1`, [
48
+ head.sourceId,
49
+ ]);
50
+
45
51
  await client.query(
46
52
  `
47
53
  insert into documents (id, source_id, content, metadata)
@@ -149,6 +155,21 @@ export const createRawSqlVectorStore = (pool: Pool): VectorStore => ({
149
155
  score: Number(row.score),
150
156
  }));
151
157
  },
158
+
159
/**
 * Deletes rows from `documents` by exact `sourceId`, or by `sourceIdPrefix`,
 * inside a `withTx` transaction.
 *
 * The selector is chosen by value (`typeof … === "string"`) rather than key
 * presence, so an explicitly-undefined `sourceId` does not take the exact-id
 * branch.
 *
 * @throws Error when neither selector is a string.
 */
delete: async (input) => {
  const { sourceId, sourceIdPrefix } = input;

  await withTx(pool, async (client) => {
    if (typeof sourceId === "string") {
      await client.query(`delete from documents where source_id = $1`, [
        sourceId,
      ]);
      return;
    }

    if (typeof sourceIdPrefix !== "string") {
      throw new Error('Provide exactly one of "sourceId" or "sourceIdPrefix".');
    }

    // Escape LIKE metacharacters so the prefix is matched literally; otherwise
    // `_` or `%` inside a source id would act as wildcards and delete
    // documents that do not actually start with the prefix. Backslash is
    // PostgreSQL's default LIKE escape character.
    const pattern = sourceIdPrefix.replace(/[\\%_]/g, "\\$&") + "%";

    await client.query(`delete from documents where source_id like $1`, [
      pattern,
    ]);
  });
},
152
173
  });
153
174
 
154
175