@halo-sdk/rag 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,31 @@
1
+ # @halo-sdk/rag
2
+
3
+ Cache-aware retrieval-augmented generation for Halo AI SDK.
4
+
5
+ Most RAG implementations re-retrieve every turn and splice fresh documents into the prompt — which silently busts the provider's prefix cache on every message. `@halo-sdk/rag` produces a reusable cache segment with two cache-preserving policies:
6
+
7
+ - **Sticky retrieval** — near-duplicate consecutive queries skip re-retrieval entirely; the segment (and the cache after it) is untouched.
8
+ - **Append-only growth** — when a new result set extends the previous one, new docs are appended rather than rebuilding, so the already-cached prefix stays valid.
9
+
10
+ ## Usage
11
+
12
+ ```ts
13
+ import { Halo } from "@halo-sdk/core";
14
+ import { CacheAwareRag, VectorRetriever, MemoryVectorStore } from "@halo-sdk/rag";
15
+
16
+ const store = new MemoryVectorStore();
17
+ // store.add(docs, await embedder.embed(docs.map(d => d.text)))
18
+ const retriever = new VectorRetriever(myEmbedder, store);
19
+
20
+ const rag = new CacheAwareRag({ retriever, k: 4 });
21
+ await rag.update("what is prompt caching?");
22
+
23
+ const agent = halo.agent({
24
+ /* ... */
25
+ });
26
+ agent.setContextSegments([rag.segment]);
27
+ ```
28
+
29
+ ## Seams
30
+
31
+ `Embedder`, `VectorStore`, and `Retriever` are interfaces — bring your own embedding model and vector DB. `MemoryVectorStore` (cosine) and `VectorRetriever` are included for tests, demos, and small corpora. Halo owns the cache-aware orchestration, not the embedding/storage.
package/dist/index.cjs ADDED
@@ -0,0 +1,131 @@
1
+ "use strict";
2
+ var __defProp = Object.defineProperty;
3
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
+ var __getOwnPropNames = Object.getOwnPropertyNames;
5
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
6
+ var __export = (target, all) => {
7
+ for (var name in all)
8
+ __defProp(target, name, { get: all[name], enumerable: true });
9
+ };
10
+ var __copyProps = (to, from, except, desc) => {
11
+ if (from && typeof from === "object" || typeof from === "function") {
12
+ for (let key of __getOwnPropNames(from))
13
+ if (!__hasOwnProp.call(to, key) && key !== except)
14
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
15
+ }
16
+ return to;
17
+ };
18
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
19
+
20
+ // src/index.ts
21
+ var index_exports = {};
22
+ __export(index_exports, {
23
+ CacheAwareRag: () => CacheAwareRag,
24
+ MemoryVectorStore: () => MemoryVectorStore,
25
+ VectorRetriever: () => VectorRetriever,
26
+ cosineSimilarity: () => cosineSimilarity,
27
+ jaccardSimilarity: () => jaccardSimilarity
28
+ });
29
+ module.exports = __toCommonJS(index_exports);
30
+ var import_core = require("@halo-sdk/core");
31
/**
 * Cosine similarity of two numeric vectors.
 *
 * Vectors are expected to have the same length. If they do not, components
 * missing from the shorter vector are treated as 0, so the result is always a
 * finite number rather than NaN (a NaN score would make the ordering produced
 * by a numeric sort comparator unreliable).
 *
 * @param {number[]} a First vector.
 * @param {number[]} b Second vector.
 * @returns {number} dot(a, b) / (|a| * |b|), or 0 when either vector has zero magnitude.
 */
function cosineSimilarity(a, b) {
  const length = Math.max(a.length, b.length);
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < length; i++) {
    // Zero-pad the shorter vector instead of reading `undefined`.
    const x = a[i] ?? 0;
    const y = b[i] ?? 0;
    dot += x * y;
    normA += x * x;
    normB += y * y;
  }
  const denom = Math.sqrt(normA) * Math.sqrt(normB);
  return denom === 0 ? 0 : dot / denom;
}
43
/**
 * A simple in-memory vector store ranked by cosine similarity.
 * Every query scores all stored items, so it suits tests, demos and small corpora.
 */
class MemoryVectorStore {
  /** Stored `{ doc, embedding }` pairs, in insertion order. */
  _items = [];

  /**
   * Store each document together with the embedding at the same index.
   *
   * @param {Array<object>} docs Documents to store.
   * @param {number[][]} embeddings One embedding per document, index-aligned with `docs`.
   * @throws {RangeError} If there are fewer embeddings than documents. Nothing is stored in that case.
   */
  add(docs, embeddings) {
    // A document without an embedding would only fail later, inside query().
    if (embeddings.length < docs.length) {
      throw new RangeError(
        `MemoryVectorStore.add: received ${docs.length} docs but only ${embeddings.length} embeddings`,
      );
    }
    docs.forEach((doc, i) => {
      this._items.push({ doc, embedding: embeddings[i] });
    });
  }

  /**
   * Return the `k` stored documents most similar to `embedding`.
   *
   * @param {number[]} embedding Query vector.
   * @param {number} k Maximum number of results.
   * @returns {Array<object>} Copies of the stored documents with an added `score`,
   *   sorted by descending score. Empty when `k` is not a positive number.
   */
  query(embedding, k) {
    // slice(0, k) with a negative k would return "all but the last |k|" items.
    if (!(k > 0)) {
      return [];
    }
    const scored = this._items.map(({ doc, embedding: stored }) => ({
      ...doc,
      score: cosineSimilarity(embedding, stored),
    }));
    scored.sort((x, y) => y.score - x.score);
    return scored.slice(0, k);
  }
}
54
/**
 * Retriever that embeds the query text and looks up the nearest documents in a
 * vector store.
 */
class VectorRetriever {
  _embedder;
  _store;

  /**
   * @param {{ embed(texts: string[]): Promise<number[][]> }} _embedder Produces one vector per input text.
   * @param {{ query(embedding: number[], k: number): any }} _store Store queried with the embedded text.
   */
  constructor(_embedder, _store) {
    this._embedder = _embedder;
    this._store = _store;
  }

  /**
   * Embed `query` and return up to `k` matches from the store.
   *
   * @param {string} query Query text.
   * @param {number} k Number of documents requested from the store.
   * @returns {Promise<Array<object>>} A fresh array holding the store's results.
   */
  async retrieve(query, k) {
    const vectors = await this._embedder.embed([query]);
    const [queryEmbedding] = vectors;
    const matches = await this._store.query(queryEmbedding, k);
    // Copy so callers never share the array instance returned by the store.
    return [...matches];
  }
}
66
/**
 * Cache-aware RAG: keeps a context segment filled with retrieved documents
 * while changing that segment as rarely as possible.
 *
 * - Sticky retrieval: a query similar enough to the last successfully
 *   retrieved one skips retrieval and leaves the segment untouched.
 * - Append-only growth: when the previous document ids are an ordered prefix
 *   of the new ids, only the extra documents are appended.
 * - Otherwise the segment's messages are replaced with the new result set.
 */
class CacheAwareRag {
  /** The context segment that receives the formatted documents. */
  segment;
  _retriever;
  _k;
  _stickyThreshold;
  _formatDoc;
  _similarity;
  /** Last query whose retrieval completed, or null before the first one. */
  _lastQuery = null;
  /** Ids of the documents currently in the segment, in order. */
  _docIds = [];

  /**
   * @param {object} opts
   * @param {object} opts.retriever Object with `retrieve(query, k)` returning a promise of documents.
   * @param {number} [opts.k=4] Documents requested per retrieval.
   * @param {number} [opts.stickyThreshold=0.85] Similarity at or above which retrieval is skipped.
   * @param {(doc: object) => string} [opts.formatDoc] Message text for a document. Default `[id] text`.
   * @param {(a: string, b: string) => number} [opts.similarity] Query similarity. Default `jaccardSimilarity`.
   * @param {string} [opts.segmentId="rag"] Id given to the segment.
   */
  constructor(opts) {
    this._retriever = opts.retriever;
    this._k = opts.k ?? 4;
    this._stickyThreshold = opts.stickyThreshold ?? 0.85;
    this._formatDoc = opts.formatDoc ?? ((d) => `[${d.id}] ${d.text}`);
    this._similarity = opts.similarity ?? jaccardSimilarity;
    this.segment = new import_core.StableContext({ id: opts.segmentId ?? "rag", kind: "rag", messages: [] });
  }

  /** Document ids currently in the segment, in order. */
  get documentIds() {
    return this._docIds;
  }

  /**
   * Refresh the segment for `query`.
   *
   * @param {string} query The user's current query.
   * @returns {Promise<{ retrieved: boolean, cachePreserved: boolean }>}
   *   `retrieved` is false when the sticky policy skipped retrieval.
   *   `cachePreserved` is true when the existing segment content was left
   *   intact (sticky skip, append-only growth, or an empty segment that stayed
   *   empty) and false when the segment's messages were replaced.
   */
  async update(query) {
    if (
      this._lastQuery !== null &&
      this._docIds.length > 0 &&
      this._similarity(query, this._lastQuery) >= this._stickyThreshold
    ) {
      return { retrieved: false, cachePreserved: true };
    }

    const docs = await this._retriever.retrieve(query, this._k);
    const newIds = docs.map((d) => d.id);
    const previousIds = this._docIds;

    // Append-only when the previous ids are an ordered prefix of the new ids.
    const isPrefix =
      previousIds.length > 0 &&
      previousIds.length <= newIds.length &&
      previousIds.every((id, i) => id === newIds[i]);

    let cachePreserved;
    if (isPrefix) {
      const extra = docs.slice(previousIds.length);
      this.segment.append(extra.map((d) => this._toMessage(d)));
      cachePreserved = true;
    } else {
      this.segment.setMessages(docs.map((d) => this._toMessage(d)));
      // Decide from the state BEFORE the rebuild: replacing a populated
      // segment (even with an empty result) changes the cached content.
      cachePreserved = previousIds.length === 0 && newIds.length === 0;
    }

    // Commit bookkeeping only after retrieval and the segment update succeeded,
    // so a failed retrieval cannot make the next identical query "sticky"
    // against documents that were fetched for a different query.
    this._docIds = newIds;
    this._lastQuery = query;
    return { retrieved: true, cachePreserved };
  }

  /** Wrap a document as a non-discardable user message. */
  _toMessage(doc) {
    return { role: "user", content: this._formatDoc(doc), discardable: false };
  }
}
114
/**
 * Token-set Jaccard similarity of two strings.
 *
 * Tokens are lowercased runs of Unicode letters, combining marks, digits and
 * underscores. Using Unicode classes rather than the ASCII-only `\w` matters:
 * with `\w`, text in non-Latin scripts produces no tokens at all, so any two
 * such strings would compare as identical.
 *
 * @param {string} a First string.
 * @param {string} b Second string.
 * @returns {number} |A ∩ B| / |A ∪ B| in [0, 1]; 1 when neither string contains a token.
 */
function jaccardSimilarity(a, b) {
  const tokenize = (text) => new Set(text.toLowerCase().match(/[\p{L}\p{M}\p{N}_]+/gu) ?? []);
  const tokensA = tokenize(a);
  const tokensB = tokenize(b);
  if (tokensA.size === 0 && tokensB.size === 0) {
    return 1;
  }
  let shared = 0;
  for (const token of tokensA) {
    if (tokensB.has(token)) {
      shared++;
    }
  }
  // At least one set is non-empty here, so the union size is never 0.
  return shared / (tokensA.size + tokensB.size - shared);
}
123
+ // Annotate the CommonJS export names for ESM import in node:
124
+ 0 && (module.exports = {
125
+ CacheAwareRag,
126
+ MemoryVectorStore,
127
+ VectorRetriever,
128
+ cosineSimilarity,
129
+ jaccardSimilarity
130
+ });
131
+ //# sourceMappingURL=index.cjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/index.ts"],"sourcesContent":["import { StableContext, type CacheSegment, type ChatMessage } from \"@halo-sdk/core\";\n\n/** A retrievable document. */\nexport interface RagDocument {\n id: string;\n text: string;\n metadata?: Record<string, unknown>;\n}\n\n/** A document with a relevance score (higher = more relevant). */\nexport interface ScoredDocument extends RagDocument {\n score: number;\n}\n\n// ── Seams (bring your own implementation) ──\n\n/** Turns text into vectors. Seam — wrap any embedding model. */\nexport interface Embedder {\n embed(texts: string[]): Promise<number[][]>;\n}\n\n/** Stores + searches document vectors. Seam — wrap any vector DB. */\nexport interface VectorStore {\n add(docs: RagDocument[], embeddings: number[][]): Promise<void> | void;\n query(embedding: number[], k: number): Promise<ScoredDocument[]> | ScoredDocument[];\n}\n\n/** Produces relevant documents for a query. Seam — the top-level RAG interface. */\nexport interface Retriever {\n retrieve(query: string, k: number): Promise<RagDocument[]>;\n}\n\n// ── In-memory defaults (dependency-free) ──\n\n/** Cosine similarity of two equal-length vectors. */\nexport function cosineSimilarity(a: number[], b: number[]): number {\n let dot = 0;\n let na = 0;\n let nb = 0;\n for (let i = 0; i < a.length; i++) {\n dot += a[i]! * b[i]!;\n na += a[i]! * a[i]!;\n nb += b[i]! * b[i]!;\n }\n const denom = Math.sqrt(na) * Math.sqrt(nb);\n return denom === 0 ? 0 : dot / denom;\n}\n\n/** A simple in-memory cosine vector store. Good for tests, demos, small corpora. */\nexport class MemoryVectorStore implements VectorStore {\n private _items: { doc: RagDocument; embedding: number[] }[] = [];\n\n add(docs: RagDocument[], embeddings: number[][]): void {\n for (let i = 0; i < docs.length; i++) {\n this._items.push({ doc: docs[i]!, embedding: embeddings[i]! 
});\n }\n }\n\n query(embedding: number[], k: number): ScoredDocument[] {\n return this._items\n .map(({ doc, embedding: e }) => ({ ...doc, score: cosineSimilarity(embedding, e) }))\n .sort((a, b) => b.score - a.score)\n .slice(0, k);\n }\n}\n\n/** Build a {@link Retriever} from an {@link Embedder} + {@link VectorStore}. */\nexport class VectorRetriever implements Retriever {\n constructor(\n private readonly _embedder: Embedder,\n private readonly _store: VectorStore,\n ) {}\n\n async retrieve(query: string, k: number): Promise<RagDocument[]> {\n const [queryEmbedding] = await this._embedder.embed([query]);\n return [...(await this._store.query(queryEmbedding!, k))];\n }\n}\n\n// ── Cache-aware orchestration (what Halo owns) ──\n\nexport interface CacheAwareRagOptions {\n retriever: Retriever;\n /** Docs to retrieve per query. Default 4. */\n k?: number;\n /**\n * Sticky-retrieval threshold. If the new query's lexical similarity to the\n * last one is ≥ this, retrieval is skipped and the cached segment is reused.\n * Default 0.85. Set to 1 to always re-retrieve.\n */\n stickyThreshold?: number;\n /** Segment id (for cache miss attribution). Default \"rag\". */\n segmentId?: string;\n /** Format a document into a context message. Default: `[doc id] text`. */\n formatDoc?: (doc: RagDocument) => string;\n /** Override the lexical similarity measure (default token Jaccard). 
*/\n similarity?: (a: string, b: string) => number;\n}\n\n/**\n * Cache-aware RAG: turns a {@link Retriever} into a reusable {@link CacheSegment}\n * with two cache-preserving policies:\n *\n * - **sticky retrieval** — when consecutive queries are near-duplicates, skip\n * re-retrieval entirely so the segment (and the prefix cache after it) is\n * untouched, and\n * - **append-only growth** — when a new result set extends the previous one,\n * append the new docs instead of rebuilding, keeping the already-cached prefix\n * valid; only a genuinely different result set rebuilds (and busts) the block.\n *\n * Attach `rag.segment` via `agent.setContextSegments([rag.segment])`.\n */\nexport class CacheAwareRag {\n readonly segment: StableContext;\n\n private readonly _retriever: Retriever;\n private readonly _k: number;\n private readonly _stickyThreshold: number;\n private readonly _formatDoc: (doc: RagDocument) => string;\n private readonly _similarity: (a: string, b: string) => number;\n\n private _lastQuery: string | null = null;\n private _docIds: string[] = [];\n\n constructor(opts: CacheAwareRagOptions) {\n this._retriever = opts.retriever;\n this._k = opts.k ?? 4;\n this._stickyThreshold = opts.stickyThreshold ?? 0.85;\n this._formatDoc = opts.formatDoc ?? ((d) => `[${d.id}] ${d.text}`);\n this._similarity = opts.similarity ?? jaccardSimilarity;\n this.segment = new StableContext({ id: opts.segmentId ?? \"rag\", kind: \"rag\", messages: [] });\n }\n\n /** Document ids currently in the segment, in order. */\n get documentIds(): readonly string[] {\n return this._docIds;\n }\n\n /**\n * Refresh the segment for `query`. Returns whether a retrieval actually ran\n * (`false` = sticky reuse) and whether the cache was preserved (append-only or\n * skip) vs. 
busted (rebuild).\n */\n async update(query: string): Promise<{ retrieved: boolean; cachePreserved: boolean }> {\n if (\n this._lastQuery !== null &&\n this._docIds.length > 0 &&\n this._similarity(query, this._lastQuery) >= this._stickyThreshold\n ) {\n return { retrieved: false, cachePreserved: true }; // sticky — segment untouched\n }\n\n this._lastQuery = query;\n const docs = await this._retriever.retrieve(query, this._k);\n const newIds = docs.map((d) => d.id);\n\n // Append-only when the previous ids are an ordered prefix of the new ids.\n const isPrefix =\n this._docIds.length > 0 &&\n this._docIds.length <= newIds.length &&\n this._docIds.every((id, i) => id === newIds[i]);\n\n if (isPrefix) {\n const extra = docs.slice(this._docIds.length);\n this.segment.append(extra.map((d) => this._toMessage(d)));\n this._docIds = newIds;\n return { retrieved: true, cachePreserved: true };\n }\n\n this.segment.setMessages(docs.map((d) => this._toMessage(d)));\n this._docIds = newIds;\n return { retrieved: true, cachePreserved: this._docIds.length === 0 };\n }\n\n private _toMessage(doc: RagDocument): ChatMessage {\n return { role: \"user\", content: this._formatDoc(doc), discardable: false };\n }\n}\n\n/** Token-set Jaccard similarity (lowercased word tokens). */\nexport function jaccardSimilarity(a: string, b: string): number {\n const ta = new Set(a.toLowerCase().match(/\\w+/g) ?? []);\n const tb = new Set(b.toLowerCase().match(/\\w+/g) ?? []);\n if (ta.size === 0 && tb.size === 0) return 1;\n let inter = 0;\n for (const t of ta) if (tb.has(t)) inter++;\n const union = ta.size + tb.size - inter;\n return union === 0 ? 
0 : inter / union;\n}\n\nexport type { CacheSegment };\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,kBAAmE;AAmC5D,SAAS,iBAAiB,GAAa,GAAqB;AACjE,MAAI,MAAM;AACV,MAAI,KAAK;AACT,MAAI,KAAK;AACT,WAAS,IAAI,GAAG,IAAI,EAAE,QAAQ,KAAK;AACjC,WAAO,EAAE,CAAC,IAAK,EAAE,CAAC;AAClB,UAAM,EAAE,CAAC,IAAK,EAAE,CAAC;AACjB,UAAM,EAAE,CAAC,IAAK,EAAE,CAAC;AAAA,EACnB;AACA,QAAM,QAAQ,KAAK,KAAK,EAAE,IAAI,KAAK,KAAK,EAAE;AAC1C,SAAO,UAAU,IAAI,IAAI,MAAM;AACjC;AAGO,IAAM,oBAAN,MAA+C;AAAA,EAC5C,SAAsD,CAAC;AAAA,EAE/D,IAAI,MAAqB,YAA8B;AACrD,aAAS,IAAI,GAAG,IAAI,KAAK,QAAQ,KAAK;AACpC,WAAK,OAAO,KAAK,EAAE,KAAK,KAAK,CAAC,GAAI,WAAW,WAAW,CAAC,EAAG,CAAC;AAAA,IAC/D;AAAA,EACF;AAAA,EAEA,MAAM,WAAqB,GAA6B;AACtD,WAAO,KAAK,OACT,IAAI,CAAC,EAAE,KAAK,WAAW,EAAE,OAAO,EAAE,GAAG,KAAK,OAAO,iBAAiB,WAAW,CAAC,EAAE,EAAE,EAClF,KAAK,CAAC,GAAG,MAAM,EAAE,QAAQ,EAAE,KAAK,EAChC,MAAM,GAAG,CAAC;AAAA,EACf;AACF;AAGO,IAAM,kBAAN,MAA2C;AAAA,EAChD,YACmB,WACA,QACjB;AAFiB;AACA;AAAA,EAChB;AAAA,EAFgB;AAAA,EACA;AAAA,EAGnB,MAAM,SAAS,OAAe,GAAmC;AAC/D,UAAM,CAAC,cAAc,IAAI,MAAM,KAAK,UAAU,MAAM,CAAC,KAAK,CAAC;AAC3D,WAAO,CAAC,GAAI,MAAM,KAAK,OAAO,MAAM,gBAAiB,CAAC,CAAE;AAAA,EAC1D;AACF;AAmCO,IAAM,gBAAN,MAAoB;AAAA,EAChB;AAAA,EAEQ;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EAET,aAA4B;AAAA,EAC5B,UAAoB,CAAC;AAAA,EAE7B,YAAY,MAA4B;AACtC,SAAK,aAAa,KAAK;AACvB,SAAK,KAAK,KAAK,KAAK;AACpB,SAAK,mBAAmB,KAAK,mBAAmB;AAChD,SAAK,aAAa,KAAK,cAAc,CAAC,MAAM,IAAI,EAAE,EAAE,KAAK,EAAE,IAAI;AAC/D,SAAK,cAAc,KAAK,cAAc;AACtC,SAAK,UAAU,IAAI,0BAAc,EAAE,IAAI,KAAK,aAAa,OAAO,MAAM,OAAO,UAAU,CAAC,EAAE,CAAC;AAAA,EAC7F;AAAA;AAAA,EAGA,IAAI,cAAiC;AACnC,WAAO,KAAK;AAAA,EACd;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,MAAM,OAAO,OAAyE;AACpF,QACE,KAAK,eAAe,QACpB,KAAK,QAAQ,SAAS,KACtB,KAAK,YAAY,OAAO,KAAK,UAAU,KAAK,KAAK,kBACjD;AACA,aAAO,EAAE,WAAW,OAAO,gBAAgB,KAAK;AAAA,IAClD;AAEA,SAAK,aAAa;AAClB,UAAM,OAAO,MAAM,KAAK,WAAW,SAAS,OAAO,KAAK,EAAE;AAC1D,UAAM,SAAS,KAAK,IAAI,CAAC,MAAM,EAAE,EAAE;AAGnC,UAAM,WACJ,KAAK,QAAQ,SAAS,KACtB,KAAK,QAAQ,UAAU,OAAO,UAC9B,KAAK,QAAQ,MAAM,CAAC,IAAI,MAAM,O
AAO,OAAO,CAAC,CAAC;AAEhD,QAAI,UAAU;AACZ,YAAM,QAAQ,KAAK,MAAM,KAAK,QAAQ,MAAM;AAC5C,WAAK,QAAQ,OAAO,MAAM,IAAI,CAAC,MAAM,KAAK,WAAW,CAAC,CAAC,CAAC;AACxD,WAAK,UAAU;AACf,aAAO,EAAE,WAAW,MAAM,gBAAgB,KAAK;AAAA,IACjD;AAEA,SAAK,QAAQ,YAAY,KAAK,IAAI,CAAC,MAAM,KAAK,WAAW,CAAC,CAAC,CAAC;AAC5D,SAAK,UAAU;AACf,WAAO,EAAE,WAAW,MAAM,gBAAgB,KAAK,QAAQ,WAAW,EAAE;AAAA,EACtE;AAAA,EAEQ,WAAW,KAA+B;AAChD,WAAO,EAAE,MAAM,QAAQ,SAAS,KAAK,WAAW,GAAG,GAAG,aAAa,MAAM;AAAA,EAC3E;AACF;AAGO,SAAS,kBAAkB,GAAW,GAAmB;AAC9D,QAAM,KAAK,IAAI,IAAI,EAAE,YAAY,EAAE,MAAM,MAAM,KAAK,CAAC,CAAC;AACtD,QAAM,KAAK,IAAI,IAAI,EAAE,YAAY,EAAE,MAAM,MAAM,KAAK,CAAC,CAAC;AACtD,MAAI,GAAG,SAAS,KAAK,GAAG,SAAS,EAAG,QAAO;AAC3C,MAAI,QAAQ;AACZ,aAAW,KAAK,GAAI,KAAI,GAAG,IAAI,CAAC,EAAG;AACnC,QAAM,QAAQ,GAAG,OAAO,GAAG,OAAO;AAClC,SAAO,UAAU,IAAI,IAAI,QAAQ;AACnC;","names":[]}
@@ -0,0 +1,96 @@
1
+ import { StableContext, type CacheSegment } from "@halo-sdk/core";
2
+ /** A retrievable document. */
3
+ export interface RagDocument {
4
+ id: string;
5
+ text: string;
6
+ metadata?: Record<string, unknown>;
7
+ }
8
+ /** A document with a relevance score (higher = more relevant). */
9
+ export interface ScoredDocument extends RagDocument {
10
+ score: number;
11
+ }
12
+ /** Turns text into vectors. Seam — wrap any embedding model. */
13
+ export interface Embedder {
14
+ embed(texts: string[]): Promise<number[][]>;
15
+ }
16
+ /** Stores + searches document vectors. Seam — wrap any vector DB. */
17
+ export interface VectorStore {
18
+ add(docs: RagDocument[], embeddings: number[][]): Promise<void> | void;
19
+ query(embedding: number[], k: number): Promise<ScoredDocument[]> | ScoredDocument[];
20
+ }
21
+ /** Produces relevant documents for a query. Seam — the top-level RAG interface. */
22
+ export interface Retriever {
23
+ retrieve(query: string, k: number): Promise<RagDocument[]>;
24
+ }
25
+ /** Cosine similarity of two equal-length vectors. */
26
+ export declare function cosineSimilarity(a: number[], b: number[]): number;
27
+ /** A simple in-memory cosine vector store. Good for tests, demos, small corpora. */
28
+ export declare class MemoryVectorStore implements VectorStore {
29
+ private _items;
30
+ add(docs: RagDocument[], embeddings: number[][]): void;
31
+ query(embedding: number[], k: number): ScoredDocument[];
32
+ }
33
+ /** Build a {@link Retriever} from an {@link Embedder} + {@link VectorStore}. */
34
+ export declare class VectorRetriever implements Retriever {
35
+ private readonly _embedder;
36
+ private readonly _store;
37
+ constructor(_embedder: Embedder, _store: VectorStore);
38
+ retrieve(query: string, k: number): Promise<RagDocument[]>;
39
+ }
40
+ export interface CacheAwareRagOptions {
41
+ retriever: Retriever;
42
+ /** Docs to retrieve per query. Default 4. */
43
+ k?: number;
44
+ /**
45
+ * Sticky-retrieval threshold. If the new query's lexical similarity to the
46
+ * last one is ≥ this, retrieval is skipped and the cached segment is reused.
47
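+ * Default 0.85. With the default Jaccard measure, a threshold of 1 still reuses the segment when both queries have the same token set; set it above 1 to always re-retrieve.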
48
+ */
49
+ stickyThreshold?: number;
50
+ /** Segment id (for cache miss attribution). Default "rag". */
51
+ segmentId?: string;
52
+ /** Format a document into a context message. Default: `[doc id] text`. */
53
+ formatDoc?: (doc: RagDocument) => string;
54
+ /** Override the lexical similarity measure (default token Jaccard). */
55
+ similarity?: (a: string, b: string) => number;
56
+ }
57
+ /**
58
+ * Cache-aware RAG: turns a {@link Retriever} into a reusable {@link CacheSegment}
59
+ * with two cache-preserving policies:
60
+ *
61
+ * - **sticky retrieval** — when consecutive queries are near-duplicates, skip
62
+ * re-retrieval entirely so the segment (and the prefix cache after it) is
63
+ * untouched, and
64
+ * - **append-only growth** — when a new result set extends the previous one,
65
+ * append the new docs instead of rebuilding, keeping the already-cached prefix
66
+ * valid; only a genuinely different result set rebuilds (and busts) the block.
67
+ *
68
+ * Attach `rag.segment` via `agent.setContextSegments([rag.segment])`.
69
+ */
70
+ export declare class CacheAwareRag {
71
+ readonly segment: StableContext;
72
+ private readonly _retriever;
73
+ private readonly _k;
74
+ private readonly _stickyThreshold;
75
+ private readonly _formatDoc;
76
+ private readonly _similarity;
77
+ private _lastQuery;
78
+ private _docIds;
79
+ constructor(opts: CacheAwareRagOptions);
80
+ /** Document ids currently in the segment, in order. */
81
+ get documentIds(): readonly string[];
82
+ /**
83
+ * Refresh the segment for `query`. Returns whether a retrieval actually ran
84
+ * (`false` = sticky reuse) and whether the cache was preserved (append-only or
85
+ * skip) vs. busted (rebuild).
86
+ */
87
+ update(query: string): Promise<{
88
+ retrieved: boolean;
89
+ cachePreserved: boolean;
90
+ }>;
91
+ private _toMessage;
92
+ }
93
+ /** Token-set Jaccard similarity (lowercased word tokens). */
94
+ export declare function jaccardSimilarity(a: string, b: string): number;
95
+ export type { CacheSegment };
96
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,KAAK,YAAY,EAAoB,MAAM,gBAAgB,CAAC;AAEpF,8BAA8B;AAC9B,MAAM,WAAW,WAAW;IAC1B,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACpC;AAED,kEAAkE;AAClE,MAAM,WAAW,cAAe,SAAQ,WAAW;IACjD,KAAK,EAAE,MAAM,CAAC;CACf;AAID,gEAAgE;AAChE,MAAM,WAAW,QAAQ;IACvB,KAAK,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;CAC7C;AAED,qEAAqE;AACrE,MAAM,WAAW,WAAW;IAC1B,GAAG,CAAC,IAAI,EAAE,WAAW,EAAE,EAAE,UAAU,EAAE,MAAM,EAAE,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC;IACvE,KAAK,CAAC,SAAS,EAAE,MAAM,EAAE,EAAE,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,cAAc,EAAE,CAAC,GAAG,cAAc,EAAE,CAAC;CACrF;AAED,mFAAmF;AACnF,MAAM,WAAW,SAAS;IACxB,QAAQ,CAAC,KAAK,EAAE,MAAM,EAAE,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC,CAAC;CAC5D;AAID,qDAAqD;AACrD,wBAAgB,gBAAgB,CAAC,CAAC,EAAE,MAAM,EAAE,EAAE,CAAC,EAAE,MAAM,EAAE,GAAG,MAAM,CAWjE;AAED,oFAAoF;AACpF,qBAAa,iBAAkB,YAAW,WAAW;IACnD,OAAO,CAAC,MAAM,CAAmD;IAEjE,GAAG,CAAC,IAAI,EAAE,WAAW,EAAE,EAAE,UAAU,EAAE,MAAM,EAAE,EAAE,GAAG,IAAI;IAMtD,KAAK,CAAC,SAAS,EAAE,MAAM,EAAE,EAAE,CAAC,EAAE,MAAM,GAAG,cAAc,EAAE;CAMxD;AAED,gFAAgF;AAChF,qBAAa,eAAgB,YAAW,SAAS;IAE7C,OAAO,CAAC,QAAQ,CAAC,SAAS;IAC1B,OAAO,CAAC,QAAQ,CAAC,MAAM;gBADN,SAAS,EAAE,QAAQ,EACnB,MAAM,EAAE,WAAW;IAGhC,QAAQ,CAAC,KAAK,EAAE,MAAM,EAAE,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC;CAIjE;AAID,MAAM,WAAW,oBAAoB;IACnC,SAAS,EAAE,SAAS,CAAC;IACrB,6CAA6C;IAC7C,CAAC,CAAC,EAAE,MAAM,CAAC;IACX;;;;OAIG;IACH,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB,8DAA8D;IAC9D,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,0EAA0E;IAC1E,SAAS,CAAC,EAAE,CAAC,GAAG,EAAE,WAAW,KAAK,MAAM,CAAC;IACzC,uEAAuE;IACvE,UAAU,CAAC,EAAE,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,MAAM,KAAK,MAAM,CAAC;CAC/C;AAED;;;;;;;;;;;;GAYG;AACH,qBAAa,aAAa;IACxB,QAAQ,CAAC,OAAO,EAAE,aAAa,CAAC;IAEhC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAY;IACvC,OAAO,CAAC,QAAQ,CAAC,EAAE,CAAS;IAC5B,OAAO,CAAC,QAAQ,CAAC,gBAAgB,CAAS;IAC1C,OAAO,CAAC,QAAQ,CAAC,UAAU,CAA+B;IAC1D,OAAO,CAA
C,QAAQ,CAAC,WAAW,CAAmC;IAE/D,OAAO,CAAC,UAAU,CAAuB;IACzC,OAAO,CAAC,OAAO,CAAgB;gBAEnB,IAAI,EAAE,oBAAoB;IAStC,uDAAuD;IACvD,IAAI,WAAW,IAAI,SAAS,MAAM,EAAE,CAEnC;IAED;;;;OAIG;IACG,MAAM,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC;QAAE,SAAS,EAAE,OAAO,CAAC;QAAC,cAAc,EAAE,OAAO,CAAA;KAAE,CAAC;IA+BrF,OAAO,CAAC,UAAU;CAGnB;AAED,6DAA6D;AAC7D,wBAAgB,iBAAiB,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,MAAM,GAAG,MAAM,CAQ9D;AAED,YAAY,EAAE,YAAY,EAAE,CAAC"}
package/dist/index.js ADDED
@@ -0,0 +1,102 @@
1
+ // src/index.ts
2
+ import { StableContext } from "@halo-sdk/core";
3
/**
 * Cosine similarity of two numeric vectors.
 *
 * Vectors are expected to have the same length. If they do not, components
 * missing from the shorter vector are treated as 0, so the result is always a
 * finite number rather than NaN (a NaN score would make the ordering produced
 * by a numeric sort comparator unreliable).
 *
 * @param {number[]} a First vector.
 * @param {number[]} b Second vector.
 * @returns {number} dot(a, b) / (|a| * |b|), or 0 when either vector has zero magnitude.
 */
function cosineSimilarity(a, b) {
  const length = Math.max(a.length, b.length);
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < length; i++) {
    // Zero-pad the shorter vector instead of reading `undefined`.
    const x = a[i] ?? 0;
    const y = b[i] ?? 0;
    dot += x * y;
    normA += x * x;
    normB += y * y;
  }
  const denom = Math.sqrt(normA) * Math.sqrt(normB);
  return denom === 0 ? 0 : dot / denom;
}
15
/**
 * A simple in-memory vector store ranked by cosine similarity.
 * Every query scores all stored items, so it suits tests, demos and small corpora.
 */
class MemoryVectorStore {
  /** Stored `{ doc, embedding }` pairs, in insertion order. */
  _items = [];

  /**
   * Store each document together with the embedding at the same index.
   *
   * @param {Array<object>} docs Documents to store.
   * @param {number[][]} embeddings One embedding per document, index-aligned with `docs`.
   * @throws {RangeError} If there are fewer embeddings than documents. Nothing is stored in that case.
   */
  add(docs, embeddings) {
    // A document without an embedding would only fail later, inside query().
    if (embeddings.length < docs.length) {
      throw new RangeError(
        `MemoryVectorStore.add: received ${docs.length} docs but only ${embeddings.length} embeddings`,
      );
    }
    docs.forEach((doc, i) => {
      this._items.push({ doc, embedding: embeddings[i] });
    });
  }

  /**
   * Return the `k` stored documents most similar to `embedding`.
   *
   * @param {number[]} embedding Query vector.
   * @param {number} k Maximum number of results.
   * @returns {Array<object>} Copies of the stored documents with an added `score`,
   *   sorted by descending score. Empty when `k` is not a positive number.
   */
  query(embedding, k) {
    // slice(0, k) with a negative k would return "all but the last |k|" items.
    if (!(k > 0)) {
      return [];
    }
    const scored = this._items.map(({ doc, embedding: stored }) => ({
      ...doc,
      score: cosineSimilarity(embedding, stored),
    }));
    scored.sort((x, y) => y.score - x.score);
    return scored.slice(0, k);
  }
}
26
/**
 * Retriever that embeds the query text and looks up the nearest documents in a
 * vector store.
 */
class VectorRetriever {
  _embedder;
  _store;

  /**
   * @param {{ embed(texts: string[]): Promise<number[][]> }} _embedder Produces one vector per input text.
   * @param {{ query(embedding: number[], k: number): any }} _store Store queried with the embedded text.
   */
  constructor(_embedder, _store) {
    this._embedder = _embedder;
    this._store = _store;
  }

  /**
   * Embed `query` and return up to `k` matches from the store.
   *
   * @param {string} query Query text.
   * @param {number} k Number of documents requested from the store.
   * @returns {Promise<Array<object>>} A fresh array holding the store's results.
   */
  async retrieve(query, k) {
    const vectors = await this._embedder.embed([query]);
    const [queryEmbedding] = vectors;
    const matches = await this._store.query(queryEmbedding, k);
    // Copy so callers never share the array instance returned by the store.
    return [...matches];
  }
}
38
/**
 * Cache-aware RAG: keeps a context segment filled with retrieved documents
 * while changing that segment as rarely as possible.
 *
 * - Sticky retrieval: a query similar enough to the last successfully
 *   retrieved one skips retrieval and leaves the segment untouched.
 * - Append-only growth: when the previous document ids are an ordered prefix
 *   of the new ids, only the extra documents are appended.
 * - Otherwise the segment's messages are replaced with the new result set.
 */
class CacheAwareRag {
  /** The context segment that receives the formatted documents. */
  segment;
  _retriever;
  _k;
  _stickyThreshold;
  _formatDoc;
  _similarity;
  /** Last query whose retrieval completed, or null before the first one. */
  _lastQuery = null;
  /** Ids of the documents currently in the segment, in order. */
  _docIds = [];

  /**
   * @param {object} opts
   * @param {object} opts.retriever Object with `retrieve(query, k)` returning a promise of documents.
   * @param {number} [opts.k=4] Documents requested per retrieval.
   * @param {number} [opts.stickyThreshold=0.85] Similarity at or above which retrieval is skipped.
   * @param {(doc: object) => string} [opts.formatDoc] Message text for a document. Default `[id] text`.
   * @param {(a: string, b: string) => number} [opts.similarity] Query similarity. Default `jaccardSimilarity`.
   * @param {string} [opts.segmentId="rag"] Id given to the segment.
   */
  constructor(opts) {
    this._retriever = opts.retriever;
    this._k = opts.k ?? 4;
    this._stickyThreshold = opts.stickyThreshold ?? 0.85;
    this._formatDoc = opts.formatDoc ?? ((d) => `[${d.id}] ${d.text}`);
    this._similarity = opts.similarity ?? jaccardSimilarity;
    this.segment = new StableContext({ id: opts.segmentId ?? "rag", kind: "rag", messages: [] });
  }

  /** Document ids currently in the segment, in order. */
  get documentIds() {
    return this._docIds;
  }

  /**
   * Refresh the segment for `query`.
   *
   * @param {string} query The user's current query.
   * @returns {Promise<{ retrieved: boolean, cachePreserved: boolean }>}
   *   `retrieved` is false when the sticky policy skipped retrieval.
   *   `cachePreserved` is true when the existing segment content was left
   *   intact (sticky skip, append-only growth, or an empty segment that stayed
   *   empty) and false when the segment's messages were replaced.
   */
  async update(query) {
    if (
      this._lastQuery !== null &&
      this._docIds.length > 0 &&
      this._similarity(query, this._lastQuery) >= this._stickyThreshold
    ) {
      return { retrieved: false, cachePreserved: true };
    }

    const docs = await this._retriever.retrieve(query, this._k);
    const newIds = docs.map((d) => d.id);
    const previousIds = this._docIds;

    // Append-only when the previous ids are an ordered prefix of the new ids.
    const isPrefix =
      previousIds.length > 0 &&
      previousIds.length <= newIds.length &&
      previousIds.every((id, i) => id === newIds[i]);

    let cachePreserved;
    if (isPrefix) {
      const extra = docs.slice(previousIds.length);
      this.segment.append(extra.map((d) => this._toMessage(d)));
      cachePreserved = true;
    } else {
      this.segment.setMessages(docs.map((d) => this._toMessage(d)));
      // Decide from the state BEFORE the rebuild: replacing a populated
      // segment (even with an empty result) changes the cached content.
      cachePreserved = previousIds.length === 0 && newIds.length === 0;
    }

    // Commit bookkeeping only after retrieval and the segment update succeeded,
    // so a failed retrieval cannot make the next identical query "sticky"
    // against documents that were fetched for a different query.
    this._docIds = newIds;
    this._lastQuery = query;
    return { retrieved: true, cachePreserved };
  }

  /** Wrap a document as a non-discardable user message. */
  _toMessage(doc) {
    return { role: "user", content: this._formatDoc(doc), discardable: false };
  }
}
86
/**
 * Token-set Jaccard similarity of two strings.
 *
 * Tokens are lowercased runs of Unicode letters, combining marks, digits and
 * underscores. Using Unicode classes rather than the ASCII-only `\w` matters:
 * with `\w`, text in non-Latin scripts produces no tokens at all, so any two
 * such strings would compare as identical.
 *
 * @param {string} a First string.
 * @param {string} b Second string.
 * @returns {number} |A ∩ B| / |A ∪ B| in [0, 1]; 1 when neither string contains a token.
 */
function jaccardSimilarity(a, b) {
  const tokenize = (text) => new Set(text.toLowerCase().match(/[\p{L}\p{M}\p{N}_]+/gu) ?? []);
  const tokensA = tokenize(a);
  const tokensB = tokenize(b);
  if (tokensA.size === 0 && tokensB.size === 0) {
    return 1;
  }
  let shared = 0;
  for (const token of tokensA) {
    if (tokensB.has(token)) {
      shared++;
    }
  }
  // At least one set is non-empty here, so the union size is never 0.
  return shared / (tokensA.size + tokensB.size - shared);
}
95
+ export {
96
+ CacheAwareRag,
97
+ MemoryVectorStore,
98
+ VectorRetriever,
99
+ cosineSimilarity,
100
+ jaccardSimilarity
101
+ };
102
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/index.ts"],"sourcesContent":["import { StableContext, type CacheSegment, type ChatMessage } from \"@halo-sdk/core\";\n\n/** A retrievable document. */\nexport interface RagDocument {\n id: string;\n text: string;\n metadata?: Record<string, unknown>;\n}\n\n/** A document with a relevance score (higher = more relevant). */\nexport interface ScoredDocument extends RagDocument {\n score: number;\n}\n\n// ── Seams (bring your own implementation) ──\n\n/** Turns text into vectors. Seam — wrap any embedding model. */\nexport interface Embedder {\n embed(texts: string[]): Promise<number[][]>;\n}\n\n/** Stores + searches document vectors. Seam — wrap any vector DB. */\nexport interface VectorStore {\n add(docs: RagDocument[], embeddings: number[][]): Promise<void> | void;\n query(embedding: number[], k: number): Promise<ScoredDocument[]> | ScoredDocument[];\n}\n\n/** Produces relevant documents for a query. Seam — the top-level RAG interface. */\nexport interface Retriever {\n retrieve(query: string, k: number): Promise<RagDocument[]>;\n}\n\n// ── In-memory defaults (dependency-free) ──\n\n/** Cosine similarity of two equal-length vectors. */\nexport function cosineSimilarity(a: number[], b: number[]): number {\n let dot = 0;\n let na = 0;\n let nb = 0;\n for (let i = 0; i < a.length; i++) {\n dot += a[i]! * b[i]!;\n na += a[i]! * a[i]!;\n nb += b[i]! * b[i]!;\n }\n const denom = Math.sqrt(na) * Math.sqrt(nb);\n return denom === 0 ? 0 : dot / denom;\n}\n\n/** A simple in-memory cosine vector store. Good for tests, demos, small corpora. */\nexport class MemoryVectorStore implements VectorStore {\n private _items: { doc: RagDocument; embedding: number[] }[] = [];\n\n add(docs: RagDocument[], embeddings: number[][]): void {\n for (let i = 0; i < docs.length; i++) {\n this._items.push({ doc: docs[i]!, embedding: embeddings[i]! 
});\n }\n }\n\n query(embedding: number[], k: number): ScoredDocument[] {\n return this._items\n .map(({ doc, embedding: e }) => ({ ...doc, score: cosineSimilarity(embedding, e) }))\n .sort((a, b) => b.score - a.score)\n .slice(0, k);\n }\n}\n\n/** Build a {@link Retriever} from an {@link Embedder} + {@link VectorStore}. */\nexport class VectorRetriever implements Retriever {\n constructor(\n private readonly _embedder: Embedder,\n private readonly _store: VectorStore,\n ) {}\n\n async retrieve(query: string, k: number): Promise<RagDocument[]> {\n const [queryEmbedding] = await this._embedder.embed([query]);\n return [...(await this._store.query(queryEmbedding!, k))];\n }\n}\n\n// ── Cache-aware orchestration (what Halo owns) ──\n\nexport interface CacheAwareRagOptions {\n retriever: Retriever;\n /** Docs to retrieve per query. Default 4. */\n k?: number;\n /**\n * Sticky-retrieval threshold. If the new query's lexical similarity to the\n * last one is ≥ this, retrieval is skipped and the cached segment is reused.\n * Default 0.85. Set to 1 to always re-retrieve.\n */\n stickyThreshold?: number;\n /** Segment id (for cache miss attribution). Default \"rag\". */\n segmentId?: string;\n /** Format a document into a context message. Default: `[doc id] text`. */\n formatDoc?: (doc: RagDocument) => string;\n /** Override the lexical similarity measure (default token Jaccard). 
*/\n similarity?: (a: string, b: string) => number;\n}\n\n/**\n * Cache-aware RAG: turns a {@link Retriever} into a reusable {@link CacheSegment}\n * with two cache-preserving policies:\n *\n * - **sticky retrieval** — when consecutive queries are near-duplicates, skip\n * re-retrieval entirely so the segment (and the prefix cache after it) is\n * untouched, and\n * - **append-only growth** — when a new result set extends the previous one,\n * append the new docs instead of rebuilding, keeping the already-cached prefix\n * valid; only a genuinely different result set rebuilds (and busts) the block.\n *\n * Attach `rag.segment` via `agent.setContextSegments([rag.segment])`.\n */\nexport class CacheAwareRag {\n readonly segment: StableContext;\n\n private readonly _retriever: Retriever;\n private readonly _k: number;\n private readonly _stickyThreshold: number;\n private readonly _formatDoc: (doc: RagDocument) => string;\n private readonly _similarity: (a: string, b: string) => number;\n\n private _lastQuery: string | null = null;\n private _docIds: string[] = [];\n\n constructor(opts: CacheAwareRagOptions) {\n this._retriever = opts.retriever;\n this._k = opts.k ?? 4;\n this._stickyThreshold = opts.stickyThreshold ?? 0.85;\n this._formatDoc = opts.formatDoc ?? ((d) => `[${d.id}] ${d.text}`);\n this._similarity = opts.similarity ?? jaccardSimilarity;\n this.segment = new StableContext({ id: opts.segmentId ?? \"rag\", kind: \"rag\", messages: [] });\n }\n\n /** Document ids currently in the segment, in order. */\n get documentIds(): readonly string[] {\n return this._docIds;\n }\n\n /**\n * Refresh the segment for `query`. Returns whether a retrieval actually ran\n * (`false` = sticky reuse) and whether the cache was preserved (append-only or\n * skip) vs. 
busted (rebuild).\n */\n async update(query: string): Promise<{ retrieved: boolean; cachePreserved: boolean }> {\n if (\n this._lastQuery !== null &&\n this._docIds.length > 0 &&\n this._similarity(query, this._lastQuery) >= this._stickyThreshold\n ) {\n return { retrieved: false, cachePreserved: true }; // sticky — segment untouched\n }\n\n this._lastQuery = query;\n const docs = await this._retriever.retrieve(query, this._k);\n const newIds = docs.map((d) => d.id);\n\n // Append-only when the previous ids are an ordered prefix of the new ids.\n const isPrefix =\n this._docIds.length > 0 &&\n this._docIds.length <= newIds.length &&\n this._docIds.every((id, i) => id === newIds[i]);\n\n if (isPrefix) {\n const extra = docs.slice(this._docIds.length);\n this.segment.append(extra.map((d) => this._toMessage(d)));\n this._docIds = newIds;\n return { retrieved: true, cachePreserved: true };\n }\n\n this.segment.setMessages(docs.map((d) => this._toMessage(d)));\n this._docIds = newIds;\n return { retrieved: true, cachePreserved: this._docIds.length === 0 };\n }\n\n private _toMessage(doc: RagDocument): ChatMessage {\n return { role: \"user\", content: this._formatDoc(doc), discardable: false };\n }\n}\n\n/** Token-set Jaccard similarity (lowercased word tokens). */\nexport function jaccardSimilarity(a: string, b: string): number {\n const ta = new Set(a.toLowerCase().match(/\\w+/g) ?? []);\n const tb = new Set(b.toLowerCase().match(/\\w+/g) ?? []);\n if (ta.size === 0 && tb.size === 0) return 1;\n let inter = 0;\n for (const t of ta) if (tb.has(t)) inter++;\n const union = ta.size + tb.size - inter;\n return union === 0 ? 
0 : inter / union;\n}\n\nexport type { CacheSegment };\n"],"mappings":";AAAA,SAAS,qBAA0D;AAmC5D,SAAS,iBAAiB,GAAa,GAAqB;AACjE,MAAI,MAAM;AACV,MAAI,KAAK;AACT,MAAI,KAAK;AACT,WAAS,IAAI,GAAG,IAAI,EAAE,QAAQ,KAAK;AACjC,WAAO,EAAE,CAAC,IAAK,EAAE,CAAC;AAClB,UAAM,EAAE,CAAC,IAAK,EAAE,CAAC;AACjB,UAAM,EAAE,CAAC,IAAK,EAAE,CAAC;AAAA,EACnB;AACA,QAAM,QAAQ,KAAK,KAAK,EAAE,IAAI,KAAK,KAAK,EAAE;AAC1C,SAAO,UAAU,IAAI,IAAI,MAAM;AACjC;AAGO,IAAM,oBAAN,MAA+C;AAAA,EAC5C,SAAsD,CAAC;AAAA,EAE/D,IAAI,MAAqB,YAA8B;AACrD,aAAS,IAAI,GAAG,IAAI,KAAK,QAAQ,KAAK;AACpC,WAAK,OAAO,KAAK,EAAE,KAAK,KAAK,CAAC,GAAI,WAAW,WAAW,CAAC,EAAG,CAAC;AAAA,IAC/D;AAAA,EACF;AAAA,EAEA,MAAM,WAAqB,GAA6B;AACtD,WAAO,KAAK,OACT,IAAI,CAAC,EAAE,KAAK,WAAW,EAAE,OAAO,EAAE,GAAG,KAAK,OAAO,iBAAiB,WAAW,CAAC,EAAE,EAAE,EAClF,KAAK,CAAC,GAAG,MAAM,EAAE,QAAQ,EAAE,KAAK,EAChC,MAAM,GAAG,CAAC;AAAA,EACf;AACF;AAGO,IAAM,kBAAN,MAA2C;AAAA,EAChD,YACmB,WACA,QACjB;AAFiB;AACA;AAAA,EAChB;AAAA,EAFgB;AAAA,EACA;AAAA,EAGnB,MAAM,SAAS,OAAe,GAAmC;AAC/D,UAAM,CAAC,cAAc,IAAI,MAAM,KAAK,UAAU,MAAM,CAAC,KAAK,CAAC;AAC3D,WAAO,CAAC,GAAI,MAAM,KAAK,OAAO,MAAM,gBAAiB,CAAC,CAAE;AAAA,EAC1D;AACF;AAmCO,IAAM,gBAAN,MAAoB;AAAA,EAChB;AAAA,EAEQ;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EAET,aAA4B;AAAA,EAC5B,UAAoB,CAAC;AAAA,EAE7B,YAAY,MAA4B;AACtC,SAAK,aAAa,KAAK;AACvB,SAAK,KAAK,KAAK,KAAK;AACpB,SAAK,mBAAmB,KAAK,mBAAmB;AAChD,SAAK,aAAa,KAAK,cAAc,CAAC,MAAM,IAAI,EAAE,EAAE,KAAK,EAAE,IAAI;AAC/D,SAAK,cAAc,KAAK,cAAc;AACtC,SAAK,UAAU,IAAI,cAAc,EAAE,IAAI,KAAK,aAAa,OAAO,MAAM,OAAO,UAAU,CAAC,EAAE,CAAC;AAAA,EAC7F;AAAA;AAAA,EAGA,IAAI,cAAiC;AACnC,WAAO,KAAK;AAAA,EACd;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,MAAM,OAAO,OAAyE;AACpF,QACE,KAAK,eAAe,QACpB,KAAK,QAAQ,SAAS,KACtB,KAAK,YAAY,OAAO,KAAK,UAAU,KAAK,KAAK,kBACjD;AACA,aAAO,EAAE,WAAW,OAAO,gBAAgB,KAAK;AAAA,IAClD;AAEA,SAAK,aAAa;AAClB,UAAM,OAAO,MAAM,KAAK,WAAW,SAAS,OAAO,KAAK,EAAE;AAC1D,UAAM,SAAS,KAAK,IAAI,CAAC,MAAM,EAAE,EAAE;AAGnC,UAAM,WACJ,KAAK,QAAQ,SAAS,KACtB,KAAK,QAAQ,UAAU,OAAO,UAC9B,KAAK,QAAQ,MAAM,CAAC,IAAI,MAAM,OAAO,OAAO,CAAC,CAAC;AAEhD,QAAI,UAAU;AACZ,YAAM,QAAQ,KAAK,MAAM,
KAAK,QAAQ,MAAM;AAC5C,WAAK,QAAQ,OAAO,MAAM,IAAI,CAAC,MAAM,KAAK,WAAW,CAAC,CAAC,CAAC;AACxD,WAAK,UAAU;AACf,aAAO,EAAE,WAAW,MAAM,gBAAgB,KAAK;AAAA,IACjD;AAEA,SAAK,QAAQ,YAAY,KAAK,IAAI,CAAC,MAAM,KAAK,WAAW,CAAC,CAAC,CAAC;AAC5D,SAAK,UAAU;AACf,WAAO,EAAE,WAAW,MAAM,gBAAgB,KAAK,QAAQ,WAAW,EAAE;AAAA,EACtE;AAAA,EAEQ,WAAW,KAA+B;AAChD,WAAO,EAAE,MAAM,QAAQ,SAAS,KAAK,WAAW,GAAG,GAAG,aAAa,MAAM;AAAA,EAC3E;AACF;AAGO,SAAS,kBAAkB,GAAW,GAAmB;AAC9D,QAAM,KAAK,IAAI,IAAI,EAAE,YAAY,EAAE,MAAM,MAAM,KAAK,CAAC,CAAC;AACtD,QAAM,KAAK,IAAI,IAAI,EAAE,YAAY,EAAE,MAAM,MAAM,KAAK,CAAC,CAAC;AACtD,MAAI,GAAG,SAAS,KAAK,GAAG,SAAS,EAAG,QAAO;AAC3C,MAAI,QAAQ;AACZ,aAAW,KAAK,GAAI,KAAI,GAAG,IAAI,CAAC,EAAG;AACnC,QAAM,QAAQ,GAAG,OAAO,GAAG,OAAO;AAClC,SAAO,UAAU,IAAI,IAAI,QAAQ;AACnC;","names":[]}
package/package.json ADDED
@@ -0,0 +1,51 @@
1
+ {
2
+ "name": "@halo-sdk/rag",
3
+ "version": "1.0.0",
4
+ "description": "Cache-aware retrieval-augmented generation for Halo AI SDK — sticky retrieval + append-only growth that keeps the prefix cache warm",
5
+ "keywords": [
6
+ "ai",
7
+ "embeddings",
8
+ "llm",
9
+ "prefix-cache",
10
+ "rag",
11
+ "retrieval"
12
+ ],
13
+ "license": "MIT",
14
+ "repository": {
15
+ "type": "git",
16
+ "url": "https://github.com/halo-sdk/halo-ai",
17
+ "directory": "packages/rag"
18
+ },
19
+ "files": [
20
+ "dist"
21
+ ],
22
+ "type": "module",
23
+ "main": "./dist/index.js",
24
+ "types": "./dist/index.d.ts",
25
+ "exports": {
26
+ ".": {
27
+ "types": "./dist/index.d.ts",
28
+ "import": "./dist/index.js",
29
+ "require": "./dist/index.cjs"
30
+ }
31
+ },
32
+ "publishConfig": {
33
+ "access": "public"
34
+ },
35
+ "devDependencies": {
36
+ "typescript": "^5.8.0",
37
+ "vitest": "^3.0.0",
38
+ "@halo-sdk/core": "1.1.0"
39
+ },
40
+ "peerDependencies": {
41
+ "@halo-sdk/core": ">=1.1.0"
42
+ },
43
+ "scripts": {
44
+ "build": "tsc --build --emitDeclarationOnly && tsup",
45
+ "dev": "tsup --watch",
46
+ "clean": "del-cli dist *.tsbuildinfo",
47
+ "publint": "publint",
48
+ "test": "vitest run",
49
+ "test:watch": "vitest"
50
+ }
51
+ }