@chatman-media/kb 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +169 -0
- package/dist/ab-router.d.ts +66 -0
- package/dist/ab-router.d.ts.map +1 -0
- package/dist/answer-types.d.ts +194 -0
- package/dist/answer-types.d.ts.map +1 -0
- package/dist/answer.d.ts +59 -0
- package/dist/answer.d.ts.map +1 -0
- package/dist/built-in-tools/calendly.d.ts +19 -0
- package/dist/built-in-tools/calendly.d.ts.map +1 -0
- package/dist/chunk.d.ts +48 -0
- package/dist/chunk.d.ts.map +1 -0
- package/dist/conversation-store.d.ts +76 -0
- package/dist/conversation-store.d.ts.map +1 -0
- package/dist/eval.d.ts +64 -0
- package/dist/eval.d.ts.map +1 -0
- package/dist/extract-user-facts.d.ts +27 -0
- package/dist/extract-user-facts.d.ts.map +1 -0
- package/dist/fact-checker.d.ts +46 -0
- package/dist/fact-checker.d.ts.map +1 -0
- package/dist/grade-skills.d.ts +29 -0
- package/dist/grade-skills.d.ts.map +1 -0
- package/dist/index.d.ts +76 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +62655 -0
- package/dist/ingest.d.ts +49 -0
- package/dist/ingest.d.ts.map +1 -0
- package/dist/multi-query.d.ts +29 -0
- package/dist/multi-query.d.ts.map +1 -0
- package/dist/parse-pdf.d.ts +14 -0
- package/dist/parse-pdf.d.ts.map +1 -0
- package/dist/persona-shortcuts.d.ts +51 -0
- package/dist/persona-shortcuts.d.ts.map +1 -0
- package/dist/prompt.d.ts +9 -0
- package/dist/prompt.d.ts.map +1 -0
- package/dist/reflect.d.ts +29 -0
- package/dist/reflect.d.ts.map +1 -0
- package/dist/reranker.d.ts +71 -0
- package/dist/reranker.d.ts.map +1 -0
- package/dist/retrieval-utils.d.ts +94 -0
- package/dist/retrieval-utils.d.ts.map +1 -0
- package/dist/retry.d.ts +53 -0
- package/dist/retry.d.ts.map +1 -0
- package/dist/rewrite-query.d.ts +30 -0
- package/dist/rewrite-query.d.ts.map +1 -0
- package/dist/sanitize.d.ts +21 -0
- package/dist/sanitize.d.ts.map +1 -0
- package/dist/semantic-cache.d.ts +70 -0
- package/dist/semantic-cache.d.ts.map +1 -0
- package/dist/server.d.ts +77 -0
- package/dist/server.d.ts.map +1 -0
- package/dist/stores/memory-store.d.ts +72 -0
- package/dist/stores/memory-store.d.ts.map +1 -0
- package/dist/structured-output.d.ts +21 -0
- package/dist/structured-output.d.ts.map +1 -0
- package/dist/styles.d.ts +186 -0
- package/dist/styles.d.ts.map +1 -0
- package/dist/summarize-conversation.d.ts +31 -0
- package/dist/summarize-conversation.d.ts.map +1 -0
- package/dist/system-prompt.d.ts +11 -0
- package/dist/system-prompt.d.ts.map +1 -0
- package/dist/text-style-rules.d.ts +133 -0
- package/dist/text-style-rules.d.ts.map +1 -0
- package/dist/tool-loop.d.ts +44 -0
- package/dist/tool-loop.d.ts.map +1 -0
- package/dist/tools.d.ts +64 -0
- package/dist/tools.d.ts.map +1 -0
- package/dist/topic-classifier.d.ts +11 -0
- package/dist/topic-classifier.d.ts.map +1 -0
- package/dist/types.d.ts +83 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/utils.d.ts +19 -0
- package/dist/utils.d.ts.map +1 -0
- package/dist/vision.d.ts +72 -0
- package/dist/vision.d.ts.map +1 -0
- package/package.json +76 -0
- package/src/ab-router.ts +118 -0
- package/src/answer-types.ts +191 -0
- package/src/answer.ts +696 -0
- package/src/built-in-tools/calendly.ts +32 -0
- package/src/chunk.ts +198 -0
- package/src/conversation-store.ts +138 -0
- package/src/eval.ts +127 -0
- package/src/extract-user-facts.ts +120 -0
- package/src/fact-checker.ts +171 -0
- package/src/grade-skills.ts +79 -0
- package/src/index.ts +191 -0
- package/src/ingest.ts +193 -0
- package/src/multi-query.ts +89 -0
- package/src/parse-pdf.ts +24 -0
- package/src/persona-shortcuts.ts +255 -0
- package/src/prompt.ts +190 -0
- package/src/reflect.ts +99 -0
- package/src/reranker.ts +166 -0
- package/src/retrieval-utils.ts +209 -0
- package/src/retry.ts +139 -0
- package/src/rewrite-query.ts +124 -0
- package/src/sanitize.ts +44 -0
- package/src/semantic-cache.ts +154 -0
- package/src/server.ts +164 -0
- package/src/stores/memory-store.ts +249 -0
- package/src/structured-output.ts +47 -0
- package/src/styles.ts +138 -0
- package/src/summarize-conversation.ts +88 -0
- package/src/system-prompt.ts +118 -0
- package/src/text-style-rules.ts +244 -0
- package/src/tool-loop.ts +110 -0
- package/src/tools.ts +79 -0
- package/src/topic-classifier.ts +112 -0
- package/src/types.ts +91 -0
- package/src/utils.ts +81 -0
- package/src/vision.ts +265 -0
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
import type { AnswerResult, AnswerTelemetry } from "./answer-types.ts";
|
|
2
|
+
import type { EmbeddingClient } from "@chatman-media/llm-router";
|
|
3
|
+
|
|
4
|
+
export interface SemanticCacheOptions {
  /**
   * Minimum cosine similarity (0–1) a cached question must reach to count as
   * a hit; larger values make matching stricter. Defaults to 0.92, which is
   * meant to catch paraphrases and minor reformulations.
   */
  threshold?: number;
  /**
   * Upper bound on the number of entries held in memory. Once the bound is
   * reached the oldest entry is dropped to make room. Defaults to 500.
   */
  maxEntries?: number;
  /**
   * Entry lifetime in milliseconds; entries older than this are discarded
   * before lookups. Defaults to one hour. Pass `Infinity` to turn expiry off.
   */
  ttlMs?: number;
}
|
|
21
|
+
|
|
22
|
+
/** One cached answer together with the data needed to match and expire it. */
interface CacheEntry {
  /** Embedding of the question this answer was produced for. */
  embedding: number[];
  /** The answer handed back on a cache hit. */
  result: AnswerResult;
  /** `Date.now()` timestamp (ms) taken when the entry was stored. */
  createdAt: number;
}
|
|
27
|
+
|
|
28
|
+
/**
 * In-memory semantic cache for `answerWithRag` results.
 *
 * Stores (question embedding → AnswerResult) pairs. On lookup, the incoming
 * question is embedded and compared by cosine similarity against every cached
 * embedding; the best match at or above `threshold` is returned.
 *
 * Use `getOrSet()` to put the cache in front of any async function that
 * produces an `AnswerResult` for a question string.
 *
 * @example
 * ```ts
 * import { SemanticCache, answerWithRag } from "@chatman-media/kb";
 *
 * const cache = new SemanticCache(embedder, { threshold: 0.93, ttlMs: 30 * 60_000 });
 *
 * const result = await cache.getOrSet(
 *   input.question,
 *   () => answerWithRag(input),
 * );
 * ```
 */
export class SemanticCache {
  private readonly embedder: EmbeddingClient;
  private readonly threshold: number;
  private readonly maxEntries: number;
  private readonly ttlMs: number;
  /** Entries in insertion order: index 0 is always the oldest. */
  private entries: CacheEntry[] = [];

  /** Total cache hits since creation. */
  hits = 0;
  /** Total cache misses since creation. */
  misses = 0;

  /**
   * @param embedder Client used to embed every question passed to the cache.
   * @param opts Optional tuning; see `SemanticCacheOptions` for defaults.
   */
  constructor(embedder: EmbeddingClient, opts: SemanticCacheOptions = {}) {
    this.embedder = embedder;
    this.threshold = opts.threshold ?? 0.92;
    this.maxEntries = opts.maxEntries ?? 500;
    this.ttlMs = opts.ttlMs ?? 60 * 60_000;
  }

  /**
   * Look up `question` in the cache. If a similar question was cached and is
   * still fresh, returns the cached `AnswerResult` with `telemetry.path`
   * overridden to `"cache_hit"`. Otherwise calls `fn()`, stores the result,
   * and returns it.
   *
   * If the embedder yields no vector for the question, `fn()` is called
   * directly and neither the counters nor the cache are touched.
   */
  async getOrSet(question: string, fn: () => Promise<AnswerResult>): Promise<AnswerResult> {
    const [queryVec] = await this.embedder.embed([question]);
    if (!queryVec) return fn();

    // Drop stale entries first so an expired answer can never be returned.
    this.evictExpired();

    const hit = this.findBestMatch(queryVec);
    if (hit) {
      this.hits++;
      // Return a copy so the stored result keeps its original telemetry.
      return {
        ...hit.result,
        telemetry: { ...hit.result.telemetry, path: "cache_hit" as AnswerTelemetry["path"] },
      };
    }

    this.misses++;
    const result = await fn();
    this.store(queryVec, result);
    return result;
  }

  /** Manually insert a question→result pair (e.g. from a warm-up script). */
  async prime(question: string, result: AnswerResult): Promise<void> {
    const [vec] = await this.embedder.embed([question]);
    if (vec) this.store(vec, result);
  }

  /** Remove all entries. The hit/miss counters are left untouched. */
  clear(): void {
    this.entries = [];
  }

  /** Number of live (non-expired) entries. Reading it purges expired ones. */
  get size(): number {
    this.evictExpired();
    return this.entries.length;
  }

  // ── Private ───────────────────────────────────────────────────────────────

  /**
   * Linear scan for the entry most similar to `queryVec`.
   * Returns `null` when the best similarity is below `threshold`.
   */
  private findBestMatch(queryVec: number[]): CacheEntry | null {
    let bestSim = -1;
    let bestEntry: CacheEntry | null = null;
    for (const entry of this.entries) {
      const sim = cosineSimilarity(queryVec, entry.embedding);
      if (sim > bestSim) {
        bestSim = sim;
        bestEntry = entry;
      }
    }
    return bestSim >= this.threshold ? bestEntry : null;
  }

  /** Append an entry, evicting the oldest ones so `maxEntries` is respected. */
  private store(embedding: number[], result: AnswerResult): void {
    // A capacity that is not a positive number leaves no room for any entry.
    if (!(this.maxEntries > 0)) return;
    while (this.entries.length >= this.maxEntries) {
      // Evict oldest entry
      this.entries.shift();
    }
    this.entries.push({ embedding, result, createdAt: Date.now() });
  }

  /** Drop every entry older than `ttlMs`; a no-op when expiry is disabled. */
  private evictExpired(): void {
    if (this.ttlMs === Infinity) return;
    const cutoff = Date.now() - this.ttlMs;
    this.entries = this.entries.filter((e) => e.createdAt >= cutoff);
  }
}
|
|
142
|
+
|
|
143
|
+
/**
 * Cosine similarity between two vectors.
 *
 * Vectors of different lengths are compared as if the shorter one were padded
 * with zeros, so the result does not depend on argument order.
 *
 * @returns `dot(a, b) / (|a| * |b|)`, or 0 when either vector has zero magnitude.
 */
function cosineSimilarity(a: number[], b: number[]): number {
  // Walk the longer vector so no component of either input is ignored.
  const length = Math.max(a.length, b.length);
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < length; i++) {
    const x = a[i] ?? 0;
    const y = b[i] ?? 0;
    dot += x * y;
    normA += x ** 2;
    normB += y ** 2;
  }
  if (normA === 0 || normB === 0) return 0;
  return dot / (Math.sqrt(normA) * Math.sqrt(normB));
}
|
package/src/server.ts
ADDED
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
import { answerWithRagStream } from "./answer.ts";
|
|
2
|
+
import type { AnswerInput, AnswerTelemetry } from "./answer-types.ts";
|
|
3
|
+
import type { ChatClient } from "@chatman-media/llm-router";
|
|
4
|
+
import type { EmbeddingClient } from "@chatman-media/llm-router";
|
|
5
|
+
import type { IKbStore } from "./types.ts";
|
|
6
|
+
|
|
7
|
+
export interface RagServerOptions {
  /** Knowledge base store passed to every answer request. */
  kb: IKbStore;
  /** LLM chat client passed to every answer request. */
  chat: ChatClient;
  /** Embedding client passed to every answer request. */
  embedder: EmbeddingClient;
  /** TCP port the server listens on. Defaults to 3000. */
  port?: number;
  /** Hostname the server binds to. Defaults to "0.0.0.0". */
  hostname?: string;
  /**
   * URL path of the chat endpoint. Defaults to "/chat".
   * Clients POST JSON `{ question, userId?, conversationId?, ... }` to it and
   * receive an SSE stream of tokens.
   */
  path?: string;
  /**
   * Baseline `AnswerInput` fields applied to every request.
   * Fields present in the request body override these defaults.
   */
  defaults?: Partial<Omit<AnswerInput, "question" | "kb" | "chat" | "embedder">>;
  /**
   * Hook intended to receive the final telemetry of each answered request,
   * e.g. for forwarding to an analytics backend.
   *
   * NOTE(review): `createRagServer` does not appear to call this hook
   * itself — confirm where telemetry is actually delivered.
   */
  onTelemetry?: (telemetry: AnswerTelemetry) => void;
  /**
   * Value for the `access-control-allow-origin` header; `"*"` allows every
   * origin. When left `undefined` (the default) no CORS headers are sent.
   */
  cors?: string;
}
|
|
39
|
+
|
|
40
|
+
/**
 * JSON payload accepted by the RAG server endpoint.
 * Only `question` is required; any other keys are passed through untyped.
 */
export interface RagRequestBody {
  /** The user's question; must be a non-empty string. */
  question: string;
  /** Additional request fields, left unvalidated at this level. */
  [key: string]: unknown;
}
|
|
45
|
+
|
|
46
|
+
/**
 * Lightweight Bun HTTP server that exposes `answerWithRagStream` as an SSE endpoint.
 *
 * **Request** — `POST {path}` with `Content-Type: application/json`:
 * ```json
 * { "question": "What is the onboarding process?" }
 * ```
 * Any additional fields in the body are merged into `AnswerInput` (e.g. `style`,
 * `history`, `conversationSummary`, `userFacts`). The `kb`, `chat` and `embedder`
 * fields always come from `opts` and cannot be overridden by the request.
 *
 * **Response** — `text/event-stream`:
 * ```
 * data: Hello
 * data:  world
 * data: [DONE]
 * ```
 * Each event carries one streamed token. A token that contains line breaks is
 * sent as several `data:` lines within a single event, which SSE clients join
 * back together with `\n`. The stream ends with `data: [DONE]`.
 * On error the server emits `data: [ERROR] <message>` and closes the stream.
 *
 * Requests for any other method or path get `404`; a body that is not valid
 * JSON, or that lacks a non-empty string `question`, gets `400`. When `cors`
 * is configured the CORS headers are attached to every response.
 *
 * NOTE(review): `opts.onTelemetry` is not invoked anywhere in this function —
 * confirm how telemetry is expected to be delivered.
 *
 * @example
 * ```ts
 * import { createRagServer } from "@chatman-media/kb";
 *
 * const server = createRagServer({
 *   kb, chat, embedder,
 *   port: 3000,
 *   cors: "*",
 *   onTelemetry: (t) => console.log("path:", t.path, "ms:", t.latencyMs),
 * });
 *
 * console.log(`Listening on http://localhost:${server.port}`);
 * // server.stop() to shut down
 * ```
 */
export function createRagServer(opts: RagServerOptions): ReturnType<typeof Bun.serve> {
  const path = opts.path ?? "/chat";
  const corsOrigin = opts.cors;

  /** CORS headers to attach to a response; empty when `cors` is not configured. */
  function corsHeaders(): Record<string, string> {
    if (!corsOrigin) return {};
    return {
      "access-control-allow-origin": corsOrigin,
      "access-control-allow-methods": "POST, OPTIONS",
      "access-control-allow-headers": "content-type",
    };
  }

  /** Plain-text response that carries the CORS headers so browsers can read it. */
  function textResponse(message: string, status: number): Response {
    return new Response(message, { status, headers: corsHeaders() });
  }

  return Bun.serve({
    port: opts.port ?? 3000,
    hostname: opts.hostname ?? "0.0.0.0",

    async fetch(req) {
      const url = new URL(req.url);

      // CORS preflight
      if (req.method === "OPTIONS") {
        return new Response(null, { status: 204, headers: corsHeaders() });
      }

      if (req.method !== "POST" || url.pathname !== path) {
        return textResponse("Not Found", 404);
      }

      let parsed: unknown;
      try {
        parsed = await req.json();
      } catch {
        return textResponse("Invalid JSON", 400);
      }

      // Valid JSON may still be `null` or a primitive; reject it before any
      // property access so it yields a 400 instead of an unhandled TypeError.
      if (typeof parsed !== "object" || parsed === null) {
        return textResponse('Missing required field "question"', 400);
      }
      const body = parsed as RagRequestBody;

      if (!body.question || typeof body.question !== "string") {
        return textResponse('Missing required field "question"', 400);
      }

      const { question, ...rest } = body;

      // Merge defaults → request body fields win. The service clients are
      // spread last so a request can never replace them.
      const answerInput: AnswerInput = {
        ...(opts.defaults ?? {}),
        ...(rest as Partial<AnswerInput>),
        question,
        kb: opts.kb,
        chat: opts.chat,
        embedder: opts.embedder,
      };

      // Set once the client goes away; after that the controller must not be used.
      let cancelled = false;

      const stream = new ReadableStream({
        async start(controller) {
          const enc = new TextEncoder();

          /** Emit one SSE event, framing each line of `payload` as its own `data:` field. */
          function send(payload: string) {
            const framed = payload
              .split(/\r\n|\r|\n/)
              .map((line) => `data: ${line}`)
              .join("\n");
            controller.enqueue(enc.encode(`${framed}\n\n`));
          }

          try {
            for await (const token of answerWithRagStream(answerInput)) {
              // Leaving the loop also ends the generator, stopping generation.
              if (cancelled) break;
              send(token);
            }
            if (!cancelled) send("[DONE]");
          } catch (err) {
            if (!cancelled) {
              const msg = err instanceof Error ? err.message : String(err);
              send(`[ERROR] ${msg}`);
            }
          } finally {
            // A cancelled stream is already closed; closing it again would throw.
            if (!cancelled) controller.close();
          }
        },
        cancel() {
          cancelled = true;
        },
      });

      return new Response(stream, {
        headers: {
          "content-type": "text/event-stream",
          "cache-control": "no-cache",
          connection: "keep-alive",
          ...corsHeaders(),
        },
      });
    },
  });
}
|
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
import type { IKbStore, KbSearchHit } from "../types.ts";
|
|
2
|
+
import { reciprocalRankFusion } from "../utils.ts";
|
|
3
|
+
|
|
4
|
+
/** Document record as kept by `InMemoryKbStore`. */
interface StoredDocument {
  /** Store-assigned numeric identifier. */
  id: number;
  /** Source string; the store looks documents up by this value on upsert. */
  source: string;
  /** Title copied into search hits. */
  title: string;
  /** Content hash supplied by the caller at upsert time. */
  contentHash: string;
  /** Optional topic used to filter searches; `null` when none was given. */
  topic: string | null;
}
|
|
11
|
+
|
|
12
|
+
/** Chunk record as kept by `InMemoryKbStore`. */
interface StoredChunk {
  /** Store-assigned numeric identifier. */
  chunkId: number;
  /** `id` of the document this chunk belongs to. */
  documentId: number;
  /** Index of the chunk, as supplied by the caller on insert. */
  chunkIndex: number;
  /** Chunk text; returned in hits and used for keyword scoring. */
  text: string;
  /** Token count supplied by the caller on insert. */
  tokenCount: number;
  /** Embedding vector compared against query embeddings. */
  embedding: number[];
}
|
|
20
|
+
|
|
21
|
+
/**
 * Zero-dependency in-memory `IKbStore` implementation.
 *
 * Suitable for:
 * - Unit / integration tests (no database required)
 * - Quick prototyping and examples
 * - Demos and tutorials
 *
 * Limitations vs a real pgvector store:
 * - O(n) linear scan for every search (no index) — fine up to ~50k chunks
 * - BM25 is a minimal TF-IDF approximation, not a full BM25 implementation
 * - Data is not persisted across process restarts
 *
 * @example
 * ```ts
 * import { InMemoryKbStore, ingestText, OllamaEmbeddingClient } from "@chatman-media/kb";
 *
 * const kb = new InMemoryKbStore();
 * const embedder = new OllamaEmbeddingClient({ host: "http://localhost:11434", model: "nomic-embed-text", dim: 768 });
 *
 * await ingestText({ title: "FAQ", body: "..." }, { kb, embedder });
 * const [queryVec] = await embedder.embed(["query"]);
 * const hits = await kb.search(queryVec, 5);
 * ```
 */
export class InMemoryKbStore implements IKbStore {
  private docs: StoredDocument[] = [];
  private chunks: StoredChunk[] = [];
  private nextDocId = 1;
  private nextChunkId = 1;

  // ── Search ────────────────────────────────────────────────────────────────

  /**
   * Pure vector search: the `k` chunks with the smallest cosine distance to
   * `embedding`, closest first. A non-empty `topic` restricts the search to
   * chunks of documents with exactly that topic.
   */
  async search(embedding: number[], k: number, topic?: string | null): Promise<KbSearchHit[]> {
    const docsById = this.indexDocs();
    return this.nearest(this.poolFor(topic, docsById), embedding, k, docsById);
  }

  /**
   * Hybrid search: fuses the vector ranking with the keyword (BM25-style)
   * ranking via reciprocal rank fusion. Each ranking contributes up to `2k`
   * candidates. When the keyword ranking is empty the vector ranking alone is
   * returned. `k` defaults to 5.
   */
  async hybridSearch(input: {
    embedding: number[];
    query: string;
    k?: number;
    topic?: string | null;
  }): Promise<KbSearchHit[]> {
    const k = input.k ?? 5;
    const docsById = this.indexDocs();
    const pool = this.poolFor(input.topic, docsById);

    const vec = this.nearest(pool, input.embedding, k * 2, docsById);

    // Keyword hits carry the negated score in `distance`, so smaller still
    // means better.
    const bm25 = simpleBm25(input.query, pool)
      .slice(0, k * 2)
      .map(({ chunk, score }) => this.toHit(chunk, -score, docsById));

    if (bm25.length === 0) return vec.slice(0, k);
    return reciprocalRankFusion(vec, bm25, k);
  }

  /**
   * Searches the `"books"` topic first and falls back to the whole store only
   * when that yields nothing. `vectorOnly` selects `search` instead of
   * `hybridSearch` for both attempts. `k` defaults to 5.
   */
  async prioritySearch(input: {
    embedding: number[];
    query: string;
    k?: number;
    vectorOnly?: boolean;
  }): Promise<KbSearchHit[]> {
    const k = input.k ?? 5;
    const booksHits = input.vectorOnly
      ? await this.search(input.embedding, k, "books")
      : await this.hybridSearch({ ...input, k, topic: "books" });
    if (booksHits.length > 0) return booksHits;
    return input.vectorOnly ? this.search(input.embedding, k) : this.hybridSearch({ ...input, k });
  }

  // ── Ingest ────────────────────────────────────────────────────────────────

  /** Finds a document by its `source`; returns its id and content hash, or `null`. */
  async getDocumentBySource(source: string): Promise<{ id: number; content_hash: string } | null> {
    const doc = this.docs.find((d) => d.source === source);
    return doc ? { id: doc.id, content_hash: doc.contentHash } : null;
  }

  /** Number of chunks currently stored for `documentId`. */
  async countChunksForDocument(documentId: number): Promise<number> {
    return this.chunks.filter((c) => c.documentId === documentId).length;
  }

  /**
   * Removes a document and all of its chunks.
   * @returns `true` if a document with this id existed.
   */
  async deleteDocument(id: number): Promise<boolean> {
    const before = this.docs.length;
    this.docs = this.docs.filter((d) => d.id !== id);
    this.chunks = this.chunks.filter((c) => c.documentId !== id);
    return this.docs.length < before;
  }

  /**
   * Creates a document, or updates the one with the same `source` in place
   * (title, hash and topic are overwritten; its chunks are left untouched).
   * An omitted `topic` is stored as `null`.
   * @returns The id of the created or updated document.
   */
  async upsertDocument(input: {
    source: string;
    title: string;
    contentHash: string;
    topic?: string | null;
  }): Promise<{ id: number }> {
    const existing = this.docs.find((d) => d.source === input.source);
    if (existing) {
      existing.title = input.title;
      existing.contentHash = input.contentHash;
      existing.topic = input.topic ?? null;
      return { id: existing.id };
    }
    const id = this.nextDocId++;
    this.docs.push({
      id,
      source: input.source,
      title: input.title,
      contentHash: input.contentHash,
      topic: input.topic ?? null,
    });
    return { id };
  }

  /** Appends a chunk with its embedding; `documentId` is not validated. */
  async insertChunkWithEmbedding(input: {
    documentId: number;
    chunkIndex: number;
    text: string;
    tokenCount: number;
    embedding: number[];
  }): Promise<void> {
    this.chunks.push({
      chunkId: this.nextChunkId++,
      documentId: input.documentId,
      chunkIndex: input.chunkIndex,
      text: input.text,
      tokenCount: input.tokenCount,
      embedding: input.embedding,
    });
  }

  /** Total number of indexed chunks. Useful in tests. */
  get chunkCount(): number {
    return this.chunks.length;
  }

  /** Total number of indexed documents. Useful in tests. */
  get documentCount(): number {
    return this.docs.length;
  }

  // ── Private ───────────────────────────────────────────────────────────────

  /**
   * Builds an id → document lookup once per search, replacing a linear scan
   * of `docs` for every chunk.
   */
  private indexDocs(): Map<number, StoredDocument> {
    const byId = new Map<number, StoredDocument>();
    for (const doc of this.docs) {
      if (!byId.has(doc.id)) byId.set(doc.id, doc);
    }
    return byId;
  }

  /** Chunks eligible for a search: all of them, or only those of `topic`. */
  private poolFor(
    topic: string | null | undefined,
    docsById: Map<number, StoredDocument>,
  ): StoredChunk[] {
    if (!topic) return this.chunks;
    return this.chunks.filter((c) => (docsById.get(c.documentId)?.topic ?? null) === topic);
  }

  /** The `limit` chunks of `pool` closest to `embedding`, closest first. */
  private nearest(
    pool: StoredChunk[],
    embedding: number[],
    limit: number,
    docsById: Map<number, StoredDocument>,
  ): KbSearchHit[] {
    return pool
      .map((chunk) => ({ chunk, distance: cosineDistance(embedding, chunk.embedding) }))
      .sort((a, b) => a.distance - b.distance)
      .slice(0, limit)
      .map(({ chunk, distance }) => this.toHit(chunk, distance, docsById));
  }

  /** Converts a chunk into a hit; `source` and `title` are "" if its document is gone. */
  private toHit(
    chunk: StoredChunk,
    distance: number,
    docsById: Map<number, StoredDocument>,
  ): KbSearchHit {
    const doc = docsById.get(chunk.documentId);
    return {
      chunk_id: chunk.chunkId,
      distance,
      text: chunk.text,
      document_id: chunk.documentId,
      source: doc?.source ?? "",
      title: doc?.title ?? "",
    };
  }
}
|
|
190
|
+
|
|
191
|
+
// ── Helpers ───────────────────────────────────────────────────────────────────
|
|
192
|
+
|
|
193
|
+
/**
 * Cosine distance (`1 - cosine similarity`) between two vectors; smaller
 * means more similar.
 *
 * Vectors of different lengths are compared as if the shorter one were padded
 * with zeros, so the result does not depend on argument order.
 *
 * @returns The distance, or 1 when either vector has zero magnitude.
 */
function cosineDistance(a: number[], b: number[]): number {
  // Walk the longer vector so no component of either input is ignored.
  const length = Math.max(a.length, b.length);
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < length; i++) {
    const x = a[i] ?? 0;
    const y = b[i] ?? 0;
    dot += x * y;
    normA += x ** 2;
    normB += y ** 2;
  }
  if (normA === 0 || normB === 0) return 1;
  return 1 - dot / (Math.sqrt(normA) * Math.sqrt(normB));
}
|
|
205
|
+
|
|
206
|
+
/**
 * Minimal TF-IDF approximation — good enough for in-memory BM25 ranking.
 *
 * Scores every chunk against the tokens of `query` with a BM25-shaped formula
 * (`k1 = 1.5`, `b = 0.75`). The average document length is a fixed constant
 * rather than the corpus average, which is part of why this is only an
 * approximation. A token repeated in the query contributes once per
 * occurrence.
 *
 * @param query Free-text query; tokenized with `tokenize`.
 * @param chunks Candidate chunks; document frequencies are computed over this set.
 * @returns Chunks with a positive score, highest score first. Empty when the
 *   query yields no tokens.
 */
function simpleBm25(
  query: string,
  chunks: StoredChunk[],
): Array<{ chunk: StoredChunk; score: number }> {
  const tokens = tokenize(query);
  if (tokens.length === 0) return [];

  const k1 = 1.5;
  const b = 0.75;
  const avgdl = 50;
  const N = chunks.length;

  // Tokenize each chunk once and reuse the words for both the document
  // frequencies and the per-chunk term frequencies.
  const tokenized = chunks.map((chunk) => ({ chunk, words: tokenize(chunk.text) }));

  const df = new Map<string, number>();
  for (const { words } of tokenized) {
    for (const t of new Set(words)) df.set(t, (df.get(t) ?? 0) + 1);
  }

  // IDF depends only on the query token, so compute it once up front.
  const queryTerms = tokens.map((term) => {
    const n = df.get(term) ?? 0;
    return { term, idf: Math.log((N - n + 0.5) / (n + 0.5) + 1) };
  });

  return tokenized
    .map(({ chunk, words }) => {
      const tf = new Map<string, number>();
      for (const w of words) tf.set(w, (tf.get(w) ?? 0) + 1);
      const dl = words.length;
      // Length normalisation term, shared by every query token for this chunk.
      const lengthNorm = k1 * (1 - b + b * (dl / avgdl));

      let score = 0;
      for (const { term, idf } of queryTerms) {
        const tfVal = tf.get(term) ?? 0;
        score += idf * ((tfVal * (k1 + 1)) / (tfVal + lengthNorm));
      }
      return { chunk, score };
    })
    .filter((r) => r.score > 0)
    .sort((x, y) => y.score - x.score);
}
|
|
242
|
+
|
|
243
|
+
/**
 * Splits text into lowercase word tokens.
 *
 * Every character that is not a Unicode letter or number acts as a separator,
 * and tokens shorter than two UTF-16 code units are discarded.
 */
function tokenize(text: string): string[] {
  const normalized = text.toLowerCase().replace(/[^\p{L}\p{N}]/gu, " ");
  const tokens: string[] = [];
  for (const piece of normalized.split(/\s+/)) {
    if (piece.length >= 2) tokens.push(piece);
  }
  return tokens;
}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
|
|
3
|
+
/**
 * Appends a "reply with JSON only" instruction and the pretty-printed schema
 * to a system prompt. Intended for providers without native structured
 * output (Ollama, OpenRouter, etc.).
 *
 * @param systemPrompt The existing system prompt; left unchanged at the start of the result.
 * @param jsonSchema JSON Schema object, serialized with two-space indentation.
 * @returns The prompt followed by a blank line, the instruction and the schema.
 */
export function injectJsonInstruction(
  systemPrompt: string,
  jsonSchema: Record<string, unknown>,
): string {
  const instruction =
    "Respond with a single valid JSON object matching this schema. Do not include markdown, code fences, or any text outside the JSON.";
  const schemaStr = JSON.stringify(jsonSchema, null, 2);
  return systemPrompt + "\n\n" + instruction + "\nSchema:\n" + schemaStr;
}
|
|
14
|
+
|
|
15
|
+
/**
 * Parses an LLM response as JSON and validates it against a Zod schema.
 *
 * A leading ```` ``` ```` / ```` ```json ```` fence and a trailing ```` ``` ````
 * fence are removed before parsing.
 *
 * @param raw Raw model output.
 * @param schema Zod schema the parsed value must satisfy.
 * @returns `{ success: true, data }` on success. Otherwise
 *   `{ success: false, error }`, where `error` is either a parse-failure
 *   message quoting up to the first 200 characters of the cleaned text, or
 *   the schema's validation message.
 */
export function parseStructuredOutput<T extends z.ZodTypeAny>(
  raw: string,
  schema: T,
): { success: true; data: z.infer<T> } | { success: false; error: string } {
  const withoutOpeningFence = raw.trim().replace(/^```(?:json)?\s*/i, "");
  const cleaned = withoutOpeningFence.replace(/\s*```$/, "").trim();

  let parsed: unknown;
  try {
    parsed = JSON.parse(cleaned);
  } catch {
    return { success: false, error: `JSON parse failed: ${cleaned.slice(0, 200)}` };
  }

  const validation = schema.safeParse(parsed);
  return validation.success
    ? { success: true, data: validation.data as z.infer<T> }
    : { success: false, error: validation.error.message };
}
|
|
43
|
+
|
|
44
|
+
/**
 * Produces the JSON Schema representation of a Zod schema via
 * `z.toJSONSchema`, e.g. for use as OpenAI's `response_format`.
 *
 * @param schema Any Zod schema.
 * @returns The generated JSON Schema as a plain object.
 */
export function zodToJsonSchema(schema: z.ZodTypeAny): Record<string, unknown> {
  const jsonSchema = z.toJSONSchema(schema);
  return jsonSchema as Record<string, unknown>;
}
|