@ceed/docs-mcp 1.0.0-next.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts ADDED
@@ -0,0 +1,3 @@
+ export { DOCS_MCP_VERSION } from './version.js';
+ export declare const getDocsMcpVersion: () => string;
+ export { createDocsMcpServer, startDocsMcpServer } from './server.js';
package/dist/index.js ADDED
@@ -0,0 +1,5 @@
+ import { DOCS_MCP_VERSION } from './version.js';
+ export { DOCS_MCP_VERSION } from './version.js';
+ export const getDocsMcpVersion = () => DOCS_MCP_VERSION;
+ export { createDocsMcpServer, startDocsMcpServer } from './server.js';
+ //# sourceMappingURL=index.js.map
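A minimal usage sketch of the entry point above. The import specifier assumes the package name from the header resolves to the dist/index.js shown here; the server factories come from server.js, which is not part of this diff, so only the version helpers are exercised.

// Sketch: assumes '@ceed/docs-mcp' resolves to the dist/index.js above.
import { DOCS_MCP_VERSION, getDocsMcpVersion } from '@ceed/docs-mcp';

// getDocsMcpVersion simply returns the DOCS_MCP_VERSION constant.
console.log(getDocsMcpVersion() === DOCS_MCP_VERSION); // true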
package/dist/index.js.map ADDED
@@ -0,0 +1 @@
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,gBAAgB,EAAE,MAAM,cAAc,CAAC;AAEhD,OAAO,EAAE,gBAAgB,EAAE,MAAM,cAAc,CAAC;AAEhD,MAAM,CAAC,MAAM,iBAAiB,GAAG,GAAG,EAAE,CAAC,gBAAgB,CAAC;AAExD,OAAO,EAAE,mBAAmB,EAAE,kBAAkB,EAAE,MAAM,aAAa,CAAC","sourcesContent":["import { DOCS_MCP_VERSION } from './version.js';\n\nexport { DOCS_MCP_VERSION } from './version.js';\n\nexport const getDocsMcpVersion = () => DOCS_MCP_VERSION;\n\nexport { createDocsMcpServer, startDocsMcpServer } from './server.js';\n"]}
package/dist/rag/build-index.d.ts ADDED
@@ -0,0 +1,2 @@
+ #!/usr/bin/env node
+ export {};
package/dist/rag/build-index.js ADDED
@@ -0,0 +1,278 @@
+ #!/usr/bin/env node
+ import { opendir, readFile, stat } from 'node:fs/promises';
+ import path from 'node:path';
+ import { pathToFileURL } from 'node:url';
+ import { pipeline } from '@xenova/transformers';
+ import { DEFAULT_EMBEDDING_BATCH_SIZE, DEFAULT_EMBEDDING_MODEL, INDEX_PATH, RAG_SOURCE_DIR } from './constants.js';
+ import { normalizeVectors, tensorToVectors } from './embeddings.js';
+ import { saveEmbeddingIndex } from './storage.js';
+ const MAX_CHUNK_CHARS = 2500;
+ const MIN_CHUNK_CHARS = 400;
+ const MIN_PERSISTED_CHARS = 64;
+ const SENTENCE_SEPARATOR = /(?<=[.!?。?!])\s+/u;
+ const ALLOWED_EXTENSIONS = new Set(['.md', '.txt']);
+ const toPosixPath = (input) => input.split(path.sep).join('/');
+ const collectFiles = async (dir) => {
+     const entries = await stat(dir).catch(() => {
+         throw new Error(`RAG source directory does not exist: ${dir}`);
+     });
+     if (!entries.isDirectory()) {
+         throw new Error(`RAG source path is not a directory: ${dir}`);
+     }
+     const files = [];
+     const walk = async (current) => {
+         const dirEntries = await opendir(current);
+         for await (const entry of dirEntries) {
+             const fullPath = path.join(current, entry.name);
+             if (entry.isDirectory()) {
+                 await walk(fullPath);
+                 continue;
+             }
+             if (!entry.isFile()) {
+                 continue;
+             }
+             const extension = path.extname(entry.name).toLowerCase();
+             if (!ALLOWED_EXTENSIONS.has(extension)) {
+                 continue;
+             }
+             files.push(fullPath);
+         }
+     };
+     await walk(dir);
+     return files.sort();
+ };
+ const normalizeText = (value) => value.replace(/\r\n/g, '\n').trim();
+ const splitByLength = (text) => {
+     const sanitized = text.trim();
+     if (!sanitized) {
+         return [];
+     }
+     if (sanitized.length <= MAX_CHUNK_CHARS) {
+         return [sanitized];
+     }
+     const segments = [];
+     for (let index = 0; index < sanitized.length; index += MAX_CHUNK_CHARS) {
+         const slice = sanitized.slice(index, index + MAX_CHUNK_CHARS).trim();
+         if (slice) {
+             segments.push(slice);
+         }
+     }
+     return segments;
+ };
+ const splitLongParagraph = (paragraph) => {
+     const sanitized = paragraph.trim();
+     if (!sanitized) {
+         return [];
+     }
+     if (sanitized.length <= MAX_CHUNK_CHARS) {
+         return [sanitized];
+     }
+     const sentences = sanitized.split(SENTENCE_SEPARATOR).filter(Boolean);
+     if (sentences.length <= 1) {
+         return splitByLength(sanitized);
+     }
+     const segments = [];
+     let current = '';
+     const pushCurrent = () => {
+         if (current.trim()) {
+             segments.push(current.trim());
+             current = '';
+         }
+     };
+     for (const sentence of sentences) {
+         const candidate = current ? `${current} ${sentence}` : sentence;
+         if (candidate.length > MAX_CHUNK_CHARS) {
+             pushCurrent();
+             if (sentence.length > MAX_CHUNK_CHARS) {
+                 segments.push(...splitByLength(sentence));
+                 continue;
+             }
+             current = sentence;
+             continue;
+         }
+         current = candidate;
+     }
+     pushCurrent();
+     return segments;
+ };
+ const mergeSmallSegments = (segments) => {
+     if (segments.length === 0) {
+         return [];
+     }
+     const merged = [];
+     for (const segment of segments) {
+         const trimmed = segment.trim();
+         if (!trimmed) {
+             continue;
+         }
+         const lastIndex = merged.length - 1;
+         if (lastIndex >= 0 &&
+             trimmed.length < MIN_CHUNK_CHARS &&
+             merged[lastIndex].length + 2 + trimmed.length <= MAX_CHUNK_CHARS) {
+             merged[lastIndex] = `${merged[lastIndex]}\n\n${trimmed}`;
+             continue;
+         }
+         merged.push(trimmed);
+     }
+     return merged;
+ };
+ const splitContentBlock = (content) => {
+     const paragraphs = content
+         .split(/\n{2,}/u)
+         .map((paragraph) => paragraph.trim())
+         .filter(Boolean);
+     if (paragraphs.length === 0) {
+         return splitByLength(content);
+     }
+     const segments = [];
+     let current = '';
+     const pushCurrent = () => {
+         if (current.trim()) {
+             segments.push(current.trim());
+             current = '';
+         }
+     };
+     for (const paragraph of paragraphs) {
+         const candidate = current ? `${current}\n\n${paragraph}` : paragraph;
+         if (candidate.length > MAX_CHUNK_CHARS) {
+             pushCurrent();
+             segments.push(...splitLongParagraph(paragraph));
+             continue;
+         }
+         current = candidate;
+     }
+     pushCurrent();
+     return mergeSmallSegments(segments);
+ };
+ const chunkDocument = (relativePath, content) => {
+     const normalized = normalizeText(content);
+     const lines = normalized.split('\n');
+     const rawChunks = [];
+     let currentLines = [];
+     let currentHeading;
+     const flush = () => {
+         const joined = currentLines.join('\n').trim();
+         currentLines = [];
+         if (!joined) {
+             return;
+         }
+         const segments = splitContentBlock(joined);
+         segments.forEach((segment, segmentIndex) => {
+             rawChunks.push({
+                 heading: currentHeading,
+                 content: segment,
+                 includeHeading: segmentIndex === 0,
+             });
+         });
+     };
+     for (const line of lines) {
+         const headingMatch = line.match(/^(#{1,6})\s+(.*)$/u);
+         if (headingMatch) {
+             flush();
+             currentHeading = headingMatch[2].trim();
+             continue;
+         }
+         currentLines.push(line);
+     }
+     flush();
+     const chunks = [];
+     let chunkIndex = 0;
+     for (const rawChunk of rawChunks) {
+         const { heading, content: chunkContent, includeHeading } = rawChunk;
+         const trimmed = chunkContent.trim();
+         if (trimmed.length < MIN_PERSISTED_CHARS && chunks.length > 0) {
+             const last = chunks[chunks.length - 1];
+             if (last.text.length + 2 + trimmed.length <= MAX_CHUNK_CHARS) {
+                 last.text = `${last.text}\n\n${trimmed}`;
+                 continue;
+             }
+         }
+         let text = trimmed;
+         if (heading && includeHeading && !trimmed.startsWith(heading)) {
+             text = `${heading}\n\n${trimmed}`;
+         }
+         const id = `${toPosixPath(relativePath)}:chunk-${chunkIndex.toString().padStart(4, '0')}`;
+         chunks.push({
+             id,
+             text,
+             metadata: {
+                 source: toPosixPath(relativePath),
+                 heading,
+             },
+         });
+         chunkIndex += 1;
+     }
+     return chunks;
+ };
+ const embedChunks = async (chunks, model, batchSize) => {
+     if (chunks.length === 0) {
+         return [];
+     }
+     const extractor = (await pipeline('feature-extraction', model));
+     const embedded = [];
+     for (let index = 0; index < chunks.length; index += batchSize) {
+         const batch = chunks.slice(index, index + batchSize);
+         const inputs = batch.map((chunk) => chunk.text);
+         const tensor = await extractor(inputs, { pooling: 'mean' });
+         const values = normalizeVectors(tensorToVectors(tensor));
+         if (values.length !== batch.length) {
+             throw new Error('Embedding batch size mismatch');
+         }
+         batch.forEach((chunk, batchIndex) => {
+             embedded.push({
+                 ...chunk,
+                 embedding: values[batchIndex],
+             });
+         });
+         process.stdout.write(`Embedded ${Math.min(index + batch.length, chunks.length)} / ${chunks.length} chunks\r`);
+     }
+     process.stdout.write('\n');
+     return embedded;
+ };
+ const buildEmbeddingIndex = async () => {
+     const model = process.env.DOCS_MCP_EMBEDDING_MODEL ?? DEFAULT_EMBEDDING_MODEL;
+     const parsedBatch = Number.parseInt(process.env.DOCS_MCP_EMBEDDING_BATCH ?? '', 10);
+     const batchSize = Number.isNaN(parsedBatch) || parsedBatch < 1 ? DEFAULT_EMBEDDING_BATCH_SIZE : parsedBatch;
+     const files = await collectFiles(RAG_SOURCE_DIR);
+     if (files.length === 0) {
+         throw new Error(`No source documents found in ${RAG_SOURCE_DIR}`);
+     }
+     const allChunks = [];
+     for (const filePath of files) {
+         const relativePath = path.relative(RAG_SOURCE_DIR, filePath);
+         const contents = await readFile(filePath, 'utf-8');
+         const chunks = chunkDocument(relativePath, contents);
+         allChunks.push(...chunks);
+     }
+     if (allChunks.length === 0) {
+         throw new Error('No document chunks generated. Check chunking configuration.');
+     }
+     console.log(`Embedding ${allChunks.length} chunks from ${files.length} documents using ${model}`);
+     const embeddedChunks = await embedChunks(allChunks, model, batchSize);
+     if (embeddedChunks.length === 0) {
+         throw new Error('Failed to generate any embeddings');
+     }
+     const dimension = embeddedChunks[0].embedding.length;
+     const index = {
+         model,
+         dimension,
+         createdAt: new Date().toISOString(),
+         chunks: embeddedChunks,
+     };
+     await saveEmbeddingIndex(index, INDEX_PATH);
+     console.log(`Saved embedding index with ${embeddedChunks.length} vectors to ${INDEX_PATH}`);
+ };
+ const invokedFromCli = () => {
+     const entry = process.argv[1];
+     if (!entry) {
+         return false;
+     }
+     return import.meta.url === pathToFileURL(entry).href;
+ };
+ if (invokedFromCli()) {
+     buildEmbeddingIndex().catch((error) => {
+         console.error('Failed to build embedding index:', error);
+         process.exitCode = 1;
+     });
+ }
+ //# sourceMappingURL=build-index.js.map
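For reference, the object handed to saveEmbeddingIndex above implies the following on-disk shape for rag-index.json. The type below is a sketch inferred from buildEmbeddingIndex and chunkDocument, not a declaration published by the package; the model and batch size can be overridden through the DOCS_MCP_EMBEDDING_MODEL and DOCS_MCP_EMBEDDING_BATCH environment variables read in the source.

// Inferred shape of rag-index.json (field names read from build-index.js above).
type EmbeddingIndexFile = {
    model: string;       // e.g. 'Xenova/all-MiniLM-L6-v2' (DEFAULT_EMBEDDING_MODEL)
    dimension: number;   // length of every embedding vector
    createdAt: string;   // ISO-8601 timestamp from new Date().toISOString()
    chunks: Array<{
        id: string;      // `${source}:chunk-0000`-style identifier
        text: string;    // chunk text, prefixed with its heading on the first segment
        metadata: { source: string; heading?: string };
        embedding: number[]; // L2-normalized by normalizeVectors before saving
    }>;
};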
package/dist/rag/build-index.js.map ADDED
@@ -0,0 +1 @@
+ {"version":3,"file":"build-index.js","sourceRoot":"","sources":["../../src/rag/build-index.ts"],"names":[],"mappings":";AACA,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,IAAI,EAAE,MAAM,kBAAkB,CAAC;AAC3D,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AAEzC,OAAO,EAAE,QAAQ,EAAE,MAAM,sBAAsB,CAAC;AAEhD,OAAO,EAAE,4BAA4B,EAAE,uBAAuB,EAAE,UAAU,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AACnH,OAAO,EAAE,gBAAgB,EAAE,eAAe,EAAkC,MAAM,iBAAiB,CAAC;AACpG,OAAO,EAAE,kBAAkB,EAAE,MAAM,cAAc,CAAC;AAGlD,MAAM,eAAe,GAAG,IAAI,CAAC;AAC7B,MAAM,eAAe,GAAG,GAAG,CAAC;AAC5B,MAAM,mBAAmB,GAAG,EAAE,CAAC;AAC/B,MAAM,kBAAkB,GAAG,mBAAmB,CAAC;AAC/C,MAAM,kBAAkB,GAAG,IAAI,GAAG,CAAC,CAAC,KAAK,EAAE,MAAM,CAAC,CAAC,CAAC;AAQpD,MAAM,WAAW,GAAG,CAAC,KAAa,EAAE,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAEvE,MAAM,YAAY,GAAG,KAAK,EAAE,GAAW,EAAqB,EAAE;IAC1D,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE;QACvC,MAAM,IAAI,KAAK,CAAC,wCAAwC,GAAG,EAAE,CAAC,CAAC;IACnE,CAAC,CAAC,CAAC;IAEH,IAAI,CAAC,OAAO,CAAC,WAAW,EAAE,EAAE,CAAC;QACzB,MAAM,IAAI,KAAK,CAAC,uCAAuC,GAAG,EAAE,CAAC,CAAC;IAClE,CAAC;IAED,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,MAAM,IAAI,GAAG,KAAK,EAAE,OAAe,EAAE,EAAE;QACnC,MAAM,UAAU,GAAG,MAAM,OAAO,CAAC,OAAO,CAAC,CAAC;QAE1C,IAAI,KAAK,EAAE,MAAM,KAAK,IAAI,UAAU,EAAE,CAAC;YACnC,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,KAAK,CAAC,IAAI,CAAC,CAAC;YAEhD,IAAI,KAAK,CAAC,WAAW,EAAE,EAAE,CAAC;gBACtB,MAAM,IAAI,CAAC,QAAQ,CAAC,CAAC;gBACrB,SAAS;YACb,CAAC;YAED,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,EAAE,CAAC;gBAClB,SAAS;YACb,CAAC;YAED,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,WAAW,EAAE,CAAC;YAEzD,IAAI,CAAC,kBAAkB,CAAC,GAAG,CAAC,SAAS,CAAC,EAAE,CAAC;gBACrC,SAAS;YACb,CAAC;YAED,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QACzB,CAAC;IACL,CAAC,CAAC;IAEF,MAAM,IAAI,CAAC,GAAG,CAAC,CAAC;IAEhB,OAAO,KAAK,CAAC,IAAI,EAAE,CAAC;AACxB,CAAC,CAAC;AAEF,MAAM,aAAa,GAAG,CAAC,KAAa,EAAE,EAAE,CAAC,KAAK,CAAC,OAAO,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;AAE7E,MAAM,aAAa,GAAG,CAAC,IAAY,EAAY,EAAE;IAC7C,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAE9B,IAAI,CAAC,SAAS,EAAE,CAAC;QACb,OAAO,EAAE,CAAC;IACd,CAAC;IAED,IAAI,SAAS,CAAC,MAAM,IAAI,eAAe,EAAE,CAAC;QACtC,OAAO,CAAC,SAAS,CAAC,CAAC;IACvB,CAAC;IAED,MAAM,QAAQ,GAAa,EAAE,CAAC;IAE9B,KAAK,IAAI,KAAK,GAAG,CAAC,EAAE,KAAK,GAAG,SAAS,CAAC,MAAM,EAAE,KAAK,IAAI,eAAe,EAAE,CAAC;QACrE,MAAM,KAAK,GAAG,SAAS,CAAC,KAAK,CAAC,KAAK,EAAE,KAAK,GAAG,eAAe,CAAC,CAAC,IAAI,EAAE,CAAC;QAErE,IAAI,KAAK,EAAE,CAAC;YACR,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACzB,CAAC;IACL,CAAC;IAED,OAAO,QAAQ,CAAC;AACpB,CAAC,CAAC;AAEF,MAAM,kBAAkB,GAAG,CAAC,SAAiB,EAAY,EAAE;IACvD,MAAM,SAAS,GAAG,SAAS,CAAC,IAAI,EAAE,CAAC;IAEnC,IAAI,CAAC,SAAS,EAAE,CAAC;QACb,OAAO,EAAE,CAAC;IACd,CAAC;IAED,IAAI,SAAS,CAAC,MAAM,IAAI,eAAe,EAAE,CAAC;QACtC,OAAO,CAAC,SAAS,CAAC,CAAC;IACvB,CAAC;IAED,MAAM,SAAS,GAAG,SAAS,CAAC,KAAK,CAAC,kBAAkB,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IAEtE,IAAI,SAAS,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;QACxB,OAAO,aAAa,CAAC,SAAS,CAAC,CAAC;IACpC,CAAC;IAED,MAAM,QAAQ,GAAa,EAAE,CAAC;IAC9B,IAAI,OAAO,GAAG,EAAE,CAAC;IAEjB,MAAM,WAAW,GAAG,GAAG,EAAE;QACrB,IAAI,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC;YACjB,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC;YAC9B,OAAO,GAAG,EAAE,CAAC;QACjB,CAAC;IACL,CAAC,CAAC;IAEF,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;QAC/B,MAAM,SAAS,GAAG,OAAO,CAAC,CAAC,CAAC,GAAG,OAAO,IAAI,QAAQ,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC;QAEhE,IAAI,SAAS,CAAC,MAAM,GAAG,eAAe,EAAE,CAAC;YACrC,WAAW,EAAE,CAAC;YAEd,IAAI,QAAQ,CAAC,MAAM,GAAG,eAAe,EAAE,CAAC;gBACpC,QAAQ,CAAC,IAAI,CAAC,GAAG,aAAa,CAAC,QAAQ,CAAC,CAAC,CAAC;gBAC1C,SAAS;YACb,CAAC;YAED,OAAO,GAAG,QAAQ,CAAC;YACnB,SAAS;QACb,CAAC;QAED,OAAO,GAAG,SAAS,CAAC;
IACxB,CAAC;IAED,WAAW,EAAE,CAAC;IAEd,OAAO,QAAQ,CAAC;AACpB,CAAC,CAAC;AAEF,MAAM,kBAAkB,GAAG,CAAC,QAAkB,EAAY,EAAE;IACxD,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACxB,OAAO,EAAE,CAAC;IACd,CAAC;IAED,MAAM,MAAM,GAAa,EAAE,CAAC;IAE5B,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;QAC7B,MAAM,OAAO,GAAG,OAAO,CAAC,IAAI,EAAE,CAAC;QAE/B,IAAI,CAAC,OAAO,EAAE,CAAC;YACX,SAAS;QACb,CAAC;QAED,MAAM,SAAS,GAAG,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC;QAEpC,IACI,SAAS,IAAI,CAAC;YACd,OAAO,CAAC,MAAM,GAAG,eAAe;YAChC,MAAM,CAAC,SAAS,CAAC,CAAC,MAAM,GAAG,CAAC,GAAG,OAAO,CAAC,MAAM,IAAI,eAAe,EAClE,CAAC;YACC,MAAM,CAAC,SAAS,CAAC,GAAG,GAAG,MAAM,CAAC,SAAS,CAAC,OAAO,OAAO,EAAE,CAAC;YACzD,SAAS;QACb,CAAC;QAED,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IACzB,CAAC;IAED,OAAO,MAAM,CAAC;AAClB,CAAC,CAAC;AAEF,MAAM,iBAAiB,GAAG,CAAC,OAAe,EAAY,EAAE;IACpD,MAAM,UAAU,GAAG,OAAO;SACrB,KAAK,CAAC,SAAS,CAAC;SAChB,GAAG,CAAC,CAAC,SAAS,EAAE,EAAE,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC;SACpC,MAAM,CAAC,OAAO,CAAC,CAAC;IAErB,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC1B,OAAO,aAAa,CAAC,OAAO,CAAC,CAAC;IAClC,CAAC;IAED,MAAM,QAAQ,GAAa,EAAE,CAAC;IAC9B,IAAI,OAAO,GAAG,EAAE,CAAC;IAEjB,MAAM,WAAW,GAAG,GAAG,EAAE;QACrB,IAAI,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC;YACjB,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC;YAC9B,OAAO,GAAG,EAAE,CAAC;QACjB,CAAC;IACL,CAAC,CAAC;IAEF,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;QACjC,MAAM,SAAS,GAAG,OAAO,CAAC,CAAC,CAAC,GAAG,OAAO,OAAO,SAAS,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC;QAErE,IAAI,SAAS,CAAC,MAAM,GAAG,eAAe,EAAE,CAAC;YACrC,WAAW,EAAE,CAAC;YACd,QAAQ,CAAC,IAAI,CAAC,GAAG,kBAAkB,CAAC,SAAS,CAAC,CAAC,CAAC;YAChD,SAAS;QACb,CAAC;QAED,OAAO,GAAG,SAAS,CAAC;IACxB,CAAC;IAED,WAAW,EAAE,CAAC;IAEd,OAAO,kBAAkB,CAAC,QAAQ,CAAC,CAAC;AACxC,CAAC,CAAC;AAEF,MAAM,aAAa,GAAG,CAAC,YAAoB,EAAE,OAAe,EAAmB,EAAE;IAC7E,MAAM,UAAU,GAAG,aAAa,CAAC,OAAO,CAAC,CAAC;IAC1C,MAAM,KAAK,GAAG,UAAU,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACrC,MAAM,SAAS,GAAe,EAAE,CAAC;IACjC,IAAI,YAAY,GAAa,EAAE,CAAC;IAChC,IAAI,cAAkC,CAAC;IAEvC,MAAM,KAAK,GAAG,GAAG,EAAE;QACf,MAAM,MAAM,GAAG,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;QAC9C,YAAY,GAAG,EAAE,CAAC;QAElB,IAAI,CAAC,MAAM,EAAE,CAAC;YACV,OAAO;QACX,CAAC;QAED,MAAM,QAAQ,GAAG,iBAAiB,CAAC,MAAM,CAAC,CAAC;QAE3C,QAAQ,CAAC,OAAO,CAAC,CAAC,OAAO,EAAE,YAAY,EAAE,EAAE;YACvC,SAAS,CAAC,IAAI,CAAC;gBACX,OAAO,EAAE,cAAc;gBACvB,OAAO,EAAE,OAAO;gBAChB,cAAc,EAAE,YAAY,KAAK,CAAC;aACrC,CAAC,CAAC;QACP,CAAC,CAAC,CAAC;IACP,CAAC,CAAC;IAEF,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACvB,MAAM,YAAY,GAAG,IAAI,CAAC,KAAK,CAAC,oBAAoB,CAAC,CAAC;QAEtD,IAAI,YAAY,EAAE,CAAC;YACf,KAAK,EAAE,CAAC;YACR,cAAc,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;YACxC,SAAS;QACb,CAAC;QAED,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC5B,CAAC;IAED,KAAK,EAAE,CAAC;IAER,MAAM,MAAM,GAAoB,EAAE,CAAC;IACnC,IAAI,UAAU,GAAG,CAAC,CAAC;IAEnB,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;QAC/B,MAAM,EAAE,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,cAAc,EAAE,GAAG,QAAQ,CAAC;QACpE,MAAM,OAAO,GAAG,YAAY,CAAC,IAAI,EAAE,CAAC;QAEpC,IAAI,OAAO,CAAC,MAAM,GAAG,mBAAmB,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC5D,MAAM,IAAI,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;YAEvC,IAAI,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,GAAG,OAAO,CAAC,MAAM,IAAI,eAAe,EAAE,CAAC;gBAC3D,IAAI,CAAC,IAAI,GAAG,GAAG,IAAI,CAAC,IAAI,OAAO,OAAO,EAAE,CAAC;gBACzC,SAAS;YACb,CAAC;QACL,CAAC;QAED,IAAI,IAAI,GAAG,OAAO,CAAC;QAEnB,IAAI,OAAO,IAAI,cAAc,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC;YAC5D,IAAI,GAAG,GAAG,OAAO,OAAO,OAAO,EAAE,CAAC;QACtC,CAAC;QAED,MAAM,EAAE,GAAG,GAAG,WAAW,CAAC,YAAY,CAAC,UAAU,UAAU,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC;QAE1F,MAAM,CAAC,IAAI,CAAC;YACR,EAAE;YACF,IAAI;YACJ,QAAQ,EAAE;gBACN,MAAM,EAAE,WAAW,CAAC,YAAY,CAAC;gBACjC,OAAO;aACV;SACJ,C
AAC,CAAC;QAEH,UAAU,IAAI,CAAC,CAAC;IACpB,CAAC;IAED,OAAO,MAAM,CAAC;AAClB,CAAC,CAAC;AAEF,MAAM,WAAW,GAAG,KAAK,EAAE,MAAuB,EAAE,KAAa,EAAE,SAAiB,EAA6B,EAAE;IAC/G,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtB,OAAO,EAAE,CAAC;IACd,CAAC;IAED,MAAM,SAAS,GAAG,CAAC,MAAM,QAAQ,CAAC,oBAAoB,EAAE,KAAK,CAAC,CAA8B,CAAC;IAC7F,MAAM,QAAQ,GAAqB,EAAE,CAAC;IAEtC,KAAK,IAAI,KAAK,GAAG,CAAC,EAAE,KAAK,GAAG,MAAM,CAAC,MAAM,EAAE,KAAK,IAAI,SAAS,EAAE,CAAC;QAC5D,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,KAAK,EAAE,KAAK,GAAG,SAAS,CAAC,CAAC;QACrD,MAAM,MAAM,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAChD,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,MAAM,EAAE,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC,CAAC;QAC5D,MAAM,MAAM,GAAG,gBAAgB,CAAC,eAAe,CAAC,MAAM,CAAC,CAAC,CAAC;QAEzD,IAAI,MAAM,CAAC,MAAM,KAAK,KAAK,CAAC,MAAM,EAAE,CAAC;YACjC,MAAM,IAAI,KAAK,CAAC,+BAA+B,CAAC,CAAC;QACrD,CAAC;QAED,KAAK,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE,UAAU,EAAE,EAAE;YAChC,QAAQ,CAAC,IAAI,CAAC;gBACV,GAAG,KAAK;gBACR,SAAS,EAAE,MAAM,CAAC,UAAU,CAAC;aAChC,CAAC,CAAC;QACP,CAAC,CAAC,CAAC;QAEH,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,YAAY,IAAI,CAAC,GAAG,CAAC,KAAK,GAAG,KAAK,CAAC,MAAM,EAAE,MAAM,CAAC,MAAM,CAAC,MAAM,MAAM,CAAC,MAAM,WAAW,CAAC,CAAC;IAClH,CAAC;IAED,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAE3B,OAAO,QAAQ,CAAC;AACpB,CAAC,CAAC;AAEF,MAAM,mBAAmB,GAAG,KAAK,IAAI,EAAE;IACnC,MAAM,KAAK,GAAG,OAAO,CAAC,GAAG,CAAC,wBAAwB,IAAI,uBAAuB,CAAC;IAC9E,MAAM,WAAW,GAAG,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,wBAAwB,IAAI,EAAE,EAAE,EAAE,CAAC,CAAC;IACpF,MAAM,SAAS,GAAG,MAAM,CAAC,KAAK,CAAC,WAAW,CAAC,IAAI,WAAW,GAAG,CAAC,CAAC,CAAC,CAAC,4BAA4B,CAAC,CAAC,CAAC,WAAW,CAAC;IAE5G,MAAM,KAAK,GAAG,MAAM,YAAY,CAAC,cAAc,CAAC,CAAC;IAEjD,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACrB,MAAM,IAAI,KAAK,CAAC,gCAAgC,cAAc,EAAE,CAAC,CAAC;IACtE,CAAC;IAED,MAAM,SAAS,GAAoB,EAAE,CAAC;IAEtC,KAAK,MAAM,QAAQ,IAAI,KAAK,EAAE,CAAC;QAC3B,MAAM,YAAY,GAAG,IAAI,CAAC,QAAQ,CAAC,cAAc,EAAE,QAAQ,CAAC,CAAC;QAC7D,MAAM,QAAQ,GAAG,MAAM,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;QACnD,MAAM,MAAM,GAAG,aAAa,CAAC,YAAY,EAAE,QAAQ,CAAC,CAAC;QACrD,SAAS,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,CAAC;IAC9B,CAAC;IAED,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzB,MAAM,IAAI,KAAK,CAAC,6DAA6D,CAAC,CAAC;IACnF,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,aAAa,SAAS,CAAC,MAAM,gBAAgB,KAAK,CAAC,MAAM,oBAAoB,KAAK,EAAE,CAAC,CAAC;IAElG,MAAM,cAAc,GAAG,MAAM,WAAW,CAAC,SAAS,EAAE,KAAK,EAAE,SAAS,CAAC,CAAC;IAEtE,IAAI,cAAc,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC9B,MAAM,IAAI,KAAK,CAAC,mCAAmC,CAAC,CAAC;IACzD,CAAC;IAED,MAAM,SAAS,GAAG,cAAc,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,MAAM,CAAC;IAErD,MAAM,KAAK,GAAmB;QAC1B,KAAK;QACL,SAAS;QACT,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;QACnC,MAAM,EAAE,cAAc;KACzB,CAAC;IAEF,MAAM,kBAAkB,CAAC,KAAK,EAAE,UAAU,CAAC,CAAC;IAE5C,OAAO,CAAC,GAAG,CAAC,8BAA8B,cAAc,CAAC,MAAM,eAAe,UAAU,EAAE,CAAC,CAAC;AAChG,CAAC,CAAC;AAEF,MAAM,cAAc,GAAG,GAAY,EAAE;IACjC,MAAM,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAE9B,IAAI,CAAC,KAAK,EAAE,CAAC;QACT,OAAO,KAAK,CAAC;IACjB,CAAC;IAED,OAAO,MAAM,CAAC,IAAI,CAAC,GAAG,KAAK,aAAa,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC;AACzD,CAAC,CAAC;AAEF,IAAI,cAAc,EAAE,EAAE,CAAC;IACnB,mBAAmB,EAAE,CAAC,KAAK,CAAC,CAAC,KAAK,EAAE,EAAE;QAClC,OAAO,CAAC,KAAK,CAAC,kCAAkC,EAAE,KAAK,CAAC,CAAC;QACzD,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAC;IACzB,CAAC,CAAC,CAAC;AACP,CAAC","sourcesContent":["#!/usr/bin/env node\nimport { opendir, readFile, stat } from 'node:fs/promises';\nimport path from 'node:path';\nimport { pathToFileURL } from 'node:url';\n\nimport { pipeline } from '@xenova/transformers';\n\nimport { DEFAULT_EMBEDDING_BATCH_SIZE, DEFAULT_EMBEDDING_MODEL, INDEX_PATH, RAG_SOURCE_DIR } from './constants.js';\nimport { normalizeVectors, 
tensorToVectors, type FeatureExtractionFunction } from './embeddings.js';\nimport { saveEmbeddingIndex } from './storage.js';\nimport type { DocumentChunk, EmbeddingChunk, EmbeddingIndex } from './types.js';\n\nconst MAX_CHUNK_CHARS = 2500;\nconst MIN_CHUNK_CHARS = 400;\nconst MIN_PERSISTED_CHARS = 64;\nconst SENTENCE_SEPARATOR = /(?<=[.!?。?!])\\s+/u;\nconst ALLOWED_EXTENSIONS = new Set(['.md', '.txt']);\n\ntype RawChunk = {\n heading?: string;\n content: string;\n includeHeading: boolean;\n};\n\nconst toPosixPath = (input: string) => input.split(path.sep).join('/');\n\nconst collectFiles = async (dir: string): Promise<string[]> => {\n const entries = await stat(dir).catch(() => {\n throw new Error(`RAG source directory does not exist: ${dir}`);\n });\n\n if (!entries.isDirectory()) {\n throw new Error(`RAG source path is not a directory: ${dir}`);\n }\n\n const files: string[] = [];\n\n const walk = async (current: string) => {\n const dirEntries = await opendir(current);\n\n for await (const entry of dirEntries) {\n const fullPath = path.join(current, entry.name);\n\n if (entry.isDirectory()) {\n await walk(fullPath);\n continue;\n }\n\n if (!entry.isFile()) {\n continue;\n }\n\n const extension = path.extname(entry.name).toLowerCase();\n\n if (!ALLOWED_EXTENSIONS.has(extension)) {\n continue;\n }\n\n files.push(fullPath);\n }\n };\n\n await walk(dir);\n\n return files.sort();\n};\n\nconst normalizeText = (value: string) => value.replace(/\\r\\n/g, '\\n').trim();\n\nconst splitByLength = (text: string): string[] => {\n const sanitized = text.trim();\n\n if (!sanitized) {\n return [];\n }\n\n if (sanitized.length <= MAX_CHUNK_CHARS) {\n return [sanitized];\n }\n\n const segments: string[] = [];\n\n for (let index = 0; index < sanitized.length; index += MAX_CHUNK_CHARS) {\n const slice = sanitized.slice(index, index + MAX_CHUNK_CHARS).trim();\n\n if (slice) {\n segments.push(slice);\n }\n }\n\n return segments;\n};\n\nconst splitLongParagraph = (paragraph: string): string[] => {\n const sanitized = paragraph.trim();\n\n if (!sanitized) {\n return [];\n }\n\n if (sanitized.length <= MAX_CHUNK_CHARS) {\n return [sanitized];\n }\n\n const sentences = sanitized.split(SENTENCE_SEPARATOR).filter(Boolean);\n\n if (sentences.length <= 1) {\n return splitByLength(sanitized);\n }\n\n const segments: string[] = [];\n let current = '';\n\n const pushCurrent = () => {\n if (current.trim()) {\n segments.push(current.trim());\n current = '';\n }\n };\n\n for (const sentence of sentences) {\n const candidate = current ? 
`${current} ${sentence}` : sentence;\n\n if (candidate.length > MAX_CHUNK_CHARS) {\n pushCurrent();\n\n if (sentence.length > MAX_CHUNK_CHARS) {\n segments.push(...splitByLength(sentence));\n continue;\n }\n\n current = sentence;\n continue;\n }\n\n current = candidate;\n }\n\n pushCurrent();\n\n return segments;\n};\n\nconst mergeSmallSegments = (segments: string[]): string[] => {\n if (segments.length === 0) {\n return [];\n }\n\n const merged: string[] = [];\n\n for (const segment of segments) {\n const trimmed = segment.trim();\n\n if (!trimmed) {\n continue;\n }\n\n const lastIndex = merged.length - 1;\n\n if (\n lastIndex >= 0 &&\n trimmed.length < MIN_CHUNK_CHARS &&\n merged[lastIndex].length + 2 + trimmed.length <= MAX_CHUNK_CHARS\n ) {\n merged[lastIndex] = `${merged[lastIndex]}\\n\\n${trimmed}`;\n continue;\n }\n\n merged.push(trimmed);\n }\n\n return merged;\n};\n\nconst splitContentBlock = (content: string): string[] => {\n const paragraphs = content\n .split(/\\n{2,}/u)\n .map((paragraph) => paragraph.trim())\n .filter(Boolean);\n\n if (paragraphs.length === 0) {\n return splitByLength(content);\n }\n\n const segments: string[] = [];\n let current = '';\n\n const pushCurrent = () => {\n if (current.trim()) {\n segments.push(current.trim());\n current = '';\n }\n };\n\n for (const paragraph of paragraphs) {\n const candidate = current ? `${current}\\n\\n${paragraph}` : paragraph;\n\n if (candidate.length > MAX_CHUNK_CHARS) {\n pushCurrent();\n segments.push(...splitLongParagraph(paragraph));\n continue;\n }\n\n current = candidate;\n }\n\n pushCurrent();\n\n return mergeSmallSegments(segments);\n};\n\nconst chunkDocument = (relativePath: string, content: string): DocumentChunk[] => {\n const normalized = normalizeText(content);\n const lines = normalized.split('\\n');\n const rawChunks: RawChunk[] = [];\n let currentLines: string[] = [];\n let currentHeading: string | undefined;\n\n const flush = () => {\n const joined = currentLines.join('\\n').trim();\n currentLines = [];\n\n if (!joined) {\n return;\n }\n\n const segments = splitContentBlock(joined);\n\n segments.forEach((segment, segmentIndex) => {\n rawChunks.push({\n heading: currentHeading,\n content: segment,\n includeHeading: segmentIndex === 0,\n });\n });\n };\n\n for (const line of lines) {\n const headingMatch = line.match(/^(#{1,6})\\s+(.*)$/u);\n\n if (headingMatch) {\n flush();\n currentHeading = headingMatch[2].trim();\n continue;\n }\n\n currentLines.push(line);\n }\n\n flush();\n\n const chunks: DocumentChunk[] = [];\n let chunkIndex = 0;\n\n for (const rawChunk of rawChunks) {\n const { heading, content: chunkContent, includeHeading } = rawChunk;\n const trimmed = chunkContent.trim();\n\n if (trimmed.length < MIN_PERSISTED_CHARS && chunks.length > 0) {\n const last = chunks[chunks.length - 1];\n\n if (last.text.length + 2 + trimmed.length <= MAX_CHUNK_CHARS) {\n last.text = `${last.text}\\n\\n${trimmed}`;\n continue;\n }\n }\n\n let text = trimmed;\n\n if (heading && includeHeading && !trimmed.startsWith(heading)) {\n text = `${heading}\\n\\n${trimmed}`;\n }\n\n const id = `${toPosixPath(relativePath)}:chunk-${chunkIndex.toString().padStart(4, '0')}`;\n\n chunks.push({\n id,\n text,\n metadata: {\n source: toPosixPath(relativePath),\n heading,\n },\n });\n\n chunkIndex += 1;\n }\n\n return chunks;\n};\n\nconst embedChunks = async (chunks: DocumentChunk[], model: string, batchSize: number): Promise<EmbeddingChunk[]> => {\n if (chunks.length === 0) {\n return [];\n }\n\n const extractor = (await 
pipeline('feature-extraction', model)) as FeatureExtractionFunction;\n const embedded: EmbeddingChunk[] = [];\n\n for (let index = 0; index < chunks.length; index += batchSize) {\n const batch = chunks.slice(index, index + batchSize);\n const inputs = batch.map((chunk) => chunk.text);\n const tensor = await extractor(inputs, { pooling: 'mean' });\n const values = normalizeVectors(tensorToVectors(tensor));\n\n if (values.length !== batch.length) {\n throw new Error('Embedding batch size mismatch');\n }\n\n batch.forEach((chunk, batchIndex) => {\n embedded.push({\n ...chunk,\n embedding: values[batchIndex],\n });\n });\n\n process.stdout.write(`Embedded ${Math.min(index + batch.length, chunks.length)} / ${chunks.length} chunks\\r`);\n }\n\n process.stdout.write('\\n');\n\n return embedded;\n};\n\nconst buildEmbeddingIndex = async () => {\n const model = process.env.DOCS_MCP_EMBEDDING_MODEL ?? DEFAULT_EMBEDDING_MODEL;\n const parsedBatch = Number.parseInt(process.env.DOCS_MCP_EMBEDDING_BATCH ?? '', 10);\n const batchSize = Number.isNaN(parsedBatch) || parsedBatch < 1 ? DEFAULT_EMBEDDING_BATCH_SIZE : parsedBatch;\n\n const files = await collectFiles(RAG_SOURCE_DIR);\n\n if (files.length === 0) {\n throw new Error(`No source documents found in ${RAG_SOURCE_DIR}`);\n }\n\n const allChunks: DocumentChunk[] = [];\n\n for (const filePath of files) {\n const relativePath = path.relative(RAG_SOURCE_DIR, filePath);\n const contents = await readFile(filePath, 'utf-8');\n const chunks = chunkDocument(relativePath, contents);\n allChunks.push(...chunks);\n }\n\n if (allChunks.length === 0) {\n throw new Error('No document chunks generated. Check chunking configuration.');\n }\n\n console.log(`Embedding ${allChunks.length} chunks from ${files.length} documents using ${model}`);\n\n const embeddedChunks = await embedChunks(allChunks, model, batchSize);\n\n if (embeddedChunks.length === 0) {\n throw new Error('Failed to generate any embeddings');\n }\n\n const dimension = embeddedChunks[0].embedding.length;\n\n const index: EmbeddingIndex = {\n model,\n dimension,\n createdAt: new Date().toISOString(),\n chunks: embeddedChunks,\n };\n\n await saveEmbeddingIndex(index, INDEX_PATH);\n\n console.log(`Saved embedding index with ${embeddedChunks.length} vectors to ${INDEX_PATH}`);\n};\n\nconst invokedFromCli = (): boolean => {\n const entry = process.argv[1];\n\n if (!entry) {\n return false;\n }\n\n return import.meta.url === pathToFileURL(entry).href;\n};\n\nif (invokedFromCli()) {\n buildEmbeddingIndex().catch((error) => {\n console.error('Failed to build embedding index:', error);\n process.exitCode = 1;\n });\n}\n"]}
package/dist/rag/constants.d.ts ADDED
@@ -0,0 +1,7 @@
+ export declare const INDEX_FILENAME = "rag-index.json";
+ export declare const DEFAULT_EMBEDDING_MODEL = "Xenova/all-MiniLM-L6-v2";
+ export declare const DEFAULT_EMBEDDING_BATCH_SIZE = 8;
+ export declare const PACKAGE_ROOT: string;
+ export declare const resolvePackagePath: (...segments: string[]) => string;
+ export declare const RAG_SOURCE_DIR: string;
+ export declare const INDEX_PATH: string;
package/dist/rag/constants.js ADDED
@@ -0,0 +1,14 @@
+ import path from 'node:path';
+ import { fileURLToPath } from 'node:url';
+ export const INDEX_FILENAME = 'rag-index.json';
+ export const DEFAULT_EMBEDDING_MODEL = 'Xenova/all-MiniLM-L6-v2';
+ export const DEFAULT_EMBEDDING_BATCH_SIZE = 8;
+ const currentFile = fileURLToPath(import.meta.url);
+ const currentDir = path.dirname(currentFile);
+ // Going up two levels: dist/rag -> dist -> package root.
+ // Works for both src and dist outputs because their directory structures mirror each other.
+ export const PACKAGE_ROOT = path.resolve(currentDir, '../../');
+ export const resolvePackagePath = (...segments) => path.join(PACKAGE_ROOT, ...segments);
+ export const RAG_SOURCE_DIR = resolvePackagePath('rag-source');
+ export const INDEX_PATH = resolvePackagePath(INDEX_FILENAME);
+ //# sourceMappingURL=constants.js.map
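A worked example of the path resolution above, using a hypothetical install location; the real paths depend on where the package is installed.

import path from 'node:path';

// Hypothetical location of the compiled module at runtime:
const currentDir = '/app/node_modules/@ceed/docs-mcp/dist/rag';
// '../../' climbs dist/rag -> dist -> package root:
const packageRoot = path.resolve(currentDir, '../../');
console.log(packageRoot);                              // /app/node_modules/@ceed/docs-mcp
console.log(path.join(packageRoot, 'rag-source'));     // RAG_SOURCE_DIR
console.log(path.join(packageRoot, 'rag-index.json')); // INDEX_PATH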
package/dist/rag/constants.js.map ADDED
@@ -0,0 +1 @@
+ {"version":3,"file":"constants.js","sourceRoot":"","sources":["../../src/rag/constants.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AAEzC,MAAM,CAAC,MAAM,cAAc,GAAG,gBAAgB,CAAC;AAC/C,MAAM,CAAC,MAAM,uBAAuB,GAAG,yBAAyB,CAAC;AACjE,MAAM,CAAC,MAAM,4BAA4B,GAAG,CAAC,CAAC;AAE9C,MAAM,WAAW,GAAG,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AACnD,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO,CAAC,WAAW,CAAC,CAAC;AAE7C,2DAA2D;AAC3D,2EAA2E;AAC3E,MAAM,CAAC,MAAM,YAAY,GAAG,IAAI,CAAC,OAAO,CAAC,UAAU,EAAE,QAAQ,CAAC,CAAC;AAE/D,MAAM,CAAC,MAAM,kBAAkB,GAAG,CAAC,GAAG,QAAkB,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,GAAG,QAAQ,CAAC,CAAC;AAElG,MAAM,CAAC,MAAM,cAAc,GAAG,kBAAkB,CAAC,YAAY,CAAC,CAAC;AAC/D,MAAM,CAAC,MAAM,UAAU,GAAG,kBAAkB,CAAC,cAAc,CAAC,CAAC","sourcesContent":["import path from 'node:path';\nimport { fileURLToPath } from 'node:url';\n\nexport const INDEX_FILENAME = 'rag-index.json';\nexport const DEFAULT_EMBEDDING_MODEL = 'Xenova/all-MiniLM-L6-v2';\nexport const DEFAULT_EMBEDDING_BATCH_SIZE = 8;\n\nconst currentFile = fileURLToPath(import.meta.url);\nconst currentDir = path.dirname(currentFile);\n\n// Going up three levels: dist/rag -> dist -> package root.\n// Works both for src and dist outputs because directory structure mirrors.\nexport const PACKAGE_ROOT = path.resolve(currentDir, '../../');\n\nexport const resolvePackagePath = (...segments: string[]) => path.join(PACKAGE_ROOT, ...segments);\n\nexport const RAG_SOURCE_DIR = resolvePackagePath('rag-source');\nexport const INDEX_PATH = resolvePackagePath(INDEX_FILENAME);\n"]}
package/dist/rag/embeddings.d.ts ADDED
@@ -0,0 +1,8 @@
+ export declare const normalizeVector: (vector: number[]) => number[];
+ export declare const normalizeVectors: (vectors: number[][]) => number[][];
+ export declare const tensorToVectors: (tensor: unknown) => number[][];
+ export declare const tensorToVector: (tensor: unknown) => number[];
+ export type FeatureExtractionOptions = {
+     pooling?: 'mean' | 'max';
+ };
+ export type FeatureExtractionFunction = (inputs: string | string[], options?: FeatureExtractionOptions) => Promise<unknown>;
package/dist/rag/embeddings.js ADDED
@@ -0,0 +1,61 @@
+ const isTypedArray = (value) => {
+     return value instanceof Float32Array || value instanceof Float64Array;
+ };
+ const toNumber = (value) => {
+     if (typeof value === 'number') {
+         return value;
+     }
+     if (typeof value === 'bigint') {
+         return Number(value);
+     }
+     if (typeof value === 'string') {
+         const parsed = Number(value);
+         if (!Number.isNaN(parsed)) {
+             return parsed;
+         }
+     }
+     throw new Error('Encountered non-numeric value while converting tensor output');
+ };
+ const mapToNumberArray = (collection) => collection.map((item) => toNumber(item));
+ export const normalizeVector = (vector) => {
+     let sumOfSquares = 0;
+     for (const value of vector) {
+         sumOfSquares += value * value;
+     }
+     if (sumOfSquares === 0) {
+         return vector.map(() => 0);
+     }
+     const norm = Math.sqrt(sumOfSquares);
+     return vector.map((value) => value / norm);
+ };
+ export const normalizeVectors = (vectors) => vectors.map((vector) => normalizeVector(vector));
+ export const tensorToVectors = (tensor) => {
+     if (!tensor) {
+         throw new Error('Received empty embedding tensor');
+     }
+     if (typeof tensor.tolist === 'function') {
+         const raw = tensor.tolist();
+         return tensorToVectors(raw);
+     }
+     if (Array.isArray(tensor)) {
+         if (tensor.length === 0) {
+             return [];
+         }
+         if (Array.isArray(tensor[0])) {
+             return tensor.map((vector) => mapToNumberArray(vector));
+         }
+         return [mapToNumberArray(tensor)];
+     }
+     if (isTypedArray(tensor)) {
+         return [Array.from(tensor, (value) => Number(value))];
+     }
+     throw new Error('Unsupported embedding tensor output');
+ };
+ export const tensorToVector = (tensor) => {
+     const vectors = tensorToVectors(tensor);
+     if (vectors.length === 0) {
+         throw new Error('Embedding tensor did not contain any vectors');
+     }
+     return vectors[0];
+ };
+ //# sourceMappingURL=embeddings.js.map
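A quick check of the conversion and normalization semantics above. The relative import path is an assumption for illustration; the package's export map is not part of this diff.

import { normalizeVector, tensorToVector, tensorToVectors } from './dist/rag/embeddings.js';

// tensorToVectors accepts nested arrays, typed arrays, or objects exposing tolist().
console.log(tensorToVectors(new Float32Array([1, 2]))); // [[1, 2]]
console.log(tensorToVectors([[1, 2], [3, 4]]));         // [[1, 2], [3, 4]]

// tensorToVector takes the first row; normalizeVector scales it to unit length.
console.log(normalizeVector(tensorToVector([[3, 4]]))); // [0.6, 0.8]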
package/dist/rag/embeddings.js.map ADDED
@@ -0,0 +1 @@
+ {"version":3,"file":"embeddings.js","sourceRoot":"","sources":["../../src/rag/embeddings.ts"],"names":[],"mappings":"AAAA,MAAM,YAAY,GAAG,CAAC,KAAc,EAAwC,EAAE;IAC1E,OAAO,KAAK,YAAY,YAAY,IAAI,KAAK,YAAY,YAAY,CAAC;AAC1E,CAAC,CAAC;AAEF,MAAM,QAAQ,GAAG,CAAC,KAAc,EAAU,EAAE;IACxC,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;QAC5B,OAAO,KAAK,CAAC;IACjB,CAAC;IAED,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;QAC5B,OAAO,MAAM,CAAC,KAAK,CAAC,CAAC;IACzB,CAAC;IAED,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;QAC5B,MAAM,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC;QAE7B,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,EAAE,CAAC;YACxB,OAAO,MAAM,CAAC;QAClB,CAAC;IACL,CAAC;IAED,MAAM,IAAI,KAAK,CAAC,8DAA8D,CAAC,CAAC;AACpF,CAAC,CAAC;AAEF,MAAM,gBAAgB,GAAG,CAAC,UAAqB,EAAY,EAAE,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC;AAEvG,MAAM,CAAC,MAAM,eAAe,GAAG,CAAC,MAAgB,EAAY,EAAE;IAC1D,IAAI,YAAY,GAAG,CAAC,CAAC;IAErB,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;QACzB,YAAY,IAAI,KAAK,GAAG,KAAK,CAAC;IAClC,CAAC;IAED,IAAI,YAAY,KAAK,CAAC,EAAE,CAAC;QACrB,OAAO,MAAM,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC;IAC/B,CAAC;IAED,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;IAErC,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,GAAG,IAAI,CAAC,CAAC;AAC/C,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,gBAAgB,GAAG,CAAC,OAAmB,EAAc,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,eAAe,CAAC,MAAM,CAAC,CAAC,CAAC;AAEtH,MAAM,CAAC,MAAM,eAAe,GAAG,CAAC,MAAe,EAAc,EAAE;IAC3D,IAAI,CAAC,MAAM,EAAE,CAAC;QACV,MAAM,IAAI,KAAK,CAAC,iCAAiC,CAAC,CAAC;IACvD,CAAC;IAED,IAAI,OAAQ,MAAqC,CAAC,MAAM,KAAK,UAAU,EAAE,CAAC;QACtE,MAAM,GAAG,GAAI,MAAoC,CAAC,MAAM,EAAE,CAAC;QAC3D,OAAO,eAAe,CAAC,GAAG,CAAC,CAAC;IAChC,CAAC;IAED,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC;QACxB,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACtB,OAAO,EAAE,CAAC;QACd,CAAC;QAED,IAAI,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAC3B,OAAQ,MAAsB,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,gBAAgB,CAAC,MAAM,CAAC,CAAC,CAAC;QAC7E,CAAC;QAED,OAAO,CAAC,gBAAgB,CAAC,MAAmB,CAAC,CAAC,CAAC;IACnD,CAAC;IAED,IAAI,YAAY,CAAC,MAAM,CAAC,EAAE,CAAC;QACvB,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAC1D,CAAC;IAED,MAAM,IAAI,KAAK,CAAC,qCAAqC,CAAC,CAAC;AAC3D,CAAC,CAAC;AAEF,MAAM,CAAC,MAAM,cAAc,GAAG,CAAC,MAAe,EAAY,EAAE;IACxD,MAAM,OAAO,GAAG,eAAe,CAAC,MAAM,CAAC,CAAC;IAExC,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvB,MAAM,IAAI,KAAK,CAAC,8CAA8C,CAAC,CAAC;IACpE,CAAC;IAED,OAAO,OAAO,CAAC,CAAC,CAAC,CAAC;AACtB,CAAC,CAAC","sourcesContent":["const isTypedArray = (value: unknown): value is Float32Array | Float64Array => {\n return value instanceof Float32Array || value instanceof Float64Array;\n};\n\nconst toNumber = (value: unknown): number => {\n if (typeof value === 'number') {\n return value;\n }\n\n if (typeof value === 'bigint') {\n return Number(value);\n }\n\n if (typeof value === 'string') {\n const parsed = Number(value);\n\n if (!Number.isNaN(parsed)) {\n return parsed;\n }\n }\n\n throw new Error('Encountered non-numeric value while converting tensor output');\n};\n\nconst mapToNumberArray = (collection: unknown[]): number[] => collection.map((item) => toNumber(item));\n\nexport const normalizeVector = (vector: number[]): number[] => {\n let sumOfSquares = 0;\n\n for (const value of vector) {\n sumOfSquares += value * value;\n }\n\n if (sumOfSquares === 0) {\n return vector.map(() => 0);\n }\n\n const norm = Math.sqrt(sumOfSquares);\n\n return vector.map((value) => value / norm);\n};\n\nexport const normalizeVectors = (vectors: number[][]): number[][] => vectors.map((vector) => 
normalizeVector(vector));\n\nexport const tensorToVectors = (tensor: unknown): number[][] => {\n if (!tensor) {\n throw new Error('Received empty embedding tensor');\n }\n\n if (typeof (tensor as { tolist?: () => unknown }).tolist === 'function') {\n const raw = (tensor as { tolist: () => unknown }).tolist();\n return tensorToVectors(raw);\n }\n\n if (Array.isArray(tensor)) {\n if (tensor.length === 0) {\n return [];\n }\n\n if (Array.isArray(tensor[0])) {\n return (tensor as unknown[][]).map((vector) => mapToNumberArray(vector));\n }\n\n return [mapToNumberArray(tensor as unknown[])];\n }\n\n if (isTypedArray(tensor)) {\n return [Array.from(tensor, (value) => Number(value))];\n }\n\n throw new Error('Unsupported embedding tensor output');\n};\n\nexport const tensorToVector = (tensor: unknown): number[] => {\n const vectors = tensorToVectors(tensor);\n\n if (vectors.length === 0) {\n throw new Error('Embedding tensor did not contain any vectors');\n }\n\n return vectors[0];\n};\nexport type FeatureExtractionOptions = {\n pooling?: 'mean' | 'max';\n};\n\nexport type FeatureExtractionFunction = (\n inputs: string | string[],\n options?: FeatureExtractionOptions,\n) => Promise<unknown>;\n"]}
package/dist/rag/search.d.ts ADDED
@@ -0,0 +1,12 @@
+ import type { SearchResultsPage } from './types.js';
+ export declare class RagSearcher {
+     private indexPromise?;
+     private embedderPromise?;
+     private readonly indexPath;
+     constructor(indexPath?: string);
+     search(query: string, topK?: number, packageName?: string, page?: number): Promise<SearchResultsPage>;
+     private getIndex;
+     private loadIndex;
+     private getEmbedder;
+ }
+ export declare const createRagSearcher: (indexPath?: string) => RagSearcher;
package/dist/rag/search.js ADDED
@@ -0,0 +1,158 @@
+ import { pipeline } from '@xenova/transformers';
+ import { DEFAULT_EMBEDDING_MODEL, INDEX_PATH } from './constants.js';
+ import { normalizeVector, tensorToVector } from './embeddings.js';
+ import { loadEmbeddingIndex } from './storage.js';
+ const MAX_TOP_K = 100;
+ const toFloat32 = (values, dimension) => {
+     if (values.length !== dimension) {
+         throw new Error(`Embedding dimension mismatch. Expected ${dimension}, received ${values.length}`);
+     }
+     return Float32Array.from(values);
+ };
+ const cosineSimilarity = (a, b) => {
+     if (a.length !== b.length) {
+         throw new Error('Embedding vectors have different dimensions');
+     }
+     let dot = 0;
+     for (let index = 0; index < a.length; index += 1) {
+         dot += a[index] * b[index];
+     }
+     return dot;
+ };
+ export class RagSearcher {
+     indexPromise;
+     embedderPromise;
+     indexPath;
+     constructor(indexPath = INDEX_PATH) {
+         this.indexPath = indexPath;
+     }
+     async search(query, topK = 5, packageName, page = 1) {
+         const sanitizedQuery = query.trim();
+         const pageSize = clampPageSize(topK);
+         const currentPage = clampPageNumber(page);
+         if (!sanitizedQuery) {
+             return emptySearchPage(pageSize, currentPage);
+         }
+         const index = await this.getIndex();
+         if (index.chunks.length === 0) {
+             return emptySearchPage(pageSize, currentPage);
+         }
+         const filteredChunks = filterChunksByPackage(index.chunks, packageName);
+         if (filteredChunks.length === 0) {
+             return emptySearchPage(pageSize, currentPage);
+         }
+         const embedder = await this.getEmbedder(index.model);
+         const tensor = await embedder(sanitizedQuery, { pooling: 'mean' });
+         const queryVector = Float32Array.from(normalizeVector(tensorToVector(tensor)));
+         const ranked = filteredChunks
+             .map((chunk) => ({
+                 chunk,
+                 score: cosineSimilarity(queryVector, chunk.embedding),
+             }))
+             .filter((item) => Number.isFinite(item.score))
+             .sort((left, right) => right.score - left.score);
+         const total = ranked.length;
+         if (total === 0) {
+             return emptySearchPage(pageSize, currentPage);
+         }
+         const startIndex = (currentPage - 1) * pageSize;
+         if (startIndex >= total) {
+             return {
+                 results: [],
+                 total,
+                 page: currentPage,
+                 pageSize,
+                 hasMore: false,
+             };
+         }
+         const rows = ranked.slice(startIndex, startIndex + pageSize);
+         const results = rows.map(({ chunk, score }) => ({
+             id: chunk.id,
+             text: chunk.text,
+             metadata: chunk.metadata,
+             score,
+         }));
+         const hasMore = startIndex + results.length < total;
+         return {
+             results,
+             total,
+             page: currentPage,
+             pageSize,
+             hasMore,
+         };
+     }
+     async getIndex() {
+         if (!this.indexPromise) {
+             this.indexPromise = this.loadIndex();
+         }
+         return this.indexPromise;
+     }
+     async loadIndex() {
+         const persisted = await loadEmbeddingIndex(this.indexPath).catch((error) => {
+             if (isErrnoException(error) && error.code === 'ENOENT') {
+                 throw new Error(`Embedding index not found at ${this.indexPath}. Run the build index script before searching.`);
+             }
+             throw error;
+         });
+         const dimension = persisted.dimension;
+         const chunks = persisted.chunks.map((chunk) => ({
+             ...chunk,
+             embedding: toFloat32(normalizeVector(chunk.embedding), dimension),
+         }));
+         return {
+             ...persisted,
+             model: persisted.model ?? DEFAULT_EMBEDDING_MODEL,
+             chunks,
+         };
+     }
+     async getEmbedder(modelFromIndex) {
+         if (!this.embedderPromise) {
+             const model = process.env.DOCS_MCP_EMBEDDING_MODEL ?? modelFromIndex ?? DEFAULT_EMBEDDING_MODEL;
+             this.embedderPromise = pipeline('feature-extraction', model);
+         }
+         return this.embedderPromise;
+     }
+ }
+ export const createRagSearcher = (indexPath = INDEX_PATH) => new RagSearcher(indexPath);
+ const isErrnoException = (error) => {
+     return typeof error === 'object' && error !== null && 'code' in error;
+ };
+ const filterChunksByPackage = (chunks, packageName) => {
+     if (!packageName) {
+         return chunks;
+     }
+     const normalized = packageName.trim().toLowerCase();
+     if (!normalized) {
+         return chunks;
+     }
+     const prefix = `${normalized}/`;
+     return chunks.filter((chunk) => chunk.metadata.source.startsWith(prefix));
+ };
+ const clampPageSize = (value) => {
+     if (!Number.isFinite(value)) {
+         return 5;
+     }
+     const rounded = Math.floor(value);
+     if (rounded < 1) {
+         return 1;
+     }
+     if (rounded > MAX_TOP_K) {
+         return MAX_TOP_K;
+     }
+     return rounded;
+ };
+ const clampPageNumber = (value) => {
+     if (!Number.isFinite(value)) {
+         return 1;
+     }
+     const rounded = Math.floor(value);
+     return rounded < 1 ? 1 : rounded;
+ };
+ const emptySearchPage = (pageSize, page) => ({
+     results: [],
+     total: 0,
+     page,
+     pageSize,
+     hasMore: false,
+ });
+ //# sourceMappingURL=search.js.map
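A hedged usage sketch of the searcher above. The import path mirrors the dist layout in this diff rather than a confirmed package export, and the query string is illustrative.

import { createRagSearcher } from './dist/rag/search.js';

// Defaults to INDEX_PATH (rag-index.json at the package root) when no path is given.
const searcher = createRagSearcher();

// search(query, topK = 5, packageName?, page = 1); topK is clamped to 1..100.
const page = await searcher.search('how do I start the docs server?', 5);
console.log(`${page.total} matches, hasMore=${page.hasMore}`);
for (const { score, metadata } of page.results) {
    console.log(score.toFixed(3), metadata.source, metadata.heading ?? '');
}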
package/dist/rag/search.js.map ADDED
@@ -0,0 +1 @@
+ {"version":3,"file":"search.js","sourceRoot":"","sources":["../../src/rag/search.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,sBAAsB,CAAC;AAEhD,OAAO,EAAE,uBAAuB,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AACrE,OAAO,EAAkC,eAAe,EAAE,cAAc,EAAE,MAAM,iBAAiB,CAAC;AAClG,OAAO,EAAE,kBAAkB,EAAE,MAAM,cAAc,CAAC;AAWlD,MAAM,SAAS,GAAG,GAAG,CAAC;AAEtB,MAAM,SAAS,GAAG,CAAC,MAAgB,EAAE,SAAiB,EAAgB,EAAE;IACpE,IAAI,MAAM,CAAC,MAAM,KAAK,SAAS,EAAE,CAAC;QAC9B,MAAM,IAAI,KAAK,CAAC,0CAA0C,SAAS,cAAc,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;IACtG,CAAC;IAED,OAAO,YAAY,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;AACrC,CAAC,CAAC;AAEF,MAAM,gBAAgB,GAAG,CAAC,CAAe,EAAE,CAAe,EAAU,EAAE;IAClE,IAAI,CAAC,CAAC,MAAM,KAAK,CAAC,CAAC,MAAM,EAAE,CAAC;QACxB,MAAM,IAAI,KAAK,CAAC,6CAA6C,CAAC,CAAC;IACnE,CAAC;IAED,IAAI,GAAG,GAAG,CAAC,CAAC;IAEZ,KAAK,IAAI,KAAK,GAAG,CAAC,EAAE,KAAK,GAAG,CAAC,CAAC,MAAM,EAAE,KAAK,IAAI,CAAC,EAAE,CAAC;QAC/C,GAAG,IAAI,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;IAC/B,CAAC;IAED,OAAO,GAAG,CAAC;AACf,CAAC,CAAC;AAEF,MAAM,OAAO,WAAW;IACZ,YAAY,CAAkC;IAE9C,eAAe,CAAsC;IAE5C,SAAS,CAAS;IAEnC,YAAY,YAAoB,UAAU;QACtC,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC;IAC/B,CAAC;IAED,KAAK,CAAC,MAAM,CAAC,KAAa,EAAE,OAAe,CAAC,EAAE,WAAoB,EAAE,OAAe,CAAC;QAChF,MAAM,cAAc,GAAG,KAAK,CAAC,IAAI,EAAE,CAAC;QACpC,MAAM,QAAQ,GAAG,aAAa,CAAC,IAAI,CAAC,CAAC;QACrC,MAAM,WAAW,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;QAE1C,IAAI,CAAC,cAAc,EAAE,CAAC;YAClB,OAAO,eAAe,CAAC,QAAQ,EAAE,WAAW,CAAC,CAAC;QAClD,CAAC;QAED,MAAM,KAAK,GAAG,MAAM,IAAI,CAAC,QAAQ,EAAE,CAAC;QAEpC,IAAI,KAAK,CAAC,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC5B,OAAO,eAAe,CAAC,QAAQ,EAAE,WAAW,CAAC,CAAC;QAClD,CAAC;QAED,MAAM,cAAc,GAAG,qBAAqB,CAAC,KAAK,CAAC,MAAM,EAAE,WAAW,CAAC,CAAC;QAExE,IAAI,cAAc,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC9B,OAAO,eAAe,CAAC,QAAQ,EAAE,WAAW,CAAC,CAAC;QAClD,CAAC;QAED,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,WAAW,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QACrD,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,cAAc,EAAE,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC,CAAC;QACnE,MAAM,WAAW,GAAG,YAAY,CAAC,IAAI,CAAC,eAAe,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;QAE/E,MAAM,MAAM,GAAG,cAAc;aACxB,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;YACb,KAAK;YACL,KAAK,EAAE,gBAAgB,CAAC,WAAW,EAAE,KAAK,CAAC,SAAS,CAAC;SACxD,CAAC,CAAC;aACF,MAAM,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;aAC7C,IAAI,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC;QAErD,MAAM,KAAK,GAAG,MAAM,CAAC,MAAM,CAAC;QAE5B,IAAI,KAAK,KAAK,CAAC,EAAE,CAAC;YACd,OAAO,eAAe,CAAC,QAAQ,EAAE,WAAW,CAAC,CAAC;QAClD,CAAC;QAED,MAAM,UAAU,GAAG,CAAC,WAAW,GAAG,CAAC,CAAC,GAAG,QAAQ,CAAC;QAEhD,IAAI,UAAU,IAAI,KAAK,EAAE,CAAC;YACtB,OAAO;gBACH,OAAO,EAAE,EAAE;gBACX,KAAK;gBACL,IAAI,EAAE,WAAW;gBACjB,QAAQ;gBACR,OAAO,EAAE,KAAK;aACjB,CAAC;QACN,CAAC;QAED,MAAM,IAAI,GAAG,MAAM,CAAC,KAAK,CAAC,UAAU,EAAE,UAAU,GAAG,QAAQ,CAAC,CAAC;QAE7D,MAAM,OAAO,GAAmB,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC,CAAC;YAC5D,EAAE,EAAE,KAAK,CAAC,EAAE;YACZ,IAAI,EAAE,KAAK,CAAC,IAAI;YAChB,QAAQ,EAAE,KAAK,CAAC,QAAQ;YACxB,KAAK;SACR,CAAC,CAAC,CAAC;QAEJ,MAAM,OAAO,GAAG,UAAU,GAAG,OAAO,CAAC,MAAM,GAAG,KAAK,CAAC;QAEpD,OAAO;YACH,OAAO;YACP,KAAK;YACL,IAAI,EAAE,WAAW;YACjB,QAAQ;YACR,OAAO;SACV,CAAC;IACN,CAAC;IAEO,KAAK,CAAC,QAAQ;QAClB,IAAI,CAAC,IAAI,CAAC,YAAY,EAAE,CAAC;YACrB,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC,SAAS,EAAE,CAAC;QACzC,CAAC;QAED,OAAO,IAAI,CAAC,YAAY,CAAC;IAC7B,CAAC;IAEO,KAAK,CAAC,SAAS;QACnB,MAAM,SAAS,GAAG,MAAM,kBAAkB,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,KAAK,CAAC,CAAC,KAAc,EAAE,EAAE;YAChF,IAAI,gBAAgB,CAAC,KAAK,CAAC,IAAI,KAAK,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;gBACrD,MAAM,IAAI,KAAK,CACX,gCAAgC,IAAI,CAAC,SAAS,gDAAgD,CACjG,CAAC;YACN,CAAC;YAED,MAAM,KAAK,CAAC;QAC
hB,CAAC,CAAC,CAAC;QACH,MAAM,SAAS,GAAG,SAAS,CAAC,SAAS,CAAC;QAEtC,MAAM,MAAM,GAAG,SAAS,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;YAC5C,GAAG,KAAK;YACR,SAAS,EAAE,SAAS,CAAC,eAAe,CAAC,KAAK,CAAC,SAAS,CAAC,EAAE,SAAS,CAAC;SACpE,CAAC,CAAC,CAAC;QAEJ,OAAO;YACH,GAAG,SAAS;YACZ,KAAK,EAAE,SAAS,CAAC,KAAK,IAAI,uBAAuB;YACjD,MAAM;SACT,CAAC;IACN,CAAC;IAEO,KAAK,CAAC,WAAW,CAAC,cAAsB;QAC5C,IAAI,CAAC,IAAI,CAAC,eAAe,EAAE,CAAC;YACxB,MAAM,KAAK,GAAG,OAAO,CAAC,GAAG,CAAC,wBAAwB,IAAI,cAAc,IAAI,uBAAuB,CAAC;YAChG,IAAI,CAAC,eAAe,GAAG,QAAQ,CAAC,oBAAoB,EAAE,KAAK,CAAuC,CAAC;QACvG,CAAC;QAED,OAAO,IAAI,CAAC,eAAe,CAAC;IAChC,CAAC;CACJ;AAED,MAAM,CAAC,MAAM,iBAAiB,GAAG,CAAC,YAAoB,UAAU,EAAE,EAAE,CAAC,IAAI,WAAW,CAAC,SAAS,CAAC,CAAC;AAEhG,MAAM,gBAAgB,GAAG,CAAC,KAAc,EAAkC,EAAE;IACxE,OAAO,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,IAAI,MAAM,IAAI,KAAK,CAAC;AAC1E,CAAC,CAAC;AAEF,MAAM,qBAAqB,GAAG,CAAC,MAA+B,EAAE,WAAoB,EAA2B,EAAE;IAC7G,IAAI,CAAC,WAAW,EAAE,CAAC;QACf,OAAO,MAAM,CAAC;IAClB,CAAC;IAED,MAAM,UAAU,GAAG,WAAW,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;IAEpD,IAAI,CAAC,UAAU,EAAE,CAAC;QACd,OAAO,MAAM,CAAC;IAClB,CAAC;IAED,MAAM,MAAM,GAAG,GAAG,UAAU,GAAG,CAAC;IAEhC,OAAO,MAAM,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,QAAQ,CAAC,MAAM,CAAC,UAAU,CAAC,MAAM,CAAC,CAAC,CAAC;AAC9E,CAAC,CAAC;AAEF,MAAM,aAAa,GAAG,CAAC,KAAa,EAAU,EAAE;IAC5C,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QAC1B,OAAO,CAAC,CAAC;IACb,CAAC;IAED,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IAElC,IAAI,OAAO,GAAG,CAAC,EAAE,CAAC;QACd,OAAO,CAAC,CAAC;IACb,CAAC;IAED,IAAI,OAAO,GAAG,SAAS,EAAE,CAAC;QACtB,OAAO,SAAS,CAAC;IACrB,CAAC;IAED,OAAO,OAAO,CAAC;AACnB,CAAC,CAAC;AAEF,MAAM,eAAe,GAAG,CAAC,KAAa,EAAU,EAAE;IAC9C,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QAC1B,OAAO,CAAC,CAAC;IACb,CAAC;IAED,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IAElC,OAAO,OAAO,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC;AACrC,CAAC,CAAC;AAEF,MAAM,eAAe,GAAG,CAAC,QAAgB,EAAE,IAAY,EAAqB,EAAE,CAAC,CAAC;IAC5E,OAAO,EAAE,EAAE;IACX,KAAK,EAAE,CAAC;IACR,IAAI;IACJ,QAAQ;IACR,OAAO,EAAE,KAAK;CACjB,CAAC,CAAC","sourcesContent":["import { pipeline } from '@xenova/transformers';\n\nimport { DEFAULT_EMBEDDING_MODEL, INDEX_PATH } from './constants.js';\nimport { type FeatureExtractionFunction, normalizeVector, tensorToVector } from './embeddings.js';\nimport { loadEmbeddingIndex } from './storage.js';\nimport type { DocumentChunk, EmbeddingIndex, SearchResult, SearchResultsPage } from './types.js';\n\ntype RuntimeEmbeddingChunk = DocumentChunk & {\n embedding: Float32Array;\n};\n\ntype RuntimeEmbeddingIndex = Omit<EmbeddingIndex, 'chunks'> & {\n chunks: RuntimeEmbeddingChunk[];\n};\n\nconst MAX_TOP_K = 100;\n\nconst toFloat32 = (values: number[], dimension: number): Float32Array => {\n if (values.length !== dimension) {\n throw new Error(`Embedding dimension mismatch. 
Expected ${dimension}, received ${values.length}`);\n }\n\n return Float32Array.from(values);\n};\n\nconst cosineSimilarity = (a: Float32Array, b: Float32Array): number => {\n if (a.length !== b.length) {\n throw new Error('Embedding vectors have different dimensions');\n }\n\n let dot = 0;\n\n for (let index = 0; index < a.length; index += 1) {\n dot += a[index] * b[index];\n }\n\n return dot;\n};\n\nexport class RagSearcher {\n private indexPromise?: Promise<RuntimeEmbeddingIndex>;\n\n private embedderPromise?: Promise<FeatureExtractionFunction>;\n\n private readonly indexPath: string;\n\n constructor(indexPath: string = INDEX_PATH) {\n this.indexPath = indexPath;\n }\n\n async search(query: string, topK: number = 5, packageName?: string, page: number = 1): Promise<SearchResultsPage> {\n const sanitizedQuery = query.trim();\n const pageSize = clampPageSize(topK);\n const currentPage = clampPageNumber(page);\n\n if (!sanitizedQuery) {\n return emptySearchPage(pageSize, currentPage);\n }\n\n const index = await this.getIndex();\n\n if (index.chunks.length === 0) {\n return emptySearchPage(pageSize, currentPage);\n }\n\n const filteredChunks = filterChunksByPackage(index.chunks, packageName);\n\n if (filteredChunks.length === 0) {\n return emptySearchPage(pageSize, currentPage);\n }\n\n const embedder = await this.getEmbedder(index.model);\n const tensor = await embedder(sanitizedQuery, { pooling: 'mean' });\n const queryVector = Float32Array.from(normalizeVector(tensorToVector(tensor)));\n\n const ranked = filteredChunks\n .map((chunk) => ({\n chunk,\n score: cosineSimilarity(queryVector, chunk.embedding),\n }))\n .filter((item) => Number.isFinite(item.score))\n .sort((left, right) => right.score - left.score);\n\n const total = ranked.length;\n\n if (total === 0) {\n return emptySearchPage(pageSize, currentPage);\n }\n\n const startIndex = (currentPage - 1) * pageSize;\n\n if (startIndex >= total) {\n return {\n results: [],\n total,\n page: currentPage,\n pageSize,\n hasMore: false,\n };\n }\n\n const rows = ranked.slice(startIndex, startIndex + pageSize);\n\n const results: SearchResult[] = rows.map(({ chunk, score }) => ({\n id: chunk.id,\n text: chunk.text,\n metadata: chunk.metadata,\n score,\n }));\n\n const hasMore = startIndex + results.length < total;\n\n return {\n results,\n total,\n page: currentPage,\n pageSize,\n hasMore,\n };\n }\n\n private async getIndex(): Promise<RuntimeEmbeddingIndex> {\n if (!this.indexPromise) {\n this.indexPromise = this.loadIndex();\n }\n\n return this.indexPromise;\n }\n\n private async loadIndex(): Promise<RuntimeEmbeddingIndex> {\n const persisted = await loadEmbeddingIndex(this.indexPath).catch((error: unknown) => {\n if (isErrnoException(error) && error.code === 'ENOENT') {\n throw new Error(\n `Embedding index not found at ${this.indexPath}. Run the build index script before searching.`,\n );\n }\n\n throw error;\n });\n const dimension = persisted.dimension;\n\n const chunks = persisted.chunks.map((chunk) => ({\n ...chunk,\n embedding: toFloat32(normalizeVector(chunk.embedding), dimension),\n }));\n\n return {\n ...persisted,\n model: persisted.model ?? DEFAULT_EMBEDDING_MODEL,\n chunks,\n };\n }\n\n private async getEmbedder(modelFromIndex: string): Promise<FeatureExtractionFunction> {\n if (!this.embedderPromise) {\n const model = process.env.DOCS_MCP_EMBEDDING_MODEL ?? modelFromIndex ?? 
DEFAULT_EMBEDDING_MODEL;\n this.embedderPromise = pipeline('feature-extraction', model) as Promise<FeatureExtractionFunction>;\n }\n\n return this.embedderPromise;\n }\n}\n\nexport const createRagSearcher = (indexPath: string = INDEX_PATH) => new RagSearcher(indexPath);\n\nconst isErrnoException = (error: unknown): error is NodeJS.ErrnoException => {\n return typeof error === 'object' && error !== null && 'code' in error;\n};\n\nconst filterChunksByPackage = (chunks: RuntimeEmbeddingChunk[], packageName?: string): RuntimeEmbeddingChunk[] => {\n if (!packageName) {\n return chunks;\n }\n\n const normalized = packageName.trim().toLowerCase();\n\n if (!normalized) {\n return chunks;\n }\n\n const prefix = `${normalized}/`;\n\n return chunks.filter((chunk) => chunk.metadata.source.startsWith(prefix));\n};\n\nconst clampPageSize = (value: number): number => {\n if (!Number.isFinite(value)) {\n return 5;\n }\n\n const rounded = Math.floor(value);\n\n if (rounded < 1) {\n return 1;\n }\n\n if (rounded > MAX_TOP_K) {\n return MAX_TOP_K;\n }\n\n return rounded;\n};\n\nconst clampPageNumber = (value: number): number => {\n if (!Number.isFinite(value)) {\n return 1;\n }\n\n const rounded = Math.floor(value);\n\n return rounded < 1 ? 1 : rounded;\n};\n\nconst emptySearchPage = (pageSize: number, page: number): SearchResultsPage => ({\n results: [],\n total: 0,\n page,\n pageSize,\n hasMore: false,\n});\n"]}