simple-dynamsoft-mcp 6.3.0 → 7.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/.env.example +35 -9
  2. package/README.md +156 -497
  3. package/package.json +13 -7
  4. package/scripts/prebuild-rag-index.mjs +1 -1
  5. package/scripts/run-gemini-tests.mjs +1 -1
  6. package/scripts/sync-submodules.mjs +1 -1
  7. package/scripts/verify-doc-resources.mjs +79 -0
  8. package/src/data/bootstrap.js +475 -0
  9. package/src/data/download-utils.js +99 -0
  10. package/src/data/hydration-mode.js +15 -0
  11. package/src/data/hydration-policy.js +39 -0
  12. package/src/data/repo-map.js +149 -0
  13. package/src/{data-root.js → data/root.js} +1 -1
  14. package/src/{submodule-sync.js → data/submodule-sync.js} +1 -1
  15. package/src/index.js +49 -1499
  16. package/src/observability/logging.js +51 -0
  17. package/src/rag/config.js +96 -0
  18. package/src/rag/index.js +266 -0
  19. package/src/rag/lexical-provider.js +170 -0
  20. package/src/rag/logger.js +46 -0
  21. package/src/rag/profile-config.js +48 -0
  22. package/src/rag/providers.js +585 -0
  23. package/src/rag/search-utils.js +166 -0
  24. package/src/rag/vector-cache.js +323 -0
  25. package/src/server/create-server.js +168 -0
  26. package/src/server/helpers/server-helpers.js +33 -0
  27. package/src/{resource-index → server/resource-index}/paths.js +2 -2
  28. package/src/{resource-index → server/resource-index}/samples.js +9 -1
  29. package/src/{resource-index.js → server/resource-index.js} +158 -93
  30. package/src/server/resources/register-resources.js +56 -0
  31. package/src/server/runtime-config.js +66 -0
  32. package/src/server/tools/register-index-tools.js +130 -0
  33. package/src/server/tools/register-project-tools.js +305 -0
  34. package/src/server/tools/register-quickstart-tools.js +572 -0
  35. package/src/server/tools/register-sample-tools.js +333 -0
  36. package/src/server/tools/register-version-tools.js +136 -0
  37. package/src/server/transports/http.js +84 -0
  38. package/src/server/transports/stdio.js +12 -0
  39. package/src/data-bootstrap.js +0 -255
  40. package/src/rag.js +0 -1203
  41. /package/src/{gemini-retry.js → rag/gemini-retry.js} +0 -0
  42. /package/src/{normalizers.js → server/normalizers.js} +0 -0
  43. /package/src/{resource-index → server/resource-index}/builders.js +0 -0
  44. /package/src/{resource-index → server/resource-index}/config.js +0 -0
  45. /package/src/{resource-index → server/resource-index}/docs-loader.js +0 -0
  46. /package/src/{resource-index → server/resource-index}/uri.js +0 -0
  47. /package/src/{resource-index → server/resource-index}/version-policy.js +0 -0
package/src/rag.js DELETED
@@ -1,1203 +0,0 @@
1
- import { readFileSync, existsSync, writeFileSync, mkdirSync, readdirSync, copyFileSync, rmSync, statSync } from "node:fs";
2
- import { join, dirname, basename, resolve } from "node:path";
3
- import { createHash } from "node:crypto";
4
- import { fileURLToPath } from "node:url";
5
- import { tmpdir } from "node:os";
6
- import "dotenv/config";
7
- import Fuse from "fuse.js";
8
- import * as tar from "tar";
9
- import { getResolvedDataRoot } from "./data-root.js";
10
- import {
11
- resourceIndex,
12
- resourceIndexByUri,
13
- getSampleEntries,
14
- editionMatches,
15
- platformMatches,
16
- normalizeProduct,
17
- normalizePlatform,
18
- normalizeEdition,
19
- getRagSignatureData
20
- } from "./resource-index.js";
21
- import {
22
- sleepMs,
23
- parseRetryAfterMs,
24
- normalizeGeminiRetryConfig,
25
- isRateLimitGeminiStatus,
26
- GeminiHttpError,
27
- executeWithGeminiRetry
28
- } from "./gemini-retry.js";
29
-
30
// Directory containing this module; ES modules do not provide __dirname natively.
const __dirname = dirname(fileURLToPath(import.meta.url));

// Data root as resolved by the data-root helper; the default cache directories live under it.
const dataRoot = getResolvedDataRoot();

// package.json is read once at load time; its version selects the matching release asset.
const pkgUrl = new URL("../package.json", import.meta.url);
const pkgSource = readFileSync(pkgUrl, "utf8");
const pkg = JSON.parse(pkgSource);

// Default download location of the prebuilt RAG index for this package version.
const defaultPrebuiltIndexUrl =
  `https://github.com/yushulx/simple-dynamsoft-mcp/releases/download/v${pkg.version}/prebuilt-rag-index-${pkg.version}.tar.gz`;
37
-
38
- // ============================================================================
39
- // RAG configuration
40
- // ============================================================================
41
-
42
/**
 * Reads an environment variable, treating "unset" and "empty string" alike.
 *
 * @param {string} key - Name of the environment variable.
 * @param {*} fallback - Value returned when the variable is undefined or "".
 * @returns {*} The raw string from the environment, or `fallback`.
 */
function readEnvValue(key, fallback) {
  const raw = process.env[key];
  const isMissing = raw === undefined || raw === "";
  return isMissing ? fallback : raw;
}
47
-
48
/**
 * Reads a boolean flag from the environment.
 *
 * "1", "true", "yes" and "on" (case-insensitive) count as true; any other
 * non-empty value counts as false.
 *
 * @param {string} key - Name of the environment variable.
 * @param {boolean} fallback - Value returned when the variable is unset or empty.
 * @returns {boolean} The parsed flag, or `fallback`.
 */
function readBoolEnv(key, fallback) {
  const raw = readEnvValue(key, "");
  if (raw) {
    const truthyWords = ["1", "true", "yes", "on"];
    return truthyWords.includes(String(raw).toLowerCase());
  }
  return fallback;
}
53
-
54
/**
 * Reads a base-10 integer from the environment.
 *
 * @param {string} key - Name of the environment variable.
 * @param {number} fallback - Value returned when the variable is unset, empty,
 *   or does not parse to a number.
 * @returns {number} The parsed integer, or `fallback`.
 */
function readIntEnv(key, fallback) {
  const raw = readEnvValue(key, "");
  const parsed = raw ? Number.parseInt(raw, 10) : Number.NaN;
  return Number.isNaN(parsed) ? fallback : parsed;
}
60
-
61
/**
 * Reads a floating-point number from the environment.
 *
 * @param {string} key - Name of the environment variable.
 * @param {number} fallback - Value returned when the variable is unset, empty,
 *   or does not parse to a number.
 * @returns {number} The parsed number, or `fallback`.
 */
function readFloatEnv(key, fallback) {
  const raw = readEnvValue(key, "");
  const parsed = raw ? Number.parseFloat(raw) : Number.NaN;
  return Number.isNaN(parsed) ? fallback : parsed;
}
67
-
68
/**
 * Ensures a Gemini model name carries the "models/" prefix.
 *
 * @param {string} model - Model name, with or without the "models/" prefix.
 * @returns {string} The prefixed model name; "models/embedding-001" when
 *   `model` is empty.
 */
function normalizeGeminiModel(model) {
  if (!model) return "models/embedding-001";
  return model.startsWith("models/") ? model : `models/${model}`;
}
73
-
74
// Runtime configuration for the RAG layer. Every value is read from the
// environment once, at module load, with the default shown when unset or empty.
const ragConfig = {
  // Provider selection.
  provider: readEnvValue("RAG_PROVIDER", "auto").toLowerCase(),
  fallback: readEnvValue("RAG_FALLBACK", "fuse").toLowerCase(),

  // On-disk caches; both default to locations under the data root.
  cacheDir: readEnvValue("RAG_CACHE_DIR", join(dataRoot, ".rag-cache")),
  modelCacheDir: readEnvValue("RAG_MODEL_CACHE_DIR", join(dataRoot, ".rag-cache", "models")),

  // Local embedding model.
  localModel: readEnvValue("RAG_LOCAL_MODEL", "Xenova/all-MiniLM-L6-v2"),
  localQuantized: readBoolEnv("RAG_LOCAL_QUANTIZED", true),

  // Chunking and text limits; these also feed the index signature.
  chunkSize: readIntEnv("RAG_CHUNK_SIZE", 1200),
  chunkOverlap: readIntEnv("RAG_CHUNK_OVERLAP", 200),
  maxChunksPerDoc: readIntEnv("RAG_MAX_CHUNKS_PER_DOC", 6),
  maxTextChars: readIntEnv("RAG_MAX_TEXT_CHARS", 4000),

  // Search result scoring.
  minScore: readFloatEnv("RAG_MIN_SCORE", 0.2),
  includeScore: readBoolEnv("RAG_INCLUDE_SCORE", false),

  // Index build behaviour.
  rebuild: readBoolEnv("RAG_REBUILD", false),
  prewarm: readBoolEnv("RAG_PREWARM", false),
  prewarmBlock: readBoolEnv("RAG_PREWARM_BLOCK", false),

  // Prebuilt index download.
  prebuiltIndexAutoDownload: readBoolEnv("RAG_PREBUILT_INDEX_AUTO_DOWNLOAD", true),
  prebuiltIndexUrl: readEnvValue("RAG_PREBUILT_INDEX_URL", defaultPrebuiltIndexUrl),
  prebuiltIndexTimeoutMs: readIntEnv("RAG_PREBUILT_INDEX_TIMEOUT_MS", 180000),

  // Gemini embeddings: credentials, endpoint, batching, retry and throttle.
  geminiApiKey: readEnvValue("GEMINI_API_KEY", ""),
  geminiModel: normalizeGeminiModel(readEnvValue("GEMINI_EMBED_MODEL", "models/gemini-embedding-001")),
  geminiBaseUrl: readEnvValue("GEMINI_API_BASE_URL", "https://generativelanguage.googleapis.com"),
  geminiBatchSize: readIntEnv("GEMINI_EMBED_BATCH_SIZE", 16),
  geminiRetryMaxAttempts: readIntEnv("GEMINI_RETRY_MAX_ATTEMPTS", 5),
  geminiRetryBaseDelayMs: readIntEnv("GEMINI_RETRY_BASE_DELAY_MS", 500),
  geminiRetryMaxDelayMs: readIntEnv("GEMINI_RETRY_MAX_DELAY_MS", 10000),
  geminiRequestThrottleMs: readIntEnv("GEMINI_REQUEST_THROTTLE_MS", 0)
};
102
-
103
// Flags and sets recording which log messages have already been emitted;
// `localEmbedderInit` makes the local embedder log its init line only once.
const ragLogState = {
  config: false,
  providerChain: false,
  localEmbedderInit: false,
  providerReady: new Set(),
  providerFirstUse: new Set(),
  fallbackUse: new Set()
};

// In-flight or finished prebuilt-index download attempts, keyed by
// `${provider}:${cacheKey}:${sourceUrl}`, so each combination is tried once.
const prebuiltDownloadAttempts = new Map();
113
-
114
/**
 * Writes a RAG diagnostic line to stderr, prefixed with "[rag] ".
 *
 * @param {string} message - Text to log.
 */
function logRag(message) {
  const line = `[rag] ${message}`;
  console.error(line);
}
117
-
118
- // ============================================================================
119
- // RAG search implementation
120
- // ============================================================================
121
-
122
// Fuse.js index over the resource index, built once at module load and
// searching the title, summary, tags and uri fields.
const fuseSearchOptions = {
  includeScore: true,
  ignoreLocation: true,
  threshold: 0.35,
  keys: ["title", "summary", "tags", "uri"]
};
const fuseSearch = new Fuse(resourceIndex, fuseSearchOptions);
128
-
129
/**
 * Optionally decorates a search result with its score.
 *
 * @param {object} entry - Result entry.
 * @param {number} score - Score to attach.
 * @returns {object} A shallow copy of `entry` with `score` added when
 *   `ragConfig.includeScore` is on and `score` is finite; otherwise `entry`
 *   itself, unchanged.
 */
function attachScore(entry, score) {
  const shouldAttach = ragConfig.includeScore && Number.isFinite(score);
  return shouldAttach ? { ...entry, score } : entry;
}
133
-
134
/**
 * Normalizes raw search filters through the resource-index normalizers.
 *
 * The edition is normalized last because its normalizer takes the already
 * normalized platform and product as arguments.
 *
 * @param {{product: *, edition: *, platform: *, type: *}} filters - Raw filters.
 * @returns {{product: *, edition: *, platform: *, type: *}} Normalized filters;
 *   `type` defaults to "any" when falsy.
 */
function normalizeSearchFilters({ product, edition, platform, type }) {
  const scopedProduct = normalizeProduct(product);
  const scopedPlatform = normalizePlatform(platform);
  return {
    product: scopedProduct,
    edition: normalizeEdition(edition, scopedPlatform, scopedProduct),
    platform: scopedPlatform,
    type: type || "any"
  };
}
145
-
146
/**
 * Tests whether a resource entry satisfies the normalized filters.
 *
 * A falsy filter is ignored, and a `type` of "any" matches every entry. The
 * checks run in product, edition, platform, type order and stop at the first
 * failure.
 *
 * @param {object} entry - Resource index entry.
 * @param {{product: *, edition: *, platform: *, type: *}} filters - Normalized filters.
 * @returns {boolean} True when every active filter matches.
 */
function entryMatchesScope(entry, filters) {
  const productOk = () => !filters.product || entry.product === filters.product;
  const editionOk = () => !filters.edition || editionMatches(filters.edition, entry.edition);
  const platformOk = () => !filters.platform || platformMatches(filters.platform, entry);
  const typeOk = () => !filters.type || filters.type === "any" || entry.type === filters.type;
  return Boolean(productOk() && editionOk() && platformOk() && typeOk());
}
153
-
154
/**
 * Collapses every run of whitespace to a single space and trims both ends.
 *
 * @param {*} text - Input; falsy values are treated as the empty string.
 * @returns {string} The whitespace-normalized text.
 */
function normalizeText(text) {
  const source = String(text || "");
  return source.split(/\s+/).filter(Boolean).join(" ");
}
157
-
158
/**
 * Cuts text down to at most `maxChars` characters.
 *
 * @param {string} text - Text to truncate.
 * @param {number} maxChars - Maximum length; a falsy or non-positive value
 *   disables truncation.
 * @returns {string} `text` unchanged when it already fits or truncation is
 *   disabled, otherwise its first `maxChars` characters.
 */
function truncateText(text, maxChars) {
  const unlimited = !maxChars || maxChars <= 0;
  if (unlimited || text.length <= maxChars) return text;
  return text.slice(0, Math.max(0, maxChars));
}
163
-
164
/**
 * Splits whitespace-normalized text into overlapping fixed-size chunks.
 *
 * @param {*} text - Source text; normalized with `normalizeText` first.
 * @param {number} chunkSize - Maximum characters per chunk; a falsy or
 *   non-positive value returns the whole text as one chunk.
 * @param {number} chunkOverlap - Characters shared between consecutive chunks,
 *   clamped to the range [0, chunkSize - 1].
 * @param {number} maxChunks - Stop after this many chunks; falsy means no limit.
 * @returns {string[]} Trimmed, non-empty chunks in order.
 */
function chunkText(text, chunkSize, chunkOverlap, maxChunks) {
  const cleaned = normalizeText(text);
  if (!cleaned) return [];
  if (!chunkSize || chunkSize <= 0) return [cleaned];

  // Overlap is capped below the chunk size so each step moves forward.
  const overlap = Math.min(Math.max(0, chunkOverlap), Math.max(0, chunkSize - 1));
  const pieces = [];
  for (let start = 0; start < cleaned.length; ) {
    const end = Math.min(start + chunkSize, cleaned.length);
    const piece = cleaned.slice(start, end).trim();
    if (piece) pieces.push(piece);
    if (end >= cleaned.length) break;
    if (maxChunks && pieces.length >= maxChunks) break;
    start = Math.max(0, end - overlap);
  }
  return pieces;
}
181
-
182
/**
 * Builds the base embedding text for an entry from its title, summary and tags.
 *
 * @param {{title: *, summary: *, tags: *}} entry - Resource index entry.
 * @returns {string} The non-empty parts joined by newlines (tags joined with
 *   ", "), then passed through `normalizeText`.
 */
function buildEntryBaseText(entry) {
  const hasTags = Array.isArray(entry.tags) && entry.tags.length > 0;
  const parts = hasTags
    ? [entry.title, entry.summary, entry.tags.join(", ")]
    : [entry.title, entry.summary];
  return normalizeText(parts.filter(Boolean).join("\n"));
}
189
-
190
/**
 * Builds the list of texts to embed from the resource index.
 *
 * Entries with no base text are skipped. A "doc" entry that has `embedText`
 * yields one item per chunk, with id `${entry.id}#${index}` and text made of
 * the base text, a blank line, and the chunk. Every other entry, and a doc
 * whose chunking yields nothing, produces a single item from the base text.
 * Each text is capped at `ragConfig.maxTextChars`.
 *
 * @returns {{id: string, uri: string, text: string}[]} Items in index order.
 */
function buildEmbeddingItems() {
  const items = [];
  const addItem = (id, uri, text) => {
    items.push({ id, uri, text: truncateText(text, ragConfig.maxTextChars) });
  };

  for (const entry of resourceIndex) {
    const baseText = buildEntryBaseText(entry);
    if (!baseText) continue;

    const chunks = entry.type === "doc" && entry.embedText
      ? chunkText(entry.embedText, ragConfig.chunkSize, ragConfig.chunkOverlap, ragConfig.maxChunksPerDoc)
      : [];

    if (chunks.length === 0) {
      addItem(entry.id, entry.uri, baseText);
      continue;
    }

    chunks.forEach((chunk, index) => {
      addItem(`${entry.id}#${index}`, entry.uri, [baseText, chunk].filter(Boolean).join("\n\n"));
    });
  }
  return items;
}
223
-
224
/**
 * Serializes everything that determines the content of the vector index.
 *
 * The signature combines the package version, the counts, versions and data
 * sources reported by `getRagSignatureData()`, and the chunking settings from
 * `ragConfig`. The key order is fixed so the resulting string is stable.
 *
 * @returns {string} JSON text of the signature object.
 */
function buildIndexSignature() {
  const source = getRagSignatureData();
  // Fields copied from the signature data, in serialization order.
  const copiedFields = [
    "resourceCount",
    "dcvCoreDocCount",
    "dcvWebDocCount",
    "dcvMobileDocCount",
    "dcvServerDocCount",
    "dbrWebDocCount",
    "dbrMobileDocCount",
    "dbrServerDocCount",
    "dwtDocCount",
    "ddvDocCount",
    "versions",
    "dataSources"
  ];
  const { chunkSize, chunkOverlap, maxChunksPerDoc, maxTextChars } = ragConfig;
  return JSON.stringify({
    packageVersion: pkg.version,
    ...Object.fromEntries(copiedFields.map((field) => [field, source[field]])),
    chunkSize,
    chunkOverlap,
    maxChunksPerDoc,
    maxTextChars
  });
}
246
-
247
/**
 * Creates a directory, including missing parents, unless the path already exists.
 *
 * @param {string} path - Directory path to create.
 */
function ensureDirectory(path) {
  if (existsSync(path)) return;
  mkdirSync(path, { recursive: true });
}
252
-
253
/**
 * Turns a model identifier into a file-name-safe fragment.
 *
 * Runs of characters outside `[a-zA-Z0-9._-]` become "_", and the result is
 * capped at 32 characters. A falsy model becomes "default".
 *
 * @param {*} model - Model identifier.
 * @returns {string} Sanitized fragment.
 */
function sanitizeModelForFileName(model) {
  return String(model || "default").replace(/[^a-zA-Z0-9._-]+/g, "_").slice(0, 32);
}

/**
 * Builds the file name of a vector index cache.
 *
 * @param {string} provider - Embedding provider name.
 * @param {*} model - Model identifier; sanitized before use.
 * @param {string} cacheKey - Cache key; only its first 12 characters are used.
 * @returns {string} `rag-<provider>-<model>-<key>.json`.
 */
function makeCacheFileName(provider, model, cacheKey) {
  return `rag-${provider}-${sanitizeModelForFileName(model)}-${cacheKey.slice(0, 12)}.json`;
}

/**
 * Builds the file name of a vector index build checkpoint.
 *
 * @param {string} provider - Embedding provider name.
 * @param {*} model - Model identifier; sanitized before use.
 * @param {string} cacheKey - Cache key; only its first 12 characters are used.
 * @returns {string} `rag-<provider>-<model>-<key>.checkpoint.json`.
 */
function makeCheckpointFileName(provider, model, cacheKey) {
  return `rag-${provider}-${sanitizeModelForFileName(model)}-${cacheKey.slice(0, 12)}.checkpoint.json`;
}
262
-
263
/**
 * Loads a vector index cache file and checks it against the expected identity.
 *
 * Each expectation is checked only when it is supplied. The provider and model
 * checks are also skipped when the file's `meta` does not carry that field. A
 * missing `meta.signature` is rejected only when `requireSignature` is true.
 *
 * @param {string} cacheFile - Path of the cache JSON file.
 * @param {object} [expected] - Expected identity of the cache.
 * @param {string} [expected.cacheKey] - Required value of `cacheKey`.
 * @param {string} [expected.signature] - Required value of `meta.signature`.
 * @param {string} [expected.provider] - Required value of `meta.provider`.
 * @param {string} [expected.model] - Required value of `meta.model`.
 * @param {boolean} [expected.requireSignature=false] - Reject files without a signature.
 * @returns {{hit: boolean, reason: string, payload: object|null}} `hit` with the
 *   parsed payload, or a miss with a reason code. Any error while reading or
 *   parsing is reported as "parse_error".
 */
function loadVectorIndexCache(
  cacheFile,
  { cacheKey, signature, provider, model, requireSignature = false } = {}
) {
  const miss = (reason) => ({ hit: false, reason, payload: null });

  // Returns the reason code of the first failed check, or null when all pass.
  const findMismatch = (parsed) => {
    if (!parsed || (cacheKey && parsed.cacheKey !== cacheKey)) return "cache_key_mismatch";
    if (!Array.isArray(parsed.items) || !Array.isArray(parsed.vectors)) return "invalid_payload";
    const meta = parsed.meta || {};
    if (provider && meta.provider && meta.provider !== provider) return "provider_mismatch";
    if (model && meta.model && meta.model !== model) return "model_mismatch";
    if (!signature) return null;
    if (!meta.signature) return requireSignature ? "missing_signature" : null;
    return meta.signature === signature ? null : "signature_mismatch";
  };

  if (!existsSync(cacheFile)) return miss("missing");
  try {
    const parsed = JSON.parse(readFileSync(cacheFile, "utf8"));
    const mismatch = findMismatch(parsed);
    return mismatch ? miss(mismatch) : { hit: true, reason: "ok", payload: parsed };
  } catch {
    return miss("parse_error");
  }
}
299
-
300
/**
 * Lists every regular file under a directory, descending into subdirectories.
 *
 * Entries that are neither directories nor regular files are ignored.
 *
 * @param {string} rootDir - Directory to walk.
 * @returns {string[]} Paths of the files found, each joined onto `rootDir`.
 */
function listFilesRecursive(rootDir) {
  const found = [];
  const pending = [rootDir];
  while (pending.length > 0) {
    const dir = pending.pop();
    for (const dirent of readdirSync(dir, { withFileTypes: true })) {
      const target = join(dir, dirent.name);
      if (dirent.isDirectory()) {
        pending.push(target);
      } else if (dirent.isFile()) {
        found.push(target);
      }
    }
  }
  return found;
}
317
-
318
/**
 * Extracts `packageVersion` from a serialized index signature.
 *
 * @param {string} signatureRaw - JSON text of a signature.
 * @returns {string} The package version, or "" when the input is empty, is not
 *   valid JSON, or has no truthy `packageVersion`.
 */
function readSignaturePackageVersion(signatureRaw) {
  if (!signatureRaw) return "";
  let version = "";
  try {
    const decoded = JSON.parse(signatureRaw);
    version = String(decoded?.packageVersion || "");
  } catch {
    version = "";
  }
  return version;
}
327
-
328
/**
 * Orders the cache files found in an extracted prebuilt archive, best match first.
 *
 * The order is: the file whose name equals `expectedCacheFileName`; then the
 * first "rag-local-" file whose name ends with the 12-character cache key
 * prefix; then every remaining "rag-local-" JSON file in sorted path order.
 * Duplicates are dropped.
 *
 * @param {string} extractRoot - Directory the archive was extracted into.
 * @param {string} expectedCacheFileName - Exact file name expected for this cache.
 * @param {string} cacheKey - Cache key; only its first 12 characters are used.
 * @returns {string[]} Candidate file paths in priority order.
 */
function listDownloadedCacheCandidates(extractRoot, expectedCacheFileName, cacheKey) {
  const jsonFiles = listFilesRecursive(extractRoot)
    .filter((path) => path.toLowerCase().endsWith(".json"))
    .sort();
  const keyedSuffix = `-${cacheKey.slice(0, 12)}.json`;
  const isLocalCache = (path) => basename(path).startsWith("rag-local-");

  const exactMatch = jsonFiles.find((path) => basename(path) === expectedCacheFileName);
  const keyMatch = jsonFiles.find((path) => isLocalCache(path) && basename(path).endsWith(keyedSuffix));
  const prioritized = [exactMatch, keyMatch, ...jsonFiles.filter(isLocalCache)];

  // A Set keeps first-insertion order, so earlier (better) matches win.
  return [...new Set(prioritized.filter(Boolean))];
}
346
-
347
/**
 * Fetches the prebuilt index archive to a local path.
 *
 * An http(s) URL is downloaded with `fetch` and aborted after `timeoutMs`
 * (at least 1000 ms). A `file://` URL or a plain filesystem path is copied.
 *
 * @param {string} url - Archive location: http(s) URL, file:// URL, or path.
 * @param {string} outputPath - Where to write the archive.
 * @param {number} timeoutMs - Download timeout for http(s) sources.
 * @returns {Promise<{sourceType: "file"|"http", size: number}>} Source kind and byte size.
 * @throws {Error} When the URL is empty or the HTTP response is not OK.
 */
async function downloadPrebuiltArchive(url, outputPath, timeoutMs) {
  const source = String(url || "").trim();
  if (!source) {
    throw new Error("prebuilt URL is empty");
  }

  const isHttp = /^https?:\/\//i.test(source);
  if (!isHttp) {
    const localPath = source.startsWith("file://") ? fileURLToPath(source) : resolve(source);
    copyFileSync(localPath, outputPath);
    return { sourceType: "file", size: statSync(outputPath).size };
  }

  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), Math.max(1000, timeoutMs));
  try {
    const response = await fetch(source, { signal: controller.signal });
    if (!response.ok) {
      throw new Error(`HTTP ${response.status}`);
    }
    const bytes = Buffer.from(await response.arrayBuffer());
    writeFileSync(outputPath, bytes);
    return { sourceType: "http", size: bytes.byteLength };
  } finally {
    clearTimeout(timer);
  }
}
377
-
378
/**
 * Tries to install a prebuilt vector index for the local provider.
 *
 * The archive at `ragConfig.prebuiltIndexUrl` is downloaded and extracted into
 * a temporary directory. The first extracted cache file that loads for this
 * provider and model, and whose signature was built by the current package
 * version, is rewritten under the caller's cache key and signature and saved
 * to `cacheFile`. Each provider/cacheKey/URL combination is attempted only
 * once; later calls get the same promise back.
 *
 * Every failure, including failure to create the temporary directory, resolves
 * to `{ downloaded: false, reason }` so the caller can fall back to building
 * the index itself.
 *
 * @param {object} params
 * @param {string} params.provider - Provider name; only "local" is supported.
 * @param {string} params.model - Model identifier.
 * @param {string} params.cacheKey - Cache key of the index being loaded.
 * @param {string} params.signature - Index signature to stamp on the installed cache.
 * @param {string} params.cacheFile - Destination cache file path.
 * @returns {Promise<{downloaded: boolean, reason: string}>} Outcome of the attempt.
 */
async function maybeDownloadPrebuiltVectorIndex({ provider, model, cacheKey, signature, cacheFile }) {
  if (provider !== "local") {
    return { downloaded: false, reason: "provider_not_local" };
  }
  if (!ragConfig.prebuiltIndexAutoDownload) {
    return { downloaded: false, reason: "auto_download_disabled" };
  }

  const sourceUrl = String(ragConfig.prebuiltIndexUrl || "").trim();
  if (!sourceUrl) {
    return { downloaded: false, reason: "url_not_set" };
  }

  const attemptKey = `${provider}:${cacheKey}:${sourceUrl}`;
  if (prebuiltDownloadAttempts.has(attemptKey)) {
    return prebuiltDownloadAttempts.get(attemptKey);
  }

  const expectedCacheFileName = makeCacheFileName(provider, model, cacheKey);
  const attempt = (async () => {
    const tempRoot = join(tmpdir(), `simple-dynamsoft-mcp-rag-prebuilt-${Date.now()}-${Math.random().toString(16).slice(2)}`);
    const archivePath = join(tempRoot, "prebuilt-rag-index.tar.gz");
    const extractRoot = join(tempRoot, "extract");

    try {
      // Inside the try so a temp-dir failure becomes a normal "unavailable"
      // result instead of a rejection stored in the attempts map.
      ensureDirectory(extractRoot);

      logRag(`prebuilt index download start provider=${provider} url=${sourceUrl} timeout_ms=${ragConfig.prebuiltIndexTimeoutMs}`);
      const downloaded = await downloadPrebuiltArchive(sourceUrl, archivePath, ragConfig.prebuiltIndexTimeoutMs);
      logRag(
        `prebuilt index downloaded provider=${provider} source=${downloaded.sourceType} size=${downloaded.size}B`
      );

      await tar.x({
        file: archivePath,
        cwd: extractRoot,
        strict: true
      });

      const candidateFiles = listDownloadedCacheCandidates(extractRoot, expectedCacheFileName, cacheKey);
      if (candidateFiles.length === 0) {
        throw new Error(`cache_file_not_found expected=${expectedCacheFileName}`);
      }

      for (const sourceCacheFile of candidateFiles) {
        // Only provider and model are checked here: the archive was built with
        // its own cache key and signature, which are replaced below.
        const candidateCache = loadVectorIndexCache(sourceCacheFile, {
          provider,
          model
        });
        if (!candidateCache.hit) {
          continue;
        }

        const cachePackageVersion = readSignaturePackageVersion(candidateCache.payload?.meta?.signature);
        if (!cachePackageVersion || cachePackageVersion !== pkg.version) {
          continue;
        }

        const migratedPayload = {
          ...candidateCache.payload,
          cacheKey,
          meta: {
            ...(candidateCache.payload.meta || {}),
            provider,
            model,
            signature
          }
        };
        saveVectorIndexCache(cacheFile, migratedPayload);
        logRag(
          `prebuilt index installed provider=${provider} cache_file=${cacheFile} source=${basename(sourceCacheFile)} mode=version_only_compat version=${cachePackageVersion}`
        );
        return { downloaded: true, reason: "installed_version_only_compat" };
      }

      throw new Error(
        `no_compatible_cache expected=${expectedCacheFileName} found=${candidateFiles.map((path) => basename(path)).join(",")}`
      );
    } catch (error) {
      logRag(`prebuilt index unavailable provider=${provider} reason=${error.message}`);
      return { downloaded: false, reason: error.message };
    } finally {
      // force: true makes this a no-op when the temp directory was never created.
      rmSync(tempRoot, { recursive: true, force: true });
    }
  })();

  prebuiltDownloadAttempts.set(attemptKey, attempt);
  return attempt;
}
466
-
467
/**
 * Writes a vector index payload to disk as JSON.
 *
 * `ragConfig.cacheDir` is created first if it does not exist.
 *
 * @param {string} cacheFile - Destination file path.
 * @param {object} payload - Index payload to serialize.
 */
function saveVectorIndexCache(cacheFile, payload) {
  ensureDirectory(ragConfig.cacheDir);
  const serialized = JSON.stringify(payload);
  writeFileSync(cacheFile, serialized);
}
471
-
472
/**
 * Loads a build checkpoint and verifies it belongs to the current build.
 *
 * The checkpoint is usable only when its cache key matches, its item list has
 * the same length and the same id/uri at every position as `expectedItems`,
 * and it holds no more vectors than there are items.
 *
 * @param {string} checkpointFile - Path of the checkpoint JSON file.
 * @param {string} expectedKey - Cache key of the current build.
 * @param {{id: string, uri: string}[]} expectedItems - Items of the current build.
 * @returns {{hit: boolean, reason: string, payload: object|null}} `hit` with the
 *   parsed payload, or a miss with a reason code. Any error while reading,
 *   parsing or comparing is reported as "parse_error".
 */
function loadVectorIndexCheckpoint(checkpointFile, expectedKey, expectedItems) {
  const miss = (reason) => ({ hit: false, reason, payload: null });
  if (!existsSync(checkpointFile)) return miss("missing");

  try {
    const parsed = JSON.parse(readFileSync(checkpointFile, "utf8"));
    if (!parsed || parsed.cacheKey !== expectedKey) return miss("cache_key_mismatch");

    const { items, vectors } = parsed;
    if (!Array.isArray(items) || !Array.isArray(vectors)) return miss("invalid_payload");
    if (items.length !== expectedItems.length) return miss("items_length_mismatch");

    for (let position = 0; position < expectedItems.length; position += 1) {
      const saved = items[position];
      const wanted = expectedItems[position];
      const sameItem = saved?.id === wanted?.id && saved?.uri === wanted?.uri;
      if (!sameItem) return miss("items_mismatch");
    }

    if (vectors.length > expectedItems.length) return miss("vectors_overflow");
    return { hit: true, reason: "ok", payload: parsed };
  } catch {
    return miss("parse_error");
  }
}
500
-
501
/**
 * Writes a build checkpoint to disk as JSON.
 *
 * `ragConfig.cacheDir` is created first if it does not exist.
 *
 * @param {string} checkpointFile - Destination file path.
 * @param {object} payload - Checkpoint payload to serialize.
 */
function saveVectorIndexCheckpoint(checkpointFile, payload) {
  ensureDirectory(ragConfig.cacheDir);
  const serialized = JSON.stringify(payload);
  writeFileSync(checkpointFile, serialized);
}
505
-
506
/**
 * Deletes a build checkpoint file if it exists.
 *
 * @param {string} checkpointFile - Path of the checkpoint file.
 */
function clearVectorIndexCheckpoint(checkpointFile) {
  if (!existsSync(checkpointFile)) return;
  rmSync(checkpointFile, { force: true });
}
511
-
512
/**
 * Scales a vector to unit length.
 *
 * @param {number[]} vector - Input vector.
 * @returns {number[]} A new vector divided by its Euclidean norm, or all zeros
 *   when the norm is zero or not a number.
 */
function normalizeVector(vector) {
  let squaredSum = 0;
  for (const component of vector) {
    squaredSum += component * component;
  }
  const magnitude = Math.sqrt(squaredSum);
  const scale = magnitude ? (component) => component / magnitude : () => 0;
  return vector.map(scale);
}
521
-
522
/**
 * Computes the dot product of two vectors.
 *
 * When the lengths differ, only the positions both vectors share are summed.
 *
 * @param {number[]} a - First vector.
 * @param {number[]} b - Second vector.
 * @returns {number} Sum of the element-wise products.
 */
function dotProduct(a, b) {
  const sharedLength = Math.min(a.length, b.length);
  let total = 0;
  let position = 0;
  while (position < sharedLength) {
    total += a[position] * b[position];
    position += 1;
  }
  return total;
}
530
-
531
/**
 * Decides whether an error represents a rate-limit response.
 *
 * @param {*} error - Error thrown by an embedding request.
 * @returns {boolean} True when `error.rateLimited` is truthy; otherwise the
 *   result of `isRateLimitGeminiStatus` for the numeric `error.status`.
 */
function isRateLimitError(error) {
  return error?.rateLimited
    ? true
    : isRateLimitGeminiStatus(Number(error?.status));
}
536
-
537
/**
 * Embeds texts with the given embedder, reporting progress after each chunk.
 *
 * When the embedder provides `embedBatch` and `batchSize` is above 1, texts
 * are sent in batches. A rate-limited batch is retried at half the batch size.
 * A batch that fails for any other reason, or that cannot be halved further,
 * is embedded one text at a time with `embedder.embed`. Without batch support
 * every text is embedded individually.
 *
 * Only the `embedBatch` call and its size check are guarded by the fallback.
 * An error thrown by `onChunk` propagates to the caller in every mode, so a
 * failing progress callback can never cause a batch to be embedded twice.
 *
 * @param {string[]} texts - Texts to embed, in order.
 * @param {{embed: Function, embedBatch?: Function}} embedder - Embedding backend.
 * @param {number} [batchSize=1] - Requested batch size; values below 1 become 1.
 * @param {object} [options]
 * @param {number} [options.offset=0] - Count already completed before this call;
 *   the starting point for the `completed` value reported to `onChunk`.
 * @param {number} [options.total=texts.length] - Total reported to `onChunk`.
 * @param {Function|null} [options.onChunk=null] - Awaited after each chunk with
 *   `{ vectors, mode, sourceBatchSize, completed, total }`; `mode` is "batch",
 *   "single_fallback" or "single".
 * @param {string} [options.providerName=""] - Provider name used in log lines.
 * @returns {Promise<{vectors: Array, stats: {batchDowngrades: number,
 *   singleFallbackBatches: number, finalBatchSize: number}}>} One vector per
 *   text, in input order, plus batching statistics.
 */
async function embedTextsWithProgress(
  texts,
  embedder,
  batchSize = 1,
  {
    offset = 0,
    total = texts.length,
    onChunk = null,
    providerName = ""
  } = {}
) {
  const results = [];
  const normalizedBatchSize = Math.max(1, batchSize);
  let completed = offset;
  let currentBatchSize = normalizedBatchSize;
  let rateLimitFailures = 0;
  let batchDowngrades = 0;
  let singleFallbackBatches = 0;

  const reportChunk = async (vectors, mode, sourceBatchSize) => {
    if (!Array.isArray(vectors) || vectors.length === 0) return;
    completed += vectors.length;
    if (onChunk) {
      await onChunk({
        vectors,
        mode,
        sourceBatchSize,
        completed,
        total
      });
    }
  };

  // Embeds the given texts one by one, recording and reporting each vector.
  const embedIndividually = async (items, mode) => {
    for (const text of items) {
      const vector = await embedder.embed(text);
      results.push(vector);
      await reportChunk([vector], mode, 1);
    }
  };

  if (embedder.embedBatch && normalizedBatchSize > 1) {
    let index = 0;
    while (index < texts.length) {
      const batch = texts.slice(index, index + currentBatchSize);
      let vectors;
      try {
        vectors = await embedder.embedBatch(batch);
        if (!Array.isArray(vectors) || vectors.length !== batch.length) {
          throw new Error(`Gemini batch response size mismatch expected=${batch.length} actual=${vectors?.length || 0}`);
        }
      } catch (error) {
        if (isRateLimitError(error)) {
          rateLimitFailures += 1;
          const nextBatchSize = Math.max(1, Math.floor(currentBatchSize / 2));
          if (nextBatchSize < currentBatchSize) {
            batchDowngrades += 1;
            logRag(
              `gemini batch downgrade provider=${providerName || "unknown"} from=${currentBatchSize} to=${nextBatchSize} ` +
              `rate_limit_failures=${rateLimitFailures}`
            );
            currentBatchSize = nextBatchSize;
            // Retry the same position with the smaller batch.
            continue;
          }
        }

        singleFallbackBatches += 1;
        logRag(
          `batch embedding fallback provider=${providerName || "unknown"} batch_size=${batch.length} reason=${error.message}`
        );
        await embedIndividually(batch, "single_fallback");
        index += batch.length;
        rateLimitFailures = 0;
        continue;
      }

      // Kept outside the try: nothing past this point may trigger the fallback,
      // otherwise an onChunk failure would re-embed a batch already recorded.
      results.push(...vectors);
      index += batch.length;
      rateLimitFailures = 0;
      await reportChunk(vectors, "batch", batch.length);
    }

    return {
      vectors: results,
      stats: {
        batchDowngrades,
        singleFallbackBatches,
        finalBatchSize: currentBatchSize
      }
    };
  }

  await embedIndividually(texts, "single");

  return {
    vectors: results,
    stats: {
      batchDowngrades,
      singleFallbackBatches,
      finalBatchSize: 1
    }
  };
}
637
-
638
// Memoized promise for the local embedder; set on the first call.
let localEmbedderPromise = null;

/**
 * Returns the local embedder, creating it on first use.
 *
 * Creation imports "@xenova/transformers", points its cache at
 * `ragConfig.modelCacheDir`, and builds a "feature-extraction" pipeline for
 * `ragConfig.localModel`. The same promise is returned on every later call.
 *
 * @returns {Promise<{embed: function(string): Promise<number[]>}>} Embedder whose
 *   `embed` runs the pipeline with mean pooling and normalization and returns
 *   the output data as a plain array.
 */
async function getLocalEmbedder() {
  if (!localEmbedderPromise) {
    localEmbedderPromise = (async () => {
      const transformers = await import("@xenova/transformers");
      ensureDirectory(ragConfig.modelCacheDir);

      // Log the initialization only once per process.
      if (!ragLogState.localEmbedderInit) {
        ragLogState.localEmbedderInit = true;
        logRag(
          `init local embedder model=${ragConfig.localModel} quantized=${ragConfig.localQuantized} model_cache_dir=${ragConfig.modelCacheDir}`
        );
      }

      transformers.env.cacheDir = ragConfig.modelCacheDir;
      transformers.env.allowLocalModels = true;
      const extractor = await transformers.pipeline("feature-extraction", ragConfig.localModel, {
        quantized: ragConfig.localQuantized
      });

      const embed = async (text) => {
        const output = await extractor(text, { pooling: "mean", normalize: true });
        return Array.from(output.data);
      };
      return { embed };
    })();
  }
  return localEmbedderPromise;
}
664
-
665
// Memoized promise for the Gemini embedder; set on the first successful call.
let geminiEmbedderPromise = null;

/**
 * Returns the Gemini embedder, creating it on first use.
 *
 * Requests are POSTed to `${geminiBaseUrl}/v1beta/<endpoint>` and run through
 * `executeWithGeminiRetry`. When `requestThrottleMs` is above 0, consecutive
 * requests are spaced at least that far apart. Counters for requests, retries
 * and throttling are kept and exposed through `getMetrics` / `resetMetrics`.
 *
 * Both `embed` and `embedBatch` throw when the response lacks embedding values.
 * For `embedBatch` this includes a single item without values, so a caller
 * never receives an `undefined` entry in place of a vector.
 *
 * @returns {Promise<{embed: Function, embedBatch: Function, getMetrics: Function,
 *   resetMetrics: Function}>} The embedder.
 * @throws {Error} When `ragConfig.geminiApiKey` is not set.
 */
async function getGeminiEmbedder() {
  if (!ragConfig.geminiApiKey) {
    throw new Error("GEMINI_API_KEY is required for gemini embeddings.");
  }
  if (geminiEmbedderPromise) return geminiEmbedderPromise;
  const retryConfig = normalizeGeminiRetryConfig({
    maxAttempts: ragConfig.geminiRetryMaxAttempts,
    baseDelayMs: ragConfig.geminiRetryBaseDelayMs,
    maxDelayMs: ragConfig.geminiRetryMaxDelayMs,
    requestThrottleMs: ragConfig.geminiRequestThrottleMs
  });

  geminiEmbedderPromise = Promise.resolve((() => {
    const metrics = {
      requests: 0,
      retries: 0,
      retryDelayMs: 0,
      throttleEvents: 0,
      throttleDelayMs: 0,
      rateLimitRetries: 0
    };

    // Earliest time (ms since epoch) at which the next request may start.
    let nextAllowedAt = 0;

    const throttleRequest = async (operation) => {
      if (retryConfig.requestThrottleMs <= 0) return;
      const now = Date.now();
      const waitMs = Math.max(0, nextAllowedAt - now);
      if (waitMs > 0) {
        metrics.throttleEvents += 1;
        metrics.throttleDelayMs += waitMs;
        logRag(`gemini throttle op=${operation} wait_ms=${waitMs}`);
        await sleepMs(waitMs);
      }
      nextAllowedAt = Date.now() + retryConfig.requestThrottleMs;
    };

    const requestJson = async (operation, endpoint, body) => executeWithGeminiRetry({
      operation,
      retryConfig,
      logger: (message) => logRag(message),
      onRetry: ({ delayMs, rateLimited }) => {
        metrics.retries += 1;
        metrics.retryDelayMs += delayMs;
        if (rateLimited) {
          metrics.rateLimitRetries += 1;
        }
      },
      requestFn: async () => {
        await throttleRequest(operation);
        metrics.requests += 1;
        const response = await fetch(
          `${ragConfig.geminiBaseUrl}/v1beta/${endpoint}?key=${ragConfig.geminiApiKey}`,
          {
            method: "POST",
            headers: { "Content-Type": "application/json" },
            body: JSON.stringify(body)
          }
        );
        if (!response.ok) {
          const detail = await response.text();
          throw new GeminiHttpError(`Gemini ${operation} failed (${response.status}): ${detail}`, {
            status: response.status,
            detail,
            retryAfterMs: parseRetryAfterMs(response.headers.get("retry-after"))
          });
        }
        return response.json();
      }
    });

    return {
      embed: async (text) => {
        const payload = await requestJson(
          "embedContent",
          `${ragConfig.geminiModel}:embedContent`,
          {
            content: {
              parts: [{ text }]
            }
          }
        );
        // Accept each response shape the code has been written to handle.
        const embedding = payload.embedding?.values || payload.embedding || payload.embeddings?.[0]?.values;
        if (!embedding) {
          throw new Error("Gemini embedding response missing embedding values.");
        }
        return embedding;
      },
      embedBatch: async (texts) => {
        const payload = await requestJson(
          "batchEmbedContents",
          `${ragConfig.geminiModel}:batchEmbedContents`,
          {
            requests: texts.map((text) => ({
              model: ragConfig.geminiModel,
              content: {
                parts: [{ text }]
              }
            }))
          }
        );
        const embeddings = payload.embeddings || payload.responses;
        if (!Array.isArray(embeddings)) {
          throw new Error("Gemini batch response missing embeddings.");
        }
        const vectors = embeddings.map((item) => item?.values || item?.embedding?.values || item?.embedding);
        // Match embed(): fail loudly rather than hand back an undefined vector.
        if (vectors.some((vector) => !vector)) {
          throw new Error("Gemini batch response contains an item without embedding values.");
        }
        return vectors;
      },
      getMetrics: () => ({ ...metrics }),
      resetMetrics: () => {
        metrics.requests = 0;
        metrics.retries = 0;
        metrics.retryDelayMs = 0;
        metrics.throttleEvents = 0;
        metrics.throttleDelayMs = 0;
        metrics.rateLimitRetries = 0;
      }
    };
  })());
  return geminiEmbedderPromise;
}
786
-
787
/**
 * Creates a vector-similarity search provider for one embedding backend.
 *
 * The embedding index is produced lazily on the first `search`/`warm` call:
 * it is read from the cache file when allowed, otherwise (for the "local"
 * provider) a prebuilt index download is attempted, and as a last resort all
 * items are embedded, with progress checkpointed so a later run can resume.
 *
 * @param {object} options
 * @param {string} options.name - Provider name ("local" and "gemini" get special handling).
 * @param {string} options.model - Model identifier; part of the cache key.
 * @param {object} options.embedder - Must expose `embed(text)`; may expose `getMetrics`/`resetMetrics`.
 * @param {number} options.batchSize - Batch size forwarded to `embedTextsWithProgress`.
 * @returns {Promise<{name: string, search: Function, warm: Function}>}
 */
async function createVectorProvider({ name, model, embedder, batchSize }) {
  const signature = buildIndexSignature();
  // Property order matters here: the object is serialized to derive the cache key.
  const cacheMeta = { provider: name, model, signature };
  const cacheKey = createHash("sha256").update(JSON.stringify(cacheMeta)).digest("hex");
  const cacheFile = join(ragConfig.cacheDir, makeCacheFileName(name, model, cacheKey));
  const checkpointFile = join(ragConfig.cacheDir, makeCheckpointFileName(name, model, cacheKey));
  const expectedCacheState = { cacheKey, signature, provider: name, model };
  logRag(
    `provider=${name} cache_file=${cacheFile} rebuild=${ragConfig.rebuild} cache_key=${cacheKey.slice(0, 12)}`
  );

  // Reduces a cache payload to the two fields the search loop needs.
  const toIndex = (payload) => ({ items: payload.items, vectors: payload.vectors });

  // Resolves to a usable index from the cache file (optionally after a
  // prebuilt download for the "local" provider), or null when none is valid.
  const readCachedIndex = async () => {
    const localState = loadVectorIndexCache(cacheFile, expectedCacheState);
    if (localState.hit) {
      const cached = localState.payload;
      logRag(
        `cache hit provider=${name} file=${cacheFile} items=${cached.items.length} vectors=${cached.vectors.length}`
      );
      return toIndex(cached);
    }
    logRag(`cache miss provider=${name} file=${cacheFile} reason=${localState.reason}`);

    if (name !== "local") return null;

    const downloadResult = await maybeDownloadPrebuiltVectorIndex({
      provider: name,
      model,
      cacheKey,
      signature,
      cacheFile
    });
    if (!downloadResult.downloaded) return null;

    const downloadedState = loadVectorIndexCache(cacheFile, expectedCacheState);
    if (!downloadedState.hit) {
      logRag(`cache miss provider=${name} file=${cacheFile} source=prebuilt_download reason=${downloadedState.reason}`);
      return null;
    }
    const cached = downloadedState.payload;
    logRag(
      `cache hit provider=${name} file=${cacheFile} source=prebuilt_download items=${cached.items.length} vectors=${cached.vectors.length}`
    );
    return toIndex(cached);
  };

  // Shared middle section of the two Gemini metrics log lines (trailing space included).
  const describeGeminiMetrics = () => {
    const metrics = embedder.getMetrics ? embedder.getMetrics() : {};
    return (
      `requests=${metrics.requests || 0} retries=${metrics.retries || 0} ` +
      `retry_delay_ms=${metrics.retryDelayMs || 0} throttle_events=${metrics.throttleEvents || 0} ` +
      `throttle_delay_ms=${metrics.throttleDelayMs || 0} rate_limit_retries=${metrics.rateLimitRetries || 0} `
    );
  };

  // Loads the index from cache or embeds everything, then writes the cache file.
  const buildIndex = async () => {
    if (ragConfig.rebuild) {
      logRag(`cache bypass provider=${name} file=${cacheFile} reason=rebuild_true`);
      clearVectorIndexCheckpoint(checkpointFile);
    } else {
      const cachedIndex = await readCachedIndex();
      if (cachedIndex) return cachedIndex;
    }

    const embeddingItems = buildEmbeddingItems();
    const texts = embeddingItems.map((item) => item.text);
    const indexedItems = embeddingItems.map((item) => ({ id: item.id, uri: item.uri }));
    let normalized = [];
    let resumeFrom = 0;

    if (!ragConfig.rebuild) {
      const checkpointState = loadVectorIndexCheckpoint(checkpointFile, cacheKey, indexedItems);
      if (checkpointState.hit) {
        // Continue after the vectors a previous run already produced.
        normalized = checkpointState.payload.vectors;
        resumeFrom = normalized.length;
        logRag(
          `checkpoint resume provider=${name} file=${checkpointFile} completed=${resumeFrom}/${texts.length}`
        );
      } else if (checkpointState.reason !== "missing") {
        logRag(`checkpoint ignored provider=${name} file=${checkpointFile} reason=${checkpointState.reason}`);
      }
    }

    if (name === "gemini" && embedder.resetMetrics) {
      embedder.resetMetrics();
    }

    const checkpointIntervalMs = 5000;
    let lastCheckpointAt = 0;
    // Writes a checkpoint at most once per interval unless `force` is set.
    const persistCheckpoint = (force = false) => {
      const now = Date.now();
      const withinInterval = now - lastCheckpointAt < checkpointIntervalMs;
      if (withinInterval && !force) return;
      saveVectorIndexCheckpoint(checkpointFile, {
        cacheKey,
        meta: cacheMeta,
        items: indexedItems,
        vectors: normalized,
        completed: normalized.length,
        total: texts.length,
        updatedAt: new Date().toISOString()
      });
      lastCheckpointAt = now;
    };

    if (resumeFrom < texts.length) {
      logRag(
        `building index provider=${name} embed_items=${texts.length} remaining=${texts.length - resumeFrom} batch_size=${batchSize}`
      );
      try {
        const progressOptions = {
          offset: resumeFrom,
          total: texts.length,
          providerName: name,
          onChunk: ({ vectors, completed, total }) => {
            normalized.push(...vectors.map(normalizeVector));
            // The final chunk always forces a checkpoint write.
            persistCheckpoint(completed >= total);
          }
        };
        const embeddingResult = await embedTextsWithProgress(
          texts.slice(resumeFrom),
          embedder,
          batchSize,
          progressOptions
        );

        if (name === "gemini") {
          const { stats } = embeddingResult;
          logRag(
            `gemini build metrics provider=${name} ${describeGeminiMetrics()}` +
              `batch_downgrades=${stats.batchDowngrades} single_fallback_batches=${stats.singleFallbackBatches} ` +
              `final_batch_size=${stats.finalBatchSize}`
          );
        }
      } catch (error) {
        // Keep whatever was embedded so far before surfacing the failure.
        persistCheckpoint(true);
        if (name === "gemini") {
          logRag(
            `gemini build failed provider=${name} ${describeGeminiMetrics()}` +
              `checkpoint_completed=${normalized.length}/${texts.length} error=${error.message}`
          );
        }
        throw error;
      }
    } else {
      logRag(`checkpoint already complete provider=${name} completed=${resumeFrom}/${texts.length}`);
    }

    const payload = {
      cacheKey,
      meta: cacheMeta,
      items: indexedItems,
      vectors: normalized
    };
    saveVectorIndexCache(cacheFile, payload);
    clearVectorIndexCheckpoint(checkpointFile);
    logRag(`cache saved provider=${name} file=${cacheFile} items=${payload.items.length} vectors=${payload.vectors.length}`);
    return toIndex(payload);
  };

  // The build is started once and its promise is reused by every caller,
  // including when it rejected.
  let indexPromise = null;
  const loadIndex = () => {
    if (!indexPromise) {
      indexPromise = buildIndex();
    }
    return indexPromise;
  };

  // Scores every indexed vector against the query and keeps the best hit per URI.
  const search = async (query, filters, limit) => {
    const prepared = truncateText(normalizeText(query), ragConfig.maxTextChars);
    if (!prepared) return [];

    const index = await loadIndex();
    const queryVector = normalizeVector(await embedder.embed(prepared));
    const minScore = ragConfig.minScore;
    const bestByUri = new Map();

    for (const [position, vector] of index.vectors.entries()) {
      const score = dotProduct(queryVector, vector);
      if (minScore && score < minScore) continue;

      const { uri } = index.items[position];
      const entry = resourceIndexByUri.get(uri);
      if (!entry || !entryMatchesScope(entry, filters)) continue;

      const best = bestByUri.get(uri);
      if (!best || score > best.score) {
        bestByUri.set(uri, { entry, score });
      }
    }

    const ranked = [...bestByUri.values()].sort((left, right) => right.score - left.score);
    const results = ranked.map((hit) => attachScore(hit.entry, hit.score));
    return limit ? results.slice(0, limit) : results;
  };

  const warm = async () => {
    await loadIndex();
  };

  return { name, search, warm };
}
990
-
991
/**
 * Creates the lexical fallback provider backed by the shared `fuseSearch` index.
 *
 * @returns {{name: string, search: Function, warm: Function}} Provider whose
 *   `warm` is a no-op.
 */
function createFuseProvider() {
  // Converts the match score to `1 - score`, floored at 0 (presumably because
  // Fuse scores are distances where lower is better — confirm against Fuse docs).
  // Non-finite scores are passed on as `undefined`.
  const toSimilarity = (rawScore) =>
    Number.isFinite(rawScore) ? Math.max(0, 1 - rawScore) : undefined;

  const search = async (query, filters, limit) => {
    const matches = [];
    for (const { item, score } of fuseSearch.search(query)) {
      if (entryMatchesScope(item, filters)) {
        matches.push(attachScore(item, toSimilarity(score)));
      }
    }
    return limit ? matches.slice(0, limit) : matches;
  };

  return {
    name: "fuse",
    search,
    warm: async () => {}
  };
}
1008
-
1009
/**
 * Resolves the ordered list of provider names to try for a search.
 *
 * The "auto" provider becomes "gemini" when a Gemini API key is configured and
 * "local" otherwise. The configured fallback is appended unless it is empty,
 * "none", or the same as the primary.
 *
 * @returns {string[]} Primary provider first, followed by the optional fallback.
 */
function resolveProviderChain() {
  const configured = ragConfig.provider;
  const primary =
    configured === "auto" ? (ragConfig.geminiApiKey ? "gemini" : "local") : configured;

  const fallback = ragConfig.fallback;
  const useFallback = Boolean(fallback) && fallback !== "none" && fallback !== primary;
  const chain = useFallback ? [primary, fallback] : [primary];

  return [...new Set(chain)];
}
1020
-
1021
/**
 * Logs the effective RAG configuration the first time it is called;
 * later calls are no-ops (tracked through `ragLogState.config`).
 */
function logRagConfigOnce() {
  if (ragLogState.config) return;
  ragLogState.config = true;

  const fields = [
    "config",
    `provider=${ragConfig.provider}`,
    `fallback=${ragConfig.fallback}`,
    `prewarm=${ragConfig.prewarm}`,
    `rebuild=${ragConfig.rebuild}`,
    `cache_dir=${ragConfig.cacheDir}`,
    `prebuilt_auto_download=${ragConfig.prebuiltIndexAutoDownload}`,
    `prebuilt_timeout_ms=${ragConfig.prebuiltIndexTimeoutMs}`,
    `gemini_retry_max_attempts=${ragConfig.geminiRetryMaxAttempts}`,
    `gemini_retry_base_delay_ms=${ragConfig.geminiRetryBaseDelayMs}`,
    `gemini_retry_max_delay_ms=${ragConfig.geminiRetryMaxDelayMs}`,
    `gemini_request_throttle_ms=${ragConfig.geminiRequestThrottleMs}`
  ];
  logRag(fields.join(" "));
}
1032
-
1033
// Memoizes in-flight and successfully created providers by name.
const providerCache = new Map();

/**
 * Constructs the provider for `name` without any caching.
 *
 * @param {string} name - "fuse", "local" or "gemini".
 * @returns {Promise<object>} The provider object.
 * @throws {Error} When `name` is not a known provider.
 */
async function instantiateSearchProvider(name) {
  switch (name) {
    case "fuse":
      return createFuseProvider();
    case "local": {
      const embedder = await getLocalEmbedder();
      return createVectorProvider({
        name: "local",
        model: ragConfig.localModel,
        embedder,
        batchSize: 1
      });
    }
    case "gemini": {
      const embedder = await getGeminiEmbedder();
      return createVectorProvider({
        name: "gemini",
        model: ragConfig.geminiModel,
        embedder,
        batchSize: Math.max(1, ragConfig.geminiBatchSize)
      });
    }
    default:
      throw new Error(`Unknown search provider: ${name}`);
  }
}

/**
 * Returns the (memoized) search provider for `name`.
 *
 * Concurrent callers share one in-flight creation. A failed creation is
 * evicted from the cache so a later call can retry instead of replaying the
 * same rejection forever, and "provider ready" is logged only once the
 * provider has actually been created.
 *
 * @param {string} name - Provider name ("fuse", "local" or "gemini").
 * @returns {Promise<object>} Resolves to the provider.
 * @throws {Error} Rejects when the provider is unknown or cannot be created.
 */
async function loadSearchProvider(name) {
  const cached = providerCache.get(name);
  if (cached) return cached;

  const providerPromise = instantiateSearchProvider(name).then(
    (provider) => {
      if (!ragLogState.providerReady.has(name)) {
        ragLogState.providerReady.add(name);
        logRag(`provider ready name=${name}`);
      }
      return provider;
    },
    (error) => {
      // Only evict our own entry; a newer attempt may already have replaced it.
      if (providerCache.get(name) === providerPromise) {
        providerCache.delete(name);
      }
      throw error;
    }
  );

  providerCache.set(name, providerPromise);
  return providerPromise;
}
1070
-
1071
/**
 * Searches the resource index, trying each provider of the resolved chain in
 * order until one answers.
 *
 * With an empty query the scoped resource list is returned without ranking.
 * A provider failure is logged and the next provider is tried; when every
 * provider fails an empty array is returned.
 *
 * @param {object} params
 * @param {string} [params.query] - Free-text query; trimmed before use.
 * @param {string} [params.product]
 * @param {string} [params.edition]
 * @param {string} [params.platform]
 * @param {string} [params.type]
 * @param {number} [params.limit] - Maximum result count, capped at 50; falsy means no cap.
 * @returns {Promise<object[]>} Matching entries.
 */
async function searchResources({ query, product, edition, platform, type, limit }) {
  const filters = normalizeSearchFilters({ product, edition, platform, type });
  const searchQuery = query ? String(query).trim() : "";
  let maxResults;
  if (limit) {
    maxResults = Math.min(limit, 50);
  }

  if (searchQuery === "") {
    const scoped = resourceIndex.filter((entry) => entryMatchesScope(entry, filters));
    if (maxResults) return scoped.slice(0, maxResults);
    return scoped;
  }

  logRagConfigOnce();
  const providers = resolveProviderChain();
  const primaryName = providers[0];
  if (!ragLogState.providerChain) {
    ragLogState.providerChain = true;
    logRag(`provider chain=${providers.join(" -> ")}`);
  }

  let lastError = null;
  for (const providerName of providers) {
    try {
      const provider = await loadSearchProvider(providerName);
      const results = await provider.search(searchQuery, filters, maxResults);

      // Each of these log lines is emitted at most once per provider.
      if (!ragLogState.providerFirstUse.has(providerName)) {
        ragLogState.providerFirstUse.add(providerName);
        logRag(`provider selected name=${providerName}`);
      }
      const isFallback = providerName !== primaryName;
      if (isFallback && !ragLogState.fallbackUse.has(providerName)) {
        ragLogState.fallbackUse.add(providerName);
        logRag(`fallback engaged selected=${providerName} primary=${primaryName}`);
      }
      return results;
    } catch (error) {
      lastError = error;
      console.error(`[rag] provider "${providerName}" failed: ${error.message}`);
    }
  }

  if (lastError) {
    console.error(`[rag] all providers failed: ${lastError.message}`);
  }
  return [];
}
1112
-
1113
/**
 * Warms the primary provider's index ahead of the first search.
 *
 * Does nothing when prewarming is disabled or the primary provider is "fuse".
 * Failures are logged and never propagated to the caller.
 *
 * @returns {Promise<void>}
 */
async function prewarmRagIndex() {
  if (!ragConfig.prewarm) return;

  logRagConfigOnce();
  const [primary] = resolveProviderChain();
  const nothingToWarm = !primary || primary === "fuse";
  if (nothingToWarm) return;

  try {
    logRag(`prewarm start provider=${primary}`);
    const provider = await loadSearchProvider(primary);
    if (provider.warm) {
      await provider.warm();
    }
    logRag(`prewarm done provider=${primary}`);
  } catch (error) {
    console.error(`[rag] prewarm failed: ${error.message}`);
  }
}
1130
-
1131
/**
 * Suggests sample resources for a query and product/edition/platform scope.
 *
 * A non-empty query is first sent through `searchResources` restricted to
 * samples. If that yields nothing (or there is no query), samples are taken
 * straight from `resourceIndex`, ranked by a simple keyword score when a query
 * is present, de-duplicated by URI and truncated.
 *
 * @param {object} params
 * @param {string} [params.query] - Free-text query; trimmed before use.
 * @param {string} [params.product]
 * @param {string} [params.edition]
 * @param {string} [params.platform]
 * @param {number} [params.limit=5] - Maximum result count, capped at 10.
 *   Non-positive or non-numeric values fall back to 5.
 * @returns {Promise<object[]>} Up to `limit` sample entries.
 */
async function getSampleSuggestions({ query, product, edition, platform, limit = 5 }) {
  const normalizedProduct = normalizeProduct(product);
  const normalizedPlatform = normalizePlatform(platform);
  const normalizedEdition = normalizeEdition(edition, normalizedPlatform, normalizedProduct);
  const searchQuery = query ? String(query).trim() : "";

  // A negative limit used to produce a negative cap (one fallback result and
  // a negative slice bound downstream); treat it like "not specified".
  const requested = Number(limit);
  const maxResults = Math.min(requested > 0 ? requested : 5, 10);

  if (searchQuery) {
    const results = await searchResources({
      query: searchQuery,
      product: normalizedProduct,
      edition: normalizedEdition,
      platform: normalizedPlatform,
      type: "sample",
      limit: maxResults
    });
    if (results.length) return results;
  }

  const matchesScope = (entry) => {
    if (normalizedProduct && entry.product !== normalizedProduct) return false;
    if (!editionMatches(normalizedEdition, entry.edition)) return false;
    if (!platformMatches(normalizedPlatform, entry)) return false;
    return entry.type === "sample";
  };

  let candidates = resourceIndex.filter(matchesScope);
  if (candidates.length === 0 && normalizedProduct) {
    // Nothing in the exact scope: relax to every sample of the product.
    candidates = resourceIndex.filter(
      (entry) => entry.type === "sample" && entry.product === normalizedProduct
    );
  }

  if (searchQuery && candidates.length > 1) {
    const terms = normalizeText(searchQuery.toLowerCase()).split(/\s+/).filter(Boolean);

    // +3 when a tag contains the term, +1 when title/summary/tags mention it.
    const scoreEntry = (entry) => {
      const tags = Array.isArray(entry.tags)
        ? entry.tags.map((tag) => String(tag).toLowerCase())
        : [];
      const haystack = [
        String(entry.title || "").toLowerCase(),
        String(entry.summary || "").toLowerCase(),
        tags.join(" ")
      ].join(" ");
      let score = 0;
      for (const term of terms) {
        if (tags.some((tag) => tag.includes(term))) score += 3;
        if (haystack.includes(term)) score += 1;
      }
      return score;
    };

    // Score each entry once instead of on every comparison made by the sort.
    const scored = candidates.map((entry) => ({ entry, score: scoreEntry(entry) }));
    scored.sort((left, right) => {
      const delta = right.score - left.score;
      if (delta !== 0) return delta;
      return String(left.entry.title || "").localeCompare(String(right.entry.title || ""));
    });
    candidates = scored.map((item) => item.entry);
  }

  const seen = new Set();
  const results = [];
  for (const entry of candidates) {
    if (seen.has(entry.uri)) continue;
    seen.add(entry.uri);
    results.push(entry);
    if (results.length >= maxResults) break;
  }

  return results;
}
1197
-
1198
- export {
1199
- ragConfig,
1200
- searchResources,
1201
- getSampleSuggestions,
1202
- prewarmRagIndex
1203
- };