simple-dynamsoft-mcp 6.2.0 → 6.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/rag.js CHANGED
@@ -1,9 +1,11 @@
1
- import { readFileSync, existsSync, writeFileSync, mkdirSync } from "node:fs";
2
- import { join, dirname } from "node:path";
1
+ import { readFileSync, existsSync, writeFileSync, mkdirSync, readdirSync, copyFileSync, rmSync, statSync } from "node:fs";
2
+ import { join, dirname, basename, resolve } from "node:path";
3
3
  import { createHash } from "node:crypto";
4
4
  import { fileURLToPath } from "node:url";
5
+ import { tmpdir } from "node:os";
5
6
  import "dotenv/config";
6
7
  import Fuse from "fuse.js";
8
+ import * as tar from "tar";
7
9
  import { getResolvedDataRoot } from "./data-root.js";
8
10
  import {
9
11
  resourceIndex,
@@ -16,12 +18,28 @@ import {
16
18
  normalizeEdition,
17
19
  getRagSignatureData
18
20
  } from "./resource-index.js";
21
+ import {
22
+ sleepMs,
23
+ parseRetryAfterMs,
24
+ normalizeGeminiRetryConfig,
25
+ isRateLimitGeminiStatus,
26
+ GeminiHttpError,
27
+ executeWithGeminiRetry
28
+ } from "./gemini-retry.js";
19
29
 
20
30
  const __dirname = dirname(fileURLToPath(import.meta.url));
21
31
  const dataRoot = getResolvedDataRoot();
22
32
 
23
33
  const pkgUrl = new URL("../package.json", import.meta.url);
24
34
  const pkg = JSON.parse(readFileSync(pkgUrl, "utf8"));
35
// Release asset URL without a provider segment in the file name. It is tried
// after the provider-specific URL when no explicit override is configured.
const legacyPrebuiltIndexUrl =
  `https://github.com/yushulx/simple-dynamsoft-mcp/releases/download/v${pkg.version}/prebuilt-rag-index-${pkg.version}.tar.gz`;

// Default per-provider release asset URLs, pinned to the running package version.
// They serve as fallbacks for RAG_PREBUILT_INDEX_URL_LOCAL / RAG_PREBUILT_INDEX_URL_GEMINI.
const defaultPrebuiltIndexUrls = {
  local: `https://github.com/yushulx/simple-dynamsoft-mcp/releases/download/v${pkg.version}/prebuilt-rag-index-local-${pkg.version}.tar.gz`,
  gemini: `https://github.com/yushulx/simple-dynamsoft-mcp/releases/download/v${pkg.version}/prebuilt-rag-index-gemini-${pkg.version}.tar.gz`
};
25
43
 
26
44
  // ============================================================================
27
45
  // RAG configuration
@@ -75,10 +93,19 @@ const ragConfig = {
75
93
  rebuild: readBoolEnv("RAG_REBUILD", false),
76
94
  prewarm: readBoolEnv("RAG_PREWARM", false),
77
95
  prewarmBlock: readBoolEnv("RAG_PREWARM_BLOCK", false),
96
+ prebuiltIndexAutoDownload: readBoolEnv("RAG_PREBUILT_INDEX_AUTO_DOWNLOAD", true),
97
+ prebuiltIndexUrl: readEnvValue("RAG_PREBUILT_INDEX_URL", ""),
98
+ prebuiltIndexUrlLocal: readEnvValue("RAG_PREBUILT_INDEX_URL_LOCAL", defaultPrebuiltIndexUrls.local),
99
+ prebuiltIndexUrlGemini: readEnvValue("RAG_PREBUILT_INDEX_URL_GEMINI", defaultPrebuiltIndexUrls.gemini),
100
+ prebuiltIndexTimeoutMs: readIntEnv("RAG_PREBUILT_INDEX_TIMEOUT_MS", 180000),
78
101
  geminiApiKey: readEnvValue("GEMINI_API_KEY", ""),
79
102
  geminiModel: normalizeGeminiModel(readEnvValue("GEMINI_EMBED_MODEL", "models/gemini-embedding-001")),
80
103
  geminiBaseUrl: readEnvValue("GEMINI_API_BASE_URL", "https://generativelanguage.googleapis.com"),
81
- geminiBatchSize: readIntEnv("GEMINI_EMBED_BATCH_SIZE", 16)
104
+ geminiBatchSize: readIntEnv("GEMINI_EMBED_BATCH_SIZE", 16),
105
+ geminiRetryMaxAttempts: readIntEnv("GEMINI_RETRY_MAX_ATTEMPTS", 5),
106
+ geminiRetryBaseDelayMs: readIntEnv("GEMINI_RETRY_BASE_DELAY_MS", 500),
107
+ geminiRetryMaxDelayMs: readIntEnv("GEMINI_RETRY_MAX_DELAY_MS", 10000),
108
+ geminiRequestThrottleMs: readIntEnv("GEMINI_REQUEST_THROTTLE_MS", 0)
82
109
  };
83
110
 
84
111
  const ragLogState = {
@@ -90,6 +117,8 @@ const ragLogState = {
90
117
  fallbackUse: new Set()
91
118
  };
92
119
 
120
+ const prebuiltDownloadAttempts = new Map();
121
+
93
122
  function logRag(message) {
94
123
  console.error(`[rag] ${message}`);
95
124
  }
@@ -205,6 +234,10 @@ function buildIndexSignature() {
205
234
  return JSON.stringify({
206
235
  packageVersion: pkg.version,
207
236
  resourceCount: signatureData.resourceCount,
237
+ dcvCoreDocCount: signatureData.dcvCoreDocCount,
238
+ dcvWebDocCount: signatureData.dcvWebDocCount,
239
+ dcvMobileDocCount: signatureData.dcvMobileDocCount,
240
+ dcvServerDocCount: signatureData.dcvServerDocCount,
208
241
  dbrWebDocCount: signatureData.dbrWebDocCount,
209
242
  dbrMobileDocCount: signatureData.dbrMobileDocCount,
210
243
  dbrServerDocCount: signatureData.dbrServerDocCount,
@@ -230,29 +263,294 @@ function makeCacheFileName(provider, model, cacheKey) {
230
263
  return `rag-${provider}-${safeModel}-${cacheKey.slice(0, 12)}.json`;
231
264
  }
232
265
 
233
- function loadVectorIndexCache(cacheFile, expectedKey) {
266
/**
 * Builds the checkpoint file name for a provider/model/cache-key combination.
 *
 * @param {string} provider - Provider name placed after the "rag-" prefix.
 * @param {string} model - Model identifier; falsy values become "default". Runs of
 *   characters outside [a-zA-Z0-9._-] collapse to "_" and the result is cut to 32 chars.
 * @param {string} cacheKey - Cache key; only its first 12 characters are used.
 * @returns {string} A name of the form rag-<provider>-<model>-<key12>.checkpoint.json.
 */
function makeCheckpointFileName(provider, model, cacheKey) {
  const rawModel = String(model || "default");
  const sanitizedModel = rawModel.replace(/[^a-zA-Z0-9._-]+/g, "_");
  const safeModel = sanitizedModel.slice(0, 32);
  const keyPrefix = cacheKey.slice(0, 12);
  return `rag-${provider}-${safeModel}-${keyPrefix}.checkpoint.json`;
}
270
+
271
/**
 * Reads a vector index cache file and checks it against the expected identity.
 *
 * Each expectation is only enforced when it is supplied (truthy). Provider and
 * model are additionally only compared when the cached meta records them.
 *
 * @param {string} cacheFile - Path of the JSON cache file.
 * @param {object} [expected]
 * @param {string} [expected.cacheKey] - Must equal the payload's cacheKey.
 * @param {string} [expected.signature] - Must equal meta.signature when the cache has one.
 * @param {string} [expected.provider] - Must equal meta.provider when the cache has one.
 * @param {string} [expected.model] - Must equal meta.model when the cache has one.
 * @param {boolean} [expected.requireSignature=false] - Reject caches lacking
 *   meta.signature when a signature is expected.
 * @returns {{hit: boolean, reason: string, payload: object|null}} On a hit the
 *   parsed payload with reason "ok"; otherwise a reason code and a null payload.
 */
function loadVectorIndexCache(
  cacheFile,
  { cacheKey, signature, provider, model, requireSignature = false } = {}
) {
  const miss = (reason) => ({ hit: false, reason, payload: null });

  if (!existsSync(cacheFile)) return miss("missing");

  try {
    const parsed = JSON.parse(readFileSync(cacheFile, "utf8"));

    if (!parsed) return miss("cache_key_mismatch");
    if (cacheKey && parsed.cacheKey !== cacheKey) return miss("cache_key_mismatch");

    const hasArrays = Array.isArray(parsed.items) && Array.isArray(parsed.vectors);
    if (!hasArrays) return miss("invalid_payload");

    const {
      provider: cachedProvider,
      model: cachedModel,
      signature: cachedSignature
    } = parsed.meta || {};

    if (provider && cachedProvider && cachedProvider !== provider) {
      return miss("provider_mismatch");
    }
    if (model && cachedModel && cachedModel !== model) {
      return miss("model_mismatch");
    }
    if (signature && cachedSignature && cachedSignature !== signature) {
      return miss("signature_mismatch");
    }
    // A cache without a recorded signature is tolerated unless the caller insists.
    if (signature && !cachedSignature && requireSignature) {
      return miss("missing_signature");
    }

    return { hit: true, reason: "ok", payload: parsed };
  } catch {
    // Unreadable file or malformed JSON.
    return miss("parse_error");
  }
}
250
307
 
308
/**
 * Collects the paths of all regular files below a directory.
 *
 * Directories are walked iteratively with an explicit stack. Entries that are
 * neither directories nor regular files are ignored.
 *
 * @param {string} rootDir - Directory to start from.
 * @returns {string[]} File paths, each formed by joining onto rootDir.
 */
function listFilesRecursive(rootDir) {
  const collected = [];
  const pendingDirs = [rootDir];

  while (pendingDirs.length !== 0) {
    const dir = pendingDirs.pop();
    readdirSync(dir, { withFileTypes: true }).forEach((dirent) => {
      const childPath = join(dir, dirent.name);
      if (dirent.isDirectory()) {
        pendingDirs.push(childPath);
        return;
      }
      if (dirent.isFile()) {
        collected.push(childPath);
      }
    });
  }

  return collected;
}
325
+
326
/**
 * Extracts the packageVersion field from a JSON-encoded index signature.
 *
 * @param {string} signatureRaw - JSON text of the signature.
 * @returns {string} The packageVersion as a string, or "" when the input is
 *   empty, is not valid JSON, or has no truthy packageVersion.
 */
function readSignaturePackageVersion(signatureRaw) {
  if (signatureRaw) {
    try {
      const { packageVersion } = JSON.parse(signatureRaw) ?? {};
      return String(packageVersion || "");
    } catch {
      // Malformed JSON is treated as "no version".
    }
  }
  return "";
}
335
+
336
/**
 * Lists the cache files inside an extracted prebuilt archive that may serve the
 * given provider, best match first.
 *
 * Priority order:
 *   1. a file whose name equals expectedCacheFileName,
 *   2. a file named rag-<provider>-…-<first 12 chars of cacheKey>.json,
 *   3. every other file named rag-<provider>-….json.
 *
 * Checkpoint files (…​.checkpoint.json) are excluded: they share the provider
 * prefix and the .json suffix but may hold fewer vectors than items, so they
 * must never be offered as a complete cache.
 *
 * @param {string} extractRoot - Directory the archive was extracted into.
 * @param {string} expectedCacheFileName - Exact cache file name for this build.
 * @param {string} cacheKey - Full cache key; only the first 12 characters are matched.
 * @param {string} provider - Provider name used in the file-name prefix.
 * @returns {string[]} De-duplicated candidate paths in priority order.
 */
function listDownloadedCacheCandidatesByProvider(extractRoot, expectedCacheFileName, cacheKey, provider) {
  const providerPrefix = `rag-${provider}-`;
  const keySuffix = `-${cacheKey.slice(0, 12)}.json`;

  const jsonFiles = listFilesRecursive(extractRoot)
    .filter((path) => {
      const lowered = path.toLowerCase();
      return lowered.endsWith(".json") && !lowered.endsWith(".checkpoint.json");
    })
    .sort();

  const exactMatch = jsonFiles.find((path) => basename(path) === expectedCacheFileName);
  const prefixMatch = jsonFiles.find((path) => {
    const name = basename(path);
    return name.startsWith(providerPrefix) && name.endsWith(keySuffix);
  });
  const providerFiles = jsonFiles.filter((path) => basename(path).startsWith(providerPrefix));

  // A Set keeps first-insertion order, so the priority order survives de-duplication.
  const ordered = new Set();
  for (const path of [exactMatch, prefixMatch, ...providerFiles]) {
    if (path) ordered.add(path);
  }
  return [...ordered];
}
354
+
355
/**
 * Determines which prebuilt-index URLs to try for a provider, in order.
 *
 * A non-empty ragConfig.prebuiltIndexUrl override wins and is returned alone.
 * Otherwise the list is the provider-specific URL ("local" or "gemini")
 * followed by the legacy URL, with empty entries and duplicates removed.
 *
 * @param {string} provider - Provider name.
 * @returns {string[]} Ordered, de-duplicated URL candidates.
 */
function resolvePrebuiltIndexUrlCandidates(provider) {
  const clean = (value) => String(value || "").trim();

  const override = clean(ragConfig.prebuiltIndexUrl);
  if (override) return [override];

  const ordered = [];
  switch (provider) {
    case "local":
      ordered.push(clean(ragConfig.prebuiltIndexUrlLocal));
      break;
    case "gemini":
      ordered.push(clean(ragConfig.prebuiltIndexUrlGemini));
      break;
    default:
      break;
  }
  ordered.push(legacyPrebuiltIndexUrl);

  // Keep only non-empty entries at their first position.
  return ordered.filter((url, position) => Boolean(url) && ordered.indexOf(url) === position);
}
374
+
375
/**
 * Fetches a prebuilt index archive into outputPath.
 *
 * Supported sources:
 *   - file:// URLs (scheme matched case-insensitively) — copied from disk,
 *   - http:// and https:// URLs — downloaded with a timeout,
 *   - anything else — treated as a filesystem path, resolved against the cwd.
 *
 * @param {string} url - Archive location.
 * @param {string} outputPath - Destination file; its directory must already exist.
 * @param {number} timeoutMs - HTTP timeout in milliseconds. Values below 1000
 *   are raised to 1000. Non-finite values fall back to 180000, because handing
 *   NaN/Infinity to setTimeout makes it fire almost immediately and would abort
 *   every download.
 * @returns {Promise<{sourceType: "file"|"http", size: number}>} Where the archive
 *   came from and its size in bytes.
 * @throws {Error} When the URL is empty, the copy fails, the server answers with
 *   a non-2xx status ("HTTP <status>"), or the request times out
 *   ("timeout after <n>ms", original abort error attached as `cause`).
 */
async function downloadPrebuiltArchive(url, outputPath, timeoutMs) {
  const source = String(url || "").trim();
  if (!source) {
    throw new Error("prebuilt URL is empty");
  }

  if (/^file:\/\//i.test(source)) {
    copyFileSync(fileURLToPath(source), outputPath);
    return { sourceType: "file", size: statSync(outputPath).size };
  }

  if (!/^https?:\/\//i.test(source)) {
    copyFileSync(resolve(source), outputPath);
    return { sourceType: "file", size: statSync(outputPath).size };
  }

  // setTimeout only honours delays up to 2^31 - 1 ms; larger or non-finite
  // delays collapse to ~1 ms, so clamp into the supported range.
  const requestedTimeoutMs = Number(timeoutMs);
  const effectiveTimeoutMs = Number.isFinite(requestedTimeoutMs)
    ? Math.min(2147483647, Math.max(1000, requestedTimeoutMs))
    : 180000;

  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), effectiveTimeoutMs);
  try {
    const response = await fetch(source, { signal: controller.signal });
    if (!response.ok) {
      throw new Error(`HTTP ${response.status}`);
    }
    const arrayBuffer = await response.arrayBuffer();
    writeFileSync(outputPath, Buffer.from(arrayBuffer));
    return { sourceType: "http", size: arrayBuffer.byteLength };
  } catch (error) {
    // Only this function's timer aborts the controller, so an aborted signal means a timeout.
    if (controller.signal.aborted) {
      throw new Error(`timeout after ${effectiveTimeoutMs}ms`, { cause: error });
    }
    throw error;
  } finally {
    clearTimeout(timer);
  }
}
405
+
406
/**
 * Best-effort installation of a prebuilt vector index for the given provider.
 *
 * Each candidate URL is downloaded into a throw-away temp directory, extracted,
 * and scanned for a cache file that matches the provider/model and was built by
 * the same package version. The first compatible file is re-stamped with the
 * caller's cacheKey/signature and saved to cacheFile.
 *
 * This function never rejects: every failure (including temp-directory setup
 * and cleanup) is logged and reported through the returned `reason`, so the
 * caller can fall back to building the index locally. Attempts are memoized per
 * provider/cacheKey/URL list for the lifetime of the process.
 *
 * @param {object} params
 * @param {string} params.provider - Only "local" and "gemini" are supported.
 * @param {string} params.model - Embedding model identifier.
 * @param {string} params.cacheKey - Cache key of the index being built.
 * @param {string} params.signature - Index signature written into the installed cache.
 * @param {string} params.cacheFile - Destination cache file.
 * @returns {Promise<{downloaded: boolean, reason: string}>}
 */
async function maybeDownloadPrebuiltVectorIndex({ provider, model, cacheKey, signature, cacheFile }) {
  if (!["local", "gemini"].includes(provider)) {
    return { downloaded: false, reason: "provider_not_supported" };
  }
  if (!ragConfig.prebuiltIndexAutoDownload) {
    return { downloaded: false, reason: "auto_download_disabled" };
  }

  const sourceUrls = resolvePrebuiltIndexUrlCandidates(provider);
  if (sourceUrls.length === 0) {
    return { downloaded: false, reason: "url_not_set" };
  }

  const attemptKey = `${provider}:${cacheKey}:${sourceUrls.join("|")}`;
  if (prebuiltDownloadAttempts.has(attemptKey)) {
    return prebuiltDownloadAttempts.get(attemptKey);
  }

  const expectedCacheFileName = makeCacheFileName(provider, model, cacheKey);
  const attempt = (async () => {
    let lastReason = "not_attempted";
    for (const sourceUrl of sourceUrls) {
      const tempRoot = join(
        tmpdir(),
        `simple-dynamsoft-mcp-rag-prebuilt-${Date.now()}-${Math.random().toString(16).slice(2)}`
      );
      const archivePath = join(tempRoot, "prebuilt-rag-index.tar.gz");
      const extractRoot = join(tempRoot, "extract");

      try {
        // Inside the try on purpose: an unwritable temp dir must count as a
        // failed attempt, not as a rejection that gets memoized and rethrown.
        ensureDirectory(extractRoot);
        logRag(
          `prebuilt index download start provider=${provider} url=${sourceUrl} timeout_ms=${ragConfig.prebuiltIndexTimeoutMs}`
        );
        const downloaded = await downloadPrebuiltArchive(sourceUrl, archivePath, ragConfig.prebuiltIndexTimeoutMs);
        logRag(
          `prebuilt index downloaded provider=${provider} source=${downloaded.sourceType} size=${downloaded.size}B url=${sourceUrl}`
        );

        await tar.x({
          file: archivePath,
          cwd: extractRoot,
          strict: true
        });

        const candidateFiles = listDownloadedCacheCandidatesByProvider(
          extractRoot,
          expectedCacheFileName,
          cacheKey,
          provider
        );
        if (candidateFiles.length === 0) {
          throw new Error(`cache_file_not_found expected=${expectedCacheFileName}`);
        }

        for (const sourceCacheFile of candidateFiles) {
          const candidateCache = loadVectorIndexCache(sourceCacheFile, {
            provider,
            model
          });
          if (!candidateCache.hit) {
            continue;
          }

          // The installed cache is served as-is, so every item needs its vector.
          if (candidateCache.payload.items.length !== candidateCache.payload.vectors.length) {
            continue;
          }

          const cachePackageVersion = readSignaturePackageVersion(candidateCache.payload?.meta?.signature);
          if (!cachePackageVersion || cachePackageVersion !== pkg.version) {
            continue;
          }

          const migratedPayload = {
            ...candidateCache.payload,
            cacheKey,
            meta: {
              ...(candidateCache.payload.meta || {}),
              provider,
              model,
              signature
            }
          };
          saveVectorIndexCache(cacheFile, migratedPayload);
          logRag(
            `prebuilt index installed provider=${provider} cache_file=${cacheFile} source=${basename(sourceCacheFile)} mode=version_only_compat version=${cachePackageVersion}`
          );
          return { downloaded: true, reason: "installed_version_only_compat" };
        }

        throw new Error(
          `no_compatible_cache expected=${expectedCacheFileName} found=${candidateFiles.map((path) => basename(path)).join(",")}`
        );
      } catch (error) {
        const message = error?.message ?? String(error);
        lastReason = `${sourceUrl} => ${message}`;
        logRag(`prebuilt index unavailable provider=${provider} url=${sourceUrl} reason=${message}`);
      } finally {
        try {
          rmSync(tempRoot, { recursive: true, force: true });
        } catch (cleanupError) {
          // Leftover temp files are harmless; do not let cleanup fail the attempt.
          logRag(`prebuilt index temp cleanup failed path=${tempRoot} reason=${cleanupError?.message ?? cleanupError}`);
        }
      }
    }
    return { downloaded: false, reason: lastReason };
  })();

  prebuiltDownloadAttempts.set(attemptKey, attempt);
  return attempt;
}
508
+
251
509
  function saveVectorIndexCache(cacheFile, payload) {
252
510
  ensureDirectory(ragConfig.cacheDir);
253
511
  writeFileSync(cacheFile, JSON.stringify(payload));
254
512
  }
255
513
 
514
/**
 * Reads a build checkpoint and verifies that it belongs to the current build.
 *
 * A checkpoint is accepted when its cacheKey matches, it lists exactly the
 * expected items (same id and uri at every position), and it holds no more
 * vectors than there are items. Fewer vectors are allowed.
 *
 * @param {string} checkpointFile - Path of the checkpoint JSON file.
 * @param {string} expectedKey - Cache key the checkpoint must carry.
 * @param {Array<{id: *, uri: *}>} expectedItems - Items of the current build, in order.
 * @returns {{hit: boolean, reason: string, payload: object|null}} On a hit the
 *   parsed checkpoint with reason "ok"; otherwise a reason code and a null payload.
 */
function loadVectorIndexCheckpoint(checkpointFile, expectedKey, expectedItems) {
  const reject = (reason) => ({ hit: false, reason, payload: null });

  if (!existsSync(checkpointFile)) return reject("missing");

  try {
    const parsed = JSON.parse(readFileSync(checkpointFile, "utf8"));
    if (!parsed || parsed.cacheKey !== expectedKey) {
      return reject("cache_key_mismatch");
    }

    const { items, vectors } = parsed;
    if (!Array.isArray(items) || !Array.isArray(vectors)) {
      return reject("invalid_payload");
    }
    if (items.length !== expectedItems.length) {
      return reject("items_length_mismatch");
    }

    const firstDivergence = expectedItems.findIndex((wanted, position) => {
      const stored = items[position];
      return stored?.id !== wanted?.id || stored?.uri !== wanted?.uri;
    });
    if (firstDivergence !== -1) {
      return reject("items_mismatch");
    }

    if (vectors.length > expectedItems.length) {
      return reject("vectors_overflow");
    }
    return { hit: true, reason: "ok", payload: parsed };
  } catch {
    // Unreadable file, malformed JSON, or an unusable expectedItems argument.
    return reject("parse_error");
  }
}
542
+
543
/**
 * Serializes a build checkpoint as JSON and writes it to checkpointFile,
 * creating the configured cache directory first.
 *
 * @param {string} checkpointFile - Destination path.
 * @param {object} payload - Checkpoint payload to persist.
 */
function saveVectorIndexCheckpoint(checkpointFile, payload) {
  ensureDirectory(ragConfig.cacheDir);
  const serialized = JSON.stringify(payload);
  writeFileSync(checkpointFile, serialized);
}
547
+
548
/**
 * Deletes a checkpoint file if it exists; does nothing otherwise.
 *
 * @param {string} checkpointFile - Path of the checkpoint to remove.
 */
function clearVectorIndexCheckpoint(checkpointFile) {
  if (!existsSync(checkpointFile)) return;
  rmSync(checkpointFile, { force: true });
}
553
+
256
554
  function normalizeVector(vector) {
257
555
  let sum = 0;
258
556
  for (const value of vector) {
@@ -272,25 +570,111 @@ function dotProduct(a, b) {
272
570
  return sum;
273
571
  }
274
572
 
275
- async function embedTexts(texts, embedder, batchSize = 1) {
573
/**
 * Tells whether an error represents a rate-limit condition.
 *
 * @param {*} error - Thrown value; may be null or undefined.
 * @returns {boolean} true when the error carries a truthy `rateLimited` flag;
 *   otherwise whatever isRateLimitGeminiStatus reports for its numeric `status`.
 */
function isRateLimitError(error) {
  return error?.rateLimited ? true : isRateLimitGeminiStatus(Number(error?.status));
}
578
+
579
/**
 * Embeds texts in order and reports every finished chunk to an optional callback.
 *
 * When the embedder offers embedBatch and batchSize > 1, texts are sent in
 * batches. A rate-limited batch halves the batch size and is retried; any other
 * batch failure (or a rate limit at batch size 1) falls back to embedding that
 * batch one text at a time. Without batching, every text is embedded singly.
 *
 * Only the embed request itself is treated as a recoverable batch failure. An
 * error thrown by onChunk propagates to the caller unchanged: treating it as an
 * embedding failure would re-embed the batch (duplicating vectors) and advance
 * past the following batch.
 *
 * @param {string[]} texts - Texts to embed.
 * @param {{embed: Function, embedBatch?: Function}} embedder - Embedding backend.
 * @param {number} [batchSize=1] - Preferred batch size; values below 1 count as 1.
 * @param {object} [options]
 * @param {number} [options.offset=0] - Items already completed before this call;
 *   added to the `completed` counter passed to onChunk.
 * @param {number} [options.total=texts.length] - Overall item count passed to onChunk.
 * @param {Function|null} [options.onChunk=null] - Awaited after each chunk with
 *   { vectors, mode, sourceBatchSize, completed, total }; mode is "batch",
 *   "single_fallback" or "single".
 * @param {string} [options.providerName=""] - Provider label used in log lines.
 * @returns {Promise<{vectors: Array, stats: {batchDowngrades: number,
 *   singleFallbackBatches: number, finalBatchSize: number}}>} Vectors in input order.
 * @throws Whatever embedder.embed or onChunk throws.
 */
async function embedTextsWithProgress(
  texts,
  embedder,
  batchSize = 1,
  {
    offset = 0,
    total = texts.length,
    onChunk = null,
    providerName = ""
  } = {}
) {
  const results = [];
  const normalizedBatchSize = Math.max(1, batchSize);
  let completed = offset;
  let currentBatchSize = normalizedBatchSize;
  let rateLimitFailures = 0;
  let batchDowngrades = 0;
  let singleFallbackBatches = 0;

  const reportChunk = async (vectors, mode, sourceBatchSize) => {
    if (!Array.isArray(vectors) || vectors.length === 0) return;
    completed += vectors.length;
    if (onChunk) {
      await onChunk({
        vectors,
        mode,
        sourceBatchSize,
        completed,
        total
      });
    }
  };

  // Embeds the given texts one request at a time, reporting each vector.
  const embedOneByOne = async (items, mode) => {
    for (const text of items) {
      const vector = await embedder.embed(text);
      results.push(vector);
      await reportChunk([vector], mode, 1);
    }
  };

  if (embedder.embedBatch && normalizedBatchSize > 1) {
    let index = 0;
    while (index < texts.length) {
      const batch = texts.slice(index, index + currentBatchSize);

      let vectors = null;
      let failed = false;
      let failure = null;
      try {
        vectors = await embedder.embedBatch(batch);
        if (!Array.isArray(vectors) || vectors.length !== batch.length) {
          throw new Error(`Gemini batch response size mismatch expected=${batch.length} actual=${vectors?.length || 0}`);
        }
      } catch (error) {
        failed = true;
        failure = error;
      }

      if (!failed) {
        // Bookkeeping and progress reporting stay outside the try above so that
        // a failing onChunk is not mistaken for a failed embed request.
        results.push(...vectors);
        index += batch.length;
        rateLimitFailures = 0;
        await reportChunk(vectors, "batch", batch.length);
        continue;
      }

      if (isRateLimitError(failure)) {
        rateLimitFailures += 1;
        const nextBatchSize = Math.max(1, Math.floor(currentBatchSize / 2));
        if (nextBatchSize < currentBatchSize) {
          batchDowngrades += 1;
          logRag(
            `gemini batch downgrade provider=${providerName || "unknown"} from=${currentBatchSize} to=${nextBatchSize} ` +
              `rate_limit_failures=${rateLimitFailures}`
          );
          currentBatchSize = nextBatchSize;
          // Retry the same position with the smaller batch.
          continue;
        }
      }

      singleFallbackBatches += 1;
      logRag(
        `batch embedding fallback provider=${providerName || "unknown"} batch_size=${batch.length} reason=${failure.message}`
      );
      await embedOneByOne(batch, "single_fallback");
      index += batch.length;
      rateLimitFailures = 0;
    }

    return {
      vectors: results,
      stats: {
        batchDowngrades,
        singleFallbackBatches,
        finalBatchSize: currentBatchSize
      }
    };
  }

  await embedOneByOne(texts, "single");

  return {
    vectors: results,
    stats: {
      batchDowngrades,
      singleFallbackBatches,
      finalBatchSize: 1
    }
  };
}
295
679
 
296
680
  let localEmbedderPromise = null;
@@ -326,59 +710,119 @@ async function getGeminiEmbedder() {
326
710
  throw new Error("GEMINI_API_KEY is required for gemini embeddings.");
327
711
  }
328
712
  if (geminiEmbedderPromise) return geminiEmbedderPromise;
329
- geminiEmbedderPromise = Promise.resolve({
330
- embed: async (text) => {
331
- const response = await fetch(
332
- `${ragConfig.geminiBaseUrl}/v1beta/${ragConfig.geminiModel}:embedContent?key=${ragConfig.geminiApiKey}`,
333
- {
334
- method: "POST",
335
- headers: { "Content-Type": "application/json" },
336
- body: JSON.stringify({
713
+ const retryConfig = normalizeGeminiRetryConfig({
714
+ maxAttempts: ragConfig.geminiRetryMaxAttempts,
715
+ baseDelayMs: ragConfig.geminiRetryBaseDelayMs,
716
+ maxDelayMs: ragConfig.geminiRetryMaxDelayMs,
717
+ requestThrottleMs: ragConfig.geminiRequestThrottleMs
718
+ });
719
+
720
+ geminiEmbedderPromise = Promise.resolve((() => {
721
+ const metrics = {
722
+ requests: 0,
723
+ retries: 0,
724
+ retryDelayMs: 0,
725
+ throttleEvents: 0,
726
+ throttleDelayMs: 0,
727
+ rateLimitRetries: 0
728
+ };
729
+
730
+ let nextAllowedAt = 0;
731
+
732
+ const throttleRequest = async (operation) => {
733
+ if (retryConfig.requestThrottleMs <= 0) return;
734
+ const now = Date.now();
735
+ const waitMs = Math.max(0, nextAllowedAt - now);
736
+ if (waitMs > 0) {
737
+ metrics.throttleEvents += 1;
738
+ metrics.throttleDelayMs += waitMs;
739
+ logRag(`gemini throttle op=${operation} wait_ms=${waitMs}`);
740
+ await sleepMs(waitMs);
741
+ }
742
+ nextAllowedAt = Date.now() + retryConfig.requestThrottleMs;
743
+ };
744
+
745
+ const requestJson = async (operation, endpoint, body) => executeWithGeminiRetry({
746
+ operation,
747
+ retryConfig,
748
+ logger: (message) => logRag(message),
749
+ onRetry: ({ delayMs, rateLimited }) => {
750
+ metrics.retries += 1;
751
+ metrics.retryDelayMs += delayMs;
752
+ if (rateLimited) {
753
+ metrics.rateLimitRetries += 1;
754
+ }
755
+ },
756
+ requestFn: async () => {
757
+ await throttleRequest(operation);
758
+ metrics.requests += 1;
759
+ const response = await fetch(
760
+ `${ragConfig.geminiBaseUrl}/v1beta/${endpoint}?key=${ragConfig.geminiApiKey}`,
761
+ {
762
+ method: "POST",
763
+ headers: { "Content-Type": "application/json" },
764
+ body: JSON.stringify(body)
765
+ }
766
+ );
767
+ if (!response.ok) {
768
+ const detail = await response.text();
769
+ throw new GeminiHttpError(`Gemini ${operation} failed (${response.status}): ${detail}`, {
770
+ status: response.status,
771
+ detail,
772
+ retryAfterMs: parseRetryAfterMs(response.headers.get("retry-after"))
773
+ });
774
+ }
775
+ return response.json();
776
+ }
777
+ });
778
+
779
+ return {
780
+ embed: async (text) => {
781
+ const payload = await requestJson(
782
+ "embedContent",
783
+ `${ragConfig.geminiModel}:embedContent`,
784
+ {
337
785
  content: {
338
786
  parts: [{ text }]
339
787
  }
340
- })
788
+ }
789
+ );
790
+ const embedding = payload.embedding?.values || payload.embedding || payload.embeddings?.[0]?.values;
791
+ if (!embedding) {
792
+ throw new Error("Gemini embedding response missing embedding values.");
341
793
  }
342
- );
343
- if (!response.ok) {
344
- const detail = await response.text();
345
- throw new Error(`Gemini embedContent failed (${response.status}): ${detail}`);
346
- }
347
- const payload = await response.json();
348
- const embedding = payload.embedding?.values || payload.embedding || payload.embeddings?.[0]?.values;
349
- if (!embedding) {
350
- throw new Error("Gemini embedding response missing embedding values.");
351
- }
352
- return embedding;
353
- },
354
- embedBatch: async (texts) => {
355
- const response = await fetch(
356
- `${ragConfig.geminiBaseUrl}/v1beta/${ragConfig.geminiModel}:batchEmbedContents?key=${ragConfig.geminiApiKey}`,
357
- {
358
- method: "POST",
359
- headers: { "Content-Type": "application/json" },
360
- body: JSON.stringify({
794
+ return embedding;
795
+ },
796
+ embedBatch: async (texts) => {
797
+ const payload = await requestJson(
798
+ "batchEmbedContents",
799
+ `${ragConfig.geminiModel}:batchEmbedContents`,
800
+ {
361
801
  requests: texts.map((text) => ({
362
802
  model: ragConfig.geminiModel,
363
803
  content: {
364
804
  parts: [{ text }]
365
805
  }
366
806
  }))
367
- })
807
+ }
808
+ );
809
+ const embeddings = payload.embeddings || payload.responses;
810
+ if (!Array.isArray(embeddings)) {
811
+ throw new Error("Gemini batch response missing embeddings.");
368
812
  }
369
- );
370
- if (!response.ok) {
371
- const detail = await response.text();
372
- throw new Error(`Gemini batchEmbedContents failed (${response.status}): ${detail}`);
373
- }
374
- const payload = await response.json();
375
- const embeddings = payload.embeddings || payload.responses;
376
- if (!Array.isArray(embeddings)) {
377
- throw new Error("Gemini batch response missing embeddings.");
813
+ return embeddings.map((item) => item.values || item.embedding?.values || item.embedding);
814
+ },
815
+ getMetrics: () => ({ ...metrics }),
816
+ resetMetrics: () => {
817
+ metrics.requests = 0;
818
+ metrics.retries = 0;
819
+ metrics.retryDelayMs = 0;
820
+ metrics.throttleEvents = 0;
821
+ metrics.throttleDelayMs = 0;
822
+ metrics.rateLimitRetries = 0;
378
823
  }
379
- return embeddings.map((item) => item.values || item.embedding?.values || item.embedding);
380
- }
381
- });
824
+ };
825
+ })());
382
826
  return geminiEmbedderPromise;
383
827
  }
384
828
 
@@ -391,6 +835,13 @@ async function createVectorProvider({ name, model, embedder, batchSize }) {
391
835
  };
392
836
  const cacheKey = createHash("sha256").update(JSON.stringify(cacheMeta)).digest("hex");
393
837
  const cacheFile = join(ragConfig.cacheDir, makeCacheFileName(name, model, cacheKey));
838
+ const checkpointFile = join(ragConfig.cacheDir, makeCheckpointFileName(name, model, cacheKey));
839
+ const expectedCacheState = {
840
+ cacheKey,
841
+ signature,
842
+ provider: name,
843
+ model
844
+ };
394
845
  logRag(
395
846
  `provider=${name} cache_file=${cacheFile} rebuild=${ragConfig.rebuild} cache_key=${cacheKey.slice(0, 12)}`
396
847
  );
@@ -400,7 +851,7 @@ async function createVectorProvider({ name, model, embedder, batchSize }) {
400
851
  if (indexPromise) return indexPromise;
401
852
  indexPromise = (async () => {
402
853
  if (!ragConfig.rebuild) {
403
- const cacheState = loadVectorIndexCache(cacheFile, cacheKey);
854
+ let cacheState = loadVectorIndexCache(cacheFile, expectedCacheState);
404
855
  if (cacheState.hit) {
405
856
  const cached = cacheState.payload;
406
857
  logRag(
@@ -412,23 +863,128 @@ async function createVectorProvider({ name, model, embedder, batchSize }) {
412
863
  };
413
864
  }
414
865
  logRag(`cache miss provider=${name} file=${cacheFile} reason=${cacheState.reason}`);
866
+
867
+ const downloadResult = await maybeDownloadPrebuiltVectorIndex({
868
+ provider: name,
869
+ model,
870
+ cacheKey,
871
+ signature,
872
+ cacheFile
873
+ });
874
+ if (downloadResult.downloaded) {
875
+ cacheState = loadVectorIndexCache(cacheFile, expectedCacheState);
876
+ if (cacheState.hit) {
877
+ const cached = cacheState.payload;
878
+ logRag(
879
+ `cache hit provider=${name} file=${cacheFile} source=prebuilt_download items=${cached.items.length} vectors=${cached.vectors.length}`
880
+ );
881
+ return {
882
+ items: cached.items,
883
+ vectors: cached.vectors
884
+ };
885
+ }
886
+ logRag(`cache miss provider=${name} file=${cacheFile} source=prebuilt_download reason=${cacheState.reason}`);
887
+ }
415
888
  } else {
416
889
  logRag(`cache bypass provider=${name} file=${cacheFile} reason=rebuild_true`);
890
+ clearVectorIndexCheckpoint(checkpointFile);
417
891
  }
418
892
 
419
893
  const items = buildEmbeddingItems();
420
894
  const texts = items.map((item) => item.text);
421
- logRag(`building index provider=${name} embed_items=${texts.length} batch_size=${batchSize}`);
422
- const vectors = await embedTexts(texts, embedder, batchSize);
423
- const normalized = vectors.map(normalizeVector);
895
+ const indexedItems = items.map((item) => ({ id: item.id, uri: item.uri }));
896
+ let normalized = [];
897
+ let resumeFrom = 0;
898
+ if (!ragConfig.rebuild) {
899
+ const checkpointState = loadVectorIndexCheckpoint(checkpointFile, cacheKey, indexedItems);
900
+ if (checkpointState.hit) {
901
+ normalized = checkpointState.payload.vectors;
902
+ resumeFrom = normalized.length;
903
+ logRag(
904
+ `checkpoint resume provider=${name} file=${checkpointFile} completed=${resumeFrom}/${texts.length}`
905
+ );
906
+ } else if (checkpointState.reason !== "missing") {
907
+ logRag(`checkpoint ignored provider=${name} file=${checkpointFile} reason=${checkpointState.reason}`);
908
+ }
909
+ }
910
+
911
+ if (name === "gemini" && embedder.resetMetrics) {
912
+ embedder.resetMetrics();
913
+ }
914
+
915
+ const checkpointIntervalMs = 5000;
916
+ let lastCheckpointAt = 0;
917
+ const persistCheckpoint = (force = false) => {
918
+ const now = Date.now();
919
+ if (!force && now - lastCheckpointAt < checkpointIntervalMs) return;
920
+ const payload = {
921
+ cacheKey,
922
+ meta: cacheMeta,
923
+ items: indexedItems,
924
+ vectors: normalized,
925
+ completed: normalized.length,
926
+ total: texts.length,
927
+ updatedAt: new Date().toISOString()
928
+ };
929
+ saveVectorIndexCheckpoint(checkpointFile, payload);
930
+ lastCheckpointAt = now;
931
+ };
932
+
933
+ if (resumeFrom < texts.length) {
934
+ logRag(
935
+ `building index provider=${name} embed_items=${texts.length} remaining=${texts.length - resumeFrom} batch_size=${batchSize}`
936
+ );
937
+ try {
938
+ const embeddingResult = await embedTextsWithProgress(
939
+ texts.slice(resumeFrom),
940
+ embedder,
941
+ batchSize,
942
+ {
943
+ offset: resumeFrom,
944
+ total: texts.length,
945
+ providerName: name,
946
+ onChunk: ({ vectors, completed, total }) => {
947
+ normalized.push(...vectors.map(normalizeVector));
948
+ persistCheckpoint(completed >= total);
949
+ }
950
+ }
951
+ );
952
+
953
+ if (name === "gemini") {
954
+ const metrics = embedder.getMetrics ? embedder.getMetrics() : {};
955
+ logRag(
956
+ `gemini build metrics provider=${name} requests=${metrics.requests || 0} retries=${metrics.retries || 0} ` +
957
+ `retry_delay_ms=${metrics.retryDelayMs || 0} throttle_events=${metrics.throttleEvents || 0} ` +
958
+ `throttle_delay_ms=${metrics.throttleDelayMs || 0} rate_limit_retries=${metrics.rateLimitRetries || 0} ` +
959
+ `batch_downgrades=${embeddingResult.stats.batchDowngrades} single_fallback_batches=${embeddingResult.stats.singleFallbackBatches} ` +
960
+ `final_batch_size=${embeddingResult.stats.finalBatchSize}`
961
+ );
962
+ }
963
+ } catch (error) {
964
+ persistCheckpoint(true);
965
+ if (name === "gemini") {
966
+ const metrics = embedder.getMetrics ? embedder.getMetrics() : {};
967
+ logRag(
968
+ `gemini build failed provider=${name} requests=${metrics.requests || 0} retries=${metrics.retries || 0} ` +
969
+ `retry_delay_ms=${metrics.retryDelayMs || 0} throttle_events=${metrics.throttleEvents || 0} ` +
970
+ `throttle_delay_ms=${metrics.throttleDelayMs || 0} rate_limit_retries=${metrics.rateLimitRetries || 0} ` +
971
+ `checkpoint_completed=${normalized.length}/${texts.length} error=${error.message}`
972
+ );
973
+ }
974
+ throw error;
975
+ }
976
+ } else {
977
+ logRag(`checkpoint already complete provider=${name} completed=${resumeFrom}/${texts.length}`);
978
+ }
424
979
 
425
980
  const payload = {
426
981
  cacheKey,
427
982
  meta: cacheMeta,
428
- items: items.map((item) => ({ id: item.id, uri: item.uri })),
983
+ items: indexedItems,
429
984
  vectors: normalized
430
985
  };
431
986
  saveVectorIndexCache(cacheFile, payload);
987
+ clearVectorIndexCheckpoint(checkpointFile);
432
988
  logRag(`cache saved provider=${name} file=${cacheFile} items=${payload.items.length} vectors=${payload.vectors.length}`);
433
989
  return {
434
990
  items: payload.items,
@@ -506,7 +1062,13 @@ function logRagConfigOnce() {
506
1062
  if (ragLogState.config) return;
507
1063
  ragLogState.config = true;
508
1064
  logRag(
509
- `config provider=${ragConfig.provider} fallback=${ragConfig.fallback} prewarm=${ragConfig.prewarm} rebuild=${ragConfig.rebuild} cache_dir=${ragConfig.cacheDir}`
1065
+ `config provider=${ragConfig.provider} fallback=${ragConfig.fallback} prewarm=${ragConfig.prewarm} rebuild=${ragConfig.rebuild} ` +
1066
+ `cache_dir=${ragConfig.cacheDir} prebuilt_auto_download=${ragConfig.prebuiltIndexAutoDownload} ` +
1067
+ `prebuilt_url_override=${ragConfig.prebuiltIndexUrl ? "set" : "empty"} prebuilt_url_local=${ragConfig.prebuiltIndexUrlLocal ? "set" : "empty"} ` +
1068
+ `prebuilt_url_gemini=${ragConfig.prebuiltIndexUrlGemini ? "set" : "empty"} ` +
1069
+ `prebuilt_timeout_ms=${ragConfig.prebuiltIndexTimeoutMs} gemini_retry_max_attempts=${ragConfig.geminiRetryMaxAttempts} ` +
1070
+ `gemini_retry_base_delay_ms=${ragConfig.geminiRetryBaseDelayMs} gemini_retry_max_delay_ms=${ragConfig.geminiRetryMaxDelayMs} ` +
1071
+ `gemini_request_throttle_ms=${ragConfig.geminiRequestThrottleMs}`
510
1072
  );
511
1073
  }
512
1074
 
@@ -639,6 +1201,30 @@ async function getSampleSuggestions({ query, product, edition, platform, limit =
639
1201
  candidates = resourceIndex.filter((entry) => entry.type === "sample" && entry.product === normalizedProduct);
640
1202
  }
641
1203
 
1204
+ if (searchQuery && candidates.length > 1) {
1205
+ const terms = normalizeText(searchQuery.toLowerCase()).split(/\s+/).filter(Boolean);
1206
+ const scoreEntry = (entry) => {
1207
+ const tags = Array.isArray(entry.tags) ? entry.tags.map((tag) => String(tag).toLowerCase()) : [];
1208
+ const haystack = [
1209
+ String(entry.title || "").toLowerCase(),
1210
+ String(entry.summary || "").toLowerCase(),
1211
+ tags.join(" ")
1212
+ ].join(" ");
1213
+ let score = 0;
1214
+ for (const term of terms) {
1215
+ if (!term) continue;
1216
+ if (tags.some((tag) => tag === term || tag.includes(term))) score += 3;
1217
+ if (haystack.includes(term)) score += 1;
1218
+ }
1219
+ return score;
1220
+ };
1221
+ candidates = [...candidates].sort((a, b) => {
1222
+ const delta = scoreEntry(b) - scoreEntry(a);
1223
+ if (delta !== 0) return delta;
1224
+ return String(a.title || "").localeCompare(String(b.title || ""));
1225
+ });
1226
+ }
1227
+
642
1228
  const seen = new Set();
643
1229
  const results = [];
644
1230
  for (const entry of candidates) {