simple-dynamsoft-mcp 6.3.0 → 6.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.env.example CHANGED
@@ -60,14 +60,18 @@ RAG_FALLBACK=fuse
60
60
  # * RAG_REBUILD: true to ignore cache and rebuild on startup/search
61
61
  # * RAG_PREWARM: true to build the embedding index at startup
62
62
  # * RAG_PREWARM_BLOCK: true to block startup until prewarm completes
63
- # * RAG_PREBUILT_INDEX_AUTO_DOWNLOAD: auto-download prebuilt local index when local embeddings are selected
64
- # * RAG_PREBUILT_INDEX_URL: override prebuilt index archive URL (default GitHub release asset for current package version)
63
+ # * RAG_PREBUILT_INDEX_AUTO_DOWNLOAD: auto-download prebuilt index when local or gemini embeddings are selected
64
+ # * RAG_PREBUILT_INDEX_URL: global override URL for prebuilt index archive (applies to both local and gemini providers)
65
+ # * RAG_PREBUILT_INDEX_URL_LOCAL: provider-specific URL override for local prebuilt index archive
66
+ # * RAG_PREBUILT_INDEX_URL_GEMINI: provider-specific URL override for gemini prebuilt index archive
65
67
  # * RAG_PREBUILT_INDEX_TIMEOUT_MS: timeout for prebuilt index download request
66
68
  # RAG_REBUILD=false
67
69
  # RAG_PREWARM=false
68
70
  # RAG_PREWARM_BLOCK=false
69
71
  # RAG_PREBUILT_INDEX_AUTO_DOWNLOAD=true
70
72
  # RAG_PREBUILT_INDEX_URL=
73
+ # RAG_PREBUILT_INDEX_URL_LOCAL=
74
+ # RAG_PREBUILT_INDEX_URL_GEMINI=
71
75
  # RAG_PREBUILT_INDEX_TIMEOUT_MS=180000
72
76
 
73
77
  # Optional data submodule sync on server startup
package/README.md CHANGED
@@ -185,8 +185,10 @@ Example:
185
185
  Commonly used settings:
186
186
  - `RAG_PROVIDER`: `auto` | `gemini` | `local` | `fuse`
187
187
  - `RAG_FALLBACK`: `fuse` | `local` | `none`
188
- - `RAG_PREBUILT_INDEX_AUTO_DOWNLOAD`: `true` by default; auto-fetch prebuilt local index when local embeddings are selected
189
- - `RAG_PREBUILT_INDEX_URL`: override release asset URL for prebuilt index archive
188
+ - `RAG_PREBUILT_INDEX_AUTO_DOWNLOAD`: `true` by default; auto-fetch prebuilt index when local or gemini embeddings are selected
189
+ - `RAG_PREBUILT_INDEX_URL`: global override release asset URL for prebuilt index archive
190
+ - `RAG_PREBUILT_INDEX_URL_LOCAL`: override release asset URL for local prebuilt index archive
191
+ - `RAG_PREBUILT_INDEX_URL_GEMINI`: override release asset URL for gemini prebuilt index archive
190
192
  - `RAG_PREBUILT_INDEX_TIMEOUT_MS`: download timeout for prebuilt index fetch
191
193
  - `MCP_DATA_DIR`: use a preloaded local data folder (`metadata/`, `samples/`, `documentation/`)
192
194
  - `MCP_DATA_AUTO_DOWNLOAD`: allow startup archive download when bundled data is unavailable
@@ -199,17 +201,19 @@ For the complete list and defaults, see `.env.example` and the sections `Submodu
199
201
 
200
202
  ## Use Release Assets In A Local Project
201
203
 
202
- Use this when you want to run from a built `.tgz` package and reuse a prebuilt local RAG index.
204
+ Use this when you want to run from a built `.tgz` package and reuse prebuilt RAG indexes.
203
205
 
204
206
  1. Download release assets from GitHub Releases for the same version:
205
207
  - `simple-dynamsoft-mcp-<version>.tgz`
206
- - `prebuilt-rag-index-<version>.tar.gz`
208
+ - `prebuilt-rag-index-local-<version>.tar.gz`
209
+ - `prebuilt-rag-index-gemini-<version>.tar.gz` (only needed if `RAG_PROVIDER=gemini`)
207
210
  2. In your project folder, create a local tools folder, for example:
208
211
  - `<project>/.tools/simple-dynamsoft-mcp/`
209
- 3. Copy assets into that folder and extract the prebuilt index:
212
+ 3. Copy assets into that folder and extract the prebuilt index you plan to use:
210
213
  - Keep `simple-dynamsoft-mcp-<version>.tgz` as-is for `npx --package`.
211
- - Extract `prebuilt-rag-index-<version>.tar.gz`.
212
- - Expected cache output path: `<project>/.tools/simple-dynamsoft-mcp/prebuilt-rag/cache/*.json`.
214
+ - Extract `prebuilt-rag-index-local-<version>.tar.gz` for local embeddings.
215
+ - Extract `prebuilt-rag-index-gemini-<version>.tar.gz` for gemini embeddings.
216
+ - Expected cache output path: `<project>/.tools/simple-dynamsoft-mcp/prebuilt-rag/<provider>/cache/*.json`.
213
217
  4. Configure project-local `.vscode/mcp.json` to use the local package and cache path.
214
218
 
215
219
  Example (`.vscode/mcp.json`):
@@ -231,7 +235,7 @@ Example (`.vscode/mcp.json`):
231
235
  "RAG_REBUILD": "false",
232
236
  "RAG_LOCAL_MODEL": "Xenova/all-MiniLM-L6-v2",
233
237
  "RAG_LOCAL_QUANTIZED": "true",
234
- "RAG_CACHE_DIR": ".tools/simple-dynamsoft-mcp/prebuilt-rag/cache"
238
+ "RAG_CACHE_DIR": ".tools/simple-dynamsoft-mcp/prebuilt-rag/local/cache"
235
239
  }
236
240
  }
237
241
  }
@@ -242,9 +246,11 @@ Notes:
242
246
  - Use absolute paths if your MCP client does not resolve relative paths from workspace root.
243
247
  - `RAG_REBUILD` must stay `false` to reuse prebuilt cache files.
244
248
  - Runtime auto-download is enabled by default (`RAG_PREBUILT_INDEX_AUTO_DOWNLOAD=true`) when provider resolution reaches local or gemini embeddings (primary or fallback).
245
- - Default prebuilt URL pattern: `https://github.com/yushulx/simple-dynamsoft-mcp/releases/download/v<version>/prebuilt-rag-index-<version>.tar.gz`.
249
+ - Default prebuilt URL patterns:
250
+ - `https://github.com/yushulx/simple-dynamsoft-mcp/releases/download/v<version>/prebuilt-rag-index-local-<version>.tar.gz`
251
+ - `https://github.com/yushulx/simple-dynamsoft-mcp/releases/download/v<version>/prebuilt-rag-index-gemini-<version>.tar.gz`
246
252
  - Downloaded prebuilt cache is accepted when package version matches (with provider/model/payload sanity checks).
247
- - Prebuilt cache is used whenever provider execution resolves to local embeddings (primary or fallback).
253
+ - Prebuilt cache is used whenever provider execution resolves to local or gemini embeddings (primary or fallback).
248
254
 
249
255
  ## Supported SDKs
250
256
 
@@ -502,7 +508,7 @@ At startup, the server logs data mode/path to stderr:
502
508
  - Release workflow: `.github/workflows/release.yml`
503
509
  - Release behavior:
504
510
  - Creates GitHub release when `package.json` version changes on `main`
505
- - Attaches `npm pack` artifact and prebuilt RAG index artifact (release workflow requires `GEMINI_API_KEY` for gemini prebuild path)
511
+ - Attaches `npm pack` artifact plus separate prebuilt RAG index archives for local and gemini providers (release workflow requires `GEMINI_API_KEY` for gemini prebuild path)
506
512
  - Publishes the package to npm from the release workflow (OIDC trusted publishing)
507
513
 
508
514
  ## Testing
@@ -546,7 +552,9 @@ Key env vars:
546
552
  - `RAG_LOCAL_MODEL`: default `Xenova/all-MiniLM-L6-v2`
547
553
  - `RAG_CACHE_DIR`: default `data/.rag-cache`
548
554
  - `RAG_PREBUILT_INDEX_AUTO_DOWNLOAD`: default `true`
549
- - `RAG_PREBUILT_INDEX_URL`: override release prebuilt index asset URL
555
+ - `RAG_PREBUILT_INDEX_URL`: global override for release prebuilt index asset URL
556
+ - `RAG_PREBUILT_INDEX_URL_LOCAL`: override for local prebuilt index asset URL
557
+ - `RAG_PREBUILT_INDEX_URL_GEMINI`: override for gemini prebuilt index asset URL
550
558
  - `RAG_PREBUILT_INDEX_TIMEOUT_MS`: default `180000`
551
559
 
552
560
  Local embeddings download the model on first run and cache under `data/.rag-cache/models`.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "simple-dynamsoft-mcp",
3
- "version": "6.3.0",
3
+ "version": "6.4.0",
4
4
  "description": "MCP server for Dynamsoft SDKs - Capture Vision, Barcode Reader (Mobile/Python/Web), Dynamic Web TWAIN, and Document Viewer. Provides documentation, code snippets, and API guidance.",
5
5
  "license": "MIT",
6
6
  "repository": {
@@ -40,6 +40,7 @@
40
40
  "data:verify-versions:strict": "node scripts/update-sdk-versions.mjs --check --strict",
41
41
  "data:lock": "node scripts/update-data-lock.mjs",
42
42
  "data:verify-lock": "node scripts/verify-data-lock.mjs",
43
+ "data:verify-docs": "node scripts/verify-doc-resources.mjs",
43
44
  "rag:prebuild": "node scripts/prebuild-rag-index.mjs"
44
45
  },
45
46
  "keywords": [
@@ -0,0 +1,79 @@
1
+ #!/usr/bin/env node
2
+
3
+ import { resourceIndex, readResourceContent } from "../src/resource-index.js";
4
+
5
/**
 * Parse a strictly positive base-10 integer, returning `fallback` when the input is unusable.
 *
 * The whole trimmed value must be an optionally "+"-signed run of digits. `Number.parseInt`
 * alone stops at the first non-digit, so a typo such as "8abc" or "4.5" would be silently
 * truncated to 8 / 4 instead of being rejected.
 *
 * @param {unknown} value - Raw value to parse (for example an environment variable); may be
 *   undefined or null.
 * @param {number} fallback - Returned when `value` is missing, malformed, not greater than
 *   zero, or too large to be represented exactly.
 * @returns {number} The parsed positive integer, or `fallback`.
 */
function parsePositiveInt(value, fallback) {
  const text = String(value ?? "").trim();
  if (!/^\+?\d+$/.test(text)) return fallback;

  const parsed = Number.parseInt(text, 10);
  // Beyond the safe-integer range the parsed number no longer equals what was written.
  if (!Number.isSafeInteger(parsed) || parsed <= 0) return fallback;
  return parsed;
}
10
+
11
/**
 * Script entry point: read every resource-index entry of type "doc" through
 * `readResourceContent` and report the ones that fail to load or come back empty.
 *
 * Work is spread over a fixed number of workers (DOC_VERIFY_CONCURRENCY, default 8) that
 * pull positions from a shared cursor. A progress line is logged every 250 checked docs and
 * once more when the last doc is checked. When any doc fails, up to 20 failures are printed
 * to stderr and `process.exitCode` is set to 1.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const workerLimit = parsePositiveInt(process.env.DOC_VERIFY_CONCURRENCY, 8);
  const docEntries = resourceIndex.filter((item) => item.type === "doc");
  const total = docEntries.length;

  console.log(`[doc-verify] start total_docs=${total} concurrency=${workerLimit}`);

  if (total === 0) {
    console.log("[doc-verify] no docs found; skipping");
    return;
  }

  let cursor = 0;
  let checked = 0;
  const failures = [];

  const isNonEmptyString = (candidate) => typeof candidate === "string" && candidate.length > 0;

  // Each worker claims the next unclaimed position until the list is exhausted.
  const drainQueue = async () => {
    for (let position = cursor++; position < total; position = cursor++) {
      const entry = docEntries[position];
      try {
        const content = await readResourceContent(entry.uri);
        if (!content) {
          throw new Error("readResourceContent returned null");
        }
        const textPresent = isNonEmptyString(content.text);
        const blobPresent = isNonEmptyString(content.blob);
        if (!(textPresent || blobPresent)) {
          throw new Error("resource content is empty");
        }
      } catch (error) {
        failures.push({
          uri: entry.uri,
          error: error?.message || String(error)
        });
      } finally {
        checked += 1;
        const atMilestone = checked % 250 === 0 || checked === total;
        if (atMilestone) {
          console.log(`[doc-verify] progress checked=${checked}/${total} failures=${failures.length}`);
        }
      }
    }
  };

  const workerCount = Math.min(workerLimit, total);
  await Promise.all(Array.from({ length: workerCount }, () => drainQueue()));

  if (failures.length === 0) {
    console.log(`[doc-verify] success checked=${checked}`);
    return;
  }

  console.error(`[doc-verify] failed count=${failures.length}`);
  failures.slice(0, 20).forEach((failure) => {
    console.error(`[doc-verify] error uri=${failure.uri} message=${failure.error}`);
  });
  if (failures.length > 20) {
    console.error(`[doc-verify] ... truncated ${failures.length - 20} additional failures`);
  }
  process.exitCode = 1;
}
78
+
79
+ await main();
package/src/rag.js CHANGED
@@ -32,8 +32,14 @@ const dataRoot = getResolvedDataRoot();
32
32
 
33
33
  const pkgUrl = new URL("../package.json", import.meta.url);
34
34
  const pkg = JSON.parse(readFileSync(pkgUrl, "utf8"));
35
- const defaultPrebuiltIndexUrl =
35
+ const legacyPrebuiltIndexUrl =
36
36
  `https://github.com/yushulx/simple-dynamsoft-mcp/releases/download/v${pkg.version}/prebuilt-rag-index-${pkg.version}.tar.gz`;
37
+ const defaultPrebuiltIndexUrls = {
38
+ local:
39
+ `https://github.com/yushulx/simple-dynamsoft-mcp/releases/download/v${pkg.version}/prebuilt-rag-index-local-${pkg.version}.tar.gz`,
40
+ gemini:
41
+ `https://github.com/yushulx/simple-dynamsoft-mcp/releases/download/v${pkg.version}/prebuilt-rag-index-gemini-${pkg.version}.tar.gz`
42
+ };
37
43
 
38
44
  // ============================================================================
39
45
  // RAG configuration
@@ -88,7 +94,9 @@ const ragConfig = {
88
94
  prewarm: readBoolEnv("RAG_PREWARM", false),
89
95
  prewarmBlock: readBoolEnv("RAG_PREWARM_BLOCK", false),
90
96
  prebuiltIndexAutoDownload: readBoolEnv("RAG_PREBUILT_INDEX_AUTO_DOWNLOAD", true),
91
- prebuiltIndexUrl: readEnvValue("RAG_PREBUILT_INDEX_URL", defaultPrebuiltIndexUrl),
97
+ prebuiltIndexUrl: readEnvValue("RAG_PREBUILT_INDEX_URL", ""),
98
+ prebuiltIndexUrlLocal: readEnvValue("RAG_PREBUILT_INDEX_URL_LOCAL", defaultPrebuiltIndexUrls.local),
99
+ prebuiltIndexUrlGemini: readEnvValue("RAG_PREBUILT_INDEX_URL_GEMINI", defaultPrebuiltIndexUrls.gemini),
92
100
  prebuiltIndexTimeoutMs: readIntEnv("RAG_PREBUILT_INDEX_TIMEOUT_MS", 180000),
93
101
  geminiApiKey: readEnvValue("GEMINI_API_KEY", ""),
94
102
  geminiModel: normalizeGeminiModel(readEnvValue("GEMINI_EMBED_MODEL", "models/gemini-embedding-001")),
@@ -325,25 +333,45 @@ function readSignaturePackageVersion(signatureRaw) {
325
333
  }
326
334
  }
327
335
 
328
- function listDownloadedCacheCandidates(extractRoot, expectedCacheFileName, cacheKey) {
336
+ function listDownloadedCacheCandidatesByProvider(extractRoot, expectedCacheFileName, cacheKey, provider) {
329
337
  const allFiles = listFilesRecursive(extractRoot).filter((path) => path.toLowerCase().endsWith(".json")).sort();
330
338
  const expectedPath = allFiles.find((path) => basename(path) === expectedCacheFileName);
331
339
 
332
340
  const cachePrefix = cacheKey.slice(0, 12);
333
341
  const prefixPath = allFiles.find((path) => {
334
342
  const name = basename(path);
335
- return name.startsWith("rag-local-") && name.endsWith(`-${cachePrefix}.json`);
343
+ return name.startsWith(`rag-${provider}-`) && name.endsWith(`-${cachePrefix}.json`);
336
344
  });
337
345
 
338
- const ragLocalFiles = allFiles.filter((path) => basename(path).startsWith("rag-local-"));
346
+ const providerFiles = allFiles.filter((path) => basename(path).startsWith(`rag-${provider}-`));
339
347
  const unique = [];
340
- for (const path of [expectedPath, prefixPath, ...ragLocalFiles]) {
348
+ for (const path of [expectedPath, prefixPath, ...providerFiles]) {
341
349
  if (!path) continue;
342
350
  if (!unique.includes(path)) unique.push(path);
343
351
  }
344
352
  return unique;
345
353
  }
346
354
 
355
/**
 * Build the ordered list of archive URLs to try for a provider's prebuilt index.
 *
 * A non-blank `ragConfig.prebuiltIndexUrl` wins outright and is returned alone. Otherwise
 * the provider-specific URL ("local" or "gemini") comes first, followed by the legacy
 * single-archive URL. Blank entries are dropped and duplicates are removed while keeping
 * first-seen order.
 *
 * @param {string} provider - Embedding provider name.
 * @returns {string[]} Trimmed candidate URLs in the order they should be attempted.
 */
function resolvePrebuiltIndexUrlCandidates(provider) {
  const normalize = (raw) => String(raw || "").trim();

  const globalOverride = normalize(ragConfig.prebuiltIndexUrl);
  if (globalOverride) return [globalOverride];

  const ordered = [];
  switch (provider) {
    case "local":
      ordered.push(normalize(ragConfig.prebuiltIndexUrlLocal));
      break;
    case "gemini":
      ordered.push(normalize(ragConfig.prebuiltIndexUrlGemini));
      break;
    default:
      break;
  }
  ordered.push(legacyPrebuiltIndexUrl);

  // A Set keeps insertion order, so the provider URL stays ahead of the legacy one.
  return [...new Set(ordered.filter(Boolean))];
}
374
+
347
375
  async function downloadPrebuiltArchive(url, outputPath, timeoutMs) {
348
376
  const source = String(url || "").trim();
349
377
  if (!source) {
@@ -376,88 +404,102 @@ async function downloadPrebuiltArchive(url, outputPath, timeoutMs) {
376
404
  }
377
405
 
378
406
  async function maybeDownloadPrebuiltVectorIndex({ provider, model, cacheKey, signature, cacheFile }) {
379
- if (provider !== "local") {
380
- return { downloaded: false, reason: "provider_not_local" };
407
+ if (!["local", "gemini"].includes(provider)) {
408
+ return { downloaded: false, reason: "provider_not_supported" };
381
409
  }
382
410
  if (!ragConfig.prebuiltIndexAutoDownload) {
383
411
  return { downloaded: false, reason: "auto_download_disabled" };
384
412
  }
385
413
 
386
- const sourceUrl = String(ragConfig.prebuiltIndexUrl || "").trim();
387
- if (!sourceUrl) {
414
+ const sourceUrls = resolvePrebuiltIndexUrlCandidates(provider);
415
+ if (sourceUrls.length === 0) {
388
416
  return { downloaded: false, reason: "url_not_set" };
389
417
  }
390
418
 
391
- const attemptKey = `${provider}:${cacheKey}:${sourceUrl}`;
419
+ const attemptKey = `${provider}:${cacheKey}:${sourceUrls.join("|")}`;
392
420
  if (prebuiltDownloadAttempts.has(attemptKey)) {
393
421
  return prebuiltDownloadAttempts.get(attemptKey);
394
422
  }
395
423
 
396
424
  const expectedCacheFileName = makeCacheFileName(provider, model, cacheKey);
397
425
  const attempt = (async () => {
398
- const tempRoot = join(tmpdir(), `simple-dynamsoft-mcp-rag-prebuilt-${Date.now()}-${Math.random().toString(16).slice(2)}`);
399
- const archivePath = join(tempRoot, "prebuilt-rag-index.tar.gz");
400
- const extractRoot = join(tempRoot, "extract");
401
-
402
- ensureDirectory(extractRoot);
403
- try {
404
- logRag(`prebuilt index download start provider=${provider} url=${sourceUrl} timeout_ms=${ragConfig.prebuiltIndexTimeoutMs}`);
405
- const downloaded = await downloadPrebuiltArchive(sourceUrl, archivePath, ragConfig.prebuiltIndexTimeoutMs);
406
- logRag(
407
- `prebuilt index downloaded provider=${provider} source=${downloaded.sourceType} size=${downloaded.size}B`
426
+ let lastReason = "not_attempted";
427
+ for (const sourceUrl of sourceUrls) {
428
+ const tempRoot = join(
429
+ tmpdir(),
430
+ `simple-dynamsoft-mcp-rag-prebuilt-${Date.now()}-${Math.random().toString(16).slice(2)}`
408
431
  );
432
+ const archivePath = join(tempRoot, "prebuilt-rag-index.tar.gz");
433
+ const extractRoot = join(tempRoot, "extract");
409
434
 
410
- await tar.x({
411
- file: archivePath,
412
- cwd: extractRoot,
413
- strict: true
414
- });
415
-
416
- const candidateFiles = listDownloadedCacheCandidates(extractRoot, expectedCacheFileName, cacheKey);
417
- if (candidateFiles.length === 0) {
418
- throw new Error(`cache_file_not_found expected=${expectedCacheFileName}`);
419
- }
435
+ ensureDirectory(extractRoot);
436
+ try {
437
+ logRag(
438
+ `prebuilt index download start provider=${provider} url=${sourceUrl} timeout_ms=${ragConfig.prebuiltIndexTimeoutMs}`
439
+ );
440
+ const downloaded = await downloadPrebuiltArchive(sourceUrl, archivePath, ragConfig.prebuiltIndexTimeoutMs);
441
+ logRag(
442
+ `prebuilt index downloaded provider=${provider} source=${downloaded.sourceType} size=${downloaded.size}B url=${sourceUrl}`
443
+ );
420
444
 
421
- for (const sourceCacheFile of candidateFiles) {
422
- const candidateCache = loadVectorIndexCache(sourceCacheFile, {
423
- provider,
424
- model
445
+ await tar.x({
446
+ file: archivePath,
447
+ cwd: extractRoot,
448
+ strict: true
425
449
  });
426
- if (!candidateCache.hit) {
427
- continue;
428
- }
429
450
 
430
- const cachePackageVersion = readSignaturePackageVersion(candidateCache.payload?.meta?.signature);
431
- if (!cachePackageVersion || cachePackageVersion !== pkg.version) {
432
- continue;
451
+ const candidateFiles = listDownloadedCacheCandidatesByProvider(
452
+ extractRoot,
453
+ expectedCacheFileName,
454
+ cacheKey,
455
+ provider
456
+ );
457
+ if (candidateFiles.length === 0) {
458
+ throw new Error(`cache_file_not_found expected=${expectedCacheFileName}`);
433
459
  }
434
460
 
435
- const migratedPayload = {
436
- ...candidateCache.payload,
437
- cacheKey,
438
- meta: {
439
- ...(candidateCache.payload.meta || {}),
461
+ for (const sourceCacheFile of candidateFiles) {
462
+ const candidateCache = loadVectorIndexCache(sourceCacheFile, {
440
463
  provider,
441
- model,
442
- signature
464
+ model
465
+ });
466
+ if (!candidateCache.hit) {
467
+ continue;
443
468
  }
444
- };
445
- saveVectorIndexCache(cacheFile, migratedPayload);
446
- logRag(
447
- `prebuilt index installed provider=${provider} cache_file=${cacheFile} source=${basename(sourceCacheFile)} mode=version_only_compat version=${cachePackageVersion}`
469
+
470
+ const cachePackageVersion = readSignaturePackageVersion(candidateCache.payload?.meta?.signature);
471
+ if (!cachePackageVersion || cachePackageVersion !== pkg.version) {
472
+ continue;
473
+ }
474
+
475
+ const migratedPayload = {
476
+ ...candidateCache.payload,
477
+ cacheKey,
478
+ meta: {
479
+ ...(candidateCache.payload.meta || {}),
480
+ provider,
481
+ model,
482
+ signature
483
+ }
484
+ };
485
+ saveVectorIndexCache(cacheFile, migratedPayload);
486
+ logRag(
487
+ `prebuilt index installed provider=${provider} cache_file=${cacheFile} source=${basename(sourceCacheFile)} mode=version_only_compat version=${cachePackageVersion}`
488
+ );
489
+ return { downloaded: true, reason: "installed_version_only_compat" };
490
+ }
491
+
492
+ throw new Error(
493
+ `no_compatible_cache expected=${expectedCacheFileName} found=${candidateFiles.map((path) => basename(path)).join(",")}`
448
494
  );
449
- return { downloaded: true, reason: "installed_version_only_compat" };
495
+ } catch (error) {
496
+ lastReason = `${sourceUrl} => ${error.message}`;
497
+ logRag(`prebuilt index unavailable provider=${provider} url=${sourceUrl} reason=${error.message}`);
498
+ } finally {
499
+ rmSync(tempRoot, { recursive: true, force: true });
450
500
  }
451
-
452
- throw new Error(
453
- `no_compatible_cache expected=${expectedCacheFileName} found=${candidateFiles.map((path) => basename(path)).join(",")}`
454
- );
455
- } catch (error) {
456
- logRag(`prebuilt index unavailable provider=${provider} reason=${error.message}`);
457
- return { downloaded: false, reason: error.message };
458
- } finally {
459
- rmSync(tempRoot, { recursive: true, force: true });
460
501
  }
502
+ return { downloaded: false, reason: lastReason };
461
503
  })();
462
504
 
463
505
  prebuiltDownloadAttempts.set(attemptKey, attempt);
@@ -822,28 +864,26 @@ async function createVectorProvider({ name, model, embedder, batchSize }) {
822
864
  }
823
865
  logRag(`cache miss provider=${name} file=${cacheFile} reason=${cacheState.reason}`);
824
866
 
825
- if (name === "local") {
826
- const downloadResult = await maybeDownloadPrebuiltVectorIndex({
827
- provider: name,
828
- model,
829
- cacheKey,
830
- signature,
831
- cacheFile
832
- });
833
- if (downloadResult.downloaded) {
834
- cacheState = loadVectorIndexCache(cacheFile, expectedCacheState);
835
- if (cacheState.hit) {
836
- const cached = cacheState.payload;
837
- logRag(
838
- `cache hit provider=${name} file=${cacheFile} source=prebuilt_download items=${cached.items.length} vectors=${cached.vectors.length}`
839
- );
840
- return {
841
- items: cached.items,
842
- vectors: cached.vectors
843
- };
844
- }
845
- logRag(`cache miss provider=${name} file=${cacheFile} source=prebuilt_download reason=${cacheState.reason}`);
867
+ const downloadResult = await maybeDownloadPrebuiltVectorIndex({
868
+ provider: name,
869
+ model,
870
+ cacheKey,
871
+ signature,
872
+ cacheFile
873
+ });
874
+ if (downloadResult.downloaded) {
875
+ cacheState = loadVectorIndexCache(cacheFile, expectedCacheState);
876
+ if (cacheState.hit) {
877
+ const cached = cacheState.payload;
878
+ logRag(
879
+ `cache hit provider=${name} file=${cacheFile} source=prebuilt_download items=${cached.items.length} vectors=${cached.vectors.length}`
880
+ );
881
+ return {
882
+ items: cached.items,
883
+ vectors: cached.vectors
884
+ };
846
885
  }
886
+ logRag(`cache miss provider=${name} file=${cacheFile} source=prebuilt_download reason=${cacheState.reason}`);
847
887
  }
848
888
  } else {
849
889
  logRag(`cache bypass provider=${name} file=${cacheFile} reason=rebuild_true`);
@@ -1024,6 +1064,8 @@ function logRagConfigOnce() {
1024
1064
  logRag(
1025
1065
  `config provider=${ragConfig.provider} fallback=${ragConfig.fallback} prewarm=${ragConfig.prewarm} rebuild=${ragConfig.rebuild} ` +
1026
1066
  `cache_dir=${ragConfig.cacheDir} prebuilt_auto_download=${ragConfig.prebuiltIndexAutoDownload} ` +
1067
+ `prebuilt_url_override=${ragConfig.prebuiltIndexUrl ? "set" : "empty"} prebuilt_url_local=${ragConfig.prebuiltIndexUrlLocal ? "set" : "empty"} ` +
1068
+ `prebuilt_url_gemini=${ragConfig.prebuiltIndexUrlGemini ? "set" : "empty"} ` +
1027
1069
  `prebuilt_timeout_ms=${ragConfig.prebuiltIndexTimeoutMs} gemini_retry_max_attempts=${ragConfig.geminiRetryMaxAttempts} ` +
1028
1070
  `gemini_retry_base_delay_ms=${ragConfig.geminiRetryBaseDelayMs} gemini_retry_max_delay_ms=${ragConfig.geminiRetryMaxDelayMs} ` +
1029
1071
  `gemini_request_throttle_ms=${ragConfig.geminiRequestThrottleMs}`
@@ -399,9 +399,49 @@ function getPinnedResources() {
399
399
  return resourceIndex.filter((entry) => entry.pinned);
400
400
  }
401
401
 
402
/**
 * Percent-decode `value`, returning it unchanged when it is not validly encoded.
 *
 * @param {string} value - Possibly percent-encoded text.
 * @returns {string} The decoded text, or the original `value` if decoding throws.
 */
function safeDecodeURIComponent(value) {
  let decoded = value;
  try {
    decoded = decodeURIComponent(value);
  } catch {
    // Malformed escape sequence: keep the input exactly as received.
  }
  return decoded;
}
409
+
410
/**
 * List the URIs to try, in order, when looking up a resource by `uri`.
 *
 * The URI as given is always the first candidate. For `doc://` URIs with at least five
 * non-empty path segments, the first four segments are kept as-is and the remainder (the
 * slug) is decoded once and twice, then re-encoded with `encodeURIComponent`, so that
 * unencoded and double-encoded slugs both produce a singly-encoded candidate.
 *
 * @param {string} uri - Requested resource URI.
 * @returns {string[]} Unique candidates in lookup order; empty when `uri` is not a
 *   non-empty string.
 */
function buildResourceLookupCandidates(uri) {
  const candidates = [];
  if (typeof uri !== "string" || uri.length === 0) return candidates;
  candidates.push(uri);

  // Split on the FIRST "://" only. Destructuring uri.split("://") would drop everything
  // after a second "://" inside the slug and produce a candidate for a different resource.
  const separatorIndex = uri.indexOf("://");
  if (separatorIndex === -1) return candidates;
  const scheme = uri.slice(0, separatorIndex);
  if (scheme !== "doc") return candidates;
  const rest = uri.slice(separatorIndex + "://".length);

  // Empty segments (duplicate or trailing slashes) are ignored.
  const parts = rest.split("/").filter(Boolean);
  if (parts.length < 5) return candidates;

  const prefix = `${scheme}://${parts.slice(0, 4).join("/")}/`;
  const slugRaw = parts.slice(4).join("/");
  const decodedOnce = safeDecodeURIComponent(slugRaw);
  const decodedTwice = safeDecodeURIComponent(decodedOnce);

  for (const slug of [decodedOnce, decodedTwice]) {
    let encodedSlug;
    try {
      encodedSlug = encodeURIComponent(slug);
    } catch (error) {
      // encodeURIComponent throws URIError on lone surrogates; such a slug cannot form a
      // canonical URI, so skip it rather than abort the whole lookup.
      if (error instanceof URIError) continue;
      throw error;
    }
    const canonical = `${prefix}${encodedSlug}`;
    if (!candidates.includes(canonical)) {
      candidates.push(canonical);
    }
  }

  return candidates;
}
436
+
402
437
  async function readResourceContent(uri) {
403
- const resource = resourceIndexByUri.get(uri);
438
+ let resource = null;
439
+ for (const candidate of buildResourceLookupCandidates(uri)) {
440
+ resource = resourceIndexByUri.get(candidate);
441
+ if (resource) break;
442
+ }
404
443
  if (!resource) return null;
444
+
405
445
  const content = await resource.loadContent();
406
446
  return {
407
447
  uri,