simple-dynamsoft-mcp 6.3.0 → 6.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +6 -2
- package/README.md +20 -12
- package/package.json +2 -1
- package/scripts/verify-doc-resources.mjs +79 -0
- package/src/rag.js +125 -83
- package/src/resource-index.js +41 -1
package/.env.example
CHANGED
|
@@ -60,14 +60,18 @@ RAG_FALLBACK=fuse
|
|
|
60
60
|
# * RAG_REBUILD: true to ignore cache and rebuild on startup/search
|
|
61
61
|
# * RAG_PREWARM: true to build the embedding index at startup
|
|
62
62
|
# * RAG_PREWARM_BLOCK: true to block startup until prewarm completes
|
|
63
|
-
# * RAG_PREBUILT_INDEX_AUTO_DOWNLOAD: auto-download prebuilt
|
|
64
|
-
# * RAG_PREBUILT_INDEX_URL: override prebuilt index archive
|
|
63
|
+
# * RAG_PREBUILT_INDEX_AUTO_DOWNLOAD: auto-download prebuilt index when local or gemini embeddings are selected
|
|
64
|
+
# * RAG_PREBUILT_INDEX_URL: global override URL for prebuilt index archive (applies to both local and gemini providers)
|
|
65
|
+
# * RAG_PREBUILT_INDEX_URL_LOCAL: provider-specific URL override for local prebuilt index archive
|
|
66
|
+
# * RAG_PREBUILT_INDEX_URL_GEMINI: provider-specific URL override for gemini prebuilt index archive
|
|
65
67
|
# * RAG_PREBUILT_INDEX_TIMEOUT_MS: timeout for prebuilt index download request
|
|
66
68
|
# RAG_REBUILD=false
|
|
67
69
|
# RAG_PREWARM=false
|
|
68
70
|
# RAG_PREWARM_BLOCK=false
|
|
69
71
|
# RAG_PREBUILT_INDEX_AUTO_DOWNLOAD=true
|
|
70
72
|
# RAG_PREBUILT_INDEX_URL=
|
|
73
|
+
# RAG_PREBUILT_INDEX_URL_LOCAL=
|
|
74
|
+
# RAG_PREBUILT_INDEX_URL_GEMINI=
|
|
71
75
|
# RAG_PREBUILT_INDEX_TIMEOUT_MS=180000
|
|
72
76
|
|
|
73
77
|
# Optional data submodule sync on server startup
|
package/README.md
CHANGED
|
@@ -185,8 +185,10 @@ Example:
|
|
|
185
185
|
Commonly used settings:
|
|
186
186
|
- `RAG_PROVIDER`: `auto` | `gemini` | `local` | `fuse`
|
|
187
187
|
- `RAG_FALLBACK`: `fuse` | `local` | `none`
|
|
188
|
-
- `RAG_PREBUILT_INDEX_AUTO_DOWNLOAD`: `true` by default; auto-fetch prebuilt
|
|
189
|
-
- `RAG_PREBUILT_INDEX_URL`: override release asset URL for prebuilt index archive
|
|
188
|
+
- `RAG_PREBUILT_INDEX_AUTO_DOWNLOAD`: `true` by default; auto-fetch prebuilt index when local or gemini embeddings are selected
|
|
189
|
+
- `RAG_PREBUILT_INDEX_URL`: global override release asset URL for prebuilt index archive
|
|
190
|
+
- `RAG_PREBUILT_INDEX_URL_LOCAL`: override release asset URL for local prebuilt index archive
|
|
191
|
+
- `RAG_PREBUILT_INDEX_URL_GEMINI`: override release asset URL for gemini prebuilt index archive
|
|
190
192
|
- `RAG_PREBUILT_INDEX_TIMEOUT_MS`: download timeout for prebuilt index fetch
|
|
191
193
|
- `MCP_DATA_DIR`: use a preloaded local data folder (`metadata/`, `samples/`, `documentation/`)
|
|
192
194
|
- `MCP_DATA_AUTO_DOWNLOAD`: allow startup archive download when bundled data is unavailable
|
|
@@ -199,17 +201,19 @@ For the complete list and defaults, see `.env.example` and the sections `Submodu
|
|
|
199
201
|
|
|
200
202
|
## Use Release Assets In A Local Project
|
|
201
203
|
|
|
202
|
-
Use this when you want to run from a built `.tgz` package and reuse
|
|
204
|
+
Use this when you want to run from a built `.tgz` package and reuse prebuilt RAG indexes.
|
|
203
205
|
|
|
204
206
|
1. Download release assets from GitHub Releases for the same version:
|
|
205
207
|
- `simple-dynamsoft-mcp-<version>.tgz`
|
|
206
|
-
- `prebuilt-rag-index-<version>.tar.gz`
|
|
208
|
+
- `prebuilt-rag-index-local-<version>.tar.gz`
|
|
209
|
+
- `prebuilt-rag-index-gemini-<version>.tar.gz` (only needed if `RAG_PROVIDER=gemini`)
|
|
207
210
|
2. In your project folder, create a local tools folder, for example:
|
|
208
211
|
- `<project>/.tools/simple-dynamsoft-mcp/`
|
|
209
|
-
3. Copy assets into that folder and extract the prebuilt index:
|
|
212
|
+
3. Copy assets into that folder and extract the prebuilt index you plan to use:
|
|
210
213
|
- Keep `simple-dynamsoft-mcp-<version>.tgz` as-is for `npx --package`.
|
|
211
|
-
- Extract `prebuilt-rag-index-<version>.tar.gz
|
|
212
|
-
-
|
|
214
|
+
- Extract `prebuilt-rag-index-local-<version>.tar.gz` for local embeddings.
|
|
215
|
+
- Extract `prebuilt-rag-index-gemini-<version>.tar.gz` for gemini embeddings.
|
|
216
|
+
- Expected cache output path: `<project>/.tools/simple-dynamsoft-mcp/prebuilt-rag/<provider>/cache/*.json`.
|
|
213
217
|
4. Configure project-local `.vscode/mcp.json` to use the local package and cache path.
|
|
214
218
|
|
|
215
219
|
Example (`.vscode/mcp.json`):
|
|
@@ -231,7 +235,7 @@ Example (`.vscode/mcp.json`):
|
|
|
231
235
|
"RAG_REBUILD": "false",
|
|
232
236
|
"RAG_LOCAL_MODEL": "Xenova/all-MiniLM-L6-v2",
|
|
233
237
|
"RAG_LOCAL_QUANTIZED": "true",
|
|
234
|
-
"RAG_CACHE_DIR": ".tools/simple-dynamsoft-mcp/prebuilt-rag/cache"
|
|
238
|
+
"RAG_CACHE_DIR": ".tools/simple-dynamsoft-mcp/prebuilt-rag/local/cache"
|
|
235
239
|
}
|
|
236
240
|
}
|
|
237
241
|
}
|
|
@@ -242,9 +246,11 @@ Notes:
|
|
|
242
246
|
- Use absolute paths if your MCP client does not resolve relative paths from workspace root.
|
|
243
247
|
- `RAG_REBUILD` must stay `false` to reuse prebuilt cache files.
|
|
244
248
|
- Runtime auto-download is enabled by default (`RAG_PREBUILT_INDEX_AUTO_DOWNLOAD=true`) when provider resolution reaches local or gemini embeddings (primary or fallback).
|
|
245
|
-
- Default prebuilt URL
|
|
249
|
+
- Default prebuilt URL patterns:
|
|
250
|
+
- `https://github.com/yushulx/simple-dynamsoft-mcp/releases/download/v<version>/prebuilt-rag-index-local-<version>.tar.gz`
|
|
251
|
+
- `https://github.com/yushulx/simple-dynamsoft-mcp/releases/download/v<version>/prebuilt-rag-index-gemini-<version>.tar.gz`
|
|
246
252
|
- Downloaded prebuilt cache is accepted when package version matches (with provider/model/payload sanity checks).
|
|
247
|
-
- Prebuilt cache is used whenever provider execution resolves to local embeddings (primary or fallback).
|
|
253
|
+
- Prebuilt cache is used whenever provider execution resolves to local or gemini embeddings (primary or fallback).
|
|
248
254
|
|
|
249
255
|
## Supported SDKs
|
|
250
256
|
|
|
@@ -502,7 +508,7 @@ At startup, the server logs data mode/path to stderr:
|
|
|
502
508
|
- Release workflow: `.github/workflows/release.yml`
|
|
503
509
|
- Release behavior:
|
|
504
510
|
- Creates GitHub release when `package.json` version changes on `main`
|
|
505
|
-
- Attaches `npm pack` artifact
|
|
511
|
+
- Attaches `npm pack` artifact plus separate prebuilt RAG index archives for local and gemini providers (release workflow requires `GEMINI_API_KEY` for gemini prebuild path)
|
|
506
512
|
- Publishes the package to npm from the release workflow (OIDC trusted publishing)
|
|
507
513
|
|
|
508
514
|
## Testing
|
|
@@ -546,7 +552,9 @@ Key env vars:
|
|
|
546
552
|
- `RAG_LOCAL_MODEL`: default `Xenova/all-MiniLM-L6-v2`
|
|
547
553
|
- `RAG_CACHE_DIR`: default `data/.rag-cache`
|
|
548
554
|
- `RAG_PREBUILT_INDEX_AUTO_DOWNLOAD`: default `true`
|
|
549
|
-
- `RAG_PREBUILT_INDEX_URL`: override release prebuilt index asset URL
|
|
555
|
+
- `RAG_PREBUILT_INDEX_URL`: global override for release prebuilt index asset URL
|
|
556
|
+
- `RAG_PREBUILT_INDEX_URL_LOCAL`: override for local prebuilt index asset URL
|
|
557
|
+
- `RAG_PREBUILT_INDEX_URL_GEMINI`: override for gemini prebuilt index asset URL
|
|
550
558
|
- `RAG_PREBUILT_INDEX_TIMEOUT_MS`: default `180000`
|
|
551
559
|
|
|
552
560
|
Local embeddings download the model on first run and cache under `data/.rag-cache/models`.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "simple-dynamsoft-mcp",
|
|
3
|
-
"version": "6.
|
|
3
|
+
"version": "6.4.0",
|
|
4
4
|
"description": "MCP server for Dynamsoft SDKs - Capture Vision, Barcode Reader (Mobile/Python/Web), Dynamic Web TWAIN, and Document Viewer. Provides documentation, code snippets, and API guidance.",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"repository": {
|
|
@@ -40,6 +40,7 @@
|
|
|
40
40
|
"data:verify-versions:strict": "node scripts/update-sdk-versions.mjs --check --strict",
|
|
41
41
|
"data:lock": "node scripts/update-data-lock.mjs",
|
|
42
42
|
"data:verify-lock": "node scripts/verify-data-lock.mjs",
|
|
43
|
+
"data:verify-docs": "node scripts/verify-doc-resources.mjs",
|
|
43
44
|
"rag:prebuild": "node scripts/prebuild-rag-index.mjs"
|
|
44
45
|
},
|
|
45
46
|
"keywords": [
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
import { resourceIndex, readResourceContent } from "../src/resource-index.js";
|
|
4
|
+
|
|
5
|
+
/**
 * Parse a strictly positive base-10 integer from an env-style value.
 *
 * @param {unknown} value - Raw value (typically a string from `process.env`, or undefined).
 * @param {number} fallback - Returned when `value` is missing, malformed, or not > 0.
 * @returns {number} The parsed positive integer, or `fallback`.
 */
function parsePositiveInt(value, fallback) {
  const text = String(value ?? "").trim();
  // Require the whole string to be digits. parseInt alone stops at the first
  // non-digit, so "8abc", "1e3" or "2.9" would otherwise be silently accepted.
  if (!/^\+?\d+$/.test(text)) return fallback;
  const parsed = Number.parseInt(text, 10);
  // Reject zero and values too large to be represented exactly.
  if (!Number.isSafeInteger(parsed) || parsed <= 0) return fallback;
  return parsed;
}
|
|
10
|
+
|
|
11
|
+
/**
 * Load every resource-index entry of type "doc" through readResourceContent
 * and report the ones that fail to load or come back empty.
 *
 * Work is spread over a pool of concurrent workers (size taken from
 * DOC_VERIFY_CONCURRENCY, default 8). On any failure the first 20 are printed
 * to stderr and process.exitCode is set to 1.
 */
async function main() {
  const concurrency = parsePositiveInt(process.env.DOC_VERIFY_CONCURRENCY, 8);
  const docs = resourceIndex.filter((entry) => entry.type === "doc");
  const total = docs.length;

  console.log(`[doc-verify] start total_docs=${total} concurrency=${concurrency}`);

  if (total === 0) {
    console.log("[doc-verify] no docs found; skipping");
    return;
  }

  const failures = [];
  let cursor = 0;
  let checked = 0;

  // Throws when the entry cannot be read or carries neither text nor blob data.
  const verifyEntry = async (entry) => {
    const content = await readResourceContent(entry.uri);
    if (!content) {
      throw new Error("readResourceContent returned null");
    }
    const textOk = typeof content.text === "string" && content.text.length > 0;
    const blobOk = typeof content.blob === "string" && content.blob.length > 0;
    if (!(textOk || blobOk)) {
      throw new Error("resource content is empty");
    }
  };

  // Each worker keeps claiming the next unprocessed entry until none remain.
  const worker = async () => {
    while (cursor < total) {
      const entry = docs[cursor];
      cursor += 1;

      try {
        await verifyEntry(entry);
      } catch (error) {
        failures.push({
          uri: entry.uri,
          error: error?.message || String(error)
        });
      } finally {
        checked += 1;
        if (checked % 250 === 0 || checked === total) {
          console.log(`[doc-verify] progress checked=${checked}/${total} failures=${failures.length}`);
        }
      }
    }
  };

  const poolSize = Math.min(concurrency, total);
  await Promise.all(Array.from({ length: poolSize }, () => worker()));

  if (failures.length === 0) {
    console.log(`[doc-verify] success checked=${checked}`);
    return;
  }

  console.error(`[doc-verify] failed count=${failures.length}`);
  failures.slice(0, 20).forEach((failure) => {
    console.error(`[doc-verify] error uri=${failure.uri} message=${failure.error}`);
  });
  if (failures.length > 20) {
    console.error(`[doc-verify] ... truncated ${failures.length - 20} additional failures`);
  }
  process.exitCode = 1;
}
|
|
78
|
+
|
|
79
|
+
await main();
|
package/src/rag.js
CHANGED
|
@@ -32,8 +32,14 @@ const dataRoot = getResolvedDataRoot();
|
|
|
32
32
|
|
|
33
33
|
const pkgUrl = new URL("../package.json", import.meta.url);
|
|
34
34
|
const pkg = JSON.parse(readFileSync(pkgUrl, "utf8"));
|
|
35
|
-
const
|
|
35
|
+
const legacyPrebuiltIndexUrl =
|
|
36
36
|
`https://github.com/yushulx/simple-dynamsoft-mcp/releases/download/v${pkg.version}/prebuilt-rag-index-${pkg.version}.tar.gz`;
|
|
37
|
+
const defaultPrebuiltIndexUrls = {
|
|
38
|
+
local:
|
|
39
|
+
`https://github.com/yushulx/simple-dynamsoft-mcp/releases/download/v${pkg.version}/prebuilt-rag-index-local-${pkg.version}.tar.gz`,
|
|
40
|
+
gemini:
|
|
41
|
+
`https://github.com/yushulx/simple-dynamsoft-mcp/releases/download/v${pkg.version}/prebuilt-rag-index-gemini-${pkg.version}.tar.gz`
|
|
42
|
+
};
|
|
37
43
|
|
|
38
44
|
// ============================================================================
|
|
39
45
|
// RAG configuration
|
|
@@ -88,7 +94,9 @@ const ragConfig = {
|
|
|
88
94
|
prewarm: readBoolEnv("RAG_PREWARM", false),
|
|
89
95
|
prewarmBlock: readBoolEnv("RAG_PREWARM_BLOCK", false),
|
|
90
96
|
prebuiltIndexAutoDownload: readBoolEnv("RAG_PREBUILT_INDEX_AUTO_DOWNLOAD", true),
|
|
91
|
-
prebuiltIndexUrl: readEnvValue("RAG_PREBUILT_INDEX_URL",
|
|
97
|
+
prebuiltIndexUrl: readEnvValue("RAG_PREBUILT_INDEX_URL", ""),
|
|
98
|
+
prebuiltIndexUrlLocal: readEnvValue("RAG_PREBUILT_INDEX_URL_LOCAL", defaultPrebuiltIndexUrls.local),
|
|
99
|
+
prebuiltIndexUrlGemini: readEnvValue("RAG_PREBUILT_INDEX_URL_GEMINI", defaultPrebuiltIndexUrls.gemini),
|
|
92
100
|
prebuiltIndexTimeoutMs: readIntEnv("RAG_PREBUILT_INDEX_TIMEOUT_MS", 180000),
|
|
93
101
|
geminiApiKey: readEnvValue("GEMINI_API_KEY", ""),
|
|
94
102
|
geminiModel: normalizeGeminiModel(readEnvValue("GEMINI_EMBED_MODEL", "models/gemini-embedding-001")),
|
|
@@ -325,25 +333,45 @@ function readSignaturePackageVersion(signatureRaw) {
|
|
|
325
333
|
}
|
|
326
334
|
}
|
|
327
335
|
|
|
328
|
-
function
|
|
336
|
+
/**
 * Collect candidate cache JSON files for one provider from an extracted
 * prebuilt-index archive, in priority order and without duplicates:
 *   1. the file whose basename equals `expectedCacheFileName`,
 *   2. the first `rag-<provider>-*` file ending in `-<first 12 chars of cacheKey>.json`,
 *   3. every remaining `rag-<provider>-*` file (sorted by path).
 *
 * @param {string} extractRoot - Directory the archive was extracted into.
 * @param {string} expectedCacheFileName - Exact cache file name to prefer.
 * @param {string} cacheKey - Cache key; only its first 12 characters are used for matching.
 * @param {string} provider - Provider name used in the `rag-<provider>-` file prefix.
 * @returns {string[]} Ordered, de-duplicated candidate paths (possibly empty).
 */
function listDownloadedCacheCandidatesByProvider(extractRoot, expectedCacheFileName, cacheKey, provider) {
  const jsonFiles = listFilesRecursive(extractRoot)
    .filter((filePath) => filePath.toLowerCase().endsWith(".json"))
    .sort();

  const providerPrefix = `rag-${provider}-`;
  const keySuffix = `-${cacheKey.slice(0, 12)}.json`;

  const exactMatch = jsonFiles.find((filePath) => basename(filePath) === expectedCacheFileName);
  const providerFiles = jsonFiles.filter((filePath) => basename(filePath).startsWith(providerPrefix));
  // providerFiles keeps the sorted order, so its first suffix match is the
  // same file a scan over all JSON files would find.
  const prefixMatch = providerFiles.find((filePath) => basename(filePath).endsWith(keySuffix));

  // A Set keeps first-insertion order, which preserves the priority above.
  const ordered = [exactMatch, prefixMatch, ...providerFiles].filter(Boolean);
  return [...new Set(ordered)];
}
|
|
346
354
|
|
|
355
|
+
/**
 * Build the ordered list of archive URLs to try for a provider's prebuilt index.
 *
 * A non-empty global override (ragConfig.prebuiltIndexUrl) wins outright.
 * Otherwise the provider-specific URL (for "local" or "gemini") is tried
 * first, followed by the legacy single-archive URL. Empty entries and
 * duplicates are dropped.
 *
 * @param {string} provider - Embedding provider name.
 * @returns {string[]} URLs to attempt, in order.
 */
function resolvePrebuiltIndexUrlCandidates(provider) {
  const globalOverride = String(ragConfig.prebuiltIndexUrl || "").trim();
  if (globalOverride) {
    return [globalOverride];
  }

  let providerUrl = "";
  if (provider === "local") {
    providerUrl = String(ragConfig.prebuiltIndexUrlLocal || "").trim();
  } else if (provider === "gemini") {
    providerUrl = String(ragConfig.prebuiltIndexUrlGemini || "").trim();
  }

  const ordered = [providerUrl, legacyPrebuiltIndexUrl].filter(Boolean);
  return [...new Set(ordered)];
}
|
|
374
|
+
|
|
347
375
|
async function downloadPrebuiltArchive(url, outputPath, timeoutMs) {
|
|
348
376
|
const source = String(url || "").trim();
|
|
349
377
|
if (!source) {
|
|
@@ -376,88 +404,102 @@ async function downloadPrebuiltArchive(url, outputPath, timeoutMs) {
|
|
|
376
404
|
}
|
|
377
405
|
|
|
378
406
|
async function maybeDownloadPrebuiltVectorIndex({ provider, model, cacheKey, signature, cacheFile }) {
|
|
379
|
-
if (
|
|
380
|
-
return { downloaded: false, reason: "
|
|
407
|
+
if (!["local", "gemini"].includes(provider)) {
|
|
408
|
+
return { downloaded: false, reason: "provider_not_supported" };
|
|
381
409
|
}
|
|
382
410
|
if (!ragConfig.prebuiltIndexAutoDownload) {
|
|
383
411
|
return { downloaded: false, reason: "auto_download_disabled" };
|
|
384
412
|
}
|
|
385
413
|
|
|
386
|
-
const
|
|
387
|
-
if (
|
|
414
|
+
const sourceUrls = resolvePrebuiltIndexUrlCandidates(provider);
|
|
415
|
+
if (sourceUrls.length === 0) {
|
|
388
416
|
return { downloaded: false, reason: "url_not_set" };
|
|
389
417
|
}
|
|
390
418
|
|
|
391
|
-
const attemptKey = `${provider}:${cacheKey}:${
|
|
419
|
+
const attemptKey = `${provider}:${cacheKey}:${sourceUrls.join("|")}`;
|
|
392
420
|
if (prebuiltDownloadAttempts.has(attemptKey)) {
|
|
393
421
|
return prebuiltDownloadAttempts.get(attemptKey);
|
|
394
422
|
}
|
|
395
423
|
|
|
396
424
|
const expectedCacheFileName = makeCacheFileName(provider, model, cacheKey);
|
|
397
425
|
const attempt = (async () => {
|
|
398
|
-
|
|
399
|
-
const
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
try {
|
|
404
|
-
logRag(`prebuilt index download start provider=${provider} url=${sourceUrl} timeout_ms=${ragConfig.prebuiltIndexTimeoutMs}`);
|
|
405
|
-
const downloaded = await downloadPrebuiltArchive(sourceUrl, archivePath, ragConfig.prebuiltIndexTimeoutMs);
|
|
406
|
-
logRag(
|
|
407
|
-
`prebuilt index downloaded provider=${provider} source=${downloaded.sourceType} size=${downloaded.size}B`
|
|
426
|
+
let lastReason = "not_attempted";
|
|
427
|
+
for (const sourceUrl of sourceUrls) {
|
|
428
|
+
const tempRoot = join(
|
|
429
|
+
tmpdir(),
|
|
430
|
+
`simple-dynamsoft-mcp-rag-prebuilt-${Date.now()}-${Math.random().toString(16).slice(2)}`
|
|
408
431
|
);
|
|
432
|
+
const archivePath = join(tempRoot, "prebuilt-rag-index.tar.gz");
|
|
433
|
+
const extractRoot = join(tempRoot, "extract");
|
|
409
434
|
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
}
|
|
435
|
+
ensureDirectory(extractRoot);
|
|
436
|
+
try {
|
|
437
|
+
logRag(
|
|
438
|
+
`prebuilt index download start provider=${provider} url=${sourceUrl} timeout_ms=${ragConfig.prebuiltIndexTimeoutMs}`
|
|
439
|
+
);
|
|
440
|
+
const downloaded = await downloadPrebuiltArchive(sourceUrl, archivePath, ragConfig.prebuiltIndexTimeoutMs);
|
|
441
|
+
logRag(
|
|
442
|
+
`prebuilt index downloaded provider=${provider} source=${downloaded.sourceType} size=${downloaded.size}B url=${sourceUrl}`
|
|
443
|
+
);
|
|
420
444
|
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
445
|
+
await tar.x({
|
|
446
|
+
file: archivePath,
|
|
447
|
+
cwd: extractRoot,
|
|
448
|
+
strict: true
|
|
425
449
|
});
|
|
426
|
-
if (!candidateCache.hit) {
|
|
427
|
-
continue;
|
|
428
|
-
}
|
|
429
450
|
|
|
430
|
-
const
|
|
431
|
-
|
|
432
|
-
|
|
451
|
+
const candidateFiles = listDownloadedCacheCandidatesByProvider(
|
|
452
|
+
extractRoot,
|
|
453
|
+
expectedCacheFileName,
|
|
454
|
+
cacheKey,
|
|
455
|
+
provider
|
|
456
|
+
);
|
|
457
|
+
if (candidateFiles.length === 0) {
|
|
458
|
+
throw new Error(`cache_file_not_found expected=${expectedCacheFileName}`);
|
|
433
459
|
}
|
|
434
460
|
|
|
435
|
-
const
|
|
436
|
-
|
|
437
|
-
cacheKey,
|
|
438
|
-
meta: {
|
|
439
|
-
...(candidateCache.payload.meta || {}),
|
|
461
|
+
for (const sourceCacheFile of candidateFiles) {
|
|
462
|
+
const candidateCache = loadVectorIndexCache(sourceCacheFile, {
|
|
440
463
|
provider,
|
|
441
|
-
model
|
|
442
|
-
|
|
464
|
+
model
|
|
465
|
+
});
|
|
466
|
+
if (!candidateCache.hit) {
|
|
467
|
+
continue;
|
|
443
468
|
}
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
469
|
+
|
|
470
|
+
const cachePackageVersion = readSignaturePackageVersion(candidateCache.payload?.meta?.signature);
|
|
471
|
+
if (!cachePackageVersion || cachePackageVersion !== pkg.version) {
|
|
472
|
+
continue;
|
|
473
|
+
}
|
|
474
|
+
|
|
475
|
+
const migratedPayload = {
|
|
476
|
+
...candidateCache.payload,
|
|
477
|
+
cacheKey,
|
|
478
|
+
meta: {
|
|
479
|
+
...(candidateCache.payload.meta || {}),
|
|
480
|
+
provider,
|
|
481
|
+
model,
|
|
482
|
+
signature
|
|
483
|
+
}
|
|
484
|
+
};
|
|
485
|
+
saveVectorIndexCache(cacheFile, migratedPayload);
|
|
486
|
+
logRag(
|
|
487
|
+
`prebuilt index installed provider=${provider} cache_file=${cacheFile} source=${basename(sourceCacheFile)} mode=version_only_compat version=${cachePackageVersion}`
|
|
488
|
+
);
|
|
489
|
+
return { downloaded: true, reason: "installed_version_only_compat" };
|
|
490
|
+
}
|
|
491
|
+
|
|
492
|
+
throw new Error(
|
|
493
|
+
`no_compatible_cache expected=${expectedCacheFileName} found=${candidateFiles.map((path) => basename(path)).join(",")}`
|
|
448
494
|
);
|
|
449
|
-
|
|
495
|
+
} catch (error) {
|
|
496
|
+
lastReason = `${sourceUrl} => ${error.message}`;
|
|
497
|
+
logRag(`prebuilt index unavailable provider=${provider} url=${sourceUrl} reason=${error.message}`);
|
|
498
|
+
} finally {
|
|
499
|
+
rmSync(tempRoot, { recursive: true, force: true });
|
|
450
500
|
}
|
|
451
|
-
|
|
452
|
-
throw new Error(
|
|
453
|
-
`no_compatible_cache expected=${expectedCacheFileName} found=${candidateFiles.map((path) => basename(path)).join(",")}`
|
|
454
|
-
);
|
|
455
|
-
} catch (error) {
|
|
456
|
-
logRag(`prebuilt index unavailable provider=${provider} reason=${error.message}`);
|
|
457
|
-
return { downloaded: false, reason: error.message };
|
|
458
|
-
} finally {
|
|
459
|
-
rmSync(tempRoot, { recursive: true, force: true });
|
|
460
501
|
}
|
|
502
|
+
return { downloaded: false, reason: lastReason };
|
|
461
503
|
})();
|
|
462
504
|
|
|
463
505
|
prebuiltDownloadAttempts.set(attemptKey, attempt);
|
|
@@ -822,28 +864,26 @@ async function createVectorProvider({ name, model, embedder, batchSize }) {
|
|
|
822
864
|
}
|
|
823
865
|
logRag(`cache miss provider=${name} file=${cacheFile} reason=${cacheState.reason}`);
|
|
824
866
|
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
};
|
|
844
|
-
}
|
|
845
|
-
logRag(`cache miss provider=${name} file=${cacheFile} source=prebuilt_download reason=${cacheState.reason}`);
|
|
867
|
+
const downloadResult = await maybeDownloadPrebuiltVectorIndex({
|
|
868
|
+
provider: name,
|
|
869
|
+
model,
|
|
870
|
+
cacheKey,
|
|
871
|
+
signature,
|
|
872
|
+
cacheFile
|
|
873
|
+
});
|
|
874
|
+
if (downloadResult.downloaded) {
|
|
875
|
+
cacheState = loadVectorIndexCache(cacheFile, expectedCacheState);
|
|
876
|
+
if (cacheState.hit) {
|
|
877
|
+
const cached = cacheState.payload;
|
|
878
|
+
logRag(
|
|
879
|
+
`cache hit provider=${name} file=${cacheFile} source=prebuilt_download items=${cached.items.length} vectors=${cached.vectors.length}`
|
|
880
|
+
);
|
|
881
|
+
return {
|
|
882
|
+
items: cached.items,
|
|
883
|
+
vectors: cached.vectors
|
|
884
|
+
};
|
|
846
885
|
}
|
|
886
|
+
logRag(`cache miss provider=${name} file=${cacheFile} source=prebuilt_download reason=${cacheState.reason}`);
|
|
847
887
|
}
|
|
848
888
|
} else {
|
|
849
889
|
logRag(`cache bypass provider=${name} file=${cacheFile} reason=rebuild_true`);
|
|
@@ -1024,6 +1064,8 @@ function logRagConfigOnce() {
|
|
|
1024
1064
|
logRag(
|
|
1025
1065
|
`config provider=${ragConfig.provider} fallback=${ragConfig.fallback} prewarm=${ragConfig.prewarm} rebuild=${ragConfig.rebuild} ` +
|
|
1026
1066
|
`cache_dir=${ragConfig.cacheDir} prebuilt_auto_download=${ragConfig.prebuiltIndexAutoDownload} ` +
|
|
1067
|
+
`prebuilt_url_override=${ragConfig.prebuiltIndexUrl ? "set" : "empty"} prebuilt_url_local=${ragConfig.prebuiltIndexUrlLocal ? "set" : "empty"} ` +
|
|
1068
|
+
`prebuilt_url_gemini=${ragConfig.prebuiltIndexUrlGemini ? "set" : "empty"} ` +
|
|
1027
1069
|
`prebuilt_timeout_ms=${ragConfig.prebuiltIndexTimeoutMs} gemini_retry_max_attempts=${ragConfig.geminiRetryMaxAttempts} ` +
|
|
1028
1070
|
`gemini_retry_base_delay_ms=${ragConfig.geminiRetryBaseDelayMs} gemini_retry_max_delay_ms=${ragConfig.geminiRetryMaxDelayMs} ` +
|
|
1029
1071
|
`gemini_request_throttle_ms=${ragConfig.geminiRequestThrottleMs}`
|
package/src/resource-index.js
CHANGED
|
@@ -399,9 +399,49 @@ function getPinnedResources() {
|
|
|
399
399
|
return resourceIndex.filter((entry) => entry.pinned);
|
|
400
400
|
}
|
|
401
401
|
|
|
402
|
+
/**
 * Decode a percent-encoded URI component without throwing.
 *
 * @param {string} value - Possibly percent-encoded text.
 * @returns {string} The decoded text, or `value` unchanged when it is malformed.
 */
function safeDecodeURIComponent(value) {
  try {
    return decodeURIComponent(value);
  } catch {
    return value;
  }
}

/**
 * List the URIs to try when looking up a resource, most literal first.
 *
 * The input URI is always the first candidate. For `doc://` URIs with at least
 * five non-empty path segments, the first four segments are kept as-is and
 * everything after them is treated as one slug: the slug is decoded once and
 * twice, re-encoded as a single component, and each distinct result is added.
 * This lets decoded and double-encoded slugs resolve to the same form.
 *
 * @param {unknown} uri - Requested resource URI.
 * @returns {string[]} Ordered, de-duplicated candidates; empty for non-string or empty input.
 */
function buildResourceLookupCandidates(uri) {
  const candidates = [];
  if (typeof uri !== "string" || uri.length === 0) return candidates;
  candidates.push(uri);

  // Split on the FIRST "://" only. Splitting on every occurrence dropped the
  // tail of any slug containing "://" and could produce a candidate that
  // matches an unrelated resource.
  const separator = "://";
  const separatorAt = uri.indexOf(separator);
  if (separatorAt === -1) return candidates;

  const scheme = uri.slice(0, separatorAt);
  if (scheme !== "doc") return candidates;

  const rest = uri.slice(separatorAt + separator.length);
  const parts = rest.split("/").filter(Boolean);
  if (parts.length < 5) return candidates;

  const head = parts.slice(0, 4).join("/");
  const slugRaw = parts.slice(4).join("/");
  const decodedOnce = safeDecodeURIComponent(slugRaw);
  const decodedTwice = safeDecodeURIComponent(decodedOnce);

  for (const slug of [decodedOnce, decodedTwice]) {
    const canonical = `${scheme}://${head}/${encodeURIComponent(slug)}`;
    if (!candidates.includes(canonical)) {
      candidates.push(canonical);
    }
  }

  return candidates;
}
|
|
436
|
+
|
|
402
437
|
async function readResourceContent(uri) {
|
|
403
|
-
|
|
438
|
+
let resource = null;
|
|
439
|
+
for (const candidate of buildResourceLookupCandidates(uri)) {
|
|
440
|
+
resource = resourceIndexByUri.get(candidate);
|
|
441
|
+
if (resource) break;
|
|
442
|
+
}
|
|
404
443
|
if (!resource) return null;
|
|
444
|
+
|
|
405
445
|
const content = await resource.loadContent();
|
|
406
446
|
return {
|
|
407
447
|
uri,
|