simple-dynamsoft-mcp 7.2.0 → 7.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +5 -3
- package/README.md +1 -2
- package/package.json +3 -2
- package/src/index.js +1 -2
- package/src/rag/config.js +16 -3
- package/src/rag/gemini-retry.js +1 -1
- package/src/rag/index.js +6 -2
- package/src/rag/logger.js +4 -19
- package/src/rag/providers.js +15 -46
- package/src/rag/search-utils.js +8 -1
- package/src/rag/vector-cache.js +173 -171
- package/scripts/compute-repo-signatures.mjs +0 -210
- package/scripts/data-sync-azure.mjs +0 -364
- package/src/data/shared-state.js +0 -214
package/.env.example
CHANGED
|
@@ -17,9 +17,11 @@
|
|
|
17
17
|
# Optional: force hydration mode override
|
|
18
18
|
# MCP_DATA_HYDRATION_MODE=eager
|
|
19
19
|
|
|
20
|
-
# Optional:
|
|
21
|
-
#
|
|
22
|
-
#
|
|
20
|
+
# Optional: prebuilt Gemini index behavior
|
|
21
|
+
# RAG_PREBUILT_INDEX_AUTO_DOWNLOAD=true
|
|
22
|
+
# RAG_PREBUILT_INDEX_URL=
|
|
23
|
+
# RAG_PREBUILT_INDEX_URL_GEMINI=
|
|
24
|
+
# RAG_PREBUILT_INDEX_TIMEOUT_MS=180000
|
|
23
25
|
|
|
24
26
|
# Optional: prewarm behavior
|
|
25
27
|
# RAG_PREWARM=true
|
package/README.md
CHANGED
|
@@ -33,8 +33,7 @@ npx -y simple-dynamsoft-mcp@latest
|
|
|
33
33
|
|
|
34
34
|
## Deployment Guides
|
|
35
35
|
|
|
36
|
-
- Azure Container Apps runbook
|
|
37
|
-
- Shared Azure data/index sync runbook: `docs/deployment/data-sync-azure.md`
|
|
36
|
+
- Azure Container Apps runbook: `docs/deployment/azure-container-apps.md`
|
|
38
37
|
- Self-hosting (Ubuntu/any server): `docs/deployment/self-hosting.md`
|
|
39
38
|
|
|
40
39
|
## MCP Client Configuration
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "simple-dynamsoft-mcp",
|
|
3
|
-
"version": "7.2.0",
|
|
3
|
+
"version": "7.2.1",
|
|
4
4
|
"description": "MCP server for Dynamsoft SDKs - Capture Vision, Barcode Reader (Mobile/Python/Web), Dynamic Web TWAIN, and Document Viewer. Provides documentation, code snippets, and API guidance.",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"repository": {
|
|
@@ -24,7 +24,7 @@
|
|
|
24
24
|
"scripts": {
|
|
25
25
|
"start": "node src/index.js",
|
|
26
26
|
"test": "npm run test:lite",
|
|
27
|
-
"test:unit": "node --test test/unit/gemini-retry.test.js test/unit/
|
|
27
|
+
"test:unit": "node --test test/unit/gemini-retry.test.js test/unit/profile-config.test.js test/unit/lexical-provider.test.js test/unit/hydration-mode.test.js test/unit/hydration-policy.test.js test/unit/repo-map.test.js test/unit/download-utils.test.js test/unit/logging.test.js test/unit/create-server.test.js test/unit/server-helpers.test.js",
|
|
28
28
|
"test:lite": "npm run test:stdio && npm run test:http && npm run test:package",
|
|
29
29
|
"test:lexical": "node --test test/integration/stdio.test.js test/integration/http.test.js",
|
|
30
30
|
"test:gemini": "node scripts/run-gemini-tests.mjs",
|
|
@@ -65,6 +65,7 @@
|
|
|
65
65
|
],
|
|
66
66
|
"dependencies": {
|
|
67
67
|
"@modelcontextprotocol/sdk": "^1.25.2",
|
|
68
|
+
"@xenova/transformers": "^2.17.2",
|
|
68
69
|
"dotenv": "^16.4.5",
|
|
69
70
|
"extract-zip": "^2.0.1",
|
|
70
71
|
"fuse.js": "^7.0.0",
|
package/src/index.js
CHANGED
|
@@ -27,8 +27,7 @@ logEvent("profile", "resolved", {
|
|
|
27
27
|
provider: ragApi.ragConfig.provider,
|
|
28
28
|
provider_source: ragApi.ragConfig.providerSource,
|
|
29
29
|
fallback: ragApi.ragConfig.fallback,
|
|
30
|
-
fallback_source: ragApi.ragConfig.fallbackSource,
|
|
31
|
-
shared_state_path: ragApi.ragConfig.sharedStatePath ? "set" : "empty"
|
|
30
|
+
fallback_source: ragApi.ragConfig.fallbackSource
|
|
32
31
|
});
|
|
33
32
|
|
|
34
33
|
const createServer = () => createMcpServerInstance({
|
package/src/rag/config.js
CHANGED
|
@@ -5,9 +5,18 @@ import { getResolvedDataRoot } from "../data/root.js";
|
|
|
5
5
|
import { resolveProfileConfig } from "./profile-config.js";
|
|
6
6
|
|
|
7
7
|
const dataRoot = getResolvedDataRoot();
|
|
8
|
+
|
|
8
9
|
const pkgUrl = new URL("../../package.json", import.meta.url);
|
|
9
10
|
const pkg = JSON.parse(readFileSync(pkgUrl, "utf8"));
|
|
10
11
|
|
|
12
|
+
const legacyPrebuiltIndexUrl =
|
|
13
|
+
`https://github.com/yushulx/simple-dynamsoft-mcp/releases/download/v${pkg.version}/prebuilt-rag-index-${pkg.version}.tar.gz`;
|
|
14
|
+
|
|
15
|
+
const defaultPrebuiltIndexUrls = {
|
|
16
|
+
gemini:
|
|
17
|
+
`https://github.com/yushulx/simple-dynamsoft-mcp/releases/download/v${pkg.version}/prebuilt-rag-index-gemini-${pkg.version}.tar.gz`
|
|
18
|
+
};
|
|
19
|
+
|
|
11
20
|
function readEnvValue(key, fallback) {
|
|
12
21
|
const value = process.env[key];
|
|
13
22
|
if (value === undefined || value === "") return fallback;
|
|
@@ -50,10 +59,8 @@ const ragConfig = {
|
|
|
50
59
|
fallbackSource: profileConfig.fallbackSource,
|
|
51
60
|
provider: profileConfig.provider,
|
|
52
61
|
fallback: profileConfig.fallback,
|
|
53
|
-
dataRoot,
|
|
54
62
|
cacheDir: readEnvValue("RAG_CACHE_DIR", join(dataRoot, ".rag-cache")),
|
|
55
63
|
modelCacheDir: readEnvValue("RAG_MODEL_CACHE_DIR", join(dataRoot, ".rag-cache", "models")),
|
|
56
|
-
sharedStatePath: readEnvValue("RAG_SHARED_STATE_PATH", ""),
|
|
57
64
|
chunkSize: readIntEnv("RAG_CHUNK_SIZE", 1200),
|
|
58
65
|
chunkOverlap: readIntEnv("RAG_CHUNK_OVERLAP", 200),
|
|
59
66
|
maxChunksPerDoc: readIntEnv("RAG_MAX_CHUNKS_PER_DOC", 6),
|
|
@@ -63,6 +70,10 @@ const ragConfig = {
|
|
|
63
70
|
rebuild: readBoolEnv("RAG_REBUILD", false),
|
|
64
71
|
prewarm: readBoolEnv("RAG_PREWARM", defaultPrewarm),
|
|
65
72
|
prewarmBlock: readBoolEnv("RAG_PREWARM_BLOCK", false),
|
|
73
|
+
prebuiltIndexAutoDownload: readBoolEnv("RAG_PREBUILT_INDEX_AUTO_DOWNLOAD", true),
|
|
74
|
+
prebuiltIndexUrl: readEnvValue("RAG_PREBUILT_INDEX_URL", ""),
|
|
75
|
+
prebuiltIndexUrlGemini: readEnvValue("RAG_PREBUILT_INDEX_URL_GEMINI", defaultPrebuiltIndexUrls.gemini),
|
|
76
|
+
prebuiltIndexTimeoutMs: readIntEnv("RAG_PREBUILT_INDEX_TIMEOUT_MS", 180000),
|
|
66
77
|
geminiApiKey: readEnvValue("GEMINI_API_KEY", ""),
|
|
67
78
|
geminiModel: normalizeGeminiModel(readEnvValue("GEMINI_EMBED_MODEL", "models/gemini-embedding-001")),
|
|
68
79
|
geminiBaseUrl: readEnvValue("GEMINI_API_BASE_URL", "https://generativelanguage.googleapis.com"),
|
|
@@ -75,5 +86,7 @@ const ragConfig = {
|
|
|
75
86
|
|
|
76
87
|
export {
|
|
77
88
|
pkg,
|
|
78
|
-
ragConfig
|
|
89
|
+
ragConfig,
|
|
90
|
+
legacyPrebuiltIndexUrl,
|
|
91
|
+
defaultPrebuiltIndexUrls
|
|
79
92
|
};
|
package/src/rag/gemini-retry.js
CHANGED
package/src/rag/index.js
CHANGED
|
@@ -10,7 +10,7 @@ import {
|
|
|
10
10
|
} from "../server/resource-index.js";
|
|
11
11
|
import { latencyBucket } from "../observability/logging.js";
|
|
12
12
|
import { createLexicalProvider } from "./lexical-provider.js";
|
|
13
|
-
import { pkg, ragConfig } from "./config.js";
|
|
13
|
+
import { pkg, ragConfig, legacyPrebuiltIndexUrl } from "./config.js";
|
|
14
14
|
import {
|
|
15
15
|
ragLogState,
|
|
16
16
|
logRag,
|
|
@@ -27,7 +27,8 @@ import {
|
|
|
27
27
|
buildEmbeddingItems,
|
|
28
28
|
buildIndexSignature,
|
|
29
29
|
normalizeVector,
|
|
30
|
-
dotProduct
|
|
30
|
+
dotProduct,
|
|
31
|
+
isRateLimitError
|
|
31
32
|
} from "./search-utils.js";
|
|
32
33
|
import { createProviderOrchestrator } from "./providers.js";
|
|
33
34
|
import { createVectorCacheHelpers } from "./vector-cache.js";
|
|
@@ -41,6 +42,7 @@ const searchUtils = {
|
|
|
41
42
|
buildIndexSignature,
|
|
42
43
|
normalizeVector,
|
|
43
44
|
dotProduct,
|
|
45
|
+
isRateLimitError,
|
|
44
46
|
entryMatchesScope: (entry, filters) => entryMatchesScope(entry, filters, {
|
|
45
47
|
editionMatches,
|
|
46
48
|
platformMatches
|
|
@@ -49,6 +51,8 @@ const searchUtils = {
|
|
|
49
51
|
|
|
50
52
|
const vectorCache = createVectorCacheHelpers({
|
|
51
53
|
ragConfig,
|
|
54
|
+
pkgVersion: pkg.version,
|
|
55
|
+
legacyPrebuiltIndexUrl,
|
|
52
56
|
logRag
|
|
53
57
|
});
|
|
54
58
|
|
package/src/rag/logger.js
CHANGED
|
@@ -9,13 +9,6 @@ const ragLogState = {
|
|
|
9
9
|
fallbackUse: new Set()
|
|
10
10
|
};
|
|
11
11
|
|
|
12
|
-
const DEPRECATED_PREBUILT_ENV_KEYS = [
|
|
13
|
-
"RAG_PREBUILT_INDEX_AUTO_DOWNLOAD",
|
|
14
|
-
"RAG_PREBUILT_INDEX_URL",
|
|
15
|
-
"RAG_PREBUILT_INDEX_URL_GEMINI",
|
|
16
|
-
"RAG_PREBUILT_INDEX_TIMEOUT_MS"
|
|
17
|
-
];
|
|
18
|
-
|
|
19
12
|
function logRag(eventOrMessage, fields = {}, options = {}) {
|
|
20
13
|
if (fields && typeof fields === "object" && Object.keys(fields).length > 0) {
|
|
21
14
|
logEvent("rag", eventOrMessage, fields, options);
|
|
@@ -27,20 +20,12 @@ function logRag(eventOrMessage, fields = {}, options = {}) {
|
|
|
27
20
|
function logRagConfigOnce(ragConfig) {
|
|
28
21
|
if (ragLogState.config) return;
|
|
29
22
|
ragLogState.config = true;
|
|
30
|
-
const deprecatedKeys = DEPRECATED_PREBUILT_ENV_KEYS.filter((key) => {
|
|
31
|
-
const value = process.env[key];
|
|
32
|
-
return value !== undefined && value !== "";
|
|
33
|
-
});
|
|
34
|
-
if (deprecatedKeys.length > 0) {
|
|
35
|
-
logRag("deprecated_prebuilt_env_vars", {
|
|
36
|
-
keys: deprecatedKeys.join(","),
|
|
37
|
-
recommendation: "Use RAG_SHARED_STATE_PATH for shared shard loading."
|
|
38
|
-
}, { level: "warn" });
|
|
39
|
-
}
|
|
40
23
|
logRag(
|
|
41
24
|
`config provider=${ragConfig.provider} fallback=${ragConfig.fallback} prewarm=${ragConfig.prewarm} rebuild=${ragConfig.rebuild} ` +
|
|
42
|
-
`cache_dir=${ragConfig.cacheDir}
|
|
43
|
-
`
|
|
25
|
+
`cache_dir=${ragConfig.cacheDir} prebuilt_auto_download=${ragConfig.prebuiltIndexAutoDownload} ` +
|
|
26
|
+
`prebuilt_url_override=${ragConfig.prebuiltIndexUrl ? "set" : "empty"} ` +
|
|
27
|
+
`prebuilt_url_gemini=${ragConfig.prebuiltIndexUrlGemini ? "set" : "empty"} ` +
|
|
28
|
+
`prebuilt_timeout_ms=${ragConfig.prebuiltIndexTimeoutMs} gemini_retry_max_attempts=${ragConfig.geminiRetryMaxAttempts} ` +
|
|
44
29
|
`gemini_retry_base_delay_ms=${ragConfig.geminiRetryBaseDelayMs} gemini_retry_max_delay_ms=${ragConfig.geminiRetryMaxDelayMs} ` +
|
|
45
30
|
`gemini_request_throttle_ms=${ragConfig.geminiRequestThrottleMs}`
|
|
46
31
|
);
|
package/src/rag/providers.js
CHANGED
|
@@ -9,18 +9,6 @@ import {
|
|
|
9
9
|
executeWithGeminiRetry
|
|
10
10
|
} from "./gemini-retry.js";
|
|
11
11
|
|
|
12
|
-
const GEMINI_EMBEDDING_PAYLOAD_ERROR_CODE = "GEMINI_EMBEDDING_PAYLOAD_INVALID";
|
|
13
|
-
|
|
14
|
-
function isValidEmbeddingValues(values) {
|
|
15
|
-
return Array.isArray(values) && values.length > 0;
|
|
16
|
-
}
|
|
17
|
-
|
|
18
|
-
function createGeminiEmbeddingPayloadError(message) {
|
|
19
|
-
const error = new Error(message);
|
|
20
|
-
error.code = GEMINI_EMBEDDING_PAYLOAD_ERROR_CODE;
|
|
21
|
-
return error;
|
|
22
|
-
}
|
|
23
|
-
|
|
24
12
|
function resolveProviderChain(ragConfig) {
|
|
25
13
|
let primary = ragConfig.provider;
|
|
26
14
|
if (primary === "auto") {
|
|
@@ -43,7 +31,7 @@ async function embedTextsWithProgress(
|
|
|
43
31
|
onChunk = null,
|
|
44
32
|
providerName = "",
|
|
45
33
|
logRag,
|
|
46
|
-
isRateLimitError
|
|
34
|
+
isRateLimitError
|
|
47
35
|
} = {}
|
|
48
36
|
) {
|
|
49
37
|
const results = [];
|
|
@@ -82,10 +70,6 @@ async function embedTextsWithProgress(
|
|
|
82
70
|
rateLimitFailures = 0;
|
|
83
71
|
await reportChunk(vectors, "batch", batch.length);
|
|
84
72
|
} catch (error) {
|
|
85
|
-
if (error?.code === GEMINI_EMBEDDING_PAYLOAD_ERROR_CODE) {
|
|
86
|
-
throw error;
|
|
87
|
-
}
|
|
88
|
-
|
|
89
73
|
if (isRateLimitError(error)) {
|
|
90
74
|
rateLimitFailures += 1;
|
|
91
75
|
const nextBatchSize = Math.max(1, Math.floor(currentBatchSize / 2));
|
|
@@ -122,12 +106,12 @@ async function embedTextsWithProgress(
|
|
|
122
106
|
finalBatchSize: currentBatchSize
|
|
123
107
|
}
|
|
124
108
|
};
|
|
125
|
-
}
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
for (const text of texts) {
|
|
112
|
+
const vector = await embedder.embed(text);
|
|
113
|
+
results.push(vector);
|
|
114
|
+
await reportChunk([vector], "single", 1);
|
|
131
115
|
}
|
|
132
116
|
|
|
133
117
|
return {
|
|
@@ -239,8 +223,8 @@ function createProviderOrchestrator({
|
|
|
239
223
|
}
|
|
240
224
|
);
|
|
241
225
|
const embedding = payload.embedding?.values || payload.embedding || payload.embeddings?.[0]?.values;
|
|
242
|
-
if (!
|
|
243
|
-
throw
|
|
226
|
+
if (!embedding) {
|
|
227
|
+
throw new Error("Gemini embedding response missing embedding values.");
|
|
244
228
|
}
|
|
245
229
|
return embedding;
|
|
246
230
|
},
|
|
@@ -261,16 +245,7 @@ function createProviderOrchestrator({
|
|
|
261
245
|
if (!Array.isArray(embeddings)) {
|
|
262
246
|
throw new Error("Gemini batch response missing embeddings.");
|
|
263
247
|
}
|
|
264
|
-
|
|
265
|
-
return embeddings.map((item, index) => {
|
|
266
|
-
const values = item?.values || item?.embedding?.values || item?.embedding;
|
|
267
|
-
if (!isValidEmbeddingValues(values)) {
|
|
268
|
-
throw createGeminiEmbeddingPayloadError(
|
|
269
|
-
`Gemini batch embedding response malformed at index=${index}.`
|
|
270
|
-
);
|
|
271
|
-
}
|
|
272
|
-
return values;
|
|
273
|
-
});
|
|
248
|
+
return embeddings.map((item) => item.values || item.embedding?.values || item.embedding);
|
|
274
249
|
},
|
|
275
250
|
getMetrics: () => ({ ...metrics }),
|
|
276
251
|
resetMetrics: () => {
|
|
@@ -328,32 +303,26 @@ function createProviderOrchestrator({
|
|
|
328
303
|
}
|
|
329
304
|
logRag(`cache miss provider=${name} file=${cacheFile} reason=${cacheState.reason}`);
|
|
330
305
|
|
|
331
|
-
const
|
|
306
|
+
const downloadResult = await vectorCache.maybeDownloadPrebuiltVectorIndex({
|
|
332
307
|
provider: name,
|
|
333
308
|
model,
|
|
334
309
|
cacheKey,
|
|
335
310
|
signature,
|
|
336
311
|
cacheFile
|
|
337
312
|
});
|
|
338
|
-
if (
|
|
313
|
+
if (downloadResult.downloaded) {
|
|
339
314
|
cacheState = vectorCache.loadVectorIndexCache(cacheFile, expectedCacheState);
|
|
340
315
|
if (cacheState.hit) {
|
|
341
316
|
const cached = cacheState.payload;
|
|
342
317
|
logRag(
|
|
343
|
-
`cache hit provider=${name} file=${cacheFile} source=
|
|
318
|
+
`cache hit provider=${name} file=${cacheFile} source=prebuilt_download items=${cached.items.length} vectors=${cached.vectors.length}`
|
|
344
319
|
);
|
|
345
320
|
return {
|
|
346
321
|
items: cached.items,
|
|
347
322
|
vectors: cached.vectors
|
|
348
323
|
};
|
|
349
324
|
}
|
|
350
|
-
logRag(`cache miss provider=${name} file=${cacheFile} source=
|
|
351
|
-
} else if (sharedLoadResult.fatal) {
|
|
352
|
-
const sharedError = sharedLoadResult.error || new Error(`shared shard load failed (${sharedLoadResult.reason})`);
|
|
353
|
-
logRag(
|
|
354
|
-
`shared shard load failed provider=${name} reason=${sharedLoadResult.reason} error=${sharedError.message}`
|
|
355
|
-
);
|
|
356
|
-
throw sharedError;
|
|
325
|
+
logRag(`cache miss provider=${name} file=${cacheFile} source=prebuilt_download reason=${cacheState.reason}`);
|
|
357
326
|
}
|
|
358
327
|
} else {
|
|
359
328
|
logRag(`cache bypass provider=${name} file=${cacheFile} reason=rebuild_true`);
|
|
@@ -414,7 +383,7 @@ function createProviderOrchestrator({
|
|
|
414
383
|
total: texts.length,
|
|
415
384
|
providerName: name,
|
|
416
385
|
logRag,
|
|
417
|
-
isRateLimitError: (error) => error
|
|
386
|
+
isRateLimitError: (error) => utils.isRateLimitError(error, isRateLimitGeminiStatus),
|
|
418
387
|
onChunk: ({ vectors, completed, total }) => {
|
|
419
388
|
normalized.push(...vectors.map(utils.normalizeVector));
|
|
420
389
|
persistCheckpoint(completed >= total);
|
package/src/rag/search-utils.js
CHANGED
|
@@ -145,6 +145,12 @@ function dotProduct(a, b) {
|
|
|
145
145
|
return sum;
|
|
146
146
|
}
|
|
147
147
|
|
|
148
|
+
/**
 * Decide whether a failed embedding request should be treated as rate limited.
 *
 * An error is rate limited when it carries a truthy `rateLimited` flag, or when
 * its numeric `status` is accepted by the supplied status predicate.
 *
 * @param {unknown} error - The caught failure; may be null or undefined.
 * @param {(status: number) => boolean} [isRateLimitGeminiStatus] - Predicate
 *   that classifies a status code as a rate-limit response. When omitted (or
 *   not a function) the check falls back to HTTP 429 "Too Many Requests".
 * @returns {boolean} Result of the flag check or of the status check.
 */
function isRateLimitError(error, isRateLimitGeminiStatus) {
  if (error?.rateLimited) return true;

  // `status` may be missing or a numeric string; Number() yields NaN when absent.
  const status = Number(error?.status);

  // This helper runs inside catch handlers. Calling a missing predicate would
  // throw a TypeError there and hide the original failure, so use a default.
  if (typeof isRateLimitGeminiStatus !== "function") {
    return status === 429;
  }
  return isRateLimitGeminiStatus(status);
}
|
|
153
|
+
|
|
148
154
|
export {
|
|
149
155
|
createFuseSearch,
|
|
150
156
|
attachScore,
|
|
@@ -155,5 +161,6 @@ export {
|
|
|
155
161
|
buildEmbeddingItems,
|
|
156
162
|
buildIndexSignature,
|
|
157
163
|
normalizeVector,
|
|
158
|
-
dotProduct
|
|
164
|
+
dotProduct,
|
|
165
|
+
isRateLimitError
|
|
159
166
|
};
|