simple-dynamsoft-mcp 6.3.0 → 7.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +35 -9
- package/README.md +156 -497
- package/package.json +13 -7
- package/scripts/prebuild-rag-index.mjs +1 -1
- package/scripts/run-gemini-tests.mjs +1 -1
- package/scripts/sync-submodules.mjs +1 -1
- package/scripts/verify-doc-resources.mjs +79 -0
- package/src/data/bootstrap.js +475 -0
- package/src/data/download-utils.js +99 -0
- package/src/data/hydration-mode.js +15 -0
- package/src/data/hydration-policy.js +39 -0
- package/src/data/repo-map.js +149 -0
- package/src/{data-root.js → data/root.js} +1 -1
- package/src/{submodule-sync.js → data/submodule-sync.js} +1 -1
- package/src/index.js +49 -1499
- package/src/observability/logging.js +51 -0
- package/src/rag/config.js +96 -0
- package/src/rag/index.js +266 -0
- package/src/rag/lexical-provider.js +170 -0
- package/src/rag/logger.js +46 -0
- package/src/rag/profile-config.js +48 -0
- package/src/rag/providers.js +585 -0
- package/src/rag/search-utils.js +166 -0
- package/src/rag/vector-cache.js +323 -0
- package/src/server/create-server.js +168 -0
- package/src/server/helpers/server-helpers.js +33 -0
- package/src/{resource-index → server/resource-index}/paths.js +2 -2
- package/src/{resource-index → server/resource-index}/samples.js +9 -1
- package/src/{resource-index.js → server/resource-index.js} +158 -93
- package/src/server/resources/register-resources.js +56 -0
- package/src/server/runtime-config.js +66 -0
- package/src/server/tools/register-index-tools.js +130 -0
- package/src/server/tools/register-project-tools.js +305 -0
- package/src/server/tools/register-quickstart-tools.js +572 -0
- package/src/server/tools/register-sample-tools.js +333 -0
- package/src/server/tools/register-version-tools.js +136 -0
- package/src/server/transports/http.js +84 -0
- package/src/server/transports/stdio.js +12 -0
- package/src/data-bootstrap.js +0 -255
- package/src/rag.js +0 -1203
- /package/src/{gemini-retry.js → rag/gemini-retry.js} +0 -0
- /package/src/{normalizers.js → server/normalizers.js} +0 -0
- /package/src/{resource-index → server/resource-index}/builders.js +0 -0
- /package/src/{resource-index → server/resource-index}/config.js +0 -0
- /package/src/{resource-index → server/resource-index}/docs-loader.js +0 -0
- /package/src/{resource-index → server/resource-index}/uri.js +0 -0
- /package/src/{resource-index → server/resource-index}/version-policy.js +0 -0
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
/**
 * Canonicalize an environment-style value so it can be compared against
 * lower-case keywords.
 *
 * @param {unknown} value - Raw value, typically a string read from an env map.
 * @returns {string} Trimmed, lower-cased string form; "" for null/undefined.
 */
function normalizeEnvValue(value) {
  const text = value == null ? "" : String(value);
  return text.trim().toLowerCase();
}
|
|
5
|
+
|
|
6
|
+
/**
 * Decide whether debug-level log lines should be emitted.
 *
 * Verbose logging is on when MCP_LOG_LEVEL is "debug", "verbose" or "trace",
 * or when MCP_VERBOSE_LOGS is one of "1", "true", "yes", "on". Both values
 * are compared after trimming and lower-casing.
 *
 * @param {Record<string, unknown>} [env=process.env] - Environment map to read.
 * @returns {boolean} True when verbose logging is enabled.
 */
function isVerboseLoggingEnabled(env = process.env) {
  const verboseLevels = new Set(["debug", "verbose", "trace"]);
  if (verboseLevels.has(normalizeEnvValue(env.MCP_LOG_LEVEL))) {
    return true;
  }

  const enabledToggles = new Set(["1", "true", "yes", "on"]);
  return enabledToggles.has(normalizeEnvValue(env.MCP_VERBOSE_LOGS));
}
|
|
12
|
+
|
|
13
|
+
/**
 * Render a field value for a `key=value` log line.
 *
 * Empty values become `""`. Values containing whitespace, `=` or a double
 * quote are JSON-quoted so that a value which itself contains quotes cannot
 * be mistaken for a quoted token. Everything else is emitted bare.
 *
 * @param {unknown} value - Value to render; null/undefined count as empty.
 * @returns {string} The log-safe representation.
 */
function quoteValue(value) {
  const raw = String(value ?? "");
  if (raw.length === 0) return '""';
  // A bare value with an embedded `"` would be ambiguous with a quoted one.
  if (/[\s="]/.test(raw)) return JSON.stringify(raw);
  return raw;
}
|
|
19
|
+
|
|
20
|
+
/**
 * Map a latency in milliseconds onto a coarse bucket label.
 *
 * @param {unknown} latencyMs - Latency in milliseconds (number or numeric string).
 * @returns {string} One of "lt100ms", "100-299ms", "300-999ms", "1-2s"
 *   (1000-2999 ms), "ge3s", or "unknown" when the input is missing, blank,
 *   non-numeric, non-finite or negative.
 */
function latencyBucket(latencyMs) {
  // Number(null) and Number("") are both 0; a missing measurement must not
  // be reported as the fastest bucket.
  if (latencyMs === null || latencyMs === undefined) return "unknown";
  if (typeof latencyMs === "string" && latencyMs.trim() === "") return "unknown";

  const n = Number(latencyMs);
  if (!Number.isFinite(n) || n < 0) return "unknown";
  if (n < 100) return "lt100ms";
  if (n < 300) return "100-299ms";
  if (n < 1000) return "300-999ms";
  if (n < 3000) return "1-2s";
  return "ge3s";
}
|
|
29
|
+
|
|
30
|
+
/**
 * Write one structured `key=value` log line via `console.error`.
 *
 * The line has the shape `[component] event=<event> k=v ...`, followed by
 * `level=<level>` for any level other than "info". Fields whose value is
 * undefined, null or "" are omitted. Debug-level lines are dropped unless
 * verbose logging is enabled.
 *
 * @param {string} component - Component tag printed in square brackets.
 * @param {string} event - Event name; falls back to "detail" when falsy.
 * @param {Record<string, unknown>|null} [fields={}] - Extra fields; anything
 *   that is not an object is ignored.
 * @param {{level?: string, env?: Record<string, unknown>}|null} [options={}]
 *   - `level` defaults to "info"; `env` overrides the environment used for
 *   the verbosity check.
 * @returns {void}
 */
function logEvent(component, event, fields = {}, options = {}) {
  // Tolerate an explicit null the same way a null `fields` is tolerated.
  const settings = options ?? {};
  const level = normalizeEnvValue(settings.level || "info") || "info";
  // `?? undefined` lets a null env fall back to the callee's default.
  const verbose = isVerboseLoggingEnabled(settings.env ?? undefined);
  if (level === "debug" && !verbose) return;

  const parts = [`[${component}]`, `event=${quoteValue(event || "detail")}`];
  // Only expand real objects; a string would otherwise be logged per character.
  const entries = fields !== null && typeof fields === "object" ? Object.entries(fields) : [];
  for (const [key, value] of entries) {
    if (value === undefined || value === null || value === "") continue;
    parts.push(`${key}=${quoteValue(value)}`);
  }

  if (level !== "info") {
    parts.push(`level=${level}`);
  }
  console.error(parts.join(" "));
}
|
|
46
|
+
|
|
47
|
+
export {
|
|
48
|
+
isVerboseLoggingEnabled,
|
|
49
|
+
latencyBucket,
|
|
50
|
+
logEvent
|
|
51
|
+
};
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
import { readFileSync } from "node:fs";
|
|
2
|
+
import { join } from "node:path";
|
|
3
|
+
import "dotenv/config";
|
|
4
|
+
import { getResolvedDataRoot } from "../data/root.js";
|
|
5
|
+
import { resolveProfileConfig } from "./profile-config.js";
|
|
6
|
+
|
|
7
|
+
// Resolved data directory; used below as the parent of the default cache paths.
const dataRoot = getResolvedDataRoot();

// package.json is read from two levels above this module; its version is
// embedded in every prebuilt-index download URL.
const pkgUrl = new URL("../../package.json", import.meta.url);
const pkg = JSON.parse(readFileSync(pkgUrl, "utf8"));

// Release download directory for the current package version.
const releaseDownloadBase =
  `https://github.com/yushulx/simple-dynamsoft-mcp/releases/download/v${pkg.version}`;

// Archive name without a provider suffix.
const legacyPrebuiltIndexUrl = `${releaseDownloadBase}/prebuilt-rag-index-${pkg.version}.tar.gz`;

// Per-provider archive URLs, keyed by provider name.
const defaultPrebuiltIndexUrls = {
  local: `${releaseDownloadBase}/prebuilt-rag-index-local-${pkg.version}.tar.gz`,
  gemini: `${releaseDownloadBase}/prebuilt-rag-index-gemini-${pkg.version}.tar.gz`
};
|
|
21
|
+
|
|
22
|
+
/**
 * Read a raw string from `process.env`.
 *
 * @param {string} key - Environment variable name.
 * @param {*} fallback - Returned when the variable is unset or empty.
 * @returns {*} The variable's value, or `fallback`.
 */
function readEnvValue(key, fallback) {
  const raw = process.env[key];
  const isUnset = raw === undefined || raw === "";
  return isUnset ? fallback : raw;
}
|
|
27
|
+
|
|
28
|
+
/**
 * Read a boolean flag from the environment.
 *
 * "1", "true", "yes" and "on" (any case, surrounding whitespace ignored) are
 * true; any other non-blank value is false.
 *
 * @param {string} key - Environment variable name.
 * @param {boolean} fallback - Returned when the variable is unset or blank.
 * @returns {boolean} The parsed flag, or `fallback`.
 */
function readBoolEnv(key, fallback) {
  // Trim so that a padded value such as " true " is not read as false.
  const value = String(readEnvValue(key, "")).trim().toLowerCase();
  if (!value) return fallback;
  return ["1", "true", "yes", "on"].includes(value);
}
|
|
33
|
+
|
|
34
|
+
/**
 * Read a base-10 integer from the environment.
 *
 * @param {string} key - Environment variable name.
 * @param {number} fallback - Returned when the variable is unset, empty, or
 *   does not start with a parsable integer.
 * @returns {number} The parsed integer, or `fallback`.
 */
function readIntEnv(key, fallback) {
  const text = readEnvValue(key, "");
  if (text) {
    const parsed = Number.parseInt(text, 10);
    if (!Number.isNaN(parsed)) return parsed;
  }
  return fallback;
}
|
|
40
|
+
|
|
41
|
+
/**
 * Read a floating-point number from the environment.
 *
 * @param {string} key - Environment variable name.
 * @param {number} fallback - Returned when the variable is unset, empty, or
 *   does not start with a parsable number.
 * @returns {number} The parsed number, or `fallback`.
 */
function readFloatEnv(key, fallback) {
  const text = readEnvValue(key, "");
  if (text) {
    const parsed = Number.parseFloat(text);
    if (!Number.isNaN(parsed)) return parsed;
  }
  return fallback;
}
|
|
47
|
+
|
|
48
|
+
/**
 * Return a Gemini embedding model name carrying the `models/` prefix.
 *
 * @param {string|null|undefined} model - Model name with or without the
 *   `models/` prefix; surrounding whitespace is ignored.
 * @returns {string} The prefixed model name. A missing or blank name yields
 *   "models/gemini-embedding-001", the same default the configuration uses
 *   for GEMINI_EMBED_MODEL.
 */
function normalizeGeminiModel(model) {
  const name = String(model ?? "").trim();
  if (!name) return "models/gemini-embedding-001";
  return name.startsWith("models/") ? name : `models/${name}`;
}
|
|
53
|
+
|
|
54
|
+
// Profile, provider and fallback resolved from the process environment.
const profileConfig = resolveProfileConfig(process.env);

// RAG settings resolved once at module load. Each entry comes from the
// resolved profile or from the named environment variable, with the literal
// shown as the default.
const ragConfig = {
  // --- profile resolution ---
  profile: profileConfig.profile,
  profileDefaults: profileConfig.defaults,
  providerSource: profileConfig.providerSource,
  fallbackSource: profileConfig.fallbackSource,
  provider: profileConfig.provider,
  fallback: profileConfig.fallback,

  // --- cache locations (default to <dataRoot>/.rag-cache) ---
  cacheDir: readEnvValue("RAG_CACHE_DIR", join(dataRoot, ".rag-cache")),
  modelCacheDir: readEnvValue("RAG_MODEL_CACHE_DIR", join(dataRoot, ".rag-cache", "models")),

  // --- local embedding model ---
  localModel: readEnvValue("RAG_LOCAL_MODEL", "Xenova/all-MiniLM-L6-v2"),
  localQuantized: readBoolEnv("RAG_LOCAL_QUANTIZED", true),

  // --- chunking and scoring ---
  chunkSize: readIntEnv("RAG_CHUNK_SIZE", 1200),
  chunkOverlap: readIntEnv("RAG_CHUNK_OVERLAP", 200),
  maxChunksPerDoc: readIntEnv("RAG_MAX_CHUNKS_PER_DOC", 6),
  maxTextChars: readIntEnv("RAG_MAX_TEXT_CHARS", 4000),
  minScore: readFloatEnv("RAG_MIN_SCORE", 0.2),
  includeScore: readBoolEnv("RAG_INCLUDE_SCORE", false),

  // --- index lifecycle ---
  rebuild: readBoolEnv("RAG_REBUILD", false),
  prewarm: readBoolEnv("RAG_PREWARM", false),
  prewarmBlock: readBoolEnv("RAG_PREWARM_BLOCK", false),

  // --- prebuilt index download ---
  prebuiltIndexAutoDownload: readBoolEnv("RAG_PREBUILT_INDEX_AUTO_DOWNLOAD", true),
  prebuiltIndexUrl: readEnvValue("RAG_PREBUILT_INDEX_URL", ""),
  prebuiltIndexUrlLocal: readEnvValue("RAG_PREBUILT_INDEX_URL_LOCAL", defaultPrebuiltIndexUrls.local),
  prebuiltIndexUrlGemini: readEnvValue("RAG_PREBUILT_INDEX_URL_GEMINI", defaultPrebuiltIndexUrls.gemini),
  prebuiltIndexTimeoutMs: readIntEnv("RAG_PREBUILT_INDEX_TIMEOUT_MS", 180000),

  // --- Gemini embedding API ---
  geminiApiKey: readEnvValue("GEMINI_API_KEY", ""),
  geminiModel: normalizeGeminiModel(readEnvValue("GEMINI_EMBED_MODEL", "models/gemini-embedding-001")),
  geminiBaseUrl: readEnvValue("GEMINI_API_BASE_URL", "https://generativelanguage.googleapis.com"),
  geminiBatchSize: readIntEnv("GEMINI_EMBED_BATCH_SIZE", 16),
  geminiRetryMaxAttempts: readIntEnv("GEMINI_RETRY_MAX_ATTEMPTS", 5),
  geminiRetryBaseDelayMs: readIntEnv("GEMINI_RETRY_BASE_DELAY_MS", 500),
  geminiRetryMaxDelayMs: readIntEnv("GEMINI_RETRY_MAX_DELAY_MS", 10000),
  geminiRequestThrottleMs: readIntEnv("GEMINI_REQUEST_THROTTLE_MS", 0)
};
|
|
90
|
+
|
|
91
|
+
export {
|
|
92
|
+
pkg,
|
|
93
|
+
ragConfig,
|
|
94
|
+
legacyPrebuiltIndexUrl,
|
|
95
|
+
defaultPrebuiltIndexUrls
|
|
96
|
+
};
|
package/src/rag/index.js
ADDED
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
import {
|
|
2
|
+
resourceIndex,
|
|
3
|
+
normalizeProduct,
|
|
4
|
+
normalizePlatform,
|
|
5
|
+
normalizeEdition,
|
|
6
|
+
editionMatches,
|
|
7
|
+
platformMatches,
|
|
8
|
+
getRagSignatureData,
|
|
9
|
+
resourceIndexByUri
|
|
10
|
+
} from "../server/resource-index.js";
|
|
11
|
+
import { latencyBucket } from "../observability/logging.js";
|
|
12
|
+
import { createLexicalProvider } from "./lexical-provider.js";
|
|
13
|
+
import { pkg, ragConfig, legacyPrebuiltIndexUrl } from "./config.js";
|
|
14
|
+
import {
|
|
15
|
+
ragLogState,
|
|
16
|
+
logRag,
|
|
17
|
+
logRagConfigOnce,
|
|
18
|
+
resetRagProviderLogState
|
|
19
|
+
} from "./logger.js";
|
|
20
|
+
import {
|
|
21
|
+
createFuseSearch,
|
|
22
|
+
attachScore,
|
|
23
|
+
normalizeSearchFilters,
|
|
24
|
+
entryMatchesScope,
|
|
25
|
+
normalizeText,
|
|
26
|
+
truncateText,
|
|
27
|
+
buildEmbeddingItems,
|
|
28
|
+
buildIndexSignature,
|
|
29
|
+
normalizeVector,
|
|
30
|
+
dotProduct,
|
|
31
|
+
isRateLimitError
|
|
32
|
+
} from "./search-utils.js";
|
|
33
|
+
import { createProviderOrchestrator } from "./providers.js";
|
|
34
|
+
import { createVectorCacheHelpers } from "./vector-cache.js";
|
|
35
|
+
|
|
36
|
+
// Search helpers handed to the provider orchestrator. Three of them are
// pre-bound to module state: the resource index, the include-score setting,
// and the edition/platform matchers.
const searchUtils = {
  createFuseSearch: () => {
    return createFuseSearch(resourceIndex);
  },
  attachScore: (entry, score) => {
    return attachScore(entry, score, ragConfig.includeScore);
  },
  normalizeText,
  truncateText,
  buildEmbeddingItems,
  buildIndexSignature,
  normalizeVector,
  dotProduct,
  isRateLimitError,
  entryMatchesScope: (entry, filters) => {
    return entryMatchesScope(entry, filters, { editionMatches, platformMatches });
  }
};

// Vector-cache helpers built from the RAG config, the package version, the
// legacy prebuilt-index URL and the RAG logger.
const vectorCache = createVectorCacheHelpers({
  ragConfig,
  pkgVersion: pkg.version,
  legacyPrebuiltIndexUrl,
  logRag
});

// Orchestrator used below to resolve the provider chain and load providers.
const providerOrchestrator = createProviderOrchestrator({
  pkgVersion: pkg.version,
  ragConfig,
  ragLogState,
  logRag,
  resourceIndex,
  resourceIndexByUri,
  createLexicalProvider,
  getRagSignatureData,
  utils: searchUtils,
  vectorCache
});
|
|
71
|
+
|
|
72
|
+
/**
 * Refresh the search providers and clear the per-provider log markers, then
 * log the current size of the resource index.
 *
 * @returns {void}
 */
function refreshRagIndexes() {
  providerOrchestrator.refreshProviders();
  resetRagProviderLogState();

  const resourceCount = resourceIndex.length;
  logRag(`indexes refreshed resources=${resourceCount}`);
}
|
|
77
|
+
|
|
78
|
+
/**
 * Search the resource index.
 *
 * With a blank query the index is only filtered by scope. Otherwise each
 * provider in the resolved chain is tried in order and the first one that
 * completes supplies the results; provider failures are logged and the next
 * provider is tried.
 *
 * @param {object} params
 * @param {string} [params.query] - Free-text query; blank means "list by scope".
 * @param {string} [params.product] - Product filter.
 * @param {string} [params.edition] - Edition filter.
 * @param {string} [params.platform] - Platform filter.
 * @param {string} [params.type] - Resource type filter.
 * @param {number} [params.limit] - Maximum number of results, capped at 50.
 *   Missing, non-numeric, zero or negative values mean "no limit".
 * @returns {Promise<Array>} Matching entries; an empty array when every
 *   provider failed or the chain is empty.
 */
async function searchResources({ query, product, edition, platform, type, limit }) {
  const startedAt = Date.now();
  const filters = normalizeSearchFilters({ product, edition, platform, type }, {
    normalizeProduct,
    normalizePlatform,
    normalizeEdition
  });
  const searchQuery = query ? String(query).trim() : "";

  // Only a positive whole number is a usable limit: a negative value passed
  // to slice(0, limit) would drop results from the tail instead of capping.
  const requestedLimit = Math.floor(Number(limit));
  const maxResults = Number.isFinite(requestedLimit) && requestedLimit > 0
    ? Math.min(requestedLimit, 50)
    : undefined;

  // Thrown values are not guaranteed to be Error instances.
  const describeError = (error) => error?.message ?? String(error);

  if (!searchQuery) {
    const results = resourceIndex.filter((entry) => searchUtils.entryMatchesScope(entry, filters));
    return maxResults ? results.slice(0, maxResults) : results;
  }

  logRagConfigOnce(ragConfig);
  const providers = providerOrchestrator.resolveProviderChain();
  if (!ragLogState.providerChain) {
    ragLogState.providerChain = true;
    logRag("provider_chain", {
      profile: ragConfig.profile,
      provider: providers[0] || "unknown",
      fallback: ragConfig.fallback,
      chain: providers.join("->")
    });
  }

  let lastError = null;
  for (const name of providers) {
    try {
      const provider = await providerOrchestrator.loadSearchProvider(name);
      const results = await provider.search(searchQuery, filters, maxResults);

      // Log the first successful use of each provider once.
      if (!ragLogState.providerFirstUse.has(name)) {
        ragLogState.providerFirstUse.add(name);
        logRag("provider_selected", {
          profile: ragConfig.profile,
          provider: name,
          fallback: ragConfig.fallback
        });
      }
      // Any provider other than the head of the chain is a fallback.
      if (name !== providers[0] && !ragLogState.fallbackUse.has(name)) {
        ragLogState.fallbackUse.add(name);
        logRag("fallback_engaged", {
          selected_provider: name,
          primary_provider: providers[0],
          fallback: ragConfig.fallback
        });
      }

      const elapsedMs = Date.now() - startedAt;
      logRag("search_complete", {
        profile: ragConfig.profile,
        provider: name,
        fallback: ragConfig.fallback,
        product: filters.product || "any",
        edition: filters.edition || "any",
        platform: filters.platform || "any",
        type: filters.type || "any",
        result_count: results.length,
        latency_ms: elapsedMs,
        latency_bucket: latencyBucket(elapsedMs)
      });
      return results;
    } catch (error) {
      lastError = { message: describeError(error) };
      logRag("provider_failed", {
        provider: name,
        fallback: ragConfig.fallback,
        error: lastError.message
      }, { level: "error" });
    }
  }

  if (lastError) {
    const elapsedMs = Date.now() - startedAt;
    logRag("search_failed", {
      profile: ragConfig.profile,
      provider: providers[0] || "unknown",
      fallback: ragConfig.fallback,
      latency_ms: elapsedMs,
      latency_bucket: latencyBucket(elapsedMs),
      error: lastError.message
    }, { level: "error" });
  }
  return [];
}
|
|
163
|
+
|
|
164
|
+
/**
 * Warm the primary search provider when prewarming is enabled.
 *
 * Does nothing when `ragConfig.prewarm` is off, when the provider chain is
 * empty, or when the primary provider is "fuse". Failures are logged at
 * error level and are not rethrown.
 *
 * @returns {Promise<void>}
 */
async function prewarmRagIndex() {
  if (!ragConfig.prewarm) {
    return;
  }
  logRagConfigOnce(ragConfig);

  const [primary] = providerOrchestrator.resolveProviderChain();
  const nothingToWarm = !primary || primary === "fuse";
  if (nothingToWarm) {
    return;
  }

  // Fresh field object per log call, matching the shape of the other events.
  const describe = () => ({
    profile: ragConfig.profile,
    provider: primary,
    fallback: ragConfig.fallback
  });

  try {
    logRag("prewarm_start", describe());
    const provider = await providerOrchestrator.loadSearchProvider(primary);
    // `warm` is optional on a provider.
    if (provider.warm) {
      await provider.warm();
    }
    logRag("prewarm_done", describe());
  } catch (error) {
    logRag("prewarm_failed", { provider: primary, error: error.message }, { level: "error" });
  }
}
|
|
192
|
+
|
|
193
|
+
/**
 * Suggest sample resources for a product/edition/platform scope.
 *
 * When a query is given, `searchResources` is tried first with type
 * "sample". If that yields nothing (or there is no query), the samples in
 * scope are listed from the resource index, widening to every sample of the
 * product when the scope is empty. With a query, that list is ranked by
 * keyword overlap: a term matching a tag scores 3 and a term found in the
 * title, summary or tags scores 1; ties are broken by title.
 *
 * @param {object} params
 * @param {string} [params.query] - Free-text query.
 * @param {string} [params.product] - Product filter.
 * @param {string} [params.edition] - Edition filter.
 * @param {string} [params.platform] - Platform filter.
 * @param {number} [params.limit=5] - Maximum number of suggestions, capped at
 *   10. Missing, non-numeric or non-positive values fall back to 5.
 * @returns {Promise<Array>} Sample entries, de-duplicated by URI.
 */
async function getSampleSuggestions({ query, product, edition, platform, limit = 5 }) {
  const normalizedProduct = normalizeProduct(product);
  const normalizedPlatform = normalizePlatform(platform);
  const normalizedEdition = normalizeEdition(edition, normalizedPlatform, normalizedProduct);
  const searchQuery = query ? String(query).trim() : "";

  // A negative or NaN cap would make the `length >= maxResults` check below
  // stop after one entry or never stop, so only positive integers are used.
  const requestedLimit = Math.floor(Number(limit));
  const maxResults = Number.isFinite(requestedLimit) && requestedLimit >= 1
    ? Math.min(requestedLimit, 10)
    : 5;

  if (searchQuery) {
    const results = await searchResources({
      query: searchQuery,
      product: normalizedProduct,
      edition: normalizedEdition,
      platform: normalizedPlatform,
      type: "sample",
      limit: maxResults
    });
    if (results.length) return results;
  }

  const matchesScope = (entry) => {
    if (normalizedProduct && entry.product !== normalizedProduct) return false;
    if (!editionMatches(normalizedEdition, entry.edition)) return false;
    if (!platformMatches(normalizedPlatform, entry)) return false;
    return entry.type === "sample";
  };

  let candidates = resourceIndex.filter(matchesScope);
  if (candidates.length === 0 && normalizedProduct) {
    // Nothing in scope: widen to every sample of the product.
    candidates = resourceIndex.filter((entry) => entry.type === "sample" && entry.product === normalizedProduct);
  }

  if (searchQuery && candidates.length > 1) {
    const terms = normalizeText(searchQuery.toLowerCase()).split(/\s+/).filter(Boolean);
    const scoreEntry = (entry) => {
      const tags = Array.isArray(entry.tags) ? entry.tags.map((tag) => String(tag).toLowerCase()) : [];
      const haystack = [
        String(entry.title || "").toLowerCase(),
        String(entry.summary || "").toLowerCase(),
        tags.join(" ")
      ].join(" ");
      let score = 0;
      for (const term of terms) {
        if (tags.some((tag) => tag.includes(term))) score += 3;
        if (haystack.includes(term)) score += 1;
      }
      return score;
    };

    // Score each entry once up front instead of on every comparison.
    const scored = candidates.map((entry) => ({ entry, score: scoreEntry(entry) }));
    scored.sort((a, b) => {
      const delta = b.score - a.score;
      if (delta !== 0) return delta;
      return String(a.entry.title || "").localeCompare(String(b.entry.title || ""));
    });
    candidates = scored.map((item) => item.entry);
  }

  const seen = new Set();
  const results = [];
  for (const entry of candidates) {
    if (seen.has(entry.uri)) continue;
    seen.add(entry.uri);
    results.push(entry);
    if (results.length >= maxResults) break;
  }

  return results;
}
|
|
259
|
+
|
|
260
|
+
export {
|
|
261
|
+
ragConfig,
|
|
262
|
+
searchResources,
|
|
263
|
+
getSampleSuggestions,
|
|
264
|
+
prewarmRagIndex,
|
|
265
|
+
refreshRagIndexes
|
|
266
|
+
};
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
import Fuse from "fuse.js";
|
|
2
|
+
|
|
3
|
+
// Options passed to the Fuse constructor when the caller supplies none.
// `includeScore` is required: the provider reads `hit.score` from each hit.
const DEFAULT_FUSE_OPTIONS = {
  keys: ["title", "summary", "tags", "uri"],
  threshold: 0.35,
  ignoreLocation: true,
  includeScore: true
};

// BM25 parameters used by computeBm25Score: K1 appears in the
// term-frequency factor, B weights the document-length ratio.
const BM25_K1 = 1.2;
const BM25_B = 0.75;
|
|
12
|
+
|
|
13
|
+
/**
 * Split text into lower-case ASCII alphanumeric tokens.
 *
 * Every run of characters outside `a-z0-9` (after lower-casing) acts as a
 * separator.
 *
 * @param {unknown} text - Text to tokenize; falsy values yield no tokens.
 * @returns {string[]} Tokens in order of appearance.
 */
function tokenize(text) {
  const lowered = String(text || "").toLowerCase();
  const words = lowered.match(/[a-z0-9]+/g);
  return words === null ? [] : [...words];
}
|
|
21
|
+
|
|
22
|
+
/**
 * Scale a score by a maximum.
 *
 * @param {number} value - Raw score.
 * @param {number} max - Maximum used as the divisor.
 * @returns {number} `value / max`, or 0 when either argument is not a
 *   finite positive number.
 */
function normalizeScore(value, max) {
  const isUsable = (n) => Number.isFinite(n) && n > 0;
  return isUsable(value) && isUsable(max) ? value / max : 0;
}
|
|
27
|
+
|
|
28
|
+
/**
 * Sort comparator for `{ entry, score }` pairs: higher score first, then
 * title ascending, then URI ascending.
 *
 * @param {{entry: {title?: string, uri?: string}, score: number}} a
 * @param {{entry: {title?: string, uri?: string}, score: number}} b
 * @returns {number} Negative when `a` sorts first, positive when `b` does.
 */
function compareLexicalResults(a, b) {
  const byScore = b.score - a.score;
  if (byScore !== 0) return byScore;

  const asText = (value) => String(value || "");
  const byTitle = asText(a.entry.title).localeCompare(asText(b.entry.title));
  return byTitle !== 0
    ? byTitle
    : asText(a.entry.uri).localeCompare(asText(b.entry.uri));
}
|
|
37
|
+
|
|
38
|
+
/**
 * Build the statistics needed for BM25 scoring.
 *
 * Each entry's title, summary, tags and URI are joined and tokenized. Per
 * document the token count and term frequencies are kept; across documents
 * the number of documents containing each term is counted.
 *
 * @param {Array<{title?: string, summary?: string, tags?: string[], uri?: string}>} entries
 * @returns {{documents: Array<{index: number, entry: object, length: number, termFreq: Map<string, number>}>,
 *   documentFrequency: Map<string, number>, totalDocuments: number, avgLength: number}}
 *   `avgLength` is 0 when there are no documents.
 */
function buildBm25Index(entries) {
  const documentFrequency = new Map();
  const documents = [];
  let tokenTotal = 0;

  entries.forEach((entry, index) => {
    const tagText = Array.isArray(entry.tags) ? entry.tags.join(" ") : "";
    const tokens = tokenize([entry.title, entry.summary, tagText, entry.uri].join(" \n "));

    const termFreq = new Map();
    for (const token of tokens) {
      termFreq.set(token, (termFreq.get(token) || 0) + 1);
    }

    // termFreq holds each distinct token once, in first-seen order, so its
    // keys are exactly the terms this document contributes to.
    for (const term of termFreq.keys()) {
      documentFrequency.set(term, (documentFrequency.get(term) || 0) + 1);
    }

    tokenTotal += tokens.length;
    documents.push({ index, entry, length: tokens.length, termFreq });
  });

  const totalDocuments = documents.length;
  return {
    documents,
    documentFrequency,
    totalDocuments,
    avgLength: totalDocuments > 0 ? tokenTotal / totalDocuments : 0
  };
}
|
|
81
|
+
|
|
82
|
+
/**
 * Score one document against a set of query terms with BM25.
 *
 * @param {{documentFrequency: Map<string, number>, totalDocuments: number, avgLength: number}} indexState
 * @param {{length: number, termFreq: Map<string, number>}} docState
 * @param {string[]} terms - Query terms.
 * @returns {number} Sum of the per-term contributions; 0 when there are no
 *   terms, the document is empty, or the average length is 0.
 */
function computeBm25Score(indexState, docState, terms) {
  const canScore = terms.length !== 0 && Boolean(docState.length) && Boolean(indexState.avgLength);
  if (!canScore) return 0;

  return terms.reduce((total, term) => {
    const tf = docState.termFreq.get(term) || 0;
    if (!tf) return total;

    const df = indexState.documentFrequency.get(term) || 0;
    const idf = Math.log(1 + (indexState.totalDocuments - df + 0.5) / (df + 0.5));
    const denom = tf + BM25_K1 * (1 - BM25_B + BM25_B * (docState.length / indexState.avgLength));
    return total + idf * ((tf * (BM25_K1 + 1)) / denom);
  }, 0);
}
|
|
98
|
+
|
|
99
|
+
/**
 * Create the "lexical" search provider, which blends BM25 scoring with
 * Fuse fuzzy matching.
 *
 * Both scores are normalized by the best score among entries inside the
 * filter scope, then combined as `bm25 * bm25Weight + fuse * fuseWeight`.
 *
 * @param {object} deps
 * @param {Array<object>} deps.entries - Entries to index; `uri` identifies an entry.
 * @param {(entry: object, filters: object) => boolean} deps.entryMatchesScope
 *   - Scope predicate applied to every candidate.
 * @param {(entry: object, score: number) => object} deps.attachScore
 *   - Produces the returned result for an entry and its blended score.
 * @param {object} [deps.fuseOptions=DEFAULT_FUSE_OPTIONS] - Passed to Fuse.
 * @param {number} [deps.bm25Weight=0.7] - Weight of the normalized BM25 score.
 * @param {number} [deps.fuseWeight=0.3] - Weight of the normalized Fuse score.
 * @returns {{name: string, search: Function, warm: Function}} Provider with
 *   `search(query, filters, limit)` and a no-op `warm()`.
 */
function createLexicalProvider({
  entries,
  entryMatchesScope,
  attachScore,
  fuseOptions = DEFAULT_FUSE_OPTIONS,
  bm25Weight = 0.7,
  fuseWeight = 0.3
}) {
  const fuse = new Fuse(entries, fuseOptions);
  const bm25Index = buildBm25Index(entries);
  const entryByUri = new Map(entries.map((entry) => [entry.uri, entry]));

  return {
    name: "lexical",
    search: async (query, filters, limit) => {
      const terms = [...new Set(tokenize(query))];

      // Fuse reports 0 for a perfect match, so invert it. Out-of-scope hits
      // are skipped before tracking the maximum; otherwise an out-of-scope
      // best hit would scale down every in-scope fuzzy score, unlike the
      // BM25 side, which only ever sees in-scope documents.
      const fuseScoreByUri = new Map();
      let maxFuse = 0;
      for (const hit of fuse.search(query)) {
        if (!entryMatchesScope(hit.item, filters)) continue;
        const candidateScore = Number.isFinite(hit.score) ? Math.max(0, 1 - hit.score) : 0;
        const current = fuseScoreByUri.get(hit.item.uri) || 0;
        if (candidateScore > current) {
          fuseScoreByUri.set(hit.item.uri, candidateScore);
          if (candidateScore > maxFuse) maxFuse = candidateScore;
        }
      }

      const bm25ScoreByUri = new Map();
      let maxBm25 = 0;
      for (const doc of bm25Index.documents) {
        if (!entryMatchesScope(doc.entry, filters)) continue;
        const score = computeBm25Score(bm25Index, doc, terms);
        if (score <= 0) continue;
        bm25ScoreByUri.set(doc.entry.uri, score);
        if (score > maxBm25) maxBm25 = score;
      }

      // Both maps now hold in-scope URIs only; their union is the candidate set.
      const candidateUris = new Set([...bm25ScoreByUri.keys(), ...fuseScoreByUri.keys()]);

      const merged = [];
      for (const uri of candidateUris) {
        const entry = entryByUri.get(uri);
        if (!entry) continue;
        const bm25Norm = normalizeScore(bm25ScoreByUri.get(uri) || 0, maxBm25);
        const fuseNorm = normalizeScore(fuseScoreByUri.get(uri) || 0, maxFuse);
        const score = (bm25Norm * bm25Weight) + (fuseNorm * fuseWeight);
        merged.push({ entry, score });
      }

      merged.sort(compareLexicalResults);
      const ranked = merged.map((item) => attachScore(item.entry, item.score));
      // A negative limit would make slice() drop results from the tail.
      if (limit > 0) return ranked.slice(0, limit);
      return ranked;
    },
    warm: async () => {}
  };
}
|
|
165
|
+
|
|
166
|
+
export {
|
|
167
|
+
tokenize,
|
|
168
|
+
compareLexicalResults,
|
|
169
|
+
createLexicalProvider
|
|
170
|
+
};
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import { logEvent } from "../observability/logging.js";
|
|
2
|
+
|
|
3
|
+
// Markers recording which one-time RAG log lines have already been written.
// The boolean flags mark single events; the sets hold provider names.
const ragLogState = {
  config: false,
  providerChain: false,
  localEmbedderInit: false,
  providerReady: new Set(),
  providerFirstUse: new Set(),
  fallbackUse: new Set()
};
|
|
11
|
+
|
|
12
|
+
/**
 * Log a RAG event under the "rag" component.
 *
 * With a non-empty `fields` object the first argument is the event name and
 * the call is forwarded unchanged. Otherwise the first argument is treated
 * as a free-form message and logged as a "detail" event, at debug level
 * unless `options.level` says otherwise.
 *
 * @param {string} eventOrMessage - Event name or free-form message.
 * @param {Record<string, unknown>} [fields={}] - Structured fields.
 * @param {{level?: string, env?: object}} [options={}] - Passed to `logEvent`.
 * @returns {void}
 */
function logRag(eventOrMessage, fields = {}, options = {}) {
  const hasFields = Boolean(fields) && typeof fields === "object" && Object.keys(fields).length > 0;
  if (!hasFields) {
    const message = String(eventOrMessage || "");
    logEvent("rag", "detail", { message }, { ...options, level: options.level || "debug" });
    return;
  }
  logEvent("rag", eventOrMessage, fields, options);
}
|
|
19
|
+
|
|
20
|
+
/**
 * Log a one-line summary of the RAG configuration the first time it is
 * called; later calls do nothing. Prebuilt-index URLs are reported only as
 * "set" or "empty".
 *
 * @param {object} ragConfig - Resolved RAG configuration.
 * @returns {void}
 */
function logRagConfigOnce(ragConfig) {
  if (ragLogState.config) return;
  ragLogState.config = true;

  const presence = (url) => (url ? "set" : "empty");
  const pairs = [
    ["provider", ragConfig.provider],
    ["fallback", ragConfig.fallback],
    ["prewarm", ragConfig.prewarm],
    ["rebuild", ragConfig.rebuild],
    ["cache_dir", ragConfig.cacheDir],
    ["prebuilt_auto_download", ragConfig.prebuiltIndexAutoDownload],
    ["prebuilt_url_override", presence(ragConfig.prebuiltIndexUrl)],
    ["prebuilt_url_local", presence(ragConfig.prebuiltIndexUrlLocal)],
    ["prebuilt_url_gemini", presence(ragConfig.prebuiltIndexUrlGemini)],
    ["prebuilt_timeout_ms", ragConfig.prebuiltIndexTimeoutMs],
    ["gemini_retry_max_attempts", ragConfig.geminiRetryMaxAttempts],
    ["gemini_retry_base_delay_ms", ragConfig.geminiRetryBaseDelayMs],
    ["gemini_retry_max_delay_ms", ragConfig.geminiRetryMaxDelayMs],
    ["gemini_request_throttle_ms", ragConfig.geminiRequestThrottleMs]
  ];
  const summary = pairs.map(([key, value]) => `${key}=${value}`).join(" ");
  logRag(`config ${summary}`);
}
|
|
33
|
+
|
|
34
|
+
/**
 * Clear the per-provider log markers and the provider-chain flag. The
 * `config` and `localEmbedderInit` flags are left untouched.
 *
 * @returns {void}
 */
function resetRagProviderLogState() {
  const { providerReady, providerFirstUse, fallbackUse } = ragLogState;
  for (const seenProviders of [providerReady, providerFirstUse, fallbackUse]) {
    seenProviders.clear();
  }
  ragLogState.providerChain = false;
}
|
|
40
|
+
|
|
41
|
+
export {
|
|
42
|
+
ragLogState,
|
|
43
|
+
logRag,
|
|
44
|
+
logRagConfigOnce,
|
|
45
|
+
resetRagProviderLogState
|
|
46
|
+
};
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
// Default provider and fallback for each accepted MCP_PROFILE value.
// The keys double as the list of valid profile names in the error message
// raised for an unknown profile.
const PROFILE_DEFAULTS = {
  "lite": { provider: "lexical", fallback: "none" },
  "semantic-local": { provider: "local", fallback: "none" },
  "semantic-gemini": { provider: "gemini", fallback: "none" }
};
|
|
15
|
+
|
|
16
|
+
/**
 * Canonicalize an environment value for keyword comparison.
 *
 * @param {unknown} value - Raw value from an environment map.
 * @returns {string} Trimmed, lower-cased string; "" for null/undefined.
 */
function normalizeEnvValue(value) {
  const isMissing = value === undefined || value === null;
  return isMissing ? "" : String(value).trim().toLowerCase();
}
|
|
20
|
+
|
|
21
|
+
/**
 * Resolve the active profile and its provider/fallback from the environment.
 *
 * MCP_PROFILE selects the profile ("lite" when unset). RAG_PROVIDER and
 * RAG_FALLBACK, when set, override the profile's defaults. All three are
 * compared after trimming and lower-casing.
 *
 * @param {Record<string, unknown>} [env=process.env] - Environment map to read.
 * @returns {{profile: string, defaults: {provider: string, fallback: string},
 *   provider: string, fallback: string, providerSource: string, fallbackSource: string}}
 *   The `*Source` fields are "env" for an explicit override and
 *   "profile-default" otherwise.
 * @throws {Error} When MCP_PROFILE is set to a name that is not a profile.
 */
function resolveProfileConfig(env = process.env) {
  const rawProfile = normalizeEnvValue(env.MCP_PROFILE);
  const explicitProvider = normalizeEnvValue(env.RAG_PROVIDER);
  const explicitFallback = normalizeEnvValue(env.RAG_FALLBACK);

  // Own-property check: a plain lookup would accept inherited keys such as
  // "constructor" or "__proto__" and return defaults without a provider.
  const isKnownProfile = Object.prototype.hasOwnProperty.call(PROFILE_DEFAULTS, rawProfile);
  if (rawProfile && !isKnownProfile) {
    throw new Error(
      `Invalid MCP_PROFILE "${rawProfile}". Expected one of: ${Object.keys(PROFILE_DEFAULTS).join(", ")}.`
    );
  }

  const profile = rawProfile || "lite";
  const defaults = PROFILE_DEFAULTS[profile];

  return {
    profile,
    defaults,
    provider: explicitProvider || defaults.provider,
    fallback: explicitFallback || defaults.fallback,
    providerSource: explicitProvider ? "env" : "profile-default",
    fallbackSource: explicitFallback ? "env" : "profile-default"
  };
}
|
|
44
|
+
|
|
45
|
+
export {
|
|
46
|
+
PROFILE_DEFAULTS,
|
|
47
|
+
resolveProfileConfig
|
|
48
|
+
};
|