simple-dynamsoft-mcp 6.3.0 → 7.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/.env.example +35 -9
  2. package/README.md +156 -497
  3. package/package.json +13 -7
  4. package/scripts/prebuild-rag-index.mjs +1 -1
  5. package/scripts/run-gemini-tests.mjs +1 -1
  6. package/scripts/sync-submodules.mjs +1 -1
  7. package/scripts/verify-doc-resources.mjs +79 -0
  8. package/src/data/bootstrap.js +475 -0
  9. package/src/data/download-utils.js +99 -0
  10. package/src/data/hydration-mode.js +15 -0
  11. package/src/data/hydration-policy.js +39 -0
  12. package/src/data/repo-map.js +149 -0
  13. package/src/{data-root.js → data/root.js} +1 -1
  14. package/src/{submodule-sync.js → data/submodule-sync.js} +1 -1
  15. package/src/index.js +49 -1499
  16. package/src/observability/logging.js +51 -0
  17. package/src/rag/config.js +96 -0
  18. package/src/rag/index.js +266 -0
  19. package/src/rag/lexical-provider.js +170 -0
  20. package/src/rag/logger.js +46 -0
  21. package/src/rag/profile-config.js +48 -0
  22. package/src/rag/providers.js +585 -0
  23. package/src/rag/search-utils.js +166 -0
  24. package/src/rag/vector-cache.js +323 -0
  25. package/src/server/create-server.js +168 -0
  26. package/src/server/helpers/server-helpers.js +33 -0
  27. package/src/{resource-index → server/resource-index}/paths.js +2 -2
  28. package/src/{resource-index → server/resource-index}/samples.js +9 -1
  29. package/src/{resource-index.js → server/resource-index.js} +158 -93
  30. package/src/server/resources/register-resources.js +56 -0
  31. package/src/server/runtime-config.js +66 -0
  32. package/src/server/tools/register-index-tools.js +130 -0
  33. package/src/server/tools/register-project-tools.js +305 -0
  34. package/src/server/tools/register-quickstart-tools.js +572 -0
  35. package/src/server/tools/register-sample-tools.js +333 -0
  36. package/src/server/tools/register-version-tools.js +136 -0
  37. package/src/server/transports/http.js +84 -0
  38. package/src/server/transports/stdio.js +12 -0
  39. package/src/data-bootstrap.js +0 -255
  40. package/src/rag.js +0 -1203
  41. /package/src/{gemini-retry.js → rag/gemini-retry.js} +0 -0
  42. /package/src/{normalizers.js → server/normalizers.js} +0 -0
  43. /package/src/{resource-index → server/resource-index}/builders.js +0 -0
  44. /package/src/{resource-index → server/resource-index}/config.js +0 -0
  45. /package/src/{resource-index → server/resource-index}/docs-loader.js +0 -0
  46. /package/src/{resource-index → server/resource-index}/uri.js +0 -0
  47. /package/src/{resource-index → server/resource-index}/version-policy.js +0 -0
@@ -0,0 +1,51 @@
1
/**
 * Canonicalize an environment-style value so it can be compared against
 * lower-case keywords.
 *
 * @param {*} value Raw value; may be undefined or null.
 * @returns {string} The trimmed, lower-cased string form, or "" when the
 *   value is undefined or null.
 */
function normalizeEnvValue(value) {
  const isMissing = value === undefined || value === null;
  return isMissing ? "" : String(value).trim().toLowerCase();
}
5
+
6
/**
 * Decide whether debug-level log lines should be emitted.
 *
 * Verbose logging is on when MCP_LOG_LEVEL is "debug", "verbose" or "trace",
 * or when MCP_VERBOSE_LOGS is one of "1", "true", "yes", "on"
 * (both compared after trimming and lower-casing).
 *
 * @param {object} [env=process.env] Environment-like object to read from.
 * @returns {boolean} True when verbose logging is enabled.
 */
function isVerboseLoggingEnabled(env = process.env) {
  const verboseLevels = ["debug", "verbose", "trace"];
  const enabledToggles = ["1", "true", "yes", "on"];
  return (
    verboseLevels.includes(normalizeEnvValue(env.MCP_LOG_LEVEL)) ||
    enabledToggles.includes(normalizeEnvValue(env.MCP_VERBOSE_LOGS))
  );
}
12
+
13
/**
 * Render a field value for a `key=value` log line.
 *
 * Values that would make the line ambiguous (whitespace, "=" or a double
 * quote) are emitted as a JSON string literal so embedded quotes are escaped.
 * Everything else is emitted verbatim.
 *
 * @param {*} value Field value; undefined and null are treated as "".
 * @returns {string} `""` for an empty value, a JSON-quoted string when the
 *   value needs quoting, otherwise the raw string form.
 */
function quoteValue(value) {
  const raw = String(value ?? "");
  if (raw.length === 0) return "\"\"";
  // A bare double quote inside an unquoted value cannot be told apart from the
  // start of a quoted value, so it forces quoting just like whitespace and "=".
  if (/[\s="]/.test(raw)) return JSON.stringify(raw);
  return raw;
}
19
+
20
/**
 * Map a latency in milliseconds to a coarse bucket label for logging.
 *
 * @param {*} latencyMs Latency; coerced with Number().
 * @returns {string} "unknown" for non-finite or negative input, otherwise one
 *   of "lt100ms", "100-299ms", "300-999ms", "1-2s" (below 3000 ms) or "ge3s".
 */
function latencyBucket(latencyMs) {
  const elapsed = Number(latencyMs);
  if (!(Number.isFinite(elapsed) && elapsed >= 0)) return "unknown";

  // Exclusive upper bounds in ascending order; the first match wins.
  const buckets = [
    [100, "lt100ms"],
    [300, "100-299ms"],
    [1000, "300-999ms"],
    [3000, "1-2s"]
  ];
  const match = buckets.find(([upperBound]) => elapsed < upperBound);
  return match ? match[1] : "ge3s";
}
29
+
30
/**
 * Write one structured `key=value` log line to stderr.
 *
 * The line has the form `[component] event=<event> k=v ... [level=<level>]`.
 * Fields whose value is undefined, null or "" are omitted, and the level is
 * appended only when it is not "info". Debug-level lines are dropped unless
 * verbose logging is enabled for `options.env`.
 *
 * @param {string} component Tag placed in square brackets at the line start.
 * @param {string} event Event name; falls back to "detail" when falsy.
 * @param {object} [fields={}] Extra key/value pairs to append.
 * @param {{level?: string, env?: object}} [options={}] Log level (defaults to
 *   "info") and the environment used for the verbosity check.
 * @returns {void}
 */
function logEvent(component, event, fields = {}, options = {}) {
  const level = normalizeEnvValue(options.level || "info") || "info";
  const verbose = isVerboseLoggingEnabled(options.env);
  const suppressed = level === "debug" && !verbose;
  if (suppressed) return;

  const fieldSegments = Object.entries(fields || {})
    .filter(([, value]) => value !== undefined && value !== null && value !== "")
    .map(([key, value]) => `${key}=${quoteValue(value)}`);

  const segments = [
    `[${component}]`,
    `event=${quoteValue(event || "detail")}`,
    ...fieldSegments
  ];
  if (level !== "info") segments.push(`level=${level}`);

  // stderr keeps log output off stdout.
  console.error(segments.join(" "));
}
46
+
47
+ export {
48
+ isVerboseLoggingEnabled,
49
+ latencyBucket,
50
+ logEvent
51
+ };
@@ -0,0 +1,96 @@
1
+ import { readFileSync } from "node:fs";
2
+ import { join } from "node:path";
3
+ import "dotenv/config";
4
+ import { getResolvedDataRoot } from "../data/root.js";
5
+ import { resolveProfileConfig } from "./profile-config.js";
6
+
7
// Data root as resolved by the data/root module.
const dataRoot = getResolvedDataRoot();

// package.json is read synchronously at module load; its version is embedded
// in the release download URLs below.
const pkgUrl = new URL("../../package.json", import.meta.url);
const pkg = JSON.parse(readFileSync(pkgUrl, "utf8"));

// Every prebuilt index archive is published under the release tag "v<version>".
const releaseDownloadBase =
  `https://github.com/yushulx/simple-dynamsoft-mcp/releases/download/v${pkg.version}`;

// Archive name without a provider suffix.
const legacyPrebuiltIndexUrl =
  `${releaseDownloadBase}/prebuilt-rag-index-${pkg.version}.tar.gz`;

// Default archive URL for each embedding provider.
const defaultPrebuiltIndexUrls = {
  local: `${releaseDownloadBase}/prebuilt-rag-index-local-${pkg.version}.tar.gz`,
  gemini: `${releaseDownloadBase}/prebuilt-rag-index-gemini-${pkg.version}.tar.gz`
};
21
+
22
/**
 * Read a raw string from process.env.
 *
 * @param {string} key Environment variable name.
 * @param {*} fallback Value returned when the variable is unset or empty.
 * @returns {*} The variable's string value, or `fallback`.
 */
function readEnvValue(key, fallback) {
  const raw = process.env[key];
  const isUnset = raw === undefined || raw === "";
  return isUnset ? fallback : raw;
}
27
+
28
/**
 * Read a boolean flag from the environment.
 *
 * The value is trimmed and lower-cased before comparison, so " TRUE " counts
 * as enabled. A value that is unset, empty or whitespace-only yields
 * `fallback`. Any other unrecognized text yields false.
 *
 * @param {string} key Environment variable name.
 * @param {boolean} fallback Value used when the variable carries no text.
 * @returns {boolean} True for "1", "true", "yes" or "on"; `fallback` when
 *   blank; false otherwise.
 */
function readBoolEnv(key, fallback) {
  const normalized = String(readEnvValue(key, "")).trim().toLowerCase();
  if (!normalized) return fallback;
  return ["1", "true", "yes", "on"].includes(normalized);
}
33
+
34
/**
 * Read a base-10 integer from the environment.
 *
 * @param {string} key Environment variable name.
 * @param {number} fallback Value used when the variable is unset, empty, or
 *   does not start with a parseable integer.
 * @returns {number} The parsed integer, or `fallback`.
 */
function readIntEnv(key, fallback) {
  const text = readEnvValue(key, "");
  if (text) {
    const parsed = Number.parseInt(text, 10);
    if (!Number.isNaN(parsed)) return parsed;
  }
  return fallback;
}
40
+
41
/**
 * Read a floating-point number from the environment.
 *
 * @param {string} key Environment variable name.
 * @param {number} fallback Value used when the variable is unset, empty, or
 *   does not start with a parseable number.
 * @returns {number} The parsed number, or `fallback`.
 */
function readFloatEnv(key, fallback) {
  const text = readEnvValue(key, "");
  if (!text) return fallback;
  const parsed = Number.parseFloat(text);
  return Number.isNaN(parsed) ? fallback : parsed;
}
47
+
48
/**
 * Ensure a Gemini embedding model name carries the "models/" prefix.
 *
 * @param {string} model Model name, with or without the "models/" prefix.
 * @returns {string} The prefixed model name, or "models/embedding-001" when
 *   `model` is falsy.
 */
function normalizeGeminiModel(model) {
  const prefix = "models/";
  if (!model) return "models/embedding-001";
  return model.startsWith(prefix) ? model : `${prefix}${model}`;
}
53
+
54
// Provider/fallback selection derived from MCP_PROFILE, RAG_PROVIDER and
// RAG_FALLBACK (see resolveProfileConfig). Throws at module load when
// MCP_PROFILE names an unknown profile.
const profileConfig = resolveProfileConfig(process.env);

// Cache locations default to directories under the resolved data root.
const defaultCacheDir = join(dataRoot, ".rag-cache");
const defaultModelCacheDir = join(dataRoot, ".rag-cache", "models");

// Effective RAG settings, evaluated once at module load. Each entry reads the
// named environment variable and falls back to the literal default beside it.
const ragConfig = {
  // Profile and provider chain.
  profile: profileConfig.profile,
  profileDefaults: profileConfig.defaults,
  providerSource: profileConfig.providerSource,
  fallbackSource: profileConfig.fallbackSource,
  provider: profileConfig.provider,
  fallback: profileConfig.fallback,

  // Cache directories and local embedding model.
  cacheDir: readEnvValue("RAG_CACHE_DIR", defaultCacheDir),
  modelCacheDir: readEnvValue("RAG_MODEL_CACHE_DIR", defaultModelCacheDir),
  localModel: readEnvValue("RAG_LOCAL_MODEL", "Xenova/all-MiniLM-L6-v2"),
  localQuantized: readBoolEnv("RAG_LOCAL_QUANTIZED", true),

  // Chunking and scoring.
  chunkSize: readIntEnv("RAG_CHUNK_SIZE", 1200),
  chunkOverlap: readIntEnv("RAG_CHUNK_OVERLAP", 200),
  maxChunksPerDoc: readIntEnv("RAG_MAX_CHUNKS_PER_DOC", 6),
  maxTextChars: readIntEnv("RAG_MAX_TEXT_CHARS", 4000),
  minScore: readFloatEnv("RAG_MIN_SCORE", 0.2),
  includeScore: readBoolEnv("RAG_INCLUDE_SCORE", false),

  // Index lifecycle.
  rebuild: readBoolEnv("RAG_REBUILD", false),
  prewarm: readBoolEnv("RAG_PREWARM", false),
  prewarmBlock: readBoolEnv("RAG_PREWARM_BLOCK", false),

  // Prebuilt index download.
  prebuiltIndexAutoDownload: readBoolEnv("RAG_PREBUILT_INDEX_AUTO_DOWNLOAD", true),
  prebuiltIndexUrl: readEnvValue("RAG_PREBUILT_INDEX_URL", ""),
  prebuiltIndexUrlLocal: readEnvValue("RAG_PREBUILT_INDEX_URL_LOCAL", defaultPrebuiltIndexUrls.local),
  prebuiltIndexUrlGemini: readEnvValue("RAG_PREBUILT_INDEX_URL_GEMINI", defaultPrebuiltIndexUrls.gemini),
  prebuiltIndexTimeoutMs: readIntEnv("RAG_PREBUILT_INDEX_TIMEOUT_MS", 180000),

  // Gemini embedding API.
  geminiApiKey: readEnvValue("GEMINI_API_KEY", ""),
  geminiModel: normalizeGeminiModel(readEnvValue("GEMINI_EMBED_MODEL", "models/gemini-embedding-001")),
  geminiBaseUrl: readEnvValue("GEMINI_API_BASE_URL", "https://generativelanguage.googleapis.com"),
  geminiBatchSize: readIntEnv("GEMINI_EMBED_BATCH_SIZE", 16),
  geminiRetryMaxAttempts: readIntEnv("GEMINI_RETRY_MAX_ATTEMPTS", 5),
  geminiRetryBaseDelayMs: readIntEnv("GEMINI_RETRY_BASE_DELAY_MS", 500),
  geminiRetryMaxDelayMs: readIntEnv("GEMINI_RETRY_MAX_DELAY_MS", 10000),
  geminiRequestThrottleMs: readIntEnv("GEMINI_REQUEST_THROTTLE_MS", 0)
};
90
+
91
+ export {
92
+ pkg,
93
+ ragConfig,
94
+ legacyPrebuiltIndexUrl,
95
+ defaultPrebuiltIndexUrls
96
+ };
@@ -0,0 +1,266 @@
1
+ import {
2
+ resourceIndex,
3
+ normalizeProduct,
4
+ normalizePlatform,
5
+ normalizeEdition,
6
+ editionMatches,
7
+ platformMatches,
8
+ getRagSignatureData,
9
+ resourceIndexByUri
10
+ } from "../server/resource-index.js";
11
+ import { latencyBucket } from "../observability/logging.js";
12
+ import { createLexicalProvider } from "./lexical-provider.js";
13
+ import { pkg, ragConfig, legacyPrebuiltIndexUrl } from "./config.js";
14
+ import {
15
+ ragLogState,
16
+ logRag,
17
+ logRagConfigOnce,
18
+ resetRagProviderLogState
19
+ } from "./logger.js";
20
+ import {
21
+ createFuseSearch,
22
+ attachScore,
23
+ normalizeSearchFilters,
24
+ entryMatchesScope,
25
+ normalizeText,
26
+ truncateText,
27
+ buildEmbeddingItems,
28
+ buildIndexSignature,
29
+ normalizeVector,
30
+ dotProduct,
31
+ isRateLimitError
32
+ } from "./search-utils.js";
33
+ import { createProviderOrchestrator } from "./providers.js";
34
+ import { createVectorCacheHelpers } from "./vector-cache.js";
35
+
36
// Search helpers handed to the provider orchestrator. Three of them are bound
// here to module state (the resource index, the includeScore setting and the
// edition/platform matchers); the rest are passed through unchanged.
const searchUtils = {
  createFuseSearch: () => {
    return createFuseSearch(resourceIndex);
  },
  attachScore: (entry, score) => {
    return attachScore(entry, score, ragConfig.includeScore);
  },
  normalizeText,
  truncateText,
  buildEmbeddingItems,
  buildIndexSignature,
  normalizeVector,
  dotProduct,
  isRateLimitError,
  entryMatchesScope: (entry, filters) => {
    return entryMatchesScope(entry, filters, { editionMatches, platformMatches });
  }
};

// Vector cache helpers, configured with the package version and the legacy
// prebuilt index URL.
const vectorCacheDependencies = {
  ragConfig,
  pkgVersion: pkg.version,
  legacyPrebuiltIndexUrl,
  logRag
};
const vectorCache = createVectorCacheHelpers(vectorCacheDependencies);

// The orchestrator resolves the provider chain and loads search providers.
const orchestratorDependencies = {
  pkgVersion: pkg.version,
  ragConfig,
  ragLogState,
  logRag,
  resourceIndex,
  resourceIndexByUri,
  createLexicalProvider,
  getRagSignatureData,
  utils: searchUtils,
  vectorCache
};
const providerOrchestrator = createProviderOrchestrator(orchestratorDependencies);
71
+
72
/**
 * Refresh the search providers, clear the per-provider "logged once" state,
 * and log the current resource count.
 *
 * @returns {void}
 */
function refreshRagIndexes() {
  providerOrchestrator.refreshProviders();
  resetRagProviderLogState();
  const resourceCount = resourceIndex.length;
  logRag(`indexes refreshed resources=${resourceCount}`);
}
77
+
78
// Upper bound on the number of results a single search may return.
const MAX_SEARCH_RESULTS = 50;

// Turn a caller-supplied limit into a positive integer capped at
// MAX_SEARCH_RESULTS, or undefined ("no limit") when it is missing,
// non-numeric or below 1.
function clampSearchLimit(limit) {
  const requested = Number(limit);
  if (Number.isNaN(requested) || requested < 1) return undefined;
  return Math.min(Math.floor(requested), MAX_SEARCH_RESULTS);
}

// Extract a loggable message from whatever a provider threw.
function describeSearchError(error) {
  return error instanceof Error ? error.message : String(error);
}

/**
 * Search the resource index.
 *
 * With an empty query, returns every entry matching the scope filters in
 * index order. Otherwise each provider in the resolved chain is tried in
 * order and the first one that succeeds supplies the results. A provider
 * failure is logged and the next provider is tried.
 *
 * @param {object} params
 * @param {string} [params.query] Free-text query; trimmed before use.
 * @param {string} [params.product] Product filter.
 * @param {string} [params.edition] Edition filter.
 * @param {string} [params.platform] Platform filter.
 * @param {string} [params.type] Resource type filter.
 * @param {number} [params.limit] Maximum number of results. Capped at 50 and
 *   floored to an integer; a missing, non-numeric or non-positive value means
 *   "no limit".
 * @returns {Promise<Array>} Matching entries, or [] when every provider fails
 *   or the chain is empty.
 */
async function searchResources({ query, product, edition, platform, type, limit }) {
  const startedAt = Date.now();
  const filters = normalizeSearchFilters({ product, edition, platform, type }, {
    normalizeProduct,
    normalizePlatform,
    normalizeEdition
  });
  const searchQuery = query ? String(query).trim() : "";
  // A negative limit must not reach slice(0, n): that would silently drop
  // entries from the end of the result list instead of limiting it.
  const maxResults = clampSearchLimit(limit);

  if (!searchQuery) {
    const results = resourceIndex.filter((entry) => searchUtils.entryMatchesScope(entry, filters));
    return maxResults ? results.slice(0, maxResults) : results;
  }

  logRagConfigOnce(ragConfig);
  const providers = providerOrchestrator.resolveProviderChain();
  if (!ragLogState.providerChain) {
    ragLogState.providerChain = true;
    logRag("provider_chain", {
      profile: ragConfig.profile,
      provider: providers[0] || "unknown",
      fallback: ragConfig.fallback,
      chain: providers.join("->")
    });
  }

  let lastErrorMessage = null;
  for (const name of providers) {
    try {
      const provider = await providerOrchestrator.loadSearchProvider(name);
      const results = await provider.search(searchQuery, filters, maxResults);

      // The first successful use of each provider is logged once.
      if (!ragLogState.providerFirstUse.has(name)) {
        ragLogState.providerFirstUse.add(name);
        logRag("provider_selected", {
          profile: ragConfig.profile,
          provider: name,
          fallback: ragConfig.fallback
        });
      }
      // Any provider other than the head of the chain counts as a fallback.
      if (name !== providers[0] && !ragLogState.fallbackUse.has(name)) {
        ragLogState.fallbackUse.add(name);
        logRag("fallback_engaged", {
          selected_provider: name,
          primary_provider: providers[0],
          fallback: ragConfig.fallback
        });
      }

      const elapsedMs = Date.now() - startedAt;
      logRag("search_complete", {
        profile: ragConfig.profile,
        provider: name,
        fallback: ragConfig.fallback,
        product: filters.product || "any",
        edition: filters.edition || "any",
        platform: filters.platform || "any",
        type: filters.type || "any",
        result_count: results.length,
        latency_ms: elapsedMs,
        latency_bucket: latencyBucket(elapsedMs)
      });
      return results;
    } catch (error) {
      lastErrorMessage = describeSearchError(error);
      logRag("provider_failed", {
        provider: name,
        fallback: ragConfig.fallback,
        error: lastErrorMessage
      }, { level: "error" });
    }
  }

  // Reached only when no provider returned; report the last failure, if any.
  if (lastErrorMessage !== null) {
    const elapsedMs = Date.now() - startedAt;
    logRag("search_failed", {
      profile: ragConfig.profile,
      provider: providers[0] || "unknown",
      fallback: ragConfig.fallback,
      latency_ms: elapsedMs,
      latency_bucket: latencyBucket(elapsedMs),
      error: lastErrorMessage
    }, { level: "error" });
  }
  return [];
}
163
+
164
/**
 * Load the primary search provider ahead of the first query, and run its
 * `warm()` hook when it has one.
 *
 * Does nothing when `ragConfig.prewarm` is off, when the provider chain is
 * empty, or when the primary provider is "fuse". Failures are logged at error
 * level and never thrown.
 *
 * @returns {Promise<void>}
 */
async function prewarmRagIndex() {
  if (!ragConfig.prewarm) return;
  logRagConfigOnce(ragConfig);

  const primary = providerOrchestrator.resolveProviderChain()[0];
  if (!primary || primary === "fuse") return;

  // Fields shared by the start and done events; a fresh object per call.
  const describeRun = () => ({
    profile: ragConfig.profile,
    provider: primary,
    fallback: ragConfig.fallback
  });

  try {
    logRag("prewarm_start", describeRun());
    const provider = await providerOrchestrator.loadSearchProvider(primary);
    if (provider.warm) {
      await provider.warm();
    }
    logRag("prewarm_done", describeRun());
  } catch (error) {
    const failure = { provider: primary, error: error.message };
    logRag("prewarm_failed", failure, { level: "error" });
  }
}
192
+
193
/**
 * Suggest sample resources for a product/edition/platform scope.
 *
 * When a query is given, the configured search providers are tried first
 * (restricted to type "sample"). If they return nothing, or there is no
 * query, samples are picked directly from the resource index: scoped by
 * product, edition and platform, widened to "any sample of the product" when
 * that scope is empty, then ranked by keyword overlap with the query.
 *
 * @param {object} params
 * @param {string} [params.query] Free-text query; trimmed before use.
 * @param {string} [params.product] Product filter.
 * @param {string} [params.edition] Edition filter.
 * @param {string} [params.platform] Platform filter.
 * @param {number} [params.limit=5] Maximum number of suggestions. Capped at
 *   10 and floored to an integer; a missing, non-numeric or non-positive
 *   value falls back to 5.
 * @returns {Promise<Array>} Up to `limit` sample entries, unique by URI.
 */
async function getSampleSuggestions({ query, product, edition, platform, limit = 5 }) {
  const normalizedProduct = normalizeProduct(product);
  const normalizedPlatform = normalizePlatform(platform);
  const normalizedEdition = normalizeEdition(edition, normalizedPlatform, normalizedProduct);
  const searchQuery = query ? String(query).trim() : "";
  // A negative limit used to end the collection loop after a single entry.
  const requestedLimit = Math.floor(Number(limit));
  const maxResults = Math.min(requestedLimit >= 1 ? requestedLimit : 5, 10);

  if (searchQuery) {
    const results = await searchResources({
      query: searchQuery,
      product: normalizedProduct,
      edition: normalizedEdition,
      platform: normalizedPlatform,
      type: "sample",
      limit: maxResults
    });
    if (results.length) return results;
  }

  const matchesScope = (entry) => {
    if (normalizedProduct && entry.product !== normalizedProduct) return false;
    if (!editionMatches(normalizedEdition, entry.edition)) return false;
    if (!platformMatches(normalizedPlatform, entry)) return false;
    return entry.type === "sample";
  };

  let candidates = resourceIndex.filter(matchesScope);
  if (candidates.length === 0 && normalizedProduct) {
    // Nothing in the exact scope: fall back to any sample of the product.
    candidates = resourceIndex.filter((entry) => entry.type === "sample" && entry.product === normalizedProduct);
  }

  if (searchQuery && candidates.length > 1) {
    const terms = normalizeText(searchQuery.toLowerCase()).split(/\s+/).filter(Boolean);
    // A term found in a tag scores 3; a term found anywhere in the title,
    // summary or tags scores 1 more.
    const scoreEntry = (entry) => {
      const tags = Array.isArray(entry.tags) ? entry.tags.map((tag) => String(tag).toLowerCase()) : [];
      const haystack = [
        String(entry.title || "").toLowerCase(),
        String(entry.summary || "").toLowerCase(),
        tags.join(" ")
      ].join(" ");
      let score = 0;
      for (const term of terms) {
        if (tags.some((tag) => tag.includes(term))) score += 3;
        if (haystack.includes(term)) score += 1;
      }
      return score;
    };
    // Score each candidate once rather than on every comparison.
    const scored = candidates.map((entry) => ({ entry, score: scoreEntry(entry) }));
    scored.sort((a, b) => {
      const delta = b.score - a.score;
      if (delta !== 0) return delta;
      return String(a.entry.title || "").localeCompare(String(b.entry.title || ""));
    });
    candidates = scored.map((item) => item.entry);
  }

  const seen = new Set();
  const results = [];
  for (const entry of candidates) {
    if (seen.has(entry.uri)) continue;
    seen.add(entry.uri);
    results.push(entry);
    if (results.length >= maxResults) break;
  }

  return results;
}
259
+
260
+ export {
261
+ ragConfig,
262
+ searchResources,
263
+ getSampleSuggestions,
264
+ prewarmRagIndex,
265
+ refreshRagIndexes
266
+ };
@@ -0,0 +1,170 @@
1
+ import Fuse from "fuse.js";
2
+
3
// Options passed to Fuse when the caller of createLexicalProvider supplies none.
const DEFAULT_FUSE_OPTIONS = {
  keys: ["title", "summary", "tags", "uri"],
  threshold: 0.35,
  ignoreLocation: true,
  includeScore: true
};

// BM25 parameters used by computeBm25Score: K1 appears in the term-frequency
// part of the formula, B in the document-length part.
const BM25_K1 = 1.2;
const BM25_B = 0.75;
12
+
13
/**
 * Split text into lower-case alphanumeric tokens.
 *
 * Every run of characters outside a-z and 0-9 (after lower-casing) acts as a
 * separator.
 *
 * @param {*} text Input text; falsy values are treated as "".
 * @returns {string[]} Tokens in order of appearance (duplicates kept).
 */
function tokenize(text) {
  const lowered = String(text || "").toLowerCase();
  const words = lowered.match(/[a-z0-9]+/g);
  return words === null ? [] : [...words];
}
21
+
22
/**
 * Scale a score by the maximum observed score.
 *
 * @param {number} value Raw score.
 * @param {number} max Largest raw score in the same result set.
 * @returns {number} `value / max`, or 0 when either argument is non-finite,
 *   zero or negative.
 */
function normalizeScore(value, max) {
  const isUsable = (n) => Number.isFinite(n) && n > 0;
  return isUsable(value) && isUsable(max) ? value / max : 0;
}
27
+
28
/**
 * Sort comparator for merged lexical results.
 *
 * Orders by score (highest first), then by title, then by URI, so that ties
 * resolve deterministically.
 *
 * @param {{entry: object, score: number}} a
 * @param {{entry: object, score: number}} b
 * @returns {number} Negative when `a` sorts first, positive when `b` does.
 */
function compareLexicalResults(a, b) {
  const byScore = b.score - a.score;
  if (byScore !== 0) return byScore;

  const textOf = (value) => String(value || "");
  const byTitle = textOf(a.entry.title).localeCompare(textOf(b.entry.title));
  if (byTitle !== 0) return byTitle;

  return textOf(a.entry.uri).localeCompare(textOf(b.entry.uri));
}
37
+
38
/**
 * Build the per-document statistics needed for BM25 scoring.
 *
 * Each entry is tokenized over its title, summary, tags and URI.
 *
 * @param {Array<object>} entries Resource entries to index.
 * @returns {{documents: Array<{index: number, entry: object, length: number, termFreq: Map<string, number>}>,
 *   documentFrequency: Map<string, number>, totalDocuments: number, avgLength: number}}
 *   Per-document term frequencies, the number of documents containing each
 *   term, the document count, and the mean token count (0 for no documents).
 */
function buildBm25Index(entries) {
  const documents = [];
  const documentFrequency = new Map();
  let totalLength = 0;

  entries.forEach((entry, index) => {
    const tagText = Array.isArray(entry.tags) ? entry.tags.join(" ") : "";
    const tokens = tokenize([entry.title, entry.summary, tagText, entry.uri].join(" \n "));

    const termFreq = new Map();
    for (const token of tokens) {
      termFreq.set(token, (termFreq.get(token) ?? 0) + 1);
    }

    // termFreq holds each distinct token once, in first-seen order, so its
    // keys give one document-frequency increment per term.
    for (const token of termFreq.keys()) {
      documentFrequency.set(token, (documentFrequency.get(token) ?? 0) + 1);
    }

    totalLength += tokens.length;
    documents.push({ index, entry, length: tokens.length, termFreq });
  });

  const totalDocuments = documents.length;
  const avgLength = totalDocuments > 0 ? totalLength / totalDocuments : 0;
  return { documents, documentFrequency, totalDocuments, avgLength };
}
81
+
82
/**
 * Compute the BM25 score of one document for a set of query terms.
 *
 * @param {{documentFrequency: Map<string, number>, totalDocuments: number, avgLength: number}} indexState
 *   Corpus statistics from buildBm25Index.
 * @param {{length: number, termFreq: Map<string, number>}} docState
 *   Per-document statistics from buildBm25Index.
 * @param {string[]} terms Query terms.
 * @returns {number} Sum of the per-term BM25 contributions; 0 when there are
 *   no terms, the document is empty, or the average length is 0.
 */
function computeBm25Score(indexState, docState, terms) {
  const scorable = terms.length > 0 && Boolean(docState.length) && Boolean(indexState.avgLength);
  if (!scorable) return 0;

  // The length part of the denominator is the same for every term.
  const lengthRatio = docState.length / indexState.avgLength;
  const lengthPenalty = BM25_K1 * (1 - BM25_B + BM25_B * lengthRatio);

  return terms.reduce((total, term) => {
    const tf = docState.termFreq.get(term) || 0;
    if (!tf) return total;
    const df = indexState.documentFrequency.get(term) || 0;
    const idf = Math.log(1 + (indexState.totalDocuments - df + 0.5) / (df + 0.5));
    return total + idf * ((tf * (BM25_K1 + 1)) / (tf + lengthPenalty));
  }, 0);
}
98
+
99
/**
 * Create the "lexical" search provider, which blends BM25 keyword scoring
 * with Fuse fuzzy matching.
 *
 * Both signals are restricted to entries matching the scope filters and are
 * normalized by the best in-scope score before being combined, so entries
 * outside the requested scope cannot influence the ranking.
 *
 * @param {object} deps
 * @param {Array<object>} deps.entries Resource entries to index.
 * @param {(entry: object, filters: object) => boolean} deps.entryMatchesScope
 *   Scope predicate applied to every candidate.
 * @param {(entry: object, score: number) => object} deps.attachScore
 *   Produces the result object returned for a ranked entry.
 * @param {object} [deps.fuseOptions=DEFAULT_FUSE_OPTIONS] Options for Fuse.
 * @param {number} [deps.bm25Weight=0.7] Weight of the normalized BM25 score.
 * @param {number} [deps.fuseWeight=0.3] Weight of the normalized Fuse score.
 * @returns {{name: string, search: Function, warm: Function}} Provider whose
 *   `search(query, filters, limit)` resolves to ranked results, best first.
 */
function createLexicalProvider({
  entries,
  entryMatchesScope,
  attachScore,
  fuseOptions = DEFAULT_FUSE_OPTIONS,
  bm25Weight = 0.7,
  fuseWeight = 0.3
}) {
  const fuse = new Fuse(entries, fuseOptions);
  const bm25Index = buildBm25Index(entries);
  const entryByUri = new Map(entries.map((entry) => [entry.uri, entry]));

  return {
    name: "lexical",
    search: async (query, filters, limit) => {
      const terms = [...new Set(tokenize(query))];

      // Fuzzy signal. Fuse scores are distances (0 is best), so they are
      // inverted. Out-of-scope hits are skipped before the maximum is taken,
      // otherwise one would deflate every in-scope normalized score.
      const fuseScoreByUri = new Map();
      let maxFuse = 0;
      for (const hit of fuse.search(query)) {
        if (!entryMatchesScope(hit.item, filters)) continue;
        const candidateScore = Number.isFinite(hit.score) ? Math.max(0, 1 - hit.score) : 0;
        const current = fuseScoreByUri.get(hit.item.uri) || 0;
        if (candidateScore > current) {
          fuseScoreByUri.set(hit.item.uri, candidateScore);
          if (candidateScore > maxFuse) maxFuse = candidateScore;
        }
      }

      // Keyword signal, plus the set of in-scope URIs with at least one signal.
      const bm25ScoreByUri = new Map();
      const scopedUris = new Set();
      let maxBm25 = 0;
      for (const doc of bm25Index.documents) {
        if (!entryMatchesScope(doc.entry, filters)) continue;
        const uri = doc.entry.uri;
        const score = computeBm25Score(bm25Index, doc, terms);
        if (score > 0) {
          bm25ScoreByUri.set(uri, score);
          if (score > maxBm25) maxBm25 = score;
        }
        if (bm25ScoreByUri.has(uri) || fuseScoreByUri.has(uri)) {
          scopedUris.add(uri);
        }
      }

      const merged = [];
      for (const uri of scopedUris) {
        const entry = entryByUri.get(uri);
        if (!entry) continue;
        const bm25Norm = normalizeScore(bm25ScoreByUri.get(uri) || 0, maxBm25);
        const fuseNorm = normalizeScore(fuseScoreByUri.get(uri) || 0, maxFuse);
        const score = (bm25Norm * bm25Weight) + (fuseNorm * fuseWeight);
        merged.push({ entry, score });
      }

      merged.sort(compareLexicalResults);
      const ranked = merged.map((item) => attachScore(item.entry, item.score));
      if (limit) return ranked.slice(0, limit);
      return ranked;
    },
    // Nothing to warm: the indexes are built eagerly in the factory.
    warm: async () => {}
  };
}
165
+
166
+ export {
167
+ tokenize,
168
+ compareLexicalResults,
169
+ createLexicalProvider
170
+ };
@@ -0,0 +1,46 @@
1
+ import { logEvent } from "../observability/logging.js";
2
+
3
// Mutable "already logged" bookkeeping, so one-time events are emitted once.
// The booleans cover process-wide events; the sets hold provider names.
const ragLogState = {
  config: false,
  providerChain: false,
  localEmbedderInit: false,
  providerReady: new Set(),
  providerFirstUse: new Set(),
  fallbackUse: new Set()
};
11
+
12
/**
 * Log under the "rag" component.
 *
 * With a non-empty `fields` object the first argument is the event name and
 * the call is forwarded as a structured event. Otherwise the first argument
 * is treated as a free-form message, logged as a "detail" event at debug
 * level unless `options.level` says otherwise.
 *
 * @param {string} eventOrMessage Event name or free-form message.
 * @param {object} [fields={}] Structured fields for the event form.
 * @param {object} [options={}] Options forwarded to logEvent.
 * @returns {void}
 */
function logRag(eventOrMessage, fields = {}, options = {}) {
  const hasFields =
    Boolean(fields) && typeof fields === "object" && Object.keys(fields).length > 0;

  if (!hasFields) {
    const level = options.level || "debug";
    logEvent("rag", "detail", { message: String(eventOrMessage || "") }, { ...options, level });
    return;
  }

  logEvent("rag", eventOrMessage, fields, options);
}
19
+
20
/**
 * Log a one-line summary of the effective RAG configuration, at most once.
 *
 * URL settings are reported only as "set" or "empty". The summary is passed
 * to logRag as a free-form message (no fields argument).
 *
 * @param {object} ragConfig Effective RAG configuration.
 * @returns {void}
 */
function logRagConfigOnce(ragConfig) {
  if (ragLogState.config) return;
  ragLogState.config = true;

  const presence = (value) => (value ? "set" : "empty");
  const summary = [
    "config",
    `provider=${ragConfig.provider}`,
    `fallback=${ragConfig.fallback}`,
    `prewarm=${ragConfig.prewarm}`,
    `rebuild=${ragConfig.rebuild}`,
    `cache_dir=${ragConfig.cacheDir}`,
    `prebuilt_auto_download=${ragConfig.prebuiltIndexAutoDownload}`,
    `prebuilt_url_override=${presence(ragConfig.prebuiltIndexUrl)}`,
    `prebuilt_url_local=${presence(ragConfig.prebuiltIndexUrlLocal)}`,
    `prebuilt_url_gemini=${presence(ragConfig.prebuiltIndexUrlGemini)}`,
    `prebuilt_timeout_ms=${ragConfig.prebuiltIndexTimeoutMs}`,
    `gemini_retry_max_attempts=${ragConfig.geminiRetryMaxAttempts}`,
    `gemini_retry_base_delay_ms=${ragConfig.geminiRetryBaseDelayMs}`,
    `gemini_retry_max_delay_ms=${ragConfig.geminiRetryMaxDelayMs}`,
    `gemini_request_throttle_ms=${ragConfig.geminiRequestThrottleMs}`
  ];
  logRag(summary.join(" "));
}
33
+
34
/**
 * Clear the provider-related "logged once" markers so provider events are
 * logged again. The `config` and `localEmbedderInit` flags are left as they
 * are.
 *
 * @returns {void}
 */
function resetRagProviderLogState() {
  const providerSets = [
    ragLogState.providerReady,
    ragLogState.providerFirstUse,
    ragLogState.fallbackUse
  ];
  for (const names of providerSets) {
    names.clear();
  }
  ragLogState.providerChain = false;
}
40
+
41
+ export {
42
+ ragLogState,
43
+ logRag,
44
+ logRagConfigOnce,
45
+ resetRagProviderLogState
46
+ };
@@ -0,0 +1,48 @@
1
// Default search provider and fallback for each supported MCP_PROFILE value.
const PROFILE_DEFAULTS = {
  lite: { provider: "lexical", fallback: "none" },
  "semantic-local": { provider: "local", fallback: "none" },
  "semantic-gemini": { provider: "gemini", fallback: "none" }
};
15
+
16
/**
 * Trim and lower-case an environment value for keyword comparison.
 *
 * @param {*} value Raw value; undefined and null are treated as "".
 * @returns {string} Normalized string form.
 */
function normalizeEnvValue(value) {
  return String(value ?? "").trim().toLowerCase();
}
20
+
21
/**
 * Resolve the active profile and its search provider/fallback.
 *
 * MCP_PROFILE selects a profile ("lite" when unset). RAG_PROVIDER and
 * RAG_FALLBACK, when set, override the profile defaults. All three values
 * are compared after trimming and lower-casing.
 *
 * @param {object} [env=process.env] Environment-like object to read from.
 * @returns {{profile: string, defaults: {provider: string, fallback: string},
 *   provider: string, fallback: string, providerSource: string,
 *   fallbackSource: string}} The resolved settings; each `*Source` is "env"
 *   when the value came from an explicit override, else "profile-default".
 * @throws {Error} When MCP_PROFILE is set to a name that is not a known profile.
 */
function resolveProfileConfig(env = process.env) {
  const rawProfile = normalizeEnvValue(env.MCP_PROFILE);
  const explicitProvider = normalizeEnvValue(env.RAG_PROVIDER);
  const explicitFallback = normalizeEnvValue(env.RAG_FALLBACK);

  // Own-property check: a plain lookup would also accept inherited names such
  // as "constructor" or "__proto__" and produce a profile without defaults.
  const isKnownProfile = Object.prototype.hasOwnProperty.call(PROFILE_DEFAULTS, rawProfile);
  if (rawProfile && !isKnownProfile) {
    throw new Error(
      `Invalid MCP_PROFILE "${rawProfile}". Expected one of: ${Object.keys(PROFILE_DEFAULTS).join(", ")}.`
    );
  }

  const profile = rawProfile || "lite";
  const defaults = PROFILE_DEFAULTS[profile];

  return {
    profile,
    defaults,
    provider: explicitProvider || defaults.provider,
    fallback: explicitFallback || defaults.fallback,
    providerSource: explicitProvider ? "env" : "profile-default",
    fallbackSource: explicitFallback ? "env" : "profile-default"
  };
}
44
+
45
+ export {
46
+ PROFILE_DEFAULTS,
47
+ resolveProfileConfig
48
+ };