prism-mcp-server 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.gitmodules +3 -0
- package/Dockerfile +30 -0
- package/LICENSE +21 -0
- package/README.md +970 -0
- package/benchmark.ts +172 -0
- package/call_chrome_mcp.py +96 -0
- package/docker-compose.yml +67 -0
- package/execute_via_chrome_mcp.py +133 -0
- package/gmail_auth_test.py +29 -0
- package/gmail_list_latest_5.py +27 -0
- package/index.ts +34 -0
- package/list_chrome_tools.py +70 -0
- package/package.json +64 -0
- package/patch_cgc_mcp.py +90 -0
- package/repomix-output.xml +9 -0
- package/run_server.sh +9 -0
- package/server.json +78 -0
- package/src/config.ts +85 -0
- package/src/server.ts +627 -0
- package/src/tools/compactionHandler.ts +313 -0
- package/src/tools/definitions.ts +367 -0
- package/src/tools/handlers.ts +261 -0
- package/src/tools/index.ts +38 -0
- package/src/tools/sessionMemoryDefinitions.ts +437 -0
- package/src/tools/sessionMemoryHandlers.ts +774 -0
- package/src/utils/braveApi.ts +375 -0
- package/src/utils/embeddingApi.ts +97 -0
- package/src/utils/executor.ts +105 -0
- package/src/utils/googleAi.ts +107 -0
- package/src/utils/keywordExtractor.ts +207 -0
- package/src/utils/supabaseApi.ts +194 -0
- package/supabase/migrations/015_session_memory.sql +145 -0
- package/supabase/migrations/016_knowledge_accumulation.sql +315 -0
- package/supabase/migrations/017_ledger_compaction.sql +74 -0
- package/supabase/migrations/018_semantic_search.sql +110 -0
- package/supabase/migrations/019_concurrency_control.sql +320 -0
- package/supabase/migrations/020_multi_tenant_rls.sql +459 -0
- package/test_cross_mcp.js +393 -0
- package/test_mcp_schema.js +83 -0
- package/tests/test_knowledge_system.js +319 -0
- package/tsconfig.json +16 -0
- package/vertex-ai/test_claude_vertex.py +78 -0
- package/vertex-ai/test_gemini_vertex.py +39 -0
- package/vertex-ai/test_hybrid_search_pipeline.ts +296 -0
- package/vertex-ai/test_pipeline_benchmark.ts +251 -0
- package/vertex-ai/test_realworld_comparison.ts +290 -0
- package/vertex-ai/verify_discovery_engine.ts +72 -0
|
@@ -0,0 +1,375 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Brave Search API Client
|
|
3
|
+
*
|
|
4
|
+
* This module handles all communication with Brave's Search APIs.
|
|
5
|
+
* Brave offers three search endpoints used by this server:
|
|
6
|
+
*
|
|
7
|
+
* 1. Web Search API (/v1/web/search)
|
|
8
|
+
* - Standard internet search returning titles, descriptions, URLs
|
|
9
|
+
* - Used by: brave_web_search, brave_web_search_code_mode
|
|
10
|
+
*
|
|
11
|
+
* 2. Local/POI Search API (/v1/local/pois + /v1/local/descriptions)
|
|
12
|
+
* - Business/place search returning addresses, ratings, hours, etc.
|
|
13
|
+
* - Uses a 3-step pipeline: web search (to get location IDs)
|
|
14
|
+
* → POI details (address, phone, hours)
|
|
15
|
+
* → descriptions (business text)
|
|
16
|
+
* - Used by: brave_local_search, brave_local_search_code_mode
|
|
17
|
+
*
|
|
18
|
+
* 3. AI Grounding / Chat Completions API (/v1/chat/completions)
|
|
19
|
+
* - OpenAI-compatible endpoint for AI-grounded answers
|
|
20
|
+
* - Used by: brave_answers
|
|
21
|
+
*
|
|
22
|
+
* Each function comes in two variants:
|
|
23
|
+
* - "Raw" version (e.g., performWebSearchRaw): Returns raw JSON string from API
|
|
24
|
+
* Used by code-mode handlers that need to pass raw data to the QuickJS sandbox
|
|
25
|
+
* - "Formatted" version (e.g., performWebSearch): Returns human-readable text
|
|
26
|
+
* Used by standard search handlers
|
|
27
|
+
*
|
|
28
|
+
* Authentication: All requests use the BRAVE_API_KEY via X-Subscription-Token header.
|
|
29
|
+
* The Brave Answers endpoint uses a separate BRAVE_ANSWERS_API_KEY via Bearer token.
|
|
30
|
+
*/
|
|
31
|
+
|
|
32
|
+
import { BRAVE_API_KEY, BRAVE_ANSWERS_API_KEY } from "../config.js";
|
|
33
|
+
|
|
34
|
+
// ─── TypeScript Interfaces for Brave API Responses ────────────
|
|
35
|
+
// These types match the shape of Brave's JSON responses so we get
|
|
36
|
+
// type safety when accessing fields like data.web.results[0].title
|
|
37
|
+
/**
 * Subset of the Brave Web Search response that this module reads.
 *
 * Both sections are optional. `web.results` holds the ordinary search hits;
 * `locations.results` holds the location IDs that the local-search pipeline
 * feeds into the POI and description endpoints.
 */
export interface BraveWeb {
  /** Standard web results. */
  web?: {
    results?: {
      title: string;
      description: string;
      url: string;
      language?: string;
      published?: string;
      rank?: number;
    }[];
  };
  /** Location hits; only `id` is required by the code in this file. */
  locations?: {
    results?: {
      id: string;
      title?: string;
    }[];
  };
}
|
|
55
|
+
|
|
56
|
+
/**
 * One point of interest as returned by the Brave local POI endpoint.
 *
 * Only `id` and the `address` container are declared as required; every
 * detail inside them may be absent for a given place.
 */
export interface BraveLocation {
  /** Location identifier; also the key into the descriptions map. */
  id: string;
  /** Display name. `title` is used as a fallback when `name` is missing. */
  name?: string;
  title?: string;
  /** Postal address, split into its parts. */
  address: {
    streetAddress?: string;
    addressLocality?: string;
    addressRegion?: string;
    postalCode?: string;
  };
  coordinates?: {
    latitude: number;
    longitude: number;
  };
  phone?: string;
  rating?: {
    ratingValue?: number;
    ratingCount?: number;
  };
  openingHours?: string[];
  priceRange?: string;
}
|
|
78
|
+
|
|
79
|
+
/** Response body of the Brave `/local/pois` endpoint: a list of POI records. */
export interface BravePoiResponse {
  results: Array<BraveLocation>;
}
|
|
82
|
+
|
|
83
|
+
/**
 * Response body of the Brave `/local/descriptions` endpoint.
 * `descriptions` maps a location ID to its free-text description.
 */
export interface BraveDescription {
  descriptions: Record<string, string>;
}
|
|
86
|
+
|
|
87
|
+
/** A single chat message sent to the Brave Answers (chat completions) endpoint. */
interface BraveAnswersMessage {
  /** Who authored the message. */
  role: "user" | "system" | "assistant";
  /** The message text. */
  content: string;
}
|
|
91
|
+
|
|
92
|
+
/**
 * Subset of the Brave Answers response that this module reads.
 * Every field is optional, so consumers must use optional chaining.
 */
interface BraveAnswersResponse {
  id?: string;
  model?: string;
  /** Candidate completions; this module reads the first one's message content. */
  choices?: {
    message?: {
      role?: string;
      content?: string;
    };
  }[];
}
|
|
102
|
+
|
|
103
|
+
/**
 * Asks the Brave Answers (OpenAI-compatible chat completions) endpoint a
 * single question and returns the trimmed answer text.
 *
 * @param query - The user's question; sent as the only message, with role "user".
 * @param model - Model name forwarded in the request body.
 * @returns The first choice's message content, trimmed.
 * @throws Error if BRAVE_ANSWERS_API_KEY is unset, the HTTP status is not OK,
 *         or the response carries no content.
 */
export async function performBraveAnswers(
  query: string,
  model: string = "brave"
) {
  if (!BRAVE_ANSWERS_API_KEY) {
    throw new Error("BRAVE_ANSWERS_API_KEY is not configured");
  }

  const endpoint = new URL("https://api.search.brave.com/res/v1/chat/completions");
  const conversation: BraveAnswersMessage[] = [{ role: "user", content: query }];
  const payload = JSON.stringify({
    model,
    stream: false,
    messages: conversation,
  });

  const reply = await fetch(endpoint, {
    method: "POST",
    headers: {
      Accept: "application/json",
      "Content-Type": "application/json",
      Authorization: `Bearer ${BRAVE_ANSWERS_API_KEY}`,
    },
    body: payload,
  });

  if (!reply.ok) {
    // Include the response body: it carries the API's own error explanation.
    const details = await reply.text();
    throw new Error(
      `Brave Answers API error: ${reply.status} ${reply.statusText}\n${details}`
    );
  }

  const parsed = (await reply.json()) as BraveAnswersResponse;
  const answer = parsed.choices?.[0]?.message?.content?.trim();
  if (answer) {
    return answer;
  }

  throw new Error("Brave Answers API returned an empty response");
}
|
|
145
|
+
|
|
146
|
+
/**
 * Runs a Brave web search and returns the response body as an unparsed JSON
 * string (the "raw" variant used by the code-mode handlers).
 *
 * @param query  - Search terms.
 * @param count  - Desired number of results. Normalised to an integer in
 *                 [1, 20] (20 is the API limit); non-finite values fall back
 *                 to the default of 10.
 * @param offset - Zero-based page offset. Normalised to a non-negative
 *                 integer; non-finite values fall back to 0.
 * @returns The response body text exactly as received.
 * @throws Error if BRAVE_API_KEY is unset or the HTTP status is not OK.
 */
export async function performWebSearchRaw(
  query: string,
  count: number = 10,
  offset: number = 0
): Promise<string> {
  // Fail with a clear message instead of sending an "undefined" token,
  // matching the guard performBraveAnswers applies to its own key.
  if (!BRAVE_API_KEY) {
    throw new Error("BRAVE_API_KEY is not configured");
  }

  // Normalise paging values locally so that 0, negative, fractional or NaN
  // inputs never reach the API.
  const boundedCount = Number.isFinite(count)
    ? Math.min(Math.max(Math.trunc(count), 1), 20) // API limit
    : 10;
  const boundedOffset = Number.isFinite(offset)
    ? Math.max(Math.trunc(offset), 0)
    : 0;

  const url = new URL("https://api.search.brave.com/res/v1/web/search");
  url.searchParams.set("q", query);
  url.searchParams.set("count", boundedCount.toString());
  url.searchParams.set("offset", boundedOffset.toString());

  const response = await fetch(url, {
    headers: {
      Accept: "application/json",
      "Accept-Encoding": "gzip",
      "X-Subscription-Token": BRAVE_API_KEY,
    },
  });

  if (!response.ok) {
    const details = await response.text();
    throw new Error(
      `Brave API error: ${response.status} ${response.statusText}\n${details}`
    );
  }

  return await response.text();
}
|
|
174
|
+
|
|
175
|
+
/**
 * Runs a Brave web search and renders the hits as human-readable text.
 *
 * Each hit becomes a three-line "Title / Description / URL" block; blocks are
 * separated by a blank line. Missing fields render as empty strings, and a
 * response without web results yields an empty string.
 *
 * @param query  - Search terms.
 * @param count  - Desired number of results (forwarded to performWebSearchRaw).
 * @param offset - Page offset (forwarded to performWebSearchRaw).
 */
export async function performWebSearch(
  query: string,
  count: number = 10,
  offset: number = 0
) {
  const body = await performWebSearchRaw(query, count, offset);
  const payload = JSON.parse(body) as BraveWeb;

  // Only the plain web hits are rendered; other response sections are ignored.
  const hits = payload.web?.results || [];
  const blocks: string[] = [];
  for (const hit of hits) {
    const title = hit.title || "";
    const description = hit.description || "";
    const link = hit.url || "";
    blocks.push(`Title: ${title}\nDescription: ${description}\nURL: ${link}`);
  }

  return blocks.join("\n\n");
}
|
|
197
|
+
|
|
198
|
+
/**
 * Fetches point-of-interest details for the given location IDs from the
 * Brave `/local/pois` endpoint.
 *
 * @param ids - Location IDs; falsy entries are ignored.
 * @returns The parsed POI response. When no usable ID is supplied an empty
 *          result set is returned without contacting the API.
 * @throws Error if BRAVE_API_KEY is unset or the HTTP status is not OK.
 */
export async function getPoisData(ids: string[]): Promise<BravePoiResponse> {
  const usableIds = ids.filter(Boolean);

  // Nothing to look up: skip the round trip instead of sending a request
  // that carries no `ids` parameter at all.
  if (usableIds.length === 0) {
    return { results: [] };
  }

  if (!BRAVE_API_KEY) {
    throw new Error("BRAVE_API_KEY is not configured");
  }

  const url = new URL("https://api.search.brave.com/res/v1/local/pois");
  for (const id of usableIds) {
    // One repeated `ids` parameter per location.
    url.searchParams.append("ids", id);
  }

  const response = await fetch(url, {
    headers: {
      Accept: "application/json",
      "Accept-Encoding": "gzip",
      "X-Subscription-Token": BRAVE_API_KEY,
    },
  });

  if (!response.ok) {
    const details = await response.text();
    throw new Error(
      `Brave API error: ${response.status} ${response.statusText}\n${details}`
    );
  }

  return (await response.json()) as BravePoiResponse;
}
|
|
219
|
+
|
|
220
|
+
/**
 * Fetches free-text descriptions for the given location IDs from the Brave
 * `/local/descriptions` endpoint.
 *
 * @param ids - Location IDs; falsy entries are ignored.
 * @returns The parsed descriptions response. When no usable ID is supplied an
 *          empty map is returned without contacting the API.
 * @throws Error if BRAVE_API_KEY is unset or the HTTP status is not OK.
 */
export async function getDescriptionsData(
  ids: string[]
): Promise<BraveDescription> {
  const usableIds = ids.filter(Boolean);

  // Nothing to look up: skip the round trip instead of sending a request
  // that carries no `ids` parameter at all.
  if (usableIds.length === 0) {
    return { descriptions: {} };
  }

  if (!BRAVE_API_KEY) {
    throw new Error("BRAVE_API_KEY is not configured");
  }

  const url = new URL("https://api.search.brave.com/res/v1/local/descriptions");
  for (const id of usableIds) {
    // One repeated `ids` parameter per location.
    url.searchParams.append("ids", id);
  }

  const response = await fetch(url, {
    headers: {
      Accept: "application/json",
      "Accept-Encoding": "gzip",
      "X-Subscription-Token": BRAVE_API_KEY,
    },
  });

  if (!response.ok) {
    const details = await response.text();
    throw new Error(
      `Brave API error: ${response.status} ${response.statusText}\n${details}`
    );
  }

  return (await response.json()) as BraveDescription;
}
|
|
243
|
+
|
|
244
|
+
/**
 * Splits `arr` into consecutive slices of at most `size` elements, keeping
 * the original order. The last slice may be shorter. The input is not mutated.
 *
 * @param arr  - Items to split.
 * @param size - Maximum slice length; must be a positive integer.
 * @returns The list of slices (empty when `arr` is empty).
 * @throws RangeError if `size` is not a positive integer. Without this check
 *         a size of 0 or less would never advance the loop index.
 */
function chunkArray<T>(arr: T[], size: number): T[][] {
  if (!Number.isInteger(size) || size < 1) {
    throw new RangeError(
      `chunkArray: size must be a positive integer, got ${size}`
    );
  }

  const chunks: T[][] = [];
  for (let start = 0; start < arr.length; start += size) {
    chunks.push(arr.slice(start, start + size));
  }
  return chunks;
}
|
|
251
|
+
|
|
252
|
+
/**
 * Runs the local-search pipeline and returns its combined result as a JSON
 * string (the "raw" variant used by the code-mode handlers).
 *
 * Steps:
 *   1. Web search restricted to locations, to obtain location IDs.
 *   2. If no IDs come back, fall back to a formatted ordinary web search and
 *      return `{ source: "web_fallback", query, count, formattedText }`.
 *   3. Otherwise fetch POI details and descriptions for the de-duplicated
 *      IDs in batches of 5 and return
 *      `{ source: "local", query, count, locationIds, poisData, descriptionsData }`.
 *
 * @param query - Search terms.
 * @param count - Desired number of locations; capped at 20.
 * @throws Error if any of the underlying HTTP requests fails.
 */
export async function performLocalSearchRaw(
  query: string,
  count: number = 5
): Promise<string> {
  // Step 1: initial search to get location IDs.
  const searchUrl = new URL("https://api.search.brave.com/res/v1/web/search");
  const params: Array<[string, string]> = [
    ["q", query],
    ["search_lang", "en"],
    ["result_filter", "locations"],
    ["count", Math.min(count, 20).toString()],
  ];
  for (const [key, value] of params) {
    searchUrl.searchParams.set(key, value);
  }

  const searchResponse = await fetch(searchUrl, {
    headers: {
      Accept: "application/json",
      "Accept-Encoding": "gzip",
      "X-Subscription-Token": BRAVE_API_KEY!,
    },
  });

  if (!searchResponse.ok) {
    const details = await searchResponse.text();
    throw new Error(
      `Brave API error: ${searchResponse.status} ${searchResponse.statusText}\n${details}`
    );
  }

  const searchData = (await searchResponse.json()) as BraveWeb;
  const locationIds: string[] = [];
  for (const entry of searchData.locations?.results ?? []) {
    if (entry.id != null) {
      locationIds.push(entry.id);
    }
  }

  // Step 2: no locations found, so answer with ordinary web results instead.
  if (locationIds.length === 0) {
    const formattedText = await performWebSearch(query, count);
    return JSON.stringify({
      source: "web_fallback",
      query,
      count,
      formattedText,
    });
  }

  // Step 3: batch IDs to avoid Brave query.ids validation errors.
  const idBatches = chunkArray([...new Set(locationIds)], 5);
  const poiRequests = Promise.all(idBatches.map((batch) => getPoisData(batch)));
  const descriptionRequests = Promise.all(
    idBatches.map((batch) => getDescriptionsData(batch))
  );
  const [poiResponses, descriptionResponses] = await Promise.all([
    poiRequests,
    descriptionRequests,
  ]);

  // Stitch the per-batch responses back into single response objects.
  const poisData: BravePoiResponse = {
    results: poiResponses.flatMap((response) => response.results || []),
  };

  const mergedDescriptions: { [id: string]: string } = {};
  for (const response of descriptionResponses) {
    Object.assign(mergedDescriptions, response.descriptions || {});
  }
  const descriptionsData: BraveDescription = {
    descriptions: mergedDescriptions,
  };

  return JSON.stringify({
    source: "local",
    query,
    count,
    locationIds,
    poisData,
    descriptionsData,
  });
}
|
|
324
|
+
|
|
325
|
+
/**
 * Runs a local (business/place) search and renders the outcome as
 * human-readable text.
 *
 * Delegates to performLocalSearchRaw and then either returns the pre-formatted
 * web fallback text or formats the POI data with formatLocalResults.
 *
 * @param query - Search terms.
 * @param count - Desired number of results.
 * @returns Formatted text, or "No local results found" when there is nothing
 *          to show.
 */
export async function performLocalSearch(query: string, count: number = 5) {
  const envelope = JSON.parse(await performLocalSearchRaw(query, count)) as {
    source: "local" | "web_fallback";
    formattedText?: string;
    poisData?: BravePoiResponse;
    descriptionsData?: BraveDescription;
  };

  if (envelope.source !== "web_fallback") {
    // Missing sections are replaced by empty ones so formatting cannot fail.
    const pois = envelope.poisData || { results: [] };
    const descriptions = envelope.descriptionsData || { descriptions: {} };
    return formatLocalResults(pois, descriptions);
  }

  return envelope.formattedText || "No local results found";
}
|
|
344
|
+
|
|
345
|
+
/**
 * Renders POI records as human-readable text.
 *
 * Each place becomes a block of labelled lines (Name, Address, Phone, Rating,
 * Price Range, Hours, Description) ending in a newline; blocks are separated
 * by a "---" line. Missing values render as "N/A" (or 0 reviews / "No
 * description available").
 *
 * @param poisData - POI response; a missing `results` list is treated as empty.
 * @param descData - Descriptions keyed by location ID; a missing map is
 *                   treated as empty, in line with the other defensive
 *                   fallbacks here, so a partial payload cannot throw.
 * @returns The formatted text, or "No local results found" when there are no
 *          places.
 */
export function formatLocalResults(
  poisData: BravePoiResponse,
  descData: BraveDescription
): string {
  const descriptions = descData?.descriptions ?? {};

  const blocks = (poisData.results || []).map((poi) => {
    const address =
      [
        poi.address?.streetAddress ?? "",
        poi.address?.addressLocality ?? "",
        poi.address?.addressRegion ?? "",
        poi.address?.postalCode ?? "",
      ]
        .filter((part) => part !== "")
        .join(", ") || "N/A";

    const ratingValue = poi.rating?.ratingValue ?? "N/A";
    const ratingCount = poi.rating?.ratingCount ?? 0;

    const lines = [
      `Name: ${poi.name || poi.title || "N/A"}`,
      `Address: ${address}`,
      `Phone: ${poi.phone || "N/A"}`,
      `Rating: ${ratingValue} (${ratingCount} reviews)`,
      `Price Range: ${poi.priceRange || "N/A"}`,
      `Hours: ${(poi.openingHours || []).join(", ") || "N/A"}`,
      `Description: ${descriptions[poi.id] || "No description available"}`,
    ];

    // Every block ends with a newline, so the separator sits on its own line.
    return lines.join("\n") + "\n";
  });

  return blocks.join("\n---\n") || "No local results found";
}
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Embedding Generation Utility (v0.4.0 — Enhancement #4)
|
|
3
|
+
*
|
|
4
|
+
* ═══════════════════════════════════════════════════════════════════
|
|
5
|
+
* REVIEWER NOTE: This module wraps Google's text-embedding-004 model
|
|
6
|
+
* to generate 768-dimensional vector embeddings for text.
|
|
7
|
+
*
|
|
8
|
+
* USAGE — Called in two places:
|
|
9
|
+
* 1. sessionSaveLedgerHandler — embeds summary+decisions at save time
|
|
10
|
+
* (fire-and-forget, non-blocking)
|
|
11
|
+
* 2. sessionSearchMemoryHandler — embeds the user's search query
|
|
12
|
+
* to find semantically similar past sessions
|
|
13
|
+
*
|
|
14
|
+
* WHY GEMINI: We already have @google/generative-ai as a dependency
|
|
15
|
+
* and GOOGLE_API_KEY configured for the research paper analysis tool.
|
|
16
|
+
* Using a separate embedding service (OpenAI, Cohere) would add
|
|
17
|
+
* another API key dependency and increase configuration complexity.
|
|
18
|
+
*
|
|
19
|
+
* COST: Gemini's text-embedding-004 is free tier for <1500 req/min.
|
|
20
|
+
* At typical usage (~10-50 ledger saves/day), we'll never approach
|
|
21
|
+
* this limit.
|
|
22
|
+
*
|
|
23
|
+
* TRUNCATION GUARD: text-embedding-004 has a token limit per API call
|
|
24
|
+
* (~8192 tokens ≈ ~32K characters). If the input text exceeds this,
|
|
25
|
+
* the API returns a 400 Bad Request. We implement a hard character
|
|
26
|
+
* limit (default 8000 chars) to guarantee the API call never crashes.
|
|
27
|
+
* This is applied before sending to the API, not after.
|
|
28
|
+
* ═══════════════════════════════════════════════════════════════════
|
|
29
|
+
*/
|
|
30
|
+
|
|
31
|
+
import { GoogleGenerativeAI } from "@google/generative-ai";
|
|
32
|
+
import { GOOGLE_API_KEY } from "../config.js";
|
|
33
|
+
|
|
34
|
+
// ─── Constants ────────────────────────────────────────────────
|
|
35
|
+
|
|
36
|
+
// REVIEWER NOTE: Maximum number of characters (UTF-16 code units) sent to the
// embedding API. At roughly 4 characters per token this is about 2000 tokens.
// Truncation is non-fatal: the embedding is simply computed from the leading
// part of the text.
// NOTE(review): confirm the model's actual input token limit against the
// current Gemini documentation.
const MAX_EMBEDDING_CHARS = 8000;

// ─── Embedding Client ─────────────────────────────────────────

/**
 * Shortens `text` to at most `maxChars` UTF-16 code units.
 *
 * Prefers to cut at the last space, but only when that space lies in the
 * final 10% of the budget; otherwise text with few or no spaces (e.g. CJK)
 * would lose almost everything. When no such space exists the text is cut at
 * the limit, dropping a trailing high surrogate so the result never ends in
 * half of a surrogate pair.
 */
function truncateForEmbedding(text: string, maxChars: number): string {
  if (text.length <= maxChars) {
    return text;
  }

  const cut = text.substring(0, maxChars);

  const lastSpace = cut.lastIndexOf(" ");
  const earliestAcceptableSpace = maxChars - Math.floor(maxChars / 10);
  if (lastSpace > 0 && lastSpace >= earliestAcceptableSpace) {
    return cut.substring(0, lastSpace);
  }

  // 0xD800-0xDBFF is the high-surrogate range: a code unit there at the very
  // end means the cut landed in the middle of a pair.
  const lastUnit = cut.charCodeAt(cut.length - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    return cut.substring(0, cut.length - 1);
  }
  return cut;
}

/**
 * Generates an embedding vector for the given text using the
 * "text-embedding-004" model.
 *
 * @param text - The text to embed (summary + decisions, search query, etc.)
 * @returns The embedding values returned by the API.
 * @throws Error if GOOGLE_API_KEY is not configured, if the text is empty or
 *         whitespace-only, or if the API call fails.
 *
 * REVIEWER NOTE: Truncation happens BEFORE the API call. Text longer than
 * MAX_EMBEDDING_CHARS is shortened (see truncateForEmbedding) and a warning is
 * logged to stderr; the caller is not interrupted.
 */
export async function generateEmbedding(text: string): Promise<number[]> {
  if (!GOOGLE_API_KEY) {
    throw new Error(
      "Cannot generate embeddings: GOOGLE_API_KEY is not configured. " +
      "Set this environment variable to enable semantic search."
    );
  }

  // Truncation guard: keep the request within the character budget.
  let inputText = text;
  if (inputText.length > MAX_EMBEDDING_CHARS) {
    console.error(
      `[embedding] Input text truncated from ${inputText.length} to ~${MAX_EMBEDDING_CHARS} chars (word-safe)`
    );
    inputText = truncateForEmbedding(inputText, MAX_EMBEDDING_CHARS);
  }

  // Skip empty or whitespace-only text
  if (!inputText.trim()) {
    throw new Error("Cannot generate embedding for empty text");
  }

  const genAI = new GoogleGenerativeAI(GOOGLE_API_KEY);
  const model = genAI.getGenerativeModel({ model: "text-embedding-004" });

  console.error(`[embedding] Generating 768-dim embedding for ${inputText.length} chars`);

  const result = await model.embedContent(inputText);
  return result.embedding.values;
}
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* QuickJS Sandbox Executor
|
|
3
|
+
*
|
|
4
|
+
* This module runs user-provided JavaScript code in a secure, isolated
|
|
5
|
+
* environment using QuickJS (a lightweight JavaScript engine compiled to WASM).
|
|
6
|
+
*
|
|
7
|
+
* Why a sandbox?
|
|
8
|
+
* The "code mode" tools let the AI write JavaScript to extract specific
|
|
9
|
+
* fields from large API responses. Running untrusted code directly in
|
|
10
|
+
* Node.js would be a security risk. QuickJS provides:
|
|
11
|
+
* - Memory isolation (50MB limit, separate heap)
|
|
12
|
+
* - Execution timeout (10 seconds default)
|
|
13
|
+
* - No access to Node.js APIs, filesystem, or network
|
|
14
|
+
*
|
|
15
|
+
* How it works:
|
|
16
|
+
* 1. The raw data (e.g., Brave Search API response) is injected as a
|
|
17
|
+
* global variable called "DATA" (a JSON string)
|
|
18
|
+
* 2. The user's code reads DATA, parses it, extracts what it needs
|
|
19
|
+
* 3. The code calls console.log() to output the result
|
|
20
|
+
* 4. We capture the console.log output and return it
|
|
21
|
+
*
|
|
22
|
+
* Example user code:
|
|
23
|
+
* const r = JSON.parse(DATA);
|
|
24
|
+
* console.log(r.web.results.map(x => x.title).join('\\n'));
|
|
25
|
+
*
|
|
26
|
+
* Returns:
|
|
27
|
+
* { stdout: "captured output", error?: "if something went wrong", executionTimeMs: 42 }
|
|
28
|
+
*/
|
|
29
|
+
|
|
30
|
+
import { getQuickJS } from "quickjs-emscripten";
|
|
31
|
+
|
|
32
|
+
/** Outcome of one sandboxed script run. */
export interface SandboxResult {
  /** Everything the script wrote via `console.log`, one line per call. */
  stdout: string;
  /** Present only when the script failed or the host raised an exception. */
  error?: string;
  /** Wall-clock time in milliseconds, measured from just after context creation. */
  executionTimeMs: number;
}
|
|
37
|
+
|
|
38
|
+
/**
 * Turns the value produced by `vm.dump()` for a failed evaluation into a
 * readable message.
 *
 * For thrown Error objects the dump is an object (name / message / stack),
 * not a string, so interpolating it directly would print "[object Object]".
 */
function describeSandboxError(dumped: unknown): string {
  if (typeof dumped === "string") {
    return dumped;
  }

  if (dumped !== null && typeof dumped === "object") {
    const { name, message } = dumped as { name?: unknown; message?: unknown };
    if (typeof message === "string") {
      return typeof name === "string" && name ? `${name}: ${message}` : message;
    }
    // Thrown plain objects: show their JSON form when possible.
    try {
      return JSON.stringify(dumped) ?? String(dumped);
    } catch {
      return String(dumped);
    }
  }

  return String(dumped);
}

/**
 * Runs the given javascript `code` in a sandboxed QuickJS environment.
 * Injects a global variable `DATA` which contains the stringified payload,
 * and a `console.log` that appends to the captured stdout.
 *
 * @param dataStr   - Payload exposed to the script as the global string `DATA`.
 * @param code      - Script source to evaluate.
 * @param timeoutMs - Execution is interrupted once this many milliseconds
 *                    have elapsed since the run started.
 * @returns Captured stdout, the elapsed time and, on failure, an error
 *          message. Script failures are reported in the result, not thrown.
 */
export async function runInSandbox(dataStr: string, code: string, timeoutMs: number = 10000): Promise<SandboxResult> {
  const QuickJS = await getQuickJS();

  // Fresh context per run; cap its memory at 50MB (arbitrary safe limit for extraction)
  const vm = QuickJS.newContext();
  vm.runtime.setMemoryLimit(50 * 1024 * 1024);

  const startTime = Date.now();
  let stdout = "";

  try {
    // Inject console.log to capture stdout
    const logHandle = vm.newFunction("log", (...args) => {
      const parts = args.map((arg) => vm.getString(arg));
      stdout += parts.join(" ") + "\n";
    });

    const consoleHandle = vm.newObject();
    vm.setProp(consoleHandle, "log", logHandle);
    vm.setProp(vm.global, "console", consoleHandle);
    // The VM now holds its own references; release the host-side handles.
    consoleHandle.dispose();
    logHandle.dispose();

    // Inject the raw API response string as "DATA"
    const dataHandle = vm.newString(dataStr);
    vm.setProp(vm.global, "DATA", dataHandle);
    dataHandle.dispose();

    // Enforce the timeout: returning true from the handler interrupts execution.
    vm.runtime.setInterruptHandler(() => Date.now() - startTime > timeoutMs);

    const result = vm.evalCode(code);

    if (result.error) {
      const dumped: unknown = vm.dump(result.error);
      result.error.dispose();
      return {
        stdout,
        error: `Script Error: ${describeSandboxError(dumped)}`,
        executionTimeMs: Date.now() - startTime
      };
    }

    result.value.dispose();

    return {
      stdout,
      executionTimeMs: Date.now() - startTime
    };
  } catch (err: unknown) {
    // Host-side failure (not a script error): report it in the result.
    const reason = err instanceof Error ? err.message : String(err);
    return {
      stdout,
      error: `Runtime Exception: ${reason}`,
      executionTimeMs: Date.now() - startTime
    };
  } finally {
    vm.dispose();
  }
}
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Google Gemini AI Client
|
|
3
|
+
*
|
|
4
|
+
* This module integrates with Google's Gemini AI models for research
|
|
5
|
+
* paper analysis. It uses the @google/generative-ai SDK (Google AI Studio).
|
|
6
|
+
*
|
|
7
|
+
* Current model: gemini-2.0-flash (fast, high-quality, good for long documents)
|
|
8
|
+
*
|
|
9
|
+
* The main function (analyzePaperWithGemini) takes a paper's full text and
|
|
10
|
+
* generates a detailed analysis based on the requested type:
|
|
11
|
+
* - "summary" → research question, methodology, findings, conclusions
|
|
12
|
+
* - "critique" → methodology assessment, validity, limitations
|
|
13
|
+
* - "literature review" → how it fits in the broader research landscape
|
|
14
|
+
* - "key findings" → most significant results and implications
|
|
15
|
+
* - "comprehensive" → all of the above combined (default)
|
|
16
|
+
*
|
|
17
|
+
* Requires: GOOGLE_API_KEY environment variable
|
|
18
|
+
*
|
|
19
|
+
* Note: This module also exports an MCP client factory (createMcpClient)
|
|
20
|
+
* which can be used for testing or inter-server communication.
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
import { GoogleGenerativeAI } from "@google/generative-ai";
|
|
24
|
+
import { Readable } from "stream";
|
|
25
|
+
import { Client } from "@modelcontextprotocol/sdk/client/index.js";
|
|
26
|
+
import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js";
|
|
27
|
+
|
|
28
|
+
/**
 * Shared Google Generative AI client, built once at module load from the
 * GOOGLE_API_KEY environment variable. No check is made here that the
 * variable is actually set.
 */
export const googleGenAi = new GoogleGenerativeAI(
  process.env.GOOGLE_API_KEY as string
);
|
|
29
|
+
|
|
30
|
+
/**
 * Wraps an in-memory buffer in a Readable stream that yields the buffer's
 * contents and then ends.
 *
 * @param buffer - Bytes to expose through the stream.
 * @returns A Readable already loaded with `buffer` and marked as finished.
 */
export function bufferToStream(buffer: Buffer): Readable {
  const readable = new Readable();
  // Queue the payload first, then `null`, which signals the end of the stream.
  for (const chunk of [buffer, null]) {
    readable.push(chunk);
  }
  return readable;
}
|
|
36
|
+
|
|
37
|
+
/**
 * Creates an MCP client and connects it over stdio to a server process
 * started as `node index.js`.
 *
 * @returns The connected client together with its transport, so the caller
 *          can close both when finished.
 */
export async function createMcpClient() {
  // Server entry point is launched as a child process via the stdio transport.
  const serverLaunch = { command: "node", args: ["index.js"] };
  const transport = new StdioClientTransport(serverLaunch);

  const identity = { name: "gemini-mcp-client", version: "1.0.0" };
  const client = new Client(identity, { capabilities: { tools: {} } });

  await client.connect(transport);

  return { client, transport };
}
|
|
54
|
+
|
|
55
|
+
/**
 * Analyzes research paper content with the "gemini-2.0-flash" model.
 *
 * The prompt consists of an opening request naming the analysis type, the
 * optional additional context, the paper text, and a closing instruction
 * selected by analysis type (case-insensitive). Unrecognised types get the
 * comprehensive instruction.
 *
 * @param paperContent - The text content of the research paper
 * @param analysisType - "summary", "critique", "literature review",
 *                       "key findings", or anything else for comprehensive
 * @param additionalContext - Any additional context or specific questions
 * @returns The model's analysis text
 * @throws Error prefixed with "Failed to analyze paper:" on any failure
 */
export async function analyzePaperWithGemini(
  paperContent: string,
  analysisType: string,
  additionalContext?: string
): Promise<string> {
  try {
    const model = googleGenAi.getGenerativeModel({ model: "gemini-2.0-flash" });

    // Closing instruction for each recognised analysis type.
    const closingInstructions = new Map<string, string>([
      ["summary", "Provide a comprehensive summary including the research question, methodology, key findings, and conclusions."],
      ["critique", "Provide a critical evaluation of the research methodology, validity of findings, limitations, and suggestions for improvement."],
      ["literature review", "Analyze how this paper fits into the broader research landscape, identifying key related works and research gaps."],
      ["key findings", "Extract and explain the most significant findings and their implications."],
    ]);
    const fallbackInstruction =
      "Perform a comprehensive analysis including summary, methodology assessment, key findings, limitations, and significance.";

    // Assemble the prompt piece by piece, in order.
    const sections: string[] = [
      `I need you to perform a detailed ${analysisType} analysis of the following research paper.\n\n`,
    ];
    if (additionalContext) {
      sections.push(`Additional context: ${additionalContext}\n\n`);
    }
    sections.push(`Research paper content:\n${paperContent}\n\n`);
    sections.push(
      closingInstructions.get(analysisType.toLowerCase()) ?? fallbackInstruction
    );

    const generation = await model.generateContent(sections.join(""));
    const reply = await generation.response;
    return reply.text();
  } catch (error) {
    console.error("Error analyzing paper with Gemini:", error);
    throw new Error(`Failed to analyze paper: ${error instanceof Error ? error.message : String(error)}`);
  }
}
|