freshcontext-mcp 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/worker/src/worker.ts +261 -83
package/package.json
CHANGED
package/worker/src/worker.ts
CHANGED
|
@@ -3,10 +3,11 @@ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
|
3
3
|
import { WebStandardStreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/webStandardStreamableHttp.js";
|
|
4
4
|
import { z } from "zod";
|
|
5
5
|
|
|
6
|
-
// ─── Types
|
|
6
|
+
// ─── Types ────────────────────────────────────────────────────────────────────
|
|
7
7
|
|
|
8
8
|
/**
 * Bindings and secrets supplied to the Worker.
 */
interface Env {
  /** Browser binding; the extraction tools hand it to `puppeteer.launch`. */
  BROWSER: Fetcher;

  /**
   * Shared secret used for Bearer authentication. Optional: when it is not
   * set, the authentication check is skipped entirely.
   * Set via `wrangler secret put API_KEY`.
   */
  API_KEY?: string;
}
|
|
11
12
|
|
|
12
13
|
interface FreshContext {
|
|
@@ -18,9 +19,143 @@ interface FreshContext {
|
|
|
18
19
|
adapter: string;
|
|
19
20
|
}
|
|
20
21
|
|
|
21
|
-
// ───
|
|
22
|
+
// ─── Security ─────────────────────────────────────────────────────────────────
|
|
22
23
|
|
|
23
|
-
|
|
24
|
+
/**
 * Per-adapter host allow-list. A URL passes when its host equals one of the
 * listed domains or is a sub-domain of one. Adapters without an entry are not
 * restricted to particular hosts (the private-address check still applies).
 */
const ALLOWED_DOMAINS: Record<string, string[]> = {
  github: ["github.com", "raw.githubusercontent.com"],
  scholar: ["scholar.google.com"],
  hackernews: ["news.ycombinator.com", "hn.algolia.com"],
  yc: ["www.ycombinator.com", "ycombinator.com"],
};

/**
 * Host patterns that must never be fetched (loopback, private and link-local
 * ranges). They are tested against the hostname AFTER it has been lower-cased
 * and stripped of IPv6 brackets and trailing dots — see `validateUrl`.
 */
const PRIVATE_IP_PATTERNS: readonly RegExp[] = [
  /^(?:.+\.)?localhost$/i,        // localhost and *.localhost
  /^0\./,                         // 0.0.0.0/8 ("this host")
  /^127\./,                       // IPv4 loopback
  /^10\./,                        // RFC 1918
  /^192\.168\./,                  // RFC 1918
  /^172\.(1[6-9]|2\d|3[01])\./,   // RFC 1918 (172.16.0.0/12)
  /^169\.254\./,                  // IPv4 link-local
  /^::$/,                         // IPv6 unspecified address
  /^::1$/,                        // IPv6 loopback
  /^::ffff:/i,                    // IPv4-mapped IPv6 (e.g. ::ffff:7f00:1)
  /^f[cd][0-9a-f]{2}:/i,          // fc00::/7 unique-local
  /^fe[89ab][0-9a-f]:/i,          // fe80::/10 link-local
];

const MAX_URL_LENGTH = 500;
const MAX_QUERY_LENGTH = 200;

/** Error type thrown by every validation, rate-limit and auth check. */
class SecurityError extends Error {
  constructor(message: string) {
    super(message);
    this.name = "SecurityError";
  }
}

/**
 * Validates a caller-supplied URL before it is handed to the browser.
 *
 * Checks, in order: maximum length, parseability, http/https scheme, the
 * private/internal address block-list, and the adapter's domain allow-list.
 *
 * @param rawUrl  URL string as supplied by the caller.
 * @param adapter Key into `ALLOWED_DOMAINS`; adapters with no entry skip the
 *                allow-list step.
 * @returns `rawUrl` unchanged when every check passes.
 * @throws SecurityError when any check fails.
 */
function validateUrl(rawUrl: string, adapter: string): string {
  if (rawUrl.length > MAX_URL_LENGTH)
    throw new SecurityError(`URL too long (max ${MAX_URL_LENGTH} chars)`);

  let parsed: URL;
  try { parsed = new URL(rawUrl); }
  catch { throw new SecurityError("Invalid URL format"); }

  if (!["http:", "https:"].includes(parsed.protocol))
    throw new SecurityError("Only http/https URLs are allowed");

  // `URL.hostname` keeps the square brackets around IPv6 literals ("[::1]")
  // and any trailing root dot ("localhost."). Strip both, otherwise the
  // patterns above can never match those spellings.
  const hostname = parsed.hostname
    .toLowerCase()
    .replace(/^\[|\]$/g, "")
    .replace(/\.+$/, "");

  if (PRIVATE_IP_PATTERNS.some(pattern => pattern.test(hostname)))
    throw new SecurityError("Access to private/internal addresses is not allowed");

  // Own-property lookup only: an adapter name such as "constructor" must not
  // resolve to something inherited from Object.prototype.
  const allowed = Object.prototype.hasOwnProperty.call(ALLOWED_DOMAINS, adapter)
    ? ALLOWED_DOMAINS[adapter]
    : undefined;
  if (allowed && allowed.length > 0) {
    const ok = allowed.some(d => hostname === d || hostname.endsWith(`.${d}`));
    if (!ok)
      throw new SecurityError(`URL not allowed for ${adapter}. Allowed domains: ${allowed.join(", ")}`);
  }

  return rawUrl;
}

/**
 * Length-checks a free-text query and removes control characters from it.
 *
 * @param query  Raw query text.
 * @param maxLen Maximum accepted length of the raw input (checked before any
 *               stripping or trimming).
 * @returns The query without ASCII control characters, trimmed.
 * @throws SecurityError when the raw query is longer than `maxLen`.
 */
function sanitizeQuery(query: string, maxLen = MAX_QUERY_LENGTH): string {
  if (query.length > maxLen)
    throw new SecurityError(`Query too long (max ${maxLen} chars)`);
  // Strip null bytes and control characters
  return query.replace(/[\x00-\x1F\x7F]/g, "").trim();
}
|
|
87
|
+
|
|
88
|
+
// ─── Rate Limiting (in-memory, per isolate) ───────────────────────────────────
|
|
89
|
+
|
|
90
|
+
/** Request counter for one client within the current fixed window. */
interface RateEntry { count: number; windowStart: number; }

/** Per-isolate counters keyed by client IP; not shared across isolates. */
const rateMap = new Map<string, RateEntry>();

const RATE_LIMIT = 20;          // max requests
const RATE_WINDOW_MS = 60_000;  // per 60 seconds

/**
 * Counts one request for `ip` and rejects it when the client has already used
 * its quota for the current window.
 *
 * A window covers the half-open interval [windowStart, windowStart + RATE_WINDOW_MS);
 * the first request at or after the end of that interval starts a new window.
 *
 * @param ip  Key identifying the client.
 * @param now Current time in epoch milliseconds; defaults to `Date.now()`.
 *            Exposed so the window logic can be exercised deterministically.
 * @throws SecurityError when `RATE_LIMIT` requests were already counted in
 *         the current window.
 */
function checkRateLimit(ip: string, now: number = Date.now()): void {
  const entry = rateMap.get(ip);

  // No entry yet, or the previous window has fully elapsed: start a new one.
  if (!entry || now - entry.windowStart >= RATE_WINDOW_MS) {
    rateMap.set(ip, { count: 1, windowStart: now });
    return;
  }

  if (entry.count >= RATE_LIMIT) {
    throw new SecurityError(`Rate limit exceeded. Max ${RATE_LIMIT} requests per minute.`);
  }

  entry.count++;
}

/**
 * Removes every entry whose window has elapsed so the map does not keep
 * counters for clients that stopped sending requests.
 *
 * @param now Current time in epoch milliseconds; defaults to `Date.now()`.
 */
function pruneRateMap(now: number = Date.now()): void {
  for (const [ip, entry] of rateMap) {
    if (now - entry.windowStart >= RATE_WINDOW_MS) rateMap.delete(ip);
  }
}
|
|
119
|
+
|
|
120
|
+
// ─── Auth ─────────────────────────────────────────────────────────────────────
|
|
121
|
+
|
|
122
|
+
/**
 * Compares two strings without returning early at the first difference, so
 * the time taken does not reveal how many leading characters matched.
 * The loop length depends only on `expected`.
 */
function constantTimeEquals(candidate: string, expected: string): boolean {
  let diff = candidate.length ^ expected.length;
  for (let i = 0; i < expected.length; i++) {
    // charCodeAt past the end yields NaN, which the bitwise XOR treats as 0.
    diff |= candidate.charCodeAt(i) ^ expected.charCodeAt(i);
  }
  return diff === 0;
}

/**
 * Enforces Bearer-token authentication when an API key is configured.
 *
 * @param request Incoming request; its `Authorization` header is inspected.
 * @param env     Worker environment; authentication is skipped when
 *                `env.API_KEY` is unset or empty.
 * @throws SecurityError with a message starting with "Unauthorized" when the
 *         header is missing, uses another scheme, or carries the wrong token.
 */
function checkAuth(request: Request, env: Env): void {
  const expected = env.API_KEY;
  if (!expected) return; // Auth disabled if no key is set

  const authHeader = request.headers.get("Authorization") ?? "";
  // Authentication scheme names are case-insensitive, so accept "bearer " too.
  const hasBearerScheme = authHeader.slice(0, 7).toLowerCase() === "bearer ";
  const token = hasBearerScheme ? authHeader.slice(7) : "";

  if (!constantTimeEquals(token, expected)) {
    throw new SecurityError("Unauthorized. Provide a valid Bearer token.");
  }
}
|
|
132
|
+
|
|
133
|
+
// ─── Helpers ──────────────────────────────────────────────────────────────────
|
|
134
|
+
|
|
135
|
+
/**
 * Derives the key used to identify the calling client.
 *
 * Preference order: the `CF-Connecting-IP` header, then the first non-blank
 * entry of `X-Forwarded-For`, then the literal "unknown". Blank header values
 * are treated as absent so an empty string is never returned as a key.
 *
 * @param request Incoming request.
 * @returns A non-empty client identifier.
 */
function getClientIp(request: Request): string {
  const connectingIp = request.headers.get("CF-Connecting-IP")?.trim();
  if (connectingIp) return connectingIp;

  const forwardedFor = request.headers.get("X-Forwarded-For");
  const firstHop = forwardedFor
    ?.split(",")
    .map(part => part.trim())
    .find(part => part.length > 0);

  return firstHop ?? "unknown";
}
|
|
142
|
+
|
|
143
|
+
/**
 * Builds the JSON error response returned when a request is rejected.
 *
 * @param message Human-readable reason; serialized as `{ "error": message }`.
 * @param status  HTTP status code of the response.
 * @returns A `Response` with a JSON body. For status 401 it also carries a
 *          `WWW-Authenticate: Bearer` challenge, which HTTP requires on every
 *          401 response.
 */
function securityErrorResponse(message: string, status: number): Response {
  const headers: Record<string, string> = { "Content-Type": "application/json" };
  if (status === 401) {
    headers["WWW-Authenticate"] = "Bearer";
  }

  return new Response(JSON.stringify({ error: message }), { status, headers });
}
|
|
149
|
+
|
|
150
|
+
// ─── Freshness Stamp ──────────────────────────────────────────────────────────
|
|
151
|
+
|
|
152
|
+
function stamp(
|
|
153
|
+
content: string,
|
|
154
|
+
url: string,
|
|
155
|
+
date: string | null,
|
|
156
|
+
confidence: "high" | "medium" | "low",
|
|
157
|
+
adapter: string
|
|
158
|
+
): string {
|
|
24
159
|
const ctx: FreshContext = {
|
|
25
160
|
content: content.slice(0, 6000),
|
|
26
161
|
source_url: url,
|
|
@@ -44,107 +179,133 @@ function stamp(content: string, url: string, date: string | null, confidence: "h
|
|
|
44
179
|
// ─── Server Factory ───────────────────────────────────────────────────────────
|
|
45
180
|
|
|
46
181
|
function createServer(env: Env): McpServer {
|
|
47
|
-
const server = new McpServer({ name: "freshcontext-mcp", version: "0.1.
|
|
182
|
+
const server = new McpServer({ name: "freshcontext-mcp", version: "0.1.3" });
|
|
48
183
|
|
|
49
184
|
// ── extract_github ──────────────────────────────────────────────────────────
|
|
50
185
|
server.registerTool("extract_github", {
  description: "Extract real-time data from a GitHub repository — README, stars, forks, last commit, topics. Returns timestamped freshcontext.",
  inputSchema: z.object({
    url: z.string().url().describe("Full GitHub repo URL e.g. https://github.com/owner/repo"),
  }),
  annotations: { readOnlyHint: true, openWorldHint: true },
}, async ({ url }) => {
  // Tool handler: validates the URL, scrapes the repository page in a headless
  // browser and returns the stamped result. Failures are reported to the
  // caller as an "[ERROR] ..." text item rather than thrown.
  try {
    const safeUrl = validateUrl(url, "github");
    const browser = await puppeteer.launch(env.BROWSER);

    let data: unknown;
    try {
      const page = await browser.newPage();
      await page.setUserAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/124.0.0.0 Safari/537.36");
      await page.goto(safeUrl, { waitUntil: "domcontentloaded" });

      // Runs inside the page; passed as a string so it is evaluated verbatim.
      data = await page.evaluate(`(function() {
        var readme = (document.querySelector('[data-target="readme-toc.content"]') || document.querySelector('.markdown-body') || {}).textContent || null;
        var starsEl = document.querySelector('[id="repo-stars-counter-star"]') || document.querySelector('.Counter.js-social-count');
        var stars = starsEl ? starsEl.textContent.trim() : null;
        var forksEl = document.querySelector('[id="repo-network-counter"]');
        var forks = forksEl ? forksEl.textContent.trim() : null;
        var commitEl = document.querySelector('relative-time');
        var lastCommit = commitEl ? commitEl.getAttribute('datetime') : null;
        var descEl = document.querySelector('.f4.my-3');
        var description = descEl ? descEl.textContent.trim() : null;
        var topics = Array.from(document.querySelectorAll('.topic-tag')).map(function(t) { return t.textContent.trim(); });
        var langEl = document.querySelector('.color-fg-default.text-bold.mr-1');
        var language = langEl ? langEl.textContent.trim() : null;
        return { readme, stars, forks, lastCommit, description, topics, language };
      })()`);
    } finally {
      // Always release the browser, including when navigation or evaluation
      // throws; otherwise the session is left open.
      await browser.close();
    }

    const d = data as {
      readme: string | null;
      stars: string | null;
      forks: string | null;
      lastCommit: string | null;
      description: string | null;
      topics: string[] | null;
      language: string | null;
    };
    // An empty topics array joins to "", which `??` would not replace.
    const topics = d.topics && d.topics.length > 0 ? d.topics.join(", ") : "none";
    const raw = [
      `Description: ${d.description ?? "N/A"}`,
      `Stars: ${d.stars ?? "N/A"} | Forks: ${d.forks ?? "N/A"}`,
      `Language: ${d.language ?? "N/A"}`,
      `Last commit: ${d.lastCommit ?? "N/A"}`,
      `Topics: ${topics}`,
      `\n--- README ---\n${d.readme ?? "No README"}`,
    ].join("\n");
    return { content: [{ type: "text", text: stamp(raw, safeUrl, d.lastCommit ?? null, d.lastCommit ? "high" : "medium", "github") }] };
  } catch (err: unknown) {
    const message = err instanceof Error ? err.message : String(err);
    return { content: [{ type: "text", text: `[ERROR] ${message}` }] };
  }
});
|
|
83
230
|
|
|
84
231
|
// ── extract_hackernews ──────────────────────────────────────────────────────
|
|
85
232
|
server.registerTool("extract_hackernews", {
  description: "Extract top stories or search results from Hacker News with real-time timestamps.",
  inputSchema: z.object({ url: z.string().url().describe("HN URL e.g. https://news.ycombinator.com") }),
  annotations: { readOnlyHint: true, openWorldHint: true },
}, async ({ url }) => {
  // Tool handler: validates the URL, scrapes up to 20 story rows in a headless
  // browser and returns the stamped list. Failures are reported to the caller
  // as an "[ERROR] ..." text item rather than thrown.
  try {
    const safeUrl = validateUrl(url, "hackernews");
    const browser = await puppeteer.launch(env.BROWSER);

    let data: unknown;
    try {
      const page = await browser.newPage();
      await page.goto(safeUrl, { waitUntil: "domcontentloaded" });

      // Runs inside the page; passed as a string so it is evaluated verbatim.
      data = await page.evaluate(`(function() {
        var items = Array.from(document.querySelectorAll('.athing')).slice(0, 20);
        return items.map(function(el) {
          var titleLineEl = el.querySelector('.titleline > a');
          var title = titleLineEl ? titleLineEl.textContent.trim() : null;
          var link = titleLineEl ? titleLineEl.getAttribute('href') : null;
          var subtext = el.nextElementSibling;
          var scoreEl = subtext ? subtext.querySelector('.score') : null;
          var score = scoreEl ? scoreEl.textContent.trim() : null;
          var ageEl = subtext ? subtext.querySelector('.age') : null;
          var age = ageEl ? ageEl.getAttribute('title') : null;
          return { title, link, score, age };
        });
      })()`);
    } finally {
      // Always release the browser, including when navigation or evaluation
      // throws; otherwise the session is left open.
      await browser.close();
    }

    const items = data as Array<{
      title: string | null;
      link: string | null;
      score: string | null;
      age: string | null;
    }>;
    // Every field can be null when its element is missing, so each gets a
    // fallback instead of being rendered as the string "null".
    const raw = items.map((r, i) =>
      `[${i + 1}] ${r.title ?? "Untitled"}\nURL: ${r.link ?? "N/A"}\nScore: ${r.score ?? "N/A"}\nPosted: ${r.age ?? "unknown"}`
    ).join("\n\n");
    // Lexicographic sort, newest first; relies on the age strings sorting
    // chronologically as text.
    const newest = items.map(r => r.age).filter(Boolean).sort().reverse()[0] ?? null;
    return { content: [{ type: "text", text: stamp(raw, safeUrl, newest, newest ? "high" : "medium", "hackernews") }] };
  } catch (err: unknown) {
    const message = err instanceof Error ? err.message : String(err);
    return { content: [{ type: "text", text: `[ERROR] ${message}` }] };
  }
});
|
|
115
269
|
|
|
116
270
|
// ── extract_scholar ─────────────────────────────────────────────────────────
|
|
117
271
|
server.registerTool("extract_scholar", {
  description: "Extract research results from Google Scholar with publication dates.",
  inputSchema: z.object({ url: z.string().url().describe("Google Scholar search URL") }),
  annotations: { readOnlyHint: true, openWorldHint: true },
}, async ({ url }) => {
  // Tool handler: validates the URL, scrapes the result entries in a headless
  // browser and returns the stamped list. Failures are reported to the caller
  // as an "[ERROR] ..." text item rather than thrown.
  try {
    const safeUrl = validateUrl(url, "scholar");
    const browser = await puppeteer.launch(env.BROWSER);

    let data: unknown;
    try {
      const page = await browser.newPage();
      await page.setUserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 Chrome/124.0.0.0 Safari/537.36");
      await page.goto(safeUrl, { waitUntil: "domcontentloaded" });

      // Runs inside the page; passed as a string so it is evaluated verbatim.
      // Backslashes are doubled because the regex lives inside a template literal.
      data = await page.evaluate(`(function() {
        var items = Array.from(document.querySelectorAll('.gs_r.gs_or.gs_scl'));
        return items.map(function(el) {
          var titleEl = el.querySelector('.gs_rt');
          var title = titleEl ? titleEl.textContent.trim() : null;
          var authorsEl = el.querySelector('.gs_a');
          var authors = authorsEl ? authorsEl.textContent.trim() : null;
          var snippetEl = el.querySelector('.gs_rs');
          var snippet = snippetEl ? snippetEl.textContent.trim() : null;
          var yearMatch = authors ? authors.match(/\\b(19|20)\\d{2}\\b/) : null;
          var year = yearMatch ? yearMatch[0] : null;
          return { title, authors, snippet, year };
        });
      })()`);
    } finally {
      // Always release the browser, including when navigation or evaluation
      // throws; otherwise the session is left open.
      await browser.close();
    }

    const items = data as Array<{
      title: string | null;
      authors: string | null;
      snippet: string | null;
      year: string | null;
    }>;
    const raw = items.map((r, i) =>
      `[${i + 1}] ${r.title ?? "Untitled"}\nAuthors: ${r.authors ?? "Unknown"}\nYear: ${r.year ?? "Unknown"}\nSnippet: ${r.snippet ?? "N/A"}`
    ).join("\n\n");
    // Four-digit years sort correctly as strings; take the most recent one.
    const years = items.map(r => r.year).filter(Boolean).sort().reverse();
    const newest = years[0] ?? null;
    return { content: [{ type: "text", text: stamp(raw, safeUrl, newest ? `${newest}-01-01` : null, newest ? "high" : "low", "google_scholar") }] };
  } catch (err: unknown) {
    const message = err instanceof Error ? err.message : String(err);
    return { content: [{ type: "text", text: `[ERROR] ${message}` }] };
  }
});
|
|
149
310
|
|
|
150
311
|
return server;
|
|
@@ -154,6 +315,23 @@ function createServer(env: Env): McpServer {
|
|
|
154
315
|
|
|
155
316
|
export default {
|
|
156
317
|
async fetch(request: Request, env: Env): Promise<Response> {
|
|
318
|
+
// Prune stale rate limit entries occasionally
|
|
319
|
+
if (Math.random() < 0.05) pruneRateMap();
|
|
320
|
+
|
|
321
|
+
try {
|
|
322
|
+
// 1. Auth check
|
|
323
|
+
checkAuth(request, env);
|
|
324
|
+
|
|
325
|
+
// 2. Rate limit check
|
|
326
|
+
const ip = getClientIp(request);
|
|
327
|
+
checkRateLimit(ip);
|
|
328
|
+
|
|
329
|
+
} catch (err: any) {
|
|
330
|
+
const status = err.message.startsWith("Unauthorized") ? 401 : 429;
|
|
331
|
+
return securityErrorResponse(err.message, status);
|
|
332
|
+
}
|
|
333
|
+
|
|
334
|
+
// 3. Handle MCP request
|
|
157
335
|
const transport = new WebStandardStreamableHTTPServerTransport();
|
|
158
336
|
const server = createServer(env);
|
|
159
337
|
await server.connect(transport);
|