freshcontext-mcp 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapters/github.js +3 -0
- package/dist/adapters/hackernews.js +3 -1
- package/dist/adapters/packageTrends.js +3 -3
- package/dist/adapters/repoSearch.js +4 -3
- package/dist/adapters/scholar.js +3 -0
- package/dist/adapters/yc.js +3 -0
- package/dist/security.js +117 -0
- package/dist/server.js +49 -18
- package/package.json +1 -1
- package/src/adapters/github.ts +4 -0
- package/src/adapters/hackernews.ts +3 -1
- package/src/adapters/packageTrends.ts +3 -3
- package/src/adapters/repoSearch.ts +4 -3
- package/src/adapters/scholar.ts +4 -0
- package/src/adapters/yc.ts +4 -0
- package/src/security.ts +161 -0
- package/src/server.ts +43 -18
- package/worker/src/worker.ts +261 -83
package/dist/adapters/github.js
CHANGED
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
import { chromium } from "playwright";
|
|
2
|
+
import { validateUrl } from "../security.js";
|
|
2
3
|
export async function githubAdapter(options) {
|
|
4
|
+
const safeUrl = validateUrl(options.url, "github");
|
|
5
|
+
options = { ...options, url: safeUrl };
|
|
3
6
|
const browser = await chromium.launch({ headless: true });
|
|
4
7
|
const page = await browser.newPage();
|
|
5
8
|
// Spoof a real browser UA to avoid bot detection
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import { chromium } from "playwright";
|
|
2
|
+
import { validateUrl } from "../security.js";
|
|
2
3
|
export async function hackerNewsAdapter(options) {
|
|
3
|
-
//
|
|
4
|
+
// Validate URL — allow both HN and Algolia domains
|
|
5
|
+
validateUrl(options.url, "hackernews");
|
|
4
6
|
const url = options.url;
|
|
5
7
|
if (url.includes("hn.algolia.com/api/") || url.startsWith("hn-search:")) {
|
|
6
8
|
const query = url.startsWith("hn-search:")
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
+
import { sanitizePackages } from "../security.js";
|
|
1
2
|
// Uses npm registry API + PyPI JSON API (no auth needed)
|
|
2
3
|
export async function packageTrendsAdapter(options) {
|
|
3
|
-
//
|
|
4
|
-
|
|
5
|
-
const raw_input = options.url.replace(/^https?:\/\//, "").trim();
|
|
4
|
+
// Sanitize package input
|
|
5
|
+
const raw_input = sanitizePackages(options.url.replace(/^https?:\/\//, "").trim());
|
|
6
6
|
// Parse ecosystem prefix
|
|
7
7
|
const parts = raw_input.split(",").map((s) => s.trim());
|
|
8
8
|
const results = [];
|
|
@@ -1,8 +1,9 @@
|
|
|
1
|
+
import { sanitizeQuery } from "../security.js";
|
|
1
2
|
// Uses GitHub Search API (no auth needed for basic search)
|
|
2
3
|
export async function repoSearchAdapter(options) {
|
|
3
|
-
//
|
|
4
|
-
|
|
5
|
-
let query =
|
|
4
|
+
// Sanitize query input
|
|
5
|
+
const query_input = sanitizeQuery(options.url);
|
|
6
|
+
let query = query_input;
|
|
6
7
|
// If it's a full URL, extract the query param
|
|
7
8
|
try {
|
|
8
9
|
const parsed = new URL(options.url);
|
package/dist/adapters/scholar.js
CHANGED
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
import { chromium } from "playwright";
|
|
2
|
+
import { validateUrl } from "../security.js";
|
|
2
3
|
export async function scholarAdapter(options) {
|
|
4
|
+
const safeUrl = validateUrl(options.url, "scholar");
|
|
5
|
+
options = { ...options, url: safeUrl };
|
|
3
6
|
const browser = await chromium.launch({ headless: true });
|
|
4
7
|
const page = await browser.newPage();
|
|
5
8
|
await page.setExtraHTTPHeaders({
|
package/dist/adapters/yc.js
CHANGED
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
import { chromium } from "playwright";
|
|
2
|
+
import { validateUrl } from "../security.js";
|
|
2
3
|
export async function ycAdapter(options) {
|
|
4
|
+
const safeUrl = validateUrl(options.url, "yc");
|
|
5
|
+
options = { ...options, url: safeUrl };
|
|
3
6
|
const browser = await chromium.launch({ headless: true });
|
|
4
7
|
const page = await browser.newPage();
|
|
5
8
|
// YC company directory is React-rendered — wait for network to settle
|
package/dist/security.js
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
/**
 * freshcontext-mcp security module
 * Input sanitization, domain allowlists, and request validation.
 */
// ─── Allowed domains per adapter ────────────────────────────────────────────
export const ALLOWED_DOMAINS = {
    github: ["github.com", "raw.githubusercontent.com"],
    scholar: ["scholar.google.com"],
    hackernews: ["news.ycombinator.com", "hn.algolia.com"],
    yc: ["www.ycombinator.com", "ycombinator.com"],
    repoSearch: [], // uses GitHub API directly, no browser
    packageTrends: [], // uses npm/PyPI APIs directly, no browser
};
// ─── Blocked IP ranges and internal hostnames ────────────────────────────────
// NOTE: matched against URL.hostname lower-cased and with IPv6 brackets removed.
const BLOCKED_PATTERNS = [
    /^localhost$/i,
    /^127\.\d+\.\d+\.\d+$/, // IPv4 loopback
    /^10\.\d+\.\d+\.\d+$/, // RFC 1918 private
    /^172\.(1[6-9]|2\d|3[01])\.\d+\.\d+$/, // RFC 1918 private
    /^192\.168\.\d+\.\d+$/, // RFC 1918 private
    /^169\.254\.\d+\.\d+$/, // link-local / AWS metadata
    /^0\.0\.0\.0$/,
    /^::1$/, // IPv6 loopback
    /^fc00:/i, // IPv6 unique-local
    /^fe80:/i, // IPv6 link-local
];
// ─── Max length limits ────────────────────────────────────────────────────────
export const MAX_URL_LENGTH = 500;
export const MAX_QUERY_LENGTH = 200;
export const MAX_PACKAGES_LENGTH = 300;
// ─── Validation errors ───────────────────────────────────────────────────────
/** Thrown for every validation/sanitization failure in this module. */
export class SecurityError extends Error {
    constructor(message) {
        super(message);
        this.name = "SecurityError";
    }
}
// ─── URL validator ───────────────────────────────────────────────────────────
/**
 * Validates a URL on behalf of an adapter: non-empty, length-capped,
 * http(s)-only, not a private/internal address, and — when the adapter has a
 * non-empty entry in ALLOWED_DOMAINS — on an allowed domain (exact match or
 * subdomain). Returns the normalized URL string.
 * @throws {SecurityError} when any check fails
 */
export function validateUrl(rawUrl, adapterName) {
    // Length check
    if (!rawUrl || rawUrl.trim().length === 0) {
        throw new SecurityError("URL cannot be empty");
    }
    if (rawUrl.length > MAX_URL_LENGTH) {
        throw new SecurityError(`URL exceeds maximum length of ${MAX_URL_LENGTH} characters`);
    }
    // Must be a valid URL
    let parsed;
    try {
        parsed = new URL(rawUrl.trim());
    }
    catch {
        throw new SecurityError(`Invalid URL format: ${rawUrl}`);
    }
    // Must use http or https
    if (!["http:", "https:"].includes(parsed.protocol)) {
        throw new SecurityError(`Protocol not allowed: ${parsed.protocol}. Only http/https permitted.`);
    }
    // WHATWG URL keeps IPv6 literals bracketed (e.g. "[::1]"), which would
    // never match the IPv6 patterns in BLOCKED_PATTERNS — strip the brackets
    // before matching so loopback/link-local IPv6 addresses are blocked too.
    const hostname = parsed.hostname.toLowerCase().replace(/^\[|\]$/g, "");
    // Block internal/private IPs and hostnames
    for (const pattern of BLOCKED_PATTERNS) {
        if (pattern.test(hostname)) {
            throw new SecurityError(`Access to internal/private addresses is not permitted: ${hostname}`);
        }
    }
    // Domain allowlist check (skip if allowlist is empty — means no browser used)
    const allowedDomains = ALLOWED_DOMAINS[adapterName];
    if (allowedDomains && allowedDomains.length > 0) {
        const isAllowed = allowedDomains.some((domain) => hostname === domain || hostname.endsWith(`.${domain}`));
        if (!isAllowed) {
            throw new SecurityError(`Domain not allowed for ${adapterName} adapter: ${hostname}. ` +
                `Allowed domains: ${allowedDomains.join(", ")}`);
        }
    }
    return parsed.toString();
}
// ─── Query string sanitizer ──────────────────────────────────────────────────
/**
 * Trims a free-text query, truncates it to maxLength, and strips ASCII
 * control characters (keeping tab/LF/CR).
 * @throws {SecurityError} when the input is empty or nothing survives cleanup
 */
export function sanitizeQuery(query, maxLength = MAX_QUERY_LENGTH) {
    if (!query || query.trim().length === 0) {
        throw new SecurityError("Query cannot be empty");
    }
    const trimmed = query.trim().slice(0, maxLength);
    // Strip null bytes and control characters
    const cleaned = trimmed.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, "");
    if (cleaned.length === 0) {
        throw new SecurityError("Query contains no valid characters after sanitization");
    }
    return cleaned;
}
// ─── Package name sanitizer ──────────────────────────────────────────────────
/**
 * Restricts a comma-separated package list to characters valid in npm/PyPI
 * names plus "," and ":" (for the npm:/pypi: ecosystem prefix), capped at
 * MAX_PACKAGES_LENGTH.
 * @throws {SecurityError} when the input is empty, too long, or nothing
 *         survives cleanup
 */
export function sanitizePackages(input) {
    if (!input || input.trim().length === 0) {
        throw new SecurityError("Package name cannot be empty");
    }
    if (input.length > MAX_PACKAGES_LENGTH) {
        throw new SecurityError(`Package input exceeds maximum length of ${MAX_PACKAGES_LENGTH} characters`);
    }
    // Only allow valid npm/PyPI package name characters, commas, colons (for npm:/pypi: prefix)
    const cleaned = input
        .trim()
        .replace(/[^a-zA-Z0-9@/._\-,:]/g, "")
        .slice(0, MAX_PACKAGES_LENGTH);
    if (cleaned.length === 0) {
        throw new SecurityError("Package name contains no valid characters after sanitization");
    }
    return cleaned;
}
// ─── Error formatter ─────────────────────────────────────────────────────────
/**
 * Renders any thrown value as a short, user-safe string, distinguishing
 * security rejections from ordinary errors.
 */
export function formatSecurityError(err) {
    if (err instanceof SecurityError) {
        return `[Security] ${err.message}`;
    }
    if (err instanceof Error) {
        return `[Error] ${err.message}`;
    }
    return "[Error] Unknown error occurred";
}
|
package/dist/server.js
CHANGED
|
@@ -8,6 +8,7 @@ import { ycAdapter } from "./adapters/yc.js";
|
|
|
8
8
|
import { repoSearchAdapter } from "./adapters/repoSearch.js";
|
|
9
9
|
import { packageTrendsAdapter } from "./adapters/packageTrends.js";
|
|
10
10
|
import { stampFreshness, formatForLLM } from "./tools/freshnessStamp.js";
|
|
11
|
+
import { formatSecurityError } from "./security.js";
|
|
11
12
|
const server = new McpServer({
|
|
12
13
|
name: "freshcontext-mcp",
|
|
13
14
|
version: "0.1.0",
|
|
@@ -21,9 +22,14 @@ server.registerTool("extract_github", {
|
|
|
21
22
|
}),
|
|
22
23
|
annotations: { readOnlyHint: true, openWorldHint: true },
|
|
23
24
|
}, async ({ url, max_length }) => {
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
25
|
+
try {
|
|
26
|
+
const result = await githubAdapter({ url, maxLength: max_length });
|
|
27
|
+
const ctx = stampFreshness(result, { url, maxLength: max_length }, "github");
|
|
28
|
+
return { content: [{ type: "text", text: formatForLLM(ctx) }] };
|
|
29
|
+
}
|
|
30
|
+
catch (err) {
|
|
31
|
+
return { content: [{ type: "text", text: formatSecurityError(err) }] };
|
|
32
|
+
}
|
|
27
33
|
});
|
|
28
34
|
// ─── Tool: extract_scholar ───────────────────────────────────────────────────
|
|
29
35
|
server.registerTool("extract_scholar", {
|
|
@@ -34,9 +40,14 @@ server.registerTool("extract_scholar", {
|
|
|
34
40
|
}),
|
|
35
41
|
annotations: { readOnlyHint: true, openWorldHint: true },
|
|
36
42
|
}, async ({ url, max_length }) => {
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
43
|
+
try {
|
|
44
|
+
const result = await scholarAdapter({ url, maxLength: max_length });
|
|
45
|
+
const ctx = stampFreshness(result, { url, maxLength: max_length }, "google_scholar");
|
|
46
|
+
return { content: [{ type: "text", text: formatForLLM(ctx) }] };
|
|
47
|
+
}
|
|
48
|
+
catch (err) {
|
|
49
|
+
return { content: [{ type: "text", text: formatSecurityError(err) }] };
|
|
50
|
+
}
|
|
40
51
|
});
|
|
41
52
|
// ─── Tool: extract_hackernews ────────────────────────────────────────────────
|
|
42
53
|
server.registerTool("extract_hackernews", {
|
|
@@ -47,9 +58,14 @@ server.registerTool("extract_hackernews", {
|
|
|
47
58
|
}),
|
|
48
59
|
annotations: { readOnlyHint: true, openWorldHint: true },
|
|
49
60
|
}, async ({ url, max_length }) => {
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
61
|
+
try {
|
|
62
|
+
const result = await hackerNewsAdapter({ url, maxLength: max_length });
|
|
63
|
+
const ctx = stampFreshness(result, { url, maxLength: max_length }, "hackernews");
|
|
64
|
+
return { content: [{ type: "text", text: formatForLLM(ctx) }] };
|
|
65
|
+
}
|
|
66
|
+
catch (err) {
|
|
67
|
+
return { content: [{ type: "text", text: formatSecurityError(err) }] };
|
|
68
|
+
}
|
|
53
69
|
});
|
|
54
70
|
// ─── Tool: extract_yc ──────────────────────────────────────────────────────────
|
|
55
71
|
server.registerTool("extract_yc", {
|
|
@@ -60,9 +76,14 @@ server.registerTool("extract_yc", {
|
|
|
60
76
|
}),
|
|
61
77
|
annotations: { readOnlyHint: true, openWorldHint: true },
|
|
62
78
|
}, async ({ url, max_length }) => {
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
79
|
+
try {
|
|
80
|
+
const result = await ycAdapter({ url, maxLength: max_length });
|
|
81
|
+
const ctx = stampFreshness(result, { url, maxLength: max_length }, "ycombinator");
|
|
82
|
+
return { content: [{ type: "text", text: formatForLLM(ctx) }] };
|
|
83
|
+
}
|
|
84
|
+
catch (err) {
|
|
85
|
+
return { content: [{ type: "text", text: formatSecurityError(err) }] };
|
|
86
|
+
}
|
|
66
87
|
});
|
|
67
88
|
// ─── Tool: search_repos ──────────────────────────────────────────────────────
|
|
68
89
|
server.registerTool("search_repos", {
|
|
@@ -73,9 +94,14 @@ server.registerTool("search_repos", {
|
|
|
73
94
|
}),
|
|
74
95
|
annotations: { readOnlyHint: true, openWorldHint: true },
|
|
75
96
|
}, async ({ query, max_length }) => {
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
97
|
+
try {
|
|
98
|
+
const result = await repoSearchAdapter({ url: query, maxLength: max_length });
|
|
99
|
+
const ctx = stampFreshness(result, { url: query, maxLength: max_length }, "github_search");
|
|
100
|
+
return { content: [{ type: "text", text: formatForLLM(ctx) }] };
|
|
101
|
+
}
|
|
102
|
+
catch (err) {
|
|
103
|
+
return { content: [{ type: "text", text: formatSecurityError(err) }] };
|
|
104
|
+
}
|
|
79
105
|
});
|
|
80
106
|
// ─── Tool: package_trends ────────────────────────────────────────────────────
|
|
81
107
|
server.registerTool("package_trends", {
|
|
@@ -86,9 +112,14 @@ server.registerTool("package_trends", {
|
|
|
86
112
|
}),
|
|
87
113
|
annotations: { readOnlyHint: true, openWorldHint: true },
|
|
88
114
|
}, async ({ packages, max_length }) => {
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
115
|
+
try {
|
|
116
|
+
const result = await packageTrendsAdapter({ url: packages, maxLength: max_length });
|
|
117
|
+
const ctx = stampFreshness(result, { url: packages, maxLength: max_length }, "package_registry");
|
|
118
|
+
return { content: [{ type: "text", text: formatForLLM(ctx) }] };
|
|
119
|
+
}
|
|
120
|
+
catch (err) {
|
|
121
|
+
return { content: [{ type: "text", text: formatSecurityError(err) }] };
|
|
122
|
+
}
|
|
92
123
|
});
|
|
93
124
|
// ─── Tool: extract_landscape ─────────────────────────────────────────────────
|
|
94
125
|
server.registerTool("extract_landscape", {
|
package/package.json
CHANGED
package/src/adapters/github.ts
CHANGED
|
@@ -1,7 +1,11 @@
|
|
|
1
1
|
import { chromium } from "playwright";
|
|
2
2
|
import { AdapterResult, ExtractOptions } from "../types.js";
|
|
3
|
+
import { validateUrl } from "../security.js";
|
|
3
4
|
|
|
4
5
|
export async function githubAdapter(options: ExtractOptions): Promise<AdapterResult> {
|
|
6
|
+
const safeUrl = validateUrl(options.url, "github");
|
|
7
|
+
options = { ...options, url: safeUrl };
|
|
8
|
+
|
|
5
9
|
const browser = await chromium.launch({ headless: true });
|
|
6
10
|
const page = await browser.newPage();
|
|
7
11
|
|
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
import { chromium } from "playwright";
|
|
2
2
|
import { AdapterResult, ExtractOptions } from "../types.js";
|
|
3
|
+
import { validateUrl } from "../security.js";
|
|
3
4
|
|
|
4
5
|
export async function hackerNewsAdapter(options: ExtractOptions): Promise<AdapterResult> {
|
|
5
|
-
//
|
|
6
|
+
// Validate URL — allow both HN and Algolia domains
|
|
7
|
+
validateUrl(options.url, "hackernews");
|
|
6
8
|
const url = options.url;
|
|
7
9
|
|
|
8
10
|
if (url.includes("hn.algolia.com/api/") || url.startsWith("hn-search:")) {
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import { AdapterResult, ExtractOptions } from "../types.js";
|
|
2
|
+
import { sanitizePackages } from "../security.js";
|
|
2
3
|
|
|
3
4
|
// Uses npm registry API + PyPI JSON API (no auth needed)
|
|
4
5
|
export async function packageTrendsAdapter(options: ExtractOptions): Promise<AdapterResult> {
|
|
5
|
-
//
|
|
6
|
-
|
|
7
|
-
const raw_input = options.url.replace(/^https?:\/\//, "").trim();
|
|
6
|
+
// Sanitize package input
|
|
7
|
+
const raw_input = sanitizePackages(options.url.replace(/^https?:\/\//, "").trim());
|
|
8
8
|
|
|
9
9
|
// Parse ecosystem prefix
|
|
10
10
|
const parts = raw_input.split(",").map((s) => s.trim());
|
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
import { AdapterResult, ExtractOptions } from "../types.js";
|
|
2
|
+
import { sanitizeQuery } from "../security.js";
|
|
2
3
|
|
|
3
4
|
// Uses GitHub Search API (no auth needed for basic search)
|
|
4
5
|
export async function repoSearchAdapter(options: ExtractOptions): Promise<AdapterResult> {
|
|
5
|
-
//
|
|
6
|
-
|
|
7
|
-
let query =
|
|
6
|
+
// Sanitize query input
|
|
7
|
+
const query_input = sanitizeQuery(options.url);
|
|
8
|
+
let query = query_input;
|
|
8
9
|
|
|
9
10
|
// If it's a full URL, extract the query param
|
|
10
11
|
try {
|
package/src/adapters/scholar.ts
CHANGED
|
@@ -1,7 +1,11 @@
|
|
|
1
1
|
import { chromium } from "playwright";
|
|
2
2
|
import { AdapterResult, ExtractOptions } from "../types.js";
|
|
3
|
+
import { validateUrl } from "../security.js";
|
|
3
4
|
|
|
4
5
|
export async function scholarAdapter(options: ExtractOptions): Promise<AdapterResult> {
|
|
6
|
+
const safeUrl = validateUrl(options.url, "scholar");
|
|
7
|
+
options = { ...options, url: safeUrl };
|
|
8
|
+
|
|
5
9
|
const browser = await chromium.launch({ headless: true });
|
|
6
10
|
const page = await browser.newPage();
|
|
7
11
|
|
package/src/adapters/yc.ts
CHANGED
|
@@ -1,7 +1,11 @@
|
|
|
1
1
|
import { chromium } from "playwright";
|
|
2
2
|
import { AdapterResult, ExtractOptions } from "../types.js";
|
|
3
|
+
import { validateUrl } from "../security.js";
|
|
3
4
|
|
|
4
5
|
export async function ycAdapter(options: ExtractOptions): Promise<AdapterResult> {
|
|
6
|
+
const safeUrl = validateUrl(options.url, "yc");
|
|
7
|
+
options = { ...options, url: safeUrl };
|
|
8
|
+
|
|
5
9
|
const browser = await chromium.launch({ headless: true });
|
|
6
10
|
const page = await browser.newPage();
|
|
7
11
|
|
package/src/security.ts
ADDED
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* freshcontext-mcp security module
|
|
3
|
+
* Input sanitization, domain allowlists, and request validation
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
// ─── Allowed domains per adapter ────────────────────────────────────────────
|
|
7
|
+
|
|
8
|
+
export const ALLOWED_DOMAINS: Record<string, string[]> = {
|
|
9
|
+
github: ["github.com", "raw.githubusercontent.com"],
|
|
10
|
+
scholar: ["scholar.google.com"],
|
|
11
|
+
hackernews: ["news.ycombinator.com", "hn.algolia.com"],
|
|
12
|
+
yc: ["www.ycombinator.com", "ycombinator.com"],
|
|
13
|
+
repoSearch: [], // uses GitHub API directly, no browser
|
|
14
|
+
packageTrends: [], // uses npm/PyPI APIs directly, no browser
|
|
15
|
+
};
|
|
16
|
+
|
|
17
|
+
// ─── Blocked IP ranges and internal hostnames ────────────────────────────────
|
|
18
|
+
|
|
19
|
+
const BLOCKED_PATTERNS = [
|
|
20
|
+
/^localhost$/i,
|
|
21
|
+
/^127\.\d+\.\d+\.\d+$/,
|
|
22
|
+
/^10\.\d+\.\d+\.\d+$/,
|
|
23
|
+
/^172\.(1[6-9]|2\d|3[01])\.\d+\.\d+$/,
|
|
24
|
+
/^192\.168\.\d+\.\d+$/,
|
|
25
|
+
/^169\.254\.\d+\.\d+$/, // AWS metadata
|
|
26
|
+
/^0\.0\.0\.0$/,
|
|
27
|
+
/^::1$/,
|
|
28
|
+
/^fc00:/i,
|
|
29
|
+
/^fe80:/i,
|
|
30
|
+
];
|
|
31
|
+
|
|
32
|
+
// ─── Max length limits ────────────────────────────────────────────────────────
|
|
33
|
+
|
|
34
|
+
export const MAX_URL_LENGTH = 500;
|
|
35
|
+
export const MAX_QUERY_LENGTH = 200;
|
|
36
|
+
export const MAX_PACKAGES_LENGTH = 300;
|
|
37
|
+
|
|
38
|
+
// ─── Validation errors ───────────────────────────────────────────────────────
|
|
39
|
+
|
|
40
|
+
export class SecurityError extends Error {
|
|
41
|
+
constructor(message: string) {
|
|
42
|
+
super(message);
|
|
43
|
+
this.name = "SecurityError";
|
|
44
|
+
}
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
// ─── URL validator ───────────────────────────────────────────────────────────
|
|
48
|
+
|
|
49
|
+
export function validateUrl(
|
|
50
|
+
rawUrl: string,
|
|
51
|
+
adapterName: keyof typeof ALLOWED_DOMAINS
|
|
52
|
+
): string {
|
|
53
|
+
// Length check
|
|
54
|
+
if (!rawUrl || rawUrl.trim().length === 0) {
|
|
55
|
+
throw new SecurityError("URL cannot be empty");
|
|
56
|
+
}
|
|
57
|
+
if (rawUrl.length > MAX_URL_LENGTH) {
|
|
58
|
+
throw new SecurityError(
|
|
59
|
+
`URL exceeds maximum length of ${MAX_URL_LENGTH} characters`
|
|
60
|
+
);
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
// Must be a valid URL
|
|
64
|
+
let parsed: URL;
|
|
65
|
+
try {
|
|
66
|
+
parsed = new URL(rawUrl.trim());
|
|
67
|
+
} catch {
|
|
68
|
+
throw new SecurityError(`Invalid URL format: ${rawUrl}`);
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
// Must use http or https
|
|
72
|
+
if (!["http:", "https:"].includes(parsed.protocol)) {
|
|
73
|
+
throw new SecurityError(
|
|
74
|
+
`Protocol not allowed: ${parsed.protocol}. Only http/https permitted.`
|
|
75
|
+
);
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
const hostname = parsed.hostname.toLowerCase();
|
|
79
|
+
|
|
80
|
+
// Block internal/private IPs and hostnames
|
|
81
|
+
for (const pattern of BLOCKED_PATTERNS) {
|
|
82
|
+
if (pattern.test(hostname)) {
|
|
83
|
+
throw new SecurityError(
|
|
84
|
+
`Access to internal/private addresses is not permitted: ${hostname}`
|
|
85
|
+
);
|
|
86
|
+
}
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
// Domain allowlist check (skip if allowlist is empty — means no browser used)
|
|
90
|
+
const allowedDomains = ALLOWED_DOMAINS[adapterName];
|
|
91
|
+
if (allowedDomains && allowedDomains.length > 0) {
|
|
92
|
+
const isAllowed = allowedDomains.some(
|
|
93
|
+
(domain) => hostname === domain || hostname.endsWith(`.${domain}`)
|
|
94
|
+
);
|
|
95
|
+
if (!isAllowed) {
|
|
96
|
+
throw new SecurityError(
|
|
97
|
+
`Domain not allowed for ${adapterName} adapter: ${hostname}. ` +
|
|
98
|
+
`Allowed domains: ${allowedDomains.join(", ")}`
|
|
99
|
+
);
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
return parsed.toString();
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
// ─── Query string sanitizer ──────────────────────────────────────────────────
|
|
107
|
+
|
|
108
|
+
export function sanitizeQuery(query: string, maxLength = MAX_QUERY_LENGTH): string {
|
|
109
|
+
if (!query || query.trim().length === 0) {
|
|
110
|
+
throw new SecurityError("Query cannot be empty");
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
const trimmed = query.trim().slice(0, maxLength);
|
|
114
|
+
|
|
115
|
+
// Strip null bytes and control characters
|
|
116
|
+
const cleaned = trimmed.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, "");
|
|
117
|
+
|
|
118
|
+
if (cleaned.length === 0) {
|
|
119
|
+
throw new SecurityError("Query contains no valid characters after sanitization");
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
return cleaned;
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
// ─── Package name sanitizer ──────────────────────────────────────────────────
|
|
126
|
+
|
|
127
|
+
export function sanitizePackages(input: string): string {
|
|
128
|
+
if (!input || input.trim().length === 0) {
|
|
129
|
+
throw new SecurityError("Package name cannot be empty");
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
if (input.length > MAX_PACKAGES_LENGTH) {
|
|
133
|
+
throw new SecurityError(
|
|
134
|
+
`Package input exceeds maximum length of ${MAX_PACKAGES_LENGTH} characters`
|
|
135
|
+
);
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
// Only allow valid npm/PyPI package name characters, commas, colons (for npm:/pypi: prefix)
|
|
139
|
+
const cleaned = input
|
|
140
|
+
.trim()
|
|
141
|
+
.replace(/[^a-zA-Z0-9@/._\-,:]/g, "")
|
|
142
|
+
.slice(0, MAX_PACKAGES_LENGTH);
|
|
143
|
+
|
|
144
|
+
if (cleaned.length === 0) {
|
|
145
|
+
throw new SecurityError("Package name contains no valid characters after sanitization");
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
return cleaned;
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
// ─── Error formatter ─────────────────────────────────────────────────────────
|
|
152
|
+
|
|
153
|
+
export function formatSecurityError(err: unknown): string {
|
|
154
|
+
if (err instanceof SecurityError) {
|
|
155
|
+
return `[Security] ${err.message}`;
|
|
156
|
+
}
|
|
157
|
+
if (err instanceof Error) {
|
|
158
|
+
return `[Error] ${err.message}`;
|
|
159
|
+
}
|
|
160
|
+
return "[Error] Unknown error occurred";
|
|
161
|
+
}
|
package/src/server.ts
CHANGED
|
@@ -8,6 +8,7 @@ import { ycAdapter } from "./adapters/yc.js";
|
|
|
8
8
|
import { repoSearchAdapter } from "./adapters/repoSearch.js";
|
|
9
9
|
import { packageTrendsAdapter } from "./adapters/packageTrends.js";
|
|
10
10
|
import { stampFreshness, formatForLLM } from "./tools/freshnessStamp.js";
|
|
11
|
+
import { SecurityError, formatSecurityError } from "./security.js";
|
|
11
12
|
|
|
12
13
|
const server = new McpServer({
|
|
13
14
|
name: "freshcontext-mcp",
|
|
@@ -27,9 +28,13 @@ server.registerTool(
|
|
|
27
28
|
annotations: { readOnlyHint: true, openWorldHint: true },
|
|
28
29
|
},
|
|
29
30
|
async ({ url, max_length }) => {
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
31
|
+
try {
|
|
32
|
+
const result = await githubAdapter({ url, maxLength: max_length });
|
|
33
|
+
const ctx = stampFreshness(result, { url, maxLength: max_length }, "github");
|
|
34
|
+
return { content: [{ type: "text", text: formatForLLM(ctx) }] };
|
|
35
|
+
} catch (err) {
|
|
36
|
+
return { content: [{ type: "text", text: formatSecurityError(err) }] };
|
|
37
|
+
}
|
|
33
38
|
}
|
|
34
39
|
);
|
|
35
40
|
|
|
@@ -46,9 +51,13 @@ server.registerTool(
|
|
|
46
51
|
annotations: { readOnlyHint: true, openWorldHint: true },
|
|
47
52
|
},
|
|
48
53
|
async ({ url, max_length }) => {
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
54
|
+
try {
|
|
55
|
+
const result = await scholarAdapter({ url, maxLength: max_length });
|
|
56
|
+
const ctx = stampFreshness(result, { url, maxLength: max_length }, "google_scholar");
|
|
57
|
+
return { content: [{ type: "text", text: formatForLLM(ctx) }] };
|
|
58
|
+
} catch (err) {
|
|
59
|
+
return { content: [{ type: "text", text: formatSecurityError(err) }] };
|
|
60
|
+
}
|
|
52
61
|
}
|
|
53
62
|
);
|
|
54
63
|
|
|
@@ -65,9 +74,13 @@ server.registerTool(
|
|
|
65
74
|
annotations: { readOnlyHint: true, openWorldHint: true },
|
|
66
75
|
},
|
|
67
76
|
async ({ url, max_length }) => {
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
77
|
+
try {
|
|
78
|
+
const result = await hackerNewsAdapter({ url, maxLength: max_length });
|
|
79
|
+
const ctx = stampFreshness(result, { url, maxLength: max_length }, "hackernews");
|
|
80
|
+
return { content: [{ type: "text", text: formatForLLM(ctx) }] };
|
|
81
|
+
} catch (err) {
|
|
82
|
+
return { content: [{ type: "text", text: formatSecurityError(err) }] };
|
|
83
|
+
}
|
|
71
84
|
}
|
|
72
85
|
);
|
|
73
86
|
|
|
@@ -84,9 +97,13 @@ server.registerTool(
|
|
|
84
97
|
annotations: { readOnlyHint: true, openWorldHint: true },
|
|
85
98
|
},
|
|
86
99
|
async ({ url, max_length }) => {
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
100
|
+
try {
|
|
101
|
+
const result = await ycAdapter({ url, maxLength: max_length });
|
|
102
|
+
const ctx = stampFreshness(result, { url, maxLength: max_length }, "ycombinator");
|
|
103
|
+
return { content: [{ type: "text", text: formatForLLM(ctx) }] };
|
|
104
|
+
} catch (err) {
|
|
105
|
+
return { content: [{ type: "text", text: formatSecurityError(err) }] };
|
|
106
|
+
}
|
|
90
107
|
}
|
|
91
108
|
);
|
|
92
109
|
|
|
@@ -103,9 +120,13 @@ server.registerTool(
|
|
|
103
120
|
annotations: { readOnlyHint: true, openWorldHint: true },
|
|
104
121
|
},
|
|
105
122
|
async ({ query, max_length }) => {
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
123
|
+
try {
|
|
124
|
+
const result = await repoSearchAdapter({ url: query, maxLength: max_length });
|
|
125
|
+
const ctx = stampFreshness(result, { url: query, maxLength: max_length }, "github_search");
|
|
126
|
+
return { content: [{ type: "text", text: formatForLLM(ctx) }] };
|
|
127
|
+
} catch (err) {
|
|
128
|
+
return { content: [{ type: "text", text: formatSecurityError(err) }] };
|
|
129
|
+
}
|
|
109
130
|
}
|
|
110
131
|
);
|
|
111
132
|
|
|
@@ -122,9 +143,13 @@ server.registerTool(
|
|
|
122
143
|
annotations: { readOnlyHint: true, openWorldHint: true },
|
|
123
144
|
},
|
|
124
145
|
async ({ packages, max_length }) => {
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
146
|
+
try {
|
|
147
|
+
const result = await packageTrendsAdapter({ url: packages, maxLength: max_length });
|
|
148
|
+
const ctx = stampFreshness(result, { url: packages, maxLength: max_length }, "package_registry");
|
|
149
|
+
return { content: [{ type: "text", text: formatForLLM(ctx) }] };
|
|
150
|
+
} catch (err) {
|
|
151
|
+
return { content: [{ type: "text", text: formatSecurityError(err) }] };
|
|
152
|
+
}
|
|
128
153
|
}
|
|
129
154
|
);
|
|
130
155
|
|
package/worker/src/worker.ts
CHANGED
|
@@ -3,10 +3,11 @@ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
|
3
3
|
import { WebStandardStreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/webStandardStreamableHttp.js";
|
|
4
4
|
import { z } from "zod";
|
|
5
5
|
|
|
6
|
-
// ─── Types
|
|
6
|
+
// ─── Types ────────────────────────────────────────────────────────────────────
|
|
7
7
|
|
|
8
8
|
interface Env {
|
|
9
9
|
BROWSER: Fetcher;
|
|
10
|
+
API_KEY?: string; // Optional: set via `wrangler secret put API_KEY`
|
|
10
11
|
}
|
|
11
12
|
|
|
12
13
|
interface FreshContext {
|
|
@@ -18,9 +19,143 @@ interface FreshContext {
|
|
|
18
19
|
adapter: string;
|
|
19
20
|
}
|
|
20
21
|
|
|
21
|
-
// ───
|
|
22
|
+
// ─── Security ─────────────────────────────────────────────────────────────────
|
|
22
23
|
|
|
23
|
-
|
|
24
|
+
const ALLOWED_DOMAINS: Record<string, string[]> = {
|
|
25
|
+
github: ["github.com", "raw.githubusercontent.com"],
|
|
26
|
+
scholar: ["scholar.google.com"],
|
|
27
|
+
hackernews: ["news.ycombinator.com", "hn.algolia.com"],
|
|
28
|
+
yc: ["www.ycombinator.com", "ycombinator.com"],
|
|
29
|
+
};
|
|
30
|
+
|
|
31
|
+
const PRIVATE_IP_PATTERNS = [
|
|
32
|
+
/^localhost$/i,
|
|
33
|
+
/^127\./,
|
|
34
|
+
/^10\./,
|
|
35
|
+
/^192\.168\./,
|
|
36
|
+
/^172\.(1[6-9]|2\d|3[01])\./,
|
|
37
|
+
/^169\.254\./,
|
|
38
|
+
/^::1$/,
|
|
39
|
+
/^fc00:/i,
|
|
40
|
+
/^fe80:/i,
|
|
41
|
+
];
|
|
42
|
+
|
|
43
|
+
const MAX_URL_LENGTH = 500;
|
|
44
|
+
const MAX_QUERY_LENGTH = 200;
|
|
45
|
+
|
|
46
|
+
class SecurityError extends Error {
|
|
47
|
+
constructor(message: string) {
|
|
48
|
+
super(message);
|
|
49
|
+
this.name = "SecurityError";
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
function validateUrl(rawUrl: string, adapter: string): string {
|
|
54
|
+
if (rawUrl.length > MAX_URL_LENGTH)
|
|
55
|
+
throw new SecurityError(`URL too long (max ${MAX_URL_LENGTH} chars)`);
|
|
56
|
+
|
|
57
|
+
let parsed: URL;
|
|
58
|
+
try { parsed = new URL(rawUrl); }
|
|
59
|
+
catch { throw new SecurityError("Invalid URL format"); }
|
|
60
|
+
|
|
61
|
+
if (!["http:", "https:"].includes(parsed.protocol))
|
|
62
|
+
throw new SecurityError("Only http/https URLs are allowed");
|
|
63
|
+
|
|
64
|
+
const hostname = parsed.hostname.toLowerCase();
|
|
65
|
+
|
|
66
|
+
for (const pattern of PRIVATE_IP_PATTERNS) {
|
|
67
|
+
if (pattern.test(hostname))
|
|
68
|
+
throw new SecurityError("Access to private/internal addresses is not allowed");
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
const allowed = ALLOWED_DOMAINS[adapter];
|
|
72
|
+
if (allowed && allowed.length > 0) {
|
|
73
|
+
const ok = allowed.some(d => hostname === d || hostname.endsWith(`.${d}`));
|
|
74
|
+
if (!ok)
|
|
75
|
+
throw new SecurityError(`URL not allowed for ${adapter}. Allowed domains: ${allowed.join(", ")}`);
|
|
76
|
+
}
|
|
77
|
+
|
|
78
|
+
return rawUrl;
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
function sanitizeQuery(query: string, maxLen = MAX_QUERY_LENGTH): string {
|
|
82
|
+
if (query.length > maxLen)
|
|
83
|
+
throw new SecurityError(`Query too long (max ${maxLen} chars)`);
|
|
84
|
+
// Strip null bytes and control characters
|
|
85
|
+
return query.replace(/[\x00-\x1F\x7F]/g, "").trim();
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
// ─── Rate Limiting (in-memory, per isolate) ───────────────────────────────────
|
|
89
|
+
|
|
90
|
+
interface RateEntry { count: number; windowStart: number; }
|
|
91
|
+
const rateMap = new Map<string, RateEntry>();
|
|
92
|
+
|
|
93
|
+
const RATE_LIMIT = 20; // max requests
|
|
94
|
+
const RATE_WINDOW_MS = 60_000; // per 60 seconds
|
|
95
|
+
|
|
96
|
+
function checkRateLimit(ip: string): void {
|
|
97
|
+
const now = Date.now();
|
|
98
|
+
const entry = rateMap.get(ip);
|
|
99
|
+
|
|
100
|
+
if (!entry || now - entry.windowStart > RATE_WINDOW_MS) {
|
|
101
|
+
rateMap.set(ip, { count: 1, windowStart: now });
|
|
102
|
+
return;
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
if (entry.count >= RATE_LIMIT) {
|
|
106
|
+
throw new SecurityError(`Rate limit exceeded. Max ${RATE_LIMIT} requests per minute.`);
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
entry.count++;
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
// Prevent the map from growing unboundedly
|
|
113
|
+
function pruneRateMap(): void {
|
|
114
|
+
const now = Date.now();
|
|
115
|
+
for (const [ip, entry] of rateMap) {
|
|
116
|
+
if (now - entry.windowStart > RATE_WINDOW_MS) rateMap.delete(ip);
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
// ─── Auth ─────────────────────────────────────────────────────────────────────
|
|
121
|
+
|
|
122
|
+
function checkAuth(request: Request, env: Env): void {
|
|
123
|
+
if (!env.API_KEY) return; // Auth disabled if no key is set
|
|
124
|
+
|
|
125
|
+
const authHeader = request.headers.get("Authorization") ?? "";
|
|
126
|
+
const token = authHeader.startsWith("Bearer ") ? authHeader.slice(7) : "";
|
|
127
|
+
|
|
128
|
+
if (token !== env.API_KEY) {
|
|
129
|
+
throw new SecurityError("Unauthorized. Provide a valid Bearer token.");
|
|
130
|
+
}
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
// ─── Helpers ──────────────────────────────────────────────────────────────────
|
|
134
|
+
|
|
135
|
+
function getClientIp(request: Request): string {
|
|
136
|
+
return (
|
|
137
|
+
request.headers.get("CF-Connecting-IP") ??
|
|
138
|
+
request.headers.get("X-Forwarded-For")?.split(",")[0]?.trim() ??
|
|
139
|
+
"unknown"
|
|
140
|
+
);
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
function securityErrorResponse(message: string, status: number): Response {
|
|
144
|
+
return new Response(JSON.stringify({ error: message }), {
|
|
145
|
+
status,
|
|
146
|
+
headers: { "Content-Type": "application/json" },
|
|
147
|
+
});
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
// ─── Freshness Stamp ──────────────────────────────────────────────────────────
|
|
151
|
+
|
|
152
|
+
function stamp(
|
|
153
|
+
content: string,
|
|
154
|
+
url: string,
|
|
155
|
+
date: string | null,
|
|
156
|
+
confidence: "high" | "medium" | "low",
|
|
157
|
+
adapter: string
|
|
158
|
+
): string {
|
|
24
159
|
const ctx: FreshContext = {
|
|
25
160
|
content: content.slice(0, 6000),
|
|
26
161
|
source_url: url,
|
|
@@ -44,107 +179,133 @@ function stamp(content: string, url: string, date: string | null, confidence: "h
|
|
|
44
179
|
// ─── Server Factory ───────────────────────────────────────────────────────────
|
|
45
180
|
|
|
46
181
|
function createServer(env: Env): McpServer {
|
|
47
|
-
const server = new McpServer({ name: "freshcontext-mcp", version: "0.1.
|
|
182
|
+
const server = new McpServer({ name: "freshcontext-mcp", version: "0.1.3" });
|
|
48
183
|
|
|
49
184
|
// ── extract_github ──────────────────────────────────────────────────────────
|
|
50
185
|
server.registerTool("extract_github", {
|
|
51
186
|
description: "Extract real-time data from a GitHub repository — README, stars, forks, last commit, topics. Returns timestamped freshcontext.",
|
|
52
187
|
inputSchema: z.object({
|
|
53
|
-
url: z.string().url().describe("Full GitHub repo URL"),
|
|
188
|
+
url: z.string().url().describe("Full GitHub repo URL e.g. https://github.com/owner/repo"),
|
|
54
189
|
}),
|
|
55
190
|
annotations: { readOnlyHint: true, openWorldHint: true },
|
|
56
191
|
}, async ({ url }) => {
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
192
|
+
try {
|
|
193
|
+
const safeUrl = validateUrl(url, "github");
|
|
194
|
+
const browser = await puppeteer.launch(env.BROWSER);
|
|
195
|
+
const page = await browser.newPage();
|
|
196
|
+
await page.setUserAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/124.0.0.0 Safari/537.36");
|
|
197
|
+
await page.goto(safeUrl, { waitUntil: "domcontentloaded" });
|
|
198
|
+
|
|
199
|
+
const data = await page.evaluate(`(function() {
|
|
200
|
+
var readme = (document.querySelector('[data-target="readme-toc.content"]') || document.querySelector('.markdown-body') || {}).textContent || null;
|
|
201
|
+
var starsEl = document.querySelector('[id="repo-stars-counter-star"]') || document.querySelector('.Counter.js-social-count');
|
|
202
|
+
var stars = starsEl ? starsEl.textContent.trim() : null;
|
|
203
|
+
var forksEl = document.querySelector('[id="repo-network-counter"]');
|
|
204
|
+
var forks = forksEl ? forksEl.textContent.trim() : null;
|
|
205
|
+
var commitEl = document.querySelector('relative-time');
|
|
206
|
+
var lastCommit = commitEl ? commitEl.getAttribute('datetime') : null;
|
|
207
|
+
var descEl = document.querySelector('.f4.my-3');
|
|
208
|
+
var description = descEl ? descEl.textContent.trim() : null;
|
|
209
|
+
var topics = Array.from(document.querySelectorAll('.topic-tag')).map(function(t) { return t.textContent.trim(); });
|
|
210
|
+
var langEl = document.querySelector('.color-fg-default.text-bold.mr-1');
|
|
211
|
+
var language = langEl ? langEl.textContent.trim() : null;
|
|
212
|
+
return { readme, stars, forks, lastCommit, description, topics, language };
|
|
213
|
+
})()`);
|
|
214
|
+
|
|
215
|
+
await browser.close();
|
|
216
|
+
const d = data as any;
|
|
217
|
+
const raw = [
|
|
218
|
+
`Description: ${d.description ?? "N/A"}`,
|
|
219
|
+
`Stars: ${d.stars ?? "N/A"} | Forks: ${d.forks ?? "N/A"}`,
|
|
220
|
+
`Language: ${d.language ?? "N/A"}`,
|
|
221
|
+
`Last commit: ${d.lastCommit ?? "N/A"}`,
|
|
222
|
+
`Topics: ${d.topics?.join(", ") ?? "none"}`,
|
|
223
|
+
`\n--- README ---\n${d.readme ?? "No README"}`,
|
|
224
|
+
].join("\n");
|
|
225
|
+
return { content: [{ type: "text", text: stamp(raw, safeUrl, d.lastCommit ?? null, d.lastCommit ? "high" : "medium", "github") }] };
|
|
226
|
+
} catch (err: any) {
|
|
227
|
+
return { content: [{ type: "text", text: `[ERROR] ${err.message}` }] };
|
|
228
|
+
}
|
|
82
229
|
});
|
|
83
230
|
|
|
84
231
|
// ── extract_hackernews ──────────────────────────────────────────────────────
|
|
85
232
|
server.registerTool("extract_hackernews", {
|
|
86
|
-
description: "Extract top stories from Hacker News with real-time timestamps.",
|
|
87
|
-
inputSchema: z.object({ url: z.string().url().describe("HN URL") }),
|
|
233
|
+
description: "Extract top stories or search results from Hacker News with real-time timestamps.",
|
|
234
|
+
inputSchema: z.object({ url: z.string().url().describe("HN URL e.g. https://news.ycombinator.com") }),
|
|
88
235
|
annotations: { readOnlyHint: true, openWorldHint: true },
|
|
89
236
|
}, async ({ url }) => {
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
var
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
237
|
+
try {
|
|
238
|
+
const safeUrl = validateUrl(url, "hackernews");
|
|
239
|
+
const browser = await puppeteer.launch(env.BROWSER);
|
|
240
|
+
const page = await browser.newPage();
|
|
241
|
+
await page.goto(safeUrl, { waitUntil: "domcontentloaded" });
|
|
242
|
+
|
|
243
|
+
const data = await page.evaluate(`(function() {
|
|
244
|
+
var items = Array.from(document.querySelectorAll('.athing')).slice(0, 20);
|
|
245
|
+
return items.map(function(el) {
|
|
246
|
+
var titleLineEl = el.querySelector('.titleline > a');
|
|
247
|
+
var title = titleLineEl ? titleLineEl.textContent.trim() : null;
|
|
248
|
+
var link = titleLineEl ? titleLineEl.getAttribute('href') : null;
|
|
249
|
+
var subtext = el.nextElementSibling;
|
|
250
|
+
var scoreEl = subtext ? subtext.querySelector('.score') : null;
|
|
251
|
+
var score = scoreEl ? scoreEl.textContent.trim() : null;
|
|
252
|
+
var ageEl = subtext ? subtext.querySelector('.age') : null;
|
|
253
|
+
var age = ageEl ? ageEl.getAttribute('title') : null;
|
|
254
|
+
return { title, link, score, age };
|
|
255
|
+
});
|
|
256
|
+
})()`);
|
|
257
|
+
|
|
258
|
+
await browser.close();
|
|
259
|
+
const items = data as any[];
|
|
260
|
+
const raw = items.map((r, i) =>
|
|
261
|
+
`[${i + 1}] ${r.title}\nURL: ${r.link}\nScore: ${r.score ?? "N/A"}\nPosted: ${r.age ?? "unknown"}`
|
|
262
|
+
).join("\n\n");
|
|
263
|
+
const newest = items.map(r => r.age).filter(Boolean).sort().reverse()[0] ?? null;
|
|
264
|
+
return { content: [{ type: "text", text: stamp(raw, safeUrl, newest, newest ? "high" : "medium", "hackernews") }] };
|
|
265
|
+
} catch (err: any) {
|
|
266
|
+
return { content: [{ type: "text", text: `[ERROR] ${err.message}` }] };
|
|
267
|
+
}
|
|
114
268
|
});
|
|
115
269
|
|
|
116
270
|
// ── extract_scholar ─────────────────────────────────────────────────────────
|
|
117
271
|
server.registerTool("extract_scholar", {
|
|
118
272
|
description: "Extract research results from Google Scholar with publication dates.",
|
|
119
|
-
inputSchema: z.object({ url: z.string().url().describe("Google Scholar URL") }),
|
|
273
|
+
inputSchema: z.object({ url: z.string().url().describe("Google Scholar search URL") }),
|
|
120
274
|
annotations: { readOnlyHint: true, openWorldHint: true },
|
|
121
275
|
}, async ({ url }) => {
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
var
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
276
|
+
try {
|
|
277
|
+
const safeUrl = validateUrl(url, "scholar");
|
|
278
|
+
const browser = await puppeteer.launch(env.BROWSER);
|
|
279
|
+
const page = await browser.newPage();
|
|
280
|
+
await page.setUserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 Chrome/124.0.0.0 Safari/537.36");
|
|
281
|
+
await page.goto(safeUrl, { waitUntil: "domcontentloaded" });
|
|
282
|
+
|
|
283
|
+
const data = await page.evaluate(`(function() {
|
|
284
|
+
var items = Array.from(document.querySelectorAll('.gs_r.gs_or.gs_scl'));
|
|
285
|
+
return items.map(function(el) {
|
|
286
|
+
var titleEl = el.querySelector('.gs_rt');
|
|
287
|
+
var title = titleEl ? titleEl.textContent.trim() : null;
|
|
288
|
+
var authorsEl = el.querySelector('.gs_a');
|
|
289
|
+
var authors = authorsEl ? authorsEl.textContent.trim() : null;
|
|
290
|
+
var snippetEl = el.querySelector('.gs_rs');
|
|
291
|
+
var snippet = snippetEl ? snippetEl.textContent.trim() : null;
|
|
292
|
+
var yearMatch = authors ? authors.match(/\\b(19|20)\\d{2}\\b/) : null;
|
|
293
|
+
var year = yearMatch ? yearMatch[0] : null;
|
|
294
|
+
return { title, authors, snippet, year };
|
|
295
|
+
});
|
|
296
|
+
})()`);
|
|
297
|
+
|
|
298
|
+
await browser.close();
|
|
299
|
+
const items = data as any[];
|
|
300
|
+
const raw = items.map((r, i) =>
|
|
301
|
+
`[${i + 1}] ${r.title ?? "Untitled"}\nAuthors: ${r.authors ?? "Unknown"}\nYear: ${r.year ?? "Unknown"}\nSnippet: ${r.snippet ?? "N/A"}`
|
|
302
|
+
).join("\n\n");
|
|
303
|
+
const years = items.map(r => r.year).filter(Boolean).sort().reverse();
|
|
304
|
+
const newest = years[0] ?? null;
|
|
305
|
+
return { content: [{ type: "text", text: stamp(raw, safeUrl, newest ? `${newest}-01-01` : null, newest ? "high" : "low", "google_scholar") }] };
|
|
306
|
+
} catch (err: any) {
|
|
307
|
+
return { content: [{ type: "text", text: `[ERROR] ${err.message}` }] };
|
|
308
|
+
}
|
|
148
309
|
});
|
|
149
310
|
|
|
150
311
|
return server;
|
|
@@ -154,6 +315,23 @@ function createServer(env: Env): McpServer {
|
|
|
154
315
|
|
|
155
316
|
export default {
|
|
156
317
|
async fetch(request: Request, env: Env): Promise<Response> {
|
|
318
|
+
// Prune stale rate limit entries occasionally
|
|
319
|
+
if (Math.random() < 0.05) pruneRateMap();
|
|
320
|
+
|
|
321
|
+
try {
|
|
322
|
+
// 1. Auth check
|
|
323
|
+
checkAuth(request, env);
|
|
324
|
+
|
|
325
|
+
// 2. Rate limit check
|
|
326
|
+
const ip = getClientIp(request);
|
|
327
|
+
checkRateLimit(ip);
|
|
328
|
+
|
|
329
|
+
} catch (err: any) {
|
|
330
|
+
const status = err.message.startsWith("Unauthorized") ? 401 : 429;
|
|
331
|
+
return securityErrorResponse(err.message, status);
|
|
332
|
+
}
|
|
333
|
+
|
|
334
|
+
// 3. Handle MCP request
|
|
157
335
|
const transport = new WebStandardStreamableHTTPServerTransport();
|
|
158
336
|
const server = createServer(env);
|
|
159
337
|
await server.connect(transport);
|