freshcontext-mcp 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -21,14 +21,14 @@ Every piece of data extracted by `freshcontext-mcp` is wrapped in a structured e
21
21
  [FRESHCONTEXT]
22
22
  Source: https://github.com/owner/repo
23
23
  Published: 2024-11-03
24
- Retrieved: 2026-03-03T10:14:00Z
24
+ Retrieved: 2026-03-04T10:14:00Z
25
25
  Confidence: high
26
26
  ---
27
27
  ... content ...
28
28
  [/FRESHCONTEXT]
29
29
  ```
30
30
 
31
- The AI agent always knows **when it's looking at data**, not just what the data says. This is the difference between a hallucinated recency claim and a verifiable one.
31
+ The AI agent always knows **when it's looking at data**, not just what the data says.
32
32
 
33
33
  ---
34
34
 
@@ -60,13 +60,33 @@ The AI agent always knows **when it's looking at data**, not just what the data
60
60
 
61
61
  ## Quick Start
62
62
 
63
- ### Install via npm
63
+ ### Option A — Cloud (nothing to clone or build, works immediately)
64
64
 
65
- ```bash
66
- npx freshcontext-mcp
65
+ No Playwright, no cloning, nothing to build — the config below uses `npx`, which ships with Node.js. Just add this to your Claude Desktop config and restart.
66
+
67
+ **Mac:** open `~/Library/Application Support/Claude/claude_desktop_config.json`
68
+ **Windows:** open `%APPDATA%\Claude\claude_desktop_config.json`
69
+
70
+ ```json
71
+ {
72
+ "mcpServers": {
73
+ "freshcontext": {
74
+ "command": "npx",
75
+ "args": ["-y", "mcp-remote", "https://freshcontext-mcp.gimmanuel73.workers.dev/mcp"]
76
+ }
77
+ }
78
+ }
67
79
  ```
68
80
 
69
- ### Or clone and run locally
81
+ Restart Claude Desktop. The freshcontext tools will appear in your session.
82
+
83
+ > **Note:** If `claude_desktop_config.json` doesn't exist yet, create it with the content above.
84
+
85
+ ---
86
+
87
+ ### Option B — Local (full Playwright, faster for heavy use)
88
+
89
+ **Prerequisites:** Node.js 18+ ([nodejs.org](https://nodejs.org))
70
90
 
71
91
  ```bash
72
92
  git clone https://github.com/PrinceGabriel-lgtm/freshcontext-mcp
@@ -76,39 +96,56 @@ npx playwright install chromium
76
96
  npm run build
77
97
  ```
78
98
 
79
- ### Connect to Claude Desktop
80
-
81
- Add to your `claude_desktop_config.json`:
82
-
83
- **Mac:** `~/Library/Application Support/Claude/claude_desktop_config.json`
84
- **Windows:** `%APPDATA%\Claude\claude_desktop_config.json`
99
+ Then add to your Claude Desktop config:
85
100
 
101
+ **Mac** (`~/Library/Application Support/Claude/claude_desktop_config.json`):
86
102
  ```json
87
103
  {
88
104
  "mcpServers": {
89
- "freshcontext-local": {
105
+ "freshcontext": {
90
106
  "command": "node",
91
- "args": ["/absolute/path/to/freshcontext-mcp/dist/server.js"]
107
+ "args": ["/Users/YOUR_USERNAME/path/to/freshcontext-mcp/dist/server.js"]
92
108
  }
93
109
  }
94
110
  }
95
111
  ```
96
112
 
97
- Restart Claude Desktop. You'll see the freshcontext tools available in your session.
98
-
99
- ### Or use the Cloudflare edge deployment (no install needed)
100
-
113
+ **Windows** (`%APPDATA%\Claude\claude_desktop_config.json`):
101
114
  ```json
102
115
  {
103
116
  "mcpServers": {
104
- "freshcontext-cloud": {
105
- "command": "npx",
106
- "args": ["-y", "mcp-remote", "https://freshcontext-worker.gimmanuel73.workers.dev/mcp"]
117
+ "freshcontext": {
118
+ "command": "node",
119
+ "args": ["C:\\Users\\YOUR_USERNAME\\path\\to\\freshcontext-mcp\\dist\\server.js"]
107
120
  }
108
121
  }
109
122
  }
110
123
  ```
111
124
 
125
+ Restart Claude Desktop.
126
+
127
+ ---
128
+
129
+ ### Troubleshooting (Mac)
130
+
131
+ **"command not found: node"** — Node isn't on your PATH inside Claude Desktop's environment. Use the full path:
132
+ ```bash
133
+ which node # copy this output
134
+ ```
135
+ Then replace `"command": "node"` with `"command": "/usr/local/bin/node"` (or whatever `which node` returned).
136
+
137
+ **"npx: command not found"** — Same issue. Run `which npx` and use the full path for Option A:
138
+ ```json
139
+ "command": "/usr/local/bin/npx"
140
+ ```
141
+
142
+ **Config file doesn't exist** — Create it. On Mac:
143
+ ```bash
144
+ mkdir -p ~/Library/Application\ Support/Claude
145
+ touch ~/Library/Application\ Support/Claude/claude_desktop_config.json
146
+ ```
147
+ Then paste the config JSON above into it.
148
+
112
149
  ---
113
150
 
114
151
  ## Usage Examples
@@ -162,12 +199,12 @@ This makes freshness **verifiable**, not assumed.
162
199
  Uses headless Chromium via Playwright. Full browser rendering for JavaScript-heavy sites.
163
200
 
164
201
  ### Cloud (Cloudflare Workers)
165
- The `worker/` directory contains a Cloudflare Workers deployment using the Browser Rendering REST API. No Playwright dependency — runs at the edge globally.
202
+ The `worker/` directory contains a Cloudflare Workers deployment. No Playwright dependency — runs at the edge globally.
166
203
 
167
204
  ```bash
168
205
  cd worker
169
206
  npm install
170
- npx wrangler secret put CF_API_TOKEN
207
+ npx wrangler secret put API_KEY
171
208
  npx wrangler deploy
172
209
  ```
173
210
 
@@ -180,15 +217,16 @@ freshcontext-mcp/
180
217
  ├── src/
181
218
  │ ├── server.ts # MCP server, all tool registrations
182
219
  │ ├── types.ts # FreshContext interfaces
220
+ │ ├── security.ts # Input validation, domain allowlists
183
221
  │ ├── adapters/
184
- │ │ ├── github.ts # GitHub repo extraction
185
- │ │ ├── hackernews.ts # HN front page + Algolia API
186
- │ │ ├── scholar.ts # Google Scholar scraping
187
- │ │ ├── yc.ts # YC company directory
188
- │ │ ├── repoSearch.ts # GitHub Search API
189
- │ │ └── packageTrends.ts # npm + PyPI registries
222
+ │ │ ├── github.ts
223
+ │ │ ├── hackernews.ts
224
+ │ │ ├── scholar.ts
225
+ │ │ ├── yc.ts
226
+ │ │ ├── repoSearch.ts
227
+ │ │ └── packageTrends.ts
190
228
  │ └── tools/
191
- │ └── freshnessStamp.ts # FreshContext envelope builder
229
+ │ └── freshnessStamp.ts
192
230
  └── worker/ # Cloudflare Workers deployment
193
231
  └── src/worker.ts
194
232
  ```
@@ -205,17 +243,17 @@ freshcontext-mcp/
205
243
  - [x] npm/PyPI package trends
206
244
  - [x] `extract_landscape` composite tool
207
245
  - [x] Cloudflare Workers deployment
246
+ - [x] Worker auth + rate limiting + domain allowlists
208
247
  - [ ] Product Hunt launches adapter
209
- - [ ] Crunchbase/funding signals adapter
248
+ - [ ] Finance/market data adapter
210
249
  - [ ] TTL-based caching layer
211
250
  - [ ] `freshness_score` numeric metric
212
- - [ ] Webhook support for real-time updates
213
251
 
214
252
  ---
215
253
 
216
254
  ## Contributing
217
255
 
218
- PRs welcome. New adapters are the highest-value contribution — see the existing adapters in `src/adapters/` for the pattern. Each adapter returns `{ raw, content_date, freshness_confidence }`.
256
+ PRs welcome. New adapters are the highest-value contribution — see `src/adapters/` for the pattern. Each adapter returns `{ raw, content_date, freshness_confidence }`.
219
257
 
220
258
  ---
221
259
 
package/dist/server.js CHANGED
@@ -1,3 +1,4 @@
1
+ #!/usr/bin/env node
1
2
  import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
2
3
  import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
3
4
  import { z } from "zod";
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "freshcontext-mcp",
3
- "version": "0.1.3",
3
+ "version": "0.1.5",
4
4
  "description": "Real-time web extraction MCP server with freshness timestamps for AI agents",
5
5
  "keywords": [
6
6
  "mcp",
@@ -24,6 +24,9 @@
24
24
  "license": "MIT",
25
25
  "type": "module",
26
26
  "main": "dist/server.js",
27
+ "bin": {
28
+ "freshcontext-mcp": "dist/server.js"
29
+ },
27
30
  "scripts": {
28
31
  "build": "tsc",
29
32
  "dev": "tsx watch src/server.ts",
package/src/server.ts CHANGED
@@ -1,3 +1,4 @@
1
+ #!/usr/bin/env node
1
2
  import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
2
3
  import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
3
4
  import { z } from "zod";
@@ -202,3 +203,4 @@ async function main() {
202
203
  }
203
204
 
204
205
  main().catch(console.error);
206
+
@@ -0,0 +1,204 @@
1
+ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
2
+ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
3
+ import { z } from "zod";
4
+ import { githubAdapter } from "./adapters/github.js";
5
+ import { scholarAdapter } from "./adapters/scholar.js";
6
+ import { hackerNewsAdapter } from "./adapters/hackernews.js";
7
+ import { ycAdapter } from "./adapters/yc.js";
8
+ import { repoSearchAdapter } from "./adapters/repoSearch.js";
9
+ import { packageTrendsAdapter } from "./adapters/packageTrends.js";
10
+ import { stampFreshness, formatForLLM } from "./tools/freshnessStamp.js";
11
+ import { SecurityError, formatSecurityError } from "./security.js";
12
+
13
+ const server = new McpServer({
14
+ name: "freshcontext-mcp",
15
+ version: "0.1.0",
16
+ });
17
+
18
+ // ─── Tool: extract_github ────────────────────────────────────────────────────
19
+ server.registerTool(
20
+ "extract_github",
21
+ {
22
+ description:
23
+ "Extract real-time data from a GitHub repository — README, stars, forks, language, topics, last commit. Returns timestamped freshcontext.",
24
+ inputSchema: z.object({
25
+ url: z.string().url().describe("Full GitHub repo URL e.g. https://github.com/owner/repo"),
26
+ max_length: z.number().optional().default(6000).describe("Max content length"),
27
+ }),
28
+ annotations: { readOnlyHint: true, openWorldHint: true },
29
+ },
30
+ async ({ url, max_length }) => {
31
+ try {
32
+ const result = await githubAdapter({ url, maxLength: max_length });
33
+ const ctx = stampFreshness(result, { url, maxLength: max_length }, "github");
34
+ return { content: [{ type: "text", text: formatForLLM(ctx) }] };
35
+ } catch (err) {
36
+ return { content: [{ type: "text", text: formatSecurityError(err) }] };
37
+ }
38
+ }
39
+ );
40
+
41
+ // ─── Tool: extract_scholar ───────────────────────────────────────────────────
42
+ server.registerTool(
43
+ "extract_scholar",
44
+ {
45
+ description:
46
+ "Extract research results from a Google Scholar search URL. Returns titles, authors, publication years, and snippets — all timestamped.",
47
+ inputSchema: z.object({
48
+ url: z.string().url().describe("Google Scholar search URL e.g. https://scholar.google.com/scholar?q=..."),
49
+ max_length: z.number().optional().default(6000),
50
+ }),
51
+ annotations: { readOnlyHint: true, openWorldHint: true },
52
+ },
53
+ async ({ url, max_length }) => {
54
+ try {
55
+ const result = await scholarAdapter({ url, maxLength: max_length });
56
+ const ctx = stampFreshness(result, { url, maxLength: max_length }, "google_scholar");
57
+ return { content: [{ type: "text", text: formatForLLM(ctx) }] };
58
+ } catch (err) {
59
+ return { content: [{ type: "text", text: formatSecurityError(err) }] };
60
+ }
61
+ }
62
+ );
63
+
64
+ // ─── Tool: extract_hackernews ────────────────────────────────────────────────
65
+ server.registerTool(
66
+ "extract_hackernews",
67
+ {
68
+ description:
69
+ "Extract top stories or search results from Hacker News. Real-time dev/tech community sentiment with post timestamps.",
70
+ inputSchema: z.object({
71
+ url: z.string().url().describe("HN URL e.g. https://news.ycombinator.com or https://hn.algolia.com/?q=..."),
72
+ max_length: z.number().optional().default(4000),
73
+ }),
74
+ annotations: { readOnlyHint: true, openWorldHint: true },
75
+ },
76
+ async ({ url, max_length }) => {
77
+ try {
78
+ const result = await hackerNewsAdapter({ url, maxLength: max_length });
79
+ const ctx = stampFreshness(result, { url, maxLength: max_length }, "hackernews");
80
+ return { content: [{ type: "text", text: formatForLLM(ctx) }] };
81
+ } catch (err) {
82
+ return { content: [{ type: "text", text: formatSecurityError(err) }] };
83
+ }
84
+ }
85
+ );
86
+
87
+ // ─── Tool: extract_yc ──────────────────────────────────────────────────────────
88
+ server.registerTool(
89
+ "extract_yc",
90
+ {
91
+ description:
92
+ "Scrape YC company listings. Use https://www.ycombinator.com/companies?query=KEYWORD to find startups in a space. Returns name, batch, tags, description per company with freshness timestamp.",
93
+ inputSchema: z.object({
94
+ url: z.string().url().describe("YC companies URL e.g. https://www.ycombinator.com/companies?query=mcp"),
95
+ max_length: z.number().optional().default(6000),
96
+ }),
97
+ annotations: { readOnlyHint: true, openWorldHint: true },
98
+ },
99
+ async ({ url, max_length }) => {
100
+ try {
101
+ const result = await ycAdapter({ url, maxLength: max_length });
102
+ const ctx = stampFreshness(result, { url, maxLength: max_length }, "ycombinator");
103
+ return { content: [{ type: "text", text: formatForLLM(ctx) }] };
104
+ } catch (err) {
105
+ return { content: [{ type: "text", text: formatSecurityError(err) }] };
106
+ }
107
+ }
108
+ );
109
+
110
+ // ─── Tool: search_repos ──────────────────────────────────────────────────────
111
+ server.registerTool(
112
+ "search_repos",
113
+ {
114
+ description:
115
+ "Search GitHub for repositories matching a keyword or topic. Returns top results by stars with activity signals. Use to find competitors, similar tools, or related projects.",
116
+ inputSchema: z.object({
117
+ query: z.string().describe("Search query e.g. 'mcp server typescript' or 'cashflow prediction python'"),
118
+ max_length: z.number().optional().default(6000),
119
+ }),
120
+ annotations: { readOnlyHint: true, openWorldHint: true },
121
+ },
122
+ async ({ query, max_length }) => {
123
+ try {
124
+ const result = await repoSearchAdapter({ url: query, maxLength: max_length });
125
+ const ctx = stampFreshness(result, { url: query, maxLength: max_length }, "github_search");
126
+ return { content: [{ type: "text", text: formatForLLM(ctx) }] };
127
+ } catch (err) {
128
+ return { content: [{ type: "text", text: formatSecurityError(err) }] };
129
+ }
130
+ }
131
+ );
132
+
133
+ // ─── Tool: package_trends ────────────────────────────────────────────────────
134
+ server.registerTool(
135
+ "package_trends",
136
+ {
137
+ description:
138
+ "Look up npm and PyPI package metadata — version history, release cadence, last updated. Use to gauge ecosystem activity around a tool or dependency. Supports comma-separated list of packages.",
139
+ inputSchema: z.object({
140
+ packages: z.string().describe("Package name(s) e.g. 'langchain' or 'npm:zod,pypi:fastapi'"),
141
+ max_length: z.number().optional().default(5000),
142
+ }),
143
+ annotations: { readOnlyHint: true, openWorldHint: true },
144
+ },
145
+ async ({ packages, max_length }) => {
146
+ try {
147
+ const result = await packageTrendsAdapter({ url: packages, maxLength: max_length });
148
+ const ctx = stampFreshness(result, { url: packages, maxLength: max_length }, "package_registry");
149
+ return { content: [{ type: "text", text: formatForLLM(ctx) }] };
150
+ } catch (err) {
151
+ return { content: [{ type: "text", text: formatSecurityError(err) }] };
152
+ }
153
+ }
154
+ );
155
+
156
+ // ─── Tool: extract_landscape ─────────────────────────────────────────────────
157
+ server.registerTool(
158
+ "extract_landscape",
159
+ {
160
+ description:
161
+ "Composite intelligence tool. Given a project idea or keyword, simultaneously queries YC startups, GitHub repos, HN sentiment, and package activity to answer: Who is building this? Is it funded? What's getting traction? Returns a unified timestamped landscape report.",
162
+ inputSchema: z.object({
163
+ topic: z.string().describe("Your project idea or keyword e.g. 'mcp server' or 'cashflow prediction'"),
164
+ max_length: z.number().optional().default(8000),
165
+ }),
166
+ annotations: { readOnlyHint: true, openWorldHint: true },
167
+ },
168
+ async ({ topic, max_length }) => {
169
+ const perSection = Math.floor((max_length ?? 8000) / 4);
170
+
171
+ const [ycResult, repoResult, hnResult, pkgResult] = await Promise.allSettled([
172
+ ycAdapter({ url: `https://www.ycombinator.com/companies?query=${encodeURIComponent(topic)}`, maxLength: perSection }),
173
+ repoSearchAdapter({ url: topic, maxLength: perSection }),
174
+ hackerNewsAdapter({ url: `https://hn.algolia.com/api/v1/search?query=${encodeURIComponent(topic)}&tags=story&hitsPerPage=15`, maxLength: perSection }),
175
+ packageTrendsAdapter({ url: topic, maxLength: perSection }),
176
+ ]);
177
+
178
+ const section = (label: string, result: PromiseSettledResult<{ raw: string; content_date: string | null; freshness_confidence: string }>) =>
179
+ result.status === "fulfilled"
180
+ ? `## ${label}\n${result.value.raw}`
181
+ : `## ${label}\n[Error: ${(result as PromiseRejectedResult).reason}]`;
182
+
183
+ const combined = [
184
+ `# Landscape Report: "${topic}"`,
185
+ `Generated: ${new Date().toISOString()}`,
186
+ "",
187
+ section("🚀 YC Startups in this space", ycResult),
188
+ section("📦 Top GitHub repos", repoResult),
189
+ section("💬 HN sentiment (last month)", hnResult),
190
+ section("📊 Package ecosystem", pkgResult),
191
+ ].join("\n\n");
192
+
193
+ return { content: [{ type: "text", text: combined }] };
194
+ }
195
+ );
196
+
197
+ // ─── Start ───────────────────────────────────────────────────────────────────
198
+ async function main() {
199
+ const transport = new StdioServerTransport();
200
+ await server.connect(transport);
201
+ console.error("freshcontext-mcp running on stdio");
202
+ }
203
+
204
+ main().catch(console.error);
@@ -3,10 +3,11 @@ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
3
3
  import { WebStandardStreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/webStandardStreamableHttp.js";
4
4
  import { z } from "zod";
5
5
 
6
- // ─── Types ───────────────────────────────────────────────────────────────────
6
+ // ─── Types ────────────────────────────────────────────────────────────────────
7
7
 
8
8
  interface Env {
9
9
  BROWSER: Fetcher;
10
+ API_KEY?: string; // Optional: set via `wrangler secret put API_KEY`
10
11
  }
11
12
 
12
13
  interface FreshContext {
@@ -18,9 +19,143 @@ interface FreshContext {
18
19
  adapter: string;
19
20
  }
20
21
 
21
- // ─── Freshness Stamp ─────────────────────────────────────────────────────────
22
+ // ─── Security ─────────────────────────────────────────────────────────────────
22
23
 
23
- function stamp(content: string, url: string, date: string | null, confidence: "high" | "medium" | "low", adapter: string): string {
24
+ const ALLOWED_DOMAINS: Record<string, string[]> = {
25
+ github: ["github.com", "raw.githubusercontent.com"],
26
+ scholar: ["scholar.google.com"],
27
+ hackernews: ["news.ycombinator.com", "hn.algolia.com"],
28
+ yc: ["www.ycombinator.com", "ycombinator.com"],
29
+ };
30
+
31
+ const PRIVATE_IP_PATTERNS = [
32
+ /^localhost$/i,
33
+ /^127\./,
34
+ /^10\./,
35
+ /^192\.168\./,
36
+ /^172\.(1[6-9]|2\d|3[01])\./,
37
+ /^169\.254\./,
38
+ /^::1$/,
39
+ /^fc00:/i,
40
+ /^fe80:/i,
41
+ ];
42
+
43
+ const MAX_URL_LENGTH = 500;
44
+ const MAX_QUERY_LENGTH = 200;
45
+
46
+ class SecurityError extends Error {
47
+ constructor(message: string) {
48
+ super(message);
49
+ this.name = "SecurityError";
50
+ }
51
+ }
52
+
53
+ function validateUrl(rawUrl: string, adapter: string): string {
54
+ if (rawUrl.length > MAX_URL_LENGTH)
55
+ throw new SecurityError(`URL too long (max ${MAX_URL_LENGTH} chars)`);
56
+
57
+ let parsed: URL;
58
+ try { parsed = new URL(rawUrl); }
59
+ catch { throw new SecurityError("Invalid URL format"); }
60
+
61
+ if (!["http:", "https:"].includes(parsed.protocol))
62
+ throw new SecurityError("Only http/https URLs are allowed");
63
+
64
+ const hostname = parsed.hostname.toLowerCase();
65
+
66
+ for (const pattern of PRIVATE_IP_PATTERNS) {
67
+ if (pattern.test(hostname))
68
+ throw new SecurityError("Access to private/internal addresses is not allowed");
69
+ }
70
+
71
+ const allowed = ALLOWED_DOMAINS[adapter];
72
+ if (allowed && allowed.length > 0) {
73
+ const ok = allowed.some(d => hostname === d || hostname.endsWith(`.${d}`));
74
+ if (!ok)
75
+ throw new SecurityError(`URL not allowed for ${adapter}. Allowed domains: ${allowed.join(", ")}`);
76
+ }
77
+
78
+ return rawUrl;
79
+ }
80
+
81
+ function sanitizeQuery(query: string, maxLen = MAX_QUERY_LENGTH): string {
82
+ if (query.length > maxLen)
83
+ throw new SecurityError(`Query too long (max ${maxLen} chars)`);
84
+ // Strip null bytes and control characters
85
+ return query.replace(/[\x00-\x1F\x7F]/g, "").trim();
86
+ }
87
+
88
+ // ─── Rate Limiting (in-memory, per isolate) ───────────────────────────────────
89
+
90
+ interface RateEntry { count: number; windowStart: number; }
91
+ const rateMap = new Map<string, RateEntry>();
92
+
93
+ const RATE_LIMIT = 20; // max requests
94
+ const RATE_WINDOW_MS = 60_000; // per 60 seconds
95
+
96
+ function checkRateLimit(ip: string): void {
97
+ const now = Date.now();
98
+ const entry = rateMap.get(ip);
99
+
100
+ if (!entry || now - entry.windowStart > RATE_WINDOW_MS) {
101
+ rateMap.set(ip, { count: 1, windowStart: now });
102
+ return;
103
+ }
104
+
105
+ if (entry.count >= RATE_LIMIT) {
106
+ throw new SecurityError(`Rate limit exceeded. Max ${RATE_LIMIT} requests per minute.`);
107
+ }
108
+
109
+ entry.count++;
110
+ }
111
+
112
+ // Prevent the map from growing unboundedly
113
+ function pruneRateMap(): void {
114
+ const now = Date.now();
115
+ for (const [ip, entry] of rateMap) {
116
+ if (now - entry.windowStart > RATE_WINDOW_MS) rateMap.delete(ip);
117
+ }
118
+ }
119
+
120
+ // ─── Auth ─────────────────────────────────────────────────────────────────────
121
+
122
+ function checkAuth(request: Request, env: Env): void {
123
+ if (!env.API_KEY) return; // Auth disabled if no key is set
124
+
125
+ const authHeader = request.headers.get("Authorization") ?? "";
126
+ const token = authHeader.startsWith("Bearer ") ? authHeader.slice(7) : "";
127
+
128
+ if (token !== env.API_KEY) {
129
+ throw new SecurityError("Unauthorized. Provide a valid Bearer token.");
130
+ }
131
+ }
132
+
133
+ // ─── Helpers ──────────────────────────────────────────────────────────────────
134
+
135
+ function getClientIp(request: Request): string {
136
+ return (
137
+ request.headers.get("CF-Connecting-IP") ??
138
+ request.headers.get("X-Forwarded-For")?.split(",")[0]?.trim() ??
139
+ "unknown"
140
+ );
141
+ }
142
+
143
+ function securityErrorResponse(message: string, status: number): Response {
144
+ return new Response(JSON.stringify({ error: message }), {
145
+ status,
146
+ headers: { "Content-Type": "application/json" },
147
+ });
148
+ }
149
+
150
+ // ─── Freshness Stamp ──────────────────────────────────────────────────────────
151
+
152
+ function stamp(
153
+ content: string,
154
+ url: string,
155
+ date: string | null,
156
+ confidence: "high" | "medium" | "low",
157
+ adapter: string
158
+ ): string {
24
159
  const ctx: FreshContext = {
25
160
  content: content.slice(0, 6000),
26
161
  source_url: url,
@@ -44,107 +179,133 @@ function stamp(content: string, url: string, date: string | null, confidence: "h
44
179
  // ─── Server Factory ───────────────────────────────────────────────────────────
45
180
 
46
181
  function createServer(env: Env): McpServer {
47
- const server = new McpServer({ name: "freshcontext-mcp", version: "0.1.0" });
182
+ const server = new McpServer({ name: "freshcontext-mcp", version: "0.1.3" });
48
183
 
49
184
  // ── extract_github ──────────────────────────────────────────────────────────
50
185
  server.registerTool("extract_github", {
51
186
  description: "Extract real-time data from a GitHub repository — README, stars, forks, last commit, topics. Returns timestamped freshcontext.",
52
187
  inputSchema: z.object({
53
- url: z.string().url().describe("Full GitHub repo URL"),
188
+ url: z.string().url().describe("Full GitHub repo URL e.g. https://github.com/owner/repo"),
54
189
  }),
55
190
  annotations: { readOnlyHint: true, openWorldHint: true },
56
191
  }, async ({ url }) => {
57
- const browser = await puppeteer.launch(env.BROWSER);
58
- const page = await browser.newPage();
59
- await page.setUserAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/124.0.0.0 Safari/537.36");
60
- await page.goto(url, { waitUntil: "domcontentloaded" });
61
-
62
- const data = await page.evaluate(`(function() {
63
- var readme = (document.querySelector('[data-target="readme-toc.content"]') || document.querySelector('.markdown-body') || {}).textContent || null;
64
- var starsEl = document.querySelector('[id="repo-stars-counter-star"]') || document.querySelector('.Counter.js-social-count');
65
- var stars = starsEl ? starsEl.textContent.trim() : null;
66
- var forksEl = document.querySelector('[id="repo-network-counter"]');
67
- var forks = forksEl ? forksEl.textContent.trim() : null;
68
- var commitEl = document.querySelector('relative-time');
69
- var lastCommit = commitEl ? commitEl.getAttribute('datetime') : null;
70
- var descEl = document.querySelector('.f4.my-3');
71
- var description = descEl ? descEl.textContent.trim() : null;
72
- var topics = Array.from(document.querySelectorAll('.topic-tag')).map(function(t) { return t.textContent.trim(); });
73
- var langEl = document.querySelector('.color-fg-default.text-bold.mr-1');
74
- var language = langEl ? langEl.textContent.trim() : null;
75
- return { readme, stars, forks, lastCommit, description, topics, language };
76
- })()`);
77
-
78
- await browser.close();
79
- const d = data as any;
80
- const raw = [`Description: ${d.description ?? "N/A"}`, `Stars: ${d.stars ?? "N/A"} | Forks: ${d.forks ?? "N/A"}`, `Language: ${d.language ?? "N/A"}`, `Last commit: ${d.lastCommit ?? "N/A"}`, `Topics: ${d.topics?.join(", ") ?? "none"}`, `\n--- README ---\n${d.readme ?? "No README"}`].join("\n");
81
- return { content: [{ type: "text", text: stamp(raw, url, d.lastCommit ?? null, d.lastCommit ? "high" : "medium", "github") }] };
192
+ try {
193
+ const safeUrl = validateUrl(url, "github");
194
+ const browser = await puppeteer.launch(env.BROWSER);
195
+ const page = await browser.newPage();
196
+ await page.setUserAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/124.0.0.0 Safari/537.36");
197
+ await page.goto(safeUrl, { waitUntil: "domcontentloaded" });
198
+
199
+ const data = await page.evaluate(`(function() {
200
+ var readme = (document.querySelector('[data-target="readme-toc.content"]') || document.querySelector('.markdown-body') || {}).textContent || null;
201
+ var starsEl = document.querySelector('[id="repo-stars-counter-star"]') || document.querySelector('.Counter.js-social-count');
202
+ var stars = starsEl ? starsEl.textContent.trim() : null;
203
+ var forksEl = document.querySelector('[id="repo-network-counter"]');
204
+ var forks = forksEl ? forksEl.textContent.trim() : null;
205
+ var commitEl = document.querySelector('relative-time');
206
+ var lastCommit = commitEl ? commitEl.getAttribute('datetime') : null;
207
+ var descEl = document.querySelector('.f4.my-3');
208
+ var description = descEl ? descEl.textContent.trim() : null;
209
+ var topics = Array.from(document.querySelectorAll('.topic-tag')).map(function(t) { return t.textContent.trim(); });
210
+ var langEl = document.querySelector('.color-fg-default.text-bold.mr-1');
211
+ var language = langEl ? langEl.textContent.trim() : null;
212
+ return { readme, stars, forks, lastCommit, description, topics, language };
213
+ })()`);
214
+
215
+ await browser.close();
216
+ const d = data as any;
217
+ const raw = [
218
+ `Description: ${d.description ?? "N/A"}`,
219
+ `Stars: ${d.stars ?? "N/A"} | Forks: ${d.forks ?? "N/A"}`,
220
+ `Language: ${d.language ?? "N/A"}`,
221
+ `Last commit: ${d.lastCommit ?? "N/A"}`,
222
+ `Topics: ${d.topics?.join(", ") ?? "none"}`,
223
+ `\n--- README ---\n${d.readme ?? "No README"}`,
224
+ ].join("\n");
225
+ return { content: [{ type: "text", text: stamp(raw, safeUrl, d.lastCommit ?? null, d.lastCommit ? "high" : "medium", "github") }] };
226
+ } catch (err: any) {
227
+ return { content: [{ type: "text", text: `[ERROR] ${err.message}` }] };
228
+ }
82
229
  });
83
230
 
84
231
  // ── extract_hackernews ──────────────────────────────────────────────────────
85
232
  server.registerTool("extract_hackernews", {
86
- description: "Extract top stories from Hacker News with real-time timestamps.",
87
- inputSchema: z.object({ url: z.string().url().describe("HN URL") }),
233
+ description: "Extract top stories or search results from Hacker News with real-time timestamps.",
234
+ inputSchema: z.object({ url: z.string().url().describe("HN URL e.g. https://news.ycombinator.com") }),
88
235
  annotations: { readOnlyHint: true, openWorldHint: true },
89
236
  }, async ({ url }) => {
90
- const browser = await puppeteer.launch(env.BROWSER);
91
- const page = await browser.newPage();
92
- await page.goto(url, { waitUntil: "domcontentloaded" });
93
-
94
- const data = await page.evaluate(`(function() {
95
- var items = Array.from(document.querySelectorAll('.athing')).slice(0, 20);
96
- return items.map(function(el) {
97
- var titleLineEl = el.querySelector('.titleline > a');
98
- var title = titleLineEl ? titleLineEl.textContent.trim() : null;
99
- var link = titleLineEl ? titleLineEl.getAttribute('href') : null;
100
- var subtext = el.nextElementSibling;
101
- var scoreEl = subtext ? subtext.querySelector('.score') : null;
102
- var score = scoreEl ? scoreEl.textContent.trim() : null;
103
- var ageEl = subtext ? subtext.querySelector('.age') : null;
104
- var age = ageEl ? ageEl.getAttribute('title') : null;
105
- return { title, link, score, age };
106
- });
107
- })()`);
108
-
109
- await browser.close();
110
- const items = data as any[];
111
- const raw = items.map((r, i) => `[${i + 1}] ${r.title}\nURL: ${r.link}\nScore: ${r.score ?? "N/A"}\nPosted: ${r.age ?? "unknown"}`).join("\n\n");
112
- const newest = items.map(r => r.age).filter(Boolean).sort().reverse()[0] ?? null;
113
- return { content: [{ type: "text", text: stamp(raw, url, newest, newest ? "high" : "medium", "hackernews") }] };
237
+ try {
238
+ const safeUrl = validateUrl(url, "hackernews");
239
+ const browser = await puppeteer.launch(env.BROWSER);
240
+ const page = await browser.newPage();
241
+ await page.goto(safeUrl, { waitUntil: "domcontentloaded" });
242
+
243
+ const data = await page.evaluate(`(function() {
244
+ var items = Array.from(document.querySelectorAll('.athing')).slice(0, 20);
245
+ return items.map(function(el) {
246
+ var titleLineEl = el.querySelector('.titleline > a');
247
+ var title = titleLineEl ? titleLineEl.textContent.trim() : null;
248
+ var link = titleLineEl ? titleLineEl.getAttribute('href') : null;
249
+ var subtext = el.nextElementSibling;
250
+ var scoreEl = subtext ? subtext.querySelector('.score') : null;
251
+ var score = scoreEl ? scoreEl.textContent.trim() : null;
252
+ var ageEl = subtext ? subtext.querySelector('.age') : null;
253
+ var age = ageEl ? ageEl.getAttribute('title') : null;
254
+ return { title, link, score, age };
255
+ });
256
+ })()`);
257
+
258
+ await browser.close();
259
+ const items = data as any[];
260
+ const raw = items.map((r, i) =>
261
+ `[${i + 1}] ${r.title}\nURL: ${r.link}\nScore: ${r.score ?? "N/A"}\nPosted: ${r.age ?? "unknown"}`
262
+ ).join("\n\n");
263
+ const newest = items.map(r => r.age).filter(Boolean).sort().reverse()[0] ?? null;
264
+ return { content: [{ type: "text", text: stamp(raw, safeUrl, newest, newest ? "high" : "medium", "hackernews") }] };
265
+ } catch (err: any) {
266
+ return { content: [{ type: "text", text: `[ERROR] ${err.message}` }] };
267
+ }
114
268
  });
115
269
 
116
270
  // ── extract_scholar ─────────────────────────────────────────────────────────
117
271
  server.registerTool("extract_scholar", {
118
272
  description: "Extract research results from Google Scholar with publication dates.",
119
- inputSchema: z.object({ url: z.string().url().describe("Google Scholar URL") }),
273
+ inputSchema: z.object({ url: z.string().url().describe("Google Scholar search URL") }),
120
274
  annotations: { readOnlyHint: true, openWorldHint: true },
121
275
  }, async ({ url }) => {
122
- const browser = await puppeteer.launch(env.BROWSER);
123
- const page = await browser.newPage();
124
- await page.setUserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 Chrome/124.0.0.0 Safari/537.36");
125
- await page.goto(url, { waitUntil: "domcontentloaded" });
126
-
127
- const data = await page.evaluate(`(function() {
128
- var items = Array.from(document.querySelectorAll('.gs_r.gs_or.gs_scl'));
129
- return items.map(function(el) {
130
- var titleEl = el.querySelector('.gs_rt');
131
- var title = titleEl ? titleEl.textContent.trim() : null;
132
- var authorsEl = el.querySelector('.gs_a');
133
- var authors = authorsEl ? authorsEl.textContent.trim() : null;
134
- var snippetEl = el.querySelector('.gs_rs');
135
- var snippet = snippetEl ? snippetEl.textContent.trim() : null;
136
- var yearMatch = authors ? authors.match(/\\b(19|20)\\d{2}\\b/) : null;
137
- var year = yearMatch ? yearMatch[0] : null;
138
- return { title, authors, snippet, year };
139
- });
140
- })()`);
141
-
142
- await browser.close();
143
- const items = data as any[];
144
- const raw = items.map((r, i) => `[${i + 1}] ${r.title ?? "Untitled"}\nAuthors: ${r.authors ?? "Unknown"}\nYear: ${r.year ?? "Unknown"}\nSnippet: ${r.snippet ?? "N/A"}`).join("\n\n");
145
- const years = items.map(r => r.year).filter(Boolean).sort().reverse();
146
- const newest = years[0] ?? null;
147
- return { content: [{ type: "text", text: stamp(raw, url, newest ? `${newest}-01-01` : null, newest ? "high" : "low", "google_scholar") }] };
276
+ try {
277
+ const safeUrl = validateUrl(url, "scholar");
278
+ const browser = await puppeteer.launch(env.BROWSER);
279
+ const page = await browser.newPage();
280
+ await page.setUserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 Chrome/124.0.0.0 Safari/537.36");
281
+ await page.goto(safeUrl, { waitUntil: "domcontentloaded" });
282
+
283
+ const data = await page.evaluate(`(function() {
284
+ var items = Array.from(document.querySelectorAll('.gs_r.gs_or.gs_scl'));
285
+ return items.map(function(el) {
286
+ var titleEl = el.querySelector('.gs_rt');
287
+ var title = titleEl ? titleEl.textContent.trim() : null;
288
+ var authorsEl = el.querySelector('.gs_a');
289
+ var authors = authorsEl ? authorsEl.textContent.trim() : null;
290
+ var snippetEl = el.querySelector('.gs_rs');
291
+ var snippet = snippetEl ? snippetEl.textContent.trim() : null;
292
+ var yearMatch = authors ? authors.match(/\\b(19|20)\\d{2}\\b/) : null;
293
+ var year = yearMatch ? yearMatch[0] : null;
294
+ return { title, authors, snippet, year };
295
+ });
296
+ })()`);
297
+
298
+ await browser.close();
299
+ const items = data as any[];
300
+ const raw = items.map((r, i) =>
301
+ `[${i + 1}] ${r.title ?? "Untitled"}\nAuthors: ${r.authors ?? "Unknown"}\nYear: ${r.year ?? "Unknown"}\nSnippet: ${r.snippet ?? "N/A"}`
302
+ ).join("\n\n");
303
+ const years = items.map(r => r.year).filter(Boolean).sort().reverse();
304
+ const newest = years[0] ?? null;
305
+ return { content: [{ type: "text", text: stamp(raw, safeUrl, newest ? `${newest}-01-01` : null, newest ? "high" : "low", "google_scholar") }] };
306
+ } catch (err: any) {
307
+ return { content: [{ type: "text", text: `[ERROR] ${err.message}` }] };
308
+ }
148
309
  });
149
310
 
150
311
  return server;
@@ -154,6 +315,23 @@ function createServer(env: Env): McpServer {
154
315
 
155
316
  export default {
156
317
  async fetch(request: Request, env: Env): Promise<Response> {
318
+ // Prune stale rate limit entries occasionally
319
+ if (Math.random() < 0.05) pruneRateMap();
320
+
321
+ try {
322
+ // 1. Auth check
323
+ checkAuth(request, env);
324
+
325
+ // 2. Rate limit check
326
+ const ip = getClientIp(request);
327
+ checkRateLimit(ip);
328
+
329
+ } catch (err: any) {
330
+ const status = err.message.startsWith("Unauthorized") ? 401 : 429;
331
+ return securityErrorResponse(err.message, status);
332
+ }
333
+
334
+ // 3. Handle MCP request
157
335
  const transport = new WebStandardStreamableHTTPServerTransport();
158
336
  const server = createServer(env);
159
337
  await server.connect(transport);