freshcontext-mcp 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +71 -33
- package/dist/server.js +1 -0
- package/package.json +4 -1
- package/src/server.ts +2 -0
- package/src/server.ts.bak +204 -0
- package/worker/src/worker.ts +261 -83
package/README.md
CHANGED
|
@@ -21,14 +21,14 @@ Every piece of data extracted by `freshcontext-mcp` is wrapped in a structured e
|
|
|
21
21
|
[FRESHCONTEXT]
|
|
22
22
|
Source: https://github.com/owner/repo
|
|
23
23
|
Published: 2024-11-03
|
|
24
|
-
Retrieved: 2026-03-
|
|
24
|
+
Retrieved: 2026-03-04T10:14:00Z
|
|
25
25
|
Confidence: high
|
|
26
26
|
---
|
|
27
27
|
... content ...
|
|
28
28
|
[/FRESHCONTEXT]
|
|
29
29
|
```
|
|
30
30
|
|
|
31
|
-
The AI agent always knows **when it's looking at data**, not just what the data says.
|
|
31
|
+
The AI agent always knows **when it's looking at data**, not just what the data says.
|
|
32
32
|
|
|
33
33
|
---
|
|
34
34
|
|
|
@@ -60,13 +60,33 @@ The AI agent always knows **when it's looking at data**, not just what the data
|
|
|
60
60
|
|
|
61
61
|
## Quick Start
|
|
62
62
|
|
|
63
|
-
###
|
|
63
|
+
### Option A — Cloud (no install, works immediately)
|
|
64
64
|
|
|
65
|
-
|
|
66
|
-
|
|
65
|
+
No Node, no Playwright, nothing to install. Just add this to your Claude Desktop config and restart.
|
|
66
|
+
|
|
67
|
+
**Mac:** open `~/Library/Application Support/Claude/claude_desktop_config.json`
|
|
68
|
+
**Windows:** open `%APPDATA%\Claude\claude_desktop_config.json`
|
|
69
|
+
|
|
70
|
+
```json
|
|
71
|
+
{
|
|
72
|
+
"mcpServers": {
|
|
73
|
+
"freshcontext": {
|
|
74
|
+
"command": "npx",
|
|
75
|
+
"args": ["-y", "mcp-remote", "https://freshcontext-mcp.gimmanuel73.workers.dev/mcp"]
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
}
|
|
67
79
|
```
|
|
68
80
|
|
|
69
|
-
|
|
81
|
+
Restart Claude Desktop. The freshcontext tools will appear in your session.
|
|
82
|
+
|
|
83
|
+
> **Note:** If `claude_desktop_config.json` doesn't exist yet, create it with the content above.
|
|
84
|
+
|
|
85
|
+
---
|
|
86
|
+
|
|
87
|
+
### Option B — Local (full Playwright, faster for heavy use)
|
|
88
|
+
|
|
89
|
+
**Prerequisites:** Node.js 18+ ([nodejs.org](https://nodejs.org))
|
|
70
90
|
|
|
71
91
|
```bash
|
|
72
92
|
git clone https://github.com/PrinceGabriel-lgtm/freshcontext-mcp
|
|
@@ -76,39 +96,56 @@ npx playwright install chromium
|
|
|
76
96
|
npm run build
|
|
77
97
|
```
|
|
78
98
|
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
Add to your `claude_desktop_config.json`:
|
|
82
|
-
|
|
83
|
-
**Mac:** `~/Library/Application Support/Claude/claude_desktop_config.json`
|
|
84
|
-
**Windows:** `%APPDATA%\Claude\claude_desktop_config.json`
|
|
99
|
+
Then add to your Claude Desktop config:
|
|
85
100
|
|
|
101
|
+
**Mac** (`~/Library/Application Support/Claude/claude_desktop_config.json`):
|
|
86
102
|
```json
|
|
87
103
|
{
|
|
88
104
|
"mcpServers": {
|
|
89
|
-
"freshcontext
|
|
105
|
+
"freshcontext": {
|
|
90
106
|
"command": "node",
|
|
91
|
-
"args": ["/
|
|
107
|
+
"args": ["/Users/YOUR_USERNAME/path/to/freshcontext-mcp/dist/server.js"]
|
|
92
108
|
}
|
|
93
109
|
}
|
|
94
110
|
}
|
|
95
111
|
```
|
|
96
112
|
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
### Or use the Cloudflare edge deployment (no install needed)
|
|
100
|
-
|
|
113
|
+
**Windows** (`%APPDATA%\Claude\claude_desktop_config.json`):
|
|
101
114
|
```json
|
|
102
115
|
{
|
|
103
116
|
"mcpServers": {
|
|
104
|
-
"freshcontext
|
|
105
|
-
"command": "
|
|
106
|
-
"args": ["-
|
|
117
|
+
"freshcontext": {
|
|
118
|
+
"command": "node",
|
|
119
|
+
"args": ["C:\\Users\\YOUR_USERNAME\\path\\to\\freshcontext-mcp\\dist\\server.js"]
|
|
107
120
|
}
|
|
108
121
|
}
|
|
109
122
|
}
|
|
110
123
|
```
|
|
111
124
|
|
|
125
|
+
Restart Claude Desktop.
|
|
126
|
+
|
|
127
|
+
---
|
|
128
|
+
|
|
129
|
+
### Troubleshooting (Mac)
|
|
130
|
+
|
|
131
|
+
**"command not found: node"** — Node isn't on your PATH inside Claude Desktop's environment. Use the full path:
|
|
132
|
+
```bash
|
|
133
|
+
which node # copy this output
|
|
134
|
+
```
|
|
135
|
+
Then replace `"command": "node"` with `"command": "/usr/local/bin/node"` (or whatever `which node` returned).
|
|
136
|
+
|
|
137
|
+
**"npx: command not found"** — Same issue. Run `which npx` and use the full path for Option A:
|
|
138
|
+
```json
|
|
139
|
+
"command": "/usr/local/bin/npx"
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
**Config file doesn't exist** — Create it. On Mac:
|
|
143
|
+
```bash
|
|
144
|
+
mkdir -p ~/Library/Application\ Support/Claude
|
|
145
|
+
touch ~/Library/Application\ Support/Claude/claude_desktop_config.json
|
|
146
|
+
```
|
|
147
|
+
Then paste the config JSON above into it.
|
|
148
|
+
|
|
112
149
|
---
|
|
113
150
|
|
|
114
151
|
## Usage Examples
|
|
@@ -162,12 +199,12 @@ This makes freshness **verifiable**, not assumed.
|
|
|
162
199
|
Uses headless Chromium via Playwright. Full browser rendering for JavaScript-heavy sites.
|
|
163
200
|
|
|
164
201
|
### Cloud (Cloudflare Workers)
|
|
165
|
-
The `worker/` directory contains a Cloudflare Workers deployment
|
|
202
|
+
The `worker/` directory contains a Cloudflare Workers deployment. No Playwright dependency — runs at the edge globally.
|
|
166
203
|
|
|
167
204
|
```bash
|
|
168
205
|
cd worker
|
|
169
206
|
npm install
|
|
170
|
-
npx wrangler secret put
|
|
207
|
+
npx wrangler secret put API_KEY
|
|
171
208
|
npx wrangler deploy
|
|
172
209
|
```
|
|
173
210
|
|
|
@@ -180,15 +217,16 @@ freshcontext-mcp/
|
|
|
180
217
|
├── src/
|
|
181
218
|
│ ├── server.ts # MCP server, all tool registrations
|
|
182
219
|
│ ├── types.ts # FreshContext interfaces
|
|
220
|
+
│ ├── security.ts # Input validation, domain allowlists
|
|
183
221
|
│ ├── adapters/
|
|
184
|
-
│ │ ├── github.ts
|
|
185
|
-
│ │ ├── hackernews.ts
|
|
186
|
-
│ │ ├── scholar.ts
|
|
187
|
-
│ │ ├── yc.ts
|
|
188
|
-
│ │ ├── repoSearch.ts
|
|
189
|
-
│ │ └── packageTrends.ts
|
|
222
|
+
│ │ ├── github.ts
|
|
223
|
+
│ │ ├── hackernews.ts
|
|
224
|
+
│ │ ├── scholar.ts
|
|
225
|
+
│ │ ├── yc.ts
|
|
226
|
+
│ │ ├── repoSearch.ts
|
|
227
|
+
│ │ └── packageTrends.ts
|
|
190
228
|
│ └── tools/
|
|
191
|
-
│ └── freshnessStamp.ts
|
|
229
|
+
│ └── freshnessStamp.ts
|
|
192
230
|
└── worker/ # Cloudflare Workers deployment
|
|
193
231
|
└── src/worker.ts
|
|
194
232
|
```
|
|
@@ -205,17 +243,17 @@ freshcontext-mcp/
|
|
|
205
243
|
- [x] npm/PyPI package trends
|
|
206
244
|
- [x] `extract_landscape` composite tool
|
|
207
245
|
- [x] Cloudflare Workers deployment
|
|
246
|
+
- [x] Worker auth + rate limiting + domain allowlists
|
|
208
247
|
- [ ] Product Hunt launches adapter
|
|
209
|
-
- [ ]
|
|
248
|
+
- [ ] Finance/market data adapter
|
|
210
249
|
- [ ] TTL-based caching layer
|
|
211
250
|
- [ ] `freshness_score` numeric metric
|
|
212
|
-
- [ ] Webhook support for real-time updates
|
|
213
251
|
|
|
214
252
|
---
|
|
215
253
|
|
|
216
254
|
## Contributing
|
|
217
255
|
|
|
218
|
-
PRs welcome. New adapters are the highest-value contribution — see
|
|
256
|
+
PRs welcome. New adapters are the highest-value contribution — see `src/adapters/` for the pattern. Each adapter returns `{ raw, content_date, freshness_confidence }`.
|
|
219
257
|
|
|
220
258
|
---
|
|
221
259
|
|
package/dist/server.js
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "freshcontext-mcp",
|
|
3
|
-
"version": "0.1.3",
|
|
3
|
+
"version": "0.1.5",
|
|
4
4
|
"description": "Real-time web extraction MCP server with freshness timestamps for AI agents",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"mcp",
|
|
@@ -24,6 +24,9 @@
|
|
|
24
24
|
"license": "MIT",
|
|
25
25
|
"type": "module",
|
|
26
26
|
"main": "dist/server.js",
|
|
27
|
+
"bin": {
|
|
28
|
+
"freshcontext-mcp": "dist/server.js"
|
|
29
|
+
},
|
|
27
30
|
"scripts": {
|
|
28
31
|
"build": "tsc",
|
|
29
32
|
"dev": "tsx watch src/server.ts",
|
package/src/server.ts
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
1
2
|
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
2
3
|
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
3
4
|
import { z } from "zod";
|
|
@@ -202,3 +203,4 @@ async function main() {
|
|
|
202
203
|
}
|
|
203
204
|
|
|
204
205
|
main().catch(console.error);
|
|
206
|
+
|
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
2
|
+
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
3
|
+
import { z } from "zod";
|
|
4
|
+
import { githubAdapter } from "./adapters/github.js";
|
|
5
|
+
import { scholarAdapter } from "./adapters/scholar.js";
|
|
6
|
+
import { hackerNewsAdapter } from "./adapters/hackernews.js";
|
|
7
|
+
import { ycAdapter } from "./adapters/yc.js";
|
|
8
|
+
import { repoSearchAdapter } from "./adapters/repoSearch.js";
|
|
9
|
+
import { packageTrendsAdapter } from "./adapters/packageTrends.js";
|
|
10
|
+
import { stampFreshness, formatForLLM } from "./tools/freshnessStamp.js";
|
|
11
|
+
import { SecurityError, formatSecurityError } from "./security.js";
|
|
12
|
+
|
|
13
|
+
const server = new McpServer({
|
|
14
|
+
name: "freshcontext-mcp",
|
|
15
|
+
version: "0.1.0",
|
|
16
|
+
});
|
|
17
|
+
|
|
18
|
+
// ─── Tool: extract_github ────────────────────────────────────────────────────
|
|
19
|
+
server.registerTool(
|
|
20
|
+
"extract_github",
|
|
21
|
+
{
|
|
22
|
+
description:
|
|
23
|
+
"Extract real-time data from a GitHub repository — README, stars, forks, language, topics, last commit. Returns timestamped freshcontext.",
|
|
24
|
+
inputSchema: z.object({
|
|
25
|
+
url: z.string().url().describe("Full GitHub repo URL e.g. https://github.com/owner/repo"),
|
|
26
|
+
max_length: z.number().optional().default(6000).describe("Max content length"),
|
|
27
|
+
}),
|
|
28
|
+
annotations: { readOnlyHint: true, openWorldHint: true },
|
|
29
|
+
},
|
|
30
|
+
async ({ url, max_length }) => {
|
|
31
|
+
try {
|
|
32
|
+
const result = await githubAdapter({ url, maxLength: max_length });
|
|
33
|
+
const ctx = stampFreshness(result, { url, maxLength: max_length }, "github");
|
|
34
|
+
return { content: [{ type: "text", text: formatForLLM(ctx) }] };
|
|
35
|
+
} catch (err) {
|
|
36
|
+
return { content: [{ type: "text", text: formatSecurityError(err) }] };
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
);
|
|
40
|
+
|
|
41
|
+
// ─── Tool: extract_scholar ───────────────────────────────────────────────────
|
|
42
|
+
// Runs the Scholar adapter on a search URL and returns the stamped result as
// a single text content item.
server.registerTool(
  "extract_scholar",
  {
    description:
      "Extract research results from a Google Scholar search URL. Returns titles, authors, publication years, and snippets — all timestamped.",
    inputSchema: z.object({
      url: z.string().url().describe("Google Scholar search URL e.g. https://scholar.google.com/scholar?q=..."),
      max_length: z.number().optional().default(6000),
    }),
    annotations: { readOnlyHint: true, openWorldHint: true },
  },
  async ({ url, max_length }) => {
    try {
      const result = await scholarAdapter({ url, maxLength: max_length });
      const ctx = stampFreshness(result, { url, maxLength: max_length }, "google_scholar");
      return { content: [{ type: "text", text: formatForLLM(ctx) }] };
    } catch (err) {
      // Flag the result as a tool failure so the client does not mistake the
      // error text for extracted data.
      return { content: [{ type: "text", text: formatSecurityError(err) }], isError: true };
    }
  }
);
|
|
63
|
+
|
|
64
|
+
// ─── Tool: extract_hackernews ────────────────────────────────────────────────
|
|
65
|
+
// Runs the Hacker News adapter on a front-page or search URL and returns the
// stamped result as a single text content item.
server.registerTool(
  "extract_hackernews",
  {
    description:
      "Extract top stories or search results from Hacker News. Real-time dev/tech community sentiment with post timestamps.",
    inputSchema: z.object({
      url: z.string().url().describe("HN URL e.g. https://news.ycombinator.com or https://hn.algolia.com/?q=..."),
      max_length: z.number().optional().default(4000),
    }),
    annotations: { readOnlyHint: true, openWorldHint: true },
  },
  async ({ url, max_length }) => {
    try {
      const result = await hackerNewsAdapter({ url, maxLength: max_length });
      const ctx = stampFreshness(result, { url, maxLength: max_length }, "hackernews");
      return { content: [{ type: "text", text: formatForLLM(ctx) }] };
    } catch (err) {
      // Flag the result as a tool failure so the client does not mistake the
      // error text for extracted data.
      return { content: [{ type: "text", text: formatSecurityError(err) }], isError: true };
    }
  }
);
|
|
86
|
+
|
|
87
|
+
// ─── Tool: extract_yc ──────────────────────────────────────────────────────────
|
|
88
|
+
// Runs the YC adapter on a companies-listing URL and returns the stamped
// result as a single text content item.
server.registerTool(
  "extract_yc",
  {
    description:
      "Scrape YC company listings. Use https://www.ycombinator.com/companies?query=KEYWORD to find startups in a space. Returns name, batch, tags, description per company with freshness timestamp.",
    inputSchema: z.object({
      url: z.string().url().describe("YC companies URL e.g. https://www.ycombinator.com/companies?query=mcp"),
      max_length: z.number().optional().default(6000),
    }),
    annotations: { readOnlyHint: true, openWorldHint: true },
  },
  async ({ url, max_length }) => {
    try {
      const result = await ycAdapter({ url, maxLength: max_length });
      const ctx = stampFreshness(result, { url, maxLength: max_length }, "ycombinator");
      return { content: [{ type: "text", text: formatForLLM(ctx) }] };
    } catch (err) {
      // Flag the result as a tool failure so the client does not mistake the
      // error text for extracted data.
      return { content: [{ type: "text", text: formatSecurityError(err) }], isError: true };
    }
  }
);
|
|
109
|
+
|
|
110
|
+
// ─── Tool: search_repos ──────────────────────────────────────────────────────
|
|
111
|
+
// Passes a free-text query (not a URL) to the repo-search adapter through its
// `url` field and returns the stamped result as a single text content item.
server.registerTool(
  "search_repos",
  {
    description:
      "Search GitHub for repositories matching a keyword or topic. Returns top results by stars with activity signals. Use to find competitors, similar tools, or related projects.",
    inputSchema: z.object({
      query: z.string().describe("Search query e.g. 'mcp server typescript' or 'cashflow prediction python'"),
      max_length: z.number().optional().default(6000),
    }),
    annotations: { readOnlyHint: true, openWorldHint: true },
  },
  async ({ query, max_length }) => {
    try {
      const result = await repoSearchAdapter({ url: query, maxLength: max_length });
      const ctx = stampFreshness(result, { url: query, maxLength: max_length }, "github_search");
      return { content: [{ type: "text", text: formatForLLM(ctx) }] };
    } catch (err) {
      // Flag the result as a tool failure so the client does not mistake the
      // error text for extracted data.
      return { content: [{ type: "text", text: formatSecurityError(err) }], isError: true };
    }
  }
);
|
|
132
|
+
|
|
133
|
+
// ─── Tool: package_trends ────────────────────────────────────────────────────
|
|
134
|
+
// Passes the package list string to the package-trends adapter through its
// `url` field and returns the stamped result as a single text content item.
server.registerTool(
  "package_trends",
  {
    description:
      "Look up npm and PyPI package metadata — version history, release cadence, last updated. Use to gauge ecosystem activity around a tool or dependency. Supports comma-separated list of packages.",
    inputSchema: z.object({
      packages: z.string().describe("Package name(s) e.g. 'langchain' or 'npm:zod,pypi:fastapi'"),
      max_length: z.number().optional().default(5000),
    }),
    annotations: { readOnlyHint: true, openWorldHint: true },
  },
  async ({ packages, max_length }) => {
    try {
      const result = await packageTrendsAdapter({ url: packages, maxLength: max_length });
      const ctx = stampFreshness(result, { url: packages, maxLength: max_length }, "package_registry");
      return { content: [{ type: "text", text: formatForLLM(ctx) }] };
    } catch (err) {
      // Flag the result as a tool failure so the client does not mistake the
      // error text for extracted data.
      return { content: [{ type: "text", text: formatSecurityError(err) }], isError: true };
    }
  }
);
|
|
155
|
+
|
|
156
|
+
// ─── Tool: extract_landscape ─────────────────────────────────────────────────
|
|
157
|
+
// Composite tool: queries the YC, repo-search, Hacker News and package-trends
// adapters for one topic and joins their raw output into a Markdown report.
// Unlike the single-source tools, the report is returned without a freshness
// envelope; it carries its own "Generated:" line instead.
server.registerTool(
  "extract_landscape",
  {
    description:
      "Composite intelligence tool. Given a project idea or keyword, simultaneously queries YC startups, GitHub repos, HN sentiment, and package activity to answer: Who is building this? Is it funded? What's getting traction? Returns a unified timestamped landscape report.",
    inputSchema: z.object({
      topic: z.string().describe("Your project idea or keyword e.g. 'mcp server' or 'cashflow prediction'"),
      max_length: z.number().optional().default(8000),
    }),
    annotations: { readOnlyHint: true, openWorldHint: true },
  },
  async ({ topic, max_length }) => {
    type AdapterOutcome = PromiseSettledResult<{
      raw: string;
      content_date: string | null;
      freshness_confidence: string;
    }>;

    // Each of the four sources gets an equal share of the length budget.
    const budget = Math.floor((max_length ?? 8000) / 4);
    const encodedTopic = encodeURIComponent(topic);

    // allSettled: a failing source becomes an error section instead of
    // rejecting the whole report.
    const outcomes = await Promise.allSettled([
      ycAdapter({ url: `https://www.ycombinator.com/companies?query=${encodedTopic}`, maxLength: budget }),
      repoSearchAdapter({ url: topic, maxLength: budget }),
      hackerNewsAdapter({ url: `https://hn.algolia.com/api/v1/search?query=${encodedTopic}&tags=story&hitsPerPage=15`, maxLength: budget }),
      packageTrendsAdapter({ url: topic, maxLength: budget }),
    ]);
    const [ycOutcome, repoOutcome, hnOutcome, pkgOutcome] = outcomes;

    // Renders one report section: the adapter's raw text, or its rejection reason.
    const renderSection = (heading: string, outcome: AdapterOutcome): string => {
      if (outcome.status === "fulfilled") {
        return `## ${heading}\n${outcome.value.raw}`;
      }
      return `## ${heading}\n[Error: ${outcome.reason}]`;
    };

    const reportParts = [
      `# Landscape Report: "${topic}"`,
      `Generated: ${new Date().toISOString()}`,
      "",
      renderSection("🚀 YC Startups in this space", ycOutcome),
      renderSection("📦 Top GitHub repos", repoOutcome),
      renderSection("💬 HN sentiment (last month)", hnOutcome),
      renderSection("📊 Package ecosystem", pkgOutcome),
    ];

    return { content: [{ type: "text", text: reportParts.join("\n\n") }] };
  }
);
|
|
196
|
+
|
|
197
|
+
// ─── Start ───────────────────────────────────────────────────────────────────
|
|
198
|
+
/**
 * Connects the MCP server to a stdio transport and announces readiness.
 * The banner is written with console.error, i.e. to stderr.
 */
async function main() {
  const transport = new StdioServerTransport();
  await server.connect(transport);
  console.error("freshcontext-mcp running on stdio");
}

// A startup failure is logged and reflected in the exit status, so a
// supervising process can tell a crashed start from a clean exit.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
|
package/worker/src/worker.ts
CHANGED
|
@@ -3,10 +3,11 @@ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
|
3
3
|
import { WebStandardStreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/webStandardStreamableHttp.js";
|
|
4
4
|
import { z } from "zod";
|
|
5
5
|
|
|
6
|
-
// ─── Types
|
|
6
|
+
// ─── Types ────────────────────────────────────────────────────────────────────
|
|
7
7
|
|
|
8
8
|
interface Env {
|
|
9
9
|
BROWSER: Fetcher;
|
|
10
|
+
API_KEY?: string; // Optional: set via `wrangler secret put API_KEY`
|
|
10
11
|
}
|
|
11
12
|
|
|
12
13
|
interface FreshContext {
|
|
@@ -18,9 +19,143 @@ interface FreshContext {
|
|
|
18
19
|
adapter: string;
|
|
19
20
|
}
|
|
20
21
|
|
|
21
|
-
// ───
|
|
22
|
+
// ─── Security ─────────────────────────────────────────────────────────────────
|
|
22
23
|
|
|
23
|
-
|
|
24
|
+
/** Hostnames each adapter may fetch from, keyed by adapter name. */
const ALLOWED_DOMAINS: Record<string, string[]> = {
  github: ["github.com", "raw.githubusercontent.com"],
  scholar: ["scholar.google.com"],
  hackernews: ["news.ycombinator.com", "hn.algolia.com"],
  yc: ["www.ycombinator.com", "ycombinator.com"],
};

/**
 * Hostname patterns for loopback, private, link-local and unspecified
 * addresses. `URL.hostname` serialises IPv6 literals inside square brackets
 * ("[::1]"), so every IPv6 pattern accepts an optional leading "[" (and a
 * closing "]" where it is anchored at the end); the bare form still matches.
 */
const PRIVATE_IP_PATTERNS = [
  /^localhost\.?$/i,               // also the fully-qualified "localhost."
  /^0\.0\.0\.0$/,                  // IPv4 unspecified address
  /^127\./,
  /^10\./,
  /^192\.168\./,
  /^172\.(1[6-9]|2\d|3[01])\./,
  /^169\.254\./,
  /^\[?::1\]?$/,                   // IPv6 loopback
  /^\[?::\]?$/,                    // IPv6 unspecified address
  /^\[?f[cd][0-9a-f]{2}:/i,        // fc00::/7 unique-local (fc.. and fd..)
  /^\[?fe[89ab][0-9a-f]:/i,        // fe80::/10 link-local
];

/** Maximum accepted length, in characters, of a user-supplied URL. */
const MAX_URL_LENGTH = 500;
/** Default maximum accepted length, in characters, of a free-text query. */
const MAX_QUERY_LENGTH = 200;
|
|
45
|
+
|
|
46
|
+
/** Error type raised by the validation, rate-limit and auth checks. */
class SecurityError extends Error {
  // Instances report themselves as "SecurityError" rather than "Error".
  name = "SecurityError";

  constructor(message: string) {
    super(message);
  }
}
|
|
52
|
+
|
|
53
|
+
/**
 * Validates a user-supplied URL before it is handed to the browser.
 *
 * Checks, in order: length, parseability, http/https scheme, private or
 * internal host, and the adapter's domain allowlist (exact host or subdomain).
 *
 * @param rawUrl  URL string as received from the caller.
 * @param adapter Key into ALLOWED_DOMAINS; adapters without an entry skip the
 *                allowlist step.
 * @returns The original `rawUrl`, unchanged, when every check passes.
 * @throws SecurityError when any check fails.
 */
function validateUrl(rawUrl: string, adapter: string): string {
  if (rawUrl.length > MAX_URL_LENGTH) {
    throw new SecurityError(`URL too long (max ${MAX_URL_LENGTH} chars)`);
  }

  let parsed: URL;
  try {
    parsed = new URL(rawUrl);
  } catch {
    throw new SecurityError("Invalid URL format");
  }

  if (parsed.protocol !== "http:" && parsed.protocol !== "https:") {
    throw new SecurityError("Only http/https URLs are allowed");
  }

  const hostname = parsed.hostname.toLowerCase();

  // URL.hostname keeps the brackets around IPv6 literals ("[::1]") and any
  // trailing root dot ("localhost."). Strip both for the private-address
  // check only, so the "::1" / "fc00:" / "fe80:" / "localhost" patterns can
  // match. The allowlist below still sees the untouched hostname.
  const bareHost = hostname.replace(/^\[|\]$/g, "").replace(/\.$/, "");

  const isPrivate = PRIVATE_IP_PATTERNS.some(
    (pattern) => pattern.test(bareHost) || pattern.test(hostname)
  );
  if (isPrivate) {
    throw new SecurityError("Access to private/internal addresses is not allowed");
  }

  const allowed = ALLOWED_DOMAINS[adapter];
  if (allowed && allowed.length > 0) {
    const ok = allowed.some((d) => hostname === d || hostname.endsWith(`.${d}`));
    if (!ok) {
      throw new SecurityError(`URL not allowed for ${adapter}. Allowed domains: ${allowed.join(", ")}`);
    }
  }

  return rawUrl;
}
|
|
80
|
+
|
|
81
|
+
/**
 * Rejects over-long queries, then removes control characters and trims.
 *
 * The length limit is applied to the raw input, before anything is stripped.
 *
 * @param query  Free-text query from the caller.
 * @param maxLen Maximum accepted length in characters.
 * @returns The query without ASCII control characters (0x00–0x1F, 0x7F) and
 *          without leading or trailing whitespace.
 * @throws SecurityError when `query` is longer than `maxLen`.
 */
function sanitizeQuery(query: string, maxLen = MAX_QUERY_LENGTH): string {
  const exceedsLimit = query.length > maxLen;
  if (exceedsLimit) {
    throw new SecurityError(`Query too long (max ${maxLen} chars)`);
  }

  // Null bytes and the other ASCII control characters.
  const controlChars = /[\x00-\x1F\x7F]/g;
  const stripped = query.replace(controlChars, "");
  return stripped.trim();
}
|
|
87
|
+
|
|
88
|
+
// ─── Rate Limiting (in-memory, per isolate) ───────────────────────────────────
|
|
89
|
+
|
|
90
|
+
/** Request counter for one client within the current window. */
interface RateEntry { count: number; windowStart: number; }

// Module-level state: one entry per client key.
const rateMap = new Map<string, RateEntry>();

const RATE_LIMIT = 20;          // max requests
const RATE_WINDOW_MS = 60_000;  // per 60 seconds

/**
 * Counts a request against `ip` using a fixed window.
 *
 * The first request from a key, or the first one after its window has
 * elapsed, starts a new window with a count of 1.
 *
 * @param ip Client key to count against.
 * @throws SecurityError when the key already made RATE_LIMIT requests in the
 *         current window.
 */
function checkRateLimit(ip: string): void {
  const timestamp = Date.now();
  const current = rateMap.get(ip);

  const windowStillOpen =
    current !== undefined && timestamp - current.windowStart <= RATE_WINDOW_MS;

  if (current !== undefined && windowStillOpen) {
    if (current.count >= RATE_LIMIT) {
      throw new SecurityError(`Rate limit exceeded. Max ${RATE_LIMIT} requests per minute.`);
    }
    current.count += 1;
    return;
  }

  // No entry yet, or the previous window has expired: start a fresh one.
  rateMap.set(ip, { count: 1, windowStart: timestamp });
}
|
|
111
|
+
|
|
112
|
+
/**
 * Drops every rate-limit entry whose window has already elapsed, so the map
 * does not grow without bound.
 */
function pruneRateMap(): void {
  // Entries that started before this instant are older than one window.
  const cutoff = Date.now() - RATE_WINDOW_MS;
  rateMap.forEach((entry, ip) => {
    if (entry.windowStart < cutoff) {
      rateMap.delete(ip);
    }
  });
}
|
|
119
|
+
|
|
120
|
+
// ─── Auth ─────────────────────────────────────────────────────────────────────
|
|
121
|
+
|
|
122
|
+
/**
 * Compares two strings without returning early at the first differing
 * character, so the time taken does not reveal how much of a guess matched.
 * Only the length of `candidate` influences the running time.
 */
function constantTimeEquals(candidate: string, secret: string): boolean {
  if (secret.length === 0) return false;
  let diff = candidate.length ^ secret.length;
  for (let i = 0; i < candidate.length; i++) {
    diff |= candidate.charCodeAt(i) ^ secret.charCodeAt(i % secret.length);
  }
  return diff === 0;
}

/**
 * Enforces Bearer-token authentication when an API key is configured.
 *
 * @param request Incoming request; its `Authorization` header is read.
 * @param env     Worker bindings; auth is skipped entirely when `API_KEY`
 *                is unset or empty.
 * @throws SecurityError when the header is missing, is not a Bearer
 *         credential, or carries a token different from `env.API_KEY`.
 */
function checkAuth(request: Request, env: Env): void {
  if (!env.API_KEY) return; // Auth disabled if no key is set

  const authHeader = request.headers.get("Authorization") ?? "";
  // HTTP auth scheme names are case-insensitive, so "bearer" and "BEARER"
  // are accepted as well as "Bearer".
  const token = /^bearer /i.test(authHeader) ? authHeader.slice(7) : "";

  if (!constantTimeEquals(token, env.API_KEY)) {
    throw new SecurityError("Unauthorized. Provide a valid Bearer token.");
  }
}
|
|
132
|
+
|
|
133
|
+
// ─── Helpers ──────────────────────────────────────────────────────────────────
|
|
134
|
+
|
|
135
|
+
/**
 * Picks the key used for per-client rate limiting.
 *
 * Order of preference: the `CF-Connecting-IP` header, then the first
 * non-empty entry of `X-Forwarded-For`, then the literal "unknown".
 * Blank header values are treated as absent, so an empty string is never
 * returned as a key.
 *
 * NOTE(review): `X-Forwarded-For` can be set by the caller; it is only a
 * fallback for when `CF-Connecting-IP` is missing.
 *
 * @param request Incoming request whose headers are inspected.
 * @returns The client address string, or "unknown" when none is available.
 */
function getClientIp(request: Request): string {
  const cfIp = request.headers.get("CF-Connecting-IP")?.trim();
  if (cfIp) return cfIp;

  const forwardedFor = request.headers.get("X-Forwarded-For");
  const firstHop = forwardedFor
    ?.split(",")
    .map((part) => part.trim())
    .find((part) => part.length > 0);

  return firstHop ?? "unknown";
}
|
|
142
|
+
|
|
143
|
+
/**
 * Builds the JSON response returned when a request is rejected.
 *
 * @param message Text placed in the `error` field of the JSON body.
 * @param status  HTTP status code of the response.
 * @returns A Response with body `{"error": message}` and a JSON content type.
 */
function securityErrorResponse(message: string, status: number): Response {
  const payload = JSON.stringify({ error: message });
  const init = {
    status,
    headers: { "Content-Type": "application/json" },
  };
  return new Response(payload, init);
}
|
|
149
|
+
|
|
150
|
+
// ─── Freshness Stamp ──────────────────────────────────────────────────────────
|
|
151
|
+
|
|
152
|
+
function stamp(
|
|
153
|
+
content: string,
|
|
154
|
+
url: string,
|
|
155
|
+
date: string | null,
|
|
156
|
+
confidence: "high" | "medium" | "low",
|
|
157
|
+
adapter: string
|
|
158
|
+
): string {
|
|
24
159
|
const ctx: FreshContext = {
|
|
25
160
|
content: content.slice(0, 6000),
|
|
26
161
|
source_url: url,
|
|
@@ -44,107 +179,133 @@ function stamp(content: string, url: string, date: string | null, confidence: "h
|
|
|
44
179
|
// ─── Server Factory ───────────────────────────────────────────────────────────
|
|
45
180
|
|
|
46
181
|
function createServer(env: Env): McpServer {
|
|
47
|
-
const server = new McpServer({ name: "freshcontext-mcp", version: "0.1.
|
|
182
|
+
const server = new McpServer({ name: "freshcontext-mcp", version: "0.1.3" });
|
|
48
183
|
|
|
49
184
|
// ── extract_github ──────────────────────────────────────────────────────────
|
|
50
185
|
server.registerTool("extract_github", {
|
|
51
186
|
description: "Extract real-time data from a GitHub repository — README, stars, forks, last commit, topics. Returns timestamped freshcontext.",
|
|
52
187
|
inputSchema: z.object({
|
|
53
|
-
url: z.string().url().describe("Full GitHub repo URL"),
|
|
188
|
+
url: z.string().url().describe("Full GitHub repo URL e.g. https://github.com/owner/repo"),
|
|
54
189
|
}),
|
|
55
190
|
annotations: { readOnlyHint: true, openWorldHint: true },
|
|
56
191
|
}, async ({ url }) => {
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
192
|
+
try {
|
|
193
|
+
const safeUrl = validateUrl(url, "github");
|
|
194
|
+
const browser = await puppeteer.launch(env.BROWSER);
|
|
195
|
+
const page = await browser.newPage();
|
|
196
|
+
await page.setUserAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/124.0.0.0 Safari/537.36");
|
|
197
|
+
await page.goto(safeUrl, { waitUntil: "domcontentloaded" });
|
|
198
|
+
|
|
199
|
+
const data = await page.evaluate(`(function() {
|
|
200
|
+
var readme = (document.querySelector('[data-target="readme-toc.content"]') || document.querySelector('.markdown-body') || {}).textContent || null;
|
|
201
|
+
var starsEl = document.querySelector('[id="repo-stars-counter-star"]') || document.querySelector('.Counter.js-social-count');
|
|
202
|
+
var stars = starsEl ? starsEl.textContent.trim() : null;
|
|
203
|
+
var forksEl = document.querySelector('[id="repo-network-counter"]');
|
|
204
|
+
var forks = forksEl ? forksEl.textContent.trim() : null;
|
|
205
|
+
var commitEl = document.querySelector('relative-time');
|
|
206
|
+
var lastCommit = commitEl ? commitEl.getAttribute('datetime') : null;
|
|
207
|
+
var descEl = document.querySelector('.f4.my-3');
|
|
208
|
+
var description = descEl ? descEl.textContent.trim() : null;
|
|
209
|
+
var topics = Array.from(document.querySelectorAll('.topic-tag')).map(function(t) { return t.textContent.trim(); });
|
|
210
|
+
var langEl = document.querySelector('.color-fg-default.text-bold.mr-1');
|
|
211
|
+
var language = langEl ? langEl.textContent.trim() : null;
|
|
212
|
+
return { readme, stars, forks, lastCommit, description, topics, language };
|
|
213
|
+
})()`);
|
|
214
|
+
|
|
215
|
+
await browser.close();
|
|
216
|
+
const d = data as any;
|
|
217
|
+
const raw = [
|
|
218
|
+
`Description: ${d.description ?? "N/A"}`,
|
|
219
|
+
`Stars: ${d.stars ?? "N/A"} | Forks: ${d.forks ?? "N/A"}`,
|
|
220
|
+
`Language: ${d.language ?? "N/A"}`,
|
|
221
|
+
`Last commit: ${d.lastCommit ?? "N/A"}`,
|
|
222
|
+
`Topics: ${d.topics?.join(", ") ?? "none"}`,
|
|
223
|
+
`\n--- README ---\n${d.readme ?? "No README"}`,
|
|
224
|
+
].join("\n");
|
|
225
|
+
return { content: [{ type: "text", text: stamp(raw, safeUrl, d.lastCommit ?? null, d.lastCommit ? "high" : "medium", "github") }] };
|
|
226
|
+
} catch (err: any) {
|
|
227
|
+
return { content: [{ type: "text", text: `[ERROR] ${err.message}` }] };
|
|
228
|
+
}
|
|
82
229
|
});
|
|
83
230
|
|
|
84
231
|
// ── extract_hackernews ──────────────────────────────────────────────────────
|
|
85
232
|
server.registerTool("extract_hackernews", {
|
|
86
|
-
description: "Extract top stories from Hacker News with real-time timestamps.",
|
|
87
|
-
inputSchema: z.object({ url: z.string().url().describe("HN URL") }),
|
|
233
|
+
description: "Extract top stories or search results from Hacker News with real-time timestamps.",
|
|
234
|
+
inputSchema: z.object({ url: z.string().url().describe("HN URL e.g. https://news.ycombinator.com") }),
|
|
88
235
|
annotations: { readOnlyHint: true, openWorldHint: true },
|
|
89
236
|
}, async ({ url }) => {
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
var
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
237
|
+
try {
|
|
238
|
+
const safeUrl = validateUrl(url, "hackernews");
|
|
239
|
+
const browser = await puppeteer.launch(env.BROWSER);
|
|
240
|
+
const page = await browser.newPage();
|
|
241
|
+
await page.goto(safeUrl, { waitUntil: "domcontentloaded" });
|
|
242
|
+
|
|
243
|
+
const data = await page.evaluate(`(function() {
|
|
244
|
+
var items = Array.from(document.querySelectorAll('.athing')).slice(0, 20);
|
|
245
|
+
return items.map(function(el) {
|
|
246
|
+
var titleLineEl = el.querySelector('.titleline > a');
|
|
247
|
+
var title = titleLineEl ? titleLineEl.textContent.trim() : null;
|
|
248
|
+
var link = titleLineEl ? titleLineEl.getAttribute('href') : null;
|
|
249
|
+
var subtext = el.nextElementSibling;
|
|
250
|
+
var scoreEl = subtext ? subtext.querySelector('.score') : null;
|
|
251
|
+
var score = scoreEl ? scoreEl.textContent.trim() : null;
|
|
252
|
+
var ageEl = subtext ? subtext.querySelector('.age') : null;
|
|
253
|
+
var age = ageEl ? ageEl.getAttribute('title') : null;
|
|
254
|
+
return { title, link, score, age };
|
|
255
|
+
});
|
|
256
|
+
})()`);
|
|
257
|
+
|
|
258
|
+
await browser.close();
|
|
259
|
+
const items = data as any[];
|
|
260
|
+
const raw = items.map((r, i) =>
|
|
261
|
+
`[${i + 1}] ${r.title}\nURL: ${r.link}\nScore: ${r.score ?? "N/A"}\nPosted: ${r.age ?? "unknown"}`
|
|
262
|
+
).join("\n\n");
|
|
263
|
+
const newest = items.map(r => r.age).filter(Boolean).sort().reverse()[0] ?? null;
|
|
264
|
+
return { content: [{ type: "text", text: stamp(raw, safeUrl, newest, newest ? "high" : "medium", "hackernews") }] };
|
|
265
|
+
} catch (err: any) {
|
|
266
|
+
return { content: [{ type: "text", text: `[ERROR] ${err.message}` }] };
|
|
267
|
+
}
|
|
114
268
|
});
|
|
115
269
|
|
|
116
270
|
// ── extract_scholar ─────────────────────────────────────────────────────────
|
|
117
271
|
server.registerTool("extract_scholar", {
|
|
118
272
|
description: "Extract research results from Google Scholar with publication dates.",
|
|
119
|
-
inputSchema: z.object({ url: z.string().url().describe("Google Scholar URL") }),
|
|
273
|
+
inputSchema: z.object({ url: z.string().url().describe("Google Scholar search URL") }),
|
|
120
274
|
annotations: { readOnlyHint: true, openWorldHint: true },
|
|
121
275
|
}, async ({ url }) => {
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
var
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
276
|
+
try {
|
|
277
|
+
const safeUrl = validateUrl(url, "scholar");
|
|
278
|
+
const browser = await puppeteer.launch(env.BROWSER);
|
|
279
|
+
const page = await browser.newPage();
|
|
280
|
+
await page.setUserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 Chrome/124.0.0.0 Safari/537.36");
|
|
281
|
+
await page.goto(safeUrl, { waitUntil: "domcontentloaded" });
|
|
282
|
+
|
|
283
|
+
const data = await page.evaluate(`(function() {
|
|
284
|
+
var items = Array.from(document.querySelectorAll('.gs_r.gs_or.gs_scl'));
|
|
285
|
+
return items.map(function(el) {
|
|
286
|
+
var titleEl = el.querySelector('.gs_rt');
|
|
287
|
+
var title = titleEl ? titleEl.textContent.trim() : null;
|
|
288
|
+
var authorsEl = el.querySelector('.gs_a');
|
|
289
|
+
var authors = authorsEl ? authorsEl.textContent.trim() : null;
|
|
290
|
+
var snippetEl = el.querySelector('.gs_rs');
|
|
291
|
+
var snippet = snippetEl ? snippetEl.textContent.trim() : null;
|
|
292
|
+
var yearMatch = authors ? authors.match(/\\b(19|20)\\d{2}\\b/) : null;
|
|
293
|
+
var year = yearMatch ? yearMatch[0] : null;
|
|
294
|
+
return { title, authors, snippet, year };
|
|
295
|
+
});
|
|
296
|
+
})()`);
|
|
297
|
+
|
|
298
|
+
await browser.close();
|
|
299
|
+
const items = data as any[];
|
|
300
|
+
const raw = items.map((r, i) =>
|
|
301
|
+
`[${i + 1}] ${r.title ?? "Untitled"}\nAuthors: ${r.authors ?? "Unknown"}\nYear: ${r.year ?? "Unknown"}\nSnippet: ${r.snippet ?? "N/A"}`
|
|
302
|
+
).join("\n\n");
|
|
303
|
+
const years = items.map(r => r.year).filter(Boolean).sort().reverse();
|
|
304
|
+
const newest = years[0] ?? null;
|
|
305
|
+
return { content: [{ type: "text", text: stamp(raw, safeUrl, newest ? `${newest}-01-01` : null, newest ? "high" : "low", "google_scholar") }] };
|
|
306
|
+
} catch (err: any) {
|
|
307
|
+
return { content: [{ type: "text", text: `[ERROR] ${err.message}` }] };
|
|
308
|
+
}
|
|
148
309
|
});
|
|
149
310
|
|
|
150
311
|
return server;
|
|
@@ -154,6 +315,23 @@ function createServer(env: Env): McpServer {
|
|
|
154
315
|
|
|
155
316
|
export default {
|
|
156
317
|
async fetch(request: Request, env: Env): Promise<Response> {
|
|
318
|
+
// Prune stale rate limit entries occasionally
|
|
319
|
+
if (Math.random() < 0.05) pruneRateMap();
|
|
320
|
+
|
|
321
|
+
try {
|
|
322
|
+
// 1. Auth check
|
|
323
|
+
checkAuth(request, env);
|
|
324
|
+
|
|
325
|
+
// 2. Rate limit check
|
|
326
|
+
const ip = getClientIp(request);
|
|
327
|
+
checkRateLimit(ip);
|
|
328
|
+
|
|
329
|
+
} catch (err: any) {
|
|
330
|
+
const status = err.message.startsWith("Unauthorized") ? 401 : 429;
|
|
331
|
+
return securityErrorResponse(err.message, status);
|
|
332
|
+
}
|
|
333
|
+
|
|
334
|
+
// 3. Handle MCP request
|
|
157
335
|
const transport = new WebStandardStreamableHTTPServerTransport();
|
|
158
336
|
const server = createServer(env);
|
|
159
337
|
await server.connect(transport);
|