freshcontext-mcp 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.env.example ADDED
@@ -0,0 +1,8 @@
1
+ # freshcontext-mcp environment variables
2
+ # Copy to .env and fill in
3
+
4
+ # Optional: GitHub Personal Access Token (increases rate limits for GitHub API fallback)
5
+ GITHUB_TOKEN=
6
+
7
+ # Optional: Proxy URL if needed for certain extractions
8
+ # PROXY_URL=http://user:pass@host:port
package/README.md ADDED
@@ -0,0 +1,71 @@
1
+ # freshcontext-mcp
2
+
3
+ > Real-time web extraction MCP server with guaranteed freshness timestamps for AI agents.
4
+
5
+ ## The Problem
6
+
7
+ LLMs hallucinate recency. They'll cite a 2022 job posting as "current" or recall outdated API docs as if they're live. This happens because they have no reliable signal for *when* data was retrieved vs. when it was published.
8
+
9
+ ## The Fix
10
+
11
+ Every piece of data extracted by `freshcontext-mcp` is wrapped in a `FreshContext` envelope:
12
+
13
+ ```json
14
+ {
15
+ "content": "...",
16
+ "source_url": "https://github.com/owner/repo",
17
+ "content_date": "2024-11-03",
18
+ "retrieved_at": "2026-03-02T10:14:00Z",
19
+ "freshness_confidence": "high",
20
+ "adapter": "github"
21
+ }
22
+ ```
23
+
24
+ The AI agent always knows *when* it's looking at the data, not just *what* it's looking at.
25
+
26
+ ## Adapters
27
+
28
+ | Adapter | Tool Name | What it extracts |
29
+ |---|---|---|
30
+ | GitHub | `extract_github` | README, stars, forks, last commit, topics |
31
+ | Google Scholar | `extract_scholar` | Titles, authors, years, snippets |
32
+ | Hacker News | `extract_hackernews` | Top stories, scores, post timestamps |
33
+
34
+ ## Setup
35
+
36
+ ```bash
37
+ git clone https://github.com/YOUR_USERNAME/freshcontext-mcp
38
+ cd freshcontext-mcp
39
+ npm install
40
+ npx playwright install chromium
41
+ npm run build
42
+ ```
43
+
44
+ ## Test locally
45
+
46
+ ```bash
47
+ npm run inspect
48
+ ```
49
+
50
+ ## Connect to Claude
51
+
52
+ Add to your `claude_desktop_config.json`:
53
+
54
+ ```json
55
+ {
56
+ "mcpServers": {
57
+ "freshcontext": {
58
+ "command": "node",
59
+ "args": ["/absolute/path/to/freshcontext-mcp/dist/server.js"]
60
+ }
61
+ }
62
+ }
63
+ ```
64
+
65
+ ## Roadmap
66
+
67
+ - [ ] Twitter/X public feed adapter
68
+ - [ ] Dev.to / Hashnode adapter
69
+ - [ ] Supabase changelog adapter
70
+ - [ ] Cloudflare Worker deployment
71
+ - [ ] Caching layer with TTL
@@ -0,0 +1,41 @@
1
import { chromium } from "playwright";

/**
 * Extract real-time signals from a GitHub repository page via headless
 * Chromium: README text, stars, forks, last-commit timestamp, description,
 * topics, and primary language.
 *
 * @param {{ url: string, maxLength?: number }} options - full repo URL plus an
 *   optional cap on the returned `raw` text (default 6000 chars).
 * @returns {Promise<{raw: string, content_date: string|null, freshness_confidence: "high"|"medium"}>}
 *   `content_date` is the repo's newest visible commit datetime, when found.
 */
export async function githubAdapter(options) {
    const browser = await chromium.launch({ headless: true });
    try {
        const page = await browser.newPage();
        // Spoof a real browser UA to avoid bot detection
        await page.setExtraHTTPHeaders({
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
        });
        await page.goto(options.url, { waitUntil: "domcontentloaded", timeout: 20000 });
        // Extract key repo signals — string-form evaluate with no inner named
        // functions, to avoid esbuild __name injection
        const data = await page.evaluate(`(function() {
            var readme = (document.querySelector('[data-target="readme-toc.content"]') || document.querySelector('.markdown-body') || {}).textContent || null;
            var starsEl = document.querySelector('[id="repo-stars-counter-star"]') || document.querySelector('.Counter.js-social-count');
            var stars = starsEl ? starsEl.textContent.trim() : null;
            var forksEl = document.querySelector('[id="repo-network-counter"]');
            var forks = forksEl ? forksEl.textContent.trim() : null;
            var commitEl = document.querySelector('relative-time');
            var lastCommit = commitEl ? commitEl.getAttribute('datetime') : null;
            var descEl = document.querySelector('.f4.my-3');
            var description = descEl ? descEl.textContent.trim() : null;
            var topics = Array.from(document.querySelectorAll('.topic-tag')).map(function(t) { return t.textContent.trim(); });
            var langEl = document.querySelector('.color-fg-default.text-bold.mr-1');
            var language = langEl ? langEl.textContent.trim() : null;
            return { readme: readme, stars: stars, forks: forks, lastCommit: lastCommit, description: description, topics: topics, language: language };
        })()`);
        const typedData = data;
        const raw = [
            `Description: ${typedData.description ?? "N/A"}`,
            `Stars: ${typedData.stars ?? "N/A"} | Forks: ${typedData.forks ?? "N/A"}`,
            `Language: ${typedData.language ?? "N/A"}`,
            `Last commit: ${typedData.lastCommit ?? "N/A"}`,
            // fix: an empty topics array used to join to "" — show "none" instead
            `Topics: ${typedData.topics?.length ? typedData.topics.join(", ") : "none"}`,
            `\n--- README ---\n${typedData.readme ?? "No README found"}`,
        ].join("\n")
            // fix: maxLength was accepted but never applied (default matches the
            // server-side schema default of 6000)
            .slice(0, options.maxLength ?? 6000);
        return {
            raw,
            content_date: typedData.lastCommit ?? null,
            freshness_confidence: typedData.lastCommit ? "high" : "medium",
        };
    }
    finally {
        // fix: close the browser even when navigation/extraction throws —
        // previously every failed call leaked a Chromium process.
        await browser.close();
    }
}
@@ -0,0 +1,65 @@
1
import { chromium } from "playwright";

/**
 * Extract Hacker News stories. Two modes:
 *  - Algolia REST API (no browser) when given an hn.algolia.com API URL or the
 *    "hn-search:<query>" shorthand — returns up to 20 stories with exact
 *    created_at timestamps.
 *  - Headless-Chromium scrape of an HN HTML page otherwise (front page, etc.).
 *
 * @param {{ url: string, maxLength?: number }} options - HN URL / query plus an
 *   optional cap on the returned `raw` text (default 4000 chars).
 * @returns {Promise<{raw: string, content_date: string|null, freshness_confidence: "high"|"medium"}>}
 */
export async function hackerNewsAdapter(options) {
    const url = options.url;
    // If it's an Algolia API URL or search query, use the REST API directly (no browser)
    if (url.includes("hn.algolia.com/api/") || url.startsWith("hn-search:")) {
        const query = url.startsWith("hn-search:")
            ? url.replace("hn-search:", "").trim()
            : url;
        const apiUrl = url.includes("hn.algolia.com/api/")
            ? url
            : `https://hn.algolia.com/api/v1/search?query=${encodeURIComponent(query)}&tags=story&hitsPerPage=20`;
        const res = await fetch(apiUrl);
        if (!res.ok)
            throw new Error(`HN Algolia API error: ${res.status}`);
        const data = await res.json();
        const raw = data.hits
            .map((r, i) => [
            `[${i + 1}] ${r.title ?? "Untitled"}`,
            `URL: ${r.url ?? `https://news.ycombinator.com/item?id=${r.objectID}`}`,
            `Score: ${r.points} points | ${r.num_comments} comments`,
            `Author: ${r.author} | Posted: ${r.created_at}`,
        ].join("\n"))
            .join("\n\n")
            .slice(0, options.maxLength ?? 4000);
        // ISO timestamps sort lexicographically, so a plain string sort works
        const newest = data.hits.map((r) => r.created_at).sort().reverse()[0] ?? null;
        return { raw, content_date: newest, freshness_confidence: newest ? "high" : "medium" };
    }
    // Default: browser-based scrape for HN front page or search pages
    const browser = await chromium.launch({ headless: true });
    try {
        const page = await browser.newPage();
        await page.goto(url, { waitUntil: "domcontentloaded", timeout: 20000 });
        const data = await page.evaluate(`(function() {
            var items = Array.from(document.querySelectorAll('.athing')).slice(0, 20);
            var results = items.map(function(el) {
                var titleLineEl = el.querySelector('.titleline > a');
                var title = titleLineEl ? titleLineEl.textContent.trim() : null;
                var link = titleLineEl ? titleLineEl.getAttribute('href') : null;
                var subtext = el.nextElementSibling;
                var scoreEl = subtext ? subtext.querySelector('.score') : null;
                var score = scoreEl ? scoreEl.textContent.trim() : null;
                var ageEl = subtext ? subtext.querySelector('.age') : null;
                var age = ageEl ? ageEl.getAttribute('title') : null;
                var anchors = subtext ? subtext.querySelectorAll('a') : [];
                var commentLink = anchors.length > 0 ? anchors[anchors.length - 1].textContent.trim() : null;
                return { title: title, link: link, score: score, age: age, commentLink: commentLink };
            });
            return results;
        })()`);
        const typedData = data;
        const raw = typedData
            .map((r, i) => [
            `[${i + 1}] ${r.title ?? "Untitled"}`,
            `URL: ${r.link ?? "N/A"}`,
            `Score: ${r.score ?? "N/A"} | ${r.commentLink ?? ""}`,
            `Posted: ${r.age ?? "unknown"}`,
        ].join("\n"))
            .join("\n\n")
            // fix: the scrape path ignored maxLength (only the Algolia path sliced)
            .slice(0, options.maxLength ?? 4000);
        // The .age title attribute starts with an ISO datetime, so string sort works
        const newestDate = typedData.map((r) => r.age).filter(Boolean).sort().reverse()[0] ?? null;
        return {
            raw,
            content_date: newestDate,
            freshness_confidence: newestDate ? "high" : "medium",
        };
    }
    finally {
        // fix: close the browser even on navigation/extraction failure —
        // previously an error here leaked a Chromium process.
        await browser.close();
    }
}
@@ -0,0 +1,75 @@
1
// Uses npm registry API + PyPI JSON API (no auth needed)
/**
 * Look up package metadata on npm and/or PyPI.
 *
 * `options.url` is a package name or a comma-separated list; each entry may be
 * prefixed with "npm:" or "pypi:" to pin the ecosystem. Unprefixed names try
 * npm first, then fall back to PyPI.
 *
 * @param {{ url: string, maxLength?: number }} options
 * @returns {Promise<{raw: string, content_date: string|null, freshness_confidence: "high"|"low"}>}
 *   `content_date` is the most recent publish date seen across all packages.
 */
export async function packageTrendsAdapter(options) {
    // Strip an accidentally pasted scheme so URLs still resolve to a bare name
    const raw_input = options.url.replace(/^https?:\/\//, "").trim();
    // Parse ecosystem prefix
    const parts = raw_input.split(",").map((s) => s.trim());
    const results = [];
    let latestDate = null;
    for (const pkg of parts) {
        const isExplicitPypi = pkg.startsWith("pypi:");
        const isExplicitNpm = pkg.startsWith("npm:");
        const pkgName = pkg.replace(/^(pypi:|npm:)/, "");
        // Try npm
        if (!isExplicitPypi) {
            try {
                const npmRes = await fetch(`https://registry.npmjs.org/${encodeURIComponent(pkgName)}`, {
                    headers: { Accept: "application/json" },
                });
                if (npmRes.ok) {
                    const npmData = await npmRes.json();
                    const latestVersion = npmData["dist-tags"]?.latest ?? "unknown";
                    const modified = npmData.time?.modified ?? null;
                    const created = npmData.time?.created ?? null;
                    // npmData.time maps each published version (plus created/modified) to a date
                    const versions = Object.keys(npmData.time ?? {}).filter((k) => !["created", "modified"].includes(k)).length;
                    // ISO dates compare correctly as strings
                    if (modified && (!latestDate || modified > latestDate))
                        latestDate = modified;
                    results.push([
                        `📦 [npm] ${npmData.name}`,
                        `Latest version: ${latestVersion}`,
                        `Total versions: ${versions}`,
                        `Description: ${npmData.description ?? "N/A"}`,
                        // fix: an empty keywords array used to join to "" — show "none"
                        `Keywords: ${npmData.keywords?.length ? npmData.keywords.join(", ") : "none"}`,
                        `Created: ${created ?? "unknown"}`,
                        `Last updated: ${modified ?? "unknown"}`,
                        `Homepage: ${npmData.homepage ?? "N/A"}`,
                    ].join("\n"));
                    continue;
                }
            }
            catch { /* network/parse error — fall through to PyPI */ }
        }
        // Try PyPI
        if (!isExplicitNpm) {
            try {
                const pypiRes = await fetch(`https://pypi.org/pypi/${encodeURIComponent(pkgName)}/json`);
                if (pypiRes.ok) {
                    const pypiData = await pypiRes.json();
                    const info = pypiData.info;
                    const releaseCount = Object.keys(pypiData.releases ?? {}).length;
                    const latestUpload = pypiData.urls?.[0]?.upload_time ?? null;
                    if (latestUpload && (!latestDate || latestUpload > latestDate))
                        latestDate = latestUpload;
                    results.push([
                        `🐍 [PyPI] ${info.name}`,
                        `Latest version: ${info.version}`,
                        `Total releases: ${releaseCount}`,
                        `Description: ${info.summary ?? "N/A"}`,
                        // fix: PyPI commonly returns "" for keywords/home_page; `??`
                        // let the empty string through — use || so "none"/fallbacks apply
                        `Keywords: ${info.keywords || "none"}`,
                        `Last release: ${latestUpload ?? "unknown"}`,
                        `Homepage: ${info.home_page || info.project_urls?.Homepage || "N/A"}`,
                    ].join("\n"));
                    continue;
                }
            }
            catch { /* not found */ }
        }
        results.push(`❌ Package not found on npm or PyPI: ${pkgName}`);
    }
    return {
        raw: results.join("\n\n").slice(0, options.maxLength ?? 5000),
        content_date: latestDate,
        freshness_confidence: latestDate ? "high" : "low",
    };
}
@@ -0,0 +1,54 @@
1
// Uses GitHub Search API (no auth needed for basic search)
/**
 * Search GitHub for repositories matching a keyword (or a pasted github.com
 * search URL) and report the top 10 results by stars with activity signals.
 *
 * @param {{ url: string, maxLength?: number }} options - query string or URL,
 *   plus an optional cap on the returned `raw` text (default 6000 chars).
 * @returns {Promise<{raw: string, content_date: string|null, freshness_confidence: "high"|"low"}>}
 *   `content_date` is the most recent `pushed_at` among the results.
 */
export async function repoSearchAdapter(options) {
    // options.url is treated as the search query string
    // e.g. "mcp server typescript" or a full GitHub search URL
    let query = options.url;
    // If it's a full URL, extract the query param
    try {
        const parsed = new URL(options.url);
        if (parsed.hostname === "github.com" && parsed.pathname.includes("/search")) {
            query = parsed.searchParams.get("q") ?? options.url;
        }
        else if (parsed.hostname === "github.com") {
            // It's a direct URL — not a search; fall back to searching the path text
            query = parsed.pathname.replace("/search", "").trim().replace(/^\//, "");
        }
    }
    catch {
        // plain string query, use as-is
    }
    const apiUrl = `https://api.github.com/search/repositories?q=${encodeURIComponent(query)}&sort=stars&order=desc&per_page=10`;
    const res = await fetch(apiUrl, {
        headers: {
            Accept: "application/vnd.github.v3+json",
            // GitHub's API requires a User-Agent header
            "User-Agent": "freshcontext-mcp/0.1.0",
        },
    });
    if (!res.ok) {
        throw new Error(`GitHub Search API error: ${res.status} ${await res.text()}`);
    }
    const data = await res.json();
    const raw = [
        `Total matching repos: ${data.total_count.toLocaleString()}`,
        `Top ${data.items.length} by stars:\n`,
        ...data.items.map((r, i) => [
            `[${i + 1}] ${r.full_name}`,
            `⭐ ${r.stargazers_count.toLocaleString()} stars | 🍴 ${r.forks_count} forks | Issues: ${r.open_issues_count}`,
            `Language: ${r.language ?? "unknown"}`,
            `Topics: ${r.topics?.join(", ") || "none"}`,
            `Description: ${r.description ?? "N/A"}`,
            `Last push: ${r.pushed_at}`,
            `Created: ${r.created_at}`,
            `URL: ${r.html_url}`,
        ].join("\n")),
    ]
        .join("\n\n")
        .slice(0, options.maxLength ?? 6000);
    // Most recently pushed repo date as content_date (ISO strings sort lexically)
    const dates = data.items.map((r) => r.pushed_at).sort().reverse();
    return {
        raw,
        content_date: dates[0] ?? null,
        // fix: was hard-coded "high" even when the search returned zero items
        // and content_date was null
        freshness_confidence: dates[0] ? "high" : "low",
    };
}
@@ -0,0 +1,51 @@
1
import { chromium } from "playwright";

/**
 * Extract research results from a Google Scholar search page via headless
 * Chromium: titles, authors, snippets, links, and a best-effort publication
 * year parsed from the author/venue line.
 *
 * @param {{ url: string, maxLength?: number }} options - Scholar search URL plus
 *   an optional cap on the returned `raw` text (default 6000 chars).
 * @returns {Promise<{raw: string, content_date: string|null, freshness_confidence: "high"|"low"}>}
 *   `content_date` is Jan 1 of the newest year found, or null.
 */
export async function scholarAdapter(options) {
    const browser = await chromium.launch({ headless: true });
    try {
        const page = await browser.newPage();
        // Spoof a real browser UA — Scholar blocks obvious automation
        await page.setExtraHTTPHeaders({
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
        });
        await page.goto(options.url, { waitUntil: "domcontentloaded", timeout: 20000 });
        // String-form evaluate, no inner named functions (avoids esbuild __name injection)
        const data = await page.evaluate(`(function() {
            var items = Array.from(document.querySelectorAll('.gs_r.gs_or.gs_scl'));
            var results = items.map(function(el) {
                var titleEl = el.querySelector('.gs_rt');
                var title = titleEl ? titleEl.textContent.trim() : null;
                var authorsEl = el.querySelector('.gs_a');
                var authors = authorsEl ? authorsEl.textContent.trim() : null;
                var snippetEl = el.querySelector('.gs_rs');
                var snippet = snippetEl ? snippetEl.textContent.trim() : null;
                var linkEl = el.querySelector('.gs_rt a');
                var link = linkEl ? linkEl.getAttribute('href') : null;
                var yearMatch = authors ? authors.match(/\\b(19|20)\\d{2}\\b/) : null;
                var year = yearMatch ? yearMatch[0] : null;
                return { title: title, authors: authors, snippet: snippet, link: link, year: year };
            });
            return results;
        })()`);
        const typedData = data;
        if (!typedData.length) {
            return {
                raw: "No results found on this Scholar page.",
                content_date: null,
                freshness_confidence: "low",
            };
        }
        const raw = typedData
            .map((r, i) => [
            `[${i + 1}] ${r.title ?? "Untitled"}`,
            `Authors: ${r.authors ?? "Unknown"}`,
            `Year: ${r.year ?? "Unknown"}`,
            `Snippet: ${r.snippet ?? "N/A"}`,
            `Link: ${r.link ?? "N/A"}`,
        ].join("\n"))
            .join("\n\n")
            // fix: maxLength was accepted but never applied (default matches the
            // server-side schema default of 6000)
            .slice(0, options.maxLength ?? 6000);
        // Four-digit years sort correctly as strings
        const years = typedData.map((r) => r.year).filter(Boolean);
        const newestYear = years.sort().reverse()[0] ?? null;
        return {
            raw,
            content_date: newestYear ? `${newestYear}-01-01` : null,
            freshness_confidence: newestYear ? "high" : "low",
        };
    }
    finally {
        // fix: close the browser even when navigation/extraction throws —
        // previously every failed call leaked a Chromium process.
        await browser.close();
    }
}
@@ -0,0 +1,81 @@
1
import { chromium } from "playwright";

/**
 * Scrape Y Combinator company listings (name, batch, tags, description, link)
 * from the React-rendered directory at ycombinator.com/companies.
 *
 * Uses two in-page extraction strategies: structured "_company_" card divs,
 * then a fallback to anchor links pointing at /companies/* pages.
 *
 * @param {{ url: string, maxLength?: number }} options - YC companies URL plus
 *   an optional cap on the returned `raw` text (default 6000 chars).
 * @returns {Promise<{raw: string, content_date: string|null, freshness_confidence: "high"|"low"}>}
 *   `content_date` is today's date: the directory reflects its current state.
 */
export async function ycAdapter(options) {
    const browser = await chromium.launch({ headless: true });
    try {
        const page = await browser.newPage();
        // YC company directory is React-rendered — wait for network to settle
        await page.goto(options.url, { waitUntil: "networkidle", timeout: 30000 });
        // Wait for company cards to appear; tolerate a miss (fallbacks below)
        await page.waitForSelector('a[href*="/companies/"]', { timeout: 15000 }).catch(() => null);
        const data = await page.evaluate(`(function() {
            // YC company cards — robust multi-strategy extraction
            var results = [];

            // Strategy 1: structured company divs with name + description + batch
            var cards = Array.from(document.querySelectorAll('div[class*="_company_"]'));

            if (cards.length === 0) {
                // Strategy 2: anchor links to /companies/* pages
                cards = Array.from(document.querySelectorAll('a[href*="/companies/"]'))
                    .filter(function(el) {
                        return el.querySelector('span, p, div');
                    });
            }

            cards.slice(0, 25).forEach(function(el) {
                var allText = el.innerText || el.textContent || "";
                var lines = allText.split('\\n').map(function(l) { return l.trim(); }).filter(Boolean);

                // Try to find structured spans
                var spans = Array.from(el.querySelectorAll('span'));
                var name = null, description = null, batch = null;
                var tags = [];

                spans.forEach(function(s) {
                    var t = s.textContent.trim();
                    if (!t) return;
                    if (s.className && s.className.toString().includes('Name')) name = t;
                    else if (s.className && s.className.toString().includes('Desc')) description = t;
                    else if (s.className && s.className.toString().includes('Batch')) batch = t;
                    else if (s.className && s.className.toString().includes('Tag')) tags.push(t);
                });

                // Fallback to line parsing
                if (!name && lines.length > 0) name = lines[0];
                if (!description && lines.length > 1) description = lines[1];

                var link = el.tagName === 'A'
                    ? el.getAttribute('href')
                    : (el.querySelector('a') ? el.querySelector('a').getAttribute('href') : null);

                if (name && name.length > 1 && name.length < 80) {
                    results.push({ name, description, batch, tags, link });
                }
            });

            return results;
        })()`);
        const typedData = data;
        if (!typedData.length) {
            return {
                raw: "No YC companies found — page may have changed structure. Try visiting: " + options.url,
                content_date: null,
                freshness_confidence: "low",
            };
        }
        const raw = typedData
            .map((r, i) => [
            `[${i + 1}] ${r.name ?? "Unknown"}`,
            `Batch: ${r.batch ?? "Unknown"}`,
            `Tags: ${r.tags?.join(", ") || "none"}`,
            `Description: ${r.description ?? "N/A"}`,
            `Link: ${r.link ? (r.link.startsWith("http") ? r.link : "https://www.ycombinator.com" + r.link) : "N/A"}`,
        ].join("\n"))
            .join("\n\n")
            .slice(0, options.maxLength ?? 6000);
        return {
            raw,
            content_date: new Date().toISOString().split("T")[0],
            freshness_confidence: "high",
        };
    }
    finally {
        // fix: close the browser even when navigation/extraction throws —
        // previously every failed call leaked a Chromium process.
        await browser.close();
    }
}
package/dist/server.js ADDED
@@ -0,0 +1,129 @@
1
// freshcontext-mcp MCP server entry point (compiled output).
// Registers one MCP tool per extraction adapter. Every tool wraps its adapter
// result in a timestamped FreshContext envelope via stampFreshness() and
// renders it for the model with formatForLLM().
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
import { z } from "zod";
import { githubAdapter } from "./adapters/github.js";
import { scholarAdapter } from "./adapters/scholar.js";
import { hackerNewsAdapter } from "./adapters/hackernews.js";
import { ycAdapter } from "./adapters/yc.js";
import { repoSearchAdapter } from "./adapters/repoSearch.js";
import { packageTrendsAdapter } from "./adapters/packageTrends.js";
import { stampFreshness, formatForLLM } from "./tools/freshnessStamp.js";
const server = new McpServer({
    name: "freshcontext-mcp",
    version: "0.1.0",
});
// ─── Tool: extract_github ────────────────────────────────────────────────────
server.registerTool("extract_github", {
    description: "Extract real-time data from a GitHub repository — README, stars, forks, language, topics, last commit. Returns timestamped freshcontext.",
    inputSchema: z.object({
        url: z.string().url().describe("Full GitHub repo URL e.g. https://github.com/owner/repo"),
        max_length: z.number().optional().default(6000).describe("Max content length"),
    }),
    // readOnlyHint: the tool never mutates anything; openWorldHint: it reaches
    // out to the public web
    annotations: { readOnlyHint: true, openWorldHint: true },
}, async ({ url, max_length }) => {
    const result = await githubAdapter({ url, maxLength: max_length });
    const ctx = stampFreshness(result, { url, maxLength: max_length }, "github");
    return { content: [{ type: "text", text: formatForLLM(ctx) }] };
});
// ─── Tool: extract_scholar ───────────────────────────────────────────────────
server.registerTool("extract_scholar", {
    description: "Extract research results from a Google Scholar search URL. Returns titles, authors, publication years, and snippets — all timestamped.",
    inputSchema: z.object({
        url: z.string().url().describe("Google Scholar search URL e.g. https://scholar.google.com/scholar?q=..."),
        max_length: z.number().optional().default(6000),
    }),
    annotations: { readOnlyHint: true, openWorldHint: true },
}, async ({ url, max_length }) => {
    const result = await scholarAdapter({ url, maxLength: max_length });
    const ctx = stampFreshness(result, { url, maxLength: max_length }, "google_scholar");
    return { content: [{ type: "text", text: formatForLLM(ctx) }] };
});
// ─── Tool: extract_hackernews ────────────────────────────────────────────────
server.registerTool("extract_hackernews", {
    description: "Extract top stories or search results from Hacker News. Real-time dev/tech community sentiment with post timestamps.",
    inputSchema: z.object({
        url: z.string().url().describe("HN URL e.g. https://news.ycombinator.com or https://hn.algolia.com/?q=..."),
        max_length: z.number().optional().default(4000),
    }),
    annotations: { readOnlyHint: true, openWorldHint: true },
}, async ({ url, max_length }) => {
    const result = await hackerNewsAdapter({ url, maxLength: max_length });
    const ctx = stampFreshness(result, { url, maxLength: max_length }, "hackernews");
    return { content: [{ type: "text", text: formatForLLM(ctx) }] };
});
// ─── Tool: extract_yc ──────────────────────────────────────────────────────────
server.registerTool("extract_yc", {
    description: "Scrape YC company listings. Use https://www.ycombinator.com/companies?query=KEYWORD to find startups in a space. Returns name, batch, tags, description per company with freshness timestamp.",
    inputSchema: z.object({
        url: z.string().url().describe("YC companies URL e.g. https://www.ycombinator.com/companies?query=mcp"),
        max_length: z.number().optional().default(6000),
    }),
    annotations: { readOnlyHint: true, openWorldHint: true },
}, async ({ url, max_length }) => {
    const result = await ycAdapter({ url, maxLength: max_length });
    const ctx = stampFreshness(result, { url, maxLength: max_length }, "ycombinator");
    return { content: [{ type: "text", text: formatForLLM(ctx) }] };
});
// ─── Tool: search_repos ──────────────────────────────────────────────────────
// Note: this tool takes a `query` string (not a URL); the adapter accepts it
// via its `url` option.
server.registerTool("search_repos", {
    description: "Search GitHub for repositories matching a keyword or topic. Returns top results by stars with activity signals. Use to find competitors, similar tools, or related projects.",
    inputSchema: z.object({
        query: z.string().describe("Search query e.g. 'mcp server typescript' or 'cashflow prediction python'"),
        max_length: z.number().optional().default(6000),
    }),
    annotations: { readOnlyHint: true, openWorldHint: true },
}, async ({ query, max_length }) => {
    const result = await repoSearchAdapter({ url: query, maxLength: max_length });
    const ctx = stampFreshness(result, { url: query, maxLength: max_length }, "github_search");
    return { content: [{ type: "text", text: formatForLLM(ctx) }] };
});
// ─── Tool: package_trends ────────────────────────────────────────────────────
server.registerTool("package_trends", {
    description: "Look up npm and PyPI package metadata — version history, release cadence, last updated. Use to gauge ecosystem activity around a tool or dependency. Supports comma-separated list of packages.",
    inputSchema: z.object({
        packages: z.string().describe("Package name(s) e.g. 'langchain' or 'npm:zod,pypi:fastapi'"),
        max_length: z.number().optional().default(5000),
    }),
    annotations: { readOnlyHint: true, openWorldHint: true },
}, async ({ packages, max_length }) => {
    const result = await packageTrendsAdapter({ url: packages, maxLength: max_length });
    const ctx = stampFreshness(result, { url: packages, maxLength: max_length }, "package_registry");
    return { content: [{ type: "text", text: formatForLLM(ctx) }] };
});
// ─── Tool: extract_landscape ─────────────────────────────────────────────────
server.registerTool("extract_landscape", {
    description: "Composite intelligence tool. Given a project idea or keyword, simultaneously queries YC startups, GitHub repos, HN sentiment, and package activity to answer: Who is building this? Is it funded? What's getting traction? Returns a unified timestamped landscape report.",
    inputSchema: z.object({
        topic: z.string().describe("Your project idea or keyword e.g. 'mcp server' or 'cashflow prediction'"),
        max_length: z.number().optional().default(8000),
    }),
    annotations: { readOnlyHint: true, openWorldHint: true },
}, async ({ topic, max_length }) => {
    // Budget each of the four sections a quarter of the overall length cap
    const perSection = Math.floor((max_length ?? 8000) / 4);
    // allSettled: one failing source degrades to an inline error section
    // instead of failing the whole report
    const [ycResult, repoResult, hnResult, pkgResult] = await Promise.allSettled([
        ycAdapter({ url: `https://www.ycombinator.com/companies?query=${encodeURIComponent(topic)}`, maxLength: perSection }),
        repoSearchAdapter({ url: topic, maxLength: perSection }),
        hackerNewsAdapter({ url: `https://hn.algolia.com/api/v1/search?query=${encodeURIComponent(topic)}&tags=story&hitsPerPage=15`, maxLength: perSection }),
        packageTrendsAdapter({ url: topic, maxLength: perSection }),
    ]);
    // Render one markdown section per source, inlining the error on failure
    const section = (label, result) => result.status === "fulfilled"
        ? `## ${label}\n${result.value.raw}`
        : `## ${label}\n[Error: ${result.reason}]`;
    const combined = [
        `# Landscape Report: "${topic}"`,
        `Generated: ${new Date().toISOString()}`,
        "",
        section("🚀 YC Startups in this space", ycResult),
        section("📦 Top GitHub repos", repoResult),
        section("💬 HN sentiment (last month)", hnResult),
        section("📊 Package ecosystem", pkgResult),
    ].join("\n\n");
    return { content: [{ type: "text", text: combined }] };
});
// ─── Start ───────────────────────────────────────────────────────────────────
async function main() {
    const transport = new StdioServerTransport();
    await server.connect(transport);
    // Log to stderr: stdout is reserved for the stdio MCP transport
    console.error("freshcontext-mcp running on stdio");
}
main().catch(console.error);
@@ -0,0 +1,25 @@
1
/**
 * Wrap an adapter result in the FreshContext envelope: the (possibly
 * truncated) content plus provenance and freshness metadata.
 *
 * @param {{ raw: string, content_date: string|null, freshness_confidence: string }} result
 * @param {{ url: string, maxLength?: number }} options
 * @param {string} adapter - short identifier of the adapter that produced `result`
 * @returns {object} FreshContext envelope; `retrieved_at` is stamped now (ISO 8601)
 */
export function stampFreshness(result, options, adapter) {
    const charLimit = options.maxLength ?? 8000;
    const envelope = {
        content: result.raw.slice(0, charLimit),
        source_url: options.url,
        content_date: result.content_date,
        retrieved_at: new Date().toISOString(),
        freshness_confidence: result.freshness_confidence,
        adapter,
    };
    return envelope;
}
11
/**
 * Render a FreshContext envelope as a plain-text block for the model:
 * a [FRESHCONTEXT] header with provenance lines, a --- separator, the
 * content, and a closing [/FRESHCONTEXT] tag.
 *
 * @param {object} ctx - envelope produced by stampFreshness()
 * @returns {string} newline-joined block
 */
export function formatForLLM(ctx) {
    let dateLine = "Publish date: unknown";
    if (ctx.content_date) {
        dateLine = `Published: ${ctx.content_date}`;
    }
    const pieces = [
        "[FRESHCONTEXT]",
        `Source: ${ctx.source_url}`,
        dateLine,
        `Retrieved: ${ctx.retrieved_at}`,
        `Confidence: ${ctx.freshness_confidence}`,
        "---",
        ctx.content,
        "[/FRESHCONTEXT]",
    ];
    return pieces.join("\n");
}
package/dist/types.js ADDED
@@ -0,0 +1,2 @@
1
// Core data types for freshcontext-mcp
// Compiled output of a type-only TypeScript module: nothing exists at runtime,
// so the empty export merely marks this file as an ES module.
export {};
package/package.json ADDED
@@ -0,0 +1,27 @@
1
+ {
2
+ "name": "freshcontext-mcp",
3
+ "version": "0.1.0",
4
+ "description": "Real-time web extraction MCP server with freshness timestamps for AI agents",
5
+ "type": "module",
6
+ "main": "dist/server.js",
7
+ "scripts": {
8
+ "build": "tsc",
9
+ "dev": "tsx watch src/server.ts",
10
+ "start": "node dist/server.js",
11
+ "inspect": "npx @modelcontextprotocol/inspector tsx src/server.ts",
12
+ "test": "jest"
13
+ },
14
+ "dependencies": {
15
+ "@modelcontextprotocol/sdk": "^1.0.0",
16
+ "playwright": "^1.44.0",
17
+ "zod": "^3.23.0",
18
+ "dotenv": "^16.4.0"
19
+ },
20
+ "devDependencies": {
21
+ "@types/node": "^20.0.0",
22
+ "tsx": "^4.0.0",
23
+ "typescript": "^5.4.0",
24
+ "jest": "^29.0.0",
25
+ "@types/jest": "^29.0.0"
26
+ }
27
+ }