npm - freshcontext-mcp - Versions diffs - 0.1.2 → 0.1.4 - Mend

freshcontext-mcp 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18)

package/dist/adapters/github.js +3 -0
package/dist/adapters/hackernews.js +3 -1
package/dist/adapters/packageTrends.js +3 -3
package/dist/adapters/repoSearch.js +4 -3
package/dist/adapters/scholar.js +3 -0
package/dist/adapters/yc.js +3 -0
package/dist/security.js +117 -0
package/dist/server.js +49 -18
package/package.json +1 -1
package/src/adapters/github.ts +4 -0
package/src/adapters/hackernews.ts +3 -1
package/src/adapters/packageTrends.ts +3 -3
package/src/adapters/repoSearch.ts +4 -3
package/src/adapters/scholar.ts +4 -0
package/src/adapters/yc.ts +4 -0
package/src/security.ts +161 -0
package/src/server.ts +43 -18
package/worker/src/worker.ts +261 -83

package/dist/adapters/github.js CHANGED Viewed

@@ -1,5 +1,8 @@
 import { chromium } from "playwright";
+import { validateUrl } from "../security.js";
 export async function githubAdapter(options) {
+    const safeUrl = validateUrl(options.url, "github");
+    options = { ...options, url: safeUrl };
     const browser = await chromium.launch({ headless: true });
     const page = await browser.newPage();
     // Spoof a real browser UA to avoid bot detection

package/dist/adapters/hackernews.js CHANGED Viewed

@@ -1,6 +1,8 @@
 import { chromium } from "playwright";
+import { validateUrl } from "../security.js";
 export async function hackerNewsAdapter(options) {
-    // If it's an Algolia API URL or search query, use the REST API directly (no browser)
+    // Validate URL — allow both HN and Algolia domains
+    validateUrl(options.url, "hackernews");
     const url = options.url;
     if (url.includes("hn.algolia.com/api/") || url.startsWith("hn-search:")) {
         const query = url.startsWith("hn-search:")

package/dist/adapters/packageTrends.js CHANGED Viewed

@@ -1,8 +1,8 @@
+import { sanitizePackages } from "../security.js";
 // Uses npm registry API + PyPI JSON API (no auth needed)
 export async function packageTrendsAdapter(options) {
-    // options.url is the package name or a comma-separated list
-    // e.g. "langchain" or "npm:langchain" or "pypi:langchain"
-    const raw_input = options.url.replace(/^https?:\/\//, "").trim();
+    // Sanitize package input
+    const raw_input = sanitizePackages(options.url.replace(/^https?:\/\//, "").trim());
     // Parse ecosystem prefix
     const parts = raw_input.split(",").map((s) => s.trim());
     const results = [];

package/dist/adapters/repoSearch.js CHANGED Viewed

@@ -1,8 +1,9 @@
+import { sanitizeQuery } from "../security.js";
 // Uses GitHub Search API (no auth needed for basic search)
 export async function repoSearchAdapter(options) {
-    // options.url is treated as the search query string
-    // e.g. "mcp server typescript" or a full GitHub search URL
-    let query = options.url;
+    // Sanitize query input
+    const query_input = sanitizeQuery(options.url);
+    let query = query_input;
     // If it's a full URL, extract the query param
     try {
         const parsed = new URL(options.url);

package/dist/adapters/scholar.js CHANGED Viewed

@@ -1,5 +1,8 @@
 import { chromium } from "playwright";
+import { validateUrl } from "../security.js";
 export async function scholarAdapter(options) {
+    const safeUrl = validateUrl(options.url, "scholar");
+    options = { ...options, url: safeUrl };
     const browser = await chromium.launch({ headless: true });
     const page = await browser.newPage();
     await page.setExtraHTTPHeaders({

package/dist/adapters/yc.js CHANGED Viewed

@@ -1,5 +1,8 @@
 import { chromium } from "playwright";
+import { validateUrl } from "../security.js";
 export async function ycAdapter(options) {
+    const safeUrl = validateUrl(options.url, "yc");
+    options = { ...options, url: safeUrl };
     const browser = await chromium.launch({ headless: true });
     const page = await browser.newPage();
     // YC company directory is React-rendered — wait for network to settle

package/dist/security.js ADDED Viewed

@@ -0,0 +1,117 @@
+/**
+ * freshcontext-mcp security module
+ * Input sanitization, domain allowlists, and request validation
+ */
+// ─── Allowed domains per adapter ────────────────────────────────────────────
+export const ALLOWED_DOMAINS = {
+    github: ["github.com", "raw.githubusercontent.com"],
+    scholar: ["scholar.google.com"],
+    hackernews: ["news.ycombinator.com", "hn.algolia.com"],
+    yc: ["www.ycombinator.com", "ycombinator.com"],
+    repoSearch: [], // uses GitHub API directly, no browser
+    packageTrends: [], // uses npm/PyPI APIs directly, no browser
+};
+// ─── Blocked IP ranges and internal hostnames ────────────────────────────────
+const BLOCKED_PATTERNS = [
+    /^localhost$/i,
+    /^127\.\d+\.\d+\.\d+$/,
+    /^10\.\d+\.\d+\.\d+$/,
+    /^172\.(1[6-9]|2\d|3[01])\.\d+\.\d+$/,
+    /^192\.168\.\d+\.\d+$/,
+    /^169\.254\.\d+\.\d+$/, // AWS metadata
+    /^0\.0\.0\.0$/,
+    /^::1$/,
+    /^fc00:/i,
+    /^fe80:/i,
+];
+// ─── Max length limits ────────────────────────────────────────────────────────
+export const MAX_URL_LENGTH = 500;
+export const MAX_QUERY_LENGTH = 200;
+export const MAX_PACKAGES_LENGTH = 300;
+// ─── Validation errors ───────────────────────────────────────────────────────
+export class SecurityError extends Error {
+    constructor(message) {
+        super(message);
+        this.name = "SecurityError";
+    }
+}
+// ─── URL validator ───────────────────────────────────────────────────────────
+export function validateUrl(rawUrl, adapterName) {
+    // Length check
+    if (!rawUrl || rawUrl.trim().length === 0) {
+        throw new SecurityError("URL cannot be empty");
+    }
+    if (rawUrl.length > MAX_URL_LENGTH) {
+        throw new SecurityError(`URL exceeds maximum length of ${MAX_URL_LENGTH} characters`);
+    }
+    // Must be a valid URL
+    let parsed;
+    try {
+        parsed = new URL(rawUrl.trim());
+    }
+    catch {
+        throw new SecurityError(`Invalid URL format: ${rawUrl}`);
+    }
+    // Must use http or https
+    if (!["http:", "https:"].includes(parsed.protocol)) {
+        throw new SecurityError(`Protocol not allowed: ${parsed.protocol}. Only http/https permitted.`);
+    }
+    const hostname = parsed.hostname.toLowerCase();
+    // Block internal/private IPs and hostnames
+    for (const pattern of BLOCKED_PATTERNS) {
+        if (pattern.test(hostname)) {
+            throw new SecurityError(`Access to internal/private addresses is not permitted: ${hostname}`);
+        }
+    }
+    // Domain allowlist check (skip if allowlist is empty — means no browser used)
+    const allowedDomains = ALLOWED_DOMAINS[adapterName];
+    if (allowedDomains && allowedDomains.length > 0) {
+        const isAllowed = allowedDomains.some((domain) => hostname === domain || hostname.endsWith(`.${domain}`));
+        if (!isAllowed) {
+            throw new SecurityError(`Domain not allowed for ${adapterName} adapter: ${hostname}. ` +
+                `Allowed domains: ${allowedDomains.join(", ")}`);
+        }
+    }
+    return parsed.toString();
+}
+// ─── Query string sanitizer ──────────────────────────────────────────────────
+export function sanitizeQuery(query, maxLength = MAX_QUERY_LENGTH) {
+    if (!query || query.trim().length === 0) {
+        throw new SecurityError("Query cannot be empty");
+    }
+    const trimmed = query.trim().slice(0, maxLength);
+    // Strip null bytes and control characters
+    const cleaned = trimmed.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, "");
+    if (cleaned.length === 0) {
+        throw new SecurityError("Query contains no valid characters after sanitization");
+    }
+    return cleaned;
+}
+// ─── Package name sanitizer ──────────────────────────────────────────────────
+export function sanitizePackages(input) {
+    if (!input || input.trim().length === 0) {
+        throw new SecurityError("Package name cannot be empty");
+    }
+    if (input.length > MAX_PACKAGES_LENGTH) {
+        throw new SecurityError(`Package input exceeds maximum length of ${MAX_PACKAGES_LENGTH} characters`);
+    }
+    // Only allow valid npm/PyPI package name characters, commas, colons (for npm:/pypi: prefix)
+    const cleaned = input
+        .trim()
+        .replace(/[^a-zA-Z0-9@/._\-,:]/g, "")
+        .slice(0, MAX_PACKAGES_LENGTH);
+    if (cleaned.length === 0) {
+        throw new SecurityError("Package name contains no valid characters after sanitization");
+    }
+    return cleaned;
+}
+// ─── Error formatter ─────────────────────────────────────────────────────────
+export function formatSecurityError(err) {
+    if (err instanceof SecurityError) {
+        return `[Security] ${err.message}`;
+    }
+    if (err instanceof Error) {
+        return `[Error] ${err.message}`;
+    }
+    return "[Error] Unknown error occurred";
+}

package/dist/server.js CHANGED Viewed

@@ -8,6 +8,7 @@ import { ycAdapter } from "./adapters/yc.js";
 import { repoSearchAdapter } from "./adapters/repoSearch.js";
 import { packageTrendsAdapter } from "./adapters/packageTrends.js";
 import { stampFreshness, formatForLLM } from "./tools/freshnessStamp.js";
+import { formatSecurityError } from "./security.js";
 const server = new McpServer({
     name: "freshcontext-mcp",
     version: "0.1.0",
@@ -21,9 +22,14 @@ server.registerTool("extract_github", {
     }),
     annotations: { readOnlyHint: true, openWorldHint: true },
 }, async ({ url, max_length }) => {
-    const result = await githubAdapter({ url, maxLength: max_length });
-    const ctx = stampFreshness(result, { url, maxLength: max_length }, "github");
-    return { content: [{ type: "text", text: formatForLLM(ctx) }] };
+    try {
+        const result = await githubAdapter({ url, maxLength: max_length });
+        const ctx = stampFreshness(result, { url, maxLength: max_length }, "github");
+        return { content: [{ type: "text", text: formatForLLM(ctx) }] };
+    }
+    catch (err) {
+        return { content: [{ type: "text", text: formatSecurityError(err) }] };
+    }
 });
 // ─── Tool: extract_scholar ───────────────────────────────────────────────────
 server.registerTool("extract_scholar", {
@@ -34,9 +40,14 @@ server.registerTool("extract_scholar", {
     }),
     annotations: { readOnlyHint: true, openWorldHint: true },
 }, async ({ url, max_length }) => {
-    const result = await scholarAdapter({ url, maxLength: max_length });
-    const ctx = stampFreshness(result, { url, maxLength: max_length }, "google_scholar");
-    return { content: [{ type: "text", text: formatForLLM(ctx) }] };
+    try {
+        const result = await scholarAdapter({ url, maxLength: max_length });
+        const ctx = stampFreshness(result, { url, maxLength: max_length }, "google_scholar");
+        return { content: [{ type: "text", text: formatForLLM(ctx) }] };
+    }
+    catch (err) {
+        return { content: [{ type: "text", text: formatSecurityError(err) }] };
+    }
 });
 // ─── Tool: extract_hackernews ────────────────────────────────────────────────
 server.registerTool("extract_hackernews", {
@@ -47,9 +58,14 @@ server.registerTool("extract_hackernews", {
     }),
     annotations: { readOnlyHint: true, openWorldHint: true },
 }, async ({ url, max_length }) => {
-    const result = await hackerNewsAdapter({ url, maxLength: max_length });
-    const ctx = stampFreshness(result, { url, maxLength: max_length }, "hackernews");
-    return { content: [{ type: "text", text: formatForLLM(ctx) }] };
+    try {
+        const result = await hackerNewsAdapter({ url, maxLength: max_length });
+        const ctx = stampFreshness(result, { url, maxLength: max_length }, "hackernews");
+        return { content: [{ type: "text", text: formatForLLM(ctx) }] };
+    }
+    catch (err) {
+        return { content: [{ type: "text", text: formatSecurityError(err) }] };
+    }
 });
 // ─── Tool: extract_yc ──────────────────────────────────────────────────────────
 server.registerTool("extract_yc", {
@@ -60,9 +76,14 @@ server.registerTool("extract_yc", {
     }),
     annotations: { readOnlyHint: true, openWorldHint: true },
 }, async ({ url, max_length }) => {
-    const result = await ycAdapter({ url, maxLength: max_length });
-    const ctx = stampFreshness(result, { url, maxLength: max_length }, "ycombinator");
-    return { content: [{ type: "text", text: formatForLLM(ctx) }] };
+    try {
+        const result = await ycAdapter({ url, maxLength: max_length });
+        const ctx = stampFreshness(result, { url, maxLength: max_length }, "ycombinator");
+        return { content: [{ type: "text", text: formatForLLM(ctx) }] };
+    }
+    catch (err) {
+        return { content: [{ type: "text", text: formatSecurityError(err) }] };
+    }
 });
 // ─── Tool: search_repos ──────────────────────────────────────────────────────
 server.registerTool("search_repos", {
@@ -73,9 +94,14 @@ server.registerTool("search_repos", {
     }),
     annotations: { readOnlyHint: true, openWorldHint: true },
 }, async ({ query, max_length }) => {
-    const result = await repoSearchAdapter({ url: query, maxLength: max_length });
-    const ctx = stampFreshness(result, { url: query, maxLength: max_length }, "github_search");
-    return { content: [{ type: "text", text: formatForLLM(ctx) }] };
+    try {
+        const result = await repoSearchAdapter({ url: query, maxLength: max_length });
+        const ctx = stampFreshness(result, { url: query, maxLength: max_length }, "github_search");
+        return { content: [{ type: "text", text: formatForLLM(ctx) }] };
+    }
+    catch (err) {
+        return { content: [{ type: "text", text: formatSecurityError(err) }] };
+    }
 });
 // ─── Tool: package_trends ────────────────────────────────────────────────────
 server.registerTool("package_trends", {
@@ -86,9 +112,14 @@ server.registerTool("package_trends", {
     }),
     annotations: { readOnlyHint: true, openWorldHint: true },
 }, async ({ packages, max_length }) => {
-    const result = await packageTrendsAdapter({ url: packages, maxLength: max_length });
-    const ctx = stampFreshness(result, { url: packages, maxLength: max_length }, "package_registry");
-    return { content: [{ type: "text", text: formatForLLM(ctx) }] };
+    try {
+        const result = await packageTrendsAdapter({ url: packages, maxLength: max_length });
+        const ctx = stampFreshness(result, { url: packages, maxLength: max_length }, "package_registry");
+        return { content: [{ type: "text", text: formatForLLM(ctx) }] };
+    }
+    catch (err) {
+        return { content: [{ type: "text", text: formatSecurityError(err) }] };
+    }
 });
 // ─── Tool: extract_landscape ─────────────────────────────────────────────────
 server.registerTool("extract_landscape", {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "freshcontext-mcp",
-  "version": "0.1.2",
+  "version": "0.1.4",
   "description": "Real-time web extraction MCP server with freshness timestamps for AI agents",
   "keywords": [
     "mcp",

package/src/adapters/github.ts CHANGED Viewed

@@ -1,7 +1,11 @@
 import { chromium } from "playwright";
 import { AdapterResult, ExtractOptions } from "../types.js";
+import { validateUrl } from "../security.js";
 export async function githubAdapter(options: ExtractOptions): Promise<AdapterResult> {
+  const safeUrl = validateUrl(options.url, "github");
+  options = { ...options, url: safeUrl };
   const browser = await chromium.launch({ headless: true });
   const page = await browser.newPage();

package/src/adapters/hackernews.ts CHANGED Viewed

@@ -1,8 +1,10 @@
 import { chromium } from "playwright";
 import { AdapterResult, ExtractOptions } from "../types.js";
+import { validateUrl } from "../security.js";
 export async function hackerNewsAdapter(options: ExtractOptions): Promise<AdapterResult> {
-  // If it's an Algolia API URL or search query, use the REST API directly (no browser)
+  // Validate URL — allow both HN and Algolia domains
+  validateUrl(options.url, "hackernews");
   const url = options.url;
   if (url.includes("hn.algolia.com/api/") || url.startsWith("hn-search:")) {

package/src/adapters/packageTrends.ts CHANGED Viewed

@@ -1,10 +1,10 @@
 import { AdapterResult, ExtractOptions } from "../types.js";
+import { sanitizePackages } from "../security.js";
 // Uses npm registry API + PyPI JSON API (no auth needed)
 export async function packageTrendsAdapter(options: ExtractOptions): Promise<AdapterResult> {
-  // options.url is the package name or a comma-separated list
-  // e.g. "langchain" or "npm:langchain" or "pypi:langchain"
-  const raw_input = options.url.replace(/^https?:\/\//, "").trim();
+  // Sanitize package input
+  const raw_input = sanitizePackages(options.url.replace(/^https?:\/\//, "").trim());
   // Parse ecosystem prefix
   const parts = raw_input.split(",").map((s) => s.trim());

package/src/adapters/repoSearch.ts CHANGED Viewed

@@ -1,10 +1,11 @@
 import { AdapterResult, ExtractOptions } from "../types.js";
+import { sanitizeQuery } from "../security.js";
 // Uses GitHub Search API (no auth needed for basic search)
 export async function repoSearchAdapter(options: ExtractOptions): Promise<AdapterResult> {
-  // options.url is treated as the search query string
-  // e.g. "mcp server typescript" or a full GitHub search URL
-  let query = options.url;
+  // Sanitize query input
+  const query_input = sanitizeQuery(options.url);
+  let query = query_input;
   // If it's a full URL, extract the query param
   try {

package/src/adapters/scholar.ts CHANGED Viewed

@@ -1,7 +1,11 @@
 import { chromium } from "playwright";
 import { AdapterResult, ExtractOptions } from "../types.js";
+import { validateUrl } from "../security.js";
 export async function scholarAdapter(options: ExtractOptions): Promise<AdapterResult> {
+  const safeUrl = validateUrl(options.url, "scholar");
+  options = { ...options, url: safeUrl };
   const browser = await chromium.launch({ headless: true });
   const page = await browser.newPage();

package/src/adapters/yc.ts CHANGED Viewed

@@ -1,7 +1,11 @@
 import { chromium } from "playwright";
 import { AdapterResult, ExtractOptions } from "../types.js";
+import { validateUrl } from "../security.js";
 export async function ycAdapter(options: ExtractOptions): Promise<AdapterResult> {
+  const safeUrl = validateUrl(options.url, "yc");
+  options = { ...options, url: safeUrl };
   const browser = await chromium.launch({ headless: true });
   const page = await browser.newPage();

package/src/security.ts ADDED Viewed

@@ -0,0 +1,161 @@
+/**
+ * freshcontext-mcp security module
+ * Input sanitization, domain allowlists, and request validation
+ */
+// ─── Allowed domains per adapter ────────────────────────────────────────────
+export const ALLOWED_DOMAINS: Record<string, string[]> = {
+  github: ["github.com", "raw.githubusercontent.com"],
+  scholar: ["scholar.google.com"],
+  hackernews: ["news.ycombinator.com", "hn.algolia.com"],
+  yc: ["www.ycombinator.com", "ycombinator.com"],
+  repoSearch: [], // uses GitHub API directly, no browser
+  packageTrends: [], // uses npm/PyPI APIs directly, no browser
+};
+// ─── Blocked IP ranges and internal hostnames ────────────────────────────────
+const BLOCKED_PATTERNS = [
+  /^localhost$/i,
+  /^127\.\d+\.\d+\.\d+$/,
+  /^10\.\d+\.\d+\.\d+$/,
+  /^172\.(1[6-9]|2\d|3[01])\.\d+\.\d+$/,
+  /^192\.168\.\d+\.\d+$/,
+  /^169\.254\.\d+\.\d+$/, // AWS metadata
+  /^0\.0\.0\.0$/,
+  /^::1$/,
+  /^fc00:/i,
+  /^fe80:/i,
+];
+// ─── Max length limits ────────────────────────────────────────────────────────
+export const MAX_URL_LENGTH = 500;
+export const MAX_QUERY_LENGTH = 200;
+export const MAX_PACKAGES_LENGTH = 300;
+// ─── Validation errors ───────────────────────────────────────────────────────
+export class SecurityError extends Error {
+  constructor(message: string) {
+    super(message);
+    this.name = "SecurityError";
+  }
+}
+// ─── URL validator ───────────────────────────────────────────────────────────
+export function validateUrl(
+  rawUrl: string,
+  adapterName: keyof typeof ALLOWED_DOMAINS
+): string {
+  // Length check
+  if (!rawUrl || rawUrl.trim().length === 0) {
+    throw new SecurityError("URL cannot be empty");
+  }
+  if (rawUrl.length > MAX_URL_LENGTH) {
+    throw new SecurityError(
+      `URL exceeds maximum length of ${MAX_URL_LENGTH} characters`
+    );
+  }
+  // Must be a valid URL
+  let parsed: URL;
+  try {
+    parsed = new URL(rawUrl.trim());
+  } catch {
+    throw new SecurityError(`Invalid URL format: ${rawUrl}`);
+  }
+  // Must use http or https
+  if (!["http:", "https:"].includes(parsed.protocol)) {
+    throw new SecurityError(
+      `Protocol not allowed: ${parsed.protocol}. Only http/https permitted.`
+    );
+  }
+  const hostname = parsed.hostname.toLowerCase();
+  // Block internal/private IPs and hostnames
+  for (const pattern of BLOCKED_PATTERNS) {
+    if (pattern.test(hostname)) {
+      throw new SecurityError(
+        `Access to internal/private addresses is not permitted: ${hostname}`
+      );
+    }
+  }
+  // Domain allowlist check (skip if allowlist is empty — means no browser used)
+  const allowedDomains = ALLOWED_DOMAINS[adapterName];
+  if (allowedDomains && allowedDomains.length > 0) {
+    const isAllowed = allowedDomains.some(
+      (domain) => hostname === domain || hostname.endsWith(`.${domain}`)
+    );
+    if (!isAllowed) {
+      throw new SecurityError(
+        `Domain not allowed for ${adapterName} adapter: ${hostname}. ` +
+          `Allowed domains: ${allowedDomains.join(", ")}`
+      );
+    }
+  }
+  return parsed.toString();
+}
+// ─── Query string sanitizer ──────────────────────────────────────────────────
+export function sanitizeQuery(query: string, maxLength = MAX_QUERY_LENGTH): string {
+  if (!query || query.trim().length === 0) {
+    throw new SecurityError("Query cannot be empty");
+  }
+  const trimmed = query.trim().slice(0, maxLength);
+  // Strip null bytes and control characters
+  const cleaned = trimmed.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, "");
+  if (cleaned.length === 0) {
+    throw new SecurityError("Query contains no valid characters after sanitization");
+  }
+  return cleaned;
+}
+// ─── Package name sanitizer ──────────────────────────────────────────────────
+export function sanitizePackages(input: string): string {
+  if (!input || input.trim().length === 0) {
+    throw new SecurityError("Package name cannot be empty");
+  }
+  if (input.length > MAX_PACKAGES_LENGTH) {
+    throw new SecurityError(
+      `Package input exceeds maximum length of ${MAX_PACKAGES_LENGTH} characters`
+    );
+  }
+  // Only allow valid npm/PyPI package name characters, commas, colons (for npm:/pypi: prefix)
+  const cleaned = input
+    .trim()
+    .replace(/[^a-zA-Z0-9@/._\-,:]/g, "")
+    .slice(0, MAX_PACKAGES_LENGTH);
+  if (cleaned.length === 0) {
+    throw new SecurityError("Package name contains no valid characters after sanitization");
+  }
+  return cleaned;
+}
+// ─── Error formatter ─────────────────────────────────────────────────────────
+export function formatSecurityError(err: unknown): string {
+  if (err instanceof SecurityError) {
+    return `[Security] ${err.message}`;
+  }
+  if (err instanceof Error) {
+    return `[Error] ${err.message}`;
+  }
+  return "[Error] Unknown error occurred";
+}

package/src/server.ts CHANGED Viewed

@@ -8,6 +8,7 @@ import { ycAdapter } from "./adapters/yc.js";
 import { repoSearchAdapter } from "./adapters/repoSearch.js";
 import { packageTrendsAdapter } from "./adapters/packageTrends.js";
 import { stampFreshness, formatForLLM } from "./tools/freshnessStamp.js";
+import { SecurityError, formatSecurityError } from "./security.js";
 const server = new McpServer({
   name: "freshcontext-mcp",
@@ -27,9 +28,13 @@ server.registerTool(
     annotations: { readOnlyHint: true, openWorldHint: true },
   },
   async ({ url, max_length }) => {
-    const result = await githubAdapter({ url, maxLength: max_length });
-    const ctx = stampFreshness(result, { url, maxLength: max_length }, "github");
-    return { content: [{ type: "text", text: formatForLLM(ctx) }] };
+    try {
+      const result = await githubAdapter({ url, maxLength: max_length });
+      const ctx = stampFreshness(result, { url, maxLength: max_length }, "github");
+      return { content: [{ type: "text", text: formatForLLM(ctx) }] };
+    } catch (err) {
+      return { content: [{ type: "text", text: formatSecurityError(err) }] };
+    }
   }
 );
@@ -46,9 +51,13 @@ server.registerTool(
     annotations: { readOnlyHint: true, openWorldHint: true },
   },
   async ({ url, max_length }) => {
-    const result = await scholarAdapter({ url, maxLength: max_length });
-    const ctx = stampFreshness(result, { url, maxLength: max_length }, "google_scholar");
-    return { content: [{ type: "text", text: formatForLLM(ctx) }] };
+    try {
+      const result = await scholarAdapter({ url, maxLength: max_length });
+      const ctx = stampFreshness(result, { url, maxLength: max_length }, "google_scholar");
+      return { content: [{ type: "text", text: formatForLLM(ctx) }] };
+    } catch (err) {
+      return { content: [{ type: "text", text: formatSecurityError(err) }] };
+    }
   }
 );
@@ -65,9 +74,13 @@ server.registerTool(
     annotations: { readOnlyHint: true, openWorldHint: true },
   },
   async ({ url, max_length }) => {
-    const result = await hackerNewsAdapter({ url, maxLength: max_length });
-    const ctx = stampFreshness(result, { url, maxLength: max_length }, "hackernews");
-    return { content: [{ type: "text", text: formatForLLM(ctx) }] };
+    try {
+      const result = await hackerNewsAdapter({ url, maxLength: max_length });
+      const ctx = stampFreshness(result, { url, maxLength: max_length }, "hackernews");
+      return { content: [{ type: "text", text: formatForLLM(ctx) }] };
+    } catch (err) {
+      return { content: [{ type: "text", text: formatSecurityError(err) }] };
+    }
   }
 );
@@ -84,9 +97,13 @@ server.registerTool(
     annotations: { readOnlyHint: true, openWorldHint: true },
   },
   async ({ url, max_length }) => {
-    const result = await ycAdapter({ url, maxLength: max_length });
-    const ctx = stampFreshness(result, { url, maxLength: max_length }, "ycombinator");
-    return { content: [{ type: "text", text: formatForLLM(ctx) }] };
+    try {
+      const result = await ycAdapter({ url, maxLength: max_length });
+      const ctx = stampFreshness(result, { url, maxLength: max_length }, "ycombinator");
+      return { content: [{ type: "text", text: formatForLLM(ctx) }] };
+    } catch (err) {
+      return { content: [{ type: "text", text: formatSecurityError(err) }] };
+    }
   }
 );
@@ -103,9 +120,13 @@ server.registerTool(
     annotations: { readOnlyHint: true, openWorldHint: true },
   },
   async ({ query, max_length }) => {
-    const result = await repoSearchAdapter({ url: query, maxLength: max_length });
-    const ctx = stampFreshness(result, { url: query, maxLength: max_length }, "github_search");
-    return { content: [{ type: "text", text: formatForLLM(ctx) }] };
+    try {
+      const result = await repoSearchAdapter({ url: query, maxLength: max_length });
+      const ctx = stampFreshness(result, { url: query, maxLength: max_length }, "github_search");
+      return { content: [{ type: "text", text: formatForLLM(ctx) }] };
+    } catch (err) {
+      return { content: [{ type: "text", text: formatSecurityError(err) }] };
+    }
   }
 );
@@ -122,9 +143,13 @@ server.registerTool(
     annotations: { readOnlyHint: true, openWorldHint: true },
   },
   async ({ packages, max_length }) => {
-    const result = await packageTrendsAdapter({ url: packages, maxLength: max_length });
-    const ctx = stampFreshness(result, { url: packages, maxLength: max_length }, "package_registry");
-    return { content: [{ type: "text", text: formatForLLM(ctx) }] };
+    try {
+      const result = await packageTrendsAdapter({ url: packages, maxLength: max_length });
+      const ctx = stampFreshness(result, { url: packages, maxLength: max_length }, "package_registry");
+      return { content: [{ type: "text", text: formatForLLM(ctx) }] };
+    } catch (err) {
+      return { content: [{ type: "text", text: formatSecurityError(err) }] };
+    }
   }
 );

package/worker/src/worker.ts CHANGED Viewed

@@ -3,10 +3,11 @@ import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
 import { WebStandardStreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/webStandardStreamableHttp.js";
 import { z } from "zod";
-// ─── Types ───────────────────────────────────────────────────────────────────
+// ─── Types ────────────────────────────────────────────────────────────────────
 interface Env {
   BROWSER: Fetcher;
+  API_KEY?: string; // Optional: set via `wrangler secret put API_KEY`
 }
 interface FreshContext {
@@ -18,9 +19,143 @@ interface FreshContext {
   adapter: string;
 }
-// ─── Freshness Stamp ─────────────────────────────────────────────────────────
+// ─── Security ─────────────────────────────────────────────────────────────────
-function stamp(content: string, url: string, date: string | null, confidence: "high" | "medium" | "low", adapter: string): string {
+const ALLOWED_DOMAINS: Record<string, string[]> = {
+  github:      ["github.com", "raw.githubusercontent.com"],
+  scholar:     ["scholar.google.com"],
+  hackernews:  ["news.ycombinator.com", "hn.algolia.com"],
+  yc:          ["www.ycombinator.com", "ycombinator.com"],
+};
+const PRIVATE_IP_PATTERNS = [
+  /^localhost$/i,
+  /^127\./,
+  /^10\./,
+  /^192\.168\./,
+  /^172\.(1[6-9]|2\d|3[01])\./,
+  /^169\.254\./,
+  /^::1$/,
+  /^fc00:/i,
+  /^fe80:/i,
+];
+const MAX_URL_LENGTH    = 500;
+const MAX_QUERY_LENGTH  = 200;
+class SecurityError extends Error {
+  constructor(message: string) {
+    super(message);
+    this.name = "SecurityError";
+  }
+}
+function validateUrl(rawUrl: string, adapter: string): string {
+  if (rawUrl.length > MAX_URL_LENGTH)
+    throw new SecurityError(`URL too long (max ${MAX_URL_LENGTH} chars)`);
+  let parsed: URL;
+  try { parsed = new URL(rawUrl); }
+  catch { throw new SecurityError("Invalid URL format"); }
+  if (!["http:", "https:"].includes(parsed.protocol))
+    throw new SecurityError("Only http/https URLs are allowed");
+  const hostname = parsed.hostname.toLowerCase();
+  for (const pattern of PRIVATE_IP_PATTERNS) {
+    if (pattern.test(hostname))
+      throw new SecurityError("Access to private/internal addresses is not allowed");
+  }
+  const allowed = ALLOWED_DOMAINS[adapter];
+  if (allowed && allowed.length > 0) {
+    const ok = allowed.some(d => hostname === d || hostname.endsWith(`.${d}`));
+    if (!ok)
+      throw new SecurityError(`URL not allowed for ${adapter}. Allowed domains: ${allowed.join(", ")}`);
+  }
+  return rawUrl;
+}
+function sanitizeQuery(query: string, maxLen = MAX_QUERY_LENGTH): string {
+  if (query.length > maxLen)
+    throw new SecurityError(`Query too long (max ${maxLen} chars)`);
+  // Strip null bytes and control characters
+  return query.replace(/[\x00-\x1F\x7F]/g, "").trim();
+}
+// ─── Rate Limiting (in-memory, per isolate) ───────────────────────────────────
+interface RateEntry { count: number; windowStart: number; }
+const rateMap = new Map<string, RateEntry>();
+const RATE_LIMIT      = 20;   // max requests
+const RATE_WINDOW_MS  = 60_000; // per 60 seconds
+function checkRateLimit(ip: string): void {
+  const now = Date.now();
+  const entry = rateMap.get(ip);
+  if (!entry || now - entry.windowStart > RATE_WINDOW_MS) {
+    rateMap.set(ip, { count: 1, windowStart: now });
+    return;
+  }
+  if (entry.count >= RATE_LIMIT) {
+    throw new SecurityError(`Rate limit exceeded. Max ${RATE_LIMIT} requests per minute.`);
+  }
+  entry.count++;
+}
+// Prevent the map from growing unboundedly
+function pruneRateMap(): void {
+  const now = Date.now();
+  for (const [ip, entry] of rateMap) {
+    if (now - entry.windowStart > RATE_WINDOW_MS) rateMap.delete(ip);
+  }
+}
+// ─── Auth ─────────────────────────────────────────────────────────────────────
+function checkAuth(request: Request, env: Env): void {
+  if (!env.API_KEY) return; // Auth disabled if no key is set
+  const authHeader = request.headers.get("Authorization") ?? "";
+  const token = authHeader.startsWith("Bearer ") ? authHeader.slice(7) : "";
+  if (token !== env.API_KEY) {
+    throw new SecurityError("Unauthorized. Provide a valid Bearer token.");
+  }
+}
+// ─── Helpers ──────────────────────────────────────────────────────────────────
+function getClientIp(request: Request): string {
+  return (
+    request.headers.get("CF-Connecting-IP") ??
+    request.headers.get("X-Forwarded-For")?.split(",")[0]?.trim() ??
+    "unknown"
+  );
+}
+function securityErrorResponse(message: string, status: number): Response {
+  return new Response(JSON.stringify({ error: message }), {
+    status,
+    headers: { "Content-Type": "application/json" },
+  });
+}
+// ─── Freshness Stamp ──────────────────────────────────────────────────────────
+function stamp(
+  content: string,
+  url: string,
+  date: string | null,
+  confidence: "high" | "medium" | "low",
+  adapter: string
+): string {
   const ctx: FreshContext = {
     content: content.slice(0, 6000),
     source_url: url,
@@ -44,107 +179,133 @@ function stamp(content: string, url: string, date: string | null, confidence: "h
 // ─── Server Factory ───────────────────────────────────────────────────────────
 function createServer(env: Env): McpServer {
-  const server = new McpServer({ name: "freshcontext-mcp", version: "0.1.0" });
+  const server = new McpServer({ name: "freshcontext-mcp", version: "0.1.3" });
   // ── extract_github ──────────────────────────────────────────────────────────
   server.registerTool("extract_github", {
     description: "Extract real-time data from a GitHub repository — README, stars, forks, last commit, topics. Returns timestamped freshcontext.",
     inputSchema: z.object({
-      url: z.string().url().describe("Full GitHub repo URL"),
+      url: z.string().url().describe("Full GitHub repo URL e.g. https://github.com/owner/repo"),
     }),
     annotations: { readOnlyHint: true, openWorldHint: true },
   }, async ({ url }) => {
-    const browser = await puppeteer.launch(env.BROWSER);
-    const page = await browser.newPage();
-    await page.setUserAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/124.0.0.0 Safari/537.36");
-    await page.goto(url, { waitUntil: "domcontentloaded" });
-    const data = await page.evaluate(`(function() {
-      var readme = (document.querySelector('[data-target="readme-toc.content"]') || document.querySelector('.markdown-body') || {}).textContent || null;
-      var starsEl = document.querySelector('[id="repo-stars-counter-star"]') || document.querySelector('.Counter.js-social-count');
-      var stars = starsEl ? starsEl.textContent.trim() : null;
-      var forksEl = document.querySelector('[id="repo-network-counter"]');
-      var forks = forksEl ? forksEl.textContent.trim() : null;
-      var commitEl = document.querySelector('relative-time');
-      var lastCommit = commitEl ? commitEl.getAttribute('datetime') : null;
-      var descEl = document.querySelector('.f4.my-3');
-      var description = descEl ? descEl.textContent.trim() : null;
-      var topics = Array.from(document.querySelectorAll('.topic-tag')).map(function(t) { return t.textContent.trim(); });
-      var langEl = document.querySelector('.color-fg-default.text-bold.mr-1');
-      var language = langEl ? langEl.textContent.trim() : null;
-      return { readme, stars, forks, lastCommit, description, topics, language };
-    })()`);
-    await browser.close();
-    const d = data as any;
-    const raw = [`Description: ${d.description ?? "N/A"}`, `Stars: ${d.stars ?? "N/A"} | Forks: ${d.forks ?? "N/A"}`, `Language: ${d.language ?? "N/A"}`, `Last commit: ${d.lastCommit ?? "N/A"}`, `Topics: ${d.topics?.join(", ") ?? "none"}`, `\n--- README ---\n${d.readme ?? "No README"}`].join("\n");
-    return { content: [{ type: "text", text: stamp(raw, url, d.lastCommit ?? null, d.lastCommit ? "high" : "medium", "github") }] };
+    try {
+      const safeUrl = validateUrl(url, "github");
+      const browser = await puppeteer.launch(env.BROWSER);
+      const page = await browser.newPage();
+      await page.setUserAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/124.0.0.0 Safari/537.36");
+      await page.goto(safeUrl, { waitUntil: "domcontentloaded" });
+      const data = await page.evaluate(`(function() {
+        var readme = (document.querySelector('[data-target="readme-toc.content"]') || document.querySelector('.markdown-body') || {}).textContent || null;
+        var starsEl = document.querySelector('[id="repo-stars-counter-star"]') || document.querySelector('.Counter.js-social-count');
+        var stars = starsEl ? starsEl.textContent.trim() : null;
+        var forksEl = document.querySelector('[id="repo-network-counter"]');
+        var forks = forksEl ? forksEl.textContent.trim() : null;
+        var commitEl = document.querySelector('relative-time');
+        var lastCommit = commitEl ? commitEl.getAttribute('datetime') : null;
+        var descEl = document.querySelector('.f4.my-3');
+        var description = descEl ? descEl.textContent.trim() : null;
+        var topics = Array.from(document.querySelectorAll('.topic-tag')).map(function(t) { return t.textContent.trim(); });
+        var langEl = document.querySelector('.color-fg-default.text-bold.mr-1');
+        var language = langEl ? langEl.textContent.trim() : null;
+        return { readme, stars, forks, lastCommit, description, topics, language };
+      })()`);
+      await browser.close();
+      const d = data as any;
+      const raw = [
+        `Description: ${d.description ?? "N/A"}`,
+        `Stars: ${d.stars ?? "N/A"} | Forks: ${d.forks ?? "N/A"}`,
+        `Language: ${d.language ?? "N/A"}`,
+        `Last commit: ${d.lastCommit ?? "N/A"}`,
+        `Topics: ${d.topics?.join(", ") ?? "none"}`,
+        `\n--- README ---\n${d.readme ?? "No README"}`,
+      ].join("\n");
+      return { content: [{ type: "text", text: stamp(raw, safeUrl, d.lastCommit ?? null, d.lastCommit ? "high" : "medium", "github") }] };
+    } catch (err: any) {
+      return { content: [{ type: "text", text: `[ERROR] ${err.message}` }] };
+    }
   });
   // ── extract_hackernews ──────────────────────────────────────────────────────
   server.registerTool("extract_hackernews", {
-    description: "Extract top stories from Hacker News with real-time timestamps.",
-    inputSchema: z.object({ url: z.string().url().describe("HN URL") }),
+    description: "Extract top stories or search results from Hacker News with real-time timestamps.",
+    inputSchema: z.object({ url: z.string().url().describe("HN URL e.g. https://news.ycombinator.com") }),
     annotations: { readOnlyHint: true, openWorldHint: true },
   }, async ({ url }) => {
-    const browser = await puppeteer.launch(env.BROWSER);
-    const page = await browser.newPage();
-    await page.goto(url, { waitUntil: "domcontentloaded" });
-    const data = await page.evaluate(`(function() {
-      var items = Array.from(document.querySelectorAll('.athing')).slice(0, 20);
-      return items.map(function(el) {
-        var titleLineEl = el.querySelector('.titleline > a');
-        var title = titleLineEl ? titleLineEl.textContent.trim() : null;
-        var link = titleLineEl ? titleLineEl.getAttribute('href') : null;
-        var subtext = el.nextElementSibling;
-        var scoreEl = subtext ? subtext.querySelector('.score') : null;
-        var score = scoreEl ? scoreEl.textContent.trim() : null;
-        var ageEl = subtext ? subtext.querySelector('.age') : null;
-        var age = ageEl ? ageEl.getAttribute('title') : null;
-        return { title, link, score, age };
-      });
-    })()`);
-    await browser.close();
-    const items = data as any[];
-    const raw = items.map((r, i) => `[${i + 1}] ${r.title}\nURL: ${r.link}\nScore: ${r.score ?? "N/A"}\nPosted: ${r.age ?? "unknown"}`).join("\n\n");
-    const newest = items.map(r => r.age).filter(Boolean).sort().reverse()[0] ?? null;
-    return { content: [{ type: "text", text: stamp(raw, url, newest, newest ? "high" : "medium", "hackernews") }] };
+    try {
+      const safeUrl = validateUrl(url, "hackernews");
+      const browser = await puppeteer.launch(env.BROWSER);
+      const page = await browser.newPage();
+      await page.goto(safeUrl, { waitUntil: "domcontentloaded" });
+      const data = await page.evaluate(`(function() {
+        var items = Array.from(document.querySelectorAll('.athing')).slice(0, 20);
+        return items.map(function(el) {
+          var titleLineEl = el.querySelector('.titleline > a');
+          var title = titleLineEl ? titleLineEl.textContent.trim() : null;
+          var link = titleLineEl ? titleLineEl.getAttribute('href') : null;
+          var subtext = el.nextElementSibling;
+          var scoreEl = subtext ? subtext.querySelector('.score') : null;
+          var score = scoreEl ? scoreEl.textContent.trim() : null;
+          var ageEl = subtext ? subtext.querySelector('.age') : null;
+          var age = ageEl ? ageEl.getAttribute('title') : null;
+          return { title, link, score, age };
+        });
+      })()`);
+      await browser.close();
+      const items = data as any[];
+      const raw = items.map((r, i) =>
+        `[${i + 1}] ${r.title}\nURL: ${r.link}\nScore: ${r.score ?? "N/A"}\nPosted: ${r.age ?? "unknown"}`
+      ).join("\n\n");
+      const newest = items.map(r => r.age).filter(Boolean).sort().reverse()[0] ?? null;
+      return { content: [{ type: "text", text: stamp(raw, safeUrl, newest, newest ? "high" : "medium", "hackernews") }] };
+    } catch (err: any) {
+      return { content: [{ type: "text", text: `[ERROR] ${err.message}` }] };
+    }
   });
   // ── extract_scholar ─────────────────────────────────────────────────────────
   server.registerTool("extract_scholar", {
     description: "Extract research results from Google Scholar with publication dates.",
-    inputSchema: z.object({ url: z.string().url().describe("Google Scholar URL") }),
+    inputSchema: z.object({ url: z.string().url().describe("Google Scholar search URL") }),
     annotations: { readOnlyHint: true, openWorldHint: true },
   }, async ({ url }) => {
-    const browser = await puppeteer.launch(env.BROWSER);
-    const page = await browser.newPage();
-    await page.setUserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 Chrome/124.0.0.0 Safari/537.36");
-    await page.goto(url, { waitUntil: "domcontentloaded" });
-    const data = await page.evaluate(`(function() {
-      var items = Array.from(document.querySelectorAll('.gs_r.gs_or.gs_scl'));
-      return items.map(function(el) {
-        var titleEl = el.querySelector('.gs_rt');
-        var title = titleEl ? titleEl.textContent.trim() : null;
-        var authorsEl = el.querySelector('.gs_a');
-        var authors = authorsEl ? authorsEl.textContent.trim() : null;
-        var snippetEl = el.querySelector('.gs_rs');
-        var snippet = snippetEl ? snippetEl.textContent.trim() : null;
-        var yearMatch = authors ? authors.match(/\\b(19|20)\\d{2}\\b/) : null;
-        var year = yearMatch ? yearMatch[0] : null;
-        return { title, authors, snippet, year };
-      });
-    })()`);
-    await browser.close();
-    const items = data as any[];
-    const raw = items.map((r, i) => `[${i + 1}] ${r.title ?? "Untitled"}\nAuthors: ${r.authors ?? "Unknown"}\nYear: ${r.year ?? "Unknown"}\nSnippet: ${r.snippet ?? "N/A"}`).join("\n\n");
-    const years = items.map(r => r.year).filter(Boolean).sort().reverse();
-    const newest = years[0] ?? null;
-    return { content: [{ type: "text", text: stamp(raw, url, newest ? `${newest}-01-01` : null, newest ? "high" : "low", "google_scholar") }] };
+    try {
+      const safeUrl = validateUrl(url, "scholar");
+      const browser = await puppeteer.launch(env.BROWSER);
+      const page = await browser.newPage();
+      await page.setUserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 Chrome/124.0.0.0 Safari/537.36");
+      await page.goto(safeUrl, { waitUntil: "domcontentloaded" });
+      const data = await page.evaluate(`(function() {
+        var items = Array.from(document.querySelectorAll('.gs_r.gs_or.gs_scl'));
+        return items.map(function(el) {
+          var titleEl = el.querySelector('.gs_rt');
+          var title = titleEl ? titleEl.textContent.trim() : null;
+          var authorsEl = el.querySelector('.gs_a');
+          var authors = authorsEl ? authorsEl.textContent.trim() : null;
+          var snippetEl = el.querySelector('.gs_rs');
+          var snippet = snippetEl ? snippetEl.textContent.trim() : null;
+          var yearMatch = authors ? authors.match(/\\b(19|20)\\d{2}\\b/) : null;
+          var year = yearMatch ? yearMatch[0] : null;
+          return { title, authors, snippet, year };
+        });
+      })()`);
+      await browser.close();
+      const items = data as any[];
+      const raw = items.map((r, i) =>
+        `[${i + 1}] ${r.title ?? "Untitled"}\nAuthors: ${r.authors ?? "Unknown"}\nYear: ${r.year ?? "Unknown"}\nSnippet: ${r.snippet ?? "N/A"}`
+      ).join("\n\n");
+      const years = items.map(r => r.year).filter(Boolean).sort().reverse();
+      const newest = years[0] ?? null;
+      return { content: [{ type: "text", text: stamp(raw, safeUrl, newest ? `${newest}-01-01` : null, newest ? "high" : "low", "google_scholar") }] };
+    } catch (err: any) {
+      return { content: [{ type: "text", text: `[ERROR] ${err.message}` }] };
+    }
   });
   return server;
@@ -154,6 +315,23 @@ function createServer(env: Env): McpServer {
 export default {
   async fetch(request: Request, env: Env): Promise<Response> {
+    // Prune stale rate limit entries occasionally
+    if (Math.random() < 0.05) pruneRateMap();
+    try {
+      // 1. Auth check
+      checkAuth(request, env);
+      // 2. Rate limit check
+      const ip = getClientIp(request);
+      checkRateLimit(ip);
+    } catch (err: any) {
+      const status = err.message.startsWith("Unauthorized") ? 401 : 429;
+      return securityErrorResponse(err.message, status);
+    }
+    // 3. Handle MCP request
     const transport = new WebStandardStreamableHTTPServerTransport();
     const server = createServer(env);
     await server.connect(transport);