freshcontext-mcp 0.3.15 → 0.3.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,29 @@
1
1
  import { chromium } from "playwright";
2
2
  import { validateUrl } from "../security.js";
3
/**
 * Reports whether `input` can be parsed as an absolute URL.
 *
 * Any scheme accepted by the WHATWG URL parser counts, not just http(s);
 * domain allow-listing is left to the caller.
 *
 * @param {string} input - Candidate URL or free-text search query.
 * @returns {boolean} True when `new URL(input)` succeeds, false when it throws.
 */
function isUrl(input) {
    let parses = true;
    try {
        // Only the success or failure of construction matters; the URL object is discarded.
        void new URL(input);
    }
    catch {
        parses = false;
    }
    return parses;
}
12
/**
 * Extracts the first ISO-8601-like timestamp from `raw` and returns it as a
 * canonical UTC ISO string.
 *
 * The timestamp may be embedded in other text. An explicit zone designator
 * (`Z` or `±HH:MM`) is honoured; a timestamp without one is interpreted as UTC.
 *
 * @param {unknown} raw - Text that may contain a timestamp.
 * @returns {string | null} `Date#toISOString()` output, or null when `raw` is
 *   not a non-empty string, contains no timestamp, or the timestamp is not a
 *   valid date.
 */
function normalizeHnDate(raw) {
    // Non-string values (numbers, objects) have no usable .match(); treat them as "no date".
    if (typeof raw !== "string" || raw === "")
        return null;
    // Group 1 captures the optional zone designator so an explicit offset is
    // parsed as written instead of being truncated and replaced with "Z".
    const match = raw.match(/\b\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}(?:\.\d+)?(Z|[+-]\d{2}:\d{2})?\b/);
    if (!match)
        return null;
    const [timestamp, zone] = match;
    // A zone-less timestamp would be parsed as local time; pin it to UTC instead.
    const parsed = new Date(zone ? timestamp : `${timestamp}Z`);
    return Number.isNaN(parsed.getTime()) ? null : parsed.toISOString();
}
3
22
  export async function hackerNewsAdapter(options) {
4
- // Validate URL — allow both HN and Algolia domains
5
- validateUrl(options.url, "hackernews");
6
- const url = options.url;
23
+ const input = options.url.trim();
24
+ if (!input)
25
+ throw new Error("HN URL or search query is required");
26
+ const url = isUrl(input) ? validateUrl(input, "hackernews") : `hn-search:${input}`;
7
27
  if (url.includes("hn.algolia.com/api/") || url.startsWith("hn-search:")) {
8
28
  const query = url.startsWith("hn-search:")
9
29
  ? url.replace("hn-search:", "").trim()
@@ -20,14 +40,17 @@ export async function hackerNewsAdapter(options) {
20
40
  `[${i + 1}] ${r.title ?? "Untitled"}`,
21
41
  `URL: ${r.url ?? `https://news.ycombinator.com/item?id=${r.objectID}`}`,
22
42
  `Score: ${r.points} points | ${r.num_comments} comments`,
23
- `Author: ${r.author} | Posted: ${r.created_at}`,
43
+ `Author: ${r.author} | Posted: ${normalizeHnDate(r.created_at) ?? r.created_at}`,
24
44
  ].join("\n"))
25
45
  .join("\n\n")
26
46
  .slice(0, options.maxLength ?? 4000);
27
- const newest = data.hits.map((r) => r.created_at).sort().reverse()[0] ?? null;
47
+ const newest = data.hits
48
+ .map((r) => normalizeHnDate(r.created_at))
49
+ .filter((d) => Boolean(d))
50
+ .sort()
51
+ .reverse()[0] ?? null;
28
52
  return { raw, content_date: newest, freshness_confidence: newest ? "high" : "medium" };
29
53
  }
30
- // Default: browser-based scrape for HN front page or search pages
31
54
  const browser = await chromium.launch({ headless: true });
32
55
  const page = await browser.newPage();
33
56
  await page.goto(url, { waitUntil: "domcontentloaded", timeout: 20000 });
@@ -51,14 +74,21 @@ export async function hackerNewsAdapter(options) {
51
74
  await browser.close();
52
75
  const typedData = data;
53
76
  const raw = typedData
54
- .map((r, i) => [
55
- `[${i + 1}] ${r.title ?? "Untitled"}`,
56
- `URL: ${r.link ?? "N/A"}`,
57
- `Score: ${r.score ?? "N/A"} | ${r.commentLink ?? ""}`,
58
- `Posted: ${r.age ?? "unknown"}`,
59
- ].join("\n"))
77
+ .map((r, i) => {
78
+ const date = normalizeHnDate(r.age);
79
+ return [
80
+ `[${i + 1}] ${r.title ?? "Untitled"}`,
81
+ `URL: ${r.link ?? "N/A"}`,
82
+ `Score: ${r.score ?? "N/A"} | ${r.commentLink ?? ""}`,
83
+ `Posted: ${date ?? "unknown"}`,
84
+ ].join("\n");
85
+ })
60
86
  .join("\n\n");
61
- const newestDate = typedData.map((r) => r.age).filter(Boolean).sort().reverse()[0] ?? null;
87
+ const newestDate = typedData
88
+ .map((r) => normalizeHnDate(r.age))
89
+ .filter((d) => Boolean(d))
90
+ .sort()
91
+ .reverse()[0] ?? null;
62
92
  return {
63
93
  raw,
64
94
  content_date: newestDate,
@@ -1,5 +1,10 @@
1
+ import { validateUrl } from "../security.js";
1
2
  export async function productHuntAdapter(options) {
2
- // PH GraphQL API public, no auth for published posts
3
+ const token = process.env.PH_TOKEN?.trim() || process.env.PRODUCTHUNT_TOKEN?.trim();
4
+ if (!token)
5
+ return scrapeProductHunt(options);
6
+ // Product Hunt GraphQL API requires a bearer token. Keep it in env/secrets,
7
+ // never in source.
3
8
  const query = options.url.startsWith("http")
4
9
  ? null
5
10
  : options.url;
@@ -28,8 +33,7 @@ export async function productHuntAdapter(options) {
28
33
  method: "POST",
29
34
  headers: {
30
35
  "Content-Type": "application/json",
31
- // Public access token (read-only, rate-limited but usable)
32
- "Authorization": "Bearer irgTzMNAz-S-p1P8H5pFCxzU4TEF7GIJZ8vZZi0gLJg",
36
+ "Authorization": `Bearer ${token}`,
33
37
  },
34
38
  body: JSON.stringify({ query: gql }),
35
39
  });
@@ -68,7 +72,7 @@ export async function productHuntAdapter(options) {
68
72
  async function scrapeProductHunt(options) {
69
73
  const { chromium } = await import("playwright");
70
74
  const url = options.url.startsWith("http")
71
- ? options.url
75
+ ? validateUrl(options.url, "productHunt")
72
76
  : `https://www.producthunt.com/search?q=${encodeURIComponent(options.url)}`;
73
77
  const browser = await chromium.launch({ headless: true });
74
78
  const page = await browser.newPage();
@@ -22,7 +22,7 @@ export async function repoSearchAdapter(options) {
22
22
  const res = await fetch(apiUrl, {
23
23
  headers: {
24
24
  Accept: "application/vnd.github.v3+json",
25
- "User-Agent": "freshcontext-mcp/0.1.0",
25
+ "User-Agent": "freshcontext-mcp/0.3.17 (https://github.com/PrinceGabriel-lgtm/freshcontext-mcp)",
26
26
  },
27
27
  });
28
28
  if (!res.ok) {
@@ -12,7 +12,7 @@
12
12
  */
13
13
  const HEADERS = {
14
14
  "Accept": "application/json",
15
- "User-Agent": "freshcontext-mcp/1.0 contact@freshcontext.dev",
15
+ "User-Agent": "freshcontext-mcp/0.3.17 (https://github.com/PrinceGabriel-lgtm/freshcontext-mcp)",
16
16
  };
17
17
  async function fetchSecFilings(query, maxResults = 10) {
18
18
  const today = new Date().toISOString().slice(0, 10);
package/dist/security.js CHANGED
@@ -11,7 +11,7 @@ export const ALLOWED_DOMAINS = {
11
11
  repoSearch: [], // uses GitHub API directly, no browser
12
12
  packageTrends: [], // uses npm/PyPI APIs directly, no browser
13
13
  reddit: [], // uses public Reddit JSON API, no browser
14
- finance: [], // uses Yahoo Finance API, no browser
14
+ finance: [], // uses Stooq quote API, no browser
15
15
  productHunt: ["www.producthunt.com", "producthunt.com"],
16
16
  };
17
17
  // ─── Blocked IP ranges and internal hostnames ────────────────────────────────
package/dist/server.js CHANGED
@@ -11,6 +11,7 @@ import { packageTrendsAdapter } from "./adapters/packageTrends.js";
11
11
  import { redditAdapter } from "./adapters/reddit.js";
12
12
  import { productHuntAdapter } from "./adapters/productHunt.js";
13
13
  import { financeAdapter } from "./adapters/finance.js";
14
+ import { arxivAdapter } from "./adapters/arxiv.js";
14
15
  import { jobsAdapter } from "./adapters/jobs.js";
15
16
  import { changelogAdapter } from "./adapters/changelog.js";
16
17
  import { govContractsAdapter } from "./adapters/govcontracts.js";
@@ -21,7 +22,7 @@ import { stampFreshness, formatForLLM } from "./tools/freshnessStamp.js";
21
22
  import { formatSecurityError } from "./security.js";
22
23
  const server = new McpServer({
23
24
  name: "freshcontext-mcp",
24
- version: "0.1.0",
25
+ version: "0.3.17",
25
26
  });
26
27
  // ─── Tool: extract_github ────────────────────────────────────────────────────
27
28
  server.registerTool("extract_github", {
@@ -61,9 +62,9 @@ server.registerTool("extract_scholar", {
61
62
  });
62
63
  // ─── Tool: extract_hackernews ────────────────────────────────────────────────
63
64
  server.registerTool("extract_hackernews", {
64
- description: "Extract top stories or search results from Hacker News. Real-time dev/tech community sentiment with post timestamps.",
65
+ description: "Extract top stories or search results from Hacker News. Accepts an HN/Algolia URL or a plain search query while preserving the url field for compatibility.",
65
66
  inputSchema: z.object({
66
- url: z.string().url().describe("HN URL e.g. https://news.ycombinator.com or https://hn.algolia.com/?q=..."),
67
+ url: z.string().min(1).describe("HN URL e.g. https://news.ycombinator.com/news, Algolia API URL, or search query e.g. 'browser agents'"),
67
68
  max_length: z.number().optional().default(4000),
68
69
  }),
69
70
  annotations: { readOnlyHint: true, openWorldHint: true },
@@ -88,7 +89,7 @@ server.registerTool("extract_yc", {
88
89
  }, async ({ url, max_length }) => {
89
90
  try {
90
91
  const result = await ycAdapter({ url, maxLength: max_length });
91
- const ctx = stampFreshness(result, { url, maxLength: max_length }, "ycombinator");
92
+ const ctx = stampFreshness(result, { url, maxLength: max_length }, "yc");
92
93
  return { content: [{ type: "text", text: formatForLLM(ctx) }] };
93
94
  }
94
95
  catch (err) {
@@ -106,7 +107,7 @@ server.registerTool("search_repos", {
106
107
  }, async ({ query, max_length }) => {
107
108
  try {
108
109
  const result = await repoSearchAdapter({ url: query, maxLength: max_length });
109
- const ctx = stampFreshness(result, { url: query, maxLength: max_length }, "github_search");
110
+ const ctx = stampFreshness(result, { url: query, maxLength: max_length }, "reposearch");
110
111
  return { content: [{ type: "text", text: formatForLLM(ctx) }] };
111
112
  }
112
113
  catch (err) {
@@ -124,7 +125,79 @@ server.registerTool("package_trends", {
124
125
  }, async ({ packages, max_length }) => {
125
126
  try {
126
127
  const result = await packageTrendsAdapter({ url: packages, maxLength: max_length });
127
- const ctx = stampFreshness(result, { url: packages, maxLength: max_length }, "package_registry");
128
+ const ctx = stampFreshness(result, { url: packages, maxLength: max_length }, "packagetrends");
129
+ return { content: [{ type: "text", text: formatForLLM(ctx) }] };
130
+ }
131
+ catch (err) {
132
+ return { content: [{ type: "text", text: formatSecurityError(err) }] };
133
+ }
134
+ });
135
// ─── Tool: extract_arxiv ─────────────────────────────────────────────────────
// Runs the arXiv adapter, stamps the result under the "arxiv" adapter key, and
// returns the formatted text. Any thrown error is returned as text via
// formatSecurityError rather than rethrown.
server.registerTool("extract_arxiv", {
    description: "Search arXiv for research papers via the official API. Pass a topic, keyword, or full arXiv API URL. Returns titles, authors, publication dates, primary category, and abstracts — all timestamped.",
    inputSchema: z.object({
        url: z.string().describe("Search query e.g. 'temporal retrieval', or a full arXiv API URL"),
        max_length: z.number().optional().default(6000),
    }),
    annotations: { readOnlyHint: true, openWorldHint: true },
}, async ({ url, max_length: maxLength }) => {
    let text;
    try {
        const fetched = await arxivAdapter({ url, maxLength });
        const stamped = stampFreshness(fetched, { url, maxLength }, "arxiv");
        text = formatForLLM(stamped);
    }
    catch (err) {
        text = formatSecurityError(err);
    }
    return { content: [{ type: "text", text }] };
});
153
// ─── Tool: extract_finance ───────────────────────────────────────────────────
// Runs the finance adapter, stamps the result under the "finance" adapter key,
// and returns the formatted text. Any thrown error is returned as text via
// formatSecurityError rather than rethrown.
server.registerTool("extract_finance", {
    description: "No-key stock quote data via Stooq — close, open, high, low, volume, quote timestamp, and source. Accepts up to 5 comma-separated tickers. Returns timestamped freshcontext only for successful observations.",
    inputSchema: z.object({
        url: z.string().describe("Ticker symbol(s) e.g. 'AAPL' or 'MSFT,GOOG,PLTR'"),
        max_length: z.number().optional().default(5000),
    }),
    annotations: { readOnlyHint: true, openWorldHint: true },
}, async ({ url, max_length: maxLength }) => {
    let text;
    try {
        const fetched = await financeAdapter({ url, maxLength });
        const stamped = stampFreshness(fetched, { url, maxLength }, "finance");
        text = formatForLLM(stamped);
    }
    catch (err) {
        text = formatSecurityError(err);
    }
    return { content: [{ type: "text", text }] };
});
171
// ─── Tool: extract_reddit ────────────────────────────────────────────────────
// Runs the Reddit adapter, stamps the result under the "reddit" adapter key,
// and returns the formatted text. Any thrown error is returned as text via
// formatSecurityError rather than rethrown.
server.registerTool("extract_reddit", {
    description: "Extract posts and community sentiment from Reddit via the public JSON API. Accepts a subreddit URL (https://www.reddit.com/r/MachineLearning/.json), a search URL, or a subreddit shorthand ('r/MachineLearning'). Returns titles, authors, scores, comment counts, and per-post timestamps.",
    inputSchema: z.object({
        url: z.string().describe("Subreddit URL, search URL, or 'r/<subreddit>' shorthand"),
        max_length: z.number().optional().default(6000),
    }),
    annotations: { readOnlyHint: true, openWorldHint: true },
}, async ({ url, max_length: maxLength }) => {
    let text;
    try {
        const fetched = await redditAdapter({ url, maxLength });
        const stamped = stampFreshness(fetched, { url, maxLength }, "reddit");
        text = formatForLLM(stamped);
    }
    catch (err) {
        text = formatSecurityError(err);
    }
    return { content: [{ type: "text", text }] };
});
189
+ // ─── Tool: extract_producthunt ───────────────────────────────────────────────
190
+ server.registerTool("extract_producthunt", {
191
+ description: "Recent Product Hunt launches by keyword or topic. Uses the Product Hunt GraphQL API (with HTML scrape fallback). Returns names, taglines, vote counts, comment counts, topics, and launch dates — all timestamped.",
192
+ inputSchema: z.object({
193
+ url: z.string().describe("Search query e.g. 'mcp ai agents' or a Product Hunt topic URL"),
194
+ max_length: z.number().optional().default(6000),
195
+ }),
196
+ annotations: { readOnlyHint: true, openWorldHint: true },
197
+ }, async ({ url, max_length }) => {
198
+ try {
199
+ const result = await productHuntAdapter({ url, maxLength: max_length });
200
+ const ctx = stampFreshness(result, { url, maxLength: max_length }, "producthunt");
128
201
  return { content: [{ type: "text", text: formatForLLM(ctx) }] };
129
202
  }
130
203
  catch (err) {
@@ -271,7 +344,7 @@ server.registerTool("extract_gov_landscape", {
271
344
  "",
272
345
  sectionWithFreshnessCheck("🏛️ Federal Contract Awards (USASpending.gov)", contractsResult, "govcontracts", min_freshness_score),
273
346
  sectionWithFreshnessCheck("💬 Developer Community Awareness (Hacker News)", hnResult, "hackernews", min_freshness_score),
274
- sectionWithFreshnessCheck("📦 GitHub Repository Activity", repoResult, "github_search", min_freshness_score),
347
+ sectionWithFreshnessCheck("📦 GitHub Repository Activity", repoResult, "reposearch", min_freshness_score),
275
348
  sectionWithFreshnessCheck("🔄 Product Release Velocity (Changelog)", changelogResult, "changelog", min_freshness_score),
276
349
  ].filter(Boolean).join("\n\n");
277
350
  return { content: [{ type: "text", text: combined }] };
@@ -282,7 +355,7 @@ server.registerTool("extract_gov_landscape", {
282
355
  // community sentiment, repo ecosystem size, and product release velocity.
283
356
  // Unique: Bloomberg Terminal doesn't read commit history as a company health signal.
284
357
  server.registerTool("extract_finance_landscape", {
285
- description: "Composite financial intelligence tool for developers. Given one or more ticker symbols, simultaneously queries: (1) Yahoo Finance for live price/market data, (2) Hacker News for developer community sentiment, (3) Reddit for investor and tech community discussion, (4) GitHub for repo ecosystem activity around the company's tech, and (5) their product changelog for release velocity as a company health signal. Answers: What's the price? What are developers saying? Is the company actually shipping? Returns a unified 5-source timestamped report. Bloomberg Terminal doesn't give you this.",
358
+ description: "Composite financial intelligence tool for developers. Given one or more ticker symbols, simultaneously queries: (1) Stooq for no-key quote data, (2) Hacker News for developer community sentiment, (3) Reddit for investor and tech community discussion, (4) GitHub for repo ecosystem activity around the company's tech, and (5) their product changelog for release velocity as a company health signal. Answers: What's the price? What are developers saying? Is the company actually shipping? Returns a unified 5-source timestamped report.",
286
359
  inputSchema: z.object({
287
360
  tickers: z.string().describe("One or more ticker symbols e.g. 'PLTR' or 'PLTR,MSFT,GOOG'. Up to 5 tickers."),
288
361
  company_name: z.string().optional().describe("Company name for HN/Reddit/GitHub searches e.g. 'Palantir'. If omitted, derived from the ticker."),
@@ -308,13 +381,13 @@ server.registerTool("extract_finance_landscape", {
308
381
  const combined = [
309
382
  `# Finance + Developer Intelligence: "${tickers}"${company_name ? ` (${company_name})` : ""}`,
310
383
  `Generated: ${new Date().toISOString()}`,
311
- `Sources: Yahoo Finance · Hacker News · Reddit · GitHub · Changelog`,
384
+ `Sources: Stooq · Hacker News · Reddit · GitHub · Changelog`,
312
385
  min_freshness_score ? `min_freshness_score: ${min_freshness_score}` : null,
313
386
  "",
314
- sectionWithFreshnessCheck("📈 Market Data (Yahoo Finance)", priceResult, "finance", min_freshness_score),
387
+ sectionWithFreshnessCheck("📈 Market Data (Stooq)", priceResult, "finance", min_freshness_score),
315
388
  sectionWithFreshnessCheck("💬 Developer Sentiment (Hacker News)", hnResult, "hackernews", min_freshness_score),
316
389
  sectionWithFreshnessCheck("🗣️ Community Discussion (Reddit)", redditResult, "reddit", min_freshness_score),
317
- sectionWithFreshnessCheck("📦 Repo Ecosystem (GitHub)", repoResult, "github_search", min_freshness_score),
390
+ sectionWithFreshnessCheck("📦 Repo Ecosystem (GitHub)", repoResult, "reposearch", min_freshness_score),
318
391
  sectionWithFreshnessCheck("🔄 Product Release Velocity (Changelog)", changelogResult, "changelog", min_freshness_score),
319
392
  ].filter(Boolean).join("\n\n");
320
393
  return { content: [{ type: "text", text: combined }] };
@@ -369,7 +442,7 @@ server.registerTool("extract_gdelt", {
369
442
  // global news intelligence + product release velocity + market pricing.
370
443
  // Unique: this combination exists nowhere else.
371
444
  server.registerTool("extract_company_landscape", {
372
- description: "Composite company intelligence tool. The most complete single-call company analysis available. Simultaneously queries 5 unique sources: (1) SEC EDGAR for 8-K material event filings — what the company legally just disclosed, (2) USASpending.gov for federal contract footprint — who is giving them government money, (3) GDELT for global news intelligence — what the world is saying about them right now, (4) their product changelog — are they actually shipping, (5) Yahoo Finance — what the market is pricing in. Returns a unified 5-source timestamped report. Unique: this combination is not available in any other MCP server.",
445
+ description: "Composite company intelligence tool. The most complete single-call company analysis available. Simultaneously queries 5 unique sources: (1) SEC EDGAR for 8-K material event filings — what the company legally just disclosed, (2) USASpending.gov for federal contract footprint — who is giving them government money, (3) GDELT for global news intelligence — what the world is saying about them right now, (4) their product changelog — are they actually shipping, (5) Stooq quote data — what the market is pricing in. Returns a unified 5-source timestamped report. Unique: this combination is not available in any other MCP server.",
373
446
  inputSchema: z.object({
374
447
  company: z.string().describe("Company name e.g. 'Palantir', 'Anthropic', 'OpenAI'"),
375
448
  ticker: z.string().optional().describe("Stock ticker for finance data e.g. 'PLTR'. Leave blank for private companies."),
@@ -391,14 +464,14 @@ server.registerTool("extract_company_landscape", {
391
464
  const combined = [
392
465
  `# Company Intelligence Landscape: "${company}"${ticker ? ` (${ticker})` : ""}`,
393
466
  `Generated: ${new Date().toISOString()}`,
394
- `Sources: SEC EDGAR · USASpending.gov · GDELT · Changelog · Yahoo Finance`,
467
+ `Sources: SEC EDGAR · USASpending.gov · GDELT · Changelog · Stooq`,
395
468
  min_freshness_score ? `min_freshness_score: ${min_freshness_score}` : null,
396
469
  "",
397
470
  sectionWithFreshnessCheck("📋 SEC 8-K Filings — Legal Disclosures", secResult, "sec_filings", min_freshness_score),
398
471
  sectionWithFreshnessCheck("🏛️ Federal Contract Awards (USASpending.gov)", contractsResult, "govcontracts", min_freshness_score),
399
472
  sectionWithFreshnessCheck("🌍 Global News Intelligence (GDELT)", gdeltResult, "gdelt", min_freshness_score),
400
473
  sectionWithFreshnessCheck("🔄 Product Release Velocity (Changelog)", changelogResult, "changelog", min_freshness_score),
401
- sectionWithFreshnessCheck("📈 Market Data (Yahoo Finance)", financeResult, "finance", min_freshness_score),
474
+ sectionWithFreshnessCheck("📈 Market Data (Stooq)", financeResult, "finance", min_freshness_score),
402
475
  ].filter(Boolean).join("\n\n");
403
476
  return { content: [{ type: "text", text: combined }] };
404
477
  });
@@ -468,10 +541,10 @@ server.registerTool("extract_idea_landscape", {
468
541
  `Launch signal (Product Hunt): What just shipped — community reception and timing.`,
469
542
  "",
470
543
  sectionWithFreshnessCheck("🗣️ Pain Signal — Developer Discussions (Hacker News)", hnResult, "hackernews", min_freshness_score),
471
- sectionWithFreshnessCheck("💰 Funding Signal — Backed Companies (YC)", ycResult, "ycombinator", min_freshness_score),
472
- sectionWithFreshnessCheck("📦 Crowding Signal — Open Source Landscape (GitHub)", repoResult, "github_search", min_freshness_score),
544
+ sectionWithFreshnessCheck("💰 Funding Signal — Backed Companies (YC)", ycResult, "yc", min_freshness_score),
545
+ sectionWithFreshnessCheck("📦 Crowding Signal — Open Source Landscape (GitHub)", repoResult, "reposearch", min_freshness_score),
473
546
  sectionWithFreshnessCheck("💼 Market Signal — Hiring Activity (Job Listings)", jobsResult, "jobs", min_freshness_score),
474
- sectionWithFreshnessCheck("🔧 Ecosystem Signal — Package Adoption (npm/PyPI)", pkgResult, "package_registry", min_freshness_score),
547
+ sectionWithFreshnessCheck("🔧 Ecosystem Signal — Package Adoption (npm/PyPI)", pkgResult, "packagetrends", min_freshness_score),
475
548
  sectionWithFreshnessCheck("🚀 Launch Signal — Recent Launches (Product Hunt)", phResult, "producthunt", min_freshness_score),
476
549
  ].filter(Boolean).join("\n\n");
477
550
  return { content: [{ type: "text", text: combined }] };
@@ -1,26 +1,34 @@
1
1
  // ─── Decay rates per adapter ──────────────────────────────────────────────────
2
- // From FreshContext Specification v1.0.
3
- // Higher decay = data goes stale faster. Half-life = 100 / (2 * decayRate) days.
4
- // finance=5.0 (half-life ~10d), jobs=3.0 (~17d), news/hn=2.0 (~25d),
5
- // github=1.0 (~50d), scholar/arxiv=0.3 (~167d), default=1.5 (~33d)
6
- const DECAY_RATES = {
7
- finance: 5.0,
8
- search_jobs: 3.0,
9
- hackernews: 2.0,
10
- reddit: 2.0,
11
- producthunt: 2.0,
12
- yc: 1.5,
13
- govcontracts: 1.5,
14
- github: 1.0,
15
- repoSearch: 1.0,
16
- packageTrends: 1.0,
17
- changelog: 1.0,
18
- scholar: 0.3,
19
- arxiv: 0.3,
2
// Spec-compliant exponential DAR model.
// Higher lambda = data goes stale faster. Half-life = ln(2) / λ, in hours,
// because lambda is measured per hour (e.g. λ = 0.050 → ~13.9 h).
// The values mirror the Worker/D1 intelligence engine.
// Frozen so that no importer can alter the shared decay table at runtime.
export const LAMBDA = Object.freeze({
    hackernews: 0.050, // half-life ~14 hours
    reddit: 0.010, // ~2.9 days
    producthunt: 0.010, // ~2.9 days
    jobs: 0.005, // ~5.8 days
    finance: 0.001, // ~29 days
    yc: 0.001, // ~29 days
    packagetrends: 0.0005, // ~58 days
    github: 0.0002, // ~144 days
    reposearch: 0.0002, // ~144 days
    google_scholar: 0.00005, // ~578 days
    arxiv: 0.00005, // ~578 days
    changelog: 0.0005, // ~58 days
    gdelt: 0.020, // ~1.4 days
    gebiz: 0.003, // ~9.6 days
    govcontracts: 0.001, // ~29 days
    sec_filings: 0.005, // ~5.8 days
    landscape: 0.050, // ~14 hours
    gov_landscape: 0.001, // ~29 days
    finance_landscape: 0.001, // ~29 days
    company_landscape: 0.005, // ~5.8 days
    idea_landscape: 0.050, // ~14 hours
    default: 0.001, // fallback for adapter keys not listed above
});
21
29
  // ─── Score calculation ────────────────────────────────────────────────────────
22
30
  // Returns null when content_date is unknown — we can't calculate age without a date.
23
- // Returns 0 when the score would go negative (content is very old).
31
+ // Returns a clamped 0-100 exponential freshness score.
24
32
  function calculateFreshnessScore(content_date, retrieved_at, adapter) {
25
33
  if (!content_date)
26
34
  return null;
@@ -29,9 +37,9 @@ function calculateFreshnessScore(content_date, retrieved_at, adapter) {
29
37
  // Guard against unparseable dates
30
38
  if (isNaN(published) || isNaN(retrieved))
31
39
  return null;
32
- const daysSinceRetrieved = (retrieved - published) / (1000 * 60 * 60 * 24);
33
- const decayRate = DECAY_RATES[adapter] ?? 1.5;
34
- return Math.max(0, Math.round(100 - daysSinceRetrieved * decayRate));
40
+ const hoursSinceRetrieved = Math.max(0, (retrieved - published) / (1000 * 60 * 60));
41
+ const lambda = LAMBDA[adapter] ?? LAMBDA.default;
42
+ return Math.max(0, Math.round(100 * Math.exp(-lambda * hoursSinceRetrieved)));
35
43
  }
36
44
  // ─── Score label ──────────────────────────────────────────────────────────────
37
45
  // Human-readable interpretation alongside the number, per the spec.
@@ -46,16 +54,36 @@ function scoreLabel(score) {
46
54
  return "verify before acting";
47
55
  return "use with caution";
48
56
  }
57
/**
 * Heuristically decides whether adapter output is a failure message rather
 * than real content.
 *
 * Output counts as failed when it is missing or not a string, is blank,
 * starts with an `[error]`/`[security]` tag, starts with the word
 * error/failed/upstream/timeout, or when every non-blank line mentions a
 * failure keyword or an HTTP-style error code (401, 403, 404, 429, 5xx).
 *
 * @param {unknown} raw - Raw text returned by an adapter.
 * @returns {boolean} True when the content looks like a failure.
 */
function looksLikeFailedAdapterContent(raw) {
    // A missing or non-string payload carries no content; classify it as a
    // failure instead of throwing on .trim().
    if (typeof raw !== "string")
        return true;
    const trimmed = raw.trim();
    if (!trimmed)
        return true;
    if (/^\[(?:error|security)\]/i.test(trimmed))
        return true;
    if (/^(?:error|failed|upstream|timeout)\b/i.test(trimmed))
        return true;
    // `trimmed` is non-empty here, so at least one non-blank line exists.
    const meaningful = trimmed
        .split(/\r?\n/)
        .map((line) => line.trim())
        .filter(Boolean);
    // Mixed output (some real lines, some error lines) is still treated as content.
    return meaningful.every((line) => /\b(?:error|failed|failure|timeout|401|403|404|429|5\d\d)\b/i.test(line));
}
49
74
  // ─── Main stamp function ──────────────────────────────────────────────────────
50
75
  export function stampFreshness(result, options, adapter) {
51
76
  const retrieved_at = new Date().toISOString();
52
- const freshness_score = calculateFreshnessScore(result.content_date, retrieved_at, adapter);
77
+ const failedContent = looksLikeFailedAdapterContent(result.raw);
78
+ const content_date = failedContent ? null : result.content_date;
79
+ const freshness_confidence = failedContent ? "low" : result.freshness_confidence;
80
+ const freshness_score = calculateFreshnessScore(content_date, retrieved_at, adapter);
53
81
  return {
54
82
  content: result.raw.slice(0, options.maxLength ?? 8000),
55
83
  source_url: options.url,
56
- content_date: result.content_date,
84
+ content_date,
57
85
  retrieved_at,
58
- freshness_confidence: result.freshness_confidence,
86
+ freshness_confidence,
59
87
  freshness_score,
60
88
  adapter,
61
89
  };
@@ -36,7 +36,7 @@
36
36
  "type": ["number", "null"],
37
37
  "minimum": 0,
38
38
  "maximum": 100,
39
- "description": "Optional numeric freshness score 0-100. Calculated as: max(0, 100 - (days_since_retrieved * decay_rate)). Null if content_date is unknown.",
39
+ "description": "Optional numeric freshness score 0-100. Calculated with source-specific exponential temporal decay. Null if content_date is unknown.",
40
40
  "examples": [94, 72, 45, null]
41
41
  },
42
42
  "adapter": {
package/package.json CHANGED
@@ -1,8 +1,8 @@
1
1
  {
2
2
  "name": "freshcontext-mcp",
3
3
  "mcpName": "io.github.PrinceGabriel-lgtm/freshcontext",
4
- "version": "0.3.15",
5
- "description": "Real-time web intelligence for AI agents. 20 tools, no API keys. Every result timestamped with a freshness score.",
4
+ "version": "0.3.17",
5
+ "description": "Real-time web intelligence for AI agents. 21 tools, no required API keys. Every result timestamped with a freshness score.",
6
6
  "keywords": [
7
7
  "mcp",
8
8
  "mcp-server",
@@ -24,6 +24,9 @@
24
24
  },
25
25
  "license": "MIT",
26
26
  "type": "module",
27
+ "engines": {
28
+ "node": ">=20"
29
+ },
27
30
  "main": "dist/server.js",
28
31
  "bin": {
29
32
  "freshcontext-mcp": "dist/server.js"
@@ -33,20 +36,24 @@
33
36
  "dev": "tsx watch src/server.ts",
34
37
  "start": "node dist/server.js",
35
38
  "inspect": "npx @modelcontextprotocol/inspector tsx src/server.ts",
39
+ "smoke:stdio": "node scripts/smoke-stdio.mjs",
36
40
  "test": "jest"
37
41
  },
38
42
  "dependencies": {
39
43
  "@modelcontextprotocol/sdk": "^1.0.0",
40
44
  "apify": "^3.0.0",
45
+ "dotenv": "^16.4.0",
41
46
  "playwright": "^1.44.0",
42
- "zod": "^3.23.0",
43
- "dotenv": "^16.4.0"
47
+ "zod": "^3.23.0"
44
48
  },
45
49
  "devDependencies": {
50
+ "@types/jest": "^29.0.0",
46
51
  "@types/node": "^20.0.0",
47
- "tsx": "^4.0.0",
48
- "typescript": "^5.4.0",
49
52
  "jest": "^29.0.0",
50
- "@types/jest": "^29.0.0"
53
+ "tsx": "^4.0.0",
54
+ "typescript": "^5.4.0"
55
+ },
56
+ "overrides": {
57
+ "file-type": "21.3.4"
51
58
  }
52
59
  }
package/server.json CHANGED
@@ -1,19 +1,19 @@
1
1
  {
2
2
  "$schema": "https://static.modelcontextprotocol.io/schemas/2025-07-09/server.schema.json",
3
3
  "name": "io.github.PrinceGabriel-lgtm/freshcontext",
4
- "description": "Real-time web intelligence for AI agents. 20 tools, no API keys. GitHub, HN, Reddit, arXiv, SEC filings, US gov contracts, GDELT global news, Singapore GeBIZ, changelog & more — every result timestamped with a freshness score.",
4
+ "description": "Real-time web intelligence for AI agents. 21 tools, no required API keys. GitHub, HN, Reddit, arXiv, SEC filings, US gov contracts, GDELT global news, Singapore GeBIZ, changelog & more — every result timestamped with a freshness score.",
5
5
  "repository": {
6
6
  "url": "https://github.com/PrinceGabriel-lgtm/freshcontext-mcp",
7
7
  "source": "github"
8
8
  },
9
- "version": "0.3.15",
9
+ "version": "0.3.17",
10
10
  "website_url": "https://freshcontext-site.pages.dev",
11
11
  "packages": [
12
12
  {
13
13
  "registry_type": "npm",
14
14
  "registry_base_url": "https://registry.npmjs.org",
15
15
  "identifier": "freshcontext-mcp",
16
- "version": "0.3.15",
16
+ "version": "0.3.17",
17
17
  "transport": {
18
18
  "type": "stdio"
19
19
  }
package/.actor/Dockerfile DELETED
@@ -1,19 +0,0 @@
1
- FROM apify/actor-node-playwright-chrome:20
2
-
3
- # Copy package files first for better Docker layer caching
4
- COPY package*.json ./
5
-
6
- # Install dependencies
7
- RUN npm install --include=dev
8
-
9
- # Install Chromium matching the npm-installed Playwright version
10
- RUN npx playwright install chromium
11
-
12
- # Copy source and pre-built dist
13
- COPY . ./
14
-
15
- # Rebuild TypeScript
16
- RUN npm run build || echo "Build had warnings, using pre-compiled dist/"
17
-
18
- # Run the Actor entry point
19
- CMD ["node", "dist/apify.js"]
package/.actor/actor.json DELETED
@@ -1,9 +0,0 @@
1
- {
2
- "actorSpecification": 1,
3
- "name": "freshcontext-mcp",
4
- "title": "FreshContext MCP",
5
- "version": "0.21",
6
- "input": "../input_schema.json",
7
- "output": "./output_schema.json",
8
- "dockerfile": "./Dockerfile"
9
- }
@@ -1,13 +0,0 @@
1
- {
2
- "actorOutputSchemaVersion": 1,
3
- "title": "FreshContext MCP Output",
4
- "description": "Timestamped web intelligence results wrapped in FreshContext envelopes.",
5
- "properties": {
6
- "results": {
7
- "type": "string",
8
- "title": "Results",
9
- "description": "FreshContext envelopes with content, source URL, retrieval timestamp, and freshness confidence.",
10
- "template": "{{links.apiDefaultDatasetUrl}}/items"
11
- }
12
- }
13
- }
@@ -1,32 +0,0 @@
1
- name: Build and Publish
2
-
3
- on:
4
- push:
5
- branches:
6
- - main
7
-
8
- jobs:
9
- build-and-publish:
10
- runs-on: ubuntu-latest
11
-
12
- steps:
13
- - name: Checkout repository
14
- uses: actions/checkout@v4
15
-
16
- - name: Set up Node.js 18
17
- uses: actions/setup-node@v4
18
- with:
19
- node-version: '18'
20
- registry-url: 'https://registry.npmjs.org'
21
-
22
- - name: Install dependencies
23
- run: npm ci
24
-
25
- - name: Build
26
- run: npm run build
27
-
28
- - name: Publish to npm
29
- run: npm publish --access public
30
- env:
31
- NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
32
- continue-on-error: true