freshcontext-mcp 0.3.14 → 0.3.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +8 -0
- package/README.md +117 -125
- package/RESEARCH.md +487 -0
- package/RISKS.md +137 -0
- package/cleanup.ps1 +99 -0
- package/demo/README.md +70 -0
- package/demo/data.json +88 -0
- package/demo/generate.mjs +199 -0
- package/demo/index.html +513 -0
- package/demo/logo-export.html +61 -0
- package/demo/logo.svg +23 -0
- package/dist/server.js +124 -66
- package/dist/tools/freshnessStamp.js +30 -22
- package/freshcontext-validate.js +196 -0
- package/freshcontext.schema.json +103 -0
- package/package.json +2 -2
- package/server.json +3 -3
- package/time-check.ps1 +46 -0
- package/.actor/Dockerfile +0 -16
- package/.actor/actor.json +0 -9
- package/.actor/output_schema.json +0 -13
- package/ARCHITECTURE_UPGRADE_CHECKLIST.md +0 -88
- package/ARCHITECTURE_UPGRADE_ROADMAP_V1.md +0 -174
- package/FRESHCONTEXT_SPEC.md +0 -178
- package/HANDOFF.md +0 -184
- package/ROADMAP.md +0 -174
- package/SESSION_SAVE_ARCHITECTURE_V1.md +0 -67
- package/SESSION_SAVE_ARCHITECTURE_V2.md +0 -142
- package/SESSION_SAVE_V4.md +0 -60
- package/SESSION_SAVE_V5.md +0 -121
- package/USAGE.md +0 -294
- package/add-cache.cjs +0 -86
- package/dataset_schema.json +0 -41
- package/input_schema.json +0 -48
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "http://json-schema.org/draft-07/schema#",
|
|
3
|
+
"$id": "https://freshcontext-site.pages.dev/freshcontext.schema.json",
|
|
4
|
+
"title": "FreshContext",
|
|
5
|
+
"description": "The FreshContext Specification v1.1 — structured envelope for AI-retrieved web data. https://github.com/PrinceGabriel-lgtm/freshcontext-mcp",
|
|
6
|
+
"type": "object",
|
|
7
|
+
"required": ["freshcontext"],
|
|
8
|
+
"properties": {
|
|
9
|
+
"freshcontext": {
|
|
10
|
+
"type": "object",
|
|
11
|
+
"description": "The freshness metadata envelope for a single retrieved result.",
|
|
12
|
+
"required": ["source_url", "retrieved_at", "freshness_confidence", "adapter"],
|
|
13
|
+
"properties": {
|
|
14
|
+
"source_url": {
|
|
15
|
+
"type": "string",
|
|
16
|
+
"format": "uri",
|
|
17
|
+
"description": "The canonical URL of the original source from which content was retrieved."
|
|
18
|
+
},
|
|
19
|
+
"content_date": {
|
|
20
|
+
"type": ["string", "null"],
|
|
21
|
+
"description": "Best estimate of when the content was originally published. ISO 8601 date (YYYY-MM-DD) or ISO 8601 datetime. Null if unknown.",
|
|
22
|
+
"examples": ["2026-03-05", "2026-03-05T09:19:00.000Z", null]
|
|
23
|
+
},
|
|
24
|
+
"retrieved_at": {
|
|
25
|
+
"type": "string",
|
|
26
|
+
"format": "date-time",
|
|
27
|
+
"description": "Exact ISO 8601 datetime (with timezone) when this data was fetched.",
|
|
28
|
+
"examples": ["2026-04-05T09:19:00.000Z"]
|
|
29
|
+
},
|
|
30
|
+
"freshness_confidence": {
|
|
31
|
+
"type": "string",
|
|
32
|
+
"enum": ["high", "medium", "low"],
|
|
33
|
+
"description": "Confidence level of the content_date estimate. 'high' = structured API field. 'medium' = inferred from page signals. 'low' = unknown or estimated."
|
|
34
|
+
},
|
|
35
|
+
"freshness_score": {
|
|
36
|
+
"type": ["number", "null"],
|
|
37
|
+
"minimum": 0,
|
|
38
|
+
"maximum": 100,
|
|
39
|
+
"description": "Optional numeric freshness score 0-100. Calculated as: max(0, 100 - (days_since_retrieved * decay_rate)). Null if content_date is unknown.",
|
|
40
|
+
"examples": [94, 72, 45, null]
|
|
41
|
+
},
|
|
42
|
+
"adapter": {
|
|
43
|
+
"type": "string",
|
|
44
|
+
"description": "Identifier of the adapter (data source) that produced this result.",
|
|
45
|
+
"examples": [
|
|
46
|
+
"github",
|
|
47
|
+
"hackernews",
|
|
48
|
+
"google_scholar",
|
|
49
|
+
"arxiv",
|
|
50
|
+
"reddit",
|
|
51
|
+
"ycombinator",
|
|
52
|
+
"producthunt",
|
|
53
|
+
"github_search",
|
|
54
|
+
"package_registry",
|
|
55
|
+
"finance",
|
|
56
|
+
"jobs",
|
|
57
|
+
"changelog",
|
|
58
|
+
"govcontracts",
|
|
59
|
+
"sec_filings",
|
|
60
|
+
"gdelt",
|
|
61
|
+
"gebiz"
|
|
62
|
+
]
|
|
63
|
+
},
|
|
64
|
+
"decay_rate": {
|
|
65
|
+
"type": ["number", "null"],
|
|
66
|
+
"description": "The decay rate used to calculate freshness_score. Domain-specific. Financial=5.0, jobs=3.0, news=2.0, github=1.0, academic=0.3, default=1.5.",
|
|
67
|
+
"examples": [5.0, 3.0, 2.0, 1.5, 1.0, 0.3, null]
|
|
68
|
+
}
|
|
69
|
+
},
|
|
70
|
+
"additionalProperties": false
|
|
71
|
+
},
|
|
72
|
+
"content": {
|
|
73
|
+
"type": "string",
|
|
74
|
+
"description": "The retrieved content — raw text, structured data, or formatted output from the adapter."
|
|
75
|
+
}
|
|
76
|
+
},
|
|
77
|
+
"examples": [
|
|
78
|
+
{
|
|
79
|
+
"freshcontext": {
|
|
80
|
+
"source_url": "https://github.com/PrinceGabriel-lgtm/freshcontext-mcp",
|
|
81
|
+
"content_date": "2026-04-05",
|
|
82
|
+
"retrieved_at": "2026-04-05T09:19:00.000Z",
|
|
83
|
+
"freshness_confidence": "high",
|
|
84
|
+
"freshness_score": 94,
|
|
85
|
+
"adapter": "github",
|
|
86
|
+
"decay_rate": 1.0
|
|
87
|
+
},
|
|
88
|
+
"content": "freshcontext-mcp — Real-time web intelligence for AI agents..."
|
|
89
|
+
},
|
|
90
|
+
{
|
|
91
|
+
"freshcontext": {
|
|
92
|
+
"source_url": "https://efts.sec.gov/LATEST/search-index?q=Palantir&forms=8-K",
|
|
93
|
+
"content_date": "2026-02-03",
|
|
94
|
+
"retrieved_at": "2026-04-05T09:19:00.000Z",
|
|
95
|
+
"freshness_confidence": "high",
|
|
96
|
+
"freshness_score": 38,
|
|
97
|
+
"adapter": "sec_filings",
|
|
98
|
+
"decay_rate": 1.5
|
|
99
|
+
},
|
|
100
|
+
"content": "Palantir Technologies — 8-K filing: Q4 2025 earnings..."
|
|
101
|
+
}
|
|
102
|
+
]
|
|
103
|
+
}
|
package/package.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "freshcontext-mcp",
|
|
3
3
|
"mcpName": "io.github.PrinceGabriel-lgtm/freshcontext",
|
|
4
|
-
"version": "0.3.14",
|
|
5
|
-
"description": "Real-time web
|
|
4
|
+
"version": "0.3.16",
|
|
5
|
+
"description": "Real-time web intelligence for AI agents. 20 tools, no API keys. Every result timestamped with a freshness score.",
|
|
6
6
|
"keywords": [
|
|
7
7
|
"mcp",
|
|
8
8
|
"mcp-server",
|
package/server.json
CHANGED
|
@@ -1,19 +1,19 @@
|
|
|
1
1
|
{
|
|
2
2
|
"$schema": "https://static.modelcontextprotocol.io/schemas/2025-07-09/server.schema.json",
|
|
3
3
|
"name": "io.github.PrinceGabriel-lgtm/freshcontext",
|
|
4
|
-
"description": "Real-time web intelligence for AI agents.
|
|
4
|
+
"description": "Real-time web intelligence for AI agents. 20 tools, no API keys. GitHub, HN, Reddit, arXiv, SEC filings, US gov contracts, GDELT global news, Singapore GeBIZ, changelog & more — every result timestamped with a freshness score.",
|
|
5
5
|
"repository": {
|
|
6
6
|
"url": "https://github.com/PrinceGabriel-lgtm/freshcontext-mcp",
|
|
7
7
|
"source": "github"
|
|
8
8
|
},
|
|
9
|
-
"version": "0.3.
|
|
9
|
+
"version": "0.3.16",
|
|
10
10
|
"website_url": "https://freshcontext-site.pages.dev",
|
|
11
11
|
"packages": [
|
|
12
12
|
{
|
|
13
13
|
"registry_type": "npm",
|
|
14
14
|
"registry_base_url": "https://registry.npmjs.org",
|
|
15
15
|
"identifier": "freshcontext-mcp",
|
|
16
|
-
"version": "0.3.
|
|
16
|
+
"version": "0.3.16",
|
|
17
17
|
"transport": {
|
|
18
18
|
"type": "stdio"
|
|
19
19
|
}
|
package/time-check.ps1
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# time-check.ps1 — Print a session header for Claude conversations
|
|
2
|
+
# Usage: ./time-check.ps1
|
|
3
|
+
# Then paste the output at the start of your message to Claude.
|
|
4
|
+
|
|
5
|
+
$now = Get-Date
|
|
6
|
+
$utc = $now.ToUniversalTime()
|
|
7
|
+
$dayOfWeek = $now.DayOfWeek
|
|
8
|
+
$weekNumber = (Get-Culture).Calendar.GetWeekOfYear($now, [System.Globalization.CalendarWeekRule]::FirstFourDayWeek, [DayOfWeek]::Monday)
|
|
9
|
+
|
|
10
|
+
# Sun position approximation for Grootfontein (-19.57°, 18.12°)
|
|
11
|
+
# Simple model: sunrise ~6:00, sunset ~18:30 (varies seasonally, close enough for vibes)
|
|
12
|
+
$hour = $now.Hour
|
|
13
|
+
$timeOfDay = switch ($hour) {  # Maps the local hour to a label. Clauses are tested top-down; each ends in 'break' because PowerShell's switch otherwise runs EVERY matching clause and would return an array of labels.
|
|
14
|
+
{$_ -lt 5} { "deep night"; break }
|
|
15
|
+
{$_ -lt 7} { "before dawn"; break }
|
|
16
|
+
{$_ -lt 9} { "early morning"; break }
|
|
17
|
+
{$_ -lt 12} { "morning"; break }
|
|
18
|
+
{$_ -lt 14} { "midday"; break }
|
|
19
|
+
{$_ -lt 17} { "afternoon"; break }
|
|
20
|
+
{$_ -lt 19} { "early evening"; break }
|
|
21
|
+
{$_ -lt 22} { "evening"; break }
|
|
22
|
+
default { "late night" }
|
|
23
|
+
}
|
|
24
|
+
|
|
25
|
+
# US East Coast equivalent, converted from UTC so it is correct whatever zone this machine is in
|
|
26
|
+
# and across US daylight saving (ET is UTC-4 in summer, UTC-5 in winter). 'Eastern Standard Time' is the zone ID that covers both.
|
|
27
|
+
$etNow = [System.TimeZoneInfo]::ConvertTimeBySystemTimeZoneId($utc, 'Eastern Standard Time')
|
|
28
|
+
$etTime = "{0:HH:mm} ET" -f $etNow
|
|
29
|
+
|
|
30
|
+
# Output
|
|
31
|
+
Write-Host ""
|
|
32
|
+
Write-Host "═══════════════════════════════════════════" -ForegroundColor Cyan
|
|
33
|
+
Write-Host " Session header for Claude (paste this in)" -ForegroundColor Cyan
|
|
34
|
+
Write-Host "═══════════════════════════════════════════" -ForegroundColor Cyan
|
|
35
|
+
Write-Host ""
|
|
36
|
+
Write-Host "Local time: $($now.ToString('yyyy-MM-dd HH:mm')) CAT ($timeOfDay), $dayOfWeek" -ForegroundColor Green
|
|
37
|
+
Write-Host "UTC: $($utc.ToString('yyyy-MM-dd HH:mm')) UTC" -ForegroundColor Gray
|
|
38
|
+
Write-Host "ET (US): ~$etTime" -ForegroundColor Gray
|
|
39
|
+
Write-Host "Week: Week $weekNumber of $($now.Year)" -ForegroundColor Gray
|
|
40
|
+
Write-Host ""
|
|
41
|
+
|
|
42
|
+
# Also copy to clipboard so you don't have to retype it
|
|
43
|
+
$header = "[$($now.ToString('yyyy-MM-dd HH:mm')) CAT, $dayOfWeek $timeOfDay]"
|
|
44
|
+
$header | Set-Clipboard
|
|
45
|
+
Write-Host "→ Copied to clipboard: $header" -ForegroundColor Yellow
|
|
46
|
+
Write-Host ""
|
package/.actor/Dockerfile
DELETED
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
FROM apify/actor-node-playwright-chrome:20
|
|
2
|
-
|
|
3
|
-
# Copy package files first for better Docker layer caching
|
|
4
|
-
COPY package*.json ./
|
|
5
|
-
|
|
6
|
-
# Install dependencies — Playwright and Chromium already in base image
|
|
7
|
-
RUN npm install --include=dev
|
|
8
|
-
|
|
9
|
-
# Copy source and pre-built dist
|
|
10
|
-
COPY . ./
|
|
11
|
-
|
|
12
|
-
# Rebuild TypeScript
|
|
13
|
-
RUN npm run build || echo "Build had warnings, using pre-compiled dist/"
|
|
14
|
-
|
|
15
|
-
# Run the Actor entry point
|
|
16
|
-
CMD ["node", "dist/apify.js"]
|
package/.actor/actor.json
DELETED
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"actorOutputSchemaVersion": 1,
|
|
3
|
-
"title": "FreshContext MCP Output",
|
|
4
|
-
"description": "Timestamped web intelligence results wrapped in FreshContext envelopes.",
|
|
5
|
-
"properties": {
|
|
6
|
-
"results": {
|
|
7
|
-
"type": "string",
|
|
8
|
-
"title": "Results",
|
|
9
|
-
"description": "FreshContext envelopes with content, source URL, retrieval timestamp, and freshness confidence.",
|
|
10
|
-
"template": "{{links.apiDefaultDatasetUrl}}/items"
|
|
11
|
-
}
|
|
12
|
-
}
|
|
13
|
-
}
|
|
@@ -1,88 +0,0 @@
|
|
|
1
|
-
# FreshContext — Architecture Upgrade Checklist
|
|
2
|
-
**Date started:** 2026-03-19
|
|
3
|
-
**Author:** Prince Gabriel, Grootfontein, Namibia
|
|
4
|
-
|
|
5
|
-
---
|
|
6
|
-
|
|
7
|
-
## [ ] Upgrade 1 — freshness_score numeric field
|
|
8
|
-
Implement the 0-100 numeric score defined in FRESHCONTEXT_SPEC.md.
|
|
9
|
-
Formula: max(0, 100 - (days_since_retrieved * decay_rate))
|
|
10
|
-
Location: src/tools/freshnessStamp.ts
|
|
11
|
-
Decay rates by adapter: finance=5.0, jobs=3.0, hackernews=2.0, github=1.0, scholar=0.3, default=1.5
|
|
12
|
-
Adds the score to both the text envelope and the JSON form.
|
|
13
|
-
Makes FreshContext fully spec-compliant by your own standard.
|
|
14
|
-
Cost: zero.
|
|
15
|
-
|
|
16
|
-
---
|
|
17
|
-
|
|
18
|
-
## [x] Upgrade 2 — Cloudflare KV response caching ← DONE (already implemented in worker.ts)
|
|
19
|
-
Cache adapter results in KV with adapter-specific TTLs so the same query
|
|
20
|
-
hitting the Worker twice doesn't make two upstream API calls.
|
|
21
|
-
Cache key: sha256(tool + ":" + url)
|
|
22
|
-
TTLs: HN/Reddit = 1 hour, GitHub/YC = 6 hours, govcontracts/scholar = 24 hours
|
|
23
|
-
Location: worker/src/index.ts
|
|
24
|
-
Cost: zero. KV free tier is 100k reads/day, 1k writes/day.
|
|
25
|
-
|
|
26
|
-
---
|
|
27
|
-
|
|
28
|
-
## [x] Upgrade 3 — Apify Actor timeout increase ← DONE 2026-03-19
|
|
29
|
-
Change the Actor timeout from 300 seconds to 3600 seconds in the Apify UI.
|
|
30
|
-
Apify console → Actor → Settings → Timeout → 3600
|
|
31
|
-
Playwright-based tools (extract_reddit, extract_yc, extract_producthunt) need
|
|
32
|
-
more than 5 minutes to launch Chromium and scrape. They will keep timing out
|
|
33
|
-
until this is changed. This is a UI field change, not a code change.
|
|
34
|
-
Cost: zero.
|
|
35
|
-
|
|
36
|
-
---
|
|
37
|
-
|
|
38
|
-
## [x] Upgrade 4 — D1 deduplication in the cron job ← DONE (hash-based dedup already in runScheduledScrape)
|
|
39
|
-
Before inserting a new scrape result, check if the same source_url was already
|
|
40
|
-
stored in the last 24 hours. If yes, skip the insert.
|
|
41
|
-
Prevents the scrape_results table from filling with duplicate data across
|
|
42
|
-
consecutive cron runs, keeping the historical dataset clean for the intelligence
|
|
43
|
-
layer (Layer 7 in the roadmap).
|
|
44
|
-
Location: the cron job handler in the Worker code.
|
|
45
|
-
Cost: zero.
|
|
46
|
-
|
|
47
|
-
---
|
|
48
|
-
|
|
49
|
-
## [x] Upgrade 5 — Structured JSON response form ← DONE 2026-03-19
|
|
50
|
-
Add the optional JSON form defined in FRESHCONTEXT_SPEC.md alongside the text
|
|
51
|
-
envelope in every adapter response. The JSON form has: source_url, content_date,
|
|
52
|
-
retrieved_at, freshness_confidence, adapter, freshness_score.
|
|
53
|
-
When a request has Accept: application/json, serve the structured form.
|
|
54
|
-
Both forms can be returned together — text for agents, JSON for programmatic use.
|
|
55
|
-
Location: src/tools/freshnessStamp.ts (same file as Upgrade 1, do together)
|
|
56
|
-
Cost: zero.
|
|
57
|
-
|
|
58
|
-
---
|
|
59
|
-
|
|
60
|
-
## [x] Upgrade 6 — GitHub Actions CI/CD automation ← DONE 2026-03-19
|
|
61
|
-
.github/workflows/publish.yml created. Triggers on every push to main.
|
|
62
|
-
Runs npm ci → npm run build → npm publish using NPM_TOKEN secret.
|
|
63
|
-
continue-on-error on publish so doc-only pushes don't fail the workflow.
|
|
64
|
-
First run: green checkmark, 23 seconds.
|
|
65
|
-
Manual PowerShell build/publish commands no longer needed.
|
|
66
|
-
|
|
67
|
-
---
|
|
68
|
-
|
|
69
|
-
## [x] Upgrade 7 — server.json version sync ← DONE 2026-03-19
|
|
70
|
-
server.json (MCP Registry listing) shows version 0.3.1 while package.json
|
|
71
|
-
is at 0.3.10. Anyone discovering FreshContext via the MCP Registry sees an
|
|
72
|
-
outdated version number. Fix by updating server.json manually now, then
|
|
73
|
-
optionally add a workflow step that syncs the version automatically on each
|
|
74
|
-
GitHub Actions run.
|
|
75
|
-
Location: server.json — change "version" field to match package.json.
|
|
76
|
-
Cost: zero.
|
|
77
|
-
|
|
78
|
-
---
|
|
79
|
-
|
|
80
|
-
## Priority order for remaining six upgrades
|
|
81
|
-
|
|
82
|
-
Do Upgrade 3 first — it is one UI field change and immediately fixes the
|
|
83
|
-
broken Apify Actor runs. Do Upgrades 1 and 5 together second since they
|
|
84
|
-
both touch freshnessStamp.ts and completing them makes FreshContext fully
|
|
85
|
-
spec-compliant. Do Upgrade 2 third — KV caching makes the Worker resilient
|
|
86
|
-
against upstream API instability. Do Upgrade 4 fourth — D1 deduplication
|
|
87
|
-
prepares the dataset for the future intelligence layer. Do Upgrade 7 last —
|
|
88
|
-
a simple version number correction, low urgency but worth keeping clean.
|
|
@@ -1,174 +0,0 @@
|
|
|
1
|
-
# FreshContext — Architecture Upgrade Roadmap V1
|
|
2
|
-
**Date:** 2026-03-19
|
|
3
|
-
**Author:** Immanuel Gabriel (Prince Gabriel), Grootfontein, Namibia
|
|
4
|
-
|
|
5
|
-
This document describes every free structural upgrade available to FreshContext,
|
|
6
|
-
prioritised by impact, with implementation notes for each.
|
|
7
|
-
|
|
8
|
-
---
|
|
9
|
-
|
|
10
|
-
## Upgrade 1 — freshness_score numeric field (HIGHEST PRIORITY)
|
|
11
|
-
|
|
12
|
-
**What it is:** The FreshContext Specification v1.0 defines an optional freshness_score
|
|
13
|
-
field (0-100) calculated as: max(0, 100 - (days_since_retrieved * decay_rate)).
|
|
14
|
-
Right now every response carries the text envelope and the confidence level (high/medium/low)
|
|
15
|
-
but not the numeric score. This is the one remaining piece that makes FreshContext fully
|
|
16
|
-
spec-compliant by your own standard.
|
|
17
|
-
|
|
18
|
-
**Why it matters:** Once the score exists, agents can filter results programmatically —
|
|
19
|
-
"only use results with freshness_score > 70" rather than parsing the string confidence
|
|
20
|
-
level. This is the difference between a label and a query parameter. It also strengthens
|
|
21
|
-
the acquisition narrative: the spec is complete, the reference implementation is complete,
|
|
22
|
-
and the standard is fully self-consistent.
|
|
23
|
-
|
|
24
|
-
**Domain-specific decay rates from the spec:**
|
|
25
|
-
Financial data decays at 5.0 (score falls to 50 after ~10 days). Job listings at 3.0 (~17 days).
|
|
26
|
-
News and HN at 2.0 (~25 days). GitHub repos at 1.0 (~50 days). Academic papers at 0.3
|
|
27
|
-
(~167 days). General web content defaults to 1.5.
|
|
28
|
-
|
|
29
|
-
**Where to implement:** In src/tools/freshnessStamp.ts — the function that wraps every
|
|
30
|
-
adapter result already has retrieved_at and content_date. Add a calculateFreshnessScore
|
|
31
|
-
function that takes content_date, decay_rate (looked up by adapter name), and returns
|
|
32
|
-
the numeric score. Add it to both the text envelope and the JSON form.
|
|
33
|
-
|
|
34
|
-
**Cost:** Zero. Pure TypeScript logic, no new services.
|
|
35
|
-
|
|
36
|
-
---
|
|
37
|
-
|
|
38
|
-
## Upgrade 2 — Cloudflare KV response caching
|
|
39
|
-
|
|
40
|
-
**What it is:** When the same query hits an adapter twice within a short window, the
|
|
41
|
-
Worker currently makes two full upstream API calls. KV caching stores the first result
|
|
42
|
-
with a TTL and serves subsequent identical requests from cache — meaning the upstream
|
|
43
|
-
API (USASpending, GitHub, HN, etc.) only gets called once per cache window.
|
|
44
|
-
|
|
45
|
-
**Why it matters:** This reduces the chance of hitting upstream rate limits, makes
|
|
46
|
-
repeated queries near-instant for users, and reduces Worker CPU time. For adapters like
|
|
47
|
-
extract_govcontracts that call a government API, caching also reduces the risk of
|
|
48
|
-
temporary blocks from aggressive polling.
|
|
49
|
-
|
|
50
|
-
**Implementation:** In the Worker code (worker/src/index.ts or equivalent), before calling
|
|
51
|
-
the adapter, compute a cache key as sha256(tool + ":" + url). Call env.KV.get(cacheKey).
|
|
52
|
-
If the result exists, return it immediately. If not, run the adapter, then call
|
|
53
|
-
env.KV.put(cacheKey, result, { expirationTtl: ttl }) before returning. Use adapter-specific
|
|
54
|
-
TTLs — 3600 seconds (1 hour) for HN and Reddit, 21600 (6 hours) for GitHub and YC,
|
|
55
|
-
86400 (24 hours) for govcontracts and scholar.
|
|
56
|
-
|
|
57
|
-
**Cost:** Zero. KV reads are free up to 100,000 per day, writes free up to 1,000 per day
|
|
58
|
-
on Cloudflare's free tier. You are nowhere near those limits.
|
|
59
|
-
|
|
60
|
-
---
|
|
61
|
-
|
|
62
|
-
## Upgrade 3 — Apify Actor timeout increase
|
|
63
|
-
|
|
64
|
-
**What it is:** The Apify Actor timeout is currently set to 300 seconds (5 minutes). Tools
|
|
65
|
-
that use Playwright to launch a browser — extract_reddit, extract_yc, extract_producthunt —
|
|
66
|
-
need more time than this to launch Chromium, navigate, wait for the page to render, and
|
|
67
|
-
extract content. They will keep timing out until this setting is increased.
|
|
68
|
-
|
|
69
|
-
**Where to change it:** Apify console → your Actor → Settings → Timeout. Change from
|
|
70
|
-
300 to 3600 (1 hour). This is a UI change, not a code change.
|
|
71
|
-
|
|
72
|
-
**Cost:** Zero. The timeout setting is just a number. You won't actually use anywhere
|
|
73
|
-
near 3600 seconds — most tools complete in 10-30 seconds. The setting just prevents Apify
|
|
74
|
-
from killing the process prematurely for the slower Playwright-based tools.
|
|
75
|
-
|
|
76
|
-
---
|
|
77
|
-
|
|
78
|
-
## Upgrade 4 — D1 deduplication in the cron job
|
|
79
|
-
|
|
80
|
-
**What it is:** Every 6 hours the cron job runs all 18 watched queries and stores results
|
|
81
|
-
in the scrape_results D1 table. Right now there is no deduplication — if the same article
|
|
82
|
-
or repo appears in two consecutive cron runs, it gets stored twice. Over time this creates
|
|
83
|
-
noise in the dataset and wastes storage.
|
|
84
|
-
|
|
85
|
-
**Implementation:** Before inserting a new result, run a SELECT to check whether a row
|
|
86
|
-
with the same source_url already exists within the last 24 hours. If it does, skip the
|
|
87
|
-
insert. This is a single SQL WHERE clause addition to the existing insert logic.
|
|
88
|
-
|
|
89
|
-
**Why it matters:** As you build the intelligence layer (Layer 7 in the roadmap), the
|
|
90
|
-
quality of the historical signal depends on clean, deduplicated data. Starting deduplication
|
|
91
|
-
now means the dataset is clean by the time you need it.
|
|
92
|
-
|
|
93
|
-
**Cost:** Zero. D1 reads are free up to 25 million rows per day. A deduplication check
|
|
94
|
-
adds one read per result per cron run — trivially within limits.
|
|
95
|
-
|
|
96
|
-
---
|
|
97
|
-
|
|
98
|
-
## Upgrade 5 — Structured JSON response form in every adapter
|
|
99
|
-
|
|
100
|
-
**What it is:** The FreshContext Specification defines two valid response formats — the
|
|
101
|
-
text envelope ([FRESHCONTEXT]...[/FRESHCONTEXT]) and an optional structured JSON form with
|
|
102
|
-
a freshcontext object containing source_url, content_date, retrieved_at,
|
|
103
|
-
freshness_confidence, adapter, and freshness_score fields. Right now only the text envelope
|
|
104
|
-
is returned. Adding the JSON form makes FreshContext usable programmatically without
|
|
105
|
-
parsing the text envelope.
|
|
106
|
-
|
|
107
|
-
**Implementation:** In src/tools/freshnessStamp.ts, after assembling the text envelope,
|
|
108
|
-
also return a structured object. When the Worker serves a response, detect whether the
|
|
109
|
-
request has Accept: application/json and serve the structured form instead of the text
|
|
110
|
-
form if so. Both formats can also be returned together — text for human/agent reading,
|
|
111
|
-
JSON for programmatic use.
|
|
112
|
-
|
|
113
|
-
**Cost:** Zero. This is a response format change, no new services.
|
|
114
|
-
|
|
115
|
-
---
|
|
116
|
-
|
|
117
|
-
## Upgrade 6 — GitHub Actions: version bump automation
|
|
118
|
-
|
|
119
|
-
**What it is:** The current GitHub Actions workflow (publish.yml) runs npm publish on every
|
|
120
|
-
push, but only succeeds if the version in package.json has changed. Right now you manually
|
|
121
|
-
bump the version before pushing. A small addition to the workflow can automate this by
|
|
122
|
-
running npm version patch automatically before the publish step — so every push to main
|
|
123
|
-
creates a new patch version and publishes it without any manual intervention.
|
|
124
|
-
|
|
125
|
-
**Tradeoff:** This means every push creates a new npm version, which may not always be
|
|
126
|
-
desirable for documentation-only changes. A better approach is to only auto-bump when
|
|
127
|
-
commits touch src/ or .actor/ — which can be detected in the workflow with a path filter.
|
|
128
|
-
|
|
129
|
-
**Implementation:** Add a paths filter to the workflow trigger so it only runs the publish
|
|
130
|
-
step when source files change. Then add an npm version patch --no-git-tag-version step
|
|
131
|
-
before the publish step. Push the bumped package.json back to the repo using a
|
|
132
|
-
git commit and git push within the workflow (requires GITHUB_TOKEN, which is automatically
|
|
133
|
-
available in all Actions workflows at no cost).
|
|
134
|
-
|
|
135
|
-
**Cost:** Zero.
|
|
136
|
-
|
|
137
|
-
---
|
|
138
|
-
|
|
139
|
-
## Upgrade 7 — server.json version sync check
|
|
140
|
-
|
|
141
|
-
**What it is:** The server.json file (used by the MCP Registry listing) still shows version
|
|
142
|
-
0.3.1 while package.json is at 0.3.10. This discrepancy means anyone who discovers
|
|
143
|
-
FreshContext via the MCP Registry sees an outdated version number. It is a cosmetic issue
|
|
144
|
-
but it affects credibility in a space where people are evaluating tools carefully.
|
|
145
|
-
|
|
146
|
-
**Implementation:** Add a step to the GitHub Actions workflow that reads the version from
|
|
147
|
-
package.json and uses sed or node -e to update the version field in server.json to match
|
|
148
|
-
before committing. Alternatively, update server.json manually now and keep it in sync
|
|
149
|
-
going forward.
|
|
150
|
-
|
|
151
|
-
**Cost:** Zero.
|
|
152
|
-
|
|
153
|
-
---
|
|
154
|
-
|
|
155
|
-
## Priority Order for Implementation
|
|
156
|
-
|
|
157
|
-
The order that maximises impact relative to effort is as follows. Implement the Apify
|
|
158
|
-
timeout increase first because it is a one-field UI change that immediately fixes the
|
|
159
|
-
broken Actor runs. Implement KV caching second because it makes the Worker more robust
|
|
160
|
-
against upstream API instability and improves response times for repeat queries. Implement
|
|
161
|
-
the freshness_score calculation third because it completes the spec and strengthens every
|
|
162
|
-
conversation about acquisition or partnership. Implement D1 deduplication fourth because
|
|
163
|
-
it improves data quality for the intelligence layer you will eventually build. Implement
|
|
164
|
-
the structured JSON response form fifth as part of the same PR as freshness_score since
|
|
165
|
-
they touch the same file. Implement the GitHub Actions version sync last as a quality-of-life
|
|
166
|
-
automation.
|
|
167
|
-
|
|
168
|
-
The total engineering cost of all six remaining upgrades is approximately 4-6 hours of
|
|
169
|
-
focused work. All run entirely within free tiers.
|
|
170
|
-
|
|
171
|
-
---
|
|
172
|
-
|
|
173
|
-
*"The work isn't gone. It's just waiting to be continued."*
|
|
174
|
-
*— Prince Gabriel, Grootfontein, Namibia*
|
package/FRESHCONTEXT_SPEC.md
DELETED
|
@@ -1,178 +0,0 @@
|
|
|
1
|
-
# The FreshContext Specification
|
|
2
|
-
**Version 1.0 — March 2026**
|
|
3
|
-
*Authored by Immanuel Gabriel (Prince Gabriel) — Grootfontein, Namibia*
|
|
4
|
-
|
|
5
|
-
---
|
|
6
|
-
|
|
7
|
-
## What This Is
|
|
8
|
-
|
|
9
|
-
The FreshContext Specification defines a standard envelope format for AI-retrieved web data.
|
|
10
|
-
|
|
11
|
-
It exists to solve one problem: **AI models present stale data with the same confidence as fresh data, and users have no way to tell the difference.**
|
|
12
|
-
|
|
13
|
-
FreshContext fixes this by wrapping every piece of retrieved content in a structured envelope that carries three guarantees:
|
|
14
|
-
|
|
15
|
-
1. **When** the data was retrieved (exact ISO 8601 timestamp)
|
|
16
|
-
2. **Where** it came from (canonical source URL)
|
|
17
|
-
3. **How confident** we are that the content date is accurate (freshness confidence)
|
|
18
|
-
|
|
19
|
-
Any tool, agent, or system that implements this spec is **FreshContext-compatible**.
|
|
20
|
-
|
|
21
|
-
---
|
|
22
|
-
|
|
23
|
-
## The Envelope Format
|
|
24
|
-
|
|
25
|
-
Every FreshContext-compatible response MUST wrap its content in the following envelope:
|
|
26
|
-
|
|
27
|
-
```
|
|
28
|
-
[FRESHCONTEXT]
|
|
29
|
-
Source: <canonical_url>
|
|
30
|
-
Published: <content_date_or_"unknown">
|
|
31
|
-
Retrieved: <iso8601_timestamp>
|
|
32
|
-
Confidence: <high|medium|low>
|
|
33
|
-
---
|
|
34
|
-
<content>
|
|
35
|
-
[/FRESHCONTEXT]
|
|
36
|
-
```
|
|
37
|
-
|
|
38
|
-
### Field Definitions
|
|
39
|
-
|
|
40
|
-
| Field | Required | Format | Description |
|
|
41
|
-
|---|---|---|---|
|
|
42
|
-
| `Source` | Yes | Valid URL | The canonical URL of the original source |
|
|
43
|
-
| `Published` | Yes | ISO 8601 date or `"unknown"` | Best estimate of when the content was originally published |
|
|
44
|
-
| `Retrieved` | Yes | ISO 8601 datetime with timezone | Exact timestamp when this data was fetched |
|
|
45
|
-
| `Confidence` | Yes | `high`, `medium`, or `low` | Confidence level of the `Published` date estimate |
|
|
46
|
-
|
|
47
|
-
---
|
|
48
|
-
|
|
49
|
-
## Confidence Levels
|
|
50
|
-
|
|
51
|
-
### `high`
|
|
52
|
-
The publication date was sourced from a structured, machine-readable field — an API response, HTML metadata tag, RSS feed, or official timestamp. The date is reliable.
|
|
53
|
-
|
|
54
|
-
*Examples: GitHub API `pushed_at`, arXiv submission date, Hacker News `created_at`*
|
|
55
|
-
|
|
56
|
-
### `medium`
|
|
57
|
-
The publication date was inferred from page signals — visible date strings, URL patterns, or content heuristics. Likely correct but not guaranteed.
|
|
58
|
-
|
|
59
|
-
*Examples: Blog post date parsed from HTML, URL containing `/2025/03/`, footer copyright year*
|
|
60
|
-
|
|
61
|
-
### `low`
|
|
62
|
-
No reliable date signal was found. The date is an estimate based on indirect signals or is entirely unknown.
|
|
63
|
-
|
|
64
|
-
*Examples: Static page with no date, scraped content with no metadata, cached result of unknown age*
|
|
65
|
-
|
|
66
|
-
---
|
|
67
|
-
|
|
68
|
-
## Structured Form (JSON)
|
|
69
|
-
|
|
70
|
-
Implementations MAY additionally expose freshness metadata as structured JSON alongside the text envelope:
|
|
71
|
-
|
|
72
|
-
```json
|
|
73
|
-
{
|
|
74
|
-
"freshcontext": {
|
|
75
|
-
"source_url": "https://github.com/owner/repo",
|
|
76
|
-
"content_date": "2026-03-05",
|
|
77
|
-
"retrieved_at": "2026-03-16T09:19:00.000Z",
|
|
78
|
-
"freshness_confidence": "high",
|
|
79
|
-
"adapter": "github",
|
|
80
|
-
"freshness_score": 94
|
|
81
|
-
},
|
|
82
|
-
"content": "..."
|
|
83
|
-
}
|
|
84
|
-
```
|
|
85
|
-
|
|
86
|
-
### `freshness_score` (optional)
|
|
87
|
-
|
|
88
|
-
A numeric representation of data freshness from 0–100, calculated as:
|
|
89
|
-
|
|
90
|
-
```
|
|
91
|
-
freshness_score = max(0, 100 - (days_since_retrieved × decay_rate))
|
|
92
|
-
```
|
|
93
|
-
|
|
94
|
-
Where `decay_rate` defaults to `1.5` for general web content. Implementations MAY use domain-specific decay rates (e.g., financial data decays faster than academic papers).
|
|
95
|
-
|
|
96
|
-
| Score | Interpretation |
|
|
97
|
-
|---|---|
|
|
98
|
-
| 90–100 | Retrieved within hours — treat as current |
|
|
99
|
-
| 70–89 | Retrieved within days — reliable for most uses |
|
|
100
|
-
| 50–69 | Retrieved within weeks — verify before acting |
|
|
101
|
-
| Below 50 | Retrieved more than a month ago — use with caution |
|
|
102
|
-
|
|
103
|
-
---
|
|
104
|
-
|
|
105
|
-
## Adapter Contract
|
|
106
|
-
|
|
107
|
-
Any data source that feeds into a FreshContext-compatible system is called an **adapter**. Adapters MUST:
|
|
108
|
-
|
|
109
|
-
1. Return raw content plus a `content_date` (or `null` if unknown)
|
|
110
|
-
2. Set a `freshness_confidence` level based on how the date was determined
|
|
111
|
-
3. Never fabricate or forward-date content timestamps
|
|
112
|
-
4. Clearly identify which source system produced the data via the `adapter` field
|
|
113
|
-
|
|
114
|
-
Adapters SHOULD:
|
|
115
|
-
|
|
116
|
-
- Prefer structured API sources over scraped content when both are available
|
|
117
|
-
- Log retrieval errors without silently returning cached or stale data
|
|
118
|
-
- Surface rate-limit or access-denied errors explicitly rather than returning empty content
|
|
119
|
-
|
|
120
|
-
---
|
|
121
|
-
|
|
122
|
-
## Why This Matters for AI Agents
|
|
123
|
-
|
|
124
|
-
Large language models have no internal clock. When an agent retrieves web data, it cannot distinguish between something published this morning and something published three years ago — unless that information is explicitly surfaced.
|
|
125
|
-
|
|
126
|
-
Without FreshContext (or equivalent):
|
|
127
|
-
- An agent recommending job listings may recommend roles that no longer exist
|
|
128
|
-
- An agent summarising market trends may cite conditions from a previous cycle
|
|
129
|
-
- An agent checking a competitor's pricing may act on outdated information
|
|
130
|
-
|
|
131
|
-
With FreshContext:
|
|
132
|
-
- Every piece of retrieved data carries its own timestamp
|
|
133
|
-
- The agent can reason about data age before acting
|
|
134
|
-
- Users can see exactly how fresh their AI's information is
|
|
135
|
-
|
|
136
|
-
---
|
|
137
|
-
|
|
138
|
-
## Compatibility
|
|
139
|
-
|
|
140
|
-
A tool, server, or API is **FreshContext-compatible** if:
|
|
141
|
-
|
|
142
|
-
- Its responses include the `[FRESHCONTEXT]...[/FRESHCONTEXT]` envelope, OR
|
|
143
|
-
- Its responses include the structured JSON form with `freshcontext.retrieved_at` and `freshcontext.freshness_confidence` fields
|
|
144
|
-
|
|
145
|
-
Partial implementations that include only `retrieved_at` without `freshness_confidence` are considered **FreshContext-aware** but not fully compatible.
|
|
146
|
-
|
|
147
|
-
---
|
|
148
|
-
|
|
149
|
-
## Reference Implementation
|
|
150
|
-
|
|
151
|
-
The canonical reference implementation of this specification is:
|
|
152
|
-
|
|
153
|
-
**freshcontext-mcp** — an MCP server with 11 adapters covering GitHub, Hacker News, Google Scholar, arXiv, Reddit, YC Companies, Product Hunt, npm/PyPI, financial markets, and a composite landscape tool.
|
|
154
|
-
|
|
155
|
-
- npm: `freshcontext-mcp`
|
|
156
|
-
- GitHub: https://github.com/PrinceGabriel-lgtm/freshcontext-mcp
|
|
157
|
-
- Cloud endpoint: `https://freshcontext-mcp.gimmanuel73.workers.dev/mcp`
|
|
158
|
-
|
|
159
|
-
---
|
|
160
|
-
|
|
161
|
-
## Versioning
|
|
162
|
-
|
|
163
|
-
This document is version 1.0 of the FreshContext Specification.
|
|
164
|
-
|
|
165
|
-
Future versions will be tagged in this repository. Breaking changes to the envelope format will increment the major version. Additive changes (new optional fields, new confidence levels) will increment the minor version.
|
|
166
|
-
|
|
167
|
-
---
|
|
168
|
-
|
|
169
|
-
## License
|
|
170
|
-
|
|
171
|
-
This specification is published under the MIT License.
|
|
172
|
-
Implementations may be proprietary or open source.
|
|
173
|
-
Attribution to the FreshContext Specification is appreciated but not required.
|
|
174
|
-
|
|
175
|
-
---
|
|
176
|
-
|
|
177
|
-
*"The work isn't gone. It's just waiting to be continued."*
|
|
178
|
-
*— Prince Gabriel, Grootfontein, Namibia*
|