freshcontext-mcp 0.3.14 → 0.3.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,103 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-07/schema#",
3
+ "$id": "https://freshcontext-site.pages.dev/freshcontext.schema.json",
4
+ "title": "FreshContext",
5
+ "description": "The FreshContext Specification v1.1 — structured envelope for AI-retrieved web data. https://github.com/PrinceGabriel-lgtm/freshcontext-mcp",
6
+ "type": "object",
7
+ "required": ["freshcontext"],
8
+ "properties": {
9
+ "freshcontext": {
10
+ "type": "object",
11
+ "description": "The freshness metadata envelope for a single retrieved result.",
12
+ "required": ["source_url", "retrieved_at", "freshness_confidence", "adapter"],
13
+ "properties": {
14
+ "source_url": {
15
+ "type": "string",
16
+ "format": "uri",
17
+ "description": "The canonical URL of the original source from which content was retrieved."
18
+ },
19
+ "content_date": {
20
+ "type": ["string", "null"],
21
+ "description": "Best estimate of when the content was originally published. ISO 8601 date (YYYY-MM-DD) or ISO 8601 datetime. Null if unknown.",
22
+ "examples": ["2026-03-05", "2026-03-05T09:19:00.000Z", null]
23
+ },
24
+ "retrieved_at": {
25
+ "type": "string",
26
+ "format": "date-time",
27
+ "description": "Exact ISO 8601 datetime (with timezone) when this data was fetched.",
28
+ "examples": ["2026-04-05T09:19:00.000Z"]
29
+ },
30
+ "freshness_confidence": {
31
+ "type": "string",
32
+ "enum": ["high", "medium", "low"],
33
+ "description": "Confidence level of the content_date estimate. 'high' = structured API field. 'medium' = inferred from page signals. 'low' = unknown or estimated."
34
+ },
35
+ "freshness_score": {
36
+ "type": ["number", "null"],
37
+ "minimum": 0,
38
+ "maximum": 100,
39
+ "description": "Optional numeric freshness score 0-100. Calculated as: max(0, 100 - (days_since_retrieved * decay_rate)). Null if content_date is unknown.",
40
+ "examples": [94, 72, 45, null]
41
+ },
42
+ "adapter": {
43
+ "type": "string",
44
+ "description": "Identifier of the adapter (data source) that produced this result.",
45
+ "examples": [
46
+ "github",
47
+ "hackernews",
48
+ "google_scholar",
49
+ "arxiv",
50
+ "reddit",
51
+ "ycombinator",
52
+ "producthunt",
53
+ "github_search",
54
+ "package_registry",
55
+ "finance",
56
+ "jobs",
57
+ "changelog",
58
+ "govcontracts",
59
+ "sec_filings",
60
+ "gdelt",
61
+ "gebiz"
62
+ ]
63
+ },
64
+ "decay_rate": {
65
+ "type": ["number", "null"],
66
+ "description": "The decay rate used to calculate freshness_score. Domain-specific. Financial=5.0, jobs=3.0, news=2.0, github=1.0, academic=0.3, default=1.5.",
67
+ "examples": [5.0, 3.0, 2.0, 1.5, 1.0, 0.3, null]
68
+ }
69
+ },
70
+ "additionalProperties": false
71
+ },
72
+ "content": {
73
+ "type": "string",
74
+ "description": "The retrieved content — raw text, structured data, or formatted output from the adapter."
75
+ }
76
+ },
77
+ "examples": [
78
+ {
79
+ "freshcontext": {
80
+ "source_url": "https://github.com/PrinceGabriel-lgtm/freshcontext-mcp",
81
+ "content_date": "2026-04-05",
82
+ "retrieved_at": "2026-04-05T09:19:00.000Z",
83
+ "freshness_confidence": "high",
84
+ "freshness_score": 94,
85
+ "adapter": "github",
86
+ "decay_rate": 1.0
87
+ },
88
+ "content": "freshcontext-mcp — Real-time web intelligence for AI agents..."
89
+ },
90
+ {
91
+ "freshcontext": {
92
+ "source_url": "https://efts.sec.gov/LATEST/search-index?q=Palantir&forms=8-K",
93
+ "content_date": "2026-02-03",
94
+ "retrieved_at": "2026-04-05T09:19:00.000Z",
95
+ "freshness_confidence": "high",
96
+ "freshness_score": 38,
97
+ "adapter": "sec_filings",
98
+ "decay_rate": 1.5
99
+ },
100
+ "content": "Palantir Technologies — 8-K filing: Q4 2025 earnings..."
101
+ }
102
+ ]
103
+ }
package/package.json CHANGED
@@ -1,8 +1,8 @@
1
1
  {
2
2
  "name": "freshcontext-mcp",
3
3
  "mcpName": "io.github.PrinceGabriel-lgtm/freshcontext",
4
- "version": "0.3.14",
5
- "description": "Real-time web extraction MCP server with freshness timestamps for AI agents",
4
+ "version": "0.3.16",
5
+ "description": "Real-time web intelligence for AI agents. 20 tools, no API keys. Every result timestamped with a freshness score.",
6
6
  "keywords": [
7
7
  "mcp",
8
8
  "mcp-server",
package/server.json CHANGED
@@ -1,19 +1,19 @@
1
1
  {
2
2
  "$schema": "https://static.modelcontextprotocol.io/schemas/2025-07-09/server.schema.json",
3
3
  "name": "io.github.PrinceGabriel-lgtm/freshcontext",
4
- "description": "Real-time web intelligence for AI agents. 15 tools, no API keys. GitHub, HN, Reddit, arXiv, govcontracts, changelog, gov landscape & finance landscape — every result timestamped with a freshness score.",
4
+ "description": "Real-time web intelligence for AI agents. 20 tools, no API keys. GitHub, HN, Reddit, arXiv, SEC filings, US gov contracts, GDELT global news, Singapore GeBIZ, changelog & more — every result timestamped with a freshness score.",
5
5
  "repository": {
6
6
  "url": "https://github.com/PrinceGabriel-lgtm/freshcontext-mcp",
7
7
  "source": "github"
8
8
  },
9
- "version": "0.3.11",
9
+ "version": "0.3.16",
10
10
  "website_url": "https://freshcontext-site.pages.dev",
11
11
  "packages": [
12
12
  {
13
13
  "registry_type": "npm",
14
14
  "registry_base_url": "https://registry.npmjs.org",
15
15
  "identifier": "freshcontext-mcp",
16
- "version": "0.3.11",
16
+ "version": "0.3.16",
17
17
  "transport": {
18
18
  "type": "stdio"
19
19
  }
package/time-check.ps1 ADDED
@@ -0,0 +1,46 @@
1
+ # time-check.ps1 — Print a session header for Claude conversations
2
+ # Usage: ./time-check.ps1
3
+ # Then paste the output at the start of your message to Claude (it is also copied to the clipboard).
4
+
5
+ $now = Get-Date
6
+ $utc = $now.ToUniversalTime()
7
+ $dayOfWeek = $now.DayOfWeek
8
+ $weekNumber = (Get-Culture).Calendar.GetWeekOfYear($now, [System.Globalization.CalendarWeekRule]::FirstFourDayWeek, [DayOfWeek]::Monday)
9
+
10
+ # Time-of-day label for Grootfontein (-19.57°, 18.12°): sunrise ~6:00, sunset ~18:30 (varies seasonally, close enough for vibes)
11
+ # PowerShell's switch tests every clause, so each one ends in 'break' to emit only the first matching label
12
+ $hour = $now.Hour
13
+ $timeOfDay = switch ($hour) {
14
+ {$_ -lt 5} { "deep night"; break }
15
+ {$_ -lt 7} { "before dawn"; break }
16
+ {$_ -lt 9} { "early morning"; break }
17
+ {$_ -lt 12} { "morning"; break }
18
+ {$_ -lt 14} { "midday"; break }
19
+ {$_ -lt 17} { "afternoon"; break }
20
+ {$_ -lt 19} { "early evening"; break }
21
+ {$_ -lt 22} { "evening"; break }
22
+ default { "late night" }
23
+ }
24
+
25
+ # US East Coast time (ET is UTC-4 or UTC-5 depending on DST), converted from UTC so the DST offset is always right
26
+ # 'Eastern Standard Time' is the Windows time zone ID for US Eastern; it covers both EST and EDT
27
+ $et = [System.TimeZoneInfo]::ConvertTimeBySystemTimeZoneId($utc, 'Eastern Standard Time')
28
+ $etTime = "{0:HH:mm} ET" -f $et
29
+
30
+ # Output
31
+ Write-Host ""
32
+ Write-Host "═══════════════════════════════════════════" -ForegroundColor Cyan
33
+ Write-Host " Session header for Claude (paste this in)" -ForegroundColor Cyan
34
+ Write-Host "═══════════════════════════════════════════" -ForegroundColor Cyan
35
+ Write-Host ""
36
+ Write-Host "Local time: $($now.ToString('yyyy-MM-dd HH:mm')) CAT ($timeOfDay), $dayOfWeek" -ForegroundColor Green
37
+ Write-Host "UTC: $($utc.ToString('yyyy-MM-dd HH:mm')) UTC" -ForegroundColor Gray
38
+ Write-Host "ET (US): ~$etTime" -ForegroundColor Gray
39
+ Write-Host "Week: Week $weekNumber of $($now.Year)" -ForegroundColor Gray
40
+ Write-Host ""
41
+
42
+ # Also copy to clipboard so you don't have to retype it
43
+ $header = "[$($now.ToString('yyyy-MM-dd HH:mm')) CAT, $dayOfWeek $timeOfDay]"
44
+ $header | Set-Clipboard
45
+ Write-Host "→ Copied to clipboard: $header" -ForegroundColor Yellow
46
+ Write-Host ""
package/.actor/Dockerfile DELETED
@@ -1,16 +0,0 @@
1
- FROM apify/actor-node-playwright-chrome:20
2
-
3
- # Copy package files first for better Docker layer caching
4
- COPY package*.json ./
5
-
6
- # Install dependencies — Playwright and Chromium already in base image
7
- RUN npm install --include=dev
8
-
9
- # Copy source and pre-built dist
10
- COPY . ./
11
-
12
- # Rebuild TypeScript
13
- RUN npm run build || echo "Build had warnings, using pre-compiled dist/"
14
-
15
- # Run the Actor entry point
16
- CMD ["node", "dist/apify.js"]
package/.actor/actor.json DELETED
@@ -1,9 +0,0 @@
1
- {
2
- "actorSpecification": 1,
3
- "name": "freshcontext-mcp",
4
- "title": "FreshContext MCP",
5
- "version": "0.3.1",
6
- "input": "../input_schema.json",
7
- "output": "./output_schema.json",
8
- "dockerfile": "./Dockerfile"
9
- }
@@ -1,13 +0,0 @@
1
- {
2
- "actorOutputSchemaVersion": 1,
3
- "title": "FreshContext MCP Output",
4
- "description": "Timestamped web intelligence results wrapped in FreshContext envelopes.",
5
- "properties": {
6
- "results": {
7
- "type": "string",
8
- "title": "Results",
9
- "description": "FreshContext envelopes with content, source URL, retrieval timestamp, and freshness confidence.",
10
- "template": "{{links.apiDefaultDatasetUrl}}/items"
11
- }
12
- }
13
- }
@@ -1,88 +0,0 @@
1
- # FreshContext — Architecture Upgrade Checklist
2
- **Date started:** 2026-03-19
3
- **Author:** Prince Gabriel, Grootfontein, Namibia
4
-
5
- ---
6
-
7
- ## [ ] Upgrade 1 — freshness_score numeric field
8
- Implement the 0-100 numeric score defined in FRESHCONTEXT_SPEC.md.
9
- Formula: max(0, 100 - (days_since_retrieved * decay_rate))
10
- Location: src/tools/freshnessStamp.ts
11
- Decay rates by adapter: finance=5.0, jobs=3.0, hackernews=2.0, github=1.0, scholar=0.3, default=1.5
12
- Adds the score to both the text envelope and the JSON form.
13
- Makes FreshContext fully spec-compliant by your own standard.
14
- Cost: zero.
15
-
16
- ---
17
-
18
- ## [x] Upgrade 2 — Cloudflare KV response caching ← DONE (already implemented in worker.ts)
19
- Cache adapter results in KV with adapter-specific TTLs so the same query
20
- hitting the Worker twice doesn't make two upstream API calls.
21
- Cache key: sha256(tool + ":" + url)
22
- TTLs: HN/Reddit = 1 hour, GitHub/YC = 6 hours, govcontracts/scholar = 24 hours
23
- Location: worker/src/index.ts
24
- Cost: zero. KV free tier is 100k reads/day, 1k writes/day.
25
-
26
- ---
27
-
28
- ## [x] Upgrade 3 — Apify Actor timeout increase ← DONE 2026-03-19
29
- Change the Actor timeout from 300 seconds to 3600 seconds in the Apify UI.
30
- Apify console → Actor → Settings → Timeout → 3600
31
- Playwright-based tools (extract_reddit, extract_yc, extract_producthunt) need
32
- more than 5 minutes to launch Chromium and scrape. They will keep timing out
33
- until this is changed. This is a UI field change, not a code change.
34
- Cost: zero.
35
-
36
- ---
37
-
38
- ## [x] Upgrade 4 — D1 deduplication in the cron job ← DONE (hash-based dedup already in runScheduledScrape)
39
- Before inserting a new scrape result, check if the same source_url was already
40
- stored in the last 24 hours. If yes, skip the insert.
41
- Prevents the scrape_results table from filling with duplicate data across
42
- consecutive cron runs, keeping the historical dataset clean for the intelligence
43
- layer (Layer 7 in the roadmap).
44
- Location: the cron job handler in the Worker code.
45
- Cost: zero.
46
-
47
- ---
48
-
49
- ## [x] Upgrade 5 — Structured JSON response form ← DONE 2026-03-19
50
- Add the optional JSON form defined in FRESHCONTEXT_SPEC.md alongside the text
51
- envelope in every adapter response. The JSON form has: source_url, content_date,
52
- retrieved_at, freshness_confidence, adapter, freshness_score.
53
- When a request has Accept: application/json, serve the structured form.
54
- Both forms can be returned together — text for agents, JSON for programmatic use.
55
- Location: src/tools/freshnessStamp.ts (same file as Upgrade 1, do together)
56
- Cost: zero.
57
-
58
- ---
59
-
60
- ## [x] Upgrade 6 — GitHub Actions CI/CD automation ← DONE 2026-03-19
61
- .github/workflows/publish.yml created. Triggers on every push to main.
62
- Runs npm ci → npm run build → npm publish using NPM_TOKEN secret.
63
- continue-on-error on publish so doc-only pushes don't fail the workflow.
64
- First run: green checkmark, 23 seconds.
65
- Manual PowerShell build/publish commands no longer needed.
66
-
67
- ---
68
-
69
- ## [x] Upgrade 7 — server.json version sync ← DONE 2026-03-19
70
- server.json (MCP Registry listing) shows version 0.3.1 while package.json
71
- is at 0.3.10. Anyone discovering FreshContext via the MCP Registry sees an
72
- outdated version number. Fix by updating server.json manually now, then
73
- optionally add a workflow step that syncs the version automatically on each
74
- GitHub Actions run.
75
- Location: server.json — change "version" field to match package.json.
76
- Cost: zero.
77
-
78
- ---
79
-
80
- ## Priority order for remaining six upgrades
81
-
82
- Do Upgrade 3 first — it is one UI field change and immediately fixes the
83
- broken Apify Actor runs. Do Upgrades 1 and 5 together second since they
84
- both touch freshnessStamp.ts and completing them makes FreshContext fully
85
- spec-compliant. Do Upgrade 2 third — KV caching makes the Worker resilient
86
- against upstream API instability. Do Upgrade 4 fourth — D1 deduplication
87
- prepares the dataset for the future intelligence layer. Do Upgrade 7 last —
88
- a simple version number correction, low urgency but worth keeping clean.
@@ -1,174 +0,0 @@
1
- # FreshContext — Architecture Upgrade Roadmap V1
2
- **Date:** 2026-03-19
3
- **Author:** Immanuel Gabriel (Prince Gabriel), Grootfontein, Namibia
4
-
5
- This document describes every free structural upgrade available to FreshContext,
6
- prioritised by impact, with implementation notes for each.
7
-
8
- ---
9
-
10
- ## Upgrade 1 — freshness_score numeric field (HIGHEST PRIORITY)
11
-
12
- **What it is:** The FreshContext Specification v1.0 defines an optional freshness_score
13
- field (0-100) calculated as: max(0, 100 - (days_since_retrieved * decay_rate)).
14
- Right now every response carries the text envelope and the confidence level (high/medium/low)
15
- but not the numeric score. This is the one remaining piece that makes FreshContext fully
16
- spec-compliant by your own standard.
17
-
18
- **Why it matters:** Once the score exists, agents can filter results programmatically —
19
- "only use results with freshness_score > 70" rather than parsing the string confidence
20
- level. This is the difference between a label and a query parameter. It also strengthens
21
- the acquisition narrative: the spec is complete, the reference implementation is complete,
22
- and the standard is fully self-consistent.
23
-
24
- **Domain-specific decay rates from the spec:**
25
- Financial data decays at 5.0 (score falls to 50 after ~10 days). Job listings at 3.0 (~17 days).
26
- News and HN at 2.0 (~25 days). GitHub repos at 1.0 (~50 days). Academic papers at 0.3
27
- (~167 days). General web content defaults to 1.5.
28
-
29
- **Where to implement:** In src/tools/freshnessStamp.ts — the function that wraps every
30
- adapter result already has retrieved_at and content_date. Add a calculateFreshnessScore
31
- function that takes content_date, decay_rate (looked up by adapter name), and returns
32
- the numeric score. Add it to both the text envelope and the JSON form.
33
-
34
- **Cost:** Zero. Pure TypeScript logic, no new services.
35
-
36
- ---
37
-
38
- ## Upgrade 2 — Cloudflare KV response caching
39
-
40
- **What it is:** When the same query hits an adapter twice within a short window, the
41
- Worker currently makes two full upstream API calls. KV caching stores the first result
42
- with a TTL and serves subsequent identical requests from cache — meaning the upstream
43
- API (USASpending, GitHub, HN, etc.) only gets called once per cache window.
44
-
45
- **Why it matters:** This reduces the chance of hitting upstream rate limits, makes
46
- repeated queries near-instant for users, and reduces Worker CPU time. For adapters like
47
- extract_govcontracts that call a government API, caching also reduces the risk of
48
- temporary blocks from aggressive polling.
49
-
50
- **Implementation:** In the Worker code (worker/src/index.ts or equivalent), before calling
51
- the adapter, compute a cache key as sha256(tool + ":" + url). Call env.KV.get(cacheKey).
52
- If the result exists, return it immediately. If not, run the adapter, then call
53
- env.KV.put(cacheKey, result, { expirationTtl: ttl }) before returning. Use adapter-specific
54
- TTLs — 3600 seconds (1 hour) for HN and Reddit, 21600 (6 hours) for GitHub and YC,
55
- 86400 (24 hours) for govcontracts and scholar.
56
-
57
- **Cost:** Zero. KV reads are free up to 100,000 per day, writes free up to 1,000 per day
58
- on Cloudflare's free tier. You are nowhere near those limits.
59
-
60
- ---
61
-
62
- ## Upgrade 3 — Apify Actor timeout increase
63
-
64
- **What it is:** The Apify Actor timeout is currently set to 300 seconds (5 minutes). Tools
65
- that use Playwright to launch a browser — extract_reddit, extract_yc, extract_producthunt —
66
- need more time than this to launch Chromium, navigate, wait for the page to render, and
67
- extract content. They will keep timing out until this setting is increased.
68
-
69
- **Where to change it:** Apify console → your Actor → Settings → Timeout. Change from
70
- 300 to 3600 (1 hour). This is a UI change, not a code change.
71
-
72
- **Cost:** Zero. The timeout setting is just a number. You won't actually use anywhere
73
- near 3600 seconds — most tools complete in 10-30 seconds. The setting just prevents Apify
74
- from killing the process prematurely for the slower Playwright-based tools.
75
-
76
- ---
77
-
78
- ## Upgrade 4 — D1 deduplication in the cron job
79
-
80
- **What it is:** Every 6 hours the cron job runs all 18 watched queries and stores results
81
- in the scrape_results D1 table. Right now there is no deduplication — if the same article
82
- or repo appears in two consecutive cron runs, it gets stored twice. Over time this creates
83
- noise in the dataset and wastes storage.
84
-
85
- **Implementation:** Before inserting a new result, run a SELECT to check whether a row
86
- with the same source_url already exists within the last 24 hours. If it does, skip the
87
- insert. This is a single SQL WHERE clause addition to the existing insert logic.
88
-
89
- **Why it matters:** As you build the intelligence layer (Layer 7 in the roadmap), the
90
- quality of the historical signal depends on clean, deduplicated data. Starting deduplication
91
- now means the dataset is clean by the time you need it.
92
-
93
- **Cost:** Zero. D1 reads are free up to 25 million rows per day. A deduplication check
94
- adds one read per result per cron run — trivially within limits.
95
-
96
- ---
97
-
98
- ## Upgrade 5 — Structured JSON response form in every adapter
99
-
100
- **What it is:** The FreshContext Specification defines two valid response formats — the
101
- text envelope ([FRESHCONTEXT]...[/FRESHCONTEXT]) and an optional structured JSON form with
102
- a freshcontext object containing source_url, content_date, retrieved_at,
103
- freshness_confidence, adapter, and freshness_score fields. Right now only the text envelope
104
- is returned. Adding the JSON form makes FreshContext usable programmatically without
105
- parsing the text envelope.
106
-
107
- **Implementation:** In src/tools/freshnessStamp.ts, after assembling the text envelope,
108
- also return a structured object. When the Worker serves a response, detect whether the
109
- request has Accept: application/json and serve the structured form instead of the text
110
- form if so. Both formats can also be returned together — text for human/agent reading,
111
- JSON for programmatic use.
112
-
113
- **Cost:** Zero. This is a response format change, no new services.
114
-
115
- ---
116
-
117
- ## Upgrade 6 — GitHub Actions: version bump automation
118
-
119
- **What it is:** The current GitHub Actions workflow (publish.yml) runs npm publish on every
120
- push, but only succeeds if the version in package.json has changed. Right now you manually
121
- bump the version before pushing. A small addition to the workflow can automate this by
122
- running npm version patch automatically before the publish step — so every push to main
123
- creates a new patch version and publishes it without any manual intervention.
124
-
125
- **Tradeoff:** This means every push creates a new npm version, which may not always be
126
- desirable for documentation-only changes. A better approach is to only auto-bump when
127
- commits touch src/ or .actor/ — which can be detected in the workflow with a path filter.
128
-
129
- **Implementation:** Add a paths filter to the workflow trigger so it only runs the publish
130
- step when source files change. Then add an npm version patch --no-git-tag-version step
131
- before the publish step. Push the bumped package.json back to the repo using a
132
- git commit and git push within the workflow (requires GITHUB_TOKEN, which is automatically
133
- available in all Actions workflows at no cost).
134
-
135
- **Cost:** Zero.
136
-
137
- ---
138
-
139
- ## Upgrade 7 — server.json version sync check
140
-
141
- **What it is:** The server.json file (used by the MCP Registry listing) still shows version
142
- 0.3.1 while package.json is at 0.3.10. This discrepancy means anyone who discovers
143
- FreshContext via the MCP Registry sees an outdated version number. It is a cosmetic issue
144
- but it affects credibility in a space where people are evaluating tools carefully.
145
-
146
- **Implementation:** Add a step to the GitHub Actions workflow that reads the version from
147
- package.json and uses sed or node -e to update the version field in server.json to match
148
- before committing. Alternatively, update server.json manually now and keep it in sync
149
- going forward.
150
-
151
- **Cost:** Zero.
152
-
153
- ---
154
-
155
- ## Priority Order for Implementation
156
-
157
- The order that maximises impact relative to effort is as follows. Implement the Apify
158
- timeout increase first because it is a one-field UI change that immediately fixes the
159
- broken Actor runs. Implement KV caching second because it makes the Worker more robust
160
- against upstream API instability and improves response times for repeat queries. Implement
161
- the freshness_score calculation third because it completes the spec and strengthens every
162
- conversation about acquisition or partnership. Implement D1 deduplication fourth because
163
- it improves data quality for the intelligence layer you will eventually build. Implement
164
- the structured JSON response form fifth as part of the same PR as freshness_score since
165
- they touch the same file. Implement the GitHub Actions version sync last as a quality-of-life
166
- automation.
167
-
168
- The total engineering cost of all six remaining upgrades is approximately 4-6 hours of
169
- focused work. All run entirely within free tiers.
170
-
171
- ---
172
-
173
- *"The work isn't gone. It's just waiting to be continued."*
174
- *— Prince Gabriel, Grootfontein, Namibia*
@@ -1,178 +0,0 @@
1
- # The FreshContext Specification
2
- **Version 1.0 — March 2026**
3
- *Authored by Immanuel Gabriel (Prince Gabriel) — Grootfontein, Namibia*
4
-
5
- ---
6
-
7
- ## What This Is
8
-
9
- The FreshContext Specification defines a standard envelope format for AI-retrieved web data.
10
-
11
- It exists to solve one problem: **AI models present stale data with the same confidence as fresh data, and users have no way to tell the difference.**
12
-
13
- FreshContext fixes this by wrapping every piece of retrieved content in a structured envelope that carries three guarantees:
14
-
15
- 1. **When** the data was retrieved (exact ISO 8601 timestamp)
16
- 2. **Where** it came from (canonical source URL)
17
- 3. **How confident** we are that the content date is accurate (freshness confidence)
18
-
19
- Any tool, agent, or system that implements this spec is **FreshContext-compatible**.
20
-
21
- ---
22
-
23
- ## The Envelope Format
24
-
25
- Every FreshContext-compatible response MUST wrap its content in the following envelope:
26
-
27
- ```
28
- [FRESHCONTEXT]
29
- Source: <canonical_url>
30
- Published: <content_date_or_"unknown">
31
- Retrieved: <iso8601_timestamp>
32
- Confidence: <high|medium|low>
33
- ---
34
- <content>
35
- [/FRESHCONTEXT]
36
- ```
37
-
38
- ### Field Definitions
39
-
40
- | Field | Required | Format | Description |
41
- |---|---|---|---|
42
- | `Source` | Yes | Valid URL | The canonical URL of the original source |
43
- | `Published` | Yes | ISO 8601 date or `"unknown"` | Best estimate of when the content was originally published |
44
- | `Retrieved` | Yes | ISO 8601 datetime with timezone | Exact timestamp when this data was fetched |
45
- | `Confidence` | Yes | `high`, `medium`, or `low` | Confidence level of the `Published` date estimate |
46
-
47
- ---
48
-
49
- ## Confidence Levels
50
-
51
- ### `high`
52
- The publication date was sourced from a structured, machine-readable field — an API response, HTML metadata tag, RSS feed, or official timestamp. The date is reliable.
53
-
54
- *Examples: GitHub API `pushed_at`, arXiv submission date, Hacker News `created_at`*
55
-
56
- ### `medium`
57
- The publication date was inferred from page signals — visible date strings, URL patterns, or content heuristics. Likely correct but not guaranteed.
58
-
59
- *Examples: Blog post date parsed from HTML, URL containing `/2025/03/`, footer copyright year*
60
-
61
- ### `low`
62
- No reliable date signal was found. The date is an estimate based on indirect signals or is entirely unknown.
63
-
64
- *Examples: Static page with no date, scraped content with no metadata, cached result of unknown age*
65
-
66
- ---
67
-
68
- ## Structured Form (JSON)
69
-
70
- Implementations MAY additionally expose freshness metadata as structured JSON alongside the text envelope:
71
-
72
- ```json
73
- {
74
- "freshcontext": {
75
- "source_url": "https://github.com/owner/repo",
76
- "content_date": "2026-03-05",
77
- "retrieved_at": "2026-03-16T09:19:00.000Z",
78
- "freshness_confidence": "high",
79
- "adapter": "github",
80
- "freshness_score": 94
81
- },
82
- "content": "..."
83
- }
84
- ```
85
-
86
- ### `freshness_score` (optional)
87
-
88
- A numeric representation of data freshness from 0–100, calculated as:
89
-
90
- ```
91
- freshness_score = max(0, 100 - (days_since_retrieved × decay_rate))
92
- ```
93
-
94
- Where `decay_rate` defaults to `1.5` for general web content. Implementations MAY use domain-specific decay rates (e.g., financial data decays faster than academic papers).
95
-
96
- | Score | Interpretation |
97
- |---|---|
98
- | 90–100 | Retrieved within hours — treat as current |
99
- | 70–89 | Retrieved within days — reliable for most uses |
100
- | 50–69 | Retrieved within weeks — verify before acting |
101
- | Below 50 | Retrieved more than a month ago — use with caution |
102
-
103
- ---
104
-
105
- ## Adapter Contract
106
-
107
- Any data source that feeds into a FreshContext-compatible system is called an **adapter**. Adapters MUST:
108
-
109
- 1. Return raw content plus a `content_date` (or `null` if unknown)
110
- 2. Set a `freshness_confidence` level based on how the date was determined
111
- 3. Never fabricate or forward-date content timestamps
112
- 4. Clearly identify which source system produced the data via the `adapter` field
113
-
114
- Adapters SHOULD:
115
-
116
- - Prefer structured API sources over scraped content when both are available
117
- - Log retrieval errors without silently returning cached or stale data
118
- - Surface rate-limit or access-denied errors explicitly rather than returning empty content
119
-
120
- ---
121
-
122
- ## Why This Matters for AI Agents
123
-
124
- Large language models have no internal clock. When an agent retrieves web data, it cannot distinguish between something published this morning and something published three years ago — unless that information is explicitly surfaced.
125
-
126
- Without FreshContext (or equivalent):
127
- - An agent recommending job listings may recommend roles that no longer exist
128
- - An agent summarising market trends may cite conditions from a previous cycle
129
- - An agent checking a competitor's pricing may act on outdated information
130
-
131
- With FreshContext:
132
- - Every piece of retrieved data carries its own timestamp
133
- - The agent can reason about data age before acting
134
- - Users can see exactly how fresh their AI's information is
135
-
136
- ---
137
-
138
- ## Compatibility
139
-
140
- A tool, server, or API is **FreshContext-compatible** if:
141
-
142
- - Its responses include the `[FRESHCONTEXT]...[/FRESHCONTEXT]` envelope, OR
143
- - Its responses include the structured JSON form with `freshcontext.retrieved_at` and `freshcontext.freshness_confidence` fields
144
-
145
- Partial implementations that include only `retrieved_at` without `freshness_confidence` are considered **FreshContext-aware** but not fully compatible.
146
-
147
- ---
148
-
149
- ## Reference Implementation
150
-
151
- The canonical reference implementation of this specification is:
152
-
153
- **freshcontext-mcp** — an MCP server with 11 adapters covering GitHub, Hacker News, Google Scholar, arXiv, Reddit, YC Companies, Product Hunt, npm/PyPI, financial markets, and a composite landscape tool.
154
-
155
- - npm: `freshcontext-mcp`
156
- - GitHub: https://github.com/PrinceGabriel-lgtm/freshcontext-mcp
157
- - Cloud endpoint: `https://freshcontext-mcp.gimmanuel73.workers.dev/mcp`
158
-
159
- ---
160
-
161
- ## Versioning
162
-
163
- This document is version 1.0 of the FreshContext Specification.
164
-
165
- Future versions will be tagged in this repository. Breaking changes to the envelope format will increment the major version. Additive changes (new optional fields, new confidence levels) will increment the minor version.
166
-
167
- ---
168
-
169
- ## License
170
-
171
- This specification is published under the MIT License.
172
- Implementations may be proprietary or open source.
173
- Attribution to the FreshContext Specification is appreciated but not required.
174
-
175
- ---
176
-
177
- *"The work isn't gone. It's just waiting to be continued."*
178
- *— Prince Gabriel, Grootfontein, Namibia*