freshcontext-mcp 0.3.17 → 0.3.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/NOTICE.md +17 -0
- package/README.md +459 -296
- package/SECURITY.md +34 -0
- package/TRADEMARKS.md +9 -0
- package/dist/adapters/arxiv.js +92 -48
- package/dist/adapters/hackernews.js +16 -16
- package/dist/adapters/registry.js +232 -0
- package/dist/core/decay.js +61 -0
- package/dist/core/decision.js +176 -0
- package/dist/core/envelope.js +59 -0
- package/dist/core/explain.js +28 -0
- package/dist/core/guards.js +17 -0
- package/dist/core/index.js +11 -0
- package/dist/core/pipeline.js +101 -0
- package/dist/core/provenance.js +73 -0
- package/dist/core/rank.js +84 -0
- package/dist/core/signal.js +101 -0
- package/dist/core/sourceProfiles.js +126 -0
- package/dist/core/types.js +1 -0
- package/dist/core/utility.js +90 -0
- package/dist/rest/handler.js +126 -0
- package/dist/server.js +40 -2
- package/dist/tools/evaluateContext.js +127 -0
- package/dist/tools/freshnessStamp.js +1 -137
- package/dist/types.js +0 -1
- package/docs/API_DESIGN.md +434 -0
- package/docs/CODEX_MCP_USAGE.md +116 -0
- package/docs/CORE_API.md +226 -0
- package/docs/CORE_MCP_BOUNDARY.md +106 -0
- package/docs/DEPENDENCY_DILIGENCE.md +63 -0
- package/docs/FUTURE_LANES.md +173 -0
- package/docs/HA_PRI_V2_DESIGN.md +279 -0
- package/docs/RELEASE_INTEGRITY.md +55 -0
- package/docs/RELEASE_NOTES.md +55 -0
- package/docs/SIGNAL_CONTRACT.md +89 -0
- package/docs/SOURCE_PROFILES.md +427 -0
- package/freshcontext.schema.json +103 -103
- package/package-script-guard.mjs +141 -0
- package/package.json +94 -59
- package/server.json +27 -28
- package/dist/apify.js +0 -133
package/SECURITY.md
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# Security Policy
|
|
2
|
+
|
|
3
|
+
## Supported Versions
|
|
4
|
+
|
|
5
|
+
FreshContext currently supports the active `freshcontext-mcp@0.3.x` package line.
|
|
6
|
+
|
|
7
|
+
Please use the latest published `0.3.x` release when reporting a vulnerability, and include the exact package version, repository, transport, and environment involved.
|
|
8
|
+
|
|
9
|
+
## Reporting A Vulnerability
|
|
10
|
+
|
|
11
|
+
FreshContext accepts responsible security reports by email:
|
|
12
|
+
|
|
13
|
+
- gimmanuel73@gmail.com
|
|
14
|
+
|
|
15
|
+
Please do not post secrets, tokens, private logs, customer data, exploit payloads, or sensitive operational details in public GitHub issues.
|
|
16
|
+
|
|
17
|
+
For a useful report, include:
|
|
18
|
+
|
|
19
|
+
- affected repository or package
|
|
20
|
+
- affected version or commit
|
|
21
|
+
- reproduction steps
|
|
22
|
+
- expected and observed behavior
|
|
23
|
+
- security impact
|
|
24
|
+
- whether the issue affects local MCP usage, hosted Worker usage, examples, docs, packaging, or another surface
|
|
25
|
+
|
|
26
|
+
Public GitHub issues are fine for non-sensitive bugs, documentation mistakes, stale claims, build failures, and feature requests.
|
|
27
|
+
|
|
28
|
+
## Scope Notes
|
|
29
|
+
|
|
30
|
+
FreshContext does not currently offer a formal bug bounty program.
|
|
31
|
+
|
|
32
|
+
Please do not send live production tokens, private Cloudflare logs, npm tokens, GitHub tokens, MCP registry tokens, customer data, or private account data. If a report requires sensitive evidence, describe the issue first by email so a safer exchange path can be agreed.
|
|
33
|
+
|
|
34
|
+
This policy does not make claims of certification, compliance, guaranteed response time, or security warranty.
|
package/TRADEMARKS.md
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
# FreshContext Brand and Trademark Notice
|
|
2
|
+
|
|
3
|
+
FreshContext is the product and project name used by the project owner for this software, documentation, examples, and related public materials.
|
|
4
|
+
|
|
5
|
+
No mark registration claim is made in this repository. Any future trademark filing, transfer, assignment, or licensing question should be reviewed separately.
|
|
6
|
+
|
|
7
|
+
Third-party names, marks, services, and platforms, including Cloudflare, npm, GitHub, Model Context Protocol, MCP, Apify, Claude, OpenAI, Anthropic, and other referenced ecosystems, belong to their respective owners.
|
|
8
|
+
|
|
9
|
+
Use of third-party names in this repository is descriptive, interoperability-oriented, or reference-oriented. It does not imply endorsement, certification, sponsorship, partnership, or official affiliation unless explicitly stated by the relevant third party.
|
package/dist/adapters/arxiv.js
CHANGED
|
@@ -1,66 +1,110 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
?
|
|
11
|
-
|
|
1
|
+
const USER_AGENT = "freshcontext-mcp/0.1.7 (https://github.com/PrinceGabriel-lgtm/freshcontext-mcp)";
|
|
2
|
+
const DEFAULT_ARXIV_SIGNAL_SCORE = 0.8;
|
|
3
|
+
/**
 * Build the arXiv API request URL for a search query, or pass a URL through.
 *
 * Input is treated as a ready-made URL only when it starts with an explicit
 * `http://` or `https://` scheme. A bare `startsWith("http")` check would also
 * capture ordinary queries such as "http caching" and hand them to `fetch`
 * as if they were URLs.
 *
 * @param {string} input - Search query or a full http(s) URL.
 * @param {number} [maxResults=10] - Requested result count; truncated to an
 *   integer and clamped to 1..50. Non-finite values fall back to 10.
 * @returns {string} The URL to request.
 */
function buildArxivApiUrl(input, maxResults = 10) {
    const trimmed = input.trim();
    if (/^https?:\/\//i.test(trimmed)) {
        return trimmed;
    }
    const safeMaxResults = Number.isFinite(maxResults)
        ? Math.max(1, Math.min(Math.trunc(maxResults), 50))
        : 10;
    return `https://export.arxiv.org/api/query?search_query=all:${encodeURIComponent(trimmed)}&start=0&max_results=${safeMaxResults}&sortBy=relevance&sortOrder=descending`;
}
|
|
12
|
+
/**
 * Request `apiUrl` with the module's User-Agent header and return the
 * response body as text.
 *
 * @param {string} apiUrl - URL to request.
 * @returns {Promise<string>} The response body.
 * @throws {Error} When the response status is not OK.
 */
async function fetchArxivXml(apiUrl) {
    const requestInit = { headers: { "User-Agent": USER_AGENT } };
    const response = await fetch(apiUrl, requestInit);
    if (response.ok) {
        return response.text();
    }
    throw new Error(`arXiv API error: ${response.status} ${response.statusText}`);
}
|
|
20
|
+
/**
 * Return the inner text of the first `<tag>…</tag>` element in `block`.
 * The tag name is matched case-insensitively; the text is trimmed and every
 * whitespace run is collapsed to a single space.
 *
 * @param {string} block - Markup fragment to search.
 * @param {string} tag - Element name (interpolated into a RegExp as-is).
 * @returns {string} The normalised text, or "" when the element is absent.
 */
function getTag(block, tag) {
    const pattern = new RegExp(`<${tag}[^>]*>([\\s\\S]*?)<\\/${tag}>`, "i");
    const found = pattern.exec(block);
    if (found === null) {
        return "";
    }
    return found[1].trim().replace(/\s+/g, " ");
}
|
|
24
|
+
/**
 * Return the value of attribute `attr` on the first `<tag …>` in `block`.
 * Matching is case-insensitive and only double-quoted values are recognised.
 *
 * @param {string} block - Markup fragment to search.
 * @param {string} tag - Element name (interpolated into a RegExp as-is).
 * @param {string} attr - Attribute name (interpolated into a RegExp as-is).
 * @returns {string} The trimmed attribute value, or "" when nothing matches.
 */
function getAttr(block, tag, attr) {
    const found = new RegExp(`<${tag}[^>]*${attr}="([^"]*)"`, "i").exec(block);
    return found === null ? "" : found[1].trim();
}
|
|
28
|
+
/**
 * Rewrite the first `http://arxiv.org/abs/` occurrence in `id` to its
 * `https://` form; any other string is returned unchanged.
 *
 * @param {string} id - Entry identifier as read from the feed.
 * @returns {string} The identifier with the abs-page prefix upgraded to https.
 */
function normalizeArxivUrl(id) {
    const insecurePrefix = "http://arxiv.org/abs/";
    const at = id.indexOf(insecurePrefix);
    if (at === -1) {
        return id;
    }
    return `${id.slice(0, at)}https://arxiv.org/abs/${id.slice(at + insecurePrefix.length)}`;
}
|
|
31
|
+
/**
 * Split a feed document into one plain record per `<entry>` element.
 *
 * Each record carries `title`, `summary`, `published`, `updated`, `id`
 * (passed through normalizeArxivUrl), `authors` (at most the first four
 * non-empty `<author><name>` values) and `category` (the `term` of
 * `arxiv:primary_category`, falling back to the first `category`).
 * Missing fields come back as empty strings.
 *
 * @param {string} xml - Raw feed text.
 * @returns {Array<object>} Parsed entries in document order.
 */
function parseArxivEntries(xml) {
    const parsed = [];
    for (const entryMatch of xml.matchAll(/<entry>([\s\S]*?)<\/entry>/g)) {
        const block = entryMatch[1];
        const names = [];
        for (const authorMatch of block.matchAll(/<author>([\s\S]*?)<\/author>/g)) {
            const name = getTag(authorMatch[1], "name");
            if (name) {
                names.push(name);
            }
        }
        const primaryCategory = getAttr(block, "arxiv:primary_category", "term");
        parsed.push({
            title: getTag(block, "title").replace(/\n/g, " "),
            summary: getTag(block, "summary").replace(/\n/g, " "),
            published: getTag(block, "published"),
            updated: getTag(block, "updated"),
            id: normalizeArxivUrl(getTag(block, "id")),
            authors: names.slice(0, 4),
            category: primaryCategory || getAttr(block, "category", "term"),
        });
    }
    return parsed;
}
|
|
51
|
+
/**
 * Render one parsed entry as a numbered, multi-line text block.
 *
 * @param {object} entry - Record with `title`, `summary`, `published`,
 *   `updated`, `id`, `authors` (string array) and `category`.
 * @param {number} index - Zero-based position; shown one-based.
 * @returns {string} Lines joined with "\n"; the Category line is omitted
 *   when the entry has no category.
 */
function formatArxivEntry(entry, index) {
    // Dates are shown at day precision (first 10 characters of the timestamp).
    const published = entry.published.slice(0, 10);
    const updated = entry.updated.slice(0, 10);
    const authors = entry.authors.join(", ");
    const summary = entry.summary.slice(0, 300);
    // Only mention an update when one is actually present and differs.
    const updatedNote = updated && updated !== published ? ` (updated ${updated})` : "";
    return [
        `[${index + 1}] ${entry.title}`,
        `Authors: ${authors || "Unknown"}`,
        `Published: ${published}${updatedNote}`,
        entry.category ? `Category: ${entry.category}` : null,
        // U+2026 HORIZONTAL ELLIPSIS. The previous escape sequence was the
        // UTF-8 bytes of this character mis-decoded as Windows-1252 ("â€¦").
        `Abstract: ${summary}\u2026`,
        `Link: ${entry.id}`,
    ].filter(Boolean).join("\n");
}
|
|
65
|
+
/**
 * arXiv adapter backed by the official arXiv API.
 * `options.url` holds either a search query or a direct arXiv API URL.
 * Docs: https://arxiv.org/help/api/user-manual
 *
 * Resolves to `{ raw, content_date, freshness_confidence }`:
 * - `raw` is the formatted entries joined by blank lines, cut to
 *   `options.maxLength` characters (6000 when not provided);
 * - `content_date` is the latest `published` day (YYYY-MM-DD) among the
 *   entries, or null;
 * - `freshness_confidence` is "low" with no entries, "high" when a date was
 *   found, otherwise "medium".
 */
export async function arxivAdapter(options) {
    const xml = await fetchArxivXml(buildArxivApiUrl(options.url.trim()));
    const entries = parseArxivEntries(xml);
    if (entries.length === 0) {
        return { raw: "No results found for this query.", content_date: null, freshness_confidence: "low" };
    }
    const limit = options.maxLength ?? 6000;
    const raw = entries.map(formatArxivEntry).join("\n\n").slice(0, limit);
    // ISO day strings order chronologically under plain string comparison,
    // so the greatest non-empty string is the most recent publication day.
    let content_date = null;
    for (const entry of entries) {
        const day = entry.published.slice(0, 10);
        if (day && (content_date === null || day > content_date)) {
            content_date = day;
        }
    }
    const freshness_confidence = content_date ? "high" : "medium";
    return { raw, content_date, freshness_confidence };
}
|
|
88
|
+
/**
 * Query arXiv and return one signal object per parsed entry.
 *
 * @param {object} input
 * @param {string} input.query - Search text (trimmed before use).
 * @param {number} [input.maxResults] - Forwarded to buildArxivApiUrl.
 * @param {string} [input.retrievedAt] - Timestamp stamped on every signal;
 *   defaults to the current time in ISO format, taken after the fetch.
 * @param {number} [input.semanticScore] - Score stamped on every signal;
 *   defaults to DEFAULT_ARXIV_SIGNAL_SCORE.
 * @returns {Promise<Array<object>>} Signals carrying the untruncated summary
 *   as `content` and the entry id as `source`.
 */
export async function searchArxivSignals(input) {
    const query = input.query.trim();
    const xml = await fetchArxivXml(buildArxivApiUrl(query, input.maxResults));
    const entries = parseArxivEntries(xml);
    const retrieved_at = input.retrievedAt ?? new Date().toISOString();
    const semantic_score = input.semanticScore ?? DEFAULT_ARXIV_SIGNAL_SCORE;
    const signals = [];
    for (const entry of entries) {
        signals.push({
            title: entry.title,
            content: entry.summary,
            source: entry.id,
            source_type: "arxiv",
            published_at: entry.published || null,
            retrieved_at,
            semantic_score,
            metadata: {
                authors: entry.authors,
                category: entry.category || null,
                updated_at: entry.updated || null,
                query,
            },
        });
    }
    return signals;
}
|
|
@@ -54,22 +54,22 @@ export async function hackerNewsAdapter(options) {
|
|
|
54
54
|
const browser = await chromium.launch({ headless: true });
|
|
55
55
|
const page = await browser.newPage();
|
|
56
56
|
await page.goto(url, { waitUntil: "domcontentloaded", timeout: 20000 });
|
|
57
|
-
const data = await page.evaluate(`(function() {
|
|
58
|
-
var items = Array.from(document.querySelectorAll('.athing')).slice(0, 20);
|
|
59
|
-
var results = items.map(function(el) {
|
|
60
|
-
var titleLineEl = el.querySelector('.titleline > a');
|
|
61
|
-
var title = titleLineEl ? titleLineEl.textContent.trim() : null;
|
|
62
|
-
var link = titleLineEl ? titleLineEl.getAttribute('href') : null;
|
|
63
|
-
var subtext = el.nextElementSibling;
|
|
64
|
-
var scoreEl = subtext ? subtext.querySelector('.score') : null;
|
|
65
|
-
var score = scoreEl ? scoreEl.textContent.trim() : null;
|
|
66
|
-
var ageEl = subtext ? subtext.querySelector('.age') : null;
|
|
67
|
-
var age = ageEl ? ageEl.getAttribute('title') : null;
|
|
68
|
-
var anchors = subtext ? subtext.querySelectorAll('a') : [];
|
|
69
|
-
var commentLink = anchors.length > 0 ? anchors[anchors.length - 1].textContent.trim() : null;
|
|
70
|
-
return { title: title, link: link, score: score, age: age, commentLink: commentLink };
|
|
71
|
-
});
|
|
72
|
-
return results;
|
|
57
|
+
const data = await page.evaluate(`(function() {
|
|
58
|
+
var items = Array.from(document.querySelectorAll('.athing')).slice(0, 20);
|
|
59
|
+
var results = items.map(function(el) {
|
|
60
|
+
var titleLineEl = el.querySelector('.titleline > a');
|
|
61
|
+
var title = titleLineEl ? titleLineEl.textContent.trim() : null;
|
|
62
|
+
var link = titleLineEl ? titleLineEl.getAttribute('href') : null;
|
|
63
|
+
var subtext = el.nextElementSibling;
|
|
64
|
+
var scoreEl = subtext ? subtext.querySelector('.score') : null;
|
|
65
|
+
var score = scoreEl ? scoreEl.textContent.trim() : null;
|
|
66
|
+
var ageEl = subtext ? subtext.querySelector('.age') : null;
|
|
67
|
+
var age = ageEl ? ageEl.getAttribute('title') : null;
|
|
68
|
+
var anchors = subtext ? subtext.querySelectorAll('a') : [];
|
|
69
|
+
var commentLink = anchors.length > 0 ? anchors[anchors.length - 1].textContent.trim() : null;
|
|
70
|
+
return { title: title, link: link, score: score, age: age, commentLink: commentLink };
|
|
71
|
+
});
|
|
72
|
+
return results;
|
|
73
73
|
})()`);
|
|
74
74
|
await browser.close();
|
|
75
75
|
const typedData = data;
|
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
/**
 * Build an immutable adapter descriptor from `input`.
 *
 * The result is a frozen shallow copy. `secondary_source_profiles` is
 * replaced by a frozen copy of the array when one is given, and is set to
 * `undefined` otherwise (the key is always present on the result).
 *
 * @param {object} input - Descriptor fields.
 * @returns {Readonly<object>} The frozen descriptor.
 */
function descriptor(input) {
    const snapshot = { ...input };
    const secondary = input.secondary_source_profiles;
    snapshot.secondary_source_profiles = secondary
        ? Object.freeze([...secondary])
        : undefined;
    return Object.freeze(snapshot);
}
|
|
9
|
+
/**
 * Return a mutable shallow copy of a descriptor, with its
 * `secondary_source_profiles` array (when present) copied as well, so the
 * caller cannot reach the original array through the copy.
 *
 * @param {object} descriptor - Descriptor to copy.
 * @returns {object} A fresh, unfrozen object.
 */
function copyDescriptor(descriptor) {
    const { secondary_source_profiles: secondary } = descriptor;
    const clone = { ...descriptor };
    clone.secondary_source_profiles = secondary ? [...secondary] : undefined;
    return clone;
}
|
|
17
|
+
/**
 * Frozen list of descriptors for the built-in adapters.
 *
 * Every entry is produced by descriptor() and carries:
 * - adapter_id / tool_name: the two identifiers an adapter can be looked up by;
 * - source_profile: primary source profile id;
 * - secondary_source_profiles: optional additional profile ids;
 * - output_mode: "single", "batch" or "composite";
 * - runtime_kind: "browser", "api", "mixed" or "composite";
 * - risk: "low", "medium" or "high";
 * - notes: free-text remark about the adapter.
 */
export const BUILT_IN_ADAPTER_REGISTRY = Object.freeze([
    descriptor({
        adapter_id: "github",
        tool_name: "extract_github",
        source_profile: "code_activity",
        output_mode: "single",
        runtime_kind: "browser",
        risk: "medium",
        notes: "Repository page extraction uses browser automation; keep behavior compatibility pinned before signal extraction.",
    }),
    descriptor({
        adapter_id: "google_scholar",
        tool_name: "extract_scholar",
        source_profile: "academic_research",
        output_mode: "batch",
        runtime_kind: "browser",
        risk: "medium",
        notes: "Scholar extraction is browser-backed and date precision is usually year-level.",
    }),
    descriptor({
        adapter_id: "hackernews",
        tool_name: "extract_hackernews",
        source_profile: "social_pulse",
        output_mode: "batch",
        runtime_kind: "mixed",
        risk: "medium",
        notes: "Plain query path uses Algolia API; URL extraction can use browser automation.",
    }),
    descriptor({
        adapter_id: "yc",
        tool_name: "extract_yc",
        source_profile: "company_intel",
        output_mode: "batch",
        runtime_kind: "browser",
        risk: "medium",
        notes: "YC company listing extraction is browser-backed.",
    }),
    descriptor({
        adapter_id: "reposearch",
        tool_name: "search_repos",
        source_profile: "code_activity",
        output_mode: "batch",
        runtime_kind: "api",
        risk: "low",
        notes: "GitHub repository search API result set; good early signal-output candidate.",
    }),
    descriptor({
        adapter_id: "packagetrends",
        tool_name: "package_trends",
        source_profile: "code_activity",
        secondary_source_profiles: ["official_docs"],
        output_mode: "batch",
        runtime_kind: "api",
        risk: "low",
        notes: "Registry metadata for npm and PyPI packages.",
    }),
    descriptor({
        adapter_id: "arxiv",
        tool_name: "extract_arxiv",
        source_profile: "academic_research",
        output_mode: "batch",
        runtime_kind: "api",
        risk: "low",
        notes: "Official API with clear paper timestamps; recommended first extraction target.",
    }),
    descriptor({
        adapter_id: "finance",
        tool_name: "extract_finance",
        source_profile: "market_finance",
        output_mode: "batch",
        runtime_kind: "api",
        risk: "medium",
        notes: "Quote freshness and partial-failure semantics need careful compatibility coverage.",
    }),
    descriptor({
        adapter_id: "reddit",
        tool_name: "extract_reddit",
        source_profile: "social_pulse",
        output_mode: "batch",
        runtime_kind: "api",
        risk: "medium",
        notes: "Public JSON API with community-content volatility.",
    }),
    descriptor({
        adapter_id: "producthunt",
        tool_name: "extract_producthunt",
        source_profile: "social_pulse",
        output_mode: "batch",
        runtime_kind: "mixed",
        risk: "medium",
        notes: "Uses optional API path with browser fallback.",
    }),
    descriptor({
        adapter_id: "landscape",
        tool_name: "extract_landscape",
        source_profile: "composite_landscape",
        secondary_source_profiles: ["company_intel", "code_activity", "social_pulse"],
        output_mode: "composite",
        runtime_kind: "composite",
        risk: "high",
        notes: "Composite report should preserve section-level source profiles before extraction.",
    }),
    descriptor({
        adapter_id: "jobs",
        tool_name: "search_jobs",
        source_profile: "jobs_opportunities",
        output_mode: "batch",
        runtime_kind: "api",
        risk: "medium",
        notes: "Multi-source job aggregation with filters and strict recency expectations.",
    }),
    descriptor({
        adapter_id: "changelog",
        tool_name: "extract_changelog",
        source_profile: "official_docs",
        secondary_source_profiles: ["code_activity"],
        output_mode: "batch",
        runtime_kind: "mixed",
        risk: "medium",
        notes: "GitHub releases and registry paths are API-backed; website discovery can use browser automation.",
    }),
    descriptor({
        adapter_id: "govcontracts",
        tool_name: "extract_govcontracts",
        source_profile: "government_regulatory",
        output_mode: "batch",
        runtime_kind: "api",
        risk: "medium",
        notes: "Official API; direct API URL compatibility and award-date semantics need coverage.",
    }),
    descriptor({
        adapter_id: "gov_landscape",
        tool_name: "extract_gov_landscape",
        source_profile: "composite_landscape",
        secondary_source_profiles: ["government_regulatory", "code_activity", "social_pulse", "official_docs"],
        output_mode: "composite",
        runtime_kind: "composite",
        risk: "high",
        notes: "Composite government report stitches multiple source profiles.",
    }),
    descriptor({
        adapter_id: "finance_landscape",
        tool_name: "extract_finance_landscape",
        source_profile: "composite_landscape",
        secondary_source_profiles: ["market_finance", "social_pulse", "code_activity", "official_docs"],
        output_mode: "composite",
        runtime_kind: "composite",
        risk: "high",
        notes: "Composite finance report must not collapse market and social freshness into one policy.",
    }),
    descriptor({
        adapter_id: "sec_filings",
        tool_name: "extract_sec_filings",
        source_profile: "government_regulatory",
        output_mode: "batch",
        runtime_kind: "api",
        risk: "low",
        notes: "Official SEC API with clear filing dates.",
    }),
    descriptor({
        adapter_id: "gdelt",
        tool_name: "extract_gdelt",
        source_profile: "government_regulatory",
        secondary_source_profiles: ["company_intel"],
        output_mode: "batch",
        runtime_kind: "api",
        risk: "medium",
        notes: "Global news intelligence has fast-moving timestamps and broad source variance.",
    }),
    descriptor({
        adapter_id: "company_landscape",
        tool_name: "extract_company_landscape",
        source_profile: "composite_landscape",
        secondary_source_profiles: ["company_intel", "government_regulatory", "market_finance", "official_docs"],
        output_mode: "composite",
        runtime_kind: "composite",
        risk: "high",
        notes: "Composite company report combines official, market, news, and product velocity signals.",
    }),
    descriptor({
        adapter_id: "gebiz",
        tool_name: "extract_gebiz",
        source_profile: "government_regulatory",
        output_mode: "batch",
        runtime_kind: "api",
        risk: "low",
        notes: "Official data.gov.sg procurement dataset.",
    }),
    descriptor({
        adapter_id: "idea_landscape",
        tool_name: "extract_idea_landscape",
        source_profile: "composite_landscape",
        secondary_source_profiles: ["social_pulse", "company_intel", "code_activity", "jobs_opportunities"],
        output_mode: "composite",
        runtime_kind: "composite",
        risk: "high",
        notes: "Composite idea validation report stitches social, funding, code, jobs, package, and launch signals.",
    }),
]);
|
|
216
|
+
/**
 * List every built-in adapter descriptor, in registry order.
 *
 * @returns {Array<object>} Mutable copies; the frozen registry entries
 *   themselves are never handed out.
 */
export function listAdapterDescriptors() {
    const copies = [];
    for (const entry of BUILT_IN_ADAPTER_REGISTRY) {
        copies.push(copyDescriptor(entry));
    }
    return copies;
}
|
|
219
|
+
/**
 * Look up a built-in adapter by its `adapter_id` or its `tool_name`.
 *
 * @param {string} adapterIdOrToolName - Either identifier.
 * @returns {object|undefined} A mutable copy of the first matching
 *   descriptor, or undefined when neither field matches.
 */
export function getAdapterDescriptor(adapterIdOrToolName) {
    for (const entry of BUILT_IN_ADAPTER_REGISTRY) {
        if (entry.adapter_id === adapterIdOrToolName || entry.tool_name === adapterIdOrToolName) {
            return copyDescriptor(entry);
        }
    }
    return undefined;
}
|
|
223
|
+
/**
 * List the adapters whose primary or secondary source profiles include
 * `profileId`, in registry order.
 *
 * @param {string} profileId - Source profile id to match.
 * @returns {Array<object>} Mutable copies of the matching descriptors.
 */
export function listAdaptersBySourceProfile(profileId) {
    const usesProfile = (entry) => {
        if (entry.source_profile === profileId) {
            return true;
        }
        return entry.secondary_source_profiles?.includes(profileId);
    };
    const matches = BUILT_IN_ADAPTER_REGISTRY.filter(usesProfile);
    return matches.map(copyDescriptor);
}
|
|
228
|
+
/**
 * List the adapters whose `risk` field equals `risk`, in registry order.
 *
 * @param {string} risk - Risk level to match exactly.
 * @returns {Array<object>} Mutable copies of the matching descriptors.
 */
export function listAdaptersByRisk(risk) {
    const matches = [];
    for (const entry of BUILT_IN_ADAPTER_REGISTRY) {
        if (entry.risk === risk) {
            matches.push(copyDescriptor(entry));
        }
    }
    return matches;
}
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
// Spec-compliant exponential DAR model.
// Higher lambda = data goes stale faster. Half-life formula: t1/2 = ln(2) / lambda.
// Lambda is measured per hour and mirrors the Worker/D1 intelligence engine.
export const LAMBDA = {
    hackernews: 0.050,
    reddit: 0.010,
    producthunt: 0.010,
    jobs: 0.005,
    finance: 0.001,
    yc: 0.001,
    packagetrends: 0.0005,
    github: 0.0002,
    reposearch: 0.0002,
    google_scholar: 0.00005,
    arxiv: 0.00005,
    changelog: 0.0005,
    gdelt: 0.020,
    gebiz: 0.003,
    govcontracts: 0.001,
    sec_filings: 0.005,
    landscape: 0.050,
    gov_landscape: 0.001,
    finance_landscape: 0.001,
    company_landscape: 0.005,
    idea_landscape: 0.050,
    default: 0.001,
};
// A content date may be up to this far ahead of the retrieval time
// (5 minutes, in milliseconds) before it is treated as a future date.
export const FUTURE_CLOCK_SKEW_TOLERANCE_MS = 5 * 60 * 1000;
const MS_PER_HOUR = 1000 * 60 * 60;
/**
 * Tell whether `content_date` lies ahead of `retrieved_at` by more than
 * FUTURE_CLOCK_SKEW_TOLERANCE_MS.
 *
 * @param {string|number|Date|null|undefined} content_date - Publication time.
 * @param {string|number|Date} retrieved_at - Retrieval time.
 * @returns {boolean} False when `content_date` is falsy or either value does
 *   not parse as a date.
 */
export function isMeaningfullyFutureDate(content_date, retrieved_at) {
    if (!content_date) {
        return false;
    }
    const published = new Date(content_date).getTime();
    const retrieved = new Date(retrieved_at).getTime();
    if (Number.isNaN(published) || Number.isNaN(retrieved)) {
        return false;
    }
    return published - retrieved > FUTURE_CLOCK_SKEW_TOLERANCE_MS;
}
/**
 * Score content freshness from 0 to 100 as
 * `round(100 * exp(-lambda * ageHours))`, where `ageHours` is the time from
 * publication to retrieval and `lambda` is the adapter's hourly decay rate.
 *
 * @param {string|number|Date|null|undefined} content_date - Publication time.
 * @param {string|number|Date} retrieved_at - Retrieval time.
 * @param {string} adapter - Key into LAMBDA; unknown keys use LAMBDA.default.
 * @returns {number|null} The score, or null when `content_date` is falsy,
 *   either date is unparseable, or the content date is meaningfully in the
 *   future.
 */
export function calculateFreshnessScore(content_date, retrieved_at, adapter) {
    if (!content_date) {
        return null;
    }
    const published = new Date(content_date).getTime();
    const retrieved = new Date(retrieved_at).getTime();
    if (Number.isNaN(published) || Number.isNaN(retrieved)) {
        return null;
    }
    if (isMeaningfullyFutureDate(content_date, retrieved_at)) {
        return null;
    }
    // Content dated slightly ahead of retrieval (within tolerance) has age 0.
    const ageHours = Math.max(0, (retrieved - published) / MS_PER_HOUR);
    // Own-property lookup only: names such as "constructor" or "toString"
    // would otherwise resolve to inherited functions and turn the score
    // into NaN.
    const configured = Object.prototype.hasOwnProperty.call(LAMBDA, adapter)
        ? LAMBDA[adapter]
        : undefined;
    const lambda = configured ?? LAMBDA.default;
    return Math.max(0, Math.round(100 * Math.exp(-lambda * ageHours)));
}
|
|
51
|
+
/**
 * Map a freshness score to its human-readable label.
 *
 * @param {number|null|undefined} score - Score on the 0..100 scale.
 * @returns {string} "unknown" when there is no usable score (null, undefined
 *   or NaN); otherwise "current" (>= 90), "reliable" (>= 70),
 *   "verify before acting" (>= 50) or "use with caution".
 */
export function scoreLabel(score) {
    // `== null` covers both null and undefined. NaN is not a usable score
    // either, and would otherwise fall through every threshold below.
    if (score == null || Number.isNaN(score)) {
        return "unknown";
    }
    if (score >= 90) {
        return "current";
    }
    if (score >= 70) {
        return "reliable";
    }
    if (score >= 50) {
        return "verify before acting";
    }
    return "use with caution";
}
|