replication-radar 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- replication_radar/__init__.py +10 -0
- replication_radar/data/verdicts.json +32 -0
- replication_radar/openaire.py +201 -0
- replication_radar/radar.py +157 -0
- replication_radar/server.py +60 -0
- replication_radar/verdicts.py +39 -0
- replication_radar-0.1.0.dist-info/METADATA +100 -0
- replication_radar-0.1.0.dist-info/RECORD +11 -0
- replication_radar-0.1.0.dist-info/WHEEL +4 -0
- replication_radar-0.1.0.dist-info/entry_points.txt +2 -0
- replication_radar-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
"""Replication Radar — turn the OpenAIRE Graph into a ranked replication queue.
|
|
2
|
+
|
|
3
|
+
Adds a capability the Graph lacks: 'what high-impact work is worth replicating,
|
|
4
|
+
with INDEPENDENT reusable tooling, and has it already been checked?' — joining
|
|
5
|
+
OpenAIRE impact + Software Heritage reuse signals + Science Live nanopub verdicts.
|
|
6
|
+
"""
|
|
7
|
+
from .radar import radar, find_independent_software, replication_status
|
|
8
|
+
|
|
9
|
+
# Names re-exported from .radar as the package's public API.
__all__ = ["radar", "find_independent_software", "replication_status"]
# NOTE(review): presumably must match the Version field in the wheel METADATA — confirm on release.
__version__ = "0.1.0"
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
{
|
|
2
|
+
"_meta": {
|
|
3
|
+
"source": "Science Live FORRT replication chains (nanopub verdicts), built 2026-06-13",
|
|
4
|
+
"schema": "doi (lowercased) -> list of verifications {repo, verdict, cito[], outcome_np, cito_np}",
|
|
5
|
+
"note": "This is the 'already-checked' memory layer of the Replication Radar. Verdicts are carried in CiTO nanopubs; the OpenAIRE Graph cannot hold them. Extend by adding chains."
|
|
6
|
+
},
|
|
7
|
+
"verifications": {
|
|
8
|
+
"10.1126/science.aax8591": [
|
|
9
|
+
{"repo": "weatherxbiodiversity-projection", "verdict": "Validated", "cito": ["confirms"], "outcome_np": "https://w3id.org/sciencelive/np/RAPZMgcYbScSAXnrnSySQwZzgSA_rn-xodlMxNlwwQYY8", "cito_np": "https://w3id.org/sciencelive/np/RALbHA-r6wIFOFPFlfIpwYqJEpzCFqeJ082iChgdfvhNM"},
|
|
10
|
+
{"repo": "weatherxbiodiversity-projection-nside128", "verdict": "Validated", "cito": ["confirms"], "outcome_np": "https://w3id.org/sciencelive/np/RAa4QR41Hot9zxujcrCyTo82Ij7oaw_6z8zk8NxDqoJFM", "cito_np": "https://w3id.org/sciencelive/np/RAhw9m0BEj0-9hXrTtJ2NHG5rMr-ZBf_mdBQTQRk6u3n4"},
|
|
11
|
+
{"repo": "weatherxbiodiversity-substrate-sensitivity", "verdict": "Validated+PartiallySupported", "cito": ["confirms", "extends", "qualifies"], "outcome_np": null, "cito_np": null}
|
|
12
|
+
],
|
|
13
|
+
"10.1890/07-2153.1": [
|
|
14
|
+
{"repo": "sdm-phillips-reproduction", "verdict": "Validated", "cito": ["confirms"], "outcome_np": "https://w3id.org/sciencelive/np/RA_uV84IchQAkkmCP_6amQir_flgCmvvt97DWIDmbu_V0", "cito_np": "https://w3id.org/sciencelive/np/RAWsmCzWMKYQQK_ovRvE1o2wqjYkoxjfZRncHEcWAvv2g"},
|
|
15
|
+
{"repo": "sdm-hotspot-spatial-effort", "verdict": "methodological-extension", "cito": ["extends", "usesMethodIn"], "outcome_np": "https://w3id.org/sciencelive/np/RA4q2J-h_UpFpeLTeL_DS8p7j7EOBCes4L1G1eOBfJiDo", "cito_np": "https://w3id.org/sciencelive/np/RA7151bPt5TSSTxi-sWGmZhUOHcqaSzevzhhD4QxmfURI"}
|
|
16
|
+
],
|
|
17
|
+
"10.1890/11-1952.1": [
|
|
18
|
+
{"repo": "sdm-hotspot-effort-correction", "verdict": "Validated", "cito": ["extends", "usesMethodIn"], "outcome_np": "https://w3id.org/sciencelive/np/RAsPjEImfZaXsIri0ny4j_s_k_6wyOlC6tkocl6w2y7f4", "cito_np": "https://w3id.org/sciencelive/np/RACYbb_IxZNnBcxI7uPqc-df2oRaMr4bqHJTOJe-BNmkc"}
|
|
19
|
+
],
|
|
20
|
+
"10.1073/pnas.0704469104": [
|
|
21
|
+
{"repo": "sdm-scale-replication", "verdict": "PartiallySupported", "cito": ["qualifies"], "outcome_np": "https://w3id.org/sciencelive/np/RAzeZKbUCEMXZXDc-WzgHZ4K5mOMwotYhS2uCKDDmdcHI", "cito_np": "https://w3id.org/sciencelive/np/RALjFcvPtncy74ZL8QgSiEyRZv_-mOiZj4wvWuq8JK-2s"}
|
|
22
|
+
],
|
|
23
|
+
"10.1016/j.ocemod.2024.102387": [
|
|
24
|
+
{"repo": "coastal-rom-replication", "verdict": "Validated", "cito": ["confirms"], "outcome_np": "https://w3id.org/sciencelive/np/RAG8PjhjvPQFZo54BTaV_b7TryMonH--aDGzEXLhzvQ4w", "cito_np": "https://w3id.org/sciencelive/np/RAuvGPQk_nxEcBWzADcLnyfqgjJ9Hr2aSWxwof2sDAung"},
|
|
25
|
+
{"repo": "european-coastal-biodiversity-replication", "verdict": "Validated", "cito": ["extends", "qualifies"], "outcome_np": "https://w3id.org/sciencelive/np/RAwrIP5nKIk8Qh7WeMs8z6HHVGssLVAkHDeI8nSB4LUK8", "cito_np": "https://w3id.org/sciencelive/np/RAEXzZcCXiwsNf19NbXvilwwxnFU5IvkplzWTNiNmT70A"}
|
|
26
|
+
],
|
|
27
|
+
"10.1038/s41597-022-01235-3": [
|
|
28
|
+
{"repo": "white-shark-geolocation-replication", "verdict": "PartiallySupported", "cito": ["qualifies"], "outcome_np": "https://w3id.org/sciencelive/np/RAv0cF0rwxb1CFmUAJlk5B7PLVc9pls5OvlfOHHHhtgm8", "cito_np": "https://w3id.org/sciencelive/np/RAnqtFUZHfmW7Dtmf3bcTQtjDAfrq5IGV4xQ8guW8L3vY"},
|
|
29
|
+
{"repo": "white-shark-geolocation-light", "verdict": "PartiallySupported", "cito": ["qualifies"], "outcome_np": "https://w3id.org/sciencelive/np/RAlwDA35wFcmV-ZYzOUB_E3SrqPMIlIxWftoiPFbzN-7I", "cito_np": "https://w3id.org/sciencelive/np/RA6JMK2CNN8MZITLdd3si08TwyR_-3fvbt6l6g3lf-YOg"}
|
|
30
|
+
]
|
|
31
|
+
}
|
|
32
|
+
}
|
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
"""Thin client over the OpenAIRE Graph API (graph/v1).
|
|
2
|
+
|
|
3
|
+
Hits api.openaire.eu directly (anonymous, no token needed for these queries).
|
|
4
|
+
Endpoint-agnostic: set RADAR_OPENAIRE_BASE to point at the Alien gateway or any
|
|
5
|
+
mirror later. Stdlib-only (urllib) so the core runs with zero install.
|
|
6
|
+
|
|
7
|
+
Operational rules learned from the connector spike (2026-06-13):
|
|
8
|
+
- Free-text terms are AND-ed: keep queries SHORT (2-3 words), OR-expand if needed.
|
|
9
|
+
- Rank SOFTWARE by reuse signal (repo + Software Heritage + usage), NOT citations
|
|
10
|
+
(research software is almost uniformly citationClass C5 / 0 citations).
|
|
11
|
+
- Rank PAPERS by citation impact (BIP! classes C1..C5 + count).
|
|
12
|
+
"""
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import json
|
|
16
|
+
import os
|
|
17
|
+
import urllib.parse
|
|
18
|
+
import urllib.request
|
|
19
|
+
from dataclasses import dataclass, field
|
|
20
|
+
from typing import Any
|
|
21
|
+
|
|
22
|
+
# Root URL that every request path is appended to; override with RADAR_OPENAIRE_BASE.
BASE = os.environ.get("RADAR_OPENAIRE_BASE", "https://api.openaire.eu/graph/v1")
# Timeout in seconds handed to urlopen for each request; override with RADAR_HTTP_TIMEOUT.
_TIMEOUT = float(os.environ.get("RADAR_HTTP_TIMEOUT", "30"))

# Sort weight per impact class: a lower number sorts first, a missing class (None) sorts last.
_CLASS_RANK = {"C1": 1, "C2": 2, "C3": 3, "C4": 4, "C5": 5, None: 9}
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _get(path: str, params: dict[str, Any]) -> dict:
    """Send a GET request to ``{BASE}/{path}`` and return the decoded JSON body.

    Args:
        path: Endpoint path relative to ``BASE`` (e.g. ``"researchProducts"``).
        params: Query parameters; entries whose value is ``None`` are omitted.

    Returns:
        The parsed JSON response.

    Raises:
        Whatever ``urllib.request.urlopen`` or ``json.load`` raise; nothing is
        caught here.
    """
    query = urllib.parse.urlencode(
        {key: value for key, value in params.items() if value is not None}
    )
    # BASE comes from the RADAR_OPENAIRE_BASE environment variable, so it may
    # carry a trailing slash; normalize both sides so the URL never gets "//".
    url = f"{BASE.rstrip('/')}/{path.lstrip('/')}?{query}"
    request = urllib.request.Request(url, headers={"Accept": "application/json"})
    with urllib.request.urlopen(request, timeout=_TIMEOUT) as resp:
        return json.load(resp)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _doi_of(rec: dict) -> str | None:
|
|
37
|
+
for p in (rec.get("pids") or []):
|
|
38
|
+
if (p.get("scheme") or "").lower() == "doi":
|
|
39
|
+
return (p.get("value") or "").lower() or None
|
|
40
|
+
for inst in (rec.get("instances") or []):
|
|
41
|
+
for p in (inst.get("pids") or []):
|
|
42
|
+
if (p.get("scheme") or "").lower() == "doi":
|
|
43
|
+
return (p.get("value") or "").lower() or None
|
|
44
|
+
return None
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _surnames(rec: dict) -> list[str]:
|
|
48
|
+
out: list[str] = []
|
|
49
|
+
for a in (rec.get("authors") or []):
|
|
50
|
+
s = a.get("surname") or ""
|
|
51
|
+
if not s and a.get("fullName"):
|
|
52
|
+
# "Surname, Given" or "Given Surname" -> take the comma-lead or last token
|
|
53
|
+
fn = a["fullName"]
|
|
54
|
+
s = fn.split(",")[0].strip() if "," in fn else fn.split()[-1]
|
|
55
|
+
s = s.strip().lower()
|
|
56
|
+
if s:
|
|
57
|
+
out.append(s)
|
|
58
|
+
return out
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _year(rec: dict) -> int | None:
|
|
62
|
+
d = rec.get("publicationDate") or ""
|
|
63
|
+
return int(d[:4]) if d[:4].isdigit() else None
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _impact(rec: dict) -> dict:
|
|
67
|
+
return ((rec.get("indicators") or {}).get("citationImpact")) or {}
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _urls(rec: dict) -> list[str]:
|
|
71
|
+
out: list[str] = []
|
|
72
|
+
for inst in (rec.get("instances") or []):
|
|
73
|
+
out.extend(inst.get("urls") or [])
|
|
74
|
+
if rec.get("codeRepositoryUrl"):
|
|
75
|
+
out.append(rec["codeRepositoryUrl"])
|
|
76
|
+
return out
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
@dataclass
class Product:
    """One research product (paper, software, ...) reduced to the fields the radar ranks on."""

    doi: str | None
    title: str
    authors: list[str]  # lowercased surnames
    year: int | None
    type: str
    citation_count: int
    citation_class: str | None
    influence_class: str | None
    popularity_class: str | None
    impulse_class: str | None
    code_repo: str | None
    swh_archived: bool
    downloads: int
    raw_id: str | None = None

    @property
    def impact_rank(self) -> tuple[int, int, int]:
        """Sort key for papers: influence class, then citation class, then -count.

        Smaller tuples rank first; an unknown class falls back to weight 9.
        """
        influence = _CLASS_RANK.get(self.influence_class, 9)
        citation = _CLASS_RANK.get(self.citation_class, 9)
        return (influence, citation, -self.citation_count)

    @property
    def reuse_score(self) -> int:
        """Score how reusable a SOFTWARE product looks (0..6).

        A code repository and Software Heritage archival are worth 2 points
        each; any downloads and any citations are worth 1 point each.
        """
        signals = (
            (2, bool(self.code_repo)),
            (2, bool(self.swh_archived)),
            (1, self.downloads > 0),
            (1, self.citation_count > 0),
        )
        return sum(weight for weight, present in signals if present)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def _to_product(rec: dict) -> Product:
    """Convert one raw API record into a ``Product``.

    Missing counts default to 0, a missing title/type to ``""``; the product is
    flagged as Software Heritage-archived when any of its URLs contains
    ``softwareheritage.org``.
    """
    impact = _impact(rec)
    indicators = rec.get("indicators") or {}
    usage = indicators.get("usageCounts") or {}
    archived = any("softwareheritage.org" in (url or "") for url in _urls(rec))
    values = {
        "doi": _doi_of(rec),
        "title": (rec.get("mainTitle") or "").strip(),
        "authors": _surnames(rec),
        "year": _year(rec),
        "type": (rec.get("type") or "").lower(),
        "citation_count": int(impact.get("citationCount") or 0),
        "citation_class": impact.get("citationClass"),
        "influence_class": impact.get("influenceClass"),
        "popularity_class": impact.get("popularityClass"),
        "impulse_class": impact.get("impulseClass"),
        "code_repo": rec.get("codeRepositoryUrl"),
        "swh_archived": archived,
        "downloads": int(usage.get("downloads") or 0),
        "raw_id": rec.get("id"),
    }
    return Product(**values)
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def search_products(topic: str, type_: str, size: int = 25, page: int = 1) -> list[Product]:
    """Free-text search over ``researchProducts``, returned as ``Product`` objects.

    ``type_`` is one of {publication, software, dataset, other}. Keep `topic`
    short. ``size`` and ``page`` are sent as ``pageSize`` and ``page``.
    """
    query = {"search": topic, "type": type_, "pageSize": size, "page": page}
    payload = _get("researchProducts", query)
    records = payload.get("results") or []
    return list(map(_to_product, records))
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def get_by_doi(doi: str) -> Product | None:
    """Look up a single product by DOI; return ``None`` when it cannot be found.

    First tries the dedup id (md5 of the lowercased DOI). If that yields
    nothing (or the request fails), falls back to a free-text search for the
    DOI string and keeps the first result whose DOI matches exactly.
    """
    import hashlib

    wanted = doi.lower()
    dedup_id = "doi_dedup___::" + hashlib.md5(wanted.encode()).hexdigest()
    try:
        primary = _get("researchProducts", {"id": dedup_id, "pageSize": 1})
    except Exception:
        # best effort: a failed id lookup just means we go to the fallback
        primary = {}
    direct_hits = primary.get("results") or []
    if direct_hits:
        return _to_product(direct_hits[0])

    # fallback: the DOI may be deduped with a preprint -> search by DOI string
    try:
        fallback = _get("researchProducts", {"search": doi, "pageSize": 5})
    except Exception:
        return None
    matches = (
        _to_product(record)
        for record in (fallback.get("results") or [])
        if _doi_of(record) == wanted
    )
    return next(matches, None)
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
@dataclass
class Funder:
    """One funder with the funded amount accumulated for it across projects."""

    # Display name used as the aggregation key ("?" when the record has none).
    name: str
    # Jurisdiction as reported on the funding record; may be absent.
    jurisdiction: str | None
    # Running total of project fundedAmount values; starts at zero.
    funded_amount: float = 0.0
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
@dataclass
class ProjectLandscape:
    """Topic-level funding summary produced by ``funder_landscape``."""

    # Number of projects for the topic.
    total: int
    # Funders ordered by descending funded_amount; default_factory gives each instance its own list.
    funders: list[Funder] = field(default_factory=list)
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def funder_landscape(topic: str, size: int = 20) -> ProjectLandscape:
    """Aggregate funder context for a field via /projects.

    Per-paper funding is NOT reachable on this connector; this is topic-level
    CoARA context only. Each project's ``fundedAmount`` is added to every
    funder listed on that project, and funders are returned largest-first.
    ``total`` is the header's ``numFound``, or the number of returned projects
    when that is missing or zero.
    """
    payload = _get("projects", {"search": topic, "pageSize": size})
    projects = payload.get("results") or []
    header = payload.get("header") or {}
    total = header.get("numFound") or len(projects)

    by_name: dict[str, Funder] = {}
    for project in projects:
        for funding in project.get("fundings") or []:
            label = funding.get("name") or funding.get("shortName") or "?"
            granted = project.get("granted") or {}
            amount = float(granted.get("fundedAmount") or 0)
            funder = by_name.get(label)
            if funder is None:
                funder = Funder(name=label, jurisdiction=funding.get("jurisdiction"))
                by_name[label] = funder
            funder.funded_amount += amount

    ranked = sorted(by_name.values(), key=lambda entry: -entry.funded_amount)
    return ProjectLandscape(total=int(total), funders=ranked)
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
"""Core Replication Radar logic — pure functions over the OpenAIRE client + verdicts.
|
|
2
|
+
|
|
3
|
+
Three capabilities (exposed as MCP tools in server.py):
|
|
4
|
+
- radar(topic) : impact-ranked replication targets in a field,
|
|
5
|
+
each flagged open vs already-verified, with a
|
|
6
|
+
field-level funder-context panel.
|
|
7
|
+
- find_independent_software(doi): reusable engines NOT authored by the original team
|
|
8
|
+
(the reproduction-vs-replication distinction, made
|
|
9
|
+
computable as author-disjointness).
|
|
10
|
+
- replication_status(doi) : Science Live verdict overlay for one DOI.
|
|
11
|
+
"""
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
from . import openaire, verdicts
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _independence(target_authors: list[str], cand_authors: list[str]) -> bool:
|
|
18
|
+
"""A candidate tool is INDEPENDENT of the target paper if no author surname is
|
|
19
|
+
shared. This is what makes a *replication* (different toolchain) rather than a
|
|
20
|
+
*reproduction* (the original team's code)."""
|
|
21
|
+
if not cand_authors:
|
|
22
|
+
return True # unattributed engine (e.g. a package repo) — treat as independent
|
|
23
|
+
return not (set(target_authors) & set(cand_authors))
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def replication_status(doi: str) -> dict:
    """Return the verdict overlay for one DOI.

    The result is the dict from ``verdicts.status_for`` with the lowercased
    DOI added under ``"doi"``.
    """
    status = verdicts.status_for(doi)
    result = {"doi": doi.lower()}
    result.update(status)
    return result
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def find_independent_software(
    doi: str | None = None,
    topic: str | None = None,
    original_authors: list[str] | None = None,
    limit: int = 8,
) -> dict:
    """Find reusable, INDEPENDENT method software for replicating a claim.

    Provide a DOI (authors are looked up) or pass original_authors directly, plus a
    short `topic` to search the software pool. Ranks by reuse signal, not citations.

    Args:
        doi: DOI of the original paper; only looked up when ``original_authors``
            is not given.
        topic: Short search phrase. When empty, the first three words of the
            looked-up paper title are used, or ``"software"`` as a last resort.
        original_authors: Surnames of the original team, in any letter case.
        limit: Maximum number of software rows returned.

    Returns:
        A dict with ``query_topic``, the normalized ``original_authors``,
        ``independent_count`` (counted over the whole searched pool, not just
        the returned rows) and ``software`` (independent tools first, then by
        descending reuse score).
    """
    paper = None
    if original_authors is None and doi:
        paper = openaire.get_by_doi(doi)
        original_authors = paper.authors if paper else []
    # Candidate author lists hold stripped, lowercased surnames, so normalize
    # caller-supplied names the same way. Otherwise "Phillips" would never match
    # "phillips" and the original team's own tool would be reported as independent.
    original_authors = [
        name.strip().lower()
        for name in (original_authors or [])
        if name and name.strip()
    ]
    if not topic:
        # derive a short topic from the title's leading words
        topic = " ".join((paper.title if paper else "").split()[:3]) or "software"

    pool = openaire.search_products(topic, "software", size=25)
    rows = [
        {
            "title": p.title,
            "doi": p.doi,
            "authors": p.authors,
            "independent": _independence(original_authors, p.authors),
            "reuse_score": p.reuse_score,
            "code_repo": p.code_repo,
            "swh_archived": p.swh_archived,
            "downloads": p.downloads,
        }
        for p in pool
    ]
    # independent first, then most-reusable
    rows.sort(key=lambda r: (not r["independent"], -r["reuse_score"]))
    return {
        "query_topic": topic,
        "original_authors": original_authors,
        "independent_count": sum(1 for r in rows if r["independent"]),
        "software": rows[:limit],
    }
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def radar(topic: str, limit: int = 8, pool: int = 30) -> dict:
    """Build the impact-ranked list of replication targets for a field.

    Searches ``pool`` publications for ``topic``, ranks them by impact and keeps
    the best ``limit``. Each target is flagged OPEN or VERIFIED (Science Live
    overlay) and lists up to three independent tools from the field's software
    pool. Papers from the verdict index that were not among the targets but
    share a significant word (longer than three characters) with the topic are
    reported separately, together with a field-level funder panel.
    """
    ranked = sorted(
        openaire.search_products(topic, "publication", size=pool),
        key=lambda paper: paper.impact_rank,
    )

    # one software pull for the field; independence is computed per target
    field_software = openaire.search_products(topic, "software", size=25)

    targets = []
    for paper in ranked[:limit]:
        status = verdicts.status_for(paper.doi)
        tools = sorted(
            (
                sw
                for sw in field_software
                if _independence(paper.authors, sw.authors) and sw.reuse_score >= 2
            ),
            key=lambda sw: -sw.reuse_score,
        )
        targets.append(
            {
                "title": paper.title,
                "doi": paper.doi,
                "year": paper.year,
                "citations": paper.citation_count,
                "impact": {
                    "citationClass": paper.citation_class,
                    "influenceClass": paper.influence_class,
                    "popularityClass": paper.popularity_class,
                },
                "status": "VERIFIED" if status["replicated"] else "OPEN",
                "verification": status["summary"],
                "verifications": status["verifications"],
                "independent_tooling": [
                    {"title": sw.title, "code_repo": sw.code_repo, "swh": sw.swh_archived}
                    for sw in tools[:3]
                ],
            }
        )

    # Keyword retrieval cannot be trusted to surface already-checked papers, so
    # walk the verdict index itself and keep the entries whose title shares a
    # significant token with the topic.
    already_listed = {entry["doi"] for entry in targets}
    significant = {word.lower() for word in topic.split() if len(word) > 3}
    verified_in_field = []
    for vdoi in sorted(verdicts.all_dois()):
        if vdoi in already_listed:
            continue
        product = openaire.get_by_doi(vdoi)
        if not product:
            continue
        title_words = {word.lower().strip(",.:") for word in product.title.split()}
        if significant.isdisjoint(title_words):
            continue
        status = verdicts.status_for(vdoi)
        verified_in_field.append(
            {
                "title": product.title,
                "doi": vdoi,
                "citations": product.citation_count,
                "impact": {
                    "citationClass": product.citation_class,
                    "influenceClass": product.influence_class,
                },
                "status": "VERIFIED",
                "verification": status["summary"],
                "verifications": status["verifications"],
            }
        )

    landscape = openaire.funder_landscape(topic, size=20)
    open_total = sum(1 for entry in targets if entry["status"] == "OPEN")
    verified_targets = sum(1 for entry in targets if entry["status"] == "VERIFIED")
    top_funders = [
        {
            "name": funder.name,
            "jurisdiction": funder.jurisdiction,
            "funded_eur": round(funder.funded_amount),
        }
        for funder in landscape.funders[:5]
    ]
    return {
        "topic": topic,
        "targets": targets,
        "verified_in_field": verified_in_field,
        "open_count": open_total,
        "verified_count": verified_targets + len(verified_in_field),
        "funder_context": {
            "projects_in_field": landscape.total,
            "top_funders": top_funders,
        },
    }
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
"""MCP server exposing the Replication Radar over the OpenAIRE Graph.
|
|
2
|
+
|
|
3
|
+
Run: python -m replication_radar.server (stdio transport)
|
|
4
|
+
Add to an MCP client (.mcp.json):
|
|
5
|
+
{ "mcpServers": { "replication-radar": {
|
|
6
|
+
"command": "python", "args": ["-m", "replication_radar.server"] } } }
|
|
7
|
+
|
|
8
|
+
Hits api.openaire.eu/graph/v1 directly (anonymous). Point elsewhere with
|
|
9
|
+
RADAR_OPENAIRE_BASE (e.g. the Alien AI-Gateway endpoint).
|
|
10
|
+
"""
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
from mcp.server.fastmcp import FastMCP
|
|
14
|
+
|
|
15
|
+
# import from the submodule directly (the package exports `radar` as a *function*,
|
|
16
|
+
# which would shadow the module on `from . import radar`).
|
|
17
|
+
from .radar import (
|
|
18
|
+
radar as _radar,
|
|
19
|
+
find_independent_software as _find_software,
|
|
20
|
+
replication_status as _replication_status,
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
# Server instance the @mcp.tool() functions below register on; main() starts it.
mcp = FastMCP("replication-radar")
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# NOTE(review): the docstring is presumably what FastMCP publishes to clients as
# the tool description — confirm before rewording it.
@mcp.tool()
def radar(topic: str, limit: int = 8) -> dict:
    """Impact-ranked replication targets in a research field.

    Returns high-impact OpenAIRE papers worth replicating, each flagged OPEN
    (opportunity) or VERIFIED (already checked by a Science Live replication, with
    the verdict), plus independent reusable tooling and a field funder-context panel.
    Keep `topic` short (2-3 words); OpenAIRE free-text terms are AND-ed.
    """
    # Delegates to the core function; its `pool` argument is left at the default.
    return _radar(topic, limit=limit)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
# NOTE(review): the docstring is presumably what FastMCP publishes to clients as
# the tool description — confirm before rewording it.
@mcp.tool()
def find_independent_software(doi: str = "", topic: str = "", limit: int = 8) -> dict:
    """Reusable method software for *replicating* a claim — engines NOT authored by
    the original paper's team (author-disjoint), ranked by reuse signal (code repo +
    Software Heritage archival + usage), not citations. Pass the original paper's DOI
    (authors are looked up) and a short topic."""
    # Empty strings are turned into None so the core function applies its own
    # handling for a missing DOI or topic.
    return _find_software(doi=doi or None, topic=topic or None, limit=limit)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
# NOTE(review): the docstring is presumably what FastMCP publishes to clients as
# the tool description — confirm before rewording it.
@mcp.tool()
def replication_status(doi: str) -> dict:
    """Has this DOI been independently replicated, and did it hold? Returns the
    Science Live verdict(s) (the reliability signal the OpenAIRE Graph cannot hold)
    with links to the CiTO nanopubs, or 'open' if not yet replicated."""
    # Straight pass-through to the core function of the same name.
    return _replication_status(doi)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def main() -> None:
    """Start the MCP server via ``mcp.run()`` (stdio transport per the module docstring)."""
    mcp.run()


# Allows `python -m replication_radar.server` to launch the server directly.
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
"""The 'already-checked' memory layer: DOI -> Science Live replication verdicts.
|
|
2
|
+
|
|
3
|
+
This is the signal the OpenAIRE Graph structurally cannot hold (citation-popularity
|
|
4
|
+
is orthogonal to whether a claim held). Verdicts live in CiTO nanopubs; this index
|
|
5
|
+
is the bundled crosswalk. Extend data/verdicts.json as new chains are published.
|
|
6
|
+
"""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
from functools import lru_cache
|
|
11
|
+
from importlib import resources
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@lru_cache(maxsize=1)
def _index() -> dict[str, list[dict]]:
    """Load the bundled ``data/verdicts.json`` and return its ``verifications`` map.

    The mapping (DOI -> list of verification dicts) is read once and cached for
    the lifetime of the process; an absent or empty ``verifications`` key
    yields ``{}``.
    """
    resource = resources.files(__package__).joinpath("data/verdicts.json")
    # JSON is UTF-8; without an explicit encoding the text layer falls back to
    # the platform's locale default, which can mis-decode non-ASCII content.
    with resource.open(encoding="utf-8") as fh:
        return (json.load(fh).get("verifications")) or {}
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def status_for(doi: str | None) -> dict:
    """Return the replication status for a DOI.

    {"replicated": bool, "verifications": [...], "summary": str}

    The DOI is matched case-insensitively and surrounding whitespace is
    ignored. A missing, blank or unknown DOI yields the "open" status. The
    returned verification dicts are copies, so callers may modify them freely.
    """
    from copy import deepcopy

    key = (doi or "").strip().lower()
    # A blank DOI can never match, so the index is not even loaded for it.
    hits = _index().get(key, []) if key else []
    if not hits:
        return {"replicated": False, "verifications": [], "summary": "open"}
    labels = sorted({entry["verdict"] for entry in hits})
    return {
        "replicated": True,
        # The index is cached for the whole process; handing out its own
        # list/dicts would let one caller's mutation leak into later lookups.
        "verifications": deepcopy(hits),
        "summary": f"{len(hits)} verification(s): {', '.join(labels)}",
    }
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def all_dois() -> set[str]:
    """Return a fresh set of every DOI that has at least one entry in the verdict index."""
    return set(_index())
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: replication-radar
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: MCP server that turns the OpenAIRE Graph into a ranked replication queue — impact-ranked targets, independent reusable tooling, and the Science Live verification overlay.
|
|
5
|
+
Project-URL: Homepage, https://github.com/ScienceLiveHub/replication-radar
|
|
6
|
+
Project-URL: Repository, https://github.com/ScienceLiveHub/replication-radar
|
|
7
|
+
Project-URL: Science Live, https://sciencelive4all.org
|
|
8
|
+
Author: Anne Fouilloux
|
|
9
|
+
License: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: FORRT,mcp,nanopublication,open-science,openaire,replication
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Science/Research
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Topic :: Scientific/Engineering
|
|
17
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
18
|
+
Requires-Python: >=3.10
|
|
19
|
+
Requires-Dist: mcp>=1.2.0
|
|
20
|
+
Provides-Extra: dev
|
|
21
|
+
Requires-Dist: pytest>=8; extra == 'dev'
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
|
|
24
|
+
# Replication Radar
|
|
25
|
+
|
|
26
|
+
An **MCP server that makes the OpenAIRE Graph more useful for replication.** Point it
|
|
27
|
+
at a research field or a paper and it answers the question the Graph structurally
|
|
28
|
+
cannot: *what high-impact work is worth replicating, is there **independent** reusable
|
|
29
|
+
tooling to do it, and has it already been checked — with what verdict?*
|
|
30
|
+
|
|
31
|
+
OpenAIRE's only value signal is citation-popularity (BIP! influence / popularity /
|
|
32
|
+
impulse, classes C1–C5) — paper-bound, and orthogonal to whether a claim is *true*.
|
|
33
|
+
The Radar joins three sources to add a **replication layer** on top:
|
|
34
|
+
|
|
35
|
+
- **OpenAIRE Graph** — impact-ranks candidate papers (`api.openaire.eu/graph/v1`).
|
|
36
|
+
- **Software Heritage + repo signals** — surfaces *reusable* method software.
|
|
37
|
+
- **Science Live nanopub verdicts** — the "already checked → did it hold" overlay.
|
|
38
|
+
|
|
39
|
+
> OpenAIRE AI Hackathon · Theme B (Build) · CC-BY. Built to be reused through the
|
|
40
|
+
> [forrt-replication-template](https://github.com/ScienceLiveHub/forrt-replication-template):
|
|
41
|
+
> discovery at the *start* of a replication, where the template's existing skills
|
|
42
|
+
> handle the nanopub chain at the *end*.
|
|
43
|
+
|
|
44
|
+
## Tools
|
|
45
|
+
|
|
46
|
+
| Tool | What it answers |
|
|
47
|
+
|---|---|
|
|
48
|
+
| `radar(topic)` | Impact-ranked replication targets in a field — each **OPEN** (opportunity) or **VERIFIED** (done, with verdict) + independent tooling + funder context |
|
|
49
|
+
| `find_independent_software(doi, topic)` | Reusable engines **not authored by the original team** (author-disjoint = *replication*, not *reproduction*), ranked by reuse signal not citations |
|
|
50
|
+
| `replication_status(doi)` | Has this DOI been replicated, did it hold? Verdict(s) + CiTO nanopub links, or `open` |
|
|
51
|
+
|
|
52
|
+
### The reproduction-vs-replication distinction, made computable
|
|
53
|
+
A *reproduction* re-runs the original code; a *replication* tests the same claim by a
|
|
54
|
+
**different** route. So the Radar filters tooling by **author-disjointness** from the
|
|
55
|
+
original paper — e.g. for Phillips et al. 2009, the `dismo` package (co-authored by
|
|
56
|
+
Phillips & Elith) is flagged *rooted* / non-independent, while `biomod2` and `jSDM`
|
|
57
|
+
are *independent*. That filter is the difference between the two, and it's the thing
|
|
58
|
+
that makes this replication-aware rather than just "find the code".
|
|
59
|
+
|
|
60
|
+
## Run
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
pip install -e . # installs the `mcp` runtime
|
|
64
|
+
python -m replication_radar.server # stdio MCP server
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
Add to an MCP client (`.mcp.json`):
|
|
68
|
+
|
|
69
|
+
```json
|
|
70
|
+
{ "mcpServers": {
|
|
71
|
+
"replication-radar": { "command": "python", "args": ["-m", "replication_radar.server"] }
|
|
72
|
+
} }
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
The **core** (OpenAIRE client + radar logic) is stdlib-only — try it without the MCP
|
|
76
|
+
runtime:
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
PYTHONPATH=src python3 demo_sdm.py # live vertical-slice demo on SDM
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
## Configuration
|
|
83
|
+
|
|
84
|
+
| Env var | Default | Purpose |
|
|
85
|
+
|---|---|---|
|
|
86
|
+
| `RADAR_OPENAIRE_BASE` | `https://api.openaire.eu/graph/v1` | Swap to the Alien AI-Gateway or a mirror — the Radar is endpoint-agnostic |
|
|
87
|
+
| `RADAR_HTTP_TIMEOUT` | `30` | Per-request timeout (s) |
|
|
88
|
+
|
|
89
|
+
## Known limits (v1, honest)
|
|
90
|
+
- **Keyword-bound discovery.** OpenAIRE free-text terms are AND-ed; long queries
|
|
91
|
+
return nothing. Use short topics. The VERIFIED overlay is *guaranteed* (resolved
|
|
92
|
+
from the verdict index directly), but OPEN-target recall depends on the query.
|
|
93
|
+
- **No graph-relation traversal** on the public API (paper→its software/data/grant
|
|
94
|
+
edges aren't exposed): tooling/data are matched heuristically by topic + author
|
|
95
|
+
independence, not by a hard relation. Upgrades cleanly if a gateway exposes relations.
|
|
96
|
+
- **Funder context is field-level, not per-paper** (per-paper funder attribution is
|
|
97
|
+
not reachable); budgets are frequently reported as 0 in records.
|
|
98
|
+
- The verdict index ships 6 source works / 12 chains (Science Live). Extend
|
|
99
|
+
`data/verdicts.json` to grow coverage.
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
replication_radar/__init__.py,sha256=7Ys8zOWMBapBHzgnNzT_bd7aX4zDB_wk0lzfAFQmfZc,497
|
|
2
|
+
replication_radar/openaire.py,sha256=pp87dgW0XZjxxPOy0xvL5xLhGw8Udr2v5IPy7GVYxXY,6886
|
|
3
|
+
replication_radar/radar.py,sha256=GCOF6HUquiO6exLQGt_x0gvkPnJ4dtXNm5Avlejrp-U,6361
|
|
4
|
+
replication_radar/server.py,sha256=2wLX--f1LS4yo1pt32En86FYz5MsJxcls9VXtRLTF3o,2201
|
|
5
|
+
replication_radar/verdicts.py,sha256=OjT4ESIGYCA3nVWSYp-IObK4cZTo_bOXbFqH6y29zpU,1309
|
|
6
|
+
replication_radar/data/verdicts.json,sha256=OQR1tVqAQmRK2_xBofeHZufFKjcN344LKx1Vp-fFsvw,3761
|
|
7
|
+
replication_radar-0.1.0.dist-info/METADATA,sha256=crP-b27ljqVOFWOP-oOUfBZylUqz22KOSFsOvsPzPKU,4834
|
|
8
|
+
replication_radar-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
|
|
9
|
+
replication_radar-0.1.0.dist-info/entry_points.txt,sha256=y7KOZtmp05sKfs0e_JzAoJR08fB0JR5RjbCbhB5ATFA,68
|
|
10
|
+
replication_radar-0.1.0.dist-info/licenses/LICENSE,sha256=97cSYKrE2bUYfMTtW1hpOptYWTZTXsWCWud5Qc4Tszc,1086
|
|
11
|
+
replication_radar-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Anne Fouilloux / Science Live
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|