replication-radar 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- replication_radar-0.1.0/.github/workflows/publish-pypi.yml +51 -0
- replication_radar-0.1.0/.gitignore +6 -0
- replication_radar-0.1.0/LICENSE +21 -0
- replication_radar-0.1.0/PKG-INFO +100 -0
- replication_radar-0.1.0/README.md +77 -0
- replication_radar-0.1.0/STORY.md +99 -0
- replication_radar-0.1.0/demo_sdm.py +49 -0
- replication_radar-0.1.0/pyproject.toml +41 -0
- replication_radar-0.1.0/src/replication_radar/__init__.py +10 -0
- replication_radar-0.1.0/src/replication_radar/data/verdicts.json +32 -0
- replication_radar-0.1.0/src/replication_radar/openaire.py +201 -0
- replication_radar-0.1.0/src/replication_radar/radar.py +157 -0
- replication_radar-0.1.0/src/replication_radar/server.py +60 -0
- replication_radar-0.1.0/src/replication_radar/verdicts.py +39 -0
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
# Publishes replication-radar to PyPI on each GitHub Release, using PyPI
|
|
4
|
+
# Trusted Publishing (OIDC) — no API token stored in the repo.
|
|
5
|
+
#
|
|
6
|
+
# One-time setup on PyPI (pypi.org -> your account -> Publishing -> Add a pending
|
|
7
|
+
# publisher), BEFORE the first release:
|
|
8
|
+
# PyPI project name : replication-radar
|
|
9
|
+
# Owner : ScienceLiveHub
|
|
10
|
+
# Repository name : replication-radar
|
|
11
|
+
# Workflow name : publish-pypi.yml
|
|
12
|
+
# Environment name : pypi
|
|
13
|
+
|
|
14
|
+
on:
|
|
15
|
+
release:
|
|
16
|
+
types: [published]
|
|
17
|
+
workflow_dispatch:
|
|
18
|
+
|
|
19
|
+
jobs:
|
|
20
|
+
build:
|
|
21
|
+
runs-on: ubuntu-latest
|
|
22
|
+
steps:
|
|
23
|
+
- uses: actions/checkout@v4
|
|
24
|
+
- uses: actions/setup-python@v5
|
|
25
|
+
with:
|
|
26
|
+
python-version: "3.12"
|
|
27
|
+
- name: Build sdist + wheel
|
|
28
|
+
run: |
|
|
29
|
+
python -m pip install --upgrade build
|
|
30
|
+
python -m build
|
|
31
|
+
- name: Smoke-check the wheel imports + bundled data resolves
|
|
32
|
+
run: |
|
|
33
|
+
python -m pip install dist/*.whl
|
|
34
|
+
python -c "import replication_radar as r; assert r.replication_status('10.1126/science.aax8591')['replicated']; print('ok', r.__version__)"
|
|
35
|
+
- uses: actions/upload-artifact@v4
|
|
36
|
+
with:
|
|
37
|
+
name: dist
|
|
38
|
+
path: dist/
|
|
39
|
+
|
|
40
|
+
publish:
|
|
41
|
+
needs: build
|
|
42
|
+
runs-on: ubuntu-latest
|
|
43
|
+
environment: pypi
|
|
44
|
+
permissions:
|
|
45
|
+
id-token: write # required for trusted publishing
|
|
46
|
+
steps:
|
|
47
|
+
- uses: actions/download-artifact@v4
|
|
48
|
+
with:
|
|
49
|
+
name: dist
|
|
50
|
+
path: dist/
|
|
51
|
+
- uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Anne Fouilloux / Science Live
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: replication-radar
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: MCP server that turns the OpenAIRE Graph into a ranked replication queue — impact-ranked targets, independent reusable tooling, and the Science Live verification overlay.
|
|
5
|
+
Project-URL: Homepage, https://github.com/ScienceLiveHub/replication-radar
|
|
6
|
+
Project-URL: Repository, https://github.com/ScienceLiveHub/replication-radar
|
|
7
|
+
Project-URL: Science Live, https://sciencelive4all.org
|
|
8
|
+
Author: Anne Fouilloux
|
|
9
|
+
License: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: FORRT,mcp,nanopublication,open-science,openaire,replication
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Science/Research
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Topic :: Scientific/Engineering
|
|
17
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
18
|
+
Requires-Python: >=3.10
|
|
19
|
+
Requires-Dist: mcp>=1.2.0
|
|
20
|
+
Provides-Extra: dev
|
|
21
|
+
Requires-Dist: pytest>=8; extra == 'dev'
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
|
|
24
|
+
# Replication Radar
|
|
25
|
+
|
|
26
|
+
An **MCP server that makes the OpenAIRE Graph more useful for replication.** Point it
|
|
27
|
+
at a research field or a paper and it answers the question the Graph structurally
|
|
28
|
+
cannot: *what high-impact work is worth replicating, is there **independent** reusable
|
|
29
|
+
tooling to do it, and has it already been checked — with what verdict?*
|
|
30
|
+
|
|
31
|
+
OpenAIRE's only value signal is citation-popularity (BIP! influence / popularity /
|
|
32
|
+
impulse, classes C1–C5) — paper-bound, and orthogonal to whether a claim is *true*.
|
|
33
|
+
The Radar joins three sources to add a **replication layer** on top:
|
|
34
|
+
|
|
35
|
+
- **OpenAIRE Graph** — impact-ranks candidate papers (`api.openaire.eu/graph/v1`).
|
|
36
|
+
- **Software Heritage + repo signals** — surfaces *reusable* method software.
|
|
37
|
+
- **Science Live nanopub verdicts** — the "already checked → did it hold" overlay.
|
|
38
|
+
|
|
39
|
+
> OpenAIRE AI Hackathon · Theme B (Build) · CC-BY. Built to be reused through the
|
|
40
|
+
> [forrt-replication-template](https://github.com/ScienceLiveHub/forrt-replication-template):
|
|
41
|
+
> discovery at the *start* of a replication, where the template's existing skills
|
|
42
|
+
> handle the nanopub chain at the *end*.
|
|
43
|
+
|
|
44
|
+
## Tools
|
|
45
|
+
|
|
46
|
+
| Tool | What it answers |
|
|
47
|
+
|---|---|
|
|
48
|
+
| `radar(topic)` | Impact-ranked replication targets in a field — each **OPEN** (opportunity) or **VERIFIED** (done, with verdict) + independent tooling + funder context |
|
|
49
|
+
| `find_independent_software(doi, topic)` | Reusable engines **not authored by the original team** (author-disjoint = *replication*, not *reproduction*), ranked by reuse signal not citations |
|
|
50
|
+
| `replication_status(doi)` | Has this DOI been replicated, did it hold? Verdict(s) + CiTO nanopub links, or `open` |
|
|
51
|
+
|
|
52
|
+
### The reproduction-vs-replication distinction, made computable
|
|
53
|
+
A *reproduction* re-runs the original code; a *replication* tests the same claim by a
|
|
54
|
+
**different** route. So the Radar filters tooling by **author-disjointness** from the
|
|
55
|
+
original paper — e.g. for Phillips et al. 2009, the `dismo` package (co-authored by
|
|
56
|
+
Phillips & Elith) is flagged *rooted* / non-independent, while `biomod2` and `jSDM`
|
|
57
|
+
are *independent*. That filter is the difference between the two, and it's the thing
|
|
58
|
+
that makes this replication-aware rather than just "find the code".
|
|
59
|
+
|
|
60
|
+
## Run
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
pip install -e . # installs the `mcp` runtime
|
|
64
|
+
python -m replication_radar.server # stdio MCP server
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
Add to an MCP client (`.mcp.json`):
|
|
68
|
+
|
|
69
|
+
```json
|
|
70
|
+
{ "mcpServers": {
|
|
71
|
+
"replication-radar": { "command": "python", "args": ["-m", "replication_radar.server"] }
|
|
72
|
+
} }
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
The **core** (OpenAIRE client + radar logic) is stdlib-only — try it without the MCP
|
|
76
|
+
runtime:
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
PYTHONPATH=src python3 demo_sdm.py # live vertical-slice demo on SDM
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
## Configuration
|
|
83
|
+
|
|
84
|
+
| Env var | Default | Purpose |
|
|
85
|
+
|---|---|---|
|
|
86
|
+
| `RADAR_OPENAIRE_BASE` | `https://api.openaire.eu/graph/v1` | Swap to the Alien AI-Gateway or a mirror — the Radar is endpoint-agnostic |
|
|
87
|
+
| `RADAR_HTTP_TIMEOUT` | `30` | Per-request timeout (s) |
|
|
88
|
+
|
|
89
|
+
## Known limits (v1, honest)
|
|
90
|
+
- **Keyword-bound discovery.** OpenAIRE free-text terms are AND-ed; long queries
|
|
91
|
+
return nothing. Use short topics. The VERIFIED overlay is *guaranteed* (resolved
|
|
92
|
+
from the verdict index directly), but OPEN-target recall depends on the query.
|
|
93
|
+
- **No graph-relation traversal** on the public API (paper→its software/data/grant
|
|
94
|
+
edges aren't exposed): tooling/data are matched heuristically by topic + author
|
|
95
|
+
independence, not by a hard relation. Upgrades cleanly if a gateway exposes relations.
|
|
96
|
+
- **Funder context is field-level, not per-paper** (per-paper funder attribution is
|
|
97
|
+
not reachable); budgets are frequently reported as 0 in records.
|
|
98
|
+
- The verdict index ships 6 source works / 12 chains (Science Live). Extend
|
|
99
|
+
`data/verdicts.json` to grow coverage.
|
|
100
|
+
```
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
# Replication Radar
|
|
2
|
+
|
|
3
|
+
An **MCP server that makes the OpenAIRE Graph more useful for replication.** Point it
|
|
4
|
+
at a research field or a paper and it answers the question the Graph structurally
|
|
5
|
+
cannot: *what high-impact work is worth replicating, is there **independent** reusable
|
|
6
|
+
tooling to do it, and has it already been checked — with what verdict?*
|
|
7
|
+
|
|
8
|
+
OpenAIRE's only value signal is citation-popularity (BIP! influence / popularity /
|
|
9
|
+
impulse, classes C1–C5) — paper-bound, and orthogonal to whether a claim is *true*.
|
|
10
|
+
The Radar joins three sources to add a **replication layer** on top:
|
|
11
|
+
|
|
12
|
+
- **OpenAIRE Graph** — impact-ranks candidate papers (`api.openaire.eu/graph/v1`).
|
|
13
|
+
- **Software Heritage + repo signals** — surfaces *reusable* method software.
|
|
14
|
+
- **Science Live nanopub verdicts** — the "already checked → did it hold" overlay.
|
|
15
|
+
|
|
16
|
+
> OpenAIRE AI Hackathon · Theme B (Build) · CC-BY. Built to be reused through the
|
|
17
|
+
> [forrt-replication-template](https://github.com/ScienceLiveHub/forrt-replication-template):
|
|
18
|
+
> discovery at the *start* of a replication, where the template's existing skills
|
|
19
|
+
> handle the nanopub chain at the *end*.
|
|
20
|
+
|
|
21
|
+
## Tools
|
|
22
|
+
|
|
23
|
+
| Tool | What it answers |
|
|
24
|
+
|---|---|
|
|
25
|
+
| `radar(topic)` | Impact-ranked replication targets in a field — each **OPEN** (opportunity) or **VERIFIED** (done, with verdict) + independent tooling + funder context |
|
|
26
|
+
| `find_independent_software(doi, topic)` | Reusable engines **not authored by the original team** (author-disjoint = *replication*, not *reproduction*), ranked by reuse signal not citations |
|
|
27
|
+
| `replication_status(doi)` | Has this DOI been replicated, did it hold? Verdict(s) + CiTO nanopub links, or `open` |
|
|
28
|
+
|
|
29
|
+
### The reproduction-vs-replication distinction, made computable
|
|
30
|
+
A *reproduction* re-runs the original code; a *replication* tests the same claim by a
|
|
31
|
+
**different** route. So the Radar filters tooling by **author-disjointness** from the
|
|
32
|
+
original paper — e.g. for Phillips et al. 2009, the `dismo` package (co-authored by
|
|
33
|
+
Phillips & Elith) is flagged *rooted* / non-independent, while `biomod2` and `jSDM`
|
|
34
|
+
are *independent*. That filter is the difference between the two, and it's the thing
|
|
35
|
+
that makes this replication-aware rather than just "find the code".
|
|
36
|
+
|
|
37
|
+
## Run
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
pip install -e . # installs the `mcp` runtime
|
|
41
|
+
python -m replication_radar.server # stdio MCP server
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
Add to an MCP client (`.mcp.json`):
|
|
45
|
+
|
|
46
|
+
```json
|
|
47
|
+
{ "mcpServers": {
|
|
48
|
+
"replication-radar": { "command": "python", "args": ["-m", "replication_radar.server"] }
|
|
49
|
+
} }
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
The **core** (OpenAIRE client + radar logic) is stdlib-only — try it without the MCP
|
|
53
|
+
runtime:
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
PYTHONPATH=src python3 demo_sdm.py # live vertical-slice demo on SDM
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
## Configuration
|
|
60
|
+
|
|
61
|
+
| Env var | Default | Purpose |
|
|
62
|
+
|---|---|---|
|
|
63
|
+
| `RADAR_OPENAIRE_BASE` | `https://api.openaire.eu/graph/v1` | Swap to the Alien AI-Gateway or a mirror — the Radar is endpoint-agnostic |
|
|
64
|
+
| `RADAR_HTTP_TIMEOUT` | `30` | Per-request timeout (s) |
|
|
65
|
+
|
|
66
|
+
## Known limits (v1, honest)
|
|
67
|
+
- **Keyword-bound discovery.** OpenAIRE free-text terms are AND-ed; long queries
|
|
68
|
+
return nothing. Use short topics. The VERIFIED overlay is *guaranteed* (resolved
|
|
69
|
+
from the verdict index directly), but OPEN-target recall depends on the query.
|
|
70
|
+
- **No graph-relation traversal** on the public API (paper→its software/data/grant
|
|
71
|
+
edges aren't exposed): tooling/data are matched heuristically by topic + author
|
|
72
|
+
independence, not by a hard relation. Upgrades cleanly if a gateway exposes relations.
|
|
73
|
+
- **Funder context is field-level, not per-paper** (per-paper funder attribution is
|
|
74
|
+
not reachable); budgets are frequently reported as 0 in records.
|
|
75
|
+
- The verdict index ships 6 source works / 11 chains (Science Live). Extend
|
|
76
|
+
`data/verdicts.json` to grow coverage.
|
|
77
|
+
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
# Replication Radar — making the OpenAIRE Graph tell you what to replicate
|
|
2
|
+
|
|
3
|
+
*OpenAIRE AI Hackathon · Theme B (Build) · a Science Live contribution*
|
|
4
|
+
|
|
5
|
+
## The question
|
|
6
|
+
|
|
7
|
+
The OpenAIRE Graph knows how *visible* a paper is — citation influence, popularity,
|
|
8
|
+
impulse, the BIP! classes C1–C5. It does not, and structurally cannot, know whether a
|
|
9
|
+
paper's claim is *true*. Citation-popularity is a measure of attention, and attention
|
|
10
|
+
is orthogonal to reliability: a heavily-cited result looks identical, in the Graph, to
|
|
11
|
+
one nobody ever checked.
|
|
12
|
+
|
|
13
|
+
So we asked a build question, not a metrics question: **can we add the one signal the
|
|
14
|
+
Graph can't hold — has this claim been independently replicated, and did it hold — and
|
|
15
|
+
in doing so turn the Graph from a record of what *was* done into a tool that tells you
|
|
16
|
+
what's worth replicating *next*?**
|
|
17
|
+
|
|
18
|
+
## The journey
|
|
19
|
+
|
|
20
|
+
We started narrow: Science Live already has 30+ FORRT replication chains, each ending
|
|
21
|
+
in a cryptographically-signed nanopub that records a verdict (Validated, Partially
|
|
22
|
+
Supported, …). The first idea was a lookup — "is this DOI verified?" — joined to the
|
|
23
|
+
Graph by DOI. It worked, but it was **sparse**: it lights up only on the dozen papers
|
|
24
|
+
we happen to have replicated, and answers "no data" everywhere else. A tool that's
|
|
25
|
+
empty 99.99% of the time isn't a tool.
|
|
26
|
+
|
|
27
|
+
The turn came from a distinction that matters in replication science but is usually
|
|
28
|
+
left implicit. A **reproduction** re-runs the original code; a **replication** tests
|
|
29
|
+
the same claim by a *different* route. The right question for a research engineer isn't
|
|
30
|
+
"where's the paper's code" — it's "is there *independent* tooling I could use to check
|
|
31
|
+
this?" That reframing pointed at the whole Graph, not our twelve chains: the Graph
|
|
32
|
+
already records which results are high-impact, which have **reusable open software**,
|
|
33
|
+
and which have **open data** — it just never joins them into a "this is worth, and
|
|
34
|
+
feasible, to replicate" signal.
|
|
35
|
+
|
|
36
|
+
We spiked feasibility against the live OpenAIRE Graph API before building, and reported
|
|
37
|
+
honestly what works and what doesn't:
|
|
38
|
+
|
|
39
|
+
- **Impact ranking** — works (`citationImpact`, C1–C5, on papers and software).
|
|
40
|
+
- **Independent tooling** — works, and the reproduction-vs-replication line turns out
|
|
41
|
+
to be *computable*: for Phillips et al. 2009 (2,441 citations, C1), the `dismo`
|
|
42
|
+
package is flagged *non-independent* because Phillips and Elith co-author it, while
|
|
43
|
+
`biomod2` (Software-Heritage-archived) and `jSDM` are independent. Author-disjointness
|
|
44
|
+
is the filter that makes the tool replication-aware.
|
|
45
|
+
- **Reuse ranking** — research software is almost uniformly C5/0 citations, so we rank
|
|
46
|
+
*software* by reuse signal (code repo + Software Heritage archival + usage), not by
|
|
47
|
+
citations.
|
|
48
|
+
- **Reference data** — abundant (1,228 occurrence datasets for one query).
|
|
49
|
+
- **Funder context** — only *field-level* (per-paper funder attribution and graph-edge
|
|
50
|
+
traversal aren't exposed on the public API). We kept what works and dropped what
|
|
51
|
+
doesn't, rather than fake it.
|
|
52
|
+
|
|
53
|
+
Then we built it, proved it live on Species Distribution Models, and wired it into the
|
|
54
|
+
public `forrt-replication-template` so every new replication repo gets discovery built in.
|
|
55
|
+
|
|
56
|
+
## The insight
|
|
57
|
+
|
|
58
|
+
Three things we didn't expect going in:
|
|
59
|
+
|
|
60
|
+
1. **Reliability is a different *category* of signal, not a better metric.** The Graph's
|
|
61
|
+
citation axis can't be repaired into a truth axis; verification has to be *added*,
|
|
62
|
+
and it applies to any claim — including paperless ones the Graph never indexes.
|
|
63
|
+
2. **The reproduction/replication distinction is operational.** "Independent of the
|
|
64
|
+
original authors" is a concrete, computable filter — and it's the thing that turns
|
|
65
|
+
"find the code" into "find a way to *check* this."
|
|
66
|
+
3. **The Graph already holds the ingredients of a replication-readiness signal** (impact
|
|
67
|
+
+ reusable software + data) and simply doesn't join them. Joining them is most of the
|
|
68
|
+
value.
|
|
69
|
+
|
|
70
|
+
## What others can reuse
|
|
71
|
+
|
|
72
|
+
- **`replication-radar`** — an MCP server (`pip install` / `uvx`, MIT) exposing three
|
|
73
|
+
tools any agent can add next to the OpenAIRE MCP: `radar(topic)` (impact-ranked
|
|
74
|
+
replication targets, each OPEN or already-VERIFIED, with independent tooling + funder
|
|
75
|
+
context), `find_independent_software(doi)` (author-disjoint reusable engines), and
|
|
76
|
+
`replication_status(doi)` (the verdict overlay). It hits the public OpenAIRE Graph API
|
|
77
|
+
anonymously and is endpoint-agnostic (`RADAR_OPENAIRE_BASE`) so it points at the Alien
|
|
78
|
+
gateway or any mirror. Repo: https://github.com/ScienceLiveHub/replication-radar
|
|
79
|
+
- **The verdict-index format** (`data/verdicts.json`) — a portable DOI→verdict crosswalk
|
|
80
|
+
others can extend with their own replications.
|
|
81
|
+
- **The template integration** — a `/radar` discovery skill that drops into any
|
|
82
|
+
fork of `forrt-replication-template`.
|
|
83
|
+
- **The connector feasibility map** — a documented account of what the OpenAIRE MCP /
|
|
84
|
+
Graph API can and can't do for replication tooling, so the next builder doesn't
|
|
85
|
+
re-discover it.
|
|
86
|
+
|
|
87
|
+
## Honest limits
|
|
88
|
+
|
|
89
|
+
Discovery recall is keyword-bound (OpenAIRE free-text terms are AND-ed); the verified
|
|
90
|
+
overlay is authoritative but reflects a known index, not every replication that exists;
|
|
91
|
+
tooling is matched by topic + author-independence, not a proven ability to test a
|
|
92
|
+
specific claim — the tool surfaces and ranks, the researcher judges. None of these are
|
|
93
|
+
hidden in the output.
|
|
94
|
+
|
|
95
|
+
---
|
|
96
|
+
|
|
97
|
+
*Materials in this repository are licensed separately by type: **source code under MIT**, and this
|
|
98
|
+
write-up together with the verdict index (`STORY.md`, `data/verdicts.json`) under
|
|
99
|
+
**[CC-BY 4.0](https://creativecommons.org/licenses/by/4.0/)**.*
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
"""Live vertical-slice demo: the Replication Radar on Species Distribution Models.
|
|
2
|
+
|
|
3
|
+
Runs against the real OpenAIRE Graph API (anonymous). Proves the full pipeline:
|
|
4
|
+
impact-ranked targets + independent tooling + reference data + verified-overlay.
|
|
5
|
+
|
|
6
|
+
PYTHONPATH=src python3 demo_sdm.py
|
|
7
|
+
"""
|
|
8
|
+
import sys, os
|
|
9
|
+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "src"))
|
|
10
|
+
|
|
11
|
+
from replication_radar import radar, find_independent_software, replication_status # noqa: E402
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def hr(t):
    """Print *t* as a section banner framed by two 72-character ``=`` rules."""
    rule = "=" * 72
    print(f"\n{rule}\n {t}\n{rule}")
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# --- 1. Field-level radar: ranked replication targets for one topic ---------
hr("radar('species distribution') — replication targets in the field")
r = radar("species distribution", limit=8)
print(f"topic: {r['topic']} OPEN: {r['open_count']} VERIFIED: {r['verified_count']}\n")
for i, t in enumerate(r["targets"], 1):
    imp = t["impact"]
    print(f"{i:>2}. [{t['status']:8}] {(t['title'] or '')[:58]:58} {t['citations']:>5} cites "
          f"cit:{imp['citationClass']} inf:{imp['influenceClass']}")
    print(f" doi: {t['doi']}")
    if t["status"] == "VERIFIED":
        # Already-checked targets show their verdict summary instead of tooling.
        print(f" -> {t['verification']}")
    elif t["independent_tooling"]:
        # Only the first independent tool is shown to keep the listing compact.
        tl = t["independent_tooling"][0]
        print(f" independent tooling avail: {(tl['title'] or '')[:46]} swh={tl['swh']}")

print("\nfunder context (field-level):")
fc = r["funder_context"]
print(f" projects in field: {fc['projects_in_field']}")
for f in fc["top_funders"]:
    # Guard a missing funder name the same way titles and jurisdiction are
    # guarded, so one incomplete record cannot abort the demo with a TypeError.
    print(f" - {(f['name'] or '')[:42]:42} {f['jurisdiction'] or '--':4} EUR {f['funded_eur']:,}")

# --- 2. Author-disjoint tooling for one paper --------------------------------
hr("find_independent_software(Phillips 2009 = 10.1890/07-2153.1)")
fs = find_independent_software(doi="10.1890/07-2153.1", topic="species distribution")
print(f"original authors: {fs['original_authors']}")
print(f"independent tools found: {fs['independent_count']} / {len(fs['software'])}\n")
for s in fs["software"][:6]:
    flag = "INDEP" if s["independent"] else "rooted"
    print(f" [{flag}] reuse={s['reuse_score']} {(s['title'] or '')[:50]:50} authors={s['authors'][:2]}")

# --- 3. Verdict overlay: three indexed DOIs plus one that is not indexed -----
hr("replication_status() — the verified-overlay the Graph cannot hold")
for doi in ["10.1890/07-2153.1", "10.1073/pnas.0704469104", "10.1126/science.aax8591", "10.9999/not.replicated"]:
    st = replication_status(doi)
    print(f" {doi:28} -> {st['summary']}")
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "replication-radar"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "MCP server that turns the OpenAIRE Graph into a ranked replication queue — impact-ranked targets, independent reusable tooling, and the Science Live verification overlay."
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.10"
|
|
7
|
+
license = { text = "MIT" }
|
|
8
|
+
authors = [{ name = "Anne Fouilloux" }]
|
|
9
|
+
keywords = ["openaire", "mcp", "replication", "open-science", "nanopublication", "FORRT"]
|
|
10
|
+
classifiers = [
|
|
11
|
+
"Development Status :: 4 - Beta",
|
|
12
|
+
"Intended Audience :: Science/Research",
|
|
13
|
+
"License :: OSI Approved :: MIT License",
|
|
14
|
+
"Programming Language :: Python :: 3",
|
|
15
|
+
"Topic :: Scientific/Engineering",
|
|
16
|
+
"Topic :: Scientific/Engineering :: Information Analysis",
|
|
17
|
+
]
|
|
18
|
+
dependencies = [
|
|
19
|
+
"mcp>=1.2.0",
|
|
20
|
+
]
|
|
21
|
+
|
|
22
|
+
[project.urls]
|
|
23
|
+
Homepage = "https://github.com/ScienceLiveHub/replication-radar"
|
|
24
|
+
Repository = "https://github.com/ScienceLiveHub/replication-radar"
|
|
25
|
+
"Science Live" = "https://sciencelive4all.org"
|
|
26
|
+
|
|
27
|
+
[project.scripts]
|
|
28
|
+
replication-radar = "replication_radar.server:main"
|
|
29
|
+
|
|
30
|
+
[project.optional-dependencies]
|
|
31
|
+
# the core (openaire client + radar logic) is stdlib-only and needs nothing extra;
|
|
32
|
+
# `mcp` (declared under [project].dependencies above) is only required to run the server wrapper.
|
|
33
|
+
dev = ["pytest>=8"]
|
|
34
|
+
|
|
35
|
+
[build-system]
|
|
36
|
+
requires = ["hatchling"]
|
|
37
|
+
build-backend = "hatchling.build"
|
|
38
|
+
|
|
39
|
+
[tool.hatch.build.targets.wheel]
|
|
40
|
+
packages = ["src/replication_radar"]
|
|
41
|
+
# data/verdicts.json ships automatically as package data under the package dir.
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
"""Replication Radar — turn the OpenAIRE Graph into a ranked replication queue.
|
|
2
|
+
|
|
3
|
+
Adds a capability the Graph lacks: 'what high-impact work is worth replicating,
|
|
4
|
+
with INDEPENDENT reusable tooling, and has it already been checked?' — joining
|
|
5
|
+
OpenAIRE impact + Software Heritage reuse signals + Science Live nanopub verdicts.
|
|
6
|
+
"""
|
|
7
|
+
from .radar import radar, find_independent_software, replication_status
|
|
8
|
+
|
|
9
|
+
__all__ = ["radar", "find_independent_software", "replication_status"]
|
|
10
|
+
__version__ = "0.1.0"
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
{
|
|
2
|
+
"_meta": {
|
|
3
|
+
"source": "Science Live FORRT replication chains (nanopub verdicts), built 2026-06-13",
|
|
4
|
+
"schema": "doi (lowercased) -> list of verifications {repo, verdict, cito[], outcome_np, cito_np}",
|
|
5
|
+
"note": "This is the 'already-checked' memory layer of the Replication Radar. Verdicts are carried in CiTO nanopubs; the OpenAIRE Graph cannot hold them. Extend by adding chains."
|
|
6
|
+
},
|
|
7
|
+
"verifications": {
|
|
8
|
+
"10.1126/science.aax8591": [
|
|
9
|
+
{"repo": "weatherxbiodiversity-projection", "verdict": "Validated", "cito": ["confirms"], "outcome_np": "https://w3id.org/sciencelive/np/RAPZMgcYbScSAXnrnSySQwZzgSA_rn-xodlMxNlwwQYY8", "cito_np": "https://w3id.org/sciencelive/np/RALbHA-r6wIFOFPFlfIpwYqJEpzCFqeJ082iChgdfvhNM"},
|
|
10
|
+
{"repo": "weatherxbiodiversity-projection-nside128", "verdict": "Validated", "cito": ["confirms"], "outcome_np": "https://w3id.org/sciencelive/np/RAa4QR41Hot9zxujcrCyTo82Ij7oaw_6z8zk8NxDqoJFM", "cito_np": "https://w3id.org/sciencelive/np/RAhw9m0BEj0-9hXrTtJ2NHG5rMr-ZBf_mdBQTQRk6u3n4"},
|
|
11
|
+
{"repo": "weatherxbiodiversity-substrate-sensitivity", "verdict": "Validated+PartiallySupported", "cito": ["confirms", "extends", "qualifies"], "outcome_np": null, "cito_np": null}
|
|
12
|
+
],
|
|
13
|
+
"10.1890/07-2153.1": [
|
|
14
|
+
{"repo": "sdm-phillips-reproduction", "verdict": "Validated", "cito": ["confirms"], "outcome_np": "https://w3id.org/sciencelive/np/RA_uV84IchQAkkmCP_6amQir_flgCmvvt97DWIDmbu_V0", "cito_np": "https://w3id.org/sciencelive/np/RAWsmCzWMKYQQK_ovRvE1o2wqjYkoxjfZRncHEcWAvv2g"},
|
|
15
|
+
{"repo": "sdm-hotspot-spatial-effort", "verdict": "methodological-extension", "cito": ["extends", "usesMethodIn"], "outcome_np": "https://w3id.org/sciencelive/np/RA4q2J-h_UpFpeLTeL_DS8p7j7EOBCes4L1G1eOBfJiDo", "cito_np": "https://w3id.org/sciencelive/np/RA7151bPt5TSSTxi-sWGmZhUOHcqaSzevzhhD4QxmfURI"}
|
|
16
|
+
],
|
|
17
|
+
"10.1890/11-1952.1": [
|
|
18
|
+
{"repo": "sdm-hotspot-effort-correction", "verdict": "Validated", "cito": ["extends", "usesMethodIn"], "outcome_np": "https://w3id.org/sciencelive/np/RAsPjEImfZaXsIri0ny4j_s_k_6wyOlC6tkocl6w2y7f4", "cito_np": "https://w3id.org/sciencelive/np/RACYbb_IxZNnBcxI7uPqc-df2oRaMr4bqHJTOJe-BNmkc"}
|
|
19
|
+
],
|
|
20
|
+
"10.1073/pnas.0704469104": [
|
|
21
|
+
{"repo": "sdm-scale-replication", "verdict": "PartiallySupported", "cito": ["qualifies"], "outcome_np": "https://w3id.org/sciencelive/np/RAzeZKbUCEMXZXDc-WzgHZ4K5mOMwotYhS2uCKDDmdcHI", "cito_np": "https://w3id.org/sciencelive/np/RALjFcvPtncy74ZL8QgSiEyRZv_-mOiZj4wvWuq8JK-2s"}
|
|
22
|
+
],
|
|
23
|
+
"10.1016/j.ocemod.2024.102387": [
|
|
24
|
+
{"repo": "coastal-rom-replication", "verdict": "Validated", "cito": ["confirms"], "outcome_np": "https://w3id.org/sciencelive/np/RAG8PjhjvPQFZo54BTaV_b7TryMonH--aDGzEXLhzvQ4w", "cito_np": "https://w3id.org/sciencelive/np/RAuvGPQk_nxEcBWzADcLnyfqgjJ9Hr2aSWxwof2sDAung"},
|
|
25
|
+
{"repo": "european-coastal-biodiversity-replication", "verdict": "Validated", "cito": ["extends", "qualifies"], "outcome_np": "https://w3id.org/sciencelive/np/RAwrIP5nKIk8Qh7WeMs8z6HHVGssLVAkHDeI8nSB4LUK8", "cito_np": "https://w3id.org/sciencelive/np/RAEXzZcCXiwsNf19NbXvilwwxnFU5IvkplzWTNiNmT70A"}
|
|
26
|
+
],
|
|
27
|
+
"10.1038/s41597-022-01235-3": [
|
|
28
|
+
{"repo": "white-shark-geolocation-replication", "verdict": "PartiallySupported", "cito": ["qualifies"], "outcome_np": "https://w3id.org/sciencelive/np/RAv0cF0rwxb1CFmUAJlk5B7PLVc9pls5OvlfOHHHhtgm8", "cito_np": "https://w3id.org/sciencelive/np/RAnqtFUZHfmW7Dtmf3bcTQtjDAfrq5IGV4xQ8guW8L3vY"},
|
|
29
|
+
{"repo": "white-shark-geolocation-light", "verdict": "PartiallySupported", "cito": ["qualifies"], "outcome_np": "https://w3id.org/sciencelive/np/RAlwDA35wFcmV-ZYzOUB_E3SrqPMIlIxWftoiPFbzN-7I", "cito_np": "https://w3id.org/sciencelive/np/RA6JMK2CNN8MZITLdd3si08TwyR_-3fvbt6l6g3lf-YOg"}
|
|
30
|
+
]
|
|
31
|
+
}
|
|
32
|
+
}
|
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
"""Thin client over the OpenAIRE Graph API (graph/v1).
|
|
2
|
+
|
|
3
|
+
Hits api.openaire.eu directly (anonymous, no token needed for these queries).
|
|
4
|
+
Endpoint-agnostic: set RADAR_OPENAIRE_BASE to point at the Alien gateway or any
|
|
5
|
+
mirror later. Stdlib-only (urllib) so the core runs with zero install.
|
|
6
|
+
|
|
7
|
+
Operational rules learned from the connector spike (2026-06-13):
|
|
8
|
+
- Free-text terms are AND-ed: keep queries SHORT (2-3 words), OR-expand if needed.
|
|
9
|
+
- Rank SOFTWARE by reuse signal (repo + Software Heritage + usage), NOT citations
|
|
10
|
+
(research software is almost uniformly citationClass C5 / 0 citations).
|
|
11
|
+
- Rank PAPERS by citation impact (BIP! classes C1..C5 + count).
|
|
12
|
+
"""
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import json
|
|
16
|
+
import os
|
|
17
|
+
import urllib.parse
|
|
18
|
+
import urllib.request
|
|
19
|
+
from dataclasses import dataclass, field
|
|
20
|
+
from typing import Any
|
|
21
|
+
|
|
22
|
+
BASE = os.environ.get("RADAR_OPENAIRE_BASE", "https://api.openaire.eu/graph/v1")
|
|
23
|
+
_TIMEOUT = float(os.environ.get("RADAR_HTTP_TIMEOUT", "30"))
|
|
24
|
+
|
|
25
|
+
_CLASS_RANK = {"C1": 1, "C2": 2, "C3": 3, "C4": 4, "C5": 5, None: 9}
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _get(path: str, params: dict[str, Any]) -> dict:
    """Issue a GET against ``BASE/path`` and return the decoded JSON body.

    Parameters whose value is ``None`` are left out of the query string.
    Errors raised by ``urllib`` or by the JSON decoder propagate to the caller.
    """
    supplied = {key: value for key, value in params.items() if value is not None}
    query = urllib.parse.urlencode(supplied)
    request = urllib.request.Request(
        f"{BASE}/{path}?{query}",
        headers={"Accept": "application/json"},
    )
    with urllib.request.urlopen(request, timeout=_TIMEOUT) as response:
        return json.load(response)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _doi_of(rec: dict) -> str | None:
|
|
37
|
+
for p in (rec.get("pids") or []):
|
|
38
|
+
if (p.get("scheme") or "").lower() == "doi":
|
|
39
|
+
return (p.get("value") or "").lower() or None
|
|
40
|
+
for inst in (rec.get("instances") or []):
|
|
41
|
+
for p in (inst.get("pids") or []):
|
|
42
|
+
if (p.get("scheme") or "").lower() == "doi":
|
|
43
|
+
return (p.get("value") or "").lower() or None
|
|
44
|
+
return None
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _surnames(rec: dict) -> list[str]:
|
|
48
|
+
out: list[str] = []
|
|
49
|
+
for a in (rec.get("authors") or []):
|
|
50
|
+
s = a.get("surname") or ""
|
|
51
|
+
if not s and a.get("fullName"):
|
|
52
|
+
# "Surname, Given" or "Given Surname" -> take the comma-lead or last token
|
|
53
|
+
fn = a["fullName"]
|
|
54
|
+
s = fn.split(",")[0].strip() if "," in fn else fn.split()[-1]
|
|
55
|
+
s = s.strip().lower()
|
|
56
|
+
if s:
|
|
57
|
+
out.append(s)
|
|
58
|
+
return out
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _year(rec: dict) -> int | None:
|
|
62
|
+
d = rec.get("publicationDate") or ""
|
|
63
|
+
return int(d[:4]) if d[:4].isdigit() else None
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def _impact(rec: dict) -> dict:
|
|
67
|
+
return ((rec.get("indicators") or {}).get("citationImpact")) or {}
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _urls(rec: dict) -> list[str]:
|
|
71
|
+
out: list[str] = []
|
|
72
|
+
for inst in (rec.get("instances") or []):
|
|
73
|
+
out.extend(inst.get("urls") or [])
|
|
74
|
+
if rec.get("codeRepositoryUrl"):
|
|
75
|
+
out.append(rec["codeRepositoryUrl"])
|
|
76
|
+
return out
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
@dataclass
class Product:
    """Flattened view of one research-product record (paper, software, ...)."""

    doi: str | None
    title: str
    authors: list[str]  # lowercased surnames
    year: int | None
    type: str
    citation_count: int
    citation_class: str | None
    influence_class: str | None
    popularity_class: str | None
    impulse_class: str | None
    code_repo: str | None
    swh_archived: bool
    downloads: int
    raw_id: str | None = None

    @property
    def impact_rank(self) -> tuple[int, int, int]:
        """Ascending sort key: influence class, then citation class, then -citations."""
        influence = _CLASS_RANK.get(self.influence_class, 9)
        citation = _CLASS_RANK.get(self.citation_class, 9)
        return (influence, citation, -self.citation_count)

    @property
    def reuse_score(self) -> int:
        """Reuse signal for SOFTWARE, from 0 to 6.

        Two points each for a code repository and a Software Heritage archive,
        one point each for any downloads and any citations.
        """
        weighted_signals = (
            (2, self.code_repo),
            (2, self.swh_archived),
            (1, self.downloads > 0),
            (1, self.citation_count > 0),
        )
        return sum(weight for weight, present in weighted_signals if present)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def _to_product(rec: dict) -> Product:
    """Flatten one raw research-product record into a :class:`Product`.

    Missing or empty fields fall back to neutral values (empty string, 0 or
    ``None``).
    """
    impact = _impact(rec)
    indicators = rec.get("indicators") or {}
    usage_counts = indicators.get("usageCounts") or {}
    links = _urls(rec)

    doi = _doi_of(rec)
    title = (rec.get("mainTitle") or "").strip()
    surnames = _surnames(rec)
    year = _year(rec)
    kind = (rec.get("type") or "").lower()
    citations = int(impact.get("citationCount") or 0)
    # archived when any collected URL points at Software Heritage
    archived = any("softwareheritage.org" in (link or "") for link in links)
    downloads = int(usage_counts.get("downloads") or 0)

    return Product(
        doi=doi,
        title=title,
        authors=surnames,
        year=year,
        type=kind,
        citation_count=citations,
        citation_class=impact.get("citationClass"),
        influence_class=impact.get("influenceClass"),
        popularity_class=impact.get("popularityClass"),
        impulse_class=impact.get("impulseClass"),
        code_repo=rec.get("codeRepositoryUrl"),
        swh_archived=archived,
        downloads=downloads,
        raw_id=rec.get("id"),
    )
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def search_products(topic: str, type_: str, size: int = 25, page: int = 1) -> list[Product]:
    """Search ``researchProducts`` by free text and return the hits as products.

    ``type_`` is one of {publication, software, dataset, other}. Keep ``topic``
    short. ``size`` and ``page`` are sent as ``pageSize`` and ``page``.
    """
    query = {"search": topic, "type": type_, "pageSize": size, "page": page}
    payload = _get("researchProducts", query)
    records = payload.get("results") or []
    return list(map(_to_product, records))
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def get_by_doi(doi: str) -> Product | None:
    """Resolve a single product by DOI, or return ``None`` when it cannot be found.

    The DOI is stripped and lowercased once. The first attempt addresses the
    record by its dedup id (md5 of the lowercased DOI). If that yields nothing,
    a free-text search for the DOI is run and the first hit whose own DOI
    equals the requested one is returned.

    Lookup errors are swallowed on purpose (best-effort resolution): a failed
    direct lookup falls through to the search, and a failed search returns
    ``None``. A blank DOI returns ``None`` without any request.
    """
    import hashlib

    wanted = (doi or "").strip().lower()
    if not wanted:
        return None
    # MD5 only derives an identifier here, it is not a security measure.
    # Flagging it as such keeps the call usable on builds that restrict MD5.
    digest = hashlib.md5(wanted.encode(), usedforsecurity=False).hexdigest()
    try:
        data = _get("researchProducts", {"id": f"doi_dedup___::{digest}", "pageSize": 1})
    except Exception:
        data = {}
    results = data.get("results") or []
    if results:
        return _to_product(results[0])
    # fallback: the DOI may be deduped with a preprint -> search by DOI string
    try:
        data = _get("researchProducts", {"search": wanted, "pageSize": 5})
    except Exception:
        return None
    for r in data.get("results") or []:
        if _doi_of(r) == wanted:
            return _to_product(r)
    return None
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
@dataclass
class Funder:
    """A funder together with the amount accumulated for it so far."""

    name: str
    jurisdiction: str | None
    # running total; starts at zero and is incremented by funder_landscape()
    funded_amount: float = 0.0
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
@dataclass
class ProjectLandscape:
    """Result of funder_landscape(): a project count plus the aggregated funders."""

    total: int
    # each instance gets its own list (default_factory), never a shared default
    funders: list[Funder] = field(default_factory=list)
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def funder_landscape(topic: str, size: int = 20) -> ProjectLandscape:
    """Aggregate funder context for a field via /projects (per-paper funding is NOT
    reachable on this connector; this is topic-level CoARA context only).

    Args:
        topic: free-text search string sent to the ``projects`` endpoint.
        size: page size requested (only this first page is aggregated).

    Returns:
        A ``ProjectLandscape`` whose ``total`` is the reported ``numFound`` (or
        the number of returned projects when that is missing) and whose
        ``funders`` are sorted by accumulated amount, largest first. Each
        project's ``granted.fundedAmount`` is credited in full to every
        distinct funder the project lists.
    """
    data = _get("projects", {"search": topic, "pageSize": size})
    projects = data.get("results") or []
    total = (data.get("header") or {}).get("numFound") or len(projects)
    agg: dict[str, Funder] = {}
    for proj in projects:
        fundings = proj.get("fundings") or []
        if not fundings:
            continue
        # The amount belongs to the project, so compute it once per project.
        amt = float(((proj.get("granted") or {}).get("fundedAmount")) or 0)
        credited: set[str] = set()
        for f in fundings:
            name = f.get("name") or f.get("shortName") or "?"
            if name not in agg:
                agg[name] = Funder(name=name, jurisdiction=f.get("jurisdiction"))
            # If a project lists the same funder more than once, credit the
            # project's amount to that funder only once.
            if name in credited:
                continue
            credited.add(name)
            agg[name].funded_amount += amt
    funders = sorted(agg.values(), key=lambda x: -x.funded_amount)
    return ProjectLandscape(total=int(total), funders=funders)
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
"""Core Replication Radar logic — pure functions over the OpenAIRE client + verdicts.
|
|
2
|
+
|
|
3
|
+
Three capabilities (exposed as MCP tools in server.py):
|
|
4
|
+
- radar(topic) : impact-ranked replication targets in a field,
|
|
5
|
+
each flagged open vs already-verified, with a
|
|
6
|
+
field-level funder-context panel.
|
|
7
|
+
- find_independent_software(doi): reusable engines NOT authored by the original team
|
|
8
|
+
(the reproduction-vs-replication distinction, made
|
|
9
|
+
computable as author-disjointness).
|
|
10
|
+
- replication_status(doi) : Science Live verdict overlay for one DOI.
|
|
11
|
+
"""
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
from . import openaire, verdicts
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _independence(target_authors: list[str], cand_authors: list[str]) -> bool:
|
|
18
|
+
"""A candidate tool is INDEPENDENT of the target paper if no author surname is
|
|
19
|
+
shared. This is what makes a *replication* (different toolchain) rather than a
|
|
20
|
+
*reproduction* (the original team's code)."""
|
|
21
|
+
if not cand_authors:
|
|
22
|
+
return True # unattributed engine (e.g. a package repo) — treat as independent
|
|
23
|
+
return not (set(target_authors) & set(cand_authors))
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def replication_status(doi: str) -> dict:
    """Return the verdict status for ``doi`` with the lowercased DOI under ``"doi"``."""
    result = {"doi": doi.lower()}
    result.update(verdicts.status_for(doi))
    return result
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def find_independent_software(
    doi: str | None = None,
    topic: str | None = None,
    original_authors: list[str] | None = None,
    limit: int = 8,
) -> dict:
    """Find reusable, INDEPENDENT method software for replicating a claim.

    Provide a DOI (authors are looked up) or pass original_authors directly, plus a
    short `topic` to search the software pool. Ranks by reuse signal, not citations.

    Args:
        doi: DOI of the original paper; used to look up its authors and, when
            no topic is given, its title.
        topic: short search string for the software pool. When omitted, the
            first three words of the paper title are used, else "software".
        original_authors: surnames of the original team; compared
            case-insensitively with the candidates' authors.
        limit: maximum number of software rows returned.

    Returns:
        A dict with ``query_topic``, ``original_authors`` (normalised),
        ``independent_count`` (over the whole pool, not just the returned rows)
        and ``software`` (independent rows first, then by reuse score).
    """
    paper = None
    # The paper is needed for the author list and for the title-derived topic,
    # so look it up whenever either of the two is missing.
    if doi and (original_authors is None or not topic):
        paper = openaire.get_by_doi(doi)
    if original_authors is None:
        original_authors = paper.authors if paper else []
    # Candidate authors are lowercased surnames; caller-supplied names must be
    # normalised the same way or shared authors would go undetected.
    original_authors = [a.strip().lower() for a in original_authors if a and a.strip()]
    if not topic:
        # derive a short topic from the title's leading words
        topic = " ".join((paper.title if paper else "").split()[:3]) or "software"

    pool = openaire.search_products(topic, "software", size=25)
    rows = [
        {
            "title": p.title,
            "doi": p.doi,
            "authors": p.authors,
            "independent": _independence(original_authors, p.authors),
            "reuse_score": p.reuse_score,
            "code_repo": p.code_repo,
            "swh_archived": p.swh_archived,
            "downloads": p.downloads,
        }
        for p in pool
    ]
    # independent first, then most-reusable
    rows.sort(key=lambda r: (not r["independent"], -r["reuse_score"]))
    return {
        "query_topic": topic,
        "original_authors": original_authors,
        "independent_count": sum(1 for r in rows if r["independent"]),
        "software": rows[:limit],
    }
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def radar(topic: str, limit: int = 8, pool: int = 30) -> dict:
    """Impact-ranked replication targets in a field.

    Each target is flagged open vs already-verified (Science Live overlay) and
    carries up to three independent tools found in the field's software pool.

    Args:
        topic: short free-text field description used for all searches.
        limit: number of top-ranked papers reported as targets.
        pool: number of papers fetched before ranking.

    Returns:
        A dict with ``topic``, ``targets``, ``verified_in_field`` (verified
        papers from the verdict index that the search did not surface but
        whose title shares a significant word with the topic), ``open_count``,
        ``verified_count`` and ``funder_context``.
    """
    papers = openaire.search_products(topic, "publication", size=pool)
    papers.sort(key=lambda p: p.impact_rank)

    # one software pull for the field; independence is computed per target
    sw_pool = openaire.search_products(topic, "software", size=25)

    targets = []
    for p in papers[:limit]:
        st = verdicts.status_for(p.doi)
        indep_tools = [
            s for s in sw_pool if _independence(p.authors, s.authors) and s.reuse_score >= 2
        ]
        indep_tools.sort(key=lambda s: -s.reuse_score)
        targets.append(
            {
                "title": p.title,
                "doi": p.doi,
                "year": p.year,
                "citations": p.citation_count,
                "impact": {
                    "citationClass": p.citation_class,
                    "influenceClass": p.influence_class,
                    "popularityClass": p.popularity_class,
                },
                "status": "VERIFIED" if st["replicated"] else "OPEN",
                "verification": st["summary"],
                "verifications": st["verifications"],
                "independent_tooling": [
                    {"title": s.title, "code_repo": s.code_repo, "swh": s.swh_archived}
                    for s in indep_tools[:3]
                ],
            }
        )

    # Guarantee the verified-overlay shows: don't rely on keyword retrieval to
    # surface already-checked papers. Pull the verdict index directly and include
    # any whose title is topically relevant (shares a significant token).
    shown = {t["doi"] for t in targets}
    # Normalise topic tokens exactly like title tokens (lowercase, surrounding
    # ",.:" stripped); otherwise "sharks," could never match the title word.
    topic_terms = {
        w for w in (raw.lower().strip(",.:") for raw in topic.split()) if len(w) > 3
    }
    verified_in_field = []
    # Without a significant topic term nothing can match, so skip the
    # per-DOI network lookups altogether.
    candidate_dois = sorted(verdicts.all_dois()) if topic_terms else []
    for vdoi in candidate_dois:
        if vdoi in shown:
            continue
        p = openaire.get_by_doi(vdoi)
        if not p:
            continue
        title_terms = {w.lower().strip(",.:") for w in p.title.split()}
        if topic_terms & title_terms:
            st = verdicts.status_for(vdoi)
            verified_in_field.append(
                {
                    "title": p.title,
                    "doi": vdoi,
                    "citations": p.citation_count,
                    "impact": {"citationClass": p.citation_class, "influenceClass": p.influence_class},
                    "status": "VERIFIED",
                    "verification": st["summary"],
                    "verifications": st["verifications"],
                }
            )

    land = openaire.funder_landscape(topic, size=20)
    return {
        "topic": topic,
        "targets": targets,
        "verified_in_field": verified_in_field,
        "open_count": sum(1 for t in targets if t["status"] == "OPEN"),
        "verified_count": sum(1 for t in targets if t["status"] == "VERIFIED") + len(verified_in_field),
        "funder_context": {
            "projects_in_field": land.total,
            "top_funders": [
                {"name": f.name, "jurisdiction": f.jurisdiction, "funded_eur": round(f.funded_amount)}
                for f in land.funders[:5]
            ],
        },
    }
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
"""MCP server exposing the Replication Radar over the OpenAIRE Graph.
|
|
2
|
+
|
|
3
|
+
Run: python -m replication_radar.server (stdio transport)
|
|
4
|
+
Add to an MCP client (.mcp.json):
|
|
5
|
+
{ "mcpServers": { "replication-radar": {
|
|
6
|
+
"command": "python", "args": ["-m", "replication_radar.server"] } } }
|
|
7
|
+
|
|
8
|
+
Hits api.openaire.eu/graph/v1 directly (anonymous). Point elsewhere with
|
|
9
|
+
RADAR_OPENAIRE_BASE (e.g. the Alien AI-Gateway endpoint).
|
|
10
|
+
"""
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
from mcp.server.fastmcp import FastMCP
|
|
14
|
+
|
|
15
|
+
# import from the submodule directly (the package exports `radar` as a *function*,
|
|
16
|
+
# which would shadow the module on `from . import radar`).
|
|
17
|
+
from .radar import (
|
|
18
|
+
radar as _radar,
|
|
19
|
+
find_independent_software as _find_software,
|
|
20
|
+
replication_status as _replication_status,
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
# Server instance named "replication-radar"; the @mcp.tool() decorators in this
# module attach their functions to it and main() runs it.
mcp = FastMCP("replication-radar")
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@mcp.tool()
def radar(topic: str, limit: int = 8) -> dict:
    """Impact-ranked replication targets in a research field.

    Returns high-impact OpenAIRE papers worth replicating, each flagged OPEN
    (opportunity) or VERIFIED (already checked by a Science Live replication, with
    the verdict), plus independent reusable tooling and a field funder-context panel.
    Keep `topic` short (2-3 words); OpenAIRE free-text terms are AND-ed.
    """
    # Thin wrapper around the core function imported as _radar. Only `topic` and
    # `limit` are forwarded, so the core's `pool` argument keeps its default.
    # NOTE(review): the docstring above is presumably what the MCP client sees
    # as the tool description; treat edits to it as behavior changes.
    return _radar(topic, limit=limit)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@mcp.tool()
def find_independent_software(doi: str = "", topic: str = "", limit: int = 8) -> dict:
    """Reusable method software for *replicating* a claim — engines NOT authored by
    the original paper's team (author-disjoint), ranked by reuse signal (code repo +
    Software Heritage archival + usage), not citations. Pass the original paper's DOI
    (authors are looked up) and a short topic."""
    # Empty strings stand in for "not provided" in the tool signature; they are
    # converted to None, which is what the core function (imported as
    # _find_software) uses for a missing DOI/topic. `original_authors` is not
    # exposed by this tool.
    return _find_software(doi=doi or None, topic=topic or None, limit=limit)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@mcp.tool()
def replication_status(doi: str) -> dict:
    """Has this DOI been independently replicated, and did it hold? Returns the
    Science Live verdict(s) (the reliability signal the OpenAIRE Graph cannot hold)
    with links to the CiTO nanopubs, or 'open' if not yet replicated."""
    # Thin wrapper: passes the DOI unchanged to the core function imported as
    # _replication_status and returns its dict as-is.
    return _replication_status(doi)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def main() -> None:
    """Start the MCP server by calling ``mcp.run()`` with its default arguments."""
    mcp.run()


# Allows `python -m replication_radar.server` to start the server directly.
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
"""The 'already-checked' memory layer: DOI -> Science Live replication verdicts.
|
|
2
|
+
|
|
3
|
+
This is the signal the OpenAIRE Graph structurally cannot hold (citation-popularity
|
|
4
|
+
is orthogonal to whether a claim held). Verdicts live in CiTO nanopubs; this index
|
|
5
|
+
is the bundled crosswalk. Extend data/verdicts.json as new chains are published.
|
|
6
|
+
"""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
from functools import lru_cache
|
|
11
|
+
from importlib import resources
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@lru_cache(maxsize=1)
def _index() -> dict[str, list[dict]]:
    """Load the bundled DOI -> verdicts crosswalk from ``data/verdicts.json``.

    Returns the ``"verifications"`` mapping of that file with every DOI key
    lowercased (entries of keys that differ only in case are concatenated), or
    an empty dict when the mapping is missing. The result is cached, so the
    file is read at most once per process.
    """
    source = resources.files(__package__).joinpath("data/verdicts.json")
    # JSON is UTF-8; do not depend on the platform's locale default encoding.
    with source.open(encoding="utf-8") as fh:
        raw = json.load(fh).get("verifications") or {}
    # status_for() looks DOIs up lowercased, so a mixed-case key in the data
    # file would otherwise never be found.
    index: dict[str, list[dict]] = {}
    for doi, entries in raw.items():
        index.setdefault(doi.lower(), []).extend(entries or [])
    return index
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def status_for(doi: str | None) -> dict:
    """Look up the replication status recorded for a DOI.

    The result always has the shape
    {"replicated": bool, "verifications": [...], "summary": str};
    a missing, empty or unknown DOI yields the "open" status.
    """
    hits = _index().get(doi.lower(), []) if doi else []
    if hits:
        distinct = sorted({entry["verdict"] for entry in hits})
        summary = f"{len(hits)} verification(s): {', '.join(distinct)}"
        return {"replicated": True, "verifications": hits, "summary": summary}
    return {"replicated": False, "verifications": [], "summary": "open"}
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def all_dois() -> set[str]:
    """Return the set of every DOI that has at least one entry in the verdict index."""
    return set(_index())
|