@wentorai/research-plugins 1.3.2 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +32 -56
- package/curated/analysis/README.md +1 -13
- package/curated/domains/README.md +1 -5
- package/curated/literature/README.md +1 -10
- package/curated/research/README.md +1 -18
- package/curated/tools/README.md +1 -12
- package/curated/writing/README.md +1 -5
- package/index.ts +88 -5
- package/openclaw.plugin.json +3 -12
- package/package.json +3 -5
- package/skills/analysis/statistics/SKILL.md +1 -1
- package/skills/analysis/statistics/meta-analysis-guide/SKILL.md +1 -1
- package/skills/domains/ai-ml/SKILL.md +3 -2
- package/skills/domains/ai-ml/generative-ai-guide/SKILL.md +1 -0
- package/skills/domains/ai-ml/huggingface-api/SKILL.md +251 -0
- package/skills/domains/biomedical/SKILL.md +9 -2
- package/skills/domains/biomedical/alphafold-api/SKILL.md +227 -0
- package/skills/domains/biomedical/biothings-api/SKILL.md +296 -0
- package/skills/domains/biomedical/clinicaltrials-api-v2/SKILL.md +216 -0
- package/skills/domains/biomedical/enrichr-api/SKILL.md +264 -0
- package/skills/domains/biomedical/ensembl-rest-api/SKILL.md +204 -0
- package/skills/domains/biomedical/medical-data-api/SKILL.md +197 -0
- package/skills/domains/biomedical/pdb-structure-api/SKILL.md +219 -0
- package/skills/domains/business/SKILL.md +2 -3
- package/skills/domains/chemistry/SKILL.md +3 -2
- package/skills/domains/chemistry/catalysis-hub-api/SKILL.md +171 -0
- package/skills/domains/education/SKILL.md +2 -3
- package/skills/domains/law/SKILL.md +3 -2
- package/skills/domains/law/uk-legislation-api/SKILL.md +179 -0
- package/skills/literature/fulltext/SKILL.md +3 -2
- package/skills/literature/fulltext/arxiv-latex-source/SKILL.md +195 -0
- package/skills/literature/search/SKILL.md +2 -3
- package/skills/research/automation/SKILL.md +2 -3
- package/skills/research/automation/datagen-research-guide/SKILL.md +1 -0
- package/skills/research/automation/mle-agent-guide/SKILL.md +1 -0
- package/skills/research/automation/paper-to-agent-guide/SKILL.md +1 -0
- package/skills/research/deep-research/auto-deep-research-guide/SKILL.md +1 -0
- package/skills/research/methodology/SKILL.md +1 -1
- package/skills/research/methodology/claude-scientific-guide/SKILL.md +1 -0
- package/skills/research/methodology/qualitative-research-guide/SKILL.md +1 -1
- package/skills/research/paper-review/SKILL.md +1 -1
- package/skills/research/paper-review/peer-review-guide/SKILL.md +1 -1
- package/skills/tools/knowledge-graph/SKILL.md +2 -3
- package/skills/tools/ocr-translate/zotero-pdf2zh-guide/SKILL.md +1 -0
- package/skills/writing/citation/obsidian-citation-guide/SKILL.md +1 -0
- package/skills/writing/citation/obsidian-zotero-guide/SKILL.md +1 -0
- package/skills/writing/citation/papersgpt-zotero-guide/SKILL.md +1 -0
- package/skills/writing/citation/zotero-mdnotes-guide/SKILL.md +1 -0
- package/skills/writing/citation/zotero-reference-guide/SKILL.md +1 -0
- package/skills/writing/composition/scientific-writing-resources/SKILL.md +1 -0
- package/skills/writing/latex/latex-drawing-collection/SKILL.md +1 -0
- package/skills/writing/latex/latex-templates-collection/SKILL.md +1 -0
- package/skills/writing/templates/novathesis-guide/SKILL.md +1 -0
- package/src/tools/arxiv.ts +78 -30
- package/src/tools/biorxiv.ts +142 -0
- package/src/tools/crossref.ts +60 -22
- package/src/tools/datacite.ts +188 -0
- package/src/tools/dblp.ts +125 -0
- package/src/tools/doaj.ts +82 -0
- package/src/tools/europe-pmc.ts +159 -0
- package/src/tools/hal.ts +118 -0
- package/src/tools/inspire-hep.ts +165 -0
- package/src/tools/openaire.ts +158 -0
- package/src/tools/openalex.ts +20 -15
- package/src/tools/opencitations.ts +103 -0
- package/src/tools/orcid.ts +136 -0
- package/src/tools/osf-preprints.ts +104 -0
- package/src/tools/pubmed.ts +19 -13
- package/src/tools/ror.ts +118 -0
- package/src/tools/unpaywall.ts +12 -6
- package/src/tools/util.ts +141 -0
- package/src/tools/zenodo.ts +154 -0
- package/mcp-configs/academic-db/ChatSpatial.json +0 -17
- package/mcp-configs/academic-db/academia-mcp.json +0 -17
- package/mcp-configs/academic-db/academic-paper-explorer.json +0 -17
- package/mcp-configs/academic-db/academic-search-mcp-server.json +0 -17
- package/mcp-configs/academic-db/agentinterviews-mcp.json +0 -17
- package/mcp-configs/academic-db/all-in-mcp.json +0 -17
- package/mcp-configs/academic-db/alphafold-mcp.json +0 -20
- package/mcp-configs/academic-db/apple-health-mcp.json +0 -17
- package/mcp-configs/academic-db/arxiv-latex-mcp.json +0 -17
- package/mcp-configs/academic-db/arxiv-mcp-server.json +0 -17
- package/mcp-configs/academic-db/bgpt-mcp.json +0 -17
- package/mcp-configs/academic-db/biomcp.json +0 -17
- package/mcp-configs/academic-db/biothings-mcp.json +0 -17
- package/mcp-configs/academic-db/brightspace-mcp.json +0 -21
- package/mcp-configs/academic-db/catalysishub-mcp-server.json +0 -17
- package/mcp-configs/academic-db/climatiq-mcp.json +0 -20
- package/mcp-configs/academic-db/clinicaltrialsgov-mcp-server.json +0 -17
- package/mcp-configs/academic-db/deep-research-mcp.json +0 -17
- package/mcp-configs/academic-db/dicom-mcp.json +0 -17
- package/mcp-configs/academic-db/enrichr-mcp-server.json +0 -17
- package/mcp-configs/academic-db/fec-mcp-server.json +0 -17
- package/mcp-configs/academic-db/fhir-mcp-server-themomentum.json +0 -17
- package/mcp-configs/academic-db/fhir-mcp.json +0 -19
- package/mcp-configs/academic-db/gget-mcp.json +0 -17
- package/mcp-configs/academic-db/gibs-mcp.json +0 -20
- package/mcp-configs/academic-db/gis-mcp-server.json +0 -22
- package/mcp-configs/academic-db/google-earth-engine-mcp.json +0 -21
- package/mcp-configs/academic-db/google-researcher-mcp.json +0 -17
- package/mcp-configs/academic-db/idea-reality-mcp.json +0 -17
- package/mcp-configs/academic-db/legiscan-mcp.json +0 -19
- package/mcp-configs/academic-db/lex.json +0 -17
- package/mcp-configs/academic-db/m4-clinical-mcp.json +0 -21
- package/mcp-configs/academic-db/medical-mcp.json +0 -21
- package/mcp-configs/academic-db/nexonco-mcp.json +0 -20
- package/mcp-configs/academic-db/omop-mcp.json +0 -20
- package/mcp-configs/academic-db/onekgpd-mcp.json +0 -20
- package/mcp-configs/academic-db/openedu-mcp.json +0 -20
- package/mcp-configs/academic-db/opengenes-mcp.json +0 -20
- package/mcp-configs/academic-db/openstax-mcp.json +0 -21
- package/mcp-configs/academic-db/openstreetmap-mcp.json +0 -21
- package/mcp-configs/academic-db/opentargets-mcp.json +0 -21
- package/mcp-configs/academic-db/pdb-mcp.json +0 -21
- package/mcp-configs/academic-db/smithsonian-mcp.json +0 -20
- package/mcp-configs/ai-platform/Adaptive-Graph-of-Thoughts-MCP-server.json +0 -17
- package/mcp-configs/ai-platform/ai-counsel.json +0 -17
- package/mcp-configs/ai-platform/atlas-mcp-server.json +0 -17
- package/mcp-configs/ai-platform/counsel-mcp.json +0 -17
- package/mcp-configs/ai-platform/cross-llm-mcp.json +0 -17
- package/mcp-configs/ai-platform/gptr-mcp.json +0 -17
- package/mcp-configs/ai-platform/magi-researchers.json +0 -21
- package/mcp-configs/ai-platform/mcp-academic-researcher.json +0 -22
- package/mcp-configs/ai-platform/open-paper-machine.json +0 -21
- package/mcp-configs/ai-platform/paper-intelligence.json +0 -21
- package/mcp-configs/ai-platform/paper-reader.json +0 -21
- package/mcp-configs/ai-platform/paperdebugger.json +0 -21
- package/mcp-configs/browser/decipher-research-agent.json +0 -17
- package/mcp-configs/browser/deep-research.json +0 -17
- package/mcp-configs/browser/everything-claude-code.json +0 -17
- package/mcp-configs/browser/exa-mcp.json +0 -20
- package/mcp-configs/browser/gpt-researcher.json +0 -17
- package/mcp-configs/browser/heurist-agent-framework.json +0 -17
- package/mcp-configs/browser/mcp-searxng.json +0 -21
- package/mcp-configs/browser/mcp-webresearch.json +0 -20
- package/mcp-configs/cloud-docs/confluence-mcp.json +0 -37
- package/mcp-configs/cloud-docs/google-drive-mcp.json +0 -35
- package/mcp-configs/cloud-docs/notion-mcp.json +0 -29
- package/mcp-configs/communication/discord-mcp.json +0 -29
- package/mcp-configs/communication/discourse-mcp.json +0 -21
- package/mcp-configs/communication/slack-mcp.json +0 -29
- package/mcp-configs/communication/telegram-mcp.json +0 -28
- package/mcp-configs/data-platform/4everland-hosting-mcp.json +0 -17
- package/mcp-configs/data-platform/automl-stat-mcp.json +0 -21
- package/mcp-configs/data-platform/context-keeper.json +0 -17
- package/mcp-configs/data-platform/context7.json +0 -19
- package/mcp-configs/data-platform/contextstream-mcp.json +0 -17
- package/mcp-configs/data-platform/email-mcp.json +0 -17
- package/mcp-configs/data-platform/jefferson-stats-mcp.json +0 -22
- package/mcp-configs/data-platform/mcp-excel-server.json +0 -21
- package/mcp-configs/data-platform/mcp-stata.json +0 -21
- package/mcp-configs/data-platform/mcpstack-jupyter.json +0 -21
- package/mcp-configs/data-platform/ml-mcp.json +0 -21
- package/mcp-configs/data-platform/nasdaq-data-link-mcp.json +0 -20
- package/mcp-configs/data-platform/numpy-mcp.json +0 -21
- package/mcp-configs/database/neo4j-mcp.json +0 -37
- package/mcp-configs/database/postgres-mcp.json +0 -28
- package/mcp-configs/database/sqlite-mcp.json +0 -29
- package/mcp-configs/dev-platform/geogebra-mcp.json +0 -21
- package/mcp-configs/dev-platform/github-mcp.json +0 -31
- package/mcp-configs/dev-platform/gitlab-mcp.json +0 -34
- package/mcp-configs/dev-platform/latex-mcp-server.json +0 -21
- package/mcp-configs/dev-platform/manim-mcp.json +0 -20
- package/mcp-configs/dev-platform/mcp-echarts.json +0 -20
- package/mcp-configs/dev-platform/panel-viz-mcp.json +0 -20
- package/mcp-configs/dev-platform/paperbanana.json +0 -20
- package/mcp-configs/dev-platform/texflow-mcp.json +0 -20
- package/mcp-configs/dev-platform/texmcp.json +0 -20
- package/mcp-configs/dev-platform/typst-mcp.json +0 -21
- package/mcp-configs/dev-platform/vizro-mcp.json +0 -20
- package/mcp-configs/email/email-mcp.json +0 -40
- package/mcp-configs/email/gmail-mcp.json +0 -37
- package/mcp-configs/note-knowledge/ApeRAG.json +0 -17
- package/mcp-configs/note-knowledge/In-Memoria.json +0 -17
- package/mcp-configs/note-knowledge/agent-memory.json +0 -17
- package/mcp-configs/note-knowledge/aimemo.json +0 -17
- package/mcp-configs/note-knowledge/biel-mcp.json +0 -19
- package/mcp-configs/note-knowledge/cognee.json +0 -17
- package/mcp-configs/note-knowledge/context-awesome.json +0 -17
- package/mcp-configs/note-knowledge/context-mcp.json +0 -17
- package/mcp-configs/note-knowledge/conversation-handoff-mcp.json +0 -17
- package/mcp-configs/note-knowledge/cortex.json +0 -17
- package/mcp-configs/note-knowledge/devrag.json +0 -17
- package/mcp-configs/note-knowledge/easy-obsidian-mcp.json +0 -17
- package/mcp-configs/note-knowledge/engram.json +0 -17
- package/mcp-configs/note-knowledge/gnosis-mcp.json +0 -17
- package/mcp-configs/note-knowledge/graphlit-mcp-server.json +0 -19
- package/mcp-configs/note-knowledge/local-faiss-mcp.json +0 -21
- package/mcp-configs/note-knowledge/mcp-memory-service.json +0 -21
- package/mcp-configs/note-knowledge/mcp-obsidian.json +0 -23
- package/mcp-configs/note-knowledge/mcp-ragdocs.json +0 -20
- package/mcp-configs/note-knowledge/mcp-summarizer.json +0 -21
- package/mcp-configs/note-knowledge/mediawiki-mcp.json +0 -21
- package/mcp-configs/note-knowledge/openzim-mcp.json +0 -20
- package/mcp-configs/note-knowledge/zettelkasten-mcp.json +0 -21
- package/mcp-configs/reference-mgr/academic-paper-mcp-http.json +0 -20
- package/mcp-configs/reference-mgr/academix.json +0 -20
- package/mcp-configs/reference-mgr/arxiv-cli.json +0 -17
- package/mcp-configs/reference-mgr/arxiv-research-mcp.json +0 -21
- package/mcp-configs/reference-mgr/arxiv-search-mcp.json +0 -17
- package/mcp-configs/reference-mgr/chiken.json +0 -17
- package/mcp-configs/reference-mgr/claude-scholar.json +0 -17
- package/mcp-configs/reference-mgr/devonthink-mcp.json +0 -17
- package/mcp-configs/reference-mgr/google-scholar-abstract-mcp.json +0 -19
- package/mcp-configs/reference-mgr/google-scholar-mcp.json +0 -20
- package/mcp-configs/reference-mgr/mcp-paperswithcode.json +0 -21
- package/mcp-configs/reference-mgr/mcp-scholarly.json +0 -20
- package/mcp-configs/reference-mgr/mcp-simple-arxiv.json +0 -20
- package/mcp-configs/reference-mgr/mcp-simple-pubmed.json +0 -20
- package/mcp-configs/reference-mgr/mcp-zotero.json +0 -21
- package/mcp-configs/reference-mgr/mendeley-mcp.json +0 -20
- package/mcp-configs/reference-mgr/ncbi-mcp-server.json +0 -22
- package/mcp-configs/reference-mgr/onecite.json +0 -21
- package/mcp-configs/reference-mgr/paper-search-mcp.json +0 -21
- package/mcp-configs/reference-mgr/pubmed-search-mcp.json +0 -21
- package/mcp-configs/reference-mgr/scholar-mcp.json +0 -21
- package/mcp-configs/reference-mgr/scholar-multi-mcp.json +0 -21
- package/mcp-configs/reference-mgr/seerai.json +0 -21
- package/mcp-configs/reference-mgr/semantic-scholar-fastmcp.json +0 -21
- package/mcp-configs/reference-mgr/sourcelibrary.json +0 -20
- package/mcp-configs/registry.json +0 -476
- package/mcp-configs/repository/dataverse-mcp.json +0 -33
- package/mcp-configs/repository/huggingface-mcp.json +0 -29
- package/skills/domains/business/xpert-bi-guide/SKILL.md +0 -84
- package/skills/domains/education/edumcp-guide/SKILL.md +0 -74
- package/skills/literature/search/paper-search-mcp-guide/SKILL.md +0 -107
- package/skills/research/automation/mcp-server-guide/SKILL.md +0 -211
- package/skills/tools/knowledge-graph/paperpile-notion-guide/SKILL.md +0 -84
- package/src/tools/semantic-scholar.ts +0 -66
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: clinicaltrials-api-v2
|
|
3
|
+
description: "Search and analyze clinical trials via the ClinicalTrials.gov v2 API"
|
|
4
|
+
metadata:
|
|
5
|
+
openclaw:
|
|
6
|
+
emoji: "🏥"
|
|
7
|
+
category: "domains"
|
|
8
|
+
subcategory: "biomedical"
|
|
9
|
+
keywords: ["clinical trials", "ClinicalTrials.gov", "medical research", "study design", "FDA", "drug trials"]
|
|
10
|
+
source: "https://clinicaltrials.gov/data-api/about-api"
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
# ClinicalTrials.gov v2 API Guide
|
|
14
|
+
|
|
15
|
+
## Overview
|
|
16
|
+
|
|
17
|
+
ClinicalTrials.gov is the world's largest clinical trial registry, maintained by the U.S. National Library of Medicine (NLM) at NIH. It contains over 576,000 study records from 220+ countries covering interventional trials, observational studies, and expanded access programs. The v2 API provides structured JSON access with field-level filtering, cursor-based pagination, and statistics endpoints.
|
|
18
|
+
|
|
19
|
+
Key v2 improvements over the legacy API: JSON-native responses, sparse field selection via the `fields` parameter, `nextPageToken` pagination, and dedicated statistics endpoints. Study data is organized into `protocolSection` (sponsor-submitted) and `derivedSection` (NLM-computed).
|
|
20
|
+
|
|
21
|
+
## Authentication
|
|
22
|
+
|
|
23
|
+
No authentication required. All endpoints are publicly accessible without API keys or registration. Users should comply with NCBI usage policies and maintain reasonable request rates.
|
|
24
|
+
|
|
25
|
+
## Core Endpoints
|
|
26
|
+
|
|
27
|
+
### Search Studies
|
|
28
|
+
|
|
29
|
+
- **URL**: `GET https://clinicaltrials.gov/api/v2/studies`
|
|
30
|
+
- **Parameters**:
|
|
31
|
+
|
|
32
|
+
| Parameter | Type | Required | Description |
|
|
33
|
+
|-----------|------|----------|-------------|
|
|
34
|
+
| query.term | string | No | Free-text search across all fields |
|
|
35
|
+
| query.cond | string | No | Condition or disease filter |
|
|
36
|
+
| query.intr | string | No | Intervention or treatment filter |
|
|
37
|
+
| query.spons | string | No | Sponsor or collaborator filter |
|
|
38
|
+
| filter.overallStatus | string | No | `RECRUITING`, `COMPLETED`, `ACTIVE_NOT_RECRUITING`, etc. |
|
|
39
|
+
| filter.phase | string | No | `EARLY_PHASE1`, `PHASE1`, `PHASE2`, `PHASE3`, `PHASE4`, `NA` |
|
|
40
|
+
| filter.geo | string | No | Geographic filter (`distance(lat,lng,dist)`) |
|
|
41
|
+
| fields | string | No | Comma-separated fields for sparse response |
|
|
42
|
+
| sort | string | No | Sort field and direction (e.g., `LastUpdatePostDate:desc`) |
|
|
43
|
+
| pageSize | int | No | Results per page (default 10, max 1000) |
|
|
44
|
+
| pageToken | string | No | Cursor token for next page |
|
|
45
|
+
| format | string | No | `json` (default) or `csv` |
|
|
46
|
+
|
|
47
|
+
- **Example**:
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
curl "https://clinicaltrials.gov/api/v2/studies?query.cond=diabetes&query.intr=metformin&pageSize=1&fields=NCTId,BriefTitle,OverallStatus"
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
- **Response**:
|
|
54
|
+
|
|
55
|
+
```json
|
|
56
|
+
{
|
|
57
|
+
"studies": [{
|
|
58
|
+
"protocolSection": {
|
|
59
|
+
"identificationModule": {
|
|
60
|
+
"nctId": "NCT06649773",
|
|
61
|
+
"briefTitle": "The Experiment of Noiiglutide Injection in Type 2 Diabetes Patients"
|
|
62
|
+
},
|
|
63
|
+
"statusModule": { "overallStatus": "ACTIVE_NOT_RECRUITING" }
|
|
64
|
+
}
|
|
65
|
+
}],
|
|
66
|
+
"nextPageToken": "ZVNj7o2Elu8o3lpo..."
|
|
67
|
+
}
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
Full responses include `protocolSection` with: `identificationModule` (NCT ID, titles, organization), `statusModule` (status, dates), `descriptionModule` (summary), `conditionsModule`, `designModule` (type, phases, enrollment), `armsInterventionsModule`, `eligibilityModule` (criteria, sex, age), `outcomesModule`, and `contactsLocationsModule`.
|
|
71
|
+
|
|
72
|
+
### Get Single Study
|
|
73
|
+
|
|
74
|
+
- **URL**: `GET https://clinicaltrials.gov/api/v2/studies/{nctId}`
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
curl "https://clinicaltrials.gov/api/v2/studies/NCT04280705?fields=NCTId,BriefTitle,OverallStatus,Phase,LeadSponsorName,EnrollmentCount,Condition,InterventionName"
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
```json
|
|
81
|
+
{
|
|
82
|
+
"protocolSection": {
|
|
83
|
+
"identificationModule": {
|
|
84
|
+
"nctId": "NCT04280705",
|
|
85
|
+
"briefTitle": "Adaptive COVID-19 Treatment Trial (ACTT)"
|
|
86
|
+
},
|
|
87
|
+
"statusModule": {
|
|
88
|
+
"overallStatus": "COMPLETED",
|
|
89
|
+
"startDateStruct": { "date": "2020-02-21" },
|
|
90
|
+
"completionDateStruct": { "date": "2020-05-21" }
|
|
91
|
+
},
|
|
92
|
+
"sponsorCollaboratorsModule": {
|
|
93
|
+
"leadSponsor": { "name": "National Institute of Allergy and Infectious Diseases (NIAID)" }
|
|
94
|
+
},
|
|
95
|
+
"conditionsModule": { "conditions": ["COVID-19"] },
|
|
96
|
+
"designModule": { "phases": ["PHASE3"], "enrollmentInfo": { "count": 1062 } },
|
|
97
|
+
"armsInterventionsModule": {
|
|
98
|
+
"interventions": [{ "name": "Placebo" }, { "name": "Remdesivir" }]
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
### Database Statistics
|
|
105
|
+
|
|
106
|
+
- **URL**: `GET https://clinicaltrials.gov/api/v2/stats/size`
|
|
107
|
+
|
|
108
|
+
```bash
|
|
109
|
+
curl "https://clinicaltrials.gov/api/v2/stats/size"
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
```json
|
|
113
|
+
{
|
|
114
|
+
"totalStudies": 576554,
|
|
115
|
+
"averageSizeBytes": 17186,
|
|
116
|
+
"largestStudies": [
|
|
117
|
+
{ "id": "NCT02723955", "sizeBytes": 3596689 },
|
|
118
|
+
{ "id": "NCT03688620", "sizeBytes": 2865033 }
|
|
119
|
+
]
|
|
120
|
+
}
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
### Field Value Statistics
|
|
124
|
+
|
|
125
|
+
- **URL**: `GET https://clinicaltrials.gov/api/v2/stats/fieldValues/{fieldName}`
|
|
126
|
+
|
|
127
|
+
```bash
|
|
128
|
+
curl "https://clinicaltrials.gov/api/v2/stats/fieldValues/Phase"
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
```json
|
|
132
|
+
{
|
|
133
|
+
"type": "ENUM",
|
|
134
|
+
"piece": "Phase",
|
|
135
|
+
"field": "protocolSection.designModule.phases",
|
|
136
|
+
"missingStudiesCount": 136632,
|
|
137
|
+
"topValues": [
|
|
138
|
+
{ "value": "NA", "studiesCount": 222829 },
|
|
139
|
+
{ "value": "PHASE2", "studiesCount": 87478 },
|
|
140
|
+
{ "value": "PHASE1", "studiesCount": 63716 },
|
|
141
|
+
{ "value": "PHASE3", "studiesCount": 48700 },
|
|
142
|
+
{ "value": "PHASE4", "studiesCount": 34911 },
|
|
143
|
+
{ "value": "EARLY_PHASE1", "studiesCount": 6179 }
|
|
144
|
+
]
|
|
145
|
+
}
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
## Rate Limits
|
|
149
|
+
|
|
150
|
+
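No formal rate limits are published for the v2 API, and the API does not use API keys. As a conservative guideline, keep request rates modest -- for example, at or below roughly 3 requests/second, which mirrors NCBI's limit for keyless E-utilities access (the 10 requests/second tier applies only to keyed NCBI E-utilities, not to ClinicalTrials.gov). For bulk data access, use the AACT relational database (https://aact.ctti-clinicaltrials.org/) or downloadable flat files rather than paginating through the full API.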
|
|
151
|
+
|
|
152
|
+
## Academic Use Cases
|
|
153
|
+
|
|
154
|
+
- **Systematic reviews**: Use `query.cond` + `query.intr` + `filter.overallStatus=COMPLETED` to build PRISMA-compliant trial inventories. Paginate with `nextPageToken` to collect all records, then extract outcomes and enrollment for quantitative synthesis.
|
|
155
|
+
- **Landscape mapping**: Combine search with `stats/fieldValues` to map phase distributions, sponsor concentration, and geographic spread for a therapeutic area -- useful for identifying evidence gaps in grant proposals.
|
|
156
|
+
- **Recruitment tracking**: Filter by `RECRUITING` status and `filter.geo` to find active enrollment opportunities. Automate periodic queries for new trials in your domain.
|
|
157
|
+
|
|
158
|
+
## Code Examples
|
|
159
|
+
|
|
160
|
+
### Paginated Collection for Systematic Review
|
|
161
|
+
|
|
162
|
+
```python
|
|
163
|
+
import requests, time
|
|
164
|
+
|
|
165
|
+
def collect_trials(condition, intervention, status="COMPLETED", *,
                   page_size=100, timeout=30):
    """Collect every study matching a condition/intervention pair.

    Pages through the ClinicalTrials.gov v2 ``/studies`` endpoint, feeding
    each response's ``nextPageToken`` back as ``pageToken`` until the API
    stops returning one.

    Args:
        condition: Value sent as ``query.cond``.
        intervention: Value sent as ``query.intr``.
        status: Value sent as ``filter.overallStatus``.
        page_size: Studies requested per page (sent as ``pageSize``).
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        A list with the study records of all pages, in the order returned.

    Raises:
        requests.HTTPError: If any page request returns an error status.
            Failing loudly avoids handing back a silently truncated
            inventory when a page in the middle of the run fails.
    """
    base = "https://clinicaltrials.gov/api/v2/studies"
    params = {
        "query.cond": condition,
        "query.intr": intervention,
        "filter.overallStatus": status,
        "pageSize": page_size,
        "fields": "NCTId,BriefTitle,Phase,EnrollmentCount,CompletionDate",
    }
    studies = []
    while True:
        resp = requests.get(base, params=params, timeout=timeout)
        resp.raise_for_status()
        data = resp.json()
        studies.extend(data.get("studies", []))
        token = data.get("nextPageToken")
        if not token:
            # No cursor in the response means this was the last page.
            return studies
        params["pageToken"] = token
        # Pause between pages to keep the request rate modest (~3 per second).
        time.sleep(0.34)
|
|
183
|
+
|
|
184
|
+
trials = collect_trials("type 2 diabetes", "metformin")
|
|
185
|
+
print(f"Collected {len(trials)} completed metformin T2D trials")
|
|
186
|
+
```
|
|
187
|
+
|
|
188
|
+
### Sponsor and Phase Analysis
|
|
189
|
+
|
|
190
|
+
```python
|
|
191
|
+
import requests
|
|
192
|
+
from collections import Counter
|
|
193
|
+
|
|
194
|
+
# Tally trial phases and lead sponsors for one condition (first page only).
params = {
    "query.cond": "Alzheimer's Disease",
    "pageSize": 100,
    "fields": "NCTId,Phase,LeadSponsorName",
}
resp = requests.get(
    "https://clinicaltrials.gov/api/v2/studies", params=params, timeout=30
)
# Surface HTTP errors here instead of failing later on a missing key.
resp.raise_for_status()
data = resp.json()

phases, sponsors = Counter(), Counter()
for s in data.get("studies", []):
    p = s.get("protocolSection", {})
    # A study may list several phases (or none); count each one.
    phases.update(p.get("designModule", {}).get("phases", []))
    lead = p.get("sponsorCollaboratorsModule", {}).get("leadSponsor", {})
    sponsors[lead.get("name", "Unknown")] += 1

for ph, n in phases.most_common():
    print(f"{ph}: {n}")

# Report the sponsor tally as well, most frequent first.
for name, n in sponsors.most_common(10):
    print(f"{name}: {n}")
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
## References
|
|
211
|
+
|
|
212
|
+
- v2 API reference: https://clinicaltrials.gov/data-api/api
|
|
213
|
+
- About the API: https://clinicaltrials.gov/data-api/about-api
|
|
214
|
+
- AACT database (bulk access): https://aact.ctti-clinicaltrials.org/
|
|
215
|
+
- WHO ICTRP: https://trialsearch.who.int/
|
|
216
|
+
- NCBI usage policies: https://www.ncbi.nlm.nih.gov/home/about/policies/
|
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: enrichr-api
|
|
3
|
+
description: "Perform gene set enrichment analysis using the Enrichr API"
|
|
4
|
+
metadata:
|
|
5
|
+
openclaw:
|
|
6
|
+
emoji: "🔬"
|
|
7
|
+
category: "domains"
|
|
8
|
+
subcategory: "biomedical"
|
|
9
|
+
keywords: ["gene set enrichment", "pathway analysis", "GO terms", "KEGG", "Enrichr", "functional analysis"]
|
|
10
|
+
source: "https://maayanlab.cloud/Enrichr"
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
# Enrichr Gene Set Enrichment Analysis API
|
|
14
|
+
|
|
15
|
+
## Overview
|
|
16
|
+
|
|
17
|
+
Enrichr is one of the most widely used gene set enrichment analysis tools, developed by the Ma'ayan Lab at the Icahn School of Medicine at Mount Sinai. It tests whether a user-supplied gene list is statistically over-represented in curated gene set libraries spanning pathways, ontologies, transcription factor targets, disease associations, and cell types. The API provides access to 225 background libraries covering over 500,000 annotated gene sets. It is free to use and requires no authentication.
|
|
18
|
+
|
|
19
|
+
## Two-Step Workflow
|
|
20
|
+
|
|
21
|
+
Enrichr uses a submit-then-query pattern:
|
|
22
|
+
|
|
23
|
+
1. **POST gene list** to `/addList` -- returns a `userListId` token
|
|
24
|
+
2. **GET enrichment** from `/enrich` using that token and a chosen library
|
|
25
|
+
|
|
26
|
+
The `userListId` persists on the server, so you can run multiple library queries against the same submission without re-uploading.
|
|
27
|
+
|
|
28
|
+
## Core Endpoints
|
|
29
|
+
|
|
30
|
+
### Base URL
|
|
31
|
+
|
|
32
|
+
```
|
|
33
|
+
https://maayanlab.cloud/Enrichr
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
### Step 1: Submit Gene List
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
curl -X POST "https://maayanlab.cloud/Enrichr/addList" \
|
|
40
|
+
-F "list=BRCA1
|
|
41
|
+
BRCA2
|
|
42
|
+
TP53
|
|
43
|
+
EGFR
|
|
44
|
+
MYC
|
|
45
|
+
PTEN
|
|
46
|
+
AKT1
|
|
47
|
+
KRAS
|
|
48
|
+
PIK3CA
|
|
49
|
+
RAF1" \
|
|
50
|
+
-F "description=cancer_genes"
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
**Response:**
|
|
54
|
+
|
|
55
|
+
```json
|
|
56
|
+
{
|
|
57
|
+
"shortId": "8619200cc78f1513ff1029a04af90ad7",
|
|
58
|
+
"userListId": 124544426
|
|
59
|
+
}
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
Genes are newline-separated. The request must use `multipart/form-data` (the `-F` flag), not `application/x-www-form-urlencoded`.
|
|
63
|
+
|
|
64
|
+
### Step 2: Retrieve Enrichment Results
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
curl "https://maayanlab.cloud/Enrichr/enrich?userListId=124544426&backgroundType=KEGG_2021_Human"
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
**Response (first 3 of 143 results):**
|
|
71
|
+
|
|
72
|
+
```json
|
|
73
|
+
{
|
|
74
|
+
"KEGG_2021_Human": [
|
|
75
|
+
[1, "Breast cancer", 3.37e-22, 198530.0, 9815800.25,
|
|
76
|
+
["PIK3CA","MYC","PTEN","AKT1","KRAS","BRCA1","BRCA2","RAF1","TP53","EGFR"],
|
|
77
|
+
4.82e-20, 0, 0],
|
|
78
|
+
[2, "Endometrial cancer", 1.35e-19, 1595.2, 69306.12,
|
|
79
|
+
["PIK3CA","MYC","PTEN","AKT1","KRAS","RAF1","TP53","EGFR"],
|
|
80
|
+
9.68e-18, 0, 0],
|
|
81
|
+
[3, "Central carbon metabolism in cancer", 6.66e-19, 1285.68, 53809.88,
|
|
82
|
+
["PIK3CA","MYC","PTEN","AKT1","KRAS","RAF1","TP53","EGFR"],
|
|
83
|
+
3.17e-17, 0, 0]
|
|
84
|
+
]
|
|
85
|
+
}
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
Each result array contains: `[rank, term_name, p_value, z_score, combined_score, overlapping_genes, adjusted_p_value, old_p_value, old_adjusted_p_value]`.
|
|
89
|
+
|
|
90
|
+
### View Submitted Gene List
|
|
91
|
+
|
|
92
|
+
```bash
|
|
93
|
+
curl "https://maayanlab.cloud/Enrichr/view?userListId=124544426"
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
```json
|
|
97
|
+
{
|
|
98
|
+
"genes": ["PIK3CA","MYC","AKT1","PTEN","BRCA1","KRAS","BRCA2","EGFR","TP53","RAF1"],
|
|
99
|
+
"description": "cancer_genes"
|
|
100
|
+
}
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
### Export Results as TSV
|
|
104
|
+
|
|
105
|
+
```bash
|
|
106
|
+
curl "https://maayanlab.cloud/Enrichr/export?userListId=124544426&backgroundType=KEGG_2021_Human&filename=results" \
|
|
107
|
+
-o enrichr_results.txt
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
### List Available Libraries
|
|
111
|
+
|
|
112
|
+
```bash
|
|
113
|
+
curl "https://maayanlab.cloud/Enrichr/datasetStatistics"
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
Returns metadata for all 225 libraries as a JSON object with a top-level `statistics` array; each entry contains `libraryName`, `numTerms`, `geneCoverage`, and `genesPerTerm`.
|
|
117
|
+
|
|
118
|
+
## Available Libraries (225 Total)
|
|
119
|
+
|
|
120
|
+
### Pathway Databases
|
|
121
|
+
|
|
122
|
+
| Library | Terms | Genes |
|
|
123
|
+
|---------|-------|-------|
|
|
124
|
+
| KEGG_2026 | 352 | 8,110 |
|
|
125
|
+
| KEGG_2021_Human | 320 | 8,078 |
|
|
126
|
+
| WikiPathways_2024_Human | 829 | 8,281 |
|
|
127
|
+
| Reactome_Pathways_2024 | 2,105 | 11,671 |
|
|
128
|
+
| BioCarta_2016 | 237 | 1,348 |
|
|
129
|
+
|
|
130
|
+
### Gene Ontology
|
|
131
|
+
|
|
132
|
+
| Library | Terms | Genes |
|
|
133
|
+
|---------|-------|-------|
|
|
134
|
+
| GO_Biological_Process_2025 | 5,343 | 14,674 |
|
|
135
|
+
| GO_Molecular_Function_2025 | 1,174 | 11,484 |
|
|
136
|
+
| GO_Cellular_Component_2025 | 468 | 11,501 |
|
|
137
|
+
|
|
138
|
+
### Disease and Phenotype
|
|
139
|
+
|
|
140
|
+
| Library | Terms | Genes |
|
|
141
|
+
|---------|-------|-------|
|
|
142
|
+
| DisGeNET | 9,828 | 17,464 |
|
|
143
|
+
| GWAS_Catalog_2025 | 2,369 | 15,030 |
|
|
144
|
+
| ClinVar_2025 | 609 | 3,481 |
|
|
145
|
+
| OMIM_Disease | 90 | 1,759 |
|
|
146
|
+
| Human_Phenotype_Ontology | 1,779 | 3,096 |
|
|
147
|
+
|
|
148
|
+
### Transcription Factor and Epigenomics
|
|
149
|
+
|
|
150
|
+
| Library | Terms | Genes |
|
|
151
|
+
|---------|-------|-------|
|
|
152
|
+
| ChEA_2022 | 757 | 18,365 |
|
|
153
|
+
| ENCODE_TF_ChIP-seq_2015 | 816 | 26,382 |
|
|
154
|
+
| JASPAR_PWM_Human_2025 | 675 | 18,518 |
|
|
155
|
+
|
|
156
|
+
### Cell Type and Tissue
|
|
157
|
+
|
|
158
|
+
| Library | Terms | Genes |
|
|
159
|
+
|---------|-------|-------|
|
|
160
|
+
| CellMarker_2024 | 1,692 | 12,642 |
|
|
161
|
+
| ARCHS4_Tissues | 108 | 21,809 |
|
|
162
|
+
| Human_Gene_Atlas | 84 | 13,373 |
|
|
163
|
+
|
|
164
|
+
### Cancer and Drug
|
|
165
|
+
|
|
166
|
+
| Library | Terms | Genes |
|
|
167
|
+
|---------|-------|-------|
|
|
168
|
+
| MSigDB_Hallmark_2020 | 50 | 4,383 |
|
|
169
|
+
| MSigDB_Oncogenic_Signatures | 189 | 11,250 |
|
|
170
|
+
| DGIdb_Drug_Targets_2024 | 659 | 2,513 |
|
|
171
|
+
|
|
172
|
+
## Rate Limits
|
|
173
|
+
|
|
174
|
+
- No authentication or API key required
|
|
175
|
+
- No officially published rate limits, but automated queries should include reasonable delays (1-2 seconds between requests)
|
|
176
|
+
- Very large gene lists (>3,000 genes) may time out on some libraries
|
|
177
|
+
- The `userListId` persists server-side; avoid re-submitting the same list repeatedly
|
|
178
|
+
|
|
179
|
+
## Academic Use Cases
|
|
180
|
+
|
|
181
|
+
- **Differential expression follow-up**: Submit DEGs from RNA-seq to identify enriched pathways and GO terms
|
|
182
|
+
- **GWAS hit annotation**: Map GWAS-significant genes to disease phenotypes via DisGeNET or GWAS_Catalog
|
|
183
|
+
- **Drug target discovery**: Cross-reference gene signatures against DGIdb_Drug_Targets for druggable candidates
|
|
184
|
+
- **Transcription factor analysis**: Identify upstream regulators via ChEA or ENCODE TF libraries
|
|
185
|
+
|
|
186
|
+
## Python Usage
|
|
187
|
+
|
|
188
|
+
```python
|
|
189
|
+
import requests
|
|
190
|
+
|
|
191
|
+
ENRICHR_URL = "https://maayanlab.cloud/Enrichr"
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def submit_gene_list(genes: list[str], description: str = "") -> int:
|
|
195
|
+
"""Submit a gene list to Enrichr, return userListId."""
|
|
196
|
+
payload = {
|
|
197
|
+
"list": (None, "\n".join(genes)),
|
|
198
|
+
"description": (None, description),
|
|
199
|
+
}
|
|
200
|
+
resp = requests.post(f"{ENRICHR_URL}/addList", files=payload)
|
|
201
|
+
resp.raise_for_status()
|
|
202
|
+
return resp.json()["userListId"]
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def get_enrichment(user_list_id: int, library: str) -> list[dict]:
|
|
206
|
+
"""Retrieve enrichment results for a given library."""
|
|
207
|
+
resp = requests.get(
|
|
208
|
+
f"{ENRICHR_URL}/enrich",
|
|
209
|
+
params={"userListId": user_list_id, "backgroundType": library},
|
|
210
|
+
)
|
|
211
|
+
resp.raise_for_status()
|
|
212
|
+
data = resp.json()
|
|
213
|
+
|
|
214
|
+
results = []
|
|
215
|
+
for entry in data.get(library, []):
|
|
216
|
+
results.append({
|
|
217
|
+
"rank": entry[0],
|
|
218
|
+
"term": entry[1],
|
|
219
|
+
"p_value": entry[2],
|
|
220
|
+
"z_score": entry[3],
|
|
221
|
+
"combined_score": entry[4],
|
|
222
|
+
"genes": entry[5],
|
|
223
|
+
"adj_p_value": entry[6],
|
|
224
|
+
})
|
|
225
|
+
return results
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
def get_libraries() -> list[dict]:
|
|
229
|
+
"""List all available Enrichr libraries."""
|
|
230
|
+
resp = requests.get(f"{ENRICHR_URL}/datasetStatistics")
|
|
231
|
+
resp.raise_for_status()
|
|
232
|
+
return resp.json()["statistics"]
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
# Example: enrichment analysis of cancer-related genes
|
|
236
|
+
genes = ["BRCA1", "BRCA2", "TP53", "EGFR", "MYC",
|
|
237
|
+
"PTEN", "AKT1", "KRAS", "PIK3CA", "RAF1"]
|
|
238
|
+
|
|
239
|
+
list_id = submit_gene_list(genes, "cancer_genes")
|
|
240
|
+
print(f"Submitted gene list, ID: {list_id}")
|
|
241
|
+
|
|
242
|
+
# Query KEGG pathways
|
|
243
|
+
kegg = get_enrichment(list_id, "KEGG_2021_Human")
|
|
244
|
+
print(f"\nTop 5 KEGG pathways ({len(kegg)} total):")
|
|
245
|
+
for r in kegg[:5]:
|
|
246
|
+
print(f" {r['rank']}. {r['term']}")
|
|
247
|
+
print(f" p={r['p_value']:.2e}, adj_p={r['adj_p_value']:.2e}, "
|
|
248
|
+
f"genes={','.join(r['genes'][:5])}...")
|
|
249
|
+
|
|
250
|
+
# Query GO Biological Process
|
|
251
|
+
go_bp = get_enrichment(list_id, "GO_Biological_Process_2023")
|
|
252
|
+
print(f"\nTop 5 GO Biological Processes ({len(go_bp)} total):")
|
|
253
|
+
for r in go_bp[:5]:
|
|
254
|
+
print(f" {r['rank']}. {r['term']}")
|
|
255
|
+
print(f" p={r['p_value']:.2e}, genes={','.join(r['genes'])}")
|
|
256
|
+
```
|
|
257
|
+
|
|
258
|
+
## References
|
|
259
|
+
|
|
260
|
+
- [Enrichr Web App](https://maayanlab.cloud/Enrichr)
|
|
261
|
+
- [Enrichr API Docs](https://maayanlab.cloud/Enrichr/help#api)
|
|
262
|
+
- Chen, E.Y. et al. (2013). "Enrichr: interactive and collaborative HTML5 gene list enrichment analysis tool." *BMC Bioinformatics* 14:128.
|
|
263
|
+
- Kuleshov, M.V. et al. (2016). "Enrichr: a comprehensive gene set enrichment analysis web server 2016 update." *Nucleic Acids Res.* 44(W1):W90-W97.
|
|
264
|
+
- Xie, Z. et al. (2021). "Gene Set Knowledge Discovery with Enrichr." *Current Protocols* 1(3):e90.
|
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: ensembl-rest-api
|
|
3
|
+
description: "Query gene, sequence, and variant data via the Ensembl REST API"
|
|
4
|
+
metadata:
|
|
5
|
+
openclaw:
|
|
6
|
+
emoji: "🧬"
|
|
7
|
+
category: "domains"
|
|
8
|
+
subcategory: "biomedical"
|
|
9
|
+
keywords: ["Ensembl", "gene lookup", "sequence retrieval", "genomics", "variant data", "bioinformatics"]
|
|
10
|
+
source: "https://rest.ensembl.org"
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
# Ensembl REST API Guide
|
|
14
|
+
|
|
15
|
+
## Overview
|
|
16
|
+
|
|
17
|
+
Ensembl is a genome browser and annotation system maintained by EMBL-EBI and the Wellcome Sanger Institute, providing reference assemblies, gene annotations, variant data, and comparative genomics for over 300 vertebrate genomes. It is the genomic reference underpinning gget, PyEnsembl, and BioMart.
|
|
18
|
+
|
|
19
|
+
The REST API exposes Ensembl data via stateless HTTP. Researchers can look up genes by symbol or stable ID, retrieve genomic/cDNA/protein sequences, query variant annotations (rsIDs, clinical significance, consequences), access cross-references (HGNC, UniProt, RefSeq, OMIM), and obtain assembly metadata. Responses are available in JSON or XML, selected via the `content-type` parameter.
|
|
20
|
+
|
|
21
|
+
## Authentication
|
|
22
|
+
|
|
23
|
+
No authentication required. All endpoints are publicly accessible. Users needing higher throughput can register for an API token.
|
|
24
|
+
|
|
25
|
+
## Core Endpoints
|
|
26
|
+
|
|
27
|
+
### lookup/symbol: Gene Lookup by Symbol
|
|
28
|
+
|
|
29
|
+
Retrieve gene metadata: coordinates, biotype, canonical transcript.
|
|
30
|
+
|
|
31
|
+
- **URL**: `GET https://rest.ensembl.org/lookup/symbol/{species}/{symbol}`
|
|
32
|
+
- **Parameters**:
|
|
33
|
+
|
|
34
|
+
| Parameter | Type | Required | Description |
|
|
35
|
+
|---------------|--------|----------|--------------------------------------------------|
|
|
36
|
+
| species | string | Yes | Species name (e.g., `homo_sapiens`) |
|
|
37
|
+
| symbol | string | Yes | Gene symbol (e.g., `BRCA1`, `TP53`) |
|
|
38
|
+
| expand | int | No | Set to 1 to include transcripts and translations |
|
|
39
|
+
| content-type | string | Yes | `application/json` or `text/xml` |
|
|
40
|
+
|
|
41
|
+
- **Example**:
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
curl "https://rest.ensembl.org/lookup/symbol/homo_sapiens/BRCA1?content-type=application/json"
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
- **Response** (actual):
|
|
48
|
+
|
|
49
|
+
```json
|
|
50
|
+
{
|
|
51
|
+
"display_name": "BRCA1",
|
|
52
|
+
"description": "BRCA1 DNA repair associated [Source:HGNC Symbol;Acc:HGNC:1100]",
|
|
53
|
+
"object_type": "Gene", "species": "homo_sapiens",
|
|
54
|
+
"assembly_name": "GRCh38", "biotype": "protein_coding",
|
|
55
|
+
"seq_region_name": "17", "start": 43044292, "end": 43170245, "strand": -1,
|
|
56
|
+
"id": "ENSG00000012048", "canonical_transcript": "ENST00000357654.9"
|
|
57
|
+
}
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
### sequence/id: Sequence Retrieval
|
|
61
|
+
|
|
62
|
+
Retrieve genomic, cDNA, CDS, or protein sequences by Ensembl stable ID.
|
|
63
|
+
|
|
64
|
+
- **URL**: `GET https://rest.ensembl.org/sequence/id/{id}`
|
|
65
|
+
- **Parameters**:
|
|
66
|
+
|
|
67
|
+
| Parameter | Type | Required | Description |
|
|
68
|
+
|----------------|--------|----------|-------------------------------------------------------|
|
|
69
|
+
| id | string | Yes | Ensembl stable ID (e.g., `ENSG00000012048`) |
|
|
70
|
+
| type | string | No | `genomic`, `cdna`, `cds`, or `protein` |
|
|
71
|
+
| expand_5prime | int | No | Expand 5' flanking region by N bases |
|
|
72
|
+
| expand_3prime | int | No | Expand 3' flanking region by N bases |
|
|
73
|
+
| content-type | string | Yes | `application/json` or `text/plain` (FASTA) |
|
|
74
|
+
|
|
75
|
+
- **Example**:
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
curl "https://rest.ensembl.org/sequence/id/ENSG00000012048?content-type=application/json&type=genomic"
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
- **Response** (actual, seq truncated):
|
|
82
|
+
|
|
83
|
+
```json
|
|
84
|
+
{
|
|
85
|
+
"id": "ENSG00000012048", "query": "ENSG00000012048",
|
|
86
|
+
"desc": "chromosome:GRCh38:17:43044292:43170245:-1",
|
|
87
|
+
"molecule": "DNA",
|
|
88
|
+
"seq": "AAAGCGTGGGAATTACAGATAAATTAAAACTGTGGAACCCCTTTCCTCGGCTGCCGCCAAGGTGTTCGG..."
|
|
89
|
+
}
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
### xrefs/symbol: Cross-References
|
|
93
|
+
|
|
94
|
+
Map a gene symbol to Ensembl stable IDs and external database identifiers.
|
|
95
|
+
|
|
96
|
+
- **URL**: `GET https://rest.ensembl.org/xrefs/symbol/{species}/{symbol}`
|
|
97
|
+
- **Key params**: `species` (required), `symbol` (required), `external_db` (optional filter, e.g., `UniProt`)
|
|
98
|
+
- **Example**:
|
|
99
|
+
|
|
100
|
+
```bash
|
|
101
|
+
curl "https://rest.ensembl.org/xrefs/symbol/homo_sapiens/TP53?content-type=application/json"
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
- **Response** (actual): `[{"type":"gene","id":"ENSG00000141510"},{"type":"gene","id":"LRG_321"}]`
|
|
105
|
+
|
|
106
|
+
Use `xrefs/id/{id}` to expand an Ensembl ID to all external cross-references (UniProt, HGNC, RefSeq, OMIM).
|
|
107
|
+
|
|
108
|
+
### variation: Variant Annotation
|
|
109
|
+
|
|
110
|
+
Retrieve variant data by rsID: mappings, alleles, consequence, clinical significance.
|
|
111
|
+
|
|
112
|
+
- **URL**: `GET https://rest.ensembl.org/variation/{species}/{id}`
|
|
113
|
+
- **Key params**: `species` (required), `id` (required, e.g., `rs699`)
|
|
114
|
+
- **Example**:
|
|
115
|
+
|
|
116
|
+
```bash
|
|
117
|
+
curl "https://rest.ensembl.org/variation/homo_sapiens/rs699?content-type=application/json"
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
- **Response** (actual, synonyms truncated):
|
|
121
|
+
|
|
122
|
+
```json
|
|
123
|
+
{
|
|
124
|
+
"name": "rs699", "var_class": "SNP",
|
|
125
|
+
"most_severe_consequence": "missense_variant",
|
|
126
|
+
"clinical_significance": ["benign"],
|
|
127
|
+
"evidence": ["Frequency","1000Genomes","Cited","ESP","Phenotype_or_Disease","ExAC","TOPMed","gnomAD"],
|
|
128
|
+
"mappings": [{"location":"1:230710048-230710048","allele_string":"A/G","strand":1,"assembly_name":"GRCh38"}]
|
|
129
|
+
}
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
### info/assembly: Assembly Metadata
|
|
133
|
+
|
|
134
|
+
- **URL**: `GET https://rest.ensembl.org/info/assembly/{species}`
|
|
135
|
+
- **Response** (actual): Returns `assembly_name` ("GRCh38.p14"), `assembly_date` ("2013-12"), `assembly_accession` ("GCA_000001405.29"), full `karyotype` array (1-22, X, Y, MT), and 347 `top_level_region` entries.
|
|
136
|
+
|
|
137
|
+
## Rate Limits
|
|
138
|
+
|
|
139
|
+
- **Without token**: 15 requests per second per IP.
|
|
140
|
+
- **With token**: higher limits available upon registration.
|
|
141
|
+
- **Response headers**: `X-RateLimit-Limit`, `X-RateLimit-Remaining`, `X-RateLimit-Reset` on every response.
|
|
142
|
+
- **Batch POST endpoints**: `/lookup/id` accepts up to 1000 IDs per request; `/sequence/id` accepts up to 50 IDs per request.
|
|
143
|
+
- **GRCh37 mirror**: `https://grch37.rest.ensembl.org`
|
|
144
|
+
|
|
145
|
+
## Academic Use Cases
|
|
146
|
+
|
|
147
|
+
- **Gene annotation**: Look up coordinates and biotypes for HGNC symbols to annotate RNA-seq results.
|
|
148
|
+
- **Variant interpretation**: Retrieve consequence types and clinical significance for GWAS rsIDs.
|
|
149
|
+
- **ID mapping**: Map between Ensembl, UniProt, RefSeq, and HGNC identifiers.
|
|
150
|
+
- **Primer design**: Fetch genomic sequences with flanking regions for PCR or CRISPR targeting.
|
|
151
|
+
- **Comparative genomics**: Query homology endpoints for orthologs across species.
|
|
152
|
+
|
|
153
|
+
## Code Examples (Python)
|
|
154
|
+
|
|
155
|
+
### Gene Lookup and Sequence Retrieval
|
|
156
|
+
|
|
157
|
+
```python
|
|
158
|
+
import requests
|
|
159
|
+
|
|
160
|
+
BASE = "https://rest.ensembl.org"
|
|
161
|
+
HEADERS = {"Content-Type": "application/json"}
|
|
162
|
+
|
|
163
|
+
gene = requests.get(f"{BASE}/lookup/symbol/homo_sapiens/BRCA1", headers=HEADERS).json()
|
|
164
|
+
print(f"{gene['display_name']} ({gene['id']}) chr{gene['seq_region_name']}:{gene['start']}-{gene['end']}")
|
|
165
|
+
|
|
166
|
+
seq = requests.get(f"{BASE}/sequence/id/{gene['id']}?type=cds", headers=HEADERS).json()
|
|
167
|
+
print(f"CDS length: {len(seq['seq'])} bp")
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
### Batch ID Lookup (POST)
|
|
171
|
+
|
|
172
|
+
```python
|
|
173
|
+
import requests
|
|
174
|
+
|
|
175
|
+
ids = ["ENSG00000012048", "ENSG00000141510", "ENSG00000157764"] # BRCA1, TP53, BRAF
|
|
176
|
+
resp = requests.post(
|
|
177
|
+
"https://rest.ensembl.org/lookup/id",
|
|
178
|
+
headers={"Content-Type": "application/json", "Accept": "application/json"},
|
|
179
|
+
json={"ids": ids}
|
|
180
|
+
)
|
|
181
|
+
for ens_id, info in resp.json().items():
|
|
182
|
+
print(f"{info['display_name']:10s} chr{info['seq_region_name']}:{info['start']}-{info['end']}")
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
### Variant Annotation Pipeline
|
|
186
|
+
|
|
187
|
+
```python
|
|
188
|
+
import requests
|
|
189
|
+
|
|
190
|
+
for rsid in ["rs699", "rs1042522", "rs334"]:
|
|
191
|
+
v = requests.get(
|
|
192
|
+
f"https://rest.ensembl.org/variation/homo_sapiens/{rsid}",
|
|
193
|
+
headers={"Content-Type": "application/json"}
|
|
194
|
+
).json()
|
|
195
|
+
loc = v["mappings"][0]["location"] if v.get("mappings") else "N/A"
|
|
196
|
+
print(f"{v['name']:12s} {v['var_class']:5s} {v['most_severe_consequence']:25s} {loc}")
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
## References
|
|
200
|
+
|
|
201
|
+
- REST API docs: https://rest.ensembl.org/documentation
|
|
202
|
+
- Ensembl browser: https://www.ensembl.org
|
|
203
|
+
- gget toolkit (built on Ensembl REST): https://pachterlab.github.io/gget/
|
|
204
|
+
- GRCh37 archive API: https://grch37.rest.ensembl.org
|