@wentorai/research-plugins 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +204 -0
- package/curated/analysis/README.md +64 -0
- package/curated/domains/README.md +104 -0
- package/curated/literature/README.md +53 -0
- package/curated/research/README.md +62 -0
- package/curated/tools/README.md +87 -0
- package/curated/writing/README.md +61 -0
- package/index.ts +39 -0
- package/mcp-configs/academic-db/ChatSpatial.json +17 -0
- package/mcp-configs/academic-db/academia-mcp.json +17 -0
- package/mcp-configs/academic-db/academic-paper-explorer.json +17 -0
- package/mcp-configs/academic-db/academic-search-mcp-server.json +17 -0
- package/mcp-configs/academic-db/agentinterviews-mcp.json +17 -0
- package/mcp-configs/academic-db/all-in-mcp.json +17 -0
- package/mcp-configs/academic-db/apple-health-mcp.json +17 -0
- package/mcp-configs/academic-db/arxiv-latex-mcp.json +17 -0
- package/mcp-configs/academic-db/arxiv-mcp-server.json +17 -0
- package/mcp-configs/academic-db/bgpt-mcp.json +17 -0
- package/mcp-configs/academic-db/biomcp.json +17 -0
- package/mcp-configs/academic-db/biothings-mcp.json +17 -0
- package/mcp-configs/academic-db/catalysishub-mcp-server.json +17 -0
- package/mcp-configs/academic-db/clinicaltrialsgov-mcp-server.json +17 -0
- package/mcp-configs/academic-db/deep-research-mcp.json +17 -0
- package/mcp-configs/academic-db/dicom-mcp.json +17 -0
- package/mcp-configs/academic-db/enrichr-mcp-server.json +17 -0
- package/mcp-configs/academic-db/fec-mcp-server.json +17 -0
- package/mcp-configs/academic-db/fhir-mcp-server-themomentum.json +17 -0
- package/mcp-configs/academic-db/fhir-mcp.json +19 -0
- package/mcp-configs/academic-db/gget-mcp.json +17 -0
- package/mcp-configs/academic-db/google-researcher-mcp.json +17 -0
- package/mcp-configs/academic-db/idea-reality-mcp.json +17 -0
- package/mcp-configs/academic-db/legiscan-mcp.json +19 -0
- package/mcp-configs/academic-db/lex.json +17 -0
- package/mcp-configs/ai-platform/Adaptive-Graph-of-Thoughts-MCP-server.json +17 -0
- package/mcp-configs/ai-platform/ai-counsel.json +17 -0
- package/mcp-configs/ai-platform/atlas-mcp-server.json +17 -0
- package/mcp-configs/ai-platform/counsel-mcp.json +17 -0
- package/mcp-configs/ai-platform/cross-llm-mcp.json +17 -0
- package/mcp-configs/ai-platform/gptr-mcp.json +17 -0
- package/mcp-configs/browser/decipher-research-agent.json +17 -0
- package/mcp-configs/browser/deep-research.json +17 -0
- package/mcp-configs/browser/everything-claude-code.json +17 -0
- package/mcp-configs/browser/gpt-researcher.json +17 -0
- package/mcp-configs/browser/heurist-agent-framework.json +17 -0
- package/mcp-configs/data-platform/4everland-hosting-mcp.json +17 -0
- package/mcp-configs/data-platform/context-keeper.json +17 -0
- package/mcp-configs/data-platform/context7.json +19 -0
- package/mcp-configs/data-platform/contextstream-mcp.json +17 -0
- package/mcp-configs/data-platform/email-mcp.json +17 -0
- package/mcp-configs/note-knowledge/ApeRAG.json +17 -0
- package/mcp-configs/note-knowledge/In-Memoria.json +17 -0
- package/mcp-configs/note-knowledge/agent-memory.json +17 -0
- package/mcp-configs/note-knowledge/aimemo.json +17 -0
- package/mcp-configs/note-knowledge/biel-mcp.json +19 -0
- package/mcp-configs/note-knowledge/cognee.json +17 -0
- package/mcp-configs/note-knowledge/context-awesome.json +17 -0
- package/mcp-configs/note-knowledge/context-mcp.json +17 -0
- package/mcp-configs/note-knowledge/conversation-handoff-mcp.json +17 -0
- package/mcp-configs/note-knowledge/cortex.json +17 -0
- package/mcp-configs/note-knowledge/devrag.json +17 -0
- package/mcp-configs/note-knowledge/easy-obsidian-mcp.json +17 -0
- package/mcp-configs/note-knowledge/engram.json +17 -0
- package/mcp-configs/note-knowledge/gnosis-mcp.json +17 -0
- package/mcp-configs/note-knowledge/graphlit-mcp-server.json +19 -0
- package/mcp-configs/reference-mgr/arxiv-cli.json +17 -0
- package/mcp-configs/reference-mgr/arxiv-search-mcp.json +17 -0
- package/mcp-configs/reference-mgr/chiken.json +17 -0
- package/mcp-configs/reference-mgr/claude-scholar.json +17 -0
- package/mcp-configs/reference-mgr/devonthink-mcp.json +17 -0
- package/mcp-configs/registry.json +447 -0
- package/openclaw.plugin.json +21 -0
- package/package.json +61 -0
- package/skills/analysis/dataviz/color-accessibility-guide/SKILL.md +230 -0
- package/skills/analysis/dataviz/geospatial-viz-guide/SKILL.md +218 -0
- package/skills/analysis/dataviz/interactive-viz-guide/SKILL.md +287 -0
- package/skills/analysis/dataviz/network-visualization-guide/SKILL.md +195 -0
- package/skills/analysis/dataviz/publication-figures-guide/SKILL.md +238 -0
- package/skills/analysis/dataviz/python-dataviz-guide/SKILL.md +195 -0
- package/skills/analysis/econometrics/causal-inference-guide/SKILL.md +197 -0
- package/skills/analysis/econometrics/iv-regression-guide/SKILL.md +198 -0
- package/skills/analysis/econometrics/panel-data-guide/SKILL.md +274 -0
- package/skills/analysis/econometrics/robustness-checks/SKILL.md +250 -0
- package/skills/analysis/econometrics/stata-regression/SKILL.md +117 -0
- package/skills/analysis/econometrics/time-series-guide/SKILL.md +235 -0
- package/skills/analysis/statistics/bayesian-statistics-guide/SKILL.md +221 -0
- package/skills/analysis/statistics/hypothesis-testing-guide/SKILL.md +210 -0
- package/skills/analysis/statistics/meta-analysis-guide/SKILL.md +206 -0
- package/skills/analysis/statistics/nonparametric-tests-guide/SKILL.md +221 -0
- package/skills/analysis/statistics/power-analysis-guide/SKILL.md +240 -0
- package/skills/analysis/statistics/sem-guide/SKILL.md +231 -0
- package/skills/analysis/statistics/survival-analysis-guide/SKILL.md +195 -0
- package/skills/analysis/wrangling/missing-data-handling/SKILL.md +224 -0
- package/skills/analysis/wrangling/pandas-data-wrangling/SKILL.md +242 -0
- package/skills/analysis/wrangling/questionnaire-design-guide/SKILL.md +234 -0
- package/skills/analysis/wrangling/text-mining-guide/SKILL.md +225 -0
- package/skills/domains/ai-ml/computer-vision-guide/SKILL.md +213 -0
- package/skills/domains/ai-ml/deep-learning-papers-guide/SKILL.md +200 -0
- package/skills/domains/ai-ml/llm-evaluation-guide/SKILL.md +194 -0
- package/skills/domains/ai-ml/prompt-engineering-research/SKILL.md +233 -0
- package/skills/domains/ai-ml/reinforcement-learning-guide/SKILL.md +254 -0
- package/skills/domains/ai-ml/transformer-architecture-guide/SKILL.md +233 -0
- package/skills/domains/biomedical/clinical-research-guide/SKILL.md +232 -0
- package/skills/domains/biomedical/clinicaltrials-api/SKILL.md +177 -0
- package/skills/domains/biomedical/epidemiology-guide/SKILL.md +200 -0
- package/skills/domains/biomedical/genomics-analysis-guide/SKILL.md +270 -0
- package/skills/domains/business/market-analysis-guide/SKILL.md +112 -0
- package/skills/domains/business/strategic-management-guide/SKILL.md +154 -0
- package/skills/domains/chemistry/computational-chemistry-guide/SKILL.md +266 -0
- package/skills/domains/chemistry/retrosynthesis-guide/SKILL.md +215 -0
- package/skills/domains/cs/algorithms-complexity-guide/SKILL.md +194 -0
- package/skills/domains/cs/dblp-api/SKILL.md +129 -0
- package/skills/domains/cs/software-engineering-research/SKILL.md +218 -0
- package/skills/domains/ecology/biodiversity-data-guide/SKILL.md +296 -0
- package/skills/domains/ecology/conservation-biology-guide/SKILL.md +198 -0
- package/skills/domains/ecology/gbif-api/SKILL.md +158 -0
- package/skills/domains/ecology/inaturalist-api/SKILL.md +173 -0
- package/skills/domains/economics/behavioral-economics-guide/SKILL.md +239 -0
- package/skills/domains/economics/development-economics-guide/SKILL.md +181 -0
- package/skills/domains/economics/fred-api/SKILL.md +189 -0
- package/skills/domains/education/curriculum-design-guide/SKILL.md +144 -0
- package/skills/domains/education/learning-science-guide/SKILL.md +150 -0
- package/skills/domains/finance/financial-data-analysis/SKILL.md +152 -0
- package/skills/domains/finance/quantitative-finance-guide/SKILL.md +151 -0
- package/skills/domains/geoscience/climate-science-guide/SKILL.md +158 -0
- package/skills/domains/geoscience/gis-remote-sensing-guide/SKILL.md +129 -0
- package/skills/domains/humanities/digital-humanities-guide/SKILL.md +181 -0
- package/skills/domains/humanities/philosophy-research-guide/SKILL.md +148 -0
- package/skills/domains/law/courtlistener-api/SKILL.md +213 -0
- package/skills/domains/law/legal-research-guide/SKILL.md +250 -0
- package/skills/domains/math/linear-algebra-applications/SKILL.md +227 -0
- package/skills/domains/math/numerical-methods-guide/SKILL.md +236 -0
- package/skills/domains/math/oeis-api/SKILL.md +158 -0
- package/skills/domains/pharma/clinical-pharmacology-guide/SKILL.md +165 -0
- package/skills/domains/pharma/drug-development-guide/SKILL.md +177 -0
- package/skills/domains/physics/computational-physics-guide/SKILL.md +300 -0
- package/skills/domains/physics/nasa-ads-api/SKILL.md +150 -0
- package/skills/domains/physics/quantum-computing-guide/SKILL.md +234 -0
- package/skills/domains/social-science/social-research-methods/SKILL.md +194 -0
- package/skills/domains/social-science/survey-research-guide/SKILL.md +182 -0
- package/skills/literature/discovery/citation-alert-guide/SKILL.md +154 -0
- package/skills/literature/discovery/conference-proceedings-guide/SKILL.md +142 -0
- package/skills/literature/discovery/literature-mapping-guide/SKILL.md +175 -0
- package/skills/literature/discovery/paper-tracking-guide/SKILL.md +211 -0
- package/skills/literature/discovery/rss-paper-feeds/SKILL.md +214 -0
- package/skills/literature/discovery/semantic-scholar-recs-guide/SKILL.md +164 -0
- package/skills/literature/fulltext/doaj-api/SKILL.md +120 -0
- package/skills/literature/fulltext/interlibrary-loan-guide/SKILL.md +163 -0
- package/skills/literature/fulltext/open-access-guide/SKILL.md +183 -0
- package/skills/literature/fulltext/pmc-oai-api/SKILL.md +184 -0
- package/skills/literature/fulltext/preprint-servers-guide/SKILL.md +128 -0
- package/skills/literature/fulltext/repository-harvesting-guide/SKILL.md +207 -0
- package/skills/literature/fulltext/unpaywall-api/SKILL.md +113 -0
- package/skills/literature/metadata/altmetrics-guide/SKILL.md +132 -0
- package/skills/literature/metadata/citation-network-guide/SKILL.md +236 -0
- package/skills/literature/metadata/crossref-api/SKILL.md +133 -0
- package/skills/literature/metadata/datacite-api/SKILL.md +126 -0
- package/skills/literature/metadata/doi-resolution-guide/SKILL.md +168 -0
- package/skills/literature/metadata/h-index-guide/SKILL.md +183 -0
- package/skills/literature/metadata/journal-metrics-guide/SKILL.md +188 -0
- package/skills/literature/metadata/opencitations-api/SKILL.md +128 -0
- package/skills/literature/metadata/orcid-api/SKILL.md +136 -0
- package/skills/literature/metadata/orcid-integration-guide/SKILL.md +178 -0
- package/skills/literature/search/arxiv-api/SKILL.md +95 -0
- package/skills/literature/search/biorxiv-api/SKILL.md +123 -0
- package/skills/literature/search/boolean-search-guide/SKILL.md +199 -0
- package/skills/literature/search/citation-chaining-guide/SKILL.md +148 -0
- package/skills/literature/search/database-comparison-guide/SKILL.md +100 -0
- package/skills/literature/search/europe-pmc-api/SKILL.md +120 -0
- package/skills/literature/search/google-scholar-guide/SKILL.md +182 -0
- package/skills/literature/search/mesh-terms-guide/SKILL.md +164 -0
- package/skills/literature/search/openalex-api/SKILL.md +134 -0
- package/skills/literature/search/pubmed-api/SKILL.md +130 -0
- package/skills/literature/search/scientify-literature-survey/SKILL.md +203 -0
- package/skills/literature/search/semantic-scholar-api/SKILL.md +134 -0
- package/skills/literature/search/systematic-search-strategy/SKILL.md +214 -0
- package/skills/research/automation/ai-scientist-guide/SKILL.md +228 -0
- package/skills/research/automation/data-collection-automation/SKILL.md +248 -0
- package/skills/research/automation/research-workflow-automation/SKILL.md +266 -0
- package/skills/research/deep-research/meta-synthesis-guide/SKILL.md +174 -0
- package/skills/research/deep-research/research-cog/SKILL.md +153 -0
- package/skills/research/deep-research/scoping-review-guide/SKILL.md +217 -0
- package/skills/research/deep-research/systematic-review-guide/SKILL.md +250 -0
- package/skills/research/funding/figshare-api/SKILL.md +163 -0
- package/skills/research/funding/grant-writing-guide/SKILL.md +233 -0
- package/skills/research/funding/nsf-grant-guide/SKILL.md +206 -0
- package/skills/research/funding/open-science-guide/SKILL.md +255 -0
- package/skills/research/funding/zenodo-api/SKILL.md +174 -0
- package/skills/research/methodology/action-research-guide/SKILL.md +201 -0
- package/skills/research/methodology/experimental-design-guide/SKILL.md +236 -0
- package/skills/research/methodology/grad-school-guide/SKILL.md +182 -0
- package/skills/research/methodology/grounded-theory-guide/SKILL.md +171 -0
- package/skills/research/methodology/mixed-methods-guide/SKILL.md +208 -0
- package/skills/research/methodology/qualitative-research-guide/SKILL.md +234 -0
- package/skills/research/methodology/scientify-idea-generation/SKILL.md +222 -0
- package/skills/research/paper-review/paper-reading-assistant/SKILL.md +266 -0
- package/skills/research/paper-review/peer-review-guide/SKILL.md +227 -0
- package/skills/research/paper-review/rebuttal-writing-guide/SKILL.md +185 -0
- package/skills/research/paper-review/scientify-write-review-paper/SKILL.md +209 -0
- package/skills/tools/code-exec/jupyter-notebook-guide/SKILL.md +178 -0
- package/skills/tools/code-exec/python-reproducibility-guide/SKILL.md +341 -0
- package/skills/tools/code-exec/r-reproducibility-guide/SKILL.md +236 -0
- package/skills/tools/code-exec/sandbox-execution-guide/SKILL.md +221 -0
- package/skills/tools/diagram/mermaid-diagram-guide/SKILL.md +269 -0
- package/skills/tools/diagram/plantuml-guide/SKILL.md +397 -0
- package/skills/tools/diagram/scientific-illustration-guide/SKILL.md +225 -0
- package/skills/tools/document/anystyle-api/SKILL.md +199 -0
- package/skills/tools/document/grobid-pdf-parsing/SKILL.md +294 -0
- package/skills/tools/document/markdown-academic-guide/SKILL.md +217 -0
- package/skills/tools/document/pdf-extraction-guide/SKILL.md +321 -0
- package/skills/tools/knowledge-graph/knowledge-graph-construction/SKILL.md +306 -0
- package/skills/tools/knowledge-graph/ontology-design-guide/SKILL.md +214 -0
- package/skills/tools/knowledge-graph/rag-methodology-guide/SKILL.md +325 -0
- package/skills/tools/ocr-translate/formula-recognition-guide/SKILL.md +367 -0
- package/skills/tools/ocr-translate/handwriting-recognition-guide/SKILL.md +211 -0
- package/skills/tools/ocr-translate/latex-ocr-guide/SKILL.md +204 -0
- package/skills/tools/ocr-translate/multilingual-research-guide/SKILL.md +234 -0
- package/skills/tools/scraping/academic-web-scraping/SKILL.md +326 -0
- package/skills/tools/scraping/api-data-collection-guide/SKILL.md +301 -0
- package/skills/tools/scraping/web-scraping-ethics-guide/SKILL.md +250 -0
- package/skills/writing/citation/bibtex-management-guide/SKILL.md +246 -0
- package/skills/writing/citation/citation-style-guide/SKILL.md +248 -0
- package/skills/writing/citation/reference-manager-comparison/SKILL.md +208 -0
- package/skills/writing/citation/zotero-api/SKILL.md +188 -0
- package/skills/writing/composition/abstract-writing-guide/SKILL.md +188 -0
- package/skills/writing/composition/discussion-writing-guide/SKILL.md +194 -0
- package/skills/writing/composition/introduction-writing-guide/SKILL.md +194 -0
- package/skills/writing/composition/literature-review-writing/SKILL.md +196 -0
- package/skills/writing/composition/methods-section-guide/SKILL.md +185 -0
- package/skills/writing/composition/response-to-reviewers/SKILL.md +215 -0
- package/skills/writing/composition/scientific-writing-guide/SKILL.md +152 -0
- package/skills/writing/latex/bibliography-management-guide/SKILL.md +206 -0
- package/skills/writing/latex/latex-drawing-guide/SKILL.md +234 -0
- package/skills/writing/latex/latex-ecosystem-guide/SKILL.md +240 -0
- package/skills/writing/latex/math-typesetting-guide/SKILL.md +231 -0
- package/skills/writing/latex/overleaf-collaboration-guide/SKILL.md +211 -0
- package/skills/writing/latex/tikz-diagrams-guide/SKILL.md +211 -0
- package/skills/writing/polish/academic-translation-guide/SKILL.md +175 -0
- package/skills/writing/polish/academic-writing-refiner/SKILL.md +143 -0
- package/skills/writing/polish/ai-writing-humanizer/SKILL.md +178 -0
- package/skills/writing/polish/grammar-checker-guide/SKILL.md +184 -0
- package/skills/writing/polish/plagiarism-detection-guide/SKILL.md +167 -0
- package/skills/writing/templates/beamer-presentation-guide/SKILL.md +263 -0
- package/skills/writing/templates/conference-paper-template/SKILL.md +219 -0
- package/skills/writing/templates/thesis-template-guide/SKILL.md +200 -0
- package/skills/writing/templates/thesis-writing-guide/SKILL.md +220 -0
- package/src/tools/arxiv.ts +131 -0
- package/src/tools/crossref.ts +112 -0
- package/src/tools/openalex.ts +174 -0
- package/src/tools/pubmed.ts +166 -0
- package/src/tools/semantic-scholar.ts +108 -0
- package/src/tools/unpaywall.ts +58 -0
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: doaj-api
|
|
3
|
+
description: "Search open access journals and articles in the DOAJ directory"
|
|
4
|
+
metadata:
|
|
5
|
+
openclaw:
|
|
6
|
+
emoji: "🔍"
|
|
7
|
+
category: "literature"
|
|
8
|
+
subcategory: "fulltext"
|
|
9
|
+
keywords: ["full-text retrieval", "open access", "open access rights", "scholarly database"]
|
|
10
|
+
source: "https://doaj.org/api/docs"
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
# DOAJ API Guide
|
|
14
|
+
|
|
15
|
+
## Overview
|
|
16
|
+
|
|
17
|
+
The Directory of Open Access Journals (DOAJ) is a community-curated online directory that indexes and provides access to high-quality, open access, peer-reviewed journals. Founded in 2003, DOAJ currently indexes over 20,000 journals and 9 million articles from 130 countries, covering all areas of science, technology, medicine, social sciences, arts, and humanities.
|
|
18
|
+
|
|
19
|
+
DOAJ serves as a quality filter for open access publishing. Journals must meet strict criteria to be included, including editorial review, transparent policies, and adherence to open access principles. This makes DOAJ particularly valuable for researchers who need to verify whether a journal is a legitimate open access outlet, librarians curating discovery systems, and developers building tools that surface OA content.
|
|
20
|
+
|
|
21
|
+
The DOAJ API provides free, unauthenticated access to search and retrieve journal and article metadata. All data is available under a CC BY-SA license. The API returns JSON and supports Elasticsearch-style queries for advanced filtering.
|
|
22
|
+
|
|
23
|
+
## Authentication
|
|
24
|
+
|
|
25
|
+
No authentication required. The DOAJ API is fully open and free to use. No API key, registration, or email is needed. There are no published rate limits, but users should be respectful and avoid sending excessive concurrent requests. For bulk data access, DOAJ provides data dumps at https://doaj.org/docs/public-data-dump/.
|
|
26
|
+
|
|
27
|
+
## Core Endpoints
|
|
28
|
+
|
|
29
|
+
### Article Search: Find Open Access Articles
|
|
30
|
+
|
|
31
|
+
- **URL**: `GET https://doaj.org/api/search/articles/{search_query}`
|
|
32
|
+
- **Parameters**:
|
|
33
|
+
| Param | Type | Required | Description |
|
|
34
|
+
|-------|------|----------|-------------|
|
|
35
|
+
| search_query | string | Yes | Search query (URL path parameter, supports field-specific search) |
|
|
36
|
+
| page | integer | No | Page number (default: 1) |
|
|
37
|
+
| pageSize | integer | No | Results per page (default: 10, max: 100) |
|
|
38
|
+
| sort | string | No | Sort field (e.g., created_date:desc) |
|
|
39
|
+
- **Example**:
|
|
40
|
+
```bash
|
|
41
|
+
curl "https://doaj.org/api/search/articles/climate+change?page=1&pageSize=10"
|
|
42
|
+
```
|
|
43
|
+
- **Response**: JSON with `total` count and `results` array. Each result contains `bibjson` with `title`, `abstract`, `author`, `journal.title`, `identifier` (DOI, ISSN), `link` (full-text URL), `year`, `month`, `keywords`, and `subject`.
|
|
44
|
+
|
|
45
|
+
### Journal Search: Find Open Access Journals
|
|
46
|
+
|
|
47
|
+
- **URL**: `GET https://doaj.org/api/search/journals/{search_query}`
|
|
48
|
+
- **Parameters**:
|
|
49
|
+
| Param | Type | Required | Description |
|
|
50
|
+
|-------|------|----------|-------------|
|
|
51
|
+
| search_query | string | Yes | Journal search query |
|
|
52
|
+
| page | integer | No | Page number (default: 1) |
|
|
53
|
+
| pageSize | integer | No | Results per page (default: 10, max: 100) |
|
|
54
|
+
- **Example**:
|
|
55
|
+
```bash
|
|
56
|
+
curl "https://doaj.org/api/search/journals/bioinformatics?page=1&pageSize=5"
|
|
57
|
+
```
|
|
58
|
+
- **Response**: JSON with journal records containing `bibjson` with `title`, `alternative_title`, `identifier` (ISSN, EISSN), `publisher`, `institution`, `subject`, `license`, `apc` (article processing charge info), `language`, and `editorial.review_process`.
|
|
59
|
+
|
|
60
|
+
### Article by DOI: Direct Lookup
|
|
61
|
+
|
|
62
|
+
- **URL**: `GET https://doaj.org/api/search/articles/doi:{doi}`
|
|
63
|
+
- **Parameters**:
|
|
64
|
+
| Param | Type | Required | Description |
|
|
65
|
+
|-------|------|----------|-------------|
|
|
66
|
+
| doi | string | Yes | The DOI to look up |
|
|
67
|
+
- **Example**:
|
|
68
|
+
```bash
|
|
69
|
+
curl "https://doaj.org/api/search/articles/doi:10.1371/journal.pone.0213676"
|
|
70
|
+
```
|
|
71
|
+
- **Response**: JSON with matching article records from DOAJ-indexed journals.
|
|
72
|
+
|
|
73
|
+
### Journal by ISSN: Direct Lookup
|
|
74
|
+
|
|
75
|
+
- **URL**: `GET https://doaj.org/api/search/journals/issn:{issn}`
|
|
76
|
+
- **Parameters**:
|
|
77
|
+
| Param | Type | Required | Description |
|
|
78
|
+
|-------|------|----------|-------------|
|
|
79
|
+
| issn | string | Yes | The ISSN to look up |
|
|
80
|
+
- **Example**:
|
|
81
|
+
```bash
|
|
82
|
+
curl "https://doaj.org/api/search/journals/issn:1932-6203"
|
|
83
|
+
```
|
|
84
|
+
- **Response**: JSON with journal record including full metadata and policies.
|
|
85
|
+
|
|
86
|
+
## Rate Limits
|
|
87
|
+
|
|
88
|
+
No published rate limits. DOAJ does not enforce strict API quotas. However, the service is maintained by a small nonprofit team. Best practices include limiting requests to a reasonable rate (2-5 per second), caching results, and using the public data dump for large-scale analyses. Abusive usage may result in IP blocking without notice.
|
|
89
|
+
|
|
90
|
+
## Common Patterns
|
|
91
|
+
|
|
92
|
+
### Verify Journal Quality
|
|
93
|
+
|
|
94
|
+
Check if a journal is indexed in DOAJ (a proxy for quality and legitimacy):
|
|
95
|
+
|
|
96
|
+
```bash
|
|
97
|
+
curl -s "https://doaj.org/api/search/journals/issn:2045-2322" | jq '{total: .total, title: .results[0].bibjson.title, publisher: .results[0].bibjson.publisher.name, license: .results[0].bibjson.license[0].type}'
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
### Find OA Articles in a Subject Area
|
|
101
|
+
|
|
102
|
+
Search for articles within a specific discipline with full-text links:
|
|
103
|
+
|
|
104
|
+
```bash
|
|
105
|
+
curl -s "https://doaj.org/api/search/articles/bibjson.subject.term:neuroscience?pageSize=20" | jq '.results[] | {title: .bibjson.title, journal: .bibjson.journal.title, url: .bibjson.link[0].url, year: .bibjson.year}'
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
### Check APC (Article Processing Charge) Information
|
|
109
|
+
|
|
110
|
+
Determine if a journal charges fees for publishing:
|
|
111
|
+
|
|
112
|
+
```bash
|
|
113
|
+
curl -s "https://doaj.org/api/search/journals/issn:2041-1723" | jq '.results[0].bibjson | {title: .title, has_apc: .apc.has_apc, apc_amount: .apc.max[0].price, currency: .apc.max[0].currency}'
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
## References
|
|
117
|
+
|
|
118
|
+
- Official documentation: https://doaj.org/api/docs
|
|
119
|
+
- DOAJ public data dump: https://doaj.org/docs/public-data-dump/
|
|
120
|
+
- DOAJ journal application criteria: https://doaj.org/apply/guide/
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: interlibrary-loan-guide
|
|
3
|
+
description: "Access papers through interlibrary loan and document delivery services"
|
|
4
|
+
metadata:
|
|
5
|
+
openclaw:
|
|
6
|
+
emoji: "books"
|
|
7
|
+
category: "literature"
|
|
8
|
+
subcategory: "fulltext"
|
|
9
|
+
keywords: ["interlibrary loan", "ILL", "document delivery", "full text access", "library services"]
|
|
10
|
+
source: "wentor-research-plugins"
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
# Interlibrary Loan Guide
|
|
14
|
+
|
|
15
|
+
A skill for accessing research papers and books through interlibrary loan (ILL) and document delivery services when your institution does not have a subscription. Covers ILL workflows, alternative free access methods, and strategies for rapid document retrieval.
|
|
16
|
+
|
|
17
|
+
## Understanding Interlibrary Loan
|
|
18
|
+
|
|
19
|
+
### What Is ILL?
|
|
20
|
+
|
|
21
|
+
Interlibrary loan is a service where your library borrows materials from another library on your behalf. Most academic libraries offer ILL free of charge to their students, faculty, and staff. Turnaround time is typically 1-5 business days for articles and 1-3 weeks for books.
|
|
22
|
+
|
|
23
|
+
### Types of Requests
|
|
24
|
+
|
|
25
|
+
```
|
|
26
|
+
Article/Chapter Request:
|
|
27
|
+
- You receive a digital scan (PDF) of the article
|
|
28
|
+
- Usually delivered to your email or ILL portal
|
|
29
|
+
- Turnaround: 1-5 business days
|
|
30
|
+
- Typically free
|
|
31
|
+
|
|
32
|
+
Book Loan:
|
|
33
|
+
- Physical book is shipped from another library
|
|
34
|
+
- Must be returned by a due date (usually 3-6 weeks)
|
|
35
|
+
- Turnaround: 5-15 business days
|
|
36
|
+
- May have a small shipping fee
|
|
37
|
+
|
|
38
|
+
Thesis/Dissertation:
|
|
39
|
+
- Some are available digitally via ProQuest or institutional repositories
|
|
40
|
+
- Others must be requested as physical loans or scans
|
|
41
|
+
- Turnaround varies widely
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Step-by-Step ILL Process
|
|
45
|
+
|
|
46
|
+
### Submitting a Request
|
|
47
|
+
|
|
48
|
+
```python
|
|
49
|
+
def prepare_ill_request(item_type: str, metadata: dict) -> dict:
    """
    Prepare an interlibrary loan request with required information.

    Args:
        item_type: 'article', 'book', or 'chapter'
        metadata: Bibliographic information about the item

    Returns:
        A dict holding "type" plus one key per required field for the
        item type (an empty string when the field was not supplied).
        When any required field is empty or blank, "missing_fields" lists
        those field names and "note" carries a hint for the requester.
        For an unsupported item_type the dict holds only "type" and an
        explanatory "note".
    """
    required_fields = {
        "article": [
            "article_title", "journal_title", "author",
            "year", "volume", "issue", "pages", "doi",
        ],
        "book": [
            "title", "author", "publisher", "year",
            "isbn", "edition",
        ],
        "chapter": [
            "chapter_title", "book_title", "author",
            "editor", "publisher", "year", "pages", "isbn",
        ],
    }

    request = {"type": item_type}
    fields = required_fields.get(item_type)

    if fields is None:
        # No field list exists for this type, so nothing can be checked.
        # Say so explicitly instead of returning a dict that looks like a
        # complete request.
        supported = ", ".join(required_fields)
        request["note"] = (
            f"Unsupported item type {item_type!r}. "
            f"Supported types: {supported}."
        )
        return request

    for field in fields:
        value = metadata.get(field, "")
        request[field] = value
        # A whitespace-only string carries no bibliographic information,
        # so it is reported as missing just like an empty value.
        is_blank = isinstance(value, str) and not value.strip()
        if not value or is_blank:
            request.setdefault("missing_fields", []).append(field)

    if request.get("missing_fields"):
        request["note"] = (
            "Provide as many fields as possible. "
            "DOI or PMID alone is often sufficient for articles."
        )

    return request
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
### Typical Workflow
|
|
91
|
+
|
|
92
|
+
```
|
|
93
|
+
1. Verify your library does not have access
|
|
94
|
+
- Check library catalog and database A-Z list
|
|
95
|
+
- Try off-campus access via VPN or proxy
|
|
96
|
+
|
|
97
|
+
2. Gather bibliographic details
|
|
98
|
+
- Title, author, journal/book, year, DOI or ISBN
|
|
99
|
+
- The more detail you provide, the faster the request is filled
|
|
100
|
+
|
|
101
|
+
3. Submit request through your library's ILL system
|
|
102
|
+
- Common systems: ILLiad, Tipasa, OCLC WorldShare
|
|
103
|
+
- Usually accessible from your library's website under "Interlibrary Loan"
|
|
104
|
+
|
|
105
|
+
4. Wait for delivery
|
|
106
|
+
- Articles: PDF delivered to your email or ILL portal
|
|
107
|
+
- Books: Pick up at the library circulation desk
|
|
108
|
+
|
|
109
|
+
5. Return books by the due date
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
## Free Alternatives Before Requesting ILL
|
|
113
|
+
|
|
114
|
+
### Check These Sources First
|
|
115
|
+
|
|
116
|
+
```
|
|
117
|
+
1. Open Access repositories:
|
|
118
|
+
- PubMed Central (PMC) for NIH-funded biomedical research
|
|
119
|
+
- arXiv, bioRxiv, medRxiv for preprints
|
|
120
|
+
- SSRN for social science and economics working papers
|
|
121
|
+
- Institutional repositories (search via BASE or OpenDOAR)
|
|
122
|
+
|
|
123
|
+
2. Author contact:
|
|
124
|
+
- Email the corresponding author requesting a copy
|
|
125
|
+
- Check the author's personal or lab website for PDFs
|
|
126
|
+
- ResearchGate: request full text from the author
|
|
127
|
+
|
|
128
|
+
3. Legal free access tools:
|
|
129
|
+
- Unpaywall browser extension (finds legal OA copies)
|
|
130
|
+
- CORE.ac.uk (aggregates open access research)
|
|
131
|
+
- Google Scholar: click "PDF" links on the right side
|
|
132
|
+
|
|
133
|
+
4. Your institution:
|
|
134
|
+
- Try different databases (your library may have access via
|
|
135
|
+
a different provider)
|
|
136
|
+
- Ask a librarian: they know about access paths you may miss
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
## Document Delivery Services
|
|
140
|
+
|
|
141
|
+
### Commercial Alternatives
|
|
142
|
+
|
|
143
|
+
When ILL is too slow or unavailable, commercial document delivery services can provide articles within hours:
|
|
144
|
+
|
|
145
|
+
| Service | Turnaround | Typical Cost |
|
|
146
|
+
|---------|-----------|-------------|
|
|
147
|
+
| British Library Document Supply | 1-2 days | Varies by country |
|
|
148
|
+
| Reprints Desk | Same day to 48 hours | Per-article fee |
|
|
149
|
+
| Copyright Clearance Center (Get It Now) | Minutes to hours | Per-article fee |
|
|
150
|
+
| DeepDyve | Instant (rental model) | Monthly subscription |
|
|
151
|
+
|
|
152
|
+
### When to Use Document Delivery vs. ILL
|
|
153
|
+
|
|
154
|
+
- Use ILL for non-urgent requests (1+ week lead time is acceptable)
|
|
155
|
+
- Use document delivery when you need the article within 24 hours
|
|
156
|
+
- Use document delivery when your institution has no ILL service (e.g., independent researchers)
|
|
157
|
+
|
|
158
|
+
## Tips for Efficient Access
|
|
159
|
+
|
|
160
|
+
- Keep a reference manager (Zotero, Mendeley) to avoid requesting the same paper twice
|
|
161
|
+
- Batch ILL requests: submit several at once during literature review phases
|
|
162
|
+
- Build relationships with your subject librarian -- they can often expedite requests or suggest alternative access routes
|
|
163
|
+
- For systematic reviews, inform your library early about the volume of ILL requests you will need
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: open-access-guide
|
|
3
|
+
description: "Navigate open access policies, repositories, and legal full-text retrieval me..."
|
|
4
|
+
metadata:
|
|
5
|
+
openclaw:
|
|
6
|
+
emoji: "unlock"
|
|
7
|
+
category: "literature"
|
|
8
|
+
subcategory: "fulltext"
|
|
9
|
+
keywords: ["open access", "full-text retrieval", "journal copyright policy", "self-archiving", "open access rights"]
|
|
10
|
+
source: "wentor"
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
# Open Access Guide
|
|
14
|
+
|
|
15
|
+
A skill for understanding open access publishing models, locating free full-text articles legally, and navigating self-archiving policies. Essential for researchers at institutions with limited journal subscriptions.
|
|
16
|
+
|
|
17
|
+
## Open Access Models
|
|
18
|
+
|
|
19
|
+
### Types of Open Access
|
|
20
|
+
|
|
21
|
+
| Type | Description | Cost to Author | Reader Access |
|
|
22
|
+
|------|------------|----------------|---------------|
|
|
23
|
+
| Gold OA | Published OA by journal (APC paid) | $1,000-$11,000 | Immediate, permanent |
|
|
24
|
+
| Green OA | Self-archived preprint/postprint | Free | After embargo (0-24 months) |
|
|
25
|
+
| Diamond/Platinum OA | Journal charges no APC | Free | Immediate, permanent |
|
|
26
|
+
| Bronze OA | Free to read on publisher site | Free | No reuse license, may be temporary |
|
|
27
|
+
| Hybrid OA | OA article in subscription journal | $2,000-$5,000 | Immediate for that article |
|
|
28
|
+
|
|
29
|
+
### Checking OA Status
|
|
30
|
+
|
|
31
|
+
```python
|
|
32
|
+
import requests
|
|
33
|
+
|
|
34
|
+
def check_oa_status(doi: str, email: str = "researcher@university.edu", timeout: float = 10.0) -> dict:
    """
    Check open access availability using the Unpaywall API.

    Args:
        doi: DOI of the paper (e.g., '10.1038/s41586-021-03819-2')
        email: Contact address sent with every request (required by the
            Unpaywall API). The default is a placeholder; pass your own.
        timeout: Seconds to wait for the HTTP response before giving up.
    Returns:
        OA status and best available link. If the API answers with a
        non-200 status, a dict containing only an 'error' key is returned.
    """
    from urllib.parse import quote

    # DOIs may contain characters that are unsafe in a URL path ('#', '?',
    # '<', ...); escape them but keep the '/' between prefix and suffix.
    url = f"https://api.unpaywall.org/v2/{quote(doi, safe='/')}"

    # Without a timeout, requests can block forever on a stalled connection.
    response = requests.get(url, params={'email': email}, timeout=timeout)
    if response.status_code != 200:
        return {'error': f'API returned status {response.status_code}'}

    data = response.json()

    # 'best_oa_location' is null for closed articles, so normalise to a dict.
    best_location = data.get('best_oa_location') or {}

    return {
        'doi': doi,
        'title': data.get('title', ''),
        'is_oa': data.get('is_oa', False),
        'oa_status': data.get('oa_status', 'closed'),
        'journal_is_oa': data.get('journal_is_oa', False),
        'best_oa_url': best_location.get('url'),
        'version': best_location.get('version'),
        'license': best_location.get('license'),
        # Guard against a null list as well as a missing key.
        'all_locations': len(data.get('oa_locations') or []),
    }
|
|
66
|
+
|
|
67
|
+
# Example
result = check_oa_status('10.1038/s41586-021-03819-2')
if 'error' in result:
    # On failure the helper returns only an 'error' key, so reading
    # result['is_oa'] directly would raise KeyError.
    print(f"Lookup failed: {result['error']}")
elif result['is_oa']:
    print(f"OA available: {result['best_oa_url']}")
else:
    print("Not openly available -- check Green OA options below")
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
## Legal Full-Text Sources
|
|
76
|
+
|
|
77
|
+
### Repositories and Aggregators
|
|
78
|
+
|
|
79
|
+
| Source | Type | Coverage | URL |
|
|
80
|
+
|--------|------|----------|-----|
|
|
81
|
+
| PubMed Central (PMC) | Repository | Biomedical + life sciences | ncbi.nlm.nih.gov/pmc |
|
|
82
|
+
| arXiv | Preprint server | Physics, CS, Math, Stats | arxiv.org |
|
|
83
|
+
| bioRxiv/medRxiv | Preprint server | Biology, medicine | biorxiv.org / medrxiv.org |
|
|
84
|
+
| SSRN | Preprint server | Social sciences, law, economics | ssrn.com |
|
|
85
|
+
| Zenodo | Repository | All disciplines | zenodo.org |
|
|
86
|
+
| CORE | Aggregator | 300M+ papers from repositories | core.ac.uk |
|
|
87
|
+
| Semantic Scholar | Search + OA links | Cross-disciplinary | semanticscholar.org |
|
|
88
|
+
| BASE (Bielefeld) | Aggregator | 400M+ documents | base-search.net |
|
|
89
|
+
|
|
90
|
+
### Batch OA Lookup
|
|
91
|
+
|
|
92
|
+
```python
|
|
93
|
+
def batch_oa_lookup(dois: list[str]) -> list[dict]:
    """
    Check OA status for a batch of DOIs and print a short summary.

    Each DOI is looked up with check_oa_status(). Unpaywall asks API users
    to stay below 100,000 requests per day.

    Args:
        dois: DOIs to look up. May be empty.
    Returns:
        One result dict per DOI, in input order. Failed lookups appear as
        dicts with an 'error' key and are counted under 'unknown' in the
        printed summary.
    """
    results = [check_oa_status(doi) for doi in dois]

    # Summary statistics
    total = len(results)
    oa_count = sum(1 for r in results if r.get('is_oa', False))
    # An empty batch must not divide by zero; report 0.0% instead.
    percentage = oa_count / total * 100 if total else 0.0
    print(f"OA availability: {oa_count}/{total} ({percentage:.1f}%)")

    # Count papers per OA status (dicts keep first-seen order).
    counts = {}
    for r in results:
        oa_status = r.get('oa_status', 'unknown')
        counts[oa_status] = counts.get(oa_status, 0) + 1

    for oa_status, n_papers in counts.items():
        print(f" {oa_status}: {n_papers} papers")

    return results
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
## Self-Archiving and Green OA
|
|
121
|
+
|
|
122
|
+
### Checking Publisher Policies
|
|
123
|
+
|
|
124
|
+
Use SHERPA/RoMEO to determine what you can self-archive:
|
|
125
|
+
|
|
126
|
+
```python
|
|
127
|
+
def check_sherpa_romeo(issn: str, api_key: str) -> dict:
    """
    Check journal self-archiving policy via SHERPA/RoMEO.

    Args:
        issn: Journal ISSN
        api_key: SHERPA/RoMEO API key
    Returns:
        A dict with 'journal', 'publisher' and 'policies' (one entry per
        permitted OA route). On failure, a dict with only an 'error' key:
        either the HTTP status returned by the API or 'Journal not found'.
    """
    from urllib.parse import quote

    # Escape caller-supplied values so they cannot break the query string.
    url = f"https://v2.sherpa.ac.uk/cgi/retrieve/by_id?item-type=publication&format=Json&api-key={quote(api_key, safe='')}&filter=[[%22issn%22,%22equals%22,%22{quote(issn, safe='')}%22]]"

    # Without a timeout, requests can block forever on a stalled connection.
    response = requests.get(url, timeout=10)
    # Error responses (e.g. a rejected API key) are not guaranteed to be
    # JSON, so check the status before decoding.
    if response.status_code != 200:
        return {'error': f'API returned status {response.status_code}'}
    data = response.json()

    if not data.get('items'):
        return {'error': 'Journal not found'}

    journal = data['items'][0]
    policies = journal.get('publisher_policy') or []

    # `or [{}]` covers missing, null and empty lists alike; a plain
    # .get(key, [{}])[0] raises IndexError on an empty list.
    first_title = (journal.get('title') or [{}])[0]
    first_publisher = (journal.get('publishers') or [{}])[0]

    results = {
        'journal': first_title.get('title', ''),
        'publisher': (first_publisher.get('publisher') or {}).get('name', ''),
        'policies': []
    }

    for policy in policies:
        for permitted in policy.get('permitted_oa') or []:
            results['policies'].append({
                'version': permitted.get('article_version', ''),
                'location': (permitted.get('location') or {}).get('location', []),
                'conditions': permitted.get('conditions', []),
                # No embargo object means no embargo: report 0.
                'embargo': (permitted.get('embargo') or {}).get('amount', 0),
                'license': permitted.get('license', [])
            })

    return results
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
### Version Terminology
|
|
166
|
+
|
|
167
|
+
- **Preprint** (submitted manuscript): Author's version before peer review
|
|
168
|
+
- **Postprint** (accepted manuscript): Author's version after peer review, before typesetting
|
|
169
|
+
- **Published version** (Version of Record): Final published PDF with journal formatting
|
|
170
|
+
|
|
171
|
+
Many major funders (e.g., NIH, UKRI, ERC) require deposit of at least the postprint (accepted manuscript) in a repository. Always check your specific funder mandate and journal policy before self-archiving.
|
|
172
|
+
|
|
173
|
+
## Institutional Repository Deposit
|
|
174
|
+
|
|
175
|
+
When depositing in your institutional repository:
|
|
176
|
+
|
|
177
|
+
1. Identify the correct version (usually postprint for Green OA)
|
|
178
|
+
2. Check the embargo period from SHERPA/RoMEO
|
|
179
|
+
3. Add complete metadata: title, authors, DOI, journal, abstract, keywords
|
|
180
|
+
4. Apply the correct license (often CC BY for funder mandates)
|
|
181
|
+
5. Link to the publisher's Version of Record via DOI
|
|
182
|
+
|
|
183
|
+
This maximizes discoverability while respecting publisher agreements.
|
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: pmc-oai-api
|
|
3
|
+
description: "PubMed Central OAI-PMH metadata harvesting"
|
|
4
|
+
metadata:
|
|
5
|
+
openclaw:
|
|
6
|
+
emoji: "🔍"
|
|
7
|
+
category: "literature"
|
|
8
|
+
subcategory: "fulltext"
|
|
9
|
+
keywords: ["full-text retrieval", "open access", "PDF download", "preprint server"]
|
|
10
|
+
source: "https://pmc.ncbi.nlm.nih.gov/tools/oai/"
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
# PMC-OAI API Guide
|
|
14
|
+
|
|
15
|
+
## Overview
|
|
16
|
+
|
|
17
|
+
PubMed Central (PMC) is a free full-text archive of biomedical and life sciences journal literature at the U.S. National Institutes of Health's National Library of Medicine (NIH/NLM). The PMC OAI-PMH (Open Archives Initiative Protocol for Metadata Harvesting) service provides a standardized interface for systematically harvesting metadata and full-text content from the PMC archive.
|
|
18
|
+
|
|
19
|
+
The OAI-PMH protocol is an internationally recognized standard for metadata harvesting, widely used by libraries, repositories, and research infrastructure. The PMC implementation allows researchers to programmatically discover and retrieve article metadata, including titles, authors, abstracts, MeSH terms, publication dates, and links to full-text XML and PDF versions. This is particularly valuable for building local search indexes, systematic review pipelines, and text mining corpora.
|
|
20
|
+
|
|
21
|
+
Biomedical researchers, systematic reviewers, bioinformaticians, medical librarians, and text mining specialists use the PMC OAI-PMH service to harvest large collections of open-access biomedical literature for meta-analyses, natural language processing research, knowledge graph construction, and institutional repository enrichment. PMC contains over 9 million full-text articles, making it one of the largest open-access biomedical literature collections in the world.
|
|
22
|
+
|
|
23
|
+
## Authentication
|
|
24
|
+
|
|
25
|
+
No authentication required. The PMC OAI-PMH service is freely accessible without any API key, token, or registration. All requests are made via standard HTTP GET requests. However, users must comply with NCBI usage guidelines and respect the rate limits to ensure fair access for all users.
|
|
26
|
+
|
|
27
|
+
## Core Endpoints
|
|
28
|
+
|
|
29
|
+
### ListRecords: Harvest Article Metadata
|
|
30
|
+
|
|
31
|
+
Retrieve metadata records from PMC in bulk, with optional filtering by date range and metadata set. This is the primary endpoint for systematic metadata harvesting.
|
|
32
|
+
|
|
33
|
+
- **URL**: `GET https://pmc.ncbi.nlm.nih.gov/api/oai/v1/mh/`
|
|
34
|
+
- **Parameters**:
|
|
35
|
+
|
|
36
|
+
| Parameter | Type | Required | Description |
|
|
37
|
+
|-----------------|--------|----------|---------------------------------------------------------|
|
|
38
|
+
| verb | string | Yes | Must be `ListRecords` |
|
|
39
|
+
| metadataPrefix | string | Yes | Metadata format: `oai_dc`, `pmc`, or `pmc_fm` |
|
|
40
|
+
| set | string | No | Filter by set (e.g., journal, open access subset) |
|
|
41
|
+
| from | string | No | Start date for selective harvesting (YYYY-MM-DD) |
|
|
42
|
+
| until | string | No | End date for selective harvesting (YYYY-MM-DD) |
|
|
43
|
+
| resumptionToken | string | No | Token for paginating through large result sets |
|
|
44
|
+
|
|
45
|
+
- **Example**:
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
# Harvest recent open-access records in Dublin Core format
|
|
49
|
+
curl "https://pmc.ncbi.nlm.nih.gov/api/oai/v1/mh/?verb=ListRecords&metadataPrefix=oai_dc&from=2024-06-01&until=2024-06-07"
|
|
50
|
+
|
|
51
|
+
# Harvest PMC full metadata format
|
|
52
|
+
curl "https://pmc.ncbi.nlm.nih.gov/api/oai/v1/mh/?verb=ListRecords&metadataPrefix=pmc_fm&from=2024-06-01&until=2024-06-02"
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
- **Response**: Returns XML containing `<record>` elements, each with `<header>` (identifier, datestamp, setSpec) and `<metadata>` (article title, creators, subjects, description, date, identifiers, rights). When more results are available, the response also includes a `<resumptionToken>` for fetching the next page.
|
|
56
|
+
|
|
57
|
+
### GetRecord: Retrieve a Single Record
|
|
58
|
+
|
|
59
|
+
Fetch the complete metadata record for a specific PMC article by its OAI identifier.
|
|
60
|
+
|
|
61
|
+
- **URL**: `GET https://pmc.ncbi.nlm.nih.gov/api/oai/v1/mh/`
|
|
62
|
+
- **Parameters**:
|
|
63
|
+
|
|
64
|
+
| Parameter | Type | Required | Description |
|
|
65
|
+
|-----------------|--------|----------|----------------------------------------------------|
|
|
66
|
+
| verb | string | Yes | Must be `GetRecord` |
|
|
67
|
+
| identifier | string | Yes | OAI identifier (e.g., `oai:pubmedcentral.nih.gov:1234567`) |
|
|
68
|
+
| metadataPrefix | string | Yes | Metadata format: `oai_dc`, `pmc`, or `pmc_fm` |
|
|
69
|
+
|
|
70
|
+
- **Example**:
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
curl "https://pmc.ncbi.nlm.nih.gov/api/oai/v1/mh/?verb=GetRecord&identifier=oai:pubmedcentral.nih.gov:7096803&metadataPrefix=oai_dc"
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
- **Response**: Returns a single `<record>` element with full metadata in the requested format, including article title, all authors, abstract, journal information, publication date, DOI, PMID, and subject classifications.
|
|
77
|
+
|
|
78
|
+
### ListSets: Discover Available Sets
|
|
79
|
+
|
|
80
|
+
Retrieve the list of available sets (collections) that can be used to filter records during harvesting. Sets typically correspond to journals, open-access subsets, or subject categories.
|
|
81
|
+
|
|
82
|
+
- **URL**: `GET https://pmc.ncbi.nlm.nih.gov/api/oai/v1/mh/`
|
|
83
|
+
- **Parameters**:
|
|
84
|
+
|
|
85
|
+
| Parameter | Type | Required | Description |
|
|
86
|
+
|-----------|--------|----------|----------------------|
|
|
87
|
+
| verb | string | Yes | Must be `ListSets` |
|
|
88
|
+
|
|
89
|
+
- **Example**:
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
curl "https://pmc.ncbi.nlm.nih.gov/api/oai/v1/mh/?verb=ListSets"
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
- **Response**: Returns XML with `<set>` elements containing `<setSpec>` (machine-readable identifier) and `<setName>` (human-readable name) for each available collection.
|
|
96
|
+
|
|
97
|
+
## Rate Limits
|
|
98
|
+
|
|
99
|
+
The PMC OAI-PMH service enforces a rate limit of 3 requests per second, counted across all NCBI E-utilities and OAI services combined. Exceeding this limit may result in temporary IP blocking. For bulk harvesting, implement appropriate delays between requests and page through results sequentially with the resumptionToken rather than making parallel requests.
|
|
100
|
+
|
|
101
|
+
NCBI also requests that users identify themselves by including an email address in the HTTP request headers or by registering for an NCBI API key (which allows up to 10 requests per second).
|
|
102
|
+
|
|
103
|
+
## Common Patterns
|
|
104
|
+
|
|
105
|
+
### Incremental Metadata Harvesting
|
|
106
|
+
|
|
107
|
+
Harvest new records added since your last sync using date-based selective harvesting:
|
|
108
|
+
|
|
109
|
+
```python
|
|
110
|
+
import requests
|
|
111
|
+
import xml.etree.ElementTree as ET
|
|
112
|
+
import time
|
|
113
|
+
|
|
114
|
+
# Harvest every record in a date window, following resumption tokens
# until the server reports that no further pages remain.
base_url = "https://pmc.ncbi.nlm.nih.gov/api/oai/v1/mh/"
params = {
    "verb": "ListRecords",
    "metadataPrefix": "oai_dc",
    "from": "2024-06-01",
    "until": "2024-06-07"
}

# Namespace map is loop-invariant, so build it once.
ns = {"oai": "http://www.openarchives.org/OAI/2.0/"}

all_records = []
while True:
    resp = requests.get(base_url, params=params, timeout=60)
    resp.raise_for_status()  # fail loudly on HTTP errors instead of parsing an error page
    # Parse the raw bytes so the XML parser honours the document's declared encoding.
    root = ET.fromstring(resp.content)

    # OAI-PMH reports protocol-level problems in an <error> element.
    error = root.find("oai:error", ns)
    if error is not None:
        if error.get("code") == "noRecordsMatch":
            break  # an empty result set is not a failure
        raise RuntimeError(f"OAI-PMH error {error.get('code')}: {error.text}")

    records = root.findall(".//oai:record", ns)
    all_records.extend(records)

    token_elem = root.find(".//oai:resumptionToken", ns)
    if token_elem is not None and token_elem.text:
        # Follow-up requests carry only the verb and the token.
        params = {"verb": "ListRecords", "resumptionToken": token_elem.text}
        time.sleep(0.5)  # Respect rate limits
    else:
        break

print(f"Harvested {len(all_records)} records")
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
### Build a Local Search Index
|
|
142
|
+
|
|
143
|
+
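Extract structured metadata (title, authors) from a record for indexing. This example fetches a single record with GetRecord; apply the same parsing to each record returned by ListRecords: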
|
|
144
|
+
|
|
145
|
+
```python
|
|
146
|
+
import requests
|
|
147
|
+
import xml.etree.ElementTree as ET
|
|
148
|
+
|
|
149
|
+
# Fetch one record in Dublin Core format and print its title and authors.
url = "https://pmc.ncbi.nlm.nih.gov/api/oai/v1/mh/"
params = {
    "verb": "GetRecord",
    "identifier": "oai:pubmedcentral.nih.gov:7096803",
    "metadataPrefix": "oai_dc"
}

resp = requests.get(url, params=params, timeout=30)
resp.raise_for_status()  # fail loudly on HTTP errors instead of parsing an error page
# Parse the raw bytes so the XML parser honours the document's declared encoding.
root = ET.fromstring(resp.content)

dc_ns = "http://purl.org/dc/elements/1.1/"
oai_ns = "http://www.openarchives.org/OAI/2.0/"

# <metadata> is absent when the server answers with an OAI-PMH error.
metadata = root.find(f".//{{{oai_ns}}}metadata")
if metadata is not None:
    title = metadata.find(f".//{{{dc_ns}}}title")
    creators = metadata.findall(f".//{{{dc_ns}}}creator")
    print(f"Title: {title.text if title is not None else 'N/A'}")
    # Skip empty <dc:creator/> elements: their .text is None and would break join().
    print(f"Authors: {', '.join(c.text for c in creators if c.text)}")
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
### Discover Available Journal Sets
|
|
171
|
+
|
|
172
|
+
List all available journal sets to target specific journal harvesting:
|
|
173
|
+
|
|
174
|
+
```bash
|
|
175
|
+
curl "https://pmc.ncbi.nlm.nih.gov/api/oai/v1/mh/?verb=ListSets" | head -100
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
## References
|
|
179
|
+
|
|
180
|
+
- Official PMC OAI documentation: https://pmc.ncbi.nlm.nih.gov/tools/oai/
|
|
181
|
+
- PMC homepage: https://pmc.ncbi.nlm.nih.gov/
|
|
182
|
+
- OAI-PMH protocol specification: https://www.openarchives.org/OAI/openarchivesprotocol.html
|
|
183
|
+
- NCBI usage guidelines: https://www.ncbi.nlm.nih.gov/home/about/policies/
|
|
184
|
+
- PMC Open Access subset: https://pmc.ncbi.nlm.nih.gov/tools/openftlist/
|