@wentorai/research-plugins 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (252) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +204 -0
  3. package/curated/analysis/README.md +64 -0
  4. package/curated/domains/README.md +104 -0
  5. package/curated/literature/README.md +53 -0
  6. package/curated/research/README.md +62 -0
  7. package/curated/tools/README.md +87 -0
  8. package/curated/writing/README.md +61 -0
  9. package/index.ts +39 -0
  10. package/mcp-configs/academic-db/ChatSpatial.json +17 -0
  11. package/mcp-configs/academic-db/academia-mcp.json +17 -0
  12. package/mcp-configs/academic-db/academic-paper-explorer.json +17 -0
  13. package/mcp-configs/academic-db/academic-search-mcp-server.json +17 -0
  14. package/mcp-configs/academic-db/agentinterviews-mcp.json +17 -0
  15. package/mcp-configs/academic-db/all-in-mcp.json +17 -0
  16. package/mcp-configs/academic-db/apple-health-mcp.json +17 -0
  17. package/mcp-configs/academic-db/arxiv-latex-mcp.json +17 -0
  18. package/mcp-configs/academic-db/arxiv-mcp-server.json +17 -0
  19. package/mcp-configs/academic-db/bgpt-mcp.json +17 -0
  20. package/mcp-configs/academic-db/biomcp.json +17 -0
  21. package/mcp-configs/academic-db/biothings-mcp.json +17 -0
  22. package/mcp-configs/academic-db/catalysishub-mcp-server.json +17 -0
  23. package/mcp-configs/academic-db/clinicaltrialsgov-mcp-server.json +17 -0
  24. package/mcp-configs/academic-db/deep-research-mcp.json +17 -0
  25. package/mcp-configs/academic-db/dicom-mcp.json +17 -0
  26. package/mcp-configs/academic-db/enrichr-mcp-server.json +17 -0
  27. package/mcp-configs/academic-db/fec-mcp-server.json +17 -0
  28. package/mcp-configs/academic-db/fhir-mcp-server-themomentum.json +17 -0
  29. package/mcp-configs/academic-db/fhir-mcp.json +19 -0
  30. package/mcp-configs/academic-db/gget-mcp.json +17 -0
  31. package/mcp-configs/academic-db/google-researcher-mcp.json +17 -0
  32. package/mcp-configs/academic-db/idea-reality-mcp.json +17 -0
  33. package/mcp-configs/academic-db/legiscan-mcp.json +19 -0
  34. package/mcp-configs/academic-db/lex.json +17 -0
  35. package/mcp-configs/ai-platform/Adaptive-Graph-of-Thoughts-MCP-server.json +17 -0
  36. package/mcp-configs/ai-platform/ai-counsel.json +17 -0
  37. package/mcp-configs/ai-platform/atlas-mcp-server.json +17 -0
  38. package/mcp-configs/ai-platform/counsel-mcp.json +17 -0
  39. package/mcp-configs/ai-platform/cross-llm-mcp.json +17 -0
  40. package/mcp-configs/ai-platform/gptr-mcp.json +17 -0
  41. package/mcp-configs/browser/decipher-research-agent.json +17 -0
  42. package/mcp-configs/browser/deep-research.json +17 -0
  43. package/mcp-configs/browser/everything-claude-code.json +17 -0
  44. package/mcp-configs/browser/gpt-researcher.json +17 -0
  45. package/mcp-configs/browser/heurist-agent-framework.json +17 -0
  46. package/mcp-configs/data-platform/4everland-hosting-mcp.json +17 -0
  47. package/mcp-configs/data-platform/context-keeper.json +17 -0
  48. package/mcp-configs/data-platform/context7.json +19 -0
  49. package/mcp-configs/data-platform/contextstream-mcp.json +17 -0
  50. package/mcp-configs/data-platform/email-mcp.json +17 -0
  51. package/mcp-configs/note-knowledge/ApeRAG.json +17 -0
  52. package/mcp-configs/note-knowledge/In-Memoria.json +17 -0
  53. package/mcp-configs/note-knowledge/agent-memory.json +17 -0
  54. package/mcp-configs/note-knowledge/aimemo.json +17 -0
  55. package/mcp-configs/note-knowledge/biel-mcp.json +19 -0
  56. package/mcp-configs/note-knowledge/cognee.json +17 -0
  57. package/mcp-configs/note-knowledge/context-awesome.json +17 -0
  58. package/mcp-configs/note-knowledge/context-mcp.json +17 -0
  59. package/mcp-configs/note-knowledge/conversation-handoff-mcp.json +17 -0
  60. package/mcp-configs/note-knowledge/cortex.json +17 -0
  61. package/mcp-configs/note-knowledge/devrag.json +17 -0
  62. package/mcp-configs/note-knowledge/easy-obsidian-mcp.json +17 -0
  63. package/mcp-configs/note-knowledge/engram.json +17 -0
  64. package/mcp-configs/note-knowledge/gnosis-mcp.json +17 -0
  65. package/mcp-configs/note-knowledge/graphlit-mcp-server.json +19 -0
  66. package/mcp-configs/reference-mgr/arxiv-cli.json +17 -0
  67. package/mcp-configs/reference-mgr/arxiv-search-mcp.json +17 -0
  68. package/mcp-configs/reference-mgr/chiken.json +17 -0
  69. package/mcp-configs/reference-mgr/claude-scholar.json +17 -0
  70. package/mcp-configs/reference-mgr/devonthink-mcp.json +17 -0
  71. package/mcp-configs/registry.json +447 -0
  72. package/openclaw.plugin.json +21 -0
  73. package/package.json +61 -0
  74. package/skills/analysis/dataviz/color-accessibility-guide/SKILL.md +230 -0
  75. package/skills/analysis/dataviz/geospatial-viz-guide/SKILL.md +218 -0
  76. package/skills/analysis/dataviz/interactive-viz-guide/SKILL.md +287 -0
  77. package/skills/analysis/dataviz/network-visualization-guide/SKILL.md +195 -0
  78. package/skills/analysis/dataviz/publication-figures-guide/SKILL.md +238 -0
  79. package/skills/analysis/dataviz/python-dataviz-guide/SKILL.md +195 -0
  80. package/skills/analysis/econometrics/causal-inference-guide/SKILL.md +197 -0
  81. package/skills/analysis/econometrics/iv-regression-guide/SKILL.md +198 -0
  82. package/skills/analysis/econometrics/panel-data-guide/SKILL.md +274 -0
  83. package/skills/analysis/econometrics/robustness-checks/SKILL.md +250 -0
  84. package/skills/analysis/econometrics/stata-regression/SKILL.md +117 -0
  85. package/skills/analysis/econometrics/time-series-guide/SKILL.md +235 -0
  86. package/skills/analysis/statistics/bayesian-statistics-guide/SKILL.md +221 -0
  87. package/skills/analysis/statistics/hypothesis-testing-guide/SKILL.md +210 -0
  88. package/skills/analysis/statistics/meta-analysis-guide/SKILL.md +206 -0
  89. package/skills/analysis/statistics/nonparametric-tests-guide/SKILL.md +221 -0
  90. package/skills/analysis/statistics/power-analysis-guide/SKILL.md +240 -0
  91. package/skills/analysis/statistics/sem-guide/SKILL.md +231 -0
  92. package/skills/analysis/statistics/survival-analysis-guide/SKILL.md +195 -0
  93. package/skills/analysis/wrangling/missing-data-handling/SKILL.md +224 -0
  94. package/skills/analysis/wrangling/pandas-data-wrangling/SKILL.md +242 -0
  95. package/skills/analysis/wrangling/questionnaire-design-guide/SKILL.md +234 -0
  96. package/skills/analysis/wrangling/text-mining-guide/SKILL.md +225 -0
  97. package/skills/domains/ai-ml/computer-vision-guide/SKILL.md +213 -0
  98. package/skills/domains/ai-ml/deep-learning-papers-guide/SKILL.md +200 -0
  99. package/skills/domains/ai-ml/llm-evaluation-guide/SKILL.md +194 -0
  100. package/skills/domains/ai-ml/prompt-engineering-research/SKILL.md +233 -0
  101. package/skills/domains/ai-ml/reinforcement-learning-guide/SKILL.md +254 -0
  102. package/skills/domains/ai-ml/transformer-architecture-guide/SKILL.md +233 -0
  103. package/skills/domains/biomedical/clinical-research-guide/SKILL.md +232 -0
  104. package/skills/domains/biomedical/clinicaltrials-api/SKILL.md +177 -0
  105. package/skills/domains/biomedical/epidemiology-guide/SKILL.md +200 -0
  106. package/skills/domains/biomedical/genomics-analysis-guide/SKILL.md +270 -0
  107. package/skills/domains/business/market-analysis-guide/SKILL.md +112 -0
  108. package/skills/domains/business/strategic-management-guide/SKILL.md +154 -0
  109. package/skills/domains/chemistry/computational-chemistry-guide/SKILL.md +266 -0
  110. package/skills/domains/chemistry/retrosynthesis-guide/SKILL.md +215 -0
  111. package/skills/domains/cs/algorithms-complexity-guide/SKILL.md +194 -0
  112. package/skills/domains/cs/dblp-api/SKILL.md +129 -0
  113. package/skills/domains/cs/software-engineering-research/SKILL.md +218 -0
  114. package/skills/domains/ecology/biodiversity-data-guide/SKILL.md +296 -0
  115. package/skills/domains/ecology/conservation-biology-guide/SKILL.md +198 -0
  116. package/skills/domains/ecology/gbif-api/SKILL.md +158 -0
  117. package/skills/domains/ecology/inaturalist-api/SKILL.md +173 -0
  118. package/skills/domains/economics/behavioral-economics-guide/SKILL.md +239 -0
  119. package/skills/domains/economics/development-economics-guide/SKILL.md +181 -0
  120. package/skills/domains/economics/fred-api/SKILL.md +189 -0
  121. package/skills/domains/education/curriculum-design-guide/SKILL.md +144 -0
  122. package/skills/domains/education/learning-science-guide/SKILL.md +150 -0
  123. package/skills/domains/finance/financial-data-analysis/SKILL.md +152 -0
  124. package/skills/domains/finance/quantitative-finance-guide/SKILL.md +151 -0
  125. package/skills/domains/geoscience/climate-science-guide/SKILL.md +158 -0
  126. package/skills/domains/geoscience/gis-remote-sensing-guide/SKILL.md +129 -0
  127. package/skills/domains/humanities/digital-humanities-guide/SKILL.md +181 -0
  128. package/skills/domains/humanities/philosophy-research-guide/SKILL.md +148 -0
  129. package/skills/domains/law/courtlistener-api/SKILL.md +213 -0
  130. package/skills/domains/law/legal-research-guide/SKILL.md +250 -0
  131. package/skills/domains/math/linear-algebra-applications/SKILL.md +227 -0
  132. package/skills/domains/math/numerical-methods-guide/SKILL.md +236 -0
  133. package/skills/domains/math/oeis-api/SKILL.md +158 -0
  134. package/skills/domains/pharma/clinical-pharmacology-guide/SKILL.md +165 -0
  135. package/skills/domains/pharma/drug-development-guide/SKILL.md +177 -0
  136. package/skills/domains/physics/computational-physics-guide/SKILL.md +300 -0
  137. package/skills/domains/physics/nasa-ads-api/SKILL.md +150 -0
  138. package/skills/domains/physics/quantum-computing-guide/SKILL.md +234 -0
  139. package/skills/domains/social-science/social-research-methods/SKILL.md +194 -0
  140. package/skills/domains/social-science/survey-research-guide/SKILL.md +182 -0
  141. package/skills/literature/discovery/citation-alert-guide/SKILL.md +154 -0
  142. package/skills/literature/discovery/conference-proceedings-guide/SKILL.md +142 -0
  143. package/skills/literature/discovery/literature-mapping-guide/SKILL.md +175 -0
  144. package/skills/literature/discovery/paper-tracking-guide/SKILL.md +211 -0
  145. package/skills/literature/discovery/rss-paper-feeds/SKILL.md +214 -0
  146. package/skills/literature/discovery/semantic-scholar-recs-guide/SKILL.md +164 -0
  147. package/skills/literature/fulltext/doaj-api/SKILL.md +120 -0
  148. package/skills/literature/fulltext/interlibrary-loan-guide/SKILL.md +163 -0
  149. package/skills/literature/fulltext/open-access-guide/SKILL.md +183 -0
  150. package/skills/literature/fulltext/pmc-oai-api/SKILL.md +184 -0
  151. package/skills/literature/fulltext/preprint-servers-guide/SKILL.md +128 -0
  152. package/skills/literature/fulltext/repository-harvesting-guide/SKILL.md +207 -0
  153. package/skills/literature/fulltext/unpaywall-api/SKILL.md +113 -0
  154. package/skills/literature/metadata/altmetrics-guide/SKILL.md +132 -0
  155. package/skills/literature/metadata/citation-network-guide/SKILL.md +236 -0
  156. package/skills/literature/metadata/crossref-api/SKILL.md +133 -0
  157. package/skills/literature/metadata/datacite-api/SKILL.md +126 -0
  158. package/skills/literature/metadata/doi-resolution-guide/SKILL.md +168 -0
  159. package/skills/literature/metadata/h-index-guide/SKILL.md +183 -0
  160. package/skills/literature/metadata/journal-metrics-guide/SKILL.md +188 -0
  161. package/skills/literature/metadata/opencitations-api/SKILL.md +128 -0
  162. package/skills/literature/metadata/orcid-api/SKILL.md +136 -0
  163. package/skills/literature/metadata/orcid-integration-guide/SKILL.md +178 -0
  164. package/skills/literature/search/arxiv-api/SKILL.md +95 -0
  165. package/skills/literature/search/biorxiv-api/SKILL.md +123 -0
  166. package/skills/literature/search/boolean-search-guide/SKILL.md +199 -0
  167. package/skills/literature/search/citation-chaining-guide/SKILL.md +148 -0
  168. package/skills/literature/search/database-comparison-guide/SKILL.md +100 -0
  169. package/skills/literature/search/europe-pmc-api/SKILL.md +120 -0
  170. package/skills/literature/search/google-scholar-guide/SKILL.md +182 -0
  171. package/skills/literature/search/mesh-terms-guide/SKILL.md +164 -0
  172. package/skills/literature/search/openalex-api/SKILL.md +134 -0
  173. package/skills/literature/search/pubmed-api/SKILL.md +130 -0
  174. package/skills/literature/search/scientify-literature-survey/SKILL.md +203 -0
  175. package/skills/literature/search/semantic-scholar-api/SKILL.md +134 -0
  176. package/skills/literature/search/systematic-search-strategy/SKILL.md +214 -0
  177. package/skills/research/automation/ai-scientist-guide/SKILL.md +228 -0
  178. package/skills/research/automation/data-collection-automation/SKILL.md +248 -0
  179. package/skills/research/automation/research-workflow-automation/SKILL.md +266 -0
  180. package/skills/research/deep-research/meta-synthesis-guide/SKILL.md +174 -0
  181. package/skills/research/deep-research/research-cog/SKILL.md +153 -0
  182. package/skills/research/deep-research/scoping-review-guide/SKILL.md +217 -0
  183. package/skills/research/deep-research/systematic-review-guide/SKILL.md +250 -0
  184. package/skills/research/funding/figshare-api/SKILL.md +163 -0
  185. package/skills/research/funding/grant-writing-guide/SKILL.md +233 -0
  186. package/skills/research/funding/nsf-grant-guide/SKILL.md +206 -0
  187. package/skills/research/funding/open-science-guide/SKILL.md +255 -0
  188. package/skills/research/funding/zenodo-api/SKILL.md +174 -0
  189. package/skills/research/methodology/action-research-guide/SKILL.md +201 -0
  190. package/skills/research/methodology/experimental-design-guide/SKILL.md +236 -0
  191. package/skills/research/methodology/grad-school-guide/SKILL.md +182 -0
  192. package/skills/research/methodology/grounded-theory-guide/SKILL.md +171 -0
  193. package/skills/research/methodology/mixed-methods-guide/SKILL.md +208 -0
  194. package/skills/research/methodology/qualitative-research-guide/SKILL.md +234 -0
  195. package/skills/research/methodology/scientify-idea-generation/SKILL.md +222 -0
  196. package/skills/research/paper-review/paper-reading-assistant/SKILL.md +266 -0
  197. package/skills/research/paper-review/peer-review-guide/SKILL.md +227 -0
  198. package/skills/research/paper-review/rebuttal-writing-guide/SKILL.md +185 -0
  199. package/skills/research/paper-review/scientify-write-review-paper/SKILL.md +209 -0
  200. package/skills/tools/code-exec/jupyter-notebook-guide/SKILL.md +178 -0
  201. package/skills/tools/code-exec/python-reproducibility-guide/SKILL.md +341 -0
  202. package/skills/tools/code-exec/r-reproducibility-guide/SKILL.md +236 -0
  203. package/skills/tools/code-exec/sandbox-execution-guide/SKILL.md +221 -0
  204. package/skills/tools/diagram/mermaid-diagram-guide/SKILL.md +269 -0
  205. package/skills/tools/diagram/plantuml-guide/SKILL.md +397 -0
  206. package/skills/tools/diagram/scientific-illustration-guide/SKILL.md +225 -0
  207. package/skills/tools/document/anystyle-api/SKILL.md +199 -0
  208. package/skills/tools/document/grobid-pdf-parsing/SKILL.md +294 -0
  209. package/skills/tools/document/markdown-academic-guide/SKILL.md +217 -0
  210. package/skills/tools/document/pdf-extraction-guide/SKILL.md +321 -0
  211. package/skills/tools/knowledge-graph/knowledge-graph-construction/SKILL.md +306 -0
  212. package/skills/tools/knowledge-graph/ontology-design-guide/SKILL.md +214 -0
  213. package/skills/tools/knowledge-graph/rag-methodology-guide/SKILL.md +325 -0
  214. package/skills/tools/ocr-translate/formula-recognition-guide/SKILL.md +367 -0
  215. package/skills/tools/ocr-translate/handwriting-recognition-guide/SKILL.md +211 -0
  216. package/skills/tools/ocr-translate/latex-ocr-guide/SKILL.md +204 -0
  217. package/skills/tools/ocr-translate/multilingual-research-guide/SKILL.md +234 -0
  218. package/skills/tools/scraping/academic-web-scraping/SKILL.md +326 -0
  219. package/skills/tools/scraping/api-data-collection-guide/SKILL.md +301 -0
  220. package/skills/tools/scraping/web-scraping-ethics-guide/SKILL.md +250 -0
  221. package/skills/writing/citation/bibtex-management-guide/SKILL.md +246 -0
  222. package/skills/writing/citation/citation-style-guide/SKILL.md +248 -0
  223. package/skills/writing/citation/reference-manager-comparison/SKILL.md +208 -0
  224. package/skills/writing/citation/zotero-api/SKILL.md +188 -0
  225. package/skills/writing/composition/abstract-writing-guide/SKILL.md +188 -0
  226. package/skills/writing/composition/discussion-writing-guide/SKILL.md +194 -0
  227. package/skills/writing/composition/introduction-writing-guide/SKILL.md +194 -0
  228. package/skills/writing/composition/literature-review-writing/SKILL.md +196 -0
  229. package/skills/writing/composition/methods-section-guide/SKILL.md +185 -0
  230. package/skills/writing/composition/response-to-reviewers/SKILL.md +215 -0
  231. package/skills/writing/composition/scientific-writing-guide/SKILL.md +152 -0
  232. package/skills/writing/latex/bibliography-management-guide/SKILL.md +206 -0
  233. package/skills/writing/latex/latex-drawing-guide/SKILL.md +234 -0
  234. package/skills/writing/latex/latex-ecosystem-guide/SKILL.md +240 -0
  235. package/skills/writing/latex/math-typesetting-guide/SKILL.md +231 -0
  236. package/skills/writing/latex/overleaf-collaboration-guide/SKILL.md +211 -0
  237. package/skills/writing/latex/tikz-diagrams-guide/SKILL.md +211 -0
  238. package/skills/writing/polish/academic-translation-guide/SKILL.md +175 -0
  239. package/skills/writing/polish/academic-writing-refiner/SKILL.md +143 -0
  240. package/skills/writing/polish/ai-writing-humanizer/SKILL.md +178 -0
  241. package/skills/writing/polish/grammar-checker-guide/SKILL.md +184 -0
  242. package/skills/writing/polish/plagiarism-detection-guide/SKILL.md +167 -0
  243. package/skills/writing/templates/beamer-presentation-guide/SKILL.md +263 -0
  244. package/skills/writing/templates/conference-paper-template/SKILL.md +219 -0
  245. package/skills/writing/templates/thesis-template-guide/SKILL.md +200 -0
  246. package/skills/writing/templates/thesis-writing-guide/SKILL.md +220 -0
  247. package/src/tools/arxiv.ts +131 -0
  248. package/src/tools/crossref.ts +112 -0
  249. package/src/tools/openalex.ts +174 -0
  250. package/src/tools/pubmed.ts +166 -0
  251. package/src/tools/semantic-scholar.ts +108 -0
  252. package/src/tools/unpaywall.ts +58 -0
@@ -0,0 +1,341 @@
1
+ ---
2
+ name: python-reproducibility-guide
3
+ description: "Reproducible Python environments, notebooks, and literate programming"
4
+ metadata:
5
+ openclaw:
6
+ emoji: "snake"
7
+ category: "tools"
8
+ subcategory: "code-exec"
9
+ keywords: ["sandbox execution", "Jupyter notebook", "computational notebook", "literate programming"]
10
+ source: "wentor-research-plugins"
11
+ ---
12
+
13
+ # Python Reproducibility Guide
14
+
15
+ Set up reproducible Python environments for research computing, using virtual environments, dependency management, Jupyter notebooks, and literate programming practices.
16
+
17
+ ## Environment Management
18
+
19
+ ### Virtual Environments
20
+
21
+ ```bash
22
+ # Option 1: venv (built-in, lightweight)
23
+ python -m venv .venv
24
+ source .venv/bin/activate # macOS/Linux
25
+ # .venv\Scripts\activate # Windows
26
+ pip install -r requirements.txt
27
+
28
+ # Option 2: conda (includes non-Python dependencies)
29
+ conda create -n myproject python=3.11
30
+ conda activate myproject
31
+ conda install numpy pandas scipy matplotlib
32
+ conda env export > environment.yml
33
+
34
+ # Option 3: uv (fast, modern Python package manager)
35
+ uv venv
36
+ source .venv/bin/activate
37
+ uv pip install -r requirements.txt
38
+ ```
39
+
40
+ ### Dependency Pinning
41
+
42
+ ```bash
43
+ # requirements.txt with exact versions (pip freeze)
44
+ pip freeze > requirements.txt
45
+
46
+ # Better: use pip-tools for compiled dependencies
47
+ pip install pip-tools
48
+
49
+ # Create requirements.in (human-readable, loose constraints)
50
+ cat > requirements.in << 'EOF'
51
+ numpy>=1.24
52
+ pandas>=2.0
53
+ scipy>=1.11
54
+ matplotlib>=3.7
55
+ scikit-learn>=1.3
56
+ EOF
57
+
58
+ # Compile to requirements.txt (pinned, reproducible)
59
+ pip-compile requirements.in --output-file requirements.txt
60
+
61
+ # Install from compiled requirements
62
+ pip-sync requirements.txt
63
+ ```
64
+
65
+ ### pyproject.toml (Modern Standard)
66
+
67
+ ```toml
68
+ [project]
69
+ name = "my-research-project"
70
+ version = "0.1.0"
71
+ description = "Analysis code for paper: Title"
72
+ requires-python = ">=3.10"
73
+ dependencies = [
74
+ "numpy>=1.24",
75
+ "pandas>=2.0",
76
+ "scipy>=1.11",
77
+ "matplotlib>=3.7",
78
+ "scikit-learn>=1.3",
79
+ "statsmodels>=0.14",
80
+ ]
81
+
82
+ [project.optional-dependencies]
83
+ dev = ["pytest", "black", "ruff", "jupyter"]
84
+ gpu = ["torch>=2.0", "torchvision"]
85
+
86
+ [tool.ruff]
87
+ line-length = 88
88
+ select = ["E", "F", "I"]
89
+ ```
90
+
91
+ ## Jupyter Notebooks for Research
92
+
93
+ ### Best Practices
94
+
95
+ ```python
96
+ # Cell 1: Imports and configuration (always the first cell)
97
+ import numpy as np
98
+ import pandas as pd
99
+ import matplotlib.pyplot as plt
100
+ from pathlib import Path
101
+
102
+ # Configuration
103
+ DATA_DIR = Path("./data")
104
+ OUTPUT_DIR = Path("./outputs")
105
+ OUTPUT_DIR.mkdir(exist_ok=True)
106
+
107
+ RANDOM_SEED = 42
108
+ np.random.seed(RANDOM_SEED)
109
+
110
+ # Matplotlib defaults
111
+ plt.rcParams.update({
112
+ "figure.figsize": (10, 6),
113
+ "figure.dpi": 150,
114
+ "font.size": 12,
115
+ "axes.spines.top": False,
116
+ "axes.spines.right": False,
117
+ })
118
+
119
+ print(f"NumPy: {np.__version__}")
120
+ print(f"Pandas: {pd.__version__}")
121
+ ```
122
+
123
+ ### Notebook Structure Template
124
+
125
+ ```markdown
126
+ # Paper Title: Analysis Notebook
127
+
128
+ ## 1. Setup and Data Loading
129
+ [Import libraries, set seeds, load data]
130
+
131
+ ## 2. Data Exploration
132
+ [Summary statistics, distributions, missing data check]
133
+
134
+ ## 3. Preprocessing
135
+ [Cleaning, transformation, feature engineering]
136
+
137
+ ## 4. Analysis
138
+ ### 4.1 Primary Analysis
139
+ [Main statistical tests or model training]
140
+ ### 4.2 Sensitivity Analysis
141
+ [Robustness checks]
142
+ ### 4.3 Supplementary Analysis
143
+ [Additional analyses for appendix]
144
+
145
+ ## 5. Visualization
146
+ [Publication-quality figures]
147
+
148
+ ## 6. Export Results
149
+ [Save tables, figures, and summary statistics]
150
+ ```
151
+
152
+ ### Converting Notebooks to Scripts
153
+
154
+ ```bash
155
+ # Convert notebook to Python script
156
+ jupyter nbconvert --to script analysis.ipynb
157
+
158
+ # Convert notebook to HTML report
159
+ jupyter nbconvert --to html --no-input analysis.ipynb
160
+
161
+ # Convert notebook to PDF (requires a LaTeX installation that provides xelatex)
162
+ jupyter nbconvert --to pdf analysis.ipynb
163
+
164
+ # Execute notebook from command line (and save output)
165
+ jupyter nbconvert --execute --to notebook --inplace analysis.ipynb
166
+ ```
167
+
168
+ ## Reproducible Random Seeds
169
+
170
+ ```python
171
+ import numpy as np
172
+ import random
173
+ import os
174
+
175
+ def set_global_seed(seed=42):
176
+ """Set random seeds for full reproducibility."""
177
+ random.seed(seed)
178
+ np.random.seed(seed)
179
+ os.environ["PYTHONHASHSEED"] = str(seed)  # NOTE: only affects subprocesses; export it before starting Python to fix hashing in this process
180
+
181
+ # PyTorch (if used)
182
+ try:
183
+ import torch
184
+ torch.manual_seed(seed)
185
+ torch.cuda.manual_seed_all(seed)
186
+ torch.backends.cudnn.deterministic = True
187
+ torch.backends.cudnn.benchmark = False
188
+ except ImportError:
189
+ pass
190
+
191
+ # TensorFlow (if used)
192
+ try:
193
+ import tensorflow as tf
194
+ tf.random.set_seed(seed)
195
+ except ImportError:
196
+ pass
197
+
198
+ set_global_seed(42)
199
+ ```
200
+
201
+ ## Containerization with Docker
202
+
203
+ ### Dockerfile for Research
204
+
205
+ ```dockerfile
206
+ FROM python:3.11-slim
207
+
208
+ WORKDIR /app
209
+
210
+ # System dependencies
211
+ RUN apt-get update && apt-get install -y \
212
+ build-essential \
213
+ git \
214
+ && rm -rf /var/lib/apt/lists/*
215
+
216
+ # Python dependencies
217
+ COPY requirements.txt .
218
+ RUN pip install --no-cache-dir -r requirements.txt
219
+
220
+ # Copy project code
221
+ COPY . .
222
+
223
+ # Default: run the analysis
224
+ CMD ["python", "run_analysis.py"]
225
+ ```
226
+
227
+ ```bash
228
+ # Build and run
229
+ docker build -t my-analysis .
230
+ docker run -v $(pwd)/data:/app/data -v $(pwd)/outputs:/app/outputs my-analysis
231
+
232
+ # Interactive Jupyter inside Docker
233
+ docker run -p 8888:8888 -v $(pwd):/app my-analysis \
234
+ jupyter notebook --ip=0.0.0.0 --allow-root --no-browser
235
+ ```
236
+
237
+ ## Project Structure
238
+
239
+ ```
240
+ research-project/
241
+ ├── README.md # Project overview and how to reproduce
242
+ ├── pyproject.toml # Dependencies and project metadata
243
+ ├── requirements.txt # Pinned dependencies
244
+ ├── Dockerfile # Containerized environment
245
+ ├── Makefile # Automation (make data, make analysis, make figures)
246
+ ├── data/
247
+ │ ├── raw/ # Original, immutable data
248
+ │ ├── processed/ # Cleaned, transformed data
249
+ │ └── external/ # Third-party data sources
250
+ ├── notebooks/
251
+ │ ├── 01_exploration.ipynb # Data exploration
252
+ │ ├── 02_analysis.ipynb # Main analysis
253
+ │ └── 03_figures.ipynb # Publication figures
254
+ ├── src/
255
+ │ ├── __init__.py
256
+ │ ├── data.py # Data loading and preprocessing
257
+ │ ├── models.py # Statistical models and ML
258
+ │ ├── visualization.py # Plotting functions
259
+ │ └── utils.py # Shared utilities
260
+ ├── tests/
261
+ │ ├── test_data.py # Data pipeline tests
262
+ │ └── test_models.py # Model correctness tests
263
+ ├── outputs/
264
+ │ ├── figures/ # Generated figures (PDF, PNG)
265
+ │ ├── tables/ # Generated tables (CSV, LaTeX)
266
+ │ └── models/ # Saved model artifacts
267
+ └── configs/
268
+ ├── experiment_1.yaml # Experiment configuration
269
+ └── experiment_2.yaml # Experiment configuration
270
+ ```
271
+
272
+ ## Makefile for Automation
273
+
274
+ ```makefile
275
+ .PHONY: all data analysis figures clean reproduce test format
276
+
277
+ all: data analysis figures
278
+
279
+ data:
280
+ python src/data.py --input data/raw/ --output data/processed/
281
+
282
+ analysis: data
283
+ python -m jupyter nbconvert --execute notebooks/02_analysis.ipynb \
284
+ --to notebook --inplace
285
+
286
+ figures: analysis
287
+ python src/visualization.py --output outputs/figures/
288
+
289
+ clean:
290
+ rm -rf data/processed/ outputs/
291
+
292
+ # Reproduce the full pipeline from scratch
293
+ reproduce: clean all
294
+ @echo "All results reproduced successfully."
295
+
296
+ # Run tests
297
+ test:
298
+ pytest tests/ -v
299
+
300
+ # Format code
301
+ format:
302
+ ruff check --fix src/ tests/
303
+ ruff format src/ tests/
304
+ ```
305
+
306
+ ## Logging and Experiment Tracking
307
+
308
+ ```python
309
+ import logging
310
+ from datetime import datetime
311
+
312
+ # Set up logging (outputs/logs/ must already exist: FileHandler does not create missing directories)
313
+ logging.basicConfig(
314
+ level=logging.INFO,
315
+ format="%(asctime)s [%(levelname)s] %(message)s",
316
+ handlers=[
317
+ logging.FileHandler(f"outputs/logs/run_{datetime.now():%Y%m%d_%H%M%S}.log"),
318
+ logging.StreamHandler()
319
+ ]
320
+ )
321
+ logger = logging.getLogger(__name__)
322
+
323
+ # Log experiment parameters (RANDOM_SEED and DATA_DIR are defined in the notebook's first setup cell)
324
+ logger.info(f"Random seed: {RANDOM_SEED}")
325
+ logger.info(f"Data file: {DATA_DIR / 'dataset.csv'}")
326
+ logger.info(f"Model: Linear Regression with L2 regularization (alpha=0.1)")
327
+ logger.info(f"Train/test split: 80/20")
328
+ ```
329
+
330
+ ## Reproducibility Checklist
331
+
332
+ - [ ] All dependencies are pinned to exact versions in `requirements.txt` (version ranges in `pyproject.toml` alone do not pin the environment)
333
+ - [ ] Random seeds are set at the beginning of every script/notebook
334
+ - [ ] Raw data is stored separately and never modified
335
+ - [ ] Data preprocessing steps are scripted (not manual)
336
+ - [ ] Analysis can be re-run with a single command (`make all` or `python run_analysis.py`)
337
+ - [ ] Environment is documented (Python version, OS, hardware specs)
338
+ - [ ] Figures are generated programmatically (not edited manually)
339
+ - [ ] Code is tested (at least smoke tests for critical functions)
340
+ - [ ] A README explains how to set up the environment and reproduce results
341
+ - [ ] Version control (git) tracks all code changes with meaningful commits
@@ -0,0 +1,236 @@
1
+ ---
2
+ name: r-reproducibility-guide
3
+ description: "Create reproducible research workflows with R and RMarkdown/Quarto"
4
+ metadata:
5
+ openclaw:
6
+ emoji: "repeat"
7
+ category: "tools"
8
+ subcategory: "code-exec"
9
+ keywords: ["R programming", "RMarkdown", "reproducibility", "Quarto", "renv", "computational reproducibility"]
10
+ source: "wentor-research-plugins"
11
+ ---
12
+
13
+ # Reproducible Research with R
14
+
15
+ A skill for creating fully reproducible research workflows in R using RMarkdown, Quarto, package management with renv, and project organization best practices. Covers literate programming, environment management, automated reporting, and sharing reproducible analyses.
16
+
17
+ ## Project Organization
18
+
19
+ ### Recommended Directory Structure
20
+
21
+ ```
22
+ my-research-project/
23
+ README.md
24
+ my-project.Rproj # RStudio project file
25
+ renv.lock # Package versions (managed by renv)
26
+ renv/ # renv library directory
27
+ data/
28
+ raw/ # Untouched original data
29
+ processed/ # Cleaned, analysis-ready data
30
+ R/
31
+ 01-clean.R # Data cleaning functions
32
+ 02-analyze.R # Analysis functions
33
+ 03-visualize.R # Plotting functions
34
+ utils.R # Helper functions
35
+ analysis/
36
+ main-analysis.Rmd # Primary analysis notebook
37
+ supplementary.Rmd # Supplementary analyses
38
+ output/
39
+ figures/ # Generated plots
40
+ tables/ # Generated tables
41
+ manuscript.pdf # Compiled document
42
+ Makefile # Reproducible build commands
43
+ ```
44
+
45
+ ### Key Principles
46
+
47
+ ```
48
+ 1. Raw data is read-only (never modify original data files)
49
+ 2. All processing steps are scripted (no manual spreadsheet edits)
50
+ 3. Generated outputs can be deleted and recreated from source
51
+ 4. Package versions are locked with renv
52
+ 5. Random seeds are set for all stochastic operations
53
+ 6. Paths are relative to project root (never absolute)
54
+ ```
55
+
56
+ ## RMarkdown and Quarto
57
+
58
+ ### RMarkdown Document
59
+
60
+ ````markdown
61
+ ---
62
+ title: "Analysis of Treatment Effects"
63
+ author: "Jane Smith"
64
+ date: "`r Sys.Date()`"
65
+ output:
66
+ pdf_document:
67
+ toc: true
68
+ number_sections: true
69
+ html_document:
70
+ toc: true
71
+ code_folding: hide
72
+ bibliography: references.bib
73
+ ---
74
+
75
+ ```{r setup, include=FALSE}
76
+ knitr::opts_chunk$set(
77
+ echo = TRUE,
78
+ message = FALSE,
79
+ warning = FALSE,
80
+ fig.width = 7,
81
+ fig.height = 5,
82
+ dpi = 300
83
+ )
84
+
85
+ library(tidyverse)
86
+ library(broom)
87
+
88
+ set.seed(42)
89
+ ```
90
+
91
+ # Introduction
92
+
93
+ This analysis examines the effect of treatment on outcomes
94
+ [@smith2024].
95
+
96
+ # Methods
97
+
98
+ ```{r load-data}
99
+ df <- read_csv(here::here("data", "processed", "clean_data.csv"))
100
+ glimpse(df)
101
+ ```
102
+
103
+ # Results
104
+
105
+ ```{r model}
106
+ model <- lm(outcome ~ treatment + age + gender, data = df)
107
+ tidy(model, conf.int = TRUE)
108
+ ```
109
+
110
+ ```{r fig-main, fig.cap="Treatment effect on primary outcome."}
111
+ ggplot(df, aes(x = treatment, y = outcome, fill = treatment)) +
112
+ geom_boxplot() +
113
+ theme_minimal() +
114
+ labs(x = "Group", y = "Outcome Score")
115
+ ```
116
+ ````
117
+
118
+ ### Quarto (Next Generation)
119
+
120
+ ```yaml
121
+ ---
122
+ title: "Analysis Report"
123
+ format:
124
+ html:
125
+ code-fold: true
126
+ toc: true
127
+ pdf:
128
+ documentclass: article
129
+ execute:
130
+ echo: true
131
+ warning: false
132
+ ---
133
+ ```
134
+
135
+ Quarto supports R, Python, Julia, and Observable JS in a single document, making it ideal for research workflows that combine several programming languages.
136
+
137
+ ## Package Management with renv
138
+
139
+ ### Setting Up renv
140
+
141
+ ```r
142
+ # Initialize renv in your project
143
+ renv::init()
144
+
145
+ # Install packages as usual
146
+ install.packages("tidyverse")
147
+ install.packages("lme4")
148
+
149
+ # Snapshot current package versions
150
+ renv::snapshot()
151
+
152
+ # Restore environment from lockfile (on a new machine)
153
+ renv::restore()
154
+ ```
155
+
156
+ ### How renv Works
157
+
158
+ ```python
159
+ def explain_renv() -> dict:
160
+ """
161
+ Explain the renv reproducibility workflow.
162
+ """
163
+ return {
164
+ "init": "Creates project-local library and renv.lock",
165
+ "snapshot": (
166
+ "Records exact package versions (name, version, source) "
167
+ "into renv.lock. Commit this file to Git."
168
+ ),
169
+ "restore": (
170
+ "Installs exact package versions from renv.lock on any machine. "
171
+ "Collaborators run renv::restore() to match your environment."
172
+ ),
173
+ "benefits": [
174
+ "Each project has isolated package versions",
175
+ "No conflicts between projects",
176
+ "Exact reproducibility months or years later",
177
+ "renv.lock is a text file that diffs cleanly in Git"
178
+ ]
179
+ }
180
+ ```
181
+
182
+ ## Automated Reporting
183
+
184
+ ### Make-Based Pipeline
185
+
186
+ ```makefile
187
+ # Makefile for reproducible analysis
188
+
189
+ all: output/manuscript.pdf
190
+
191
+ data/processed/clean_data.csv: data/raw/study_data.csv R/01-clean.R
192
+ Rscript R/01-clean.R
193
+
194
+ output/figures/figure1.pdf: data/processed/clean_data.csv R/03-visualize.R
195
+ Rscript R/03-visualize.R
196
+
197
+ output/manuscript.pdf: analysis/main-analysis.Rmd data/processed/clean_data.csv
198
+ Rscript -e "rmarkdown::render('analysis/main-analysis.Rmd', output_file='manuscript.pdf', output_dir='output')"
199
+
200
+ clean:
201
+ rm -rf output/figures/* output/manuscript.pdf data/processed/*
202
+ ```
203
+
204
+ ### targets Package (R-native Pipeline)
205
+
206
+ ```r
207
+ # _targets.R
208
+ library(targets)
209
+
210
+ tar_option_set(packages = c("tidyverse", "broom"))
211
+
212
+ list(
213
+ tar_target(raw_data, read_csv("data/raw/study_data.csv")),
214
+ tar_target(clean_data, clean_dataset(raw_data)),
215
+ tar_target(model, fit_model(clean_data)),
216
+ tar_target(report, {
217
+ rmarkdown::render("analysis/main-analysis.Rmd", output_file = "manuscript.pdf", output_dir = "output")
218
+ "output/manuscript.pdf"
219
+ })
220
+ )
221
+ ```
222
+
223
+ The targets package tracks dependencies between pipeline steps and only reruns steps whose inputs have changed, saving time on large analyses.
224
+
225
+ ## Sharing Reproducible Analyses
226
+
227
+ ### Options for Sharing
228
+
229
+ | Method | Effort | Reproducibility |
230
+ |--------|--------|----------------|
231
+ | GitHub repo + renv.lock | Low | Good (requires R installation) |
232
+ | Docker container | Medium | Excellent (full environment) |
233
+ | Binder (mybinder.org) | Low | Good (browser-based, no install) |
234
+ | Code Ocean capsule | Medium | Excellent (certified reproducibility) |
235
+
236
+ Always include a README with instructions for reproducing the analysis: required software, how to install dependencies (`renv::restore()`), how to run the pipeline (`make all`), and expected runtime.