@wentorai/research-plugins 1.0.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (415)
  1. package/README.md +22 -22
  2. package/curated/analysis/README.md +82 -56
  3. package/curated/domains/README.md +225 -69
  4. package/curated/literature/README.md +115 -46
  5. package/curated/research/README.md +106 -58
  6. package/curated/tools/README.md +107 -87
  7. package/curated/writing/README.md +92 -45
  8. package/mcp-configs/academic-db/alphafold-mcp.json +20 -0
  9. package/mcp-configs/academic-db/brightspace-mcp.json +21 -0
  10. package/mcp-configs/academic-db/climatiq-mcp.json +20 -0
  11. package/mcp-configs/academic-db/gibs-mcp.json +20 -0
  12. package/mcp-configs/academic-db/gis-mcp-server.json +22 -0
  13. package/mcp-configs/academic-db/google-earth-engine-mcp.json +21 -0
  14. package/mcp-configs/academic-db/m4-clinical-mcp.json +21 -0
  15. package/mcp-configs/academic-db/medical-mcp.json +21 -0
  16. package/mcp-configs/academic-db/nexonco-mcp.json +20 -0
  17. package/mcp-configs/academic-db/omop-mcp.json +20 -0
  18. package/mcp-configs/academic-db/onekgpd-mcp.json +20 -0
  19. package/mcp-configs/academic-db/openedu-mcp.json +20 -0
  20. package/mcp-configs/academic-db/opengenes-mcp.json +20 -0
  21. package/mcp-configs/academic-db/openstax-mcp.json +21 -0
  22. package/mcp-configs/academic-db/openstreetmap-mcp.json +21 -0
  23. package/mcp-configs/academic-db/opentargets-mcp.json +21 -0
  24. package/mcp-configs/academic-db/pdb-mcp.json +21 -0
  25. package/mcp-configs/academic-db/smithsonian-mcp.json +20 -0
  26. package/mcp-configs/ai-platform/magi-researchers.json +21 -0
  27. package/mcp-configs/ai-platform/mcp-academic-researcher.json +22 -0
  28. package/mcp-configs/ai-platform/open-paper-machine.json +21 -0
  29. package/mcp-configs/ai-platform/paper-intelligence.json +21 -0
  30. package/mcp-configs/ai-platform/paper-reader.json +21 -0
  31. package/mcp-configs/ai-platform/paperdebugger.json +21 -0
  32. package/mcp-configs/browser/exa-mcp.json +20 -0
  33. package/mcp-configs/browser/mcp-searxng.json +21 -0
  34. package/mcp-configs/browser/mcp-webresearch.json +20 -0
  35. package/mcp-configs/cloud-docs/confluence-mcp.json +37 -0
  36. package/mcp-configs/cloud-docs/google-drive-mcp.json +35 -0
  37. package/mcp-configs/cloud-docs/notion-mcp.json +29 -0
  38. package/mcp-configs/communication/discord-mcp.json +29 -0
  39. package/mcp-configs/communication/discourse-mcp.json +21 -0
  40. package/mcp-configs/communication/slack-mcp.json +29 -0
  41. package/mcp-configs/communication/telegram-mcp.json +28 -0
  42. package/mcp-configs/data-platform/automl-stat-mcp.json +21 -0
  43. package/mcp-configs/data-platform/jefferson-stats-mcp.json +22 -0
  44. package/mcp-configs/data-platform/mcp-excel-server.json +21 -0
  45. package/mcp-configs/data-platform/mcp-stata.json +21 -0
  46. package/mcp-configs/data-platform/mcpstack-jupyter.json +21 -0
  47. package/mcp-configs/data-platform/ml-mcp.json +21 -0
  48. package/mcp-configs/data-platform/nasdaq-data-link-mcp.json +20 -0
  49. package/mcp-configs/data-platform/numpy-mcp.json +21 -0
  50. package/mcp-configs/database/neo4j-mcp.json +37 -0
  51. package/mcp-configs/database/postgres-mcp.json +28 -0
  52. package/mcp-configs/database/sqlite-mcp.json +29 -0
  53. package/mcp-configs/dev-platform/geogebra-mcp.json +21 -0
  54. package/mcp-configs/dev-platform/github-mcp.json +31 -0
  55. package/mcp-configs/dev-platform/gitlab-mcp.json +34 -0
  56. package/mcp-configs/dev-platform/latex-mcp-server.json +21 -0
  57. package/mcp-configs/dev-platform/manim-mcp.json +20 -0
  58. package/mcp-configs/dev-platform/mcp-echarts.json +20 -0
  59. package/mcp-configs/dev-platform/panel-viz-mcp.json +20 -0
  60. package/mcp-configs/dev-platform/paperbanana.json +20 -0
  61. package/mcp-configs/dev-platform/texflow-mcp.json +20 -0
  62. package/mcp-configs/dev-platform/texmcp.json +20 -0
  63. package/mcp-configs/dev-platform/typst-mcp.json +21 -0
  64. package/mcp-configs/dev-platform/vizro-mcp.json +20 -0
  65. package/mcp-configs/email/email-mcp.json +40 -0
  66. package/mcp-configs/email/gmail-mcp.json +37 -0
  67. package/mcp-configs/note-knowledge/local-faiss-mcp.json +21 -0
  68. package/mcp-configs/note-knowledge/mcp-memory-service.json +21 -0
  69. package/mcp-configs/note-knowledge/mcp-obsidian.json +23 -0
  70. package/mcp-configs/note-knowledge/mcp-ragdocs.json +20 -0
  71. package/mcp-configs/note-knowledge/mcp-summarizer.json +21 -0
  72. package/mcp-configs/note-knowledge/mediawiki-mcp.json +21 -0
  73. package/mcp-configs/note-knowledge/openzim-mcp.json +20 -0
  74. package/mcp-configs/note-knowledge/zettelkasten-mcp.json +21 -0
  75. package/mcp-configs/reference-mgr/academic-paper-mcp-http.json +20 -0
  76. package/mcp-configs/reference-mgr/academix.json +20 -0
  77. package/mcp-configs/reference-mgr/arxiv-research-mcp.json +21 -0
  78. package/mcp-configs/reference-mgr/google-scholar-abstract-mcp.json +19 -0
  79. package/mcp-configs/reference-mgr/google-scholar-mcp.json +20 -0
  80. package/mcp-configs/reference-mgr/mcp-paperswithcode.json +21 -0
  81. package/mcp-configs/reference-mgr/mcp-scholarly.json +20 -0
  82. package/mcp-configs/reference-mgr/mcp-simple-arxiv.json +20 -0
  83. package/mcp-configs/reference-mgr/mcp-simple-pubmed.json +20 -0
  84. package/mcp-configs/reference-mgr/mcp-zotero.json +21 -0
  85. package/mcp-configs/reference-mgr/mendeley-mcp.json +20 -0
  86. package/mcp-configs/reference-mgr/ncbi-mcp-server.json +22 -0
  87. package/mcp-configs/reference-mgr/onecite.json +21 -0
  88. package/mcp-configs/reference-mgr/paper-search-mcp.json +21 -0
  89. package/mcp-configs/reference-mgr/pubmed-search-mcp.json +21 -0
  90. package/mcp-configs/reference-mgr/scholar-mcp.json +21 -0
  91. package/mcp-configs/reference-mgr/scholar-multi-mcp.json +21 -0
  92. package/mcp-configs/reference-mgr/seerai.json +21 -0
  93. package/mcp-configs/reference-mgr/semantic-scholar-fastmcp.json +21 -0
  94. package/mcp-configs/reference-mgr/sourcelibrary.json +20 -0
  95. package/mcp-configs/registry.json +178 -149
  96. package/mcp-configs/repository/dataverse-mcp.json +33 -0
  97. package/mcp-configs/repository/huggingface-mcp.json +29 -0
  98. package/openclaw.plugin.json +2 -2
  99. package/package.json +2 -2
  100. package/skills/analysis/dataviz/algorithm-visualizer-guide/SKILL.md +259 -0
  101. package/skills/analysis/dataviz/bokeh-visualization-guide/SKILL.md +270 -0
  102. package/skills/analysis/dataviz/chart-image-generator/SKILL.md +229 -0
  103. package/skills/analysis/dataviz/citation-map-guide/SKILL.md +184 -0
  104. package/skills/analysis/dataviz/d3-visualization-guide/SKILL.md +281 -0
  105. package/skills/analysis/dataviz/data-visualization-principles/SKILL.md +171 -0
  106. package/skills/analysis/dataviz/echarts-visualization-guide/SKILL.md +250 -0
  107. package/skills/analysis/dataviz/metabase-analytics-guide/SKILL.md +242 -0
  108. package/skills/analysis/dataviz/plotly-interactive-guide/SKILL.md +266 -0
  109. package/skills/analysis/dataviz/redash-analytics-guide/SKILL.md +284 -0
  110. package/skills/analysis/econometrics/econml-causal-guide/SKILL.md +163 -0
  111. package/skills/analysis/econometrics/empirical-paper-analysis/SKILL.md +192 -0
  112. package/skills/analysis/econometrics/mostly-harmless-guide/SKILL.md +139 -0
  113. package/skills/analysis/econometrics/panel-data-analyst/SKILL.md +259 -0
  114. package/skills/analysis/econometrics/panel-data-regression-workflow/SKILL.md +267 -0
  115. package/skills/analysis/econometrics/python-causality-guide/SKILL.md +134 -0
  116. package/skills/analysis/econometrics/stata-accounting-guide/SKILL.md +269 -0
  117. package/skills/analysis/econometrics/stata-analyst-guide/SKILL.md +245 -0
  118. package/skills/analysis/econometrics/stata-reference-guide/SKILL.md +293 -0
  119. package/skills/analysis/statistics/data-anomaly-detection/SKILL.md +157 -0
  120. package/skills/analysis/statistics/general-statistics-guide/SKILL.md +226 -0
  121. package/skills/analysis/statistics/infiagent-benchmark-guide/SKILL.md +106 -0
  122. package/skills/analysis/statistics/ml-experiment-tracker/SKILL.md +212 -0
  123. package/skills/analysis/statistics/pywayne-statistics-guide/SKILL.md +192 -0
  124. package/skills/analysis/statistics/quantitative-methods-guide/SKILL.md +193 -0
  125. package/skills/analysis/statistics/senior-data-scientist-guide/SKILL.md +223 -0
  126. package/skills/analysis/wrangling/claude-data-analysis-guide/SKILL.md +100 -0
  127. package/skills/analysis/wrangling/csv-data-analyzer/SKILL.md +170 -0
  128. package/skills/analysis/wrangling/data-cleaning-pipeline/SKILL.md +266 -0
  129. package/skills/analysis/wrangling/data-cog-guide/SKILL.md +178 -0
  130. package/skills/analysis/wrangling/open-data-scientist-guide/SKILL.md +197 -0
  131. package/skills/analysis/wrangling/stata-data-cleaning/SKILL.md +276 -0
  132. package/skills/analysis/wrangling/streamline-analyst-guide/SKILL.md +119 -0
  133. package/skills/analysis/wrangling/survey-data-processing/SKILL.md +298 -0
  134. package/skills/domains/ai-ml/ai-agent-papers-guide/SKILL.md +146 -0
  135. package/skills/domains/ai-ml/ai-model-benchmarking/SKILL.md +209 -0
  136. package/skills/domains/ai-ml/annotated-dl-papers-guide/SKILL.md +159 -0
  137. package/skills/domains/ai-ml/anomaly-detection-papers-guide/SKILL.md +167 -0
  138. package/skills/domains/ai-ml/autonomous-agents-papers-guide/SKILL.md +178 -0
  139. package/skills/domains/ai-ml/dl-transformer-finetune/SKILL.md +239 -0
  140. package/skills/domains/ai-ml/domain-adaptation-papers-guide/SKILL.md +173 -0
  141. package/skills/domains/ai-ml/generative-ai-guide/SKILL.md +146 -0
  142. package/skills/domains/ai-ml/graph-learning-papers-guide/SKILL.md +125 -0
  143. package/skills/domains/ai-ml/huggingface-inference-guide/SKILL.md +196 -0
  144. package/skills/domains/ai-ml/keras-deep-learning/SKILL.md +210 -0
  145. package/skills/domains/ai-ml/kolmogorov-arnold-networks-guide/SKILL.md +185 -0
  146. package/skills/domains/ai-ml/llm-from-scratch-guide/SKILL.md +124 -0
  147. package/skills/domains/ai-ml/ml-pipeline-guide/SKILL.md +295 -0
  148. package/skills/domains/ai-ml/nlp-toolkit-guide/SKILL.md +247 -0
  149. package/skills/domains/ai-ml/npcpy-research-guide/SKILL.md +137 -0
  150. package/skills/domains/ai-ml/pytorch-guide/SKILL.md +281 -0
  151. package/skills/domains/ai-ml/pytorch-lightning-guide/SKILL.md +244 -0
  152. package/skills/domains/ai-ml/responsible-ai-guide/SKILL.md +126 -0
  153. package/skills/domains/ai-ml/tensorflow-guide/SKILL.md +241 -0
  154. package/skills/domains/ai-ml/vmas-simulator-guide/SKILL.md +129 -0
  155. package/skills/domains/biomedical/bioagents-guide/SKILL.md +308 -0
  156. package/skills/domains/biomedical/clawbio-guide/SKILL.md +167 -0
  157. package/skills/domains/biomedical/clinical-dialogue-agents-guide/SKILL.md +145 -0
  158. package/skills/domains/biomedical/ena-sequence-api/SKILL.md +175 -0
  159. package/skills/domains/biomedical/genomas-guide/SKILL.md +126 -0
  160. package/skills/domains/biomedical/genotex-benchmark-guide/SKILL.md +125 -0
  161. package/skills/domains/biomedical/med-researcher-guide/SKILL.md +161 -0
  162. package/skills/domains/biomedical/med-researcher-r1-guide/SKILL.md +146 -0
  163. package/skills/domains/biomedical/medgeclaw-guide/SKILL.md +345 -0
  164. package/skills/domains/biomedical/medical-imaging-guide/SKILL.md +305 -0
  165. package/skills/domains/biomedical/ncbi-blast-api/SKILL.md +195 -0
  166. package/skills/domains/biomedical/ncbi-datasets-api/SKILL.md +220 -0
  167. package/skills/domains/biomedical/quickgo-api/SKILL.md +181 -0
  168. package/skills/domains/business/architecture-design-guide/SKILL.md +279 -0
  169. package/skills/domains/business/innovation-management-guide/SKILL.md +257 -0
  170. package/skills/domains/business/operations-research-guide/SKILL.md +258 -0
  171. package/skills/domains/business/xpert-bi-guide/SKILL.md +84 -0
  172. package/skills/domains/chemistry/cactus-cheminformatics-guide/SKILL.md +89 -0
  173. package/skills/domains/chemistry/chemeagle-guide/SKILL.md +147 -0
  174. package/skills/domains/chemistry/chemgraph-agent-guide/SKILL.md +120 -0
  175. package/skills/domains/chemistry/molecular-dynamics-guide/SKILL.md +237 -0
  176. package/skills/domains/chemistry/pubchem-api-guide/SKILL.md +180 -0
  177. package/skills/domains/chemistry/spectroscopy-analysis-guide/SKILL.md +290 -0
  178. package/skills/domains/cs/ai-security-papers-guide/SKILL.md +103 -0
  179. package/skills/domains/cs/code-llm-papers-guide/SKILL.md +131 -0
  180. package/skills/domains/cs/distributed-systems-guide/SKILL.md +268 -0
  181. package/skills/domains/cs/formal-verification-guide/SKILL.md +298 -0
  182. package/skills/domains/cs/gaussian-splatting-papers-guide/SKILL.md +158 -0
  183. package/skills/domains/cs/llm-aiops-guide/SKILL.md +70 -0
  184. package/skills/domains/cs/software-heritage-api/SKILL.md +200 -0
  185. package/skills/domains/ecology/species-distribution-guide/SKILL.md +343 -0
  186. package/skills/domains/economics/imf-data-api-guide/SKILL.md +174 -0
  187. package/skills/domains/economics/nber-working-papers-api/SKILL.md +177 -0
  188. package/skills/domains/economics/post-labor-economics/SKILL.md +254 -0
  189. package/skills/domains/economics/pricing-psychology-guide/SKILL.md +273 -0
  190. package/skills/domains/economics/repec-economics-api/SKILL.md +188 -0
  191. package/skills/domains/economics/world-bank-data-guide/SKILL.md +179 -0
  192. package/skills/domains/education/academic-study-methods/SKILL.md +228 -0
  193. package/skills/domains/education/assessment-design-guide/SKILL.md +213 -0
  194. package/skills/domains/education/educational-research-methods/SKILL.md +179 -0
  195. package/skills/domains/education/edumcp-guide/SKILL.md +74 -0
  196. package/skills/domains/education/mooc-analytics-guide/SKILL.md +206 -0
  197. package/skills/domains/education/open-syllabus-api/SKILL.md +171 -0
  198. package/skills/domains/finance/akshare-finance-data/SKILL.md +207 -0
  199. package/skills/domains/finance/finsight-research-guide/SKILL.md +113 -0
  200. package/skills/domains/finance/options-analytics-agent-guide/SKILL.md +117 -0
  201. package/skills/domains/finance/portfolio-optimization-guide/SKILL.md +279 -0
  202. package/skills/domains/finance/risk-modeling-guide/SKILL.md +260 -0
  203. package/skills/domains/finance/stata-accounting-research/SKILL.md +372 -0
  204. package/skills/domains/geoscience/climate-modeling-guide/SKILL.md +215 -0
  205. package/skills/domains/geoscience/pangaea-data-api/SKILL.md +197 -0
  206. package/skills/domains/geoscience/satellite-remote-sensing/SKILL.md +193 -0
  207. package/skills/domains/geoscience/seismology-data-guide/SKILL.md +208 -0
  208. package/skills/domains/humanities/digital-humanities-methods/SKILL.md +232 -0
  209. package/skills/domains/humanities/ethical-philosophy-guide/SKILL.md +244 -0
  210. package/skills/domains/humanities/history-research-guide/SKILL.md +260 -0
  211. package/skills/domains/humanities/political-history-guide/SKILL.md +241 -0
  212. package/skills/domains/law/caselaw-access-api/SKILL.md +149 -0
  213. package/skills/domains/law/legal-agent-skills-guide/SKILL.md +132 -0
  214. package/skills/domains/law/legal-nlp-guide/SKILL.md +236 -0
  215. package/skills/domains/law/legal-research-methods/SKILL.md +190 -0
  216. package/skills/domains/law/opencontracts-guide/SKILL.md +168 -0
  217. package/skills/domains/law/patent-analysis-guide/SKILL.md +257 -0
  218. package/skills/domains/law/regulatory-compliance-guide/SKILL.md +267 -0
  219. package/skills/domains/math/lean-theorem-proving-guide/SKILL.md +140 -0
  220. package/skills/domains/math/symbolic-computation-guide/SKILL.md +263 -0
  221. package/skills/domains/math/topology-data-analysis/SKILL.md +305 -0
  222. package/skills/domains/pharma/clinical-trial-design-guide/SKILL.md +271 -0
  223. package/skills/domains/pharma/drug-target-interaction/SKILL.md +242 -0
  224. package/skills/domains/pharma/madd-drug-discovery-guide/SKILL.md +153 -0
  225. package/skills/domains/pharma/pharmacovigilance-guide/SKILL.md +216 -0
  226. package/skills/domains/physics/astrophysics-data-guide/SKILL.md +305 -0
  227. package/skills/domains/physics/particle-physics-guide/SKILL.md +287 -0
  228. package/skills/domains/social-science/ipums-microdata-api/SKILL.md +211 -0
  229. package/skills/domains/social-science/network-analysis-guide/SKILL.md +310 -0
  230. package/skills/domains/social-science/psychology-research-guide/SKILL.md +270 -0
  231. package/skills/domains/social-science/sociology-research-guide/SKILL.md +238 -0
  232. package/skills/domains/social-science/sociology-research-methods/SKILL.md +181 -0
  233. package/skills/literature/discovery/arxiv-paper-monitoring/SKILL.md +233 -0
  234. package/skills/literature/discovery/paper-recommendation-guide/SKILL.md +120 -0
  235. package/skills/literature/discovery/papers-we-love-guide/SKILL.md +169 -0
  236. package/skills/literature/discovery/semantic-paper-radar/SKILL.md +144 -0
  237. package/skills/literature/discovery/zotero-arxiv-daily-guide/SKILL.md +94 -0
  238. package/skills/literature/fulltext/bioc-pmc-api/SKILL.md +146 -0
  239. package/skills/literature/fulltext/core-api-guide/SKILL.md +144 -0
  240. package/skills/literature/fulltext/dataverse-api/SKILL.md +215 -0
  241. package/skills/literature/fulltext/hal-archive-api/SKILL.md +218 -0
  242. package/skills/literature/fulltext/institutional-repository-guide/SKILL.md +212 -0
  243. package/skills/literature/fulltext/open-access-mining-guide/SKILL.md +341 -0
  244. package/skills/literature/fulltext/osf-api/SKILL.md +212 -0
  245. package/skills/literature/fulltext/pmc-ftp-bulk-download/SKILL.md +182 -0
  246. package/skills/literature/fulltext/zotero-ai-butler-guide/SKILL.md +166 -0
  247. package/skills/literature/fulltext/zotero-scihub-guide/SKILL.md +168 -0
  248. package/skills/literature/metadata/academic-paper-summarizer/SKILL.md +101 -0
  249. package/skills/literature/metadata/bibliometrix-guide/SKILL.md +164 -0
  250. package/skills/literature/metadata/crossref-event-data-api/SKILL.md +183 -0
  251. package/skills/literature/metadata/doi-content-negotiation/SKILL.md +202 -0
  252. package/skills/literature/metadata/orkg-api/SKILL.md +153 -0
  253. package/skills/literature/metadata/plumx-metrics-api/SKILL.md +188 -0
  254. package/skills/literature/metadata/ror-organization-api/SKILL.md +208 -0
  255. package/skills/literature/metadata/sophosia-reference-guide/SKILL.md +110 -0
  256. package/skills/literature/metadata/viaf-authority-api/SKILL.md +209 -0
  257. package/skills/literature/metadata/wikidata-api-guide/SKILL.md +156 -0
  258. package/skills/literature/metadata/zoplicate-dedup-guide/SKILL.md +147 -0
  259. package/skills/literature/metadata/zotero-actions-tags-guide/SKILL.md +212 -0
  260. package/skills/literature/metadata/zotmoov-guide/SKILL.md +120 -0
  261. package/skills/literature/metadata/zutilo-guide/SKILL.md +140 -0
  262. package/skills/literature/search/arxiv-batch-reporting/SKILL.md +133 -0
  263. package/skills/literature/search/arxiv-cli-tools/SKILL.md +172 -0
  264. package/skills/literature/search/arxiv-osiris/SKILL.md +199 -0
  265. package/skills/literature/search/arxiv-paper-processor/SKILL.md +141 -0
  266. package/skills/literature/search/baidu-scholar-guide/SKILL.md +110 -0
  267. package/skills/literature/search/base-academic-search/SKILL.md +196 -0
  268. package/skills/literature/search/chatpaper-guide/SKILL.md +122 -0
  269. package/skills/literature/search/citeseerx-api/SKILL.md +183 -0
  270. package/skills/literature/search/deep-literature-search/SKILL.md +149 -0
  271. package/skills/literature/search/deepgit-search-guide/SKILL.md +147 -0
  272. package/skills/literature/search/eric-education-api/SKILL.md +199 -0
  273. package/skills/literature/search/findpapers-guide/SKILL.md +177 -0
  274. package/skills/literature/search/ieee-xplore-api/SKILL.md +177 -0
  275. package/skills/literature/search/lens-scholarly-api/SKILL.md +211 -0
  276. package/skills/literature/search/multi-database-literature-search/SKILL.md +198 -0
  277. package/skills/literature/search/open-library-api/SKILL.md +196 -0
  278. package/skills/literature/search/open-semantic-search-guide/SKILL.md +190 -0
  279. package/skills/literature/search/openaire-api/SKILL.md +141 -0
  280. package/skills/literature/search/paper-search-mcp-guide/SKILL.md +107 -0
  281. package/skills/literature/search/papers-chat-guide/SKILL.md +194 -0
  282. package/skills/literature/search/pasa-paper-search-guide/SKILL.md +138 -0
  283. package/skills/literature/search/plos-open-access-api/SKILL.md +203 -0
  284. package/skills/literature/search/scielo-api/SKILL.md +182 -0
  285. package/skills/literature/search/share-research-api/SKILL.md +129 -0
  286. package/skills/literature/search/worldcat-search-api/SKILL.md +224 -0
  287. package/skills/research/automation/ai-scientist-v2-guide/SKILL.md +284 -0
  288. package/skills/research/automation/aim-experiment-guide/SKILL.md +234 -0
  289. package/skills/research/automation/claude-academic-workflow-guide/SKILL.md +202 -0
  290. package/skills/research/automation/coexist-ai-guide/SKILL.md +149 -0
  291. package/skills/research/automation/datagen-research-guide/SKILL.md +131 -0
  292. package/skills/research/automation/foam-agent-guide/SKILL.md +203 -0
  293. package/skills/research/automation/kedro-pipeline-guide/SKILL.md +216 -0
  294. package/skills/research/automation/mle-agent-guide/SKILL.md +139 -0
  295. package/skills/research/automation/paper-to-agent-guide/SKILL.md +116 -0
  296. package/skills/research/automation/rd-agent-guide/SKILL.md +246 -0
  297. package/skills/research/automation/research-paper-orchestrator/SKILL.md +254 -0
  298. package/skills/research/deep-research/academic-deep-research/SKILL.md +190 -0
  299. package/skills/research/deep-research/auto-deep-research-guide/SKILL.md +141 -0
  300. package/skills/research/deep-research/cognitive-kernel-guide/SKILL.md +200 -0
  301. package/skills/research/deep-research/corvus-research-guide/SKILL.md +132 -0
  302. package/skills/research/deep-research/deep-research-pro/SKILL.md +213 -0
  303. package/skills/research/deep-research/deep-research-work/SKILL.md +204 -0
  304. package/skills/research/deep-research/deep-searcher-guide/SKILL.md +253 -0
  305. package/skills/research/deep-research/gpt-researcher-guide/SKILL.md +191 -0
  306. package/skills/research/deep-research/in-depth-research-guide/SKILL.md +205 -0
  307. package/skills/research/deep-research/khoj-research-guide/SKILL.md +200 -0
  308. package/skills/research/deep-research/kosmos-scientist-guide/SKILL.md +185 -0
  309. package/skills/research/deep-research/llm-scientific-discovery-guide/SKILL.md +178 -0
  310. package/skills/research/deep-research/local-deep-research-guide/SKILL.md +253 -0
  311. package/skills/research/deep-research/open-researcher-guide/SKILL.md +138 -0
  312. package/skills/research/deep-research/tongyi-deep-research-guide/SKILL.md +217 -0
  313. package/skills/research/funding/eu-horizon-guide/SKILL.md +244 -0
  314. package/skills/research/funding/grant-budget-guide/SKILL.md +284 -0
  315. package/skills/research/funding/nih-reporter-api-guide/SKILL.md +166 -0
  316. package/skills/research/funding/nsf-award-api-guide/SKILL.md +133 -0
  317. package/skills/research/methodology/academic-mentor-guide/SKILL.md +169 -0
  318. package/skills/research/methodology/claude-scientific-guide/SKILL.md +122 -0
  319. package/skills/research/methodology/deep-innovator-guide/SKILL.md +242 -0
  320. package/skills/research/methodology/osf-api-guide/SKILL.md +165 -0
  321. package/skills/research/methodology/parsifal-slr-guide/SKILL.md +154 -0
  322. package/skills/research/methodology/research-paper-kb/SKILL.md +263 -0
  323. package/skills/research/methodology/research-pipeline-units-guide/SKILL.md +169 -0
  324. package/skills/research/methodology/research-town-guide/SKILL.md +263 -0
  325. package/skills/research/methodology/slr-automation-guide/SKILL.md +235 -0
  326. package/skills/research/paper-review/automated-review-guide/SKILL.md +281 -0
  327. package/skills/research/paper-review/latte-review-guide/SKILL.md +175 -0
  328. package/skills/research/paper-review/paper-compare-guide/SKILL.md +238 -0
  329. package/skills/research/paper-review/paper-critique-framework/SKILL.md +181 -0
  330. package/skills/research/paper-review/paper-digest-guide/SKILL.md +240 -0
  331. package/skills/research/paper-review/paper-research-assistant/SKILL.md +231 -0
  332. package/skills/research/paper-review/research-quality-filter/SKILL.md +261 -0
  333. package/skills/research/paper-review/review-response-guide/SKILL.md +275 -0
  334. package/skills/tools/code-exec/contextplus-mcp-guide/SKILL.md +110 -0
  335. package/skills/tools/code-exec/google-colab-guide/SKILL.md +276 -0
  336. package/skills/tools/code-exec/kaggle-api-guide/SKILL.md +216 -0
  337. package/skills/tools/code-exec/overleaf-cli-guide/SKILL.md +279 -0
  338. package/skills/tools/diagram/clawphd-guide/SKILL.md +149 -0
  339. package/skills/tools/diagram/code-flow-visualizer/SKILL.md +197 -0
  340. package/skills/tools/diagram/excalidraw-diagram-guide/SKILL.md +170 -0
  341. package/skills/tools/diagram/json-data-visualizer/SKILL.md +270 -0
  342. package/skills/tools/diagram/kroki-diagram-api/SKILL.md +198 -0
  343. package/skills/tools/diagram/mermaid-architect-guide/SKILL.md +219 -0
  344. package/skills/tools/diagram/scientific-graphical-abstract/SKILL.md +201 -0
  345. package/skills/tools/diagram/tldraw-whiteboard-guide/SKILL.md +397 -0
  346. package/skills/tools/document/docsgpt-guide/SKILL.md +130 -0
  347. package/skills/tools/document/large-document-reader/SKILL.md +202 -0
  348. package/skills/tools/document/md2pdf-xelatex/SKILL.md +212 -0
  349. package/skills/tools/document/openpaper-guide/SKILL.md +232 -0
  350. package/skills/tools/document/paper-parse-guide/SKILL.md +243 -0
  351. package/skills/tools/document/weknora-guide/SKILL.md +216 -0
  352. package/skills/tools/document/zotero-addon-market-guide/SKILL.md +108 -0
  353. package/skills/tools/document/zotero-night-theme-guide/SKILL.md +142 -0
  354. package/skills/tools/document/zotero-style-guide/SKILL.md +217 -0
  355. package/skills/tools/knowledge-graph/citation-network-builder/SKILL.md +244 -0
  356. package/skills/tools/knowledge-graph/concept-map-generator/SKILL.md +284 -0
  357. package/skills/tools/knowledge-graph/graphiti-guide/SKILL.md +219 -0
  358. package/skills/tools/knowledge-graph/mimir-memory-guide/SKILL.md +135 -0
  359. package/skills/tools/knowledge-graph/notero-zotero-notion-guide/SKILL.md +187 -0
  360. package/skills/tools/knowledge-graph/open-webui-tools-guide/SKILL.md +156 -0
  361. package/skills/tools/knowledge-graph/openspg-guide/SKILL.md +210 -0
  362. package/skills/tools/knowledge-graph/paperpile-notion-guide/SKILL.md +84 -0
  363. package/skills/tools/knowledge-graph/zotero-markdb-connect-guide/SKILL.md +162 -0
  364. package/skills/tools/ocr-translate/latex-translation-guide/SKILL.md +176 -0
  365. package/skills/tools/ocr-translate/math-equation-renderer/SKILL.md +198 -0
  366. package/skills/tools/ocr-translate/pdf-math-translate-guide/SKILL.md +141 -0
  367. package/skills/tools/ocr-translate/zotero-pdf-translate-guide/SKILL.md +95 -0
  368. package/skills/tools/ocr-translate/zotero-pdf2zh-guide/SKILL.md +143 -0
  369. package/skills/tools/scraping/dataset-finder-guide/SKILL.md +253 -0
  370. package/skills/tools/scraping/easy-spider-guide/SKILL.md +250 -0
  371. package/skills/tools/scraping/google-scholar-scraper/SKILL.md +255 -0
  372. package/skills/tools/scraping/repository-harvesting-guide/SKILL.md +310 -0
  373. package/skills/writing/citation/academic-citation-manager/SKILL.md +314 -0
  374. package/skills/writing/citation/academic-citation-manager-guide/SKILL.md +182 -0
  375. package/skills/writing/citation/citation-assistant-skill/SKILL.md +192 -0
  376. package/skills/writing/citation/jabref-reference-guide/SKILL.md +127 -0
  377. package/skills/writing/citation/jasminum-zotero-guide/SKILL.md +103 -0
  378. package/skills/writing/citation/mendeley-api/SKILL.md +231 -0
  379. package/skills/writing/citation/obsidian-citation-guide/SKILL.md +164 -0
  380. package/skills/writing/citation/obsidian-zotero-guide/SKILL.md +137 -0
  381. package/skills/writing/citation/onecite-reference-guide/SKILL.md +168 -0
  382. package/skills/writing/citation/papersgpt-zotero-guide/SKILL.md +132 -0
  383. package/skills/writing/citation/papis-cli-guide/SKILL.md +213 -0
  384. package/skills/writing/citation/zotero-better-bibtex-guide/SKILL.md +107 -0
  385. package/skills/writing/citation/zotero-better-notes-guide/SKILL.md +121 -0
  386. package/skills/writing/citation/zotero-gpt-guide/SKILL.md +111 -0
  387. package/skills/writing/citation/zotero-mcp-guide/SKILL.md +164 -0
  388. package/skills/writing/citation/zotero-mdnotes-guide/SKILL.md +162 -0
  389. package/skills/writing/citation/zotero-reference-guide/SKILL.md +139 -0
  390. package/skills/writing/citation/zotero-scholar-guide/SKILL.md +294 -0
  391. package/skills/writing/citation/zotfile-attachment-guide/SKILL.md +140 -0
  392. package/skills/writing/composition/ml-paper-writing/SKILL.md +163 -0
  393. package/skills/writing/composition/opendraft-thesis-guide/SKILL.md +200 -0
  394. package/skills/writing/composition/paper-debugger-guide/SKILL.md +143 -0
  395. package/skills/writing/composition/paperforge-guide/SKILL.md +205 -0
  396. package/skills/writing/composition/research-paper-writer/SKILL.md +226 -0
  397. package/skills/writing/composition/scientific-writing-resources/SKILL.md +151 -0
  398. package/skills/writing/composition/scientific-writing-wrapper/SKILL.md +153 -0
  399. package/skills/writing/latex/academic-writing-latex/SKILL.md +285 -0
  400. package/skills/writing/latex/latex-drawing-collection/SKILL.md +154 -0
  401. package/skills/writing/latex/latex-templates-collection/SKILL.md +159 -0
  402. package/skills/writing/latex/md-to-pdf-academic/SKILL.md +230 -0
  403. package/skills/writing/latex/tex-render-guide/SKILL.md +243 -0
  404. package/skills/writing/polish/academic-tone-guide/SKILL.md +209 -0
  405. package/skills/writing/polish/chinese-text-humanizer/SKILL.md +140 -0
  406. package/skills/writing/polish/conciseness-editing-guide/SKILL.md +225 -0
  407. package/skills/writing/polish/paper-polish-guide/SKILL.md +160 -0
  408. package/skills/writing/templates/arxiv-preprint-template/SKILL.md +184 -0
  409. package/skills/writing/templates/elegant-paper-template/SKILL.md +141 -0
  410. package/skills/writing/templates/graphical-abstract-guide/SKILL.md +183 -0
  411. package/skills/writing/templates/novathesis-guide/SKILL.md +152 -0
  412. package/skills/writing/templates/scientific-article-pdf/SKILL.md +261 -0
  413. package/skills/writing/templates/sjtuthesis-guide/SKILL.md +197 -0
  414. package/skills/writing/templates/thuthesis-guide/SKILL.md +181 -0
  415. package/skills/literature/fulltext/repository-harvesting-guide/SKILL.md +0 -207
@@ -0,0 +1,95 @@
1
+ ---
2
+ name: zotero-pdf-translate-guide
3
+ description: "Guide to Zotero PDF Translate for multilingual PDF and annotation translation"
4
+ metadata:
5
+ openclaw:
6
+ emoji: "🌐"
7
+ category: "tools"
8
+ subcategory: "ocr-translate"
9
+ keywords: ["zotero", "pdf-translate", "multilingual", "annotation", "academic-translation"]
10
+ source: "https://github.com/windingwind/zotero-pdf-translate"
11
+ ---
12
+
13
+ # Zotero PDF Translate Guide
14
+
15
+ ## Overview
16
+
17
+ Zotero PDF Translate is one of the most popular Zotero plugins with over 10,000 stars on GitHub, providing seamless translation capabilities directly within the Zotero PDF reader. It enables researchers to translate selected text, annotations, metadata, and even entire pages without leaving their reference management workflow.
18
+
19
+ The plugin supports a wide range of translation engines including Google Translate, DeepL, Microsoft Translator, OpenAI, and numerous other services. This flexibility allows researchers to choose the engine that best suits their language pair, domain, and accuracy requirements. For academic work involving technical terminology, the ability to switch between engines or use specialized services is invaluable.
20
+
21
+ Zotero PDF Translate goes beyond simple text translation. It handles EPUB documents, web pages saved to Zotero, item metadata fields, and annotation notes. This makes it an essential tool for any researcher who regularly reads papers in languages other than their primary working language, or who collaborates across linguistic boundaries.
22
+
23
+ ## Installation and Setup
24
+
25
+ Install Zotero PDF Translate through the Zotero Add-ons Manager:
26
+
27
+ 1. Download the latest `.xpi` file from the GitHub releases page at https://github.com/windingwind/zotero-pdf-translate/releases
28
+ 2. In Zotero, navigate to Tools > Add-ons > gear icon > Install Add-on From File
29
+ 3. Select the downloaded `.xpi` file and restart Zotero when prompted
30
+
31
+ After installation, configure your preferred translation engines:
32
+
33
+ - Open Zotero Preferences > PDF Translate
34
+ - Select your primary translation engine from the dropdown
35
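+ - For engines requiring API access, enter your credentials in the engine's Secret field in the same preferences pane (the plugin reads them from its own settings, not from environment variables; the names below are placeholders for your own keys):
36
+ - DeepL: paste your DeepL API key (`$DEEPL_API_KEY`)
37
+ - OpenAI: paste your OpenAI API key (`$OPENAI_API_KEY`)
38
+ - Microsoft: paste your Azure Translator key (`$AZURE_TRANSLATOR_KEY`)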
39
+ - Set your source and target languages
40
+ - Configure auto-translate behavior (on selection, on annotation, etc.)
41
+
42
+ For best results with academic content, consider using DeepL or OpenAI-based translation, as these engines generally handle technical vocabulary and complex sentence structures better than basic machine translation services.
43
+
44
+ ## Core Features
45
+
46
+ **In-Reader Translation**: Select any text in the Zotero PDF reader and the translation appears instantly in a sidebar panel. The original and translated text are displayed side by side, making it easy to verify accuracy and understand context.
47
+
48
+ **Annotation Translation**: When you highlight text and create annotations, Zotero PDF Translate can automatically translate the annotation content. This is particularly useful when building a collection of notes from foreign-language papers, as your annotation library remains in your working language.
49
+
50
+ **Metadata Translation**: Translate item titles, abstracts, and other metadata fields directly within your Zotero library. This helps when organizing a large collection that includes papers from multiple languages, making search and browsing more efficient.
51
+
52
+ **Multiple Engine Support**: The plugin supports over 15 translation engines:
53
+ - Free engines: Google Translate, Bing, Yandex, LibreTranslate
54
+ - Premium engines: DeepL, OpenAI GPT, Azure Translator
55
+ - Chinese-specialized: Baidu, Tencent, Youdao, Caiyun
56
+ - Academic: CNKI translation services
57
+
58
+ **Batch Translation**: Translate multiple items or annotations at once rather than processing them one by one. This is essential when processing a large reading list of foreign-language papers.
59
+
60
+ **Custom Dictionaries**: Define custom term mappings to ensure domain-specific terminology is translated consistently. For example, you can map technical terms in your field to their correct translations rather than relying on generic machine translation.
61
+
62
+ ## Academic Workflow Integration
63
+
64
+ A typical multilingual research workflow with Zotero PDF Translate involves several stages:
65
+
66
+ **Literature Discovery**: When you encounter papers in unfamiliar languages through citation tracking or database searches, add them to Zotero normally. Use metadata translation to translate titles and abstracts, helping you triage which papers deserve full reading.
67
+
68
+ **Deep Reading**: Open a foreign-language PDF in the Zotero reader. As you read, select passages for instant translation. For particularly important sections, create annotations that automatically include both the original text and translation.
69
+
70
+ **Note Compilation**: After reading, your Zotero annotations contain bilingual notes. Export these to your note-taking system with both original quotes and translations preserved, ensuring you can always trace back to the source text.
71
+
72
+ **Collaborative Research**: When working with international collaborators, use batch translation to prepare shared reading lists with translated metadata, making it easier for team members to navigate references outside their language expertise.
73
+
74
+ **Configuration Tips for Researchers**:
75
+ - Set keyboard shortcuts for quick translation toggling
76
+ - Enable auto-translate on text selection for fluid reading
77
+ - Configure fallback engines in case the primary service is unavailable
78
+ - Use custom dictionaries for your specific research domain terminology
79
+
80
+ ## Troubleshooting Common Issues
81
+
82
+ **Translation Not Appearing**: Ensure the plugin is enabled in Tools > Add-ons. Check that your selected translation engine is properly configured and that any required API keys are entered in the plugin preferences.
83
+
84
+ **Slow Translation Speed**: Some engines have rate limits. If translation is slow, try switching to a different engine or enabling caching in the plugin settings to avoid re-translating previously seen text.
85
+
86
+ **Incorrect Technical Terms**: Academic translation often struggles with domain-specific vocabulary. Use the custom dictionary feature to define correct translations for key terms in your field. This builds up over time into a valuable domain-specific translation resource.
87
+
88
+ **Character Encoding Issues**: Some older PDFs may have character encoding problems that affect text extraction and translation. Try re-saving the PDF or using Zotero's built-in PDF processing to improve text extraction quality.
89
+
90
+ ## References
91
+
92
+ - GitHub Repository: https://github.com/windingwind/zotero-pdf-translate
93
+ - Zotero Plugin Directory: https://www.zotero.org/support/plugins
94
+ - Zotero Forums Translation Discussion: https://forums.zotero.org
95
+ - DeepL API Documentation: https://www.deepl.com/docs-api
@@ -0,0 +1,143 @@
1
+ ---
2
+ name: zotero-pdf2zh-guide
3
+ description: "PDF Chinese translation plugin for Zotero reference manager"
4
+ version: 1.0.0
5
+ author: wentor-community
6
+ source: https://github.com/guaguastandup/zotero-pdf2zh
7
+ metadata:
8
+ openclaw:
9
+ category: "tools"
10
+ subcategory: "ocr-translate"
11
+ keywords:
12
+ - pdf-translation
13
+ - zotero-plugin
14
+ - chinese-translation
15
+ - ocr
16
+ - academic-papers
17
+ - bilingual-reading
18
+ ---
19
+
20
+ # Zotero PDF2ZH Guide
21
+
22
+ A skill for using the Zotero PDF2ZH plugin to translate academic PDF documents between Chinese and English while preserving the original layout, figures, and mathematical notation. Based on zotero-pdf2zh (3K stars), this skill enables researchers to bridge the language barrier in academic literature.
23
+
24
+ ## Overview
25
+
26
+ Academic research is inherently global, yet language barriers remain a significant obstacle. Chinese-speaking researchers need to read English-language papers, and English-speaking researchers increasingly need access to Chinese-language publications, particularly in fields where China produces significant research output (materials science, AI, engineering, traditional medicine). PDF2ZH addresses this by providing high-quality translation directly within the Zotero reference management workflow, producing bilingual documents that preserve the original formatting.
27
+
28
+ The plugin supports bidirectional translation between Chinese and English, leveraging modern neural machine translation while maintaining the structural integrity of academic documents including equations, tables, figures, and references.
29
+
30
+ ## Installation and Setup
31
+
32
+ **Prerequisites**
33
+ - Zotero 6 or Zotero 7 installed and configured
34
+ - An active internet connection for translation API access
35
+ - Sufficient storage for translated PDF copies (approximately 2x per document)
36
+
37
+ **Installation Steps**
38
+ - Download the latest .xpi file from the project's releases page
39
+ - In Zotero, navigate to Tools then Add-ons
40
+ - Click the gear icon and select Install Add-on From File
41
+ - Select the downloaded .xpi file and confirm installation
42
+ - Restart Zotero to activate the plugin
43
+ - Configure translation settings in the plugin preferences panel
44
+
45
+ **Configuration Options**
46
+ - Select the default translation direction (EN to ZH or ZH to EN)
47
+ - Choose the translation engine (multiple backends supported)
48
+ - Configure output format (side-by-side bilingual or translated-only)
49
+ - Set quality preferences (speed versus accuracy trade-off)
50
+ - Define custom terminology dictionaries for domain-specific terms
51
+
52
+ ## Translation Workflow
53
+
54
+ **Single Document Translation**
55
+ - Right-click a PDF item in the Zotero library
56
+ - Select the PDF2ZH translation option from the context menu
57
+ - Choose the target language if different from the default
58
+ - The plugin processes the document and creates a translated copy
59
+ - The translated PDF is automatically attached to the same Zotero item
60
+ - Original and translated versions are both accessible from the item entry
61
+
62
+ **Batch Translation**
63
+ - Select multiple items in the Zotero library
64
+ - Apply translation to the entire selection
65
+ - The plugin queues documents and processes them sequentially
66
+ - Progress is displayed in the Zotero status bar
67
+ - Failed translations are flagged for manual review
68
+
69
+ **Translation Quality Features**
70
+ - Mathematical equations are detected and preserved without translation
71
+ - Figures and their captions are handled separately for better accuracy
72
+ - Tables maintain their structure with cell-level translation
73
+ - Reference lists are preserved in their original form
74
+ - Headers, footers, and page numbers are excluded from translation
75
+
76
+ ## Handling Academic Content
77
+
78
+ The plugin includes special handling for academic document elements:
79
+
80
+ **Mathematical Notation**
81
+ - LaTeX-style equations are detected and excluded from translation
82
+ - Inline math symbols and variables are preserved in context
83
+ - Equation numbers and cross-references maintain their original format
84
+ - Greek letters and mathematical operators are not transliterated
85
+
86
+ **Technical Terminology**
87
+ - Domain-specific terms can be added to custom dictionaries
88
+ - The plugin supports glossary files for consistent term translation
89
+ - Ambiguous terms are handled based on the document's detected field
90
+ - Acronyms are expanded on first occurrence in the translation
91
+ - Standard academic phrases have pre-verified translations
92
+
93
+ **Figures and Tables**
94
+ - Figure labels and captions are translated while images are preserved
95
+ - Table headers and cell contents are translated maintaining structure
96
+ - Chart axis labels and legends within embedded images are not modified
97
+ - Cross-references to figures and tables maintain their numbering
98
+
99
+ ## Bilingual Reading Mode
100
+
101
+ The side-by-side bilingual output is particularly useful for research:
102
+
103
+ **Layout Options**
104
+ - Parallel paragraphs: original and translation appear side by side
105
+ - Interleaved paragraphs: translation appears below each original paragraph
106
+ - Margin annotations: brief translations appear in the margin alongside originals
107
+ - Separate documents: full original and full translation as independent PDFs
108
+
109
+ **Reading Strategies**
110
+ - Use bilingual mode to learn technical vocabulary in the target language
111
+ - Reference the original when the translation of a technical passage is unclear
112
+ - Compare sentence structures to understand discipline-specific writing conventions
113
+ - Build personal glossaries from translated terms in your research area
114
+
115
+ ## Domain-Specific Considerations
116
+
117
+ Translation quality varies by academic domain:
118
+
119
+ - **High Accuracy Domains** - Computer science, mathematics, physics (standardized terminology)
120
+ - **Medium Accuracy Domains** - Biology, chemistry, engineering (mixed standardized and specialized terms)
121
+ - **Challenging Domains** - Social sciences, humanities, law (culturally embedded terminology)
122
+ - **Specialized Domains** - Traditional Chinese medicine, regional studies (unique conceptual frameworks)
123
+
124
+ For challenging domains, always verify key translated terms against established bilingual dictionaries or domain experts.
125
+
126
+ ## Integration with Research-Claw
127
+
128
+ This skill enhances the Research-Claw multilingual research workflow:
129
+
130
+ - Translate papers discovered through literature search skills
131
+ - Feed translated content to paper analysis skills for knowledge extraction
132
+ - Support bilingual literature reviews spanning Chinese and English sources
133
+ - Connect with citation management skills for multilingual bibliographies
134
+ - Enable cross-language comparison of research findings on the same topic
135
+
136
+ ## Best Practices
137
+
138
+ - Always verify critical translations against the original, especially for methodology details
139
+ - Build and maintain domain-specific glossaries to improve translation consistency
140
+ - Use bilingual mode for important papers to catch translation nuances
141
+ - Batch-translate collection items during off-peak hours to manage API load
142
+ - Keep both original and translated versions in Zotero for reference
143
+ - Report translation errors to improve the tool's accuracy over time
@@ -0,0 +1,253 @@
1
+ ---
2
+ name: dataset-finder-guide
3
+ description: "Search and download research datasets from Kaggle, HuggingFace, and repos"
4
+ metadata:
5
+ openclaw:
6
+ emoji: "database"
7
+ category: "tools"
8
+ subcategory: "scraping"
9
+ keywords: ["dataset", "Kaggle", "data download", "HuggingFace", "data repository", "open data"]
10
+ source: "https://github.com/wentor-ai/research-plugins"
11
+ ---
12
+
13
+ # Dataset Finder Guide
14
+
15
+ Search, evaluate, and download research datasets from major repositories including Kaggle, Hugging Face, Google Dataset Search, Zenodo, UCI Machine Learning Repository, and domain-specific archives. This skill helps researchers locate the right data for their experiments efficiently.
16
+
17
+ ## Overview
18
+
19
+ Finding suitable datasets is often one of the most time-consuming phases of empirical research. Datasets are scattered across dozens of platforms, each with different APIs, licensing terms, download mechanisms, and metadata standards. A single research project might require datasets from Kaggle for benchmarking, Hugging Face for NLP tasks, Zenodo for supplementary materials from published papers, and government open data portals for demographic or economic variables.
20
+
21
+ This skill provides a unified approach to dataset discovery: formulating search queries, evaluating dataset quality and suitability, understanding licensing implications, and efficiently downloading and organizing data. It covers both general-purpose repositories and domain-specific archives that researchers in various fields need.
22
+
23
+ The emphasis is on reproducibility -- every dataset used in research should be citable, versioned, and documented. This skill includes patterns for recording dataset provenance, creating data cards, and managing dataset versions across experiments.
24
+
25
+ ## Dataset Repositories
26
+
27
+ ### General-Purpose Repositories
28
+
29
+ | Repository | Strengths | API | Citation Support |
30
+ |------------|-----------|-----|-----------------|
31
+ | Kaggle | ML benchmarks, competitions, community kernels | REST + CLI | DOI via dataset cards |
32
+ | Hugging Face Datasets | NLP, CV, audio; streaming support | Python library | Built-in citation |
33
+ | Zenodo | Any research data, DOI minting, EU-funded | REST API | Automatic DOI |
34
+ | Google Dataset Search | Meta-search across repositories | Web only | Links to source |
35
+ | UCI ML Repository | Classic ML benchmarks | Direct download | BibTeX provided |
36
+ | Figshare | Figures, datasets, media, preprints | REST API | DOI per item |
37
+ | Dryad | Ecology, biology, environmental science | REST API | DOI per dataset |
38
+ | ICPSR | Social science survey data | Restricted API | Persistent IDs |
39
+ | Harvard Dataverse | Multi-discipline, institutional | REST API | DOI per dataset |
40
+
41
+ ### Domain-Specific Archives
42
+
43
+ | Domain | Repository | Notable Datasets |
44
+ |--------|-----------|-----------------|
45
+ | Genomics | NCBI GEO, ENA | Gene expression, sequencing data |
46
+ | Astronomy | NASA archives, SDSS | Sky surveys, spectral data |
47
+ | Economics | FRED, World Bank, IMF | Time series, macro indicators |
48
+ | Climate | NOAA, CMIP6 | Temperature, precipitation records |
49
+ | Linguistics | LDC, CLARIN | Corpora, treebanks |
50
+ | Medical | PhysioNet, MIMIC | Clinical records, ECG/EEG |
51
+ | Chemistry | PubChem, ChEMBL | Molecular structures, bioassays |
52
+
53
+ ## Searching for Datasets
54
+
55
+ ### Kaggle CLI
56
+
57
+ ```bash
58
+ # Install and configure
59
+ pip install kaggle
60
+ # Place kaggle.json in ~/.kaggle/
61
+
62
+ # Search datasets
63
+ kaggle datasets list -s "sentiment analysis" --sort-by votes
64
+ kaggle datasets list -s "medical imaging" --file-type csv --min-size 100000000  # --min-size is given in bytes (~100 MB)
65
+
66
+ # Get dataset details
67
+ kaggle datasets metadata -d stanford/imdb-review-dataset
68
+
69
+ # Download dataset
70
+ kaggle datasets download -d stanford/imdb-review-dataset -p ./data/
71
+ unzip ./data/imdb-review-dataset.zip -d ./data/imdb/
72
+
73
+ # Download competition data
74
+ kaggle competitions download -c titanic -p ./data/
75
+ ```
76
+
77
+ ### Hugging Face Datasets
78
+
79
+ ```python
80
+ from datasets import load_dataset
81
+
82
+ # Search for datasets by task
83
+ from huggingface_hub import HfApi
84
+ api = HfApi()
85
+ datasets = api.list_datasets(
86
+ search="scientific papers",
87
+ sort="downloads",
88
+ direction=-1,
89
+ limit=20
90
+ )
91
+ for ds in datasets:
92
+ print(f"{ds.id}: {ds.downloads} downloads")
93
+
94
+ # Load a dataset (with streaming for large datasets)
95
+ dataset = load_dataset("scientific_papers", "arxiv", streaming=True)
96
+
97
+ # Inspect structure
98
+ print(dataset["train"].features)
99
+ print(next(iter(dataset["train"])).keys())  # streaming splits expose no num_rows; peek at one record instead
100
+
101
+ # Load specific split and subset
102
+ validation = load_dataset(
103
+ "scientific_papers", "arxiv",
104
+ split="validation[:1000]"
105
+ )
106
+ ```
107
+
108
+ ### Google Dataset Search (Programmatic)
109
+
110
+ ```python
111
+ import requests
112
+ from bs4 import BeautifulSoup
113
+
114
+ def search_google_datasets(query, num_results=10):
115
+ """Search Google Dataset Search and extract results."""
116
+ url = f"https://datasetsearch.research.google.com/search"
117
+ params = {"query": query, "docid": ""}
118
+ # Note: Google Dataset Search does not have an official API
119
+ # Use the web interface or alternative approaches
120
+ print(f"Search at: {url}?query={query.replace(' ', '+')}")
121
+ return url
122
+ ```
123
+
124
+ ### Zenodo API
125
+
126
+ ```python
127
+ import requests
128
+
129
+ def search_zenodo(query, resource_type="dataset", size=10):
130
+ """Search Zenodo for research datasets."""
131
+ url = "https://zenodo.org/api/records"
132
+ params = {
133
+ "q": query,
134
+ "type": resource_type,
135
+ "size": size,
136
+ "sort": "mostrecent",
137
+ "access_right": "open"
138
+ }
139
+ response = requests.get(url, params=params)
140
+ results = response.json()
141
+
142
+ for hit in results.get("hits", {}).get("hits", []):
143
+ meta = hit["metadata"]
144
+ print(f"Title: {meta['title']}")
145
+ print(f"DOI: {meta.get('doi', 'N/A')}")
146
+ print(f"License: {meta.get('license', {}).get('id', 'N/A')}")
147
+ print(f"Size: {sum(f['size'] for f in hit.get('files', []))/1e6:.1f} MB")
148
+ print("---")
149
+
150
+ return results
151
+ ```
152
+
153
+ ## Dataset Evaluation Checklist
154
+
155
+ Before using a dataset in research, verify the following:
156
+
157
+ ### Quality Assessment
158
+
159
+ - **Completeness**: What percentage of values are missing? Are missing values random or systematic?
160
+ - **Accuracy**: Are values within expected ranges? Are there obvious errors?
161
+ - **Consistency**: Are formats uniform (dates, categories, units)?
162
+ - **Timeliness**: When was the data collected? Is it current enough for your research question?
163
+ - **Sample size**: Is the dataset large enough for your intended analysis?
164
+
165
+ ### Licensing and Ethics
166
+
167
+ | License | Commercial Use | Modification | Attribution Required |
168
+ |---------|---------------|-------------|---------------------|
169
+ | CC0 | Yes | Yes | No |
170
+ | CC-BY 4.0 | Yes | Yes | Yes |
171
+ | CC-BY-SA 4.0 | Yes | Yes (share-alike) | Yes |
172
+ | CC-BY-NC 4.0 | No | Yes | Yes |
173
+ | ODC-ODbL | Yes | Yes (share-alike) | Yes |
174
+ | Custom/Restricted | Varies | Varies | Varies |
175
+
176
+ ### Reproducibility Documentation
177
+
178
+ ```markdown
179
+ ## Data Card
180
+
181
+ **Dataset**: [Name]
182
+ **Source**: [URL]
183
+ **Version**: [Version/Date]
184
+ **DOI**: [DOI if available]
185
+ **License**: [License name]
186
+ **Downloaded**: [YYYY-MM-DD]
187
+ **Size**: [X rows, Y columns, Z MB]
188
+ **Description**: [Brief description]
189
+ **Preprocessing**: [Steps applied before use]
190
+ **Citation**: [BibTeX entry]
191
+ ```
192
+
193
+ ## Download and Organization
194
+
195
+ ### Project Data Structure
196
+
197
+ ```
198
+ project/
199
+ data/
200
+ raw/ # Original downloaded data (never modify)
201
+ dataset_v1.csv
202
+ README.md # Data card with provenance
203
+ processed/ # Cleaned and transformed data
204
+ train.csv
205
+ test.csv
206
+ external/ # Third-party reference data
207
+ scripts/
208
+ download_data.py # Reproducible download script
209
+ preprocess.py # Data cleaning pipeline
210
+ ```
211
+
212
+ ### Reproducible Download Script
213
+
214
+ ```python
215
+ """download_data.py - Reproducible dataset download."""
216
+ import hashlib
217
+ from pathlib import Path
218
+ import requests
219
+
220
+ DATASETS = {
221
+ "main_dataset": {
222
+ "url": "https://zenodo.org/record/12345/files/data.csv",
223
+ "sha256": "abc123...",
224
+ "filename": "raw/main_dataset.csv"
225
+ }
226
+ }
227
+
228
+ DATA_DIR = Path("data")
229
+
230
+ for name, info in DATASETS.items():
231
+ path = DATA_DIR / info["filename"]
232
+ if path.exists():
233
+ print(f"Already downloaded: {name}")
234
+ continue
235
+
236
+ path.parent.mkdir(parents=True, exist_ok=True)
237
+ print(f"Downloading {name}...")
238
+ response = requests.get(info["url"], timeout=60)
239
+ response.raise_for_status()
240
+ # Verify integrity before saving, so a corrupt file is never cached as "already downloaded"
241
+ sha256 = hashlib.sha256(response.content).hexdigest()
242
+ assert sha256 == info["sha256"], f"Checksum mismatch for {name}"
243
+ path.write_bytes(response.content)
244
+ print(f"Verified: {name}")
245
+ ```
246
+
247
+ ## References
248
+
249
+ - Kaggle API documentation: https://github.com/Kaggle/kaggle-api
250
+ - Hugging Face Datasets: https://huggingface.co/docs/datasets
251
+ - Zenodo API: https://developers.zenodo.org
252
+ - Google Dataset Search: https://datasetsearch.research.google.com
253
+ - Gebru et al., "Datasheets for Datasets" (2021): https://arxiv.org/abs/1803.09010