@wentorai/research-plugins 1.0.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (415)
  1. package/README.md +22 -22
  2. package/curated/analysis/README.md +82 -56
  3. package/curated/domains/README.md +225 -69
  4. package/curated/literature/README.md +115 -46
  5. package/curated/research/README.md +106 -58
  6. package/curated/tools/README.md +107 -87
  7. package/curated/writing/README.md +92 -45
  8. package/mcp-configs/academic-db/alphafold-mcp.json +20 -0
  9. package/mcp-configs/academic-db/brightspace-mcp.json +21 -0
  10. package/mcp-configs/academic-db/climatiq-mcp.json +20 -0
  11. package/mcp-configs/academic-db/gibs-mcp.json +20 -0
  12. package/mcp-configs/academic-db/gis-mcp-server.json +22 -0
  13. package/mcp-configs/academic-db/google-earth-engine-mcp.json +21 -0
  14. package/mcp-configs/academic-db/m4-clinical-mcp.json +21 -0
  15. package/mcp-configs/academic-db/medical-mcp.json +21 -0
  16. package/mcp-configs/academic-db/nexonco-mcp.json +20 -0
  17. package/mcp-configs/academic-db/omop-mcp.json +20 -0
  18. package/mcp-configs/academic-db/onekgpd-mcp.json +20 -0
  19. package/mcp-configs/academic-db/openedu-mcp.json +20 -0
  20. package/mcp-configs/academic-db/opengenes-mcp.json +20 -0
  21. package/mcp-configs/academic-db/openstax-mcp.json +21 -0
  22. package/mcp-configs/academic-db/openstreetmap-mcp.json +21 -0
  23. package/mcp-configs/academic-db/opentargets-mcp.json +21 -0
  24. package/mcp-configs/academic-db/pdb-mcp.json +21 -0
  25. package/mcp-configs/academic-db/smithsonian-mcp.json +20 -0
  26. package/mcp-configs/ai-platform/magi-researchers.json +21 -0
  27. package/mcp-configs/ai-platform/mcp-academic-researcher.json +22 -0
  28. package/mcp-configs/ai-platform/open-paper-machine.json +21 -0
  29. package/mcp-configs/ai-platform/paper-intelligence.json +21 -0
  30. package/mcp-configs/ai-platform/paper-reader.json +21 -0
  31. package/mcp-configs/ai-platform/paperdebugger.json +21 -0
  32. package/mcp-configs/browser/exa-mcp.json +20 -0
  33. package/mcp-configs/browser/mcp-searxng.json +21 -0
  34. package/mcp-configs/browser/mcp-webresearch.json +20 -0
  35. package/mcp-configs/cloud-docs/confluence-mcp.json +37 -0
  36. package/mcp-configs/cloud-docs/google-drive-mcp.json +35 -0
  37. package/mcp-configs/cloud-docs/notion-mcp.json +29 -0
  38. package/mcp-configs/communication/discord-mcp.json +29 -0
  39. package/mcp-configs/communication/discourse-mcp.json +21 -0
  40. package/mcp-configs/communication/slack-mcp.json +29 -0
  41. package/mcp-configs/communication/telegram-mcp.json +28 -0
  42. package/mcp-configs/data-platform/automl-stat-mcp.json +21 -0
  43. package/mcp-configs/data-platform/jefferson-stats-mcp.json +22 -0
  44. package/mcp-configs/data-platform/mcp-excel-server.json +21 -0
  45. package/mcp-configs/data-platform/mcp-stata.json +21 -0
  46. package/mcp-configs/data-platform/mcpstack-jupyter.json +21 -0
  47. package/mcp-configs/data-platform/ml-mcp.json +21 -0
  48. package/mcp-configs/data-platform/nasdaq-data-link-mcp.json +20 -0
  49. package/mcp-configs/data-platform/numpy-mcp.json +21 -0
  50. package/mcp-configs/database/neo4j-mcp.json +37 -0
  51. package/mcp-configs/database/postgres-mcp.json +28 -0
  52. package/mcp-configs/database/sqlite-mcp.json +29 -0
  53. package/mcp-configs/dev-platform/geogebra-mcp.json +21 -0
  54. package/mcp-configs/dev-platform/github-mcp.json +31 -0
  55. package/mcp-configs/dev-platform/gitlab-mcp.json +34 -0
  56. package/mcp-configs/dev-platform/latex-mcp-server.json +21 -0
  57. package/mcp-configs/dev-platform/manim-mcp.json +20 -0
  58. package/mcp-configs/dev-platform/mcp-echarts.json +20 -0
  59. package/mcp-configs/dev-platform/panel-viz-mcp.json +20 -0
  60. package/mcp-configs/dev-platform/paperbanana.json +20 -0
  61. package/mcp-configs/dev-platform/texflow-mcp.json +20 -0
  62. package/mcp-configs/dev-platform/texmcp.json +20 -0
  63. package/mcp-configs/dev-platform/typst-mcp.json +21 -0
  64. package/mcp-configs/dev-platform/vizro-mcp.json +20 -0
  65. package/mcp-configs/email/email-mcp.json +40 -0
  66. package/mcp-configs/email/gmail-mcp.json +37 -0
  67. package/mcp-configs/note-knowledge/local-faiss-mcp.json +21 -0
  68. package/mcp-configs/note-knowledge/mcp-memory-service.json +21 -0
  69. package/mcp-configs/note-knowledge/mcp-obsidian.json +23 -0
  70. package/mcp-configs/note-knowledge/mcp-ragdocs.json +20 -0
  71. package/mcp-configs/note-knowledge/mcp-summarizer.json +21 -0
  72. package/mcp-configs/note-knowledge/mediawiki-mcp.json +21 -0
  73. package/mcp-configs/note-knowledge/openzim-mcp.json +20 -0
  74. package/mcp-configs/note-knowledge/zettelkasten-mcp.json +21 -0
  75. package/mcp-configs/reference-mgr/academic-paper-mcp-http.json +20 -0
  76. package/mcp-configs/reference-mgr/academix.json +20 -0
  77. package/mcp-configs/reference-mgr/arxiv-research-mcp.json +21 -0
  78. package/mcp-configs/reference-mgr/google-scholar-abstract-mcp.json +19 -0
  79. package/mcp-configs/reference-mgr/google-scholar-mcp.json +20 -0
  80. package/mcp-configs/reference-mgr/mcp-paperswithcode.json +21 -0
  81. package/mcp-configs/reference-mgr/mcp-scholarly.json +20 -0
  82. package/mcp-configs/reference-mgr/mcp-simple-arxiv.json +20 -0
  83. package/mcp-configs/reference-mgr/mcp-simple-pubmed.json +20 -0
  84. package/mcp-configs/reference-mgr/mcp-zotero.json +21 -0
  85. package/mcp-configs/reference-mgr/mendeley-mcp.json +20 -0
  86. package/mcp-configs/reference-mgr/ncbi-mcp-server.json +22 -0
  87. package/mcp-configs/reference-mgr/onecite.json +21 -0
  88. package/mcp-configs/reference-mgr/paper-search-mcp.json +21 -0
  89. package/mcp-configs/reference-mgr/pubmed-search-mcp.json +21 -0
  90. package/mcp-configs/reference-mgr/scholar-mcp.json +21 -0
  91. package/mcp-configs/reference-mgr/scholar-multi-mcp.json +21 -0
  92. package/mcp-configs/reference-mgr/seerai.json +21 -0
  93. package/mcp-configs/reference-mgr/semantic-scholar-fastmcp.json +21 -0
  94. package/mcp-configs/reference-mgr/sourcelibrary.json +20 -0
  95. package/mcp-configs/registry.json +178 -149
  96. package/mcp-configs/repository/dataverse-mcp.json +33 -0
  97. package/mcp-configs/repository/huggingface-mcp.json +29 -0
  98. package/openclaw.plugin.json +2 -2
  99. package/package.json +2 -2
  100. package/skills/analysis/dataviz/algorithm-visualizer-guide/SKILL.md +259 -0
  101. package/skills/analysis/dataviz/bokeh-visualization-guide/SKILL.md +270 -0
  102. package/skills/analysis/dataviz/chart-image-generator/SKILL.md +229 -0
  103. package/skills/analysis/dataviz/citation-map-guide/SKILL.md +184 -0
  104. package/skills/analysis/dataviz/d3-visualization-guide/SKILL.md +281 -0
  105. package/skills/analysis/dataviz/data-visualization-principles/SKILL.md +171 -0
  106. package/skills/analysis/dataviz/echarts-visualization-guide/SKILL.md +250 -0
  107. package/skills/analysis/dataviz/metabase-analytics-guide/SKILL.md +242 -0
  108. package/skills/analysis/dataviz/plotly-interactive-guide/SKILL.md +266 -0
  109. package/skills/analysis/dataviz/redash-analytics-guide/SKILL.md +284 -0
  110. package/skills/analysis/econometrics/econml-causal-guide/SKILL.md +163 -0
  111. package/skills/analysis/econometrics/empirical-paper-analysis/SKILL.md +192 -0
  112. package/skills/analysis/econometrics/mostly-harmless-guide/SKILL.md +139 -0
  113. package/skills/analysis/econometrics/panel-data-analyst/SKILL.md +259 -0
  114. package/skills/analysis/econometrics/panel-data-regression-workflow/SKILL.md +267 -0
  115. package/skills/analysis/econometrics/python-causality-guide/SKILL.md +134 -0
  116. package/skills/analysis/econometrics/stata-accounting-guide/SKILL.md +269 -0
  117. package/skills/analysis/econometrics/stata-analyst-guide/SKILL.md +245 -0
  118. package/skills/analysis/econometrics/stata-reference-guide/SKILL.md +293 -0
  119. package/skills/analysis/statistics/data-anomaly-detection/SKILL.md +157 -0
  120. package/skills/analysis/statistics/general-statistics-guide/SKILL.md +226 -0
  121. package/skills/analysis/statistics/infiagent-benchmark-guide/SKILL.md +106 -0
  122. package/skills/analysis/statistics/ml-experiment-tracker/SKILL.md +212 -0
  123. package/skills/analysis/statistics/pywayne-statistics-guide/SKILL.md +192 -0
  124. package/skills/analysis/statistics/quantitative-methods-guide/SKILL.md +193 -0
  125. package/skills/analysis/statistics/senior-data-scientist-guide/SKILL.md +223 -0
  126. package/skills/analysis/wrangling/claude-data-analysis-guide/SKILL.md +100 -0
  127. package/skills/analysis/wrangling/csv-data-analyzer/SKILL.md +170 -0
  128. package/skills/analysis/wrangling/data-cleaning-pipeline/SKILL.md +266 -0
  129. package/skills/analysis/wrangling/data-cog-guide/SKILL.md +178 -0
  130. package/skills/analysis/wrangling/open-data-scientist-guide/SKILL.md +197 -0
  131. package/skills/analysis/wrangling/stata-data-cleaning/SKILL.md +276 -0
  132. package/skills/analysis/wrangling/streamline-analyst-guide/SKILL.md +119 -0
  133. package/skills/analysis/wrangling/survey-data-processing/SKILL.md +298 -0
  134. package/skills/domains/ai-ml/ai-agent-papers-guide/SKILL.md +146 -0
  135. package/skills/domains/ai-ml/ai-model-benchmarking/SKILL.md +209 -0
  136. package/skills/domains/ai-ml/annotated-dl-papers-guide/SKILL.md +159 -0
  137. package/skills/domains/ai-ml/anomaly-detection-papers-guide/SKILL.md +167 -0
  138. package/skills/domains/ai-ml/autonomous-agents-papers-guide/SKILL.md +178 -0
  139. package/skills/domains/ai-ml/dl-transformer-finetune/SKILL.md +239 -0
  140. package/skills/domains/ai-ml/domain-adaptation-papers-guide/SKILL.md +173 -0
  141. package/skills/domains/ai-ml/generative-ai-guide/SKILL.md +146 -0
  142. package/skills/domains/ai-ml/graph-learning-papers-guide/SKILL.md +125 -0
  143. package/skills/domains/ai-ml/huggingface-inference-guide/SKILL.md +196 -0
  144. package/skills/domains/ai-ml/keras-deep-learning/SKILL.md +210 -0
  145. package/skills/domains/ai-ml/kolmogorov-arnold-networks-guide/SKILL.md +185 -0
  146. package/skills/domains/ai-ml/llm-from-scratch-guide/SKILL.md +124 -0
  147. package/skills/domains/ai-ml/ml-pipeline-guide/SKILL.md +295 -0
  148. package/skills/domains/ai-ml/nlp-toolkit-guide/SKILL.md +247 -0
  149. package/skills/domains/ai-ml/npcpy-research-guide/SKILL.md +137 -0
  150. package/skills/domains/ai-ml/pytorch-guide/SKILL.md +281 -0
  151. package/skills/domains/ai-ml/pytorch-lightning-guide/SKILL.md +244 -0
  152. package/skills/domains/ai-ml/responsible-ai-guide/SKILL.md +126 -0
  153. package/skills/domains/ai-ml/tensorflow-guide/SKILL.md +241 -0
  154. package/skills/domains/ai-ml/vmas-simulator-guide/SKILL.md +129 -0
  155. package/skills/domains/biomedical/bioagents-guide/SKILL.md +308 -0
  156. package/skills/domains/biomedical/clawbio-guide/SKILL.md +167 -0
  157. package/skills/domains/biomedical/clinical-dialogue-agents-guide/SKILL.md +145 -0
  158. package/skills/domains/biomedical/ena-sequence-api/SKILL.md +175 -0
  159. package/skills/domains/biomedical/genomas-guide/SKILL.md +126 -0
  160. package/skills/domains/biomedical/genotex-benchmark-guide/SKILL.md +125 -0
  161. package/skills/domains/biomedical/med-researcher-guide/SKILL.md +161 -0
  162. package/skills/domains/biomedical/med-researcher-r1-guide/SKILL.md +146 -0
  163. package/skills/domains/biomedical/medgeclaw-guide/SKILL.md +345 -0
  164. package/skills/domains/biomedical/medical-imaging-guide/SKILL.md +305 -0
  165. package/skills/domains/biomedical/ncbi-blast-api/SKILL.md +195 -0
  166. package/skills/domains/biomedical/ncbi-datasets-api/SKILL.md +220 -0
  167. package/skills/domains/biomedical/quickgo-api/SKILL.md +181 -0
  168. package/skills/domains/business/architecture-design-guide/SKILL.md +279 -0
  169. package/skills/domains/business/innovation-management-guide/SKILL.md +257 -0
  170. package/skills/domains/business/operations-research-guide/SKILL.md +258 -0
  171. package/skills/domains/business/xpert-bi-guide/SKILL.md +84 -0
  172. package/skills/domains/chemistry/cactus-cheminformatics-guide/SKILL.md +89 -0
  173. package/skills/domains/chemistry/chemeagle-guide/SKILL.md +147 -0
  174. package/skills/domains/chemistry/chemgraph-agent-guide/SKILL.md +120 -0
  175. package/skills/domains/chemistry/molecular-dynamics-guide/SKILL.md +237 -0
  176. package/skills/domains/chemistry/pubchem-api-guide/SKILL.md +180 -0
  177. package/skills/domains/chemistry/spectroscopy-analysis-guide/SKILL.md +290 -0
  178. package/skills/domains/cs/ai-security-papers-guide/SKILL.md +103 -0
  179. package/skills/domains/cs/code-llm-papers-guide/SKILL.md +131 -0
  180. package/skills/domains/cs/distributed-systems-guide/SKILL.md +268 -0
  181. package/skills/domains/cs/formal-verification-guide/SKILL.md +298 -0
  182. package/skills/domains/cs/gaussian-splatting-papers-guide/SKILL.md +158 -0
  183. package/skills/domains/cs/llm-aiops-guide/SKILL.md +70 -0
  184. package/skills/domains/cs/software-heritage-api/SKILL.md +200 -0
  185. package/skills/domains/ecology/species-distribution-guide/SKILL.md +343 -0
  186. package/skills/domains/economics/imf-data-api-guide/SKILL.md +174 -0
  187. package/skills/domains/economics/nber-working-papers-api/SKILL.md +177 -0
  188. package/skills/domains/economics/post-labor-economics/SKILL.md +254 -0
  189. package/skills/domains/economics/pricing-psychology-guide/SKILL.md +273 -0
  190. package/skills/domains/economics/repec-economics-api/SKILL.md +188 -0
  191. package/skills/domains/economics/world-bank-data-guide/SKILL.md +179 -0
  192. package/skills/domains/education/academic-study-methods/SKILL.md +228 -0
  193. package/skills/domains/education/assessment-design-guide/SKILL.md +213 -0
  194. package/skills/domains/education/educational-research-methods/SKILL.md +179 -0
  195. package/skills/domains/education/edumcp-guide/SKILL.md +74 -0
  196. package/skills/domains/education/mooc-analytics-guide/SKILL.md +206 -0
  197. package/skills/domains/education/open-syllabus-api/SKILL.md +171 -0
  198. package/skills/domains/finance/akshare-finance-data/SKILL.md +207 -0
  199. package/skills/domains/finance/finsight-research-guide/SKILL.md +113 -0
  200. package/skills/domains/finance/options-analytics-agent-guide/SKILL.md +117 -0
  201. package/skills/domains/finance/portfolio-optimization-guide/SKILL.md +279 -0
  202. package/skills/domains/finance/risk-modeling-guide/SKILL.md +260 -0
  203. package/skills/domains/finance/stata-accounting-research/SKILL.md +372 -0
  204. package/skills/domains/geoscience/climate-modeling-guide/SKILL.md +215 -0
  205. package/skills/domains/geoscience/pangaea-data-api/SKILL.md +197 -0
  206. package/skills/domains/geoscience/satellite-remote-sensing/SKILL.md +193 -0
  207. package/skills/domains/geoscience/seismology-data-guide/SKILL.md +208 -0
  208. package/skills/domains/humanities/digital-humanities-methods/SKILL.md +232 -0
  209. package/skills/domains/humanities/ethical-philosophy-guide/SKILL.md +244 -0
  210. package/skills/domains/humanities/history-research-guide/SKILL.md +260 -0
  211. package/skills/domains/humanities/political-history-guide/SKILL.md +241 -0
  212. package/skills/domains/law/caselaw-access-api/SKILL.md +149 -0
  213. package/skills/domains/law/legal-agent-skills-guide/SKILL.md +132 -0
  214. package/skills/domains/law/legal-nlp-guide/SKILL.md +236 -0
  215. package/skills/domains/law/legal-research-methods/SKILL.md +190 -0
  216. package/skills/domains/law/opencontracts-guide/SKILL.md +168 -0
  217. package/skills/domains/law/patent-analysis-guide/SKILL.md +257 -0
  218. package/skills/domains/law/regulatory-compliance-guide/SKILL.md +267 -0
  219. package/skills/domains/math/lean-theorem-proving-guide/SKILL.md +140 -0
  220. package/skills/domains/math/symbolic-computation-guide/SKILL.md +263 -0
  221. package/skills/domains/math/topology-data-analysis/SKILL.md +305 -0
  222. package/skills/domains/pharma/clinical-trial-design-guide/SKILL.md +271 -0
  223. package/skills/domains/pharma/drug-target-interaction/SKILL.md +242 -0
  224. package/skills/domains/pharma/madd-drug-discovery-guide/SKILL.md +153 -0
  225. package/skills/domains/pharma/pharmacovigilance-guide/SKILL.md +216 -0
  226. package/skills/domains/physics/astrophysics-data-guide/SKILL.md +305 -0
  227. package/skills/domains/physics/particle-physics-guide/SKILL.md +287 -0
  228. package/skills/domains/social-science/ipums-microdata-api/SKILL.md +211 -0
  229. package/skills/domains/social-science/network-analysis-guide/SKILL.md +310 -0
  230. package/skills/domains/social-science/psychology-research-guide/SKILL.md +270 -0
  231. package/skills/domains/social-science/sociology-research-guide/SKILL.md +238 -0
  232. package/skills/domains/social-science/sociology-research-methods/SKILL.md +181 -0
  233. package/skills/literature/discovery/arxiv-paper-monitoring/SKILL.md +233 -0
  234. package/skills/literature/discovery/paper-recommendation-guide/SKILL.md +120 -0
  235. package/skills/literature/discovery/papers-we-love-guide/SKILL.md +169 -0
  236. package/skills/literature/discovery/semantic-paper-radar/SKILL.md +144 -0
  237. package/skills/literature/discovery/zotero-arxiv-daily-guide/SKILL.md +94 -0
  238. package/skills/literature/fulltext/bioc-pmc-api/SKILL.md +146 -0
  239. package/skills/literature/fulltext/core-api-guide/SKILL.md +144 -0
  240. package/skills/literature/fulltext/dataverse-api/SKILL.md +215 -0
  241. package/skills/literature/fulltext/hal-archive-api/SKILL.md +218 -0
  242. package/skills/literature/fulltext/institutional-repository-guide/SKILL.md +212 -0
  243. package/skills/literature/fulltext/open-access-mining-guide/SKILL.md +341 -0
  244. package/skills/literature/fulltext/osf-api/SKILL.md +212 -0
  245. package/skills/literature/fulltext/pmc-ftp-bulk-download/SKILL.md +182 -0
  246. package/skills/literature/fulltext/zotero-ai-butler-guide/SKILL.md +166 -0
  247. package/skills/literature/fulltext/zotero-scihub-guide/SKILL.md +168 -0
  248. package/skills/literature/metadata/academic-paper-summarizer/SKILL.md +101 -0
  249. package/skills/literature/metadata/bibliometrix-guide/SKILL.md +164 -0
  250. package/skills/literature/metadata/crossref-event-data-api/SKILL.md +183 -0
  251. package/skills/literature/metadata/doi-content-negotiation/SKILL.md +202 -0
  252. package/skills/literature/metadata/orkg-api/SKILL.md +153 -0
  253. package/skills/literature/metadata/plumx-metrics-api/SKILL.md +188 -0
  254. package/skills/literature/metadata/ror-organization-api/SKILL.md +208 -0
  255. package/skills/literature/metadata/sophosia-reference-guide/SKILL.md +110 -0
  256. package/skills/literature/metadata/viaf-authority-api/SKILL.md +209 -0
  257. package/skills/literature/metadata/wikidata-api-guide/SKILL.md +156 -0
  258. package/skills/literature/metadata/zoplicate-dedup-guide/SKILL.md +147 -0
  259. package/skills/literature/metadata/zotero-actions-tags-guide/SKILL.md +212 -0
  260. package/skills/literature/metadata/zotmoov-guide/SKILL.md +120 -0
  261. package/skills/literature/metadata/zutilo-guide/SKILL.md +140 -0
  262. package/skills/literature/search/arxiv-batch-reporting/SKILL.md +133 -0
  263. package/skills/literature/search/arxiv-cli-tools/SKILL.md +172 -0
  264. package/skills/literature/search/arxiv-osiris/SKILL.md +199 -0
  265. package/skills/literature/search/arxiv-paper-processor/SKILL.md +141 -0
  266. package/skills/literature/search/baidu-scholar-guide/SKILL.md +110 -0
  267. package/skills/literature/search/base-academic-search/SKILL.md +196 -0
  268. package/skills/literature/search/chatpaper-guide/SKILL.md +122 -0
  269. package/skills/literature/search/citeseerx-api/SKILL.md +183 -0
  270. package/skills/literature/search/deep-literature-search/SKILL.md +149 -0
  271. package/skills/literature/search/deepgit-search-guide/SKILL.md +147 -0
  272. package/skills/literature/search/eric-education-api/SKILL.md +199 -0
  273. package/skills/literature/search/findpapers-guide/SKILL.md +177 -0
  274. package/skills/literature/search/ieee-xplore-api/SKILL.md +177 -0
  275. package/skills/literature/search/lens-scholarly-api/SKILL.md +211 -0
  276. package/skills/literature/search/multi-database-literature-search/SKILL.md +198 -0
  277. package/skills/literature/search/open-library-api/SKILL.md +196 -0
  278. package/skills/literature/search/open-semantic-search-guide/SKILL.md +190 -0
  279. package/skills/literature/search/openaire-api/SKILL.md +141 -0
  280. package/skills/literature/search/paper-search-mcp-guide/SKILL.md +107 -0
  281. package/skills/literature/search/papers-chat-guide/SKILL.md +194 -0
  282. package/skills/literature/search/pasa-paper-search-guide/SKILL.md +138 -0
  283. package/skills/literature/search/plos-open-access-api/SKILL.md +203 -0
  284. package/skills/literature/search/scielo-api/SKILL.md +182 -0
  285. package/skills/literature/search/share-research-api/SKILL.md +129 -0
  286. package/skills/literature/search/worldcat-search-api/SKILL.md +224 -0
  287. package/skills/research/automation/ai-scientist-v2-guide/SKILL.md +284 -0
  288. package/skills/research/automation/aim-experiment-guide/SKILL.md +234 -0
  289. package/skills/research/automation/claude-academic-workflow-guide/SKILL.md +202 -0
  290. package/skills/research/automation/coexist-ai-guide/SKILL.md +149 -0
  291. package/skills/research/automation/datagen-research-guide/SKILL.md +131 -0
  292. package/skills/research/automation/foam-agent-guide/SKILL.md +203 -0
  293. package/skills/research/automation/kedro-pipeline-guide/SKILL.md +216 -0
  294. package/skills/research/automation/mle-agent-guide/SKILL.md +139 -0
  295. package/skills/research/automation/paper-to-agent-guide/SKILL.md +116 -0
  296. package/skills/research/automation/rd-agent-guide/SKILL.md +246 -0
  297. package/skills/research/automation/research-paper-orchestrator/SKILL.md +254 -0
  298. package/skills/research/deep-research/academic-deep-research/SKILL.md +190 -0
  299. package/skills/research/deep-research/auto-deep-research-guide/SKILL.md +141 -0
  300. package/skills/research/deep-research/cognitive-kernel-guide/SKILL.md +200 -0
  301. package/skills/research/deep-research/corvus-research-guide/SKILL.md +132 -0
  302. package/skills/research/deep-research/deep-research-pro/SKILL.md +213 -0
  303. package/skills/research/deep-research/deep-research-work/SKILL.md +204 -0
  304. package/skills/research/deep-research/deep-searcher-guide/SKILL.md +253 -0
  305. package/skills/research/deep-research/gpt-researcher-guide/SKILL.md +191 -0
  306. package/skills/research/deep-research/in-depth-research-guide/SKILL.md +205 -0
  307. package/skills/research/deep-research/khoj-research-guide/SKILL.md +200 -0
  308. package/skills/research/deep-research/kosmos-scientist-guide/SKILL.md +185 -0
  309. package/skills/research/deep-research/llm-scientific-discovery-guide/SKILL.md +178 -0
  310. package/skills/research/deep-research/local-deep-research-guide/SKILL.md +253 -0
  311. package/skills/research/deep-research/open-researcher-guide/SKILL.md +138 -0
  312. package/skills/research/deep-research/tongyi-deep-research-guide/SKILL.md +217 -0
  313. package/skills/research/funding/eu-horizon-guide/SKILL.md +244 -0
  314. package/skills/research/funding/grant-budget-guide/SKILL.md +284 -0
  315. package/skills/research/funding/nih-reporter-api-guide/SKILL.md +166 -0
  316. package/skills/research/funding/nsf-award-api-guide/SKILL.md +133 -0
  317. package/skills/research/methodology/academic-mentor-guide/SKILL.md +169 -0
  318. package/skills/research/methodology/claude-scientific-guide/SKILL.md +122 -0
  319. package/skills/research/methodology/deep-innovator-guide/SKILL.md +242 -0
  320. package/skills/research/methodology/osf-api-guide/SKILL.md +165 -0
  321. package/skills/research/methodology/parsifal-slr-guide/SKILL.md +154 -0
  322. package/skills/research/methodology/research-paper-kb/SKILL.md +263 -0
  323. package/skills/research/methodology/research-pipeline-units-guide/SKILL.md +169 -0
  324. package/skills/research/methodology/research-town-guide/SKILL.md +263 -0
  325. package/skills/research/methodology/slr-automation-guide/SKILL.md +235 -0
  326. package/skills/research/paper-review/automated-review-guide/SKILL.md +281 -0
  327. package/skills/research/paper-review/latte-review-guide/SKILL.md +175 -0
  328. package/skills/research/paper-review/paper-compare-guide/SKILL.md +238 -0
  329. package/skills/research/paper-review/paper-critique-framework/SKILL.md +181 -0
  330. package/skills/research/paper-review/paper-digest-guide/SKILL.md +240 -0
  331. package/skills/research/paper-review/paper-research-assistant/SKILL.md +231 -0
  332. package/skills/research/paper-review/research-quality-filter/SKILL.md +261 -0
  333. package/skills/research/paper-review/review-response-guide/SKILL.md +275 -0
  334. package/skills/tools/code-exec/contextplus-mcp-guide/SKILL.md +110 -0
  335. package/skills/tools/code-exec/google-colab-guide/SKILL.md +276 -0
  336. package/skills/tools/code-exec/kaggle-api-guide/SKILL.md +216 -0
  337. package/skills/tools/code-exec/overleaf-cli-guide/SKILL.md +279 -0
  338. package/skills/tools/diagram/clawphd-guide/SKILL.md +149 -0
  339. package/skills/tools/diagram/code-flow-visualizer/SKILL.md +197 -0
  340. package/skills/tools/diagram/excalidraw-diagram-guide/SKILL.md +170 -0
  341. package/skills/tools/diagram/json-data-visualizer/SKILL.md +270 -0
  342. package/skills/tools/diagram/kroki-diagram-api/SKILL.md +198 -0
  343. package/skills/tools/diagram/mermaid-architect-guide/SKILL.md +219 -0
  344. package/skills/tools/diagram/scientific-graphical-abstract/SKILL.md +201 -0
  345. package/skills/tools/diagram/tldraw-whiteboard-guide/SKILL.md +397 -0
  346. package/skills/tools/document/docsgpt-guide/SKILL.md +130 -0
  347. package/skills/tools/document/large-document-reader/SKILL.md +202 -0
  348. package/skills/tools/document/md2pdf-xelatex/SKILL.md +212 -0
  349. package/skills/tools/document/openpaper-guide/SKILL.md +232 -0
  350. package/skills/tools/document/paper-parse-guide/SKILL.md +243 -0
  351. package/skills/tools/document/weknora-guide/SKILL.md +216 -0
  352. package/skills/tools/document/zotero-addon-market-guide/SKILL.md +108 -0
  353. package/skills/tools/document/zotero-night-theme-guide/SKILL.md +142 -0
  354. package/skills/tools/document/zotero-style-guide/SKILL.md +217 -0
  355. package/skills/tools/knowledge-graph/citation-network-builder/SKILL.md +244 -0
  356. package/skills/tools/knowledge-graph/concept-map-generator/SKILL.md +284 -0
  357. package/skills/tools/knowledge-graph/graphiti-guide/SKILL.md +219 -0
  358. package/skills/tools/knowledge-graph/mimir-memory-guide/SKILL.md +135 -0
  359. package/skills/tools/knowledge-graph/notero-zotero-notion-guide/SKILL.md +187 -0
  360. package/skills/tools/knowledge-graph/open-webui-tools-guide/SKILL.md +156 -0
  361. package/skills/tools/knowledge-graph/openspg-guide/SKILL.md +210 -0
  362. package/skills/tools/knowledge-graph/paperpile-notion-guide/SKILL.md +84 -0
  363. package/skills/tools/knowledge-graph/zotero-markdb-connect-guide/SKILL.md +162 -0
  364. package/skills/tools/ocr-translate/latex-translation-guide/SKILL.md +176 -0
  365. package/skills/tools/ocr-translate/math-equation-renderer/SKILL.md +198 -0
  366. package/skills/tools/ocr-translate/pdf-math-translate-guide/SKILL.md +141 -0
  367. package/skills/tools/ocr-translate/zotero-pdf-translate-guide/SKILL.md +95 -0
  368. package/skills/tools/ocr-translate/zotero-pdf2zh-guide/SKILL.md +143 -0
  369. package/skills/tools/scraping/dataset-finder-guide/SKILL.md +253 -0
  370. package/skills/tools/scraping/easy-spider-guide/SKILL.md +250 -0
  371. package/skills/tools/scraping/google-scholar-scraper/SKILL.md +255 -0
  372. package/skills/tools/scraping/repository-harvesting-guide/SKILL.md +310 -0
  373. package/skills/writing/citation/academic-citation-manager/SKILL.md +314 -0
  374. package/skills/writing/citation/academic-citation-manager-guide/SKILL.md +182 -0
  375. package/skills/writing/citation/citation-assistant-skill/SKILL.md +192 -0
  376. package/skills/writing/citation/jabref-reference-guide/SKILL.md +127 -0
  377. package/skills/writing/citation/jasminum-zotero-guide/SKILL.md +103 -0
  378. package/skills/writing/citation/mendeley-api/SKILL.md +231 -0
  379. package/skills/writing/citation/obsidian-citation-guide/SKILL.md +164 -0
  380. package/skills/writing/citation/obsidian-zotero-guide/SKILL.md +137 -0
  381. package/skills/writing/citation/onecite-reference-guide/SKILL.md +168 -0
  382. package/skills/writing/citation/papersgpt-zotero-guide/SKILL.md +132 -0
  383. package/skills/writing/citation/papis-cli-guide/SKILL.md +213 -0
  384. package/skills/writing/citation/zotero-better-bibtex-guide/SKILL.md +107 -0
  385. package/skills/writing/citation/zotero-better-notes-guide/SKILL.md +121 -0
  386. package/skills/writing/citation/zotero-gpt-guide/SKILL.md +111 -0
  387. package/skills/writing/citation/zotero-mcp-guide/SKILL.md +164 -0
  388. package/skills/writing/citation/zotero-mdnotes-guide/SKILL.md +162 -0
  389. package/skills/writing/citation/zotero-reference-guide/SKILL.md +139 -0
  390. package/skills/writing/citation/zotero-scholar-guide/SKILL.md +294 -0
  391. package/skills/writing/citation/zotfile-attachment-guide/SKILL.md +140 -0
  392. package/skills/writing/composition/ml-paper-writing/SKILL.md +163 -0
  393. package/skills/writing/composition/opendraft-thesis-guide/SKILL.md +200 -0
  394. package/skills/writing/composition/paper-debugger-guide/SKILL.md +143 -0
  395. package/skills/writing/composition/paperforge-guide/SKILL.md +205 -0
  396. package/skills/writing/composition/research-paper-writer/SKILL.md +226 -0
  397. package/skills/writing/composition/scientific-writing-resources/SKILL.md +151 -0
  398. package/skills/writing/composition/scientific-writing-wrapper/SKILL.md +153 -0
  399. package/skills/writing/latex/academic-writing-latex/SKILL.md +285 -0
  400. package/skills/writing/latex/latex-drawing-collection/SKILL.md +154 -0
  401. package/skills/writing/latex/latex-templates-collection/SKILL.md +159 -0
  402. package/skills/writing/latex/md-to-pdf-academic/SKILL.md +230 -0
  403. package/skills/writing/latex/tex-render-guide/SKILL.md +243 -0
  404. package/skills/writing/polish/academic-tone-guide/SKILL.md +209 -0
  405. package/skills/writing/polish/chinese-text-humanizer/SKILL.md +140 -0
  406. package/skills/writing/polish/conciseness-editing-guide/SKILL.md +225 -0
  407. package/skills/writing/polish/paper-polish-guide/SKILL.md +160 -0
  408. package/skills/writing/templates/arxiv-preprint-template/SKILL.md +184 -0
  409. package/skills/writing/templates/elegant-paper-template/SKILL.md +141 -0
  410. package/skills/writing/templates/graphical-abstract-guide/SKILL.md +183 -0
  411. package/skills/writing/templates/novathesis-guide/SKILL.md +152 -0
  412. package/skills/writing/templates/scientific-article-pdf/SKILL.md +261 -0
  413. package/skills/writing/templates/sjtuthesis-guide/SKILL.md +197 -0
  414. package/skills/writing/templates/thuthesis-guide/SKILL.md +181 -0
  415. package/skills/literature/fulltext/repository-harvesting-guide/SKILL.md +0 -207
@@ -0,0 +1,266 @@
1
+ ---
2
+ name: data-cleaning-pipeline
3
+ description: "Systematic data cleaning workflows for research datasets"
4
+ metadata:
5
+ openclaw:
6
+ emoji: "🧹"
7
+ category: "analysis"
8
+ subcategory: "wrangling"
9
+ keywords: ["data cleaning", "data quality", "missing values", "outlier detection", "data validation", "preprocessing"]
10
+ source: "wentor-research-plugins"
11
+ ---
12
+
13
+ # Data Cleaning Pipeline
14
+
15
+ A skill for building systematic, reproducible data cleaning pipelines for research datasets. Covers common data quality issues, step-by-step cleaning workflows, handling missing values, detecting and treating outliers, validating data integrity, and documenting cleaning decisions for reproducibility.
16
+
17
+ ## The Data Cleaning Workflow
18
+
19
+ ### Pipeline Overview
20
+
21
+ Data cleaning should follow a consistent, documented order. Each step builds on the previous one, and the entire pipeline should be scripted for reproducibility.
22
+
23
+ ```
24
+ Data Cleaning Pipeline (recommended order):
25
+
26
+ 1. Initial Assessment
27
+ - Load data, check dimensions, inspect dtypes
28
+ - Generate summary statistics and missing value report
29
+ - Identify structural issues (merged cells, inconsistent delimiters)
30
+
31
+ 2. Structural Fixes
32
+ - Standardize column names (snake_case, no spaces)
33
+ - Fix data types (strings to numbers, dates, categories)
34
+ - Split or merge columns as needed
35
+ - Remove completely empty rows/columns
36
+
37
+ 3. Deduplication
38
+ - Identify exact duplicates
39
+ - Identify near-duplicates (fuzzy matching)
40
+ - Decide keep-first, keep-last, or merge strategy
41
+
42
+ 4. Missing Value Treatment
43
+ - Classify missingness mechanism (MCAR, MAR, MNAR)
44
+ - Apply appropriate imputation or exclusion strategy
45
+ - Document and justify missing data decisions
46
+
47
+ 5. Outlier Detection and Treatment
48
+ - Statistical methods (IQR, z-score, Mahalanobis)
49
+ - Domain-based validation (impossible values)
50
+ - Decide: correct, cap, remove, or keep with flag
51
+
52
+ 6. Consistency Checks
53
+ - Cross-field validation (age vs birth date)
54
+ - Range validation (0-100 for percentages)
55
+ - Referential integrity (foreign keys exist)
56
+
57
+ 7. Documentation and Export
58
+ - Log all changes with before/after counts
59
+ - Export cleaned dataset with version number
60
+ - Save cleaning script for reproducibility
61
+ ```
62
+
63
+ ## Initial Data Assessment
64
+
65
+ ### Automated Quality Report
66
+
67
+ ```python
68
+ import pandas as pd
69
+ import numpy as np
70
+
71
def generate_quality_report(df):
    """
    Generate a comprehensive data quality report.
    Run this BEFORE any cleaning to establish a baseline.

    Args:
        df: pandas DataFrame to profile. It is only read, never modified.

    Returns:
        dict with the keys "dimensions", "memory_usage", "duplicate_rows"
        and "columns". "columns" is a list with one dict per column holding
        dtype, missing-value counts, unique-value count and sample values,
        plus min/max/mean/std for numeric columns. Counts and statistics are
        plain Python numbers so the report can be passed to json.dumps.
    """
    def _native(value):
        # NumPy scalars are not JSON-serializable; unwrap them to Python types.
        return value.item() if isinstance(value, np.generic) else value

    n_rows, n_cols = df.shape
    report = {
        "dimensions": f"{n_rows} rows x {n_cols} columns",
        "memory_usage": f"{df.memory_usage(deep=True).sum() / 1e6:.1f} MB",
        "duplicate_rows": int(df.duplicated().sum()),
    }

    col_report = []
    for col in df.columns:
        series = df[col]
        missing_mask = series.isna()
        # The mean of an empty mask is NaN; report 0% for an empty frame.
        missing_pct = missing_mask.mean() * 100 if n_rows else 0.0

        info = {
            "column": col,
            "dtype": str(series.dtype),
            "missing_count": int(missing_mask.sum()),
            "missing_pct": f"{missing_pct:.1f}%",
            "unique_values": int(series.nunique()),
            "sample_values": str(series.dropna().head(3).tolist()),
        }

        if pd.api.types.is_numeric_dtype(series):
            info["min"] = _native(series.min())
            info["max"] = _native(series.max())
            info["mean"] = _native(series.mean())
            info["std"] = _native(series.std())

        col_report.append(info)

    report["columns"] = col_report
    return report
103
+ ```
104
+
105
+ ## Missing Value Treatment
106
+
107
+ ### Classifying Missingness
108
+
109
+ ```
110
+ Missing data mechanisms (Rubin's classification):
111
+
112
+ MCAR (Missing Completely At Random):
113
+ - Missingness is unrelated to any variable
114
+ - Example: Lab samples randomly lost during transport
115
+ - Test: Little's MCAR test, compare distributions
116
+ - Safe to: Listwise delete if < 5% missing
117
+
118
+ MAR (Missing At Random):
119
+ - Missingness depends on observed variables but not the missing value
120
+ - Example: Younger participants skip income questions more often
121
+ - Test: Compare missingness patterns across groups
122
+ - Best approach: Multiple imputation, regression imputation
123
+
124
+ MNAR (Missing Not At Random):
125
+ - Missingness depends on the unobserved value itself
126
+ - Example: High-income people refuse to report income
127
+ - Cannot be tested directly from the data
128
+ - Requires: Sensitivity analysis, selection models, domain expertise
129
+ ```
130
+
131
+ ### Imputation Strategies
132
+
133
+ ```python
134
+ from sklearn.impute import SimpleImputer, KNNImputer
135
+
136
def impute_missing_values(df, numeric_strategy="median",
                          categorical_strategy="mode"):
    """
    Apply appropriate imputation strategies by column type.

    For research data, prefer:
    - Median for skewed numeric data
    - Mean for normally distributed numeric data
    - Mode for categorical data
    - KNN for multivariate patterns
    - Multiple imputation for inference (e.g. MICE in statsmodels)

    Args:
        df: pandas DataFrame. It is modified in place and also returned.
        numeric_strategy: "knn" for KNNImputer(n_neighbors=5); any other
            value is passed to SimpleImputer as its strategy. "mode" is
            accepted as an alias for "most_frequent".
        categorical_strategy: SimpleImputer strategy for object/category
            columns. "mode" is accepted as an alias for "most_frequent".

    Returns:
        The same DataFrame with missing values filled. Columns that contain
        no observed value at all are left untouched, because there is nothing
        to estimate a fill value from.
    """
    def _sklearn_strategy(name):
        # scikit-learn names the mode strategy "most_frequent".
        return "most_frequent" if name == "mode" else name

    def _imputable(columns):
        # The imputers drop all-missing columns from their output, which
        # would make the assignment back into df fail on a shape mismatch.
        return [c for c in columns if df[c].notna().any()]

    numeric_cols = _imputable(df.select_dtypes(include=[np.number]).columns)
    categorical_cols = _imputable(
        df.select_dtypes(include=["object", "category"]).columns
    )

    # Numeric imputation
    if numeric_cols:
        if numeric_strategy == "knn":
            imputer = KNNImputer(n_neighbors=5)
        else:
            imputer = SimpleImputer(strategy=_sklearn_strategy(numeric_strategy))
        df[numeric_cols] = imputer.fit_transform(df[numeric_cols])

    # Categorical imputation (honours categorical_strategy)
    if categorical_cols:
        imputer = SimpleImputer(strategy=_sklearn_strategy(categorical_strategy))
        df[categorical_cols] = imputer.fit_transform(df[categorical_cols])

    return df
166
+ ```
167
+
168
+ ## Outlier Detection
169
+
170
+ ### Statistical Methods
171
+
172
+ ```python
173
def detect_outliers_iqr(series, multiplier=1.5):
    """
    Flag values lying outside the IQR fences of a series.

    The fences sit `multiplier` interquartile ranges below the first
    quartile and above the third quartile. A multiplier of 1.5 is the
    usual outlier rule; 3.0 marks extreme outliers.

    Returns:
        (outliers, lower, upper): a boolean mask aligned with `series`,
        followed by the lower and upper fence values.
    """
    first_quartile, third_quartile = (
        series.quantile(level) for level in (0.25, 0.75)
    )
    iqr = third_quartile - first_quartile

    lower = first_quartile - multiplier * iqr
    upper = third_quartile + multiplier * iqr

    below_fence = series < lower
    above_fence = series > upper
    return below_fence | above_fence, lower, upper
186
+
187
+
188
def detect_outliers_zscore(series, threshold=3.0):
    """
    Detect outliers using z-score method.
    Threshold of 3.0 corresponds to 99.7% of normal distribution.
    Use modified z-score (MAD-based) for skewed distributions.

    Args:
        series: numeric pandas Series; missing values are allowed.
        threshold: absolute z-score above which a value is flagged.

    Returns:
        Boolean Series with the same index and length as `series`, so it
        can be used directly as a mask on the original data. Missing values
        are never flagged. A series with zero (or undefined) spread has no
        outliers and yields an all-False mask.
    """
    # Population standard deviation (ddof=0), the same convention as
    # scipy.stats.zscore; mean/std skip missing values.
    center = series.mean()
    spread = series.std(ddof=0)

    # `not spread > 0` also catches NaN (empty or all-missing input).
    if not spread > 0:
        return pd.Series(False, index=series.index, dtype=bool)

    z_scores = ((series - center) / spread).abs()
    # Comparisons against NaN are False, so missing values stay unflagged.
    return z_scores > threshold
198
+ ```
199
+
200
+ ### Domain-Based Validation
201
+
202
+ ```
203
+ Common domain validations:
204
+
205
+ Age: 0-120 (flag > 100)
206
+ Height (cm): 50-250
207
+ Weight (kg): 1-300
208
+ Blood pressure systolic: 60-250
209
+ Blood pressure diastolic: 30-150
210
+ Temperature (C): 30-45 for body temperature
211
+ Likert scale (1-5): only integer values 1-5
212
+ Percentage: 0-100
213
+ Latitude: -90 to 90
214
+ Longitude: -180 to 180
215
+ Year of birth: 1900-current_year
216
+ Email: matches standard regex pattern
217
+ ```
218
+
219
+ ## Reproducibility and Documentation
220
+
221
+ ### Cleaning Log
222
+
223
+ ```python
224
class CleaningLog:
    """
    Log all cleaning operations for reproducibility.
    Every step should be documented with before/after counts.
    """

    # Column order of the saved report. Fixing it here means an empty log
    # still produces a CSV with a header row.
    _COLUMNS = (
        "version",
        "step",
        "description",
        "rows_before",
        "rows_after",
        "rows_removed",
        "columns_affected",
    )

    def __init__(self):
        self.entries = []
        self.version = 0

    def log_step(self, step_name, description,
                 rows_before, rows_after, cols_affected):
        """
        Record one cleaning step.

        Each call increments the log version by one and appends an entry
        holding the step name, description, row counts before and after,
        the derived number of removed rows, and the affected columns.
        """
        self.version += 1
        self.entries.append({
            "version": self.version,
            "step": step_name,
            "description": description,
            "rows_before": rows_before,
            "rows_after": rows_after,
            "rows_removed": rows_before - rows_after,
            "columns_affected": cols_affected,
        })

    def save_report(self, path):
        """Write all logged steps to `path` as CSV, one row per step."""
        report_df = pd.DataFrame(self.entries, columns=list(self._COLUMNS))
        report_df.to_csv(path, index=False)
250
+ ```
251
+
252
+ ### Best Practices for Research Data
253
+
254
+ ```
255
+ Reproducibility rules:
256
+ 1. Never modify the raw data file -- always save cleaned versions
257
+ 2. Use version numbers (data_v1_raw, data_v2_cleaned, data_v3_final)
258
+ 3. Script every step -- no manual edits in Excel
259
+ 4. Document every decision (why delete, why impute, why cap)
260
+ 5. Include the cleaning script in supplementary materials
261
+ 6. Record software versions (pandas, numpy, R packages)
262
+ 7. Set random seeds for any stochastic imputation
263
+ 8. Save intermediate datasets at major checkpoints
264
+ ```
265
+
266
+ A well-documented data cleaning pipeline not only improves the quality of research findings but also strengthens the credibility of the work during peer review. Reviewers increasingly expect transparent data handling practices, and journals like PLOS ONE and Nature require data availability statements that implicitly demand reproducible preprocessing.
@@ -0,0 +1,178 @@
1
+ ---
2
+ name: data-cog-guide
3
+ description: "Upload messy CSVs with minimal prompting for deep automated analysis"
4
+ metadata:
5
+ openclaw:
6
+ emoji: "🧠"
7
+ category: "analysis"
8
+ subcategory: "wrangling"
9
+ keywords: ["automated analysis", "data wrangling", "CSV upload", "data profiling", "smart analysis", "minimal prompting"]
10
+ source: "https://github.com/AcademicSkills/data-cog-guide"
11
+ ---
12
+
13
+ # Data Cog Guide
14
+
15
+ An intelligent data analysis assistant that accepts messy, poorly documented CSV files and automatically infers structure, cleans anomalies, and produces deep analytical reports with minimal user prompting. Designed for researchers who need quick insights from unfamiliar or inherited datasets without spending hours on manual data preparation.
16
+
17
+ ## Overview
18
+
19
+ Researchers frequently receive datasets from collaborators, public repositories, or legacy systems that lack documentation, use inconsistent formatting, and contain mixed data quality. Traditional analysis requires significant upfront effort to understand and prepare such data. Data Cog automates this process by applying heuristic inference, pattern recognition, and iterative cleaning to produce analysis-ready data along with a comprehensive profile report.
20
+
21
+ The skill implements a "zero-configuration" philosophy: provide the CSV file path and an optional research question, and it handles encoding detection, delimiter inference, type casting, missingness assessment, and initial exploratory statistics automatically.
22
+
23
+ ## Automated Ingestion Pipeline
24
+
25
+ ### Smart Loading
26
+
27
+ ```python
28
+ import pandas as pd
29
+ import chardet
30
+ import io
31
+
32
def smart_load_csv(filepath: str) -> tuple:
    """
    Intelligently load a CSV file, auto-detecting encoding,
    delimiter, header row, and leading comment lines.

    Args:
        filepath: Path of the CSV file to load.

    Returns:
        (df, metadata): the loaded DataFrame and a dict with the keys
        'encoding', 'delimiter' (repr of the delimiter), 'skipped_rows'
        and 'shape'.
    """
    import csv

    # Step 1: Detect encoding from the first 100 kB
    with open(filepath, 'rb') as f:
        raw = f.read(100000)
    # chardet reports None when it cannot decide (e.g. an empty file).
    encoding = chardet.detect(raw)['encoding'] or 'utf-8'
    # A pure-ASCII prefix says nothing about the rest of the file; UTF-8 is
    # a superset of ASCII, so it is the safer choice for the full read.
    if encoding.lower() == 'ascii':
        encoding = 'utf-8'

    # Step 2: Count leading comment/blank lines, then sample the data that
    # follows them so the comment text cannot mislead the delimiter sniffer.
    skip_rows = 0
    first_data_line = ''
    with open(filepath, 'r', encoding=encoding, errors='replace') as f:
        while True:
            line = f.readline()
            if not line:  # end of file: nothing but comments/blank lines
                break
            if line.startswith(('#', '//')) or line.strip() == '':
                skip_rows += 1
                continue
            first_data_line = line
            break
        sample = first_data_line + f.read(8192)

    # Step 3: Detect delimiter, falling back to a comma
    delimiter = ','
    if sample:
        try:
            delimiter = csv.Sniffer().sniff(sample).delimiter
        except csv.Error:
            delimiter = ','

    # Step 4: Load with inferred parameters
    df = pd.read_csv(
        filepath, encoding=encoding, delimiter=delimiter,
        skiprows=skip_rows, low_memory=False
    )

    metadata = {
        'encoding': encoding,
        'delimiter': repr(delimiter),
        'skipped_rows': skip_rows,
        'shape': df.shape
    }
    return df, metadata
75
+ ```
76
+
77
+ ### Automatic Type Inference
78
+
79
+ ```python
80
def auto_cast_columns(df: pd.DataFrame) -> pd.DataFrame:
    """
    Automatically cast columns to their most appropriate types.
    Handles dates, numerics stored as strings, booleans, and categories.

    Only object/string columns are inspected; columns that already have a
    numeric, datetime, boolean or categorical dtype are left as they are.
    For each inspected column the first matching rule wins:

    1. numeric, if more than 85% of the non-missing values parse as numbers
    2. datetime, if more than 85% of the non-missing values parse as dates
    3. boolean, if every non-missing value is a true/false style token
    4. category, if the column has fewer than 50 distinct values and they
       make up less than 5% of the row count

    Values that fail to parse under rule 1 or 2 become missing.

    Args:
        df: DataFrame to convert. It is modified in place and also returned.

    Returns:
        The same DataFrame with converted columns.
    """
    import warnings

    boolean_tokens = {
        'true': True, 'false': False, 'yes': True, 'no': False,
        '1': True, '0': False, 'y': True, 'n': False,
    }
    n_rows = len(df)
    if n_rows == 0:
        # Nothing to infer from, and the cardinality ratio below would
        # divide by zero.
        return df

    for col in df.columns:
        column = df[col]

        # Re-parsing typed columns is harmful: pd.to_numeric would turn an
        # existing datetime64 column into integer timestamps.
        if not (pd.api.types.is_object_dtype(column)
                or pd.api.types.is_string_dtype(column)):
            continue

        present = column.notna()
        if not present.any():
            # All-missing column: no evidence for any type.
            continue

        # Try numeric conversion. The success rate is measured on
        # non-missing values only, so gaps do not block the cast.
        numeric = pd.to_numeric(column, errors='coerce')
        if numeric[present].notna().mean() > 0.85:
            df[col] = numeric
            continue

        # Try datetime conversion. Format-inference warnings are expected
        # for free-text columns and are silenced.
        with warnings.catch_warnings():
            warnings.simplefilter('ignore', UserWarning)
            parsed_dates = pd.to_datetime(column, errors='coerce')
        if parsed_dates[present].notna().mean() > 0.85:
            df[col] = parsed_dates
            continue

        # Try boolean detection
        unique_lower = column.dropna().astype(str).str.lower().unique()
        if set(unique_lower).issubset(boolean_tokens):
            df[col] = column.astype(str).str.lower().map(boolean_tokens)
            continue

        # Convert low-cardinality strings to category
        n_unique = column.nunique()
        if n_unique / n_rows < 0.05 and n_unique < 50:
            df[col] = column.astype('category')

    return df
112
+ ```
113
+
114
+ ## Deep Automated Profiling
115
+
116
+ ### Profile Report Generation
117
+
118
+ The profiling stage produces a structured report covering:
119
+
120
+ 1. **Schema overview**: Column names, inferred types, semantic roles (ID, feature, target, timestamp).
121
+ 2. **Univariate statistics**: Mean, median, mode, std, skewness, kurtosis for numeric columns; frequency tables for categoricals.
122
+ 3. **Missing data matrix**: Heatmap-style report of missingness patterns across all columns.
123
+ 4. **Correlation analysis**: Pairwise Pearson, Spearman, and Cramér's V correlations.
124
+ 5. **Distribution flags**: Columns that are heavily skewed, zero-inflated, or constant.
125
+ 6. **Duplicate detection**: Exact row duplicates and near-duplicate clusters.
126
+
127
+ | Metric | Numeric Columns | Categorical Columns |
128
+ |--------|----------------|-------------------|
129
+ | Central tendency | Mean, median, mode | Mode, frequency |
130
+ | Dispersion | Std, IQR, range, CV | Unique count, entropy |
131
+ | Shape | Skewness, kurtosis | Imbalance ratio |
132
+ | Quality | Missing %, zero %, outlier % | Missing %, rare labels % |
133
+
134
+ ## Interactive Analysis Workflow
135
+
136
+ ### Minimal-Prompt Usage Pattern
137
+
138
+ The recommended workflow takes at most three inputs, one of which is optional:
139
+
140
+ 1. **File path**: The CSV to analyze.
141
+ 2. **Research question** (optional): A one-sentence description of what you want to learn.
142
+ 3. **Output format**: "summary", "full_report", or "cleaned_csv".
143
+
144
+ ```
145
+ User: Analyze /data/survey_results_2025.csv
146
+ Question: What factors predict participant satisfaction?
147
+ Output: full_report
148
+
149
+ Data Cog will:
150
+ 1. Load and profile the dataset (auto-detect everything)
151
+ 2. Clean and transform (handle missing data, encode categoricals)
152
+ 3. Run correlation analysis focused on satisfaction-related columns
153
+ 4. Generate regression models predicting satisfaction
154
+ 5. Produce a structured report with findings and visualizations
155
+ ```
156
+
157
+ ### Iterative Refinement
158
+
159
+ After the initial automated analysis, you can refine by asking targeted follow-up questions:
160
+
161
+ - "Focus only on respondents from Group A"
162
+ - "Exclude the first 50 rows (pilot data)"
163
+ - "Treat column X as ordinal with levels: low < medium < high"
164
+ - "Run the same analysis but with log-transformed income"
165
+
166
+ ## Best Practices
167
+
168
+ - Always review the auto-generated profile before trusting downstream results.
169
+ - Verify that automatic type inference made sensible choices, especially for ambiguous columns.
170
+ - Provide a research question when possible to guide feature selection and analysis focus.
171
+ - Save the cleaning audit log alongside your results for reproducibility.
172
+ - For datasets over 1 million rows, consider sampling for the initial profile to save time.
173
+
174
+ ## References
175
+
176
+ - Breck, E., et al. (2019). Data Validation for Machine Learning. *MLSys 2019*.
177
+ - Hynes, N., et al. (2017). The Data Linter: Lightweight, Automated Sanity Checking for ML Data Sets. *NIPS MLSys Workshop*.
178
+ - Pandas Development Team (2024). *pandas: Powerful Python Data Analysis Toolkit*. https://pandas.pydata.org/
@@ -0,0 +1,197 @@
1
+ ---
2
+ name: open-data-scientist-guide
3
+ description: "AI agent that performs end-to-end data science workflows"
4
+ metadata:
5
+ openclaw:
6
+ emoji: "📊"
7
+ category: "analysis"
8
+ subcategory: "wrangling"
9
+ keywords: ["data science", "automated analysis", "EDA", "feature engineering", "data wrangling", "AI agent"]
10
+ source: "https://github.com/Open-Data-Scientist/open-data-scientist"
11
+ ---
12
+
13
+ # Open Data Scientist Guide
14
+
15
+ ## Overview
16
+
17
+ Open Data Scientist is an AI agent that automates end-to-end data science workflows — from data loading and cleaning through exploratory analysis, feature engineering, modeling, and report generation. It interprets natural language task descriptions, generates and executes Python code, iteratively refines analyses based on results, and produces publication-ready outputs. Designed for researchers who need quick, thorough data analyses without deep programming expertise.
18
+
19
+ ## Workflow Pipeline
20
+
21
+ ```
22
+ Dataset + Task Description
23
+
24
+ Data Profiling (types, distributions, missing values)
25
+
26
+ Cleaning & Preprocessing (imputation, encoding, scaling)
27
+
28
+ Exploratory Data Analysis (correlations, distributions, outliers)
29
+
30
+ Feature Engineering (transforms, interactions, selection)
31
+
32
+ Modeling (train, evaluate, compare)
33
+
34
+ Report Generation (figures, tables, interpretation)
35
+ ```
36
+
37
+ ## Usage
38
+
39
+ ```python
40
+ from open_data_scientist import DataScientist
41
+
42
+ ds = DataScientist(llm_provider="anthropic")
43
+
44
+ # Natural language task
45
+ result = ds.analyze(
46
+ data="experiment_results.csv",
47
+ task="Identify which experimental conditions significantly affect "
48
+ "the response variable. Build a predictive model and report "
49
+ "the most important features.",
50
+ )
51
+
52
+ # Outputs
53
+ print(result.summary) # Text summary of findings
54
+ result.save_report("report.html") # Full HTML report
55
+ result.save_figures("figures/") # All generated plots
56
+ ```
57
+
58
+ ## Data Profiling
59
+
60
+ ```python
61
+ # Automatic data profiling before analysis
62
+ profile = ds.profile("dataset.csv")
63
+
64
+ print(f"Rows: {profile.n_rows}, Columns: {profile.n_cols}")
65
+ print(f"Missing values: {profile.missing_summary}")
66
+ print(f"Data types: {profile.dtype_summary}")
67
+ print(f"Potential issues: {profile.warnings}")
68
+
69
+ # Column-level details
70
+ for col in profile.columns:
71
+ print(f"\n{col.name} ({col.dtype}):")
72
+ print(f" Unique: {col.n_unique}")
73
+ print(f" Missing: {col.n_missing} ({col.pct_missing:.1f}%)")
74
+ if col.is_numeric:
75
+ print(f" Range: [{col.min}, {col.max}]")
76
+ print(f" Mean: {col.mean:.3f}, Std: {col.std:.3f}")
77
+ ```
78
+
79
+ ## Exploratory Data Analysis
80
+
81
+ ```python
82
+ # Guided EDA
83
+ eda_result = ds.explore(
84
+ data="dataset.csv",
85
+ focus="relationships", # or "distributions", "outliers", "time_trends"
86
+ target_column="outcome",
87
+ )
88
+
89
+ # Generated analyses include:
90
+ # - Correlation heatmap
91
+ # - Pairwise scatter plots for top correlations
92
+ # - Distribution plots per group
93
+ # - Statistical tests (t-test, ANOVA, chi-square)
94
+ # - Outlier detection (IQR, Z-score)
95
+
96
+ for finding in eda_result.findings:
97
+ print(f"- {finding.description} (p={finding.p_value:.4f})")
98
+ ```
99
+
100
+ ## Feature Engineering
101
+
102
+ ```python
103
+ # Automatic feature engineering
104
+ features = ds.engineer_features(
105
+ data="dataset.csv",
106
+ target="outcome",
107
+ strategies=[
108
+ "polynomial_interactions", # x1*x2, x1^2
109
+ "datetime_extraction", # year, month, day_of_week
110
+ "text_embeddings", # TF-IDF or sentence embeddings
111
+ "binning", # numeric to categorical
112
+ "target_encoding", # category to target mean
113
+ ],
114
+ selection_method="mutual_information",
115
+ max_features=50,
116
+ )
117
+
118
+ print(f"Original features: {features.n_original}")
119
+ print(f"Generated features: {features.n_generated}")
120
+ print(f"Selected features: {features.n_selected}")
121
+ ```
122
+
123
+ ## Modeling Pipeline
124
+
125
+ ```python
126
+ result = ds.model(
127
+ data="dataset.csv",
128
+ target="outcome",
129
+ task_type="classification", # or "regression"
130
+ models=["logistic_regression", "random_forest",
131
+ "gradient_boosting", "neural_network"],
132
+ cv_folds=5,
133
+ metric="f1_macro",
134
+ )
135
+
136
+ # Model comparison table
137
+ print(result.comparison_table)
138
+ # | Model | F1 Macro | Accuracy | AUC |
139
+ # |--------------------|----------|----------|-------|
140
+ # | Gradient Boosting | 0.847 | 0.862 | 0.921 |
141
+ # | Random Forest | 0.831 | 0.849 | 0.908 |
142
+ # | ... | | | |
143
+
144
+ # Best model details
145
+ best = result.best_model
146
+ print(f"Best: {best.name}")
147
+ print(f"Feature importance:\n{best.feature_importance.head(10)}")
148
+ ```
149
+
150
+ ## Report Generation
151
+
152
+ ```python
153
+ # Generate publication-ready report
154
+ result = ds.analyze(
155
+ data="experiment_results.csv",
156
+ task="Full analysis with statistical tests",
157
+ report_config={
158
+ "format": "html", # html, pdf, markdown
159
+ "style": "academic", # academic, business, minimal
160
+ "include_code": True, # Show generated code
161
+ "figure_dpi": 300, # Publication quality
162
+ },
163
+ )
164
+
165
+ result.save_report("analysis_report.html")
166
+ ```
167
+
168
+ ## Configuration
169
+
170
+ ```python
171
+ ds = DataScientist(
172
+ llm_provider="anthropic",
173
+ model="claude-sonnet-4-20250514",
174
+ execution_config={
175
+ "timeout": 300, # Max seconds per code block
176
+ "max_iterations": 10, # Refinement iterations
177
+ "sandbox": True, # Isolated execution
178
+ },
179
+ analysis_config={
180
+ "significance_level": 0.05,
181
+ "random_state": 42,
182
+ "test_size": 0.2,
183
+ },
184
+ )
185
+ ```
186
+
187
+ ## Use Cases
188
+
189
+ 1. **Experiment analysis**: Analyze lab or survey data with statistical tests
190
+ 2. **Dataset exploration**: Quick EDA on unfamiliar datasets
191
+ 3. **Baseline modeling**: Rapid prototyping of predictive models
192
+ 4. **Report generation**: Automated analysis reports for publications
193
+
194
+ ## References
195
+
196
+ - [Open Data Scientist GitHub](https://github.com/Open-Data-Scientist/open-data-scientist)
197
+ - [ydata-profiling (formerly Pandas Profiling)](https://github.com/ydataai/ydata-profiling)