@wentorai/research-plugins 1.0.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (415)
  1. package/README.md +22 -22
  2. package/curated/analysis/README.md +82 -56
  3. package/curated/domains/README.md +225 -69
  4. package/curated/literature/README.md +115 -46
  5. package/curated/research/README.md +106 -58
  6. package/curated/tools/README.md +107 -87
  7. package/curated/writing/README.md +92 -45
  8. package/mcp-configs/academic-db/alphafold-mcp.json +20 -0
  9. package/mcp-configs/academic-db/brightspace-mcp.json +21 -0
  10. package/mcp-configs/academic-db/climatiq-mcp.json +20 -0
  11. package/mcp-configs/academic-db/gibs-mcp.json +20 -0
  12. package/mcp-configs/academic-db/gis-mcp-server.json +22 -0
  13. package/mcp-configs/academic-db/google-earth-engine-mcp.json +21 -0
  14. package/mcp-configs/academic-db/m4-clinical-mcp.json +21 -0
  15. package/mcp-configs/academic-db/medical-mcp.json +21 -0
  16. package/mcp-configs/academic-db/nexonco-mcp.json +20 -0
  17. package/mcp-configs/academic-db/omop-mcp.json +20 -0
  18. package/mcp-configs/academic-db/onekgpd-mcp.json +20 -0
  19. package/mcp-configs/academic-db/openedu-mcp.json +20 -0
  20. package/mcp-configs/academic-db/opengenes-mcp.json +20 -0
  21. package/mcp-configs/academic-db/openstax-mcp.json +21 -0
  22. package/mcp-configs/academic-db/openstreetmap-mcp.json +21 -0
  23. package/mcp-configs/academic-db/opentargets-mcp.json +21 -0
  24. package/mcp-configs/academic-db/pdb-mcp.json +21 -0
  25. package/mcp-configs/academic-db/smithsonian-mcp.json +20 -0
  26. package/mcp-configs/ai-platform/magi-researchers.json +21 -0
  27. package/mcp-configs/ai-platform/mcp-academic-researcher.json +22 -0
  28. package/mcp-configs/ai-platform/open-paper-machine.json +21 -0
  29. package/mcp-configs/ai-platform/paper-intelligence.json +21 -0
  30. package/mcp-configs/ai-platform/paper-reader.json +21 -0
  31. package/mcp-configs/ai-platform/paperdebugger.json +21 -0
  32. package/mcp-configs/browser/exa-mcp.json +20 -0
  33. package/mcp-configs/browser/mcp-searxng.json +21 -0
  34. package/mcp-configs/browser/mcp-webresearch.json +20 -0
  35. package/mcp-configs/cloud-docs/confluence-mcp.json +37 -0
  36. package/mcp-configs/cloud-docs/google-drive-mcp.json +35 -0
  37. package/mcp-configs/cloud-docs/notion-mcp.json +29 -0
  38. package/mcp-configs/communication/discord-mcp.json +29 -0
  39. package/mcp-configs/communication/discourse-mcp.json +21 -0
  40. package/mcp-configs/communication/slack-mcp.json +29 -0
  41. package/mcp-configs/communication/telegram-mcp.json +28 -0
  42. package/mcp-configs/data-platform/automl-stat-mcp.json +21 -0
  43. package/mcp-configs/data-platform/jefferson-stats-mcp.json +22 -0
  44. package/mcp-configs/data-platform/mcp-excel-server.json +21 -0
  45. package/mcp-configs/data-platform/mcp-stata.json +21 -0
  46. package/mcp-configs/data-platform/mcpstack-jupyter.json +21 -0
  47. package/mcp-configs/data-platform/ml-mcp.json +21 -0
  48. package/mcp-configs/data-platform/nasdaq-data-link-mcp.json +20 -0
  49. package/mcp-configs/data-platform/numpy-mcp.json +21 -0
  50. package/mcp-configs/database/neo4j-mcp.json +37 -0
  51. package/mcp-configs/database/postgres-mcp.json +28 -0
  52. package/mcp-configs/database/sqlite-mcp.json +29 -0
  53. package/mcp-configs/dev-platform/geogebra-mcp.json +21 -0
  54. package/mcp-configs/dev-platform/github-mcp.json +31 -0
  55. package/mcp-configs/dev-platform/gitlab-mcp.json +34 -0
  56. package/mcp-configs/dev-platform/latex-mcp-server.json +21 -0
  57. package/mcp-configs/dev-platform/manim-mcp.json +20 -0
  58. package/mcp-configs/dev-platform/mcp-echarts.json +20 -0
  59. package/mcp-configs/dev-platform/panel-viz-mcp.json +20 -0
  60. package/mcp-configs/dev-platform/paperbanana.json +20 -0
  61. package/mcp-configs/dev-platform/texflow-mcp.json +20 -0
  62. package/mcp-configs/dev-platform/texmcp.json +20 -0
  63. package/mcp-configs/dev-platform/typst-mcp.json +21 -0
  64. package/mcp-configs/dev-platform/vizro-mcp.json +20 -0
  65. package/mcp-configs/email/email-mcp.json +40 -0
  66. package/mcp-configs/email/gmail-mcp.json +37 -0
  67. package/mcp-configs/note-knowledge/local-faiss-mcp.json +21 -0
  68. package/mcp-configs/note-knowledge/mcp-memory-service.json +21 -0
  69. package/mcp-configs/note-knowledge/mcp-obsidian.json +23 -0
  70. package/mcp-configs/note-knowledge/mcp-ragdocs.json +20 -0
  71. package/mcp-configs/note-knowledge/mcp-summarizer.json +21 -0
  72. package/mcp-configs/note-knowledge/mediawiki-mcp.json +21 -0
  73. package/mcp-configs/note-knowledge/openzim-mcp.json +20 -0
  74. package/mcp-configs/note-knowledge/zettelkasten-mcp.json +21 -0
  75. package/mcp-configs/reference-mgr/academic-paper-mcp-http.json +20 -0
  76. package/mcp-configs/reference-mgr/academix.json +20 -0
  77. package/mcp-configs/reference-mgr/arxiv-research-mcp.json +21 -0
  78. package/mcp-configs/reference-mgr/google-scholar-abstract-mcp.json +19 -0
  79. package/mcp-configs/reference-mgr/google-scholar-mcp.json +20 -0
  80. package/mcp-configs/reference-mgr/mcp-paperswithcode.json +21 -0
  81. package/mcp-configs/reference-mgr/mcp-scholarly.json +20 -0
  82. package/mcp-configs/reference-mgr/mcp-simple-arxiv.json +20 -0
  83. package/mcp-configs/reference-mgr/mcp-simple-pubmed.json +20 -0
  84. package/mcp-configs/reference-mgr/mcp-zotero.json +21 -0
  85. package/mcp-configs/reference-mgr/mendeley-mcp.json +20 -0
  86. package/mcp-configs/reference-mgr/ncbi-mcp-server.json +22 -0
  87. package/mcp-configs/reference-mgr/onecite.json +21 -0
  88. package/mcp-configs/reference-mgr/paper-search-mcp.json +21 -0
  89. package/mcp-configs/reference-mgr/pubmed-search-mcp.json +21 -0
  90. package/mcp-configs/reference-mgr/scholar-mcp.json +21 -0
  91. package/mcp-configs/reference-mgr/scholar-multi-mcp.json +21 -0
  92. package/mcp-configs/reference-mgr/seerai.json +21 -0
  93. package/mcp-configs/reference-mgr/semantic-scholar-fastmcp.json +21 -0
  94. package/mcp-configs/reference-mgr/sourcelibrary.json +20 -0
  95. package/mcp-configs/registry.json +178 -149
  96. package/mcp-configs/repository/dataverse-mcp.json +33 -0
  97. package/mcp-configs/repository/huggingface-mcp.json +29 -0
  98. package/openclaw.plugin.json +2 -2
  99. package/package.json +2 -2
  100. package/skills/analysis/dataviz/algorithm-visualizer-guide/SKILL.md +259 -0
  101. package/skills/analysis/dataviz/bokeh-visualization-guide/SKILL.md +270 -0
  102. package/skills/analysis/dataviz/chart-image-generator/SKILL.md +229 -0
  103. package/skills/analysis/dataviz/citation-map-guide/SKILL.md +184 -0
  104. package/skills/analysis/dataviz/d3-visualization-guide/SKILL.md +281 -0
  105. package/skills/analysis/dataviz/data-visualization-principles/SKILL.md +171 -0
  106. package/skills/analysis/dataviz/echarts-visualization-guide/SKILL.md +250 -0
  107. package/skills/analysis/dataviz/metabase-analytics-guide/SKILL.md +242 -0
  108. package/skills/analysis/dataviz/plotly-interactive-guide/SKILL.md +266 -0
  109. package/skills/analysis/dataviz/redash-analytics-guide/SKILL.md +284 -0
  110. package/skills/analysis/econometrics/econml-causal-guide/SKILL.md +163 -0
  111. package/skills/analysis/econometrics/empirical-paper-analysis/SKILL.md +192 -0
  112. package/skills/analysis/econometrics/mostly-harmless-guide/SKILL.md +139 -0
  113. package/skills/analysis/econometrics/panel-data-analyst/SKILL.md +259 -0
  114. package/skills/analysis/econometrics/panel-data-regression-workflow/SKILL.md +267 -0
  115. package/skills/analysis/econometrics/python-causality-guide/SKILL.md +134 -0
  116. package/skills/analysis/econometrics/stata-accounting-guide/SKILL.md +269 -0
  117. package/skills/analysis/econometrics/stata-analyst-guide/SKILL.md +245 -0
  118. package/skills/analysis/econometrics/stata-reference-guide/SKILL.md +293 -0
  119. package/skills/analysis/statistics/data-anomaly-detection/SKILL.md +157 -0
  120. package/skills/analysis/statistics/general-statistics-guide/SKILL.md +226 -0
  121. package/skills/analysis/statistics/infiagent-benchmark-guide/SKILL.md +106 -0
  122. package/skills/analysis/statistics/ml-experiment-tracker/SKILL.md +212 -0
  123. package/skills/analysis/statistics/pywayne-statistics-guide/SKILL.md +192 -0
  124. package/skills/analysis/statistics/quantitative-methods-guide/SKILL.md +193 -0
  125. package/skills/analysis/statistics/senior-data-scientist-guide/SKILL.md +223 -0
  126. package/skills/analysis/wrangling/claude-data-analysis-guide/SKILL.md +100 -0
  127. package/skills/analysis/wrangling/csv-data-analyzer/SKILL.md +170 -0
  128. package/skills/analysis/wrangling/data-cleaning-pipeline/SKILL.md +266 -0
  129. package/skills/analysis/wrangling/data-cog-guide/SKILL.md +178 -0
  130. package/skills/analysis/wrangling/open-data-scientist-guide/SKILL.md +197 -0
  131. package/skills/analysis/wrangling/stata-data-cleaning/SKILL.md +276 -0
  132. package/skills/analysis/wrangling/streamline-analyst-guide/SKILL.md +119 -0
  133. package/skills/analysis/wrangling/survey-data-processing/SKILL.md +298 -0
  134. package/skills/domains/ai-ml/ai-agent-papers-guide/SKILL.md +146 -0
  135. package/skills/domains/ai-ml/ai-model-benchmarking/SKILL.md +209 -0
  136. package/skills/domains/ai-ml/annotated-dl-papers-guide/SKILL.md +159 -0
  137. package/skills/domains/ai-ml/anomaly-detection-papers-guide/SKILL.md +167 -0
  138. package/skills/domains/ai-ml/autonomous-agents-papers-guide/SKILL.md +178 -0
  139. package/skills/domains/ai-ml/dl-transformer-finetune/SKILL.md +239 -0
  140. package/skills/domains/ai-ml/domain-adaptation-papers-guide/SKILL.md +173 -0
  141. package/skills/domains/ai-ml/generative-ai-guide/SKILL.md +146 -0
  142. package/skills/domains/ai-ml/graph-learning-papers-guide/SKILL.md +125 -0
  143. package/skills/domains/ai-ml/huggingface-inference-guide/SKILL.md +196 -0
  144. package/skills/domains/ai-ml/keras-deep-learning/SKILL.md +210 -0
  145. package/skills/domains/ai-ml/kolmogorov-arnold-networks-guide/SKILL.md +185 -0
  146. package/skills/domains/ai-ml/llm-from-scratch-guide/SKILL.md +124 -0
  147. package/skills/domains/ai-ml/ml-pipeline-guide/SKILL.md +295 -0
  148. package/skills/domains/ai-ml/nlp-toolkit-guide/SKILL.md +247 -0
  149. package/skills/domains/ai-ml/npcpy-research-guide/SKILL.md +137 -0
  150. package/skills/domains/ai-ml/pytorch-guide/SKILL.md +281 -0
  151. package/skills/domains/ai-ml/pytorch-lightning-guide/SKILL.md +244 -0
  152. package/skills/domains/ai-ml/responsible-ai-guide/SKILL.md +126 -0
  153. package/skills/domains/ai-ml/tensorflow-guide/SKILL.md +241 -0
  154. package/skills/domains/ai-ml/vmas-simulator-guide/SKILL.md +129 -0
  155. package/skills/domains/biomedical/bioagents-guide/SKILL.md +308 -0
  156. package/skills/domains/biomedical/clawbio-guide/SKILL.md +167 -0
  157. package/skills/domains/biomedical/clinical-dialogue-agents-guide/SKILL.md +145 -0
  158. package/skills/domains/biomedical/ena-sequence-api/SKILL.md +175 -0
  159. package/skills/domains/biomedical/genomas-guide/SKILL.md +126 -0
  160. package/skills/domains/biomedical/genotex-benchmark-guide/SKILL.md +125 -0
  161. package/skills/domains/biomedical/med-researcher-guide/SKILL.md +161 -0
  162. package/skills/domains/biomedical/med-researcher-r1-guide/SKILL.md +146 -0
  163. package/skills/domains/biomedical/medgeclaw-guide/SKILL.md +345 -0
  164. package/skills/domains/biomedical/medical-imaging-guide/SKILL.md +305 -0
  165. package/skills/domains/biomedical/ncbi-blast-api/SKILL.md +195 -0
  166. package/skills/domains/biomedical/ncbi-datasets-api/SKILL.md +220 -0
  167. package/skills/domains/biomedical/quickgo-api/SKILL.md +181 -0
  168. package/skills/domains/business/architecture-design-guide/SKILL.md +279 -0
  169. package/skills/domains/business/innovation-management-guide/SKILL.md +257 -0
  170. package/skills/domains/business/operations-research-guide/SKILL.md +258 -0
  171. package/skills/domains/business/xpert-bi-guide/SKILL.md +84 -0
  172. package/skills/domains/chemistry/cactus-cheminformatics-guide/SKILL.md +89 -0
  173. package/skills/domains/chemistry/chemeagle-guide/SKILL.md +147 -0
  174. package/skills/domains/chemistry/chemgraph-agent-guide/SKILL.md +120 -0
  175. package/skills/domains/chemistry/molecular-dynamics-guide/SKILL.md +237 -0
  176. package/skills/domains/chemistry/pubchem-api-guide/SKILL.md +180 -0
  177. package/skills/domains/chemistry/spectroscopy-analysis-guide/SKILL.md +290 -0
  178. package/skills/domains/cs/ai-security-papers-guide/SKILL.md +103 -0
  179. package/skills/domains/cs/code-llm-papers-guide/SKILL.md +131 -0
  180. package/skills/domains/cs/distributed-systems-guide/SKILL.md +268 -0
  181. package/skills/domains/cs/formal-verification-guide/SKILL.md +298 -0
  182. package/skills/domains/cs/gaussian-splatting-papers-guide/SKILL.md +158 -0
  183. package/skills/domains/cs/llm-aiops-guide/SKILL.md +70 -0
  184. package/skills/domains/cs/software-heritage-api/SKILL.md +200 -0
  185. package/skills/domains/ecology/species-distribution-guide/SKILL.md +343 -0
  186. package/skills/domains/economics/imf-data-api-guide/SKILL.md +174 -0
  187. package/skills/domains/economics/nber-working-papers-api/SKILL.md +177 -0
  188. package/skills/domains/economics/post-labor-economics/SKILL.md +254 -0
  189. package/skills/domains/economics/pricing-psychology-guide/SKILL.md +273 -0
  190. package/skills/domains/economics/repec-economics-api/SKILL.md +188 -0
  191. package/skills/domains/economics/world-bank-data-guide/SKILL.md +179 -0
  192. package/skills/domains/education/academic-study-methods/SKILL.md +228 -0
  193. package/skills/domains/education/assessment-design-guide/SKILL.md +213 -0
  194. package/skills/domains/education/educational-research-methods/SKILL.md +179 -0
  195. package/skills/domains/education/edumcp-guide/SKILL.md +74 -0
  196. package/skills/domains/education/mooc-analytics-guide/SKILL.md +206 -0
  197. package/skills/domains/education/open-syllabus-api/SKILL.md +171 -0
  198. package/skills/domains/finance/akshare-finance-data/SKILL.md +207 -0
  199. package/skills/domains/finance/finsight-research-guide/SKILL.md +113 -0
  200. package/skills/domains/finance/options-analytics-agent-guide/SKILL.md +117 -0
  201. package/skills/domains/finance/portfolio-optimization-guide/SKILL.md +279 -0
  202. package/skills/domains/finance/risk-modeling-guide/SKILL.md +260 -0
  203. package/skills/domains/finance/stata-accounting-research/SKILL.md +372 -0
  204. package/skills/domains/geoscience/climate-modeling-guide/SKILL.md +215 -0
  205. package/skills/domains/geoscience/pangaea-data-api/SKILL.md +197 -0
  206. package/skills/domains/geoscience/satellite-remote-sensing/SKILL.md +193 -0
  207. package/skills/domains/geoscience/seismology-data-guide/SKILL.md +208 -0
  208. package/skills/domains/humanities/digital-humanities-methods/SKILL.md +232 -0
  209. package/skills/domains/humanities/ethical-philosophy-guide/SKILL.md +244 -0
  210. package/skills/domains/humanities/history-research-guide/SKILL.md +260 -0
  211. package/skills/domains/humanities/political-history-guide/SKILL.md +241 -0
  212. package/skills/domains/law/caselaw-access-api/SKILL.md +149 -0
  213. package/skills/domains/law/legal-agent-skills-guide/SKILL.md +132 -0
  214. package/skills/domains/law/legal-nlp-guide/SKILL.md +236 -0
  215. package/skills/domains/law/legal-research-methods/SKILL.md +190 -0
  216. package/skills/domains/law/opencontracts-guide/SKILL.md +168 -0
  217. package/skills/domains/law/patent-analysis-guide/SKILL.md +257 -0
  218. package/skills/domains/law/regulatory-compliance-guide/SKILL.md +267 -0
  219. package/skills/domains/math/lean-theorem-proving-guide/SKILL.md +140 -0
  220. package/skills/domains/math/symbolic-computation-guide/SKILL.md +263 -0
  221. package/skills/domains/math/topology-data-analysis/SKILL.md +305 -0
  222. package/skills/domains/pharma/clinical-trial-design-guide/SKILL.md +271 -0
  223. package/skills/domains/pharma/drug-target-interaction/SKILL.md +242 -0
  224. package/skills/domains/pharma/madd-drug-discovery-guide/SKILL.md +153 -0
  225. package/skills/domains/pharma/pharmacovigilance-guide/SKILL.md +216 -0
  226. package/skills/domains/physics/astrophysics-data-guide/SKILL.md +305 -0
  227. package/skills/domains/physics/particle-physics-guide/SKILL.md +287 -0
  228. package/skills/domains/social-science/ipums-microdata-api/SKILL.md +211 -0
  229. package/skills/domains/social-science/network-analysis-guide/SKILL.md +310 -0
  230. package/skills/domains/social-science/psychology-research-guide/SKILL.md +270 -0
  231. package/skills/domains/social-science/sociology-research-guide/SKILL.md +238 -0
  232. package/skills/domains/social-science/sociology-research-methods/SKILL.md +181 -0
  233. package/skills/literature/discovery/arxiv-paper-monitoring/SKILL.md +233 -0
  234. package/skills/literature/discovery/paper-recommendation-guide/SKILL.md +120 -0
  235. package/skills/literature/discovery/papers-we-love-guide/SKILL.md +169 -0
  236. package/skills/literature/discovery/semantic-paper-radar/SKILL.md +144 -0
  237. package/skills/literature/discovery/zotero-arxiv-daily-guide/SKILL.md +94 -0
  238. package/skills/literature/fulltext/bioc-pmc-api/SKILL.md +146 -0
  239. package/skills/literature/fulltext/core-api-guide/SKILL.md +144 -0
  240. package/skills/literature/fulltext/dataverse-api/SKILL.md +215 -0
  241. package/skills/literature/fulltext/hal-archive-api/SKILL.md +218 -0
  242. package/skills/literature/fulltext/institutional-repository-guide/SKILL.md +212 -0
  243. package/skills/literature/fulltext/open-access-mining-guide/SKILL.md +341 -0
  244. package/skills/literature/fulltext/osf-api/SKILL.md +212 -0
  245. package/skills/literature/fulltext/pmc-ftp-bulk-download/SKILL.md +182 -0
  246. package/skills/literature/fulltext/zotero-ai-butler-guide/SKILL.md +166 -0
  247. package/skills/literature/fulltext/zotero-scihub-guide/SKILL.md +168 -0
  248. package/skills/literature/metadata/academic-paper-summarizer/SKILL.md +101 -0
  249. package/skills/literature/metadata/bibliometrix-guide/SKILL.md +164 -0
  250. package/skills/literature/metadata/crossref-event-data-api/SKILL.md +183 -0
  251. package/skills/literature/metadata/doi-content-negotiation/SKILL.md +202 -0
  252. package/skills/literature/metadata/orkg-api/SKILL.md +153 -0
  253. package/skills/literature/metadata/plumx-metrics-api/SKILL.md +188 -0
  254. package/skills/literature/metadata/ror-organization-api/SKILL.md +208 -0
  255. package/skills/literature/metadata/sophosia-reference-guide/SKILL.md +110 -0
  256. package/skills/literature/metadata/viaf-authority-api/SKILL.md +209 -0
  257. package/skills/literature/metadata/wikidata-api-guide/SKILL.md +156 -0
  258. package/skills/literature/metadata/zoplicate-dedup-guide/SKILL.md +147 -0
  259. package/skills/literature/metadata/zotero-actions-tags-guide/SKILL.md +212 -0
  260. package/skills/literature/metadata/zotmoov-guide/SKILL.md +120 -0
  261. package/skills/literature/metadata/zutilo-guide/SKILL.md +140 -0
  262. package/skills/literature/search/arxiv-batch-reporting/SKILL.md +133 -0
  263. package/skills/literature/search/arxiv-cli-tools/SKILL.md +172 -0
  264. package/skills/literature/search/arxiv-osiris/SKILL.md +199 -0
  265. package/skills/literature/search/arxiv-paper-processor/SKILL.md +141 -0
  266. package/skills/literature/search/baidu-scholar-guide/SKILL.md +110 -0
  267. package/skills/literature/search/base-academic-search/SKILL.md +196 -0
  268. package/skills/literature/search/chatpaper-guide/SKILL.md +122 -0
  269. package/skills/literature/search/citeseerx-api/SKILL.md +183 -0
  270. package/skills/literature/search/deep-literature-search/SKILL.md +149 -0
  271. package/skills/literature/search/deepgit-search-guide/SKILL.md +147 -0
  272. package/skills/literature/search/eric-education-api/SKILL.md +199 -0
  273. package/skills/literature/search/findpapers-guide/SKILL.md +177 -0
  274. package/skills/literature/search/ieee-xplore-api/SKILL.md +177 -0
  275. package/skills/literature/search/lens-scholarly-api/SKILL.md +211 -0
  276. package/skills/literature/search/multi-database-literature-search/SKILL.md +198 -0
  277. package/skills/literature/search/open-library-api/SKILL.md +196 -0
  278. package/skills/literature/search/open-semantic-search-guide/SKILL.md +190 -0
  279. package/skills/literature/search/openaire-api/SKILL.md +141 -0
  280. package/skills/literature/search/paper-search-mcp-guide/SKILL.md +107 -0
  281. package/skills/literature/search/papers-chat-guide/SKILL.md +194 -0
  282. package/skills/literature/search/pasa-paper-search-guide/SKILL.md +138 -0
  283. package/skills/literature/search/plos-open-access-api/SKILL.md +203 -0
  284. package/skills/literature/search/scielo-api/SKILL.md +182 -0
  285. package/skills/literature/search/share-research-api/SKILL.md +129 -0
  286. package/skills/literature/search/worldcat-search-api/SKILL.md +224 -0
  287. package/skills/research/automation/ai-scientist-v2-guide/SKILL.md +284 -0
  288. package/skills/research/automation/aim-experiment-guide/SKILL.md +234 -0
  289. package/skills/research/automation/claude-academic-workflow-guide/SKILL.md +202 -0
  290. package/skills/research/automation/coexist-ai-guide/SKILL.md +149 -0
  291. package/skills/research/automation/datagen-research-guide/SKILL.md +131 -0
  292. package/skills/research/automation/foam-agent-guide/SKILL.md +203 -0
  293. package/skills/research/automation/kedro-pipeline-guide/SKILL.md +216 -0
  294. package/skills/research/automation/mle-agent-guide/SKILL.md +139 -0
  295. package/skills/research/automation/paper-to-agent-guide/SKILL.md +116 -0
  296. package/skills/research/automation/rd-agent-guide/SKILL.md +246 -0
  297. package/skills/research/automation/research-paper-orchestrator/SKILL.md +254 -0
  298. package/skills/research/deep-research/academic-deep-research/SKILL.md +190 -0
  299. package/skills/research/deep-research/auto-deep-research-guide/SKILL.md +141 -0
  300. package/skills/research/deep-research/cognitive-kernel-guide/SKILL.md +200 -0
  301. package/skills/research/deep-research/corvus-research-guide/SKILL.md +132 -0
  302. package/skills/research/deep-research/deep-research-pro/SKILL.md +213 -0
  303. package/skills/research/deep-research/deep-research-work/SKILL.md +204 -0
  304. package/skills/research/deep-research/deep-searcher-guide/SKILL.md +253 -0
  305. package/skills/research/deep-research/gpt-researcher-guide/SKILL.md +191 -0
  306. package/skills/research/deep-research/in-depth-research-guide/SKILL.md +205 -0
  307. package/skills/research/deep-research/khoj-research-guide/SKILL.md +200 -0
  308. package/skills/research/deep-research/kosmos-scientist-guide/SKILL.md +185 -0
  309. package/skills/research/deep-research/llm-scientific-discovery-guide/SKILL.md +178 -0
  310. package/skills/research/deep-research/local-deep-research-guide/SKILL.md +253 -0
  311. package/skills/research/deep-research/open-researcher-guide/SKILL.md +138 -0
  312. package/skills/research/deep-research/tongyi-deep-research-guide/SKILL.md +217 -0
  313. package/skills/research/funding/eu-horizon-guide/SKILL.md +244 -0
  314. package/skills/research/funding/grant-budget-guide/SKILL.md +284 -0
  315. package/skills/research/funding/nih-reporter-api-guide/SKILL.md +166 -0
  316. package/skills/research/funding/nsf-award-api-guide/SKILL.md +133 -0
  317. package/skills/research/methodology/academic-mentor-guide/SKILL.md +169 -0
  318. package/skills/research/methodology/claude-scientific-guide/SKILL.md +122 -0
  319. package/skills/research/methodology/deep-innovator-guide/SKILL.md +242 -0
  320. package/skills/research/methodology/osf-api-guide/SKILL.md +165 -0
  321. package/skills/research/methodology/parsifal-slr-guide/SKILL.md +154 -0
  322. package/skills/research/methodology/research-paper-kb/SKILL.md +263 -0
  323. package/skills/research/methodology/research-pipeline-units-guide/SKILL.md +169 -0
  324. package/skills/research/methodology/research-town-guide/SKILL.md +263 -0
  325. package/skills/research/methodology/slr-automation-guide/SKILL.md +235 -0
  326. package/skills/research/paper-review/automated-review-guide/SKILL.md +281 -0
  327. package/skills/research/paper-review/latte-review-guide/SKILL.md +175 -0
  328. package/skills/research/paper-review/paper-compare-guide/SKILL.md +238 -0
  329. package/skills/research/paper-review/paper-critique-framework/SKILL.md +181 -0
  330. package/skills/research/paper-review/paper-digest-guide/SKILL.md +240 -0
  331. package/skills/research/paper-review/paper-research-assistant/SKILL.md +231 -0
  332. package/skills/research/paper-review/research-quality-filter/SKILL.md +261 -0
  333. package/skills/research/paper-review/review-response-guide/SKILL.md +275 -0
  334. package/skills/tools/code-exec/contextplus-mcp-guide/SKILL.md +110 -0
  335. package/skills/tools/code-exec/google-colab-guide/SKILL.md +276 -0
  336. package/skills/tools/code-exec/kaggle-api-guide/SKILL.md +216 -0
  337. package/skills/tools/code-exec/overleaf-cli-guide/SKILL.md +279 -0
  338. package/skills/tools/diagram/clawphd-guide/SKILL.md +149 -0
  339. package/skills/tools/diagram/code-flow-visualizer/SKILL.md +197 -0
  340. package/skills/tools/diagram/excalidraw-diagram-guide/SKILL.md +170 -0
  341. package/skills/tools/diagram/json-data-visualizer/SKILL.md +270 -0
  342. package/skills/tools/diagram/kroki-diagram-api/SKILL.md +198 -0
  343. package/skills/tools/diagram/mermaid-architect-guide/SKILL.md +219 -0
  344. package/skills/tools/diagram/scientific-graphical-abstract/SKILL.md +201 -0
  345. package/skills/tools/diagram/tldraw-whiteboard-guide/SKILL.md +397 -0
  346. package/skills/tools/document/docsgpt-guide/SKILL.md +130 -0
  347. package/skills/tools/document/large-document-reader/SKILL.md +202 -0
  348. package/skills/tools/document/md2pdf-xelatex/SKILL.md +212 -0
  349. package/skills/tools/document/openpaper-guide/SKILL.md +232 -0
  350. package/skills/tools/document/paper-parse-guide/SKILL.md +243 -0
  351. package/skills/tools/document/weknora-guide/SKILL.md +216 -0
  352. package/skills/tools/document/zotero-addon-market-guide/SKILL.md +108 -0
  353. package/skills/tools/document/zotero-night-theme-guide/SKILL.md +142 -0
  354. package/skills/tools/document/zotero-style-guide/SKILL.md +217 -0
  355. package/skills/tools/knowledge-graph/citation-network-builder/SKILL.md +244 -0
  356. package/skills/tools/knowledge-graph/concept-map-generator/SKILL.md +284 -0
  357. package/skills/tools/knowledge-graph/graphiti-guide/SKILL.md +219 -0
  358. package/skills/tools/knowledge-graph/mimir-memory-guide/SKILL.md +135 -0
  359. package/skills/tools/knowledge-graph/notero-zotero-notion-guide/SKILL.md +187 -0
  360. package/skills/tools/knowledge-graph/open-webui-tools-guide/SKILL.md +156 -0
  361. package/skills/tools/knowledge-graph/openspg-guide/SKILL.md +210 -0
  362. package/skills/tools/knowledge-graph/paperpile-notion-guide/SKILL.md +84 -0
  363. package/skills/tools/knowledge-graph/zotero-markdb-connect-guide/SKILL.md +162 -0
  364. package/skills/tools/ocr-translate/latex-translation-guide/SKILL.md +176 -0
  365. package/skills/tools/ocr-translate/math-equation-renderer/SKILL.md +198 -0
  366. package/skills/tools/ocr-translate/pdf-math-translate-guide/SKILL.md +141 -0
  367. package/skills/tools/ocr-translate/zotero-pdf-translate-guide/SKILL.md +95 -0
  368. package/skills/tools/ocr-translate/zotero-pdf2zh-guide/SKILL.md +143 -0
  369. package/skills/tools/scraping/dataset-finder-guide/SKILL.md +253 -0
  370. package/skills/tools/scraping/easy-spider-guide/SKILL.md +250 -0
  371. package/skills/tools/scraping/google-scholar-scraper/SKILL.md +255 -0
  372. package/skills/tools/scraping/repository-harvesting-guide/SKILL.md +310 -0
  373. package/skills/writing/citation/academic-citation-manager/SKILL.md +314 -0
  374. package/skills/writing/citation/academic-citation-manager-guide/SKILL.md +182 -0
  375. package/skills/writing/citation/citation-assistant-skill/SKILL.md +192 -0
  376. package/skills/writing/citation/jabref-reference-guide/SKILL.md +127 -0
  377. package/skills/writing/citation/jasminum-zotero-guide/SKILL.md +103 -0
  378. package/skills/writing/citation/mendeley-api/SKILL.md +231 -0
  379. package/skills/writing/citation/obsidian-citation-guide/SKILL.md +164 -0
  380. package/skills/writing/citation/obsidian-zotero-guide/SKILL.md +137 -0
  381. package/skills/writing/citation/onecite-reference-guide/SKILL.md +168 -0
  382. package/skills/writing/citation/papersgpt-zotero-guide/SKILL.md +132 -0
  383. package/skills/writing/citation/papis-cli-guide/SKILL.md +213 -0
  384. package/skills/writing/citation/zotero-better-bibtex-guide/SKILL.md +107 -0
  385. package/skills/writing/citation/zotero-better-notes-guide/SKILL.md +121 -0
  386. package/skills/writing/citation/zotero-gpt-guide/SKILL.md +111 -0
  387. package/skills/writing/citation/zotero-mcp-guide/SKILL.md +164 -0
  388. package/skills/writing/citation/zotero-mdnotes-guide/SKILL.md +162 -0
  389. package/skills/writing/citation/zotero-reference-guide/SKILL.md +139 -0
  390. package/skills/writing/citation/zotero-scholar-guide/SKILL.md +294 -0
  391. package/skills/writing/citation/zotfile-attachment-guide/SKILL.md +140 -0
  392. package/skills/writing/composition/ml-paper-writing/SKILL.md +163 -0
  393. package/skills/writing/composition/opendraft-thesis-guide/SKILL.md +200 -0
  394. package/skills/writing/composition/paper-debugger-guide/SKILL.md +143 -0
  395. package/skills/writing/composition/paperforge-guide/SKILL.md +205 -0
  396. package/skills/writing/composition/research-paper-writer/SKILL.md +226 -0
  397. package/skills/writing/composition/scientific-writing-resources/SKILL.md +151 -0
  398. package/skills/writing/composition/scientific-writing-wrapper/SKILL.md +153 -0
  399. package/skills/writing/latex/academic-writing-latex/SKILL.md +285 -0
  400. package/skills/writing/latex/latex-drawing-collection/SKILL.md +154 -0
  401. package/skills/writing/latex/latex-templates-collection/SKILL.md +159 -0
  402. package/skills/writing/latex/md-to-pdf-academic/SKILL.md +230 -0
  403. package/skills/writing/latex/tex-render-guide/SKILL.md +243 -0
  404. package/skills/writing/polish/academic-tone-guide/SKILL.md +209 -0
  405. package/skills/writing/polish/chinese-text-humanizer/SKILL.md +140 -0
  406. package/skills/writing/polish/conciseness-editing-guide/SKILL.md +225 -0
  407. package/skills/writing/polish/paper-polish-guide/SKILL.md +160 -0
  408. package/skills/writing/templates/arxiv-preprint-template/SKILL.md +184 -0
  409. package/skills/writing/templates/elegant-paper-template/SKILL.md +141 -0
  410. package/skills/writing/templates/graphical-abstract-guide/SKILL.md +183 -0
  411. package/skills/writing/templates/novathesis-guide/SKILL.md +152 -0
  412. package/skills/writing/templates/scientific-article-pdf/SKILL.md +261 -0
  413. package/skills/writing/templates/sjtuthesis-guide/SKILL.md +197 -0
  414. package/skills/writing/templates/thuthesis-guide/SKILL.md +181 -0
  415. package/skills/literature/fulltext/repository-harvesting-guide/SKILL.md +0 -207
@@ -0,0 +1,276 @@
1
+ ---
2
+ name: stata-data-cleaning
3
+ description: "Clean, transform, and validate messy research data using Stata"
4
+ metadata:
5
+ openclaw:
6
+ emoji: "broom"
7
+ category: "analysis"
8
+ subcategory: "wrangling"
9
+ keywords: ["Stata", "data cleaning", "data wrangling", "missing values", "recoding", "validation"]
10
+ source: "https://www.stata.com/manuals/d.pdf"
11
+ ---
12
+
13
+ # Stata Data Cleaning
14
+
15
+ Clean, transform, and validate messy research datasets in Stata. This skill covers the complete data preparation pipeline from raw survey or administrative data to analysis-ready datasets, with emphasis on documentation, reproducibility, and handling the common data quality issues encountered in social science, economics, and health research.
16
+
17
+ ## Overview
18
+
19
+ Data cleaning typically consumes 60-80% of research time in empirical studies, yet it is often under-documented and poorly reproducible. Stata provides a powerful set of commands for data manipulation, but knowing which commands to use and in what order requires experience with common data quality issues: inconsistent coding, duplicate observations, string formatting problems, implausible values, and complex missing data patterns.
20
+
21
+ This skill provides a systematic, step-by-step data cleaning workflow in Stata. Each step produces a log of changes made, enabling full reproducibility and audit trails. The workflow is organized around the principle that raw data should never be modified in place -- instead, cleaning scripts transform raw data into processed datasets while preserving the original.
22
+
23
+ The approach follows best practices from the World Bank's DIME Analytics team and the J-PAL research transparency guidelines, making it suitable for projects that require rigorous data documentation for peer review, replication packages, or regulatory compliance.
24
+
25
+ ## Initial Data Assessment
26
+
27
+ ### Loading and Inspecting Data
28
+
29
+ ```stata
30
* ============================================
* Data Cleaning Script: [Project Name]
* Author: [Name]
* Date: [Date]
* Input: raw/survey_data_raw.dta
* Output: processed/survey_data_clean.dta
* ============================================

clear all
set more off

* Close any log left open by an earlier (possibly aborted) run; otherwise
* -log using- stops with "log file already open".
capture log close
log using "logs/cleaning_log.smcl", replace

* Load raw data (the raw file itself is never overwritten by this script)
use "raw/survey_data_raw.dta", clear

* Basic inspection
describe
summarize
codebook, compact

* Check dimensions
display "Observations: " _N
display "Variables: " c(k)

* Check for duplicates on ID variable.
* -duplicates list- already restricts its output to observations whose
* respondent_id occurs more than once, so no -if- qualifier is needed
* (Stata has no duplicates() function).
duplicates report respondent_id
duplicates list respondent_id
57
+ ```
58
+
59
+ ### Data Quality Report
60
+
61
+ ```stata
62
* Generate a data quality summary: one line per variable that has at
* least one missing value, with the share shown to one decimal place.
foreach var of varlist _all {
    quietly count if missing(`var')
    local nmiss = r(N)
    if `nmiss' > 0 {
        display "`var': `nmiss' missing (" %4.1f (100 * `nmiss' / _N) "%)"
    }
}

* Check value ranges for numeric variables.
* Negative values are implausible for all three variables.
foreach var of varlist age income education_years {
    summarize `var', detail
    quietly count if `var' < 0 & !missing(`var')
    display "`var': " r(N) " negative values"
}

* The upper bound of 150 is only meaningful for variables measured in
* years; income is excluded because valid incomes routinely exceed 150.
foreach var of varlist age education_years {
    quietly count if `var' > 150 & !missing(`var')
    display "`var': " r(N) " values above 150"
}
81
+ ```
82
+
83
+ ## String Cleaning
84
+
85
+ ### Standardizing Text Variables
86
+
87
+ ```stata
88
* Strip leading/trailing blanks, then squeeze runs of internal blanks
* down to a single space
replace name = stritrim(strtrim(name))

* Normalize letter case field by field
replace email   = lower(email)     // Lower case
replace country = upper(country)   // Upper case
replace city    = proper(city)     // Title case

* Keep digits only in phone numbers
replace phone = ustrregexra(phone, "[^0-9]", "")

* Repair invalid UTF-8 sequences
replace name = ustrfix(name)

* Map known spelling variants onto one canonical label
foreach variant in "CS" "Comp Sci" "Comp. Sci." "CompSci" {
    replace department = "Computer Science" if department == "`variant'"
}

foreach variant in "F" "f" "female" "FEMALE" {
    replace gender = "Female" if gender == "`variant'"
}
foreach variant in "M" "m" "male" "MALE" {
    replace gender = "Male" if gender == "`variant'"
}
109
+ ```
110
+
111
+ ### Parsing Complex Strings
112
+
113
+ ```stata
114
* First and last whitespace-delimited tokens of the full name
generate first_name = word(full_name, 1)
generate last_name  = word(full_name, -1)

* The year is the final token of strings such as "March 15, 2024"
generate year = real(word(date_string, -1))

* Strings such as "$1,234.56": drop the currency sign, then the thousands
* separators, and convert what is left to a number
tempvar digits
generate `digits' = subinstr(income_str, "$", "", .)
replace `digits' = subinstr(`digits', ",", "", .)
generate income_clean = real(`digits')
123
+ ```
124
+
125
+ ## Missing Data Handling
126
+
127
+ ### Identifying Missing Data Patterns
128
+
129
+ ```stata
130
* Install missing data analysis tools.
* mdesc is community-contributed, so fetch it from SSC only when it is not
* already on the ado-path. misstable is an official Stata command and needs
* no installation (-ssc install misstable- would fail).
capture which mdesc
if _rc ssc install mdesc

* Summary of missing data
mdesc

* Missing data patterns
misstable summarize
misstable patterns

* Create missing indicator variables (1 = missing, 0 = observed)
foreach var of varlist income education occupation {
    gen mi_`var' = missing(`var')
}

* Informal check of whether data are missing completely at random:
* compare means of fully observed variables by missingness status.
* This is NOT Little's MCAR test; significant differences only indicate
* that missingness is related to the observed covariates.
foreach var of varlist income education {
    ttest age, by(mi_`var')
    ttest gender_numeric, by(mi_`var')
}
152
+ ```
153
+
154
+ ### Recoding Missing Values
155
+
156
+ ```stata
157
* Common survey codes for missing:
*   -99 = refused         -> .r
*   -88 = don't know      -> .d
*   -77 = not applicable  -> .n
mvdecode income satisfaction trust_score, mv(-99=.r \ -88=.d \ -77=.n)

* Extended missing values keep the reason for missingness while still
* being treated as missing in analyses
167
+ ```
168
+
169
+ ## Variable Construction
170
+
171
+ ### Recoding and Categorization
172
+
173
+ ```stata
174
* Create age groups
recode age (18/29 = 1 "18-29") (30/44 = 2 "30-44") ///
    (45/59 = 3 "45-59") (60/max = 4 "60+"), gen(age_group)

* Create binary indicator (missing income stays missing, not 0)
gen high_income = (income > 75000) if !missing(income)

* Create composite scale (e.g., Likert items)
alpha item1 item2 item3 item4 item5, gen(scale_score) item
* Cronbach's alpha is reported; scale_score is the mean

* Winsorize extreme values BEFORE standardizing, so that z_income below is
* computed from the same (winsorized) income that is kept in the dataset.
* winsor2 is community-contributed; install it from SSC when it is absent.
capture which winsor2
if _rc ssc install winsor2
winsor2 income, cuts(1 99) replace

* Standardize continuous variables
foreach var of varlist income education_years age {
    egen z_`var' = std(`var')
}
192
+ ```
193
+
194
+ ### Date Variables
195
+
196
+ ```stata
197
* Convert month-day-year strings into a Stata daily date
generate interview_date = date(date_string, "MDY")
format %td interview_date

* Calendar components of the interview date
generate interview_year  = year(interview_date)
generate interview_month = month(interview_date)
generate interview_dow   = dow(interview_date)   // 0=Sunday

* Elapsed time since treatment, in days and in average-length months
generate days_since_treatment = interview_date - treatment_date
generate months_since = days_since_treatment / 30.44
209
+ ```
210
+
211
+ ## Data Validation
212
+
213
+ ### Assertion-Based Validation
214
+
215
+ ```stata
216
* Each -assert- stops the do-file as soon as one observation violates it
assert _N == 5000                                // Expected sample size
assert !missing(respondent_id)                   // No missing IDs
assert inrange(age, 18, 120) if !missing(age)    // Plausible age range
assert inlist(gender, "Male", "Female", "Other", "")

* Sign checks and cross-variable consistency checks
assert education_years >= 0 if !missing(education_years)
assert income >= 0 if !missing(income)
assert end_date >= start_date if !missing(end_date, start_date)
226
+ ```
227
+
228
+ ### Duplicate Detection and Resolution
229
+
230
+ ```stata
231
* Identify duplicates: dup_flag holds the number of extra copies of each
* respondent_id (0 = unique)
duplicates tag respondent_id, gen(dup_flag)
list respondent_id survey_date if dup_flag > 0, sepby(respondent_id)

* Option A: keep the most recent observation per respondent.
* Caution: missing survey_date sorts last, so a row with a missing date
* would be treated as the most recent one.
bysort respondent_id (survey_date): keep if _n == _N

* Option B: keep the first observation instead.
* Options A and B are alternatives. Run exactly one of them: once A has
* run, every respondent has a single row and B would do nothing.
* bysort respondent_id (survey_date): keep if _n == 1
240
+ ```
241
+
242
+ ## Saving and Documentation
243
+
244
+ ```stata
245
* Attach descriptive labels to the analysis variables
label variable education_years "Total years of formal education"
label variable income "Annual household income (USD)"
label variable age "Age at time of interview (years)"

* Shrink storage types where possible, then write the processed file
compress
save "processed/survey_data_clean.dta", replace

* Write a compact codebook and a short description into the log
codebook, compact
describe, short

* Finish the audit trail
log close
260
+ ```
261
+
262
+ ## Best Practices
263
+
264
+ 1. **Never modify raw data files**: Always read raw data and write to a separate processed file.
265
+ 2. **Log everything**: Use `log using` to capture all output for audit trails.
266
+ 3. **Use assert statements**: Validate assumptions about the data at each stage.
267
+ 4. **Document decisions**: Comment every recode, drop, or imputation with the rationale.
268
+ 5. **Version your cleaning scripts**: Use git to track changes to .do files.
269
+ 6. **Produce a data dictionary**: Label every variable and value label in the final dataset.
270
+
271
+ ## References
272
+
273
+ - Stata Data Management Reference Manual: https://www.stata.com/manuals/d.pdf
274
+ - DIME Analytics Data Management Wiki: https://dimewiki.worldbank.org/Data_Management
275
+ - J-PAL Research Resources: https://www.povertyactionlab.org/resource/data-cleaning
276
+ - Long, J.S. (2009), The Workflow of Data Analysis Using Stata, Stata Press
@@ -0,0 +1,119 @@
1
+ ---
2
+ name: streamline-analyst-guide
3
+ description: "End-to-end data analysis AI agent with Streamlit UI"
4
+ metadata:
5
+ openclaw:
6
+ emoji: "📈"
7
+ category: "analysis"
8
+ subcategory: "wrangling"
9
+ keywords: ["data analysis", "Streamlit", "automated EDA", "machine learning", "data science", "AI analyst"]
10
+ source: "https://github.com/Wilson-ZheLin/Streamline-Analyst"
11
+ ---
12
+
13
+ # Streamline Analyst Guide
14
+
15
+ ## Overview
16
+
17
+ Streamline Analyst is an end-to-end data analysis AI agent with a Streamlit web interface. Upload a dataset and describe your analysis goal in natural language — the agent handles data cleaning, EDA, feature engineering, model training, evaluation, and report generation. Provides an interactive UI for reviewing each step and adjusting parameters.
18
+
19
+ ## Installation
20
+
21
+ ```bash
22
+ git clone https://github.com/Wilson-ZheLin/Streamline-Analyst.git
23
+ cd Streamline-Analyst
24
+ pip install -r requirements.txt
25
+ streamlit run app.py
26
+ ```
27
+
28
+ ## Workflow
29
+
30
+ ```
31
+ Upload Dataset (CSV, Excel, Parquet)
32
+
33
+ Data Profiling
34
+ ├── Column types and distributions
35
+ ├── Missing value analysis
36
+ ├── Correlation matrix
37
+ └── Outlier detection
38
+
39
+ Data Cleaning (interactive)
40
+ ├── Handle missing values
41
+ ├── Remove/fix outliers
42
+ ├── Type conversions
43
+ └── Feature encoding
44
+
45
+ EDA (automated + custom)
46
+ ├── Univariate analysis
47
+ ├── Bivariate relationships
48
+ ├── Statistical tests
49
+ └── Custom visualizations
50
+
51
+ Modeling (if applicable)
52
+ ├── Train/test split
53
+ ├── Model selection + training
54
+ ├── Hyperparameter tuning
55
+ └── Evaluation metrics
56
+
57
+ Report Generation
58
+ ```
59
+
60
+ ## Features
61
+
62
+ ```python
63
+ # Streamline Analyst provides:
64
+
65
+ # 1. Smart data profiling
66
+ # - Auto-detect column types (numeric, categorical, datetime)
67
+ # - Distribution analysis per column
68
+ # - Missing value patterns (MCAR, MAR, MNAR hints)
69
+ # - Correlation analysis with significance
70
+
71
+ # 2. Interactive cleaning
72
+ # - Imputation strategies (mean, median, mode, KNN, model)
73
+ # - Outlier handling (IQR, Z-score, isolation forest)
74
+ # - Encoding (one-hot, label, target, ordinal)
75
+ # - Scaling (standard, minmax, robust)
76
+
77
+ # 3. Automated EDA
78
+ # - Distribution plots (histogram, KDE, box, violin)
79
+ # - Relationship plots (scatter, pair, heatmap)
80
+ # - Time series decomposition
81
+ # - Statistical tests (t-test, ANOVA, chi-square, Mann-Whitney)
82
+
83
+ # 4. Model pipeline
84
+ # - Classification: LR, RF, GBM, SVM, MLP
85
+ # - Regression: LR, RF, GBM, SVR, ElasticNet
86
+ # - Cross-validation with confidence intervals
87
+ # - Feature importance visualization
88
+ # - SHAP explanations
89
+
90
+ # 5. Report
91
+ # - HTML report with all plots and findings
92
+ # - Downloadable cleaned dataset
93
+ # - Model artifacts (pickle)
94
+ ```
95
+
96
+ ## Natural Language Interface
97
+
98
+ ```markdown
99
+ ### Example Prompts
100
+ - "Show me the distribution of all numeric columns"
101
+ - "Is there a significant difference in income between genders?"
102
+ - "Build a classifier to predict churn using all features"
103
+ - "What are the top 5 most important features for prediction?"
104
+ - "Clean the data: fill missing values and remove outliers"
105
+ - "Generate a summary report of this dataset"
106
+ ```
107
+
108
+ ## Use Cases
109
+
110
+ 1. **Quick EDA**: Rapid exploration of unfamiliar datasets
111
+ 2. **Data cleaning**: Interactive preprocessing with AI guidance
112
+ 3. **Baseline models**: Quick ML prototyping without coding
113
+ 4. **Report generation**: Automated analysis reports
114
+ 5. **Teaching**: Interactive data science demonstrations
115
+
116
+ ## References
117
+
118
+ - [Streamline-Analyst GitHub](https://github.com/Wilson-ZheLin/Streamline-Analyst)
119
+ - [Streamlit](https://streamlit.io/)
@@ -0,0 +1,298 @@
1
+ ---
2
+ name: survey-data-processing
3
+ description: "Clean, recode, and prepare survey response data for analysis"
4
+ metadata:
5
+ openclaw:
6
+ emoji: "clipboard"
7
+ category: "analysis"
8
+ subcategory: "wrangling"
9
+ keywords: ["survey data", "questionnaire coding", "Likert scale", "response validation", "recoding", "survey analysis"]
10
+ source: "wentor-research-plugins"
11
+ ---
12
+
13
+ # Survey Data Processing
14
+
15
+ A skill for cleaning, recoding, and preparing survey response data for statistical analysis. Covers handling common survey data issues such as incomplete responses, attention check failures, reverse-coded items, scale construction, open-ended response coding, and export to analysis-ready formats compatible with SPSS, Stata, and R.
16
+
17
+ ## Survey Data Quality Assessment
18
+
19
+ ### Initial Inspection Workflow
20
+
21
+ Survey data from platforms like Qualtrics, SurveyMonkey, REDCap, and Google Forms each have their own export formats and quirks. The first step is always standardization.
22
+
23
+ ```python
24
+ import pandas as pd
25
+ import numpy as np
26
+
27
def assess_survey_quality(df, duration_col="duration_seconds",
                          min_duration=60):
    """
    Generate a survey data quality report.

    Checks:
    - Share of respondents who answered at least 80% of the columns
    - Response duration (median and number/share of speeders)
    - The ten columns with the most missing answers

    Straight-line responding and attention checks are not covered here;
    see detect_straightlining and check_attention_items.

    Args:
        df: one row per respondent, one column per question/metadata field
        duration_col: column holding completion time in seconds; the
            duration section is skipped when the column is absent
        min_duration: durations strictly below this value count as speeders

    Returns:
        dict with the report entries described above
    """
    report = {}

    # Overall completion: a row is "substantially complete" when at least
    # 80% of its columns are non-missing.
    total_respondents = len(df)
    min_answered = int(len(df.columns) * 0.8)
    complete = df.dropna(thresh=min_answered)
    # Guard against division by zero on an empty export.
    if total_respondents:
        completion_pct = len(complete) / total_respondents * 100
    else:
        completion_pct = 0.0
    report["total_responses"] = total_respondents
    report["substantially_complete"] = len(complete)
    report["completion_rate"] = f"{completion_pct:.1f}%"

    # Duration analysis
    if duration_col in df.columns:
        durations = df[duration_col].dropna()
        is_speeder = durations < min_duration
        # mean() of an empty mask is NaN; report 0.0% instead of "nan%".
        speeder_pct = is_speeder.mean() * 100 if len(durations) else 0.0
        report["median_duration_seconds"] = durations.median()
        report["speeders"] = is_speeder.sum()
        report["speeder_pct"] = f"{speeder_pct:.1f}%"

    # Missing data per question
    missing_by_col = df.isna().sum().sort_values(ascending=False)
    report["most_skipped_questions"] = missing_by_col.head(10).to_dict()

    return report
59
+ ```
60
+
61
+ ### Identifying Low-Quality Responses
62
+
63
+ ```python
64
def detect_straightlining(df, likert_columns, threshold=0.9):
    """
    Flag respondents whose Likert answers are (almost) all identical.

    For every row, the share of the single most frequent answer among the
    non-missing Likert items is computed; the row is flagged when that
    share is at or above the threshold. Rows with no answered Likert item
    are never flagged.

    Returns:
        list of index labels of the flagged rows, in row order
    """
    def _dominant_share(answers):
        answered = answers.dropna()
        if answered.empty:
            return None
        return answered.value_counts().max() / len(answered)

    suspects = []
    for label, answers in df[likert_columns].iterrows():
        share = _dominant_share(answers)
        if share is not None and share >= threshold:
            suspects.append(label)

    return suspects
82
+
83
+
84
def check_attention_items(df, attention_checks):
    """
    Return the index labels of respondents who failed any attention check.

    A respondent fails a check when the value in that column differs from
    the expected answer; a missing answer therefore counts as a failure.

    Args:
        attention_checks: dict of {column_name: correct_answer}
            Example: {"q15_attention": 4, "q32_trap": "strongly agree"}
    """
    any_wrong = pd.Series(False, index=df.index)
    for column, expected in attention_checks.items():
        any_wrong = df[column].ne(expected) | any_wrong

    return df.index[any_wrong].tolist()
97
+ ```
98
+
99
+ ## Recoding and Transformation
100
+
101
+ ### Reverse Coding
102
+
103
+ Many validated psychological scales include reverse-coded items to detect acquiescence bias. These must be recoded before computing scale scores.
104
+
105
+ ```python
106
def reverse_code(df, columns, scale_max, scale_min=1):
    """
    Return a copy of df with the given Likert-type columns reverse-coded.

    Each value x becomes (scale_max + scale_min) - x, e.g. on a 1-5 scale:
        1 -> 5, 2 -> 4, 3 -> 3, 4 -> 2, 5 -> 1

    The input frame is left untouched.
    """
    flipped = df.copy()
    mirror = scale_max + scale_min
    for name in columns:
        flipped[name] = df[name].rsub(mirror)
    return flipped
119
+
120
+
121
# Example usage with a Big Five personality scale: the reverse-keyed items
# of each construct, answered on a 1-7 Likert scale.
reverse_items = dict(
    extraversion=["ext_2", "ext_4", "ext_6"],
    neuroticism=["neur_1", "neur_3", "neur_5"],
    agreeableness=["agree_3", "agree_5"],
)

# Reverse-code one construct at a time.
for items in reverse_items.values():
    df = reverse_code(df, items, scale_max=7, scale_min=1)
131
+ ```
132
+
133
+ ### Scale Construction
134
+
135
+ ```python
136
def compute_scale_scores(df, scale_definitions, method="mean"):
    """
    Compute composite scale scores from individual items.

    The score columns are added to df in place and df is returned.
    Cronbach's alpha of each scale is printed as a reliability check.
    Both aggregations skip missing items (pandas default), so a row with
    partially missing items still gets a score based on the answered ones.

    Args:
        scale_definitions: dict mapping scale name to list of columns
        method: "mean" or "sum"

    Returns:
        DataFrame with new scale score columns

    Raises:
        ValueError: if method is neither "mean" nor "sum"
    """
    # Fail fast: an unknown method previously fell through both branches
    # and produced no score column without any error.
    if method not in ("mean", "sum"):
        raise ValueError(f"method must be 'mean' or 'sum', got {method!r}")

    for scale_name, items in scale_definitions.items():
        # Select the items before the score column is written, so the
        # reliability estimate always uses the raw item responses.
        item_block = df[items]
        if method == "mean":
            df[scale_name] = item_block.mean(axis=1)
        else:
            df[scale_name] = item_block.sum(axis=1)

        # Also compute Cronbach's alpha for reliability
        alpha = cronbachs_alpha(item_block)
        print(f"{scale_name}: alpha = {alpha:.3f} "
              f"(n_items = {len(items)})")

    return df
159
+
160
+
161
def cronbachs_alpha(item_df):
    """
    Compute Cronbach's alpha for internal consistency reliability.
    Values above 0.70 are generally considered acceptable.

    Rows with any missing item are dropped first (listwise deletion).

    Returns:
        alpha as a float, or NaN when it is undefined: fewer than two
        items, fewer than two complete rows, or a total score with zero
        variance.
    """
    item_df = item_df.dropna()
    n_items = item_df.shape[1]
    if n_items < 2:
        return np.nan

    item_variances = item_df.var(axis=0, ddof=1)
    total_variance = item_df.sum(axis=1).var(ddof=1)

    # A zero (or NaN) total variance would make the ratio below divide by
    # zero and yield -inf or a runtime warning; report "undefined" instead.
    if not total_variance > 0:
        return np.nan

    alpha = (n_items / (n_items - 1)) * (
        1 - item_variances.sum() / total_variance
    )
    return alpha
178
+ ```
179
+
180
+ ## Open-Ended Response Processing
181
+
182
+ ### Coding Qualitative Responses
183
+
184
+ ```python
185
def code_open_responses(df, text_column, codebook):
    """
    Apply a predefined codebook to open-ended responses using
    keyword matching. For research-quality coding, this should
    be supplemented with manual coding by trained raters.

    Matching is case-insensitive and literal: keywords are matched as
    plain substrings, so characters such as "$", "(" or "+" need no
    escaping. One 0/1 column named "code_<code name>" is added to df
    in place for every code; missing responses are coded 0.

    Args:
        codebook: dict mapping code names to keyword lists
            Example: {
                "financial_concern": ["money", "cost", "expensive", "afford"],
                "time_constraint": ["time", "busy", "schedule", "hours"],
                "quality_issue": ["quality", "broken", "defect", "poor"],
            }

    Returns:
        df, with the added code columns
    """
    import re  # local import keeps this snippet self-contained

    # Lowercase once; every code is matched against the same text.
    lowered = df[text_column].str.lower()

    for code_name, keywords in codebook.items():
        # Lowercase keywords to match the lowercased text, escape them so
        # regex metacharacters are taken literally, and drop empty strings.
        terms = [re.escape(str(kw).lower()) for kw in keywords if str(kw)]
        if not terms:
            # An empty pattern would match every response.
            df[f"code_{code_name}"] = 0
            continue
        pattern = "|".join(terms)
        df[f"code_{code_name}"] = (
            lowered
            .str.contains(pattern, na=False, regex=True)
            .astype(int)
        )

    return df
209
+ ```
210
+
211
+ ### Inter-Rater Reliability
212
+
213
+ ```
214
+ When multiple coders classify open-ended responses:
215
+
216
+ Cohen's Kappa (2 raters):
217
+ - <= 0.20: slight agreement (values below 0 indicate worse-than-chance, i.e. poor, agreement)
218
+ - 0.21-0.40: fair
219
+ - 0.41-0.60: moderate
220
+ - 0.61-0.80: substantial
221
+ - 0.81-1.00: almost perfect
222
+
223
+ Fleiss' Kappa (3+ raters):
224
+ - Same interpretation scale as Cohen's
225
+ - Use when more than two raters code the same responses
226
+
227
+ Process:
228
+ 1. Develop codebook with definitions and examples
229
+ 2. Train coders on 10-20 practice responses
230
+ 3. Code 20% of responses independently (overlap set)
231
+ 4. Calculate inter-rater reliability on the overlap set
232
+ 5. If kappa < 0.70, discuss disagreements and refine codebook
233
+ 6. Repeat until acceptable reliability is achieved
234
+ 7. Divide remaining responses among coders
235
+ ```
236
+
237
+ ## Data Reshaping for Analysis
238
+
239
+ ### Wide to Long Format
240
+
241
+ Survey data is typically exported in wide format (one row per respondent, one column per question). Many analyses require long format.
242
+
243
+ ```python
244
def reshape_repeated_measures(df, id_col, time_points,
                              measure_prefix):
    """
    Reshape repeated-measures survey data from wide to long.

    Example: columns q1_pre, q1_post -> long format with
    time column ("pre", "post") and value column.

    Args:
        id_col: respondent identifier column, kept as-is
        time_points: suffixes of the wide columns, e.g. ["pre", "post"]
        measure_prefix: shared column prefix; wide columns are named
            "<measure_prefix>_<time point>"; also used as the name of
            the value column in the result

    Returns:
        DataFrame with columns [id_col, "time_point", measure_prefix]
    """
    value_vars = [f"{measure_prefix}_{t}" for t in time_points]

    long_df = pd.melt(
        df,
        id_vars=[id_col],
        value_vars=value_vars,
        var_name="time_point",
        value_name=measure_prefix
    )

    # Strip only the leading "<prefix>_" by position. A string replace
    # would also remove later occurrences of the prefix inside the time
    # label and, on older pandas, would treat the prefix as a regex.
    prefix_len = len(f"{measure_prefix}_")
    long_df["time_point"] = long_df["time_point"].str[prefix_len:]

    return long_df
269
+ ```
270
+
271
+ ## Export for Statistical Software
272
+
273
+ ```
274
+ Export formats by software:
275
+
276
+ SPSS (.sav):
277
+ - Use pyreadstat: pyreadstat.write_sav(df, "output.sav")
278
+ - Include variable labels and value labels
279
+ - Set measurement level (nominal, ordinal, scale)
280
+
281
+ Stata (.dta):
282
+ - Use pandas: df.to_stata("output.dta")
283
+ - Include variable labels via the variable_labels argument: df.to_stata("output.dta", variable_labels={"col": "label"})
284
+
285
+ R (.csv with codebook):
286
+ - Export CSV plus a separate codebook document
287
+ - Or use pyreadr to write .rds format: pyreadr.write_rds("output.rds", df) (pyreadstat does not write .rds)
288
+ - Include factor level definitions
289
+
290
+ General best practices:
291
+ - Include a unique respondent ID column
292
+ - Use numeric codes for categorical variables (with labels)
293
+ - Document all recoding in a companion codebook
294
+ - Save both raw and processed versions
295
+ - Include a timestamp column for data versioning
296
+ ```
297
+
298
+ Proper survey data processing is essential for valid statistical inference. Decisions made during cleaning and recoding directly affect research conclusions, making transparent documentation of every step a methodological requirement rather than a convenience.