@wentorai/research-plugins 1.0.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (415) hide show
  1. package/README.md +22 -22
  2. package/curated/analysis/README.md +82 -56
  3. package/curated/domains/README.md +225 -69
  4. package/curated/literature/README.md +115 -46
  5. package/curated/research/README.md +106 -58
  6. package/curated/tools/README.md +107 -87
  7. package/curated/writing/README.md +92 -45
  8. package/mcp-configs/academic-db/alphafold-mcp.json +20 -0
  9. package/mcp-configs/academic-db/brightspace-mcp.json +21 -0
  10. package/mcp-configs/academic-db/climatiq-mcp.json +20 -0
  11. package/mcp-configs/academic-db/gibs-mcp.json +20 -0
  12. package/mcp-configs/academic-db/gis-mcp-server.json +22 -0
  13. package/mcp-configs/academic-db/google-earth-engine-mcp.json +21 -0
  14. package/mcp-configs/academic-db/m4-clinical-mcp.json +21 -0
  15. package/mcp-configs/academic-db/medical-mcp.json +21 -0
  16. package/mcp-configs/academic-db/nexonco-mcp.json +20 -0
  17. package/mcp-configs/academic-db/omop-mcp.json +20 -0
  18. package/mcp-configs/academic-db/onekgpd-mcp.json +20 -0
  19. package/mcp-configs/academic-db/openedu-mcp.json +20 -0
  20. package/mcp-configs/academic-db/opengenes-mcp.json +20 -0
  21. package/mcp-configs/academic-db/openstax-mcp.json +21 -0
  22. package/mcp-configs/academic-db/openstreetmap-mcp.json +21 -0
  23. package/mcp-configs/academic-db/opentargets-mcp.json +21 -0
  24. package/mcp-configs/academic-db/pdb-mcp.json +21 -0
  25. package/mcp-configs/academic-db/smithsonian-mcp.json +20 -0
  26. package/mcp-configs/ai-platform/magi-researchers.json +21 -0
  27. package/mcp-configs/ai-platform/mcp-academic-researcher.json +22 -0
  28. package/mcp-configs/ai-platform/open-paper-machine.json +21 -0
  29. package/mcp-configs/ai-platform/paper-intelligence.json +21 -0
  30. package/mcp-configs/ai-platform/paper-reader.json +21 -0
  31. package/mcp-configs/ai-platform/paperdebugger.json +21 -0
  32. package/mcp-configs/browser/exa-mcp.json +20 -0
  33. package/mcp-configs/browser/mcp-searxng.json +21 -0
  34. package/mcp-configs/browser/mcp-webresearch.json +20 -0
  35. package/mcp-configs/cloud-docs/confluence-mcp.json +37 -0
  36. package/mcp-configs/cloud-docs/google-drive-mcp.json +35 -0
  37. package/mcp-configs/cloud-docs/notion-mcp.json +29 -0
  38. package/mcp-configs/communication/discord-mcp.json +29 -0
  39. package/mcp-configs/communication/discourse-mcp.json +21 -0
  40. package/mcp-configs/communication/slack-mcp.json +29 -0
  41. package/mcp-configs/communication/telegram-mcp.json +28 -0
  42. package/mcp-configs/data-platform/automl-stat-mcp.json +21 -0
  43. package/mcp-configs/data-platform/jefferson-stats-mcp.json +22 -0
  44. package/mcp-configs/data-platform/mcp-excel-server.json +21 -0
  45. package/mcp-configs/data-platform/mcp-stata.json +21 -0
  46. package/mcp-configs/data-platform/mcpstack-jupyter.json +21 -0
  47. package/mcp-configs/data-platform/ml-mcp.json +21 -0
  48. package/mcp-configs/data-platform/nasdaq-data-link-mcp.json +20 -0
  49. package/mcp-configs/data-platform/numpy-mcp.json +21 -0
  50. package/mcp-configs/database/neo4j-mcp.json +37 -0
  51. package/mcp-configs/database/postgres-mcp.json +28 -0
  52. package/mcp-configs/database/sqlite-mcp.json +29 -0
  53. package/mcp-configs/dev-platform/geogebra-mcp.json +21 -0
  54. package/mcp-configs/dev-platform/github-mcp.json +31 -0
  55. package/mcp-configs/dev-platform/gitlab-mcp.json +34 -0
  56. package/mcp-configs/dev-platform/latex-mcp-server.json +21 -0
  57. package/mcp-configs/dev-platform/manim-mcp.json +20 -0
  58. package/mcp-configs/dev-platform/mcp-echarts.json +20 -0
  59. package/mcp-configs/dev-platform/panel-viz-mcp.json +20 -0
  60. package/mcp-configs/dev-platform/paperbanana.json +20 -0
  61. package/mcp-configs/dev-platform/texflow-mcp.json +20 -0
  62. package/mcp-configs/dev-platform/texmcp.json +20 -0
  63. package/mcp-configs/dev-platform/typst-mcp.json +21 -0
  64. package/mcp-configs/dev-platform/vizro-mcp.json +20 -0
  65. package/mcp-configs/email/email-mcp.json +40 -0
  66. package/mcp-configs/email/gmail-mcp.json +37 -0
  67. package/mcp-configs/note-knowledge/local-faiss-mcp.json +21 -0
  68. package/mcp-configs/note-knowledge/mcp-memory-service.json +21 -0
  69. package/mcp-configs/note-knowledge/mcp-obsidian.json +23 -0
  70. package/mcp-configs/note-knowledge/mcp-ragdocs.json +20 -0
  71. package/mcp-configs/note-knowledge/mcp-summarizer.json +21 -0
  72. package/mcp-configs/note-knowledge/mediawiki-mcp.json +21 -0
  73. package/mcp-configs/note-knowledge/openzim-mcp.json +20 -0
  74. package/mcp-configs/note-knowledge/zettelkasten-mcp.json +21 -0
  75. package/mcp-configs/reference-mgr/academic-paper-mcp-http.json +20 -0
  76. package/mcp-configs/reference-mgr/academix.json +20 -0
  77. package/mcp-configs/reference-mgr/arxiv-research-mcp.json +21 -0
  78. package/mcp-configs/reference-mgr/google-scholar-abstract-mcp.json +19 -0
  79. package/mcp-configs/reference-mgr/google-scholar-mcp.json +20 -0
  80. package/mcp-configs/reference-mgr/mcp-paperswithcode.json +21 -0
  81. package/mcp-configs/reference-mgr/mcp-scholarly.json +20 -0
  82. package/mcp-configs/reference-mgr/mcp-simple-arxiv.json +20 -0
  83. package/mcp-configs/reference-mgr/mcp-simple-pubmed.json +20 -0
  84. package/mcp-configs/reference-mgr/mcp-zotero.json +21 -0
  85. package/mcp-configs/reference-mgr/mendeley-mcp.json +20 -0
  86. package/mcp-configs/reference-mgr/ncbi-mcp-server.json +22 -0
  87. package/mcp-configs/reference-mgr/onecite.json +21 -0
  88. package/mcp-configs/reference-mgr/paper-search-mcp.json +21 -0
  89. package/mcp-configs/reference-mgr/pubmed-search-mcp.json +21 -0
  90. package/mcp-configs/reference-mgr/scholar-mcp.json +21 -0
  91. package/mcp-configs/reference-mgr/scholar-multi-mcp.json +21 -0
  92. package/mcp-configs/reference-mgr/seerai.json +21 -0
  93. package/mcp-configs/reference-mgr/semantic-scholar-fastmcp.json +21 -0
  94. package/mcp-configs/reference-mgr/sourcelibrary.json +20 -0
  95. package/mcp-configs/registry.json +178 -149
  96. package/mcp-configs/repository/dataverse-mcp.json +33 -0
  97. package/mcp-configs/repository/huggingface-mcp.json +29 -0
  98. package/openclaw.plugin.json +2 -2
  99. package/package.json +2 -2
  100. package/skills/analysis/dataviz/algorithm-visualizer-guide/SKILL.md +259 -0
  101. package/skills/analysis/dataviz/bokeh-visualization-guide/SKILL.md +270 -0
  102. package/skills/analysis/dataviz/chart-image-generator/SKILL.md +229 -0
  103. package/skills/analysis/dataviz/citation-map-guide/SKILL.md +184 -0
  104. package/skills/analysis/dataviz/d3-visualization-guide/SKILL.md +281 -0
  105. package/skills/analysis/dataviz/data-visualization-principles/SKILL.md +171 -0
  106. package/skills/analysis/dataviz/echarts-visualization-guide/SKILL.md +250 -0
  107. package/skills/analysis/dataviz/metabase-analytics-guide/SKILL.md +242 -0
  108. package/skills/analysis/dataviz/plotly-interactive-guide/SKILL.md +266 -0
  109. package/skills/analysis/dataviz/redash-analytics-guide/SKILL.md +284 -0
  110. package/skills/analysis/econometrics/econml-causal-guide/SKILL.md +163 -0
  111. package/skills/analysis/econometrics/empirical-paper-analysis/SKILL.md +192 -0
  112. package/skills/analysis/econometrics/mostly-harmless-guide/SKILL.md +139 -0
  113. package/skills/analysis/econometrics/panel-data-analyst/SKILL.md +259 -0
  114. package/skills/analysis/econometrics/panel-data-regression-workflow/SKILL.md +267 -0
  115. package/skills/analysis/econometrics/python-causality-guide/SKILL.md +134 -0
  116. package/skills/analysis/econometrics/stata-accounting-guide/SKILL.md +269 -0
  117. package/skills/analysis/econometrics/stata-analyst-guide/SKILL.md +245 -0
  118. package/skills/analysis/econometrics/stata-reference-guide/SKILL.md +293 -0
  119. package/skills/analysis/statistics/data-anomaly-detection/SKILL.md +157 -0
  120. package/skills/analysis/statistics/general-statistics-guide/SKILL.md +226 -0
  121. package/skills/analysis/statistics/infiagent-benchmark-guide/SKILL.md +106 -0
  122. package/skills/analysis/statistics/ml-experiment-tracker/SKILL.md +212 -0
  123. package/skills/analysis/statistics/pywayne-statistics-guide/SKILL.md +192 -0
  124. package/skills/analysis/statistics/quantitative-methods-guide/SKILL.md +193 -0
  125. package/skills/analysis/statistics/senior-data-scientist-guide/SKILL.md +223 -0
  126. package/skills/analysis/wrangling/claude-data-analysis-guide/SKILL.md +100 -0
  127. package/skills/analysis/wrangling/csv-data-analyzer/SKILL.md +170 -0
  128. package/skills/analysis/wrangling/data-cleaning-pipeline/SKILL.md +266 -0
  129. package/skills/analysis/wrangling/data-cog-guide/SKILL.md +178 -0
  130. package/skills/analysis/wrangling/open-data-scientist-guide/SKILL.md +197 -0
  131. package/skills/analysis/wrangling/stata-data-cleaning/SKILL.md +276 -0
  132. package/skills/analysis/wrangling/streamline-analyst-guide/SKILL.md +119 -0
  133. package/skills/analysis/wrangling/survey-data-processing/SKILL.md +298 -0
  134. package/skills/domains/ai-ml/ai-agent-papers-guide/SKILL.md +146 -0
  135. package/skills/domains/ai-ml/ai-model-benchmarking/SKILL.md +209 -0
  136. package/skills/domains/ai-ml/annotated-dl-papers-guide/SKILL.md +159 -0
  137. package/skills/domains/ai-ml/anomaly-detection-papers-guide/SKILL.md +167 -0
  138. package/skills/domains/ai-ml/autonomous-agents-papers-guide/SKILL.md +178 -0
  139. package/skills/domains/ai-ml/dl-transformer-finetune/SKILL.md +239 -0
  140. package/skills/domains/ai-ml/domain-adaptation-papers-guide/SKILL.md +173 -0
  141. package/skills/domains/ai-ml/generative-ai-guide/SKILL.md +146 -0
  142. package/skills/domains/ai-ml/graph-learning-papers-guide/SKILL.md +125 -0
  143. package/skills/domains/ai-ml/huggingface-inference-guide/SKILL.md +196 -0
  144. package/skills/domains/ai-ml/keras-deep-learning/SKILL.md +210 -0
  145. package/skills/domains/ai-ml/kolmogorov-arnold-networks-guide/SKILL.md +185 -0
  146. package/skills/domains/ai-ml/llm-from-scratch-guide/SKILL.md +124 -0
  147. package/skills/domains/ai-ml/ml-pipeline-guide/SKILL.md +295 -0
  148. package/skills/domains/ai-ml/nlp-toolkit-guide/SKILL.md +247 -0
  149. package/skills/domains/ai-ml/npcpy-research-guide/SKILL.md +137 -0
  150. package/skills/domains/ai-ml/pytorch-guide/SKILL.md +281 -0
  151. package/skills/domains/ai-ml/pytorch-lightning-guide/SKILL.md +244 -0
  152. package/skills/domains/ai-ml/responsible-ai-guide/SKILL.md +126 -0
  153. package/skills/domains/ai-ml/tensorflow-guide/SKILL.md +241 -0
  154. package/skills/domains/ai-ml/vmas-simulator-guide/SKILL.md +129 -0
  155. package/skills/domains/biomedical/bioagents-guide/SKILL.md +308 -0
  156. package/skills/domains/biomedical/clawbio-guide/SKILL.md +167 -0
  157. package/skills/domains/biomedical/clinical-dialogue-agents-guide/SKILL.md +145 -0
  158. package/skills/domains/biomedical/ena-sequence-api/SKILL.md +175 -0
  159. package/skills/domains/biomedical/genomas-guide/SKILL.md +126 -0
  160. package/skills/domains/biomedical/genotex-benchmark-guide/SKILL.md +125 -0
  161. package/skills/domains/biomedical/med-researcher-guide/SKILL.md +161 -0
  162. package/skills/domains/biomedical/med-researcher-r1-guide/SKILL.md +146 -0
  163. package/skills/domains/biomedical/medgeclaw-guide/SKILL.md +345 -0
  164. package/skills/domains/biomedical/medical-imaging-guide/SKILL.md +305 -0
  165. package/skills/domains/biomedical/ncbi-blast-api/SKILL.md +195 -0
  166. package/skills/domains/biomedical/ncbi-datasets-api/SKILL.md +220 -0
  167. package/skills/domains/biomedical/quickgo-api/SKILL.md +181 -0
  168. package/skills/domains/business/architecture-design-guide/SKILL.md +279 -0
  169. package/skills/domains/business/innovation-management-guide/SKILL.md +257 -0
  170. package/skills/domains/business/operations-research-guide/SKILL.md +258 -0
  171. package/skills/domains/business/xpert-bi-guide/SKILL.md +84 -0
  172. package/skills/domains/chemistry/cactus-cheminformatics-guide/SKILL.md +89 -0
  173. package/skills/domains/chemistry/chemeagle-guide/SKILL.md +147 -0
  174. package/skills/domains/chemistry/chemgraph-agent-guide/SKILL.md +120 -0
  175. package/skills/domains/chemistry/molecular-dynamics-guide/SKILL.md +237 -0
  176. package/skills/domains/chemistry/pubchem-api-guide/SKILL.md +180 -0
  177. package/skills/domains/chemistry/spectroscopy-analysis-guide/SKILL.md +290 -0
  178. package/skills/domains/cs/ai-security-papers-guide/SKILL.md +103 -0
  179. package/skills/domains/cs/code-llm-papers-guide/SKILL.md +131 -0
  180. package/skills/domains/cs/distributed-systems-guide/SKILL.md +268 -0
  181. package/skills/domains/cs/formal-verification-guide/SKILL.md +298 -0
  182. package/skills/domains/cs/gaussian-splatting-papers-guide/SKILL.md +158 -0
  183. package/skills/domains/cs/llm-aiops-guide/SKILL.md +70 -0
  184. package/skills/domains/cs/software-heritage-api/SKILL.md +200 -0
  185. package/skills/domains/ecology/species-distribution-guide/SKILL.md +343 -0
  186. package/skills/domains/economics/imf-data-api-guide/SKILL.md +174 -0
  187. package/skills/domains/economics/nber-working-papers-api/SKILL.md +177 -0
  188. package/skills/domains/economics/post-labor-economics/SKILL.md +254 -0
  189. package/skills/domains/economics/pricing-psychology-guide/SKILL.md +273 -0
  190. package/skills/domains/economics/repec-economics-api/SKILL.md +188 -0
  191. package/skills/domains/economics/world-bank-data-guide/SKILL.md +179 -0
  192. package/skills/domains/education/academic-study-methods/SKILL.md +228 -0
  193. package/skills/domains/education/assessment-design-guide/SKILL.md +213 -0
  194. package/skills/domains/education/educational-research-methods/SKILL.md +179 -0
  195. package/skills/domains/education/edumcp-guide/SKILL.md +74 -0
  196. package/skills/domains/education/mooc-analytics-guide/SKILL.md +206 -0
  197. package/skills/domains/education/open-syllabus-api/SKILL.md +171 -0
  198. package/skills/domains/finance/akshare-finance-data/SKILL.md +207 -0
  199. package/skills/domains/finance/finsight-research-guide/SKILL.md +113 -0
  200. package/skills/domains/finance/options-analytics-agent-guide/SKILL.md +117 -0
  201. package/skills/domains/finance/portfolio-optimization-guide/SKILL.md +279 -0
  202. package/skills/domains/finance/risk-modeling-guide/SKILL.md +260 -0
  203. package/skills/domains/finance/stata-accounting-research/SKILL.md +372 -0
  204. package/skills/domains/geoscience/climate-modeling-guide/SKILL.md +215 -0
  205. package/skills/domains/geoscience/pangaea-data-api/SKILL.md +197 -0
  206. package/skills/domains/geoscience/satellite-remote-sensing/SKILL.md +193 -0
  207. package/skills/domains/geoscience/seismology-data-guide/SKILL.md +208 -0
  208. package/skills/domains/humanities/digital-humanities-methods/SKILL.md +232 -0
  209. package/skills/domains/humanities/ethical-philosophy-guide/SKILL.md +244 -0
  210. package/skills/domains/humanities/history-research-guide/SKILL.md +260 -0
  211. package/skills/domains/humanities/political-history-guide/SKILL.md +241 -0
  212. package/skills/domains/law/caselaw-access-api/SKILL.md +149 -0
  213. package/skills/domains/law/legal-agent-skills-guide/SKILL.md +132 -0
  214. package/skills/domains/law/legal-nlp-guide/SKILL.md +236 -0
  215. package/skills/domains/law/legal-research-methods/SKILL.md +190 -0
  216. package/skills/domains/law/opencontracts-guide/SKILL.md +168 -0
  217. package/skills/domains/law/patent-analysis-guide/SKILL.md +257 -0
  218. package/skills/domains/law/regulatory-compliance-guide/SKILL.md +267 -0
  219. package/skills/domains/math/lean-theorem-proving-guide/SKILL.md +140 -0
  220. package/skills/domains/math/symbolic-computation-guide/SKILL.md +263 -0
  221. package/skills/domains/math/topology-data-analysis/SKILL.md +305 -0
  222. package/skills/domains/pharma/clinical-trial-design-guide/SKILL.md +271 -0
  223. package/skills/domains/pharma/drug-target-interaction/SKILL.md +242 -0
  224. package/skills/domains/pharma/madd-drug-discovery-guide/SKILL.md +153 -0
  225. package/skills/domains/pharma/pharmacovigilance-guide/SKILL.md +216 -0
  226. package/skills/domains/physics/astrophysics-data-guide/SKILL.md +305 -0
  227. package/skills/domains/physics/particle-physics-guide/SKILL.md +287 -0
  228. package/skills/domains/social-science/ipums-microdata-api/SKILL.md +211 -0
  229. package/skills/domains/social-science/network-analysis-guide/SKILL.md +310 -0
  230. package/skills/domains/social-science/psychology-research-guide/SKILL.md +270 -0
  231. package/skills/domains/social-science/sociology-research-guide/SKILL.md +238 -0
  232. package/skills/domains/social-science/sociology-research-methods/SKILL.md +181 -0
  233. package/skills/literature/discovery/arxiv-paper-monitoring/SKILL.md +233 -0
  234. package/skills/literature/discovery/paper-recommendation-guide/SKILL.md +120 -0
  235. package/skills/literature/discovery/papers-we-love-guide/SKILL.md +169 -0
  236. package/skills/literature/discovery/semantic-paper-radar/SKILL.md +144 -0
  237. package/skills/literature/discovery/zotero-arxiv-daily-guide/SKILL.md +94 -0
  238. package/skills/literature/fulltext/bioc-pmc-api/SKILL.md +146 -0
  239. package/skills/literature/fulltext/core-api-guide/SKILL.md +144 -0
  240. package/skills/literature/fulltext/dataverse-api/SKILL.md +215 -0
  241. package/skills/literature/fulltext/hal-archive-api/SKILL.md +218 -0
  242. package/skills/literature/fulltext/institutional-repository-guide/SKILL.md +212 -0
  243. package/skills/literature/fulltext/open-access-mining-guide/SKILL.md +341 -0
  244. package/skills/literature/fulltext/osf-api/SKILL.md +212 -0
  245. package/skills/literature/fulltext/pmc-ftp-bulk-download/SKILL.md +182 -0
  246. package/skills/literature/fulltext/zotero-ai-butler-guide/SKILL.md +166 -0
  247. package/skills/literature/fulltext/zotero-scihub-guide/SKILL.md +168 -0
  248. package/skills/literature/metadata/academic-paper-summarizer/SKILL.md +101 -0
  249. package/skills/literature/metadata/bibliometrix-guide/SKILL.md +164 -0
  250. package/skills/literature/metadata/crossref-event-data-api/SKILL.md +183 -0
  251. package/skills/literature/metadata/doi-content-negotiation/SKILL.md +202 -0
  252. package/skills/literature/metadata/orkg-api/SKILL.md +153 -0
  253. package/skills/literature/metadata/plumx-metrics-api/SKILL.md +188 -0
  254. package/skills/literature/metadata/ror-organization-api/SKILL.md +208 -0
  255. package/skills/literature/metadata/sophosia-reference-guide/SKILL.md +110 -0
  256. package/skills/literature/metadata/viaf-authority-api/SKILL.md +209 -0
  257. package/skills/literature/metadata/wikidata-api-guide/SKILL.md +156 -0
  258. package/skills/literature/metadata/zoplicate-dedup-guide/SKILL.md +147 -0
  259. package/skills/literature/metadata/zotero-actions-tags-guide/SKILL.md +212 -0
  260. package/skills/literature/metadata/zotmoov-guide/SKILL.md +120 -0
  261. package/skills/literature/metadata/zutilo-guide/SKILL.md +140 -0
  262. package/skills/literature/search/arxiv-batch-reporting/SKILL.md +133 -0
  263. package/skills/literature/search/arxiv-cli-tools/SKILL.md +172 -0
  264. package/skills/literature/search/arxiv-osiris/SKILL.md +199 -0
  265. package/skills/literature/search/arxiv-paper-processor/SKILL.md +141 -0
  266. package/skills/literature/search/baidu-scholar-guide/SKILL.md +110 -0
  267. package/skills/literature/search/base-academic-search/SKILL.md +196 -0
  268. package/skills/literature/search/chatpaper-guide/SKILL.md +122 -0
  269. package/skills/literature/search/citeseerx-api/SKILL.md +183 -0
  270. package/skills/literature/search/deep-literature-search/SKILL.md +149 -0
  271. package/skills/literature/search/deepgit-search-guide/SKILL.md +147 -0
  272. package/skills/literature/search/eric-education-api/SKILL.md +199 -0
  273. package/skills/literature/search/findpapers-guide/SKILL.md +177 -0
  274. package/skills/literature/search/ieee-xplore-api/SKILL.md +177 -0
  275. package/skills/literature/search/lens-scholarly-api/SKILL.md +211 -0
  276. package/skills/literature/search/multi-database-literature-search/SKILL.md +198 -0
  277. package/skills/literature/search/open-library-api/SKILL.md +196 -0
  278. package/skills/literature/search/open-semantic-search-guide/SKILL.md +190 -0
  279. package/skills/literature/search/openaire-api/SKILL.md +141 -0
  280. package/skills/literature/search/paper-search-mcp-guide/SKILL.md +107 -0
  281. package/skills/literature/search/papers-chat-guide/SKILL.md +194 -0
  282. package/skills/literature/search/pasa-paper-search-guide/SKILL.md +138 -0
  283. package/skills/literature/search/plos-open-access-api/SKILL.md +203 -0
  284. package/skills/literature/search/scielo-api/SKILL.md +182 -0
  285. package/skills/literature/search/share-research-api/SKILL.md +129 -0
  286. package/skills/literature/search/worldcat-search-api/SKILL.md +224 -0
  287. package/skills/research/automation/ai-scientist-v2-guide/SKILL.md +284 -0
  288. package/skills/research/automation/aim-experiment-guide/SKILL.md +234 -0
  289. package/skills/research/automation/claude-academic-workflow-guide/SKILL.md +202 -0
  290. package/skills/research/automation/coexist-ai-guide/SKILL.md +149 -0
  291. package/skills/research/automation/datagen-research-guide/SKILL.md +131 -0
  292. package/skills/research/automation/foam-agent-guide/SKILL.md +203 -0
  293. package/skills/research/automation/kedro-pipeline-guide/SKILL.md +216 -0
  294. package/skills/research/automation/mle-agent-guide/SKILL.md +139 -0
  295. package/skills/research/automation/paper-to-agent-guide/SKILL.md +116 -0
  296. package/skills/research/automation/rd-agent-guide/SKILL.md +246 -0
  297. package/skills/research/automation/research-paper-orchestrator/SKILL.md +254 -0
  298. package/skills/research/deep-research/academic-deep-research/SKILL.md +190 -0
  299. package/skills/research/deep-research/auto-deep-research-guide/SKILL.md +141 -0
  300. package/skills/research/deep-research/cognitive-kernel-guide/SKILL.md +200 -0
  301. package/skills/research/deep-research/corvus-research-guide/SKILL.md +132 -0
  302. package/skills/research/deep-research/deep-research-pro/SKILL.md +213 -0
  303. package/skills/research/deep-research/deep-research-work/SKILL.md +204 -0
  304. package/skills/research/deep-research/deep-searcher-guide/SKILL.md +253 -0
  305. package/skills/research/deep-research/gpt-researcher-guide/SKILL.md +191 -0
  306. package/skills/research/deep-research/in-depth-research-guide/SKILL.md +205 -0
  307. package/skills/research/deep-research/khoj-research-guide/SKILL.md +200 -0
  308. package/skills/research/deep-research/kosmos-scientist-guide/SKILL.md +185 -0
  309. package/skills/research/deep-research/llm-scientific-discovery-guide/SKILL.md +178 -0
  310. package/skills/research/deep-research/local-deep-research-guide/SKILL.md +253 -0
  311. package/skills/research/deep-research/open-researcher-guide/SKILL.md +138 -0
  312. package/skills/research/deep-research/tongyi-deep-research-guide/SKILL.md +217 -0
  313. package/skills/research/funding/eu-horizon-guide/SKILL.md +244 -0
  314. package/skills/research/funding/grant-budget-guide/SKILL.md +284 -0
  315. package/skills/research/funding/nih-reporter-api-guide/SKILL.md +166 -0
  316. package/skills/research/funding/nsf-award-api-guide/SKILL.md +133 -0
  317. package/skills/research/methodology/academic-mentor-guide/SKILL.md +169 -0
  318. package/skills/research/methodology/claude-scientific-guide/SKILL.md +122 -0
  319. package/skills/research/methodology/deep-innovator-guide/SKILL.md +242 -0
  320. package/skills/research/methodology/osf-api-guide/SKILL.md +165 -0
  321. package/skills/research/methodology/parsifal-slr-guide/SKILL.md +154 -0
  322. package/skills/research/methodology/research-paper-kb/SKILL.md +263 -0
  323. package/skills/research/methodology/research-pipeline-units-guide/SKILL.md +169 -0
  324. package/skills/research/methodology/research-town-guide/SKILL.md +263 -0
  325. package/skills/research/methodology/slr-automation-guide/SKILL.md +235 -0
  326. package/skills/research/paper-review/automated-review-guide/SKILL.md +281 -0
  327. package/skills/research/paper-review/latte-review-guide/SKILL.md +175 -0
  328. package/skills/research/paper-review/paper-compare-guide/SKILL.md +238 -0
  329. package/skills/research/paper-review/paper-critique-framework/SKILL.md +181 -0
  330. package/skills/research/paper-review/paper-digest-guide/SKILL.md +240 -0
  331. package/skills/research/paper-review/paper-research-assistant/SKILL.md +231 -0
  332. package/skills/research/paper-review/research-quality-filter/SKILL.md +261 -0
  333. package/skills/research/paper-review/review-response-guide/SKILL.md +275 -0
  334. package/skills/tools/code-exec/contextplus-mcp-guide/SKILL.md +110 -0
  335. package/skills/tools/code-exec/google-colab-guide/SKILL.md +276 -0
  336. package/skills/tools/code-exec/kaggle-api-guide/SKILL.md +216 -0
  337. package/skills/tools/code-exec/overleaf-cli-guide/SKILL.md +279 -0
  338. package/skills/tools/diagram/clawphd-guide/SKILL.md +149 -0
  339. package/skills/tools/diagram/code-flow-visualizer/SKILL.md +197 -0
  340. package/skills/tools/diagram/excalidraw-diagram-guide/SKILL.md +170 -0
  341. package/skills/tools/diagram/json-data-visualizer/SKILL.md +270 -0
  342. package/skills/tools/diagram/kroki-diagram-api/SKILL.md +198 -0
  343. package/skills/tools/diagram/mermaid-architect-guide/SKILL.md +219 -0
  344. package/skills/tools/diagram/scientific-graphical-abstract/SKILL.md +201 -0
  345. package/skills/tools/diagram/tldraw-whiteboard-guide/SKILL.md +397 -0
  346. package/skills/tools/document/docsgpt-guide/SKILL.md +130 -0
  347. package/skills/tools/document/large-document-reader/SKILL.md +202 -0
  348. package/skills/tools/document/md2pdf-xelatex/SKILL.md +212 -0
  349. package/skills/tools/document/openpaper-guide/SKILL.md +232 -0
  350. package/skills/tools/document/paper-parse-guide/SKILL.md +243 -0
  351. package/skills/tools/document/weknora-guide/SKILL.md +216 -0
  352. package/skills/tools/document/zotero-addon-market-guide/SKILL.md +108 -0
  353. package/skills/tools/document/zotero-night-theme-guide/SKILL.md +142 -0
  354. package/skills/tools/document/zotero-style-guide/SKILL.md +217 -0
  355. package/skills/tools/knowledge-graph/citation-network-builder/SKILL.md +244 -0
  356. package/skills/tools/knowledge-graph/concept-map-generator/SKILL.md +284 -0
  357. package/skills/tools/knowledge-graph/graphiti-guide/SKILL.md +219 -0
  358. package/skills/tools/knowledge-graph/mimir-memory-guide/SKILL.md +135 -0
  359. package/skills/tools/knowledge-graph/notero-zotero-notion-guide/SKILL.md +187 -0
  360. package/skills/tools/knowledge-graph/open-webui-tools-guide/SKILL.md +156 -0
  361. package/skills/tools/knowledge-graph/openspg-guide/SKILL.md +210 -0
  362. package/skills/tools/knowledge-graph/paperpile-notion-guide/SKILL.md +84 -0
  363. package/skills/tools/knowledge-graph/zotero-markdb-connect-guide/SKILL.md +162 -0
  364. package/skills/tools/ocr-translate/latex-translation-guide/SKILL.md +176 -0
  365. package/skills/tools/ocr-translate/math-equation-renderer/SKILL.md +198 -0
  366. package/skills/tools/ocr-translate/pdf-math-translate-guide/SKILL.md +141 -0
  367. package/skills/tools/ocr-translate/zotero-pdf-translate-guide/SKILL.md +95 -0
  368. package/skills/tools/ocr-translate/zotero-pdf2zh-guide/SKILL.md +143 -0
  369. package/skills/tools/scraping/dataset-finder-guide/SKILL.md +253 -0
  370. package/skills/tools/scraping/easy-spider-guide/SKILL.md +250 -0
  371. package/skills/tools/scraping/google-scholar-scraper/SKILL.md +255 -0
  372. package/skills/tools/scraping/repository-harvesting-guide/SKILL.md +310 -0
  373. package/skills/writing/citation/academic-citation-manager/SKILL.md +314 -0
  374. package/skills/writing/citation/academic-citation-manager-guide/SKILL.md +182 -0
  375. package/skills/writing/citation/citation-assistant-skill/SKILL.md +192 -0
  376. package/skills/writing/citation/jabref-reference-guide/SKILL.md +127 -0
  377. package/skills/writing/citation/jasminum-zotero-guide/SKILL.md +103 -0
  378. package/skills/writing/citation/mendeley-api/SKILL.md +231 -0
  379. package/skills/writing/citation/obsidian-citation-guide/SKILL.md +164 -0
  380. package/skills/writing/citation/obsidian-zotero-guide/SKILL.md +137 -0
  381. package/skills/writing/citation/onecite-reference-guide/SKILL.md +168 -0
  382. package/skills/writing/citation/papersgpt-zotero-guide/SKILL.md +132 -0
  383. package/skills/writing/citation/papis-cli-guide/SKILL.md +213 -0
  384. package/skills/writing/citation/zotero-better-bibtex-guide/SKILL.md +107 -0
  385. package/skills/writing/citation/zotero-better-notes-guide/SKILL.md +121 -0
  386. package/skills/writing/citation/zotero-gpt-guide/SKILL.md +111 -0
  387. package/skills/writing/citation/zotero-mcp-guide/SKILL.md +164 -0
  388. package/skills/writing/citation/zotero-mdnotes-guide/SKILL.md +162 -0
  389. package/skills/writing/citation/zotero-reference-guide/SKILL.md +139 -0
  390. package/skills/writing/citation/zotero-scholar-guide/SKILL.md +294 -0
  391. package/skills/writing/citation/zotfile-attachment-guide/SKILL.md +140 -0
  392. package/skills/writing/composition/ml-paper-writing/SKILL.md +163 -0
  393. package/skills/writing/composition/opendraft-thesis-guide/SKILL.md +200 -0
  394. package/skills/writing/composition/paper-debugger-guide/SKILL.md +143 -0
  395. package/skills/writing/composition/paperforge-guide/SKILL.md +205 -0
  396. package/skills/writing/composition/research-paper-writer/SKILL.md +226 -0
  397. package/skills/writing/composition/scientific-writing-resources/SKILL.md +151 -0
  398. package/skills/writing/composition/scientific-writing-wrapper/SKILL.md +153 -0
  399. package/skills/writing/latex/academic-writing-latex/SKILL.md +285 -0
  400. package/skills/writing/latex/latex-drawing-collection/SKILL.md +154 -0
  401. package/skills/writing/latex/latex-templates-collection/SKILL.md +159 -0
  402. package/skills/writing/latex/md-to-pdf-academic/SKILL.md +230 -0
  403. package/skills/writing/latex/tex-render-guide/SKILL.md +243 -0
  404. package/skills/writing/polish/academic-tone-guide/SKILL.md +209 -0
  405. package/skills/writing/polish/chinese-text-humanizer/SKILL.md +140 -0
  406. package/skills/writing/polish/conciseness-editing-guide/SKILL.md +225 -0
  407. package/skills/writing/polish/paper-polish-guide/SKILL.md +160 -0
  408. package/skills/writing/templates/arxiv-preprint-template/SKILL.md +184 -0
  409. package/skills/writing/templates/elegant-paper-template/SKILL.md +141 -0
  410. package/skills/writing/templates/graphical-abstract-guide/SKILL.md +183 -0
  411. package/skills/writing/templates/novathesis-guide/SKILL.md +152 -0
  412. package/skills/writing/templates/scientific-article-pdf/SKILL.md +261 -0
  413. package/skills/writing/templates/sjtuthesis-guide/SKILL.md +197 -0
  414. package/skills/writing/templates/thuthesis-guide/SKILL.md +181 -0
  415. package/skills/literature/fulltext/repository-harvesting-guide/SKILL.md +0 -207
@@ -0,0 +1,250 @@
1
+ ---
2
+ name: easy-spider-guide
3
+ description: "Guide to EasySpider for visual no-code web data collection"
4
+ metadata:
5
+ openclaw:
6
+ emoji: "🕷️"
7
+ category: "tools"
8
+ subcategory: "scraping"
9
+ keywords: ["web scraping", "visual crawler", "no-code scraping", "data collection", "research data", "web automation"]
10
+ source: "https://github.com/NaiboWang/EasySpider"
11
+ ---
12
+
13
+ # EasySpider Guide
14
+
15
+ ## Overview
16
+
17
+ EasySpider is a visual, no-code web crawler tool with over 44K stars on GitHub. It provides a graphical interface where users design web scraping tasks by interacting directly with target web pages, clicking on elements to extract, and defining navigation flows visually. No programming knowledge is required to build functional scrapers, making it accessible to researchers across all disciplines.
18
+
19
+ For academic researchers, data collection from web sources is a frequent need but often a technical barrier. Whether gathering publication metadata from journal websites, collecting survey responses from public forums, extracting pricing data for economic research, or archiving web content for digital humanities projects, EasySpider enables researchers to build custom scrapers without writing Python or JavaScript code. The visual approach also makes scrapers easier to maintain and modify when target websites change their structure.
20
+
21
+ EasySpider runs as a desktop application on Windows, macOS, and Linux. It uses a built-in Chromium browser for rendering, which means it can handle JavaScript-heavy websites, single-page applications, and sites that require user interaction such as clicking buttons, scrolling, or filling forms. Scraped data can be exported as CSV, JSON, or directly to databases.
22
+
23
+ ## Installation
24
+
25
+ ### Download and Setup
26
+
27
+ ```bash
28
+ # Download the latest release for your platform from GitHub releases
29
+ # https://github.com/NaiboWang/EasySpider/releases
30
+
31
+ # macOS - download the .dmg file and drag to Applications
32
+
33
+ # Linux - download the AppImage
34
+ chmod +x EasySpider-linux-x86_64.AppImage
35
+ ./EasySpider-linux-x86_64.AppImage
36
+
37
+ # Or run from source
38
+ git clone https://github.com/NaiboWang/EasySpider.git
39
+ cd EasySpider
40
+ npm install
41
+ npm start
42
+ ```
43
+
44
+ ### System Requirements
45
+
46
+ - Operating system: Windows 10+, macOS 10.15+, or Linux (Ubuntu 18.04+)
47
+ - RAM: 4 GB minimum, 8 GB recommended for complex scraping tasks
48
+ - Disk space: 500 MB for the application plus storage for scraped data
49
+ - Network: stable internet connection for web scraping
50
+
51
+ ## Core Concepts
52
+
53
+ ### Task Design Workflow
54
+
55
+ EasySpider follows a visual task design approach with these steps:
56
+
57
+ 1. **Open target page** - Enter the URL in EasySpider's built-in browser
58
+ 2. **Select elements** - Click on the data elements you want to extract
59
+ 3. **Define fields** - Name each extracted element (title, author, date, etc.)
60
+ 4. **Configure pagination** - Click the "next page" button to set up pagination
61
+ 5. **Set extraction rules** - Define how to handle lists, tables, and nested pages
62
+ 6. **Test and run** - Preview results, then execute the full scraping task
63
+
64
+ ### Element Selection Modes
65
+
66
+ - **Single element** - Click one element to extract that specific item
67
+ - **Similar elements** - Click two similar items and EasySpider detects the pattern for all matching elements on the page
68
+ - **Table mode** - Select a table header row to extract entire structured tables
69
+ - **Input mode** - Define form fields to fill before extracting (useful for search-based data collection)
70
+
71
+ ## Research Use Cases
72
+
73
+ ### Collecting Publication Metadata
74
+
75
+ Researchers can use EasySpider to gather publication information from journal websites, conference proceedings pages, or institutional repositories.
76
+
77
+ **Example workflow for scraping a conference proceedings page:**
78
+
79
+ 1. Navigate to the proceedings listing page
80
+ 2. Click on the first paper title to mark it as a "title" field
81
+ 3. Click on the second paper title; EasySpider recognizes the pattern and selects all titles
82
+ 4. Similarly select author names, abstract snippets, and publication dates
83
+ 5. If papers span multiple pages, click the "Next" pagination button
84
+ 6. Configure "click into each paper" to follow links and extract full abstracts
85
+ 7. Run the task and export as CSV
86
+
87
+ ### Monitoring Research Funding Opportunities
88
+
89
+ ```
90
+ Task: Daily scan of funding agency websites for new opportunities
91
+
92
+ Steps configured in EasySpider:
93
+ 1. Navigate to funding agency announcement page
94
+ 2. Extract: opportunity title, deadline, funding amount, eligibility
95
+ 3. Filter: only new announcements (since last check)
96
+ 4. Schedule: run daily at 8:00 AM
97
+ 5. Export: append to CSV file, send notification email
98
+ ```
99
+
100
+ ### Gathering Economic Data from Public Sources
101
+
102
+ For economics and social science research, EasySpider can collect publicly available data from government statistics portals, price comparison websites, and public registries.
103
+
104
+ ```
105
+ Task: Collect commodity prices from public market websites
106
+
107
+ Fields to extract:
108
+ - commodity_name: product identifier
109
+ - price: current listed price
110
+ - unit: measurement unit
111
+ - date: listing date
112
+ - source_url: page URL for reference
113
+
114
+ Pagination: navigate through category pages
115
+ Schedule: weekly collection
116
+ Output: CSV with timestamp for time-series analysis
117
+ ```
118
+
119
+ ### Digital Humanities Web Archiving
120
+
121
+ ```
122
+ Task: Archive public blog posts for discourse analysis
123
+
124
+ Configuration:
125
+ - Start URL: blog archive page
126
+ - Follow: links matching pattern /posts/*
127
+ - Extract per page:
128
+ - post_title
129
+ - post_date
130
+ - author_name
131
+ - post_content (full text)
132
+ - comment_count
133
+ - tags/categories
134
+ - Pagination: follow archive navigation links
135
+ - Output: JSON with full text content
136
+ ```
137
+
138
+ ## Advanced Features
139
+
140
+ ### Conditional Logic
141
+
142
+ EasySpider supports conditional branches in task flows:
143
+
144
+ - **If element exists** - Check for specific elements before attempting extraction
145
+ - **If text contains** - Filter items based on content matching
146
+ - **Loop control** - Set maximum iterations for pagination or nested page visits
147
+
148
+ ### Data Cleaning Options
149
+
150
+ Built-in text processing options can be applied during extraction:
151
+
152
+ - Remove HTML tags from extracted text
153
+ - Trim whitespace and normalize spacing
154
+ - Extract numbers from mixed text fields
155
+ - Apply regex patterns to clean specific formats
156
+ - Convert date strings to standardized formats
157
+
158
+ ### Handling Dynamic Content
159
+
160
+ For JavaScript-rendered pages, EasySpider provides options to:
161
+
162
+ - Wait for specific elements to appear before extracting
163
+ - Scroll to load lazy-loaded content
164
+ - Click "Load More" buttons automatically
165
+ - Handle infinite scroll pages with configurable scroll limits
166
+
167
+ ### Anti-Detection Configuration
168
+
169
+ For responsible scraping, EasySpider includes request options such as:
170
+
171
+ ```
172
+ Request configuration:
173
+ - Delay between requests: 2-5 seconds (randomized)
174
+ - User-Agent rotation: enabled
175
+ - Concurrent requests: 1 (sequential for politeness)
176
+ - Respect robots.txt: check before scraping
177
+ - Rate limiting: max 30 requests per minute
178
+ ```
179
+
180
+ ## Exporting Research Data
181
+
182
+ ### CSV Export
183
+
184
+ The most common format for researchers. Data is exported with headers matching the field names defined during task design.
185
+
186
+ ```csv
187
+ title,authors,year,journal,doi,abstract
188
+ "Machine Learning in Materials Science","Smith J, Lee K",2025,"Nature Materials","10.1038/xxx","Abstract text here..."
189
+ ```
190
+
191
+ ### JSON Export
192
+
193
+ Preserves nested structure for complex extractions:
194
+
195
+ ```json
196
+ {
197
+ "task_name": "proceedings_scrape",
198
+ "extracted_at": "2026-03-10T14:30:00Z",
199
+ "records": [
200
+ {
201
+ "title": "Machine Learning in Materials Science",
202
+ "authors": ["Smith J", "Lee K"],
203
+ "year": 2025,
204
+ "metadata": {
205
+ "journal": "Nature Materials",
206
+ "doi": "10.1038/xxx"
207
+ }
208
+ }
209
+ ]
210
+ }
211
+ ```
212
+
213
+ ### Database Export
214
+
215
+ EasySpider can write directly to SQLite databases, which is convenient for subsequent analysis with Python pandas or R.
216
+
217
+ ```python
218
import sqlite3
from contextlib import closing

import pandas as pd

# Read EasySpider output database.
# closing() guarantees the connection is released even if the query raises
# (sqlite3's own context manager only manages transactions, not closing).
# NOTE(review): the database file and table names look like placeholders —
# adjust them to match your actual export.
with closing(sqlite3.connect("easyspider_results.db")) as conn:
    df = pd.read_sql("SELECT * FROM scraped_data", conn)

# Process and analyze
print(f"Total records: {len(df)}")
print(df.describe())
229
+ ```
230
+
231
+ ## Ethical Web Scraping Guidelines for Researchers
232
+
233
+ When using EasySpider for research data collection, follow these ethical guidelines:
234
+
235
+ - **Check robots.txt** before scraping any website
236
+ - **Respect rate limits** and add appropriate delays between requests
237
+ - **Review terms of service** for target websites
238
+ - **Use APIs when available** rather than scraping HTML (many services offer research APIs)
239
+ - **Minimize data collection** to only what is needed for the research question
240
+ - **Store data securely** especially when collecting personal information
241
+ - **Cite data sources** in publications and include data collection methodology
242
+ - **Obtain IRB approval** if scraping involves human subjects data
243
+ - **Consider GDPR/privacy regulations** when collecting personal data about individuals in the EU (GDPR applies regardless of where the website is hosted)
244
+
245
+ ## References
246
+
247
+ - EasySpider GitHub repository: https://github.com/NaiboWang/EasySpider
248
+ - EasySpider documentation: https://github.com/NaiboWang/EasySpider/wiki
249
+ - EasySpider video tutorials: https://github.com/NaiboWang/EasySpider#tutorials
250
+ - Web scraping ethics in research: https://doi.org/10.1177/2053951720943006
@@ -0,0 +1,255 @@
1
+ ---
2
+ name: google-scholar-scraper
3
+ description: "Ethical Google Scholar data collection techniques and best practices"
4
+ metadata:
5
+ openclaw:
6
+ emoji: "🔍"
7
+ category: "tools"
8
+ subcategory: "scraping"
9
+ keywords: ["Google Scholar", "web scraping", "bibliometrics", "citation data", "scholarly search", "rate limiting"]
10
+ source: "wentor-research-plugins"
11
+ ---
12
+
13
+ # Google Scholar Scraper
14
+
15
+ A skill for ethically collecting bibliometric data from Google Scholar, including search results, citation counts, author profiles, and related articles. Covers rate limiting, CAPTCHA avoidance, alternative APIs, legal considerations, data parsing, and practical workflows that balance data needs with responsible access.
16
+
17
+ ## Legal and Ethical Considerations
18
+
19
+ ### Before You Scrape
20
+
21
+ Google Scholar does not offer an official API, and its Terms of Service restrict automated access. Researchers must weigh their data needs against legal and ethical constraints.
22
+
23
+ ```
24
+ Legal landscape:
25
+
26
+ Terms of Service:
27
+ - Google's ToS prohibit automated queries
28
+ - Violation can result in IP blocking (temporary or permanent)
29
+ - Institutional IPs can be blocked, affecting all campus users
30
+ - In some jurisdictions, the ToS may not be legally enforceable
30
+ against non-commercial academic research, but this is debated
32
+
33
+ Ethical guidelines:
34
+ - Minimize load: respect the server, use delays between requests
35
+ - Cache aggressively: never request the same page twice
36
+ - Use official alternatives first (see below)
37
+ - Do not redistribute raw scraped data
38
+ - Cite Google Scholar as your data source in publications
39
+ - Consider whether your research question truly requires
40
+ Google Scholar data, or if Web of Science, Scopus, or
41
+ OpenAlex could answer it instead
42
+
43
+ Official and semi-official alternatives:
44
+ - Semantic Scholar API: free, rate-limited (roughly 1 request/sec with a free API key), excellent coverage
45
+ - OpenAlex API: free, comprehensive, well-documented
46
+ - Crossref API: free, DOI-based metadata and citation counts
47
+ - CORE API: free, full-text open access content
48
+ - Google Scholar Alerts: manual but ToS-compliant monitoring
49
+ - Publish or Perish (software): uses Google Scholar with built-in
50
+ rate limiting, commonly used in bibliometric research
51
+ ```
52
+
53
+ ## Data Collection Approaches
54
+
55
+ ### Using Scholarly (Python Library)
56
+
57
+ The `scholarly` Python library wraps Google Scholar access with built-in rate limiting and proxy support. It is the most commonly used tool for academic Google Scholar scraping.
58
+
59
+ ```python
60
+ from scholarly import scholarly, ProxyGenerator
61
+
62
def setup_scholarly_with_proxy():
    """
    Register a free-proxy generator with scholarly to reduce blocking risk.

    Free proxies are less reliable and only suitable for small jobs.
    For heavy usage, consider ScraperAPI or similar paid services.
    """
    proxy_generator = ProxyGenerator()
    proxy_generator.FreeProxies()  # free proxy pool (less reliable)
    scholarly.use_proxy(proxy_generator)
71
+
72
+
73
def search_scholar(query, max_results=20, delay=2):
    """
    Search Google Scholar and collect structured results.

    IMPORTANT: Add delays between separate queries to avoid blocking.
    Recommended: 15-30 seconds between searches.

    Args:
        query: Search string passed to scholarly.search_pubs.
        max_results: Maximum number of results to collect.
        delay: Seconds to pause between consecutive result fetches.

    Returns:
        A list of dicts with the keys title, author, year, venue,
        abstract, citations, and url. The list holds fewer than
        max_results entries when the search is exhausted early.
    """
    import time

    results = []
    search_query = scholarly.search_pubs(query)

    for _ in range(max_results):
        # Rate limiting: pause before every fetch except the first, so that
        # reaching max_results never ends with a pointless trailing sleep.
        if results:
            time.sleep(delay)

        try:
            result = next(search_query)
        except StopIteration:
            break

        # Tolerate results without a "bib" section instead of raising KeyError.
        bib = result.get("bib", {})
        results.append({
            "title": bib.get("title", ""),
            "author": bib.get("author", []),
            "year": bib.get("pub_year", ""),
            "venue": bib.get("venue", ""),
            "abstract": bib.get("abstract", ""),
            "citations": result.get("num_citations", 0),
            "url": result.get("pub_url", ""),
        })

    return results
106
+
107
+
108
def get_author_profile(author_name):
    """
    Retrieve the first Google Scholar profile matching an author name.

    Args:
        author_name: Name passed to scholarly.search_author.

    Returns:
        A dict with name, affiliation, h_index, i10_index, cited_by,
        interests, and publications (the number of listed publications,
        not the publication records themselves).

    Raises:
        LookupError: If the search yields no author.
    """
    search_query = scholarly.search_author(author_name)

    # A bare next() would leak StopIteration to the caller when nothing
    # matches; turn that into an explicit, descriptive error instead.
    author = next(search_query, None)
    if author is None:
        raise LookupError(
            f"No Google Scholar profile found for {author_name!r}"
        )

    author = scholarly.fill(author)

    profile = {
        "name": author.get("name", ""),
        "affiliation": author.get("affiliation", ""),
        "h_index": author.get("hindex", 0),
        "i10_index": author.get("i10index", 0),
        "cited_by": author.get("citedby", 0),
        "interests": author.get("interests", []),
        "publications": len(author.get("publications", [])),
    }

    return profile
128
+ ```
129
+
130
+ ## Rate Limiting and Anti-Blocking
131
+
132
+ ### Best Practices
133
+
134
+ ```
135
+ Rate limiting strategy:
136
+
137
+ 1. Request delays:
138
+ - Between search queries: 15-30 seconds minimum
139
+ - Between profile lookups: 10-20 seconds
140
+ - Between citation fetches: 5-10 seconds
141
+ - Add random jitter: delay + random(0, 5) seconds
142
+
143
+ 2. Session management:
144
+ - Rotate user agents (maintain a list of 10+ real browser UAs)
145
+ - Clear cookies periodically
146
+ - Use residential proxies for large jobs (paid)
147
+ - Limit sessions to 100-200 requests before rotating proxy
148
+
149
+ 3. Caching:
150
+ - Cache every response to disk (shelve, sqlite, or JSON)
151
+ - Check cache before making any request
152
+ - Set cache expiry (7-30 days for citation counts)
153
+
154
+ 4. Batch scheduling:
155
+ - Spread collection over days, not hours
156
+ - Run during off-peak hours (late night UTC)
157
+ - Process in batches of 50-100 queries per session
158
+ ```
159
+
160
+ ### Handling CAPTCHAs and Blocks
161
+
162
+ ```python
163
+ import time
164
+ import random
165
+
166
def resilient_search(query, max_retries=3):
    """
    Search with exponential backoff on failures.

    When blocked, wait and retry with increasing delays
    (about 1, 2, 4, ... minutes plus up to 30 seconds of jitter).

    Args:
        query: Search string forwarded to search_scholar.
        max_retries: Maximum number of search attempts.

    Returns:
        The list returned by search_scholar, or an empty list when every
        attempt was blocked.

    Raises:
        Exception: Any error whose message does not mention "CAPTCHA" or
            "429" is re-raised unchanged.
    """
    for attempt in range(max_retries):
        try:
            return search_scholar(query, max_results=10)
        except Exception as e:
            # NOTE(review): block detection relies on the error message
            # text — confirm it matches what the scraping library raises.
            if "CAPTCHA" not in str(e) and "429" not in str(e):
                raise  # bare raise keeps the original traceback

            # No retry follows the last attempt, so do not wait for one.
            if attempt + 1 >= max_retries:
                break

            wait_time = (2 ** attempt) * 60 + random.randint(0, 30)
            print(f"Blocked. Waiting {wait_time}s before retry "
                  f"(attempt {attempt + 1}/{max_retries})")
            time.sleep(wait_time)

    print("Max retries exceeded. Consider using a different proxy "
          "or waiting 24 hours before resuming.")
    return []
187
+ ```
188
+
189
+ ## Data Processing and Storage
190
+
191
+ ### Structuring Collected Data
192
+
193
+ ```python
194
+ import pandas as pd
195
+ import json
196
+ from datetime import datetime
197
+
198
def save_results(results, output_dir, query_name):
    """
    Save scraped results in multiple formats with metadata.

    Writes <query_name>_results.json (metadata plus results) and
    <query_name>_results.csv (results only) into output_dir, creating
    the directory first if it does not exist.

    Args:
        results: List of result dicts (must be JSON-serializable).
        output_dir: Directory that receives both files.
        query_name: Label stored in the metadata and used as the
            file-name prefix.

    Returns:
        A short human-readable summary string.
    """
    from pathlib import Path

    target_dir = Path(output_dir)
    # Without this, open() fails when the directory is missing.
    target_dir.mkdir(parents=True, exist_ok=True)

    # Add collection metadata
    metadata = {
        "query": query_name,
        "collected_at": datetime.now().isoformat(),
        "n_results": len(results),
        "source": "google_scholar",
    }

    # Save as JSON (preserves all structure)
    json_path = target_dir / f"{query_name}_results.json"
    with open(json_path, "w", encoding="utf-8") as f:
        json.dump({"metadata": metadata, "results": results}, f, indent=2)

    # Save as CSV (for spreadsheet analysis)
    df = pd.DataFrame(results)
    df.to_csv(target_dir / f"{query_name}_results.csv", index=False)

    return f"Saved {len(results)} results for query: {query_name}"
219
+ ```
220
+
221
+ ## Recommended Alternatives to Scraping
222
+
223
+ ### When Not to Scrape Google Scholar
224
+
225
+ ```
226
+ Use these free APIs instead when possible:
227
+
228
+ OpenAlex (openalex.org):
229
+ - Coverage: 250M+ works
230
+ - API: REST, no key needed (polite pool with email)
231
+ - Rate limit: 10 requests/sec (polite pool), 100K/day
232
+ - Data: titles, abstracts, citations, authors, institutions
233
+ - Best for: large-scale bibliometric analysis
234
+
235
+ Semantic Scholar (semanticscholar.org):
236
+ - Coverage: 200M+ papers
237
+ - API: REST, free key available
238
+ - Rate limit: about 1 request/sec with an API key (higher limits on request); unauthenticated traffic shares a common pool
239
+ - Data: titles, abstracts, citations, citation contexts, TLDR
240
+ - Best for: citation analysis, NLP on papers
241
+
242
+ Crossref (crossref.org):
243
+ - Coverage: 130M+ DOIs
244
+ - API: REST, no key needed (polite pool with email)
245
+ - Data: metadata, reference lists, citation counts
246
+ - Best for: DOI resolution, reference matching
247
+
248
+ Use Google Scholar scraping ONLY when:
249
+ - You need Google Scholar-specific metrics (h-index by GS)
250
+ - Your target papers are not indexed elsewhere
251
+ - You need Google Scholar's ranking/relevance ordering
252
+ - You are doing small-scale collection (< 500 results)
253
+ ```
254
+
255
+ Responsible data collection from Google Scholar requires balancing research needs with ethical obligations to shared infrastructure. When possible, prefer official APIs that are designed for programmatic access. When scraping is necessary, implement aggressive rate limiting, cache results, and keep total request volumes as low as your research question permits.