pi-skill-search 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (299) hide show
  1. package/CHANGELOG.md +20 -0
  2. package/LICENSE +21 -0
  3. package/README.md +97 -0
  4. package/index.ts +163 -0
  5. package/package.json +48 -0
  6. package/skills/adaptyv/SKILL.md +92 -0
  7. package/skills/add-community-extension/SKILL.md +85 -0
  8. package/skills/aeon/SKILL.md +111 -0
  9. package/skills/ai-slop-cleaner/SKILL.md +118 -0
  10. package/skills/anndata/SKILL.md +83 -0
  11. package/skills/arboreto/SKILL.md +107 -0
  12. package/skills/ask/SKILL.md +55 -0
  13. package/skills/astropy/SKILL.md +30 -0
  14. package/skills/async-worker-recovery/SKILL.md +44 -0
  15. package/skills/autopilot/SKILL.md +63 -0
  16. package/skills/autoresearch/SKILL.md +64 -0
  17. package/skills/autoskill/SKILL.md +116 -0
  18. package/skills/babysit/SKILL.md +43 -0
  19. package/skills/benchling-integration/SKILL.md +106 -0
  20. package/skills/bgpt-paper-search/SKILL.md +67 -0
  21. package/skills/biopython/SKILL.md +29 -0
  22. package/skills/bioservices/SKILL.md +96 -0
  23. package/skills/brainstorming/SKILL.md +104 -0
  24. package/skills/cancel/SKILL.md +85 -0
  25. package/skills/ccg/SKILL.md +87 -0
  26. package/skills/celery-pipeline/SKILL.md +30 -0
  27. package/skills/cellxgene-census/SKILL.md +104 -0
  28. package/skills/child-pi-spawning/SKILL.md +85 -0
  29. package/skills/cirq/SKILL.md +113 -0
  30. package/skills/citation-management/SKILL.md +91 -0
  31. package/skills/clinical-decision-support/SKILL.md +117 -0
  32. package/skills/clinical-reports/SKILL.md +118 -0
  33. package/skills/clinical-trial/SKILL.md +28 -0
  34. package/skills/cobrapy/SKILL.md +116 -0
  35. package/skills/configure-notifications/SKILL.md +85 -0
  36. package/skills/consciousness-council/SKILL.md +120 -0
  37. package/skills/context-artifact-hygiene/SKILL.md +85 -0
  38. package/skills/context-mode-ops/SKILL.md +87 -0
  39. package/skills/dask/SKILL.md +85 -0
  40. package/skills/database-lookup/SKILL.md +118 -0
  41. package/skills/datamol/SKILL.md +108 -0
  42. package/skills/debug/SKILL.md +32 -0
  43. package/skills/deep-dive/SKILL.md +114 -0
  44. package/skills/deep-interview/SKILL.md +90 -0
  45. package/skills/deepchem/SKILL.md +117 -0
  46. package/skills/deepinit/SKILL.md +100 -0
  47. package/skills/deeptools/SKILL.md +118 -0
  48. package/skills/delegation-patterns/SKILL.md +56 -0
  49. package/skills/depmap/SKILL.md +94 -0
  50. package/skills/dhdna-profiler/SKILL.md +86 -0
  51. package/skills/diffdock/SKILL.md +101 -0
  52. package/skills/dispatching-parallel-agents/SKILL.md +119 -0
  53. package/skills/dnanexus-integration/SKILL.md +118 -0
  54. package/skills/do/SKILL.md +48 -0
  55. package/skills/docker-sandbox/SKILL.md +29 -0
  56. package/skills/docx/SKILL.md +119 -0
  57. package/skills/esm/SKILL.md +116 -0
  58. package/skills/etetoolkit/SKILL.md +103 -0
  59. package/skills/event-log-tracing/SKILL.md +85 -0
  60. package/skills/exa-search/SKILL.md +72 -0
  61. package/skills/executing-plans/SKILL.md +69 -0
  62. package/skills/exploratory-data-analysis/SKILL.md +118 -0
  63. package/skills/external-context/SKILL.md +80 -0
  64. package/skills/fastapi/SKILL.md +30 -0
  65. package/skills/finishing-a-development-branch/SKILL.md +106 -0
  66. package/skills/flowio/SKILL.md +114 -0
  67. package/skills/fluidsim/SKILL.md +108 -0
  68. package/skills/generate-image/SKILL.md +108 -0
  69. package/skills/geniml/SKILL.md +117 -0
  70. package/skills/geomaster/SKILL.md +109 -0
  71. package/skills/geopandas/SKILL.md +114 -0
  72. package/skills/get-available-resources/SKILL.md +100 -0
  73. package/skills/gget/SKILL.md +111 -0
  74. package/skills/ginkgo-cloud-lab/SKILL.md +52 -0
  75. package/skills/git-master/SKILL.md +85 -0
  76. package/skills/glycoengineering/SKILL.md +104 -0
  77. package/skills/gtars/SKILL.md +104 -0
  78. package/skills/hackernews-frontpage/SKILL.md +46 -0
  79. package/skills/histolab/SKILL.md +98 -0
  80. package/skills/how-it-works/SKILL.md +25 -0
  81. package/skills/hud/SKILL.md +86 -0
  82. package/skills/hugging-science/SKILL.md +93 -0
  83. package/skills/huggingface/SKILL.md +30 -0
  84. package/skills/hypogenic/SKILL.md +107 -0
  85. package/skills/hypothesis-generation/SKILL.md +118 -0
  86. package/skills/imaging-data-commons/SKILL.md +119 -0
  87. package/skills/infographics/SKILL.md +102 -0
  88. package/skills/iso-13485-certification/SKILL.md +114 -0
  89. package/skills/knowledge-agent/SKILL.md +83 -0
  90. package/skills/labarchive-integration/SKILL.md +98 -0
  91. package/skills/lamindb/SKILL.md +119 -0
  92. package/skills/landsat/SKILL.md +29 -0
  93. package/skills/latchbio-integration/SKILL.md +118 -0
  94. package/skills/latex-posters/SKILL.md +112 -0
  95. package/skills/learn-codebase/SKILL.md +24 -0
  96. package/skills/learner/SKILL.md +118 -0
  97. package/skills/literature-review/SKILL.md +118 -0
  98. package/skills/live-agent-lifecycle/SKILL.md +85 -0
  99. package/skills/mailbox-interactive/SKILL.md +85 -0
  100. package/skills/make-plan/SKILL.md +59 -0
  101. package/skills/markdown-mermaid-writing/SKILL.md +118 -0
  102. package/skills/market-research-reports/SKILL.md +119 -0
  103. package/skills/markitdown/SKILL.md +111 -0
  104. package/skills/markitdown-docs/SKILL.md +28 -0
  105. package/skills/matchms/SKILL.md +91 -0
  106. package/skills/matlab/SKILL.md +118 -0
  107. package/skills/matplotlib/SKILL.md +30 -0
  108. package/skills/mcp-setup/SKILL.md +84 -0
  109. package/skills/medchem/SKILL.md +109 -0
  110. package/skills/mem-search/SKILL.md +96 -0
  111. package/skills/modal/SKILL.md +104 -0
  112. package/skills/model-routing-context/SKILL.md +85 -0
  113. package/skills/molecular-dynamics/SKILL.md +116 -0
  114. package/skills/molfeat/SKILL.md +110 -0
  115. package/skills/multi-perspective-review/SKILL.md +85 -0
  116. package/skills/networkx/SKILL.md +111 -0
  117. package/skills/neurokit2/SKILL.md +114 -0
  118. package/skills/neuropixels-analysis/SKILL.md +112 -0
  119. package/skills/nilearn/SKILL.md +29 -0
  120. package/skills/observability-reliability/SKILL.md +43 -0
  121. package/skills/omc-doctor/SKILL.md +86 -0
  122. package/skills/omc-reference/SKILL.md +119 -0
  123. package/skills/omc-setup/SKILL.md +82 -0
  124. package/skills/omc-teams/SKILL.md +81 -0
  125. package/skills/omero-integration/SKILL.md +111 -0
  126. package/skills/open-notebook/SKILL.md +100 -0
  127. package/skills/openephys/SKILL.md +28 -0
  128. package/skills/opentrons-integration/SKILL.md +110 -0
  129. package/skills/optimize-for-gpu/SKILL.md +119 -0
  130. package/skills/orchestration/SKILL.md +85 -0
  131. package/skills/ownership-session-security/SKILL.md +43 -0
  132. package/skills/paper-lookup/SKILL.md +119 -0
  133. package/skills/paperzilla/SKILL.md +114 -0
  134. package/skills/parallel-web/SKILL.md +64 -0
  135. package/skills/pathfinder/SKILL.md +114 -0
  136. package/skills/pathml/SKILL.md +98 -0
  137. package/skills/pdf/SKILL.md +113 -0
  138. package/skills/peer-review/SKILL.md +119 -0
  139. package/skills/pennylane/SKILL.md +119 -0
  140. package/skills/phylogenetics/SKILL.md +102 -0
  141. package/skills/pi-extension-lifecycle/SKILL.md +41 -0
  142. package/skills/plan/SKILL.md +66 -0
  143. package/skills/polars/SKILL.md +114 -0
  144. package/skills/polars-bio/SKILL.md +84 -0
  145. package/skills/pptx/SKILL.md +118 -0
  146. package/skills/pptx-posters/SKILL.md +112 -0
  147. package/skills/primekg/SKILL.md +97 -0
  148. package/skills/project-session-manager/SKILL.md +85 -0
  149. package/skills/protocolsio-integration/SKILL.md +119 -0
  150. package/skills/pubmed-search/SKILL.md +29 -0
  151. package/skills/pufferlib/SKILL.md +103 -0
  152. package/skills/pydeseq2/SKILL.md +106 -0
  153. package/skills/pydicom/SKILL.md +115 -0
  154. package/skills/pyhealth/SKILL.md +117 -0
  155. package/skills/pylabrobot/SKILL.md +100 -0
  156. package/skills/pymatgen/SKILL.md +28 -0
  157. package/skills/pymc/SKILL.md +108 -0
  158. package/skills/pymoo/SKILL.md +90 -0
  159. package/skills/pyopenms/SKILL.md +119 -0
  160. package/skills/pysam/SKILL.md +118 -0
  161. package/skills/pyspark/SKILL.md +30 -0
  162. package/skills/pytdc/SKILL.md +102 -0
  163. package/skills/pytorch/SKILL.md +31 -0
  164. package/skills/pytorch-lightning/SKILL.md +119 -0
  165. package/skills/pyzotero/SKILL.md +104 -0
  166. package/skills/qiskit/SKILL.md +119 -0
  167. package/skills/qutip/SKILL.md +111 -0
  168. package/skills/ralph/SKILL.md +23 -0
  169. package/skills/ralplan/SKILL.md +105 -0
  170. package/skills/rdflib/SKILL.md +29 -0
  171. package/skills/rdkit/SKILL.md +30 -0
  172. package/skills/read-only-explorer/SKILL.md +85 -0
  173. package/skills/receiving-code-review/SKILL.md +103 -0
  174. package/skills/release/SKILL.md +117 -0
  175. package/skills/remember/SKILL.md +39 -0
  176. package/skills/requesting-code-review/SKILL.md +85 -0
  177. package/skills/requirements-to-task-packet/SKILL.md +65 -0
  178. package/skills/research-grants/SKILL.md +118 -0
  179. package/skills/research-lookup/SKILL.md +117 -0
  180. package/skills/research-reproducibility/SKILL.md +28 -0
  181. package/skills/resource-discovery-config/SKILL.md +43 -0
  182. package/skills/rowan/SKILL.md +100 -0
  183. package/skills/runtime-state-reader/SKILL.md +46 -0
  184. package/skills/safe-bash/SKILL.md +85 -0
  185. package/skills/scanpy/SKILL.md +32 -0
  186. package/skills/scholar-evaluation/SKILL.md +115 -0
  187. package/skills/scientific-brainstorming/SKILL.md +118 -0
  188. package/skills/scientific-critical-thinking/SKILL.md +119 -0
  189. package/skills/scientific-schematics/SKILL.md +116 -0
  190. package/skills/scientific-slides/SKILL.md +117 -0
  191. package/skills/scientific-visualization/SKILL.md +109 -0
  192. package/skills/scientific-writing/SKILL.md +119 -0
  193. package/skills/scikit-bio/SKILL.md +92 -0
  194. package/skills/scikit-learn/SKILL.md +99 -0
  195. package/skills/scikit-survival/SKILL.md +110 -0
  196. package/skills/sciomc/SKILL.md +86 -0
  197. package/skills/scvelo/SKILL.md +106 -0
  198. package/skills/scvi-tools/SKILL.md +114 -0
  199. package/skills/seaborn/SKILL.md +97 -0
  200. package/skills/secure-agent-orchestration-review/SKILL.md +47 -0
  201. package/skills/self-improve/SKILL.md +119 -0
  202. package/skills/semantic-compression/SKILL.md +62 -0
  203. package/skills/setup/SKILL.md +42 -0
  204. package/skills/shap/SKILL.md +103 -0
  205. package/skills/simpy/SKILL.md +116 -0
  206. package/skills/skill/SKILL.md +117 -0
  207. package/skills/skill-search/SKILL.md +67 -0
  208. package/skills/skillify/SKILL.md +46 -0
  209. package/skills/smart-explore/SKILL.md +94 -0
  210. package/skills/sqlite-pandas/SKILL.md +30 -0
  211. package/skills/stable-baselines3/SKILL.md +86 -0
  212. package/skills/state-mutation-locking/SKILL.md +44 -0
  213. package/skills/statistical-analysis/SKILL.md +108 -0
  214. package/skills/statsmodels/SKILL.md +29 -0
  215. package/skills/subagent-driven-development/SKILL.md +89 -0
  216. package/skills/sympy/SKILL.md +115 -0
  217. package/skills/system-prompts/SKILL.md +116 -0
  218. package/skills/systematic-debugging/SKILL.md +119 -0
  219. package/skills/team/SKILL.md +85 -0
  220. package/skills/test-driven-development/SKILL.md +84 -0
  221. package/skills/tiledbvcf/SKILL.md +119 -0
  222. package/skills/timeline-report/SKILL.md +85 -0
  223. package/skills/timesfm-forecasting/SKILL.md +112 -0
  224. package/skills/torch-geometric/SKILL.md +118 -0
  225. package/skills/torchdrug/SKILL.md +118 -0
  226. package/skills/trace/SKILL.md +118 -0
  227. package/skills/transformers/SKILL.md +110 -0
  228. package/skills/treatment-plans/SKILL.md +119 -0
  229. package/skills/ui-render-performance/SKILL.md +41 -0
  230. package/skills/ultragoal/SKILL.md +63 -0
  231. package/skills/ultraqa/SKILL.md +85 -0
  232. package/skills/ultrawork/SKILL.md +20 -0
  233. package/skills/umap-learn/SKILL.md +119 -0
  234. package/skills/usfiscaldata/SKILL.md +118 -0
  235. package/skills/using-git-worktrees/SKILL.md +112 -0
  236. package/skills/using-superpowers/SKILL.md +85 -0
  237. package/skills/using-vetc/SKILL.md +92 -0
  238. package/skills/vaex/SKILL.md +111 -0
  239. package/skills/venue-templates/SKILL.md +113 -0
  240. package/skills/verification-before-completion/SKILL.md +88 -0
  241. package/skills/verification-before-done/SKILL.md +68 -0
  242. package/skills/verify/SKILL.md +33 -0
  243. package/skills/version-bump/SKILL.md +54 -0
  244. package/skills/vetc-analyze-ba/SKILL.md +117 -0
  245. package/skills/vetc-analyze-codebase/SKILL.md +118 -0
  246. package/skills/vetc-api-design/SKILL.md +103 -0
  247. package/skills/vetc-brainstorming/SKILL.md +116 -0
  248. package/skills/vetc-change-proposal/SKILL.md +111 -0
  249. package/skills/vetc-cicd/SKILL.md +113 -0
  250. package/skills/vetc-continuous-learning/SKILL.md +115 -0
  251. package/skills/vetc-deep-interview/SKILL.md +103 -0
  252. package/skills/vetc-docgen/SKILL.md +108 -0
  253. package/skills/vetc-frontend-patterns/SKILL.md +99 -0
  254. package/skills/vetc-iterative-retrieval/SKILL.md +110 -0
  255. package/skills/vetc-java-patterns/SKILL.md +113 -0
  256. package/skills/vetc-meta-skill-creator/SKILL.md +99 -0
  257. package/skills/vetc-oracle-patterns/SKILL.md +109 -0
  258. package/skills/vetc-performance-testing/SKILL.md +104 -0
  259. package/skills/vetc-pr-response/SKILL.md +106 -0
  260. package/skills/vetc-ralph/SKILL.md +108 -0
  261. package/skills/vetc-ralplan/SKILL.md +116 -0
  262. package/skills/vetc-receiving-review/SKILL.md +106 -0
  263. package/skills/vetc-reconcile-patterns/SKILL.md +117 -0
  264. package/skills/vetc-refactoring/SKILL.md +96 -0
  265. package/skills/vetc-runbook/SKILL.md +118 -0
  266. package/skills/vetc-sast/SKILL.md +118 -0
  267. package/skills/vetc-sdlc/SKILL.md +97 -0
  268. package/skills/vetc-security/SKILL.md +117 -0
  269. package/skills/vetc-spec-driven/SKILL.md +111 -0
  270. package/skills/vetc-spec-quality/SKILL.md +117 -0
  271. package/skills/vetc-systematic-debugging/SKILL.md +74 -0
  272. package/skills/vetc-tdd/SKILL.md +96 -0
  273. package/skills/vetc-thinking-pm/SKILL.md +110 -0
  274. package/skills/vetc-ui-visual-qa/SKILL.md +117 -0
  275. package/skills/vetc-verify/SKILL.md +101 -0
  276. package/skills/visual-verdict/SKILL.md +59 -0
  277. package/skills/what-if-oracle/SKILL.md +87 -0
  278. package/skills/widget-rendering/SKILL.md +85 -0
  279. package/skills/wiki/SKILL.md +69 -0
  280. package/skills/workspace-isolation/SKILL.md +85 -0
  281. package/skills/worktree-isolation/SKILL.md +85 -0
  282. package/skills/wowerpoint/SKILL.md +101 -0
  283. package/skills/writer-memory/SKILL.md +82 -0
  284. package/skills/writing-plans/SKILL.md +115 -0
  285. package/skills/writing-skills/SKILL.md +115 -0
  286. package/skills/xgboost/SKILL.md +29 -0
  287. package/skills/xgboost-ts/SKILL.md +28 -0
  288. package/skills/xlsx/SKILL.md +111 -0
  289. package/skills/zarr-python/SKILL.md +101 -0
  290. package/src/categories.ts +383 -0
  291. package/src/format.ts +104 -0
  292. package/src/indexer.ts +101 -0
  293. package/src/proactive.ts +51 -0
  294. package/src/scanner.ts +85 -0
  295. package/src/search.ts +89 -0
  296. package/src/strip.ts +29 -0
  297. package/src/synonyms.ts +83 -0
  298. package/src/text.ts +118 -0
  299. package/src/types.ts +64 -0
@@ -0,0 +1,116 @@
1
+ ---
2
+ name: scientific-schematics
3
+ description: Create publication-quality scientific diagrams using Nano Banana 2 AI with smart iterative refinement. Uses Gemini 3.1 Pro Preview for quality review. Only regenerates if quality is below threshold for your document type. Specialized in neural network architectures, system diagrams, flowcharts, biological pathways, and complex scientific visualizations.
4
+ ---
5
+
6
+ # Scientific Schematics and Diagrams
7
+
8
+ ## Overview
9
+
10
+ Scientific schematics and diagrams transform complex concepts into clear visual representations for publication. **This skill uses Nano Banana 2 AI for diagram generation with Gemini 3.1 Pro Preview quality review.**
11
+
12
+ **How it works:**
13
+ - Describe your diagram in natural language
14
+ - Nano Banana 2 generates publication-quality images automatically
15
+ - **Gemini 3.1 Pro Preview reviews quality** against document-type thresholds
16
+ - **Smart iteration**: Only regenerates if quality is below threshold
17
+ - Publication-ready output in minutes
18
+ - No coding, templates, or manual drawing required
19
+
20
+ **Quality Thresholds by Document Type:**
21
+ | Document Type | Threshold | Description |
22
+ |---------------|-----------|-------------|
23
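+ | journal | 8.5/10 | Highest quality threshold |
+ | poster | 7.0/10 | Moderate threshold |
+ | presentation | 6.5/10 | Lower threshold (faster) |
+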
24
+ ## Quick Start: Generate Any Diagram
25
+
26
+ Create any scientific diagram by simply describing it. Nano Banana 2 handles everything automatically with **smart iteration**:
27
+
28
+ ```bash
29
+ # Generate for journal paper (highest quality threshold: 8.5/10)
30
+ python scripts/generate_schematic.py "CONSORT participant flow diagram with 500 screened, 150 excluded, 350 randomized" -o figures/consort.png --doc-type journal
31
+
32
+ # Generate for presentation (lower threshold: 6.5/10 - faster)
33
+ python scripts/generate_schematic.py "Transformer encoder-decoder architecture showing multi-head attention" -o figures/transformer.png --doc-type presentation
34
+
35
+ # Generate for poster (moderate threshold: 7.0/10)
36
+ python scripts/generate_schematic.py "MAPK signaling pathway from EGFR to gene transcription" -o figures/mapk_pathway.png --doc-type poster
37
+
38
+ # Custom max iterations (max 2)
39
+ python scripts/generate_schematic.py "Complex circuit diagram with op-amp, resistors, and capacitors" -o figures/circuit.png --iterations 2 --doc-type journal
40
+ ```
41
+
42
+ **What happens behind the scenes:**
43
+ 1. **Generation 1**: Nano Banana 2 creates initial image following scientific diagram best practices
44
+ 2. **Review 1**: **Gemini 3.1 Pro Preview** evaluates quality against document-type threshold
45
+ 3. **Decision**: If quality >= threshold → **DONE** (no more iterations needed!)
46
+ 4. **If below threshold**: Improve the prompt based on the critique, then regenerate
47
+ 5. **Repeat**: Until quality meets threshold OR max iterations reached
48
+
49
+ **Smart Iteration Benefits:**
50
+ - ✅ Saves API calls if first generation is good enough
51
+ - ✅ Higher quality standards for journal papers
52
+ - ✅ Faster turnaround for presentations/posters
53
+
54
+ ### Configuration
55
+
56
+ Set your OpenRouter API key:
57
+ ```bash
58
+ export OPENROUTER_API_KEY='your_api_key_here'
59
+ ```
60
+
61
+ Get an API key at: https://openrouter.ai/keys
62
+
63
+ ### AI Generation Best Practices
64
+
65
+ **Effective Prompts for Scientific Diagrams:**
66
+
67
+ ✓ **Good prompts** (specific, detailed):
68
+ - "CONSORT flowchart showing participant flow from screening (n=500) through randomization to final analysis"
69
+ - "Transformer neural network architecture with encoder stack on left, decoder stack on right, showing multi-head attention and cross-attention connections"
70
+ - "Biological signaling cascade: EGFR receptor → RAS → RAF → MEK → ERK → nucleus, with phosphorylation steps labeled"
71
+ - "Block diagram of IoT system: sensors → microcontroller → WiFi module → cloud server → mobile app"
72
+
73
+ ✗ **Avoid vague prompts**:
74
+ - "Make a flowchart" (too generic)
75
+ - "Neural network" (which type? what components?)
76
+ - "Pathway diagram" (which pathway? what molecules?)
77
+
78
+
79
+ ## When to Use This Skill
80
+
81
+ This skill should be used when:
82
+ - Creating neural network architecture diagrams (Transformers, CNNs, RNNs, etc.)
83
+ - Illustrating system architectures and data flow diagrams
84
+ - Drawing methodology flowcharts for study design (CONSORT, PRISMA)
85
+ - Visualizing algorithm workflows and processing pipelines
86
+ - Creating circuit diagrams and electrical schematics
87
+ - Depicting biological pathways and molecular interactions
88
+ - Generating network topologies and hierarchical structures
89
+ - Illustrating conceptual frameworks and theoretical models
90
+ - Designing block diagrams for technical papers
91
+
92
+ ## How to Use This Skill
93
+
94
+ **Simply describe your diagram in natural language.** Nano Banana 2 generates it automatically:
95
+
96
+ ```bash
97
+ python scripts/generate_schematic.py "your diagram description" -o output.png
98
+ ```
99
+
100
+ **That's it!** The AI handles:
101
+ - ✓ Layout and composition
102
+ - ✓ Labels and annotations
103
+ - ✓ Colors and styling
104
+ - ✓ Quality review and refinement
105
+ - ✓ Publication-ready output
106
+
107
+
108
+ # AI Generation Mode (Nano Banana 2 + Gemini 3.1 Pro Preview Review)
109
+
110
+ ## Smart Iterative Refinement Workflow
111
+
112
+ The AI generation system uses **smart iteration** - it only regenerates if quality is below the threshold for your document type:
113
+
114
+ ### How Smart Iteration Works
115
+
116
+
@@ -0,0 +1,117 @@
1
+ ---
2
+ name: scientific-slides
3
+ description: Build slide decks and presentations for research talks. Use this for making PowerPoint slides, conference presentations, seminar talks, research presentations, thesis defense slides, or any scientific talk. Provides slide structure, design templates, timing guidance, and visual validation. Works with PowerPoint and LaTeX Beamer.
4
+ ---
5
+
6
+ # Scientific Slides
7
+
8
+ ## Overview
9
+
10
+ Scientific presentations are a critical medium for communicating research, sharing findings, and engaging with academic and professional audiences. This skill provides comprehensive guidance for creating effective scientific presentations, from structure and content development to visual design and delivery preparation.
11
+
12
+ **Key Focus**: Oral presentations for conferences, seminars, defenses, and professional talks.
13
+
14
+ **CRITICAL DESIGN PHILOSOPHY**: Scientific presentations should be VISUALLY ENGAGING and RESEARCH-BACKED. Avoid dry, text-heavy slides at all costs. Great scientific presentations combine:
15
+ - **Compelling visuals**: High-quality figures, images, diagrams (not just bullet points)
16
+ - **Research context**: Proper citations from research-lookup establishing credibility
17
+ - **Minimal text**: Bullet points as prompts, YOU provide the explanation verbally
18
+ - **Professional design**: Modern color schemes, strong visual hierarchy, generous white space
19
+ - **Story-driven**: Clear narrative arc, not just data dumps
20
+
21
+ **Remember**: Boring presentations = forgotten science. Make your slides visually memorable while maintaining scientific rigor through proper citations.
22
+
23
+ ## When to Use This Skill
24
+
25
+ This skill should be used when:
26
+ - Preparing conference presentations (5-20 minutes)
27
+ - Developing academic seminars (45-60 minutes)
28
+ - Creating thesis or dissertation defense presentations
29
+ - Designing grant pitch presentations
30
+ - Preparing journal club presentations
31
+ - Giving research talks at institutions or companies
32
+ - Teaching or tutorial presentations on scientific topics
33
+
34
+ ## Slide Generation with Nano Banana Pro
35
+
36
+ **This skill uses Nano Banana Pro AI to generate stunning presentation slides automatically.**
37
+
38
+ There are two workflows depending on output format:
39
+
40
+ ### Default Workflow: PDF Slides (Recommended)
41
+
42
+ Generate each slide as a complete image using Nano Banana Pro, then combine into a PDF. This produces the most visually stunning results.
43
+
44
+ **How it works:**
45
+ 1. **Plan the deck**: Create a detailed plan for each slide (title, key points, visual elements)
46
+ 2. **Generate slides**: Call Nano Banana Pro for each slide to create complete slide images
47
+ 3. **Combine to PDF**: Assemble slide images into a single PDF presentation
48
+
49
+ **Step 1: Plan Each Slide**
50
+
51
+ Before generating, create a detailed plan for your presentation:
52
+
53
+ ```markdown
54
+ # Presentation Plan: Introduction to Machine Learning
55
+
56
+ ## Slide 1: Title Slide
57
+ - Title: "Machine Learning: From Theory to Practice"
58
+ - Subtitle: "AI Conference 2025"
59
+ - Speaker: Dr. Jane Smith, University of XYZ
60
+ - Visual: Modern abstract neural network background
61
+
62
+ ## Slide 2: Introduction
63
+ - Title: "Why Machine Learning Matters"
64
+ - Key points: Industry adoption, breakthrough applications, future potential
65
+ - Visual: Icons showing different ML applications (healthcare, finance, robotics)
66
+
67
+ ## Slide 3: Core Concepts
68
+ - Title: "The Three Types of Learning"
69
+ - Content: Supervised, Unsupervised, Reinforcement
70
+ - Visual: Three-part diagram showing each type with examples
71
+
72
+ ... (continue for all slides)
73
+ ```
74
+
75
+ **Step 2: Generate Each Slide**
76
+
77
+ Use the `generate_slide_image.py` script to create each slide.
78
+
79
+ **CRITICAL: Formatting Consistency Protocol**
80
+
81
+ To ensure unified formatting across all slides in a presentation:
82
+ ```bash
83
+ # Title slide (first slide - establishes the style)
84
+ python scripts/generate_slide_image.py "Title slide for presentation: 'Machine Learning: From Theory to Practice'. Subtitle: 'AI Conference 2025'. Speaker: K-Dense. FORMATTING GOAL: Dark blue background (#1a237e), white text, gold accents (#ffc107), minimal design, sans-serif fonts, generous margins, no decorative elements." -o slides/01_title.png
85
+
86
+ # Content slide with citations (attach previous slide for consistency)
87
+ python scripts/generate_slide_image.py "Presentation slide titled 'Why Machine Learning Matters'. Three key points with simple icons: 1) Industry adoption, 2) Breakthrough applications, 3) Future potential. CITATIONS: Include at bottom in small text: (LeCun et al., 2015; Goodfellow et al., 2016). FORMATTING GOAL: Match attached slide style - dark blue background, white text, gold accents, minimal professional design, no visual clutter." -o slides/02_intro.png --attach slides/01_title.png
88
+
89
+ # Background slide with multiple citations
90
+ python scripts/generate_slide_image.py "Presentation slide titled 'Deep Learning Revolution'. Key milestones: ImageNet breakthrough (2012), transformer architecture (2017), GPT models (2018-present). CITATIONS: Show references at bottom: (Krizhevsky et al., 2012; Vaswani et al., 2017; Brown et al., 2020). FORMATTING GOAL: Match attached slide style exactly - same colors, fonts, minimal design." -o slides/03_background.png --attach slides/02_intro.png
91
+
92
+ # RESULTS SLIDE - Attach actual data figure from working directory
93
+ # First, check what figures exist: ls figures/ or ls results/
94
+ python scripts/generate_slide_image.py "Presentation slide titled 'Model Performance Results'. Create a slide presenting the attached accuracy chart. Key findings to highlight: 1) 95% accuracy achieved, 2) Outperforms baseline by 12%, 3) Consistent across test sets. CITATIONS: Include at bottom: (Our results, 2025). FORMATTING GOAL: Match attached slide style exactly." -o slides/04_results.png --attach slides/03_background.png --attach figures/accuracy_chart.png
95
+
96
+ # RESULTS SLIDE - Multiple figures comparison
97
+ python scripts/generate_slide_image.py "Presentation slide titled 'Before vs After Comparison'. Build a side-by-side comparison slide using the two attached figures. Left: baseline results, Right: our improved results. Add brief labels explaining the improvement. FORMATTING GOAL: Match attached slide style exactly." -o slides/05_comparison.png --attach slides/04_results.png --attach figures/baseline.png --attach figures/improved.png
98
+
99
+ # METHODOLOGY SLIDE - Attach existing diagram
100
+ python scripts/generate_slide_image.py "Presentation slide titled 'System Architecture'. Present the attached architecture diagram with brief explanatory bullet points: 1) Input processing, 2) Model inference, 3) Output generation. FORMATTING GOAL: Match attached slide style exactly." -o slides/06_architecture.png --attach slides/05_comparison.png --attach diagrams/system_architecture.png
101
+ ```
102
+
103
+ **IMPORTANT: Before creating results slides, always:**
104
+ 1. List files in working directory: `ls -la figures/` or `ls -la results/`
105
+ 2. Check user-provided directories for relevant figures
106
+ 3. Attach ALL relevant figures that should appear on the slide
107
+ 4. Describe how Nano Banana Pro should incorporate the attached figures
108
+
109
+ **Prompt Template:**
110
+
111
+ Include these elements in every prompt (customize as needed):
112
+ ```
113
+ [Slide content description]
114
+
115
+ # Combine all slides into a PDF presentation
116
+
117
+
@@ -0,0 +1,109 @@
1
+ ---
2
+ name: scientific-visualization
3
+ description: Meta-skill for publication-ready figures. Use when creating journal submission figures requiring multi-panel layouts, significance annotations, error bars, colorblind-safe palettes, and specific journal formatting (Nature, Science, Cell). Orchestrates matplotlib/seaborn/plotly with publication styles. For quick exploration use seaborn or plotly directly.
4
+ ---
5
+
6
+ # Scientific Visualization
7
+
8
+ ## Overview
9
+
10
+ Scientific visualization transforms data into clear, accurate figures for publication. Create journal-ready plots with multi-panel layouts, error bars, significance markers, and colorblind-safe palettes. Export as PDF/EPS/TIFF using matplotlib, seaborn, and plotly for manuscripts.
11
+
12
+ ## When to Use This Skill
13
+
14
+ This skill should be used when:
15
+ - Creating plots or visualizations for scientific manuscripts
16
+ - Preparing figures for journal submission (Nature, Science, Cell, PLOS, etc.)
17
+ - Ensuring figures are colorblind-friendly and accessible
18
+ - Making multi-panel figures with consistent styling
19
+ - Exporting figures at correct resolution and format
20
+ - Following specific publication guidelines
21
+ - Improving existing figures to meet publication standards
22
+ - Creating figures that need to work in both color and grayscale
23
+
24
+ ## Quick Start Guide
25
+
26
+ ### Basic Publication-Quality Figure
27
+
28
+ ```python
29
+ import matplotlib.pyplot as plt
30
+ import numpy as np
31
+
32
+ # Apply publication style (from scripts/style_presets.py)
33
+ from style_presets import apply_publication_style
34
+ apply_publication_style('default')
35
+
36
+ # Create figure with appropriate size (single column = 3.5 inches)
37
+ fig, ax = plt.subplots(figsize=(3.5, 2.5))
38
+
39
+ # Plot data
40
+ x = np.linspace(0, 10, 100)
41
+ ax.plot(x, np.sin(x), label='sin(x)')
42
+ ax.plot(x, np.cos(x), label='cos(x)')
43
+
44
+ # Proper labeling with units
45
+ ax.set_xlabel('Time (seconds)')
46
+ ax.set_ylabel('Amplitude (mV)')
47
+ ax.legend(frameon=False)
48
+
49
+ # Remove unnecessary spines
50
+ ax.spines['top'].set_visible(False)
51
+ ax.spines['right'].set_visible(False)
52
+ ```
53
+ ### Using Pre-configured Styles
54
+
55
+ Apply journal-specific styles using the matplotlib style files in `assets/`:
56
+
57
+ ```python
58
+ import matplotlib.pyplot as plt
59
+
60
+ # Option 1: Use style file directly
61
+ plt.style.use('assets/nature.mplstyle')
62
+
63
+ # Option 2: Use style_presets.py helper
64
+ from style_presets import configure_for_journal
65
+ configure_for_journal('nature', figure_width='single')
66
+
67
+ # Now create figures - they'll automatically match Nature specifications
68
+ fig, ax = plt.subplots()
69
+ # ... your plotting code ...
70
+ ```
71
+
72
+ ### Quick Start with Seaborn
73
+
74
+ For statistical plots, use seaborn with publication styling:
75
+
76
+ ```python
77
+ import seaborn as sns
78
+ import matplotlib.pyplot as plt
79
+ from style_presets import apply_publication_style
80
+
81
+ # Apply publication style
82
+ apply_publication_style('default')
83
+ sns.set_theme(style='ticks', context='paper', font_scale=1.1)
84
+ sns.set_palette('colorblind')
85
+
86
+ # Create statistical comparison figure
87
+ fig, ax = plt.subplots(figsize=(3.5, 3))
88
+ sns.boxplot(data=df, x='treatment', y='response',
89
+ order=['Control', 'Low', 'High'], palette='Set2', ax=ax)
90
+ sns.stripplot(data=df, x='treatment', y='response',
91
+ order=['Control', 'Low', 'High'],
92
+ color='black', alpha=0.3, size=3, ax=ax)
93
+ ax.set_ylabel('Response (μM)')
94
+ sns.despine()
95
+
96
+ # Save figure
97
+ from figure_export import save_publication_figure
98
+ save_publication_figure(fig, 'treatment_comparison', formats=['pdf', 'png'], dpi=300)
99
+ ```
100
+
101
+ ## Core Principles and Best Practices
102
+ ```python
103
+ # Use the figure_export.py script for correct settings
104
+ from figure_export import save_publication_figure
105
+
106
+ # Or save for specific journal requirements
107
+ from figure_export import save_for_journal
108
+ save_for_journal(fig, 'figure1', journal='nature', figure_type='combination')
109
+ ```
@@ -0,0 +1,119 @@
1
+ ---
2
+ name: scientific-writing
3
+ description: Core skill for the deep research and writing tool. Write scientific manuscripts in full paragraphs (never bullet points). Use two-stage process with (1) section outlines with key points using research-lookup then (2) convert to flowing prose. IMRAD structure, citations (APA/AMA/Vancouver), figures/tables, reporting guidelines (CONSORT/STROBE/PRISMA), for research papers and journal submissions.
4
+ ---
5
+
6
+ # Scientific Writing
7
+
8
+ ## Overview
9
+
10
+ **This is the core skill for the deep research and writing tool**—combining AI-driven deep research with well-formatted written outputs. Every document produced is backed by comprehensive literature search and verified citations through the research-lookup skill.
11
+
12
+ Scientific writing is a process for communicating research with precision and clarity. Write manuscripts using IMRAD structure, citations (APA/AMA/Vancouver), figures/tables, and reporting guidelines (CONSORT/STROBE/PRISMA). Apply this skill for research papers and journal submissions.
13
+
14
+ **Critical Principle: Always write in full paragraphs with flowing prose. Never submit bullet points in the final manuscript.** Use a two-stage process: first create section outlines with key points using research-lookup, then convert those outlines into complete paragraphs.
15
+
16
+ ## When to Use This Skill
17
+
18
+ This skill should be used when:
19
+ - Writing or revising any section of a scientific manuscript (abstract, introduction, methods, results, discussion)
20
+ - Structuring a research paper using IMRAD or other standard formats
21
+ - Formatting citations and references in specific styles (APA, AMA, Vancouver, Chicago, IEEE)
22
+ - Creating, formatting, or improving figures, tables, and data visualizations
23
+ - Applying study-specific reporting guidelines (CONSORT for trials, STROBE for observational studies, PRISMA for reviews)
24
+ - Drafting abstracts that meet journal requirements (structured or unstructured)
25
+ - Preparing manuscripts for submission to specific journals
26
+ - Improving writing clarity, conciseness, and precision
27
+ - Ensuring proper use of field-specific terminology and nomenclature
28
+ - Addressing reviewer comments and revising manuscripts
29
+
30
+ ## Visual Enhancement with Scientific Schematics
31
+
32
+ **⚠️ MANDATORY: Every scientific paper MUST include a graphical abstract plus 1-2 additional AI-generated figures using the scientific-schematics skill.**
33
+
34
+ This is not optional. Scientific papers without visual elements are incomplete. Before finalizing any document:
35
+ 1. **ALWAYS generate a graphical abstract** as the first visual element
36
+ 2. Generate at minimum ONE additional schematic or diagram using scientific-schematics
37
+ 3. Prefer 3-4 total figures for comprehensive papers (graphical abstract + methods flowchart + results visualization + conceptual diagram)
38
+
39
+ ### Graphical Abstract (REQUIRED)
40
+
41
+ **Every scientific writeup MUST include a graphical abstract.** This is a visual summary of your paper that:
42
+ - Appears before or immediately after the text abstract
43
+ - Captures the entire paper's key message in one image
44
+ - Is suitable for journal table of contents display
45
+ - Uses landscape orientation (typically 1200x600px)
46
+
47
+ **Generate the graphical abstract FIRST:**
48
+ ```bash
49
+ python scripts/generate_schematic.py "Graphical abstract for [paper title]: [brief description showing workflow from input → methods → key findings → conclusions]" -o figures/graphical_abstract.png
50
+ ```
51
+
52
+ **Graphical Abstract Requirements:**
53
+ - **Content**: Visual summary showing workflow, key methods, main findings, and conclusions
54
+
55
+ ### Additional Figures (GENERATE EXTENSIVELY)
56
+
57
+ **⚠️ CRITICAL: Use BOTH scientific-schematics AND generate-image EXTENSIVELY throughout all documents.**
58
+
59
+ Every document should be richly illustrated. Generate figures liberally - when in doubt, add a visual.
60
+
61
+ **MINIMUM Figure Requirements:**
62
+
63
+ | Document Type | Minimum | Recommended |
64
+ |--------------|---------|-------------|
65
+ | Research Papers | 5 | 6-8 |
66
+ | Literature Reviews | 4 | 5-7 |
67
+ | Market Research | 20 | 25-30 |
68
+ | Presentations | 1/slide | 1-2/slide |
69
+ | Posters | 6 | 8-10 |
70
+
71
+ ## Core Capabilities
72
+
73
+ ### 1. Manuscript Structure and Organization
74
+
75
+ **IMRAD Format**: Guide papers through the standard Introduction, Methods, Results, And Discussion structure used across most scientific disciplines. This includes:
76
+ - **Introduction**: Establish research context, identify gaps, state objectives
77
+ - **Methods**: Detail study design, populations, procedures, and analysis approaches
78
+ - **Results**: Present findings objectively without interpretation
79
+ - **Discussion**: Interpret results, acknowledge limitations, propose future directions
80
+
81
+ For detailed guidance on IMRAD structure, see the docs.
82
+
83
+ **Alternative Structures**: Support discipline-specific formats including:
84
+ - Review articles (narrative, systematic, scoping)
85
+ - Case reports and case series
86
+ - Meta-analyses and pooled analyses
87
+ - Theoretical/modeling papers
88
+ - Methods papers and protocols
89
+
90
+ ### 2. Section-Specific Writing Guidance
91
+
92
+ **Abstract Composition**: Craft concise, standalone summaries (100-250 words) that capture the paper's purpose, methods, results, and conclusions. Support both structured abstracts (with labeled sections) and unstructured single-paragraph formats.
93
+
94
+ **Introduction Development**: Build compelling introductions that:
95
+ - Establish the research problem's importance
96
+ - Review relevant literature systematically
97
+ - Identify knowledge gaps or controversies
98
+ - State clear research questions or hypotheses
99
+ - Explain the study's novelty and significance
100
+
101
+ **Methods Documentation**: Ensure reproducibility through:
102
+ - Detailed participant/sample descriptions
103
+ - Clear procedural documentation
104
+ - Statistical methods with justification
105
+
106
+ ### 3. Citation and Reference Management
107
+
108
+ Apply citation styles correctly across disciplines. For comprehensive style guides, see the docs.
109
+
110
+ **Major Citation Styles:**
111
+ - **AMA (American Medical Association)**: Numbered superscript citations, common in medicine
112
+ - **Vancouver**: Numbered citations in square brackets, biomedical standard
113
+ - **APA (American Psychological Association)**: Author-date in-text citations, common in social sciences
114
+ - **Chicago**: Notes-bibliography or author-date, humanities and sciences
115
+ - **IEEE**: Numbered square brackets, engineering and computer science
116
+
117
+ **Best Practices:**
118
+
119
+
@@ -0,0 +1,92 @@
1
+ ---
2
+ name: scikit-bio
3
+ description: Biological data toolkit. Sequence analysis, alignments, phylogenetic trees, diversity metrics (alpha/beta, UniFrac), ordination (PCoA), PERMANOVA, FASTA/Newick I/O, for microbiome analysis.
4
+ ---
5
+
6
+ # scikit-bio
7
+
8
+ ## Overview
9
+
10
+ scikit-bio is a comprehensive Python library for working with biological data. Apply this skill for bioinformatics analyses spanning sequence manipulation, alignment, phylogenetics, microbial ecology, and multivariate statistics.
11
+
12
+ ## When to Use This Skill
13
+
14
+ This skill should be used when the user:
15
+ - Works with biological sequences (DNA, RNA, protein)
16
+ - Needs to read/write biological file formats (FASTA, FASTQ, GenBank, Newick, BIOM, etc.)
17
+ - Performs sequence alignments or searches for motifs
18
+ - Constructs or analyzes phylogenetic trees
19
+ - Calculates diversity metrics (alpha/beta diversity, UniFrac distances)
20
+ - Performs ordination analysis (PCoA, CCA, RDA)
21
+ - Runs statistical tests on biological/ecological data (PERMANOVA, ANOSIM, Mantel)
22
+ - Analyzes microbiome or community ecology data
23
+ - Works with protein embeddings from language models
24
+ - Needs to manipulate biological data tables
25
+
26
+ ## Core Capabilities
27
+
28
+ ### 1. Sequence Manipulation
29
+
30
+ Work with biological sequences using specialized classes for DNA, RNA, and protein data.
31
+
32
+ **Key operations:**
33
+ - Read/write sequences from FASTA, FASTQ, GenBank, EMBL formats
34
+ - Sequence slicing, concatenation, and searching
35
+ - Reverse complement, transcription (DNA→RNA), and translation (RNA→protein)
36
+ - Find motifs and patterns using regex
37
+ - Calculate distances (Hamming, k-mer based)
38
+ - Handle sequence quality scores and metadata
39
+
40
+ **Common patterns:**
41
+ ```python
42
+ import skbio
43
+
44
+ # Read sequences from file
45
+ seq = skbio.DNA.read('input.fasta')
46
+
47
+ # Sequence operations
48
+ rc = seq.reverse_complement()
49
+ rna = seq.transcribe()
50
+ protein = rna.translate()
51
+
52
+ # Find motifs
53
+ motif_positions = seq.find_with_regex('ATG[ACGT]{3}')
54
+
55
+ # Check for properties
56
+ has_degens = seq.has_degenerates()
57
+ seq_no_gaps = seq.degap()
58
+ ```
59
+
60
+ **Important notes:**
61
+ - Use `DNA`, `RNA`, `Protein` classes for grammared sequences with validation
62
+ - Use `Sequence` class for generic sequences without alphabet restrictions
63
+ - Quality scores are automatically loaded from FASTQ files into positional metadata
64
+ - Metadata types: sequence-level (ID, description), positional (per-base), interval (regions/features)
65
+
66
+ ### 2. Sequence Alignment
67
+
68
+ Perform pairwise and multiple sequence alignments using dynamic programming algorithms.
69
+
70
+ **Key capabilities:**
71
+ - Global alignment (Needleman-Wunsch with semi-global variant)
72
+ - Local alignment (Smith-Waterman)
73
+ - Configurable scoring schemes (match/mismatch, gap penalties, substitution matrices)
74
+ - CIGAR string conversion
75
+ - Multiple sequence alignment storage and manipulation with `TabularMSA`
76
+
77
+ **Common patterns:**
78
+ ```python
79
+ from skbio.alignment import local_pairwise_align_ssw, TabularMSA
80
+
81
+ # Pairwise alignment
82
+ alignment = local_pairwise_align_ssw(seq1, seq2)
83
+
84
+ # Access aligned sequences
85
+ msa = alignment.aligned_sequences
86
+
87
+ # Read multiple alignment from file
88
+ msa = TabularMSA.read('alignment.fasta', constructor=skbio.DNA)
89
+
90
+ # Calculate consensus
91
+ consensus = msa.consensus()
92
+ ```
@@ -0,0 +1,99 @@
1
+ ---
2
+ name: scikit-learn
3
+ description: Machine learning in Python with scikit-learn. Use when working with supervised learning (classification, regression), unsupervised learning (clustering, dimensionality reduction), model evaluation, hyperparameter tuning, preprocessing, or building ML pipelines. Provides comprehensive reference documentation for algorithms, preprocessing techniques, pipelines, and best practices.
4
+ ---
5
+
6
+ # Scikit-learn
7
+
8
+ ## Overview
9
+
10
+ This skill provides comprehensive guidance for machine learning tasks using scikit-learn, the industry-standard Python library for classical machine learning. Use this skill for classification, regression, clustering, dimensionality reduction, preprocessing, model evaluation, and building production-ready ML pipelines.
11
+ ```bash
12
+ # Commonly used with
13
+ uv pip install pandas numpy
14
+ ```
15
+
16
+ ## When to Use This Skill
17
+
18
+ Use the scikit-learn skill when:
19
+
20
+ - Building classification or regression models
21
+ - Performing clustering or dimensionality reduction
22
+ - Preprocessing and transforming data for machine learning
23
+ - Evaluating model performance with cross-validation
24
+ - Tuning hyperparameters with grid or random search
25
+ - Creating ML pipelines for production workflows
26
+ - Comparing different algorithms for a task
27
+ - Working with both structured (tabular) and text data
28
+ - Needing interpretable, classical machine learning approaches
29
+
30
+ ## Quick Start
31
+
32
+ ### Classification Example
33
+
34
+ ```python
35
+ from sklearn.model_selection import train_test_split
36
+ from sklearn.preprocessing import StandardScaler
37
+ from sklearn.ensemble import RandomForestClassifier
38
+ from sklearn.metrics import classification_report
39
+
40
+ # Split data
41
+ X_train, X_test, y_train, y_test = train_test_split(
42
+ X, y, test_size=0.2, stratify=y, random_state=42
43
+ )
44
+
45
+ # Preprocess
46
+ scaler = StandardScaler()
47
+ X_train_scaled = scaler.fit_transform(X_train)
48
+ X_test_scaled = scaler.transform(X_test)
49
+
50
+ # Train model
51
+ model = RandomForestClassifier(n_estimators=100, random_state=42)
52
+ model.fit(X_train_scaled, y_train)
53
+
54
+ # Evaluate
55
+ y_pred = model.predict(X_test_scaled)
56
+ print(classification_report(y_test, y_pred))
57
+ ```
58
+
59
+ ### Complete Pipeline with Mixed Data
60
+
61
+ ```python
62
+ from sklearn.pipeline import Pipeline
63
+ from sklearn.compose import ColumnTransformer
64
+ from sklearn.preprocessing import StandardScaler, OneHotEncoder
65
+ from sklearn.impute import SimpleImputer
66
+ from sklearn.ensemble import GradientBoostingClassifier
67
+
68
+ # Define feature types
69
+ numeric_features = ['age', 'income']
70
+ categorical_features = ['gender', 'occupation']
71
+
72
+ # Create preprocessing pipelines
73
+ numeric_transformer = Pipeline([
74
+ ('imputer', SimpleImputer(strategy='median')),
75
+ ('scaler', StandardScaler())
76
+ ])
77
+
78
+ categorical_transformer = Pipeline([
79
+ ('imputer', SimpleImputer(strategy='most_frequent')),
80
+ ('onehot', OneHotEncoder(handle_unknown='ignore'))
81
+ ])
82
+
83
+ # Combine transformers
84
+ preprocessor = ColumnTransformer([
85
+ ('num', numeric_transformer, numeric_features),
86
+ ('cat', categorical_transformer, categorical_features)
87
+ ])
88
+
89
+ # Full pipeline
90
+ model = Pipeline([
91
+ ('preprocessor', preprocessor),
92
+ ('classifier', GradientBoostingClassifier(random_state=42))
93
+ ])
94
+
95
+ # Fit and predict
96
+ model.fit(X_train, y_train)
97
+ y_pred = model.predict(X_test)
98
+
99
+ ```