pi-skill-search 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (299) hide show
  1. package/CHANGELOG.md +20 -0
  2. package/LICENSE +21 -0
  3. package/README.md +97 -0
  4. package/index.ts +163 -0
  5. package/package.json +48 -0
  6. package/skills/adaptyv/SKILL.md +92 -0
  7. package/skills/add-community-extension/SKILL.md +85 -0
  8. package/skills/aeon/SKILL.md +111 -0
  9. package/skills/ai-slop-cleaner/SKILL.md +118 -0
  10. package/skills/anndata/SKILL.md +83 -0
  11. package/skills/arboreto/SKILL.md +107 -0
  12. package/skills/ask/SKILL.md +55 -0
  13. package/skills/astropy/SKILL.md +30 -0
  14. package/skills/async-worker-recovery/SKILL.md +44 -0
  15. package/skills/autopilot/SKILL.md +63 -0
  16. package/skills/autoresearch/SKILL.md +64 -0
  17. package/skills/autoskill/SKILL.md +116 -0
  18. package/skills/babysit/SKILL.md +43 -0
  19. package/skills/benchling-integration/SKILL.md +106 -0
  20. package/skills/bgpt-paper-search/SKILL.md +67 -0
  21. package/skills/biopython/SKILL.md +29 -0
  22. package/skills/bioservices/SKILL.md +96 -0
  23. package/skills/brainstorming/SKILL.md +104 -0
  24. package/skills/cancel/SKILL.md +85 -0
  25. package/skills/ccg/SKILL.md +87 -0
  26. package/skills/celery-pipeline/SKILL.md +30 -0
  27. package/skills/cellxgene-census/SKILL.md +104 -0
  28. package/skills/child-pi-spawning/SKILL.md +85 -0
  29. package/skills/cirq/SKILL.md +113 -0
  30. package/skills/citation-management/SKILL.md +91 -0
  31. package/skills/clinical-decision-support/SKILL.md +117 -0
  32. package/skills/clinical-reports/SKILL.md +118 -0
  33. package/skills/clinical-trial/SKILL.md +28 -0
  34. package/skills/cobrapy/SKILL.md +116 -0
  35. package/skills/configure-notifications/SKILL.md +85 -0
  36. package/skills/consciousness-council/SKILL.md +120 -0
  37. package/skills/context-artifact-hygiene/SKILL.md +85 -0
  38. package/skills/context-mode-ops/SKILL.md +87 -0
  39. package/skills/dask/SKILL.md +85 -0
  40. package/skills/database-lookup/SKILL.md +118 -0
  41. package/skills/datamol/SKILL.md +108 -0
  42. package/skills/debug/SKILL.md +32 -0
  43. package/skills/deep-dive/SKILL.md +114 -0
  44. package/skills/deep-interview/SKILL.md +90 -0
  45. package/skills/deepchem/SKILL.md +117 -0
  46. package/skills/deepinit/SKILL.md +100 -0
  47. package/skills/deeptools/SKILL.md +118 -0
  48. package/skills/delegation-patterns/SKILL.md +56 -0
  49. package/skills/depmap/SKILL.md +94 -0
  50. package/skills/dhdna-profiler/SKILL.md +86 -0
  51. package/skills/diffdock/SKILL.md +101 -0
  52. package/skills/dispatching-parallel-agents/SKILL.md +119 -0
  53. package/skills/dnanexus-integration/SKILL.md +118 -0
  54. package/skills/do/SKILL.md +48 -0
  55. package/skills/docker-sandbox/SKILL.md +29 -0
  56. package/skills/docx/SKILL.md +119 -0
  57. package/skills/esm/SKILL.md +116 -0
  58. package/skills/etetoolkit/SKILL.md +103 -0
  59. package/skills/event-log-tracing/SKILL.md +85 -0
  60. package/skills/exa-search/SKILL.md +72 -0
  61. package/skills/executing-plans/SKILL.md +69 -0
  62. package/skills/exploratory-data-analysis/SKILL.md +118 -0
  63. package/skills/external-context/SKILL.md +80 -0
  64. package/skills/fastapi/SKILL.md +30 -0
  65. package/skills/finishing-a-development-branch/SKILL.md +106 -0
  66. package/skills/flowio/SKILL.md +114 -0
  67. package/skills/fluidsim/SKILL.md +108 -0
  68. package/skills/generate-image/SKILL.md +108 -0
  69. package/skills/geniml/SKILL.md +117 -0
  70. package/skills/geomaster/SKILL.md +109 -0
  71. package/skills/geopandas/SKILL.md +114 -0
  72. package/skills/get-available-resources/SKILL.md +100 -0
  73. package/skills/gget/SKILL.md +111 -0
  74. package/skills/ginkgo-cloud-lab/SKILL.md +52 -0
  75. package/skills/git-master/SKILL.md +85 -0
  76. package/skills/glycoengineering/SKILL.md +104 -0
  77. package/skills/gtars/SKILL.md +104 -0
  78. package/skills/hackernews-frontpage/SKILL.md +46 -0
  79. package/skills/histolab/SKILL.md +98 -0
  80. package/skills/how-it-works/SKILL.md +25 -0
  81. package/skills/hud/SKILL.md +86 -0
  82. package/skills/hugging-science/SKILL.md +93 -0
  83. package/skills/huggingface/SKILL.md +30 -0
  84. package/skills/hypogenic/SKILL.md +107 -0
  85. package/skills/hypothesis-generation/SKILL.md +118 -0
  86. package/skills/imaging-data-commons/SKILL.md +119 -0
  87. package/skills/infographics/SKILL.md +102 -0
  88. package/skills/iso-13485-certification/SKILL.md +114 -0
  89. package/skills/knowledge-agent/SKILL.md +83 -0
  90. package/skills/labarchive-integration/SKILL.md +98 -0
  91. package/skills/lamindb/SKILL.md +119 -0
  92. package/skills/landsat/SKILL.md +29 -0
  93. package/skills/latchbio-integration/SKILL.md +118 -0
  94. package/skills/latex-posters/SKILL.md +112 -0
  95. package/skills/learn-codebase/SKILL.md +24 -0
  96. package/skills/learner/SKILL.md +118 -0
  97. package/skills/literature-review/SKILL.md +118 -0
  98. package/skills/live-agent-lifecycle/SKILL.md +85 -0
  99. package/skills/mailbox-interactive/SKILL.md +85 -0
  100. package/skills/make-plan/SKILL.md +59 -0
  101. package/skills/markdown-mermaid-writing/SKILL.md +118 -0
  102. package/skills/market-research-reports/SKILL.md +119 -0
  103. package/skills/markitdown/SKILL.md +111 -0
  104. package/skills/markitdown-docs/SKILL.md +28 -0
  105. package/skills/matchms/SKILL.md +91 -0
  106. package/skills/matlab/SKILL.md +118 -0
  107. package/skills/matplotlib/SKILL.md +30 -0
  108. package/skills/mcp-setup/SKILL.md +84 -0
  109. package/skills/medchem/SKILL.md +109 -0
  110. package/skills/mem-search/SKILL.md +96 -0
  111. package/skills/modal/SKILL.md +104 -0
  112. package/skills/model-routing-context/SKILL.md +85 -0
  113. package/skills/molecular-dynamics/SKILL.md +116 -0
  114. package/skills/molfeat/SKILL.md +110 -0
  115. package/skills/multi-perspective-review/SKILL.md +85 -0
  116. package/skills/networkx/SKILL.md +111 -0
  117. package/skills/neurokit2/SKILL.md +114 -0
  118. package/skills/neuropixels-analysis/SKILL.md +112 -0
  119. package/skills/nilearn/SKILL.md +29 -0
  120. package/skills/observability-reliability/SKILL.md +43 -0
  121. package/skills/omc-doctor/SKILL.md +86 -0
  122. package/skills/omc-reference/SKILL.md +119 -0
  123. package/skills/omc-setup/SKILL.md +82 -0
  124. package/skills/omc-teams/SKILL.md +81 -0
  125. package/skills/omero-integration/SKILL.md +111 -0
  126. package/skills/open-notebook/SKILL.md +100 -0
  127. package/skills/openephys/SKILL.md +28 -0
  128. package/skills/opentrons-integration/SKILL.md +110 -0
  129. package/skills/optimize-for-gpu/SKILL.md +119 -0
  130. package/skills/orchestration/SKILL.md +85 -0
  131. package/skills/ownership-session-security/SKILL.md +43 -0
  132. package/skills/paper-lookup/SKILL.md +119 -0
  133. package/skills/paperzilla/SKILL.md +114 -0
  134. package/skills/parallel-web/SKILL.md +64 -0
  135. package/skills/pathfinder/SKILL.md +114 -0
  136. package/skills/pathml/SKILL.md +98 -0
  137. package/skills/pdf/SKILL.md +113 -0
  138. package/skills/peer-review/SKILL.md +119 -0
  139. package/skills/pennylane/SKILL.md +119 -0
  140. package/skills/phylogenetics/SKILL.md +102 -0
  141. package/skills/pi-extension-lifecycle/SKILL.md +41 -0
  142. package/skills/plan/SKILL.md +66 -0
  143. package/skills/polars/SKILL.md +114 -0
  144. package/skills/polars-bio/SKILL.md +84 -0
  145. package/skills/pptx/SKILL.md +118 -0
  146. package/skills/pptx-posters/SKILL.md +112 -0
  147. package/skills/primekg/SKILL.md +97 -0
  148. package/skills/project-session-manager/SKILL.md +85 -0
  149. package/skills/protocolsio-integration/SKILL.md +119 -0
  150. package/skills/pubmed-search/SKILL.md +29 -0
  151. package/skills/pufferlib/SKILL.md +103 -0
  152. package/skills/pydeseq2/SKILL.md +106 -0
  153. package/skills/pydicom/SKILL.md +115 -0
  154. package/skills/pyhealth/SKILL.md +117 -0
  155. package/skills/pylabrobot/SKILL.md +100 -0
  156. package/skills/pymatgen/SKILL.md +28 -0
  157. package/skills/pymc/SKILL.md +108 -0
  158. package/skills/pymoo/SKILL.md +90 -0
  159. package/skills/pyopenms/SKILL.md +119 -0
  160. package/skills/pysam/SKILL.md +118 -0
  161. package/skills/pyspark/SKILL.md +30 -0
  162. package/skills/pytdc/SKILL.md +102 -0
  163. package/skills/pytorch/SKILL.md +31 -0
  164. package/skills/pytorch-lightning/SKILL.md +119 -0
  165. package/skills/pyzotero/SKILL.md +104 -0
  166. package/skills/qiskit/SKILL.md +119 -0
  167. package/skills/qutip/SKILL.md +111 -0
  168. package/skills/ralph/SKILL.md +23 -0
  169. package/skills/ralplan/SKILL.md +105 -0
  170. package/skills/rdflib/SKILL.md +29 -0
  171. package/skills/rdkit/SKILL.md +30 -0
  172. package/skills/read-only-explorer/SKILL.md +85 -0
  173. package/skills/receiving-code-review/SKILL.md +103 -0
  174. package/skills/release/SKILL.md +117 -0
  175. package/skills/remember/SKILL.md +39 -0
  176. package/skills/requesting-code-review/SKILL.md +85 -0
  177. package/skills/requirements-to-task-packet/SKILL.md +65 -0
  178. package/skills/research-grants/SKILL.md +118 -0
  179. package/skills/research-lookup/SKILL.md +117 -0
  180. package/skills/research-reproducibility/SKILL.md +28 -0
  181. package/skills/resource-discovery-config/SKILL.md +43 -0
  182. package/skills/rowan/SKILL.md +100 -0
  183. package/skills/runtime-state-reader/SKILL.md +46 -0
  184. package/skills/safe-bash/SKILL.md +85 -0
  185. package/skills/scanpy/SKILL.md +32 -0
  186. package/skills/scholar-evaluation/SKILL.md +115 -0
  187. package/skills/scientific-brainstorming/SKILL.md +118 -0
  188. package/skills/scientific-critical-thinking/SKILL.md +119 -0
  189. package/skills/scientific-schematics/SKILL.md +116 -0
  190. package/skills/scientific-slides/SKILL.md +117 -0
  191. package/skills/scientific-visualization/SKILL.md +109 -0
  192. package/skills/scientific-writing/SKILL.md +119 -0
  193. package/skills/scikit-bio/SKILL.md +92 -0
  194. package/skills/scikit-learn/SKILL.md +99 -0
  195. package/skills/scikit-survival/SKILL.md +110 -0
  196. package/skills/sciomc/SKILL.md +86 -0
  197. package/skills/scvelo/SKILL.md +106 -0
  198. package/skills/scvi-tools/SKILL.md +114 -0
  199. package/skills/seaborn/SKILL.md +97 -0
  200. package/skills/secure-agent-orchestration-review/SKILL.md +47 -0
  201. package/skills/self-improve/SKILL.md +119 -0
  202. package/skills/semantic-compression/SKILL.md +62 -0
  203. package/skills/setup/SKILL.md +42 -0
  204. package/skills/shap/SKILL.md +103 -0
  205. package/skills/simpy/SKILL.md +116 -0
  206. package/skills/skill/SKILL.md +117 -0
  207. package/skills/skill-search/SKILL.md +67 -0
  208. package/skills/skillify/SKILL.md +46 -0
  209. package/skills/smart-explore/SKILL.md +94 -0
  210. package/skills/sqlite-pandas/SKILL.md +30 -0
  211. package/skills/stable-baselines3/SKILL.md +86 -0
  212. package/skills/state-mutation-locking/SKILL.md +44 -0
  213. package/skills/statistical-analysis/SKILL.md +108 -0
  214. package/skills/statsmodels/SKILL.md +29 -0
  215. package/skills/subagent-driven-development/SKILL.md +89 -0
  216. package/skills/sympy/SKILL.md +115 -0
  217. package/skills/system-prompts/SKILL.md +116 -0
  218. package/skills/systematic-debugging/SKILL.md +119 -0
  219. package/skills/team/SKILL.md +85 -0
  220. package/skills/test-driven-development/SKILL.md +84 -0
  221. package/skills/tiledbvcf/SKILL.md +119 -0
  222. package/skills/timeline-report/SKILL.md +85 -0
  223. package/skills/timesfm-forecasting/SKILL.md +112 -0
  224. package/skills/torch-geometric/SKILL.md +118 -0
  225. package/skills/torchdrug/SKILL.md +118 -0
  226. package/skills/trace/SKILL.md +118 -0
  227. package/skills/transformers/SKILL.md +110 -0
  228. package/skills/treatment-plans/SKILL.md +119 -0
  229. package/skills/ui-render-performance/SKILL.md +41 -0
  230. package/skills/ultragoal/SKILL.md +63 -0
  231. package/skills/ultraqa/SKILL.md +85 -0
  232. package/skills/ultrawork/SKILL.md +20 -0
  233. package/skills/umap-learn/SKILL.md +119 -0
  234. package/skills/usfiscaldata/SKILL.md +118 -0
  235. package/skills/using-git-worktrees/SKILL.md +112 -0
  236. package/skills/using-superpowers/SKILL.md +85 -0
  237. package/skills/using-vetc/SKILL.md +92 -0
  238. package/skills/vaex/SKILL.md +111 -0
  239. package/skills/venue-templates/SKILL.md +113 -0
  240. package/skills/verification-before-completion/SKILL.md +88 -0
  241. package/skills/verification-before-done/SKILL.md +68 -0
  242. package/skills/verify/SKILL.md +33 -0
  243. package/skills/version-bump/SKILL.md +54 -0
  244. package/skills/vetc-analyze-ba/SKILL.md +117 -0
  245. package/skills/vetc-analyze-codebase/SKILL.md +118 -0
  246. package/skills/vetc-api-design/SKILL.md +103 -0
  247. package/skills/vetc-brainstorming/SKILL.md +116 -0
  248. package/skills/vetc-change-proposal/SKILL.md +111 -0
  249. package/skills/vetc-cicd/SKILL.md +113 -0
  250. package/skills/vetc-continuous-learning/SKILL.md +115 -0
  251. package/skills/vetc-deep-interview/SKILL.md +103 -0
  252. package/skills/vetc-docgen/SKILL.md +108 -0
  253. package/skills/vetc-frontend-patterns/SKILL.md +99 -0
  254. package/skills/vetc-iterative-retrieval/SKILL.md +110 -0
  255. package/skills/vetc-java-patterns/SKILL.md +113 -0
  256. package/skills/vetc-meta-skill-creator/SKILL.md +99 -0
  257. package/skills/vetc-oracle-patterns/SKILL.md +109 -0
  258. package/skills/vetc-performance-testing/SKILL.md +104 -0
  259. package/skills/vetc-pr-response/SKILL.md +106 -0
  260. package/skills/vetc-ralph/SKILL.md +108 -0
  261. package/skills/vetc-ralplan/SKILL.md +116 -0
  262. package/skills/vetc-receiving-review/SKILL.md +106 -0
  263. package/skills/vetc-reconcile-patterns/SKILL.md +117 -0
  264. package/skills/vetc-refactoring/SKILL.md +96 -0
  265. package/skills/vetc-runbook/SKILL.md +118 -0
  266. package/skills/vetc-sast/SKILL.md +118 -0
  267. package/skills/vetc-sdlc/SKILL.md +97 -0
  268. package/skills/vetc-security/SKILL.md +117 -0
  269. package/skills/vetc-spec-driven/SKILL.md +111 -0
  270. package/skills/vetc-spec-quality/SKILL.md +117 -0
  271. package/skills/vetc-systematic-debugging/SKILL.md +74 -0
  272. package/skills/vetc-tdd/SKILL.md +96 -0
  273. package/skills/vetc-thinking-pm/SKILL.md +110 -0
  274. package/skills/vetc-ui-visual-qa/SKILL.md +117 -0
  275. package/skills/vetc-verify/SKILL.md +101 -0
  276. package/skills/visual-verdict/SKILL.md +59 -0
  277. package/skills/what-if-oracle/SKILL.md +87 -0
  278. package/skills/widget-rendering/SKILL.md +85 -0
  279. package/skills/wiki/SKILL.md +69 -0
  280. package/skills/workspace-isolation/SKILL.md +85 -0
  281. package/skills/worktree-isolation/SKILL.md +85 -0
  282. package/skills/wowerpoint/SKILL.md +101 -0
  283. package/skills/writer-memory/SKILL.md +82 -0
  284. package/skills/writing-plans/SKILL.md +115 -0
  285. package/skills/writing-skills/SKILL.md +115 -0
  286. package/skills/xgboost/SKILL.md +29 -0
  287. package/skills/xgboost-ts/SKILL.md +28 -0
  288. package/skills/xlsx/SKILL.md +111 -0
  289. package/skills/zarr-python/SKILL.md +101 -0
  290. package/src/categories.ts +383 -0
  291. package/src/format.ts +104 -0
  292. package/src/indexer.ts +101 -0
  293. package/src/proactive.ts +51 -0
  294. package/src/scanner.ts +85 -0
  295. package/src/search.ts +89 -0
  296. package/src/strip.ts +29 -0
  297. package/src/synonyms.ts +83 -0
  298. package/src/text.ts +118 -0
  299. package/src/types.ts +64 -0
@@ -0,0 +1,103 @@
1
+ ---
2
+ name: pufferlib
3
+ description: High-performance reinforcement learning framework optimized for speed and scale. Use when you need fast parallel training, vectorized environments, multi-agent systems, or integration with game environments (Atari, Procgen, NetHack). Achieves 2-10x speedups over standard implementations. For quick prototyping or standard algorithm implementations with extensive documentation, use stable-baselines3 instead.
4
+ ---
5
+
6
+ # PufferLib - High-Performance Reinforcement Learning
7
+
8
+ ## Overview
9
+
10
+ PufferLib is a high-performance reinforcement learning library designed for fast parallel environment simulation and training. It achieves training at millions of steps per second through optimized vectorization, native multi-agent support, and an efficient PPO implementation (PuffeRL). The library provides the Ocean suite of 20+ environments and seamless integration with Gymnasium, PettingZoo, and specialized RL frameworks.
11
+
12
+ ## When to Use This Skill
13
+
14
+ Use this skill when:
15
+ - **Training RL agents** with PPO on any environment (single or multi-agent)
16
+ - **Creating custom environments** using the PufferEnv API
17
+ - **Optimizing performance** for parallel environment simulation (vectorization)
18
+ - **Integrating existing environments** from Gymnasium, PettingZoo, Atari, Procgen, etc.
19
+ - **Developing policies** with CNN, LSTM, or custom architectures
20
+ - **Scaling RL** to millions of steps per second for faster experimentation
21
+ - **Multi-agent RL** with native multi-agent environment support
22
+
23
+ ## Core Capabilities
24
+
25
+ ### 1. High-Performance Training (PuffeRL)
26
+
27
+ PuffeRL is PufferLib's optimized PPO+LSTM training algorithm achieving 1M-4M steps/second.
28
+
29
+ **Quick start training:**
30
+ ```bash
31
+ # CLI training
32
+ puffer train procgen-coinrun --train.device cuda --train.learning-rate 3e-4
33
+
34
+ # Distributed training
35
+ torchrun --nproc_per_node=4 train.py
36
+ ```
37
+
38
+ **Python training loop:**
39
+ ```python
40
+ import pufferlib
41
+ from pufferlib import PuffeRL
42
+
43
+ # Create vectorized environment
44
+ env = pufferlib.make('procgen-coinrun', num_envs=256)
45
+
46
+ # Create trainer
47
+ trainer = PuffeRL(
48
+ env=env,
49
+ policy=my_policy,
50
+ device='cuda',
51
+ learning_rate=3e-4,
52
+ batch_size=32768
53
+ )
54
+
55
+ # Training loop
56
+ for iteration in range(num_iterations):
57
+ trainer.evaluate() # Collect rollouts
58
+ trainer.train() # Train on batch
59
+ trainer.mean_and_log() # Log results
60
+ ```
61
+
62
+ **For comprehensive training guidance**, see the docs, which cover:
63
+ - Complete training workflow and CLI options
64
+ - Hyperparameter tuning with Protein
65
+ - Distributed multi-GPU/multi-node training
66
+ - Logger integration (Weights & Biases, Neptune)
67
+ - Checkpointing and resume training
68
+ - Performance optimization tips
69
+ - Curriculum learning patterns
70
+
71
+ ### 2. Environment Development (PufferEnv)
72
+
73
+ Create custom high-performance environments with the PufferEnv API.
74
+
75
+ **Basic environment structure:**
76
+ ```python
77
+ import numpy as np
78
+ from pufferlib import PufferEnv
79
+
80
+ class MyEnvironment(PufferEnv):
81
+ def __init__(self, buf=None):
82
+ super().__init__(buf)
83
+
84
+ # Define spaces
85
+ self.observation_space = self.make_space((4,))
86
+ ```
87
+ ### 3. Vectorization and Performance
88
+
89
+ Achieve maximum throughput with optimized parallel simulation.
90
+
91
+ **Vectorization setup:**
92
+ ```python
93
+ import pufferlib
94
+
95
+ # Automatic vectorization
96
+ env = pufferlib.make('environment_name', num_envs=256, num_workers=8)
97
+
98
+ # Performance benchmarks:
99
+ # - Pure Python envs: 100k-500k SPS
100
+ # - C-based envs: 100M+ SPS
101
+ # - With training: 400k-4M total SPS
102
+
103
+ ```
@@ -0,0 +1,106 @@
1
+ ---
2
+ name: pydeseq2
3
+ description: Differential gene expression analysis (Python DESeq2). Identify DE genes from bulk RNA-seq counts, Wald tests, FDR correction, volcano/MA plots, for RNA-seq analysis.
4
+ ---
5
+
6
+ # PyDESeq2
7
+
8
+ ## Overview
9
+
10
+ PyDESeq2 is a Python implementation of DESeq2 for differential expression analysis with bulk RNA-seq data. Design and execute complete workflows from data loading through result interpretation, including single-factor and multi-factor designs, Wald tests with multiple testing correction, optional apeGLM shrinkage, and integration with pandas and AnnData.
11
+
12
+ ## When to Use This Skill
13
+
14
+ This skill should be used when:
15
+ - Analyzing bulk RNA-seq count data for differential expression
16
+ - Comparing gene expression between experimental conditions (e.g., treated vs control)
17
+ - Performing multi-factor designs accounting for batch effects or covariates
18
+ - Converting R-based DESeq2 workflows to Python
19
+ - Integrating differential expression analysis into Python-based pipelines
20
+ - Users mention "DESeq2", "differential expression", "RNA-seq analysis", or "PyDESeq2"
21
+
22
+ ## Quick Start Workflow
23
+
24
+ For users who want to perform a standard differential expression analysis:
25
+
26
+ ```python
27
+ import pandas as pd
28
+ from pydeseq2.dds import DeseqDataSet
29
+ from pydeseq2.ds import DeseqStats
30
+
31
+ # 1. Load data
32
+ counts_df = pd.read_csv("counts.csv", index_col=0).T # Transpose to samples × genes
33
+ metadata = pd.read_csv("metadata.csv", index_col=0)
34
+
35
+ # 2. Filter low-count genes
36
+ genes_to_keep = counts_df.columns[counts_df.sum(axis=0) >= 10]
37
+ counts_df = counts_df[genes_to_keep]
38
+
39
+ # 3. Initialize and fit DESeq2
40
+ dds = DeseqDataSet(
41
+ counts=counts_df,
42
+ metadata=metadata,
43
+ design="~condition",
44
+ refit_cooks=True
45
+ )
46
+ dds.deseq2()
47
+
48
+ # 4. Perform statistical testing
49
+ ds = DeseqStats(dds, contrast=["condition", "treated", "control"])
50
+ ds.summary()
51
+
52
+ # 5. Access results
53
+ results = ds.results_df
54
+ significant = results[results.padj < 0.05]
55
+ print(f"Found {len(significant)} significant genes")
56
+ ```
57
+
58
+ ## Core Workflow Steps
59
+
60
+ ### Step 1: Data Preparation
61
+
62
+ **Input requirements:**
63
+ - **Count matrix:** Samples × genes DataFrame with non-negative integer read counts
64
+ - **Metadata:** Samples × variables DataFrame with experimental factors
65
+
66
+ **Common data loading patterns:**
67
+
68
+ ```python
69
+ # From TSV
70
+ counts_df = pd.read_csv("counts.tsv", sep="\t", index_col=0).T
71
+
72
+ # From AnnData
73
+ import anndata as ad
74
+ adata = ad.read_h5ad("data.h5ad")
75
+ counts_df = pd.DataFrame(adata.X, index=adata.obs_names, columns=adata.var_names)
76
+ metadata = adata.obs
77
+ ```
78
+
79
+ **Data filtering:**
80
+
81
+ ```python
82
+ # Remove low-count genes
83
+ genes_to_keep = counts_df.columns[counts_df.sum(axis=0) >= 10]
84
+ counts_df = counts_df[genes_to_keep]
85
+
86
+ # Remove samples with missing metadata
87
+ samples_to_keep = ~metadata.condition.isna()
88
+ counts_df = counts_df.loc[samples_to_keep]
89
+ metadata = metadata.loc[samples_to_keep]
90
+ ```
91
+
92
+ ### Step 2: Design Specification
93
+
94
+ The design formula specifies how gene expression is modeled.
95
+
96
+ **Single-factor designs:**
97
+ ```python
98
+ design = "~condition" # Simple two-group comparison
99
+ ```
100
+
101
+ **Multi-factor designs:**
102
+ ```python
103
+ design = "~batch + condition" # Control for batch effects
104
+ design = "~age + condition" # Include continuous covariate
105
+ design = "~group + condition + group:condition" # Interaction effects
106
+ ```
@@ -0,0 +1,115 @@
1
+ ---
2
+ name: pydicom
3
+ description: Python library for working with DICOM (Digital Imaging and Communications in Medicine) files. Use this skill when reading, writing, or modifying medical imaging data in DICOM format, extracting pixel data from medical images (CT, MRI, X-ray, ultrasound), anonymizing DICOM files, working with DICOM metadata and tags, converting DICOM images to other formats, handling compressed DICOM data, or processing medical imaging datasets. Applies to tasks involving medical image analysis, PACS systems, radiology workflows, and healthcare imaging applications.
4
+ ---
5
+
6
+ # Pydicom
7
+
8
+ ## Overview
9
+
10
+ Pydicom is a pure Python package for working with DICOM files, the standard format for medical imaging data. This skill provides guidance on reading, writing, and manipulating DICOM files, including working with pixel data, metadata, and various compression formats.
11
+
12
+ ## When to Use This Skill
13
+
14
+ Use this skill when working with:
15
+ - Medical imaging files (CT, MRI, X-ray, ultrasound, PET, etc.)
16
+ - DICOM datasets requiring metadata extraction or modification
17
+ - Pixel data extraction and image processing from medical scans
18
+ - DICOM anonymization for research or data sharing
19
+ - Converting DICOM files to standard image formats
20
+ - Compressed DICOM data requiring decompression
21
+ - DICOM sequences and structured reports
22
+ - Multi-slice volume reconstruction
23
+ - PACS (Picture Archiving and Communication System) integration
24
+
25
+ ## Core Workflows
26
+
27
+ ### Reading DICOM Files
28
+
29
+ Read a DICOM file using `pydicom.dcmread()`:
30
+
31
+ ```python
32
+ import pydicom
33
+
34
+ # Read a DICOM file
35
+ ds = pydicom.dcmread('path/to/file.dcm')
36
+
37
+ # Access metadata
38
+ print(f"Patient Name: {ds.PatientName}")
39
+ print(f"Study Date: {ds.StudyDate}")
40
+ print(f"Modality: {ds.Modality}")
41
+
42
+ # Display all elements
43
+ print(ds)
44
+ ```
45
+
46
+ **Key points:**
47
+ - `dcmread()` returns a `Dataset` object
48
+ - Access data elements using attribute notation (e.g., `ds.PatientName`) or tag notation (e.g., `ds[0x0010, 0x0010]`)
49
+ - Use `ds.file_meta` to access file metadata like Transfer Syntax UID
50
+ - Handle missing attributes with `getattr(ds, 'AttributeName', default_value)` or `hasattr(ds, 'AttributeName')`
51
+
52
+ ### Working with Pixel Data
53
+
54
+ Extract and manipulate image data from DICOM files:
55
+
56
+ ```python
57
+ import pydicom
58
+ import numpy as np
59
+ import matplotlib.pyplot as plt
60
+
61
+ # Read DICOM file
62
+ ds = pydicom.dcmread('image.dcm')
63
+
64
+ # Get pixel array (requires numpy)
65
+ pixel_array = ds.pixel_array
66
+
67
+ # Apply windowing for display (CT/MRI)
68
+ if hasattr(ds, 'WindowCenter') and hasattr(ds, 'WindowWidth'):
69
+ from pydicom.pixel_data_handlers.util import apply_voi_lut
70
+ windowed_image = apply_voi_lut(pixel_array, ds)
71
+ else:
72
+ windowed_image = pixel_array
73
+
74
+ # Display image
75
+ plt.imshow(windowed_image, cmap='gray')
76
+ plt.title(f"{ds.Modality} - {ds.StudyDescription}")
77
+ plt.axis('off')
78
+ plt.show()
79
+ ```
80
+
81
+ **Working with color images:**
82
+
83
+ ```python
84
+ # RGB images have shape (rows, columns, 3)
85
+ if ds.PhotometricInterpretation == 'RGB':
86
+ rgb_image = ds.pixel_array
87
+ plt.imshow(rgb_image)
88
+ elif ds.PhotometricInterpretation == 'YBR_FULL':
89
+ from pydicom.pixel_data_handlers.util import convert_color_space
90
+ rgb_image = convert_color_space(ds.pixel_array, 'YBR_FULL', 'RGB')
91
+ plt.imshow(rgb_image)
92
+ ```
93
+
94
+ **Multi-frame images (videos/series):**
95
+
96
+ ```python
97
+ # For multi-frame DICOM files
98
+ if hasattr(ds, 'NumberOfFrames') and ds.NumberOfFrames > 1:
99
+ frames = ds.pixel_array # Shape: (num_frames, rows, columns)
100
+ print(f"Number of frames: {frames.shape[0]}")
101
+
102
+ # Display specific frame
103
+ plt.imshow(frames[0], cmap='gray')
104
+ ```
105
+ ```python
106
+ # Normalize to 0-255 range
107
+ if pixel_array.dtype != np.uint8:
108
+ pixel_array = ((pixel_array - pixel_array.min()) /
109
+ (pixel_array.max() - pixel_array.min()) * 255).astype(np.uint8)
110
+
111
+ # Save as PNG
112
+ image = Image.fromarray(pixel_array)
113
+ image.save('output.png')
114
+ ```
115
+
@@ -0,0 +1,117 @@
1
+ ---
2
+ name: pyhealth
3
+ description: Build clinical/healthcare deep-learning pipelines with PyHealth — loading EHR/signal/imaging datasets (MIMIC-III/IV, eICU, OMOP, SleepEDF, ChestXray14, EHRShot), defining tasks (mortality, readmission, length-of-stay, drug recommendation, sleep staging, ICD coding, EEG events), instantiating models (Transformer, RETAIN, GAMENet, SafeDrug, MICRON, StageNet, AdaCare, CNN/RNN/MLP), training with the PyHealth Trainer, computing clinical metrics, and using medical code utilities (ICD/ATC/NDC/RxNorm lookup and cross-mapping). Use this skill whenever the user mentions PyHealth, MIMIC, eICU, OMOP, EHR modeling, clinical prediction, drug recommendation, sleep staging, medical code mapping, ICD/ATC codes, or any healthcare ML pipeline that fits the dataset → task → model → trainer → metrics pattern, even if "PyHealth" isn't named explicitly.
4
+ ---
5
+
6
+ # PyHealth
7
+
8
+ PyHealth (https://pyhealth.dev/) is a Python toolkit for clinical deep learning. It provides a unified, modular pipeline across electronic health records (EHR), physiological signals, and medical imaging.
9
+
10
+ The library is built around a **5-stage pipeline** — `Dataset → Task → Model → Trainer → Metrics` — where each stage is replaceable and the interfaces between stages are stable. Code that follows this pipeline shape composes well; code that bypasses it usually fights the library.
11
+
12
+ ## When to use this skill
13
+
14
+ Use this skill whenever the user is doing clinical/healthcare ML and any of the following are true:
15
+
16
+ - They mention PyHealth, MIMIC-III/IV, eICU, OMOP-CDM, EHRShot, SleepEDF, SHHS, ISRUC, COVID19-CXR, ChestX-ray14, TUEV/TUAB.
17
+ - They want to predict mortality, readmission, length of stay, drug recommendations, sleep stages, ICD codes, EEG events, or de-identification.
18
+ - They need to look up or cross-map medical codes (ICD-9-CM, ICD-10-CM, ATC, NDC, RxNorm, CCS).
19
+ - They have EHR-shaped data and want to train a clinical model without writing the plumbing themselves.
20
+
21
+ PyHealth is the right tool when the workflow fits its 5 stages. If the user just wants generic PyTorch on tabular data, this skill is not necessary.
22
+ ```bash
23
+ # Create a project with the right Python
24
+ uv init my-pyhealth-project
25
+ cd my-pyhealth-project
26
+ uv python pin 3.12
27
+
28
+ # Add PyHealth (this also pulls in PyTorch and friends)
29
+ uv add pyhealth
30
+
31
+ # Run scripts inside the env
32
+ uv run python train.py
33
+ ```
34
+
35
+ For a one-off script without a project, use `uv run --with pyhealth python script.py`. For the legacy 1.x line (Python 3.9+), use `uv add pyhealth==1.16`. Detailed install notes, MIMIC access, and GPU/CPU device tips are covered in the docs.
36
+
37
+ ## The 5-stage pipeline
38
+
39
+ A complete pipeline is typically <20 lines. This is the canonical shape — start here and modify pieces:
40
+
41
+ ```python
42
+ from pyhealth.datasets import MIMIC3Dataset, split_by_patient, get_dataloader
43
+ from pyhealth.tasks import MortalityPredictionMIMIC3
44
+ from pyhealth.models import Transformer
45
+ from pyhealth.trainer import Trainer
46
+ from pyhealth.metrics.binary import binary_metrics_fn
47
+
48
+ # 1. Dataset — raw patient registry
49
+ base = MIMIC3Dataset(
50
+ root="https://storage.googleapis.com/pyhealth/Synthetic_MIMIC-III/",
51
+ tables=["DIAGNOSES_ICD", "PROCEDURES_ICD", "PRESCRIPTIONS"],
52
+ )
53
+
54
+ # 2. Task — converts patients into supervised samples
55
+ samples = base.set_task(MortalityPredictionMIMIC3())
56
+
57
+ # 3. Split + DataLoaders (split by patient to avoid leakage)
58
+ train_ds, val_ds, test_ds = split_by_patient(samples, [0.8, 0.1, 0.1])
59
+ train_loader = get_dataloader(train_ds, batch_size=32, shuffle=True)
60
+ val_loader = get_dataloader(val_ds, batch_size=32, shuffle=False)
61
+ test_loader = get_dataloader(test_ds, batch_size=32, shuffle=False)
62
+
63
+ # 4. Model — must be passed the SampleDataset, not the BaseDataset
64
+ model = Transformer(dataset=samples)
65
+
66
+ # 5. Train + evaluate
67
+ trainer = Trainer(model=model)
68
+ trainer.train(
69
+ train_dataloader=train_loader,
70
+ val_dataloader=val_loader,
71
+ epochs=50,
72
+ monitor="pr_auc",
73
+ )
74
+
75
+ y_true, y_prob, _ = trainer.inference(test_loader)
76
+ print(binary_metrics_fn(y_true, y_prob, metrics=["pr_auc", "roc_auc"]))
77
+ ```
78
+
79
+ A copy-pasteable starter is in `assets/starter_pipeline.py`.
80
+
81
+ ## Critical things to get right
82
+
83
+ These are the mistakes that PyHealth code most commonly trips on. Internalize them before writing pipelines:
84
+
85
+ 1. **Models take a `SampleDataset`, not a `BaseDataset`.** `MIMIC3Dataset(...)` returns a `BaseDataset` (a queryable patient registry). Only after `.set_task(task)` do you get a `SampleDataset`, which is what models, splitters, and DataLoaders expect. If you pass `base` to a model, it will fail or behave wrong.
86
+
87
+ 2. **Always split by patient (or visit), not by sample.** Random sample-level splits leak information across train/test because the same patient can appear in both. Use `split_by_patient` for patient-level prediction, `split_by_visit` only when visits are independent.
88
+
89
+ 3. **Match the task to the dataset.** Tasks are dataset-specific: `MortalityPredictionMIMIC3` won't work on MIMIC-IV — use `MortalityPredictionMIMIC4` or `InHospitalMortalityMIMIC4`. For the full mapping, see the docs.
90
+
91
+ 4. **Pick `monitor` to match the task type.** For binary classification use `"pr_auc"` or `"roc_auc"`. For multilabel (drug rec) use `"pr_auc_samples"` or `"jaccard_samples"`. For multiclass use `"accuracy"` or `"f1_macro"`. Wrong monitor → checkpoint selection saves the wrong epoch.
92
+
93
+ 5. **MIMIC-IV uses `ehr_root=`, not `root=`.** This is the one inconsistency in the dataset constructors.
94
+
95
+ 6. **For reproducible work, point `cache_dir=` somewhere persistent.** PyHealth caches the parsed dataset; without `cache_dir`, you re-parse every run.
96
+
97
+ ## How to use this skill
98
+
99
+ PyHealth has a large API surface — there's no point loading it all at once. Read the reference file that matches the user's task:
100
+
101
+ | If the user is asking about… | Read |
102
+ |---|---|
103
+ | Installing, env setup, MIMIC access, GPU | `(see docs)` |
104
+ | Which dataset class to use, loading patterns, splitting | `(see docs)` |
105
+ | What prediction task to choose (mortality, readmission, drug rec, sleep…) | `(see docs)` |
106
+ | Picking a model architecture, model-specific arguments | `(see docs)` |
107
+ | Looking up or cross-mapping ICD/ATC/NDC/RxNorm/CCS codes, tokenizers | `(see docs)` |
108
+ | End-to-end recipes for common scenarios | `(see docs)` |
109
+
110
+ For multi-step tasks (e.g., "build a drug recommendation pipeline on MIMIC-IV"), read `tasks.md` + `models.md` + `examples.md` together — they cross-reference each other.
111
+
112
+ ## A note on style
113
+
114
+ Write minimal, idiomatic PyHealth. The library is opinionated; lean into its abstractions instead of reimplementing them in raw PyTorch. If you find yourself writing a custom training loop, ask whether `Trainer` would do the job — it almost always will, and it handles checkpointing, logging, and best-model selection for free.
115
+
116
+ When the user has private MIMIC access, point them at the local CSV root; for demos and learning, the synthetic MIMIC-III bucket (`https://storage.googleapis.com/pyhealth/Synthetic_MIMIC-III/`) is fine and works without credentialing.
117
+
@@ -0,0 +1,100 @@
1
+ ---
2
+ name: pylabrobot
3
+ description: Vendor-agnostic lab automation framework. Use when controlling multiple equipment types (Hamilton, Tecan, Opentrons, plate readers, pumps) or needing unified programming across different vendors. Best for complex workflows, multi-vendor setups, simulation. For Opentrons-only protocols with official API, opentrons-integration may be simpler.
4
+ ---
5
+
6
+ # PyLabRobot
7
+
8
+ ## Overview
9
+
10
+ PyLabRobot is a hardware-agnostic, pure Python Software Development Kit for automated and autonomous laboratories. Use this skill to control liquid handling robots, plate readers, pumps, heater shakers, incubators, centrifuges, and other laboratory automation equipment through a unified Python interface that works across platforms (Windows, macOS, Linux).
11
+
12
+ ## When to Use This Skill
13
+
14
+ Use this skill when:
15
+ - Programming liquid handling robots (Hamilton STAR/STARlet, Opentrons OT-2, Tecan EVO)
16
+ - Automating laboratory workflows involving pipetting, sample preparation, or analytical measurements
17
+ - Managing deck layouts and laboratory resources (plates, tips, containers, troughs)
18
+ - Integrating multiple lab devices (liquid handlers, plate readers, heater shakers, pumps)
19
+ - Creating reproducible laboratory protocols with state management
20
+ - Simulating protocols before running on physical hardware
21
+ - Reading plates using BMG CLARIOstar or other supported plate readers
22
+ - Controlling temperature, shaking, centrifugation, or other material handling operations
23
+ - Working with laboratory automation in Python
24
+
25
+ ## Core Capabilities
26
+
27
+ PyLabRobot provides comprehensive laboratory automation through six main capability areas, each detailed in the references/ directory:
28
+
29
+ ### 1. Liquid Handling (`(see docs)`)
30
+
31
+ Control liquid handling robots for aspirating, dispensing, and transferring liquids. Key operations include:
32
+ - **Basic Operations**: Aspirate, dispense, transfer liquids between wells
33
+ - **Tip Management**: Pick up, drop, and track pipette tips automatically
34
+ - **Advanced Techniques**: Multi-channel pipetting, serial dilutions, plate replication
35
+ - **Volume Tracking**: Automatic tracking of liquid volumes in wells
36
+ - **Hardware Support**: Hamilton STAR/STARlet, Opentrons OT-2, Tecan EVO, and others
37
+
38
+ ### 2. Resource Management (`(see docs)`)
39
+
40
+ Manage laboratory resources in a hierarchical system:
41
+ - **Resource Types**: Plates, tip racks, troughs, tubes, carriers, and custom labware
42
+ - **Deck Layout**: Assign resources to deck positions with coordinate systems
43
+ - **State Management**: Track tip presence, liquid volumes, and resource states
44
+ - **Serialization**: Save and load deck layouts and states from JSON files
45
+ - **Resource Discovery**: Access wells, tips, and containers through intuitive APIs
46
+
47
+ ### 3. Hardware Backends (`(see docs)`)
48
+
49
+ Connect to diverse laboratory equipment through backend abstraction:
50
+ - **Liquid Handlers**: Hamilton STAR (full support), Opentrons OT-2, Tecan EVO
51
+ - **Simulation**: ChatterboxBackend for protocol testing without hardware
52
+ - **Platform Support**: Works on Windows, macOS, Linux, and Raspberry Pi
53
+ - **Backend Switching**: Change robots by swapping backend without rewriting protocols
54
+
55
+ ### 4. Analytical Equipment (`(see docs)`)
56
+
57
+ Integrate plate readers and analytical instruments:
58
+ - **Plate Readers**: BMG CLARIOstar for absorbance, luminescence, fluorescence
59
+ - **Scales**: Mettler Toledo integration for mass measurements
60
+ - **Integration Patterns**: Combine liquid handlers with analytical equipment
61
+ - **Automated Workflows**: Move plates between devices automatically
62
+
63
+ ### 5. Material Handling (`(see docs)`)
64
+
65
+ Control environmental and material handling equipment:
66
+ - **Heater Shakers**: Hamilton HeaterShaker, Inheco ThermoShake
67
+ - **Incubators**: Inheco and Thermo Fisher incubators with temperature control
68
+ - **Centrifuges**: Agilent VSpin with bucket positioning and spin control
69
+ - **Pumps**: Cole Parmer Masterflex for fluid pumping operations
70
+ - **Temperature Control**: Set and monitor temperatures during protocols
71
+
72
+ ### 6. Visualization & Simulation (`(see docs)`)
73
+
74
+ Visualize and simulate laboratory protocols:
75
+ - **Browser Visualizer**: Real-time 3D visualization of deck state
76
+ - **Simulation Mode**: Test protocols without physical hardware
77
+ - **State Tracking**: Monitor tip presence and liquid volumes visually
78
+ - **Deck Editor**: Graphical tool for designing deck layouts
79
+ - **Protocol Validation**: Verify protocols before running on hardware
80
+
81
+ ## Quick Start
82
+
83
+ To get started with PyLabRobot, install the package and initialize a liquid handler:
84
+
85
+ ```python
86
+ # Basic liquid handling setup
87
+ from pylabrobot.liquid_handling import LiquidHandler
88
+ from pylabrobot.liquid_handling.backends import STAR
89
+ from pylabrobot.resources import STARLetDeck
90
+
91
+ # Initialize liquid handler
92
+ lh = LiquidHandler(backend=STAR(), deck=STARLetDeck())
93
+ await lh.setup()
94
+
95
+ # Basic operations
96
+ await lh.pick_up_tips(tip_rack["A1:H1"])
97
+ await lh.aspirate(plate["A1"], vols=100)
98
+ await lh.dispense(plate["A2"], vols=100)
99
+ await lh.drop_tips()
100
+ ```
@@ -0,0 +1,28 @@
1
+ ---
2
+ name: pymatgen
3
+ description: Materials science computational library. Use when working with crystal structures, phase diagrams, electronic structure, diffusion analysis, or materials property prediction. Trigger on imports of pymatgen, Structure, PhaseDiagram, or mentions of crystal lattice, band structure, DFT, materials genome.
4
+ ---
5
+ # pymatgen
6
+
7
+ Use this skill for materials science computations.
8
+
9
+ ## Core patterns
10
+
11
+ - **Structure**: `Structure.from_file('POSCAR')` / `structure.to(fmt='cif')`.
12
+ - **Lattice**: `Lattice.cubic(4.2)`, `Lattice.hexagonal(a=3.0, c=5.0)`.
13
+ - **Sites**: `structure.sites` → `site.species`, `site.coords`, `site.frac_coords`.
14
+ - **Phase diagram**: `PhaseDiagram(entries)` → `pd.get_equilibrium_reaction_energy(entry)`.
15
+ - **Diffusion**: `DiffusionAnalyzer.from_files()` for MD trajectory analysis.
16
+
17
+ ## Rules
18
+
19
+ - Always check structure validity: `structure.is_valid(tol=0.5)`.
20
+ - Use `structure.make_supercell()` for defect calculations, not manual replication.
21
+ - For DFT workflows, validate k-points and convergence parameters.
22
+
23
+ ## Anti-patterns
24
+
25
+ - Don't compare floating point coordinates directly — use `structure.matches(other)`.
26
+ - Don't create structures with overlapping sites without checking tolerance.
27
+
28
+
@@ -0,0 +1,108 @@
1
+ ---
2
+ name: pymc
3
+ description: Bayesian modeling with PyMC. Build hierarchical models, run MCMC (NUTS) or variational inference, compare models with LOO/WAIC, and run prior/posterior predictive checks for probabilistic programming and inference.
4
+ ---
5
+
6
+ # PyMC Bayesian Modeling
7
+
8
+ ## Overview
9
+
10
+ PyMC is a Python library for Bayesian modeling and probabilistic programming. Build, fit, validate, and compare Bayesian models using PyMC's modern API (version 5.x+), including hierarchical models, MCMC sampling (NUTS), variational inference, and model comparison (LOO, WAIC).
11
+
12
+ ## When to Use This Skill
13
+
14
+ This skill should be used when:
15
+ - Building Bayesian models (linear/logistic regression, hierarchical models, time series, etc.)
16
+ - Performing MCMC sampling or variational inference
17
+ - Conducting prior/posterior predictive checks
18
+ - Diagnosing sampling issues (divergences, convergence, ESS)
19
+ - Comparing multiple models using information criteria (LOO, WAIC)
20
+ - Implementing uncertainty quantification through Bayesian methods
21
+ - Working with hierarchical/multilevel data structures
22
+ - Handling missing data or measurement error in a principled way
23
+
24
+ ## Standard Bayesian Workflow
25
+
26
+ Follow this workflow for building and validating Bayesian models:
27
+
28
+ ### 1. Data Preparation
29
+
30
+ ```python
31
+ import pymc as pm
32
+ import arviz as az
33
+ import numpy as np
34
+
35
+ # Load and prepare data
36
+ X = ... # Predictors
37
+ y = ... # Outcomes
38
+
39
+ # Standardize predictors for better sampling
40
+ X_mean = X.mean(axis=0)
41
+ X_std = X.std(axis=0)
42
+ X_scaled = (X - X_mean) / X_std
43
+ ```
44
+
45
+ **Key practices:**
46
+ - Standardize continuous predictors (improves sampling efficiency)
47
+ - Center outcomes when possible
48
+ - Handle missing data explicitly (treat as parameters)
49
+ - Use named dimensions with `coords` for clarity
50
+
51
+ ### 2. Model Building
52
+
53
+ ```python
54
+ coords = {
55
+ 'predictors': ['var1', 'var2', 'var3'],
56
+ 'obs_id': np.arange(len(y))
57
+ }
58
+
59
+ with pm.Model(coords=coords) as model:
60
+ # Priors
61
+ alpha = pm.Normal('alpha', mu=0, sigma=1)
62
+ beta = pm.Normal('beta', mu=0, sigma=1, dims='predictors')
63
+ sigma = pm.HalfNormal('sigma', sigma=1)
64
+
65
+ # Linear predictor
66
+
67
+ ### 3. Prior Predictive Check
68
+
69
+ **Always validate priors before fitting:**
70
+
71
+ ```python
72
+ with model:
73
+ prior_pred = pm.sample_prior_predictive(samples=1000, random_seed=42)
74
+
75
+ # Visualize
76
+ az.plot_ppc(prior_pred, group='prior')
77
+ ```
78
+
79
+ **Check:**
80
+ - Do prior predictions span reasonable values?
81
+ - Are extreme values plausible given domain knowledge?
82
+ - If priors generate implausible data, adjust and re-check
83
+
84
+ ### 4. Fit Model
85
+
86
+ ```python
87
+ with model:
88
+ # Optional: Quick exploration with ADVI
89
+ # approx = pm.fit(n=20000)
90
+
91
+ # Full MCMC inference
92
+ idata = pm.sample(
93
+ draws=2000,
94
+ tune=1000,
95
+ chains=4,
96
+ target_accept=0.9,
97
+ random_seed=42,
98
+ idata_kwargs={'log_likelihood': True}) # For model comparison
99
+ ```
100
+ ### 5. Check Diagnostics
101
+
102
+ **Use the diagnostic script:**
103
+
104
+ ```python
105
+ from scripts.model_diagnostics import check_diagnostics
106
+
107
+ results = check_diagnostics(idata, var_names=['alpha', 'beta', 'sigma'])
108
+ ```