@bgicli/bgicli 2.1.1 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (1266) hide show
  1. package/data/skills/aav-vector-design-agent/SKILL.md +198 -0
  2. package/data/skills/adaptyv/SKILL.md +112 -0
  3. package/data/skills/adhd-daily-planner/SKILL.md +271 -0
  4. package/data/skills/aeon/SKILL.md +372 -0
  5. package/data/skills/agent-browser/SKILL.md +159 -0
  6. package/data/skills/agentd-drug-discovery/SKILL.md +52 -0
  7. package/data/skills/ai-analyzer/SKILL.md +218 -0
  8. package/data/skills/alphafold/SKILL.md +183 -0
  9. package/data/skills/alphafold-database/SKILL.md +500 -0
  10. package/data/skills/anndata/SKILL.md +394 -0
  11. package/data/skills/antibody-design-agent/SKILL.md +64 -0
  12. package/data/skills/arboreto/SKILL.md +237 -0
  13. package/data/skills/armored-cart-design-agent/SKILL.md +225 -0
  14. package/data/skills/arxiv-search/SKILL.md +224 -0
  15. package/data/skills/autonomous-oncology-agent/SKILL.md +77 -0
  16. package/data/skills/bayesian-optimizer/SKILL.md +60 -0
  17. package/data/skills/benchling-integration/SKILL.md +473 -0
  18. package/data/skills/bgpt-paper-search/SKILL.md +81 -0
  19. package/data/skills/bindcraft/SKILL.md +198 -0
  20. package/data/skills/binder-design/SKILL.md +182 -0
  21. package/data/skills/binding-characterization/SKILL.md +234 -0
  22. package/data/skills/bindingdb-database/SKILL.md +332 -0
  23. package/data/skills/bio-admet-prediction/SKILL.md +224 -0
  24. package/data/skills/bio-alignment-files-bam-statistics/SKILL.md +340 -0
  25. package/data/skills/bio-alignment-filtering/SKILL.md +322 -0
  26. package/data/skills/bio-alignment-indexing/SKILL.md +249 -0
  27. package/data/skills/bio-alignment-io/SKILL.md +301 -0
  28. package/data/skills/bio-alignment-msa-parsing/SKILL.md +366 -0
  29. package/data/skills/bio-alignment-msa-statistics/SKILL.md +375 -0
  30. package/data/skills/bio-alignment-pairwise/SKILL.md +277 -0
  31. package/data/skills/bio-alignment-sorting/SKILL.md +296 -0
  32. package/data/skills/bio-alignment-validation/SKILL.md +374 -0
  33. package/data/skills/bio-atac-seq-atac-peak-calling/SKILL.md +221 -0
  34. package/data/skills/bio-atac-seq-atac-qc/SKILL.md +292 -0
  35. package/data/skills/bio-atac-seq-differential-accessibility/SKILL.md +268 -0
  36. package/data/skills/bio-atac-seq-footprinting/SKILL.md +256 -0
  37. package/data/skills/bio-atac-seq-motif-deviation/SKILL.md +319 -0
  38. package/data/skills/bio-atac-seq-nucleosome-positioning/SKILL.md +321 -0
  39. package/data/skills/bio-basecalling/SKILL.md +368 -0
  40. package/data/skills/bio-batch-downloads/SKILL.md +384 -0
  41. package/data/skills/bio-batch-processing/SKILL.md +303 -0
  42. package/data/skills/bio-bedgraph-handling/SKILL.md +336 -0
  43. package/data/skills/bio-blast-searches/SKILL.md +354 -0
  44. package/data/skills/bio-causal-genomics-colocalization-analysis/SKILL.md +264 -0
  45. package/data/skills/bio-causal-genomics-fine-mapping/SKILL.md +267 -0
  46. package/data/skills/bio-causal-genomics-mediation-analysis/SKILL.md +264 -0
  47. package/data/skills/bio-causal-genomics-mendelian-randomization/SKILL.md +221 -0
  48. package/data/skills/bio-causal-genomics-pleiotropy-detection/SKILL.md +292 -0
  49. package/data/skills/bio-cfdna-preprocessing/SKILL.md +200 -0
  50. package/data/skills/bio-chipseq-differential-binding/SKILL.md +262 -0
  51. package/data/skills/bio-chipseq-motif-analysis/SKILL.md +387 -0
  52. package/data/skills/bio-chipseq-peak-annotation/SKILL.md +239 -0
  53. package/data/skills/bio-chipseq-peak-calling/SKILL.md +277 -0
  54. package/data/skills/bio-chipseq-qc/SKILL.md +391 -0
  55. package/data/skills/bio-chipseq-super-enhancers/SKILL.md +288 -0
  56. package/data/skills/bio-chipseq-visualization/SKILL.md +289 -0
  57. package/data/skills/bio-clinical-databases-clinvar-lookup/SKILL.md +188 -0
  58. package/data/skills/bio-clinical-databases-dbsnp-queries/SKILL.md +171 -0
  59. package/data/skills/bio-clinical-databases-gnomad-frequencies/SKILL.md +205 -0
  60. package/data/skills/bio-clinical-databases-hla-typing/SKILL.md +248 -0
  61. package/data/skills/bio-clinical-databases-myvariant-queries/SKILL.md +174 -0
  62. package/data/skills/bio-clinical-databases-pharmacogenomics/SKILL.md +232 -0
  63. package/data/skills/bio-clinical-databases-polygenic-risk/SKILL.md +276 -0
  64. package/data/skills/bio-clinical-databases-somatic-signatures/SKILL.md +261 -0
  65. package/data/skills/bio-clinical-databases-tumor-mutational-burden/SKILL.md +301 -0
  66. package/data/skills/bio-clinical-databases-variant-prioritization/SKILL.md +225 -0
  67. package/data/skills/bio-clip-seq-binding-site-annotation/SKILL.md +66 -0
  68. package/data/skills/bio-clip-seq-clip-alignment/SKILL.md +70 -0
  69. package/data/skills/bio-clip-seq-clip-motif-analysis/SKILL.md +62 -0
  70. package/data/skills/bio-clip-seq-clip-peak-calling/SKILL.md +282 -0
  71. package/data/skills/bio-clip-seq-clip-preprocessing/SKILL.md +142 -0
  72. package/data/skills/bio-codon-usage/SKILL.md +353 -0
  73. package/data/skills/bio-comparative-genomics-ancestral-reconstruction/SKILL.md +312 -0
  74. package/data/skills/bio-comparative-genomics-hgt-detection/SKILL.md +341 -0
  75. package/data/skills/bio-comparative-genomics-ortholog-inference/SKILL.md +308 -0
  76. package/data/skills/bio-comparative-genomics-positive-selection/SKILL.md +354 -0
  77. package/data/skills/bio-comparative-genomics-synteny-analysis/SKILL.md +315 -0
  78. package/data/skills/bio-compressed-files/SKILL.md +263 -0
  79. package/data/skills/bio-consensus-sequences/SKILL.md +340 -0
  80. package/data/skills/bio-copy-number-cnv-annotation/SKILL.md +307 -0
  81. package/data/skills/bio-copy-number-cnv-visualization/SKILL.md +294 -0
  82. package/data/skills/bio-copy-number-cnvkit-analysis/SKILL.md +290 -0
  83. package/data/skills/bio-copy-number-gatk-cnv/SKILL.md +270 -0
  84. package/data/skills/bio-crispr-screens-base-editing-analysis/SKILL.md +110 -0
  85. package/data/skills/bio-crispr-screens-batch-correction/SKILL.md +316 -0
  86. package/data/skills/bio-crispr-screens-crispresso-editing/SKILL.md +205 -0
  87. package/data/skills/bio-crispr-screens-hit-calling/SKILL.md +264 -0
  88. package/data/skills/bio-crispr-screens-jacks-analysis/SKILL.md +313 -0
  89. package/data/skills/bio-crispr-screens-library-design/SKILL.md +417 -0
  90. package/data/skills/bio-crispr-screens-mageck-analysis/SKILL.md +222 -0
  91. package/data/skills/bio-crispr-screens-screen-qc/SKILL.md +243 -0
  92. package/data/skills/bio-ctdna-mutation-detection/SKILL.md +234 -0
  93. package/data/skills/bio-data-visualization-circos-plots/SKILL.md +405 -0
  94. package/data/skills/bio-data-visualization-color-palettes/SKILL.md +244 -0
  95. package/data/skills/bio-data-visualization-genome-browser-tracks/SKILL.md +328 -0
  96. package/data/skills/bio-data-visualization-genome-tracks/SKILL.md +249 -0
  97. package/data/skills/bio-data-visualization-ggplot2-fundamentals/SKILL.md +313 -0
  98. package/data/skills/bio-data-visualization-heatmaps-clustering/SKILL.md +227 -0
  99. package/data/skills/bio-data-visualization-interactive-visualization/SKILL.md +210 -0
  100. package/data/skills/bio-data-visualization-multipanel-figures/SKILL.md +274 -0
  101. package/data/skills/bio-data-visualization-specialized-omics-plots/SKILL.md +251 -0
  102. package/data/skills/bio-data-visualization-upset-plots/SKILL.md +228 -0
  103. package/data/skills/bio-data-visualization-volcano-customization/SKILL.md +233 -0
  104. package/data/skills/bio-de-deseq2-basics/SKILL.md +376 -0
  105. package/data/skills/bio-de-edger-basics/SKILL.md +418 -0
  106. package/data/skills/bio-de-results/SKILL.md +378 -0
  107. package/data/skills/bio-de-visualization/SKILL.md +408 -0
  108. package/data/skills/bio-differential-expression-batch-correction/SKILL.md +253 -0
  109. package/data/skills/bio-differential-expression-timeseries-de/SKILL.md +370 -0
  110. package/data/skills/bio-differential-splicing/SKILL.md +177 -0
  111. package/data/skills/bio-duplicate-handling/SKILL.md +292 -0
  112. package/data/skills/bio-entrez-fetch/SKILL.md +334 -0
  113. package/data/skills/bio-entrez-link/SKILL.md +325 -0
  114. package/data/skills/bio-entrez-search/SKILL.md +311 -0
  115. package/data/skills/bio-epidemiological-genomics-amr-surveillance/SKILL.md +233 -0
  116. package/data/skills/bio-epidemiological-genomics-pathogen-typing/SKILL.md +202 -0
  117. package/data/skills/bio-epidemiological-genomics-phylodynamics/SKILL.md +207 -0
  118. package/data/skills/bio-epidemiological-genomics-transmission-inference/SKILL.md +237 -0
  119. package/data/skills/bio-epidemiological-genomics-variant-surveillance/SKILL.md +237 -0
  120. package/data/skills/bio-epitranscriptomics-m6a-differential/SKILL.md +88 -0
  121. package/data/skills/bio-epitranscriptomics-m6a-peak-calling/SKILL.md +89 -0
  122. package/data/skills/bio-epitranscriptomics-m6anet-analysis/SKILL.md +101 -0
  123. package/data/skills/bio-epitranscriptomics-merip-preprocessing/SKILL.md +81 -0
  124. package/data/skills/bio-epitranscriptomics-modification-visualization/SKILL.md +98 -0
  125. package/data/skills/bio-experimental-design-batch-design/SKILL.md +110 -0
  126. package/data/skills/bio-experimental-design-multiple-testing/SKILL.md +98 -0
  127. package/data/skills/bio-experimental-design-power-analysis/SKILL.md +84 -0
  128. package/data/skills/bio-experimental-design-sample-size/SKILL.md +93 -0
  129. package/data/skills/bio-expression-matrix-counts-ingest/SKILL.md +220 -0
  130. package/data/skills/bio-expression-matrix-gene-id-mapping/SKILL.md +256 -0
  131. package/data/skills/bio-expression-matrix-metadata-joins/SKILL.md +271 -0
  132. package/data/skills/bio-expression-matrix-sparse-handling/SKILL.md +247 -0
  133. package/data/skills/bio-fastq-quality/SKILL.md +279 -0
  134. package/data/skills/bio-filter-sequences/SKILL.md +265 -0
  135. package/data/skills/bio-flow-cytometry-bead-normalization/SKILL.md +315 -0
  136. package/data/skills/bio-flow-cytometry-clustering-phenotyping/SKILL.md +237 -0
  137. package/data/skills/bio-flow-cytometry-compensation-transformation/SKILL.md +196 -0
  138. package/data/skills/bio-flow-cytometry-cytometry-qc/SKILL.md +382 -0
  139. package/data/skills/bio-flow-cytometry-differential-analysis/SKILL.md +217 -0
  140. package/data/skills/bio-flow-cytometry-doublet-detection/SKILL.md +288 -0
  141. package/data/skills/bio-flow-cytometry-fcs-handling/SKILL.md +221 -0
  142. package/data/skills/bio-flow-cytometry-gating-analysis/SKILL.md +193 -0
  143. package/data/skills/bio-format-conversion/SKILL.md +193 -0
  144. package/data/skills/bio-fragment-analysis/SKILL.md +214 -0
  145. package/data/skills/bio-gatk-variant-calling/SKILL.md +422 -0
  146. package/data/skills/bio-genome-assembly-assembly-polishing/SKILL.md +333 -0
  147. package/data/skills/bio-genome-assembly-assembly-qc/SKILL.md +344 -0
  148. package/data/skills/bio-genome-assembly-contamination-detection/SKILL.md +235 -0
  149. package/data/skills/bio-genome-assembly-hifi-assembly/SKILL.md +178 -0
  150. package/data/skills/bio-genome-assembly-long-read-assembly/SKILL.md +307 -0
  151. package/data/skills/bio-genome-assembly-metagenome-assembly/SKILL.md +227 -0
  152. package/data/skills/bio-genome-assembly-scaffolding/SKILL.md +204 -0
  153. package/data/skills/bio-genome-assembly-short-read-assembly/SKILL.md +319 -0
  154. package/data/skills/bio-genome-engineering-base-editing-design/SKILL.md +277 -0
  155. package/data/skills/bio-genome-engineering-grna-design/SKILL.md +221 -0
  156. package/data/skills/bio-genome-engineering-hdr-template-design/SKILL.md +264 -0
  157. package/data/skills/bio-genome-engineering-off-target-prediction/SKILL.md +232 -0
  158. package/data/skills/bio-genome-engineering-prime-editing-design/SKILL.md +275 -0
  159. package/data/skills/bio-genome-intervals-bed-file-basics/SKILL.md +357 -0
  160. package/data/skills/bio-genome-intervals-bigwig-tracks/SKILL.md +351 -0
  161. package/data/skills/bio-genome-intervals-coverage-analysis/SKILL.md +300 -0
  162. package/data/skills/bio-genome-intervals-gtf-gff-handling/SKILL.md +345 -0
  163. package/data/skills/bio-genome-intervals-interval-arithmetic/SKILL.md +485 -0
  164. package/data/skills/bio-genome-intervals-proximity-operations/SKILL.md +337 -0
  165. package/data/skills/bio-geo-data/SKILL.md +380 -0
  166. package/data/skills/bio-hi-c-analysis-compartment-analysis/SKILL.md +261 -0
  167. package/data/skills/bio-hi-c-analysis-contact-pairs/SKILL.md +278 -0
  168. package/data/skills/bio-hi-c-analysis-hic-data-io/SKILL.md +260 -0
  169. package/data/skills/bio-hi-c-analysis-hic-differential/SKILL.md +328 -0
  170. package/data/skills/bio-hi-c-analysis-hic-visualization/SKILL.md +297 -0
  171. package/data/skills/bio-hi-c-analysis-loop-calling/SKILL.md +284 -0
  172. package/data/skills/bio-hi-c-analysis-matrix-operations/SKILL.md +274 -0
  173. package/data/skills/bio-hi-c-analysis-tad-detection/SKILL.md +239 -0
  174. package/data/skills/bio-imaging-mass-cytometry-cell-segmentation/SKILL.md +241 -0
  175. package/data/skills/bio-imaging-mass-cytometry-data-preprocessing/SKILL.md +279 -0
  176. package/data/skills/bio-imaging-mass-cytometry-interactive-annotation/SKILL.md +304 -0
  177. package/data/skills/bio-imaging-mass-cytometry-phenotyping/SKILL.md +231 -0
  178. package/data/skills/bio-imaging-mass-cytometry-quality-metrics/SKILL.md +316 -0
  179. package/data/skills/bio-imaging-mass-cytometry-spatial-analysis/SKILL.md +246 -0
  180. package/data/skills/bio-immunoinformatics-epitope-prediction/SKILL.md +259 -0
  181. package/data/skills/bio-immunoinformatics-immunogenicity-scoring/SKILL.md +275 -0
  182. package/data/skills/bio-immunoinformatics-mhc-binding-prediction/SKILL.md +260 -0
  183. package/data/skills/bio-immunoinformatics-neoantigen-prediction/SKILL.md +277 -0
  184. package/data/skills/bio-immunoinformatics-tcr-epitope-binding/SKILL.md +257 -0
  185. package/data/skills/bio-isoform-switching/SKILL.md +192 -0
  186. package/data/skills/bio-liquid-biopsy-pipeline/SKILL.md +311 -0
  187. package/data/skills/bio-local-blast/SKILL.md +350 -0
  188. package/data/skills/bio-long-read-sequencing-clair3-variants/SKILL.md +252 -0
  189. package/data/skills/bio-long-read-sequencing-isoseq-analysis/SKILL.md +334 -0
  190. package/data/skills/bio-long-read-sequencing-nanopore-methylation/SKILL.md +110 -0
  191. package/data/skills/bio-longitudinal-monitoring/SKILL.md +271 -0
  192. package/data/skills/bio-longread-alignment/SKILL.md +193 -0
  193. package/data/skills/bio-longread-medaka/SKILL.md +176 -0
  194. package/data/skills/bio-longread-qc/SKILL.md +224 -0
  195. package/data/skills/bio-longread-structural-variants/SKILL.md +201 -0
  196. package/data/skills/bio-machine-learning-atlas-mapping/SKILL.md +139 -0
  197. package/data/skills/bio-machine-learning-biomarker-discovery/SKILL.md +157 -0
  198. package/data/skills/bio-machine-learning-model-validation/SKILL.md +148 -0
  199. package/data/skills/bio-machine-learning-omics-classifiers/SKILL.md +146 -0
  200. package/data/skills/bio-machine-learning-prediction-explanation/SKILL.md +162 -0
  201. package/data/skills/bio-machine-learning-survival-analysis/SKILL.md +176 -0
  202. package/data/skills/bio-metabolomics-lipidomics/SKILL.md +265 -0
  203. package/data/skills/bio-metabolomics-metabolite-annotation/SKILL.md +241 -0
  204. package/data/skills/bio-metabolomics-msdial-preprocessing/SKILL.md +308 -0
  205. package/data/skills/bio-metabolomics-normalization-qc/SKILL.md +283 -0
  206. package/data/skills/bio-metabolomics-pathway-mapping/SKILL.md +237 -0
  207. package/data/skills/bio-metabolomics-statistical-analysis/SKILL.md +276 -0
  208. package/data/skills/bio-metabolomics-targeted-analysis/SKILL.md +314 -0
  209. package/data/skills/bio-metabolomics-xcms-preprocessing/SKILL.md +268 -0
  210. package/data/skills/bio-metagenomics-abundance/SKILL.md +203 -0
  211. package/data/skills/bio-metagenomics-amr-detection/SKILL.md +293 -0
  212. package/data/skills/bio-metagenomics-functional-profiling/SKILL.md +252 -0
  213. package/data/skills/bio-metagenomics-kraken/SKILL.md +204 -0
  214. package/data/skills/bio-metagenomics-metaphlan/SKILL.md +214 -0
  215. package/data/skills/bio-metagenomics-strain-tracking/SKILL.md +292 -0
  216. package/data/skills/bio-metagenomics-visualization/SKILL.md +240 -0
  217. package/data/skills/bio-methylation-based-detection/SKILL.md +223 -0
  218. package/data/skills/bio-methylation-bismark-alignment/SKILL.md +195 -0
  219. package/data/skills/bio-methylation-calling/SKILL.md +200 -0
  220. package/data/skills/bio-methylation-dmr-detection/SKILL.md +211 -0
  221. package/data/skills/bio-methylation-methylkit/SKILL.md +219 -0
  222. package/data/skills/bio-microbiome-amplicon-processing/SKILL.md +137 -0
  223. package/data/skills/bio-microbiome-differential-abundance/SKILL.md +147 -0
  224. package/data/skills/bio-microbiome-diversity-analysis/SKILL.md +188 -0
  225. package/data/skills/bio-microbiome-functional-prediction/SKILL.md +153 -0
  226. package/data/skills/bio-microbiome-qiime2-workflow/SKILL.md +219 -0
  227. package/data/skills/bio-microbiome-taxonomy-assignment/SKILL.md +168 -0
  228. package/data/skills/bio-molecular-descriptors/SKILL.md +200 -0
  229. package/data/skills/bio-molecular-io/SKILL.md +188 -0
  230. package/data/skills/bio-motif-search/SKILL.md +354 -0
  231. package/data/skills/bio-multi-omics-data-harmonization/SKILL.md +228 -0
  232. package/data/skills/bio-multi-omics-mixomics-analysis/SKILL.md +221 -0
  233. package/data/skills/bio-multi-omics-mofa-integration/SKILL.md +225 -0
  234. package/data/skills/bio-multi-omics-similarity-network/SKILL.md +235 -0
  235. package/data/skills/bio-orchestrator/SKILL.md +133 -0
  236. package/data/skills/bio-paired-end-fastq/SKILL.md +334 -0
  237. package/data/skills/bio-pathway-enrichment-visualization/SKILL.md +278 -0
  238. package/data/skills/bio-pathway-go-enrichment/SKILL.md +218 -0
  239. package/data/skills/bio-pathway-gsea/SKILL.md +227 -0
  240. package/data/skills/bio-pathway-kegg-pathways/SKILL.md +234 -0
  241. package/data/skills/bio-pathway-reactome/SKILL.md +215 -0
  242. package/data/skills/bio-pathway-wikipathways/SKILL.md +255 -0
  243. package/data/skills/bio-pdb-geometric-analysis/SKILL.md +475 -0
  244. package/data/skills/bio-pdb-structure-io/SKILL.md +296 -0
  245. package/data/skills/bio-pdb-structure-modification/SKILL.md +448 -0
  246. package/data/skills/bio-pdb-structure-navigation/SKILL.md +335 -0
  247. package/data/skills/bio-phasing-imputation-genotype-imputation/SKILL.md +201 -0
  248. package/data/skills/bio-phasing-imputation-haplotype-phasing/SKILL.md +190 -0
  249. package/data/skills/bio-phasing-imputation-imputation-qc/SKILL.md +265 -0
  250. package/data/skills/bio-phasing-imputation-reference-panels/SKILL.md +203 -0
  251. package/data/skills/bio-phylo-distance-calculations/SKILL.md +307 -0
  252. package/data/skills/bio-phylo-modern-tree-inference/SKILL.md +274 -0
  253. package/data/skills/bio-phylo-tree-io/SKILL.md +252 -0
  254. package/data/skills/bio-phylo-tree-manipulation/SKILL.md +375 -0
  255. package/data/skills/bio-phylo-tree-visualization/SKILL.md +275 -0
  256. package/data/skills/bio-pileup-generation/SKILL.md +314 -0
  257. package/data/skills/bio-population-genetics-association-testing/SKILL.md +293 -0
  258. package/data/skills/bio-population-genetics-linkage-disequilibrium/SKILL.md +260 -0
  259. package/data/skills/bio-population-genetics-plink-basics/SKILL.md +338 -0
  260. package/data/skills/bio-population-genetics-population-structure/SKILL.md +352 -0
  261. package/data/skills/bio-population-genetics-scikit-allel-analysis/SKILL.md +306 -0
  262. package/data/skills/bio-population-genetics-selection-statistics/SKILL.md +251 -0
  263. package/data/skills/bio-primer-design-primer-basics/SKILL.md +289 -0
  264. package/data/skills/bio-primer-design-primer-validation/SKILL.md +344 -0
  265. package/data/skills/bio-primer-design-qpcr-primers/SKILL.md +273 -0
  266. package/data/skills/bio-proteomics-data-import/SKILL.md +122 -0
  267. package/data/skills/bio-proteomics-dia-analysis/SKILL.md +246 -0
  268. package/data/skills/bio-proteomics-differential-abundance/SKILL.md +129 -0
  269. package/data/skills/bio-proteomics-peptide-identification/SKILL.md +122 -0
  270. package/data/skills/bio-proteomics-protein-inference/SKILL.md +174 -0
  271. package/data/skills/bio-proteomics-proteomics-qc/SKILL.md +208 -0
  272. package/data/skills/bio-proteomics-ptm-analysis/SKILL.md +139 -0
  273. package/data/skills/bio-proteomics-quantification/SKILL.md +141 -0
  274. package/data/skills/bio-proteomics-spectral-libraries/SKILL.md +270 -0
  275. package/data/skills/bio-reaction-enumeration/SKILL.md +251 -0
  276. package/data/skills/bio-read-alignment-bowtie2-alignment/SKILL.md +189 -0
  277. package/data/skills/bio-read-alignment-bwa-alignment/SKILL.md +166 -0
  278. package/data/skills/bio-read-alignment-hisat2-alignment/SKILL.md +205 -0
  279. package/data/skills/bio-read-alignment-star-alignment/SKILL.md +204 -0
  280. package/data/skills/bio-read-qc-adapter-trimming/SKILL.md +222 -0
  281. package/data/skills/bio-read-qc-contamination-screening/SKILL.md +252 -0
  282. package/data/skills/bio-read-qc-fastp-workflow/SKILL.md +278 -0
  283. package/data/skills/bio-read-qc-quality-filtering/SKILL.md +231 -0
  284. package/data/skills/bio-read-qc-quality-reports/SKILL.md +204 -0
  285. package/data/skills/bio-read-qc-umi-processing/SKILL.md +391 -0
  286. package/data/skills/bio-read-sequences/SKILL.md +319 -0
  287. package/data/skills/bio-reference-operations/SKILL.md +302 -0
  288. package/data/skills/bio-reporting-automated-qc-reports/SKILL.md +103 -0
  289. package/data/skills/bio-reporting-figure-export/SKILL.md +112 -0
  290. package/data/skills/bio-reporting-jupyter-reports/SKILL.md +98 -0
  291. package/data/skills/bio-reporting-quarto-reports/SKILL.md +295 -0
  292. package/data/skills/bio-reporting-rmarkdown-reports/SKILL.md +276 -0
  293. package/data/skills/bio-research-tools-biomarker-signature-studio/SKILL.md +99 -0
  294. package/data/skills/bio-restriction-enzyme-selection/SKILL.md +342 -0
  295. package/data/skills/bio-restriction-fragment-analysis/SKILL.md +259 -0
  296. package/data/skills/bio-restriction-mapping/SKILL.md +239 -0
  297. package/data/skills/bio-restriction-sites/SKILL.md +222 -0
  298. package/data/skills/bio-reverse-complement/SKILL.md +250 -0
  299. package/data/skills/bio-ribo-seq-orf-detection/SKILL.md +303 -0
  300. package/data/skills/bio-ribo-seq-riboseq-preprocessing/SKILL.md +176 -0
  301. package/data/skills/bio-ribo-seq-ribosome-periodicity/SKILL.md +182 -0
  302. package/data/skills/bio-ribo-seq-ribosome-stalling/SKILL.md +217 -0
  303. package/data/skills/bio-ribo-seq-translation-efficiency/SKILL.md +183 -0
  304. package/data/skills/bio-rna-quantification-alignment-free-quant/SKILL.md +226 -0
  305. package/data/skills/bio-rna-quantification-count-matrix-qc/SKILL.md +310 -0
  306. package/data/skills/bio-rna-quantification-featurecounts-counting/SKILL.md +190 -0
  307. package/data/skills/bio-rna-quantification-tximport-workflow/SKILL.md +240 -0
  308. package/data/skills/bio-rnaseq-qc/SKILL.md +320 -0
  309. package/data/skills/bio-sam-bam-basics/SKILL.md +248 -0
  310. package/data/skills/bio-sashimi-plots/SKILL.md +175 -0
  311. package/data/skills/bio-seq-objects/SKILL.md +240 -0
  312. package/data/skills/bio-sequence-properties/SKILL.md +397 -0
  313. package/data/skills/bio-sequence-similarity/SKILL.md +335 -0
  314. package/data/skills/bio-sequence-slicing/SKILL.md +232 -0
  315. package/data/skills/bio-sequence-statistics/SKILL.md +318 -0
  316. package/data/skills/bio-similarity-searching/SKILL.md +200 -0
  317. package/data/skills/bio-single-cell-batch-integration/SKILL.md +317 -0
  318. package/data/skills/bio-single-cell-cell-annotation/SKILL.md +259 -0
  319. package/data/skills/bio-single-cell-cell-communication/SKILL.md +257 -0
  320. package/data/skills/bio-single-cell-clustering/SKILL.md +330 -0
  321. package/data/skills/bio-single-cell-data-io/SKILL.md +315 -0
  322. package/data/skills/bio-single-cell-doublet-detection/SKILL.md +362 -0
  323. package/data/skills/bio-single-cell-lineage-tracing/SKILL.md +319 -0
  324. package/data/skills/bio-single-cell-markers-annotation/SKILL.md +317 -0
  325. package/data/skills/bio-single-cell-metabolite-communication/SKILL.md +258 -0
  326. package/data/skills/bio-single-cell-multimodal-integration/SKILL.md +242 -0
  327. package/data/skills/bio-single-cell-perturb-seq/SKILL.md +258 -0
  328. package/data/skills/bio-single-cell-preprocessing/SKILL.md +338 -0
  329. package/data/skills/bio-single-cell-scatac-analysis/SKILL.md +326 -0
  330. package/data/skills/bio-single-cell-splicing/SKILL.md +199 -0
  331. package/data/skills/bio-single-cell-trajectory-inference/SKILL.md +225 -0
  332. package/data/skills/bio-small-rna-seq-differential-mirna/SKILL.md +194 -0
  333. package/data/skills/bio-small-rna-seq-mirdeep2-analysis/SKILL.md +180 -0
  334. package/data/skills/bio-small-rna-seq-mirge3-analysis/SKILL.md +178 -0
  335. package/data/skills/bio-small-rna-seq-smrna-preprocessing/SKILL.md +174 -0
  336. package/data/skills/bio-small-rna-seq-target-prediction/SKILL.md +202 -0
  337. package/data/skills/bio-spatial-transcriptomics-image-analysis/SKILL.md +283 -0
  338. package/data/skills/bio-spatial-transcriptomics-spatial-communication/SKILL.md +299 -0
  339. package/data/skills/bio-spatial-transcriptomics-spatial-data-io/SKILL.md +272 -0
  340. package/data/skills/bio-spatial-transcriptomics-spatial-deconvolution/SKILL.md +314 -0
  341. package/data/skills/bio-spatial-transcriptomics-spatial-domains/SKILL.md +254 -0
  342. package/data/skills/bio-spatial-transcriptomics-spatial-multiomics/SKILL.md +181 -0
  343. package/data/skills/bio-spatial-transcriptomics-spatial-neighbors/SKILL.md +198 -0
  344. package/data/skills/bio-spatial-transcriptomics-spatial-preprocessing/SKILL.md +269 -0
  345. package/data/skills/bio-spatial-transcriptomics-spatial-proteomics/SKILL.md +124 -0
  346. package/data/skills/bio-spatial-transcriptomics-spatial-statistics/SKILL.md +237 -0
  347. package/data/skills/bio-spatial-transcriptomics-spatial-visualization/SKILL.md +287 -0
  348. package/data/skills/bio-splicing-pipeline/SKILL.md +253 -0
  349. package/data/skills/bio-splicing-qc/SKILL.md +190 -0
  350. package/data/skills/bio-splicing-quantification/SKILL.md +145 -0
  351. package/data/skills/bio-sra-data/SKILL.md +363 -0
  352. package/data/skills/bio-structural-biology-alphafold-predictions/SKILL.md +258 -0
  353. package/data/skills/bio-structural-biology-modern-structure-prediction/SKILL.md +346 -0
  354. package/data/skills/bio-substructure-search/SKILL.md +206 -0
  355. package/data/skills/bio-systems-biology-context-specific-models/SKILL.md +241 -0
  356. package/data/skills/bio-systems-biology-flux-balance-analysis/SKILL.md +206 -0
  357. package/data/skills/bio-systems-biology-gene-essentiality/SKILL.md +235 -0
  358. package/data/skills/bio-systems-biology-metabolic-reconstruction/SKILL.md +215 -0
  359. package/data/skills/bio-systems-biology-model-curation/SKILL.md +243 -0
  360. package/data/skills/bio-tcr-bcr-analysis-immcantation-analysis/SKILL.md +195 -0
  361. package/data/skills/bio-tcr-bcr-analysis-mixcr-analysis/SKILL.md +167 -0
  362. package/data/skills/bio-tcr-bcr-analysis-repertoire-visualization/SKILL.md +224 -0
  363. package/data/skills/bio-tcr-bcr-analysis-scirpy-analysis/SKILL.md +168 -0
  364. package/data/skills/bio-tcr-bcr-analysis-vdjtools-analysis/SKILL.md +188 -0
  365. package/data/skills/bio-transcription-translation/SKILL.md +237 -0
  366. package/data/skills/bio-tumor-fraction-estimation/SKILL.md +211 -0
  367. package/data/skills/bio-uniprot-access/SKILL.md +239 -0
  368. package/data/skills/bio-variant-annotation/SKILL.md +410 -0
  369. package/data/skills/bio-variant-calling/SKILL.md +266 -0
  370. package/data/skills/bio-variant-calling-clinical-interpretation/SKILL.md +355 -0
  371. package/data/skills/bio-variant-calling-deepvariant/SKILL.md +315 -0
  372. package/data/skills/bio-variant-calling-filtering-best-practices/SKILL.md +403 -0
  373. package/data/skills/bio-variant-calling-joint-calling/SKILL.md +338 -0
  374. package/data/skills/bio-variant-calling-structural-variant-calling/SKILL.md +253 -0
  375. package/data/skills/bio-variant-normalization/SKILL.md +325 -0
  376. package/data/skills/bio-vcf-basics/SKILL.md +342 -0
  377. package/data/skills/bio-vcf-manipulation/SKILL.md +429 -0
  378. package/data/skills/bio-vcf-statistics/SKILL.md +445 -0
  379. package/data/skills/bio-virtual-screening/SKILL.md +263 -0
  380. package/data/skills/bio-workflow-management-cwl-workflows/SKILL.md +433 -0
  381. package/data/skills/bio-workflow-management-nextflow-pipelines/SKILL.md +386 -0
  382. package/data/skills/bio-workflow-management-snakemake-workflows/SKILL.md +383 -0
  383. package/data/skills/bio-workflow-management-wdl-workflows/SKILL.md +500 -0
  384. package/data/skills/bio-workflows-atacseq-pipeline/SKILL.md +362 -0
  385. package/data/skills/bio-workflows-biomarker-pipeline/SKILL.md +272 -0
  386. package/data/skills/bio-workflows-chipseq-pipeline/SKILL.md +282 -0
  387. package/data/skills/bio-workflows-clip-pipeline/SKILL.md +268 -0
  388. package/data/skills/bio-workflows-cnv-pipeline/SKILL.md +324 -0
  389. package/data/skills/bio-workflows-crispr-editing-pipeline/SKILL.md +455 -0
  390. package/data/skills/bio-workflows-crispr-screen-pipeline/SKILL.md +278 -0
  391. package/data/skills/bio-workflows-cytometry-pipeline/SKILL.md +328 -0
  392. package/data/skills/bio-workflows-expression-to-pathways/SKILL.md +329 -0
  393. package/data/skills/bio-workflows-fastq-to-variants/SKILL.md +374 -0
  394. package/data/skills/bio-workflows-genome-assembly-pipeline/SKILL.md +290 -0
  395. package/data/skills/bio-workflows-gwas-pipeline/SKILL.md +323 -0
  396. package/data/skills/bio-workflows-hic-pipeline/SKILL.md +304 -0
  397. package/data/skills/bio-workflows-imc-pipeline/SKILL.md +304 -0
  398. package/data/skills/bio-workflows-longread-sv-pipeline/SKILL.md +281 -0
  399. package/data/skills/bio-workflows-merip-pipeline/SKILL.md +222 -0
  400. package/data/skills/bio-workflows-metabolic-modeling-pipeline/SKILL.md +408 -0
  401. package/data/skills/bio-workflows-metabolomics-pipeline/SKILL.md +297 -0
  402. package/data/skills/bio-workflows-metagenomics-pipeline/SKILL.md +283 -0
  403. package/data/skills/bio-workflows-methylation-pipeline/SKILL.md +274 -0
  404. package/data/skills/bio-workflows-microbiome-pipeline/SKILL.md +221 -0
  405. package/data/skills/bio-workflows-multi-omics-pipeline/SKILL.md +362 -0
  406. package/data/skills/bio-workflows-multiome-pipeline/SKILL.md +298 -0
  407. package/data/skills/bio-workflows-neoantigen-pipeline/SKILL.md +325 -0
  408. package/data/skills/bio-workflows-outbreak-pipeline/SKILL.md +341 -0
  409. package/data/skills/bio-workflows-proteomics-pipeline/SKILL.md +226 -0
  410. package/data/skills/bio-workflows-riboseq-pipeline/SKILL.md +94 -0
  411. package/data/skills/bio-workflows-rnaseq-to-de/SKILL.md +345 -0
  412. package/data/skills/bio-workflows-scrnaseq-pipeline/SKILL.md +354 -0
  413. package/data/skills/bio-workflows-smrna-pipeline/SKILL.md +86 -0
  414. package/data/skills/bio-workflows-somatic-variant-pipeline/SKILL.md +313 -0
  415. package/data/skills/bio-workflows-spatial-pipeline/SKILL.md +267 -0
  416. package/data/skills/bio-workflows-tcr-pipeline/SKILL.md +84 -0
  417. package/data/skills/bio-write-sequences/SKILL.md +205 -0
  418. package/data/skills/bioinformatics-singlecell/SKILL.md +143 -0
  419. package/data/skills/biokernel/SKILL.md +61 -0
  420. package/data/skills/biologist-analyst/SKILL.md +799 -0
  421. package/data/skills/biomaster-workflows/SKILL.md +55 -0
  422. package/data/skills/biomcp-server/SKILL.md +65 -0
  423. package/data/skills/biomedical-data-analysis/SKILL.md +56 -0
  424. package/data/skills/biomedical-search/SKILL.md +214 -0
  425. package/data/skills/biomni/SKILL.md +309 -0
  426. package/data/skills/biomni-general-agent/SKILL.md +43 -0
  427. package/data/skills/biomni-research-agent/SKILL.md +76 -0
  428. package/data/skills/biopython/SKILL.md +437 -0
  429. package/data/skills/biorxiv-database/SKILL.md +477 -0
  430. package/data/skills/bioservices/SKILL.md +355 -0
  431. package/data/skills/boltz/SKILL.md +188 -0
  432. package/data/skills/boltzgen/SKILL.md +287 -0
  433. package/data/skills/bone-marrow-ai-agent/SKILL.md +163 -0
  434. package/data/skills/brainstorming/SKILL.md +96 -0
  435. package/data/skills/brenda-database/SKILL.md +714 -0
  436. package/data/skills/bulk-combat-correction/SKILL.md +54 -0
  437. package/data/skills/bulk-deg-analysis/SKILL.md +61 -0
  438. package/data/skills/bulk-deseq2-analysis/SKILL.md +50 -0
  439. package/data/skills/bulk-stringdb-ppi/SKILL.md +49 -0
  440. package/data/skills/bulk-to-single-deconvolution/SKILL.md +50 -0
  441. package/data/skills/bulk-trajblend-interpolation/SKILL.md +52 -0
  442. package/data/skills/bulk-wgcna-analysis/SKILL.md +56 -0
  443. package/data/skills/cancer-metabolism-agent/SKILL.md +180 -0
  444. package/data/skills/care-coordination/SKILL.md +35 -0
  445. package/data/skills/cart-design-optimizer-agent/SKILL.md +162 -0
  446. package/data/skills/cbioportal-database/SKILL.md +367 -0
  447. package/data/skills/cell-free-expression/SKILL.md +291 -0
  448. package/data/skills/cellagent-annotation/SKILL.md +69 -0
  449. package/data/skills/cellfree-rna-agent/SKILL.md +182 -0
  450. package/data/skills/cellular-senescence-agent/SKILL.md +183 -0
  451. package/data/skills/cellxgene-census/SKILL.md +505 -0
  452. package/data/skills/chai/SKILL.md +272 -0
  453. package/data/skills/chatehr-clinician-assistant/SKILL.md +67 -0
  454. package/data/skills/chematagent-drug-discovery/SKILL.md +68 -0
  455. package/data/skills/chembl-database/SKILL.md +383 -0
  456. package/data/skills/chembl-search/SKILL.md +211 -0
  457. package/data/skills/chemcrow-drug-discovery/SKILL.md +61 -0
  458. package/data/skills/chemical-property-lookup/SKILL.md +42 -0
  459. package/data/skills/chemist-analyst/SKILL.md +1603 -0
  460. package/data/skills/chemistry-agent/SKILL.md +62 -0
  461. package/data/skills/chip-clonal-hematopoiesis-agent/SKILL.md +224 -0
  462. package/data/skills/chromosomal-instability-agent/SKILL.md +187 -0
  463. package/data/skills/citation-management/SKILL.md +1081 -0
  464. package/data/skills/claims-appeals/SKILL.md +35 -0
  465. package/data/skills/claw-ancestry-pca/SKILL.md +145 -0
  466. package/data/skills/claw-metagenomics/SKILL.md +238 -0
  467. package/data/skills/claw-semantic-sim/SKILL.md +151 -0
  468. package/data/skills/clinical-decision-support/SKILL.md +504 -0
  469. package/data/skills/clinical-diagnostic-reasoning/SKILL.md +222 -0
  470. package/data/skills/clinical-nlp-extractor/SKILL.md +59 -0
  471. package/data/skills/clinical-note-summarization/SKILL.md +52 -0
  472. package/data/skills/clinical-reports/SKILL.md +1127 -0
  473. package/data/skills/clinical-trial-protocol-skill/SKILL.md +508 -0
  474. package/data/skills/clinical-trials-search/SKILL.md +211 -0
  475. package/data/skills/clinicaltrials-database/SKILL.md +501 -0
  476. package/data/skills/clinpgx/SKILL.md +96 -0
  477. package/data/skills/clinpgx-database/SKILL.md +632 -0
  478. package/data/skills/clinvar-database/SKILL.md +356 -0
  479. package/data/skills/cnv-caller-agent/SKILL.md +171 -0
  480. package/data/skills/coagulation-thrombosis-agent/SKILL.md +141 -0
  481. package/data/skills/cobrapy/SKILL.md +457 -0
  482. package/data/skills/compbioagent-explorer/SKILL.md +67 -0
  483. package/data/skills/computational-pathology-agent/SKILL.md +72 -0
  484. package/data/skills/convergence-study/SKILL.md +98 -0
  485. package/data/skills/cosmic-database/SKILL.md +330 -0
  486. package/data/skills/crisis-detection-intervention-ai/SKILL.md +569 -0
  487. package/data/skills/crisis-response-protocol/SKILL.md +456 -0
  488. package/data/skills/crispr-guide-design/SKILL.md +72 -0
  489. package/data/skills/crispr-offtarget-predictor/SKILL.md +56 -0
  490. package/data/skills/cryoem-ai-drug-design-agent/SKILL.md +216 -0
  491. package/data/skills/ctdna-dynamics-mrd-agent/SKILL.md +206 -0
  492. package/data/skills/cytokine-storm-analysis-agent/SKILL.md +180 -0
  493. package/data/skills/dask/SKILL.md +454 -0
  494. package/data/skills/data-stats-analysis/SKILL.md +477 -0
  495. package/data/skills/data-transform/SKILL.md +576 -0
  496. package/data/skills/data-visualization-biomedical/SKILL.md +252 -0
  497. package/data/skills/data-visualization-expert/SKILL.md +72 -0
  498. package/data/skills/data-viz-plots/SKILL.md +461 -0
  499. package/data/skills/datacommons-client/SKILL.md +253 -0
  500. package/data/skills/datamol/SKILL.md +700 -0
  501. package/data/skills/deep-research/SKILL.md +111 -0
  502. package/data/skills/deep-research-swarm/SKILL.md +62 -0
  503. package/data/skills/deep-visual-proteomics-agent/SKILL.md +149 -0
  504. package/data/skills/deepchem/SKILL.md +591 -0
  505. package/data/skills/deeptools/SKILL.md +525 -0
  506. package/data/skills/depmap/SKILL.md +300 -0
  507. package/data/skills/diffdock/SKILL.md +477 -0
  508. package/data/skills/differentiation-schemes/SKILL.md +159 -0
  509. package/data/skills/digital-twin-clinical-agent/SKILL.md +228 -0
  510. package/data/skills/dispatching-parallel-agents/SKILL.md +180 -0
  511. package/data/skills/dnanexus-integration/SKILL.md +376 -0
  512. package/data/skills/doc-coauthoring/SKILL.md +375 -0
  513. package/data/skills/docx/SKILL.md +590 -0
  514. package/data/skills/docx-official/SKILL.md +197 -0
  515. package/data/skills/drug-discovery-search/SKILL.md +214 -0
  516. package/data/skills/drug-interaction-checker/SKILL.md +56 -0
  517. package/data/skills/drug-labels-search/SKILL.md +211 -0
  518. package/data/skills/drug-photo/SKILL.md +149 -0
  519. package/data/skills/drugbank-database/SKILL.md +184 -0
  520. package/data/skills/drugbank-search/SKILL.md +211 -0
  521. package/data/skills/ehr-fhir-integration/SKILL.md +60 -0
  522. package/data/skills/emergency-card/SKILL.md +426 -0
  523. package/data/skills/ena-database/SKILL.md +198 -0
  524. package/data/skills/ensembl-database/SKILL.md +305 -0
  525. package/data/skills/epidemiologist-analyst/SKILL.md +1844 -0
  526. package/data/skills/epigenomics-methylgpt-agent/SKILL.md +111 -0
  527. package/data/skills/equity-scorer/SKILL.md +182 -0
  528. package/data/skills/esm/SKILL.md +300 -0
  529. package/data/skills/etetoolkit/SKILL.md +617 -0
  530. package/data/skills/executing-plans/SKILL.md +84 -0
  531. package/data/skills/exosome-ev-analysis-agent/SKILL.md +171 -0
  532. package/data/skills/exploratory-data-analysis/SKILL.md +440 -0
  533. package/data/skills/family-health-analyzer/SKILL.md +137 -0
  534. package/data/skills/fastq-analysis/SKILL.md +191 -0
  535. package/data/skills/fda-database/SKILL.md +512 -0
  536. package/data/skills/fhir-developer-skill/SKILL.md +294 -0
  537. package/data/skills/fhir-development/SKILL.md +35 -0
  538. package/data/skills/find-skills/SKILL.md +133 -0
  539. package/data/skills/finishing-a-development-branch/SKILL.md +200 -0
  540. package/data/skills/fitness-analyzer/SKILL.md +431 -0
  541. package/data/skills/flowio/SKILL.md +602 -0
  542. package/data/skills/foldseek/SKILL.md +179 -0
  543. package/data/skills/galaxy-bridge/SKILL.md +215 -0
  544. package/data/skills/gene-database/SKILL.md +173 -0
  545. package/data/skills/gene-panel-design-agent/SKILL.md +192 -0
  546. package/data/skills/geniml/SKILL.md +312 -0
  547. package/data/skills/genome-compare/SKILL.md +127 -0
  548. package/data/skills/geo-database/SKILL.md +809 -0
  549. package/data/skills/geopandas/SKILL.md +245 -0
  550. package/data/skills/gget/SKILL.md +865 -0
  551. package/data/skills/ginkgo-cloud-lab/SKILL.md +56 -0
  552. package/data/skills/glycoengineering/SKILL.md +338 -0
  553. package/data/skills/gnomad-database/SKILL.md +395 -0
  554. package/data/skills/goal-analyzer/SKILL.md +605 -0
  555. package/data/skills/grief-companion/SKILL.md +250 -0
  556. package/data/skills/gsea-enrichment/SKILL.md +151 -0
  557. package/data/skills/gtars/SKILL.md +279 -0
  558. package/data/skills/gtex-database/SKILL.md +315 -0
  559. package/data/skills/gwas-database/SKILL.md +602 -0
  560. package/data/skills/gwas-lookup/SKILL.md +122 -0
  561. package/data/skills/gwas-prs/SKILL.md +178 -0
  562. package/data/skills/health-trend-analyzer/SKILL.md +451 -0
  563. package/data/skills/hemoglobinopathy-analysis-agent/SKILL.md +167 -0
  564. package/data/skills/hipaa-compliance/SKILL.md +230 -0
  565. package/data/skills/histolab/SKILL.md +672 -0
  566. package/data/skills/hmdb-database/SKILL.md +190 -0
  567. package/data/skills/hrd-analysis-agent/SKILL.md +184 -0
  568. package/data/skills/hrv-alexithymia-expert/SKILL.md +151 -0
  569. package/data/skills/hypogenic/SKILL.md +649 -0
  570. package/data/skills/hypothesis-generation/SKILL.md +286 -0
  571. package/data/skills/imaging-data-commons/SKILL.md +843 -0
  572. package/data/skills/immune-checkpoint-combination-agent/SKILL.md +170 -0
  573. package/data/skills/infographics/SKILL.md +563 -0
  574. package/data/skills/instrument-data-to-allotrope/SKILL.md +280 -0
  575. package/data/skills/interpro-database/SKILL.md +305 -0
  576. package/data/skills/ipsae/SKILL.md +190 -0
  577. package/data/skills/iso-13485-certification/SKILL.md +678 -0
  578. package/data/skills/jaspar-database/SKILL.md +351 -0
  579. package/data/skills/jungian-psychologist/SKILL.md +191 -0
  580. package/data/skills/kegg-database/SKILL.md +371 -0
  581. package/data/skills/knowledge-synthesis/SKILL.md +283 -0
  582. package/data/skills/kragen-knowledge-graph/SKILL.md +68 -0
  583. package/data/skills/lab-results/SKILL.md +35 -0
  584. package/data/skills/labarchive-integration/SKILL.md +262 -0
  585. package/data/skills/labstep/SKILL.md +208 -0
  586. package/data/skills/lamindb/SKILL.md +384 -0
  587. package/data/skills/latchbio-integration/SKILL.md +347 -0
  588. package/data/skills/latex-posters/SKILL.md +1602 -0
  589. package/data/skills/leads-literature-mining/SKILL.md +68 -0
  590. package/data/skills/ligandmpnn/SKILL.md +170 -0
  591. package/data/skills/linear-solvers/SKILL.md +165 -0
  592. package/data/skills/liquid-biopsy-analytics-agent/SKILL.md +171 -0
  593. package/data/skills/lit-synthesizer/SKILL.md +53 -0
  594. package/data/skills/literature-review/SKILL.md +584 -0
  595. package/data/skills/literature-search/SKILL.md +214 -0
  596. package/data/skills/lobster-bioinformatics/SKILL.md +305 -0
  597. package/data/skills/long-read-sequencing-agent/SKILL.md +181 -0
  598. package/data/skills/mage-antibody-generator/SKILL.md +54 -0
  599. package/data/skills/markdown-mermaid-writing/SKILL.md +327 -0
  600. package/data/skills/markitdown/SKILL.md +486 -0
  601. package/data/skills/matchms/SKILL.md +197 -0
  602. package/data/skills/matplotlib/SKILL.md +359 -0
  603. package/data/skills/mcpmed-bioinformatics-server/SKILL.md +42 -0
  604. package/data/skills/medchem/SKILL.md +400 -0
  605. package/data/skills/medea-therapeutic-discovery/SKILL.md +45 -0
  606. package/data/skills/medical-entity-extractor/SKILL.md +144 -0
  607. package/data/skills/medical-imaging-review/SKILL.md +170 -0
  608. package/data/skills/medical-research-toolkit/SKILL.md +273 -0
  609. package/data/skills/medrxiv-search/SKILL.md +211 -0
  610. package/data/skills/mental-health-analyzer/SKILL.md +981 -0
  611. package/data/skills/mesh-generation/SKILL.md +149 -0
  612. package/data/skills/metabolomics-workbench-database/SKILL.md +253 -0
  613. package/data/skills/microbiome-cancer-agent/SKILL.md +180 -0
  614. package/data/skills/modern-drug-rehab-computer/SKILL.md +392 -0
  615. package/data/skills/molecular-dynamics/SKILL.md +457 -0
  616. package/data/skills/molecular-glue-discovery-agent/SKILL.md +224 -0
  617. package/data/skills/molecule-evolution-agent/SKILL.md +62 -0
  618. package/data/skills/molfeat/SKILL.md +505 -0
  619. package/data/skills/monarch-database/SKILL.md +372 -0
  620. package/data/skills/mpn-progression-monitor-agent/SKILL.md +228 -0
  621. package/data/skills/mpn-research-assistant/SKILL.md +197 -0
  622. package/data/skills/mrd-edge-detection-agent/SKILL.md +213 -0
  623. package/data/skills/multi-ancestry-prs-agent/SKILL.md +224 -0
  624. package/data/skills/multi-search-engine/SKILL.md +110 -0
  625. package/data/skills/multimodal-medical-imaging/SKILL.md +59 -0
  626. package/data/skills/multimodal-radpath-fusion-agent/SKILL.md +213 -0
  627. package/data/skills/myeloma-mrd-agent/SKILL.md +184 -0
  628. package/data/skills/networkx/SKILL.md +435 -0
  629. package/data/skills/neurokit2/SKILL.md +350 -0
  630. package/data/skills/neuropixels-analysis/SKILL.md +344 -0
  631. package/data/skills/nextflow-development/SKILL.md +290 -0
  632. package/data/skills/ngs-analysis/SKILL.md +183 -0
  633. package/data/skills/nicheformer-spatial-agent/SKILL.md +197 -0
  634. package/data/skills/nk-cell-therapy-agent/SKILL.md +186 -0
  635. package/data/skills/nonlinear-solvers/SKILL.md +180 -0
  636. package/data/skills/numerical-integration/SKILL.md +166 -0
  637. package/data/skills/numerical-stability/SKILL.md +149 -0
  638. package/data/skills/nutrition-analyzer/SKILL.md +775 -0
  639. package/data/skills/occupational-health-analyzer/SKILL.md +386 -0
  640. package/data/skills/omero-integration/SKILL.md +245 -0
  641. package/data/skills/ontology-explorer/SKILL.md +168 -0
  642. package/data/skills/ontology-mapper/SKILL.md +171 -0
  643. package/data/skills/ontology-validator/SKILL.md +136 -0
  644. package/data/skills/open-notebook/SKILL.md +289 -0
  645. package/data/skills/open-targets-search/SKILL.md +211 -0
  646. package/data/skills/openalex-database/SKILL.md +488 -0
  647. package/data/skills/opentargets-database/SKILL.md +367 -0
  648. package/data/skills/opentrons-integration/SKILL.md +567 -0
  649. package/data/skills/opentrons-protocol-agent/SKILL.md +58 -0
  650. package/data/skills/organoid-drug-response-agent/SKILL.md +189 -0
  651. package/data/skills/pan-cancer-multiomics-agent/SKILL.md +159 -0
  652. package/data/skills/paper-2-web/SKILL.md +495 -0
  653. package/data/skills/parameter-optimization/SKILL.md +141 -0
  654. package/data/skills/patents-search/SKILL.md +211 -0
  655. package/data/skills/pathml/SKILL.md +160 -0
  656. package/data/skills/patiently-ai/SKILL.md +103 -0
  657. package/data/skills/pdb/SKILL.md +217 -0
  658. package/data/skills/pdb-database/SKILL.md +303 -0
  659. package/data/skills/pdf/SKILL.md +314 -0
  660. package/data/skills/pdf-anthropic/SKILL.md +294 -0
  661. package/data/skills/pdf-processing/SKILL.md +149 -0
  662. package/data/skills/pdf-processing-pro/SKILL.md +296 -0
  663. package/data/skills/pdx-model-analysis-agent/SKILL.md +169 -0
  664. package/data/skills/peer-review/SKILL.md +565 -0
  665. package/data/skills/performance-profiling/SKILL.md +255 -0
  666. package/data/skills/perplexity-search/SKILL.md +441 -0
  667. package/data/skills/pharmacogenomics-agent/SKILL.md +143 -0
  668. package/data/skills/pharmgx-reporter/SKILL.md +134 -0
  669. package/data/skills/phylogenetics/SKILL.md +404 -0
  670. package/data/skills/plotly/SKILL.md +265 -0
  671. package/data/skills/polars/SKILL.md +385 -0
  672. package/data/skills/popeve-variant-predictor-agent/SKILL.md +213 -0
  673. package/data/skills/post-processing/SKILL.md +338 -0
  674. package/data/skills/pptx/SKILL.md +232 -0
  675. package/data/skills/pptx-official/SKILL.md +484 -0
  676. package/data/skills/pptx-posters/SKILL.md +414 -0
  677. package/data/skills/precision-oncology-agent/SKILL.md +53 -0
  678. package/data/skills/prior-auth-coworker/SKILL.md +60 -0
  679. package/data/skills/prior-auth-review-skill/SKILL.md +360 -0
  680. package/data/skills/profile-report/SKILL.md +120 -0
  681. package/data/skills/protac-design-agent/SKILL.md +220 -0
  682. package/data/skills/protein-design-workflow/SKILL.md +199 -0
  683. package/data/skills/protein-qc/SKILL.md +300 -0
  684. package/data/skills/protein-structure-prediction/SKILL.md +59 -0
  685. package/data/skills/proteinmpnn/SKILL.md +279 -0
  686. package/data/skills/protocolsio-integration/SKILL.md +415 -0
  687. package/data/skills/prs-net-deep-learning-agent/SKILL.md +232 -0
  688. package/data/skills/psychologist-analyst/SKILL.md +1888 -0
  689. package/data/skills/pubchem-database/SKILL.md +568 -0
  690. package/data/skills/pubmed-database/SKILL.md +454 -0
  691. package/data/skills/pubmed-search/SKILL.md +103 -0
  692. package/data/skills/pydeseq2/SKILL.md +553 -0
  693. package/data/skills/pydicom/SKILL.md +428 -0
  694. package/data/skills/pyhealth/SKILL.md +485 -0
  695. package/data/skills/pylabrobot/SKILL.md +179 -0
  696. package/data/skills/pymc/SKILL.md +566 -0
  697. package/data/skills/pymoo/SKILL.md +565 -0
  698. package/data/skills/pyopenms/SKILL.md +211 -0
  699. package/data/skills/pysam/SKILL.md +259 -0
  700. package/data/skills/pytdc/SKILL.md +454 -0
  701. package/data/skills/pytorch-lightning/SKILL.md +172 -0
  702. package/data/skills/pyzotero/SKILL.md +111 -0
  703. package/data/skills/radgpt-radiology-reporter/SKILL.md +67 -0
  704. package/data/skills/radiomics-pathomics-fusion-agent/SKILL.md +221 -0
  705. package/data/skills/rdkit/SKILL.md +763 -0
  706. package/data/skills/reactome-database/SKILL.md +272 -0
  707. package/data/skills/receiving-code-review/SKILL.md +213 -0
  708. package/data/skills/recovery-community-moderator/SKILL.md +175 -0
  709. package/data/skills/regulatory-drafter/SKILL.md +56 -0
  710. package/data/skills/regulatory-drafting/SKILL.md +35 -0
  711. package/data/skills/rehabilitation-analyzer/SKILL.md +636 -0
  712. package/data/skills/repro-enforcer/SKILL.md +50 -0
  713. package/data/skills/requesting-code-review/SKILL.md +105 -0
  714. package/data/skills/research-grants/SKILL.md +935 -0
  715. package/data/skills/research-literature/SKILL.md +35 -0
  716. package/data/skills/research-lookup/SKILL.md +502 -0
  717. package/data/skills/rfdiffusion/SKILL.md +306 -0
  718. package/data/skills/rna-velocity-agent/SKILL.md +174 -0
  719. package/data/skills/scanpy/SKILL.md +380 -0
  720. package/data/skills/scfoundation-model-agent/SKILL.md +210 -0
  721. package/data/skills/scientific-brainstorming/SKILL.md +185 -0
  722. package/data/skills/scientific-critical-thinking/SKILL.md +566 -0
  723. package/data/skills/scientific-manuscript/SKILL.md +181 -0
  724. package/data/skills/scientific-problem-selection/SKILL.md +269 -0
  725. package/data/skills/scientific-schematics/SKILL.md +619 -0
  726. package/data/skills/scientific-slides/SKILL.md +1154 -0
  727. package/data/skills/scientific-visualization/SKILL.md +773 -0
  728. package/data/skills/scientific-writing/SKILL.md +483 -0
  729. package/data/skills/scikit-bio/SKILL.md +431 -0
  730. package/data/skills/scikit-learn/SKILL.md +515 -0
  731. package/data/skills/scikit-survival/SKILL.md +393 -0
  732. package/data/skills/scrna-orchestrator/SKILL.md +204 -0
  733. package/data/skills/scrna-qc/SKILL.md +43 -0
  734. package/data/skills/scvelo/SKILL.md +321 -0
  735. package/data/skills/scvi-tools/SKILL.md +184 -0
  736. package/data/skills/seaborn/SKILL.md +671 -0
  737. package/data/skills/search-strategy/SKILL.md +247 -0
  738. package/data/skills/seq-wrangler/SKILL.md +58 -0
  739. package/data/skills/shap/SKILL.md +560 -0
  740. package/data/skills/simo-multiomics-integration-agent/SKILL.md +178 -0
  741. package/data/skills/simpy/SKILL.md +423 -0
  742. package/data/skills/simulation-orchestrator/SKILL.md +230 -0
  743. package/data/skills/simulation-validator/SKILL.md +195 -0
  744. package/data/skills/single-annotation/SKILL.md +129 -0
  745. package/data/skills/single-cell-rna-qc/SKILL.md +175 -0
  746. package/data/skills/single-cellphone-db/SKILL.md +68 -0
  747. package/data/skills/single-clustering/SKILL.md +75 -0
  748. package/data/skills/single-downstream-analysis/SKILL.md +150 -0
  749. package/data/skills/single-multiomics/SKILL.md +44 -0
  750. package/data/skills/single-preprocessing/SKILL.md +184 -0
  751. package/data/skills/single-to-spatial-mapping/SKILL.md +48 -0
  752. package/data/skills/single-trajectory/SKILL.md +62 -0
  753. package/data/skills/sleep-analyzer/SKILL.md +773 -0
  754. package/data/skills/slurm-job-script-generator/SKILL.md +135 -0
  755. package/data/skills/solublempnn/SKILL.md +165 -0
  756. package/data/skills/spatial-agent/SKILL.md +56 -0
  757. package/data/skills/spatial-epigenomics-agent/SKILL.md +163 -0
  758. package/data/skills/spatial-transcriptomics-agent/SKILL.md +75 -0
  759. package/data/skills/spatial-transcriptomics-analysis/SKILL.md +72 -0
  760. package/data/skills/spatial-transcriptomics-analysis/STAgent/SKILL.md +75 -0
  761. package/data/skills/spatial-transcriptomics-analysis/SpatialAgent/SKILL.md +56 -0
  762. package/data/skills/spatial-transcriptomics-analysis/bioSkills/image-analysis/SKILL.md +266 -0
  763. package/data/skills/spatial-transcriptomics-analysis/bioSkills/spatial-communication/SKILL.md +287 -0
  764. package/data/skills/spatial-transcriptomics-analysis/bioSkills/spatial-data-io/SKILL.md +243 -0
  765. package/data/skills/spatial-transcriptomics-analysis/bioSkills/spatial-deconvolution/SKILL.md +298 -0
  766. package/data/skills/spatial-transcriptomics-analysis/bioSkills/spatial-domains/SKILL.md +229 -0
  767. package/data/skills/spatial-transcriptomics-analysis/bioSkills/spatial-multiomics/SKILL.md +172 -0
  768. package/data/skills/spatial-transcriptomics-analysis/bioSkills/spatial-neighbors/SKILL.md +189 -0
  769. package/data/skills/spatial-transcriptomics-analysis/bioSkills/spatial-preprocessing/SKILL.md +232 -0
  770. package/data/skills/spatial-transcriptomics-analysis/bioSkills/spatial-proteomics/SKILL.md +127 -0
  771. package/data/skills/spatial-transcriptomics-analysis/bioSkills/spatial-statistics/SKILL.md +225 -0
  772. package/data/skills/spatial-transcriptomics-analysis/bioSkills/spatial-visualization/SKILL.md +270 -0
  773. package/data/skills/spatial-tutorials/SKILL.md +87 -0
  774. package/data/skills/speech-pathology-ai/SKILL.md +184 -0
  775. package/data/skills/statistical-analysis/SKILL.md +626 -0
  776. package/data/skills/statsmodels/SKILL.md +608 -0
  777. package/data/skills/string-database/SKILL.md +528 -0
  778. package/data/skills/struct-predictor/SKILL.md +52 -0
  779. package/data/skills/subagent-driven-development/SKILL.md +242 -0
  780. package/data/skills/systematic-debugging/SKILL.md +296 -0
  781. package/data/skills/tcell-exhaustion-analysis-agent/SKILL.md +139 -0
  782. package/data/skills/tcga-preprocessing/SKILL.md +49 -0
  783. package/data/skills/tcm-constitution-analyzer/SKILL.md +664 -0
  784. package/data/skills/tcr-pmhc-prediction-agent/SKILL.md +226 -0
  785. package/data/skills/tcr-repertoire-analysis-agent/SKILL.md +218 -0
  786. package/data/skills/test-driven-development/SKILL.md +371 -0
  787. package/data/skills/tiledbvcf/SKILL.md +459 -0
  788. package/data/skills/time-resolved-cryoem-agent/SKILL.md +223 -0
  789. package/data/skills/time-stepping/SKILL.md +140 -0
  790. package/data/skills/timesfm-forecasting/SKILL.md +785 -0
  791. package/data/skills/tme-immune-profiling-agent/SKILL.md +220 -0
  792. package/data/skills/tooluniverse-adverse-event-detection/SKILL.md +1115 -0
  793. package/data/skills/tooluniverse-antibody-engineering/SKILL.md +1581 -0
  794. package/data/skills/tooluniverse-binder-discovery/SKILL.md +1459 -0
  795. package/data/skills/tooluniverse-cancer-variant-interpretation/SKILL.md +971 -0
  796. package/data/skills/tooluniverse-chemical-compound-retrieval/SKILL.md +322 -0
  797. package/data/skills/tooluniverse-chemical-safety/SKILL.md +733 -0
  798. package/data/skills/tooluniverse-clinical-guidelines/SKILL.md +399 -0
  799. package/data/skills/tooluniverse-clinical-trial-design/SKILL.md +1195 -0
  800. package/data/skills/tooluniverse-clinical-trial-matching/SKILL.md +1333 -0
  801. package/data/skills/tooluniverse-crispr-screen-analysis/SKILL.md +900 -0
  802. package/data/skills/tooluniverse-disease-research/SKILL.md +630 -0
  803. package/data/skills/tooluniverse-drug-drug-interaction/SKILL.md +73 -0
  804. package/data/skills/tooluniverse-drug-repurposing/SKILL.md +595 -0
  805. package/data/skills/tooluniverse-drug-research/SKILL.md +1642 -0
  806. package/data/skills/tooluniverse-drug-target-validation/SKILL.md +1206 -0
  807. package/data/skills/tooluniverse-epigenomics/SKILL.md +1489 -0
  808. package/data/skills/tooluniverse-expression-data-retrieval/SKILL.md +389 -0
  809. package/data/skills/tooluniverse-gene-enrichment/SKILL.md +402 -0
  810. package/data/skills/tooluniverse-gwas-drug-discovery/SKILL.md +576 -0
  811. package/data/skills/tooluniverse-gwas-finemapping/SKILL.md +309 -0
  812. package/data/skills/tooluniverse-gwas-snp-interpretation/SKILL.md +223 -0
  813. package/data/skills/tooluniverse-gwas-study-explorer/SKILL.md +342 -0
  814. package/data/skills/tooluniverse-gwas-trait-to-gene/SKILL.md +236 -0
  815. package/data/skills/tooluniverse-image-analysis/SKILL.md +439 -0
  816. package/data/skills/tooluniverse-immune-repertoire-analysis/SKILL.md +949 -0
  817. package/data/skills/tooluniverse-immunotherapy-response-prediction/SKILL.md +865 -0
  818. package/data/skills/tooluniverse-infectious-disease/SKILL.md +749 -0
  819. package/data/skills/tooluniverse-literature-deep-research/SKILL.md +1050 -0
  820. package/data/skills/tooluniverse-metabolomics/SKILL.md +298 -0
  821. package/data/skills/tooluniverse-metabolomics-analysis/SKILL.md +764 -0
  822. package/data/skills/tooluniverse-multi-omics-integration/SKILL.md +703 -0
  823. package/data/skills/tooluniverse-multiomic-disease-characterization/SKILL.md +1138 -0
  824. package/data/skills/tooluniverse-network-pharmacology/SKILL.md +1312 -0
  825. package/data/skills/tooluniverse-pharmacovigilance/SKILL.md +807 -0
  826. package/data/skills/tooluniverse-phylogenetics/SKILL.md +461 -0
  827. package/data/skills/tooluniverse-polygenic-risk-score/SKILL.md +397 -0
  828. package/data/skills/tooluniverse-precision-medicine-stratification/SKILL.md +1143 -0
  829. package/data/skills/tooluniverse-precision-oncology/SKILL.md +1091 -0
  830. package/data/skills/tooluniverse-protein-interactions/SKILL.md +446 -0
  831. package/data/skills/tooluniverse-protein-structure-retrieval/SKILL.md +416 -0
  832. package/data/skills/tooluniverse-protein-therapeutic-design/SKILL.md +637 -0
  833. package/data/skills/tooluniverse-proteomics-analysis/SKILL.md +843 -0
  834. package/data/skills/tooluniverse-rare-disease-diagnosis/SKILL.md +1257 -0
  835. package/data/skills/tooluniverse-rnaseq-deseq2/SKILL.md +536 -0
  836. package/data/skills/tooluniverse-sequence-retrieval/SKILL.md +419 -0
  837. package/data/skills/tooluniverse-single-cell/SKILL.md +719 -0
  838. package/data/skills/tooluniverse-spatial-omics-analysis/SKILL.md +1102 -0
  839. package/data/skills/tooluniverse-spatial-transcriptomics/SKILL.md +788 -0
  840. package/data/skills/tooluniverse-statistical-modeling/SKILL.md +557 -0
  841. package/data/skills/tooluniverse-structural-variant-analysis/SKILL.md +1356 -0
  842. package/data/skills/tooluniverse-systems-biology/SKILL.md +374 -0
  843. package/data/skills/tooluniverse-target-research/SKILL.md +1510 -0
  844. package/data/skills/tooluniverse-variant-analysis/SKILL.md +448 -0
  845. package/data/skills/tooluniverse-variant-interpretation/SKILL.md +1118 -0
  846. package/data/skills/torch-geometric/SKILL.md +674 -0
  847. package/data/skills/torch_geometric/SKILL.md +670 -0
  848. package/data/skills/torchdrug/SKILL.md +444 -0
  849. package/data/skills/tpd-ternary-complex-agent/SKILL.md +226 -0
  850. package/data/skills/transformers/SKILL.md +157 -0
  851. package/data/skills/travel-health-analyzer/SKILL.md +421 -0
  852. package/data/skills/treatment-plans/SKILL.md +1576 -0
  853. package/data/skills/trial-eligibility-agent/SKILL.md +54 -0
  854. package/data/skills/trialgpt-matching/SKILL.md +66 -0
  855. package/data/skills/tumor-clonal-evolution-agent/SKILL.md +134 -0
  856. package/data/skills/tumor-heterogeneity-agent/SKILL.md +216 -0
  857. package/data/skills/tumor-mutational-burden-agent/SKILL.md +188 -0
  858. package/data/skills/ukb-navigator/SKILL.md +113 -0
  859. package/data/skills/umap-learn/SKILL.md +473 -0
  860. package/data/skills/uniprot-database/SKILL.md +189 -0
  861. package/data/skills/universal-single-cell-annotator/SKILL.md +72 -0
  862. package/data/skills/using-git-worktrees/SKILL.md +218 -0
  863. package/data/skills/using-superpowers/SKILL.md +95 -0
  864. package/data/skills/usmle/SKILL.md +62 -0
  865. package/data/skills/uspto-database/SKILL.md +597 -0
  866. package/data/skills/vaex/SKILL.md +180 -0
  867. package/data/skills/varcadd-pathogenicity/SKILL.md +68 -0
  868. package/data/skills/variant-interpretation-acmg/SKILL.md +58 -0
  869. package/data/skills/variant-interpretation-acmg/bioSkills/clinical-interpretation/SKILL.md +334 -0
  870. package/data/skills/variant-interpretation-acmg/bioSkills/consensus-sequences/SKILL.md +343 -0
  871. package/data/skills/variant-interpretation-acmg/bioSkills/deepvariant/SKILL.md +279 -0
  872. package/data/skills/variant-interpretation-acmg/bioSkills/filtering-best-practices/SKILL.md +362 -0
  873. package/data/skills/variant-interpretation-acmg/bioSkills/gatk-variant-calling/SKILL.md +398 -0
  874. package/data/skills/variant-interpretation-acmg/bioSkills/joint-calling/SKILL.md +343 -0
  875. package/data/skills/variant-interpretation-acmg/bioSkills/structural-variant-calling/SKILL.md +256 -0
  876. package/data/skills/variant-interpretation-acmg/bioSkills/variant-annotation/SKILL.md +387 -0
  877. package/data/skills/variant-interpretation-acmg/bioSkills/variant-calling/SKILL.md +258 -0
  878. package/data/skills/variant-interpretation-acmg/bioSkills/variant-normalization/SKILL.md +304 -0
  879. package/data/skills/variant-interpretation-acmg/bioSkills/vcf-basics/SKILL.md +329 -0
  880. package/data/skills/variant-interpretation-acmg/bioSkills/vcf-manipulation/SKILL.md +398 -0
  881. package/data/skills/variant-interpretation-acmg/bioSkills/vcf-statistics/SKILL.md +424 -0
  882. package/data/skills/variant-interpretation-acmg/varCADD/SKILL.md +68 -0
  883. package/data/skills/vcf-annotator/SKILL.md +55 -0
  884. package/data/skills/verification-before-completion/SKILL.md +139 -0
  885. package/data/skills/virtual-lab-agent/SKILL.md +240 -0
  886. package/data/skills/wearable-analysis-agent/SKILL.md +70 -0
  887. package/data/skills/weightloss-analyzer/SKILL.md +320 -0
  888. package/data/skills/wellally-tech/SKILL.md +685 -0
  889. package/data/skills/wikipedia-search/SKILL.md +481 -0
  890. package/data/skills/writing-plans/SKILL.md +116 -0
  891. package/data/skills/writing-skills/SKILL.md +655 -0
  892. package/data/skills/xlsx/SKILL.md +292 -0
  893. package/data/skills/xlsx-official/SKILL.md +289 -0
  894. package/data/skills/zarr-python/SKILL.md +777 -0
  895. package/data/skills/zinc-database/SKILL.md +398 -0
  896. package/data/tools/__init__.py +8 -0
  897. package/data/tools/hpc.py +71 -0
  898. package/data/tools/hpc_client/__init__.py +8 -0
  899. package/data/tools/hpc_client/builders/__init__.py +12 -0
  900. package/data/tools/hpc_client/builders/alphafold.py +36 -0
  901. package/data/tools/hpc_client/builders/boltz.py +33 -0
  902. package/data/tools/hpc_client/builders/chai.py +30 -0
  903. package/data/tools/hpc_client/builders/immunebuilder.py +31 -0
  904. package/data/tools/hpc_client/builders/rfantibody.py +58 -0
  905. package/data/tools/hpc_client/builders/thermompnn.py +16 -0
  906. package/data/tools/hpc_client/hpc_api.py +41 -0
  907. package/data/tools/hpc_client/hpc_tools.py +218 -0
  908. package/data/tools/hpc_dynamic.py +71 -0
  909. package/data/tools/integrations/__init__.py +14 -0
  910. package/data/tools/integrations/adaptyv.py +107 -0
  911. package/data/tools/integrations/addgene.py +52 -0
  912. package/data/tools/integrations/api_internal.py +33 -0
  913. package/data/tools/molecular_biology.py +688 -0
  914. package/data/tools/pharmacology.py +67 -0
  915. package/data/workflows/bulk-omics-clustering/SKILL.md +501 -0
  916. package/data/workflows/bulk-omics-clustering/references/best_practices.md +395 -0
  917. package/data/workflows/bulk-omics-clustering/references/clustering_methods_comparison.md +288 -0
  918. package/data/workflows/bulk-omics-clustering/references/common-patterns.md +1136 -0
  919. package/data/workflows/bulk-omics-clustering/references/decision-guide.md +819 -0
  920. package/data/workflows/bulk-omics-clustering/references/distance_metrics_guide.md +388 -0
  921. package/data/workflows/bulk-omics-clustering/references/parameter_guide.md +396 -0
  922. package/data/workflows/bulk-omics-clustering/references/r-quick-start.md +105 -0
  923. package/data/workflows/bulk-omics-clustering/references/validation_metrics_guide.md +315 -0
  924. package/data/workflows/bulk-omics-clustering/scripts/characterize_clusters.py +255 -0
  925. package/data/workflows/bulk-omics-clustering/scripts/cluster_validation.py +449 -0
  926. package/data/workflows/bulk-omics-clustering/scripts/density_clustering.py +321 -0
  927. package/data/workflows/bulk-omics-clustering/scripts/dimensionality_reduction.py +328 -0
  928. package/data/workflows/bulk-omics-clustering/scripts/distance_metrics.py +251 -0
  929. package/data/workflows/bulk-omics-clustering/scripts/export_results.py +456 -0
  930. package/data/workflows/bulk-omics-clustering/scripts/hierarchical_clustering.R +229 -0
  931. package/data/workflows/bulk-omics-clustering/scripts/hierarchical_clustering.py +269 -0
  932. package/data/workflows/bulk-omics-clustering/scripts/kmeans_clustering.py +346 -0
  933. package/data/workflows/bulk-omics-clustering/scripts/load_example_data.R +171 -0
  934. package/data/workflows/bulk-omics-clustering/scripts/load_example_data.py +171 -0
  935. package/data/workflows/bulk-omics-clustering/scripts/model_based_clustering.py +370 -0
  936. package/data/workflows/bulk-omics-clustering/scripts/optimal_clusters.py +381 -0
  937. package/data/workflows/bulk-omics-clustering/scripts/plot_cluster_heatmap.R +141 -0
  938. package/data/workflows/bulk-omics-clustering/scripts/plot_clustering_results.py +452 -0
  939. package/data/workflows/bulk-omics-clustering/scripts/prepare_data.py +250 -0
  940. package/data/workflows/bulk-omics-clustering/scripts/stability_analysis.py +434 -0
  941. package/data/workflows/bulk-rnaseq-counts-to-de-deseq2/SKILL.md +505 -0
  942. package/data/workflows/bulk-rnaseq-counts-to-de-deseq2/references/comprehensive-reference.md +440 -0
  943. package/data/workflows/bulk-rnaseq-counts-to-de-deseq2/references/decision-guide.md +327 -0
  944. package/data/workflows/bulk-rnaseq-counts-to-de-deseq2/references/troubleshooting.md +456 -0
  945. package/data/workflows/bulk-rnaseq-counts-to-de-deseq2/references/usage-guide.md +75 -0
  946. package/data/workflows/bulk-rnaseq-counts-to-de-deseq2/scripts/basic_workflow.R +149 -0
  947. package/data/workflows/bulk-rnaseq-counts-to-de-deseq2/scripts/batch_correction.R +44 -0
  948. package/data/workflows/bulk-rnaseq-counts-to-de-deseq2/scripts/export_results.R +190 -0
  949. package/data/workflows/bulk-rnaseq-counts-to-de-deseq2/scripts/extract_results.R +242 -0
  950. package/data/workflows/bulk-rnaseq-counts-to-de-deseq2/scripts/load_example_data.R +250 -0
  951. package/data/workflows/bulk-rnaseq-counts-to-de-deseq2/scripts/multi_condition.R +50 -0
  952. package/data/workflows/bulk-rnaseq-counts-to-de-deseq2/scripts/qc_plots.R +410 -0
  953. package/data/workflows/bulk-rnaseq-counts-to-de-deseq2/scripts/transformations.R +218 -0
  954. package/data/workflows/chip-atlas-diff-analysis/SKILL.md +222 -0
  955. package/data/workflows/chip-atlas-diff-analysis/references/chipatlas_diff_api_format.md +106 -0
  956. package/data/workflows/chip-atlas-diff-analysis/references/diff_analysis_methods.md +89 -0
  957. package/data/workflows/chip-atlas-diff-analysis/references/output_format.md +78 -0
  958. package/data/workflows/chip-atlas-diff-analysis/scripts/__init__.py +1 -0
  959. package/data/workflows/chip-atlas-diff-analysis/scripts/annotate_genes.py +144 -0
  960. package/data/workflows/chip-atlas-diff-analysis/scripts/export_all.py +498 -0
  961. package/data/workflows/chip-atlas-diff-analysis/scripts/filter_regions.py +176 -0
  962. package/data/workflows/chip-atlas-diff-analysis/scripts/generate_all_plots.py +321 -0
  963. package/data/workflows/chip-atlas-diff-analysis/scripts/load_example_data.py +149 -0
  964. package/data/workflows/chip-atlas-diff-analysis/scripts/load_user_data.py +211 -0
  965. package/data/workflows/chip-atlas-diff-analysis/scripts/parse_bed_results.py +240 -0
  966. package/data/workflows/chip-atlas-diff-analysis/scripts/qc_checks.py +621 -0
  967. package/data/workflows/chip-atlas-diff-analysis/scripts/query_chipatlas_api.py +329 -0
  968. package/data/workflows/chip-atlas-diff-analysis/scripts/run_diff_workflow.py +256 -0
  969. package/data/workflows/chip-atlas-peak-enrichment/SKILL.md +212 -0
  970. package/data/workflows/chip-atlas-peak-enrichment/references/chipatlas_metadata_format.md +115 -0
  971. package/data/workflows/chip-atlas-peak-enrichment/references/enrichment_statistics.md +145 -0
  972. package/data/workflows/chip-atlas-peak-enrichment/references/peak_thresholds.md +63 -0
  973. package/data/workflows/chip-atlas-peak-enrichment/references/promoter_definitions.md +69 -0
  974. package/data/workflows/chip-atlas-peak-enrichment/scripts/__init__.py +1 -0
  975. package/data/workflows/chip-atlas-peak-enrichment/scripts/convert_genes_to_regions.py +271 -0
  976. package/data/workflows/chip-atlas-peak-enrichment/scripts/export_all.py +456 -0
  977. package/data/workflows/chip-atlas-peak-enrichment/scripts/filter_experiments.py +116 -0
  978. package/data/workflows/chip-atlas-peak-enrichment/scripts/generate_all_plots.py +280 -0
  979. package/data/workflows/chip-atlas-peak-enrichment/scripts/load_example_data.py +96 -0
  980. package/data/workflows/chip-atlas-peak-enrichment/scripts/load_user_data.py +183 -0
  981. package/data/workflows/chip-atlas-peak-enrichment/scripts/query_chipatlas_api.py +349 -0
  982. package/data/workflows/chip-atlas-peak-enrichment/scripts/run_enrichment_workflow.py +271 -0
  983. package/data/workflows/chip-atlas-target-genes/SKILL.md +230 -0
  984. package/data/workflows/chip-atlas-target-genes/references/macs2_binding_scores.md +89 -0
  985. package/data/workflows/chip-atlas-target-genes/references/string_scores.md +58 -0
  986. package/data/workflows/chip-atlas-target-genes/references/target_genes_data_format.md +73 -0
  987. package/data/workflows/chip-atlas-target-genes/scripts/__init__.py +0 -0
  988. package/data/workflows/chip-atlas-target-genes/scripts/download_target_genes.py +200 -0
  989. package/data/workflows/chip-atlas-target-genes/scripts/export_all.py +340 -0
  990. package/data/workflows/chip-atlas-target-genes/scripts/filter_targets.py +205 -0
  991. package/data/workflows/chip-atlas-target-genes/scripts/generate_all_plots.py +330 -0
  992. package/data/workflows/chip-atlas-target-genes/scripts/load_example_query.py +61 -0
  993. package/data/workflows/chip-atlas-target-genes/scripts/load_user_query.py +47 -0
  994. package/data/workflows/chip-atlas-target-genes/scripts/run_target_genes_workflow.py +141 -0
  995. package/data/workflows/clinicaltrials-landscape/SKILL.md +257 -0
  996. package/data/workflows/clinicaltrials-landscape/references/api-parameters.md +181 -0
  997. package/data/workflows/clinicaltrials-landscape/references/mechanisms.md +141 -0
  998. package/data/workflows/clinicaltrials-landscape/references/output-schema.md +184 -0
  999. package/data/workflows/clinicaltrials-landscape/scripts/__init__.py +1 -0
  1000. package/data/workflows/clinicaltrials-landscape/scripts/classify_mechanisms.py +359 -0
  1001. package/data/workflows/clinicaltrials-landscape/scripts/compile_trials.py +579 -0
  1002. package/data/workflows/clinicaltrials-landscape/scripts/disease_config.py +161 -0
  1003. package/data/workflows/clinicaltrials-landscape/scripts/export_all.py +242 -0
  1004. package/data/workflows/clinicaltrials-landscape/scripts/generate_landscape_plots.py +761 -0
  1005. package/data/workflows/clinicaltrials-landscape/scripts/generate_pdf_report.py +1465 -0
  1006. package/data/workflows/clinicaltrials-landscape/scripts/generate_report.py +1813 -0
  1007. package/data/workflows/clinicaltrials-landscape/scripts/query_clinicaltrials.py +307 -0
  1008. package/data/workflows/coexpression-network/SKILL.md +344 -0
  1009. package/data/workflows/coexpression-network/references/parameter-tuning-guide.md +591 -0
  1010. package/data/workflows/coexpression-network/references/troubleshooting.md +483 -0
  1011. package/data/workflows/coexpression-network/references/wgcna-best-practices.md +563 -0
  1012. package/data/workflows/coexpression-network/references/wgcna-reference.md +538 -0
  1013. package/data/workflows/coexpression-network/scripts/build_network.R +43 -0
  1014. package/data/workflows/coexpression-network/scripts/correlate_modules_traits.R +92 -0
  1015. package/data/workflows/coexpression-network/scripts/export_wgcna_results.R +117 -0
  1016. package/data/workflows/coexpression-network/scripts/identify_hub_genes.R +63 -0
  1017. package/data/workflows/coexpression-network/scripts/load_example_data.R +214 -0
  1018. package/data/workflows/coexpression-network/scripts/module_enrichment.R +159 -0
  1019. package/data/workflows/coexpression-network/scripts/pick_soft_power.R +70 -0
  1020. package/data/workflows/coexpression-network/scripts/plot_all_wgcna.R +104 -0
  1021. package/data/workflows/coexpression-network/scripts/plot_eigengene_heatmap.R +65 -0
  1022. package/data/workflows/coexpression-network/scripts/plot_hub_genes.R +70 -0
  1023. package/data/workflows/coexpression-network/scripts/plot_module_dendrogram.R +50 -0
  1024. package/data/workflows/coexpression-network/scripts/plotting_helpers.R +87 -0
  1025. package/data/workflows/coexpression-network/scripts/prepare_wgcna_data.R +73 -0
  1026. package/data/workflows/coexpression-network/scripts/wgcna_workflow.R +93 -0
  1027. package/data/workflows/experimental-design-statistics/SKILL.md +408 -0
  1028. package/data/workflows/experimental-design-statistics/references/batch_effect_mitigation.md +756 -0
  1029. package/data/workflows/experimental-design-statistics/references/cv_tissue_database.csv +30 -0
  1030. package/data/workflows/experimental-design-statistics/references/experimental_design_best_practices.md +515 -0
  1031. package/data/workflows/experimental-design-statistics/references/multiple_testing_guide.md +730 -0
  1032. package/data/workflows/experimental-design-statistics/references/power_analysis_guidelines.md +635 -0
  1033. package/data/workflows/experimental-design-statistics/references/qc_guidelines.md +310 -0
  1034. package/data/workflows/experimental-design-statistics/references/software_requirements.md +328 -0
  1035. package/data/workflows/experimental-design-statistics/references/troubleshooting_guide.md +510 -0
  1036. package/data/workflows/experimental-design-statistics/scripts/batch_assignment.R +302 -0
  1037. package/data/workflows/experimental-design-statistics/scripts/batch_validation.R +342 -0
  1038. package/data/workflows/experimental-design-statistics/scripts/export_design.R +352 -0
  1039. package/data/workflows/experimental-design-statistics/scripts/load_example_data.R +204 -0
  1040. package/data/workflows/experimental-design-statistics/scripts/multiple_testing.R +417 -0
  1041. package/data/workflows/experimental-design-statistics/scripts/plot_power_curves.R +317 -0
  1042. package/data/workflows/experimental-design-statistics/scripts/power_atacseq.R +229 -0
  1043. package/data/workflows/experimental-design-statistics/scripts/power_pilot_based.R +289 -0
  1044. package/data/workflows/experimental-design-statistics/scripts/power_rnaseq.R +247 -0
  1045. package/data/workflows/experimental-design-statistics/scripts/sample_size_de.R +327 -0
  1046. package/data/workflows/experimental-design-statistics/scripts/sample_size_scrna.R +304 -0
  1047. package/data/workflows/functional-enrichment-from-degs/SKILL.md +387 -0
  1048. package/data/workflows/functional-enrichment-from-degs/references/database_guide.md +354 -0
  1049. package/data/workflows/functional-enrichment-from-degs/references/decision-guide.md +546 -0
  1050. package/data/workflows/functional-enrichment-from-degs/references/gsea_ora_comparison.md +213 -0
  1051. package/data/workflows/functional-enrichment-from-degs/references/gsea_ora_validation_framework.md +483 -0
  1052. package/data/workflows/functional-enrichment-from-degs/references/interpretation_guidelines.md +374 -0
  1053. package/data/workflows/functional-enrichment-from-degs/references/method-reference.md +742 -0
  1054. package/data/workflows/functional-enrichment-from-degs/scripts/export_results.R +190 -0
  1055. package/data/workflows/functional-enrichment-from-degs/scripts/generate_plots.R +240 -0
  1056. package/data/workflows/functional-enrichment-from-degs/scripts/get_msigdb_genesets.R +75 -0
  1057. package/data/workflows/functional-enrichment-from-degs/scripts/load_de_results.R +60 -0
  1058. package/data/workflows/functional-enrichment-from-degs/scripts/load_example_data.R +212 -0
  1059. package/data/workflows/functional-enrichment-from-degs/scripts/prepare_gene_lists.R +92 -0
  1060. package/data/workflows/functional-enrichment-from-degs/scripts/run_gsea.R +44 -0
  1061. package/data/workflows/functional-enrichment-from-degs/scripts/run_ora.R +53 -0
  1062. package/data/workflows/genetic-variant-annotation/SKILL.md +440 -0
  1063. package/data/workflows/genetic-variant-annotation/references/auto_installation_implementation.md +274 -0
  1064. package/data/workflows/genetic-variant-annotation/references/consequence_terms.md +392 -0
  1065. package/data/workflows/genetic-variant-annotation/references/filtering_strategies.md +808 -0
  1066. package/data/workflows/genetic-variant-annotation/references/installation_guide.md +557 -0
  1067. package/data/workflows/genetic-variant-annotation/references/pathogenicity_interpretation.md +473 -0
  1068. package/data/workflows/genetic-variant-annotation/references/qc_guidelines.md +524 -0
  1069. package/data/workflows/genetic-variant-annotation/references/snpeff_best_practices.md +481 -0
  1070. package/data/workflows/genetic-variant-annotation/references/tool_selection_guide.md +433 -0
  1071. package/data/workflows/genetic-variant-annotation/references/troubleshooting_guide.md +678 -0
  1072. package/data/workflows/genetic-variant-annotation/references/vep_best_practices.md +450 -0
  1073. package/data/workflows/genetic-variant-annotation/scripts/annotate_genes.py +243 -0
  1074. package/data/workflows/genetic-variant-annotation/scripts/export_results.py +450 -0
  1075. package/data/workflows/genetic-variant-annotation/scripts/filter_variants.py +365 -0
  1076. package/data/workflows/genetic-variant-annotation/scripts/install_tools.py +246 -0
  1077. package/data/workflows/genetic-variant-annotation/scripts/load_example_data.py +166 -0
  1078. package/data/workflows/genetic-variant-annotation/scripts/parse_snpeff_output.py +283 -0
  1079. package/data/workflows/genetic-variant-annotation/scripts/parse_vep_output.py +257 -0
  1080. package/data/workflows/genetic-variant-annotation/scripts/plot_variant_distribution.py +372 -0
  1081. package/data/workflows/genetic-variant-annotation/scripts/prioritize_variants.py +287 -0
  1082. package/data/workflows/genetic-variant-annotation/scripts/run_snpeff.py +418 -0
  1083. package/data/workflows/genetic-variant-annotation/scripts/run_vep.py +358 -0
  1084. package/data/workflows/genetic-variant-annotation/scripts/select_tool.py +203 -0
  1085. package/data/workflows/genetic-variant-annotation/scripts/test_complete_workflow.py +312 -0
  1086. package/data/workflows/genetic-variant-annotation/scripts/test_pickle_load.py +118 -0
  1087. package/data/workflows/genetic-variant-annotation/scripts/validate_vcf.py +351 -0
  1088. package/data/workflows/genetic-variant-annotation/scripts/verify_changes.py +212 -0
  1089. package/data/workflows/grn-pyscenic/SKILL.md +331 -0
  1090. package/data/workflows/grn-pyscenic/references/cli_interface.md +222 -0
  1091. package/data/workflows/grn-pyscenic/references/database_downloads.md +245 -0
  1092. package/data/workflows/grn-pyscenic/scripts/export_all.py +192 -0
  1093. package/data/workflows/grn-pyscenic/scripts/generate_report.py +512 -0
  1094. package/data/workflows/grn-pyscenic/scripts/integrate_with_adata.py +54 -0
  1095. package/data/workflows/grn-pyscenic/scripts/load_example_data.py +200 -0
  1096. package/data/workflows/grn-pyscenic/scripts/load_expression_data.py +61 -0
  1097. package/data/workflows/grn-pyscenic/scripts/plot_regulon_visualizations.py +263 -0
  1098. package/data/workflows/grn-pyscenic/scripts/run_grn_workflow.py +184 -0
  1099. package/data/workflows/gwas-to-function-twas/SKILL.md +394 -0
  1100. package/data/workflows/gwas-to-function-twas/references/fusion_best_practices.md +120 -0
  1101. package/data/workflows/gwas-to-function-twas/references/installation-guide.md +414 -0
  1102. package/data/workflows/gwas-to-function-twas/references/ldsc_qc_guidelines.md +287 -0
  1103. package/data/workflows/gwas-to-function-twas/references/spredixxcan_best_practices.md +166 -0
  1104. package/data/workflows/gwas-to-function-twas/references/therapeutic_interpretation_guide.md +717 -0
  1105. package/data/workflows/gwas-to-function-twas/references/tissue_reference_guide.md +182 -0
  1106. package/data/workflows/gwas-to-function-twas/references/troubleshooting_guide.md +317 -0
  1107. package/data/workflows/gwas-to-function-twas/references/twas_hub_validation_guide.md +88 -0
  1108. package/data/workflows/gwas-to-function-twas/scripts/colocalization_analysis.py +187 -0
  1109. package/data/workflows/gwas-to-function-twas/scripts/druggability_scoring.py +199 -0
  1110. package/data/workflows/gwas-to-function-twas/scripts/export_results.py +220 -0
  1111. package/data/workflows/gwas-to-function-twas/scripts/integrate_variant_annotation.py +194 -0
  1112. package/data/workflows/gwas-to-function-twas/scripts/interpret_therapeutic_direction.py +418 -0
  1113. package/data/workflows/gwas-to-function-twas/scripts/mendelian_randomization.py +749 -0
  1114. package/data/workflows/gwas-to-function-twas/scripts/multilayer_direction_analysis.py +471 -0
  1115. package/data/workflows/gwas-to-function-twas/scripts/plot_twas_results.py +252 -0
  1116. package/data/workflows/gwas-to-function-twas/scripts/run_fusion.py +155 -0
  1117. package/data/workflows/gwas-to-function-twas/scripts/run_smultixcan.py +102 -0
  1118. package/data/workflows/gwas-to-function-twas/scripts/run_spredixxcan.py +138 -0
  1119. package/data/workflows/gwas-to-function-twas/scripts/select_reference_panel.py +253 -0
  1120. package/data/workflows/gwas-to-function-twas/scripts/validate_gwas_sumstats.py +214 -0
  1121. package/data/workflows/gwas-to-function-twas/scripts/validate_with_twas_hub.py +439 -0
  1122. package/data/workflows/lasso-biomarker-panel/SKILL.md +322 -0
  1123. package/data/workflows/lasso-biomarker-panel/references/decision-guide.md +64 -0
  1124. package/data/workflows/lasso-biomarker-panel/references/lasso-reference.md +110 -0
  1125. package/data/workflows/lasso-biomarker-panel/references/validation-guide.md +105 -0
  1126. package/data/workflows/lasso-biomarker-panel/scripts/biological_interpretation.R +1560 -0
  1127. package/data/workflows/lasso-biomarker-panel/scripts/biomarker_plots.R +350 -0
  1128. package/data/workflows/lasso-biomarker-panel/scripts/export_results.R +1492 -0
  1129. package/data/workflows/lasso-biomarker-panel/scripts/lasso_workflow.R +328 -0
  1130. package/data/workflows/lasso-biomarker-panel/scripts/load_example_data.R +1903 -0
  1131. package/data/workflows/lasso-biomarker-panel/scripts/plotting_helpers.R +78 -0
  1132. package/data/workflows/lasso-biomarker-panel/scripts/prepare_features.R +225 -0
  1133. package/data/workflows/lasso-biomarker-panel/scripts/query_cellxgene.py +107 -0
  1134. package/data/workflows/lasso-biomarker-panel/scripts/validate_external.R +174 -0
  1135. package/data/workflows/literature-preclinical/SKILL.md +276 -0
  1136. package/data/workflows/literature-preclinical/assets/eval/simple_test.py +386 -0
  1137. package/data/workflows/literature-preclinical/references/experiment-extraction-guide.md +147 -0
  1138. package/data/workflows/literature-preclinical/references/full-text-enrichment-guide.md +121 -0
  1139. package/data/workflows/literature-preclinical/references/preclinical-search-guide.md +117 -0
  1140. package/data/workflows/literature-preclinical/scripts/extract_experiments.py +401 -0
  1141. package/data/workflows/literature-preclinical/scripts/generate_plots.R +303 -0
  1142. package/data/workflows/literature-preclinical/scripts/narrative_synthesis.py +653 -0
  1143. package/data/workflows/literature-preclinical/scripts/preclinical_search.py +332 -0
  1144. package/data/workflows/literature-preclinical/scripts/preclinical_synthesis.py +237 -0
  1145. package/data/workflows/literature-preclinical/scripts/report_generation.py +326 -0
  1146. package/data/workflows/mendelian-randomization-twosamplemr/SKILL.md +210 -0
  1147. package/data/workflows/mendelian-randomization-twosamplemr/references/interpretation-guide.md +239 -0
  1148. package/data/workflows/mendelian-randomization-twosamplemr/references/method-reference.md +190 -0
  1149. package/data/workflows/mendelian-randomization-twosamplemr/scripts/export_results.R +123 -0
  1150. package/data/workflows/mendelian-randomization-twosamplemr/scripts/generate_report.R +411 -0
  1151. package/data/workflows/mendelian-randomization-twosamplemr/scripts/load_data.R +281 -0
  1152. package/data/workflows/mendelian-randomization-twosamplemr/scripts/mr_plots.R +163 -0
  1153. package/data/workflows/mendelian-randomization-twosamplemr/scripts/run_mr_analysis.R +322 -0
  1154. package/data/workflows/pcr-primer-design/SKILL.md +397 -0
  1155. package/data/workflows/pcr-primer-design/references/code_examples.md +594 -0
  1156. package/data/workflows/pcr-primer-design/references/miqe_guidelines.md +453 -0
  1157. package/data/workflows/pcr-primer-design/references/parameter_ranges.md +356 -0
  1158. package/data/workflows/pcr-primer-design/references/primer_design_best_practices.md +451 -0
  1159. package/data/workflows/pcr-primer-design/references/troubleshooting_guide.md +477 -0
  1160. package/data/workflows/pcr-primer-design/scripts/__init__.py +2 -0
  1161. package/data/workflows/pcr-primer-design/scripts/calculate_tm.py +306 -0
  1162. package/data/workflows/pcr-primer-design/scripts/check_dimers.py +298 -0
  1163. package/data/workflows/pcr-primer-design/scripts/check_secondary_structures.py +343 -0
  1164. package/data/workflows/pcr-primer-design/scripts/design_qpcr_primers.py +233 -0
  1165. package/data/workflows/pcr-primer-design/scripts/design_standard_primers.py +197 -0
  1166. package/data/workflows/pcr-primer-design/scripts/design_taqman_probes.py +226 -0
  1167. package/data/workflows/pcr-primer-design/scripts/export_results.py +382 -0
  1168. package/data/workflows/pcr-primer-design/scripts/generate_reports.py +379 -0
  1169. package/data/workflows/pcr-primer-design/scripts/validate_specificity.py +311 -0
  1170. package/data/workflows/pcr-primer-design/scripts/visualize_primers.py +379 -0
  1171. package/data/workflows/polygenic-risk-score-prs-catalog/SKILL.md +195 -0
  1172. package/data/workflows/polygenic-risk-score-prs-catalog/references/interpretation-guide.md +80 -0
  1173. package/data/workflows/polygenic-risk-score-prs-catalog/references/pgs-catalog-guide.md +109 -0
  1174. package/data/workflows/polygenic-risk-score-prs-catalog/scripts/export_results.R +186 -0
  1175. package/data/workflows/polygenic-risk-score-prs-catalog/scripts/generate_plots.R +283 -0
  1176. package/data/workflows/polygenic-risk-score-prs-catalog/scripts/load_pgs_weights.R +228 -0
  1177. package/data/workflows/polygenic-risk-score-prs-catalog/scripts/load_reference_data.R +191 -0
  1178. package/data/workflows/polygenic-risk-score-prs-catalog/scripts/score_traits.R +216 -0
  1179. package/data/workflows/pooled-crispr-screens/SKILL.md +362 -0
  1180. package/data/workflows/pooled-crispr-screens/references/crispr_screen_best_practices.md +349 -0
  1181. package/data/workflows/pooled-crispr-screens/references/qc_guidelines.md +722 -0
  1182. package/data/workflows/pooled-crispr-screens/references/statistical_methods.md +644 -0
  1183. package/data/workflows/pooled-crispr-screens/references/troubleshooting_guide.md +684 -0
  1184. package/data/workflows/pooled-crispr-screens/references/umi_optimization.md +297 -0
  1185. package/data/workflows/pooled-crispr-screens/scripts/concatenate_libraries.py +132 -0
  1186. package/data/workflows/pooled-crispr-screens/scripts/detect_perturbed_cells.py +255 -0
  1187. package/data/workflows/pooled-crispr-screens/scripts/differential_expression.py +202 -0
  1188. package/data/workflows/pooled-crispr-screens/scripts/differential_expression_glmgampoi.py +320 -0
  1189. package/data/workflows/pooled-crispr-screens/scripts/export_results.py +261 -0
  1190. package/data/workflows/pooled-crispr-screens/scripts/expression_filtering.py +159 -0
  1191. package/data/workflows/pooled-crispr-screens/scripts/gene_name_corrections.py +188 -0
  1192. package/data/workflows/pooled-crispr-screens/scripts/generate_report.py +485 -0
  1193. package/data/workflows/pooled-crispr-screens/scripts/load_10x_libraries.py +69 -0
  1194. package/data/workflows/pooled-crispr-screens/scripts/load_example_data.py +257 -0
  1195. package/data/workflows/pooled-crispr-screens/scripts/map_sgrna_to_cells.py +119 -0
  1196. package/data/workflows/pooled-crispr-screens/scripts/normalize_and_scale.py +140 -0
  1197. package/data/workflows/pooled-crispr-screens/scripts/qc_filtering.py +185 -0
  1198. package/data/workflows/pooled-crispr-screens/scripts/run_glmgampoi.R +181 -0
  1199. package/data/workflows/pooled-crispr-screens/scripts/screen_all_perturbations.py +306 -0
  1200. package/data/workflows/pooled-crispr-screens/scripts/validate_perturbations.py +314 -0
  1201. package/data/workflows/pooled-crispr-screens/scripts/visualize_perturbations.py +314 -0
  1202. package/data/workflows/scrnaseq-scanpy-core-analysis/SKILL.md +425 -0
  1203. package/data/workflows/scrnaseq-scanpy-core-analysis/references/ambient_rna_correction.md +422 -0
  1204. package/data/workflows/scrnaseq-scanpy-core-analysis/references/common-patterns.md +533 -0
  1205. package/data/workflows/scrnaseq-scanpy-core-analysis/references/integration_methods.md +820 -0
  1206. package/data/workflows/scrnaseq-scanpy-core-analysis/references/marker_gene_database.md +471 -0
  1207. package/data/workflows/scrnaseq-scanpy-core-analysis/references/pseudobulk_de_guide.md +408 -0
  1208. package/data/workflows/scrnaseq-scanpy-core-analysis/references/qc_guidelines.md +535 -0
  1209. package/data/workflows/scrnaseq-scanpy-core-analysis/references/scanpy_best_practices.md +496 -0
  1210. package/data/workflows/scrnaseq-scanpy-core-analysis/references/troubleshooting_guide.md +668 -0
  1211. package/data/workflows/scrnaseq-scanpy-core-analysis/references/workflow-details.md +727 -0
  1212. package/data/workflows/scrnaseq-scanpy-core-analysis/scripts/annotate_celltypes.py +431 -0
  1213. package/data/workflows/scrnaseq-scanpy-core-analysis/scripts/cluster_cells.py +293 -0
  1214. package/data/workflows/scrnaseq-scanpy-core-analysis/scripts/export_results.py +423 -0
  1215. package/data/workflows/scrnaseq-scanpy-core-analysis/scripts/filter_cells.py +531 -0
  1216. package/data/workflows/scrnaseq-scanpy-core-analysis/scripts/find_markers.py +391 -0
  1217. package/data/workflows/scrnaseq-scanpy-core-analysis/scripts/find_variable_genes.py +222 -0
  1218. package/data/workflows/scrnaseq-scanpy-core-analysis/scripts/integrate_scvi.py +665 -0
  1219. package/data/workflows/scrnaseq-scanpy-core-analysis/scripts/integration_diagnostics.py +678 -0
  1220. package/data/workflows/scrnaseq-scanpy-core-analysis/scripts/load_example_data.py +68 -0
  1221. package/data/workflows/scrnaseq-scanpy-core-analysis/scripts/normalize_data.py +325 -0
  1222. package/data/workflows/scrnaseq-scanpy-core-analysis/scripts/plot_dimreduction.py +389 -0
  1223. package/data/workflows/scrnaseq-scanpy-core-analysis/scripts/plot_qc.py +320 -0
  1224. package/data/workflows/scrnaseq-scanpy-core-analysis/scripts/pseudobulk_de.py +553 -0
  1225. package/data/workflows/scrnaseq-scanpy-core-analysis/scripts/qc_metrics.py +477 -0
  1226. package/data/workflows/scrnaseq-scanpy-core-analysis/scripts/remove_ambient_rna.py +347 -0
  1227. package/data/workflows/scrnaseq-scanpy-core-analysis/scripts/run_umap.py +188 -0
  1228. package/data/workflows/scrnaseq-scanpy-core-analysis/scripts/scale_and_pca.py +365 -0
  1229. package/data/workflows/scrnaseq-scanpy-core-analysis/scripts/setup_and_import.py +334 -0
  1230. package/data/workflows/scrnaseq-seurat-core-analysis/SKILL.md +585 -0
  1231. package/data/workflows/scrnaseq-seurat-core-analysis/references/ambient_rna_correction.md +422 -0
  1232. package/data/workflows/scrnaseq-seurat-core-analysis/references/common-patterns.md +667 -0
  1233. package/data/workflows/scrnaseq-seurat-core-analysis/references/decision-guide.md +456 -0
  1234. package/data/workflows/scrnaseq-seurat-core-analysis/references/integration_methods.md +864 -0
  1235. package/data/workflows/scrnaseq-seurat-core-analysis/references/marker_gene_database.md +471 -0
  1236. package/data/workflows/scrnaseq-seurat-core-analysis/references/pseudobulk_de_guide.md +408 -0
  1237. package/data/workflows/scrnaseq-seurat-core-analysis/references/qc_guidelines.md +452 -0
  1238. package/data/workflows/scrnaseq-seurat-core-analysis/references/seurat_best_practices.md +417 -0
  1239. package/data/workflows/scrnaseq-seurat-core-analysis/references/troubleshooting_guide.md +566 -0
  1240. package/data/workflows/scrnaseq-seurat-core-analysis/references/workflow-details.md +801 -0
  1241. package/data/workflows/scrnaseq-seurat-core-analysis/scripts/annotate_celltypes.R +306 -0
  1242. package/data/workflows/scrnaseq-seurat-core-analysis/scripts/cluster_cells.R +223 -0
  1243. package/data/workflows/scrnaseq-seurat-core-analysis/scripts/export_results.R +292 -0
  1244. package/data/workflows/scrnaseq-seurat-core-analysis/scripts/filter_cells.R +576 -0
  1245. package/data/workflows/scrnaseq-seurat-core-analysis/scripts/find_markers.R +325 -0
  1246. package/data/workflows/scrnaseq-seurat-core-analysis/scripts/find_variable_features.R +106 -0
  1247. package/data/workflows/scrnaseq-seurat-core-analysis/scripts/integrate_batches.R +504 -0
  1248. package/data/workflows/scrnaseq-seurat-core-analysis/scripts/integration_diagnostics.R +596 -0
  1249. package/data/workflows/scrnaseq-seurat-core-analysis/scripts/load_example_data.R +89 -0
  1250. package/data/workflows/scrnaseq-seurat-core-analysis/scripts/normalize_data.R +184 -0
  1251. package/data/workflows/scrnaseq-seurat-core-analysis/scripts/plot_dimreduction.R +273 -0
  1252. package/data/workflows/scrnaseq-seurat-core-analysis/scripts/plot_qc.R +250 -0
  1253. package/data/workflows/scrnaseq-seurat-core-analysis/scripts/pseudobulk_de.R +324 -0
  1254. package/data/workflows/scrnaseq-seurat-core-analysis/scripts/qc_metrics.R +358 -0
  1255. package/data/workflows/scrnaseq-seurat-core-analysis/scripts/remove_ambient_rna.R +281 -0
  1256. package/data/workflows/scrnaseq-seurat-core-analysis/scripts/run_umap.R +116 -0
  1257. package/data/workflows/scrnaseq-seurat-core-analysis/scripts/scale_and_pca.R +243 -0
  1258. package/data/workflows/scrnaseq-seurat-core-analysis/scripts/setup_and_import.R +193 -0
  1259. package/data/workflows/spatial-transcriptomics/SKILL.md +256 -0
  1260. package/data/workflows/spatial-transcriptomics/references/spatial-analysis-guide.md +216 -0
  1261. package/data/workflows/spatial-transcriptomics/scripts/export_results.py +214 -0
  1262. package/data/workflows/spatial-transcriptomics/scripts/generate_all_plots.py +397 -0
  1263. package/data/workflows/spatial-transcriptomics/scripts/load_example_data.py +175 -0
  1264. package/data/workflows/spatial-transcriptomics/scripts/spatial_workflow.py +206 -0
  1265. package/dist/bgi.js +28 -1
  1266. package/package.json +2 -1
@@ -0,0 +1,1560 @@
1
+ # Biological Interpretation of Biomarker Panel
2
+ #
3
+ # Three complementary analyses:
4
+ # 1. Pathway enrichment (ORA on panel genes + GSEA on ranked features)
5
+ # 2. Cell-type expression context (CZI CELLxGENE Census)
6
+ # 3. GWAS genetic risk / disease gene overlap
7
+ #
8
+ # Usage:
9
+ # source("scripts/biological_interpretation.R")
10
+ # interp <- run_biological_interpretation(model, features, output_dir = "results",
11
+ # disease = "ibd") # or "bladder_cancer", "breast_cancer", or "sepsis"
12
+
13
+ cat("Loading biological interpretation functions...\n")
14
+
15
+ # ============================================================
16
+ # 1. Pathway Enrichment
17
+ # ============================================================
18
+
19
# Pathway enrichment for the biomarker panel.
#
# Runs over-representation analysis (GO:BP via enrichGO, Reactome via
# enricher) on the stable panel genes, then fgsea on the ranked feature list
# against MSigDB Hallmark, Reactome and ImmuneSigDB (C7) gene sets, and hands
# two dot plots to .save_enrichment_plot().
#
# Args:
#   model_result: list providing `stable_features` (panel gene symbols) and
#     `feature_importance` (data frame; the columns `feature`,
#     `selection_frequency` and `mean_coefficient` are read here).
#   features: not used by this function; kept so existing callers still work.
#   output_dir: directory used to build the plot file prefixes.
#
# Returns:
#   Named list of results (ora_go, ora_reactome, gsea_hallmark, gsea_reactome,
#   gsea_immunologic; ora_kegg is never populated). An analysis that failed
#   is reported on the console and reads back as NULL.
.run_pathway_enrichment <- function(model_result, features = NULL, output_dir = "results") {
  cat("\n--- Pathway Enrichment Analysis ---\n\n")

  suppressPackageStartupMessages({
    library(clusterProfiler)
    library(org.Hs.eg.db)
    library(fgsea)
    library(msigdbr)
    library(ggplot2)
    library(ggprism)
  })

  panel_genes <- model_result$stable_features
  fi <- model_result$feature_importance

  results <- list(ora_go = NULL, ora_kegg = NULL, gsea_hallmark = NULL,
                  gsea_reactome = NULL, gsea_immunologic = NULL)

  # Symbols that are excluded everywhere below (IGLV4-60 etc.)
  nonstandard_pattern <- "^IGH|^IGK|^IGL|^LOC|^C\\d+orf"
  is_standard <- function(symbols) !grepl(nonstandard_pattern, symbols)

  # NA-safe count of terms below an adjusted p-value cutoff; 0 for a NULL result
  count_significant <- function(enrich_res, cutoff = 0.1) {
    if (is.null(enrich_res)) {
      return(0L)
    }
    sum(enrich_res@result$p.adjust < cutoff, na.rm = TRUE)
  }

  # ---- ORA on panel genes (GO:BP) ----
  cat(" ORA: GO Biological Process on", length(panel_genes), "panel genes...\n")

  valid_genes <- panel_genes[is_standard(panel_genes)]
  cat(" Valid gene symbols for ORA:", length(valid_genes), "\n")

  # Universe = every tested feature with a standard symbol
  all_genes <- fi$feature
  all_valid <- all_genes[is_standard(all_genes)]

  ora_go <- tryCatch({
    ego <- enrichGO(
      gene = valid_genes,
      universe = all_valid,
      OrgDb = org.Hs.eg.db,
      keyType = "SYMBOL",
      ont = "BP",
      pAdjustMethod = "BH",
      pvalueCutoff = 0.1, # Relaxed for small gene list
      qvalueCutoff = 0.2,
      minGSSize = 5,
      maxGSSize = 500,
      readable = TRUE
    )
    n_go <- count_significant(ego)
    if (n_go > 0) {
      cat(" Found", n_go, "significant GO terms (padj < 0.1)\n")
    } else {
      cat(" No significant GO terms at padj < 0.1 (expected with 10 genes)\n")
    }
    ego
  }, error = function(e) {
    cat(" GO ORA failed:", conditionMessage(e), "\n")
    NULL
  })
  results$ora_go <- ora_go

  # ---- ORA on panel genes (Reactome via msigdbr) ----
  cat(" ORA: Reactome pathways on panel genes...\n")

  reactome_sets <- msigdbr(species = "Homo sapiens", collection = "C2", subcollection = "CP:REACTOME")
  reactome_list <- split(reactome_sets$gene_symbol, reactome_sets$gs_name)

  ora_reactome <- tryCatch({
    er <- enricher(
      gene = valid_genes,
      universe = all_valid,
      TERM2GENE = reactome_sets[, c("gs_name", "gene_symbol")],
      pvalueCutoff = 0.1,
      qvalueCutoff = 0.2,
      minGSSize = 5,
      maxGSSize = 500
    )
    n_reactome <- count_significant(er)
    if (n_reactome > 0) {
      cat(" Found", n_reactome, "significant Reactome terms\n")
    } else {
      cat(" No significant Reactome terms (expected with 10 genes)\n")
    }
    er
  }, error = function(e) {
    cat(" Reactome ORA failed:", conditionMessage(e), "\n")
    NULL
  })
  results$ora_reactome <- ora_reactome

  # ---- GSEA on ranked feature list ----
  cat(" GSEA: Ranked analysis on", nrow(fi), "features using selection frequency...\n")

  # Rank statistic: selection_frequency plus a small tie-breaker,
  # |mean_coefficient| / (10 * max |mean_coefficient|), which is at most 0.1.
  # The tie-breaker is dropped (0) when the maximum is zero or undefined, and
  # for features with a missing coefficient; otherwise the division yields
  # NaN/NA scores that sort() silently removes from the ranking.
  abs_coef <- abs(fi$mean_coefficient)
  max_coef <- if (any(!is.na(abs_coef))) max(abs_coef, na.rm = TRUE) else 0
  tie_breaker <- if (is.finite(max_coef) && max_coef > 0) {
    abs_coef / (max_coef * 10)
  } else {
    rep(0, length(abs_coef))
  }
  tie_breaker[is.na(tie_breaker)] <- 0
  combo_score <- fi$selection_frequency + tie_breaker
  stats <- setNames(combo_score, fi$feature)
  stats <- sort(stats, decreasing = TRUE)
  # Remove non-standard symbols
  stats <- stats[is_standard(names(stats))]

  # NOTE(review): every rank statistic here is non-negative; fgsea's
  # documentation suggests scoreType = "pos" for such input. Left at the
  # default so reported NES values do not change -- confirm intent.
  run_gsea <- function(pathways, found_msg, fail_msg) {
    tryCatch({
      res <- fgsea(pathways = pathways, stats = stats,
                   minSize = 5, maxSize = 500)
      res <- res[order(res$pval), ]
      n_sig <- sum(res$padj < 0.25, na.rm = TRUE)
      cat(" Found", n_sig, found_msg)
      res
    }, error = function(e) {
      cat(fail_msg, conditionMessage(e), "\n")
      NULL
    })
  }

  # Hallmark gene sets
  cat(" GSEA: MSigDB Hallmark (50 gene sets)...\n")
  hallmark <- msigdbr(species = "Homo sapiens", collection = "H")
  hallmark_list <- split(hallmark$gene_symbol, hallmark$gs_name)
  gsea_hallmark <- run_gsea(hallmark_list,
                            "enriched hallmark pathways (padj < 0.25)\n",
                            " Hallmark GSEA failed:")
  results$gsea_hallmark <- gsea_hallmark

  # Reactome
  cat(" GSEA: Reactome pathways...\n")
  gsea_reactome <- run_gsea(reactome_list,
                            "enriched Reactome pathways (padj < 0.25)\n",
                            " Reactome GSEA failed:")
  results$gsea_reactome <- gsea_reactome

  # Immunologic signatures (C7) - especially relevant for IBD
  cat(" GSEA: Immunologic signatures (C7)...\n")
  immuno <- msigdbr(species = "Homo sapiens", collection = "C7",
                    subcollection = "IMMUNESIGDB")
  immuno_list <- split(immuno$gene_symbol, immuno$gs_name)
  gsea_immuno <- run_gsea(immuno_list,
                          "enriched immunologic signatures (padj < 0.25)\n",
                          " Immunologic GSEA failed:")
  results$gsea_immunologic <- gsea_immuno

  # ---- Generate plots ----
  cat(" Generating enrichment plots...\n")

  # GSEA dot plot for hallmark pathways (top 15 by p-value); guarded like the
  # GO plot so a plotting problem cannot discard the computed results
  if (!is.null(gsea_hallmark) && nrow(gsea_hallmark) > 0) {
    tryCatch({
      top_pathways <- head(gsea_hallmark[order(gsea_hallmark$pval), ], 15)
      short_name <- gsub("HALLMARK_", "", top_pathways$pathway)
      short_name <- tolower(gsub("_", " ", short_name))
      # Title case
      top_pathways$pathway_short <- gsub("(^|\\s)(\\w)", "\\1\\U\\2",
                                         short_name, perl = TRUE)

      p_gsea <- ggplot(top_pathways, aes(x = NES,
                                         y = reorder(pathway_short, NES),
                                         size = size,
                                         color = -log10(pval))) +
        geom_point() +
        scale_color_gradient(low = "grey70", high = "firebrick", name = "-log10(p)") +
        scale_size_continuous(range = c(2, 6), name = "Gene set\nsize") +
        theme_prism(base_size = 11) +
        theme(
          axis.text.y = element_text(size = 9),
          plot.title = element_text(hjust = 0.5, face = "bold", size = 13)
        ) +
        labs(x = "Normalized Enrichment Score (NES)",
             y = NULL,
             title = "GSEA: MSigDB Hallmark Pathways") +
        geom_vline(xintercept = 0, linetype = "dashed", color = "grey50")

      .save_enrichment_plot(p_gsea, file.path(output_dir, "gsea_hallmark_dotplot"), 9, 6)
    }, error = function(e) cat(" Hallmark plot failed:", conditionMessage(e), "\n"))
  }

  # GO ORA dot plot (top 15 by raw p-value)
  if (!is.null(ora_go) && nrow(ora_go@result) > 0) {
    tryCatch({
      top_go <- head(ora_go@result[order(ora_go@result$pvalue), ], 15)
      if (nrow(top_go) > 0) {
        # GeneRatio arrives as a "k/n" string; convert it to a number
        ratio_parts <- strsplit(top_go$GeneRatio, "/")
        top_go$GeneRatio_num <- vapply(
          ratio_parts,
          function(parts) as.numeric(parts[1]) / as.numeric(parts[2]),
          numeric(1)
        )
        # Truncate long descriptions
        top_go$Description_short <- ifelse(
          nchar(top_go$Description) > 50,
          paste0(substr(top_go$Description, 1, 47), "..."),
          top_go$Description
        )
        p_ora <- ggplot(top_go, aes(x = GeneRatio_num,
                                    y = reorder(Description_short, GeneRatio_num),
                                    size = Count,
                                    color = -log10(pvalue))) +
          geom_point() +
          scale_color_gradient(low = "grey70", high = "steelblue4",
                               name = "-log10(p)") +
          scale_size_continuous(range = c(3, 7), name = "Gene\ncount") +
          theme_prism(base_size = 11) +
          theme(
            axis.text.y = element_text(size = 9),
            plot.title = element_text(hjust = 0.5, face = "bold", size = 13)
          ) +
          labs(x = "Gene Ratio", y = NULL,
               title = "GO Biological Process (Panel Genes)")

        .save_enrichment_plot(p_ora, file.path(output_dir, "ora_go_dotplot"), 9, 6)
      }
    }, error = function(e) cat(" GO plot failed:", conditionMessage(e), "\n"))
  }

  cat(" Pathway enrichment complete.\n")
  results
}
239
+
240
+
241
+ # ============================================================
242
+ # 2. Cell-Type Expression (CZI CELLxGENE Census)
243
+ # ============================================================
244
+
245
# Cell-type expression context for the panel genes.
#
# When scripts/query_cellxgene.py exists it is run through python3 to query
# the CZI CELLxGENE Census and the CSV it writes is loaded. If the script is
# missing, fails, or yields no rows, the curated literature-based tables for
# the requested disease are used instead. Any resulting table is passed to
# .plot_celltype_expression().
#
# Args:
#   panel_genes: character vector of gene symbols.
#   output_dir: directory holding celltype_expression.csv and the plot.
#   disease: "ibd", "bladder_cancer", "breast_cancer" or "sepsis"; any other
#     value uses the large-intestine tissue and the UC curated table.
#
# Returns:
#   The cell-type table (Census CSV contents or curated annotations).
.run_celltype_enrichment <- function(panel_genes, output_dir = "results",
                                     disease = "ibd") {
  cat("\n--- Cell-Type Expression Analysis (CZI CELLxGENE) ---\n\n")

  csv_path <- file.path(output_dir, "celltype_expression.csv")

  # Disease-specific tissue for CZI Census query
  tissue <- switch(disease,
    ibd = "large intestine",
    bladder_cancer = "bladder organ",
    breast_cancer = "breast",
    sepsis = "blood",
    "large intestine" # default
  )

  # Drop immunoglobulin, LOC and TMB-prefixed symbols before querying
  query_genes <- panel_genes[!grepl("^IGH|^IGK|^IGL|^LOC|^TMB", panel_genes)]
  cat(" Querying", length(query_genes), "genes in '", tissue, "':",
      paste(query_genes, collapse = ", "), "\n")

  # Try Python CELLxGENE Census query
  py_script <- file.path("scripts", "query_cellxgene.py")

  celltype_data <- NULL

  # Skip the query when nothing survived the filter: there is nothing to ask
  if (file.exists(py_script) && length(query_genes) > 0) {
    # Every argument is shell-quoted so symbols or paths containing spaces,
    # quotes or shell metacharacters cannot break or alter the command.
    query_args <- c(
      shQuote(py_script),
      shQuote(query_genes),
      "--output", shQuote(csv_path),
      "--tissue", shQuote(tissue)
    )
    cat(" Running CZI Census query (may take 1-3 minutes)...\n")

    # stdout and stderr are captured together; errors launching the process
    # are reported and treated as "no Census data"
    result <- tryCatch({
      system2("python3", args = query_args,
              stdout = TRUE, stderr = TRUE, timeout = 300)
    }, error = function(e) {
      cat(" Census query failed:", conditionMessage(e), "\n")
      NULL
    })

    if (!is.null(result)) cat(paste(" ", result, collapse = "\n"), "\n")

    # NOTE(review): a CSV left behind by an earlier run is loaded here even if
    # the query above failed -- confirm whether that caching is intended.
    if (file.exists(csv_path)) {
      # An empty or malformed file must lead to the curated fallback, not a crash
      celltype_data <- tryCatch(
        read.csv(csv_path, stringsAsFactors = FALSE),
        error = function(e) {
          cat(" Could not read Census output:", conditionMessage(e), "\n")
          NULL
        }
      )
      if (!is.null(celltype_data) && nrow(celltype_data) > 0) {
        cat(" Census data loaded:", nrow(celltype_data), "rows,",
            length(unique(celltype_data$cell_type)), "cell types\n")
      }
    }
  }

  # Fallback: curated literature-based annotations
  if (is.null(celltype_data) || nrow(celltype_data) == 0) {
    if (disease == "bladder_cancer") {
      cat(" Using curated cell-type annotations from published bladder cancer atlases...\n")
      celltype_data <- .bladder_cancer_celltype_annotations(panel_genes)
    } else if (disease == "breast_cancer") {
      cat(" Using curated cell-type annotations from published breast cancer atlases...\n")
      celltype_data <- .breast_cancer_celltype_annotations(panel_genes)
    } else if (disease == "sepsis") {
      cat(" Using curated cell-type annotations from published sepsis blood atlases...\n")
      celltype_data <- .sepsis_celltype_annotations(panel_genes)
    } else {
      cat(" Using curated cell-type annotations from published UC single-cell atlases...\n")
      celltype_data <- .curated_celltype_annotations()
    }
  }

  # Generate cell-type heatmap
  tissue_label <- switch(disease,
    ibd = "Large Intestine",
    bladder_cancer = "Bladder Tumor",
    breast_cancer = "Breast Tumor",
    sepsis = "Blood",
    "Tissue"
  )
  if (!is.null(celltype_data) && nrow(celltype_data) > 0) {
    .plot_celltype_expression(celltype_data, output_dir, tissue_label = tissue_label)
  }

  cat(" Cell-type analysis complete.\n")
  celltype_data
}
326
+
327
+
328
# Curated UC cell-type annotations for the IBD panel genes.
#
# Hand-curated from published UC single-cell atlases:
#   - Smillie et al. 2019 (Cell) - Human UC colon atlas
#   - Parikh et al. 2019 (Nature) - Intestinal organoids
#   - Kinchen et al. 2018 (Cell) - Colonic mesenchyme
#   - Corridoni et al. 2020 (Nat Med) - UC immune cells
#
# Returns:
#   data.frame of character columns gene, cell_type, compartment, uc_change,
#   evidence and source (always "curated"); one row per gene/cell-type pair.
.curated_celltype_annotations <- function() {
  field_names <- c("gene", "cell_type", "compartment", "uc_change", "evidence")

  # One record per line: gene, cell type, compartment, change in UC, evidence
  records <- c(
    # S100A8 - calprotectin subunit, innate immunity
    "S100A8", "Neutrophils", "Immune", "Strong upregulation", "Smillie 2019, Corridoni 2020",
    "S100A8", "Inflammatory monocytes", "Immune", "Strong upregulation", "Corridoni 2020",
    "S100A8", "Macrophages", "Immune", "Upregulation", "Smillie 2019",
    # SELENBP1 - selenium binding, colonocyte marker
    "SELENBP1", "Absorptive colonocytes", "Epithelial", "Downregulation", "Smillie 2019, Parikh 2019",
    "SELENBP1", "Transit-amplifying cells", "Epithelial", "Reduced", "Smillie 2019",
    # GUCA2B - uroguanylin, epithelial secretory
    "GUCA2B", "Goblet cells", "Epithelial", "Strong downregulation", "Parikh 2019",
    "GUCA2B", "Absorptive colonocytes", "Epithelial", "Downregulation", "Smillie 2019",
    # RGS13 - regulator of G-protein signaling
    "RGS13", "Germinal center B cells", "Immune", "Upregulation", "Smillie 2019",
    "RGS13", "Mast cells", "Immune", "Upregulation", "Kinchen 2018",
    # PTP4A3 - phosphatase, cell proliferation
    "PTP4A3", "Endothelial cells", "Stromal", "Variable", "Kinchen 2018",
    "PTP4A3", "Epithelial progenitors", "Epithelial", "Variable", "Smillie 2019",
    # CKB - creatine kinase brain-type
    "CKB", "Absorptive colonocytes", "Epithelial", "Downregulation", "Smillie 2019",
    "CKB", "Smooth muscle cells", "Stromal", "No change", "Kinchen 2018",
    # UCA1 - lncRNA, epithelial
    "UCA1", "Absorptive colonocytes", "Epithelial", "Upregulation", "Smillie 2019",
    "UCA1", "Transit-amplifying cells", "Epithelial", "Upregulation", "Smillie 2019",
    # GLDN - gliomedin, neural/enteric
    "GLDN", "Enteric neurons", "Stromal", "Not characterized", "Limited data"
  )

  record_matrix <- matrix(records, ncol = length(field_names), byrow = TRUE,
                          dimnames = list(NULL, field_names))
  annotations <- as.data.frame(record_matrix, stringsAsFactors = FALSE)
  annotations$source <- "curated"
  annotations
}
397
+
398
+
399
#' Curated bladder cancer TME cell-type annotations
#'
#' Sources: Chen et al. 2020 (Nat Commun), Lai et al. 2021 (Int J Cancer)
#'
#' Genes present in the curated table contribute one row per annotated cell
#' type; every other gene contributes a single "Not characterized" row.
#'
#' @param panel_genes Character vector of gene symbols (data-driven panel).
#' @return data.frame with columns gene, cell_type, compartment,
#'   disease_change, evidence, source. Rows follow the order of
#'   `panel_genes`; an empty panel yields a zero-row data.frame with the
#'   same columns.
.bladder_cancer_celltype_annotations <- function(panel_genes) {
  # Local constructor: parallel vectors of cell types, compartments, changes
  entry <- function(ct, comp, change) list(ct = ct, comp = comp, change = change)

  immune2 <- c("Immune", "Immune")
  cytolytic <- entry(c("CD8+ T cells", "NK cells"), immune2,
                     c("Cytolytic marker", "Cytolytic marker"))
  myeloid <- entry(c("Neutrophils", "Inflammatory monocytes"), immune2,
                   c("Myeloid inflammation marker", "Upregulated"))

  # Generic TME cell types for bladder cancer - annotated per gene if known
  known_annotations <- list(
    "CD8A" = entry(c("CD8+ T cells", "NK cells"), immune2,
                   c("Enriched in inflamed tumors", "Variable")),
    "CD8B" = entry("CD8+ T cells", "Immune", "Enriched in inflamed tumors"),
    "CXCL9" = entry(c("Macrophages", "Dendritic cells"), immune2,
                    c("Upregulated in inflamed TME", "Upregulated")),
    "CXCL10" = entry(c("Macrophages", "Endothelial cells"),
                     c("Immune", "Stromal"),
                     c("Upregulated in inflamed TME", "Variable")),
    "IFNG" = entry(c("CD8+ T cells", "NK cells"), immune2,
                   c("Enriched in responders", "Enriched in responders")),
    "GZMA" = cytolytic,
    "GZMB" = cytolytic,
    "PRF1" = cytolytic,
    "IDO1" = entry(c("Macrophages", "Dendritic cells"), immune2,
                   c("Upregulated in immune-active tumors", "Upregulated")),
    "FOXP3" = entry("Regulatory T cells", "Immune", "Enriched in excluded/inflamed"),
    "TGFB1" = entry(c("Cancer-associated fibroblasts", "Macrophages"),
                    c("Stromal", "Immune"),
                    c("Promotes T cell exclusion", "Variable")),
    "S100A8" = myeloid,
    "S100A9" = myeloid,
    "TOP2A" = entry(c("Proliferating tumor cells", "Proliferating T cells"),
                    c("Epithelial", "Immune"),
                    c("High in basal subtype", "Proliferation marker")),
    "ADAM12" = entry(c("Cancer-associated fibroblasts", "Myofibroblasts"),
                     c("Stromal", "Stromal"),
                     c("Stromal remodeling", "Stromal remodeling"))
  )

  panel_genes <- as.character(panel_genes)

  # Keep the return type stable: an empty panel gives an empty table, not NULL
  if (length(panel_genes) == 0) {
    return(data.frame(
      gene = character(0), cell_type = character(0),
      compartment = character(0), disease_change = character(0),
      evidence = character(0), source = character(0),
      stringsAsFactors = FALSE
    ))
  }

  # All rows for one gene: curated cell types, or one generic placeholder
  annotate_gene <- function(gene) {
    if (gene %in% names(known_annotations)) {
      ann <- known_annotations[[gene]]
      data.frame(
        gene = gene, cell_type = ann$ct,
        compartment = ann$comp, disease_change = ann$change,
        evidence = "Chen 2020, Lai 2021", source = "curated",
        stringsAsFactors = FALSE
      )
    } else {
      data.frame(
        gene = gene, cell_type = "Not characterized in bladder TME",
        compartment = "Unknown", disease_change = "Not characterized",
        evidence = "Limited data", source = "curated",
        stringsAsFactors = FALSE
      )
    }
  }

  do.call(rbind, lapply(panel_genes, annotate_gene))
}
474
+
475
+
476
#' Curated breast cancer TME cell-type annotations
#'
#' Sources: Wu et al. 2021 (Nat Genet), Bassez et al. 2021 (Nat Med),
#' Pal et al. 2021 (EMBO J)
#'
#' Genes present in the curated table contribute one row per annotated cell
#' type; every other gene contributes a single "Not characterized" row.
#'
#' @param panel_genes Character vector of gene symbols.
#' @return data.frame with columns gene, cell_type, compartment,
#'   disease_change, evidence, source. Rows follow the order of
#'   `panel_genes`; an empty panel yields a zero-row data.frame with the
#'   same columns.
.breast_cancer_celltype_annotations <- function(panel_genes) {
  # Local constructor: parallel vectors of cell types, compartments, changes
  entry <- function(ct, comp, change) list(ct = ct, comp = comp, change = change)

  epithelial2 <- c("Epithelial", "Epithelial")
  cytolytic <- entry(c("CD8+ T cells", "NK cells"), c("Immune", "Immune"),
                     c("Cytolytic activity", "Cytolytic activity"))

  known_annotations <- list(
    "ZIC1" = entry("Epithelial tumor cells", "Epithelial",
                   "Methylation marker in breast cancer"),
    "TPSAB1" = entry("Mast cells", "Immune",
                     "Stromal immune infiltrate; variable with chemo"),
    "CHAF1B" = entry(c("Proliferating tumor cells", "Proliferating T cells"),
                     c("Epithelial", "Immune"),
                     c("Proliferation marker", "Proliferation marker")),
    "RRAGD" = entry("Epithelial tumor cells", "Epithelial",
                    "mTOR pathway; metabolically active cells"),
    "TTLL4" = entry("Epithelial tumor cells", "Epithelial",
                    "Tubulin modification; taxane sensitivity marker"),
    "RARRES1" = entry(c("Epithelial tumor cells", "Luminal progenitors"),
                      epithelial2,
                      c("Tumor suppressor; retinoic acid pathway", "Differentiation")),
    "ESR1" = entry(c("Luminal epithelial cells", "ER+ tumor cells"),
                   epithelial2,
                   c("Estrogen receptor; luminal marker", "ER+ tumors: lower pCR")),
    "S100B" = entry(c("Basal-like tumor cells", "Neural cells"),
                    c("Epithelial", "Stromal"),
                    c("Basal subtype marker", "TME neural component")),
    "NMU" = entry("Epithelial tumor cells", "Epithelial",
                  "Neuropeptide; proliferation signal"),
    "NFIB" = entry(c("Basal-like tumor cells", "Cancer stem cells"),
                   epithelial2,
                   c("TNBC-associated TF", "Stemness marker")),
    "TM4SF1" = entry(c("Epithelial tumor cells", "Endothelial cells"),
                     c("Epithelial", "Stromal"),
                     c("Tumor cell surface marker", "Angiogenesis")),
    "GREB1" = entry("ER+ tumor cells", "Epithelial",
                    "Estrogen-responsive; ER+ subtype marker"),
    "CD8A" = entry("CD8+ T cells", "Immune",
                   "Tumor-infiltrating lymphocytes; predicts pCR"),
    "GZMA" = cytolytic,
    "GZMB" = cytolytic,
    "MKI67" = entry(c("Proliferating tumor cells", "Proliferating immune cells"),
                    c("Epithelial", "Immune"),
                    c("Ki-67; standard proliferation marker", "Active immune response"))
  )

  panel_genes <- as.character(panel_genes)

  # Keep the return type stable: an empty panel gives an empty table, not NULL
  if (length(panel_genes) == 0) {
    return(data.frame(
      gene = character(0), cell_type = character(0),
      compartment = character(0), disease_change = character(0),
      evidence = character(0), source = character(0),
      stringsAsFactors = FALSE
    ))
  }

  # All rows for one gene: curated cell types, or one generic placeholder
  annotate_gene <- function(gene) {
    if (gene %in% names(known_annotations)) {
      ann <- known_annotations[[gene]]
      data.frame(
        gene = gene, cell_type = ann$ct,
        compartment = ann$comp, disease_change = ann$change,
        evidence = "Wu 2021, Bassez 2021", source = "curated",
        stringsAsFactors = FALSE
      )
    } else {
      data.frame(
        gene = gene, cell_type = "Not characterized in breast TME",
        compartment = "Unknown", disease_change = "Not characterized",
        evidence = "Limited data", source = "curated",
        stringsAsFactors = FALSE
      )
    }
  }

  do.call(rbind, lapply(panel_genes, annotate_gene))
}
552
+
553
+
554
#' Curated sepsis blood cell-type annotations
#'
#' Sources: Reyes et al. 2020 (Nat Med), Scicluna et al. 2017 (Lancet Respir Med),
#' Kwok et al. 2023 (Nat Commun), Sweeney et al. 2018 (Sci Transl Med)
#'
#' Genes present in the curated table contribute one row per annotated cell
#' type; every other gene contributes a single "Not characterized" row.
#' Symbol pairs listed under both names (DARC/ACKR1, IL8/CXCL8, PTPLA/HACD1)
#' carry identical annotations.
#'
#' @param panel_genes Character vector of gene symbols.
#' @return data.frame with columns gene, cell_type, compartment,
#'   disease_change, evidence, source. Rows follow the order of
#'   `panel_genes`; an empty panel yields a zero-row data.frame with the
#'   same columns.
.sepsis_celltype_annotations <- function(panel_genes) {
  # Local constructor: parallel vectors of cell types, compartments, changes
  entry <- function(ct, comp, change) list(ct = ct, comp = comp, change = change)

  immune2 <- c("Immune", "Immune")

  # Entries shared by two symbols for the same gene
  duffy <- entry(c("Erythrocytes", "Endothelial cells"),
                 c("Erythroid", "Stromal"),
                 c("Chemokine scavenger; regulates neutrophil trafficking",
                   "Chemokine sequestration"))
  interleukin8 <- entry(c("Neutrophils", "Monocytes/Macrophages"), immune2,
                        c("Major neutrophil chemoattractant; strongly upregulated",
                          "Key inflammatory mediator"))
  hacd1 <- entry(c("Monocytes", "Neutrophils"), immune2,
                 c("Fatty acid elongation", "Lipid metabolism"))
  calprotectin <- entry(c("Neutrophils", "Inflammatory monocytes"), immune2,
                        c("Calprotectin subunit; innate immunity", "Strong upregulation"))

  known_annotations <- list(
    "IFIT1B" = entry(c("Monocytes", "Dendritic cells"), immune2,
                     c("Downregulated in Mars1 immunosuppression",
                       "Reduced interferon response")),
    "ACSL6" = entry(c("Monocytes/Macrophages", "Neutrophils"), immune2,
                    c("Metabolic reprogramming in sepsis", "Altered lipid metabolism")),
    "CTNNAL1" = entry(c("Monocytes", "Neutrophils"), immune2,
                      c("Altered leukocyte adhesion", "Impaired migration")),
    "ABCC13" = entry("Neutrophils", "Immune",
                     "Pseudogene; potential regulatory role"),
    "C9orf78" = entry(c("Monocytes", "T cells"), immune2,
                      c("Apoptosis regulation", "Lymphocyte survival")),
    "KCNH2" = entry("Multiple cell types", "Immune",
                    "Electrolyte regulation in critical illness"),
    "FAM104A" = entry("Multiple cell types", "Immune",
                      "Function in immune cells unclear"),
    "POC1B" = entry("Proliferating immune cells", "Immune",
                    "Cell division in activated immune cells"),
    "BPGM" = entry(c("Erythrocytes", "Neutrophils"),
                   c("Erythroid", "Immune"),
                   c("2,3-BPG regulation; oxygen delivery", "Variable")),
    "ZDHHC2" = entry(c("T cells", "Monocytes"), immune2,
                     c("Protein palmitoylation in signaling",
                       "Immune signaling regulation")),
    "FGFR1OP2" = entry("Multiple cell types", "Immune",
                       "Centrosome/cytoskeletal organization"),
    "SAMHD1" = entry(c("Monocytes/Macrophages", "Dendritic cells"), immune2,
                     c("Interferon-stimulated gene; innate defense",
                       "Antiviral response")),
    "DARC" = duffy,
    "ACKR1" = duffy,
    "DDX17" = entry(c("Monocytes", "Dendritic cells"), immune2,
                    c("Innate immune signaling",
                      "RNA processing in immune activation")),
    "DAAM2" = entry(c("T cells", "Monocytes"), immune2,
                    c("Wnt signaling; cytoskeletal regulation", "Cell migration")),
    "TUBG2" = entry("Proliferating immune cells", "Immune",
                    "Centrosome function; cell division"),
    "IL8" = interleukin8,
    "CXCL8" = interleukin8,
    "EGR1" = entry(c("Monocytes/Macrophages", "Endothelial cells"),
                   c("Immune", "Stromal"),
                   c("Stress-responsive TF; monocyte activation",
                     "Endothelial activation")),
    "TNFSF12" = entry(c("Monocytes/Macrophages", "T cells"), immune2,
                      c("TWEAK; immune regulation", "T cell modulation")),
    "TNFRSF8" = entry(c("T cells", "B cells"), immune2,
                      c("CD30; T-cell activation marker", "Activation marker")),
    "CTSO" = entry(c("Monocytes/Macrophages", "Dendritic cells"), immune2,
                   c("Lysosomal protease; antigen processing",
                     "Antigen presentation")),
    "TNKS" = entry("Multiple cell types", "Immune",
                   "Wnt signaling; telomere maintenance"),
    "PTPLA" = hacd1,
    "HACD1" = hacd1,
    "DEFA4" = entry("Neutrophils", "Immune",
                    "Antimicrobial peptide; strongly upregulated in sepsis"),
    "S100A12" = entry(c("Neutrophils", "Inflammatory monocytes"), immune2,
                      c("Alarmin; neutrophil activation marker",
                        "Strong upregulation")),
    "S100A8" = calprotectin,
    "S100A9" = calprotectin,
    "TNF" = entry(c("Monocytes/Macrophages", "NK cells"), immune2,
                  c("Master pro-inflammatory cytokine", "Early sepsis response")),
    "IL1B" = entry(c("Monocytes/Macrophages", "Neutrophils"), immune2,
                   c("Inflammasome-derived cytokine", "Pyroptosis pathway")),
    "IL6" = entry(c("Monocytes/Macrophages", "Endothelial cells"),
                  c("Immune", "Stromal"),
                  c("Key sepsis cytokine; drives CRP", "Endothelial activation")),
    "IL10" = entry(c("Monocytes/Macrophages", "Regulatory T cells"), immune2,
                   c("Anti-inflammatory; immunosuppression marker",
                     "Immune regulation")),
    "IFNG" = entry(c("NK cells", "T cells"), immune2,
                   c("Macrophage activation", "Adaptive immune response")),
    "CD14" = entry("Monocytes/Macrophages", "Immune",
                   "LPS co-receptor; monocyte marker; soluble CD14 is sepsis biomarker"),
    "HLA-DRA" = entry(c("Monocytes/Macrophages", "Dendritic cells", "B cells"),
                      c("Immune", "Immune", "Immune"),
                      c("Reduced in immunoparalysis",
                        "Reduced antigen presentation", "Variable")),
    "CX3CR1" = entry(c("Monocytes", "NK cells"), immune2,
                     c("Non-classical monocyte marker; reduced in sepsis",
                       "Tissue surveillance")),
    "CCR7" = entry(c("Dendritic cells", "T cells"), immune2,
                   c("Lymph node homing; reduced in sepsis",
                     "Naive/central memory marker"))
  )

  panel_genes <- as.character(panel_genes)

  # Keep the return type stable: an empty panel gives an empty table, not NULL
  if (length(panel_genes) == 0) {
    return(data.frame(
      gene = character(0), cell_type = character(0),
      compartment = character(0), disease_change = character(0),
      evidence = character(0), source = character(0),
      stringsAsFactors = FALSE
    ))
  }

  # All rows for one gene: curated cell types, or one generic placeholder
  annotate_gene <- function(gene) {
    if (gene %in% names(known_annotations)) {
      ann <- known_annotations[[gene]]
      data.frame(
        gene = gene, cell_type = ann$ct,
        compartment = ann$comp, disease_change = ann$change,
        evidence = "Reyes 2020, Scicluna 2017", source = "curated",
        stringsAsFactors = FALSE
      )
    } else {
      data.frame(
        gene = gene, cell_type = "Not characterized in sepsis blood",
        compartment = "Unknown", disease_change = "Not characterized",
        evidence = "Limited data", source = "curated",
        stringsAsFactors = FALSE
      )
    }
  }

  do.call(rbind, lapply(panel_genes, annotate_gene))
}
700
+
701
+
702
# Draw the cell-type expression figure for the panel genes and hand it to
# .save_enrichment_plot() with the base path <output_dir>/celltype_expression.
#
# Two input layouts are recognised:
#   * Curated annotations: a `source` column whose values are all "curated".
#     Drawn as a gene x cell_type tile plot; tiles are coloured by a numeric
#     score looked up from the text in `disease_change` (or the legacy
#     `uc_change` column) and labelled with `compartment`.
#   * Expression table: a `mean_expression` column. Drawn as one bar-chart
#     facet per gene showing its 5 highest-expressing cell types; when a
#     `disease` column exists, rows with disease == "normal" are preferred.
# Input matching neither layout produces no plot.
#
# Args:
#   celltype_data: data.frame in one of the two layouts above.
#   output_dir:    directory used to build the output path.
#   tissue_label:  text used in the plot title and legend name.
#
# Returns:
#   Whatever .save_enrichment_plot() returns when a plot is drawn; otherwise
#   NULL (invisibly).
.plot_celltype_expression <- function(celltype_data, output_dir, tissue_label = "Tissue") {
  suppressPackageStartupMessages({
    library(ggplot2)
    library(ggprism)
  })

  # isTRUE() guards against an NA in `source`: all() would then return NA and
  # `if` would abort. Such input now falls through to the next branch instead.
  is_curated <- "source" %in% names(celltype_data) &&
    isTRUE(all(celltype_data$source == "curated"))

  if (is_curated) {
    # Plot curated data as a tile plot
    plot_data <- celltype_data

    # Detect change column (uc_change for legacy IBD, disease_change for new)
    change_col <- if ("disease_change" %in% names(plot_data)) "disease_change" else "uc_change"

    # Map change to numeric for color scale. Labels that are not listed here
    # look up as NA and are therefore drawn with the scale's na.value colour.
    change_map <- c(
      "Strong upregulation" = 2, "Upregulation" = 1,
      "Enriched in inflamed tumors" = 1.5, "Enriched in responders" = 1.5,
      "Upregulated in inflamed TME" = 1.5, "Upregulated" = 1,
      "Cytolytic marker" = 1, "Cytolytic activity" = 1,
      "Myeloid inflammation marker" = 1,
      "Promotes T cell exclusion" = -1, "Stromal remodeling" = 0,
      "Proliferation marker" = 0.5, "High in basal subtype" = 0.5,
      "Proliferation" = 0.5, "Active immune response" = 1,
      "Tumor-infiltrating lymphocytes; predicts pCR" = 1.5,
      "Methylation marker in breast cancer" = 0,
      "Stromal immune infiltrate; variable with chemo" = 0,
      "mTOR pathway; metabolically active cells" = 0.5,
      "Tubulin modification; taxane sensitivity marker" = 0.5,
      "Tumor suppressor; retinoic acid pathway" = 0,
      "Differentiation" = 0, "Stemness marker" = 0.5,
      "Estrogen receptor; luminal marker" = 0,
      "ER+ tumors: lower pCR" = -1,
      "Basal subtype marker" = 0.5,
      "TME neural component" = 0,
      "Neuropeptide; proliferation signal" = 0.5,
      "TNBC-associated TF" = 0.5,
      "Tumor cell surface marker" = 0, "Angiogenesis" = 0,
      "Estrogen-responsive; ER+ subtype marker" = -0.5,
      "Ki-67; standard proliferation marker" = 1,
      "Variable" = 0, "No change" = 0, "Not characterized" = NA,
      "Not characterized in bladder TME" = NA,
      "Not characterized in breast TME" = NA,
      "Not characterized in sepsis blood" = NA,
      "Downregulated in Mars1 immunosuppression" = -1.5,
      "Reduced interferon response" = -1,
      "Metabolic reprogramming in sepsis" = 0.5,
      "Altered lipid metabolism" = 0,
      "Altered leukocyte adhesion" = 0.5,
      "Impaired migration" = -0.5,
      "Pseudogene; potential regulatory role" = 0,
      "Apoptosis regulation" = 0,
      "Lymphocyte survival" = 0,
      "Electrolyte regulation in critical illness" = 0,
      "Function in immune cells unclear" = NA,
      "Cell division in activated immune cells" = 0.5,
      "2,3-BPG regulation; oxygen delivery" = 0.5,
      "Protein palmitoylation in signaling" = 0,
      "Immune signaling regulation" = 0,
      "Centrosome/cytoskeletal organization" = 0,
      "Interferon-stimulated gene; innate defense" = 1,
      "Antiviral response" = 1,
      "Chemokine scavenger; regulates neutrophil trafficking" = 1,
      "Chemokine sequestration" = 0.5,
      "Innate immune signaling" = 1,
      "RNA processing in immune activation" = 0.5,
      "Wnt signaling; cytoskeletal regulation" = 0,
      "Cell migration" = 0,
      "Centrosome function; cell division" = 0.5,
      "Major neutrophil chemoattractant; strongly upregulated" = 2,
      "Key inflammatory mediator" = 1.5,
      "Stress-responsive TF; monocyte activation" = 1,
      "Endothelial activation" = 1,
      "TWEAK; immune regulation" = 0.5,
      "T cell modulation" = 0,
      "CD30; T-cell activation marker" = 1,
      "Activation marker" = 0.5,
      "Lysosomal protease; antigen processing" = 0.5,
      "Antigen presentation" = 0.5,
      "Wnt signaling; telomere maintenance" = 0,
      "Fatty acid elongation" = 0,
      "Lipid metabolism" = 0,
      "Antimicrobial peptide; strongly upregulated in sepsis" = 2,
      "Alarmin; neutrophil activation marker" = 1.5,
      "Calprotectin subunit; innate immunity" = 1.5,
      "Master pro-inflammatory cytokine" = 2,
      "Early sepsis response" = 1.5,
      "Inflammasome-derived cytokine" = 2,
      "Pyroptosis pathway" = 1.5,
      "Key sepsis cytokine; drives CRP" = 2,
      "Anti-inflammatory; immunosuppression marker" = -1,
      "Immune regulation" = 0,
      "Macrophage activation" = 1,
      "Adaptive immune response" = 1,
      "LPS co-receptor; monocyte marker; soluble CD14 is sepsis biomarker" = 1.5,
      "Reduced in immunoparalysis" = -1.5,
      "Reduced antigen presentation" = -1,
      "Non-classical monocyte marker; reduced in sepsis" = -1,
      "Tissue surveillance" = 0,
      "Lymph node homing; reduced in sepsis" = -1,
      "Naive/central memory marker" = 0,
      "Reduced" = -1, "Downregulation" = -1,
      "Strong downregulation" = -2
    )

    # Look the labels up by *name*. as.character() matters: subsetting a named
    # vector with a factor would use the factor's integer codes, silently
    # returning the wrong scores. If neither change column exists, every tile
    # gets NA instead of the assignment failing on a zero-length replacement.
    change_labels <- if (change_col %in% names(plot_data)) {
      as.character(plot_data[[change_col]])
    } else {
      rep(NA_character_, nrow(plot_data))
    }
    plot_data$change_score <- unname(change_map[change_labels])

    legend_name <- paste0(tissue_label, "\nChange")

    p <- ggplot(plot_data, aes(x = gene, y = cell_type, fill = change_score)) +
      geom_tile(color = "white", linewidth = 0.5) +
      geom_text(aes(label = compartment), size = 2.8, color = "grey30") +
      scale_fill_gradient2(low = "steelblue3", mid = "white", high = "firebrick3",
                           midpoint = 0, na.value = "grey90",
                           name = legend_name,
                           breaks = c(-2, -1, 0, 1, 2),
                           labels = c("Strong down", "Down", "No change",
                                      "Up", "Strong up")) +
      theme_prism(base_size = 11) +
      theme(
        axis.text.x = element_text(angle = 45, hjust = 1, size = 10, face = "italic"),
        axis.text.y = element_text(size = 9),
        plot.title = element_text(hjust = 0.5, face = "bold", size = 13),
        legend.position = "right"
      ) +
      labs(x = NULL, y = NULL,
           title = paste("Cell-Type Expression of Panel Genes in", tissue_label))

    .save_enrichment_plot(p, file.path(output_dir, "celltype_expression"), 10, 7)

  } else if ("mean_expression" %in% names(celltype_data)) {
    # Plot Census data - top cell types per gene
    # Filter to normal tissue for baseline expression; keep everything when
    # there is no `disease` column or no "normal" rows.
    plot_data <- celltype_data
    if ("disease" %in% names(celltype_data)) {
      # %in% never returns NA, so rows with a missing disease value are simply
      # excluded rather than being turned into all-NA rows by `==`.
      normal_rows <- celltype_data[celltype_data$disease %in% "normal", , drop = FALSE]
      if (nrow(normal_rows) > 0) plot_data <- normal_rows
    }

    # Top 5 cell types per gene by expression
    top_ct <- do.call(rbind, lapply(split(plot_data, plot_data$gene), function(df) {
      head(df[order(-df$mean_expression), ], 5)
    }))

    # do.call(rbind, list()) is NULL for empty input, and nrow(NULL) > 0 is a
    # zero-length condition that `if` rejects, so test for NULL first.
    if (!is.null(top_ct) && nrow(top_ct) > 0) {
      p <- ggplot(top_ct, aes(x = reorder(cell_type, mean_expression),
                              y = mean_expression,
                              fill = gene)) +
        geom_col(position = "dodge") +
        coord_flip() +
        facet_wrap(~ gene, scales = "free", ncol = 2) +
        theme_prism(base_size = 10) +
        theme(
          legend.position = "none",
          strip.text = element_text(face = "italic", size = 10),
          plot.title = element_text(hjust = 0.5, face = "bold", size = 13)
        ) +
        labs(x = NULL, y = "Mean Expression",
             title = paste("Cell-Type Expression in", tissue_label, "(CZI CELLxGENE)"))

      .save_enrichment_plot(p, file.path(output_dir, "celltype_expression"), 12, 10)
    }
  }
}
863
+
864
+
865
+ # ============================================================
866
+ # 3. GWAS / Disease Gene Overlap
867
+ # ============================================================
868
+
869
# Return the curated disease gene set used for the GWAS / disease-gene overlap.
#
# Args:
#   disease: disease key. "breast_cancer", "bladder_cancer" and "sepsis" select
#            their own gene sets; any other value selects the IBD set. NULL, NA
#            and zero-length or multi-element input also select the IBD set
#            rather than aborting inside `if`.
#
# Returns:
#   A list with four elements:
#     genes       - character vector of gene symbols
#     label       - short display label for the gene set
#     references  - short citation string
#     refs_detail - longer citation / description string
.get_gwas_config <- function(disease) {
  # Reduce the selector to one non-missing string. as.character() keeps factor
  # input working; anything that cannot name a disease uses the IBD default.
  key <- as.character(disease)
  if (length(key) != 1L || is.na(key)) {
    key <- "ibd"
  }

  if (key == "breast_cancer") {
    list(
      genes = c(
        # Breast cancer GWAS susceptibility loci (Michailidou 2017, Fachal 2020)
        "BRCA1", "BRCA2", "TP53", "CHEK2", "ATM", "PALB2", "RAD51C",
        "ESR1", "FGFR2", "MAP3K1", "TOX3", "CCND1", "TERT", "CASP8",
        "MYC", "CDKN2A", "PTEN", "PIK3CA", "AKT1", "CDH1",
        "RAD51B", "RAD51D", "NF1", "STK11", "RB1",
        # Chemotherapy response / DNA damage repair genes
        "TOP2A", "TYMS", "ERCC1", "ERCC2", "TUBB3", "RRM1",
        "ABCB1", "GSTP1", "DPYD", "UGT1A1",
        # Immune / tumor microenvironment genes (pCR predictors)
        "CD8A", "CD8B", "GZMA", "GZMB", "PRF1", "IFNG",
        "CXCL9", "CXCL10", "CXCL13", "IDO1", "CD274",
        "FOXP3", "CTLA4", "CD19", "MS4A1", "IGHG1",
        "MKI67", "PCNA", "MCM2", "BIRC5",
        # Breast cancer molecular subtype markers
        "ERBB2", "PGR", "KRT5", "KRT14", "KRT17", "EGFR", "VIM"
      ),
      label = "Breast Cancer GWAS & Chemo Response",
      references = "Michailidou et al. 2017 Nature; Fachal et al. 2020 Nat Genet",
      refs_detail = paste("Breast cancer susceptibility loci (Michailidou et al. 2017",
                          "Nature, 65 GWAS loci) plus chemotherapy response genes and",
                          "immune/proliferation markers associated with pCR",
                          "(Denkert et al. 2018 Lancet Oncol)")
    )
  } else if (key == "bladder_cancer") {
    list(
      genes = c(
        # Bladder cancer GWAS loci (Figueroa 2023 meta-analysis, 24 loci)
        "PSCA", "FGFR3", "TACC3", "TP63", "MYC", "TERT", "CLPTM1L",
        "NAT2", "UGT1A6", "UGT1A8", "CCNE1", "APOBEC3A", "APOBEC3B",
        "CBX6", "SLC14A1", "GSTM1", "CDKN2A", "MTAP", "PAG1",
        # ICI response / immune checkpoint genes
        "CD274", "PDCD1", "PDCD1LG2", "CTLA4", "LAG3", "HAVCR2",
        "B2M", "JAK1", "JAK2", "STK11", "PTEN", "KEAP1",
        # Immunotherapy response score genes
        "TOP2A", "ADAM12",
        # TME / immune signature genes
        "CD8A", "CD8B", "CXCL9", "CXCL10", "CXCL13", "IFNG",
        "GZMA", "GZMB", "PRF1", "IDO1", "FOXP3", "TGFB1",
        # APOBEC mutagenesis
        "AICDA", "APOBEC3C", "APOBEC3D", "APOBEC3F", "APOBEC3G",
        # DNA damage response
        "ERCC2", "ATM", "RB1", "BRCA2", "FANCA"
      ),
      label = "Bladder Cancer GWAS & ICI Response",
      references = "Figueroa et al. 2023 Eur Urol; Samstein et al. 2019 Nat Genet",
      refs_detail = paste("Bladder cancer susceptibility loci (Figueroa et al. 2023",
                          "Eur Urol, 24 GWAS loci) plus immune checkpoint and",
                          "immunotherapy response genes (Samstein et al. 2019;",
                          "Cristescu et al. 2022)")
    )
  } else if (key == "sepsis") {
    list(
      genes = c(
        # Sepsis susceptibility GWAS loci (Rautanen et al. 2015 Am J Hum Genet,
        # Scherag et al. 2016 Eur J Hum Genet, Srinivasan et al. 2020 AJRCCM)
        "FER", "PCSK9",
        # Innate immune recognition / TLR pathway
        "TLR1", "TLR4", "TLR5", "MBL2",
        # Pro-inflammatory cytokines
        "TNF", "IL1B", "IL6", "IL10", "IL1R2",
        # Damage-associated / immunomodulatory
        # NOTE(review): "PAI1" looks like an alias rather than the official
        # symbol (presumably SERPINE1) - confirm it can match the panel symbols.
        "HMGB1", "MIF", "PAI1",
        # Coagulation / endothelial dysfunction
        "ACE", "ADAMTS13", "PROC", "THBD", "ABO",
        # Adaptive immunity / antigen presentation
        "IFNG", "HLA-DRA", "CD14",
        # Alarmins / myeloid markers
        "S100A12", "S100A8", "S100A9",
        # Chemokine receptors / trafficking
        "CX3CR1", "CCR7",
        # Interferon-stimulated genes
        "IFIT1", "IFIT2", "IFIT3", "MX1", "OAS1"
      ),
      label = "Sepsis GWAS & Immune Response",
      references = "Rautanen et al. 2015 Am J Hum Genet; Scherag et al. 2016 Eur J Hum Genet; Scicluna et al. 2017 Lancet Respir Med",
      refs_detail = paste("Sepsis susceptibility loci (Rautanen et al. 2015 Am J Hum",
                          "Genet, FER locus; Scherag et al. 2016 Eur J Hum Genet)",
                          "plus key innate/adaptive immune response genes from sepsis",
                          "transcriptomic studies (Scicluna et al. 2017 Lancet Respir Med;",
                          "Sweeney et al. 2018 Sci Transl Med)")
    )
  } else {
    # IBD (default)
    list(
      genes = c(
        "NOD2", "CARD9", "RIPK2", "XIAP", "NFKB1", "NFKB2", "REL", "RELA",
        "IL23R", "IL12B", "JAK2", "TYK2", "STAT3", "STAT4", "IL21", "IL6ST",
        "IL23A", "RORC", "CCR6", "IL17A", "IL17F", "IL22",
        "ATG16L1", "IRGM", "LRRK2", "ULK1", "SMURF1",
        "HNF4A", "CDH1", "ITLN1", "MUC1", "MUC19", "FUT2", "ECM1",
        "IL10", "IL2RA", "IL7R", "TNFSF15", "TNFAIP3", "TNFRSF6B",
        "BACH2", "PRDM1", "IKZF1", "TAGAP", "IRF5", "IRF1", "IRF4",
        "PTGER4", "MST1", "CXCR2", "CXCL5", "CCL2", "CCL7", "CCL20",
        "ICAM1", "ITGAL", "MADCAM1",
        "FCGR2A", "FCGR2B", "FCGR3A",
        "PTPN2", "PTPN22", "SH2B3", "SMAD3", "SMAD7",
        "ERAP1", "ERAP2", "BTNL2", "SP140",
        # NOTE(review): "MDA5" appears to be an alias of IFIH1, which is
        # already listed - confirm whether the duplicate entry is intended.
        "TLR4", "IFIH1", "MDA5",
        "NKX2-3", "FOXO3", "CEBPB", "GATA3", "CREM", "KLF3",
        "DNMT3A", "ARID5B",
        "SLC22A4", "SLC22A5", "SLC39A8", "GPR35", "GPR65", "ORMDL3",
        "S100A8", "S100A9", "S100A12",
        "ZMIZ1", "PUS10", "SBNO2", "RNF186", "OTUD3", "LITAF",
        "USP25", "CYLD", "NDFIP1", "TNFRSF14", "CARD11",
        "FCER1G", "RNF40", "PDLIM5", "CUL2", "RTEL1", "FOS"
      ),
      label = "IBD GWAS",
      references = "de Lange 2017, Liu 2023, Jostins 2012",
      refs_detail = paste("de Lange et al. 2017 Nat Genet; Liu et al. 2023",
                          "Nat Genet; Jostins et al. 2012 Nature")
    )
  }
}
986
+
987
+ .get_gene_annotations <- function(disease) {
988
+ if (disease == "breast_cancer") {
989
+ list(
990
+ # ----- Subtype classification panel genes -----
991
+ "ESR1" = list(
992
+ gwas_evidence = "Direct breast cancer GWAS hit (6q25.1)",
993
+ disease_relevance = "Estrogen receptor alpha; THE defining marker of luminal breast cancer; ER+ in ~70% of cases",
994
+ drug_relevance = "Target of endocrine therapy (tamoxifen, aromatase inhibitors); high expression defines Luminal A",
995
+ references = "TCGA Network 2012 Nature; Parker et al. 2009 JCO"),
996
+ "TBC1D9" = list(
997
+ gwas_evidence = "No direct GWAS hit; strongly co-expressed with ESR1",
998
+ disease_relevance = "TBC1 domain family member 9; Rab GTPase-activating protein; highly expressed in luminal tumors",
999
+ drug_relevance = "Luminal marker; high expression predicts endocrine therapy response",
1000
+ references = "Smid et al. 2006 BMC Genomics"),
1001
+ "CA12" = list(
1002
+ gwas_evidence = "No direct GWAS hit; ER-regulated gene",
1003
+ disease_relevance = "Carbonic anhydrase XII; directly induced by estrogen/ER signaling; luminal marker in PAM50",
1004
+ drug_relevance = "Potential therapeutic target; high expression indicates active ER signaling",
1005
+ references = "Barnett et al. 2008 Cancer Res"),
1006
+ "MLPH" = list(
1007
+ gwas_evidence = "No direct GWAS hit",
1008
+ disease_relevance = "Melanophilin; involved in melanosome transport; highly expressed in luminal breast tumors",
1009
+ drug_relevance = "Luminal A marker gene; associated with favorable prognosis",
1010
+ references = "Parker et al. 2009 JCO"),
1011
+ "NAT1" = list(
1012
+ gwas_evidence = "Breast cancer risk locus (8p22); NAT2 variant is GWAS hit",
1013
+ disease_relevance = "N-acetyltransferase 1; Phase II detoxification enzyme; high expression defines luminal subtype",
1014
+ drug_relevance = "Luminal marker; associated with endocrine therapy sensitivity",
1015
+ references = "Perou et al. 2000 Nature"),
1016
+ "SLC44A4" = list(
1017
+ gwas_evidence = "No direct GWAS hit",
1018
+ disease_relevance = "Choline transporter-like protein 4; highly expressed in luminal breast cancer",
1019
+ drug_relevance = "Strong luminal marker; negative coefficient indicates Basal-like when absent",
1020
+ references = "TCGA Network 2012 Nature"),
1021
+ "FOXA1" = list(
1022
+ gwas_evidence = "Direct breast cancer GWAS hit (14q21.1)",
1023
+ disease_relevance = "Forkhead box A1; pioneer transcription factor for ER; essential for luminal differentiation",
1024
+ drug_relevance = "Required for ER chromatin binding; FOXA1 mutations affect endocrine therapy response",
1025
+ references = "Carroll et al. 2005 Cell; Hurtado et al. 2011 Nat Genet"),
1026
+ "MAPT" = list(
1027
+ gwas_evidence = "GWAS hit for Alzheimer's; indirectly linked to breast cancer prognosis",
1028
+ disease_relevance = "Microtubule-associated protein Tau; expressed in luminal tumors; inversely correlated with basal markers",
1029
+ drug_relevance = "High expression predicts taxane resistance (competes with paclitaxel for tubulin binding)",
1030
+ references = "Rouzier et al. 2005 PNAS"),
1031
+ "AGR2" = list(
1032
+ gwas_evidence = "No direct GWAS hit",
1033
+ disease_relevance = "Anterior gradient 2; ER-regulated protein disulfide isomerase; luminal A marker",
1034
+ drug_relevance = "High expression in ER+ luminal tumors; associated with favorable prognosis",
1035
+ references = "Innes et al. 2006 BJC"),
1036
+ "CDC20" = list(
1037
+ gwas_evidence = "No direct GWAS hit",
1038
+ disease_relevance = "Cell division cycle 20; mitotic checkpoint protein; proliferation marker enriched in basal-like tumors",
1039
+ drug_relevance = "High proliferation marker; positive coefficient indicates Basal-like subtype",
1040
+ references = "TCGA Network 2012 Nature"),
1041
+ "CIRBP" = list(
1042
+ gwas_evidence = "No direct GWAS hit",
1043
+ disease_relevance = "Cold-inducible RNA binding protein; stress-responsive gene; differentially expressed between subtypes",
1044
+ drug_relevance = "Luminal marker; involved in mRNA stability regulation",
1045
+ references = "Perou et al. 2000 Nature"),
1046
+ "XBP1" = list(
1047
+ gwas_evidence = "No direct GWAS hit; near breast cancer GWAS locus",
1048
+ disease_relevance = "X-box binding protein 1; UPR transcription factor; strongly expressed in luminal breast cancer",
1049
+ drug_relevance = "XBP1 splicing drives endocrine resistance; high expression indicates luminal differentiation",
1050
+ references = "Chen et al. 2014 Nature"),
1051
+ "KIF2C" = list(
1052
+ gwas_evidence = "No direct GWAS hit",
1053
+ disease_relevance = "Kinesin family member 2C (MCAK); mitotic kinesin; proliferation marker in breast cancer",
1054
+ drug_relevance = "Cell cycle gene enriched in basal-like tumors; potential therapeutic target",
1055
+ references = "Parker et al. 2009 JCO"),
1056
+ "SLC39A6" = list(
1057
+ gwas_evidence = "No direct GWAS hit",
1058
+ disease_relevance = "Zinc transporter LIV-1; estrogen-induced gene; luminal breast cancer marker",
1059
+ drug_relevance = "Target of ladiratuzumab vedotin (ADC); high expression in luminal tumors",
1060
+ references = "Taylor et al. 2007 Cancer Res"),
1061
+ "SCUBE2" = list(
1062
+ gwas_evidence = "No direct GWAS hit",
1063
+ disease_relevance = "Signal peptide CUB EGF-like domain 2; luminal marker; inhibits cancer progression",
1064
+ drug_relevance = "High expression indicates favorable prognosis; part of Oncotype DX gene list",
1065
+ references = "Cheng et al. 2009 JCO"),
1066
+ "DNAJC12" = list(
1067
+ gwas_evidence = "No direct GWAS hit",
1068
+ disease_relevance = "DnaJ heat shock protein family member C12; ER-regulated chaperone in luminal tumors",
1069
+ drug_relevance = "High expression predicts tamoxifen sensitivity",
1070
+ references = "de Ronde et al. 2013 Mol Oncol"),
1071
+ "TPX2" = list(
1072
+ gwas_evidence = "No direct GWAS hit",
1073
+ disease_relevance = "Targeting protein for Xklp2; Aurora A kinase activator; proliferation marker in breast cancer",
1074
+ drug_relevance = "Enriched in basal-like tumors; Aurora kinase inhibitors in clinical trials",
1075
+ references = "TCGA Network 2012 Nature"),
1076
+ # ----- pCR-specific genes (retained for backward compatibility) -----
1077
+ "ZIC1" = list(
1078
+ gwas_evidence = "No direct breast cancer GWAS hit",
1079
+ disease_relevance = "Zinc finger transcription factor; frequently methylated in breast cancer",
1080
+ drug_relevance = "Positive coefficient: higher expression predicts pCR",
1081
+ references = "Gan et al. 2011 Breast Cancer Res Treat"),
1082
+ "TPSAB1" = list(
1083
+ gwas_evidence = "No direct GWAS hit",
1084
+ disease_relevance = "Tryptase alpha/beta 1; mast cell marker in tumor microenvironment",
1085
+ drug_relevance = "Negative coefficient: higher mast cell infiltration predicts residual disease",
1086
+ references = "Aponte-López et al. 2018 Immunol Lett"),
1087
+ "CHAF1B" = list(
1088
+ gwas_evidence = "No direct GWAS hit",
1089
+ disease_relevance = "Chromatin assembly factor 1 subunit B; proliferation marker",
1090
+ drug_relevance = "Higher proliferation predicts pCR to chemotherapy",
1091
+ references = "Peng et al. 2019 BMC Cancer")
1092
+ )
1093
+ } else if (disease == "bladder_cancer") {
1094
+ # Bladder cancer / ICI response gene annotations
1095
+ # These are populated dynamically — only genes that appear in the panel get used
1096
+ list(
1097
+ "TMB_log2" = list(
1098
+ gwas_evidence = "Genomic feature (not a gene); TMB is a validated pan-cancer ICI biomarker",
1099
+ disease_relevance = "TMB >= 10 mut/Mb predicts ICI response across cancer types",
1100
+ drug_relevance = "FDA-approved TMB-high biomarker for pembrolizumab (pan-tumor)",
1101
+ references = "Samstein et al. 2019 Nat Genet"),
1102
+ "CD274" = list(
1103
+ gwas_evidence = "PD-L1 gene; target of atezolizumab",
1104
+ disease_relevance = "PD-L1 expression on immune cells (IC2+) enriches response to ~27%",
1105
+ drug_relevance = "Direct drug target; SP142 IHC companion diagnostic",
1106
+ references = "Mariathasan et al. 2018 Nature"),
1107
+ "PDCD1" = list(
1108
+ gwas_evidence = "PD-1 gene; immune checkpoint receptor",
1109
+ disease_relevance = "Component of IRS; T cell exhaustion marker in bladder TME",
1110
+ drug_relevance = "Target of nivolumab/pembrolizumab; high expression indicates active immune checkpoint",
1111
+ references = "Cristescu et al. 2022"),
1112
+ "CD8A" = list(
1113
+ gwas_evidence = "No direct GWAS hit; key ICI response gene",
1114
+ disease_relevance = "CD8+ T cell infiltration is strongest predictor of ICI response",
1115
+ drug_relevance = "Inflamed phenotype marker; associated with atezolizumab benefit",
1116
+ references = "Mariathasan et al. 2018"),
1117
+ "CXCL9" = list(
1118
+ gwas_evidence = "No direct GWAS hit",
1119
+ disease_relevance = "IFN-gamma-induced chemokine; T cell recruitment to tumor",
1120
+ drug_relevance = "Core immune-inflamed signature gene",
1121
+ references = "Ayers et al. 2017 JCI"),
1122
+ "CXCL10" = list(
1123
+ gwas_evidence = "No direct GWAS hit",
1124
+ disease_relevance = "T cell chemoattractant; marks immune-active tumors",
1125
+ drug_relevance = "Part of Teff signature predicting anti-PD-L1 response",
1126
+ references = "Mariathasan et al. 2018"),
1127
+ "TGFB1" = list(
1128
+ gwas_evidence = "No direct bladder GWAS hit; major immune regulator",
1129
+ disease_relevance = "TGF-beta drives T cell exclusion from tumor parenchyma",
1130
+ drug_relevance = "Anti-TGF-beta combinations under investigation; resistance mechanism",
1131
+ references = "Mariathasan et al. 2018 Nature"),
1132
+ "TOP2A" = list(
1133
+ gwas_evidence = "No direct GWAS hit",
1134
+ disease_relevance = "Proliferation marker; component of Immunotherapy Response Score (IRS)",
1135
+ drug_relevance = "IRS = TMB + PD1 + PDL1 + TOP2A + ADAM12 predicts anti-PD-L1 benefit",
1136
+ references = "Cristescu et al. 2022"),
1137
+ "ADAM12" = list(
1138
+ gwas_evidence = "No direct GWAS hit",
1139
+ disease_relevance = "Stromal remodeling; component of IRS; CAF marker",
1140
+ drug_relevance = "Part of pan-tumor IRS; high ADAM12 may indicate stromal exclusion",
1141
+ references = "Cristescu et al. 2022"),
1142
+ "FGFR3" = list(
1143
+ gwas_evidence = "Direct bladder cancer GWAS hit (4p16.3); also somatically mutated in ~15% of MIBC",
1144
+ disease_relevance = "Receptor tyrosine kinase; FGFR3-mutant tumors are often luminal, immune-cold",
1145
+ drug_relevance = "Erdafitinib (FGFR inhibitor) FDA-approved; FGFR3-mutant = poor ICI response",
1146
+ references = "Figueroa 2023; Loriot et al. 2019"),
1147
+ "PSCA" = list(
1148
+ gwas_evidence = "Direct bladder cancer GWAS hit (8q24.3)",
1149
+ disease_relevance = "Prostate stem cell antigen; expressed on urothelial cells",
1150
+ drug_relevance = "BiTE antibody target (pavurutamab) in clinical trials",
1151
+ references = "Wu et al. 2009 PNAS"),
1152
+ "IFNG" = list(
1153
+ gwas_evidence = "No direct GWAS hit; key effector cytokine",
1154
+ disease_relevance = "IFN-gamma drives anti-tumor immunity; marks inflamed tumors",
1155
+ drug_relevance = "High IFNG signature predicts ICI response across tumor types",
1156
+ references = "Ayers et al. 2017 JCI"),
1157
+ "IDO1" = list(
1158
+ gwas_evidence = "No direct GWAS hit",
1159
+ disease_relevance = "Immune-regulatory enzyme; upregulated in response to IFN-gamma",
1160
+ drug_relevance = "IDO inhibitor (epacadostat) failed in ECHO-301; paradoxically marks immune-active tumors",
1161
+ references = "Long et al. 2018 J Immunother Cancer"),
1162
+ "S100A8" = list(
1163
+ gwas_evidence = "No direct bladder GWAS hit",
1164
+ disease_relevance = "Calprotectin subunit; neutrophil/monocyte marker in TME",
1165
+ drug_relevance = "Myeloid inflammation marker; neutrophil infiltration may impair ICI response",
1166
+ references = "Shaul & Fridlender 2019 Nat Rev Cancer"),
1167
+ "GZMA" = list(
1168
+ gwas_evidence = "No direct GWAS hit",
1169
+ disease_relevance = "Granzyme A; CD8+ T cell cytolytic activity marker",
1170
+ drug_relevance = "Core cytolytic score gene; high expression predicts ICI response",
1171
+ references = "Rooney et al. 2015 Cell"),
1172
+ "GZMB" = list(
1173
+ gwas_evidence = "No direct GWAS hit",
1174
+ disease_relevance = "Granzyme B; key effector of T cell killing",
1175
+ drug_relevance = "Cytolytic activity marker; prognostic in IO-treated patients",
1176
+ references = "Rooney et al. 2015 Cell"),
1177
+ "B2M" = list(
1178
+ gwas_evidence = "ICI resistance gene",
1179
+ disease_relevance = "Beta-2-microglobulin; loss disrupts MHC-I antigen presentation",
1180
+ drug_relevance = "B2M mutations cause primary ICI resistance; loss-of-function = immune evasion",
1181
+ references = "Zaretsky et al. 2016 NEJM")
1182
+ )
1183
+ } else if (disease == "sepsis") {
1184
+ # Sepsis / Mars1 biomarker panel gene annotations
1185
+ list(
1186
+ "IFIT1B" = list(
1187
+ gwas_evidence = "No direct GWAS hit; interferon pathway broadly implicated in sepsis outcomes",
1188
+ disease_relevance = "Interferon-induced protein; innate antiviral defense; downregulated in Mars1 immunosuppression endotype",
1189
+ drug_relevance = "IFN pathway restoration as therapeutic target; marker of immunosuppressive state",
1190
+ references = "Scicluna et al. 2017 Lancet Respir Med; Sweeney et al. 2018 Sci Transl Med"),
1191
+ "ACSL6" = list(
1192
+ gwas_evidence = "No direct sepsis GWAS hit",
1193
+ disease_relevance = "Acyl-CoA synthetase long-chain 6; lipid metabolism/immunometabolism; metabolic reprogramming in sepsis monocytes",
1194
+ drug_relevance = "Immunometabolic target; fatty acid oxidation modulation under investigation",
1195
+ references = "Langley et al. 2013 Sci Transl Med"),
1196
+ "CTNNAL1" = list(
1197
+ gwas_evidence = "No direct GWAS hit",
1198
+ disease_relevance = "Catenin alpha-like 1; cell adhesion molecule; leukocyte adhesion and migration in vascular endothelium",
1199
+ drug_relevance = "Adhesion pathway marker; anti-adhesion therapies in sepsis trials",
1200
+ references = "Scicluna et al. 2017 Lancet Respir Med"),
1201
+ "ABCC13" = list(
1202
+ gwas_evidence = "No direct GWAS hit",
1203
+ disease_relevance = "ABC transporter pseudogene; potential role in drug metabolism pathways",
1204
+ drug_relevance = "Possible pharmacogenomic relevance for drug clearance in critical illness",
1205
+ references = "Limited functional data"),
1206
+ "C9orf78" = list(
1207
+ gwas_evidence = "No direct GWAS hit",
1208
+ disease_relevance = "HAX1-associated protein (HAIP); apoptosis regulation in immune cells; lymphocyte survival during sepsis",
1209
+ drug_relevance = "Apoptosis pathway; anti-apoptotic strategies under investigation in sepsis",
1210
+ references = "Hotchkiss et al. 2013 Lancet Infect Dis"),
1211
+ "KCNH2" = list(
1212
+ gwas_evidence = "No direct sepsis GWAS hit; GWAS hit for cardiac arrhythmia",
1213
+ disease_relevance = "Potassium voltage-gated channel (hERG); electrolyte regulation critical in sepsis-associated organ dysfunction",
1214
+ drug_relevance = "Cardiac safety marker; QT prolongation risk in ICU drug regimens",
1215
+ references = "Sanguinetti & Bhatt 2022 Physiol Rev"),
1216
+ "FAM104A" = list(
1217
+ gwas_evidence = "No direct GWAS hit",
1218
+ disease_relevance = "DUF1220 domain-containing protein; function in immune cells unclear; identified in Mars1 endotype",
1219
+ drug_relevance = "Novel candidate requiring functional validation",
1220
+ references = "Scicluna et al. 2017 Lancet Respir Med"),
1221
+ "POC1B" = list(
1222
+ gwas_evidence = "No direct GWAS hit",
1223
+ disease_relevance = "Centriolar protein POC1B; cell division in proliferating immune cells during sepsis response",
1224
+ drug_relevance = "Cell cycle marker; potential indicator of immune cell proliferative capacity",
1225
+ references = "Limited sepsis-specific data"),
1226
+ "BPGM" = list(
1227
+ gwas_evidence = "No direct GWAS hit",
1228
+ disease_relevance = "Bisphosphoglycerate mutase; regulates 2,3-BPG levels controlling oxygen delivery; critical in tissue hypoxia during sepsis",
1229
+ drug_relevance = "Oxygen delivery pathway; potential target for improving tissue oxygenation",
1230
+ references = "Ditzel 2003 IUBMB Life"),
1231
+ "ZDHHC2" = list(
1232
+ gwas_evidence = "No direct GWAS hit",
1233
+ disease_relevance = "Palmitoyl transferase (DHHC2); protein palmitoylation in immune signaling; modifies key immune receptors",
1234
+ drug_relevance = "Post-translational modification target; palmitoylation inhibitors under investigation",
1235
+ references = "Chamberlain & Bhatt 2015 Mol Membr Biol"),
1236
+ "FGFR1OP2" = list(
1237
+ gwas_evidence = "No direct GWAS hit",
1238
+ disease_relevance = "FGFR1 oncogene partner 2; centrosome and cytoskeletal organization; role in cell division",
1239
+ drug_relevance = "Cytoskeletal target; marker of proliferative state",
1240
+ references = "Limited sepsis-specific data"),
1241
+ "SAMHD1" = list(
1242
+ gwas_evidence = "No direct sepsis GWAS hit; mutations cause Aicardi-Goutieres syndrome (innate immune)",
1243
+ disease_relevance = "dNTP triphosphohydrolase; innate immune defense; interferon-stimulated gene; restricts viral replication in monocytes",
1244
+ drug_relevance = "Innate immune pathway; IFN-stimulated gene used as immunosuppression marker",
1245
+ references = "Maelfait et al. 2016 Nat Commun; Scicluna et al. 2017"),
1246
+ "DARC" = list(
1247
+ gwas_evidence = "Duffy-null polymorphism (rs2814778) affects neutrophil counts; GWAS hit for benign neutropenia",
1248
+ disease_relevance = "Atypical chemokine receptor 1 (ACKR1/Duffy); chemokine scavenger on erythrocytes; regulates neutrophil trafficking and chemokine bioavailability",
1249
+ drug_relevance = "Pharmacogenomic relevance: Duffy-null status affects baseline neutrophil counts and chemokine levels",
1250
+ references = "Pruenster et al. 2009 J Exp Med; Reich et al. 2009 Genome Biol"),
1251
+ "ACKR1" = list(
1252
+ gwas_evidence = "Duffy-null polymorphism (rs2814778) affects neutrophil counts; GWAS hit for benign neutropenia",
1253
+ disease_relevance = "Atypical chemokine receptor 1 (Duffy); chemokine scavenger on erythrocytes; regulates neutrophil trafficking and chemokine bioavailability",
1254
+ drug_relevance = "Pharmacogenomic relevance: Duffy-null status affects baseline neutrophil counts and chemokine levels",
1255
+ references = "Pruenster et al. 2009 J Exp Med; Reich et al. 2009 Genome Biol"),
1256
+ "DDX17" = list(
1257
+ gwas_evidence = "No direct GWAS hit",
1258
+ disease_relevance = "DEAD-box RNA helicase 17; innate immune signaling; senses viral RNA and activates NF-kB/IRF pathways",
1259
+ drug_relevance = "Pattern recognition pathway; potential target for modulating innate immune activation",
1260
+ references = "Moy et al. 2014 Cell Host Microbe"),
1261
+ "DAAM2" = list(
1262
+ gwas_evidence = "No direct sepsis GWAS hit; associated with autoimmune traits",
1263
+ disease_relevance = "Dishevelled-associated activator of morphogenesis 2; Wnt signaling; cytoskeletal regulation in immune cell migration",
1264
+ drug_relevance = "Wnt pathway modulation; potential role in immune cell trafficking",
1265
+ references = "Lee & Bhatt 2016 Dev Biol"),
1266
+ "TUBG2" = list(
1267
+ gwas_evidence = "No direct GWAS hit",
1268
+ disease_relevance = "Tubulin gamma 2; centrosome function; cell division in proliferating immune cells during sepsis",
1269
+ drug_relevance = "Cell cycle marker; indicator of immune proliferative response",
1270
+ references = "Limited sepsis-specific data"),
1271
+ "IL8" = list(
1272
+ gwas_evidence = "No direct sepsis GWAS hit; IL8 polymorphisms associated with sepsis severity",
1273
+ disease_relevance = "CXCL8; major neutrophil chemoattractant; key sepsis inflammatory mediator; elevated in severe sepsis",
1274
+ drug_relevance = "Anti-IL8/CXCR2 axis under investigation; neutrophil recruitment modulation",
1275
+ references = "Bozza et al. 2007 Crit Care; Sweeney et al. 2018 Sci Transl Med"),
1276
+ "CXCL8" = list(
1277
+ gwas_evidence = "No direct sepsis GWAS hit; IL8 polymorphisms associated with sepsis severity",
1278
+ disease_relevance = "Major neutrophil chemoattractant; key sepsis inflammatory mediator; elevated in severe sepsis",
1279
+ drug_relevance = "Anti-IL8/CXCR2 axis under investigation; neutrophil recruitment modulation",
1280
+ references = "Bozza et al. 2007 Crit Care; Sweeney et al. 2018 Sci Transl Med"),
1281
+ "EGR1" = list(
1282
+ gwas_evidence = "No direct GWAS hit",
1283
+ disease_relevance = "Early growth response 1; stress-responsive transcription factor; rapidly induced in monocyte activation and endothelial stress",
1284
+ drug_relevance = "Immediate early gene; marker of innate immune activation state",
1285
+ references = "Kharbanda et al. 1991 PNAS; Scicluna et al. 2017"),
1286
+ "TNFSF12" = list(
1287
+ gwas_evidence = "No direct GWAS hit",
1288
+ disease_relevance = "TNF superfamily member 12 (TWEAK); immune regulation; monocyte/macrophage-derived; modulates inflammation and tissue repair",
1289
+ drug_relevance = "Anti-TWEAK antibodies in clinical development; potential for modulating sepsis inflammation",
1290
+ references = "Burkly et al. 2011 Drug Discov Today"),
1291
+ "TNFRSF8" = list(
1292
+ gwas_evidence = "No direct GWAS hit",
1293
+ disease_relevance = "TNF receptor superfamily member 8 (CD30); T-cell activation marker; elevated soluble CD30 in sepsis",
1294
+ drug_relevance = "Brentuximab vedotin target (lymphoma); soluble CD30 as sepsis immune activation marker",
1295
+ references = "Pellegrini et al. 2003 Clin Exp Immunol"),
1296
+ "CTSO" = list(
1297
+ gwas_evidence = "No direct GWAS hit",
1298
+ disease_relevance = "Cathepsin O; lysosomal protease; antigen processing and MHC-II presentation in monocytes/dendritic cells",
1299
+ drug_relevance = "Antigen processing pathway; cathepsin inhibitors under investigation",
1300
+ references = "Bhatt et al. 2018 Front Immunol"),
1301
+ "TNKS" = list(
1302
+ gwas_evidence = "No direct GWAS hit",
1303
+ disease_relevance = "Tankyrase 1; Wnt signaling regulation; telomere maintenance; role in cell survival",
1304
+ drug_relevance = "Tankyrase inhibitors in oncology; potential Wnt pathway modulation in immune cells",
1305
+ references = "Lehtio et al. 2013 Mol Cell"),
1306
+ "PTPLA" = list(
1307
+ gwas_evidence = "No direct GWAS hit",
1308
+ disease_relevance = "3-hydroxyacyl-CoA dehydratase 1 (HACD1); fatty acid elongation; lipid metabolism in immune cells",
1309
+ drug_relevance = "Immunometabolic pathway; lipid metabolism modulation",
1310
+ references = "Ikeda et al. 2008 J Biol Chem"),
1311
+ "HACD1" = list(
1312
+ gwas_evidence = "No direct GWAS hit",
1313
+ disease_relevance = "3-hydroxyacyl-CoA dehydratase 1; fatty acid elongation; lipid metabolism in immune cells",
1314
+ drug_relevance = "Immunometabolic pathway; lipid metabolism modulation",
1315
+ references = "Ikeda et al. 2008 J Biol Chem"),
1316
+ "DEFA4" = list(
1317
+ gwas_evidence = "No direct GWAS hit; defensin cluster polymorphisms associated with infection susceptibility",
1318
+ disease_relevance = "Defensin alpha 4; neutrophil antimicrobial peptide; innate immunity first line of defense; upregulated in sepsis",
1319
+ drug_relevance = "Antimicrobial peptide; neutrophil degranulation marker; indicator of innate immune activation",
1320
+ references = "Ganz 2003 Nat Rev Immunol; Scicluna et al. 2017")
1321
+ )
1322
+ } else {
1323
+ # IBD gene annotations
1324
+ list(
1325
+ "S100A8" = list(
1326
+ gwas_evidence = "Indirect: 4 IBD GWAS loci regulate S100A8/A9 expression",
1327
+ disease_relevance = "Calprotectin subunit; fecal calprotectin is standard IBD activity biomarker",
1328
+ drug_relevance = "Neutrophil/monocyte marker; mucosal healing reduces S100A8 expression",
1329
+ references = "S100A8 eQTL: PLOS Genet 2022; Calprotectin: Gut 2018"),
1330
+ "SELENBP1" = list(
1331
+ gwas_evidence = "No direct hit; near IBD loci (ECM1, FCGR2A cluster)",
1332
+ disease_relevance = "Downregulated in active UC (p=0.003); recovers with healing",
1333
+ drug_relevance = "Colonocyte differentiation marker; restoration = epithelial recovery",
1334
+ references = "SELENBP1 in UC: PMC11677018"),
1335
+ "GUCA2B" = list(
1336
+ gwas_evidence = "No direct hit",
1337
+ disease_relevance = "GC-C pathway disruption is hallmark of IBD; strongly downregulated",
1338
+ drug_relevance = "Expression recovers with biologic response; pharmacodynamic marker",
1339
+ references = "GC-C pathway in IBD: PMC4673555"),
1340
+ "RGS13" = list(
1341
+ gwas_evidence = "No direct hit",
1342
+ disease_relevance = "Part of 4-gene vedolizumab response predictor (80-100% accuracy)",
1343
+ drug_relevance = "Published vedolizumab biomarker (Verstockt 2020)",
1344
+ references = "Verstockt et al. Clin Gastroenterol Hepatol 2020"),
1345
+ "PTP4A3" = list(
1346
+ gwas_evidence = "No direct hit; PTPN2/PTPN22 (same family) are IBD GWAS genes",
1347
+ disease_relevance = "Phosphatase regulating cell adhesion in colonic epithelium",
1348
+ drug_relevance = "Negative coefficient: higher expression predicts non-response",
1349
+ references = "PTP4A3 in colon: Oncogene 2016"),
1350
+ "CKB" = list(
1351
+ gwas_evidence = "No direct hit",
1352
+ disease_relevance = "Creatine kinase brain-type; colonocyte marker, reduced in inflammation",
1353
+ drug_relevance = "Epithelial energy metabolism marker",
1354
+ references = "CKB: Human Protein Atlas"),
1355
+ "UCA1" = list(
1356
+ gwas_evidence = "No direct hit",
1357
+ disease_relevance = "lncRNA upregulated in IBD epithelium; regulates NF-kB signaling",
1358
+ drug_relevance = "Epithelial stress indicator",
1359
+ references = "UCA1: RNA Biol 2020"),
1360
+ "GLDN" = list(
1361
+ gwas_evidence = "No direct hit",
1362
+ disease_relevance = "Gliomedin; enteric nervous system expression",
1363
+ drug_relevance = "May reflect enteric neural remodeling (gut-brain axis)",
1364
+ references = "Enteric nervous system in IBD: Gut 2020"),
1365
+ "IGLV4-60" = list(
1366
+ gwas_evidence = "No direct hit; Ig loci are polymorphic",
1367
+ disease_relevance = "Immunoglobulin light chain; B cell/plasma cell infiltration",
1368
+ drug_relevance = "Higher Ig may indicate treatment-refractory inflammation",
1369
+ references = "B cell infiltration: Nat Med 2019"),
1370
+ "C1orf125" = list(
1371
+ gwas_evidence = "No direct hit",
1372
+ disease_relevance = "Uncharacterized ORF",
1373
+ drug_relevance = "Novel candidate; requires functional validation",
1374
+ references = "Limited data")
1375
+ )
1376
+ }
1377
+ }
1378
+
1379
# Compare a biomarker panel against the curated disease gene list and build a
# per-gene annotation table.
#
# Args:
#   panel_genes:  Gene symbols in the panel (coerced to character).
#   all_features: Optional wider set of feature names; when supplied, its
#                 overlap with the curated gene list is reported as well.
#   disease:      Disease key handed to .get_gwas_config() and
#                 .get_gene_annotations().
#
# Returns a list with:
#   panel_annotations: data.frame with one row per panel gene and columns
#                      gene, in_gwas, gwas_evidence, disease_relevance,
#                      drug_relevance, references (all text columns are
#                      plain character vectors, also for an empty panel).
#   direct_overlap:    panel genes present in the curated list.
#   gwas_genes / n_gwas_genes / gwas_label / gwas_refs: taken from the config
#                      (genes, their count, label, refs_detail).
#
# Progress is printed with cat() as a side effect.
.run_gwas_overlap <- function(panel_genes, all_features = NULL, disease = "ibd") {
  # Coerce up front: a factor would otherwise index the annotation list by its
  # integer codes, and NULL would silently drop the `gene` column.
  panel_genes <- as.character(panel_genes)

  config <- .get_gwas_config(disease)
  gwas_genes <- config$genes

  cat("\n---", config$label, "Genetic Risk Overlap ---\n\n")
  cat(" Curated gene list:", length(gwas_genes), "genes\n")
  cat(" Sources:", config$references, "\n\n")

  # Direct overlap with panel genes
  direct_overlap <- intersect(panel_genes, gwas_genes)
  cat(" Direct panel overlap:", length(direct_overlap), "/", length(panel_genes), "\n")
  if (length(direct_overlap) > 0) {
    cat(" Genes:", paste(direct_overlap, collapse = ", "), "\n")
  }

  # Detailed annotations per gene (disease-specific)
  annotation_details <- .get_gene_annotations(disease)
  annotated_genes <- names(annotation_details)

  # Look up one annotation field for every panel gene. vapply() guarantees a
  # character vector of length(panel_genes) even when the panel is empty, and
  # a gene whose entry lacks the field falls back to `fallback` instead of
  # turning the whole column into a list.
  lookup_field <- function(field, fallback) {
    vapply(panel_genes, function(gene) {
      value <- if (gene %in% annotated_genes) {
        annotation_details[[gene]][[field]]
      } else {
        NULL
      }
      if (is.null(value) || length(value) != 1L || is.na(value)) {
        fallback
      } else {
        as.character(value)
      }
    }, character(1), USE.NAMES = FALSE)
  }

  # Build annotation table for all panel genes
  gene_annotations <- data.frame(
    gene = panel_genes,
    in_gwas = panel_genes %in% gwas_genes,
    stringsAsFactors = FALSE
  )
  gene_annotations$gwas_evidence <- lookup_field("gwas_evidence", "Not assessed")
  gene_annotations$disease_relevance <- lookup_field("disease_relevance", "Not assessed")
  gene_annotations$drug_relevance <- lookup_field("drug_relevance", "Not assessed")
  gene_annotations$references <- lookup_field("references", "")

  # Check broader feature list overlap with GWAS genes
  if (!is.null(all_features)) {
    all_overlap <- intersect(as.character(all_features), gwas_genes)
    cat("\n Broader overlap (all", length(all_features), "features):",
        length(all_overlap), "disease genes\n")
    if (length(all_overlap) > 0) {
      # Only the first 20 genes are listed; an ellipsis marks truncation.
      cat(" Disease genes in feature space:", paste(head(all_overlap, 20), collapse = ", "),
          if (length(all_overlap) > 20) "..." else "", "\n")
    }
  }

  cat("\n GWAS overlap analysis complete.\n")

  list(
    panel_annotations = gene_annotations,
    direct_overlap = direct_overlap,
    gwas_genes = gwas_genes,
    n_gwas_genes = length(gwas_genes),
    gwas_label = config$label,
    gwas_refs = config$refs_detail
  )
}
1441
+
1442
+
1443
+ # ============================================================
1444
+ # Plotting helper
1445
+ # ============================================================
1446
+
1447
# Write a plot to "<base_path>.png" and, best effort, "<base_path>.svg".
#
# Args:
#   plot:      Plot object accepted by ggsave() / print().
#   base_path: Output path without file extension.
#   width, height: Plot size passed to ggsave() and svg().
#   dpi:       Resolution used for the PNG only.
#
# The PNG is mandatory: an error from ggsave() propagates to the caller.
# The SVG is optional: ggsave() is tried first, then the base svg() device,
# and if both fail a note is printed and the function still returns normally.
# Returns NULL invisibly; called for its side effects.
.save_enrichment_plot <- function(plot, base_path, width = 8, height = 6, dpi = 300) {
  png_path <- paste0(base_path, ".png")
  ggsave(png_path, plot = plot, width = width, height = height, dpi = dpi, device = "png")
  cat(" Saved:", png_path, "\n")

  svg_path <- paste0(base_path, ".svg")
  tryCatch({
    ggsave(svg_path, plot = plot, width = width, height = height, device = "svg")
    cat(" Saved:", svg_path, "\n")
  }, error = function(e) {
    # ggsave() could not produce the SVG; fall back to the base graphics device.
    tryCatch({
      grDevices::svg(svg_path, width = width, height = height)
      device_id <- grDevices::dev.cur()
      # Close exactly the device opened above even when print() fails;
      # otherwise it stays current and captures later, unrelated plots.
      tryCatch(print(plot), finally = grDevices::dev.off(device_id))
      cat(" Saved:", svg_path, "\n")
    }, error = function(e2) {
      cat(" (SVG export failed)\n")
    })
  })

  invisible(NULL)
}
1467
+
1468
+
1469
+ # ============================================================
1470
+ # Main entry point
1471
+ # ============================================================
1472
+
1473
#' Interpret a biomarker panel biologically
#'
#' Runs pathway enrichment, cell-type enrichment and GWAS / disease-gene
#' overlap in that order. Each step has its own error handler: a failing step
#' prints its message, is stored as `NULL`, and the remaining steps still run.
#' The combined result is saved as `biological_interpretation.rds` in
#' `output_dir`; when GWAS panel annotations exist they are additionally
#' written to `gwas_gene_annotations.csv`.
#'
#' @param model_result LASSO model result from run_lasso_panel(). Its
#'   `stable_features` define the panel; `feature_importance$feature`, when
#'   present, is used as the broader feature list for the GWAS overlap.
#' @param features Feature object from prepare_feature_matrix() (optional);
#'   forwarded to the pathway enrichment step.
#' @param output_dir Output directory for plots and CSVs (created if needed)
#' @param disease Disease context: "ibd", "bladder_cancer", "breast_cancer",
#'   or "sepsis". Any other value is used as-is for the disease label and gets
#'   the generic tissue label "Tissue".
#' @return Named list with `pathway`, `celltype`, `gwas`, `panel_genes`,
#'   `disease`, `disease_label`, `tissue_label` and `gwas_label`
run_biological_interpretation <- function(model_result, features = NULL,
                                          output_dir = "results",
                                          disease = "ibd") {
  panel_genes <- model_result$stable_features

  cat("\n=== Biological Interpretation of Biomarker Panel ===\n")
  cat("Panel:", paste(panel_genes, collapse = ", "), "\n")
  cat("Disease context:", disease, "\n")

  dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)

  importance <- model_result$feature_importance
  all_features <- if (is.null(importance)) NULL else importance$feature

  # Evaluate one analysis step; on error print `failure_prefix` followed by the
  # condition message and yield NULL so the remaining steps can proceed.
  # `step` is evaluated lazily inside tryCatch(), so its errors are caught here.
  attempt <- function(step, failure_prefix) {
    tryCatch(step, error = function(err) {
      cat(failure_prefix, conditionMessage(err), "\n")
      NULL
    })
  }

  # 1. Pathway enrichment (disease-agnostic — uses gene sets from MSigDB)
  pathway <- attempt(
    .run_pathway_enrichment(model_result, features, output_dir),
    " Pathway enrichment failed:"
  )

  # 2. Cell-type enrichment (disease-specific tissue + curated fallback)
  celltype <- attempt(
    .run_celltype_enrichment(panel_genes, output_dir, disease = disease),
    " Cell-type enrichment failed:"
  )

  # 3. GWAS / disease gene overlap (disease-specific gene list)
  gwas <- attempt(
    .run_gwas_overlap(panel_genes, all_features, disease = disease),
    " GWAS overlap failed:"
  )

  # Disease metadata for report generation
  disease_label <- switch(
    disease,
    ibd = "IBD",
    bladder_cancer = "Bladder Cancer / ICI Response",
    breast_cancer = "Breast Cancer / Neoadjuvant Chemotherapy",
    sepsis = "Sepsis / Molecular Endotyping",
    disease
  )
  tissue_label <- switch(
    disease,
    ibd = "Large Intestine",
    bladder_cancer = "Bladder Tumor",
    breast_cancer = "Breast Tumor",
    sepsis = "Blood",
    "Tissue"
  )
  # Prefer the label reported by the GWAS step; otherwise derive one.
  gwas_label <- if (is.null(gwas$gwas_label)) {
    paste(disease_label, "GWAS")
  } else {
    gwas$gwas_label
  }

  # Save combined results
  interp <- list(
    pathway = pathway,
    celltype = celltype,
    gwas = gwas,
    panel_genes = panel_genes,
    disease = disease,
    disease_label = disease_label,
    tissue_label = tissue_label,
    gwas_label = gwas_label
  )
  rds_file <- file.path(output_dir, "biological_interpretation.rds")
  saveRDS(interp, rds_file)
  cat(" Saved: biological_interpretation.rds\n")

  # Save GWAS annotations as CSV
  annotations <- gwas$panel_annotations
  if (!is.null(annotations)) {
    csv_file <- file.path(output_dir, "gwas_gene_annotations.csv")
    write.csv(annotations, csv_file, row.names = FALSE)
    cat(" Saved: gwas_gene_annotations.csv\n")
  }

  cat("\n✓ Biological interpretation completed successfully!\n")
  interp
}