@bgicli/bgicli 2.1.1 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (1266)
  1. package/data/skills/aav-vector-design-agent/SKILL.md +198 -0
  2. package/data/skills/adaptyv/SKILL.md +112 -0
  3. package/data/skills/adhd-daily-planner/SKILL.md +271 -0
  4. package/data/skills/aeon/SKILL.md +372 -0
  5. package/data/skills/agent-browser/SKILL.md +159 -0
  6. package/data/skills/agentd-drug-discovery/SKILL.md +52 -0
  7. package/data/skills/ai-analyzer/SKILL.md +218 -0
  8. package/data/skills/alphafold/SKILL.md +183 -0
  9. package/data/skills/alphafold-database/SKILL.md +500 -0
  10. package/data/skills/anndata/SKILL.md +394 -0
  11. package/data/skills/antibody-design-agent/SKILL.md +64 -0
  12. package/data/skills/arboreto/SKILL.md +237 -0
  13. package/data/skills/armored-cart-design-agent/SKILL.md +225 -0
  14. package/data/skills/arxiv-search/SKILL.md +224 -0
  15. package/data/skills/autonomous-oncology-agent/SKILL.md +77 -0
  16. package/data/skills/bayesian-optimizer/SKILL.md +60 -0
  17. package/data/skills/benchling-integration/SKILL.md +473 -0
  18. package/data/skills/bgpt-paper-search/SKILL.md +81 -0
  19. package/data/skills/bindcraft/SKILL.md +198 -0
  20. package/data/skills/binder-design/SKILL.md +182 -0
  21. package/data/skills/binding-characterization/SKILL.md +234 -0
  22. package/data/skills/bindingdb-database/SKILL.md +332 -0
  23. package/data/skills/bio-admet-prediction/SKILL.md +224 -0
  24. package/data/skills/bio-alignment-files-bam-statistics/SKILL.md +340 -0
  25. package/data/skills/bio-alignment-filtering/SKILL.md +322 -0
  26. package/data/skills/bio-alignment-indexing/SKILL.md +249 -0
  27. package/data/skills/bio-alignment-io/SKILL.md +301 -0
  28. package/data/skills/bio-alignment-msa-parsing/SKILL.md +366 -0
  29. package/data/skills/bio-alignment-msa-statistics/SKILL.md +375 -0
  30. package/data/skills/bio-alignment-pairwise/SKILL.md +277 -0
  31. package/data/skills/bio-alignment-sorting/SKILL.md +296 -0
  32. package/data/skills/bio-alignment-validation/SKILL.md +374 -0
  33. package/data/skills/bio-atac-seq-atac-peak-calling/SKILL.md +221 -0
  34. package/data/skills/bio-atac-seq-atac-qc/SKILL.md +292 -0
  35. package/data/skills/bio-atac-seq-differential-accessibility/SKILL.md +268 -0
  36. package/data/skills/bio-atac-seq-footprinting/SKILL.md +256 -0
  37. package/data/skills/bio-atac-seq-motif-deviation/SKILL.md +319 -0
  38. package/data/skills/bio-atac-seq-nucleosome-positioning/SKILL.md +321 -0
  39. package/data/skills/bio-basecalling/SKILL.md +368 -0
  40. package/data/skills/bio-batch-downloads/SKILL.md +384 -0
  41. package/data/skills/bio-batch-processing/SKILL.md +303 -0
  42. package/data/skills/bio-bedgraph-handling/SKILL.md +336 -0
  43. package/data/skills/bio-blast-searches/SKILL.md +354 -0
  44. package/data/skills/bio-causal-genomics-colocalization-analysis/SKILL.md +264 -0
  45. package/data/skills/bio-causal-genomics-fine-mapping/SKILL.md +267 -0
  46. package/data/skills/bio-causal-genomics-mediation-analysis/SKILL.md +264 -0
  47. package/data/skills/bio-causal-genomics-mendelian-randomization/SKILL.md +221 -0
  48. package/data/skills/bio-causal-genomics-pleiotropy-detection/SKILL.md +292 -0
  49. package/data/skills/bio-cfdna-preprocessing/SKILL.md +200 -0
  50. package/data/skills/bio-chipseq-differential-binding/SKILL.md +262 -0
  51. package/data/skills/bio-chipseq-motif-analysis/SKILL.md +387 -0
  52. package/data/skills/bio-chipseq-peak-annotation/SKILL.md +239 -0
  53. package/data/skills/bio-chipseq-peak-calling/SKILL.md +277 -0
  54. package/data/skills/bio-chipseq-qc/SKILL.md +391 -0
  55. package/data/skills/bio-chipseq-super-enhancers/SKILL.md +288 -0
  56. package/data/skills/bio-chipseq-visualization/SKILL.md +289 -0
  57. package/data/skills/bio-clinical-databases-clinvar-lookup/SKILL.md +188 -0
  58. package/data/skills/bio-clinical-databases-dbsnp-queries/SKILL.md +171 -0
  59. package/data/skills/bio-clinical-databases-gnomad-frequencies/SKILL.md +205 -0
  60. package/data/skills/bio-clinical-databases-hla-typing/SKILL.md +248 -0
  61. package/data/skills/bio-clinical-databases-myvariant-queries/SKILL.md +174 -0
  62. package/data/skills/bio-clinical-databases-pharmacogenomics/SKILL.md +232 -0
  63. package/data/skills/bio-clinical-databases-polygenic-risk/SKILL.md +276 -0
  64. package/data/skills/bio-clinical-databases-somatic-signatures/SKILL.md +261 -0
  65. package/data/skills/bio-clinical-databases-tumor-mutational-burden/SKILL.md +301 -0
  66. package/data/skills/bio-clinical-databases-variant-prioritization/SKILL.md +225 -0
  67. package/data/skills/bio-clip-seq-binding-site-annotation/SKILL.md +66 -0
  68. package/data/skills/bio-clip-seq-clip-alignment/SKILL.md +70 -0
  69. package/data/skills/bio-clip-seq-clip-motif-analysis/SKILL.md +62 -0
  70. package/data/skills/bio-clip-seq-clip-peak-calling/SKILL.md +282 -0
  71. package/data/skills/bio-clip-seq-clip-preprocessing/SKILL.md +142 -0
  72. package/data/skills/bio-codon-usage/SKILL.md +353 -0
  73. package/data/skills/bio-comparative-genomics-ancestral-reconstruction/SKILL.md +312 -0
  74. package/data/skills/bio-comparative-genomics-hgt-detection/SKILL.md +341 -0
  75. package/data/skills/bio-comparative-genomics-ortholog-inference/SKILL.md +308 -0
  76. package/data/skills/bio-comparative-genomics-positive-selection/SKILL.md +354 -0
  77. package/data/skills/bio-comparative-genomics-synteny-analysis/SKILL.md +315 -0
  78. package/data/skills/bio-compressed-files/SKILL.md +263 -0
  79. package/data/skills/bio-consensus-sequences/SKILL.md +340 -0
  80. package/data/skills/bio-copy-number-cnv-annotation/SKILL.md +307 -0
  81. package/data/skills/bio-copy-number-cnv-visualization/SKILL.md +294 -0
  82. package/data/skills/bio-copy-number-cnvkit-analysis/SKILL.md +290 -0
  83. package/data/skills/bio-copy-number-gatk-cnv/SKILL.md +270 -0
  84. package/data/skills/bio-crispr-screens-base-editing-analysis/SKILL.md +110 -0
  85. package/data/skills/bio-crispr-screens-batch-correction/SKILL.md +316 -0
  86. package/data/skills/bio-crispr-screens-crispresso-editing/SKILL.md +205 -0
  87. package/data/skills/bio-crispr-screens-hit-calling/SKILL.md +264 -0
  88. package/data/skills/bio-crispr-screens-jacks-analysis/SKILL.md +313 -0
  89. package/data/skills/bio-crispr-screens-library-design/SKILL.md +417 -0
  90. package/data/skills/bio-crispr-screens-mageck-analysis/SKILL.md +222 -0
  91. package/data/skills/bio-crispr-screens-screen-qc/SKILL.md +243 -0
  92. package/data/skills/bio-ctdna-mutation-detection/SKILL.md +234 -0
  93. package/data/skills/bio-data-visualization-circos-plots/SKILL.md +405 -0
  94. package/data/skills/bio-data-visualization-color-palettes/SKILL.md +244 -0
  95. package/data/skills/bio-data-visualization-genome-browser-tracks/SKILL.md +328 -0
  96. package/data/skills/bio-data-visualization-genome-tracks/SKILL.md +249 -0
  97. package/data/skills/bio-data-visualization-ggplot2-fundamentals/SKILL.md +313 -0
  98. package/data/skills/bio-data-visualization-heatmaps-clustering/SKILL.md +227 -0
  99. package/data/skills/bio-data-visualization-interactive-visualization/SKILL.md +210 -0
  100. package/data/skills/bio-data-visualization-multipanel-figures/SKILL.md +274 -0
  101. package/data/skills/bio-data-visualization-specialized-omics-plots/SKILL.md +251 -0
  102. package/data/skills/bio-data-visualization-upset-plots/SKILL.md +228 -0
  103. package/data/skills/bio-data-visualization-volcano-customization/SKILL.md +233 -0
  104. package/data/skills/bio-de-deseq2-basics/SKILL.md +376 -0
  105. package/data/skills/bio-de-edger-basics/SKILL.md +418 -0
  106. package/data/skills/bio-de-results/SKILL.md +378 -0
  107. package/data/skills/bio-de-visualization/SKILL.md +408 -0
  108. package/data/skills/bio-differential-expression-batch-correction/SKILL.md +253 -0
  109. package/data/skills/bio-differential-expression-timeseries-de/SKILL.md +370 -0
  110. package/data/skills/bio-differential-splicing/SKILL.md +177 -0
  111. package/data/skills/bio-duplicate-handling/SKILL.md +292 -0
  112. package/data/skills/bio-entrez-fetch/SKILL.md +334 -0
  113. package/data/skills/bio-entrez-link/SKILL.md +325 -0
  114. package/data/skills/bio-entrez-search/SKILL.md +311 -0
  115. package/data/skills/bio-epidemiological-genomics-amr-surveillance/SKILL.md +233 -0
  116. package/data/skills/bio-epidemiological-genomics-pathogen-typing/SKILL.md +202 -0
  117. package/data/skills/bio-epidemiological-genomics-phylodynamics/SKILL.md +207 -0
  118. package/data/skills/bio-epidemiological-genomics-transmission-inference/SKILL.md +237 -0
  119. package/data/skills/bio-epidemiological-genomics-variant-surveillance/SKILL.md +237 -0
  120. package/data/skills/bio-epitranscriptomics-m6a-differential/SKILL.md +88 -0
  121. package/data/skills/bio-epitranscriptomics-m6a-peak-calling/SKILL.md +89 -0
  122. package/data/skills/bio-epitranscriptomics-m6anet-analysis/SKILL.md +101 -0
  123. package/data/skills/bio-epitranscriptomics-merip-preprocessing/SKILL.md +81 -0
  124. package/data/skills/bio-epitranscriptomics-modification-visualization/SKILL.md +98 -0
  125. package/data/skills/bio-experimental-design-batch-design/SKILL.md +110 -0
  126. package/data/skills/bio-experimental-design-multiple-testing/SKILL.md +98 -0
  127. package/data/skills/bio-experimental-design-power-analysis/SKILL.md +84 -0
  128. package/data/skills/bio-experimental-design-sample-size/SKILL.md +93 -0
  129. package/data/skills/bio-expression-matrix-counts-ingest/SKILL.md +220 -0
  130. package/data/skills/bio-expression-matrix-gene-id-mapping/SKILL.md +256 -0
  131. package/data/skills/bio-expression-matrix-metadata-joins/SKILL.md +271 -0
  132. package/data/skills/bio-expression-matrix-sparse-handling/SKILL.md +247 -0
  133. package/data/skills/bio-fastq-quality/SKILL.md +279 -0
  134. package/data/skills/bio-filter-sequences/SKILL.md +265 -0
  135. package/data/skills/bio-flow-cytometry-bead-normalization/SKILL.md +315 -0
  136. package/data/skills/bio-flow-cytometry-clustering-phenotyping/SKILL.md +237 -0
  137. package/data/skills/bio-flow-cytometry-compensation-transformation/SKILL.md +196 -0
  138. package/data/skills/bio-flow-cytometry-cytometry-qc/SKILL.md +382 -0
  139. package/data/skills/bio-flow-cytometry-differential-analysis/SKILL.md +217 -0
  140. package/data/skills/bio-flow-cytometry-doublet-detection/SKILL.md +288 -0
  141. package/data/skills/bio-flow-cytometry-fcs-handling/SKILL.md +221 -0
  142. package/data/skills/bio-flow-cytometry-gating-analysis/SKILL.md +193 -0
  143. package/data/skills/bio-format-conversion/SKILL.md +193 -0
  144. package/data/skills/bio-fragment-analysis/SKILL.md +214 -0
  145. package/data/skills/bio-gatk-variant-calling/SKILL.md +422 -0
  146. package/data/skills/bio-genome-assembly-assembly-polishing/SKILL.md +333 -0
  147. package/data/skills/bio-genome-assembly-assembly-qc/SKILL.md +344 -0
  148. package/data/skills/bio-genome-assembly-contamination-detection/SKILL.md +235 -0
  149. package/data/skills/bio-genome-assembly-hifi-assembly/SKILL.md +178 -0
  150. package/data/skills/bio-genome-assembly-long-read-assembly/SKILL.md +307 -0
  151. package/data/skills/bio-genome-assembly-metagenome-assembly/SKILL.md +227 -0
  152. package/data/skills/bio-genome-assembly-scaffolding/SKILL.md +204 -0
  153. package/data/skills/bio-genome-assembly-short-read-assembly/SKILL.md +319 -0
  154. package/data/skills/bio-genome-engineering-base-editing-design/SKILL.md +277 -0
  155. package/data/skills/bio-genome-engineering-grna-design/SKILL.md +221 -0
  156. package/data/skills/bio-genome-engineering-hdr-template-design/SKILL.md +264 -0
  157. package/data/skills/bio-genome-engineering-off-target-prediction/SKILL.md +232 -0
  158. package/data/skills/bio-genome-engineering-prime-editing-design/SKILL.md +275 -0
  159. package/data/skills/bio-genome-intervals-bed-file-basics/SKILL.md +357 -0
  160. package/data/skills/bio-genome-intervals-bigwig-tracks/SKILL.md +351 -0
  161. package/data/skills/bio-genome-intervals-coverage-analysis/SKILL.md +300 -0
  162. package/data/skills/bio-genome-intervals-gtf-gff-handling/SKILL.md +345 -0
  163. package/data/skills/bio-genome-intervals-interval-arithmetic/SKILL.md +485 -0
  164. package/data/skills/bio-genome-intervals-proximity-operations/SKILL.md +337 -0
  165. package/data/skills/bio-geo-data/SKILL.md +380 -0
  166. package/data/skills/bio-hi-c-analysis-compartment-analysis/SKILL.md +261 -0
  167. package/data/skills/bio-hi-c-analysis-contact-pairs/SKILL.md +278 -0
  168. package/data/skills/bio-hi-c-analysis-hic-data-io/SKILL.md +260 -0
  169. package/data/skills/bio-hi-c-analysis-hic-differential/SKILL.md +328 -0
  170. package/data/skills/bio-hi-c-analysis-hic-visualization/SKILL.md +297 -0
  171. package/data/skills/bio-hi-c-analysis-loop-calling/SKILL.md +284 -0
  172. package/data/skills/bio-hi-c-analysis-matrix-operations/SKILL.md +274 -0
  173. package/data/skills/bio-hi-c-analysis-tad-detection/SKILL.md +239 -0
  174. package/data/skills/bio-imaging-mass-cytometry-cell-segmentation/SKILL.md +241 -0
  175. package/data/skills/bio-imaging-mass-cytometry-data-preprocessing/SKILL.md +279 -0
  176. package/data/skills/bio-imaging-mass-cytometry-interactive-annotation/SKILL.md +304 -0
  177. package/data/skills/bio-imaging-mass-cytometry-phenotyping/SKILL.md +231 -0
  178. package/data/skills/bio-imaging-mass-cytometry-quality-metrics/SKILL.md +316 -0
  179. package/data/skills/bio-imaging-mass-cytometry-spatial-analysis/SKILL.md +246 -0
  180. package/data/skills/bio-immunoinformatics-epitope-prediction/SKILL.md +259 -0
  181. package/data/skills/bio-immunoinformatics-immunogenicity-scoring/SKILL.md +275 -0
  182. package/data/skills/bio-immunoinformatics-mhc-binding-prediction/SKILL.md +260 -0
  183. package/data/skills/bio-immunoinformatics-neoantigen-prediction/SKILL.md +277 -0
  184. package/data/skills/bio-immunoinformatics-tcr-epitope-binding/SKILL.md +257 -0
  185. package/data/skills/bio-isoform-switching/SKILL.md +192 -0
  186. package/data/skills/bio-liquid-biopsy-pipeline/SKILL.md +311 -0
  187. package/data/skills/bio-local-blast/SKILL.md +350 -0
  188. package/data/skills/bio-long-read-sequencing-clair3-variants/SKILL.md +252 -0
  189. package/data/skills/bio-long-read-sequencing-isoseq-analysis/SKILL.md +334 -0
  190. package/data/skills/bio-long-read-sequencing-nanopore-methylation/SKILL.md +110 -0
  191. package/data/skills/bio-longitudinal-monitoring/SKILL.md +271 -0
  192. package/data/skills/bio-longread-alignment/SKILL.md +193 -0
  193. package/data/skills/bio-longread-medaka/SKILL.md +176 -0
  194. package/data/skills/bio-longread-qc/SKILL.md +224 -0
  195. package/data/skills/bio-longread-structural-variants/SKILL.md +201 -0
  196. package/data/skills/bio-machine-learning-atlas-mapping/SKILL.md +139 -0
  197. package/data/skills/bio-machine-learning-biomarker-discovery/SKILL.md +157 -0
  198. package/data/skills/bio-machine-learning-model-validation/SKILL.md +148 -0
  199. package/data/skills/bio-machine-learning-omics-classifiers/SKILL.md +146 -0
  200. package/data/skills/bio-machine-learning-prediction-explanation/SKILL.md +162 -0
  201. package/data/skills/bio-machine-learning-survival-analysis/SKILL.md +176 -0
  202. package/data/skills/bio-metabolomics-lipidomics/SKILL.md +265 -0
  203. package/data/skills/bio-metabolomics-metabolite-annotation/SKILL.md +241 -0
  204. package/data/skills/bio-metabolomics-msdial-preprocessing/SKILL.md +308 -0
  205. package/data/skills/bio-metabolomics-normalization-qc/SKILL.md +283 -0
  206. package/data/skills/bio-metabolomics-pathway-mapping/SKILL.md +237 -0
  207. package/data/skills/bio-metabolomics-statistical-analysis/SKILL.md +276 -0
  208. package/data/skills/bio-metabolomics-targeted-analysis/SKILL.md +314 -0
  209. package/data/skills/bio-metabolomics-xcms-preprocessing/SKILL.md +268 -0
  210. package/data/skills/bio-metagenomics-abundance/SKILL.md +203 -0
  211. package/data/skills/bio-metagenomics-amr-detection/SKILL.md +293 -0
  212. package/data/skills/bio-metagenomics-functional-profiling/SKILL.md +252 -0
  213. package/data/skills/bio-metagenomics-kraken/SKILL.md +204 -0
  214. package/data/skills/bio-metagenomics-metaphlan/SKILL.md +214 -0
  215. package/data/skills/bio-metagenomics-strain-tracking/SKILL.md +292 -0
  216. package/data/skills/bio-metagenomics-visualization/SKILL.md +240 -0
  217. package/data/skills/bio-methylation-based-detection/SKILL.md +223 -0
  218. package/data/skills/bio-methylation-bismark-alignment/SKILL.md +195 -0
  219. package/data/skills/bio-methylation-calling/SKILL.md +200 -0
  220. package/data/skills/bio-methylation-dmr-detection/SKILL.md +211 -0
  221. package/data/skills/bio-methylation-methylkit/SKILL.md +219 -0
  222. package/data/skills/bio-microbiome-amplicon-processing/SKILL.md +137 -0
  223. package/data/skills/bio-microbiome-differential-abundance/SKILL.md +147 -0
  224. package/data/skills/bio-microbiome-diversity-analysis/SKILL.md +188 -0
  225. package/data/skills/bio-microbiome-functional-prediction/SKILL.md +153 -0
  226. package/data/skills/bio-microbiome-qiime2-workflow/SKILL.md +219 -0
  227. package/data/skills/bio-microbiome-taxonomy-assignment/SKILL.md +168 -0
  228. package/data/skills/bio-molecular-descriptors/SKILL.md +200 -0
  229. package/data/skills/bio-molecular-io/SKILL.md +188 -0
  230. package/data/skills/bio-motif-search/SKILL.md +354 -0
  231. package/data/skills/bio-multi-omics-data-harmonization/SKILL.md +228 -0
  232. package/data/skills/bio-multi-omics-mixomics-analysis/SKILL.md +221 -0
  233. package/data/skills/bio-multi-omics-mofa-integration/SKILL.md +225 -0
  234. package/data/skills/bio-multi-omics-similarity-network/SKILL.md +235 -0
  235. package/data/skills/bio-orchestrator/SKILL.md +133 -0
  236. package/data/skills/bio-paired-end-fastq/SKILL.md +334 -0
  237. package/data/skills/bio-pathway-enrichment-visualization/SKILL.md +278 -0
  238. package/data/skills/bio-pathway-go-enrichment/SKILL.md +218 -0
  239. package/data/skills/bio-pathway-gsea/SKILL.md +227 -0
  240. package/data/skills/bio-pathway-kegg-pathways/SKILL.md +234 -0
  241. package/data/skills/bio-pathway-reactome/SKILL.md +215 -0
  242. package/data/skills/bio-pathway-wikipathways/SKILL.md +255 -0
  243. package/data/skills/bio-pdb-geometric-analysis/SKILL.md +475 -0
  244. package/data/skills/bio-pdb-structure-io/SKILL.md +296 -0
  245. package/data/skills/bio-pdb-structure-modification/SKILL.md +448 -0
  246. package/data/skills/bio-pdb-structure-navigation/SKILL.md +335 -0
  247. package/data/skills/bio-phasing-imputation-genotype-imputation/SKILL.md +201 -0
  248. package/data/skills/bio-phasing-imputation-haplotype-phasing/SKILL.md +190 -0
  249. package/data/skills/bio-phasing-imputation-imputation-qc/SKILL.md +265 -0
  250. package/data/skills/bio-phasing-imputation-reference-panels/SKILL.md +203 -0
  251. package/data/skills/bio-phylo-distance-calculations/SKILL.md +307 -0
  252. package/data/skills/bio-phylo-modern-tree-inference/SKILL.md +274 -0
  253. package/data/skills/bio-phylo-tree-io/SKILL.md +252 -0
  254. package/data/skills/bio-phylo-tree-manipulation/SKILL.md +375 -0
  255. package/data/skills/bio-phylo-tree-visualization/SKILL.md +275 -0
  256. package/data/skills/bio-pileup-generation/SKILL.md +314 -0
  257. package/data/skills/bio-population-genetics-association-testing/SKILL.md +293 -0
  258. package/data/skills/bio-population-genetics-linkage-disequilibrium/SKILL.md +260 -0
  259. package/data/skills/bio-population-genetics-plink-basics/SKILL.md +338 -0
  260. package/data/skills/bio-population-genetics-population-structure/SKILL.md +352 -0
  261. package/data/skills/bio-population-genetics-scikit-allel-analysis/SKILL.md +306 -0
  262. package/data/skills/bio-population-genetics-selection-statistics/SKILL.md +251 -0
  263. package/data/skills/bio-primer-design-primer-basics/SKILL.md +289 -0
  264. package/data/skills/bio-primer-design-primer-validation/SKILL.md +344 -0
  265. package/data/skills/bio-primer-design-qpcr-primers/SKILL.md +273 -0
  266. package/data/skills/bio-proteomics-data-import/SKILL.md +122 -0
  267. package/data/skills/bio-proteomics-dia-analysis/SKILL.md +246 -0
  268. package/data/skills/bio-proteomics-differential-abundance/SKILL.md +129 -0
  269. package/data/skills/bio-proteomics-peptide-identification/SKILL.md +122 -0
  270. package/data/skills/bio-proteomics-protein-inference/SKILL.md +174 -0
  271. package/data/skills/bio-proteomics-proteomics-qc/SKILL.md +208 -0
  272. package/data/skills/bio-proteomics-ptm-analysis/SKILL.md +139 -0
  273. package/data/skills/bio-proteomics-quantification/SKILL.md +141 -0
  274. package/data/skills/bio-proteomics-spectral-libraries/SKILL.md +270 -0
  275. package/data/skills/bio-reaction-enumeration/SKILL.md +251 -0
  276. package/data/skills/bio-read-alignment-bowtie2-alignment/SKILL.md +189 -0
  277. package/data/skills/bio-read-alignment-bwa-alignment/SKILL.md +166 -0
  278. package/data/skills/bio-read-alignment-hisat2-alignment/SKILL.md +205 -0
  279. package/data/skills/bio-read-alignment-star-alignment/SKILL.md +204 -0
  280. package/data/skills/bio-read-qc-adapter-trimming/SKILL.md +222 -0
  281. package/data/skills/bio-read-qc-contamination-screening/SKILL.md +252 -0
  282. package/data/skills/bio-read-qc-fastp-workflow/SKILL.md +278 -0
  283. package/data/skills/bio-read-qc-quality-filtering/SKILL.md +231 -0
  284. package/data/skills/bio-read-qc-quality-reports/SKILL.md +204 -0
  285. package/data/skills/bio-read-qc-umi-processing/SKILL.md +391 -0
  286. package/data/skills/bio-read-sequences/SKILL.md +319 -0
  287. package/data/skills/bio-reference-operations/SKILL.md +302 -0
  288. package/data/skills/bio-reporting-automated-qc-reports/SKILL.md +103 -0
  289. package/data/skills/bio-reporting-figure-export/SKILL.md +112 -0
  290. package/data/skills/bio-reporting-jupyter-reports/SKILL.md +98 -0
  291. package/data/skills/bio-reporting-quarto-reports/SKILL.md +295 -0
  292. package/data/skills/bio-reporting-rmarkdown-reports/SKILL.md +276 -0
  293. package/data/skills/bio-research-tools-biomarker-signature-studio/SKILL.md +99 -0
  294. package/data/skills/bio-restriction-enzyme-selection/SKILL.md +342 -0
  295. package/data/skills/bio-restriction-fragment-analysis/SKILL.md +259 -0
  296. package/data/skills/bio-restriction-mapping/SKILL.md +239 -0
  297. package/data/skills/bio-restriction-sites/SKILL.md +222 -0
  298. package/data/skills/bio-reverse-complement/SKILL.md +250 -0
  299. package/data/skills/bio-ribo-seq-orf-detection/SKILL.md +303 -0
  300. package/data/skills/bio-ribo-seq-riboseq-preprocessing/SKILL.md +176 -0
  301. package/data/skills/bio-ribo-seq-ribosome-periodicity/SKILL.md +182 -0
  302. package/data/skills/bio-ribo-seq-ribosome-stalling/SKILL.md +217 -0
  303. package/data/skills/bio-ribo-seq-translation-efficiency/SKILL.md +183 -0
  304. package/data/skills/bio-rna-quantification-alignment-free-quant/SKILL.md +226 -0
  305. package/data/skills/bio-rna-quantification-count-matrix-qc/SKILL.md +310 -0
  306. package/data/skills/bio-rna-quantification-featurecounts-counting/SKILL.md +190 -0
  307. package/data/skills/bio-rna-quantification-tximport-workflow/SKILL.md +240 -0
  308. package/data/skills/bio-rnaseq-qc/SKILL.md +320 -0
  309. package/data/skills/bio-sam-bam-basics/SKILL.md +248 -0
  310. package/data/skills/bio-sashimi-plots/SKILL.md +175 -0
  311. package/data/skills/bio-seq-objects/SKILL.md +240 -0
  312. package/data/skills/bio-sequence-properties/SKILL.md +397 -0
  313. package/data/skills/bio-sequence-similarity/SKILL.md +335 -0
  314. package/data/skills/bio-sequence-slicing/SKILL.md +232 -0
  315. package/data/skills/bio-sequence-statistics/SKILL.md +318 -0
  316. package/data/skills/bio-similarity-searching/SKILL.md +200 -0
  317. package/data/skills/bio-single-cell-batch-integration/SKILL.md +317 -0
  318. package/data/skills/bio-single-cell-cell-annotation/SKILL.md +259 -0
  319. package/data/skills/bio-single-cell-cell-communication/SKILL.md +257 -0
  320. package/data/skills/bio-single-cell-clustering/SKILL.md +330 -0
  321. package/data/skills/bio-single-cell-data-io/SKILL.md +315 -0
  322. package/data/skills/bio-single-cell-doublet-detection/SKILL.md +362 -0
  323. package/data/skills/bio-single-cell-lineage-tracing/SKILL.md +319 -0
  324. package/data/skills/bio-single-cell-markers-annotation/SKILL.md +317 -0
  325. package/data/skills/bio-single-cell-metabolite-communication/SKILL.md +258 -0
  326. package/data/skills/bio-single-cell-multimodal-integration/SKILL.md +242 -0
  327. package/data/skills/bio-single-cell-perturb-seq/SKILL.md +258 -0
  328. package/data/skills/bio-single-cell-preprocessing/SKILL.md +338 -0
  329. package/data/skills/bio-single-cell-scatac-analysis/SKILL.md +326 -0
  330. package/data/skills/bio-single-cell-splicing/SKILL.md +199 -0
  331. package/data/skills/bio-single-cell-trajectory-inference/SKILL.md +225 -0
  332. package/data/skills/bio-small-rna-seq-differential-mirna/SKILL.md +194 -0
  333. package/data/skills/bio-small-rna-seq-mirdeep2-analysis/SKILL.md +180 -0
  334. package/data/skills/bio-small-rna-seq-mirge3-analysis/SKILL.md +178 -0
  335. package/data/skills/bio-small-rna-seq-smrna-preprocessing/SKILL.md +174 -0
  336. package/data/skills/bio-small-rna-seq-target-prediction/SKILL.md +202 -0
  337. package/data/skills/bio-spatial-transcriptomics-image-analysis/SKILL.md +283 -0
  338. package/data/skills/bio-spatial-transcriptomics-spatial-communication/SKILL.md +299 -0
  339. package/data/skills/bio-spatial-transcriptomics-spatial-data-io/SKILL.md +272 -0
  340. package/data/skills/bio-spatial-transcriptomics-spatial-deconvolution/SKILL.md +314 -0
  341. package/data/skills/bio-spatial-transcriptomics-spatial-domains/SKILL.md +254 -0
  342. package/data/skills/bio-spatial-transcriptomics-spatial-multiomics/SKILL.md +181 -0
  343. package/data/skills/bio-spatial-transcriptomics-spatial-neighbors/SKILL.md +198 -0
  344. package/data/skills/bio-spatial-transcriptomics-spatial-preprocessing/SKILL.md +269 -0
  345. package/data/skills/bio-spatial-transcriptomics-spatial-proteomics/SKILL.md +124 -0
  346. package/data/skills/bio-spatial-transcriptomics-spatial-statistics/SKILL.md +237 -0
  347. package/data/skills/bio-spatial-transcriptomics-spatial-visualization/SKILL.md +287 -0
  348. package/data/skills/bio-splicing-pipeline/SKILL.md +253 -0
  349. package/data/skills/bio-splicing-qc/SKILL.md +190 -0
  350. package/data/skills/bio-splicing-quantification/SKILL.md +145 -0
  351. package/data/skills/bio-sra-data/SKILL.md +363 -0
  352. package/data/skills/bio-structural-biology-alphafold-predictions/SKILL.md +258 -0
  353. package/data/skills/bio-structural-biology-modern-structure-prediction/SKILL.md +346 -0
  354. package/data/skills/bio-substructure-search/SKILL.md +206 -0
  355. package/data/skills/bio-systems-biology-context-specific-models/SKILL.md +241 -0
  356. package/data/skills/bio-systems-biology-flux-balance-analysis/SKILL.md +206 -0
  357. package/data/skills/bio-systems-biology-gene-essentiality/SKILL.md +235 -0
  358. package/data/skills/bio-systems-biology-metabolic-reconstruction/SKILL.md +215 -0
  359. package/data/skills/bio-systems-biology-model-curation/SKILL.md +243 -0
  360. package/data/skills/bio-tcr-bcr-analysis-immcantation-analysis/SKILL.md +195 -0
  361. package/data/skills/bio-tcr-bcr-analysis-mixcr-analysis/SKILL.md +167 -0
  362. package/data/skills/bio-tcr-bcr-analysis-repertoire-visualization/SKILL.md +224 -0
  363. package/data/skills/bio-tcr-bcr-analysis-scirpy-analysis/SKILL.md +168 -0
  364. package/data/skills/bio-tcr-bcr-analysis-vdjtools-analysis/SKILL.md +188 -0
  365. package/data/skills/bio-transcription-translation/SKILL.md +237 -0
  366. package/data/skills/bio-tumor-fraction-estimation/SKILL.md +211 -0
  367. package/data/skills/bio-uniprot-access/SKILL.md +239 -0
  368. package/data/skills/bio-variant-annotation/SKILL.md +410 -0
  369. package/data/skills/bio-variant-calling/SKILL.md +266 -0
  370. package/data/skills/bio-variant-calling-clinical-interpretation/SKILL.md +355 -0
  371. package/data/skills/bio-variant-calling-deepvariant/SKILL.md +315 -0
  372. package/data/skills/bio-variant-calling-filtering-best-practices/SKILL.md +403 -0
  373. package/data/skills/bio-variant-calling-joint-calling/SKILL.md +338 -0
  374. package/data/skills/bio-variant-calling-structural-variant-calling/SKILL.md +253 -0
  375. package/data/skills/bio-variant-normalization/SKILL.md +325 -0
  376. package/data/skills/bio-vcf-basics/SKILL.md +342 -0
  377. package/data/skills/bio-vcf-manipulation/SKILL.md +429 -0
  378. package/data/skills/bio-vcf-statistics/SKILL.md +445 -0
  379. package/data/skills/bio-virtual-screening/SKILL.md +263 -0
  380. package/data/skills/bio-workflow-management-cwl-workflows/SKILL.md +433 -0
  381. package/data/skills/bio-workflow-management-nextflow-pipelines/SKILL.md +386 -0
  382. package/data/skills/bio-workflow-management-snakemake-workflows/SKILL.md +383 -0
  383. package/data/skills/bio-workflow-management-wdl-workflows/SKILL.md +500 -0
  384. package/data/skills/bio-workflows-atacseq-pipeline/SKILL.md +362 -0
  385. package/data/skills/bio-workflows-biomarker-pipeline/SKILL.md +272 -0
  386. package/data/skills/bio-workflows-chipseq-pipeline/SKILL.md +282 -0
  387. package/data/skills/bio-workflows-clip-pipeline/SKILL.md +268 -0
  388. package/data/skills/bio-workflows-cnv-pipeline/SKILL.md +324 -0
  389. package/data/skills/bio-workflows-crispr-editing-pipeline/SKILL.md +455 -0
  390. package/data/skills/bio-workflows-crispr-screen-pipeline/SKILL.md +278 -0
  391. package/data/skills/bio-workflows-cytometry-pipeline/SKILL.md +328 -0
  392. package/data/skills/bio-workflows-expression-to-pathways/SKILL.md +329 -0
  393. package/data/skills/bio-workflows-fastq-to-variants/SKILL.md +374 -0
  394. package/data/skills/bio-workflows-genome-assembly-pipeline/SKILL.md +290 -0
  395. package/data/skills/bio-workflows-gwas-pipeline/SKILL.md +323 -0
  396. package/data/skills/bio-workflows-hic-pipeline/SKILL.md +304 -0
  397. package/data/skills/bio-workflows-imc-pipeline/SKILL.md +304 -0
  398. package/data/skills/bio-workflows-longread-sv-pipeline/SKILL.md +281 -0
  399. package/data/skills/bio-workflows-merip-pipeline/SKILL.md +222 -0
  400. package/data/skills/bio-workflows-metabolic-modeling-pipeline/SKILL.md +408 -0
  401. package/data/skills/bio-workflows-metabolomics-pipeline/SKILL.md +297 -0
  402. package/data/skills/bio-workflows-metagenomics-pipeline/SKILL.md +283 -0
  403. package/data/skills/bio-workflows-methylation-pipeline/SKILL.md +274 -0
  404. package/data/skills/bio-workflows-microbiome-pipeline/SKILL.md +221 -0
  405. package/data/skills/bio-workflows-multi-omics-pipeline/SKILL.md +362 -0
  406. package/data/skills/bio-workflows-multiome-pipeline/SKILL.md +298 -0
  407. package/data/skills/bio-workflows-neoantigen-pipeline/SKILL.md +325 -0
  408. package/data/skills/bio-workflows-outbreak-pipeline/SKILL.md +341 -0
  409. package/data/skills/bio-workflows-proteomics-pipeline/SKILL.md +226 -0
  410. package/data/skills/bio-workflows-riboseq-pipeline/SKILL.md +94 -0
  411. package/data/skills/bio-workflows-rnaseq-to-de/SKILL.md +345 -0
  412. package/data/skills/bio-workflows-scrnaseq-pipeline/SKILL.md +354 -0
  413. package/data/skills/bio-workflows-smrna-pipeline/SKILL.md +86 -0
  414. package/data/skills/bio-workflows-somatic-variant-pipeline/SKILL.md +313 -0
  415. package/data/skills/bio-workflows-spatial-pipeline/SKILL.md +267 -0
  416. package/data/skills/bio-workflows-tcr-pipeline/SKILL.md +84 -0
  417. package/data/skills/bio-write-sequences/SKILL.md +205 -0
  418. package/data/skills/bioinformatics-singlecell/SKILL.md +143 -0
  419. package/data/skills/biokernel/SKILL.md +61 -0
  420. package/data/skills/biologist-analyst/SKILL.md +799 -0
  421. package/data/skills/biomaster-workflows/SKILL.md +55 -0
  422. package/data/skills/biomcp-server/SKILL.md +65 -0
  423. package/data/skills/biomedical-data-analysis/SKILL.md +56 -0
  424. package/data/skills/biomedical-search/SKILL.md +214 -0
  425. package/data/skills/biomni/SKILL.md +309 -0
  426. package/data/skills/biomni-general-agent/SKILL.md +43 -0
  427. package/data/skills/biomni-research-agent/SKILL.md +76 -0
  428. package/data/skills/biopython/SKILL.md +437 -0
  429. package/data/skills/biorxiv-database/SKILL.md +477 -0
  430. package/data/skills/bioservices/SKILL.md +355 -0
  431. package/data/skills/boltz/SKILL.md +188 -0
  432. package/data/skills/boltzgen/SKILL.md +287 -0
  433. package/data/skills/bone-marrow-ai-agent/SKILL.md +163 -0
  434. package/data/skills/brainstorming/SKILL.md +96 -0
  435. package/data/skills/brenda-database/SKILL.md +714 -0
  436. package/data/skills/bulk-combat-correction/SKILL.md +54 -0
  437. package/data/skills/bulk-deg-analysis/SKILL.md +61 -0
  438. package/data/skills/bulk-deseq2-analysis/SKILL.md +50 -0
  439. package/data/skills/bulk-stringdb-ppi/SKILL.md +49 -0
  440. package/data/skills/bulk-to-single-deconvolution/SKILL.md +50 -0
  441. package/data/skills/bulk-trajblend-interpolation/SKILL.md +52 -0
  442. package/data/skills/bulk-wgcna-analysis/SKILL.md +56 -0
  443. package/data/skills/cancer-metabolism-agent/SKILL.md +180 -0
  444. package/data/skills/care-coordination/SKILL.md +35 -0
  445. package/data/skills/cart-design-optimizer-agent/SKILL.md +162 -0
  446. package/data/skills/cbioportal-database/SKILL.md +367 -0
  447. package/data/skills/cell-free-expression/SKILL.md +291 -0
  448. package/data/skills/cellagent-annotation/SKILL.md +69 -0
  449. package/data/skills/cellfree-rna-agent/SKILL.md +182 -0
  450. package/data/skills/cellular-senescence-agent/SKILL.md +183 -0
  451. package/data/skills/cellxgene-census/SKILL.md +505 -0
  452. package/data/skills/chai/SKILL.md +272 -0
  453. package/data/skills/chatehr-clinician-assistant/SKILL.md +67 -0
  454. package/data/skills/chematagent-drug-discovery/SKILL.md +68 -0
  455. package/data/skills/chembl-database/SKILL.md +383 -0
  456. package/data/skills/chembl-search/SKILL.md +211 -0
  457. package/data/skills/chemcrow-drug-discovery/SKILL.md +61 -0
  458. package/data/skills/chemical-property-lookup/SKILL.md +42 -0
  459. package/data/skills/chemist-analyst/SKILL.md +1603 -0
  460. package/data/skills/chemistry-agent/SKILL.md +62 -0
  461. package/data/skills/chip-clonal-hematopoiesis-agent/SKILL.md +224 -0
  462. package/data/skills/chromosomal-instability-agent/SKILL.md +187 -0
  463. package/data/skills/citation-management/SKILL.md +1081 -0
  464. package/data/skills/claims-appeals/SKILL.md +35 -0
  465. package/data/skills/claw-ancestry-pca/SKILL.md +145 -0
  466. package/data/skills/claw-metagenomics/SKILL.md +238 -0
  467. package/data/skills/claw-semantic-sim/SKILL.md +151 -0
  468. package/data/skills/clinical-decision-support/SKILL.md +504 -0
  469. package/data/skills/clinical-diagnostic-reasoning/SKILL.md +222 -0
  470. package/data/skills/clinical-nlp-extractor/SKILL.md +59 -0
  471. package/data/skills/clinical-note-summarization/SKILL.md +52 -0
  472. package/data/skills/clinical-reports/SKILL.md +1127 -0
  473. package/data/skills/clinical-trial-protocol-skill/SKILL.md +508 -0
  474. package/data/skills/clinical-trials-search/SKILL.md +211 -0
  475. package/data/skills/clinicaltrials-database/SKILL.md +501 -0
  476. package/data/skills/clinpgx/SKILL.md +96 -0
  477. package/data/skills/clinpgx-database/SKILL.md +632 -0
  478. package/data/skills/clinvar-database/SKILL.md +356 -0
  479. package/data/skills/cnv-caller-agent/SKILL.md +171 -0
  480. package/data/skills/coagulation-thrombosis-agent/SKILL.md +141 -0
  481. package/data/skills/cobrapy/SKILL.md +457 -0
  482. package/data/skills/compbioagent-explorer/SKILL.md +67 -0
  483. package/data/skills/computational-pathology-agent/SKILL.md +72 -0
  484. package/data/skills/convergence-study/SKILL.md +98 -0
  485. package/data/skills/cosmic-database/SKILL.md +330 -0
  486. package/data/skills/crisis-detection-intervention-ai/SKILL.md +569 -0
  487. package/data/skills/crisis-response-protocol/SKILL.md +456 -0
  488. package/data/skills/crispr-guide-design/SKILL.md +72 -0
  489. package/data/skills/crispr-offtarget-predictor/SKILL.md +56 -0
  490. package/data/skills/cryoem-ai-drug-design-agent/SKILL.md +216 -0
  491. package/data/skills/ctdna-dynamics-mrd-agent/SKILL.md +206 -0
  492. package/data/skills/cytokine-storm-analysis-agent/SKILL.md +180 -0
  493. package/data/skills/dask/SKILL.md +454 -0
  494. package/data/skills/data-stats-analysis/SKILL.md +477 -0
  495. package/data/skills/data-transform/SKILL.md +576 -0
  496. package/data/skills/data-visualization-biomedical/SKILL.md +252 -0
  497. package/data/skills/data-visualization-expert/SKILL.md +72 -0
  498. package/data/skills/data-viz-plots/SKILL.md +461 -0
  499. package/data/skills/datacommons-client/SKILL.md +253 -0
  500. package/data/skills/datamol/SKILL.md +700 -0
  501. package/data/skills/deep-research/SKILL.md +111 -0
  502. package/data/skills/deep-research-swarm/SKILL.md +62 -0
  503. package/data/skills/deep-visual-proteomics-agent/SKILL.md +149 -0
  504. package/data/skills/deepchem/SKILL.md +591 -0
  505. package/data/skills/deeptools/SKILL.md +525 -0
  506. package/data/skills/depmap/SKILL.md +300 -0
  507. package/data/skills/diffdock/SKILL.md +477 -0
  508. package/data/skills/differentiation-schemes/SKILL.md +159 -0
  509. package/data/skills/digital-twin-clinical-agent/SKILL.md +228 -0
  510. package/data/skills/dispatching-parallel-agents/SKILL.md +180 -0
  511. package/data/skills/dnanexus-integration/SKILL.md +376 -0
  512. package/data/skills/doc-coauthoring/SKILL.md +375 -0
  513. package/data/skills/docx/SKILL.md +590 -0
  514. package/data/skills/docx-official/SKILL.md +197 -0
  515. package/data/skills/drug-discovery-search/SKILL.md +214 -0
  516. package/data/skills/drug-interaction-checker/SKILL.md +56 -0
  517. package/data/skills/drug-labels-search/SKILL.md +211 -0
  518. package/data/skills/drug-photo/SKILL.md +149 -0
  519. package/data/skills/drugbank-database/SKILL.md +184 -0
  520. package/data/skills/drugbank-search/SKILL.md +211 -0
  521. package/data/skills/ehr-fhir-integration/SKILL.md +60 -0
  522. package/data/skills/emergency-card/SKILL.md +426 -0
  523. package/data/skills/ena-database/SKILL.md +198 -0
  524. package/data/skills/ensembl-database/SKILL.md +305 -0
  525. package/data/skills/epidemiologist-analyst/SKILL.md +1844 -0
  526. package/data/skills/epigenomics-methylgpt-agent/SKILL.md +111 -0
  527. package/data/skills/equity-scorer/SKILL.md +182 -0
  528. package/data/skills/esm/SKILL.md +300 -0
  529. package/data/skills/etetoolkit/SKILL.md +617 -0
  530. package/data/skills/executing-plans/SKILL.md +84 -0
  531. package/data/skills/exosome-ev-analysis-agent/SKILL.md +171 -0
  532. package/data/skills/exploratory-data-analysis/SKILL.md +440 -0
  533. package/data/skills/family-health-analyzer/SKILL.md +137 -0
  534. package/data/skills/fastq-analysis/SKILL.md +191 -0
  535. package/data/skills/fda-database/SKILL.md +512 -0
  536. package/data/skills/fhir-developer-skill/SKILL.md +294 -0
  537. package/data/skills/fhir-development/SKILL.md +35 -0
  538. package/data/skills/find-skills/SKILL.md +133 -0
  539. package/data/skills/finishing-a-development-branch/SKILL.md +200 -0
  540. package/data/skills/fitness-analyzer/SKILL.md +431 -0
  541. package/data/skills/flowio/SKILL.md +602 -0
  542. package/data/skills/foldseek/SKILL.md +179 -0
  543. package/data/skills/galaxy-bridge/SKILL.md +215 -0
  544. package/data/skills/gene-database/SKILL.md +173 -0
  545. package/data/skills/gene-panel-design-agent/SKILL.md +192 -0
  546. package/data/skills/geniml/SKILL.md +312 -0
  547. package/data/skills/genome-compare/SKILL.md +127 -0
  548. package/data/skills/geo-database/SKILL.md +809 -0
  549. package/data/skills/geopandas/SKILL.md +245 -0
  550. package/data/skills/gget/SKILL.md +865 -0
  551. package/data/skills/ginkgo-cloud-lab/SKILL.md +56 -0
  552. package/data/skills/glycoengineering/SKILL.md +338 -0
  553. package/data/skills/gnomad-database/SKILL.md +395 -0
  554. package/data/skills/goal-analyzer/SKILL.md +605 -0
  555. package/data/skills/grief-companion/SKILL.md +250 -0
  556. package/data/skills/gsea-enrichment/SKILL.md +151 -0
  557. package/data/skills/gtars/SKILL.md +279 -0
  558. package/data/skills/gtex-database/SKILL.md +315 -0
  559. package/data/skills/gwas-database/SKILL.md +602 -0
  560. package/data/skills/gwas-lookup/SKILL.md +122 -0
  561. package/data/skills/gwas-prs/SKILL.md +178 -0
  562. package/data/skills/health-trend-analyzer/SKILL.md +451 -0
  563. package/data/skills/hemoglobinopathy-analysis-agent/SKILL.md +167 -0
  564. package/data/skills/hipaa-compliance/SKILL.md +230 -0
  565. package/data/skills/histolab/SKILL.md +672 -0
  566. package/data/skills/hmdb-database/SKILL.md +190 -0
  567. package/data/skills/hrd-analysis-agent/SKILL.md +184 -0
  568. package/data/skills/hrv-alexithymia-expert/SKILL.md +151 -0
  569. package/data/skills/hypogenic/SKILL.md +649 -0
  570. package/data/skills/hypothesis-generation/SKILL.md +286 -0
  571. package/data/skills/imaging-data-commons/SKILL.md +843 -0
  572. package/data/skills/immune-checkpoint-combination-agent/SKILL.md +170 -0
  573. package/data/skills/infographics/SKILL.md +563 -0
  574. package/data/skills/instrument-data-to-allotrope/SKILL.md +280 -0
  575. package/data/skills/interpro-database/SKILL.md +305 -0
  576. package/data/skills/ipsae/SKILL.md +190 -0
  577. package/data/skills/iso-13485-certification/SKILL.md +678 -0
  578. package/data/skills/jaspar-database/SKILL.md +351 -0
  579. package/data/skills/jungian-psychologist/SKILL.md +191 -0
  580. package/data/skills/kegg-database/SKILL.md +371 -0
  581. package/data/skills/knowledge-synthesis/SKILL.md +283 -0
  582. package/data/skills/kragen-knowledge-graph/SKILL.md +68 -0
  583. package/data/skills/lab-results/SKILL.md +35 -0
  584. package/data/skills/labarchive-integration/SKILL.md +262 -0
  585. package/data/skills/labstep/SKILL.md +208 -0
  586. package/data/skills/lamindb/SKILL.md +384 -0
  587. package/data/skills/latchbio-integration/SKILL.md +347 -0
  588. package/data/skills/latex-posters/SKILL.md +1602 -0
  589. package/data/skills/leads-literature-mining/SKILL.md +68 -0
  590. package/data/skills/ligandmpnn/SKILL.md +170 -0
  591. package/data/skills/linear-solvers/SKILL.md +165 -0
  592. package/data/skills/liquid-biopsy-analytics-agent/SKILL.md +171 -0
  593. package/data/skills/lit-synthesizer/SKILL.md +53 -0
  594. package/data/skills/literature-review/SKILL.md +584 -0
  595. package/data/skills/literature-search/SKILL.md +214 -0
  596. package/data/skills/lobster-bioinformatics/SKILL.md +305 -0
  597. package/data/skills/long-read-sequencing-agent/SKILL.md +181 -0
  598. package/data/skills/mage-antibody-generator/SKILL.md +54 -0
  599. package/data/skills/markdown-mermaid-writing/SKILL.md +327 -0
  600. package/data/skills/markitdown/SKILL.md +486 -0
  601. package/data/skills/matchms/SKILL.md +197 -0
  602. package/data/skills/matplotlib/SKILL.md +359 -0
  603. package/data/skills/mcpmed-bioinformatics-server/SKILL.md +42 -0
  604. package/data/skills/medchem/SKILL.md +400 -0
  605. package/data/skills/medea-therapeutic-discovery/SKILL.md +45 -0
  606. package/data/skills/medical-entity-extractor/SKILL.md +144 -0
  607. package/data/skills/medical-imaging-review/SKILL.md +170 -0
  608. package/data/skills/medical-research-toolkit/SKILL.md +273 -0
  609. package/data/skills/medrxiv-search/SKILL.md +211 -0
  610. package/data/skills/mental-health-analyzer/SKILL.md +981 -0
  611. package/data/skills/mesh-generation/SKILL.md +149 -0
  612. package/data/skills/metabolomics-workbench-database/SKILL.md +253 -0
  613. package/data/skills/microbiome-cancer-agent/SKILL.md +180 -0
  614. package/data/skills/modern-drug-rehab-computer/SKILL.md +392 -0
  615. package/data/skills/molecular-dynamics/SKILL.md +457 -0
  616. package/data/skills/molecular-glue-discovery-agent/SKILL.md +224 -0
  617. package/data/skills/molecule-evolution-agent/SKILL.md +62 -0
  618. package/data/skills/molfeat/SKILL.md +505 -0
  619. package/data/skills/monarch-database/SKILL.md +372 -0
  620. package/data/skills/mpn-progression-monitor-agent/SKILL.md +228 -0
  621. package/data/skills/mpn-research-assistant/SKILL.md +197 -0
  622. package/data/skills/mrd-edge-detection-agent/SKILL.md +213 -0
  623. package/data/skills/multi-ancestry-prs-agent/SKILL.md +224 -0
  624. package/data/skills/multi-search-engine/SKILL.md +110 -0
  625. package/data/skills/multimodal-medical-imaging/SKILL.md +59 -0
  626. package/data/skills/multimodal-radpath-fusion-agent/SKILL.md +213 -0
  627. package/data/skills/myeloma-mrd-agent/SKILL.md +184 -0
  628. package/data/skills/networkx/SKILL.md +435 -0
  629. package/data/skills/neurokit2/SKILL.md +350 -0
  630. package/data/skills/neuropixels-analysis/SKILL.md +344 -0
  631. package/data/skills/nextflow-development/SKILL.md +290 -0
  632. package/data/skills/ngs-analysis/SKILL.md +183 -0
  633. package/data/skills/nicheformer-spatial-agent/SKILL.md +197 -0
  634. package/data/skills/nk-cell-therapy-agent/SKILL.md +186 -0
  635. package/data/skills/nonlinear-solvers/SKILL.md +180 -0
  636. package/data/skills/numerical-integration/SKILL.md +166 -0
  637. package/data/skills/numerical-stability/SKILL.md +149 -0
  638. package/data/skills/nutrition-analyzer/SKILL.md +775 -0
  639. package/data/skills/occupational-health-analyzer/SKILL.md +386 -0
  640. package/data/skills/omero-integration/SKILL.md +245 -0
  641. package/data/skills/ontology-explorer/SKILL.md +168 -0
  642. package/data/skills/ontology-mapper/SKILL.md +171 -0
  643. package/data/skills/ontology-validator/SKILL.md +136 -0
  644. package/data/skills/open-notebook/SKILL.md +289 -0
  645. package/data/skills/open-targets-search/SKILL.md +211 -0
  646. package/data/skills/openalex-database/SKILL.md +488 -0
  647. package/data/skills/opentargets-database/SKILL.md +367 -0
  648. package/data/skills/opentrons-integration/SKILL.md +567 -0
  649. package/data/skills/opentrons-protocol-agent/SKILL.md +58 -0
  650. package/data/skills/organoid-drug-response-agent/SKILL.md +189 -0
  651. package/data/skills/pan-cancer-multiomics-agent/SKILL.md +159 -0
  652. package/data/skills/paper-2-web/SKILL.md +495 -0
  653. package/data/skills/parameter-optimization/SKILL.md +141 -0
  654. package/data/skills/patents-search/SKILL.md +211 -0
  655. package/data/skills/pathml/SKILL.md +160 -0
  656. package/data/skills/patiently-ai/SKILL.md +103 -0
  657. package/data/skills/pdb/SKILL.md +217 -0
  658. package/data/skills/pdb-database/SKILL.md +303 -0
  659. package/data/skills/pdf/SKILL.md +314 -0
  660. package/data/skills/pdf-anthropic/SKILL.md +294 -0
  661. package/data/skills/pdf-processing/SKILL.md +149 -0
  662. package/data/skills/pdf-processing-pro/SKILL.md +296 -0
  663. package/data/skills/pdx-model-analysis-agent/SKILL.md +169 -0
  664. package/data/skills/peer-review/SKILL.md +565 -0
  665. package/data/skills/performance-profiling/SKILL.md +255 -0
  666. package/data/skills/perplexity-search/SKILL.md +441 -0
  667. package/data/skills/pharmacogenomics-agent/SKILL.md +143 -0
  668. package/data/skills/pharmgx-reporter/SKILL.md +134 -0
  669. package/data/skills/phylogenetics/SKILL.md +404 -0
  670. package/data/skills/plotly/SKILL.md +265 -0
  671. package/data/skills/polars/SKILL.md +385 -0
  672. package/data/skills/popeve-variant-predictor-agent/SKILL.md +213 -0
  673. package/data/skills/post-processing/SKILL.md +338 -0
  674. package/data/skills/pptx/SKILL.md +232 -0
  675. package/data/skills/pptx-official/SKILL.md +484 -0
  676. package/data/skills/pptx-posters/SKILL.md +414 -0
  677. package/data/skills/precision-oncology-agent/SKILL.md +53 -0
  678. package/data/skills/prior-auth-coworker/SKILL.md +60 -0
  679. package/data/skills/prior-auth-review-skill/SKILL.md +360 -0
  680. package/data/skills/profile-report/SKILL.md +120 -0
  681. package/data/skills/protac-design-agent/SKILL.md +220 -0
  682. package/data/skills/protein-design-workflow/SKILL.md +199 -0
  683. package/data/skills/protein-qc/SKILL.md +300 -0
  684. package/data/skills/protein-structure-prediction/SKILL.md +59 -0
  685. package/data/skills/proteinmpnn/SKILL.md +279 -0
  686. package/data/skills/protocolsio-integration/SKILL.md +415 -0
  687. package/data/skills/prs-net-deep-learning-agent/SKILL.md +232 -0
  688. package/data/skills/psychologist-analyst/SKILL.md +1888 -0
  689. package/data/skills/pubchem-database/SKILL.md +568 -0
  690. package/data/skills/pubmed-database/SKILL.md +454 -0
  691. package/data/skills/pubmed-search/SKILL.md +103 -0
  692. package/data/skills/pydeseq2/SKILL.md +553 -0
  693. package/data/skills/pydicom/SKILL.md +428 -0
  694. package/data/skills/pyhealth/SKILL.md +485 -0
  695. package/data/skills/pylabrobot/SKILL.md +179 -0
  696. package/data/skills/pymc/SKILL.md +566 -0
  697. package/data/skills/pymoo/SKILL.md +565 -0
  698. package/data/skills/pyopenms/SKILL.md +211 -0
  699. package/data/skills/pysam/SKILL.md +259 -0
  700. package/data/skills/pytdc/SKILL.md +454 -0
  701. package/data/skills/pytorch-lightning/SKILL.md +172 -0
  702. package/data/skills/pyzotero/SKILL.md +111 -0
  703. package/data/skills/radgpt-radiology-reporter/SKILL.md +67 -0
  704. package/data/skills/radiomics-pathomics-fusion-agent/SKILL.md +221 -0
  705. package/data/skills/rdkit/SKILL.md +763 -0
  706. package/data/skills/reactome-database/SKILL.md +272 -0
  707. package/data/skills/receiving-code-review/SKILL.md +213 -0
  708. package/data/skills/recovery-community-moderator/SKILL.md +175 -0
  709. package/data/skills/regulatory-drafter/SKILL.md +56 -0
  710. package/data/skills/regulatory-drafting/SKILL.md +35 -0
  711. package/data/skills/rehabilitation-analyzer/SKILL.md +636 -0
  712. package/data/skills/repro-enforcer/SKILL.md +50 -0
  713. package/data/skills/requesting-code-review/SKILL.md +105 -0
  714. package/data/skills/research-grants/SKILL.md +935 -0
  715. package/data/skills/research-literature/SKILL.md +35 -0
  716. package/data/skills/research-lookup/SKILL.md +502 -0
  717. package/data/skills/rfdiffusion/SKILL.md +306 -0
  718. package/data/skills/rna-velocity-agent/SKILL.md +174 -0
  719. package/data/skills/scanpy/SKILL.md +380 -0
  720. package/data/skills/scfoundation-model-agent/SKILL.md +210 -0
  721. package/data/skills/scientific-brainstorming/SKILL.md +185 -0
  722. package/data/skills/scientific-critical-thinking/SKILL.md +566 -0
  723. package/data/skills/scientific-manuscript/SKILL.md +181 -0
  724. package/data/skills/scientific-problem-selection/SKILL.md +269 -0
  725. package/data/skills/scientific-schematics/SKILL.md +619 -0
  726. package/data/skills/scientific-slides/SKILL.md +1154 -0
  727. package/data/skills/scientific-visualization/SKILL.md +773 -0
  728. package/data/skills/scientific-writing/SKILL.md +483 -0
  729. package/data/skills/scikit-bio/SKILL.md +431 -0
  730. package/data/skills/scikit-learn/SKILL.md +515 -0
  731. package/data/skills/scikit-survival/SKILL.md +393 -0
  732. package/data/skills/scrna-orchestrator/SKILL.md +204 -0
  733. package/data/skills/scrna-qc/SKILL.md +43 -0
  734. package/data/skills/scvelo/SKILL.md +321 -0
  735. package/data/skills/scvi-tools/SKILL.md +184 -0
  736. package/data/skills/seaborn/SKILL.md +671 -0
  737. package/data/skills/search-strategy/SKILL.md +247 -0
  738. package/data/skills/seq-wrangler/SKILL.md +58 -0
  739. package/data/skills/shap/SKILL.md +560 -0
  740. package/data/skills/simo-multiomics-integration-agent/SKILL.md +178 -0
  741. package/data/skills/simpy/SKILL.md +423 -0
  742. package/data/skills/simulation-orchestrator/SKILL.md +230 -0
  743. package/data/skills/simulation-validator/SKILL.md +195 -0
  744. package/data/skills/single-annotation/SKILL.md +129 -0
  745. package/data/skills/single-cell-rna-qc/SKILL.md +175 -0
  746. package/data/skills/single-cellphone-db/SKILL.md +68 -0
  747. package/data/skills/single-clustering/SKILL.md +75 -0
  748. package/data/skills/single-downstream-analysis/SKILL.md +150 -0
  749. package/data/skills/single-multiomics/SKILL.md +44 -0
  750. package/data/skills/single-preprocessing/SKILL.md +184 -0
  751. package/data/skills/single-to-spatial-mapping/SKILL.md +48 -0
  752. package/data/skills/single-trajectory/SKILL.md +62 -0
  753. package/data/skills/sleep-analyzer/SKILL.md +773 -0
  754. package/data/skills/slurm-job-script-generator/SKILL.md +135 -0
  755. package/data/skills/solublempnn/SKILL.md +165 -0
  756. package/data/skills/spatial-agent/SKILL.md +56 -0
  757. package/data/skills/spatial-epigenomics-agent/SKILL.md +163 -0
  758. package/data/skills/spatial-transcriptomics-agent/SKILL.md +75 -0
  759. package/data/skills/spatial-transcriptomics-analysis/SKILL.md +72 -0
  760. package/data/skills/spatial-transcriptomics-analysis/STAgent/SKILL.md +75 -0
  761. package/data/skills/spatial-transcriptomics-analysis/SpatialAgent/SKILL.md +56 -0
  762. package/data/skills/spatial-transcriptomics-analysis/bioSkills/image-analysis/SKILL.md +266 -0
  763. package/data/skills/spatial-transcriptomics-analysis/bioSkills/spatial-communication/SKILL.md +287 -0
  764. package/data/skills/spatial-transcriptomics-analysis/bioSkills/spatial-data-io/SKILL.md +243 -0
  765. package/data/skills/spatial-transcriptomics-analysis/bioSkills/spatial-deconvolution/SKILL.md +298 -0
  766. package/data/skills/spatial-transcriptomics-analysis/bioSkills/spatial-domains/SKILL.md +229 -0
  767. package/data/skills/spatial-transcriptomics-analysis/bioSkills/spatial-multiomics/SKILL.md +172 -0
  768. package/data/skills/spatial-transcriptomics-analysis/bioSkills/spatial-neighbors/SKILL.md +189 -0
  769. package/data/skills/spatial-transcriptomics-analysis/bioSkills/spatial-preprocessing/SKILL.md +232 -0
  770. package/data/skills/spatial-transcriptomics-analysis/bioSkills/spatial-proteomics/SKILL.md +127 -0
  771. package/data/skills/spatial-transcriptomics-analysis/bioSkills/spatial-statistics/SKILL.md +225 -0
  772. package/data/skills/spatial-transcriptomics-analysis/bioSkills/spatial-visualization/SKILL.md +270 -0
  773. package/data/skills/spatial-tutorials/SKILL.md +87 -0
  774. package/data/skills/speech-pathology-ai/SKILL.md +184 -0
  775. package/data/skills/statistical-analysis/SKILL.md +626 -0
  776. package/data/skills/statsmodels/SKILL.md +608 -0
  777. package/data/skills/string-database/SKILL.md +528 -0
  778. package/data/skills/struct-predictor/SKILL.md +52 -0
  779. package/data/skills/subagent-driven-development/SKILL.md +242 -0
  780. package/data/skills/systematic-debugging/SKILL.md +296 -0
  781. package/data/skills/tcell-exhaustion-analysis-agent/SKILL.md +139 -0
  782. package/data/skills/tcga-preprocessing/SKILL.md +49 -0
  783. package/data/skills/tcm-constitution-analyzer/SKILL.md +664 -0
  784. package/data/skills/tcr-pmhc-prediction-agent/SKILL.md +226 -0
  785. package/data/skills/tcr-repertoire-analysis-agent/SKILL.md +218 -0
  786. package/data/skills/test-driven-development/SKILL.md +371 -0
  787. package/data/skills/tiledbvcf/SKILL.md +459 -0
  788. package/data/skills/time-resolved-cryoem-agent/SKILL.md +223 -0
  789. package/data/skills/time-stepping/SKILL.md +140 -0
  790. package/data/skills/timesfm-forecasting/SKILL.md +785 -0
  791. package/data/skills/tme-immune-profiling-agent/SKILL.md +220 -0
  792. package/data/skills/tooluniverse-adverse-event-detection/SKILL.md +1115 -0
  793. package/data/skills/tooluniverse-antibody-engineering/SKILL.md +1581 -0
  794. package/data/skills/tooluniverse-binder-discovery/SKILL.md +1459 -0
  795. package/data/skills/tooluniverse-cancer-variant-interpretation/SKILL.md +971 -0
  796. package/data/skills/tooluniverse-chemical-compound-retrieval/SKILL.md +322 -0
  797. package/data/skills/tooluniverse-chemical-safety/SKILL.md +733 -0
  798. package/data/skills/tooluniverse-clinical-guidelines/SKILL.md +399 -0
  799. package/data/skills/tooluniverse-clinical-trial-design/SKILL.md +1195 -0
  800. package/data/skills/tooluniverse-clinical-trial-matching/SKILL.md +1333 -0
  801. package/data/skills/tooluniverse-crispr-screen-analysis/SKILL.md +900 -0
  802. package/data/skills/tooluniverse-disease-research/SKILL.md +630 -0
  803. package/data/skills/tooluniverse-drug-drug-interaction/SKILL.md +73 -0
  804. package/data/skills/tooluniverse-drug-repurposing/SKILL.md +595 -0
  805. package/data/skills/tooluniverse-drug-research/SKILL.md +1642 -0
  806. package/data/skills/tooluniverse-drug-target-validation/SKILL.md +1206 -0
  807. package/data/skills/tooluniverse-epigenomics/SKILL.md +1489 -0
  808. package/data/skills/tooluniverse-expression-data-retrieval/SKILL.md +389 -0
  809. package/data/skills/tooluniverse-gene-enrichment/SKILL.md +402 -0
  810. package/data/skills/tooluniverse-gwas-drug-discovery/SKILL.md +576 -0
  811. package/data/skills/tooluniverse-gwas-finemapping/SKILL.md +309 -0
  812. package/data/skills/tooluniverse-gwas-snp-interpretation/SKILL.md +223 -0
  813. package/data/skills/tooluniverse-gwas-study-explorer/SKILL.md +342 -0
  814. package/data/skills/tooluniverse-gwas-trait-to-gene/SKILL.md +236 -0
  815. package/data/skills/tooluniverse-image-analysis/SKILL.md +439 -0
  816. package/data/skills/tooluniverse-immune-repertoire-analysis/SKILL.md +949 -0
  817. package/data/skills/tooluniverse-immunotherapy-response-prediction/SKILL.md +865 -0
  818. package/data/skills/tooluniverse-infectious-disease/SKILL.md +749 -0
  819. package/data/skills/tooluniverse-literature-deep-research/SKILL.md +1050 -0
  820. package/data/skills/tooluniverse-metabolomics/SKILL.md +298 -0
  821. package/data/skills/tooluniverse-metabolomics-analysis/SKILL.md +764 -0
  822. package/data/skills/tooluniverse-multi-omics-integration/SKILL.md +703 -0
  823. package/data/skills/tooluniverse-multiomic-disease-characterization/SKILL.md +1138 -0
  824. package/data/skills/tooluniverse-network-pharmacology/SKILL.md +1312 -0
  825. package/data/skills/tooluniverse-pharmacovigilance/SKILL.md +807 -0
  826. package/data/skills/tooluniverse-phylogenetics/SKILL.md +461 -0
  827. package/data/skills/tooluniverse-polygenic-risk-score/SKILL.md +397 -0
  828. package/data/skills/tooluniverse-precision-medicine-stratification/SKILL.md +1143 -0
  829. package/data/skills/tooluniverse-precision-oncology/SKILL.md +1091 -0
  830. package/data/skills/tooluniverse-protein-interactions/SKILL.md +446 -0
  831. package/data/skills/tooluniverse-protein-structure-retrieval/SKILL.md +416 -0
  832. package/data/skills/tooluniverse-protein-therapeutic-design/SKILL.md +637 -0
  833. package/data/skills/tooluniverse-proteomics-analysis/SKILL.md +843 -0
  834. package/data/skills/tooluniverse-rare-disease-diagnosis/SKILL.md +1257 -0
  835. package/data/skills/tooluniverse-rnaseq-deseq2/SKILL.md +536 -0
  836. package/data/skills/tooluniverse-sequence-retrieval/SKILL.md +419 -0
  837. package/data/skills/tooluniverse-single-cell/SKILL.md +719 -0
  838. package/data/skills/tooluniverse-spatial-omics-analysis/SKILL.md +1102 -0
  839. package/data/skills/tooluniverse-spatial-transcriptomics/SKILL.md +788 -0
  840. package/data/skills/tooluniverse-statistical-modeling/SKILL.md +557 -0
  841. package/data/skills/tooluniverse-structural-variant-analysis/SKILL.md +1356 -0
  842. package/data/skills/tooluniverse-systems-biology/SKILL.md +374 -0
  843. package/data/skills/tooluniverse-target-research/SKILL.md +1510 -0
  844. package/data/skills/tooluniverse-variant-analysis/SKILL.md +448 -0
  845. package/data/skills/tooluniverse-variant-interpretation/SKILL.md +1118 -0
  846. package/data/skills/torch-geometric/SKILL.md +674 -0
  847. package/data/skills/torch_geometric/SKILL.md +670 -0
  848. package/data/skills/torchdrug/SKILL.md +444 -0
  849. package/data/skills/tpd-ternary-complex-agent/SKILL.md +226 -0
  850. package/data/skills/transformers/SKILL.md +157 -0
  851. package/data/skills/travel-health-analyzer/SKILL.md +421 -0
  852. package/data/skills/treatment-plans/SKILL.md +1576 -0
  853. package/data/skills/trial-eligibility-agent/SKILL.md +54 -0
  854. package/data/skills/trialgpt-matching/SKILL.md +66 -0
  855. package/data/skills/tumor-clonal-evolution-agent/SKILL.md +134 -0
  856. package/data/skills/tumor-heterogeneity-agent/SKILL.md +216 -0
  857. package/data/skills/tumor-mutational-burden-agent/SKILL.md +188 -0
  858. package/data/skills/ukb-navigator/SKILL.md +113 -0
  859. package/data/skills/umap-learn/SKILL.md +473 -0
  860. package/data/skills/uniprot-database/SKILL.md +189 -0
  861. package/data/skills/universal-single-cell-annotator/SKILL.md +72 -0
  862. package/data/skills/using-git-worktrees/SKILL.md +218 -0
  863. package/data/skills/using-superpowers/SKILL.md +95 -0
  864. package/data/skills/usmle/SKILL.md +62 -0
  865. package/data/skills/uspto-database/SKILL.md +597 -0
  866. package/data/skills/vaex/SKILL.md +180 -0
  867. package/data/skills/varcadd-pathogenicity/SKILL.md +68 -0
  868. package/data/skills/variant-interpretation-acmg/SKILL.md +58 -0
  869. package/data/skills/variant-interpretation-acmg/bioSkills/clinical-interpretation/SKILL.md +334 -0
  870. package/data/skills/variant-interpretation-acmg/bioSkills/consensus-sequences/SKILL.md +343 -0
  871. package/data/skills/variant-interpretation-acmg/bioSkills/deepvariant/SKILL.md +279 -0
  872. package/data/skills/variant-interpretation-acmg/bioSkills/filtering-best-practices/SKILL.md +362 -0
  873. package/data/skills/variant-interpretation-acmg/bioSkills/gatk-variant-calling/SKILL.md +398 -0
  874. package/data/skills/variant-interpretation-acmg/bioSkills/joint-calling/SKILL.md +343 -0
  875. package/data/skills/variant-interpretation-acmg/bioSkills/structural-variant-calling/SKILL.md +256 -0
  876. package/data/skills/variant-interpretation-acmg/bioSkills/variant-annotation/SKILL.md +387 -0
  877. package/data/skills/variant-interpretation-acmg/bioSkills/variant-calling/SKILL.md +258 -0
  878. package/data/skills/variant-interpretation-acmg/bioSkills/variant-normalization/SKILL.md +304 -0
  879. package/data/skills/variant-interpretation-acmg/bioSkills/vcf-basics/SKILL.md +329 -0
  880. package/data/skills/variant-interpretation-acmg/bioSkills/vcf-manipulation/SKILL.md +398 -0
  881. package/data/skills/variant-interpretation-acmg/bioSkills/vcf-statistics/SKILL.md +424 -0
  882. package/data/skills/variant-interpretation-acmg/varCADD/SKILL.md +68 -0
  883. package/data/skills/vcf-annotator/SKILL.md +55 -0
  884. package/data/skills/verification-before-completion/SKILL.md +139 -0
  885. package/data/skills/virtual-lab-agent/SKILL.md +240 -0
  886. package/data/skills/wearable-analysis-agent/SKILL.md +70 -0
  887. package/data/skills/weightloss-analyzer/SKILL.md +320 -0
  888. package/data/skills/wellally-tech/SKILL.md +685 -0
  889. package/data/skills/wikipedia-search/SKILL.md +481 -0
  890. package/data/skills/writing-plans/SKILL.md +116 -0
  891. package/data/skills/writing-skills/SKILL.md +655 -0
  892. package/data/skills/xlsx/SKILL.md +292 -0
  893. package/data/skills/xlsx-official/SKILL.md +289 -0
  894. package/data/skills/zarr-python/SKILL.md +777 -0
  895. package/data/skills/zinc-database/SKILL.md +398 -0
  896. package/data/tools/__init__.py +8 -0
  897. package/data/tools/hpc.py +71 -0
  898. package/data/tools/hpc_client/__init__.py +8 -0
  899. package/data/tools/hpc_client/builders/__init__.py +12 -0
  900. package/data/tools/hpc_client/builders/alphafold.py +36 -0
  901. package/data/tools/hpc_client/builders/boltz.py +33 -0
  902. package/data/tools/hpc_client/builders/chai.py +30 -0
  903. package/data/tools/hpc_client/builders/immunebuilder.py +31 -0
  904. package/data/tools/hpc_client/builders/rfantibody.py +58 -0
  905. package/data/tools/hpc_client/builders/thermompnn.py +16 -0
  906. package/data/tools/hpc_client/hpc_api.py +41 -0
  907. package/data/tools/hpc_client/hpc_tools.py +218 -0
  908. package/data/tools/hpc_dynamic.py +71 -0
  909. package/data/tools/integrations/__init__.py +14 -0
  910. package/data/tools/integrations/adaptyv.py +107 -0
  911. package/data/tools/integrations/addgene.py +52 -0
  912. package/data/tools/integrations/api_internal.py +33 -0
  913. package/data/tools/molecular_biology.py +688 -0
  914. package/data/tools/pharmacology.py +67 -0
  915. package/data/workflows/bulk-omics-clustering/SKILL.md +501 -0
  916. package/data/workflows/bulk-omics-clustering/references/best_practices.md +395 -0
  917. package/data/workflows/bulk-omics-clustering/references/clustering_methods_comparison.md +288 -0
  918. package/data/workflows/bulk-omics-clustering/references/common-patterns.md +1136 -0
  919. package/data/workflows/bulk-omics-clustering/references/decision-guide.md +819 -0
  920. package/data/workflows/bulk-omics-clustering/references/distance_metrics_guide.md +388 -0
  921. package/data/workflows/bulk-omics-clustering/references/parameter_guide.md +396 -0
  922. package/data/workflows/bulk-omics-clustering/references/r-quick-start.md +105 -0
  923. package/data/workflows/bulk-omics-clustering/references/validation_metrics_guide.md +315 -0
  924. package/data/workflows/bulk-omics-clustering/scripts/characterize_clusters.py +255 -0
  925. package/data/workflows/bulk-omics-clustering/scripts/cluster_validation.py +449 -0
  926. package/data/workflows/bulk-omics-clustering/scripts/density_clustering.py +321 -0
  927. package/data/workflows/bulk-omics-clustering/scripts/dimensionality_reduction.py +328 -0
  928. package/data/workflows/bulk-omics-clustering/scripts/distance_metrics.py +251 -0
  929. package/data/workflows/bulk-omics-clustering/scripts/export_results.py +456 -0
  930. package/data/workflows/bulk-omics-clustering/scripts/hierarchical_clustering.R +229 -0
  931. package/data/workflows/bulk-omics-clustering/scripts/hierarchical_clustering.py +269 -0
  932. package/data/workflows/bulk-omics-clustering/scripts/kmeans_clustering.py +346 -0
  933. package/data/workflows/bulk-omics-clustering/scripts/load_example_data.R +171 -0
  934. package/data/workflows/bulk-omics-clustering/scripts/load_example_data.py +171 -0
  935. package/data/workflows/bulk-omics-clustering/scripts/model_based_clustering.py +370 -0
  936. package/data/workflows/bulk-omics-clustering/scripts/optimal_clusters.py +381 -0
  937. package/data/workflows/bulk-omics-clustering/scripts/plot_cluster_heatmap.R +141 -0
  938. package/data/workflows/bulk-omics-clustering/scripts/plot_clustering_results.py +452 -0
  939. package/data/workflows/bulk-omics-clustering/scripts/prepare_data.py +250 -0
  940. package/data/workflows/bulk-omics-clustering/scripts/stability_analysis.py +434 -0
  941. package/data/workflows/bulk-rnaseq-counts-to-de-deseq2/SKILL.md +505 -0
  942. package/data/workflows/bulk-rnaseq-counts-to-de-deseq2/references/comprehensive-reference.md +440 -0
  943. package/data/workflows/bulk-rnaseq-counts-to-de-deseq2/references/decision-guide.md +327 -0
  944. package/data/workflows/bulk-rnaseq-counts-to-de-deseq2/references/troubleshooting.md +456 -0
  945. package/data/workflows/bulk-rnaseq-counts-to-de-deseq2/references/usage-guide.md +75 -0
  946. package/data/workflows/bulk-rnaseq-counts-to-de-deseq2/scripts/basic_workflow.R +149 -0
  947. package/data/workflows/bulk-rnaseq-counts-to-de-deseq2/scripts/batch_correction.R +44 -0
  948. package/data/workflows/bulk-rnaseq-counts-to-de-deseq2/scripts/export_results.R +190 -0
  949. package/data/workflows/bulk-rnaseq-counts-to-de-deseq2/scripts/extract_results.R +242 -0
  950. package/data/workflows/bulk-rnaseq-counts-to-de-deseq2/scripts/load_example_data.R +250 -0
  951. package/data/workflows/bulk-rnaseq-counts-to-de-deseq2/scripts/multi_condition.R +50 -0
  952. package/data/workflows/bulk-rnaseq-counts-to-de-deseq2/scripts/qc_plots.R +410 -0
  953. package/data/workflows/bulk-rnaseq-counts-to-de-deseq2/scripts/transformations.R +218 -0
  954. package/data/workflows/chip-atlas-diff-analysis/SKILL.md +222 -0
  955. package/data/workflows/chip-atlas-diff-analysis/references/chipatlas_diff_api_format.md +106 -0
  956. package/data/workflows/chip-atlas-diff-analysis/references/diff_analysis_methods.md +89 -0
  957. package/data/workflows/chip-atlas-diff-analysis/references/output_format.md +78 -0
  958. package/data/workflows/chip-atlas-diff-analysis/scripts/__init__.py +1 -0
  959. package/data/workflows/chip-atlas-diff-analysis/scripts/annotate_genes.py +144 -0
  960. package/data/workflows/chip-atlas-diff-analysis/scripts/export_all.py +498 -0
  961. package/data/workflows/chip-atlas-diff-analysis/scripts/filter_regions.py +176 -0
  962. package/data/workflows/chip-atlas-diff-analysis/scripts/generate_all_plots.py +321 -0
  963. package/data/workflows/chip-atlas-diff-analysis/scripts/load_example_data.py +149 -0
  964. package/data/workflows/chip-atlas-diff-analysis/scripts/load_user_data.py +211 -0
  965. package/data/workflows/chip-atlas-diff-analysis/scripts/parse_bed_results.py +240 -0
  966. package/data/workflows/chip-atlas-diff-analysis/scripts/qc_checks.py +621 -0
  967. package/data/workflows/chip-atlas-diff-analysis/scripts/query_chipatlas_api.py +329 -0
  968. package/data/workflows/chip-atlas-diff-analysis/scripts/run_diff_workflow.py +256 -0
  969. package/data/workflows/chip-atlas-peak-enrichment/SKILL.md +212 -0
  970. package/data/workflows/chip-atlas-peak-enrichment/references/chipatlas_metadata_format.md +115 -0
  971. package/data/workflows/chip-atlas-peak-enrichment/references/enrichment_statistics.md +145 -0
  972. package/data/workflows/chip-atlas-peak-enrichment/references/peak_thresholds.md +63 -0
  973. package/data/workflows/chip-atlas-peak-enrichment/references/promoter_definitions.md +69 -0
  974. package/data/workflows/chip-atlas-peak-enrichment/scripts/__init__.py +1 -0
  975. package/data/workflows/chip-atlas-peak-enrichment/scripts/convert_genes_to_regions.py +271 -0
  976. package/data/workflows/chip-atlas-peak-enrichment/scripts/export_all.py +456 -0
  977. package/data/workflows/chip-atlas-peak-enrichment/scripts/filter_experiments.py +116 -0
  978. package/data/workflows/chip-atlas-peak-enrichment/scripts/generate_all_plots.py +280 -0
  979. package/data/workflows/chip-atlas-peak-enrichment/scripts/load_example_data.py +96 -0
  980. package/data/workflows/chip-atlas-peak-enrichment/scripts/load_user_data.py +183 -0
  981. package/data/workflows/chip-atlas-peak-enrichment/scripts/query_chipatlas_api.py +349 -0
  982. package/data/workflows/chip-atlas-peak-enrichment/scripts/run_enrichment_workflow.py +271 -0
  983. package/data/workflows/chip-atlas-target-genes/SKILL.md +230 -0
  984. package/data/workflows/chip-atlas-target-genes/references/macs2_binding_scores.md +89 -0
  985. package/data/workflows/chip-atlas-target-genes/references/string_scores.md +58 -0
  986. package/data/workflows/chip-atlas-target-genes/references/target_genes_data_format.md +73 -0
  987. package/data/workflows/chip-atlas-target-genes/scripts/__init__.py +0 -0
  988. package/data/workflows/chip-atlas-target-genes/scripts/download_target_genes.py +200 -0
  989. package/data/workflows/chip-atlas-target-genes/scripts/export_all.py +340 -0
  990. package/data/workflows/chip-atlas-target-genes/scripts/filter_targets.py +205 -0
  991. package/data/workflows/chip-atlas-target-genes/scripts/generate_all_plots.py +330 -0
  992. package/data/workflows/chip-atlas-target-genes/scripts/load_example_query.py +61 -0
  993. package/data/workflows/chip-atlas-target-genes/scripts/load_user_query.py +47 -0
  994. package/data/workflows/chip-atlas-target-genes/scripts/run_target_genes_workflow.py +141 -0
  995. package/data/workflows/clinicaltrials-landscape/SKILL.md +257 -0
  996. package/data/workflows/clinicaltrials-landscape/references/api-parameters.md +181 -0
  997. package/data/workflows/clinicaltrials-landscape/references/mechanisms.md +141 -0
  998. package/data/workflows/clinicaltrials-landscape/references/output-schema.md +184 -0
  999. package/data/workflows/clinicaltrials-landscape/scripts/__init__.py +1 -0
  1000. package/data/workflows/clinicaltrials-landscape/scripts/classify_mechanisms.py +359 -0
  1001. package/data/workflows/clinicaltrials-landscape/scripts/compile_trials.py +579 -0
  1002. package/data/workflows/clinicaltrials-landscape/scripts/disease_config.py +161 -0
  1003. package/data/workflows/clinicaltrials-landscape/scripts/export_all.py +242 -0
  1004. package/data/workflows/clinicaltrials-landscape/scripts/generate_landscape_plots.py +761 -0
  1005. package/data/workflows/clinicaltrials-landscape/scripts/generate_pdf_report.py +1465 -0
  1006. package/data/workflows/clinicaltrials-landscape/scripts/generate_report.py +1813 -0
  1007. package/data/workflows/clinicaltrials-landscape/scripts/query_clinicaltrials.py +307 -0
  1008. package/data/workflows/coexpression-network/SKILL.md +344 -0
  1009. package/data/workflows/coexpression-network/references/parameter-tuning-guide.md +591 -0
  1010. package/data/workflows/coexpression-network/references/troubleshooting.md +483 -0
  1011. package/data/workflows/coexpression-network/references/wgcna-best-practices.md +563 -0
  1012. package/data/workflows/coexpression-network/references/wgcna-reference.md +538 -0
  1013. package/data/workflows/coexpression-network/scripts/build_network.R +43 -0
  1014. package/data/workflows/coexpression-network/scripts/correlate_modules_traits.R +92 -0
  1015. package/data/workflows/coexpression-network/scripts/export_wgcna_results.R +117 -0
  1016. package/data/workflows/coexpression-network/scripts/identify_hub_genes.R +63 -0
  1017. package/data/workflows/coexpression-network/scripts/load_example_data.R +214 -0
  1018. package/data/workflows/coexpression-network/scripts/module_enrichment.R +159 -0
  1019. package/data/workflows/coexpression-network/scripts/pick_soft_power.R +70 -0
  1020. package/data/workflows/coexpression-network/scripts/plot_all_wgcna.R +104 -0
  1021. package/data/workflows/coexpression-network/scripts/plot_eigengene_heatmap.R +65 -0
  1022. package/data/workflows/coexpression-network/scripts/plot_hub_genes.R +70 -0
  1023. package/data/workflows/coexpression-network/scripts/plot_module_dendrogram.R +50 -0
  1024. package/data/workflows/coexpression-network/scripts/plotting_helpers.R +87 -0
  1025. package/data/workflows/coexpression-network/scripts/prepare_wgcna_data.R +73 -0
  1026. package/data/workflows/coexpression-network/scripts/wgcna_workflow.R +93 -0
  1027. package/data/workflows/experimental-design-statistics/SKILL.md +408 -0
  1028. package/data/workflows/experimental-design-statistics/references/batch_effect_mitigation.md +756 -0
  1029. package/data/workflows/experimental-design-statistics/references/cv_tissue_database.csv +30 -0
  1030. package/data/workflows/experimental-design-statistics/references/experimental_design_best_practices.md +515 -0
  1031. package/data/workflows/experimental-design-statistics/references/multiple_testing_guide.md +730 -0
  1032. package/data/workflows/experimental-design-statistics/references/power_analysis_guidelines.md +635 -0
  1033. package/data/workflows/experimental-design-statistics/references/qc_guidelines.md +310 -0
  1034. package/data/workflows/experimental-design-statistics/references/software_requirements.md +328 -0
  1035. package/data/workflows/experimental-design-statistics/references/troubleshooting_guide.md +510 -0
  1036. package/data/workflows/experimental-design-statistics/scripts/batch_assignment.R +302 -0
  1037. package/data/workflows/experimental-design-statistics/scripts/batch_validation.R +342 -0
  1038. package/data/workflows/experimental-design-statistics/scripts/export_design.R +352 -0
  1039. package/data/workflows/experimental-design-statistics/scripts/load_example_data.R +204 -0
  1040. package/data/workflows/experimental-design-statistics/scripts/multiple_testing.R +417 -0
  1041. package/data/workflows/experimental-design-statistics/scripts/plot_power_curves.R +317 -0
  1042. package/data/workflows/experimental-design-statistics/scripts/power_atacseq.R +229 -0
  1043. package/data/workflows/experimental-design-statistics/scripts/power_pilot_based.R +289 -0
  1044. package/data/workflows/experimental-design-statistics/scripts/power_rnaseq.R +247 -0
  1045. package/data/workflows/experimental-design-statistics/scripts/sample_size_de.R +327 -0
  1046. package/data/workflows/experimental-design-statistics/scripts/sample_size_scrna.R +304 -0
  1047. package/data/workflows/functional-enrichment-from-degs/SKILL.md +387 -0
  1048. package/data/workflows/functional-enrichment-from-degs/references/database_guide.md +354 -0
  1049. package/data/workflows/functional-enrichment-from-degs/references/decision-guide.md +546 -0
  1050. package/data/workflows/functional-enrichment-from-degs/references/gsea_ora_comparison.md +213 -0
  1051. package/data/workflows/functional-enrichment-from-degs/references/gsea_ora_validation_framework.md +483 -0
  1052. package/data/workflows/functional-enrichment-from-degs/references/interpretation_guidelines.md +374 -0
  1053. package/data/workflows/functional-enrichment-from-degs/references/method-reference.md +742 -0
  1054. package/data/workflows/functional-enrichment-from-degs/scripts/export_results.R +190 -0
  1055. package/data/workflows/functional-enrichment-from-degs/scripts/generate_plots.R +240 -0
  1056. package/data/workflows/functional-enrichment-from-degs/scripts/get_msigdb_genesets.R +75 -0
  1057. package/data/workflows/functional-enrichment-from-degs/scripts/load_de_results.R +60 -0
  1058. package/data/workflows/functional-enrichment-from-degs/scripts/load_example_data.R +212 -0
  1059. package/data/workflows/functional-enrichment-from-degs/scripts/prepare_gene_lists.R +92 -0
  1060. package/data/workflows/functional-enrichment-from-degs/scripts/run_gsea.R +44 -0
  1061. package/data/workflows/functional-enrichment-from-degs/scripts/run_ora.R +53 -0
  1062. package/data/workflows/genetic-variant-annotation/SKILL.md +440 -0
  1063. package/data/workflows/genetic-variant-annotation/references/auto_installation_implementation.md +274 -0
  1064. package/data/workflows/genetic-variant-annotation/references/consequence_terms.md +392 -0
  1065. package/data/workflows/genetic-variant-annotation/references/filtering_strategies.md +808 -0
  1066. package/data/workflows/genetic-variant-annotation/references/installation_guide.md +557 -0
  1067. package/data/workflows/genetic-variant-annotation/references/pathogenicity_interpretation.md +473 -0
  1068. package/data/workflows/genetic-variant-annotation/references/qc_guidelines.md +524 -0
  1069. package/data/workflows/genetic-variant-annotation/references/snpeff_best_practices.md +481 -0
  1070. package/data/workflows/genetic-variant-annotation/references/tool_selection_guide.md +433 -0
  1071. package/data/workflows/genetic-variant-annotation/references/troubleshooting_guide.md +678 -0
  1072. package/data/workflows/genetic-variant-annotation/references/vep_best_practices.md +450 -0
  1073. package/data/workflows/genetic-variant-annotation/scripts/annotate_genes.py +243 -0
  1074. package/data/workflows/genetic-variant-annotation/scripts/export_results.py +450 -0
  1075. package/data/workflows/genetic-variant-annotation/scripts/filter_variants.py +365 -0
  1076. package/data/workflows/genetic-variant-annotation/scripts/install_tools.py +246 -0
  1077. package/data/workflows/genetic-variant-annotation/scripts/load_example_data.py +166 -0
  1078. package/data/workflows/genetic-variant-annotation/scripts/parse_snpeff_output.py +283 -0
  1079. package/data/workflows/genetic-variant-annotation/scripts/parse_vep_output.py +257 -0
  1080. package/data/workflows/genetic-variant-annotation/scripts/plot_variant_distribution.py +372 -0
  1081. package/data/workflows/genetic-variant-annotation/scripts/prioritize_variants.py +287 -0
  1082. package/data/workflows/genetic-variant-annotation/scripts/run_snpeff.py +418 -0
  1083. package/data/workflows/genetic-variant-annotation/scripts/run_vep.py +358 -0
  1084. package/data/workflows/genetic-variant-annotation/scripts/select_tool.py +203 -0
  1085. package/data/workflows/genetic-variant-annotation/scripts/test_complete_workflow.py +312 -0
  1086. package/data/workflows/genetic-variant-annotation/scripts/test_pickle_load.py +118 -0
  1087. package/data/workflows/genetic-variant-annotation/scripts/validate_vcf.py +351 -0
  1088. package/data/workflows/genetic-variant-annotation/scripts/verify_changes.py +212 -0
  1089. package/data/workflows/grn-pyscenic/SKILL.md +331 -0
  1090. package/data/workflows/grn-pyscenic/references/cli_interface.md +222 -0
  1091. package/data/workflows/grn-pyscenic/references/database_downloads.md +245 -0
  1092. package/data/workflows/grn-pyscenic/scripts/export_all.py +192 -0
  1093. package/data/workflows/grn-pyscenic/scripts/generate_report.py +512 -0
  1094. package/data/workflows/grn-pyscenic/scripts/integrate_with_adata.py +54 -0
  1095. package/data/workflows/grn-pyscenic/scripts/load_example_data.py +200 -0
  1096. package/data/workflows/grn-pyscenic/scripts/load_expression_data.py +61 -0
  1097. package/data/workflows/grn-pyscenic/scripts/plot_regulon_visualizations.py +263 -0
  1098. package/data/workflows/grn-pyscenic/scripts/run_grn_workflow.py +184 -0
  1099. package/data/workflows/gwas-to-function-twas/SKILL.md +394 -0
  1100. package/data/workflows/gwas-to-function-twas/references/fusion_best_practices.md +120 -0
  1101. package/data/workflows/gwas-to-function-twas/references/installation-guide.md +414 -0
  1102. package/data/workflows/gwas-to-function-twas/references/ldsc_qc_guidelines.md +287 -0
  1103. package/data/workflows/gwas-to-function-twas/references/spredixxcan_best_practices.md +166 -0
  1104. package/data/workflows/gwas-to-function-twas/references/therapeutic_interpretation_guide.md +717 -0
  1105. package/data/workflows/gwas-to-function-twas/references/tissue_reference_guide.md +182 -0
  1106. package/data/workflows/gwas-to-function-twas/references/troubleshooting_guide.md +317 -0
  1107. package/data/workflows/gwas-to-function-twas/references/twas_hub_validation_guide.md +88 -0
  1108. package/data/workflows/gwas-to-function-twas/scripts/colocalization_analysis.py +187 -0
  1109. package/data/workflows/gwas-to-function-twas/scripts/druggability_scoring.py +199 -0
  1110. package/data/workflows/gwas-to-function-twas/scripts/export_results.py +220 -0
  1111. package/data/workflows/gwas-to-function-twas/scripts/integrate_variant_annotation.py +194 -0
  1112. package/data/workflows/gwas-to-function-twas/scripts/interpret_therapeutic_direction.py +418 -0
  1113. package/data/workflows/gwas-to-function-twas/scripts/mendelian_randomization.py +749 -0
  1114. package/data/workflows/gwas-to-function-twas/scripts/multilayer_direction_analysis.py +471 -0
  1115. package/data/workflows/gwas-to-function-twas/scripts/plot_twas_results.py +252 -0
  1116. package/data/workflows/gwas-to-function-twas/scripts/run_fusion.py +155 -0
  1117. package/data/workflows/gwas-to-function-twas/scripts/run_smultixcan.py +102 -0
  1118. package/data/workflows/gwas-to-function-twas/scripts/run_spredixxcan.py +138 -0
  1119. package/data/workflows/gwas-to-function-twas/scripts/select_reference_panel.py +253 -0
  1120. package/data/workflows/gwas-to-function-twas/scripts/validate_gwas_sumstats.py +214 -0
  1121. package/data/workflows/gwas-to-function-twas/scripts/validate_with_twas_hub.py +439 -0
  1122. package/data/workflows/lasso-biomarker-panel/SKILL.md +322 -0
  1123. package/data/workflows/lasso-biomarker-panel/references/decision-guide.md +64 -0
  1124. package/data/workflows/lasso-biomarker-panel/references/lasso-reference.md +110 -0
  1125. package/data/workflows/lasso-biomarker-panel/references/validation-guide.md +105 -0
  1126. package/data/workflows/lasso-biomarker-panel/scripts/biological_interpretation.R +1560 -0
  1127. package/data/workflows/lasso-biomarker-panel/scripts/biomarker_plots.R +350 -0
  1128. package/data/workflows/lasso-biomarker-panel/scripts/export_results.R +1492 -0
  1129. package/data/workflows/lasso-biomarker-panel/scripts/lasso_workflow.R +328 -0
  1130. package/data/workflows/lasso-biomarker-panel/scripts/load_example_data.R +1903 -0
  1131. package/data/workflows/lasso-biomarker-panel/scripts/plotting_helpers.R +78 -0
  1132. package/data/workflows/lasso-biomarker-panel/scripts/prepare_features.R +225 -0
  1133. package/data/workflows/lasso-biomarker-panel/scripts/query_cellxgene.py +107 -0
  1134. package/data/workflows/lasso-biomarker-panel/scripts/validate_external.R +174 -0
  1135. package/data/workflows/literature-preclinical/SKILL.md +276 -0
  1136. package/data/workflows/literature-preclinical/assets/eval/simple_test.py +386 -0
  1137. package/data/workflows/literature-preclinical/references/experiment-extraction-guide.md +147 -0
  1138. package/data/workflows/literature-preclinical/references/full-text-enrichment-guide.md +121 -0
  1139. package/data/workflows/literature-preclinical/references/preclinical-search-guide.md +117 -0
  1140. package/data/workflows/literature-preclinical/scripts/extract_experiments.py +401 -0
  1141. package/data/workflows/literature-preclinical/scripts/generate_plots.R +303 -0
  1142. package/data/workflows/literature-preclinical/scripts/narrative_synthesis.py +653 -0
  1143. package/data/workflows/literature-preclinical/scripts/preclinical_search.py +332 -0
  1144. package/data/workflows/literature-preclinical/scripts/preclinical_synthesis.py +237 -0
  1145. package/data/workflows/literature-preclinical/scripts/report_generation.py +326 -0
  1146. package/data/workflows/mendelian-randomization-twosamplemr/SKILL.md +210 -0
  1147. package/data/workflows/mendelian-randomization-twosamplemr/references/interpretation-guide.md +239 -0
  1148. package/data/workflows/mendelian-randomization-twosamplemr/references/method-reference.md +190 -0
  1149. package/data/workflows/mendelian-randomization-twosamplemr/scripts/export_results.R +123 -0
  1150. package/data/workflows/mendelian-randomization-twosamplemr/scripts/generate_report.R +411 -0
  1151. package/data/workflows/mendelian-randomization-twosamplemr/scripts/load_data.R +281 -0
  1152. package/data/workflows/mendelian-randomization-twosamplemr/scripts/mr_plots.R +163 -0
  1153. package/data/workflows/mendelian-randomization-twosamplemr/scripts/run_mr_analysis.R +322 -0
  1154. package/data/workflows/pcr-primer-design/SKILL.md +397 -0
  1155. package/data/workflows/pcr-primer-design/references/code_examples.md +594 -0
  1156. package/data/workflows/pcr-primer-design/references/miqe_guidelines.md +453 -0
  1157. package/data/workflows/pcr-primer-design/references/parameter_ranges.md +356 -0
  1158. package/data/workflows/pcr-primer-design/references/primer_design_best_practices.md +451 -0
  1159. package/data/workflows/pcr-primer-design/references/troubleshooting_guide.md +477 -0
  1160. package/data/workflows/pcr-primer-design/scripts/__init__.py +2 -0
  1161. package/data/workflows/pcr-primer-design/scripts/calculate_tm.py +306 -0
  1162. package/data/workflows/pcr-primer-design/scripts/check_dimers.py +298 -0
  1163. package/data/workflows/pcr-primer-design/scripts/check_secondary_structures.py +343 -0
  1164. package/data/workflows/pcr-primer-design/scripts/design_qpcr_primers.py +233 -0
  1165. package/data/workflows/pcr-primer-design/scripts/design_standard_primers.py +197 -0
  1166. package/data/workflows/pcr-primer-design/scripts/design_taqman_probes.py +226 -0
  1167. package/data/workflows/pcr-primer-design/scripts/export_results.py +382 -0
  1168. package/data/workflows/pcr-primer-design/scripts/generate_reports.py +379 -0
  1169. package/data/workflows/pcr-primer-design/scripts/validate_specificity.py +311 -0
  1170. package/data/workflows/pcr-primer-design/scripts/visualize_primers.py +379 -0
  1171. package/data/workflows/polygenic-risk-score-prs-catalog/SKILL.md +195 -0
  1172. package/data/workflows/polygenic-risk-score-prs-catalog/references/interpretation-guide.md +80 -0
  1173. package/data/workflows/polygenic-risk-score-prs-catalog/references/pgs-catalog-guide.md +109 -0
  1174. package/data/workflows/polygenic-risk-score-prs-catalog/scripts/export_results.R +186 -0
  1175. package/data/workflows/polygenic-risk-score-prs-catalog/scripts/generate_plots.R +283 -0
  1176. package/data/workflows/polygenic-risk-score-prs-catalog/scripts/load_pgs_weights.R +228 -0
  1177. package/data/workflows/polygenic-risk-score-prs-catalog/scripts/load_reference_data.R +191 -0
  1178. package/data/workflows/polygenic-risk-score-prs-catalog/scripts/score_traits.R +216 -0
  1179. package/data/workflows/pooled-crispr-screens/SKILL.md +362 -0
  1180. package/data/workflows/pooled-crispr-screens/references/crispr_screen_best_practices.md +349 -0
  1181. package/data/workflows/pooled-crispr-screens/references/qc_guidelines.md +722 -0
  1182. package/data/workflows/pooled-crispr-screens/references/statistical_methods.md +644 -0
  1183. package/data/workflows/pooled-crispr-screens/references/troubleshooting_guide.md +684 -0
  1184. package/data/workflows/pooled-crispr-screens/references/umi_optimization.md +297 -0
  1185. package/data/workflows/pooled-crispr-screens/scripts/concatenate_libraries.py +132 -0
  1186. package/data/workflows/pooled-crispr-screens/scripts/detect_perturbed_cells.py +255 -0
  1187. package/data/workflows/pooled-crispr-screens/scripts/differential_expression.py +202 -0
  1188. package/data/workflows/pooled-crispr-screens/scripts/differential_expression_glmgampoi.py +320 -0
  1189. package/data/workflows/pooled-crispr-screens/scripts/export_results.py +261 -0
  1190. package/data/workflows/pooled-crispr-screens/scripts/expression_filtering.py +159 -0
  1191. package/data/workflows/pooled-crispr-screens/scripts/gene_name_corrections.py +188 -0
  1192. package/data/workflows/pooled-crispr-screens/scripts/generate_report.py +485 -0
  1193. package/data/workflows/pooled-crispr-screens/scripts/load_10x_libraries.py +69 -0
  1194. package/data/workflows/pooled-crispr-screens/scripts/load_example_data.py +257 -0
  1195. package/data/workflows/pooled-crispr-screens/scripts/map_sgrna_to_cells.py +119 -0
  1196. package/data/workflows/pooled-crispr-screens/scripts/normalize_and_scale.py +140 -0
  1197. package/data/workflows/pooled-crispr-screens/scripts/qc_filtering.py +185 -0
  1198. package/data/workflows/pooled-crispr-screens/scripts/run_glmgampoi.R +181 -0
  1199. package/data/workflows/pooled-crispr-screens/scripts/screen_all_perturbations.py +306 -0
  1200. package/data/workflows/pooled-crispr-screens/scripts/validate_perturbations.py +314 -0
  1201. package/data/workflows/pooled-crispr-screens/scripts/visualize_perturbations.py +314 -0
  1202. package/data/workflows/scrnaseq-scanpy-core-analysis/SKILL.md +425 -0
  1203. package/data/workflows/scrnaseq-scanpy-core-analysis/references/ambient_rna_correction.md +422 -0
  1204. package/data/workflows/scrnaseq-scanpy-core-analysis/references/common-patterns.md +533 -0
  1205. package/data/workflows/scrnaseq-scanpy-core-analysis/references/integration_methods.md +820 -0
  1206. package/data/workflows/scrnaseq-scanpy-core-analysis/references/marker_gene_database.md +471 -0
  1207. package/data/workflows/scrnaseq-scanpy-core-analysis/references/pseudobulk_de_guide.md +408 -0
  1208. package/data/workflows/scrnaseq-scanpy-core-analysis/references/qc_guidelines.md +535 -0
  1209. package/data/workflows/scrnaseq-scanpy-core-analysis/references/scanpy_best_practices.md +496 -0
  1210. package/data/workflows/scrnaseq-scanpy-core-analysis/references/troubleshooting_guide.md +668 -0
  1211. package/data/workflows/scrnaseq-scanpy-core-analysis/references/workflow-details.md +727 -0
  1212. package/data/workflows/scrnaseq-scanpy-core-analysis/scripts/annotate_celltypes.py +431 -0
  1213. package/data/workflows/scrnaseq-scanpy-core-analysis/scripts/cluster_cells.py +293 -0
  1214. package/data/workflows/scrnaseq-scanpy-core-analysis/scripts/export_results.py +423 -0
  1215. package/data/workflows/scrnaseq-scanpy-core-analysis/scripts/filter_cells.py +531 -0
  1216. package/data/workflows/scrnaseq-scanpy-core-analysis/scripts/find_markers.py +391 -0
  1217. package/data/workflows/scrnaseq-scanpy-core-analysis/scripts/find_variable_genes.py +222 -0
  1218. package/data/workflows/scrnaseq-scanpy-core-analysis/scripts/integrate_scvi.py +665 -0
  1219. package/data/workflows/scrnaseq-scanpy-core-analysis/scripts/integration_diagnostics.py +678 -0
  1220. package/data/workflows/scrnaseq-scanpy-core-analysis/scripts/load_example_data.py +68 -0
  1221. package/data/workflows/scrnaseq-scanpy-core-analysis/scripts/normalize_data.py +325 -0
  1222. package/data/workflows/scrnaseq-scanpy-core-analysis/scripts/plot_dimreduction.py +389 -0
  1223. package/data/workflows/scrnaseq-scanpy-core-analysis/scripts/plot_qc.py +320 -0
  1224. package/data/workflows/scrnaseq-scanpy-core-analysis/scripts/pseudobulk_de.py +553 -0
  1225. package/data/workflows/scrnaseq-scanpy-core-analysis/scripts/qc_metrics.py +477 -0
  1226. package/data/workflows/scrnaseq-scanpy-core-analysis/scripts/remove_ambient_rna.py +347 -0
  1227. package/data/workflows/scrnaseq-scanpy-core-analysis/scripts/run_umap.py +188 -0
  1228. package/data/workflows/scrnaseq-scanpy-core-analysis/scripts/scale_and_pca.py +365 -0
  1229. package/data/workflows/scrnaseq-scanpy-core-analysis/scripts/setup_and_import.py +334 -0
  1230. package/data/workflows/scrnaseq-seurat-core-analysis/SKILL.md +585 -0
  1231. package/data/workflows/scrnaseq-seurat-core-analysis/references/ambient_rna_correction.md +422 -0
  1232. package/data/workflows/scrnaseq-seurat-core-analysis/references/common-patterns.md +667 -0
  1233. package/data/workflows/scrnaseq-seurat-core-analysis/references/decision-guide.md +456 -0
  1234. package/data/workflows/scrnaseq-seurat-core-analysis/references/integration_methods.md +864 -0
  1235. package/data/workflows/scrnaseq-seurat-core-analysis/references/marker_gene_database.md +471 -0
  1236. package/data/workflows/scrnaseq-seurat-core-analysis/references/pseudobulk_de_guide.md +408 -0
  1237. package/data/workflows/scrnaseq-seurat-core-analysis/references/qc_guidelines.md +452 -0
  1238. package/data/workflows/scrnaseq-seurat-core-analysis/references/seurat_best_practices.md +417 -0
  1239. package/data/workflows/scrnaseq-seurat-core-analysis/references/troubleshooting_guide.md +566 -0
  1240. package/data/workflows/scrnaseq-seurat-core-analysis/references/workflow-details.md +801 -0
  1241. package/data/workflows/scrnaseq-seurat-core-analysis/scripts/annotate_celltypes.R +306 -0
  1242. package/data/workflows/scrnaseq-seurat-core-analysis/scripts/cluster_cells.R +223 -0
  1243. package/data/workflows/scrnaseq-seurat-core-analysis/scripts/export_results.R +292 -0
  1244. package/data/workflows/scrnaseq-seurat-core-analysis/scripts/filter_cells.R +576 -0
  1245. package/data/workflows/scrnaseq-seurat-core-analysis/scripts/find_markers.R +325 -0
  1246. package/data/workflows/scrnaseq-seurat-core-analysis/scripts/find_variable_features.R +106 -0
  1247. package/data/workflows/scrnaseq-seurat-core-analysis/scripts/integrate_batches.R +504 -0
  1248. package/data/workflows/scrnaseq-seurat-core-analysis/scripts/integration_diagnostics.R +596 -0
  1249. package/data/workflows/scrnaseq-seurat-core-analysis/scripts/load_example_data.R +89 -0
  1250. package/data/workflows/scrnaseq-seurat-core-analysis/scripts/normalize_data.R +184 -0
  1251. package/data/workflows/scrnaseq-seurat-core-analysis/scripts/plot_dimreduction.R +273 -0
  1252. package/data/workflows/scrnaseq-seurat-core-analysis/scripts/plot_qc.R +250 -0
  1253. package/data/workflows/scrnaseq-seurat-core-analysis/scripts/pseudobulk_de.R +324 -0
  1254. package/data/workflows/scrnaseq-seurat-core-analysis/scripts/qc_metrics.R +358 -0
  1255. package/data/workflows/scrnaseq-seurat-core-analysis/scripts/remove_ambient_rna.R +281 -0
  1256. package/data/workflows/scrnaseq-seurat-core-analysis/scripts/run_umap.R +116 -0
  1257. package/data/workflows/scrnaseq-seurat-core-analysis/scripts/scale_and_pca.R +243 -0
  1258. package/data/workflows/scrnaseq-seurat-core-analysis/scripts/setup_and_import.R +193 -0
  1259. package/data/workflows/spatial-transcriptomics/SKILL.md +256 -0
  1260. package/data/workflows/spatial-transcriptomics/references/spatial-analysis-guide.md +216 -0
  1261. package/data/workflows/spatial-transcriptomics/scripts/export_results.py +214 -0
  1262. package/data/workflows/spatial-transcriptomics/scripts/generate_all_plots.py +397 -0
  1263. package/data/workflows/spatial-transcriptomics/scripts/load_example_data.py +175 -0
  1264. package/data/workflows/spatial-transcriptomics/scripts/spatial_workflow.py +206 -0
  1265. package/dist/bgi.js +28 -1
  1266. package/package.json +2 -1
@@ -0,0 +1,1492 @@
1
+ # Export LASSO Biomarker Panel Results
2
+ # Saves all results including RDS objects for downstream skills
3
+ # Generates comprehensive markdown + PDF reports with embedded plots
4
+
5
#' Export all LASSO biomarker panel results
#'
#' Writes every artefact of a LASSO biomarker panel run into `output_dir`:
#' CSV tables (stable panel, all stability scores, per-fold discovery
#' performance, CV predictions, parameters and, when supplied, validation
#' performance/predictions), RDS copies of the full model result and of the
#' final model, and the markdown and PDF reports. Progress is printed to the
#' console with `cat()`.
#'
#' @param model_result Result from run_lasso_panel()
#' @param validation_result Result from validate_panel() (optional)
#' @param output_dir Output directory (default: "results")
#' @param data Original data object from load_*_data() (optional, enriches report)
#' @param features Feature matrix result from prepare_feature_matrix() (optional)
#' @param interpretation Biological interpretation result (optional); passed
#'   through unchanged to the markdown and PDF report generators
#'
#' @return Called for its side effects; returns `NULL` invisibly.
#' @export
export_all <- function(model_result, validation_result = NULL,
                       output_dir = "results",
                       data = NULL, features = NULL,
                       interpretation = NULL) {

  cat("\n=== Exporting LASSO Biomarker Panel Results ===\n\n")

  # Create output directory
  if (!dir.exists(output_dir)) {
    dir.create(output_dir, recursive = TRUE)
    cat("Created directory:", output_dir, "\n\n")
  }

  # Every table is written into output_dir without row names.
  save_csv <- function(tbl, file_name) {
    write.csv(tbl, file.path(output_dir, file_name), row.names = FALSE)
  }

  # 1. Biomarker panel (stable features with coefficients)
  cat("1. Exporting biomarker panel...\n")
  importance <- model_result$feature_importance
  panel <- importance[importance$is_stable, ]
  save_csv(panel, "biomarker_panel.csv")
  cat(" Saved: biomarker_panel.csv (", nrow(panel), "features)\n\n")

  # 2. All feature stability scores
  cat("2. Exporting all feature stability scores...\n")
  save_csv(importance, "all_feature_stability.csv")
  cat(" Saved: all_feature_stability.csv (",
      nrow(importance), "features)\n\n")

  # 3. Discovery performance (per-fold)
  cat("3. Exporting discovery performance...\n")
  perf <- data.frame(
    fold = seq_along(model_result$fold_aucs),
    auc = model_result$fold_aucs,
    sensitivity = model_result$fold_sensitivities,
    specificity = model_result$fold_specificities,
    stringsAsFactors = FALSE
  )
  save_csv(perf, "discovery_performance.csv")
  cat(" Saved: discovery_performance.csv\n")
  cat(" Mean AUC:", round(model_result$mean_auc, 3),
      "(95% CI:", round(model_result$auc_ci[1], 3), "-",
      round(model_result$auc_ci[2], 3), ")\n\n")

  # 4. Validation performance (if provided)
  if (!is.null(validation_result)) {
    cat("4. Exporting validation performance...\n")
    save_csv(validation_result$performance_table, "validation_performance.csv")
    cat(" Saved: validation_performance.csv\n")
    cat(" Validation AUC:", round(validation_result$auc, 3), "\n\n")

    # Validation predictions
    save_csv(validation_result$predictions, "validation_predictions.csv")
    cat(" Saved: validation_predictions.csv\n\n")
  } else {
    cat("4. No validation result provided (skipped)\n\n")
  }

  # 5. Cross-validation predictions
  cat("5. Exporting CV predictions...\n")
  cv_pred <- model_result$cv_predictions
  save_csv(cv_pred, "cv_predictions.csv")
  cat(" Saved: cv_predictions.csv (",
      nrow(cv_pred), "predictions across",
      length(unique(cv_pred$fold)), "folds)\n\n")

  # 6. LASSO model object (CRITICAL for downstream skills)
  # The load hint is built from the real path so it stays correct when
  # output_dir is not the default "results".
  cat("6. Saving LASSO model object (RDS)...\n")
  model_path <- file.path(output_dir, "lasso_model.rds")
  saveRDS(model_result, model_path)
  cat(" Saved: lasso_model.rds\n")
  cat(" (Load with: model <- readRDS('", model_path, "'))\n", sep = "")
  cat(" (Predict with: predict_biomarker_panel(model, new_X))\n\n")

  # 7. Final glmnet model only (lightweight, for prediction only)
  if (!is.null(model_result$final_model)) {
    cat("7. Saving final glmnet model (RDS)...\n")
    final_path <- file.path(output_dir, "final_glmnet_model.rds")
    saveRDS(model_result$final_model, final_path)
    cat(" Saved: final_glmnet_model.rds\n")
    cat(" (Load with: fit <- readRDS('", final_path, "'))\n\n", sep = "")
  }

  # 8. Summary report (enhanced markdown)
  cat("8. Generating summary report (markdown)...\n")
  .write_summary_report(model_result, validation_result, output_dir,
                        data, features, interpretation)
  cat(" Saved: summary_report.md\n\n")

  # 9. PDF report (with embedded plots)
  cat("9. Generating PDF report...\n")
  .render_pdf_report(model_result, validation_result, output_dir,
                     data, features, interpretation)

  # 10. Parameters log
  cat("10. Saving parameters...\n")
  params_df <- data.frame(
    parameter = names(model_result$parameters),
    value = as.character(model_result$parameters),
    stringsAsFactors = FALSE
  )
  save_csv(params_df, "parameters.csv")
  cat(" Saved: parameters.csv\n\n")

  cat("\n=== Export Complete ===\n")
  cat("All files saved to:", output_dir, "\n")
  cat("Files:\n")
  for (f in list.files(output_dir, full.names = FALSE)) {
    cat(" -", f, "\n")
  }

  invisible(NULL)
}
124
+
125
+
126
+ # ============================================================================
127
+ # SHARED HELPER: Build enriched report context lines
128
+ # ============================================================================
129
+
130
#' Build enriched report content from report_context metadata
#'
#' Returns a named list of character vectors for enriched report sections.
#' When report_context is NULL, returns all-NULL list (generators fall back
#' to generic text). The rationale, methods and reference sections are only
#' produced when `rc$references` is present, because their text carries
#' hard-coded citation markers ([1] .. [6]).
#'
#' @param rc report_context list from data loader (or NULL)
#' @param data Full data object from loader (or NULL). Currently not read by
#'   this function; kept for interface stability.
#' @param p Model parameters list. Fields read here: alpha, n_repeats,
#'   train_fraction, n_inner_folds, stability_threshold, seed.
#' @param features Feature matrix result (or NULL). Currently not read.
#' @param model_result Full model result (mean_auc is used for the benchmark
#'   table row describing this analysis)
#' @param validation_result Validation result (or NULL). Currently not read.
#' @param format "md" for markdown, "rmd" for R Markdown with LaTeX math.
#'   Any value other than "rmd" is treated as markdown.
#' @return Named list with: analytical_goals, disease_sections, lasso_rationale,
#'   method_text, benchmarks, references
#' @keywords internal
.build_context_lines <- function(rc, data, p, features, model_result,
                                 validation_result, format = "md") {

  # Default: all NULL = use generic text in generators
  ctx <- list(
    analytical_goals = NULL,  # Numbered aims with inline citations
    disease_sections = NULL,  # Disease context subsections
    lasso_rationale = NULL,   # LASSO rationale with citations
    method_text = NULL,       # Methods with inline citations
    benchmarks = NULL,        # Published benchmarks section
    references = NULL         # Numbered reference list
  )

  if (is.null(rc)) return(ctx)

  is_rmd <- format == "rmd"
  has_refs <- !is.null(rc$references)
  # isTRUE() keeps this a safe scalar test even if alpha is missing.
  is_pure_lasso <- isTRUE(p$alpha == 1)

  # Subsection heading prefix: ### for markdown, ## for Rmd (numbered_sections)
  sub_pfx <- if (is_rmd) "##" else "###"

  # Math symbols by format
  alpha_sym <- if (is_rmd) {
    paste0("$\\alpha$ = ", p$alpha)
  } else {
    paste0("alpha = ", p$alpha)
  }
  lambda_sym <- if (is_rmd) "$\\lambda$" else "lambda"
  geq_sym <- if (is_rmd) "$\\geq$" else ">="

  # ---- Analytical Goals (with inline citations) ----
  goals <- rc$analytical_goals
  if (!is.null(goals)) {
    # vapply (not a bare paste0) so an empty goal list yields character(0)
    # instead of a stray ". " entry.
    ctx$analytical_goals <- vapply(
      seq_along(goals),
      function(i) paste0(i, ". ", goals[i]),
      character(1)
    )
  }

  # ---- Disease Context Subsections ----
  # Heading text -> report_context field, in output order.
  section_fields <- c(
    "Disease Background" = "disease_background",
    "Clinical Trial" = "trial_description",
    "Patient Population" = "patient_population",
    "Endpoint Definition" = "endpoint_definition",
    "Expression Platform" = "platform_description"
  )
  sections <- character(0)
  for (heading in names(section_fields)) {
    body <- rc[[section_fields[[heading]]]]
    if (!is.null(body)) {
      sections <- c(sections, paste(sub_pfx, heading), "", body, "")
    }
  }
  if (length(sections) > 0) {
    ctx$disease_sections <- sections
  }

  if (has_refs) {
    # ---- LASSO Rationale (with citations) ----
    lasso_text <- c(
      paste("LASSO logistic regression [1] performs simultaneous feature selection and",
            "coefficient estimation by applying an L1 penalty that shrinks many",
            "coefficients to exactly zero. This produces sparse, interpretable",
            "models ideal for clinical biomarker panels where a small number of",
            "measurable features is critical for translation to diagnostic",
            "assays. Compared to other machine learning approaches, LASSO provides",
            "transparent, interpretable coefficients and naturally handles the",
            "p >> n problem common in omics data (thousands of features, tens to",
            "hundreds of samples)."),
      "")

    if (p$alpha < 1) {
      lasso_text <- c(lasso_text,
        paste0("This analysis uses **elastic net** regularization (",
               alpha_sym, ") [2], which combines L1 (LASSO) and L2 (ridge) ",
               "penalties. This is particularly useful when features are ",
               "correlated, as it tends to retain groups of correlated genes ",
               "rather than arbitrarily selecting one from each group."),
        "")
    }

    ctx$lasso_rationale <- c(lasso_text,
      paste("Model fitting uses the glmnet coordinate descent algorithm [4]",
            "with regularization path optimization. Stability selection [3]",
            "ensures that selected features are robust to sampling variation,",
            "reducing false discovery of noise features. Model discrimination",
            "is evaluated via ROC analysis using the pROC package [5],",
            "following the cross-validated biomarker discovery framework of",
            "Ali et al. [6]."),
      "")

    # ---- Methods with inline citations ----
    reg_note <- if (is_pure_lasso) " (pure LASSO [1])" else " (elastic net [1,2])"
    split_label <- paste0(round(p$train_fraction * 100), "/",
                          round((1 - p$train_fraction) * 100))

    ctx$method_text <- c(
      paste0("- **Regularization:** ", alpha_sym, reg_note),
      paste("- **Repeated CV:**", p$n_repeats,
            "iterations of class-balanced",
            split_label,
            "train/test splits"),
      paste("- **Inner CV:**", p$n_inner_folds, "folds for", lambda_sym,
            "selection (cv.glmnet [4])"),
      paste("- **Stability threshold:**", p$stability_threshold,
            "(features must be selected in", geq_sym,
            paste0(p$stability_threshold * 100, "%"),
            "of iterations [3])"),
      paste("- **Random seed:**", p$seed, "(reproducible)"),
      "",
      paste0("**Algorithm:** For each of the ", p$n_repeats,
             " repeated CV iterations, a class-balanced train/test split is ",
             "created. On the training set, cv.glmnet [4] selects the optimal ",
             lambda_sym, " via inner cross-validation (", p$n_inner_folds,
             "-fold). Non-zero coefficients are recorded. After all iterations, ",
             "features selected in ", geq_sym, " ",
             p$stability_threshold * 100, "% of iterations (stability ",
             "selection [3]) form the final panel. A final model is fit on all ",
             "samples using only these stable features."),
      "",
      paste(sub_pfx, "Performance Evaluation"),
      "",
      "- **Discrimination:** AUC (Area Under ROC Curve) via pROC package [5] (DeLong method for CIs)",
      paste("- **Confidence intervals:** 2.5th and 97.5th percentiles of fold AUCs across",
            p$n_repeats, "iterations"),
      "- **Sensitivity/Specificity:** At Youden's J-statistic optimal threshold",
      "- **Calibration:** Predicted probability vs observed event rate",
      "")
  }

  # ---- Published Benchmarks ----
  pb <- rc$published_benchmarks
  if (!is.null(pb)) {
    bench_lines <- character(0)

    if (!is.null(pb$intro)) {
      bench_lines <- c(bench_lines, pb$intro, "")
    }

    # Render benchmark table
    studies <- pb$studies
    if (!is.null(studies)) {
      study_rows <- vapply(
        seq_len(nrow(studies)),
        function(j) {
          s <- studies[j, ]
          paste0("| ", s$study, " | ", s$drug, " | ", s$validated_auc,
                 " | ", s$method, " | ", s$notes, " |")
        },
        character(1)
      )

      # Our result is appended as the last row
      our_auc <- round(model_result$mean_auc, 3)
      alpha_label <- if (is_pure_lasso) {
        "LASSO"
      } else {
        paste0("Elastic net (", p$alpha, ")")
      }
      # Extract drug from first benchmark row or use generic
      our_drug <- if (nrow(studies) > 0) studies$drug[1] else "—"

      bench_lines <- c(bench_lines,
        "| Study | Drug | Validated AUC | Method | Notes |",
        "|-------|------|:---:|--------|-------|",
        study_rows,
        paste0("| **This analysis** | **", our_drug, "** | **", our_auc,
               "** | **", alpha_label, "** | **Nested CV, no leakage** |"),
        "")
    }

    if (!is.null(pb$context)) {
      bench_lines <- c(bench_lines, pb$context, "")
    }

    ctx$benchmarks <- bench_lines
  }

  # ---- References ----
  if (has_refs) {
    # Each reference is followed by a blank line. as.character() keeps an
    # empty reference list as character(0) rather than NULL, which would
    # delete the list element on assignment.
    ctx$references <- as.character(unlist(
      lapply(rc$references, function(ref) c(ref, "")),
      use.names = FALSE
    ))
  }

  ctx
}
340
+
341
+
342
+ # ============================================================================
343
+ # BIOLOGICAL INTERPRETATION HELPER
344
+ # ============================================================================
345
+
346
#' Build biological interpretation report lines
#'
#' Turns the pathway, cell-type and GWAS parts of an interpretation result
#' into markdown table lines for the report generators. Missing values in the
#' inputs (NA coefficients, NA adjusted p-values, NA GWAS flags or annotation
#' text) are tolerated: they are excluded from filters instead of producing
#' all-NA table rows or aborting the report.
#'
#' @param interp Result from run_biological_interpretation()
#' @param panel Data frame of panel genes with coefficients (columns
#'   `feature` and `mean_coefficient` are read; its row count labels the
#'   Reactome ORA table)
#' @param format "md" for markdown or "rmd" for R Markdown (LaTeX)
#' @return Named list with pathway_lines, celltype_lines, gwas_lines. All
#'   three names are always present; a section with no content is NULL.
#' @keywords internal
.build_interpretation_lines <- function(interp, panel, format = "md") {
  if (is.null(interp)) {
    return(list(pathway_lines = NULL, celltype_lines = NULL, gwas_lines = NULL))
  }

  arrow <- if (format == "rmd") "$\\rightarrow$" else "→"

  # which() drops NA coefficients so they never show up as "NA" gene names.
  pos_genes <- panel$feature[which(panel$mean_coefficient > 0)]
  neg_genes <- panel$feature[which(panel$mean_coefficient < 0)]

  # Rows of an enrichment result (S4 object with a @result data frame) whose
  # adjusted p-value is below the cutoff; NULL when no result was supplied.
  significant_terms <- function(enrich, cutoff = 0.1) {
    if (is.null(enrich)) return(NULL)
    res <- enrich@result
    res[which(res$p.adjust < cutoff), ]
  }

  # Shorten over-long table cells to max_chars (ellipsis included), leaving
  # missing values untouched.
  truncate_cell <- function(x, max_chars) {
    x <- as.character(x)
    too_long <- !is.na(x) & nchar(x) > max_chars
    if (any(too_long)) {
      x[too_long] <- paste0(substr(x[too_long], 1, max_chars - 3), "...")
    }
    x
  }

  # ---- Pathway Enrichment ----
  pw <- NULL

  # Coefficient direction summary
  if (length(pos_genes) > 0) {
    pw <- c(pw,
      paste0("**Positive coefficient genes** (higher expression ", arrow,
             " positive outcome): ", paste(pos_genes, collapse = ", ")),
      "")
  }
  if (length(neg_genes) > 0) {
    pw <- c(pw,
      paste0("**Negative coefficient genes** (higher expression ", arrow,
             " negative outcome): ", paste(neg_genes, collapse = ", ")),
      "")
  }

  # GSEA Hallmark results (ten smallest p-values)
  gsea_h <- interp$pathway$gsea_hallmark
  if (!is.null(gsea_h) && nrow(gsea_h) > 0) {
    top_gsea <- head(gsea_h[order(gsea_h$pval), ], 10)
    pathway_short <- gsub("_", " ", gsub("HALLMARK_", "", top_gsea$pathway))

    # NOTE(review): "500 candidate features" is fixed text; the real number
    # of candidate features is not available in this function - confirm it
    # matches the upstream feature filter.
    pw <- c(pw,
      "**Gene Set Enrichment Analysis (GSEA)** — MSigDB Hallmark pathways ranked by",
      "LASSO selection frequency across 500 candidate features:",
      "",
      "| Pathway | NES | P-value | Adj. P | Gene Set Size |",
      "|---------|:---:|:-------:|:------:|:---:|",
      sprintf("| %s | %.2f | %.4f | %.3f | %d |",
              pathway_short,
              top_gsea$NES,
              top_gsea$pval,
              top_gsea$padj,
              top_gsea$size),
      "")
  }

  # ORA Reactome results
  sig_r <- significant_terms(interp$pathway$ora_reactome)
  if (!is.null(sig_r) && nrow(sig_r) > 0) {
    reactome_short <- gsub("_", " ", gsub("REACTOME_", "", sig_r$Description))

    pw <- c(pw,
      # Panel size comes from the panel actually passed in, not a constant.
      sprintf("**Over-Representation Analysis (ORA)** — Reactome pathways (%d-gene panel):",
              NROW(panel)),
      "",
      "| Pathway | Gene Ratio | P-value | Adj. P | Genes |",
      "|---------|:----------:|:-------:|:------:|-------|",
      sprintf("| %s | %s | %.4f | %.3f | %s |",
              reactome_short,
              sig_r$GeneRatio,
              sig_r$pvalue,
              sig_r$p.adjust,
              sig_r$geneID),
      "")
  }

  # ORA GO (only if significant; at most the first ten significant terms)
  sig_go <- significant_terms(interp$pathway$ora_go)
  if (!is.null(sig_go) && nrow(sig_go) > 0) {
    top_go <- head(sig_go, 10)
    pw <- c(pw,
      "**GO Biological Process (ORA):**",
      "",
      "| GO Term | Gene Ratio | Adj. P | Genes |",
      "|---------|:----------:|:------:|-------|",
      sprintf("| %s | %s | %.3f | %s |",
              top_go$Description,
              top_go$GeneRatio,
              top_go$p.adjust,
              top_go$geneID),
      "")
  }

  # ---- Cell-Type Expression Context ----
  ct <- interp$celltype
  ct_lines <- NULL
  tissue_label <- if (!is.null(interp$tissue_label)) interp$tissue_label else "Tissue"

  if (!is.null(ct) && nrow(ct) > 0) {
    # isTRUE() so an NA in `source` selects the census layout, not an error.
    is_curated <- "source" %in% names(ct) &&
      isTRUE(all(ct$source == "curated"))

    if (is_curated) {
      # Detect change column (uc_change for legacy IBD, disease_change for new)
      change_col <- if ("disease_change" %in% names(ct)) "disease_change" else "uc_change"
      change_header <- paste(tissue_label, "Change")

      ct_lines <- c(ct_lines,
        paste("Cell-type expression context from published single-cell atlases for",
              tissue_label, "tissue:"),
        "",
        sprintf("| Gene | Primary Cell Type | Compartment | %s | Evidence |", change_header),
        "|------|------------------|:-----------:|:------------:|----------|",
        sprintf("| *%s* | %s | %s | %s | %s |",
                ct$gene,
                ct$cell_type,
                ct$compartment,
                ct[[change_col]],
                ct$evidence))
    } else {
      # CZI Census real data — summarize top cell types per gene
      # Use normal tissue if available, otherwise all
      has_normal <- "disease" %in% names(ct) && "normal" %in% ct$disease
      ct_sub <- if (has_normal) ct[which(ct$disease == "normal"), ] else ct

      gene_rows <- vapply(
        as.character(unique(ct_sub$gene)),
        function(gene) {
          gene_data <- ct_sub[which(ct_sub$gene == gene), ]
          gene_data <- gene_data[order(-gene_data$mean_expression), ]
          top3 <- head(gene_data, 3)
          ct_str <- paste(sprintf("%s (%.0f%%)", top3$cell_type,
                                  top3$pct_expressing), collapse = "; ")
          sprintf("| *%s* | %s | |", gene, ct_str)
        },
        character(1),
        USE.NAMES = FALSE
      )

      ct_lines <- c(ct_lines,
        paste("Cell-type expression in", tissue_label, "from CZI CELLxGENE Census"),
        "(tens of millions of single cells across published datasets):",
        "",
        "| Gene | Top Cell Types (by expression) | % Expressing |",
        "|------|------------------------------|:------------:|",
        gene_rows)
    }
    ct_lines <- c(ct_lines, "")
  }

  # ---- GWAS / Disease Gene Overlap ----
  gwas <- interp$gwas
  gwas_lines <- NULL
  gwas_label <- if (!is.null(interp$gwas_label)) interp$gwas_label else "Disease GWAS"

  if (!is.null(gwas)) {
    ann <- gwas$panel_annotations
    gwas_refs <- if (!is.null(gwas$gwas_refs)) gwas$gwas_refs else "published GWAS studies"

    n_ann <- NROW(ann)  # 0 when no annotation table was supplied
    # A missing GWAS flag is reported as "no direct hit".
    is_direct <- as.logical(ann$in_gwas)
    is_direct[is.na(is_direct)] <- FALSE
    direct_genes <- ann$gene[is_direct]

    gwas_lines <- c(gwas_lines,
      sprintf("Cross-reference with %d curated %s genes (%s):",
              gwas$n_gwas_genes, gwas_label, gwas_refs),
      "",
      "| Gene | GWAS Hit | Disease Relevance | Drug Relevance |",
      "|------|:--------:|-------------------|----------------|",
      # Long text is truncated for table readability
      sprintf("| *%s* | %s | %s | %s |",
              ann$gene,
              ifelse(is_direct, "Direct", "Indirect/None"),
              truncate_cell(ann$disease_relevance, 80),
              truncate_cell(ann$drug_relevance, 60)),
      "")

    # Summary
    gwas_lines <- c(gwas_lines,
      sprintf("**Summary:** %d/%d panel genes have direct %s association (%s).",
              sum(is_direct), n_ann, gwas_label,
              if (length(direct_genes) > 0) paste(direct_genes, collapse = ", ") else "none"),
      "The panel primarily captures expression-level disease signatures rather than",
      "germline genetic risk — consistent with the transcriptomic biomarker approach.",
      "")
  }

  # Built in one go so that NULL sections keep their name in the result.
  list(pathway_lines = pw, celltype_lines = ct_lines, gwas_lines = gwas_lines)
}
537
+
538
+
539
+ # ============================================================================
540
+ # ENHANCED MARKDOWN REPORT
541
+ # ============================================================================
542
+
543
+ #' Write comprehensive summary report as markdown
544
+ #' @keywords internal
545
+ .write_summary_report <- function(model_result, validation_result, output_dir,
546
+ data = NULL, features = NULL,
547
+ interpretation = NULL) {
548
+ p <- model_result$parameters
549
+ panel <- model_result$feature_importance[model_result$feature_importance$is_stable, ]
550
+ pos_genes <- panel$feature[panel$mean_coefficient > 0]
551
+ neg_genes <- panel$feature[panel$mean_coefficient < 0]
552
+
553
+ # Get enriched context (NULL fields => generic fallback)
554
+ rc <- if (!is.null(data)) data$report_context else NULL
555
+ ctx <- .build_context_lines(rc, data, p, features, model_result,
556
+ validation_result, format = "md")
557
+
558
+ has_validation <- !is.null(validation_result)
559
+
560
+ lines <- c(
561
+ "# LASSO Biomarker Panel Discovery Report",
562
+ "",
563
+ paste("**Generated:**", Sys.time()),
564
+ ""
565
+ )
566
+
567
+ # Discovery-only caveat (omitted if external validation was performed)
568
+ if (!has_validation) {
569
+ lines <- c(lines,
570
+ "> **IMPORTANT — Discovery-Only Analysis:** This report presents results from a",
571
+ "> single discovery cohort with no independent external validation. All performance",
572
+ "> metrics (AUC, sensitivity, specificity) are estimates from repeated subsampling",
573
+ "> of the same dataset and should be considered **preliminary**. They are expected",
574
+ "> to be optimistic relative to performance on a truly independent cohort. External",
575
+ "> validation is required before any clinical or translational use of this panel.",
576
+ "",
577
+ "---",
578
+ "")
579
+ }
580
+
581
+ # ---- 1. Study Objectives ----
582
+ lines <- c(lines, "## 1. Study Objectives", "")
583
+
584
+ if (!is.null(ctx$analytical_goals)) {
585
+ # Enriched: clinical question framing + cited aims
586
+ lines <- c(lines,
587
+ "This analysis was designed to address the following specific aims in the",
588
+ "context of predicting clinical outcomes from baseline molecular profiling:",
589
+ "",
590
+ ctx$analytical_goals,
591
+ "")
592
+ } else {
593
+ # Generic fallback
594
+ lines <- c(lines,
595
+ "This analysis aims to identify a minimal, interpretable biomarker panel from",
596
+ "high-dimensional gene expression data using LASSO (Least Absolute Shrinkage",
597
+ "and Selection Operator) regularized regression. The goal is to select a",
598
+ "parsimonious set of transcriptomic features that can predict a binary clinical",
599
+ "outcome from baseline tissue biopsies, enabling potential translation into a",
600
+ "clinical diagnostic or prognostic assay.",
601
+ "",
602
+ "**Specific aims:**",
603
+ "",
604
+ "1. Select a minimal gene panel (<15 features) with robust predictive performance",
605
+ "2. Evaluate model discrimination (AUC), calibration, and feature stability",
606
+ paste0("3. Assess cross-validation performance across ", p$n_repeats,
607
+ " repeated train/test splits"),
608
+ "")
609
+
610
+ if (!is.null(validation_result)) {
611
+ lines <- c(lines,
612
+ paste0("4. Validate the panel on an independent cohort (",
613
+ validation_result$cohort_name, ")"),
614
+ "")
615
+ }
616
+ }
617
+
618
+ # ---- 2. Disease Context & Rationale ----
619
+ lines <- c(lines, "## 2. Disease Context & Rationale", "")
620
+
621
+ if (!is.null(ctx$disease_sections)) {
622
+ # Enriched: multiple subsections from report_context
623
+ lines <- c(lines, ctx$disease_sections)
624
+ } else if (!is.null(data) && !is.null(data$description)) {
625
+ # Fallback: single description line
626
+ lines <- c(lines,
627
+ "**Dataset context:**",
628
+ data$description,
629
+ "")
630
+ }
631
+
632
+ # LASSO rationale
633
+ lines <- c(lines, "### Why LASSO for biomarker selection", "")
634
+
635
+ if (!is.null(ctx$lasso_rationale)) {
636
+ # Enriched: with inline citations
637
+ lines <- c(lines, ctx$lasso_rationale)
638
+ } else {
639
+ # Generic fallback
640
+ lines <- c(lines,
641
+ "LASSO logistic regression performs simultaneous feature selection and coefficient",
642
+ "estimation by applying an L1 penalty that shrinks many coefficients to exactly",
643
+ "zero. This produces sparse, interpretable models ideal for clinical biomarker",
644
+ "panels where a small number of measurable features is critical for translation",
645
+ "to diagnostic assays. Compared to other machine learning approaches, LASSO",
646
+ "provides transparent, interpretable coefficients and naturally handles the",
647
+ "p >> n problem common in omics data (thousands of features, tens to hundreds",
648
+ "of samples).",
649
+ "")
650
+
651
+ if (p$alpha < 1) {
652
+ lines <- c(lines,
653
+ paste0("This analysis uses **elastic net** regularization (alpha = ", p$alpha,
654
+ "), which combines L1 (LASSO) and L2 (ridge) penalties. This is",
655
+ " particularly useful when features are correlated, as it tends to",
656
+ " retain groups of correlated genes rather than arbitrarily selecting",
657
+ " one from each group."),
658
+ "")
659
+ }
660
+ }
661
+
662
+ # ---- 3. Datasets ----
663
+ lines <- c(lines,
664
+ "## 3. Datasets",
665
+ "",
666
+ "### 3.1 Discovery Cohort",
667
+ "")
668
+
669
+ if (!is.null(data)) {
670
+ n_resp <- sum(data$metadata[[data$outcome_col]] == 1, na.rm = TRUE)
671
+ n_nonresp <- sum(data$metadata[[data$outcome_col]] == 0, na.rm = TRUE)
672
+ treatment_tab <- table(data$metadata$treatment)
673
+ treat_str <- paste(names(treatment_tab), "=", treatment_tab, collapse = ", ")
674
+
675
+ lines <- c(lines,
676
+ paste("- **Samples:**", ncol(data$expression),
677
+ paste0("(", n_resp, " positive / ", n_nonresp, " negative)")),
678
+ paste("- **Genes measured:**", format(nrow(data$expression), big.mark = ",")),
679
+ paste("- **Features after filtering:**", p$n_features,
680
+ "(top most variable genes)"),
681
+ paste("- **Treatment arms:**", treat_str),
682
+ "")
683
+ } else {
684
+ lines <- c(lines,
685
+ paste("- **Samples:**", p$n_samples),
686
+ paste("- **Features:**", p$n_features),
687
+ "")
688
+ }
689
+
690
+ if (!is.null(validation_result)) {
691
+ lines <- c(lines,
692
+ paste("### 3.2 Validation Cohort:", validation_result$cohort_name),
693
+ "",
694
+ paste("- **Features matched:**", validation_result$n_features_used,
695
+ "/", validation_result$n_features_total, "discovery features"),
696
+ "",
697
+ "The validation cohort shares the same microarray platform as the discovery",
698
+ "cohort, enabling direct application of the trained model without cross-platform",
699
+ "normalization. Cross-drug validation (different therapeutic mechanisms) tests",
700
+ "whether the baseline transcriptomic signature captures shared biology of",
701
+ "treatment response rather than drug-specific effects.",
702
+ "")
703
+ }
704
+
705
+ # ---- 4. Methods ----
706
+ lines <- c(lines,
707
+ "## 4. Methods",
708
+ "",
709
+ "### 4.1 Feature Preparation",
710
+ "")
711
+
712
+ if (!is.null(features)) {
713
+ lines <- c(lines,
714
+ paste("- **Input features:**", ncol(features$X), "genes selected by variance"),
715
+ paste("- **Samples:**", nrow(features$X)),
716
+ "- **Preprocessing:** Log2-transformed (if not already), filtered to top",
717
+ paste0(" most variable genes, scaled to zero mean and unit variance"),
718
+ "")
719
+ } else {
720
+ lines <- c(lines,
721
+ paste("- **Features:**", p$n_features),
722
+ paste("- **Samples:**", p$n_samples),
723
+ "")
724
+ }
725
+
726
+ lines <- c(lines, "### 4.2 LASSO/Elastic Net Model", "")
727
+
728
+ if (!is.null(ctx$method_text)) {
729
+ # Enriched: methods with inline citations
730
+ lines <- c(lines, ctx$method_text)
731
+ } else {
732
+ # Generic fallback
733
+ lines <- c(lines,
734
+ paste0("- **Regularization:** alpha = ", p$alpha,
735
+ ifelse(p$alpha == 1, " (pure LASSO)", " (elastic net)")),
736
+ paste("- **Repeated CV:**", p$n_repeats, "iterations of class-balanced",
737
+ paste0(round(p$train_fraction * 100), "/", round((1 - p$train_fraction) * 100)),
738
+ "train/test splits"),
739
+ paste("- **Inner CV:**", p$n_inner_folds, "folds for lambda selection (cv.glmnet)"),
740
+ paste("- **Stability threshold:**", p$stability_threshold,
741
+ "(features must be selected in this fraction of iterations)"),
742
+ paste("- **Random seed:**", p$seed, "(reproducible)"),
743
+ "",
744
+ "**Algorithm:** For each of the repeated CV iterations, a class-balanced",
745
+ "train/test split is created. On the training set, cv.glmnet selects the",
746
+ "optimal lambda via inner cross-validation. Non-zero coefficients are recorded.",
747
+ "After all iterations, features selected in more than the stability threshold",
748
+ "fraction of iterations form the final panel. A final model is fit on all",
749
+ "samples using only these stable features.",
750
+ "",
751
+ "### 4.3 Performance Evaluation",
752
+ "",
753
+ "- **Discrimination:** AUC (Area Under ROC Curve) via pROC package",
754
+ "- **Confidence intervals:** 2.5th and 97.5th percentiles of fold AUCs",
755
+ "- **Sensitivity/Specificity:** At Youden's J-statistic optimal threshold",
756
+ "- **Calibration:** Predicted probability vs observed event rate",
757
+ "")
758
+ }
759
+
760
+ # ---- 5. Results ----
761
+ lines <- c(lines,
762
+ "## 5. Results",
763
+ "",
764
+ "### 5.1 Biomarker Panel",
765
+ "",
766
+ paste("**Panel size:**", nrow(panel), "features"),
767
+ paste0("(stability threshold: ", p$stability_threshold,
768
+ "; ", p$n_stable, " passed threshold",
769
+ ifelse(p$n_stable < nrow(panel),
770
+ paste0(", relaxed to top ", nrow(panel)), ""), ")"),
771
+ "",
772
+ "| Feature | Selection Frequency | Mean Coefficient | SD Coefficient |",
773
+ "|---------|:------------------:|:----------------:|:--------------:|")
774
+
775
+ for (i in seq_len(nrow(panel))) {
776
+ lines <- c(lines, paste0(
777
+ "| ", panel$feature[i],
778
+ " | ", round(panel$selection_frequency[i], 3),
779
+ " | ", sprintf("%+.4f", panel$mean_coefficient[i]),
780
+ " | ", round(panel$sd_coefficient[i], 4), " |"))
781
+ }
782
+
783
+ lines <- c(lines,
784
+ "",
785
+ "### 5.2 Discovery Performance (Nested CV)",
786
+ "",
787
+ paste("- **Mean AUC:**", round(model_result$mean_auc, 3),
788
+ "(95% CI:", round(model_result$auc_ci[1], 3), "-",
789
+ round(model_result$auc_ci[2], 3), ")"),
790
+ paste("- **Mean Sensitivity:**",
791
+ round(mean(model_result$fold_sensitivities, na.rm = TRUE), 3)),
792
+ paste("- **Mean Specificity:**",
793
+ round(mean(model_result$fold_specificities, na.rm = TRUE), 3)),
794
+ paste("- **AUC range across folds:**",
795
+ round(min(model_result$fold_aucs, na.rm = TRUE), 3), "-",
796
+ round(max(model_result$fold_aucs, na.rm = TRUE), 3)),
797
+ "")
798
+
799
+ if (!is.null(validation_result)) {
800
+ lines <- c(lines,
801
+ paste("### 5.3 External Validation:", validation_result$cohort_name),
802
+ "",
803
+ paste("- **AUC:**", round(validation_result$auc, 3),
804
+ "(95% CI:", round(validation_result$auc_ci[1], 3), "-",
805
+ round(validation_result$auc_ci[3], 3), ")"),
806
+ paste("- **Panel features used:**", validation_result$n_features_used,
807
+ "/", validation_result$n_features_total),
808
+ "")
809
+
810
+ if (!is.null(validation_result$performance_table)) {
811
+ perf_t <- validation_result$performance_table
812
+ for (j in seq_len(nrow(perf_t))) {
813
+ lines <- c(lines, paste0("- **", perf_t$metric[j], ":** ",
814
+ round(as.numeric(perf_t$value[j]), 3)))
815
+ }
816
+ lines <- c(lines, "")
817
+ }
818
+ }
819
+
820
+ # ---- 6. Published Benchmarks (if available) ----
821
+ if (!is.null(ctx$benchmarks)) {
822
+ lines <- c(lines,
823
+ "## 6. Published Benchmarks",
824
+ "",
825
+ ctx$benchmarks)
826
+ }
827
+
828
+ # ---- 7. Biological Interpretation ----
829
+ bio_num <- if (!is.null(ctx$benchmarks)) 7 else 6
830
+ lines <- c(lines,
831
+ paste0("## ", bio_num, ". Biological Interpretation"),
832
+ "")
833
+
834
+ interp_lines <- .build_interpretation_lines(interpretation, panel, format = "md")
835
+
836
+ if (!is.null(interp_lines$pathway_lines)) {
837
+ # Enriched: full pathway, celltype, GWAS analysis
838
+ lines <- c(lines,
839
+ paste0("### ", bio_num, ".1 Pathway Enrichment"),
840
+ "",
841
+ interp_lines$pathway_lines)
842
+
843
+ if (!is.null(interp_lines$celltype_lines)) {
844
+ lines <- c(lines,
845
+ paste0("### ", bio_num, ".2 Cell-Type Expression Context"),
846
+ "",
847
+ interp_lines$celltype_lines)
848
+ }
849
+
850
+ if (!is.null(interp_lines$gwas_lines)) {
851
+ gwas_header <- if (!is.null(interpretation$gwas_label)) {
852
+ paste0("Genetic Risk Overlap (", interpretation$gwas_label, ")")
853
+ } else "Genetic Risk Overlap"
854
+ lines <- c(lines,
855
+ paste0("### ", bio_num, ".3 ", gwas_header),
856
+ "",
857
+ interp_lines$gwas_lines)
858
+ }
859
+ } else {
860
+ # Fallback: basic coefficient direction
861
+ if (length(pos_genes) > 0) {
862
+ lines <- c(lines,
863
+ paste0("**Positive coefficient genes** (higher expression associated with ",
864
+ "positive outcome): ", paste(pos_genes, collapse = ", ")),
865
+ "")
866
+ }
867
+ if (length(neg_genes) > 0) {
868
+ lines <- c(lines,
869
+ paste0("**Negative coefficient genes** (higher expression associated with ",
870
+ "negative outcome): ", paste(neg_genes, collapse = ", ")),
871
+ "")
872
+ }
873
+
874
+ lines <- c(lines,
875
+ "The sign and magnitude of LASSO coefficients indicate the direction and",
876
+ "strength of each feature's contribution to the prediction. Features with",
877
+ "positive coefficients suggest that higher expression is associated with the",
878
+ "positive outcome class (e.g., treatment response), while negative coefficients",
879
+ "suggest the opposite. Selection frequency reflects the robustness of each",
880
+ "feature across bootstrap resampling iterations.",
881
+ "")
882
+ }
883
+
884
+ # ---- Limitations ----
885
+ lim_num <- bio_num + 1
886
+ lines <- c(lines,
887
+ paste0("## ", lim_num, ". Limitations"),
888
+ "")
889
+
890
+ if (!has_validation) {
891
+ lines <- c(lines,
892
+ paste0("### ", lim_num, ".1 No External Validation"),
893
+ "",
894
+ "This is a **discovery-only analysis**. No independent validation cohort was used.",
895
+ paste0("The reported AUC of ", round(model_result$mean_auc, 3),
896
+ " is derived entirely from the same ", p$n_samples,
897
+ "-sample dataset used for feature selection and model training."),
898
+ "Performance on an independent cohort is expected to be lower. The workflow",
899
+ "supports external validation via `validate_external.R` — this step is strongly",
900
+ "recommended before drawing conclusions about panel utility.",
901
+ "")
902
+ } else {
903
+ lines <- c(lines,
904
+ paste0("### ", lim_num, ".1 External Validation"),
905
+ "",
906
+ paste0("External validation was performed on ", validation_result$cohort_name,
907
+ " (AUC: ", round(validation_result$auc, 3), ")."),
908
+ "Additional independent cohorts are recommended to confirm generalizability.",
909
+ "")
910
+ }
911
+
912
+ lines <- c(lines,
913
+ paste0("### ", lim_num, ".2 Optimism Bias in CV"),
914
+ "",
915
+ paste0("The stability selection procedure uses ", p$n_repeats,
916
+ " random ", round(p$train_fraction * 100), "/",
917
+ round((1 - p$train_fraction) * 100),
918
+ " splits of the same samples for both (a) determining which"),
919
+ "features are stable and (b) estimating AUC. Because feature selection and",
920
+ "performance estimation share the same data pool, the reported AUC is not fully",
921
+ "independent of the feature selection step. This is a known source of optimism bias",
922
+ "in LASSO stability selection workflows. Expected magnitude: typically 0.02-0.05 AUC units.",
923
+ "",
924
+ paste0("### ", lim_num, ".3 Platform Specificity"),
925
+ "",
926
+ "The panel was derived from a specific expression platform. Transferability to other",
927
+ "platforms (e.g., RNA-seq vs microarray) requires cross-platform validation and may",
928
+ "be affected by gene coverage differences.",
929
+ "")
930
+
931
+ # ---- Downstream Use ----
932
+ ds_num <- lim_num + 1
933
+ lines <- c(lines,
934
+ paste0("## ", ds_num, ". Downstream Use"),
935
+ "",
936
+ "### Load the model for prediction on new data:",
937
+ "",
938
+ "```r",
939
+ 'model <- readRDS("results/lasso_model.rds")',
940
+ 'source("scripts/lasso_workflow.R")',
941
+ "predictions <- predict_biomarker_panel(model, new_X)",
942
+ "```",
943
+ "",
944
+ "### Suggested next steps:",
945
+ "",
946
+ "- **Pathway enrichment** of panel genes (functional-enrichment-from-degs)",
947
+ "- **Co-expression context** for panel genes (coexpression-network)",
948
+ "- **Patient stratification** using panel scores (multiomics-patient-stratification)",
949
+ "- **Literature validation** of panel genes in disease context",
950
+ "")
951
+
952
+ # ---- References (if report_context provides them) ----
953
+ if (!is.null(ctx$references)) {
954
+ ref_num <- ds_num + 1
955
+ lines <- c(lines,
956
+ paste0("## ", ref_num, ". References"),
957
+ "",
958
+ ctx$references)
959
+ }
960
+
961
+ writeLines(lines, file.path(output_dir, "summary_report.md"))
962
+ }
963
+
964
+
965
+ # ============================================================================
966
+ # PDF REPORT GENERATION
967
+ # ============================================================================
968
+
969
#' Generate PDF report with embedded plots via R Markdown
#'
#' Writes `summary_report.Rmd` into `output_dir` with `.write_rmd_report()` and
#' renders it. Rendering is attempted in this order, moving on only when the
#' previous attempt raises an error:
#' 1. PDF with the xelatex engine,
#' 2. PDF with rmarkdown's default LaTeX engine,
#' 3. HTML (`summary_report.html`).
#' Progress and every failure message are reported with `cat()`. If the
#' rmarkdown package is not installed, nothing is written.
#'
#' @param model_result,validation_result,data,features,interpretation Passed
#'   through unchanged to `.write_rmd_report()`.
#' @param output_dir Directory that receives the .Rmd file and the rendered
#'   report.
#' @return `NULL`, invisibly. The function is called for its side effects.
#' @keywords internal
.render_pdf_report <- function(model_result, validation_result, output_dir,
                               data = NULL, features = NULL,
                               interpretation = NULL) {

  # rmarkdown is an optional dependency: report and bail out if it is missing.
  if (!requireNamespace("rmarkdown", quietly = TRUE)) {
    cat(" rmarkdown not installed - skipping PDF generation\n")
    cat(" Install with: install.packages('rmarkdown')\n\n")
    return(invisible(NULL))
  }

  # Write the .Rmd file
  rmd_path <- file.path(output_dir, "summary_report.Rmd")
  .write_rmd_report(model_result, validation_result, output_dir, data, features,
                    rmd_path, interpretation)

  # Single place for the render arguments shared by all three attempts.
  # `output_format` is evaluated lazily inside render(), so an error while
  # constructing the format is still caught by the surrounding tryCatch().
  render_as <- function(output_format, output_file) {
    rmarkdown::render(
      rmd_path,
      output_format = output_format,
      output_file = output_file,
      output_dir = output_dir,
      quiet = TRUE,
      # Fresh environment per attempt so knitted chunks cannot leak state.
      envir = new.env(parent = globalenv())
    )
  }

  pdf_format <- function(...) {
    rmarkdown::pdf_document(
      toc = TRUE,
      toc_depth = 2,
      number_sections = TRUE,
      ...
    )
  }

  pdf_path <- tryCatch(
    render_as(pdf_format(latex_engine = "xelatex"), "summary_report.pdf"),
    error = function(e) {
      # Surface the first failure instead of discarding it, then retry with
      # the default engine.
      cat(" PDF rendering with xelatex failed:", conditionMessage(e), "\n")
      cat(" Retrying with the default LaTeX engine...\n")
      tryCatch(
        render_as(pdf_format(), "summary_report.pdf"),
        error = function(e2) {
          cat(" PDF rendering failed:", conditionMessage(e2), "\n")
          cat(" Falling back to HTML report...\n")
          # Try HTML as fallback
          tryCatch({
            render_as("html_document", "summary_report.html")
            cat(" Saved: summary_report.html (PDF unavailable)\n\n")
          }, error = function(e3) {
            cat(" HTML rendering also failed:", conditionMessage(e3), "\n")
            cat(" Markdown report available: summary_report.md\n\n")
          })
          # No PDF was produced on this path.
          NULL
        }
      )
    }
  )

  if (!is.null(pdf_path) && file.exists(file.path(output_dir, "summary_report.pdf"))) {
    cat(" Saved: summary_report.pdf\n\n")
  }

  invisible(NULL)
}
1045
+
1046
+
1047
#' Write R Markdown report file with embedded plots
#'
#' Builds the report as a character vector (YAML front matter, narrative
#' sections, a knitr chunk for the panel table, and one figure chunk for each
#' expected plot file that exists in `output_dir`) and writes it to `rmd_path`.
#' Fields returned by `.build_context_lines()` and
#' `.build_interpretation_lines()` are used when they are non-NULL; otherwise
#' generic fallback text is emitted.
#'
#' @param model_result List; this function reads `parameters`,
#'   `feature_importance`, `mean_auc`, `auc_ci`, `fold_aucs`,
#'   `fold_sensitivities` and `fold_specificities`.
#' @param validation_result `NULL` for a discovery-only report, otherwise a
#'   list; this function reads `cohort_name`, `auc`, `auc_ci`,
#'   `n_features_used` and `n_features_total`.
#' @param output_dir Directory checked for plot PNG files to embed.
#' @param data `NULL`, or a list; this function reads `report_context`,
#'   `description`, `metadata`, `outcome_col` and `expression`.
#' @param features `NULL`, or a list whose `X` element has its column and row
#'   counts reported.
#' @param rmd_path Path of the .Rmd file to write.
#' @param interpretation Passed to `.build_interpretation_lines()`;
#'   `tissue_label` and `gwas_label` are also read for captions and headings.
#' @return The result of `writeLines()`.
#' @keywords internal
.write_rmd_report <- function(model_result, validation_result, output_dir,
                              data, features, rmd_path,
                              interpretation = NULL) {

  p <- model_result$parameters
  panel <- model_result$feature_importance[model_result$feature_importance$is_stable, ]
  pos_genes <- panel$feature[panel$mean_coefficient > 0]
  neg_genes <- panel$feature[panel$mean_coefficient < 0]

  # Get enriched context (NULL fields => generic fallback)
  rc <- if (!is.null(data)) data$report_context else NULL
  ctx <- .build_context_lines(rc, data, p, features, model_result,
                              validation_result, format = "rmd")
  has_validation <- !is.null(validation_result)

  # ---- Front matter and setup chunk ----
  # Format options must be indented deeper than `pdf_document:` to be nested
  # under it in YAML.
  rmd <- c(
    "---",
    "title: \"LASSO Biomarker Panel Discovery Report\"",
    paste0("date: \"", Sys.Date(), "\""),
    "output:",
    "  pdf_document:",
    "    toc: true",
    "    toc_depth: 2",
    "    number_sections: true",
    "header-includes:",
    "  - \\usepackage{booktabs}",
    "  - \\usepackage{float}",
    "  - \\usepackage{graphicx}",
    "---",
    "",
    "```{r setup, include=FALSE}",
    "knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE,",
    " fig.pos = 'H', out.width = '100%')",
    "```",
    ""
  )

  # Discovery-only caveat (omitted if external validation was performed)
  if (!has_validation) {
    rmd <- c(rmd,
      "> **IMPORTANT --- Discovery-Only Analysis:** This report presents results from a",
      "> single discovery cohort with no independent external validation. All performance",
      "> metrics (AUC, sensitivity, specificity) are estimates from repeated subsampling",
      "> of the same dataset and should be considered **preliminary**. External",
      "> validation is required before any clinical or translational use of this panel.",
      "",
      "---",
      "")
  }

  # ---- Study Objectives ----
  rmd <- c(rmd, "# Study Objectives", "")

  if (!is.null(ctx$analytical_goals)) {
    # Enriched: clinical question framing + cited aims
    rmd <- c(rmd,
      "This analysis was designed to address the following specific aims in the",
      "context of predicting clinical outcomes from baseline molecular profiling:",
      "",
      ctx$analytical_goals,
      "")
  } else {
    # Generic fallback
    rmd <- c(rmd,
      "This analysis identifies a minimal, interpretable biomarker panel from",
      "high-dimensional gene expression data using penalized logistic regression (LASSO/elastic net).",
      "The goal is to select a parsimonious set of transcriptomic features that",
      "predict a binary clinical outcome from baseline tissue biopsies.",
      "",
      "**Specific aims:**",
      "",
      "1. Select a minimal gene panel (<15 features) with robust predictive performance",
      "2. Evaluate model discrimination (AUC), calibration, and feature stability",
      paste0("3. Assess cross-validation performance across ", p$n_repeats,
             " repeated train/test splits"),
      "")

    if (has_validation) {
      rmd <- c(rmd,
        paste0("4. Validate the panel on an independent cohort (",
               validation_result$cohort_name, ")"),
        "")
    }
  }

  # ---- Disease Context & Rationale ----
  rmd <- c(rmd, "# Disease Context & Rationale", "")

  if (!is.null(ctx$disease_sections)) {
    # Enriched: multiple subsections
    rmd <- c(rmd, ctx$disease_sections)
  } else if (!is.null(data) && !is.null(data$description)) {
    # Fallback: single description
    rmd <- c(rmd,
      "**Dataset context:**",
      data$description,
      "")
  }

  # LASSO rationale
  rmd <- c(rmd, "## Why LASSO for biomarker selection", "")

  if (!is.null(ctx$lasso_rationale)) {
    # Enriched: with inline citations
    rmd <- c(rmd, ctx$lasso_rationale)
  } else {
    # Generic fallback
    rmd <- c(rmd,
      "LASSO logistic regression performs simultaneous feature selection and coefficient",
      "estimation by applying an L1 penalty that shrinks many coefficients to",
      "exactly zero. This produces sparse, interpretable models ideal for clinical",
      "biomarker panels where a small number of measurable features is critical",
      "for translation to diagnostic assays.",
      "")

    if (p$alpha < 1) {
      rmd <- c(rmd,
        paste0("This analysis uses **elastic net** regularization ($\\alpha$ = ",
               p$alpha, "), combining L1 (LASSO) and L2 (ridge) penalties. ",
               "This retains groups of correlated genes rather than arbitrarily ",
               "selecting one from each group."),
        "")
    }
  }

  # ---- Datasets ----
  rmd <- c(rmd,
    "# Datasets",
    "",
    "## Discovery Cohort",
    "")

  if (!is.null(data)) {
    outcome <- data$metadata[[data$outcome_col]]
    n_resp <- sum(outcome == 1, na.rm = TRUE)
    n_nonresp <- sum(outcome == 0, na.rm = TRUE)

    rmd <- c(rmd,
      paste("- **Samples:**", ncol(data$expression),
            paste0("(", n_resp, " positive / ", n_nonresp, " negative)")),
      paste("- **Genes measured:**", format(nrow(data$expression), big.mark = ",")),
      paste("- **Features after filtering:**", p$n_features))

    # table(NULL) raises "nothing to tabulate", so only report treatment arms
    # when the metadata actually carries a `treatment` column.
    treatment <- data$metadata$treatment
    if (!is.null(treatment)) {
      treatment_tab <- table(treatment)
      treat_str <- paste(names(treatment_tab), "=", treatment_tab, collapse = ", ")
      rmd <- c(rmd, paste("- **Treatment arms:**", treat_str))
    }

    rmd <- c(rmd, "")
  } else {
    rmd <- c(rmd,
      paste("- **Samples:**", p$n_samples),
      paste("- **Features:**", p$n_features),
      "")
  }

  if (has_validation) {
    rmd <- c(rmd,
      paste("## Validation Cohort:", validation_result$cohort_name),
      "",
      paste("- **Features matched:**", validation_result$n_features_used,
            "/", validation_result$n_features_total),
      "",
      "The validation cohort enables cross-drug testing of whether the baseline",
      "transcriptomic signature captures shared biology of treatment response.",
      "")
  }

  # ---- Methods ----
  rmd <- c(rmd,
    "# Methods",
    "",
    "## Feature Preparation",
    "")

  if (!is.null(features)) {
    rmd <- c(rmd,
      paste("- **Input features:**", ncol(features$X), "genes selected by variance"),
      paste("- **Samples:**", nrow(features$X)),
      "- **Preprocessing:** Log2-transformed, variance-filtered, scaled (zero mean, unit variance)",
      "")
  } else {
    rmd <- c(rmd,
      paste("- **Features:**", p$n_features),
      paste("- **Samples:**", p$n_samples),
      "")
  }

  rmd <- c(rmd, "## LASSO/Elastic Net Model", "")

  if (!is.null(ctx$method_text)) {
    # Enriched: methods with inline citations
    rmd <- c(rmd, ctx$method_text)
  } else {
    # Generic fallback
    split_label <- paste0(round(p$train_fraction * 100), "/",
                          round((1 - p$train_fraction) * 100))
    rmd <- c(rmd,
      paste0("- **Regularization:** $\\alpha$ = ", p$alpha,
             ifelse(p$alpha == 1, " (pure LASSO)", " (elastic net)")),
      paste("- **Repeated CV:**", p$n_repeats, "iterations of class-balanced",
            split_label,
            "train/test splits"),
      paste("- **Inner CV:**", p$n_inner_folds, "folds for lambda selection"),
      paste("- **Stability threshold:**", p$stability_threshold),
      paste("- **Random seed:**", p$seed),
      "",
      "For each iteration, a class-balanced train/test split is created.",
      "On the training set, `cv.glmnet` selects the optimal $\\lambda$ via inner",
      "cross-validation. Non-zero coefficients are recorded. Features selected in",
      paste0("more than ", p$stability_threshold * 100, "% of iterations form the final panel."),
      "",
      "## Performance Evaluation",
      "",
      "- **Discrimination:** AUC via pROC package (DeLong method for CIs)",
      "- **Sensitivity/Specificity:** Youden's J-statistic optimal threshold",
      "- **Calibration:** Predicted probability vs observed event rate",
      "")
  }

  # ---- Results: Panel ----
  rmd <- c(rmd,
    "# Results",
    "",
    "## Biomarker Panel",
    "",
    paste("**Panel size:**", nrow(panel), "features"),
    "")

  if (nrow(panel) > 0) {
    # The panel is serialised into the chunk as literal R code. The table
    # format is chosen at knit time because this same .Rmd is also rendered
    # to HTML when PDF rendering fails, and raw LaTeX is not shown there.
    rmd <- c(rmd,
      "```{r panel-table, results='asis'}",
      paste0("panel_df <- data.frame(",
             "Feature = c(", paste0('"', panel$feature, '"', collapse = ", "), "), ",
             "Frequency = c(", paste(round(panel$selection_frequency, 3), collapse = ", "), "), ",
             "Coefficient = c(", paste(sprintf("%.4f", panel$mean_coefficient), collapse = ", "), "), ",
             "SD = c(", paste(round(panel$sd_coefficient, 4), collapse = ", "), "), ",
             "stringsAsFactors = FALSE)"),
      "colnames(panel_df) <- c('Feature', 'Selection Freq.', 'Mean Coef.', 'SD Coef.')",
      "tbl_format <- if (knitr::is_latex_output()) 'latex' else 'pipe'",
      "knitr::kable(panel_df, format = tbl_format, booktabs = TRUE, align = c('l','c','c','c'))",
      "```",
      "")
  } else {
    # An empty panel would generate a data.frame() call that cannot knit.
    rmd <- c(rmd,
      "No features are flagged as stable, so no panel table is shown.",
      "")
  }

  # ---- Results: Performance ----
  rmd <- c(rmd,
    "## Discovery Performance (Nested CV)",
    "",
    paste("- **Mean AUC:**", round(model_result$mean_auc, 3),
          "(95% CI:", round(model_result$auc_ci[1], 3), "--",
          round(model_result$auc_ci[2], 3), ")"),
    paste("- **Mean Sensitivity:**",
          round(mean(model_result$fold_sensitivities, na.rm = TRUE), 3)),
    paste("- **Mean Specificity:**",
          round(mean(model_result$fold_specificities, na.rm = TRUE), 3)),
    paste("- **AUC range:**",
          round(min(model_result$fold_aucs, na.rm = TRUE), 3), "--",
          round(max(model_result$fold_aucs, na.rm = TRUE), 3)),
    "")

  if (has_validation) {
    # NOTE(review): the upper bound is element 3 here but element 2 for
    # model_result$auc_ci above. Presumably validation_result$auc_ci is a
    # (lower, median, upper) vector -- confirm against the producer.
    rmd <- c(rmd,
      paste("## External Validation:", validation_result$cohort_name),
      "",
      paste("- **AUC:**", round(validation_result$auc, 3),
            "(95% CI:", round(validation_result$auc_ci[1], 3), "--",
            round(validation_result$auc_ci[3], 3), ")"),
      paste("- **Features used:**", validation_result$n_features_used,
            "/", validation_result$n_features_total),
      "")
  }

  # ---- Plots ----
  rmd <- c(rmd,
    "# Diagnostic Plots",
    "")

  plot_specs <- list(
    list(file = "roc_curve.png",
         caption = "ROC curve showing model discrimination. AUC annotated."),
    list(file = "stability_barplot.png",
         caption = "Feature selection frequency across CV iterations. Dashed line indicates the stability threshold."),
    list(file = "coefficient_forest.png",
         caption = "LASSO coefficients with 95% confidence intervals for each panel feature."),
    list(file = "calibration_curve.png",
         caption = "Calibration plot: predicted probability vs observed event rate."),
    list(file = "auc_distribution.png",
         caption = "Distribution of AUC values across cross-validation folds."),
    list(file = "feature_heatmap.png",
         caption = "Heatmap of panel features across samples, annotated by outcome.")
  )

  # Only plots that were actually produced get a figure chunk; the chunk label
  # is the file name without its extension.
  for (ps in plot_specs) {
    plot_file <- file.path(output_dir, ps$file)
    if (file.exists(plot_file)) {
      rmd <- c(rmd,
        paste0("```{r ", sub("\\.png$", "", ps$file), ", fig.cap='", ps$caption, "'}"),
        paste0("knitr::include_graphics('", ps$file, "')"),
        "```",
        "")
    }
  }

  # ---- Published Benchmarks (if available) ----
  if (!is.null(ctx$benchmarks)) {
    rmd <- c(rmd,
      "# Published Benchmarks",
      "",
      ctx$benchmarks)
  }

  # ---- Biological Interpretation ----
  rmd <- c(rmd,
    "# Biological Interpretation",
    "")

  interp_lines <- .build_interpretation_lines(interpretation, panel, format = "rmd")

  if (!is.null(interp_lines$pathway_lines)) {
    # Enriched: full pathway, celltype, GWAS analysis
    rmd <- c(rmd,
      "## Pathway Enrichment",
      "",
      interp_lines$pathway_lines)

    # Embed GSEA plot if available
    gsea_plot <- file.path(output_dir, "gsea_hallmark_dotplot.png")
    if (file.exists(gsea_plot)) {
      rmd <- c(rmd,
        "```{r gsea-plot, echo=FALSE, fig.cap='GSEA: MSigDB Hallmark Pathways', out.width='90%'}",
        paste0("knitr::include_graphics('", basename(gsea_plot), "')"),
        "```",
        "")
    }

    if (!is.null(interp_lines$celltype_lines)) {
      rmd <- c(rmd,
        "## Cell-Type Expression Context",
        "",
        interp_lines$celltype_lines)

      # Embed cell-type plot if available
      ct_plot <- file.path(output_dir, "celltype_expression.png")
      ct_caption <- if (!is.null(interpretation$tissue_label)) {
        paste("Cell-Type Expression in", interpretation$tissue_label)
      } else "Cell-Type Expression"
      if (file.exists(ct_plot)) {
        # The caption sits inside a single-quoted chunk option, so an
        # apostrophe in the tissue label must be backslash-escaped.
        ct_caption_escaped <- gsub("'", "\\'", ct_caption, fixed = TRUE)
        rmd <- c(rmd,
          sprintf("```{r celltype-plot, echo=FALSE, fig.cap='%s', out.width='90%%'}",
                  ct_caption_escaped),
          paste0("knitr::include_graphics('", basename(ct_plot), "')"),
          "```",
          "")
      }
    }

    if (!is.null(interp_lines$gwas_lines)) {
      gwas_header <- if (!is.null(interpretation$gwas_label)) {
        paste0("Genetic Risk Overlap (", interpretation$gwas_label, ")")
      } else "Genetic Risk Overlap"
      rmd <- c(rmd,
        paste("##", gwas_header),
        "",
        interp_lines$gwas_lines)
    }
  } else {
    # Fallback: basic coefficient direction
    if (length(pos_genes) > 0) {
      rmd <- c(rmd,
        paste0("**Positive coefficient genes** (higher expression $\\rightarrow$ ",
               "positive outcome): ", paste(pos_genes, collapse = ", ")),
        "")
    }
    if (length(neg_genes) > 0) {
      rmd <- c(rmd,
        paste0("**Negative coefficient genes** (higher expression $\\rightarrow$ ",
               "negative outcome): ", paste(neg_genes, collapse = ", ")),
        "")
    }

    rmd <- c(rmd,
      "The sign and magnitude of LASSO coefficients indicate each feature's",
      "direction and strength of contribution. Selection frequency reflects",
      "robustness across bootstrap resampling. Features selected in $\\geq$80%",
      "of iterations are considered highly stable.",
      "")
  }

  # ---- Limitations ----
  rmd <- c(rmd, "# Limitations", "")

  if (!has_validation) {
    rmd <- c(rmd,
      "## No External Validation",
      "",
      "This is a **discovery-only analysis**. No independent validation cohort was used.",
      paste0("The reported AUC of ", round(model_result$mean_auc, 3),
             " is derived entirely from the same ", p$n_samples,
             "-sample dataset used for feature selection and model training."),
      "Performance on an independent cohort is expected to be lower. The workflow",
      "supports external validation via `validate_external.R` --- this step is strongly",
      "recommended before drawing conclusions about panel utility.",
      "")
  } else {
    rmd <- c(rmd,
      "## External Validation",
      "",
      paste0("External validation was performed on ", validation_result$cohort_name,
             " (AUC: ", round(validation_result$auc, 3), ")."),
      "Additional independent cohorts are recommended to confirm generalizability.",
      "")
  }

  rmd <- c(rmd,
    "## Optimism Bias in CV",
    "",
    paste0("The stability selection procedure uses ", p$n_repeats,
           " random ", round(p$train_fraction * 100), "/",
           round((1 - p$train_fraction) * 100),
           " splits of the same samples for both (a) determining which"),
    "features are stable and (b) estimating AUC. Because feature selection and",
    "performance estimation share the same data pool, the reported AUC is not fully",
    "independent of the feature selection step. This is a known source of optimism bias.",
    "Expected magnitude: typically 0.02--0.05 AUC units.",
    "",
    "## Platform Specificity",
    "",
    "The panel was derived from a specific expression platform. Transferability to other",
    "platforms (e.g., RNA-seq vs microarray) requires cross-platform validation.",
    "")

  # ---- Downstream Use ----
  rmd <- c(rmd,
    "# Downstream Use",
    "",
    "Load the model for prediction on new data:",
    "",
    "```r",
    'model <- readRDS("results/lasso_model.rds")',
    'source("scripts/lasso_workflow.R")',
    "predictions <- predict_biomarker_panel(model, new_X)",
    "```",
    "")

  # ---- References (if report_context provides them) ----
  if (!is.null(ctx$references)) {
    rmd <- c(rmd,
      "# References",
      "",
      ctx$references)
  }

  writeLines(rmd, rmd_path)
}