@bgicli/bgicli 2.1.1 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (1266) hide show
  1. package/data/skills/aav-vector-design-agent/SKILL.md +198 -0
  2. package/data/skills/adaptyv/SKILL.md +112 -0
  3. package/data/skills/adhd-daily-planner/SKILL.md +271 -0
  4. package/data/skills/aeon/SKILL.md +372 -0
  5. package/data/skills/agent-browser/SKILL.md +159 -0
  6. package/data/skills/agentd-drug-discovery/SKILL.md +52 -0
  7. package/data/skills/ai-analyzer/SKILL.md +218 -0
  8. package/data/skills/alphafold/SKILL.md +183 -0
  9. package/data/skills/alphafold-database/SKILL.md +500 -0
  10. package/data/skills/anndata/SKILL.md +394 -0
  11. package/data/skills/antibody-design-agent/SKILL.md +64 -0
  12. package/data/skills/arboreto/SKILL.md +237 -0
  13. package/data/skills/armored-cart-design-agent/SKILL.md +225 -0
  14. package/data/skills/arxiv-search/SKILL.md +224 -0
  15. package/data/skills/autonomous-oncology-agent/SKILL.md +77 -0
  16. package/data/skills/bayesian-optimizer/SKILL.md +60 -0
  17. package/data/skills/benchling-integration/SKILL.md +473 -0
  18. package/data/skills/bgpt-paper-search/SKILL.md +81 -0
  19. package/data/skills/bindcraft/SKILL.md +198 -0
  20. package/data/skills/binder-design/SKILL.md +182 -0
  21. package/data/skills/binding-characterization/SKILL.md +234 -0
  22. package/data/skills/bindingdb-database/SKILL.md +332 -0
  23. package/data/skills/bio-admet-prediction/SKILL.md +224 -0
  24. package/data/skills/bio-alignment-files-bam-statistics/SKILL.md +340 -0
  25. package/data/skills/bio-alignment-filtering/SKILL.md +322 -0
  26. package/data/skills/bio-alignment-indexing/SKILL.md +249 -0
  27. package/data/skills/bio-alignment-io/SKILL.md +301 -0
  28. package/data/skills/bio-alignment-msa-parsing/SKILL.md +366 -0
  29. package/data/skills/bio-alignment-msa-statistics/SKILL.md +375 -0
  30. package/data/skills/bio-alignment-pairwise/SKILL.md +277 -0
  31. package/data/skills/bio-alignment-sorting/SKILL.md +296 -0
  32. package/data/skills/bio-alignment-validation/SKILL.md +374 -0
  33. package/data/skills/bio-atac-seq-atac-peak-calling/SKILL.md +221 -0
  34. package/data/skills/bio-atac-seq-atac-qc/SKILL.md +292 -0
  35. package/data/skills/bio-atac-seq-differential-accessibility/SKILL.md +268 -0
  36. package/data/skills/bio-atac-seq-footprinting/SKILL.md +256 -0
  37. package/data/skills/bio-atac-seq-motif-deviation/SKILL.md +319 -0
  38. package/data/skills/bio-atac-seq-nucleosome-positioning/SKILL.md +321 -0
  39. package/data/skills/bio-basecalling/SKILL.md +368 -0
  40. package/data/skills/bio-batch-downloads/SKILL.md +384 -0
  41. package/data/skills/bio-batch-processing/SKILL.md +303 -0
  42. package/data/skills/bio-bedgraph-handling/SKILL.md +336 -0
  43. package/data/skills/bio-blast-searches/SKILL.md +354 -0
  44. package/data/skills/bio-causal-genomics-colocalization-analysis/SKILL.md +264 -0
  45. package/data/skills/bio-causal-genomics-fine-mapping/SKILL.md +267 -0
  46. package/data/skills/bio-causal-genomics-mediation-analysis/SKILL.md +264 -0
  47. package/data/skills/bio-causal-genomics-mendelian-randomization/SKILL.md +221 -0
  48. package/data/skills/bio-causal-genomics-pleiotropy-detection/SKILL.md +292 -0
  49. package/data/skills/bio-cfdna-preprocessing/SKILL.md +200 -0
  50. package/data/skills/bio-chipseq-differential-binding/SKILL.md +262 -0
  51. package/data/skills/bio-chipseq-motif-analysis/SKILL.md +387 -0
  52. package/data/skills/bio-chipseq-peak-annotation/SKILL.md +239 -0
  53. package/data/skills/bio-chipseq-peak-calling/SKILL.md +277 -0
  54. package/data/skills/bio-chipseq-qc/SKILL.md +391 -0
  55. package/data/skills/bio-chipseq-super-enhancers/SKILL.md +288 -0
  56. package/data/skills/bio-chipseq-visualization/SKILL.md +289 -0
  57. package/data/skills/bio-clinical-databases-clinvar-lookup/SKILL.md +188 -0
  58. package/data/skills/bio-clinical-databases-dbsnp-queries/SKILL.md +171 -0
  59. package/data/skills/bio-clinical-databases-gnomad-frequencies/SKILL.md +205 -0
  60. package/data/skills/bio-clinical-databases-hla-typing/SKILL.md +248 -0
  61. package/data/skills/bio-clinical-databases-myvariant-queries/SKILL.md +174 -0
  62. package/data/skills/bio-clinical-databases-pharmacogenomics/SKILL.md +232 -0
  63. package/data/skills/bio-clinical-databases-polygenic-risk/SKILL.md +276 -0
  64. package/data/skills/bio-clinical-databases-somatic-signatures/SKILL.md +261 -0
  65. package/data/skills/bio-clinical-databases-tumor-mutational-burden/SKILL.md +301 -0
  66. package/data/skills/bio-clinical-databases-variant-prioritization/SKILL.md +225 -0
  67. package/data/skills/bio-clip-seq-binding-site-annotation/SKILL.md +66 -0
  68. package/data/skills/bio-clip-seq-clip-alignment/SKILL.md +70 -0
  69. package/data/skills/bio-clip-seq-clip-motif-analysis/SKILL.md +62 -0
  70. package/data/skills/bio-clip-seq-clip-peak-calling/SKILL.md +282 -0
  71. package/data/skills/bio-clip-seq-clip-preprocessing/SKILL.md +142 -0
  72. package/data/skills/bio-codon-usage/SKILL.md +353 -0
  73. package/data/skills/bio-comparative-genomics-ancestral-reconstruction/SKILL.md +312 -0
  74. package/data/skills/bio-comparative-genomics-hgt-detection/SKILL.md +341 -0
  75. package/data/skills/bio-comparative-genomics-ortholog-inference/SKILL.md +308 -0
  76. package/data/skills/bio-comparative-genomics-positive-selection/SKILL.md +354 -0
  77. package/data/skills/bio-comparative-genomics-synteny-analysis/SKILL.md +315 -0
  78. package/data/skills/bio-compressed-files/SKILL.md +263 -0
  79. package/data/skills/bio-consensus-sequences/SKILL.md +340 -0
  80. package/data/skills/bio-copy-number-cnv-annotation/SKILL.md +307 -0
  81. package/data/skills/bio-copy-number-cnv-visualization/SKILL.md +294 -0
  82. package/data/skills/bio-copy-number-cnvkit-analysis/SKILL.md +290 -0
  83. package/data/skills/bio-copy-number-gatk-cnv/SKILL.md +270 -0
  84. package/data/skills/bio-crispr-screens-base-editing-analysis/SKILL.md +110 -0
  85. package/data/skills/bio-crispr-screens-batch-correction/SKILL.md +316 -0
  86. package/data/skills/bio-crispr-screens-crispresso-editing/SKILL.md +205 -0
  87. package/data/skills/bio-crispr-screens-hit-calling/SKILL.md +264 -0
  88. package/data/skills/bio-crispr-screens-jacks-analysis/SKILL.md +313 -0
  89. package/data/skills/bio-crispr-screens-library-design/SKILL.md +417 -0
  90. package/data/skills/bio-crispr-screens-mageck-analysis/SKILL.md +222 -0
  91. package/data/skills/bio-crispr-screens-screen-qc/SKILL.md +243 -0
  92. package/data/skills/bio-ctdna-mutation-detection/SKILL.md +234 -0
  93. package/data/skills/bio-data-visualization-circos-plots/SKILL.md +405 -0
  94. package/data/skills/bio-data-visualization-color-palettes/SKILL.md +244 -0
  95. package/data/skills/bio-data-visualization-genome-browser-tracks/SKILL.md +328 -0
  96. package/data/skills/bio-data-visualization-genome-tracks/SKILL.md +249 -0
  97. package/data/skills/bio-data-visualization-ggplot2-fundamentals/SKILL.md +313 -0
  98. package/data/skills/bio-data-visualization-heatmaps-clustering/SKILL.md +227 -0
  99. package/data/skills/bio-data-visualization-interactive-visualization/SKILL.md +210 -0
  100. package/data/skills/bio-data-visualization-multipanel-figures/SKILL.md +274 -0
  101. package/data/skills/bio-data-visualization-specialized-omics-plots/SKILL.md +251 -0
  102. package/data/skills/bio-data-visualization-upset-plots/SKILL.md +228 -0
  103. package/data/skills/bio-data-visualization-volcano-customization/SKILL.md +233 -0
  104. package/data/skills/bio-de-deseq2-basics/SKILL.md +376 -0
  105. package/data/skills/bio-de-edger-basics/SKILL.md +418 -0
  106. package/data/skills/bio-de-results/SKILL.md +378 -0
  107. package/data/skills/bio-de-visualization/SKILL.md +408 -0
  108. package/data/skills/bio-differential-expression-batch-correction/SKILL.md +253 -0
  109. package/data/skills/bio-differential-expression-timeseries-de/SKILL.md +370 -0
  110. package/data/skills/bio-differential-splicing/SKILL.md +177 -0
  111. package/data/skills/bio-duplicate-handling/SKILL.md +292 -0
  112. package/data/skills/bio-entrez-fetch/SKILL.md +334 -0
  113. package/data/skills/bio-entrez-link/SKILL.md +325 -0
  114. package/data/skills/bio-entrez-search/SKILL.md +311 -0
  115. package/data/skills/bio-epidemiological-genomics-amr-surveillance/SKILL.md +233 -0
  116. package/data/skills/bio-epidemiological-genomics-pathogen-typing/SKILL.md +202 -0
  117. package/data/skills/bio-epidemiological-genomics-phylodynamics/SKILL.md +207 -0
  118. package/data/skills/bio-epidemiological-genomics-transmission-inference/SKILL.md +237 -0
  119. package/data/skills/bio-epidemiological-genomics-variant-surveillance/SKILL.md +237 -0
  120. package/data/skills/bio-epitranscriptomics-m6a-differential/SKILL.md +88 -0
  121. package/data/skills/bio-epitranscriptomics-m6a-peak-calling/SKILL.md +89 -0
  122. package/data/skills/bio-epitranscriptomics-m6anet-analysis/SKILL.md +101 -0
  123. package/data/skills/bio-epitranscriptomics-merip-preprocessing/SKILL.md +81 -0
  124. package/data/skills/bio-epitranscriptomics-modification-visualization/SKILL.md +98 -0
  125. package/data/skills/bio-experimental-design-batch-design/SKILL.md +110 -0
  126. package/data/skills/bio-experimental-design-multiple-testing/SKILL.md +98 -0
  127. package/data/skills/bio-experimental-design-power-analysis/SKILL.md +84 -0
  128. package/data/skills/bio-experimental-design-sample-size/SKILL.md +93 -0
  129. package/data/skills/bio-expression-matrix-counts-ingest/SKILL.md +220 -0
  130. package/data/skills/bio-expression-matrix-gene-id-mapping/SKILL.md +256 -0
  131. package/data/skills/bio-expression-matrix-metadata-joins/SKILL.md +271 -0
  132. package/data/skills/bio-expression-matrix-sparse-handling/SKILL.md +247 -0
  133. package/data/skills/bio-fastq-quality/SKILL.md +279 -0
  134. package/data/skills/bio-filter-sequences/SKILL.md +265 -0
  135. package/data/skills/bio-flow-cytometry-bead-normalization/SKILL.md +315 -0
  136. package/data/skills/bio-flow-cytometry-clustering-phenotyping/SKILL.md +237 -0
  137. package/data/skills/bio-flow-cytometry-compensation-transformation/SKILL.md +196 -0
  138. package/data/skills/bio-flow-cytometry-cytometry-qc/SKILL.md +382 -0
  139. package/data/skills/bio-flow-cytometry-differential-analysis/SKILL.md +217 -0
  140. package/data/skills/bio-flow-cytometry-doublet-detection/SKILL.md +288 -0
  141. package/data/skills/bio-flow-cytometry-fcs-handling/SKILL.md +221 -0
  142. package/data/skills/bio-flow-cytometry-gating-analysis/SKILL.md +193 -0
  143. package/data/skills/bio-format-conversion/SKILL.md +193 -0
  144. package/data/skills/bio-fragment-analysis/SKILL.md +214 -0
  145. package/data/skills/bio-gatk-variant-calling/SKILL.md +422 -0
  146. package/data/skills/bio-genome-assembly-assembly-polishing/SKILL.md +333 -0
  147. package/data/skills/bio-genome-assembly-assembly-qc/SKILL.md +344 -0
  148. package/data/skills/bio-genome-assembly-contamination-detection/SKILL.md +235 -0
  149. package/data/skills/bio-genome-assembly-hifi-assembly/SKILL.md +178 -0
  150. package/data/skills/bio-genome-assembly-long-read-assembly/SKILL.md +307 -0
  151. package/data/skills/bio-genome-assembly-metagenome-assembly/SKILL.md +227 -0
  152. package/data/skills/bio-genome-assembly-scaffolding/SKILL.md +204 -0
  153. package/data/skills/bio-genome-assembly-short-read-assembly/SKILL.md +319 -0
  154. package/data/skills/bio-genome-engineering-base-editing-design/SKILL.md +277 -0
  155. package/data/skills/bio-genome-engineering-grna-design/SKILL.md +221 -0
  156. package/data/skills/bio-genome-engineering-hdr-template-design/SKILL.md +264 -0
  157. package/data/skills/bio-genome-engineering-off-target-prediction/SKILL.md +232 -0
  158. package/data/skills/bio-genome-engineering-prime-editing-design/SKILL.md +275 -0
  159. package/data/skills/bio-genome-intervals-bed-file-basics/SKILL.md +357 -0
  160. package/data/skills/bio-genome-intervals-bigwig-tracks/SKILL.md +351 -0
  161. package/data/skills/bio-genome-intervals-coverage-analysis/SKILL.md +300 -0
  162. package/data/skills/bio-genome-intervals-gtf-gff-handling/SKILL.md +345 -0
  163. package/data/skills/bio-genome-intervals-interval-arithmetic/SKILL.md +485 -0
  164. package/data/skills/bio-genome-intervals-proximity-operations/SKILL.md +337 -0
  165. package/data/skills/bio-geo-data/SKILL.md +380 -0
  166. package/data/skills/bio-hi-c-analysis-compartment-analysis/SKILL.md +261 -0
  167. package/data/skills/bio-hi-c-analysis-contact-pairs/SKILL.md +278 -0
  168. package/data/skills/bio-hi-c-analysis-hic-data-io/SKILL.md +260 -0
  169. package/data/skills/bio-hi-c-analysis-hic-differential/SKILL.md +328 -0
  170. package/data/skills/bio-hi-c-analysis-hic-visualization/SKILL.md +297 -0
  171. package/data/skills/bio-hi-c-analysis-loop-calling/SKILL.md +284 -0
  172. package/data/skills/bio-hi-c-analysis-matrix-operations/SKILL.md +274 -0
  173. package/data/skills/bio-hi-c-analysis-tad-detection/SKILL.md +239 -0
  174. package/data/skills/bio-imaging-mass-cytometry-cell-segmentation/SKILL.md +241 -0
  175. package/data/skills/bio-imaging-mass-cytometry-data-preprocessing/SKILL.md +279 -0
  176. package/data/skills/bio-imaging-mass-cytometry-interactive-annotation/SKILL.md +304 -0
  177. package/data/skills/bio-imaging-mass-cytometry-phenotyping/SKILL.md +231 -0
  178. package/data/skills/bio-imaging-mass-cytometry-quality-metrics/SKILL.md +316 -0
  179. package/data/skills/bio-imaging-mass-cytometry-spatial-analysis/SKILL.md +246 -0
  180. package/data/skills/bio-immunoinformatics-epitope-prediction/SKILL.md +259 -0
  181. package/data/skills/bio-immunoinformatics-immunogenicity-scoring/SKILL.md +275 -0
  182. package/data/skills/bio-immunoinformatics-mhc-binding-prediction/SKILL.md +260 -0
  183. package/data/skills/bio-immunoinformatics-neoantigen-prediction/SKILL.md +277 -0
  184. package/data/skills/bio-immunoinformatics-tcr-epitope-binding/SKILL.md +257 -0
  185. package/data/skills/bio-isoform-switching/SKILL.md +192 -0
  186. package/data/skills/bio-liquid-biopsy-pipeline/SKILL.md +311 -0
  187. package/data/skills/bio-local-blast/SKILL.md +350 -0
  188. package/data/skills/bio-long-read-sequencing-clair3-variants/SKILL.md +252 -0
  189. package/data/skills/bio-long-read-sequencing-isoseq-analysis/SKILL.md +334 -0
  190. package/data/skills/bio-long-read-sequencing-nanopore-methylation/SKILL.md +110 -0
  191. package/data/skills/bio-longitudinal-monitoring/SKILL.md +271 -0
  192. package/data/skills/bio-longread-alignment/SKILL.md +193 -0
  193. package/data/skills/bio-longread-medaka/SKILL.md +176 -0
  194. package/data/skills/bio-longread-qc/SKILL.md +224 -0
  195. package/data/skills/bio-longread-structural-variants/SKILL.md +201 -0
  196. package/data/skills/bio-machine-learning-atlas-mapping/SKILL.md +139 -0
  197. package/data/skills/bio-machine-learning-biomarker-discovery/SKILL.md +157 -0
  198. package/data/skills/bio-machine-learning-model-validation/SKILL.md +148 -0
  199. package/data/skills/bio-machine-learning-omics-classifiers/SKILL.md +146 -0
  200. package/data/skills/bio-machine-learning-prediction-explanation/SKILL.md +162 -0
  201. package/data/skills/bio-machine-learning-survival-analysis/SKILL.md +176 -0
  202. package/data/skills/bio-metabolomics-lipidomics/SKILL.md +265 -0
  203. package/data/skills/bio-metabolomics-metabolite-annotation/SKILL.md +241 -0
  204. package/data/skills/bio-metabolomics-msdial-preprocessing/SKILL.md +308 -0
  205. package/data/skills/bio-metabolomics-normalization-qc/SKILL.md +283 -0
  206. package/data/skills/bio-metabolomics-pathway-mapping/SKILL.md +237 -0
  207. package/data/skills/bio-metabolomics-statistical-analysis/SKILL.md +276 -0
  208. package/data/skills/bio-metabolomics-targeted-analysis/SKILL.md +314 -0
  209. package/data/skills/bio-metabolomics-xcms-preprocessing/SKILL.md +268 -0
  210. package/data/skills/bio-metagenomics-abundance/SKILL.md +203 -0
  211. package/data/skills/bio-metagenomics-amr-detection/SKILL.md +293 -0
  212. package/data/skills/bio-metagenomics-functional-profiling/SKILL.md +252 -0
  213. package/data/skills/bio-metagenomics-kraken/SKILL.md +204 -0
  214. package/data/skills/bio-metagenomics-metaphlan/SKILL.md +214 -0
  215. package/data/skills/bio-metagenomics-strain-tracking/SKILL.md +292 -0
  216. package/data/skills/bio-metagenomics-visualization/SKILL.md +240 -0
  217. package/data/skills/bio-methylation-based-detection/SKILL.md +223 -0
  218. package/data/skills/bio-methylation-bismark-alignment/SKILL.md +195 -0
  219. package/data/skills/bio-methylation-calling/SKILL.md +200 -0
  220. package/data/skills/bio-methylation-dmr-detection/SKILL.md +211 -0
  221. package/data/skills/bio-methylation-methylkit/SKILL.md +219 -0
  222. package/data/skills/bio-microbiome-amplicon-processing/SKILL.md +137 -0
  223. package/data/skills/bio-microbiome-differential-abundance/SKILL.md +147 -0
  224. package/data/skills/bio-microbiome-diversity-analysis/SKILL.md +188 -0
  225. package/data/skills/bio-microbiome-functional-prediction/SKILL.md +153 -0
  226. package/data/skills/bio-microbiome-qiime2-workflow/SKILL.md +219 -0
  227. package/data/skills/bio-microbiome-taxonomy-assignment/SKILL.md +168 -0
  228. package/data/skills/bio-molecular-descriptors/SKILL.md +200 -0
  229. package/data/skills/bio-molecular-io/SKILL.md +188 -0
  230. package/data/skills/bio-motif-search/SKILL.md +354 -0
  231. package/data/skills/bio-multi-omics-data-harmonization/SKILL.md +228 -0
  232. package/data/skills/bio-multi-omics-mixomics-analysis/SKILL.md +221 -0
  233. package/data/skills/bio-multi-omics-mofa-integration/SKILL.md +225 -0
  234. package/data/skills/bio-multi-omics-similarity-network/SKILL.md +235 -0
  235. package/data/skills/bio-orchestrator/SKILL.md +133 -0
  236. package/data/skills/bio-paired-end-fastq/SKILL.md +334 -0
  237. package/data/skills/bio-pathway-enrichment-visualization/SKILL.md +278 -0
  238. package/data/skills/bio-pathway-go-enrichment/SKILL.md +218 -0
  239. package/data/skills/bio-pathway-gsea/SKILL.md +227 -0
  240. package/data/skills/bio-pathway-kegg-pathways/SKILL.md +234 -0
  241. package/data/skills/bio-pathway-reactome/SKILL.md +215 -0
  242. package/data/skills/bio-pathway-wikipathways/SKILL.md +255 -0
  243. package/data/skills/bio-pdb-geometric-analysis/SKILL.md +475 -0
  244. package/data/skills/bio-pdb-structure-io/SKILL.md +296 -0
  245. package/data/skills/bio-pdb-structure-modification/SKILL.md +448 -0
  246. package/data/skills/bio-pdb-structure-navigation/SKILL.md +335 -0
  247. package/data/skills/bio-phasing-imputation-genotype-imputation/SKILL.md +201 -0
  248. package/data/skills/bio-phasing-imputation-haplotype-phasing/SKILL.md +190 -0
  249. package/data/skills/bio-phasing-imputation-imputation-qc/SKILL.md +265 -0
  250. package/data/skills/bio-phasing-imputation-reference-panels/SKILL.md +203 -0
  251. package/data/skills/bio-phylo-distance-calculations/SKILL.md +307 -0
  252. package/data/skills/bio-phylo-modern-tree-inference/SKILL.md +274 -0
  253. package/data/skills/bio-phylo-tree-io/SKILL.md +252 -0
  254. package/data/skills/bio-phylo-tree-manipulation/SKILL.md +375 -0
  255. package/data/skills/bio-phylo-tree-visualization/SKILL.md +275 -0
  256. package/data/skills/bio-pileup-generation/SKILL.md +314 -0
  257. package/data/skills/bio-population-genetics-association-testing/SKILL.md +293 -0
  258. package/data/skills/bio-population-genetics-linkage-disequilibrium/SKILL.md +260 -0
  259. package/data/skills/bio-population-genetics-plink-basics/SKILL.md +338 -0
  260. package/data/skills/bio-population-genetics-population-structure/SKILL.md +352 -0
  261. package/data/skills/bio-population-genetics-scikit-allel-analysis/SKILL.md +306 -0
  262. package/data/skills/bio-population-genetics-selection-statistics/SKILL.md +251 -0
  263. package/data/skills/bio-primer-design-primer-basics/SKILL.md +289 -0
  264. package/data/skills/bio-primer-design-primer-validation/SKILL.md +344 -0
  265. package/data/skills/bio-primer-design-qpcr-primers/SKILL.md +273 -0
  266. package/data/skills/bio-proteomics-data-import/SKILL.md +122 -0
  267. package/data/skills/bio-proteomics-dia-analysis/SKILL.md +246 -0
  268. package/data/skills/bio-proteomics-differential-abundance/SKILL.md +129 -0
  269. package/data/skills/bio-proteomics-peptide-identification/SKILL.md +122 -0
  270. package/data/skills/bio-proteomics-protein-inference/SKILL.md +174 -0
  271. package/data/skills/bio-proteomics-proteomics-qc/SKILL.md +208 -0
  272. package/data/skills/bio-proteomics-ptm-analysis/SKILL.md +139 -0
  273. package/data/skills/bio-proteomics-quantification/SKILL.md +141 -0
  274. package/data/skills/bio-proteomics-spectral-libraries/SKILL.md +270 -0
  275. package/data/skills/bio-reaction-enumeration/SKILL.md +251 -0
  276. package/data/skills/bio-read-alignment-bowtie2-alignment/SKILL.md +189 -0
  277. package/data/skills/bio-read-alignment-bwa-alignment/SKILL.md +166 -0
  278. package/data/skills/bio-read-alignment-hisat2-alignment/SKILL.md +205 -0
  279. package/data/skills/bio-read-alignment-star-alignment/SKILL.md +204 -0
  280. package/data/skills/bio-read-qc-adapter-trimming/SKILL.md +222 -0
  281. package/data/skills/bio-read-qc-contamination-screening/SKILL.md +252 -0
  282. package/data/skills/bio-read-qc-fastp-workflow/SKILL.md +278 -0
  283. package/data/skills/bio-read-qc-quality-filtering/SKILL.md +231 -0
  284. package/data/skills/bio-read-qc-quality-reports/SKILL.md +204 -0
  285. package/data/skills/bio-read-qc-umi-processing/SKILL.md +391 -0
  286. package/data/skills/bio-read-sequences/SKILL.md +319 -0
  287. package/data/skills/bio-reference-operations/SKILL.md +302 -0
  288. package/data/skills/bio-reporting-automated-qc-reports/SKILL.md +103 -0
  289. package/data/skills/bio-reporting-figure-export/SKILL.md +112 -0
  290. package/data/skills/bio-reporting-jupyter-reports/SKILL.md +98 -0
  291. package/data/skills/bio-reporting-quarto-reports/SKILL.md +295 -0
  292. package/data/skills/bio-reporting-rmarkdown-reports/SKILL.md +276 -0
  293. package/data/skills/bio-research-tools-biomarker-signature-studio/SKILL.md +99 -0
  294. package/data/skills/bio-restriction-enzyme-selection/SKILL.md +342 -0
  295. package/data/skills/bio-restriction-fragment-analysis/SKILL.md +259 -0
  296. package/data/skills/bio-restriction-mapping/SKILL.md +239 -0
  297. package/data/skills/bio-restriction-sites/SKILL.md +222 -0
  298. package/data/skills/bio-reverse-complement/SKILL.md +250 -0
  299. package/data/skills/bio-ribo-seq-orf-detection/SKILL.md +303 -0
  300. package/data/skills/bio-ribo-seq-riboseq-preprocessing/SKILL.md +176 -0
  301. package/data/skills/bio-ribo-seq-ribosome-periodicity/SKILL.md +182 -0
  302. package/data/skills/bio-ribo-seq-ribosome-stalling/SKILL.md +217 -0
  303. package/data/skills/bio-ribo-seq-translation-efficiency/SKILL.md +183 -0
  304. package/data/skills/bio-rna-quantification-alignment-free-quant/SKILL.md +226 -0
  305. package/data/skills/bio-rna-quantification-count-matrix-qc/SKILL.md +310 -0
  306. package/data/skills/bio-rna-quantification-featurecounts-counting/SKILL.md +190 -0
  307. package/data/skills/bio-rna-quantification-tximport-workflow/SKILL.md +240 -0
  308. package/data/skills/bio-rnaseq-qc/SKILL.md +320 -0
  309. package/data/skills/bio-sam-bam-basics/SKILL.md +248 -0
  310. package/data/skills/bio-sashimi-plots/SKILL.md +175 -0
  311. package/data/skills/bio-seq-objects/SKILL.md +240 -0
  312. package/data/skills/bio-sequence-properties/SKILL.md +397 -0
  313. package/data/skills/bio-sequence-similarity/SKILL.md +335 -0
  314. package/data/skills/bio-sequence-slicing/SKILL.md +232 -0
  315. package/data/skills/bio-sequence-statistics/SKILL.md +318 -0
  316. package/data/skills/bio-similarity-searching/SKILL.md +200 -0
  317. package/data/skills/bio-single-cell-batch-integration/SKILL.md +317 -0
  318. package/data/skills/bio-single-cell-cell-annotation/SKILL.md +259 -0
  319. package/data/skills/bio-single-cell-cell-communication/SKILL.md +257 -0
  320. package/data/skills/bio-single-cell-clustering/SKILL.md +330 -0
  321. package/data/skills/bio-single-cell-data-io/SKILL.md +315 -0
  322. package/data/skills/bio-single-cell-doublet-detection/SKILL.md +362 -0
  323. package/data/skills/bio-single-cell-lineage-tracing/SKILL.md +319 -0
  324. package/data/skills/bio-single-cell-markers-annotation/SKILL.md +317 -0
  325. package/data/skills/bio-single-cell-metabolite-communication/SKILL.md +258 -0
  326. package/data/skills/bio-single-cell-multimodal-integration/SKILL.md +242 -0
  327. package/data/skills/bio-single-cell-perturb-seq/SKILL.md +258 -0
  328. package/data/skills/bio-single-cell-preprocessing/SKILL.md +338 -0
  329. package/data/skills/bio-single-cell-scatac-analysis/SKILL.md +326 -0
  330. package/data/skills/bio-single-cell-splicing/SKILL.md +199 -0
  331. package/data/skills/bio-single-cell-trajectory-inference/SKILL.md +225 -0
  332. package/data/skills/bio-small-rna-seq-differential-mirna/SKILL.md +194 -0
  333. package/data/skills/bio-small-rna-seq-mirdeep2-analysis/SKILL.md +180 -0
  334. package/data/skills/bio-small-rna-seq-mirge3-analysis/SKILL.md +178 -0
  335. package/data/skills/bio-small-rna-seq-smrna-preprocessing/SKILL.md +174 -0
  336. package/data/skills/bio-small-rna-seq-target-prediction/SKILL.md +202 -0
  337. package/data/skills/bio-spatial-transcriptomics-image-analysis/SKILL.md +283 -0
  338. package/data/skills/bio-spatial-transcriptomics-spatial-communication/SKILL.md +299 -0
  339. package/data/skills/bio-spatial-transcriptomics-spatial-data-io/SKILL.md +272 -0
  340. package/data/skills/bio-spatial-transcriptomics-spatial-deconvolution/SKILL.md +314 -0
  341. package/data/skills/bio-spatial-transcriptomics-spatial-domains/SKILL.md +254 -0
  342. package/data/skills/bio-spatial-transcriptomics-spatial-multiomics/SKILL.md +181 -0
  343. package/data/skills/bio-spatial-transcriptomics-spatial-neighbors/SKILL.md +198 -0
  344. package/data/skills/bio-spatial-transcriptomics-spatial-preprocessing/SKILL.md +269 -0
  345. package/data/skills/bio-spatial-transcriptomics-spatial-proteomics/SKILL.md +124 -0
  346. package/data/skills/bio-spatial-transcriptomics-spatial-statistics/SKILL.md +237 -0
  347. package/data/skills/bio-spatial-transcriptomics-spatial-visualization/SKILL.md +287 -0
  348. package/data/skills/bio-splicing-pipeline/SKILL.md +253 -0
  349. package/data/skills/bio-splicing-qc/SKILL.md +190 -0
  350. package/data/skills/bio-splicing-quantification/SKILL.md +145 -0
  351. package/data/skills/bio-sra-data/SKILL.md +363 -0
  352. package/data/skills/bio-structural-biology-alphafold-predictions/SKILL.md +258 -0
  353. package/data/skills/bio-structural-biology-modern-structure-prediction/SKILL.md +346 -0
  354. package/data/skills/bio-substructure-search/SKILL.md +206 -0
  355. package/data/skills/bio-systems-biology-context-specific-models/SKILL.md +241 -0
  356. package/data/skills/bio-systems-biology-flux-balance-analysis/SKILL.md +206 -0
  357. package/data/skills/bio-systems-biology-gene-essentiality/SKILL.md +235 -0
  358. package/data/skills/bio-systems-biology-metabolic-reconstruction/SKILL.md +215 -0
  359. package/data/skills/bio-systems-biology-model-curation/SKILL.md +243 -0
  360. package/data/skills/bio-tcr-bcr-analysis-immcantation-analysis/SKILL.md +195 -0
  361. package/data/skills/bio-tcr-bcr-analysis-mixcr-analysis/SKILL.md +167 -0
  362. package/data/skills/bio-tcr-bcr-analysis-repertoire-visualization/SKILL.md +224 -0
  363. package/data/skills/bio-tcr-bcr-analysis-scirpy-analysis/SKILL.md +168 -0
  364. package/data/skills/bio-tcr-bcr-analysis-vdjtools-analysis/SKILL.md +188 -0
  365. package/data/skills/bio-transcription-translation/SKILL.md +237 -0
  366. package/data/skills/bio-tumor-fraction-estimation/SKILL.md +211 -0
  367. package/data/skills/bio-uniprot-access/SKILL.md +239 -0
  368. package/data/skills/bio-variant-annotation/SKILL.md +410 -0
  369. package/data/skills/bio-variant-calling/SKILL.md +266 -0
  370. package/data/skills/bio-variant-calling-clinical-interpretation/SKILL.md +355 -0
  371. package/data/skills/bio-variant-calling-deepvariant/SKILL.md +315 -0
  372. package/data/skills/bio-variant-calling-filtering-best-practices/SKILL.md +403 -0
  373. package/data/skills/bio-variant-calling-joint-calling/SKILL.md +338 -0
  374. package/data/skills/bio-variant-calling-structural-variant-calling/SKILL.md +253 -0
  375. package/data/skills/bio-variant-normalization/SKILL.md +325 -0
  376. package/data/skills/bio-vcf-basics/SKILL.md +342 -0
  377. package/data/skills/bio-vcf-manipulation/SKILL.md +429 -0
  378. package/data/skills/bio-vcf-statistics/SKILL.md +445 -0
  379. package/data/skills/bio-virtual-screening/SKILL.md +263 -0
  380. package/data/skills/bio-workflow-management-cwl-workflows/SKILL.md +433 -0
  381. package/data/skills/bio-workflow-management-nextflow-pipelines/SKILL.md +386 -0
  382. package/data/skills/bio-workflow-management-snakemake-workflows/SKILL.md +383 -0
  383. package/data/skills/bio-workflow-management-wdl-workflows/SKILL.md +500 -0
  384. package/data/skills/bio-workflows-atacseq-pipeline/SKILL.md +362 -0
  385. package/data/skills/bio-workflows-biomarker-pipeline/SKILL.md +272 -0
  386. package/data/skills/bio-workflows-chipseq-pipeline/SKILL.md +282 -0
  387. package/data/skills/bio-workflows-clip-pipeline/SKILL.md +268 -0
  388. package/data/skills/bio-workflows-cnv-pipeline/SKILL.md +324 -0
  389. package/data/skills/bio-workflows-crispr-editing-pipeline/SKILL.md +455 -0
  390. package/data/skills/bio-workflows-crispr-screen-pipeline/SKILL.md +278 -0
  391. package/data/skills/bio-workflows-cytometry-pipeline/SKILL.md +328 -0
  392. package/data/skills/bio-workflows-expression-to-pathways/SKILL.md +329 -0
  393. package/data/skills/bio-workflows-fastq-to-variants/SKILL.md +374 -0
  394. package/data/skills/bio-workflows-genome-assembly-pipeline/SKILL.md +290 -0
  395. package/data/skills/bio-workflows-gwas-pipeline/SKILL.md +323 -0
  396. package/data/skills/bio-workflows-hic-pipeline/SKILL.md +304 -0
  397. package/data/skills/bio-workflows-imc-pipeline/SKILL.md +304 -0
  398. package/data/skills/bio-workflows-longread-sv-pipeline/SKILL.md +281 -0
  399. package/data/skills/bio-workflows-merip-pipeline/SKILL.md +222 -0
  400. package/data/skills/bio-workflows-metabolic-modeling-pipeline/SKILL.md +408 -0
  401. package/data/skills/bio-workflows-metabolomics-pipeline/SKILL.md +297 -0
  402. package/data/skills/bio-workflows-metagenomics-pipeline/SKILL.md +283 -0
  403. package/data/skills/bio-workflows-methylation-pipeline/SKILL.md +274 -0
  404. package/data/skills/bio-workflows-microbiome-pipeline/SKILL.md +221 -0
  405. package/data/skills/bio-workflows-multi-omics-pipeline/SKILL.md +362 -0
  406. package/data/skills/bio-workflows-multiome-pipeline/SKILL.md +298 -0
  407. package/data/skills/bio-workflows-neoantigen-pipeline/SKILL.md +325 -0
  408. package/data/skills/bio-workflows-outbreak-pipeline/SKILL.md +341 -0
  409. package/data/skills/bio-workflows-proteomics-pipeline/SKILL.md +226 -0
  410. package/data/skills/bio-workflows-riboseq-pipeline/SKILL.md +94 -0
  411. package/data/skills/bio-workflows-rnaseq-to-de/SKILL.md +345 -0
  412. package/data/skills/bio-workflows-scrnaseq-pipeline/SKILL.md +354 -0
  413. package/data/skills/bio-workflows-smrna-pipeline/SKILL.md +86 -0
  414. package/data/skills/bio-workflows-somatic-variant-pipeline/SKILL.md +313 -0
  415. package/data/skills/bio-workflows-spatial-pipeline/SKILL.md +267 -0
  416. package/data/skills/bio-workflows-tcr-pipeline/SKILL.md +84 -0
  417. package/data/skills/bio-write-sequences/SKILL.md +205 -0
  418. package/data/skills/bioinformatics-singlecell/SKILL.md +143 -0
  419. package/data/skills/biokernel/SKILL.md +61 -0
  420. package/data/skills/biologist-analyst/SKILL.md +799 -0
  421. package/data/skills/biomaster-workflows/SKILL.md +55 -0
  422. package/data/skills/biomcp-server/SKILL.md +65 -0
  423. package/data/skills/biomedical-data-analysis/SKILL.md +56 -0
  424. package/data/skills/biomedical-search/SKILL.md +214 -0
  425. package/data/skills/biomni/SKILL.md +309 -0
  426. package/data/skills/biomni-general-agent/SKILL.md +43 -0
  427. package/data/skills/biomni-research-agent/SKILL.md +76 -0
  428. package/data/skills/biopython/SKILL.md +437 -0
  429. package/data/skills/biorxiv-database/SKILL.md +477 -0
  430. package/data/skills/bioservices/SKILL.md +355 -0
  431. package/data/skills/boltz/SKILL.md +188 -0
  432. package/data/skills/boltzgen/SKILL.md +287 -0
  433. package/data/skills/bone-marrow-ai-agent/SKILL.md +163 -0
  434. package/data/skills/brainstorming/SKILL.md +96 -0
  435. package/data/skills/brenda-database/SKILL.md +714 -0
  436. package/data/skills/bulk-combat-correction/SKILL.md +54 -0
  437. package/data/skills/bulk-deg-analysis/SKILL.md +61 -0
  438. package/data/skills/bulk-deseq2-analysis/SKILL.md +50 -0
  439. package/data/skills/bulk-stringdb-ppi/SKILL.md +49 -0
  440. package/data/skills/bulk-to-single-deconvolution/SKILL.md +50 -0
  441. package/data/skills/bulk-trajblend-interpolation/SKILL.md +52 -0
  442. package/data/skills/bulk-wgcna-analysis/SKILL.md +56 -0
  443. package/data/skills/cancer-metabolism-agent/SKILL.md +180 -0
  444. package/data/skills/care-coordination/SKILL.md +35 -0
  445. package/data/skills/cart-design-optimizer-agent/SKILL.md +162 -0
  446. package/data/skills/cbioportal-database/SKILL.md +367 -0
  447. package/data/skills/cell-free-expression/SKILL.md +291 -0
  448. package/data/skills/cellagent-annotation/SKILL.md +69 -0
  449. package/data/skills/cellfree-rna-agent/SKILL.md +182 -0
  450. package/data/skills/cellular-senescence-agent/SKILL.md +183 -0
  451. package/data/skills/cellxgene-census/SKILL.md +505 -0
  452. package/data/skills/chai/SKILL.md +272 -0
  453. package/data/skills/chatehr-clinician-assistant/SKILL.md +67 -0
  454. package/data/skills/chematagent-drug-discovery/SKILL.md +68 -0
  455. package/data/skills/chembl-database/SKILL.md +383 -0
  456. package/data/skills/chembl-search/SKILL.md +211 -0
  457. package/data/skills/chemcrow-drug-discovery/SKILL.md +61 -0
  458. package/data/skills/chemical-property-lookup/SKILL.md +42 -0
  459. package/data/skills/chemist-analyst/SKILL.md +1603 -0
  460. package/data/skills/chemistry-agent/SKILL.md +62 -0
  461. package/data/skills/chip-clonal-hematopoiesis-agent/SKILL.md +224 -0
  462. package/data/skills/chromosomal-instability-agent/SKILL.md +187 -0
  463. package/data/skills/citation-management/SKILL.md +1081 -0
  464. package/data/skills/claims-appeals/SKILL.md +35 -0
  465. package/data/skills/claw-ancestry-pca/SKILL.md +145 -0
  466. package/data/skills/claw-metagenomics/SKILL.md +238 -0
  467. package/data/skills/claw-semantic-sim/SKILL.md +151 -0
  468. package/data/skills/clinical-decision-support/SKILL.md +504 -0
  469. package/data/skills/clinical-diagnostic-reasoning/SKILL.md +222 -0
  470. package/data/skills/clinical-nlp-extractor/SKILL.md +59 -0
  471. package/data/skills/clinical-note-summarization/SKILL.md +52 -0
  472. package/data/skills/clinical-reports/SKILL.md +1127 -0
  473. package/data/skills/clinical-trial-protocol-skill/SKILL.md +508 -0
  474. package/data/skills/clinical-trials-search/SKILL.md +211 -0
  475. package/data/skills/clinicaltrials-database/SKILL.md +501 -0
  476. package/data/skills/clinpgx/SKILL.md +96 -0
  477. package/data/skills/clinpgx-database/SKILL.md +632 -0
  478. package/data/skills/clinvar-database/SKILL.md +356 -0
  479. package/data/skills/cnv-caller-agent/SKILL.md +171 -0
  480. package/data/skills/coagulation-thrombosis-agent/SKILL.md +141 -0
  481. package/data/skills/cobrapy/SKILL.md +457 -0
  482. package/data/skills/compbioagent-explorer/SKILL.md +67 -0
  483. package/data/skills/computational-pathology-agent/SKILL.md +72 -0
  484. package/data/skills/convergence-study/SKILL.md +98 -0
  485. package/data/skills/cosmic-database/SKILL.md +330 -0
  486. package/data/skills/crisis-detection-intervention-ai/SKILL.md +569 -0
  487. package/data/skills/crisis-response-protocol/SKILL.md +456 -0
  488. package/data/skills/crispr-guide-design/SKILL.md +72 -0
  489. package/data/skills/crispr-offtarget-predictor/SKILL.md +56 -0
  490. package/data/skills/cryoem-ai-drug-design-agent/SKILL.md +216 -0
  491. package/data/skills/ctdna-dynamics-mrd-agent/SKILL.md +206 -0
  492. package/data/skills/cytokine-storm-analysis-agent/SKILL.md +180 -0
  493. package/data/skills/dask/SKILL.md +454 -0
  494. package/data/skills/data-stats-analysis/SKILL.md +477 -0
  495. package/data/skills/data-transform/SKILL.md +576 -0
  496. package/data/skills/data-visualization-biomedical/SKILL.md +252 -0
  497. package/data/skills/data-visualization-expert/SKILL.md +72 -0
  498. package/data/skills/data-viz-plots/SKILL.md +461 -0
  499. package/data/skills/datacommons-client/SKILL.md +253 -0
  500. package/data/skills/datamol/SKILL.md +700 -0
  501. package/data/skills/deep-research/SKILL.md +111 -0
  502. package/data/skills/deep-research-swarm/SKILL.md +62 -0
  503. package/data/skills/deep-visual-proteomics-agent/SKILL.md +149 -0
  504. package/data/skills/deepchem/SKILL.md +591 -0
  505. package/data/skills/deeptools/SKILL.md +525 -0
  506. package/data/skills/depmap/SKILL.md +300 -0
  507. package/data/skills/diffdock/SKILL.md +477 -0
  508. package/data/skills/differentiation-schemes/SKILL.md +159 -0
  509. package/data/skills/digital-twin-clinical-agent/SKILL.md +228 -0
  510. package/data/skills/dispatching-parallel-agents/SKILL.md +180 -0
  511. package/data/skills/dnanexus-integration/SKILL.md +376 -0
  512. package/data/skills/doc-coauthoring/SKILL.md +375 -0
  513. package/data/skills/docx/SKILL.md +590 -0
  514. package/data/skills/docx-official/SKILL.md +197 -0
  515. package/data/skills/drug-discovery-search/SKILL.md +214 -0
  516. package/data/skills/drug-interaction-checker/SKILL.md +56 -0
  517. package/data/skills/drug-labels-search/SKILL.md +211 -0
  518. package/data/skills/drug-photo/SKILL.md +149 -0
  519. package/data/skills/drugbank-database/SKILL.md +184 -0
  520. package/data/skills/drugbank-search/SKILL.md +211 -0
  521. package/data/skills/ehr-fhir-integration/SKILL.md +60 -0
  522. package/data/skills/emergency-card/SKILL.md +426 -0
  523. package/data/skills/ena-database/SKILL.md +198 -0
  524. package/data/skills/ensembl-database/SKILL.md +305 -0
  525. package/data/skills/epidemiologist-analyst/SKILL.md +1844 -0
  526. package/data/skills/epigenomics-methylgpt-agent/SKILL.md +111 -0
  527. package/data/skills/equity-scorer/SKILL.md +182 -0
  528. package/data/skills/esm/SKILL.md +300 -0
  529. package/data/skills/etetoolkit/SKILL.md +617 -0
  530. package/data/skills/executing-plans/SKILL.md +84 -0
  531. package/data/skills/exosome-ev-analysis-agent/SKILL.md +171 -0
  532. package/data/skills/exploratory-data-analysis/SKILL.md +440 -0
  533. package/data/skills/family-health-analyzer/SKILL.md +137 -0
  534. package/data/skills/fastq-analysis/SKILL.md +191 -0
  535. package/data/skills/fda-database/SKILL.md +512 -0
  536. package/data/skills/fhir-developer-skill/SKILL.md +294 -0
  537. package/data/skills/fhir-development/SKILL.md +35 -0
  538. package/data/skills/find-skills/SKILL.md +133 -0
  539. package/data/skills/finishing-a-development-branch/SKILL.md +200 -0
  540. package/data/skills/fitness-analyzer/SKILL.md +431 -0
  541. package/data/skills/flowio/SKILL.md +602 -0
  542. package/data/skills/foldseek/SKILL.md +179 -0
  543. package/data/skills/galaxy-bridge/SKILL.md +215 -0
  544. package/data/skills/gene-database/SKILL.md +173 -0
  545. package/data/skills/gene-panel-design-agent/SKILL.md +192 -0
  546. package/data/skills/geniml/SKILL.md +312 -0
  547. package/data/skills/genome-compare/SKILL.md +127 -0
  548. package/data/skills/geo-database/SKILL.md +809 -0
  549. package/data/skills/geopandas/SKILL.md +245 -0
  550. package/data/skills/gget/SKILL.md +865 -0
  551. package/data/skills/ginkgo-cloud-lab/SKILL.md +56 -0
  552. package/data/skills/glycoengineering/SKILL.md +338 -0
  553. package/data/skills/gnomad-database/SKILL.md +395 -0
  554. package/data/skills/goal-analyzer/SKILL.md +605 -0
  555. package/data/skills/grief-companion/SKILL.md +250 -0
  556. package/data/skills/gsea-enrichment/SKILL.md +151 -0
  557. package/data/skills/gtars/SKILL.md +279 -0
  558. package/data/skills/gtex-database/SKILL.md +315 -0
  559. package/data/skills/gwas-database/SKILL.md +602 -0
  560. package/data/skills/gwas-lookup/SKILL.md +122 -0
  561. package/data/skills/gwas-prs/SKILL.md +178 -0
  562. package/data/skills/health-trend-analyzer/SKILL.md +451 -0
  563. package/data/skills/hemoglobinopathy-analysis-agent/SKILL.md +167 -0
  564. package/data/skills/hipaa-compliance/SKILL.md +230 -0
  565. package/data/skills/histolab/SKILL.md +672 -0
  566. package/data/skills/hmdb-database/SKILL.md +190 -0
  567. package/data/skills/hrd-analysis-agent/SKILL.md +184 -0
  568. package/data/skills/hrv-alexithymia-expert/SKILL.md +151 -0
  569. package/data/skills/hypogenic/SKILL.md +649 -0
  570. package/data/skills/hypothesis-generation/SKILL.md +286 -0
  571. package/data/skills/imaging-data-commons/SKILL.md +843 -0
  572. package/data/skills/immune-checkpoint-combination-agent/SKILL.md +170 -0
  573. package/data/skills/infographics/SKILL.md +563 -0
  574. package/data/skills/instrument-data-to-allotrope/SKILL.md +280 -0
  575. package/data/skills/interpro-database/SKILL.md +305 -0
  576. package/data/skills/ipsae/SKILL.md +190 -0
  577. package/data/skills/iso-13485-certification/SKILL.md +678 -0
  578. package/data/skills/jaspar-database/SKILL.md +351 -0
  579. package/data/skills/jungian-psychologist/SKILL.md +191 -0
  580. package/data/skills/kegg-database/SKILL.md +371 -0
  581. package/data/skills/knowledge-synthesis/SKILL.md +283 -0
  582. package/data/skills/kragen-knowledge-graph/SKILL.md +68 -0
  583. package/data/skills/lab-results/SKILL.md +35 -0
  584. package/data/skills/labarchive-integration/SKILL.md +262 -0
  585. package/data/skills/labstep/SKILL.md +208 -0
  586. package/data/skills/lamindb/SKILL.md +384 -0
  587. package/data/skills/latchbio-integration/SKILL.md +347 -0
  588. package/data/skills/latex-posters/SKILL.md +1602 -0
  589. package/data/skills/leads-literature-mining/SKILL.md +68 -0
  590. package/data/skills/ligandmpnn/SKILL.md +170 -0
  591. package/data/skills/linear-solvers/SKILL.md +165 -0
  592. package/data/skills/liquid-biopsy-analytics-agent/SKILL.md +171 -0
  593. package/data/skills/lit-synthesizer/SKILL.md +53 -0
  594. package/data/skills/literature-review/SKILL.md +584 -0
  595. package/data/skills/literature-search/SKILL.md +214 -0
  596. package/data/skills/lobster-bioinformatics/SKILL.md +305 -0
  597. package/data/skills/long-read-sequencing-agent/SKILL.md +181 -0
  598. package/data/skills/mage-antibody-generator/SKILL.md +54 -0
  599. package/data/skills/markdown-mermaid-writing/SKILL.md +327 -0
  600. package/data/skills/markitdown/SKILL.md +486 -0
  601. package/data/skills/matchms/SKILL.md +197 -0
  602. package/data/skills/matplotlib/SKILL.md +359 -0
  603. package/data/skills/mcpmed-bioinformatics-server/SKILL.md +42 -0
  604. package/data/skills/medchem/SKILL.md +400 -0
  605. package/data/skills/medea-therapeutic-discovery/SKILL.md +45 -0
  606. package/data/skills/medical-entity-extractor/SKILL.md +144 -0
  607. package/data/skills/medical-imaging-review/SKILL.md +170 -0
  608. package/data/skills/medical-research-toolkit/SKILL.md +273 -0
  609. package/data/skills/medrxiv-search/SKILL.md +211 -0
  610. package/data/skills/mental-health-analyzer/SKILL.md +981 -0
  611. package/data/skills/mesh-generation/SKILL.md +149 -0
  612. package/data/skills/metabolomics-workbench-database/SKILL.md +253 -0
  613. package/data/skills/microbiome-cancer-agent/SKILL.md +180 -0
  614. package/data/skills/modern-drug-rehab-computer/SKILL.md +392 -0
  615. package/data/skills/molecular-dynamics/SKILL.md +457 -0
  616. package/data/skills/molecular-glue-discovery-agent/SKILL.md +224 -0
  617. package/data/skills/molecule-evolution-agent/SKILL.md +62 -0
  618. package/data/skills/molfeat/SKILL.md +505 -0
  619. package/data/skills/monarch-database/SKILL.md +372 -0
  620. package/data/skills/mpn-progression-monitor-agent/SKILL.md +228 -0
  621. package/data/skills/mpn-research-assistant/SKILL.md +197 -0
  622. package/data/skills/mrd-edge-detection-agent/SKILL.md +213 -0
  623. package/data/skills/multi-ancestry-prs-agent/SKILL.md +224 -0
  624. package/data/skills/multi-search-engine/SKILL.md +110 -0
  625. package/data/skills/multimodal-medical-imaging/SKILL.md +59 -0
  626. package/data/skills/multimodal-radpath-fusion-agent/SKILL.md +213 -0
  627. package/data/skills/myeloma-mrd-agent/SKILL.md +184 -0
  628. package/data/skills/networkx/SKILL.md +435 -0
  629. package/data/skills/neurokit2/SKILL.md +350 -0
  630. package/data/skills/neuropixels-analysis/SKILL.md +344 -0
  631. package/data/skills/nextflow-development/SKILL.md +290 -0
  632. package/data/skills/ngs-analysis/SKILL.md +183 -0
  633. package/data/skills/nicheformer-spatial-agent/SKILL.md +197 -0
  634. package/data/skills/nk-cell-therapy-agent/SKILL.md +186 -0
  635. package/data/skills/nonlinear-solvers/SKILL.md +180 -0
  636. package/data/skills/numerical-integration/SKILL.md +166 -0
  637. package/data/skills/numerical-stability/SKILL.md +149 -0
  638. package/data/skills/nutrition-analyzer/SKILL.md +775 -0
  639. package/data/skills/occupational-health-analyzer/SKILL.md +386 -0
  640. package/data/skills/omero-integration/SKILL.md +245 -0
  641. package/data/skills/ontology-explorer/SKILL.md +168 -0
  642. package/data/skills/ontology-mapper/SKILL.md +171 -0
  643. package/data/skills/ontology-validator/SKILL.md +136 -0
  644. package/data/skills/open-notebook/SKILL.md +289 -0
  645. package/data/skills/open-targets-search/SKILL.md +211 -0
  646. package/data/skills/openalex-database/SKILL.md +488 -0
  647. package/data/skills/opentargets-database/SKILL.md +367 -0
  648. package/data/skills/opentrons-integration/SKILL.md +567 -0
  649. package/data/skills/opentrons-protocol-agent/SKILL.md +58 -0
  650. package/data/skills/organoid-drug-response-agent/SKILL.md +189 -0
  651. package/data/skills/pan-cancer-multiomics-agent/SKILL.md +159 -0
  652. package/data/skills/paper-2-web/SKILL.md +495 -0
  653. package/data/skills/parameter-optimization/SKILL.md +141 -0
  654. package/data/skills/patents-search/SKILL.md +211 -0
  655. package/data/skills/pathml/SKILL.md +160 -0
  656. package/data/skills/patiently-ai/SKILL.md +103 -0
  657. package/data/skills/pdb/SKILL.md +217 -0
  658. package/data/skills/pdb-database/SKILL.md +303 -0
  659. package/data/skills/pdf/SKILL.md +314 -0
  660. package/data/skills/pdf-anthropic/SKILL.md +294 -0
  661. package/data/skills/pdf-processing/SKILL.md +149 -0
  662. package/data/skills/pdf-processing-pro/SKILL.md +296 -0
  663. package/data/skills/pdx-model-analysis-agent/SKILL.md +169 -0
  664. package/data/skills/peer-review/SKILL.md +565 -0
  665. package/data/skills/performance-profiling/SKILL.md +255 -0
  666. package/data/skills/perplexity-search/SKILL.md +441 -0
  667. package/data/skills/pharmacogenomics-agent/SKILL.md +143 -0
  668. package/data/skills/pharmgx-reporter/SKILL.md +134 -0
  669. package/data/skills/phylogenetics/SKILL.md +404 -0
  670. package/data/skills/plotly/SKILL.md +265 -0
  671. package/data/skills/polars/SKILL.md +385 -0
  672. package/data/skills/popeve-variant-predictor-agent/SKILL.md +213 -0
  673. package/data/skills/post-processing/SKILL.md +338 -0
  674. package/data/skills/pptx/SKILL.md +232 -0
  675. package/data/skills/pptx-official/SKILL.md +484 -0
  676. package/data/skills/pptx-posters/SKILL.md +414 -0
  677. package/data/skills/precision-oncology-agent/SKILL.md +53 -0
  678. package/data/skills/prior-auth-coworker/SKILL.md +60 -0
  679. package/data/skills/prior-auth-review-skill/SKILL.md +360 -0
  680. package/data/skills/profile-report/SKILL.md +120 -0
  681. package/data/skills/protac-design-agent/SKILL.md +220 -0
  682. package/data/skills/protein-design-workflow/SKILL.md +199 -0
  683. package/data/skills/protein-qc/SKILL.md +300 -0
  684. package/data/skills/protein-structure-prediction/SKILL.md +59 -0
  685. package/data/skills/proteinmpnn/SKILL.md +279 -0
  686. package/data/skills/protocolsio-integration/SKILL.md +415 -0
  687. package/data/skills/prs-net-deep-learning-agent/SKILL.md +232 -0
  688. package/data/skills/psychologist-analyst/SKILL.md +1888 -0
  689. package/data/skills/pubchem-database/SKILL.md +568 -0
  690. package/data/skills/pubmed-database/SKILL.md +454 -0
  691. package/data/skills/pubmed-search/SKILL.md +103 -0
  692. package/data/skills/pydeseq2/SKILL.md +553 -0
  693. package/data/skills/pydicom/SKILL.md +428 -0
  694. package/data/skills/pyhealth/SKILL.md +485 -0
  695. package/data/skills/pylabrobot/SKILL.md +179 -0
  696. package/data/skills/pymc/SKILL.md +566 -0
  697. package/data/skills/pymoo/SKILL.md +565 -0
  698. package/data/skills/pyopenms/SKILL.md +211 -0
  699. package/data/skills/pysam/SKILL.md +259 -0
  700. package/data/skills/pytdc/SKILL.md +454 -0
  701. package/data/skills/pytorch-lightning/SKILL.md +172 -0
  702. package/data/skills/pyzotero/SKILL.md +111 -0
  703. package/data/skills/radgpt-radiology-reporter/SKILL.md +67 -0
  704. package/data/skills/radiomics-pathomics-fusion-agent/SKILL.md +221 -0
  705. package/data/skills/rdkit/SKILL.md +763 -0
  706. package/data/skills/reactome-database/SKILL.md +272 -0
  707. package/data/skills/receiving-code-review/SKILL.md +213 -0
  708. package/data/skills/recovery-community-moderator/SKILL.md +175 -0
  709. package/data/skills/regulatory-drafter/SKILL.md +56 -0
  710. package/data/skills/regulatory-drafting/SKILL.md +35 -0
  711. package/data/skills/rehabilitation-analyzer/SKILL.md +636 -0
  712. package/data/skills/repro-enforcer/SKILL.md +50 -0
  713. package/data/skills/requesting-code-review/SKILL.md +105 -0
  714. package/data/skills/research-grants/SKILL.md +935 -0
  715. package/data/skills/research-literature/SKILL.md +35 -0
  716. package/data/skills/research-lookup/SKILL.md +502 -0
  717. package/data/skills/rfdiffusion/SKILL.md +306 -0
  718. package/data/skills/rna-velocity-agent/SKILL.md +174 -0
  719. package/data/skills/scanpy/SKILL.md +380 -0
  720. package/data/skills/scfoundation-model-agent/SKILL.md +210 -0
  721. package/data/skills/scientific-brainstorming/SKILL.md +185 -0
  722. package/data/skills/scientific-critical-thinking/SKILL.md +566 -0
  723. package/data/skills/scientific-manuscript/SKILL.md +181 -0
  724. package/data/skills/scientific-problem-selection/SKILL.md +269 -0
  725. package/data/skills/scientific-schematics/SKILL.md +619 -0
  726. package/data/skills/scientific-slides/SKILL.md +1154 -0
  727. package/data/skills/scientific-visualization/SKILL.md +773 -0
  728. package/data/skills/scientific-writing/SKILL.md +483 -0
  729. package/data/skills/scikit-bio/SKILL.md +431 -0
  730. package/data/skills/scikit-learn/SKILL.md +515 -0
  731. package/data/skills/scikit-survival/SKILL.md +393 -0
  732. package/data/skills/scrna-orchestrator/SKILL.md +204 -0
  733. package/data/skills/scrna-qc/SKILL.md +43 -0
  734. package/data/skills/scvelo/SKILL.md +321 -0
  735. package/data/skills/scvi-tools/SKILL.md +184 -0
  736. package/data/skills/seaborn/SKILL.md +671 -0
  737. package/data/skills/search-strategy/SKILL.md +247 -0
  738. package/data/skills/seq-wrangler/SKILL.md +58 -0
  739. package/data/skills/shap/SKILL.md +560 -0
  740. package/data/skills/simo-multiomics-integration-agent/SKILL.md +178 -0
  741. package/data/skills/simpy/SKILL.md +423 -0
  742. package/data/skills/simulation-orchestrator/SKILL.md +230 -0
  743. package/data/skills/simulation-validator/SKILL.md +195 -0
  744. package/data/skills/single-annotation/SKILL.md +129 -0
  745. package/data/skills/single-cell-rna-qc/SKILL.md +175 -0
  746. package/data/skills/single-cellphone-db/SKILL.md +68 -0
  747. package/data/skills/single-clustering/SKILL.md +75 -0
  748. package/data/skills/single-downstream-analysis/SKILL.md +150 -0
  749. package/data/skills/single-multiomics/SKILL.md +44 -0
  750. package/data/skills/single-preprocessing/SKILL.md +184 -0
  751. package/data/skills/single-to-spatial-mapping/SKILL.md +48 -0
  752. package/data/skills/single-trajectory/SKILL.md +62 -0
  753. package/data/skills/sleep-analyzer/SKILL.md +773 -0
  754. package/data/skills/slurm-job-script-generator/SKILL.md +135 -0
  755. package/data/skills/solublempnn/SKILL.md +165 -0
  756. package/data/skills/spatial-agent/SKILL.md +56 -0
  757. package/data/skills/spatial-epigenomics-agent/SKILL.md +163 -0
  758. package/data/skills/spatial-transcriptomics-agent/SKILL.md +75 -0
  759. package/data/skills/spatial-transcriptomics-analysis/SKILL.md +72 -0
  760. package/data/skills/spatial-transcriptomics-analysis/STAgent/SKILL.md +75 -0
  761. package/data/skills/spatial-transcriptomics-analysis/SpatialAgent/SKILL.md +56 -0
  762. package/data/skills/spatial-transcriptomics-analysis/bioSkills/image-analysis/SKILL.md +266 -0
  763. package/data/skills/spatial-transcriptomics-analysis/bioSkills/spatial-communication/SKILL.md +287 -0
  764. package/data/skills/spatial-transcriptomics-analysis/bioSkills/spatial-data-io/SKILL.md +243 -0
  765. package/data/skills/spatial-transcriptomics-analysis/bioSkills/spatial-deconvolution/SKILL.md +298 -0
  766. package/data/skills/spatial-transcriptomics-analysis/bioSkills/spatial-domains/SKILL.md +229 -0
  767. package/data/skills/spatial-transcriptomics-analysis/bioSkills/spatial-multiomics/SKILL.md +172 -0
  768. package/data/skills/spatial-transcriptomics-analysis/bioSkills/spatial-neighbors/SKILL.md +189 -0
  769. package/data/skills/spatial-transcriptomics-analysis/bioSkills/spatial-preprocessing/SKILL.md +232 -0
  770. package/data/skills/spatial-transcriptomics-analysis/bioSkills/spatial-proteomics/SKILL.md +127 -0
  771. package/data/skills/spatial-transcriptomics-analysis/bioSkills/spatial-statistics/SKILL.md +225 -0
  772. package/data/skills/spatial-transcriptomics-analysis/bioSkills/spatial-visualization/SKILL.md +270 -0
  773. package/data/skills/spatial-tutorials/SKILL.md +87 -0
  774. package/data/skills/speech-pathology-ai/SKILL.md +184 -0
  775. package/data/skills/statistical-analysis/SKILL.md +626 -0
  776. package/data/skills/statsmodels/SKILL.md +608 -0
  777. package/data/skills/string-database/SKILL.md +528 -0
  778. package/data/skills/struct-predictor/SKILL.md +52 -0
  779. package/data/skills/subagent-driven-development/SKILL.md +242 -0
  780. package/data/skills/systematic-debugging/SKILL.md +296 -0
  781. package/data/skills/tcell-exhaustion-analysis-agent/SKILL.md +139 -0
  782. package/data/skills/tcga-preprocessing/SKILL.md +49 -0
  783. package/data/skills/tcm-constitution-analyzer/SKILL.md +664 -0
  784. package/data/skills/tcr-pmhc-prediction-agent/SKILL.md +226 -0
  785. package/data/skills/tcr-repertoire-analysis-agent/SKILL.md +218 -0
  786. package/data/skills/test-driven-development/SKILL.md +371 -0
  787. package/data/skills/tiledbvcf/SKILL.md +459 -0
  788. package/data/skills/time-resolved-cryoem-agent/SKILL.md +223 -0
  789. package/data/skills/time-stepping/SKILL.md +140 -0
  790. package/data/skills/timesfm-forecasting/SKILL.md +785 -0
  791. package/data/skills/tme-immune-profiling-agent/SKILL.md +220 -0
  792. package/data/skills/tooluniverse-adverse-event-detection/SKILL.md +1115 -0
  793. package/data/skills/tooluniverse-antibody-engineering/SKILL.md +1581 -0
  794. package/data/skills/tooluniverse-binder-discovery/SKILL.md +1459 -0
  795. package/data/skills/tooluniverse-cancer-variant-interpretation/SKILL.md +971 -0
  796. package/data/skills/tooluniverse-chemical-compound-retrieval/SKILL.md +322 -0
  797. package/data/skills/tooluniverse-chemical-safety/SKILL.md +733 -0
  798. package/data/skills/tooluniverse-clinical-guidelines/SKILL.md +399 -0
  799. package/data/skills/tooluniverse-clinical-trial-design/SKILL.md +1195 -0
  800. package/data/skills/tooluniverse-clinical-trial-matching/SKILL.md +1333 -0
  801. package/data/skills/tooluniverse-crispr-screen-analysis/SKILL.md +900 -0
  802. package/data/skills/tooluniverse-disease-research/SKILL.md +630 -0
  803. package/data/skills/tooluniverse-drug-drug-interaction/SKILL.md +73 -0
  804. package/data/skills/tooluniverse-drug-repurposing/SKILL.md +595 -0
  805. package/data/skills/tooluniverse-drug-research/SKILL.md +1642 -0
  806. package/data/skills/tooluniverse-drug-target-validation/SKILL.md +1206 -0
  807. package/data/skills/tooluniverse-epigenomics/SKILL.md +1489 -0
  808. package/data/skills/tooluniverse-expression-data-retrieval/SKILL.md +389 -0
  809. package/data/skills/tooluniverse-gene-enrichment/SKILL.md +402 -0
  810. package/data/skills/tooluniverse-gwas-drug-discovery/SKILL.md +576 -0
  811. package/data/skills/tooluniverse-gwas-finemapping/SKILL.md +309 -0
  812. package/data/skills/tooluniverse-gwas-snp-interpretation/SKILL.md +223 -0
  813. package/data/skills/tooluniverse-gwas-study-explorer/SKILL.md +342 -0
  814. package/data/skills/tooluniverse-gwas-trait-to-gene/SKILL.md +236 -0
  815. package/data/skills/tooluniverse-image-analysis/SKILL.md +439 -0
  816. package/data/skills/tooluniverse-immune-repertoire-analysis/SKILL.md +949 -0
  817. package/data/skills/tooluniverse-immunotherapy-response-prediction/SKILL.md +865 -0
  818. package/data/skills/tooluniverse-infectious-disease/SKILL.md +749 -0
  819. package/data/skills/tooluniverse-literature-deep-research/SKILL.md +1050 -0
  820. package/data/skills/tooluniverse-metabolomics/SKILL.md +298 -0
  821. package/data/skills/tooluniverse-metabolomics-analysis/SKILL.md +764 -0
  822. package/data/skills/tooluniverse-multi-omics-integration/SKILL.md +703 -0
  823. package/data/skills/tooluniverse-multiomic-disease-characterization/SKILL.md +1138 -0
  824. package/data/skills/tooluniverse-network-pharmacology/SKILL.md +1312 -0
  825. package/data/skills/tooluniverse-pharmacovigilance/SKILL.md +807 -0
  826. package/data/skills/tooluniverse-phylogenetics/SKILL.md +461 -0
  827. package/data/skills/tooluniverse-polygenic-risk-score/SKILL.md +397 -0
  828. package/data/skills/tooluniverse-precision-medicine-stratification/SKILL.md +1143 -0
  829. package/data/skills/tooluniverse-precision-oncology/SKILL.md +1091 -0
  830. package/data/skills/tooluniverse-protein-interactions/SKILL.md +446 -0
  831. package/data/skills/tooluniverse-protein-structure-retrieval/SKILL.md +416 -0
  832. package/data/skills/tooluniverse-protein-therapeutic-design/SKILL.md +637 -0
  833. package/data/skills/tooluniverse-proteomics-analysis/SKILL.md +843 -0
  834. package/data/skills/tooluniverse-rare-disease-diagnosis/SKILL.md +1257 -0
  835. package/data/skills/tooluniverse-rnaseq-deseq2/SKILL.md +536 -0
  836. package/data/skills/tooluniverse-sequence-retrieval/SKILL.md +419 -0
  837. package/data/skills/tooluniverse-single-cell/SKILL.md +719 -0
  838. package/data/skills/tooluniverse-spatial-omics-analysis/SKILL.md +1102 -0
  839. package/data/skills/tooluniverse-spatial-transcriptomics/SKILL.md +788 -0
  840. package/data/skills/tooluniverse-statistical-modeling/SKILL.md +557 -0
  841. package/data/skills/tooluniverse-structural-variant-analysis/SKILL.md +1356 -0
  842. package/data/skills/tooluniverse-systems-biology/SKILL.md +374 -0
  843. package/data/skills/tooluniverse-target-research/SKILL.md +1510 -0
  844. package/data/skills/tooluniverse-variant-analysis/SKILL.md +448 -0
  845. package/data/skills/tooluniverse-variant-interpretation/SKILL.md +1118 -0
  846. package/data/skills/torch-geometric/SKILL.md +674 -0
  847. package/data/skills/torch_geometric/SKILL.md +670 -0
  848. package/data/skills/torchdrug/SKILL.md +444 -0
  849. package/data/skills/tpd-ternary-complex-agent/SKILL.md +226 -0
  850. package/data/skills/transformers/SKILL.md +157 -0
  851. package/data/skills/travel-health-analyzer/SKILL.md +421 -0
  852. package/data/skills/treatment-plans/SKILL.md +1576 -0
  853. package/data/skills/trial-eligibility-agent/SKILL.md +54 -0
  854. package/data/skills/trialgpt-matching/SKILL.md +66 -0
  855. package/data/skills/tumor-clonal-evolution-agent/SKILL.md +134 -0
  856. package/data/skills/tumor-heterogeneity-agent/SKILL.md +216 -0
  857. package/data/skills/tumor-mutational-burden-agent/SKILL.md +188 -0
  858. package/data/skills/ukb-navigator/SKILL.md +113 -0
  859. package/data/skills/umap-learn/SKILL.md +473 -0
  860. package/data/skills/uniprot-database/SKILL.md +189 -0
  861. package/data/skills/universal-single-cell-annotator/SKILL.md +72 -0
  862. package/data/skills/using-git-worktrees/SKILL.md +218 -0
  863. package/data/skills/using-superpowers/SKILL.md +95 -0
  864. package/data/skills/usmle/SKILL.md +62 -0
  865. package/data/skills/uspto-database/SKILL.md +597 -0
  866. package/data/skills/vaex/SKILL.md +180 -0
  867. package/data/skills/varcadd-pathogenicity/SKILL.md +68 -0
  868. package/data/skills/variant-interpretation-acmg/SKILL.md +58 -0
  869. package/data/skills/variant-interpretation-acmg/bioSkills/clinical-interpretation/SKILL.md +334 -0
  870. package/data/skills/variant-interpretation-acmg/bioSkills/consensus-sequences/SKILL.md +343 -0
  871. package/data/skills/variant-interpretation-acmg/bioSkills/deepvariant/SKILL.md +279 -0
  872. package/data/skills/variant-interpretation-acmg/bioSkills/filtering-best-practices/SKILL.md +362 -0
  873. package/data/skills/variant-interpretation-acmg/bioSkills/gatk-variant-calling/SKILL.md +398 -0
  874. package/data/skills/variant-interpretation-acmg/bioSkills/joint-calling/SKILL.md +343 -0
  875. package/data/skills/variant-interpretation-acmg/bioSkills/structural-variant-calling/SKILL.md +256 -0
  876. package/data/skills/variant-interpretation-acmg/bioSkills/variant-annotation/SKILL.md +387 -0
  877. package/data/skills/variant-interpretation-acmg/bioSkills/variant-calling/SKILL.md +258 -0
  878. package/data/skills/variant-interpretation-acmg/bioSkills/variant-normalization/SKILL.md +304 -0
  879. package/data/skills/variant-interpretation-acmg/bioSkills/vcf-basics/SKILL.md +329 -0
  880. package/data/skills/variant-interpretation-acmg/bioSkills/vcf-manipulation/SKILL.md +398 -0
  881. package/data/skills/variant-interpretation-acmg/bioSkills/vcf-statistics/SKILL.md +424 -0
  882. package/data/skills/variant-interpretation-acmg/varCADD/SKILL.md +68 -0
  883. package/data/skills/vcf-annotator/SKILL.md +55 -0
  884. package/data/skills/verification-before-completion/SKILL.md +139 -0
  885. package/data/skills/virtual-lab-agent/SKILL.md +240 -0
  886. package/data/skills/wearable-analysis-agent/SKILL.md +70 -0
  887. package/data/skills/weightloss-analyzer/SKILL.md +320 -0
  888. package/data/skills/wellally-tech/SKILL.md +685 -0
  889. package/data/skills/wikipedia-search/SKILL.md +481 -0
  890. package/data/skills/writing-plans/SKILL.md +116 -0
  891. package/data/skills/writing-skills/SKILL.md +655 -0
  892. package/data/skills/xlsx/SKILL.md +292 -0
  893. package/data/skills/xlsx-official/SKILL.md +289 -0
  894. package/data/skills/zarr-python/SKILL.md +777 -0
  895. package/data/skills/zinc-database/SKILL.md +398 -0
  896. package/data/tools/__init__.py +8 -0
  897. package/data/tools/hpc.py +71 -0
  898. package/data/tools/hpc_client/__init__.py +8 -0
  899. package/data/tools/hpc_client/builders/__init__.py +12 -0
  900. package/data/tools/hpc_client/builders/alphafold.py +36 -0
  901. package/data/tools/hpc_client/builders/boltz.py +33 -0
  902. package/data/tools/hpc_client/builders/chai.py +30 -0
  903. package/data/tools/hpc_client/builders/immunebuilder.py +31 -0
  904. package/data/tools/hpc_client/builders/rfantibody.py +58 -0
  905. package/data/tools/hpc_client/builders/thermompnn.py +16 -0
  906. package/data/tools/hpc_client/hpc_api.py +41 -0
  907. package/data/tools/hpc_client/hpc_tools.py +218 -0
  908. package/data/tools/hpc_dynamic.py +71 -0
  909. package/data/tools/integrations/__init__.py +14 -0
  910. package/data/tools/integrations/adaptyv.py +107 -0
  911. package/data/tools/integrations/addgene.py +52 -0
  912. package/data/tools/integrations/api_internal.py +33 -0
  913. package/data/tools/molecular_biology.py +688 -0
  914. package/data/tools/pharmacology.py +67 -0
  915. package/data/workflows/bulk-omics-clustering/SKILL.md +501 -0
  916. package/data/workflows/bulk-omics-clustering/references/best_practices.md +395 -0
  917. package/data/workflows/bulk-omics-clustering/references/clustering_methods_comparison.md +288 -0
  918. package/data/workflows/bulk-omics-clustering/references/common-patterns.md +1136 -0
  919. package/data/workflows/bulk-omics-clustering/references/decision-guide.md +819 -0
  920. package/data/workflows/bulk-omics-clustering/references/distance_metrics_guide.md +388 -0
  921. package/data/workflows/bulk-omics-clustering/references/parameter_guide.md +396 -0
  922. package/data/workflows/bulk-omics-clustering/references/r-quick-start.md +105 -0
  923. package/data/workflows/bulk-omics-clustering/references/validation_metrics_guide.md +315 -0
  924. package/data/workflows/bulk-omics-clustering/scripts/characterize_clusters.py +255 -0
  925. package/data/workflows/bulk-omics-clustering/scripts/cluster_validation.py +449 -0
  926. package/data/workflows/bulk-omics-clustering/scripts/density_clustering.py +321 -0
  927. package/data/workflows/bulk-omics-clustering/scripts/dimensionality_reduction.py +328 -0
  928. package/data/workflows/bulk-omics-clustering/scripts/distance_metrics.py +251 -0
  929. package/data/workflows/bulk-omics-clustering/scripts/export_results.py +456 -0
  930. package/data/workflows/bulk-omics-clustering/scripts/hierarchical_clustering.R +229 -0
  931. package/data/workflows/bulk-omics-clustering/scripts/hierarchical_clustering.py +269 -0
  932. package/data/workflows/bulk-omics-clustering/scripts/kmeans_clustering.py +346 -0
  933. package/data/workflows/bulk-omics-clustering/scripts/load_example_data.R +171 -0
  934. package/data/workflows/bulk-omics-clustering/scripts/load_example_data.py +171 -0
  935. package/data/workflows/bulk-omics-clustering/scripts/model_based_clustering.py +370 -0
  936. package/data/workflows/bulk-omics-clustering/scripts/optimal_clusters.py +381 -0
  937. package/data/workflows/bulk-omics-clustering/scripts/plot_cluster_heatmap.R +141 -0
  938. package/data/workflows/bulk-omics-clustering/scripts/plot_clustering_results.py +452 -0
  939. package/data/workflows/bulk-omics-clustering/scripts/prepare_data.py +250 -0
  940. package/data/workflows/bulk-omics-clustering/scripts/stability_analysis.py +434 -0
  941. package/data/workflows/bulk-rnaseq-counts-to-de-deseq2/SKILL.md +505 -0
  942. package/data/workflows/bulk-rnaseq-counts-to-de-deseq2/references/comprehensive-reference.md +440 -0
  943. package/data/workflows/bulk-rnaseq-counts-to-de-deseq2/references/decision-guide.md +327 -0
  944. package/data/workflows/bulk-rnaseq-counts-to-de-deseq2/references/troubleshooting.md +456 -0
  945. package/data/workflows/bulk-rnaseq-counts-to-de-deseq2/references/usage-guide.md +75 -0
  946. package/data/workflows/bulk-rnaseq-counts-to-de-deseq2/scripts/basic_workflow.R +149 -0
  947. package/data/workflows/bulk-rnaseq-counts-to-de-deseq2/scripts/batch_correction.R +44 -0
  948. package/data/workflows/bulk-rnaseq-counts-to-de-deseq2/scripts/export_results.R +190 -0
  949. package/data/workflows/bulk-rnaseq-counts-to-de-deseq2/scripts/extract_results.R +242 -0
  950. package/data/workflows/bulk-rnaseq-counts-to-de-deseq2/scripts/load_example_data.R +250 -0
  951. package/data/workflows/bulk-rnaseq-counts-to-de-deseq2/scripts/multi_condition.R +50 -0
  952. package/data/workflows/bulk-rnaseq-counts-to-de-deseq2/scripts/qc_plots.R +410 -0
  953. package/data/workflows/bulk-rnaseq-counts-to-de-deseq2/scripts/transformations.R +218 -0
  954. package/data/workflows/chip-atlas-diff-analysis/SKILL.md +222 -0
  955. package/data/workflows/chip-atlas-diff-analysis/references/chipatlas_diff_api_format.md +106 -0
  956. package/data/workflows/chip-atlas-diff-analysis/references/diff_analysis_methods.md +89 -0
  957. package/data/workflows/chip-atlas-diff-analysis/references/output_format.md +78 -0
  958. package/data/workflows/chip-atlas-diff-analysis/scripts/__init__.py +1 -0
  959. package/data/workflows/chip-atlas-diff-analysis/scripts/annotate_genes.py +144 -0
  960. package/data/workflows/chip-atlas-diff-analysis/scripts/export_all.py +498 -0
  961. package/data/workflows/chip-atlas-diff-analysis/scripts/filter_regions.py +176 -0
  962. package/data/workflows/chip-atlas-diff-analysis/scripts/generate_all_plots.py +321 -0
  963. package/data/workflows/chip-atlas-diff-analysis/scripts/load_example_data.py +149 -0
  964. package/data/workflows/chip-atlas-diff-analysis/scripts/load_user_data.py +211 -0
  965. package/data/workflows/chip-atlas-diff-analysis/scripts/parse_bed_results.py +240 -0
  966. package/data/workflows/chip-atlas-diff-analysis/scripts/qc_checks.py +621 -0
  967. package/data/workflows/chip-atlas-diff-analysis/scripts/query_chipatlas_api.py +329 -0
  968. package/data/workflows/chip-atlas-diff-analysis/scripts/run_diff_workflow.py +256 -0
  969. package/data/workflows/chip-atlas-peak-enrichment/SKILL.md +212 -0
  970. package/data/workflows/chip-atlas-peak-enrichment/references/chipatlas_metadata_format.md +115 -0
  971. package/data/workflows/chip-atlas-peak-enrichment/references/enrichment_statistics.md +145 -0
  972. package/data/workflows/chip-atlas-peak-enrichment/references/peak_thresholds.md +63 -0
  973. package/data/workflows/chip-atlas-peak-enrichment/references/promoter_definitions.md +69 -0
  974. package/data/workflows/chip-atlas-peak-enrichment/scripts/__init__.py +1 -0
  975. package/data/workflows/chip-atlas-peak-enrichment/scripts/convert_genes_to_regions.py +271 -0
  976. package/data/workflows/chip-atlas-peak-enrichment/scripts/export_all.py +456 -0
  977. package/data/workflows/chip-atlas-peak-enrichment/scripts/filter_experiments.py +116 -0
  978. package/data/workflows/chip-atlas-peak-enrichment/scripts/generate_all_plots.py +280 -0
  979. package/data/workflows/chip-atlas-peak-enrichment/scripts/load_example_data.py +96 -0
  980. package/data/workflows/chip-atlas-peak-enrichment/scripts/load_user_data.py +183 -0
  981. package/data/workflows/chip-atlas-peak-enrichment/scripts/query_chipatlas_api.py +349 -0
  982. package/data/workflows/chip-atlas-peak-enrichment/scripts/run_enrichment_workflow.py +271 -0
  983. package/data/workflows/chip-atlas-target-genes/SKILL.md +230 -0
  984. package/data/workflows/chip-atlas-target-genes/references/macs2_binding_scores.md +89 -0
  985. package/data/workflows/chip-atlas-target-genes/references/string_scores.md +58 -0
  986. package/data/workflows/chip-atlas-target-genes/references/target_genes_data_format.md +73 -0
  987. package/data/workflows/chip-atlas-target-genes/scripts/__init__.py +0 -0
  988. package/data/workflows/chip-atlas-target-genes/scripts/download_target_genes.py +200 -0
  989. package/data/workflows/chip-atlas-target-genes/scripts/export_all.py +340 -0
  990. package/data/workflows/chip-atlas-target-genes/scripts/filter_targets.py +205 -0
  991. package/data/workflows/chip-atlas-target-genes/scripts/generate_all_plots.py +330 -0
  992. package/data/workflows/chip-atlas-target-genes/scripts/load_example_query.py +61 -0
  993. package/data/workflows/chip-atlas-target-genes/scripts/load_user_query.py +47 -0
  994. package/data/workflows/chip-atlas-target-genes/scripts/run_target_genes_workflow.py +141 -0
  995. package/data/workflows/clinicaltrials-landscape/SKILL.md +257 -0
  996. package/data/workflows/clinicaltrials-landscape/references/api-parameters.md +181 -0
  997. package/data/workflows/clinicaltrials-landscape/references/mechanisms.md +141 -0
  998. package/data/workflows/clinicaltrials-landscape/references/output-schema.md +184 -0
  999. package/data/workflows/clinicaltrials-landscape/scripts/__init__.py +1 -0
  1000. package/data/workflows/clinicaltrials-landscape/scripts/classify_mechanisms.py +359 -0
  1001. package/data/workflows/clinicaltrials-landscape/scripts/compile_trials.py +579 -0
  1002. package/data/workflows/clinicaltrials-landscape/scripts/disease_config.py +161 -0
  1003. package/data/workflows/clinicaltrials-landscape/scripts/export_all.py +242 -0
  1004. package/data/workflows/clinicaltrials-landscape/scripts/generate_landscape_plots.py +761 -0
  1005. package/data/workflows/clinicaltrials-landscape/scripts/generate_pdf_report.py +1465 -0
  1006. package/data/workflows/clinicaltrials-landscape/scripts/generate_report.py +1813 -0
  1007. package/data/workflows/clinicaltrials-landscape/scripts/query_clinicaltrials.py +307 -0
  1008. package/data/workflows/coexpression-network/SKILL.md +344 -0
  1009. package/data/workflows/coexpression-network/references/parameter-tuning-guide.md +591 -0
  1010. package/data/workflows/coexpression-network/references/troubleshooting.md +483 -0
  1011. package/data/workflows/coexpression-network/references/wgcna-best-practices.md +563 -0
  1012. package/data/workflows/coexpression-network/references/wgcna-reference.md +538 -0
  1013. package/data/workflows/coexpression-network/scripts/build_network.R +43 -0
  1014. package/data/workflows/coexpression-network/scripts/correlate_modules_traits.R +92 -0
  1015. package/data/workflows/coexpression-network/scripts/export_wgcna_results.R +117 -0
  1016. package/data/workflows/coexpression-network/scripts/identify_hub_genes.R +63 -0
  1017. package/data/workflows/coexpression-network/scripts/load_example_data.R +214 -0
  1018. package/data/workflows/coexpression-network/scripts/module_enrichment.R +159 -0
  1019. package/data/workflows/coexpression-network/scripts/pick_soft_power.R +70 -0
  1020. package/data/workflows/coexpression-network/scripts/plot_all_wgcna.R +104 -0
  1021. package/data/workflows/coexpression-network/scripts/plot_eigengene_heatmap.R +65 -0
  1022. package/data/workflows/coexpression-network/scripts/plot_hub_genes.R +70 -0
  1023. package/data/workflows/coexpression-network/scripts/plot_module_dendrogram.R +50 -0
  1024. package/data/workflows/coexpression-network/scripts/plotting_helpers.R +87 -0
  1025. package/data/workflows/coexpression-network/scripts/prepare_wgcna_data.R +73 -0
  1026. package/data/workflows/coexpression-network/scripts/wgcna_workflow.R +93 -0
  1027. package/data/workflows/experimental-design-statistics/SKILL.md +408 -0
  1028. package/data/workflows/experimental-design-statistics/references/batch_effect_mitigation.md +756 -0
  1029. package/data/workflows/experimental-design-statistics/references/cv_tissue_database.csv +30 -0
  1030. package/data/workflows/experimental-design-statistics/references/experimental_design_best_practices.md +515 -0
  1031. package/data/workflows/experimental-design-statistics/references/multiple_testing_guide.md +730 -0
  1032. package/data/workflows/experimental-design-statistics/references/power_analysis_guidelines.md +635 -0
  1033. package/data/workflows/experimental-design-statistics/references/qc_guidelines.md +310 -0
  1034. package/data/workflows/experimental-design-statistics/references/software_requirements.md +328 -0
  1035. package/data/workflows/experimental-design-statistics/references/troubleshooting_guide.md +510 -0
  1036. package/data/workflows/experimental-design-statistics/scripts/batch_assignment.R +302 -0
  1037. package/data/workflows/experimental-design-statistics/scripts/batch_validation.R +342 -0
  1038. package/data/workflows/experimental-design-statistics/scripts/export_design.R +352 -0
  1039. package/data/workflows/experimental-design-statistics/scripts/load_example_data.R +204 -0
  1040. package/data/workflows/experimental-design-statistics/scripts/multiple_testing.R +417 -0
  1041. package/data/workflows/experimental-design-statistics/scripts/plot_power_curves.R +317 -0
  1042. package/data/workflows/experimental-design-statistics/scripts/power_atacseq.R +229 -0
  1043. package/data/workflows/experimental-design-statistics/scripts/power_pilot_based.R +289 -0
  1044. package/data/workflows/experimental-design-statistics/scripts/power_rnaseq.R +247 -0
  1045. package/data/workflows/experimental-design-statistics/scripts/sample_size_de.R +327 -0
  1046. package/data/workflows/experimental-design-statistics/scripts/sample_size_scrna.R +304 -0
  1047. package/data/workflows/functional-enrichment-from-degs/SKILL.md +387 -0
  1048. package/data/workflows/functional-enrichment-from-degs/references/database_guide.md +354 -0
  1049. package/data/workflows/functional-enrichment-from-degs/references/decision-guide.md +546 -0
  1050. package/data/workflows/functional-enrichment-from-degs/references/gsea_ora_comparison.md +213 -0
  1051. package/data/workflows/functional-enrichment-from-degs/references/gsea_ora_validation_framework.md +483 -0
  1052. package/data/workflows/functional-enrichment-from-degs/references/interpretation_guidelines.md +374 -0
  1053. package/data/workflows/functional-enrichment-from-degs/references/method-reference.md +742 -0
  1054. package/data/workflows/functional-enrichment-from-degs/scripts/export_results.R +190 -0
  1055. package/data/workflows/functional-enrichment-from-degs/scripts/generate_plots.R +240 -0
  1056. package/data/workflows/functional-enrichment-from-degs/scripts/get_msigdb_genesets.R +75 -0
  1057. package/data/workflows/functional-enrichment-from-degs/scripts/load_de_results.R +60 -0
  1058. package/data/workflows/functional-enrichment-from-degs/scripts/load_example_data.R +212 -0
  1059. package/data/workflows/functional-enrichment-from-degs/scripts/prepare_gene_lists.R +92 -0
  1060. package/data/workflows/functional-enrichment-from-degs/scripts/run_gsea.R +44 -0
  1061. package/data/workflows/functional-enrichment-from-degs/scripts/run_ora.R +53 -0
  1062. package/data/workflows/genetic-variant-annotation/SKILL.md +440 -0
  1063. package/data/workflows/genetic-variant-annotation/references/auto_installation_implementation.md +274 -0
  1064. package/data/workflows/genetic-variant-annotation/references/consequence_terms.md +392 -0
  1065. package/data/workflows/genetic-variant-annotation/references/filtering_strategies.md +808 -0
  1066. package/data/workflows/genetic-variant-annotation/references/installation_guide.md +557 -0
  1067. package/data/workflows/genetic-variant-annotation/references/pathogenicity_interpretation.md +473 -0
  1068. package/data/workflows/genetic-variant-annotation/references/qc_guidelines.md +524 -0
  1069. package/data/workflows/genetic-variant-annotation/references/snpeff_best_practices.md +481 -0
  1070. package/data/workflows/genetic-variant-annotation/references/tool_selection_guide.md +433 -0
  1071. package/data/workflows/genetic-variant-annotation/references/troubleshooting_guide.md +678 -0
  1072. package/data/workflows/genetic-variant-annotation/references/vep_best_practices.md +450 -0
  1073. package/data/workflows/genetic-variant-annotation/scripts/annotate_genes.py +243 -0
  1074. package/data/workflows/genetic-variant-annotation/scripts/export_results.py +450 -0
  1075. package/data/workflows/genetic-variant-annotation/scripts/filter_variants.py +365 -0
  1076. package/data/workflows/genetic-variant-annotation/scripts/install_tools.py +246 -0
  1077. package/data/workflows/genetic-variant-annotation/scripts/load_example_data.py +166 -0
  1078. package/data/workflows/genetic-variant-annotation/scripts/parse_snpeff_output.py +283 -0
  1079. package/data/workflows/genetic-variant-annotation/scripts/parse_vep_output.py +257 -0
  1080. package/data/workflows/genetic-variant-annotation/scripts/plot_variant_distribution.py +372 -0
  1081. package/data/workflows/genetic-variant-annotation/scripts/prioritize_variants.py +287 -0
  1082. package/data/workflows/genetic-variant-annotation/scripts/run_snpeff.py +418 -0
  1083. package/data/workflows/genetic-variant-annotation/scripts/run_vep.py +358 -0
  1084. package/data/workflows/genetic-variant-annotation/scripts/select_tool.py +203 -0
  1085. package/data/workflows/genetic-variant-annotation/scripts/test_complete_workflow.py +312 -0
  1086. package/data/workflows/genetic-variant-annotation/scripts/test_pickle_load.py +118 -0
  1087. package/data/workflows/genetic-variant-annotation/scripts/validate_vcf.py +351 -0
  1088. package/data/workflows/genetic-variant-annotation/scripts/verify_changes.py +212 -0
  1089. package/data/workflows/grn-pyscenic/SKILL.md +331 -0
  1090. package/data/workflows/grn-pyscenic/references/cli_interface.md +222 -0
  1091. package/data/workflows/grn-pyscenic/references/database_downloads.md +245 -0
  1092. package/data/workflows/grn-pyscenic/scripts/export_all.py +192 -0
  1093. package/data/workflows/grn-pyscenic/scripts/generate_report.py +512 -0
  1094. package/data/workflows/grn-pyscenic/scripts/integrate_with_adata.py +54 -0
  1095. package/data/workflows/grn-pyscenic/scripts/load_example_data.py +200 -0
  1096. package/data/workflows/grn-pyscenic/scripts/load_expression_data.py +61 -0
  1097. package/data/workflows/grn-pyscenic/scripts/plot_regulon_visualizations.py +263 -0
  1098. package/data/workflows/grn-pyscenic/scripts/run_grn_workflow.py +184 -0
  1099. package/data/workflows/gwas-to-function-twas/SKILL.md +394 -0
  1100. package/data/workflows/gwas-to-function-twas/references/fusion_best_practices.md +120 -0
  1101. package/data/workflows/gwas-to-function-twas/references/installation-guide.md +414 -0
  1102. package/data/workflows/gwas-to-function-twas/references/ldsc_qc_guidelines.md +287 -0
  1103. package/data/workflows/gwas-to-function-twas/references/spredixxcan_best_practices.md +166 -0
  1104. package/data/workflows/gwas-to-function-twas/references/therapeutic_interpretation_guide.md +717 -0
  1105. package/data/workflows/gwas-to-function-twas/references/tissue_reference_guide.md +182 -0
  1106. package/data/workflows/gwas-to-function-twas/references/troubleshooting_guide.md +317 -0
  1107. package/data/workflows/gwas-to-function-twas/references/twas_hub_validation_guide.md +88 -0
  1108. package/data/workflows/gwas-to-function-twas/scripts/colocalization_analysis.py +187 -0
  1109. package/data/workflows/gwas-to-function-twas/scripts/druggability_scoring.py +199 -0
  1110. package/data/workflows/gwas-to-function-twas/scripts/export_results.py +220 -0
  1111. package/data/workflows/gwas-to-function-twas/scripts/integrate_variant_annotation.py +194 -0
  1112. package/data/workflows/gwas-to-function-twas/scripts/interpret_therapeutic_direction.py +418 -0
  1113. package/data/workflows/gwas-to-function-twas/scripts/mendelian_randomization.py +749 -0
  1114. package/data/workflows/gwas-to-function-twas/scripts/multilayer_direction_analysis.py +471 -0
  1115. package/data/workflows/gwas-to-function-twas/scripts/plot_twas_results.py +252 -0
  1116. package/data/workflows/gwas-to-function-twas/scripts/run_fusion.py +155 -0
  1117. package/data/workflows/gwas-to-function-twas/scripts/run_smultixcan.py +102 -0
  1118. package/data/workflows/gwas-to-function-twas/scripts/run_spredixxcan.py +138 -0
  1119. package/data/workflows/gwas-to-function-twas/scripts/select_reference_panel.py +253 -0
  1120. package/data/workflows/gwas-to-function-twas/scripts/validate_gwas_sumstats.py +214 -0
  1121. package/data/workflows/gwas-to-function-twas/scripts/validate_with_twas_hub.py +439 -0
  1122. package/data/workflows/lasso-biomarker-panel/SKILL.md +322 -0
  1123. package/data/workflows/lasso-biomarker-panel/references/decision-guide.md +64 -0
  1124. package/data/workflows/lasso-biomarker-panel/references/lasso-reference.md +110 -0
  1125. package/data/workflows/lasso-biomarker-panel/references/validation-guide.md +105 -0
  1126. package/data/workflows/lasso-biomarker-panel/scripts/biological_interpretation.R +1560 -0
  1127. package/data/workflows/lasso-biomarker-panel/scripts/biomarker_plots.R +350 -0
  1128. package/data/workflows/lasso-biomarker-panel/scripts/export_results.R +1492 -0
  1129. package/data/workflows/lasso-biomarker-panel/scripts/lasso_workflow.R +328 -0
  1130. package/data/workflows/lasso-biomarker-panel/scripts/load_example_data.R +1903 -0
  1131. package/data/workflows/lasso-biomarker-panel/scripts/plotting_helpers.R +78 -0
  1132. package/data/workflows/lasso-biomarker-panel/scripts/prepare_features.R +225 -0
  1133. package/data/workflows/lasso-biomarker-panel/scripts/query_cellxgene.py +107 -0
  1134. package/data/workflows/lasso-biomarker-panel/scripts/validate_external.R +174 -0
  1135. package/data/workflows/literature-preclinical/SKILL.md +276 -0
  1136. package/data/workflows/literature-preclinical/assets/eval/simple_test.py +386 -0
  1137. package/data/workflows/literature-preclinical/references/experiment-extraction-guide.md +147 -0
  1138. package/data/workflows/literature-preclinical/references/full-text-enrichment-guide.md +121 -0
  1139. package/data/workflows/literature-preclinical/references/preclinical-search-guide.md +117 -0
  1140. package/data/workflows/literature-preclinical/scripts/extract_experiments.py +401 -0
  1141. package/data/workflows/literature-preclinical/scripts/generate_plots.R +303 -0
  1142. package/data/workflows/literature-preclinical/scripts/narrative_synthesis.py +653 -0
  1143. package/data/workflows/literature-preclinical/scripts/preclinical_search.py +332 -0
  1144. package/data/workflows/literature-preclinical/scripts/preclinical_synthesis.py +237 -0
  1145. package/data/workflows/literature-preclinical/scripts/report_generation.py +326 -0
  1146. package/data/workflows/mendelian-randomization-twosamplemr/SKILL.md +210 -0
  1147. package/data/workflows/mendelian-randomization-twosamplemr/references/interpretation-guide.md +239 -0
  1148. package/data/workflows/mendelian-randomization-twosamplemr/references/method-reference.md +190 -0
  1149. package/data/workflows/mendelian-randomization-twosamplemr/scripts/export_results.R +123 -0
  1150. package/data/workflows/mendelian-randomization-twosamplemr/scripts/generate_report.R +411 -0
  1151. package/data/workflows/mendelian-randomization-twosamplemr/scripts/load_data.R +281 -0
  1152. package/data/workflows/mendelian-randomization-twosamplemr/scripts/mr_plots.R +163 -0
  1153. package/data/workflows/mendelian-randomization-twosamplemr/scripts/run_mr_analysis.R +322 -0
  1154. package/data/workflows/pcr-primer-design/SKILL.md +397 -0
  1155. package/data/workflows/pcr-primer-design/references/code_examples.md +594 -0
  1156. package/data/workflows/pcr-primer-design/references/miqe_guidelines.md +453 -0
  1157. package/data/workflows/pcr-primer-design/references/parameter_ranges.md +356 -0
  1158. package/data/workflows/pcr-primer-design/references/primer_design_best_practices.md +451 -0
  1159. package/data/workflows/pcr-primer-design/references/troubleshooting_guide.md +477 -0
  1160. package/data/workflows/pcr-primer-design/scripts/__init__.py +2 -0
  1161. package/data/workflows/pcr-primer-design/scripts/calculate_tm.py +306 -0
  1162. package/data/workflows/pcr-primer-design/scripts/check_dimers.py +298 -0
  1163. package/data/workflows/pcr-primer-design/scripts/check_secondary_structures.py +343 -0
  1164. package/data/workflows/pcr-primer-design/scripts/design_qpcr_primers.py +233 -0
  1165. package/data/workflows/pcr-primer-design/scripts/design_standard_primers.py +197 -0
  1166. package/data/workflows/pcr-primer-design/scripts/design_taqman_probes.py +226 -0
  1167. package/data/workflows/pcr-primer-design/scripts/export_results.py +382 -0
  1168. package/data/workflows/pcr-primer-design/scripts/generate_reports.py +379 -0
  1169. package/data/workflows/pcr-primer-design/scripts/validate_specificity.py +311 -0
  1170. package/data/workflows/pcr-primer-design/scripts/visualize_primers.py +379 -0
  1171. package/data/workflows/polygenic-risk-score-prs-catalog/SKILL.md +195 -0
  1172. package/data/workflows/polygenic-risk-score-prs-catalog/references/interpretation-guide.md +80 -0
  1173. package/data/workflows/polygenic-risk-score-prs-catalog/references/pgs-catalog-guide.md +109 -0
  1174. package/data/workflows/polygenic-risk-score-prs-catalog/scripts/export_results.R +186 -0
  1175. package/data/workflows/polygenic-risk-score-prs-catalog/scripts/generate_plots.R +283 -0
  1176. package/data/workflows/polygenic-risk-score-prs-catalog/scripts/load_pgs_weights.R +228 -0
  1177. package/data/workflows/polygenic-risk-score-prs-catalog/scripts/load_reference_data.R +191 -0
  1178. package/data/workflows/polygenic-risk-score-prs-catalog/scripts/score_traits.R +216 -0
  1179. package/data/workflows/pooled-crispr-screens/SKILL.md +362 -0
  1180. package/data/workflows/pooled-crispr-screens/references/crispr_screen_best_practices.md +349 -0
  1181. package/data/workflows/pooled-crispr-screens/references/qc_guidelines.md +722 -0
  1182. package/data/workflows/pooled-crispr-screens/references/statistical_methods.md +644 -0
  1183. package/data/workflows/pooled-crispr-screens/references/troubleshooting_guide.md +684 -0
  1184. package/data/workflows/pooled-crispr-screens/references/umi_optimization.md +297 -0
  1185. package/data/workflows/pooled-crispr-screens/scripts/concatenate_libraries.py +132 -0
  1186. package/data/workflows/pooled-crispr-screens/scripts/detect_perturbed_cells.py +255 -0
  1187. package/data/workflows/pooled-crispr-screens/scripts/differential_expression.py +202 -0
  1188. package/data/workflows/pooled-crispr-screens/scripts/differential_expression_glmgampoi.py +320 -0
  1189. package/data/workflows/pooled-crispr-screens/scripts/export_results.py +261 -0
  1190. package/data/workflows/pooled-crispr-screens/scripts/expression_filtering.py +159 -0
  1191. package/data/workflows/pooled-crispr-screens/scripts/gene_name_corrections.py +188 -0
  1192. package/data/workflows/pooled-crispr-screens/scripts/generate_report.py +485 -0
  1193. package/data/workflows/pooled-crispr-screens/scripts/load_10x_libraries.py +69 -0
  1194. package/data/workflows/pooled-crispr-screens/scripts/load_example_data.py +257 -0
  1195. package/data/workflows/pooled-crispr-screens/scripts/map_sgrna_to_cells.py +119 -0
  1196. package/data/workflows/pooled-crispr-screens/scripts/normalize_and_scale.py +140 -0
  1197. package/data/workflows/pooled-crispr-screens/scripts/qc_filtering.py +185 -0
  1198. package/data/workflows/pooled-crispr-screens/scripts/run_glmgampoi.R +181 -0
  1199. package/data/workflows/pooled-crispr-screens/scripts/screen_all_perturbations.py +306 -0
  1200. package/data/workflows/pooled-crispr-screens/scripts/validate_perturbations.py +314 -0
  1201. package/data/workflows/pooled-crispr-screens/scripts/visualize_perturbations.py +314 -0
  1202. package/data/workflows/scrnaseq-scanpy-core-analysis/SKILL.md +425 -0
  1203. package/data/workflows/scrnaseq-scanpy-core-analysis/references/ambient_rna_correction.md +422 -0
  1204. package/data/workflows/scrnaseq-scanpy-core-analysis/references/common-patterns.md +533 -0
  1205. package/data/workflows/scrnaseq-scanpy-core-analysis/references/integration_methods.md +820 -0
  1206. package/data/workflows/scrnaseq-scanpy-core-analysis/references/marker_gene_database.md +471 -0
  1207. package/data/workflows/scrnaseq-scanpy-core-analysis/references/pseudobulk_de_guide.md +408 -0
  1208. package/data/workflows/scrnaseq-scanpy-core-analysis/references/qc_guidelines.md +535 -0
  1209. package/data/workflows/scrnaseq-scanpy-core-analysis/references/scanpy_best_practices.md +496 -0
  1210. package/data/workflows/scrnaseq-scanpy-core-analysis/references/troubleshooting_guide.md +668 -0
  1211. package/data/workflows/scrnaseq-scanpy-core-analysis/references/workflow-details.md +727 -0
  1212. package/data/workflows/scrnaseq-scanpy-core-analysis/scripts/annotate_celltypes.py +431 -0
  1213. package/data/workflows/scrnaseq-scanpy-core-analysis/scripts/cluster_cells.py +293 -0
  1214. package/data/workflows/scrnaseq-scanpy-core-analysis/scripts/export_results.py +423 -0
  1215. package/data/workflows/scrnaseq-scanpy-core-analysis/scripts/filter_cells.py +531 -0
  1216. package/data/workflows/scrnaseq-scanpy-core-analysis/scripts/find_markers.py +391 -0
  1217. package/data/workflows/scrnaseq-scanpy-core-analysis/scripts/find_variable_genes.py +222 -0
  1218. package/data/workflows/scrnaseq-scanpy-core-analysis/scripts/integrate_scvi.py +665 -0
  1219. package/data/workflows/scrnaseq-scanpy-core-analysis/scripts/integration_diagnostics.py +678 -0
  1220. package/data/workflows/scrnaseq-scanpy-core-analysis/scripts/load_example_data.py +68 -0
  1221. package/data/workflows/scrnaseq-scanpy-core-analysis/scripts/normalize_data.py +325 -0
  1222. package/data/workflows/scrnaseq-scanpy-core-analysis/scripts/plot_dimreduction.py +389 -0
  1223. package/data/workflows/scrnaseq-scanpy-core-analysis/scripts/plot_qc.py +320 -0
  1224. package/data/workflows/scrnaseq-scanpy-core-analysis/scripts/pseudobulk_de.py +553 -0
  1225. package/data/workflows/scrnaseq-scanpy-core-analysis/scripts/qc_metrics.py +477 -0
  1226. package/data/workflows/scrnaseq-scanpy-core-analysis/scripts/remove_ambient_rna.py +347 -0
  1227. package/data/workflows/scrnaseq-scanpy-core-analysis/scripts/run_umap.py +188 -0
  1228. package/data/workflows/scrnaseq-scanpy-core-analysis/scripts/scale_and_pca.py +365 -0
  1229. package/data/workflows/scrnaseq-scanpy-core-analysis/scripts/setup_and_import.py +334 -0
  1230. package/data/workflows/scrnaseq-seurat-core-analysis/SKILL.md +585 -0
  1231. package/data/workflows/scrnaseq-seurat-core-analysis/references/ambient_rna_correction.md +422 -0
  1232. package/data/workflows/scrnaseq-seurat-core-analysis/references/common-patterns.md +667 -0
  1233. package/data/workflows/scrnaseq-seurat-core-analysis/references/decision-guide.md +456 -0
  1234. package/data/workflows/scrnaseq-seurat-core-analysis/references/integration_methods.md +864 -0
  1235. package/data/workflows/scrnaseq-seurat-core-analysis/references/marker_gene_database.md +471 -0
  1236. package/data/workflows/scrnaseq-seurat-core-analysis/references/pseudobulk_de_guide.md +408 -0
  1237. package/data/workflows/scrnaseq-seurat-core-analysis/references/qc_guidelines.md +452 -0
  1238. package/data/workflows/scrnaseq-seurat-core-analysis/references/seurat_best_practices.md +417 -0
  1239. package/data/workflows/scrnaseq-seurat-core-analysis/references/troubleshooting_guide.md +566 -0
  1240. package/data/workflows/scrnaseq-seurat-core-analysis/references/workflow-details.md +801 -0
  1241. package/data/workflows/scrnaseq-seurat-core-analysis/scripts/annotate_celltypes.R +306 -0
  1242. package/data/workflows/scrnaseq-seurat-core-analysis/scripts/cluster_cells.R +223 -0
  1243. package/data/workflows/scrnaseq-seurat-core-analysis/scripts/export_results.R +292 -0
  1244. package/data/workflows/scrnaseq-seurat-core-analysis/scripts/filter_cells.R +576 -0
  1245. package/data/workflows/scrnaseq-seurat-core-analysis/scripts/find_markers.R +325 -0
  1246. package/data/workflows/scrnaseq-seurat-core-analysis/scripts/find_variable_features.R +106 -0
  1247. package/data/workflows/scrnaseq-seurat-core-analysis/scripts/integrate_batches.R +504 -0
  1248. package/data/workflows/scrnaseq-seurat-core-analysis/scripts/integration_diagnostics.R +596 -0
  1249. package/data/workflows/scrnaseq-seurat-core-analysis/scripts/load_example_data.R +89 -0
  1250. package/data/workflows/scrnaseq-seurat-core-analysis/scripts/normalize_data.R +184 -0
  1251. package/data/workflows/scrnaseq-seurat-core-analysis/scripts/plot_dimreduction.R +273 -0
  1252. package/data/workflows/scrnaseq-seurat-core-analysis/scripts/plot_qc.R +250 -0
  1253. package/data/workflows/scrnaseq-seurat-core-analysis/scripts/pseudobulk_de.R +324 -0
  1254. package/data/workflows/scrnaseq-seurat-core-analysis/scripts/qc_metrics.R +358 -0
  1255. package/data/workflows/scrnaseq-seurat-core-analysis/scripts/remove_ambient_rna.R +281 -0
  1256. package/data/workflows/scrnaseq-seurat-core-analysis/scripts/run_umap.R +116 -0
  1257. package/data/workflows/scrnaseq-seurat-core-analysis/scripts/scale_and_pca.R +243 -0
  1258. package/data/workflows/scrnaseq-seurat-core-analysis/scripts/setup_and_import.R +193 -0
  1259. package/data/workflows/spatial-transcriptomics/SKILL.md +256 -0
  1260. package/data/workflows/spatial-transcriptomics/references/spatial-analysis-guide.md +216 -0
  1261. package/data/workflows/spatial-transcriptomics/scripts/export_results.py +214 -0
  1262. package/data/workflows/spatial-transcriptomics/scripts/generate_all_plots.py +397 -0
  1263. package/data/workflows/spatial-transcriptomics/scripts/load_example_data.py +175 -0
  1264. package/data/workflows/spatial-transcriptomics/scripts/spatial_workflow.py +206 -0
  1265. package/dist/bgi.js +28 -1
  1266. package/package.json +2 -1
@@ -0,0 +1,1813 @@
1
+ """
2
+ Generate comprehensive markdown landscape report for clinical trials.
3
+
4
+ Config-driven: loads disease-specific settings (mechanism descriptions,
5
+ highlight sections, indication categories) from a YAML config via
6
+ disease_config.py. Falls back to generic behavior when no config is provided.
7
+
8
+ Creates a detailed competitive intelligence report with:
9
+ - Executive summary with strategic highlights
10
+ - Mechanism deep-dives with drug pipeline tables
11
+ - Geographic landscape analysis
12
+ - Study design breakdown
13
+ - Phase transition funnel
14
+ - Combination therapy landscape
15
+ - Late-stage pipeline analysis (Phase 3 trials)
16
+ - Phase 3 endpoint comparison
17
+ - Upcoming readouts / expected completion dates
18
+ - Company competitive positioning and portfolio analysis
19
+ - Indication breakdown (config-driven or data-driven)
20
+ - Patient population analysis
21
+ - Trial arms & comparator analysis
22
+ - Collaborator network
23
+ - Regulatory signals
24
+ - Enrollment and investment signals (clean)
25
+ - Biosimilar threat assessment
26
+ - Whitespace / unmet needs analysis
27
+ """
28
+
29
+ import re
30
+ import numpy as np
31
+ import pandas as pd
32
+ from datetime import datetime, timedelta
33
+
34
+ try:
35
+ from disease_config import (
36
+ get_mechanism_descriptions, get_disease_name, get_disease_short,
37
+ get_highlight_mechanisms, get_highlight_sponsors,
38
+ get_executive_highlights, get_indication_categories,
39
+ )
40
+ except ImportError:
41
+ from scripts.disease_config import (
42
+ get_mechanism_descriptions, get_disease_name, get_disease_short,
43
+ get_highlight_mechanisms, get_highlight_sponsors,
44
+ get_executive_highlights, get_indication_categories,
45
+ )
46
+
47
+
48
+ # ============================================================
49
+ # SAFE COLUMN ACCESS HELPER
50
+ # ============================================================
51
+
52
+ def _safe_col(df, col, default=None):
53
+ """
54
+ Safely access a DataFrame column, returning a Series of `default` if missing.
55
+
56
+ This allows backward-compatibility when generating a report from older compiled
57
+ DataFrames that may not contain Phase-2B columns (geographic, study-design, etc.).
58
+
59
+ Parameters
60
+ ----------
61
+ df : pd.DataFrame
62
+ col : str
63
+ Column name to look up.
64
+ default : scalar or None
65
+ Value to fill if column is absent. None -> pd.NA.
66
+
67
+ Returns
68
+ -------
69
+ pd.Series
70
+ """
71
+ if col in df.columns:
72
+ return df[col]
73
+ return pd.Series(default, index=df.index, name=col)
74
+
75
+
76
+ def generate_report(trials_df, parameters=None, output_file="landscape_report.md", config=None):
77
+ """
78
+ Generate comprehensive markdown landscape report.
79
+
80
+ Parameters
81
+ ----------
82
+ trials_df : pd.DataFrame
83
+ Compiled trials from compile_trials().
84
+ parameters : dict, optional
85
+ Query parameters and metadata.
86
+ output_file : str
87
+ Output file path.
88
+ config : dict, optional
89
+ Disease configuration loaded via load_disease_config().
90
+ When provided, disease-specific names, descriptions, highlight
91
+ sections, and indication categories are drawn from config.
92
+ When None, generic/data-driven fallbacks are used.
93
+
94
+ Returns
95
+ -------
96
+ str
97
+ The generated report as a string.
98
+ """
99
+ if parameters is None:
100
+ parameters = {}
101
+
102
+ # ------------------------------------------------------------------
103
+ # Load config-driven data (all functions handle config=None gracefully)
104
+ # ------------------------------------------------------------------
105
+ disease_name = get_disease_name(config, default="Clinical Trial")
106
+ disease_short = get_disease_short(config, default="")
107
+ mechanism_descriptions = get_mechanism_descriptions(config)
108
+ highlight_mechanisms = get_highlight_mechanisms(config)
109
+ highlight_sponsors = get_highlight_sponsors(config)
110
+ exec_highlights = get_executive_highlights(config)
111
+ indication_cats = get_indication_categories(config)
112
+
113
+ sections = []
114
+ now = datetime.now()
115
+
116
+ # ==========================================
117
+ # TITLE
118
+ # ==========================================
119
+ sections.append(f"# {disease_name} Clinical Trial Landscape Report\n")
120
+ sections.append(f"*Competitive Intelligence Report \u2014 Generated: {now.strftime('%Y-%m-%d %H:%M')}*\n")
121
+ sections.append("---\n")
122
+
123
+ # ==========================================
124
+ # TABLE OF CONTENTS (dynamic based on config highlight sections)
125
+ # ==========================================
126
+ sections.append("## Table of Contents\n")
127
+
128
+ # Fixed sections 1-11
129
+ toc_entries = [
130
+ "Executive Summary",
131
+ "Mechanism \u00d7 Phase Overview",
132
+ "Mechanism Deep Dives",
133
+ "Geographic Landscape",
134
+ "Study Design Analysis",
135
+ "Phase Transition Funnel",
136
+ "Late-Stage Pipeline (Phase 3)",
137
+ "Phase 3 Endpoint Comparison",
138
+ "Combination Therapy Landscape",
139
+ "Upcoming Readouts",
140
+ "Drug-Level Pipeline",
141
+ ]
142
+ # Dynamic highlight sections from config (mechanisms then sponsors)
143
+ for hl in highlight_mechanisms:
144
+ toc_entries.append(hl["section_title"])
145
+ for hl in highlight_sponsors:
146
+ sponsor_title = hl["section_title"]
147
+ if disease_short and disease_short not in sponsor_title:
148
+ toc_entries.append(f"{sponsor_title}")
149
+ else:
150
+ toc_entries.append(sponsor_title)
151
+ # Remaining fixed sections
152
+ toc_entries.extend([
153
+ "Sponsor Competitive Landscape",
154
+ "Indication Breakdown",
155
+ "Patient Population Analysis",
156
+ "Trial Arms & Comparator Analysis",
157
+ "Enrollment & Investment Signals",
158
+ "Biosimilar Landscape",
159
+ "Whitespace & Unmet Needs",
160
+ "Collaborator Network",
161
+ "Regulatory Signals",
162
+ "Data Notes",
163
+ ])
164
+ for i, title in enumerate(toc_entries, 1):
165
+ anchor = re.sub(r'[^a-z0-9\s-]', '', title.lower()).strip().replace(' ', '-')
166
+ anchor = re.sub(r'-+', '-', anchor)
167
+ sections.append(f"{i}. [{title}](#{anchor})")
168
+ sections.append("")
169
+
170
+ # ==========================================
171
+ # 1. EXECUTIVE SUMMARY
172
+ # ==========================================
173
+ sections.append("---\n")
174
+ sections.append("## Executive Summary\n")
175
+
176
+ n_total = len(trials_df)
177
+ n_mechanisms = trials_df["mechanism"].nunique()
178
+ n_sponsors = trials_df["sponsor_normalized"].nunique()
179
+ n_industry = int(trials_df["is_industry"].sum())
180
+ n_academic = n_total - n_industry
181
+
182
+ # Pharmacological mechanism counts (exclude non-pharma and unclassified)
183
+ non_pharma = ["Non-pharmacological", "Unclassified", "Other Biologic", "Small Molecule (Other)"]
184
+ pharma_df = trials_df[~trials_df["mechanism"].isin(non_pharma)]
185
+ n_pharma_mechanisms = pharma_df["mechanism"].nunique()
186
+
187
+ top_mech = trials_df["mechanism"].value_counts()
188
+ pharma_mechs = top_mech[~top_mech.index.isin(non_pharma)]
189
+ top_sponsor = trials_df["sponsor_normalized"].value_counts()
190
+
191
+ # Phase counts
192
+ phase3_count = len(trials_df[trials_df["phase_normalized"] == "Phase 3"])
193
+ phase2_count = len(trials_df[trials_df["phase_normalized"].isin(["Phase 2", "Phase 2/3"])])
194
+ phase1_count = len(trials_df[trials_df["phase_normalized"].isin(["Phase 1", "Phase 1/2"])])
195
+
196
+ # Recruiting stats
197
+ n_recruiting = len(trials_df[trials_df["overall_status"] == "RECRUITING"])
198
+ n_not_yet = len(trials_df[trials_df["overall_status"] == "NOT_YET_RECRUITING"])
199
+
200
+ sections.append("### Key Figures\n")
201
+ sections.append(f"| Metric | Value |")
202
+ sections.append(f"|--------|-------|")
203
+ sections.append(f"| Total active clinical trials | **{n_total}** |")
204
+ sections.append(f"| Pharmacological mechanism classes | **{n_pharma_mechanisms}** |")
205
+ sections.append(f"| Unique sponsors | **{n_sponsors}** |")
206
+ sections.append(f"| Industry-sponsored | **{n_industry}** ({n_industry/n_total*100:.0f}%) |")
207
+ sections.append(f"| Academic / Other | **{n_academic}** ({n_academic/n_total*100:.0f}%) |")
208
+ sections.append(f"| Phase 3 trials | **{phase3_count}** |")
209
+ sections.append(f"| Phase 2 / 2-3 trials | **{phase2_count}** |")
210
+ sections.append(f"| Phase 1 / 1-2 trials | **{phase1_count}** |")
211
+ sections.append(f"| Currently recruiting | **{n_recruiting}** |")
212
+ sections.append(f"| Not yet recruiting | **{n_not_yet}** |")
213
+ sections.append("")
214
+
215
+ sections.append("### Strategic Highlights\n")
216
+ if len(pharma_mechs) > 0:
217
+ sections.append(f"- **Most active mechanism class:** {pharma_mechs.index[0]} ({pharma_mechs.iloc[0]} trials)")
218
+ if len(top_sponsor) > 0:
219
+ sections.append(f"- **Most active sponsor:** {top_sponsor.index[0]} ({top_sponsor.iloc[0]} trials)")
220
+
221
+ # Config-driven executive highlights (mechanisms)
222
+ exec_mechs = exec_highlights.get("mechanisms", []) if exec_highlights else []
223
+ exec_spons = exec_highlights.get("sponsors", []) if exec_highlights else []
224
+
225
+ for mech_name in exec_mechs:
226
+ mech_trials = trials_df[trials_df["mechanism"] == mech_name]
227
+ if len(mech_trials) > 0:
228
+ mech_p3 = len(mech_trials[mech_trials["phase_normalized"] == "Phase 3"])
229
+ p3_note = f" ({mech_p3} in Phase 3)" if mech_p3 > 0 else ""
230
+ sections.append(f"- **{mech_name}:** {len(mech_trials)} trials{p3_note}")
231
+
232
+ # Config-driven executive highlights (sponsors)
233
+ for sponsor_name in exec_spons:
234
+ spons_trials = trials_df[trials_df["sponsor_normalized"] == sponsor_name]
235
+ if len(spons_trials) > 0:
236
+ spons_mechs = spons_trials["mechanism"].nunique()
237
+ sections.append(f"- **{sponsor_name} portfolio:** {len(spons_trials)} active trials across {spons_mechs} mechanism(s)")
238
+
239
+ # Upcoming readouts
240
+ upcoming = _get_upcoming_readouts(trials_df, months=18)
241
+ if len(upcoming) > 0:
242
+ sections.append(f"- **Upcoming readouts (next 18 months):** {len(upcoming)} trials expected to report")
243
+
244
+ # Total enrollment (prefer enrollment_clean, fallback to enrollment)
245
+ enroll_col = "enrollment_clean" if "enrollment_clean" in trials_df.columns else "enrollment"
246
+ total_enrollment = trials_df[enroll_col].sum()
247
+ if total_enrollment > 0:
248
+ sections.append(f"- **Total enrolled/planned participants:** ~{int(total_enrollment):,}")
249
+
250
+ # --- NEW: Geographic summary ---
251
+ n_countries_col = _safe_col(trials_df, "n_countries", default=np.nan)
252
+ if n_countries_col.notna().any():
253
+ unique_countries = set()
254
+ for val in _safe_col(trials_df, "countries_str", default=""):
255
+ if pd.notna(val) and str(val).strip():
256
+ for c in str(val).split(";"):
257
+ c = c.strip()
258
+ if c:
259
+ unique_countries.add(c)
260
+ if unique_countries:
261
+ sections.append(f"- **Geographic reach:** trials conducted across **{len(unique_countries)} countries**")
262
+
263
+ # --- NEW: Study design summary ---
264
+ study_design_col = _safe_col(trials_df, "study_design_category", default="")
265
+ rct_mask = study_design_col.astype(str).str.lower().str.contains("rct|randomized", na=False)
266
+ n_rct = int(rct_mask.sum())
267
+ if n_rct > 0:
268
+ sections.append(f"- **RCTs:** {n_rct} ({n_rct/n_total*100:.0f}%) of trials are randomized controlled trials")
269
+
270
+ # --- NEW: Combination therapy summary ---
271
+ combo_col = _safe_col(trials_df, "is_combination", default=False)
272
+ n_combo = int(combo_col.fillna(False).astype(bool).sum())
273
+ if n_combo > 0:
274
+ sections.append(f"- **Combination therapies:** {n_combo} trials testing multi-drug regimens")
275
+
276
+ sections.append("")
277
+
278
+ # ==========================================
279
+ # 2. MECHANISM x PHASE OVERVIEW
280
+ # ==========================================
281
+ sections.append("---\n")
282
+ sections.append("## Mechanism \u00d7 Phase Overview\n")
283
+
284
+ phase_order = ["Phase 1", "Phase 1/2", "Phase 2", "Phase 2/3", "Phase 3", "Phase 4"]
285
+ ct = pd.crosstab(trials_df["mechanism"], trials_df["phase_normalized"])
286
+ ct = ct.reindex(columns=[p for p in phase_order if p in ct.columns], fill_value=0)
287
+ ct["Total"] = ct.sum(axis=1)
288
+ ct = ct.sort_values("Total", ascending=False)
289
+
290
+ cols = list(ct.columns)
291
+ header = "| Mechanism | " + " | ".join(cols) + " |"
292
+ sep = "|---|" + "|".join(["---:"] * len(cols)) + "|"
293
+ sections.append(header)
294
+ sections.append(sep)
295
+ for mech, row in ct.iterrows():
296
+ vals = " | ".join(str(int(row[c])) for c in cols)
297
+ sections.append(f"| {mech} | {vals} |")
298
+ sections.append("")
299
+
300
+ # ==========================================
301
+ # 3. MECHANISM DEEP DIVES
302
+ # ==========================================
303
+ sections.append("---\n")
304
+ sections.append("## Mechanism Deep Dives\n")
305
+
306
+ # Only deep-dive into pharmacological mechanisms with 2+ trials
307
+ mech_order = pharma_mechs[pharma_mechs >= 2].index.tolist()
308
+
309
+ for mech in mech_order:
310
+ mech_df = trials_df[trials_df["mechanism"] == mech].copy()
311
+ sections.append(f"### {mech} ({len(mech_df)} trials)\n")
312
+
313
+ # Description
314
+ desc = mechanism_descriptions.get(mech, "")
315
+ if desc:
316
+ sections.append(f"{desc}\n")
317
+
318
+ # Phase breakdown
319
+ mech_phases = mech_df["phase_normalized"].value_counts()
320
+ sections.append("**Phase distribution:**")
321
+ for phase, count in mech_phases.items():
322
+ sections.append(f"- {phase}: {count} trial{'s' if count > 1 else ''}")
323
+ sections.append("")
324
+
325
+ # Status breakdown
326
+ mech_statuses = mech_df["overall_status"].value_counts()
327
+ status_parts = [f"{_format_status(s)}: {c}" for s, c in mech_statuses.items()]
328
+ sections.append(f"**Recruitment status:** {' | '.join(status_parts)}\n")
329
+
330
+ # Key sponsors for this mechanism
331
+ mech_sponsors = mech_df["sponsor_normalized"].value_counts()
332
+ if len(mech_sponsors) > 0:
333
+ sections.append("**Key sponsors:**")
334
+ for sponsor, count in mech_sponsors.head(8).items():
335
+ phases = sorted(mech_df[mech_df["sponsor_normalized"] == sponsor]["phase_normalized"].unique())
336
+ sections.append(f"- **{sponsor}**: {count} trial{'s' if count > 1 else ''} ({', '.join(phases)})")
337
+ sections.append("")
338
+
339
+ # Drug pipeline table for this mechanism
340
+ drugs = _extract_drug_table(mech_df, indication_cats=indication_cats)
341
+ if len(drugs) > 0:
342
+ sections.append("**Drug pipeline:**\n")
343
+ sections.append("| Drug / Intervention | Sponsor | Phase | Status | Indication | Enrollment | Expected Completion |")
344
+ sections.append("|---|---|---|---|---|---:|---|")
345
+ for _, drug_row in drugs.iterrows():
346
+ sections.append(
347
+ f"| {drug_row['drug']} | {drug_row['sponsor']} | {drug_row['phase']} | "
348
+ f"{drug_row['status']} | {drug_row['indication']} | "
349
+ f"{drug_row['enrollment']} | {drug_row['completion']} |"
350
+ )
351
+ sections.append("")
352
+
353
+ # Notable trials (brief list with NCT IDs)
354
+ sections.append("**Trial listing:**\n")
355
+ sections.append("| NCT ID | Title | Sponsor | Phase | Status | Enrollment | Completion |")
356
+ sections.append("|---|---|---|---|---|---:|---|")
357
+ for _, trial in mech_df.head(15).iterrows():
358
+ enrollment_val = _get_enrollment(trial)
359
+ enrollment_str = f"{int(enrollment_val):,}" if pd.notna(enrollment_val) and enrollment_val > 0 else "\u2014"
360
+ completion_str = _format_date(trial.get("completion_date", ""))
361
+ sections.append(
362
+ f"| {trial['nct_id']} | {trial['brief_title'][:65]} | {trial['sponsor_normalized']} | "
363
+ f"{trial['phase_normalized']} | {_format_status(trial['overall_status'])} | "
364
+ f"{enrollment_str} | {completion_str} |"
365
+ )
366
+ if len(mech_df) > 15:
367
+ sections.append(f"\n*... and {len(mech_df) - 15} additional trials*")
368
+ sections.append("")
369
+
370
+ # ==========================================
371
+ # 4. GEOGRAPHIC LANDSCAPE
372
+ # ==========================================
373
+ sections.append("---\n")
374
+ sections.append("## Geographic Landscape\n")
375
+ sections.append(
376
+ f"Geographic distribution of {disease_short or disease_name} trials reveals where investment and patient "
377
+ "recruitment are concentrated. Multi-country trials signal global registration "
378
+ "intent; single-country trials may indicate regional strategies or early-phase work.\n"
379
+ )
380
+
381
+ countries_str_col = _safe_col(trials_df, "countries_str", default="")
382
+ n_countries_col = _safe_col(trials_df, "n_countries", default=np.nan)
383
+ regions_str_col = _safe_col(trials_df, "regions_str", default="")
384
+
385
+ if countries_str_col.astype(str).str.strip().replace("", pd.NA).dropna().shape[0] > 0:
386
+ # Build country-level counts
387
+ country_counts = {}
388
+ for val in countries_str_col:
389
+ if pd.notna(val) and str(val).strip():
390
+ for c in str(val).split(";"):
391
+ c = c.strip()
392
+ if c:
393
+ country_counts[c] = country_counts.get(c, 0) + 1
394
+
395
+ if country_counts:
396
+ sorted_countries = sorted(country_counts.items(), key=lambda x: x[1], reverse=True)
397
+ sections.append("### Top 15 Countries by Trial Presence\n")
398
+ sections.append("| Rank | Country | Trials |")
399
+ sections.append("|---:|---|---:|")
400
+ for rank, (country, count) in enumerate(sorted_countries[:15], 1):
401
+ sections.append(f"| {rank} | {country} | {count} |")
402
+ if len(sorted_countries) > 15:
403
+ sections.append(f"\n*{len(sorted_countries) - 15} additional countries with active trials.*")
404
+ sections.append("")
405
+
406
+ # Region breakdown
407
+ region_counts = {}
408
+ for val in regions_str_col:
409
+ if pd.notna(val) and str(val).strip():
410
+ for r in str(val).split(";"):
411
+ r = r.strip()
412
+ if r:
413
+ region_counts[r] = region_counts.get(r, 0) + 1
414
+
415
+ if region_counts:
416
+ sorted_regions = sorted(region_counts.items(), key=lambda x: x[1], reverse=True)
417
+ sections.append("### Region Breakdown\n")
418
+ sections.append("| Region | Trials |")
419
+ sections.append("|---|---:|")
420
+ for region, count in sorted_regions:
421
+ sections.append(f"| {region} | {count} |")
422
+ sections.append("")
423
+
424
+ # Mechanism x region cross-tab
425
+ if region_counts:
426
+ # Build a DataFrame with one row per trial-region pair
427
+ rows_mr = []
428
+ for idx, row in trials_df.iterrows():
429
+ rval = _safe_col_val(row, "regions_str", "")
430
+ if pd.notna(rval) and str(rval).strip():
431
+ for r in str(rval).split(";"):
432
+ r = r.strip()
433
+ if r:
434
+ rows_mr.append({"mechanism": row["mechanism"], "region": r})
435
+ if rows_mr:
436
+ mr_df = pd.DataFrame(rows_mr)
437
+ mr_ct = pd.crosstab(mr_df["mechanism"], mr_df["region"])
438
+ mr_ct["Total"] = mr_ct.sum(axis=1)
439
+ mr_ct = mr_ct.sort_values("Total", ascending=False)
440
+
441
+ sections.append("### Mechanism \u00d7 Region\n")
442
+ mr_cols = [c for c in mr_ct.columns if c != "Total"] + ["Total"]
443
+ mr_ct = mr_ct[mr_cols]
444
+ header = "| Mechanism | " + " | ".join(str(c) for c in mr_cols) + " |"
445
+ sep_line = "|---|" + "|".join(["---:"] * len(mr_cols)) + "|"
446
+ sections.append(header)
447
+ sections.append(sep_line)
448
+ for mech, vals in mr_ct.iterrows():
449
+ row_str = " | ".join(str(int(vals[c])) for c in mr_cols)
450
+ sections.append(f"| {mech} | {row_str} |")
451
+ sections.append("")
452
+
453
+ # Multi-country trial distribution
454
+ mc = n_countries_col.dropna()
455
+ if len(mc) > 0:
456
+ n_multi = int((mc > 1).sum())
457
+ n_single = int((mc == 1).sum())
458
+ sections.append("### Multi-Country Trial Distribution\n")
459
+ sections.append(f"- Single-country: {n_single} trials")
460
+ sections.append(f"- Multi-country: {n_multi} trials")
461
+ avg_countries = mc[mc > 1].mean()
462
+ if pd.notna(avg_countries):
463
+ sections.append(f"- Average countries per multi-country trial: {avg_countries:.1f}")
464
+ sections.append("")
465
+ else:
466
+ sections.append("*Geographic data not available in current dataset. Re-run compile_trials.py with Phase 2B to populate.*\n")
467
+
468
+ # ==========================================
469
+ # 5. STUDY DESIGN ANALYSIS
470
+ # ==========================================
471
+ sections.append("---\n")
472
+ sections.append("## Study Design Analysis\n")
473
+ sections.append(
474
+ "Study design choices impact data quality, regulatory acceptance, and time to "
475
+ "market. A high proportion of double-blind RCTs signals mature, registration-intent "
476
+ "programs.\n"
477
+ )
478
+
479
+ design_cat_col = _safe_col(trials_df, "study_design_category", default="")
480
+ allocation_col = _safe_col(trials_df, "allocation", default="")
481
+ masking_col = _safe_col(trials_df, "masking", default="")
482
+ model_col = _safe_col(trials_df, "intervention_model", default="")
483
+
484
+ has_design_data = design_cat_col.astype(str).str.strip().replace("", pd.NA).dropna().shape[0] > 0
485
+
486
+ if has_design_data:
487
+ # Study design category breakdown
488
+ design_vc = design_cat_col.astype(str).replace("", "Unknown").value_counts()
489
+ sections.append("### Study Design Category Breakdown\n")
490
+ sections.append("| Design Category | Trials | % |")
491
+ sections.append("|---|---:|---:|")
492
+ for cat, cnt in design_vc.items():
493
+ if str(cat).strip() and str(cat) != "nan":
494
+ sections.append(f"| {cat} | {cnt} | {cnt/n_total*100:.0f}% |")
495
+ sections.append("")
496
+
497
+ # Allocation breakdown
498
+ alloc_vc = allocation_col.astype(str).replace("", "Not reported").value_counts()
499
+ if len(alloc_vc) > 0:
500
+ sections.append("### Allocation\n")
501
+ sections.append("| Allocation | Trials |")
502
+ sections.append("|---|---:|")
503
+ for a, cnt in alloc_vc.items():
504
+ if str(a) != "nan":
505
+ sections.append(f"| {a} | {cnt} |")
506
+ sections.append("")
507
+
508
+ # Masking breakdown
509
+ masking_vc = masking_col.astype(str).replace("", "Not reported").value_counts()
510
+ if len(masking_vc) > 0:
511
+ sections.append("### Masking / Blinding\n")
512
+ sections.append("| Masking | Trials |")
513
+ sections.append("|---|---:|")
514
+ for m, cnt in masking_vc.items():
515
+ if str(m) != "nan":
516
+ sections.append(f"| {m} | {cnt} |")
517
+ sections.append("")
518
+
519
+ # % double-blind by phase
520
+ db_mask = masking_col.astype(str).str.lower().str.contains("double", na=False)
521
+ if db_mask.any():
522
+ sections.append("### % Double-Blind by Phase\n")
523
+ phase_groups = trials_df.groupby("phase_normalized")
524
+ sections.append("| Phase | Total Trials | Double-Blind | % Double-Blind |")
525
+ sections.append("|---|---:|---:|---:|")
526
+ for phase in phase_order:
527
+ if phase in phase_groups.groups:
528
+ idx = phase_groups.groups[phase]
529
+ n_phase = len(idx)
530
+ n_db = int(db_mask.loc[idx].sum())
531
+ pct = n_db / n_phase * 100 if n_phase > 0 else 0
532
+ sections.append(f"| {phase} | {n_phase} | {n_db} | {pct:.0f}% |")
533
+ sections.append("")
534
+
535
+ # Intervention model
536
+ model_vc = model_col.astype(str).replace("", "Not reported").value_counts()
537
+ if len(model_vc) > 0:
538
+ sections.append("### Intervention Model\n")
539
+ sections.append("| Model | Trials |")
540
+ sections.append("|---|---:|")
541
+ for m, cnt in model_vc.items():
542
+ if str(m) != "nan":
543
+ sections.append(f"| {m} | {cnt} |")
544
+ sections.append("")
545
+ else:
546
+ sections.append("*Study design data not available in current dataset. Re-run compile_trials.py with Phase 2B to populate.*\n")
547
+
548
+ # ==========================================
549
+ # 6. PHASE TRANSITION FUNNEL
550
+ # ==========================================
551
+ sections.append("---\n")
552
+ sections.append("## Phase Transition Funnel\n")
553
+ sections.append(
554
+ "The phase funnel shows how many trials each mechanism has at each stage, "
555
+ "providing a rough indicator of pipeline depth and implied transition rates. "
556
+ "Higher Phase 2-to-3 ratios suggest stronger conviction by sponsors.\n"
557
+ )
558
+
559
+ # Phase counts per mechanism
560
+ funnel_phases = ["Phase 1", "Phase 1/2", "Phase 2", "Phase 2/3", "Phase 3"]
561
+ funnel_mechs = pharma_mechs[pharma_mechs >= 2].index.tolist()
562
+
563
+ sections.append("### Trials per Phase by Mechanism\n")
564
+ sections.append("| Mechanism | Ph 1 | Ph 1/2 | Ph 2 | Ph 2/3 | Ph 3 | Total | Ph2\u21923 Ratio |")
565
+ sections.append("|---|---:|---:|---:|---:|---:|---:|---|")
566
+ for mech in funnel_mechs:
567
+ mech_df = trials_df[trials_df["mechanism"] == mech]
568
+ counts = {}
569
+ for p in funnel_phases:
570
+ counts[p] = len(mech_df[mech_df["phase_normalized"] == p])
571
+ total = sum(counts.values())
572
+ # Implied transition ratio: Phase 3 / (Phase 2 + Phase 2/3) if denominator > 0
573
+ denom = counts["Phase 2"] + counts["Phase 2/3"]
574
+ if denom > 0 and counts["Phase 3"] > 0:
575
+ ratio = f"{counts['Phase 3']/denom:.1f}"
576
+ elif counts["Phase 3"] > 0:
577
+ ratio = "\u221e" # infinity — no Phase 2 trials
578
+ else:
579
+ ratio = "\u2014"
580
+ sections.append(
581
+ f"| {mech} | {counts['Phase 1']} | {counts['Phase 1/2']} | "
582
+ f"{counts['Phase 2']} | {counts['Phase 2/3']} | {counts['Phase 3']} | "
583
+ f"{total} | {ratio} |"
584
+ )
585
+ sections.append("")
586
+
587
+ sections.append(
588
+ "*Ph2\u21923 Ratio = Phase 3 count / (Phase 2 + Phase 2/3 count). Higher values imply "
589
+ "strong advancement conviction. \"\u221e\" means Phase 3 trials exist without active Phase 2.*\n"
590
+ )
591
+
592
+ # ==========================================
593
+ # 7. LATE-STAGE PIPELINE (Phase 3)
594
+ # ==========================================
595
+ sections.append("---\n")
596
+ sections.append("## Late-Stage Pipeline (Phase 3)\n")
597
+
598
+ p3_df = trials_df[trials_df["phase_normalized"].isin(["Phase 3", "Phase 3/4"])].copy()
599
+ if len(p3_df) > 0:
600
+ sections.append(f"**{len(p3_df)} Phase 3 trials** represent the most advanced pipeline with near-term commercial potential.\n")
601
+
602
+ # Phase 3 by mechanism
603
+ p3_mechs = p3_df["mechanism"].value_counts()
604
+ sections.append("**Phase 3 by mechanism:**")
605
+ for mech, count in p3_mechs.items():
606
+ sections.append(f"- {mech}: {count}")
607
+ sections.append("")
608
+
609
+ # Full Phase 3 table
610
+ sections.append("| NCT ID | Drug(s) | Mechanism | Sponsor | Indication | Enrollment | Status | Expected Completion |")
611
+ sections.append("|---|---|---|---|---|---:|---|---|")
612
+ for _, trial in p3_df.sort_values("completion_date").iterrows():
613
+ drugs = trial.get("drug_names_str", "")
614
+ if not drugs or drugs == "nan":
615
+ drugs = "\u2014"
616
+ elif len(drugs) > 40:
617
+ drugs = drugs[:37] + "..."
618
+ enrollment_val = _get_enrollment(trial)
619
+ enrollment_str = f"{int(enrollment_val):,}" if pd.notna(enrollment_val) and enrollment_val > 0 else "\u2014"
620
+ completion_str = _format_date(trial.get("completion_date", ""))
621
+ indication = _extract_indication(trial.get("conditions_str", ""), indication_cats=indication_cats)
622
+ sections.append(
623
+ f"| {trial['nct_id']} | {drugs} | {trial['mechanism']} | "
624
+ f"{trial['sponsor_normalized']} | {indication} | {enrollment_str} | "
625
+ f"{_format_status(trial['overall_status'])} | {completion_str} |"
626
+ )
627
+ sections.append("")
628
+ else:
629
+ sections.append("*No Phase 3 trials found in the current dataset.*\n")
630
+
631
+ # ==========================================
632
+ # 8. PHASE 3 ENDPOINT COMPARISON
633
+ # ==========================================
634
+ sections.append("---\n")
635
+ sections.append("## Phase 3 Endpoint Comparison\n")
636
+ sections.append(
637
+ "Primary endpoints in Phase 3 trials define the regulatory bar each program must clear. "
638
+ "Comparing endpoints across mechanisms reveals whether the field is converging on "
639
+ "standard outcomes (e.g., clinical remission at Week 12/52) or pursuing differentiated strategies.\n"
640
+ )
641
+
642
+ endpoint_col = _safe_col(trials_df, "primary_endpoint", default="")
643
+ timeframe_col = _safe_col(trials_df, "endpoint_timeframe", default="")
644
+
645
+ p3_with_endpoint = p3_df.copy() if len(p3_df) > 0 else pd.DataFrame()
646
+ if len(p3_with_endpoint) > 0:
647
+ p3_with_endpoint["_endpoint"] = endpoint_col.reindex(p3_with_endpoint.index).fillna("").astype(str)
648
+ p3_with_endpoint["_timeframe"] = timeframe_col.reindex(p3_with_endpoint.index).fillna("").astype(str)
649
+ has_ep = p3_with_endpoint["_endpoint"].str.strip().replace("", pd.NA).dropna()
650
+
651
+ if len(has_ep) > 0:
652
+ sections.append("### Primary Endpoints by Mechanism (Phase 3)\n")
653
+ sections.append("| NCT ID | Mechanism | Sponsor | Primary Endpoint | Timeframe |")
654
+ sections.append("|---|---|---|---|---|")
655
+ for mech in p3_with_endpoint["mechanism"].unique():
656
+ mech_ep = p3_with_endpoint[p3_with_endpoint["mechanism"] == mech]
657
+ for _, trial in mech_ep.iterrows():
658
+ ep_text = str(trial["_endpoint"]).strip()
659
+ if not ep_text or ep_text == "nan":
660
+ ep_text = "\u2014"
661
+ elif len(ep_text) > 80:
662
+ ep_text = ep_text[:77] + "..."
663
+ tf_text = str(trial["_timeframe"]).strip()
664
+ if not tf_text or tf_text == "nan":
665
+ tf_text = "\u2014"
666
+ sections.append(
667
+ f"| {trial['nct_id']} | {mech} | {trial['sponsor_normalized']} | "
668
+ f"{ep_text} | {tf_text} |"
669
+ )
670
+ sections.append("")
671
+
672
+ # Timeframe comparison summary
673
+ tf_series = p3_with_endpoint["_timeframe"].replace("", pd.NA).dropna()
674
+ if len(tf_series) > 0:
675
+ sections.append("### Timeframe Summary\n")
676
+ tf_counts = tf_series.value_counts().head(10)
677
+ sections.append("| Timeframe | Trials |")
678
+ sections.append("|---|---:|")
679
+ for tf, cnt in tf_counts.items():
680
+ if str(tf).strip() and str(tf) != "nan":
681
+ sections.append(f"| {tf} | {cnt} |")
682
+ sections.append("")
683
+ else:
684
+ sections.append("*Endpoint data not populated for Phase 3 trials. Re-run compile_trials.py with Phase 2B.*\n")
685
+ else:
686
+ sections.append("*No Phase 3 trials in dataset.*\n")
687
+
688
+ # ==========================================
689
+ # 9. COMBINATION THERAPY LANDSCAPE
690
+ # ==========================================
691
+ sections.append("---\n")
692
+ sections.append("## Combination Therapy Landscape\n")
693
+ sections.append(
694
+ f"Combination trials (2+ active drugs) represent a growing strategy in {disease_short or disease_name}, reflecting "
695
+ "the need for improved efficacy beyond monotherapy. Identifying which mechanism pairs "
696
+ "are being tested reveals emerging therapeutic paradigms.\n"
697
+ )
698
+
699
+ combo_col = _safe_col(trials_df, "is_combination", default=False)
700
+ combo_df = trials_df[combo_col.fillna(False).astype(bool)].copy()
701
+
702
+ if len(combo_df) > 0:
703
+ sections.append(f"**{len(combo_df)} combination therapy trials** identified.\n")
704
+
705
+ # By mechanism
706
+ combo_mechs = combo_df["mechanism"].value_counts()
707
+ sections.append("### Combination Trials by Mechanism\n")
708
+ sections.append("| Mechanism | Combo Trials | % of Mechanism Total |")
709
+ sections.append("|---|---:|---:|")
710
+ for mech, cnt in combo_mechs.items():
711
+ total_mech = len(trials_df[trials_df["mechanism"] == mech])
712
+ pct = cnt / total_mech * 100 if total_mech > 0 else 0
713
+ sections.append(f"| {mech} | {cnt} | {pct:.0f}% |")
714
+ sections.append("")
715
+
716
+ # Full combo listing
717
+ sections.append("### Combination Trial Details\n")
718
+ sections.append("| NCT ID | Drug(s) | Mechanism | Sponsor | Phase | Status |")
719
+ sections.append("|---|---|---|---|---|---|")
720
+ for _, trial in combo_df.sort_values(["mechanism", "phase_normalized"]).iterrows():
721
+ drugs = trial.get("drug_names_str", "")
722
+ if not drugs or str(drugs) == "nan":
723
+ drugs = "\u2014"
724
+ elif len(str(drugs)) > 50:
725
+ drugs = str(drugs)[:47] + "..."
726
+ sections.append(
727
+ f"| {trial['nct_id']} | {drugs} | {trial['mechanism']} | "
728
+ f"{trial['sponsor_normalized']} | {trial['phase_normalized']} | "
729
+ f"{_format_status(trial['overall_status'])} |"
730
+ )
731
+ sections.append("")
732
+
733
+ # Sponsor activity in combinations
734
+ combo_sponsors = combo_df["sponsor_normalized"].value_counts()
735
+ if len(combo_sponsors) > 0:
736
+ sections.append("### Sponsors Active in Combination Trials\n")
737
+ sections.append("| Sponsor | Combo Trials |")
738
+ sections.append("|---|---:|")
739
+ for sponsor, cnt in combo_sponsors.head(10).items():
740
+ sections.append(f"| {sponsor} | {cnt} |")
741
+ sections.append("")
742
+ else:
743
+ combo_has_data = "is_combination" in trials_df.columns
744
+ if combo_has_data:
745
+ sections.append("*No combination therapy trials identified in the current dataset.*\n")
746
+ else:
747
+ sections.append("*Combination therapy data not available. Re-run compile_trials.py with Phase 2B to populate.*\n")
748
+
749
+ # ==========================================
750
+ # 10. UPCOMING READOUTS
751
+ # ==========================================
752
+ sections.append("---\n")
753
+ sections.append("## Upcoming Readouts\n")
754
+
755
+ upcoming_12 = _get_upcoming_readouts(trials_df, months=12)
756
+ upcoming_24 = _get_upcoming_readouts(trials_df, months=24)
757
+
758
+ if len(upcoming_24) > 0:
759
+ sections.append(
760
+ f"**{len(upcoming_12)} trials** expected to complete within 12 months, "
761
+ f"**{len(upcoming_24)}** within 24 months.\n"
762
+ )
763
+ sections.append("*Sorted by expected primary completion date.*\n")
764
+
765
+ sections.append("| NCT ID | Drug(s) | Mechanism | Sponsor | Phase | Enrollment | Expected Completion | Time to Readout |")
766
+ sections.append("|---|---|---|---|---|---:|---|---|")
767
+ for _, trial in upcoming_24.iterrows():
768
+ drugs = trial.get("drug_names_str", "")
769
+ if not drugs or str(drugs) == "nan":
770
+ drugs = "\u2014"
771
+ elif len(str(drugs)) > 35:
772
+ drugs = str(drugs)[:32] + "..."
773
+ enrollment_val = _get_enrollment(trial)
774
+ enrollment_str = f"{int(enrollment_val):,}" if pd.notna(enrollment_val) and enrollment_val > 0 else "\u2014"
775
+ completion_str = _format_date(trial.get("completion_date", ""))
776
+ time_to = _time_to_readout(trial.get("completion_date", ""))
777
+ sections.append(
778
+ f"| {trial['nct_id']} | {drugs} | {trial['mechanism']} | "
779
+ f"{trial['sponsor_normalized']} | {trial['phase_normalized']} | "
780
+ f"{enrollment_str} | {completion_str} | {time_to} |"
781
+ )
782
+ sections.append("")
783
+ else:
784
+ sections.append("*No trials with completion dates within 24 months found.*\n")
785
+
786
+ # ==========================================
787
+ # 11. DRUG-LEVEL PIPELINE
788
+ # ==========================================
789
+ sections.append("---\n")
790
+ sections.append("## Drug-Level Pipeline\n")
791
+
792
+ drug_table = _extract_drug_table(trials_df, indication_cats=indication_cats)
793
+ if len(drug_table) > 0:
794
+ sections.append(f"**{len(drug_table)} distinct drug/intervention programs** identified across all mechanisms.\n")
795
+ sections.append("| Drug / Intervention | Mechanism | Sponsor | Phase | Status | Indication | Enrollment | Completion |")
796
+ sections.append("|---|---|---|---|---|---|---:|---|")
797
+ for _, row in drug_table.iterrows():
798
+ sections.append(
799
+ f"| {row['drug']} | {row['mechanism']} | {row['sponsor']} | "
800
+ f"{row['phase']} | {row['status']} | {row['indication']} | "
801
+ f"{row['enrollment']} | {row['completion']} |"
802
+ )
803
+ sections.append("")
804
+ else:
805
+ sections.append("*No named drug interventions identified.*\n")
806
+
807
+ # ==========================================
808
+ # 12+. CONFIG-DRIVEN HIGHLIGHT SECTIONS (mechanisms)
809
+ # ==========================================
810
+ for hl in highlight_mechanisms:
811
+ hl_mech_name = hl["mechanism"]
812
+ hl_section_title = hl["section_title"]
813
+ hl_narrative = hl.get("narrative", "")
814
+ hl_trials = trials_df[trials_df["mechanism"] == hl_mech_name]
815
+ if len(hl_trials) > 0:
816
+ sections.append("---\n")
817
+ sections.append(f"## {hl_section_title}\n")
818
+ narrative_suffix = f" \u2014 {hl_narrative}" if hl_narrative else ""
819
+ sections.append(f"**{len(hl_trials)} active trials**{narrative_suffix}.\n")
820
+
821
+ desc = mechanism_descriptions.get(hl_mech_name, "")
822
+ if desc:
823
+ sections.append(f"> {desc}\n")
824
+
825
+ # Phase breakdown
826
+ hl_phases = hl_trials["phase_normalized"].value_counts()
827
+ for phase, count in hl_phases.items():
828
+ sections.append(f"- **{phase}:** {count} trials")
829
+ sections.append("")
830
+
831
+ # Key sponsors + their drugs
832
+ sections.append("### Key Sponsors & Drug Programs\n")
833
+ hl_sponsors = hl_trials["sponsor_normalized"].value_counts()
834
+ for sponsor, count in hl_sponsors.items():
835
+ sponsor_trials = hl_trials[hl_trials["sponsor_normalized"] == sponsor]
836
+ drugs = _get_unique_drugs(sponsor_trials)
837
+ phases = sorted(sponsor_trials["phase_normalized"].unique())
838
+ drug_str = f" ({', '.join(drugs)})" if drugs else ""
839
+ enrollments = sponsor_trials["enrollment"].dropna()
840
+ enroll_str = f", ~{int(enrollments.sum()):,} patients" if len(enrollments) > 0 else ""
841
+ sections.append(
842
+ f"- **{sponsor}**: {count} trial{'s' if count > 1 else ''}{drug_str} "
843
+ f"\u2014 {', '.join(phases)}{enroll_str}"
844
+ )
845
+ sections.append("")
846
+
847
+ # Indication split
848
+ sections.append("### Indication Split\n")
849
+ _add_indication_split(sections, hl_trials, indication_cats=indication_cats)
850
+
851
+ # Complete trial listing
852
+ sections.append("### Complete Trial Listing\n")
853
+ sections.append("| NCT ID | Title | Sponsor | Phase | Status | Enrollment | Completion |")
854
+ sections.append("|---|---|---|---|---|---:|---|")
855
+ for _, trial in hl_trials.iterrows():
856
+ enrollment_val = _get_enrollment(trial)
857
+ enrollment_str = f"{int(enrollment_val):,}" if pd.notna(enrollment_val) and enrollment_val > 0 else "\u2014"
858
+ completion_str = _format_date(trial.get("completion_date", ""))
859
+ sections.append(
860
+ f"| {trial['nct_id']} | {trial['brief_title'][:60]} | "
861
+ f"{trial['sponsor_normalized']} | {trial['phase_normalized']} | "
862
+ f"{_format_status(trial['overall_status'])} | {enrollment_str} | {completion_str} |"
863
+ )
864
+ sections.append("")
865
+
866
+ # ==========================================
867
+ # CONFIG-DRIVEN HIGHLIGHT SECTIONS (sponsors)
868
+ # ==========================================
869
+ for hl in highlight_sponsors:
870
+ hl_sponsor_name = hl["sponsor"]
871
+ hl_section_title = hl["section_title"]
872
+ hl_trials = trials_df[trials_df["sponsor_normalized"] == hl_sponsor_name]
873
+ if len(hl_trials) > 0:
874
+ sections.append("---\n")
875
+ sections.append(f"## {hl_section_title}\n")
876
+ sections.append(f"**{len(hl_trials)} active trials** from {hl_sponsor_name}.\n")
877
+
878
+ # Portfolio by mechanism
879
+ sections.append("### Portfolio by Mechanism\n")
880
+ hl_mechs = hl_trials["mechanism"].value_counts()
881
+ for mech, count in hl_mechs.items():
882
+ mech_trials = hl_trials[hl_trials["mechanism"] == mech]
883
+ drugs = _get_unique_drugs(mech_trials)
884
+ phases = sorted(mech_trials["phase_normalized"].unique())
885
+ drug_str = f": {', '.join(drugs)}" if drugs else ""
886
+ sections.append(f"- **{mech}** ({count} trial{'s' if count > 1 else ''}){drug_str} \u2014 {', '.join(phases)}")
887
+ sections.append("")
888
+
889
+ # Indication split
890
+ sections.append("### Indication Coverage\n")
891
+ _add_indication_split(sections, hl_trials, indication_cats=indication_cats)
892
+
893
+ # Competitive context
894
+ sections.append("### Competitive Context\n")
895
+ for mech in hl_mechs.index:
896
+ all_mech = trials_df[trials_df["mechanism"] == mech]
897
+ sponsor_mech = hl_trials[hl_trials["mechanism"] == mech]
898
+ other_sponsors = all_mech[all_mech["sponsor_normalized"] != hl_sponsor_name]["sponsor_normalized"].nunique()
899
+ sections.append(
900
+ f"- **{mech}**: {hl_sponsor_name} has {len(sponsor_mech)} of {len(all_mech)} total trials "
901
+ f"(vs. {other_sponsors} other sponsors)"
902
+ )
903
+ sections.append("")
904
+
905
+ # Full trial listing
906
+ sections.append("### Complete Trial Listing\n")
907
+ sections.append("| NCT ID | Title | Mechanism | Phase | Status | Enrollment | Completion |")
908
+ sections.append("|---|---|---|---|---|---:|---|")
909
+ for _, trial in hl_trials.iterrows():
910
+ enrollment_val = _get_enrollment(trial)
911
+ enrollment_str = f"{int(enrollment_val):,}" if pd.notna(enrollment_val) and enrollment_val > 0 else "\u2014"
912
+ completion_str = _format_date(trial.get("completion_date", ""))
913
+ sections.append(
914
+ f"| {trial['nct_id']} | {trial['brief_title'][:55]} | "
915
+ f"{trial['mechanism']} | {trial['phase_normalized']} | "
916
+ f"{_format_status(trial['overall_status'])} | {enrollment_str} | {completion_str} |"
917
+ )
918
+ sections.append("")
919
+
920
+ # ==========================================
921
+ # SPONSOR COMPETITIVE LANDSCAPE
922
+ # ==========================================
923
+ sections.append("---\n")
924
+ sections.append("## Sponsor Competitive Landscape\n")
925
+
926
+ # Top sponsors summary table
927
+ enroll_col = "enrollment_clean" if "enrollment_clean" in trials_df.columns else "enrollment"
928
+ sponsor_summary = (
929
+ trials_df.groupby("sponsor_normalized")
930
+ .agg(
931
+ trials=("nct_id", "count"),
932
+ mechanisms=("mechanism", "nunique"),
933
+ top_mechanism=("mechanism", lambda x: x.value_counts().index[0] if len(x) > 0 else ""),
934
+ phases=("phase_normalized", lambda x: ", ".join(sorted(x.unique()))),
935
+ total_enrollment=(enroll_col, "sum"),
936
+ industry=("is_industry", "first"),
937
+ )
938
+ .sort_values("trials", ascending=False)
939
+ )
940
+
941
+ top_n = 25
942
+ top_sponsors = sponsor_summary.head(top_n)
943
+
944
+ sections.append(f"### Top {min(top_n, len(top_sponsors))} Sponsors\n")
945
+ sections.append("| Rank | Sponsor | Trials | Mechanisms | Primary Focus | Phases | Est. Enrollment | Type |")
946
+ sections.append("|---:|---|---:|---:|---|---|---:|---|")
947
+ for rank, (sponsor, row) in enumerate(top_sponsors.iterrows(), 1):
948
+ enroll = f"{int(row['total_enrollment']):,}" if pd.notna(row["total_enrollment"]) and row["total_enrollment"] > 0 else "\u2014"
949
+ stype = "Industry" if row["industry"] else "Academic"
950
+ sections.append(
951
+ f"| {rank} | **{sponsor}** | {int(row['trials'])} | {int(row['mechanisms'])} | "
952
+ f"{row['top_mechanism']} | {row['phases'][:35]} | {enroll} | {stype} |"
953
+ )
954
+ sections.append("")
955
+
956
+ # Company deep-dives for top sponsors
957
+ sections.append("### Company Portfolio Analysis\n")
958
+ for sponsor, row in top_sponsors.head(10).iterrows():
959
+ sponsor_df = trials_df[trials_df["sponsor_normalized"] == sponsor]
960
+ mech_breakdown = sponsor_df["mechanism"].value_counts()
961
+ phase_breakdown = sponsor_df["phase_normalized"].value_counts()
962
+
963
+ sections.append(f"**{sponsor}** \u2014 {int(row['trials'])} trials\n")
964
+
965
+ # Mechanism focus
966
+ mech_parts = [f"{m} ({c})" for m, c in mech_breakdown.items()]
967
+ sections.append(f"- Mechanisms: {', '.join(mech_parts)}")
968
+
969
+ # Phase coverage
970
+ phase_parts = [f"{p}: {c}" for p, c in phase_breakdown.items()]
971
+ sections.append(f"- Phases: {', '.join(phase_parts)}")
972
+
973
+ # Key drugs
974
+ drugs = _get_unique_drugs(sponsor_df)
975
+ if drugs:
976
+ sections.append(f"- Key drugs/interventions: {', '.join(drugs[:5])}")
977
+
978
+ # Indication focus
979
+ ind_counts = _count_indications(sponsor_df, indication_cats=indication_cats)
980
+ ind_parts = [f"{label} ({cnt})" for label, _full, cnt in ind_counts if cnt > 0]
981
+ if ind_parts:
982
+ sections.append(f"- Indications: {', '.join(ind_parts)}")
983
+ sections.append("")
984
+
985
+ # Industry vs academic summary
986
+ sections.append("### Industry vs. Academic Split\n")
987
+ industry_df = trials_df[trials_df["is_industry"]]
988
+ academic_df = trials_df[~trials_df["is_industry"]]
989
+ sections.append(f"| | Industry | Academic/Other |")
990
+ sections.append(f"|---|---:|---:|")
991
+ sections.append(f"| Total trials | {len(industry_df)} | {len(academic_df)} |")
992
+ sections.append(f"| Unique sponsors | {industry_df['sponsor_normalized'].nunique()} | {academic_df['sponsor_normalized'].nunique()} |")
993
+ sections.append(f"| Phase 3 trials | {len(industry_df[industry_df['phase_normalized'] == 'Phase 3'])} | {len(academic_df[academic_df['phase_normalized'] == 'Phase 3'])} |")
994
+ sections.append(f"| Mechanisms covered | {industry_df['mechanism'].nunique()} | {academic_df['mechanism'].nunique()} |")
995
+ ind_enroll = industry_df[enroll_col].sum() if enroll_col in industry_df.columns else industry_df["enrollment"].sum()
996
+ aca_enroll = academic_df[enroll_col].sum() if enroll_col in academic_df.columns else academic_df["enrollment"].sum()
997
+ sections.append(f"| Total enrollment | {int(ind_enroll):,} | {int(aca_enroll):,} |")
998
+ sections.append("")
999
+
1000
+ # ==========================================
1001
+ # 16. INDICATION BREAKDOWN
1002
+ # ==========================================
1003
+ sections.append("---\n")
1004
+ sections.append("## Indication Breakdown\n")
1005
+
1006
+ ind_totals = _count_indications(trials_df, indication_cats=indication_cats)
1007
+ sections.append(f"| Indication | Trials |")
1008
+ sections.append(f"|---|---:|")
1009
+ for label, full_name, cnt in ind_totals:
1010
+ display = f"{full_name} ({label})" if label != full_name else label
1011
+ sections.append(f"| {display} | {cnt} |")
1012
+ sections.append("")
1013
+
1014
+ # Mechanism x indication
1015
+ ind_labels = [entry[0] for entry in ind_totals]
1016
+ sections.append("### Mechanism \u00d7 Indication\n")
1017
+ header = "| Mechanism | " + " | ".join(ind_labels) + " |"
1018
+ sep = "|---|" + "|".join(["---:"] * len(ind_labels)) + "|"
1019
+ sections.append(header)
1020
+ sections.append(sep)
1021
+ for mech in ct.index:
1022
+ mech_df = trials_df[trials_df["mechanism"] == mech]
1023
+ mech_ind = _count_indications(mech_df, indication_cats=indication_cats)
1024
+ vals = " | ".join(str(entry[2]) for entry in mech_ind)
1025
+ sections.append(f"| {mech} | {vals} |")
1026
+ sections.append("")
1027
+
1028
+ # ==========================================
1029
+ # 17. PATIENT POPULATION ANALYSIS
1030
+ # ==========================================
1031
+ sections.append("---\n")
1032
+ sections.append("## Patient Population Analysis\n")
1033
+ sections.append(
1034
+ "Eligibility criteria reveal which patient segments are being targeted. "
1035
+ f"Pediatric {disease_short or disease_name} remains underserved relative to adult populations; identifying "
1036
+ "pediatric-specific programs is valuable for portfolio gap analysis.\n"
1037
+ )
1038
+
1039
+ is_pediatric_col = _safe_col(trials_df, "is_pediatric", default=False)
1040
+ min_age_years_col = _safe_col(trials_df, "min_age_years", default=np.nan)
1041
+ max_age_years_col = _safe_col(trials_df, "max_age_years", default=np.nan)
1042
+ sex_col = _safe_col(trials_df, "sex", default="")
1043
+
1044
+ has_pop_data = is_pediatric_col.astype(bool).any() or min_age_years_col.notna().any()
1045
+
1046
+ if has_pop_data:
1047
+ n_ped = int(is_pediatric_col.fillna(False).astype(bool).sum())
1048
+ n_adult = n_total - n_ped
1049
+ sections.append("### Pediatric vs. Adult\n")
1050
+ sections.append(f"| Population | Trials | % |")
1051
+ sections.append(f"|---|---:|---:|")
1052
+ sections.append(f"| Adult-only | {n_adult} | {n_adult/n_total*100:.0f}% |")
1053
+ sections.append(f"| Includes pediatric | {n_ped} | {n_ped/n_total*100:.0f}% |")
1054
+ sections.append("")
1055
+
1056
+ # Pediatric by mechanism
1057
+ if n_ped > 0:
1058
+ ped_df = trials_df[is_pediatric_col.fillna(False).astype(bool)]
1059
+ ped_mechs = ped_df["mechanism"].value_counts()
1060
+ sections.append("### Pediatric Trials by Mechanism\n")
1061
+ sections.append("| Mechanism | Pediatric Trials |")
1062
+ sections.append("|---|---:|")
1063
+ for mech, cnt in ped_mechs.items():
1064
+ sections.append(f"| {mech} | {cnt} |")
1065
+ sections.append("")
1066
+
1067
+ # Age range summary
1068
+ min_ages = min_age_years_col.dropna()
1069
+ max_ages = max_age_years_col.dropna()
1070
+ if len(min_ages) > 0 or len(max_ages) > 0:
1071
+ sections.append("### Age Eligibility Summary\n")
1072
+ if len(min_ages) > 0:
1073
+ sections.append(f"- Minimum age range: {min_ages.min():.0f} \u2013 {min_ages.max():.0f} years (median: {min_ages.median():.0f})")
1074
+ if len(max_ages) > 0:
1075
+ sections.append(f"- Maximum age range: {max_ages.min():.0f} \u2013 {max_ages.max():.0f} years (median: {max_ages.median():.0f})")
1076
+ sections.append("")
1077
+
1078
+ # Gender eligibility
1079
+ sex_vc = sex_col.astype(str).replace("", "Not reported").value_counts()
1080
+ if len(sex_vc) > 0 and not (len(sex_vc) == 1 and "Not reported" in sex_vc.index):
1081
+ sections.append("### Gender Eligibility\n")
1082
+ sections.append("| Sex | Trials |")
1083
+ sections.append("|---|---:|")
1084
+ for s, cnt in sex_vc.items():
1085
+ if str(s) != "nan":
1086
+ sections.append(f"| {s} | {cnt} |")
1087
+ sections.append("")
1088
+ else:
1089
+ sections.append("*Patient population data not available. Re-run compile_trials.py with Phase 2B to populate.*\n")
1090
+
1091
+ # ==========================================
1092
+ # 18. TRIAL ARMS & COMPARATOR ANALYSIS
1093
+ # ==========================================
1094
+ sections.append("---\n")
1095
+ sections.append("## Trial Arms & Comparator Analysis\n")
1096
+ sections.append(
1097
+ "Understanding comparator strategies is critical for competitive positioning. "
1098
+ "Head-to-head trials directly compare therapies and carry outsize strategic "
1099
+ "importance for prescriber adoption and market access.\n"
1100
+ )
1101
+
1102
+ has_placebo_col = _safe_col(trials_df, "has_placebo_arm", default=False)
1103
+ has_active_col = _safe_col(trials_df, "has_active_comparator", default=False)
1104
+ is_h2h_col = _safe_col(trials_df, "is_head_to_head", default=False)
1105
+ n_arms_col = _safe_col(trials_df, "n_arms", default=np.nan)
1106
+
1107
+ has_arms_data = has_placebo_col.astype(bool).any() or n_arms_col.notna().any()
1108
+
1109
+ if has_arms_data:
1110
+ n_placebo = int(has_placebo_col.fillna(False).astype(bool).sum())
1111
+ n_active = int(has_active_col.fillna(False).astype(bool).sum())
1112
+ n_h2h = int(is_h2h_col.fillna(False).astype(bool).sum())
1113
+
1114
+ sections.append("### Comparator Summary\n")
1115
+ sections.append("| Comparator Type | Trials | % |")
1116
+ sections.append("|---|---:|---:|")
1117
+ sections.append(f"| Has placebo arm | {n_placebo} | {n_placebo/n_total*100:.0f}% |")
1118
+ sections.append(f"| Has active comparator | {n_active} | {n_active/n_total*100:.0f}% |")
1119
+ sections.append(f"| Head-to-head | {n_h2h} | {n_h2h/n_total*100:.0f}% |")
1120
+ sections.append("")
1121
+
1122
+ # Number of arms distribution
1123
+ arms_valid = n_arms_col.dropna()
1124
+ if len(arms_valid) > 0:
1125
+ sections.append("### Number of Arms\n")
1126
+ arms_vc = arms_valid.astype(int).value_counts().sort_index()
1127
+ sections.append("| Arms | Trials |")
1128
+ sections.append("|---:|---:|")
1129
+ for n, cnt in arms_vc.items():
1130
+ sections.append(f"| {n} | {cnt} |")
1131
+ sections.append("")
1132
+
1133
+ # Head-to-head trial details
1134
+ if n_h2h > 0:
1135
+ h2h_df = trials_df[is_h2h_col.fillna(False).astype(bool)]
1136
+ sections.append("### Head-to-Head Trials\n")
1137
+ sections.append("| NCT ID | Drug(s) | Mechanism | Sponsor | Phase | Status |")
1138
+ sections.append("|---|---|---|---|---|---|")
1139
+ for _, trial in h2h_df.iterrows():
1140
+ drugs = trial.get("drug_names_str", "")
1141
+ if not drugs or str(drugs) == "nan":
1142
+ drugs = "\u2014"
1143
+ elif len(str(drugs)) > 50:
1144
+ drugs = str(drugs)[:47] + "..."
1145
+ sections.append(
1146
+ f"| {trial['nct_id']} | {drugs} | {trial['mechanism']} | "
1147
+ f"{trial['sponsor_normalized']} | {trial['phase_normalized']} | "
1148
+ f"{_format_status(trial['overall_status'])} |"
1149
+ )
1150
+ sections.append("")
1151
+
1152
+ # Placebo vs active by phase
1153
+ sections.append("### Comparator Strategy by Phase\n")
1154
+ sections.append("| Phase | Placebo | Active | Head-to-Head | Neither |")
1155
+ sections.append("|---|---:|---:|---:|---:|")
1156
+ for phase in phase_order:
1157
+ phase_mask = trials_df["phase_normalized"] == phase
1158
+ if phase_mask.sum() == 0:
1159
+ continue
1160
+ n_ph = int(phase_mask.sum())
1161
+ n_pl = int((phase_mask & has_placebo_col.fillna(False).astype(bool)).sum())
1162
+ n_ac = int((phase_mask & has_active_col.fillna(False).astype(bool)).sum())
1163
+ n_hh = int((phase_mask & is_h2h_col.fillna(False).astype(bool)).sum())
1164
+ n_neither = n_ph - max(n_pl, n_ac)
1165
+ if n_neither < 0:
1166
+ n_neither = 0
1167
+ sections.append(f"| {phase} | {n_pl} | {n_ac} | {n_hh} | {n_neither} |")
1168
+ sections.append("")
1169
+ else:
1170
+ sections.append("*Trial arm/comparator data not available. Re-run compile_trials.py with Phase 2B to populate.*\n")
1171
+
1172
+ # ==========================================
1173
+ # 19. ENROLLMENT & INVESTMENT SIGNALS
1174
+ # ==========================================
1175
+ sections.append("---\n")
1176
+ sections.append("## Enrollment & Investment Signals\n")
1177
+
1178
+ # Use enrollment_clean if available, else fallback to enrollment
1179
+ enroll_col = "enrollment_clean" if "enrollment_clean" in trials_df.columns else "enrollment"
1180
+ if enroll_col == "enrollment_clean":
1181
+ sections.append("*Using cleaned enrollment figures (outliers flagged/removed).*\n")
1182
+
1183
+ # Largest trials
1184
+ large_trials = trials_df[trials_df[enroll_col].notna() & (trials_df[enroll_col] > 0)].sort_values(enroll_col, ascending=False)
1185
+ if len(large_trials) > 0:
1186
+ sections.append("### Largest Trials by Enrollment\n")
1187
+ sections.append(f"*Large enrollment signals significant investment and commercial intent.*\n")
1188
+ sections.append("| NCT ID | Drug(s) | Mechanism | Sponsor | Phase | Enrollment | Status |")
1189
+ sections.append("|---|---|---|---|---|---:|---|")
1190
+ for _, trial in large_trials.head(20).iterrows():
1191
+ drugs = trial.get("drug_names_str", "")
1192
+ if not drugs or str(drugs) == "nan":
1193
+ drugs = "\u2014"
1194
+ elif len(str(drugs)) > 35:
1195
+ drugs = str(drugs)[:32] + "..."
1196
+ enrollment_val = trial[enroll_col]
1197
+ sections.append(
1198
+ f"| {trial['nct_id']} | {drugs} | {trial['mechanism']} | "
1199
+ f"{trial['sponsor_normalized']} | {trial['phase_normalized']} | "
1200
+ f"{int(enrollment_val):,} | {_format_status(trial['overall_status'])} |"
1201
+ )
1202
+ sections.append("")
1203
+
1204
+ # Enrollment by mechanism
1205
+ sections.append("### Total Enrollment by Mechanism\n")
1206
+ mech_enrollment = trials_df.groupby("mechanism")[enroll_col].agg(["sum", "count", "mean"]).sort_values("sum", ascending=False)
1207
+ sections.append("| Mechanism | Total Enrollment | Trials | Avg. per Trial |")
1208
+ sections.append("|---|---:|---:|---:|")
1209
+ for mech, row in mech_enrollment.iterrows():
1210
+ total = f"{int(row['sum']):,}" if pd.notna(row["sum"]) else "\u2014"
1211
+ avg = f"{int(row['mean']):,}" if pd.notna(row["mean"]) else "\u2014"
1212
+ sections.append(f"| {mech} | {total} | {int(row['count'])} | {avg} |")
1213
+ sections.append("")
1214
+
1215
+ # New trials (started recently)
1216
+ recent_starts = trials_df[trials_df["start_year"].notna()].copy()
1217
+ if len(recent_starts) > 0:
1218
+ current_year = now.year
1219
+ new_trials = recent_starts[recent_starts["start_year"] >= current_year - 1]
1220
+ if len(new_trials) > 0:
1221
+ sections.append(f"### Recently Initiated Trials ({current_year - 1}\u2013{current_year})\n")
1222
+ sections.append(f"**{len(new_trials)} trials** started in {current_year - 1}\u2013{current_year}.\n")
1223
+ new_mechs = new_trials["mechanism"].value_counts()
1224
+ for mech, count in new_mechs.items():
1225
+ sections.append(f"- {mech}: {count}")
1226
+ sections.append("")
1227
+
1228
+ # ==========================================
1229
+ # 20. BIOSIMILAR LANDSCAPE
1230
+ # ==========================================
1231
+ sections.append("---\n")
1232
+ sections.append("## Biosimilar Landscape\n")
1233
+ sections.append(
1234
+ "Biosimilar trials erode originator revenue and reshape competitive dynamics. "
1235
+ "Mechanisms with heavy biosimilar activity (e.g., Anti-TNF) face price compression, "
1236
+ "while mechanisms with no biosimilars retain pricing power.\n"
1237
+ )
1238
+
1239
+ biosim_col = _safe_col(trials_df, "is_biosimilar", default=False)
1240
+ has_biosim_data = "is_biosimilar" in trials_df.columns
1241
+
1242
+ if has_biosim_data:
1243
+ n_biosim = int(biosim_col.fillna(False).astype(bool).sum())
1244
+ n_originator = n_total - n_biosim
1245
+
1246
+ sections.append(f"**{n_biosim} biosimilar trials** vs. **{n_originator} originator/novel** trials.\n")
1247
+
1248
+ if n_biosim > 0:
1249
+ biosim_df = trials_df[biosim_col.fillna(False).astype(bool)]
1250
+
1251
+ # Biosimilar by mechanism
1252
+ biosim_mechs = biosim_df["mechanism"].value_counts()
1253
+ sections.append("### Biosimilar Trials by Mechanism\n")
1254
+ sections.append("| Mechanism | Biosimilar Trials | Originator Trials | Biosimilar % |")
1255
+ sections.append("|---|---:|---:|---:|")
1256
+ for mech in ct.index:
1257
+ mech_total = len(trials_df[trials_df["mechanism"] == mech])
1258
+ mech_biosim = int(biosim_mechs.get(mech, 0))
1259
+ mech_orig = mech_total - mech_biosim
1260
+ pct = mech_biosim / mech_total * 100 if mech_total > 0 else 0
1261
+ if mech_biosim > 0 or mech_total >= 3: # Only show mechanisms with biosimilars or enough trials
1262
+ sections.append(f"| {mech} | {mech_biosim} | {mech_orig} | {pct:.0f}% |")
1263
+ sections.append("")
1264
+
1265
+ # Biosimilar sponsors
1266
+ biosim_sponsors = biosim_df["sponsor_normalized"].value_counts()
1267
+ sections.append("### Top Biosimilar Sponsors\n")
1268
+ sections.append("| Sponsor | Biosimilar Trials |")
1269
+ sections.append("|---|---:|")
1270
+ for sponsor, cnt in biosim_sponsors.head(10).items():
1271
+ sections.append(f"| {sponsor} | {cnt} |")
1272
+ sections.append("")
1273
+ else:
1274
+ sections.append("*No biosimilar trials identified in current dataset.*\n")
1275
+ else:
1276
+ sections.append("*Biosimilar classification not available. Re-run compile_trials.py with Phase 2B to populate.*\n")
1277
+
1278
+ # ==========================================
1279
+ # 21. WHITESPACE & UNMET NEEDS
1280
+ # ==========================================
1281
+ sections.append("---\n")
1282
+ sections.append("## Whitespace & Unmet Needs\n")
1283
+ sections.append(
1284
+ "Identifying gaps in the current pipeline highlights opportunities for BD and "
1285
+ "portfolio strategy. Mechanisms without Phase 3 programs, indications with thin "
1286
+ "coverage, and under-tested patient segments represent potential whitespace.\n"
1287
+ )
1288
+
1289
+ # Mechanisms with no Phase 3
1290
+ sections.append("### Mechanisms Without Phase 3 Trials\n")
1291
+ mechs_no_p3 = []
1292
+ for mech in pharma_mechs.index:
1293
+ mech_p3 = len(trials_df[(trials_df["mechanism"] == mech) &
1294
+ (trials_df["phase_normalized"].isin(["Phase 3", "Phase 3/4"]))])
1295
+ if mech_p3 == 0:
1296
+ total_for_mech = int(pharma_mechs[mech])
1297
+ mechs_no_p3.append((mech, total_for_mech))
1298
+
1299
+ if mechs_no_p3:
1300
+ sections.append("| Mechanism | Total Trials | Highest Phase |")
1301
+ sections.append("|---|---:|---|")
1302
+ for mech, total in mechs_no_p3:
1303
+ mech_df_ws = trials_df[trials_df["mechanism"] == mech]
1304
+ highest = mech_df_ws["phase_normalized"].value_counts().index[0] if len(mech_df_ws) > 0 else "\u2014"
1305
+ sections.append(f"| {mech} | {total} | {highest} |")
1306
+ sections.append("")
1307
+ else:
1308
+ sections.append("*All pharmacological mechanisms have at least one Phase 3 trial.*\n")
1309
+
1310
+ # Indication gaps — mechanisms x indication with 0 trials
1311
+ sections.append("### Underserved Mechanism \u00d7 Indication Combinations\n")
1312
+ sections.append(
1313
+ "*Mechanism-indication pairs where no active trials exist represent potential "
1314
+ "first-mover opportunities.*\n"
1315
+ )
1316
+ gaps = []
1317
+ for mech in pharma_mechs.index[:10]: # Top 10 mechanisms
1318
+ mech_df_gap = trials_df[trials_df["mechanism"] == mech]
1319
+ mech_ind_counts = _count_indications(mech_df_gap, indication_cats=indication_cats)
1320
+ for label, _full, cnt in mech_ind_counts:
1321
+ # Skip default/catch-all categories for gap analysis
1322
+ is_default = False
1323
+ if indication_cats:
1324
+ for cat in indication_cats:
1325
+ if cat.get("label") == label and cat.get("is_default"):
1326
+ is_default = True
1327
+ break
1328
+ if not is_default and cnt == 0:
1329
+ gaps.append((mech, label))
1330
+
1331
+ if gaps:
1332
+ sections.append("| Mechanism | Missing Indication |")
1333
+ sections.append("|---|---|")
1334
+ for mech, ind in gaps:
1335
+ sections.append(f"| {mech} | {ind} |")
1336
+ sections.append("")
1337
+ else:
1338
+ sections.append("*No obvious mechanism \u00d7 indication gaps among top mechanisms.*\n")
1339
+
1340
+ # Pediatric gaps
1341
+ if has_pop_data and n_ped > 0:
1342
+ sections.append("### Pediatric Coverage Gaps\n")
1343
+ ped_mechs_set = set(trials_df[is_pediatric_col.fillna(False).astype(bool)]["mechanism"].unique())
1344
+ all_pharma_mechs = set(pharma_mechs.index)
1345
+ no_ped = all_pharma_mechs - ped_mechs_set
1346
+ if no_ped:
1347
+ sections.append("*Mechanisms with NO pediatric trials:*\n")
1348
+ for mech in sorted(no_ped):
1349
+ sections.append(f"- {mech}")
1350
+ sections.append("")
1351
+ else:
1352
+ sections.append("*All pharmacological mechanisms have at least one pediatric trial.*\n")
1353
+
1354
+ # ==========================================
1355
+ # 22. COLLABORATOR NETWORK
1356
+ # ==========================================
1357
+ sections.append("---\n")
1358
+ sections.append("## Collaborator Network\n")
1359
+ sections.append(
1360
+ "Collaborating institutions (listed as collaborators on ClinicalTrials.gov) "
1361
+ f"reveal partnership density and key academic centers driving {disease_short or disease_name} research.\n"
1362
+ )
1363
+
1364
+ collab_col = _safe_col(trials_df, "collaborators_str", default="")
1365
+ has_collab = collab_col.astype(str).str.strip().replace("", pd.NA).dropna().shape[0] > 0
1366
+
1367
+ if has_collab:
1368
+ # Count collaborator appearances
1369
+ collab_counts = {}
1370
+ for val in collab_col:
1371
+ if pd.notna(val) and str(val).strip():
1372
+ for c in str(val).split(";"):
1373
+ c = c.strip()
1374
+ if c:
1375
+ collab_counts[c] = collab_counts.get(c, 0) + 1
1376
+
1377
+ if collab_counts:
1378
+ sorted_collabs = sorted(collab_counts.items(), key=lambda x: x[1], reverse=True)
1379
+ sections.append(f"**{len(sorted_collabs)} unique collaborating institutions** identified.\n")
1380
+
1381
+ sections.append("### Top 20 Collaborators\n")
1382
+ sections.append("| Rank | Collaborator | Trials |")
1383
+ sections.append("|---:|---|---:|")
1384
+ for rank, (collab, cnt) in enumerate(sorted_collabs[:20], 1):
1385
+ sections.append(f"| {rank} | {collab} | {cnt} |")
1386
+ sections.append("")
1387
+
1388
+ # Trials with collaborators vs. without
1389
+ n_with_collab = int(collab_col.astype(str).str.strip().replace("", pd.NA).dropna().shape[0])
1390
+ sections.append(f"- Trials with listed collaborators: {n_with_collab} ({n_with_collab/n_total*100:.0f}%)")
1391
+ sections.append(f"- Trials without collaborators: {n_total - n_with_collab} ({(n_total - n_with_collab)/n_total*100:.0f}%)")
1392
+ sections.append("")
1393
+ else:
1394
+ sections.append("*No collaborator data found.*\n")
1395
+ else:
1396
+ sections.append("*Collaborator data not available. Re-run compile_trials.py with Phase 2B to populate.*\n")
1397
+
1398
+ # ==========================================
1399
+ # 23. REGULATORY SIGNALS
1400
+ # ==========================================
1401
+ sections.append("---\n")
1402
+ sections.append("## Regulatory Signals\n")
1403
+ sections.append(
1404
+ "FDA-regulated status and Data Safety Monitoring Board (DSMB) presence are "
1405
+ "proxies for regulatory seriousness and risk management maturity.\n"
1406
+ )
1407
+
1408
+ fda_col = _safe_col(trials_df, "is_fda_regulated_drug", default=False)
1409
+ dmc_col = _safe_col(trials_df, "has_dmc", default=False)
1410
+ has_reg_data = "is_fda_regulated_drug" in trials_df.columns or "has_dmc" in trials_df.columns
1411
+
1412
+ if has_reg_data:
1413
+ # FDA-regulated breakdown
1414
+ if "is_fda_regulated_drug" in trials_df.columns:
1415
+ n_fda = int(fda_col.fillna(False).astype(bool).sum())
1416
+ n_non_fda = n_total - n_fda
1417
+ sections.append("### FDA-Regulated Drug Trials\n")
1418
+ sections.append(f"| | Trials | % |")
1419
+ sections.append(f"|---|---:|---:|")
1420
+ sections.append(f"| FDA-regulated | {n_fda} | {n_fda/n_total*100:.0f}% |")
1421
+ sections.append(f"| Non-FDA-regulated | {n_non_fda} | {n_non_fda/n_total*100:.0f}% |")
1422
+ sections.append("")
1423
+
1424
+ # FDA by mechanism
1425
+ sections.append("### FDA-Regulated by Mechanism\n")
1426
+ sections.append("| Mechanism | FDA-Regulated | Non-FDA | % FDA |")
1427
+ sections.append("|---|---:|---:|---:|")
1428
+ for mech in ct.index:
1429
+ mech_mask = trials_df["mechanism"] == mech
1430
+ mech_n = int(mech_mask.sum())
1431
+ mech_fda = int((mech_mask & fda_col.fillna(False).astype(bool)).sum())
1432
+ pct = mech_fda / mech_n * 100 if mech_n > 0 else 0
1433
+ sections.append(f"| {mech} | {mech_fda} | {mech_n - mech_fda} | {pct:.0f}% |")
1434
+ sections.append("")
1435
+
1436
+ # DSMB breakdown
1437
+ if "has_dmc" in trials_df.columns:
1438
+ n_dmc = int(dmc_col.fillna(False).astype(bool).sum())
1439
+ sections.append("### Data Safety Monitoring Board (DSMB)\n")
1440
+ sections.append(f"| | Trials | % |")
1441
+ sections.append(f"|---|---:|---:|")
1442
+ sections.append(f"| Has DSMB | {n_dmc} | {n_dmc/n_total*100:.0f}% |")
1443
+ sections.append(f"| No DSMB | {n_total - n_dmc} | {(n_total - n_dmc)/n_total*100:.0f}% |")
1444
+ sections.append("")
1445
+
1446
+ # DSMB rate by phase
1447
+ sections.append("### DSMB Rate by Phase\n")
1448
+ sections.append("| Phase | Has DSMB | Total | % DSMB |")
1449
+ sections.append("|---|---:|---:|---:|")
1450
+ for phase in phase_order:
1451
+ phase_mask = trials_df["phase_normalized"] == phase
1452
+ if phase_mask.sum() == 0:
1453
+ continue
1454
+ n_ph = int(phase_mask.sum())
1455
+ n_dmc_ph = int((phase_mask & dmc_col.fillna(False).astype(bool)).sum())
1456
+ pct = n_dmc_ph / n_ph * 100 if n_ph > 0 else 0
1457
+ sections.append(f"| {phase} | {n_dmc_ph} | {n_ph} | {pct:.0f}% |")
1458
+ sections.append("")
1459
+ else:
1460
+ sections.append("*Regulatory signal data not available. Re-run compile_trials.py with Phase 2B to populate.*\n")
1461
+
1462
+ # ==========================================
1463
+ # 24. DATA NOTES
1464
+ # ==========================================
1465
+ sections.append("---\n")
1466
+ sections.append("## Data Notes\n")
1467
+
1468
+ sections.append(f"- **Source:** ClinicalTrials.gov API v2 (free, no authentication)")
1469
+ sections.append(f"- **Query date:** {now.strftime('%Y-%m-%d')}")
1470
+
1471
+ if parameters:
1472
+ conditions = parameters.get("conditions", [])
1473
+ if conditions:
1474
+ sections.append(f"- **Conditions searched:** {', '.join(conditions)}")
1475
+ statuses = parameters.get("statuses", [])
1476
+ if statuses:
1477
+ sections.append(f"- **Status filter:** {', '.join(_format_status(s) for s in statuses)}")
1478
+ highlight_mech = parameters.get("highlight_mechanism")
1479
+ if highlight_mech:
1480
+ sections.append(f"- **Mechanism focus:** {highlight_mech}")
1481
+ highlight_sponsor = parameters.get("highlight_sponsor")
1482
+ if highlight_sponsor:
1483
+ sections.append(f"- **Sponsor focus:** {highlight_sponsor}")
1484
+
1485
+ unclassified = trials_df[trials_df["mechanism"].isin(["Unclassified", "Other Biologic", "Small Molecule (Other)"])]
1486
+ if len(unclassified) > 0:
1487
+ sections.append(
1488
+ f"- **Unclassified trials:** {len(unclassified)} ({len(unclassified)/n_total*100:.0f}% of total) "
1489
+ f"\u2014 trials with generic or unrecognized interventions"
1490
+ )
1491
+ sections.append(f"- **Classification method:** Pattern matching on intervention names, descriptions, and trial titles")
1492
+ sections.append(f"- **Limitations:** Mechanism classification is automated and may misclassify novel or ambiguous interventions. "
1493
+ f"Completion dates are sponsor-reported estimates and may change.")
1494
+ if "enrollment_clean" in trials_df.columns:
1495
+ n_outliers = int(_safe_col(trials_df, "enrollment_outlier", default=False).fillna(False).astype(bool).sum())
1496
+ if n_outliers > 0:
1497
+ sections.append(f"- **Enrollment cleaning:** {n_outliers} enrollment outlier(s) flagged and excluded from summary statistics")
1498
+ sections.append("")
1499
+
1500
+ # ==========================================
1501
+ # BUILD AND WRITE
1502
+ # ==========================================
1503
+ report = "\n".join(sections)
1504
+
1505
+ with open(output_file, "w") as f:
1506
+ f.write(report)
1507
+
1508
+ print(f"\u2713 Markdown report generated: {output_file}")
1509
+ return report
1510
+
1511
+
1512
+ # ============================================================
1513
+ # HELPER FUNCTIONS
1514
+ # ============================================================
1515
+
1516
+ def _safe_col_val(row, col, default=""):
1517
+ """Safely get a value from a row (Series), returning default if key missing."""
1518
+ try:
1519
+ val = row[col]
1520
+ if pd.isna(val):
1521
+ return default
1522
+ return val
1523
+ except (KeyError, IndexError):
1524
+ return default
1525
+
1526
+
1527
+ def _get_enrollment(trial):
1528
+ """
1529
+ Get enrollment from a trial row, preferring enrollment_clean over enrollment.
1530
+
1531
+ Returns the numeric value or NaN.
1532
+ """
1533
+ for col in ("enrollment_clean", "enrollment"):
1534
+ try:
1535
+ val = trial[col]
1536
+ if pd.notna(val) and val > 0:
1537
+ return val
1538
+ except (KeyError, IndexError):
1539
+ continue
1540
+ return np.nan
1541
+
1542
+
1543
+ def _format_status(status):
1544
+ """Convert API status to readable format."""
1545
+ status_map = {
1546
+ "RECRUITING": "Recruiting",
1547
+ "ACTIVE_NOT_RECRUITING": "Active, not recruiting",
1548
+ "NOT_YET_RECRUITING": "Not yet recruiting",
1549
+ "ENROLLING_BY_INVITATION": "Enrolling by invitation",
1550
+ "COMPLETED": "Completed",
1551
+ "SUSPENDED": "Suspended",
1552
+ "TERMINATED": "Terminated",
1553
+ "WITHDRAWN": "Withdrawn",
1554
+ }
1555
+ return status_map.get(status, status)
1556
+
1557
+
1558
+ def _format_date(date_str):
1559
+ """Format date string to readable format, return dash if empty."""
1560
+ if not date_str or str(date_str) == "nan":
1561
+ return "\u2014"
1562
+ # Clean up: just return the date as-is but shorter
1563
+ date_str = str(date_str).strip()
1564
+ if len(date_str) > 10:
1565
+ date_str = date_str[:10]
1566
+ return date_str if date_str else "\u2014"
1567
+
1568
+
1569
+ def _time_to_readout(completion_date):
1570
+ """Calculate time from now to expected completion."""
1571
+ if not completion_date or str(completion_date) == "nan":
1572
+ return "\u2014"
1573
+ try:
1574
+ date_str = str(completion_date).strip()
1575
+ # Parse various date formats
1576
+ for fmt in ("%Y-%m-%d", "%Y-%m", "%Y"):
1577
+ try:
1578
+ dt = datetime.strptime(date_str[:len(fmt.replace("%", "").replace("-", "") + date_str[:4])], fmt)
1579
+ break
1580
+ except ValueError:
1581
+ continue
1582
+ else:
1583
+ # Try simple parse
1584
+ if len(date_str) >= 7:
1585
+ dt = datetime.strptime(date_str[:7], "%Y-%m")
1586
+ elif len(date_str) >= 4:
1587
+ dt = datetime.strptime(date_str[:4], "%Y")
1588
+ else:
1589
+ return "\u2014"
1590
+
1591
+ delta = dt - datetime.now()
1592
+ months = delta.days / 30.44
1593
+ if months < 0:
1594
+ return "Overdue"
1595
+ elif months < 1:
1596
+ return "<1 month"
1597
+ elif months < 12:
1598
+ return f"~{int(months)} months"
1599
+ else:
1600
+ years = months / 12
1601
+ return f"~{years:.1f} years"
1602
+ except (ValueError, TypeError):
1603
+ return "\u2014"
1604
+
1605
+
1606
+ def _get_upcoming_readouts(trials_df, months=18):
1607
+ """Get trials with completion dates within the next N months."""
1608
+ now = datetime.now()
1609
+ cutoff = now + timedelta(days=months * 30.44)
1610
+ upcoming = []
1611
+
1612
+ for _, trial in trials_df.iterrows():
1613
+ cd = trial.get("completion_date", "")
1614
+ if not cd or str(cd) == "nan":
1615
+ continue
1616
+ try:
1617
+ date_str = str(cd).strip()
1618
+ if len(date_str) >= 10:
1619
+ dt = datetime.strptime(date_str[:10], "%Y-%m-%d")
1620
+ elif len(date_str) >= 7:
1621
+ dt = datetime.strptime(date_str[:7], "%Y-%m")
1622
+ elif len(date_str) >= 4:
1623
+ dt = datetime.strptime(date_str[:4], "%Y")
1624
+ else:
1625
+ continue
1626
+
1627
+ if now <= dt <= cutoff:
1628
+ upcoming.append(trial)
1629
+ except (ValueError, TypeError):
1630
+ continue
1631
+
1632
+ if not upcoming:
1633
+ return pd.DataFrame()
1634
+
1635
+ result = pd.DataFrame(upcoming)
1636
+ # Sort by completion date
1637
+ result = result.sort_values("completion_date")
1638
+ return result
1639
+
1640
+
1641
def _extract_drug_table(df, indication_cats=None):
    """Build a drug-level pipeline table from a trials DataFrame.

    Each semicolon-separated name in ``drug_names_str`` becomes a candidate
    row. Names containing a comparator/control term are dropped, and only the
    first occurrence of each (lower-cased drug, sponsor) pair is kept. The
    result is sorted by ``phase_numeric`` descending and then drug name, with
    the ``phase_numeric`` helper column removed. Returns an empty
    ``pd.DataFrame()`` when no drug qualifies.
    """
    # Substrings that mark a comparator/control arm rather than a drug.
    excluded_terms = (
        "placebo", "saline", "sodium chloride", "standard of care",
        "standard care", "usual care", "sham", "no intervention",
    )

    rows = []
    already_listed = set()

    for _, trial in df.iterrows():
        raw_drugs = trial.get("drug_names_str", "")
        if not raw_drugs or str(raw_drugs) == "nan":
            continue

        for piece in str(raw_drugs).split(";"):
            drug = piece.strip()
            if not drug:
                continue

            lowered = drug.lower()
            if any(term in lowered for term in excluded_terms):
                continue

            # One row per drug + sponsor combination.
            dedupe_key = (lowered, trial.get("sponsor_normalized", ""))
            if dedupe_key in already_listed:
                continue
            already_listed.add(dedupe_key)

            enrollment = _get_enrollment(trial)
            if pd.notna(enrollment) and enrollment > 0:
                enrollment_text = f"{int(enrollment):,}"
            else:
                enrollment_text = "\u2014"

            rows.append({
                "drug": drug[:40],
                "mechanism": trial.get("mechanism", ""),
                "sponsor": trial.get("sponsor_normalized", ""),
                "phase": trial.get("phase_normalized", ""),
                "status": _format_status(trial.get("overall_status", "")),
                "indication": _extract_indication(
                    trial.get("conditions_str", ""), indication_cats=indication_cats
                ),
                "enrollment": enrollment_text,
                "completion": _format_date(trial.get("completion_date", "")),
                "phase_numeric": trial.get("phase_numeric", 0),
            })

    if not rows:
        return pd.DataFrame()

    table = pd.DataFrame(rows).sort_values(
        ["phase_numeric", "drug"], ascending=[False, True]
    )
    return table.drop(columns=["phase_numeric"])
1689
+
1690
+
1691
+ def _get_unique_drugs(df):
1692
+ """Extract unique meaningful drug names from a trials DataFrame."""
1693
+ drugs = set()
1694
+ skip_terms = {"placebo", "saline", "sodium chloride", "standard of care", "standard care",
1695
+ "usual care", "sham", "no intervention", "data collection"}
1696
+ for _, trial in df.iterrows():
1697
+ drugs_str = trial.get("drug_names_str", "")
1698
+ if not drugs_str or str(drugs_str) == "nan":
1699
+ continue
1700
+ for d in str(drugs_str).split(";"):
1701
+ d = d.strip()
1702
+ if d and not any(skip in d.lower() for skip in skip_terms):
1703
+ drugs.add(d)
1704
+ return sorted(drugs)
1705
+
1706
+
1707
+ def _extract_indication(conditions_str, indication_cats=None):
1708
+ """
1709
+ Extract simplified indication from conditions string.
1710
+
1711
+ Parameters
1712
+ ----------
1713
+ conditions_str : str
1714
+ The trial's conditions string.
1715
+ indication_cats : list of dict, optional
1716
+ Config-driven indication categories. Each dict has 'label',
1717
+ 'pattern' (substring match), and optionally 'is_default'.
1718
+ When None, falls back to returning the raw conditions (truncated).
1719
+ """
1720
+ if not conditions_str or str(conditions_str) == "nan":
1721
+ return "\u2014"
1722
+ conditions_lower = str(conditions_str).lower()
1723
+
1724
+ if indication_cats:
1725
+ matched = []
1726
+ for cat in indication_cats:
1727
+ if cat.get("is_default"):
1728
+ continue
1729
+ if cat.get("pattern") and cat["pattern"].lower() in conditions_lower:
1730
+ matched.append(cat["label"])
1731
+ if matched:
1732
+ return " + ".join(matched)
1733
+ # Check for default category
1734
+ for cat in indication_cats:
1735
+ if cat.get("is_default"):
1736
+ return cat["label"]
1737
+ return "Other"
1738
+ else:
1739
+ # No config: return truncated conditions
1740
+ raw = str(conditions_str).strip()
1741
+ return raw[:40] if len(raw) > 40 else raw if raw else "Other"
1742
+
1743
+
1744
+ def _count_indications(df, indication_cats=None):
1745
+ """
1746
+ Count trials per indication category.
1747
+
1748
+ Parameters
1749
+ ----------
1750
+ df : pd.DataFrame
1751
+ Trials DataFrame.
1752
+ indication_cats : list of dict, optional
1753
+ Config-driven indication categories. When None, falls back to
1754
+ counting top conditions from the data.
1755
+
1756
+ Returns
1757
+ -------
1758
+ list of (label, full_name, count) tuples if indication_cats provided,
1759
+ or (uc_count, cd_count, other_count) tuple for backward compatibility.
1760
+ """
1761
+ if indication_cats:
1762
+ counts = []
1763
+ matched_indices = set()
1764
+ for cat in indication_cats:
1765
+ if cat.get("is_default"):
1766
+ continue
1767
+ label = cat["label"]
1768
+ full_name = cat.get("full_name", label)
1769
+ pattern = cat.get("pattern", "").lower()
1770
+ cat_count = 0
1771
+ for idx, trial in df.iterrows():
1772
+ cond = str(trial.get("conditions_str", "")).lower()
1773
+ if pattern and pattern in cond:
1774
+ cat_count += 1
1775
+ matched_indices.add(idx)
1776
+ counts.append((label, full_name, cat_count))
1777
+ # Handle default category (remaining unmatched)
1778
+ for cat in indication_cats:
1779
+ if cat.get("is_default"):
1780
+ label = cat["label"]
1781
+ full_name = cat.get("full_name", label)
1782
+ default_count = len(df) - len(matched_indices)
1783
+ counts.append((label, full_name, default_count))
1784
+ break
1785
+ return counts
1786
+ else:
1787
+ # Fallback: no config, return top conditions from data
1788
+ cond_counts = {}
1789
+ for _, trial in df.iterrows():
1790
+ cond = str(trial.get("conditions_str", "")).strip()
1791
+ if cond and cond != "nan":
1792
+ # Split semicolon-separated conditions
1793
+ for c in cond.split(";"):
1794
+ c = c.strip()
1795
+ if c:
1796
+ cond_counts[c] = cond_counts.get(c, 0) + 1
1797
+ # Return as list of (label, full_name, count) tuples
1798
+ sorted_conds = sorted(cond_counts.items(), key=lambda x: x[1], reverse=True)
1799
+ return [(c, c, n) for c, n in sorted_conds[:10]]
1800
+
1801
+
1802
+ def _add_indication_split(sections, df, indication_cats=None):
1803
+ """Add indication split analysis to sections."""
1804
+ counts = _count_indications(df, indication_cats=indication_cats)
1805
+ total = len(df)
1806
+ for entry in counts:
1807
+ label, full_name, cnt = entry
1808
+ display = f"{full_name} ({label})" if label != full_name else label
1809
+ if total > 0:
1810
+ sections.append(f"- {display}: {cnt} ({cnt/total*100:.0f}%)")
1811
+ else:
1812
+ sections.append(f"- {display}: 0")
1813
+ sections.append("")