@brainpilot/skills 0.0.6 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (285) hide show
  1. package/package.json +2 -2
  2. package/skills/01_Meta-Skills/academic-research-hub/SKILL.md +108 -0
  3. package/skills/01_Meta-Skills/academic-research-hub/scripts/requirements.txt +17 -0
  4. package/skills/01_Meta-Skills/academic-research-hub/scripts/research.py +781 -0
  5. package/skills/01_Meta-Skills/beautiful-log/SKILL.md +64 -0
  6. package/skills/01_Meta-Skills/beautiful-log/scripts/beautiful_log.py +274 -0
  7. package/skills/01_Meta-Skills/ethoclaw-daily-paper/SKILL.md +130 -0
  8. package/skills/01_Meta-Skills/ethoclaw-daily-paper/assets/config.template.yaml +54 -0
  9. package/skills/01_Meta-Skills/ethoclaw-daily-paper/assets/top5_digest_template.md +5 -0
  10. package/skills/01_Meta-Skills/ethoclaw-daily-paper/scripts/build_top5_digest.py +300 -0
  11. package/skills/01_Meta-Skills/ethoclaw-daily-paper/scripts/common.py +137 -0
  12. package/skills/01_Meta-Skills/ethoclaw-daily-paper/scripts/merge_results.py +106 -0
  13. package/skills/01_Meta-Skills/ethoclaw-daily-paper/scripts/run_pipeline.py +177 -0
  14. package/skills/01_Meta-Skills/ethoclaw-daily-paper/scripts/search_arxiv.py +162 -0
  15. package/skills/01_Meta-Skills/ethoclaw-daily-paper/scripts/search_pubmed.py +202 -0
  16. package/skills/01_Meta-Skills/ethoclaw-normalize-tabular/SKILL.md +173 -0
  17. package/skills/01_Meta-Skills/ethoclaw-normalize-tabular/scripts/normalize_data.py +874 -0
  18. package/skills/01_Meta-Skills/ethoclaw-pdf-research/SKILL.md +134 -0
  19. package/skills/01_Meta-Skills/ethoclaw-pdf-research/references/confirmation-prompts.md +31 -0
  20. package/skills/01_Meta-Skills/ethoclaw-pdf-research/references/output-patterns.md +45 -0
  21. package/skills/01_Meta-Skills/ethoclaw-pdf-research/scripts/build_markdown_deliverables.py +41 -0
  22. package/skills/01_Meta-Skills/ethoclaw-pdf-research/scripts/build_research_log.py +84 -0
  23. package/skills/01_Meta-Skills/ethoclaw-pdf-research/scripts/build_summary_md.py +63 -0
  24. package/skills/01_Meta-Skills/ethoclaw-pdf-research/scripts/extract_pdf_bundle.py +140 -0
  25. package/skills/01_Meta-Skills/experiment-controller/SKILL.md +140 -0
  26. package/skills/01_Meta-Skills/knowledge-graph-builder/SKILL.md +366 -0
  27. package/skills/01_Meta-Skills/knowledge-graph-builder/scripts/entity_resolution.py +120 -0
  28. package/skills/01_Meta-Skills/knowledge-graph-builder/scripts/extraction_prompt_template.txt +19 -0
  29. package/skills/01_Meta-Skills/knowledge-graph-builder/scripts/graph_query.py +106 -0
  30. package/skills/01_Meta-Skills/knowledge-graph-builder/scripts/hypothesis_cli_reference.py +42 -0
  31. package/skills/01_Meta-Skills/knowledge-graph-builder/scripts/new_data_source_template.py +116 -0
  32. package/skills/01_Meta-Skills/knowledge-graph-builder/scripts/requirements.txt +15 -0
  33. package/skills/01_Meta-Skills/method-design/SKILL.md +61 -0
  34. package/skills/01_Meta-Skills/multi-search-engine/SKILL.md +119 -0
  35. package/skills/01_Meta-Skills/research-idea/SKILL.md +65 -0
  36. package/skills/05_EEG_ERP/eeg-skill/SKILL.md +197 -0
  37. package/skills/05_EEG_ERP/meg-skill/SKILL.md +188 -0
  38. package/skills/05_EEG_ERP/meg-skill/scripts/time_frequency.py +223 -0
  39. package/skills/05_EEG_ERP/mne-eeg-tool/SKILL.md +165 -0
  40. package/skills/05_EEG_ERP/mne-eeg-tool/scripts/eeg_pipeline_reference.py +231 -0
  41. package/skills/05_EEG_ERP/seed-iv-skill/SKILL.md +184 -0
  42. package/skills/05_EEG_ERP/seed-iv-skill/scripts/classify_seed_iv.py +154 -0
  43. package/skills/05_EEG_ERP/seed-iv-skill/scripts/extract_seed_iv_features.py +190 -0
  44. package/skills/05_EEG_ERP/seed-iv-skill/scripts/validate_seed_iv.py +102 -0
  45. package/skills/05_EEG_ERP/seed-vig-skill/SKILL.md +182 -0
  46. package/skills/05_EEG_ERP/seed-vig-skill/scripts/classify_seed_vig.py +165 -0
  47. package/skills/05_EEG_ERP/seed-vig-skill/scripts/extract_seed_vig_features.py +185 -0
  48. package/skills/05_EEG_ERP/seed-vig-skill/scripts/validate_seed_vig.py +88 -0
  49. package/skills/06_fMRI_Neuroimaging/abcd-skill/SKILL.md +308 -0
  50. package/skills/06_fMRI_Neuroimaging/abcd-skill/scripts/abcd_qc_summary.py +449 -0
  51. package/skills/06_fMRI_Neuroimaging/abcd-skill/scripts/extract_abcd_phenotype.py +292 -0
  52. package/skills/06_fMRI_Neuroimaging/abcd-skill/scripts/reorganize_abcd.py +387 -0
  53. package/skills/06_fMRI_Neuroimaging/abide-skill/SKILL.md +302 -0
  54. package/skills/06_fMRI_Neuroimaging/abide-skill/scripts/abide_qc_summary.py +317 -0
  55. package/skills/06_fMRI_Neuroimaging/abide-skill/scripts/extract_abide_phenotype.py +267 -0
  56. package/skills/06_fMRI_Neuroimaging/abide-skill/scripts/reorganize_abide.py +387 -0
  57. package/skills/06_fMRI_Neuroimaging/adhd200-skill/SKILL.md +244 -0
  58. package/skills/06_fMRI_Neuroimaging/adhd200-skill/scripts/adhd200_qc_summary.py +98 -0
  59. package/skills/06_fMRI_Neuroimaging/adhd200-skill/scripts/extract_adhd200_phenotype.py +134 -0
  60. package/skills/06_fMRI_Neuroimaging/adhd200-skill/scripts/reorganize_adhd200.py +206 -0
  61. package/skills/06_fMRI_Neuroimaging/adni-skill/SKILL.md +358 -0
  62. package/skills/06_fMRI_Neuroimaging/adni-skill/scripts/generate_adni_task_files.py +1305 -0
  63. package/skills/06_fMRI_Neuroimaging/adni-skill/scripts/generate_vqa_from_tasks.py +766 -0
  64. package/skills/06_fMRI_Neuroimaging/adni-skill/scripts/reorganize_adni.py +491 -0
  65. package/skills/06_fMRI_Neuroimaging/aibl-skill/SKILL.md +295 -0
  66. package/skills/06_fMRI_Neuroimaging/aibl-skill/scripts/aibl_qc_summary.py +260 -0
  67. package/skills/06_fMRI_Neuroimaging/aibl-skill/scripts/extract_aibl_phenotype.py +365 -0
  68. package/skills/06_fMRI_Neuroimaging/aibl-skill/scripts/reorganize_aibl.py +394 -0
  69. package/skills/06_fMRI_Neuroimaging/aomic-skill/SKILL.md +292 -0
  70. package/skills/06_fMRI_Neuroimaging/aomic-skill/scripts/aomic_qc_summary.py +258 -0
  71. package/skills/06_fMRI_Neuroimaging/aomic-skill/scripts/extract_aomic_phenotype.py +284 -0
  72. package/skills/06_fMRI_Neuroimaging/aomic-skill/scripts/reorganize_aomic.py +322 -0
  73. package/skills/06_fMRI_Neuroimaging/asl-skill/SKILL.md +168 -0
  74. package/skills/06_fMRI_Neuroimaging/asl-skill/scripts/compute_cbf.py +224 -0
  75. package/skills/06_fMRI_Neuroimaging/bids-organizer/SKILL.md +241 -0
  76. package/skills/06_fMRI_Neuroimaging/bold5000-skill/SKILL.md +186 -0
  77. package/skills/06_fMRI_Neuroimaging/bold5000-skill/scripts/bold5000_qc_summary.py +96 -0
  78. package/skills/06_fMRI_Neuroimaging/bold5000-skill/scripts/extract_bold5000_stimulus.py +125 -0
  79. package/skills/06_fMRI_Neuroimaging/bold5000-skill/scripts/reorganize_bold5000.py +102 -0
  80. package/skills/06_fMRI_Neuroimaging/camcan-skill/SKILL.md +213 -0
  81. package/skills/06_fMRI_Neuroimaging/camcan-skill/scripts/camcan_qc_summary.py +131 -0
  82. package/skills/06_fMRI_Neuroimaging/camcan-skill/scripts/extract_camcan_phenotype.py +145 -0
  83. package/skills/06_fMRI_Neuroimaging/camcan-skill/scripts/validate_camcan.py +141 -0
  84. package/skills/06_fMRI_Neuroimaging/cobre-skill/SKILL.md +201 -0
  85. package/skills/06_fMRI_Neuroimaging/cobre-skill/scripts/cobre_qc_summary.py +95 -0
  86. package/skills/06_fMRI_Neuroimaging/cobre-skill/scripts/extract_cobre_phenotype.py +104 -0
  87. package/skills/06_fMRI_Neuroimaging/cobre-skill/scripts/reorganize_cobre.py +140 -0
  88. package/skills/06_fMRI_Neuroimaging/conn-tool/SKILL.md +180 -0
  89. package/skills/06_fMRI_Neuroimaging/dcm2nii/SKILL.md +189 -0
  90. package/skills/06_fMRI_Neuroimaging/dmt-har-med-skill/SKILL.md +183 -0
  91. package/skills/06_fMRI_Neuroimaging/dmt-har-med-skill/scripts/dmt_har_med_qc_summary.py +96 -0
  92. package/skills/06_fMRI_Neuroimaging/dmt-har-med-skill/scripts/extract_dmt_har_med_phenotype.py +121 -0
  93. package/skills/06_fMRI_Neuroimaging/dmt-har-med-skill/scripts/reorganize_dmt_har_med.py +125 -0
  94. package/skills/06_fMRI_Neuroimaging/dwi-skill/SKILL.md +359 -0
  95. package/skills/06_fMRI_Neuroimaging/fmri-skill/SKILL.md +371 -0
  96. package/skills/06_fMRI_Neuroimaging/fmriprep-tool/SKILL.md +228 -0
  97. package/skills/06_fMRI_Neuroimaging/freesurfer-tool/SKILL.md +286 -0
  98. package/skills/06_fMRI_Neuroimaging/freesurfer-tool/scripts/freesurfer_processor.py +145 -0
  99. package/skills/06_fMRI_Neuroimaging/fsl-tool/SKILL.md +208 -0
  100. package/skills/06_fMRI_Neuroimaging/hbn-skill/SKILL.md +271 -0
  101. package/skills/06_fMRI_Neuroimaging/hbn-skill/scripts/extract_hbn_phenotype.py +107 -0
  102. package/skills/06_fMRI_Neuroimaging/hbn-skill/scripts/hbn_qc_summary.py +96 -0
  103. package/skills/06_fMRI_Neuroimaging/hbn-skill/scripts/reorganize_hbn.py +150 -0
  104. package/skills/06_fMRI_Neuroimaging/hcpa-skill/SKILL.md +210 -0
  105. package/skills/06_fMRI_Neuroimaging/hcpa-skill/scripts/extract_hcpa_phenotype.py +146 -0
  106. package/skills/06_fMRI_Neuroimaging/hcpa-skill/scripts/hcpa_qc_summary.py +120 -0
  107. package/skills/06_fMRI_Neuroimaging/hcpa-skill/scripts/reorganize_hcpa.py +155 -0
  108. package/skills/06_fMRI_Neuroimaging/hcpd-skill/SKILL.md +210 -0
  109. package/skills/06_fMRI_Neuroimaging/hcpd-skill/scripts/extract_hcpd_phenotype.py +148 -0
  110. package/skills/06_fMRI_Neuroimaging/hcpd-skill/scripts/hcpd_qc_summary.py +125 -0
  111. package/skills/06_fMRI_Neuroimaging/hcpd-skill/scripts/reorganize_hcpd.py +146 -0
  112. package/skills/06_fMRI_Neuroimaging/hcpep-skill/SKILL.md +215 -0
  113. package/skills/06_fMRI_Neuroimaging/hcpep-skill/scripts/extract_hcpep_phenotype.py +157 -0
  114. package/skills/06_fMRI_Neuroimaging/hcpep-skill/scripts/hcpep_qc_summary.py +143 -0
  115. package/skills/06_fMRI_Neuroimaging/hcpep-skill/scripts/reorganize_hcpep.py +146 -0
  116. package/skills/06_fMRI_Neuroimaging/hcppipeline-tool/SKILL.md +217 -0
  117. package/skills/06_fMRI_Neuroimaging/hcpya-skill/SKILL.md +214 -0
  118. package/skills/06_fMRI_Neuroimaging/hcpya-skill/scripts/extract_hcpya_phenotype.py +190 -0
  119. package/skills/06_fMRI_Neuroimaging/hcpya-skill/scripts/hcpya_qc_summary.py +152 -0
  120. package/skills/06_fMRI_Neuroimaging/hcpya-skill/scripts/reorganize_hcpya.py +203 -0
  121. package/skills/06_fMRI_Neuroimaging/ixi-skill/SKILL.md +198 -0
  122. package/skills/06_fMRI_Neuroimaging/ixi-skill/scripts/ixi_qc_summary.py +137 -0
  123. package/skills/06_fMRI_Neuroimaging/ixi-skill/scripts/reorganize_ixi.py +190 -0
  124. package/skills/06_fMRI_Neuroimaging/mnd-skill/SKILL.md +191 -0
  125. package/skills/06_fMRI_Neuroimaging/mnd-skill/scripts/extract_mnd_phenotype.py +143 -0
  126. package/skills/06_fMRI_Neuroimaging/mnd-skill/scripts/mnd_qc_summary.py +120 -0
  127. package/skills/06_fMRI_Neuroimaging/mnd-skill/scripts/validate_mnd.py +107 -0
  128. package/skills/06_fMRI_Neuroimaging/mschallenge-skill/SKILL.md +203 -0
  129. package/skills/06_fMRI_Neuroimaging/mschallenge-skill/scripts/analyze_lesions.py +119 -0
  130. package/skills/06_fMRI_Neuroimaging/mschallenge-skill/scripts/longitudinal_lesion.py +148 -0
  131. package/skills/06_fMRI_Neuroimaging/mschallenge-skill/scripts/mschallenge_qc_summary.py +132 -0
  132. package/skills/06_fMRI_Neuroimaging/mschallenge-skill/scripts/validate_mschallenge.py +116 -0
  133. package/skills/06_fMRI_Neuroimaging/nibabel-skill/SKILL.md +184 -0
  134. package/skills/06_fMRI_Neuroimaging/nibabel-skill/scripts/atlas_coordinate_reference.py +61 -0
  135. package/skills/06_fMRI_Neuroimaging/nibabel-skill/scripts/freesurfer_io_reference.py +34 -0
  136. package/skills/06_fMRI_Neuroimaging/nibabel-skill/scripts/nifti_inspection_reference.py +35 -0
  137. package/skills/06_fMRI_Neuroimaging/nifd-skill/SKILL.md +205 -0
  138. package/skills/06_fMRI_Neuroimaging/nifd-skill/scripts/extract_nifd_phenotype.py +132 -0
  139. package/skills/06_fMRI_Neuroimaging/nifd-skill/scripts/nifd_qc_summary.py +111 -0
  140. package/skills/06_fMRI_Neuroimaging/nifd-skill/scripts/validate_nifd.py +111 -0
  141. package/skills/06_fMRI_Neuroimaging/nii2dcm/SKILL.md +143 -0
  142. package/skills/06_fMRI_Neuroimaging/nilearn-tool/SKILL.md +266 -0
  143. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/connectome_reference.py +65 -0
  144. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/denoise_timeseries_reference.py +58 -0
  145. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/hierarchical_parcellation_reference.py +53 -0
  146. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/kmeans_parcellation_reference.py +53 -0
  147. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/preprocess_bold_reference.py +76 -0
  148. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/rest_dictlearning_reference.py +56 -0
  149. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/rest_ica_reference.py +59 -0
  150. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/second_level_glm_reference.py +58 -0
  151. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/spacenet_classifier_reference.py +59 -0
  152. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/svm_classifier_reference.py +60 -0
  153. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/task_glm_reference.py +63 -0
  154. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/zalff_summary_reference.py +109 -0
  155. package/skills/06_fMRI_Neuroimaging/nsd-skill/SKILL.md +210 -0
  156. package/skills/06_fMRI_Neuroimaging/nsd-skill/scripts/extract_nsd_stimulus.py +171 -0
  157. package/skills/06_fMRI_Neuroimaging/nsd-skill/scripts/nsd_qc_summary.py +142 -0
  158. package/skills/06_fMRI_Neuroimaging/nsd-skill/scripts/validate_nsd.py +142 -0
  159. package/skills/06_fMRI_Neuroimaging/oasis-skill/SKILL.md +205 -0
  160. package/skills/06_fMRI_Neuroimaging/oasis-skill/scripts/extract_oasis_phenotype.py +126 -0
  161. package/skills/06_fMRI_Neuroimaging/oasis-skill/scripts/oasis_qc_summary.py +115 -0
  162. package/skills/06_fMRI_Neuroimaging/oasis-skill/scripts/validate_oasis.py +119 -0
  163. package/skills/06_fMRI_Neuroimaging/pet-skill/SKILL.md +173 -0
  164. package/skills/06_fMRI_Neuroimaging/pet-skill/scripts/compute_suvr.py +202 -0
  165. package/skills/06_fMRI_Neuroimaging/pnc-skill/SKILL.md +206 -0
  166. package/skills/06_fMRI_Neuroimaging/pnc-skill/scripts/extract_pnc_phenotype.py +136 -0
  167. package/skills/06_fMRI_Neuroimaging/pnc-skill/scripts/pnc_qc_summary.py +116 -0
  168. package/skills/06_fMRI_Neuroimaging/pnc-skill/scripts/validate_pnc.py +120 -0
  169. package/skills/06_fMRI_Neuroimaging/ppmi-skill/SKILL.md +209 -0
  170. package/skills/06_fMRI_Neuroimaging/ppmi-skill/scripts/extract_ppmi_phenotype.py +138 -0
  171. package/skills/06_fMRI_Neuroimaging/ppmi-skill/scripts/ppmi_qc_summary.py +111 -0
  172. package/skills/06_fMRI_Neuroimaging/ppmi-skill/scripts/validate_ppmi.py +117 -0
  173. package/skills/06_fMRI_Neuroimaging/qsiprep-tool/SKILL.md +320 -0
  174. package/skills/06_fMRI_Neuroimaging/rest-mneta-mdd-skill/SKILL.md +215 -0
  175. package/skills/06_fMRI_Neuroimaging/rest-mneta-mdd-skill/scripts/extract_rest_mdd_phenotype.py +132 -0
  176. package/skills/06_fMRI_Neuroimaging/rest-mneta-mdd-skill/scripts/harmonize_sites.py +152 -0
  177. package/skills/06_fMRI_Neuroimaging/rest-mneta-mdd-skill/scripts/rest_mdd_qc_summary.py +124 -0
  178. package/skills/06_fMRI_Neuroimaging/rest-mneta-mdd-skill/scripts/validate_rest_mdd.py +103 -0
  179. package/skills/06_fMRI_Neuroimaging/smri-skill/SKILL.md +302 -0
  180. package/skills/06_fMRI_Neuroimaging/tcp-skill/SKILL.md +204 -0
  181. package/skills/06_fMRI_Neuroimaging/tcp-skill/scripts/extract_tcp_phenotype.py +139 -0
  182. package/skills/06_fMRI_Neuroimaging/tcp-skill/scripts/tcp_qc_summary.py +111 -0
  183. package/skills/06_fMRI_Neuroimaging/tcp-skill/scripts/validate_tcp.py +99 -0
  184. package/skills/06_fMRI_Neuroimaging/ucla-cnp-skill/SKILL.md +217 -0
  185. package/skills/06_fMRI_Neuroimaging/ucla-cnp-skill/scripts/extract_ucla_cnp_phenotype.py +145 -0
  186. package/skills/06_fMRI_Neuroimaging/ucla-cnp-skill/scripts/ucla_cnp_qc_summary.py +111 -0
  187. package/skills/06_fMRI_Neuroimaging/ucla-cnp-skill/scripts/validate_ucla_cnp.py +113 -0
  188. package/skills/06_fMRI_Neuroimaging/ukb-skill/SKILL.md +310 -0
  189. package/skills/06_fMRI_Neuroimaging/ukb-skill/scripts/build_ukb_survival.py +210 -0
  190. package/skills/06_fMRI_Neuroimaging/ukb-skill/scripts/extract_ukb_cases.py +308 -0
  191. package/skills/06_fMRI_Neuroimaging/ukb-skill/scripts/extract_ukb_phenotype.py +232 -0
  192. package/skills/06_fMRI_Neuroimaging/ukb-skill/scripts/ukb_qc_summary.py +158 -0
  193. package/skills/06_fMRI_Neuroimaging/wmh-segmentation/SKILL.md +133 -0
  194. package/skills/07_Computational_Modeling/detrending/SKILL.md +118 -0
  195. package/skills/07_Computational_Modeling/dictlearning/SKILL.md +122 -0
  196. package/skills/07_Computational_Modeling/filtering/SKILL.md +121 -0
  197. package/skills/07_Computational_Modeling/glm/SKILL.md +153 -0
  198. package/skills/07_Computational_Modeling/hierarchical/SKILL.md +121 -0
  199. package/skills/07_Computational_Modeling/ica/SKILL.md +122 -0
  200. package/skills/07_Computational_Modeling/kmeans/SKILL.md +119 -0
  201. package/skills/07_Computational_Modeling/run_models/SKILL.md +427 -0
  202. package/skills/07_Computational_Modeling/spacenet/SKILL.md +122 -0
  203. package/skills/07_Computational_Modeling/svm/SKILL.md +120 -0
  204. package/skills/08_Computational_Neuroscience/brain_gnn/SKILL.md +183 -0
  205. package/skills/08_Computational_Neuroscience/dipy-tool/SKILL.md +239 -0
  206. package/skills/08_Computational_Neuroscience/dipy-tool/scripts/dti_metrics_reference.py +70 -0
  207. package/skills/08_Computational_Neuroscience/dipy-tool/scripts/load_and_mask_reference.py +76 -0
  208. package/skills/08_Computational_Neuroscience/dipy-tool/scripts/roi_stats_reference.py +59 -0
  209. package/skills/08_Computational_Neuroscience/fm_app/SKILL.md +195 -0
  210. package/skills/08_Computational_Neuroscience/neurostorm/SKILL.md +151 -0
  211. package/skills/13_Visualization/brain-visualization/SKILL.md +191 -0
  212. package/skills/13_Visualization/brain-visualization/scripts/connectome_reference.py +108 -0
  213. package/skills/13_Visualization/brain-visualization/scripts/freesurfer_ply_reference.py +54 -0
  214. package/skills/13_Visualization/brain-visualization/scripts/zalff_summary_reference.py +116 -0
  215. package/skills/13_Visualization/ethoclaw-paper-figure-layout/SKILL.md +78 -0
  216. package/skills/13_Visualization/ethoclaw-paper-figure-layout/assets/naturecomm_figures.tex +74 -0
  217. package/skills/13_Visualization/ethoclaw-paper-figure-layout/scripts/layout_results_foldered.py +579 -0
  218. package/skills/14_Writing/overleaf-skill/SKILL.md +184 -0
  219. package/skills/14_Writing/overleaf-skill/scripts/install.sh +30 -0
  220. package/skills/14_Writing/paper-writing/SKILL.md +146 -0
  221. package/skills/14_Writing/paper-writing/scripts/data_statement_templates.py +164 -0
  222. package/skills/14_Writing/paper-writing/scripts/figure_templates.py +315 -0
  223. package/skills/14_Writing/paper-writing/scripts/nature_figure_style.py +214 -0
  224. package/skills/14_Writing/paper-writing/scripts/section_phrasebank.py +246 -0
  225. package/skills/16_Animal_Behavior/deeplabcut/SKILL.md +154 -0
  226. package/skills/16_Animal_Behavior/deeplabcut/references/3d-pose.md +89 -0
  227. package/skills/16_Animal_Behavior/deeplabcut/references/maDLC.md +123 -0
  228. package/skills/16_Animal_Behavior/deeplabcut/references/modelzoo.md +98 -0
  229. package/skills/16_Animal_Behavior/deeplabcut/references/standard-pipeline.md +165 -0
  230. package/skills/16_Animal_Behavior/deeplabcut/references/utilities.md +146 -0
  231. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/SKILL.md +274 -0
  232. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/report_template_en.html +112 -0
  233. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/report_template_en.md +21 -0
  234. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/cluster-section.md +5 -0
  235. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/heatmap-section.md +5 -0
  236. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/integrated-interpretation.md +3 -0
  237. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/overview.md +3 -0
  238. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/project-summary.md +3 -0
  239. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/radar-section.md +5 -0
  240. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/raw-trajectory.md +3 -0
  241. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/sample-check.md +3 -0
  242. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/single-subject-section.md +3 -0
  243. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/stats-section.md +5 -0
  244. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/experiment-types/epm.md +52 -0
  245. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/experiment-types/fst.md +37 -0
  246. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/experiment-types/nor.md +39 -0
  247. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/experiment-types/oft.md +43 -0
  248. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/experiment-types/tcst.md +45 -0
  249. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/experiment-types/tst.md +36 -0
  250. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/input-types.md +59 -0
  251. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/interpretation-guardrails.md +45 -0
  252. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/metadata-schema.md +57 -0
  253. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/report-sections.md +86 -0
  254. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/section-selection-rules.md +169 -0
  255. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/scripts/build_report_manifest.py +27 -0
  256. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/scripts/render_report.py +34 -0
  257. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/scripts/report_utils.py +1121 -0
  258. package/skills/16_Animal_Behavior/ethoclaw-animal-grounding/SKILL.md +390 -0
  259. package/skills/16_Animal_Behavior/ethoclaw-animal-grounding/reference_code.py +98 -0
  260. package/skills/16_Animal_Behavior/ethoclaw-animal-pose-estimation/SKILL.md +336 -0
  261. package/skills/16_Animal_Behavior/ethoclaw-kinematic-parameter-generator/README.md +21 -0
  262. package/skills/16_Animal_Behavior/ethoclaw-kinematic-parameter-generator/SKILL.md +41 -0
  263. package/skills/16_Animal_Behavior/ethoclaw-kinematic-parameter-generator/batch_kinematic_generator.py +663 -0
  264. package/skills/16_Animal_Behavior/ethoclaw-kinematic-parameter-generator/config.json +19 -0
  265. package/skills/16_Animal_Behavior/ethoclaw-kinematic-parameter-generator/generate_kinematic_parameter.py +401 -0
  266. package/skills/16_Animal_Behavior/ethoclaw-kinematic-parameter-generator/kinematic_generator.py +265 -0
  267. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-clustermap-generate/SKILL.md +72 -0
  268. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-clustermap-generate/references/config.example.toml +56 -0
  269. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-clustermap-generate/scripts/cluster_all_params.py +232 -0
  270. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-clustermap-generate/scripts/cluster_all_params_from_config.py +236 -0
  271. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-radar-generate/SKILL.md +68 -0
  272. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-radar-generate/references/notes.md +5 -0
  273. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-radar-generate/scripts/plot_h5_radar.py +513 -0
  274. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-violin-stats-generate/SKILL.md +52 -0
  275. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-violin-stats-generate/config.toml +81 -0
  276. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-violin-stats-generate/references/stats-rule.md +18 -0
  277. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-violin-stats-generate/scripts/h5_inspect.py +79 -0
  278. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-violin-stats-generate/scripts/h5_violin_batch.py +624 -0
  279. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-violin-stats-generate/scripts/h5_violin_stats.py +438 -0
  280. package/skills/16_Animal_Behavior/ethoclaw-trajectory-velocity-heatmap-generate/SKILL.md +280 -0
  281. package/skills/16_Animal_Behavior/ethoclaw-trajectory-velocity-heatmap-generate/core_scripts/heatmap_trajectory.py +790 -0
  282. package/skills/16_Animal_Behavior/ethoclaw-trajectory-velocity-heatmap-generate/core_scripts/heatmap_velocity.py +855 -0
  283. package/skills/16_Animal_Behavior/ethoclaw-trajectory-velocity-heatmap-generate/reference_data/reference_2d.csv +101 -0
  284. package/skills/16_Animal_Behavior/ethoclaw-trajectory-velocity-heatmap-generate/reference_data/reference_2d.h5 +0 -0
  285. package/skills/16_Animal_Behavior/ethoclaw-trajectory-velocity-heatmap-generate/reference_data/reference_data_readme.md +126 -0
@@ -0,0 +1,1305 @@
1
+ #!/usr/bin/env python3
2
+ import argparse
3
+ import ipdb
4
+ import os
5
+ import re
6
+ import shlex
7
+ import shutil
8
+ import subprocess
9
+ import sys
10
+ import time
11
+ from pathlib import Path
12
+ from typing import Dict, Iterable, List, Optional, Set, Tuple
13
+
14
+ import pandas as pd
15
+
16
# Matches a ``YYYY-MM-DD`` style date anywhere inside a string and captures it.
DATE_PATTERN = re.compile(r"(\d{4}-\d{2}-\d{2})")
# Matches a three-digit group and a four-digit group joined by the letter "S",
# with optional "_" / "-" separators (e.g. "002_S_0295"); case-insensitive.
PTID_PATTERN = re.compile(r"(\d{3})[_-]?S[_-]?(\d{4})", re.IGNORECASE)

# Fixed ordering of the modality labels used when building per-visit records.
MODALITY_ORDER = ["T1", "T2", "FLAIR", "PD", "DTI", "fMRI"]
20
+
21
+
22
def strip_nii_suffix(name: str) -> str:
    """Return *name* without a trailing NIfTI extension.

    The suffix check is case-insensitive and recognises ``.nii.gz`` and
    ``.nii``. Names with any other ending are returned unchanged.
    """
    lowered = name.lower()
    for suffix in (".nii.gz", ".nii"):
        if lowered.endswith(suffix):
            # Slice the original string so the caller's casing is preserved.
            return name[: -len(suffix)]
    return name
29
+
30
+
31
def canonical_token(text: str) -> str:
    """Lower-case *text* and keep only ASCII letters ``a-z`` and digits ``0-9``."""
    kept = [
        ch
        for ch in text.lower()
        if ("a" <= ch <= "z") or ("0" <= ch <= "9")
    ]
    return "".join(kept)
33
+
34
+
35
def normalize_subject_id(value: object) -> Optional[str]:
    """Normalise a raw subject identifier to a canonical string.

    Returns ``None`` for missing values (``None`` / NA) and for values that
    are blank after stripping. When the text contains a ``PTID_PATTERN``
    match, the result is ``"<3 digits>_S_<4 digits>"``. Otherwise the text is
    upper-cased, ``-`` becomes ``_``, spaces are removed and a leading
    ``SUB_`` is dropped; an empty result yields ``None``.
    """
    if value is None or pd.isna(value):
        return None

    raw = str(value).strip()
    if raw == "":
        return None

    ptid = PTID_PATTERN.search(raw)
    if ptid is not None:
        leading_digits, trailing_digits = ptid.group(1), ptid.group(2)
        return f"{leading_digits}_S_{trailing_digits}"

    cleaned = raw.upper().replace("-", "_").replace(" ", "")
    if cleaned.startswith("SUB_"):
        cleaned = cleaned[len("SUB_"):]

    return cleaned if cleaned else None
52
+
53
+
54
def normalize_date(value: object) -> Optional[str]:
    """Convert a raw date-like value to a ``YYYY-MM-DD`` string.

    Returns ``None`` for missing values, blank text, or text that pandas
    cannot parse as a date. If the text contains a ``DATE_PATTERN`` match,
    only that matched portion is parsed.
    """
    if value is None or pd.isna(value):
        return None

    raw = str(value).strip()
    if not raw:
        return None

    embedded = DATE_PATTERN.search(raw)
    candidate = embedded.group(1) if embedded else raw

    # errors="coerce" turns unparseable input into NaT instead of raising.
    parsed = pd.to_datetime(candidate, errors="coerce")
    return None if pd.isna(parsed) else parsed.strftime("%Y-%m-%d")
71
+
72
+
73
def safe_int(value: object) -> Optional[int]:
    """Best-effort conversion of *value* to ``int``.

    The value is converted through ``float`` first, so numeric strings such
    as ``"3.0"`` are accepted and fractional parts are truncated toward zero.

    Returns:
        The converted integer, or ``None`` when *value* is ``None`` / NA or
        cannot be converted.
    """
    if value is None or pd.isna(value):
        return None

    try:
        return int(float(value))
    except (TypeError, ValueError, OverflowError):
        # TypeError: unsupported type; ValueError: non-numeric text;
        # OverflowError: infinities. Anything else is a real bug and should
        # surface rather than be swallowed.
        return None
81
+
82
+
83
def diagnosis_text(code: object) -> Optional[str]:
    """Translate a diagnosis code into its text label.

    Codes 1, 2 and 3 map to ``"CN"``, ``"MCI"`` and ``"Dementia"``; any other
    value (including values ``safe_int`` cannot convert) yields ``None``.
    """
    labels = {1: "CN", 2: "MCI", 3: "Dementia"}
    numeric_code = safe_int(code)
    return labels.get(numeric_code)
91
+
92
+
93
def find_column(df: pd.DataFrame, candidates: List[str]) -> Optional[str]:
    """Return the first column of *df* matching a candidate, ignoring case.

    Candidates are tried in order; the returned value is the column label as
    it appears in *df*. Returns ``None`` when no candidate matches.
    """
    by_lower = {}
    for column in df.columns:
        # Later columns overwrite earlier ones that share a lower-cased name.
        by_lower[str(column).lower()] = column

    wanted_keys = (candidate.lower() for candidate in candidates)
    return next((by_lower[key] for key in wanted_keys if key in by_lower), None)
99
+
100
+
101
def detect_modality_from_file(file_path: Path) -> Optional[str]:
    """Infer the modality label from a file's name.

    The NIfTI extension is stripped and the remainder reduced with
    ``canonical_token``; the result must equal one of the known keys exactly.
    Returns ``None`` for unrecognised names.
    """
    modality_by_key = {
        "t1": "T1",
        "t1w": "T1",
        "t2": "T2",
        "flair": "FLAIR",
        "pd": "PD",
        "dti": "DTI",
        "fmri": "fMRI",
        "rsfmri": "fMRI",
    }
    stem = strip_nii_suffix(file_path.name)
    return modality_by_key.get(canonical_token(stem))
119
+
120
+
121
def is_nifti_file(path: Path) -> bool:
    """Return ``True`` when the file name ends in ``.nii`` or ``.nii.gz`` (any case)."""
    return path.name.lower().endswith((".nii", ".nii.gz"))
124
+
125
+
126
def quote_sh(value: object) -> str:
    """Return ``str(value)`` quoted for safe use as a single shell token."""
    as_text = str(value)
    return shlex.quote(as_text)
128
+
129
+
130
def ensure_parent(path: Path) -> None:
    """Create the parent directory of *path*, including missing ancestors.

    Does nothing if the directory already exists; *path* itself is not created.
    """
    parent_dir = path.parent
    parent_dir.mkdir(parents=True, exist_ok=True)
132
+
133
+
134
def none_if_na(value: object) -> Optional[object]:
    """Return ``None`` for ``None`` / NA values; otherwise return *value* unchanged."""
    is_missing = value is None or pd.isna(value)
    return None if is_missing else value
138
+
139
+
140
def is_subject_dir(path: Path) -> bool:
    """Return ``True`` if *path* is a directory with a date-named sub-directory.

    A child counts when it is a directory and ``normalize_date`` accepts its
    name. Any error raised while listing or checking children is treated as
    "not a subject directory".
    """
    if not path.is_dir():
        return False

    try:
        return any(
            child.is_dir() and normalize_date(child.name)
            for child in path.iterdir()
        )
    except Exception:
        # Deliberately best-effort: unreadable directories are just skipped.
        return False
152
+
153
+
154
def scan_dataset(root: Path) -> pd.DataFrame:
    """Build a per-visit inventory of recognised NIfTI files under *root*.

    The walk is ``root/<subject>/<date>/<file>``: subject folders are those
    accepted by ``is_subject_dir``, date folders those whose name
    ``normalize_date`` can parse, and files those that are NIfTI and whose
    name maps to a known modality. Only the first file (by name order) is kept
    per modality, and visits with no recognised file are skipped.

    Returns:
        One row per visit, sorted by ``subject_id`` then visit timestamp, with
        ``has_<modality>`` / ``<modality>_path`` columns for every entry of
        ``MODALITY_ORDER``. An empty, column-less DataFrame is returned when
        nothing is found.
    """

    def entry_name(entry: Path) -> str:
        return entry.name

    rows: List[Dict[str, object]] = []

    subject_dirs = (
        candidate
        for candidate in sorted(root.iterdir(), key=entry_name)
        if is_subject_dir(candidate)
    )
    for subject_dir in subject_dirs:
        subject_folder = subject_dir.name
        # Fall back to the raw folder name when normalisation yields nothing.
        subject_id = normalize_subject_id(subject_folder) or subject_folder

        for visit_dir in sorted(subject_dir.iterdir(), key=entry_name):
            visit_date = normalize_date(visit_dir.name) if visit_dir.is_dir() else None
            if visit_date is None:
                continue

            found: Dict[str, str] = {}
            for candidate in sorted(visit_dir.iterdir(), key=entry_name):
                if not (candidate.is_file() and is_nifti_file(candidate)):
                    continue
                modality = detect_modality_from_file(candidate)
                # First match wins; later files of the same modality are ignored.
                if modality is None or modality in found:
                    continue
                found[modality] = str(candidate.resolve())

            if not found:
                continue

            row: Dict[str, object] = {
                "subject_id": subject_id,
                "subject_folder": subject_folder,
                "visit_date": visit_date,
                "visit_folder_path": str(visit_dir.resolve()),
            }
            for modality in MODALITY_ORDER:
                row[f"has_{modality}"] = int(modality in found)
                row[f"{modality}_path"] = found.get(modality)

            present = [modality for modality in MODALITY_ORDER if modality in found]
            row["available_modalities"] = "|".join(present)
            row["omnibrainbench_minimum_modalities"] = int(
                row["has_T1"] == 1 and row["has_FLAIR"] == 1
            )
            rows.append(row)

    if not rows:
        return pd.DataFrame()

    frame = pd.DataFrame(rows)
    frame["visit_ts"] = pd.to_datetime(frame["visit_date"], errors="coerce")
    return frame.sort_values(["subject_id", "visit_ts"]).reset_index(drop=True)
218
+
219
+
220
def auto_find_csv(root: Path, prefix: str) -> Optional[Path]:
    """Return the lowest-sorting ``<prefix>*.csv`` directly under *root*.

    Returns ``None`` when no file matches.
    """
    matches = root.glob(f"{prefix}*.csv")
    # The smallest path is the one a full sort would place first.
    return min(matches, default=None)
225
+
226
+
227
def derive_diagnosis_code(
    row: pd.Series,
    diagnosis_col: Optional[str],
    dxchange_col: Optional[str],
    dxcurren_col: Optional[str],
) -> Optional[int]:
    """Reduce one diagnosis-table row to a single code in ``{1, 2, 3}``.

    The three candidate columns are consulted in priority order; the first
    one that yields a usable value wins:

    1. ``diagnosis_col`` - accepted as-is when it parses to 1, 2 or 3.
    2. ``dxchange_col`` - a 1..9 value translated to 1/2/3 through a fixed
       lookup table.
    3. ``dxcurren_col`` - accepted as-is when it parses to 1, 2 or 3.

    Args:
        row: One record of the source table.
        diagnosis_col: Name of the direct diagnosis column, or ``None``.
        dxchange_col: Name of the diagnosis-change column, or ``None``.
        dxcurren_col: Name of the current-diagnosis column, or ``None``.

    Returns:
        The derived code, or ``None`` when no column provides one.
    """
    accepted = {1, 2, 3}

    if diagnosis_col is not None:
        direct = safe_int(row.get(diagnosis_col))
        if direct in accepted:
            return direct

    if dxchange_col is not None:
        change = safe_int(row.get(dxchange_col))
        # Position k (1-based) holds the code that change value k maps to.
        # NOTE(review): presumably the ADNI DXCHANGE coding - confirm.
        change_to_code = dict(zip(range(1, 10), (1, 2, 3, 2, 3, 3, 1, 2, 1)))
        if change in change_to_code:
            return change_to_code[change]

    if dxcurren_col is not None:
        current = safe_int(row.get(dxcurren_col))
        if current in accepted:
            return current

    return None
260
+
261
+
262
def load_dxsum_table(csv_path: Path) -> pd.DataFrame:
    """Read a diagnosis-summary CSV into one labelled row per subject and date.

    Column names are resolved through ``find_column`` from lists of accepted
    aliases. Rows are dropped when the subject id or date cannot be
    normalised, or when no diagnosis code can be derived.

    Args:
        csv_path: Path of the CSV file to load.

    Returns:
        A frame with columns ``subject_id``, ``source_date``, ``source_ts``,
        ``diagnosis_code``, ``diagnosis``, ``phase`` and ``viscode``, sorted
        by subject and timestamp with duplicate (subject, date) pairs
        removed. The frame is empty (no columns) when nothing survives.

    Raises:
        RuntimeError: If no subject-id column or no date column is found.
    """
    raw = pd.read_csv(csv_path, low_memory=False)

    ptid_col = find_column(raw, ["PTID", "SUBJECT", "SUBJECT_ID"])
    date_col = find_column(raw, ["EXAMDATE", "VISDATE", "USERDATE", "COLDATE", "SCANDATE"])
    phase_col = find_column(raw, ["PHASE", "COLPROT", "PROTOCOL"])
    viscode_col = find_column(raw, ["VISCODE2", "VISCODE"])
    diagnosis_col = find_column(raw, ["DIAGNOSIS"])
    dxchange_col = find_column(raw, ["DXCHANGE"])
    dxcurren_col = find_column(raw, ["DXCURREN"])

    if ptid_col is None:
        raise RuntimeError(f"PTID column not found in {csv_path}")
    if date_col is None:
        raise RuntimeError(f"Date column not found in {csv_path}")

    kept: List[Dict[str, object]] = []
    for _, entry in raw.iterrows():
        subject_id = normalize_subject_id(entry.get(ptid_col))
        source_date = normalize_date(entry.get(date_col))
        if subject_id is None or source_date is None:
            continue

        diag_code = derive_diagnosis_code(entry, diagnosis_col, dxchange_col, dxcurren_col)
        if diag_code is None:
            continue

        record: Dict[str, object] = {}
        record["subject_id"] = subject_id
        record["source_date"] = source_date
        record["source_ts"] = pd.to_datetime(source_date, errors="coerce")
        record["diagnosis_code"] = diag_code
        record["diagnosis"] = diagnosis_text(diag_code)
        # Optional columns: only read them when the CSV actually has them.
        record["phase"] = entry.get(phase_col) if phase_col else None
        record["viscode"] = entry.get(viscode_col) if viscode_col else None
        kept.append(record)

    table = pd.DataFrame(kept)
    if table.empty:
        return table

    # After sorting, keep only the first record of every (subject, date).
    table = table.sort_values(["subject_id", "source_ts"])
    table = table.drop_duplicates(subset=["subject_id", "source_date"], keep="first")
    return table.reset_index(drop=True)
310
+
311
+
312
def load_ucsf_table(csv_path: Path) -> pd.DataFrame:
    """Read a QC table CSV and keep only rows whose ``OVERALLQC`` equals 1.

    Column names are resolved through ``find_column`` from lists of accepted
    aliases. Rows with an un-normalisable subject id or date are dropped.

    Args:
        csv_path: Path of the CSV file to load.

    Returns:
        A frame with columns ``subject_id``, ``source_date``, ``source_ts``,
        ``overallqc``, ``phase`` and ``viscode``, sorted by subject and
        timestamp with duplicate (subject, date) pairs removed. The frame is
        empty (no columns) when nothing survives.

    Raises:
        RuntimeError: If the subject-id, date or ``OVERALLQC`` column is
            missing.
    """
    raw = pd.read_csv(csv_path, low_memory=False)

    ptid_col = find_column(raw, ["PTID", "SUBJECT", "SUBJECT_ID"])
    date_col = find_column(raw, ["EXAMDATE", "SCANDATE", "VISDATE", "USERDATE", "COLDATE"])
    qc_col = find_column(raw, ["OVERALLQC"])
    phase_col = find_column(raw, ["PHASE", "COLPROT", "PROTOCOL"])
    viscode_col = find_column(raw, ["VISCODE2", "VISCODE"])

    if ptid_col is None:
        raise RuntimeError(f"PTID column not found in {csv_path}")
    if date_col is None:
        raise RuntimeError(f"Date column not found in {csv_path}")
    if qc_col is None:
        raise RuntimeError(f"OVERALLQC column not found in {csv_path}")

    kept: List[Dict[str, object]] = []
    for _, entry in raw.iterrows():
        subject_id = normalize_subject_id(entry.get(ptid_col))
        source_date = normalize_date(entry.get(date_col))
        overall_qc = safe_int(entry.get(qc_col))

        unusable = subject_id is None or source_date is None
        if unusable or overall_qc != 1:
            continue

        record: Dict[str, object] = {}
        record["subject_id"] = subject_id
        record["source_date"] = source_date
        record["source_ts"] = pd.to_datetime(source_date, errors="coerce")
        record["overallqc"] = overall_qc
        # Optional columns: only read them when the CSV actually has them.
        record["phase"] = entry.get(phase_col) if phase_col else None
        record["viscode"] = entry.get(viscode_col) if viscode_col else None
        kept.append(record)

    table = pd.DataFrame(kept)
    if table.empty:
        return table

    # After sorting, keep only the first record of every (subject, date).
    table = table.sort_values(["subject_id", "source_ts"])
    table = table.drop_duplicates(subset=["subject_id", "source_date"], keep="first")
    return table.reset_index(drop=True)
359
+
360
+
361
def pick_best_match(
    source_df: pd.DataFrame,
    visit_ts: pd.Timestamp,
    max_days: int,
) -> Tuple[Optional[pd.Series], Optional[int], Optional[str]]:
    """Choose the source record closest in time to a visit.

    Args:
        source_df: Candidate records; must carry a ``source_ts`` column.
        visit_ts: Timestamp of the visit being matched.
        max_days: Largest absolute day difference that still counts.

    Returns:
        ``(row, day_gap, match_type)``. ``match_type`` is ``"exact"`` when a
        record shares the visit timestamp (gap 0) and ``"nearest"``
        otherwise; the nearest row additionally carries a ``match_days``
        entry. Ties on the gap go to the earlier ``source_ts``. All three
        elements are ``None`` when there is no candidate within the window,
        the table is empty, or ``visit_ts`` is missing.
    """
    nothing = (None, None, None)
    if source_df.empty or pd.isna(visit_ts):
        return nothing

    same_ts = source_df.loc[source_df["source_ts"] == visit_ts]
    if not same_ts.empty:
        return same_ts.iloc[0], 0, "exact"

    # Whole-day distance of every candidate from the visit.
    gap_days = (source_df["source_ts"] - visit_ts).abs().dt.days
    within = source_df.assign(match_days=gap_days)
    within = within.loc[within["match_days"] <= max_days]
    if within.empty:
        return nothing

    winner = within.sort_values(["match_days", "source_ts"]).iloc[0]
    return winner, int(winner["match_days"]), "nearest"
382
+
383
+
384
def attach_matches(
    visits_df: pd.DataFrame,
    source_df: pd.DataFrame,
    prefix: str,
    payload_cols: List[str],
    max_days: int,
) -> pd.DataFrame:
    """Annotate each visit with its best-matching record from *source_df*.

    A copy of *visits_df* gains ``<prefix>_source_date``,
    ``<prefix>_match_days``, ``<prefix>_match_type`` and one
    ``<prefix>_<col>`` column per entry of *payload_cols*, all initialised
    to ``pd.NA``. Visits are matched only against source rows of the same
    ``subject_id``, using ``pick_best_match``.

    Args:
        visits_df: Visits; needs ``subject_id`` and ``visit_ts`` columns.
        source_df: Records to match against; may be empty.
        prefix: Prefix for the added column names.
        payload_cols: Source columns to copy over from the matched record.
        max_days: Matching window passed to ``pick_best_match``.

    Returns:
        The annotated copy; the input frame is left untouched.
    """
    enriched = visits_df.copy()

    for suffix in ("source_date", "match_days", "match_type", *payload_cols):
        enriched[f"{prefix}_{suffix}"] = pd.NA

    if source_df.empty:
        return enriched

    # One candidate table per subject, re-indexed from zero.
    per_subject = {
        subject: records.reset_index(drop=True)
        for subject, records in source_df.groupby("subject_id")
    }

    for idx, visit in enriched.iterrows():
        subject = visit["subject_id"]
        when = visit["visit_ts"]

        candidates = per_subject.get(subject)
        if candidates is None:
            continue

        best_row, match_days, match_type = pick_best_match(candidates, when, max_days)
        if best_row is None:
            continue

        bookkeeping = {
            "source_date": best_row["source_date"],
            "match_days": match_days,
            "match_type": match_type,
        }
        for suffix, value in bookkeeping.items():
            enriched.at[idx, f"{prefix}_{suffix}"] = value

        for col in payload_cols:
            enriched.at[idx, f"{prefix}_{col}"] = best_row.get(col)

    return enriched
423
+
424
+
425
def build_task2_df(merged_df: pd.DataFrame) -> pd.DataFrame:
    """Project the merged visit table onto the task-2 modality columns.

    Keeps the visit identity columns, the modality summary columns and a
    ``has_<modality>`` / ``<modality>_path`` pair for T1, T2, FLAIR, PD,
    DTI and fMRI. The two summary columns are renamed with a ``task2_``
    prefix.

    Args:
        merged_df: Visit table containing all of the columns above.

    Returns:
        A new frame with the selected (and partly renamed) columns.
    """
    wanted = [
        "subject_id",
        "subject_folder",
        "visit_date",
        "visit_folder_path",
        "available_modalities",
        "omnibrainbench_minimum_modalities",
    ]
    for modality in ("T1", "T2", "FLAIR", "PD", "DTI", "fMRI"):
        wanted.append(f"has_{modality}")
        wanted.append(f"{modality}_path")

    renames = {
        "omnibrainbench_minimum_modalities": "task2_omnibrainbench_minimum_modalities",
        "available_modalities": "task2_available_modalities",
    }
    return merged_df[wanted].copy().rename(columns=renames)
454
+
455
+
456
def build_task3_df(merged_df: pd.DataFrame) -> pd.DataFrame:
    """Build the task-3 diagnosis label table from the merged visit table.

    Three label columns are derived from ``dx_diagnosis_code``:

    * ``task3_label_available`` - 1 when the code is 1, 2 or 3, else 0.
    * ``task3_any_cognitive_impairment`` - 1 for codes 2/3, 0 for code 1,
      ``pd.NA`` otherwise.
    * ``task3_dementia`` - 1 for code 3, 0 for codes 1/2, ``pd.NA``
      otherwise.

    The ``dx_*`` columns are then renamed to their ``task3_*`` names.

    Args:
        merged_df: Visit table that already carries the ``dx_*`` columns.

    Returns:
        A new frame with identity, diagnosis-match and label columns.
    """

    def label_available(raw_code: object) -> int:
        return int(safe_int(raw_code) in {1, 2, 3})

    def any_impairment(raw_code: object) -> object:
        code = safe_int(raw_code)
        if code in {2, 3}:
            return 1
        if code in {1}:
            return 0
        return pd.NA

    def dementia(raw_code: object) -> object:
        code = safe_int(raw_code)
        if code == 3:
            return 1
        if code in {1, 2}:
            return 0
        return pd.NA

    selected = [
        "subject_id",
        "subject_folder",
        "visit_date",
        "visit_folder_path",
        "dx_source_date",
        "dx_match_days",
        "dx_match_type",
        "dx_diagnosis_code",
        "dx_diagnosis",
        "dx_phase",
        "dx_viscode",
    ]
    table = merged_df[selected].copy()

    raw_codes = table["dx_diagnosis_code"]
    table["task3_label_available"] = raw_codes.apply(label_available)
    table["task3_any_cognitive_impairment"] = raw_codes.apply(any_impairment)
    table["task3_dementia"] = raw_codes.apply(dementia)

    renames = {
        "dx_source_date": "task3_dx_source_date",
        "dx_match_days": "task3_dx_match_days",
        "dx_match_type": "task3_dx_match_type",
        "dx_diagnosis_code": "task3_diagnosis_code",
        "dx_diagnosis": "task3_diagnosis",
        "dx_phase": "task3_dx_phase",
        "dx_viscode": "task3_dx_viscode",
    }
    return table.rename(columns=renames)
493
+
494
+
495
def build_subject_risk_summary(long_df: pd.DataFrame) -> pd.DataFrame:
    """Summarise every subject's visit history in a single row.

    Visits are ordered by ``subject_id`` and ``visit_ts``. For each subject
    the summary records the number of visits, the number of visits with a
    non-null ``current_diagnosis_code``, whether there is more than one
    visit, and the code / text / ``visit_date`` of the first and of the
    last labelled visit (all ``None`` when the subject has no labelled
    visit).

    Args:
        long_df: Visit table with ``subject_id``, ``visit_ts``,
            ``visit_date`` and ``current_diagnosis_code`` columns.

    Returns:
        One row per subject, keyed by ``subject_id``, with ``task5_*``
        summary columns.
    """
    summaries: List[Dict[str, object]] = []
    ordered = long_df.sort_values(["subject_id", "visit_ts"])

    for subject_id, visits in ordered.groupby("subject_id", sort=False):
        visits = visits.reset_index(drop=True)
        labeled = visits.loc[visits["current_diagnosis_code"].notna()].copy()

        # Key order here fixes the column order of the resulting frame.
        summary: Dict[str, object] = {
            "subject_id": subject_id,
            "task5_subject_n_visits": len(visits),
            "task5_subject_n_labeled_visits": len(labeled),
            "task5_has_longitudinal_followup": int(len(visits) > 1),
            "task5_baseline_diagnosis_code": None,
            "task5_baseline_diagnosis": None,
            "task5_baseline_labeled_visit_date": None,
            "task5_last_diagnosis_code": None,
            "task5_last_diagnosis": None,
            "task5_last_labeled_visit_date": None,
        }

        if not labeled.empty:
            earliest = labeled.iloc[0]
            latest = labeled.iloc[-1]

            first_code = safe_int(earliest["current_diagnosis_code"])
            summary["task5_baseline_diagnosis_code"] = first_code
            summary["task5_baseline_diagnosis"] = diagnosis_text(first_code)
            summary["task5_baseline_labeled_visit_date"] = earliest["visit_date"]

            final_code = safe_int(latest["current_diagnosis_code"])
            summary["task5_last_diagnosis_code"] = final_code
            summary["task5_last_diagnosis"] = diagnosis_text(final_code)
            summary["task5_last_labeled_visit_date"] = latest["visit_date"]

        summaries.append(summary)

    return pd.DataFrame(summaries)
533
+
534
+
535
def add_longitudinal_labels(merged_df: pd.DataFrame) -> pd.DataFrame:
    """Add per-visit look-ahead ("task5") labels and per-subject summaries.

    Visits are sorted by subject and ``visit_ts``. ``current_diagnosis_code``
    / ``current_diagnosis`` are derived from ``dx_diagnosis_code``. For each
    visit, later visits of the same subject are inspected to fill:

    * whether any later visit, and any later labelled visit, exists;
    * date, code, text and day distance of the next labelled visit;
    * whether a later labelled visit has a higher code than the current
      one, and the day distance to the first such visit;
    * outcome flags and a ``task5_risk_label`` string that depend on the
      current code (1, 2 or 3).

    The flags in the last two bullets are only filled when the current
    visit is labelled and a later labelled visit exists; otherwise they stay
    ``pd.NA``. Finally the output of ``build_subject_risk_summary`` is
    merged in on ``subject_id``.

    Args:
        merged_df: Visit table with ``subject_id``, ``visit_ts`` and
            ``dx_diagnosis_code`` columns (plus what the summary needs).

    Returns:
        A new, sorted frame with the added columns.
    """
    known_codes = {1, 2, 3}

    frame = merged_df.sort_values(["subject_id", "visit_ts"]).reset_index(drop=True).copy()
    frame["current_diagnosis_code"] = frame["dx_diagnosis_code"].apply(safe_int)
    frame["current_diagnosis"] = frame["current_diagnosis_code"].apply(diagnosis_text)

    for label_col in (
        "task5_has_future_visit",
        "task5_has_future_labeled_visit",
        "task5_next_labeled_visit_date",
        "task5_next_labeled_diagnosis_code",
        "task5_next_labeled_diagnosis",
        "task5_days_to_next_labeled_visit",
        "task5_future_decline",
        "task5_days_to_first_decline",
        "task5_future_any_impairment",
        "task5_future_dementia",
        "task5_stable_cn_followup",
        "task5_mci_to_dementia_risk",
        "task5_risk_label",
    ):
        frame[label_col] = pd.NA

    for _, visits in frame.groupby("subject_id", sort=False):
        # (row index, visit timestamp, diagnosis code) in chronological order.
        timeline = [
            (
                row_idx,
                frame.at[row_idx, "visit_ts"],
                safe_int(frame.at[row_idx, "current_diagnosis_code"]),
            )
            for row_idx in list(visits.index)
        ]

        for position, (row_idx, now_ts, now_code) in enumerate(timeline):
            later = timeline[position + 1 :]
            later_labeled = [entry for entry in later if entry[2] in known_codes]

            frame.at[row_idx, "task5_has_future_visit"] = int(bool(later))
            frame.at[row_idx, "task5_has_future_labeled_visit"] = int(bool(later_labeled))

            if later_labeled:
                _, upcoming_ts, upcoming_code = later_labeled[0]
                frame.at[row_idx, "task5_next_labeled_visit_date"] = upcoming_ts.strftime("%Y-%m-%d")
                frame.at[row_idx, "task5_next_labeled_diagnosis_code"] = upcoming_code
                frame.at[row_idx, "task5_next_labeled_diagnosis"] = diagnosis_text(upcoming_code)
                frame.at[row_idx, "task5_days_to_next_labeled_visit"] = int((upcoming_ts - now_ts).days)

            # Outcome labels need a labelled "now" and a labelled future.
            if now_code not in known_codes or not later_labeled:
                continue

            later_codes = [entry[2] for entry in later_labeled]

            # First later labelled visit whose code exceeds the current one.
            worsening = next((entry for entry in later_labeled if entry[2] > now_code), None)
            frame.at[row_idx, "task5_future_decline"] = int(worsening is not None)
            if worsening is not None:
                frame.at[row_idx, "task5_days_to_first_decline"] = int((worsening[1] - now_ts).days)

            outcome: Dict[str, object] = {}
            if now_code == 1:
                impaired_later = any(code in {2, 3} for code in later_codes)
                all_still_one = all(code == 1 for code in later_codes)
                outcome = {
                    "task5_future_any_impairment": int(impaired_later),
                    "task5_future_dementia": int(any(code == 3 for code in later_codes)),
                    "task5_stable_cn_followup": int(all_still_one),
                    "task5_mci_to_dementia_risk": 0,
                    "task5_risk_label": (
                        "CN_to_impairment_risk" if impaired_later else "stable_CN_short_term"
                    ),
                }
            elif now_code == 2:
                reaches_three = any(code == 3 for code in later_codes)
                outcome = {
                    "task5_future_any_impairment": 1,
                    "task5_future_dementia": int(reaches_three),
                    "task5_stable_cn_followup": 0,
                    "task5_mci_to_dementia_risk": int(reaches_three),
                    "task5_risk_label": (
                        "MCI_to_dementia_risk" if reaches_three else "stable_or_reverting_MCI"
                    ),
                }
            elif now_code == 3:
                outcome = {
                    "task5_future_any_impairment": 1,
                    "task5_future_dementia": 1,
                    "task5_stable_cn_followup": 0,
                    "task5_mci_to_dementia_risk": 0,
                    "task5_risk_label": "established_dementia",
                }

            for label_col, value in outcome.items():
                frame.at[row_idx, label_col] = value

    per_subject = build_subject_risk_summary(frame)
    return frame.merge(per_subject, on="subject_id", how="left")
639
+
640
+
641
def build_task5_df(merged_df: pd.DataFrame) -> pd.DataFrame:
    """Build the task-5 longitudinal label table.

    Runs ``add_longitudinal_labels`` on the merged visit table, keeps the
    visit identity columns, the current diagnosis, the per-subject summary,
    the per-visit look-ahead labels and the diagnosis-match bookkeeping,
    and gives the non-prefixed columns a ``task5_`` prefix.

    Args:
        merged_df: Merged visit table accepted by ``add_longitudinal_labels``.

    Returns:
        A new frame restricted to the task-5 columns.
    """
    labelled = add_longitudinal_labels(merged_df)

    identity = ["subject_id", "subject_folder", "visit_date", "visit_folder_path"]
    current = ["current_diagnosis_code", "current_diagnosis"]
    subject_summary = [
        "task5_subject_n_visits",
        "task5_subject_n_labeled_visits",
        "task5_has_longitudinal_followup",
        "task5_baseline_diagnosis_code",
        "task5_baseline_diagnosis",
        "task5_baseline_labeled_visit_date",
        "task5_last_diagnosis_code",
        "task5_last_diagnosis",
        "task5_last_labeled_visit_date",
    ]
    look_ahead = [
        "task5_has_future_visit",
        "task5_has_future_labeled_visit",
        "task5_next_labeled_visit_date",
        "task5_next_labeled_diagnosis_code",
        "task5_next_labeled_diagnosis",
        "task5_days_to_next_labeled_visit",
        "task5_future_decline",
        "task5_days_to_first_decline",
        "task5_future_any_impairment",
        "task5_future_dementia",
        "task5_stable_cn_followup",
        "task5_mci_to_dementia_risk",
        "task5_risk_label",
    ]
    dx_match = ["dx_source_date", "dx_match_days", "dx_match_type"]

    selected = labelled[identity + current + subject_summary + look_ahead + dx_match].copy()

    # Every renamed column simply gains the "task5_" prefix.
    renames = {name: f"task5_{name}" for name in current + dx_match}
    return selected.rename(columns=renames)
688
+
689
+
690
def build_task1_df(merged_df: pd.DataFrame, outdir: Path) -> pd.DataFrame:
    """Plan one task-1 (FreeSurfer segmentation) job per visit.

    For every visit the output records where the label, auxiliary label
    and log file live under *outdir*, the shell-quoted ``recon-all`` and
    ``mri_convert`` command strings (only when a T1 path is present), the
    matched ``fs_*`` QC information, and a status string:
    ``"generated"`` when the label file already exists, otherwise
    ``"pending"`` for eligible visits and ``"ineligible"`` for the rest.

    Args:
        merged_df: Visit table with ``subject_id``, ``visit_date``,
            ``subject_folder``, ``visit_folder_path``, ``has_T1`` and
            optionally ``T1_path`` / ``fs_*`` columns.
        outdir: Root directory for task-1 outputs.

    Returns:
        One row per input visit with the ``task1_*`` planning columns.
    """
    fs_subjects_dir = outdir / "task1_freesurfer_subjects"
    label_root = outdir / "task1_labels"
    log_root = outdir / "task1_logs"
    fs_subjects_dir_text = str(fs_subjects_dir.resolve())

    planned: List[Dict[str, object]] = []

    for _, visit in merged_df.iterrows():
        subject_id = visit["subject_id"]
        visit_date = visit["visit_date"]
        t1_path = none_if_na(visit.get("T1_path"))
        overall_qc = safe_int(visit.get("fs_overallqc"))
        fs_qc_pass = int(overall_qc == 1)
        # Regeneration mode: task1 only requires T1 to be present.
        eligible = int(visit["has_T1"] == 1)

        fs_subject_id = f"{subject_id}_{str(visit_date).replace('-', '')}"
        label_dir = label_root / subject_id / visit_date
        label_path = label_dir / "task1_anatomical_seg.nii.gz"
        aux_path = label_dir / "task1_aseg_aux.nii.gz"
        log_path = log_root / subject_id / visit_date / "task1_recon_all.log"

        recon_cmd = convert_aparc_cmd = convert_aseg_cmd = None
        if t1_path is not None:
            mri_dir = fs_subjects_dir / fs_subject_id / "mri"
            recon_cmd = " ".join(
                [
                    "recon-all",
                    "-sd",
                    quote_sh(fs_subjects_dir),
                    "-wsatlas",
                    "-wsless",
                    "-all",
                    "-s",
                    quote_sh(fs_subject_id),
                    "-i",
                    quote_sh(t1_path),
                ]
            )
            convert_aparc_cmd = " ".join(
                ["mri_convert", quote_sh(mri_dir / "aparc+aseg.mgz"), quote_sh(label_path)]
            )
            convert_aseg_cmd = " ".join(
                ["mri_convert", quote_sh(mri_dir / "aseg.mgz"), quote_sh(aux_path)]
            )

        if label_path.exists():
            status = "generated"
        elif eligible == 1:
            status = "pending"
        else:
            status = "ineligible"

        planned.append(
            {
                "subject_id": subject_id,
                "subject_folder": visit["subject_folder"],
                "visit_date": visit_date,
                "visit_folder_path": visit["visit_folder_path"],
                "T1_path": t1_path,
                "task1_fs_source_date": none_if_na(visit.get("fs_source_date")),
                "task1_fs_match_days": none_if_na(visit.get("fs_match_days")),
                "task1_fs_match_type": none_if_na(visit.get("fs_match_type")),
                "task1_fs_overallqc": overall_qc,
                "task1_fs_phase": none_if_na(visit.get("fs_phase")),
                "task1_fs_viscode": none_if_na(visit.get("fs_viscode")),
                "task1_fs_qc_pass": fs_qc_pass,
                "task1_eligible": eligible,
                "task1_freesurfer_subjects_dir": fs_subjects_dir_text,
                "task1_freesurfer_subject_id": fs_subject_id,
                "task1_recon_all_cmd": recon_cmd,
                "task1_convert_aparc_aseg_cmd": convert_aparc_cmd,
                "task1_convert_aseg_cmd": convert_aseg_cmd,
                "task1_label_type": "aparc+aseg_multiclass_segmentation",
                "task1_label_path": str(label_path.resolve()),
                "task1_aux_aseg_path": str(aux_path.resolve()),
                "task1_log_path": str(log_path.resolve()),
                "task1_status": status,
            }
        )

    return pd.DataFrame(planned)
760
+
761
+
762
def build_task4_df(merged_df: pd.DataFrame, outdir: Path, wmh_image: str, use_gpu: bool) -> pd.DataFrame:
    """Plan one task-4 (WMH segmentation) job per visit.

    A visit is eligible when both ``has_T1`` and ``has_FLAIR`` equal 1.
    When both image paths are present, a shell-quoted ``docker run``
    command string is produced that mounts the visit folder at ``/data``
    and passes the FLAIR and T1 file names to the image. The status is
    ``"generated"`` when the label file already exists, otherwise
    ``"pending"`` for eligible visits and ``"ineligible"`` for the rest.

    Args:
        merged_df: Visit table with identity, ``has_T1``/``has_FLAIR`` and
            optionally ``T1_path``/``FLAIR_path`` columns.
        outdir: Root directory for task-4 outputs.
        wmh_image: Docker image name to record and to put in the command.
        use_gpu: When true, ``--gpus all`` is added to the command.

    Returns:
        One row per input visit with the ``task4_*`` planning columns.
    """
    label_root = outdir / "task4_labels"
    log_root = outdir / "task4_logs"

    planned: List[Dict[str, object]] = []

    for _, visit in merged_df.iterrows():
        subject_id = visit["subject_id"]
        visit_date = visit["visit_date"]
        t1_path = none_if_na(visit.get("T1_path"))
        flair_path = none_if_na(visit.get("FLAIR_path"))

        eligible = int(visit["has_T1"] == 1 and visit["has_FLAIR"] == 1)

        label_path = label_root / subject_id / visit_date / "task4_wmh_seg.nii.gz"
        log_path = log_root / subject_id / visit_date / "task4_wmh.log"

        docker_cmd = None
        if t1_path is not None and flair_path is not None:
            visit_dir = Path(visit["visit_folder_path"])
            pieces = ["docker run --rm"]
            if use_gpu:
                pieces.append("--gpus all")
            pieces.extend(
                [
                    "-v",
                    quote_sh(str(visit_dir.resolve()) + ":/data"),
                    quote_sh(wmh_image),
                    "--flair",
                    quote_sh("/data/" + Path(flair_path).name),
                    "--t1",
                    quote_sh("/data/" + Path(t1_path).name),
                ]
            )
            docker_cmd = " ".join(pieces)

        if label_path.exists():
            status = "generated"
        elif eligible == 1:
            status = "pending"
        else:
            status = "ineligible"

        planned.append(
            {
                "subject_id": subject_id,
                "subject_folder": visit["subject_folder"],
                "visit_date": visit_date,
                "visit_folder_path": visit["visit_folder_path"],
                "T1_path": t1_path,
                "FLAIR_path": flair_path,
                "task4_eligible": eligible,
                "task4_wmh_image": wmh_image,
                "task4_docker_cmd": docker_cmd,
                "task4_label_path": str(label_path.resolve()),
                "task4_log_path": str(log_path.resolve()),
                "task4_status": status,
            }
        )

    return pd.DataFrame(planned)
810
+
811
+
812
def find_freesurfer_lut() -> Optional[Path]:
    """Find ``FreeSurferColorLUT.txt`` in the usual install locations.

    The directory named by the ``FREESURFER_HOME`` environment variable
    (when set and non-empty) is tried first, followed by
    ``/usr/local/freesurfer`` and ``/opt/freesurfer``.

    Returns:
        Path of the first existing lookup-table file, or ``None`` when
        none of the locations contains it.
    """
    lut_name = "FreeSurferColorLUT.txt"

    install_dirs: List[Path] = []
    env_home = os.environ.get("FREESURFER_HOME")
    if env_home:
        install_dirs.append(Path(env_home))
    install_dirs += [Path("/usr/local/freesurfer"), Path("/opt/freesurfer")]

    located = (folder / lut_name for folder in install_dirs)
    return next((lut for lut in located if lut.exists()), None)
831
+
832
+
833
def parse_freesurfer_lut(lut_path: Path) -> pd.DataFrame:
    """Parse a whitespace-separated colour lookup table into a frame.

    Blank lines, lines starting with ``#``, lines with fewer than two
    fields and lines whose first field is not an integer are skipped.
    Fields three to six, when present, are converted with ``safe_int``;
    missing ones become ``None``.

    Args:
        lut_path: Path of the lookup-table text file. Undecodable bytes
            are ignored while reading.

    Returns:
        A frame with columns ``label_id``, ``structure_name``, ``red``,
        ``green``, ``blue`` and ``alpha``, sorted by ``label_id``. The frame
        is empty (no columns) when no line could be parsed.
    """
    rows: List[Dict[str, object]] = []

    with open(lut_path, "r", encoding="utf-8", errors="ignore") as f:
        for line in f:
            text = line.strip()
            if not text or text.startswith("#"):
                continue

            parts = text.split()
            if len(parts) < 2:
                continue

            # Only a malformed id should skip the line; any other error is
            # a real bug and must not be swallowed.
            try:
                label_id = int(parts[0])
            except ValueError:
                continue

            # Colour channels are optional trailing fields.
            channels = [safe_int(parts[i]) if len(parts) > i else None for i in range(2, 6)]
            red, green, blue, alpha = channels

            rows.append(
                {
                    "label_id": label_id,
                    "structure_name": parts[1],
                    "red": red,
                    "green": green,
                    "blue": blue,
                    "alpha": alpha,
                }
            )

    out = pd.DataFrame(rows)
    if not out.empty:
        out = out.sort_values("label_id").reset_index(drop=True)
    return out
872
+
873
+
874
def write_task1_shell(task1_df: pd.DataFrame, shell_path: Path) -> None:
    """Write an executable bash script that runs every eligible task-1 job.

    The script defines a ``run_one`` function (skip when the label exists,
    run ``recon-all`` unless ``aparc+aseg.mgz`` is already present, then
    convert both volumes with ``mri_convert``) followed by one ``run_one``
    call per row whose ``task1_eligible`` equals 1. The file is made
    executable (mode 0o755).

    Args:
        task1_df: Planning table produced for task 1.
        shell_path: Destination of the script; its parent is created via
            ``ensure_parent``.
    """
    ensure_parent(shell_path)

    script_head = [
        "#!/usr/bin/env bash\n",
        "set -euo pipefail\n\n",
        "run_one() {\n",
        ' local fs_subjects_dir="$1"\n',
        ' local fs_subject_id="$2"\n',
        ' local t1_path="$3"\n',
        ' local label_path="$4"\n',
        ' local aux_path="$5"\n',
        ' local log_path="$6"\n',
        ' mkdir -p "$fs_subjects_dir" "$(dirname "$label_path")" "$(dirname "$aux_path")" "$(dirname "$log_path")"\n',
        ' if [[ -f "$label_path" ]]; then\n',
        ' echo "[SKIP] $label_path already exists"\n',
        " return 0\n",
        " fi\n",
        ' if [[ ! -f "$fs_subjects_dir/$fs_subject_id/mri/aparc+aseg.mgz" ]]; then\n',
        ' recon-all -sd "$fs_subjects_dir" -wsatlas -wsless -all -s "$fs_subject_id" -i "$t1_path" >"$log_path" 2>&1\n',
        " else\n",
        ' echo "[INFO] Existing FreeSurfer subject found, converting only" >"$log_path"\n',
        " fi\n",
        ' mri_convert "$fs_subjects_dir/$fs_subject_id/mri/aparc+aseg.mgz" "$label_path" >>"$log_path" 2>&1\n',
        ' mri_convert "$fs_subjects_dir/$fs_subject_id/mri/aseg.mgz" "$aux_path" >>"$log_path" 2>&1\n',
        ' echo "[OK] $label_path"\n',
        "}\n\n",
    ]

    # Positional arguments of run_one, in the order the function reads them.
    argument_columns = (
        "task1_freesurfer_subjects_dir",
        "task1_freesurfer_subject_id",
        "T1_path",
        "task1_label_path",
        "task1_aux_aseg_path",
        "task1_log_path",
    )

    with open(shell_path, "w", encoding="utf-8") as script:
        script.writelines(script_head)

        runnable = task1_df[task1_df["task1_eligible"] == 1].copy()
        for _, job in runnable.iterrows():
            quoted_args = " ".join(quote_sh(job[column]) for column in argument_columns)
            script.write("run_one " + quoted_args + "\n")

    os.chmod(shell_path, 0o755)
915
+
916
+
917
def write_task4_shell(task4_df: pd.DataFrame, shell_path: Path, use_gpu: bool) -> None:
    """Write an executable bash script that runs every eligible task-4 job.

    The script defines a ``run_one`` function that lists the NIfTI files in
    the visit folder, runs the WMH docker image with the folder mounted at
    ``/data``, lists the files again and moves the newly created NIfTI to
    the label path. One ``run_one`` call is emitted per row whose
    ``task4_eligible`` equals 1, and the file is made executable (0o755).

    When the before/after listing shows no new file, the script falls back
    to the most recently modified non-input NIfTI. That fallback is limited
    to files modified after the "before" listing was written
    (``-newer "$before_file"``), so a scan that was already in the folder
    before the run is never moved away as if it were the output.

    Args:
        task4_df: Planning table produced for task 4.
        shell_path: Destination of the script; its parent is created via
            ``ensure_parent``.
        use_gpu: When true, ``--gpus all`` is passed to ``docker run``.
    """
    ensure_parent(shell_path)
    # Includes its trailing space so the non-GPU command has no double space.
    gpu_part = "--gpus all " if use_gpu else ""

    before_docker = [
        "#!/usr/bin/env bash\n",
        "set -euo pipefail\n\n",
        "run_one() {\n",
        ' local visit_dir="$1"\n',
        ' local t1_name="$2"\n',
        ' local flair_name="$3"\n',
        ' local output_path="$4"\n',
        ' local log_path="$5"\n',
        ' local image_name="$6"\n',
        ' mkdir -p "$(dirname "$output_path")" "$(dirname "$log_path")"\n',
        ' if [[ -f "$output_path" ]]; then\n',
        ' echo "[SKIP] $output_path already exists"\n',
        " return 0\n",
        " fi\n",
        ' local before_file after_file new_file\n',
        ' before_file="$(mktemp)"\n',
        ' after_file="$(mktemp)"\n',
        ' find "$visit_dir" -type f \\( -name "*.nii" -o -name "*.nii.gz" \\) | sort >"$before_file"\n',
    ]
    docker_line = (
        f' docker run --rm {gpu_part}-v "$visit_dir:/data" "$image_name" '
        '--flair "/data/$flair_name" --t1 "/data/$t1_name" >"$log_path" 2>&1\n'
    )
    after_docker = [
        ' find "$visit_dir" -type f \\( -name "*.nii" -o -name "*.nii.gz" \\) | sort >"$after_file"\n',
        ' new_file="$(comm -13 "$before_file" "$after_file" | grep -v -F "$visit_dir/$t1_name" | grep -v -F "$visit_dir/$flair_name" | tail -n 1 || true)"\n',
        ' if [[ -z "$new_file" ]]; then\n',
        # Fallback: newest non-input NIfTI, but only if it was modified after
        # the pre-run listing was taken.
        ' new_file="$(find "$visit_dir" -type f \\( -name "*.nii" -o -name "*.nii.gz" \\) ! -name "$t1_name" ! -name "$flair_name" -newer "$before_file" -printf "%T@ %p\\n" | sort -nr | head -n 1 | cut -d" " -f2-)"\n',
        " fi\n",
        ' rm -f "$before_file" "$after_file"\n',
        ' if [[ -z "$new_file" ]]; then\n',
        ' echo "[FAIL] No WMH output NIfTI found for $visit_dir" >&2\n',
        " return 1\n",
        " fi\n",
        ' mv -f "$new_file" "$output_path"\n',
        ' echo "[OK] $output_path"\n',
        "}\n\n",
    ]

    with open(shell_path, "w", encoding="utf-8") as f:
        f.writelines(before_docker)
        f.write(docker_line)
        f.writelines(after_docker)

        eligible_df = task4_df[task4_df["task4_eligible"] == 1].copy()
        for _, row in eligible_df.iterrows():
            # Inside the container only the file names are meaningful.
            t1_name = Path(row["T1_path"]).name
            flair_name = Path(row["FLAIR_path"]).name
            f.write(
                "run_one "
                f"{quote_sh(row['visit_folder_path'])} "
                f"{quote_sh(t1_name)} "
                f"{quote_sh(flair_name)} "
                f"{quote_sh(row['task4_label_path'])} "
                f"{quote_sh(row['task4_log_path'])} "
                f"{quote_sh(row['task4_wmh_image'])}\n"
            )

    os.chmod(shell_path, 0o755)
977
+
978
+
979
def run_command_to_log(cmd: List[str], log_path: Path, cwd: Optional[Path] = None) -> int:
    """Run *cmd* and capture its stdout and stderr in *log_path*.

    The log file is opened in write mode, so any previous content is
    replaced. The parent directory is prepared via ``ensure_parent``.

    Args:
        cmd: Program and arguments, passed to ``subprocess.run`` as a list.
        log_path: File receiving the combined output.
        cwd: Optional working directory for the child process.

    Returns:
        The exit status of the command.
    """
    ensure_parent(log_path)
    working_dir = str(cwd) if cwd else None

    with open(log_path, "w", encoding="utf-8") as log_file:
        completed = subprocess.run(
            cmd,
            cwd=working_dir,
            stdout=log_file,
            stderr=subprocess.STDOUT,  # interleave stderr with stdout
            text=True,
        )
        return completed.returncode
990
+
991
+
992
def snapshot_nifti_files(root: Path) -> Set[str]:
    """Collect the resolved paths of all NIfTI files below *root*.

    Args:
        root: Directory to walk recursively.

    Returns:
        Resolved path strings of every regular file accepted by
        ``is_nifti_file``; an empty set when *root* does not exist.
    """
    if not root.exists():
        return set()

    return {
        str(entry.resolve())
        for entry in root.rglob("*")
        if entry.is_file() and is_nifti_file(entry)
    }
1001
+
1002
+
1003
def pick_task4_output_file(
    visit_dir: Path,
    before: Set[str],
    input_paths: Set[str],
    start_time: float,
) -> Optional[Path]:
    """Identify the NIfTI file a task-4 run produced inside *visit_dir*.

    Files that appear in a fresh snapshot but not in *before* are preferred.
    When there are none, any non-input NIfTI whose modification time is at
    or after ``start_time - 2`` seconds is considered instead. Among the
    candidates the most recently modified one is returned.

    Args:
        visit_dir: Folder the run wrote into.
        before: Snapshot of resolved NIfTI paths taken before the run.
        input_paths: Resolved paths of the run's input images; never
            returned.
        start_time: Epoch seconds at which the run was started.

    Returns:
        The chosen output file, or ``None`` when no candidate exists.
    """
    current = snapshot_nifti_files(visit_dir)
    candidates = [
        Path(name)
        for name in sorted(current - before)
        if str(Path(name).resolve()) not in input_paths
    ]

    if not candidates:
        # Nothing new by name: fall back to recently modified files, with a
        # two-second tolerance on the start time.
        cutoff = start_time - 2
        for entry in visit_dir.rglob("*"):
            if not entry.is_file() or not is_nifti_file(entry):
                continue
            if str(entry.resolve()) in input_paths:
                continue
            try:
                modified_recently = entry.stat().st_mtime >= cutoff
            except FileNotFoundError:
                # File vanished between listing and stat; ignore it.
                continue
            if modified_recently:
                candidates.append(entry)

    if not candidates:
        return None

    return max(candidates, key=lambda entry: entry.stat().st_mtime)
1032
+
1033
+
1034
+ def run_task1_jobs(task1_df: pd.DataFrame) -> None:
1035
+ eligible_df = task1_df[task1_df["task1_eligible"] == 1].copy()
1036
+ if eligible_df.empty:
1037
+ print("No eligible task1 jobs found.")
1038
+ return
1039
+
1040
+ print(f"Running task1 FreeSurfer jobs: {len(eligible_df)}")
1041
+
1042
+ for _, row in eligible_df.iterrows():
1043
+ label_path = Path(row["task1_label_path"])
1044
+ aux_path = Path(row["task1_aux_aseg_path"])
1045
+ log_path = Path(row["task1_log_path"])
1046
+ fs_subjects_dir = Path(row["task1_freesurfer_subjects_dir"])
1047
+ fs_subject_id = row["task1_freesurfer_subject_id"]
1048
+ t1_path = Path(row["T1_path"])
1049
+
1050
+ if label_path.exists():
1051
+ print(f"[SKIP] task1 exists: {label_path}")
1052
+ continue
1053
+
1054
+ fs_subjects_dir.mkdir(parents=True, exist_ok=True)
1055
+ ensure_parent(label_path)
1056
+ ensure_parent(aux_path)
1057
+
1058
+ aparc_mgz = fs_subjects_dir / fs_subject_id / "mri" / "aparc+aseg.mgz"
1059
+ aseg_mgz = fs_subjects_dir / fs_subject_id / "mri" / "aseg.mgz"
1060
+
1061
+ if not aparc_mgz.exists():
1062
+ print(f"[RUN] task1 recon-all | {row['subject_id']} | {row['visit_date']}")
1063
+ recon_cmd = [
1064
+ "recon-all",
1065
+ "-sd",
1066
+ str(fs_subjects_dir),
1067
+ "-wsatlas",
1068
+ "-wsless",
1069
+ "-all",
1070
+ "-s",
1071
+ str(fs_subject_id),
1072
+ "-i",
1073
+ str(t1_path),
1074
+ ]
1075
+ recon_cmd_str = " ".join(quote_sh(part) for part in recon_cmd)
1076
+ ipdb.set_trace()
1077
+ rc = run_command_to_log(
1078
+ recon_cmd,
1079
+ log_path=log_path,
1080
+ )
1081
+ if rc != 0:
1082
+ print(f"[FAIL] task1 recon-all failed: {row['subject_id']} | {row['visit_date']}")
1083
+ continue
1084
+ else:
1085
+ ensure_parent(log_path)
1086
+ with open(log_path, "w", encoding="utf-8") as f:
1087
+ f.write("Existing FreeSurfer subject found. Conversion only.\n")
1088
+
1089
+ print(f"[RUN] task1 mri_convert | {row['subject_id']} | {row['visit_date']}")
1090
+ rc1 = run_command_to_log(
1091
+ ["mri_convert", str(aparc_mgz), str(label_path)],
1092
+ log_path=log_path,
1093
+ )
1094
+ rc2 = run_command_to_log(
1095
+ ["mri_convert", str(aseg_mgz), str(aux_path)],
1096
+ log_path=log_path,
1097
+ )
1098
+
1099
+ if rc1 == 0 and rc2 == 0 and label_path.exists():
1100
+ print(f"[OK] task1 label -> {label_path}")
1101
+ else:
1102
+ print(f"[FAIL] task1 conversion failed: {row['subject_id']} | {row['visit_date']}")
1103
+
1104
+
1105
def run_task4_jobs(task4_df: pd.DataFrame, use_gpu: bool) -> None:
    """Run the WMH docker image for every eligible task-4 row.

    For each row with ``task4_eligible == 1`` whose label file does not yet
    exist: snapshot the NIfTI files in the visit folder, run the docker
    image with the folder mounted at ``/data``, pick the output file with
    ``pick_task4_output_file`` and move it to the label path. Progress and
    failures are reported on stdout; a failed job never stops the remaining
    ones.

    Args:
        task4_df: Planning table produced for task 4.
        use_gpu: When true, ``--gpus all`` is passed to ``docker run``.
    """
    runnable = task4_df[task4_df["task4_eligible"] == 1].copy()
    if runnable.empty:
        print("No eligible task4 jobs found.")
        return

    print(f"Running task4 WMH jobs: {len(runnable)}")

    gpu_args = ["--gpus", "all"] if use_gpu else []

    for _, job in runnable.iterrows():
        label_path = Path(job["task4_label_path"])
        log_path = Path(job["task4_log_path"])
        visit_dir = Path(job["visit_folder_path"])
        t1_path = Path(job["T1_path"])
        flair_path = Path(job["FLAIR_path"])
        image_name = job["task4_wmh_image"]

        if label_path.exists():
            print(f"[SKIP] task4 exists: {label_path}")
            continue

        ensure_parent(label_path)
        ensure_parent(log_path)

        # State of the folder right before the container starts.
        before = snapshot_nifti_files(visit_dir)
        start_time = time.time()

        cmd = [
            "docker",
            "run",
            "--rm",
            *gpu_args,
            "-v",
            f"{visit_dir.resolve()}:/data",
            str(image_name),
            "--flair",
            f"/data/{flair_path.name}",
            "--t1",
            f"/data/{t1_path.name}",
        ]

        print(f"[RUN] task4 WMH | {job['subject_id']} | {job['visit_date']}")
        if run_command_to_log(cmd, log_path=log_path) != 0:
            print(f"[FAIL] task4 docker failed: {job['subject_id']} | {job['visit_date']}")
            continue

        inputs = {str(t1_path.resolve()), str(flair_path.resolve())}
        output_file = pick_task4_output_file(
            visit_dir=visit_dir,
            before=before,
            input_paths=inputs,
            start_time=start_time,
        )
        if output_file is None:
            print(f"[FAIL] task4 output not found: {job['subject_id']} | {job['visit_date']}")
            continue

        # Clear a label that appeared in the meantime before moving.
        if label_path.exists():
            label_path.unlink()

        shutil.move(str(output_file), str(label_path))
        print(f"[OK] task4 label -> {label_path}")
1167
+
1168
+
1169
def main(argv: "list[str] | None" = None) -> int:
    """Build the ADNI task1-task5 label tables and helper shell scripts.

    The function parses the command line, validates the dataset root and the
    DXSUM / UCSFFSX7 CSV paths, scans the dataset, attaches DXSUM and UCSF
    matches to the scanned visits, builds one table per task, writes the
    task1/task4 shell scripts, optionally runs the task1 and task4 jobs, and
    finally writes the five task CSV files and prints a summary.

    Args:
        argv: Command-line arguments without the program name. ``None``
            (the default) makes argparse read ``sys.argv[1:]``.

    Returns:
        ``0`` on success; ``1`` when the root directory is missing, either
        CSV cannot be found, or the scan yields no visits.
    """
    parser = argparse.ArgumentParser(description="Generate ADNI task1-task5 label files.")
    parser.add_argument("--root", default=".", help="Dataset root directory")
    parser.add_argument("--dxsum", default=None, help="Path to DXSUM CSV")
    parser.add_argument("--ucsf", default=None, help="Path to UCSFFSX7 CSV")
    parser.add_argument("--outdir", default="task_outputs", help="Output directory")
    parser.add_argument("--dx-match-window-days", type=int, default=180, help="Max day difference for DXSUM matching")
    parser.add_argument("--fs-match-window-days", type=int, default=180, help="Max day difference for UCSF matching")
    parser.add_argument("--run-task1", action="store_true", help="Run FreeSurfer task1 jobs now")
    parser.add_argument("--run-task4", action="store_true", help="Run WMH task4 jobs now")
    parser.add_argument("--wmh-image", default="mars-wmh-nnunet:latest", help="Docker image for WMH inference")
    parser.add_argument("--wmh-no-gpu", action="store_true", help="Run WMH docker without --gpus all")
    args = parser.parse_args(argv)

    root = Path(args.root).resolve()
    outdir = Path(args.outdir).resolve()

    # Path.is_dir() is already False for a path that does not exist.
    if not root.is_dir():
        print(f"Root directory does not exist: {root}", file=sys.stderr)
        return 1

    # Explicit CSV paths win; otherwise fall back to automatic discovery.
    dxsum_path = Path(args.dxsum).resolve() if args.dxsum else auto_find_csv(root, "DXSUM")
    ucsf_path = Path(args.ucsf).resolve() if args.ucsf else auto_find_csv(root, "UCSFFSX7")

    if dxsum_path is None or not dxsum_path.exists():
        print("DXSUM CSV not found. Put DXSUM*.csv in the current directory or pass --dxsum.", file=sys.stderr)
        return 1

    if ucsf_path is None or not ucsf_path.exists():
        print("UCSFFSX7 CSV not found. Put UCSFFSX7*.csv in the current directory or pass --ucsf.", file=sys.stderr)
        return 1

    # Create the output directory only once the inputs are known to be
    # valid, so a rejected invocation leaves no empty directory behind.
    outdir.mkdir(parents=True, exist_ok=True)

    use_gpu = not args.wmh_no_gpu

    print(f"Dataset root: {root}")
    print(f"DXSUM CSV: {dxsum_path}")
    print(f"UCSFFSX7 CSV: {ucsf_path}")
    print(f"Output dir: {outdir}")

    visits_df = scan_dataset(root)
    if visits_df.empty:
        print("No visits with NIfTI files were found.", file=sys.stderr)
        return 1

    print(f"Scanned visits: {len(visits_df)}")
    print(f"Scanned subjects: {visits_df['subject_id'].nunique()}")

    dx_df = load_dxsum_table(dxsum_path)
    fs_df = load_ucsf_table(ucsf_path)

    print(f"DX rows after filtering: {len(dx_df)}")
    print(f"UCSF rows after OVERALLQC=1 filtering: {len(fs_df)}")

    # Attach DXSUM matches first, then UCSF matches on top of that result.
    merged_df = attach_matches(
        visits_df,
        dx_df,
        prefix="dx",
        payload_cols=["diagnosis_code", "diagnosis", "phase", "viscode"],
        max_days=args.dx_match_window_days,
    )
    merged_df = attach_matches(
        merged_df,
        fs_df,
        prefix="fs",
        payload_cols=["overallqc", "phase", "viscode"],
        max_days=args.fs_match_window_days,
    )

    task1_df = build_task1_df(merged_df, outdir)
    task2_df = build_task2_df(merged_df)
    task3_df = build_task3_df(merged_df)
    task4_df = build_task4_df(merged_df, outdir, args.wmh_image, use_gpu)
    task5_df = build_task5_df(merged_df)

    task1_shell = outdir / "task1_run_freesurfer.sh"
    task4_shell = outdir / "task4_run_wmh.sh"
    write_task1_shell(task1_df, task1_shell)
    write_task4_shell(task4_df, task4_shell, use_gpu=use_gpu)

    lut_path = find_freesurfer_lut()
    if lut_path is not None:
        lut_df = parse_freesurfer_lut(lut_path)
        lut_out = outdir / "task1_anatomical_structure_label_lookup.csv"
        lut_df.to_csv(lut_out, index=False)
        print(f"Task1 label lookup: {lut_out}")
    else:
        print("Warning: FreeSurferColorLUT.txt not found. task1 label lookup CSV was not generated.")

    # After running jobs the task table is rebuilt before it is written out.
    if args.run_task1:
        run_task1_jobs(task1_df)
        task1_df = build_task1_df(merged_df, outdir)

    if args.run_task4:
        run_task4_jobs(task4_df, use_gpu=use_gpu)
        task4_df = build_task4_df(merged_df, outdir, args.wmh_image, use_gpu)

    task1_csv = outdir / "task1_anatomical_structure_identification_labels.csv"
    task2_csv = outdir / "task2_imaging_modality_identification_labels.csv"
    task3_csv = outdir / "task3_disease_abnormality_diagnosis_labels.csv"
    task4_csv = outdir / "task4_lesion_localization_wmh_labels.csv"
    task5_csv = outdir / "task5_risk_forecasting_treatment_related_labels.csv"

    task1_df.to_csv(task1_csv, index=False)
    task2_df.to_csv(task2_csv, index=False)
    task3_df.to_csv(task3_csv, index=False)
    task4_df.to_csv(task4_csv, index=False)
    # "visit_ts" is dropped from the task5 export when present.
    task5_df.drop(columns=["visit_ts"], errors="ignore").to_csv(task5_csv, index=False)

    task1_eligible = int((task1_df["task1_eligible"] == 1).sum())
    task1_generated = int((task1_df["task1_status"] == "generated").sum())
    task3_labeled = int((task3_df["task3_label_available"] == 1).sum())
    task4_eligible = int((task4_df["task4_eligible"] == 1).sum())
    task4_generated = int((task4_df["task4_status"] == "generated").sum())
    task5_with_future = int((task5_df["task5_has_future_labeled_visit"] == 1).sum())

    print("\nDone")
    print(f"Task1 CSV: {task1_csv}")
    print(f"Task2 CSV: {task2_csv}")
    print(f"Task3 CSV: {task3_csv}")
    print(f"Task4 CSV: {task4_csv}")
    print(f"Task5 CSV: {task5_csv}")
    print(f"Task1 shell: {task1_shell}")
    print(f"Task4 shell: {task4_shell}")
    print("")
    print(f"Task1 eligible visits: {task1_eligible}")
    print(f"Task1 generated labels: {task1_generated}")
    print(f"Task3 labeled visits: {task3_labeled}")
    print(f"Task4 eligible visits: {task4_eligible}")
    print(f"Task4 generated labels: {task4_generated}")
    print(f"Task5 future-labeled: {task5_with_future}")

    return 0
1302
+
1303
+
1304
# Script entry point: the integer returned by main() becomes the exit status.
if __name__ == "__main__":
    raise SystemExit(main())