@brainpilot/skills 0.0.6 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (285) hide show
  1. package/package.json +2 -2
  2. package/skills/01_Meta-Skills/academic-research-hub/SKILL.md +108 -0
  3. package/skills/01_Meta-Skills/academic-research-hub/scripts/requirements.txt +17 -0
  4. package/skills/01_Meta-Skills/academic-research-hub/scripts/research.py +781 -0
  5. package/skills/01_Meta-Skills/beautiful-log/SKILL.md +64 -0
  6. package/skills/01_Meta-Skills/beautiful-log/scripts/beautiful_log.py +274 -0
  7. package/skills/01_Meta-Skills/ethoclaw-daily-paper/SKILL.md +130 -0
  8. package/skills/01_Meta-Skills/ethoclaw-daily-paper/assets/config.template.yaml +54 -0
  9. package/skills/01_Meta-Skills/ethoclaw-daily-paper/assets/top5_digest_template.md +5 -0
  10. package/skills/01_Meta-Skills/ethoclaw-daily-paper/scripts/build_top5_digest.py +300 -0
  11. package/skills/01_Meta-Skills/ethoclaw-daily-paper/scripts/common.py +137 -0
  12. package/skills/01_Meta-Skills/ethoclaw-daily-paper/scripts/merge_results.py +106 -0
  13. package/skills/01_Meta-Skills/ethoclaw-daily-paper/scripts/run_pipeline.py +177 -0
  14. package/skills/01_Meta-Skills/ethoclaw-daily-paper/scripts/search_arxiv.py +162 -0
  15. package/skills/01_Meta-Skills/ethoclaw-daily-paper/scripts/search_pubmed.py +202 -0
  16. package/skills/01_Meta-Skills/ethoclaw-normalize-tabular/SKILL.md +173 -0
  17. package/skills/01_Meta-Skills/ethoclaw-normalize-tabular/scripts/normalize_data.py +874 -0
  18. package/skills/01_Meta-Skills/ethoclaw-pdf-research/SKILL.md +134 -0
  19. package/skills/01_Meta-Skills/ethoclaw-pdf-research/references/confirmation-prompts.md +31 -0
  20. package/skills/01_Meta-Skills/ethoclaw-pdf-research/references/output-patterns.md +45 -0
  21. package/skills/01_Meta-Skills/ethoclaw-pdf-research/scripts/build_markdown_deliverables.py +41 -0
  22. package/skills/01_Meta-Skills/ethoclaw-pdf-research/scripts/build_research_log.py +84 -0
  23. package/skills/01_Meta-Skills/ethoclaw-pdf-research/scripts/build_summary_md.py +63 -0
  24. package/skills/01_Meta-Skills/ethoclaw-pdf-research/scripts/extract_pdf_bundle.py +140 -0
  25. package/skills/01_Meta-Skills/experiment-controller/SKILL.md +140 -0
  26. package/skills/01_Meta-Skills/knowledge-graph-builder/SKILL.md +366 -0
  27. package/skills/01_Meta-Skills/knowledge-graph-builder/scripts/entity_resolution.py +120 -0
  28. package/skills/01_Meta-Skills/knowledge-graph-builder/scripts/extraction_prompt_template.txt +19 -0
  29. package/skills/01_Meta-Skills/knowledge-graph-builder/scripts/graph_query.py +106 -0
  30. package/skills/01_Meta-Skills/knowledge-graph-builder/scripts/hypothesis_cli_reference.py +42 -0
  31. package/skills/01_Meta-Skills/knowledge-graph-builder/scripts/new_data_source_template.py +116 -0
  32. package/skills/01_Meta-Skills/knowledge-graph-builder/scripts/requirements.txt +15 -0
  33. package/skills/01_Meta-Skills/method-design/SKILL.md +61 -0
  34. package/skills/01_Meta-Skills/multi-search-engine/SKILL.md +119 -0
  35. package/skills/01_Meta-Skills/research-idea/SKILL.md +65 -0
  36. package/skills/05_EEG_ERP/eeg-skill/SKILL.md +197 -0
  37. package/skills/05_EEG_ERP/meg-skill/SKILL.md +188 -0
  38. package/skills/05_EEG_ERP/meg-skill/scripts/time_frequency.py +223 -0
  39. package/skills/05_EEG_ERP/mne-eeg-tool/SKILL.md +165 -0
  40. package/skills/05_EEG_ERP/mne-eeg-tool/scripts/eeg_pipeline_reference.py +231 -0
  41. package/skills/05_EEG_ERP/seed-iv-skill/SKILL.md +184 -0
  42. package/skills/05_EEG_ERP/seed-iv-skill/scripts/classify_seed_iv.py +154 -0
  43. package/skills/05_EEG_ERP/seed-iv-skill/scripts/extract_seed_iv_features.py +190 -0
  44. package/skills/05_EEG_ERP/seed-iv-skill/scripts/validate_seed_iv.py +102 -0
  45. package/skills/05_EEG_ERP/seed-vig-skill/SKILL.md +182 -0
  46. package/skills/05_EEG_ERP/seed-vig-skill/scripts/classify_seed_vig.py +165 -0
  47. package/skills/05_EEG_ERP/seed-vig-skill/scripts/extract_seed_vig_features.py +185 -0
  48. package/skills/05_EEG_ERP/seed-vig-skill/scripts/validate_seed_vig.py +88 -0
  49. package/skills/06_fMRI_Neuroimaging/abcd-skill/SKILL.md +308 -0
  50. package/skills/06_fMRI_Neuroimaging/abcd-skill/scripts/abcd_qc_summary.py +449 -0
  51. package/skills/06_fMRI_Neuroimaging/abcd-skill/scripts/extract_abcd_phenotype.py +292 -0
  52. package/skills/06_fMRI_Neuroimaging/abcd-skill/scripts/reorganize_abcd.py +387 -0
  53. package/skills/06_fMRI_Neuroimaging/abide-skill/SKILL.md +302 -0
  54. package/skills/06_fMRI_Neuroimaging/abide-skill/scripts/abide_qc_summary.py +317 -0
  55. package/skills/06_fMRI_Neuroimaging/abide-skill/scripts/extract_abide_phenotype.py +267 -0
  56. package/skills/06_fMRI_Neuroimaging/abide-skill/scripts/reorganize_abide.py +387 -0
  57. package/skills/06_fMRI_Neuroimaging/adhd200-skill/SKILL.md +244 -0
  58. package/skills/06_fMRI_Neuroimaging/adhd200-skill/scripts/adhd200_qc_summary.py +98 -0
  59. package/skills/06_fMRI_Neuroimaging/adhd200-skill/scripts/extract_adhd200_phenotype.py +134 -0
  60. package/skills/06_fMRI_Neuroimaging/adhd200-skill/scripts/reorganize_adhd200.py +206 -0
  61. package/skills/06_fMRI_Neuroimaging/adni-skill/SKILL.md +358 -0
  62. package/skills/06_fMRI_Neuroimaging/adni-skill/scripts/generate_adni_task_files.py +1305 -0
  63. package/skills/06_fMRI_Neuroimaging/adni-skill/scripts/generate_vqa_from_tasks.py +766 -0
  64. package/skills/06_fMRI_Neuroimaging/adni-skill/scripts/reorganize_adni.py +491 -0
  65. package/skills/06_fMRI_Neuroimaging/aibl-skill/SKILL.md +295 -0
  66. package/skills/06_fMRI_Neuroimaging/aibl-skill/scripts/aibl_qc_summary.py +260 -0
  67. package/skills/06_fMRI_Neuroimaging/aibl-skill/scripts/extract_aibl_phenotype.py +365 -0
  68. package/skills/06_fMRI_Neuroimaging/aibl-skill/scripts/reorganize_aibl.py +394 -0
  69. package/skills/06_fMRI_Neuroimaging/aomic-skill/SKILL.md +292 -0
  70. package/skills/06_fMRI_Neuroimaging/aomic-skill/scripts/aomic_qc_summary.py +258 -0
  71. package/skills/06_fMRI_Neuroimaging/aomic-skill/scripts/extract_aomic_phenotype.py +284 -0
  72. package/skills/06_fMRI_Neuroimaging/aomic-skill/scripts/reorganize_aomic.py +322 -0
  73. package/skills/06_fMRI_Neuroimaging/asl-skill/SKILL.md +168 -0
  74. package/skills/06_fMRI_Neuroimaging/asl-skill/scripts/compute_cbf.py +224 -0
  75. package/skills/06_fMRI_Neuroimaging/bids-organizer/SKILL.md +241 -0
  76. package/skills/06_fMRI_Neuroimaging/bold5000-skill/SKILL.md +186 -0
  77. package/skills/06_fMRI_Neuroimaging/bold5000-skill/scripts/bold5000_qc_summary.py +96 -0
  78. package/skills/06_fMRI_Neuroimaging/bold5000-skill/scripts/extract_bold5000_stimulus.py +125 -0
  79. package/skills/06_fMRI_Neuroimaging/bold5000-skill/scripts/reorganize_bold5000.py +102 -0
  80. package/skills/06_fMRI_Neuroimaging/camcan-skill/SKILL.md +213 -0
  81. package/skills/06_fMRI_Neuroimaging/camcan-skill/scripts/camcan_qc_summary.py +131 -0
  82. package/skills/06_fMRI_Neuroimaging/camcan-skill/scripts/extract_camcan_phenotype.py +145 -0
  83. package/skills/06_fMRI_Neuroimaging/camcan-skill/scripts/validate_camcan.py +141 -0
  84. package/skills/06_fMRI_Neuroimaging/cobre-skill/SKILL.md +201 -0
  85. package/skills/06_fMRI_Neuroimaging/cobre-skill/scripts/cobre_qc_summary.py +95 -0
  86. package/skills/06_fMRI_Neuroimaging/cobre-skill/scripts/extract_cobre_phenotype.py +104 -0
  87. package/skills/06_fMRI_Neuroimaging/cobre-skill/scripts/reorganize_cobre.py +140 -0
  88. package/skills/06_fMRI_Neuroimaging/conn-tool/SKILL.md +180 -0
  89. package/skills/06_fMRI_Neuroimaging/dcm2nii/SKILL.md +189 -0
  90. package/skills/06_fMRI_Neuroimaging/dmt-har-med-skill/SKILL.md +183 -0
  91. package/skills/06_fMRI_Neuroimaging/dmt-har-med-skill/scripts/dmt_har_med_qc_summary.py +96 -0
  92. package/skills/06_fMRI_Neuroimaging/dmt-har-med-skill/scripts/extract_dmt_har_med_phenotype.py +121 -0
  93. package/skills/06_fMRI_Neuroimaging/dmt-har-med-skill/scripts/reorganize_dmt_har_med.py +125 -0
  94. package/skills/06_fMRI_Neuroimaging/dwi-skill/SKILL.md +359 -0
  95. package/skills/06_fMRI_Neuroimaging/fmri-skill/SKILL.md +371 -0
  96. package/skills/06_fMRI_Neuroimaging/fmriprep-tool/SKILL.md +228 -0
  97. package/skills/06_fMRI_Neuroimaging/freesurfer-tool/SKILL.md +286 -0
  98. package/skills/06_fMRI_Neuroimaging/freesurfer-tool/scripts/freesurfer_processor.py +145 -0
  99. package/skills/06_fMRI_Neuroimaging/fsl-tool/SKILL.md +208 -0
  100. package/skills/06_fMRI_Neuroimaging/hbn-skill/SKILL.md +271 -0
  101. package/skills/06_fMRI_Neuroimaging/hbn-skill/scripts/extract_hbn_phenotype.py +107 -0
  102. package/skills/06_fMRI_Neuroimaging/hbn-skill/scripts/hbn_qc_summary.py +96 -0
  103. package/skills/06_fMRI_Neuroimaging/hbn-skill/scripts/reorganize_hbn.py +150 -0
  104. package/skills/06_fMRI_Neuroimaging/hcpa-skill/SKILL.md +210 -0
  105. package/skills/06_fMRI_Neuroimaging/hcpa-skill/scripts/extract_hcpa_phenotype.py +146 -0
  106. package/skills/06_fMRI_Neuroimaging/hcpa-skill/scripts/hcpa_qc_summary.py +120 -0
  107. package/skills/06_fMRI_Neuroimaging/hcpa-skill/scripts/reorganize_hcpa.py +155 -0
  108. package/skills/06_fMRI_Neuroimaging/hcpd-skill/SKILL.md +210 -0
  109. package/skills/06_fMRI_Neuroimaging/hcpd-skill/scripts/extract_hcpd_phenotype.py +148 -0
  110. package/skills/06_fMRI_Neuroimaging/hcpd-skill/scripts/hcpd_qc_summary.py +125 -0
  111. package/skills/06_fMRI_Neuroimaging/hcpd-skill/scripts/reorganize_hcpd.py +146 -0
  112. package/skills/06_fMRI_Neuroimaging/hcpep-skill/SKILL.md +215 -0
  113. package/skills/06_fMRI_Neuroimaging/hcpep-skill/scripts/extract_hcpep_phenotype.py +157 -0
  114. package/skills/06_fMRI_Neuroimaging/hcpep-skill/scripts/hcpep_qc_summary.py +143 -0
  115. package/skills/06_fMRI_Neuroimaging/hcpep-skill/scripts/reorganize_hcpep.py +146 -0
  116. package/skills/06_fMRI_Neuroimaging/hcppipeline-tool/SKILL.md +217 -0
  117. package/skills/06_fMRI_Neuroimaging/hcpya-skill/SKILL.md +214 -0
  118. package/skills/06_fMRI_Neuroimaging/hcpya-skill/scripts/extract_hcpya_phenotype.py +190 -0
  119. package/skills/06_fMRI_Neuroimaging/hcpya-skill/scripts/hcpya_qc_summary.py +152 -0
  120. package/skills/06_fMRI_Neuroimaging/hcpya-skill/scripts/reorganize_hcpya.py +203 -0
  121. package/skills/06_fMRI_Neuroimaging/ixi-skill/SKILL.md +198 -0
  122. package/skills/06_fMRI_Neuroimaging/ixi-skill/scripts/ixi_qc_summary.py +137 -0
  123. package/skills/06_fMRI_Neuroimaging/ixi-skill/scripts/reorganize_ixi.py +190 -0
  124. package/skills/06_fMRI_Neuroimaging/mnd-skill/SKILL.md +191 -0
  125. package/skills/06_fMRI_Neuroimaging/mnd-skill/scripts/extract_mnd_phenotype.py +143 -0
  126. package/skills/06_fMRI_Neuroimaging/mnd-skill/scripts/mnd_qc_summary.py +120 -0
  127. package/skills/06_fMRI_Neuroimaging/mnd-skill/scripts/validate_mnd.py +107 -0
  128. package/skills/06_fMRI_Neuroimaging/mschallenge-skill/SKILL.md +203 -0
  129. package/skills/06_fMRI_Neuroimaging/mschallenge-skill/scripts/analyze_lesions.py +119 -0
  130. package/skills/06_fMRI_Neuroimaging/mschallenge-skill/scripts/longitudinal_lesion.py +148 -0
  131. package/skills/06_fMRI_Neuroimaging/mschallenge-skill/scripts/mschallenge_qc_summary.py +132 -0
  132. package/skills/06_fMRI_Neuroimaging/mschallenge-skill/scripts/validate_mschallenge.py +116 -0
  133. package/skills/06_fMRI_Neuroimaging/nibabel-skill/SKILL.md +184 -0
  134. package/skills/06_fMRI_Neuroimaging/nibabel-skill/scripts/atlas_coordinate_reference.py +61 -0
  135. package/skills/06_fMRI_Neuroimaging/nibabel-skill/scripts/freesurfer_io_reference.py +34 -0
  136. package/skills/06_fMRI_Neuroimaging/nibabel-skill/scripts/nifti_inspection_reference.py +35 -0
  137. package/skills/06_fMRI_Neuroimaging/nifd-skill/SKILL.md +205 -0
  138. package/skills/06_fMRI_Neuroimaging/nifd-skill/scripts/extract_nifd_phenotype.py +132 -0
  139. package/skills/06_fMRI_Neuroimaging/nifd-skill/scripts/nifd_qc_summary.py +111 -0
  140. package/skills/06_fMRI_Neuroimaging/nifd-skill/scripts/validate_nifd.py +111 -0
  141. package/skills/06_fMRI_Neuroimaging/nii2dcm/SKILL.md +143 -0
  142. package/skills/06_fMRI_Neuroimaging/nilearn-tool/SKILL.md +266 -0
  143. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/connectome_reference.py +65 -0
  144. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/denoise_timeseries_reference.py +58 -0
  145. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/hierarchical_parcellation_reference.py +53 -0
  146. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/kmeans_parcellation_reference.py +53 -0
  147. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/preprocess_bold_reference.py +76 -0
  148. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/rest_dictlearning_reference.py +56 -0
  149. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/rest_ica_reference.py +59 -0
  150. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/second_level_glm_reference.py +58 -0
  151. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/spacenet_classifier_reference.py +59 -0
  152. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/svm_classifier_reference.py +60 -0
  153. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/task_glm_reference.py +63 -0
  154. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/zalff_summary_reference.py +109 -0
  155. package/skills/06_fMRI_Neuroimaging/nsd-skill/SKILL.md +210 -0
  156. package/skills/06_fMRI_Neuroimaging/nsd-skill/scripts/extract_nsd_stimulus.py +171 -0
  157. package/skills/06_fMRI_Neuroimaging/nsd-skill/scripts/nsd_qc_summary.py +142 -0
  158. package/skills/06_fMRI_Neuroimaging/nsd-skill/scripts/validate_nsd.py +142 -0
  159. package/skills/06_fMRI_Neuroimaging/oasis-skill/SKILL.md +205 -0
  160. package/skills/06_fMRI_Neuroimaging/oasis-skill/scripts/extract_oasis_phenotype.py +126 -0
  161. package/skills/06_fMRI_Neuroimaging/oasis-skill/scripts/oasis_qc_summary.py +115 -0
  162. package/skills/06_fMRI_Neuroimaging/oasis-skill/scripts/validate_oasis.py +119 -0
  163. package/skills/06_fMRI_Neuroimaging/pet-skill/SKILL.md +173 -0
  164. package/skills/06_fMRI_Neuroimaging/pet-skill/scripts/compute_suvr.py +202 -0
  165. package/skills/06_fMRI_Neuroimaging/pnc-skill/SKILL.md +206 -0
  166. package/skills/06_fMRI_Neuroimaging/pnc-skill/scripts/extract_pnc_phenotype.py +136 -0
  167. package/skills/06_fMRI_Neuroimaging/pnc-skill/scripts/pnc_qc_summary.py +116 -0
  168. package/skills/06_fMRI_Neuroimaging/pnc-skill/scripts/validate_pnc.py +120 -0
  169. package/skills/06_fMRI_Neuroimaging/ppmi-skill/SKILL.md +209 -0
  170. package/skills/06_fMRI_Neuroimaging/ppmi-skill/scripts/extract_ppmi_phenotype.py +138 -0
  171. package/skills/06_fMRI_Neuroimaging/ppmi-skill/scripts/ppmi_qc_summary.py +111 -0
  172. package/skills/06_fMRI_Neuroimaging/ppmi-skill/scripts/validate_ppmi.py +117 -0
  173. package/skills/06_fMRI_Neuroimaging/qsiprep-tool/SKILL.md +320 -0
  174. package/skills/06_fMRI_Neuroimaging/rest-mneta-mdd-skill/SKILL.md +215 -0
  175. package/skills/06_fMRI_Neuroimaging/rest-mneta-mdd-skill/scripts/extract_rest_mdd_phenotype.py +132 -0
  176. package/skills/06_fMRI_Neuroimaging/rest-mneta-mdd-skill/scripts/harmonize_sites.py +152 -0
  177. package/skills/06_fMRI_Neuroimaging/rest-mneta-mdd-skill/scripts/rest_mdd_qc_summary.py +124 -0
  178. package/skills/06_fMRI_Neuroimaging/rest-mneta-mdd-skill/scripts/validate_rest_mdd.py +103 -0
  179. package/skills/06_fMRI_Neuroimaging/smri-skill/SKILL.md +302 -0
  180. package/skills/06_fMRI_Neuroimaging/tcp-skill/SKILL.md +204 -0
  181. package/skills/06_fMRI_Neuroimaging/tcp-skill/scripts/extract_tcp_phenotype.py +139 -0
  182. package/skills/06_fMRI_Neuroimaging/tcp-skill/scripts/tcp_qc_summary.py +111 -0
  183. package/skills/06_fMRI_Neuroimaging/tcp-skill/scripts/validate_tcp.py +99 -0
  184. package/skills/06_fMRI_Neuroimaging/ucla-cnp-skill/SKILL.md +217 -0
  185. package/skills/06_fMRI_Neuroimaging/ucla-cnp-skill/scripts/extract_ucla_cnp_phenotype.py +145 -0
  186. package/skills/06_fMRI_Neuroimaging/ucla-cnp-skill/scripts/ucla_cnp_qc_summary.py +111 -0
  187. package/skills/06_fMRI_Neuroimaging/ucla-cnp-skill/scripts/validate_ucla_cnp.py +113 -0
  188. package/skills/06_fMRI_Neuroimaging/ukb-skill/SKILL.md +310 -0
  189. package/skills/06_fMRI_Neuroimaging/ukb-skill/scripts/build_ukb_survival.py +210 -0
  190. package/skills/06_fMRI_Neuroimaging/ukb-skill/scripts/extract_ukb_cases.py +308 -0
  191. package/skills/06_fMRI_Neuroimaging/ukb-skill/scripts/extract_ukb_phenotype.py +232 -0
  192. package/skills/06_fMRI_Neuroimaging/ukb-skill/scripts/ukb_qc_summary.py +158 -0
  193. package/skills/06_fMRI_Neuroimaging/wmh-segmentation/SKILL.md +133 -0
  194. package/skills/07_Computational_Modeling/detrending/SKILL.md +118 -0
  195. package/skills/07_Computational_Modeling/dictlearning/SKILL.md +122 -0
  196. package/skills/07_Computational_Modeling/filtering/SKILL.md +121 -0
  197. package/skills/07_Computational_Modeling/glm/SKILL.md +153 -0
  198. package/skills/07_Computational_Modeling/hierarchical/SKILL.md +121 -0
  199. package/skills/07_Computational_Modeling/ica/SKILL.md +122 -0
  200. package/skills/07_Computational_Modeling/kmeans/SKILL.md +119 -0
  201. package/skills/07_Computational_Modeling/run_models/SKILL.md +427 -0
  202. package/skills/07_Computational_Modeling/spacenet/SKILL.md +122 -0
  203. package/skills/07_Computational_Modeling/svm/SKILL.md +120 -0
  204. package/skills/08_Computational_Neuroscience/brain_gnn/SKILL.md +183 -0
  205. package/skills/08_Computational_Neuroscience/dipy-tool/SKILL.md +239 -0
  206. package/skills/08_Computational_Neuroscience/dipy-tool/scripts/dti_metrics_reference.py +70 -0
  207. package/skills/08_Computational_Neuroscience/dipy-tool/scripts/load_and_mask_reference.py +76 -0
  208. package/skills/08_Computational_Neuroscience/dipy-tool/scripts/roi_stats_reference.py +59 -0
  209. package/skills/08_Computational_Neuroscience/fm_app/SKILL.md +195 -0
  210. package/skills/08_Computational_Neuroscience/neurostorm/SKILL.md +151 -0
  211. package/skills/13_Visualization/brain-visualization/SKILL.md +191 -0
  212. package/skills/13_Visualization/brain-visualization/scripts/connectome_reference.py +108 -0
  213. package/skills/13_Visualization/brain-visualization/scripts/freesurfer_ply_reference.py +54 -0
  214. package/skills/13_Visualization/brain-visualization/scripts/zalff_summary_reference.py +116 -0
  215. package/skills/13_Visualization/ethoclaw-paper-figure-layout/SKILL.md +78 -0
  216. package/skills/13_Visualization/ethoclaw-paper-figure-layout/assets/naturecomm_figures.tex +74 -0
  217. package/skills/13_Visualization/ethoclaw-paper-figure-layout/scripts/layout_results_foldered.py +579 -0
  218. package/skills/14_Writing/overleaf-skill/SKILL.md +184 -0
  219. package/skills/14_Writing/overleaf-skill/scripts/install.sh +30 -0
  220. package/skills/14_Writing/paper-writing/SKILL.md +146 -0
  221. package/skills/14_Writing/paper-writing/scripts/data_statement_templates.py +164 -0
  222. package/skills/14_Writing/paper-writing/scripts/figure_templates.py +315 -0
  223. package/skills/14_Writing/paper-writing/scripts/nature_figure_style.py +214 -0
  224. package/skills/14_Writing/paper-writing/scripts/section_phrasebank.py +246 -0
  225. package/skills/16_Animal_Behavior/deeplabcut/SKILL.md +154 -0
  226. package/skills/16_Animal_Behavior/deeplabcut/references/3d-pose.md +89 -0
  227. package/skills/16_Animal_Behavior/deeplabcut/references/maDLC.md +123 -0
  228. package/skills/16_Animal_Behavior/deeplabcut/references/modelzoo.md +98 -0
  229. package/skills/16_Animal_Behavior/deeplabcut/references/standard-pipeline.md +165 -0
  230. package/skills/16_Animal_Behavior/deeplabcut/references/utilities.md +146 -0
  231. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/SKILL.md +274 -0
  232. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/report_template_en.html +112 -0
  233. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/report_template_en.md +21 -0
  234. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/cluster-section.md +5 -0
  235. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/heatmap-section.md +5 -0
  236. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/integrated-interpretation.md +3 -0
  237. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/overview.md +3 -0
  238. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/project-summary.md +3 -0
  239. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/radar-section.md +5 -0
  240. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/raw-trajectory.md +3 -0
  241. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/sample-check.md +3 -0
  242. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/single-subject-section.md +3 -0
  243. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/stats-section.md +5 -0
  244. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/experiment-types/epm.md +52 -0
  245. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/experiment-types/fst.md +37 -0
  246. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/experiment-types/nor.md +39 -0
  247. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/experiment-types/oft.md +43 -0
  248. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/experiment-types/tcst.md +45 -0
  249. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/experiment-types/tst.md +36 -0
  250. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/input-types.md +59 -0
  251. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/interpretation-guardrails.md +45 -0
  252. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/metadata-schema.md +57 -0
  253. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/report-sections.md +86 -0
  254. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/section-selection-rules.md +169 -0
  255. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/scripts/build_report_manifest.py +27 -0
  256. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/scripts/render_report.py +34 -0
  257. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/scripts/report_utils.py +1121 -0
  258. package/skills/16_Animal_Behavior/ethoclaw-animal-grounding/SKILL.md +390 -0
  259. package/skills/16_Animal_Behavior/ethoclaw-animal-grounding/reference_code.py +98 -0
  260. package/skills/16_Animal_Behavior/ethoclaw-animal-pose-estimation/SKILL.md +336 -0
  261. package/skills/16_Animal_Behavior/ethoclaw-kinematic-parameter-generator/README.md +21 -0
  262. package/skills/16_Animal_Behavior/ethoclaw-kinematic-parameter-generator/SKILL.md +41 -0
  263. package/skills/16_Animal_Behavior/ethoclaw-kinematic-parameter-generator/batch_kinematic_generator.py +663 -0
  264. package/skills/16_Animal_Behavior/ethoclaw-kinematic-parameter-generator/config.json +19 -0
  265. package/skills/16_Animal_Behavior/ethoclaw-kinematic-parameter-generator/generate_kinematic_parameter.py +401 -0
  266. package/skills/16_Animal_Behavior/ethoclaw-kinematic-parameter-generator/kinematic_generator.py +265 -0
  267. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-clustermap-generate/SKILL.md +72 -0
  268. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-clustermap-generate/references/config.example.toml +56 -0
  269. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-clustermap-generate/scripts/cluster_all_params.py +232 -0
  270. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-clustermap-generate/scripts/cluster_all_params_from_config.py +236 -0
  271. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-radar-generate/SKILL.md +68 -0
  272. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-radar-generate/references/notes.md +5 -0
  273. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-radar-generate/scripts/plot_h5_radar.py +513 -0
  274. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-violin-stats-generate/SKILL.md +52 -0
  275. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-violin-stats-generate/config.toml +81 -0
  276. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-violin-stats-generate/references/stats-rule.md +18 -0
  277. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-violin-stats-generate/scripts/h5_inspect.py +79 -0
  278. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-violin-stats-generate/scripts/h5_violin_batch.py +624 -0
  279. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-violin-stats-generate/scripts/h5_violin_stats.py +438 -0
  280. package/skills/16_Animal_Behavior/ethoclaw-trajectory-velocity-heatmap-generate/SKILL.md +280 -0
  281. package/skills/16_Animal_Behavior/ethoclaw-trajectory-velocity-heatmap-generate/core_scripts/heatmap_trajectory.py +790 -0
  282. package/skills/16_Animal_Behavior/ethoclaw-trajectory-velocity-heatmap-generate/core_scripts/heatmap_velocity.py +855 -0
  283. package/skills/16_Animal_Behavior/ethoclaw-trajectory-velocity-heatmap-generate/reference_data/reference_2d.csv +101 -0
  284. package/skills/16_Animal_Behavior/ethoclaw-trajectory-velocity-heatmap-generate/reference_data/reference_2d.h5 +0 -0
  285. package/skills/16_Animal_Behavior/ethoclaw-trajectory-velocity-heatmap-generate/reference_data/reference_data_readme.md +126 -0
@@ -0,0 +1,210 @@
1
+ #!/usr/bin/env python3
2
+ """Build survival analysis datasets from UK Biobank data.
3
+
4
+ Adapted from UKBAnalytica_v2 survival.R (Nan He, Southern Medical University).
5
+ Computes follow-up time, event status, and handles prevalent/incident case separation.
6
+
7
+ Usage:
8
+ python build_ukb_survival.py --input ukb_raw.csv --disease dementia --output survival.csv
9
+ python build_ukb_survival.py --input ukb_raw.csv --disease stroke --censor-date 2023-10-31 --output stroke_survival.csv
10
+ """
11
+ import argparse
12
+ import re
13
+ import sys
14
+ from datetime import datetime
15
+ from pathlib import Path
16
+ from typing import Dict, List, Optional
17
+
18
+ import pandas as pd
19
+
20
+ # Disease definitions: a trimmed local copy of the table in extract_ukb_cases.py (inlined here, not imported)
21
+ DISEASE_DEFINITIONS = {
22
+ "dementia": {"icd10_pattern": "F0[0-9]", "self_report_codes": [1263]},
23
+ "alzheimers": {"icd10_pattern": "F00|G30", "self_report_codes": [1263]},
24
+ "stroke": {"icd10_pattern": "I6[0-4]", "self_report_codes": [1081, 1491, 1583]},
25
+ "ischaemic_stroke": {"icd10_pattern": "I63", "self_report_codes": []},
26
+ "parkinsons": {"icd10_pattern": "G20", "self_report_codes": [1254]},
27
+ "multiple_sclerosis": {"icd10_pattern": "G35", "self_report_codes": [1258]},
28
+ "epilepsy": {"icd10_pattern": "G4[0-1]", "self_report_codes": [1262]},
29
+ "depression": {"icd10_pattern": "F3[2-3]", "self_report_codes": [1286, 1530]},
30
+ "anxiety": {"icd10_pattern": "F4[0-1]", "self_report_codes": [1287, 1531]},
31
+ "schizophrenia": {"icd10_pattern": "F20", "self_report_codes": [1289]},
32
+ }
33
+
34
+
35
def parse_dates(series: pd.Series) -> pd.Series:
    """Convert a Series of UKB date strings to datetimes.

    Entries that cannot be parsed are coerced to ``NaT`` rather than raising.
    """
    converted = pd.to_datetime(series, format="mixed", errors="coerce")
    return converted
38
+
39
+
40
def find_first_diagnosis_date(
    row: pd.Series, pattern: str, date_cols: List[str], code_cols: List[str]
) -> Optional[datetime]:
    """Return the earliest dated diagnosis in ``row`` whose code matches ``pattern``.

    ``code_cols`` and ``date_cols`` are walked in lockstep. Within each pair,
    the n-th code found in the code cell is matched with the n-th
    ``YYYY-MM-DD`` date found in the date cell; codes without a date at the
    same position are ignored. ``pattern`` is applied with ``re.match``, so it
    is anchored at the start of each code. Returns ``None`` when nothing
    matches.
    """
    first_hit = None

    for code_col, date_col in zip(code_cols, date_cols):
        code_cell = row.get(code_col, "")
        date_cell = row.get(date_col, "")

        # Nothing to scan when the code cell is missing or blank.
        if pd.isna(code_cell) or str(code_cell).strip() == "":
            continue

        found_codes = re.findall(r"[A-Z][0-9]{2,3}", str(code_cell).upper())
        if pd.notna(date_cell):
            found_dates = re.findall(r"\d{4}-\d{2}-\d{2}", str(date_cell))
        else:
            found_dates = []

        # zip() stops at the shorter list, which drops codes that have no
        # date at the same position.
        for code, date_text in zip(found_codes, found_dates):
            if not re.match(pattern, code):
                continue
            try:
                candidate = datetime.strptime(date_text, "%Y-%m-%d")
            except ValueError:
                # Looks like a date but is not a real calendar day.
                continue
            if first_hit is None or candidate < first_hit:
                first_hit = candidate

    return first_hit
75
+
76
+
77
def build_survival_dataset(
    df: pd.DataFrame,
    disease_key: str,
    censor_date: str = "2023-10-31",
    baseline_col: str = "p53_i0",
) -> pd.DataFrame:
    """Build survival dataset with prevalent/incident case separation.

    Each subject is classified against its baseline assessment date:

    * missing baseline -> ``outcome_status`` and ``survival_years`` are NA;
    * first matching diagnosis on or before baseline -> prevalent case,
      excluded from the at-risk set (both outputs NA);
    * first matching diagnosis after baseline -> incident case
      (``outcome_status`` 1, follow-up ends at the diagnosis date);
    * no matching diagnosis -> censored (``outcome_status`` 0, follow-up ends
      at the earlier of the death date and ``censor_date``).

    Args:
        df: Raw UKB table. Must contain an ``eid`` column; ICD-10 code columns
            are those starting with ``p41270`` and their date columns those
            starting with ``p41280``.
        disease_key: Key into ``DISEASE_DEFINITIONS``.
        censor_date: Administrative censoring date, ``YYYY-MM-DD``.
        baseline_col: Preferred baseline date column; ``p53_i0`` and ``p53``
            are tried as fallbacks.

    Returns:
        One row per subject with columns ``eid``, ``<disease>_prevalent``,
        ``<disease>_incident``, ``outcome_status`` and ``survival_years``.

    Raises:
        ValueError: If ``disease_key`` is unknown or ``censor_date`` is not
            in ``YYYY-MM-DD`` form.
        KeyError: If ``df`` has no ``eid`` column.
    """
    disease_def = DISEASE_DEFINITIONS.get(disease_key)
    if disease_def is None:
        raise ValueError(f"Unknown disease: {disease_key}")

    pattern = disease_def["icd10_pattern"]
    censor_dt = datetime.strptime(censor_date, "%Y-%m-%d")

    def _first_present(candidates):
        """Return the first candidate column present in ``df``, else None."""
        return next((name for name in candidates if name in df.columns), None)

    def _cell_date(row, col):
        """Return the cell as a Timestamp, or None if the column/value is missing."""
        if col is None:
            return None
        value = pd.to_datetime(row.get(col), errors="coerce")
        # errors="coerce" yields NaT (never None) for missing or unparseable
        # cells. NaT compares False against everything, so it must be mapped
        # to None here or it slips past every `is None` check below.
        return None if pd.isna(value) else value

    baseline_date_col = _first_present([baseline_col, "p53_i0", "p53"])
    death_date_col = _first_present(["p40000_i0", "p40000"])

    # ICD-10 code columns and their date columns are paired positionally.
    # NOTE(review): this assumes both prefixes yield the same number of
    # columns in the same order - confirm against the export format in use.
    code_cols = [c for c in df.columns if c.startswith("p41270")]
    date_cols = [c for c in df.columns if c.startswith("p41280")]

    prevalent_key = f"{disease_key}_prevalent"
    incident_key = f"{disease_key}_incident"

    records = []
    n_prevalent = 0
    n_incident = 0
    n_censored = 0
    n_no_baseline = 0

    for _, row in df.iterrows():
        baseline_dt = _cell_date(row, baseline_date_col)
        diag_dt = find_first_diagnosis_date(row, pattern, date_cols, code_cols)
        death_dt = _cell_date(row, death_date_col)

        is_prevalent = (
            baseline_dt is not None and diag_dt is not None and diag_dt <= baseline_dt
        )
        is_incident = (
            baseline_dt is not None and diag_dt is not None and diag_dt > baseline_dt
        )

        if baseline_dt is None:
            # Follow-up time is undefined without a baseline date.
            outcome_status = pd.NA
            follow_up_years = pd.NA
            n_no_baseline += 1
        elif is_prevalent:
            # Diagnosed on or before baseline: not at risk.
            outcome_status = pd.NA
            follow_up_years = pd.NA
            n_prevalent += 1
        elif is_incident:
            # NOTE(review): a diagnosis dated after censor_date is still
            # counted as an event here - confirm that is intended.
            outcome_status = 1
            follow_up_years = (diag_dt - baseline_dt).days / 365.25
            n_incident += 1
        else:
            # No diagnosis: follow until death or administrative censoring,
            # whichever comes first.
            outcome_status = 0
            end_dt = censor_dt if death_dt is None else min(death_dt, censor_dt)
            follow_up_years = (end_dt - baseline_dt).days / 365.25
            n_censored += 1

        records.append({
            "eid": row["eid"],
            prevalent_key: 1 if is_prevalent else 0,
            incident_key: 1 if is_incident else 0,
            "outcome_status": outcome_status,
            "survival_years": follow_up_years,
        })

    # Explicit columns keep the header stable even when df has no rows.
    result = pd.DataFrame(
        records,
        columns=["eid", prevalent_key, incident_key, "outcome_status", "survival_years"],
    )

    total = len(df)

    def _pct(count):
        """Percentage of ``total``; 0.0 for an empty table instead of dividing by zero."""
        return count / total * 100 if total else 0.0

    # Summary
    print(f"\nSurvival dataset: {disease_key}")
    print(f" Total subjects: {total}")
    print(f" Prevalent cases: {n_prevalent} ({_pct(n_prevalent):.1f}%)")
    print(f" Incident cases: {n_incident} ({_pct(n_incident):.1f}%)")
    print(f" Censored: {n_censored} ({_pct(n_censored):.1f}%)")
    if n_no_baseline:
        print(f" Missing baseline date: {n_no_baseline} ({_pct(n_no_baseline):.1f}%)")
    print(f" At-risk for analysis: {n_incident + n_censored}")

    return result
177
+
178
+
179
def main() -> int:
    """Command-line entry point.

    Reads the UKB CSV given by ``--input``, builds the survival table for
    ``--disease`` and writes it to ``--output``, creating the output
    directory if needed. Returns 0 on success and 1 when the input file
    does not exist.
    """
    # Flags are declared as data and registered in order.
    arg_specs = (
        ("--input", {"required": True, "help": "Path to UKB raw CSV"}),
        ("--output", {"required": True, "help": "Output path for survival CSV"}),
        ("--disease", {
            "required": True,
            "help": f"Disease key. Available: {list(DISEASE_DEFINITIONS.keys())}",
        }),
        ("--censor-date", {
            "default": "2023-10-31",
            "help": "Administrative censoring date (default: 2023-10-31)",
        }),
        ("--baseline-col", {
            "default": "p53_i0",
            "help": "Column name for baseline assessment date",
        }),
    )
    cli = argparse.ArgumentParser(description="Build UKB survival dataset.")
    for flag, options in arg_specs:
        cli.add_argument(flag, **options)
    opts = cli.parse_args()

    source = Path(opts.input).resolve()
    if not source.exists():
        print(f"Input file not found: {source}", file=sys.stderr)
        return 1

    table = pd.read_csv(source, low_memory=False)
    print(f"Loaded {len(table)} subjects")

    survival = build_survival_dataset(
        table, opts.disease, opts.censor_date, opts.baseline_col
    )

    destination = Path(opts.output).resolve()
    destination.parent.mkdir(parents=True, exist_ok=True)
    survival.to_csv(destination, index=False)
    print(f"Saved -> {destination}")

    return 0
207
+
208
+
209
+ if __name__ == "__main__":
210
+ sys.exit(main())
@@ -0,0 +1,308 @@
1
+ #!/usr/bin/env python3
2
+ """Extract disease cases from UK Biobank data using ICD-10/ICD-9 codes.
3
+
4
+ Adapted from UKBAnalytica_v2 case_extraction.R and ICD_diagnose.R.
5
+ Supports brain-related disease endpoints: dementia, stroke, Parkinson's, etc.
6
+
7
+ Usage:
8
+ python extract_ukb_cases.py --input ukb_raw.csv --disease dementia --output cases.csv
9
+ python extract_ukb_cases.py --input ukb_raw.csv --disease stroke --sources ICD10,Self-report --output stroke_cases.csv
10
+ python extract_ukb_cases.py --input ukb_raw.csv --custom-icd G30 --output custom_cases.csv
11
+ """
12
+ import argparse
13
+ import csv
14
+ import re
15
+ import sys
16
+ from pathlib import Path
17
+ from typing import Dict, List, Optional, Tuple
18
+
19
+ import pandas as pd
20
+
21
# Predefined brain-related disease definitions (ICD-10 patterns)
#
# Each entry maps a disease key to:
#   description        - human-readable label (reported in extraction stats)
#   icd10_pattern      - regex matched against the start of each ICD-10 code
#   icd10_codes        - explicit code list; extract_cases() in this file does
#                        not read it, only icd10_pattern
#   self_report_codes  - self-report codes; not read by extract_cases() in
#                        this file
# No entry defines "icd9_codes", which extract_cases() looks up with
# .get("icd9_codes", []), so the ICD9 source currently matches nothing.
DISEASE_DEFINITIONS = {
    "dementia": {
        "description": "All-cause dementia",
        # NOTE(review): the pattern also matches F04-F08, which are absent
        # from icd10_codes below - confirm which of the two is intended.
        "icd10_pattern": "F0[0-9]",
        "icd10_codes": ["F00", "F01", "F02", "F03", "F09"],
        "self_report_codes": [1263],  # UKB self-report code for dementia
    },
    "alzheimers": {
        "description": "Alzheimer's disease",
        "icd10_pattern": "F00|G30",
        "icd10_codes": ["F00", "F000", "F001", "F002", "F009", "G30", "G300", "G301", "G308", "G309"],
        # NOTE(review): same self-report code as "dementia" - confirm it is
        # specific enough for this endpoint.
        "self_report_codes": [1263],
    },
    "vascular_dementia": {
        "description": "Vascular dementia",
        "icd10_pattern": "F01",
        "icd10_codes": ["F010", "F011", "F012", "F013", "F018", "F019"],
        "self_report_codes": [],
    },
    "stroke": {
        "description": "Stroke (ischaemic + haemorrhagic)",
        "icd10_pattern": "I6[0-4]",
        "icd10_codes": ["I60", "I61", "I62", "I63", "I64"],
        "self_report_codes": [1081, 1491, 1583],
    },
    "ischaemic_stroke": {
        "description": "Ischaemic stroke",
        "icd10_pattern": "I63",
        "icd10_codes": ["I630", "I631", "I632", "I633", "I634", "I635", "I636", "I638", "I639"],
        "self_report_codes": [],
    },
    "haemorrhagic_stroke": {
        "description": "Haemorrhagic stroke",
        "icd10_pattern": "I6[0-2]",
        "icd10_codes": ["I60", "I61", "I62"],
        "self_report_codes": [],
    },
    "parkinsons": {
        "description": "Parkinson's disease",
        "icd10_pattern": "G20",
        "icd10_codes": ["G20"],
        "self_report_codes": [1254],
    },
    "multiple_sclerosis": {
        "description": "Multiple sclerosis",
        "icd10_pattern": "G35",
        "icd10_codes": ["G35"],
        "self_report_codes": [1258],
    },
    "epilepsy": {
        "description": "Epilepsy",
        "icd10_pattern": "G4[0-1]",
        "icd10_codes": ["G40", "G41"],
        "self_report_codes": [1262],
    },
    "migraine": {
        "description": "Migraine",
        "icd10_pattern": "G43",
        "icd10_codes": ["G430", "G431", "G432", "G433", "G438", "G439"],
        "self_report_codes": [1265],
    },
    "depression": {
        "description": "Major depressive disorder",
        "icd10_pattern": "F3[2-3]",
        "icd10_codes": ["F320", "F321", "F322", "F323", "F328", "F329", "F330", "F331", "F332", "F333", "F334", "F338", "F339"],
        "self_report_codes": [1286, 1530],
    },
    "anxiety": {
        "description": "Anxiety disorders",
        "icd10_pattern": "F4[0-1]",
        "icd10_codes": ["F400", "F401", "F402", "F408", "F409", "F410", "F411", "F412", "F413", "F418", "F419"],
        "self_report_codes": [1287, 1531],
    },
    "schizophrenia": {
        "description": "Schizophrenia",
        "icd10_pattern": "F20",
        "icd10_codes": ["F200", "F201", "F202", "F203", "F205", "F206", "F208", "F209"],
        "self_report_codes": [1289],
    },
    "bipolar": {
        "description": "Bipolar disorder",
        "icd10_pattern": "F31",
        "icd10_codes": ["F310", "F311", "F312", "F313", "F315", "F316", "F317", "F318", "F319"],
        "self_report_codes": [1291],
    },
    "brain_tumour": {
        "description": "Brain tumour (benign + malignant)",
        "icd10_pattern": "C71|D33|D43",
        "icd10_codes": ["C710", "C711", "C712", "C713", "C719", "D330", "D331", "D332", "D339", "D430", "D431", "D432", "D439"],
        "self_report_codes": [],
    },
    "tbi": {
        "description": "Traumatic brain injury",
        # NOTE(review): the pattern also matches S07-S09, while icd10_codes
        # lists only S06x - confirm which of the two is intended.
        "icd10_pattern": "S0[6-9]",
        "icd10_codes": ["S060", "S061", "S062", "S063", "S064", "S065", "S066", "S068", "S069"],
        "self_report_codes": [],
    },
}
120
+
121
+
122
def parse_icd10_diagnoses(df: pd.DataFrame) -> pd.DataFrame:
    """Parse ICD-10 diagnosis codes from UKB hospital inpatient data.

    The first of the columns ``p41270`` / ``p41270_i0`` present in ``df`` is
    used. Each cell is upper-cased and scanned for tokens shaped like one
    letter followed by two or three digits, so both list-like strings
    (``"['I639','G20']"``) and plain delimited strings (``"I639,G20"``) work.

    Args:
        df: Table with an ``eid`` column and, optionally, an ICD-10 column.

    Returns:
        Long-format frame with columns ``eid``, ``icd10_code``, ``diag_date``
        and ``source`` (always ``"ICD10"``), one row per extracted code.
        ``diag_date`` is always present but left empty: diagnosis dates are
        not parsed here. The frame is empty when the column is missing or
        holds no codes.
    """
    # One fixed schema for both the empty and the non-empty result, so callers
    # never have to special-case a missing ``diag_date`` column.
    columns = ["eid", "icd10_code", "diag_date", "source"]

    icd10_col = next((c for c in ("p41270", "p41270_i0") if c in df.columns), None)
    if icd10_col is None:
        print("[WARN] No ICD-10 diagnosis column (p41270) found")
        return pd.DataFrame(columns=columns)

    code_re = re.compile(r"[A-Z][0-9]{2,3}")
    records = []
    # Iterating the two columns directly avoids building a Series per row,
    # which is what DataFrame.iterrows() does.
    for eid, raw in zip(df["eid"], df[icd10_col]):
        if pd.isna(raw):
            continue
        for code in code_re.findall(str(raw).upper()):
            records.append({"eid": eid, "icd10_code": code, "source": "ICD10"})

    return pd.DataFrame(records, columns=columns)
149
+
150
+
151
def parse_icd9_diagnoses(df: pd.DataFrame) -> pd.DataFrame:
    """Parse ICD-9 diagnosis codes from UKB hospital inpatient data.

    The first of the columns ``p41271`` / ``p41271_i0`` present in ``df`` is
    used. Each cell is upper-cased and scanned for tokens made of an optional
    ``V``/``E`` prefix followed by three to five digits.

    Args:
        df: Table with an ``eid`` column and, optionally, an ICD-9 column.

    Returns:
        Long-format frame with columns ``eid``, ``icd9_code``, ``diag_date``
        and ``source`` (always ``"ICD9"``), one row per extracted code.
        ``diag_date`` is always present but left empty: dates are not parsed
        here. The frame is empty when the column is missing or holds no codes.
    """
    # One fixed schema for both the empty and the non-empty result.
    columns = ["eid", "icd9_code", "diag_date", "source"]

    icd9_col = next((c for c in ("p41271", "p41271_i0") if c in df.columns), None)
    if icd9_col is None:
        return pd.DataFrame(columns=columns)

    code_re = re.compile(r"[VE]?[0-9]{3,5}")
    records = []
    # Column-wise iteration avoids the per-row Series that iterrows() builds.
    for eid, raw in zip(df["eid"], df[icd9_col]):
        if pd.isna(raw):
            continue
        for code in code_re.findall(str(raw).upper()):
            records.append({"eid": eid, "icd9_code": code, "source": "ICD9"})

    return pd.DataFrame(records, columns=columns)
174
+
175
+
176
def extract_cases(
    df: pd.DataFrame,
    disease_key: str,
    custom_icd: Optional[str] = None,
    sources: Optional[List[str]] = None,
) -> Tuple[pd.DataFrame, Dict]:
    """Extract cases for a given disease.

    Args:
        df: Raw UKB table; must contain an ``eid`` column.
        disease_key: Key into ``DISEASE_DEFINITIONS``. Also names the output
            indicator column (``"<disease_key>_case"``).
        custom_icd: Optional ICD-10 regex. When given it replaces the
            predefined definition and ``disease_key`` is only used for naming.
            Codes are upper-cased before matching, so the pattern should be
            written in upper case.
        sources: Data sources to scan; any of ``"ICD10"``, ``"ICD9"`` and
            ``"Death"``. Defaults to ``["ICD10", "ICD9"]``. Other names are
            reported with a warning and ignored.

    Returns:
        ``(result, stats)``. ``result`` has one row per row of ``df`` with the
        columns ``eid`` and ``"<disease_key>_case"`` (1 = case, 0 = not).
        ``stats`` holds ``disease``, ``total_subjects``, ``cases``,
        ``prevalence`` (percent), ``sources_used`` and ``cases_by_source``
        (number of distinct subjects matched per source).

    Raises:
        ValueError: If ``disease_key`` is unknown and no ``custom_icd`` is
            given, or if the ICD-10 pattern is not a valid regular expression.
    """
    if sources is None:
        sources = ["ICD10", "ICD9"]

    # Use custom ICD pattern or predefined
    if custom_icd:
        disease_def = {"description": "Custom", "icd10_pattern": custom_icd, "icd10_codes": [custom_icd], "self_report_codes": []}
    elif disease_key in DISEASE_DEFINITIONS:
        disease_def = DISEASE_DEFINITIONS[disease_key]
    else:
        raise ValueError(f"Unknown disease: {disease_key}. Available: {list(DISEASE_DEFINITIONS.keys())}")

    pattern = disease_def["icd10_pattern"]
    # Validate the pattern up front so a bad --custom-icd fails fast with a
    # clear error instead of surfacing after the whole table was parsed.
    try:
        pattern_re = re.compile(pattern)
    except re.error as err:
        raise ValueError(f"Invalid ICD-10 pattern {pattern!r}: {err}") from err

    # Sources this function actually implements; anything else used to be
    # dropped silently, which made e.g. "Self-report" look like it worked.
    implemented = ("ICD10", "ICD9", "Death")
    for name in sources:
        if name not in implemented:
            print(f"[WARN] Source '{name}' is not supported by extract_cases and was ignored")

    eids_by_source: Dict[str, set] = {}

    # ICD-10 from hospital inpatient
    if "ICD10" in sources:
        icd10_df = parse_icd10_diagnoses(df)
        if not icd10_df.empty:
            hits = icd10_df["icd10_code"].str.match(pattern, na=False)
            eids_by_source["ICD10"] = set(icd10_df.loc[hits, "eid"])

    # ICD-9 from hospital inpatient: prefix match on the first three
    # characters of each configured code (simplified matching).
    if "ICD9" in sources:
        prefixes = {code[:3] for code in disease_def.get("icd9_codes", []) if code}
        if prefixes:
            icd9_df = parse_icd9_diagnoses(df)
            if not icd9_df.empty:
                hits = pd.Series(False, index=icd9_df.index)
                for prefix in prefixes:
                    hits |= icd9_df["icd9_code"].str.startswith(prefix, na=False)
                eids_by_source["ICD9"] = set(icd9_df.loc[hits, "eid"])

    # Death register
    if "Death" in sources:
        death_col = next((c for c in ("p40001_i0", "p40001") if c in df.columns), None)
        if death_col is not None:
            code_re = re.compile(r"[A-Z][0-9]{2,3}")
            death_eids = set()
            for eid, raw in zip(df["eid"], df[death_col]):
                if pd.isna(raw):
                    continue
                codes = code_re.findall(str(raw).upper())
                if any(pattern_re.match(code) for code in codes):
                    death_eids.add(eid)
            eids_by_source["Death"] = death_eids

    case_eids = set().union(*eids_by_source.values())

    # Build full result: all subjects with case indicator
    result = pd.DataFrame({"eid": df["eid"]})
    result[f"{disease_key}_case"] = result["eid"].isin(case_eids).astype(int)

    stats = {
        "disease": disease_def["description"],
        "total_subjects": len(df),
        "cases": len(case_eids),
        "prevalence": len(case_eids) / len(df) * 100 if len(df) > 0 else 0,
        "sources_used": sources,
        "cases_by_source": {name: len(eids) for name, eids in eids_by_source.items()},
    }

    return result, stats
260
+
261
+
262
def main(argv: Optional[List[str]] = None) -> int:
    """Command-line entry point for UKB case extraction.

    Args:
        argv: Argument list to parse; ``None`` (the default) makes argparse
            read ``sys.argv``.

    Returns:
        0 on success (including ``--list-diseases``), 1 on a user error such
        as a missing input file, an unknown disease or a CSV without ``eid``.
        Missing ``--input``/``--output`` exit through ``parser.error`` (status
        2), as argparse does for required options.
    """
    parser = argparse.ArgumentParser(description="Extract UKB disease cases.")
    # --input/--output are validated manually below rather than with
    # required=True, otherwise "--list-diseases" could never be used alone.
    parser.add_argument("--input", help="Path to UKB raw CSV")
    parser.add_argument("--output", help="Output path for cases CSV")
    parser.add_argument("--disease", help=f"Disease key. Available: {list(DISEASE_DEFINITIONS.keys())}")
    parser.add_argument("--custom-icd", help="Custom ICD-10 pattern (regex)")
    parser.add_argument("--sources", default="ICD10,ICD9,Death",
                        help="Comma-separated data sources: ICD10,ICD9,Death,Self-report")
    parser.add_argument("--list-diseases", action="store_true", help="List available diseases and exit")
    args = parser.parse_args(argv)

    if args.list_diseases:
        print("Available brain-related disease definitions:")
        for key, info in DISEASE_DEFINITIONS.items():
            print(f" {key}: {info['description']} (ICD-10: {info['icd10_pattern']})")
        return 0

    missing = [flag for flag, value in (("--input", args.input), ("--output", args.output)) if not value]
    if missing:
        # Same wording and exit status argparse uses for required options.
        parser.error(f"the following arguments are required: {', '.join(missing)}")

    if not args.disease and not args.custom_icd:
        print("Error: --disease or --custom-icd is required", file=sys.stderr)
        return 1

    # Reject an unknown disease before reading a potentially very large CSV.
    if not args.custom_icd and args.disease not in DISEASE_DEFINITIONS:
        print(f"Error: unknown disease '{args.disease}'. Available: {list(DISEASE_DEFINITIONS.keys())}",
              file=sys.stderr)
        return 1

    input_path = Path(args.input).resolve()
    if not input_path.exists():
        print(f"Input file not found: {input_path}", file=sys.stderr)
        return 1

    df = pd.read_csv(input_path, low_memory=False)
    print(f"Loaded {len(df)} subjects")

    if "eid" not in df.columns:
        print("Error: input CSV has no 'eid' column", file=sys.stderr)
        return 1

    # Drop empty tokens so "ICD10,,Death" or a trailing comma is harmless.
    sources = [s.strip() for s in args.sources.split(",") if s.strip()]
    try:
        result, stats = extract_cases(df, args.disease or "custom", args.custom_icd, sources)
    except ValueError as err:
        # e.g. an invalid --custom-icd regex; report it without a traceback.
        print(f"Error: {err}", file=sys.stderr)
        return 1

    print(f"\nCase extraction: {stats['disease']}")
    print(f" Total subjects: {stats['total_subjects']}")
    print(f" Cases: {stats['cases']} ({stats['prevalence']:.2f}%)")
    print(f" Sources: {stats['sources_used']}")

    output_path = Path(args.output).resolve()
    output_path.parent.mkdir(parents=True, exist_ok=True)
    result.to_csv(output_path, index=False)
    print(f"Saved -> {output_path}")

    return 0
305
+
306
+
307
# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    sys.exit(main())