@brainpilot/skills 0.0.6 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (285):
  1. package/package.json +2 -2
  2. package/skills/01_Meta-Skills/academic-research-hub/SKILL.md +108 -0
  3. package/skills/01_Meta-Skills/academic-research-hub/scripts/requirements.txt +17 -0
  4. package/skills/01_Meta-Skills/academic-research-hub/scripts/research.py +781 -0
  5. package/skills/01_Meta-Skills/beautiful-log/SKILL.md +64 -0
  6. package/skills/01_Meta-Skills/beautiful-log/scripts/beautiful_log.py +274 -0
  7. package/skills/01_Meta-Skills/ethoclaw-daily-paper/SKILL.md +130 -0
  8. package/skills/01_Meta-Skills/ethoclaw-daily-paper/assets/config.template.yaml +54 -0
  9. package/skills/01_Meta-Skills/ethoclaw-daily-paper/assets/top5_digest_template.md +5 -0
  10. package/skills/01_Meta-Skills/ethoclaw-daily-paper/scripts/build_top5_digest.py +300 -0
  11. package/skills/01_Meta-Skills/ethoclaw-daily-paper/scripts/common.py +137 -0
  12. package/skills/01_Meta-Skills/ethoclaw-daily-paper/scripts/merge_results.py +106 -0
  13. package/skills/01_Meta-Skills/ethoclaw-daily-paper/scripts/run_pipeline.py +177 -0
  14. package/skills/01_Meta-Skills/ethoclaw-daily-paper/scripts/search_arxiv.py +162 -0
  15. package/skills/01_Meta-Skills/ethoclaw-daily-paper/scripts/search_pubmed.py +202 -0
  16. package/skills/01_Meta-Skills/ethoclaw-normalize-tabular/SKILL.md +173 -0
  17. package/skills/01_Meta-Skills/ethoclaw-normalize-tabular/scripts/normalize_data.py +874 -0
  18. package/skills/01_Meta-Skills/ethoclaw-pdf-research/SKILL.md +134 -0
  19. package/skills/01_Meta-Skills/ethoclaw-pdf-research/references/confirmation-prompts.md +31 -0
  20. package/skills/01_Meta-Skills/ethoclaw-pdf-research/references/output-patterns.md +45 -0
  21. package/skills/01_Meta-Skills/ethoclaw-pdf-research/scripts/build_markdown_deliverables.py +41 -0
  22. package/skills/01_Meta-Skills/ethoclaw-pdf-research/scripts/build_research_log.py +84 -0
  23. package/skills/01_Meta-Skills/ethoclaw-pdf-research/scripts/build_summary_md.py +63 -0
  24. package/skills/01_Meta-Skills/ethoclaw-pdf-research/scripts/extract_pdf_bundle.py +140 -0
  25. package/skills/01_Meta-Skills/experiment-controller/SKILL.md +140 -0
  26. package/skills/01_Meta-Skills/knowledge-graph-builder/SKILL.md +366 -0
  27. package/skills/01_Meta-Skills/knowledge-graph-builder/scripts/entity_resolution.py +120 -0
  28. package/skills/01_Meta-Skills/knowledge-graph-builder/scripts/extraction_prompt_template.txt +19 -0
  29. package/skills/01_Meta-Skills/knowledge-graph-builder/scripts/graph_query.py +106 -0
  30. package/skills/01_Meta-Skills/knowledge-graph-builder/scripts/hypothesis_cli_reference.py +42 -0
  31. package/skills/01_Meta-Skills/knowledge-graph-builder/scripts/new_data_source_template.py +116 -0
  32. package/skills/01_Meta-Skills/knowledge-graph-builder/scripts/requirements.txt +15 -0
  33. package/skills/01_Meta-Skills/method-design/SKILL.md +61 -0
  34. package/skills/01_Meta-Skills/multi-search-engine/SKILL.md +119 -0
  35. package/skills/01_Meta-Skills/research-idea/SKILL.md +65 -0
  36. package/skills/05_EEG_ERP/eeg-skill/SKILL.md +197 -0
  37. package/skills/05_EEG_ERP/meg-skill/SKILL.md +188 -0
  38. package/skills/05_EEG_ERP/meg-skill/scripts/time_frequency.py +223 -0
  39. package/skills/05_EEG_ERP/mne-eeg-tool/SKILL.md +165 -0
  40. package/skills/05_EEG_ERP/mne-eeg-tool/scripts/eeg_pipeline_reference.py +231 -0
  41. package/skills/05_EEG_ERP/seed-iv-skill/SKILL.md +184 -0
  42. package/skills/05_EEG_ERP/seed-iv-skill/scripts/classify_seed_iv.py +154 -0
  43. package/skills/05_EEG_ERP/seed-iv-skill/scripts/extract_seed_iv_features.py +190 -0
  44. package/skills/05_EEG_ERP/seed-iv-skill/scripts/validate_seed_iv.py +102 -0
  45. package/skills/05_EEG_ERP/seed-vig-skill/SKILL.md +182 -0
  46. package/skills/05_EEG_ERP/seed-vig-skill/scripts/classify_seed_vig.py +165 -0
  47. package/skills/05_EEG_ERP/seed-vig-skill/scripts/extract_seed_vig_features.py +185 -0
  48. package/skills/05_EEG_ERP/seed-vig-skill/scripts/validate_seed_vig.py +88 -0
  49. package/skills/06_fMRI_Neuroimaging/abcd-skill/SKILL.md +308 -0
  50. package/skills/06_fMRI_Neuroimaging/abcd-skill/scripts/abcd_qc_summary.py +449 -0
  51. package/skills/06_fMRI_Neuroimaging/abcd-skill/scripts/extract_abcd_phenotype.py +292 -0
  52. package/skills/06_fMRI_Neuroimaging/abcd-skill/scripts/reorganize_abcd.py +387 -0
  53. package/skills/06_fMRI_Neuroimaging/abide-skill/SKILL.md +302 -0
  54. package/skills/06_fMRI_Neuroimaging/abide-skill/scripts/abide_qc_summary.py +317 -0
  55. package/skills/06_fMRI_Neuroimaging/abide-skill/scripts/extract_abide_phenotype.py +267 -0
  56. package/skills/06_fMRI_Neuroimaging/abide-skill/scripts/reorganize_abide.py +387 -0
  57. package/skills/06_fMRI_Neuroimaging/adhd200-skill/SKILL.md +244 -0
  58. package/skills/06_fMRI_Neuroimaging/adhd200-skill/scripts/adhd200_qc_summary.py +98 -0
  59. package/skills/06_fMRI_Neuroimaging/adhd200-skill/scripts/extract_adhd200_phenotype.py +134 -0
  60. package/skills/06_fMRI_Neuroimaging/adhd200-skill/scripts/reorganize_adhd200.py +206 -0
  61. package/skills/06_fMRI_Neuroimaging/adni-skill/SKILL.md +358 -0
  62. package/skills/06_fMRI_Neuroimaging/adni-skill/scripts/generate_adni_task_files.py +1305 -0
  63. package/skills/06_fMRI_Neuroimaging/adni-skill/scripts/generate_vqa_from_tasks.py +766 -0
  64. package/skills/06_fMRI_Neuroimaging/adni-skill/scripts/reorganize_adni.py +491 -0
  65. package/skills/06_fMRI_Neuroimaging/aibl-skill/SKILL.md +295 -0
  66. package/skills/06_fMRI_Neuroimaging/aibl-skill/scripts/aibl_qc_summary.py +260 -0
  67. package/skills/06_fMRI_Neuroimaging/aibl-skill/scripts/extract_aibl_phenotype.py +365 -0
  68. package/skills/06_fMRI_Neuroimaging/aibl-skill/scripts/reorganize_aibl.py +394 -0
  69. package/skills/06_fMRI_Neuroimaging/aomic-skill/SKILL.md +292 -0
  70. package/skills/06_fMRI_Neuroimaging/aomic-skill/scripts/aomic_qc_summary.py +258 -0
  71. package/skills/06_fMRI_Neuroimaging/aomic-skill/scripts/extract_aomic_phenotype.py +284 -0
  72. package/skills/06_fMRI_Neuroimaging/aomic-skill/scripts/reorganize_aomic.py +322 -0
  73. package/skills/06_fMRI_Neuroimaging/asl-skill/SKILL.md +168 -0
  74. package/skills/06_fMRI_Neuroimaging/asl-skill/scripts/compute_cbf.py +224 -0
  75. package/skills/06_fMRI_Neuroimaging/bids-organizer/SKILL.md +241 -0
  76. package/skills/06_fMRI_Neuroimaging/bold5000-skill/SKILL.md +186 -0
  77. package/skills/06_fMRI_Neuroimaging/bold5000-skill/scripts/bold5000_qc_summary.py +96 -0
  78. package/skills/06_fMRI_Neuroimaging/bold5000-skill/scripts/extract_bold5000_stimulus.py +125 -0
  79. package/skills/06_fMRI_Neuroimaging/bold5000-skill/scripts/reorganize_bold5000.py +102 -0
  80. package/skills/06_fMRI_Neuroimaging/camcan-skill/SKILL.md +213 -0
  81. package/skills/06_fMRI_Neuroimaging/camcan-skill/scripts/camcan_qc_summary.py +131 -0
  82. package/skills/06_fMRI_Neuroimaging/camcan-skill/scripts/extract_camcan_phenotype.py +145 -0
  83. package/skills/06_fMRI_Neuroimaging/camcan-skill/scripts/validate_camcan.py +141 -0
  84. package/skills/06_fMRI_Neuroimaging/cobre-skill/SKILL.md +201 -0
  85. package/skills/06_fMRI_Neuroimaging/cobre-skill/scripts/cobre_qc_summary.py +95 -0
  86. package/skills/06_fMRI_Neuroimaging/cobre-skill/scripts/extract_cobre_phenotype.py +104 -0
  87. package/skills/06_fMRI_Neuroimaging/cobre-skill/scripts/reorganize_cobre.py +140 -0
  88. package/skills/06_fMRI_Neuroimaging/conn-tool/SKILL.md +180 -0
  89. package/skills/06_fMRI_Neuroimaging/dcm2nii/SKILL.md +189 -0
  90. package/skills/06_fMRI_Neuroimaging/dmt-har-med-skill/SKILL.md +183 -0
  91. package/skills/06_fMRI_Neuroimaging/dmt-har-med-skill/scripts/dmt_har_med_qc_summary.py +96 -0
  92. package/skills/06_fMRI_Neuroimaging/dmt-har-med-skill/scripts/extract_dmt_har_med_phenotype.py +121 -0
  93. package/skills/06_fMRI_Neuroimaging/dmt-har-med-skill/scripts/reorganize_dmt_har_med.py +125 -0
  94. package/skills/06_fMRI_Neuroimaging/dwi-skill/SKILL.md +359 -0
  95. package/skills/06_fMRI_Neuroimaging/fmri-skill/SKILL.md +371 -0
  96. package/skills/06_fMRI_Neuroimaging/fmriprep-tool/SKILL.md +228 -0
  97. package/skills/06_fMRI_Neuroimaging/freesurfer-tool/SKILL.md +286 -0
  98. package/skills/06_fMRI_Neuroimaging/freesurfer-tool/scripts/freesurfer_processor.py +145 -0
  99. package/skills/06_fMRI_Neuroimaging/fsl-tool/SKILL.md +208 -0
  100. package/skills/06_fMRI_Neuroimaging/hbn-skill/SKILL.md +271 -0
  101. package/skills/06_fMRI_Neuroimaging/hbn-skill/scripts/extract_hbn_phenotype.py +107 -0
  102. package/skills/06_fMRI_Neuroimaging/hbn-skill/scripts/hbn_qc_summary.py +96 -0
  103. package/skills/06_fMRI_Neuroimaging/hbn-skill/scripts/reorganize_hbn.py +150 -0
  104. package/skills/06_fMRI_Neuroimaging/hcpa-skill/SKILL.md +210 -0
  105. package/skills/06_fMRI_Neuroimaging/hcpa-skill/scripts/extract_hcpa_phenotype.py +146 -0
  106. package/skills/06_fMRI_Neuroimaging/hcpa-skill/scripts/hcpa_qc_summary.py +120 -0
  107. package/skills/06_fMRI_Neuroimaging/hcpa-skill/scripts/reorganize_hcpa.py +155 -0
  108. package/skills/06_fMRI_Neuroimaging/hcpd-skill/SKILL.md +210 -0
  109. package/skills/06_fMRI_Neuroimaging/hcpd-skill/scripts/extract_hcpd_phenotype.py +148 -0
  110. package/skills/06_fMRI_Neuroimaging/hcpd-skill/scripts/hcpd_qc_summary.py +125 -0
  111. package/skills/06_fMRI_Neuroimaging/hcpd-skill/scripts/reorganize_hcpd.py +146 -0
  112. package/skills/06_fMRI_Neuroimaging/hcpep-skill/SKILL.md +215 -0
  113. package/skills/06_fMRI_Neuroimaging/hcpep-skill/scripts/extract_hcpep_phenotype.py +157 -0
  114. package/skills/06_fMRI_Neuroimaging/hcpep-skill/scripts/hcpep_qc_summary.py +143 -0
  115. package/skills/06_fMRI_Neuroimaging/hcpep-skill/scripts/reorganize_hcpep.py +146 -0
  116. package/skills/06_fMRI_Neuroimaging/hcppipeline-tool/SKILL.md +217 -0
  117. package/skills/06_fMRI_Neuroimaging/hcpya-skill/SKILL.md +214 -0
  118. package/skills/06_fMRI_Neuroimaging/hcpya-skill/scripts/extract_hcpya_phenotype.py +190 -0
  119. package/skills/06_fMRI_Neuroimaging/hcpya-skill/scripts/hcpya_qc_summary.py +152 -0
  120. package/skills/06_fMRI_Neuroimaging/hcpya-skill/scripts/reorganize_hcpya.py +203 -0
  121. package/skills/06_fMRI_Neuroimaging/ixi-skill/SKILL.md +198 -0
  122. package/skills/06_fMRI_Neuroimaging/ixi-skill/scripts/ixi_qc_summary.py +137 -0
  123. package/skills/06_fMRI_Neuroimaging/ixi-skill/scripts/reorganize_ixi.py +190 -0
  124. package/skills/06_fMRI_Neuroimaging/mnd-skill/SKILL.md +191 -0
  125. package/skills/06_fMRI_Neuroimaging/mnd-skill/scripts/extract_mnd_phenotype.py +143 -0
  126. package/skills/06_fMRI_Neuroimaging/mnd-skill/scripts/mnd_qc_summary.py +120 -0
  127. package/skills/06_fMRI_Neuroimaging/mnd-skill/scripts/validate_mnd.py +107 -0
  128. package/skills/06_fMRI_Neuroimaging/mschallenge-skill/SKILL.md +203 -0
  129. package/skills/06_fMRI_Neuroimaging/mschallenge-skill/scripts/analyze_lesions.py +119 -0
  130. package/skills/06_fMRI_Neuroimaging/mschallenge-skill/scripts/longitudinal_lesion.py +148 -0
  131. package/skills/06_fMRI_Neuroimaging/mschallenge-skill/scripts/mschallenge_qc_summary.py +132 -0
  132. package/skills/06_fMRI_Neuroimaging/mschallenge-skill/scripts/validate_mschallenge.py +116 -0
  133. package/skills/06_fMRI_Neuroimaging/nibabel-skill/SKILL.md +184 -0
  134. package/skills/06_fMRI_Neuroimaging/nibabel-skill/scripts/atlas_coordinate_reference.py +61 -0
  135. package/skills/06_fMRI_Neuroimaging/nibabel-skill/scripts/freesurfer_io_reference.py +34 -0
  136. package/skills/06_fMRI_Neuroimaging/nibabel-skill/scripts/nifti_inspection_reference.py +35 -0
  137. package/skills/06_fMRI_Neuroimaging/nifd-skill/SKILL.md +205 -0
  138. package/skills/06_fMRI_Neuroimaging/nifd-skill/scripts/extract_nifd_phenotype.py +132 -0
  139. package/skills/06_fMRI_Neuroimaging/nifd-skill/scripts/nifd_qc_summary.py +111 -0
  140. package/skills/06_fMRI_Neuroimaging/nifd-skill/scripts/validate_nifd.py +111 -0
  141. package/skills/06_fMRI_Neuroimaging/nii2dcm/SKILL.md +143 -0
  142. package/skills/06_fMRI_Neuroimaging/nilearn-tool/SKILL.md +266 -0
  143. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/connectome_reference.py +65 -0
  144. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/denoise_timeseries_reference.py +58 -0
  145. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/hierarchical_parcellation_reference.py +53 -0
  146. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/kmeans_parcellation_reference.py +53 -0
  147. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/preprocess_bold_reference.py +76 -0
  148. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/rest_dictlearning_reference.py +56 -0
  149. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/rest_ica_reference.py +59 -0
  150. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/second_level_glm_reference.py +58 -0
  151. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/spacenet_classifier_reference.py +59 -0
  152. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/svm_classifier_reference.py +60 -0
  153. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/task_glm_reference.py +63 -0
  154. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/zalff_summary_reference.py +109 -0
  155. package/skills/06_fMRI_Neuroimaging/nsd-skill/SKILL.md +210 -0
  156. package/skills/06_fMRI_Neuroimaging/nsd-skill/scripts/extract_nsd_stimulus.py +171 -0
  157. package/skills/06_fMRI_Neuroimaging/nsd-skill/scripts/nsd_qc_summary.py +142 -0
  158. package/skills/06_fMRI_Neuroimaging/nsd-skill/scripts/validate_nsd.py +142 -0
  159. package/skills/06_fMRI_Neuroimaging/oasis-skill/SKILL.md +205 -0
  160. package/skills/06_fMRI_Neuroimaging/oasis-skill/scripts/extract_oasis_phenotype.py +126 -0
  161. package/skills/06_fMRI_Neuroimaging/oasis-skill/scripts/oasis_qc_summary.py +115 -0
  162. package/skills/06_fMRI_Neuroimaging/oasis-skill/scripts/validate_oasis.py +119 -0
  163. package/skills/06_fMRI_Neuroimaging/pet-skill/SKILL.md +173 -0
  164. package/skills/06_fMRI_Neuroimaging/pet-skill/scripts/compute_suvr.py +202 -0
  165. package/skills/06_fMRI_Neuroimaging/pnc-skill/SKILL.md +206 -0
  166. package/skills/06_fMRI_Neuroimaging/pnc-skill/scripts/extract_pnc_phenotype.py +136 -0
  167. package/skills/06_fMRI_Neuroimaging/pnc-skill/scripts/pnc_qc_summary.py +116 -0
  168. package/skills/06_fMRI_Neuroimaging/pnc-skill/scripts/validate_pnc.py +120 -0
  169. package/skills/06_fMRI_Neuroimaging/ppmi-skill/SKILL.md +209 -0
  170. package/skills/06_fMRI_Neuroimaging/ppmi-skill/scripts/extract_ppmi_phenotype.py +138 -0
  171. package/skills/06_fMRI_Neuroimaging/ppmi-skill/scripts/ppmi_qc_summary.py +111 -0
  172. package/skills/06_fMRI_Neuroimaging/ppmi-skill/scripts/validate_ppmi.py +117 -0
  173. package/skills/06_fMRI_Neuroimaging/qsiprep-tool/SKILL.md +320 -0
  174. package/skills/06_fMRI_Neuroimaging/rest-mneta-mdd-skill/SKILL.md +215 -0
  175. package/skills/06_fMRI_Neuroimaging/rest-mneta-mdd-skill/scripts/extract_rest_mdd_phenotype.py +132 -0
  176. package/skills/06_fMRI_Neuroimaging/rest-mneta-mdd-skill/scripts/harmonize_sites.py +152 -0
  177. package/skills/06_fMRI_Neuroimaging/rest-mneta-mdd-skill/scripts/rest_mdd_qc_summary.py +124 -0
  178. package/skills/06_fMRI_Neuroimaging/rest-mneta-mdd-skill/scripts/validate_rest_mdd.py +103 -0
  179. package/skills/06_fMRI_Neuroimaging/smri-skill/SKILL.md +302 -0
  180. package/skills/06_fMRI_Neuroimaging/tcp-skill/SKILL.md +204 -0
  181. package/skills/06_fMRI_Neuroimaging/tcp-skill/scripts/extract_tcp_phenotype.py +139 -0
  182. package/skills/06_fMRI_Neuroimaging/tcp-skill/scripts/tcp_qc_summary.py +111 -0
  183. package/skills/06_fMRI_Neuroimaging/tcp-skill/scripts/validate_tcp.py +99 -0
  184. package/skills/06_fMRI_Neuroimaging/ucla-cnp-skill/SKILL.md +217 -0
  185. package/skills/06_fMRI_Neuroimaging/ucla-cnp-skill/scripts/extract_ucla_cnp_phenotype.py +145 -0
  186. package/skills/06_fMRI_Neuroimaging/ucla-cnp-skill/scripts/ucla_cnp_qc_summary.py +111 -0
  187. package/skills/06_fMRI_Neuroimaging/ucla-cnp-skill/scripts/validate_ucla_cnp.py +113 -0
  188. package/skills/06_fMRI_Neuroimaging/ukb-skill/SKILL.md +310 -0
  189. package/skills/06_fMRI_Neuroimaging/ukb-skill/scripts/build_ukb_survival.py +210 -0
  190. package/skills/06_fMRI_Neuroimaging/ukb-skill/scripts/extract_ukb_cases.py +308 -0
  191. package/skills/06_fMRI_Neuroimaging/ukb-skill/scripts/extract_ukb_phenotype.py +232 -0
  192. package/skills/06_fMRI_Neuroimaging/ukb-skill/scripts/ukb_qc_summary.py +158 -0
  193. package/skills/06_fMRI_Neuroimaging/wmh-segmentation/SKILL.md +133 -0
  194. package/skills/07_Computational_Modeling/detrending/SKILL.md +118 -0
  195. package/skills/07_Computational_Modeling/dictlearning/SKILL.md +122 -0
  196. package/skills/07_Computational_Modeling/filtering/SKILL.md +121 -0
  197. package/skills/07_Computational_Modeling/glm/SKILL.md +153 -0
  198. package/skills/07_Computational_Modeling/hierarchical/SKILL.md +121 -0
  199. package/skills/07_Computational_Modeling/ica/SKILL.md +122 -0
  200. package/skills/07_Computational_Modeling/kmeans/SKILL.md +119 -0
  201. package/skills/07_Computational_Modeling/run_models/SKILL.md +427 -0
  202. package/skills/07_Computational_Modeling/spacenet/SKILL.md +122 -0
  203. package/skills/07_Computational_Modeling/svm/SKILL.md +120 -0
  204. package/skills/08_Computational_Neuroscience/brain_gnn/SKILL.md +183 -0
  205. package/skills/08_Computational_Neuroscience/dipy-tool/SKILL.md +239 -0
  206. package/skills/08_Computational_Neuroscience/dipy-tool/scripts/dti_metrics_reference.py +70 -0
  207. package/skills/08_Computational_Neuroscience/dipy-tool/scripts/load_and_mask_reference.py +76 -0
  208. package/skills/08_Computational_Neuroscience/dipy-tool/scripts/roi_stats_reference.py +59 -0
  209. package/skills/08_Computational_Neuroscience/fm_app/SKILL.md +195 -0
  210. package/skills/08_Computational_Neuroscience/neurostorm/SKILL.md +151 -0
  211. package/skills/13_Visualization/brain-visualization/SKILL.md +191 -0
  212. package/skills/13_Visualization/brain-visualization/scripts/connectome_reference.py +108 -0
  213. package/skills/13_Visualization/brain-visualization/scripts/freesurfer_ply_reference.py +54 -0
  214. package/skills/13_Visualization/brain-visualization/scripts/zalff_summary_reference.py +116 -0
  215. package/skills/13_Visualization/ethoclaw-paper-figure-layout/SKILL.md +78 -0
  216. package/skills/13_Visualization/ethoclaw-paper-figure-layout/assets/naturecomm_figures.tex +74 -0
  217. package/skills/13_Visualization/ethoclaw-paper-figure-layout/scripts/layout_results_foldered.py +579 -0
  218. package/skills/14_Writing/overleaf-skill/SKILL.md +184 -0
  219. package/skills/14_Writing/overleaf-skill/scripts/install.sh +30 -0
  220. package/skills/14_Writing/paper-writing/SKILL.md +146 -0
  221. package/skills/14_Writing/paper-writing/scripts/data_statement_templates.py +164 -0
  222. package/skills/14_Writing/paper-writing/scripts/figure_templates.py +315 -0
  223. package/skills/14_Writing/paper-writing/scripts/nature_figure_style.py +214 -0
  224. package/skills/14_Writing/paper-writing/scripts/section_phrasebank.py +246 -0
  225. package/skills/16_Animal_Behavior/deeplabcut/SKILL.md +154 -0
  226. package/skills/16_Animal_Behavior/deeplabcut/references/3d-pose.md +89 -0
  227. package/skills/16_Animal_Behavior/deeplabcut/references/maDLC.md +123 -0
  228. package/skills/16_Animal_Behavior/deeplabcut/references/modelzoo.md +98 -0
  229. package/skills/16_Animal_Behavior/deeplabcut/references/standard-pipeline.md +165 -0
  230. package/skills/16_Animal_Behavior/deeplabcut/references/utilities.md +146 -0
  231. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/SKILL.md +274 -0
  232. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/report_template_en.html +112 -0
  233. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/report_template_en.md +21 -0
  234. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/cluster-section.md +5 -0
  235. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/heatmap-section.md +5 -0
  236. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/integrated-interpretation.md +3 -0
  237. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/overview.md +3 -0
  238. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/project-summary.md +3 -0
  239. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/radar-section.md +5 -0
  240. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/raw-trajectory.md +3 -0
  241. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/sample-check.md +3 -0
  242. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/single-subject-section.md +3 -0
  243. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/stats-section.md +5 -0
  244. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/experiment-types/epm.md +52 -0
  245. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/experiment-types/fst.md +37 -0
  246. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/experiment-types/nor.md +39 -0
  247. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/experiment-types/oft.md +43 -0
  248. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/experiment-types/tcst.md +45 -0
  249. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/experiment-types/tst.md +36 -0
  250. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/input-types.md +59 -0
  251. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/interpretation-guardrails.md +45 -0
  252. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/metadata-schema.md +57 -0
  253. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/report-sections.md +86 -0
  254. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/section-selection-rules.md +169 -0
  255. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/scripts/build_report_manifest.py +27 -0
  256. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/scripts/render_report.py +34 -0
  257. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/scripts/report_utils.py +1121 -0
  258. package/skills/16_Animal_Behavior/ethoclaw-animal-grounding/SKILL.md +390 -0
  259. package/skills/16_Animal_Behavior/ethoclaw-animal-grounding/reference_code.py +98 -0
  260. package/skills/16_Animal_Behavior/ethoclaw-animal-pose-estimation/SKILL.md +336 -0
  261. package/skills/16_Animal_Behavior/ethoclaw-kinematic-parameter-generator/README.md +21 -0
  262. package/skills/16_Animal_Behavior/ethoclaw-kinematic-parameter-generator/SKILL.md +41 -0
  263. package/skills/16_Animal_Behavior/ethoclaw-kinematic-parameter-generator/batch_kinematic_generator.py +663 -0
  264. package/skills/16_Animal_Behavior/ethoclaw-kinematic-parameter-generator/config.json +19 -0
  265. package/skills/16_Animal_Behavior/ethoclaw-kinematic-parameter-generator/generate_kinematic_parameter.py +401 -0
  266. package/skills/16_Animal_Behavior/ethoclaw-kinematic-parameter-generator/kinematic_generator.py +265 -0
  267. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-clustermap-generate/SKILL.md +72 -0
  268. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-clustermap-generate/references/config.example.toml +56 -0
  269. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-clustermap-generate/scripts/cluster_all_params.py +232 -0
  270. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-clustermap-generate/scripts/cluster_all_params_from_config.py +236 -0
  271. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-radar-generate/SKILL.md +68 -0
  272. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-radar-generate/references/notes.md +5 -0
  273. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-radar-generate/scripts/plot_h5_radar.py +513 -0
  274. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-violin-stats-generate/SKILL.md +52 -0
  275. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-violin-stats-generate/config.toml +81 -0
  276. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-violin-stats-generate/references/stats-rule.md +18 -0
  277. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-violin-stats-generate/scripts/h5_inspect.py +79 -0
  278. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-violin-stats-generate/scripts/h5_violin_batch.py +624 -0
  279. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-violin-stats-generate/scripts/h5_violin_stats.py +438 -0
  280. package/skills/16_Animal_Behavior/ethoclaw-trajectory-velocity-heatmap-generate/SKILL.md +280 -0
  281. package/skills/16_Animal_Behavior/ethoclaw-trajectory-velocity-heatmap-generate/core_scripts/heatmap_trajectory.py +790 -0
  282. package/skills/16_Animal_Behavior/ethoclaw-trajectory-velocity-heatmap-generate/core_scripts/heatmap_velocity.py +855 -0
  283. package/skills/16_Animal_Behavior/ethoclaw-trajectory-velocity-heatmap-generate/reference_data/reference_2d.csv +101 -0
  284. package/skills/16_Animal_Behavior/ethoclaw-trajectory-velocity-heatmap-generate/reference_data/reference_2d.h5 +0 -0
  285. package/skills/16_Animal_Behavior/ethoclaw-trajectory-velocity-heatmap-generate/reference_data/reference_data_readme.md +126 -0
@@ -0,0 +1,258 @@
1
+ #!/usr/bin/env python3
2
+ """Generate per-subject QC summaries and exclusion lists for AOMIC.
3
+
4
+ Combines fMRIPrep confounds and FreeSurfer recon-all metrics to produce a
5
+ unified QC report with configurable exclusion criteria.
6
+ """
7
+ import argparse
8
+ import sys
9
+ from pathlib import Path
10
+ from typing import Dict, List, Tuple
11
+
12
+ try:
13
+ import pandas as pd
14
+ except ImportError:
15
+ print("Error: pandas is required. Install with: pip install pandas", file=sys.stderr)
16
+ sys.exit(1)
17
+
18
+
19
def detect_delimiter(file_path: Path) -> str:
    """Detect whether a delimited text file uses tabs or commas.

    Only the first line is inspected. Undecodable bytes are replaced rather
    than raising, because the decision depends solely on whether a tab
    character is present in the header line.

    Args:
        file_path: Path to the delimited text file.

    Returns:
        ``"\\t"`` if the first line contains a tab, otherwise ``","``
        (this includes empty files).

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(file_path, "r", encoding="utf-8", errors="replace") as f:
        first_line = f.readline()
    return "\t" if "\t" in first_line else ","
26
+
27
+
28
def collect_fmriprep_qc(fmriprep_dir: Path) -> Dict[str, Dict[str, float]]:
    """Collect per-subject motion QC metrics from fMRIPrep confounds files.

    Every confounds table found below ``fmriprep_dir`` is read and reduced to
    its number of rows plus the mean and maximum of its framewise-displacement
    column. When a subject has several confounds files, the largest mean FD and
    the largest max FD across files are kept and the row counts are summed.

    Args:
        fmriprep_dir: Root directory that is searched recursively.

    Returns:
        Mapping of subject ID (``sub-...``) to a dict with ``n_volumes`` and,
        when numeric FD values were available, ``mean_fd`` and ``max_fd``.
        Files that cannot be read are reported on stderr and skipped.
    """
    qc = {}

    # rglob() is already recursive, so plain file-name patterns are enough.
    confounds_patterns = [
        "desc-confounds_timeseries.tsv",
        "*_desc-confounds_timeseries.tsv",
        "confounds.tsv",
    ]
    # De-duplicate and sort so the scan order is deterministic.
    confounds_files = sorted(
        {path for pattern in confounds_patterns for path in fmriprep_dir.rglob(pattern)}
    )

    for confounds_file in confounds_files:
        # Prefer path components below fmriprep_dir so that an ancestor
        # directory that happens to be named "sub-*" does not swallow every
        # subject; fall back to the full path for backward compatibility.
        rel_parts = confounds_file.relative_to(fmriprep_dir).parts
        subject_id = next(
            (part for part in rel_parts if part.startswith("sub-")),
            None,
        )
        if subject_id is None:
            subject_id = next(
                (part for part in confounds_file.parts if part.startswith("sub-")),
                None,
            )
        if subject_id is None:
            continue
        if subject_id == confounds_file.name:
            # Matched the file name itself (e.g. "sub-01_task-..._timeseries.tsv"):
            # keep only the leading "sub-<label>" entity.
            subject_id = subject_id.split("_", 1)[0]

        try:
            delimiter = detect_delimiter(confounds_file)
            df = pd.read_csv(confounds_file, sep=delimiter, low_memory=False)
        except Exception as exc:
            # Best-effort scan: one bad file must not abort the whole run,
            # but the user should know it was skipped.
            print(
                f"[WARN] Skipping unreadable confounds file {confounds_file}: {exc}",
                file=sys.stderr,
            )
            continue

        fd_col = next(
            (name for name in ("framewise_displacement", "fd", "FD") if name in df.columns),
            None,
        )

        metrics = {"n_volumes": len(df)}
        if fd_col is not None:
            fd_values = pd.to_numeric(df[fd_col], errors="coerce").dropna()
            # Leave the FD metrics unset when there is no numeric value at all;
            # reporting 0.0 would make missing data look like zero motion.
            if not fd_values.empty:
                metrics["mean_fd"] = float(fd_values.mean())
                metrics["max_fd"] = float(fd_values.max())

        existing = qc.get(subject_id)
        if existing is None:
            qc[subject_id] = metrics
            continue

        # Merge with earlier files of the same subject: keep the largest FD values.
        for key in ("mean_fd", "max_fd"):
            if key in metrics:
                existing[key] = max(existing.get(key, metrics[key]), metrics[key])
        existing["n_volumes"] += metrics["n_volumes"]

    return qc
84
+
85
+
86
def _parse_aseg_measure(line: str):
    """Parse one ``# Measure name, field, description, value, unit`` line.

    Returns ``(names, value)`` where ``names`` is the set made of the first two
    comma-separated fields, or ``None`` when the line is not a parsable
    measure line.
    """
    header = line.strip().lstrip("#").strip()
    if not header.startswith("Measure"):
        return None
    fields = [field.strip() for field in header[len("Measure"):].split(",")]
    if len(fields) < 4:
        return None
    try:
        # The numeric value is the second-to-last field; the last is the unit.
        value = float(fields[-2])
    except ValueError:
        return None
    return set(fields[:2]), value


def collect_freesurfer_qc(freesurfer_dir: Path) -> Dict[str, Dict[str, float]]:
    """Collect per-subject QC metrics from FreeSurfer recon-all outputs.

    For each ``sub-*`` directory below ``freesurfer_dir`` the function looks
    for a ``recon-all.log`` (to decide whether the run completed) and an
    ``aseg.stats`` file (to read brain volume and eTIV).

    Args:
        freesurfer_dir: Root directory that is searched recursively.

    Returns:
        Mapping of subject ID to a dict with ``completed`` (bool) and, when
        they could be parsed, ``total_brain_volume`` and
        ``estimated_total_intracranial_volume``.
    """
    qc = {}

    for subject_dir in sorted(freesurfer_dir.rglob("sub-*")):
        if not subject_dir.is_dir():
            continue

        # Use the outermost "sub-*" component below freesurfer_dir, so that an
        # ancestor of freesurfer_dir named "sub-*" cannot hijack the ID.
        rel_parts = subject_dir.relative_to(freesurfer_dir).parts
        subject_id = next(
            (part for part in rel_parts if part.startswith("sub-")),
            subject_dir.name,
        )
        if subject_id in qc:
            # Nested "sub-*" directory of a subject that was already handled.
            continue

        completed = False
        log_matches = sorted(freesurfer_dir.rglob(f"{subject_id}/**/recon-all.log"))
        if log_matches:
            recon_log = log_matches[0]
            try:
                with open(recon_log, "r", encoding="utf-8", errors="ignore") as f:
                    content = f.read().lower()
                completed = "finished without error" in content or "recon-all -done" in content
            except OSError as exc:
                print(f"[WARN] Could not read {recon_log}: {exc}", file=sys.stderr)

        # NOTE: "**" must be its own path component; the previous pattern
        # "<id>**/aseg.stats" could never reach <id>/stats/aseg.stats.
        aseg_matches = sorted(freesurfer_dir.rglob(f"{subject_id}/**/aseg.stats"))
        total_brain_vol = None
        etiv = None

        if aseg_matches:
            aseg_file = aseg_matches[0]
            try:
                with open(aseg_file, "r", encoding="utf-8", errors="ignore") as f:
                    for line in f:
                        parsed = _parse_aseg_measure(line)
                        if parsed is None:
                            continue
                        names, value = parsed
                        if "EstimatedTotalIntraCranialVol" in names:
                            etiv = value
                        elif "BrainSegVol" in names:
                            # Exact name match: excludes BrainSegVolNotVent and
                            # the BrainSegVol-to-eTIV ratio line.
                            total_brain_vol = value
            except OSError as exc:
                print(f"[WARN] Could not read {aseg_file}: {exc}", file=sys.stderr)

        metrics = {"completed": completed}
        if total_brain_vol is not None:
            metrics["total_brain_volume"] = total_brain_vol
        if etiv is not None:
            metrics["estimated_total_intracranial_volume"] = etiv

        qc[subject_id] = metrics

    return qc
151
+
152
+
153
def generate_qc_summary(
    fmriprep_qc: Dict[str, Dict[str, float]],
    freesurfer_qc: Dict[str, Dict[str, float]],
    fd_threshold: float = 0.3,
    max_fd_threshold: float = 5.0,
) -> Tuple["pd.DataFrame", List[str]]:
    """Build the per-subject QC table and the list of excluded subjects.

    A subject is excluded when its mean FD exceeds ``fd_threshold``, its max FD
    exceeds ``max_fd_threshold``, or FreeSurfer reported ``completed`` as
    ``False``. Missing metrics never trigger an exclusion.

    Args:
        fmriprep_qc: Subject ID -> metrics with optional ``mean_fd``,
            ``max_fd`` and ``n_volumes`` keys.
        freesurfer_qc: Subject ID -> metrics with optional ``completed``,
            ``total_brain_volume`` and
            ``estimated_total_intracranial_volume`` keys.
        fd_threshold: Upper bound (exclusive trigger) for mean FD.
        max_fd_threshold: Upper bound (exclusive trigger) for max FD.

    Returns:
        ``(summary, excluded)``: one row per subject (sorted by ID) with a
        fixed set of columns, and the sorted IDs of excluded subjects.
    """
    # Fixed column list so that an empty input still yields a usable frame
    # (e.g. df["exclude"] works) instead of a column-less DataFrame.
    columns = [
        "subject_id",
        "mean_fd",
        "max_fd",
        "n_volumes",
        "fs_completed",
        "total_brain_volume",
        "etiv",
        "exclude",
        "exclude_reasons",
    ]

    rows = []
    excluded = []

    for sub_id in sorted(set(fmriprep_qc) | set(freesurfer_qc)):
        fp = fmriprep_qc.get(sub_id, {})
        fs = freesurfer_qc.get(sub_id, {})

        row = {
            "subject_id": sub_id,
            "mean_fd": fp.get("mean_fd"),
            "max_fd": fp.get("max_fd"),
            "n_volumes": fp.get("n_volumes"),
            "fs_completed": fs.get("completed"),
            "total_brain_volume": fs.get("total_brain_volume"),
            "etiv": fs.get("estimated_total_intracranial_volume"),
        }

        exclude_reasons = []
        if row["mean_fd"] is not None and row["mean_fd"] > fd_threshold:
            exclude_reasons.append(f"mean_fd={row['mean_fd']:.3f}>{fd_threshold}")
        if row["max_fd"] is not None and row["max_fd"] > max_fd_threshold:
            exclude_reasons.append(f"max_fd={row['max_fd']:.3f}>{max_fd_threshold}")
        # Identity check on purpose: None means "no FreeSurfer data", not failure.
        if row["fs_completed"] is False:
            exclude_reasons.append("FreeSurfer recon-all incomplete")

        row["exclude"] = bool(exclude_reasons)
        row["exclude_reasons"] = "; ".join(exclude_reasons)

        if exclude_reasons:
            excluded.append(sub_id)
        rows.append(row)

    return pd.DataFrame(rows, columns=columns), excluded
200
+
201
+
202
def main() -> int:
    """Command-line entry point: collect QC metrics and write the CSV outputs.

    Parses the command line, gathers fMRIPrep and/or FreeSurfer QC metrics,
    writes the per-subject summary to ``--output`` and, if requested, the
    exclusion list to ``--exclude-output``.

    Returns:
        Process exit status: 0 on success, 1 when no QC data was collected.
    """
    parser = argparse.ArgumentParser(
        description="Generate per-subject QC summaries and exclusion lists for AOMIC."
    )
    parser.add_argument("--fmriprep-dir", help="Path to fMRIPrep output directory")
    parser.add_argument("--freesurfer-dir", help="Path to FreeSurfer output directory")
    parser.add_argument("--output", required=True, help="Output path for QC summary CSV")
    parser.add_argument("--exclude-output", help="Output path for exclusion list CSV")
    parser.add_argument("--fd-threshold", type=float, default=0.3, help="Mean FD threshold (default: 0.3)")
    parser.add_argument("--max-fd-threshold", type=float, default=5.0, help="Max FD threshold (default: 5.0)")
    args = parser.parse_args()

    fmriprep_qc = {}
    freesurfer_qc = {}

    if args.fmriprep_dir:
        fp_dir = Path(args.fmriprep_dir).resolve()
        if fp_dir.is_dir():
            print(f"Collecting fMRIPrep QC from {fp_dir}...")
            fmriprep_qc = collect_fmriprep_qc(fp_dir)
            print(f"  Found {len(fmriprep_qc)} subjects")
        else:
            # Tell the user which input was unusable instead of silently skipping it.
            print(f"[WARN] fMRIPrep directory not found: {fp_dir}", file=sys.stderr)

    if args.freesurfer_dir:
        fs_dir = Path(args.freesurfer_dir).resolve()
        if fs_dir.is_dir():
            print(f"Collecting FreeSurfer QC from {fs_dir}...")
            freesurfer_qc = collect_freesurfer_qc(fs_dir)
            print(f"  Found {len(freesurfer_qc)} subjects")
        else:
            print(f"[WARN] FreeSurfer directory not found: {fs_dir}", file=sys.stderr)

    if not fmriprep_qc and not freesurfer_qc:
        print("[ERROR] No QC data collected. Check input paths.", file=sys.stderr)
        return 1

    summary_df, excluded = generate_qc_summary(
        fmriprep_qc=fmriprep_qc,
        freesurfer_qc=freesurfer_qc,
        fd_threshold=args.fd_threshold,
        max_fd_threshold=args.max_fd_threshold,
    )

    output_path = Path(args.output).resolve()
    output_path.parent.mkdir(parents=True, exist_ok=True)
    summary_df.to_csv(output_path, index=False)
    print(f"\nQC Summary: {len(summary_df)} subjects -> {output_path}")
    print(f"  Excluded: {len(excluded)} / {len(summary_df)} ({100*len(excluded)/max(len(summary_df),1):.1f}%)")

    if args.exclude_output:
        exclude_path = Path(args.exclude_output).resolve()
        exclude_path.parent.mkdir(parents=True, exist_ok=True)
        # Boolean mask instead of comparing against True.
        exclude_mask = summary_df["exclude"].astype(bool)
        summary_df.loc[exclude_mask, ["subject_id", "exclude_reasons"]].to_csv(exclude_path, index=False)
        print(f"  Exclusion list: {exclude_path}")

    return 0
255
+
256
+
257
+ if __name__ == "__main__":
258
+ sys.exit(main())
@@ -0,0 +1,284 @@
1
+ #!/usr/bin/env python3
2
+ """Extract and merge AOMIC phenotype tables for downstream analysis.
3
+
4
+ Reads AOMIC phenotype files (Big Five personality, Raven's progressive matrices,
5
+ demographics), selects columns, and optionally cross-references with imaging
6
+ subject lists.
7
+ """
8
+ import argparse
9
+ import sys
10
+ from pathlib import Path
11
+ from typing import Dict, List, Optional, Set
12
+
13
+ try:
14
+ import pandas as pd
15
+ except ImportError:
16
+ print("Error: pandas is required. Install with: pip install pandas", file=sys.stderr)
17
+ sys.exit(1)
18
+
19
+ # Default phenotype files to look for
20
+ DEFAULT_PHENOTYPE_FILES = [
21
+ "big_five.csv",
22
+ "big_five.tsv",
23
+ "personality.csv",
24
+ "personality.tsv",
25
+ "ravens.csv",
26
+ "ravens.tsv",
27
+ "ravens_progressive_matrices.csv",
28
+ "ravens_progressive_matrices.tsv",
29
+ "demographics.csv",
30
+ "demographics.tsv",
31
+ "participants.csv",
32
+ "participants.tsv",
33
+ "cognitive.csv",
34
+ "cognitive.tsv",
35
+ ]
36
+
37
+ # Standard column name mapping
38
+ COLUMN_MAP = {
39
+ "participant_id": "subject_id",
40
+ "subject": "subject_id",
41
+ "sub_id": "subject_id",
42
+ "openness": "openness",
43
+ "conscientiousness": "conscientiousness",
44
+ "extraversion": "extraversion",
45
+ "agreeableness": "agreeableness",
46
+ "neuroticism": "neuroticism",
47
+ "big5_o": "openness",
48
+ "big5_c": "conscientiousness",
49
+ "big5_e": "extraversion",
50
+ "big5_a": "agreeableness",
51
+ "big5_n": "neuroticism",
52
+ "ravens_score": "ravens_score",
53
+ "raven": "ravens_score",
54
+ "rpm": "ravens_score",
55
+ "age": "age",
56
+ "sex": "sex",
57
+ "gender": "sex",
58
+ "handedness": "handedness",
59
+ "education": "education",
60
+ }
61
+
62
+
63
+ def detect_delimiter(file_path: Path) -> str:
64
+ """Detect whether file uses tab or comma delimiter."""
65
+ with open(file_path, "r", encoding="utf-8") as f:
66
+ first_line = f.readline()
67
+ if "\t" in first_line:
68
+ return "\t"
69
+ return ","
70
+
71
+
72
+ def read_phenotype_file(file_path: Path) -> Optional["pd.DataFrame"]:
73
+ """Read a single AOMIC phenotype file."""
74
+ delimiter = detect_delimiter(file_path)
75
+ try:
76
+ df = pd.read_csv(file_path, sep=delimiter, low_memory=False)
77
+ return df
78
+ except Exception as e:
79
+ print(f"[WARN] Failed to read {file_path}: {e}", file=sys.stderr)
80
+ return None
81
+
82
+
83
+ def merge_phenotype_tables(
84
+ phenotype_dir: Path,
85
+ columns: Optional[List[str]] = None,
86
+ imaging_ids: Optional[Set[str]] = None,
87
+ drop_missing_threshold: float = 0.5,
88
+ ) -> "pd.DataFrame":
89
+ """Merge multiple AOMIC phenotype tables.
90
+
91
+ Args:
92
+ phenotype_dir: Directory containing phenotype CSV/TSV files.
93
+ columns: Specific columns to select (None = all).
94
+ imaging_ids: Set of subject IDs from imaging data to filter by.
95
+ drop_missing_threshold: Drop columns with > this fraction of missing values.
96
+
97
+ Returns:
98
+ Merged DataFrame.
99
+ """
100
+ phenotype_files = []
101
+ for f in sorted(phenotype_dir.iterdir()):
102
+ if f.is_file() and f.suffix in (".csv", ".tsv"):
103
+ phenotype_files.append(f)
104
+
105
+ if not phenotype_files:
106
+ for name in DEFAULT_PHENOTYPE_FILES:
107
+ candidate = phenotype_dir / name
108
+ if candidate.exists():
109
+ phenotype_files.append(candidate)
110
+
111
+ if not phenotype_files:
112
+ print(f"[ERROR] No phenotype files found in {phenotype_dir}", file=sys.stderr)
113
+ return pd.DataFrame()
114
+
115
+ print(f"Found {len(phenotype_files)} phenotype files:")
116
+ for f in phenotype_files:
117
+ print(f" - {f.name}")
118
+
119
+ dataframes = []
120
+ for f in phenotype_files:
121
+ df = read_phenotype_file(f)
122
+ if df is not None and len(df) > 0:
123
+ # Apply column name mapping
124
+ rename_map = {k: v for k, v in COLUMN_MAP.items() if k in df.columns}
125
+ if rename_map:
126
+ df = df.rename(columns=rename_map)
127
+ # Ensure subject_id column exists
128
+ if "subject_id" not in df.columns:
129
+ # Try first column as subject ID
130
+ first_col = df.columns[0]
131
+ if df[first_col].dtype == object or df[first_col].nunique() == len(df):
132
+ df = df.rename(columns={first_col: "subject_id"})
133
+ dataframes.append(df)
134
+
135
+ if not dataframes:
136
+ return pd.DataFrame()
137
+
138
+ # Find common columns
139
+ common_cols = set(dataframes[0].columns)
140
+ for df in dataframes[1:]:
141
+ common_cols &= set(df.columns)
142
+ common_cols = sorted(common_cols)
143
+ print(f"Common columns across all tables: {len(common_cols)}")
144
+
145
+ # Merge on subject_id
146
+ merge_keys = []
147
+ if "subject_id" in common_cols:
148
+ merge_keys.append("subject_id")
149
+
150
+ if not merge_keys:
151
+ print("[WARN] No merge key found (subject_id). Concatenating instead.")
152
+ merged = pd.concat(dataframes, ignore_index=True)
153
+ else:
154
+ merged = dataframes[0]
155
+ for df in dataframes[1:]:
156
+ new_cols = [c for c in df.columns if c not in merged.columns or c in merge_keys]
157
+ df_subset = df[new_cols]
158
+ merged = pd.merge(merged, df_subset, on=merge_keys, how="outer", suffixes=("", "_dup"))
159
+
160
+ # Drop duplicate columns
161
+ dup_cols = [c for c in merged.columns if c.endswith("_dup")]
162
+ if dup_cols:
163
+ merged = merged.drop(columns=dup_cols)
164
+
165
+ # Filter by imaging IDs
166
+ if imaging_ids and "subject_id" in merged.columns:
167
+ normalized_imaging = set()
168
+ for sid in imaging_ids:
169
+ clean = sid.replace("sub-", "")
170
+ normalized_imaging.add(clean)
171
+ normalized_imaging.add(sid)
172
+
173
+ before_count = len(merged)
174
+ mask = merged["subject_id"].apply(
175
+ lambda x: str(x).strip() in normalized_imaging
176
+ or f"sub-{str(x).strip()}" in normalized_imaging
177
+ )
178
+ merged = merged[mask]
179
+ print(f"Filtered to imaging subjects: {before_count} -> {len(merged)} rows")
180
+
181
+ # Select specific columns
182
+ if columns:
183
+ available = [c for c in columns if c in merged.columns]
184
+ missing = [c for c in columns if c not in merged.columns]
185
+ if missing:
186
+ print(f"[WARN] Columns not found (skipped): {missing}")
187
+ if available:
188
+ merged = merged[available]
189
+
190
+ # Drop columns with too many missing values
191
+ if drop_missing_threshold < 1.0:
192
+ missing_frac = merged.isnull().mean()
193
+ cols_to_drop = missing_frac[missing_frac > drop_missing_threshold].index.tolist()
194
+ if cols_to_drop:
195
+ print(f"Dropping {len(cols_to_drop)} columns with >{drop_missing_threshold*100}% missing")
196
+ merged = merged.drop(columns=cols_to_drop)
197
+
198
+ return merged
199
+
200
+
201
+ def load_imaging_ids(imaging_ids_file: Path) -> Set[str]:
202
+ """Load subject IDs from a BIDS participants.tsv or similar file."""
203
+ ids = set()
204
+ delimiter = detect_delimiter(imaging_ids_file)
205
+ try:
206
+ df = pd.read_csv(imaging_ids_file, sep=delimiter)
207
+ id_col = None
208
+ for col_name in ["participant_id", "subject_id", "sub_id"]:
209
+ if col_name in df.columns:
210
+ id_col = col_name
211
+ break
212
+ if id_col:
213
+ ids = set(df[id_col].astype(str).str.strip())
214
+ except Exception as e:
215
+ print(f"[WARN] Failed to read imaging IDs: {e}", file=sys.stderr)
216
+ return ids
217
+
218
+
219
+ def main() -> int:
220
+ parser = argparse.ArgumentParser(
221
+ description="Extract and merge AOMIC phenotype tables."
222
+ )
223
+ parser.add_argument(
224
+ "--phenotype-dir",
225
+ required=True,
226
+ help="Directory containing AOMIC phenotype CSV/TSV files",
227
+ )
228
+ parser.add_argument(
229
+ "--output",
230
+ required=True,
231
+ help="Output path for merged phenotype CSV",
232
+ )
233
+ parser.add_argument(
234
+ "--columns",
235
+ help="Comma-separated list of columns to select (default: all)",
236
+ )
237
+ parser.add_argument(
238
+ "--imaging-ids",
239
+ help="Path to BIDS participants.tsv or file with imaging subject IDs",
240
+ )
241
+ parser.add_argument(
242
+ "--missing-threshold",
243
+ type=float,
244
+ default=0.5,
245
+ help="Drop columns with more than this fraction of missing values (default: 0.5)",
246
+ )
247
+ args = parser.parse_args()
248
+
249
+ phenotype_dir = Path(args.phenotype_dir).resolve()
250
+ if not phenotype_dir.exists() or not phenotype_dir.is_dir():
251
+ print(f"Phenotype directory does not exist: {phenotype_dir}", file=sys.stderr)
252
+ return 1
253
+
254
+ columns = None
255
+ if args.columns:
256
+ columns = [c.strip() for c in args.columns.split(",")]
257
+
258
+ imaging_ids = None
259
+ if args.imaging_ids:
260
+ imaging_ids_path = Path(args.imaging_ids).resolve()
261
+ if imaging_ids_path.exists():
262
+ imaging_ids = load_imaging_ids(imaging_ids_path)
263
+ print(f"Loaded {len(imaging_ids)} imaging subject IDs")
264
+
265
+ merged = merge_phenotype_tables(
266
+ phenotype_dir=phenotype_dir,
267
+ columns=columns,
268
+ imaging_ids=imaging_ids,
269
+ drop_missing_threshold=args.missing_threshold,
270
+ )
271
+
272
+ if merged.empty:
273
+ print("[ERROR] No data after merging. Check input files.", file=sys.stderr)
274
+ return 1
275
+
276
+ output_path = Path(args.output).resolve()
277
+ output_path.parent.mkdir(parents=True, exist_ok=True)
278
+ merged.to_csv(output_path, index=False)
279
+ print(f"\nWrote {len(merged)} rows x {len(merged.columns)} columns to {output_path}")
280
+ return 0
281
+
282
+
283
+ if __name__ == "__main__":
284
+ sys.exit(main())