@brainpilot/skills 0.0.6 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/skills/01_Meta-Skills/academic-research-hub/SKILL.md +108 -0
- package/skills/01_Meta-Skills/academic-research-hub/scripts/requirements.txt +17 -0
- package/skills/01_Meta-Skills/academic-research-hub/scripts/research.py +781 -0
- package/skills/01_Meta-Skills/beautiful-log/SKILL.md +64 -0
- package/skills/01_Meta-Skills/beautiful-log/scripts/beautiful_log.py +274 -0
- package/skills/01_Meta-Skills/ethoclaw-daily-paper/SKILL.md +130 -0
- package/skills/01_Meta-Skills/ethoclaw-daily-paper/assets/config.template.yaml +54 -0
- package/skills/01_Meta-Skills/ethoclaw-daily-paper/assets/top5_digest_template.md +5 -0
- package/skills/01_Meta-Skills/ethoclaw-daily-paper/scripts/build_top5_digest.py +300 -0
- package/skills/01_Meta-Skills/ethoclaw-daily-paper/scripts/common.py +137 -0
- package/skills/01_Meta-Skills/ethoclaw-daily-paper/scripts/merge_results.py +106 -0
- package/skills/01_Meta-Skills/ethoclaw-daily-paper/scripts/run_pipeline.py +177 -0
- package/skills/01_Meta-Skills/ethoclaw-daily-paper/scripts/search_arxiv.py +162 -0
- package/skills/01_Meta-Skills/ethoclaw-daily-paper/scripts/search_pubmed.py +202 -0
- package/skills/01_Meta-Skills/ethoclaw-normalize-tabular/SKILL.md +173 -0
- package/skills/01_Meta-Skills/ethoclaw-normalize-tabular/scripts/normalize_data.py +874 -0
- package/skills/01_Meta-Skills/ethoclaw-pdf-research/SKILL.md +134 -0
- package/skills/01_Meta-Skills/ethoclaw-pdf-research/references/confirmation-prompts.md +31 -0
- package/skills/01_Meta-Skills/ethoclaw-pdf-research/references/output-patterns.md +45 -0
- package/skills/01_Meta-Skills/ethoclaw-pdf-research/scripts/build_markdown_deliverables.py +41 -0
- package/skills/01_Meta-Skills/ethoclaw-pdf-research/scripts/build_research_log.py +84 -0
- package/skills/01_Meta-Skills/ethoclaw-pdf-research/scripts/build_summary_md.py +63 -0
- package/skills/01_Meta-Skills/ethoclaw-pdf-research/scripts/extract_pdf_bundle.py +140 -0
- package/skills/01_Meta-Skills/experiment-controller/SKILL.md +140 -0
- package/skills/01_Meta-Skills/knowledge-graph-builder/SKILL.md +366 -0
- package/skills/01_Meta-Skills/knowledge-graph-builder/scripts/entity_resolution.py +120 -0
- package/skills/01_Meta-Skills/knowledge-graph-builder/scripts/extraction_prompt_template.txt +19 -0
- package/skills/01_Meta-Skills/knowledge-graph-builder/scripts/graph_query.py +106 -0
- package/skills/01_Meta-Skills/knowledge-graph-builder/scripts/hypothesis_cli_reference.py +42 -0
- package/skills/01_Meta-Skills/knowledge-graph-builder/scripts/new_data_source_template.py +116 -0
- package/skills/01_Meta-Skills/knowledge-graph-builder/scripts/requirements.txt +15 -0
- package/skills/01_Meta-Skills/method-design/SKILL.md +61 -0
- package/skills/01_Meta-Skills/multi-search-engine/SKILL.md +119 -0
- package/skills/01_Meta-Skills/research-idea/SKILL.md +65 -0
- package/skills/05_EEG_ERP/eeg-skill/SKILL.md +197 -0
- package/skills/05_EEG_ERP/meg-skill/SKILL.md +188 -0
- package/skills/05_EEG_ERP/meg-skill/scripts/time_frequency.py +223 -0
- package/skills/05_EEG_ERP/mne-eeg-tool/SKILL.md +165 -0
- package/skills/05_EEG_ERP/mne-eeg-tool/scripts/eeg_pipeline_reference.py +231 -0
- package/skills/05_EEG_ERP/seed-iv-skill/SKILL.md +184 -0
- package/skills/05_EEG_ERP/seed-iv-skill/scripts/classify_seed_iv.py +154 -0
- package/skills/05_EEG_ERP/seed-iv-skill/scripts/extract_seed_iv_features.py +190 -0
- package/skills/05_EEG_ERP/seed-iv-skill/scripts/validate_seed_iv.py +102 -0
- package/skills/05_EEG_ERP/seed-vig-skill/SKILL.md +182 -0
- package/skills/05_EEG_ERP/seed-vig-skill/scripts/classify_seed_vig.py +165 -0
- package/skills/05_EEG_ERP/seed-vig-skill/scripts/extract_seed_vig_features.py +185 -0
- package/skills/05_EEG_ERP/seed-vig-skill/scripts/validate_seed_vig.py +88 -0
- package/skills/06_fMRI_Neuroimaging/abcd-skill/SKILL.md +308 -0
- package/skills/06_fMRI_Neuroimaging/abcd-skill/scripts/abcd_qc_summary.py +449 -0
- package/skills/06_fMRI_Neuroimaging/abcd-skill/scripts/extract_abcd_phenotype.py +292 -0
- package/skills/06_fMRI_Neuroimaging/abcd-skill/scripts/reorganize_abcd.py +387 -0
- package/skills/06_fMRI_Neuroimaging/abide-skill/SKILL.md +302 -0
- package/skills/06_fMRI_Neuroimaging/abide-skill/scripts/abide_qc_summary.py +317 -0
- package/skills/06_fMRI_Neuroimaging/abide-skill/scripts/extract_abide_phenotype.py +267 -0
- package/skills/06_fMRI_Neuroimaging/abide-skill/scripts/reorganize_abide.py +387 -0
- package/skills/06_fMRI_Neuroimaging/adhd200-skill/SKILL.md +244 -0
- package/skills/06_fMRI_Neuroimaging/adhd200-skill/scripts/adhd200_qc_summary.py +98 -0
- package/skills/06_fMRI_Neuroimaging/adhd200-skill/scripts/extract_adhd200_phenotype.py +134 -0
- package/skills/06_fMRI_Neuroimaging/adhd200-skill/scripts/reorganize_adhd200.py +206 -0
- package/skills/06_fMRI_Neuroimaging/adni-skill/SKILL.md +358 -0
- package/skills/06_fMRI_Neuroimaging/adni-skill/scripts/generate_adni_task_files.py +1305 -0
- package/skills/06_fMRI_Neuroimaging/adni-skill/scripts/generate_vqa_from_tasks.py +766 -0
- package/skills/06_fMRI_Neuroimaging/adni-skill/scripts/reorganize_adni.py +491 -0
- package/skills/06_fMRI_Neuroimaging/aibl-skill/SKILL.md +295 -0
- package/skills/06_fMRI_Neuroimaging/aibl-skill/scripts/aibl_qc_summary.py +260 -0
- package/skills/06_fMRI_Neuroimaging/aibl-skill/scripts/extract_aibl_phenotype.py +365 -0
- package/skills/06_fMRI_Neuroimaging/aibl-skill/scripts/reorganize_aibl.py +394 -0
- package/skills/06_fMRI_Neuroimaging/aomic-skill/SKILL.md +292 -0
- package/skills/06_fMRI_Neuroimaging/aomic-skill/scripts/aomic_qc_summary.py +258 -0
- package/skills/06_fMRI_Neuroimaging/aomic-skill/scripts/extract_aomic_phenotype.py +284 -0
- package/skills/06_fMRI_Neuroimaging/aomic-skill/scripts/reorganize_aomic.py +322 -0
- package/skills/06_fMRI_Neuroimaging/asl-skill/SKILL.md +168 -0
- package/skills/06_fMRI_Neuroimaging/asl-skill/scripts/compute_cbf.py +224 -0
- package/skills/06_fMRI_Neuroimaging/bids-organizer/SKILL.md +241 -0
- package/skills/06_fMRI_Neuroimaging/bold5000-skill/SKILL.md +186 -0
- package/skills/06_fMRI_Neuroimaging/bold5000-skill/scripts/bold5000_qc_summary.py +96 -0
- package/skills/06_fMRI_Neuroimaging/bold5000-skill/scripts/extract_bold5000_stimulus.py +125 -0
- package/skills/06_fMRI_Neuroimaging/bold5000-skill/scripts/reorganize_bold5000.py +102 -0
- package/skills/06_fMRI_Neuroimaging/camcan-skill/SKILL.md +213 -0
- package/skills/06_fMRI_Neuroimaging/camcan-skill/scripts/camcan_qc_summary.py +131 -0
- package/skills/06_fMRI_Neuroimaging/camcan-skill/scripts/extract_camcan_phenotype.py +145 -0
- package/skills/06_fMRI_Neuroimaging/camcan-skill/scripts/validate_camcan.py +141 -0
- package/skills/06_fMRI_Neuroimaging/cobre-skill/SKILL.md +201 -0
- package/skills/06_fMRI_Neuroimaging/cobre-skill/scripts/cobre_qc_summary.py +95 -0
- package/skills/06_fMRI_Neuroimaging/cobre-skill/scripts/extract_cobre_phenotype.py +104 -0
- package/skills/06_fMRI_Neuroimaging/cobre-skill/scripts/reorganize_cobre.py +140 -0
- package/skills/06_fMRI_Neuroimaging/conn-tool/SKILL.md +180 -0
- package/skills/06_fMRI_Neuroimaging/dcm2nii/SKILL.md +189 -0
- package/skills/06_fMRI_Neuroimaging/dmt-har-med-skill/SKILL.md +183 -0
- package/skills/06_fMRI_Neuroimaging/dmt-har-med-skill/scripts/dmt_har_med_qc_summary.py +96 -0
- package/skills/06_fMRI_Neuroimaging/dmt-har-med-skill/scripts/extract_dmt_har_med_phenotype.py +121 -0
- package/skills/06_fMRI_Neuroimaging/dmt-har-med-skill/scripts/reorganize_dmt_har_med.py +125 -0
- package/skills/06_fMRI_Neuroimaging/dwi-skill/SKILL.md +359 -0
- package/skills/06_fMRI_Neuroimaging/fmri-skill/SKILL.md +371 -0
- package/skills/06_fMRI_Neuroimaging/fmriprep-tool/SKILL.md +228 -0
- package/skills/06_fMRI_Neuroimaging/freesurfer-tool/SKILL.md +286 -0
- package/skills/06_fMRI_Neuroimaging/freesurfer-tool/scripts/freesurfer_processor.py +145 -0
- package/skills/06_fMRI_Neuroimaging/fsl-tool/SKILL.md +208 -0
- package/skills/06_fMRI_Neuroimaging/hbn-skill/SKILL.md +271 -0
- package/skills/06_fMRI_Neuroimaging/hbn-skill/scripts/extract_hbn_phenotype.py +107 -0
- package/skills/06_fMRI_Neuroimaging/hbn-skill/scripts/hbn_qc_summary.py +96 -0
- package/skills/06_fMRI_Neuroimaging/hbn-skill/scripts/reorganize_hbn.py +150 -0
- package/skills/06_fMRI_Neuroimaging/hcpa-skill/SKILL.md +210 -0
- package/skills/06_fMRI_Neuroimaging/hcpa-skill/scripts/extract_hcpa_phenotype.py +146 -0
- package/skills/06_fMRI_Neuroimaging/hcpa-skill/scripts/hcpa_qc_summary.py +120 -0
- package/skills/06_fMRI_Neuroimaging/hcpa-skill/scripts/reorganize_hcpa.py +155 -0
- package/skills/06_fMRI_Neuroimaging/hcpd-skill/SKILL.md +210 -0
- package/skills/06_fMRI_Neuroimaging/hcpd-skill/scripts/extract_hcpd_phenotype.py +148 -0
- package/skills/06_fMRI_Neuroimaging/hcpd-skill/scripts/hcpd_qc_summary.py +125 -0
- package/skills/06_fMRI_Neuroimaging/hcpd-skill/scripts/reorganize_hcpd.py +146 -0
- package/skills/06_fMRI_Neuroimaging/hcpep-skill/SKILL.md +215 -0
- package/skills/06_fMRI_Neuroimaging/hcpep-skill/scripts/extract_hcpep_phenotype.py +157 -0
- package/skills/06_fMRI_Neuroimaging/hcpep-skill/scripts/hcpep_qc_summary.py +143 -0
- package/skills/06_fMRI_Neuroimaging/hcpep-skill/scripts/reorganize_hcpep.py +146 -0
- package/skills/06_fMRI_Neuroimaging/hcppipeline-tool/SKILL.md +217 -0
- package/skills/06_fMRI_Neuroimaging/hcpya-skill/SKILL.md +214 -0
- package/skills/06_fMRI_Neuroimaging/hcpya-skill/scripts/extract_hcpya_phenotype.py +190 -0
- package/skills/06_fMRI_Neuroimaging/hcpya-skill/scripts/hcpya_qc_summary.py +152 -0
- package/skills/06_fMRI_Neuroimaging/hcpya-skill/scripts/reorganize_hcpya.py +203 -0
- package/skills/06_fMRI_Neuroimaging/ixi-skill/SKILL.md +198 -0
- package/skills/06_fMRI_Neuroimaging/ixi-skill/scripts/ixi_qc_summary.py +137 -0
- package/skills/06_fMRI_Neuroimaging/ixi-skill/scripts/reorganize_ixi.py +190 -0
- package/skills/06_fMRI_Neuroimaging/mnd-skill/SKILL.md +191 -0
- package/skills/06_fMRI_Neuroimaging/mnd-skill/scripts/extract_mnd_phenotype.py +143 -0
- package/skills/06_fMRI_Neuroimaging/mnd-skill/scripts/mnd_qc_summary.py +120 -0
- package/skills/06_fMRI_Neuroimaging/mnd-skill/scripts/validate_mnd.py +107 -0
- package/skills/06_fMRI_Neuroimaging/mschallenge-skill/SKILL.md +203 -0
- package/skills/06_fMRI_Neuroimaging/mschallenge-skill/scripts/analyze_lesions.py +119 -0
- package/skills/06_fMRI_Neuroimaging/mschallenge-skill/scripts/longitudinal_lesion.py +148 -0
- package/skills/06_fMRI_Neuroimaging/mschallenge-skill/scripts/mschallenge_qc_summary.py +132 -0
- package/skills/06_fMRI_Neuroimaging/mschallenge-skill/scripts/validate_mschallenge.py +116 -0
- package/skills/06_fMRI_Neuroimaging/nibabel-skill/SKILL.md +184 -0
- package/skills/06_fMRI_Neuroimaging/nibabel-skill/scripts/atlas_coordinate_reference.py +61 -0
- package/skills/06_fMRI_Neuroimaging/nibabel-skill/scripts/freesurfer_io_reference.py +34 -0
- package/skills/06_fMRI_Neuroimaging/nibabel-skill/scripts/nifti_inspection_reference.py +35 -0
- package/skills/06_fMRI_Neuroimaging/nifd-skill/SKILL.md +205 -0
- package/skills/06_fMRI_Neuroimaging/nifd-skill/scripts/extract_nifd_phenotype.py +132 -0
- package/skills/06_fMRI_Neuroimaging/nifd-skill/scripts/nifd_qc_summary.py +111 -0
- package/skills/06_fMRI_Neuroimaging/nifd-skill/scripts/validate_nifd.py +111 -0
- package/skills/06_fMRI_Neuroimaging/nii2dcm/SKILL.md +143 -0
- package/skills/06_fMRI_Neuroimaging/nilearn-tool/SKILL.md +266 -0
- package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/connectome_reference.py +65 -0
- package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/denoise_timeseries_reference.py +58 -0
- package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/hierarchical_parcellation_reference.py +53 -0
- package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/kmeans_parcellation_reference.py +53 -0
- package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/preprocess_bold_reference.py +76 -0
- package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/rest_dictlearning_reference.py +56 -0
- package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/rest_ica_reference.py +59 -0
- package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/second_level_glm_reference.py +58 -0
- package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/spacenet_classifier_reference.py +59 -0
- package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/svm_classifier_reference.py +60 -0
- package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/task_glm_reference.py +63 -0
- package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/zalff_summary_reference.py +109 -0
- package/skills/06_fMRI_Neuroimaging/nsd-skill/SKILL.md +210 -0
- package/skills/06_fMRI_Neuroimaging/nsd-skill/scripts/extract_nsd_stimulus.py +171 -0
- package/skills/06_fMRI_Neuroimaging/nsd-skill/scripts/nsd_qc_summary.py +142 -0
- package/skills/06_fMRI_Neuroimaging/nsd-skill/scripts/validate_nsd.py +142 -0
- package/skills/06_fMRI_Neuroimaging/oasis-skill/SKILL.md +205 -0
- package/skills/06_fMRI_Neuroimaging/oasis-skill/scripts/extract_oasis_phenotype.py +126 -0
- package/skills/06_fMRI_Neuroimaging/oasis-skill/scripts/oasis_qc_summary.py +115 -0
- package/skills/06_fMRI_Neuroimaging/oasis-skill/scripts/validate_oasis.py +119 -0
- package/skills/06_fMRI_Neuroimaging/pet-skill/SKILL.md +173 -0
- package/skills/06_fMRI_Neuroimaging/pet-skill/scripts/compute_suvr.py +202 -0
- package/skills/06_fMRI_Neuroimaging/pnc-skill/SKILL.md +206 -0
- package/skills/06_fMRI_Neuroimaging/pnc-skill/scripts/extract_pnc_phenotype.py +136 -0
- package/skills/06_fMRI_Neuroimaging/pnc-skill/scripts/pnc_qc_summary.py +116 -0
- package/skills/06_fMRI_Neuroimaging/pnc-skill/scripts/validate_pnc.py +120 -0
- package/skills/06_fMRI_Neuroimaging/ppmi-skill/SKILL.md +209 -0
- package/skills/06_fMRI_Neuroimaging/ppmi-skill/scripts/extract_ppmi_phenotype.py +138 -0
- package/skills/06_fMRI_Neuroimaging/ppmi-skill/scripts/ppmi_qc_summary.py +111 -0
- package/skills/06_fMRI_Neuroimaging/ppmi-skill/scripts/validate_ppmi.py +117 -0
- package/skills/06_fMRI_Neuroimaging/qsiprep-tool/SKILL.md +320 -0
- package/skills/06_fMRI_Neuroimaging/rest-mneta-mdd-skill/SKILL.md +215 -0
- package/skills/06_fMRI_Neuroimaging/rest-mneta-mdd-skill/scripts/extract_rest_mdd_phenotype.py +132 -0
- package/skills/06_fMRI_Neuroimaging/rest-mneta-mdd-skill/scripts/harmonize_sites.py +152 -0
- package/skills/06_fMRI_Neuroimaging/rest-mneta-mdd-skill/scripts/rest_mdd_qc_summary.py +124 -0
- package/skills/06_fMRI_Neuroimaging/rest-mneta-mdd-skill/scripts/validate_rest_mdd.py +103 -0
- package/skills/06_fMRI_Neuroimaging/smri-skill/SKILL.md +302 -0
- package/skills/06_fMRI_Neuroimaging/tcp-skill/SKILL.md +204 -0
- package/skills/06_fMRI_Neuroimaging/tcp-skill/scripts/extract_tcp_phenotype.py +139 -0
- package/skills/06_fMRI_Neuroimaging/tcp-skill/scripts/tcp_qc_summary.py +111 -0
- package/skills/06_fMRI_Neuroimaging/tcp-skill/scripts/validate_tcp.py +99 -0
- package/skills/06_fMRI_Neuroimaging/ucla-cnp-skill/SKILL.md +217 -0
- package/skills/06_fMRI_Neuroimaging/ucla-cnp-skill/scripts/extract_ucla_cnp_phenotype.py +145 -0
- package/skills/06_fMRI_Neuroimaging/ucla-cnp-skill/scripts/ucla_cnp_qc_summary.py +111 -0
- package/skills/06_fMRI_Neuroimaging/ucla-cnp-skill/scripts/validate_ucla_cnp.py +113 -0
- package/skills/06_fMRI_Neuroimaging/ukb-skill/SKILL.md +310 -0
- package/skills/06_fMRI_Neuroimaging/ukb-skill/scripts/build_ukb_survival.py +210 -0
- package/skills/06_fMRI_Neuroimaging/ukb-skill/scripts/extract_ukb_cases.py +308 -0
- package/skills/06_fMRI_Neuroimaging/ukb-skill/scripts/extract_ukb_phenotype.py +232 -0
- package/skills/06_fMRI_Neuroimaging/ukb-skill/scripts/ukb_qc_summary.py +158 -0
- package/skills/06_fMRI_Neuroimaging/wmh-segmentation/SKILL.md +133 -0
- package/skills/07_Computational_Modeling/detrending/SKILL.md +118 -0
- package/skills/07_Computational_Modeling/dictlearning/SKILL.md +122 -0
- package/skills/07_Computational_Modeling/filtering/SKILL.md +121 -0
- package/skills/07_Computational_Modeling/glm/SKILL.md +153 -0
- package/skills/07_Computational_Modeling/hierarchical/SKILL.md +121 -0
- package/skills/07_Computational_Modeling/ica/SKILL.md +122 -0
- package/skills/07_Computational_Modeling/kmeans/SKILL.md +119 -0
- package/skills/07_Computational_Modeling/run_models/SKILL.md +427 -0
- package/skills/07_Computational_Modeling/spacenet/SKILL.md +122 -0
- package/skills/07_Computational_Modeling/svm/SKILL.md +120 -0
- package/skills/08_Computational_Neuroscience/brain_gnn/SKILL.md +183 -0
- package/skills/08_Computational_Neuroscience/dipy-tool/SKILL.md +239 -0
- package/skills/08_Computational_Neuroscience/dipy-tool/scripts/dti_metrics_reference.py +70 -0
- package/skills/08_Computational_Neuroscience/dipy-tool/scripts/load_and_mask_reference.py +76 -0
- package/skills/08_Computational_Neuroscience/dipy-tool/scripts/roi_stats_reference.py +59 -0
- package/skills/08_Computational_Neuroscience/fm_app/SKILL.md +195 -0
- package/skills/08_Computational_Neuroscience/neurostorm/SKILL.md +151 -0
- package/skills/13_Visualization/brain-visualization/SKILL.md +191 -0
- package/skills/13_Visualization/brain-visualization/scripts/connectome_reference.py +108 -0
- package/skills/13_Visualization/brain-visualization/scripts/freesurfer_ply_reference.py +54 -0
- package/skills/13_Visualization/brain-visualization/scripts/zalff_summary_reference.py +116 -0
- package/skills/13_Visualization/ethoclaw-paper-figure-layout/SKILL.md +78 -0
- package/skills/13_Visualization/ethoclaw-paper-figure-layout/assets/naturecomm_figures.tex +74 -0
- package/skills/13_Visualization/ethoclaw-paper-figure-layout/scripts/layout_results_foldered.py +579 -0
- package/skills/14_Writing/overleaf-skill/SKILL.md +184 -0
- package/skills/14_Writing/overleaf-skill/scripts/install.sh +30 -0
- package/skills/14_Writing/paper-writing/SKILL.md +146 -0
- package/skills/14_Writing/paper-writing/scripts/data_statement_templates.py +164 -0
- package/skills/14_Writing/paper-writing/scripts/figure_templates.py +315 -0
- package/skills/14_Writing/paper-writing/scripts/nature_figure_style.py +214 -0
- package/skills/14_Writing/paper-writing/scripts/section_phrasebank.py +246 -0
- package/skills/16_Animal_Behavior/deeplabcut/SKILL.md +154 -0
- package/skills/16_Animal_Behavior/deeplabcut/references/3d-pose.md +89 -0
- package/skills/16_Animal_Behavior/deeplabcut/references/maDLC.md +123 -0
- package/skills/16_Animal_Behavior/deeplabcut/references/modelzoo.md +98 -0
- package/skills/16_Animal_Behavior/deeplabcut/references/standard-pipeline.md +165 -0
- package/skills/16_Animal_Behavior/deeplabcut/references/utilities.md +146 -0
- package/skills/16_Animal_Behavior/ethoclaw-analysis-report/SKILL.md +274 -0
- package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/report_template_en.html +112 -0
- package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/report_template_en.md +21 -0
- package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/cluster-section.md +5 -0
- package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/heatmap-section.md +5 -0
- package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/integrated-interpretation.md +3 -0
- package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/overview.md +3 -0
- package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/project-summary.md +3 -0
- package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/radar-section.md +5 -0
- package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/raw-trajectory.md +3 -0
- package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/sample-check.md +3 -0
- package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/single-subject-section.md +3 -0
- package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/stats-section.md +5 -0
- package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/experiment-types/epm.md +52 -0
- package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/experiment-types/fst.md +37 -0
- package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/experiment-types/nor.md +39 -0
- package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/experiment-types/oft.md +43 -0
- package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/experiment-types/tcst.md +45 -0
- package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/experiment-types/tst.md +36 -0
- package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/input-types.md +59 -0
- package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/interpretation-guardrails.md +45 -0
- package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/metadata-schema.md +57 -0
- package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/report-sections.md +86 -0
- package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/section-selection-rules.md +169 -0
- package/skills/16_Animal_Behavior/ethoclaw-analysis-report/scripts/build_report_manifest.py +27 -0
- package/skills/16_Animal_Behavior/ethoclaw-analysis-report/scripts/render_report.py +34 -0
- package/skills/16_Animal_Behavior/ethoclaw-analysis-report/scripts/report_utils.py +1121 -0
- package/skills/16_Animal_Behavior/ethoclaw-animal-grounding/SKILL.md +390 -0
- package/skills/16_Animal_Behavior/ethoclaw-animal-grounding/reference_code.py +98 -0
- package/skills/16_Animal_Behavior/ethoclaw-animal-pose-estimation/SKILL.md +336 -0
- package/skills/16_Animal_Behavior/ethoclaw-kinematic-parameter-generator/README.md +21 -0
- package/skills/16_Animal_Behavior/ethoclaw-kinematic-parameter-generator/SKILL.md +41 -0
- package/skills/16_Animal_Behavior/ethoclaw-kinematic-parameter-generator/batch_kinematic_generator.py +663 -0
- package/skills/16_Animal_Behavior/ethoclaw-kinematic-parameter-generator/config.json +19 -0
- package/skills/16_Animal_Behavior/ethoclaw-kinematic-parameter-generator/generate_kinematic_parameter.py +401 -0
- package/skills/16_Animal_Behavior/ethoclaw-kinematic-parameter-generator/kinematic_generator.py +265 -0
- package/skills/16_Animal_Behavior/ethoclaw-multiparameter-clustermap-generate/SKILL.md +72 -0
- package/skills/16_Animal_Behavior/ethoclaw-multiparameter-clustermap-generate/references/config.example.toml +56 -0
- package/skills/16_Animal_Behavior/ethoclaw-multiparameter-clustermap-generate/scripts/cluster_all_params.py +232 -0
- package/skills/16_Animal_Behavior/ethoclaw-multiparameter-clustermap-generate/scripts/cluster_all_params_from_config.py +236 -0
- package/skills/16_Animal_Behavior/ethoclaw-multiparameter-radar-generate/SKILL.md +68 -0
- package/skills/16_Animal_Behavior/ethoclaw-multiparameter-radar-generate/references/notes.md +5 -0
- package/skills/16_Animal_Behavior/ethoclaw-multiparameter-radar-generate/scripts/plot_h5_radar.py +513 -0
- package/skills/16_Animal_Behavior/ethoclaw-multiparameter-violin-stats-generate/SKILL.md +52 -0
- package/skills/16_Animal_Behavior/ethoclaw-multiparameter-violin-stats-generate/config.toml +81 -0
- package/skills/16_Animal_Behavior/ethoclaw-multiparameter-violin-stats-generate/references/stats-rule.md +18 -0
- package/skills/16_Animal_Behavior/ethoclaw-multiparameter-violin-stats-generate/scripts/h5_inspect.py +79 -0
- package/skills/16_Animal_Behavior/ethoclaw-multiparameter-violin-stats-generate/scripts/h5_violin_batch.py +624 -0
- package/skills/16_Animal_Behavior/ethoclaw-multiparameter-violin-stats-generate/scripts/h5_violin_stats.py +438 -0
- package/skills/16_Animal_Behavior/ethoclaw-trajectory-velocity-heatmap-generate/SKILL.md +280 -0
- package/skills/16_Animal_Behavior/ethoclaw-trajectory-velocity-heatmap-generate/core_scripts/heatmap_trajectory.py +790 -0
- package/skills/16_Animal_Behavior/ethoclaw-trajectory-velocity-heatmap-generate/core_scripts/heatmap_velocity.py +855 -0
- package/skills/16_Animal_Behavior/ethoclaw-trajectory-velocity-heatmap-generate/reference_data/reference_2d.csv +101 -0
- package/skills/16_Animal_Behavior/ethoclaw-trajectory-velocity-heatmap-generate/reference_data/reference_2d.h5 +0 -0
- package/skills/16_Animal_Behavior/ethoclaw-trajectory-velocity-heatmap-generate/reference_data/reference_data_readme.md +126 -0
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Build survival analysis datasets from UK Biobank data.
|
|
3
|
+
|
|
4
|
+
Adapted from UKBAnalytica_v2 survival.R (Nan He, Southern Medical University).
|
|
5
|
+
Computes follow-up time, event status, and handles prevalent/incident case separation.
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
python build_ukb_survival.py --input ukb_raw.csv --disease dementia --output survival.csv
|
|
9
|
+
python build_ukb_survival.py --input ukb_raw.csv --disease stroke --censor-date 2023-10-31 --output stroke_survival.csv
|
|
10
|
+
"""
|
|
11
|
+
import argparse
|
|
12
|
+
import re
|
|
13
|
+
import sys
|
|
14
|
+
from datetime import datetime
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
from typing import Dict, List, Optional
|
|
17
|
+
|
|
18
|
+
import pandas as pd
|
|
19
|
+
|
|
20
|
+
# Disease definitions, defined inline here (not imported from extract_ukb_cases)
|
|
21
|
+
# Per-disease lookup table keyed by the value accepted for --disease.
# Each entry holds:
#   "icd10_pattern"     - regular expression applied to ICD-10 codes
#   "self_report_codes" - list of integer self-report codes for the endpoint
DISEASE_DEFINITIONS = {
    name: {"icd10_pattern": icd10, "self_report_codes": self_report}
    for name, icd10, self_report in (
        ("dementia", "F0[0-9]", [1263]),
        ("alzheimers", "F00|G30", [1263]),
        ("stroke", "I6[0-4]", [1081, 1491, 1583]),
        ("ischaemic_stroke", "I63", []),
        ("parkinsons", "G20", [1254]),
        ("multiple_sclerosis", "G35", [1258]),
        ("epilepsy", "G4[0-1]", [1262]),
        ("depression", "F3[2-3]", [1286, 1530]),
        ("anxiety", "F4[0-1]", [1287, 1531]),
        ("schizophrenia", "F20", [1289]),
    )
}
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def parse_dates(series: pd.Series) -> pd.Series:
    """Convert a Series of date values to datetimes.

    Each element is parsed independently (``format="mixed"``); values that
    cannot be parsed become ``NaT`` instead of raising (``errors="coerce"``).
    """
    parsed = pd.to_datetime(series, format="mixed", errors="coerce")
    return parsed
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def find_first_diagnosis_date(
    row: pd.Series, pattern: str, date_cols: List[str], code_cols: List[str]
) -> Optional[datetime]:
    """Find the earliest diagnosis date matching an ICD pattern for one subject.

    Codes are extracted from the cells named by ``code_cols`` (any token shaped
    like a letter followed by 2-3 digits, case-insensitive) and dates from the
    cells named by ``date_cols`` (``YYYY-MM-DD`` substrings).

    Alignment of codes to dates:

    * When ``code_cols`` and ``date_cols`` have the same length, columns are
      paired one-to-one and, inside each pair, the i-th code is matched with
      the i-th date of the same pair.
    * Otherwise (e.g. one delimited code column and one date column per array
      index) the codes of all code columns are pooled in order and the i-th
      code is matched with the i-th date slot, where every date column
      contributes its dates, or a single empty slot when it holds none.
      Previously this layout was silently truncated by ``zip`` so only the
      first code could ever receive a date.
      NOTE(review): assumes the column order given by the caller follows the
      array index order of the codes - confirm against the export format.

    Args:
        row: One subject's record.
        pattern: Regular expression applied with ``re.match`` semantics
            (anchored at the start of each extracted code).
        date_cols: Names of the columns holding diagnosis dates.
        code_cols: Names of the columns holding ICD codes.

    Returns:
        The earliest matching diagnosis date, or ``None`` when no code matches
        or no matching code has a usable date.
    """
    matcher = re.compile(pattern)

    def _cell_text(col: str) -> str:
        """Return the cell as stripped text, or '' when it is missing/NaN."""
        value = row.get(col, "")
        return "" if pd.isna(value) else str(value).strip()

    def _codes(col: str) -> List[str]:
        return re.findall(r"[A-Z][0-9]{2,3}", _cell_text(col).upper())

    def _dates(col: str) -> List[str]:
        return re.findall(r"\d{4}-\d{2}-\d{2}", _cell_text(col))

    pairs = []
    if len(code_cols) == len(date_cols):
        # Column-wise pairing; zip() drops codes that have no date in the cell.
        for code_col, date_col in zip(code_cols, date_cols):
            pairs.extend(zip(_codes(code_col), _dates(date_col)))
    else:
        pooled_codes = [code for col in code_cols for code in _codes(col)]
        date_slots = []
        for col in date_cols:
            found = _dates(col)
            # An empty cell still occupies one position so later dates keep
            # their index.
            date_slots.extend(found if found else [None])
        pairs.extend(zip(pooled_codes, date_slots))

    earliest = None
    for code, date_text in pairs:
        if date_text is None or not matcher.match(code):
            continue
        try:
            dt = datetime.strptime(date_text, "%Y-%m-%d")
        except ValueError:
            # Shaped like a date but not a real calendar day (e.g. 2010-13-45).
            continue
        if earliest is None or dt < earliest:
            earliest = dt

    return earliest
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def build_survival_dataset(
    df: pd.DataFrame,
    disease_key: str,
    censor_date: str = "2023-10-31",
    baseline_col: str = "p53_i0",
) -> pd.DataFrame:
    """Build survival dataset with prevalent/incident case separation.

    For every row of ``df`` the subject is classified as:

    * missing baseline - no usable baseline date; ``outcome_status`` and
      ``survival_years`` are ``pd.NA``;
    * prevalent - first matching diagnosis on or before baseline; not at
      risk, so ``outcome_status`` and ``survival_years`` are ``pd.NA``;
    * incident - first matching diagnosis after baseline;
      ``outcome_status`` is 1 and follow-up runs from baseline to diagnosis;
    * censored - no matching diagnosis; ``outcome_status`` is 0 and
      follow-up runs from baseline to the earlier of death and ``censor_date``.

    Follow-up is expressed in years as ``days / 365.25``. A summary is printed
    to stdout.

    Args:
        df: Input table. Must contain an ``eid`` column; columns whose names
            start with ``p41270`` are read as ICD-10 codes and those starting
            with ``p41280`` as the corresponding dates.
        disease_key: Key into ``DISEASE_DEFINITIONS``.
        censor_date: Administrative censoring date, ``YYYY-MM-DD``.
        baseline_col: Preferred baseline-date column; ``p53_i0`` and ``p53``
            are tried as fallbacks.

    Returns:
        DataFrame with columns ``eid``, ``<disease>_prevalent``,
        ``<disease>_incident``, ``outcome_status`` and ``survival_years``
        (one row per input row; the columns are present even for empty input).

    Raises:
        ValueError: Unknown ``disease_key`` or malformed ``censor_date``.
        KeyError: ``df`` has rows but no ``eid`` column.
    """
    disease_def = DISEASE_DEFINITIONS.get(disease_key)
    if disease_def is None:
        raise ValueError(f"Unknown disease: {disease_key}")

    pattern = disease_def["icd10_pattern"]
    censor_dt = datetime.strptime(censor_date, "%Y-%m-%d")

    def _first_present(candidates: List[str]) -> Optional[str]:
        """Return the first candidate that is a column of df, else None."""
        return next((col for col in candidates if col in df.columns), None)

    def _cell_date(row: pd.Series, col: Optional[str]):
        """Return row[col] as a timestamp, or None when absent/unparseable.

        pd.to_datetime(..., errors="coerce") yields NaT for missing values.
        NaT is not None and every comparison with it is False, so it must be
        normalised here or the classification below silently misfires.
        """
        if col is None:
            return None
        parsed = pd.to_datetime(row.get(col), errors="coerce")
        return None if pd.isna(parsed) else parsed

    baseline_date_col = _first_present([baseline_col, "p53_i0", "p53"])
    death_date_col = _first_present(["p40000_i0", "p40000"])

    # ICD-10 code and date columns
    code_cols = [c for c in df.columns if c.startswith("p41270")]
    date_cols = [c for c in df.columns if c.startswith("p41280")]

    prevalent_key = f"{disease_key}_prevalent"
    incident_key = f"{disease_key}_incident"

    records = []
    n_prevalent = 0
    n_incident = 0
    n_censored = 0

    for _, row in df.iterrows():
        eid = row["eid"]

        baseline_dt = _cell_date(row, baseline_date_col)
        diag_dt = find_first_diagnosis_date(row, pattern, date_cols, code_cols)
        death_dt = _cell_date(row, death_date_col)

        has_both = diag_dt is not None and baseline_dt is not None
        is_prevalent = has_both and diag_dt <= baseline_dt
        is_incident = has_both and diag_dt > baseline_dt

        if baseline_dt is None:
            # No usable baseline date: cannot place the subject on a timeline.
            outcome_status = pd.NA
            follow_up_years = pd.NA
        elif is_prevalent:
            # Diagnosis before or at baseline: not at risk.
            outcome_status = pd.NA
            follow_up_years = pd.NA
            n_prevalent += 1
        elif is_incident:
            # NOTE(review): a diagnosis dated after censor_date is still
            # counted as an event here (unchanged behaviour) - confirm whether
            # such subjects should instead be censored at censor_date.
            outcome_status = 1
            follow_up_years = (diag_dt - baseline_dt).days / 365.25
            n_incident += 1
        else:
            # Censored: no diagnosis. Follow-up ends at death or at the
            # administrative censoring date, whichever comes first.
            outcome_status = 0
            end_dt = censor_dt if death_dt is None else min(death_dt, censor_dt)
            follow_up_years = (end_dt - baseline_dt).days / 365.25
            n_censored += 1

        records.append({
            "eid": eid,
            prevalent_key: 1 if is_prevalent else 0,
            incident_key: 1 if is_incident else 0,
            "outcome_status": outcome_status,
            "survival_years": follow_up_years,
        })

    # Explicit columns keep the output schema stable when df has no rows.
    result = pd.DataFrame(
        records,
        columns=["eid", prevalent_key, incident_key, "outcome_status", "survival_years"],
    )

    # Summary
    total = len(df)

    def _pct(count: int) -> float:
        """Percentage of total, 0.0 for an empty input (avoids ZeroDivisionError)."""
        return count / total * 100 if total else 0.0

    print(f"\nSurvival dataset: {disease_key}")
    print(f" Total subjects: {total}")
    print(f" Prevalent cases: {n_prevalent} ({_pct(n_prevalent):.1f}%)")
    print(f" Incident cases: {n_incident} ({_pct(n_incident):.1f}%)")
    print(f" Censored: {n_censored} ({_pct(n_censored):.1f}%)")
    print(f" At-risk for analysis: {n_incident + n_censored}")

    return result
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def main() -> int:
    """Command-line entry point: build a survival dataset and write it as CSV.

    Reads the raw CSV given by ``--input``, passes it to
    ``build_survival_dataset`` together with the disease key, censoring date
    and baseline column, and saves the resulting table to ``--output``.

    Returns:
        0 on success, 1 when the input file does not exist.
    """
    cli = argparse.ArgumentParser(description="Build UKB survival dataset.")
    cli.add_argument("--input", required=True, help="Path to UKB raw CSV")
    cli.add_argument("--output", required=True, help="Output path for survival CSV")
    cli.add_argument(
        "--disease",
        required=True,
        help=f"Disease key. Available: {list(DISEASE_DEFINITIONS.keys())}",
    )
    cli.add_argument(
        "--censor-date",
        default="2023-10-31",
        help="Administrative censoring date (default: 2023-10-31)",
    )
    cli.add_argument(
        "--baseline-col",
        default="p53_i0",
        help="Column name for baseline assessment date",
    )
    opts = cli.parse_args()

    # Bail out early with a non-zero status if there is nothing to read.
    source_csv = Path(opts.input).resolve()
    if not source_csv.exists():
        print(f"Input file not found: {source_csv}", file=sys.stderr)
        return 1

    df = pd.read_csv(source_csv, low_memory=False)
    print(f"Loaded {len(df)} subjects")

    survival = build_survival_dataset(
        df, opts.disease, opts.censor_date, opts.baseline_col
    )

    # Create any missing parent directories before writing the result.
    target_csv = Path(opts.output).resolve()
    target_csv.parent.mkdir(parents=True, exist_ok=True)
    survival.to_csv(target_csv, index=False)
    print(f"Saved -> {target_csv}")

    return 0
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
if __name__ == "__main__":
    # Use main()'s integer return value as the process exit status.
    exit_status = main()
    sys.exit(exit_status)
|
|
@@ -0,0 +1,308 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Extract disease cases from UK Biobank data using ICD-10/ICD-9 codes.
|
|
3
|
+
|
|
4
|
+
Adapted from UKBAnalytica_v2 case_extraction.R and ICD_diagnose.R.
|
|
5
|
+
Supports brain-related disease endpoints: dementia, stroke, Parkinson's, etc.
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
python extract_ukb_cases.py --input ukb_raw.csv --disease dementia --output cases.csv
|
|
9
|
+
python extract_ukb_cases.py --input ukb_raw.csv --disease stroke --sources ICD10,Self-report --output stroke_cases.csv
|
|
10
|
+
python extract_ukb_cases.py --input ukb_raw.csv --custom-icd G30 --output custom_cases.csv
|
|
11
|
+
"""
|
|
12
|
+
import argparse
|
|
13
|
+
import csv
|
|
14
|
+
import re
|
|
15
|
+
import sys
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from typing import Dict, List, Optional, Tuple
|
|
18
|
+
|
|
19
|
+
import pandas as pd
|
|
20
|
+
|
|
21
|
+
# Predefined brain-related disease definitions.
#
# Each entry maps a disease key to:
#   description        human-readable label
#   icd10_pattern      regular expression matched against the START of each
#                      ICD-10 code (prefix match)
#   icd10_codes        explicit list of ICD-10 codes for the disease
#   self_report_codes  UKB self-reported illness codes
#
# NOTE(review): the self-report codes for "parkinsons" (1254),
# "multiple_sclerosis" (1258) and "epilepsy" (1262) do not look like the usual
# UKB Data-Coding 6 entries for those conditions — confirm against the
# official coding table before relying on them.
DISEASE_DEFINITIONS = {
    "dementia": {
        "description": "All-cause dementia",
        # NOTE(review): this pattern also matches F04-F08, which are not in
        # icd10_codes below; confirm whether F0[0-3] (plus G30) was intended.
        "icd10_pattern": "F0[0-9]",
        "icd10_codes": ["F00", "F01", "F02", "F03", "F09"],
        "self_report_codes": [1263],  # UKB self-report code for dementia
    },
    "alzheimers": {
        "description": "Alzheimer's disease",
        "icd10_pattern": "F00|G30",
        "icd10_codes": ["F00", "F000", "F001", "F002", "F009", "G30", "G300", "G301", "G308", "G309"],
        "self_report_codes": [1263],
    },
    "vascular_dementia": {
        "description": "Vascular dementia",
        "icd10_pattern": "F01",
        "icd10_codes": ["F010", "F011", "F012", "F013", "F018", "F019"],
        "self_report_codes": [],
    },
    "stroke": {
        "description": "Stroke (ischaemic + haemorrhagic)",
        "icd10_pattern": "I6[0-4]",
        "icd10_codes": ["I60", "I61", "I62", "I63", "I64"],
        "self_report_codes": [1081, 1491, 1583],
    },
    "ischaemic_stroke": {
        "description": "Ischaemic stroke",
        "icd10_pattern": "I63",
        "icd10_codes": ["I630", "I631", "I632", "I633", "I634", "I635", "I636", "I638", "I639"],
        "self_report_codes": [],
    },
    "haemorrhagic_stroke": {
        "description": "Haemorrhagic stroke",
        "icd10_pattern": "I6[0-2]",
        "icd10_codes": ["I60", "I61", "I62"],
        "self_report_codes": [],
    },
    "parkinsons": {
        "description": "Parkinson's disease",
        "icd10_pattern": "G20",
        "icd10_codes": ["G20"],
        "self_report_codes": [1254],
    },
    "multiple_sclerosis": {
        "description": "Multiple sclerosis",
        "icd10_pattern": "G35",
        "icd10_codes": ["G35"],
        "self_report_codes": [1258],
    },
    "epilepsy": {
        "description": "Epilepsy",
        "icd10_pattern": "G4[0-1]",
        "icd10_codes": ["G40", "G41"],
        "self_report_codes": [1262],
    },
    "migraine": {
        "description": "Migraine",
        "icd10_pattern": "G43",
        "icd10_codes": ["G430", "G431", "G432", "G433", "G438", "G439"],
        "self_report_codes": [1265],
    },
    "depression": {
        "description": "Major depressive disorder",
        "icd10_pattern": "F3[2-3]",
        "icd10_codes": ["F320", "F321", "F322", "F323", "F328", "F329", "F330", "F331", "F332", "F333", "F334", "F338", "F339"],
        "self_report_codes": [1286, 1530],
    },
    "anxiety": {
        "description": "Anxiety disorders",
        "icd10_pattern": "F4[0-1]",
        "icd10_codes": ["F400", "F401", "F402", "F408", "F409", "F410", "F411", "F412", "F413", "F418", "F419"],
        "self_report_codes": [1287, 1531],
    },
    "schizophrenia": {
        "description": "Schizophrenia",
        "icd10_pattern": "F20",
        "icd10_codes": ["F200", "F201", "F202", "F203", "F205", "F206", "F208", "F209"],
        "self_report_codes": [1289],
    },
    "bipolar": {
        "description": "Bipolar disorder",
        "icd10_pattern": "F31",
        "icd10_codes": ["F310", "F311", "F312", "F313", "F315", "F316", "F317", "F318", "F319"],
        "self_report_codes": [1291],
    },
    "brain_tumour": {
        "description": "Brain tumour (benign + malignant)",
        "icd10_pattern": "C71|D33|D43",
        "icd10_codes": ["C710", "C711", "C712", "C713", "C719", "D330", "D331", "D332", "D339", "D430", "D431", "D432", "D439"],
        "self_report_codes": [],
    },
    "tbi": {
        "description": "Traumatic brain injury",
        # Restricted to S06 (intracranial injury). The previous pattern
        # "S0[6-9]" also matched S07-S09, which are other head injuries and
        # are absent from icd10_codes below.
        "icd10_pattern": "S06",
        "icd10_codes": ["S060", "S061", "S062", "S063", "S064", "S065", "S066", "S068", "S069"],
        "self_report_codes": [],
    },
}
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def parse_icd10_diagnoses(df: pd.DataFrame) -> pd.DataFrame:
    """Parse ICD-10 diagnosis codes from UKB hospital inpatient data.

    The codes are read from the first of ``p41270`` / ``p41270_i0`` present in
    *df*. Each cell is expected to hold a list-like string such as
    ``"['I639','G20']"`` or ``"I639,G20"``; every token shaped like a letter
    followed by two or three digits is extracted (case-insensitively).

    Args:
        df: Raw table with an ``eid`` column and, optionally, the ICD-10
            diagnosis column.

    Returns:
        A long-format DataFrame with columns ``eid``, ``icd10_code``,
        ``diag_date`` and ``source`` (always ``"ICD10"``), one row per
        extracted code. ``diag_date`` is always missing because this function
        does not read any date column. The same columns are returned whether
        or not any code was found.
    """
    columns = ["eid", "icd10_code", "diag_date", "source"]

    icd10_col = next((c for c in ("p41270", "p41270_i0") if c in df.columns), None)
    if icd10_col is None:
        print("[WARN] No ICD-10 diagnosis column (p41270) found")
        return pd.DataFrame(columns=columns)

    records = []
    # Column-wise iteration avoids building a Series per row (iterrows).
    for eid, raw in zip(df["eid"], df[icd10_col]):
        if pd.isna(raw):
            continue
        # Blank cells simply yield no matches.
        for code in re.findall(r"[A-Z][0-9]{2,3}", str(raw).upper()):
            records.append(
                {"eid": eid, "icd10_code": code, "diag_date": pd.NA, "source": "ICD10"}
            )

    # Passing ``columns`` keeps the schema identical for empty and non-empty results.
    return pd.DataFrame(records, columns=columns)
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def parse_icd9_diagnoses(df: pd.DataFrame) -> pd.DataFrame:
    """Parse ICD-9 diagnosis codes from UKB hospital inpatient data.

    The codes are read from the first of ``p41271`` / ``p41271_i0`` present in
    *df*. Every token made of an optional ``V``/``E`` prefix followed by three
    to five digits is extracted from each cell (case-insensitively).

    Args:
        df: Raw table with an ``eid`` column and, optionally, the ICD-9
            diagnosis column.

    Returns:
        A long-format DataFrame with columns ``eid``, ``icd9_code``,
        ``diag_date`` and ``source`` (always ``"ICD9"``), one row per
        extracted code. ``diag_date`` is always missing because this function
        does not read any date column. The same columns are returned whether
        or not any code was found.
    """
    columns = ["eid", "icd9_code", "diag_date", "source"]

    icd9_col = next((c for c in ("p41271", "p41271_i0") if c in df.columns), None)
    if icd9_col is None:
        return pd.DataFrame(columns=columns)

    records = []
    # Column-wise iteration avoids building a Series per row (iterrows).
    for eid, raw in zip(df["eid"], df[icd9_col]):
        if pd.isna(raw):
            continue
        # Blank cells simply yield no matches.
        for code in re.findall(r"[VE]?[0-9]{3,5}", str(raw).upper()):
            records.append(
                {"eid": eid, "icd9_code": code, "diag_date": pd.NA, "source": "ICD9"}
            )

    # Passing ``columns`` keeps the schema identical for empty and non-empty results.
    return pd.DataFrame(records, columns=columns)
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def extract_cases(
    df: pd.DataFrame,
    disease_key: str,
    custom_icd: Optional[str] = None,
    sources: Optional[List[str]] = None,
) -> Tuple[pd.DataFrame, Dict]:
    """Flag every subject in *df* as a case or non-case for one disease.

    Args:
        df: Raw table with an ``eid`` column plus whichever diagnosis /
            death-register columns are available.
        disease_key: Key into ``DISEASE_DEFINITIONS``. It is also used to name
            the output indicator column ``"<disease_key>_case"``.
        custom_icd: Optional ICD-10 regular expression. When given, it takes
            precedence over the predefined definition for *disease_key*.
        sources: Data sources to search. Supported values are ``"ICD10"``,
            ``"ICD9"`` and ``"Death"``; defaults to ``["ICD10", "ICD9"]``.
            Any other value is reported and ignored.

    Returns:
        ``(result, stats)`` where *result* has one row per row of *df* with
        columns ``eid`` and ``"<disease_key>_case"`` (0/1), and *stats* holds
        ``disease``, ``total_subjects``, ``cases``, ``prevalence`` (percent)
        and ``sources_used``.

    Raises:
        ValueError: If *custom_icd* is not given and *disease_key* is not a
            known disease.
    """
    if sources is None:
        sources = ["ICD10", "ICD9"]

    # Use custom ICD pattern or predefined definition.
    if custom_icd:
        disease_def = {
            "description": "Custom",
            "icd10_pattern": custom_icd,
            "icd10_codes": [custom_icd],
            "self_report_codes": [],
        }
    elif disease_key in DISEASE_DEFINITIONS:
        disease_def = DISEASE_DEFINITIONS[disease_key]
    else:
        raise ValueError(
            f"Unknown disease: {disease_key}. Available: {list(DISEASE_DEFINITIONS.keys())}"
        )

    # Sources without an implementation below would otherwise contribute no
    # cases without any indication to the caller.
    unsupported = [s for s in sources if s not in ("ICD10", "ICD9", "Death")]
    if unsupported:
        print(f"[WARN] Ignoring unsupported sources: {unsupported}")

    pattern = disease_def["icd10_pattern"]
    case_eids = set()

    # ICD-10 from hospital inpatient records: prefix match against the pattern.
    if "ICD10" in sources:
        icd10_df = parse_icd10_diagnoses(df)
        if not icd10_df.empty:
            hits = icd10_df["icd10_code"].str.match(pattern, na=False)
            case_eids.update(icd10_df.loc[hits, "eid"])

    # ICD-9 from hospital inpatient records (simplified): match on the first
    # three characters of each configured code. Definitions without an
    # "icd9_codes" entry contribute nothing here.
    if "ICD9" in sources:
        icd9_df = parse_icd9_diagnoses(df)
        if not icd9_df.empty:
            for icd9_code in disease_def.get("icd9_codes", []):
                hits = icd9_df["icd9_code"].str.startswith(icd9_code[:3], na=False)
                case_eids.update(icd9_df.loc[hits, "eid"])

    # Death register: same ICD-10 pattern applied to the cause-of-death column.
    if "Death" in sources:
        death_col = next((c for c in ("p40001_i0", "p40001") if c in df.columns), None)
        if death_col:
            death_re = re.compile(pattern)  # compiled once, outside the loop
            for eid, raw in zip(df["eid"], df[death_col]):
                if pd.isna(raw):
                    continue
                codes = re.findall(r"[A-Z][0-9]{2,3}", str(raw).upper())
                if any(death_re.match(code) for code in codes):
                    case_eids.add(eid)

    # Build full result: all subjects with a 0/1 case indicator.
    result = pd.DataFrame({"eid": df["eid"]})
    result[f"{disease_key}_case"] = result["eid"].isin(case_eids).astype(int)

    total = len(df)
    stats = {
        "disease": disease_def["description"],
        "total_subjects": total,
        "cases": len(case_eids),
        "prevalence": len(case_eids) / total * 100 if total > 0 else 0,
        "sources_used": sources,
    }

    return result, stats
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
def main() -> int:
    """Command-line entry point: extract disease cases and write them as CSV.

    ``--list-diseases`` prints the predefined definitions and exits without
    needing any other option. Otherwise ``--input``, ``--output`` and one of
    ``--disease`` / ``--custom-icd`` are required.

    Returns:
        0 on success; 1 when no disease is specified, the input file does not
        exist, or the disease key is unknown. A missing ``--input`` /
        ``--output`` exits through argparse's own error path.
    """
    parser = argparse.ArgumentParser(description="Extract UKB disease cases.")
    # --input/--output are validated after parsing (not via required=True) so
    # that --list-diseases can be used on its own.
    parser.add_argument("--input", help="Path to UKB raw CSV")
    parser.add_argument("--output", help="Output path for cases CSV")
    parser.add_argument("--disease", help=f"Disease key. Available: {list(DISEASE_DEFINITIONS.keys())}")
    parser.add_argument("--custom-icd", help="Custom ICD-10 pattern (regex)")
    parser.add_argument("--sources", default="ICD10,ICD9,Death",
                        help="Comma-separated data sources: ICD10,ICD9,Death,Self-report")
    parser.add_argument("--list-diseases", action="store_true", help="List available diseases and exit")
    args = parser.parse_args()

    if args.list_diseases:
        print("Available brain-related disease definitions:")
        for key, info in DISEASE_DEFINITIONS.items():
            print(f"  {key}: {info['description']} (ICD-10: {info['icd10_pattern']})")
        return 0

    if not args.input or not args.output:
        # Same usage message and exit path argparse uses for required options.
        parser.error("the following arguments are required: --input, --output")

    if not args.disease and not args.custom_icd:
        print("Error: --disease or --custom-icd is required", file=sys.stderr)
        return 1

    input_path = Path(args.input).resolve()
    if not input_path.exists():
        print(f"Input file not found: {input_path}", file=sys.stderr)
        return 1

    df = pd.read_csv(input_path, low_memory=False)
    print(f"Loaded {len(df)} subjects")

    sources = [s.strip() for s in args.sources.split(",")]
    try:
        result, stats = extract_cases(df, args.disease or "custom", args.custom_icd, sources)
    except ValueError as exc:
        # Unknown disease key: report it cleanly instead of a traceback.
        print(f"Error: {exc}", file=sys.stderr)
        return 1

    print(f"\nCase extraction: {stats['disease']}")
    print(f"  Total subjects: {stats['total_subjects']}")
    print(f"  Cases: {stats['cases']} ({stats['prevalence']:.2f}%)")
    print(f"  Sources: {stats['sources_used']}")

    output_path = Path(args.output).resolve()
    output_path.parent.mkdir(parents=True, exist_ok=True)
    result.to_csv(output_path, index=False)
    print(f"Saved -> {output_path}")

    return 0
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
if __name__ == "__main__":
    # Use main()'s integer return value as the process exit status.
    exit_status = main()
    sys.exit(exit_status)
|