@brainpilot/skills 0.0.6 → 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (285) hide show
  1. package/package.json +2 -2
  2. package/skills/01_Meta-Skills/academic-research-hub/SKILL.md +108 -0
  3. package/skills/01_Meta-Skills/academic-research-hub/scripts/requirements.txt +17 -0
  4. package/skills/01_Meta-Skills/academic-research-hub/scripts/research.py +781 -0
  5. package/skills/01_Meta-Skills/beautiful-log/SKILL.md +64 -0
  6. package/skills/01_Meta-Skills/beautiful-log/scripts/beautiful_log.py +274 -0
  7. package/skills/01_Meta-Skills/ethoclaw-daily-paper/SKILL.md +130 -0
  8. package/skills/01_Meta-Skills/ethoclaw-daily-paper/assets/config.template.yaml +54 -0
  9. package/skills/01_Meta-Skills/ethoclaw-daily-paper/assets/top5_digest_template.md +5 -0
  10. package/skills/01_Meta-Skills/ethoclaw-daily-paper/scripts/build_top5_digest.py +300 -0
  11. package/skills/01_Meta-Skills/ethoclaw-daily-paper/scripts/common.py +137 -0
  12. package/skills/01_Meta-Skills/ethoclaw-daily-paper/scripts/merge_results.py +106 -0
  13. package/skills/01_Meta-Skills/ethoclaw-daily-paper/scripts/run_pipeline.py +177 -0
  14. package/skills/01_Meta-Skills/ethoclaw-daily-paper/scripts/search_arxiv.py +162 -0
  15. package/skills/01_Meta-Skills/ethoclaw-daily-paper/scripts/search_pubmed.py +202 -0
  16. package/skills/01_Meta-Skills/ethoclaw-normalize-tabular/SKILL.md +173 -0
  17. package/skills/01_Meta-Skills/ethoclaw-normalize-tabular/scripts/normalize_data.py +874 -0
  18. package/skills/01_Meta-Skills/ethoclaw-pdf-research/SKILL.md +134 -0
  19. package/skills/01_Meta-Skills/ethoclaw-pdf-research/references/confirmation-prompts.md +31 -0
  20. package/skills/01_Meta-Skills/ethoclaw-pdf-research/references/output-patterns.md +45 -0
  21. package/skills/01_Meta-Skills/ethoclaw-pdf-research/scripts/build_markdown_deliverables.py +41 -0
  22. package/skills/01_Meta-Skills/ethoclaw-pdf-research/scripts/build_research_log.py +84 -0
  23. package/skills/01_Meta-Skills/ethoclaw-pdf-research/scripts/build_summary_md.py +63 -0
  24. package/skills/01_Meta-Skills/ethoclaw-pdf-research/scripts/extract_pdf_bundle.py +140 -0
  25. package/skills/01_Meta-Skills/experiment-controller/SKILL.md +140 -0
  26. package/skills/01_Meta-Skills/knowledge-graph-builder/SKILL.md +366 -0
  27. package/skills/01_Meta-Skills/knowledge-graph-builder/scripts/entity_resolution.py +120 -0
  28. package/skills/01_Meta-Skills/knowledge-graph-builder/scripts/extraction_prompt_template.txt +19 -0
  29. package/skills/01_Meta-Skills/knowledge-graph-builder/scripts/graph_query.py +106 -0
  30. package/skills/01_Meta-Skills/knowledge-graph-builder/scripts/hypothesis_cli_reference.py +42 -0
  31. package/skills/01_Meta-Skills/knowledge-graph-builder/scripts/new_data_source_template.py +116 -0
  32. package/skills/01_Meta-Skills/knowledge-graph-builder/scripts/requirements.txt +15 -0
  33. package/skills/01_Meta-Skills/method-design/SKILL.md +61 -0
  34. package/skills/01_Meta-Skills/multi-search-engine/SKILL.md +119 -0
  35. package/skills/01_Meta-Skills/research-idea/SKILL.md +65 -0
  36. package/skills/05_EEG_ERP/eeg-skill/SKILL.md +197 -0
  37. package/skills/05_EEG_ERP/meg-skill/SKILL.md +188 -0
  38. package/skills/05_EEG_ERP/meg-skill/scripts/time_frequency.py +223 -0
  39. package/skills/05_EEG_ERP/mne-eeg-tool/SKILL.md +165 -0
  40. package/skills/05_EEG_ERP/mne-eeg-tool/scripts/eeg_pipeline_reference.py +231 -0
  41. package/skills/05_EEG_ERP/seed-iv-skill/SKILL.md +184 -0
  42. package/skills/05_EEG_ERP/seed-iv-skill/scripts/classify_seed_iv.py +154 -0
  43. package/skills/05_EEG_ERP/seed-iv-skill/scripts/extract_seed_iv_features.py +190 -0
  44. package/skills/05_EEG_ERP/seed-iv-skill/scripts/validate_seed_iv.py +102 -0
  45. package/skills/05_EEG_ERP/seed-vig-skill/SKILL.md +182 -0
  46. package/skills/05_EEG_ERP/seed-vig-skill/scripts/classify_seed_vig.py +165 -0
  47. package/skills/05_EEG_ERP/seed-vig-skill/scripts/extract_seed_vig_features.py +185 -0
  48. package/skills/05_EEG_ERP/seed-vig-skill/scripts/validate_seed_vig.py +88 -0
  49. package/skills/06_fMRI_Neuroimaging/abcd-skill/SKILL.md +308 -0
  50. package/skills/06_fMRI_Neuroimaging/abcd-skill/scripts/abcd_qc_summary.py +449 -0
  51. package/skills/06_fMRI_Neuroimaging/abcd-skill/scripts/extract_abcd_phenotype.py +292 -0
  52. package/skills/06_fMRI_Neuroimaging/abcd-skill/scripts/reorganize_abcd.py +387 -0
  53. package/skills/06_fMRI_Neuroimaging/abide-skill/SKILL.md +302 -0
  54. package/skills/06_fMRI_Neuroimaging/abide-skill/scripts/abide_qc_summary.py +317 -0
  55. package/skills/06_fMRI_Neuroimaging/abide-skill/scripts/extract_abide_phenotype.py +267 -0
  56. package/skills/06_fMRI_Neuroimaging/abide-skill/scripts/reorganize_abide.py +387 -0
  57. package/skills/06_fMRI_Neuroimaging/adhd200-skill/SKILL.md +244 -0
  58. package/skills/06_fMRI_Neuroimaging/adhd200-skill/scripts/adhd200_qc_summary.py +98 -0
  59. package/skills/06_fMRI_Neuroimaging/adhd200-skill/scripts/extract_adhd200_phenotype.py +134 -0
  60. package/skills/06_fMRI_Neuroimaging/adhd200-skill/scripts/reorganize_adhd200.py +206 -0
  61. package/skills/06_fMRI_Neuroimaging/adni-skill/SKILL.md +358 -0
  62. package/skills/06_fMRI_Neuroimaging/adni-skill/scripts/generate_adni_task_files.py +1305 -0
  63. package/skills/06_fMRI_Neuroimaging/adni-skill/scripts/generate_vqa_from_tasks.py +766 -0
  64. package/skills/06_fMRI_Neuroimaging/adni-skill/scripts/reorganize_adni.py +491 -0
  65. package/skills/06_fMRI_Neuroimaging/aibl-skill/SKILL.md +295 -0
  66. package/skills/06_fMRI_Neuroimaging/aibl-skill/scripts/aibl_qc_summary.py +260 -0
  67. package/skills/06_fMRI_Neuroimaging/aibl-skill/scripts/extract_aibl_phenotype.py +365 -0
  68. package/skills/06_fMRI_Neuroimaging/aibl-skill/scripts/reorganize_aibl.py +394 -0
  69. package/skills/06_fMRI_Neuroimaging/aomic-skill/SKILL.md +292 -0
  70. package/skills/06_fMRI_Neuroimaging/aomic-skill/scripts/aomic_qc_summary.py +258 -0
  71. package/skills/06_fMRI_Neuroimaging/aomic-skill/scripts/extract_aomic_phenotype.py +284 -0
  72. package/skills/06_fMRI_Neuroimaging/aomic-skill/scripts/reorganize_aomic.py +322 -0
  73. package/skills/06_fMRI_Neuroimaging/asl-skill/SKILL.md +168 -0
  74. package/skills/06_fMRI_Neuroimaging/asl-skill/scripts/compute_cbf.py +224 -0
  75. package/skills/06_fMRI_Neuroimaging/bids-organizer/SKILL.md +241 -0
  76. package/skills/06_fMRI_Neuroimaging/bold5000-skill/SKILL.md +186 -0
  77. package/skills/06_fMRI_Neuroimaging/bold5000-skill/scripts/bold5000_qc_summary.py +96 -0
  78. package/skills/06_fMRI_Neuroimaging/bold5000-skill/scripts/extract_bold5000_stimulus.py +125 -0
  79. package/skills/06_fMRI_Neuroimaging/bold5000-skill/scripts/reorganize_bold5000.py +102 -0
  80. package/skills/06_fMRI_Neuroimaging/camcan-skill/SKILL.md +213 -0
  81. package/skills/06_fMRI_Neuroimaging/camcan-skill/scripts/camcan_qc_summary.py +131 -0
  82. package/skills/06_fMRI_Neuroimaging/camcan-skill/scripts/extract_camcan_phenotype.py +145 -0
  83. package/skills/06_fMRI_Neuroimaging/camcan-skill/scripts/validate_camcan.py +141 -0
  84. package/skills/06_fMRI_Neuroimaging/cobre-skill/SKILL.md +201 -0
  85. package/skills/06_fMRI_Neuroimaging/cobre-skill/scripts/cobre_qc_summary.py +95 -0
  86. package/skills/06_fMRI_Neuroimaging/cobre-skill/scripts/extract_cobre_phenotype.py +104 -0
  87. package/skills/06_fMRI_Neuroimaging/cobre-skill/scripts/reorganize_cobre.py +140 -0
  88. package/skills/06_fMRI_Neuroimaging/conn-tool/SKILL.md +180 -0
  89. package/skills/06_fMRI_Neuroimaging/dcm2nii/SKILL.md +189 -0
  90. package/skills/06_fMRI_Neuroimaging/dmt-har-med-skill/SKILL.md +183 -0
  91. package/skills/06_fMRI_Neuroimaging/dmt-har-med-skill/scripts/dmt_har_med_qc_summary.py +96 -0
  92. package/skills/06_fMRI_Neuroimaging/dmt-har-med-skill/scripts/extract_dmt_har_med_phenotype.py +121 -0
  93. package/skills/06_fMRI_Neuroimaging/dmt-har-med-skill/scripts/reorganize_dmt_har_med.py +125 -0
  94. package/skills/06_fMRI_Neuroimaging/dwi-skill/SKILL.md +359 -0
  95. package/skills/06_fMRI_Neuroimaging/fmri-skill/SKILL.md +371 -0
  96. package/skills/06_fMRI_Neuroimaging/fmriprep-tool/SKILL.md +228 -0
  97. package/skills/06_fMRI_Neuroimaging/freesurfer-tool/SKILL.md +286 -0
  98. package/skills/06_fMRI_Neuroimaging/freesurfer-tool/scripts/freesurfer_processor.py +145 -0
  99. package/skills/06_fMRI_Neuroimaging/fsl-tool/SKILL.md +208 -0
  100. package/skills/06_fMRI_Neuroimaging/hbn-skill/SKILL.md +271 -0
  101. package/skills/06_fMRI_Neuroimaging/hbn-skill/scripts/extract_hbn_phenotype.py +107 -0
  102. package/skills/06_fMRI_Neuroimaging/hbn-skill/scripts/hbn_qc_summary.py +96 -0
  103. package/skills/06_fMRI_Neuroimaging/hbn-skill/scripts/reorganize_hbn.py +150 -0
  104. package/skills/06_fMRI_Neuroimaging/hcpa-skill/SKILL.md +210 -0
  105. package/skills/06_fMRI_Neuroimaging/hcpa-skill/scripts/extract_hcpa_phenotype.py +146 -0
  106. package/skills/06_fMRI_Neuroimaging/hcpa-skill/scripts/hcpa_qc_summary.py +120 -0
  107. package/skills/06_fMRI_Neuroimaging/hcpa-skill/scripts/reorganize_hcpa.py +155 -0
  108. package/skills/06_fMRI_Neuroimaging/hcpd-skill/SKILL.md +210 -0
  109. package/skills/06_fMRI_Neuroimaging/hcpd-skill/scripts/extract_hcpd_phenotype.py +148 -0
  110. package/skills/06_fMRI_Neuroimaging/hcpd-skill/scripts/hcpd_qc_summary.py +125 -0
  111. package/skills/06_fMRI_Neuroimaging/hcpd-skill/scripts/reorganize_hcpd.py +146 -0
  112. package/skills/06_fMRI_Neuroimaging/hcpep-skill/SKILL.md +215 -0
  113. package/skills/06_fMRI_Neuroimaging/hcpep-skill/scripts/extract_hcpep_phenotype.py +157 -0
  114. package/skills/06_fMRI_Neuroimaging/hcpep-skill/scripts/hcpep_qc_summary.py +143 -0
  115. package/skills/06_fMRI_Neuroimaging/hcpep-skill/scripts/reorganize_hcpep.py +146 -0
  116. package/skills/06_fMRI_Neuroimaging/hcppipeline-tool/SKILL.md +217 -0
  117. package/skills/06_fMRI_Neuroimaging/hcpya-skill/SKILL.md +214 -0
  118. package/skills/06_fMRI_Neuroimaging/hcpya-skill/scripts/extract_hcpya_phenotype.py +190 -0
  119. package/skills/06_fMRI_Neuroimaging/hcpya-skill/scripts/hcpya_qc_summary.py +152 -0
  120. package/skills/06_fMRI_Neuroimaging/hcpya-skill/scripts/reorganize_hcpya.py +203 -0
  121. package/skills/06_fMRI_Neuroimaging/ixi-skill/SKILL.md +198 -0
  122. package/skills/06_fMRI_Neuroimaging/ixi-skill/scripts/ixi_qc_summary.py +137 -0
  123. package/skills/06_fMRI_Neuroimaging/ixi-skill/scripts/reorganize_ixi.py +190 -0
  124. package/skills/06_fMRI_Neuroimaging/mnd-skill/SKILL.md +191 -0
  125. package/skills/06_fMRI_Neuroimaging/mnd-skill/scripts/extract_mnd_phenotype.py +143 -0
  126. package/skills/06_fMRI_Neuroimaging/mnd-skill/scripts/mnd_qc_summary.py +120 -0
  127. package/skills/06_fMRI_Neuroimaging/mnd-skill/scripts/validate_mnd.py +107 -0
  128. package/skills/06_fMRI_Neuroimaging/mschallenge-skill/SKILL.md +203 -0
  129. package/skills/06_fMRI_Neuroimaging/mschallenge-skill/scripts/analyze_lesions.py +119 -0
  130. package/skills/06_fMRI_Neuroimaging/mschallenge-skill/scripts/longitudinal_lesion.py +148 -0
  131. package/skills/06_fMRI_Neuroimaging/mschallenge-skill/scripts/mschallenge_qc_summary.py +132 -0
  132. package/skills/06_fMRI_Neuroimaging/mschallenge-skill/scripts/validate_mschallenge.py +116 -0
  133. package/skills/06_fMRI_Neuroimaging/nibabel-skill/SKILL.md +184 -0
  134. package/skills/06_fMRI_Neuroimaging/nibabel-skill/scripts/atlas_coordinate_reference.py +61 -0
  135. package/skills/06_fMRI_Neuroimaging/nibabel-skill/scripts/freesurfer_io_reference.py +34 -0
  136. package/skills/06_fMRI_Neuroimaging/nibabel-skill/scripts/nifti_inspection_reference.py +35 -0
  137. package/skills/06_fMRI_Neuroimaging/nifd-skill/SKILL.md +205 -0
  138. package/skills/06_fMRI_Neuroimaging/nifd-skill/scripts/extract_nifd_phenotype.py +132 -0
  139. package/skills/06_fMRI_Neuroimaging/nifd-skill/scripts/nifd_qc_summary.py +111 -0
  140. package/skills/06_fMRI_Neuroimaging/nifd-skill/scripts/validate_nifd.py +111 -0
  141. package/skills/06_fMRI_Neuroimaging/nii2dcm/SKILL.md +143 -0
  142. package/skills/06_fMRI_Neuroimaging/nilearn-tool/SKILL.md +266 -0
  143. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/connectome_reference.py +65 -0
  144. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/denoise_timeseries_reference.py +58 -0
  145. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/hierarchical_parcellation_reference.py +53 -0
  146. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/kmeans_parcellation_reference.py +53 -0
  147. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/preprocess_bold_reference.py +76 -0
  148. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/rest_dictlearning_reference.py +56 -0
  149. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/rest_ica_reference.py +59 -0
  150. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/second_level_glm_reference.py +58 -0
  151. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/spacenet_classifier_reference.py +59 -0
  152. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/svm_classifier_reference.py +60 -0
  153. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/task_glm_reference.py +63 -0
  154. package/skills/06_fMRI_Neuroimaging/nilearn-tool/scripts/zalff_summary_reference.py +109 -0
  155. package/skills/06_fMRI_Neuroimaging/nsd-skill/SKILL.md +210 -0
  156. package/skills/06_fMRI_Neuroimaging/nsd-skill/scripts/extract_nsd_stimulus.py +171 -0
  157. package/skills/06_fMRI_Neuroimaging/nsd-skill/scripts/nsd_qc_summary.py +142 -0
  158. package/skills/06_fMRI_Neuroimaging/nsd-skill/scripts/validate_nsd.py +142 -0
  159. package/skills/06_fMRI_Neuroimaging/oasis-skill/SKILL.md +205 -0
  160. package/skills/06_fMRI_Neuroimaging/oasis-skill/scripts/extract_oasis_phenotype.py +126 -0
  161. package/skills/06_fMRI_Neuroimaging/oasis-skill/scripts/oasis_qc_summary.py +115 -0
  162. package/skills/06_fMRI_Neuroimaging/oasis-skill/scripts/validate_oasis.py +119 -0
  163. package/skills/06_fMRI_Neuroimaging/pet-skill/SKILL.md +173 -0
  164. package/skills/06_fMRI_Neuroimaging/pet-skill/scripts/compute_suvr.py +202 -0
  165. package/skills/06_fMRI_Neuroimaging/pnc-skill/SKILL.md +206 -0
  166. package/skills/06_fMRI_Neuroimaging/pnc-skill/scripts/extract_pnc_phenotype.py +136 -0
  167. package/skills/06_fMRI_Neuroimaging/pnc-skill/scripts/pnc_qc_summary.py +116 -0
  168. package/skills/06_fMRI_Neuroimaging/pnc-skill/scripts/validate_pnc.py +120 -0
  169. package/skills/06_fMRI_Neuroimaging/ppmi-skill/SKILL.md +209 -0
  170. package/skills/06_fMRI_Neuroimaging/ppmi-skill/scripts/extract_ppmi_phenotype.py +138 -0
  171. package/skills/06_fMRI_Neuroimaging/ppmi-skill/scripts/ppmi_qc_summary.py +111 -0
  172. package/skills/06_fMRI_Neuroimaging/ppmi-skill/scripts/validate_ppmi.py +117 -0
  173. package/skills/06_fMRI_Neuroimaging/qsiprep-tool/SKILL.md +320 -0
  174. package/skills/06_fMRI_Neuroimaging/rest-mneta-mdd-skill/SKILL.md +215 -0
  175. package/skills/06_fMRI_Neuroimaging/rest-mneta-mdd-skill/scripts/extract_rest_mdd_phenotype.py +132 -0
  176. package/skills/06_fMRI_Neuroimaging/rest-mneta-mdd-skill/scripts/harmonize_sites.py +152 -0
  177. package/skills/06_fMRI_Neuroimaging/rest-mneta-mdd-skill/scripts/rest_mdd_qc_summary.py +124 -0
  178. package/skills/06_fMRI_Neuroimaging/rest-mneta-mdd-skill/scripts/validate_rest_mdd.py +103 -0
  179. package/skills/06_fMRI_Neuroimaging/smri-skill/SKILL.md +302 -0
  180. package/skills/06_fMRI_Neuroimaging/tcp-skill/SKILL.md +204 -0
  181. package/skills/06_fMRI_Neuroimaging/tcp-skill/scripts/extract_tcp_phenotype.py +139 -0
  182. package/skills/06_fMRI_Neuroimaging/tcp-skill/scripts/tcp_qc_summary.py +111 -0
  183. package/skills/06_fMRI_Neuroimaging/tcp-skill/scripts/validate_tcp.py +99 -0
  184. package/skills/06_fMRI_Neuroimaging/ucla-cnp-skill/SKILL.md +217 -0
  185. package/skills/06_fMRI_Neuroimaging/ucla-cnp-skill/scripts/extract_ucla_cnp_phenotype.py +145 -0
  186. package/skills/06_fMRI_Neuroimaging/ucla-cnp-skill/scripts/ucla_cnp_qc_summary.py +111 -0
  187. package/skills/06_fMRI_Neuroimaging/ucla-cnp-skill/scripts/validate_ucla_cnp.py +113 -0
  188. package/skills/06_fMRI_Neuroimaging/ukb-skill/SKILL.md +310 -0
  189. package/skills/06_fMRI_Neuroimaging/ukb-skill/scripts/build_ukb_survival.py +210 -0
  190. package/skills/06_fMRI_Neuroimaging/ukb-skill/scripts/extract_ukb_cases.py +308 -0
  191. package/skills/06_fMRI_Neuroimaging/ukb-skill/scripts/extract_ukb_phenotype.py +232 -0
  192. package/skills/06_fMRI_Neuroimaging/ukb-skill/scripts/ukb_qc_summary.py +158 -0
  193. package/skills/06_fMRI_Neuroimaging/wmh-segmentation/SKILL.md +133 -0
  194. package/skills/07_Computational_Modeling/detrending/SKILL.md +118 -0
  195. package/skills/07_Computational_Modeling/dictlearning/SKILL.md +122 -0
  196. package/skills/07_Computational_Modeling/filtering/SKILL.md +121 -0
  197. package/skills/07_Computational_Modeling/glm/SKILL.md +153 -0
  198. package/skills/07_Computational_Modeling/hierarchical/SKILL.md +121 -0
  199. package/skills/07_Computational_Modeling/ica/SKILL.md +122 -0
  200. package/skills/07_Computational_Modeling/kmeans/SKILL.md +119 -0
  201. package/skills/07_Computational_Modeling/run_models/SKILL.md +427 -0
  202. package/skills/07_Computational_Modeling/spacenet/SKILL.md +122 -0
  203. package/skills/07_Computational_Modeling/svm/SKILL.md +120 -0
  204. package/skills/08_Computational_Neuroscience/brain_gnn/SKILL.md +183 -0
  205. package/skills/08_Computational_Neuroscience/dipy-tool/SKILL.md +239 -0
  206. package/skills/08_Computational_Neuroscience/dipy-tool/scripts/dti_metrics_reference.py +70 -0
  207. package/skills/08_Computational_Neuroscience/dipy-tool/scripts/load_and_mask_reference.py +76 -0
  208. package/skills/08_Computational_Neuroscience/dipy-tool/scripts/roi_stats_reference.py +59 -0
  209. package/skills/08_Computational_Neuroscience/fm_app/SKILL.md +195 -0
  210. package/skills/08_Computational_Neuroscience/neurostorm/SKILL.md +151 -0
  211. package/skills/13_Visualization/brain-visualization/SKILL.md +191 -0
  212. package/skills/13_Visualization/brain-visualization/scripts/connectome_reference.py +108 -0
  213. package/skills/13_Visualization/brain-visualization/scripts/freesurfer_ply_reference.py +54 -0
  214. package/skills/13_Visualization/brain-visualization/scripts/zalff_summary_reference.py +116 -0
  215. package/skills/13_Visualization/ethoclaw-paper-figure-layout/SKILL.md +78 -0
  216. package/skills/13_Visualization/ethoclaw-paper-figure-layout/assets/naturecomm_figures.tex +74 -0
  217. package/skills/13_Visualization/ethoclaw-paper-figure-layout/scripts/layout_results_foldered.py +579 -0
  218. package/skills/14_Writing/overleaf-skill/SKILL.md +184 -0
  219. package/skills/14_Writing/overleaf-skill/scripts/install.sh +30 -0
  220. package/skills/14_Writing/paper-writing/SKILL.md +146 -0
  221. package/skills/14_Writing/paper-writing/scripts/data_statement_templates.py +164 -0
  222. package/skills/14_Writing/paper-writing/scripts/figure_templates.py +315 -0
  223. package/skills/14_Writing/paper-writing/scripts/nature_figure_style.py +214 -0
  224. package/skills/14_Writing/paper-writing/scripts/section_phrasebank.py +246 -0
  225. package/skills/16_Animal_Behavior/deeplabcut/SKILL.md +154 -0
  226. package/skills/16_Animal_Behavior/deeplabcut/references/3d-pose.md +89 -0
  227. package/skills/16_Animal_Behavior/deeplabcut/references/maDLC.md +123 -0
  228. package/skills/16_Animal_Behavior/deeplabcut/references/modelzoo.md +98 -0
  229. package/skills/16_Animal_Behavior/deeplabcut/references/standard-pipeline.md +165 -0
  230. package/skills/16_Animal_Behavior/deeplabcut/references/utilities.md +146 -0
  231. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/SKILL.md +274 -0
  232. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/report_template_en.html +112 -0
  233. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/report_template_en.md +21 -0
  234. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/cluster-section.md +5 -0
  235. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/heatmap-section.md +5 -0
  236. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/integrated-interpretation.md +3 -0
  237. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/overview.md +3 -0
  238. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/project-summary.md +3 -0
  239. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/radar-section.md +5 -0
  240. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/raw-trajectory.md +3 -0
  241. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/sample-check.md +3 -0
  242. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/single-subject-section.md +3 -0
  243. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/assets/section_templates/stats-section.md +5 -0
  244. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/experiment-types/epm.md +52 -0
  245. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/experiment-types/fst.md +37 -0
  246. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/experiment-types/nor.md +39 -0
  247. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/experiment-types/oft.md +43 -0
  248. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/experiment-types/tcst.md +45 -0
  249. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/experiment-types/tst.md +36 -0
  250. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/input-types.md +59 -0
  251. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/interpretation-guardrails.md +45 -0
  252. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/metadata-schema.md +57 -0
  253. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/report-sections.md +86 -0
  254. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/references/section-selection-rules.md +169 -0
  255. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/scripts/build_report_manifest.py +27 -0
  256. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/scripts/render_report.py +34 -0
  257. package/skills/16_Animal_Behavior/ethoclaw-analysis-report/scripts/report_utils.py +1121 -0
  258. package/skills/16_Animal_Behavior/ethoclaw-animal-grounding/SKILL.md +390 -0
  259. package/skills/16_Animal_Behavior/ethoclaw-animal-grounding/reference_code.py +98 -0
  260. package/skills/16_Animal_Behavior/ethoclaw-animal-pose-estimation/SKILL.md +336 -0
  261. package/skills/16_Animal_Behavior/ethoclaw-kinematic-parameter-generator/README.md +21 -0
  262. package/skills/16_Animal_Behavior/ethoclaw-kinematic-parameter-generator/SKILL.md +41 -0
  263. package/skills/16_Animal_Behavior/ethoclaw-kinematic-parameter-generator/batch_kinematic_generator.py +663 -0
  264. package/skills/16_Animal_Behavior/ethoclaw-kinematic-parameter-generator/config.json +19 -0
  265. package/skills/16_Animal_Behavior/ethoclaw-kinematic-parameter-generator/generate_kinematic_parameter.py +401 -0
  266. package/skills/16_Animal_Behavior/ethoclaw-kinematic-parameter-generator/kinematic_generator.py +265 -0
  267. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-clustermap-generate/SKILL.md +72 -0
  268. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-clustermap-generate/references/config.example.toml +56 -0
  269. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-clustermap-generate/scripts/cluster_all_params.py +232 -0
  270. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-clustermap-generate/scripts/cluster_all_params_from_config.py +236 -0
  271. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-radar-generate/SKILL.md +68 -0
  272. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-radar-generate/references/notes.md +5 -0
  273. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-radar-generate/scripts/plot_h5_radar.py +513 -0
  274. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-violin-stats-generate/SKILL.md +52 -0
  275. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-violin-stats-generate/config.toml +81 -0
  276. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-violin-stats-generate/references/stats-rule.md +18 -0
  277. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-violin-stats-generate/scripts/h5_inspect.py +79 -0
  278. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-violin-stats-generate/scripts/h5_violin_batch.py +624 -0
  279. package/skills/16_Animal_Behavior/ethoclaw-multiparameter-violin-stats-generate/scripts/h5_violin_stats.py +438 -0
  280. package/skills/16_Animal_Behavior/ethoclaw-trajectory-velocity-heatmap-generate/SKILL.md +280 -0
  281. package/skills/16_Animal_Behavior/ethoclaw-trajectory-velocity-heatmap-generate/core_scripts/heatmap_trajectory.py +790 -0
  282. package/skills/16_Animal_Behavior/ethoclaw-trajectory-velocity-heatmap-generate/core_scripts/heatmap_velocity.py +855 -0
  283. package/skills/16_Animal_Behavior/ethoclaw-trajectory-velocity-heatmap-generate/reference_data/reference_2d.csv +101 -0
  284. package/skills/16_Animal_Behavior/ethoclaw-trajectory-velocity-heatmap-generate/reference_data/reference_2d.h5 +0 -0
  285. package/skills/16_Animal_Behavior/ethoclaw-trajectory-velocity-heatmap-generate/reference_data/reference_data_readme.md +126 -0
@@ -0,0 +1,874 @@
1
+ #!/usr/bin/env python3
2
+ from __future__ import annotations
3
+
4
+ import argparse
5
+ import csv
6
+ import importlib.util
7
+ import json
8
+ import platform
9
+ import re
10
+ import sys
11
+ from pathlib import Path
12
+ from typing import Any
13
+
14
+ try:
15
+ import pandas as pd
16
+ except ImportError:
17
+ pd = None
18
+
19
+ try:
20
+ import h5py
21
+ except ImportError:
22
+ h5py = None
23
+
24
+
25
# Version string of this script; reported in the `check-env` payload.
SCRIPT_VERSION = "1.1.0"

# Maps a lower-cased file suffix to the source-format family name
# returned by `infer_source_format`.
SUPPORTED_FORMATS = {
    ".csv": "csv",
    ".xlsx": "excel",
    ".xls": "excel",
    ".h5": "hdf5",
    ".hdf5": "hdf5",
}

# String values (compared after strip + lower) that are treated as missing.
NULL_MARKERS = {"", "na", "n/a", "null", "none", "nan"}
# HDF5 attribute names probed, in this order, for axis labels.
HDF5_LABEL_ATTRS = ("bodyparts", "keypoints", "points", "node_names", "labels")
# HDF5 attribute names probed, in this order, for column names.
HDF5_COLUMN_ATTRS = ("columns", "column_names", "feature_names", "fields")
38
+
39
+
40
def ensure_pandas() -> None:
    """Fail fast when pandas could not be imported.

    Raises:
        RuntimeError: If the module-level ``pd`` placeholder is ``None``.
            The message includes a suggested pip command.
    """
    if pd is not None:
        return

    message = (
        "Missing dependency: pandas. Install with "
        "`python3 -m pip install pandas openpyxl pyarrow`."
    )
    raise RuntimeError(message)
46
+
47
+
48
def ensure_h5py() -> None:
    """Fail fast when h5py could not be imported.

    Raises:
        RuntimeError: If the module-level ``h5py`` placeholder is ``None``.
            The message includes a suggested pip command.
    """
    if h5py is not None:
        return

    message = (
        "Missing dependency: h5py. Install with "
        "`python3 -m pip install h5py`."
    )
    raise RuntimeError(message)
54
+
55
+
56
def has_module(name: str) -> bool:
    """Report whether the import system can locate module *name*.

    Unlike a bare ``importlib.util.find_spec`` call, this never raises for
    an unavailable module, so it is safe to use as a boolean probe.

    Args:
        name: Absolute module name, optionally dotted (``"pkg.sub"``).

    Returns:
        ``True`` if a spec was found (or the module is already loaded),
        ``False`` otherwise.
    """
    try:
        return importlib.util.find_spec(name) is not None
    except ImportError:
        # For dotted names find_spec imports the parent package first and
        # raises ModuleNotFoundError (an ImportError) when it is missing.
        return False
    except ValueError:
        # Raised when the module is in sys.modules with ``__spec__`` set to
        # None (it is loaded, so report True) or when the name is empty.
        return name in sys.modules
58
+
59
+
60
def check_env() -> dict[str, Any]:
    """Build the environment report for the ``check-env`` command.

    Probes the optional dependencies with ``has_module`` and derives which
    input/output formats are usable. ``ok`` depends on pandas alone; the
    other packages only affect the per-format flags.

    Returns:
        A dict with the overall ``ok`` flag, interpreter and platform
        details, per-dependency availability, per-format support flags and
        an install hint.
    """
    probed = ("pandas", "openpyxl", "pyarrow", "h5py")
    available = {package: has_module(package) for package in probed}

    # pandas is the only hard requirement shared by every format.
    pandas_ready = all([available["pandas"]])

    format_support = {
        "csv": available["pandas"],
        "excel": available["pandas"] and available["openpyxl"],
        "hdf5": available["pandas"] and available["h5py"],
        "parquet_output": available["pandas"] and available["pyarrow"],
    }

    report: dict[str, Any] = {
        "ok": pandas_ready,
        "command": "check-env",
        "script_version": SCRIPT_VERSION,
        "python": sys.version.split()[0],
        "platform": platform.platform(),
        "dependencies": available,
        "format_support": format_support,
        "install_hint": "python3 -m pip install pandas openpyxl pyarrow h5py",
    }
    return report
85
+
86
+
87
def infer_source_format(path: Path) -> str:
    """Map a file path to its source-format family via the file suffix.

    Args:
        path: Path whose (case-insensitive) suffix selects the format.

    Returns:
        The ``SUPPORTED_FORMATS`` value registered for the suffix.

    Raises:
        ValueError: If the suffix is not a key of ``SUPPORTED_FORMATS``.
    """
    extension = path.suffix.lower()
    if extension in SUPPORTED_FORMATS:
        return SUPPORTED_FORMATS[extension]

    known = sorted(SUPPORTED_FORMATS)
    raise ValueError(
        f"Unsupported file type: {extension}. "
        f"Supported: {known}"
    )
95
+
96
+
97
def snake_case(text: str) -> str:
    """Convert arbitrary text into a lower-case ``snake_case`` identifier.

    The input is stringified and stripped, then rewritten in order:
    lower/digit-to-upper boundaries get an underscore, runs of whitespace
    and ``. / -`` become one underscore, remaining characters outside
    ``[0-9a-zA-Z_]`` are dropped, and underscore runs collapse.

    Returns:
        The normalized name, or ``"col"`` when nothing is left.
    """
    # Order matters: separators must become underscores before the
    # catch-all pass removes every other non-identifier character.
    rewrites = (
        (r"([a-z0-9])([A-Z])", r"\1_\2"),
        (r"[\s./-]+", "_"),
        (r"[^0-9a-zA-Z_]+", ""),
        (r"_+", "_"),
    )

    result = str(text).strip()
    for pattern, replacement in rewrites:
        result = re.sub(pattern, replacement, result)

    result = result.strip("_").lower()
    if not result:
        return "col"
    return result
105
+
106
+
107
def sanitize_name_fragment(text: str) -> str:
    """Turn a path-like fragment into a ``snake_case`` name.

    Forward and backward slashes are first replaced by underscores so they
    act as word separators, then the result goes through ``snake_case``.
    """
    slashes_to_underscores = str.maketrans({"/": "_", "\\": "_"})
    return snake_case(text.translate(slashes_to_underscores))
109
+
110
+
111
def flatten_column_name(value: Any) -> str:
    """Collapse a column label into a single string.

    Tuple labels are joined with underscores after dropping parts that are
    blank once stringified and stripped. Any other value is stringified
    as-is.

    Returns:
        The flattened label, or ``"col"`` for a tuple with no usable parts.
    """
    if not isinstance(value, tuple):
        return str(value)

    pieces = []
    for part in value:
        cleaned = str(part).strip()
        if cleaned:
            pieces.append(cleaned)

    if not pieces:
        return "col"
    return "_".join(pieces)
116
+
117
+
118
def dedupe_names(names: list[str]) -> list[str]:
    """Make every name in *names* unique by appending numeric suffixes.

    The first occurrence of a name is kept unchanged; the n-th repeat
    becomes ``"<name>_<n>"``. If that candidate is already in use (for
    example the input already contains ``"a_2"``), the counter advances
    until a free name is found, so the result never contains duplicates.

    Args:
        names: Names in their original order.

    Returns:
        A new list of the same length and order with unique entries.
    """
    occurrences: dict[str, int] = {}
    taken: set[str] = set()
    output: list[str] = []

    for name in names:
        count = occurrences.get(name, 0) + 1
        candidate = name if count == 1 else f"{name}_{count}"
        # A suffixed candidate may clash with a literal input name (or the
        # reverse); keep bumping the counter until the name is free.
        while candidate in taken:
            count += 1
            candidate = f"{name}_{count}"
        occurrences[name] = count
        taken.add(candidate)
        output.append(candidate)

    return output
128
+
129
+
130
def normalize_columns(columns: list[Any]) -> tuple[list[str], list[dict[str, str]]]:
    """Normalize column labels and record how each one was renamed.

    Each label is flattened with ``flatten_column_name``, converted with
    ``snake_case`` and de-duplicated with ``dedupe_names``.

    Returns:
        A pair ``(final_names, mappings)`` where ``mappings`` holds one
        ``{"original": ..., "normalized": ...}`` dict per column, in order.
    """
    flat_labels = list(map(flatten_column_name, columns))
    final_names = dedupe_names(list(map(snake_case, flat_labels)))

    mappings = []
    for before, after in zip(flat_labels, final_names):
        mappings.append({"original": str(before), "normalized": after})

    return final_names, mappings
140
+
141
+
142
def to_jsonable(value: Any) -> Any:
    """Recursively convert *value* into plain JSON-compatible Python data.

    Conversion rules, checked in order:

    * ``pd.NA`` and float NaN become ``None``. NaN has no JSON
      representation, so it is treated like the other missing marker.
    * ``Path`` becomes ``str``; ``bytes`` are decoded as UTF-8, dropping
      undecodable bytes.
    * ``str``/``int``/``float``/``bool``/``None`` are returned unchanged
      (infinities included).
    * Objects exposing ``.item()`` or ``.tolist()`` are unwrapped through
      those methods and converted again.
    * Dict keys are stringified; lists, tuples and sets become lists.
    * Anything else falls back to ``str(value)``.
    """
    if pd is not None and value is pd.NA:
        return None

    # NaN is the only float that differs from itself; checking it here
    # avoids an extra import and also covers float subclasses.
    if isinstance(value, float) and value != value:
        return None

    if isinstance(value, Path):
        return str(value)

    if isinstance(value, bytes):
        return value.decode("utf-8", errors="ignore")

    if isinstance(value, (str, int, float, bool)) or value is None:
        return value

    if hasattr(value, "item") and not isinstance(value, (str, bytes, bytearray)):
        try:
            return to_jsonable(value.item())
        except Exception:
            # Best effort: .item() fails for multi-element containers,
            # which the .tolist() branch below handles.
            pass

    if hasattr(value, "tolist"):
        try:
            return to_jsonable(value.tolist())
        except Exception:
            # Best effort: fall through to the generic branches.
            pass

    if isinstance(value, dict):
        return {str(k): to_jsonable(v) for k, v in value.items()}

    if isinstance(value, (list, tuple, set)):
        return [to_jsonable(v) for v in value]

    return str(value)
174
+
175
+
176
def normalize_string_or_null(value: Any) -> Any:
    """Strip a string cell and map null-like text to ``pd.NA``.

    Non-string values are returned untouched. A string whose stripped,
    lower-cased form is in ``NULL_MARKERS`` becomes ``pd.NA``; any other
    string is returned stripped.
    """
    if not isinstance(value, str):
        return value

    trimmed = value.strip()
    return pd.NA if trimmed.lower() in NULL_MARKERS else trimmed
183
+
184
+
185
def normalize_string_columns(df: "pd.DataFrame") -> "pd.DataFrame":
    """Clean text columns on a copy of *df* and drop fully empty rows.

    Every object- or string-dtype column is mapped through
    ``normalize_string_or_null``; afterwards rows in which all values are
    missing are removed. The input frame is not modified.
    """
    result = df.copy()
    dtype_checks = pd.api.types

    for name in result.columns:
        column = result[name]
        is_textual = dtype_checks.is_object_dtype(column) or dtype_checks.is_string_dtype(column)
        if not is_textual:
            continue
        result[name] = column.map(normalize_string_or_null)

    return result.dropna(how="all")
195
+
196
+
197
def preview_records(df: "pd.DataFrame", limit: int = 5) -> list[dict[str, Any]]:
    """Return the first *limit* rows of *df* as JSON-compatible records.

    The head of the frame is converted to a list of row dicts and passed
    through ``to_jsonable``.
    """
    head = df.head(limit)
    rows = head.to_dict(orient="records")
    return to_jsonable(rows)
200
+
201
+
202
def classify_hdf5_shape(shape: list[int]) -> str:
    """Name the kind of dataset implied by *shape*.

    Returns:
        ``"scalar"``, ``"vector"`` or ``"matrix"`` for rank 0, 1 or 2;
        ``"pose_tensor"`` for rank 3 whose last axis has length 2 or 3;
        ``"other"`` for everything else.
    """
    rank = len(shape)
    low_rank_kinds = {0: "scalar", 1: "vector", 2: "matrix"}

    if rank in low_rank_kinds:
        return low_rank_kinds[rank]
    if rank == 3 and shape[-1] in (2, 3):
        return "pose_tensor"
    return "other"
212
+
213
+
214
def score_hdf5_candidate(dataset_summary: dict[str, Any]) -> tuple[int, int]:
    """Build a sort key ranking a dataset summary as a conversion candidate.

    The key is ``(priority, element_count)``. Priority depends on the
    summary's ``"kind"``: matrix 40, pose_tensor 35, vector 20, scalar 10 and
    0 for anything else (including a missing kind). ``element_count`` is the
    product of the ``"shape"`` entries, or 1 when the shape is empty or absent.
    """
    priority_by_kind = {
        "matrix": 40,
        "pose_tensor": 35,
        "vector": 20,
        "scalar": 10,
    }

    element_count = 1
    for extent in dataset_summary.get("shape", []):
        element_count *= int(extent)

    kind = dataset_summary.get("kind", "other")
    return (priority_by_kind.get(kind, 0), element_count)
230
+
231
+
232
+ def pick_recommended_sheet(sheet_summaries: list[dict[str, Any]]) -> str | None:
233
+ candidates = [item for item in sheet_summaries if "rows" in item and "column_count" in item]
234
+ if not candidates:
235
+ return None
236
+
237
+ discouraged = {"summary", "meta", "metadata", "readme", "info"}
238
+
239
+ def score(item: dict[str, Any]) -> tuple[int, int]:
240
+ name = snake_case(item["sheet"])
241
+ penalty = 0 if name not in discouraged else -1
242
+ area = int(item.get("rows", 0)) * max(int(item.get("column_count", 0)), 1)
243
+ return (penalty, area)
244
+
245
+ return max(candidates, key=score)["sheet"]
246
+
247
+
248
def decode_text_list(value: Any) -> list[str] | None:
    """Convert *value* to a list of ``snake_case``-d strings, if it is list-like.

    The value is first passed through ``to_jsonable``. If that does not yield
    a list, ``None`` is returned; otherwise each item is stringified and run
    through ``snake_case``.
    """
    converted = to_jsonable(value)
    if isinstance(converted, list):
        return [snake_case(str(item)) for item in converted]
    return None
253
+
254
+
255
def get_hdf5_axis_labels(attrs: dict[str, Any], width: int) -> list[str] | None:
    """Find axis labels of length *width* among the dataset attributes.

    Attribute names are tried in ``HDF5_LABEL_ATTRS`` order. The first one
    present in *attrs* that decodes (via ``decode_text_list``) to a non-empty
    list of exactly *width* entries is returned. ``None`` means no attribute
    qualified.
    """
    for attr_name in HDF5_LABEL_ATTRS:
        if attr_name not in attrs:
            continue
        candidate = decode_text_list(attrs[attr_name])
        if candidate and len(candidate) == width:
            return candidate
    return None
262
+
263
+
264
def get_hdf5_column_names(attrs: dict[str, Any], width: int) -> list[str] | None:
    """Find column names of length *width* among the dataset attributes.

    Attribute names are tried in ``HDF5_COLUMN_ATTRS`` order. The first one
    present in *attrs* that decodes (via ``decode_text_list``) to a non-empty
    list of exactly *width* entries is passed through ``dedupe_names`` and
    returned. ``None`` means no attribute qualified.
    """
    for attr_name in HDF5_COLUMN_ATTRS:
        if attr_name not in attrs:
            continue
        candidate = decode_text_list(attrs[attr_name])
        if candidate and len(candidate) == width:
            return dedupe_names(candidate)
    return None
271
+
272
+
273
def inspect_hdf5(path: Path) -> list[dict[str, Any]]:
    """List every dataset in an HDF5 file with basic shape metadata.

    Calls ``ensure_h5py()`` first, then opens the file read-only and visits
    all items. Each ``h5py.Dataset`` yields a dict with ``path``, ``shape``,
    ``ndim``, ``dtype`` (as a string), ``kind`` (from
    ``classify_hdf5_shape``) and the sorted ``attr_keys``. The result is
    sorted by dataset path.
    """
    ensure_h5py()
    found: list[dict[str, Any]] = []

    def record_dataset(name: str, obj: Any) -> None:
        # Always return None: this callback is handed to visititems().
        if not isinstance(obj, h5py.Dataset):
            return
        dims = list(obj.shape)
        summary = {
            "path": name,
            "shape": dims,
            "ndim": len(dims),
            "dtype": str(obj.dtype),
            "kind": classify_hdf5_shape(dims),
            "attr_keys": sorted(str(key) for key in obj.attrs.keys()),
        }
        found.append(summary)

    with h5py.File(path, "r") as handle:
        handle.visititems(record_dataset)

    found.sort(key=lambda item: item["path"])
    return found
297
+
298
+
299
def inspect_csv_stdlib(path: Path) -> dict[str, Any]:
    """Summarize a CSV file using only the standard library.

    Encodings are tried in order (utf-8, utf-8-sig, gb18030, latin1). For
    each, the delimiter is sniffed from the first 4096 characters (falling
    back to ``","``), the first row is taken as the header, the remaining
    rows are counted, and up to five of them are kept as a preview.

    A file that starts with a UTF-8 byte-order mark is deliberately rejected
    under plain ``utf-8`` so that ``utf-8-sig`` handles it. Otherwise the BOM
    would survive as a ``\\ufeff`` prefix on the first column name.

    Returns:
        A dict with ``path``, ``source_format``, ``rows``, ``column_count``,
        ``columns``, ``preview``, ``read_info`` and ``convert_ready``.

    Raises:
        RuntimeError: if the file is empty, or if no encoding could read it
            (chained from the last underlying error).
    """
    encodings = ("utf-8", "utf-8-sig", "gb18030", "latin1")
    last_error: Exception | None = None
    is_empty = False

    for encoding in encodings:
        try:
            with path.open("r", encoding=encoding, newline="") as handle:
                sample = handle.read(4096)
                if encoding == "utf-8" and sample.startswith("\ufeff"):
                    # BOM present: defer to utf-8-sig, which strips it.
                    continue
                handle.seek(0)
                try:
                    delimiter = csv.Sniffer().sniff(sample).delimiter
                except Exception:
                    # Sniffing is best effort; default to comma.
                    delimiter = ","

                reader = csv.reader(handle, delimiter=delimiter)
                header = next(reader, None)
                if header is None:
                    # No rows at all: another encoding cannot change that.
                    is_empty = True
                    break

                preview = []
                data_rows = 0
                for row in reader:
                    data_rows += 1
                    if len(preview) < 5:
                        preview.append(row)

                return {
                    "path": str(path),
                    "source_format": "csv",
                    "rows": data_rows,
                    "column_count": len(header),
                    "columns": header,
                    "preview": preview,
                    "read_info": {"encoding": encoding, "delimiter": delimiter, "engine": "stdlib"},
                    "convert_ready": True,
                }
        except Exception as exc:
            last_error = exc

    if is_empty:
        raise RuntimeError(f"Failed to read CSV (file is empty): {path}")
    raise RuntimeError(f"Failed to read CSV: {path}") from last_error
337
+
338
+
339
def read_csv_file(path: Path) -> tuple["pd.DataFrame", dict[str, Any]]:
    """Load a CSV with pandas, trying several encodings in turn.

    Calls ``ensure_pandas()`` first. Each attempt uses the python engine with
    ``sep=None`` so pandas detects the delimiter. The first encoding that
    parses wins.

    Returns:
        ``(frame, {"encoding": <encoding used>})``.

    Raises:
        RuntimeError: if every encoding fails, chained from the last error.
    """
    ensure_pandas()
    candidate_encodings = ("utf-8", "utf-8-sig", "gb18030", "latin1")
    failure: Exception | None = None

    for codec in candidate_encodings:
        try:
            table = pd.read_csv(path, sep=None, engine="python", encoding=codec)
        except Exception as exc:
            failure = exc
        else:
            return table, {"encoding": codec}

    raise RuntimeError(f"Failed to read CSV: {path}") from failure
351
+
352
+
353
def inspect_excel(path: Path) -> dict[str, Any]:
    """Summarize every sheet of an Excel workbook.

    Calls ``ensure_pandas()`` first. The workbook is opened once inside a
    context manager, so the file handle is released on exit and each sheet is
    read from the already-open workbook instead of re-opening *path*.

    Each readable sheet gets a summary with ``sheet``, ``rows``,
    ``column_count``, ``columns`` and ``preview``. A sheet that fails to load
    gets ``{"sheet": ..., "error": ...}`` instead, so one bad sheet does not
    abort the inspection.

    Returns:
        A dict describing the workbook. ``convert_ready`` is true only for
        single-sheet workbooks. ``recommended_sheet`` is that single sheet,
        or otherwise the choice of ``pick_recommended_sheet``.
    """
    ensure_pandas()

    sheet_summaries: list[dict[str, Any]] = []
    with pd.ExcelFile(path) as workbook:
        sheet_names = list(workbook.sheet_names)
        for sheet_name in sheet_names:
            try:
                sheet_df = pd.read_excel(workbook, sheet_name=sheet_name)
                columns = [flatten_column_name(col) for col in list(sheet_df.columns)]
                sheet_summaries.append(
                    {
                        "sheet": sheet_name,
                        "rows": int(len(sheet_df)),
                        "column_count": int(len(columns)),
                        "columns": columns,
                        "preview": preview_records(sheet_df),
                    }
                )
            except Exception as exc:
                # Record the failure and keep inspecting the other sheets.
                sheet_summaries.append(
                    {
                        "sheet": sheet_name,
                        "error": str(exc),
                    }
                )

    single_sheet = len(sheet_names) == 1
    if single_sheet:
        recommended_sheet = sheet_names[0]
    else:
        recommended_sheet = pick_recommended_sheet(sheet_summaries)

    return {
        "path": str(path),
        "source_format": "excel",
        "sheet_count": len(sheet_names),
        "sheet_names": sheet_names,
        "sheets": sheet_summaries,
        "convert_ready": single_sheet,
        "needs_sheet": not single_sheet,
        "recommended_sheet": recommended_sheet,
    }
395
+
396
+
397
def inspect_tabular_file(path: Path) -> dict[str, Any]:
    """Inspect *path* according to the format from ``infer_source_format``.

    * ``"excel"`` delegates to ``inspect_excel``.
    * ``"csv"`` uses ``inspect_csv_stdlib`` when pandas is unavailable,
      otherwise reads the file with ``read_csv_file`` and summarizes it.
    * Any other format is treated as HDF5: datasets are listed with
      ``inspect_hdf5`` and the best ``score_hdf5_candidate`` is recommended.
      ``convert_ready`` is true only when exactly one dataset exists.
    """
    source_format = infer_source_format(path)

    if source_format == "excel":
        return inspect_excel(path)

    if source_format == "csv":
        if pd is None:
            return inspect_csv_stdlib(path)

        frame, read_info = read_csv_file(path)
        columns = [flatten_column_name(col) for col in list(frame.columns)]
        summary: dict[str, Any] = {
            "path": str(path),
            "source_format": "csv",
            "rows": int(len(frame)),
            "column_count": int(len(columns)),
            "columns": columns,
            "preview": preview_records(frame),
            "read_info": read_info,
            "convert_ready": True,
        }
        return summary

    datasets = inspect_hdf5(path)
    dataset_count = len(datasets)

    if dataset_count == 1:
        recommended_dataset = datasets[0]["path"]
    elif dataset_count:
        recommended_dataset = max(datasets, key=score_hdf5_candidate)["path"]
    else:
        recommended_dataset = None

    return {
        "path": str(path),
        "source_format": "hdf5",
        "dataset_count": dataset_count,
        "datasets": datasets,
        "convert_ready": dataset_count == 1,
        "needs_dataset": dataset_count != 1,
        "recommended_dataset": recommended_dataset,
    }
436
+
437
+
438
def read_excel_file(path: Path, sheet: str | None) -> tuple["pd.DataFrame", dict[str, Any]]:
    """Load one sheet of an Excel workbook into a DataFrame.

    Calls ``ensure_pandas()`` first. The workbook is opened once inside a
    context manager so its file handle is closed on exit, and the sheet is
    read from that open workbook rather than by re-opening *path*.

    Args:
        path: Workbook location.
        sheet: Sheet name to read. May be ``None`` only when the workbook has
            exactly one sheet, which is then used.

    Returns:
        ``(frame, {"sheet": <name read>, "available_sheets": [...]})``.

    Raises:
        ValueError: if *sheet* is ``None`` and the workbook does not have
            exactly one sheet.
    """
    ensure_pandas()

    with pd.ExcelFile(path) as workbook:
        available_sheets = list(workbook.sheet_names)

        if sheet is None:
            if len(available_sheets) != 1:
                raise ValueError(
                    f"Excel has multiple sheets. Pass --sheet. "
                    f"Available: {available_sheets}"
                )
            sheet = available_sheets[0]

        frame = pd.read_excel(workbook, sheet_name=sheet)

    return frame, {
        "sheet": sheet,
        "available_sheets": available_sheets,
    }
455
+
456
+
457
def hdf5_dataset_to_frame(data: Any, attrs: dict[str, Any]) -> tuple["pd.DataFrame", dict[str, Any]]:
    """Turn an in-memory HDF5 dataset into a DataFrame plus layout info.

    Calls ``ensure_pandas()`` first. Handled layouts, checked in order:

    * dtype with named fields -> ``DataFrame.from_records`` (``structured_array``)
    * rank 0 -> one-row ``value`` column (``scalar``)
    * rank 1 -> ``value`` column (``vector``)
    * rank 2 -> one column per second-axis index, named from
      ``get_hdf5_column_names`` or ``col_<i>`` (``matrix``)
    * rank 3 with last dimension 2 or 3 -> long table with ``frame``,
      ``bodypart``, ``x``, ``y``, ``confidence`` (``pose_long_from_h5``).
      Labels come from ``get_hdf5_axis_labels`` or default to ``point_<i>``;
      confidence is ``pd.NA`` when the last dimension is 2.

    Raises:
        ValueError: for any other shape.
    """
    ensure_pandas()

    if getattr(data.dtype, "names", None):
        return pd.DataFrame.from_records(data), {"layout": "structured_array"}

    rank = data.ndim

    if rank == 0:
        return pd.DataFrame({"value": [to_jsonable(data)]}), {"layout": "scalar"}

    if rank == 1:
        return pd.DataFrame({"value": data}), {"layout": "vector"}

    if rank == 2:
        width = int(data.shape[1])
        columns = get_hdf5_column_names(attrs, width)
        if columns is None:
            columns = [f"col_{index}" for index in range(width)]
        return pd.DataFrame(data, columns=columns), {"layout": "matrix"}

    is_pose_tensor = rank == 3 and data.shape[-1] in (2, 3)
    if not is_pose_tensor:
        raise ValueError(f"Unsupported HDF5 dataset shape: {list(data.shape)}")

    point_count = int(data.shape[1])
    labels = get_hdf5_axis_labels(attrs, point_count)
    if labels is None:
        labels = [f"point_{index}" for index in range(point_count)]

    has_confidence = data.shape[-1] == 3
    parts: list[pd.DataFrame] = [
        pd.DataFrame(
            {
                "frame": range(int(data.shape[0])),
                "bodypart": bodypart,
                "x": data[:, point_index, 0],
                "y": data[:, point_index, 1],
                "confidence": data[:, point_index, 2] if has_confidence else pd.NA,
            }
        )
        for point_index, bodypart in enumerate(labels)
    ]

    layout = {
        "layout": "pose_long_from_h5",
        "bodyparts": labels,
    }
    return pd.concat(parts, ignore_index=True), layout
503
+
504
+
505
def read_hdf5_file(path: Path, dataset: str | None) -> tuple["pd.DataFrame", dict[str, Any]]:
    """Load one dataset of an HDF5 file into a DataFrame.

    Calls ``ensure_h5py()`` first. Datasets are enumerated with
    ``inspect_hdf5``. When *dataset* is ``None`` the file must contain exactly
    one dataset, which is then used. The dataset's attributes are collected
    with ``snake_case``-d keys and ``to_jsonable`` values, and the fully read
    data is converted by ``hdf5_dataset_to_frame``.

    Returns:
        ``(frame, info)`` where ``info`` holds ``dataset``,
        ``available_datasets``, ``attrs`` and the layout info keys.

    Raises:
        ValueError: if the file has no datasets, or *dataset* is ``None`` and
            the file does not contain exactly one dataset.
    """
    ensure_h5py()
    datasets = inspect_hdf5(path)

    if not datasets:
        raise ValueError(f"No datasets found in HDF5 file: {path}")

    if dataset is None:
        if len(datasets) != 1:
            available = [item["path"] for item in datasets]
            raise ValueError(
                f"HDF5 has multiple datasets. Pass --dataset. Available: {available}"
            )
        dataset = datasets[0]["path"]

    with h5py.File(path, "r") as handle:
        node = handle[dataset]
        attrs = {
            snake_case(str(key)): to_jsonable(value)
            for key, value in node.attrs.items()
        }
        # node[()] reads the whole dataset while the file is still open.
        frame, layout_info = hdf5_dataset_to_frame(node[()], attrs)

    info: dict[str, Any] = {
        "dataset": dataset,
        "available_datasets": datasets,
        "attrs": attrs,
    }
    info.update(layout_info)
    return frame, info
532
+
533
+
534
def load_source(
    path: Path,
    sheet: str | None,
    dataset: str | None,
) -> tuple["pd.DataFrame", dict[str, Any], str]:
    """Read *path* with the reader matching its inferred format.

    ``"csv"`` uses ``read_csv_file``, ``"excel"`` uses ``read_excel_file``
    with *sheet*, and any other format uses ``read_hdf5_file`` with *dataset*.

    Returns:
        ``(frame, read_info, source_format)``.
    """
    source_format = infer_source_format(path)

    if source_format == "csv":
        frame, info = read_csv_file(path)
    elif source_format == "excel":
        frame, info = read_excel_file(path, sheet)
    else:
        frame, info = read_hdf5_file(path, dataset)

    return frame, info, source_format
551
+
552
+
553
def detect_pose_columns(columns: list[str]) -> tuple[dict[str, dict[str, str]], list[str]]:
    """Split column names into wide-format pose fields and everything else.

    A column is a pose candidate when it looks like ``<bodypart>_<field>``
    with field in x, y, confidence, likelihood or score; the last three all
    map to the canonical field ``"confidence"``. A bodypart counts as pose
    data only when both its x and y columns are present.

    Candidates whose bodypart turns out to be incomplete (for example a lone
    ``risk_score`` or ``tail_x``) are returned as non-pose columns. Without
    that they would appear in neither result, and a caller that rebuilds a
    table from the two results would silently lose them.

    Returns:
        ``(pose_fields, non_pose_columns)``. ``pose_fields`` maps bodypart to
        ``{canonical_field: column_name}``. ``non_pose_columns`` keeps the
        input order.
    """
    pattern = re.compile(r"^(?P<bodypart>.+)_(?P<field>x|y|confidence|likelihood|score)$")
    candidates: dict[str, dict[str, str]] = {}
    # (column, bodypart) pairs in input order; bodypart is None when the
    # column does not match the pose pattern at all.
    parsed: list[tuple[str, str | None]] = []

    for column in columns:
        match = pattern.match(column)
        if match is None:
            parsed.append((column, None))
            continue

        bodypart = match.group("bodypart")
        field = match.group("field")
        canonical_field = "confidence" if field in ("confidence", "likelihood", "score") else field
        candidates.setdefault(bodypart, {})[canonical_field] = column
        parsed.append((column, bodypart))

    pose_fields = {
        bodypart: fields
        for bodypart, fields in candidates.items()
        if "x" in fields and "y" in fields
    }
    non_pose_columns = [
        column
        for column, bodypart in parsed
        if bodypart is None or bodypart not in pose_fields
    ]
    return pose_fields, non_pose_columns
575
+
576
+
577
def wide_pose_to_long(df: "pd.DataFrame") -> tuple["pd.DataFrame", dict[str, Any]]:
    """Reshape wide pose columns into one row per frame and bodypart.

    If ``detect_pose_columns`` finds no pose fields, *df* is returned as-is
    with layout ``"table"``. Otherwise a ``frame`` column (0..n-1) is added
    when missing, and for each bodypart in sorted order a block is built from
    the non-pose columns plus ``bodypart``, ``x``, ``y`` and ``confidence``
    (``pd.NA`` when the bodypart has no confidence column). The blocks are
    concatenated with a fresh index.

    Returns:
        ``(frame, layout_info)``. For the pose case ``layout_info`` is
        ``{"layout": "pose_long", "bodyparts": [...sorted names...]}``.
    """
    pose_fields, non_pose_columns = detect_pose_columns(list(df.columns))
    if not pose_fields:
        return df, {"layout": "table"}

    working = df.copy()
    if "frame" not in working.columns:
        working.insert(0, "frame", range(len(working)))

    # "frame" always leads the metadata columns, without being duplicated.
    meta_columns = ["frame"] + [name for name in non_pose_columns if name != "frame"]
    bodyparts = sorted(pose_fields)

    long_parts: list[pd.DataFrame] = []
    for bodypart in bodyparts:
        fields = pose_fields[bodypart]
        block = working[meta_columns].copy()
        block["bodypart"] = bodypart
        block["x"] = working[fields["x"]]
        block["y"] = working[fields["y"]]
        if "confidence" in fields:
            block["confidence"] = working[fields["confidence"]]
        else:
            block["confidence"] = pd.NA
        long_parts.append(block)

    stacked = pd.concat(long_parts, ignore_index=True)
    return stacked, {
        "layout": "pose_long",
        "bodyparts": bodyparts,
    }
603
+
604
+
605
def unique_column_name(existing: set[str], preferred: str) -> str:
    """Return *preferred*, or the first free ``<preferred>_<n>`` with n >= 2.

    *existing* is only read, never modified.
    """
    candidate = preferred
    suffix = 2
    while candidate in existing:
        candidate = f"{preferred}_{suffix}"
        suffix += 1
    return candidate
613
+
614
+
615
def add_provenance_columns(
    df: "pd.DataFrame",
    path: Path,
    source_format: str,
) -> tuple["pd.DataFrame", dict[str, str]]:
    """Append constant columns describing where the data came from.

    Works on a copy of *df*. The added values are ``source_file`` (file
    name), ``source_stem`` (stem passed through ``sanitize_name_fragment``),
    ``source_format``, ``source_parent`` (parent directory name) and, when
    the parent has a distinct parent of its own, ``source_group``. Each is
    written under a name from ``unique_column_name`` so existing columns are
    never overwritten.

    Returns:
        ``(frame, provenance_map)`` where ``provenance_map`` maps each logical
        name to the column name actually used.
    """
    output = df.copy()
    taken = set(output.columns)

    values = {
        "source_file": path.name,
        "source_stem": sanitize_name_fragment(path.stem),
        "source_format": source_format,
        "source_parent": path.parent.name,
    }

    grandparent = path.parent.parent
    if grandparent != path.parent:
        values["source_group"] = grandparent.name

    provenance_map: dict[str, str] = {}
    for logical_name, value in values.items():
        column_name = unique_column_name(taken, logical_name)
        output[column_name] = value
        taken.add(column_name)
        provenance_map[logical_name] = column_name

    return output, provenance_map
641
+
642
+
643
def build_schema(df: "pd.DataFrame") -> dict[str, Any]:
    """Describe *df* as a JSON-friendly schema dict.

    The result has ``row_count``, ``column_count`` and a ``columns`` list
    with, per column, its ``name``, ``dtype`` (as a string), ``null_count``
    and ``non_null_count``.
    """

    def describe(column: Any) -> dict[str, Any]:
        series = df[column]
        return {
            "name": str(column),
            "dtype": str(series.dtype),
            "null_count": int(series.isna().sum()),
            "non_null_count": int(series.notna().sum()),
        }

    column_entries = [describe(column) for column in df.columns]
    return {
        "row_count": int(len(df)),
        "column_count": int(len(df.columns)),
        "columns": column_entries,
    }
657
+
658
+
659
def write_outputs(
    df: "pd.DataFrame",
    out_dir: Path,
    stem: str,
    report: dict[str, Any],
    output_format: str,
) -> dict[str, str]:
    """Write the data file, its schema and the report under *out_dir*.

    Creates ``normalized/``, ``schemas/`` and ``reports/`` as needed. With
    ``output_format == "csv"`` the data is written as CSV. Any other value
    attempts Parquet first; on failure an explicit ``"parquet"`` request
    re-raises, while other values fall back to CSV and record the error text
    in ``report["parquet_fallback"]``.

    *report* is modified in place: ``output_format`` is set to the format
    actually written before the report is serialized.

    Returns:
        Paths (as strings) under the keys ``data``, ``schema`` and ``report``.
    """
    targets = {name: out_dir / name for name in ("normalized", "schemas", "reports")}
    for directory in targets.values():
        directory.mkdir(parents=True, exist_ok=True)

    csv_path = targets["normalized"] / f"{stem}.csv"
    parquet_path = targets["normalized"] / f"{stem}.parquet"
    schema_path = targets["schemas"] / f"{stem}.schema.json"
    report_path = targets["reports"] / f"{stem}.report.json"

    data_path: Path
    if output_format == "csv":
        df.to_csv(csv_path, index=False)
        data_path, actual_format = csv_path, "csv"
    else:
        try:
            df.to_parquet(parquet_path, index=False)
        except Exception as exc:
            if output_format == "parquet":
                # Parquet was explicitly requested: do not silently downgrade.
                raise
            report["parquet_fallback"] = str(exc)
            df.to_csv(csv_path, index=False)
            data_path, actual_format = csv_path, "csv"
        else:
            data_path, actual_format = parquet_path, "parquet"

    report["output_format"] = actual_format

    schema_text = json.dumps(build_schema(df), ensure_ascii=False, indent=2)
    schema_path.write_text(schema_text, encoding="utf-8")

    report_text = json.dumps(report, ensure_ascii=False, indent=2)
    report_path.write_text(report_text, encoding="utf-8")

    return {
        "data": str(data_path),
        "schema": str(schema_path),
        "report": str(report_path),
    }
712
+
713
+
714
def inspect_source(path: Path) -> dict[str, Any]:
    """Inspect *path* and tag the result with status and environment info.

    Returns the dict from ``inspect_tabular_file`` with ``ok`` set to
    ``True`` plus ``script_version`` and the ``check_env()`` result as
    ``env``.

    Raises:
        FileNotFoundError: if *path* does not exist.
    """
    if not path.exists():
        raise FileNotFoundError(path)

    summary = inspect_tabular_file(path)
    summary["ok"] = True
    summary["script_version"] = SCRIPT_VERSION
    environment = check_env()
    summary["env"] = environment
    return summary
722
+
723
+
724
def convert_source(
    path: Path,
    out_dir: Path,
    sheet: str | None,
    dataset: str | None,
    output_format: str,
    pose_long: bool,
) -> dict[str, Any]:
    """Run the full normalize-and-write pipeline for one input file.

    Steps, in order: check the environment, load the source, normalize
    string cells, normalize column names, optionally reshape wide pose
    columns to long form, call ``convert_dtypes()``, append provenance
    columns, then write data, schema and report via ``write_outputs``.

    The output file stem is the sanitized input stem followed by the
    sanitized *sheet* and *dataset* when those are given.

    Returns:
        A summary dict with ``ok``, the output shape and columns,
        ``layout_info``, ``env`` and the written ``outputs`` paths.

    Raises:
        FileNotFoundError: if *path* does not exist.
        RuntimeError: if ``check_env()`` reports the environment is not ok.
    """
    if not path.exists():
        raise FileNotFoundError(path)

    env = check_env()
    if not env["ok"]:
        install_hint = env["install_hint"]
        raise RuntimeError(
            f"Environment check failed. Install dependencies first: {install_hint}"
        )

    source_frame, read_info, source_format = load_source(path, sheet, dataset)

    # Capture the source shape before any normalization touches the data.
    source_rows = int(len(source_frame))
    source_columns = [flatten_column_name(col) for col in list(source_frame.columns)]

    normalized = normalize_string_columns(source_frame.copy())

    normalized_columns, column_mappings = normalize_columns(list(normalized.columns))
    normalized.columns = normalized_columns

    layout_info: dict[str, Any] = {"layout": "table", "pose_long": False}
    if pose_long:
        normalized, layout_info = wide_pose_to_long(normalized)

    normalized = normalized.convert_dtypes()
    normalized, provenance_map = add_provenance_columns(normalized, path, source_format)

    # The stem is always included; sheet and dataset only when provided.
    raw_stem_parts = [path.stem, *(extra for extra in (sheet, dataset) if extra)]
    output_stem = "_".join(sanitize_name_fragment(part) for part in raw_stem_parts)

    output_rows = int(len(normalized))
    output_column_count = int(len(normalized.columns))

    report = {
        "input": str(path),
        "source_format": source_format,
        "source_rows": source_rows,
        "source_column_count": len(source_columns),
        "source_columns": source_columns,
        "column_mappings": column_mappings,
        "read_info": read_info,
        "layout_info": layout_info,
        "provenance_columns": provenance_map,
        "output_rows": output_rows,
        "output_column_count": output_column_count,
        "script_version": SCRIPT_VERSION,
        "env": env,
    }

    outputs = write_outputs(normalized, out_dir, output_stem, report, output_format)

    return {
        "ok": True,
        "script_version": SCRIPT_VERSION,
        "input": str(path),
        "source_format": source_format,
        "rows": output_rows,
        "column_count": output_column_count,
        "columns": list(normalized.columns),
        "layout_info": layout_info,
        "env": env,
        "outputs": outputs,
    }
799
+
800
+
801
def build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser.

    Three sub-commands are defined, and one of them is required:

    * ``check-env`` takes no arguments.
    * ``inspect`` takes a positional ``input``.
    * ``convert`` takes a positional ``input`` plus ``--out``, ``--sheet``,
      ``--dataset``, ``--format`` (auto, parquet or csv) and
      ``--no-pose-long``.
    """
    parser = argparse.ArgumentParser(
        description="Check environment, inspect, and normalize local CSV, Excel, and HDF5 files."
    )
    commands = parser.add_subparsers(dest="command", required=True)

    commands.add_parser("check-env", help="Check Python and dependency availability.")

    inspect_cmd = commands.add_parser("inspect", help="Inspect a local data file.")
    inspect_cmd.add_argument("input", help="Path to a local csv/xlsx/h5 file.")

    convert_cmd = commands.add_parser("convert", help="Convert a local data file.")
    convert_cmd.add_argument("input", help="Path to a local csv/xlsx/h5 file.")

    convert_options: tuple[tuple[str, dict[str, Any]], ...] = (
        ("--out", {"default": "out", "help": "Output directory. Default: out"}),
        (
            "--sheet",
            {
                "default": None,
                "help": "Excel sheet name when the workbook has multiple sheets.",
            },
        ),
        (
            "--dataset",
            {
                "default": None,
                "help": "HDF5 dataset path when the file has multiple datasets.",
            },
        ),
        (
            "--format",
            {
                "choices": ("auto", "parquet", "csv"),
                "default": "auto",
                "help": "Output data format. Default: auto",
            },
        ),
        (
            "--no-pose-long",
            {
                "action": "store_true",
                "help": "Disable automatic wide-pose to long-form conversion.",
            },
        ),
    )
    for flag, options in convert_options:
        convert_cmd.add_argument(flag, **options)

    return parser
838
+
839
+
840
def main(argv: list[str] | None = None) -> int:
    """Run the CLI and return a process exit code.

    Dispatches on the parsed sub-command: ``check-env`` calls ``check_env``,
    ``inspect`` calls ``inspect_source`` and anything else calls
    ``convert_source``. On success the result is printed to stdout as
    indented JSON and 0 is returned. Any exception is reported to stderr as a
    JSON object with ``ok: false``, the command, the error text and the
    script version, and 1 is returned.
    """
    args = build_parser().parse_args(argv)

    def render(payload: Any) -> str:
        return json.dumps(payload, ensure_ascii=False, indent=2)

    try:
        command = args.command
        if command == "check-env":
            result = check_env()
        elif command == "inspect":
            result = inspect_source(Path(args.input))
        else:
            result = convert_source(
                path=Path(args.input),
                out_dir=Path(args.out),
                sheet=args.sheet,
                dataset=args.dataset,
                output_format=args.format,
                pose_long=not args.no_pose_long,
            )

        # Serialization stays inside the try so that a non-serializable
        # result is reported through the same JSON error path.
        print(render(result))
        return 0

    except Exception as exc:
        failure = {
            "ok": False,
            "command": args.command,
            "error": str(exc),
            "script_version": SCRIPT_VERSION,
        }
        print(render(failure), file=sys.stderr)
        return 1
871
+
872
+
873
# Script entry point: run the CLI and use main()'s return value as the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())